==================== rep_count = 20 dimensions = [20, 40, 50, 100, 256] out_shape = [20, 50, 100, 256] in_shape = [40, 50, 100, 256] idx_dim = 0 out_shape = [40, 20, 100, 256] in_shape = [40, 50, 100, 256] idx_dim = 1 out_shape = [40, 50, 20, 256] in_shape = [40, 50, 100, 256] idx_dim = 2 out_shape = [40, 50, 100, 20] in_shape = [40, 50, 100, 256] idx_dim = 3 out_shape = [20, 50, 256, 100] in_shape = [40, 50, 256, 100] idx_dim = 0 out_shape = [40, 20, 256, 100] in_shape = [40, 50, 256, 100] idx_dim = 1 out_shape = [40, 50, 20, 100] in_shape = [40, 50, 256, 100] idx_dim = 2 out_shape = [40, 50, 256, 20] in_shape = [40, 50, 256, 100] idx_dim = 3 out_shape = [20, 100, 50, 256] in_shape = [40, 100, 50, 256] idx_dim = 0 B = [20, 100, 50, 256] (stride (100, 1, 512000, 2000)) A = [40, 100, 50, 256] (stride (25600, 256, 1024000, 1)) dim = 0 604.703 -> 603.650 ( -0.17%) [ +0.00% +0.17% +0.01% / +0.06% -0.17% -0.10%] index_select const : Elapsed 30.235 ms (604.703 ms / 20) 604.858 -> 605.095 ( +0.04%) [ +0.03% +0.17% +0.00% / +0.18% +0.04% +0.13%] index_select wrap : Elapsed 30.252 ms (605.046 ms / 20) 604.625 -> 604.698 ( +0.01%) [ +0.15% +0.15% +0.00% / +0.09% +0.01% +0.13%] index_select linear : Elapsed 30.278 ms (605.551 ms / 20) 604.456 -> 603.260 ( -0.20%) [ +0.05% +0.00% +0.05% / -0.03% -0.20% +0.01%] index_select reverse : Elapsed 30.239 ms (604.779 ms / 20) 603.726 -> 603.704 ( -0.00%) [ +0.03% +0.00% +0.06% / -0.00% +0.30% +0.25%] index_select skip64 : Elapsed 30.196 ms (603.913 ms / 20) 603.814 -> 604.571 ( +0.13%) [ +0.05% +0.07% +0.00% / +0.13% +0.21% +0.29%] index_select skip256 : Elapsed 30.205 ms (604.101 ms / 20) 604.465 -> 604.729 ( +0.04%) [ +0.01% +0.05% +0.00% / +0.05% +0.07% +0.04%] index_select spread : Elapsed 30.225 ms (604.503 ms / 20) 604.606 -> 604.751 ( +0.02%) [ +0.12% +0.07% +0.00% / +0.23% +0.03% +0.02%] index_select strided 3 : Elapsed 30.266 ms (605.311 ms / 20) 605.021 -> 603.978 ( -0.17%) [ +0.19% +0.00% +0.11% / +0.17% -0.00% -0.17%] index_select strided 5 : Elapsed 30.309 ms (606.182 ms / 20) 605.233 -> 602.973 ( -0.37%) [ +0.07% +0.00% +0.08% / -0.03% -0.37% -0.37%] index_select strided 7 : Elapsed 30.284 ms (605.674 ms / 20) 604.309 -> 602.546 ( -0.29%) [ +0.08% +0.00% +0.13% / +0.10% -0.29% -0.22%] index_select strided 8 : Elapsed 30.240 ms (604.810 ms / 20) 605.310 -> 604.321 ( -0.16%) [ +0.00% +0.08% +0.13% / +0.05% -0.16% -0.02%] index_select strided 16 : Elapsed 30.265 ms (605.310 ms / 20) 605.305 -> 604.687 ( -0.10%) [ +0.12% +0.00% +0.01% / +0.01% -0.10% -0.06%] index_select random : Elapsed 30.302 ms (606.038 ms / 20) 604.463 -> 604.629 ( +0.03%) [ +0.21% +0.14% +0.00% / +0.18% +0.03% +0.08%] index_select random_sorted : Elapsed 30.287 ms (605.733 ms / 20) 605.343 -> 604.859 ( -0.08%) [ +0.09% +0.04% +0.00% / -0.02% +0.07% -0.08%] index_select perm : Elapsed 30.294 ms (605.883 ms / 20) 602.644 -> 602.964 ( +0.05%) [ +0.00% +0.04% +0.04% / +0.05% +0.36% +0.31%] index_select perm_sorted : Elapsed 30.132 ms (602.644 ms / 20) out_shape = [40, 20, 50, 256] in_shape = [40, 100, 50, 256] idx_dim = 1 out_shape = [40, 100, 20, 256] in_shape = [40, 100, 50, 256] idx_dim = 2 out_shape = [40, 100, 50, 20] in_shape = [40, 100, 50, 256] idx_dim = 3 B = [40, 100, 50, 20] (stride (100000, 1000, 1, 50)) A = [40, 100, 50, 256] (stride (1, 512000, 40, 2000)) dim = 3 48.393 -> 48.382 ( -0.02%) [ +0.32% +0.08% +0.00% / -0.02% +0.43% +0.44%] index_select const : Elapsed 2.427 ms (48.547 ms / 20) 57.090 -> 57.213 ( +0.22%) [ +0.19% +0.00% +0.19% / +0.22% +1.22% +1.30%] index_select wrap : Elapsed 2.860 ms (57.199 ms / 20) 57.137 -> 57.189 ( +0.09%) [ +0.19% +0.08% +0.00% / +0.09% +1.23% +1.05%] index_select linear : Elapsed 2.862 ms (57.243 ms / 20) 57.980 -> 57.521 ( -0.79%) [ +0.09% +0.00% +0.09% / -0.03% -0.75% -0.79%] index_select reverse : Elapsed 2.902 ms (58.034 ms / 20) 48.371 -> 48.436 ( +0.13%) [ +0.38% +0.36% +0.00% / +0.13% +0.44% +0.28%] index_select skip64 : Elapsed 2.428 ms (48.554 ms / 20) 48.466 -> 48.433 ( -0.07%) [ +0.04% +0.14% +0.00% / -0.07% +0.02% +0.10%] index_select skip256 : Elapsed 2.424 ms (48.487 ms / 20) 57.886 -> 57.960 ( +0.13%) [ +0.23% +0.27% +0.00% / +0.27% +0.13% +0.28%] index_select spread : Elapsed 2.901 ms (58.017 ms / 20) 57.601 -> 57.644 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.95% +1.07%] index_select strided 3 : Elapsed 2.880 ms (57.601 ms / 20) 57.200 -> 57.332 ( +0.23%) [ +0.17% +0.21% +0.00% / +0.23% +1.11% +1.14%] index_select strided 5 : Elapsed 2.865 ms (57.298 ms / 20) 57.382 -> 57.527 ( +0.25%) [ +0.18% +0.00% +0.14% / +0.25% +0.29% +0.41%] index_select strided 7 : Elapsed 2.874 ms (57.486 ms / 20) 58.344 -> 57.620 ( -1.24%) [ +0.09% +0.00% +0.01% / +0.12% -0.97% -1.24%] index_select strided 8 : Elapsed 2.920 ms (58.394 ms / 20) 57.748 -> 57.614 ( -0.23%) [ +0.30% +0.00% +0.34% / +0.07% -0.21% -0.23%] index_select strided 16 : Elapsed 2.896 ms (57.923 ms / 20) 57.079 -> 57.159 ( +0.14%) [ +0.02% +0.00% +0.10% / +0.14% +0.55% +0.60%] index_select strided 64 : Elapsed 2.855 ms (57.090 ms / 20) 57.570 -> 57.398 ( -0.30%) [ +0.12% +0.00% +0.01% / +0.21% -0.30% -0.22%] index_select strided 100 : Elapsed 2.882 ms (57.641 ms / 20) 57.758 -> 56.649 ( -1.92%) [ +0.00% +0.08% +0.10% / -0.01% -1.81% -1.92%] index_select strided 255 : Elapsed 2.888 ms (57.758 ms / 20) 57.848 -> 57.923 ( +0.13%) [ +0.30% +0.18% +0.00% / +0.13% +0.31% +0.47%] index_select random : Elapsed 2.901 ms (58.023 ms / 20) 57.679 -> 57.627 ( -0.09%) [ +0.00% +0.03% +0.05% / -0.09% +1.13% +0.83%] index_select random_sorted : Elapsed 2.884 ms (57.679 ms / 20) 57.472 -> 57.259 ( -0.37%) [ +0.23% +0.00% +0.26% / +0.31% -0.37% -0.35%] index_select perm : Elapsed 2.880 ms (57.603 ms / 20) 57.676 -> 57.479 ( -0.34%) [ +0.00% +0.15% +0.12% / -0.05% -0.26% -0.34%] index_select perm_sorted : Elapsed 2.884 ms (57.676 ms / 20) out_shape = [20, 100, 256, 50] in_shape = [40, 100, 256, 50] idx_dim = 0 out_shape = [40, 20, 256, 50] in_shape = [40, 100, 256, 50] idx_dim = 1 out_shape = [40, 100, 20, 50] in_shape = [40, 100, 256, 50] idx_dim = 2 out_shape = [40, 100, 256, 20] in_shape = [40, 100, 256, 50] idx_dim = 3 B = [40, 100, 256, 20] (stride (25600, 256, 1, 1024000)) A = [40, 100, 256, 50] (stride (1, 512000, 2000, 40)) dim = 3 362.143 -> 360.554 ( -0.44%) [ +0.17% +0.00% +0.10% / +0.07% -0.44% -0.22%] index_select const : Elapsed 18.137 ms (362.750 ms / 20) 367.427 -> 362.957 ( -1.22%) [ +0.02% +0.00% +0.00% / +0.00% -1.22% -1.11%] index_select wrap : Elapsed 18.375 ms (367.498 ms / 20) 366.929 -> 362.765 ( -1.13%) [ +0.00% +0.13% +0.01% / +0.12% -1.10% -1.13%] index_select linear : Elapsed 18.346 ms (366.929 ms / 20) 366.140 -> 364.972 ( -0.32%) [ +0.00% +0.07% +0.07% / +0.07% -0.21% -0.32%] index_select reverse : Elapsed 18.307 ms (366.140 ms / 20) 362.661 -> 360.538 ( -0.59%) [ +0.00% +0.04% +0.02% / -0.03% -0.59% -0.54%] index_select skip64 : Elapsed 18.133 ms (362.661 ms / 20) 362.469 -> 361.221 ( -0.34%) [ +0.00% +0.07% +0.03% / +0.02% -0.34% -0.34%] index_select skip256 : Elapsed 18.123 ms (362.469 ms / 20) 368.783 -> 363.956 ( -1.31%) [ +0.12% +0.00% +0.21% / +0.02% -1.25% -1.31%] index_select spread : Elapsed 18.462 ms (369.232 ms / 20) 368.229 -> 366.224 ( -0.54%) [ +0.14% +0.10% +0.00% / +0.22% -0.54% -0.39%] index_select strided 3 : Elapsed 18.437 ms (368.741 ms / 20) 370.278 -> 367.074 ( -0.87%) [ +0.00% +0.07% +0.02% / +0.03% -0.76% -0.87%] index_select strided 5 : Elapsed 18.514 ms (370.278 ms / 20) 369.038 -> 368.990 ( -0.01%) [ +0.00% +0.28% +0.21% / +0.25% +0.11% -0.01%] index_select strided 7 : Elapsed 18.452 ms (369.038 ms / 20) 369.087 -> 369.368 ( +0.08%) [ +0.14% +0.00% +0.12% / +0.08% +0.11% +0.17%] index_select strided 8 : Elapsed 18.481 ms (369.611 ms / 20) 370.294 -> 369.368 ( -0.25%) [ +0.20% +0.00% +0.21% / +0.09% -0.25% -0.13%] index_select strided 16 : Elapsed 18.552 ms (371.035 ms / 20) 365.177 -> 365.389 ( +0.06%) [ +0.08% +0.24% +0.00% / +0.06% +0.92% +0.77%] index_select random : Elapsed 18.274 ms (365.483 ms / 20) 367.158 -> 362.425 ( -1.29%) [ +0.02% +0.00% +0.03% / +0.04% -1.29% -1.23%] index_select random_sorted : Elapsed 18.361 ms (367.218 ms / 20) 367.519 -> 367.680 ( +0.04%) [ +0.26% +0.03% +0.00% / +0.04% +0.30% +0.27%] index_select perm : Elapsed 18.423 ms (368.468 ms / 20) 367.071 -> 363.751 ( -0.90%) [ +0.19% +0.11% +0.00% / +0.07% -0.88% -0.90%] index_select perm_sorted : Elapsed 18.388 ms (367.753 ms / 20) B = [40, 100, 256, 20] (stride (1, 10240, 40, 1024000)) A = [40, 100, 256, 50] (stride (1, 40, 4000, 1024000)) dim = 3 587.104 -> 587.537 ( +0.07%) [ +0.10% +0.14% +0.00% / +0.07% +0.21% +0.26%] index_select const : Elapsed 29.386 ms (587.718 ms / 20) 590.940 -> 591.113 ( +0.03%) [ +0.17% +0.00% +0.09% / +0.03% +0.03% +0.08%] index_select wrap : Elapsed 29.596 ms (591.925 ms / 20) 591.028 -> 591.856 ( +0.14%) [ +0.00% +0.13% +0.19% / +0.19% +0.15% +0.14%] index_select linear : Elapsed 29.551 ms (591.028 ms / 20) 591.448 -> 591.116 ( -0.06%) [ +0.04% +0.00% +0.09% / -0.03% -0.01% -0.06%] index_select reverse : Elapsed 29.586 ms (591.712 ms / 20) 587.389 -> 587.371 ( -0.00%) [ +0.09% +0.12% +0.00% / -0.00% +0.16% +0.13%] index_select skip64 : Elapsed 29.395 ms (587.908 ms / 20) 586.942 -> 587.070 ( +0.02%) [ +0.20% +0.00% +0.17% / +0.02% +0.26% +0.27%] index_select skip256 : Elapsed 29.406 ms (588.119 ms / 20) 591.487 -> 591.211 ( -0.05%) [ +0.03% +0.03% +0.00% / -0.05% +0.17% +0.27%] index_select spread : Elapsed 29.584 ms (591.685 ms / 20) 591.410 -> 590.485 ( -0.16%) [ +0.03% +0.05% +0.00% / +0.01% -0.16% +0.08%] index_select strided 3 : Elapsed 29.580 ms (591.594 ms / 20) 591.500 -> 592.040 ( +0.09%) [ +0.04% +0.01% +0.00% / +0.09% +0.35% +0.32%] index_select strided 5 : Elapsed 29.587 ms (591.741 ms / 20) 591.803 -> 590.905 ( -0.15%) [ +0.00% +0.08% +0.06% / -0.01% -0.07% -0.15%] index_select strided 7 : Elapsed 29.590 ms (591.803 ms / 20) 591.429 -> 591.442 ( +0.00%) [ +0.02% +0.13% +0.00% / +0.08% +0.07% +0.00%] index_select strided 8 : Elapsed 29.577 ms (591.538 ms / 20) 591.359 -> 591.856 ( +0.08%) [ +0.06% +0.00% +0.22% / +0.14% +0.23% +0.08%] index_select strided 16 : Elapsed 29.584 ms (591.689 ms / 20) 591.436 -> 590.865 ( -0.10%) [ +0.00% +0.19% +0.06% / +0.12% -0.10% -0.01%] index_select random : Elapsed 29.572 ms (591.436 ms / 20) 590.271 -> 588.394 ( -0.32%) [ +0.10% +0.00% +0.05% / +0.12% -0.17% -0.32%] index_select random_sorted : Elapsed 29.542 ms (590.839 ms / 20) 591.033 -> 591.402 ( +0.06%) [ +0.00% +0.12% +0.16% / +0.14% +0.08% +0.06%] index_select perm : Elapsed 29.552 ms (591.033 ms / 20) 592.090 -> 590.829 ( -0.21%) [ +0.00% +0.09% +0.09% / +0.05% -0.21% -0.08%] index_select perm_sorted : Elapsed 29.605 ms (592.090 ms / 20) out_shape = [20, 256, 50, 100] in_shape = [40, 256, 50, 100] idx_dim = 0 out_shape = [40, 20, 50, 100] in_shape = [40, 256, 50, 100] idx_dim = 1 B = [40, 20, 50, 100] (stride (1, 40, 800, 40000)) A = [40, 256, 50, 100] (stride (12800, 1, 256, 512000)) dim = 1 140.108 -> 139.888 ( -0.16%) [ +0.17% +0.07% +0.00% / +0.34% -0.08% -0.16%] index_select const : Elapsed 7.018 ms (140.351 ms / 20) 140.365 -> 139.822 ( -0.39%) [ +0.00% +0.09% +0.12% / -0.23% -0.39% -0.21%] index_select wrap : Elapsed 7.018 ms (140.365 ms / 20) 140.389 -> 139.922 ( -0.33%) [ +0.00% +0.03% +0.09% / +0.05% -0.33% -0.30%] index_select linear : Elapsed 7.019 ms (140.389 ms / 20) 140.108 -> 139.710 ( -0.28%) [ +0.18% +0.00% +0.04% / +0.30% -0.17% -0.28%] index_select reverse : Elapsed 7.018 ms (140.357 ms / 20) 140.076 -> 139.562 ( -0.37%) [ +0.11% +0.26% +0.00% / +0.26% -0.09% -0.37%] index_select skip64 : Elapsed 7.011 ms (140.225 ms / 20) 140.148 -> 139.840 ( -0.22%) [ +0.10% +0.00% +0.23% / +0.18% -0.11% -0.22%] index_select skip256 : Elapsed 7.015 ms (140.291 ms / 20) 140.812 -> 140.436 ( -0.27%) [ +0.00% +0.10% +0.04% / +0.12% -0.27% -0.26%] index_select spread : Elapsed 7.041 ms (140.812 ms / 20) 140.487 -> 139.996 ( -0.35%) [ +0.00% +0.04% +0.04% / +0.25% -0.35% -0.35%] index_select strided 3 : Elapsed 7.024 ms (140.487 ms / 20) 140.527 -> 139.901 ( -0.45%) [ +0.00% +0.16% +0.00% / +0.09% -0.10% -0.45%] index_select strided 5 : Elapsed 7.026 ms (140.527 ms / 20) 140.486 -> 140.296 ( -0.14%) [ +0.27% +0.16% +0.00% / +0.25% -0.14% -0.01%] index_select strided 7 : Elapsed 7.043 ms (140.869 ms / 20) 140.832 -> 140.304 ( -0.37%) [ +0.06% +0.05% +0.00% / +0.01% -0.37% -0.36%] index_select strided 8 : Elapsed 7.046 ms (140.917 ms / 20) 140.753 -> 140.619 ( -0.10%) [ +0.05% +0.00% +0.19% / +0.14% -0.09% -0.10%] index_select strided 16 : Elapsed 7.041 ms (140.827 ms / 20) 140.744 -> 140.011 ( -0.52%) [ +0.00% +0.16% +0.19% / +0.05% -0.45% -0.52%] index_select strided 64 : Elapsed 7.037 ms (140.744 ms / 20) 140.674 -> 140.187 ( -0.35%) [ +0.18% +0.00% +0.14% / +0.26% -0.20% -0.35%] index_select strided 100 : Elapsed 7.046 ms (140.923 ms / 20) 140.036 -> 139.955 ( -0.06%) [ +0.42% +0.00% +0.21% / +0.21% -0.02% -0.06%] index_select strided 255 : Elapsed 7.031 ms (140.624 ms / 20) 140.821 -> 140.452 ( -0.26%) [ +0.00% +0.00% +0.00% / +0.07% -0.26% -0.25%] index_select random : Elapsed 7.041 ms (140.821 ms / 20) 140.499 -> 140.439 ( -0.04%) [ +0.09% +0.21% +0.00% / +0.21% -0.01% -0.04%] index_select random_sorted : Elapsed 7.032 ms (140.632 ms / 20) 140.604 -> 140.303 ( -0.21%) [ +0.13% +0.22% +0.00% / +0.18% -0.16% -0.21%] index_select perm : Elapsed 7.040 ms (140.791 ms / 20) 140.548 -> 140.379 ( -0.12%) [ +0.11% +0.00% +0.07% / +0.15% -0.12% -0.09%] index_select perm_sorted : Elapsed 7.035 ms (140.705 ms / 20) out_shape = [40, 256, 20, 100] in_shape = [40, 256, 50, 100] idx_dim = 2 out_shape = [40, 256, 50, 20] in_shape = [40, 256, 50, 100] idx_dim = 3 B = [40, 256, 50, 20] (stride (1, 40000, 40, 2000)) A = [40, 256, 50, 100] (stride (1, 40, 10240, 512000)) dim = 3 283.039 -> 281.995 ( -0.37%) [ +0.00% +0.13% +0.03% / +0.23% -0.30% -0.37%] index_select const : Elapsed 14.152 ms (283.039 ms / 20) 292.688 -> 291.335 ( -0.46%) [ +0.23% +0.00% +0.15% / +0.11% -0.46% -0.41%] index_select wrap : Elapsed 14.668 ms (293.370 ms / 20) 293.012 -> 291.278 ( -0.59%) [ +0.12% +0.02% +0.00% / +0.25% -0.42% -0.59%] index_select linear : Elapsed 14.668 ms (293.362 ms / 20) 293.142 -> 291.579 ( -0.53%) [ +0.00% +0.04% +0.03% / -0.08% -0.46% -0.53%] index_select reverse : Elapsed 14.657 ms (293.142 ms / 20) 283.134 -> 281.685 ( -0.51%) [ +0.00% +0.05% +0.08% / -0.08% -0.49% -0.51%] index_select skip64 : Elapsed 14.157 ms (283.134 ms / 20) 282.787 -> 281.132 ( -0.59%) [ +0.03% +0.00% +0.07% / +0.19% -0.35% -0.59%] index_select skip256 : Elapsed 14.144 ms (282.882 ms / 20) 293.600 -> 293.371 ( -0.08%) [ +0.00% +0.12% +0.03% / +0.10% -0.08% -0.05%] index_select spread : Elapsed 14.680 ms (293.600 ms / 20) 294.878 -> 291.627 ( -1.10%) [ +0.00% +0.23% +0.36% / +0.00% -1.09% -1.10%] index_select strided 3 : Elapsed 14.744 ms (294.878 ms / 20) 293.419 -> 293.081 ( -0.12%) [ +0.03% +0.34% +0.00% / +0.10% -0.12% +0.21%] index_select strided 5 : Elapsed 14.676 ms (293.521 ms / 20) 292.461 -> 293.018 ( +0.19%) [ +0.25% +0.00% +0.11% / +0.19% +0.24% +0.31%] index_select strided 7 : Elapsed 14.660 ms (293.193 ms / 20) 292.950 -> 292.044 ( -0.31%) [ +0.03% +0.00% +0.28% / -0.00% -0.31% -0.31%] index_select strided 8 : Elapsed 14.652 ms (293.038 ms / 20) 292.760 -> 292.453 ( -0.10%) [ +0.00% +0.17% +0.04% / -0.09% -0.10% -0.08%] index_select strided 16 : Elapsed 14.638 ms (292.760 ms / 20) 294.563 -> 292.553 ( -0.68%) [ +0.00% +0.07% +0.04% / -0.02% -0.54% -0.68%] index_select strided 64 : Elapsed 14.728 ms (294.563 ms / 20) 292.103 -> 292.306 ( +0.07%) [ +0.16% +0.07% +0.00% / +0.22% +0.14% +0.07%] index_select random : Elapsed 14.629 ms (292.573 ms / 20) 291.220 -> 291.716 ( +0.17%) [ +0.04% +0.18% +0.00% / +0.17% +0.50% +0.79%] index_select random_sorted : Elapsed 14.567 ms (291.336 ms / 20) 292.185 -> 291.340 ( -0.29%) [ +0.24% +0.13% +0.00% / +0.26% -0.28% -0.29%] index_select perm : Elapsed 14.644 ms (292.887 ms / 20) 292.525 -> 292.698 ( +0.06%) [ +0.00% +0.01% +0.06% / +0.06% +0.37% +0.21%] index_select perm_sorted : Elapsed 14.626 ms (292.525 ms / 20) out_shape = [20, 256, 100, 50] in_shape = [40, 256, 100, 50] idx_dim = 0 out_shape = [40, 20, 100, 50] in_shape = [40, 256, 100, 50] idx_dim = 1 out_shape = [40, 256, 20, 50] in_shape = [40, 256, 100, 50] idx_dim = 2 out_shape = [40, 256, 100, 20] in_shape = [40, 256, 100, 50] idx_dim = 3 out_shape = [20, 40, 100, 256] in_shape = [50, 40, 100, 256] idx_dim = 0 B = [20, 40, 100, 256] (stride (100, 512000, 1, 2000)) A = [50, 40, 100, 256] (stride (100, 5000, 1, 200000)) dim = 0 771.915 -> 773.876 ( +0.25%) [ +0.00% +0.22% +0.05% / +0.25% +2.07% +2.13%] index_select const : Elapsed 38.596 ms (771.915 ms / 20) 762.061 -> 761.142 ( -0.12%) [ +0.15% +0.39% +0.00% / -0.12% +2.06% +2.08%] index_select wrap : Elapsed 38.160 ms (763.193 ms / 20) 762.544 -> 764.930 ( +0.31%) [ +0.00% +0.38% +0.23% / +0.31% +1.77% +2.05%] index_select linear : Elapsed 38.127 ms (762.544 ms / 20) 750.951 -> 749.852 ( -0.15%) [ +0.00% +0.03% +0.10% / -0.15% +2.89% +2.38%] index_select reverse : Elapsed 37.548 ms (750.951 ms / 20) 771.527 -> 771.692 ( +0.02%) [ +0.00% +0.03% +0.16% / +0.02% +2.26% +2.35%] index_select skip64 : Elapsed 38.576 ms (771.527 ms / 20) 767.644 -> 769.502 ( +0.24%) [ +0.00% +0.24% +0.24% / +0.24% +2.52% +2.48%] index_select skip256 : Elapsed 38.382 ms (767.644 ms / 20) 746.410 -> 748.443 ( +0.27%) [ +0.17% +0.00% +0.35% / +0.27% +3.76% +3.67%] index_select spread : Elapsed 37.383 ms (747.657 ms / 20) 755.803 -> 758.241 ( +0.32%) [ +0.54% +0.00% +0.32% / +0.32% +1.34% +1.69%] index_select strided 3 : Elapsed 37.996 ms (759.915 ms / 20) 760.487 -> 762.129 ( +0.22%) [ +0.00% +0.41% +0.42% / +0.22% +0.58% +0.40%] index_select strided 5 : Elapsed 38.024 ms (760.487 ms / 20) 758.049 -> 759.467 ( +0.19%) [ +0.00% +0.11% +0.31% / +0.19% +0.64% +0.56%] index_select strided 7 : Elapsed 37.902 ms (758.049 ms / 20) 755.010 -> 755.181 ( +0.02%) [ +0.00% +0.28% +0.10% / +0.02% +1.30% +1.30%] index_select strided 8 : Elapsed 37.750 ms (755.010 ms / 20) 763.093 -> 762.732 ( -0.05%) [ +0.00% +0.10% +0.16% / -0.05% +0.56% +0.63%] index_select strided 16 : Elapsed 38.155 ms (763.093 ms / 20) 769.674 -> 767.157 ( -0.33%) [ +0.01% +0.00% +0.03% / -0.21% -0.27% -0.33%] index_select random : Elapsed 38.489 ms (769.781 ms / 20) 760.386 -> 758.464 ( -0.25%) [ +0.16% +0.17% +0.00% / -0.25% +1.83% +1.33%] index_select random_sorted : Elapsed 38.079 ms (761.575 ms / 20) 764.263 -> 764.000 ( -0.03%) [ +0.00% +0.01% +0.17% / -0.03% +0.81% +0.73%] index_select perm : Elapsed 38.213 ms (764.263 ms / 20) 754.982 -> 754.879 ( -0.01%) [ +0.34% +0.00% +0.55% / -0.01% +2.69% +2.70%] index_select perm_sorted : Elapsed 37.877 ms (757.535 ms / 20) out_shape = [50, 20, 100, 256] in_shape = [50, 40, 100, 256] idx_dim = 1 B = [50, 20, 100, 256] (stride (100, 1280000, 1, 5000)) A = [50, 40, 100, 256] (stride (40, 1, 2000, 200000)) dim = 1 978.011 -> 977.472 ( -0.06%) [ +0.10% +0.12% +0.00% / +0.04% -0.05% -0.06%] index_select const : Elapsed 48.950 ms (979.002 ms / 20) 978.289 -> 978.366 ( +0.01%) [ +0.13% +0.23% +0.00% / +0.10% +0.01% +0.04%] index_select wrap : Elapsed 48.978 ms (979.552 ms / 20) 978.501 -> 977.829 ( -0.07%) [ +0.16% +0.00% +0.19% / +0.05% +0.02% -0.07%] index_select linear : Elapsed 49.005 ms (980.093 ms / 20) 978.296 -> 977.572 ( -0.07%) [ +0.03% +0.13% +0.00% / -0.00% -0.07% -0.07%] index_select reverse : Elapsed 48.931 ms (978.616 ms / 20) 978.618 -> 977.823 ( -0.08%) [ +0.00% +0.10% +0.00% / -0.08% +0.06% -0.04%] index_select skip64 : Elapsed 48.931 ms (978.629 ms / 20) 977.180 -> 978.338 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.26% +0.25%] index_select skip256 : Elapsed 48.879 ms (977.583 ms / 20) 977.599 -> 977.208 ( -0.04%) [ +0.07% +0.00% +0.06% / -0.04% +0.11% +0.10%] index_select spread : Elapsed 48.915 ms (978.305 ms / 20) 978.549 -> 977.955 ( -0.06%) [ +0.13% +0.00% +0.14% / +0.10% -0.06% -0.03%] index_select strided 3 : Elapsed 48.989 ms (979.785 ms / 20) 979.261 -> 978.143 ( -0.11%) [ +0.02% +0.00% +0.05% / -0.03% -0.11% -0.07%] index_select strided 5 : Elapsed 48.971 ms (979.415 ms / 20) 978.525 -> 977.314 ( -0.12%) [ +0.00% +0.02% +0.18% / +0.08% -0.09% -0.12%] index_select strided 7 : Elapsed 48.926 ms (978.525 ms / 20) 978.789 -> 977.594 ( -0.12%) [ +0.02% +0.00% +0.04% / +0.06% -0.10% -0.12%] index_select strided 8 : Elapsed 48.948 ms (978.968 ms / 20) 978.413 -> 977.531 ( -0.09%) [ +0.00% +0.19% +0.12% / -0.09% +0.11% +0.06%] index_select strided 16 : Elapsed 48.921 ms (978.413 ms / 20) 978.514 -> 978.165 ( -0.04%) [ +0.02% +0.04% +0.00% / +0.07% -0.04% -0.01%] index_select random : Elapsed 48.936 ms (978.721 ms / 20) 978.642 -> 978.706 ( +0.01%) [ +0.08% +0.06% +0.00% / +0.06% +0.07% +0.01%] index_select random_sorted : Elapsed 48.972 ms (979.446 ms / 20) 979.603 -> 978.386 ( -0.12%) [ +0.04% +0.02% +0.00% / +0.04% -0.12% -0.12%] index_select perm : Elapsed 48.997 ms (979.949 ms / 20) 977.379 -> 978.434 ( +0.11%) [ +0.03% +0.00% +0.02% / +0.11% +0.14% +0.15%] index_select perm_sorted : Elapsed 48.884 ms (977.684 ms / 20) out_shape = [50, 40, 20, 256] in_shape = [50, 40, 100, 256] idx_dim = 2 B = [50, 40, 20, 256] (stride (1, 256000, 12800, 50)) A = [50, 40, 100, 256] (stride (100, 1280000, 1, 5000)) dim = 2 350.662 -> 350.235 ( -0.12%) [ +0.02% +0.00% +0.05% / +0.08% -0.08% -0.12%] index_select const : Elapsed 17.536 ms (350.719 ms / 20) 350.717 -> 350.209 ( -0.14%) [ +0.10% +0.02% +0.00% / +0.07% -0.12% -0.14%] index_select wrap : Elapsed 17.553 ms (351.057 ms / 20) 350.548 -> 350.213 ( -0.10%) [ +0.17% +0.09% +0.00% / +0.11% -0.10% -0.07%] index_select linear : Elapsed 17.556 ms (351.128 ms / 20) 350.461 -> 350.405 ( -0.02%) [ +0.11% +0.14% +0.00% / +0.11% -0.02% -0.01%] index_select reverse : Elapsed 17.543 ms (350.862 ms / 20) 350.479 -> 350.149 ( -0.09%) [ +0.03% +0.00% +0.12% / +0.10% -0.09% -0.08%] index_select skip64 : Elapsed 17.530 ms (350.593 ms / 20) 350.639 -> 350.034 ( -0.17%) [ +0.00% +0.03% +0.06% / -0.05% -0.17% -0.15%] index_select skip256 : Elapsed 17.532 ms (350.639 ms / 20) 350.945 -> 350.245 ( -0.20%) [ +0.04% +0.03% +0.00% / +0.04% -0.18% -0.20%] index_select spread : Elapsed 17.554 ms (351.070 ms / 20) 350.802 -> 350.272 ( -0.15%) [ +0.12% +0.12% +0.00% / +0.13% -0.06% -0.15%] index_select strided 3 : Elapsed 17.561 ms (351.220 ms / 20) 350.809 -> 350.348 ( -0.13%) [ +0.00% +0.12% +0.05% / +0.10% -0.13% -0.13%] index_select strided 5 : Elapsed 17.540 ms (350.809 ms / 20) 350.787 -> 350.380 ( -0.12%) [ +0.06% +0.08% +0.00% / +0.04% -0.12% -0.06%] index_select strided 7 : Elapsed 17.550 ms (351.006 ms / 20) 350.881 -> 350.078 ( -0.23%) [ +0.05% +0.09% +0.00% / +0.05% -0.23% -0.12%] index_select strided 8 : Elapsed 17.554 ms (351.073 ms / 20) 350.867 -> 350.226 ( -0.18%) [ +0.00% +0.03% +0.03% / +0.06% -0.18% -0.18%] index_select strided 16 : Elapsed 17.543 ms (350.867 ms / 20) 350.772 -> 350.396 ( -0.11%) [ +0.07% +0.04% +0.00% / +0.05% -0.10% -0.11%] index_select strided 64 : Elapsed 17.551 ms (351.024 ms / 20) 350.996 -> 350.170 ( -0.24%) [ +0.02% +0.00% +0.07% / -0.02% -0.24% -0.16%] index_select random : Elapsed 17.554 ms (351.072 ms / 20) 350.953 -> 350.373 ( -0.17%) [ +0.00% +0.00% +0.01% / +0.04% -0.17% -0.16%] index_select random_sorted : Elapsed 17.548 ms (350.953 ms / 20) 350.623 -> 350.110 ( -0.15%) [ +0.15% +0.09% +0.00% / +0.16% -0.15% -0.09%] index_select perm : Elapsed 17.557 ms (351.145 ms / 20) 350.898 -> 350.232 ( -0.19%) [ +0.00% +0.10% +0.05% / +0.06% -0.16% -0.19%] index_select perm_sorted : Elapsed 17.545 ms (350.898 ms / 20) B = [50, 40, 20, 256] (stride (256, 12800, 512000, 1)) A = [50, 40, 100, 256] (stride (25600, 1280000, 256, 1)) dim = 2 85.161 -> 84.535 ( -0.74%) [ +0.13% +0.00% +0.01% / +0.06% -0.74% -0.71%] index_select const : Elapsed 4.263 ms (85.269 ms / 20) 85.023 -> 85.115 ( +0.11%) [ +0.14% +0.00% +0.05% / +0.16% +0.11% +0.21%] index_select wrap : Elapsed 4.257 ms (85.146 ms / 20) 85.022 -> 85.129 ( +0.13%) [ +0.06% +0.04% +0.00% / +0.15% +0.13% +0.16%] index_select linear : Elapsed 4.254 ms (85.076 ms / 20) 84.997 -> 85.032 ( +0.04%) [ +0.08% +0.10% +0.00% / +0.04% +0.24% +0.17%] index_select reverse : Elapsed 4.253 ms (85.064 ms / 20) 85.151 -> 84.450 ( -0.82%) [ +0.16% +0.10% +0.00% / +0.14% -0.77% -0.82%] index_select skip64 : Elapsed 4.264 ms (85.288 ms / 20) 85.171 -> 84.479 ( -0.81%) [ +0.21% +0.05% +0.00% / +0.13% -0.77% -0.81%] index_select skip256 : Elapsed 4.268 ms (85.351 ms / 20) 84.981 -> 85.052 ( +0.08%) [ +0.10% +0.03% +0.00% / +0.08% +0.25% +0.29%] index_select spread : Elapsed 4.253 ms (85.069 ms / 20) 85.018 -> 85.018 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.11% +0.05%] index_select strided 3 : Elapsed 4.254 ms (85.079 ms / 20) 84.973 -> 85.025 ( +0.06%) [ +0.08% +0.00% +0.01% / +0.06% +0.31% +0.22%] index_select strided 5 : Elapsed 4.252 ms (85.037 ms / 20) 85.262 -> 85.064 ( -0.23%) [ +0.06% +0.01% +0.00% / +0.07% -0.23% -0.21%] index_select strided 7 : Elapsed 4.266 ms (85.314 ms / 20) 84.984 -> 84.976 ( -0.01%) [ +0.03% +0.03% +0.00% / -0.01% +0.19% +0.24%] index_select strided 8 : Elapsed 4.251 ms (85.011 ms / 20) 85.142 -> 85.120 ( -0.03%) [ +0.04% +0.00% +0.02% / +0.08% +0.03% -0.03%] index_select strided 16 : Elapsed 4.259 ms (85.174 ms / 20) 85.059 -> 84.976 ( -0.10%) [ +0.10% +0.08% +0.00% / +0.10% -0.10% -0.06%] index_select strided 64 : Elapsed 4.257 ms (85.145 ms / 20) 84.950 -> 85.042 ( +0.11%) [ +0.17% +0.00% +0.04% / +0.11% +0.35% +0.40%] index_select random : Elapsed 4.255 ms (85.097 ms / 20) 84.973 -> 85.043 ( +0.08%) [ +0.15% +0.03% +0.00% / +0.08% +0.40% +0.40%] index_select random_sorted : Elapsed 4.255 ms (85.097 ms / 20) 85.164 -> 85.121 ( -0.05%) [ +0.04% +0.09% +0.00% / +0.05% -0.05% -0.05%] index_select perm : Elapsed 4.260 ms (85.198 ms / 20) 85.056 -> 85.101 ( +0.05%) [ +0.12% +0.08% +0.00% / +0.05% +0.22% +0.21%] index_select perm_sorted : Elapsed 4.258 ms (85.156 ms / 20) B = [50, 40, 20, 256] (stride (1, 50, 512000, 2000)) A = [50, 40, 100, 256] (stride (10240, 1, 512000, 40)) dim = 2 244.256 -> 244.688 ( +0.18%) [ +0.83% +0.28% +0.00% / +0.48% +0.18% +0.35%] index_select const : Elapsed 12.314 ms (246.275 ms / 20) 246.120 -> 246.390 ( +0.11%) [ +0.12% +0.00% +0.03% / +0.36% +0.30% +0.11%] index_select wrap : Elapsed 12.321 ms (246.415 ms / 20) 245.914 -> 245.475 ( -0.18%) [ +0.00% +0.36% +0.48% / -0.18% +0.06% +0.44%] index_select linear : Elapsed 12.296 ms (245.914 ms / 20) 245.610 -> 247.025 ( +0.58%) [ +0.24% +0.00% +0.03% / +0.58% +0.59% +0.74%] index_select reverse : Elapsed 12.310 ms (246.204 ms / 20) 243.565 -> 244.378 ( +0.33%) [ +0.00% +0.29% +0.44% / +0.33% +0.67% +0.77%] index_select skip64 : Elapsed 12.178 ms (243.565 ms / 20) 243.259 -> 242.899 ( -0.15%) [ +0.48% +0.00% +0.11% / -0.15% +0.82% +1.22%] index_select skip256 : Elapsed 12.221 ms (244.421 ms / 20) 244.806 -> 245.468 ( +0.27%) [ +0.68% +0.00% +0.53% / +0.27% +1.06% +0.60%] index_select spread : Elapsed 12.324 ms (246.476 ms / 20) 245.323 -> 246.684 ( +0.55%) [ +0.19% +0.24% +0.00% / +0.55% +0.82% +0.77%] index_select strided 3 : Elapsed 12.290 ms (245.800 ms / 20) 246.353 -> 246.858 ( +0.20%) [ +0.42% +0.00% +0.16% / +0.20% +0.38% +0.41%] index_select strided 5 : Elapsed 12.369 ms (247.380 ms / 20) 245.525 -> 245.757 ( +0.09%) [ +0.00% +0.77% +0.43% / +0.57% +0.09% +0.68%] index_select strided 7 : Elapsed 12.276 ms (245.525 ms / 20) 244.838 -> 245.325 ( +0.20%) [ +0.00% +0.78% +0.71% / +0.66% +0.20% +0.68%] index_select strided 8 : Elapsed 12.242 ms (244.838 ms / 20) 246.656 -> 245.850 ( -0.33%) [ +0.44% +0.00% +0.47% / -0.33% -0.14% -0.01%] index_select strided 16 : Elapsed 12.387 ms (247.736 ms / 20) 245.851 -> 245.733 ( -0.05%) [ +0.03% +0.04% +0.00% / -0.05% +0.63% +0.16%] index_select strided 64 : Elapsed 12.296 ms (245.923 ms / 20) 245.658 -> 246.836 ( +0.48%) [ +0.27% +0.53% +0.00% / +0.48% +1.03% +0.49%] index_select random : Elapsed 12.316 ms (246.323 ms / 20) 244.160 -> 245.307 ( +0.47%) [ +0.75% +0.85% +0.00% / +0.96% +1.41% +0.47%] index_select random_sorted : Elapsed 12.299 ms (245.981 ms / 20) 244.305 -> 245.157 ( +0.35%) [ +0.94% +0.26% +0.00% / +0.35% +0.70% +0.69%] index_select perm : Elapsed 12.331 ms (246.612 ms / 20) 245.673 -> 245.092 ( -0.24%) [ +0.14% +0.29% +0.00% / -0.24% +0.43% -0.15%] index_select perm_sorted : Elapsed 12.301 ms (246.022 ms / 20) out_shape = [50, 40, 100, 20] in_shape = [50, 40, 100, 256] idx_dim = 3 out_shape = [20, 40, 256, 100] in_shape = [50, 40, 256, 100] idx_dim = 0 out_shape = [50, 20, 256, 100] in_shape = [50, 40, 256, 100] idx_dim = 1 out_shape = [50, 40, 20, 100] in_shape = [50, 40, 256, 100] idx_dim = 2 B = [50, 40, 20, 100] (stride (1, 100000, 5000, 50)) A = [50, 40, 256, 100] (stride (10240, 256, 1, 512000)) dim = 2 132.389 -> 132.405 ( +0.01%) [ +0.04% +0.00% +0.08% / +0.11% +0.01% +0.10%] index_select const : Elapsed 6.622 ms (132.448 ms / 20) 132.432 -> 132.421 ( -0.01%) [ +0.00% +0.11% +0.16% / -0.01% +0.17% +0.13%] index_select wrap : Elapsed 6.622 ms (132.432 ms / 20) 132.590 -> 132.504 ( -0.06%) [ +0.01% +0.00% +0.05% / +0.02% +0.08% -0.06%] index_select linear : Elapsed 6.630 ms (132.607 ms / 20) 132.367 -> 132.489 ( +0.09%) [ +0.08% +0.16% +0.00% / +0.09% +0.18% +0.22%] index_select reverse : Elapsed 6.624 ms (132.472 ms / 20) 132.364 -> 132.351 ( -0.01%) [ +0.14% +0.06% +0.00% / -0.01% +0.03% +0.14%] index_select skip64 : Elapsed 6.628 ms (132.552 ms / 20) 132.492 -> 132.400 ( -0.07%) [ +0.00% +0.01% +0.05% / -0.07% +0.04% -0.06%] index_select skip256 : Elapsed 6.625 ms (132.492 ms / 20) 133.135 -> 133.028 ( -0.08%) [ +0.01% +0.00% +0.11% / -0.08% +0.14% +0.19%] index_select spread : Elapsed 6.658 ms (133.153 ms / 20) 132.757 -> 132.610 ( -0.11%) [ +0.03% +0.06% +0.00% / -0.11% +0.10% +0.08%] index_select strided 3 : Elapsed 6.640 ms (132.797 ms / 20) 132.829 -> 132.841 ( +0.01%) [ +0.07% +0.09% +0.00% / +0.01% +0.14% +0.14%] index_select strided 5 : Elapsed 6.646 ms (132.928 ms / 20) 133.046 -> 133.086 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.17% +0.11%] index_select strided 7 : Elapsed 6.655 ms (133.091 ms / 20) 133.131 -> 132.938 ( -0.14%) [ +0.03% +0.00% +0.08% / -0.14% +0.11% +0.13%] index_select strided 8 : Elapsed 6.658 ms (133.167 ms / 20) 133.077 -> 133.081 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.15% +0.18%] index_select strided 16 : Elapsed 6.665 ms (133.299 ms / 20) 133.077 -> 133.085 ( +0.01%) [ +0.07% +0.01% +0.00% / +0.01% +0.04% +0.14%] index_select strided 64 : Elapsed 6.659 ms (133.175 ms / 20) 132.916 -> 133.223 ( +0.23%) [ +0.23% +0.00% +0.22% / +0.25% +0.23% +0.36%] index_select strided 100 : Elapsed 6.661 ms (133.219 ms / 20) 132.535 -> 132.546 ( +0.01%) [ +0.07% +0.00% +0.04% / +0.14% +0.01% +0.04%] index_select strided 255 : Elapsed 6.631 ms (132.623 ms / 20) 133.109 -> 133.121 ( +0.01%) [ +0.00% +0.09% +0.00% / +0.01% +0.08% +0.06%] index_select random : Elapsed 6.655 ms (133.109 ms / 20) 133.036 -> 133.066 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.12% +0.08%] index_select random_sorted : Elapsed 6.655 ms (133.103 ms / 20) 133.150 -> 133.039 ( -0.08%) [ +0.03% +0.03% +0.00% / -0.02% -0.08% +0.02%] index_select perm : Elapsed 6.659 ms (133.184 ms / 20) 132.797 -> 132.816 ( +0.01%) [ +0.18% +0.14% +0.00% / +0.07% +0.01% +0.15%] index_select perm_sorted : Elapsed 6.652 ms (133.031 ms / 20) out_shape = [50, 40, 256, 20] in_shape = [50, 40, 256, 100] idx_dim = 3 out_shape = [20, 100, 40, 256] in_shape = [50, 100, 40, 256] idx_dim = 0 out_shape = [50, 20, 40, 256] in_shape = [50, 100, 40, 256] idx_dim = 1 out_shape = [50, 100, 20, 256] in_shape = [50, 100, 40, 256] idx_dim = 2 B = [50, 100, 20, 256] (stride (25600, 1, 1280000, 100)) A = [50, 100, 40, 256] (stride (256, 512000, 12800, 1)) dim = 2 616.704 -> 617.184 ( +0.08%) [ +0.03% +0.00% +0.01% / +0.08% +0.29% +0.29%] index_select const : Elapsed 30.843 ms (616.870 ms / 20) 617.864 -> 617.068 ( -0.13%) [ +0.00% +0.05% +0.05% / -0.13% +0.27% +0.18%] index_select wrap : Elapsed 30.893 ms (617.864 ms / 20) 617.936 -> 618.032 ( +0.02%) [ +0.05% +0.06% +0.00% / +0.08% +0.02% +0.04%] index_select linear : Elapsed 30.912 ms (618.232 ms / 20) 617.242 -> 616.947 ( -0.05%) [ +0.06% +0.25% +0.00% / -0.05% +0.14% +0.05%] index_select reverse : Elapsed 30.882 ms (617.638 ms / 20) 616.685 -> 616.741 ( +0.01%) [ +0.00% +0.02% +0.03% / +0.01% +0.26% +0.37%] index_select skip64 : Elapsed 30.834 ms (616.685 ms / 20) 611.543 -> 612.248 ( +0.12%) [ +0.00% +0.08% +0.09% / +0.12% +1.27% +1.26%] index_select skip256 : Elapsed 30.577 ms (611.543 ms / 20) 612.087 -> 612.435 ( +0.06%) [ +0.11% +0.19% +0.00% / +0.06% +1.07% +1.03%] index_select spread : Elapsed 30.639 ms (612.782 ms / 20) 616.899 -> 617.784 ( +0.14%) [ +0.00% +0.10% +0.02% / +0.14% +0.31% +0.37%] index_select strided 3 : Elapsed 30.845 ms (616.899 ms / 20) 617.574 -> 617.445 ( -0.02%) [ +0.13% +0.00% +0.05% / -0.02% +0.16% +0.06%] index_select strided 5 : Elapsed 30.920 ms (618.400 ms / 20) 617.861 -> 614.748 ( -0.50%) [ +0.08% +0.01% +0.00% / +0.09% -0.50% -0.40%] index_select strided 7 : Elapsed 30.918 ms (618.356 ms / 20) 617.308 -> 615.161 ( -0.35%) [ +0.00% +0.15% +0.05% / +0.01% -0.21% -0.35%] index_select strided 8 : Elapsed 30.865 ms (617.308 ms / 20) 617.186 -> 616.881 ( -0.05%) [ +0.01% +0.11% +0.00% / -0.05% +0.15% +0.16%] index_select strided 16 : Elapsed 30.862 ms (617.244 ms / 20) 619.321 -> 618.827 ( -0.08%) [ +0.00% +0.02% +0.05% / -0.08% -0.03% -0.00%] index_select random : Elapsed 30.966 ms (619.321 ms / 20) 619.478 -> 617.894 ( -0.26%) [ +0.00% +0.04% +0.07% / -0.06% -0.26% -0.21%] index_select random_sorted : Elapsed 30.974 ms (619.478 ms / 20) 617.814 -> 617.984 ( +0.03%) [ +0.18% +0.00% +0.06% / +0.22% +0.13% +0.03%] index_select perm : Elapsed 30.945 ms (618.908 ms / 20) 615.067 -> 614.930 ( -0.02%) [ +0.00% +0.06% +0.12% / -0.02% +0.79% +0.70%] index_select perm_sorted : Elapsed 30.753 ms (615.067 ms / 20) out_shape = [50, 100, 40, 20] in_shape = [50, 100, 40, 256] idx_dim = 3 B = [50, 100, 40, 20] (stride (1, 40000, 50, 2000)) A = [50, 100, 40, 256] (stride (1024000, 10240, 256, 1)) dim = 3 107.273 -> 107.423 ( +0.14%) [ +0.73% +0.00% +1.11% / +0.14% +0.22% +0.23%] index_select const : Elapsed 5.403 ms (108.051 ms / 20) 107.755 -> 107.068 ( -0.64%) [ +0.00% +0.02% +0.05% / +0.35% -0.64% -0.25%] index_select wrap : Elapsed 5.388 ms (107.755 ms / 20) 107.219 -> 107.240 ( +0.02%) [ +1.15% +0.00% +0.57% / +0.53% +0.03% +0.02%] index_select linear : Elapsed 5.423 ms (108.454 ms / 20) 107.779 -> 107.444 ( -0.31%) [ +0.42% +0.00% +0.28% / -0.08% -0.03% -0.31%] index_select reverse : Elapsed 5.412 ms (108.234 ms / 20) 107.784 -> 107.355 ( -0.40%) [ +0.19% +0.20% +0.00% / +0.20% -0.34% -0.40%] index_select skip64 : Elapsed 5.399 ms (107.986 ms / 20) 107.456 -> 107.497 ( +0.04%) [ +0.16% +0.57% +0.00% / +0.04% +0.11% +0.17%] index_select skip256 : Elapsed 5.382 ms (107.631 ms / 20) 108.155 -> 107.624 ( -0.49%) [ +0.09% +0.00% +0.42% / -0.49% -0.10% +0.04%] index_select spread : Elapsed 5.413 ms (108.256 ms / 20) 107.573 -> 107.576 ( +0.00%) [ +0.00% +0.56% +0.90% / +0.57% +0.00% +0.35%] index_select strided 3 : Elapsed 5.379 ms (107.573 ms / 20) 107.248 -> 107.603 ( +0.33%) [ +0.15% +0.00% +0.25% / +1.14% +0.40% +0.33%] index_select strided 5 : Elapsed 5.370 ms (107.404 ms / 20) 108.253 -> 107.266 ( -0.91%) [ +0.00% +0.46% +0.24% / -0.34% -0.91% -0.25%] index_select strided 7 : Elapsed 5.413 ms (108.253 ms / 20) 108.190 -> 107.404 ( -0.73%) [ +0.23% +0.00% +0.25% / +0.31% -0.73% -0.51%] index_select strided 8 : Elapsed 5.422 ms (108.434 ms / 20) 108.358 -> 107.836 ( -0.48%) [ +0.00% +0.35% +0.45% / -0.43% -0.16% -0.48%] index_select strided 16 : Elapsed 5.418 ms (108.358 ms / 20) 107.809 -> 107.254 ( -0.51%) [ +0.11% +0.00% +0.27% / +0.64% -0.51% -0.29%] index_select strided 64 : Elapsed 5.396 ms (107.927 ms / 20) 108.100 -> 108.006 ( -0.09%) [ +0.47% +0.00% +0.53% / +0.16% -0.09% -0.04%] index_select strided 100 : Elapsed 5.431 ms (108.613 ms / 20) 107.804 -> 107.527 ( -0.26%) [ +0.00% +0.22% +0.03% / +0.13% -0.26% -0.24%] index_select strided 255 : Elapsed 5.390 ms (107.804 ms / 20) 108.394 -> 107.886 ( -0.47%) [ +0.04% +0.09% +0.00% / -0.19% -0.47% -0.17%] index_select random : Elapsed 5.422 ms (108.434 ms / 20) 107.910 -> 107.427 ( -0.45%) [ +0.46% +0.81% +0.00% / +0.25% -0.45% -0.18%] index_select random_sorted : Elapsed 5.420 ms (108.402 ms / 20) 108.317 -> 107.340 ( -0.90%) [ +0.33% +0.21% +0.00% / -0.19% -0.25% -0.90%] index_select perm : Elapsed 5.433 ms (108.670 ms / 20) 108.464 -> 107.959 ( -0.47%) [ +0.19% +0.05% +0.00% / +0.35% -0.38% -0.47%] index_select perm_sorted : Elapsed 5.434 ms (108.672 ms / 20) out_shape = [20, 100, 256, 40] in_shape = [50, 100, 256, 40] idx_dim = 0 out_shape = [50, 20, 256, 40] in_shape = [50, 100, 256, 40] idx_dim = 1 B = [50, 20, 256, 40] (stride (1, 512000, 50, 12800)) A = [50, 100, 256, 40] (stride (25600, 256, 1, 1280000)) dim = 1 good 333.988 -> 303.920 ( -9.00%) [ +0.10% +0.08% +0.00% / +0.06% -9.00% -8.96%] index_select const : Elapsed 16.716 ms (334.312 ms / 20) 315.335 -> 302.311 ( -4.13%) [ +0.08% +0.07% +0.00% / +0.08% -4.05% -4.13%] index_select wrap : Elapsed 15.779 ms (315.583 ms / 20) 315.365 -> 302.169 ( -4.18%) [ +0.07% +0.11% +0.00% / +0.02% -3.94% -4.18%] index_select linear : Elapsed 15.779 ms (315.573 ms / 20) 311.934 -> 296.780 ( -4.86%) [ +0.00% +0.07% +0.04% / +0.03% -4.86% -4.66%] index_select reverse : Elapsed 15.597 ms (311.934 ms / 20) good 333.660 -> 303.357 ( -9.08%) [ +0.17% +0.03% +0.00% / +0.10% -8.81% -9.08%] index_select skip64 : Elapsed 16.711 ms (334.216 ms / 20) good 333.558 -> 303.652 ( -8.97%) [ +0.17% +0.26% +0.00% / +0.12% -8.80% -8.97%] index_select skip256 : Elapsed 16.707 ms (334.141 ms / 20) 310.250 -> 309.747 ( -0.16%) [ +0.02% +0.10% +0.00% / -0.16% +1.80% +1.72%] index_select spread : Elapsed 15.516 ms (310.323 ms / 20) 316.858 -> 310.216 ( -2.10%) [ +0.12% +0.03% +0.00% / +0.13% -2.10% -2.07%] index_select strided 3 : Elapsed 15.862 ms (317.237 ms / 20) 309.268 -> 310.340 ( +0.35%) [ +0.44% +0.24% +0.00% / +0.35% +1.93% +2.14%] index_select strided 5 : Elapsed 15.531 ms (310.620 ms / 20) 309.911 -> 309.925 ( +0.00%) [ +0.00% +0.03% +0.12% / +0.02% +0.01% +0.00%] index_select strided 7 : Elapsed 15.496 ms (309.911 ms / 20) 313.411 -> 314.059 ( +0.21%) [ +0.29% +0.00% +0.06% / +0.21% +1.85% +1.86%] index_select strided 8 : Elapsed 15.716 ms (314.322 ms / 20) 306.728 -> 304.772 ( -0.64%) [ +0.06% +0.21% +0.00% / +0.29% -0.64% -0.38%] index_select strided 16 : Elapsed 15.346 ms (306.921 ms / 20) 308.574 -> 308.035 ( -0.17%) [ +0.04% +0.21% +0.00% / -0.17% -0.11% -0.07%] index_select strided 64 : Elapsed 15.435 ms (308.702 ms / 20) 311.466 -> 306.515 ( -1.59%) [ +0.04% +0.00% +0.16% / +0.49% -1.45% -1.59%] index_select random : Elapsed 15.579 ms (311.577 ms / 20) 306.743 -> 307.427 ( +0.22%) [ +0.13% +0.05% +0.00% / +0.22% +0.22% +0.27%] index_select random_sorted : Elapsed 15.357 ms (307.148 ms / 20) 318.826 -> 310.112 ( -2.73%) [ +0.14% +0.00% +0.11% / +0.07% -2.52% -2.73%] index_select perm : Elapsed 15.964 ms (319.275 ms / 20) 307.088 -> 304.076 ( -0.98%) [ +0.00% +0.18% +0.43% / +0.10% -0.89% -0.98%] index_select perm_sorted : Elapsed 15.354 ms (307.088 ms / 20) out_shape = [50, 100, 20, 40] in_shape = [50, 100, 256, 40] idx_dim = 2 out_shape = [50, 100, 256, 20] in_shape = [50, 100, 256, 40] idx_dim = 3 out_shape = [20, 256, 40, 100] in_shape = [50, 256, 40, 100] idx_dim = 0 out_shape = [50, 20, 40, 100] in_shape = [50, 256, 40, 100] idx_dim = 1 out_shape = [50, 256, 20, 100] in_shape = [50, 256, 40, 100] idx_dim = 2 out_shape = [50, 256, 40, 20] in_shape = [50, 256, 40, 100] idx_dim = 3 out_shape = [20, 256, 100, 40] in_shape = [50, 256, 100, 40] idx_dim = 0 B = [20, 256, 100, 40] (stride (10240, 40, 204800, 1)) A = [50, 256, 100, 40] (stride (1024000, 1, 10240, 256)) dim = 0 362.309 -> 362.007 ( -0.08%) [ +0.00% +0.18% +0.15% / +0.15% +0.10% -0.08%] index_select const : Elapsed 18.115 ms (362.309 ms / 20) 378.100 -> 377.715 ( -0.10%) [ +0.00% +0.01% +0.06% / +0.04% -0.09% -0.10%] index_select wrap : Elapsed 18.905 ms (378.100 ms / 20) 378.093 -> 377.623 ( -0.12%) [ +0.02% +0.00% +0.05% / -0.01% -0.12% -0.05%] index_select linear : Elapsed 18.908 ms (378.155 ms / 20) 378.125 -> 377.757 ( -0.10%) [ +0.04% +0.00% +0.05% / -0.00% -0.02% -0.10%] index_select reverse : Elapsed 18.913 ms (378.258 ms / 20) 362.402 -> 362.285 ( -0.03%) [ +0.08% +0.11% +0.00% / +0.08% +0.01% -0.03%] index_select skip64 : Elapsed 18.135 ms (362.708 ms / 20) 362.575 -> 362.089 ( -0.13%) [ +0.08% +0.05% +0.00% / -0.05% -0.06% -0.13%] index_select skip256 : Elapsed 18.143 ms (362.853 ms / 20) 378.034 -> 377.531 ( -0.13%) [ +0.02% +0.17% +0.00% / +0.05% -0.13% +0.13%] index_select spread : Elapsed 18.906 ms (378.126 ms / 20) 377.912 -> 377.627 ( -0.08%) [ +0.00% +0.01% +0.00% / +0.02% -0.07% -0.08%] index_select strided 3 : Elapsed 18.896 ms (377.912 ms / 20) 378.248 -> 378.170 ( -0.02%) [ +0.00% +0.05% +0.16% / +0.04% -0.02% -0.00%] index_select strided 5 : Elapsed 18.912 ms (378.248 ms / 20) 377.515 -> 378.036 ( +0.14%) [ +0.00% +0.32% +0.23% / +0.17% +0.33% +0.14%] index_select strided 7 : Elapsed 18.876 ms (377.515 ms / 20) 378.217 -> 377.684 ( -0.14%) [ +0.00% +0.08% +0.11% / -0.03% -0.14% +0.06%] index_select strided 8 : Elapsed 18.911 ms (378.217 ms / 20) 378.700 -> 377.412 ( -0.34%) [ +0.02% +0.00% +0.04% / +0.01% -0.29% -0.34%] index_select strided 16 : Elapsed 18.939 ms (378.775 ms / 20) 377.259 -> 377.829 ( +0.15%) [ +0.13% +0.19% +0.00% / +0.16% +0.15% +0.20%] index_select random : Elapsed 18.887 ms (377.743 ms / 20) 376.194 -> 376.021 ( -0.05%) [ +0.03% +0.06% +0.00% / +0.00% -0.05% +0.05%] index_select random_sorted : Elapsed 18.816 ms (376.317 ms / 20) 378.146 -> 377.557 ( -0.16%) [ +0.14% +0.22% +0.00% / +0.05% -0.16% -0.09%] index_select perm : Elapsed 18.934 ms (378.673 ms / 20) 378.122 -> 377.934 ( -0.05%) [ +0.16% +0.03% +0.00% / +0.02% +0.03% -0.05%] index_select perm_sorted : Elapsed 18.936 ms (378.714 ms / 20) out_shape = [50, 20, 100, 40] in_shape = [50, 256, 100, 40] idx_dim = 1 B = [50, 20, 100, 40] (stride (40, 2000, 40000, 1)) A = [50, 256, 100, 40] (stride (100, 200000, 1, 5000)) dim = 1 57.771 -> 57.572 ( -0.34%) [ +0.00% +0.10% +0.12% / -0.05% -0.26% -0.34%] index_select const : Elapsed 2.889 ms (57.771 ms / 20) 69.367 -> 69.383 ( +0.02%) [ +0.00% +0.34% +0.09% / +0.36% +0.23% +0.02%] index_select wrap : Elapsed 3.468 ms (69.367 ms / 20) 69.610 -> 69.471 ( -0.20%) [ +0.03% +0.00% +0.20% / -0.02% -0.09% -0.20%] index_select linear : Elapsed 3.482 ms (69.630 ms / 20) 69.727 -> 69.762 ( +0.05%) [ +0.11% +0.00% +0.30% / +0.05% +0.51% +0.30%] index_select reverse : Elapsed 3.490 ms (69.804 ms / 20) 57.686 -> 57.575 ( -0.19%) [ +0.33% +0.31% +0.00% / +0.10% -0.18% -0.19%] index_select skip64 : Elapsed 2.894 ms (57.878 ms / 20) 57.782 -> 57.615 ( -0.29%) [ +0.00% +0.01% +0.20% / +0.14% +0.02% -0.29%] index_select skip256 : Elapsed 2.889 ms (57.782 ms / 20) 70.095 -> 69.071 ( -1.46%) [ +0.00% +0.58% +0.25% / +0.24% -1.16% -1.46%] index_select spread : Elapsed 3.505 ms (70.095 ms / 20) 68.824 -> 69.023 ( +0.29%) [ +0.31% +0.00% +0.07% / +0.29% +0.92% +1.17%] index_select strided 3 : Elapsed 3.452 ms (69.036 ms / 20) 70.395 -> 69.439 ( -1.36%) [ +0.00% +0.11% +0.34% / -0.07% -1.36% -1.21%] index_select strided 5 : Elapsed 3.520 ms (70.395 ms / 20) 69.365 -> 69.363 ( -0.00%) [ +0.24% +0.41% +0.00% / -0.00% +0.32% +0.17%] index_select strided 7 : Elapsed 3.477 ms (69.534 ms / 20) 70.878 -> 69.806 ( -1.51%) [ +0.12% +0.00% +0.08% / +0.01% -1.47% -1.51%] index_select strided 8 : Elapsed 3.548 ms (70.966 ms / 20) 71.304 -> 70.055 ( -1.75%) [ +0.00% +0.43% +0.15% / +0.24% -1.75% -1.69%] index_select strided 16 : Elapsed 3.565 ms (71.304 ms / 20) 70.566 -> 69.985 ( -0.82%) [ +0.00% +0.10% +0.02% / +0.02% -0.82% -0.65%] index_select strided 64 : Elapsed 3.528 ms (70.566 ms / 20) 69.208 -> 69.111 ( -0.14%) [ +0.00% +0.09% +0.03% / -0.14% +0.22% +0.39%] index_select strided 100 : Elapsed 3.460 ms (69.208 ms / 20) 69.515 -> 68.723 ( -1.14%) [ +0.30% +0.64% +0.00% / -0.04% -1.14% -0.95%] index_select strided 255 : Elapsed 3.486 ms (69.726 ms / 20) 68.932 -> 68.999 ( +0.10%) [ +0.34% +0.26% +0.00% / +0.10% +1.63% +1.41%] index_select random : Elapsed 3.458 ms (69.168 ms / 20) 67.477 -> 67.888 ( +0.61%) [ +0.39% +0.43% +0.00% / +0.61% +3.16% +2.90%] index_select random_sorted : Elapsed 3.387 ms (67.740 ms / 20) 69.669 -> 69.402 ( -0.38%) [ +0.00% +0.83% +0.04% / +0.50% -0.29% -0.38%] index_select perm : Elapsed 3.483 ms (69.669 ms / 20) 69.949 -> 69.150 ( -1.14%) [ +0.00% +0.46% +0.03% / +0.38% -1.14% -1.05%] index_select perm_sorted : Elapsed 3.497 ms (69.949 ms / 20) out_shape = [50, 256, 20, 40] in_shape = [50, 256, 100, 40] idx_dim = 2 out_shape = [50, 256, 100, 20] in_shape = [50, 256, 100, 40] idx_dim = 3 B = [50, 256, 100, 20] (stride (512000, 1, 256, 25600)) A = [50, 256, 100, 40] (stride (25600, 100, 1, 1280000)) dim = 3 581.872 -> 577.467 ( -0.76%) [ +0.00% +0.02% +0.13% / +0.05% -0.76% -0.61%] index_select const : Elapsed 29.094 ms (581.872 ms / 20) 581.447 -> 579.563 ( -0.32%) [ +0.00% +0.12% +0.06% / -0.04% -0.22% -0.32%] index_select wrap : Elapsed 29.072 ms (581.447 ms / 20) 581.380 -> 579.021 ( -0.41%) [ +0.04% +0.04% +0.00% / +0.14% -0.07% -0.41%] index_select linear : Elapsed 29.079 ms (581.585 ms / 20) 580.142 -> 579.839 ( -0.05%) [ +0.00% +0.13% +0.07% / -0.03% -0.05% -0.05%] index_select reverse : Elapsed 29.007 ms (580.142 ms / 20) 580.650 -> 579.186 ( -0.25%) [ +0.07% +0.01% +0.00% / +0.07% -0.23% -0.25%] index_select skip64 : Elapsed 29.053 ms (581.058 ms / 20) 574.913 -> 575.836 ( +0.16%) [ +0.11% +0.00% +0.10% / +0.16% +0.73% +0.83%] index_select skip256 : Elapsed 28.777 ms (575.541 ms / 20) 575.635 -> 574.136 ( -0.26%) [ +0.00% +0.03% +0.03% / -0.26% +0.83% +0.93%] index_select spread : Elapsed 28.782 ms (575.635 ms / 20) 580.851 -> 579.261 ( -0.27%) [ +0.15% +0.13% +0.00% / +0.09% -0.13% -0.27%] index_select strided 3 : Elapsed 29.085 ms (581.698 ms / 20) 580.969 -> 579.607 ( -0.23%) [ +0.00% +0.17% +0.16% / +0.11% -0.23% -0.11%] index_select strided 5 : Elapsed 29.048 ms (580.969 ms / 20) 580.199 -> 574.143 ( -1.04%) [ +0.09% +0.18% +0.00% / +0.22% -1.04% -0.83%] index_select strided 7 : Elapsed 29.037 ms (580.732 ms / 20) 581.529 -> 574.215 ( -1.26%) [ +0.05% +0.00% +0.06% / +0.05% -1.26% -1.05%] index_select strided 8 : Elapsed 29.092 ms (581.845 ms / 20) 581.024 -> 578.787 ( -0.39%) [ +0.00% +0.27% +0.09% / +0.07% -0.12% -0.39%] index_select strided 16 : Elapsed 29.051 ms (581.024 ms / 20) 581.338 -> 578.838 ( -0.43%) [ +0.10% +0.00% +0.06% / +0.04% -0.35% -0.43%] index_select random : Elapsed 29.097 ms (581.938 ms / 20) 581.364 -> 579.692 ( -0.29%) [ +0.10% +0.07% +0.00% / -0.09% -0.20% -0.29%] index_select random_sorted : Elapsed 29.096 ms (581.926 ms / 20) 580.485 -> 580.153 ( -0.06%) [ +0.00% +0.20% +0.09% / +0.23% -0.06% +0.07%] index_select perm : Elapsed 29.024 ms (580.485 ms / 20) 574.213 -> 576.227 ( +0.35%) [ +0.00% +0.30% +0.27% / +0.35% +1.17% +1.11%] index_select perm_sorted : Elapsed 28.711 ms (574.213 ms / 20) out_shape = [20, 40, 50, 256] in_shape = [100, 40, 50, 256] idx_dim = 0 out_shape = [100, 20, 50, 256] in_shape = [100, 40, 50, 256] idx_dim = 1 out_shape = [100, 40, 20, 256] in_shape = [100, 40, 50, 256] idx_dim = 2 out_shape = [100, 40, 50, 20] in_shape = [100, 40, 50, 256] idx_dim = 3 out_shape = [20, 40, 256, 50] in_shape = [100, 40, 256, 50] idx_dim = 0 out_shape = [100, 20, 256, 50] in_shape = [100, 40, 256, 50] idx_dim = 1 B = [100, 20, 256, 50] (stride (12800, 1280000, 50, 1)) A = [100, 40, 256, 50] (stride (10240, 256, 1, 1024000)) dim = 1 555.283 -> 553.643 ( -0.30%) [ +0.26% +0.31% +0.00% / +0.19% -0.24% -0.30%] index_select const : Elapsed 27.836 ms (556.729 ms / 20) 567.069 -> 567.228 ( +0.03%) [ +0.09% +0.08% +0.00% / +0.03% +0.23% +0.24%] index_select wrap : Elapsed 28.378 ms (567.561 ms / 20) 567.267 -> 567.384 ( +0.02%) [ +0.00% +0.13% +0.01% / +0.22% +0.08% +0.02%] index_select linear : Elapsed 28.363 ms (567.267 ms / 20) 566.028 -> 565.899 ( -0.02%) [ +0.14% +0.07% +0.00% / -0.02% +0.46% +0.64%] index_select reverse : Elapsed 28.341 ms (566.817 ms / 20) 555.177 -> 552.621 ( -0.46%) [ +0.22% +0.17% +0.00% / +0.28% -0.46% -0.35%] index_select skip64 : Elapsed 27.819 ms (556.390 ms / 20) 555.574 -> 554.150 ( -0.26%) [ +0.12% +0.07% +0.00% / +0.16% -0.26% -0.21%] index_select skip256 : Elapsed 27.811 ms (556.216 ms / 20) 566.973 -> 567.690 ( +0.13%) [ +0.30% +0.00% +0.19% / +0.13% +0.32% +0.31%] index_select spread : Elapsed 28.435 ms (568.696 ms / 20) 566.572 -> 567.060 ( +0.09%) [ +0.13% +0.02% +0.00% / +0.09% +0.33% +0.43%] index_select strided 3 : Elapsed 28.366 ms (567.317 ms / 20) 564.644 -> 564.179 ( -0.08%) [ +0.07% +0.08% +0.00% / -0.08% +0.89% +0.97%] index_select strided 5 : Elapsed 28.253 ms (565.061 ms / 20) 568.621 -> 565.359 ( -0.57%) [ +0.30% +0.00% +0.10% / -0.08% -0.26% -0.57%] index_select strided 7 : Elapsed 28.517 ms (570.333 ms / 20) 566.772 -> 565.311 ( -0.26%) [ +0.00% +0.17% +0.25% / +0.15% -0.19% -0.26%] index_select strided 8 : Elapsed 28.339 ms (566.772 ms / 20) 563.436 -> 561.593 ( -0.33%) [ +0.03% +0.19% +0.00% / +0.06% -0.33% -0.14%] index_select strided 16 : Elapsed 28.180 ms (563.596 ms / 20) 566.748 -> 567.540 ( +0.14%) [ +0.00% +0.14% +0.17% / +0.14% +0.72% +0.79%] index_select random : Elapsed 28.337 ms (566.748 ms / 20) 567.759 -> 567.249 ( -0.09%) [ +0.00% +0.02% +0.09% / +0.08% +0.02% -0.09%] index_select random_sorted : Elapsed 28.388 ms (567.759 ms / 20) 569.378 -> 566.969 ( -0.42%) [ +0.21% +0.18% +0.00% / +0.14% -0.26% -0.42%] index_select perm : Elapsed 28.529 ms (570.573 ms / 20) 569.666 -> 567.475 ( -0.38%) [ +0.00% +0.07% +0.14% / +0.05% -0.16% -0.38%] index_select perm_sorted : Elapsed 28.483 ms (569.666 ms / 20) out_shape = [100, 40, 20, 50] in_shape = [100, 40, 256, 50] idx_dim = 2 B = [100, 40, 20, 50] (stride (40000, 20, 1, 800)) A = [100, 40, 256, 50] (stride (50, 1280000, 5000, 1)) dim = 2 111.898 -> 112.036 ( +0.12%) [ +0.02% +0.00% +0.12% / +0.14% +0.12% +0.18%] index_select const : Elapsed 5.596 ms (111.924 ms / 20) 112.075 -> 111.979 ( -0.09%) [ +0.03% +0.03% +0.00% / -0.09% +0.25% +0.07%] index_select wrap : Elapsed 5.605 ms (112.106 ms / 20) 111.878 -> 112.117 ( +0.21%) [ +0.17% +0.00% +0.27% / +0.21% +0.24% +0.24%] index_select linear : Elapsed 5.603 ms (112.066 ms / 20) 112.106 -> 112.036 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.04% -0.06% -0.06%] index_select reverse : Elapsed 5.606 ms (112.111 ms / 20) 111.925 -> 111.880 ( -0.04%) [ +0.20% +0.08% +0.00% / +0.20% -0.04% +0.03%] index_select skip64 : Elapsed 5.608 ms (112.153 ms / 20) 111.710 -> 111.764 ( +0.05%) [ +0.12% +0.16% +0.00% / +0.05% +0.26% +0.16%] index_select skip256 : Elapsed 5.592 ms (111.846 ms / 20) 111.939 -> 111.933 ( -0.01%) [ +0.12% +0.04% +0.00% / +0.02% +0.24% -0.01%] index_select spread : Elapsed 5.604 ms (112.078 ms / 20) 112.061 -> 112.168 ( +0.10%) [ +0.11% +0.07% +0.00% / +0.16% +0.22% +0.10%] index_select strided 3 : Elapsed 5.609 ms (112.184 ms / 20) 111.946 -> 111.815 ( -0.12%) [ +0.01% +0.00% +0.14% / -0.12% +0.23% +0.22%] index_select strided 5 : Elapsed 5.598 ms (111.958 ms / 20) 112.026 -> 111.821 ( -0.18%) [ +0.00% +0.07% +0.19% / +0.10% -0.18% -0.07%] index_select strided 7 : Elapsed 5.601 ms (112.026 ms / 20) 112.140 -> 111.835 ( -0.27%) [ +0.07% +0.10% +0.00% / +0.03% -0.20% -0.27%] index_select strided 8 : Elapsed 5.611 ms (112.223 ms / 20) 112.033 -> 111.992 ( -0.04%) [ +0.03% +0.00% +0.12% / +0.10% +0.06% -0.04%] index_select strided 16 : Elapsed 5.604 ms (112.072 ms / 20) 112.108 -> 111.894 ( -0.19%) [ +0.08% +0.10% +0.00% / -0.06% -0.10% -0.19%] index_select strided 64 : Elapsed 5.610 ms (112.197 ms / 20) 112.116 -> 112.089 ( -0.02%) [ +0.00% +0.03% +0.07% / -0.02% -0.02% +0.10%] index_select strided 100 : Elapsed 5.606 ms (112.116 ms / 20) 111.980 -> 112.034 ( +0.05%) [ +0.14% +0.05% +0.00% / +0.05% +0.17% +0.15%] index_select strided 255 : Elapsed 5.607 ms (112.136 ms / 20) 111.824 -> 111.857 ( +0.03%) [ +0.00% +0.09% +0.09% / +0.03% +0.25% +0.33%] index_select random : Elapsed 5.591 ms (111.824 ms / 20) 111.944 -> 111.887 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.11% +0.18%] index_select random_sorted : Elapsed 5.600 ms (112.005 ms / 20) 112.078 -> 112.072 ( -0.01%) [ +0.00% +0.13% +0.07% / +0.23% -0.01% +0.13%] index_select perm : Elapsed 5.604 ms (112.078 ms / 20) 112.068 -> 112.061 ( -0.01%) [ +0.00% +0.03% +0.08% / -0.01% +0.14% +0.08%] index_select perm_sorted : Elapsed 5.603 ms (112.068 ms / 20) out_shape = [100, 40, 256, 20] in_shape = [100, 40, 256, 50] idx_dim = 3 out_shape = [20, 50, 40, 256] in_shape = [100, 50, 40, 256] idx_dim = 0 out_shape = [100, 20, 40, 256] in_shape = [100, 50, 40, 256] idx_dim = 1 B = [100, 20, 40, 256] (stride (1, 1024000, 25600, 100)) A = [100, 50, 40, 256] (stride (1, 4000, 100, 200000)) dim = 1 650.266 -> 646.861 ( -0.52%) [ +0.77% +0.81% +0.00% / +0.37% -0.52% -0.37%] index_select const : Elapsed 32.764 ms (655.277 ms / 20) 653.530 -> 655.723 ( +0.34%) [ +0.00% +0.48% +0.52% / +0.60% +0.37% +0.34%] index_select wrap : Elapsed 32.676 ms (653.530 ms / 20) 655.718 -> 655.470 ( -0.04%) [ +0.00% +0.18% +0.11% / +0.29% +0.06% -0.04%] index_select linear : Elapsed 32.786 ms (655.718 ms / 20) 657.830 -> 655.938 ( -0.29%) [ +0.24% +0.01% +0.00% / +0.07% -0.24% -0.29%] index_select reverse : Elapsed 32.969 ms (659.381 ms / 20) 652.332 -> 647.618 ( -0.72%) [ +0.01% +0.00% +0.01% / +0.17% -0.61% -0.72%] index_select skip64 : Elapsed 32.619 ms (652.372 ms / 20) 649.876 -> 647.855 ( -0.31%) [ +0.65% +0.39% +0.00% / +0.81% -0.29% -0.31%] index_select skip256 : Elapsed 32.706 ms (654.111 ms / 20) 656.486 -> 657.289 ( +0.12%) [ +0.42% +0.10% +0.00% / +0.14% +0.12% +0.45%] index_select spread : Elapsed 32.962 ms (659.247 ms / 20) 659.027 -> 658.350 ( -0.10%) [ +0.00% +0.30% +0.03% / -0.06% -0.10% -0.04%] index_select strided 3 : Elapsed 32.951 ms (659.027 ms / 20) 654.914 -> 652.028 ( -0.44%) [ +0.15% +0.00% +0.00% / -0.12% -0.33% -0.44%] index_select strided 5 : Elapsed 32.795 ms (655.908 ms / 20) 658.829 -> 657.362 ( -0.22%) [ +0.19% +0.00% +0.00% / +0.19% -0.22% -0.20%] index_select strided 7 : Elapsed 33.002 ms (660.050 ms / 20) 658.164 -> 656.901 ( -0.19%) [ +0.21% +0.25% +0.00% / +0.23% +0.00% -0.19%] index_select strided 8 : Elapsed 32.978 ms (659.558 ms / 20) 657.328 -> 656.662 ( -0.10%) [ +0.16% +0.00% +0.20% / -0.10% +0.40% +0.30%] index_select strided 16 : Elapsed 32.920 ms (658.406 ms / 20) 657.674 -> 657.955 ( +0.04%) [ +0.39% +0.01% +0.00% / +0.04% +0.44% +0.66%] index_select random : Elapsed 33.011 ms (660.226 ms / 20) 654.286 -> 654.319 ( +0.01%) [ +0.00% +0.24% +0.34% / +0.01% +0.49% +0.74%] index_select random_sorted : Elapsed 32.714 ms (654.286 ms / 20) 658.713 -> 658.243 ( -0.07%) [ +0.00% +0.37% +0.15% / -0.07% +0.29% +0.27%] index_select perm : Elapsed 32.936 ms (658.713 ms / 20) 657.759 -> 657.251 ( -0.08%) [ +0.21% +0.00% +0.04% / +0.29% -0.02% -0.08%] index_select perm_sorted : Elapsed 32.958 ms (659.156 ms / 20) out_shape = [100, 50, 20, 256] in_shape = [100, 50, 40, 256] idx_dim = 2 out_shape = [100, 50, 40, 20] in_shape = [100, 50, 40, 256] idx_dim = 3 out_shape = [20, 50, 256, 40] in_shape = [100, 50, 256, 40] idx_dim = 0 out_shape = [100, 20, 256, 40] in_shape = [100, 50, 256, 40] idx_dim = 1 out_shape = [100, 50, 20, 40] in_shape = [100, 50, 256, 40] idx_dim = 2 B = [100, 50, 20, 40] (stride (1, 100, 200000, 5000)) A = [100, 50, 256, 40] (stride (256, 25600, 1, 1280000)) dim = 2 137.220 -> 137.340 ( +0.09%) [ +0.02% +0.20% +0.00% / +0.20% +0.31% +0.09%] index_select const : Elapsed 6.862 ms (137.244 ms / 20) 137.465 -> 137.484 ( +0.01%) [ +0.17% +0.08% +0.00% / +0.02% +0.09% +0.01%] index_select wrap : Elapsed 6.885 ms (137.697 ms / 20) 137.519 -> 137.413 ( -0.08%) [ +0.03% +0.00% +0.04% / +0.20% +0.01% -0.08%] index_select linear : Elapsed 6.878 ms (137.561 ms / 20) 137.391 -> 137.532 ( +0.10%) [ +0.17% +0.06% +0.00% / +0.10% +0.16% +0.12%] index_select reverse : Elapsed 6.881 ms (137.628 ms / 20) 137.299 -> 137.402 ( +0.08%) [ +0.00% +0.09% +0.02% / +0.08% +0.08% +0.17%] index_select skip64 : Elapsed 6.865 ms (137.299 ms / 20) 137.317 -> 137.377 ( +0.04%) [ +0.18% +0.00% +0.12% / +0.04% +0.07% +0.11%] index_select skip256 : Elapsed 6.878 ms (137.563 ms / 20) 137.924 -> 138.191 ( +0.19%) [ +0.16% +0.20% +0.00% / +0.20% +0.19% +0.21%] index_select spread : Elapsed 6.907 ms (138.141 ms / 20) 137.671 -> 137.628 ( -0.03%) [ +0.10% +0.00% +0.04% / +0.05% -0.03% +0.02%] index_select strided 3 : Elapsed 6.891 ms (137.814 ms / 20) 137.872 -> 137.788 ( -0.06%) [ +0.00% +0.08% +0.04% / +0.09% -0.02% -0.06%] index_select strided 5 : Elapsed 6.894 ms (137.872 ms / 20) 137.989 -> 138.054 ( +0.05%) [ +0.07% +0.06% +0.00% / +0.05% +0.08% +0.07%] index_select strided 7 : Elapsed 6.904 ms (138.083 ms / 20) 138.045 -> 138.058 ( +0.01%) [ +0.00% +0.14% +0.16% / +0.01% +0.04% +0.06%] index_select strided 8 : Elapsed 6.902 ms (138.045 ms / 20) 138.041 -> 138.068 ( +0.02%) [ +0.29% +0.04% +0.00% / +0.12% +0.03% +0.02%] index_select strided 16 : Elapsed 6.922 ms (138.444 ms / 20) 137.941 -> 138.036 ( +0.07%) [ +0.18% +0.00% +0.02% / +0.07% +0.07% +0.10%] index_select strided 64 : Elapsed 6.909 ms (138.183 ms / 20) 137.987 -> 138.030 ( +0.03%) [ +0.00% +0.01% +0.17% / +0.22% +0.03% +0.11%] index_select strided 100 : Elapsed 6.899 ms (137.987 ms / 20) 137.465 -> 137.359 ( -0.08%) [ +0.17% +0.05% +0.00% / +0.06% -0.08% +0.07%] index_select strided 255 : Elapsed 6.885 ms (137.694 ms / 20) 137.979 -> 138.019 ( +0.03%) [ +0.15% +0.21% +0.00% / +0.03% +0.03% +0.12%] index_select random : Elapsed 6.909 ms (138.188 ms / 20) 137.870 -> 138.021 ( +0.11%) [ +0.28% +0.05% +0.00% / +0.14% +0.11% +0.12%] index_select random_sorted : Elapsed 6.913 ms (138.256 ms / 20) 138.054 -> 137.909 ( -0.11%) [ +0.00% +0.12% +0.04% / -0.05% -0.11% +0.07%] index_select perm : Elapsed 6.903 ms (138.054 ms / 20) 137.931 -> 138.012 ( +0.06%) [ +0.00% +0.10% +0.04% / +0.06% +0.06% +0.14%] index_select perm_sorted : Elapsed 6.897 ms (137.931 ms / 20) out_shape = [100, 50, 256, 20] in_shape = [100, 50, 256, 40] idx_dim = 3 B = [100, 50, 256, 20] (stride (1, 25600, 100, 1280000)) A = [100, 50, 256, 40] (stride (512000, 256, 1, 12800)) dim = 3 374.510 -> 372.382 ( -0.57%) [ +0.01% +0.00% +0.49% / -0.09% -0.07% -0.57%] index_select const : Elapsed 18.727 ms (374.547 ms / 20) 373.863 -> 375.266 ( +0.38%) [ +0.00% +0.19% +1.09% / +0.97% +0.38% +0.51%] index_select wrap : Elapsed 18.693 ms (373.863 ms / 20) 378.331 -> 378.456 ( +0.03%) [ +0.00% +0.90% +0.36% / +0.03% +0.75% +0.36%] index_select linear : Elapsed 18.917 ms (378.331 ms / 20) 373.487 -> 372.340 ( -0.31%) [ +0.48% +0.28% +0.00% / -0.31% +0.36% +0.66%] index_select reverse : Elapsed 18.765 ms (375.294 ms / 20) 370.914 -> 371.713 ( +0.22%) [ +0.44% +0.00% +0.44% / +0.22% +1.10% +0.55%] index_select skip64 : Elapsed 18.627 ms (372.530 ms / 20) 363.778 -> 365.183 ( +0.39%) [ +0.03% +0.41% +0.00% / +0.39% +3.06% +2.63%] index_select skip256 : Elapsed 18.195 ms (363.892 ms / 20) 364.542 -> 365.910 ( +0.38%) [ +0.30% +0.36% +0.00% / +0.38% +3.29% +2.76%] index_select spread : Elapsed 18.281 ms (365.620 ms / 20) 377.489 -> 377.106 ( -0.10%) [ +0.00% +0.02% +0.27% / -0.10% +0.16% +0.58%] index_select strided 3 : Elapsed 18.874 ms (377.489 ms / 20) 375.558 -> 373.075 ( -0.66%) [ +0.06% +0.00% +0.01% / -0.66% +0.54% +0.08%] index_select strided 5 : Elapsed 18.790 ms (375.800 ms / 20) 373.980 -> 369.967 ( -1.07%) [ +0.63% +0.00% +0.71% / -0.24% -0.03% -1.07%] index_select strided 7 : Elapsed 18.818 ms (376.353 ms / 20) 370.089 -> 369.604 ( -0.13%) [ +0.62% +0.00% +1.01% / +0.55% -0.01% -0.13%] index_select strided 8 : Elapsed 18.619 ms (372.371 ms / 20) 374.417 -> 373.738 ( -0.18%) [ +0.00% +0.24% +0.20% / +0.34% -0.18% +0.75%] index_select strided 16 : Elapsed 18.721 ms (374.417 ms / 20) 377.989 -> 378.396 ( +0.11%) [ +0.00% +0.35% +0.45% / +0.11% +0.30% +0.28%] index_select random : Elapsed 18.899 ms (377.989 ms / 20) 374.855 -> 375.483 ( +0.17%) [ +0.45% +0.00% +0.01% / +0.17% +0.28% +0.27%] index_select random_sorted : Elapsed 18.828 ms (376.558 ms / 20) 372.346 -> 373.059 ( +0.19%) [ +0.92% +0.00% +0.44% / +0.35% +0.19% +0.94%] index_select perm : Elapsed 18.789 ms (375.773 ms / 20) 373.427 -> 374.408 ( +0.26%) [ +0.01% +0.28% +0.00% / +0.26% +1.03% +0.79%] index_select perm_sorted : Elapsed 18.674 ms (373.470 ms / 20) out_shape = [20, 256, 40, 50] in_shape = [100, 256, 40, 50] idx_dim = 0 B = [20, 256, 40, 50] (stride (512000, 50, 12800, 1)) A = [100, 256, 40, 50] (stride (512000, 2000, 50, 1)) dim = 0 65.335 -> 65.408 ( +0.11%) [ +0.22% +0.00% +0.14% / +0.19% +0.11% +0.16%] index_select const : Elapsed 3.274 ms (65.477 ms / 20) 65.564 -> 65.566 ( +0.00%) [ +0.16% +0.07% +0.00% / +0.11% +0.00% +0.11%] index_select wrap : Elapsed 3.284 ms (65.670 ms / 20) 65.557 -> 65.618 ( +0.09%) [ +0.16% +0.08% +0.00% / +0.30% +0.09% +0.15%] index_select linear : Elapsed 3.283 ms (65.660 ms / 20) 65.536 -> 65.577 ( +0.06%) [ +0.10% +0.05% +0.00% / +0.06% +0.10% +0.13%] index_select reverse : Elapsed 3.280 ms (65.601 ms / 20) 65.375 -> 65.404 ( +0.04%) [ +0.15% +0.05% +0.00% / +0.19% +0.15% +0.04%] index_select skip64 : Elapsed 3.274 ms (65.475 ms / 20) 65.370 -> 65.429 ( +0.09%) [ +0.15% +0.15% +0.00% / +0.20% +0.17% +0.09%] index_select skip256 : Elapsed 3.273 ms (65.467 ms / 20) 65.543 -> 65.663 ( +0.18%) [ +0.24% +0.02% +0.00% / +0.31% +0.18% +0.21%] index_select spread : Elapsed 3.285 ms (65.702 ms / 20) 65.614 -> 65.570 ( -0.07%) [ +0.09% +0.07% +0.00% / +0.02% -0.07% -0.01%] index_select strided 3 : Elapsed 3.284 ms (65.674 ms / 20) 65.491 -> 65.659 ( +0.26%) [ +0.34% +0.15% +0.00% / +0.26% +0.31% +0.31%] index_select strided 5 : Elapsed 3.286 ms (65.716 ms / 20) 65.541 -> 65.623 ( +0.13%) [ +0.13% +0.00% +0.09% / +0.13% +0.15% +0.15%] index_select strided 7 : Elapsed 3.281 ms (65.624 ms / 20) 65.556 -> 65.567 ( +0.02%) [ +0.10% +0.09% +0.00% / +0.02% +0.18% +0.25%] index_select strided 8 : Elapsed 3.281 ms (65.619 ms / 20) 65.528 -> 65.561 ( +0.05%) [ +0.13% +0.00% +0.07% / +0.13% +0.05% +0.25%] index_select strided 16 : Elapsed 3.281 ms (65.613 ms / 20) 65.495 -> 65.583 ( +0.13%) [ +0.23% +0.07% +0.00% / +0.13% +0.33% +0.31%] index_select strided 64 : Elapsed 3.282 ms (65.643 ms / 20) 65.555 -> 65.570 ( +0.02%) [ +0.06% +0.09% +0.00% / +0.02% +0.13% +0.11%] index_select random : Elapsed 3.280 ms (65.597 ms / 20) 65.566 -> 65.628 ( +0.09%) [ +0.02% +0.06% +0.00% / +0.11% +0.10% +0.09%] index_select random_sorted : Elapsed 3.279 ms (65.580 ms / 20) 65.631 -> 65.668 ( +0.06%) [ +0.11% +0.04% +0.00% / +0.14% +0.11% +0.06%] index_select perm : Elapsed 3.285 ms (65.702 ms / 20) 65.613 -> 65.551 ( -0.09%) [ +0.10% +0.07% +0.00% / +0.10% +0.18% -0.09%] index_select perm_sorted : Elapsed 3.284 ms (65.679 ms / 20) out_shape = [100, 20, 40, 50] in_shape = [100, 256, 40, 50] idx_dim = 1 out_shape = [100, 256, 20, 50] in_shape = [100, 256, 40, 50] idx_dim = 2 out_shape = [100, 256, 40, 20] in_shape = [100, 256, 40, 50] idx_dim = 3 out_shape = [20, 256, 50, 40] in_shape = [100, 256, 50, 40] idx_dim = 0 B = [20, 256, 50, 40] (stride (12800, 1, 256, 256000)) dim = 0 fill_cnt = 100 447.230 -> 447.259 ( +0.01%) [ +0.00% +0.17% +0.22% / +0.01% +0.62% +0.81%] index_fill_ const : Elapsed 22.362 ms (447.230 ms / 20) 480.139 -> 480.725 ( +0.12%) [ +0.00% +0.13% +0.21% / +0.12% +4.39% +4.60%] index_fill_ linear : Elapsed 24.007 ms (480.139 ms / 20) 532.148 -> 532.501 ( +0.07%) [ +0.27% +0.00% +0.27% / +0.07% +6.03% +6.08%] index_fill_ reverse : Elapsed 26.678 ms (533.567 ms / 20) 447.700 -> 447.720 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +1.18% +1.22%] index_fill_ skip64 : Elapsed 22.417 ms (448.339 ms / 20) 450.599 -> 451.630 ( +0.23%) [ +0.12% +0.07% +0.00% / +0.23% +0.32% +0.32%] index_fill_ skip256 : Elapsed 22.558 ms (451.157 ms / 20) 645.664 -> 626.299 ( -3.00%) [ +0.41% +0.51% +0.00% / +0.15% -2.17% -3.00%] index_fill_ spread : Elapsed 32.416 ms (648.322 ms / 20) 824.341 -> 813.875 ( -1.27%) [ +0.02% +0.00% +0.02% / +0.01% -1.27% -1.12%] index_fill_ strided 3 : Elapsed 41.225 ms (824.492 ms / 20) 753.155 -> 755.200 ( +0.27%) [ +0.00% +0.17% +0.08% / +0.27% +5.32% +5.21%] index_fill_ strided 5 : Elapsed 37.658 ms (753.155 ms / 20) 799.322 -> 798.338 ( -0.12%) [ +0.08% +0.00% +0.16% / -0.12% +2.27% +2.05%] index_fill_ strided 7 : Elapsed 39.997 ms (799.948 ms / 20) 778.621 -> 774.704 ( -0.50%) [ +0.03% +0.00% +0.00% / +0.05% -0.50% -0.43%] index_fill_ strided 8 : Elapsed 38.942 ms (778.836 ms / 20) 777.586 -> 764.632 ( -1.67%) [ +0.05% +0.05% +0.00% / +0.09% -1.57% -1.67%] index_fill_ strided 16 : Elapsed 38.899 ms (777.987 ms / 20) 796.803 -> 792.128 ( -0.59%) [ +0.33% +0.31% +0.00% / +0.16% -0.57% -0.59%] index_fill_ random : Elapsed 39.972 ms (799.434 ms / 20) 656.459 -> 645.332 ( -1.70%) [ +0.06% +0.06% +0.00% / -0.17% -1.70% -1.68%] index_fill_ random_sorted : Elapsed 32.841 ms (656.829 ms / 20) out_shape = [100, 20, 50, 40] in_shape = [100, 256, 50, 40] idx_dim = 1 B = [100, 20, 50, 40] (stride (40000, 40, 800, 1)) A = [100, 256, 50, 40] (stride (1, 200000, 4000, 100)) dim = 1 51.382 -> 51.185 ( -0.38%) [ +0.09% +0.19% +0.00% / +0.24% -0.37% -0.38%] index_select const : Elapsed 2.571 ms (51.427 ms / 20) 62.118 -> 62.045 ( -0.12%) [ +0.17% +0.28% +0.00% / +0.03% -0.01% -0.12%] index_select wrap : Elapsed 3.111 ms (62.221 ms / 20) 62.135 -> 62.117 ( -0.03%) [ +0.00% +0.07% +0.03% / +0.01% -0.03% -0.02%] index_select linear : Elapsed 3.107 ms (62.135 ms / 20) 62.025 -> 62.090 ( +0.10%) [ +0.00% +0.08% +0.09% / +0.10% +0.46% +0.22%] index_select reverse : Elapsed 3.101 ms (62.025 ms / 20) 51.442 -> 51.148 ( -0.57%) [ +0.06% +0.03% +0.00% / +0.00% -0.43% -0.57%] index_select skip64 : Elapsed 2.574 ms (51.475 ms / 20) 51.400 -> 51.171 ( -0.45%) [ +0.18% +0.00% +0.22% / +0.13% -0.45% -0.41%] index_select skip256 : Elapsed 2.575 ms (51.493 ms / 20) 62.141 -> 61.803 ( -0.54%) [ +0.18% +0.10% +0.00% / +0.20% -0.28% -0.54%] index_select spread : Elapsed 3.113 ms (62.251 ms / 20) 62.312 -> 61.906 ( -0.65%) [ +0.12% +0.05% +0.00% / -0.08% -0.65% -0.59%] index_select strided 3 : Elapsed 3.119 ms (62.388 ms / 20) 62.225 -> 61.924 ( -0.48%) [ +0.00% +0.10% +0.09% / +0.10% -0.20% -0.48%] index_select strided 5 : Elapsed 3.111 ms (62.225 ms / 20) 61.553 -> 61.644 ( +0.15%) [ +0.00% +0.18% +0.19% / +0.15% +0.96% +1.01%] index_select strided 7 : Elapsed 3.078 ms (61.553 ms / 20) 62.079 -> 61.654 ( -0.68%) [ +0.26% +0.36% +0.00% / +0.12% -0.68% -0.67%] index_select strided 8 : Elapsed 3.112 ms (62.240 ms / 20) 62.511 -> 61.780 ( -1.17%) [ +0.02% +0.00% +0.04% / -0.15% -0.86% -1.17%] index_select strided 16 : Elapsed 3.126 ms (62.524 ms / 20) 62.394 -> 61.928 ( -0.75%) [ +0.08% +0.00% +0.34% / +0.30% -0.73% -0.75%] index_select strided 64 : Elapsed 3.122 ms (62.441 ms / 20) 62.276 -> 61.825 ( -0.72%) [ +0.00% +0.26% +0.01% / +0.19% -0.72% -0.58%] index_select strided 100 : Elapsed 3.114 ms (62.276 ms / 20) 62.265 -> 62.095 ( -0.27%) [ +0.05% +0.11% +0.00% / +0.04% -0.27% -0.24%] index_select strided 255 : Elapsed 3.115 ms (62.294 ms / 20) 61.634 -> 61.148 ( -0.79%) [ +0.31% +0.48% +0.00% / +0.35% -0.79% -0.77%] index_select random : Elapsed 3.091 ms (61.826 ms / 20) 61.398 -> 60.793 ( -0.99%) [ +0.08% +0.27% +0.00% / -0.06% -0.87% -0.99%] index_select random_sorted : Elapsed 3.072 ms (61.448 ms / 20) 61.935 -> 62.058 ( +0.20%) [ +0.00% +0.35% +0.18% / +0.20% +0.44% +0.47%] index_select perm : Elapsed 3.097 ms (61.935 ms / 20) 62.354 -> 62.321 ( -0.05%) [ +0.01% +0.11% +0.00% / +0.03% -0.05% +0.16%] index_select perm_sorted : Elapsed 3.118 ms (62.359 ms / 20) out_shape = [100, 256, 20, 40] in_shape = [100, 256, 50, 40] idx_dim = 2 out_shape = [100, 256, 50, 20] in_shape = [100, 256, 50, 40] idx_dim = 3 B = [100, 256, 50, 20] (stride (20, 2000, 512000, 1)) A = [100, 256, 50, 40] (stride (256, 1, 25600, 1280000)) dim = 3 1049.838 -> 1048.948 ( -0.08%) [ +0.05% +0.00% +0.05% / -0.01% -0.07% -0.08%] index_select const : Elapsed 52.516 ms (1050.318 ms / 20) 1056.512 -> 1056.178 ( -0.03%) [ +0.00% +0.05% +0.09% / +0.07% -0.03% +0.04%] index_select wrap : Elapsed 52.826 ms (1056.512 ms / 20) 1056.807 -> 1056.469 ( -0.03%) [ +0.06% +0.00% +0.02% / +0.01% -0.03% -0.01%] index_select linear : Elapsed 52.871 ms (1057.430 ms / 20) 1054.926 -> 1056.100 ( +0.11%) [ +0.08% +0.05% +0.00% / +0.11% +0.17% +0.25%] index_select reverse : Elapsed 52.786 ms (1055.726 ms / 20) 1048.960 -> 1049.229 ( +0.03%) [ +0.00% +0.06% +0.02% / +0.03% +0.03% +0.04%] index_select skip64 : Elapsed 52.448 ms (1048.960 ms / 20) 1047.189 -> 1047.524 ( +0.03%) [ +0.01% +0.00% +0.03% / +0.03% +0.26% +0.20%] index_select skip256 : Elapsed 52.366 ms (1047.320 ms / 20) 1053.813 -> 1053.498 ( -0.03%) [ +0.01% +0.01% +0.00% / -0.03% +0.40% +0.37%] index_select spread : Elapsed 52.698 ms (1053.964 ms / 20) 1056.967 -> 1056.002 ( -0.09%) [ +0.01% +0.06% +0.00% / -0.05% -0.03% -0.09%] index_select strided 3 : Elapsed 52.853 ms (1057.069 ms / 20) 1056.666 -> 1056.239 ( -0.04%) [ +0.04% +0.05% +0.00% / -0.04% +0.08% +0.08%] index_select strided 5 : Elapsed 52.855 ms (1057.091 ms / 20) 1056.540 -> 1053.514 ( -0.29%) [ +0.03% +0.05% +0.00% / +0.01% -0.28% -0.29%] index_select strided 7 : Elapsed 52.841 ms (1056.810 ms / 20) 1056.632 -> 1053.653 ( -0.28%) [ +0.00% +0.01% +0.02% / +0.01% -0.27% -0.28%] index_select strided 8 : Elapsed 52.832 ms (1056.632 ms / 20) 1056.585 -> 1056.803 ( +0.02%) [ +0.08% +0.07% +0.00% / +0.02% +0.10% +0.05%] index_select strided 16 : Elapsed 52.871 ms (1057.411 ms / 20) 1056.171 -> 1056.520 ( +0.03%) [ +0.00% +0.02% +0.01% / +0.03% +0.05% +0.14%] index_select random : Elapsed 52.809 ms (1056.171 ms / 20) 1054.876 -> 1054.945 ( +0.01%) [ +0.00% +0.14% +0.09% / +0.01% +0.14% +0.19%] index_select random_sorted : Elapsed 52.744 ms (1054.876 ms / 20) 1056.319 -> 1055.788 ( -0.05%) [ +0.00% +0.02% +0.03% / +0.04% -0.05% +0.00%] index_select perm : Elapsed 52.816 ms (1056.319 ms / 20) 1052.994 -> 1053.940 ( +0.09%) [ +0.17% +0.10% +0.00% / +0.09% +0.31% +0.28%] index_select perm_sorted : Elapsed 52.737 ms (1054.739 ms / 20) out_shape = [20, 40, 50, 100] in_shape = [256, 40, 50, 100] idx_dim = 0 B = [20, 40, 50, 100] (stride (40, 1, 80000, 800)) A = [256, 40, 50, 100] (stride (2000, 50, 1, 512000)) dim = 0 52.708 -> 53.050 ( +0.65%) [ +1.07% +0.00% +1.20% / +0.65% +14.21% +14.20%] index_select const : Elapsed 2.664 ms (53.271 ms / 20) 52.239 -> 52.255 ( +0.03%) [ +0.27% +1.20% +0.00% / +0.03% +7.29% +13.13%] index_select wrap : Elapsed 2.619 ms (52.379 ms / 20) 52.160 -> 53.187 ( +1.97%) [ +0.00% +3.41% +1.01% / +1.97% +13.46% +13.58%] index_select linear : Elapsed 2.608 ms (52.160 ms / 20) 44.711 -> 45.109 ( +0.89%) [ +0.00% +3.00% +5.44% / +0.89% +29.53% +28.74%] index_select reverse : Elapsed 2.236 ms (44.711 ms / 20) 52.607 -> 53.014 ( +0.77%) [ +1.43% +0.00% +0.76% / +0.77% +17.97% +17.69%] index_select skip64 : Elapsed 2.668 ms (53.359 ms / 20) 52.759 -> 52.614 ( -0.27%) [ +0.84% +0.00% +0.28% / -0.27% +14.20% +13.43%] index_select skip256 : Elapsed 2.660 ms (53.200 ms / 20) 46.360 -> 48.364 ( +4.32%) [ +2.35% +1.95% +0.00% / +4.32% +19.61% +19.65%] index_select spread : Elapsed 2.373 ms (47.451 ms / 20) 47.333 -> 47.499 ( +0.35%) [ +0.88% +0.00% +0.09% / +0.35% +16.46% +14.23%] index_select strided 3 : Elapsed 2.387 ms (47.749 ms / 20) 46.537 -> 46.189 ( -0.75%) [ +1.83% +0.00% +1.10% / -0.75% +18.58% +19.95%] index_select strided 5 : Elapsed 2.369 ms (47.387 ms / 20) 49.746 -> 50.876 ( +2.27%) [ +0.97% +4.64% +0.00% / +2.27% +3.97% +4.90%] index_select strided 7 : Elapsed 2.511 ms (50.229 ms / 20) 51.974 -> 50.200 ( -3.41%) [ +0.38% +0.00% +0.48% / +2.98% -3.41% -2.77%] index_select strided 8 : Elapsed 2.609 ms (52.171 ms / 20) 51.375 -> 51.006 ( -0.72%) [ +1.14% +1.56% +0.00% / -0.72% +4.22% +4.35%] index_select strided 16 : Elapsed 2.598 ms (51.962 ms / 20) 50.074 -> 49.150 ( -1.85%) [ +0.11% +1.99% +0.00% / -1.85% +23.81% +20.72%] index_select strided 64 : Elapsed 2.506 ms (50.128 ms / 20) 46.278 -> 46.902 ( +1.35%) [ +0.00% +3.54% +1.77% / +1.35% +21.50% +18.75%] index_select strided 100 : Elapsed 2.314 ms (46.278 ms / 20) 51.682 -> 50.612 ( -2.07%) [ +0.83% +3.17% +0.00% / -2.07% +15.41% +17.58%] index_select strided 255 : Elapsed 2.606 ms (52.112 ms / 20) 50.770 -> 51.467 ( +1.37%) [ +0.00% +1.51% +1.42% / +1.37% +11.85% +17.48%] index_select random : Elapsed 2.538 ms (50.770 ms / 20) 48.591 -> 47.913 ( -1.40%) [ +2.61% +0.71% +0.00% / -1.40% +10.47% +12.00%] index_select random_sorted : Elapsed 2.493 ms (49.858 ms / 20) 48.778 -> 49.486 ( +1.45%) [ +1.66% +0.00% +0.23% / +1.45% +15.85% +13.43%] index_select perm : Elapsed 2.479 ms (49.586 ms / 20) 54.094 -> 54.818 ( +1.34%) [ +0.00% +3.31% +2.93% / +1.87% +1.34% +2.06%] index_select perm_sorted : Elapsed 2.705 ms (54.094 ms / 20) out_shape = [256, 20, 50, 100] in_shape = [256, 40, 50, 100] idx_dim = 1 B = [256, 20, 50, 100] (stride (100000, 100, 2000, 1)) A = [256, 40, 50, 100] (stride (100, 1280000, 25600, 1)) dim = 1 253.088 -> 253.195 ( +0.04%) [ +0.10% +0.04% +0.00% / +0.08% +0.04% +0.06%] index_select const : Elapsed 12.667 ms (253.346 ms / 20) 253.227 -> 253.395 ( +0.07%) [ +0.15% +0.10% +0.00% / +0.10% +0.07% +0.09%] index_select wrap : Elapsed 12.680 ms (253.606 ms / 20) 253.346 -> 253.391 ( +0.02%) [ +0.09% +0.05% +0.00% / +0.08% +0.02% +0.04%] index_select linear : Elapsed 12.678 ms (253.564 ms / 20) 253.437 -> 253.342 ( -0.04%) [ +0.03% +0.01% +0.00% / -0.02% -0.04% +0.01%] index_select reverse : Elapsed 12.675 ms (253.507 ms / 20) 253.002 -> 253.134 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.17% +0.05% +0.11%] index_select skip64 : Elapsed 12.660 ms (253.209 ms / 20) 253.253 -> 253.256 ( +0.00%) [ +0.07% +0.10% +0.00% / +0.08% +0.02% +0.00%] index_select skip256 : Elapsed 12.671 ms (253.430 ms / 20) 253.460 -> 253.496 ( +0.01%) [ +0.00% +0.03% +0.00% / +0.13% +0.01% +0.03%] index_select spread : Elapsed 12.673 ms (253.465 ms / 20) 253.292 -> 253.503 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.12% +0.11%] index_select strided 3 : Elapsed 12.680 ms (253.592 ms / 20) 253.338 -> 253.407 ( +0.03%) [ +0.10% +0.04% +0.00% / +0.03% +0.07% +0.06%] index_select strided 5 : Elapsed 12.680 ms (253.600 ms / 20) 253.507 -> 253.473 ( -0.01%) [ +0.02% +0.00% +0.01% / +0.08% -0.01% +0.02%] index_select strided 7 : Elapsed 12.678 ms (253.566 ms / 20) 253.458 -> 253.405 ( -0.02%) [ +0.04% +0.00% +0.04% / -0.02% +0.07% +0.03%] index_select strided 8 : Elapsed 12.678 ms (253.556 ms / 20) 253.283 -> 253.487 ( +0.08%) [ +0.04% +0.00% +0.03% / +0.09% +0.09% +0.08%] index_select strided 16 : Elapsed 12.670 ms (253.395 ms / 20) 253.495 -> 253.555 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.09% +0.02% +0.04%] index_select random : Elapsed 12.680 ms (253.608 ms / 20) 253.383 -> 253.507 ( +0.05%) [ +0.03% +0.00% +0.06% / +0.05% +0.09% +0.10%] index_select random_sorted : Elapsed 12.672 ms (253.450 ms / 20) 253.351 -> 253.440 ( +0.04%) [ +0.10% +0.00% +0.07% / +0.05% +0.04% +0.17%] index_select perm : Elapsed 12.680 ms (253.597 ms / 20) 253.432 -> 253.478 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.10% +0.02% +0.02%] index_select perm_sorted : Elapsed 12.680 ms (253.601 ms / 20) out_shape = [256, 40, 20, 100] in_shape = [256, 40, 50, 100] idx_dim = 2 out_shape = [256, 40, 50, 20] in_shape = [256, 40, 50, 100] idx_dim = 3 B = [256, 40, 50, 20] (stride (1, 256000, 256, 12800)) A = [256, 40, 50, 100] (stride (200000, 1, 4000, 40)) dim = 3 322.239 -> 322.531 ( +0.09%) [ +0.00% +0.10% +0.23% / +0.09% +0.58% +0.67%] index_select const : Elapsed 16.112 ms (322.239 ms / 20) 322.494 -> 322.206 ( -0.09%) [ +0.04% +0.00% +0.17% / -0.09% +0.84% +0.77%] index_select wrap : Elapsed 16.131 ms (322.612 ms / 20) 322.859 -> 322.842 ( -0.01%) [ +0.00% +0.05% +0.00% / -0.01% +0.48% +0.68%] index_select linear : Elapsed 16.143 ms (322.859 ms / 20) 327.514 -> 321.465 ( -1.85%) [ +0.03% +0.00% +0.07% / +0.01% -1.85% -1.47%] index_select reverse : Elapsed 16.381 ms (327.613 ms / 20) 322.357 -> 323.004 ( +0.20%) [ +0.12% +0.04% +0.00% / +0.20% +0.46% +0.55%] index_select skip64 : Elapsed 16.137 ms (322.746 ms / 20) 321.571 -> 322.087 ( +0.16%) [ +0.01% +0.00% +0.04% / +0.16% +0.77% +0.78%] index_select skip256 : Elapsed 16.080 ms (321.600 ms / 20) 323.508 -> 323.306 ( -0.06%) [ +0.00% +0.08% +0.23% / +0.32% -0.06% +0.04%] index_select spread : Elapsed 16.175 ms (323.508 ms / 20) 325.607 -> 323.825 ( -0.55%) [ +0.00% +0.22% +0.11% / +0.01% -0.53% -0.55%] index_select strided 3 : Elapsed 16.280 ms (325.607 ms / 20) 325.058 -> 323.630 ( -0.44%) [ +0.00% +0.17% +0.25% / +0.19% -0.44% -0.41%] index_select strided 5 : Elapsed 16.253 ms (325.058 ms / 20) 325.844 -> 323.484 ( -0.72%) [ +0.14% +0.17% +0.00% / +0.07% -0.72% -0.53%] index_select strided 7 : Elapsed 16.315 ms (326.304 ms / 20) 324.362 -> 321.613 ( -0.85%) [ +0.08% +0.13% +0.00% / -0.17% -0.85% -0.74%] index_select strided 8 : Elapsed 16.231 ms (324.627 ms / 20) 323.624 -> 323.461 ( -0.05%) [ +0.00% +0.10% +0.09% / -0.05% +0.32% +0.16%] index_select strided 16 : Elapsed 16.181 ms (323.624 ms / 20) 319.791 -> 320.497 ( +0.22%) [ +0.14% +0.00% +0.28% / +0.22% +0.53% +0.64%] index_select strided 64 : Elapsed 16.011 ms (320.229 ms / 20) 321.826 -> 322.160 ( +0.10%) [ +0.19% +0.00% +0.18% / +0.10% +1.27% +1.18%] index_select random : Elapsed 16.121 ms (322.423 ms / 20) 325.833 -> 323.883 ( -0.60%) [ +0.10% +0.00% +0.08% / +0.12% -0.60% -0.35%] index_select random_sorted : Elapsed 16.308 ms (326.153 ms / 20) 323.648 -> 323.946 ( +0.09%) [ +0.14% +0.00% +0.13% / +0.09% +0.14% +0.21%] index_select perm : Elapsed 16.204 ms (324.089 ms / 20) 324.065 -> 324.327 ( +0.08%) [ +0.22% +0.00% +0.07% / +0.27% +0.12% +0.08%] index_select perm_sorted : Elapsed 16.239 ms (324.778 ms / 20) out_shape = [20, 40, 100, 50] in_shape = [256, 40, 100, 50] idx_dim = 0 out_shape = [256, 20, 100, 50] in_shape = [256, 40, 100, 50] idx_dim = 1 B = [256, 20, 100, 50] (stride (1, 256, 5120, 512000)) A = [256, 40, 100, 50] (stride (100, 25600, 1, 1024000)) dim = 1 932.214 -> 930.212 ( -0.21%) [ +0.00% +0.14% +0.07% / -0.21% +1.85% +1.99%] index_select const : Elapsed 46.611 ms (932.214 ms / 20) 928.331 -> 931.170 ( +0.31%) [ +0.06% +0.00% +0.11% / +0.31% +0.92% +1.08%] index_select wrap : Elapsed 46.442 ms (928.849 ms / 20) 929.200 -> 930.606 ( +0.15%) [ +0.15% +0.00% +0.29% / +0.15% +1.02% +0.77%] index_select linear : Elapsed 46.528 ms (930.557 ms / 20) 931.518 -> 930.800 ( -0.08%) [ +0.00% +0.14% +0.07% / -0.08% +0.99% +1.15%] index_select reverse : Elapsed 46.576 ms (931.518 ms / 20) 929.987 -> 930.993 ( +0.11%) [ +0.22% +0.00% +0.10% / +0.11% +2.49% +2.28%] index_select skip64 : Elapsed 46.600 ms (932.007 ms / 20) 928.008 -> 930.382 ( +0.26%) [ +0.00% +0.29% +0.34% / +0.26% +2.67% +2.65%] index_select skip256 : Elapsed 46.400 ms (928.008 ms / 20) 921.051 -> 923.946 ( +0.31%) [ +0.28% +0.00% +0.48% / +0.31% +3.09% +2.87%] index_select spread : Elapsed 46.183 ms (923.660 ms / 20) 934.752 -> 936.833 ( +0.22%) [ +0.30% +0.00% +0.07% / +0.22% +0.76% +0.85%] index_select strided 3 : Elapsed 46.878 ms (937.553 ms / 20) 924.469 -> 926.169 ( +0.18%) [ +0.07% +0.09% +0.00% / +0.18% +2.63% +2.55%] index_select strided 5 : Elapsed 46.256 ms (925.128 ms / 20) 935.234 -> 935.607 ( +0.04%) [ +0.02% +0.08% +0.00% / +0.04% +0.88% +0.79%] index_select strided 7 : Elapsed 46.770 ms (935.406 ms / 20) 919.160 -> 919.703 ( +0.06%) [ +0.00% +0.24% +0.13% / +0.06% +2.86% +2.86%] index_select strided 8 : Elapsed 45.958 ms (919.160 ms / 20) 920.421 -> 923.942 ( +0.38%) [ +0.41% +0.29% +0.00% / +0.38% +2.90% +3.03%] index_select strided 16 : Elapsed 46.209 ms (924.173 ms / 20) 926.131 -> 926.486 ( +0.04%) [ +0.23% +0.07% +0.00% / +0.04% +1.82% +1.59%] index_select random : Elapsed 46.411 ms (928.226 ms / 20) 928.866 -> 928.884 ( +0.00%) [ +0.22% +0.38% +0.00% / +0.00% +0.25% +0.31%] index_select random_sorted : Elapsed 46.546 ms (930.919 ms / 20) 934.704 -> 936.075 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.99% +0.86%] index_select perm : Elapsed 46.736 ms (934.728 ms / 20) 920.781 -> 923.385 ( +0.28%) [ +0.35% +0.00% +0.44% / +0.28% +2.61% +2.50%] index_select perm_sorted : Elapsed 46.199 ms (923.983 ms / 20) out_shape = [256, 40, 20, 50] in_shape = [256, 40, 100, 50] idx_dim = 2 out_shape = [256, 40, 100, 20] in_shape = [256, 40, 100, 50] idx_dim = 3 out_shape = [20, 50, 40, 100] in_shape = [256, 50, 40, 100] idx_dim = 0 out_shape = [256, 20, 40, 100] in_shape = [256, 50, 40, 100] idx_dim = 1 out_shape = [256, 50, 20, 100] in_shape = [256, 50, 40, 100] idx_dim = 2 B = [256, 50, 20, 100] (stride (100, 25600, 1280000, 1)) A = [256, 50, 40, 100] (stride (1, 256, 1280000, 12800)) dim = 2 612.310 -> 613.279 ( +0.16%) [ +0.23% +0.17% +0.00% / +0.16% +0.53% +0.54%] index_select const : Elapsed 30.687 ms (613.743 ms / 20) 652.990 -> 653.214 ( +0.03%) [ +0.13% +0.00% +0.10% / +0.03% +0.20% +0.10%] index_select wrap : Elapsed 32.693 ms (653.860 ms / 20) 653.293 -> 653.094 ( -0.03%) [ +0.08% +0.05% +0.00% / +0.04% +0.01% -0.03%] index_select linear : Elapsed 32.692 ms (653.836 ms / 20) 652.726 -> 651.969 ( -0.12%) [ +0.00% +0.04% +0.05% / -0.07% +0.09% -0.12%] index_select reverse : Elapsed 32.636 ms (652.726 ms / 20) 612.570 -> 613.125 ( +0.09%) [ +0.07% +0.23% +0.00% / +0.09% +0.52% +0.47%] index_select skip64 : Elapsed 30.651 ms (613.024 ms / 20) 612.848 -> 612.938 ( +0.01%) [ +0.04% +0.00% +0.16% / +0.01% +0.53% +0.25%] index_select skip256 : Elapsed 30.655 ms (613.099 ms / 20) 653.109 -> 653.476 ( +0.06%) [ +0.19% +0.02% +0.00% / +0.06% +0.19% +0.27%] index_select spread : Elapsed 32.716 ms (654.328 ms / 20) 651.956 -> 652.555 ( +0.09%) [ +0.14% +0.10% +0.00% / +0.09% +0.26% +0.14%] index_select strided 3 : Elapsed 32.642 ms (652.837 ms / 20) 651.489 -> 651.746 ( +0.04%) [ +0.12% +0.03% +0.00% / +0.04% +0.25% +0.31%] index_select strided 5 : Elapsed 32.613 ms (652.259 ms / 20) 651.358 -> 652.292 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.17% +0.14% +0.18%] index_select strided 7 : Elapsed 32.637 ms (652.749 ms / 20) 650.929 -> 651.413 ( +0.07%) [ +0.00% +0.19% +0.18% / +0.07% +0.35% +0.41%] index_select strided 8 : Elapsed 32.546 ms (650.929 ms / 20) 650.587 -> 650.835 ( +0.04%) [ +0.07% +0.07% +0.00% / +0.04% +0.36% +0.27%] index_select strided 16 : Elapsed 32.552 ms (651.040 ms / 20) 649.960 -> 650.744 ( +0.12%) [ +0.00% +0.03% +0.21% / +0.14% +0.12% +0.20%] index_select random : Elapsed 32.498 ms (649.960 ms / 20) 647.910 -> 647.531 ( -0.06%) [ +0.12% +0.18% +0.00% / +0.16% +0.06% -0.06%] index_select random_sorted : Elapsed 32.434 ms (648.678 ms / 20) 650.713 -> 651.908 ( +0.18%) [ +0.13% +0.00% +0.22% / +0.18% +0.45% +0.53%] index_select perm : Elapsed 32.578 ms (651.569 ms / 20) 652.000 -> 653.027 ( +0.16%) [ +0.10% +0.15% +0.00% / +0.16% +0.20% +0.36%] index_select perm_sorted : Elapsed 32.633 ms (652.655 ms / 20) out_shape = [256, 50, 40, 20] in_shape = [256, 50, 40, 100] idx_dim = 3 B = [256, 50, 40, 20] (stride (40000, 20, 1000, 1)) A = [256, 50, 40, 100] (stride (4000, 1024000, 100, 1)) dim = 3 403.936 -> 404.435 ( +0.12%) [ +0.00% +0.04% +0.01% / +0.14% +0.13% +0.12%] index_select const : Elapsed 20.197 ms (403.936 ms / 20) 403.123 -> 403.055 ( -0.02%) [ +0.05% +0.08% +0.00% / -0.00% +0.08% -0.02%] index_select wrap : Elapsed 20.166 ms (403.321 ms / 20) 402.829 -> 402.835 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.10% +0.05% +0.00%] index_select linear : Elapsed 20.141 ms (402.829 ms / 20) 402.911 -> 402.821 ( -0.02%) [ +0.06% +0.00% +0.03% / -0.02% +0.09% +0.12%] index_select reverse : Elapsed 20.157 ms (403.145 ms / 20) 403.989 -> 404.166 ( +0.04%) [ +0.02% +0.00% +0.06% / +0.06% +0.04% +0.21%] index_select skip64 : Elapsed 20.204 ms (404.088 ms / 20) 403.244 -> 403.427 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.36% +0.26%] index_select skip256 : Elapsed 20.169 ms (403.378 ms / 20) 402.462 -> 402.449 ( -0.00%) [ +0.03% +0.04% +0.00% / -0.00% +0.13% +0.16%] index_select spread : Elapsed 20.129 ms (402.578 ms / 20) 402.992 -> 403.094 ( +0.03%) [ +0.09% +0.00% +0.08% / +0.12% +0.10% +0.03%] index_select strided 3 : Elapsed 20.168 ms (403.354 ms / 20) 403.029 -> 402.913 ( -0.03%) [ +0.00% +0.08% +0.02% / -0.01% +0.10% -0.03%] index_select strided 5 : Elapsed 20.151 ms (403.029 ms / 20) 402.914 -> 402.400 ( -0.13%) [ +0.00% +0.03% +0.11% / +0.07% -0.13% -0.11%] index_select strided 7 : Elapsed 20.146 ms (402.914 ms / 20) 403.165 -> 402.466 ( -0.17%) [ +0.00% +0.00% +0.03% / +0.05% -0.10% -0.17%] index_select strided 8 : Elapsed 20.158 ms (403.165 ms / 20) 402.801 -> 403.184 ( +0.10%) [ +0.00% +0.10% +0.02% / +0.10% +0.16% +0.16%] index_select strided 16 : Elapsed 20.140 ms (402.801 ms / 20) 403.112 -> 402.877 ( -0.06%) [ +0.00% +0.02% +0.04% / -0.06% +0.04% +0.04%] index_select strided 64 : Elapsed 20.156 ms (403.112 ms / 20) 403.013 -> 402.785 ( -0.06%) [ +0.06% +0.04% +0.00% / +0.01% -0.06% +0.01%] index_select random : Elapsed 20.162 ms (403.238 ms / 20) 402.986 -> 402.988 ( +0.00%) [ +0.00% +0.01% +0.04% / +0.02% +0.12% +0.00%] index_select random_sorted : Elapsed 20.149 ms (402.986 ms / 20) 402.311 -> 402.398 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.23% +0.16%] index_select perm : Elapsed 20.120 ms (402.395 ms / 20) 402.459 -> 402.376 ( -0.02%) [ +0.00% +0.00% +0.01% / -0.02% +0.14% +0.19%] index_select perm_sorted : Elapsed 20.123 ms (402.459 ms / 20) out_shape = [20, 50, 100, 40] in_shape = [256, 50, 100, 40] idx_dim = 0 B = [20, 50, 100, 40] (stride (200000, 1, 50, 5000)) A = [256, 50, 100, 40] (stride (200000, 40, 2000, 1)) dim = 0 76.204 -> 75.383 ( -1.08%) [ +0.12% +0.00% +0.01% / -0.00% -1.08% -0.76%] index_select const : Elapsed 3.815 ms (76.292 ms / 20) 75.864 -> 75.556 ( -0.41%) [ +0.00% +0.10% +0.26% / +0.06% -0.07% -0.41%] index_select wrap : Elapsed 3.793 ms (75.864 ms / 20) 75.731 -> 75.595 ( -0.18%) [ +0.15% +0.00% +0.24% / +0.20% -0.18% +0.01%] index_select linear : Elapsed 3.792 ms (75.846 ms / 20) 75.528 -> 75.692 ( +0.22%) [ +0.12% +0.28% +0.00% / +0.59% +0.22% +0.31%] index_select reverse : Elapsed 3.781 ms (75.616 ms / 20) 76.036 -> 75.469 ( -0.75%) [ +0.00% +0.02% +0.12% / +0.46% -0.64% -0.75%] index_select skip64 : Elapsed 3.802 ms (76.036 ms / 20) 75.693 -> 75.426 ( -0.35%) [ +0.00% +0.30% +0.31% / +0.27% -0.35% -0.17%] index_select skip256 : Elapsed 3.785 ms (75.693 ms / 20) 75.579 -> 75.462 ( -0.15%) [ +0.16% +0.00% +0.03% / -0.15% +0.35% +0.13%] index_select spread : Elapsed 3.785 ms (75.699 ms / 20) 75.876 -> 75.854 ( -0.03%) [ +0.25% +0.10% +0.00% / +0.09% +0.19% -0.03%] index_select strided 3 : Elapsed 3.803 ms (76.063 ms / 20) 75.611 -> 75.723 ( +0.15%) [ +0.50% +0.00% +0.41% / +0.24% +0.19% +0.15%] index_select strided 5 : Elapsed 3.800 ms (75.991 ms / 20) 75.621 -> 75.605 ( -0.02%) [ +0.00% +0.36% +0.19% / -0.02% +0.35% +0.20%] index_select strided 7 : Elapsed 3.781 ms (75.621 ms / 20) 75.843 -> 75.759 ( -0.11%) [ +0.18% +0.11% +0.00% / -0.04% +0.21% -0.11%] index_select strided 8 : Elapsed 3.799 ms (75.979 ms / 20) 75.700 -> 75.641 ( -0.08%) [ +0.30% +0.21% +0.00% / +0.09% +0.04% -0.08%] index_select strided 16 : Elapsed 3.796 ms (75.924 ms / 20) 75.948 -> 75.561 ( -0.51%) [ +0.00% +0.38% +0.30% / +0.06% -0.15% -0.51%] index_select strided 64 : Elapsed 3.797 ms (75.948 ms / 20) 75.778 -> 75.731 ( -0.06%) [ +0.55% +0.36% +0.00% / +0.01% -0.01% -0.06%] index_select strided 100 : Elapsed 3.810 ms (76.198 ms / 20) 75.904 -> 75.468 ( -0.57%) [ +0.37% +0.00% +0.17% / -0.06% -0.48% -0.57%] index_select strided 255 : Elapsed 3.809 ms (76.183 ms / 20) 75.646 -> 75.483 ( -0.22%) [ +0.17% +0.00% +0.07% / +0.09% -0.22% -0.21%] index_select random : Elapsed 3.789 ms (75.772 ms / 20) 75.687 -> 75.517 ( -0.22%) [ +0.12% +0.00% +0.31% / -0.22% +0.21% +0.12%] index_select random_sorted : Elapsed 3.789 ms (75.779 ms / 20) 75.742 -> 75.263 ( -0.63%) [ +0.24% +0.00% +0.57% / +0.42% -0.63% -0.44%] index_select perm : Elapsed 3.796 ms (75.924 ms / 20) 76.063 -> 75.598 ( -0.61%) [ +0.13% +0.00% +0.18% / +0.15% -0.50% -0.61%] index_select perm_sorted : Elapsed 3.808 ms (76.163 ms / 20) out_shape = [256, 20, 100, 40] in_shape = [256, 50, 100, 40] idx_dim = 1 out_shape = [256, 50, 20, 40] in_shape = [256, 50, 100, 40] idx_dim = 2 out_shape = [256, 50, 100, 20] in_shape = [256, 50, 100, 40] idx_dim = 3 out_shape = [20, 100, 40, 50] in_shape = [256, 100, 40, 50] idx_dim = 0 B = [20, 100, 40, 50] (stride (200000, 40, 1, 4000)) A = [256, 100, 40, 50] (stride (2000, 512000, 1, 40)) dim = 0 76.367 -> 74.486 ( -2.46%) [ +0.23% +0.22% +0.00% / +0.38% -2.03% -2.46%] index_select const : Elapsed 3.827 ms (76.540 ms / 20) 75.439 -> 75.929 ( +0.65%) [ +0.79% +0.24% +0.00% / +0.65% +4.04% +3.33%] index_select wrap : Elapsed 3.802 ms (76.038 ms / 20) 75.570 -> 75.470 ( -0.13%) [ +0.00% +0.17% +0.29% / -0.13% +3.43% +3.03%] index_select linear : Elapsed 3.778 ms (75.570 ms / 20) 77.887 -> 74.234 ( -4.69%) [ +0.00% +0.60% +0.36% / +0.14% -4.69% -4.20%] index_select reverse : Elapsed 3.894 ms (77.887 ms / 20) 76.262 -> 74.277 ( -2.60%) [ +0.50% +0.00% +0.07% / +0.36% -2.36% -2.60%] index_select skip64 : Elapsed 3.832 ms (76.641 ms / 20) 76.459 -> 74.533 ( -2.52%) [ +0.00% +0.09% +0.02% / -0.02% -2.52% -2.46%] index_select skip256 : Elapsed 3.823 ms (76.459 ms / 20) 76.436 -> 75.471 ( -1.26%) [ +0.14% +0.88% +0.00% / +0.36% -1.26% -0.97%] index_select spread : Elapsed 3.827 ms (76.540 ms / 20) 76.170 -> 75.954 ( -0.28%) [ +0.00% +0.16% +0.39% / -0.28% +0.09% -0.06%] index_select strided 3 : Elapsed 3.809 ms (76.170 ms / 20) 74.642 -> 75.139 ( +0.67%) [ +0.51% +0.32% +0.00% / +0.67% +1.54% +1.30%] index_select strided 5 : Elapsed 3.751 ms (75.020 ms / 20) 75.424 -> 75.249 ( -0.23%) [ +0.00% +0.03% +0.19% / -0.23% +0.40% +0.27%] index_select strided 7 : Elapsed 3.771 ms (75.424 ms / 20) 78.361 -> 75.575 ( -3.56%) [ +0.00% +0.22% +0.14% / -0.35% -3.56% -3.34%] index_select strided 8 : Elapsed 3.918 ms (78.361 ms / 20) 76.820 -> 74.097 ( -3.54%) [ +0.44% +0.00% +0.91% / +0.40% -3.17% -3.54%] index_select strided 16 : Elapsed 3.858 ms (77.157 ms / 20) 76.088 -> 76.404 ( +0.42%) [ +0.19% +0.00% +0.50% / +0.42% +2.71% +2.50%] index_select strided 64 : Elapsed 3.812 ms (76.236 ms / 20) 75.378 -> 74.206 ( -1.55%) [ +0.34% +0.23% +0.00% / -0.16% -1.09% -1.55%] index_select strided 100 : Elapsed 3.782 ms (75.636 ms / 20) 77.433 -> 74.568 ( -3.70%) [ +0.41% +0.14% +0.00% / +0.32% -3.70% -3.11%] index_select strided 255 : Elapsed 3.887 ms (77.750 ms / 20) 75.011 -> 74.888 ( -0.16%) [ +0.34% +0.28% +0.00% / -0.16% +3.20% +3.47%] index_select random : Elapsed 3.763 ms (75.266 ms / 20) 76.383 -> 76.431 ( +0.06%) [ +0.28% +0.00% +0.71% / +0.22% +0.30% +0.06%] index_select random_sorted : Elapsed 3.830 ms (76.594 ms / 20) 76.505 -> 76.509 ( +0.01%) [ +0.15% +0.16% +0.00% / +0.01% +2.44% +2.66%] index_select perm : Elapsed 3.831 ms (76.617 ms / 20) 77.138 -> 76.457 ( -0.88%) [ +0.04% +0.17% +0.00% / -0.05% -0.44% -0.88%] index_select perm_sorted : Elapsed 3.858 ms (77.165 ms / 20) out_shape = [256, 20, 40, 50] in_shape = [256, 100, 40, 50] idx_dim = 1 out_shape = [256, 100, 20, 50] in_shape = [256, 100, 40, 50] idx_dim = 2 B = [256, 100, 20, 50] (stride (1, 256000, 12800, 256)) dim = 2 fill_cnt = 40 695.667 -> 696.065 ( +0.06%) [ +0.06% +0.01% +0.00% / +0.15% +0.13% +0.06%] index_fill_ const : Elapsed 34.804 ms (696.071 ms / 20) 738.261 -> 739.478 ( +0.16%) [ +0.00% +0.15% +0.05% / +0.16% +1.41% +1.41%] index_fill_ linear : Elapsed 36.913 ms (738.261 ms / 20) 769.086 -> 768.658 ( -0.06%) [ +0.00% +0.09% +0.08% / -0.06% +2.14% +2.14%] index_fill_ reverse : Elapsed 38.454 ms (769.086 ms / 20) 689.162 -> 690.039 ( +0.13%) [ +0.06% +0.00% +0.10% / +0.13% +1.07% +1.04%] index_fill_ skip64 : Elapsed 34.480 ms (689.594 ms / 20) 693.335 -> 694.400 ( +0.15%) [ +0.21% +0.22% +0.00% / +0.15% +0.54% +0.58%] index_fill_ skip256 : Elapsed 34.738 ms (694.765 ms / 20) 778.946 -> 780.096 ( +0.15%) [ +0.00% +0.03% +0.21% / +0.15% +1.39% +1.33%] index_fill_ spread : Elapsed 38.947 ms (778.946 ms / 20) 822.363 -> 823.209 ( +0.10%) [ +0.00% +0.05% +0.18% / +0.10% +1.68% +1.78%] index_fill_ strided 3 : Elapsed 41.118 ms (822.363 ms / 20) 833.601 -> 821.227 ( -1.48%) [ +0.08% +0.10% +0.00% / +0.03% -1.48% -1.41%] index_fill_ strided 5 : Elapsed 41.715 ms (834.300 ms / 20) 827.802 -> 828.914 ( +0.13%) [ +0.19% +0.06% +0.00% / +0.13% +1.23% +1.09%] index_fill_ strided 7 : Elapsed 41.469 ms (829.376 ms / 20) 827.457 -> 817.885 ( -1.16%) [ +0.22% +0.06% +0.00% / -0.00% -0.91% -1.16%] index_fill_ strided 8 : Elapsed 41.465 ms (829.295 ms / 20) 820.991 -> 811.033 ( -1.21%) [ +0.00% +0.00% +0.08% / +0.11% -1.19% -1.21%] index_fill_ strided 16 : Elapsed 41.050 ms (820.991 ms / 20) 814.073 -> 813.669 ( -0.05%) [ +0.06% +0.12% +0.00% / -0.05% +1.81% +1.74%] index_fill_ random : Elapsed 40.726 ms (814.523 ms / 20) 762.481 -> 762.408 ( -0.01%) [ +0.00% +0.01% +0.15% / -0.01% +1.48% +1.54%] index_fill_ random_sorted : Elapsed 38.124 ms (762.481 ms / 20) out_shape = [256, 100, 40, 20] in_shape = [256, 100, 40, 50] idx_dim = 3 out_shape = [20, 100, 50, 40] in_shape = [256, 100, 50, 40] idx_dim = 0 out_shape = [256, 20, 50, 40] in_shape = [256, 100, 50, 40] idx_dim = 1 B = [256, 20, 50, 40] (stride (50, 12800, 1, 256000)) A = [256, 100, 50, 40] (stride (1, 512000, 256, 12800)) dim = 1 291.416 -> 291.750 ( +0.11%) [ +0.23% +0.00% +0.43% / +0.11% +0.53% +0.28%] index_select const : Elapsed 14.605 ms (292.096 ms / 20) 307.012 -> 306.708 ( -0.10%) [ +0.00% +0.05% +0.01% / -0.10% +0.32% +0.48%] index_select wrap : Elapsed 15.351 ms (307.012 ms / 20) 307.025 -> 307.240 ( +0.07%) [ +0.18% +0.00% +0.42% / +0.07% +0.35% +0.36%] index_select linear : Elapsed 15.379 ms (307.573 ms / 20) 306.789 -> 306.326 ( -0.15%) [ +0.00% +0.01% +0.22% / -0.15% +0.04% -0.01%] index_select reverse : Elapsed 15.339 ms (306.789 ms / 20) 290.966 -> 291.732 ( +0.26%) [ +0.45% +0.00% +0.30% / +0.26% +0.73% +0.52%] index_select skip64 : Elapsed 14.614 ms (292.284 ms / 20) 291.445 -> 291.988 ( +0.19%) [ +0.18% +0.30% +0.00% / +0.19% +0.57% +0.30%] index_select skip256 : Elapsed 14.599 ms (291.981 ms / 20) 306.461 -> 306.787 ( +0.11%) [ +0.00% +0.08% +0.18% / +0.11% +0.85% +0.90%] index_select spread : Elapsed 15.323 ms (306.461 ms / 20) 306.443 -> 306.600 ( +0.05%) [ +0.32% +0.38% +0.00% / +0.05% +0.35% +0.38%] index_select strided 3 : Elapsed 15.371 ms (307.416 ms / 20) 306.911 -> 307.233 ( +0.10%) [ +0.04% +0.00% +0.21% / +0.10% +0.79% +0.64%] index_select strided 5 : Elapsed 15.352 ms (307.039 ms / 20) 306.062 -> 306.279 ( +0.07%) [ +0.28% +0.00% +0.11% / +0.07% +0.58% +0.80%] index_select strided 7 : Elapsed 15.345 ms (306.910 ms / 20) 307.148 -> 307.174 ( +0.01%) [ +0.09% +0.20% +0.00% / +0.05% +0.01% +0.21%] index_select strided 8 : Elapsed 15.371 ms (307.414 ms / 20) 306.839 -> 307.154 ( +0.10%) [ +0.08% +0.19% +0.00% / +0.10% +0.58% +0.58%] index_select strided 16 : Elapsed 15.354 ms (307.086 ms / 20) 307.750 -> 307.934 ( +0.06%) [ +0.23% +0.18% +0.00% / +0.10% +0.06% +0.11%] index_select strided 64 : Elapsed 15.423 ms (308.460 ms / 20) 305.334 -> 305.440 ( +0.03%) [ +0.24% +0.25% +0.00% / +0.03% +1.02% +0.89%] index_select random : Elapsed 15.303 ms (306.053 ms / 20) 305.723 -> 306.911 ( +0.39%) [ +0.19% +0.00% +0.05% / +0.39% +0.97% +0.90%] index_select random_sorted : Elapsed 15.315 ms (306.292 ms / 20) 305.917 -> 305.935 ( +0.01%) [ +0.00% +0.28% +0.05% / +0.01% +0.90% +0.40%] index_select perm : Elapsed 15.296 ms (305.917 ms / 20) 305.366 -> 305.962 ( +0.20%) [ +0.40% +0.41% +0.00% / +0.20% +0.78% +0.73%] index_select perm_sorted : Elapsed 15.330 ms (306.595 ms / 20) out_shape = [256, 100, 20, 40] in_shape = [256, 100, 50, 40] idx_dim = 2 B = [256, 100, 20, 40] (stride (20, 204800, 1, 5120)) A = [256, 100, 50, 40] (stride (1, 512000, 256, 12800)) dim = 2 897.184 -> 897.151 ( -0.00%) [ +0.10% +0.06% +0.00% / -0.00% +0.57% +0.57%] index_select const : Elapsed 44.904 ms (898.070 ms / 20) 896.893 -> 895.000 ( -0.21%) [ +0.00% +0.00% +0.01% / +0.05% -0.21% -0.19%] index_select wrap : Elapsed 44.845 ms (896.893 ms / 20) 896.677 -> 895.288 ( -0.15%) [ +0.08% +0.02% +0.00% / +0.05% -0.15% -0.08%] index_select linear : Elapsed 44.868 ms (897.361 ms / 20) 894.431 -> 892.701 ( -0.19%) [ +0.00% +0.07% +0.00% / -0.19% +0.08% +0.05%] index_select reverse : Elapsed 44.724 ms (894.471 ms / 20) 894.822 -> 896.015 ( +0.13%) [ +0.16% +0.08% +0.00% / +0.13% +0.99% +0.94%] index_select skip64 : Elapsed 44.813 ms (896.255 ms / 20) 897.548 -> 896.817 ( -0.08%) [ +0.03% +0.09% +0.00% / -0.08% +0.77% +0.77%] index_select skip256 : Elapsed 44.891 ms (897.822 ms / 20) 899.377 -> 899.584 ( +0.02%) [ +0.03% +0.03% +0.00% / +0.02% +0.07% +0.17%] index_select spread : Elapsed 44.984 ms (899.670 ms / 20) 906.419 -> 895.593 ( -1.19%) [ +0.07% +0.02% +0.00% / -0.03% -1.19% -1.19%] index_select strided 3 : Elapsed 45.353 ms (907.054 ms / 20) 898.488 -> 898.132 ( -0.04%) [ +0.09% +0.00% +0.09% / -0.04% +0.64% +0.62%] index_select strided 5 : Elapsed 44.966 ms (899.316 ms / 20) 896.790 -> 895.719 ( -0.12%) [ +0.00% +0.06% +0.07% / -0.02% -0.08% -0.12%] index_select strided 7 : Elapsed 44.839 ms (896.790 ms / 20) 909.433 -> 902.019 ( -0.82%) [ +0.04% +0.00% +0.11% / +0.11% -0.82% -0.74%] index_select strided 8 : Elapsed 45.490 ms (909.797 ms / 20) 902.346 -> 900.104 ( -0.25%) [ +0.13% +0.06% +0.00% / -0.01% -0.25% -0.20%] index_select strided 16 : Elapsed 45.177 ms (903.538 ms / 20) 899.025 -> 896.473 ( -0.28%) [ +0.11% +0.06% +0.00% / +0.14% -0.28% -0.22%] index_select random : Elapsed 45.001 ms (900.021 ms / 20) 900.877 -> 893.890 ( -0.78%) [ +0.20% +0.03% +0.00% / +0.09% -0.78% -0.75%] index_select random_sorted : Elapsed 45.136 ms (902.722 ms / 20) 899.646 -> 898.137 ( -0.17%) [ +0.08% +0.00% +0.06% / +0.06% -0.17% +0.04%] index_select perm : Elapsed 45.019 ms (900.387 ms / 20) 906.384 -> 893.807 ( -1.39%) [ +0.08% +0.00% +0.09% / +0.01% -1.39% -1.32%] index_select perm_sorted : Elapsed 45.355 ms (907.099 ms / 20) B = [256, 100, 20, 40] (stride (1, 204800, 256, 5120)) A = [256, 100, 50, 40] (stride (100, 1, 25600, 1280000)) dim = 2 685.969 -> 689.154 ( +0.46%) [ +0.47% +0.59% +0.00% / +0.46% +4.47% +4.29%] index_select const : Elapsed 34.460 ms (689.205 ms / 20) 693.663 -> 694.342 ( +0.10%) [ +0.19% +0.00% +0.33% / +0.10% +2.67% +2.99%] index_select wrap : Elapsed 34.749 ms (694.988 ms / 20) 692.758 -> 696.616 ( +0.56%) [ +0.32% +0.34% +0.00% / +0.56% +2.72% +2.68%] index_select linear : Elapsed 34.748 ms (694.955 ms / 20) 683.506 -> 685.511 ( +0.29%) [ +0.06% +0.37% +0.00% / +0.29% +3.70% +3.22%] index_select reverse : Elapsed 34.196 ms (683.911 ms / 20) 687.743 -> 689.229 ( +0.22%) [ +0.00% +0.24% +0.01% / +0.22% +4.63% +4.44%] index_select skip64 : Elapsed 34.387 ms (687.743 ms / 20) 687.526 -> 688.039 ( +0.07%) [ +0.00% +0.14% +0.09% / +0.07% +4.17% +4.32%] index_select skip256 : Elapsed 34.376 ms (687.526 ms / 20) 693.689 -> 692.833 ( -0.12%) [ +0.07% +0.19% +0.00% / -0.12% +4.05% +3.66%] index_select spread : Elapsed 34.709 ms (694.186 ms / 20) 696.471 -> 698.670 ( +0.32%) [ +0.88% +0.36% +0.00% / +0.32% +2.95% +2.71%] index_select strided 3 : Elapsed 35.131 ms (702.616 ms / 20) 704.960 -> 704.795 ( -0.02%) [ +0.32% +0.00% +0.14% / -0.02% +1.92% +2.00%] index_select strided 5 : Elapsed 35.362 ms (707.236 ms / 20) 689.781 -> 690.783 ( +0.15%) [ +0.21% +0.33% +0.00% / +0.15% +3.67% +3.30%] index_select strided 7 : Elapsed 34.561 ms (691.226 ms / 20) 693.239 -> 694.108 ( +0.13%) [ +0.00% +0.07% +0.16% / +0.13% +2.65% +3.08%] index_select strided 8 : Elapsed 34.662 ms (693.239 ms / 20) 689.204 -> 689.095 ( -0.02%) [ +0.10% +0.21% +0.00% / -0.02% +4.46% +4.61%] index_select strided 16 : Elapsed 34.495 ms (689.897 ms / 20) 688.564 -> 687.505 ( -0.15%) [ +0.35% +0.04% +0.00% / -0.15% +3.86% +3.23%] index_select random : Elapsed 34.548 ms (690.959 ms / 20) 690.168 -> 690.565 ( +0.06%) [ +0.00% +0.16% +0.20% / +0.06% +2.45% +2.44%] index_select random_sorted : Elapsed 34.508 ms (690.168 ms / 20) 697.459 -> 698.988 ( +0.22%) [ +0.04% +0.09% +0.00% / +0.22% +2.01% +1.61%] index_select perm : Elapsed 34.887 ms (697.737 ms / 20) 690.807 -> 693.366 ( +0.37%) [ +0.00% +0.45% +0.60% / +0.37% +3.47% +3.04%] index_select perm_sorted : Elapsed 34.540 ms (690.807 ms / 20) out_shape = [256, 100, 50, 20] in_shape = [256, 100, 50, 40] idx_dim = 3 out_shape = [40, 50, 100, 256] in_shape = [20, 50, 100, 256] idx_dim = 0 B = [40, 50, 100, 256] (stride (25600, 1024000, 256, 1)) A = [20, 50, 100, 256] (stride (1280000, 1, 50, 5000)) dim = 0 594.948 -> 594.904 ( -0.01%) [ +0.09% +0.00% +0.11% / -0.01% +0.12% +0.10%] index_add_ linear : Elapsed 29.773 ms (595.462 ms / 20) 583.925 -> 584.157 ( +0.04%) [ +0.00% +0.03% +0.11% / +0.04% +0.16% +0.22%] index_copy_ linear : Elapsed 29.196 ms (583.925 ms / 20) 595.232 -> 595.406 ( +0.03%) [ +0.07% +0.00% +0.13% / +0.03% +0.12% +0.05%] index_add_ reverse : Elapsed 29.782 ms (595.635 ms / 20) 583.985 -> 584.718 ( +0.13%) [ +0.01% +0.00% +0.03% / +0.13% +0.16% +0.16%] index_copy_ reverse : Elapsed 29.203 ms (584.050 ms / 20) 594.753 -> 594.986 ( +0.04%) [ +0.08% +0.00% +0.10% / +0.04% +0.22% +0.25%] index_add_ spread : Elapsed 29.762 ms (595.238 ms / 20) 584.328 -> 584.430 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.16% +0.21%] index_copy_ spread : Elapsed 29.230 ms (584.606 ms / 20) 596.772 -> 595.854 ( -0.15%) [ +0.04% +0.00% +0.07% / +0.01% -0.15% -0.13%] index_add_ strided 3 : Elapsed 29.850 ms (596.992 ms / 20) 584.405 -> 584.790 ( +0.07%) [ +0.08% +0.08% +0.00% / +0.07% +0.07% +0.13%] index_copy_ strided 3 : Elapsed 29.244 ms (584.883 ms / 20) 594.829 -> 595.488 ( +0.11%) [ +0.13% +0.00% +0.08% / +0.14% +0.11% +0.22%] index_add_ strided 7 : Elapsed 29.780 ms (595.592 ms / 20) 584.185 -> 584.241 ( +0.01%) [ +0.05% +0.03% +0.00% / +0.18% +0.24% +0.01%] index_copy_ strided 7 : Elapsed 29.224 ms (584.471 ms / 20) 595.302 -> 594.975 ( -0.05%) [ +0.04% +0.00% +0.00% / +0.15% +0.01% -0.05%] index_add_ perm : Elapsed 29.776 ms (595.527 ms / 20) 584.508 -> 584.696 ( +0.03%) [ +0.00% +0.06% +0.10% / +0.20% +0.03% +0.05%] index_copy_ perm : Elapsed 29.225 ms (584.508 ms / 20) 595.514 -> 595.857 ( +0.06%) [ +0.00% +0.05% +0.04% / +0.06% +0.16% +0.14%] index_add_ perm_sorted : Elapsed 29.776 ms (595.514 ms / 20) 583.644 -> 584.529 ( +0.15%) [ +0.16% +0.16% +0.00% / +0.15% +0.16% +0.21%] index_copy_ perm_sorted : Elapsed 29.228 ms (584.554 ms / 20) 1110.731 -> 1110.540 ( -0.02%) [ +0.00% +0.03% +0.01% / -0.02% +0.13% +0.20%] index_select const : Elapsed 55.537 ms (1110.731 ms / 20) 1209.517 -> 1210.184 ( +0.06%) [ +0.02% +0.09% +0.00% / +0.06% +0.12% +0.10%] index_select wrap : Elapsed 60.487 ms (1209.743 ms / 20) 1148.292 -> 1148.647 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.28% +0.27%] index_select linear : Elapsed 57.449 ms (1148.976 ms / 20) 1170.549 -> 1171.322 ( +0.07%) [ +0.08% +0.05% +0.00% / +0.07% +0.18% +0.15%] index_select reverse : Elapsed 58.577 ms (1171.535 ms / 20) 1110.255 -> 1111.646 ( +0.13%) [ +0.08% +0.00% +0.06% / +0.13% +0.24% +0.16%] index_select skip64 : Elapsed 55.558 ms (1111.156 ms / 20) 1110.366 -> 1110.935 ( +0.05%) [ +0.16% +0.10% +0.00% / +0.05% +0.24% +0.18%] index_select skip256 : Elapsed 55.607 ms (1112.134 ms / 20) 1182.594 -> 1182.772 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.02% +0.04%] index_select spread : Elapsed 59.157 ms (1183.137 ms / 20) 1210.733 -> 1209.893 ( -0.07%) [ +0.00% +0.00% +0.04% / -0.07% +0.07% -0.01%] index_select strided 3 : Elapsed 60.537 ms (1210.733 ms / 20) 1203.198 -> 1203.076 ( -0.01%) [ +0.16% +0.13% +0.00% / +0.13% -0.00% -0.01%] index_select strided 5 : Elapsed 60.256 ms (1205.126 ms / 20) 1211.168 -> 1211.658 ( +0.04%) [ +0.12% +0.06% +0.00% / +0.06% +0.04% +0.12%] index_select strided 7 : Elapsed 60.631 ms (1212.611 ms / 20) 1206.414 -> 1204.654 ( -0.15%) [ +0.09% +0.07% +0.00% / +0.11% -0.09% -0.15%] index_select strided 8 : Elapsed 60.375 ms (1207.497 ms / 20) 1207.297 -> 1205.149 ( -0.18%) [ +0.08% +0.00% +0.06% / +0.04% -0.09% -0.18%] index_select strided 16 : Elapsed 60.411 ms (1208.217 ms / 20) 1207.581 -> 1205.219 ( -0.20%) [ +0.01% +0.05% +0.00% / +0.02% -0.17% -0.20%] index_select random : Elapsed 60.386 ms (1207.729 ms / 20) 1171.744 -> 1171.963 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.16% +0.02%] index_select random_sorted : Elapsed 58.598 ms (1171.956 ms / 20) out_shape = [20, 40, 100, 256] in_shape = [20, 50, 100, 256] idx_dim = 1 B = [20, 40, 100, 256] (stride (100, 512000, 1, 2000)) A = [20, 50, 100, 256] (stride (1, 512000, 5120, 20)) dim = 1 521.029 -> 517.278 ( -0.72%) [ +0.04% +0.02% +0.00% / +0.31% -0.72% -0.65%] index_select const : Elapsed 26.063 ms (521.255 ms / 20) 522.476 -> 520.271 ( -0.42%) [ +0.00% +0.28% +0.18% / +0.27% -0.42% -0.38%] index_select wrap : Elapsed 26.124 ms (522.476 ms / 20) 522.519 -> 519.777 ( -0.52%) [ +0.22% +0.09% +0.00% / +0.18% -0.52% -0.31%] index_select linear : Elapsed 26.182 ms (523.645 ms / 20) 521.888 -> 521.379 ( -0.10%) [ +0.15% +0.14% +0.00% / +0.09% -0.10% -0.07%] index_select reverse : Elapsed 26.134 ms (522.671 ms / 20) 519.472 -> 517.646 ( -0.35%) [ +0.00% +0.09% +0.31% / +0.27% -0.15% -0.35%] index_select skip64 : Elapsed 25.974 ms (519.472 ms / 20) 516.473 -> 517.185 ( +0.14%) [ +0.29% +0.00% +0.28% / +0.14% +0.27% +0.32%] index_select skip256 : Elapsed 25.899 ms (517.988 ms / 20) 517.176 -> 516.930 ( -0.05%) [ +0.00% +0.07% +0.13% / -0.05% +1.00% +0.77%] index_select spread : Elapsed 25.859 ms (517.176 ms / 20) 524.774 -> 520.809 ( -0.76%) [ +0.00% +0.14% +0.19% / +0.15% -0.76% -0.64%] index_select strided 3 : Elapsed 26.239 ms (524.774 ms / 20) 521.895 -> 521.171 ( -0.14%) [ +0.17% +0.33% +0.00% / +0.22% -0.11% -0.14%] index_select strided 5 : Elapsed 26.139 ms (522.776 ms / 20) 522.823 -> 516.593 ( -1.19%) [ +0.00% +0.14% +0.12% / +0.08% -0.97% -1.19%] index_select strided 7 : Elapsed 26.141 ms (522.823 ms / 20) 524.960 -> 516.338 ( -1.64%) [ +0.21% +0.00% +0.22% / +0.16% -1.63% -1.64%] index_select strided 8 : Elapsed 26.303 ms (526.052 ms / 20) 525.066 -> 519.964 ( -0.97%) [ +0.15% +0.00% +0.06% / -0.13% -0.80% -0.97%] index_select strided 16 : Elapsed 26.293 ms (525.867 ms / 20) 522.643 -> 520.756 ( -0.36%) [ +0.04% +0.23% +0.00% / -0.08% -0.30% -0.36%] index_select random : Elapsed 26.142 ms (522.843 ms / 20) 521.919 -> 520.000 ( -0.37%) [ +0.00% +0.07% +0.35% / +0.15% -0.28% -0.37%] index_select random_sorted : Elapsed 26.096 ms (521.919 ms / 20) 523.703 -> 519.068 ( -0.89%) [ +0.12% +0.23% +0.00% / +0.08% -0.89% -0.77%] index_select perm : Elapsed 26.216 ms (524.318 ms / 20) 518.903 -> 518.802 ( -0.02%) [ +0.11% +0.17% +0.00% / -0.02% +0.23% +0.38%] index_select perm_sorted : Elapsed 25.974 ms (519.476 ms / 20) out_shape = [20, 50, 40, 256] in_shape = [20, 50, 100, 256] idx_dim = 2 out_shape = [20, 50, 100, 40] in_shape = [20, 50, 100, 256] idx_dim = 3 B = [20, 50, 100, 40] (stride (1, 20, 40000, 1000)) A = [20, 50, 100, 256] (stride (12800, 256, 256000, 1)) dim = 3 133.347 -> 133.046 ( -0.23%) [ +0.12% +0.08% +0.00% / +0.10% +0.03% -0.23%] index_select const : Elapsed 6.675 ms (133.502 ms / 20) 133.866 -> 133.474 ( -0.29%) [ +0.00% +0.10% +0.08% / +0.06% -0.29% -0.21%] index_select wrap : Elapsed 6.693 ms (133.866 ms / 20) 133.874 -> 133.497 ( -0.28%) [ +0.07% +0.00% +0.04% / -0.08% -0.09% -0.28%] index_select linear : Elapsed 6.698 ms (133.968 ms / 20) 133.665 -> 133.623 ( -0.03%) [ +0.40% +0.20% +0.00% / +0.15% +0.02% -0.03%] index_select reverse : Elapsed 6.710 ms (134.206 ms / 20) 133.331 -> 133.154 ( -0.13%) [ +0.09% +0.04% +0.00% / +0.26% -0.13% -0.01%] index_select skip64 : Elapsed 6.673 ms (133.452 ms / 20) 133.400 -> 133.101 ( -0.22%) [ +0.00% +0.20% +0.19% / -0.09% -0.22% -0.20%] index_select skip256 : Elapsed 6.670 ms (133.400 ms / 20) 135.024 -> 134.767 ( -0.19%) [ +0.00% +0.28% +0.02% / +0.25% -0.19% -0.19%] index_select spread : Elapsed 6.751 ms (135.024 ms / 20) 134.791 -> 134.336 ( -0.34%) [ +0.00% +0.01% +0.04% / -0.05% -0.34% -0.25%] index_select strided 3 : Elapsed 6.740 ms (134.791 ms / 20) 135.013 -> 134.528 ( -0.36%) [ +0.00% +0.13% +0.06% / -0.06% -0.36% -0.25%] index_select strided 5 : Elapsed 6.751 ms (135.013 ms / 20) 135.267 -> 134.908 ( -0.27%) [ +0.00% +0.05% +0.09% / +0.07% +0.02% -0.27%] index_select strided 7 : Elapsed 6.763 ms (135.267 ms / 20) 135.349 -> 134.986 ( -0.27%) [ +0.00% +0.00% +0.11% / +0.01% -0.27% -0.17%] index_select strided 8 : Elapsed 6.767 ms (135.349 ms / 20) 135.330 -> 134.895 ( -0.32%) [ +0.23% +0.03% +0.00% / +0.14% -0.32% -0.10%] index_select strided 16 : Elapsed 6.782 ms (135.647 ms / 20) 135.167 -> 134.914 ( -0.19%) [ +0.17% +0.00% +0.04% / +0.14% -0.19% -0.12%] index_select strided 64 : Elapsed 6.770 ms (135.402 ms / 20) 135.226 -> 135.192 ( -0.03%) [ +0.16% +0.19% +0.00% / +0.06% -0.03% -0.02%] index_select strided 100 : Elapsed 6.772 ms (135.443 ms / 20) 133.767 -> 133.632 ( -0.10%) [ +0.00% +0.14% +0.17% / +0.05% +0.05% -0.10%] index_select strided 255 : Elapsed 6.688 ms (133.767 ms / 20) 135.250 -> 135.058 ( -0.14%) [ +0.06% +0.00% +0.00% / -0.00% -0.14% -0.13%] index_select random : Elapsed 6.767 ms (135.334 ms / 20) 134.856 -> 134.523 ( -0.25%) [ +0.05% +0.13% +0.00% / +0.01% -0.21% -0.25%] index_select random_sorted : Elapsed 6.746 ms (134.925 ms / 20) 135.167 -> 134.978 ( -0.14%) [ +0.07% +0.11% +0.00% / +0.14% -0.07% -0.14%] index_select perm : Elapsed 6.763 ms (135.264 ms / 20) 134.871 -> 134.667 ( -0.15%) [ +0.00% +0.03% +0.12% / +0.15% -0.15% -0.12%] index_select perm_sorted : Elapsed 6.744 ms (134.871 ms / 20) out_shape = [40, 50, 256, 100] in_shape = [20, 50, 256, 100] idx_dim = 0 out_shape = [20, 40, 256, 100] in_shape = [20, 50, 256, 100] idx_dim = 1 out_shape = [20, 50, 40, 100] in_shape = [20, 50, 256, 100] idx_dim = 2 out_shape = [20, 50, 256, 40] in_shape = [20, 50, 256, 100] idx_dim = 3 B = [20, 50, 256, 40] (stride (1, 5120, 20, 256000)) A = [20, 50, 256, 100] (stride (1, 20, 100000, 1000)) dim = 3 257.012 -> 249.622 ( -2.88%) [ +0.00% +0.06% +0.14% / +0.19% -2.88% -2.86%] index_select const : Elapsed 12.851 ms (257.012 ms / 20) 273.858 -> 274.432 ( +0.21%) [ +0.23% +0.00% +0.06% / +0.21% +1.13% +1.30%] index_select wrap : Elapsed 13.724 ms (274.478 ms / 20) 273.985 -> 274.359 ( +0.14%) [ +0.18% +0.00% +0.02% / +0.14% +1.23% +0.95%] index_select linear : Elapsed 13.724 ms (274.475 ms / 20) 272.611 -> 272.471 ( -0.05%) [ +0.24% +0.00% +0.12% / -0.05% +2.54% +2.65%] index_select reverse : Elapsed 13.664 ms (273.278 ms / 20) 257.192 -> 249.726 ( -2.90%) [ +0.27% +0.00% +0.19% / +0.20% -2.85% -2.90%] index_select skip64 : Elapsed 12.895 ms (257.897 ms / 20) 257.020 -> 249.741 ( -2.83%) [ +0.16% +0.00% +0.14% / +0.39% -2.83% -2.73%] index_select skip256 : Elapsed 12.871 ms (257.429 ms / 20) 277.303 -> 277.441 ( +0.05%) [ +0.26% +0.00% +0.30% / +0.05% +0.82% +0.73%] index_select spread : Elapsed 13.901 ms (278.018 ms / 20) 272.792 -> 272.846 ( +0.02%) [ +0.00% +0.06% +0.00% / +0.02% +1.91% +1.95%] index_select strided 3 : Elapsed 13.640 ms (272.797 ms / 20) 284.077 -> 283.985 ( -0.03%) [ +0.21% +0.10% +0.00% / +0.19% -0.03% +0.14%] index_select strided 5 : Elapsed 14.234 ms (284.680 ms / 20) 278.440 -> 278.420 ( -0.01%) [ +0.14% +0.35% +0.00% / +0.19% -0.01% +0.20%] index_select strided 7 : Elapsed 13.942 ms (278.835 ms / 20) 288.698 -> 285.434 ( -1.13%) [ +0.00% +0.03% +0.08% / -0.05% -1.13% -0.90%] index_select strided 8 : Elapsed 14.435 ms (288.698 ms / 20) 287.287 -> 280.768 ( -2.27%) [ +0.01% +0.10% +0.00% / -0.02% -2.27% -2.20%] index_select strided 16 : Elapsed 14.366 ms (287.328 ms / 20) 283.460 -> 279.976 ( -1.23%) [ +0.04% +0.19% +0.00% / +0.34% -1.23% -1.17%] index_select strided 64 : Elapsed 14.179 ms (283.585 ms / 20) 277.153 -> 277.073 ( -0.03%) [ +0.22% +0.10% +0.00% / +0.20% -0.03% +0.20%] index_select random : Elapsed 13.889 ms (277.771 ms / 20) 269.962 -> 270.032 ( +0.03%) [ +0.50% +0.00% +0.34% / +0.03% +0.10% +0.12%] index_select random_sorted : Elapsed 13.566 ms (271.316 ms / 20) 277.790 -> 277.488 ( -0.11%) [ +0.01% +0.02% +0.00% / -0.11% +0.45% +0.45%] index_select perm : Elapsed 13.891 ms (277.825 ms / 20) 277.905 -> 276.229 ( -0.60%) [ +0.10% +0.00% +0.16% / +0.38% -0.32% -0.60%] index_select perm_sorted : Elapsed 13.909 ms (278.187 ms / 20) out_shape = [40, 100, 50, 256] in_shape = [20, 100, 50, 256] idx_dim = 0 B = [40, 100, 50, 256] (stride (1, 512000, 10240, 40)) A = [20, 100, 50, 256] (stride (1280000, 256, 25600, 1)) dim = 0 622.598 -> 621.824 ( -0.12%) [ +0.05% +0.00% +0.06% / -0.02% -0.12% -0.04%] index_add_ linear : Elapsed 31.145 ms (622.891 ms / 20) 567.033 -> 566.985 ( -0.01%) [ +0.10% +0.04% +0.00% / +0.02% -0.01% +0.01%] index_copy_ linear : Elapsed 28.380 ms (567.606 ms / 20) 622.135 -> 622.456 ( +0.05%) [ +0.08% +0.09% +0.00% / +0.05% +0.13% +0.05%] index_add_ reverse : Elapsed 31.130 ms (622.605 ms / 20) 566.884 -> 566.962 ( +0.01%) [ +0.08% +0.04% +0.00% / +0.01% +0.12% +0.06%] index_copy_ reverse : Elapsed 28.368 ms (567.352 ms / 20) 622.604 -> 622.424 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.01% -0.03% -0.00%] index_add_ spread : Elapsed 31.139 ms (622.788 ms / 20) 567.198 -> 567.227 ( +0.01%) [ +0.00% +0.06% +0.05% / +0.05% +0.10% +0.01%] index_copy_ spread : Elapsed 28.360 ms (567.198 ms / 20) 621.003 -> 621.866 ( +0.14%) [ +0.11% +0.07% +0.00% / +0.14% +0.27% +0.23%] index_add_ strided 3 : Elapsed 31.085 ms (621.700 ms / 20) 566.891 -> 567.240 ( +0.06%) [ +0.01% +0.01% +0.00% / +0.07% +0.07% +0.06%] index_copy_ strided 3 : Elapsed 28.349 ms (566.973 ms / 20) 621.330 -> 621.531 ( +0.03%) [ +0.09% +0.07% +0.00% / +0.03% +0.17% +0.24%] index_add_ strided 7 : Elapsed 31.095 ms (621.894 ms / 20) 566.827 -> 566.882 ( +0.01%) [ +0.03% +0.07% +0.00% / +0.08% +0.03% +0.01%] index_copy_ strided 7 : Elapsed 28.349 ms (566.978 ms / 20) 619.982 -> 619.994 ( +0.00%) [ +0.11% +0.00% +0.04% / +0.00% +0.45% +0.41%] index_add_ perm : Elapsed 31.035 ms (620.693 ms / 20) 566.860 -> 567.183 ( +0.06%) [ +0.03% +0.00% +0.02% / +0.06% +0.10% +0.06%] index_copy_ perm : Elapsed 28.352 ms (567.046 ms / 20) 620.131 -> 620.797 ( +0.11%) [ +0.11% +0.00% +0.13% / +0.11% +0.44% +0.35%] index_add_ perm_sorted : Elapsed 31.041 ms (620.825 ms / 20) 566.783 -> 566.841 ( +0.01%) [ +0.01% +0.04% +0.00% / +0.01% +0.09% +0.07%] index_copy_ perm_sorted : Elapsed 28.343 ms (566.858 ms / 20) 1134.043 -> 1134.063 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.05% +0.01% +0.00%] index_select const : Elapsed 56.729 ms (1134.583 ms / 20) 1134.171 -> 1134.490 ( +0.03%) [ +0.00% +0.05% +0.09% / +0.08% +0.04% +0.03%] index_select wrap : Elapsed 56.709 ms (1134.171 ms / 20) 1134.310 -> 1134.197 ( -0.01%) [ +0.00% +0.02% +0.02% / -0.01% -0.01% +0.03%] index_select linear : Elapsed 56.715 ms (1134.310 ms / 20) 1134.134 -> 1134.115 ( -0.00%) [ +0.00% +0.07% +0.07% / -0.00% +0.06% +0.06%] index_select reverse : Elapsed 56.707 ms (1134.134 ms / 20) 1134.477 -> 1133.798 ( -0.06%) [ +0.01% +0.00% +0.03% / +0.01% -0.06% -0.04%] index_select skip64 : Elapsed 56.728 ms (1134.569 ms / 20) 1133.506 -> 1133.885 ( +0.03%) [ +0.01% +0.01% +0.00% / +0.03% +0.04% +0.03%] index_select skip256 : Elapsed 56.682 ms (1133.631 ms / 20) 1134.350 -> 1134.430 ( +0.01%) [ +0.01% +0.00% +0.07% / +0.04% +0.01% +0.01%] index_select spread : Elapsed 56.722 ms (1134.449 ms / 20) 1134.315 -> 1134.732 ( +0.04%) [ +0.03% +0.02% +0.00% / +0.05% +0.04% +0.04%] index_select strided 3 : Elapsed 56.731 ms (1134.630 ms / 20) 1134.272 -> 1134.178 ( -0.01%) [ +0.00% +0.06% +0.07% / +0.07% -0.01% +0.01%] index_select strided 5 : Elapsed 56.714 ms (1134.272 ms / 20) 1134.920 -> 1133.983 ( -0.08%) [ +0.02% +0.02% +0.00% / -0.02% -0.04% -0.08%] index_select strided 7 : Elapsed 56.756 ms (1135.124 ms / 20) 1134.344 -> 1134.439 ( +0.01%) [ +0.02% +0.00% +0.05% / +0.06% +0.01% +0.01%] index_select strided 8 : Elapsed 56.728 ms (1134.553 ms / 20) 1134.265 -> 1134.111 ( -0.01%) [ +0.05% +0.03% +0.00% / +0.08% +0.06% -0.01%] index_select strided 16 : Elapsed 56.744 ms (1134.875 ms / 20) 1134.627 -> 1134.454 ( -0.02%) [ +0.03% +0.00% +0.01% / +0.02% -0.02% +0.04%] index_select random : Elapsed 56.747 ms (1134.937 ms / 20) 1134.294 -> 1134.378 ( +0.01%) [ +0.07% +0.00% +0.03% / +0.02% +0.08% +0.01%] index_select random_sorted : Elapsed 56.754 ms (1135.071 ms / 20) B = [40, 100, 50, 256] (stride (1, 10240, 1024000, 40)) A = [20, 100, 50, 256] (stride (5000, 50, 1, 100000)) dim = 0 853.441 -> 853.001 ( -0.05%) [ +0.05% +0.00% +0.04% / -0.05% +0.34% +0.37%] index_add_ linear : Elapsed 42.691 ms (853.828 ms / 20) 890.264 -> 889.551 ( -0.08%) [ +0.09% +0.00% +0.06% / -0.08% +0.19% +0.21%] index_copy_ linear : Elapsed 44.555 ms (891.096 ms / 20) 852.549 -> 852.079 ( -0.06%) [ +0.23% +0.00% +0.08% / -0.06% +0.51% +0.43%] index_add_ reverse : Elapsed 42.725 ms (854.493 ms / 20) 890.775 -> 890.342 ( -0.05%) [ +0.00% +0.04% +0.04% / -0.05% +0.34% +0.27%] index_copy_ reverse : Elapsed 44.539 ms (890.775 ms / 20) 852.648 -> 853.748 ( +0.13%) [ +0.06% +0.28% +0.00% / +0.13% +0.51% +0.53%] index_add_ spread : Elapsed 42.658 ms (853.162 ms / 20) 890.509 -> 890.727 ( +0.02%) [ +0.16% +0.03% +0.00% / +0.02% +0.32% +0.30%] index_copy_ spread : Elapsed 44.598 ms (891.966 ms / 20) 853.354 -> 852.875 ( -0.06%) [ +0.01% +0.05% +0.00% / -0.06% +0.35% +0.31%] index_add_ strided 3 : Elapsed 42.674 ms (853.477 ms / 20) 889.594 -> 889.106 ( -0.05%) [ +0.00% +0.00% +0.04% / -0.05% +0.52% +0.47%] index_copy_ strided 3 : Elapsed 44.481 ms (889.617 ms / 20) 851.461 -> 853.858 ( +0.28%) [ +0.21% +0.00% +0.23% / +0.28% +0.53% +0.51%] index_add_ strided 7 : Elapsed 42.662 ms (853.230 ms / 20) 888.456 -> 888.728 ( +0.03%) [ +0.21% +0.14% +0.00% / +0.03% +0.53% +0.51%] index_copy_ strided 7 : Elapsed 44.517 ms (890.340 ms / 20) 852.886 -> 851.320 ( -0.18%) [ +0.00% +0.04% +0.11% / -0.18% +0.39% +0.29%] index_add_ perm : Elapsed 42.644 ms (852.886 ms / 20) 888.867 -> 888.605 ( -0.03%) [ +0.00% +0.04% +0.07% / -0.03% +0.38% +0.51%] index_copy_ perm : Elapsed 44.443 ms (888.867 ms / 20) 852.718 -> 854.159 ( +0.17%) [ +0.17% +0.00% +0.04% / +0.17% +0.51% +0.34%] index_add_ perm_sorted : Elapsed 42.710 ms (854.201 ms / 20) 889.309 -> 889.470 ( +0.02%) [ +0.00% +0.01% +0.02% / +0.02% +0.52% +0.39%] index_copy_ perm_sorted : Elapsed 44.465 ms (889.309 ms / 20) 1806.494 -> 1797.905 ( -0.48%) [ +0.06% +0.04% +0.00% / -0.03% -0.44% -0.48%] index_select const : Elapsed 90.376 ms (1807.511 ms / 20) 1816.328 -> 1818.607 ( +0.13%) [ +0.03% +0.04% +0.00% / +0.13% +0.27% +0.21%] index_select wrap : Elapsed 90.847 ms (1816.936 ms / 20) 1804.709 -> 1804.840 ( +0.01%) [ +0.04% +0.00% +0.00% / +0.01% +0.56% +0.51%] index_select linear : Elapsed 90.271 ms (1805.416 ms / 20) 1815.141 -> 1815.755 ( +0.03%) [ +0.00% +0.01% +0.09% / +0.03% +0.62% +0.56%] index_select reverse : Elapsed 90.757 ms (1815.141 ms / 20) 1805.303 -> 1798.646 ( -0.37%) [ +0.02% +0.03% +0.00% / +0.04% -0.32% -0.37%] index_select skip64 : Elapsed 90.283 ms (1805.663 ms / 20) 1805.880 -> 1798.283 ( -0.42%) [ +0.02% +0.00% +0.03% / -0.06% -0.42% -0.42%] index_select skip256 : Elapsed 90.310 ms (1806.195 ms / 20) 1808.469 -> 1807.817 ( -0.04%) [ +0.00% +0.01% +0.03% / -0.04% +0.27% +0.26%] index_select spread : Elapsed 90.423 ms (1808.469 ms / 20) 1824.495 -> 1825.414 ( +0.05%) [ +0.07% +0.00% +0.02% / +0.05% +0.14% +0.15%] index_select strided 3 : Elapsed 91.288 ms (1825.758 ms / 20) 1824.686 -> 1820.117 ( -0.25%) [ +0.00% +0.02% +0.06% / +0.06% -0.10% -0.25%] index_select strided 5 : Elapsed 91.234 ms (1824.686 ms / 20) 1824.005 -> 1822.253 ( -0.10%) [ +0.03% +0.04% +0.00% / -0.03% -0.05% -0.10%] index_select strided 7 : Elapsed 91.225 ms (1824.504 ms / 20) 1827.661 -> 1827.244 ( -0.02%) [ +0.00% +0.03% +0.01% / +0.06% -0.02% -0.02%] index_select strided 8 : Elapsed 91.383 ms (1827.661 ms / 20) 1825.576 -> 1822.789 ( -0.15%) [ +0.07% +0.02% +0.00% / +0.04% -0.06% -0.15%] index_select strided 16 : Elapsed 91.341 ms (1826.820 ms / 20) 1825.052 -> 1823.027 ( -0.11%) [ +0.00% +0.09% +0.08% / +0.10% -0.11% -0.07%] index_select random : Elapsed 91.253 ms (1825.052 ms / 20) 1807.851 -> 1808.077 ( +0.01%) [ +0.04% +0.05% +0.00% / +0.01% +0.41% +0.29%] index_select random_sorted : Elapsed 90.430 ms (1808.598 ms / 20) out_shape = [20, 40, 50, 256] in_shape = [20, 100, 50, 256] idx_dim = 1 out_shape = [20, 100, 40, 256] in_shape = [20, 100, 50, 256] idx_dim = 2 B = [20, 100, 40, 256] (stride (1, 5120, 512000, 20)) A = [20, 100, 50, 256] (stride (25600, 1, 512000, 100)) dim = 2 467.456 -> 468.083 ( +0.13%) [ +0.11% +0.00% +0.04% / +0.13% +0.73% +0.64%] index_select const : Elapsed 23.399 ms (467.975 ms / 20) 489.266 -> 489.304 ( +0.01%) [ +0.04% +0.05% +0.00% / +0.01% +0.07% +0.15%] index_select wrap : Elapsed 24.473 ms (489.451 ms / 20) 489.348 -> 489.362 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.10% +0.13%] index_select linear : Elapsed 24.480 ms (489.596 ms / 20) 488.864 -> 489.372 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.25% +0.19%] index_select reverse : Elapsed 24.469 ms (489.371 ms / 20) 467.108 -> 467.558 ( +0.10%) [ +0.14% +0.00% +0.14% / +0.10% +0.82% +0.79%] index_select skip64 : Elapsed 23.388 ms (467.751 ms / 20) 467.871 -> 467.807 ( -0.01%) [ +0.02% +0.06% +0.00% / -0.01% +0.69% +0.61%] index_select skip256 : Elapsed 23.399 ms (467.982 ms / 20) 489.882 -> 489.481 ( -0.08%) [ +0.01% +0.01% +0.00% / -0.08% -0.03% -0.08%] index_select spread : Elapsed 24.497 ms (489.935 ms / 20) 490.211 -> 489.951 ( -0.05%) [ +0.00% +0.01% +0.01% / +0.04% -0.05% -0.03%] index_select strided 3 : Elapsed 24.511 ms (490.211 ms / 20) 489.488 -> 489.360 ( -0.03%) [ +0.00% +0.03% +0.05% / -0.03% +0.11% +0.11%] index_select strided 5 : Elapsed 24.474 ms (489.488 ms / 20) 489.668 -> 489.638 ( -0.01%) [ +0.02% +0.11% +0.00% / -0.01% +0.08% +0.00%] index_select strided 7 : Elapsed 24.488 ms (489.769 ms / 20) 489.502 -> 489.522 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.14% +0.10% +0.00%] index_select strided 8 : Elapsed 24.487 ms (489.735 ms / 20) 489.432 -> 489.045 ( -0.08%) [ +0.08% +0.00% +0.11% / +0.11% +0.06% -0.08%] index_select strided 16 : Elapsed 24.492 ms (489.845 ms / 20) 488.773 -> 489.169 ( +0.08%) [ +0.03% +0.04% +0.00% / +0.08% +0.08% +0.19%] index_select random : Elapsed 24.445 ms (488.907 ms / 20) 486.526 -> 486.940 ( +0.09%) [ +0.08% +0.12% +0.00% / +0.09% +0.18% +0.15%] index_select random_sorted : Elapsed 24.345 ms (486.896 ms / 20) 489.127 -> 489.378 ( +0.05%) [ +0.01% +0.00% +0.08% / +0.05% +0.09% +0.16%] index_select perm : Elapsed 24.458 ms (489.158 ms / 20) 489.844 -> 489.073 ( -0.16%) [ +0.01% +0.04% +0.00% / -0.02% -0.02% -0.16%] index_select perm_sorted : Elapsed 24.494 ms (489.886 ms / 20) out_shape = [20, 100, 50, 40] in_shape = [20, 100, 50, 256] idx_dim = 3 B = [20, 100, 50, 40] (stride (40, 800, 80000, 1)) A = [20, 100, 50, 256] (stride (1, 1000, 20, 100000)) dim = 3 118.409 -> 118.326 ( -0.07%) [ +0.14% +0.00% +0.03% / +0.02% -0.07% +0.02%] index_select const : Elapsed 5.928 ms (118.569 ms / 20) 121.986 -> 122.042 ( +0.05%) [ +0.03% +0.00% +0.11% / +0.15% +0.13% +0.05%] index_select wrap : Elapsed 6.101 ms (122.024 ms / 20) 122.023 -> 121.922 ( -0.08%) [ +0.07% +0.00% +0.00% / -0.03% +0.11% -0.08%] index_select linear : Elapsed 6.106 ms (122.110 ms / 20) 121.965 -> 121.937 ( -0.02%) [ +0.00% +0.07% +0.13% / +0.25% -0.02% +0.10%] index_select reverse : Elapsed 6.098 ms (121.965 ms / 20) 118.598 -> 118.391 ( -0.17%) [ +0.01% +0.00% +0.04% / -0.07% -0.17% -0.17%] index_select skip64 : Elapsed 5.931 ms (118.615 ms / 20) 118.372 -> 118.186 ( -0.16%) [ +0.00% +0.11% +0.00% / +0.08% -0.16% +0.03%] index_select skip256 : Elapsed 5.919 ms (118.372 ms / 20) 121.780 -> 121.989 ( +0.17%) [ +0.12% +0.00% +0.12% / +0.17% +0.26% +0.27%] index_select spread : Elapsed 6.096 ms (121.928 ms / 20) 122.013 -> 121.935 ( -0.06%) [ +0.15% +0.00% +0.16% / +0.07% +0.07% -0.06%] index_select strided 3 : Elapsed 6.109 ms (122.190 ms / 20) 121.960 -> 121.978 ( +0.01%) [ +0.22% +0.23% +0.00% / +0.15% +0.16% +0.01%] index_select strided 5 : Elapsed 6.111 ms (122.229 ms / 20) 122.060 -> 121.921 ( -0.11%) [ +0.07% +0.03% +0.00% / +0.08% -0.11% +0.02%] index_select strided 7 : Elapsed 6.107 ms (122.148 ms / 20) 122.061 -> 121.888 ( -0.14%) [ +0.07% +0.04% +0.00% / +0.01% -0.09% -0.14%] index_select strided 8 : Elapsed 6.108 ms (122.151 ms / 20) 122.087 -> 122.058 ( -0.02%) [ +0.05% +0.00% +0.07% / +0.07% -0.02% +0.13%] index_select strided 16 : Elapsed 6.107 ms (122.145 ms / 20) 121.911 -> 122.007 ( +0.08%) [ +0.06% +0.00% +0.09% / +0.08% +0.18% +0.14%] index_select strided 64 : Elapsed 6.099 ms (121.986 ms / 20) 122.115 -> 122.020 ( -0.08%) [ +0.00% +0.02% +0.10% / +0.10% -0.08% +0.11%] index_select strided 100 : Elapsed 6.106 ms (122.115 ms / 20) 122.008 -> 121.911 ( -0.08%) [ +0.03% +0.00% +0.12% / +0.08% +0.00% -0.08%] index_select strided 255 : Elapsed 6.102 ms (122.043 ms / 20) 121.932 -> 121.964 ( +0.03%) [ +0.10% +0.04% +0.00% / +0.09% +0.03% +0.04%] index_select random : Elapsed 6.103 ms (122.052 ms / 20) 121.637 -> 121.562 ( -0.06%) [ +0.00% +0.06% +0.09% / -0.06% +0.02% +0.11%] index_select random_sorted : Elapsed 6.082 ms (121.637 ms / 20) 122.081 -> 121.973 ( -0.09%) [ +0.05% +0.09% +0.00% / +0.02% -0.09% +0.09%] index_select perm : Elapsed 6.107 ms (122.146 ms / 20) 122.066 -> 122.030 ( -0.03%) [ +0.00% +0.14% +0.06% / +0.07% -0.03% -0.02%] index_select perm_sorted : Elapsed 6.103 ms (122.066 ms / 20) out_shape = [40, 100, 256, 50] in_shape = [20, 100, 256, 50] idx_dim = 0 out_shape = [20, 40, 256, 50] in_shape = [20, 100, 256, 50] idx_dim = 1 out_shape = [20, 100, 40, 50] in_shape = [20, 100, 256, 50] idx_dim = 2 B = [20, 100, 40, 50] (stride (1, 1000, 100000, 20)) dim = 2 fill_cnt = 256 52.049 -> 51.936 ( -0.22%) [ +0.58% +0.11% +0.00% / -0.22% +0.48% +0.44%] index_fill_ const : Elapsed 2.618 ms (52.350 ms / 20) 68.841 -> 68.881 ( +0.06%) [ +0.00% +0.32% +0.15% / +0.06% +0.19% +0.81%] index_fill_ linear : Elapsed 3.442 ms (68.841 ms / 20) 72.631 -> 72.516 ( -0.16%) [ +0.36% +0.00% +0.38% / -0.16% +0.57% +0.36%] index_fill_ reverse : Elapsed 3.645 ms (72.893 ms / 20) 52.008 -> 51.928 ( -0.15%) [ +0.65% +0.13% +0.00% / -0.15% +0.51% +0.45%] index_fill_ skip64 : Elapsed 2.617 ms (52.347 ms / 20) 52.073 -> 51.990 ( -0.16%) [ +0.63% +0.14% +0.00% / -0.16% +0.43% +0.39%] index_fill_ skip256 : Elapsed 2.620 ms (52.399 ms / 20) 78.564 -> 77.174 ( -1.77%) [ +0.71% +0.00% +1.15% / -1.77% -0.36% -0.38%] index_fill_ spread : Elapsed 3.956 ms (79.125 ms / 20) 235.425 -> 234.970 ( -0.19%) [ +0.12% +0.07% +0.00% / -0.19% -0.13% -0.03%] index_fill_ strided 3 : Elapsed 11.786 ms (235.717 ms / 20) 175.909 -> 176.130 ( +0.13%) [ +0.00% +0.10% +0.24% / +0.13% +3.84% +3.57%] index_fill_ strided 5 : Elapsed 8.795 ms (175.909 ms / 20) 237.498 -> 236.620 ( -0.37%) [ +0.03% +0.12% +0.00% / -0.36% -0.37% +0.01%] index_fill_ strided 7 : Elapsed 11.878 ms (237.567 ms / 20) 136.576 -> 135.898 ( -0.50%) [ +0.47% +0.19% +0.00% / +0.13% -0.50% -0.35%] index_fill_ strided 8 : Elapsed 6.861 ms (137.214 ms / 20) 134.960 -> 134.539 ( -0.31%) [ +0.38% +0.00% +0.36% / +0.56% +0.11% -0.31%] index_fill_ strided 16 : Elapsed 6.773 ms (135.469 ms / 20) 222.353 -> 218.744 ( -1.62%) [ +0.22% +0.00% +0.09% / -0.09% -1.62% -1.36%] index_fill_ random : Elapsed 11.142 ms (222.847 ms / 20) 78.786 -> 78.171 ( -0.78%) [ +0.64% +0.83% +0.00% / -0.78% +0.62% -0.68%] index_fill_ random_sorted : Elapsed 3.964 ms (79.287 ms / 20) out_shape = [20, 100, 256, 40] in_shape = [20, 100, 256, 50] idx_dim = 3 out_shape = [40, 256, 50, 100] in_shape = [20, 256, 50, 100] idx_dim = 0 B = [40, 256, 50, 100] (stride (1280000, 5000, 1, 50)) A = [20, 256, 50, 100] (stride (100, 100000, 2000, 1)) dim = 0 638.615 -> 638.125 ( -0.08%) [ +0.26% +0.25% +0.00% / +0.08% +0.13% -0.08%] index_add_ linear : Elapsed 32.012 ms (640.244 ms / 20) 570.286 -> 568.535 ( -0.31%) [ +0.00% +0.09% +0.02% / +0.00% -0.18% -0.31%] index_copy_ linear : Elapsed 28.514 ms (570.286 ms / 20) 639.639 -> 639.400 ( -0.04%) [ +0.01% +0.00% +0.15% / -0.04% +0.26% +0.24%] index_add_ reverse : Elapsed 31.985 ms (639.700 ms / 20) 570.390 -> 570.078 ( -0.05%) [ +0.02% +0.00% +0.04% / -0.02% -0.05% +0.01%] index_copy_ reverse : Elapsed 28.524 ms (570.481 ms / 20) 638.941 -> 638.382 ( -0.09%) [ +0.18% +0.17% +0.00% / -0.06% +0.18% -0.09%] index_add_ spread : Elapsed 32.005 ms (640.090 ms / 20) 569.971 -> 569.627 ( -0.06%) [ +0.12% +0.00% +0.09% / +0.05% +0.11% -0.06%] index_copy_ spread : Elapsed 28.532 ms (570.646 ms / 20) 638.378 -> 638.119 ( -0.04%) [ +0.00% +0.01% +0.07% / -0.04% +0.22% +0.12%] index_add_ strided 3 : Elapsed 31.919 ms (638.378 ms / 20) 569.650 -> 569.674 ( +0.00%) [ +0.00% +0.12% +0.01% / +0.08% +0.02% +0.00%] index_copy_ strided 3 : Elapsed 28.483 ms (569.650 ms / 20) 636.720 -> 636.466 ( -0.04%) [ +0.20% +0.00% +0.01% / -0.04% +0.55% +0.71%] index_add_ strided 7 : Elapsed 31.901 ms (638.018 ms / 20) 569.510 -> 568.948 ( -0.10%) [ +0.01% +0.00% +0.07% / -0.10% -0.06% +0.13%] index_copy_ strided 7 : Elapsed 28.480 ms (569.591 ms / 20) 633.621 -> 633.873 ( +0.04%) [ +0.00% +0.21% +0.19% / +0.04% +1.05% +1.07%] index_add_ perm : Elapsed 31.681 ms (633.621 ms / 20) 566.522 -> 566.828 ( +0.05%) [ +0.12% +0.00% +0.08% / +0.05% +0.56% +0.56%] index_copy_ perm : Elapsed 28.361 ms (567.213 ms / 20) 633.227 -> 634.161 ( +0.15%) [ +0.00% +0.07% +0.10% / +0.15% +1.09% +0.81%] index_add_ perm_sorted : Elapsed 31.661 ms (633.227 ms / 20) 566.377 -> 566.694 ( +0.06%) [ +0.21% +0.07% +0.00% / +0.06% +0.71% +0.59%] index_copy_ perm_sorted : Elapsed 28.377 ms (567.539 ms / 20) 1205.706 -> 1204.117 ( -0.13%) [ +0.11% +0.09% +0.00% / +0.01% -0.05% -0.13%] index_select const : Elapsed 60.350 ms (1207.007 ms / 20) 1206.410 -> 1206.153 ( -0.02%) [ +0.05% +0.02% +0.00% / +0.02% -0.02% +0.02%] index_select wrap : Elapsed 60.348 ms (1206.959 ms / 20) 1206.576 -> 1205.956 ( -0.05%) [ +0.02% +0.00% +0.03% / +0.02% -0.04% -0.05%] index_select linear : Elapsed 60.340 ms (1206.797 ms / 20) 1205.521 -> 1205.787 ( +0.02%) [ +0.00% +0.01% +0.05% / +0.02% +0.09% +0.11%] index_select reverse : Elapsed 60.276 ms (1205.521 ms / 20) 1204.367 -> 1203.969 ( -0.03%) [ +0.00% +0.05% +0.09% / -0.03% +0.14% +0.19%] index_select skip64 : Elapsed 60.218 ms (1204.367 ms / 20) 1199.598 -> 1200.766 ( +0.10%) [ +0.09% +0.00% +0.11% / +0.10% +0.57% +0.58%] index_select skip256 : Elapsed 60.034 ms (1200.674 ms / 20) 1206.056 -> 1205.596 ( -0.04%) [ +0.00% +0.03% +0.01% / +0.04% -0.02% -0.04%] index_select spread : Elapsed 60.303 ms (1206.056 ms / 20) 1205.861 -> 1205.814 ( -0.00%) [ +0.06% +0.00% +0.00% / +0.07% -0.00% +0.00%] index_select strided 3 : Elapsed 60.330 ms (1206.595 ms / 20) 1206.107 -> 1200.434 ( -0.47%) [ +0.00% +0.00% +0.10% / +0.07% -0.47% -0.36%] index_select strided 5 : Elapsed 60.305 ms (1206.107 ms / 20) 1205.623 -> 1201.208 ( -0.37%) [ +0.03% +0.04% +0.00% / +0.08% -0.36% -0.37%] index_select strided 7 : Elapsed 60.300 ms (1205.992 ms / 20) 1205.903 -> 1206.446 ( +0.05%) [ +0.04% +0.00% +0.12% / +0.09% +0.05% +0.05%] index_select strided 8 : Elapsed 60.320 ms (1206.391 ms / 20) 1206.179 -> 1205.953 ( -0.02%) [ +0.04% +0.00% +0.00% / -0.01% -0.02% +0.03%] index_select strided 16 : Elapsed 60.330 ms (1206.608 ms / 20) 1204.352 -> 1205.544 ( +0.10%) [ +0.09% +0.00% +0.00% / +0.10% +0.21% +0.18%] index_select random : Elapsed 60.272 ms (1205.441 ms / 20) 1205.735 -> 1205.865 ( +0.01%) [ +0.12% +0.00% +0.09% / +0.07% +0.01% +0.01%] index_select random_sorted : Elapsed 60.357 ms (1207.147 ms / 20) out_shape = [20, 40, 50, 100] in_shape = [20, 256, 50, 100] idx_dim = 1 B = [20, 40, 50, 100] (stride (1, 100000, 20, 1000)) A = [20, 256, 50, 100] (stride (12800, 1, 256, 256000)) dim = 1 134.664 -> 134.322 ( -0.25%) [ +0.02% +0.11% +0.00% / -0.05% -0.25% -0.03%] index_select const : Elapsed 6.735 ms (134.695 ms / 20) 135.055 -> 134.968 ( -0.06%) [ +0.00% +0.01% +0.05% / -0.05% +0.10% -0.06%] index_select wrap : Elapsed 6.753 ms (135.055 ms / 20) 135.017 -> 134.964 ( -0.04%) [ +0.05% +0.00% +0.15% / -0.02% +0.10% -0.04%] index_select linear : Elapsed 6.754 ms (135.084 ms / 20) 134.874 -> 134.910 ( +0.03%) [ +0.00% +0.13% +0.16% / +0.24% +0.27% +0.03%] index_select reverse : Elapsed 6.744 ms (134.874 ms / 20) 134.504 -> 134.555 ( +0.04%) [ +0.00% +0.17% +0.12% / +0.07% +0.04% +0.09%] index_select skip64 : Elapsed 6.725 ms (134.504 ms / 20) 134.601 -> 134.495 ( -0.08%) [ +0.00% +0.09% +0.02% / +0.03% -0.08% +0.12%] index_select skip256 : Elapsed 6.730 ms (134.601 ms / 20) 136.334 -> 136.429 ( +0.07%) [ +0.00% +0.05% +0.11% / +0.07% +0.07% +0.12%] index_select spread : Elapsed 6.817 ms (136.334 ms / 20) 135.725 -> 135.789 ( +0.05%) [ +0.09% +0.17% +0.00% / +0.05% +0.05% +0.05%] index_select strided 3 : Elapsed 6.793 ms (135.853 ms / 20) 136.110 -> 136.071 ( -0.03%) [ +0.17% +0.10% +0.00% / -0.03% +0.07% +0.14%] index_select strided 5 : Elapsed 6.817 ms (136.344 ms / 20) 136.384 -> 136.528 ( +0.11%) [ +0.00% +0.02% +0.17% / +0.11% +0.12% +0.15%] index_select strided 7 : Elapsed 6.819 ms (136.384 ms / 20) 136.533 -> 136.619 ( +0.06%) [ +0.04% +0.00% +0.07% / +0.06% +0.07% +0.11%] index_select strided 8 : Elapsed 6.829 ms (136.588 ms / 20) 136.500 -> 136.489 ( -0.01%) [ +0.04% +0.00% +0.06% / -0.01% +0.03% +0.21%] index_select strided 16 : Elapsed 6.827 ms (136.549 ms / 20) 136.448 -> 136.537 ( +0.07%) [ +0.12% +0.00% +0.04% / +0.07% +0.08% +0.07%] index_select strided 64 : Elapsed 6.831 ms (136.615 ms / 20) 136.683 -> 136.551 ( -0.10%) [ +0.00% +0.03% +0.01% / -0.10% -0.01% +0.02%] index_select strided 100 : Elapsed 6.834 ms (136.683 ms / 20) 135.128 -> 135.022 ( -0.08%) [ +0.00% +0.01% +0.09% / -0.08% -0.06% -0.01%] index_select strided 255 : Elapsed 6.756 ms (135.128 ms / 20) 136.534 -> 136.568 ( +0.02%) [ +0.11% +0.10% +0.00% / +0.07% +0.02% +0.08%] index_select random : Elapsed 6.834 ms (136.690 ms / 20) 136.064 -> 136.133 ( +0.05%) [ +0.09% +0.04% +0.00% / +0.06% +0.05% +0.08%] index_select random_sorted : Elapsed 6.809 ms (136.183 ms / 20) 136.592 -> 136.576 ( -0.01%) [ +0.11% +0.00% +0.02% / -0.01% -0.01% -0.01%] index_select perm : Elapsed 6.837 ms (136.745 ms / 20) 136.082 -> 136.050 ( -0.02%) [ +0.01% +0.02% +0.00% / -0.02% -0.02% -0.00%] index_select perm_sorted : Elapsed 6.805 ms (136.096 ms / 20) B = [20, 40, 50, 100] (stride (1, 20, 800, 40000)) A = [20, 256, 50, 100] (stride (25600, 100, 512000, 1)) dim = 1 79.104 -> 79.471 ( +0.46%) [ +0.23% +0.06% +0.00% / +0.46% +2.03% +1.92%] index_select const : Elapsed 3.964 ms (79.289 ms / 20) 79.700 -> 79.879 ( +0.22%) [ +0.20% +0.03% +0.00% / +0.22% +1.06% +0.40%] index_select wrap : Elapsed 3.993 ms (79.860 ms / 20) 79.667 -> 79.544 ( -0.15%) [ +0.24% +0.11% +0.00% / -0.15% +0.72% +0.86%] index_select linear : Elapsed 3.993 ms (79.855 ms / 20) 78.256 -> 78.623 ( +0.47%) [ +0.00% +0.23% +0.25% / +0.47% +2.87% +2.64%] index_select reverse : Elapsed 3.913 ms (78.256 ms / 20) 78.851 -> 78.806 ( -0.06%) [ +0.27% +0.02% +0.00% / -0.06% +3.02% +3.02%] index_select skip64 : Elapsed 3.953 ms (79.065 ms / 20) 79.113 -> 79.284 ( +0.22%) [ +0.01% +0.43% +0.00% / +0.22% +2.27% +2.75%] index_select skip256 : Elapsed 3.956 ms (79.124 ms / 20) 78.511 -> 78.448 ( -0.08%) [ +0.54% +0.00% +0.77% / -0.08% +1.77% +2.13%] index_select spread : Elapsed 3.947 ms (78.933 ms / 20) 79.045 -> 78.933 ( -0.14%) [ +0.00% +0.04% +0.36% / -0.14% +1.25% +1.71%] index_select strided 3 : Elapsed 3.952 ms (79.045 ms / 20) 79.247 -> 79.253 ( +0.01%) [ +0.00% +0.21% +0.13% / +0.01% +1.47% +1.23%] index_select strided 5 : Elapsed 3.962 ms (79.247 ms / 20) 78.699 -> 78.637 ( -0.08%) [ +0.00% +0.20% +0.22% / -0.08% +2.36% +1.91%] index_select strided 7 : Elapsed 3.935 ms (78.699 ms / 20) 79.186 -> 79.311 ( +0.16%) [ +0.34% +0.33% +0.00% / +0.16% +0.77% +0.76%] index_select strided 8 : Elapsed 3.973 ms (79.456 ms / 20) 78.377 -> 78.896 ( +0.66%) [ +0.00% +0.46% +0.97% / +0.66% +2.68% +2.04%] index_select strided 16 : Elapsed 3.919 ms (78.377 ms / 20) 78.768 -> 78.715 ( -0.07%) [ +0.15% +0.57% +0.00% / -0.07% +2.52% +2.23%] index_select strided 64 : Elapsed 3.944 ms (78.887 ms / 20) 78.976 -> 79.027 ( +0.06%) [ +0.01% +0.09% +0.00% / +0.06% +2.09% +1.92%] index_select strided 100 : Elapsed 3.949 ms (78.984 ms / 20) 78.592 -> 78.905 ( +0.40%) [ +0.00% +0.46% +0.22% / +0.40% +2.54% +2.81%] index_select strided 255 : Elapsed 3.930 ms (78.592 ms / 20) 77.525 -> 77.819 ( +0.38%) [ +0.53% +0.00% +0.49% / +0.38% +3.97% +3.75%] index_select random : Elapsed 3.897 ms (77.933 ms / 20) 78.552 -> 78.766 ( +0.27%) [ +0.33% +0.14% +0.00% / +0.27% +2.42% +2.68%] index_select random_sorted : Elapsed 3.941 ms (78.812 ms / 20) 78.800 -> 79.016 ( +0.27%) [ +0.58% +0.76% +0.00% / +0.27% +1.52% +1.64%] index_select perm : Elapsed 3.963 ms (79.257 ms / 20) 79.594 -> 79.381 ( -0.27%) [ +0.00% +0.02% +0.02% / -0.27% +0.71% +0.87%] index_select perm_sorted : Elapsed 3.980 ms (79.594 ms / 20) out_shape = [20, 256, 40, 100] in_shape = [20, 256, 50, 100] idx_dim = 2 B = [20, 256, 40, 100] (stride (1, 80000, 20, 800)) A = [20, 256, 50, 100] (stride (100, 2000, 512000, 1)) dim = 2 556.552 -> 557.827 ( +0.23%) [ +0.00% +0.06% +0.12% / +0.23% +0.27% +0.37%] index_select const : Elapsed 27.828 ms (556.552 ms / 20) 557.550 -> 557.627 ( +0.01%) [ +0.00% +0.05% +0.06% / +0.01% +0.10% +0.09%] index_select wrap : Elapsed 27.878 ms (557.550 ms / 20) 557.355 -> 557.208 ( -0.03%) [ +0.13% +0.04% +0.00% / -0.03% +0.12% +0.27%] index_select linear : Elapsed 27.905 ms (558.099 ms / 20) 558.127 -> 558.142 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.05% +0.08% +0.00%] index_select reverse : Elapsed 27.934 ms (558.675 ms / 20) 557.583 -> 557.725 ( +0.03%) [ +0.02% +0.04% +0.00% / +0.03% +0.03% +0.14%] index_select skip64 : Elapsed 27.884 ms (557.683 ms / 20) 554.834 -> 555.515 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.57% +0.61%] index_select skip256 : Elapsed 27.763 ms (555.253 ms / 20) 555.587 -> 555.547 ( -0.01%) [ +0.08% +0.00% +0.05% / -0.01% +0.52% +0.60%] index_select spread : Elapsed 27.801 ms (556.014 ms / 20) 557.266 -> 557.751 ( +0.09%) [ +0.00% +0.08% +0.07% / +0.09% +0.19% +0.10%] index_select strided 3 : Elapsed 27.863 ms (557.266 ms / 20) 557.174 -> 557.617 ( +0.08%) [ +0.00% +0.07% +0.08% / +0.20% +0.18% +0.08%] index_select strided 5 : Elapsed 27.859 ms (557.174 ms / 20) 557.079 -> 556.452 ( -0.11%) [ +0.15% +0.17% +0.00% / +0.11% -0.11% -0.07%] index_select strided 7 : Elapsed 27.896 ms (557.925 ms / 20) 557.286 -> 556.351 ( -0.17%) [ +0.00% +0.03% +0.09% / +0.02% -0.12% -0.17%] index_select strided 8 : Elapsed 27.864 ms (557.286 ms / 20) 557.521 -> 557.533 ( +0.00%) [ +0.04% +0.03% +0.00% / +0.00% +0.14% +0.14%] index_select strided 16 : Elapsed 27.886 ms (557.721 ms / 20) 557.449 -> 557.918 ( +0.08%) [ +0.02% +0.00% +0.05% / +0.19% +0.14% +0.08%] index_select random : Elapsed 27.878 ms (557.566 ms / 20) 557.169 -> 557.050 ( -0.02%) [ +0.26% +0.00% +0.14% / -0.02% +0.23% +0.34%] index_select random_sorted : Elapsed 27.932 ms (558.635 ms / 20) 557.221 -> 557.608 ( +0.07%) [ +0.05% +0.00% +0.08% / +0.07% +0.19% +0.17%] index_select perm : Elapsed 27.875 ms (557.498 ms / 20) 554.862 -> 556.360 ( +0.27%) [ +0.14% +0.00% +0.07% / +0.27% +0.69% +0.58%] index_select perm_sorted : Elapsed 27.782 ms (555.644 ms / 20) out_shape = [20, 256, 50, 40] in_shape = [20, 256, 50, 100] idx_dim = 3 out_shape = [40, 256, 100, 50] in_shape = [20, 256, 100, 50] idx_dim = 0 out_shape = [20, 40, 100, 50] in_shape = [20, 256, 100, 50] idx_dim = 1 B = [20, 40, 100, 50] (stride (200000, 50, 2000, 1)) dim = 1 fill_cnt = 256 51.116 -> 51.002 ( -0.22%) [ +0.51% +0.16% +0.00% / -0.22% +1.29% +1.15%] index_fill_ const : Elapsed 2.569 ms (51.376 ms / 20) 53.029 -> 52.786 ( -0.46%) [ +0.40% +0.03% +0.00% / -0.46% -0.15% -0.11%] index_fill_ linear : Elapsed 2.662 ms (53.239 ms / 20) 53.356 -> 53.199 ( -0.29%) [ +0.48% +0.14% +0.00% / -0.29% +0.22% +0.11%] index_fill_ reverse : Elapsed 2.681 ms (53.613 ms / 20) 51.165 -> 51.064 ( -0.20%) [ +0.66% +0.26% +0.00% / -0.20% +1.11% +1.16%] index_fill_ skip64 : Elapsed 2.575 ms (51.502 ms / 20) 51.202 -> 51.172 ( -0.06%) [ +0.68% +0.34% +0.00% / -0.06% +1.13% +1.16%] index_fill_ skip256 : Elapsed 2.577 ms (51.549 ms / 20) 53.995 -> 53.768 ( -0.42%) [ +0.43% +0.11% +0.00% / -0.42% +0.56% +0.44%] index_fill_ spread : Elapsed 2.711 ms (54.226 ms / 20) 65.471 -> 65.337 ( -0.20%) [ +0.44% +0.14% +0.00% / -0.20% +0.13% +0.09%] index_fill_ strided 3 : Elapsed 3.288 ms (65.759 ms / 20) 63.787 -> 63.685 ( -0.16%) [ +0.58% +0.12% +0.00% / -0.16% +0.24% +0.22%] index_fill_ strided 5 : Elapsed 3.208 ms (64.155 ms / 20) 65.717 -> 65.522 ( -0.30%) [ +0.21% +0.00% +0.19% / -0.30% -0.07% -0.10%] index_fill_ strided 7 : Elapsed 3.293 ms (65.858 ms / 20) 61.789 -> 61.626 ( -0.26%) [ +0.45% +0.24% +0.00% / -0.26% -0.03% +0.00%] index_fill_ strided 8 : Elapsed 3.103 ms (62.065 ms / 20) 61.527 -> 61.320 ( -0.34%) [ +0.28% +0.13% +0.00% / -0.34% +0.31% +0.44%] index_fill_ strided 16 : Elapsed 3.085 ms (61.700 ms / 20) 64.792 -> 64.662 ( -0.20%) [ +0.33% +0.00% +0.04% / -0.20% +0.10% -0.06%] index_fill_ random : Elapsed 3.250 ms (65.008 ms / 20) 53.875 -> 53.827 ( -0.09%) [ +0.56% +0.01% +0.00% / -0.09% +0.52% +0.39%] index_fill_ random_sorted : Elapsed 2.709 ms (54.179 ms / 20) out_shape = [20, 256, 40, 50] in_shape = [20, 256, 100, 50] idx_dim = 2 out_shape = [20, 256, 100, 40] in_shape = [20, 256, 100, 50] idx_dim = 3 out_shape = [40, 20, 100, 256] in_shape = [50, 20, 100, 256] idx_dim = 0 out_shape = [50, 40, 100, 256] in_shape = [50, 20, 100, 256] idx_dim = 1 out_shape = [50, 20, 40, 256] in_shape = [50, 20, 100, 256] idx_dim = 2 B = [50, 20, 40, 256] (stride (800, 1, 20, 40000)) A = [50, 20, 100, 256] (stride (20, 1, 1000, 100000)) dim = 2 good 348.444 -> 328.389 ( -5.76%) [ +0.24% +0.00% +0.63% / +0.47% -5.76% -5.55%] index_select const : Elapsed 17.465 ms (349.297 ms / 20) 342.875 -> 343.296 ( +0.12%) [ +0.18% +0.00% +0.09% / +0.12% +0.85% +0.71%] index_select wrap : Elapsed 17.175 ms (343.505 ms / 20) 342.778 -> 342.454 ( -0.09%) [ +0.00% +0.07% +0.02% / -0.09% +0.86% +0.40%] index_select linear : Elapsed 17.139 ms (342.778 ms / 20) 343.330 -> 341.805 ( -0.44%) [ +0.37% +0.65% +0.00% / +0.16% -0.44% +0.42%] index_select reverse : Elapsed 17.230 ms (344.594 ms / 20) good 347.495 -> 327.749 ( -5.68%) [ +0.00% +0.68% +0.31% / +0.51% -5.46% -5.68%] index_select skip64 : Elapsed 17.375 ms (347.495 ms / 20) good 348.364 -> 327.956 ( -5.86%) [ +0.11% +0.00% +0.53% / +0.39% -5.69% -5.86%] index_select skip256 : Elapsed 17.438 ms (348.759 ms / 20) 348.462 -> 345.753 ( -0.78%) [ +0.49% +0.14% +0.00% / +0.01% -0.78% -0.68%] index_select spread : Elapsed 17.509 ms (350.174 ms / 20) 345.333 -> 344.799 ( -0.15%) [ +0.47% +0.00% +0.30% / +0.29% -0.15% +0.01%] index_select strided 3 : Elapsed 17.348 ms (346.957 ms / 20) 352.567 -> 351.878 ( -0.20%) [ +0.00% +0.78% +0.18% / +0.89% +0.21% -0.20%] index_select strided 5 : Elapsed 17.628 ms (352.567 ms / 20) 350.646 -> 337.969 ( -3.62%) [ +0.00% +0.05% +0.25% / -0.19% -3.46% -3.62%] index_select strided 7 : Elapsed 17.532 ms (350.646 ms / 20) 350.613 -> 340.239 ( -2.96%) [ +0.64% +0.40% +0.00% / +0.31% -2.48% -2.96%] index_select strided 8 : Elapsed 17.643 ms (352.860 ms / 20) 348.419 -> 346.829 ( -0.46%) [ +0.53% +0.33% +0.00% / +0.59% -0.46% -0.31%] index_select strided 16 : Elapsed 17.514 ms (350.276 ms / 20) 350.904 -> 340.218 ( -3.05%) [ +0.79% +0.00% +0.12% / +0.48% -2.56% -3.05%] index_select strided 64 : Elapsed 17.684 ms (353.675 ms / 20) 346.436 -> 343.917 ( -0.73%) [ +0.30% +0.00% +0.17% / +0.54% -0.73% -0.39%] index_select random : Elapsed 17.374 ms (347.478 ms / 20) 346.232 -> 347.578 ( +0.39%) [ +0.23% +0.00% +0.31% / +0.39% +0.48% +0.90%] index_select random_sorted : Elapsed 17.352 ms (347.041 ms / 20) 345.781 -> 340.762 ( -1.45%) [ +0.32% +0.00% +0.05% / +0.47% -1.45% -0.98%] index_select perm : Elapsed 17.344 ms (346.875 ms / 20) 347.744 -> 346.915 ( -0.24%) [ +0.36% +0.19% +0.00% / +0.03% -0.07% -0.24%] index_select perm_sorted : Elapsed 17.449 ms (348.979 ms / 20) out_shape = [50, 20, 100, 40] in_shape = [50, 20, 100, 256] idx_dim = 3 out_shape = [40, 20, 256, 100] in_shape = [50, 20, 256, 100] idx_dim = 0 out_shape = [50, 40, 256, 100] in_shape = [50, 20, 256, 100] idx_dim = 1 B = [50, 40, 256, 100] (stride (1024000, 100, 4000, 1)) A = [50, 20, 256, 100] (stride (256, 1280000, 1, 12800)) dim = 1 645.860 -> 646.167 ( +0.05%) [ +0.14% +0.00% +0.07% / +0.13% +0.16% +0.05%] index_add_ linear : Elapsed 32.339 ms (646.771 ms / 20) 627.895 -> 628.727 ( +0.13%) [ +0.09% +0.00% +0.11% / +0.13% +0.34% +0.40%] index_copy_ linear : Elapsed 31.423 ms (628.455 ms / 20) 643.744 -> 644.550 ( +0.13%) [ +0.14% +0.16% +0.00% / +0.13% +0.43% +0.20%] index_add_ reverse : Elapsed 32.232 ms (644.649 ms / 20) 626.626 -> 626.429 ( -0.03%) [ +0.00% +0.20% +0.14% / -0.03% +0.58% +0.42%] index_copy_ reverse : Elapsed 31.331 ms (626.626 ms / 20) 645.713 -> 645.023 ( -0.11%) [ +0.19% +0.00% +0.19% / -0.11% +0.05% +0.20%] index_add_ spread : Elapsed 32.347 ms (646.949 ms / 20) 628.071 -> 628.008 ( -0.01%) [ +0.23% +0.00% +0.08% / -0.01% +0.24% +0.39%] index_copy_ spread : Elapsed 31.476 ms (629.511 ms / 20) 646.378 -> 645.615 ( -0.12%) [ +0.00% +0.17% +0.08% / -0.12% +0.22% +0.16%] index_add_ strided 3 : Elapsed 32.319 ms (646.378 ms / 20) 628.376 -> 627.755 ( -0.10%) [ +0.00% +0.15% +0.06% / -0.10% +0.40% +0.33%] index_copy_ strided 3 : Elapsed 31.419 ms (628.376 ms / 20) 645.170 -> 645.739 ( +0.09%) [ +0.23% +0.00% +0.18% / +0.09% +0.19% +0.52%] index_add_ strided 7 : Elapsed 32.334 ms (646.686 ms / 20) 628.404 -> 628.266 ( -0.02%) [ +0.14% +0.00% +0.00% / -0.02% +0.23% +0.56%] index_copy_ strided 7 : Elapsed 31.465 ms (629.297 ms / 20) 644.325 -> 646.290 ( +0.30%) [ +0.10% +0.22% +0.00% / +0.30% +0.35% +0.36%] index_add_ perm : Elapsed 32.249 ms (644.982 ms / 20) 627.328 -> 628.515 ( +0.19%) [ +0.00% +0.22% +0.15% / +0.19% +0.44% +0.31%] index_copy_ perm : Elapsed 31.366 ms (627.328 ms / 20) 645.269 -> 646.371 ( +0.17%) [ +0.23% +0.24% +0.00% / +0.30% +0.17% +0.36%] index_add_ perm_sorted : Elapsed 32.336 ms (646.724 ms / 20) 626.680 -> 628.530 ( +0.30%) [ +0.39% +0.24% +0.00% / +0.30% +0.43% +0.74%] index_copy_ perm_sorted : Elapsed 31.455 ms (629.097 ms / 20) 1218.764 -> 1218.088 ( -0.06%) [ +0.14% +0.12% +0.00% / +0.17% -0.06% -0.04%] index_select const : Elapsed 61.024 ms (1220.487 ms / 20) 1322.300 -> 1322.557 ( +0.02%) [ +0.00% +0.02% +0.15% / +0.02% +0.28% +0.32%] index_select wrap : Elapsed 66.115 ms (1322.300 ms / 20) 1253.472 -> 1254.496 ( +0.08%) [ +0.10% +0.00% +0.24% / +0.08% +0.45% +0.33%] index_select linear : Elapsed 62.737 ms (1254.740 ms / 20) 1283.608 -> 1284.577 ( +0.08%) [ +0.00% +0.15% +0.22% / +0.08% +0.20% +0.27%] index_select reverse : Elapsed 64.180 ms (1283.608 ms / 20) 1220.458 -> 1217.880 ( -0.21%) [ +0.03% +0.03% +0.00% / +0.02% -0.09% -0.21%] index_select skip64 : Elapsed 61.042 ms (1220.848 ms / 20) 1219.049 -> 1217.998 ( -0.09%) [ +0.02% +0.05% +0.00% / +0.06% -0.05% -0.09%] index_select skip256 : Elapsed 60.963 ms (1219.262 ms / 20) 1294.617 -> 1293.508 ( -0.09%) [ +0.00% +0.00% +0.01% / +0.07% -0.09% +0.05%] index_select spread : Elapsed 64.732 ms (1294.639 ms / 20) 1323.266 -> 1322.551 ( -0.05%) [ +0.20% +0.07% +0.00% / +0.11% -0.05% -0.05%] index_select strided 3 : Elapsed 66.293 ms (1325.861 ms / 20) 1318.998 -> 1315.536 ( -0.26%) [ +0.00% +0.05% +0.01% / +0.04% -0.18% -0.26%] index_select strided 5 : Elapsed 65.950 ms (1318.998 ms / 20) 1321.785 -> 1322.912 ( +0.09%) [ +0.13% +0.00% +0.14% / +0.09% +0.13% +0.15%] index_select strided 7 : Elapsed 66.172 ms (1323.438 ms / 20) 1320.052 -> 1318.386 ( -0.13%) [ +0.16% +0.00% +0.16% / -0.08% +0.02% -0.13%] index_select strided 8 : Elapsed 66.108 ms (1322.164 ms / 20) 1318.865 -> 1318.046 ( -0.06%) [ +0.04% +0.03% +0.00% / -0.06% +0.27% +0.05%] index_select strided 16 : Elapsed 65.973 ms (1319.455 ms / 20) 1317.134 -> 1317.259 ( +0.01%) [ +0.11% +0.03% +0.00% / +0.10% +0.01% +0.27%] index_select random : Elapsed 65.931 ms (1318.616 ms / 20) 1276.148 -> 1276.400 ( +0.02%) [ +0.00% +0.10% +0.04% / +0.02% +0.20% +0.06%] index_select random_sorted : Elapsed 63.807 ms (1276.148 ms / 20) out_shape = [50, 20, 40, 100] in_shape = [50, 20, 256, 100] idx_dim = 2 out_shape = [50, 20, 256, 40] in_shape = [50, 20, 256, 100] idx_dim = 3 out_shape = [40, 100, 20, 256] in_shape = [50, 100, 20, 256] idx_dim = 0 out_shape = [50, 40, 20, 256] in_shape = [50, 100, 20, 256] idx_dim = 1 out_shape = [50, 100, 40, 256] in_shape = [50, 100, 20, 256] idx_dim = 2 out_shape = [50, 100, 20, 40] in_shape = [50, 100, 20, 256] idx_dim = 3 out_shape = [40, 100, 256, 20] in_shape = [50, 100, 256, 20] idx_dim = 0 out_shape = [50, 40, 256, 20] in_shape = [50, 100, 256, 20] idx_dim = 1 out_shape = [50, 100, 40, 20] in_shape = [50, 100, 256, 20] idx_dim = 2 out_shape = [50, 100, 256, 40] in_shape = [50, 100, 256, 20] idx_dim = 3 out_shape = [40, 256, 20, 100] in_shape = [50, 256, 20, 100] idx_dim = 0 out_shape = [50, 40, 20, 100] in_shape = [50, 256, 20, 100] idx_dim = 1 out_shape = [50, 256, 40, 100] in_shape = [50, 256, 20, 100] idx_dim = 2 out_shape = [50, 256, 20, 40] in_shape = [50, 256, 20, 100] idx_dim = 3 out_shape = [40, 256, 100, 20] in_shape = [50, 256, 100, 20] idx_dim = 0 out_shape = [50, 40, 100, 20] in_shape = [50, 256, 100, 20] idx_dim = 1 out_shape = [50, 256, 40, 20] in_shape = [50, 256, 100, 20] idx_dim = 2 out_shape = [50, 256, 100, 40] in_shape = [50, 256, 100, 20] idx_dim = 3 out_shape = [40, 20, 50, 256] in_shape = [100, 20, 50, 256] idx_dim = 0 B = [40, 20, 50, 256] (stride (50, 512000, 1, 2000)) A = [100, 20, 50, 256] (stride (1, 1280000, 100, 5000)) dim = 0 377.657 -> 377.750 ( +0.02%) [ +0.18% +0.02% +0.00% / +0.13% +0.02% +0.08%] index_select const : Elapsed 18.917 ms (378.333 ms / 20) 377.410 -> 377.776 ( +0.10%) [ +0.22% +0.14% +0.00% / +0.24% +0.22% +0.10%] index_select wrap : Elapsed 18.912 ms (378.235 ms / 20) 376.601 -> 376.624 ( +0.01%) [ +0.00% +0.21% +0.32% / +0.01% +0.42% +0.28%] index_select linear : Elapsed 18.830 ms (376.601 ms / 20) 376.480 -> 377.280 ( +0.21%) [ +0.03% +0.00% +0.13% / +0.21% +0.23% +0.28%] index_select reverse : Elapsed 18.829 ms (376.579 ms / 20) 377.876 -> 376.868 ( -0.27%) [ +0.04% +0.00% +0.21% / -0.27% +0.04% +0.05%] index_select skip64 : Elapsed 18.902 ms (378.031 ms / 20) 376.976 -> 377.528 ( +0.15%) [ +0.00% +0.30% +0.11% / +0.17% +0.15% +0.32%] index_select skip256 : Elapsed 18.849 ms (376.976 ms / 20) 376.094 -> 376.950 ( +0.23%) [ +0.29% +0.34% +0.00% / +0.23% +0.50% +0.48%] index_select spread : Elapsed 18.859 ms (377.177 ms / 20) 375.856 -> 377.700 ( +0.49%) [ +0.16% +0.47% +0.00% / +0.50% +0.53% +0.49%] index_select strided 3 : Elapsed 18.823 ms (376.470 ms / 20) 377.798 -> 377.050 ( -0.20%) [ +0.04% +0.10% +0.00% / -0.00% +0.10% -0.20%] index_select strided 5 : Elapsed 18.897 ms (377.932 ms / 20) 376.952 -> 376.954 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.31% +0.15% +0.00%] index_select strided 7 : Elapsed 18.877 ms (377.540 ms / 20) 376.868 -> 377.377 ( +0.14%) [ +0.14% +0.00% +0.13% / +0.19% +0.31% +0.14%] index_select strided 8 : Elapsed 18.869 ms (377.378 ms / 20) 377.230 -> 377.504 ( +0.07%) [ +0.13% +0.00% +0.24% / +0.07% +0.09% +0.15%] index_select strided 16 : Elapsed 18.887 ms (377.733 ms / 20) 377.347 -> 377.374 ( +0.01%) [ +0.00% +0.09% +0.04% / +0.04% +0.01% +0.10%] index_select strided 64 : Elapsed 18.867 ms (377.347 ms / 20) 376.438 -> 378.052 ( +0.43%) [ +0.00% +0.49% +0.28% / +0.43% +0.43% +0.47%] index_select random : Elapsed 18.822 ms (376.438 ms / 20) 377.295 -> 377.302 ( +0.00%) [ +0.05% +0.00% +0.01% / +0.00% +0.16% +0.02%] index_select random_sorted : Elapsed 18.874 ms (377.476 ms / 20) 376.528 -> 377.170 ( +0.17%) [ +0.00% +0.10% +0.21% / +0.17% +0.44% +0.26%] index_select perm : Elapsed 18.826 ms (376.528 ms / 20) 376.512 -> 377.146 ( +0.17%) [ +0.01% +0.20% +0.00% / +0.27% +0.17% +0.21%] index_select perm_sorted : Elapsed 18.827 ms (376.531 ms / 20) out_shape = [100, 40, 50, 256] in_shape = [100, 20, 50, 256] idx_dim = 1 out_shape = [100, 20, 40, 256] in_shape = [100, 20, 50, 256] idx_dim = 2 out_shape = [100, 20, 50, 40] in_shape = [100, 20, 50, 256] idx_dim = 3 out_shape = [40, 20, 256, 50] in_shape = [100, 20, 256, 50] idx_dim = 0 out_shape = [100, 40, 256, 50] in_shape = [100, 20, 256, 50] idx_dim = 1 out_shape = [100, 20, 40, 50] in_shape = [100, 20, 256, 50] idx_dim = 2 out_shape = [100, 20, 256, 40] in_shape = [100, 20, 256, 50] idx_dim = 3 out_shape = [40, 50, 20, 256] in_shape = [100, 50, 20, 256] idx_dim = 0 out_shape = [100, 40, 20, 256] in_shape = [100, 50, 20, 256] idx_dim = 1 out_shape = [100, 50, 40, 256] in_shape = [100, 50, 20, 256] idx_dim = 2 out_shape = [100, 50, 20, 40] in_shape = [100, 50, 20, 256] idx_dim = 3 out_shape = [40, 50, 256, 20] in_shape = [100, 50, 256, 20] idx_dim = 0 B = [40, 50, 256, 20] (stride (256, 204800, 1, 10240)) A = [100, 50, 256, 20] (stride (1, 512000, 2000, 100)) dim = 0 238.261 -> 234.620 ( -1.53%) [ +0.15% +0.51% +0.00% / -0.09% -1.53% -1.09%] index_select const : Elapsed 11.931 ms (238.626 ms / 20) 238.199 -> 235.904 ( -0.96%) [ +0.19% +0.11% +0.00% / +0.18% -0.96% -0.70%] index_select wrap : Elapsed 11.933 ms (238.657 ms / 20) 238.492 -> 236.439 ( -0.86%) [ +0.00% +0.58% +0.13% / -0.08% -0.86% -0.77%] index_select linear : Elapsed 11.925 ms (238.492 ms / 20) 238.907 -> 236.820 ( -0.87%) [ +0.11% +0.11% +0.00% / -0.57% -0.87% -0.69%] index_select reverse : Elapsed 11.959 ms (239.173 ms / 20) 236.902 -> 235.262 ( -0.69%) [ +0.46% +0.00% +1.19% / +0.62% -0.69% +0.01%] index_select skip64 : Elapsed 11.900 ms (237.998 ms / 20) 237.676 -> 234.954 ( -1.15%) [ +0.29% +0.54% +0.00% / +0.79% -0.77% -1.15%] index_select skip256 : Elapsed 11.919 ms (238.371 ms / 20) 237.371 -> 236.516 ( -0.36%) [ +1.03% +0.80% +0.00% / +1.29% -0.36% -0.08%] index_select spread : Elapsed 11.991 ms (239.821 ms / 20) 237.123 -> 235.492 ( -0.69%) [ +1.05% +0.00% +0.62% / +0.92% -0.69% -0.53%] index_select strided 3 : Elapsed 11.980 ms (239.604 ms / 20) 238.395 -> 236.442 ( -0.82%) [ +0.54% +0.00% +0.27% / +0.39% -0.82% -0.74%] index_select strided 5 : Elapsed 11.984 ms (239.688 ms / 20) 237.858 -> 236.607 ( -0.53%) [ +0.63% +0.54% +0.00% / +0.28% -0.38% -0.53%] index_select strided 7 : Elapsed 11.968 ms (239.356 ms / 20) 237.825 -> 236.485 ( -0.56%) [ +0.20% +0.00% +0.88% / +0.46% -0.38% -0.56%] index_select strided 8 : Elapsed 11.915 ms (238.293 ms / 20) 238.132 -> 236.079 ( -0.86%) [ +0.00% +0.23% +0.58% / +0.31% -0.86% -0.49%] index_select strided 16 : Elapsed 11.907 ms (238.132 ms / 20) 237.983 -> 235.431 ( -1.07%) [ +0.00% +0.35% +0.11% / +0.02% -1.07% -0.99%] index_select strided 64 : Elapsed 11.899 ms (237.983 ms / 20) 237.671 -> 235.676 ( -0.84%) [ +0.37% +0.00% +0.88% / +0.49% -0.84% -0.22%] index_select random : Elapsed 11.927 ms (238.549 ms / 20) 238.834 -> 236.046 ( -1.17%) [ +0.04% +0.00% +0.34% / +0.15% -0.87% -1.17%] index_select random_sorted : Elapsed 11.946 ms (238.926 ms / 20) 237.948 -> 234.520 ( -1.44%) [ +0.69% +0.55% +0.00% / +0.08% -1.44% -1.20%] index_select perm : Elapsed 11.980 ms (239.595 ms / 20) 238.196 -> 235.670 ( -1.06%) [ +0.09% +0.56% +0.00% / +0.27% -1.06% -0.90%] index_select perm_sorted : Elapsed 11.921 ms (238.411 ms / 20) out_shape = [100, 40, 256, 20] in_shape = [100, 50, 256, 20] idx_dim = 1 B = [100, 40, 256, 20] (stride (800, 20, 80000, 1)) A = [100, 50, 256, 20] (stride (5120, 512000, 1, 256)) dim = 1 368.720 -> 368.994 ( +0.07%) [ +0.05% +0.27% +0.00% / +0.07% +0.27% +0.27%] index_select const : Elapsed 18.445 ms (368.905 ms / 20) 401.432 -> 401.655 ( +0.06%) [ +0.18% +0.20% +0.00% / +0.06% +0.28% +0.26%] index_select wrap : Elapsed 20.109 ms (402.173 ms / 20) 401.307 -> 402.011 ( +0.18%) [ +0.11% +0.12% +0.00% / +0.18% +0.25% +0.31%] index_select linear : Elapsed 20.088 ms (401.750 ms / 20) 402.438 -> 403.220 ( +0.19%) [ +0.09% +0.00% +0.11% / +0.19% +0.20% +0.38%] index_select reverse : Elapsed 20.140 ms (402.794 ms / 20) 369.127 -> 369.529 ( +0.11%) [ +0.26% +0.00% +0.20% / +0.11% +0.32% +0.13%] index_select skip64 : Elapsed 18.505 ms (370.091 ms / 20) 369.415 -> 369.752 ( +0.09%) [ +0.00% +0.12% +0.02% / +0.21% +0.09% +0.12%] index_select skip256 : Elapsed 18.471 ms (369.415 ms / 20) 402.153 -> 401.512 ( -0.16%) [ +0.06% +0.00% +0.17% / -0.16% +0.01% +0.11%] index_select spread : Elapsed 20.119 ms (402.386 ms / 20) 402.870 -> 403.224 ( +0.09%) [ +0.12% +0.00% +0.22% / +0.09% +0.17% +0.26%] index_select strided 3 : Elapsed 20.167 ms (403.338 ms / 20) 403.372 -> 402.323 ( -0.26%) [ +0.00% +0.04% +0.00% / -0.17% -0.18% -0.26%] index_select strided 5 : Elapsed 20.169 ms (403.386 ms / 20) 403.465 -> 403.445 ( -0.00%) [ +0.00% +0.02% +0.12% / -0.00% +0.06% +0.10%] index_select strided 7 : Elapsed 20.173 ms (403.465 ms / 20) 401.679 -> 401.972 ( +0.07%) [ +0.28% +0.18% +0.00% / +0.07% +0.51% +0.61%] index_select strided 8 : Elapsed 20.141 ms (402.812 ms / 20) 402.364 -> 401.770 ( -0.15%) [ +0.10% +0.00% +0.06% / -0.15% +0.38% +0.50%] index_select strided 16 : Elapsed 20.138 ms (402.760 ms / 20) 399.899 -> 400.089 ( +0.05%) [ +0.00% +0.10% +0.03% / +0.05% +0.22% +0.27%] index_select random : Elapsed 19.995 ms (399.899 ms / 20) 395.492 -> 395.922 ( +0.11%) [ +0.12% +0.11% +0.00% / +0.11% +0.32% +0.21%] index_select random_sorted : Elapsed 19.798 ms (395.951 ms / 20) 403.322 -> 402.617 ( -0.17%) [ +0.00% +0.00% +0.11% / +0.06% -0.17% -0.13%] index_select perm : Elapsed 20.166 ms (403.323 ms / 20) 402.102 -> 401.592 ( -0.13%) [ +0.07% +0.00% +0.10% / -0.13% +0.19% +0.15%] index_select perm_sorted : Elapsed 20.119 ms (402.379 ms / 20) out_shape = [100, 50, 40, 20] in_shape = [100, 50, 256, 20] idx_dim = 2 out_shape = [100, 50, 256, 40] in_shape = [100, 50, 256, 20] idx_dim = 3 B = [100, 50, 256, 40] (stride (10240, 1024000, 1, 256)) A = [100, 50, 256, 20] (stride (1000, 1, 100000, 50)) dim = 3 654.960 -> 652.037 ( -0.45%) [ +0.24% +0.00% +0.06% / -0.29% -0.45% -0.21%] index_add_ linear : Elapsed 32.827 ms (656.539 ms / 20) 642.829 -> 640.882 ( -0.30%) [ +0.19% +0.00% +0.11% / -0.17% -0.30% -0.17%] index_copy_ linear : Elapsed 32.202 ms (644.036 ms / 20) 652.667 -> 650.930 ( -0.27%) [ +0.35% +0.00% +0.06% / +0.31% -0.27% -0.05%] index_add_ reverse : Elapsed 32.747 ms (654.943 ms / 20) 640.756 -> 640.284 ( -0.07%) [ +0.37% +0.07% +0.00% / +0.35% -0.07% +0.03%] index_copy_ reverse : Elapsed 32.155 ms (643.098 ms / 20) 653.548 -> 652.553 ( -0.15%) [ +0.00% +0.14% +0.24% / +0.16% -0.03% -0.15%] index_add_ spread : Elapsed 32.677 ms (653.548 ms / 20) 642.189 -> 640.209 ( -0.31%) [ +0.00% +0.14% +0.08% / -0.07% -0.12% -0.31%] index_copy_ spread : Elapsed 32.109 ms (642.189 ms / 20) 654.210 -> 651.641 ( -0.39%) [ +0.18% +0.22% +0.00% / +0.22% -0.39% -0.15%] index_add_ strided 3 : Elapsed 32.768 ms (655.355 ms / 20) 642.487 -> 639.897 ( -0.40%) [ +0.00% +0.22% +0.12% / +0.27% -0.40% -0.19%] index_copy_ strided 3 : Elapsed 32.124 ms (642.487 ms / 20) 653.483 -> 652.014 ( -0.22%) [ +0.00% +0.10% +0.22% / +0.10% -0.22% -0.14%] index_add_ strided 7 : Elapsed 32.674 ms (653.483 ms / 20) 641.576 -> 640.766 ( -0.13%) [ +0.00% +0.06% +0.29% / +0.06% -0.13% -0.06%] index_copy_ strided 7 : Elapsed 32.079 ms (641.576 ms / 20) 653.538 -> 653.451 ( -0.01%) [ +0.06% +0.00% +0.01% / +0.13% +0.01% -0.01%] index_add_ perm : Elapsed 32.697 ms (653.941 ms / 20) 642.319 -> 641.962 ( -0.06%) [ +0.04% +0.02% +0.00% / +0.15% -0.06% -0.03%] index_copy_ perm : Elapsed 32.128 ms (642.560 ms / 20) 654.727 -> 653.115 ( -0.25%) [ +0.08% +0.01% +0.00% / +0.08% -0.25% -0.19%] index_add_ perm_sorted : Elapsed 32.761 ms (655.227 ms / 20) 642.172 -> 640.894 ( -0.20%) [ +0.13% +0.00% +0.09% / +0.09% -0.20% -0.11%] index_copy_ perm_sorted : Elapsed 32.150 ms (643.008 ms / 20) 1291.418 -> 1281.231 ( -0.79%) [ +0.06% +0.00% +0.08% / +0.11% -0.79% -0.73%] index_select const : Elapsed 64.609 ms (1292.187 ms / 20) 1317.155 -> 1316.650 ( -0.04%) [ +0.00% +0.09% +0.13% / +0.14% -0.04% -0.01%] index_select wrap : Elapsed 65.858 ms (1317.155 ms / 20) 1300.323 -> 1299.872 ( -0.03%) [ +0.00% +0.00% +0.02% / +0.06% -0.03% +0.05%] index_select linear : Elapsed 65.016 ms (1300.323 ms / 20) 1308.708 -> 1309.578 ( +0.07%) [ +0.00% +0.22% +0.04% / +0.07% +0.07% +0.17%] index_select reverse : Elapsed 65.435 ms (1308.708 ms / 20) 1290.699 -> 1277.563 ( -1.02%) [ +0.15% +0.00% +0.18% / +0.14% -1.02% -0.60%] index_select skip64 : Elapsed 64.632 ms (1292.641 ms / 20) 1292.046 -> 1280.217 ( -0.92%) [ +0.05% +0.14% +0.00% / +0.03% -0.83% -0.92%] index_select skip256 : Elapsed 64.634 ms (1292.678 ms / 20) 1310.892 -> 1309.021 ( -0.14%) [ +0.01% +0.09% +0.00% / +0.06% -0.13% -0.14%] index_select spread : Elapsed 65.552 ms (1311.049 ms / 20) 1324.358 -> 1324.943 ( +0.04%) [ +0.00% +0.16% +0.06% / +0.04% +0.11% +0.07%] index_select strided 3 : Elapsed 66.218 ms (1324.358 ms / 20) 1322.845 -> 1325.716 ( +0.22%) [ +0.22% +0.00% +0.11% / +0.22% +0.22% +0.24%] index_select strided 5 : Elapsed 66.288 ms (1325.757 ms / 20) 1327.400 -> 1327.479 ( +0.01%) [ +0.00% +0.05% +0.13% / +0.02% +0.16% +0.01%] index_select strided 7 : Elapsed 66.370 ms (1327.400 ms / 20) 1329.075 -> 1327.852 ( -0.09%) [ +0.04% +0.02% +0.00% / -0.05% -0.07% -0.09%] index_select strided 8 : Elapsed 66.480 ms (1329.592 ms / 20) 1327.283 -> 1326.217 ( -0.08%) [ +0.02% +0.00% +0.03% / -0.08% +0.14% +0.04%] index_select strided 16 : Elapsed 66.380 ms (1327.593 ms / 20) 1319.699 -> 1319.169 ( -0.04%) [ +0.09% +0.00% +0.07% / +0.11% -0.04% +0.05%] index_select random : Elapsed 66.044 ms (1320.886 ms / 20) 1304.064 -> 1303.424 ( -0.05%) [ +0.10% +0.00% +0.06% / -0.03% +0.03% -0.05%] index_select random_sorted : Elapsed 65.268 ms (1305.353 ms / 20) out_shape = [40, 256, 20, 50] in_shape = [100, 256, 20, 50] idx_dim = 0 out_shape = [100, 40, 20, 50] in_shape = [100, 256, 20, 50] idx_dim = 1 out_shape = [100, 256, 40, 50] in_shape = [100, 256, 20, 50] idx_dim = 2 out_shape = [100, 256, 20, 40] in_shape = [100, 256, 20, 50] idx_dim = 3 B = [100, 256, 20, 40] (stride (1, 80000, 4000, 100)) A = [100, 256, 20, 50] (stride (256000, 20, 1, 5120)) dim = 3 194.490 -> 187.506 ( -3.59%) [ +0.42% +1.47% +0.00% / +1.10% -3.41% -3.59%] index_select const : Elapsed 9.765 ms (195.308 ms / 20) 194.981 -> 193.074 ( -0.98%) [ +0.00% +0.29% +1.23% / -0.98% +0.32% +1.90%] index_select wrap : Elapsed 9.749 ms (194.981 ms / 20) 194.032 -> 196.397 ( +1.22%) [ +3.12% +2.75% +0.00% / +2.49% +1.41% +1.22%] index_select linear : Elapsed 10.005 ms (200.092 ms / 20) 188.348 -> 192.223 ( +2.06%) [ +0.00% +3.19% +3.26% / +2.06% +3.23% +2.13%] index_select reverse : Elapsed 9.417 ms (188.348 ms / 20) 193.373 -> 189.015 ( -2.25%) [ +0.00% +2.52% +1.68% / +1.91% +1.56% -2.25%] index_select skip64 : Elapsed 9.669 ms (193.373 ms / 20) 191.262 -> 188.939 ( -1.21%) [ +2.12% +0.00% +1.93% / +1.95% -1.21% +2.93%] index_select skip256 : Elapsed 9.766 ms (195.318 ms / 20) 189.131 -> 191.929 ( +1.48%) [ +1.48% +0.00% +0.30% / +1.48% +3.00% +3.13%] index_select spread : Elapsed 9.597 ms (191.937 ms / 20) 196.873 -> 197.774 ( +0.46%) [ +0.00% +1.66% +0.66% / +1.41% +0.46% +2.59%] index_select strided 3 : Elapsed 9.844 ms (196.873 ms / 20) 193.412 -> 193.863 ( +0.23%) [ +1.06% +0.00% +1.27% / +0.23% +2.14% +0.36%] index_select strided 5 : Elapsed 9.773 ms (195.461 ms / 20) 196.043 -> 192.583 ( -1.76%) [ +1.63% +1.09% +0.00% / +0.34% -1.76% +0.93%] index_select strided 7 : Elapsed 9.962 ms (199.242 ms / 20) 196.336 -> 189.447 ( -3.51%) [ +1.00% +0.18% +0.00% / +0.55% -3.51% -0.38%] index_select strided 8 : Elapsed 9.915 ms (198.306 ms / 20) 196.801 -> 194.484 ( -1.18%) [ +0.13% +0.78% +0.00% / -1.18% +0.21% +1.17%] index_select strided 16 : Elapsed 9.853 ms (197.062 ms / 20) 197.220 -> 199.200 ( +1.00%) [ +0.00% +0.03% +0.01% / +1.00% +2.04% +3.03%] index_select random : Elapsed 9.861 ms (197.220 ms / 20) 193.967 -> 195.955 ( +1.02%) [ +0.00% +1.79% +1.47% / +1.02% +2.54% +1.34%] index_select random_sorted : Elapsed 9.698 ms (193.967 ms / 20) 193.050 -> 194.328 ( +0.66%) [ +1.74% +0.40% +0.00% / +0.66% +2.97% +1.29%] index_select perm : Elapsed 9.821 ms (196.418 ms / 20) 186.965 -> 193.906 ( +3.71%) [ +0.00% +4.44% +3.01% / +3.71% +5.17% +4.07%] index_select perm_sorted : Elapsed 9.348 ms (186.965 ms / 20) out_shape = [40, 256, 50, 20] in_shape = [100, 256, 50, 20] idx_dim = 0 out_shape = [100, 40, 50, 20] in_shape = [100, 256, 50, 20] idx_dim = 1 B = [100, 40, 50, 20] (stride (1, 100, 80000, 4000)) A = [100, 256, 50, 20] (stride (1, 100000, 100, 5000)) dim = 1 68.507 -> 68.404 ( -0.15%) [ +0.00% +0.37% +0.37% / -0.15% +2.43% +2.70%] index_select const : Elapsed 3.425 ms (68.507 ms / 20) 84.617 -> 84.984 ( +0.43%) [ +0.00% +0.21% +0.19% / +0.43% +2.16% +1.37%] index_select wrap : Elapsed 4.231 ms (84.617 ms / 20) 84.694 -> 84.536 ( -0.19%) [ +0.00% +0.01% +0.00% / -0.19% +1.05% +1.56%] index_select linear : Elapsed 4.235 ms (84.694 ms / 20) 84.967 -> 84.895 ( -0.08%) [ +0.23% +0.17% +0.00% / -0.08% +0.37% +0.41%] index_select reverse : Elapsed 4.258 ms (85.160 ms / 20) 68.444 -> 68.434 ( -0.01%) [ +0.11% +0.00% +0.05% / -0.01% +2.39% +2.32%] index_select skip64 : Elapsed 3.426 ms (68.519 ms / 20) 68.318 -> 68.563 ( +0.36%) [ +0.36% +0.31% +0.00% / +0.36% +2.74% +2.84%] index_select skip256 : Elapsed 3.428 ms (68.564 ms / 20) 85.712 -> 84.069 ( -1.92%) [ +0.63% +0.65% +0.00% / +0.53% -1.92% -1.45%] index_select spread : Elapsed 4.312 ms (86.250 ms / 20) 84.378 -> 84.671 ( +0.35%) [ +0.00% +0.82% +0.42% / +0.35% +1.37% +0.77%] index_select strided 3 : Elapsed 4.219 ms (84.378 ms / 20) 84.486 -> 84.698 ( +0.25%) [ +0.15% +0.70% +0.00% / +0.25% +0.73% +0.95%] index_select strided 5 : Elapsed 4.230 ms (84.610 ms / 20) 86.391 -> 85.876 ( -0.60%) [ +0.00% +0.11% +0.24% / +0.56% -0.57% -0.60%] index_select strided 7 : Elapsed 4.320 ms (86.391 ms / 20) 84.112 -> 84.121 ( +0.01%) [ +0.18% +0.01% +0.00% / +0.01% +2.63% +2.38%] index_select strided 8 : Elapsed 4.213 ms (84.266 ms / 20) 85.608 -> 85.323 ( -0.33%) [ +0.00% +0.50% +0.29% / +0.13% -0.33% -0.05%] index_select strided 16 : Elapsed 4.280 ms (85.608 ms / 20) 81.270 -> 81.352 ( +0.10%) [ +0.00% +0.56% +0.16% / +0.10% +2.18% +2.02%] index_select strided 64 : Elapsed 4.063 ms (81.270 ms / 20) 84.451 -> 84.453 ( +0.00%) [ +0.10% +0.00% +0.34% / +0.00% +1.85% +1.60%] index_select strided 100 : Elapsed 4.227 ms (84.536 ms / 20) 84.718 -> 84.964 ( +0.29%) [ +0.29% +0.50% +0.00% / +0.29% +0.78% +1.02%] index_select strided 255 : Elapsed 4.248 ms (84.963 ms / 20) 83.668 -> 83.833 ( +0.20%) [ +0.00% +0.66% +0.60% / +0.20% +2.64% +2.62%] index_select random : Elapsed 4.183 ms (83.668 ms / 20) 83.307 -> 83.433 ( +0.15%) [ +0.00% +0.27% +0.20% / +0.15% +2.48% +2.34%] index_select random_sorted : Elapsed 4.165 ms (83.307 ms / 20) 83.151 -> 83.831 ( +0.82%) [ +0.91% +0.90% +0.00% / +0.82% +4.70% +5.07%] index_select perm : Elapsed 4.195 ms (83.905 ms / 20) 84.357 -> 84.286 ( -0.08%) [ +0.00% +0.35% +0.43% / -0.08% +1.74% +2.18%] index_select perm_sorted : Elapsed 4.218 ms (84.357 ms / 20) out_shape = [100, 256, 40, 20] in_shape = [100, 256, 50, 20] idx_dim = 2 out_shape = [100, 256, 50, 40] in_shape = [100, 256, 50, 20] idx_dim = 3 B = [100, 256, 50, 40] (stride (1, 4000, 1024000, 100)) A = [100, 256, 50, 20] (stride (1000, 100000, 1, 50)) dim = 3 894.132 -> 895.436 ( +0.15%) [ +0.00% +0.60% +0.29% / +0.15% +0.45% +0.65%] index_add_ linear : Elapsed 44.707 ms (894.132 ms / 20) 645.579 -> 645.928 ( +0.05%) [ +0.14% +0.11% +0.00% / +0.05% +0.60% +0.52%] index_copy_ linear : Elapsed 32.325 ms (646.498 ms / 20) 917.146 -> 918.962 ( +0.20%) [ +0.00% +0.20% +0.37% / +0.20% +0.83% +0.53%] index_add_ reverse : Elapsed 45.857 ms (917.146 ms / 20) 660.363 -> 660.711 ( +0.05%) [ +0.30% +0.20% +0.00% / +0.05% +0.60% +0.28%] index_copy_ reverse : Elapsed 33.118 ms (662.368 ms / 20) 911.978 -> 910.943 ( -0.11%) [ +0.38% +0.00% +0.13% / +0.08% -0.11% +0.10%] index_add_ spread : Elapsed 45.773 ms (915.454 ms / 20) 657.366 -> 656.598 ( -0.12%) [ +0.26% +0.00% +0.10% / -0.12% +0.34% +0.40%] index_copy_ spread : Elapsed 32.954 ms (659.075 ms / 20) 916.803 -> 914.015 ( -0.30%) [ +0.05% +0.00% +0.21% / -0.30% +0.38% +0.55%] index_add_ strided 3 : Elapsed 45.863 ms (917.257 ms / 20) 663.731 -> 663.351 ( -0.06%) [ +0.00% +0.02% +0.14% / -0.06% +0.08% +0.12%] index_copy_ strided 3 : Elapsed 33.187 ms (663.731 ms / 20) 910.951 -> 910.359 ( -0.06%) [ +0.00% +0.30% +0.46% / -0.06% +2.36% +2.35%] index_add_ strided 7 : Elapsed 45.548 ms (910.951 ms / 20) 658.249 -> 658.075 ( -0.03%) [ +0.00% +0.09% +0.03% / -0.03% +0.88% +1.13%] index_copy_ strided 7 : Elapsed 32.912 ms (658.249 ms / 20) 926.358 -> 904.917 ( -2.31%) [ +0.00% +0.06% +0.04% / -0.24% -2.31% -2.10%] index_add_ perm : Elapsed 46.318 ms (926.358 ms / 20) 660.857 -> 659.411 ( -0.22%) [ +0.00% +0.14% +0.10% / +0.21% -0.22% -0.19%] index_copy_ perm : Elapsed 33.043 ms (660.857 ms / 20) 924.369 -> 901.820 ( -2.44%) [ +0.02% +0.00% +0.00% / -0.02% -2.19% -2.44%] index_add_ perm_sorted : Elapsed 46.228 ms (924.557 ms / 20) 655.082 -> 649.692 ( -0.82%) [ +0.03% +0.06% +0.00% / -0.02% -0.79% -0.82%] index_copy_ perm_sorted : Elapsed 32.766 ms (655.310 ms / 20) 1369.823 -> 1371.029 ( +0.09%) [ +0.25% +0.14% +0.00% / +0.09% +0.56% +0.57%] index_select const : Elapsed 68.664 ms (1373.286 ms / 20) 1371.950 -> 1371.580 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.19% +0.23%] index_select wrap : Elapsed 68.635 ms (1372.702 ms / 20) 1370.831 -> 1372.674 ( +0.13%) [ +0.14% +0.15% +0.00% / +0.13% +0.29% +0.29%] index_select linear : Elapsed 68.640 ms (1372.793 ms / 20) 1371.177 -> 1370.208 ( -0.07%) [ +0.16% +0.00% +0.13% / -0.07% +0.29% +0.34%] index_select reverse : Elapsed 68.667 ms (1373.348 ms / 20) 1370.247 -> 1372.411 ( +0.16%) [ +0.22% +0.21% +0.00% / +0.16% +0.29% +0.32%] index_select skip64 : Elapsed 68.662 ms (1373.231 ms / 20) 1367.996 -> 1369.380 ( +0.10%) [ +0.23% +0.00% +0.11% / +0.10% +0.53% +0.38%] index_select skip256 : Elapsed 68.555 ms (1371.093 ms / 20) 1371.400 -> 1373.259 ( +0.14%) [ +0.09% +0.15% +0.00% / +0.14% +0.22% +0.28%] index_select spread : Elapsed 68.631 ms (1372.621 ms / 20) 1370.907 -> 1373.316 ( +0.18%) [ +0.13% +0.10% +0.00% / +0.18% +0.37% +0.19%] index_select strided 3 : Elapsed 68.638 ms (1372.751 ms / 20) 1372.670 -> 1371.249 ( -0.10%) [ +0.00% +0.04% +0.05% / -0.02% -0.08% -0.10%] index_select strided 5 : Elapsed 68.633 ms (1372.670 ms / 20) 1371.221 -> 1370.555 ( -0.05%) [ +0.00% +0.13% +0.12% / -0.01% +0.02% -0.05%] index_select strided 7 : Elapsed 68.561 ms (1371.221 ms / 20) 1371.711 -> 1372.294 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.19% +0.15%] index_select strided 8 : Elapsed 68.612 ms (1372.233 ms / 20) 1370.737 -> 1373.844 ( +0.23%) [ +0.00% +0.11% +0.09% / +0.23% +0.35% +0.33%] index_select strided 16 : Elapsed 68.537 ms (1370.737 ms / 20) 1371.115 -> 1371.569 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.29% +0.27%] index_select random : Elapsed 68.577 ms (1371.547 ms / 20) 1371.648 -> 1372.779 ( +0.08%) [ +0.11% +0.00% +0.01% / +0.08% +0.11% +0.24%] index_select random_sorted : Elapsed 68.655 ms (1373.109 ms / 20) B = [100, 256, 50, 40] (stride (256, 1, 25600, 1280000)) A = [100, 256, 50, 20] (stride (256000, 1, 5120, 256)) dim = 3 1144.392 -> 1137.614 ( -0.59%) [ +0.00% +0.11% +0.05% / -0.07% -0.59% -0.12%] index_add_ linear : Elapsed 57.220 ms (1144.392 ms / 20) 865.362 -> 858.953 ( -0.74%) [ +0.32% +0.00% +0.13% / -0.03% -0.74% -0.58%] index_copy_ linear : Elapsed 43.406 ms (868.111 ms / 20) 1150.280 -> 1139.845 ( -0.91%) [ +0.00% +0.06% +0.09% / +0.09% -0.81% -0.91%] index_add_ reverse : Elapsed 57.514 ms (1150.280 ms / 20) 864.367 -> 858.105 ( -0.72%) [ +0.07% +0.00% +0.19% / -0.18% -0.66% -0.72%] index_copy_ reverse : Elapsed 43.251 ms (865.012 ms / 20) 1147.038 -> 1134.316 ( -1.11%) [ +0.11% +0.00% +0.04% / +0.28% -0.82% -1.11%] index_add_ spread : Elapsed 57.412 ms (1148.246 ms / 20) 866.502 -> 856.742 ( -1.13%) [ +0.14% +0.00% +0.04% / +0.12% -0.91% -1.13%] index_copy_ spread : Elapsed 43.385 ms (867.702 ms / 20) 1147.711 -> 1144.514 ( -0.28%) [ +0.11% +0.00% +0.16% / -0.28% -0.02% -0.03%] index_add_ strided 3 : Elapsed 57.448 ms (1148.965 ms / 20) 864.873 -> 857.114 ( -0.90%) [ +0.27% +0.00% +0.02% / -0.04% -0.90% -0.57%] index_copy_ strided 3 : Elapsed 43.359 ms (867.173 ms / 20) 1151.088 -> 1138.879 ( -1.06%) [ +0.03% +0.15% +0.00% / -0.01% -1.06% -1.06%] index_add_ strided 7 : Elapsed 57.574 ms (1151.473 ms / 20) 866.799 -> 855.394 ( -1.32%) [ +0.09% +0.00% +0.11% / +0.17% -1.32% -1.28%] index_copy_ strided 7 : Elapsed 43.378 ms (867.555 ms / 20) 1138.133 -> 1131.214 ( -0.61%) [ +0.27% +0.00% +0.10% / +0.28% -0.47% -0.61%] index_add_ perm : Elapsed 57.062 ms (1141.248 ms / 20) 863.842 -> 857.869 ( -0.69%) [ +0.10% +0.00% +0.21% / +0.03% -0.69% -0.50%] index_copy_ perm : Elapsed 43.235 ms (864.696 ms / 20) 1148.819 -> 1144.172 ( -0.40%) [ +0.06% +0.05% +0.00% / -0.12% -0.40% -0.35%] index_add_ perm_sorted : Elapsed 57.477 ms (1149.538 ms / 20) 865.263 -> 857.066 ( -0.95%) [ +0.00% +0.10% +0.07% / -0.20% -0.86% -0.95%] index_copy_ perm_sorted : Elapsed 43.263 ms (865.263 ms / 20) 1864.054 -> 1855.943 ( -0.44%) [ +0.02% +0.01% +0.00% / +0.03% -0.44% -0.40%] index_select const : Elapsed 93.217 ms (1864.341 ms / 20) 1877.956 -> 1875.954 ( -0.11%) [ +0.04% +0.00% +0.17% / +0.17% +0.13% -0.11%] index_select wrap : Elapsed 93.932 ms (1878.646 ms / 20) 1879.957 -> 1868.318 ( -0.62%) [ +0.00% +0.13% +0.16% / +0.08% -0.60% -0.62%] index_select linear : Elapsed 93.998 ms (1879.957 ms / 20) 1904.845 -> 1885.155 ( -1.03%) [ +0.08% +0.25% +0.00% / +0.08% -1.03% -1.03%] index_select reverse : Elapsed 95.322 ms (1906.435 ms / 20) 1861.507 -> 1860.388 ( -0.06%) [ +0.00% +0.01% +0.10% / -0.01% +0.19% -0.06%] index_select skip64 : Elapsed 93.075 ms (1861.507 ms / 20) 1857.486 -> 1858.460 ( +0.05%) [ +0.00% +0.23% +0.01% / +0.14% +0.05% +0.07%] index_select skip256 : Elapsed 92.874 ms (1857.486 ms / 20) 1882.783 -> 1873.939 ( -0.47%) [ +0.00% +0.10% +0.18% / +0.06% -0.47% -0.29%] index_select spread : Elapsed 94.139 ms (1882.783 ms / 20) 1901.823 -> 1891.306 ( -0.55%) [ +0.00% +0.04% +0.06% / -0.05% -0.44% -0.55%] index_select strided 3 : Elapsed 95.091 ms (1901.823 ms / 20) 1848.173 -> 1849.251 ( +0.06%) [ +0.00% +0.00% +0.16% / +0.06% +2.30% +2.33%] index_select strided 5 : Elapsed 92.413 ms (1848.254 ms / 20) 1888.612 -> 1888.655 ( +0.00%) [ +0.04% +0.00% +0.07% / +0.05% +0.06% +0.00%] index_select strided 7 : Elapsed 94.470 ms (1889.397 ms / 20) 1889.430 -> 1888.271 ( -0.06%) [ +0.00% +0.15% +0.13% / -0.06% +0.34% +0.34%] index_select strided 8 : Elapsed 94.472 ms (1889.430 ms / 20) 1873.853 -> 1875.293 ( +0.08%) [ +0.06% +0.16% +0.00% / +0.08% +0.24% +0.31%] index_select strided 16 : Elapsed 93.752 ms (1875.048 ms / 20) 1863.932 -> 1865.575 ( +0.09%) [ +0.00% +0.08% +0.01% / +0.09% +0.75% +0.73%] index_select random : Elapsed 93.197 ms (1863.932 ms / 20) 1887.011 -> 1881.009 ( -0.32%) [ +0.03% +0.13% +0.00% / +0.00% -0.32% -0.32%] index_select random_sorted : Elapsed 94.384 ms (1887.670 ms / 20) out_shape = [40, 20, 50, 100] in_shape = [256, 20, 50, 100] idx_dim = 0 B = [40, 20, 50, 100] (stride (100, 200000, 4000, 1)) A = [256, 20, 50, 100] (stride (1, 25600, 512000, 256)) dim = 0 92.051 -> 92.149 ( +0.11%) [ +0.00% +0.19% +0.10% / +0.11% +0.22% +0.14%] index_select const : Elapsed 4.603 ms (92.051 ms / 20) 92.624 -> 92.633 ( +0.01%) [ +0.19% +0.00% +0.07% / +0.15% +0.01% +0.13%] index_select wrap : Elapsed 4.640 ms (92.804 ms / 20) 92.693 -> 92.744 ( +0.06%) [ +0.04% +0.14% +0.00% / +0.16% +0.06% +0.11%] index_select linear : Elapsed 4.637 ms (92.731 ms / 20) 92.619 -> 92.746 ( +0.14%) [ +0.26% +0.12% +0.00% / +0.29% +0.20% +0.14%] index_select reverse : Elapsed 4.643 ms (92.862 ms / 20) 92.052 -> 92.063 ( +0.01%) [ +0.02% +0.00% +0.16% / +0.03% +0.01% +0.10%] index_select skip64 : Elapsed 4.603 ms (92.070 ms / 20) 92.073 -> 92.013 ( -0.07%) [ +0.12% +0.00% +0.11% / -0.07% +0.02% -0.02%] index_select skip256 : Elapsed 4.609 ms (92.188 ms / 20) 94.577 -> 94.529 ( -0.05%) [ +0.08% +0.00% +0.00% / +0.08% -0.05% +0.01%] index_select spread : Elapsed 4.733 ms (94.653 ms / 20) 93.890 -> 93.888 ( -0.00%) [ +0.14% +0.00% +0.11% / -0.00% +0.02% +0.06%] index_select strided 3 : Elapsed 4.701 ms (94.021 ms / 20) 94.419 -> 94.375 ( -0.05%) [ +0.00% +0.02% +0.08% / +0.08% +0.03% -0.05%] index_select strided 5 : Elapsed 4.721 ms (94.419 ms / 20) 94.726 -> 94.615 ( -0.12%) [ +0.00% +0.03% +0.03% / -0.03% -0.04% -0.12%] index_select strided 7 : Elapsed 4.736 ms (94.726 ms / 20) 94.777 -> 94.800 ( +0.02%) [ +0.12% +0.06% +0.00% / +0.08% +0.02% +0.04%] index_select strided 8 : Elapsed 4.745 ms (94.890 ms / 20) 94.864 -> 94.744 ( -0.13%) [ +0.01% +0.00% +0.02% / -0.05% -0.13% -0.07%] index_select strided 16 : Elapsed 4.744 ms (94.876 ms / 20) 94.536 -> 94.486 ( -0.05%) [ +0.06% +0.07% +0.00% / +0.08% +0.01% -0.05%] index_select strided 64 : Elapsed 4.730 ms (94.594 ms / 20) 94.787 -> 94.819 ( +0.03%) [ +0.08% +0.00% +0.07% / +0.10% +0.05% +0.03%] index_select strided 100 : Elapsed 4.743 ms (94.863 ms / 20) 92.871 -> 92.773 ( -0.11%) [ +0.08% +0.03% +0.00% / -0.11% -0.01% +0.03%] index_select strided 255 : Elapsed 4.647 ms (92.943 ms / 20) 94.729 -> 94.670 ( -0.06%) [ +0.04% +0.00% +0.00% / +0.09% +0.06% -0.06%] index_select random : Elapsed 4.738 ms (94.769 ms / 20) 94.123 -> 94.090 ( -0.04%) [ +0.00% +0.01% +0.01% / -0.00% -0.02% -0.04%] index_select random_sorted : Elapsed 4.706 ms (94.123 ms / 20) 94.726 -> 94.708 ( -0.02%) [ +0.02% +0.00% +0.04% / +0.06% -0.02% +0.03%] index_select perm : Elapsed 4.737 ms (94.749 ms / 20) 94.378 -> 94.303 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.04% -0.04% -0.08%] index_select perm_sorted : Elapsed 4.720 ms (94.405 ms / 20) out_shape = [256, 40, 50, 100] in_shape = [256, 20, 50, 100] idx_dim = 1 out_shape = [256, 20, 40, 100] in_shape = [256, 20, 50, 100] idx_dim = 2 out_shape = [256, 20, 50, 40] in_shape = [256, 20, 50, 100] idx_dim = 3 B = [256, 20, 50, 40] (stride (1, 512000, 256, 12800)) A = [256, 20, 50, 100] (stride (2000, 1, 512000, 20)) dim = 3 336.201 -> 336.472 ( +0.08%) [ +0.00% +0.06% +0.12% / +0.08% +2.82% +2.67%] index_select const : Elapsed 16.810 ms (336.201 ms / 20) 351.770 -> 349.491 ( -0.65%) [ +0.30% +0.09% +0.00% / +0.19% -0.65% -0.48%] index_select wrap : Elapsed 17.642 ms (352.840 ms / 20) 351.763 -> 351.242 ( -0.15%) [ +0.26% +0.18% +0.00% / +0.20% -0.14% -0.15%] index_select linear : Elapsed 17.634 ms (352.676 ms / 20) 345.390 -> 343.461 ( -0.56%) [ +0.10% +0.01% +0.00% / -0.06% -0.56% -0.55%] index_select reverse : Elapsed 17.287 ms (345.733 ms / 20) 336.677 -> 336.043 ( -0.19%) [ +0.00% +0.32% +0.02% / -0.19% +2.73% +2.73%] index_select skip64 : Elapsed 16.834 ms (336.677 ms / 20) 335.760 -> 336.716 ( +0.28%) [ +0.00% +0.15% +0.37% / +0.28% +2.99% +3.08%] index_select skip256 : Elapsed 16.788 ms (335.760 ms / 20) 345.565 -> 345.682 ( +0.03%) [ +0.21% +0.09% +0.00% / +0.03% +0.12% +0.52%] index_select spread : Elapsed 17.314 ms (346.279 ms / 20) 345.448 -> 345.033 ( -0.12%) [ +0.07% +0.00% +0.00% / -0.04% -0.12% +0.04%] index_select strided 3 : Elapsed 17.285 ms (345.706 ms / 20) 342.117 -> 342.059 ( -0.02%) [ +0.11% +0.18% +0.00% / -0.02% +0.71% +0.88%] index_select strided 5 : Elapsed 17.124 ms (342.489 ms / 20) 343.567 -> 344.296 ( +0.21%) [ +0.23% +0.13% +0.00% / +0.27% +0.21% +0.27%] index_select strided 7 : Elapsed 17.218 ms (344.357 ms / 20) 343.730 -> 343.744 ( +0.00%) [ +0.00% +0.01% +0.08% / +0.10% +0.11% +0.00%] index_select strided 8 : Elapsed 17.187 ms (343.730 ms / 20) 342.914 -> 343.279 ( +0.11%) [ +0.15% +0.31% +0.00% / +0.11% +0.47% +0.27%] index_select strided 16 : Elapsed 17.172 ms (343.432 ms / 20) 345.916 -> 346.845 ( +0.27%) [ +0.12% +0.08% +0.00% / +0.27% +0.63% +0.70%] index_select strided 64 : Elapsed 17.317 ms (346.336 ms / 20) 347.907 -> 348.045 ( +0.04%) [ +0.02% +0.25% +0.00% / +0.04% +2.03% +1.92%] index_select random : Elapsed 17.400 ms (347.992 ms / 20) 348.481 -> 349.414 ( +0.27%) [ +0.00% +0.02% +0.03% / +0.27% +0.42% +0.43%] index_select random_sorted : Elapsed 17.424 ms (348.481 ms / 20) 347.611 -> 344.469 ( -0.90%) [ +0.09% +0.23% +0.00% / +0.09% -0.73% -0.90%] index_select perm : Elapsed 17.396 ms (347.924 ms / 20) 344.345 -> 345.123 ( +0.23%) [ +0.04% +0.00% +0.29% / +0.23% +0.59% +0.56%] index_select perm_sorted : Elapsed 17.224 ms (344.481 ms / 20) out_shape = [40, 20, 100, 50] in_shape = [256, 20, 100, 50] idx_dim = 0 B = [40, 20, 100, 50] (stride (2000, 100, 1, 80000)) A = [256, 20, 100, 50] (stride (100000, 100, 1, 2000)) dim = 0 29.620 -> 29.880 ( +0.88%) [ +3.24% +0.00% +1.30% / +0.88% +8.25% +8.42%] index_select const : Elapsed 1.529 ms (30.581 ms / 20) 34.110 -> 33.088 ( -3.00%) [ +1.28% +0.00% +0.58% / -1.37% -2.43% -3.00%] index_select wrap : Elapsed 1.727 ms (34.547 ms / 20) 34.596 -> 32.893 ( -4.92%) [ +1.61% +0.00% +2.52% / -2.76% -3.97% -4.92%] index_select linear : Elapsed 1.758 ms (35.152 ms / 20) 33.707 -> 33.453 ( -0.75%) [ +0.00% +1.34% +2.90% / +3.41% -0.75% +1.09%] index_select reverse : Elapsed 1.685 ms (33.707 ms / 20) 29.895 -> 30.241 ( +1.16%) [ +0.00% +1.82% +0.54% / +1.16% +6.93% +6.77%] index_select skip64 : Elapsed 1.495 ms (29.895 ms / 20) 29.804 -> 30.051 ( +0.83%) [ +2.26% +1.70% +0.00% / +0.83% +8.60% +8.53%] index_select skip256 : Elapsed 1.524 ms (30.477 ms / 20) 32.607 -> 33.013 ( +1.25%) [ +0.00% +2.97% +1.50% / +1.25% +10.26% +9.54%] index_select spread : Elapsed 1.630 ms (32.607 ms / 20) 33.736 -> 33.677 ( -0.17%) [ +0.04% +0.00% +3.15% / -0.17% +7.15% +7.19%] index_select strided 3 : Elapsed 1.687 ms (33.748 ms / 20) 35.365 -> 35.510 ( +0.41%) [ +4.75% +0.00% +0.83% / +0.41% +2.98% +0.73%] index_select strided 5 : Elapsed 1.852 ms (37.044 ms / 20) 36.426 -> 34.684 ( -4.78%) [ +0.00% +0.85% +0.21% / -0.60% -3.96% -4.78%] index_select strided 7 : Elapsed 1.821 ms (36.426 ms / 20) 36.549 -> 36.817 ( +0.73%) [ +1.97% +2.96% +0.00% / +4.39% +0.73% +2.71%] index_select strided 8 : Elapsed 1.863 ms (37.269 ms / 20) 34.086 -> 34.377 ( +0.85%) [ +0.27% +0.00% +2.09% / +0.85% +5.10% +3.02%] index_select strided 16 : Elapsed 1.709 ms (34.177 ms / 20) 30.925 -> 30.808 ( -0.38%) [ +1.37% +0.00% +1.78% / -0.38% +12.34% +13.35%] index_select strided 64 : Elapsed 1.568 ms (31.350 ms / 20) 35.868 -> 36.680 ( +2.26%) [ +0.84% +0.52% +0.00% / +2.26% +6.85% +7.56%] index_select strided 100 : Elapsed 1.809 ms (36.170 ms / 20) 34.654 -> 34.975 ( +0.93%) [ +2.17% +0.86% +0.00% / +1.65% +0.93% +2.41%] index_select strided 255 : Elapsed 1.770 ms (35.407 ms / 20) 36.261 -> 36.299 ( +0.10%) [ +1.76% +0.74% +0.00% / +0.10% +2.05% +1.27%] index_select random : Elapsed 1.845 ms (36.898 ms / 20) 35.211 -> 34.996 ( -0.61%) [ +0.43% +0.00% +2.14% / +1.15% +0.92% -0.61%] index_select random_sorted : Elapsed 1.768 ms (35.363 ms / 20) 36.160 -> 36.008 ( -0.42%) [ +2.74% +2.89% +0.00% / -0.42% +1.23% +1.97%] index_select perm : Elapsed 1.857 ms (37.149 ms / 20) 34.750 -> 34.987 ( +0.68%) [ +0.43% +0.49% +0.00% / +0.77% +0.68% +2.50%] index_select perm_sorted : Elapsed 1.745 ms (34.899 ms / 20) out_shape = [256, 40, 100, 50] in_shape = [256, 20, 100, 50] idx_dim = 1 B = [256, 40, 100, 50] (stride (4000, 100, 1, 1024000)) A = [256, 20, 100, 50] (stride (100000, 5000, 50, 1)) dim = 1 925.269 -> 924.240 ( -0.11%) [ +0.01% +0.00% +0.06% / -0.11% +1.71% +1.62%] index_add_ linear : Elapsed 46.266 ms (925.324 ms / 20) 642.643 -> 642.589 ( -0.01%) [ +0.32% +0.20% +0.00% / +0.22% +0.02% -0.01%] index_copy_ linear : Elapsed 32.236 ms (644.723 ms / 20) 930.733 -> 917.438 ( -1.43%) [ +0.15% +0.38% +0.00% / +0.07% -1.26% -1.43%] index_add_ reverse : Elapsed 46.604 ms (932.084 ms / 20) 643.282 -> 644.045 ( +0.12%) [ +0.24% +0.21% +0.00% / +0.12% +0.42% +0.37%] index_copy_ reverse : Elapsed 32.240 ms (644.805 ms / 20) 934.630 -> 936.604 ( +0.21%) [ +0.33% +0.42% +0.00% / +0.21% +1.32% +1.26%] index_add_ spread : Elapsed 46.886 ms (937.715 ms / 20) 648.869 -> 649.604 ( +0.11%) [ +0.08% +0.04% +0.00% / +0.12% +0.11% +0.29%] index_copy_ spread : Elapsed 32.469 ms (649.378 ms / 20) 942.437 -> 942.477 ( +0.00%) [ +0.09% +0.37% +0.00% / +0.09% +0.09% +0.00%] index_add_ strided 3 : Elapsed 47.163 ms (943.262 ms / 20) 648.781 -> 648.467 ( -0.05%) [ +0.00% +0.04% +0.00% / -0.05% +0.25% +0.30%] index_copy_ strided 3 : Elapsed 32.440 ms (648.807 ms / 20) 931.311 -> 929.857 ( -0.16%) [ +0.25% +0.00% +0.24% / -0.16% +3.46% +3.52%] index_add_ strided 7 : Elapsed 46.681 ms (933.628 ms / 20) 651.807 -> 651.865 ( +0.01%) [ +0.00% +0.24% +0.12% / +0.01% +0.90% +0.87%] index_copy_ strided 7 : Elapsed 32.590 ms (651.807 ms / 20) 934.908 -> 934.740 ( -0.02%) [ +0.06% +0.00% +0.24% / -0.02% +0.13% +0.32%] index_add_ perm : Elapsed 46.772 ms (935.447 ms / 20) 650.393 -> 650.456 ( +0.01%) [ +0.17% +0.00% +0.19% / +0.01% +0.38% +0.35%] index_copy_ perm : Elapsed 32.575 ms (651.502 ms / 20) 927.917 -> 925.651 ( -0.24%) [ +0.01% +0.03% +0.00% / -0.24% +0.31% +0.32%] index_add_ perm_sorted : Elapsed 46.402 ms (928.040 ms / 20) 642.634 -> 642.484 ( -0.02%) [ +0.00% +0.04% +0.04% / -0.02% +0.43% +0.33%] index_copy_ perm_sorted : Elapsed 32.132 ms (642.634 ms / 20) 1362.280 -> 1359.678 ( -0.19%) [ +0.13% +0.03% +0.00% / -0.01% -0.19% -0.17%] index_select const : Elapsed 68.204 ms (1364.079 ms / 20) 1364.026 -> 1363.897 ( -0.01%) [ +0.17% +0.10% +0.00% / -0.01% +0.18% +0.16%] index_select wrap : Elapsed 68.318 ms (1366.354 ms / 20) 1364.026 -> 1364.106 ( +0.01%) [ +0.07% +0.10% +0.00% / +0.01% +0.12% +0.05%] index_select linear : Elapsed 68.247 ms (1364.940 ms / 20) 1362.798 -> 1364.376 ( +0.12%) [ +0.07% +0.01% +0.00% / +0.12% +0.19% +0.18%] index_select reverse : Elapsed 68.187 ms (1363.743 ms / 20) 1357.327 -> 1358.869 ( +0.11%) [ +0.12% +0.14% +0.00% / +0.11% +0.31% +0.45%] index_select skip64 : Elapsed 67.951 ms (1359.016 ms / 20) 1356.492 -> 1357.959 ( +0.11%) [ +0.14% +0.14% +0.00% / +0.11% +0.49% +0.48%] index_select skip256 : Elapsed 67.920 ms (1358.399 ms / 20) 1363.277 -> 1364.631 ( +0.10%) [ +0.10% +0.00% +0.02% / +0.10% +0.18% +0.21%] index_select spread : Elapsed 68.231 ms (1364.616 ms / 20) 1363.451 -> 1363.873 ( +0.03%) [ +0.06% +0.12% +0.00% / +0.05% +0.03% +0.10%] index_select strided 3 : Elapsed 68.215 ms (1364.299 ms / 20) 1363.147 -> 1357.299 ( -0.43%) [ +0.06% +0.01% +0.00% / +0.10% -0.31% -0.43%] index_select strided 5 : Elapsed 68.201 ms (1364.015 ms / 20) 1364.284 -> 1358.881 ( -0.40%) [ +0.00% +0.07% +0.01% / -0.02% -0.40% -0.31%] index_select strided 7 : Elapsed 68.214 ms (1364.284 ms / 20) 1363.334 -> 1363.215 ( -0.01%) [ +0.11% +0.07% +0.00% / -0.01% +0.21% +0.10%] index_select strided 8 : Elapsed 68.242 ms (1364.836 ms / 20) 1363.685 -> 1364.096 ( +0.03%) [ +0.07% +0.00% +0.11% / +0.03% +0.19% +0.10%] index_select strided 16 : Elapsed 68.233 ms (1364.656 ms / 20) 1363.438 -> 1362.935 ( -0.04%) [ +0.00% +0.20% +0.01% / -0.04% +0.24% +0.22%] index_select random : Elapsed 68.172 ms (1363.438 ms / 20) 1362.384 -> 1363.411 ( +0.08%) [ +0.00% +0.16% +0.15% / +0.08% +0.22% +0.27%] index_select random_sorted : Elapsed 68.119 ms (1362.384 ms / 20) out_shape = [256, 20, 40, 50] in_shape = [256, 20, 100, 50] idx_dim = 2 out_shape = [256, 20, 100, 40] in_shape = [256, 20, 100, 50] idx_dim = 3 out_shape = [40, 50, 20, 100] in_shape = [256, 50, 20, 100] idx_dim = 0 out_shape = [256, 40, 20, 100] in_shape = [256, 50, 20, 100] idx_dim = 1 B = [256, 40, 20, 100] (stride (1, 5120, 256, 204800)) A = [256, 50, 20, 100] (stride (1, 5120, 256, 256000)) dim = 1 736.032 -> 733.713 ( -0.32%) [ +0.34% +0.17% +0.00% / +0.29% -0.09% -0.32%] index_select const : Elapsed 36.928 ms (738.566 ms / 20) 745.391 -> 736.639 ( -1.17%) [ +0.24% +0.31% +0.00% / +0.23% -1.17% -1.17%] index_select wrap : Elapsed 37.357 ms (747.147 ms / 20) 746.745 -> 735.304 ( -1.53%) [ +0.18% +0.00% +0.22% / +0.07% -1.43% -1.53%] index_select linear : Elapsed 37.406 ms (748.119 ms / 20) 742.579 -> 738.291 ( -0.58%) [ +0.00% +0.36% +0.10% / -0.08% -0.58% -0.45%] index_select reverse : Elapsed 37.129 ms (742.579 ms / 20) 734.686 -> 735.493 ( +0.11%) [ +0.00% +0.12% +0.08% / +0.11% +0.18% +0.16%] index_select skip64 : Elapsed 36.734 ms (734.686 ms / 20) 734.276 -> 734.385 ( +0.01%) [ +0.08% +0.00% +0.22% / +0.18% +0.34% +0.01%] index_select skip256 : Elapsed 36.742 ms (734.850 ms / 20) 742.326 -> 736.879 ( -0.73%) [ +0.00% +0.32% +0.07% / +0.01% -0.67% -0.73%] index_select spread : Elapsed 37.116 ms (742.326 ms / 20) 741.069 -> 743.783 ( +0.37%) [ +0.12% +0.00% +0.30% / +0.37% +0.85% +0.78%] index_select strided 3 : Elapsed 37.097 ms (741.947 ms / 20) 752.040 -> 743.267 ( -1.17%) [ +0.02% +0.00% +0.00% / +0.26% -1.17% -1.16%] index_select strided 5 : Elapsed 37.609 ms (752.185 ms / 20) 753.494 -> 742.153 ( -1.51%) [ +0.05% +0.17% +0.00% / +0.08% -1.27% -1.51%] index_select strided 7 : Elapsed 37.695 ms (753.902 ms / 20) 745.661 -> 745.404 ( -0.03%) [ +0.25% +0.12% +0.00% / -0.03% +0.55% +0.39%] index_select strided 8 : Elapsed 37.376 ms (747.529 ms / 20) 740.828 -> 741.449 ( +0.08%) [ +0.02% +0.20% +0.00% / +0.08% +0.30% +0.60%] index_select strided 16 : Elapsed 37.049 ms (740.985 ms / 20) 741.608 -> 742.871 ( +0.17%) [ +0.00% +0.15% +0.12% / +0.17% +1.34% +1.38%] index_select random : Elapsed 37.080 ms (741.608 ms / 20) 740.607 -> 731.759 ( -1.19%) [ +0.00% +0.02% +0.11% / -0.00% -1.07% -1.19%] index_select random_sorted : Elapsed 37.030 ms (740.607 ms / 20) 748.651 -> 747.553 ( -0.15%) [ +0.15% +0.00% +0.24% / +0.16% -0.15% -0.04%] index_select perm : Elapsed 37.489 ms (749.786 ms / 20) 744.085 -> 735.891 ( -1.10%) [ +0.00% +0.06% +0.04% / +0.14% -1.10% -0.79%] index_select perm_sorted : Elapsed 37.204 ms (744.085 ms / 20) out_shape = [256, 50, 40, 100] in_shape = [256, 50, 20, 100] idx_dim = 2 B = [256, 50, 40, 100] (stride (4000, 1024000, 1, 40)) A = [256, 50, 20, 100] (stride (100000, 100, 5000, 1)) dim = 2 722.066 -> 722.037 ( -0.00%) [ +0.00% +0.00% +0.05% / +0.00% -0.00% +0.10%] index_add_ linear : Elapsed 36.104 ms (722.079 ms / 20) 648.995 -> 648.787 ( -0.03%) [ +0.02% +0.02% +0.00% / -0.03% +0.04% +0.03%] index_copy_ linear : Elapsed 32.458 ms (649.154 ms / 20) 721.743 -> 721.712 ( -0.00%) [ +0.00% +0.02% +0.02% / -0.00% +0.15% +0.14%] index_add_ reverse : Elapsed 36.087 ms (721.743 ms / 20) 648.799 -> 648.788 ( -0.00%) [ +0.02% +0.00% +0.00% / -0.00% +0.02% +0.05%] index_copy_ reverse : Elapsed 32.445 ms (648.903 ms / 20) 721.873 -> 722.248 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.12% +0.16%] index_add_ spread : Elapsed 36.111 ms (722.218 ms / 20) 648.865 -> 649.095 ( +0.04%) [ +0.00% +0.01% +0.00% / +0.06% +0.04% +0.04%] index_copy_ spread : Elapsed 32.443 ms (648.865 ms / 20) 721.404 -> 721.822 ( +0.06%) [ +0.11% +0.03% +0.00% / +0.06% +0.18% +0.15%] index_add_ strided 3 : Elapsed 36.108 ms (722.166 ms / 20) 648.790 -> 648.928 ( +0.02%) [ +0.03% +0.00% +0.03% / +0.02% +0.08% +0.04%] index_copy_ strided 3 : Elapsed 32.449 ms (648.980 ms / 20) 720.767 -> 720.966 ( +0.03%) [ +0.03% +0.00% +0.09% / +0.03% +0.15% +0.19%] index_add_ strided 7 : Elapsed 36.048 ms (720.951 ms / 20) 648.800 -> 648.896 ( +0.01%) [ +0.01% +0.00% +0.01% / +0.05% +0.02% +0.01%] index_copy_ strided 7 : Elapsed 32.443 ms (648.868 ms / 20) 719.827 -> 719.873 ( +0.01%) [ +0.09% +0.00% +0.02% / +0.01% +0.37% +0.44%] index_add_ perm : Elapsed 36.025 ms (720.497 ms / 20) 648.508 -> 648.476 ( -0.00%) [ +0.02% +0.00% +0.00% / -0.00% +0.14% +0.13%] index_copy_ perm : Elapsed 32.431 ms (648.628 ms / 20) 720.469 -> 720.093 ( -0.05%) [ +0.03% +0.03% +0.00% / -0.05% +0.33% +0.33%] index_add_ perm_sorted : Elapsed 36.033 ms (720.660 ms / 20) 648.500 -> 648.571 ( +0.01%) [ +0.00% +0.02% +0.05% / +0.01% +0.12% +0.12%] index_copy_ perm_sorted : Elapsed 32.425 ms (648.500 ms / 20) 1296.712 -> 1296.661 ( -0.00%) [ +0.03% +0.00% +0.00% / -0.00% +0.04% +0.04%] index_select const : Elapsed 64.857 ms (1297.142 ms / 20) 1297.271 -> 1297.614 ( +0.03%) [ +0.00% +0.06% +0.02% / +0.12% +0.05% +0.03%] index_select wrap : Elapsed 64.864 ms (1297.271 ms / 20) 1297.330 -> 1297.210 ( -0.01%) [ +0.01% +0.00% +0.00% / -0.01% +0.02% +0.01%] index_select linear : Elapsed 64.871 ms (1297.418 ms / 20) 1297.104 -> 1297.471 ( +0.03%) [ +0.04% +0.02% +0.00% / +0.03% +0.06% +0.08%] index_select reverse : Elapsed 64.881 ms (1297.624 ms / 20) 1296.655 -> 1296.814 ( +0.01%) [ +0.00% +0.01% +0.00% / +0.01% +0.04% +0.05%] index_select skip64 : Elapsed 64.833 ms (1296.655 ms / 20) 1295.314 -> 1295.547 ( +0.02%) [ +0.00% +0.03% +0.00% / +0.02% +0.17% +0.14%] index_select skip256 : Elapsed 64.766 ms (1295.314 ms / 20) 1297.122 -> 1297.399 ( +0.02%) [ +0.00% +0.06% +0.03% / +0.02% +0.06% +0.07%] index_select spread : Elapsed 64.856 ms (1297.122 ms / 20) 1297.602 -> 1297.602 ( +0.00%) [ +0.00% +0.01% +0.01% / +0.00% +0.03% +0.10%] index_select strided 3 : Elapsed 64.880 ms (1297.602 ms / 20) 1297.187 -> 1296.858 ( -0.03%) [ +0.00% +0.01% +0.03% / -0.02% -0.03% -0.01%] index_select strided 5 : Elapsed 64.859 ms (1297.187 ms / 20) 1297.874 -> 1297.137 ( -0.06%) [ +0.02% +0.02% +0.00% / +0.02% -0.06% -0.02%] index_select strided 7 : Elapsed 64.909 ms (1298.173 ms / 20) 1297.439 -> 1297.409 ( -0.00%) [ +0.03% +0.00% +0.03% / -0.00% +0.05% +0.04%] index_select strided 8 : Elapsed 64.893 ms (1297.859 ms / 20) 1297.420 -> 1297.774 ( +0.03%) [ +0.01% +0.00% +0.01% / +0.03% +0.05% +0.04%] index_select strided 16 : Elapsed 64.878 ms (1297.569 ms / 20) 1296.886 -> 1297.657 ( +0.06%) [ +0.03% +0.00% +0.02% / +0.06% +0.07% +0.10%] index_select random : Elapsed 64.865 ms (1297.291 ms / 20) 1296.952 -> 1297.399 ( +0.03%) [ +0.00% +0.04% +0.02% / +0.03% +0.05% +0.08%] index_select random_sorted : Elapsed 64.848 ms (1296.952 ms / 20) B = [256, 50, 40, 100] (stride (100, 25600, 1280000, 1)) A = [256, 50, 20, 100] (stride (1000, 20, 1, 256000)) dim = 2 697.901 -> 697.909 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.04% +0.00% +0.00%] index_add_ linear : Elapsed 34.895 ms (697.901 ms / 20) 682.395 -> 682.643 ( +0.04%) [ +0.03% +0.00% +0.04% / +0.05% +0.04% +0.07%] index_copy_ linear : Elapsed 34.129 ms (682.574 ms / 20) 697.720 -> 697.816 ( +0.01%) [ +0.01% +0.00% +0.00% / +0.02% +0.01% +0.04%] index_add_ reverse : Elapsed 34.888 ms (697.759 ms / 20) 682.393 -> 682.490 ( +0.01%) [ +0.02% +0.01% +0.00% / +0.02% +0.04% +0.01%] index_copy_ reverse : Elapsed 34.127 ms (682.537 ms / 20) 697.794 -> 697.821 ( +0.00%) [ +0.00% +0.01% +0.02% / +0.01% +0.00% +0.02%] index_add_ spread : Elapsed 34.890 ms (697.794 ms / 20) 682.392 -> 682.448 ( +0.01%) [ +0.00% +0.00% +0.03% / +0.05% +0.01% +0.02%] index_copy_ spread : Elapsed 34.121 ms (682.423 ms / 20) 697.888 -> 697.953 ( +0.01%) [ +0.00% +0.00% +0.00% / +0.02% +0.01% +0.02%] index_add_ strided 3 : Elapsed 34.894 ms (697.888 ms / 20) 682.431 -> 682.687 ( +0.04%) [ +0.00% +0.02% +0.02% / +0.04% +0.04% +0.05%] index_copy_ strided 3 : Elapsed 34.122 ms (682.431 ms / 20) 697.883 -> 697.794 ( -0.01%) [ +0.01% +0.02% +0.00% / +0.02% -0.01% +0.02%] index_add_ strided 7 : Elapsed 34.898 ms (697.961 ms / 20) 682.526 -> 682.518 ( -0.00%) [ +0.02% +0.02% +0.00% / +0.00% -0.00% +0.01%] index_copy_ strided 7 : Elapsed 34.132 ms (682.636 ms / 20) 697.726 -> 697.798 ( +0.01%) [ +0.01% +0.03% +0.00% / +0.02% +0.01% +0.06%] index_add_ perm : Elapsed 34.891 ms (697.829 ms / 20) 682.348 -> 682.578 ( +0.03%) [ +0.00% +0.03% +0.02% / +0.05% +0.03% +0.04%] index_copy_ perm : Elapsed 34.117 ms (682.348 ms / 20) 697.795 -> 697.944 ( +0.02%) [ +0.02% +0.03% +0.00% / +0.02% +0.02% +0.03%] index_add_ perm_sorted : Elapsed 34.896 ms (697.912 ms / 20) 682.443 -> 682.498 ( +0.01%) [ +0.03% +0.02% +0.00% / +0.01% +0.04% +0.01%] index_copy_ perm_sorted : Elapsed 34.133 ms (682.663 ms / 20) 1369.259 -> 1369.173 ( -0.01%) [ +0.00% +0.00% +0.01% / +0.02% -0.00% -0.01%] index_select const : Elapsed 68.463 ms (1369.259 ms / 20) 1369.113 -> 1368.982 ( -0.01%) [ +0.00% +0.00% +0.01% / -0.01% +0.04% +0.02%] index_select wrap : Elapsed 68.456 ms (1369.119 ms / 20) 1368.994 -> 1369.177 ( +0.01%) [ +0.02% +0.00% +0.01% / +0.01% +0.03% +0.04%] index_select linear : Elapsed 68.464 ms (1369.284 ms / 20) 1368.802 -> 1369.208 ( +0.03%) [ +0.00% +0.00% +0.02% / +0.04% +0.06% +0.03%] index_select reverse : Elapsed 68.443 ms (1368.856 ms / 20) 1369.248 -> 1369.075 ( -0.01%) [ +0.00% +0.01% +0.01% / +0.03% -0.01% -0.00%] index_select skip64 : Elapsed 68.462 ms (1369.248 ms / 20) 1369.289 -> 1369.050 ( -0.02%) [ +0.00% +0.01% +0.00% / +0.01% -0.02% +0.00%] index_select skip256 : Elapsed 68.465 ms (1369.297 ms / 20) 1368.946 -> 1368.876 ( -0.01%) [ +0.00% +0.01% +0.02% / -0.01% +0.02% +0.04%] index_select spread : Elapsed 68.447 ms (1368.946 ms / 20) 1369.078 -> 1369.071 ( -0.00%) [ +0.01% +0.00% +0.00% / -0.00% +0.03% +0.03%] index_select strided 3 : Elapsed 68.462 ms (1369.242 ms / 20) 1368.718 -> 1369.365 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.06% +0.05%] index_select strided 5 : Elapsed 68.449 ms (1368.970 ms / 20) 1369.229 -> 1369.285 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.04% +0.02%] index_select strided 7 : Elapsed 68.464 ms (1369.282 ms / 20) 1369.067 -> 1369.231 ( +0.01%) [ +0.01% +0.00% +0.02% / +0.01% +0.02% +0.04%] index_select strided 8 : Elapsed 68.464 ms (1369.271 ms / 20) 1368.812 -> 1369.121 ( +0.02%) [ +0.00% +0.02% +0.03% / +0.02% +0.05% +0.05%] index_select strided 16 : Elapsed 68.441 ms (1368.812 ms / 20) 1368.906 -> 1368.990 ( +0.01%) [ +0.02% +0.02% +0.00% / +0.01% +0.04% +0.04%] index_select random : Elapsed 68.460 ms (1369.207 ms / 20) 1369.232 -> 1369.207 ( -0.00%) [ +0.00% +0.01% +0.00% / -0.00% +0.01% +0.02%] index_select random_sorted : Elapsed 68.464 ms (1369.272 ms / 20) out_shape = [256, 50, 20, 40] in_shape = [256, 50, 20, 100] idx_dim = 3 out_shape = [40, 50, 100, 20] in_shape = [256, 50, 100, 20] idx_dim = 0 B = [40, 50, 100, 20] (stride (20, 80000, 800, 1)) A = [256, 50, 100, 20] (stride (100000, 2000, 1, 100)) dim = 0 41.338 -> 41.136 ( -0.49%) [ +0.04% +0.00% +0.02% / -0.02% -0.47% -0.49%] index_select const : Elapsed 2.068 ms (41.353 ms / 20) 54.678 -> 54.746 ( +0.12%) [ +0.12% +0.34% +0.00% / +0.12% +0.59% +0.42%] index_select wrap : Elapsed 2.737 ms (54.745 ms / 20) 54.770 -> 54.716 ( -0.10%) [ +0.07% +0.07% +0.00% / -0.10% +0.47% +0.51%] index_select linear : Elapsed 2.740 ms (54.807 ms / 20) 54.577 -> 54.497 ( -0.15%) [ +0.08% +0.00% +0.13% / -0.15% +0.18% +0.03%] index_select reverse : Elapsed 2.731 ms (54.619 ms / 20) 41.330 -> 41.093 ( -0.57%) [ +0.00% +0.02% +0.06% / +0.04% -0.53% -0.57%] index_select skip64 : Elapsed 2.066 ms (41.330 ms / 20) 41.343 -> 41.124 ( -0.53%) [ +0.00% +0.03% +0.06% / +0.17% -0.53% -0.47%] index_select skip256 : Elapsed 2.067 ms (41.343 ms / 20) 54.683 -> 54.314 ( -0.67%) [ +0.00% +0.75% +0.26% / +0.39% -0.67% -0.51%] index_select spread : Elapsed 2.734 ms (54.683 ms / 20) 54.352 -> 54.471 ( +0.22%) [ +0.46% +0.32% +0.00% / +0.57% +0.22% +0.27%] index_select strided 3 : Elapsed 2.730 ms (54.604 ms / 20) 54.659 -> 54.678 ( +0.03%) [ +0.19% +0.08% +0.00% / +0.03% +0.21% +0.33%] index_select strided 5 : Elapsed 2.738 ms (54.765 ms / 20) 54.888 -> 54.770 ( -0.21%) [ +0.20% +0.00% +0.01% / +0.13% +0.15% -0.21%] index_select strided 7 : Elapsed 2.750 ms (54.997 ms / 20) 54.476 -> 54.583 ( +0.20%) [ +0.36% +0.34% +0.00% / +0.20% +0.72% +0.28%] index_select strided 8 : Elapsed 2.734 ms (54.673 ms / 20) 54.395 -> 54.557 ( +0.30%) [ +0.46% +0.36% +0.00% / +0.30% +0.79% +0.67%] index_select strided 16 : Elapsed 2.732 ms (54.644 ms / 20) 53.157 -> 53.228 ( +0.13%) [ +0.11% +0.00% +0.12% / +0.13% +0.77% +0.98%] index_select strided 64 : Elapsed 2.661 ms (53.216 ms / 20) 54.631 -> 54.712 ( +0.15%) [ +0.35% +0.22% +0.00% / +0.15% +0.93% +0.70%] index_select strided 100 : Elapsed 2.741 ms (54.822 ms / 20) 54.319 -> 54.569 ( +0.46%) [ +0.34% +0.37% +0.00% / +0.46% +0.83% +0.69%] index_select strided 255 : Elapsed 2.725 ms (54.502 ms / 20) 54.916 -> 54.612 ( -0.55%) [ +0.07% +0.00% +0.10% / +0.01% -0.55% -0.54%] index_select random : Elapsed 2.748 ms (54.957 ms / 20) 53.984 -> 53.423 ( -1.04%) [ +0.22% +0.00% +0.16% / -0.03% -0.75% -1.04%] index_select random_sorted : Elapsed 2.705 ms (54.101 ms / 20) 54.618 -> 54.709 ( +0.17%) [ +0.21% +0.02% +0.00% / +0.17% +0.18% +0.28%] index_select perm : Elapsed 2.737 ms (54.733 ms / 20) 54.462 -> 54.339 ( -0.23%) [ +0.31% +0.00% +0.02% / -0.23% +0.67% +0.75%] index_select perm_sorted : Elapsed 2.732 ms (54.630 ms / 20) out_shape = [256, 40, 100, 20] in_shape = [256, 50, 100, 20] idx_dim = 1 out_shape = [256, 50, 40, 20] in_shape = [256, 50, 100, 20] idx_dim = 2 out_shape = [256, 50, 100, 40] in_shape = [256, 50, 100, 20] idx_dim = 3 out_shape = [40, 100, 20, 50] in_shape = [256, 100, 20, 50] idx_dim = 0 out_shape = [256, 40, 20, 50] in_shape = [256, 100, 20, 50] idx_dim = 1 out_shape = [256, 100, 40, 50] in_shape = [256, 100, 20, 50] idx_dim = 2 out_shape = [256, 100, 20, 40] in_shape = [256, 100, 20, 50] idx_dim = 3 out_shape = [40, 100, 50, 20] in_shape = [256, 100, 50, 20] idx_dim = 0 out_shape = [256, 40, 50, 20] in_shape = [256, 100, 50, 20] idx_dim = 1 out_shape = [256, 100, 40, 20] in_shape = [256, 100, 50, 20] idx_dim = 2 out_shape = [256, 100, 50, 40] in_shape = [256, 100, 50, 20] idx_dim = 3 B = [256, 100, 50, 40] (stride (2000, 512000, 1, 50)) A = [256, 100, 50, 20] (stride (5000, 50, 1, 1280000)) dim = 3 178.258 -> 178.312 ( +0.03%) [ +0.00% +0.01% +0.11% / +0.03% +0.26% +0.19%] index_add_ linear : Elapsed 8.913 ms (178.258 ms / 20) 165.533 -> 165.667 ( +0.08%) [ +0.00% +0.03% +0.13% / +0.08% +0.25% +0.18%] index_copy_ linear : Elapsed 8.277 ms (165.533 ms / 20) 178.305 -> 178.416 ( +0.06%) [ +0.07% +0.00% +0.12% / +0.07% +0.06% +0.12%] index_add_ reverse : Elapsed 8.922 ms (178.432 ms / 20) 165.513 -> 165.685 ( +0.10%) [ +0.00% +0.04% +0.14% / +0.11% +0.10% +0.15%] index_copy_ reverse : Elapsed 8.276 ms (165.513 ms / 20) 178.544 -> 178.545 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.04% +0.06%] index_add_ spread : Elapsed 8.927 ms (178.545 ms / 20) 165.768 -> 165.818 ( +0.03%) [ +0.07% +0.00% +0.04% / +0.03% +0.06% +0.19%] index_copy_ spread : Elapsed 8.294 ms (165.877 ms / 20) 178.530 -> 178.615 ( +0.05%) [ +0.04% +0.00% +0.05% / +0.06% +0.05% +0.12%] index_add_ strided 3 : Elapsed 8.930 ms (178.606 ms / 20) 165.637 -> 165.788 ( +0.09%) [ +0.03% +0.00% +0.03% / +0.09% +0.11% +0.20%] index_copy_ strided 3 : Elapsed 8.284 ms (165.684 ms / 20) 178.807 -> 178.513 ( -0.16%) [ +0.00% +0.04% +0.05% / -0.03% -0.12% -0.16%] index_add_ strided 7 : Elapsed 8.940 ms (178.807 ms / 20) 165.907 -> 165.800 ( -0.06%) [ +0.01% +0.00% +0.03% / +0.02% -0.05% -0.06%] index_copy_ strided 7 : Elapsed 8.296 ms (165.920 ms / 20) 178.668 -> 178.366 ( -0.17%) [ +0.00% +0.04% +0.12% / +0.13% -0.17% -0.11%] index_add_ perm : Elapsed 8.933 ms (178.668 ms / 20) 165.788 -> 165.736 ( -0.03%) [ +0.04% +0.00% +0.06% / +0.20% -0.03% +0.06%] index_copy_ perm : Elapsed 8.293 ms (165.854 ms / 20) 178.545 -> 178.437 ( -0.06%) [ +0.05% +0.00% +0.06% / +0.03% -0.06% -0.03%] index_add_ perm_sorted : Elapsed 8.932 ms (178.641 ms / 20) 165.714 -> 165.602 ( -0.07%) [ +0.03% +0.00% +0.05% / -0.07% +0.00% +0.02%] index_copy_ perm_sorted : Elapsed 8.288 ms (165.765 ms / 20) 330.570 -> 330.937 ( +0.11%) [ +0.00% +0.02% +0.10% / +0.11% +0.23% +0.28%] index_select const : Elapsed 16.528 ms (330.570 ms / 20) 331.528 -> 331.863 ( +0.10%) [ +0.00% +0.07% +0.14% / +0.11% +0.10% +0.13%] index_select wrap : Elapsed 16.576 ms (331.528 ms / 20) 330.904 -> 330.967 ( +0.02%) [ +0.00% +0.05% +0.13% / +0.08% +0.07% +0.02%] index_select linear : Elapsed 16.545 ms (330.904 ms / 20) 331.717 -> 331.859 ( +0.04%) [ +0.00% +0.01% +0.10% / +0.11% +0.04% +0.05%] index_select reverse : Elapsed 16.586 ms (331.717 ms / 20) 330.692 -> 330.936 ( +0.07%) [ +0.03% +0.00% +0.12% / +0.07% +0.28% +0.14%] index_select skip64 : Elapsed 16.540 ms (330.803 ms / 20) 330.785 -> 331.121 ( +0.10%) [ +0.02% +0.00% +0.12% / +0.10% +0.16% +0.18%] index_select skip256 : Elapsed 16.543 ms (330.854 ms / 20) 331.063 -> 331.319 ( +0.08%) [ +0.00% +0.00% +0.07% / +0.08% +0.09% +0.14%] index_select spread : Elapsed 16.553 ms (331.065 ms / 20) 331.580 -> 331.732 ( +0.05%) [ +0.00% +0.01% +0.14% / +0.05% +0.16% +0.13%] index_select strided 3 : Elapsed 16.579 ms (331.580 ms / 20) 331.470 -> 331.658 ( +0.06%) [ +0.01% +0.00% +0.01% / +0.06% +0.19% +0.16%] index_select strided 5 : Elapsed 16.575 ms (331.492 ms / 20) 331.645 -> 331.860 ( +0.06%) [ +0.00% +0.03% +0.04% / +0.09% +0.06% +0.07%] index_select strided 7 : Elapsed 16.582 ms (331.645 ms / 20) 331.800 -> 331.945 ( +0.04%) [ +0.01% +0.00% +0.08% / +0.10% +0.04% +0.10%] index_select strided 8 : Elapsed 16.591 ms (331.827 ms / 20) 331.840 -> 331.940 ( +0.03%) [ +0.06% +0.00% +0.10% / +0.12% +0.03% +0.04%] index_select strided 16 : Elapsed 16.602 ms (332.041 ms / 20) 331.603 -> 331.970 ( +0.11%) [ +0.00% +0.05% +0.06% / +0.14% +0.13% +0.11%] index_select random : Elapsed 16.580 ms (331.603 ms / 20) 330.992 -> 331.134 ( +0.04%) [ +0.00% +0.03% +0.05% / +0.11% +0.07% +0.04%] index_select random_sorted : Elapsed 16.550 ms (330.992 ms / 20) out_shape = [50, 40, 100, 256] in_shape = [20, 40, 100, 256] idx_dim = 0 out_shape = [20, 50, 100, 256] in_shape = [20, 40, 100, 256] idx_dim = 1 out_shape = [20, 40, 50, 256] in_shape = [20, 40, 100, 256] idx_dim = 2 B = [20, 40, 50, 256] (stride (2000, 50, 1, 40000)) A = [20, 40, 100, 256] (stride (40, 1, 800, 80000)) dim = 2 BEST 343.997 -> 25.153 (-92.69%) [ +0.28% +0.00% +0.36% / -92.52% -92.63% -92.69%] index_select const : Elapsed 17.248 ms (344.953 ms / 20) BEST 355.998 -> 132.742 (-62.71%) [ +0.25% +0.00% +0.05% / -62.71% -61.51% -61.10%] index_select wrap : Elapsed 17.845 ms (356.891 ms / 20) BEST 355.249 -> 132.304 (-62.76%) [ +0.35% +0.51% +0.00% / -62.76% -61.33% -61.24%] index_select linear : Elapsed 17.824 ms (356.487 ms / 20) BEST 353.318 -> 132.990 (-62.36%) [ +0.39% +0.00% +0.28% / -62.36% -61.24% -61.41%] index_select reverse : Elapsed 17.735 ms (354.695 ms / 20) BEST 343.773 -> 24.107 (-92.99%) [ +0.39% +0.00% +0.24% / -92.82% -92.99% -92.97%] index_select skip64 : Elapsed 17.256 ms (345.119 ms / 20) BEST 344.364 -> 25.199 (-92.68%) [ +0.11% +0.08% +0.00% / -92.36% -92.68% -92.68%] index_select skip256 : Elapsed 17.236 ms (344.727 ms / 20) BEST 353.225 -> 147.606 (-58.21%) [ +0.40% +0.03% +0.00% / -58.21% -57.90% -57.52%] index_select spread : Elapsed 17.732 ms (354.644 ms / 20) BEST 351.748 -> 145.598 (-58.61%) [ +0.42% +0.46% +0.00% / -58.61% -57.21% -57.46%] index_select strided 3 : Elapsed 17.661 ms (353.227 ms / 20) BEST 352.746 -> 59.998 (-82.99%) [ +0.17% +0.00% +0.16% / -82.99% -82.59% -82.72%] index_select strided 5 : Elapsed 17.667 ms (353.341 ms / 20) BEST 352.093 -> 138.389 (-60.70%) [ +0.08% +0.07% +0.00% / -60.70% -59.89% -59.67%] index_select strided 7 : Elapsed 17.619 ms (352.384 ms / 20) BEST 353.528 -> 90.571 (-74.38%) [ +0.06% +0.24% +0.00% / -74.38% -74.00% -73.91%] index_select strided 8 : Elapsed 17.686 ms (353.724 ms / 20) BEST 357.425 -> 89.720 (-74.90%) [ +0.35% +0.00% +0.06% / -74.81% -74.90% -74.88%] index_select strided 16 : Elapsed 17.933 ms (358.667 ms / 20) BEST 354.033 -> 89.738 (-74.65%) [ +0.09% +0.01% +0.00% / -74.65% -73.45% -73.36%] index_select strided 64 : Elapsed 17.718 ms (354.367 ms / 20) BEST 352.595 -> 107.439 (-69.53%) [ +0.13% +0.00% +0.15% / -69.01% -69.33% -69.53%] index_select random : Elapsed 17.653 ms (353.065 ms / 20) BEST 343.628 -> 103.266 (-69.95%) [ +0.00% +0.28% +0.05% / -69.02% -69.95% -69.83%] index_select random_sorted : Elapsed 17.181 ms (343.628 ms / 20) BEST 353.235 -> 139.146 (-60.61%) [ +0.00% +0.44% +0.10% / -58.23% -60.61% -60.36%] index_select perm : Elapsed 17.662 ms (353.235 ms / 20) BEST 352.653 -> 137.062 (-61.13%) [ +0.21% +0.00% +0.18% / -59.07% -60.94% -61.13%] index_select perm_sorted : Elapsed 17.669 ms (353.378 ms / 20) out_shape = [20, 40, 100, 50] in_shape = [20, 40, 100, 256] idx_dim = 3 B = [20, 40, 100, 50] (stride (1, 100000, 1000, 20)) A = [20, 40, 100, 256] (stride (100, 2000, 1, 80000)) dim = 3 71.884 -> 71.738 ( -0.20%) [ +0.23% +0.02% +0.00% / -0.20% +0.57% +0.90%] index_select const : Elapsed 3.602 ms (72.050 ms / 20) 71.379 -> 71.684 ( +0.43%) [ +0.90% +0.00% +0.61% / +0.43% +1.79% +2.10%] index_select wrap : Elapsed 3.601 ms (72.018 ms / 20) 71.754 -> 71.911 ( +0.22%) [ +0.09% +0.00% +0.09% / +0.22% +1.19% +1.32%] index_select linear : Elapsed 3.591 ms (71.821 ms / 20) 71.347 -> 71.523 ( +0.25%) [ +0.00% +0.47% +0.08% / +0.25% +1.77% +1.46%] index_select reverse : Elapsed 3.567 ms (71.347 ms / 20) 71.230 -> 71.430 ( +0.28%) [ +0.77% +0.00% +0.46% / +0.28% +1.73% +2.22%] index_select skip64 : Elapsed 3.589 ms (71.781 ms / 20) 71.672 -> 71.902 ( +0.32%) [ +0.00% +0.04% +0.01% / +0.32% +1.41% +1.41%] index_select skip256 : Elapsed 3.584 ms (71.672 ms / 20) 71.776 -> 71.576 ( -0.28%) [ +0.00% +0.29% +0.57% / -0.28% +1.94% +1.64%] index_select spread : Elapsed 3.589 ms (71.776 ms / 20) 71.432 -> 71.470 ( +0.05%) [ +0.08% +0.00% +0.16% / +0.05% +1.86% +1.45%] index_select strided 3 : Elapsed 3.574 ms (71.489 ms / 20) 71.643 -> 71.868 ( +0.31%) [ +0.00% +0.26% +0.70% / +0.31% +2.32% +2.41%] index_select strided 5 : Elapsed 3.582 ms (71.643 ms / 20) 71.254 -> 71.389 ( +0.19%) [ +0.02% +0.00% +0.29% / +0.19% +0.95% +1.47%] index_select strided 7 : Elapsed 3.563 ms (71.269 ms / 20) 71.938 -> 71.873 ( -0.09%) [ +0.06% +0.00% +0.29% / -0.09% +0.54% +0.62%] index_select strided 8 : Elapsed 3.599 ms (71.982 ms / 20) 71.668 -> 71.976 ( +0.43%) [ +0.79% +0.90% +0.00% / +0.43% +0.85% +1.23%] index_select strided 16 : Elapsed 3.612 ms (72.237 ms / 20) 71.824 -> 71.904 ( +0.11%) [ +0.71% +0.62% +0.00% / +0.11% +0.20% +0.87%] index_select strided 64 : Elapsed 3.617 ms (72.332 ms / 20) 71.291 -> 71.627 ( +0.47%) [ +0.00% +0.39% +0.12% / +0.47% +2.03% +1.78%] index_select strided 100 : Elapsed 3.565 ms (71.291 ms / 20) 71.781 -> 71.955 ( +0.24%) [ +0.00% +0.21% +0.11% / +0.24% +1.48% +0.93%] index_select strided 255 : Elapsed 3.589 ms (71.781 ms / 20) 71.199 -> 71.309 ( +0.15%) [ +0.20% +0.00% +0.03% / +0.15% +2.15% +1.59%] index_select random : Elapsed 3.567 ms (71.338 ms / 20) 70.959 -> 70.765 ( -0.27%) [ +0.26% +0.29% +0.00% / -0.27% +2.57% +2.83%] index_select random_sorted : Elapsed 3.557 ms (71.140 ms / 20) 71.748 -> 71.715 ( -0.05%) [ +0.38% +0.60% +0.00% / +0.32% +0.71% -0.05%] index_select perm : Elapsed 3.601 ms (72.024 ms / 20) 71.699 -> 71.883 ( +0.26%) [ +0.00% +0.23% +0.38% / +0.26% +1.03% +1.33%] index_select perm_sorted : Elapsed 3.585 ms (71.699 ms / 20) out_shape = [50, 40, 256, 100] in_shape = [20, 40, 256, 100] idx_dim = 0 out_shape = [20, 50, 256, 100] in_shape = [20, 40, 256, 100] idx_dim = 1 out_shape = [20, 40, 50, 100] in_shape = [20, 40, 256, 100] idx_dim = 2 B = [20, 40, 50, 100] (stride (1, 20, 80000, 800)) A = [20, 40, 256, 100] (stride (256, 512000, 1, 5120)) dim = 2 134.425 -> 134.277 ( -0.11%) [ +0.15% +0.10% +0.00% / -0.11% +0.19% +0.26%] index_select const : Elapsed 6.731 ms (134.626 ms / 20) 135.174 -> 135.383 ( +0.15%) [ +0.30% +0.30% +0.00% / +0.15% +0.30% +0.25%] index_select wrap : Elapsed 6.779 ms (135.575 ms / 20) 135.398 -> 135.277 ( -0.09%) [ +0.19% +0.01% +0.00% / +0.31% +0.17% -0.09%] index_select linear : Elapsed 6.783 ms (135.659 ms / 20) 135.108 -> 135.176 ( +0.05%) [ +0.00% +0.09% +0.13% / +0.13% +0.05% +0.13%] index_select reverse : Elapsed 6.755 ms (135.108 ms / 20) 134.463 -> 134.511 ( +0.04%) [ +0.06% +0.36% +0.00% / +0.21% +0.18% +0.04%] index_select skip64 : Elapsed 6.727 ms (134.543 ms / 20) 134.642 -> 134.599 ( -0.03%) [ +0.09% +0.16% +0.00% / +0.02% +0.21% -0.03%] index_select skip256 : Elapsed 6.738 ms (134.764 ms / 20) 136.926 -> 136.890 ( -0.03%) [ +0.00% +0.10% +0.16% / -0.03% +0.14% +0.14%] index_select spread : Elapsed 6.846 ms (136.926 ms / 20) 136.520 -> 136.408 ( -0.08%) [ +0.00% +0.04% +0.04% / +0.03% -0.08% -0.02%] index_select strided 3 : Elapsed 6.826 ms (136.520 ms / 20) 137.054 -> 136.950 ( -0.08%) [ +0.11% +0.11% +0.00% / +0.08% +0.02% -0.08%] index_select strided 5 : Elapsed 6.860 ms (137.206 ms / 20) 137.279 -> 137.181 ( -0.07%) [ +0.00% +0.08% +0.11% / -0.07% +0.02% +0.14%] index_select strided 7 : Elapsed 6.864 ms (137.279 ms / 20) 137.406 -> 137.441 ( +0.03%) [ +0.17% +0.06% +0.00% / +0.03% +0.06% +0.18%] index_select strided 8 : Elapsed 6.882 ms (137.645 ms / 20) 137.512 -> 137.523 ( +0.01%) [ +0.16% +0.00% +0.07% / +0.01% +0.07% +0.01%] index_select strided 16 : Elapsed 6.887 ms (137.732 ms / 20) 137.420 -> 137.375 ( -0.03%) [ +0.21% +0.12% +0.00% / -0.03% +0.07% +0.10%] index_select strided 64 : Elapsed 6.885 ms (137.704 ms / 20) 137.486 -> 137.393 ( -0.07%) [ +0.07% +0.00% +0.02% / +0.11% -0.07% +0.15%] index_select strided 100 : Elapsed 6.879 ms (137.576 ms / 20) 135.359 -> 135.320 ( -0.03%) [ +0.00% +0.16% +0.21% / +0.16% -0.03% +0.21%] index_select strided 255 : Elapsed 6.768 ms (135.359 ms / 20) 137.516 -> 137.428 ( -0.06%) [ +0.00% +0.12% +0.15% / +0.01% -0.06% -0.04%] index_select random : Elapsed 6.876 ms (137.516 ms / 20) 136.474 -> 136.663 ( +0.14%) [ +0.00% +0.02% +0.17% / +0.14% +0.19% +0.14%] index_select random_sorted : Elapsed 6.824 ms (136.474 ms / 20) 137.354 -> 137.371 ( +0.01%) [ +0.03% +0.03% +0.00% / +0.08% +0.01% +0.04%] index_select perm : Elapsed 6.870 ms (137.391 ms / 20) 136.667 -> 136.652 ( -0.01%) [ +0.06% +0.02% +0.00% / -0.01% +0.06% +0.02%] index_select perm_sorted : Elapsed 6.838 ms (136.750 ms / 20) out_shape = [20, 40, 256, 50] in_shape = [20, 40, 256, 100] idx_dim = 3 B = [20, 40, 256, 50] (stride (512000, 1, 40, 10240)) A = [20, 40, 256, 100] (stride (1, 20, 800, 204800)) dim = 3 248.167 -> 248.666 ( +0.20%) [ +0.00% +0.21% +0.40% / +0.20% +2.75% +2.68%] index_select const : Elapsed 12.408 ms (248.167 ms / 20) 275.021 -> 275.360 ( +0.12%) [ +0.31% +0.37% +0.00% / +0.12% +0.62% +0.55%] index_select wrap : Elapsed 13.794 ms (275.877 ms / 20) 275.522 -> 275.760 ( +0.09%) [ +0.26% +0.03% +0.00% / +0.09% +0.24% +0.37%] index_select linear : Elapsed 13.811 ms (276.225 ms / 20) 274.738 -> 275.561 ( +0.30%) [ +0.00% +0.33% +0.63% / +0.36% +0.37% +0.30%] index_select reverse : Elapsed 13.737 ms (274.738 ms / 20) 248.238 -> 248.194 ( -0.02%) [ +0.45% +0.11% +0.00% / -0.02% +2.81% +2.47%] index_select skip64 : Elapsed 12.467 ms (249.350 ms / 20) 248.454 -> 249.191 ( +0.30%) [ +0.00% +0.17% +0.14% / +0.30% +2.64% +2.82%] index_select skip256 : Elapsed 12.423 ms (248.454 ms / 20) 275.546 -> 275.305 ( -0.09%) [ +0.12% +0.13% +0.00% / -0.09% +0.55% +0.59%] index_select spread : Elapsed 13.794 ms (275.871 ms / 20) 277.413 -> 277.457 ( +0.02%) [ +0.00% +0.20% +0.04% / +0.02% +0.02% +0.32%] index_select strided 3 : Elapsed 13.871 ms (277.413 ms / 20) 277.281 -> 277.143 ( -0.05%) [ +0.01% +0.08% +0.00% / -0.05% +0.41% +0.40%] index_select strided 5 : Elapsed 13.866 ms (277.322 ms / 20) 274.997 -> 275.423 ( +0.15%) [ +0.00% +0.12% +0.02% / +0.15% +0.96% +0.84%] index_select strided 7 : Elapsed 13.750 ms (274.997 ms / 20) 276.424 -> 276.559 ( +0.05%) [ +0.05% +0.20% +0.00% / +0.05% +1.02% +0.97%] index_select strided 8 : Elapsed 13.829 ms (276.572 ms / 20) 275.178 -> 275.843 ( +0.24%) [ +0.08% +0.29% +0.00% / +0.24% +1.50% +1.65%] index_select strided 16 : Elapsed 13.769 ms (275.386 ms / 20) 276.656 -> 277.658 ( +0.36%) [ +0.02% +0.00% +0.11% / +0.36% +1.21% +1.09%] index_select strided 64 : Elapsed 13.835 ms (276.699 ms / 20) 275.668 -> 274.759 ( -0.33%) [ +0.42% +0.55% +0.00% / +0.24% -0.33% -0.14%] index_select random : Elapsed 13.842 ms (276.834 ms / 20) 272.296 -> 272.770 ( +0.17%) [ +0.33% +0.04% +0.00% / +0.17% +0.18% +0.26%] index_select random_sorted : Elapsed 13.660 ms (273.192 ms / 20) 275.331 -> 273.320 ( -0.73%) [ +0.07% +0.20% +0.00% / -0.10% -0.73% -0.73%] index_select perm : Elapsed 13.776 ms (275.528 ms / 20) 274.625 -> 275.046 ( +0.15%) [ +0.18% +0.09% +0.00% / +0.15% +0.23% +0.58%] index_select perm_sorted : Elapsed 13.756 ms (275.129 ms / 20) out_shape = [50, 100, 40, 256] in_shape = [20, 100, 40, 256] idx_dim = 0 out_shape = [20, 50, 40, 256] in_shape = [20, 100, 40, 256] idx_dim = 1 out_shape = [20, 100, 50, 256] in_shape = [20, 100, 40, 256] idx_dim = 2 B = [20, 100, 50, 256] (stride (12800, 256000, 256, 1)) A = [20, 100, 40, 256] (stride (1, 5120, 512000, 20)) dim = 2 286.043 -> 286.058 ( +0.01%) [ +0.01% +0.00% +0.00% / +0.01% +0.14% +0.15%] index_add_ linear : Elapsed 14.304 ms (286.076 ms / 20) 277.876 -> 278.020 ( +0.05%) [ +0.07% +0.00% +0.05% / +0.05% +0.11% +0.07%] index_copy_ linear : Elapsed 13.903 ms (278.069 ms / 20) 286.553 -> 286.526 ( -0.01%) [ +0.00% +0.00% +0.00% / -0.01% +0.03% +0.05%] index_add_ reverse : Elapsed 14.328 ms (286.563 ms / 20) 278.306 -> 278.228 ( -0.03%) [ +0.05% +0.00% +0.02% / -0.02% +0.01% -0.03%] index_copy_ reverse : Elapsed 13.922 ms (278.444 ms / 20) 285.779 -> 285.939 ( +0.06%) [ +0.00% +0.04% +0.02% / +0.06% +0.31% +0.34%] index_add_ spread : Elapsed 14.289 ms (285.779 ms / 20) 277.719 -> 277.858 ( +0.05%) [ +0.06% +0.05% +0.00% / +0.05% +0.23% +0.20%] index_copy_ spread : Elapsed 13.894 ms (277.877 ms / 20) 286.017 -> 286.083 ( +0.02%) [ +0.00% +0.08% +0.05% / +0.02% +0.21% +0.25%] index_add_ strided 3 : Elapsed 14.301 ms (286.017 ms / 20) 278.100 -> 278.244 ( +0.05%) [ +0.04% +0.00% +0.04% / +0.05% +0.06% +0.12%] index_copy_ strided 3 : Elapsed 13.911 ms (278.211 ms / 20) 286.191 -> 286.332 ( +0.05%) [ +0.07% +0.00% +0.04% / +0.05% +0.15% +0.17%] index_add_ strided 7 : Elapsed 14.320 ms (286.399 ms / 20) 277.915 -> 278.090 ( +0.06%) [ +0.00% +0.01% +0.07% / +0.11% +0.06% +0.11%] index_copy_ strided 7 : Elapsed 13.896 ms (277.915 ms / 20) 286.408 -> 286.342 ( -0.02%) [ +0.00% +0.02% +0.03% / -0.02% +0.22% +0.18%] index_add_ perm : Elapsed 14.320 ms (286.408 ms / 20) 278.174 -> 278.294 ( +0.04%) [ +0.05% +0.00% +0.01% / +0.04% +0.12% +0.06%] index_copy_ perm : Elapsed 13.915 ms (278.300 ms / 20) 286.358 -> 286.418 ( +0.02%) [ +0.04% +0.00% +0.05% / +0.03% +0.06% +0.02%] index_add_ perm_sorted : Elapsed 14.324 ms (286.484 ms / 20) 278.071 -> 277.969 ( -0.04%) [ +0.08% +0.05% +0.00% / +0.08% -0.00% -0.04%] index_copy_ perm_sorted : Elapsed 13.915 ms (278.290 ms / 20) 340.499 -> 340.370 ( -0.04%) [ +0.01% +0.00% +0.04% / +0.03% +0.04% -0.04%] index_select const : Elapsed 17.026 ms (340.530 ms / 20) 348.131 -> 348.227 ( +0.03%) [ +0.00% +0.02% +0.05% / +0.03% +0.10% +0.09%] index_select wrap : Elapsed 17.407 ms (348.131 ms / 20) 344.351 -> 344.336 ( -0.00%) [ +0.04% +0.00% +0.02% / -0.00% +0.10% +0.08%] index_select linear : Elapsed 17.224 ms (344.476 ms / 20) 347.765 -> 347.669 ( -0.03%) [ +0.02% +0.00% +0.03% / +0.00% -0.01% -0.03%] index_select reverse : Elapsed 17.392 ms (347.837 ms / 20) 340.462 -> 340.400 ( -0.02%) [ +0.00% +0.00% +0.04% / +0.09% -0.00% -0.02%] index_select skip64 : Elapsed 17.024 ms (340.477 ms / 20) 340.511 -> 340.439 ( -0.02%) [ +0.02% +0.04% +0.00% / +0.03% +0.02% -0.02%] index_select skip256 : Elapsed 17.029 ms (340.584 ms / 20) 347.239 -> 347.461 ( +0.06%) [ +0.01% +0.00% +0.02% / +0.06% +0.11% +0.15%] index_select spread : Elapsed 17.363 ms (347.263 ms / 20) 348.016 -> 348.025 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.12% +0.09%] index_select strided 3 : Elapsed 17.405 ms (348.096 ms / 20) 347.646 -> 347.771 ( +0.04%) [ +0.00% +0.05% +0.07% / +0.04% +0.14% +0.10%] index_select strided 5 : Elapsed 17.382 ms (347.646 ms / 20) 347.827 -> 347.912 ( +0.02%) [ +0.05% +0.01% +0.00% / +0.02% +0.16% +0.15%] index_select strided 7 : Elapsed 17.401 ms (348.013 ms / 20) 347.719 -> 347.737 ( +0.01%) [ +0.07% +0.04% +0.00% / +0.01% +0.16% +0.14%] index_select strided 8 : Elapsed 17.398 ms (347.967 ms / 20) 347.942 -> 348.077 ( +0.04%) [ +0.02% +0.00% +0.07% / +0.04% +0.10% +0.07%] index_select strided 16 : Elapsed 17.400 ms (348.006 ms / 20) 347.717 -> 347.780 ( +0.02%) [ +0.04% +0.03% +0.00% / +0.02% +0.17% +0.17%] index_select random : Elapsed 17.393 ms (347.852 ms / 20) 345.067 -> 345.255 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.09% +0.09%] index_select random_sorted : Elapsed 17.262 ms (345.245 ms / 20) B = [20, 100, 50, 256] (stride (50, 256000, 1, 1000)) A = [20, 100, 40, 256] (stride (1, 204800, 5120, 20)) dim = 2 896.329 -> 895.386 ( -0.11%) [ +0.00% +0.00% +0.04% / +0.03% -0.11% -0.05%] index_add_ linear : Elapsed 44.817 ms (896.346 ms / 20) 671.810 -> 671.583 ( -0.03%) [ +0.02% +0.04% +0.00% / +0.07% -0.03% -0.03%] index_copy_ linear : Elapsed 33.596 ms (671.924 ms / 20) 896.621 -> 895.589 ( -0.12%) [ +0.02% +0.00% +0.05% / +0.01% -0.11% -0.12%] index_add_ reverse : Elapsed 44.838 ms (896.761 ms / 20) 672.133 -> 671.495 ( -0.09%) [ +0.00% +0.03% +0.02% / +0.03% -0.08% -0.09%] index_copy_ reverse : Elapsed 33.607 ms (672.133 ms / 20) 895.819 -> 895.836 ( +0.00%) [ +0.05% +0.04% +0.00% / +0.09% +0.00% +0.00%] index_add_ spread : Elapsed 44.811 ms (896.228 ms / 20) 671.775 -> 671.165 ( -0.09%) [ +0.04% +0.00% +0.06% / +0.05% -0.09% -0.07%] index_copy_ spread : Elapsed 33.604 ms (672.073 ms / 20) 895.808 -> 895.447 ( -0.04%) [ +0.08% +0.00% +0.04% / +0.09% -0.04% -0.01%] index_add_ strided 3 : Elapsed 44.827 ms (896.538 ms / 20) 671.672 -> 671.154 ( -0.08%) [ +0.03% +0.00% +0.07% / +0.06% -0.08% -0.06%] index_copy_ strided 3 : Elapsed 33.592 ms (671.842 ms / 20) 895.658 -> 895.121 ( -0.06%) [ +0.03% +0.00% +0.09% / +0.04% -0.06% -0.02%] index_add_ strided 7 : Elapsed 44.795 ms (895.892 ms / 20) 671.944 -> 671.112 ( -0.12%) [ +0.00% +0.00% +0.02% / -0.03% -0.12% -0.08%] index_copy_ strided 7 : Elapsed 33.599 ms (671.974 ms / 20) 894.507 -> 895.033 ( +0.06%) [ +0.04% +0.00% +0.05% / +0.06% +0.13% +0.08%] index_add_ perm : Elapsed 44.741 ms (894.828 ms / 20) 670.494 -> 670.700 ( +0.03%) [ +0.01% +0.00% +0.01% / +0.03% +0.11% +0.12%] index_copy_ perm : Elapsed 33.527 ms (670.533 ms / 20) 894.876 -> 895.050 ( +0.02%) [ +0.00% +0.04% +0.01% / +0.02% +0.04% +0.08%] index_add_ perm_sorted : Elapsed 44.744 ms (894.876 ms / 20) 670.690 -> 670.863 ( +0.03%) [ +0.01% +0.00% +0.01% / +0.03% +0.10% +0.12%] index_copy_ perm_sorted : Elapsed 33.537 ms (670.740 ms / 20) 843.270 -> 836.246 ( -0.83%) [ +0.00% +0.00% +0.01% / -0.01% -0.80% -0.83%] index_select const : Elapsed 42.165 ms (843.307 ms / 20) 840.541 -> 839.505 ( -0.12%) [ +0.00% +0.00% +0.03% / -0.00% -0.12% -0.12%] index_select wrap : Elapsed 42.028 ms (840.558 ms / 20) 839.353 -> 838.186 ( -0.14%) [ +0.03% +0.00% +0.05% / +0.03% -0.12% -0.14%] index_select linear : Elapsed 41.981 ms (839.618 ms / 20) 840.164 -> 838.630 ( -0.18%) [ +0.00% +0.02% +0.00% / -0.03% -0.18% -0.18%] index_select reverse : Elapsed 42.010 ms (840.203 ms / 20) 843.278 -> 836.565 ( -0.80%) [ +0.01% +0.00% +0.01% / +0.01% -0.77% -0.80%] index_select skip64 : Elapsed 42.167 ms (843.337 ms / 20) 841.887 -> 836.561 ( -0.63%) [ +0.00% +0.03% +0.00% / +0.01% -0.63% -0.63%] index_select skip256 : Elapsed 42.094 ms (841.887 ms / 20) 839.930 -> 839.331 ( -0.07%) [ +0.00% +0.01% +0.02% / +0.04% -0.07% -0.05%] index_select spread : Elapsed 41.996 ms (839.930 ms / 20) 839.791 -> 839.786 ( -0.00%) [ +0.03% +0.00% +0.02% / +0.04% -0.00% +0.02%] index_select strided 3 : Elapsed 42.000 ms (840.002 ms / 20) 841.664 -> 837.922 ( -0.44%) [ +0.06% +0.00% +0.05% / +0.05% -0.44% -0.43%] index_select strided 5 : Elapsed 42.109 ms (842.186 ms / 20) 840.381 -> 838.259 ( -0.25%) [ +0.00% +0.02% +0.02% / -0.03% -0.24% -0.25%] index_select strided 7 : Elapsed 42.019 ms (840.381 ms / 20) 839.430 -> 839.438 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.06% +0.03% +0.00%] index_select strided 8 : Elapsed 41.981 ms (839.611 ms / 20) 839.437 -> 839.617 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.09% +0.02% +0.02%] index_select strided 16 : Elapsed 41.972 ms (839.437 ms / 20) 840.560 -> 839.818 ( -0.09%) [ +0.01% +0.00% +0.02% / +0.04% -0.09% -0.09%] index_select random : Elapsed 42.033 ms (840.662 ms / 20) 840.060 -> 838.823 ( -0.15%) [ +0.02% +0.00% +0.00% / +0.04% -0.13% -0.15%] index_select random_sorted : Elapsed 42.011 ms (840.221 ms / 20) out_shape = [20, 100, 40, 50] in_shape = [20, 100, 40, 256] idx_dim = 3 out_shape = [50, 100, 256, 40] in_shape = [20, 100, 256, 40] idx_dim = 0 out_shape = [20, 50, 256, 40] in_shape = [20, 100, 256, 40] idx_dim = 1 B = [20, 50, 256, 40] (stride (256, 204800, 1, 5120)) A = [20, 100, 256, 40] (stride (1, 20, 2000, 512000)) dim = 1 329.328 -> 331.077 ( +0.53%) [ +0.20% +0.00% +0.13% / +0.53% +6.55% +7.02%] index_select const : Elapsed 16.499 ms (329.983 ms / 20) 333.674 -> 332.208 ( -0.44%) [ +0.60% +0.00% +0.39% / -0.44% +2.12% +2.16%] index_select wrap : Elapsed 16.784 ms (335.681 ms / 20) 334.579 -> 333.904 ( -0.20%) [ +0.40% +0.34% +0.00% / -0.20% +1.61% +2.14%] index_select linear : Elapsed 16.796 ms (335.921 ms / 20) 333.041 -> 328.288 ( -1.43%) [ +0.39% +0.39% +0.00% / +0.01% -1.43% -1.31%] index_select reverse : Elapsed 16.716 ms (334.328 ms / 20) 328.191 -> 330.451 ( +0.69%) [ +0.83% +0.27% +0.00% / +0.69% +7.10% +7.11%] index_select skip64 : Elapsed 16.545 ms (330.910 ms / 20) 330.109 -> 328.826 ( -0.39%) [ +0.00% +0.14% +0.17% / -0.39% +6.10% +6.17%] index_select skip256 : Elapsed 16.505 ms (330.109 ms / 20) 334.842 -> 335.500 ( +0.20%) [ +0.44% +0.00% +0.60% / +0.20% +0.72% +1.12%] index_select spread : Elapsed 16.815 ms (336.308 ms / 20) 332.616 -> 333.368 ( +0.23%) [ +0.44% +0.68% +0.00% / +0.23% +0.34% +0.36%] index_select strided 3 : Elapsed 16.703 ms (334.064 ms / 20) 331.896 -> 331.505 ( -0.12%) [ +0.00% +0.33% +0.08% / -0.12% +1.36% +1.31%] index_select strided 5 : Elapsed 16.595 ms (331.896 ms / 20) 330.286 -> 330.106 ( -0.05%) [ +0.51% +0.33% +0.00% / -0.05% +1.64% +1.60%] index_select strided 7 : Elapsed 16.599 ms (331.986 ms / 20) 332.193 -> 331.895 ( -0.09%) [ +0.12% +0.35% +0.00% / -0.09% +1.62% +1.21%] index_select strided 8 : Elapsed 16.630 ms (332.605 ms / 20) 334.167 -> 333.326 ( -0.25%) [ +0.09% +0.05% +0.00% / -0.25% +1.25% +0.47%] index_select strided 16 : Elapsed 16.724 ms (334.479 ms / 20) 333.585 -> 332.668 ( -0.27%) [ +0.00% +0.04% +0.06% / -0.07% -0.27% +0.57%] index_select strided 64 : Elapsed 16.679 ms (333.585 ms / 20) 332.074 -> 333.800 ( +0.52%) [ +0.35% +0.00% +0.01% / +0.52% +2.52% +2.87%] index_select random : Elapsed 16.662 ms (333.247 ms / 20) 334.015 -> 334.097 ( +0.02%) [ +0.18% +0.00% +0.33% / +0.02% +1.08% +0.84%] index_select random_sorted : Elapsed 16.732 ms (334.631 ms / 20) 330.699 -> 329.641 ( -0.32%) [ +0.02% +0.20% +0.00% / -0.32% +0.07% +0.35%] index_select perm : Elapsed 16.539 ms (330.770 ms / 20) 333.929 -> 333.849 ( -0.02%) [ +0.57% +0.29% +0.00% / -0.02% +1.36% +1.62%] index_select perm_sorted : Elapsed 16.792 ms (335.843 ms / 20) out_shape = [20, 100, 50, 40] in_shape = [20, 100, 256, 40] idx_dim = 2 out_shape = [20, 100, 256, 50] in_shape = [20, 100, 256, 40] idx_dim = 3 out_shape = [50, 256, 40, 100] in_shape = [20, 256, 40, 100] idx_dim = 0 B = [50, 256, 40, 100] (stride (1024000, 1, 25600, 256)) A = [20, 256, 40, 100] (stride (100, 2000, 512000, 1)) dim = 0 644.662 -> 641.110 ( -0.55%) [ +0.00% +0.16% +0.16% / -0.03% -0.55% -0.45%] index_add_ linear : Elapsed 32.233 ms (644.662 ms / 20) 462.167 -> 460.251 ( -0.41%) [ +0.00% +0.11% +0.07% / +0.25% -0.21% -0.41%] index_copy_ linear : Elapsed 23.108 ms (462.167 ms / 20) 641.518 -> 642.301 ( +0.12%) [ +0.00% +0.49% +0.32% / +0.25% +0.12% +0.31%] index_add_ reverse : Elapsed 32.076 ms (641.518 ms / 20) 461.581 -> 460.970 ( -0.13%) [ +0.09% +0.06% +0.00% / +0.14% -0.04% -0.13%] index_copy_ reverse : Elapsed 23.099 ms (461.976 ms / 20) 643.270 -> 639.089 ( -0.65%) [ +0.11% +0.51% +0.00% / +0.25% -0.65% -0.18%] index_add_ spread : Elapsed 32.200 ms (643.999 ms / 20) 461.468 -> 460.453 ( -0.22%) [ +0.07% +0.12% +0.00% / +0.20% -0.17% -0.22%] index_copy_ spread : Elapsed 23.091 ms (461.810 ms / 20) 642.239 -> 639.972 ( -0.35%) [ +0.06% +0.30% +0.00% / -0.03% -0.35% +0.06%] index_add_ strided 3 : Elapsed 32.131 ms (642.628 ms / 20) 461.397 -> 460.410 ( -0.21%) [ +0.13% +0.00% +0.25% / -0.09% -0.21% -0.11%] index_copy_ strided 3 : Elapsed 23.099 ms (461.979 ms / 20) 641.633 -> 641.021 ( -0.10%) [ +0.22% +0.00% +0.15% / +0.58% +0.28% -0.10%] index_add_ strided 7 : Elapsed 32.151 ms (643.014 ms / 20) 461.480 -> 460.395 ( -0.24%) [ +0.12% +0.00% +0.00% / +0.11% -0.24% -0.15%] index_copy_ strided 7 : Elapsed 23.103 ms (462.053 ms / 20) 636.405 -> 639.866 ( +0.54%) [ +0.00% +0.66% +0.24% / +0.54% +1.14% +1.06%] index_add_ perm : Elapsed 31.820 ms (636.405 ms / 20) 454.662 -> 455.101 ( +0.10%) [ +0.01% +0.13% +0.00% / +0.10% +1.39% +1.45%] index_copy_ perm : Elapsed 22.737 ms (454.730 ms / 20) 636.869 -> 642.684 ( +0.91%) [ +0.33% +0.00% +0.41% / +0.91% +1.01% +0.97%] index_add_ perm_sorted : Elapsed 31.950 ms (638.998 ms / 20) 454.962 -> 455.766 ( +0.18%) [ +0.14% +0.10% +0.00% / +0.18% +1.31% +1.45%] index_copy_ perm_sorted : Elapsed 22.780 ms (455.592 ms / 20) 1248.435 -> 1247.516 ( -0.07%) [ +0.09% +0.04% +0.00% / +0.13% +0.05% -0.07%] index_select const : Elapsed 62.476 ms (1249.525 ms / 20) 1251.022 -> 1249.850 ( -0.09%) [ +0.00% +0.03% +0.09% / +0.12% -0.03% -0.09%] index_select wrap : Elapsed 62.551 ms (1251.022 ms / 20) 1250.443 -> 1249.947 ( -0.04%) [ +0.07% +0.04% +0.00% / +0.04% +0.09% -0.04%] index_select linear : Elapsed 62.567 ms (1251.349 ms / 20) 1251.152 -> 1251.456 ( +0.02%) [ +0.00% +0.12% +0.01% / +0.15% +0.02% +0.04%] index_select reverse : Elapsed 62.558 ms (1251.152 ms / 20) 1248.407 -> 1246.860 ( -0.12%) [ +0.09% +0.00% +0.15% / +0.11% +0.12% -0.12%] index_select skip64 : Elapsed 62.479 ms (1249.577 ms / 20) 1234.475 -> 1236.180 ( +0.14%) [ +0.20% +0.10% +0.00% / +0.14% +1.25% +1.12%] index_select skip256 : Elapsed 61.847 ms (1236.941 ms / 20) 1251.165 -> 1248.828 ( -0.19%) [ +0.00% +0.02% +0.02% / -0.02% -0.19% -0.09%] index_select spread : Elapsed 62.558 ms (1251.165 ms / 20) 1249.623 -> 1248.940 ( -0.05%) [ +0.17% +0.00% +0.18% / +0.16% +0.09% -0.05%] index_select strided 3 : Elapsed 62.589 ms (1251.771 ms / 20) 1251.335 -> 1240.550 ( -0.86%) [ +0.00% +0.07% +0.00% / +0.06% -0.86% -0.80%] index_select strided 5 : Elapsed 62.567 ms (1251.335 ms / 20) 1252.886 -> 1243.227 ( -0.77%) [ +0.00% +0.07% +0.01% / -0.21% -0.76% -0.77%] index_select strided 7 : Elapsed 62.644 ms (1252.886 ms / 20) 1250.805 -> 1249.885 ( -0.07%) [ +0.07% +0.02% +0.00% / +0.02% -0.07% -0.07%] index_select strided 8 : Elapsed 62.581 ms (1251.625 ms / 20) 1249.463 -> 1250.004 ( +0.04%) [ +0.10% +0.11% +0.00% / +0.14% +0.11% +0.04%] index_select strided 16 : Elapsed 62.537 ms (1250.731 ms / 20) 1250.056 -> 1248.978 ( -0.09%) [ +0.07% +0.00% +0.03% / -0.09% -0.01% -0.00%] index_select random : Elapsed 62.549 ms (1250.982 ms / 20) 1251.741 -> 1249.024 ( -0.22%) [ +0.06% +0.00% +0.02% / -0.05% -0.21% -0.22%] index_select random_sorted : Elapsed 62.626 ms (1252.525 ms / 20) B = [50, 256, 40, 100] (stride (10240, 1, 256, 512000)) A = [20, 256, 40, 100] (stride (40, 800, 1, 204800)) dim = 0 1008.091 -> 995.624 ( -1.24%) [ +0.20% +0.16% +0.00% / -0.06% -0.94% -1.24%] index_add_ linear : Elapsed 50.505 ms (1010.091 ms / 20) 764.489 -> 755.258 ( -1.21%) [ +0.00% +0.18% +0.06% / -0.08% -1.13% -1.21%] index_copy_ linear : Elapsed 38.224 ms (764.489 ms / 20) 1002.368 -> 993.866 ( -0.85%) [ +0.01% +0.04% +0.00% / +0.12% -0.80% -0.85%] index_add_ reverse : Elapsed 50.122 ms (1002.431 ms / 20) 763.602 -> 756.918 ( -0.88%) [ +0.08% +0.00% +0.01% / +0.06% -0.77% -0.88%] index_copy_ reverse : Elapsed 38.212 ms (764.245 ms / 20) 1000.795 -> 972.113 ( -2.87%) [ +0.32% +0.00% +0.07% / +0.27% -2.87% -2.67%] index_add_ spread : Elapsed 50.201 ms (1004.017 ms / 20) 764.167 -> 754.144 ( -1.31%) [ +0.00% +0.01% +0.19% / -0.16% -1.30% -1.31%] index_copy_ spread : Elapsed 38.208 ms (764.167 ms / 20) 1002.730 -> 998.132 ( -0.46%) [ +0.12% +0.02% +0.00% / -0.04% -0.46% -0.41%] index_add_ strided 3 : Elapsed 50.198 ms (1003.953 ms / 20) 762.847 -> 758.664 ( -0.55%) [ +0.18% +0.26% +0.00% / +0.05% -0.55% -0.55%] index_copy_ strided 3 : Elapsed 38.210 ms (764.203 ms / 20) 1035.345 -> 1005.164 ( -2.92%) [ +0.00% +0.24% +0.12% / -0.13% -2.74% -2.92%] index_add_ strided 7 : Elapsed 51.767 ms (1035.345 ms / 20) 768.630 -> 756.791 ( -1.54%) [ +0.00% +0.34% +0.11% / +0.07% -1.23% -1.54%] index_copy_ strided 7 : Elapsed 38.432 ms (768.630 ms / 20) 1006.028 -> 1000.706 ( -0.53%) [ +0.23% +0.00% +0.02% / +0.26% -0.53% -0.39%] index_add_ perm : Elapsed 50.416 ms (1008.323 ms / 20) 757.583 -> 753.252 ( -0.57%) [ +0.24% +0.18% +0.00% / +0.16% -0.57% -0.15%] index_copy_ perm : Elapsed 37.968 ms (759.367 ms / 20) 998.696 -> 974.030 ( -2.47%) [ +0.08% +0.04% +0.00% / +0.25% -2.33% -2.47%] index_add_ perm_sorted : Elapsed 49.975 ms (999.493 ms / 20) 757.854 -> 753.672 ( -0.55%) [ +0.14% +0.15% +0.00% / +0.01% -0.53% -0.55%] index_copy_ perm_sorted : Elapsed 37.945 ms (758.898 ms / 20) 2045.022 -> 2020.283 ( -1.21%) [ +0.00% +0.04% +0.07% / -0.12% -0.96% -1.21%] index_select const : Elapsed 102.251 ms (2045.022 ms / 20) 2051.479 -> 2038.770 ( -0.62%) [ +0.09% +0.02% +0.00% / -0.19% -0.62% -0.57%] index_select wrap : Elapsed 102.662 ms (2053.238 ms / 20) 2042.831 -> 2032.035 ( -0.53%) [ +0.00% +0.00% +0.01% / +0.02% -0.47% -0.53%] index_select linear : Elapsed 102.142 ms (2042.842 ms / 20) 2047.505 -> 2036.786 ( -0.52%) [ +0.17% +0.13% +0.00% / -0.13% -0.32% -0.52%] index_select reverse : Elapsed 102.550 ms (2051.005 ms / 20) 2041.914 -> 2026.737 ( -0.74%) [ +0.00% +0.01% +0.08% / +0.18% -0.74% -0.74%] index_select skip64 : Elapsed 102.096 ms (2041.914 ms / 20) 2041.540 -> 2022.664 ( -0.92%) [ +0.01% +0.00% +0.07% / -0.00% -0.92% -0.72%] index_select skip256 : Elapsed 102.089 ms (2041.781 ms / 20) 2046.443 -> 2033.075 ( -0.65%) [ +0.02% +0.02% +0.00% / -0.05% -0.60% -0.65%] index_select spread : Elapsed 102.339 ms (2046.783 ms / 20) 2047.589 -> 2043.276 ( -0.21%) [ +0.16% +0.00% +0.16% / +0.17% -0.18% -0.21%] index_select strided 3 : Elapsed 102.541 ms (2050.820 ms / 20) 2050.126 -> 2039.583 ( -0.51%) [ +0.10% +0.00% +0.16% / +0.06% -0.47% -0.51%] index_select strided 5 : Elapsed 102.606 ms (2052.114 ms / 20) 2048.842 -> 2039.607 ( -0.45%) [ +0.03% +0.21% +0.00% / +0.16% -0.36% -0.45%] index_select strided 7 : Elapsed 102.474 ms (2049.477 ms / 20) 2049.539 -> 2044.695 ( -0.24%) [ +0.05% +0.00% +0.07% / -0.03% -0.24% -0.22%] index_select strided 8 : Elapsed 102.527 ms (2050.542 ms / 20) 2046.971 -> 2044.793 ( -0.11%) [ +0.07% +0.07% +0.00% / +0.03% -0.04% -0.11%] index_select strided 16 : Elapsed 102.425 ms (2048.494 ms / 20) 2046.279 -> 2041.073 ( -0.25%) [ +0.00% +0.16% +0.08% / +0.10% -0.19% -0.25%] index_select random : Elapsed 102.314 ms (2046.279 ms / 20) 2046.694 -> 2033.012 ( -0.67%) [ +0.02% +0.04% +0.00% / +0.03% -0.67% -0.65%] index_select random_sorted : Elapsed 102.356 ms (2047.119 ms / 20) out_shape = [20, 50, 40, 100] in_shape = [20, 256, 40, 100] idx_dim = 1 out_shape = [20, 256, 50, 100] in_shape = [20, 256, 40, 100] idx_dim = 2 out_shape = [20, 256, 40, 50] in_shape = [20, 256, 40, 100] idx_dim = 3 out_shape = [50, 256, 100, 40] in_shape = [20, 256, 100, 40] idx_dim = 0 out_shape = [20, 50, 100, 40] in_shape = [20, 256, 100, 40] idx_dim = 1 B = [20, 50, 100, 40] (stride (200000, 4000, 40, 1)) A = [20, 256, 100, 40] (stride (1, 80000, 800, 20)) dim = 1 8.223 -> 8.250 ( +0.33%) [ +0.00% +0.01% +0.00% / +0.33% +3.60% +3.76%] index_select const : Elapsed 0.411 ms (8.223 ms / 20) 27.861 -> 27.550 ( -1.12%) [ +0.00% +0.82% +0.24% / -0.07% -1.06% -1.12%] index_select wrap : Elapsed 1.393 ms (27.861 ms / 20) 27.773 -> 27.595 ( -0.64%) [ +0.00% +0.54% +0.10% / +0.26% -0.64% +0.32%] index_select linear : Elapsed 1.389 ms (27.773 ms / 20) 27.617 -> 27.712 ( +0.34%) [ +0.00% +0.54% +0.13% / +0.57% +0.34% +0.41%] index_select reverse : Elapsed 1.381 ms (27.617 ms / 20) 8.250 -> 8.235 ( -0.18%) [ +0.13% +0.00% +0.00% / -0.18% +3.19% +3.26%] index_select skip64 : Elapsed 0.413 ms (8.261 ms / 20) 8.226 -> 8.238 ( +0.15%) [ +0.23% +0.00% +0.18% / +0.15% +3.56% +3.22%] index_select skip256 : Elapsed 0.412 ms (8.245 ms / 20) 27.464 -> 27.458 ( -0.02%) [ +0.00% +0.11% +0.31% / -0.02% +2.40% +2.20%] index_select spread : Elapsed 1.373 ms (27.464 ms / 20) 27.042 -> 26.992 ( -0.18%) [ +0.00% +0.29% +0.04% / -0.18% +3.20% +2.57%] index_select strided 3 : Elapsed 1.352 ms (27.042 ms / 20) 27.388 -> 27.699 ( +1.14%) [ +0.91% +1.36% +0.00% / +1.14% +2.24% +2.81%] index_select strided 5 : Elapsed 1.382 ms (27.636 ms / 20) 27.030 -> 26.916 ( -0.42%) [ +0.46% +0.00% +0.01% / -0.42% +0.62% +0.43%] index_select strided 7 : Elapsed 1.358 ms (27.155 ms / 20) 27.363 -> 27.428 ( +0.24%) [ +1.03% +1.10% +0.00% / +0.95% +0.72% +0.24%] index_select strided 8 : Elapsed 1.382 ms (27.645 ms / 20) 27.348 -> 27.514 ( +0.61%) [ +0.00% +0.30% +0.87% / +0.61% +2.74% +1.84%] index_select strided 16 : Elapsed 1.367 ms (27.348 ms / 20) 20.300 -> 20.286 ( -0.07%) [ +0.02% +0.00% +0.26% / +0.63% -0.07% +0.15%] index_select strided 64 : Elapsed 1.015 ms (20.304 ms / 20) 27.230 -> 27.172 ( -0.21%) [ +0.53% +0.41% +0.00% / -0.21% +3.25% +3.39%] index_select strided 100 : Elapsed 1.369 ms (27.373 ms / 20) 27.583 -> 27.525 ( -0.21%) [ +0.00% +0.60% +0.06% / -0.21% +0.20% +0.95%] index_select strided 255 : Elapsed 1.379 ms (27.583 ms / 20) 27.711 -> 27.924 ( +0.77%) [ +0.29% +0.81% +0.00% / +0.77% +1.09% +1.28%] index_select random : Elapsed 1.390 ms (27.790 ms / 20) 27.199 -> 26.601 ( -2.20%) [ +0.00% +0.16% +0.06% / -0.17% -2.20% -1.52%] index_select random_sorted : Elapsed 1.360 ms (27.199 ms / 20) 27.720 -> 27.839 ( +0.43%) [ +0.00% +0.48% +0.82% / +0.63% +0.43% +1.23%] index_select perm : Elapsed 1.386 ms (27.720 ms / 20) 28.162 -> 27.614 ( -1.95%) [ +0.84% +0.00% +0.74% / +0.85% -1.95% -0.75%] index_select perm_sorted : Elapsed 1.420 ms (28.398 ms / 20) out_shape = [20, 256, 50, 40] in_shape = [20, 256, 100, 40] idx_dim = 2 out_shape = [20, 256, 100, 50] in_shape = [20, 256, 100, 40] idx_dim = 3 B = [20, 256, 100, 50] (stride (5000, 100000, 50, 1)) A = [20, 256, 100, 40] (stride (4000, 80000, 40, 1)) dim = 3 733.616 -> 730.369 ( -0.44%) [ +0.00% +0.03% +0.07% / -0.03% -0.41% -0.44%] index_add_ linear : Elapsed 36.681 ms (733.616 ms / 20) 657.170 -> 656.888 ( -0.04%) [ +0.08% +0.01% +0.00% / -0.04% +0.02% +0.02%] index_copy_ linear : Elapsed 32.885 ms (657.691 ms / 20) 733.004 -> 730.884 ( -0.29%) [ +0.15% +0.00% +0.04% / -0.04% -0.29% -0.26%] index_add_ reverse : Elapsed 36.705 ms (734.094 ms / 20) 657.161 -> 656.752 ( -0.06%) [ +0.07% +0.00% +0.10% / -0.03% -0.06% -0.02%] index_copy_ reverse : Elapsed 32.882 ms (657.631 ms / 20) 730.291 -> 730.537 ( +0.03%) [ +0.31% +0.00% +0.44% / +0.25% +0.03% +0.25%] index_add_ spread : Elapsed 36.628 ms (732.555 ms / 20) 656.602 -> 656.871 ( +0.04%) [ +0.00% +0.02% +0.11% / +0.07% +0.06% +0.04%] index_copy_ spread : Elapsed 32.830 ms (656.602 ms / 20) 732.262 -> 730.391 ( -0.26%) [ +0.00% +0.02% +0.03% / +0.04% -0.15% -0.26%] index_add_ strided 3 : Elapsed 36.613 ms (732.262 ms / 20) 656.947 -> 656.802 ( -0.02%) [ +0.00% +0.04% +0.06% / -0.01% +0.03% -0.02%] index_copy_ strided 3 : Elapsed 32.847 ms (656.947 ms / 20) 731.054 -> 730.192 ( -0.12%) [ +0.32% +0.00% +0.18% / +0.24% +0.00% -0.12%] index_add_ strided 7 : Elapsed 36.668 ms (733.362 ms / 20) 657.020 -> 656.912 ( -0.02%) [ +0.00% +0.00% +0.01% / -0.02% +0.02% +0.03%] index_copy_ strided 7 : Elapsed 32.851 ms (657.020 ms / 20) 731.272 -> 730.684 ( -0.08%) [ +0.13% +0.26% +0.00% / +0.13% +0.18% -0.08%] index_add_ perm : Elapsed 36.612 ms (732.235 ms / 20) 656.106 -> 656.141 ( +0.01%) [ +0.17% +0.00% +0.09% / +0.01% +0.11% +0.08%] index_copy_ perm : Elapsed 32.862 ms (657.230 ms / 20) 732.217 -> 731.441 ( -0.11%) [ +0.06% +0.08% +0.00% / +0.02% -0.11% -0.08%] index_add_ perm_sorted : Elapsed 36.633 ms (732.660 ms / 20) 656.128 -> 655.955 ( -0.03%) [ +0.00% +0.13% +0.00% / -0.03% +0.03% +0.11%] index_copy_ perm_sorted : Elapsed 32.807 ms (656.148 ms / 20) 818.628 -> 818.577 ( -0.01%) [ +0.01% +0.00% +0.01% / -0.01% +0.04% +0.06%] index_select const : Elapsed 40.936 ms (818.717 ms / 20) 820.504 -> 819.750 ( -0.09%) [ +0.00% +0.03% +0.00% / -0.00% -0.09% +0.02%] index_select wrap : Elapsed 41.025 ms (820.504 ms / 20) 820.703 -> 819.704 ( -0.12%) [ +0.04% +0.00% +0.03% / +0.09% -0.12% +0.02%] index_select linear : Elapsed 41.052 ms (821.046 ms / 20) 820.591 -> 820.283 ( -0.04%) [ +0.02% +0.08% +0.00% / -0.04% +0.00% +0.04%] index_select reverse : Elapsed 41.039 ms (820.778 ms / 20) 818.411 -> 818.339 ( -0.01%) [ +0.06% +0.00% +0.00% / +0.05% +0.05% -0.01%] index_select skip64 : Elapsed 40.943 ms (818.862 ms / 20) 817.780 -> 817.197 ( -0.07%) [ +0.00% +0.01% +0.00% / -0.07% +0.17% +0.13%] index_select skip256 : Elapsed 40.889 ms (817.780 ms / 20) 820.794 -> 820.911 ( +0.01%) [ +0.04% +0.00% +0.12% / +0.12% +0.01% +0.04%] index_select spread : Elapsed 41.058 ms (821.154 ms / 20) 821.257 -> 820.864 ( -0.05%) [ +0.08% +0.06% +0.00% / -0.05% -0.02% +0.07%] index_select strided 3 : Elapsed 41.096 ms (821.929 ms / 20) 821.344 -> 819.793 ( -0.19%) [ +0.00% +0.03% +0.04% / -0.01% -0.19% -0.12%] index_select strided 5 : Elapsed 41.067 ms (821.344 ms / 20) 820.815 -> 820.238 ( -0.07%) [ +0.01% +0.12% +0.00% / +0.12% -0.07% -0.02%] index_select strided 7 : Elapsed 41.046 ms (820.929 ms / 20) 820.900 -> 820.990 ( +0.01%) [ +0.11% +0.00% +0.11% / +0.06% +0.01% +0.03%] index_select strided 8 : Elapsed 41.090 ms (821.800 ms / 20) 820.961 -> 821.329 ( +0.04%) [ +0.06% +0.00% +0.07% / +0.08% +0.04% +0.08%] index_select strided 16 : Elapsed 41.072 ms (821.431 ms / 20) 821.052 -> 821.291 ( +0.03%) [ +0.00% +0.13% +0.07% / +0.09% +0.11% +0.03%] index_select random : Elapsed 41.053 ms (821.052 ms / 20) 821.049 -> 821.475 ( +0.05%) [ +0.10% +0.06% +0.00% / +0.07% +0.05% +0.11%] index_select random_sorted : Elapsed 41.093 ms (821.861 ms / 20) out_shape = [50, 20, 100, 256] in_shape = [40, 20, 100, 256] idx_dim = 0 out_shape = [40, 50, 100, 256] in_shape = [40, 20, 100, 256] idx_dim = 1 B = [40, 50, 100, 256] (stride (1280000, 1, 12800, 50)) A = [40, 20, 100, 256] (stride (2000, 1, 20, 80000)) dim = 1 855.619 -> 853.668 ( -0.23%) [ +0.00% +0.00% +0.03% / -0.03% -0.23% -0.16%] index_add_ linear : Elapsed 42.782 ms (855.643 ms / 20) 799.480 -> 798.939 ( -0.07%) [ +0.02% +0.00% +0.03% / +0.01% -0.07% -0.07%] index_copy_ linear : Elapsed 39.982 ms (799.640 ms / 20) 854.862 -> 854.385 ( -0.06%) [ +0.25% +0.08% +0.00% / +0.04% +0.02% -0.06%] index_add_ reverse : Elapsed 42.848 ms (856.957 ms / 20) 799.458 -> 799.057 ( -0.05%) [ +0.06% +0.00% +0.03% / +0.06% -0.05% -0.01%] index_copy_ reverse : Elapsed 39.996 ms (799.922 ms / 20) 853.882 -> 854.245 ( +0.04%) [ +0.00% +0.19% +0.15% / +0.20% +0.04% +0.09%] index_add_ spread : Elapsed 42.694 ms (853.882 ms / 20) 799.097 -> 799.096 ( -0.00%) [ +0.00% +0.05% +0.09% / +0.08% +0.02% -0.00%] index_copy_ spread : Elapsed 39.955 ms (799.097 ms / 20) 854.556 -> 854.480 ( -0.01%) [ +0.12% +0.12% +0.00% / +0.21% +0.11% -0.01%] index_add_ strided 3 : Elapsed 42.778 ms (855.553 ms / 20) 799.182 -> 798.914 ( -0.03%) [ +0.05% +0.03% +0.00% / +0.02% +0.00% -0.03%] index_copy_ strided 3 : Elapsed 39.980 ms (799.600 ms / 20) 854.737 -> 854.075 ( -0.08%) [ +0.00% +0.04% +0.11% / -0.08% -0.03% -0.03%] index_add_ strided 7 : Elapsed 42.737 ms (854.737 ms / 20) 799.108 -> 798.911 ( -0.02%) [ +0.00% +0.01% +0.01% / +0.02% -0.00% -0.02%] index_copy_ strided 7 : Elapsed 39.955 ms (799.108 ms / 20) 853.102 -> 854.582 ( +0.17%) [ +0.00% +0.12% +0.13% / +0.20% +0.21% +0.17%] index_add_ perm : Elapsed 42.655 ms (853.102 ms / 20) 798.304 -> 798.934 ( +0.08%) [ +0.04% +0.00% +0.06% / +0.08% +0.08% +0.10%] index_copy_ perm : Elapsed 39.929 ms (798.589 ms / 20) 853.700 -> 854.437 ( +0.09%) [ +0.00% +0.01% +0.10% / +0.09% +0.10% +0.11%] index_add_ perm_sorted : Elapsed 42.685 ms (853.700 ms / 20) 798.383 -> 798.496 ( +0.01%) [ +0.00% +0.00% +0.02% / +0.01% +0.07% +0.09%] index_copy_ perm_sorted : Elapsed 39.920 ms (798.408 ms / 20) 2002.747 -> 2002.638 ( -0.01%) [ +0.00% +0.01% +0.02% / +0.01% -0.01% +0.00%] index_select const : Elapsed 100.137 ms (2002.747 ms / 20) 2003.014 -> 2002.484 ( -0.03%) [ +0.00% +0.00% +0.02% / +0.02% -0.01% -0.03%] index_select wrap : Elapsed 100.156 ms (2003.113 ms / 20) 2003.187 -> 2002.493 ( -0.03%) [ +0.01% +0.00% +0.00% / -0.03% -0.02% -0.02%] index_select linear : Elapsed 100.166 ms (2003.329 ms / 20) 2002.434 -> 2002.277 ( -0.01%) [ +0.01% +0.02% +0.00% / +0.02% +0.03% -0.01%] index_select reverse : Elapsed 100.131 ms (2002.616 ms / 20) 2002.839 -> 2002.414 ( -0.02%) [ +0.02% +0.00% +0.01% / -0.02% +0.01% +0.00%] index_select skip64 : Elapsed 100.164 ms (2003.274 ms / 20) 2001.000 -> 2001.532 ( +0.03%) [ +0.02% +0.04% +0.00% / +0.03% +0.12% +0.09%] index_select skip256 : Elapsed 100.066 ms (2001.311 ms / 20) 2002.832 -> 2002.463 ( -0.02%) [ +0.00% +0.02% +0.03% / +0.02% +0.01% -0.02%] index_select spread : Elapsed 100.142 ms (2002.832 ms / 20) 2002.868 -> 2002.582 ( -0.01%) [ +0.02% +0.00% +0.01% / +0.02% -0.01% -0.00%] index_select strided 3 : Elapsed 100.163 ms (2003.269 ms / 20) 2003.062 -> 2001.657 ( -0.07%) [ +0.00% +0.00% +0.02% / +0.01% -0.05% -0.07%] index_select strided 5 : Elapsed 100.155 ms (2003.110 ms / 20) 2003.200 -> 2001.301 ( -0.09%) [ +0.02% +0.00% +0.00% / +0.00% -0.09% -0.09%] index_select strided 7 : Elapsed 100.183 ms (2003.668 ms / 20) 2003.225 -> 2002.507 ( -0.04%) [ +0.01% +0.02% +0.00% / -0.00% -0.02% -0.04%] index_select strided 8 : Elapsed 100.174 ms (2003.478 ms / 20) 2002.794 -> 2002.524 ( -0.01%) [ +0.00% +0.01% +0.01% / +0.01% +0.01% -0.01%] index_select strided 16 : Elapsed 100.140 ms (2002.794 ms / 20) 2003.284 -> 2002.332 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.02% -0.05% -0.03%] index_select random : Elapsed 100.167 ms (2003.337 ms / 20) 2003.081 -> 2002.623 ( -0.02%) [ +0.01% +0.00% +0.01% / +0.02% -0.01% -0.02%] index_select random_sorted : Elapsed 100.164 ms (2003.283 ms / 20) out_shape = [40, 20, 50, 256] in_shape = [40, 20, 100, 256] idx_dim = 2 out_shape = [40, 20, 100, 50] in_shape = [40, 20, 100, 256] idx_dim = 3 out_shape = [50, 20, 256, 100] in_shape = [40, 20, 256, 100] idx_dim = 0 B = [50, 20, 256, 100] (stride (512000, 25600, 1, 256)) A = [40, 20, 256, 100] (stride (2000, 100, 80000, 1)) dim = 0 686.992 -> 684.338 ( -0.39%) [ +0.00% +0.12% +0.08% / +0.02% -0.39% -0.35%] index_add_ linear : Elapsed 34.350 ms (686.992 ms / 20) 498.547 -> 497.887 ( -0.13%) [ +0.00% +0.10% +0.03% / +0.07% -0.13% +0.02%] index_copy_ linear : Elapsed 24.927 ms (498.547 ms / 20) 686.298 -> 685.876 ( -0.06%) [ +0.26% +0.14% +0.00% / +0.13% -0.05% -0.06%] index_add_ reverse : Elapsed 34.404 ms (688.081 ms / 20) 498.096 -> 498.235 ( +0.03%) [ +0.24% +0.18% +0.00% / +0.03% +0.04% +0.06%] index_copy_ reverse : Elapsed 24.965 ms (499.307 ms / 20) 685.807 -> 683.820 ( -0.29%) [ +0.13% +0.00% +0.09% / -0.22% -0.29% -0.17%] index_add_ spread : Elapsed 34.333 ms (686.665 ms / 20) 498.696 -> 498.463 ( -0.05%) [ +0.09% +0.02% +0.00% / -0.05% +0.12% +0.12%] index_copy_ spread : Elapsed 24.958 ms (499.152 ms / 20) 687.092 -> 686.962 ( -0.02%) [ +0.00% +0.17% +0.16% / -0.02% +0.20% +0.10%] index_add_ strided 3 : Elapsed 34.355 ms (687.092 ms / 20) 498.303 -> 498.777 ( +0.10%) [ +0.14% +0.10% +0.00% / +0.18% +0.21% +0.10%] index_copy_ strided 3 : Elapsed 24.950 ms (498.996 ms / 20) 685.957 -> 685.744 ( -0.03%) [ +0.00% +0.09% +0.21% / +0.00% +0.33% -0.03%] index_add_ strided 7 : Elapsed 34.298 ms (685.957 ms / 20) 498.228 -> 497.633 ( -0.12%) [ +0.04% +0.08% +0.00% / -0.12% +0.04% +0.01%] index_copy_ strided 7 : Elapsed 24.921 ms (498.410 ms / 20) 681.292 -> 681.102 ( -0.03%) [ +0.13% +0.05% +0.00% / -0.03% +0.86% +0.87%] index_add_ perm : Elapsed 34.109 ms (682.179 ms / 20) 493.510 -> 493.679 ( +0.03%) [ +0.04% +0.00% +0.06% / +0.03% +1.07% +1.18%] index_copy_ perm : Elapsed 24.685 ms (493.703 ms / 20) 677.494 -> 679.016 ( +0.22%) [ +0.07% +0.13% +0.00% / +0.22% +1.40% +1.12%] index_add_ perm_sorted : Elapsed 33.899 ms (677.989 ms / 20) 492.296 -> 493.303 ( +0.20%) [ +0.14% +0.10% +0.00% / +0.20% +1.38% +1.29%] index_copy_ perm_sorted : Elapsed 24.649 ms (492.978 ms / 20) 640.566 -> 640.876 ( +0.05%) [ +0.12% +0.09% +0.00% / +0.05% +0.30% +0.32%] index_select const : Elapsed 32.067 ms (641.330 ms / 20) 641.274 -> 640.782 ( -0.08%) [ +0.00% +0.14% +0.03% / -0.08% -0.02% +0.16%] index_select wrap : Elapsed 32.064 ms (641.274 ms / 20) 640.635 -> 640.968 ( +0.05%) [ +0.11% +0.11% +0.00% / +0.10% +0.23% +0.05%] index_select linear : Elapsed 32.066 ms (641.321 ms / 20) 639.499 -> 639.806 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.60% +0.64%] index_select reverse : Elapsed 32.024 ms (640.480 ms / 20) 639.571 -> 640.713 ( +0.18%) [ +0.10% +0.28% +0.00% / +0.18% +0.48% +0.39%] index_select skip64 : Elapsed 32.012 ms (640.239 ms / 20) 633.436 -> 634.123 ( +0.11%) [ +0.19% +0.00% +0.05% / +0.11% +1.23% +1.44%] index_select skip256 : Elapsed 31.731 ms (634.626 ms / 20) 640.231 -> 641.112 ( +0.14%) [ +0.05% +0.19% +0.00% / +0.14% +0.17% +0.25%] index_select spread : Elapsed 32.028 ms (640.559 ms / 20) 641.119 -> 640.600 ( -0.08%) [ +0.12% +0.00% +0.11% / +0.05% -0.08% -0.03%] index_select strided 3 : Elapsed 32.093 ms (641.862 ms / 20) 640.960 -> 637.229 ( -0.58%) [ +0.09% +0.10% +0.00% / -0.02% -0.53% -0.58%] index_select strided 5 : Elapsed 32.075 ms (641.508 ms / 20) 641.291 -> 637.833 ( -0.54%) [ +0.00% +0.11% +0.00% / -0.11% -0.50% -0.54%] index_select strided 7 : Elapsed 32.065 ms (641.291 ms / 20) 640.601 -> 639.972 ( -0.10%) [ +0.12% +0.00% +0.09% / -0.10% +0.31% +0.23%] index_select strided 8 : Elapsed 32.070 ms (641.391 ms / 20) 640.245 -> 640.747 ( +0.08%) [ +0.02% +0.10% +0.00% / +0.08% +0.38% +0.27%] index_select strided 16 : Elapsed 32.019 ms (640.383 ms / 20) 641.805 -> 641.446 ( -0.06%) [ +0.01% +0.00% +0.00% / -0.06% +0.07% -0.01%] index_select random : Elapsed 32.095 ms (641.891 ms / 20) 640.486 -> 641.246 ( +0.12%) [ +0.03% +0.00% +0.09% / +0.12% +0.13% +0.28%] index_select random_sorted : Elapsed 32.033 ms (640.666 ms / 20) out_shape = [40, 50, 256, 100] in_shape = [40, 20, 256, 100] idx_dim = 1 out_shape = [40, 20, 50, 100] in_shape = [40, 20, 256, 100] idx_dim = 2 out_shape = [40, 20, 256, 50] in_shape = [40, 20, 256, 100] idx_dim = 3 out_shape = [50, 100, 20, 256] in_shape = [40, 100, 20, 256] idx_dim = 0 out_shape = [40, 50, 20, 256] in_shape = [40, 100, 20, 256] idx_dim = 1 out_shape = [40, 100, 50, 256] in_shape = [40, 100, 20, 256] idx_dim = 2 B = [40, 100, 50, 256] (stride (256, 512000, 10240, 1)) A = [40, 100, 20, 256] (stride (5120, 204800, 256, 1)) dim = 2 176.187 -> 176.337 ( +0.09%) [ +0.00% +0.02% +0.08% / +0.09% +0.24% +0.22%] index_add_ linear : Elapsed 8.809 ms (176.187 ms / 20) 169.280 -> 169.416 ( +0.08%) [ +0.00% +0.02% +0.03% / +0.08% +0.13% +0.19%] index_copy_ linear : Elapsed 8.464 ms (169.280 ms / 20) 176.232 -> 176.282 ( +0.03%) [ +0.06% +0.00% +0.02% / +0.03% +0.18% +0.19%] index_add_ reverse : Elapsed 8.817 ms (176.334 ms / 20) 169.266 -> 169.359 ( +0.05%) [ +0.01% +0.00% +0.01% / +0.05% +0.12% +0.11%] index_copy_ reverse : Elapsed 8.464 ms (169.276 ms / 20) 176.459 -> 176.458 ( -0.00%) [ +0.03% +0.00% +0.04% / +0.05% -0.00% +0.02%] index_add_ spread : Elapsed 8.825 ms (176.508 ms / 20) 169.390 -> 169.367 ( -0.01%) [ +0.00% +0.02% +0.01% / +0.02% -0.00% -0.01%] index_copy_ spread : Elapsed 8.469 ms (169.390 ms / 20) 176.189 -> 176.397 ( +0.12%) [ +0.00% +0.02% +0.08% / +0.12% +0.13% +0.16%] index_add_ strided 3 : Elapsed 8.809 ms (176.189 ms / 20) 169.273 -> 169.330 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.12% +0.03% +0.07%] index_copy_ strided 3 : Elapsed 8.466 ms (169.321 ms / 20) 176.651 -> 176.531 ( -0.07%) [ +0.01% +0.00% +0.07% / +0.04% -0.02% -0.07%] index_add_ strided 7 : Elapsed 8.834 ms (176.672 ms / 20) 169.540 -> 169.415 ( -0.07%) [ +0.07% +0.00% +0.13% / +0.12% -0.04% -0.07%] index_copy_ strided 7 : Elapsed 8.483 ms (169.667 ms / 20) 176.317 -> 176.463 ( +0.08%) [ +0.00% +0.02% +0.09% / +0.08% +0.20% +0.28%] index_add_ perm : Elapsed 8.816 ms (176.317 ms / 20) 169.330 -> 169.446 ( +0.07%) [ +0.00% +0.00% +0.06% / +0.12% +0.07% +0.12%] index_copy_ perm : Elapsed 8.466 ms (169.330 ms / 20) 176.115 -> 176.217 ( +0.06%) [ +0.03% +0.02% +0.00% / +0.06% +0.31% +0.27%] index_add_ perm_sorted : Elapsed 8.808 ms (176.163 ms / 20) 169.225 -> 169.284 ( +0.03%) [ +0.00% +0.04% +0.07% / +0.03% +0.15% +0.15%] index_copy_ perm_sorted : Elapsed 8.461 ms (169.225 ms / 20) 421.817 -> 420.601 ( -0.29%) [ +0.00% +0.02% +0.04% / +0.03% -0.28% -0.29%] index_select const : Elapsed 21.091 ms (421.817 ms / 20) 421.304 -> 421.543 ( +0.06%) [ +0.01% +0.00% +0.04% / +0.06% +0.12% +0.13%] index_select wrap : Elapsed 21.067 ms (421.344 ms / 20) 419.389 -> 419.615 ( +0.05%) [ +0.00% +0.02% +0.06% / +0.05% +0.29% +0.33%] index_select linear : Elapsed 20.969 ms (419.389 ms / 20) 420.775 -> 420.933 ( +0.04%) [ +0.00% +0.03% +0.03% / +0.04% +0.21% +0.23%] index_select reverse : Elapsed 21.039 ms (420.775 ms / 20) 421.934 -> 420.250 ( -0.40%) [ +0.00% +0.02% +0.03% / +0.05% -0.37% -0.40%] index_select skip64 : Elapsed 21.097 ms (421.934 ms / 20) 421.936 -> 420.528 ( -0.33%) [ +0.02% +0.00% +0.07% / +0.04% -0.33% -0.33%] index_select skip256 : Elapsed 21.101 ms (422.014 ms / 20) 420.648 -> 420.800 ( +0.04%) [ +0.00% +0.00% +0.02% / +0.04% +0.12% +0.07%] index_select spread : Elapsed 21.032 ms (420.648 ms / 20) 421.361 -> 421.458 ( +0.02%) [ +0.00% +0.01% +0.03% / +0.02% +0.11% +0.12%] index_select strided 3 : Elapsed 21.068 ms (421.361 ms / 20) 422.734 -> 421.558 ( -0.28%) [ +0.00% +0.01% +0.05% / +0.07% -0.24% -0.28%] index_select strided 5 : Elapsed 21.137 ms (422.734 ms / 20) 421.223 -> 421.587 ( +0.09%) [ +0.01% +0.00% +0.06% / +0.09% +0.12% +0.10%] index_select strided 7 : Elapsed 21.064 ms (421.283 ms / 20) 421.163 -> 421.340 ( +0.04%) [ +0.03% +0.00% +0.06% / +0.04% +0.15% +0.15%] index_select strided 8 : Elapsed 21.065 ms (421.306 ms / 20) 421.206 -> 421.348 ( +0.03%) [ +0.00% +0.00% +0.05% / +0.03% +0.14% +0.11%] index_select strided 16 : Elapsed 21.060 ms (421.210 ms / 20) 421.697 -> 421.754 ( +0.01%) [ +0.00% +0.02% +0.02% / +0.07% +0.03% +0.01%] index_select random : Elapsed 21.085 ms (421.697 ms / 20) 420.735 -> 420.565 ( -0.04%) [ +0.00% +0.00% +0.01% / +0.02% -0.02% -0.04%] index_select random_sorted : Elapsed 21.037 ms (420.735 ms / 20) B = [40, 100, 50, 256] (stride (1, 10240, 1024000, 40)) A = [40, 100, 20, 256] (stride (20, 204800, 1, 800)) dim = 2 648.421 -> 648.730 ( +0.05%) [ +0.11% +0.06% +0.00% / +0.05% +0.16% +0.17%] index_add_ linear : Elapsed 32.458 ms (649.166 ms / 20) 651.754 -> 652.329 ( +0.09%) [ +0.05% +0.00% +0.09% / +0.09% +0.12% +0.20%] index_copy_ linear : Elapsed 32.603 ms (652.068 ms / 20) 648.654 -> 648.886 ( +0.04%) [ +0.03% +0.00% +0.00% / +0.04% +0.10% +0.10%] index_add_ reverse : Elapsed 32.444 ms (648.872 ms / 20) 652.432 -> 652.344 ( -0.01%) [ +0.15% +0.00% +0.08% / -0.01% +0.08% +0.05%] index_copy_ reverse : Elapsed 32.671 ms (653.417 ms / 20) 648.902 -> 648.799 ( -0.02%) [ +0.03% +0.00% +0.00% / -0.02% +0.05% +0.10%] index_add_ spread : Elapsed 32.456 ms (649.112 ms / 20) 652.864 -> 652.588 ( -0.04%) [ +0.02% +0.00% +0.06% / -0.04% +0.00% +0.02%] index_copy_ spread : Elapsed 32.649 ms (652.970 ms / 20) 648.669 -> 648.700 ( +0.00%) [ +0.06% +0.00% +0.02% / +0.00% +0.10% +0.11%] index_add_ strided 3 : Elapsed 32.453 ms (649.055 ms / 20) 651.967 -> 651.786 ( -0.03%) [ +0.09% +0.00% +0.07% / -0.03% +0.26% +0.24%] index_copy_ strided 3 : Elapsed 32.628 ms (652.560 ms / 20) 648.736 -> 648.699 ( -0.01%) [ +0.03% +0.00% +0.00% / -0.01% +0.13% +0.10%] index_add_ strided 7 : Elapsed 32.447 ms (648.933 ms / 20) 652.913 -> 652.874 ( -0.01%) [ +0.00% +0.01% +0.08% / -0.01% +0.06% +0.08%] index_copy_ strided 7 : Elapsed 32.646 ms (652.913 ms / 20) 649.590 -> 649.224 ( -0.06%) [ +0.00% +0.02% +0.01% / +0.02% -0.06% -0.01%] index_add_ perm : Elapsed 32.480 ms (649.590 ms / 20) 652.212 -> 652.358 ( +0.02%) [ +0.00% +0.08% +0.06% / +0.02% +0.10% +0.11%] index_copy_ perm : Elapsed 32.611 ms (652.212 ms / 20) 649.596 -> 649.343 ( -0.04%) [ +0.04% +0.00% +0.01% / +0.01% -0.04% -0.01%] index_add_ perm_sorted : Elapsed 32.494 ms (649.876 ms / 20) 652.076 -> 652.720 ( +0.10%) [ +0.00% +0.06% +0.03% / +0.10% +0.22% +0.17%] index_copy_ perm_sorted : Elapsed 32.604 ms (652.076 ms / 20) 1675.189 -> 1675.024 ( -0.01%) [ +0.07% +0.00% +0.07% / -0.01% +0.10% +0.07%] index_select const : Elapsed 83.816 ms (1676.315 ms / 20) 1674.541 -> 1674.640 ( +0.01%) [ +0.08% +0.02% +0.00% / +0.01% +0.13% +0.12%] index_select wrap : Elapsed 83.795 ms (1675.905 ms / 20) 1673.967 -> 1675.362 ( +0.08%) [ +0.05% +0.17% +0.00% / +0.08% +0.14% +0.13%] index_select linear : Elapsed 83.741 ms (1674.814 ms / 20) 1674.152 -> 1675.653 ( +0.09%) [ +0.00% +0.06% +0.08% / +0.09% +0.11% +0.20%] index_select reverse : Elapsed 83.708 ms (1674.152 ms / 20) 1673.064 -> 1675.166 ( +0.13%) [ +0.20% +0.12% +0.00% / +0.13% +0.25% +0.23%] index_select skip64 : Elapsed 83.824 ms (1676.482 ms / 20) 1674.802 -> 1674.788 ( -0.00%) [ +0.03% +0.00% +0.07% / -0.00% +0.13% +0.11%] index_select skip256 : Elapsed 83.767 ms (1675.332 ms / 20) 1674.693 -> 1674.312 ( -0.02%) [ +0.00% +0.03% +0.11% / -0.02% +0.20% +0.14%] index_select spread : Elapsed 83.735 ms (1674.693 ms / 20) 1674.740 -> 1673.121 ( -0.10%) [ +0.04% +0.02% +0.00% / -0.10% +0.12% +0.12%] index_select strided 3 : Elapsed 83.774 ms (1675.486 ms / 20) 1673.454 -> 1674.965 ( +0.09%) [ +0.12% +0.00% +0.15% / +0.09% +0.11% +0.19%] index_select strided 5 : Elapsed 83.772 ms (1675.445 ms / 20) 1675.246 -> 1674.636 ( -0.04%) [ +0.05% +0.01% +0.00% / -0.04% +0.07% +0.00%] index_select strided 7 : Elapsed 83.804 ms (1676.086 ms / 20) 1674.323 -> 1673.839 ( -0.03%) [ +0.06% +0.12% +0.00% / -0.03% +0.19% +0.16%] index_select strided 8 : Elapsed 83.764 ms (1675.289 ms / 20) 1674.318 -> 1676.263 ( +0.12%) [ +0.16% +0.02% +0.00% / +0.12% +0.22% +0.22%] index_select strided 16 : Elapsed 83.846 ms (1676.916 ms / 20) 1674.697 -> 1673.782 ( -0.05%) [ +0.05% +0.04% +0.00% / -0.05% +0.17% +0.11%] index_select random : Elapsed 83.776 ms (1675.524 ms / 20) 1674.449 -> 1675.070 ( +0.04%) [ +0.00% +0.04% +0.14% / +0.04% +0.15% +0.07%] index_select random_sorted : Elapsed 83.722 ms (1674.449 ms / 20) B = [40, 100, 50, 256] (stride (5000, 50, 1, 200000)) A = [40, 100, 20, 256] (stride (100, 1, 1024000, 4000)) dim = 2 BEST 978.051 -> 250.401 (-74.40%) [ +0.00% +0.09% +0.08% / -73.74% -74.21% -74.40%] index_add_ linear : Elapsed 48.903 ms (978.051 ms / 20) BEST 709.043 -> 225.216 (-68.24%) [ +0.18% +0.05% +0.00% / -67.67% -68.24% -67.90%] index_copy_ linear : Elapsed 35.518 ms (710.351 ms / 20) BEST 977.350 -> 246.250 (-74.80%) [ +0.02% +0.00% +0.05% / -73.46% -74.80% -74.62%] index_add_ reverse : Elapsed 48.876 ms (977.519 ms / 20) BEST 708.380 -> 225.233 (-68.20%) [ +0.07% +0.00% +0.06% / -67.74% -67.83% -68.20%] index_copy_ reverse : Elapsed 35.445 ms (708.893 ms / 20) BEST 976.889 -> 276.378 (-71.71%) [ +0.25% +0.20% +0.00% / -70.74% -71.68% -71.71%] index_add_ spread : Elapsed 48.965 ms (979.297 ms / 20) BEST 709.066 -> 255.493 (-63.97%) [ +0.00% +0.07% +0.08% / -63.32% -63.86% -63.97%] index_copy_ spread : Elapsed 35.453 ms (709.066 ms / 20) BEST 978.156 -> 285.398 (-70.82%) [ +0.00% +0.01% +0.03% / -70.28% -70.67% -70.82%] index_add_ strided 3 : Elapsed 48.908 ms (978.156 ms / 20) BEST 709.046 -> 271.292 (-61.74%) [ +0.16% +0.00% +0.03% / -61.71% -61.74% -61.55%] index_copy_ strided 3 : Elapsed 35.509 ms (710.182 ms / 20) BEST 977.867 -> 283.926 (-70.96%) [ +0.17% +0.09% +0.00% / -70.22% -70.42% -70.96%] index_add_ strided 7 : Elapsed 48.976 ms (979.511 ms / 20) BEST 709.233 -> 274.880 (-61.24%) [ +0.00% +0.15% +0.06% / -61.24% -60.41% -61.23%] index_copy_ strided 7 : Elapsed 35.462 ms (709.233 ms / 20) BEST 978.129 -> 283.169 (-71.05%) [ +0.23% +0.16% +0.00% / -69.69% -70.52% -71.05%] index_add_ perm : Elapsed 49.019 ms (980.373 ms / 20) BEST 710.514 -> 268.530 (-62.21%) [ +0.11% +0.00% +0.03% / -61.93% -62.21% -61.87%] index_copy_ perm : Elapsed 35.565 ms (711.293 ms / 20) BEST 978.896 -> 274.831 (-71.92%) [ +0.00% +0.08% +0.12% / -70.95% -71.92% -71.36%] index_add_ perm_sorted : Elapsed 48.945 ms (978.896 ms / 20) BEST 708.529 -> 257.163 (-63.70%) [ +0.30% +0.27% +0.00% / -63.32% -63.70% -63.38%] index_copy_ perm_sorted : Elapsed 35.532 ms (710.632 ms / 20) BEST 1782.795 -> 172.890 (-90.30%) [ +0.00% +0.05% +0.11% / -90.29% -90.30% -90.27%] index_select const : Elapsed 89.140 ms (1782.795 ms / 20) BEST 1820.698 -> 495.863 (-72.77%) [ +0.06% +0.00% +0.05% / -72.77% -72.47% -72.47%] index_select wrap : Elapsed 91.094 ms (1821.879 ms / 20) BEST 1793.390 -> 363.940 (-79.71%) [ +0.00% +0.02% +0.02% / -79.42% -79.71% -79.62%] index_select linear : Elapsed 89.669 ms (1793.390 ms / 20) BEST 1802.614 -> 365.833 (-79.71%) [ +0.13% +0.01% +0.00% / -79.71% -79.57% -79.63%] index_select reverse : Elapsed 90.251 ms (1805.010 ms / 20) BEST 1783.806 -> 171.147 (-90.41%) [ +0.07% +0.00% +0.03% / -90.30% -90.41% -90.40%] index_select skip64 : Elapsed 89.249 ms (1784.990 ms / 20) BEST 1781.918 -> 172.904 (-90.30%) [ +0.05% +0.00% +0.05% / -90.15% -90.30% -90.27%] index_select skip256 : Elapsed 89.136 ms (1782.724 ms / 20) BEST 1802.957 -> 358.506 (-80.12%) [ +0.00% +0.02% +0.04% / -79.65% -79.97% -80.12%] index_select spread : Elapsed 90.148 ms (1802.957 ms / 20) BEST 1818.563 -> 497.362 (-72.65%) [ +0.07% +0.01% +0.00% / -72.56% -72.65% -72.54%] index_select strided 3 : Elapsed 90.988 ms (1819.759 ms / 20) BEST 1825.291 -> 182.263 (-90.01%) [ +0.07% +0.01% +0.00% / -90.01% -89.77% -89.74%] index_select strided 5 : Elapsed 91.332 ms (1826.636 ms / 20) BEST 1819.840 -> 498.546 (-72.60%) [ +0.00% +0.10% +0.12% / -72.60% -72.31% -72.16%] index_select strided 7 : Elapsed 90.992 ms (1819.840 ms / 20) BEST 1812.124 -> 188.563 (-89.59%) [ +0.00% +0.17% +0.06% / -89.59% -89.59% -89.59%] index_select strided 8 : Elapsed 90.606 ms (1812.124 ms / 20) BEST 1817.058 -> 188.792 (-89.61%) [ +0.06% +0.00% +0.10% / -89.58% -89.61% -89.61%] index_select strided 16 : Elapsed 90.911 ms (1818.219 ms / 20) BEST 1816.238 -> 404.279 (-77.74%) [ +0.04% +0.00% +0.06% / -77.74% -77.16% -77.41%] index_select random : Elapsed 90.852 ms (1817.049 ms / 20) BEST 1800.359 -> 325.440 (-81.92%) [ +0.15% +0.00% +0.09% / -81.92% -81.74% -81.61%] index_select random_sorted : Elapsed 90.156 ms (1803.115 ms / 20) out_shape = [40, 100, 20, 50] in_shape = [40, 100, 20, 256] idx_dim = 3 B = [40, 100, 20, 50] (stride (100000, 1000, 50, 1)) A = [40, 100, 20, 256] (stride (256, 10240, 1024000, 1)) dim = 3 149.440 -> 149.447 ( +0.00%) [ +0.09% +0.00% +0.10% / +0.00% +0.20% +0.11%] index_select const : Elapsed 7.479 ms (149.576 ms / 20) 149.688 -> 149.634 ( -0.04%) [ +0.00% +0.13% +0.27% / -0.04% +0.24% +0.08%] index_select wrap : Elapsed 7.484 ms (149.688 ms / 20) 149.231 -> 149.811 ( +0.39%) [ +0.25% +0.42% +0.00% / +0.39% +0.44% +0.40%] index_select linear : Elapsed 7.480 ms (149.608 ms / 20) 149.604 -> 149.622 ( +0.01%) [ +0.06% +0.00% +0.09% / +0.01% +0.17% +0.18%] index_select reverse : Elapsed 7.485 ms (149.693 ms / 20) 149.477 -> 149.595 ( +0.08%) [ +0.01% +0.00% +0.10% / +0.08% +0.12% +0.15%] index_select skip64 : Elapsed 7.474 ms (149.488 ms / 20) 149.294 -> 149.531 ( +0.16%) [ +0.00% +0.21% +0.21% / +0.16% +0.16% +0.21%] index_select skip256 : Elapsed 7.465 ms (149.294 ms / 20) 150.172 -> 149.979 ( -0.13%) [ +0.00% +0.15% +0.05% / -0.13% +0.12% +0.07%] index_select spread : Elapsed 7.509 ms (150.172 ms / 20) 149.881 -> 149.875 ( -0.00%) [ +0.18% +0.02% +0.00% / +0.05% +0.05% -0.00%] index_select strided 3 : Elapsed 7.508 ms (150.153 ms / 20) 150.117 -> 150.020 ( -0.06%) [ +0.00% +0.05% +0.02% / -0.06% +0.06% +0.10%] index_select strided 5 : Elapsed 7.506 ms (150.117 ms / 20) 150.300 -> 150.277 ( -0.02%) [ +0.02% +0.00% +0.12% / -0.02% +0.21% +0.13%] index_select strided 7 : Elapsed 7.516 ms (150.328 ms / 20) 150.124 -> 150.530 ( +0.27%) [ +0.20% +0.00% +0.33% / +0.27% +0.33% +0.31%] index_select strided 8 : Elapsed 7.521 ms (150.420 ms / 20) 150.409 -> 150.449 ( +0.03%) [ +0.09% +0.11% +0.00% / +0.03% +0.26% +0.18%] index_select strided 16 : Elapsed 7.527 ms (150.538 ms / 20) 150.474 -> 150.471 ( -0.00%) [ +0.09% +0.00% +0.01% / -0.00% +0.16% +0.11%] index_select strided 64 : Elapsed 7.530 ms (150.604 ms / 20) 150.493 -> 150.326 ( -0.11%) [ +0.06% +0.00% +0.03% / -0.11% +0.13% +0.16%] index_select strided 100 : Elapsed 7.529 ms (150.578 ms / 20) 149.576 -> 149.449 ( -0.08%) [ +0.23% +0.00% +0.02% / -0.08% +0.23% +0.09%] index_select strided 255 : Elapsed 7.496 ms (149.918 ms / 20) 150.265 -> 150.394 ( +0.09%) [ +0.11% +0.00% +0.18% / +0.09% +0.31% +0.19%] index_select random : Elapsed 7.521 ms (150.424 ms / 20) 149.919 -> 150.087 ( +0.11%) [ +0.08% +0.24% +0.00% / +0.11% +0.14% +0.16%] index_select random_sorted : Elapsed 7.502 ms (150.036 ms / 20) 150.127 -> 150.422 ( +0.20%) [ +0.00% +0.12% +0.19% / +0.31% +0.29% +0.20%] index_select perm : Elapsed 7.506 ms (150.127 ms / 20) 150.010 -> 149.832 ( -0.12%) [ +0.00% +0.00% +0.01% / -0.12% +0.15% +0.05%] index_select perm_sorted : Elapsed 7.501 ms (150.013 ms / 20) out_shape = [50, 100, 256, 20] in_shape = [40, 100, 256, 20] idx_dim = 0 out_shape = [40, 50, 256, 20] in_shape = [40, 100, 256, 20] idx_dim = 1 out_shape = [40, 100, 50, 20] in_shape = [40, 100, 256, 20] idx_dim = 2 out_shape = [40, 100, 256, 50] in_shape = [40, 100, 256, 20] idx_dim = 3 out_shape = [50, 256, 20, 100] in_shape = [40, 256, 20, 100] idx_dim = 0 out_shape = [40, 50, 20, 100] in_shape = [40, 256, 20, 100] idx_dim = 1 B = [40, 50, 20, 100] (stride (50, 1, 2000, 40000)) A = [40, 256, 20, 100] (stride (1, 80000, 4000, 40)) dim = 1 117.045 -> 116.546 ( -0.43%) [ +0.13% +0.00% +0.03% / +0.30% -0.43% -0.36%] index_select const : Elapsed 5.860 ms (117.193 ms / 20) 125.173 -> 125.275 ( +0.08%) [ +0.18% +0.00% +0.18% / +0.14% +0.23% +0.08%] index_select wrap : Elapsed 6.270 ms (125.395 ms / 20) 125.144 -> 125.194 ( +0.04%) [ +0.00% +0.30% +0.08% / +0.04% +0.26% +0.30%] index_select linear : Elapsed 6.257 ms (125.144 ms / 20) 124.976 -> 125.259 ( +0.23%) [ +0.00% +0.15% +0.44% / +0.23% +0.44% +0.38%] index_select reverse : Elapsed 6.249 ms (124.976 ms / 20) 117.152 -> 116.534 ( -0.53%) [ +0.10% +0.00% +0.14% / +0.10% -0.48% -0.53%] index_select skip64 : Elapsed 5.864 ms (117.272 ms / 20) 117.065 -> 116.443 ( -0.53%) [ +0.19% +0.00% +0.08% / -0.02% -0.35% -0.53%] index_select skip256 : Elapsed 5.864 ms (117.285 ms / 20) 124.704 -> 124.910 ( +0.17%) [ +0.11% +0.00% +0.18% / +0.17% +0.39% +0.58%] index_select spread : Elapsed 6.242 ms (124.837 ms / 20) 125.393 -> 125.394 ( +0.00%) [ +0.04% +0.17% +0.00% / +0.19% +0.12% +0.00%] index_select strided 3 : Elapsed 6.272 ms (125.446 ms / 20) 125.550 -> 125.405 ( -0.12%) [ +0.02% +0.00% +0.11% / -0.01% -0.02% -0.12%] index_select strided 5 : Elapsed 6.279 ms (125.571 ms / 20) 125.208 -> 125.064 ( -0.12%) [ +0.23% +0.00% +0.30% / +0.05% -0.11% -0.12%] index_select strided 7 : Elapsed 6.275 ms (125.502 ms / 20) 125.376 -> 124.948 ( -0.34%) [ +0.19% +0.00% +0.00% / +0.12% -0.34% -0.30%] index_select strided 8 : Elapsed 6.281 ms (125.616 ms / 20) 125.584 -> 125.472 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% -0.03% -0.01%] index_select strided 16 : Elapsed 6.279 ms (125.588 ms / 20) 124.901 -> 124.798 ( -0.08%) [ +0.18% +0.00% +0.09% / +0.20% -0.08% +0.03%] index_select strided 64 : Elapsed 6.256 ms (125.128 ms / 20) 125.397 -> 125.423 ( +0.02%) [ +0.15% +0.00% +0.02% / +0.21% +0.02% +0.15%] index_select strided 100 : Elapsed 6.279 ms (125.590 ms / 20) 125.271 -> 125.282 ( +0.01%) [ +0.10% +0.00% +0.03% / +0.01% +0.16% +0.15%] index_select strided 255 : Elapsed 6.270 ms (125.395 ms / 20) 124.679 -> 124.750 ( +0.06%) [ +0.21% +0.00% +0.16% / +0.06% +0.57% +0.60%] index_select random : Elapsed 6.247 ms (124.942 ms / 20) 124.600 -> 124.777 ( +0.14%) [ +0.13% +0.07% +0.00% / +0.14% +0.64% +0.65%] index_select random_sorted : Elapsed 6.238 ms (124.760 ms / 20) 125.332 -> 125.256 ( -0.06%) [ +0.00% +0.06% +0.09% / +0.14% +0.16% -0.06%] index_select perm : Elapsed 6.267 ms (125.332 ms / 20) 125.265 -> 125.385 ( +0.10%) [ +0.00% +0.20% +0.13% / +0.22% +0.10% +0.17%] index_select perm_sorted : Elapsed 6.263 ms (125.265 ms / 20) out_shape = [40, 256, 50, 100] in_shape = [40, 256, 20, 100] idx_dim = 2 B = [40, 256, 50, 100] (stride (5000, 200000, 1, 50)) A = [40, 256, 20, 100] (stride (256, 1, 1024000, 10240)) dim = 2 831.526 -> 830.111 ( -0.17%) [ +0.01% +0.00% +0.04% / -0.08% -0.17% -0.04%] index_add_ linear : Elapsed 41.580 ms (831.599 ms / 20) 759.856 -> 758.860 ( -0.13%) [ +0.02% +0.08% +0.00% / -0.02% -0.13% -0.10%] index_copy_ linear : Elapsed 37.999 ms (759.981 ms / 20) 831.190 -> 831.300 ( +0.01%) [ +0.09% +0.10% +0.00% / +0.01% +0.04% +0.08%] index_add_ reverse : Elapsed 41.598 ms (831.962 ms / 20) 759.399 -> 759.092 ( -0.04%) [ +0.06% +0.02% +0.00% / -0.04% +0.00% +0.11%] index_copy_ reverse : Elapsed 37.991 ms (759.824 ms / 20) 831.875 -> 831.516 ( -0.04%) [ +0.06% +0.04% +0.00% / -0.04% -0.01% +0.03%] index_add_ spread : Elapsed 41.619 ms (832.376 ms / 20) 759.794 -> 759.596 ( -0.03%) [ +0.00% +0.00% +0.09% / +0.00% +0.01% -0.03%] index_copy_ spread : Elapsed 37.990 ms (759.794 ms / 20) 831.368 -> 831.267 ( -0.01%) [ +0.02% +0.00% +0.04% / -0.01% +0.09% +0.05%] index_add_ strided 3 : Elapsed 41.579 ms (831.573 ms / 20) 758.650 -> 759.451 ( +0.11%) [ +0.00% +0.09% +0.11% / +0.11% +0.22% +0.15%] index_copy_ strided 3 : Elapsed 37.933 ms (758.650 ms / 20) 830.037 -> 830.722 ( +0.08%) [ +0.00% +0.26% +0.07% / +0.08% +0.20% +0.09%] index_add_ strided 7 : Elapsed 41.502 ms (830.037 ms / 20) 758.430 -> 758.741 ( +0.04%) [ +0.07% +0.06% +0.00% / +0.04% +0.27% +0.21%] index_copy_ strided 7 : Elapsed 37.948 ms (758.955 ms / 20) 828.942 -> 830.161 ( +0.15%) [ +0.07% +0.09% +0.00% / +0.15% +0.42% +0.34%] index_add_ perm : Elapsed 41.477 ms (829.533 ms / 20) 757.682 -> 757.154 ( -0.07%) [ +0.04% +0.00% +0.04% / -0.07% +0.29% +0.30%] index_copy_ perm : Elapsed 37.900 ms (757.999 ms / 20) 828.632 -> 828.748 ( +0.01%) [ +0.06% +0.00% +0.14% / +0.01% +0.29% +0.28%] index_add_ perm_sorted : Elapsed 41.455 ms (829.109 ms / 20) 756.713 -> 756.314 ( -0.05%) [ +0.07% +0.14% +0.00% / -0.05% +0.28% +0.40%] index_copy_ perm_sorted : Elapsed 37.862 ms (757.244 ms / 20) 1906.358 -> 1906.706 ( +0.02%) [ +0.03% +0.00% +0.03% / +0.02% +0.12% +0.16%] index_select const : Elapsed 95.350 ms (1906.995 ms / 20) 1954.779 -> 1954.502 ( -0.01%) [ +0.02% +0.01% +0.00% / -0.01% +0.02% +0.02%] index_select wrap : Elapsed 97.758 ms (1955.156 ms / 20) 1916.838 -> 1915.342 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.01% -0.08% -0.05%] index_select linear : Elapsed 95.868 ms (1917.362 ms / 20) 1933.800 -> 1933.520 ( -0.01%) [ +0.02% +0.02% +0.00% / -0.01% +0.04% +0.03%] index_select reverse : Elapsed 96.706 ms (1934.117 ms / 20) 1906.179 -> 1905.462 ( -0.04%) [ +0.00% +0.04% +0.01% / -0.04% +0.16% +0.18%] index_select skip64 : Elapsed 95.309 ms (1906.179 ms / 20) 1903.972 -> 1904.436 ( +0.02%) [ +0.01% +0.00% +0.03% / +0.02% +0.25% +0.28%] index_select skip256 : Elapsed 95.209 ms (1904.178 ms / 20) 1932.017 -> 1932.404 ( +0.02%) [ +0.05% +0.00% +0.10% / +0.04% +0.05% +0.02%] index_select spread : Elapsed 96.652 ms (1933.036 ms / 20) 1955.314 -> 1954.524 ( -0.04%) [ +0.01% +0.00% +0.00% / +0.04% -0.04% -0.02%] index_select strided 3 : Elapsed 97.778 ms (1955.567 ms / 20) 1952.614 -> 1949.909 ( -0.14%) [ +0.00% +0.08% +0.09% / -0.01% -0.11% -0.14%] index_select strided 5 : Elapsed 97.631 ms (1952.614 ms / 20) 1955.130 -> 1949.939 ( -0.27%) [ +0.01% +0.00% +0.00% / +0.02% -0.25% -0.27%] index_select strided 7 : Elapsed 97.764 ms (1955.281 ms / 20) 1953.318 -> 1954.211 ( +0.05%) [ +0.04% +0.00% +0.00% / +0.05% +0.10% +0.12%] index_select strided 8 : Elapsed 97.709 ms (1954.171 ms / 20) 1953.662 -> 1954.399 ( +0.04%) [ +0.04% +0.07% +0.00% / +0.06% +0.07% +0.04%] index_select strided 16 : Elapsed 97.726 ms (1954.529 ms / 20) 1951.625 -> 1952.594 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.12% +0.05% +0.12%] index_select random : Elapsed 97.634 ms (1952.674 ms / 20) 1931.676 -> 1930.698 ( -0.05%) [ +0.02% +0.06% +0.00% / +0.02% -0.05% +0.03%] index_select random_sorted : Elapsed 96.601 ms (1932.017 ms / 20) out_shape = [40, 256, 20, 50] in_shape = [40, 256, 20, 100] idx_dim = 3 out_shape = [50, 256, 100, 20] in_shape = [40, 256, 100, 20] idx_dim = 0 out_shape = [40, 50, 100, 20] in_shape = [40, 256, 100, 20] idx_dim = 1 out_shape = [40, 256, 50, 20] in_shape = [40, 256, 100, 20] idx_dim = 2 B = [40, 256, 50, 20] (stride (1, 40000, 40, 2000)) A = [40, 256, 100, 20] (stride (20, 80000, 800, 1)) dim = 2 202.104 -> 194.885 ( -3.57%) [ +0.00% +0.19% +0.11% / -0.26% -3.57% -3.48%] index_select const : Elapsed 10.105 ms (202.104 ms / 20) 194.907 -> 194.842 ( -0.03%) [ +0.09% +0.10% +0.00% / -0.03% +1.46% +1.68%] index_select wrap : Elapsed 9.754 ms (195.079 ms / 20) 194.214 -> 195.434 ( +0.63%) [ +0.57% +0.23% +0.00% / +0.63% +2.40% +2.30%] index_select linear : Elapsed 9.766 ms (195.316 ms / 20) 195.603 -> 194.438 ( -0.60%) [ +0.25% +0.01% +0.00% / +0.35% -0.60% -0.44%] index_select reverse : Elapsed 9.805 ms (196.101 ms / 20) 201.246 -> 195.243 ( -2.98%) [ +0.47% +0.37% +0.00% / -0.11% -2.98% -2.84%] index_select skip64 : Elapsed 10.110 ms (202.194 ms / 20) 201.616 -> 194.195 ( -3.68%) [ +0.40% +0.00% +0.44% / +0.27% -3.00% -3.68%] index_select skip256 : Elapsed 10.122 ms (202.430 ms / 20) 195.321 -> 195.614 ( +0.15%) [ +0.00% +0.09% +0.57% / +0.15% +0.72% +0.51%] index_select spread : Elapsed 9.766 ms (195.321 ms / 20) 195.937 -> 195.475 ( -0.24%) [ +0.06% +0.00% +0.16% / +0.08% -0.12% -0.24%] index_select strided 3 : Elapsed 9.802 ms (196.047 ms / 20) 196.636 -> 197.401 ( +0.39%) [ +0.00% +0.19% +0.79% / +0.40% +0.39% +0.75%] index_select strided 5 : Elapsed 9.832 ms (196.636 ms / 20) 196.460 -> 193.228 ( -1.65%) [ +0.15% +0.00% +0.27% / +0.28% -1.65% -1.10%] index_select strided 7 : Elapsed 9.838 ms (196.764 ms / 20) 195.819 -> 194.036 ( -0.91%) [ +0.00% +0.33% +0.33% / -0.04% -0.91% -0.55%] index_select strided 8 : Elapsed 9.791 ms (195.819 ms / 20) 194.442 -> 193.367 ( -0.55%) [ +0.36% +0.74% +0.00% / +0.60% -0.55% +0.17%] index_select strided 16 : Elapsed 9.757 ms (195.133 ms / 20) 195.117 -> 195.479 ( +0.19%) [ +0.63% +0.66% +0.00% / +0.68% +0.70% +0.19%] index_select strided 64 : Elapsed 9.817 ms (196.349 ms / 20) 196.331 -> 196.023 ( -0.16%) [ +0.00% +0.22% +0.07% / -0.16% +0.00% +0.03%] index_select random : Elapsed 9.817 ms (196.331 ms / 20) 193.986 -> 194.404 ( +0.22%) [ +0.32% +0.67% +0.00% / +0.22% +1.48% +1.61%] index_select random_sorted : Elapsed 9.730 ms (194.602 ms / 20) 194.805 -> 195.819 ( +0.52%) [ +0.00% +0.07% +0.24% / +0.52% +1.63% +2.02%] index_select perm : Elapsed 9.740 ms (194.805 ms / 20) 195.176 -> 195.797 ( +0.32%) [ +0.17% +0.63% +0.00% / +0.32% +1.51% +0.89%] index_select perm_sorted : Elapsed 9.775 ms (195.507 ms / 20) B = [40, 256, 50, 20] (stride (1, 40, 204800, 10240)) dim = 2 fill_cnt = 100 45.014 -> 44.940 ( -0.16%) [ +0.07% +0.09% +0.00% / -0.16% +0.34% +0.36%] index_fill_ const : Elapsed 2.252 ms (45.047 ms / 20) 109.628 -> 109.851 ( +0.20%) [ +0.00% +0.01% +1.31% / +0.20% +0.87% +0.36%] index_fill_ linear : Elapsed 5.481 ms (109.628 ms / 20) good 133.872 -> 127.137 ( -5.03%) [ +0.00% +1.19% +1.72% / +1.77% -4.84% -5.03%] index_fill_ reverse : Elapsed 6.694 ms (133.872 ms / 20) 45.384 -> 45.310 ( -0.16%) [ +0.00% +0.01% +0.08% / -0.16% +0.19% +0.17%] index_fill_ skip64 : Elapsed 2.269 ms (45.384 ms / 20) 45.131 -> 45.077 ( -0.12%) [ +0.05% +0.04% +0.00% / -0.12% +0.32% +0.27%] index_fill_ skip256 : Elapsed 2.258 ms (45.155 ms / 20) 154.569 -> 150.431 ( -2.68%) [ +0.00% +0.09% +0.54% / +1.06% -2.68% -1.99%] index_fill_ spread : Elapsed 7.728 ms (154.569 ms / 20) 210.922 -> 210.052 ( -0.41%) [ +0.59% +0.00% +0.47% / -0.41% +1.61% +0.66%] index_fill_ strided 3 : Elapsed 10.608 ms (212.163 ms / 20) 203.939 -> 196.701 ( -3.55%) [ +0.78% +1.20% +0.00% / -0.02% -3.00% -3.55%] index_fill_ strided 5 : Elapsed 10.277 ms (205.539 ms / 20) 204.922 -> 204.656 ( -0.13%) [ +0.78% +1.01% +0.00% / +1.52% +0.38% -0.13%] index_fill_ strided 7 : Elapsed 10.326 ms (206.520 ms / 20) 201.321 -> 202.462 ( +0.57%) [ +0.00% +0.84% +0.81% / +0.57% +1.41% +0.99%] index_fill_ strided 8 : Elapsed 10.066 ms (201.321 ms / 20) 210.565 -> 208.221 ( -1.11%) [ +0.00% +0.85% +0.35% / +0.51% -0.87% -1.11%] index_fill_ strided 16 : Elapsed 10.528 ms (210.565 ms / 20) 200.548 -> 199.302 ( -0.62%) [ +0.00% +0.39% +0.33% / -0.62% +1.54% +1.93%] index_fill_ random : Elapsed 10.027 ms (200.548 ms / 20) 146.459 -> 147.840 ( +0.94%) [ +0.29% +0.00% +1.12% / +0.94% +1.21% +1.07%] index_fill_ random_sorted : Elapsed 7.345 ms (146.891 ms / 20) out_shape = [40, 256, 100, 50] in_shape = [40, 256, 100, 20] idx_dim = 3 B = [40, 256, 100, 50] (stride (1, 200000, 2000, 40)) A = [40, 256, 100, 20] (stride (512000, 2000, 1, 100)) dim = 3 615.858 -> 615.614 ( -0.04%) [ +0.31% +0.00% +0.29% / -0.04% +0.10% +0.26%] index_add_ linear : Elapsed 30.887 ms (617.750 ms / 20) 403.613 -> 403.652 ( +0.01%) [ +0.09% +0.12% +0.00% / +0.13% +0.14% +0.01%] index_copy_ linear : Elapsed 20.199 ms (403.981 ms / 20) 625.978 -> 625.732 ( -0.04%) [ +0.00% +0.37% +0.13% / +0.40% -0.04% +0.37%] index_add_ reverse : Elapsed 31.299 ms (625.978 ms / 20) 406.630 -> 407.763 ( +0.28%) [ +0.24% +0.34% +0.00% / +0.28% +0.63% +0.85%] index_copy_ reverse : Elapsed 20.380 ms (407.592 ms / 20) 624.601 -> 624.710 ( +0.02%) [ +0.10% +0.10% +0.00% / +0.02% +0.38% +0.17%] index_add_ spread : Elapsed 31.262 ms (625.244 ms / 20) 408.993 -> 408.231 ( -0.19%) [ +0.23% +0.01% +0.00% / +0.04% -0.19% -0.17%] index_copy_ spread : Elapsed 20.496 ms (409.927 ms / 20) 623.962 -> 622.055 ( -0.31%) [ +0.05% +0.09% +0.00% / +0.01% -0.25% -0.31%] index_add_ strided 3 : Elapsed 31.212 ms (624.247 ms / 20) 407.971 -> 409.095 ( +0.28%) [ +0.11% +0.00% +0.29% / +0.39% +0.32% +0.28%] index_copy_ strided 3 : Elapsed 20.422 ms (408.433 ms / 20) 622.808 -> 624.941 ( +0.34%) [ +0.00% +0.16% +0.18% / +0.34% +0.52% +0.75%] index_add_ strided 7 : Elapsed 31.140 ms (622.808 ms / 20) 410.523 -> 410.860 ( +0.08%) [ +0.00% +0.27% +0.24% / +0.12% +0.08% +0.18%] index_copy_ strided 7 : Elapsed 20.526 ms (410.523 ms / 20) 621.577 -> 620.945 ( -0.10%) [ +0.05% +0.00% +0.09% / -0.07% -0.10% -0.10%] index_add_ perm : Elapsed 31.094 ms (621.873 ms / 20) 409.371 -> 409.687 ( +0.08%) [ +0.00% +0.37% +0.55% / +0.34% +0.20% +0.08%] index_copy_ perm : Elapsed 20.469 ms (409.371 ms / 20) 617.649 -> 619.192 ( +0.25%) [ +0.20% +0.00% +0.29% / +0.25% +1.87% +1.77%] index_add_ perm_sorted : Elapsed 30.943 ms (618.867 ms / 20) 408.529 -> 407.458 ( -0.26%) [ +0.00% +0.26% +0.02% / +0.16% -0.06% -0.26%] index_copy_ perm_sorted : Elapsed 20.426 ms (408.529 ms / 20) 1035.315 -> 1034.451 ( -0.08%) [ +0.04% +0.00% +0.14% / -0.08% +0.74% +0.50%] index_select const : Elapsed 51.786 ms (1035.719 ms / 20) 1035.307 -> 1036.794 ( +0.14%) [ +0.12% +0.11% +0.00% / +0.14% +0.37% +0.18%] index_select wrap : Elapsed 51.825 ms (1036.509 ms / 20) 1037.798 -> 1038.931 ( +0.11%) [ +0.13% +0.00% +0.21% / +0.11% +0.26% +0.40%] index_select linear : Elapsed 51.957 ms (1039.145 ms / 20) 1036.452 -> 1037.875 ( +0.14%) [ +0.10% +0.05% +0.00% / +0.14% +0.51% +0.75%] index_select reverse : Elapsed 51.877 ms (1037.532 ms / 20) 1037.191 -> 1039.662 ( +0.24%) [ +0.00% +0.14% +0.00% / +0.37% +0.24% +0.46%] index_select skip64 : Elapsed 51.860 ms (1037.191 ms / 20) 1036.719 -> 1039.635 ( +0.28%) [ +0.37% +0.00% +0.27% / +0.28% +0.32% +0.34%] index_select skip256 : Elapsed 52.028 ms (1040.557 ms / 20) 1036.893 -> 1037.805 ( +0.09%) [ +0.10% +0.00% +0.10% / +0.09% +0.32% +0.12%] index_select spread : Elapsed 51.897 ms (1037.943 ms / 20) 1036.239 -> 1035.257 ( -0.09%) [ +0.04% +0.13% +0.00% / -0.09% +0.11% +0.24%] index_select strided 3 : Elapsed 51.834 ms (1036.680 ms / 20) 1036.254 -> 1037.769 ( +0.15%) [ +0.00% +0.13% +0.28% / +0.25% +0.24% +0.15%] index_select strided 5 : Elapsed 51.813 ms (1036.254 ms / 20) 1034.647 -> 1038.405 ( +0.36%) [ +0.56% +0.00% +0.34% / +0.36% +0.43% +0.57%] index_select strided 7 : Elapsed 52.021 ms (1040.410 ms / 20) 1035.272 -> 1037.412 ( +0.21%) [ +0.00% +0.23% +0.16% / +0.21% +0.42% +0.46%] index_select strided 8 : Elapsed 51.764 ms (1035.272 ms / 20) 1040.488 -> 1039.002 ( -0.14%) [ +0.07% +0.10% +0.00% / -0.14% -0.05% +0.13%] index_select strided 16 : Elapsed 52.059 ms (1041.180 ms / 20) 1036.054 -> 1037.405 ( +0.13%) [ +0.09% +0.00% +0.38% / +0.13% +0.39% +0.19%] index_select random : Elapsed 51.848 ms (1036.964 ms / 20) 1039.278 -> 1038.235 ( -0.10%) [ +0.00% +0.35% +0.23% / -0.10% +0.27% +0.26%] index_select random_sorted : Elapsed 51.964 ms (1039.278 ms / 20) out_shape = [50, 20, 40, 256] in_shape = [100, 20, 40, 256] idx_dim = 0 B = [50, 20, 40, 256] (stride (10240, 512000, 1, 40)) A = [100, 20, 40, 256] (stride (40, 4000, 1, 80000)) dim = 0 290.384 -> 291.038 ( +0.23%) [ +0.51% +0.50% +0.00% / +0.23% +3.48% +3.64%] index_select const : Elapsed 14.593 ms (291.854 ms / 20) 310.832 -> 310.357 ( -0.15%) [ +0.04% +0.00% +0.14% / +0.07% +0.35% -0.15%] index_select wrap : Elapsed 15.547 ms (310.947 ms / 20) 310.802 -> 310.151 ( -0.21%) [ +0.00% +0.05% +0.04% / -0.21% +0.43% +0.29%] index_select linear : Elapsed 15.540 ms (310.802 ms / 20) 305.366 -> 305.461 ( +0.03%) [ +0.03% +0.25% +0.00% / +0.03% +0.28% +0.78%] index_select reverse : Elapsed 15.273 ms (305.457 ms / 20) 290.593 -> 292.075 ( +0.51%) [ +0.52% +0.00% +0.28% / +0.51% +3.70% +4.08%] index_select skip64 : Elapsed 14.605 ms (292.100 ms / 20) 291.720 -> 292.350 ( +0.22%) [ +0.03% +0.00% +0.21% / +0.22% +2.97% +3.33%] index_select skip256 : Elapsed 14.591 ms (291.822 ms / 20) 312.468 -> 313.311 ( +0.27%) [ +0.00% +0.20% +0.18% / +0.27% +0.74% +0.73%] index_select spread : Elapsed 15.623 ms (312.468 ms / 20) 316.515 -> 315.817 ( -0.22%) [ +0.11% +0.00% +0.13% / +0.17% -0.22% -0.01%] index_select strided 3 : Elapsed 15.843 ms (316.861 ms / 20) 317.440 -> 318.250 ( +0.26%) [ +0.41% +0.04% +0.00% / +0.26% +0.83% +0.86%] index_select strided 5 : Elapsed 15.936 ms (318.729 ms / 20) 317.451 -> 318.218 ( +0.24%) [ +0.07% +0.00% +0.17% / +0.24% +0.99% +0.81%] index_select strided 7 : Elapsed 15.884 ms (317.670 ms / 20) 317.875 -> 318.861 ( +0.31%) [ +0.08% +0.28% +0.00% / +0.31% +1.31% +1.50%] index_select strided 8 : Elapsed 15.906 ms (318.125 ms / 20) 321.857 -> 321.435 ( -0.13%) [ +0.50% +0.24% +0.00% / -0.13% +0.22% +0.19%] index_select strided 16 : Elapsed 16.173 ms (323.456 ms / 20) 318.703 -> 320.371 ( +0.52%) [ +0.37% +0.65% +0.00% / +0.52% +0.77% +0.74%] index_select strided 64 : Elapsed 15.994 ms (319.882 ms / 20) 314.380 -> 315.768 ( +0.44%) [ +0.00% +0.79% +0.06% / +0.44% +1.18% +1.45%] index_select random : Elapsed 15.719 ms (314.380 ms / 20) 307.928 -> 308.355 ( +0.14%) [ +0.05% +0.00% +0.31% / +0.30% +0.14% +0.50%] index_select random_sorted : Elapsed 15.405 ms (308.090 ms / 20) 317.859 -> 317.569 ( -0.09%) [ +0.19% +0.00% +0.18% / -0.09% +0.34% +0.27%] index_select perm : Elapsed 15.924 ms (318.473 ms / 20) 313.672 -> 313.545 ( -0.04%) [ +0.17% +0.10% +0.00% / -0.04% +0.55% +0.57%] index_select perm_sorted : Elapsed 15.711 ms (314.211 ms / 20) out_shape = [100, 50, 40, 256] in_shape = [100, 20, 40, 256] idx_dim = 1 out_shape = [100, 20, 50, 256] in_shape = [100, 20, 40, 256] idx_dim = 2 out_shape = [100, 20, 40, 50] in_shape = [100, 20, 40, 256] idx_dim = 3 out_shape = [50, 20, 256, 40] in_shape = [100, 20, 256, 40] idx_dim = 0 out_shape = [100, 50, 256, 40] in_shape = [100, 20, 256, 40] idx_dim = 1 out_shape = [100, 20, 50, 40] in_shape = [100, 20, 256, 40] idx_dim = 2 out_shape = [100, 20, 256, 50] in_shape = [100, 20, 256, 40] idx_dim = 3 out_shape = [50, 40, 20, 256] in_shape = [100, 40, 20, 256] idx_dim = 0 out_shape = [100, 50, 20, 256] in_shape = [100, 40, 20, 256] idx_dim = 1 B = [100, 50, 20, 256] (stride (1, 25600, 1280000, 100)) A = [100, 40, 20, 256] (stride (204800, 5120, 1, 20)) dim = 1 553.505 -> 543.493 ( -1.81%) [ +0.11% +0.00% +0.49% / -0.46% -1.81% -0.86%] index_add_ linear : Elapsed 27.705 ms (554.096 ms / 20) 466.516 -> 462.280 ( -0.91%) [ +0.21% +0.00% +0.35% / -0.14% -0.91% -0.56%] index_copy_ linear : Elapsed 23.375 ms (467.492 ms / 20) 544.168 -> 544.661 ( +0.09%) [ +1.30% +0.00% +0.56% / +0.51% +0.61% +0.09%] index_add_ reverse : Elapsed 27.561 ms (551.220 ms / 20) 466.867 -> 463.662 ( -0.69%) [ +0.35% +0.00% +0.38% / +0.31% -0.69% -0.58%] index_copy_ reverse : Elapsed 23.424 ms (468.478 ms / 20) 560.796 -> 548.987 ( -2.11%) [ +0.00% +1.38% +1.61% / +1.60% -1.84% -2.11%] index_add_ spread : Elapsed 28.040 ms (560.796 ms / 20) 468.222 -> 463.684 ( -0.97%) [ +0.00% +0.12% +0.66% / +0.02% -0.97% -0.82%] index_copy_ spread : Elapsed 23.411 ms (468.222 ms / 20) 548.945 -> 541.310 ( -1.39%) [ +0.70% +0.73% +0.00% / +0.34% -1.39% -0.91%] index_add_ strided 3 : Elapsed 27.639 ms (552.771 ms / 20) 468.935 -> 464.665 ( -0.91%) [ +0.21% +0.05% +0.00% / -0.25% -0.86% -0.91%] index_copy_ strided 3 : Elapsed 23.497 ms (469.943 ms / 20) 567.261 -> 552.178 ( -2.66%) [ +0.46% +0.42% +0.00% / -0.22% -2.66% -1.72%] index_add_ strided 7 : Elapsed 28.493 ms (569.866 ms / 20) 471.862 -> 464.003 ( -1.67%) [ +0.47% +0.39% +0.00% / +0.57% -1.67% -1.66%] index_copy_ strided 7 : Elapsed 23.705 ms (474.103 ms / 20) 578.176 -> 549.398 ( -4.98%) [ +0.31% +0.00% +0.97% / -0.10% -4.98% -4.86%] index_add_ perm : Elapsed 28.997 ms (579.944 ms / 20) 471.316 -> 463.565 ( -1.64%) [ +0.26% +0.00% +0.01% / +0.15% -1.50% -1.64%] index_copy_ perm : Elapsed 23.628 ms (472.556 ms / 20) 560.909 -> 539.417 ( -3.83%) [ +0.00% +0.53% +0.04% / -0.35% -3.83% -3.82%] index_add_ perm_sorted : Elapsed 28.045 ms (560.909 ms / 20) 468.054 -> 461.472 ( -1.41%) [ +0.08% +0.00% +0.18% / +0.29% -1.21% -1.41%] index_copy_ perm_sorted : Elapsed 23.422 ms (468.448 ms / 20) 604.131 -> 600.147 ( -0.66%) [ +0.08% +0.00% +0.08% / -0.54% -0.27% -0.66%] index_select const : Elapsed 30.230 ms (604.592 ms / 20) 604.172 -> 599.267 ( -0.81%) [ +0.23% +0.48% +0.00% / +0.43% -0.46% -0.81%] index_select wrap : Elapsed 30.278 ms (605.551 ms / 20) 603.981 -> 600.178 ( -0.63%) [ +0.09% +0.16% +0.00% / +0.05% -0.63% -0.36%] index_select linear : Elapsed 30.226 ms (604.516 ms / 20) 605.272 -> 606.848 ( +0.26%) [ +0.00% +0.44% +0.14% / +0.26% +2.37% +2.10%] index_select reverse : Elapsed 30.264 ms (605.272 ms / 20) 601.292 -> 598.073 ( -0.54%) [ +0.35% +0.23% +0.00% / +0.13% -0.32% -0.54%] index_select skip64 : Elapsed 30.169 ms (603.384 ms / 20) 602.487 -> 598.437 ( -0.67%) [ +0.18% +0.33% +0.00% / +0.11% -0.26% -0.67%] index_select skip256 : Elapsed 30.179 ms (603.580 ms / 20) 602.820 -> 601.833 ( -0.16%) [ +0.00% +0.35% +0.20% / +0.14% -0.11% -0.16%] index_select spread : Elapsed 30.141 ms (602.820 ms / 20) 600.497 -> 599.313 ( -0.20%) [ +0.00% +0.02% +0.15% / -0.20% +0.42% +0.29%] index_select strided 3 : Elapsed 30.025 ms (600.497 ms / 20) 601.133 -> 601.067 ( -0.01%) [ +0.08% +0.00% +0.14% / +0.35% -0.01% +0.09%] index_select strided 5 : Elapsed 30.080 ms (601.608 ms / 20) 607.943 -> 605.181 ( -0.45%) [ +0.21% +0.28% +0.00% / +0.14% -0.27% -0.45%] index_select strided 7 : Elapsed 30.460 ms (609.194 ms / 20) 604.420 -> 604.002 ( -0.07%) [ +0.21% +0.32% +0.00% / -0.07% +0.81% +0.22%] index_select strided 8 : Elapsed 30.285 ms (605.697 ms / 20) 605.630 -> 606.477 ( +0.14%) [ +0.00% +0.25% +0.16% / +0.14% +0.31% +0.65%] index_select strided 16 : Elapsed 30.282 ms (605.630 ms / 20) 604.901 -> 604.852 ( -0.01%) [ +0.00% +0.03% +0.23% / -0.01% +0.57% +0.25%] index_select random : Elapsed 30.245 ms (604.901 ms / 20) 607.267 -> 602.054 ( -0.86%) [ +0.05% +0.00% +0.31% / -0.13% -0.27% -0.86%] index_select random_sorted : Elapsed 30.378 ms (607.564 ms / 20) out_shape = [100, 40, 50, 256] in_shape = [100, 40, 20, 256] idx_dim = 2 out_shape = [100, 40, 20, 50] in_shape = [100, 40, 20, 256] idx_dim = 3 out_shape = [50, 40, 256, 20] in_shape = [100, 40, 256, 20] idx_dim = 0 out_shape = [100, 50, 256, 20] in_shape = [100, 40, 256, 20] idx_dim = 1 out_shape = [100, 40, 50, 20] in_shape = [100, 40, 256, 20] idx_dim = 2 out_shape = [100, 40, 256, 50] in_shape = [100, 40, 256, 20] idx_dim = 3 out_shape = [50, 256, 20, 40] in_shape = [100, 256, 20, 40] idx_dim = 0 out_shape = [100, 50, 20, 40] in_shape = [100, 256, 20, 40] idx_dim = 1 B = [100, 50, 20, 40] (stride (40000, 800, 40, 1)) A = [100, 256, 20, 40] (stride (204800, 800, 1, 20)) dim = 1 32.924 -> 31.479 ( -4.39%) [ +0.20% +0.15% +0.00% / +0.05% -4.39% -4.28%] index_select const : Elapsed 1.650 ms (32.991 ms / 20) 44.049 -> 44.033 ( -0.04%) [ +0.06% +0.35% +0.00% / -0.04% +0.77% +0.77%] index_select wrap : Elapsed 2.204 ms (44.077 ms / 20) 43.987 -> 44.057 ( +0.16%) [ +0.19% +0.10% +0.00% / +0.16% +0.85% +0.84%] index_select linear : Elapsed 2.204 ms (44.071 ms / 20) 43.544 -> 43.705 ( +0.37%) [ +0.16% +0.00% +0.17% / +0.37% +1.73% +1.98%] index_select reverse : Elapsed 2.181 ms (43.615 ms / 20) 32.955 -> 31.474 ( -4.49%) [ +0.00% +0.03% +0.04% / +0.20% -4.49% -4.48%] index_select skip64 : Elapsed 1.648 ms (32.955 ms / 20) 32.997 -> 31.464 ( -4.65%) [ +0.08% +0.01% +0.00% / +0.10% -4.59% -4.65%] index_select skip256 : Elapsed 1.651 ms (33.022 ms / 20) 44.324 -> 44.297 ( -0.06%) [ +0.00% +0.04% +0.08% / -0.06% +0.33% +0.38%] index_select spread : Elapsed 2.216 ms (44.324 ms / 20) 43.959 -> 43.944 ( -0.03%) [ +0.14% +0.03% +0.00% / -0.03% +1.19% +0.81%] index_select strided 3 : Elapsed 2.201 ms (44.020 ms / 20) 44.480 -> 44.477 ( -0.01%) [ +0.05% +0.00% +0.32% / -0.01% +0.74% +0.69%] index_select strided 5 : Elapsed 2.225 ms (44.502 ms / 20) 44.073 -> 43.924 ( -0.34%) [ +0.00% +0.30% +0.15% / +0.44% -0.34% +0.08%] index_select strided 7 : Elapsed 2.204 ms (44.073 ms / 20) 43.904 -> 44.070 ( +0.38%) [ +0.25% +0.33% +0.00% / +0.38% +1.75% +1.83%] index_select strided 8 : Elapsed 2.201 ms (44.012 ms / 20) 44.279 -> 44.314 ( +0.08%) [ +0.19% +0.00% +0.46% / +0.08% +2.59% +2.00%] index_select strided 16 : Elapsed 2.218 ms (44.364 ms / 20) 42.651 -> 42.642 ( -0.02%) [ +0.19% +0.00% +0.07% / +0.04% -0.02% +0.01%] index_select strided 64 : Elapsed 2.137 ms (42.732 ms / 20) 44.428 -> 43.782 ( -1.45%) [ +0.41% +0.00% +0.37% / +0.02% -1.45% -1.31%] index_select strided 100 : Elapsed 2.230 ms (44.608 ms / 20) 44.343 -> 44.322 ( -0.05%) [ +0.02% +0.20% +0.00% / -0.05% +0.33% +0.32%] index_select strided 255 : Elapsed 2.218 ms (44.352 ms / 20) 44.229 -> 44.232 ( +0.01%) [ +0.33% +0.00% +0.09% / +0.11% +0.01% +0.06%] index_select random : Elapsed 2.219 ms (44.374 ms / 20) 43.825 -> 43.633 ( -0.44%) [ +0.06% +0.08% +0.00% / +0.06% -0.44% -0.38%] index_select random_sorted : Elapsed 2.193 ms (43.853 ms / 20) 44.075 -> 43.956 ( -0.27%) [ +0.02% +0.00% +0.09% / -0.27% +0.69% +0.49%] index_select perm : Elapsed 2.204 ms (44.083 ms / 20) 44.022 -> 44.108 ( +0.20%) [ +0.00% +0.35% +0.13% / +0.20% +0.93% +0.60%] index_select perm_sorted : Elapsed 2.201 ms (44.022 ms / 20) B = [100, 50, 20, 40] (stride (40000, 1, 2000, 50)) A = [100, 256, 20, 40] (stride (5120, 20, 1, 512000)) dim = 1 122.519 -> 122.794 ( +0.22%) [ +0.00% +0.01% +0.10% / +0.25% +0.26% +0.22%] index_select const : Elapsed 6.126 ms (122.519 ms / 20) 131.990 -> 132.147 ( +0.12%) [ +0.55% +0.20% +0.00% / +0.12% +0.77% +1.01%] index_select wrap : Elapsed 6.636 ms (132.715 ms / 20) 132.030 -> 132.119 ( +0.07%) [ +0.09% +0.00% +0.12% / +0.07% +1.01% +0.98%] index_select linear : Elapsed 6.607 ms (132.144 ms / 20) 132.699 -> 131.973 ( -0.55%) [ +0.08% +0.01% +0.00% / -0.16% -0.16% -0.55%] index_select reverse : Elapsed 6.640 ms (132.806 ms / 20) 122.539 -> 122.606 ( +0.05%) [ +0.00% +0.11% +0.14% / +0.05% +0.12% +0.21%] index_select skip64 : Elapsed 6.127 ms (122.539 ms / 20) 122.637 -> 122.558 ( -0.06%) [ +0.00% +0.10% +0.09% / +0.07% -0.06% +0.12%] index_select skip256 : Elapsed 6.132 ms (122.637 ms / 20) 132.715 -> 133.119 ( +0.30%) [ +0.17% +0.24% +0.00% / +0.30% +0.37% +0.63%] index_select spread : Elapsed 6.647 ms (132.940 ms / 20) 133.581 -> 133.332 ( -0.19%) [ +0.08% +0.03% +0.00% / -0.03% -0.14% -0.19%] index_select strided 3 : Elapsed 6.684 ms (133.689 ms / 20) 133.333 -> 132.928 ( -0.30%) [ +0.26% +0.16% +0.00% / +0.15% -0.30% -0.21%] index_select strided 5 : Elapsed 6.684 ms (133.686 ms / 20) 133.518 -> 132.808 ( -0.53%) [ +0.25% +0.15% +0.00% / +0.04% -0.44% -0.53%] index_select strided 7 : Elapsed 6.693 ms (133.851 ms / 20) 132.104 -> 131.261 ( -0.64%) [ +0.00% +0.12% +0.47% / -0.25% -0.47% -0.64%] index_select strided 8 : Elapsed 6.605 ms (132.104 ms / 20) 132.426 -> 132.250 ( -0.13%) [ +0.27% +0.32% +0.00% / +0.03% -0.13% +0.18%] index_select strided 16 : Elapsed 6.639 ms (132.783 ms / 20) 131.599 -> 131.785 ( +0.14%) [ +0.08% +0.05% +0.00% / +0.14% +1.72% +1.44%] index_select strided 64 : Elapsed 6.585 ms (131.698 ms / 20) 132.741 -> 132.674 ( -0.05%) [ +0.03% +0.06% +0.00% / -0.05% +0.10% +0.32%] index_select strided 100 : Elapsed 6.639 ms (132.786 ms / 20) 131.650 -> 131.664 ( +0.01%) [ +0.00% +0.03% +0.17% / +0.01% +0.31% +0.28%] index_select strided 255 : Elapsed 6.583 ms (131.650 ms / 20) 131.759 -> 131.873 ( +0.09%) [ +0.25% +0.20% +0.00% / +0.09% +0.86% +0.86%] index_select random : Elapsed 6.604 ms (132.082 ms / 20) 131.156 -> 131.476 ( +0.24%) [ +0.17% +0.00% +0.15% / +0.24% +0.73% +0.85%] index_select random_sorted : Elapsed 6.569 ms (131.382 ms / 20) 133.935 -> 133.271 ( -0.50%) [ +0.08% +0.09% +0.00% / +0.16% -0.42% -0.50%] index_select perm : Elapsed 6.702 ms (134.041 ms / 20) 133.542 -> 132.985 ( -0.42%) [ +0.00% +0.27% +0.03% / +0.07% -0.42% -0.20%] index_select perm_sorted : Elapsed 6.677 ms (133.542 ms / 20) B = [100, 50, 20, 40] (stride (1, 100, 200000, 5000)) A = [100, 256, 20, 40] (stride (20, 80000, 1, 2000)) dim = 1 69.964 -> 70.256 ( +0.42%) [ +0.67% +0.08% +0.00% / +0.42% +4.48% +5.14%] index_select const : Elapsed 3.522 ms (70.431 ms / 20) 85.929 -> 85.735 ( -0.23%) [ +0.86% +0.85% +0.00% / -0.23% +0.05% +0.70%] index_select wrap : Elapsed 4.333 ms (86.670 ms / 20) 86.037 -> 85.647 ( -0.45%) [ +0.00% +0.02% +0.32% / +0.41% -0.45% -0.15%] index_select linear : Elapsed 4.302 ms (86.037 ms / 20) 86.235 -> 86.364 ( +0.15%) [ +0.62% +0.69% +0.00% / +0.52% +0.97% +0.15%] index_select reverse : Elapsed 4.339 ms (86.772 ms / 20) 70.133 -> 69.824 ( -0.44%) [ +0.11% +0.00% +0.11% / -0.44% +4.90% +3.92%] index_select skip64 : Elapsed 3.510 ms (70.210 ms / 20) 70.052 -> 69.915 ( -0.20%) [ +0.25% +0.00% +0.42% / -0.20% +4.60% +5.44%] index_select skip256 : Elapsed 3.511 ms (70.224 ms / 20) 87.259 -> 86.815 ( -0.51%) [ +0.24% +0.00% +0.39% / -0.51% +0.28% +0.62%] index_select spread : Elapsed 4.373 ms (87.469 ms / 20) 87.706 -> 86.519 ( -1.35%) [ +0.19% +0.53% +0.00% / +0.28% -1.35% -1.11%] index_select strided 3 : Elapsed 4.394 ms (87.876 ms / 20) 87.412 -> 85.057 ( -2.69%) [ +0.00% +0.71% +0.10% / +0.30% -2.26% -2.69%] index_select strided 5 : Elapsed 4.371 ms (87.412 ms / 20) 86.985 -> 85.160 ( -2.10%) [ +0.36% +0.00% +0.25% / +0.05% -1.98% -2.10%] index_select strided 7 : Elapsed 4.365 ms (87.296 ms / 20) 86.475 -> 85.469 ( -1.16%) [ +0.00% +0.29% +0.35% / +0.15% -0.68% -1.16%] index_select strided 8 : Elapsed 4.324 ms (86.475 ms / 20) 85.692 -> 85.701 ( +0.01%) [ +0.00% +0.59% +0.30% / +0.01% +1.04% +1.32%] index_select strided 16 : Elapsed 4.285 ms (85.692 ms / 20) 84.124 -> 85.234 ( +1.32%) [ +1.09% +1.37% +0.00% / +1.32% +2.95% +2.33%] index_select strided 64 : Elapsed 4.252 ms (85.038 ms / 20) 86.912 -> 85.622 ( -1.48%) [ +0.10% +0.00% +0.36% / -0.49% -1.48% -1.39%] index_select strided 100 : Elapsed 4.350 ms (86.998 ms / 20) 86.190 -> 85.662 ( -0.61%) [ +0.00% +0.21% +0.02% / -0.61% -0.54% -0.14%] index_select strided 255 : Elapsed 4.309 ms (86.190 ms / 20) 86.017 -> 85.882 ( -0.16%) [ +0.34% +0.54% +0.00% / +0.64% -0.04% -0.16%] index_select random : Elapsed 4.315 ms (86.306 ms / 20) 86.003 -> 85.366 ( -0.74%) [ +0.81% +0.53% +0.00% / +0.19% -0.74% -0.60%] index_select random_sorted : Elapsed 4.335 ms (86.702 ms / 20) 86.628 -> 86.640 ( +0.01%) [ +0.68% +0.00% +0.44% / +0.23% +0.26% +0.01%] index_select perm : Elapsed 4.361 ms (87.215 ms / 20) 87.502 -> 87.035 ( -0.53%) [ +0.18% +0.00% +0.55% / +0.72% -0.53% -0.26%] index_select perm_sorted : Elapsed 4.383 ms (87.657 ms / 20) out_shape = [100, 256, 50, 40] in_shape = [100, 256, 20, 40] idx_dim = 2 out_shape = [100, 256, 20, 50] in_shape = [100, 256, 20, 40] idx_dim = 3 out_shape = [50, 256, 40, 20] in_shape = [100, 256, 40, 20] idx_dim = 0 B = [50, 256, 40, 20] (stride (1, 50, 256000, 12800)) A = [100, 256, 40, 20] (stride (5120, 1, 512000, 256)) dim = 0 382.366 -> 382.941 ( +0.15%) [ +0.04% +0.02% +0.00% / +0.15% +0.48% +0.54%] index_select const : Elapsed 19.126 ms (382.527 ms / 20) 402.557 -> 402.038 ( -0.13%) [ +0.18% +0.23% +0.00% / +0.13% -0.03% -0.13%] index_select wrap : Elapsed 20.165 ms (403.292 ms / 20) 403.047 -> 402.381 ( -0.17%) [ +0.01% +0.04% +0.00% / -0.15% -0.02% -0.17%] index_select linear : Elapsed 20.154 ms (403.084 ms / 20) 402.080 -> 402.484 ( +0.10%) [ +0.07% +0.00% +0.02% / +0.10% +0.48% +0.34%] index_select reverse : Elapsed 20.118 ms (402.361 ms / 20) 381.572 -> 382.217 ( +0.17%) [ +0.00% +0.12% +0.00% / +0.17% +0.80% +0.87%] index_select skip64 : Elapsed 19.079 ms (381.578 ms / 20) 381.690 -> 381.511 ( -0.05%) [ +0.00% +0.18% +0.10% / -0.05% +0.91% +0.87%] index_select skip256 : Elapsed 19.084 ms (381.690 ms / 20) 402.267 -> 402.392 ( +0.03%) [ +0.00% +0.12% +0.02% / +0.03% +0.47% +0.30%] index_select spread : Elapsed 20.113 ms (402.267 ms / 20) 406.367 -> 405.199 ( -0.29%) [ +0.15% +0.02% +0.00% / +0.05% -0.28% -0.29%] index_select strided 3 : Elapsed 20.349 ms (406.977 ms / 20) 406.312 -> 406.608 ( +0.07%) [ +0.00% +0.10% +0.13% / +0.07% +0.43% +0.31%] index_select strided 5 : Elapsed 20.316 ms (406.312 ms / 20) 405.573 -> 403.323 ( -0.55%) [ +0.21% +0.00% +0.05% / +0.06% -0.55% -0.47%] index_select strided 7 : Elapsed 20.321 ms (406.418 ms / 20) 406.476 -> 404.827 ( -0.41%) [ +0.13% +0.17% +0.00% / +0.06% -0.29% -0.41%] index_select strided 8 : Elapsed 20.351 ms (407.014 ms / 20) 403.951 -> 404.341 ( +0.10%) [ +0.18% +0.00% +0.27% / +0.10% +0.43% +0.30%] index_select strided 16 : Elapsed 20.233 ms (404.659 ms / 20) 405.632 -> 405.218 ( -0.10%) [ +0.00% +0.11% +0.02% / +0.12% -0.10% -0.03%] index_select strided 64 : Elapsed 20.282 ms (405.632 ms / 20) 406.380 -> 404.783 ( -0.39%) [ +0.10% +0.00% +0.00% / -0.00% -0.39% -0.22%] index_select random : Elapsed 20.339 ms (406.776 ms / 20) 399.547 -> 399.381 ( -0.04%) [ +0.10% +0.00% +0.14% / +0.07% +0.01% -0.04%] index_select random_sorted : Elapsed 19.998 ms (399.951 ms / 20) 405.284 -> 405.315 ( +0.01%) [ +0.28% +0.26% +0.00% / +0.01% +0.21% +0.17%] index_select perm : Elapsed 20.321 ms (406.410 ms / 20) 402.843 -> 403.893 ( +0.26%) [ +0.00% +0.24% +0.14% / +0.34% +0.32% +0.26%] index_select perm_sorted : Elapsed 20.142 ms (402.843 ms / 20) out_shape = [100, 50, 40, 20] in_shape = [100, 256, 40, 20] idx_dim = 1 out_shape = [100, 256, 50, 20] in_shape = [100, 256, 40, 20] idx_dim = 2 B = [100, 256, 50, 20] (stride (50, 100000, 1, 5000)) A = [100, 256, 40, 20] (stride (800, 80000, 1, 40)) dim = 2 1018.547 -> 1016.798 ( -0.17%) [ +0.00% +0.01% +0.03% / -0.01% -0.11% -0.17%] index_add_ linear : Elapsed 50.927 ms (1018.547 ms / 20) 751.264 -> 750.284 ( -0.13%) [ +0.02% +0.00% +0.00% / -0.00% -0.08% -0.13%] index_copy_ linear : Elapsed 37.571 ms (751.423 ms / 20) 1018.835 -> 1016.876 ( -0.19%) [ +0.02% +0.01% +0.00% / -0.01% -0.19% -0.15%] index_add_ reverse : Elapsed 50.951 ms (1019.022 ms / 20) 750.978 -> 750.169 ( -0.11%) [ +0.00% +0.03% +0.01% / +0.07% -0.11% -0.09%] index_copy_ reverse : Elapsed 37.549 ms (750.978 ms / 20) 1018.420 -> 1016.578 ( -0.18%) [ +0.02% +0.01% +0.00% / -0.04% -0.09% -0.18%] index_add_ spread : Elapsed 50.931 ms (1018.615 ms / 20) 751.153 -> 750.200 ( -0.13%) [ +0.00% +0.02% +0.04% / +0.02% -0.13% -0.12%] index_copy_ spread : Elapsed 37.558 ms (751.153 ms / 20) 1017.975 -> 1016.806 ( -0.11%) [ +0.02% +0.00% +0.02% / +0.02% -0.09% -0.11%] index_add_ strided 3 : Elapsed 50.907 ms (1018.131 ms / 20) 751.125 -> 750.594 ( -0.07%) [ +0.03% +0.00% +0.02% / +0.03% -0.07% -0.07%] index_copy_ strided 3 : Elapsed 37.569 ms (751.374 ms / 20) 1017.850 -> 1016.913 ( -0.09%) [ +0.06% +0.00% +0.06% / -0.01% -0.06% -0.09%] index_add_ strided 7 : Elapsed 50.922 ms (1018.448 ms / 20) 750.908 -> 750.402 ( -0.07%) [ +0.00% +0.06% +0.07% / +0.04% -0.07% -0.03%] index_copy_ strided 7 : Elapsed 37.545 ms (750.908 ms / 20) 1016.895 -> 1017.196 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.06% +0.05% +0.03%] index_add_ perm : Elapsed 50.861 ms (1017.222 ms / 20) 749.761 -> 750.126 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.07% +0.08% +0.05%] index_copy_ perm : Elapsed 37.488 ms (749.761 ms / 20) 1017.299 -> 1016.839 ( -0.05%) [ +0.04% +0.01% +0.00% / +0.01% -0.05% -0.01%] index_add_ perm_sorted : Elapsed 50.888 ms (1017.751 ms / 20) 749.713 -> 749.837 ( +0.02%) [ +0.03% +0.00% +0.01% / +0.02% +0.12% +0.15%] index_copy_ perm_sorted : Elapsed 37.496 ms (749.916 ms / 20) 934.682 -> 933.891 ( -0.08%) [ +0.00% +0.05% +0.01% / -0.04% -0.08% -0.05%] index_select const : Elapsed 46.734 ms (934.682 ms / 20) 938.365 -> 937.196 ( -0.12%) [ +0.01% +0.00% +0.03% / +0.01% -0.05% -0.12%] index_select wrap : Elapsed 46.925 ms (938.493 ms / 20) 937.688 -> 937.406 ( -0.03%) [ +0.07% +0.00% +0.03% / +0.03% -0.03% +0.00%] index_select linear : Elapsed 46.918 ms (938.366 ms / 20) 938.295 -> 937.668 ( -0.07%) [ +0.04% +0.00% +0.00% / -0.04% -0.07% -0.05%] index_select reverse : Elapsed 46.932 ms (938.648 ms / 20) 934.678 -> 934.006 ( -0.07%) [ +0.06% +0.04% +0.00% / +0.02% -0.07% -0.06%] index_select skip64 : Elapsed 46.764 ms (935.283 ms / 20) 933.003 -> 933.096 ( +0.01%) [ +0.01% +0.02% +0.00% / +0.01% +0.07% +0.05%] index_select skip256 : Elapsed 46.653 ms (933.064 ms / 20) 939.005 -> 937.640 ( -0.15%) [ +0.00% +0.01% +0.01% / -0.01% -0.06% -0.15%] index_select spread : Elapsed 46.950 ms (939.005 ms / 20) 939.028 -> 938.696 ( -0.04%) [ +0.05% +0.00% +0.05% / +0.06% -0.03% -0.04%] index_select strided 3 : Elapsed 46.973 ms (939.453 ms / 20) 939.161 -> 936.813 ( -0.25%) [ +0.03% +0.00% +0.01% / -0.02% -0.25% -0.21%] index_select strided 5 : Elapsed 46.973 ms (939.452 ms / 20) 938.900 -> 936.852 ( -0.22%) [ +0.00% +0.05% +0.00% / -0.00% -0.20% -0.22%] index_select strided 7 : Elapsed 46.945 ms (938.900 ms / 20) 939.240 -> 937.761 ( -0.16%) [ +0.02% +0.00% +0.00% / -0.03% -0.16% -0.12%] index_select strided 8 : Elapsed 46.974 ms (939.470 ms / 20) 938.994 -> 938.020 ( -0.10%) [ +0.05% +0.02% +0.00% / +0.01% -0.09% -0.10%] index_select strided 16 : Elapsed 46.971 ms (939.418 ms / 20) 939.056 -> 938.377 ( -0.07%) [ +0.00% +0.09% +0.04% / +0.03% -0.06% -0.07%] index_select random : Elapsed 46.953 ms (939.056 ms / 20) 939.175 -> 938.332 ( -0.09%) [ +0.00% +0.01% +0.00% / +0.04% -0.09% -0.07%] index_select random_sorted : Elapsed 46.961 ms (939.221 ms / 20) out_shape = [100, 256, 40, 50] in_shape = [100, 256, 40, 20] idx_dim = 3 out_shape = [50, 20, 40, 100] in_shape = [256, 20, 40, 100] idx_dim = 0 out_shape = [256, 50, 40, 100] in_shape = [256, 20, 40, 100] idx_dim = 1 B = [256, 50, 40, 100] (stride (40, 10240, 1, 512000)) A = [256, 20, 40, 100] (stride (80000, 1, 20, 800)) dim = 1 944.565 -> 943.242 ( -0.14%) [ +0.35% +0.00% +0.29% / -0.14% +0.39% +0.32%] index_add_ linear : Elapsed 47.394 ms (947.884 ms / 20) 730.136 -> 730.224 ( +0.01%) [ +0.00% +0.13% +0.00% / +0.01% +0.57% +0.49%] index_copy_ linear : Elapsed 36.507 ms (730.136 ms / 20) 943.574 -> 943.954 ( +0.04%) [ +0.21% +0.33% +0.00% / +0.05% +0.23% +0.04%] index_add_ reverse : Elapsed 47.277 ms (945.544 ms / 20) 733.406 -> 733.416 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.64% +0.58%] index_copy_ reverse : Elapsed 36.679 ms (733.582 ms / 20) 937.745 -> 914.098 ( -2.52%) [ +0.10% +0.00% +0.15% / +0.13% -2.52% -2.20%] index_add_ spread : Elapsed 46.935 ms (938.700 ms / 20) 734.884 -> 731.175 ( -0.50%) [ +0.00% +0.01% +0.08% / +0.09% -0.50% -0.42%] index_copy_ spread : Elapsed 36.744 ms (734.884 ms / 20) 951.281 -> 937.131 ( -1.49%) [ +0.09% +0.00% +0.11% / -0.05% -1.34% -1.49%] index_add_ strided 3 : Elapsed 47.608 ms (952.159 ms / 20) 732.357 -> 733.061 ( +0.10%) [ +0.00% +0.16% +0.08% / +0.10% +0.56% +0.34%] index_copy_ strided 3 : Elapsed 36.618 ms (732.357 ms / 20) 969.200 -> 970.110 ( +0.09%) [ +0.29% +0.15% +0.00% / +0.09% +0.40% +0.32%] index_add_ strided 7 : Elapsed 48.600 ms (972.003 ms / 20) 740.583 -> 738.307 ( -0.31%) [ +0.00% +0.01% +0.00% / -0.16% -0.31% -0.21%] index_copy_ strided 7 : Elapsed 37.030 ms (740.607 ms / 20) 967.195 -> 966.155 ( -0.11%) [ +0.06% +0.00% +0.05% / -0.11% +0.25% +0.02%] index_add_ perm : Elapsed 48.389 ms (967.780 ms / 20) 730.977 -> 730.805 ( -0.02%) [ +0.00% +0.22% +0.04% / -0.02% +1.10% +0.97%] index_copy_ perm : Elapsed 36.549 ms (730.977 ms / 20) 935.012 -> 931.141 ( -0.41%) [ +0.00% +0.03% +0.02% / -0.41% +0.79% +0.49%] index_add_ perm_sorted : Elapsed 46.751 ms (935.012 ms / 20) 726.932 -> 726.718 ( -0.03%) [ +0.28% +0.00% +0.15% / -0.03% +1.03% +1.00%] index_copy_ perm_sorted : Elapsed 36.448 ms (728.956 ms / 20) 1958.431 -> 1955.646 ( -0.14%) [ +0.09% +0.00% +0.04% / -0.14% +0.25% +0.39%] index_select const : Elapsed 98.005 ms (1960.110 ms / 20) 1958.713 -> 1959.458 ( +0.04%) [ +0.00% +0.03% +0.03% / +0.04% +0.46% +0.44%] index_select wrap : Elapsed 97.936 ms (1958.713 ms / 20) 1959.060 -> 1959.101 ( +0.00%) [ +0.04% +0.01% +0.00% / +0.00% +0.39% +0.46%] index_select linear : Elapsed 97.993 ms (1959.854 ms / 20) 1958.738 -> 1958.869 ( +0.01%) [ +0.07% +0.03% +0.00% / +0.01% +0.51% +0.41%] index_select reverse : Elapsed 98.004 ms (1960.089 ms / 20) 1956.958 -> 1957.788 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.04% +0.47% +0.55%] index_select skip64 : Elapsed 97.869 ms (1957.370 ms / 20) 1949.978 -> 1948.327 ( -0.08%) [ +0.00% +0.02% +0.04% / -0.08% +0.83% +0.81%] index_select skip256 : Elapsed 97.499 ms (1949.978 ms / 20) 1958.024 -> 1958.277 ( +0.01%) [ +0.00% +0.04% +0.03% / +0.01% +0.44% +0.42%] index_select spread : Elapsed 97.901 ms (1958.024 ms / 20) 1957.543 -> 1959.058 ( +0.08%) [ +0.10% +0.00% +0.01% / +0.08% +0.46% +0.52%] index_select strided 3 : Elapsed 97.970 ms (1959.407 ms / 20) 1959.657 -> 1957.231 ( -0.12%) [ +0.01% +0.00% +0.02% / -0.08% +0.04% -0.12%] index_select strided 5 : Elapsed 97.991 ms (1959.825 ms / 20) 1958.095 -> 1958.603 ( +0.03%) [ +0.00% +0.01% +0.04% / +0.03% +0.07% +0.03%] index_select strided 7 : Elapsed 97.905 ms (1958.095 ms / 20) 1958.387 -> 1959.135 ( +0.04%) [ +0.05% +0.07% +0.00% / +0.04% +0.47% +0.47%] index_select strided 8 : Elapsed 97.973 ms (1959.450 ms / 20) 1959.196 -> 1958.537 ( -0.03%) [ +0.04% +0.00% +0.05% / -0.03% +0.38% +0.41%] index_select strided 16 : Elapsed 97.995 ms (1959.905 ms / 20) 1956.713 -> 1957.565 ( +0.04%) [ +0.10% +0.14% +0.00% / +0.04% +0.48% +0.54%] index_select random : Elapsed 97.937 ms (1958.734 ms / 20) 1957.344 -> 1958.179 ( +0.04%) [ +0.00% +0.10% +0.03% / +0.04% +0.51% +0.45%] index_select random_sorted : Elapsed 97.867 ms (1957.344 ms / 20) out_shape = [256, 20, 50, 100] in_shape = [256, 20, 40, 100] idx_dim = 2 B = [256, 20, 50, 100] (stride (100, 25600, 512000, 1)) A = [256, 20, 40, 100] (stride (80000, 4000, 1, 40)) dim = 2 308.815 -> 308.950 ( +0.04%) [ +0.10% +0.02% +0.00% / +0.04% +0.06% +0.09%] index_add_ linear : Elapsed 15.456 ms (309.129 ms / 20) 296.616 -> 296.849 ( +0.08%) [ +0.16% +0.00% +0.04% / +0.08% +0.16% +0.15%] index_copy_ linear : Elapsed 14.854 ms (297.082 ms / 20) 308.902 -> 308.798 ( -0.03%) [ +0.00% +0.09% +0.06% / -0.02% +0.01% -0.03%] index_add_ reverse : Elapsed 15.445 ms (308.902 ms / 20) 296.791 -> 296.788 ( -0.00%) [ +0.00% +0.06% +0.06% / -0.00% +0.06% +0.02%] index_copy_ reverse : Elapsed 14.840 ms (296.791 ms / 20) 308.836 -> 309.001 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.07% +0.07%] index_add_ spread : Elapsed 15.457 ms (309.141 ms / 20) 296.696 -> 296.800 ( +0.04%) [ +0.10% +0.02% +0.00% / +0.04% +0.07% +0.08%] index_copy_ spread : Elapsed 14.850 ms (296.992 ms / 20) 308.661 -> 308.760 ( +0.03%) [ +0.10% +0.01% +0.00% / +0.03% +0.10% +0.10%] index_add_ strided 3 : Elapsed 15.449 ms (308.971 ms / 20) 296.616 -> 296.674 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.07% +0.14%] index_copy_ strided 3 : Elapsed 14.841 ms (296.821 ms / 20) 308.777 -> 308.711 ( -0.02%) [ +0.04% +0.07% +0.00% / +0.06% +0.08% -0.02%] index_add_ strided 7 : Elapsed 15.445 ms (308.908 ms / 20) 296.736 -> 296.671 ( -0.02%) [ +0.01% +0.00% +0.03% / +0.04% +0.10% -0.02%] index_copy_ strided 7 : Elapsed 14.839 ms (296.773 ms / 20) 308.727 -> 308.862 ( +0.04%) [ +0.00% +0.06% +0.01% / +0.04% +0.11% +0.10%] index_add_ perm : Elapsed 15.436 ms (308.727 ms / 20) 296.626 -> 296.832 ( +0.07%) [ +0.05% +0.04% +0.00% / +0.07% +0.12% +0.13%] index_copy_ perm : Elapsed 14.839 ms (296.784 ms / 20) 308.803 -> 308.693 ( -0.04%) [ +0.12% +0.09% +0.00% / +0.06% -0.04% +0.06%] index_add_ perm_sorted : Elapsed 15.459 ms (309.187 ms / 20) 296.726 -> 296.715 ( -0.00%) [ +0.06% +0.08% +0.00% / +0.08% -0.00% +0.10%] index_copy_ perm_sorted : Elapsed 14.845 ms (296.903 ms / 20) 370.450 -> 370.728 ( +0.08%) [ +0.06% +0.00% +0.07% / +0.08% +0.10% +0.09%] index_select const : Elapsed 18.533 ms (370.657 ms / 20) 370.982 -> 371.058 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.06% +0.05% +0.02%] index_select wrap : Elapsed 18.549 ms (370.982 ms / 20) 370.861 -> 371.209 ( +0.09%) [ +0.00% +0.06% +0.13% / +0.09% +0.12% +0.12%] index_select linear : Elapsed 18.543 ms (370.861 ms / 20) 370.789 -> 371.124 ( +0.09%) [ +0.00% +0.02% +0.07% / +0.09% +0.10% +0.10%] index_select reverse : Elapsed 18.539 ms (370.789 ms / 20) 370.595 -> 370.696 ( +0.03%) [ +0.00% +0.07% +0.04% / +0.08% +0.03% +0.10%] index_select skip64 : Elapsed 18.530 ms (370.595 ms / 20) 370.565 -> 370.643 ( +0.02%) [ +0.00% +0.11% +0.08% / +0.05% +0.08% +0.02%] index_select skip256 : Elapsed 18.528 ms (370.565 ms / 20) 371.022 -> 371.031 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.08% +0.07% +0.00%] index_select spread : Elapsed 18.559 ms (371.180 ms / 20) 371.113 -> 371.015 ( -0.03%) [ +0.00% +0.05% +0.01% / +0.00% -0.03% +0.04%] index_select strided 3 : Elapsed 18.556 ms (371.113 ms / 20) 371.070 -> 370.800 ( -0.07%) [ +0.00% +0.02% +0.02% / -0.07% +0.02% +0.03%] index_select strided 5 : Elapsed 18.553 ms (371.070 ms / 20) 371.081 -> 371.165 ( +0.02%) [ +0.05% +0.00% +0.04% / +0.03% +0.02% +0.03%] index_select strided 7 : Elapsed 18.564 ms (371.285 ms / 20) 371.190 -> 371.117 ( -0.02%) [ +0.00% +0.00% +0.03% / +0.03% -0.02% +0.06%] index_select strided 8 : Elapsed 18.560 ms (371.190 ms / 20) 370.922 -> 371.322 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.16% +0.14% +0.11%] index_select strided 16 : Elapsed 18.546 ms (370.922 ms / 20) 371.228 -> 371.126 ( -0.03%) [ +0.00% +0.01% +0.03% / -0.03% +0.04% +0.00%] index_select random : Elapsed 18.561 ms (371.228 ms / 20) 370.934 -> 371.042 ( +0.03%) [ +0.00% +0.04% +0.03% / +0.06% +0.09% +0.03%] index_select random_sorted : Elapsed 18.547 ms (370.934 ms / 20) out_shape = [256, 20, 40, 50] in_shape = [256, 20, 40, 100] idx_dim = 3 out_shape = [50, 20, 100, 40] in_shape = [256, 20, 100, 40] idx_dim = 0 B = [50, 20, 100, 40] (stride (1, 5000, 50, 100000)) A = [256, 20, 100, 40] (stride (4000, 1024000, 1, 100)) dim = 0 119.818 -> 119.894 ( +0.06%) [ +0.11% +0.24% +0.00% / +0.06% +1.67% +1.60%] index_select const : Elapsed 5.997 ms (119.949 ms / 20) 130.433 -> 130.506 ( +0.06%) [ +0.00% +0.18% +0.03% / +0.13% +0.10% +0.06%] index_select wrap : Elapsed 6.522 ms (130.433 ms / 20) 130.492 -> 130.567 ( +0.06%) [ +0.21% +0.00% +0.13% / +0.06% +0.07% +0.12%] index_select linear : Elapsed 6.538 ms (130.767 ms / 20) 130.076 -> 130.226 ( +0.12%) [ +0.21% +0.28% +0.00% / +0.12% +0.43% +0.28%] index_select reverse : Elapsed 6.518 ms (130.353 ms / 20) 119.899 -> 119.857 ( -0.04%) [ +0.03% +0.00% +0.03% / -0.04% +1.35% +1.39%] index_select skip64 : Elapsed 5.997 ms (119.940 ms / 20) 119.746 -> 120.007 ( +0.22%) [ +0.10% +0.00% +0.15% / +0.22% +1.66% +1.59%] index_select skip256 : Elapsed 5.993 ms (119.864 ms / 20) 131.244 -> 131.289 ( +0.03%) [ +0.00% +0.04% +0.01% / +0.03% +0.67% +0.99%] index_select spread : Elapsed 6.562 ms (131.244 ms / 20) 131.684 -> 132.096 ( +0.31%) [ +0.36% +0.14% +0.00% / +0.31% +0.55% +0.35%] index_select strided 3 : Elapsed 6.608 ms (132.160 ms / 20) 132.349 -> 131.901 ( -0.34%) [ +0.00% +0.10% +0.10% / -0.02% -0.34% -0.19%] index_select strided 5 : Elapsed 6.617 ms (132.349 ms / 20) 132.270 -> 131.599 ( -0.51%) [ +0.00% +0.02% +0.23% / +0.33% -0.51% -0.49%] index_select strided 7 : Elapsed 6.613 ms (132.270 ms / 20) 132.338 -> 131.393 ( -0.71%) [ +0.00% +0.05% +0.26% / +0.16% -0.71% -0.71%] index_select strided 8 : Elapsed 6.617 ms (132.338 ms / 20) 132.102 -> 132.391 ( +0.22%) [ +0.01% +0.00% +0.18% / +0.22% +0.30% +0.41%] index_select strided 16 : Elapsed 6.606 ms (132.111 ms / 20) 131.500 -> 131.256 ( -0.19%) [ +0.04% +0.00% +0.15% / -0.19% +0.72% +0.68%] index_select strided 64 : Elapsed 6.578 ms (131.559 ms / 20) 131.982 -> 132.044 ( +0.05%) [ +0.00% +0.06% +0.05% / +0.08% +0.10% +0.05%] index_select strided 100 : Elapsed 6.599 ms (131.982 ms / 20) 131.088 -> 131.089 ( +0.00%) [ +0.26% +0.00% +0.06% / +0.22% +0.00% +0.09%] index_select strided 255 : Elapsed 6.571 ms (131.425 ms / 20) 131.380 -> 131.617 ( +0.18%) [ +0.00% +0.21% +0.21% / +0.18% +1.00% +0.96%] index_select random : Elapsed 6.569 ms (131.380 ms / 20) 130.940 -> 131.343 ( +0.31%) [ +0.16% +0.00% +0.24% / +0.31% +0.71% +0.55%] index_select random_sorted : Elapsed 6.557 ms (131.148 ms / 20) 132.017 -> 132.065 ( +0.04%) [ +0.00% +0.24% +0.31% / +0.11% +0.06% +0.04%] index_select perm : Elapsed 6.601 ms (132.017 ms / 20) 131.497 -> 131.834 ( +0.26%) [ +0.12% +0.00% +0.10% / +0.31% +0.26% +0.59%] index_select perm_sorted : Elapsed 6.583 ms (131.660 ms / 20) out_shape = [256, 50, 100, 40] in_shape = [256, 20, 100, 40] idx_dim = 1 out_shape = [256, 20, 50, 40] in_shape = [256, 20, 100, 40] idx_dim = 2 out_shape = [256, 20, 100, 50] in_shape = [256, 20, 100, 40] idx_dim = 3 out_shape = [50, 40, 20, 100] in_shape = [256, 40, 20, 100] idx_dim = 0 out_shape = [256, 50, 20, 100] in_shape = [256, 40, 20, 100] idx_dim = 1 out_shape = [256, 40, 50, 100] in_shape = [256, 40, 20, 100] idx_dim = 2 out_shape = [256, 40, 20, 50] in_shape = [256, 40, 20, 100] idx_dim = 3 out_shape = [50, 40, 100, 20] in_shape = [256, 40, 100, 20] idx_dim = 0 out_shape = [256, 50, 100, 20] in_shape = [256, 40, 100, 20] idx_dim = 1 out_shape = [256, 40, 50, 20] in_shape = [256, 40, 100, 20] idx_dim = 2 B = [256, 40, 50, 20] (stride (1, 256000, 5120, 256)) A = [256, 40, 100, 20] (stride (80000, 100, 1, 4000)) dim = 2 319.744 -> 321.151 ( +0.44%) [ +0.00% +0.61% +0.12% / +0.69% +0.44% +0.60%] index_select const : Elapsed 15.987 ms (319.744 ms / 20) 318.721 -> 319.998 ( +0.40%) [ +0.76% +0.00% +0.43% / +0.40% +0.51% +0.74%] index_select wrap : Elapsed 16.057 ms (321.147 ms / 20) 318.614 -> 320.303 ( +0.53%) [ +0.37% +0.00% +0.23% / +0.59% +0.87% +0.53%] index_select linear : Elapsed 15.990 ms (319.798 ms / 20) 319.301 -> 320.721 ( +0.44%) [ +0.00% +0.39% +0.05% / +0.54% +0.44% +0.48%] index_select reverse : Elapsed 15.965 ms (319.301 ms / 20) 319.632 -> 319.611 ( -0.01%) [ +0.11% +0.00% +0.13% / -0.01% +0.57% +0.74%] index_select skip64 : Elapsed 15.999 ms (319.986 ms / 20) 319.207 -> 318.645 ( -0.18%) [ +0.00% +0.63% +0.33% / -0.18% +0.83% +0.62%] index_select skip256 : Elapsed 15.960 ms (319.207 ms / 20) 318.814 -> 320.336 ( +0.48%) [ +0.16% +0.03% +0.00% / +0.48% +0.58% +0.61%] index_select spread : Elapsed 15.966 ms (319.312 ms / 20) 318.667 -> 319.790 ( +0.35%) [ +0.21% +0.00% +0.17% / +0.35% +0.70% +0.53%] index_select strided 3 : Elapsed 15.966 ms (319.329 ms / 20) 319.085 -> 320.214 ( +0.35%) [ +0.00% +0.20% +0.28% / +0.37% +0.35% +0.67%] index_select strided 5 : Elapsed 15.954 ms (319.085 ms / 20) 318.815 -> 318.951 ( +0.04%) [ +0.60% +0.00% +0.10% / +0.22% +0.16% +0.04%] index_select strided 7 : Elapsed 16.037 ms (320.739 ms / 20) 319.047 -> 319.078 ( +0.01%) [ +0.00% +0.24% +0.13% / +0.01% +0.33% +0.16%] index_select strided 8 : Elapsed 15.952 ms (319.047 ms / 20) 319.700 -> 319.579 ( -0.04%) [ +0.00% +0.04% +0.12% / -0.04% +0.15% +0.49%] index_select strided 16 : Elapsed 15.985 ms (319.700 ms / 20) 318.176 -> 319.585 ( +0.44%) [ +0.64% +0.00% +0.66% / +0.44% +0.78% +0.84%] index_select strided 64 : Elapsed 16.011 ms (320.221 ms / 20) 319.377 -> 320.279 ( +0.28%) [ +0.48% +1.01% +0.00% / +0.28% +0.69% +0.40%] index_select random : Elapsed 16.045 ms (320.902 ms / 20) 319.633 -> 319.810 ( +0.06%) [ +0.23% +0.27% +0.00% / +0.06% +0.41% +0.41%] index_select random_sorted : Elapsed 16.019 ms (320.377 ms / 20) 318.428 -> 318.563 ( +0.04%) [ +0.38% +0.19% +0.00% / +0.04% +0.72% +0.98%] index_select perm : Elapsed 15.982 ms (319.647 ms / 20) 318.969 -> 319.214 ( +0.08%) [ +0.47% +0.00% +0.49% / +0.08% +0.54% +0.70%] index_select perm_sorted : Elapsed 16.024 ms (320.478 ms / 20) out_shape = [256, 40, 100, 50] in_shape = [256, 40, 100, 20] idx_dim = 3 out_shape = [50, 100, 20, 40] in_shape = [256, 100, 20, 40] idx_dim = 0 B = [50, 100, 20, 40] (stride (80000, 800, 40, 1)) dim = 0 fill_cnt = 256 10.845 -> 10.885 ( +0.37%) [ +0.12% +0.12% +0.00% / +0.37% +0.70% +0.75%] index_fill_ const : Elapsed 0.543 ms (10.858 ms / 20) 10.928 -> 10.963 ( +0.32%) [ +0.10% +0.09% +0.00% / +0.32% +0.74% +0.80%] index_fill_ linear : Elapsed 0.547 ms (10.939 ms / 20) 10.928 -> 10.975 ( +0.43%) [ +0.08% +0.15% +0.00% / +0.43% +0.79% +0.81%] index_fill_ reverse : Elapsed 0.547 ms (10.937 ms / 20) 10.836 -> 10.867 ( +0.29%) [ +0.18% +0.12% +0.00% / +0.29% +0.78% +0.74%] index_fill_ skip64 : Elapsed 0.543 ms (10.855 ms / 20) 10.830 -> 10.868 ( +0.35%) [ +0.12% +0.06% +0.00% / +0.35% +0.82% +0.78%] index_fill_ skip256 : Elapsed 0.542 ms (10.843 ms / 20) 10.981 -> 11.020 ( +0.36%) [ +0.00% +0.07% +0.02% / +0.36% +0.92% +0.82%] index_fill_ spread : Elapsed 0.549 ms (10.981 ms / 20) 11.264 -> 11.300 ( +0.32%) [ +0.09% +0.14% +0.00% / +0.32% +0.81% +0.89%] index_fill_ strided 3 : Elapsed 0.564 ms (11.274 ms / 20) 11.351 -> 11.381 ( +0.26%) [ +0.00% +0.06% +0.01% / +0.26% +0.75% +0.73%] index_fill_ strided 5 : Elapsed 0.568 ms (11.351 ms / 20) 11.356 -> 11.402 ( +0.41%) [ +0.20% +0.20% +0.00% / +0.41% +0.94% +0.79%] index_fill_ strided 7 : Elapsed 0.569 ms (11.379 ms / 20) 11.330 -> 11.350 ( +0.18%) [ +0.07% +0.07% +0.00% / +0.18% +0.67% +0.64%] index_fill_ strided 8 : Elapsed 0.567 ms (11.338 ms / 20) 11.267 -> 11.321 ( +0.48%) [ +0.00% +0.11% +0.03% / +0.48% +0.85% +0.76%] index_fill_ strided 16 : Elapsed 0.563 ms (11.267 ms / 20) 11.347 -> 11.377 ( +0.26%) [ +0.02% +0.05% +0.00% / +0.26% +0.77% +0.65%] index_fill_ random : Elapsed 0.567 ms (11.349 ms / 20) 10.981 -> 11.017 ( +0.33%) [ +0.16% +0.15% +0.00% / +0.33% +0.79% +0.77%] index_fill_ random_sorted : Elapsed 0.550 ms (10.999 ms / 20) out_shape = [256, 50, 20, 40] in_shape = [256, 100, 20, 40] idx_dim = 1 out_shape = [256, 100, 50, 40] in_shape = [256, 100, 20, 40] idx_dim = 2 out_shape = [256, 100, 20, 50] in_shape = [256, 100, 20, 40] idx_dim = 3 out_shape = [50, 100, 40, 20] in_shape = [256, 100, 40, 20] idx_dim = 0 B = [50, 100, 40, 20] (stride (100, 1, 100000, 5000)) A = [256, 100, 40, 20] (stride (40, 204800, 1, 10240)) dim = 0 81.428 -> 80.720 ( -0.87%) [ +0.00% +0.32% +0.23% / +0.08% -0.87% -0.79%] index_select const : Elapsed 4.071 ms (81.428 ms / 20) 87.451 -> 88.060 ( +0.70%) [ +1.33% +0.81% +0.00% / +0.70% +6.51% +5.97%] index_select wrap : Elapsed 4.431 ms (88.613 ms / 20) 87.559 -> 87.848 ( +0.33%) [ +0.00% +0.17% +0.37% / +0.33% +6.12% +6.13%] index_select linear : Elapsed 4.378 ms (87.559 ms / 20) 88.195 -> 88.968 ( +0.88%) [ +0.00% +0.03% +1.23% / +0.88% +4.87% +5.50%] index_select reverse : Elapsed 4.410 ms (88.195 ms / 20) 81.458 -> 81.005 ( -0.56%) [ +0.52% +0.25% +0.00% / +0.48% -0.56% -0.55%] index_select skip64 : Elapsed 4.094 ms (81.883 ms / 20) 81.284 -> 80.693 ( -0.73%) [ +0.00% +0.18% +0.06% / +0.60% -0.73% -0.13%] index_select skip256 : Elapsed 4.064 ms (81.284 ms / 20) 90.726 -> 90.805 ( +0.09%) [ +0.00% +0.08% +0.20% / +0.09% +2.22% +1.70%] index_select spread : Elapsed 4.536 ms (90.726 ms / 20) 89.950 -> 90.122 ( +0.19%) [ +0.47% +0.00% +0.25% / +0.19% +3.68% +3.76%] index_select strided 3 : Elapsed 4.519 ms (90.376 ms / 20) 90.863 -> 91.071 ( +0.23%) [ +0.00% +0.78% +0.88% / +0.23% +2.09% +2.12%] index_select strided 5 : Elapsed 4.543 ms (90.863 ms / 20) 91.558 -> 91.277 ( -0.31%) [ +0.19% +0.77% +0.00% / -0.31% +1.01% +1.23%] index_select strided 7 : Elapsed 4.586 ms (91.730 ms / 20) 91.555 -> 91.170 ( -0.42%) [ +0.33% +0.00% +0.09% / -0.42% +2.70% +2.29%] index_select strided 8 : Elapsed 4.593 ms (91.859 ms / 20) 91.566 -> 92.528 ( +1.05%) [ +0.00% +0.74% +0.94% / +1.05% +2.08% +1.72%] index_select strided 16 : Elapsed 4.578 ms (91.566 ms / 20) 93.779 -> 90.024 ( -4.00%) [ +0.00% +0.56% +0.33% / +0.22% -3.65% -4.00%] index_select strided 64 : Elapsed 4.689 ms (93.779 ms / 20) 91.400 -> 91.496 ( +0.11%) [ +0.31% +0.08% +0.00% / +0.11% +4.04% +4.39%] index_select strided 100 : Elapsed 4.584 ms (91.682 ms / 20) 89.550 -> 89.665 ( +0.13%) [ +0.35% +0.00% +0.17% / +0.13% +0.82% +0.52%] index_select strided 255 : Elapsed 4.493 ms (89.863 ms / 20) 90.526 -> 90.351 ( -0.19%) [ +0.43% +0.10% +0.00% / -0.19% +4.52% +4.53%] index_select random : Elapsed 4.546 ms (90.915 ms / 20) 90.575 -> 90.230 ( -0.38%) [ +0.22% +0.00% +0.57% / -0.38% +3.21% +2.78%] index_select random_sorted : Elapsed 4.539 ms (90.772 ms / 20) 91.751 -> 91.609 ( -0.15%) [ +0.00% +0.25% +0.21% / -0.06% -0.02% -0.15%] index_select perm : Elapsed 4.588 ms (91.751 ms / 20) 90.808 -> 90.491 ( -0.35%) [ +0.09% +0.00% +0.27% / -0.35% +1.85% +1.63%] index_select perm_sorted : Elapsed 4.545 ms (90.891 ms / 20) out_shape = [256, 50, 40, 20] in_shape = [256, 100, 40, 20] idx_dim = 1 out_shape = [256, 100, 50, 20] in_shape = [256, 100, 40, 20] idx_dim = 2 out_shape = [256, 100, 40, 50] in_shape = [256, 100, 40, 20] idx_dim = 3 out_shape = [100, 40, 50, 256] in_shape = [20, 40, 50, 256] idx_dim = 0 out_shape = [20, 100, 50, 256] in_shape = [20, 40, 50, 256] idx_dim = 1 out_shape = [20, 40, 100, 256] in_shape = [20, 40, 50, 256] idx_dim = 2 B = [20, 40, 100, 256] (stride (1024000, 1, 40, 4000)) A = [20, 40, 50, 256] (stride (12800, 256000, 1, 50)) dim = 2 374.429 -> 371.919 ( -0.67%) [ +0.51% +0.09% +0.00% / +0.25% -0.67% -0.43%] index_add_ linear : Elapsed 18.817 ms (376.336 ms / 20) 296.481 -> 293.558 ( -0.99%) [ +0.17% +0.02% +0.00% / +0.07% -0.80% -0.99%] index_copy_ linear : Elapsed 14.849 ms (296.979 ms / 20) 367.529 -> 355.281 ( -3.33%) [ +0.36% +0.00% +0.21% / +0.39% -3.00% -3.33%] index_add_ reverse : Elapsed 18.442 ms (368.847 ms / 20) 294.584 -> 292.028 ( -0.87%) [ +0.00% +0.94% +0.52% / +0.69% -0.26% -0.87%] index_copy_ reverse : Elapsed 14.729 ms (294.584 ms / 20) 381.304 -> 376.231 ( -1.33%) [ +0.00% +0.45% +0.72% / -0.00% -1.33% -0.65%] index_add_ spread : Elapsed 19.065 ms (381.304 ms / 20) 304.137 -> 302.605 ( -0.50%) [ +0.07% +0.31% +0.00% / +0.09% -0.50% +0.28%] index_copy_ spread : Elapsed 15.217 ms (304.341 ms / 20) 386.800 -> 380.526 ( -1.62%) [ +0.00% +0.27% +0.00% / -0.53% -1.04% -1.62%] index_add_ strided 3 : Elapsed 19.340 ms (386.804 ms / 20) 306.244 -> 305.286 ( -0.31%) [ +0.13% +0.00% +0.15% / -0.31% -0.17% -0.02%] index_copy_ strided 3 : Elapsed 15.333 ms (306.653 ms / 20) 396.718 -> 394.361 ( -0.59%) [ +0.00% +0.15% +0.35% / +0.34% -0.59% -0.28%] index_add_ strided 7 : Elapsed 19.836 ms (396.718 ms / 20) 310.907 -> 310.345 ( -0.18%) [ +0.11% +0.13% +0.00% / -0.18% +0.34% +0.32%] index_copy_ strided 7 : Elapsed 15.562 ms (311.237 ms / 20) 390.245 -> 392.889 ( +0.68%) [ +0.73% +0.00% +0.59% / +0.68% +1.07% +0.84%] index_add_ perm : Elapsed 19.655 ms (393.101 ms / 20) 307.270 -> 307.658 ( +0.13%) [ +0.40% +0.00% +0.04% / +0.13% +0.85% +0.79%] index_copy_ perm : Elapsed 15.425 ms (308.508 ms / 20) 376.690 -> 367.706 ( -2.38%) [ +0.00% +0.35% +0.43% / +0.72% -2.38% -1.91%] index_add_ perm_sorted : Elapsed 18.835 ms (376.690 ms / 20) 299.715 -> 298.947 ( -0.26%) [ +0.00% +0.41% +0.05% / -0.26% +0.50% -0.19%] index_copy_ perm_sorted : Elapsed 14.986 ms (299.715 ms / 20) 634.544 -> 632.961 ( -0.25%) [ +0.07% +0.18% +0.00% / +0.08% -0.25% -0.22%] index_select const : Elapsed 31.749 ms (634.982 ms / 20) 634.709 -> 633.538 ( -0.18%) [ +0.00% +0.18% +0.05% / +0.09% -0.18% +0.04%] index_select wrap : Elapsed 31.735 ms (634.709 ms / 20) 632.610 -> 632.553 ( -0.01%) [ +0.25% +0.30% +0.00% / +0.10% -0.01% +0.07%] index_select linear : Elapsed 31.709 ms (634.182 ms / 20) 633.046 -> 631.355 ( -0.27%) [ +0.26% +0.00% +0.22% / +0.19% -0.19% -0.27%] index_select reverse : Elapsed 31.734 ms (634.672 ms / 20) 633.290 -> 630.335 ( -0.47%) [ +0.00% +0.20% +0.12% / +0.28% -0.45% -0.47%] index_select skip64 : Elapsed 31.664 ms (633.290 ms / 20) 632.840 -> 632.504 ( -0.05%) [ +0.00% +0.07% +0.09% / +0.14% +0.00% -0.05%] index_select skip256 : Elapsed 31.642 ms (632.840 ms / 20) 632.710 -> 633.542 ( +0.13%) [ +0.00% +0.47% +0.37% / +0.32% +0.17% +0.13%] index_select spread : Elapsed 31.636 ms (632.710 ms / 20) 630.718 -> 632.511 ( +0.28%) [ +0.16% +0.00% +0.13% / +0.28% +0.29% +0.48%] index_select strided 3 : Elapsed 31.586 ms (631.724 ms / 20) 634.633 -> 633.102 ( -0.24%) [ +0.01% +0.00% +0.02% / +0.20% -0.24% +0.03%] index_select strided 5 : Elapsed 31.735 ms (634.693 ms / 20) 633.831 -> 628.601 ( -0.83%) [ +0.04% +0.09% +0.00% / +0.36% -0.83% -0.61%] index_select strided 7 : Elapsed 31.704 ms (634.088 ms / 20) 634.733 -> 631.738 ( -0.47%) [ +0.03% +0.00% +0.03% / +0.19% -0.16% -0.47%] index_select strided 8 : Elapsed 31.745 ms (634.904 ms / 20) 634.938 -> 633.199 ( -0.27%) [ +0.08% +0.00% +0.14% / +0.19% -0.16% -0.27%] index_select strided 16 : Elapsed 31.772 ms (635.433 ms / 20) 633.030 -> 633.323 ( +0.05%) [ +0.00% +0.09% +0.12% / +0.05% +0.14% +0.20%] index_select random : Elapsed 31.652 ms (633.030 ms / 20) 634.721 -> 633.056 ( -0.26%) [ +0.11% +0.00% +0.05% / +0.03% -0.24% -0.26%] index_select random_sorted : Elapsed 31.770 ms (635.408 ms / 20) out_shape = [20, 40, 50, 100] in_shape = [20, 40, 50, 256] idx_dim = 3 out_shape = [100, 40, 256, 50] in_shape = [20, 40, 256, 50] idx_dim = 0 out_shape = [20, 100, 256, 50] in_shape = [20, 40, 256, 50] idx_dim = 1 B = [20, 100, 256, 50] (stride (1280000, 1, 100, 25600)) dim = 1 fill_cnt = 40 185.897 -> 185.988 ( +0.05%) [ +0.01% +0.00% +0.07% / +0.05% +0.77% +0.79%] index_fill_ const : Elapsed 9.296 ms (185.914 ms / 20) 185.812 -> 185.676 ( -0.07%) [ +0.01% +0.00% +0.05% / +0.07% -0.07% -0.05%] index_fill_ linear : Elapsed 9.292 ms (185.837 ms / 20) 186.505 -> 186.310 ( -0.10%) [ +0.00% +0.06% +0.11% / +0.09% -0.10% -0.08%] index_fill_ reverse : Elapsed 9.325 ms (186.505 ms / 20) 186.216 -> 186.286 ( +0.04%) [ +0.00% +0.01% +0.07% / +0.04% +0.07% +0.07%] index_fill_ skip64 : Elapsed 9.311 ms (186.216 ms / 20) 186.462 -> 186.667 ( +0.11%) [ +0.04% +0.00% +0.08% / +0.13% +0.12% +0.11%] index_fill_ skip256 : Elapsed 9.327 ms (186.532 ms / 20) 187.063 -> 186.839 ( -0.12%) [ +0.00% +0.01% +0.01% / +0.03% -0.12% -0.11%] index_fill_ spread : Elapsed 9.353 ms (187.063 ms / 20) 187.237 -> 186.963 ( -0.15%) [ +0.01% +0.00% +0.05% / +0.09% -0.09% -0.15%] index_fill_ strided 3 : Elapsed 9.363 ms (187.261 ms / 20) 186.495 -> 186.758 ( +0.14%) [ +0.06% +0.00% +0.16% / +0.14% +0.23% +0.18%] index_fill_ strided 5 : Elapsed 9.330 ms (186.606 ms / 20) 185.937 -> 186.095 ( +0.08%) [ +0.00% +0.09% +0.07% / +0.08% +0.10% +0.09%] index_fill_ strided 7 : Elapsed 9.297 ms (185.937 ms / 20) 187.280 -> 187.335 ( +0.03%) [ +0.00% +0.02% +0.03% / +0.03% +0.26% +0.27%] index_fill_ strided 8 : Elapsed 9.364 ms (187.280 ms / 20) 187.142 -> 187.153 ( +0.01%) [ +0.00% +0.02% +0.04% / +0.01% +0.27% +0.23%] index_fill_ strided 16 : Elapsed 9.357 ms (187.142 ms / 20) 186.676 -> 186.723 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.04% +0.04% +0.03%] index_fill_ strided 64 : Elapsed 9.338 ms (186.765 ms / 20) 187.092 -> 187.032 ( -0.03%) [ +0.00% +0.01% +0.01% / +0.03% +0.01% -0.03%] index_fill_ random : Elapsed 9.355 ms (187.092 ms / 20) 186.907 -> 187.075 ( +0.09%) [ +0.00% +0.00% +0.02% / +0.09% +0.11% +0.17%] index_fill_ random_sorted : Elapsed 9.345 ms (186.907 ms / 20) 187.415 -> 187.436 ( +0.01%) [ +0.02% +0.00% +0.02% / +0.01% +0.01% +0.02%] index_fill_ perm : Elapsed 9.373 ms (187.460 ms / 20) 185.947 -> 185.686 ( -0.14%) [ +0.05% +0.00% +0.00% / +0.09% -0.14% -0.13%] index_fill_ perm_sorted : Elapsed 9.302 ms (186.039 ms / 20) out_shape = [20, 40, 100, 50] in_shape = [20, 40, 256, 50] idx_dim = 2 out_shape = [20, 40, 256, 100] in_shape = [20, 40, 256, 50] idx_dim = 3 out_shape = [100, 50, 40, 256] in_shape = [20, 50, 40, 256] idx_dim = 0 out_shape = [20, 100, 40, 256] in_shape = [20, 50, 40, 256] idx_dim = 1 out_shape = [20, 50, 100, 256] in_shape = [20, 50, 40, 256] idx_dim = 2 out_shape = [20, 50, 40, 100] in_shape = [20, 50, 40, 256] idx_dim = 3 B = [20, 50, 40, 100] (stride (40, 80000, 1, 800)) dim = 3 fill_cnt = 256 20.454 -> 20.405 ( -0.24%) [ +0.10% +0.11% +0.00% / -0.24% +0.05% +0.06%] index_fill_ const : Elapsed 1.024 ms (20.474 ms / 20) 21.642 -> 21.561 ( -0.37%) [ +0.12% +0.00% +0.03% / -0.33% -0.35% -0.37%] index_fill_ linear : Elapsed 1.083 ms (21.669 ms / 20) 21.776 -> 21.662 ( -0.52%) [ +0.08% +0.00% +0.07% / -0.24% -0.42% -0.52%] index_fill_ reverse : Elapsed 1.090 ms (21.794 ms / 20) 20.527 -> 20.459 ( -0.33%) [ +0.20% +0.11% +0.00% / -0.33% -0.22% -0.20%] index_fill_ skip64 : Elapsed 1.028 ms (20.569 ms / 20) 20.448 -> 20.386 ( -0.30%) [ +0.08% +0.13% +0.00% / -0.30% +0.13% +0.04%] index_fill_ skip256 : Elapsed 1.023 ms (20.465 ms / 20) 21.279 -> 21.214 ( -0.31%) [ +0.17% +0.15% +0.00% / -0.31% +0.13% +0.20%] index_fill_ spread : Elapsed 1.066 ms (21.315 ms / 20) 24.865 -> 24.851 ( -0.06%) [ +0.06% +0.00% +0.07% / -0.06% +0.40% +0.18%] index_fill_ strided 3 : Elapsed 1.244 ms (24.881 ms / 20) 24.353 -> 24.282 ( -0.29%) [ +0.00% +0.09% +0.13% / -0.29% +0.16% +0.18%] index_fill_ strided 5 : Elapsed 1.218 ms (24.353 ms / 20) 24.930 -> 24.881 ( -0.20%) [ +0.10% +0.11% +0.00% / -0.20% +0.10% +0.12%] index_fill_ strided 7 : Elapsed 1.248 ms (24.956 ms / 20) 24.758 -> 24.702 ( -0.23%) [ +0.29% +0.00% +0.33% / -0.23% +0.21% +0.65%] index_fill_ strided 8 : Elapsed 1.241 ms (24.830 ms / 20) 24.720 -> 24.705 ( -0.06%) [ +0.22% +0.15% +0.00% / -0.06% +0.45% +0.19%] index_fill_ strided 16 : Elapsed 1.239 ms (24.774 ms / 20) 24.688 -> 24.628 ( -0.24%) [ +0.00% +0.00% +0.10% / -0.24% +0.28% +0.32%] index_fill_ strided 64 : Elapsed 1.234 ms (24.689 ms / 20) 24.498 -> 24.510 ( +0.05%) [ +0.06% +0.26% +0.00% / +0.05% +0.29% +0.45%] index_fill_ random : Elapsed 1.226 ms (24.513 ms / 20) 21.161 -> 21.099 ( -0.29%) [ +0.14% +0.13% +0.00% / -0.29% +0.06% +0.06%] index_fill_ random_sorted : Elapsed 1.060 ms (21.190 ms / 20) out_shape = [100, 50, 256, 40] in_shape = [20, 50, 256, 40] idx_dim = 0 out_shape = [20, 100, 256, 40] in_shape = [20, 50, 256, 40] idx_dim = 1 out_shape = [20, 50, 100, 40] in_shape = [20, 50, 256, 40] idx_dim = 2 out_shape = [20, 50, 256, 100] in_shape = [20, 50, 256, 40] idx_dim = 3 out_shape = [100, 256, 40, 50] in_shape = [20, 256, 40, 50] idx_dim = 0 B = [100, 256, 40, 50] (stride (512000, 40, 1, 10240)) A = [20, 256, 40, 50] (stride (1, 20, 5120, 204800)) dim = 0 440.368 -> 440.720 ( +0.08%) [ +0.00% +0.11% +0.22% / +0.08% +0.26% +0.10%] index_add_ linear : Elapsed 22.018 ms (440.368 ms / 20) 346.775 -> 346.028 ( -0.22%) [ +0.09% +0.00% +0.00% / -0.06% -0.22% -0.21%] index_copy_ linear : Elapsed 17.355 ms (347.091 ms / 20) 438.441 -> 439.881 ( +0.33%) [ +0.00% +0.21% +0.24% / +0.33% +0.47% +0.66%] index_add_ reverse : Elapsed 21.922 ms (438.441 ms / 20) 346.325 -> 346.417 ( +0.03%) [ +0.00% +0.21% +0.06% / +0.04% +0.03% +0.15%] index_copy_ reverse : Elapsed 17.316 ms (346.325 ms / 20) 441.692 -> 441.130 ( -0.13%) [ +0.00% +0.02% +0.04% / -0.11% +0.17% -0.13%] index_add_ spread : Elapsed 22.085 ms (441.692 ms / 20) 346.669 -> 346.566 ( -0.03%) [ +0.00% +0.04% +0.10% / +0.11% +0.03% -0.03%] index_copy_ spread : Elapsed 17.333 ms (346.669 ms / 20) 442.404 -> 440.155 ( -0.51%) [ +0.00% +0.40% +0.40% / +0.24% -0.51% -0.51%] index_add_ strided 3 : Elapsed 22.120 ms (442.404 ms / 20) 346.954 -> 345.878 ( -0.31%) [ +0.00% +0.05% +0.04% / -0.04% -0.31% +0.00%] index_copy_ strided 3 : Elapsed 17.348 ms (346.954 ms / 20) 439.396 -> 440.526 ( +0.26%) [ +0.25% +0.00% +0.16% / +0.26% +0.34% +0.34%] index_add_ strided 7 : Elapsed 22.024 ms (440.489 ms / 20) 346.759 -> 346.307 ( -0.13%) [ +0.13% +0.00% +0.07% / +0.07% -0.13% -0.12%] index_copy_ strided 7 : Elapsed 17.360 ms (347.196 ms / 20) 440.029 -> 439.918 ( -0.03%) [ +0.01% +0.00% +0.07% / -0.03% +0.07% -0.01%] index_add_ perm : Elapsed 22.004 ms (440.086 ms / 20) 346.726 -> 346.035 ( -0.20%) [ +0.05% +0.06% +0.00% / -0.02% -0.20% -0.17%] index_copy_ perm : Elapsed 17.345 ms (346.905 ms / 20) 439.702 -> 439.721 ( +0.00%) [ +0.00% +0.19% +0.11% / +0.25% +0.03% +0.00%] index_add_ perm_sorted : Elapsed 21.985 ms (439.702 ms / 20) 346.546 -> 345.873 ( -0.19%) [ +0.01% +0.14% +0.00% / +0.31% -0.03% -0.19%] index_copy_ perm_sorted : Elapsed 17.329 ms (346.575 ms / 20) 1962.481 -> 1958.591 ( -0.20%) [ +0.05% +0.02% +0.00% / +0.08% -0.20% -0.13%] index_select const : Elapsed 98.178 ms (1963.560 ms / 20) 1961.282 -> 1958.737 ( -0.13%) [ +0.01% +0.00% +0.04% / +0.11% -0.13% +0.08%] index_select wrap : Elapsed 98.073 ms (1961.453 ms / 20) 1956.819 -> 1958.884 ( +0.11%) [ +0.08% +0.03% +0.00% / +0.11% +0.17% +0.28%] index_select linear : Elapsed 97.917 ms (1958.349 ms / 20) 1960.309 -> 1959.358 ( -0.05%) [ +0.00% +0.07% +0.04% / -0.05% -0.05% -0.02%] index_select reverse : Elapsed 98.015 ms (1960.309 ms / 20) 1962.275 -> 1953.415 ( -0.45%) [ +0.07% +0.11% +0.00% / +0.11% -0.45% -0.41%] index_select skip64 : Elapsed 98.184 ms (1963.681 ms / 20) 1962.590 -> 1960.452 ( -0.11%) [ +0.05% +0.03% +0.00% / +0.01% -0.03% -0.11%] index_select skip256 : Elapsed 98.183 ms (1963.659 ms / 20) 1961.200 -> 1958.431 ( -0.14%) [ +0.14% +0.00% +0.21% / +0.05% -0.14% +0.07%] index_select spread : Elapsed 98.196 ms (1963.920 ms / 20) 1955.067 -> 1959.177 ( +0.21%) [ +0.00% +0.07% +0.05% / +0.23% +0.21% +0.34%] index_select strided 3 : Elapsed 97.753 ms (1955.067 ms / 20) 1958.978 -> 1959.107 ( +0.01%) [ +0.17% +0.00% +0.25% / +0.20% +0.05% +0.01%] index_select strided 5 : Elapsed 98.115 ms (1962.296 ms / 20) 1960.744 -> 1954.241 ( -0.33%) [ +0.00% +0.02% +0.00% / +0.08% -0.28% -0.33%] index_select strided 7 : Elapsed 98.041 ms (1960.824 ms / 20) 1960.449 -> 1958.205 ( -0.11%) [ +0.00% +0.04% +0.15% / +0.07% -0.11% -0.04%] index_select strided 8 : Elapsed 98.022 ms (1960.449 ms / 20) 1960.275 -> 1959.061 ( -0.06%) [ +0.00% +0.12% +0.07% / +0.07% -0.06% +0.04%] index_select strided 16 : Elapsed 98.014 ms (1960.275 ms / 20) 1957.224 -> 1959.563 ( +0.12%) [ +0.08% +0.00% +0.13% / +0.12% +0.18% +0.24%] index_select random : Elapsed 97.942 ms (1958.843 ms / 20) 1962.322 -> 1960.037 ( -0.12%) [ +0.02% +0.03% +0.00% / +0.06% -0.12% -0.07%] index_select random_sorted : Elapsed 98.136 ms (1962.713 ms / 20) out_shape = [20, 100, 40, 50] in_shape = [20, 256, 40, 50] idx_dim = 1 B = [20, 100, 40, 50] (stride (1, 40000, 20, 800)) A = [20, 256, 40, 50] (stride (1, 20, 256000, 5120)) dim = 1 good 74.375 -> 70.384 ( -5.37%) [ +0.35% +0.57% +0.00% / +0.08% -4.77% -5.37%] index_select const : Elapsed 3.732 ms (74.635 ms / 20) 104.350 -> 102.242 ( -2.02%) [ +0.29% +0.78% +0.00% / +0.36% -1.40% -2.02%] index_select wrap : Elapsed 5.233 ms (104.656 ms / 20) 104.559 -> 102.526 ( -1.94%) [ +0.24% +0.39% +0.00% / -0.07% -1.94% -1.52%] index_select linear : Elapsed 5.240 ms (104.808 ms / 20) 108.962 -> 108.652 ( -0.28%) [ +0.40% +0.10% +0.00% / -0.28% +0.03% -0.28%] index_select reverse : Elapsed 5.470 ms (109.402 ms / 20) 75.754 -> 72.349 ( -4.49%) [ +0.00% +0.34% +0.22% / +0.31% -4.49% -4.39%] index_select skip64 : Elapsed 3.788 ms (75.754 ms / 20) good 74.473 -> 70.599 ( -5.20%) [ +0.00% +0.11% +0.21% / +0.32% -4.78% -5.20%] index_select skip256 : Elapsed 3.724 ms (74.473 ms / 20) 110.856 -> 111.207 ( +0.32%) [ +0.00% +0.25% +0.20% / +0.32% +1.52% +1.41%] index_select spread : Elapsed 5.543 ms (110.856 ms / 20) 112.289 -> 112.346 ( +0.05%) [ +0.00% +0.27% +0.45% / +0.05% +0.60% +0.97%] index_select strided 3 : Elapsed 5.614 ms (112.289 ms / 20) 113.208 -> 113.153 ( -0.05%) [ +0.00% +0.08% +0.03% / -0.05% +1.09% +0.93%] index_select strided 5 : Elapsed 5.660 ms (113.208 ms / 20) 114.219 -> 114.183 ( -0.03%) [ +0.30% +0.00% +0.11% / -0.03% +1.26% +1.17%] index_select strided 7 : Elapsed 5.728 ms (114.567 ms / 20) 113.064 -> 113.127 ( +0.06%) [ +0.05% +0.00% +0.05% / +0.06% +1.28% +1.87%] index_select strided 8 : Elapsed 5.656 ms (113.126 ms / 20) 113.472 -> 113.678 ( +0.18%) [ +0.00% +0.07% +0.16% / +0.18% +3.30% +3.38%] index_select strided 16 : Elapsed 5.674 ms (113.472 ms / 20) 105.349 -> 103.773 ( -1.50%) [ +0.47% +0.00% +0.46% / +0.50% -1.45% -1.50%] index_select strided 64 : Elapsed 5.292 ms (105.848 ms / 20) 117.193 -> 117.179 ( -0.01%) [ +0.20% +0.00% +0.05% / -0.01% +0.05% +0.59%] index_select strided 100 : Elapsed 5.871 ms (117.425 ms / 20) 106.459 -> 106.617 ( +0.15%) [ +0.35% +0.21% +0.00% / +0.15% +3.34% +2.97%] index_select strided 255 : Elapsed 5.341 ms (106.827 ms / 20) 117.164 -> 114.585 ( -2.20%) [ +0.13% +0.00% +0.47% / +0.35% -1.91% -2.20%] index_select random : Elapsed 5.866 ms (117.321 ms / 20) 108.969 -> 108.251 ( -0.66%) [ +0.00% +0.79% +0.68% / +0.50% -0.66% -0.48%] index_select random_sorted : Elapsed 5.448 ms (108.969 ms / 20) 114.851 -> 114.819 ( -0.03%) [ +0.18% +0.50% +0.00% / -0.03% +1.83% +1.73%] index_select perm : Elapsed 5.753 ms (115.063 ms / 20) 108.719 -> 109.124 ( +0.37%) [ +0.18% +0.00% +0.37% / +0.37% +2.18% +1.98%] index_select perm_sorted : Elapsed 5.446 ms (108.917 ms / 20) out_shape = [20, 256, 100, 50] in_shape = [20, 256, 40, 50] idx_dim = 2 out_shape = [20, 256, 40, 100] in_shape = [20, 256, 40, 50] idx_dim = 3 out_shape = [100, 256, 50, 40] in_shape = [20, 256, 50, 40] idx_dim = 0 B = [100, 256, 50, 40] (stride (512000, 2000, 40, 1)) A = [20, 256, 50, 40] (stride (512000, 1, 256, 12800)) dim = 0 196.477 -> 193.779 ( -1.37%) [ +0.24% +0.00% +0.14% / +0.18% -1.37% -1.17%] index_add_ linear : Elapsed 9.847 ms (196.939 ms / 20) 189.672 -> 187.629 ( -1.08%) [ +0.00% +0.27% +0.09% / +0.15% -1.08% -0.91%] index_copy_ linear : Elapsed 9.484 ms (189.672 ms / 20) 196.304 -> 194.103 ( -1.12%) [ +0.00% +0.18% +0.07% / -0.47% -0.71% -1.12%] index_add_ reverse : Elapsed 9.815 ms (196.304 ms / 20) 189.606 -> 187.291 ( -1.22%) [ +0.39% +0.25% +0.00% / -0.45% -1.22% -0.89%] index_copy_ reverse : Elapsed 9.517 ms (190.338 ms / 20) 196.811 -> 194.745 ( -1.05%) [ +0.00% +0.01% +0.07% / -0.33% -1.05% -1.05%] index_add_ spread : Elapsed 9.841 ms (196.811 ms / 20) 189.853 -> 187.798 ( -1.08%) [ +0.15% +0.00% +0.28% / +0.08% -1.01% -1.08%] index_copy_ spread : Elapsed 9.507 ms (190.137 ms / 20) 196.354 -> 194.838 ( -0.77%) [ +0.00% +0.22% +0.28% / -0.01% -0.58% -0.77%] index_add_ strided 3 : Elapsed 9.818 ms (196.354 ms / 20) 189.462 -> 187.877 ( -0.84%) [ +0.00% +0.33% +0.36% / +0.31% -0.79% -0.84%] index_copy_ strided 3 : Elapsed 9.473 ms (189.462 ms / 20) 195.939 -> 194.789 ( -0.59%) [ +0.59% +0.47% +0.00% / +0.12% -0.51% -0.59%] index_add_ strided 7 : Elapsed 9.855 ms (197.094 ms / 20) 190.096 -> 188.031 ( -1.09%) [ +0.10% +0.00% +0.10% / +0.01% -1.09% -1.02%] index_copy_ strided 7 : Elapsed 9.514 ms (190.281 ms / 20) 195.733 -> 194.012 ( -0.88%) [ +0.14% +0.05% +0.00% / +0.09% -0.31% -0.88%] index_add_ perm : Elapsed 9.801 ms (196.015 ms / 20) 188.951 -> 187.154 ( -0.95%) [ +0.60% +0.40% +0.00% / +0.27% -0.51% -0.95%] index_copy_ perm : Elapsed 9.504 ms (190.088 ms / 20) 196.244 -> 195.286 ( -0.49%) [ +0.12% +0.05% +0.00% / -0.04% -0.37% -0.49%] index_add_ perm_sorted : Elapsed 9.824 ms (196.485 ms / 20) 189.232 -> 188.141 ( -0.58%) [ +0.32% +0.16% +0.00% / -0.25% -0.37% -0.58%] index_copy_ perm_sorted : Elapsed 9.492 ms (189.838 ms / 20) 868.212 -> 864.925 ( -0.38%) [ +0.07% +0.11% +0.00% / -0.05% -0.38% -0.34%] index_select const : Elapsed 43.441 ms (868.826 ms / 20) 1153.533 -> 1153.797 ( +0.02%) [ +0.24% +0.15% +0.00% / +0.24% +0.02% +0.09%] index_select wrap : Elapsed 57.813 ms (1156.260 ms / 20) 898.296 -> 893.714 ( -0.51%) [ +0.13% +0.02% +0.00% / +0.07% -0.45% -0.51%] index_select linear : Elapsed 44.972 ms (899.442 ms / 20) 942.142 -> 939.875 ( -0.24%) [ +0.00% +0.13% +0.24% / +0.18% -0.24% -0.23%] index_select reverse : Elapsed 47.107 ms (942.142 ms / 20) 868.051 -> 864.386 ( -0.42%) [ +0.13% +0.00% +0.00% / -0.05% -0.42% -0.22%] index_select skip64 : Elapsed 43.461 ms (869.211 ms / 20) 867.695 -> 865.074 ( -0.30%) [ +0.01% +0.00% +0.05% / +0.11% -0.29% -0.30%] index_select skip256 : Elapsed 43.389 ms (867.772 ms / 20) 1002.375 -> 1000.013 ( -0.24%) [ +0.07% +0.00% +0.05% / +0.03% -0.24% -0.18%] index_select spread : Elapsed 50.154 ms (1003.087 ms / 20) 1154.276 -> 1153.575 ( -0.06%) [ +0.01% +0.07% +0.00% / +0.14% -0.06% -0.06%] index_select strided 3 : Elapsed 57.721 ms (1154.417 ms / 20) 1101.174 -> 1100.429 ( -0.07%) [ +0.15% +0.00% +0.17% / +0.03% -0.07% +0.01%] index_select strided 5 : Elapsed 55.142 ms (1102.845 ms / 20) 1152.693 -> 1153.963 ( +0.11%) [ +0.08% +0.16% +0.00% / +0.12% +0.12% +0.11%] index_select strided 7 : Elapsed 57.682 ms (1153.634 ms / 20) 1123.959 -> 1118.411 ( -0.49%) [ +0.02% +0.08% +0.00% / -0.16% -0.49% -0.40%] index_select strided 8 : Elapsed 56.209 ms (1124.188 ms / 20) 1123.568 -> 1118.726 ( -0.43%) [ +0.03% +0.01% +0.00% / -0.04% -0.33% -0.43%] index_select strided 16 : Elapsed 56.193 ms (1123.867 ms / 20) 1136.984 -> 1134.892 ( -0.18%) [ +0.06% +0.00% +0.00% / +0.04% -0.17% -0.18%] index_select random : Elapsed 56.881 ms (1137.615 ms / 20) 993.777 -> 989.420 ( -0.44%) [ +0.02% +0.04% +0.00% / -0.06% -0.37% -0.44%] index_select random_sorted : Elapsed 49.699 ms (993.983 ms / 20) out_shape = [20, 100, 50, 40] in_shape = [20, 256, 50, 40] idx_dim = 1 out_shape = [20, 256, 100, 40] in_shape = [20, 256, 50, 40] idx_dim = 2 out_shape = [20, 256, 50, 100] in_shape = [20, 256, 50, 40] idx_dim = 3 out_shape = [100, 20, 50, 256] in_shape = [40, 20, 50, 256] idx_dim = 0 B = [100, 20, 50, 256] (stride (1000, 1, 20, 100000)) A = [40, 20, 50, 256] (stride (1000, 50, 1, 40000)) dim = 0 402.310 -> 404.627 ( +0.58%) [ +0.48% +0.00% +0.64% / +0.58% +1.68% +1.72%] index_add_ linear : Elapsed 20.212 ms (404.240 ms / 20) 301.791 -> 298.575 ( -1.07%) [ +0.16% +0.00% +0.45% / +0.61% -0.98% -1.07%] index_copy_ linear : Elapsed 15.114 ms (302.273 ms / 20) 410.996 -> 402.155 ( -2.15%) [ +0.02% +0.00% +0.48% / +0.27% -2.15% -1.67%] index_add_ reverse : Elapsed 20.554 ms (411.072 ms / 20) 300.336 -> 297.858 ( -0.83%) [ +0.40% +0.46% +0.00% / -0.76% -0.75% -0.83%] index_copy_ reverse : Elapsed 15.077 ms (301.549 ms / 20) 400.485 -> 401.386 ( +0.22%) [ +0.43% +0.33% +0.00% / +0.22% +2.03% +1.81%] index_add_ spread : Elapsed 20.111 ms (402.225 ms / 20) 293.040 -> 293.119 ( +0.03%) [ +0.02% +0.19% +0.00% / +0.03% +2.76% +2.85%] index_copy_ spread : Elapsed 14.654 ms (293.089 ms / 20) 409.807 -> 408.948 ( -0.21%) [ +0.10% +0.00% +0.05% / -0.17% +0.29% -0.21%] index_add_ strided 3 : Elapsed 20.512 ms (410.236 ms / 20) 297.285 -> 297.864 ( +0.19%) [ +0.40% +0.00% +0.17% / +0.28% +0.99% +0.19%] index_copy_ strided 3 : Elapsed 14.924 ms (298.473 ms / 20) 413.602 -> 413.277 ( -0.08%) [ +0.07% +0.00% +0.38% / -0.08% +0.19% +0.02%] index_add_ strided 7 : Elapsed 20.695 ms (413.900 ms / 20) 300.712 -> 301.271 ( +0.19%) [ +0.35% +0.15% +0.00% / +0.19% +2.93% +2.63%] index_copy_ strided 7 : Elapsed 15.088 ms (301.754 ms / 20) 415.320 -> 414.590 ( -0.18%) [ +0.63% +0.00% +0.33% / +0.08% +0.03% -0.18%] index_add_ perm : Elapsed 20.896 ms (417.927 ms / 20) 318.397 -> 304.890 ( -4.24%) [ +0.18% +0.33% +0.00% / -0.04% -4.15% -4.24%] index_copy_ perm : Elapsed 15.948 ms (318.963 ms / 20) 405.618 -> 404.118 ( -0.37%) [ +0.64% +0.00% +0.76% / -0.37% +2.70% +2.72%] index_add_ perm_sorted : Elapsed 20.411 ms (408.213 ms / 20) 301.426 -> 301.318 ( -0.04%) [ +0.57% +0.00% +0.64% / -0.04% +1.18% +1.31%] index_copy_ perm_sorted : Elapsed 15.157 ms (303.131 ms / 20) 820.202 -> 812.355 ( -0.96%) [ +0.25% +0.00% +0.06% / -0.35% -0.96% -0.96%] index_select const : Elapsed 41.114 ms (822.279 ms / 20) 865.077 -> 861.301 ( -0.44%) [ +0.44% +0.00% +0.45% / +0.26% -0.44% -0.32%] index_select wrap : Elapsed 43.445 ms (868.896 ms / 20) 839.591 -> 818.066 ( -2.56%) [ +0.28% +0.35% +0.00% / +0.31% -2.56% -2.19%] index_select linear : Elapsed 42.099 ms (841.974 ms / 20) 882.216 -> 846.962 ( -4.00%) [ +0.00% +0.26% +0.31% / +0.46% -4.00% -3.93%] index_select reverse : Elapsed 44.111 ms (882.216 ms / 20) 818.748 -> 811.025 ( -0.94%) [ +0.54% +0.00% +0.20% / -0.06% -0.55% -0.94%] index_select skip64 : Elapsed 41.158 ms (823.154 ms / 20) 820.241 -> 812.753 ( -0.91%) [ +0.00% +0.12% +0.01% / +0.11% -0.91% -0.76%] index_select skip256 : Elapsed 41.012 ms (820.241 ms / 20) 832.356 -> 831.893 ( -0.06%) [ +0.37% +0.00% +0.12% / -0.06% +1.96% +2.00%] index_select spread : Elapsed 41.772 ms (835.444 ms / 20) 858.781 -> 858.442 ( -0.04%) [ +0.34% +0.00% +0.44% / -0.04% +2.14% +1.76%] index_select strided 3 : Elapsed 43.084 ms (861.678 ms / 20) 878.391 -> 872.707 ( -0.65%) [ +0.00% +0.33% +0.57% / +0.05% -0.41% -0.65%] index_select strided 5 : Elapsed 43.920 ms (878.391 ms / 20) 868.639 -> 868.834 ( +0.02%) [ +0.00% +0.19% +0.07% / +0.02% +0.41% +0.73%] index_select strided 7 : Elapsed 43.432 ms (868.639 ms / 20) 866.861 -> 864.125 ( -0.32%) [ +0.00% +0.01% +0.04% / -0.32% +0.06% -0.10%] index_select strided 8 : Elapsed 43.343 ms (866.861 ms / 20) 881.587 -> 871.437 ( -1.15%) [ +0.11% +0.00% +0.09% / -0.03% -1.15% -1.13%] index_select strided 16 : Elapsed 44.127 ms (882.534 ms / 20) 855.737 -> 853.693 ( -0.24%) [ +0.05% +0.11% +0.00% / -0.24% +0.49% +0.91%] index_select random : Elapsed 42.808 ms (856.155 ms / 20) 830.631 -> 831.653 ( +0.12%) [ +0.01% +0.35% +0.00% / +0.12% +1.61% +1.57%] index_select random_sorted : Elapsed 41.536 ms (830.726 ms / 20) out_shape = [40, 100, 50, 256] in_shape = [40, 20, 50, 256] idx_dim = 1 out_shape = [40, 20, 100, 256] in_shape = [40, 20, 50, 256] idx_dim = 2 out_shape = [40, 20, 50, 100] in_shape = [40, 20, 50, 256] idx_dim = 3 out_shape = [100, 20, 256, 50] in_shape = [40, 20, 256, 50] idx_dim = 0 out_shape = [40, 100, 256, 50] in_shape = [40, 20, 256, 50] idx_dim = 1 out_shape = [40, 20, 100, 50] in_shape = [40, 20, 256, 50] idx_dim = 2 out_shape = [40, 20, 256, 100] in_shape = [40, 20, 256, 50] idx_dim = 3 out_shape = [100, 50, 20, 256] in_shape = [40, 50, 20, 256] idx_dim = 0 out_shape = [40, 100, 20, 256] in_shape = [40, 50, 20, 256] idx_dim = 1 B = [40, 100, 20, 256] (stride (1, 10240, 1024000, 40)) dim = 1 fill_cnt = 50 28.332 -> 27.148 ( -4.18%) [ +0.19% +0.07% +0.00% / +0.02% -4.03% -4.18%] index_fill_ const : Elapsed 1.419 ms (28.386 ms / 20) 78.427 -> 78.555 ( +0.16%) [ +0.68% +0.00% +1.54% / +0.16% +2.89% +1.41%] index_fill_ linear : Elapsed 3.948 ms (78.960 ms / 20) 80.225 -> 79.924 ( -0.38%) [ +0.62% +0.02% +0.00% / +0.24% -0.38% -0.29%] index_fill_ reverse : Elapsed 4.036 ms (80.725 ms / 20) 28.360 -> 27.170 ( -4.20%) [ +0.00% +0.17% +0.01% / -0.16% -4.20% -4.15%] index_fill_ skip64 : Elapsed 1.418 ms (28.360 ms / 20) 28.255 -> 27.194 ( -3.76%) [ +0.08% +0.00% +0.06% / -0.11% -3.71% -3.76%] index_fill_ skip256 : Elapsed 1.414 ms (28.278 ms / 20) 73.288 -> 74.698 ( +1.92%) [ +1.38% +1.52% +0.00% / +1.92% +8.11% +7.72%] index_fill_ spread : Elapsed 3.715 ms (74.298 ms / 20) 76.236 -> 76.471 ( +0.31%) [ +1.15% +0.06% +0.00% / +0.31% +1.55% +2.30%] index_fill_ strided 3 : Elapsed 3.855 ms (77.110 ms / 20) 82.597 -> 82.227 ( -0.45%) [ +0.00% +0.66% +1.40% / +0.50% -0.45% -0.01%] index_fill_ strided 5 : Elapsed 4.130 ms (82.597 ms / 20) 80.401 -> 80.681 ( +0.35%) [ +0.00% +0.05% +1.92% / +0.35% +3.54% +0.39%] index_fill_ strided 7 : Elapsed 4.020 ms (80.401 ms / 20) 75.303 -> 75.929 ( +0.83%) [ +0.00% +0.28% +1.71% / +1.41% +1.52% +0.83%] index_fill_ strided 8 : Elapsed 3.765 ms (75.303 ms / 20) 72.660 -> 72.369 ( -0.40%) [ +0.00% +1.61% +0.73% / +0.96% +1.48% -0.40%] index_fill_ strided 16 : Elapsed 3.633 ms (72.660 ms / 20) 80.063 -> 79.496 ( -0.71%) [ +0.00% +2.07% +2.88% / +2.70% +0.96% -0.71%] index_fill_ strided 64 : Elapsed 4.003 ms (80.063 ms / 20) 79.595 -> 80.618 ( +1.29%) [ +1.55% +0.18% +0.00% / +1.29% +5.61% +5.07%] index_fill_ random : Elapsed 4.042 ms (80.832 ms / 20) 67.617 -> 67.042 ( -0.85%) [ +0.00% +1.89% +0.14% / +0.33% -0.18% -0.85%] index_fill_ random_sorted : Elapsed 3.381 ms (67.617 ms / 20) 78.625 -> 78.262 ( -0.46%) [ +2.63% +0.27% +0.00% / -0.46% +1.05% +0.07%] index_fill_ perm : Elapsed 4.035 ms (80.696 ms / 20) 74.656 -> 75.482 ( +1.11%) [ +0.68% +1.45% +0.00% / +1.11% +6.98% +9.82%] index_fill_ perm_sorted : Elapsed 3.758 ms (75.166 ms / 20) out_shape = [40, 50, 100, 256] in_shape = [40, 50, 20, 256] idx_dim = 2 out_shape = [40, 50, 20, 100] in_shape = [40, 50, 20, 256] idx_dim = 3 out_shape = [100, 50, 256, 20] in_shape = [40, 50, 256, 20] idx_dim = 0 out_shape = [40, 100, 256, 20] in_shape = [40, 50, 256, 20] idx_dim = 1 B = [40, 100, 256, 20] (stride (2000, 1, 80000, 100)) A = [40, 50, 256, 20] (stride (5120, 204800, 1, 256)) dim = 1 451.422 -> 451.066 ( -0.08%) [ +0.00% +0.30% +0.15% / +0.32% -0.08% +0.12%] index_add_ linear : Elapsed 22.571 ms (451.422 ms / 20) 356.650 -> 356.276 ( -0.10%) [ +0.00% +0.04% +0.16% / +0.13% -0.10% -0.08%] index_copy_ linear : Elapsed 17.833 ms (356.650 ms / 20) 451.618 -> 451.027 ( -0.13%) [ +0.00% +0.02% +0.15% / +0.03% +0.02% -0.13%] index_add_ reverse : Elapsed 22.581 ms (451.618 ms / 20) 356.883 -> 356.513 ( -0.10%) [ +0.00% +0.05% +0.10% / +0.02% +0.09% -0.10%] index_copy_ reverse : Elapsed 17.844 ms (356.883 ms / 20) 451.365 -> 452.013 ( +0.14%) [ +0.17% +0.16% +0.00% / +0.14% +0.27% +0.16%] index_add_ spread : Elapsed 22.607 ms (452.132 ms / 20) 356.007 -> 356.769 ( +0.21%) [ +0.19% +0.18% +0.00% / +0.37% +0.22% +0.21%] index_copy_ spread : Elapsed 17.834 ms (356.688 ms / 20) 451.540 -> 450.829 ( -0.16%) [ +0.00% +0.11% +0.14% / -0.16% +0.02% +0.07%] index_add_ strided 3 : Elapsed 22.577 ms (451.540 ms / 20) 356.500 -> 356.109 ( -0.11%) [ +0.01% +0.06% +0.00% / -0.11% +0.08% +0.14%] index_copy_ strided 3 : Elapsed 17.826 ms (356.528 ms / 20) 451.730 -> 451.121 ( -0.13%) [ +0.04% +0.11% +0.00% / -0.13% +0.02% -0.10%] index_add_ strided 7 : Elapsed 22.597 ms (451.931 ms / 20) 356.256 -> 356.325 ( +0.02%) [ +0.04% +0.09% +0.00% / +0.02% +0.14% +0.14%] index_copy_ strided 7 : Elapsed 17.820 ms (356.404 ms / 20) 450.331 -> 450.467 ( +0.03%) [ +0.00% +0.00% +0.13% / +0.03% +0.44% +0.25%] index_add_ perm : Elapsed 22.517 ms (450.340 ms / 20) 355.472 -> 355.909 ( +0.12%) [ +0.01% +0.07% +0.00% / +0.12% +0.41% +0.37%] index_copy_ perm : Elapsed 17.776 ms (355.525 ms / 20) 450.282 -> 450.305 ( +0.01%) [ +0.04% +0.12% +0.00% / +0.01% +0.42% +0.50%] index_add_ perm_sorted : Elapsed 22.522 ms (450.442 ms / 20) 354.897 -> 355.044 ( +0.04%) [ +0.24% +0.24% +0.00% / +0.04% +0.56% +0.52%] index_copy_ perm_sorted : Elapsed 17.788 ms (355.758 ms / 20) 698.795 -> 698.900 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.02% +0.02% +0.05%] index_select const : Elapsed 34.940 ms (698.795 ms / 20) 731.107 -> 730.733 ( -0.05%) [ +0.00% +0.04% +0.04% / -0.05% +0.07% +0.03%] index_select wrap : Elapsed 36.555 ms (731.107 ms / 20) 708.787 -> 709.242 ( +0.06%) [ +0.00% +0.06% +0.07% / +0.06% +0.10% +0.13%] index_select linear : Elapsed 35.439 ms (708.787 ms / 20) 720.753 -> 720.991 ( +0.03%) [ +0.07% +0.00% +0.06% / +0.09% +0.05% +0.03%] index_select reverse : Elapsed 36.064 ms (721.289 ms / 20) 698.579 -> 698.073 ( -0.07%) [ +0.00% +0.05% +0.06% / +0.02% -0.07% -0.07%] index_select skip64 : Elapsed 34.929 ms (698.579 ms / 20) 698.935 -> 698.603 ( -0.05%) [ +0.00% +0.03% +0.01% / +0.01% -0.01% -0.05%] index_select skip256 : Elapsed 34.947 ms (698.935 ms / 20) 722.343 -> 721.971 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% -0.02% +0.04%] index_select spread : Elapsed 36.117 ms (722.343 ms / 20) 729.664 -> 729.945 ( +0.04%) [ +0.00% +0.02% +0.14% / +0.04% +0.23% +0.24%] index_select strided 3 : Elapsed 36.483 ms (729.664 ms / 20) 731.584 -> 731.416 ( -0.02%) [ +0.00% +0.05% +0.01% / +0.05% +0.02% -0.02%] index_select strided 5 : Elapsed 36.579 ms (731.584 ms / 20) 731.163 -> 729.850 ( -0.18%) [ +0.07% +0.11% +0.00% / +0.08% -0.13% -0.18%] index_select strided 7 : Elapsed 36.583 ms (731.650 ms / 20) 731.120 -> 731.052 ( -0.01%) [ +0.00% +0.01% +0.05% / +0.03% -0.01% -0.01%] index_select strided 8 : Elapsed 36.556 ms (731.120 ms / 20) 731.631 -> 731.584 ( -0.01%) [ +0.02% +0.02% +0.00% / -0.01% -0.01% -0.01%] index_select strided 16 : Elapsed 36.589 ms (731.777 ms / 20) 730.057 -> 730.723 ( +0.09%) [ +0.13% +0.10% +0.00% / +0.09% +0.15% +0.16%] index_select random : Elapsed 36.549 ms (730.984 ms / 20) 718.923 -> 719.118 ( +0.03%) [ +0.15% +0.00% +0.12% / +0.09% +0.03% +0.12%] index_select random_sorted : Elapsed 36.000 ms (719.990 ms / 20) out_shape = [40, 50, 100, 20] in_shape = [40, 50, 256, 20] idx_dim = 2 out_shape = [40, 50, 256, 100] in_shape = [40, 50, 256, 20] idx_dim = 3 out_shape = [100, 256, 20, 50] in_shape = [40, 256, 20, 50] idx_dim = 0 out_shape = [40, 100, 20, 50] in_shape = [40, 256, 20, 50] idx_dim = 1 out_shape = [40, 256, 100, 50] in_shape = [40, 256, 20, 50] idx_dim = 2 out_shape = [40, 256, 20, 100] in_shape = [40, 256, 20, 50] idx_dim = 3 B = [40, 256, 20, 100] (stride (5120, 20, 1, 204800)) A = [40, 256, 20, 50] (stride (12800, 1, 512000, 256)) dim = 3 143.834 -> 144.145 ( +0.22%) [ +0.46% +0.00% +0.58% / +0.22% +0.86% +0.91%] index_add_ linear : Elapsed 7.225 ms (144.491 ms / 20) 140.100 -> 140.133 ( +0.02%) [ +0.89% +0.10% +0.00% / +0.02% +0.70% +0.92%] index_copy_ linear : Elapsed 7.067 ms (141.348 ms / 20) 144.229 -> 144.156 ( -0.05%) [ +0.39% +0.00% +0.15% / -0.05% +1.37% +1.32%] index_add_ reverse : Elapsed 7.240 ms (144.795 ms / 20) 140.292 -> 140.635 ( +0.24%) [ +0.63% +0.00% +0.85% / +0.24% +0.99% +1.01%] index_copy_ reverse : Elapsed 7.059 ms (141.174 ms / 20) 144.877 -> 144.816 ( -0.04%) [ +0.13% +0.00% +0.18% / -0.04% +0.32% +0.40%] index_add_ spread : Elapsed 7.254 ms (145.072 ms / 20) 139.766 -> 140.944 ( +0.84%) [ +1.07% +0.00% +0.53% / +0.84% +1.24% +0.94%] index_copy_ spread : Elapsed 7.063 ms (141.255 ms / 20) 144.317 -> 144.055 ( -0.18%) [ +0.43% +0.43% +0.00% / -0.18% +0.49% +0.80%] index_add_ strided 3 : Elapsed 7.247 ms (144.940 ms / 20) 140.079 -> 139.656 ( -0.30%) [ +0.59% +0.51% +0.00% / -0.30% +0.67% +1.19%] index_copy_ strided 3 : Elapsed 7.045 ms (140.906 ms / 20) 144.449 -> 144.443 ( -0.00%) [ +0.39% +0.00% +0.61% / -0.00% +0.91% +1.03%] index_add_ strided 7 : Elapsed 7.251 ms (145.010 ms / 20) 141.196 -> 140.359 ( -0.59%) [ +0.17% +0.00% +0.17% / -0.59% +0.32% +0.08%] index_copy_ strided 7 : Elapsed 7.072 ms (141.433 ms / 20) 144.213 -> 144.729 ( +0.36%) [ +0.00% +0.74% +0.57% / +0.36% +1.29% +0.65%] index_add_ perm : Elapsed 7.211 ms (144.213 ms / 20) 140.599 -> 140.431 ( -0.12%) [ +0.00% +0.35% +0.31% / -0.12% +1.05% +0.59%] index_copy_ perm : Elapsed 7.030 ms (140.599 ms / 20) 145.244 -> 145.450 ( +0.14%) [ +0.51% +0.36% +0.00% / +0.14% +0.21% +0.83%] index_add_ perm_sorted : Elapsed 7.299 ms (145.983 ms / 20) 141.119 -> 141.410 ( +0.21%) [ +0.14% +0.09% +0.00% / +0.21% +0.26% +0.21%] index_copy_ perm_sorted : Elapsed 7.066 ms (141.317 ms / 20) Good 253.131 -> 219.436 (-13.31%) [ +0.00% +0.00% +0.08% / +0.30% -13.29% -13.31%] index_select const : Elapsed 12.657 ms (253.131 ms / 20) 315.242 -> 315.387 ( +0.05%) [ +0.40% +0.53% +0.00% / +0.05% +0.36% +0.37%] index_select wrap : Elapsed 15.825 ms (316.499 ms / 20) 262.449 -> 262.668 ( +0.08%) [ +0.00% +0.18% +0.08% / +0.08% +1.45% +1.44%] index_select linear : Elapsed 13.122 ms (262.449 ms / 20) 295.398 -> 293.202 ( -0.74%) [ +0.00% +0.08% +0.04% / -0.12% -0.60% -0.74%] index_select reverse : Elapsed 14.770 ms (295.398 ms / 20) Good 253.147 -> 219.275 (-13.38%) [ +0.06% +0.00% +0.24% / +0.15% -13.38% -13.31%] index_select skip64 : Elapsed 12.665 ms (253.309 ms / 20) Good 253.270 -> 219.175 (-13.46%) [ +0.17% +0.00% +0.17% / +0.03% -13.46% -13.43%] index_select skip256 : Elapsed 12.684 ms (253.688 ms / 20) 303.081 -> 303.974 ( +0.29%) [ +0.00% +0.41% +0.38% / +0.41% +0.35% +0.29%] index_select spread : Elapsed 15.154 ms (303.081 ms / 20) 320.031 -> 319.214 ( -0.26%) [ +0.44% +0.30% +0.00% / -0.26% -0.06% +0.18%] index_select strided 3 : Elapsed 16.072 ms (321.432 ms / 20) 323.117 -> 323.776 ( +0.20%) [ +0.15% +0.27% +0.00% / +0.20% +2.24% +2.21%] index_select strided 5 : Elapsed 16.179 ms (323.588 ms / 20) 318.173 -> 318.916 ( +0.23%) [ +0.00% +0.26% +0.19% / +0.23% +0.28% +0.41%] index_select strided 7 : Elapsed 15.909 ms (318.173 ms / 20) 330.380 -> 329.680 ( -0.21%) [ +0.15% +0.00% +0.04% / +0.44% -0.01% -0.21%] index_select strided 8 : Elapsed 16.544 ms (330.877 ms / 20) 318.527 -> 317.437 ( -0.34%) [ +0.13% +0.00% +0.29% / -0.34% +0.03% -0.22%] index_select strided 16 : Elapsed 15.947 ms (318.937 ms / 20) 320.092 -> 320.663 ( +0.18%) [ +0.37% +0.44% +0.00% / +0.18% +0.61% +0.81%] index_select random : Elapsed 16.064 ms (321.281 ms / 20) 297.710 -> 295.609 ( -0.71%) [ +0.00% +0.14% +0.18% / -0.07% -0.35% -0.71%] index_select random_sorted : Elapsed 14.885 ms (297.710 ms / 20) out_shape = [100, 256, 50, 20] in_shape = [40, 256, 50, 20] idx_dim = 0 B = [100, 256, 50, 20] (stride (5120, 1, 512000, 256)) A = [40, 256, 50, 20] (stride (1, 40, 10240, 512000)) dim = 0 464.823 -> 460.261 ( -0.98%) [ +0.04% +0.03% +0.00% / -0.25% -0.98% -0.61%] index_add_ linear : Elapsed 23.250 ms (464.995 ms / 20) 362.802 -> 363.570 ( +0.21%) [ +0.20% +0.00% +0.49% / +0.21% +0.48% +0.43%] index_copy_ linear : Elapsed 18.176 ms (363.523 ms / 20) 459.669 -> 458.874 ( -0.17%) [ +0.20% +0.00% +0.01% / -0.17% +0.71% +0.67%] index_add_ reverse : Elapsed 23.030 ms (460.603 ms / 20) 361.140 -> 360.566 ( -0.16%) [ +0.02% +0.00% +0.03% / -0.16% +0.36% +0.36%] index_copy_ reverse : Elapsed 18.061 ms (361.211 ms / 20) 466.104 -> 463.860 ( -0.48%) [ +0.23% +0.00% +0.10% / +0.25% -0.47% -0.48%] index_add_ spread : Elapsed 23.359 ms (467.180 ms / 20) 367.696 -> 368.106 ( +0.11%) [ +0.13% +0.00% +0.25% / +0.31% +0.29% +0.11%] index_copy_ spread : Elapsed 18.408 ms (368.168 ms / 20) 478.533 -> 464.201 ( -2.99%) [ +0.00% +0.11% +0.10% / -0.06% -2.99% -2.84%] index_add_ strided 3 : Elapsed 23.927 ms (478.533 ms / 20) 367.933 -> 367.255 ( -0.18%) [ +0.00% +0.23% +0.19% / -0.18% -0.18% +0.09%] index_copy_ strided 3 : Elapsed 18.397 ms (367.933 ms / 20) 484.212 -> 469.937 ( -2.95%) [ +0.00% +0.04% +0.06% / -0.00% -2.88% -2.95%] index_add_ strided 7 : Elapsed 24.211 ms (484.212 ms / 20) 371.552 -> 370.747 ( -0.22%) [ +0.00% +0.19% +0.10% / +0.07% -0.12% -0.22%] index_copy_ strided 7 : Elapsed 18.578 ms (371.552 ms / 20) 476.861 -> 476.744 ( -0.02%) [ +0.00% +0.01% +0.29% / -0.02% +1.61% +1.65%] index_add_ perm : Elapsed 23.843 ms (476.861 ms / 20) 368.121 -> 367.944 ( -0.05%) [ +0.00% +0.08% +0.25% / -0.05% +0.27% +0.35%] index_copy_ perm : Elapsed 18.406 ms (368.121 ms / 20) 470.513 -> 468.175 ( -0.50%) [ +0.05% +0.06% +0.00% / -0.50% +0.57% +0.45%] index_add_ perm_sorted : Elapsed 23.538 ms (470.761 ms / 20) 368.195 -> 368.577 ( +0.10%) [ +0.00% +0.27% +0.38% / +0.24% +0.17% +0.10%] index_copy_ perm_sorted : Elapsed 18.410 ms (368.195 ms / 20) 974.432 -> 975.174 ( +0.08%) [ +0.09% +0.02% +0.00% / +0.08% +0.14% +0.24%] index_select const : Elapsed 48.765 ms (975.296 ms / 20) 975.614 -> 974.313 ( -0.13%) [ +0.17% +0.00% +0.09% / +0.20% -0.13% -0.08%] index_select wrap : Elapsed 48.863 ms (977.265 ms / 20) 974.838 -> 975.152 ( +0.03%) [ +0.17% +0.07% +0.00% / +0.04% +0.03% +0.03%] index_select linear : Elapsed 48.826 ms (976.521 ms / 20) 976.159 -> 975.640 ( -0.05%) [ +0.12% +0.00% +0.10% / +0.05% +0.07% -0.05%] index_select reverse : Elapsed 48.864 ms (977.285 ms / 20) 974.685 -> 973.163 ( -0.16%) [ +0.01% +0.00% +0.11% / -0.16% -0.01% -0.06%] index_select skip64 : Elapsed 48.741 ms (974.815 ms / 20) 974.009 -> 975.346 ( +0.14%) [ +0.03% +0.06% +0.00% / +0.14% +0.30% +0.31%] index_select skip256 : Elapsed 48.716 ms (974.329 ms / 20) 976.931 -> 973.829 ( -0.32%) [ +0.00% +0.01% +0.01% / -0.10% -0.32% -0.15%] index_select spread : Elapsed 48.847 ms (976.931 ms / 20) 973.467 -> 975.222 ( +0.18%) [ +0.09% +0.00% +0.30% / +0.24% +0.18% +0.19%] index_select strided 3 : Elapsed 48.716 ms (974.312 ms / 20) 976.767 -> 974.415 ( -0.24%) [ +0.01% +0.06% +0.00% / -0.01% -0.24% -0.11%] index_select strided 5 : Elapsed 48.843 ms (976.852 ms / 20) 976.041 -> 972.436 ( -0.37%) [ +0.26% +0.00% +0.11% / +0.18% -0.32% -0.37%] index_select strided 7 : Elapsed 48.929 ms (978.585 ms / 20) 977.663 -> 975.118 ( -0.26%) [ +0.00% +0.05% +0.02% / -0.02% -0.25% -0.26%] index_select strided 8 : Elapsed 48.883 ms (977.663 ms / 20) 978.068 -> 976.269 ( -0.18%) [ +0.06% +0.03% +0.00% / -0.08% -0.18% -0.10%] index_select strided 16 : Elapsed 48.934 ms (978.688 ms / 20) 975.452 -> 975.358 ( -0.01%) [ +0.15% +0.15% +0.00% / -0.01% -0.01% +0.03%] index_select random : Elapsed 48.844 ms (976.873 ms / 20) 976.208 -> 974.392 ( -0.19%) [ +0.00% +0.09% +0.12% / +0.08% -0.18% -0.19%] index_select random_sorted : Elapsed 48.810 ms (976.208 ms / 20) out_shape = [40, 100, 50, 20] in_shape = [40, 256, 50, 20] idx_dim = 1 out_shape = [40, 256, 100, 20] in_shape = [40, 256, 50, 20] idx_dim = 2 B = [40, 256, 100, 20] (stride (1, 800, 204800, 40)) A = [40, 256, 50, 20] (stride (1, 40000, 800, 40)) dim = 2 209.374 -> 209.566 ( +0.09%) [ +0.02% +0.10% +0.00% / +0.09% +1.54% +1.74%] index_add_ linear : Elapsed 10.471 ms (209.414 ms / 20) 210.660 -> 210.847 ( +0.09%) [ +0.05% +0.00% +0.27% / +0.09% +1.74% +1.77%] index_copy_ linear : Elapsed 10.539 ms (210.773 ms / 20) 209.954 -> 210.294 ( +0.16%) [ +0.36% +0.00% +0.26% / +0.16% +1.08% +0.83%] index_add_ reverse : Elapsed 10.536 ms (210.718 ms / 20) 209.405 -> 209.816 ( +0.20%) [ +0.15% +0.00% +0.03% / +0.20% +2.20% +1.77%] index_copy_ reverse : Elapsed 10.486 ms (209.711 ms / 20) 210.140 -> 209.964 ( -0.08%) [ +0.04% +0.00% +0.07% / -0.08% +1.00% +0.92%] index_add_ spread : Elapsed 10.511 ms (210.226 ms / 20) 209.878 -> 209.485 ( -0.19%) [ +0.02% +0.00% +0.21% / -0.19% +1.71% +2.12%] index_copy_ spread : Elapsed 10.496 ms (209.923 ms / 20) 209.647 -> 209.943 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.14% +1.57% +1.39%] index_add_ strided 3 : Elapsed 10.504 ms (210.081 ms / 20) 209.331 -> 210.464 ( +0.54%) [ +0.00% +0.55% +0.22% / +0.54% +2.60% +2.75%] index_copy_ strided 3 : Elapsed 10.467 ms (209.331 ms / 20) 209.501 -> 210.077 ( +0.27%) [ +0.08% +0.36% +0.00% / +0.27% +1.32% +1.40%] index_add_ strided 7 : Elapsed 10.483 ms (209.662 ms / 20) 209.942 -> 209.964 ( +0.01%) [ +0.21% +0.35% +0.00% / +0.01% +1.94% +2.07%] index_copy_ strided 7 : Elapsed 10.519 ms (210.385 ms / 20) 210.363 -> 210.349 ( -0.01%) [ +0.19% +0.38% +0.00% / -0.01% +0.73% +0.90%] index_add_ perm : Elapsed 10.538 ms (210.755 ms / 20) 210.164 -> 210.280 ( +0.06%) [ +0.42% +0.51% +0.00% / +0.06% +1.87% +2.07%] index_copy_ perm : Elapsed 10.552 ms (211.039 ms / 20) 209.885 -> 210.123 ( +0.11%) [ +0.07% +0.00% +0.07% / +0.11% +1.03% +1.08%] index_add_ perm_sorted : Elapsed 10.502 ms (210.041 ms / 20) 209.633 -> 210.667 ( +0.49%) [ +0.00% +0.56% +0.23% / +0.49% +1.59% +1.87%] index_copy_ perm_sorted : Elapsed 10.482 ms (209.633 ms / 20) 469.296 -> 460.883 ( -1.79%) [ +0.00% +0.16% +0.26% / +0.13% -1.79% -1.55%] index_select const : Elapsed 23.465 ms (469.296 ms / 20) 461.576 -> 462.163 ( +0.13%) [ +0.00% +0.23% +0.25% / +0.13% +0.52% +1.04%] index_select wrap : Elapsed 23.079 ms (461.576 ms / 20) 459.249 -> 461.398 ( +0.47%) [ +0.37% +0.00% +0.55% / +0.52% +0.47% +0.54%] index_select linear : Elapsed 23.048 ms (460.951 ms / 20) 480.482 -> 477.134 ( -0.70%) [ +0.11% +0.00% +0.68% / +0.41% -0.46% -0.70%] index_select reverse : Elapsed 24.052 ms (481.031 ms / 20) 469.712 -> 462.209 ( -1.60%) [ +0.47% +0.11% +0.00% / -0.06% -1.36% -1.60%] index_select skip64 : Elapsed 23.596 ms (471.925 ms / 20) 470.134 -> 461.004 ( -1.94%) [ +0.00% +0.27% +0.15% / +0.35% -1.94% -1.75%] index_select skip256 : Elapsed 23.507 ms (470.134 ms / 20) 456.229 -> 457.286 ( +0.23%) [ +0.28% +0.00% +0.13% / +0.23% +2.48% +2.04%] index_select spread : Elapsed 22.875 ms (457.501 ms / 20) 467.232 -> 467.085 ( -0.03%) [ +0.20% +0.26% +0.00% / -0.03% +0.65% +1.04%] index_select strided 3 : Elapsed 23.408 ms (468.151 ms / 20) 472.583 -> 461.366 ( -2.37%) [ +0.16% +0.15% +0.00% / +0.02% -2.37% -0.85%] index_select strided 5 : Elapsed 23.667 ms (473.349 ms / 20) 468.936 -> 467.876 ( -0.23%) [ +0.00% +0.17% +0.30% / +0.64% -0.23% -0.06%] index_select strided 7 : Elapsed 23.447 ms (468.936 ms / 20) 460.990 -> 461.945 ( +0.21%) [ +0.00% +0.06% +0.04% / +0.21% +1.83% +2.08%] index_select strided 8 : Elapsed 23.049 ms (460.990 ms / 20) 461.077 -> 460.691 ( -0.08%) [ +0.00% +0.36% +0.24% / -0.08% +0.58% +0.74%] index_select strided 16 : Elapsed 23.054 ms (461.077 ms / 20) 470.673 -> 473.199 ( +0.54%) [ +0.00% +0.56% +0.30% / +0.54% +1.08% +1.52%] index_select random : Elapsed 23.534 ms (470.673 ms / 20) 463.115 -> 465.183 ( +0.45%) [ +0.00% +0.34% +0.23% / +0.45% +0.64% +0.96%] index_select random_sorted : Elapsed 23.156 ms (463.115 ms / 20) out_shape = [40, 256, 50, 100] in_shape = [40, 256, 50, 20] idx_dim = 3 out_shape = [100, 20, 40, 256] in_shape = [50, 20, 40, 256] idx_dim = 0 out_shape = [50, 100, 40, 256] in_shape = [50, 20, 40, 256] idx_dim = 1 out_shape = [50, 20, 100, 256] in_shape = [50, 20, 40, 256] idx_dim = 2 out_shape = [50, 20, 40, 100] in_shape = [50, 20, 40, 256] idx_dim = 3 out_shape = [100, 20, 256, 40] in_shape = [50, 20, 256, 40] idx_dim = 0 out_shape = [50, 100, 256, 40] in_shape = [50, 20, 256, 40] idx_dim = 1 out_shape = [50, 20, 100, 40] in_shape = [50, 20, 256, 40] idx_dim = 2 B = [50, 20, 100, 40] (stride (80000, 40, 800, 1)) A = [50, 20, 256, 40] (stride (40, 512000, 2000, 1)) dim = 2 34.332 -> 34.247 ( -0.25%) [ +0.03% +0.00% +0.13% / +0.15% -0.25% -0.10%] index_select const : Elapsed 1.717 ms (34.341 ms / 20) 36.682 -> 36.654 ( -0.08%) [ +0.00% +0.05% +0.05% / +0.22% +0.04% -0.08%] index_select wrap : Elapsed 1.834 ms (36.682 ms / 20) 36.732 -> 36.677 ( -0.15%) [ +0.03% +0.00% +0.06% / -0.07% -0.14% -0.15%] index_select linear : Elapsed 1.837 ms (36.743 ms / 20) 36.707 -> 36.734 ( +0.07%) [ +0.16% +0.00% +0.08% / +0.07% +0.15% +0.13%] index_select reverse : Elapsed 1.838 ms (36.764 ms / 20) 34.280 -> 34.342 ( +0.18%) [ +0.00% +0.04% +0.17% / +0.18% +0.42% +0.33%] index_select skip64 : Elapsed 1.714 ms (34.280 ms / 20) 34.343 -> 34.281 ( -0.18%) [ +0.00% +0.08% +0.03% / +0.07% -0.16% -0.18%] index_select skip256 : Elapsed 1.717 ms (34.343 ms / 20) 36.674 -> 36.704 ( +0.08%) [ +0.15% +0.07% +0.00% / +0.08% +0.59% +0.61%] index_select spread : Elapsed 1.836 ms (36.728 ms / 20) 36.748 -> 36.752 ( +0.01%) [ +0.02% +0.00% +0.09% / +0.11% +0.01% +0.10%] index_select strided 3 : Elapsed 1.838 ms (36.757 ms / 20) 36.773 -> 36.796 ( +0.06%) [ +0.03% +0.00% +0.10% / +0.13% +0.17% +0.06%] index_select strided 5 : Elapsed 1.839 ms (36.783 ms / 20) 36.867 -> 36.841 ( -0.07%) [ +0.01% +0.11% +0.00% / +0.07% -0.07% +0.07%] index_select strided 7 : Elapsed 1.844 ms (36.872 ms / 20) 36.836 -> 36.837 ( +0.00%) [ +0.04% +0.00% +0.09% / +0.00% +0.04% +0.09%] index_select strided 8 : Elapsed 1.843 ms (36.851 ms / 20) 36.893 -> 36.925 ( +0.09%) [ +0.02% +0.00% +0.09% / +0.09% +0.09% +0.10%] index_select strided 16 : Elapsed 1.845 ms (36.899 ms / 20) 36.324 -> 36.330 ( +0.02%) [ +0.10% +0.10% +0.00% / +0.02% +0.11% +0.14%] index_select strided 64 : Elapsed 1.818 ms (36.362 ms / 20) 36.744 -> 36.763 ( +0.05%) [ +0.07% +0.00% +0.17% / +0.05% +0.16% +0.18%] index_select strided 100 : Elapsed 1.838 ms (36.768 ms / 20) 36.702 -> 36.753 ( +0.14%) [ +0.00% +0.05% +0.16% / +0.14% +0.21% +0.28%] index_select strided 255 : Elapsed 1.835 ms (36.702 ms / 20) 36.648 -> 36.663 ( +0.04%) [ +0.00% +0.01% +0.08% / +0.04% +0.53% +0.62%] index_select random : Elapsed 1.832 ms (36.648 ms / 20) 36.454 -> 36.473 ( +0.05%) [ +0.00% +0.07% +0.10% / +0.05% +0.55% +0.63%] index_select random_sorted : Elapsed 1.823 ms (36.454 ms / 20) 36.864 -> 36.820 ( -0.12%) [ +0.06% +0.00% +0.01% / +0.09% -0.12% +0.09%] index_select perm : Elapsed 1.844 ms (36.887 ms / 20) 36.838 -> 36.829 ( -0.02%) [ +0.01% +0.00% +0.03% / -0.02% +0.11% +0.15%] index_select perm_sorted : Elapsed 1.842 ms (36.840 ms / 20) out_shape = [50, 20, 256, 100] in_shape = [50, 20, 256, 40] idx_dim = 3 out_shape = [100, 40, 20, 256] in_shape = [50, 40, 20, 256] idx_dim = 0 out_shape = [50, 100, 20, 256] in_shape = [50, 40, 20, 256] idx_dim = 1 B = [50, 100, 20, 256] (stride (256, 256000, 12800, 1)) A = [50, 40, 20, 256] (stride (1, 1000, 50, 40000)) dim = 1 204.465 -> 202.194 ( -1.11%) [ +0.00% +0.06% +0.00% / -0.23% -1.11% -0.65%] index_add_ linear : Elapsed 10.223 ms (204.465 ms / 20) 199.291 -> 196.663 ( -1.32%) [ +0.32% +0.16% +0.00% / -0.07% -1.32% -0.52%] index_copy_ linear : Elapsed 9.996 ms (199.920 ms / 20) 203.464 -> 201.752 ( -0.84%) [ +0.22% +0.00% +0.04% / +0.05% -0.84% -0.84%] index_add_ reverse : Elapsed 10.195 ms (203.907 ms / 20) 198.905 -> 196.405 ( -1.26%) [ +0.00% +0.33% +0.13% / -0.00% -1.26% -0.95%] index_copy_ reverse : Elapsed 9.945 ms (198.905 ms / 20) 202.589 -> 200.810 ( -0.88%) [ +0.01% +0.00% +0.06% / -0.00% -0.88% -0.64%] index_add_ spread : Elapsed 10.130 ms (202.606 ms / 20) 198.597 -> 196.040 ( -1.29%) [ +0.00% +0.12% +0.27% / +0.17% -1.29% -1.08%] index_copy_ spread : Elapsed 9.930 ms (198.597 ms / 20) 204.405 -> 201.055 ( -1.64%) [ +0.55% +0.11% +0.00% / -0.19% -1.64% -1.44%] index_add_ strided 3 : Elapsed 10.276 ms (205.526 ms / 20) 199.040 -> 195.971 ( -1.54%) [ +0.69% +0.41% +0.00% / -0.00% -1.54% -1.28%] index_copy_ strided 3 : Elapsed 10.021 ms (200.416 ms / 20) 203.680 -> 201.811 ( -0.92%) [ +0.00% +0.07% +0.09% / +0.38% -0.80% -0.92%] index_add_ strided 7 : Elapsed 10.184 ms (203.680 ms / 20) 199.022 -> 196.288 ( -1.37%) [ +0.10% +0.06% +0.00% / +0.04% -1.06% -1.37%] index_copy_ strided 7 : Elapsed 9.961 ms (199.224 ms / 20) 203.999 -> 202.072 ( -0.94%) [ +0.37% +0.00% +0.00% / +0.22% -0.25% -0.94%] index_add_ perm : Elapsed 10.238 ms (204.756 ms / 20) 199.456 -> 197.275 ( -1.09%) [ +0.01% +0.00% +0.00% / -0.35% -0.59% -1.09%] index_copy_ perm : Elapsed 9.973 ms (199.467 ms / 20) 204.499 -> 202.201 ( -1.12%) [ +0.27% +0.17% +0.00% / -0.11% -1.12% -1.12%] index_add_ perm_sorted : Elapsed 10.252 ms (205.045 ms / 20) 199.032 -> 196.533 ( -1.26%) [ +0.05% +0.24% +0.00% / -0.01% -1.05% -1.26%] index_copy_ perm_sorted : Elapsed 9.956 ms (199.126 ms / 20) 424.014 -> 424.959 ( +0.22%) [ +0.00% +0.07% +0.05% / +0.22% +0.85% +0.58%] index_select const : Elapsed 21.201 ms (424.014 ms / 20) 566.319 -> 565.171 ( -0.20%) [ +0.11% +0.00% +0.19% / +0.28% -0.20% -0.18%] index_select wrap : Elapsed 28.347 ms (566.947 ms / 20) 473.265 -> 461.944 ( -2.39%) [ +0.23% +0.09% +0.00% / -0.10% -2.04% -2.39%] index_select linear : Elapsed 23.717 ms (474.333 ms / 20) 529.001 -> 508.103 ( -3.95%) [ +0.00% +0.17% +0.20% / +0.28% -3.77% -3.95%] index_select reverse : Elapsed 26.450 ms (529.001 ms / 20) 423.861 -> 424.550 ( +0.16%) [ +0.00% +0.36% +0.19% / +0.16% +0.76% +0.87%] index_select skip64 : Elapsed 21.193 ms (423.861 ms / 20) 424.152 -> 425.202 ( +0.25%) [ +0.29% +0.00% +0.26% / +0.25% +0.52% +0.83%] index_select skip256 : Elapsed 21.269 ms (425.389 ms / 20) 526.330 -> 525.568 ( -0.14%) [ +0.27% +0.30% +0.00% / +0.10% -0.09% -0.14%] index_select spread : Elapsed 26.387 ms (527.742 ms / 20) 572.649 -> 572.029 ( -0.11%) [ +0.08% +0.08% +0.00% / -0.11% +0.25% +0.28%] index_select strided 3 : Elapsed 28.654 ms (573.087 ms / 20) 569.102 -> 565.451 ( -0.64%) [ +0.44% +0.00% +0.01% / -0.32% -0.61% -0.64%] index_select strided 5 : Elapsed 28.580 ms (571.597 ms / 20) 572.337 -> 568.315 ( -0.70%) [ +0.04% +0.00% +0.35% / -0.03% -0.64% -0.70%] index_select strided 7 : Elapsed 28.627 ms (572.538 ms / 20) 564.287 -> 559.414 ( -0.86%) [ +0.26% +0.11% +0.00% / +0.51% -0.52% -0.86%] index_select strided 8 : Elapsed 28.288 ms (565.755 ms / 20) 564.041 -> 557.236 ( -1.21%) [ +0.11% +0.14% +0.00% / -0.45% -1.21% -0.89%] index_select strided 16 : Elapsed 28.233 ms (564.663 ms / 20) 563.967 -> 560.213 ( -0.67%) [ +0.36% +0.18% +0.00% / +0.28% -0.46% -0.67%] index_select random : Elapsed 28.300 ms (566.001 ms / 20) 513.586 -> 513.231 ( -0.07%) [ +0.11% +0.00% +0.03% / -0.07% +0.38% +0.33%] index_select random_sorted : Elapsed 25.708 ms (514.168 ms / 20) out_shape = [50, 40, 100, 256] in_shape = [50, 40, 20, 256] idx_dim = 2 out_shape = [50, 40, 20, 100] in_shape = [50, 40, 20, 256] idx_dim = 3 B = [50, 40, 20, 100] (stride (80000, 1, 4000, 40)) A = [50, 40, 20, 256] (stride (1, 1000, 50, 40000)) dim = 3 55.202 -> 55.258 ( +0.10%) [ +0.00% +0.30% +0.63% / +0.10% +0.65% +1.25%] index_select const : Elapsed 2.760 ms (55.202 ms / 20) 69.586 -> 69.499 ( -0.13%) [ +0.28% +0.25% +0.00% / -0.13% +0.58% +0.26%] index_select wrap : Elapsed 3.489 ms (69.778 ms / 20) 69.645 -> 69.753 ( +0.16%) [ +0.00% +0.23% +0.03% / +0.16% +0.46% +0.70%] index_select linear : Elapsed 3.482 ms (69.645 ms / 20) 69.603 -> 69.435 ( -0.24%) [ +0.13% +0.00% +0.44% / -0.24% +0.45% +0.73%] index_select reverse : Elapsed 3.485 ms (69.694 ms / 20) 55.763 -> 55.743 ( -0.04%) [ +0.27% +0.00% +0.41% / -0.04% +1.38% +1.26%] index_select skip64 : Elapsed 2.796 ms (55.913 ms / 20) 55.333 -> 55.236 ( -0.18%) [ +0.24% +0.25% +0.00% / -0.18% +0.94% +1.03%] index_select skip256 : Elapsed 2.773 ms (55.465 ms / 20) 70.361 -> 69.598 ( -1.08%) [ +0.00% +0.58% +0.28% / +0.22% -0.87% -1.08%] index_select spread : Elapsed 3.518 ms (70.361 ms / 20) 69.263 -> 69.525 ( +0.38%) [ +0.26% +0.27% +0.00% / +0.38% +1.92% +1.59%] index_select strided 3 : Elapsed 3.472 ms (69.442 ms / 20) 69.937 -> 69.953 ( +0.02%) [ +0.02% +0.03% +0.00% / +0.02% +1.15% +0.91%] index_select strided 5 : Elapsed 3.498 ms (69.952 ms / 20) 68.678 -> 68.767 ( +0.13%) [ +0.26% +0.57% +0.00% / +0.13% +1.80% +1.85%] index_select strided 7 : Elapsed 3.443 ms (68.857 ms / 20) 70.880 -> 69.482 ( -1.97%) [ +0.42% +0.53% +0.00% / +0.33% -1.81% -1.97%] index_select strided 8 : Elapsed 3.559 ms (71.177 ms / 20) 70.766 -> 69.882 ( -1.25%) [ +0.00% +0.34% +0.10% / +0.02% -0.77% -1.25%] index_select strided 16 : Elapsed 3.538 ms (70.766 ms / 20) 66.626 -> 64.087 ( -3.81%) [ +0.28% +0.02% +0.00% / +0.02% -3.69% -3.81%] index_select strided 64 : Elapsed 3.341 ms (66.814 ms / 20) 69.483 -> 68.780 ( -1.01%) [ +0.38% +0.65% +0.00% / +0.50% -1.01% -0.98%] index_select strided 100 : Elapsed 3.487 ms (69.745 ms / 20) 70.151 -> 69.701 ( -0.64%) [ +0.02% +0.00% +0.04% / -0.30% +0.02% -0.64%] index_select strided 255 : Elapsed 3.508 ms (70.168 ms / 20) 70.720 -> 70.477 ( -0.34%) [ +0.14% +0.00% +0.36% / +0.05% -0.02% -0.34%] index_select random : Elapsed 3.541 ms (70.818 ms / 20) 69.264 -> 68.767 ( -0.72%) [ +0.00% +0.16% +0.39% / +0.37% -0.39% -0.72%] index_select random_sorted : Elapsed 3.463 ms (69.264 ms / 20) 69.045 -> 69.026 ( -0.03%) [ +0.24% +0.09% +0.00% / -0.03% +1.13% +1.39%] index_select perm : Elapsed 3.461 ms (69.210 ms / 20) 69.590 -> 69.347 ( -0.35%) [ +0.12% +0.28% +0.00% / -0.16% -0.14% -0.35%] index_select perm_sorted : Elapsed 3.484 ms (69.675 ms / 20) out_shape = [100, 40, 256, 20] in_shape = [50, 40, 256, 20] idx_dim = 0 out_shape = [50, 100, 256, 20] in_shape = [50, 40, 256, 20] idx_dim = 1 out_shape = [50, 40, 100, 20] in_shape = [50, 40, 256, 20] idx_dim = 2 out_shape = [50, 40, 256, 100] in_shape = [50, 40, 256, 20] idx_dim = 3 out_shape = [100, 256, 20, 40] in_shape = [50, 256, 20, 40] idx_dim = 0 out_shape = [50, 100, 20, 40] in_shape = [50, 256, 20, 40] idx_dim = 1 B = [50, 100, 20, 40] (stride (20, 1000, 1, 100000)) A = [50, 256, 20, 40] (stride (204800, 40, 10240, 1)) dim = 1 69.822 -> 67.908 ( -2.74%) [ +0.14% +0.00% +0.13% / -0.22% -2.74% -2.67%] index_select const : Elapsed 3.496 ms (69.919 ms / 20) 69.402 -> 69.336 ( -0.10%) [ +0.40% +0.23% +0.00% / +0.18% +0.19% -0.10%] index_select wrap : Elapsed 3.484 ms (69.680 ms / 20) 69.366 -> 69.341 ( -0.04%) [ +0.05% +0.00% +0.19% / +0.17% +0.11% -0.04%] index_select linear : Elapsed 3.470 ms (69.402 ms / 20) 69.251 -> 69.426 ( +0.25%) [ +0.38% +0.32% +0.00% / +0.25% +1.85% +1.72%] index_select reverse : Elapsed 3.476 ms (69.516 ms / 20) 69.633 -> 67.607 ( -2.91%) [ +0.08% +0.05% +0.00% / +0.26% -2.91% -2.61%] index_select skip64 : Elapsed 3.484 ms (69.690 ms / 20) 69.611 -> 67.632 ( -2.84%) [ +0.05% +0.00% +0.39% / -0.01% -2.40% -2.84%] index_select skip256 : Elapsed 3.482 ms (69.649 ms / 20) 70.169 -> 70.062 ( -0.15%) [ +0.15% +0.10% +0.00% / +0.17% -0.02% -0.15%] index_select spread : Elapsed 3.514 ms (70.272 ms / 20) 70.242 -> 69.890 ( -0.50%) [ +0.02% +0.00% +0.28% / -0.28% -0.34% -0.50%] index_select strided 3 : Elapsed 3.513 ms (70.256 ms / 20) 69.688 -> 69.710 ( +0.03%) [ +0.24% +0.00% +0.21% / +0.03% +0.38% +0.47%] index_select strided 5 : Elapsed 3.493 ms (69.852 ms / 20) 69.951 -> 69.829 ( -0.17%) [ +0.00% +0.17% +0.06% / +0.21% +0.22% -0.17%] index_select strided 7 : Elapsed 3.498 ms (69.951 ms / 20) 70.103 -> 69.661 ( -0.63%) [ +0.08% +0.17% +0.00% / -0.32% -0.50% -0.63%] index_select strided 8 : Elapsed 3.508 ms (70.159 ms / 20) 69.646 -> 69.726 ( +0.11%) [ +0.15% +0.07% +0.00% / +0.18% +0.41% +0.11%] index_select strided 16 : Elapsed 3.488 ms (69.752 ms / 20) 70.322 -> 69.135 ( -1.69%) [ +0.10% +0.00% +0.11% / -0.04% -1.69% -1.46%] index_select strided 64 : Elapsed 3.520 ms (70.391 ms / 20) 70.192 -> 69.752 ( -0.63%) [ +0.00% +0.13% +0.23% / +0.18% -0.40% -0.63%] index_select strided 100 : Elapsed 3.510 ms (70.192 ms / 20) 69.862 -> 69.753 ( -0.16%) [ +0.03% +0.00% +0.14% / -0.16% +0.16% -0.06%] index_select strided 255 : Elapsed 3.494 ms (69.883 ms / 20) 69.894 -> 69.638 ( -0.37%) [ +0.19% +0.50% +0.00% / +0.14% -0.37% -0.16%] index_select random : Elapsed 3.501 ms (70.027 ms / 20) 69.743 -> 69.383 ( -0.52%) [ +0.48% +0.00% +0.44% / -0.12% -0.52% +0.18%] index_select random_sorted : Elapsed 3.504 ms (70.081 ms / 20) 69.645 -> 69.631 ( -0.02%) [ +0.32% +0.00% +0.31% / -0.02% +0.46% +0.66%] index_select perm : Elapsed 3.493 ms (69.868 ms / 20) 70.150 -> 69.839 ( -0.44%) [ +0.42% +0.42% +0.00% / +0.15% -0.44% +0.22%] index_select perm_sorted : Elapsed 3.522 ms (70.447 ms / 20) out_shape = [50, 256, 100, 40] in_shape = [50, 256, 20, 40] idx_dim = 2 out_shape = [50, 256, 20, 100] in_shape = [50, 256, 20, 40] idx_dim = 3 out_shape = [100, 256, 40, 20] in_shape = [50, 256, 40, 20] idx_dim = 0 out_shape = [50, 100, 40, 20] in_shape = [50, 256, 40, 20] idx_dim = 1 out_shape = [50, 256, 100, 20] in_shape = [50, 256, 40, 20] idx_dim = 2 out_shape = [50, 256, 40, 100] in_shape = [50, 256, 40, 20] idx_dim = 3 out_shape = [100, 20, 40, 50] in_shape = [256, 20, 40, 50] idx_dim = 0 out_shape = [256, 100, 40, 50] in_shape = [256, 20, 40, 50] idx_dim = 1 out_shape = [256, 20, 100, 50] in_shape = [256, 20, 40, 50] idx_dim = 2 out_shape = [256, 20, 40, 100] in_shape = [256, 20, 40, 50] idx_dim = 3 out_shape = [100, 20, 50, 40] in_shape = [256, 20, 50, 40] idx_dim = 0 out_shape = [256, 100, 50, 40] in_shape = [256, 20, 50, 40] idx_dim = 1 B = [256, 100, 50, 40] (stride (200000, 1, 4000, 100)) A = [256, 20, 50, 40] (stride (1, 512000, 256, 12800)) dim = 1 482.643 -> 481.490 ( -0.24%) [ +0.16% +0.00% +0.06% / -0.01% -0.24% -0.24%] index_add_ linear : Elapsed 24.170 ms (483.397 ms / 20) 389.119 -> 388.045 ( -0.28%) [ +0.17% +0.12% +0.00% / -0.03% -0.28% -0.17%] index_copy_ linear : Elapsed 19.488 ms (389.764 ms / 20) 481.626 -> 482.457 ( +0.17%) [ +0.32% +0.00% +0.38% / +0.17% +0.60% +0.38%] index_add_ reverse : Elapsed 24.159 ms (483.172 ms / 20) 388.660 -> 388.700 ( +0.01%) [ +0.13% +0.08% +0.00% / +0.01% +0.20% +0.05%] index_copy_ reverse : Elapsed 19.458 ms (389.165 ms / 20) 481.815 -> 481.386 ( -0.09%) [ +0.14% +0.05% +0.00% / +0.08% -0.09% +0.06%] index_add_ spread : Elapsed 24.125 ms (482.495 ms / 20) 388.960 -> 388.787 ( -0.04%) [ +0.14% +0.02% +0.00% / -0.04% +0.02% +0.05%] index_copy_ spread : Elapsed 19.476 ms (389.520 ms / 20) 481.505 -> 479.765 ( -0.36%) [ +0.00% +0.02% +0.15% / +0.11% -0.36% -0.03%] index_add_ strided 3 : Elapsed 24.075 ms (481.505 ms / 20) 388.300 -> 388.502 ( +0.05%) [ +0.11% +0.00% +0.10% / +0.05% +0.33% +0.27%] index_copy_ strided 3 : Elapsed 19.437 ms (388.737 ms / 20) 481.606 -> 480.517 ( -0.23%) [ +0.04% +0.01% +0.00% / +0.02% -0.17% -0.23%] index_add_ strided 7 : Elapsed 24.090 ms (481.808 ms / 20) 387.979 -> 388.475 ( +0.13%) [ +0.00% +0.11% +0.06% / +0.13% +0.20% +0.26%] index_copy_ strided 7 : Elapsed 19.399 ms (387.979 ms / 20) 479.481 -> 480.729 ( +0.26%) [ +0.26% +0.00% +0.07% / +0.26% +0.28% +0.30%] index_add_ perm : Elapsed 24.036 ms (480.722 ms / 20) 386.272 -> 387.063 ( +0.20%) [ +0.28% +0.00% +0.21% / +0.20% +0.77% +0.85%] index_copy_ perm : Elapsed 19.368 ms (387.366 ms / 20) 479.325 -> 481.063 ( +0.36%) [ +0.46% +0.00% +0.38% / +0.44% +0.36% +0.64%] index_add_ perm_sorted : Elapsed 24.077 ms (481.536 ms / 20) 386.596 -> 386.498 ( -0.03%) [ +0.07% +0.00% +0.06% / -0.03% +0.61% +0.67%] index_copy_ perm_sorted : Elapsed 19.343 ms (386.858 ms / 20) 1971.007 -> 1969.401 ( -0.08%) [ +0.04% +0.00% +0.05% / +0.02% -0.08% -0.08%] index_select const : Elapsed 98.593 ms (1971.861 ms / 20) 2063.420 -> 2063.210 ( -0.01%) [ +0.06% +0.00% +0.06% / -0.01% +0.09% +0.11%] index_select wrap : Elapsed 103.231 ms (2064.627 ms / 20) 1974.862 -> 1974.524 ( -0.02%) [ +0.00% +0.03% +0.00% / +0.01% -0.02% -0.02%] index_select linear : Elapsed 98.743 ms (1974.867 ms / 20) 1995.658 -> 1995.651 ( -0.00%) [ +0.00% +0.02% +0.03% / +0.01% -0.00% +0.02%] index_select reverse : Elapsed 99.783 ms (1995.658 ms / 20) 1971.463 -> 1968.223 ( -0.16%) [ +0.00% +0.01% +0.01% / -0.03% -0.14% -0.16%] index_select skip64 : Elapsed 98.573 ms (1971.463 ms / 20) 1970.565 -> 1970.377 ( -0.01%) [ +0.03% +0.03% +0.00% / +0.03% +0.02% -0.01%] index_select skip256 : Elapsed 98.555 ms (1971.098 ms / 20) 1999.919 -> 1999.787 ( -0.01%) [ +0.07% +0.02% +0.00% / +0.01% +0.04% -0.01%] index_select spread : Elapsed 100.070 ms (2001.396 ms / 20) 2058.749 -> 2059.996 ( +0.06%) [ +0.05% +0.01% +0.00% / +0.06% +0.30% +0.38%] index_select strided 3 : Elapsed 102.993 ms (2059.868 ms / 20) 2059.005 -> 2059.758 ( +0.04%) [ +0.03% +0.00% +0.00% / +0.04% +0.06% +0.05%] index_select strided 5 : Elapsed 102.976 ms (2059.526 ms / 20) 2064.556 -> 2060.913 ( -0.18%) [ +0.02% +0.02% +0.00% / +0.01% -0.14% -0.18%] index_select strided 7 : Elapsed 103.247 ms (2064.940 ms / 20) 2062.994 -> 2061.513 ( -0.07%) [ +0.00% +0.01% +0.01% / +0.01% -0.07% -0.06%] index_select strided 8 : Elapsed 103.150 ms (2062.994 ms / 20) 2062.747 -> 2063.169 ( +0.02%) [ +0.02% +0.00% +0.03% / +0.02% +0.04% +0.06%] index_select strided 16 : Elapsed 103.154 ms (2063.073 ms / 20) 2053.696 -> 2054.093 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +0.19% +0.21%] index_select random : Elapsed 102.685 ms (2053.696 ms / 20) 1997.813 -> 1997.716 ( -0.00%) [ +0.01% +0.00% +0.00% / -0.00% +0.01% +0.06%] index_select random_sorted : Elapsed 99.897 ms (1997.938 ms / 20) B = [256, 100, 50, 40] (stride (5000, 1, 100, 1280000)) A = [256, 20, 50, 40] (stride (2000, 512000, 40, 1)) dim = 1 445.178 -> 444.745 ( -0.10%) [ +0.01% +0.01% +0.00% / -0.01% -0.10% -0.08%] index_add_ linear : Elapsed 22.262 ms (445.231 ms / 20) 297.706 -> 297.398 ( -0.10%) [ +0.01% +0.01% +0.00% / +0.02% -0.10% -0.10%] index_copy_ linear : Elapsed 14.886 ms (297.728 ms / 20) 445.294 -> 444.915 ( -0.09%) [ +0.03% +0.00% +0.02% / +0.00% -0.09% -0.07%] index_add_ reverse : Elapsed 22.271 ms (445.418 ms / 20) 297.761 -> 297.394 ( -0.12%) [ +0.00% +0.04% +0.02% / +0.00% -0.12% -0.07%] index_copy_ reverse : Elapsed 14.888 ms (297.761 ms / 20) 444.999 -> 445.090 ( +0.02%) [ +0.01% +0.01% +0.00% / +0.02% +0.04% +0.05%] index_add_ spread : Elapsed 22.252 ms (445.039 ms / 20) 297.533 -> 297.569 ( +0.01%) [ +0.03% +0.00% +0.05% / +0.03% +0.01% +0.02%] index_copy_ spread : Elapsed 14.881 ms (297.620 ms / 20) 445.096 -> 444.961 ( -0.03%) [ +0.00% +0.02% +0.04% / -0.03% +0.03% +0.02%] index_add_ strided 3 : Elapsed 22.255 ms (445.096 ms / 20) 297.739 -> 297.585 ( -0.05%) [ +0.00% +0.01% +0.03% / -0.00% -0.04% -0.05%] index_copy_ strided 3 : Elapsed 14.887 ms (297.739 ms / 20) 444.937 -> 444.894 ( -0.01%) [ +0.00% +0.02% +0.00% / -0.01% +0.06% +0.04%] index_add_ strided 7 : Elapsed 22.247 ms (444.937 ms / 20) 297.620 -> 297.524 ( -0.03%) [ +0.00% +0.01% +0.00% / +0.01% -0.03% +0.00%] index_copy_ strided 7 : Elapsed 14.881 ms (297.620 ms / 20) 444.825 -> 445.039 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.12% +0.08%] index_add_ perm : Elapsed 22.241 ms (444.825 ms / 20) 297.433 -> 297.467 ( +0.01%) [ +0.01% +0.00% +0.01% / +0.01% +0.07% +0.05%] index_copy_ perm : Elapsed 14.873 ms (297.459 ms / 20) 444.881 -> 444.924 ( +0.01%) [ +0.00% +0.04% +0.00% / +0.01% +0.08% +0.09%] index_add_ perm_sorted : Elapsed 22.244 ms (444.887 ms / 20) 297.389 -> 297.412 ( +0.01%) [ +0.00% +0.03% +0.04% / +0.01% +0.10% +0.11%] index_copy_ perm_sorted : Elapsed 14.869 ms (297.389 ms / 20) 1485.786 -> 1486.073 ( +0.02%) [ +0.01% +0.00% +0.00% / +0.02% +0.03% +0.03%] index_select const : Elapsed 74.297 ms (1485.943 ms / 20) 1486.999 -> 1487.008 ( +0.00%) [ +0.01% +0.01% +0.00% / +0.00% +0.00% +0.01%] index_select wrap : Elapsed 74.359 ms (1487.179 ms / 20) 1485.707 -> 1485.767 ( +0.00%) [ +0.01% +0.00% +0.00% / +0.00% +0.01% +0.00%] index_select linear : Elapsed 74.289 ms (1485.787 ms / 20) 1486.805 -> 1486.756 ( -0.00%) [ +0.00% +0.00% +0.01% / +0.01% +0.01% -0.00%] index_select reverse : Elapsed 74.340 ms (1486.805 ms / 20) 1485.614 -> 1485.721 ( +0.01%) [ +0.02% +0.00% +0.00% / +0.01% +0.06% +0.05%] index_select skip64 : Elapsed 74.292 ms (1485.849 ms / 20) 1485.573 -> 1485.764 ( +0.01%) [ +0.00% +0.01% +0.01% / +0.01% +0.05% +0.06%] index_select skip256 : Elapsed 74.279 ms (1485.573 ms / 20) 1485.847 -> 1485.864 ( +0.00%) [ +0.01% +0.00% +0.00% / +0.00% +0.02% +0.01%] index_select spread : Elapsed 74.296 ms (1485.923 ms / 20) 1486.909 -> 1486.925 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.03% +0.01%] index_select strided 3 : Elapsed 74.348 ms (1486.959 ms / 20) 1486.879 -> 1486.967 ( +0.01%) [ +0.02% +0.00% +0.01% / +0.02% +0.01% +0.02%] index_select strided 5 : Elapsed 74.357 ms (1487.138 ms / 20) 1487.010 -> 1486.676 ( -0.02%) [ +0.01% +0.00% +0.00% / +0.01% -0.01% -0.02%] index_select strided 7 : Elapsed 74.361 ms (1487.228 ms / 20) 1487.050 -> 1487.049 ( -0.00%) [ +0.01% +0.00% +0.00% / -0.00% +0.02% +0.02%] index_select strided 8 : Elapsed 74.357 ms (1487.141 ms / 20) 1486.940 -> 1486.821 ( -0.01%) [ +0.00% +0.01% +0.00% / -0.01% +0.01% +0.01%] index_select strided 16 : Elapsed 74.349 ms (1486.981 ms / 20) 1486.917 -> 1487.115 ( +0.01%) [ +0.00% +0.00% +0.00% / +0.01% +0.01% +0.03%] index_select random : Elapsed 74.346 ms (1486.917 ms / 20) 1485.773 -> 1485.735 ( -0.00%) [ +0.00% +0.00% +0.01% / -0.00% +0.02% +0.01%] index_select random_sorted : Elapsed 74.289 ms (1485.773 ms / 20) out_shape = [256, 20, 100, 40] in_shape = [256, 20, 50, 40] idx_dim = 2 out_shape = [256, 20, 50, 100] in_shape = [256, 20, 50, 40] idx_dim = 3 B = [256, 20, 50, 100] (stride (50, 1280000, 1, 12800)) A = [256, 20, 50, 40] (stride (40000, 40, 800, 1)) dim = 3 257.386 -> 257.400 ( +0.01%) [ +0.07% +0.00% +0.03% / +0.01% +0.03% +0.02%] index_add_ linear : Elapsed 12.879 ms (257.577 ms / 20) 250.174 -> 250.100 ( -0.03%) [ +0.01% +0.01% +0.00% / -0.03% -0.01% -0.00%] index_copy_ linear : Elapsed 12.510 ms (250.202 ms / 20) 257.488 -> 257.396 ( -0.04%) [ +0.00% +0.02% +0.03% / -0.04% +0.04% -0.01%] index_add_ reverse : Elapsed 12.874 ms (257.488 ms / 20) 250.107 -> 250.011 ( -0.04%) [ +0.01% +0.00% +0.03% / -0.04% +0.10% +0.02%] index_copy_ reverse : Elapsed 12.506 ms (250.126 ms / 20) 257.520 -> 257.499 ( -0.01%) [ +0.05% +0.00% +0.02% / -0.00% +0.02% -0.01%] index_add_ spread : Elapsed 12.882 ms (257.638 ms / 20) 250.209 -> 250.132 ( -0.03%) [ +0.05% +0.00% +0.04% / +0.02% +0.04% -0.03%] index_copy_ spread : Elapsed 12.517 ms (250.330 ms / 20) 257.389 -> 257.391 ( +0.00%) [ +0.00% +0.06% +0.05% / +0.00% +0.06% +0.13%] index_add_ strided 3 : Elapsed 12.869 ms (257.389 ms / 20) 250.157 -> 250.181 ( +0.01%) [ +0.00% +0.04% +0.05% / +0.04% +0.01% +0.08%] index_copy_ strided 3 : Elapsed 12.508 ms (250.157 ms / 20) 257.441 -> 257.504 ( +0.02%) [ +0.04% +0.00% +0.03% / +0.02% +0.04% +0.07%] index_add_ strided 7 : Elapsed 12.877 ms (257.542 ms / 20) 250.190 -> 250.317 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.06% +0.11%] index_copy_ strided 7 : Elapsed 12.516 ms (250.316 ms / 20) 257.381 -> 257.434 ( +0.02%) [ +0.00% +0.05% +0.06% / +0.02% +0.12% +0.08%] index_add_ perm : Elapsed 12.869 ms (257.381 ms / 20) 250.114 -> 250.164 ( +0.02%) [ +0.00% +0.06% +0.06% / +0.02% +0.13% +0.08%] index_copy_ perm : Elapsed 12.506 ms (250.114 ms / 20) 257.518 -> 257.450 ( -0.03%) [ +0.01% +0.00% +0.01% / -0.03% +0.04% +0.01%] index_add_ perm_sorted : Elapsed 12.878 ms (257.554 ms / 20) 250.142 -> 250.216 ( +0.03%) [ +0.02% +0.02% +0.00% / +0.05% +0.10% +0.03%] index_copy_ perm_sorted : Elapsed 12.510 ms (250.198 ms / 20) 625.769 -> 626.130 ( +0.06%) [ +0.00% +0.04% +0.00% / +0.06% +0.09% +0.09%] index_select const : Elapsed 31.289 ms (625.781 ms / 20) 626.064 -> 626.365 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.10% +0.05% +0.06%] index_select wrap : Elapsed 31.303 ms (626.064 ms / 20) 625.777 -> 625.734 ( -0.01%) [ +0.08% +0.05% +0.00% / -0.01% +0.06% +0.04%] index_select linear : Elapsed 31.313 ms (626.259 ms / 20) 625.778 -> 625.973 ( +0.03%) [ +0.00% +0.04% +0.09% / +0.08% +0.05% +0.03%] index_select reverse : Elapsed 31.289 ms (625.778 ms / 20) 625.777 -> 625.565 ( -0.03%) [ +0.04% +0.00% +0.00% / -0.03% +0.12% +0.07%] index_select skip64 : Elapsed 31.302 ms (626.049 ms / 20) 625.640 -> 625.727 ( +0.01%) [ +0.00% +0.08% +0.08% / +0.01% +0.15% +0.16%] index_select skip256 : Elapsed 31.282 ms (625.640 ms / 20) 625.589 -> 625.885 ( +0.05%) [ +0.00% +0.05% +0.04% / +0.05% +0.09% +0.05%] index_select spread : Elapsed 31.279 ms (625.589 ms / 20) 626.819 -> 627.099 ( +0.04%) [ +0.03% +0.00% +0.01% / +0.09% +0.04% +0.10%] index_select strided 3 : Elapsed 31.349 ms (626.989 ms / 20) 627.141 -> 627.058 ( -0.01%) [ +0.07% +0.00% +0.00% / -0.01% +0.10% +0.03%] index_select strided 5 : Elapsed 31.378 ms (627.569 ms / 20) 627.716 -> 628.021 ( +0.05%) [ +0.02% +0.00% +0.03% / +0.05% +0.10% +0.08%] index_select strided 7 : Elapsed 31.393 ms (627.863 ms / 20) 628.033 -> 628.257 ( +0.04%) [ +0.04% +0.00% +0.06% / +0.04% +0.08% +0.10%] index_select strided 8 : Elapsed 31.413 ms (628.257 ms / 20) 628.025 -> 628.347 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.08% +0.11% +0.05%] index_select strided 16 : Elapsed 31.401 ms (628.025 ms / 20) 627.170 -> 627.480 ( +0.05%) [ +0.00% +0.02% +0.07% / +0.05% +0.11% +0.08%] index_select random : Elapsed 31.359 ms (627.170 ms / 20) 625.809 -> 625.716 ( -0.01%) [ +0.03% +0.02% +0.00% / -0.01% +0.03% +0.01%] index_select random_sorted : Elapsed 31.298 ms (625.968 ms / 20) out_shape = [100, 40, 20, 50] in_shape = [256, 40, 20, 50] idx_dim = 0 out_shape = [256, 100, 20, 50] in_shape = [256, 40, 20, 50] idx_dim = 1 out_shape = [256, 40, 100, 50] in_shape = [256, 40, 20, 50] idx_dim = 2 out_shape = [256, 40, 20, 100] in_shape = [256, 40, 20, 50] idx_dim = 3 out_shape = [100, 40, 50, 20] in_shape = [256, 40, 50, 20] idx_dim = 0 out_shape = [256, 100, 50, 20] in_shape = [256, 40, 50, 20] idx_dim = 1 out_shape = [256, 40, 100, 20] in_shape = [256, 40, 50, 20] idx_dim = 2 out_shape = [256, 40, 50, 100] in_shape = [256, 40, 50, 20] idx_dim = 3 out_shape = [100, 50, 20, 40] in_shape = [256, 50, 20, 40] idx_dim = 0 B = [100, 50, 20, 40] (stride (1, 80000, 100, 2000)) A = [256, 50, 20, 40] (stride (40000, 1, 2000, 50)) dim = 0 103.166 -> 103.199 ( +0.03%) [ +0.00% +0.43% +0.39% / +0.29% +0.03% +0.14%] index_select const : Elapsed 5.158 ms (103.166 ms / 20) 119.610 -> 119.674 ( +0.05%) [ +0.13% +0.14% +0.00% / +0.15% +0.08% +0.05%] index_select wrap : Elapsed 5.988 ms (119.767 ms / 20) 119.716 -> 119.636 ( -0.07%) [ +0.21% +0.04% +0.00% / +0.08% -0.01% -0.07%] index_select linear : Elapsed 5.998 ms (119.966 ms / 20) 119.022 -> 119.293 ( +0.23%) [ +0.18% +0.00% +0.08% / +0.23% +0.65% +0.56%] index_select reverse : Elapsed 5.962 ms (119.235 ms / 20) 103.729 -> 103.236 ( -0.48%) [ +0.15% +0.05% +0.00% / +0.12% -0.29% -0.48%] index_select skip64 : Elapsed 5.194 ms (103.884 ms / 20) 103.253 -> 102.815 ( -0.42%) [ +0.17% +0.15% +0.00% / +0.20% -0.25% -0.42%] index_select skip256 : Elapsed 5.171 ms (103.426 ms / 20) 120.114 -> 119.853 ( -0.22%) [ +0.22% +0.00% +0.07% / +0.27% -0.22% -0.20%] index_select spread : Elapsed 6.019 ms (120.377 ms / 20) 119.753 -> 119.950 ( +0.16%) [ +0.15% +0.00% +0.35% / +0.19% +0.16% +0.21%] index_select strided 3 : Elapsed 5.997 ms (119.935 ms / 20) 119.489 -> 119.647 ( +0.13%) [ +0.00% +0.21% +0.02% / +0.13% +0.36% +0.36%] index_select strided 5 : Elapsed 5.974 ms (119.489 ms / 20) 120.024 -> 119.642 ( -0.32%) [ +0.23% +0.00% +0.04% / +0.27% -0.08% -0.32%] index_select strided 7 : Elapsed 6.015 ms (120.303 ms / 20) 120.243 -> 119.311 ( -0.78%) [ +0.16% +0.17% +0.00% / -0.16% -0.78% -0.52%] index_select strided 8 : Elapsed 6.022 ms (120.440 ms / 20) 120.099 -> 120.018 ( -0.07%) [ +0.00% +0.11% +0.21% / +0.02% -0.02% -0.07%] index_select strided 16 : Elapsed 6.005 ms (120.099 ms / 20) 118.144 -> 117.385 ( -0.64%) [ +0.00% +0.05% +0.31% / +0.13% -0.64% -0.43%] index_select strided 64 : Elapsed 5.907 ms (118.144 ms / 20) 120.253 -> 120.392 ( +0.12%) [ +0.11% +0.08% +0.00% / +0.19% +0.19% +0.12%] index_select strided 100 : Elapsed 6.019 ms (120.384 ms / 20) 119.444 -> 119.827 ( +0.32%) [ +0.00% +0.30% +0.24% / +0.39% +0.32% +0.34%] index_select strided 255 : Elapsed 5.972 ms (119.444 ms / 20) 119.968 -> 119.920 ( -0.04%) [ +0.04% +0.00% +0.03% / +0.11% -0.04% +0.04%] index_select random : Elapsed 6.001 ms (120.017 ms / 20) 117.783 -> 117.777 ( -0.01%) [ +0.00% +0.22% +0.38% / +0.20% +0.17% -0.01%] index_select random_sorted : Elapsed 5.889 ms (117.783 ms / 20) 120.185 -> 119.977 ( -0.17%) [ +0.06% +0.00% +0.27% / +0.17% -0.09% -0.17%] index_select perm : Elapsed 6.013 ms (120.262 ms / 20) 120.034 -> 119.862 ( -0.14%) [ +0.33% +0.00% +0.03% / +0.04% -0.01% -0.14%] index_select perm_sorted : Elapsed 6.021 ms (120.426 ms / 20) B = [100, 50, 20, 40] (stride (20, 2000, 1, 100000)) A = [256, 50, 20, 40] (stride (40000, 800, 1, 20)) dim = 0 70.604 -> 70.645 ( +0.06%) [ +0.00% +0.08% +0.02% / +0.06% +0.75% +0.83%] index_select const : Elapsed 3.530 ms (70.604 ms / 20) 79.159 -> 78.816 ( -0.43%) [ +0.61% +0.00% +0.62% / +0.23% -0.29% -0.43%] index_select wrap : Elapsed 3.982 ms (79.645 ms / 20) 79.479 -> 78.745 ( -0.92%) [ +0.54% +0.48% +0.00% / +0.28% -0.71% -0.92%] index_select linear : Elapsed 3.995 ms (79.906 ms / 20) 79.136 -> 78.510 ( -0.79%) [ +0.01% +0.35% +0.00% / -0.20% -0.79% -0.64%] index_select reverse : Elapsed 3.957 ms (79.142 ms / 20) 70.946 -> 70.960 ( +0.02%) [ +0.09% +0.14% +0.00% / +0.02% +0.11% +0.48%] index_select skip64 : Elapsed 3.551 ms (71.012 ms / 20) 70.446 -> 70.834 ( +0.55%) [ +0.33% +0.00% +0.08% / +0.55% +1.14% +0.91%] index_select skip256 : Elapsed 3.534 ms (70.678 ms / 20) 79.206 -> 78.609 ( -0.75%) [ +0.41% +0.00% +0.06% / +0.27% -0.42% -0.75%] index_select spread : Elapsed 3.976 ms (79.528 ms / 20) 78.702 -> 78.521 ( -0.23%) [ +0.46% +0.34% +0.00% / -0.23% +0.42% +0.44%] index_select strided 3 : Elapsed 3.953 ms (79.062 ms / 20) 79.128 -> 78.847 ( -0.36%) [ +0.09% +0.30% +0.00% / +0.45% -0.22% -0.36%] index_select strided 5 : Elapsed 3.960 ms (79.202 ms / 20) 80.732 -> 78.427 ( -2.86%) [ +0.11% +0.17% +0.00% / +0.34% -2.86% -2.81%] index_select strided 7 : Elapsed 4.041 ms (80.823 ms / 20) 79.945 -> 79.191 ( -0.94%) [ +0.02% +0.20% +0.00% / +0.16% -0.65% -0.94%] index_select strided 8 : Elapsed 3.998 ms (79.959 ms / 20) 79.349 -> 78.968 ( -0.48%) [ +0.26% +0.00% +0.05% / -0.13% -0.30% -0.48%] index_select strided 16 : Elapsed 3.978 ms (79.553 ms / 20) 79.023 -> 76.606 ( -3.06%) [ +0.00% +0.29% +0.24% / +0.10% -2.78% -3.06%] index_select strided 64 : Elapsed 3.951 ms (79.023 ms / 20) 79.777 -> 79.239 ( -0.67%) [ +0.06% +0.00% +0.00% / +0.17% -0.56% -0.67%] index_select strided 100 : Elapsed 3.991 ms (79.827 ms / 20) 78.931 -> 78.631 ( -0.38%) [ +0.00% +0.12% +0.20% / +0.03% -0.38% -0.13%] index_select strided 255 : Elapsed 3.947 ms (78.931 ms / 20) 79.075 -> 79.166 ( +0.12%) [ +0.85% +0.00% +0.67% / +0.26% +0.31% +0.12%] index_select random : Elapsed 3.987 ms (79.749 ms / 20) 78.439 -> 78.612 ( +0.22%) [ +0.22% +0.12% +0.00% / +0.22% +0.57% +0.79%] index_select random_sorted : Elapsed 3.930 ms (78.609 ms / 20) 78.179 -> 78.470 ( +0.37%) [ +0.07% +0.00% +0.03% / +0.37% +0.83% +0.65%] index_select perm : Elapsed 3.912 ms (78.237 ms / 20) 78.978 -> 78.472 ( -0.64%) [ +0.74% +0.00% +0.05% / -0.34% -0.64% -0.29%] index_select perm_sorted : Elapsed 3.978 ms (79.563 ms / 20) out_shape = [256, 100, 20, 40] in_shape = [256, 50, 20, 40] idx_dim = 1 out_shape = [256, 50, 100, 40] in_shape = [256, 50, 20, 40] idx_dim = 2 B = [256, 50, 100, 40] (stride (40, 10240, 512000, 1)) A = [256, 50, 20, 40] (stride (1, 204800, 256, 5120)) dim = 2 253.419 -> 252.470 ( -0.37%) [ +0.00% +0.39% +0.38% / +0.25% -0.37% +0.08%] index_add_ linear : Elapsed 12.671 ms (253.419 ms / 20) 244.644 -> 242.963 ( -0.69%) [ +0.00% +0.22% +0.13% / +0.24% -0.69% -0.00%] index_copy_ linear : Elapsed 12.232 ms (244.644 ms / 20) 255.330 -> 252.722 ( -1.02%) [ +0.00% +0.25% +0.16% / -0.07% -1.02% -0.73%] index_add_ reverse : Elapsed 12.767 ms (255.330 ms / 20) 245.892 -> 244.355 ( -0.63%) [ +0.02% +0.00% +0.12% / -0.09% -0.63% -0.36%] index_copy_ reverse : Elapsed 12.297 ms (245.935 ms / 20) 253.581 -> 252.413 ( -0.46%) [ +0.43% +0.00% +0.00% / +0.11% -0.46% -0.15%] index_add_ spread : Elapsed 12.734 ms (254.679 ms / 20) 245.048 -> 243.120 ( -0.79%) [ +0.17% +0.00% +0.10% / -0.03% -0.79% -0.45%] index_copy_ spread : Elapsed 12.273 ms (245.462 ms / 20) 253.704 -> 252.896 ( -0.32%) [ +0.08% +0.15% +0.00% / +0.18% -0.32% -0.17%] index_add_ strided 3 : Elapsed 12.695 ms (253.909 ms / 20) 244.631 -> 243.657 ( -0.40%) [ +0.09% +0.40% +0.00% / +0.26% -0.40% +0.07%] index_copy_ strided 3 : Elapsed 12.243 ms (244.856 ms / 20) 253.639 -> 252.407 ( -0.49%) [ +0.00% +0.36% +0.15% / +0.02% +0.02% -0.49%] index_add_ strided 7 : Elapsed 12.682 ms (253.639 ms / 20) 244.981 -> 243.925 ( -0.43%) [ +0.01% +0.33% +0.00% / +0.05% -0.02% -0.43%] index_copy_ strided 7 : Elapsed 12.251 ms (245.012 ms / 20) 252.668 -> 253.248 ( +0.23%) [ +0.71% +0.39% +0.00% / +0.42% +0.23% +0.49%] index_add_ perm : Elapsed 12.723 ms (254.463 ms / 20) 244.006 -> 243.805 ( -0.08%) [ +0.47% +0.26% +0.00% / +0.29% -0.08% +0.30%] index_copy_ perm : Elapsed 12.257 ms (245.141 ms / 20) 253.906 -> 253.594 ( -0.12%) [ +0.00% +0.25% +0.10% / +0.08% -0.12% -0.10%] index_add_ perm_sorted : Elapsed 12.695 ms (253.906 ms / 20) 244.886 -> 243.952 ( -0.38%) [ +0.16% +0.15% +0.00% / +0.13% -0.25% -0.38%] index_copy_ perm_sorted : Elapsed 12.264 ms (245.281 ms / 20) 1183.470 -> 1184.043 ( +0.05%) [ +0.01% +0.05% +0.00% / +0.05% +0.49% +0.50%] index_select const : Elapsed 59.178 ms (1183.566 ms / 20) 1357.017 -> 1357.399 ( +0.03%) [ +0.04% +0.05% +0.00% / +0.03% +0.11% +0.08%] index_select wrap : Elapsed 67.881 ms (1357.623 ms / 20) 1195.102 -> 1195.188 ( +0.01%) [ +0.00% +0.10% +0.10% / +0.01% +2.47% +2.25%] index_select linear : Elapsed 59.755 ms (1195.102 ms / 20) 1213.914 -> 1214.423 ( +0.04%) [ +0.00% +0.16% +0.09% / +0.04% +3.53% +3.60%] index_select reverse : Elapsed 60.696 ms (1213.914 ms / 20) 1184.691 -> 1183.354 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.45% +0.35%] index_select skip64 : Elapsed 59.235 ms (1184.691 ms / 20) 1183.991 -> 1184.373 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.48% +0.45%] index_select skip256 : Elapsed 59.244 ms (1184.884 ms / 20) 1280.366 -> 1275.043 ( -0.42%) [ +0.03% +0.00% +0.05% / +0.08% -0.36% -0.42%] index_select spread : Elapsed 64.040 ms (1280.807 ms / 20) 1362.572 -> 1362.308 ( -0.02%) [ +0.01% +0.00% +0.03% / -0.02% +0.23% +0.12%] index_select strided 3 : Elapsed 68.134 ms (1362.681 ms / 20) 1347.999 -> 1345.537 ( -0.18%) [ +0.15% +0.00% +0.08% / +0.07% -0.14% -0.18%] index_select strided 5 : Elapsed 67.503 ms (1350.068 ms / 20) 1366.959 -> 1359.904 ( -0.52%) [ +0.07% +0.06% +0.00% / +0.07% -0.46% -0.52%] index_select strided 7 : Elapsed 68.397 ms (1367.934 ms / 20) 1311.486 -> 1313.034 ( +0.12%) [ +0.15% +0.13% +0.00% / +0.17% +0.12% +0.19%] index_select strided 8 : Elapsed 65.673 ms (1313.459 ms / 20) 1309.146 -> 1307.218 ( -0.15%) [ +0.00% +0.13% +0.09% / +0.02% -0.14% -0.15%] index_select strided 16 : Elapsed 65.457 ms (1309.146 ms / 20) 1351.496 -> 1349.362 ( -0.16%) [ +0.00% +0.05% +0.02% / +0.00% -0.16% -0.09%] index_select random : Elapsed 67.575 ms (1351.496 ms / 20) 1279.641 -> 1269.056 ( -0.83%) [ +0.00% +0.03% +0.02% / -0.08% -0.83% -0.79%] index_select random_sorted : Elapsed 63.982 ms (1279.641 ms / 20) out_shape = [256, 50, 20, 100] in_shape = [256, 50, 20, 40] idx_dim = 3 B = [256, 50, 20, 100] (stride (1000, 1, 50, 256000)) A = [256, 50, 20, 40] (stride (800, 204800, 40, 1)) dim = 3 222.034 -> 221.956 ( -0.04%) [ +0.27% +0.03% +0.00% / +0.03% -0.04% +0.00%] index_add_ linear : Elapsed 11.131 ms (222.623 ms / 20) 219.528 -> 218.390 ( -0.52%) [ +0.08% +0.00% +0.02% / +0.15% -0.41% -0.52%] index_copy_ linear : Elapsed 10.985 ms (219.709 ms / 20) 222.138 -> 221.845 ( -0.13%) [ +0.00% +0.04% +0.01% / -0.00% -0.00% -0.13%] index_add_ reverse : Elapsed 11.107 ms (222.138 ms / 20) 219.512 -> 218.109 ( -0.64%) [ +0.03% +0.00% +0.01% / -0.24% -0.20% -0.64%] index_copy_ reverse : Elapsed 10.979 ms (219.579 ms / 20) 222.285 -> 222.048 ( -0.11%) [ +0.22% +0.00% +0.22% / -0.03% -0.11% +0.05%] index_add_ spread : Elapsed 11.139 ms (222.782 ms / 20) 219.491 -> 218.333 ( -0.53%) [ +0.39% +0.00% +0.38% / -0.06% -0.48% -0.53%] index_copy_ spread : Elapsed 11.018 ms (220.350 ms / 20) 222.337 -> 221.736 ( -0.27%) [ +0.00% +0.08% +0.01% / -0.14% -0.27% -0.17%] index_add_ strided 3 : Elapsed 11.117 ms (222.337 ms / 20) 218.987 -> 217.916 ( -0.49%) [ +0.16% +0.48% +0.00% / -0.03% -0.04% -0.49%] index_copy_ strided 3 : Elapsed 10.967 ms (219.335 ms / 20) 221.872 -> 221.944 ( +0.03%) [ +0.17% +0.14% +0.00% / +0.03% +0.13% +0.10%] index_add_ strided 7 : Elapsed 11.113 ms (222.255 ms / 20) 219.436 -> 218.455 ( -0.45%) [ +0.08% +0.01% +0.00% / -0.27% -0.45% -0.17%] index_copy_ strided 7 : Elapsed 10.981 ms (219.613 ms / 20) 222.081 -> 221.876 ( -0.09%) [ +0.06% +0.15% +0.00% / -0.09% -0.07% +0.26%] index_add_ perm : Elapsed 11.111 ms (222.216 ms / 20) 219.613 -> 218.740 ( -0.40%) [ +0.10% +0.19% +0.00% / -0.32% -0.40% -0.05%] index_copy_ perm : Elapsed 10.991 ms (219.829 ms / 20) 222.052 -> 222.260 ( +0.09%) [ +0.14% +0.00% +0.22% / +0.09% +0.18% +0.27%] index_add_ perm_sorted : Elapsed 11.118 ms (222.360 ms / 20) 218.892 -> 218.192 ( -0.32%) [ +0.11% +0.00% +0.28% / -0.02% -0.32% -0.05%] index_copy_ perm_sorted : Elapsed 10.957 ms (219.138 ms / 20) 591.377 -> 588.771 ( -0.44%) [ +0.06% +0.00% +0.17% / -0.01% -0.29% -0.44%] index_select const : Elapsed 29.586 ms (591.724 ms / 20) 593.271 -> 591.202 ( -0.35%) [ +0.17% +0.11% +0.00% / +0.11% -0.35% -0.06%] index_select wrap : Elapsed 29.712 ms (594.250 ms / 20) 590.779 -> 590.254 ( -0.09%) [ +0.00% +0.33% +0.08% / +0.17% -0.08% -0.09%] index_select linear : Elapsed 29.539 ms (590.779 ms / 20) 591.435 -> 590.166 ( -0.21%) [ +0.01% +0.00% +0.19% / -0.21% -0.19% -0.19%] index_select reverse : Elapsed 29.576 ms (591.519 ms / 20) 590.069 -> 589.297 ( -0.13%) [ +0.00% +0.09% +0.42% / +0.28% -0.11% -0.13%] index_select skip64 : Elapsed 29.503 ms (590.069 ms / 20) 590.154 -> 589.535 ( -0.10%) [ +0.00% +0.14% +0.20% / +0.07% -0.01% -0.10%] index_select skip256 : Elapsed 29.508 ms (590.154 ms / 20) 593.233 -> 591.911 ( -0.22%) [ +0.20% +0.00% +0.22% / +0.07% -0.22% -0.22%] index_select spread : Elapsed 29.721 ms (594.427 ms / 20) 593.068 -> 591.637 ( -0.24%) [ +0.00% +0.06% +0.19% / +0.02% +0.05% -0.24%] index_select strided 3 : Elapsed 29.653 ms (593.068 ms / 20) 593.933 -> 592.151 ( -0.30%) [ +0.12% +0.07% +0.00% / +0.02% -0.30% -0.15%] index_select strided 5 : Elapsed 29.732 ms (594.633 ms / 20) 593.128 -> 591.992 ( -0.19%) [ +0.15% +0.07% +0.00% / +0.08% -0.11% -0.19%] index_select strided 7 : Elapsed 29.700 ms (593.993 ms / 20) 593.080 -> 591.259 ( -0.31%) [ +0.00% +0.14% +0.10% / +0.15% -0.14% -0.31%] index_select strided 8 : Elapsed 29.654 ms (593.080 ms / 20) 593.344 -> 592.435 ( -0.15%) [ +0.07% +0.00% +0.12% / +0.14% -0.15% -0.05%] index_select strided 16 : Elapsed 29.687 ms (593.736 ms / 20) 593.315 -> 592.767 ( -0.09%) [ +0.10% +0.00% +0.26% / +0.11% -0.09% -0.06%] index_select random : Elapsed 29.694 ms (593.880 ms / 20) 593.503 -> 592.099 ( -0.24%) [ +0.30% +0.18% +0.00% / +0.15% -0.24% -0.08%] index_select random_sorted : Elapsed 29.763 ms (595.257 ms / 20) out_shape = [100, 50, 40, 20] in_shape = [256, 50, 40, 20] idx_dim = 0 out_shape = [256, 100, 40, 20] in_shape = [256, 50, 40, 20] idx_dim = 1 out_shape = [256, 50, 100, 20] in_shape = [256, 50, 40, 20] idx_dim = 2 out_shape = [256, 50, 40, 100] in_shape = [256, 50, 40, 20] idx_dim = 3 out_shape = [256, 40, 50, 100] in_shape = [20, 40, 50, 100] idx_dim = 0 out_shape = [20, 256, 50, 100] in_shape = [20, 40, 50, 100] idx_dim = 1 out_shape = [20, 40, 256, 100] in_shape = [20, 40, 50, 100] idx_dim = 2 B = [20, 40, 256, 100] (stride (25600, 512000, 1, 256)) A = [20, 40, 50, 100] (stride (4000, 1, 80000, 40)) dim = 2 180.637 -> 180.532 ( -0.06%) [ +0.07% +0.00% +0.00% / -0.03% -0.06% +0.10%] index_add_ linear : Elapsed 9.039 ms (180.772 ms / 20) 131.972 -> 131.971 ( -0.00%) [ +0.04% +0.00% +0.04% / -0.00% +0.06% +0.00%] index_copy_ linear : Elapsed 6.601 ms (132.027 ms / 20) 180.057 -> 180.274 ( +0.12%) [ +0.00% +0.14% +0.22% / +0.12% +0.18% +0.30%] index_add_ reverse : Elapsed 9.003 ms (180.057 ms / 20) 131.693 -> 131.697 ( +0.00%) [ +0.00% +0.06% +0.25% / +0.00% +0.02% +0.28%] index_copy_ reverse : Elapsed 6.585 ms (131.693 ms / 20) 183.720 -> 183.586 ( -0.07%) [ +0.02% +0.00% +0.08% / -0.07% -0.00% -0.02%] index_add_ spread : Elapsed 9.188 ms (183.756 ms / 20) 133.201 -> 133.034 ( -0.13%) [ +0.00% +0.04% +0.06% / -0.13% -0.01% -0.02%] index_copy_ spread : Elapsed 6.660 ms (133.201 ms / 20) 182.228 -> 182.323 ( +0.05%) [ +0.01% +0.06% +0.00% / +0.08% +0.05% +0.09%] index_add_ strided 3 : Elapsed 9.112 ms (182.238 ms / 20) 132.615 -> 132.591 ( -0.02%) [ +0.11% +0.00% +0.02% / +0.08% -0.02% +0.08%] index_copy_ strided 3 : Elapsed 6.638 ms (132.762 ms / 20) 183.605 -> 183.398 ( -0.11%) [ +0.00% +0.12% +0.13% / -0.02% +0.04% -0.11%] index_add_ strided 5 : Elapsed 9.180 ms (183.605 ms / 20) 133.203 -> 133.001 ( -0.15%) [ +0.02% +0.05% +0.00% / +0.05% +0.03% -0.15%] index_copy_ strided 5 : Elapsed 6.662 ms (133.236 ms / 20) 184.068 -> 184.406 ( +0.18%) [ +0.18% +0.00% +0.14% / +0.18% +0.27% +0.41%] index_add_ strided 7 : Elapsed 9.220 ms (184.391 ms / 20) 133.029 -> 133.015 ( -0.01%) [ +0.02% +0.00% +0.09% / -0.01% +0.48% +0.52%] index_copy_ strided 7 : Elapsed 6.653 ms (133.054 ms / 20) 179.992 -> 179.963 ( -0.02%) [ +0.00% +0.06% +0.06% / -0.02% +0.29% +0.45%] index_add_ strided 255 : Elapsed 9.000 ms (179.992 ms / 20) 131.159 -> 131.387 ( +0.17%) [ +0.21% +0.00% +0.23% / +0.17% +0.64% +0.69%] index_copy_ strided 255 : Elapsed 6.572 ms (131.437 ms / 20) 186.765 -> 186.446 ( -0.17%) [ +0.00% +0.02% +0.02% / -0.03% -0.17% -0.03%] index_add_ perm : Elapsed 9.338 ms (186.765 ms / 20) 135.164 -> 134.949 ( -0.16%) [ +0.03% +0.00% +0.05% / +0.03% -0.16% -0.07%] index_copy_ perm : Elapsed 6.760 ms (135.202 ms / 20) 182.881 -> 182.873 ( -0.00%) [ +0.19% +0.00% +0.17% / -0.00% +0.17% +0.17%] index_add_ perm_sorted : Elapsed 9.162 ms (183.237 ms / 20) 132.933 -> 132.888 ( -0.03%) [ +0.04% +0.00% +0.00% / -0.03% -0.01% +0.10%] index_copy_ perm_sorted : Elapsed 6.649 ms (132.985 ms / 20) 630.562 -> 631.467 ( +0.14%) [ +0.00% +0.24% +0.31% / +0.18% +0.14% +0.16%] index_select const : Elapsed 31.528 ms (630.562 ms / 20) 693.529 -> 694.540 ( +0.15%) [ +0.22% +0.00% +0.14% / +0.15% +0.25% +0.37%] index_select wrap : Elapsed 34.753 ms (695.063 ms / 20) 638.860 -> 639.468 ( +0.10%) [ +0.00% +0.11% +0.03% / +0.10% +0.16% +0.16%] index_select linear : Elapsed 31.943 ms (638.860 ms / 20) 645.845 -> 646.108 ( +0.04%) [ +0.04% +0.00% +0.07% / +0.04% +0.43% +0.41%] index_select reverse : Elapsed 32.306 ms (646.129 ms / 20) 631.523 -> 629.915 ( -0.25%) [ +0.06% +0.03% +0.00% / -0.02% -0.25% -0.08%] index_select skip64 : Elapsed 31.595 ms (631.893 ms / 20) 630.335 -> 630.266 ( -0.01%) [ +0.00% +0.26% +0.25% / +0.28% -0.01% +0.19%] index_select skip256 : Elapsed 31.517 ms (630.335 ms / 20) 673.283 -> 673.432 ( +0.02%) [ +0.09% +0.00% +0.06% / +0.16% +0.02% +0.06%] index_select spread : Elapsed 33.693 ms (673.869 ms / 20) 695.153 -> 695.163 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.16% +0.00% +0.02%] index_select strided 3 : Elapsed 34.759 ms (695.173 ms / 20) 693.763 -> 693.676 ( -0.01%) [ +0.08% +0.00% +0.01% / +0.02% -0.01% +0.02%] index_select strided 5 : Elapsed 34.716 ms (694.326 ms / 20) 694.192 -> 694.058 ( -0.02%) [ +0.06% +0.00% +0.09% / -0.02% +0.24% +0.20%] index_select strided 7 : Elapsed 34.730 ms (694.596 ms / 20) 695.172 -> 694.521 ( -0.09%) [ +0.10% +0.09% +0.00% / -0.04% -0.02% -0.09%] index_select strided 8 : Elapsed 34.792 ms (695.833 ms / 20) 695.991 -> 694.468 ( -0.22%) [ +0.00% +0.00% +0.07% / -0.03% -0.17% -0.22%] index_select strided 16 : Elapsed 34.800 ms (695.991 ms / 20) 692.085 -> 692.377 ( +0.04%) [ +0.10% +0.06% +0.00% / +0.04% +0.37% +0.14%] index_select random : Elapsed 34.639 ms (692.776 ms / 20) 671.308 -> 671.467 ( +0.02%) [ +0.08% +0.00% +0.03% / +0.02% +0.21% +0.11%] index_select random_sorted : Elapsed 33.593 ms (671.854 ms / 20) out_shape = [20, 40, 50, 256] in_shape = [20, 40, 50, 100] idx_dim = 3 out_shape = [256, 40, 100, 50] in_shape = [20, 40, 100, 50] idx_dim = 0 out_shape = [20, 256, 100, 50] in_shape = [20, 40, 100, 50] idx_dim = 1 out_shape = [20, 40, 256, 50] in_shape = [20, 40, 100, 50] idx_dim = 2 B = [20, 40, 256, 50] (stride (10240, 256, 1, 204800)) A = [20, 40, 100, 50] (stride (200000, 50, 2000, 1)) dim = 2 153.754 -> 153.556 ( -0.13%) [ +0.02% +0.00% +0.13% / -0.01% -0.13% -0.03%] index_add_ linear : Elapsed 7.689 ms (153.783 ms / 20) 106.625 -> 106.766 ( +0.13%) [ +0.00% +0.17% +0.37% / +0.13% +0.15% +0.14%] index_copy_ linear : Elapsed 5.331 ms (106.625 ms / 20) 152.757 -> 152.988 ( +0.15%) [ +0.00% +0.00% +0.19% / +0.15% +0.21% +0.30%] index_add_ reverse : Elapsed 7.638 ms (152.757 ms / 20) 106.294 -> 106.496 ( +0.19%) [ +0.00% +0.03% +0.14% / +0.35% +0.28% +0.19%] index_copy_ reverse : Elapsed 5.315 ms (106.294 ms / 20) 158.893 -> 159.241 ( +0.22%) [ +0.07% +0.26% +0.00% / +0.29% +0.30% +0.22%] index_add_ spread : Elapsed 7.950 ms (159.003 ms / 20) 108.521 -> 108.706 ( +0.17%) [ +0.23% +0.10% +0.00% / +0.31% +0.32% +0.17%] index_copy_ spread : Elapsed 5.439 ms (108.773 ms / 20) 160.260 -> 160.531 ( +0.17%) [ +0.06% +0.34% +0.00% / +0.17% +0.43% +0.22%] index_add_ strided 3 : Elapsed 8.018 ms (160.359 ms / 20) 109.295 -> 109.422 ( +0.12%) [ +0.14% +0.24% +0.00% / +0.12% +0.14% +0.29%] index_copy_ strided 3 : Elapsed 5.472 ms (109.449 ms / 20) 164.043 -> 164.092 ( +0.03%) [ +0.00% +0.31% +0.23% / +0.09% +0.05% +0.03%] index_add_ strided 5 : Elapsed 8.202 ms (164.043 ms / 20) 110.811 -> 110.936 ( +0.11%) [ +0.00% +0.20% +0.30% / +0.29% +0.23% +0.11%] index_copy_ strided 5 : Elapsed 5.541 ms (110.811 ms / 20) 166.764 -> 166.748 ( -0.01%) [ +0.02% +0.00% +0.15% / +0.07% -0.01% +0.01%] index_add_ strided 7 : Elapsed 8.340 ms (166.794 ms / 20) 112.281 -> 112.251 ( -0.03%) [ +0.09% +0.00% +0.11% / +0.25% -0.03% +0.02%] index_copy_ strided 7 : Elapsed 5.619 ms (112.382 ms / 20) 153.677 -> 153.679 ( +0.00%) [ +0.33% +0.00% +0.14% / +0.00% +0.01% +0.20%] index_add_ strided 255 : Elapsed 7.709 ms (154.179 ms / 20) 106.579 -> 106.604 ( +0.02%) [ +0.29% +0.06% +0.00% / +0.14% +0.02% +0.42%] index_copy_ strided 255 : Elapsed 5.345 ms (106.893 ms / 20) 170.531 -> 170.513 ( -0.01%) [ +0.01% +0.00% +0.02% / -0.01% +0.09% +0.04%] index_add_ perm : Elapsed 8.528 ms (170.553 ms / 20) 114.993 -> 115.074 ( +0.07%) [ +0.00% +0.03% +0.02% / +0.09% +0.12% +0.07%] index_copy_ perm : Elapsed 5.750 ms (114.993 ms / 20) 157.414 -> 157.537 ( +0.08%) [ +0.44% +0.20% +0.00% / +0.25% +0.08% +0.50%] index_add_ perm_sorted : Elapsed 7.906 ms (158.113 ms / 20) 108.080 -> 108.249 ( +0.16%) [ +0.12% +0.31% +0.00% / +0.34% +0.16% +0.41%] index_copy_ perm_sorted : Elapsed 5.410 ms (108.209 ms / 20) 269.649 -> 269.718 ( +0.03%) [ +0.00% +0.28% +0.27% / +0.03% +1.37% +1.27%] index_select const : Elapsed 13.482 ms (269.649 ms / 20) 271.439 -> 271.804 ( +0.13%) [ +0.00% +0.10% +0.14% / +0.13% +0.32% +0.39%] index_select wrap : Elapsed 13.572 ms (271.439 ms / 20) 270.741 -> 269.770 ( -0.36%) [ +0.00% +0.05% +0.02% / -0.16% -0.36% -0.08%] index_select linear : Elapsed 13.537 ms (270.741 ms / 20) 271.228 -> 270.408 ( -0.30%) [ +0.00% +0.15% +0.02% / -0.13% -0.30% -0.25%] index_select reverse : Elapsed 13.561 ms (271.228 ms / 20) 271.609 -> 270.787 ( -0.30%) [ +0.16% +0.00% +0.10% / +0.10% -0.30% -0.29%] index_select skip64 : Elapsed 13.602 ms (272.049 ms / 20) 269.418 -> 270.184 ( +0.28%) [ +0.39% +0.00% +0.09% / +0.28% +1.57% +1.47%] index_select skip256 : Elapsed 13.523 ms (270.459 ms / 20) 271.427 -> 271.240 ( -0.07%) [ +0.01% +0.00% +0.06% / +0.14% +0.03% -0.07%] index_select spread : Elapsed 13.573 ms (271.452 ms / 20) 271.808 -> 271.587 ( -0.08%) [ +0.15% +0.00% +0.19% / +0.13% -0.07% -0.08%] index_select strided 3 : Elapsed 13.611 ms (272.214 ms / 20) 270.732 -> 271.616 ( +0.33%) [ +0.19% +0.00% +0.31% / +0.33% +0.47% +0.54%] index_select strided 5 : Elapsed 13.562 ms (271.235 ms / 20) 271.306 -> 271.584 ( +0.10%) [ +0.00% +0.23% +0.30% / +0.36% +0.10% +0.19%] index_select strided 7 : Elapsed 13.565 ms (271.306 ms / 20) 271.508 -> 271.502 ( -0.00%) [ +0.26% +0.00% +0.11% / +0.28% +0.13% -0.00%] index_select strided 8 : Elapsed 13.611 ms (272.212 ms / 20) 271.512 -> 271.380 ( -0.05%) [ +0.23% +0.00% +0.08% / +0.18% -0.05% +0.13%] index_select strided 16 : Elapsed 13.607 ms (272.130 ms / 20) 271.423 -> 271.398 ( -0.01%) [ +0.30% +0.00% +0.22% / +0.23% +0.19% -0.01%] index_select strided 64 : Elapsed 13.612 ms (272.240 ms / 20) 271.719 -> 271.773 ( +0.02%) [ +0.00% +0.19% +0.09% / +0.08% +0.02% +0.03%] index_select random : Elapsed 13.586 ms (271.719 ms / 20) 271.207 -> 271.156 ( -0.02%) [ +0.14% +0.27% +0.00% / +0.14% -0.02% +0.15%] index_select random_sorted : Elapsed 13.580 ms (271.591 ms / 20) out_shape = [20, 40, 100, 256] in_shape = [20, 40, 100, 50] idx_dim = 3 out_shape = [256, 50, 40, 100] in_shape = [20, 50, 40, 100] idx_dim = 0 B = [256, 50, 40, 100] (stride (40, 10240, 1, 512000)) dim = 0 fill_cnt = 20 14.172 -> 13.853 ( -2.25%) [ +1.03% +0.78% +0.00% / +0.35% -2.11% -2.25%] index_fill_ const : Elapsed 0.716 ms (14.318 ms / 20) 30.670 -> 31.428 ( +2.47%) [ +0.00% +0.93% +1.48% / +2.47% +14.22% +11.32%] index_fill_ linear : Elapsed 1.534 ms (30.670 ms / 20) 32.416 -> 32.597 ( +0.56%) [ +0.00% +0.53% +0.16% / +0.56% +3.21% +4.72%] index_fill_ reverse : Elapsed 1.621 ms (32.416 ms / 20) 14.235 -> 13.848 ( -2.72%) [ +0.17% +0.10% +0.00% / +0.20% -2.46% -2.72%] index_fill_ skip64 : Elapsed 0.713 ms (14.259 ms / 20) 14.760 -> 14.336 ( -2.87%) [ +0.00% +0.35% +0.61% / +0.07% -2.87% -2.83%] index_fill_ skip256 : Elapsed 0.738 ms (14.760 ms / 20) 35.789 -> 36.563 ( +2.16%) [ +1.39% +0.62% +0.00% / +2.16% +11.25% +11.31%] index_fill_ spread : Elapsed 1.814 ms (36.288 ms / 20) 31.784 -> 32.460 ( +2.13%) [ +0.00% +2.27% +0.69% / +2.13% +12.26% +12.13%] index_fill_ strided 3 : Elapsed 1.589 ms (31.784 ms / 20) good 37.697 -> 34.972 ( -7.23%) [ +0.00% +0.11% +0.53% / -0.36% -6.93% -7.23%] index_fill_ strided 5 : Elapsed 1.885 ms (37.697 ms / 20) 37.749 -> 36.515 ( -3.27%) [ +0.44% +0.76% +0.00% / -1.62% -3.24% -3.27%] index_fill_ strided 7 : Elapsed 1.896 ms (37.915 ms / 20) 32.015 -> 30.681 ( -4.17%) [ +3.02% +0.00% +3.77% / +6.19% -4.17% -3.85%] index_fill_ strided 8 : Elapsed 1.649 ms (32.983 ms / 20) 31.293 -> 31.376 ( +0.27%) [ +0.00% +0.55% +3.56% / +3.83% +0.89% +0.27%] index_fill_ strided 16 : Elapsed 1.565 ms (31.293 ms / 20) 32.524 -> 32.628 ( +0.32%) [ +0.84% +1.59% +0.00% / +0.32% +16.82% +14.65%] index_fill_ strided 64 : Elapsed 1.640 ms (32.796 ms / 20) 34.976 -> 34.338 ( -1.82%) [ +1.94% +0.85% +0.00% / -1.82% -0.86% -1.27%] index_fill_ strided 100 : Elapsed 1.783 ms (35.655 ms / 20) 32.737 -> 31.849 ( -2.71%) [ +0.63% +0.00% +0.65% / +2.28% -2.71% -0.02%] index_fill_ strided 255 : Elapsed 1.647 ms (32.942 ms / 20) 34.383 -> 34.346 ( -0.11%) [ +3.34% +0.00% +3.19% / +4.64% +2.61% -0.11%] index_fill_ random : Elapsed 1.777 ms (35.531 ms / 20) 33.191 -> 33.582 ( +1.18%) [ +3.25% +0.00% +0.04% / +1.18% +7.58% +7.70%] index_fill_ random_sorted : Elapsed 1.713 ms (34.269 ms / 20) 33.564 -> 34.405 ( +2.51%) [ +3.90% +2.06% +0.00% / +2.51% +8.60% +9.64%] index_fill_ perm : Elapsed 1.744 ms (34.872 ms / 20) bad 33.449 -> 35.363 ( +5.72%) [ +0.00% +5.58% +5.50% / +5.72% +8.03% +8.36%] index_fill_ perm_sorted : Elapsed 1.672 ms (33.449 ms / 20) out_shape = [20, 256, 40, 100] in_shape = [20, 50, 40, 100] idx_dim = 1 B = [20, 256, 40, 100] (stride (1, 80000, 20, 800)) A = [20, 50, 40, 100] (stride (50, 1, 1000, 40000)) dim = 1 152.944 -> 152.622 ( -0.21%) [ +0.00% +0.27% +0.17% / -0.03% -0.16% -0.21%] index_add_ linear : Elapsed 7.647 ms (152.944 ms / 20) 130.552 -> 130.594 ( +0.03%) [ +0.12% +0.04% +0.00% / +0.14% +0.07% +0.03%] index_copy_ linear : Elapsed 6.536 ms (130.715 ms / 20) 152.855 -> 152.372 ( -0.32%) [ +0.03% +0.00% +0.06% / -0.15% -0.32% -0.00%] index_add_ reverse : Elapsed 7.645 ms (152.907 ms / 20) 130.583 -> 130.473 ( -0.08%) [ +0.04% +0.10% +0.00% / -0.08% +0.07% +0.07%] index_copy_ reverse : Elapsed 6.532 ms (130.631 ms / 20) 154.812 -> 155.012 ( +0.13%) [ +0.14% +0.00% +0.10% / +0.13% +0.55% +0.58%] index_add_ spread : Elapsed 7.751 ms (155.028 ms / 20) 130.635 -> 130.651 ( +0.01%) [ +0.00% +0.01% +0.00% / +0.01% +0.01% +0.17%] index_copy_ spread : Elapsed 6.532 ms (130.637 ms / 20) 153.902 -> 151.869 ( -1.32%) [ +0.14% +0.06% +0.00% / -0.09% -1.13% -1.32%] index_add_ strided 3 : Elapsed 7.705 ms (154.110 ms / 20) 130.720 -> 130.510 ( -0.16%) [ +0.14% +0.06% +0.00% / +0.07% -0.16% -0.02%] index_copy_ strided 3 : Elapsed 6.545 ms (130.901 ms / 20) 155.528 -> 152.531 ( -1.93%) [ +0.00% +0.18% +0.10% / -0.20% -1.75% -1.93%] index_add_ strided 5 : Elapsed 7.776 ms (155.528 ms / 20) 130.515 -> 130.638 ( +0.09%) [ +0.00% +0.12% +0.28% / +0.09% +0.17% +0.27%] index_copy_ strided 5 : Elapsed 6.526 ms (130.515 ms / 20) 154.630 -> 151.005 ( -2.34%) [ +0.22% +0.00% +0.12% / -0.15% -2.18% -2.34%] index_add_ strided 7 : Elapsed 7.749 ms (154.972 ms / 20) 130.694 -> 130.576 ( -0.09%) [ +0.13% +0.04% +0.00% / -0.09% +0.02% +0.01%] index_copy_ strided 7 : Elapsed 6.543 ms (130.862 ms / 20) 152.173 -> 152.464 ( +0.19%) [ +0.52% +0.31% +0.00% / +0.19% +0.34% +0.52%] index_add_ strided 255 : Elapsed 7.648 ms (152.965 ms / 20) 130.621 -> 130.673 ( +0.04%) [ +0.11% +0.09% +0.00% / +0.04% +0.07% +0.11%] index_copy_ strided 255 : Elapsed 6.538 ms (130.764 ms / 20) 151.581 -> 151.568 ( -0.01%) [ +0.00% +0.09% +0.01% / -0.01% +2.23% +1.85%] index_add_ perm : Elapsed 7.579 ms (151.581 ms / 20) 130.363 -> 130.722 ( +0.28%) [ +0.15% +0.11% +0.00% / +0.28% +0.32% +0.38%] index_copy_ perm : Elapsed 6.528 ms (130.557 ms / 20) 152.110 -> 151.644 ( -0.31%) [ +0.10% +0.20% +0.00% / -0.31% +2.29% +2.14%] index_add_ perm_sorted : Elapsed 7.613 ms (152.262 ms / 20) 130.311 -> 130.312 ( +0.00%) [ +0.00% +0.20% +0.07% / +0.00% +0.41% +0.38%] index_copy_ perm_sorted : Elapsed 6.516 ms (130.311 ms / 20) 740.295 -> 740.983 ( +0.09%) [ +0.24% +0.00% +0.09% / +0.32% +0.14% +0.09%] index_select const : Elapsed 37.105 ms (742.090 ms / 20) 742.435 -> 745.124 ( +0.36%) [ +0.77% +0.34% +0.00% / +0.44% +0.36% +0.43%] index_select wrap : Elapsed 37.407 ms (748.148 ms / 20) 740.722 -> 741.476 ( +0.10%) [ +0.05% +0.00% +0.66% / +0.24% +0.10% +0.22%] index_select linear : Elapsed 37.054 ms (741.081 ms / 20) 743.074 -> 743.661 ( +0.08%) [ +0.02% +0.02% +0.00% / +0.11% +0.22% +0.08%] index_select reverse : Elapsed 37.160 ms (743.191 ms / 20) 741.334 -> 740.240 ( -0.15%) [ +0.16% +0.00% +0.10% / +0.10% -0.02% -0.15%] index_select skip64 : Elapsed 37.125 ms (742.504 ms / 20) 739.834 -> 741.469 ( +0.22%) [ +0.27% +0.30% +0.00% / +0.37% +0.22% +0.23%] index_select skip256 : Elapsed 37.093 ms (741.864 ms / 20) 742.239 -> 740.191 ( -0.28%) [ +0.00% +0.37% +0.54% / +0.36% -0.28% +0.03%] index_select spread : Elapsed 37.112 ms (742.239 ms / 20) 749.797 -> 749.501 ( -0.04%) [ +0.00% +0.16% +0.32% / +0.22% -0.04% +0.01%] index_select strided 3 : Elapsed 37.490 ms (749.797 ms / 20) 750.772 -> 750.048 ( -0.10%) [ +0.04% +0.00% +0.24% / -0.10% +0.05% +0.21%] index_select strided 5 : Elapsed 37.553 ms (751.065 ms / 20) 750.043 -> 751.977 ( +0.26%) [ +0.09% +0.00% +0.13% / +0.31% +0.33% +0.26%] index_select strided 7 : Elapsed 37.537 ms (750.747 ms / 20) 748.539 -> 751.127 ( +0.35%) [ +0.03% +0.24% +0.00% / +0.55% +0.36% +0.35%] index_select strided 8 : Elapsed 37.439 ms (748.772 ms / 20) 750.921 -> 750.544 ( -0.05%) [ +0.00% +0.13% +0.22% / +0.14% -0.05% -0.00%] index_select strided 16 : Elapsed 37.546 ms (750.921 ms / 20) 748.141 -> 749.482 ( +0.18%) [ +0.00% +0.33% +0.48% / +0.18% +0.29% +0.33%] index_select random : Elapsed 37.407 ms (748.141 ms / 20) 742.844 -> 739.293 ( -0.48%) [ +0.27% +0.00% +0.15% / +0.13% -0.48% +0.04%] index_select random_sorted : Elapsed 37.241 ms (744.815 ms / 20) out_shape = [20, 50, 256, 100] in_shape = [20, 50, 40, 100] idx_dim = 2 out_shape = [20, 50, 40, 256] in_shape = [20, 50, 40, 100] idx_dim = 3 out_shape = [256, 50, 100, 40] in_shape = [20, 50, 100, 40] idx_dim = 0 B = [256, 50, 100, 40] (stride (4000, 1024000, 1, 100)) A = [20, 50, 100, 40] (stride (2000, 1, 40000, 50)) dim = 0 98.689 -> 96.843 ( -1.87%) [ +0.00% +0.71% +0.32% / +0.26% -1.87% -1.65%] index_add_ linear : Elapsed 4.934 ms (98.689 ms / 20) 86.804 -> 85.826 ( -1.13%) [ +0.18% +0.00% +0.25% / +0.15% -1.13% -0.83%] index_copy_ linear : Elapsed 4.348 ms (86.960 ms / 20) 97.364 -> 93.890 ( -3.57%) [ +0.28% +0.00% +0.05% / -0.15% -3.57% -3.30%] index_add_ reverse : Elapsed 4.882 ms (97.641 ms / 20) 86.708 -> 84.890 ( -2.10%) [ +0.00% +0.04% +0.03% / +0.07% -2.03% -2.10%] index_copy_ reverse : Elapsed 4.335 ms (86.708 ms / 20) 99.524 -> 98.555 ( -0.97%) [ +0.41% +0.00% +0.37% / -0.27% -0.91% -0.97%] index_add_ spread : Elapsed 4.997 ms (99.931 ms / 20) 87.930 -> 86.612 ( -1.50%) [ +0.39% +0.00% +0.48% / -0.16% -1.50% -0.97%] index_copy_ spread : Elapsed 4.414 ms (88.274 ms / 20) 100.418 -> 95.599 ( -4.80%) [ +0.62% +0.45% +0.00% / +0.06% -4.80% -4.43%] index_add_ strided 3 : Elapsed 5.052 ms (101.038 ms / 20) 88.546 -> 84.990 ( -4.02%) [ +0.29% +0.38% +0.00% / -0.04% -4.02% -3.66%] index_copy_ strided 3 : Elapsed 4.440 ms (88.804 ms / 20) 101.907 -> 99.087 ( -2.77%) [ +0.00% +0.40% +0.40% / +0.05% -2.66% -2.77%] index_add_ strided 5 : Elapsed 5.095 ms (101.907 ms / 20) 88.945 -> 87.690 ( -1.41%) [ +0.00% +0.18% +0.07% / -0.27% -1.41% -1.36%] index_copy_ strided 5 : Elapsed 4.447 ms (88.945 ms / 20) 97.213 -> 96.753 ( -0.47%) [ +0.24% +0.08% +0.00% / -0.47% +1.33% +1.10%] index_add_ strided 7 : Elapsed 4.873 ms (97.451 ms / 20) 88.856 -> 86.329 ( -2.84%) [ +0.00% +0.14% +0.21% / +0.04% -2.84% -2.54%] index_copy_ strided 7 : Elapsed 4.443 ms (88.856 ms / 20) 95.105 -> 91.963 ( -3.30%) [ +0.00% +0.21% +0.09% / +0.02% -3.30% -3.19%] index_add_ strided 255 : Elapsed 4.755 ms (95.105 ms / 20) 86.007 -> 84.730 ( -1.48%) [ +0.23% +0.29% +0.00% / +0.46% -1.15% -1.48%] index_copy_ strided 255 : Elapsed 4.310 ms (86.209 ms / 20) Good 105.134 -> 93.979 (-10.61%) [ +0.00% +0.20% +0.06% / -0.04% -10.48% -10.61%] index_add_ perm : Elapsed 5.257 ms (105.134 ms / 20) 88.743 -> 87.654 ( -1.23%) [ +0.00% +0.22% +0.27% / +0.35% -1.23% -0.88%] index_copy_ perm : Elapsed 4.437 ms (88.743 ms / 20) good 103.984 -> 96.448 ( -7.25%) [ +1.02% +0.70% +0.00% / -0.38% -7.25% -7.16%] index_add_ perm_sorted : Elapsed 5.252 ms (105.040 ms / 20) 89.397 -> 87.598 ( -2.01%) [ +0.03% +0.43% +0.00% / -0.20% -1.95% -2.01%] index_copy_ perm_sorted : Elapsed 4.471 ms (89.421 ms / 20) 1586.981 -> 1559.254 ( -1.75%) [ +0.04% +0.00% +0.02% / +0.04% -1.75% -1.60%] index_select const : Elapsed 79.377 ms (1587.547 ms / 20) 1593.466 -> 1597.651 ( +0.26%) [ +0.23% +0.00% +0.13% / +0.26% +0.71% +0.72%] index_select wrap : Elapsed 79.858 ms (1597.158 ms / 20) 1544.861 -> 1545.406 ( +0.04%) [ +0.00% +0.23% +0.23% / +0.47% +0.04% +0.04%] index_select linear : Elapsed 77.243 ms (1544.861 ms / 20) 1549.909 -> 1551.872 ( +0.13%) [ +0.00% +0.12% +0.31% / +0.13% +0.50% +0.48%] index_select reverse : Elapsed 77.495 ms (1549.909 ms / 20) 1586.796 -> 1563.166 ( -1.49%) [ +0.08% +0.00% +0.04% / +0.04% -1.49% -1.05%] index_select skip64 : Elapsed 79.401 ms (1588.014 ms / 20) 1585.560 -> 1556.767 ( -1.82%) [ +0.03% +0.00% +0.11% / +0.10% -1.67% -1.82%] index_select skip256 : Elapsed 79.300 ms (1586.001 ms / 20) 1576.349 -> 1564.221 ( -0.77%) [ +0.08% +0.00% +0.05% / +0.00% -0.71% -0.77%] index_select spread : Elapsed 78.879 ms (1577.581 ms / 20) 1599.037 -> 1601.856 ( +0.18%) [ +0.00% +0.14% +0.07% / +0.18% +0.55% +0.42%] index_select strided 3 : Elapsed 79.952 ms (1599.037 ms / 20) 1602.608 -> 1604.624 ( +0.13%) [ +0.07% +0.32% +0.00% / +0.13% +0.56% +0.60%] index_select strided 5 : Elapsed 80.189 ms (1603.776 ms / 20) 1594.189 -> 1595.723 ( +0.10%) [ +0.00% +0.01% +0.06% / +0.10% +0.81% +0.71%] index_select strided 7 : Elapsed 79.709 ms (1594.189 ms / 20) 1597.827 -> 1596.573 ( -0.08%) [ +0.00% +0.25% +0.03% / -0.08% -0.07% -0.06%] index_select strided 8 : Elapsed 79.891 ms (1597.827 ms / 20) 1596.043 -> 1595.095 ( -0.06%) [ +0.16% +0.00% +0.16% / +0.45% -0.06% +0.11%] index_select strided 16 : Elapsed 79.926 ms (1598.523 ms / 20) 1592.947 -> 1592.081 ( -0.05%) [ +0.08% +0.00% +0.03% / -0.05% +0.58% +0.43%] index_select random : Elapsed 79.709 ms (1594.179 ms / 20) 1575.945 -> 1563.020 ( -0.82%) [ +0.15% +0.05% +0.00% / -0.04% -0.82% -0.64%] index_select random_sorted : Elapsed 78.912 ms (1578.231 ms / 20) out_shape = [20, 256, 100, 40] in_shape = [20, 50, 100, 40] idx_dim = 1 out_shape = [20, 50, 256, 40] in_shape = [20, 50, 100, 40] idx_dim = 2 out_shape = [20, 50, 100, 256] in_shape = [20, 50, 100, 40] idx_dim = 3 out_shape = [256, 100, 40, 50] in_shape = [20, 100, 40, 50] idx_dim = 0 out_shape = [20, 256, 40, 50] in_shape = [20, 100, 40, 50] idx_dim = 1 out_shape = [20, 100, 256, 50] in_shape = [20, 100, 40, 50] idx_dim = 2 out_shape = [20, 100, 40, 256] in_shape = [20, 100, 40, 50] idx_dim = 3 out_shape = [256, 100, 50, 40] in_shape = [20, 100, 50, 40] idx_dim = 0 out_shape = [20, 256, 50, 40] in_shape = [20, 100, 50, 40] idx_dim = 1 out_shape = [20, 100, 256, 40] in_shape = [20, 100, 50, 40] idx_dim = 2 out_shape = [20, 100, 50, 256] in_shape = [20, 100, 50, 40] idx_dim = 3 out_shape = [256, 20, 50, 100] in_shape = [40, 20, 50, 100] idx_dim = 0 out_shape = [40, 256, 50, 100] in_shape = [40, 20, 50, 100] idx_dim = 1 B = [40, 256, 50, 100] (stride (1, 4000, 1024000, 40)) A = [40, 20, 50, 100] (stride (100000, 5000, 1, 50)) dim = 1 83.320 -> 83.493 ( +0.21%) [ +0.17% +0.25% +0.00% / +0.21% +2.60% +2.64%] index_add_ linear : Elapsed 4.173 ms (83.459 ms / 20) 82.998 -> 83.646 ( +0.78%) [ +0.20% +0.20% +0.00% / +0.78% +2.61% +2.24%] index_copy_ linear : Elapsed 4.158 ms (83.166 ms / 20) 83.798 -> 83.507 ( -0.35%) [ +0.00% +0.13% +0.17% / -0.20% -0.35% -0.14%] index_add_ reverse : Elapsed 4.190 ms (83.798 ms / 20) 84.464 -> 84.207 ( -0.30%) [ +0.00% +0.33% +0.07% / -0.30% -0.19% -0.30%] index_copy_ reverse : Elapsed 4.223 ms (84.464 ms / 20) 85.301 -> 85.157 ( -0.17%) [ +0.00% +0.08% +0.06% / -0.17% +2.36% +2.36%] index_add_ spread : Elapsed 4.265 ms (85.301 ms / 20) 85.660 -> 85.841 ( +0.21%) [ +0.44% +0.04% +0.00% / +0.21% +1.45% +1.38%] index_copy_ spread : Elapsed 4.302 ms (86.040 ms / 20) 84.656 -> 84.516 ( -0.17%) [ +0.11% +0.08% +0.00% / -0.17% +1.42% +1.65%] index_add_ strided 3 : Elapsed 4.238 ms (84.750 ms / 20) 84.517 -> 84.711 ( +0.23%) [ +0.00% +0.58% +0.19% / +0.23% +0.90% +0.67%] index_copy_ strided 3 : Elapsed 4.226 ms (84.517 ms / 20) 86.729 -> 86.976 ( +0.28%) [ +0.42% +0.00% +0.30% / +0.28% +1.53% +1.34%] index_add_ strided 5 : Elapsed 4.355 ms (87.097 ms / 20) 85.709 -> 86.125 ( +0.49%) [ +0.82% +0.00% +0.51% / +0.49% +2.37% +2.01%] index_copy_ strided 5 : Elapsed 4.321 ms (86.414 ms / 20) 84.664 -> 84.795 ( +0.15%) [ +0.22% +0.41% +0.00% / +0.15% +1.51% +1.61%] index_add_ strided 7 : Elapsed 4.242 ms (84.849 ms / 20) 84.880 -> 85.594 ( +0.84%) [ +0.00% +0.36% +0.27% / +0.84% +0.93% +1.11%] index_copy_ strided 7 : Elapsed 4.244 ms (84.880 ms / 20) 83.326 -> 83.427 ( +0.12%) [ +0.12% +0.00% +0.21% / +0.14% +0.12% +0.18%] index_add_ strided 255 : Elapsed 4.171 ms (83.428 ms / 20) 82.966 -> 83.038 ( +0.09%) [ +0.09% +0.38% +0.00% / +0.09% +1.03% +1.38%] index_copy_ strided 255 : Elapsed 4.152 ms (83.041 ms / 20) good 89.881 -> 85.143 ( -5.27%) [ +0.15% +0.00% +0.06% / +0.16% -5.09% -5.27%] index_add_ perm : Elapsed 4.501 ms (90.014 ms / 20) 87.331 -> 86.001 ( -1.52%) [ +0.01% +0.02% +0.00% / +0.00% -1.52% -1.27%] index_copy_ perm : Elapsed 4.367 ms (87.336 ms / 20) 86.668 -> 86.713 ( +0.05%) [ +0.00% +0.43% +0.45% / +0.21% +0.05% +0.32%] index_add_ perm_sorted : Elapsed 4.333 ms (86.668 ms / 20) 85.065 -> 84.938 ( -0.15%) [ +0.05% +0.07% +0.00% / -0.15% +2.10% +1.74%] index_copy_ perm_sorted : Elapsed 4.256 ms (85.110 ms / 20) 1406.045 -> 1387.235 ( -1.34%) [ +0.05% +0.08% +0.00% / +0.11% -1.30% -1.34%] index_select const : Elapsed 70.337 ms (1406.745 ms / 20) 1432.151 -> 1432.359 ( +0.01%) [ +0.08% +0.05% +0.00% / +0.01% +0.73% +0.60%] index_select wrap : Elapsed 71.664 ms (1433.273 ms / 20) 1397.291 -> 1394.419 ( -0.21%) [ +0.02% +0.00% +0.01% / -0.02% -0.21% -0.20%] index_select linear : Elapsed 69.879 ms (1397.586 ms / 20) 1400.707 -> 1401.299 ( +0.04%) [ +0.32% +0.05% +0.00% / +0.29% +0.11% +0.04%] index_select reverse : Elapsed 70.261 ms (1405.214 ms / 20) 1406.673 -> 1387.685 ( -1.35%) [ +0.03% +0.03% +0.00% / +0.06% -1.11% -1.35%] index_select skip64 : Elapsed 70.356 ms (1407.125 ms / 20) 1406.193 -> 1386.182 ( -1.42%) [ +0.06% +0.00% +0.09% / +0.12% -1.42% -1.31%] index_select skip256 : Elapsed 70.349 ms (1406.983 ms / 20) 1407.480 -> 1405.413 ( -0.15%) [ +0.00% +0.12% +0.12% / +0.03% -0.00% -0.15%] index_select spread : Elapsed 70.374 ms (1407.480 ms / 20) 1439.894 -> 1437.770 ( -0.15%) [ +0.10% +0.02% +0.00% / -0.15% +0.14% +0.27%] index_select strided 3 : Elapsed 72.070 ms (1441.405 ms / 20) 1432.232 -> 1432.222 ( -0.00%) [ +0.00% +0.01% +0.00% / -0.00% +0.36% +0.53%] index_select strided 5 : Elapsed 71.612 ms (1432.241 ms / 20) 1435.414 -> 1435.512 ( +0.01%) [ +0.09% +0.10% +0.00% / +0.01% +0.52% +0.47%] index_select strided 7 : Elapsed 71.839 ms (1436.772 ms / 20) 1453.437 -> 1435.970 ( -1.20%) [ +0.05% +0.00% +0.11% / -0.06% -1.20% -1.11%] index_select strided 8 : Elapsed 72.709 ms (1454.177 ms / 20) 1452.745 -> 1437.599 ( -1.04%) [ +0.00% +0.06% +0.06% / -0.06% -0.98% -1.04%] index_select strided 16 : Elapsed 72.637 ms (1452.745 ms / 20) 1435.487 -> 1437.661 ( +0.15%) [ +0.10% +0.00% +0.11% / +0.15% +0.56% +0.36%] index_select random : Elapsed 71.848 ms (1436.968 ms / 20) 1407.119 -> 1406.024 ( -0.08%) [ +0.07% +0.09% +0.00% / +0.14% -0.08% -0.07%] index_select random_sorted : Elapsed 70.403 ms (1408.061 ms / 20) out_shape = [40, 20, 256, 100] in_shape = [40, 20, 50, 100] idx_dim = 2 B = [40, 20, 256, 100] (stride (512000, 100, 2000, 1)) A = [40, 20, 50, 100] (stride (100000, 1, 2000, 20)) dim = 2 54.765 -> 54.733 ( -0.06%) [ +0.24% +0.13% +0.00% / -0.06% +0.09% -0.04%] index_add_ linear : Elapsed 2.745 ms (54.894 ms / 20) 52.814 -> 52.770 ( -0.08%) [ +0.29% +0.15% +0.00% / -0.08% +0.16% +0.13%] index_copy_ linear : Elapsed 2.648 ms (52.967 ms / 20) 54.572 -> 54.588 ( +0.03%) [ +0.00% +0.09% +0.05% / +0.03% +0.27% +0.40%] index_add_ reverse : Elapsed 2.729 ms (54.572 ms / 20) 52.712 -> 52.836 ( +0.24%) [ +0.16% +0.00% +0.13% / +0.24% +0.27% +0.33%] index_copy_ reverse : Elapsed 2.640 ms (52.798 ms / 20) 54.642 -> 54.630 ( -0.02%) [ +0.15% +0.20% +0.00% / -0.02% +0.81% +1.13%] index_add_ spread : Elapsed 2.736 ms (54.723 ms / 20) 52.663 -> 52.644 ( -0.04%) [ +0.00% +0.24% +0.05% / -0.04% +0.77% +0.96%] index_copy_ spread : Elapsed 2.633 ms (52.663 ms / 20) 54.664 -> 54.724 ( +0.11%) [ +0.22% +0.15% +0.00% / +0.11% +0.34% +0.46%] index_add_ strided 3 : Elapsed 2.739 ms (54.782 ms / 20) 52.771 -> 52.721 ( -0.09%) [ +0.26% +0.00% +0.05% / -0.09% +0.24% +0.43%] index_copy_ strided 3 : Elapsed 2.645 ms (52.909 ms / 20) 54.600 -> 54.702 ( +0.19%) [ +0.27% +0.00% +0.44% / +0.19% +0.88% +1.05%] index_add_ strided 5 : Elapsed 2.737 ms (54.749 ms / 20) 52.819 -> 52.775 ( -0.08%) [ +0.15% +0.00% +0.10% / -0.08% +0.62% +0.48%] index_copy_ strided 5 : Elapsed 2.645 ms (52.900 ms / 20) 54.784 -> 54.805 ( +0.04%) [ +0.18% +0.00% +0.16% / +0.19% +0.04% +0.06%] index_add_ strided 7 : Elapsed 2.744 ms (54.885 ms / 20) 52.795 -> 52.864 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.37% +0.41%] index_copy_ strided 7 : Elapsed 2.640 ms (52.796 ms / 20) 54.597 -> 54.578 ( -0.03%) [ +0.08% +0.00% +0.06% / -0.03% +0.57% +0.86%] index_add_ strided 255 : Elapsed 2.732 ms (54.642 ms / 20) 52.707 -> 52.782 ( +0.14%) [ +0.13% +0.00% +0.15% / +0.14% +0.60% +0.54%] index_copy_ strided 255 : Elapsed 2.639 ms (52.775 ms / 20) 54.988 -> 54.568 ( -0.76%) [ +0.05% +0.00% +0.06% / -0.05% -0.76% -0.47%] index_add_ perm : Elapsed 2.751 ms (55.017 ms / 20) 52.888 -> 52.785 ( -0.19%) [ +0.13% +0.00% +0.14% / +0.17% -0.19% +0.05%] index_copy_ perm : Elapsed 2.648 ms (52.959 ms / 20) 55.083 -> 54.715 ( -0.67%) [ +0.50% +0.00% +0.11% / +0.08% -0.66% -0.67%] index_add_ perm_sorted : Elapsed 2.768 ms (55.357 ms / 20) 52.907 -> 52.885 ( -0.04%) [ +0.31% +0.19% +0.00% / +0.22% +0.15% -0.04%] index_copy_ perm_sorted : Elapsed 2.654 ms (53.070 ms / 20) 202.125 -> 198.627 ( -1.73%) [ +0.01% +0.01% +0.00% / -0.01% -1.71% -1.73%] index_select const : Elapsed 10.107 ms (202.138 ms / 20) 289.326 -> 289.335 ( +0.00%) [ +0.07% +0.11% +0.00% / +0.00% +0.07% +0.17%] index_select wrap : Elapsed 14.476 ms (289.523 ms / 20) 209.836 -> 210.208 ( +0.18%) [ +0.13% +0.00% +0.26% / +0.18% +0.71% +0.76%] index_select linear : Elapsed 10.505 ms (210.109 ms / 20) 224.470 -> 218.911 ( -2.48%) [ +0.00% +0.09% +0.03% / -0.04% -2.47% -2.48%] index_select reverse : Elapsed 11.224 ms (224.470 ms / 20) 202.111 -> 198.642 ( -1.72%) [ +0.02% +0.03% +0.00% / +0.03% -1.66% -1.72%] index_select skip64 : Elapsed 10.108 ms (202.152 ms / 20) 202.119 -> 198.583 ( -1.75%) [ +0.02% +0.00% +0.04% / -0.00% -1.74% -1.75%] index_select skip256 : Elapsed 10.108 ms (202.166 ms / 20) 270.051 -> 269.871 ( -0.07%) [ +0.22% +0.21% +0.00% / +0.09% -0.07% -0.05%] index_select spread : Elapsed 13.532 ms (270.633 ms / 20) 289.378 -> 289.127 ( -0.09%) [ +0.05% +0.00% +0.01% / +0.01% -0.09% +0.04%] index_select strided 3 : Elapsed 14.476 ms (289.522 ms / 20) 285.066 -> 285.225 ( +0.06%) [ +0.18% +0.11% +0.00% / +0.06% +0.19% +0.21%] index_select strided 5 : Elapsed 14.278 ms (285.569 ms / 20) 289.075 -> 288.511 ( -0.20%) [ +0.05% +0.07% +0.00% / +0.06% -0.20% -0.14%] index_select strided 7 : Elapsed 14.461 ms (289.223 ms / 20) 287.557 -> 286.757 ( -0.28%) [ +0.08% +0.00% +0.06% / +0.08% -0.28% -0.24%] index_select strided 8 : Elapsed 14.389 ms (287.788 ms / 20) 288.255 -> 286.499 ( -0.61%) [ +0.00% +0.07% +0.02% / +0.05% -0.61% -0.58%] index_select strided 16 : Elapsed 14.413 ms (288.255 ms / 20) 287.438 -> 287.437 ( -0.00%) [ +0.00% +0.01% +0.03% / -0.00% +0.28% +0.26%] index_select random : Elapsed 14.372 ms (287.438 ms / 20) 267.737 -> 267.111 ( -0.23%) [ +0.05% +0.21% +0.00% / +0.11% -0.23% -0.06%] index_select random_sorted : Elapsed 13.393 ms (267.869 ms / 20) out_shape = [40, 20, 50, 256] in_shape = [40, 20, 50, 100] idx_dim = 3 out_shape = [256, 20, 100, 50] in_shape = [40, 20, 100, 50] idx_dim = 0 B = [256, 20, 100, 50] (stride (20, 1, 5120, 512000)) A = [40, 20, 100, 50] (stride (2000, 1, 20, 80000)) dim = 0 155.083 -> 151.208 ( -2.50%) [ +0.50% +0.07% +0.00% / -0.13% -2.02% -2.50%] index_add_ linear : Elapsed 7.793 ms (155.865 ms / 20) 107.984 -> 106.736 ( -1.16%) [ +0.46% +0.00% +0.23% / +0.28% -1.16% -0.88%] index_copy_ linear : Elapsed 5.424 ms (108.482 ms / 20) 152.058 -> 149.014 ( -2.00%) [ +0.00% +0.48% +0.12% / -0.75% -2.00% -1.51%] index_add_ reverse : Elapsed 7.603 ms (152.058 ms / 20) 108.250 -> 108.051 ( -0.18%) [ +0.34% +0.00% +0.04% / -0.18% +0.27% +0.15%] index_copy_ reverse : Elapsed 5.431 ms (108.623 ms / 20) 154.945 -> 155.418 ( +0.31%) [ +0.00% +1.00% +0.32% / +0.87% +0.31% +0.85%] index_add_ spread : Elapsed 7.747 ms (154.945 ms / 20) 118.276 -> 118.293 ( +0.01%) [ +0.16% +0.00% +0.63% / +0.01% +0.82% +1.03%] index_copy_ spread : Elapsed 5.923 ms (118.469 ms / 20) 157.961 -> 155.966 ( -1.26%) [ +0.61% +0.00% +0.31% / -0.23% -1.23% -1.26%] index_add_ strided 3 : Elapsed 7.947 ms (158.930 ms / 20) 116.488 -> 114.838 ( -1.42%) [ +0.49% +0.00% +0.14% / +0.61% -1.42% -0.60%] index_copy_ strided 3 : Elapsed 5.853 ms (117.058 ms / 20) 155.572 -> 155.344 ( -0.15%) [ +0.54% +0.59% +0.00% / +0.53% +0.29% -0.15%] index_add_ strided 5 : Elapsed 7.821 ms (156.412 ms / 20) 114.541 -> 114.761 ( +0.19%) [ +0.58% +0.44% +0.00% / +0.79% +0.19% +0.27%] index_copy_ strided 5 : Elapsed 5.760 ms (115.202 ms / 20) 155.218 -> 154.888 ( -0.21%) [ +0.15% +0.00% +0.15% / -0.21% +0.46% +0.58%] index_add_ strided 7 : Elapsed 7.773 ms (155.451 ms / 20) 115.417 -> 115.430 ( +0.01%) [ +0.34% +0.88% +0.00% / +0.01% +0.91% +1.17%] index_copy_ strided 7 : Elapsed 5.791 ms (115.814 ms / 20) 155.064 -> 151.932 ( -2.02%) [ +0.00% +0.75% +0.57% / -0.13% -2.02% -1.92%] index_add_ strided 255 : Elapsed 7.753 ms (155.064 ms / 20) 108.175 -> 104.082 ( -3.78%) [ +0.07% +0.59% +0.00% / +0.09% -3.43% -3.78%] index_copy_ strided 255 : Elapsed 5.412 ms (108.248 ms / 20) 156.277 -> 154.412 ( -1.19%) [ +0.00% +1.24% +0.95% / +0.53% -1.19% -1.10%] index_add_ perm : Elapsed 7.814 ms (156.277 ms / 20) 118.329 -> 115.911 ( -2.04%) [ +0.20% +0.00% +0.40% / +0.47% -1.99% -2.04%] index_copy_ perm : Elapsed 5.928 ms (118.567 ms / 20) 154.695 -> 153.631 ( -0.69%) [ +0.00% +0.01% +0.30% / -0.37% -0.30% -0.69%] index_add_ perm_sorted : Elapsed 7.735 ms (154.695 ms / 20) 115.503 -> 113.804 ( -1.47%) [ +0.00% +0.36% +0.11% / +0.19% -1.47% -1.38%] index_copy_ perm_sorted : Elapsed 5.775 ms (115.503 ms / 20) good 809.552 -> 761.141 ( -5.98%) [ +0.14% +0.00% +0.01% / +0.04% -5.98% -5.96%] index_select const : Elapsed 40.535 ms (810.698 ms / 20) 923.354 -> 926.586 ( +0.35%) [ +0.20% +0.42% +0.00% / +0.35% +1.44% +1.23%] index_select wrap : Elapsed 46.260 ms (925.199 ms / 20) 774.631 -> 772.258 ( -0.31%) [ +0.02% +0.03% +0.00% / -0.31% +2.59% +2.51%] index_select linear : Elapsed 38.739 ms (774.773 ms / 20) 795.474 -> 797.617 ( +0.27%) [ +0.00% +0.07% +0.11% / +0.27% +4.05% +3.85%] index_select reverse : Elapsed 39.774 ms (795.474 ms / 20) good 809.808 -> 761.347 ( -5.98%) [ +0.04% +0.14% +0.00% / +0.04% -5.83% -5.98%] index_select skip64 : Elapsed 40.505 ms (810.104 ms / 20) good 808.240 -> 760.710 ( -5.88%) [ +0.07% +0.00% +0.07% / -0.09% -5.88% -5.74%] index_select skip256 : Elapsed 40.438 ms (808.766 ms / 20) 846.979 -> 847.298 ( +0.04%) [ +0.31% +0.27% +0.00% / +0.04% +0.75% +0.87%] index_select spread : Elapsed 42.482 ms (849.643 ms / 20) 948.182 -> 947.048 ( -0.12%) [ +0.03% +0.12% +0.00% / -0.12% +0.00% +0.15%] index_select strided 3 : Elapsed 47.421 ms (948.426 ms / 20) 942.968 -> 943.906 ( +0.10%) [ +0.00% +0.05% +0.01% / +0.10% +0.18% +0.32%] index_select strided 5 : Elapsed 47.148 ms (942.968 ms / 20) 949.912 -> 948.014 ( -0.20%) [ +0.15% +0.04% +0.00% / -0.20% +0.87% +0.63%] index_select strided 7 : Elapsed 47.568 ms (951.354 ms / 20) 950.692 -> 951.552 ( +0.09%) [ +0.13% +0.00% +0.22% / +0.09% +1.52% +1.45%] index_select strided 8 : Elapsed 47.596 ms (951.921 ms / 20) 954.525 -> 954.843 ( +0.03%) [ +0.22% +0.04% +0.00% / +0.03% +0.88% +0.92%] index_select strided 16 : Elapsed 47.829 ms (956.579 ms / 20) 934.487 -> 934.361 ( -0.01%) [ +0.01% +0.05% +0.00% / -0.01% +1.85% +1.96%] index_select random : Elapsed 46.728 ms (934.562 ms / 20) 837.738 -> 836.628 ( -0.13%) [ +0.02% +0.17% +0.00% / -0.13% +0.11% +0.22%] index_select random_sorted : Elapsed 41.896 ms (837.916 ms / 20) out_shape = [40, 256, 100, 50] in_shape = [40, 20, 100, 50] idx_dim = 1 out_shape = [40, 20, 256, 50] in_shape = [40, 20, 100, 50] idx_dim = 2 B = [40, 20, 256, 50] (stride (12800, 512000, 50, 1)) A = [40, 20, 100, 50] (stride (100000, 5000, 1, 100)) dim = 2 74.489 -> 74.467 ( -0.03%) [ +0.02% +0.04% +0.00% / -0.03% +0.46% +0.60%] index_add_ linear : Elapsed 3.725 ms (74.502 ms / 20) 71.589 -> 71.639 ( +0.07%) [ +0.00% +0.16% +0.11% / +0.07% +0.40% +0.52%] index_copy_ linear : Elapsed 3.579 ms (71.589 ms / 20) 74.289 -> 74.436 ( +0.20%) [ +0.16% +0.28% +0.00% / +0.20% +0.48% +0.59%] index_add_ reverse : Elapsed 3.720 ms (74.409 ms / 20) 71.518 -> 71.650 ( +0.18%) [ +0.16% +0.05% +0.00% / +0.18% +0.26% +0.31%] index_copy_ reverse : Elapsed 3.582 ms (71.635 ms / 20) 75.133 -> 75.288 ( +0.21%) [ +0.06% +0.00% +0.14% / +0.21% +0.50% +0.50%] index_add_ spread : Elapsed 3.759 ms (75.179 ms / 20) 71.952 -> 72.123 ( +0.24%) [ +0.08% +0.00% +0.15% / +0.24% +0.37% +0.46%] index_copy_ spread : Elapsed 3.600 ms (72.009 ms / 20) 74.698 -> 74.625 ( -0.10%) [ +0.00% +0.03% +0.01% / -0.10% +0.08% +0.20%] index_add_ strided 3 : Elapsed 3.735 ms (74.698 ms / 20) 71.697 -> 71.737 ( +0.06%) [ +0.08% +0.00% +0.05% / +0.06% +0.20% +0.31%] index_copy_ strided 3 : Elapsed 3.588 ms (71.752 ms / 20) 74.676 -> 74.723 ( +0.06%) [ +0.00% +0.14% +0.08% / +0.06% +0.41% +0.40%] index_add_ strided 5 : Elapsed 3.734 ms (74.676 ms / 20) 71.794 -> 71.733 ( -0.08%) [ +0.01% +0.05% +0.00% / -0.08% +0.15% +0.18%] index_copy_ strided 5 : Elapsed 3.590 ms (71.802 ms / 20) 74.666 -> 74.722 ( +0.08%) [ +0.19% +0.00% +0.25% / +0.08% +0.34% +0.13%] index_add_ strided 7 : Elapsed 3.740 ms (74.809 ms / 20) 71.671 -> 71.781 ( +0.15%) [ +0.32% +0.00% +0.37% / +0.15% +0.30% +0.24%] index_copy_ strided 7 : Elapsed 3.595 ms (71.898 ms / 20) 74.344 -> 74.501 ( +0.21%) [ +0.00% +0.13% +0.01% / +0.21% +0.22% +0.30%] index_add_ strided 255 : Elapsed 3.717 ms (74.344 ms / 20) 71.417 -> 71.600 ( +0.26%) [ +0.00% +0.20% +0.16% / +0.26% +0.38% +0.37%] index_copy_ strided 255 : Elapsed 3.571 ms (71.417 ms / 20) 74.782 -> 74.901 ( +0.16%) [ +0.09% +0.07% +0.00% / +0.16% +0.27% +0.22%] index_add_ perm : Elapsed 3.742 ms (74.846 ms / 20) 71.894 -> 71.954 ( +0.08%) [ +0.00% +0.01% +0.03% / +0.18% +0.32% +0.08%] index_copy_ perm : Elapsed 3.595 ms (71.894 ms / 20) 74.533 -> 74.585 ( +0.07%) [ +0.06% +0.10% +0.00% / +0.07% +0.43% +0.51%] index_add_ perm_sorted : Elapsed 3.729 ms (74.579 ms / 20) 71.665 -> 71.789 ( +0.17%) [ +0.03% +0.23% +0.00% / +0.17% +0.34% +0.47%] index_copy_ perm_sorted : Elapsed 3.585 ms (71.690 ms / 20) 176.050 -> 176.274 ( +0.13%) [ +0.00% +0.14% +0.05% / +0.21% +0.16% +0.13%] index_select const : Elapsed 8.802 ms (176.050 ms / 20) 183.947 -> 184.156 ( +0.11%) [ +0.00% +0.10% +0.08% / +0.11% +0.26% +0.30%] index_select wrap : Elapsed 9.197 ms (183.947 ms / 20) 178.222 -> 178.443 ( +0.12%) [ +0.04% +0.00% +0.09% / +0.12% +0.41% +0.41%] index_select linear : Elapsed 8.915 ms (178.293 ms / 20) 180.110 -> 180.276 ( +0.09%) [ +0.02% +0.00% +0.01% / +0.09% +0.26% +0.24%] index_select reverse : Elapsed 9.008 ms (180.151 ms / 20) 176.020 -> 176.358 ( +0.19%) [ +0.02% +0.04% +0.00% / +0.19% +0.41% +0.42%] index_select skip64 : Elapsed 8.803 ms (176.058 ms / 20) 176.045 -> 176.032 ( -0.01%) [ +0.15% +0.00% +0.10% / -0.01% +0.29% +0.15%] index_select skip256 : Elapsed 8.815 ms (176.302 ms / 20) 181.725 -> 181.728 ( +0.00%) [ +0.02% +0.00% +0.08% / +0.00% +0.33% +0.21%] index_select spread : Elapsed 9.088 ms (181.757 ms / 20) 185.629 -> 185.702 ( +0.04%) [ +0.01% +0.00% +0.02% / +0.04% +0.24% +0.28%] index_select strided 3 : Elapsed 9.283 ms (185.650 ms / 20) 185.876 -> 185.883 ( +0.00%) [ +0.00% +0.06% +0.04% / +0.00% +0.29% +0.26%] index_select strided 5 : Elapsed 9.294 ms (185.876 ms / 20) 185.939 -> 185.925 ( -0.01%) [ +0.02% +0.01% +0.00% / -0.01% +0.29% +0.33%] index_select strided 7 : Elapsed 9.298 ms (185.967 ms / 20) 185.950 -> 186.097 ( +0.08%) [ +0.06% +0.05% +0.00% / +0.08% +0.34% +0.34%] index_select strided 8 : Elapsed 9.303 ms (186.066 ms / 20) 186.001 -> 186.111 ( +0.06%) [ +0.00% +0.04% +0.02% / +0.06% +0.30% +0.30%] index_select strided 16 : Elapsed 9.300 ms (186.001 ms / 20) 186.015 -> 186.088 ( +0.04%) [ +0.01% +0.00% +0.01% / +0.04% +0.30% +0.25%] index_select strided 64 : Elapsed 9.302 ms (186.031 ms / 20) 185.637 -> 185.743 ( +0.06%) [ +0.08% +0.07% +0.00% / +0.06% +0.28% +0.32%] index_select random : Elapsed 9.290 ms (185.794 ms / 20) 181.532 -> 181.528 ( -0.00%) [ +0.01% +0.03% +0.00% / -0.00% +0.23% +0.26%] index_select random_sorted : Elapsed 9.077 ms (181.547 ms / 20) B = [40, 20, 256, 50] (stride (50, 512000, 2000, 1)) A = [40, 20, 100, 50] (stride (50, 200000, 2000, 1)) dim = 2 43.866 -> 43.950 ( +0.19%) [ +0.00% +0.15% +0.17% / +0.19% +0.71% +0.69%] index_add_ linear : Elapsed 2.193 ms (43.866 ms / 20) 41.689 -> 41.762 ( +0.18%) [ +0.00% +0.02% +0.13% / +0.18% +0.30% +0.22%] index_copy_ linear : Elapsed 2.084 ms (41.689 ms / 20) 43.860 -> 43.844 ( -0.04%) [ +0.07% +0.00% +0.14% / +0.22% -0.01% -0.04%] index_add_ reverse : Elapsed 2.195 ms (43.891 ms / 20) 41.638 -> 41.714 ( +0.18%) [ +0.00% +0.16% +0.26% / +0.18% +0.18% +0.23%] index_copy_ reverse : Elapsed 2.082 ms (41.638 ms / 20) 43.871 -> 43.876 ( +0.01%) [ +0.00% +0.20% +0.18% / +0.27% +0.01% +0.16%] index_add_ spread : Elapsed 2.194 ms (43.871 ms / 20) 41.627 -> 41.658 ( +0.07%) [ +0.00% +0.10% +0.02% / +0.07% +0.10% +0.09%] index_copy_ spread : Elapsed 2.081 ms (41.627 ms / 20) 43.756 -> 43.834 ( +0.18%) [ +0.00% +0.01% +0.21% / +0.18% +0.48% +0.30%] index_add_ strided 3 : Elapsed 2.188 ms (43.756 ms / 20) 41.527 -> 41.569 ( +0.10%) [ +0.00% +0.02% +0.21% / +0.11% +0.27% +0.10%] index_copy_ strided 3 : Elapsed 2.076 ms (41.527 ms / 20) 43.944 -> 43.898 ( -0.10%) [ +0.00% +0.03% +0.17% / +0.24% -0.08% -0.10%] index_add_ strided 5 : Elapsed 2.197 ms (43.944 ms / 20) 41.599 -> 41.567 ( -0.08%) [ +0.00% +0.13% +0.30% / +0.18% -0.08% -0.01%] index_copy_ strided 5 : Elapsed 2.080 ms (41.599 ms / 20) 43.963 -> 43.931 ( -0.07%) [ +0.00% +0.20% +0.14% / +0.23% -0.07% +0.04%] index_add_ strided 7 : Elapsed 2.198 ms (43.963 ms / 20) 41.605 -> 41.653 ( +0.12%) [ +0.00% +0.07% +0.00% / +0.19% +0.12% +0.26%] index_copy_ strided 7 : Elapsed 2.080 ms (41.607 ms / 20) 43.814 -> 43.812 ( -0.00%) [ +0.02% +0.13% +0.00% / +0.23% -0.00% -0.00%] index_add_ strided 255 : Elapsed 2.191 ms (43.821 ms / 20) 41.692 -> 41.725 ( +0.08%) [ +0.00% +0.03% +0.17% / +0.08% +0.19% +0.24%] index_copy_ strided 255 : Elapsed 2.085 ms (41.692 ms / 20) 43.618 -> 43.642 ( +0.06%) [ +0.14% +0.00% +0.08% / +0.06% +0.53% +0.56%] index_add_ perm : Elapsed 2.184 ms (43.677 ms / 20) 41.648 -> 41.673 ( +0.06%) [ +0.08% +0.00% +0.09% / +0.06% +0.24% +0.20%] index_copy_ perm : Elapsed 2.084 ms (41.682 ms / 20) 43.733 -> 43.734 ( +0.00%) [ +0.13% +0.00% +0.19% / +0.00% +0.02% +0.23%] index_add_ perm_sorted : Elapsed 2.189 ms (43.788 ms / 20) 41.555 -> 41.594 ( +0.09%) [ +0.12% +0.00% +0.24% / +0.14% +0.09% +0.12%] index_copy_ perm_sorted : Elapsed 2.080 ms (41.604 ms / 20) 99.301 -> 99.358 ( +0.06%) [ +0.00% +0.00% +0.04% / +0.06% +0.33% +0.30%] index_select const : Elapsed 4.965 ms (99.303 ms / 20) 106.911 -> 106.935 ( +0.02%) [ +0.00% +0.14% +0.19% / +0.08% +0.06% +0.02%] index_select wrap : Elapsed 5.346 ms (106.911 ms / 20) 100.871 -> 100.754 ( -0.12%) [ +0.07% +0.00% +0.06% / +0.03% -0.08% -0.12%] index_select linear : Elapsed 5.047 ms (100.938 ms / 20) 101.569 -> 101.570 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.08% +0.03% +0.00%] index_select reverse : Elapsed 5.079 ms (101.570 ms / 20) 100.048 -> 98.681 ( -1.37%) [ +0.15% +0.03% +0.00% / +0.12% -1.37% -1.34%] index_select skip64 : Elapsed 5.010 ms (100.194 ms / 20) 99.405 -> 99.294 ( -0.11%) [ +0.00% +0.01% +0.00% / -0.11% +0.28% +0.28%] index_select skip256 : Elapsed 4.970 ms (99.405 ms / 20) 105.839 -> 105.823 ( -0.02%) [ +0.16% +0.00% +0.03% / +0.11% -0.02% +0.01%] index_select spread : Elapsed 5.300 ms (106.009 ms / 20) 107.001 -> 107.088 ( +0.08%) [ +0.05% +0.00% +0.19% / +0.18% +0.08% +0.10%] index_select strided 3 : Elapsed 5.353 ms (107.057 ms / 20) 106.875 -> 106.148 ( -0.68%) [ +0.12% +0.00% +0.03% / -0.00% -0.61% -0.68%] index_select strided 5 : Elapsed 5.350 ms (107.006 ms / 20) 107.001 -> 107.089 ( +0.08%) [ +0.08% +0.02% +0.00% / +0.08% +0.15% +0.10%] index_select strided 7 : Elapsed 5.355 ms (107.090 ms / 20) 106.807 -> 106.926 ( +0.11%) [ +0.04% +0.00% +0.24% / +0.12% +0.17% +0.11%] index_select strided 8 : Elapsed 5.342 ms (106.845 ms / 20) 106.829 -> 106.897 ( +0.06%) [ +0.07% +0.00% +0.09% / +0.06% +0.16% +0.11%] index_select strided 16 : Elapsed 5.345 ms (106.899 ms / 20) 106.813 -> 106.864 ( +0.05%) [ +0.04% +0.00% +0.04% / +0.16% +0.17% +0.05%] index_select strided 64 : Elapsed 5.343 ms (106.859 ms / 20) 106.834 -> 106.780 ( -0.05%) [ +0.11% +0.00% +0.14% / +0.17% -0.05% -0.05%] index_select random : Elapsed 5.348 ms (106.953 ms / 20) 105.479 -> 105.418 ( -0.06%) [ +0.00% +0.09% +0.02% / +0.15% -0.06% +0.04%] index_select random_sorted : Elapsed 5.274 ms (105.479 ms / 20) out_shape = [40, 20, 100, 256] in_shape = [40, 20, 100, 50] idx_dim = 3 out_shape = [256, 50, 20, 100] in_shape = [40, 50, 20, 100] idx_dim = 0 out_shape = [40, 256, 20, 100] in_shape = [40, 50, 20, 100] idx_dim = 1 B = [40, 256, 20, 100] (stride (25600, 1, 1024000, 256)) A = [40, 50, 20, 100] (stride (1000, 20, 1, 40000)) dim = 1 203.538 -> 203.368 ( -0.08%) [ +0.09% +0.00% +0.04% / -0.01% -0.08% +0.05%] index_add_ linear : Elapsed 10.186 ms (203.724 ms / 20) 156.108 -> 155.457 ( -0.42%) [ +0.00% +0.00% +0.01% / -0.10% -0.42% -0.27%] index_copy_ linear : Elapsed 7.806 ms (156.114 ms / 20) 203.288 -> 203.309 ( +0.01%) [ +0.15% +0.23% +0.00% / +0.01% +0.09% +0.10%] index_add_ reverse : Elapsed 10.179 ms (203.584 ms / 20) 155.762 -> 155.662 ( -0.06%) [ +0.30% +0.14% +0.00% / +0.09% +0.04% -0.06%] index_copy_ reverse : Elapsed 7.811 ms (156.225 ms / 20) 206.583 -> 206.527 ( -0.03%) [ +0.09% +0.16% +0.00% / +0.07% +0.05% -0.03%] index_add_ spread : Elapsed 10.338 ms (206.765 ms / 20) 157.287 -> 156.933 ( -0.23%) [ +0.00% +0.07% +0.06% / +0.21% -0.19% -0.23%] index_copy_ spread : Elapsed 7.864 ms (157.287 ms / 20) 205.158 -> 204.963 ( -0.10%) [ +0.07% +0.00% +0.01% / -0.10% +0.11% -0.00%] index_add_ strided 3 : Elapsed 10.265 ms (205.302 ms / 20) 156.241 -> 156.410 ( +0.11%) [ +0.03% +0.22% +0.00% / +0.17% +0.11% +0.27%] index_copy_ strided 3 : Elapsed 7.815 ms (156.292 ms / 20) 206.286 -> 206.473 ( +0.09%) [ +0.17% +0.00% +0.07% / +0.11% +0.09% +0.24%] index_add_ strided 5 : Elapsed 10.331 ms (206.627 ms / 20) 157.035 -> 157.059 ( +0.02%) [ +0.09% +0.00% +0.01% / +0.02% +0.02% +0.02%] index_copy_ strided 5 : Elapsed 7.858 ms (157.169 ms / 20) 206.883 -> 206.653 ( -0.11%) [ +0.01% +0.38% +0.00% / -0.11% +0.22% +0.43%] index_add_ strided 7 : Elapsed 10.345 ms (206.903 ms / 20) 157.479 -> 157.392 ( -0.06%) [ +0.14% +0.02% +0.00% / -0.06% +0.10% +0.12%] index_copy_ strided 7 : Elapsed 7.885 ms (157.697 ms / 20) 203.127 -> 203.233 ( +0.05%) [ +0.05% +0.00% +0.01% / +0.05% +0.28% +0.06%] index_add_ strided 255 : Elapsed 10.162 ms (203.235 ms / 20) 155.356 -> 155.427 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.17% +0.23%] index_copy_ strided 255 : Elapsed 7.775 ms (155.505 ms / 20) 210.214 -> 209.830 ( -0.18%) [ +0.11% +0.05% +0.00% / -0.04% -0.18% -0.05%] index_add_ perm : Elapsed 10.522 ms (210.437 ms / 20) 159.710 -> 159.341 ( -0.23%) [ +0.16% +0.06% +0.00% / +0.17% -0.23% -0.19%] index_copy_ perm : Elapsed 7.998 ms (159.965 ms / 20) 206.011 -> 205.411 ( -0.29%) [ +0.12% +0.07% +0.00% / +0.08% -0.29% -0.13%] index_add_ perm_sorted : Elapsed 10.312 ms (206.249 ms / 20) 157.211 -> 156.809 ( -0.26%) [ +0.00% +0.20% +0.04% / -0.13% -0.26% -0.10%] index_copy_ perm_sorted : Elapsed 7.861 ms (157.211 ms / 20) 752.308 -> 752.693 ( +0.05%) [ +0.16% +0.00% +0.11% / +0.05% +0.63% +0.59%] index_select const : Elapsed 37.674 ms (753.476 ms / 20) 839.240 -> 838.698 ( -0.06%) [ +0.06% +0.00% +0.04% / -0.06% +0.14% +0.05%] index_select wrap : Elapsed 41.989 ms (839.785 ms / 20) 759.377 -> 760.089 ( +0.09%) [ +0.04% +0.00% +0.10% / +0.10% +0.10% +0.09%] index_select linear : Elapsed 37.983 ms (759.669 ms / 20) 771.069 -> 771.137 ( +0.01%) [ +0.00% +0.03% +0.04% / +0.01% +0.21% +0.16%] index_select reverse : Elapsed 38.553 ms (771.069 ms / 20) 752.889 -> 753.326 ( +0.06%) [ +0.07% +0.05% +0.00% / +0.06% +0.60% +0.48%] index_select skip64 : Elapsed 37.669 ms (753.379 ms / 20) 751.765 -> 753.352 ( +0.21%) [ +0.09% +0.16% +0.00% / +0.21% +0.63% +0.57%] index_select skip256 : Elapsed 37.622 ms (752.437 ms / 20) 802.556 -> 802.610 ( +0.01%) [ +0.08% +0.00% +0.09% / +0.13% +0.02% +0.01%] index_select spread : Elapsed 40.161 ms (803.214 ms / 20) 847.351 -> 846.548 ( -0.09%) [ +0.04% +0.00% +0.00% / +0.09% -0.08% -0.09%] index_select strided 3 : Elapsed 42.384 ms (847.682 ms / 20) 845.801 -> 845.076 ( -0.09%) [ +0.09% +0.00% +0.08% / -0.09% +0.19% +0.27%] index_select strided 5 : Elapsed 42.328 ms (846.564 ms / 20) 846.376 -> 846.981 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.14% +0.13%] index_select strided 7 : Elapsed 42.340 ms (846.791 ms / 20) 847.989 -> 846.732 ( -0.15%) [ +0.11% +0.02% +0.00% / +0.01% -0.09% -0.15%] index_select strided 8 : Elapsed 42.447 ms (848.941 ms / 20) 850.083 -> 849.786 ( -0.03%) [ +0.02% +0.00% +0.05% / +0.04% -0.03% -0.02%] index_select strided 16 : Elapsed 42.512 ms (850.245 ms / 20) 844.862 -> 845.147 ( +0.03%) [ +0.12% +0.00% +0.00% / +0.03% +0.32% +0.25%] index_select random : Elapsed 42.293 ms (845.858 ms / 20) 799.701 -> 799.581 ( -0.02%) [ +0.00% +0.19% +0.15% / +0.06% -0.02% -0.00%] index_select random_sorted : Elapsed 39.985 ms (799.701 ms / 20) out_shape = [40, 50, 256, 100] in_shape = [40, 50, 20, 100] idx_dim = 2 out_shape = [40, 50, 20, 256] in_shape = [40, 50, 20, 100] idx_dim = 3 out_shape = [256, 50, 100, 20] in_shape = [40, 50, 100, 20] idx_dim = 0 out_shape = [40, 256, 100, 20] in_shape = [40, 50, 100, 20] idx_dim = 1 out_shape = [40, 50, 256, 20] in_shape = [40, 50, 100, 20] idx_dim = 2 out_shape = [40, 50, 100, 256] in_shape = [40, 50, 100, 20] idx_dim = 3 out_shape = [256, 100, 20, 50] in_shape = [40, 100, 20, 50] idx_dim = 0 out_shape = [40, 256, 20, 50] in_shape = [40, 100, 20, 50] idx_dim = 1 out_shape = [40, 100, 256, 50] in_shape = [40, 100, 20, 50] idx_dim = 2 out_shape = [40, 100, 20, 256] in_shape = [40, 100, 20, 50] idx_dim = 3 B = [40, 100, 20, 256] (stride (2000, 1, 100, 80000)) A = [40, 100, 20, 50] (stride (100000, 1000, 50, 1)) dim = 3 77.609 -> 77.367 ( -0.31%) [ +0.00% +0.23% +0.17% / -0.04% -0.28% -0.31%] index_add_ linear : Elapsed 3.880 ms (77.609 ms / 20) 70.087 -> 69.707 ( -0.54%) [ +0.12% +0.00% +0.01% / -0.03% -0.54% -0.53%] index_copy_ linear : Elapsed 3.509 ms (70.171 ms / 20) 77.850 -> 77.283 ( -0.73%) [ +0.15% +0.00% +0.07% / -0.01% -0.71% -0.73%] index_add_ reverse : Elapsed 3.898 ms (77.965 ms / 20) 69.975 -> 69.490 ( -0.69%) [ +0.43% +0.00% +0.35% / +0.13% -0.61% -0.69%] index_copy_ reverse : Elapsed 3.514 ms (70.275 ms / 20) 77.796 -> 77.804 ( +0.01%) [ +0.00% +0.00% +0.03% / +0.01% +0.32% +0.29%] index_add_ spread : Elapsed 3.890 ms (77.796 ms / 20) 70.127 -> 69.718 ( -0.58%) [ +0.27% +0.00% +0.03% / +0.24% -0.58% -0.43%] index_copy_ spread : Elapsed 3.516 ms (70.313 ms / 20) 77.874 -> 76.904 ( -1.25%) [ +0.04% +0.07% +0.00% / -0.20% -1.00% -1.25%] index_add_ strided 3 : Elapsed 3.895 ms (77.906 ms / 20) 70.205 -> 69.530 ( -0.96%) [ +0.33% +0.00% +0.15% / +0.04% -0.69% -0.96%] index_copy_ strided 3 : Elapsed 3.522 ms (70.436 ms / 20) 77.913 -> 77.136 ( -1.00%) [ +0.22% +0.11% +0.00% / +0.03% -0.70% -1.00%] index_add_ strided 5 : Elapsed 3.904 ms (78.088 ms / 20) 69.786 -> 69.757 ( -0.04%) [ +0.25% +0.25% +0.00% / +0.08% -0.04% +0.03%] index_copy_ strided 5 : Elapsed 3.498 ms (69.961 ms / 20) 78.587 -> 76.823 ( -2.24%) [ +0.00% +0.14% +0.09% / -0.26% -2.15% -2.24%] index_add_ strided 7 : Elapsed 3.929 ms (78.587 ms / 20) 70.314 -> 69.573 ( -1.05%) [ +0.00% +0.36% +0.14% / +0.04% -1.05% -0.86%] index_copy_ strided 7 : Elapsed 3.516 ms (70.314 ms / 20) 77.613 -> 77.108 ( -0.65%) [ +0.00% +0.07% +0.11% / -0.26% -0.49% -0.65%] index_add_ strided 255 : Elapsed 3.881 ms (77.613 ms / 20) 69.940 -> 69.575 ( -0.52%) [ +0.06% +0.39% +0.00% / +0.14% -0.24% -0.52%] index_copy_ strided 255 : Elapsed 3.499 ms (69.984 ms / 20) 77.935 -> 77.730 ( -0.26%) [ +0.05% +0.00% +0.13% / +0.07% -0.19% -0.26%] index_add_ perm : Elapsed 3.899 ms (77.977 ms / 20) 70.154 -> 69.387 ( -1.09%) [ +0.16% +0.20% +0.00% / -0.01% -1.09% -0.91%] index_copy_ perm : Elapsed 3.513 ms (70.265 ms / 20) 77.788 -> 77.494 ( -0.38%) [ +0.16% +0.12% +0.00% / +0.01% -0.27% -0.38%] index_add_ perm_sorted : Elapsed 3.895 ms (77.909 ms / 20) 70.088 -> 69.458 ( -0.90%) [ +0.00% +0.01% +0.19% / -0.10% -0.90% -0.78%] index_copy_ perm_sorted : Elapsed 3.504 ms (70.088 ms / 20) 447.361 -> 446.070 ( -0.29%) [ +0.00% +0.13% +0.06% / +0.04% -0.17% -0.29%] index_select const : Elapsed 22.368 ms (447.361 ms / 20) 448.868 -> 448.138 ( -0.16%) [ +0.00% +0.02% +0.04% / +0.02% -0.06% -0.16%] index_select wrap : Elapsed 22.443 ms (448.868 ms / 20) 447.739 -> 445.683 ( -0.46%) [ +0.03% +0.02% +0.00% / +0.04% -0.41% -0.46%] index_select linear : Elapsed 22.393 ms (447.863 ms / 20) 447.577 -> 446.948 ( -0.14%) [ +0.10% +0.07% +0.00% / +0.17% +0.05% -0.14%] index_select reverse : Elapsed 22.402 ms (448.045 ms / 20) 447.742 -> 444.570 ( -0.71%) [ +0.00% +0.07% +0.09% / -0.10% -0.71% -0.55%] index_select skip64 : Elapsed 22.387 ms (447.742 ms / 20) 447.282 -> 445.885 ( -0.31%) [ +0.00% +0.11% +0.11% / +0.07% -0.31% -0.22%] index_select skip256 : Elapsed 22.364 ms (447.282 ms / 20) 448.718 -> 447.214 ( -0.34%) [ +0.01% +0.00% +0.08% / +0.07% -0.25% -0.34%] index_select spread : Elapsed 22.438 ms (448.758 ms / 20) 450.986 -> 448.789 ( -0.49%) [ +0.00% +0.03% +0.07% / +0.11% -0.49% -0.44%] index_select strided 3 : Elapsed 22.549 ms (450.986 ms / 20) 450.709 -> 448.842 ( -0.41%) [ +0.06% +0.10% +0.00% / +0.11% -0.41% -0.13%] index_select strided 5 : Elapsed 22.549 ms (450.988 ms / 20) 450.746 -> 449.544 ( -0.27%) [ +0.01% +0.00% +0.11% / +0.00% -0.27% -0.20%] index_select strided 7 : Elapsed 22.539 ms (450.775 ms / 20) 451.065 -> 449.108 ( -0.43%) [ +0.00% +0.06% +0.09% / -0.04% -0.43% -0.41%] index_select strided 8 : Elapsed 22.553 ms (451.065 ms / 20) 450.947 -> 447.868 ( -0.68%) [ +0.00% +0.07% +0.13% / +0.11% -0.33% -0.68%] index_select strided 16 : Elapsed 22.547 ms (450.947 ms / 20) 450.215 -> 448.866 ( -0.30%) [ +0.00% +0.14% +0.04% / -0.03% -0.26% -0.30%] index_select random : Elapsed 22.511 ms (450.215 ms / 20) 448.236 -> 447.547 ( -0.15%) [ +0.17% +0.19% +0.00% / +0.06% -0.15% -0.15%] index_select random_sorted : Elapsed 22.449 ms (448.976 ms / 20) out_shape = [256, 100, 50, 20] in_shape = [40, 100, 50, 20] idx_dim = 0 out_shape = [40, 256, 50, 20] in_shape = [40, 100, 50, 20] idx_dim = 1 B = [40, 256, 50, 20] (stride (256000, 1, 5120, 256)) A = [40, 100, 50, 20] (stride (1000, 40000, 1, 50)) dim = 1 147.974 -> 148.244 ( +0.18%) [ +0.00% +0.39% +0.38% / +0.26% +0.37% +0.18%] index_add_ linear : Elapsed 7.399 ms (147.974 ms / 20) 109.876 -> 109.437 ( -0.40%) [ +0.00% +0.15% +0.14% / -0.04% -0.38% -0.40%] index_copy_ linear : Elapsed 5.494 ms (109.876 ms / 20) 147.694 -> 147.480 ( -0.14%) [ +0.31% +0.15% +0.00% / +0.09% -0.14% +0.21%] index_add_ reverse : Elapsed 7.407 ms (148.147 ms / 20) 109.547 -> 109.048 ( -0.46%) [ +0.00% +0.11% +0.07% / -0.46% +0.03% -0.15%] index_copy_ reverse : Elapsed 5.477 ms (109.547 ms / 20) 152.550 -> 152.055 ( -0.32%) [ +0.00% +0.25% +0.62% / -0.32% -0.27% +0.25%] index_add_ spread : Elapsed 7.628 ms (152.550 ms / 20) 111.587 -> 111.254 ( -0.30%) [ +0.01% +0.00% +0.07% / -0.30% +0.16% +0.04%] index_copy_ spread : Elapsed 5.580 ms (111.599 ms / 20) 153.095 -> 153.118 ( +0.02%) [ +0.31% +0.27% +0.00% / +0.16% +0.34% +0.02%] index_add_ strided 3 : Elapsed 7.679 ms (153.573 ms / 20) 111.944 -> 112.142 ( +0.18%) [ +0.11% +0.18% +0.00% / +0.20% +0.18% +0.21%] index_copy_ strided 3 : Elapsed 5.603 ms (112.067 ms / 20) 156.698 -> 156.203 ( -0.32%) [ +0.07% +0.03% +0.00% / -0.29% -0.32% -0.24%] index_add_ strided 5 : Elapsed 7.840 ms (156.805 ms / 20) 113.694 -> 113.722 ( +0.02%) [ +0.29% +0.02% +0.00% / +0.13% +0.09% +0.02%] index_copy_ strided 5 : Elapsed 5.701 ms (114.021 ms / 20) 158.471 -> 158.165 ( -0.19%) [ +0.36% +0.07% +0.00% / +0.07% -0.06% -0.19%] index_add_ strided 7 : Elapsed 7.952 ms (159.042 ms / 20) 114.861 -> 114.373 ( -0.42%) [ +0.00% +0.05% +0.21% / -0.05% -0.42% -0.24%] index_copy_ strided 7 : Elapsed 5.743 ms (114.861 ms / 20) 148.025 -> 147.877 ( -0.10%) [ +0.00% +0.48% +0.08% / -0.10% -0.08% +0.27%] index_add_ strided 255 : Elapsed 7.401 ms (148.025 ms / 20) 109.504 -> 109.708 ( +0.19%) [ +0.44% +0.19% +0.00% / +0.26% +0.26% +0.19%] index_copy_ strided 255 : Elapsed 5.499 ms (109.987 ms / 20) 160.829 -> 160.843 ( +0.01%) [ +0.26% +0.57% +0.00% / +0.42% +0.01% +0.51%] index_add_ perm : Elapsed 8.062 ms (161.247 ms / 20) 116.681 -> 116.555 ( -0.11%) [ +0.12% +0.08% +0.00% / -0.11% +0.13% -0.04%] index_copy_ perm : Elapsed 5.841 ms (116.817 ms / 20) 151.768 -> 151.568 ( -0.13%) [ +0.14% +0.35% +0.00% / -0.13% +0.25% +0.14%] index_add_ perm_sorted : Elapsed 7.599 ms (151.982 ms / 20) 111.184 -> 111.244 ( +0.05%) [ +0.00% +0.20% +0.32% / +0.05% +0.32% +0.20%] index_copy_ perm_sorted : Elapsed 5.559 ms (111.184 ms / 20) 255.156 -> 255.250 ( +0.04%) [ +0.19% +0.00% +0.26% / +0.04% +0.26% +0.13%] index_select const : Elapsed 12.782 ms (255.641 ms / 20) 284.868 -> 284.774 ( -0.03%) [ +0.13% +0.00% +0.12% / -0.03% +0.53% +0.29%] index_select wrap : Elapsed 14.262 ms (285.237 ms / 20) 264.776 -> 265.267 ( +0.19%) [ +0.32% +0.24% +0.00% / +0.27% +0.33% +0.19%] index_select linear : Elapsed 13.282 ms (265.636 ms / 20) 269.565 -> 269.310 ( -0.09%) [ +0.00% +0.00% +0.07% / -0.09% +0.00% +0.29%] index_select reverse : Elapsed 13.478 ms (269.565 ms / 20) 255.983 -> 255.677 ( -0.12%) [ +0.20% +0.13% +0.00% / -0.02% -0.12% +0.05%] index_select skip64 : Elapsed 12.825 ms (256.495 ms / 20) 254.752 -> 254.910 ( +0.06%) [ +0.00% +0.40% +0.39% / +0.45% +0.06% +0.34%] index_select skip256 : Elapsed 12.738 ms (254.752 ms / 20) 279.580 -> 279.495 ( -0.03%) [ +0.04% +0.03% +0.00% / +0.25% +0.10% -0.03%] index_select spread : Elapsed 13.985 ms (279.691 ms / 20) 286.289 -> 286.057 ( -0.08%) [ +0.20% +0.00% +0.37% / +0.09% -0.08% +0.07%] index_select strided 3 : Elapsed 14.343 ms (286.853 ms / 20) 286.217 -> 286.002 ( -0.08%) [ +0.01% +0.00% +0.12% / -0.08% +0.31% +0.16%] index_select strided 5 : Elapsed 14.312 ms (286.245 ms / 20) 285.643 -> 286.382 ( +0.26%) [ +0.41% +0.18% +0.00% / +0.32% +0.42% +0.26%] index_select strided 7 : Elapsed 14.340 ms (286.803 ms / 20) 286.284 -> 286.117 ( -0.06%) [ +0.00% +0.19% +0.04% / -0.06% +0.00% +0.04%] index_select strided 8 : Elapsed 14.314 ms (286.284 ms / 20) 285.422 -> 285.799 ( +0.13%) [ +0.00% +0.21% +0.38% / +0.39% +0.13% +0.23%] index_select strided 16 : Elapsed 14.271 ms (285.422 ms / 20) 285.990 -> 286.148 ( +0.06%) [ +0.08% +0.00% +0.09% / +0.06% +0.20% +0.23%] index_select strided 64 : Elapsed 14.310 ms (286.207 ms / 20) 285.518 -> 285.818 ( +0.11%) [ +0.31% +0.22% +0.00% / +0.16% +0.47% +0.11%] index_select random : Elapsed 14.320 ms (286.401 ms / 20) 278.105 -> 277.619 ( -0.17%) [ +0.00% +0.03% +0.20% / +0.00% -0.17% +0.19%] index_select random_sorted : Elapsed 13.905 ms (278.105 ms / 20) out_shape = [40, 100, 256, 20] in_shape = [40, 100, 50, 20] idx_dim = 2 out_shape = [40, 100, 50, 256] in_shape = [40, 100, 50, 20] idx_dim = 3 out_shape = [256, 20, 40, 100] in_shape = [50, 20, 40, 100] idx_dim = 0 out_shape = [50, 256, 40, 100] in_shape = [50, 20, 40, 100] idx_dim = 1 B = [50, 256, 40, 100] (stride (100, 5000, 1280000, 1)) A = [50, 20, 40, 100] (stride (4000, 200000, 100, 1)) dim = 1 32.859 -> 32.753 ( -0.32%) [ +0.05% +0.00% +0.10% / +0.08% -0.25% -0.32%] index_add_ linear : Elapsed 1.644 ms (32.877 ms / 20) 31.241 -> 31.268 ( +0.09%) [ +0.05% +0.06% +0.00% / +0.10% +0.09% +0.09%] index_copy_ linear : Elapsed 1.563 ms (31.257 ms / 20) 32.933 -> 32.826 ( -0.32%) [ +0.00% +0.04% +0.11% / +0.12% -0.32% -0.28%] index_add_ reverse : Elapsed 1.647 ms (32.933 ms / 20) 31.298 -> 31.310 ( +0.04%) [ +0.00% +0.00% +0.07% / +0.10% +0.11% +0.04%] index_copy_ reverse : Elapsed 1.565 ms (31.298 ms / 20) 32.968 -> 32.884 ( -0.25%) [ +0.22% +0.00% +0.31% / +0.26% -0.14% -0.25%] index_add_ spread : Elapsed 1.652 ms (33.039 ms / 20) 31.338 -> 31.343 ( +0.02%) [ +0.04% +0.00% +0.31% / +0.02% +0.16% +0.05%] index_copy_ spread : Elapsed 1.568 ms (31.350 ms / 20) 32.966 -> 32.878 ( -0.27%) [ +0.01% +0.00% +0.16% / +0.07% -0.27% -0.22%] index_add_ strided 3 : Elapsed 1.648 ms (32.969 ms / 20) 31.363 -> 31.291 ( -0.23%) [ +0.00% +0.15% +0.15% / +0.03% -0.15% -0.23%] index_copy_ strided 3 : Elapsed 1.568 ms (31.363 ms / 20) 32.827 -> 32.838 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.51% +0.47%] index_add_ strided 5 : Elapsed 1.641 ms (32.827 ms / 20) 31.277 -> 31.295 ( +0.06%) [ +0.00% +0.03% +0.20% / +0.06% +0.23% +0.12%] index_copy_ strided 5 : Elapsed 1.564 ms (31.277 ms / 20) 32.869 -> 32.803 ( -0.20%) [ +0.00% +0.09% +0.16% / +0.09% -0.10% -0.20%] index_add_ strided 7 : Elapsed 1.643 ms (32.869 ms / 20) 31.312 -> 31.259 ( -0.17%) [ +0.09% +0.00% +0.13% / +0.08% -0.05% -0.17%] index_copy_ strided 7 : Elapsed 1.567 ms (31.339 ms / 20) 32.897 -> 32.758 ( -0.42%) [ +0.00% +0.17% +0.26% / -0.06% -0.42% -0.42%] index_add_ strided 255 : Elapsed 1.645 ms (32.897 ms / 20) 31.260 -> 31.283 ( +0.07%) [ +0.00% +0.04% +0.16% / +0.08% +0.13% +0.07%] index_copy_ strided 255 : Elapsed 1.563 ms (31.260 ms / 20) 32.849 -> 32.847 ( -0.01%) [ +0.04% +0.00% +0.18% / +0.02% -0.01% +0.21%] index_add_ perm : Elapsed 1.643 ms (32.863 ms / 20) 31.290 -> 31.258 ( -0.10%) [ +0.04% +0.00% +0.10% / +0.08% -0.10% -0.04%] index_copy_ perm : Elapsed 1.565 ms (31.303 ms / 20) 32.967 -> 32.888 ( -0.24%) [ +0.01% +0.00% +0.08% / +0.05% -0.24% -0.23%] index_add_ perm_sorted : Elapsed 1.648 ms (32.970 ms / 20) 31.346 -> 31.276 ( -0.22%) [ +0.02% +0.00% +0.17% / +0.05% -0.22% -0.04%] index_copy_ perm_sorted : Elapsed 1.568 ms (31.353 ms / 20) 390.110 -> 390.273 ( +0.04%) [ +0.00% +0.08% +0.11% / +0.04% +0.22% +0.24%] index_select const : Elapsed 19.505 ms (390.110 ms / 20) 397.413 -> 397.719 ( +0.08%) [ +0.02% +0.00% +0.08% / +0.08% +0.11% +0.08%] index_select wrap : Elapsed 19.874 ms (397.475 ms / 20) 390.399 -> 390.426 ( +0.01%) [ +0.00% +0.06% +0.11% / +0.13% +0.10% +0.01%] index_select linear : Elapsed 19.520 ms (390.399 ms / 20) 392.514 -> 392.560 ( +0.01%) [ +0.01% +0.00% +0.13% / +0.09% +0.01% +0.11%] index_select reverse : Elapsed 19.628 ms (392.559 ms / 20) 389.999 -> 390.481 ( +0.12%) [ +0.04% +0.00% +0.06% / +0.12% +0.26% +0.30%] index_select skip64 : Elapsed 19.508 ms (390.154 ms / 20) 389.699 -> 390.267 ( +0.15%) [ +0.17% +0.00% +0.22% / +0.15% +0.31% +0.31%] index_select skip256 : Elapsed 19.519 ms (390.375 ms / 20) 394.735 -> 395.251 ( +0.13%) [ +0.00% +0.04% +0.13% / +0.13% +0.15% +0.18%] index_select spread : Elapsed 19.737 ms (394.735 ms / 20) 397.388 -> 397.572 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.17% +0.06%] index_select strided 3 : Elapsed 19.869 ms (397.388 ms / 20) 395.730 -> 395.534 ( -0.05%) [ +0.00% +0.02% +0.08% / +0.11% -0.04% -0.05%] index_select strided 5 : Elapsed 19.786 ms (395.730 ms / 20) 397.455 -> 397.660 ( +0.05%) [ +0.00% +0.02% +0.08% / +0.08% +0.09% +0.05%] index_select strided 7 : Elapsed 19.873 ms (397.455 ms / 20) 396.342 -> 396.520 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.04% +0.09% +0.10%] index_select strided 8 : Elapsed 19.826 ms (396.512 ms / 20) 396.355 -> 396.582 ( +0.06%) [ +0.00% +0.01% +0.03% / +0.06% +0.07% +0.10%] index_select strided 16 : Elapsed 19.818 ms (396.355 ms / 20) 397.093 -> 397.584 ( +0.12%) [ +0.00% +0.05% +0.12% / +0.13% +0.12% +0.14%] index_select random : Elapsed 19.855 ms (397.093 ms / 20) 394.317 -> 394.871 ( +0.14%) [ +0.01% +0.00% +0.05% / +0.14% +0.23% +0.28%] index_select random_sorted : Elapsed 19.718 ms (394.362 ms / 20) out_shape = [50, 20, 256, 100] in_shape = [50, 20, 40, 100] idx_dim = 2 out_shape = [50, 20, 40, 256] in_shape = [50, 20, 40, 100] idx_dim = 3 out_shape = [256, 20, 100, 40] in_shape = [50, 20, 100, 40] idx_dim = 0 out_shape = [50, 256, 100, 40] in_shape = [50, 20, 100, 40] idx_dim = 1 out_shape = [50, 20, 256, 40] in_shape = [50, 20, 100, 40] idx_dim = 2 out_shape = [50, 20, 100, 256] in_shape = [50, 20, 100, 40] idx_dim = 3 out_shape = [256, 40, 20, 100] in_shape = [50, 40, 20, 100] idx_dim = 0 B = [256, 40, 20, 100] (stride (80000, 20, 1, 800)) A = [50, 40, 20, 100] (stride (800, 1, 40, 40000)) dim = 0 110.798 -> 110.523 ( -0.25%) [ +0.44% +0.00% +0.02% / -0.25% +1.18% +1.06%] index_add_ linear : Elapsed 5.564 ms (111.289 ms / 20) 91.343 -> 91.469 ( +0.14%) [ +0.00% +0.56% +0.05% / +0.14% +2.25% +2.62%] index_copy_ linear : Elapsed 4.567 ms (91.343 ms / 20) 110.416 -> 108.561 ( -1.68%) [ +0.79% +0.72% +0.00% / -0.26% -1.68% -1.20%] index_add_ reverse : Elapsed 5.564 ms (111.290 ms / 20) 91.223 -> 91.081 ( -0.16%) [ +0.00% +0.23% +0.44% / -0.16% +1.13% +0.70%] index_copy_ reverse : Elapsed 4.561 ms (91.223 ms / 20) 112.352 -> 112.545 ( +0.17%) [ +0.00% +0.40% +0.34% / +0.17% +2.35% +1.93%] index_add_ spread : Elapsed 5.618 ms (112.352 ms / 20) 91.179 -> 90.956 ( -0.24%) [ +0.33% +0.14% +0.00% / -0.24% +3.03% +2.35%] index_copy_ spread : Elapsed 4.574 ms (91.481 ms / 20) 113.372 -> 110.602 ( -2.44%) [ +0.95% +0.00% +0.45% / -0.78% -2.13% -2.44%] index_add_ strided 3 : Elapsed 5.722 ms (114.450 ms / 20) 91.988 -> 91.579 ( -0.44%) [ +0.48% +0.11% +0.00% / -0.44% +1.43% +1.73%] index_copy_ strided 3 : Elapsed 4.622 ms (92.430 ms / 20) 112.232 -> 112.138 ( -0.08%) [ +0.17% +0.23% +0.00% / -0.08% +2.00% +1.73%] index_add_ strided 5 : Elapsed 5.621 ms (112.425 ms / 20) 91.344 -> 91.445 ( +0.11%) [ +0.46% +0.00% +0.11% / +0.11% +1.64% +2.75%] index_copy_ strided 5 : Elapsed 4.588 ms (91.760 ms / 20) 109.383 -> 109.874 ( +0.45%) [ +1.48% +0.74% +0.00% / +0.45% +1.13% +0.54%] index_add_ strided 7 : Elapsed 5.550 ms (111.005 ms / 20) 90.889 -> 90.747 ( -0.16%) [ +0.54% +0.43% +0.00% / -0.16% +2.83% +2.60%] index_copy_ strided 7 : Elapsed 4.569 ms (91.382 ms / 20) 109.537 -> 109.793 ( +0.23%) [ +0.11% +0.81% +0.00% / +0.23% +3.63% +3.67%] index_add_ strided 255 : Elapsed 5.483 ms (109.654 ms / 20) 91.691 -> 91.293 ( -0.43%) [ +0.40% +0.00% +0.14% / -0.43% +1.55% +1.09%] index_copy_ strided 255 : Elapsed 4.603 ms (92.057 ms / 20) 113.029 -> 111.419 ( -1.42%) [ +0.42% +0.00% +0.24% / -0.23% -1.42% -0.81%] index_add_ perm : Elapsed 5.675 ms (113.506 ms / 20) 91.199 -> 91.526 ( +0.36%) [ +0.32% +0.00% +0.73% / +0.36% +2.11% +1.83%] index_copy_ perm : Elapsed 4.574 ms (91.488 ms / 20) 111.086 -> 110.574 ( -0.46%) [ +0.00% +0.25% +0.17% / -0.46% +2.51% +2.13%] index_add_ perm_sorted : Elapsed 5.554 ms (111.086 ms / 20) 91.502 -> 91.580 ( +0.09%) [ +0.00% +0.14% +0.18% / +0.09% +2.20% +1.97%] index_copy_ perm_sorted : Elapsed 4.575 ms (91.502 ms / 20) good 525.694 -> 499.262 ( -5.03%) [ +0.00% +0.09% +0.01% / -0.14% -4.72% -5.03%] index_select const : Elapsed 26.285 ms (525.694 ms / 20) 648.960 -> 651.458 ( +0.38%) [ +0.20% +0.25% +0.00% / +0.38% +0.84% +0.91%] index_select wrap : Elapsed 32.511 ms (650.226 ms / 20) 531.530 -> 523.852 ( -1.44%) [ +0.04% +0.00% +0.15% / -0.24% -1.33% -1.44%] index_select linear : Elapsed 26.588 ms (531.750 ms / 20) 554.660 -> 533.451 ( -3.82%) [ +0.00% +0.06% +0.01% / +0.08% -3.63% -3.82%] index_select reverse : Elapsed 27.733 ms (554.660 ms / 20) 525.604 -> 499.781 ( -4.91%) [ +0.20% +0.11% +0.00% / -0.05% -4.91% -4.82%] index_select skip64 : Elapsed 26.334 ms (526.675 ms / 20) good 525.171 -> 498.832 ( -5.02%) [ +0.22% +0.00% +0.12% / +0.18% -4.82% -5.02%] index_select skip256 : Elapsed 26.316 ms (526.321 ms / 20) 570.189 -> 571.421 ( +0.22%) [ +0.21% +0.40% +0.00% / +0.22% +3.83% +3.91%] index_select spread : Elapsed 28.569 ms (571.378 ms / 20) 671.468 -> 668.049 ( -0.51%) [ +0.00% +0.07% +0.09% / +0.13% -0.28% -0.51%] index_select strided 3 : Elapsed 33.573 ms (671.468 ms / 20) 661.345 -> 662.410 ( +0.16%) [ +0.00% +0.25% +0.19% / +0.16% +1.69% +1.58%] index_select strided 5 : Elapsed 33.067 ms (661.345 ms / 20) 671.668 -> 670.924 ( -0.11%) [ +0.15% +0.14% +0.00% / +0.17% -0.11% +0.03%] index_select strided 7 : Elapsed 33.635 ms (672.697 ms / 20) 665.231 -> 666.456 ( +0.18%) [ +0.15% +0.00% +0.17% / +0.18% +2.05% +1.89%] index_select strided 8 : Elapsed 33.310 ms (666.206 ms / 20) 672.082 -> 670.252 ( -0.27%) [ +0.15% +0.01% +0.00% / -0.27% +0.09% +0.26%] index_select strided 16 : Elapsed 33.653 ms (673.066 ms / 20) 653.057 -> 651.561 ( -0.23%) [ +0.15% +0.17% +0.00% / -0.23% +2.13% +2.34%] index_select random : Elapsed 32.703 ms (654.067 ms / 20) 571.175 -> 570.798 ( -0.07%) [ +0.00% +0.21% +0.00% / -0.07% +3.55% +3.59%] index_select random_sorted : Elapsed 28.559 ms (571.181 ms / 20) out_shape = [50, 256, 20, 100] in_shape = [50, 40, 20, 100] idx_dim = 1 out_shape = [50, 40, 256, 100] in_shape = [50, 40, 20, 100] idx_dim = 2 B = [50, 40, 256, 100] (stride (25600, 1280000, 100, 1)) A = [50, 40, 20, 100] (stride (4000, 1, 200000, 40)) dim = 2 62.478 -> 60.858 ( -2.59%) [ +0.00% +0.19% +0.15% / +0.11% -2.59% -2.58%] index_add_ linear : Elapsed 3.124 ms (62.478 ms / 20) 59.324 -> 58.176 ( -1.94%) [ +0.12% +0.07% +0.00% / +0.26% -1.84% -1.94%] index_copy_ linear : Elapsed 2.970 ms (59.394 ms / 20) 61.066 -> 60.302 ( -1.25%) [ +0.00% +0.09% +0.13% / +0.18% -1.25% -1.08%] index_add_ reverse : Elapsed 3.053 ms (61.066 ms / 20) 58.154 -> 57.791 ( -0.62%) [ +0.00% +0.20% +0.13% / +0.20% -0.62% -0.53%] index_copy_ reverse : Elapsed 2.908 ms (58.154 ms / 20) 61.469 -> 61.032 ( -0.71%) [ +0.16% +0.00% +0.18% / -0.02% -0.57% -0.71%] index_add_ spread : Elapsed 3.078 ms (61.566 ms / 20) 58.194 -> 58.178 ( -0.03%) [ +0.40% +0.00% +0.39% / +0.09% -0.02% -0.03%] index_copy_ spread : Elapsed 2.921 ms (58.424 ms / 20) 61.380 -> 60.379 ( -1.63%) [ +0.15% +0.00% +0.08% / +0.07% -1.63% -1.11%] index_add_ strided 3 : Elapsed 3.073 ms (61.470 ms / 20) 58.470 -> 58.006 ( -0.79%) [ +0.20% +0.00% +0.09% / +0.20% -0.79% -0.60%] index_copy_ strided 3 : Elapsed 2.929 ms (58.589 ms / 20) 61.017 -> 60.525 ( -0.81%) [ +0.00% +0.37% +0.21% / +0.20% -0.79% -0.81%] index_add_ strided 5 : Elapsed 3.051 ms (61.017 ms / 20) 58.170 -> 57.925 ( -0.42%) [ +0.00% +0.08% +0.18% / +0.15% -0.42% -0.30%] index_copy_ strided 5 : Elapsed 2.909 ms (58.170 ms / 20) 61.083 -> 60.863 ( -0.36%) [ +0.00% +0.05% +0.13% / +0.28% -0.27% -0.36%] index_add_ strided 7 : Elapsed 3.054 ms (61.083 ms / 20) 58.168 -> 58.241 ( +0.13%) [ +0.10% +0.17% +0.00% / +0.13% +0.24% +0.18%] index_copy_ strided 7 : Elapsed 2.911 ms (58.227 ms / 20) 61.289 -> 60.668 ( -1.01%) [ +0.10% +0.13% +0.00% / -0.01% -1.01% -0.73%] index_add_ strided 255 : Elapsed 3.068 ms (61.352 ms / 20) 58.316 -> 58.039 ( -0.47%) [ +0.24% +0.20% +0.00% / -0.19% -0.47% -0.46%] index_copy_ strided 255 : Elapsed 2.923 ms (58.455 ms / 20) 60.580 -> 60.629 ( +0.08%) [ +0.03% +0.00% +0.07% / +0.08% +0.44% +0.47%] index_add_ perm : Elapsed 3.030 ms (60.601 ms / 20) 57.908 -> 57.985 ( +0.13%) [ +0.17% +0.00% +0.15% / +0.13% +0.56% +0.40%] index_copy_ perm : Elapsed 2.900 ms (58.009 ms / 20) 60.578 -> 60.561 ( -0.03%) [ +0.04% +0.30% +0.00% / -0.03% +0.27% +0.32%] index_add_ perm_sorted : Elapsed 3.030 ms (60.600 ms / 20) 57.853 -> 57.737 ( -0.20%) [ +0.14% +0.09% +0.00% / -0.20% +0.49% +0.52%] index_copy_ perm_sorted : Elapsed 2.897 ms (57.933 ms / 20) 686.238 -> 686.378 ( +0.02%) [ +0.00% +0.01% +0.04% / +0.02% +0.04% +0.05%] index_select const : Elapsed 34.312 ms (686.238 ms / 20) 809.341 -> 807.394 ( -0.24%) [ +0.00% +0.04% +0.01% / +0.02% -0.19% -0.24%] index_select wrap : Elapsed 40.467 ms (809.341 ms / 20) 689.450 -> 688.555 ( -0.13%) [ +0.05% +0.00% +0.03% / +0.05% -0.11% -0.13%] index_select linear : Elapsed 34.489 ms (689.779 ms / 20) 710.107 -> 708.289 ( -0.26%) [ +0.00% +0.03% +0.03% / +0.02% -0.26% -0.23%] index_select reverse : Elapsed 35.505 ms (710.107 ms / 20) 686.508 -> 686.558 ( +0.01%) [ +0.00% +0.00% +0.02% / +0.02% +0.01% +0.01%] index_select skip64 : Elapsed 34.327 ms (686.538 ms / 20) 686.349 -> 686.312 ( -0.01%) [ +0.00% +0.02% +0.03% / +0.03% -0.01% +0.02%] index_select skip256 : Elapsed 34.317 ms (686.349 ms / 20) 767.569 -> 766.617 ( -0.12%) [ +0.06% +0.02% +0.00% / +0.05% -0.12% -0.12%] index_select spread : Elapsed 38.401 ms (768.012 ms / 20) 809.234 -> 807.381 ( -0.23%) [ +0.00% +0.01% +0.00% / +0.01% -0.23% -0.21%] index_select strided 3 : Elapsed 40.464 ms (809.273 ms / 20) 793.119 -> 790.602 ( -0.32%) [ +0.07% +0.03% +0.00% / +0.05% -0.29% -0.32%] index_select strided 5 : Elapsed 39.684 ms (793.677 ms / 20) 809.175 -> 807.259 ( -0.24%) [ +0.05% +0.00% +0.02% / +0.03% -0.21% -0.24%] index_select strided 7 : Elapsed 40.480 ms (809.599 ms / 20) 797.586 -> 795.811 ( -0.22%) [ +0.01% +0.03% +0.00% / +0.01% -0.19% -0.22%] index_select strided 8 : Elapsed 39.882 ms (797.634 ms / 20) 797.434 -> 795.715 ( -0.22%) [ +0.02% +0.03% +0.00% / +0.03% -0.22% -0.19%] index_select strided 16 : Elapsed 39.878 ms (797.562 ms / 20) 806.426 -> 805.158 ( -0.16%) [ +0.04% +0.02% +0.00% / +0.00% -0.15% -0.16%] index_select random : Elapsed 40.339 ms (806.773 ms / 20) 766.679 -> 765.645 ( -0.13%) [ +0.02% +0.00% +0.02% / -0.01% -0.12% -0.13%] index_select random_sorted : Elapsed 38.343 ms (766.861 ms / 20) out_shape = [50, 40, 20, 256] in_shape = [50, 40, 20, 100] idx_dim = 3 out_shape = [256, 40, 100, 20] in_shape = [50, 40, 100, 20] idx_dim = 0 out_shape = [50, 256, 100, 20] in_shape = [50, 40, 100, 20] idx_dim = 1 out_shape = [50, 40, 256, 20] in_shape = [50, 40, 100, 20] idx_dim = 2 out_shape = [50, 40, 100, 256] in_shape = [50, 40, 100, 20] idx_dim = 3 out_shape = [256, 100, 20, 40] in_shape = [50, 100, 20, 40] idx_dim = 0 out_shape = [50, 256, 20, 40] in_shape = [50, 100, 20, 40] idx_dim = 1 out_shape = [50, 100, 256, 40] in_shape = [50, 100, 20, 40] idx_dim = 2 out_shape = [50, 100, 20, 256] in_shape = [50, 100, 20, 40] idx_dim = 3 out_shape = [256, 100, 40, 20] in_shape = [50, 100, 40, 20] idx_dim = 0 B = [256, 100, 40, 20] (stride (40, 204800, 1, 10240)) A = [50, 100, 40, 20] (stride (1, 1000, 100000, 50)) dim = 0 108.662 -> 107.129 ( -1.41%) [ +0.23% +0.00% +0.12% / +0.13% -1.34% -1.41%] index_add_ linear : Elapsed 5.446 ms (108.912 ms / 20) 84.697 -> 84.267 ( -0.51%) [ +0.50% +0.11% +0.00% / +0.44% -0.51% -0.17%] index_copy_ linear : Elapsed 4.256 ms (85.123 ms / 20) 107.849 -> 107.017 ( -0.77%) [ +0.22% +0.39% +0.00% / +0.08% -0.77% -0.65%] index_add_ reverse : Elapsed 5.404 ms (108.085 ms / 20) 84.511 -> 83.683 ( -0.98%) [ +0.10% +0.21% +0.00% / +0.08% -0.90% -0.98%] index_copy_ reverse : Elapsed 4.230 ms (84.598 ms / 20) 112.876 -> 111.847 ( -0.91%) [ +0.24% +0.00% +0.09% / +0.12% -0.87% -0.91%] index_add_ spread : Elapsed 5.657 ms (113.142 ms / 20) 89.516 -> 88.521 ( -1.11%) [ +0.00% +0.03% +0.14% / -0.06% -0.94% -1.11%] index_copy_ spread : Elapsed 4.476 ms (89.516 ms / 20) 112.139 -> 110.654 ( -1.32%) [ +0.00% +0.03% +0.09% / +0.17% -1.32% -1.13%] index_add_ strided 3 : Elapsed 5.607 ms (112.139 ms / 20) 89.601 -> 87.407 ( -2.45%) [ +0.19% +0.02% +0.00% / -0.01% -2.45% -2.15%] index_copy_ strided 3 : Elapsed 4.489 ms (89.772 ms / 20) 112.433 -> 112.150 ( -0.25%) [ +0.00% +0.18% +0.18% / +0.54% +0.06% -0.25%] index_add_ strided 5 : Elapsed 5.622 ms (112.433 ms / 20) 90.207 -> 88.284 ( -2.13%) [ +0.00% +0.17% +0.09% / +0.04% -2.05% -2.13%] index_copy_ strided 5 : Elapsed 4.510 ms (90.207 ms / 20) 113.682 -> 113.181 ( -0.44%) [ +0.36% +0.36% +0.00% / -0.02% -0.25% -0.44%] index_add_ strided 7 : Elapsed 5.704 ms (114.087 ms / 20) 90.486 -> 89.362 ( -1.24%) [ +0.21% +0.55% +0.00% / -0.14% -1.24% -0.87%] index_copy_ strided 7 : Elapsed 4.534 ms (90.674 ms / 20) 107.478 -> 107.450 ( -0.03%) [ +0.33% +0.34% +0.00% / +0.08% +0.09% -0.03%] index_add_ strided 255 : Elapsed 5.392 ms (107.830 ms / 20) 86.029 -> 84.723 ( -1.52%) [ +0.16% +0.00% +0.03% / +0.00% -1.03% -1.52%] index_copy_ strided 255 : Elapsed 4.308 ms (86.166 ms / 20) 112.385 -> 110.988 ( -1.24%) [ +0.10% +0.00% +0.22% / -0.36% -1.24% -1.17%] index_add_ perm : Elapsed 5.625 ms (112.498 ms / 20) 88.519 -> 88.250 ( -0.30%) [ +0.27% +0.08% +0.00% / -0.30% +0.94% +0.84%] index_copy_ perm : Elapsed 4.438 ms (88.755 ms / 20) 110.982 -> 110.385 ( -0.54%) [ +0.28% +0.00% +0.06% / +0.30% +0.15% -0.54%] index_add_ perm_sorted : Elapsed 5.565 ms (111.298 ms / 20) 87.179 -> 87.315 ( +0.16%) [ +0.44% +0.07% +0.00% / +0.16% +1.35% +1.62%] index_copy_ perm_sorted : Elapsed 4.378 ms (87.559 ms / 20) 509.077 -> 501.460 ( -1.50%) [ +0.20% +0.28% +0.00% / -0.00% -1.40% -1.50%] index_select const : Elapsed 25.504 ms (510.073 ms / 20) 510.527 -> 506.188 ( -0.85%) [ +0.03% +0.00% +0.24% / -0.08% -0.79% -0.85%] index_select wrap : Elapsed 25.534 ms (510.677 ms / 20) 509.910 -> 502.631 ( -1.43%) [ +0.15% +0.06% +0.00% / +0.04% -1.43% -1.37%] index_select linear : Elapsed 25.534 ms (510.678 ms / 20) 509.446 -> 502.304 ( -1.40%) [ +0.04% +0.13% +0.00% / -0.06% -1.00% -1.40%] index_select reverse : Elapsed 25.482 ms (509.638 ms / 20) 508.304 -> 502.112 ( -1.22%) [ +0.12% +0.00% +0.31% / +0.13% -1.19% -1.22%] index_select skip64 : Elapsed 25.445 ms (508.900 ms / 20) 509.952 -> 503.112 ( -1.34%) [ +0.00% +0.27% +0.21% / +0.13% -1.27% -1.34%] index_select skip256 : Elapsed 25.498 ms (509.952 ms / 20) 510.776 -> 504.447 ( -1.24%) [ +0.00% +0.10% +0.01% / -0.08% -1.24% -1.16%] index_select spread : Elapsed 25.539 ms (510.776 ms / 20) 513.520 -> 505.851 ( -1.49%) [ +0.23% +0.00% +0.07% / -0.06% -1.49% -1.42%] index_select strided 3 : Elapsed 25.734 ms (514.683 ms / 20) 512.958 -> 507.778 ( -1.01%) [ +0.31% +0.00% +0.14% / +0.16% -0.78% -1.01%] index_select strided 5 : Elapsed 25.728 ms (514.552 ms / 20) 512.487 -> 507.813 ( -0.91%) [ +0.00% +0.28% +0.47% / -0.34% -0.77% -0.91%] index_select strided 7 : Elapsed 25.624 ms (512.487 ms / 20) 514.010 -> 507.141 ( -1.34%) [ +0.09% +0.00% +0.07% / -0.21% -1.33% -1.34%] index_select strided 8 : Elapsed 25.725 ms (514.492 ms / 20) 512.033 -> 508.017 ( -0.78%) [ +0.66% +0.00% +0.33% / +0.25% -0.77% -0.78%] index_select strided 16 : Elapsed 25.771 ms (515.426 ms / 20) 512.879 -> 507.964 ( -0.96%) [ +0.00% +0.07% +0.31% / +0.05% -0.95% -0.96%] index_select random : Elapsed 25.644 ms (512.879 ms / 20) 510.373 -> 505.374 ( -0.98%) [ +0.54% +0.12% +0.00% / +0.32% -0.82% -0.98%] index_select random_sorted : Elapsed 25.655 ms (513.109 ms / 20) out_shape = [50, 256, 40, 20] in_shape = [50, 100, 40, 20] idx_dim = 1 B = [50, 256, 40, 20] (stride (800, 40000, 1, 40)) A = [50, 100, 40, 20] (stride (4000, 40, 1, 200000)) dim = 1 79.978 -> 80.035 ( +0.07%) [ +0.00% +0.37% +0.18% / +0.07% +0.58% +0.53%] index_add_ linear : Elapsed 3.999 ms (79.978 ms / 20) 76.244 -> 75.945 ( -0.39%) [ +0.14% +0.02% +0.00% / -0.39% +0.80% +0.90%] index_copy_ linear : Elapsed 3.817 ms (76.348 ms / 20) 79.719 -> 80.081 ( +0.45%) [ +0.18% +0.14% +0.00% / +0.45% +1.16% +1.09%] index_add_ reverse : Elapsed 3.993 ms (79.859 ms / 20) 76.166 -> 76.376 ( +0.28%) [ +0.00% +0.30% +0.04% / +0.28% +0.76% +1.33%] index_copy_ reverse : Elapsed 3.808 ms (76.166 ms / 20) 80.177 -> 79.937 ( -0.30%) [ +0.34% +0.00% +0.27% / -0.30% +0.21% +0.54%] index_add_ spread : Elapsed 4.022 ms (80.449 ms / 20) 76.357 -> 76.233 ( -0.16%) [ +0.38% +0.00% +0.07% / -0.16% +0.42% +0.71%] index_copy_ spread : Elapsed 3.832 ms (76.648 ms / 20) 80.483 -> 80.476 ( -0.01%) [ +0.00% +0.06% +0.12% / -0.01% +0.36% +0.77%] index_add_ strided 3 : Elapsed 4.024 ms (80.483 ms / 20) 76.439 -> 76.517 ( +0.10%) [ +0.11% +0.04% +0.00% / +0.10% +0.83% +1.24%] index_copy_ strided 3 : Elapsed 3.826 ms (76.521 ms / 20) 80.090 -> 79.741 ( -0.44%) [ +0.02% +0.10% +0.00% / -0.44% +0.09% +0.47%] index_add_ strided 5 : Elapsed 4.005 ms (80.103 ms / 20) 75.999 -> 75.788 ( -0.28%) [ +0.20% +0.66% +0.00% / -0.28% +0.73% +0.84%] index_copy_ strided 5 : Elapsed 3.808 ms (76.151 ms / 20) 80.522 -> 80.468 ( -0.07%) [ +0.00% +0.02% +0.31% / +0.12% -0.07% +0.03%] index_add_ strided 7 : Elapsed 4.026 ms (80.522 ms / 20) 76.816 -> 76.618 ( -0.26%) [ +0.04% +0.00% +0.58% / -0.26% -0.19% -0.22%] index_copy_ strided 7 : Elapsed 3.842 ms (76.843 ms / 20) 79.622 -> 79.755 ( +0.17%) [ +0.00% +0.21% +0.23% / +0.17% +1.31% +1.43%] index_add_ strided 255 : Elapsed 3.981 ms (79.622 ms / 20) 75.862 -> 75.973 ( +0.15%) [ +0.15% +0.00% +0.67% / +0.15% +1.19% +1.73%] index_copy_ strided 255 : Elapsed 3.799 ms (75.977 ms / 20) 80.786 -> 80.535 ( -0.31%) [ +0.00% +0.26% +0.15% / +0.09% -0.29% -0.31%] index_add_ perm : Elapsed 4.039 ms (80.786 ms / 20) 76.486 -> 76.530 ( +0.06%) [ +0.52% +0.28% +0.00% / +0.06% +0.11% +0.06%] index_copy_ perm : Elapsed 3.844 ms (76.881 ms / 20) 80.391 -> 80.280 ( -0.14%) [ +0.07% +0.31% +0.00% / -0.14% +0.53% +0.58%] index_add_ perm_sorted : Elapsed 4.022 ms (80.445 ms / 20) 76.476 -> 76.407 ( -0.09%) [ +0.00% +0.27% +0.19% / -0.09% +0.15% +0.18%] index_copy_ perm_sorted : Elapsed 3.824 ms (76.476 ms / 20) 173.986 -> 174.154 ( +0.10%) [ +0.14% +0.13% +0.00% / +0.13% +0.23% +0.10%] index_select const : Elapsed 8.712 ms (174.234 ms / 20) 225.047 -> 225.135 ( +0.04%) [ +0.30% +0.00% +0.17% / +0.04% +0.61% +0.86%] index_select wrap : Elapsed 11.286 ms (225.729 ms / 20) 185.560 -> 186.149 ( +0.32%) [ +0.00% +0.26% +0.06% / +0.32% +0.56% +0.64%] index_select linear : Elapsed 9.278 ms (185.560 ms / 20) 202.557 -> 202.148 ( -0.20%) [ +0.00% +0.09% +0.04% / -0.11% -0.08% -0.20%] index_select reverse : Elapsed 10.128 ms (202.557 ms / 20) 178.379 -> 173.990 ( -2.46%) [ +0.26% +0.00% +0.26% / +0.15% -2.46% -2.17%] index_select skip64 : Elapsed 8.943 ms (178.850 ms / 20) 173.816 -> 174.001 ( +0.11%) [ +0.40% +0.14% +0.00% / +0.11% +0.27% +0.23%] index_select skip256 : Elapsed 8.726 ms (174.518 ms / 20) 211.118 -> 211.512 ( +0.19%) [ +0.55% +0.31% +0.00% / +0.19% +0.89% +0.68%] index_select spread : Elapsed 10.613 ms (212.270 ms / 20) 235.469 -> 235.671 ( +0.09%) [ +0.33% +0.33% +0.00% / +0.09% +0.10% +0.74%] index_select strided 3 : Elapsed 11.813 ms (236.252 ms / 20) 237.806 -> 237.715 ( -0.04%) [ +0.00% +0.12% +0.12% / +0.05% -0.04% +0.09%] index_select strided 5 : Elapsed 11.890 ms (237.806 ms / 20) 239.524 -> 239.236 ( -0.12%) [ +0.14% +0.13% +0.00% / +0.08% -0.06% -0.12%] index_select strided 7 : Elapsed 11.994 ms (239.870 ms / 20) 238.093 -> 238.390 ( +0.12%) [ +0.07% +0.00% +0.09% / +0.27% +0.23% +0.12%] index_select strided 8 : Elapsed 11.914 ms (238.271 ms / 20) 237.367 -> 237.879 ( +0.22%) [ +0.21% +0.11% +0.00% / +0.22% +0.56% +0.54%] index_select strided 16 : Elapsed 11.893 ms (237.856 ms / 20) 238.085 -> 237.914 ( -0.07%) [ +0.39% +0.03% +0.00% / -0.07% +0.60% +0.28%] index_select strided 64 : Elapsed 11.951 ms (239.022 ms / 20) 234.999 -> 236.021 ( +0.43%) [ +0.16% +0.00% +0.37% / +0.43% +1.21% +1.00%] index_select random : Elapsed 11.769 ms (235.380 ms / 20) 207.961 -> 208.368 ( +0.20%) [ +0.28% +0.21% +0.00% / +0.20% +2.19% +2.06%] index_select random_sorted : Elapsed 10.427 ms (208.549 ms / 20) out_shape = [50, 100, 256, 20] in_shape = [50, 100, 40, 20] idx_dim = 2 B = [50, 100, 256, 20] (stride (512000, 20, 2000, 1)) A = [50, 100, 40, 20] (stride (80000, 40, 1, 4000)) dim = 2 78.634 -> 78.626 ( -0.01%) [ +0.08% +0.00% +0.09% / +0.13% -0.01% +0.02%] index_add_ linear : Elapsed 3.935 ms (78.700 ms / 20) 76.769 -> 76.705 ( -0.08%) [ +0.00% +0.00% +0.03% / +0.18% -0.08% -0.06%] index_copy_ linear : Elapsed 3.838 ms (76.769 ms / 20) 78.595 -> 78.602 ( +0.01%) [ +0.08% +0.00% +0.12% / +0.01% +0.04% +0.06%] index_add_ reverse : Elapsed 3.933 ms (78.654 ms / 20) 76.734 -> 76.666 ( -0.09%) [ +0.02% +0.00% +0.04% / +0.13% -0.09% -0.04%] index_copy_ reverse : Elapsed 3.837 ms (76.748 ms / 20) 78.637 -> 78.678 ( +0.05%) [ +0.00% +0.06% +0.06% / +0.12% +0.05% +0.05%] index_add_ spread : Elapsed 3.932 ms (78.637 ms / 20) 76.705 -> 76.668 ( -0.05%) [ +0.00% +0.13% +0.02% / +0.11% +0.00% -0.05%] index_copy_ spread : Elapsed 3.835 ms (76.705 ms / 20) 78.663 -> 78.594 ( -0.09%) [ +0.02% +0.05% +0.00% / +0.04% -0.07% -0.09%] index_add_ strided 3 : Elapsed 3.934 ms (78.680 ms / 20) 76.795 -> 76.653 ( -0.18%) [ +0.02% +0.01% +0.00% / +0.08% -0.05% -0.18%] index_copy_ strided 3 : Elapsed 3.840 ms (76.807 ms / 20) 78.723 -> 78.780 ( +0.07%) [ +0.06% +0.00% +0.05% / +0.07% +0.09% +0.09%] index_add_ strided 5 : Elapsed 3.939 ms (78.771 ms / 20) 76.780 -> 76.767 ( -0.02%) [ +0.05% +0.05% +0.00% / +0.14% +0.06% -0.02%] index_copy_ strided 5 : Elapsed 3.841 ms (76.820 ms / 20) 78.731 -> 78.613 ( -0.15%) [ +0.13% +0.06% +0.00% / +0.16% -0.15% -0.13%] index_add_ strided 7 : Elapsed 3.942 ms (78.836 ms / 20) 76.817 -> 76.678 ( -0.18%) [ +0.08% +0.07% +0.00% / +0.10% -0.18% -0.17%] index_copy_ strided 7 : Elapsed 3.844 ms (76.877 ms / 20) 78.706 -> 78.588 ( -0.15%) [ +0.01% +0.00% +0.12% / +0.09% -0.06% -0.15%] index_add_ strided 255 : Elapsed 3.936 ms (78.715 ms / 20) 76.766 -> 76.696 ( -0.09%) [ +0.00% +0.00% +0.08% / +0.08% -0.09% -0.08%] index_copy_ strided 255 : Elapsed 3.838 ms (76.766 ms / 20) 78.755 -> 78.582 ( -0.22%) [ +0.00% +0.10% +0.01% / +0.09% -0.20% -0.22%] index_add_ perm : Elapsed 3.938 ms (78.755 ms / 20) 76.805 -> 76.675 ( -0.17%) [ +0.04% +0.02% +0.00% / +0.06% -0.17% -0.02%] index_copy_ perm : Elapsed 3.842 ms (76.837 ms / 20) 78.823 -> 78.594 ( -0.29%) [ +0.12% +0.05% +0.00% / +0.15% -0.25% -0.29%] index_add_ perm_sorted : Elapsed 3.946 ms (78.918 ms / 20) 76.858 -> 76.702 ( -0.20%) [ +0.00% +0.10% +0.04% / +0.13% -0.20% -0.20%] index_copy_ perm_sorted : Elapsed 3.843 ms (76.858 ms / 20) 484.054 -> 484.019 ( -0.01%) [ +0.04% +0.03% +0.00% / -0.01% +0.07% +0.02%] index_select const : Elapsed 24.213 ms (484.264 ms / 20) 492.424 -> 491.849 ( -0.12%) [ +0.02% +0.00% +0.03% / +0.04% -0.12% -0.11%] index_select wrap : Elapsed 24.625 ms (492.498 ms / 20) 484.513 -> 483.806 ( -0.15%) [ +0.03% +0.04% +0.00% / -0.02% -0.14% -0.15%] index_select linear : Elapsed 24.233 ms (484.668 ms / 20) 485.863 -> 485.028 ( -0.17%) [ +0.01% +0.02% +0.00% / +0.04% -0.15% -0.17%] index_select reverse : Elapsed 24.295 ms (485.902 ms / 20) 484.062 -> 484.081 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.01% +0.02% +0.00%] index_select skip64 : Elapsed 24.212 ms (484.245 ms / 20) 483.802 -> 484.052 ( +0.05%) [ +0.08% +0.00% +0.06% / +0.06% +0.05% +0.10%] index_select skip256 : Elapsed 24.210 ms (484.195 ms / 20) 486.129 -> 485.611 ( -0.11%) [ +0.04% +0.00% +0.02% / +0.05% -0.08% -0.11%] index_select spread : Elapsed 24.316 ms (486.314 ms / 20) 493.873 -> 493.411 ( -0.09%) [ +0.01% +0.00% +0.03% / +0.06% -0.08% -0.09%] index_select strided 3 : Elapsed 24.696 ms (493.926 ms / 20) 494.237 -> 493.622 ( -0.12%) [ +0.00% +0.05% +0.05% / -0.00% -0.09% -0.12%] index_select strided 5 : Elapsed 24.712 ms (494.237 ms / 20) 493.983 -> 493.561 ( -0.09%) [ +0.00% +0.08% +0.03% / +0.06% -0.09% -0.08%] index_select strided 7 : Elapsed 24.699 ms (493.983 ms / 20) 494.095 -> 493.615 ( -0.10%) [ +0.02% +0.04% +0.00% / +0.00% -0.10% -0.08%] index_select strided 8 : Elapsed 24.710 ms (494.198 ms / 20) 494.205 -> 493.870 ( -0.07%) [ +0.01% +0.02% +0.00% / -0.00% -0.07% -0.07%] index_select strided 16 : Elapsed 24.712 ms (494.240 ms / 20) 493.668 -> 493.096 ( -0.12%) [ +0.01% +0.01% +0.00% / +0.01% -0.12% -0.11%] index_select random : Elapsed 24.686 ms (493.730 ms / 20) 486.383 -> 485.663 ( -0.15%) [ +0.06% +0.00% +0.01% / +0.01% -0.14% -0.15%] index_select random_sorted : Elapsed 24.334 ms (486.675 ms / 20) out_shape = [50, 100, 40, 256] in_shape = [50, 100, 40, 20] idx_dim = 3 out_shape = [256, 20, 40, 50] in_shape = [100, 20, 40, 50] idx_dim = 0 B = [256, 20, 40, 50] (stride (50, 12800, 256000, 1)) A = [100, 20, 40, 50] (stride (40000, 1, 20, 800)) dim = 0 75.263 -> 75.339 ( +0.10%) [ +0.51% +0.16% +0.00% / +0.10% +0.33% +0.17%] index_add_ linear : Elapsed 3.782 ms (75.649 ms / 20) 71.404 -> 71.193 ( -0.30%) [ +0.26% +0.18% +0.00% / -0.30% +0.30% +0.36%] index_copy_ linear : Elapsed 3.579 ms (71.587 ms / 20) 75.040 -> 75.131 ( +0.12%) [ +0.00% +0.05% +0.33% / +0.12% +0.43% +0.78%] index_add_ reverse : Elapsed 3.752 ms (75.040 ms / 20) 71.482 -> 71.344 ( -0.19%) [ +0.42% +0.05% +0.00% / -0.19% +0.39% +0.42%] index_copy_ reverse : Elapsed 3.589 ms (71.784 ms / 20) 75.668 -> 75.520 ( -0.20%) [ +0.00% +0.32% +0.06% / -0.20% +0.34% +0.41%] index_add_ spread : Elapsed 3.783 ms (75.668 ms / 20) 71.238 -> 71.430 ( +0.27%) [ +0.00% +0.32% +0.36% / +0.27% +0.71% +0.69%] index_copy_ spread : Elapsed 3.562 ms (71.238 ms / 20) 74.616 -> 74.729 ( +0.15%) [ +0.35% +0.23% +0.00% / +0.15% +0.86% +0.73%] index_add_ strided 3 : Elapsed 3.744 ms (74.875 ms / 20) 70.360 -> 70.557 ( +0.28%) [ +0.58% +0.69% +0.00% / +0.28% +1.42% +1.26%] index_copy_ strided 3 : Elapsed 3.538 ms (70.769 ms / 20) 74.910 -> 74.815 ( -0.13%) [ +0.12% +0.38% +0.00% / -0.13% +0.43% +0.42%] index_add_ strided 5 : Elapsed 3.750 ms (75.002 ms / 20) 70.919 -> 70.807 ( -0.16%) [ +0.04% +0.09% +0.00% / -0.16% +0.52% +0.38%] index_copy_ strided 5 : Elapsed 3.547 ms (70.948 ms / 20) 74.750 -> 74.899 ( +0.20%) [ +0.46% +0.53% +0.00% / +0.20% +0.76% +0.54%] index_add_ strided 7 : Elapsed 3.755 ms (75.095 ms / 20) 70.752 -> 71.002 ( +0.35%) [ +0.20% +0.52% +0.00% / +0.35% +0.92% +0.83%] index_copy_ strided 7 : Elapsed 3.545 ms (70.891 ms / 20) 75.140 -> 75.228 ( +0.12%) [ +0.46% +0.23% +0.00% / +0.40% +0.12% +0.13%] index_add_ strided 255 : Elapsed 3.774 ms (75.486 ms / 20) 71.299 -> 71.525 ( +0.32%) [ +0.24% +0.29% +0.00% / +0.32% +0.64% +0.49%] index_copy_ strided 255 : Elapsed 3.574 ms (71.472 ms / 20) 75.007 -> 75.048 ( +0.05%) [ +0.22% +0.49% +0.00% / +0.05% +0.93% +0.89%] index_add_ perm : Elapsed 3.759 ms (75.171 ms / 20) 71.304 -> 71.059 ( -0.34%) [ +0.01% +0.20% +0.00% / -0.34% +0.07% +0.62%] index_copy_ perm : Elapsed 3.565 ms (71.308 ms / 20) 75.138 -> 75.010 ( -0.17%) [ +0.07% +0.00% +0.01% / -0.17% +0.70% +0.77%] index_add_ perm_sorted : Elapsed 3.759 ms (75.189 ms / 20) 71.027 -> 71.082 ( +0.08%) [ +0.41% +0.24% +0.00% / +0.08% +0.41% +0.97%] index_copy_ perm_sorted : Elapsed 3.566 ms (71.319 ms / 20) 98.330 -> 97.556 ( -0.79%) [ +0.00% +0.09% +0.12% / +0.11% -0.69% -0.79%] index_select const : Elapsed 4.916 ms (98.330 ms / 20) 209.130 -> 208.992 ( -0.07%) [ +0.20% +0.11% +0.00% / +0.05% +0.02% -0.07%] index_select wrap : Elapsed 10.477 ms (209.538 ms / 20) 130.886 -> 131.188 ( +0.23%) [ +0.24% +0.00% +0.13% / +0.25% +0.37% +0.23%] index_select linear : Elapsed 6.560 ms (131.199 ms / 20) 153.335 -> 152.781 ( -0.36%) [ +0.00% +0.08% +0.06% / +0.10% -0.34% -0.36%] index_select reverse : Elapsed 7.667 ms (153.335 ms / 20) 99.087 -> 98.074 ( -1.02%) [ +0.00% +0.03% +0.02% / +0.09% -1.02% -0.98%] index_select skip64 : Elapsed 4.954 ms (99.087 ms / 20) 98.341 -> 97.584 ( -0.77%) [ +0.00% +0.09% +0.14% / +0.16% -0.73% -0.77%] index_select skip256 : Elapsed 4.917 ms (98.341 ms / 20) 180.289 -> 180.726 ( +0.24%) [ +0.44% +0.00% +0.01% / +0.24% +0.32% +0.47%] index_select spread : Elapsed 9.054 ms (181.082 ms / 20) 211.662 -> 211.941 ( +0.13%) [ +0.15% +0.06% +0.00% / +0.13% +0.16% +0.44%] index_select strided 3 : Elapsed 10.599 ms (211.981 ms / 20) 208.353 -> 206.234 ( -1.02%) [ +0.08% +0.09% +0.00% / -0.02% -0.56% -1.02%] index_select strided 5 : Elapsed 10.426 ms (208.526 ms / 20) 211.383 -> 209.141 ( -1.06%) [ +0.16% +0.13% +0.00% / -0.10% -1.04% -1.06%] index_select strided 7 : Elapsed 10.586 ms (211.714 ms / 20) 206.991 -> 207.547 ( +0.27%) [ +0.25% +0.00% +0.07% / +0.27% +2.01% +1.97%] index_select strided 8 : Elapsed 10.376 ms (207.515 ms / 20) 206.634 -> 206.701 ( +0.03%) [ +0.00% +0.12% +0.16% / +0.03% +1.72% +1.92%] index_select strided 16 : Elapsed 10.332 ms (206.634 ms / 20) 207.736 -> 208.128 ( +0.19%) [ +0.17% +0.00% +0.08% / +0.19% +1.65% +1.43%] index_select strided 64 : Elapsed 10.404 ms (208.081 ms / 20) 206.191 -> 206.104 ( -0.04%) [ +0.00% +0.01% +0.00% / -0.04% +0.03% +0.37%] index_select random : Elapsed 10.310 ms (206.195 ms / 20) 171.931 -> 171.930 ( -0.00%) [ +0.19% +0.03% +0.00% / -0.00% +1.59% +1.58%] index_select random_sorted : Elapsed 8.613 ms (172.257 ms / 20) out_shape = [100, 256, 40, 50] in_shape = [100, 20, 40, 50] idx_dim = 1 out_shape = [100, 20, 256, 50] in_shape = [100, 20, 40, 50] idx_dim = 2 out_shape = [100, 20, 40, 256] in_shape = [100, 20, 40, 50] idx_dim = 3 B = [100, 20, 40, 256] (stride (204800, 256, 5120, 1)) A = [100, 20, 40, 50] (stride (40000, 1, 1000, 20)) dim = 3 202.206 -> 202.064 ( -0.07%) [ +0.10% +0.00% +0.14% / -0.07% -0.02% +0.14%] index_add_ linear : Elapsed 10.120 ms (202.404 ms / 20) 152.579 -> 152.410 ( -0.11%) [ +0.00% +0.14% +0.05% / -0.11% +0.03% -0.03%] index_copy_ linear : Elapsed 7.629 ms (152.579 ms / 20) 201.871 -> 202.201 ( +0.16%) [ +0.00% +0.22% +0.25% / +0.19% +0.16% +0.19%] index_add_ reverse : Elapsed 10.094 ms (201.871 ms / 20) 152.479 -> 152.303 ( -0.12%) [ +0.11% +0.00% +0.26% / +0.05% +0.08% -0.12%] index_copy_ reverse : Elapsed 7.633 ms (152.653 ms / 20) 205.247 -> 205.200 ( -0.02%) [ +0.03% +0.11% +0.00% / -0.02% -0.00% +0.13%] index_add_ spread : Elapsed 10.265 ms (205.307 ms / 20) 153.833 -> 153.640 ( -0.13%) [ +0.00% +0.08% +0.20% / +0.08% +0.02% -0.13%] index_copy_ spread : Elapsed 7.692 ms (153.833 ms / 20) 203.969 -> 203.916 ( -0.03%) [ +0.22% +0.04% +0.00% / -0.03% +0.12% +0.11%] index_add_ strided 3 : Elapsed 10.221 ms (204.412 ms / 20) 153.131 -> 153.219 ( +0.06%) [ +0.00% +0.21% +0.19% / +0.13% +0.06% +0.27%] index_copy_ strided 3 : Elapsed 7.657 ms (153.131 ms / 20) 205.327 -> 204.991 ( -0.16%) [ +0.00% +0.03% +0.05% / -0.16% +0.04% +0.03%] index_add_ strided 5 : Elapsed 10.266 ms (205.327 ms / 20) 153.861 -> 153.573 ( -0.19%) [ +0.01% +0.00% +0.01% / -0.19% +0.03% -0.01%] index_copy_ strided 5 : Elapsed 7.694 ms (153.873 ms / 20) 205.829 -> 205.907 ( +0.04%) [ +0.09% +0.00% +0.08% / +0.04% +0.29% +0.36%] index_add_ strided 7 : Elapsed 10.300 ms (206.004 ms / 20) 153.800 -> 153.820 ( +0.01%) [ +0.11% +0.00% +0.02% / +0.01% +0.44% +0.32%] index_copy_ strided 7 : Elapsed 7.698 ms (153.970 ms / 20) 202.210 -> 201.966 ( -0.12%) [ +0.00% +0.04% +0.15% / -0.12% -0.07% +0.14%] index_add_ strided 255 : Elapsed 10.110 ms (202.210 ms / 20) 152.314 -> 152.462 ( +0.10%) [ +0.08% +0.00% +0.00% / +0.10% +0.15% +0.12%] index_copy_ strided 255 : Elapsed 7.622 ms (152.434 ms / 20) 208.510 -> 208.212 ( -0.14%) [ +0.00% +0.09% +0.06% / -0.14% -0.08% -0.03%] index_add_ perm : Elapsed 10.425 ms (208.510 ms / 20) 156.065 -> 155.925 ( -0.09%) [ +0.03% +0.00% +0.00% / -0.02% -0.09% +0.04%] index_copy_ perm : Elapsed 7.805 ms (156.106 ms / 20) 204.672 -> 204.368 ( -0.15%) [ +0.06% +0.13% +0.00% / -0.15% -0.04% -0.05%] index_add_ perm_sorted : Elapsed 10.240 ms (204.798 ms / 20) 153.606 -> 153.494 ( -0.07%) [ +0.00% +0.22% +0.22% / +0.03% -0.07% -0.07%] index_copy_ perm_sorted : Elapsed 7.680 ms (153.606 ms / 20) 749.964 -> 751.008 ( +0.14%) [ +0.02% +0.00% +0.15% / +0.14% +0.80% +0.56%] index_select const : Elapsed 37.504 ms (750.087 ms / 20) 819.217 -> 819.233 ( +0.00%) [ +0.10% +0.00% +0.08% / +0.00% +0.08% +0.15%] index_select wrap : Elapsed 41.001 ms (820.019 ms / 20) 756.748 -> 757.099 ( +0.05%) [ +0.21% +0.00% +0.11% / +0.13% +0.06% +0.05%] index_select linear : Elapsed 37.916 ms (758.322 ms / 20) 767.970 -> 767.739 ( -0.03%) [ +0.01% +0.00% +0.03% / -0.03% +0.24% +0.08%] index_select reverse : Elapsed 38.402 ms (768.046 ms / 20) 750.046 -> 749.973 ( -0.01%) [ +0.15% +0.06% +0.00% / -0.01% +0.57% +0.60%] index_select skip64 : Elapsed 37.557 ms (751.141 ms / 20) 748.970 -> 751.141 ( +0.29%) [ +0.00% +0.18% +0.14% / +0.29% +0.73% +0.81%] index_select skip256 : Elapsed 37.448 ms (748.970 ms / 20) 789.176 -> 789.253 ( +0.01%) [ +0.00% +0.01% +0.07% / +0.01% +0.20% +0.04%] index_select spread : Elapsed 39.459 ms (789.176 ms / 20) 826.330 -> 826.026 ( -0.04%) [ +0.00% +0.17% +0.11% / -0.04% -0.00% +0.05%] index_select strided 3 : Elapsed 41.317 ms (826.330 ms / 20) 825.426 -> 824.844 ( -0.07%) [ +0.00% +0.10% +0.01% / -0.06% -0.06% -0.07%] index_select strided 5 : Elapsed 41.271 ms (825.426 ms / 20) 826.286 -> 826.104 ( -0.02%) [ +0.06% +0.00% +0.07% / +0.00% +0.14% -0.02%] index_select strided 7 : Elapsed 41.338 ms (826.758 ms / 20) 828.030 -> 826.144 ( -0.23%) [ +0.00% +0.03% +0.06% / -0.07% -0.23% -0.22%] index_select strided 8 : Elapsed 41.402 ms (828.030 ms / 20) 827.919 -> 826.138 ( -0.22%) [ +0.03% +0.00% +0.06% / -0.04% -0.06% -0.22%] index_select strided 16 : Elapsed 41.408 ms (828.154 ms / 20) 823.716 -> 823.941 ( +0.03%) [ +0.00% +0.08% +0.08% / +0.03% +0.17% +0.22%] index_select random : Elapsed 41.186 ms (823.716 ms / 20) 787.315 -> 788.063 ( +0.10%) [ +0.07% +0.05% +0.00% / +0.10% +0.12% +0.20%] index_select random_sorted : Elapsed 39.392 ms (787.837 ms / 20) out_shape = [256, 20, 50, 40] in_shape = [100, 20, 50, 40] idx_dim = 0 out_shape = [100, 256, 50, 40] in_shape = [100, 20, 50, 40] idx_dim = 1 out_shape = [100, 20, 256, 40] in_shape = [100, 20, 50, 40] idx_dim = 2 B = [100, 20, 256, 40] (stride (256, 25600, 1, 512000)) A = [100, 20, 50, 40] (stride (50, 5000, 1, 100000)) dim = 2 222.367 -> 222.558 ( +0.09%) [ +0.05% +0.00% +0.00% / +0.09% +0.11% +0.11%] index_add_ linear : Elapsed 11.124 ms (222.477 ms / 20) 173.184 -> 173.271 ( +0.05%) [ +0.02% +0.03% +0.00% / +0.05% +0.15% +0.16%] index_copy_ linear : Elapsed 8.661 ms (173.227 ms / 20) 222.028 -> 222.212 ( +0.08%) [ +0.18% +0.00% +0.05% / +0.08% +0.14% +0.17%] index_add_ reverse : Elapsed 11.121 ms (222.420 ms / 20) 173.226 -> 172.990 ( -0.14%) [ +0.01% +0.03% +0.00% / -0.14% -0.01% +0.20%] index_copy_ reverse : Elapsed 8.662 ms (173.244 ms / 20) 225.658 -> 225.388 ( -0.12%) [ +0.00% +0.01% +0.01% / -0.12% +0.07% -0.05%] index_add_ spread : Elapsed 11.283 ms (225.658 ms / 20) 174.465 -> 174.519 ( +0.03%) [ +0.00% +0.10% +0.14% / +0.03% +0.19% +0.25%] index_copy_ spread : Elapsed 8.723 ms (174.465 ms / 20) 224.073 -> 223.979 ( -0.04%) [ +0.06% +0.00% +0.10% / -0.01% +0.10% -0.04%] index_add_ strided 3 : Elapsed 11.210 ms (224.207 ms / 20) 174.012 -> 174.092 ( +0.05%) [ +0.00% +0.05% +0.09% / +0.05% +0.13% +0.05%] index_copy_ strided 3 : Elapsed 8.701 ms (174.012 ms / 20) 225.546 -> 225.292 ( -0.11%) [ +0.01% +0.05% +0.00% / -0.02% -0.06% -0.11%] index_add_ strided 5 : Elapsed 11.278 ms (225.569 ms / 20) 174.696 -> 174.563 ( -0.08%) [ +0.09% +0.00% +0.04% / +0.05% -0.08% -0.01%] index_copy_ strided 5 : Elapsed 8.742 ms (174.846 ms / 20) 226.639 -> 226.215 ( -0.19%) [ +0.00% +0.00% +0.02% / -0.04% -0.19% -0.13%] index_add_ strided 7 : Elapsed 11.332 ms (226.639 ms / 20) 175.099 -> 175.180 ( +0.05%) [ +0.06% +0.00% +0.21% / +0.10% +0.05% +0.09%] index_copy_ strided 7 : Elapsed 8.760 ms (175.202 ms / 20) 222.743 -> 222.481 ( -0.12%) [ +0.01% +0.00% +0.09% / -0.02% -0.06% -0.12%] index_add_ strided 255 : Elapsed 11.138 ms (222.757 ms / 20) 173.372 -> 173.380 ( +0.00%) [ +0.05% +0.00% +0.18% / +0.01% +0.00% +0.08%] index_copy_ strided 255 : Elapsed 8.673 ms (173.462 ms / 20) 229.261 -> 229.059 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.04% -0.04% -0.09%] index_add_ perm : Elapsed 11.463 ms (229.261 ms / 20) 176.912 -> 176.886 ( -0.01%) [ +0.01% +0.00% +0.00% / -0.01% +0.12% +0.03%] index_copy_ perm : Elapsed 8.847 ms (176.933 ms / 20) 225.243 -> 224.923 ( -0.14%) [ +0.00% +0.06% +0.06% / -0.08% -0.14% -0.05%] index_add_ perm_sorted : Elapsed 11.262 ms (225.243 ms / 20) 174.421 -> 174.503 ( +0.05%) [ +0.00% +0.04% +0.12% / +0.05% +0.14% +0.07%] index_copy_ perm_sorted : Elapsed 8.721 ms (174.421 ms / 20) 873.780 -> 873.929 ( +0.02%) [ +0.05% +0.09% +0.00% / +0.02% +0.32% +0.14%] index_select const : Elapsed 43.710 ms (874.206 ms / 20) 879.055 -> 879.802 ( +0.08%) [ +0.00% +0.03% +0.00% / +0.12% +0.08% +0.18%] index_select wrap : Elapsed 43.954 ms (879.084 ms / 20) 874.468 -> 875.054 ( +0.07%) [ +0.01% +0.03% +0.00% / +0.07% +0.13% +0.10%] index_select linear : Elapsed 43.729 ms (874.581 ms / 20) 875.058 -> 874.934 ( -0.01%) [ +0.00% +0.03% +0.03% / -0.01% +0.19% +0.26%] index_select reverse : Elapsed 43.753 ms (875.058 ms / 20) 874.076 -> 874.578 ( +0.06%) [ +0.00% +0.07% +0.00% / +0.06% +0.19% +0.19%] index_select skip64 : Elapsed 43.706 ms (874.115 ms / 20) 874.243 -> 874.310 ( +0.01%) [ +0.02% +0.00% +0.00% / +0.01% +0.26% +0.20%] index_select skip256 : Elapsed 43.720 ms (874.400 ms / 20) 875.139 -> 875.603 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.05% +0.15% +0.07%] index_select spread : Elapsed 43.757 ms (875.146 ms / 20) 883.498 -> 884.007 ( +0.06%) [ +0.00% +0.07% +0.01% / +0.08% +0.06% +0.06%] index_select strided 3 : Elapsed 44.175 ms (883.498 ms / 20) 884.331 -> 884.496 ( +0.02%) [ +0.05% +0.01% +0.00% / +0.07% +0.07% +0.02%] index_select strided 5 : Elapsed 44.239 ms (884.775 ms / 20) 884.771 -> 885.044 ( +0.03%) [ +0.00% +0.04% +0.06% / +0.03% +0.16% +0.06%] index_select strided 7 : Elapsed 44.239 ms (884.771 ms / 20) 885.403 -> 885.107 ( -0.03%) [ +0.04% +0.00% +0.02% / -0.03% +0.01% -0.01%] index_select strided 8 : Elapsed 44.286 ms (885.728 ms / 20) 884.649 -> 884.887 ( +0.03%) [ +0.01% +0.02% +0.00% / +0.05% +0.03% +0.04%] index_select strided 16 : Elapsed 44.237 ms (884.737 ms / 20) 883.962 -> 884.514 ( +0.06%) [ +0.06% +0.00% +0.07% / +0.06% +0.08% +0.20%] index_select random : Elapsed 44.226 ms (884.530 ms / 20) 875.107 -> 875.756 ( +0.07%) [ +0.06% +0.09% +0.00% / +0.07% +0.13% +0.11%] index_select random_sorted : Elapsed 43.783 ms (875.663 ms / 20) out_shape = [100, 20, 50, 256] in_shape = [100, 20, 50, 40] idx_dim = 3 out_shape = [256, 40, 20, 50] in_shape = [100, 40, 20, 50] idx_dim = 0 out_shape = [100, 256, 20, 50] in_shape = [100, 40, 20, 50] idx_dim = 1 B = [100, 256, 20, 50] (stride (1, 100000, 5000, 100)) A = [100, 40, 20, 50] (stride (40000, 50, 2000, 1)) dim = 1 77.079 -> 76.277 ( -1.04%) [ +0.58% +0.18% +0.00% / -0.18% -1.04% -0.76%] index_add_ linear : Elapsed 3.876 ms (77.528 ms / 20) 67.090 -> 66.814 ( -0.41%) [ +0.00% +0.07% +0.02% / -0.10% +0.20% -0.41%] index_copy_ linear : Elapsed 3.354 ms (67.090 ms / 20) 76.709 -> 76.698 ( -0.01%) [ +0.00% +0.35% +0.29% / +0.04% -0.01% +0.13%] index_add_ reverse : Elapsed 3.835 ms (76.709 ms / 20) 67.146 -> 66.892 ( -0.38%) [ +0.00% +0.22% +0.24% / -0.38% -0.28% -0.13%] index_copy_ reverse : Elapsed 3.357 ms (67.146 ms / 20) 76.097 -> 75.617 ( -0.63%) [ +0.18% +0.20% +0.00% / -0.63% +3.57% +3.06%] index_add_ spread : Elapsed 3.812 ms (76.235 ms / 20) 66.743 -> 66.867 ( +0.19%) [ +0.02% +0.00% +0.07% / +0.19% +1.33% +1.07%] index_copy_ spread : Elapsed 3.338 ms (66.758 ms / 20) 73.734 -> 74.051 ( +0.43%) [ +1.04% +0.30% +0.00% / +0.43% +5.70% +6.03%] index_add_ strided 3 : Elapsed 3.725 ms (74.502 ms / 20) 66.670 -> 67.087 ( +0.63%) [ +0.62% +0.23% +0.00% / +0.63% +1.23% +1.42%] index_copy_ strided 3 : Elapsed 3.354 ms (67.081 ms / 20) 78.794 -> 77.794 ( -1.27%) [ +0.00% +0.06% +0.21% / +0.33% -1.27% -1.22%] index_add_ strided 5 : Elapsed 3.940 ms (78.794 ms / 20) 66.985 -> 66.911 ( -0.11%) [ +0.00% +0.03% +0.17% / +0.26% -0.11% -0.11%] index_copy_ strided 5 : Elapsed 3.349 ms (66.985 ms / 20) 78.369 -> 76.970 ( -1.79%) [ +0.91% +0.00% +0.27% / -0.22% -1.49% -1.79%] index_add_ strided 7 : Elapsed 3.954 ms (79.079 ms / 20) 66.985 -> 67.103 ( +0.18%) [ +0.64% +0.30% +0.00% / +0.36% +0.46% +0.18%] index_copy_ strided 7 : Elapsed 3.371 ms (67.416 ms / 20) 76.465 -> 76.272 ( -0.25%) [ +0.09% +0.15% +0.00% / -0.25% +1.39% +0.94%] index_add_ strided 255 : Elapsed 3.827 ms (76.531 ms / 20) 66.563 -> 66.797 ( +0.35%) [ +0.18% +0.35% +0.00% / +0.35% +1.01% +0.62%] index_copy_ strided 255 : Elapsed 3.334 ms (66.683 ms / 20) 76.546 -> 76.286 ( -0.34%) [ +0.00% +0.39% +0.13% / +0.36% -0.34% -0.15%] index_add_ perm : Elapsed 3.827 ms (76.546 ms / 20) 67.450 -> 66.998 ( -0.67%) [ +0.20% +0.00% +0.02% / -0.67% +0.08% -0.24%] index_copy_ perm : Elapsed 3.379 ms (67.585 ms / 20) 78.374 -> 76.771 ( -2.05%) [ +0.26% +0.58% +0.00% / -0.03% -1.48% -2.05%] index_add_ perm_sorted : Elapsed 3.929 ms (78.580 ms / 20) 67.195 -> 66.907 ( -0.43%) [ +0.48% +0.19% +0.00% / +0.37% -0.35% -0.43%] index_copy_ perm_sorted : Elapsed 3.376 ms (67.518 ms / 20) 590.124 -> 589.609 ( -0.09%) [ +0.00% +0.07% +0.11% / -0.09% +0.26% +0.28%] index_select const : Elapsed 29.506 ms (590.124 ms / 20) 588.348 -> 587.899 ( -0.08%) [ +0.04% +0.01% +0.00% / -0.08% +0.66% +0.72%] index_select wrap : Elapsed 29.429 ms (588.579 ms / 20) 588.526 -> 586.260 ( -0.39%) [ +0.17% +0.19% +0.00% / +0.39% -0.39% -0.13%] index_select linear : Elapsed 29.477 ms (589.541 ms / 20) 587.893 -> 588.254 ( +0.06%) [ +0.22% +0.30% +0.00% / +0.06% +0.32% +0.33%] index_select reverse : Elapsed 29.459 ms (589.189 ms / 20) 589.996 -> 589.744 ( -0.04%) [ +0.00% +0.10% +0.22% / -0.04% +0.10% +0.24%] index_select skip64 : Elapsed 29.500 ms (589.996 ms / 20) 590.215 -> 590.694 ( +0.08%) [ +0.09% +0.13% +0.00% / +0.08% +0.09% +0.24%] index_select skip256 : Elapsed 29.536 ms (590.729 ms / 20) 590.907 -> 590.880 ( -0.00%) [ +0.07% +0.19% +0.00% / -0.00% +0.21% +0.42%] index_select spread : Elapsed 29.567 ms (591.345 ms / 20) 590.469 -> 590.314 ( -0.03%) [ +0.00% +0.12% +0.24% / -0.03% +0.35% +0.23%] index_select strided 3 : Elapsed 29.523 ms (590.469 ms / 20) 591.613 -> 591.101 ( -0.09%) [ +0.00% +0.07% +0.06% / -0.09% +0.07% +0.17%] index_select strided 5 : Elapsed 29.581 ms (591.613 ms / 20) 587.929 -> 588.676 ( +0.13%) [ +0.00% +0.19% +0.03% / +0.13% +0.93% +0.80%] index_select strided 7 : Elapsed 29.396 ms (587.929 ms / 20) 591.022 -> 588.663 ( -0.40%) [ +0.08% +0.00% +0.09% / -0.07% -0.24% -0.40%] index_select strided 8 : Elapsed 29.574 ms (591.473 ms / 20) 591.313 -> 588.599 ( -0.46%) [ +0.14% +0.03% +0.00% / +0.10% -0.46% -0.35%] index_select strided 16 : Elapsed 29.606 ms (592.120 ms / 20) 588.583 -> 588.820 ( +0.04%) [ +0.01% +0.08% +0.00% / +0.04% +0.67% +0.45%] index_select random : Elapsed 29.431 ms (588.629 ms / 20) 591.130 -> 591.972 ( +0.14%) [ +0.00% +0.16% +0.04% / +0.14% +0.18% +0.32%] index_select random_sorted : Elapsed 29.557 ms (591.130 ms / 20) out_shape = [100, 40, 256, 50] in_shape = [100, 40, 20, 50] idx_dim = 2 out_shape = [100, 40, 20, 256] in_shape = [100, 40, 20, 50] idx_dim = 3 out_shape = [256, 40, 50, 20] in_shape = [100, 40, 50, 20] idx_dim = 0 out_shape = [100, 256, 50, 20] in_shape = [100, 40, 50, 20] idx_dim = 1 out_shape = [100, 40, 256, 20] in_shape = [100, 40, 50, 20] idx_dim = 2 out_shape = [100, 40, 50, 256] in_shape = [100, 40, 50, 20] idx_dim = 3 B = [100, 40, 50, 256] (stride (1, 1280000, 25600, 100)) A = [100, 40, 50, 20] (stride (1000, 100000, 1, 50)) dim = 3 good 114.237 -> 104.991 ( -8.09%) [ +1.60% +0.00% +0.70% / -0.06% -7.87% -8.09%] index_add_ linear : Elapsed 5.803 ms (116.065 ms / 20) 75.220 -> 73.484 ( -2.31%) [ +0.04% +0.00% +0.23% / +0.09% -2.10% -2.31%] index_copy_ linear : Elapsed 3.762 ms (75.249 ms / 20) 108.963 -> 107.124 ( -1.69%) [ +0.61% +0.45% +0.00% / +0.42% -1.69% -1.65%] index_add_ reverse : Elapsed 5.482 ms (109.632 ms / 20) 76.453 -> 74.739 ( -2.24%) [ +0.00% +0.52% +0.82% / +1.01% -2.22% -2.24%] index_copy_ reverse : Elapsed 3.823 ms (76.453 ms / 20) 105.393 -> 105.841 ( +0.43%) [ +0.43% +0.04% +0.00% / +0.43% +6.15% +6.76%] index_add_ spread : Elapsed 5.292 ms (105.842 ms / 20) 74.743 -> 74.713 ( -0.04%) [ +0.16% +0.63% +0.00% / -0.04% +0.42% +0.17%] index_copy_ spread : Elapsed 3.743 ms (74.864 ms / 20) 109.321 -> 109.591 ( +0.25%) [ +0.00% +1.05% +1.53% / +0.52% +0.53% +0.25%] index_add_ strided 3 : Elapsed 5.466 ms (109.321 ms / 20) 76.408 -> 75.427 ( -1.28%) [ +0.00% +0.23% +0.77% / +1.01% -1.28% -1.04%] index_copy_ strided 3 : Elapsed 3.820 ms (76.408 ms / 20) 108.370 -> 106.626 ( -1.61%) [ +0.11% +0.00% +0.25% / +0.36% -1.42% -1.61%] index_add_ strided 5 : Elapsed 5.425 ms (108.494 ms / 20) 77.091 -> 74.548 ( -3.30%) [ +0.00% +0.13% +0.00% / -0.09% -3.25% -3.30%] index_copy_ strided 5 : Elapsed 3.855 ms (77.091 ms / 20) 111.896 -> 110.669 ( -1.10%) [ +1.00% +0.00% +0.76% / +0.80% -1.10% +0.37%] index_add_ strided 7 : Elapsed 5.651 ms (113.018 ms / 20) good 82.097 -> 77.129 ( -6.05%) [ +1.12% +0.00% +0.53% / +0.42% -5.72% -6.05%] index_copy_ strided 7 : Elapsed 4.151 ms (83.016 ms / 20) good 118.972 -> 108.970 ( -8.41%) [ +0.00% +0.35% +1.11% / +0.90% -8.34% -8.41%] index_add_ strided 255 : Elapsed 5.949 ms (118.972 ms / 20) good 79.113 -> 72.675 ( -8.14%) [ +0.19% +0.00% +0.19% / +0.28% -8.14% -7.99%] index_copy_ strided 255 : Elapsed 3.963 ms (79.260 ms / 20) good 117.676 -> 108.827 ( -7.52%) [ +0.09% +0.12% +0.00% / +0.36% -6.77% -7.52%] index_add_ perm : Elapsed 5.889 ms (117.785 ms / 20) 78.549 -> 75.625 ( -3.72%) [ +0.00% +0.16% +0.56% / +0.55% -3.72% -3.43%] index_copy_ perm : Elapsed 3.927 ms (78.549 ms / 20) 112.361 -> 108.716 ( -3.24%) [ +0.12% +0.60% +0.00% / -0.35% -3.19% -3.24%] index_add_ perm_sorted : Elapsed 5.625 ms (112.497 ms / 20) 76.980 -> 76.747 ( -0.30%) [ +0.53% +0.08% +0.00% / -0.30% -0.01% +0.85%] index_copy_ perm_sorted : Elapsed 3.869 ms (77.388 ms / 20) 1373.357 -> 1374.614 ( +0.09%) [ +0.18% +0.01% +0.00% / +0.09% +0.44% +0.45%] index_select const : Elapsed 68.791 ms (1375.817 ms / 20) 1369.088 -> 1369.826 ( +0.05%) [ +0.10% +0.00% +0.13% / +0.05% +1.12% +1.16%] index_select wrap : Elapsed 68.526 ms (1370.516 ms / 20) 1372.253 -> 1371.814 ( -0.03%) [ +0.16% +0.02% +0.00% / -0.03% +0.42% +0.43%] index_select linear : Elapsed 68.720 ms (1374.395 ms / 20) 1372.829 -> 1373.772 ( +0.07%) [ +0.08% +0.19% +0.00% / +0.07% +0.56% +0.66%] index_select reverse : Elapsed 68.700 ms (1373.991 ms / 20) 1376.413 -> 1376.334 ( -0.01%) [ +0.14% +0.00% +0.04% / -0.01% +0.32% +0.33%] index_select skip64 : Elapsed 68.915 ms (1378.293 ms / 20) 1374.726 -> 1376.055 ( +0.10%) [ +0.00% +0.15% +0.15% / +0.10% +0.27% +0.15%] index_select skip256 : Elapsed 68.736 ms (1374.726 ms / 20) 1369.484 -> 1370.001 ( +0.04%) [ +0.20% +0.02% +0.00% / +0.04% +0.92% +1.00%] index_select spread : Elapsed 68.608 ms (1372.162 ms / 20) 1374.996 -> 1376.548 ( +0.11%) [ +0.12% +0.16% +0.00% / +0.11% +0.73% +0.68%] index_select strided 3 : Elapsed 68.832 ms (1376.636 ms / 20) 1374.489 -> 1374.033 ( -0.03%) [ +0.02% +0.11% +0.00% / -0.03% +0.72% +0.66%] index_select strided 5 : Elapsed 68.740 ms (1374.795 ms / 20) 1367.939 -> 1369.947 ( +0.15%) [ +0.00% +0.08% +0.02% / +0.15% +1.21% +1.21%] index_select strided 7 : Elapsed 68.397 ms (1367.939 ms / 20) 1376.255 -> 1377.767 ( +0.11%) [ +0.01% +0.00% +0.12% / +0.11% +0.36% +0.35%] index_select strided 8 : Elapsed 68.817 ms (1376.342 ms / 20) 1373.307 -> 1373.992 ( +0.05%) [ +0.20% +0.09% +0.00% / +0.05% +0.52% +0.52%] index_select strided 16 : Elapsed 68.800 ms (1376.003 ms / 20) 1367.651 -> 1368.281 ( +0.05%) [ +0.00% +0.00% +0.15% / +0.05% +1.24% +1.17%] index_select random : Elapsed 68.384 ms (1367.671 ms / 20) 1376.155 -> 1375.424 ( -0.05%) [ +0.10% +0.00% +0.11% / -0.05% +0.47% +0.61%] index_select random_sorted : Elapsed 68.874 ms (1377.478 ms / 20) out_shape = [256, 50, 20, 40] in_shape = [100, 50, 20, 40] idx_dim = 0 out_shape = [100, 256, 20, 40] in_shape = [100, 50, 20, 40] idx_dim = 1 out_shape = [100, 50, 256, 40] in_shape = [100, 50, 20, 40] idx_dim = 2 out_shape = [100, 50, 20, 256] in_shape = [100, 50, 20, 40] idx_dim = 3 out_shape = [256, 50, 40, 20] in_shape = [100, 50, 40, 20] idx_dim = 0 out_shape = [100, 256, 40, 20] in_shape = [100, 50, 40, 20] idx_dim = 1 out_shape = [100, 50, 256, 20] in_shape = [100, 50, 40, 20] idx_dim = 2 out_shape = [100, 50, 40, 256] in_shape = [100, 50, 40, 20] idx_dim = 3 ==================== rep_count = 100 dimensions = [4, 5, 16, 20, 40] out_shape = [4, 16, 20, 40] in_shape = [5, 16, 20, 40] idx_dim = 0 B = [4, 16, 20, 40] (stride (12800, 40, 640, 1)) A = [5, 16, 20, 40] (stride (800, 4000, 40, 1)) dim = 0 5.380 -> 5.379 ( -0.02%) [ +0.06% +0.07% +0.00% / -0.02% +0.22% +0.02%] index_select const : Elapsed 0.054 ms (5.383 ms / 100) 5.500 -> 5.492 ( -0.15%) [ +0.09% +0.00% +0.02% / +0.00% -0.15% -0.15%] index_select wrap : Elapsed 0.055 ms (5.505 ms / 100) 5.495 -> 5.487 ( -0.15%) [ +0.07% +0.00% +0.09% / +0.13% -0.15% +0.00%] index_select linear : Elapsed 0.055 ms (5.499 ms / 100) 5.514 -> 5.493 ( -0.38%) [ +0.00% +0.09% +0.11% / +0.07% -0.38% -0.31%] index_select reverse : Elapsed 0.055 ms (5.514 ms / 100) 5.377 -> 5.376 ( -0.02%) [ +0.07% +0.00% +0.07% / -0.02% +0.11% +0.11%] index_select skip64 : Elapsed 0.054 ms (5.381 ms / 100) 5.382 -> 5.375 ( -0.13%) [ +0.04% +0.00% +0.00% / -0.13% +0.04% +0.07%] index_select skip256 : Elapsed 0.054 ms (5.384 ms / 100) 5.500 -> 5.488 ( -0.22%) [ +0.11% +0.00% +0.02% / -0.02% +0.00% -0.22%] index_select spread : Elapsed 0.055 ms (5.506 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.00% +0.07% +0.00% / -0.02% +0.00% -0.09%] index_select strided 3 : Elapsed 0.055 ms (5.486 ms / 100) 5.494 -> 5.487 ( -0.13%) [ +0.13% +0.09% +0.00% / +0.13% -0.04% -0.13%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.493 -> 5.484 ( -0.16%) [ +0.04% +0.07% +0.00% / -0.07% -0.11% -0.16%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) 5.488 -> 5.481 ( -0.13%) [ +0.00% +0.05% +0.09% / +0.00% +0.02% -0.13%] index_select perm : Elapsed 0.055 ms (5.488 ms / 100) 5.488 -> 5.480 ( -0.15%) [ +0.07% +0.00% +0.05% / +0.00% -0.15% -0.09%] index_select perm_sorted : Elapsed 0.055 ms (5.492 ms / 100) B = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) A = [5, 16, 20, 40] (stride (320, 1, 16, 1600)) dim = 0 5.521 -> 5.516 ( -0.09%) [ +0.02% +0.00% +0.11% / +0.07% -0.04% -0.09%] index_select const : Elapsed 0.055 ms (5.522 ms / 100) 5.565 -> 5.571 ( +0.11%) [ +0.00% +0.05% +0.34% / +0.25% +0.11% +0.16%] index_select wrap : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.07% +0.00% +0.16% / +0.20% +0.04% +0.09%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.559 -> 5.571 ( +0.22%) [ +0.02% +0.00% +0.05% / +0.22% +0.23% +0.23%] index_select reverse : Elapsed 0.056 ms (5.560 ms / 100) 5.518 -> 5.512 ( -0.11%) [ +0.13% +0.00% +0.11% / +0.18% -0.11% -0.05%] index_select skip64 : Elapsed 0.055 ms (5.525 ms / 100) 5.521 -> 5.510 ( -0.20%) [ +0.02% +0.00% +0.07% / +0.11% -0.07% -0.20%] index_select skip256 : Elapsed 0.055 ms (5.522 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.09% +0.02% +0.16%] index_select spread : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.14% +0.00% +0.20% / +0.31% +0.22% +0.04%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.552 -> 5.549 ( -0.05%) [ +0.00% +0.23% +0.27% / +0.18% -0.05% -0.04%] index_select random : Elapsed 0.056 ms (5.552 ms / 100) 5.548 -> 5.547 ( -0.02%) [ +0.00% +0.25% +0.31% / +0.27% -0.02% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.548 ms / 100) 5.564 -> 5.572 ( +0.14%) [ +0.11% +0.00% +0.36% / +0.27% +0.27% +0.14%] index_select perm : Elapsed 0.056 ms (5.570 ms / 100) 5.562 -> 5.571 ( +0.16%) [ +0.23% +0.00% +0.22% / +0.16% +0.20% +0.16%] index_select perm_sorted : Elapsed 0.056 ms (5.575 ms / 100) B = [4, 16, 20, 40] (stride (1, 3200, 4, 80)) A = [5, 16, 20, 40] (stride (640, 40, 3200, 1)) dim = 0 5.798 -> 5.770 ( -0.48%) [ +0.00% +0.05% +0.07% / +0.16% -0.48% -0.41%] index_select const : Elapsed 0.058 ms (5.798 ms / 100) 5.870 -> 5.866 ( -0.07%) [ +0.00% +0.12% +0.09% / +0.15% -0.05% -0.07%] index_select wrap : Elapsed 0.059 ms (5.870 ms / 100) 5.866 -> 5.853 ( -0.22%) [ +0.05% +0.00% +0.05% / +0.07% -0.17% -0.22%] index_select linear : Elapsed 0.059 ms (5.869 ms / 100) 5.867 -> 5.868 ( +0.02%) [ +0.00% +0.00% +0.07% / +0.02% +0.02% +0.05%] index_select reverse : Elapsed 0.059 ms (5.867 ms / 100) 5.790 -> 5.755 ( -0.60%) [ +0.03% +0.00% +0.05% / +0.00% -0.60% -0.54%] index_select skip64 : Elapsed 0.058 ms (5.792 ms / 100) 5.791 -> 5.756 ( -0.60%) [ +0.03% +0.00% +0.00% / +0.09% -0.57% -0.60%] index_select skip256 : Elapsed 0.058 ms (5.793 ms / 100) 5.867 -> 5.865 ( -0.03%) [ +0.00% +0.07% +0.17% / +0.17% -0.02% -0.03%] index_select spread : Elapsed 0.059 ms (5.867 ms / 100) 5.863 -> 5.849 ( -0.24%) [ +0.12% +0.00% +0.15% / -0.02% -0.17% -0.24%] index_select strided 3 : Elapsed 0.059 ms (5.870 ms / 100) 5.843 -> 5.820 ( -0.39%) [ +0.00% +0.02% +0.10% / +0.07% -0.39% -0.38%] index_select random : Elapsed 0.058 ms (5.843 ms / 100) 5.837 -> 5.822 ( -0.26%) [ +0.09% +0.00% +0.12% / +0.05% -0.19% -0.26%] index_select random_sorted : Elapsed 0.058 ms (5.842 ms / 100) 5.865 -> 5.865 ( +0.00%) [ +0.09% +0.00% +0.12% / +0.00% +0.29% +0.41%] index_select perm : Elapsed 0.059 ms (5.870 ms / 100) 5.857 -> 5.861 ( +0.07%) [ +0.09% +0.00% +0.02% / +0.07% +0.27% +0.29%] index_select perm_sorted : Elapsed 0.059 ms (5.862 ms / 100) B = [4, 16, 20, 40] (stride (320, 20, 1, 1280)) A = [5, 16, 20, 40] (stride (640, 40, 3200, 1)) dim = 0 5.440 -> 5.442 ( +0.04%) [ +0.00% +0.04% +0.11% / +0.04% +0.31% +0.37%] index_select const : Elapsed 0.054 ms (5.440 ms / 100) 5.541 -> 5.535 ( -0.11%) [ +0.11% +0.00% +0.02% / +0.07% -0.11% -0.04%] index_select wrap : Elapsed 0.055 ms (5.547 ms / 100) 5.542 -> 5.538 ( -0.07%) [ +0.09% +0.00% +0.00% / -0.07% -0.07% -0.02%] index_select linear : Elapsed 0.055 ms (5.547 ms / 100) 5.545 -> 5.539 ( -0.11%) [ +0.00% +0.07% +0.09% / +0.09% -0.11% +0.04%] index_select reverse : Elapsed 0.055 ms (5.545 ms / 100) 5.440 -> 5.435 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.09% +0.13% +0.13%] index_select skip64 : Elapsed 0.054 ms (5.441 ms / 100) 5.440 -> 5.447 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.40% +0.31%] index_select skip256 : Elapsed 0.054 ms (5.440 ms / 100) 5.542 -> 5.539 ( -0.05%) [ +0.07% +0.00% +0.14% / +0.14% +0.02% -0.05%] index_select spread : Elapsed 0.055 ms (5.546 ms / 100) 5.536 -> 5.532 ( -0.07%) [ +0.00% +0.00% +0.05% / +0.09% -0.07% -0.05%] index_select strided 3 : Elapsed 0.055 ms (5.536 ms / 100) 5.478 -> 5.477 ( -0.02%) [ +0.00% +0.07% +0.18% / -0.02% +0.05% +0.07%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.480 -> 5.478 ( -0.04%) [ +0.00% +0.02% +0.18% / +0.11% -0.04% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.480 ms / 100) 5.538 -> 5.543 ( +0.09%) [ +0.00% +0.04% +0.02% / +0.09% +0.23% +0.22%] index_select perm : Elapsed 0.055 ms (5.538 ms / 100) 5.533 -> 5.533 ( +0.00%) [ +0.07% +0.00% +0.16% / +0.14% +0.00% +0.14%] index_select perm_sorted : Elapsed 0.055 ms (5.537 ms / 100) B = [4, 16, 20, 40] (stride (20, 80, 1, 1280)) A = [5, 16, 20, 40] (stride (40, 4000, 200, 1)) dim = 0 5.488 -> 5.491 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.05% +0.07%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.587 -> 5.585 ( -0.04%) [ +0.00% +0.09% +0.11% / +0.02% +0.04% -0.04%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.00% +0.16% +0.25% / +0.04% +0.20% +0.22%] index_select linear : Elapsed 0.056 ms (5.577 ms / 100) 5.595 -> 5.602 ( +0.13%) [ +0.05% +0.00% +0.09% / +0.18% +0.13% +0.18%] index_select reverse : Elapsed 0.056 ms (5.598 ms / 100) 5.482 -> 5.481 ( -0.02%) [ +0.00% +0.09% +0.00% / +0.20% -0.02% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.482 ms / 100) 5.482 -> 5.482 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.07% +0.00% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.485 ms / 100) 5.587 -> 5.584 ( -0.05%) [ +0.00% +0.05% +0.13% / +0.07% -0.05% +0.04%] index_select spread : Elapsed 0.056 ms (5.587 ms / 100) 5.583 -> 5.582 ( -0.02%) [ +0.14% +0.00% +0.05% / +0.23% +0.11% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.591 ms / 100) 5.568 -> 5.565 ( -0.05%) [ +0.05% +0.00% +0.14% / -0.02% +0.04% -0.05%] index_select random : Elapsed 0.056 ms (5.571 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.09% +0.00% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) 5.592 -> 5.594 ( +0.04%) [ +0.04% +0.23% +0.00% / +0.21% +0.04% +0.05%] index_select perm : Elapsed 0.056 ms (5.594 ms / 100) 5.594 -> 5.592 ( -0.04%) [ +0.05% +0.02% +0.00% / -0.04% +0.00% +0.04%] index_select perm_sorted : Elapsed 0.056 ms (5.597 ms / 100) out_shape = [5, 4, 20, 40] in_shape = [5, 16, 20, 40] idx_dim = 1 B = [5, 4, 20, 40] (stride (1, 4000, 5, 100)) A = [5, 16, 20, 40] (stride (12800, 40, 640, 1)) dim = 1 2.381 -> 2.383 ( +0.08%) [ +0.00% +0.21% +0.17% / +0.13% +0.29% +0.08%] index_select const : Elapsed 0.024 ms (2.381 ms / 100) 2.432 -> 2.434 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.08% +0.37%] index_select wrap : Elapsed 0.024 ms (2.435 ms / 100) 2.428 -> 2.434 ( +0.25%) [ +0.21% +0.00% +0.00% / +0.25% +0.33% +0.37%] index_select linear : Elapsed 0.024 ms (2.433 ms / 100) 2.432 -> 2.438 ( +0.25%) [ +0.16% +0.12% +0.00% / +0.25% +0.41% +0.45%] index_select reverse : Elapsed 0.024 ms (2.436 ms / 100) 2.395 -> 2.393 ( -0.08%) [ +0.04% +0.00% +0.13% / +0.04% -0.08% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.396 ms / 100) 2.386 -> 2.387 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.34% +0.13%] index_select skip256 : Elapsed 0.024 ms (2.386 ms / 100) 2.433 -> 2.434 ( +0.04%) [ +0.04% +0.21% +0.00% / +0.04% +0.08% +0.08%] index_select spread : Elapsed 0.024 ms (2.434 ms / 100) 2.437 -> 2.438 ( +0.04%) [ +0.21% +0.12% +0.00% / +0.04% +0.08% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.442 ms / 100) 2.430 -> 2.437 ( +0.29%) [ +0.37% +0.29% +0.00% / +0.29% +0.49% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.439 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.25% +0.08% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.13% +0.21% +0.00% / +0.08% +0.08% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.400 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.17% +0.21%] index_select random : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.416 ( +0.17%) [ +0.12% +0.12% +0.00% / +0.17% +0.33% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.16% +0.37%] index_select perm : Elapsed 0.024 ms (2.433 ms / 100) 2.428 -> 2.432 ( +0.16%) [ +0.29% +0.25% +0.00% / +0.16% +0.25% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) B = [5, 4, 20, 40] (stride (160, 40, 800, 1)) A = [5, 16, 20, 40] (stride (12800, 1, 640, 16)) dim = 1 2.372 -> 2.376 ( +0.17%) [ +0.08% +0.00% +0.04% / +0.17% +0.34% +0.25%] index_select const : Elapsed 0.024 ms (2.374 ms / 100) 2.367 -> 2.365 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% +0.25% +0.21%] index_select wrap : Elapsed 0.024 ms (2.367 ms / 100) 2.365 -> 2.367 ( +0.08%) [ +0.00% +0.21% +0.08% / +0.08% +0.21% +0.25%] index_select linear : Elapsed 0.024 ms (2.365 ms / 100) 2.360 -> 2.365 ( +0.21%) [ +0.17% +0.13% +0.00% / +0.21% +0.47% +0.21%] index_select reverse : Elapsed 0.024 ms (2.364 ms / 100) 2.366 -> 2.370 ( +0.17%) [ +0.17% +0.04% +0.00% / +0.17% +0.17% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.370 ms / 100) 2.374 -> 2.376 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.13% +0.08% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.378 ms / 100) 2.385 -> 2.389 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.17% +0.17%] index_select spread : Elapsed 0.024 ms (2.385 ms / 100) 2.388 -> 2.392 ( +0.17%) [ +0.04% +0.00% +0.04% / +0.17% +0.42% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.389 ms / 100) 2.391 -> 2.393 ( +0.08%) [ +0.00% +0.08% +0.13% / +0.08% +0.38% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.391 ms / 100) 2.386 -> 2.382 ( -0.17%) [ +0.08% +0.04% +0.00% / -0.17% +0.17% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.388 ms / 100) 2.392 -> 2.396 ( +0.17%) [ +0.25% +0.00% +0.08% / +0.17% +0.25% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.361 -> 2.364 ( +0.13%) [ +0.04% +0.00% +0.08% / +0.13% +0.30% +0.34%] index_select random : Elapsed 0.024 ms (2.362 ms / 100) 2.365 -> 2.364 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.25% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.367 ms / 100) 2.373 -> 2.371 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.25% +0.25%] index_select perm : Elapsed 0.024 ms (2.374 ms / 100) 2.360 -> 2.361 ( +0.04%) [ +0.00% +0.00% +0.21% / +0.04% +0.34% +0.51%] index_select perm_sorted : Elapsed 0.024 ms (2.360 ms / 100) B = [5, 4, 20, 40] (stride (40, 200, 800, 1)) A = [5, 16, 20, 40] (stride (12800, 800, 1, 20)) dim = 1 2.475 -> 2.480 ( +0.20%) [ +0.28% +0.28% +0.00% / +0.20% +0.44% +0.57%] index_select const : Elapsed 0.025 ms (2.482 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.20% +0.00% +0.16% / +0.08% +0.40% +0.28%] index_select wrap : Elapsed 0.025 ms (2.476 ms / 100) 2.471 -> 2.474 ( +0.12%) [ +0.00% +0.16% +0.00% / +0.12% +0.32% +0.40%] index_select linear : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.65% +0.45%] index_select reverse : Elapsed 0.025 ms (2.470 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.28% +0.16% +0.00% / +0.12% +0.45% +0.49%] index_select skip64 : Elapsed 0.025 ms (2.471 ms / 100) 2.478 -> 2.474 ( -0.16%) [ +0.00% +0.08% +0.04% / -0.16% +0.28% +0.40%] index_select skip256 : Elapsed 0.025 ms (2.478 ms / 100) 2.475 -> 2.476 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.44% +0.36%] index_select spread : Elapsed 0.025 ms (2.476 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.36% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.473 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.40% +0.28%] index_select strided 5 : Elapsed 0.025 ms (2.471 ms / 100) 2.475 -> 2.479 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.44% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.476 -> 2.477 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.57% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.477 ms / 100) 2.473 -> 2.475 ( +0.08%) [ +0.28% +0.00% +0.12% / +0.08% +0.24% +0.40%] index_select random : Elapsed 0.025 ms (2.480 ms / 100) 2.470 -> 2.476 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +0.49% +0.32%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.04% +0.36% +0.49%] index_select perm : Elapsed 0.025 ms (2.472 ms / 100) 2.484 -> 2.481 ( -0.12%) [ +0.04% +0.08% +0.00% / -0.12% +0.24% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.485 ms / 100) B = [5, 4, 20, 40] (stride (1, 200, 800, 5)) A = [5, 16, 20, 40] (stride (1, 4000, 5, 100)) dim = 1 2.528 -> 2.534 ( +0.24%) [ +0.24% +0.00% +0.16% / +0.24% +0.24% +0.32%] index_select const : Elapsed 0.025 ms (2.534 ms / 100) 2.539 -> 2.539 ( +0.00%) [ +0.28% +0.00% +0.12% / +0.04% +0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.546 ms / 100) 2.538 -> 2.534 ( -0.16%) [ +0.12% +0.00% +0.04% / +0.12% -0.16% -0.12%] index_select linear : Elapsed 0.025 ms (2.541 ms / 100) 2.538 -> 2.540 ( +0.08%) [ +0.16% +0.04% +0.00% / +0.08% +0.20% +0.16%] index_select reverse : Elapsed 0.025 ms (2.542 ms / 100) 2.535 -> 2.536 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.04% +0.04% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.537 ms / 100) 2.530 -> 2.532 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.16% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.530 ms / 100) 2.532 -> 2.536 ( +0.16%) [ +0.00% +0.20% +0.16% / +0.20% +0.28% +0.16%] index_select spread : Elapsed 0.025 ms (2.532 ms / 100) 2.532 -> 2.537 ( +0.20%) [ +0.04% +0.16% +0.00% / +0.20% +0.32% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.533 ms / 100) 2.534 -> 2.531 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% +0.28% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.535 ms / 100) 2.532 -> 2.533 ( +0.04%) [ +0.28% +0.43% +0.00% / +0.04% +0.39% +0.47%] index_select strided 7 : Elapsed 0.025 ms (2.539 ms / 100) 2.530 -> 2.532 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.28% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.532 ms / 100) 2.531 -> 2.533 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.32% +0.32%] index_select random : Elapsed 0.025 ms (2.533 ms / 100) 2.532 -> 2.529 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.12% +0.28% +0.32%] index_select random_sorted : Elapsed 0.025 ms (2.532 ms / 100) 2.537 -> 2.543 ( +0.24%) [ +0.24% +0.08% +0.00% / +0.24% +0.35% +0.28%] index_select perm : Elapsed 0.025 ms (2.543 ms / 100) 2.535 -> 2.538 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.12% +0.24% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.538 ms / 100) B = [5, 4, 20, 40] (stride (1, 200, 800, 5)) A = [5, 16, 20, 40] (stride (1, 5, 80, 1600)) dim = 1 2.542 -> 2.544 ( +0.08%) [ +0.24% +0.00% +0.08% / +0.08% +0.31% +0.20%] index_select const : Elapsed 0.025 ms (2.548 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.27% +0.27%] index_select wrap : Elapsed 0.026 ms (2.551 ms / 100) 2.549 -> 2.549 ( +0.00%) [ +0.20% +0.20% +0.00% / +0.00% +0.39% +0.31%] index_select linear : Elapsed 0.026 ms (2.554 ms / 100) 2.565 -> 2.563 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.35% +0.31%] index_select reverse : Elapsed 0.026 ms (2.565 ms / 100) 2.544 -> 2.545 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.20% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.547 ms / 100) 2.545 -> 2.543 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.16% +0.08%] index_select skip256 : Elapsed 0.025 ms (2.547 ms / 100) 2.590 -> 2.589 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.04% +0.15%] index_select spread : Elapsed 0.026 ms (2.591 ms / 100) 2.583 -> 2.581 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.19% +0.04%] index_select strided 3 : Elapsed 0.026 ms (2.584 ms / 100) 2.548 -> 2.547 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.31% +0.31%] index_select strided 5 : Elapsed 0.025 ms (2.550 ms / 100) 2.547 -> 2.551 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.16% +0.35%] index_select strided 7 : Elapsed 0.025 ms (2.549 ms / 100) 2.540 -> 2.542 ( +0.08%) [ +0.16% +0.12% +0.00% / +0.08% +0.39% +0.31%] index_select strided 8 : Elapsed 0.025 ms (2.544 ms / 100) 2.575 -> 2.578 ( +0.12%) [ +0.23% +0.00% +0.12% / +0.12% +0.50% +0.43%] index_select random : Elapsed 0.026 ms (2.581 ms / 100) 2.578 -> 2.579 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.27% +0.31%] index_select random_sorted : Elapsed 0.026 ms (2.579 ms / 100) 2.573 -> 2.574 ( +0.04%) [ +0.00% +0.16% +0.00% / +0.04% +0.47% +0.35%] index_select perm : Elapsed 0.026 ms (2.573 ms / 100) 2.573 -> 2.570 ( -0.12%) [ +0.12% +0.04% +0.00% / -0.12% +0.12% +0.16%] index_select perm_sorted : Elapsed 0.026 ms (2.576 ms / 100) B = [5, 4, 20, 40] (stride (80, 20, 1, 400)) A = [5, 16, 20, 40] (stride (12800, 1, 640, 16)) dim = 1 2.389 -> 2.388 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.08% +0.13%] index_select const : Elapsed 0.024 ms (2.389 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.25% +0.08%] index_select wrap : Elapsed 0.024 ms (2.378 ms / 100) 2.379 -> 2.381 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.17% +0.13%] index_select linear : Elapsed 0.024 ms (2.383 ms / 100) 2.375 -> 2.378 ( +0.13%) [ +0.13% +0.29% +0.00% / +0.13% +0.17% +0.21%] index_select reverse : Elapsed 0.024 ms (2.378 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.08% +0.13%] index_select skip64 : Elapsed 0.024 ms (2.380 ms / 100) 2.388 -> 2.388 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.04% +0.04% +0.00%] index_select skip256 : Elapsed 0.024 ms (2.388 ms / 100) 2.394 -> 2.394 ( +0.00%) [ +0.13% +0.17% +0.00% / +0.00% +0.38% +0.29%] index_select spread : Elapsed 0.024 ms (2.397 ms / 100) 2.396 -> 2.398 ( +0.08%) [ +0.00% +0.21% +0.17% / +0.08% +0.17% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.08% +0.17% +0.00% / +0.04% +0.37% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.405 ms / 100) 2.391 -> 2.396 ( +0.21%) [ +0.00% +0.21% +0.08% / +0.21% +0.33% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.00% +0.17% +0.08% / +0.17% +0.17% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.405 ms / 100) 2.402 -> 2.407 ( +0.21%) [ +0.17% +0.12% +0.00% / +0.21% +0.25% +0.29%] index_select random : Elapsed 0.024 ms (2.406 ms / 100) 2.394 -> 2.396 ( +0.08%) [ +0.04% +0.13% +0.00% / +0.08% +0.25% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.04% +0.00% +0.21% / +0.04% +0.17% +0.21%] index_select perm : Elapsed 0.024 ms (2.405 ms / 100) 2.396 -> 2.395 ( -0.04%) [ +0.00% +0.13% +0.00% / -0.04% +0.08% +0.13%] index_select perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) out_shape = [5, 16, 4, 40] in_shape = [5, 16, 20, 40] idx_dim = 2 B = [5, 16, 4, 40] (stride (640, 40, 3200, 1)) A = [5, 16, 20, 40] (stride (1, 5, 3200, 80)) dim = 2 2.048 -> 2.048 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.73%] index_select const : Elapsed 0.020 ms (2.048 ms / 100) 2.047 -> 2.048 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.73% +0.29%] index_select wrap : Elapsed 0.020 ms (2.048 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.64% +0.44%] index_select linear : Elapsed 0.020 ms (2.048 ms / 100) 2.043 -> 2.043 ( +0.00%) [ +0.29% +0.05% +0.00% / +0.00% +0.64% +0.34%] index_select reverse : Elapsed 0.020 ms (2.049 ms / 100) 2.048 -> 2.053 ( +0.24%) [ +0.00% +0.20% +0.05% / +0.24% +0.59% +0.59%] index_select skip64 : Elapsed 0.020 ms (2.048 ms / 100) 2.044 -> 2.045 ( +0.05%) [ +0.15% +0.00% +0.15% / +0.05% +0.88% +0.83%] index_select skip256 : Elapsed 0.020 ms (2.047 ms / 100) 2.041 -> 2.044 ( +0.15%) [ +0.00% +0.05% +0.00% / +0.15% +0.49% +0.49%] index_select spread : Elapsed 0.020 ms (2.041 ms / 100) 2.042 -> 2.047 ( +0.24%) [ +0.10% +0.29% +0.00% / +0.24% +0.54% +0.69%] index_select strided 3 : Elapsed 0.020 ms (2.044 ms / 100) 2.039 -> 2.039 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.64% +0.64%] index_select strided 5 : Elapsed 0.020 ms (2.039 ms / 100) 2.040 -> 2.047 ( +0.34%) [ +0.20% +0.20% +0.00% / +0.34% +0.54% +0.39%] index_select strided 7 : Elapsed 0.020 ms (2.044 ms / 100) 2.041 -> 2.045 ( +0.20%) [ +0.15% +0.15% +0.00% / +0.20% +0.49% +0.59%] index_select strided 8 : Elapsed 0.020 ms (2.044 ms / 100) 2.040 -> 2.044 ( +0.20%) [ +0.15% +0.00% +0.20% / +0.20% +0.59% +0.59%] index_select strided 16 : Elapsed 0.020 ms (2.043 ms / 100) 2.051 -> 2.053 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.73% +0.68%] index_select random : Elapsed 0.021 ms (2.051 ms / 100) 2.052 -> 2.057 ( +0.24%) [ +0.05% +0.00% +0.19% / +0.24% +0.73% +0.68%] index_select random_sorted : Elapsed 0.021 ms (2.053 ms / 100) 2.042 -> 2.044 ( +0.10%) [ +0.00% +0.00% +0.15% / +0.10% +0.54% +0.49%] index_select perm : Elapsed 0.020 ms (2.042 ms / 100) 2.040 -> 2.045 ( +0.25%) [ +0.39% +0.00% +0.00% / +0.25% +0.74% +0.44%] index_select perm_sorted : Elapsed 0.020 ms (2.048 ms / 100) B = [5, 16, 4, 40] (stride (64, 1, 16, 320)) A = [5, 16, 20, 40] (stride (20, 4000, 1, 100)) dim = 2 2.232 -> 2.231 ( -0.04%) [ +0.00% +0.09% +0.00% / -0.04% +0.27% +0.13%] index_select const : Elapsed 0.022 ms (2.232 ms / 100) 2.232 -> 2.233 ( +0.04%) [ +0.13% +0.00% +0.13% / +0.04% +0.40% +0.45%] index_select wrap : Elapsed 0.022 ms (2.235 ms / 100) 2.230 -> 2.234 ( +0.18%) [ +0.27% +0.09% +0.00% / +0.18% +0.40% +0.18%] index_select linear : Elapsed 0.022 ms (2.236 ms / 100) 2.233 -> 2.238 ( +0.22%) [ +0.00% +0.04% +0.00% / +0.22% +0.40% +0.22%] index_select reverse : Elapsed 0.022 ms (2.233 ms / 100) 2.232 -> 2.231 ( -0.04%) [ +0.00% +0.09% +0.09% / -0.04% +0.13% +0.27%] index_select skip64 : Elapsed 0.022 ms (2.232 ms / 100) 2.234 -> 2.233 ( -0.04%) [ +0.13% +0.00% +0.09% / -0.04% +0.22% +0.09%] index_select skip256 : Elapsed 0.022 ms (2.237 ms / 100) 2.303 -> 2.307 ( +0.17%) [ +0.09% +0.00% +0.00% / +0.17% +0.39% +0.26%] index_select spread : Elapsed 0.023 ms (2.305 ms / 100) 2.278 -> 2.277 ( -0.04%) [ +0.09% +0.00% +0.13% / +0.04% +0.09% -0.04%] index_select strided 3 : Elapsed 0.023 ms (2.280 ms / 100) 2.303 -> 2.300 ( -0.13%) [ +0.30% +0.13% +0.00% / -0.13% +0.17% +0.00%] index_select strided 5 : Elapsed 0.023 ms (2.310 ms / 100) 2.280 -> 2.286 ( +0.26%) [ +0.00% +0.09% +0.18% / +0.31% +0.31% +0.26%] index_select strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.282 -> 2.280 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +0.22% -0.04%] index_select strided 8 : Elapsed 0.023 ms (2.283 ms / 100) 2.303 -> 2.305 ( +0.09%) [ +0.00% +0.26% +0.09% / +0.09% +0.17% +0.30%] index_select strided 16 : Elapsed 0.023 ms (2.303 ms / 100) 2.251 -> 2.254 ( +0.13%) [ +0.44% +0.31% +0.00% / +0.13% +0.53% +0.58%] index_select random : Elapsed 0.023 ms (2.261 ms / 100) 2.255 -> 2.256 ( +0.04%) [ +0.00% +0.18% +0.04% / +0.09% +0.04% +0.13%] index_select random_sorted : Elapsed 0.023 ms (2.255 ms / 100) 2.255 -> 2.259 ( +0.18%) [ +0.00% +0.04% +0.00% / +0.27% +0.22% +0.18%] index_select perm : Elapsed 0.023 ms (2.255 ms / 100) 2.254 -> 2.257 ( +0.13%) [ +0.04% +0.00% +0.09% / +0.13% +0.27% +0.44%] index_select perm_sorted : Elapsed 0.023 ms (2.255 ms / 100) B = [5, 16, 4, 40] (stride (1, 20, 5, 320)) dim = 2 fill_cnt = 20 3.723 -> 3.699 ( -0.64%) [ +0.11% +0.08% +0.00% / -0.64% -0.32% -0.43%] index_fill_ const : Elapsed 0.037 ms (3.727 ms / 100) 3.728 -> 3.711 ( -0.46%) [ +0.00% +0.21% +0.00% / -0.46% -0.38% -0.38%] index_fill_ linear : Elapsed 0.037 ms (3.728 ms / 100) 3.730 -> 3.703 ( -0.72%) [ +0.03% +0.00% +0.03% / -0.64% -0.64% -0.72%] index_fill_ reverse : Elapsed 0.037 ms (3.731 ms / 100) 3.719 -> 3.705 ( -0.38%) [ +0.05% +0.00% +0.19% / -0.35% -0.24% -0.38%] index_fill_ skip64 : Elapsed 0.037 ms (3.721 ms / 100) 3.723 -> 3.696 ( -0.73%) [ +0.00% +0.03% +0.03% / -0.73% -0.48% -0.35%] index_fill_ skip256 : Elapsed 0.037 ms (3.723 ms / 100) 3.720 -> 3.699 ( -0.56%) [ +0.08% +0.11% +0.00% / -0.40% -0.56% -0.38%] index_fill_ spread : Elapsed 0.037 ms (3.723 ms / 100) 3.717 -> 3.700 ( -0.46%) [ +0.03% +0.13% +0.00% / -0.46% -0.32% -0.22%] index_fill_ strided 3 : Elapsed 0.037 ms (3.718 ms / 100) 3.726 -> 3.701 ( -0.67%) [ +0.03% +0.00% +0.05% / -0.67% -0.51% -0.56%] index_fill_ random : Elapsed 0.037 ms (3.727 ms / 100) 3.722 -> 3.709 ( -0.35%) [ +0.16% +0.00% +0.05% / -0.35% -0.21% -0.35%] index_fill_ random_sorted : Elapsed 0.037 ms (3.728 ms / 100) out_shape = [5, 16, 20, 4] in_shape = [5, 16, 20, 40] idx_dim = 3 B = [5, 16, 20, 4] (stride (1280, 4, 64, 1)) A = [5, 16, 20, 40] (stride (1, 5, 3200, 80)) dim = 3 1.340 -> 1.340 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.52% +0.52%] index_select const : Elapsed 0.013 ms (1.341 ms / 100) 1.335 -> 1.335 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.60% +0.45%] index_select wrap : Elapsed 0.013 ms (1.336 ms / 100) 1.332 -> 1.333 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.60% +0.45%] index_select linear : Elapsed 0.013 ms (1.334 ms / 100) 1.339 -> 1.340 ( +0.07%) [ +0.15% +0.22% +0.00% / +0.07% +0.67% +0.60%] index_select reverse : Elapsed 0.013 ms (1.341 ms / 100) 1.337 -> 1.341 ( +0.30%) [ +0.00% +0.22% +0.00% / +0.30% +0.75% +0.67%] index_select skip64 : Elapsed 0.013 ms (1.337 ms / 100) 1.335 -> 1.337 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.75% +0.60%] index_select skip256 : Elapsed 0.013 ms (1.336 ms / 100) 1.343 -> 1.342 ( -0.07%) [ +0.22% +0.00% +0.00% / -0.07% +0.74% +0.60%] index_select spread : Elapsed 0.013 ms (1.346 ms / 100) 1.338 -> 1.341 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.60% +0.52%] index_select strided 3 : Elapsed 0.013 ms (1.339 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.37%] index_select strided 5 : Elapsed 0.013 ms (1.337 ms / 100) 1.333 -> 1.337 ( +0.30%) [ +0.23% +0.00% +0.08% / +0.30% +0.90% +0.53%] index_select strided 7 : Elapsed 0.013 ms (1.336 ms / 100) 1.345 -> 1.343 ( -0.15%) [ +0.07% +0.07% +0.00% / -0.15% +0.30% +0.67%] index_select strided 8 : Elapsed 0.013 ms (1.346 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.60% +0.60%] index_select strided 16 : Elapsed 0.013 ms (1.346 ms / 100) 1.340 -> 1.340 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.52% +0.45%] index_select random : Elapsed 0.013 ms (1.340 ms / 100) 1.340 -> 1.342 ( +0.15%) [ +0.15% +0.22% +0.00% / +0.15% +0.67% +0.52%] index_select random_sorted : Elapsed 0.013 ms (1.342 ms / 100) 1.341 -> 1.343 ( +0.15%) [ +0.00% +0.07% +0.15% / +0.15% +0.52% +0.37%] index_select perm : Elapsed 0.013 ms (1.341 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.37% +0.00% +0.37% / +0.07% +0.52% +0.37%] index_select perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) B = [5, 16, 20, 4] (stride (1280, 1, 16, 320)) A = [5, 16, 20, 40] (stride (1, 4000, 200, 5)) dim = 3 1.281 -> 1.283 ( +0.16%) [ +0.31% +0.16% +0.00% / +0.16% +0.47% +0.47%] index_select const : Elapsed 0.013 ms (1.285 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.16% +0.23% +0.00% / -0.08% +0.55% +0.47%] index_select linear : Elapsed 0.013 ms (1.283 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.23% +0.00% +0.08% / -0.08% +0.55% +0.63%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.282 -> 1.281 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.47% +0.31%] index_select skip64 : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.283 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.39%] index_select spread : Elapsed 0.013 ms (1.284 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.31% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.288 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.55% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.47% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.286 -> 1.285 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.08% +0.23%] index_select strided 8 : Elapsed 0.013 ms (1.287 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.31% +0.16%] index_select strided 16 : Elapsed 0.013 ms (1.287 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.86% +0.47%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.283 ( +0.23%) [ +0.00% +0.08% +0.00% / +0.23% +0.39% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.23% +0.16% +0.00% / +0.00% +0.23% +0.23%] index_select perm : Elapsed 0.013 ms (1.286 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.39% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) B = [5, 16, 20, 4] (stride (1, 20, 320, 5)) A = [5, 16, 20, 40] (stride (640, 40, 3200, 1)) dim = 3 1.286 -> 1.286 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.54%] index_select const : Elapsed 0.013 ms (1.287 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.08% +0.31% +0.00% / +0.16% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.284 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.47% +0.39%] index_select linear : Elapsed 0.013 ms (1.287 ms / 100) 1.282 -> 1.285 ( +0.23%) [ +0.23% +0.31% +0.00% / +0.23% +0.47% +0.55%] index_select reverse : Elapsed 0.013 ms (1.285 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select skip64 : Elapsed 0.013 ms (1.287 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.31% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.285 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.39% +0.31%] index_select spread : Elapsed 0.013 ms (1.288 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.31% +0.08%] index_select strided 3 : Elapsed 0.013 ms (1.288 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.23% +0.23%] index_select strided 5 : Elapsed 0.013 ms (1.286 ms / 100) 1.283 -> 1.286 ( +0.23%) [ +0.08% +0.16% +0.00% / +0.23% +0.47% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.47% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.286 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.39% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.286 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select random : Elapsed 0.013 ms (1.282 ms / 100) 1.283 -> 1.289 ( +0.47%) [ +0.39% +0.31% +0.00% / +0.47% +0.55% +0.70%] index_select random_sorted : Elapsed 0.013 ms (1.288 ms / 100) 1.284 -> 1.287 ( +0.23%) [ +0.00% +0.16% +0.16% / +0.23% +0.47% +0.31%] index_select perm : Elapsed 0.013 ms (1.284 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.16% +0.23% +0.00% / +0.08% +0.62% +0.70%] index_select perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) B = [5, 16, 20, 4] (stride (1, 5, 320, 80)) A = [5, 16, 20, 40] (stride (12800, 20, 1, 320)) dim = 3 1.191 -> 1.193 ( +0.17%) [ +0.08% +0.17% +0.00% / +0.17% +0.50% +0.50%] index_select const : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.42%] index_select wrap : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.00% +0.50% +0.42%] index_select linear : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.59% +0.50%] index_select reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.191 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.67% +0.67%] index_select skip256 : Elapsed 0.012 ms (1.191 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.59%] index_select spread : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.59% +0.50%] index_select strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select strided 5 : Elapsed 0.012 ms (1.192 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.42% +0.34%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.42% +0.42%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select strided 16 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.34%] index_select random : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.25%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.42% +0.34%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.67% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) B = [5, 16, 20, 4] (stride (1, 5, 80, 1600)) A = [5, 16, 20, 40] (stride (800, 4000, 40, 1)) dim = 3 1.367 -> 1.369 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +1.32% +1.24%] index_select const : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.51% +0.44%] index_select wrap : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.44%] index_select linear : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.00% +0.15% +0.07% / +0.15% +0.58% +0.66%] index_select reverse : Elapsed 0.014 ms (1.370 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +1.09% +1.02%] index_select skip64 : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.37% +0.29%] index_select skip256 : Elapsed 0.014 ms (1.369 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.95% +1.09%] index_select spread : Elapsed 0.014 ms (1.373 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +1.09% +1.09%] index_select strided 3 : Elapsed 0.014 ms (1.373 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.51% +0.22%] index_select strided 5 : Elapsed 0.014 ms (1.372 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.22% +0.00% +0.15% / +0.15% +0.66% +0.80%] index_select strided 7 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.95% +1.02%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.07% +0.22% +0.00% / +0.15% +0.66% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.371 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.80% +0.66%] index_select random : Elapsed 0.014 ms (1.373 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.95% +0.95%] index_select random_sorted : Elapsed 0.014 ms (1.373 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.51% +0.44%] index_select perm : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) out_shape = [4, 16, 40, 20] in_shape = [5, 16, 40, 20] idx_dim = 0 B = [4, 16, 40, 20] (stride (12800, 20, 320, 1)) A = [5, 16, 40, 20] (stride (12800, 800, 20, 1)) dim = 0 5.127 -> 5.137 ( +0.20%) [ +0.12% +0.12% +0.00% / +0.20% +0.47% +0.47%] index_select const : Elapsed 0.051 ms (5.133 ms / 100) 5.241 -> 5.250 ( +0.17%) [ +0.19% +0.04% +0.00% / +0.17% +0.19% +0.27%] index_select wrap : Elapsed 0.053 ms (5.251 ms / 100) 5.240 -> 5.244 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.27% +0.15%] index_select linear : Elapsed 0.052 ms (5.242 ms / 100) 5.255 -> 5.247 ( -0.15%) [ +0.02% +0.17% +0.00% / +0.13% -0.15% -0.13%] index_select reverse : Elapsed 0.053 ms (5.256 ms / 100) 5.129 -> 5.136 ( +0.14%) [ +0.10% +0.00% +0.04% / +0.14% +0.27% +0.39%] index_select skip64 : Elapsed 0.051 ms (5.134 ms / 100) 5.125 -> 5.133 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.59% +0.41%] index_select skip256 : Elapsed 0.051 ms (5.133 ms / 100) 5.242 -> 5.242 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.15% +0.13%] index_select spread : Elapsed 0.052 ms (5.242 ms / 100) 5.237 -> 5.239 ( +0.04%) [ +0.02% +0.10% +0.00% / +0.04% +0.40% +0.29%] index_select strided 3 : Elapsed 0.052 ms (5.238 ms / 100) 5.177 -> 5.180 ( +0.06%) [ +0.02% +0.00% +0.04% / +0.10% +0.08% +0.06%] index_select random : Elapsed 0.052 ms (5.178 ms / 100) 5.178 -> 5.178 ( +0.00%) [ +0.00% +0.15% +0.14% / +0.00% +0.02% +0.15%] index_select random_sorted : Elapsed 0.052 ms (5.178 ms / 100) 5.256 -> 5.250 ( -0.11%) [ +0.13% +0.00% +0.02% / +0.17% -0.11% -0.10%] index_select perm : Elapsed 0.053 ms (5.263 ms / 100) 5.251 -> 5.246 ( -0.10%) [ +0.06% +0.00% +0.13% / +0.10% -0.10% +0.10%] index_select perm_sorted : Elapsed 0.053 ms (5.254 ms / 100) B = [4, 16, 40, 20] (stride (12800, 20, 320, 1)) A = [5, 16, 40, 20] (stride (12800, 40, 1, 640)) dim = 0 5.471 -> 5.451 ( -0.37%) [ +0.04% +0.00% +0.05% / +0.15% -0.29% -0.37%] index_select const : Elapsed 0.055 ms (5.473 ms / 100) 5.506 -> 5.502 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.02% -0.07% +0.07%] index_select wrap : Elapsed 0.055 ms (5.506 ms / 100) 5.504 -> 5.497 ( -0.13%) [ +0.09% +0.00% +0.05% / +0.13% +0.05% -0.13%] index_select linear : Elapsed 0.055 ms (5.509 ms / 100) 5.495 -> 5.499 ( +0.07%) [ +0.04% +0.00% +0.15% / +0.07% +0.09% +0.22%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.471 -> 5.461 ( -0.18%) [ +0.00% +0.02% +0.00% / -0.05% -0.18% -0.18%] index_select skip64 : Elapsed 0.055 ms (5.471 ms / 100) 5.466 -> 5.452 ( -0.26%) [ +0.16% +0.00% +0.09% / +0.13% -0.20% -0.26%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.506 -> 5.504 ( -0.04%) [ +0.09% +0.07% +0.00% / -0.02% +0.07% -0.04%] index_select spread : Elapsed 0.055 ms (5.511 ms / 100) 5.511 -> 5.498 ( -0.24%) [ +0.07% +0.00% +0.05% / -0.04% -0.24% -0.22%] index_select strided 3 : Elapsed 0.055 ms (5.515 ms / 100) 5.499 -> 5.500 ( +0.02%) [ +0.15% +0.04% +0.00% / +0.04% +0.05% +0.02%] index_select random : Elapsed 0.055 ms (5.507 ms / 100) 5.491 -> 5.493 ( +0.04%) [ +0.04% +0.00% +0.07% / +0.04% +0.33% +0.22%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) 5.482 -> 5.490 ( +0.15%) [ +0.16% +0.07% +0.00% / +0.15% +0.58% +0.36%] index_select perm : Elapsed 0.055 ms (5.491 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.16% +0.00% +0.16% / +0.09% +0.46% +0.44%] index_select perm_sorted : Elapsed 0.055 ms (5.492 ms / 100) B = [4, 16, 40, 20] (stride (12800, 1, 320, 16)) A = [5, 16, 40, 20] (stride (12800, 800, 1, 40)) dim = 0 5.452 -> 5.461 ( +0.17%) [ +0.18% +0.00% +0.35% / +0.17% +0.44% +0.35%] index_select const : Elapsed 0.055 ms (5.462 ms / 100) 5.510 -> 5.502 ( -0.15%) [ +0.04% +0.00% +0.20% / +0.11% -0.04% -0.15%] index_select wrap : Elapsed 0.055 ms (5.512 ms / 100) 5.507 -> 5.491 ( -0.29%) [ +0.13% +0.09% +0.00% / +0.22% -0.07% -0.29%] index_select linear : Elapsed 0.055 ms (5.514 ms / 100) 5.521 -> 5.491 ( -0.54%) [ +0.02% +0.00% +0.00% / +0.07% -0.54% -0.49%] index_select reverse : Elapsed 0.055 ms (5.522 ms / 100) 5.455 -> 5.464 ( +0.16%) [ +0.00% +0.11% +0.20% / +0.16% +0.29% +0.35%] index_select skip64 : Elapsed 0.055 ms (5.455 ms / 100) 5.460 -> 5.469 ( +0.16%) [ +0.05% +0.07% +0.00% / +0.16% +0.24% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.463 ms / 100) 5.511 -> 5.501 ( -0.18%) [ +0.02% +0.25% +0.00% / +0.07% -0.16% -0.18%] index_select spread : Elapsed 0.055 ms (5.512 ms / 100) 5.502 -> 5.500 ( -0.04%) [ +0.24% +0.36% +0.00% / +0.24% -0.04% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.515 ms / 100) 5.497 -> 5.485 ( -0.22%) [ +0.00% +0.00% +0.02% / -0.02% -0.22% -0.15%] index_select random : Elapsed 0.055 ms (5.497 ms / 100) 5.492 -> 5.483 ( -0.16%) [ +0.05% +0.07% +0.00% / +0.15% -0.16% -0.11%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) 5.503 -> 5.492 ( -0.20%) [ +0.00% +0.11% +0.22% / +0.09% -0.04% -0.20%] index_select perm : Elapsed 0.055 ms (5.503 ms / 100) 5.504 -> 5.505 ( +0.02%) [ +0.20% +0.00% +0.16% / +0.27% +0.04% +0.02%] index_select perm_sorted : Elapsed 0.055 ms (5.515 ms / 100) B = [4, 16, 40, 20] (stride (12800, 40, 1, 640)) A = [5, 16, 40, 20] (stride (800, 4000, 20, 1)) dim = 0 3.307 -> 3.310 ( +0.09%) [ +0.36% +0.00% +0.06% / +0.09% +0.54% +0.33%] index_select const : Elapsed 0.033 ms (3.319 ms / 100) 3.368 -> 3.368 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.59% +0.71%] index_select wrap : Elapsed 0.034 ms (3.369 ms / 100) 3.371 -> 3.378 ( +0.21%) [ +0.00% +0.06% +0.21% / +0.21% +0.62% +0.44%] index_select linear : Elapsed 0.034 ms (3.371 ms / 100) 3.343 -> 3.349 ( +0.18%) [ +0.00% +0.09% +0.21% / +0.18% +0.66% +0.45%] index_select reverse : Elapsed 0.033 ms (3.343 ms / 100) 3.309 -> 3.310 ( +0.03%) [ +0.09% +0.00% +0.15% / +0.03% +0.33% +0.45%] index_select skip64 : Elapsed 0.033 ms (3.312 ms / 100) 3.309 -> 3.313 ( +0.12%) [ +0.03% +0.00% +0.39% / +0.12% +0.36% +0.15%] index_select skip256 : Elapsed 0.033 ms (3.310 ms / 100) 3.368 -> 3.372 ( +0.12%) [ +0.00% +0.27% +0.03% / +0.12% +0.39% +0.50%] index_select spread : Elapsed 0.034 ms (3.368 ms / 100) 3.382 -> 3.383 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.12% +0.09%] index_select strided 3 : Elapsed 0.034 ms (3.382 ms / 100) 3.304 -> 3.303 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.21% +0.51%] index_select random : Elapsed 0.033 ms (3.305 ms / 100) 3.323 -> 3.329 ( +0.18%) [ +0.36% +0.00% +0.21% / +0.18% +0.54% +0.66%] index_select random_sorted : Elapsed 0.033 ms (3.335 ms / 100) 3.340 -> 3.342 ( +0.06%) [ +0.00% +0.00% +0.12% / +0.24% +0.21% +0.06%] index_select perm : Elapsed 0.033 ms (3.340 ms / 100) 3.352 -> 3.348 ( -0.12%) [ +0.09% +0.15% +0.00% / -0.12% -0.12% +0.15%] index_select perm_sorted : Elapsed 0.034 ms (3.355 ms / 100) B = [4, 16, 40, 20] (stride (12800, 40, 1, 640)) A = [5, 16, 40, 20] (stride (1, 5, 1600, 80)) dim = 0 3.551 -> 3.555 ( +0.11%) [ +0.00% +0.11% +0.06% / +0.11% +0.14% +0.20%] index_select const : Elapsed 0.036 ms (3.551 ms / 100) 3.548 -> 3.551 ( +0.08%) [ +0.00% +0.20% +0.11% / +0.08% +0.37% +0.11%] index_select wrap : Elapsed 0.035 ms (3.548 ms / 100) 3.551 -> 3.547 ( -0.11%) [ +0.00% +0.14% +0.00% / -0.11% +0.14% +0.17%] index_select linear : Elapsed 0.036 ms (3.551 ms / 100) 3.561 -> 3.560 ( -0.03%) [ +0.00% +0.14% +0.06% / +0.20% -0.03% +0.03%] index_select reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.556 -> 3.556 ( +0.00%) [ +0.25% +0.37% +0.00% / +0.28% +0.11% +0.00%] index_select skip64 : Elapsed 0.036 ms (3.565 ms / 100) 3.561 -> 3.544 ( -0.48%) [ +0.08% +0.00% +0.31% / +0.08% -0.48% -0.48%] index_select skip256 : Elapsed 0.036 ms (3.564 ms / 100) 3.562 -> 3.544 ( -0.51%) [ +0.17% +0.00% +0.03% / +0.11% -0.51% -0.48%] index_select spread : Elapsed 0.036 ms (3.568 ms / 100) 3.561 -> 3.559 ( -0.06%) [ +0.06% +0.17% +0.00% / +0.00% +0.03% -0.06%] index_select strided 3 : Elapsed 0.036 ms (3.563 ms / 100) 3.567 -> 3.563 ( -0.11%) [ +0.20% +0.00% +0.17% / +0.06% +0.03% -0.11%] index_select random : Elapsed 0.036 ms (3.574 ms / 100) 3.564 -> 3.568 ( +0.11%) [ +0.08% +0.08% +0.00% / +0.14% +0.11% +0.17%] index_select random_sorted : Elapsed 0.036 ms (3.567 ms / 100) 3.567 -> 3.563 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.03% +0.03% -0.11%] index_select perm : Elapsed 0.036 ms (3.569 ms / 100) 3.549 -> 3.553 ( +0.11%) [ +0.00% +0.14% +0.37% / +0.11% +0.65% +0.51%] index_select perm_sorted : Elapsed 0.035 ms (3.549 ms / 100) B = [4, 16, 40, 20] (stride (12800, 40, 1, 640)) A = [5, 16, 40, 20] (stride (40, 200, 1, 3200)) dim = 0 5.552 -> 5.554 ( +0.04%) [ +0.11% +0.11% +0.00% / +0.23% +0.07% +0.04%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.610 -> 5.603 ( -0.12%) [ +0.12% +0.02% +0.00% / +0.07% -0.05% -0.12%] index_select wrap : Elapsed 0.056 ms (5.617 ms / 100) 5.607 -> 5.606 ( -0.02%) [ +0.00% +0.12% +0.21% / +0.14% -0.02% -0.02%] index_select linear : Elapsed 0.056 ms (5.607 ms / 100) 5.610 -> 5.603 ( -0.12%) [ +0.11% +0.00% +0.14% / +0.14% +0.02% -0.12%] index_select reverse : Elapsed 0.056 ms (5.616 ms / 100) 5.560 -> 5.550 ( -0.18%) [ +0.00% +0.04% +0.09% / +0.05% -0.11% -0.18%] index_select skip64 : Elapsed 0.056 ms (5.560 ms / 100) 5.558 -> 5.548 ( -0.18%) [ +0.00% +0.04% +0.09% / +0.02% -0.04% -0.18%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.610 -> 5.601 ( -0.16%) [ +0.12% +0.07% +0.00% / +0.16% -0.16% +0.04%] index_select spread : Elapsed 0.056 ms (5.617 ms / 100) 5.624 -> 5.610 ( -0.25%) [ +0.00% +0.05% +0.12% / +0.07% -0.25% -0.25%] index_select strided 3 : Elapsed 0.056 ms (5.624 ms / 100) 5.614 -> 5.611 ( -0.05%) [ +0.07% +0.00% +0.04% / +0.20% -0.05% -0.04%] index_select random : Elapsed 0.056 ms (5.618 ms / 100) 5.606 -> 5.596 ( -0.18%) [ +0.09% +0.00% +0.16% / +0.23% -0.18% -0.14%] index_select random_sorted : Elapsed 0.056 ms (5.611 ms / 100) 5.612 -> 5.600 ( -0.21%) [ +0.02% +0.00% +0.23% / +0.14% -0.09% -0.21%] index_select perm : Elapsed 0.056 ms (5.613 ms / 100) 5.612 -> 5.597 ( -0.27%) [ +0.00% +0.07% +0.16% / +0.25% -0.27% -0.21%] index_select perm_sorted : Elapsed 0.056 ms (5.612 ms / 100) B = [4, 16, 40, 20] (stride (1, 80, 1280, 4)) A = [5, 16, 40, 20] (stride (800, 4000, 1, 40)) dim = 0 5.791 -> 5.798 ( +0.12%) [ +0.00% +0.05% +0.05% / +0.12% +0.50% +0.60%] index_select const : Elapsed 0.058 ms (5.791 ms / 100) 5.873 -> 5.859 ( -0.24%) [ +0.10% +0.00% +0.14% / +0.22% -0.24% -0.24%] index_select wrap : Elapsed 0.059 ms (5.879 ms / 100) 5.872 -> 5.858 ( -0.24%) [ +0.12% +0.00% +0.05% / +0.17% -0.15% -0.24%] index_select linear : Elapsed 0.059 ms (5.879 ms / 100) 5.898 -> 5.851 ( -0.80%) [ +0.00% +0.05% +0.29% / +0.17% -0.80% -0.63%] index_select reverse : Elapsed 0.059 ms (5.898 ms / 100) 5.793 -> 5.793 ( +0.00%) [ +0.03% +0.00% +0.12% / +0.00% +0.50% +0.48%] index_select skip64 : Elapsed 0.058 ms (5.795 ms / 100) 5.791 -> 5.796 ( +0.09%) [ +0.07% +0.02% +0.00% / +0.09% +0.52% +0.45%] index_select skip256 : Elapsed 0.058 ms (5.795 ms / 100) 5.876 -> 5.858 ( -0.31%) [ +0.10% +0.00% +0.09% / +0.02% -0.31% -0.20%] index_select spread : Elapsed 0.059 ms (5.882 ms / 100) 5.863 -> 5.839 ( -0.41%) [ +0.10% +0.02% +0.00% / -0.07% -0.41% -0.41%] index_select strided 3 : Elapsed 0.059 ms (5.869 ms / 100) 5.846 -> 5.821 ( -0.43%) [ +0.09% +0.19% +0.00% / +0.09% -0.43% -0.39%] index_select random : Elapsed 0.059 ms (5.851 ms / 100) 5.853 -> 5.814 ( -0.67%) [ +0.00% +0.00% +0.12% / +0.21% -0.67% -0.56%] index_select random_sorted : Elapsed 0.059 ms (5.853 ms / 100) 5.843 -> 5.847 ( +0.07%) [ +0.00% +0.22% +0.24% / +0.17% +0.19% +0.07%] index_select perm : Elapsed 0.058 ms (5.843 ms / 100) 5.876 -> 5.859 ( -0.29%) [ +0.00% +0.09% +0.10% / -0.05% -0.29% -0.29%] index_select perm_sorted : Elapsed 0.059 ms (5.876 ms / 100) B = [4, 16, 40, 20] (stride (1, 80, 1280, 4)) A = [5, 16, 40, 20] (stride (20, 4000, 100, 1)) dim = 0 5.585 -> 5.591 ( +0.11%) [ +0.14% +0.00% +0.18% / +0.18% +0.16% +0.11%] index_select const : Elapsed 0.056 ms (5.593 ms / 100) 5.669 -> 5.668 ( -0.02%) [ +0.00% +0.07% +0.09% / -0.02% +0.05% +0.02%] index_select wrap : Elapsed 0.057 ms (5.669 ms / 100) 5.671 -> 5.665 ( -0.11%) [ +0.00% +0.04% +0.00% / +0.11% +0.09% -0.11%] index_select linear : Elapsed 0.057 ms (5.671 ms / 100) 5.684 -> 5.675 ( -0.16%) [ +0.00% +0.09% +0.00% / -0.05% -0.12% -0.16%] index_select reverse : Elapsed 0.057 ms (5.684 ms / 100) 5.574 -> 5.581 ( +0.13%) [ +0.13% +0.00% +0.05% / +0.13% +0.16% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.581 ms / 100) 5.587 -> 5.585 ( -0.04%) [ +0.02% +0.11% +0.00% / +0.13% +0.16% -0.04%] index_select skip256 : Elapsed 0.056 ms (5.588 ms / 100) 5.667 -> 5.668 ( +0.02%) [ +0.09% +0.00% +0.16% / +0.12% +0.02% +0.16%] index_select spread : Elapsed 0.057 ms (5.672 ms / 100) 5.670 -> 5.665 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.05% -0.02%] index_select strided 3 : Elapsed 0.057 ms (5.670 ms / 100) 5.637 -> 5.645 ( +0.14%) [ +0.00% +0.16% +0.11% / +0.14% +0.14% +0.20%] index_select random : Elapsed 0.056 ms (5.637 ms / 100) 5.630 -> 5.634 ( +0.07%) [ +0.07% +0.00% +0.20% / +0.07% +0.23% +0.20%] index_select random_sorted : Elapsed 0.056 ms (5.634 ms / 100) 5.679 -> 5.679 ( +0.00%) [ +0.11% +0.00% +0.07% / +0.00% +0.05% +0.05%] index_select perm : Elapsed 0.057 ms (5.685 ms / 100) 5.665 -> 5.672 ( +0.12%) [ +0.05% +0.00% +0.23% / +0.12% +0.19% +0.18%] index_select perm_sorted : Elapsed 0.057 ms (5.668 ms / 100) out_shape = [5, 4, 40, 20] in_shape = [5, 16, 40, 20] idx_dim = 1 B = [5, 4, 40, 20] (stride (3200, 40, 1, 160)) A = [5, 16, 40, 20] (stride (1, 100, 1600, 5)) dim = 1 2.414 -> 2.410 ( -0.17%) [ +0.12% +0.04% +0.00% / -0.17% +0.12% +0.29%] index_select const : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.25% +0.21%] index_select wrap : Elapsed 0.024 ms (2.411 ms / 100) 2.409 -> 2.410 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.33% +0.29%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.402 -> 2.409 ( +0.29%) [ +0.00% +0.00% +0.17% / +0.29% +0.46% +0.46%] index_select reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.415 -> 2.415 ( +0.00%) [ +0.21% +0.00% +0.12% / +0.00% +0.12% +0.41%] index_select skip64 : Elapsed 0.024 ms (2.420 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.21% +0.17% +0.00% / +0.12% +0.37% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.420 ms / 100) 2.408 -> 2.409 ( +0.04%) [ +0.00% +0.00% +0.17% / +0.08% +0.04% +0.12%] index_select spread : Elapsed 0.024 ms (2.408 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.25% +0.08% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.04% +0.12% +0.00% / -0.04% +0.33% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.407 ms / 100) 2.409 -> 2.414 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.21% +0.37% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.21% +0.08% +0.00% / +0.08% +0.33% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.00% +0.08% +0.37% / +0.08% +0.08% +0.25%] index_select random : Elapsed 0.024 ms (2.413 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.25% +0.33%] index_select random_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.416 ( +0.12%) [ +0.00% +0.04% +0.17% / +0.12% +0.21% +0.33%] index_select perm : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.17% +0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) B = [5, 4, 40, 20] (stride (20, 4000, 100, 1)) A = [5, 16, 40, 20] (stride (40, 200, 1, 3200)) dim = 1 2.486 -> 2.485 ( -0.04%) [ +0.00% +0.12% +0.00% / -0.04% +0.16% +0.24%] index_select const : Elapsed 0.025 ms (2.486 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.24% +0.20% +0.00% / +0.12% +0.20% +0.16%] index_select wrap : Elapsed 0.025 ms (2.507 ms / 100) 2.501 -> 2.502 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.20% +0.04% +0.08%] index_select linear : Elapsed 0.025 ms (2.503 ms / 100) 2.512 -> 2.512 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.04% +0.00% +0.08%] index_select reverse : Elapsed 0.025 ms (2.512 ms / 100) 2.491 -> 2.488 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.12% +0.24% +0.24%] index_select skip64 : Elapsed 0.025 ms (2.493 ms / 100) 2.484 -> 2.490 ( +0.24%) [ +0.16% +0.20% +0.00% / +0.28% +0.28% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.488 ms / 100) 2.501 -> 2.500 ( -0.04%) [ +0.24% +0.20% +0.00% / -0.04% +0.44% +0.52%] index_select spread : Elapsed 0.025 ms (2.507 ms / 100) 2.501 -> 2.507 ( +0.24%) [ +0.00% +0.00% +0.08% / +0.24% +0.24% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.501 ms / 100) 2.500 -> 2.504 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.28% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.502 ms / 100) 2.501 -> 2.506 ( +0.20%) [ +0.24% +0.12% +0.00% / +0.20% +0.24% +0.56%] index_select strided 7 : Elapsed 0.025 ms (2.507 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.44% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.483 ms / 100) 2.507 -> 2.508 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.48% +0.20%] index_select random : Elapsed 0.025 ms (2.508 ms / 100) 2.512 -> 2.515 ( +0.12%) [ +0.00% +0.16% +0.16% / +0.12% +0.24% +0.40%] index_select random_sorted : Elapsed 0.025 ms (2.512 ms / 100) 2.505 -> 2.505 ( +0.00%) [ +0.20% +0.00% +0.16% / +0.00% +0.20% +0.20%] index_select perm : Elapsed 0.025 ms (2.510 ms / 100) 2.502 -> 2.505 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.12% +0.40% +0.36%] index_select perm_sorted : Elapsed 0.025 ms (2.506 ms / 100) B = [5, 4, 40, 20] (stride (80, 20, 400, 1)) A = [5, 16, 40, 20] (stride (16, 1, 1600, 80)) dim = 1 2.468 -> 2.470 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.28% +0.08% +0.16%] index_select const : Elapsed 0.025 ms (2.468 ms / 100) 2.469 -> 2.472 ( +0.12%) [ +0.00% +0.00% +0.20% / +0.16% +0.12% +0.32%] index_select wrap : Elapsed 0.025 ms (2.469 ms / 100) 2.468 -> 2.470 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.08% +0.20%] index_select linear : Elapsed 0.025 ms (2.471 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.28% +0.45%] index_select reverse : Elapsed 0.025 ms (2.469 ms / 100) 2.466 -> 2.470 ( +0.16%) [ +0.16% +0.00% +0.12% / +0.16% +0.49% +0.45%] index_select skip64 : Elapsed 0.025 ms (2.470 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.32% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.470 ms / 100) 2.484 -> 2.487 ( +0.12%) [ +0.16% +0.20% +0.00% / +0.12% +0.68% +0.81%] index_select spread : Elapsed 0.025 ms (2.488 ms / 100) 2.484 -> 2.490 ( +0.24%) [ +0.08% +0.00% +0.04% / +0.24% +0.68% +0.68%] index_select strided 3 : Elapsed 0.025 ms (2.486 ms / 100) 2.484 -> 2.482 ( -0.08%) [ +0.00% +0.28% +0.04% / -0.08% +0.56% +0.52%] index_select strided 5 : Elapsed 0.025 ms (2.484 ms / 100) 2.482 -> 2.489 ( +0.28%) [ +0.20% +0.16% +0.00% / +0.28% +0.81% +0.52%] index_select strided 7 : Elapsed 0.025 ms (2.487 ms / 100) 2.501 -> 2.502 ( +0.04%) [ +0.00% +0.12% +0.08% / +0.04% +0.44% +0.32%] index_select strided 8 : Elapsed 0.025 ms (2.501 ms / 100) 2.501 -> 2.501 ( +0.00%) [ +0.16% +0.04% +0.00% / +0.00% +0.36% +0.28%] index_select random : Elapsed 0.025 ms (2.505 ms / 100) 2.490 -> 2.487 ( -0.12%) [ +0.00% +0.20% +0.24% / -0.12% +0.60% +0.48%] index_select random_sorted : Elapsed 0.025 ms (2.490 ms / 100) 2.499 -> 2.504 ( +0.20%) [ +0.00% +0.12% +0.08% / +0.20% +0.48% +0.56%] index_select perm : Elapsed 0.025 ms (2.499 ms / 100) 2.488 -> 2.490 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.32% +0.44%] index_select perm_sorted : Elapsed 0.025 ms (2.491 ms / 100) B = [5, 4, 40, 20] (stride (160, 40, 1, 800)) A = [5, 16, 40, 20] (stride (800, 4000, 20, 1)) dim = 1 2.271 -> 2.273 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.09% +0.31% +0.35%] index_select const : Elapsed 0.023 ms (2.272 ms / 100) 2.331 -> 2.333 ( +0.09%) [ +0.13% +0.00% +0.00% / +0.09% +0.17% +0.09%] index_select wrap : Elapsed 0.023 ms (2.334 ms / 100) 2.328 -> 2.331 ( +0.13%) [ +0.09% +0.00% +0.04% / +0.13% +0.21% +0.13%] index_select linear : Elapsed 0.023 ms (2.330 ms / 100) 2.329 -> 2.331 ( +0.09%) [ +0.21% +0.09% +0.00% / +0.09% +0.17% +0.21%] index_select reverse : Elapsed 0.023 ms (2.334 ms / 100) 2.272 -> 2.275 ( +0.13%) [ +0.04% +0.00% +0.00% / +0.13% +0.48% +0.18%] index_select skip64 : Elapsed 0.023 ms (2.273 ms / 100) 2.270 -> 2.275 ( +0.22%) [ +0.09% +0.09% +0.00% / +0.22% +0.22% +0.22%] index_select skip256 : Elapsed 0.023 ms (2.272 ms / 100) 2.326 -> 2.331 ( +0.21%) [ +0.09% +0.21% +0.00% / +0.21% +0.34% +0.26%] index_select spread : Elapsed 0.023 ms (2.328 ms / 100) 2.325 -> 2.325 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.00% +0.39% +0.43%] index_select strided 3 : Elapsed 0.023 ms (2.328 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.04% +0.22% +0.00% / +0.04% +0.69% +0.56%] index_select strided 5 : Elapsed 0.023 ms (2.324 ms / 100) 2.326 -> 2.326 ( +0.00%) [ +0.04% +0.26% +0.00% / +0.00% +0.30% +0.39%] index_select strided 7 : Elapsed 0.023 ms (2.327 ms / 100) 2.281 -> 2.280 ( -0.04%) [ +0.26% +0.00% +0.00% / -0.04% +0.35% +0.39%] index_select strided 8 : Elapsed 0.023 ms (2.287 ms / 100) 2.304 -> 2.309 ( +0.22%) [ +0.22% +0.00% +0.09% / +0.30% +0.22% +0.22%] index_select random : Elapsed 0.023 ms (2.309 ms / 100) 2.306 -> 2.308 ( +0.09%) [ +0.00% +0.30% +0.00% / +0.09% +0.17% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.306 ms / 100) 2.323 -> 2.323 ( +0.00%) [ +0.26% +0.22% +0.00% / +0.00% +0.60% +0.52%] index_select perm : Elapsed 0.023 ms (2.329 ms / 100) 2.327 -> 2.325 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.30% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.329 ms / 100) B = [5, 4, 40, 20] (stride (160, 1, 4, 800)) A = [5, 16, 40, 20] (stride (16, 1, 1600, 80)) dim = 1 2.487 -> 2.484 ( -0.12%) [ +0.04% +0.16% +0.00% / -0.12% +0.36% +0.44%] index_select const : Elapsed 0.025 ms (2.488 ms / 100) 2.486 -> 2.490 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.60% +0.40%] index_select wrap : Elapsed 0.025 ms (2.490 ms / 100) 2.485 -> 2.490 ( +0.20%) [ +0.20% +0.04% +0.00% / +0.20% +0.60% +0.56%] index_select linear : Elapsed 0.025 ms (2.490 ms / 100) 2.480 -> 2.486 ( +0.24%) [ +0.00% +0.20% +0.08% / +0.24% +0.60% +0.65%] index_select reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.485 -> 2.488 ( +0.12%) [ +0.28% +0.04% +0.00% / +0.12% +0.48% +0.60%] index_select skip64 : Elapsed 0.025 ms (2.492 ms / 100) 2.481 -> 2.483 ( +0.08%) [ +0.24% +0.00% +0.04% / +0.08% +0.56% +0.77%] index_select skip256 : Elapsed 0.025 ms (2.487 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.00% +0.24% +0.12% / +0.12% +0.64% +0.60%] index_select spread : Elapsed 0.025 ms (2.501 ms / 100) 2.508 -> 2.507 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.36% +0.48%] index_select strided 3 : Elapsed 0.025 ms (2.508 ms / 100) 2.503 -> 2.505 ( +0.08%) [ +0.12% +0.24% +0.00% / +0.08% +0.48% +0.44%] index_select strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.501 -> 2.503 ( +0.08%) [ +0.12% +0.24% +0.00% / +0.08% +0.52% +0.44%] index_select strided 7 : Elapsed 0.025 ms (2.504 ms / 100) 2.512 -> 2.514 ( +0.08%) [ +0.20% +0.20% +0.00% / +0.08% +0.80% +0.64%] index_select strided 8 : Elapsed 0.025 ms (2.517 ms / 100) 2.512 -> 2.517 ( +0.20%) [ +0.00% +0.16% +0.08% / +0.20% +0.40% +0.40%] index_select random : Elapsed 0.025 ms (2.512 ms / 100) 2.506 -> 2.515 ( +0.36%) [ +0.12% +0.00% +0.00% / +0.36% +0.40% +0.52%] index_select random_sorted : Elapsed 0.025 ms (2.509 ms / 100) 2.508 -> 2.511 ( +0.12%) [ +0.20% +0.04% +0.00% / +0.12% +0.36% +0.24%] index_select perm : Elapsed 0.025 ms (2.513 ms / 100) 2.513 -> 2.512 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.28% +0.40%] index_select perm_sorted : Elapsed 0.025 ms (2.513 ms / 100) B = [5, 4, 40, 20] (stride (1, 5, 20, 800)) A = [5, 16, 40, 20] (stride (320, 20, 1600, 1)) dim = 1 2.406 -> 2.407 ( +0.04%) [ +0.00% +0.25% +0.04% / +0.08% +0.25% +0.04%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.20% +0.16%] index_select wrap : Elapsed 0.024 ms (2.446 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.08% +0.00% +0.24% / +0.33% -0.08% -0.04%] index_select linear : Elapsed 0.025 ms (2.452 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.41% +0.37% +0.00% / +0.29% +0.04% +0.20%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.392 -> 2.389 ( -0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.04% -0.13%] index_select skip64 : Elapsed 0.024 ms (2.392 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.21% +0.12% +0.00% / +0.04% +0.21% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.16% +0.20% +0.08%] index_select spread : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.24% +0.16%] index_select strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.20% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.12% +0.29% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.413 -> 2.418 ( +0.21%) [ +0.12% +0.00% +0.25% / +0.21% +0.62% +0.58%] index_select strided 8 : Elapsed 0.024 ms (2.416 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.00% +0.17% +0.04% / -0.04% +0.25% +0.25%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.431 -> 2.427 ( -0.16%) [ +0.04% +0.08% +0.00% / +0.08% +0.16% -0.16%] index_select random_sorted : Elapsed 0.024 ms (2.432 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.00% +0.16% +0.24% / +0.33% +0.04% +0.16%] index_select perm : Elapsed 0.025 ms (2.458 ms / 100) 2.451 -> 2.455 ( +0.16%) [ +0.33% +0.00% +0.33% / +0.20% +0.37% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) out_shape = [5, 16, 4, 20] in_shape = [5, 16, 40, 20] idx_dim = 2 B = [5, 16, 4, 20] (stride (1280, 4, 1, 64)) A = [5, 16, 40, 20] (stride (40, 4000, 1, 200)) dim = 2 1.366 -> 1.366 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_select const : Elapsed 0.014 ms (1.367 ms / 100) 1.364 -> 1.363 ( -0.07%) [ +0.15% +0.00% +0.00% / -0.07% +0.66% +0.51%] index_select wrap : Elapsed 0.014 ms (1.366 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.66% +0.58%] index_select linear : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.366 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.37% +0.51%] index_select reverse : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.22% +0.00% +0.07% / +0.15% +0.81% +0.73%] index_select skip64 : Elapsed 0.014 ms (1.369 ms / 100) 1.368 -> 1.370 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.80% +0.66%] index_select skip256 : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.88% +0.58%] index_select spread : Elapsed 0.014 ms (1.370 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.44% +0.44%] index_select strided 3 : Elapsed 0.014 ms (1.366 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.37% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.371 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.44% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.371 ms / 100) 1.375 -> 1.382 ( +0.51%) [ +0.15% +0.07% +0.00% / +1.16% +0.80% +0.51%] index_select random : Elapsed 0.014 ms (1.377 ms / 100) 1.368 -> 1.371 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.58% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.369 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.15% +0.00% +0.29% / +0.15% +0.58% +0.73%] index_select perm : Elapsed 0.014 ms (1.371 ms / 100) 1.369 -> 1.368 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.37% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) B = [5, 16, 4, 20] (stride (80, 400, 1, 4)) A = [5, 16, 40, 20] (stride (20, 4000, 100, 1)) dim = 2 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.39% +0.47%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.63% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.272 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.273 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.47% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.63% +0.00% / +0.08% +0.47% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.24%] index_select random_sorted : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.08% +0.71% / +0.08% +0.47% +0.31%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [5, 16, 4, 20] (stride (1, 400, 100, 5)) A = [5, 16, 40, 20] (stride (12800, 1, 320, 16)) dim = 2 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.70%] index_select const : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.70% +0.94%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_select linear : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.280 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.39% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select spread : Elapsed 0.013 ms (1.286 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.55% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.23% / +0.08% +0.47% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.63% +0.63%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select perm : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) B = [5, 16, 4, 20] (stride (4, 400, 1, 20)) A = [5, 16, 40, 20] (stride (20, 100, 1600, 1)) dim = 2 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.55% +0.39%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.47% +0.47%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.63% +0.63%] index_select reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.55% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.63% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select spread : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.24% +0.00% +0.08% / +0.08% +0.63% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.39% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.47% / +0.08% +0.47% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.47% +0.47%] index_select random : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select random_sorted : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.39%] index_select perm : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [5, 16, 4, 20] (stride (1, 5, 1600, 80)) A = [5, 16, 40, 20] (stride (20, 4000, 100, 1)) dim = 2 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.31%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.39% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.47%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.55% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.55% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.31% +0.31%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.47% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.274 -> 1.278 ( +0.31%) [ +0.08% +0.00% +0.00% / +0.31% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) B = [5, 16, 4, 20] (stride (1, 5, 1600, 80)) A = [5, 16, 40, 20] (stride (1, 5, 80, 3200)) dim = 2 1.336 -> 1.336 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.52% +0.45%] index_select const : Elapsed 0.013 ms (1.337 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.22% +0.15%] index_select wrap : Elapsed 0.013 ms (1.346 ms / 100) 1.340 -> 1.338 ( -0.15%) [ +0.15% +0.00% +0.00% / -0.15% +0.30% +0.30%] index_select linear : Elapsed 0.013 ms (1.342 ms / 100) 1.330 -> 1.331 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.15% +0.23%] index_select reverse : Elapsed 0.013 ms (1.331 ms / 100) 1.332 -> 1.332 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.38% +0.30%] index_select skip64 : Elapsed 0.013 ms (1.334 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.00% +0.15% +1.05% / +0.15% +0.68% +0.68%] index_select skip256 : Elapsed 0.013 ms (1.333 ms / 100) 1.346 -> 1.345 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.45% +0.52%] index_select spread : Elapsed 0.013 ms (1.346 ms / 100) 1.338 -> 1.339 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.60% +0.60%] index_select strided 3 : Elapsed 0.013 ms (1.340 ms / 100) 1.340 -> 1.340 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.67%] index_select strided 5 : Elapsed 0.013 ms (1.340 ms / 100) 1.343 -> 1.346 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.82% +1.04%] index_select strided 7 : Elapsed 0.013 ms (1.344 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.97% +0.67%] index_select strided 8 : Elapsed 0.013 ms (1.334 ms / 100) 1.333 -> 1.334 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.68% +0.60%] index_select strided 16 : Elapsed 0.013 ms (1.335 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.60% +0.75%] index_select random : Elapsed 0.013 ms (1.335 ms / 100) 1.339 -> 1.338 ( -0.07%) [ +0.07% +0.00% +0.15% / -0.07% +0.52% +0.52%] index_select random_sorted : Elapsed 0.013 ms (1.340 ms / 100) 1.342 -> 1.344 ( +0.15%) [ +0.07% +0.00% +0.22% / +0.15% +1.04% +1.12%] index_select perm : Elapsed 0.013 ms (1.343 ms / 100) 1.336 -> 1.336 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.82%] index_select perm_sorted : Elapsed 0.013 ms (1.336 ms / 100) B = [5, 16, 4, 20] (stride (1, 20, 5, 320)) dim = 2 fill_cnt = 40 2.743 -> 2.739 ( -0.15%) [ +0.04% +0.00% +0.00% / -0.15% +0.07% +0.26%] index_fill_ const : Elapsed 0.027 ms (2.744 ms / 100) 2.743 -> 2.741 ( -0.07%) [ +0.18% +0.04% +0.00% / -0.07% +0.29% +0.00%] index_fill_ linear : Elapsed 0.027 ms (2.748 ms / 100) 2.740 -> 2.739 ( -0.04%) [ +0.00% +0.11% +0.07% / -0.04% +0.33% +0.15%] index_fill_ reverse : Elapsed 0.027 ms (2.740 ms / 100) 2.748 -> 2.747 ( -0.04%) [ +0.18% +0.00% +0.11% / -0.04% +0.15% +0.11%] index_fill_ skip64 : Elapsed 0.028 ms (2.753 ms / 100) 2.744 -> 2.748 ( +0.15%) [ +0.00% +0.07% +0.26% / +0.18% +0.18% +0.15%] index_fill_ skip256 : Elapsed 0.027 ms (2.744 ms / 100) 2.745 -> 2.740 ( -0.18%) [ +0.07% +0.00% +0.04% / -0.18% +0.00% -0.18%] index_fill_ spread : Elapsed 0.027 ms (2.747 ms / 100) 2.747 -> 2.741 ( -0.22%) [ +0.04% +0.00% +0.11% / -0.04% -0.22% -0.04%] index_fill_ strided 3 : Elapsed 0.027 ms (2.748 ms / 100) 2.744 -> 2.737 ( -0.26%) [ +0.04% +0.00% +0.00% / -0.26% +0.04% +0.07%] index_fill_ random : Elapsed 0.027 ms (2.745 ms / 100) 2.746 -> 2.743 ( -0.11%) [ +0.00% +0.04% +0.04% / -0.11% +0.15% +0.07%] index_fill_ random_sorted : Elapsed 0.027 ms (2.746 ms / 100) B = [5, 16, 4, 20] (stride (16, 1, 80, 320)) A = [5, 16, 40, 20] (stride (40, 4000, 1, 200)) dim = 2 1.279 -> 1.278 ( -0.08%) [ +0.08% +0.00% +0.47% / -0.08% +0.47% +0.39%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.00% +0.08% +0.23% / +0.16% +0.55% +0.39%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.39% +0.55%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.70% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.23% / +0.08% +0.39% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.39% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.281 ( -0.08%) [ +0.31% +0.08% +0.00% / -0.08% +0.39% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.286 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.62%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.39% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.31% +0.23%] index_select perm : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.23% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) out_shape = [5, 16, 40, 4] in_shape = [5, 16, 40, 20] idx_dim = 3 B = [5, 16, 40, 4] (stride (2560, 160, 1, 40)) A = [5, 16, 40, 20] (stride (800, 4000, 20, 1)) dim = 3 2.033 -> 2.034 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.39% +0.34%] index_select const : Elapsed 0.020 ms (2.033 ms / 100) 2.017 -> 2.021 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.40% +0.35%] index_select wrap : Elapsed 0.020 ms (2.017 ms / 100) 2.004 -> 2.012 ( +0.40%) [ +0.40% +0.15% +0.00% / +0.40% +0.75% +0.90%] index_select linear : Elapsed 0.020 ms (2.012 ms / 100) 2.036 -> 2.034 ( -0.10%) [ +0.00% +0.05% +0.00% / -0.10% +0.29% +0.29%] index_select reverse : Elapsed 0.020 ms (2.036 ms / 100) 2.017 -> 2.019 ( +0.10%) [ +0.30% +0.00% +0.40% / +0.10% +0.55% +0.84%] index_select skip64 : Elapsed 0.020 ms (2.023 ms / 100) 2.033 -> 2.031 ( -0.10%) [ +0.00% +0.20% +0.05% / -0.10% +0.34% +0.34%] index_select skip256 : Elapsed 0.020 ms (2.033 ms / 100) 2.075 -> 2.070 ( -0.24%) [ +0.05% +0.00% +0.00% / -0.24% +0.19% +0.24%] index_select spread : Elapsed 0.021 ms (2.076 ms / 100) 2.051 -> 2.055 ( +0.20%) [ +0.20% +0.00% +0.49% / +0.34% +0.24% +0.20%] index_select strided 3 : Elapsed 0.021 ms (2.055 ms / 100) 2.065 -> 2.069 ( +0.19%) [ +0.19% +0.39% +0.00% / +0.63% +0.53% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.069 ms / 100) 2.056 -> 2.057 ( +0.05%) [ +0.19% +0.00% +0.05% / +0.05% +0.78% +0.63%] index_select strided 7 : Elapsed 0.021 ms (2.060 ms / 100) 2.072 -> 2.075 ( +0.14%) [ +0.14% +0.00% +0.34% / +0.14% +0.68% +0.34%] index_select strided 8 : Elapsed 0.021 ms (2.075 ms / 100) 2.068 -> 2.065 ( -0.15%) [ +0.05% +0.00% +0.05% / -0.15% +0.48% +0.58%] index_select strided 16 : Elapsed 0.021 ms (2.069 ms / 100) 2.061 -> 2.054 ( -0.34%) [ +0.15% +0.00% +0.00% / -0.34% +0.00% +0.15%] index_select random : Elapsed 0.021 ms (2.064 ms / 100) 2.057 -> 2.064 ( +0.34%) [ +0.24% +0.00% +0.10% / +0.53% +0.34% +0.34%] index_select random_sorted : Elapsed 0.021 ms (2.062 ms / 100) 2.047 -> 2.053 ( +0.29%) [ +0.00% +0.20% +0.29% / +0.29% +0.59% +0.54%] index_select perm : Elapsed 0.020 ms (2.047 ms / 100) 2.069 -> 2.076 ( +0.34%) [ +0.53% +0.10% +0.00% / +0.34% +0.53% +0.53%] index_select perm_sorted : Elapsed 0.021 ms (2.080 ms / 100) B = [5, 16, 40, 4] (stride (160, 800, 1, 40)) A = [5, 16, 40, 20] (stride (800, 4000, 20, 1)) dim = 3 2.153 -> 2.147 ( -0.28%) [ +0.19% +0.05% +0.00% / +0.14% -0.09% -0.28%] index_select const : Elapsed 0.022 ms (2.157 ms / 100) 2.146 -> 2.137 ( -0.42%) [ +0.00% +0.14% +0.05% / +0.09% -0.28% -0.42%] index_select wrap : Elapsed 0.021 ms (2.146 ms / 100) 2.144 -> 2.137 ( -0.33%) [ +0.09% +0.23% +0.00% / +0.19% -0.33% -0.33%] index_select linear : Elapsed 0.021 ms (2.146 ms / 100) 2.158 -> 2.147 ( -0.51%) [ +0.19% +0.05% +0.00% / +0.19% -0.32% -0.51%] index_select reverse : Elapsed 0.022 ms (2.162 ms / 100) 2.156 -> 2.150 ( -0.28%) [ +0.00% +0.00% +0.00% / +0.09% -0.23% -0.28%] index_select skip64 : Elapsed 0.022 ms (2.156 ms / 100) 2.151 -> 2.137 ( -0.65%) [ +0.00% +0.09% +0.00% / -0.14% -0.65% -0.65%] index_select skip256 : Elapsed 0.022 ms (2.151 ms / 100) 2.197 -> 2.187 ( -0.46%) [ +0.23% +0.14% +0.00% / +0.41% -0.46% -0.41%] index_select spread : Elapsed 0.022 ms (2.202 ms / 100) 2.187 -> 2.180 ( -0.32%) [ +0.37% +0.23% +0.00% / +0.09% -0.32% -0.14%] index_select strided 3 : Elapsed 0.022 ms (2.195 ms / 100) 2.207 -> 2.191 ( -0.72%) [ +0.09% +0.36% +0.00% / +0.41% -0.59% -0.72%] index_select strided 5 : Elapsed 0.022 ms (2.209 ms / 100) 2.189 -> 2.184 ( -0.23%) [ +0.55% +0.00% +0.37% / +0.41% -0.14% -0.23%] index_select strided 7 : Elapsed 0.022 ms (2.201 ms / 100) 2.190 -> 2.184 ( -0.27%) [ +0.14% +0.37% +0.00% / +0.18% -0.27% -0.23%] index_select strided 8 : Elapsed 0.022 ms (2.193 ms / 100) 2.202 -> 2.190 ( -0.54%) [ +0.05% +0.00% +0.00% / +0.00% -0.54% -0.54%] index_select strided 16 : Elapsed 0.022 ms (2.203 ms / 100) 2.189 -> 2.171 ( -0.82%) [ +0.00% +0.09% +0.00% / -0.32% -0.73% -0.82%] index_select random : Elapsed 0.022 ms (2.189 ms / 100) 2.207 -> 2.199 ( -0.36%) [ +0.14% +0.09% +0.00% / +0.14% -0.36% -0.36%] index_select random_sorted : Elapsed 0.022 ms (2.210 ms / 100) 2.192 -> 2.183 ( -0.41%) [ +0.05% +0.32% +0.00% / +0.36% -0.41% -0.14%] index_select perm : Elapsed 0.022 ms (2.193 ms / 100) 2.184 -> 2.174 ( -0.46%) [ +0.00% +0.18% +0.05% / +0.23% -0.46% -0.41%] index_select perm_sorted : Elapsed 0.022 ms (2.184 ms / 100) B = [5, 16, 40, 4] (stride (160, 800, 1, 40)) A = [5, 16, 40, 20] (stride (20, 4000, 100, 1)) dim = 3 2.100 -> 2.100 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.52% +0.71%] index_select const : Elapsed 0.021 ms (2.100 ms / 100) 2.102 -> 2.104 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.71% +0.57%] index_select wrap : Elapsed 0.021 ms (2.103 ms / 100) 2.098 -> 2.102 ( +0.19%) [ +0.14% +0.00% +0.29% / +0.19% +1.05% +0.86%] index_select linear : Elapsed 0.021 ms (2.101 ms / 100) 2.102 -> 2.101 ( -0.05%) [ +0.19% +0.05% +0.00% / -0.05% +0.57% +0.81%] index_select reverse : Elapsed 0.021 ms (2.106 ms / 100) 2.100 -> 2.103 ( +0.14%) [ +0.24% +0.00% +0.05% / +0.14% +0.86% +0.95%] index_select skip64 : Elapsed 0.021 ms (2.105 ms / 100) 2.102 -> 2.104 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.71% +0.57%] index_select skip256 : Elapsed 0.021 ms (2.104 ms / 100) 2.170 -> 2.175 ( +0.23%) [ +0.00% +0.18% +0.00% / +0.23% +0.69% +0.92%] index_select spread : Elapsed 0.022 ms (2.170 ms / 100) 2.143 -> 2.144 ( +0.05%) [ +0.05% +0.00% +0.09% / +0.05% +0.70% +0.65%] index_select strided 3 : Elapsed 0.021 ms (2.144 ms / 100) 2.165 -> 2.173 ( +0.37%) [ +0.28% +0.14% +0.00% / +0.37% +0.92% +0.79%] index_select strided 5 : Elapsed 0.022 ms (2.171 ms / 100) 2.145 -> 2.151 ( +0.28%) [ +0.09% +0.00% +0.47% / +0.28% +0.84% +1.21%] index_select strided 7 : Elapsed 0.021 ms (2.147 ms / 100) 2.146 -> 2.149 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.51% +0.75%] index_select strided 8 : Elapsed 0.021 ms (2.146 ms / 100) 2.171 -> 2.173 ( +0.09%) [ +0.00% +0.05% +0.09% / +0.09% +0.69% +0.74%] index_select strided 16 : Elapsed 0.022 ms (2.171 ms / 100) 2.161 -> 2.174 ( +0.60%) [ +0.14% +0.00% +0.00% / +0.60% +0.93% +0.88%] index_select random : Elapsed 0.022 ms (2.164 ms / 100) 2.170 -> 2.175 ( +0.23%) [ +0.00% +0.09% +0.37% / +0.23% +0.83% +0.83%] index_select random_sorted : Elapsed 0.022 ms (2.170 ms / 100) 2.169 -> 2.176 ( +0.32%) [ +0.00% +0.18% +0.32% / +0.32% +0.97% +0.83%] index_select perm : Elapsed 0.022 ms (2.169 ms / 100) 2.170 -> 2.168 ( -0.09%) [ +0.05% +0.23% +0.00% / -0.09% +0.51% +0.78%] index_select perm_sorted : Elapsed 0.022 ms (2.171 ms / 100) B = [5, 16, 40, 4] (stride (4, 800, 20, 1)) A = [5, 16, 40, 20] (stride (12800, 1, 16, 640)) dim = 3 1.987 -> 1.991 ( +0.20%) [ +0.20% +0.35% +0.00% / +0.25% +0.20% +0.50%] index_select const : Elapsed 0.020 ms (1.991 ms / 100) 1.981 -> 1.984 ( +0.15%) [ +0.15% +0.00% +0.10% / +0.15% +0.76% +0.76%] index_select wrap : Elapsed 0.020 ms (1.984 ms / 100) 1.982 -> 1.987 ( +0.25%) [ +0.05% +0.05% +0.00% / +0.25% +0.76% +0.61%] index_select linear : Elapsed 0.020 ms (1.983 ms / 100) 1.990 -> 1.989 ( -0.05%) [ +0.10% +0.00% +0.05% / -0.05% +0.75% +0.65%] index_select reverse : Elapsed 0.020 ms (1.992 ms / 100) 1.991 -> 1.992 ( +0.05%) [ +0.00% +0.00% +0.25% / +0.05% +0.45% +0.25%] index_select skip64 : Elapsed 0.020 ms (1.991 ms / 100) 1.979 -> 1.983 ( +0.20%) [ +0.00% +0.10% +0.05% / +0.20% +0.25% +0.35%] index_select skip256 : Elapsed 0.020 ms (1.979 ms / 100) 1.985 -> 1.984 ( -0.05%) [ +0.15% +0.15% +0.00% / -0.05% +0.40% +0.55%] index_select spread : Elapsed 0.020 ms (1.988 ms / 100) 1.989 -> 1.994 ( +0.25%) [ +0.45% +0.20% +0.00% / +0.25% +0.50% +0.50%] index_select strided 3 : Elapsed 0.020 ms (1.998 ms / 100) 1.991 -> 1.992 ( +0.05%) [ +0.00% +0.15% +0.05% / +0.05% +0.20% +0.25%] index_select strided 5 : Elapsed 0.020 ms (1.991 ms / 100) 1.993 -> 1.996 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.25% +0.25% +0.15%] index_select strided 7 : Elapsed 0.020 ms (1.996 ms / 100) 1.997 -> 1.993 ( -0.20%) [ +0.15% +0.00% +0.05% / -0.20% +0.00% +0.05%] index_select strided 8 : Elapsed 0.020 ms (2.000 ms / 100) 1.988 -> 1.992 ( +0.20%) [ +0.30% +0.30% +0.00% / +0.30% +0.35% +0.20%] index_select strided 16 : Elapsed 0.020 ms (1.994 ms / 100) 1.980 -> 1.987 ( +0.35%) [ +0.20% +0.15% +0.00% / +0.35% +0.76% +0.76%] index_select random : Elapsed 0.020 ms (1.984 ms / 100) 1.985 -> 1.986 ( +0.05%) [ +0.15% +0.25% +0.00% / +0.05% +0.91% +1.01%] index_select random_sorted : Elapsed 0.020 ms (1.988 ms / 100) 1.994 -> 1.998 ( +0.20%) [ +0.15% +0.00% +0.05% / +0.25% +0.20% +0.20%] index_select perm : Elapsed 0.020 ms (1.997 ms / 100) 1.989 -> 1.986 ( -0.15%) [ +0.00% +0.05% +0.00% / -0.15% -0.05% +0.10%] index_select perm_sorted : Elapsed 0.020 ms (1.989 ms / 100) B = [5, 16, 40, 4] (stride (4, 800, 20, 1)) A = [5, 16, 40, 20] (stride (16, 1, 1600, 80)) dim = 3 0.788 -> 0.798 ( +1.27%) [ +0.51% +0.00% +0.25% / +1.27% +2.92% +2.92%] index_select const : Elapsed 0.008 ms (0.792 ms / 100) 0.792 -> 0.798 ( +0.76%) [ +0.25% +0.00% +0.38% / +0.88% +1.14% +0.76%] index_select wrap : Elapsed 0.008 ms (0.794 ms / 100) 0.794 -> 0.795 ( +0.13%) [ +0.25% +0.00% +0.50% / +0.13% +0.88% +0.38%] index_select linear : Elapsed 0.008 ms (0.796 ms / 100) 0.795 -> 0.798 ( +0.38%) [ +0.00% +0.25% +0.00% / +0.50% +0.38% +0.50%] index_select reverse : Elapsed 0.008 ms (0.795 ms / 100) 0.796 -> 0.796 ( +0.00%) [ +0.38% +0.13% +0.00% / +0.00% +0.13% +0.25%] index_select skip64 : Elapsed 0.008 ms (0.799 ms / 100) 0.789 -> 0.794 ( +0.63%) [ +0.38% +0.63% +0.00% / +0.63% +2.28% +2.53%] index_select skip256 : Elapsed 0.008 ms (0.792 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.00% +0.76% +0.63% / +0.13% +2.40% +2.65%] index_select spread : Elapsed 0.008 ms (0.793 ms / 100) 0.816 -> 0.804 ( -1.47%) [ +0.25% +0.49% +0.00% / +0.25% -1.10% -1.47%] index_select strided 3 : Elapsed 0.008 ms (0.818 ms / 100) 0.791 -> 0.798 ( +0.88%) [ +0.88% +0.38% +0.00% / +0.88% +1.90% +1.90%] index_select strided 5 : Elapsed 0.008 ms (0.798 ms / 100) 0.808 -> 0.797 ( -1.36%) [ +0.00% +0.12% +0.25% / +0.25% -1.36% -0.74%] index_select strided 7 : Elapsed 0.008 ms (0.808 ms / 100) 0.818 -> 0.797 ( -2.57%) [ +0.61% +0.00% +0.00% / +0.00% -2.32% -2.57%] index_select strided 8 : Elapsed 0.008 ms (0.823 ms / 100) 0.801 -> 0.798 ( -0.37%) [ +0.25% +0.50% +0.00% / +0.37% -0.37% +0.12%] index_select strided 16 : Elapsed 0.008 ms (0.803 ms / 100) 0.797 -> 0.797 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +2.89% +2.38%] index_select random : Elapsed 0.008 ms (0.798 ms / 100) 0.792 -> 0.795 ( +0.38%) [ +0.00% +0.63% +0.63% / +0.38% +1.01% +1.52%] index_select random_sorted : Elapsed 0.008 ms (0.792 ms / 100) 0.803 -> 0.799 ( -0.50%) [ +0.00% +0.12% +0.00% / -0.50% -0.50% -0.12%] index_select perm : Elapsed 0.008 ms (0.803 ms / 100) 0.798 -> 0.800 ( +0.25%) [ +0.00% +0.13% +0.25% / +0.63% +0.25% +0.25%] index_select perm_sorted : Elapsed 0.008 ms (0.798 ms / 100) B = [5, 16, 40, 4] (stride (64, 1, 320, 16)) A = [5, 16, 40, 20] (stride (12800, 1, 16, 640)) dim = 3 2.088 -> 2.090 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.38% +0.57%] index_select const : Elapsed 0.021 ms (2.090 ms / 100) 2.078 -> 2.084 ( +0.29%) [ +0.00% +0.10% +0.05% / +0.29% +0.63% +0.77%] index_select wrap : Elapsed 0.021 ms (2.078 ms / 100) 2.091 -> 2.094 ( +0.14%) [ +0.10% +0.00% +0.05% / +0.14% +0.33% +0.53%] index_select linear : Elapsed 0.021 ms (2.093 ms / 100) 2.095 -> 2.098 ( +0.14%) [ +0.24% +0.05% +0.00% / +0.14% +0.62% +0.91%] index_select reverse : Elapsed 0.021 ms (2.100 ms / 100) 2.089 -> 2.094 ( +0.24%) [ +0.00% +0.19% +0.10% / +0.24% +0.53% +0.29%] index_select skip64 : Elapsed 0.021 ms (2.089 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.43% +0.24%] index_select skip256 : Elapsed 0.021 ms (2.090 ms / 100) 2.083 -> 2.080 ( -0.14%) [ +0.19% +0.05% +0.00% / -0.14% +0.43% +0.29%] index_select spread : Elapsed 0.021 ms (2.087 ms / 100) 2.093 -> 2.094 ( +0.05%) [ +0.14% +0.14% +0.00% / +0.05% +0.38% +0.29%] index_select strided 3 : Elapsed 0.021 ms (2.096 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.29% +0.38%] index_select strided 5 : Elapsed 0.021 ms (2.096 ms / 100) 2.096 -> 2.097 ( +0.05%) [ +0.33% +0.19% +0.00% / +0.33% +0.05% +0.10%] index_select strided 7 : Elapsed 0.021 ms (2.103 ms / 100) 2.094 -> 2.095 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.14% +0.19% +0.05%] index_select strided 8 : Elapsed 0.021 ms (2.096 ms / 100) 2.087 -> 2.088 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.10% +0.10% +0.05%] index_select strided 16 : Elapsed 0.021 ms (2.087 ms / 100) 2.091 -> 2.092 ( +0.05%) [ +0.19% +0.00% +0.00% / +0.05% +0.24% +0.24%] index_select random : Elapsed 0.021 ms (2.095 ms / 100) 2.091 -> 2.094 ( +0.14%) [ +0.10% +0.00% +0.05% / +0.14% +0.48% +0.53%] index_select random_sorted : Elapsed 0.021 ms (2.093 ms / 100) 2.097 -> 2.096 ( -0.05%) [ +0.05% +0.05% +0.00% / +0.00% +0.00% -0.05%] index_select perm : Elapsed 0.021 ms (2.098 ms / 100) 2.094 -> 2.095 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +0.05% +0.14%] index_select perm_sorted : Elapsed 0.021 ms (2.097 ms / 100) B = [5, 16, 40, 4] (stride (64, 1, 320, 16)) A = [5, 16, 40, 20] (stride (1, 5, 80, 3200)) dim = 3 2.180 -> 2.181 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.46% +0.37%] index_select const : Elapsed 0.022 ms (2.180 ms / 100) 2.178 -> 2.186 ( +0.37%) [ +0.09% +0.14% +0.00% / +0.37% +0.69% +0.69%] index_select wrap : Elapsed 0.022 ms (2.180 ms / 100) 2.177 -> 2.175 ( -0.09%) [ +0.00% +0.05% +0.00% / -0.09% +0.64% +0.51%] index_select linear : Elapsed 0.022 ms (2.177 ms / 100) 2.170 -> 2.174 ( +0.18%) [ +0.37% +0.00% +0.00% / +0.18% +0.65% +0.55%] index_select reverse : Elapsed 0.022 ms (2.178 ms / 100) 2.179 -> 2.184 ( +0.23%) [ +0.05% +0.14% +0.00% / +0.23% +0.73% +0.55%] index_select skip64 : Elapsed 0.022 ms (2.180 ms / 100) 2.179 -> 2.179 ( +0.00%) [ +0.28% +0.00% +0.09% / +0.00% +0.46% +0.87%] index_select skip256 : Elapsed 0.022 ms (2.185 ms / 100) 2.172 -> 2.176 ( +0.18%) [ +0.18% +0.09% +0.00% / +0.18% +0.41% +0.74%] index_select spread : Elapsed 0.022 ms (2.176 ms / 100) 2.174 -> 2.174 ( +0.00%) [ +0.18% +0.00% +0.14% / +0.00% +0.41% +0.55%] index_select strided 3 : Elapsed 0.022 ms (2.178 ms / 100) 2.174 -> 2.174 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.55% +0.60%] index_select strided 5 : Elapsed 0.022 ms (2.176 ms / 100) 2.167 -> 2.169 ( +0.09%) [ +0.23% +0.00% +0.09% / +0.09% +0.78% +0.51%] index_select strided 7 : Elapsed 0.022 ms (2.172 ms / 100) 2.172 -> 2.171 ( -0.05%) [ +0.18% +0.28% +0.00% / -0.05% +0.60% +0.55%] index_select strided 8 : Elapsed 0.022 ms (2.176 ms / 100) 2.173 -> 2.175 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.09% +0.55% +0.46%] index_select strided 16 : Elapsed 0.022 ms (2.175 ms / 100) 2.179 -> 2.181 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.41% +0.41%] index_select random : Elapsed 0.022 ms (2.179 ms / 100) 2.180 -> 2.180 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.37% +0.32%] index_select random_sorted : Elapsed 0.022 ms (2.180 ms / 100) 2.181 -> 2.179 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.46% +0.41%] index_select perm : Elapsed 0.022 ms (2.181 ms / 100) 2.177 -> 2.182 ( +0.23%) [ +0.05% +0.18% +0.00% / +0.23% +0.64% +0.69%] index_select perm_sorted : Elapsed 0.022 ms (2.178 ms / 100) B = [5, 16, 40, 4] (stride (4, 20, 320, 1)) A = [5, 16, 40, 20] (stride (16, 1, 80, 3200)) dim = 3 2.090 -> 2.090 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.29% +0.48%] index_select const : Elapsed 0.021 ms (2.091 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.33% +0.38%] index_select wrap : Elapsed 0.021 ms (2.095 ms / 100) 2.091 -> 2.095 ( +0.19%) [ +0.10% +0.14% +0.00% / +0.19% +0.62% +0.38%] index_select linear : Elapsed 0.021 ms (2.093 ms / 100) 2.097 -> 2.096 ( -0.05%) [ +0.24% +0.24% +0.00% / +0.05% +0.14% -0.05%] index_select reverse : Elapsed 0.021 ms (2.102 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.19% +0.29%] index_select skip64 : Elapsed 0.021 ms (2.092 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.34% +0.24%] index_select skip256 : Elapsed 0.021 ms (2.090 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.14% +0.14%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.099 -> 2.099 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.14% +0.19%] index_select strided 3 : Elapsed 0.021 ms (2.100 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.05% +0.05%] index_select strided 5 : Elapsed 0.021 ms (2.092 ms / 100) 2.091 -> 2.093 ( +0.10%) [ +0.14% +0.05% +0.00% / +0.10% +0.19% +0.19%] index_select strided 7 : Elapsed 0.021 ms (2.094 ms / 100) 2.092 -> 2.096 ( +0.19%) [ +0.33% +0.05% +0.00% / +0.19% +0.38% +0.53%] index_select strided 8 : Elapsed 0.021 ms (2.099 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.10% +0.10%] index_select strided 16 : Elapsed 0.021 ms (2.093 ms / 100) 2.094 -> 2.095 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.29% +0.43%] index_select random : Elapsed 0.021 ms (2.095 ms / 100) 2.096 -> 2.096 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.33% +0.19%] index_select random_sorted : Elapsed 0.021 ms (2.097 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.29% +0.19%] index_select perm : Elapsed 0.021 ms (2.091 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.14% +0.05% +0.00% / +0.00% +0.38% +0.34%] index_select perm_sorted : Elapsed 0.021 ms (2.092 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 320, 80)) A = [5, 16, 40, 20] (stride (16, 1, 80, 3200)) dim = 3 2.082 -> 2.080 ( -0.10%) [ +0.14% +0.05% +0.00% / +0.24% -0.10% +0.00%] index_select const : Elapsed 0.021 ms (2.085 ms / 100) 2.089 -> 2.083 ( -0.29%) [ +0.14% +0.00% +0.14% / -0.10% -0.05% -0.29%] index_select wrap : Elapsed 0.021 ms (2.092 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.00% -0.05%] index_select linear : Elapsed 0.021 ms (2.091 ms / 100) 2.082 -> 2.083 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.14% +0.10%] index_select reverse : Elapsed 0.021 ms (2.084 ms / 100) 2.082 -> 2.079 ( -0.14%) [ +0.00% +0.00% +0.00% / +0.24% +0.00% -0.14%] index_select skip64 : Elapsed 0.021 ms (2.082 ms / 100) 2.084 -> 2.079 ( -0.24%) [ +0.14% +0.00% +0.00% / -0.24% -0.10% -0.19%] index_select skip256 : Elapsed 0.021 ms (2.087 ms / 100) 2.086 -> 2.089 ( +0.14%) [ +0.24% +0.14% +0.00% / +0.14% +0.24% +0.24%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.088 ( -0.10%) [ +0.00% +0.05% +0.00% / +0.05% -0.10% +0.00%] index_select strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.084 -> 2.086 ( +0.10%) [ +0.19% +0.14% +0.00% / +0.10% +0.14% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.088 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.14% +0.24% +0.10%] index_select strided 7 : Elapsed 0.021 ms (2.089 ms / 100) 2.082 -> 2.087 ( +0.24%) [ +0.00% +0.14% +0.05% / +0.29% +0.24% +0.29%] index_select strided 8 : Elapsed 0.021 ms (2.082 ms / 100) 2.084 -> 2.084 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.14% +0.00% +0.14%] index_select strided 16 : Elapsed 0.021 ms (2.086 ms / 100) 2.092 -> 2.091 ( -0.05%) [ +0.00% +0.10% +0.14% / -0.05% +0.10% +0.00%] index_select random : Elapsed 0.021 ms (2.092 ms / 100) 2.090 -> 2.086 ( -0.19%) [ +0.00% +0.05% +0.00% / +0.00% -0.19% +0.05%] index_select random_sorted : Elapsed 0.021 ms (2.090 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.14% +0.00% +0.10% / +0.00% +0.05% +0.14%] index_select perm : Elapsed 0.021 ms (2.092 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.00% +0.10% +0.00% / -0.05% +0.10% +0.05%] index_select perm_sorted : Elapsed 0.021 ms (2.090 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) dim = 3 fill_cnt = 20 3.702 -> 3.689 ( -0.35%) [ +0.14% +0.38% +0.00% / -0.35% -0.16% -0.19%] index_fill_ const : Elapsed 0.037 ms (3.707 ms / 100) 3.722 -> 3.706 ( -0.43%) [ +0.11% +0.00% +0.03% / -0.43% -0.32% -0.40%] index_fill_ linear : Elapsed 0.037 ms (3.726 ms / 100) 3.711 -> 3.696 ( -0.40%) [ +0.19% +0.00% +0.11% / -0.40% -0.27% -0.16%] index_fill_ reverse : Elapsed 0.037 ms (3.718 ms / 100) 3.712 -> 3.689 ( -0.62%) [ +0.00% +0.22% +0.13% / -0.62% -0.43% -0.48%] index_fill_ skip64 : Elapsed 0.037 ms (3.712 ms / 100) 3.706 -> 3.691 ( -0.40%) [ +0.24% +0.00% +0.16% / -0.40% -0.32% -0.35%] index_fill_ skip256 : Elapsed 0.037 ms (3.715 ms / 100) 3.714 -> 3.691 ( -0.62%) [ +0.05% +0.00% +0.00% / -0.62% -0.40% -0.43%] index_fill_ spread : Elapsed 0.037 ms (3.716 ms / 100) 3.714 -> 3.700 ( -0.38%) [ +0.16% +0.00% +0.27% / -0.38% -0.19% -0.32%] index_fill_ strided 3 : Elapsed 0.037 ms (3.720 ms / 100) 3.719 -> 3.707 ( -0.32%) [ +0.00% +0.08% +0.13% / -0.22% -0.32% -0.11%] index_fill_ random : Elapsed 0.037 ms (3.719 ms / 100) 3.720 -> 3.706 ( -0.38%) [ +0.00% +0.13% +0.05% / -0.38% -0.30% -0.30%] index_fill_ random_sorted : Elapsed 0.037 ms (3.720 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) A = [5, 16, 40, 20] (stride (16, 1, 80, 3200)) dim = 3 2.074 -> 2.076 ( +0.10%) [ +0.00% +0.10% +0.14% / +0.10% +0.77% +0.82%] index_select const : Elapsed 0.021 ms (2.074 ms / 100) 2.084 -> 2.088 ( +0.19%) [ +0.05% +0.05% +0.00% / +0.19% +0.43% +0.48%] index_select wrap : Elapsed 0.021 ms (2.085 ms / 100) 2.084 -> 2.081 ( -0.14%) [ +0.19% +0.00% +0.05% / -0.14% +0.34% +0.43%] index_select linear : Elapsed 0.021 ms (2.088 ms / 100) 2.088 -> 2.087 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.19% +0.34%] index_select reverse : Elapsed 0.021 ms (2.089 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.19% +0.00% +0.00% / +0.10% +0.67% +0.82%] index_select skip64 : Elapsed 0.021 ms (2.079 ms / 100) 2.072 -> 2.074 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.68% +0.58%] index_select skip256 : Elapsed 0.021 ms (2.072 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.72% +0.53%] index_select spread : Elapsed 0.021 ms (2.077 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.38% +0.10%] index_select strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.074 -> 2.075 ( +0.05%) [ +0.00% +0.05% +0.24% / +0.05% +0.48% +0.58%] index_select strided 5 : Elapsed 0.021 ms (2.074 ms / 100) 2.081 -> 2.081 ( +0.00%) [ +0.05% +0.14% +0.00% / +0.00% +0.48% +0.43%] index_select strided 7 : Elapsed 0.021 ms (2.082 ms / 100) 2.088 -> 2.089 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.29% +0.14%] index_select strided 8 : Elapsed 0.021 ms (2.090 ms / 100) 2.080 -> 2.079 ( -0.05%) [ +0.00% +0.14% +0.00% / -0.05% +0.29% +0.58%] index_select strided 16 : Elapsed 0.021 ms (2.080 ms / 100) 2.087 -> 2.090 ( +0.14%) [ +0.24% +0.10% +0.00% / +0.14% +0.14% +0.19%] index_select random : Elapsed 0.021 ms (2.092 ms / 100) 2.091 -> 2.091 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.24% +0.33%] index_select random_sorted : Elapsed 0.021 ms (2.093 ms / 100) 2.085 -> 2.084 ( -0.05%) [ +0.34% +0.00% +0.14% / -0.05% +0.34% +0.24%] index_select perm : Elapsed 0.021 ms (2.092 ms / 100) 2.088 -> 2.089 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.19% +0.19%] index_select perm_sorted : Elapsed 0.021 ms (2.090 ms / 100) out_shape = [4, 20, 16, 40] in_shape = [5, 20, 16, 40] idx_dim = 0 B = [4, 20, 16, 40] (stride (12800, 16, 1, 320)) A = [5, 20, 16, 40] (stride (640, 3200, 40, 1)) dim = 0 3.300 -> 3.303 ( +0.09%) [ +0.00% +0.39% +0.09% / +0.09% +0.33% +0.09%] index_select const : Elapsed 0.033 ms (3.300 ms / 100) 3.363 -> 3.361 ( -0.06%) [ +0.06% +0.00% +0.03% / -0.06% +0.03% -0.06%] index_select wrap : Elapsed 0.034 ms (3.365 ms / 100) 3.361 -> 3.358 ( -0.09%) [ +0.09% +0.06% +0.00% / -0.09% +0.24% +0.27%] index_select linear : Elapsed 0.034 ms (3.364 ms / 100) 3.366 -> 3.357 ( -0.27%) [ +0.00% +0.15% +0.00% / +0.18% -0.27% -0.21%] index_select reverse : Elapsed 0.034 ms (3.366 ms / 100) 3.304 -> 3.306 ( +0.06%) [ +0.00% +0.18% +0.06% / +0.30% +0.06% +0.12%] index_select skip64 : Elapsed 0.033 ms (3.304 ms / 100) 3.296 -> 3.306 ( +0.30%) [ +0.00% +0.21% +0.21% / +0.30% +0.46% +0.30%] index_select skip256 : Elapsed 0.033 ms (3.296 ms / 100) 3.366 -> 3.367 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.15% +0.06% +0.03%] index_select spread : Elapsed 0.034 ms (3.366 ms / 100) 3.356 -> 3.362 ( +0.18%) [ +0.18% +0.27% +0.00% / +0.18% +0.42% +0.60%] index_select strided 3 : Elapsed 0.034 ms (3.362 ms / 100) 3.313 -> 3.316 ( +0.09%) [ +0.09% +0.00% +0.12% / +0.09% +0.36% +0.36%] index_select random : Elapsed 0.033 ms (3.316 ms / 100) 3.321 -> 3.324 ( +0.09%) [ +0.00% +0.00% +0.06% / +0.09% +0.27% +0.27%] index_select random_sorted : Elapsed 0.033 ms (3.321 ms / 100) 3.359 -> 3.362 ( +0.09%) [ +0.03% +0.27% +0.00% / +0.09% +0.45% +0.51%] index_select perm : Elapsed 0.034 ms (3.360 ms / 100) 3.365 -> 3.366 ( +0.03%) [ +0.12% +0.00% +0.12% / +0.03% +0.33% +0.27%] index_select perm_sorted : Elapsed 0.034 ms (3.369 ms / 100) B = [4, 20, 16, 40] (stride (800, 1, 3200, 20)) A = [5, 20, 16, 40] (stride (1, 80, 5, 1600)) dim = 0 5.753 -> 5.762 ( +0.16%) [ +0.03% +0.00% +0.05% / +0.16% +0.33% +0.31%] index_select const : Elapsed 0.058 ms (5.755 ms / 100) 5.749 -> 5.758 ( +0.16%) [ +0.07% +0.12% +0.00% / +0.16% +0.47% +0.49%] index_select wrap : Elapsed 0.058 ms (5.753 ms / 100) 5.750 -> 5.762 ( +0.21%) [ +0.17% +0.00% +0.14% / +0.21% +0.35% +0.35%] index_select linear : Elapsed 0.058 ms (5.760 ms / 100) 5.754 -> 5.760 ( +0.10%) [ +0.03% +0.00% +0.10% / +0.10% +0.24% +0.28%] index_select reverse : Elapsed 0.058 ms (5.756 ms / 100) 5.750 -> 5.759 ( +0.16%) [ +0.21% +0.00% +0.12% / +0.16% +0.43% +0.38%] index_select skip64 : Elapsed 0.058 ms (5.762 ms / 100) 5.751 -> 5.758 ( +0.12%) [ +0.09% +0.00% +0.12% / +0.12% +0.37% +0.42%] index_select skip256 : Elapsed 0.058 ms (5.756 ms / 100) 5.748 -> 5.758 ( +0.17%) [ +0.03% +0.00% +0.19% / +0.17% +0.42% +0.59%] index_select spread : Elapsed 0.057 ms (5.750 ms / 100) 5.755 -> 5.756 ( +0.02%) [ +0.10% +0.00% +0.00% / +0.02% +0.30% +0.21%] index_select strided 3 : Elapsed 0.058 ms (5.761 ms / 100) 5.751 -> 5.762 ( +0.19%) [ +0.00% +0.07% +0.09% / +0.19% +0.45% +0.43%] index_select random : Elapsed 0.058 ms (5.751 ms / 100) 5.754 -> 5.759 ( +0.09%) [ +0.03% +0.00% +0.07% / +0.09% +0.40% +0.40%] index_select random_sorted : Elapsed 0.058 ms (5.756 ms / 100) 5.747 -> 5.764 ( +0.30%) [ +0.00% +0.16% +0.16% / +0.30% +0.42% +0.37%] index_select perm : Elapsed 0.057 ms (5.747 ms / 100) 5.753 -> 5.753 ( +0.00%) [ +0.00% +0.09% +0.02% / +0.00% +0.35% +0.40%] index_select perm_sorted : Elapsed 0.058 ms (5.753 ms / 100) B = [4, 20, 16, 40] (stride (1, 4, 3200, 80)) A = [5, 20, 16, 40] (stride (40, 200, 4000, 1)) dim = 0 5.529 -> 5.524 ( -0.09%) [ +0.00% +0.09% +0.14% / +0.05% -0.07% -0.09%] index_select const : Elapsed 0.055 ms (5.529 ms / 100) 5.604 -> 5.600 ( -0.07%) [ +0.02% +0.00% +0.02% / +0.07% -0.07% -0.07%] index_select wrap : Elapsed 0.056 ms (5.605 ms / 100) 5.599 -> 5.595 ( -0.07%) [ +0.02% +0.00% +0.11% / +0.11% -0.02% -0.07%] index_select linear : Elapsed 0.056 ms (5.600 ms / 100) 5.588 -> 5.590 ( +0.04%) [ +0.00% +0.05% +0.14% / +0.04% +0.09% +0.11%] index_select reverse : Elapsed 0.056 ms (5.588 ms / 100) 5.511 -> 5.511 ( +0.00%) [ +0.00% +0.02% +0.07% / +0.22% +0.00% +0.00%] index_select skip64 : Elapsed 0.055 ms (5.511 ms / 100) 5.523 -> 5.516 ( -0.13%) [ +0.00% +0.13% +0.14% / +0.22% -0.05% -0.13%] index_select skip256 : Elapsed 0.055 ms (5.523 ms / 100) 5.600 -> 5.596 ( -0.07%) [ +0.13% +0.00% +0.11% / +0.21% -0.07% +0.00%] index_select spread : Elapsed 0.056 ms (5.607 ms / 100) 5.594 -> 5.593 ( -0.02%) [ +0.11% +0.00% +0.04% / +0.16% -0.02% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.600 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.00% +0.04% +0.14% / +0.04% -0.13% +0.04%] index_select random : Elapsed 0.056 ms (5.564 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.00% +0.13% +0.16% / +0.14% +0.13% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.565 ms / 100) 5.601 -> 5.602 ( +0.02%) [ +0.07% +0.00% +0.04% / +0.14% +0.02% +0.05%] index_select perm : Elapsed 0.056 ms (5.605 ms / 100) 5.586 -> 5.589 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.13% +0.05% +0.11%] index_select perm_sorted : Elapsed 0.056 ms (5.589 ms / 100) B = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) A = [5, 20, 16, 40] (stride (1, 3200, 5, 80)) dim = 0 5.982 -> 5.962 ( -0.33%) [ +0.05% +0.00% +0.12% / +0.03% -0.18% -0.33%] index_select const : Elapsed 0.060 ms (5.985 ms / 100) 5.975 -> 5.963 ( -0.20%) [ +0.07% +0.00% +0.13% / +0.17% -0.20% -0.08%] index_select wrap : Elapsed 0.060 ms (5.979 ms / 100) 5.978 -> 5.962 ( -0.27%) [ +0.00% +0.08% +0.17% / +0.17% -0.18% -0.27%] index_select linear : Elapsed 0.060 ms (5.978 ms / 100) 5.981 -> 5.962 ( -0.32%) [ +0.00% +0.00% +0.07% / +0.10% -0.22% -0.32%] index_select reverse : Elapsed 0.060 ms (5.981 ms / 100) 5.981 -> 5.964 ( -0.28%) [ +0.00% +0.03% +0.02% / +0.02% -0.28% -0.20%] index_select skip64 : Elapsed 0.060 ms (5.981 ms / 100) 5.980 -> 5.964 ( -0.27%) [ +0.02% +0.00% +0.08% / +0.13% -0.20% -0.27%] index_select skip256 : Elapsed 0.060 ms (5.981 ms / 100) 5.976 -> 5.962 ( -0.23%) [ +0.00% +0.05% +0.20% / +0.23% -0.23% -0.10%] index_select spread : Elapsed 0.060 ms (5.976 ms / 100) 5.980 -> 5.968 ( -0.20%) [ +0.07% +0.00% +0.03% / +0.10% -0.20% -0.18%] index_select strided 3 : Elapsed 0.060 ms (5.984 ms / 100) 5.978 -> 5.966 ( -0.20%) [ +0.10% +0.00% +0.23% / +0.07% -0.17% -0.20%] index_select random : Elapsed 0.060 ms (5.984 ms / 100) 5.980 -> 5.965 ( -0.25%) [ +0.07% +0.00% +0.07% / +0.27% -0.23% -0.25%] index_select random_sorted : Elapsed 0.060 ms (5.984 ms / 100) 5.981 -> 5.961 ( -0.33%) [ +0.00% +0.07% +0.18% / +0.10% -0.25% -0.33%] index_select perm : Elapsed 0.060 ms (5.981 ms / 100) 5.978 -> 5.959 ( -0.32%) [ +0.05% +0.00% +0.13% / +0.02% -0.32% -0.13%] index_select perm_sorted : Elapsed 0.060 ms (5.981 ms / 100) B = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) A = [5, 20, 16, 40] (stride (1, 5, 100, 1600)) dim = 0 6.023 -> 6.014 ( -0.15%) [ +0.05% +0.00% +0.00% / +0.10% -0.05% -0.15%] index_select const : Elapsed 0.060 ms (6.026 ms / 100) 6.021 -> 6.014 ( -0.12%) [ +0.03% +0.00% +0.05% / +0.07% +0.07% -0.12%] index_select wrap : Elapsed 0.060 ms (6.023 ms / 100) 6.019 -> 6.013 ( -0.10%) [ +0.00% +0.05% +0.08% / +0.07% -0.10% -0.03%] index_select linear : Elapsed 0.060 ms (6.019 ms / 100) 6.022 -> 6.012 ( -0.17%) [ +0.00% +0.02% +0.10% / +0.07% -0.15% -0.17%] index_select reverse : Elapsed 0.060 ms (6.022 ms / 100) 6.019 -> 6.006 ( -0.22%) [ +0.07% +0.00% +0.08% / +0.13% -0.22% +0.02%] index_select skip64 : Elapsed 0.060 ms (6.023 ms / 100) 6.019 -> 6.011 ( -0.13%) [ +0.00% +0.05% +0.05% / +0.08% -0.13% +0.00%] index_select skip256 : Elapsed 0.060 ms (6.019 ms / 100) 6.020 -> 6.018 ( -0.03%) [ +0.10% +0.00% +0.10% / +0.13% -0.02% -0.03%] index_select spread : Elapsed 0.060 ms (6.026 ms / 100) 6.015 -> 6.011 ( -0.07%) [ +0.15% +0.18% +0.00% / +0.13% +0.12% -0.07%] index_select strided 3 : Elapsed 0.060 ms (6.024 ms / 100) 6.020 -> 6.010 ( -0.17%) [ +0.12% +0.00% +0.12% / +0.02% -0.17% -0.12%] index_select random : Elapsed 0.060 ms (6.027 ms / 100) 6.019 -> 6.010 ( -0.15%) [ +0.03% +0.00% +0.03% / +0.12% -0.15% -0.02%] index_select random_sorted : Elapsed 0.060 ms (6.021 ms / 100) 6.019 -> 6.011 ( -0.13%) [ +0.08% +0.00% +0.03% / +0.03% -0.13% -0.12%] index_select perm : Elapsed 0.060 ms (6.024 ms / 100) 6.016 -> 6.011 ( -0.08%) [ +0.00% +0.03% +0.08% / +0.17% -0.07% -0.08%] index_select perm_sorted : Elapsed 0.060 ms (6.016 ms / 100) out_shape = [5, 4, 16, 40] in_shape = [5, 20, 16, 40] idx_dim = 1 B = [5, 4, 16, 40] (stride (2560, 40, 160, 1)) A = [5, 20, 16, 40] (stride (20, 1, 100, 1600)) dim = 1 2.216 -> 2.219 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.86% +0.81%] index_select const : Elapsed 0.022 ms (2.218 ms / 100) 2.220 -> 2.224 ( +0.18%) [ +0.00% +0.05% +0.00% / +0.18% +0.59% +0.59%] index_select wrap : Elapsed 0.022 ms (2.220 ms / 100) 2.216 -> 2.218 ( +0.09%) [ +0.00% +0.14% +0.23% / +0.09% +0.95% +0.99%] index_select linear : Elapsed 0.022 ms (2.216 ms / 100) 2.217 -> 2.217 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.72% +0.99%] index_select reverse : Elapsed 0.022 ms (2.217 ms / 100) 2.218 -> 2.216 ( -0.09%) [ +0.09% +0.32% +0.00% / -0.09% +0.86% +0.95%] index_select skip64 : Elapsed 0.022 ms (2.220 ms / 100) 2.219 -> 2.216 ( -0.14%) [ +0.00% +0.09% +0.05% / -0.14% +0.81% +0.63%] index_select skip256 : Elapsed 0.022 ms (2.219 ms / 100) 2.285 -> 2.294 ( +0.39%) [ +0.09% +0.00% +0.18% / +0.39% +0.79% +0.92%] index_select spread : Elapsed 0.023 ms (2.287 ms / 100) 2.262 -> 2.272 ( +0.44%) [ +0.00% +0.00% +0.18% / +0.44% +0.93% +0.88%] index_select strided 3 : Elapsed 0.023 ms (2.262 ms / 100) 2.288 -> 2.295 ( +0.31%) [ +0.04% +0.00% +0.22% / +0.31% +0.61% +0.87%] index_select strided 5 : Elapsed 0.023 ms (2.289 ms / 100) 2.275 -> 2.276 ( +0.04%) [ +0.04% +0.22% +0.00% / +0.04% +0.70% +0.57%] index_select strided 7 : Elapsed 0.023 ms (2.276 ms / 100) 2.271 -> 2.275 ( +0.18%) [ +0.22% +0.04% +0.00% / +0.18% +0.88% +0.70%] index_select strided 8 : Elapsed 0.023 ms (2.276 ms / 100) 2.289 -> 2.293 ( +0.17%) [ +0.00% +0.22% +0.00% / +0.17% +0.83% +1.00%] index_select strided 16 : Elapsed 0.023 ms (2.289 ms / 100) 2.248 -> 2.250 ( +0.09%) [ +0.18% +0.00% +0.22% / +0.09% +0.85% +0.93%] index_select random : Elapsed 0.023 ms (2.252 ms / 100) 2.271 -> 2.273 ( +0.09%) [ +0.22% +0.00% +0.09% / +0.09% +0.31% +0.48%] index_select random_sorted : Elapsed 0.023 ms (2.276 ms / 100) 2.264 -> 2.257 ( -0.31%) [ +0.00% +0.00% +0.00% / -0.31% +0.57% +0.49%] index_select perm : Elapsed 0.023 ms (2.264 ms / 100) 2.265 -> 2.269 ( +0.18%) [ +0.22% +0.00% +0.09% / +0.18% +0.79% +0.88%] index_select perm_sorted : Elapsed 0.023 ms (2.270 ms / 100) B = [5, 4, 16, 40] (stride (2560, 16, 1, 64)) A = [5, 20, 16, 40] (stride (640, 3200, 1, 16)) dim = 1 2.089 -> 2.092 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.72% +0.67%] index_select const : Elapsed 0.021 ms (2.092 ms / 100) 2.078 -> 2.075 ( -0.14%) [ +0.10% +0.00% +0.05% / -0.14% +0.82% +0.67%] index_select wrap : Elapsed 0.021 ms (2.080 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.19% +0.10% +0.00% / +0.10% +0.72% +0.57%] index_select linear : Elapsed 0.021 ms (2.092 ms / 100) 2.098 -> 2.095 ( -0.14%) [ +0.05% +0.00% +0.14% / -0.14% +0.71% +0.43%] index_select reverse : Elapsed 0.021 ms (2.099 ms / 100) 2.086 -> 2.092 ( +0.29%) [ +0.34% +0.00% +0.24% / +0.29% +0.67% +0.77%] index_select skip64 : Elapsed 0.021 ms (2.093 ms / 100) 2.081 -> 2.081 ( +0.00%) [ +0.00% +0.19% +0.14% / +0.00% +0.77% +0.86%] index_select skip256 : Elapsed 0.021 ms (2.081 ms / 100) 2.086 -> 2.086 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.14% +0.14% +0.00%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.095 -> 2.098 ( +0.14%) [ +0.00% +0.00% +0.14% / +0.14% +0.24% +0.43%] index_select strided 3 : Elapsed 0.021 ms (2.095 ms / 100) 2.100 -> 2.099 ( -0.05%) [ +0.24% +0.00% +0.05% / +0.14% -0.05% -0.05%] index_select strided 5 : Elapsed 0.021 ms (2.105 ms / 100) 2.092 -> 2.092 ( +0.00%) [ +0.00% +0.00% +0.29% / +0.00% +0.81% +0.91%] index_select strided 7 : Elapsed 0.021 ms (2.092 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.48% +0.57%] index_select strided 8 : Elapsed 0.021 ms (2.096 ms / 100) 2.081 -> 2.078 ( -0.14%) [ +0.19% +0.05% +0.00% / -0.14% +0.48% +0.58%] index_select strided 16 : Elapsed 0.021 ms (2.085 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.00% +0.10% +0.10% / -0.05% +0.33% +0.43%] index_select random : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.19% +0.00% +0.33% / +0.05% +0.77% +0.62%] index_select random_sorted : Elapsed 0.021 ms (2.094 ms / 100) 2.105 -> 2.105 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.14% +0.05%] index_select perm : Elapsed 0.021 ms (2.105 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.00% +0.14% +0.19% / +0.24% +0.00% +0.10%] index_select perm_sorted : Elapsed 0.021 ms (2.095 ms / 100) B = [5, 4, 16, 40] (stride (640, 3200, 40, 1)) A = [5, 20, 16, 40] (stride (12800, 1, 800, 20)) dim = 1 0.771 -> 0.772 ( +0.13%) [ +0.00% +0.39% +0.26% / +0.13% +1.56% +0.78%] index_select const : Elapsed 0.008 ms (0.771 ms / 100) 0.774 -> 0.776 ( +0.26%) [ +0.52% +0.26% +0.00% / +0.65% +0.65% +0.26%] index_select wrap : Elapsed 0.008 ms (0.778 ms / 100) 0.771 -> 0.773 ( +0.26%) [ +0.00% +0.78% +0.52% / +0.65% +0.26% +0.52%] index_select linear : Elapsed 0.008 ms (0.771 ms / 100) 0.772 -> 0.773 ( +0.13%) [ +0.00% +0.52% +0.78% / +1.04% +0.39% +0.13%] index_select reverse : Elapsed 0.008 ms (0.772 ms / 100) 0.774 -> 0.775 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.65% +0.13% +0.39%] index_select skip64 : Elapsed 0.008 ms (0.774 ms / 100) 0.772 -> 0.773 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.52% +0.13%] index_select skip256 : Elapsed 0.008 ms (0.773 ms / 100) 0.805 -> 0.804 ( -0.12%) [ +0.00% +0.12% +0.25% / -0.12% +0.50% +0.37%] index_select spread : Elapsed 0.008 ms (0.805 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.25% +0.00% +1.14% / +0.13% +0.00% +0.63%] index_select strided 3 : Elapsed 0.008 ms (0.793 ms / 100) 0.802 -> 0.807 ( +0.62%) [ +0.12% +0.00% +0.37% / +0.62% +0.62% +1.00%] index_select strided 5 : Elapsed 0.008 ms (0.803 ms / 100) 0.793 -> 0.793 ( +0.00%) [ +0.00% +0.25% +0.25% / +0.00% +0.13% +0.63%] index_select strided 7 : Elapsed 0.008 ms (0.793 ms / 100) 0.793 -> 0.798 ( +0.63%) [ +0.88% +0.00% +0.63% / +0.63% +0.76% +0.76%] index_select strided 8 : Elapsed 0.008 ms (0.800 ms / 100) 0.806 -> 0.805 ( -0.12%) [ +0.00% +0.12% +0.74% / +0.25% -0.12% +0.25%] index_select strided 16 : Elapsed 0.008 ms (0.806 ms / 100) 0.786 -> 0.785 ( -0.13%) [ +0.00% +0.13% +0.38% / -0.13% +0.76% +2.16%] index_select random : Elapsed 0.008 ms (0.786 ms / 100) 0.796 -> 0.794 ( -0.25%) [ +0.00% +0.25% +0.50% / -0.25% +0.00% +0.63%] index_select random_sorted : Elapsed 0.008 ms (0.796 ms / 100) 0.792 -> 0.795 ( +0.38%) [ +0.00% +0.13% +0.63% / +0.38% +1.01% +0.51%] index_select perm : Elapsed 0.008 ms (0.792 ms / 100) 0.795 -> 0.799 ( +0.50%) [ +0.00% +0.38% +0.38% / +0.50% +0.50% +0.63%] index_select perm_sorted : Elapsed 0.008 ms (0.795 ms / 100) B = [5, 4, 16, 40] (stride (160, 40, 800, 1)) A = [5, 20, 16, 40] (stride (12800, 40, 800, 1)) dim = 1 1.998 -> 2.001 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.25% +0.35%] index_select const : Elapsed 0.020 ms (2.001 ms / 100) 2.048 -> 2.049 ( +0.05%) [ +0.00% +0.20% +0.05% / +0.34% +0.05% +0.20%] index_select wrap : Elapsed 0.020 ms (2.048 ms / 100) 2.047 -> 2.048 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.05% +0.29%] index_select linear : Elapsed 0.020 ms (2.050 ms / 100) 2.056 -> 2.060 ( +0.19%) [ +0.00% +0.24% +0.15% / +0.19% +0.34% +0.24%] index_select reverse : Elapsed 0.021 ms (2.056 ms / 100) 1.999 -> 1.997 ( -0.10%) [ +0.15% +0.35% +0.00% / -0.10% +0.10% +0.40%] index_select skip64 : Elapsed 0.020 ms (2.002 ms / 100) 2.000 -> 2.004 ( +0.20%) [ +0.00% +0.30% +0.00% / +0.20% +0.20% +0.25%] index_select skip256 : Elapsed 0.020 ms (2.000 ms / 100) 2.042 -> 2.043 ( +0.05%) [ +0.24% +0.00% +0.10% / +0.05% +0.44% +0.44%] index_select spread : Elapsed 0.020 ms (2.047 ms / 100) 2.049 -> 2.051 ( +0.10%) [ +0.05% +0.15% +0.00% / +0.10% +0.49% +0.20%] index_select strided 3 : Elapsed 0.020 ms (2.050 ms / 100) 2.057 -> 2.055 ( -0.10%) [ +0.10% +0.00% +0.15% / -0.10% -0.10% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.059 ms / 100) 2.054 -> 2.053 ( -0.05%) [ +0.05% +0.05% +0.00% / +0.15% -0.05% +0.05%] index_select strided 7 : Elapsed 0.021 ms (2.055 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.05% +0.00% +0.24% / +0.05% +0.78% +0.64%] index_select strided 8 : Elapsed 0.020 ms (2.047 ms / 100) 2.045 -> 2.045 ( +0.00%) [ +0.24% +0.00% +0.15% / +0.10% +0.10% +0.00%] index_select strided 16 : Elapsed 0.020 ms (2.050 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.15% +0.34% +0.05%] index_select random : Elapsed 0.020 ms (2.047 ms / 100) 2.048 -> 2.046 ( -0.10%) [ +0.24% +0.00% +0.00% / -0.10% +0.39% +0.34%] index_select random_sorted : Elapsed 0.021 ms (2.053 ms / 100) 2.048 -> 2.053 ( +0.24%) [ +0.44% +0.00% +0.44% / +0.49% +0.24% +0.29%] index_select perm : Elapsed 0.021 ms (2.057 ms / 100) 2.044 -> 2.045 ( +0.05%) [ +0.00% +0.24% +0.05% / +0.29% +0.05% +0.15%] index_select perm_sorted : Elapsed 0.020 ms (2.044 ms / 100) B = [5, 4, 16, 40] (stride (64, 16, 1, 320)) A = [5, 20, 16, 40] (stride (40, 3200, 200, 1)) dim = 1 2.010 -> 2.011 ( +0.05%) [ +0.00% +0.10% +0.15% / +0.10% +0.05% +0.45%] index_select const : Elapsed 0.020 ms (2.010 ms / 100) 2.053 -> 2.053 ( +0.00%) [ +0.24% +0.24% +0.00% / +0.29% +0.44% +0.00%] index_select wrap : Elapsed 0.021 ms (2.058 ms / 100) 2.053 -> 2.052 ( -0.05%) [ +0.00% +0.10% +0.15% / +0.00% +0.44% -0.05%] index_select linear : Elapsed 0.021 ms (2.053 ms / 100) 2.055 -> 2.054 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.00% +0.00%] index_select reverse : Elapsed 0.021 ms (2.055 ms / 100) 2.005 -> 2.006 ( +0.05%) [ +0.35% +0.35% +0.00% / +0.30% +0.05% +0.20%] index_select skip64 : Elapsed 0.020 ms (2.012 ms / 100) 2.009 -> 2.008 ( -0.05%) [ +0.05% +0.25% +0.00% / +0.10% -0.05% +0.00%] index_select skip256 : Elapsed 0.020 ms (2.010 ms / 100) 2.036 -> 2.038 ( +0.10%) [ +0.29% +0.10% +0.00% / +0.10% +0.29% +0.20%] index_select spread : Elapsed 0.020 ms (2.042 ms / 100) 2.057 -> 2.055 ( -0.10%) [ +0.05% +0.00% +0.10% / +0.05% -0.10% -0.05%] index_select strided 3 : Elapsed 0.021 ms (2.058 ms / 100) 2.033 -> 2.036 ( +0.15%) [ +0.00% +0.05% +0.00% / +0.15% +0.39% +0.25%] index_select strided 5 : Elapsed 0.020 ms (2.033 ms / 100) 2.044 -> 2.044 ( +0.00%) [ +0.00% +0.29% +0.10% / +0.00% +0.44% +0.59%] index_select strided 7 : Elapsed 0.020 ms (2.044 ms / 100) 2.030 -> 2.030 ( +0.00%) [ +0.00% +0.05% +0.20% / +0.00% +0.69% +0.84%] index_select strided 8 : Elapsed 0.020 ms (2.030 ms / 100) 2.043 -> 2.045 ( +0.10%) [ +0.29% +0.00% +0.15% / +0.29% +0.24% +0.10%] index_select strided 16 : Elapsed 0.020 ms (2.049 ms / 100) 2.055 -> 2.054 ( -0.05%) [ +0.44% +0.29% +0.00% / +0.29% +0.05% -0.05%] index_select random : Elapsed 0.021 ms (2.064 ms / 100) 2.061 -> 2.059 ( -0.10%) [ +0.15% +0.00% +0.34% / +0.34% -0.10% +0.00%] index_select random_sorted : Elapsed 0.021 ms (2.064 ms / 100) 2.036 -> 2.035 ( -0.05%) [ +0.00% +0.29% +0.20% / -0.05% +0.54% +0.59%] index_select perm : Elapsed 0.020 ms (2.036 ms / 100) 2.036 -> 2.042 ( +0.29%) [ +0.00% +0.29% +0.34% / +0.29% +0.59% +0.39%] index_select perm_sorted : Elapsed 0.020 ms (2.036 ms / 100) B = [5, 4, 16, 40] (stride (64, 1, 4, 320)) A = [5, 20, 16, 40] (stride (16, 80, 1, 1600)) dim = 1 2.189 -> 2.191 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.09% +0.46% +0.41%] index_select const : Elapsed 0.022 ms (2.191 ms / 100) 2.203 -> 2.206 ( +0.14%) [ +0.18% +0.05% +0.00% / +0.14% +0.45% +0.36%] index_select wrap : Elapsed 0.022 ms (2.207 ms / 100) 2.205 -> 2.203 ( -0.09%) [ +0.14% +0.00% +0.05% / -0.09% +0.27% +0.45%] index_select linear : Elapsed 0.022 ms (2.208 ms / 100) 2.206 -> 2.206 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.41% +0.32%] index_select reverse : Elapsed 0.022 ms (2.206 ms / 100) 2.194 -> 2.190 ( -0.18%) [ +0.00% +0.09% +0.00% / -0.18% +0.27% +0.14%] index_select skip64 : Elapsed 0.022 ms (2.194 ms / 100) 2.195 -> 2.196 ( +0.05%) [ +0.00% +0.14% +0.18% / +0.05% +0.36% +0.46%] index_select skip256 : Elapsed 0.022 ms (2.195 ms / 100) 2.193 -> 2.198 ( +0.23%) [ +0.23% +0.23% +0.00% / +0.23% +0.50% +0.68%] index_select spread : Elapsed 0.022 ms (2.198 ms / 100) 2.203 -> 2.205 ( +0.09%) [ +0.18% +0.05% +0.00% / +0.09% +0.36% +0.23%] index_select strided 3 : Elapsed 0.022 ms (2.207 ms / 100) 2.192 -> 2.189 ( -0.14%) [ +0.09% +0.09% +0.00% / -0.14% +0.32% +0.27%] index_select strided 5 : Elapsed 0.022 ms (2.194 ms / 100) 2.203 -> 2.205 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.45% +0.50%] index_select strided 7 : Elapsed 0.022 ms (2.203 ms / 100) 2.209 -> 2.208 ( -0.05%) [ +0.05% +0.00% +0.32% / -0.05% +0.23% +0.68%] index_select strided 8 : Elapsed 0.022 ms (2.210 ms / 100) 2.206 -> 2.207 ( +0.05%) [ +0.27% +0.09% +0.00% / +0.05% +0.23% +0.32%] index_select strided 16 : Elapsed 0.022 ms (2.212 ms / 100) 2.202 -> 2.204 ( +0.09%) [ +0.23% +0.00% +0.18% / +0.09% +0.23% +0.27%] index_select random : Elapsed 0.022 ms (2.207 ms / 100) 2.206 -> 2.208 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.45% +0.41%] index_select random_sorted : Elapsed 0.022 ms (2.208 ms / 100) 2.209 -> 2.209 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.27% +0.32%] index_select perm : Elapsed 0.022 ms (2.212 ms / 100) 2.206 -> 2.208 ( +0.09%) [ +0.05% +0.00% +0.09% / +0.09% +0.36% +0.45%] index_select perm_sorted : Elapsed 0.022 ms (2.207 ms / 100) B = [5, 4, 16, 40] (stride (1, 5, 20, 320)) A = [5, 20, 16, 40] (stride (12800, 16, 1, 320)) dim = 1 2.193 -> 2.195 ( +0.09%) [ +0.09% +0.00% +0.27% / +0.09% +0.27% +0.09%] index_select const : Elapsed 0.022 ms (2.195 ms / 100) 2.187 -> 2.193 ( +0.27%) [ +0.00% +0.05% +0.23% / +0.27% +0.32% +0.32%] index_select wrap : Elapsed 0.022 ms (2.187 ms / 100) 2.192 -> 2.196 ( +0.18%) [ +0.05% +0.00% +0.14% / +0.18% +0.46% +0.36%] index_select linear : Elapsed 0.022 ms (2.193 ms / 100) 2.196 -> 2.198 ( +0.09%) [ +0.32% +0.00% +0.23% / +0.18% +0.09% +0.18%] index_select reverse : Elapsed 0.022 ms (2.203 ms / 100) 2.179 -> 2.184 ( +0.23%) [ +0.18% +0.00% +0.37% / +0.28% +0.28% +0.23%] index_select skip64 : Elapsed 0.022 ms (2.183 ms / 100) 2.183 -> 2.186 ( +0.14%) [ +0.00% +0.05% +0.05% / +0.14% +0.23% +0.14%] index_select skip256 : Elapsed 0.022 ms (2.183 ms / 100) 2.192 -> 2.191 ( -0.05%) [ +0.00% +0.09% +0.00% / -0.05% +0.27% +0.23%] index_select spread : Elapsed 0.022 ms (2.192 ms / 100) 2.185 -> 2.189 ( +0.18%) [ +0.00% +0.23% +0.14% / +0.27% +0.27% +0.18%] index_select strided 3 : Elapsed 0.022 ms (2.185 ms / 100) 2.187 -> 2.189 ( +0.09%) [ +0.14% +0.05% +0.00% / +0.09% +0.27% +0.37%] index_select strided 5 : Elapsed 0.022 ms (2.190 ms / 100) 2.183 -> 2.180 ( -0.14%) [ +0.09% +0.00% +0.27% / -0.14% +0.00% +0.00%] index_select strided 7 : Elapsed 0.022 ms (2.185 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.23% +0.00% +0.14% / +0.00% +0.18% +0.05%] index_select strided 8 : Elapsed 0.022 ms (2.192 ms / 100) 2.190 -> 2.189 ( -0.05%) [ +0.18% +0.05% +0.00% / +0.18% -0.05% +0.14%] index_select strided 16 : Elapsed 0.022 ms (2.194 ms / 100) 2.188 -> 2.187 ( -0.05%) [ +0.18% +0.00% +0.23% / +0.23% -0.05% +0.05%] index_select random : Elapsed 0.022 ms (2.192 ms / 100) 2.190 -> 2.190 ( +0.00%) [ +0.09% +0.00% +0.14% / +0.00% +0.14% +0.14%] index_select random_sorted : Elapsed 0.022 ms (2.192 ms / 100) 2.190 -> 2.188 ( -0.09%) [ +0.05% +0.00% +0.05% / -0.09% +0.05% +0.14%] index_select perm : Elapsed 0.022 ms (2.191 ms / 100) 2.188 -> 2.191 ( +0.14%) [ +0.09% +0.00% +0.32% / +0.14% +0.32% +0.18%] index_select perm_sorted : Elapsed 0.022 ms (2.190 ms / 100) out_shape = [5, 20, 4, 40] in_shape = [5, 20, 16, 40] idx_dim = 2 B = [5, 20, 4, 40] (stride (3200, 160, 40, 1)) A = [5, 20, 16, 40] (stride (12800, 640, 40, 1)) dim = 2 1.072 -> 1.071 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +2.33% +2.15%] index_select const : Elapsed 0.011 ms (1.073 ms / 100) 1.119 -> 1.126 ( +0.63%) [ +0.00% +0.09% +0.18% / +0.63% +2.50% +2.14%] index_select wrap : Elapsed 0.011 ms (1.119 ms / 100) 1.117 -> 1.127 ( +0.90%) [ +0.00% +0.27% +0.54% / +0.90% +2.51% +2.51%] index_select linear : Elapsed 0.011 ms (1.117 ms / 100) 1.120 -> 1.125 ( +0.45%) [ +0.00% +0.62% +0.71% / +0.45% +2.05% +2.05%] index_select reverse : Elapsed 0.011 ms (1.120 ms / 100) 1.070 -> 1.068 ( -0.19%) [ +0.28% +0.00% +0.09% / -0.19% +2.06% +1.40%] index_select skip64 : Elapsed 0.011 ms (1.073 ms / 100) 1.070 -> 1.071 ( +0.09%) [ +0.37% +0.09% +0.00% / +0.09% +1.50% +1.50%] index_select skip256 : Elapsed 0.011 ms (1.074 ms / 100) 1.121 -> 1.125 ( +0.36%) [ +0.36% +0.00% +0.00% / +0.36% +2.05% +1.87%] index_select spread : Elapsed 0.011 ms (1.125 ms / 100) 1.135 -> 1.138 ( +0.26%) [ +0.09% +0.00% +0.35% / +0.53% +0.88% +0.26%] index_select strided 3 : Elapsed 0.011 ms (1.136 ms / 100) 1.125 -> 1.127 ( +0.18%) [ +0.00% +0.27% +0.53% / +0.18% +1.69% +1.60%] index_select strided 5 : Elapsed 0.011 ms (1.125 ms / 100) 1.118 -> 1.125 ( +0.63%) [ +0.00% +0.09% +0.36% / +0.63% +1.52% +2.33%] index_select strided 7 : Elapsed 0.011 ms (1.118 ms / 100) 1.089 -> 1.091 ( +0.18%) [ +0.00% +0.55% +0.92% / +0.18% +1.65% +1.74%] index_select strided 8 : Elapsed 0.011 ms (1.089 ms / 100) 1.078 -> 1.083 ( +0.46%) [ +0.28% +0.46% +0.00% / +0.46% +2.32% +2.13%] index_select random : Elapsed 0.011 ms (1.081 ms / 100) 1.076 -> 1.082 ( +0.56%) [ +0.00% +0.56% +0.65% / +0.56% +2.23% +2.14%] index_select random_sorted : Elapsed 0.011 ms (1.076 ms / 100) 1.128 -> 1.136 ( +0.71%) [ +0.09% +0.35% +0.00% / +0.71% +0.71% +0.80%] index_select perm : Elapsed 0.011 ms (1.129 ms / 100) 1.125 -> 1.127 ( +0.18%) [ +0.44% +0.80% +0.00% / +0.18% +1.33% +1.51%] index_select perm_sorted : Elapsed 0.011 ms (1.130 ms / 100) B = [5, 20, 4, 40] (stride (3200, 160, 40, 1)) A = [5, 20, 16, 40] (stride (20, 1, 4000, 100)) dim = 2 1.084 -> 1.089 ( +0.46%) [ +0.46% +0.28% +0.00% / +0.65% +0.46% +0.74%] index_select const : Elapsed 0.011 ms (1.089 ms / 100) 1.133 -> 1.134 ( +0.09%) [ +0.44% +0.35% +0.00% / +0.09% +0.35% +0.79%] index_select wrap : Elapsed 0.011 ms (1.138 ms / 100) 1.133 -> 1.132 ( -0.09%) [ +0.00% +0.00% +0.35% / -0.09% +0.35% +0.62%] index_select linear : Elapsed 0.011 ms (1.133 ms / 100) 1.133 -> 1.134 ( +0.09%) [ +0.18% +0.00% +0.35% / +0.09% +1.06% +1.06%] index_select reverse : Elapsed 0.011 ms (1.135 ms / 100) 1.082 -> 1.080 ( -0.18%) [ +0.09% +0.00% +0.00% / -0.18% +1.11% +0.83%] index_select skip64 : Elapsed 0.011 ms (1.083 ms / 100) 1.082 -> 1.082 ( +0.00%) [ +0.28% +0.00% +0.09% / +0.00% +0.74% +0.92%] index_select skip256 : Elapsed 0.011 ms (1.085 ms / 100) 1.107 -> 1.108 ( +0.09%) [ +0.00% +0.18% +0.81% / +0.09% +2.17% +2.08%] index_select spread : Elapsed 0.011 ms (1.107 ms / 100) 1.111 -> 1.113 ( +0.18%) [ +0.18% +0.00% +0.36% / +0.18% +1.62% +1.71%] index_select strided 3 : Elapsed 0.011 ms (1.113 ms / 100) 1.108 -> 1.105 ( -0.27%) [ +0.00% +0.36% +0.27% / -0.27% +1.26% +1.26%] index_select strided 5 : Elapsed 0.011 ms (1.108 ms / 100) 1.096 -> 1.096 ( +0.00%) [ +0.00% +0.00% +0.27% / +0.00% +0.46% +0.46%] index_select strided 7 : Elapsed 0.011 ms (1.096 ms / 100) 1.089 -> 1.088 ( -0.09%) [ +0.00% +0.00% +0.18% / -0.09% +0.55% +1.10%] index_select strided 8 : Elapsed 0.011 ms (1.089 ms / 100) 1.114 -> 1.117 ( +0.27%) [ +0.00% +0.18% +0.18% / +0.27% +1.26% +1.44%] index_select random : Elapsed 0.011 ms (1.114 ms / 100) 1.113 -> 1.117 ( +0.36%) [ +0.09% +0.27% +0.00% / +0.36% +1.26% +1.26%] index_select random_sorted : Elapsed 0.011 ms (1.114 ms / 100) 1.118 -> 1.120 ( +0.18%) [ +0.00% +0.36% +0.27% / +0.18% +0.81% +0.89%] index_select perm : Elapsed 0.011 ms (1.118 ms / 100) 1.107 -> 1.110 ( +0.27%) [ +0.27% +0.54% +0.00% / +0.27% +1.26% +1.72%] index_select perm_sorted : Elapsed 0.011 ms (1.110 ms / 100) B = [5, 20, 4, 40] (stride (160, 800, 1, 4)) A = [5, 20, 16, 40] (stride (12800, 40, 800, 1)) dim = 2 2.361 -> 2.363 ( +0.08%) [ +0.08% +0.00% +0.30% / +0.08% +0.59% +0.42%] index_select const : Elapsed 0.024 ms (2.363 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.29% +0.50%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.423 -> 2.426 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.37% +0.45%] index_select linear : Elapsed 0.024 ms (2.425 ms / 100) 2.420 -> 2.423 ( +0.12%) [ +0.00% +0.17% +0.17% / +0.12% +0.58% +0.50%] index_select reverse : Elapsed 0.024 ms (2.420 ms / 100) 2.361 -> 2.358 ( -0.13%) [ +0.04% +0.21% +0.00% / -0.13% +0.55% +0.51%] index_select skip64 : Elapsed 0.024 ms (2.362 ms / 100) 2.361 -> 2.363 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select skip256 : Elapsed 0.024 ms (2.361 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.08% +0.00% +0.29% / -0.08% +0.41% +0.54%] index_select spread : Elapsed 0.024 ms (2.423 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.54% +0.41%] index_select strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.422 -> 2.422 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.58% +0.50%] index_select strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.54% +0.50%] index_select strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.382 -> 2.383 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.63% +0.55%] index_select strided 8 : Elapsed 0.024 ms (2.382 ms / 100) 2.384 -> 2.386 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.34% +0.34%] index_select random : Elapsed 0.024 ms (2.384 ms / 100) 2.383 -> 2.384 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.21% +0.34%] index_select random_sorted : Elapsed 0.024 ms (2.385 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.25% +0.41%] index_select perm : Elapsed 0.024 ms (2.424 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [5, 20, 4, 40] (stride (1, 800, 5, 20)) A = [5, 20, 16, 40] (stride (800, 40, 4000, 1)) dim = 2 2.278 -> 2.282 ( +0.18%) [ +0.09% +0.09% +0.00% / +0.18% +0.26% +0.26%] index_select const : Elapsed 0.023 ms (2.280 ms / 100) 2.337 -> 2.340 ( +0.13%) [ +0.00% +0.09% +0.30% / +0.13% +0.17% +0.26%] index_select wrap : Elapsed 0.023 ms (2.337 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.47% +0.43%] index_select linear : Elapsed 0.023 ms (2.337 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.00% +0.04% +0.26% / +0.00% +0.13% +0.17%] index_select reverse : Elapsed 0.023 ms (2.337 ms / 100) 2.278 -> 2.280 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.22% +0.18%] index_select skip64 : Elapsed 0.023 ms (2.278 ms / 100) 2.276 -> 2.276 ( +0.00%) [ +0.13% +0.22% +0.00% / +0.00% +0.18% +0.31%] index_select skip256 : Elapsed 0.023 ms (2.279 ms / 100) 2.334 -> 2.339 ( +0.21%) [ +0.04% +0.09% +0.00% / +0.21% +0.30% +0.30%] index_select spread : Elapsed 0.023 ms (2.335 ms / 100) 2.331 -> 2.333 ( +0.09%) [ +0.00% +0.09% +0.30% / +0.09% +0.73% +0.39%] index_select strided 3 : Elapsed 0.023 ms (2.331 ms / 100) 2.333 -> 2.334 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.39% +0.56%] index_select strided 5 : Elapsed 0.023 ms (2.333 ms / 100) 2.336 -> 2.334 ( -0.09%) [ +0.21% +0.00% +0.09% / -0.09% +0.30% +0.39%] index_select strided 7 : Elapsed 0.023 ms (2.341 ms / 100) 2.287 -> 2.289 ( +0.09%) [ +0.22% +0.09% +0.00% / +0.09% +0.57% +0.44%] index_select strided 8 : Elapsed 0.023 ms (2.292 ms / 100) 2.310 -> 2.310 ( +0.00%) [ +0.22% +0.00% +0.13% / +0.00% +0.30% +0.43%] index_select random : Elapsed 0.023 ms (2.315 ms / 100) 2.311 -> 2.317 ( +0.26%) [ +0.00% +0.17% +0.13% / +0.26% +0.43% +0.43%] index_select random_sorted : Elapsed 0.023 ms (2.311 ms / 100) 2.336 -> 2.333 ( -0.13%) [ +0.09% +0.00% +0.00% / -0.13% +0.34% +0.47%] index_select perm : Elapsed 0.023 ms (2.338 ms / 100) 2.332 -> 2.334 ( +0.09%) [ +0.00% +0.21% +0.21% / +0.09% +0.56% +0.47%] index_select perm_sorted : Elapsed 0.023 ms (2.332 ms / 100) B = [5, 20, 4, 40] (stride (800, 40, 4000, 1)) A = [5, 20, 16, 40] (stride (640, 3200, 1, 16)) dim = 2 2.354 -> 2.358 ( +0.17%) [ +0.00% +0.13% +0.13% / +0.17% +0.34% +0.51%] index_select const : Elapsed 0.024 ms (2.354 ms / 100) 2.355 -> 2.358 ( +0.13%) [ +0.21% +0.00% +0.04% / +0.13% +0.30% +0.34%] index_select wrap : Elapsed 0.024 ms (2.360 ms / 100) 2.357 -> 2.360 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.34% +0.34%] index_select linear : Elapsed 0.024 ms (2.357 ms / 100) 2.354 -> 2.355 ( +0.04%) [ +0.00% +0.17% +0.25% / +0.04% +0.30% +0.51%] index_select reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.354 -> 2.352 ( -0.08%) [ +0.13% +0.00% +0.17% / -0.08% +0.55% +0.34%] index_select skip64 : Elapsed 0.024 ms (2.357 ms / 100) 2.353 -> 2.361 ( +0.34%) [ +0.21% +0.00% +0.17% / +0.34% +0.59% +0.47%] index_select skip256 : Elapsed 0.024 ms (2.358 ms / 100) 2.385 -> 2.385 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.34% +0.42%] index_select spread : Elapsed 0.024 ms (2.387 ms / 100) 2.378 -> 2.380 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.38% +0.50%] index_select strided 3 : Elapsed 0.024 ms (2.380 ms / 100) 2.378 -> 2.380 ( +0.08%) [ +0.17% +0.13% +0.00% / +0.08% +0.46% +0.38%] index_select strided 5 : Elapsed 0.024 ms (2.382 ms / 100) 2.383 -> 2.384 ( +0.04%) [ +0.08% +0.00% +0.17% / +0.04% +0.42% +0.46%] index_select strided 7 : Elapsed 0.024 ms (2.385 ms / 100) 2.388 -> 2.392 ( +0.17%) [ +0.04% +0.04% +0.00% / +0.17% +0.34% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.389 ms / 100) 2.378 -> 2.382 ( +0.17%) [ +0.00% +0.17% +0.25% / +0.17% +0.38% +0.34%] index_select random : Elapsed 0.024 ms (2.378 ms / 100) 2.388 -> 2.391 ( +0.13%) [ +0.00% +0.25% +0.08% / +0.13% +0.25% +0.46%] index_select random_sorted : Elapsed 0.024 ms (2.388 ms / 100) 2.379 -> 2.382 ( +0.13%) [ +0.21% +0.00% +0.00% / +0.13% +0.34% +0.21%] index_select perm : Elapsed 0.024 ms (2.384 ms / 100) 2.387 -> 2.389 ( +0.08%) [ +0.00% +0.13% +0.08% / +0.08% +0.38% +0.50%] index_select perm_sorted : Elapsed 0.024 ms (2.387 ms / 100) B = [5, 20, 4, 40] (stride (80, 4, 1, 400)) A = [5, 20, 16, 40] (stride (640, 3200, 40, 1)) dim = 2 2.299 -> 2.304 ( +0.22%) [ +0.00% +0.09% +0.09% / +0.22% +0.22% +0.22%] index_select const : Elapsed 0.023 ms (2.299 ms / 100) 2.332 -> 2.334 ( +0.09%) [ +0.13% +0.34% +0.00% / +0.09% +0.34% +0.47%] index_select wrap : Elapsed 0.023 ms (2.335 ms / 100) 2.333 -> 2.341 ( +0.34%) [ +0.39% +0.00% +0.21% / +0.34% +0.47% +0.43%] index_select linear : Elapsed 0.023 ms (2.342 ms / 100) 2.338 -> 2.341 ( +0.13%) [ +0.21% +0.00% +0.00% / +0.34% +0.13% +0.34%] index_select reverse : Elapsed 0.023 ms (2.343 ms / 100) 2.280 -> 2.281 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.13% +0.18% +0.04%] index_select skip64 : Elapsed 0.023 ms (2.280 ms / 100) 2.301 -> 2.302 ( +0.04%) [ +0.30% +0.00% +0.00% / +0.04% +0.26% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.308 ms / 100) 2.340 -> 2.340 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.04% +0.04%] index_select spread : Elapsed 0.023 ms (2.343 ms / 100) 2.338 -> 2.338 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.38% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.342 ms / 100) 2.345 -> 2.342 ( -0.13%) [ +0.13% +0.13% +0.00% / -0.13% +0.09% +0.26%] index_select strided 5 : Elapsed 0.023 ms (2.348 ms / 100) 2.336 -> 2.336 ( +0.00%) [ +0.17% +0.00% +0.09% / +0.00% +0.47% +0.04%] index_select strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.295 -> 2.297 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.31% +0.31%] index_select strided 8 : Elapsed 0.023 ms (2.297 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.39% +0.21%] index_select random : Elapsed 0.023 ms (2.337 ms / 100) 2.339 -> 2.343 ( +0.17%) [ +0.34% +0.17% +0.00% / +0.17% +0.43% +0.43%] index_select random_sorted : Elapsed 0.023 ms (2.347 ms / 100) 2.339 -> 2.339 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.56% +0.13%] index_select perm : Elapsed 0.023 ms (2.341 ms / 100) 2.335 -> 2.334 ( -0.04%) [ +0.00% +0.13% +0.09% / -0.04% +0.21% +0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.335 ms / 100) out_shape = [5, 20, 16, 4] in_shape = [5, 20, 16, 40] idx_dim = 3 B = [5, 20, 16, 4] (stride (1280, 1, 20, 320)) A = [5, 20, 16, 40] (stride (20, 1, 4000, 100)) dim = 3 1.216 -> 1.217 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.58% +0.66%] index_select const : Elapsed 0.012 ms (1.218 ms / 100) 1.204 -> 1.205 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.58% +0.58%] index_select wrap : Elapsed 0.012 ms (1.205 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.33% +0.33%] index_select linear : Elapsed 0.012 ms (1.223 ms / 100) 1.212 -> 1.214 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +1.07% +0.99%] index_select reverse : Elapsed 0.012 ms (1.214 ms / 100) 1.245 -> 1.244 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.08% -0.08% +0.00%] index_select skip64 : Elapsed 0.012 ms (1.246 ms / 100) 1.216 -> 1.218 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.74% +0.74%] index_select skip256 : Elapsed 0.012 ms (1.218 ms / 100) 1.208 -> 1.208 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.66% +0.66%] index_select spread : Elapsed 0.012 ms (1.209 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.65% +0.57%] index_select strided 3 : Elapsed 0.012 ms (1.226 ms / 100) 1.219 -> 1.220 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.90% +0.82%] index_select strided 5 : Elapsed 0.012 ms (1.220 ms / 100) 1.243 -> 1.240 ( -0.24%) [ +0.08% +0.00% +0.00% / +0.08% -0.24% -0.24%] index_select strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.219 -> 1.221 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.57% +0.57%] index_select strided 8 : Elapsed 0.012 ms (1.220 ms / 100) 1.218 -> 1.218 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.57% +0.66%] index_select strided 16 : Elapsed 0.012 ms (1.218 ms / 100) 1.218 -> 1.218 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.57% +0.57%] index_select random : Elapsed 0.012 ms (1.220 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.49% +0.40%] index_select random_sorted : Elapsed 0.012 ms (1.235 ms / 100) 1.246 -> 1.245 ( -0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.00% -0.08%] index_select perm : Elapsed 0.012 ms (1.247 ms / 100) 1.248 -> 1.243 ( -0.40%) [ +0.00% +0.00% +0.00% / +0.08% -0.40% -0.16%] index_select perm_sorted : Elapsed 0.012 ms (1.248 ms / 100) B = [5, 20, 16, 4] (stride (4, 20, 400, 1)) A = [5, 20, 16, 40] (stride (640, 3200, 1, 16)) dim = 3 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.31%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.63% +0.55%] index_select spread : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.277 ( +0.24%) [ +0.00% +0.00% +0.08% / +0.24% +0.55% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.16% +0.31% / +0.00% +0.39% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.47% +0.71%] index_select strided 8 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.24% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.31% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.24% +0.31%] index_select perm : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [5, 20, 16, 4] (stride (1, 80, 5, 1600)) A = [5, 20, 16, 40] (stride (640, 3200, 1, 16)) dim = 3 1.190 -> 1.192 ( +0.17%) [ +0.17% +0.00% +0.08% / +0.17% +0.76% +0.67%] index_select const : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.50% +0.42%] index_select wrap : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.50% +0.59%] index_select linear : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.50%] index_select reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.92% / +0.00% +0.50% +0.50%] index_select skip64 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.34% +0.00% +0.84% / +0.17% +0.42% +0.34%] index_select skip256 : Elapsed 0.012 ms (1.196 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.34% +0.00% +0.17% / +0.08% +0.50% +0.42%] index_select spread : Elapsed 0.012 ms (1.196 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.25%] index_select strided 3 : Elapsed 0.012 ms (1.193 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.42% +0.25%] index_select strided 5 : Elapsed 0.012 ms (1.194 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.25% +0.00% +0.17% / +0.08% +0.59% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.34% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.196 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.42% +0.00% +0.00% / +0.00% +0.42% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.198 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.34%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.00% +0.08% +0.34% / -0.08% +0.42% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.25% / +0.00% +0.67% +0.50%] index_select perm : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.67% +0.67%] index_select perm_sorted : Elapsed 0.012 ms (1.192 ms / 100) B = [5, 20, 16, 4] (stride (20, 1, 100, 1600)) A = [5, 20, 16, 40] (stride (1, 80, 5, 1600)) dim = 3 0.584 -> 0.585 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.34% +2.23%] index_select const : Elapsed 0.006 ms (0.584 ms / 100) 0.584 -> 0.584 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.34% +1.54%] index_select wrap : Elapsed 0.006 ms (0.585 ms / 100) 0.584 -> 0.584 ( +0.00%) [ +0.00% +0.34% +0.00% / +7.71% +0.17% +0.00%] index_select linear : Elapsed 0.006 ms (0.584 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.34% +0.34% +0.00% / +0.17% +0.17% +0.00%] index_select reverse : Elapsed 0.006 ms (0.588 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +8.21% +0.17% +0.00% / +0.17% +0.17% +0.17%] index_select skip64 : Elapsed 0.006 ms (0.633 ms / 100) 0.584 -> 0.584 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select skip256 : Elapsed 0.006 ms (0.585 ms / 100) 0.585 -> 0.584 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.68% +0.51%] index_select spread : Elapsed 0.006 ms (0.585 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.34% +0.34%] index_select strided 3 : Elapsed 0.006 ms (0.585 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.51% +0.17%] index_select strided 5 : Elapsed 0.006 ms (0.585 ms / 100) 0.585 -> 0.584 ( -0.17%) [ +0.00% +0.00% +0.34% / -0.17% +0.17% +0.17%] index_select strided 7 : Elapsed 0.006 ms (0.585 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.00% +0.00% +4.62% / +0.17% +0.34% +0.17%] index_select strided 8 : Elapsed 0.006 ms (0.585 ms / 100) 0.585 -> 0.584 ( -0.17%) [ +0.00% +0.00% +2.56% / +0.17% +0.17% -0.17%] index_select strided 16 : Elapsed 0.006 ms (0.585 ms / 100) 0.586 -> 0.584 ( -0.34%) [ +0.17% +0.00% +15.87% / +0.17% -0.34% -0.17%] index_select random : Elapsed 0.006 ms (0.587 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.00% +0.51% +0.34%] index_select random_sorted : Elapsed 0.006 ms (0.586 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.17% +0.00% +3.75% / +0.00% +0.51% +0.68%] index_select perm : Elapsed 0.006 ms (0.587 ms / 100) 0.595 -> 0.599 ( +0.67%) [ +0.50% +0.00% +0.00% / +0.67% +0.84% +0.84%] index_select perm_sorted : Elapsed 0.006 ms (0.598 ms / 100) out_shape = [4, 20, 40, 16] in_shape = [5, 20, 40, 16] idx_dim = 0 B = [4, 20, 40, 16] (stride (12800, 40, 1, 800)) A = [5, 20, 40, 16] (stride (640, 3200, 1, 40)) dim = 0 5.489 -> 5.472 ( -0.31%) [ +0.00% +0.11% +0.13% / +0.05% -0.31% -0.24%] index_select const : Elapsed 0.055 ms (5.489 ms / 100) 5.523 -> 5.516 ( -0.13%) [ +0.00% +0.07% +0.22% / +0.25% -0.05% -0.13%] index_select wrap : Elapsed 0.055 ms (5.523 ms / 100) 5.524 -> 5.510 ( -0.25%) [ +0.04% +0.00% +0.22% / +0.00% -0.20% -0.25%] index_select linear : Elapsed 0.055 ms (5.526 ms / 100) 5.532 -> 5.529 ( -0.05%) [ +0.00% +0.00% +0.07% / +0.13% -0.05% +0.00%] index_select reverse : Elapsed 0.055 ms (5.532 ms / 100) 5.493 -> 5.469 ( -0.44%) [ +0.16% +0.02% +0.00% / +0.02% -0.42% -0.44%] index_select skip64 : Elapsed 0.055 ms (5.502 ms / 100) 5.494 -> 5.464 ( -0.55%) [ +0.04% +0.00% +0.07% / +0.02% -0.55% -0.47%] index_select skip256 : Elapsed 0.055 ms (5.496 ms / 100) 5.524 -> 5.517 ( -0.13%) [ +0.16% +0.00% +0.11% / +0.27% -0.07% -0.13%] index_select spread : Elapsed 0.055 ms (5.533 ms / 100) 5.539 -> 5.512 ( -0.49%) [ +0.00% +0.02% +0.04% / +0.02% -0.38% -0.49%] index_select strided 3 : Elapsed 0.055 ms (5.539 ms / 100) 5.529 -> 5.517 ( -0.22%) [ +0.09% +0.00% +0.16% / +0.04% -0.22% -0.09%] index_select random : Elapsed 0.055 ms (5.534 ms / 100) 5.519 -> 5.511 ( -0.14%) [ +0.00% +0.16% +0.38% / +0.11% -0.11% -0.14%] index_select random_sorted : Elapsed 0.055 ms (5.519 ms / 100) 5.524 -> 5.519 ( -0.09%) [ +0.00% +0.16% +0.09% / +0.24% -0.09% -0.09%] index_select perm : Elapsed 0.055 ms (5.524 ms / 100) 5.522 -> 5.511 ( -0.20%) [ +0.00% +0.00% +0.18% / +0.20% -0.20% -0.13%] index_select perm_sorted : Elapsed 0.055 ms (5.522 ms / 100) B = [4, 20, 40, 16] (stride (1, 2560, 64, 4)) A = [5, 20, 40, 16] (stride (12800, 1, 20, 800)) dim = 0 5.314 -> 5.307 ( -0.13%) [ +0.15% +0.00% +0.17% / +0.17% -0.11% -0.13%] index_select const : Elapsed 0.053 ms (5.322 ms / 100) 5.326 -> 5.337 ( +0.21%) [ +0.00% +0.08% +0.24% / +0.21% +0.24% +0.38%] index_select wrap : Elapsed 0.053 ms (5.326 ms / 100) 5.329 -> 5.332 ( +0.06%) [ +0.00% +0.04% +0.32% / +0.06% +0.15% +0.24%] index_select linear : Elapsed 0.053 ms (5.329 ms / 100) 5.328 -> 5.336 ( +0.15%) [ +0.00% +0.13% +0.17% / +0.15% +0.34% +0.32%] index_select reverse : Elapsed 0.053 ms (5.328 ms / 100) 5.311 -> 5.304 ( -0.13%) [ +0.00% +0.24% +0.21% / +0.17% -0.08% -0.13%] index_select skip64 : Elapsed 0.053 ms (5.311 ms / 100) 5.314 -> 5.310 ( -0.08%) [ +0.00% +0.00% +0.32% / +0.21% -0.08% +0.04%] index_select skip256 : Elapsed 0.053 ms (5.314 ms / 100) 5.327 -> 5.335 ( +0.15%) [ +0.13% +0.00% +0.23% / +0.15% +0.26% +0.36%] index_select spread : Elapsed 0.053 ms (5.334 ms / 100) 5.333 -> 5.336 ( +0.06%) [ +0.00% +0.23% +0.28% / +0.26% +0.06% +0.09%] index_select strided 3 : Elapsed 0.053 ms (5.333 ms / 100) 5.316 -> 5.322 ( +0.11%) [ +0.00% +0.08% +0.30% / +0.11% +0.24% +0.24%] index_select random : Elapsed 0.053 ms (5.316 ms / 100) 5.312 -> 5.326 ( +0.26%) [ +0.06% +0.04% +0.00% / +0.26% +0.30% +0.34%] index_select random_sorted : Elapsed 0.053 ms (5.315 ms / 100) 5.331 -> 5.334 ( +0.06%) [ +0.00% +0.00% +0.15% / +0.06% +0.30% +0.24%] index_select perm : Elapsed 0.053 ms (5.331 ms / 100) 5.318 -> 5.333 ( +0.28%) [ +0.09% +0.00% +0.13% / +0.28% +0.56% +0.56%] index_select perm_sorted : Elapsed 0.053 ms (5.323 ms / 100) B = [4, 20, 40, 16] (stride (1, 4, 80, 3200)) A = [5, 20, 40, 16] (stride (640, 3200, 16, 1)) dim = 0 5.486 -> 5.460 ( -0.47%) [ +0.07% +0.05% +0.00% / +0.13% -0.47% -0.38%] index_select const : Elapsed 0.055 ms (5.490 ms / 100) 5.557 -> 5.546 ( -0.20%) [ +0.04% +0.05% +0.00% / +0.09% -0.14% -0.20%] index_select wrap : Elapsed 0.056 ms (5.559 ms / 100) 5.550 -> 5.544 ( -0.11%) [ +0.00% +0.14% +0.09% / +0.14% -0.11% -0.07%] index_select linear : Elapsed 0.056 ms (5.550 ms / 100) 5.559 -> 5.551 ( -0.14%) [ +0.05% +0.00% +0.04% / -0.07% -0.14% -0.13%] index_select reverse : Elapsed 0.056 ms (5.562 ms / 100) 5.478 -> 5.456 ( -0.40%) [ +0.09% +0.00% +0.13% / +0.18% -0.33% -0.40%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.477 -> 5.453 ( -0.44%) [ +0.00% +0.09% +0.22% / +0.22% -0.44% -0.42%] index_select skip256 : Elapsed 0.055 ms (5.477 ms / 100) 5.556 -> 5.546 ( -0.18%) [ +0.04% +0.00% +0.09% / +0.09% -0.11% -0.18%] index_select spread : Elapsed 0.056 ms (5.558 ms / 100) 5.556 -> 5.534 ( -0.40%) [ +0.09% +0.00% +0.14% / -0.04% -0.40% -0.34%] index_select strided 3 : Elapsed 0.056 ms (5.561 ms / 100) 5.497 -> 5.502 ( +0.09%) [ +0.15% +0.00% +0.13% / +0.13% +0.09% +0.11%] index_select random : Elapsed 0.055 ms (5.505 ms / 100) 5.494 -> 5.483 ( -0.20%) [ +0.00% +0.11% +0.24% / +0.05% -0.13% -0.20%] index_select random_sorted : Elapsed 0.055 ms (5.494 ms / 100) 5.566 -> 5.553 ( -0.23%) [ +0.00% +0.00% +0.02% / -0.07% -0.23% -0.23%] index_select perm : Elapsed 0.056 ms (5.566 ms / 100) 5.551 -> 5.539 ( -0.22%) [ +0.05% +0.00% +0.05% / +0.02% -0.07% -0.22%] index_select perm_sorted : Elapsed 0.056 ms (5.554 ms / 100) out_shape = [5, 4, 40, 16] in_shape = [5, 20, 40, 16] idx_dim = 1 B = [5, 4, 40, 16] (stride (2560, 640, 1, 40)) A = [5, 20, 40, 16] (stride (800, 1, 20, 4000)) dim = 1 2.196 -> 2.200 ( +0.18%) [ +0.41% +0.00% +0.27% / +0.18% +0.91% +0.96%] index_select const : Elapsed 0.022 ms (2.205 ms / 100) 2.202 -> 2.203 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.86% +0.73%] index_select wrap : Elapsed 0.022 ms (2.203 ms / 100) 2.201 -> 2.208 ( +0.32%) [ +0.00% +0.05% +0.32% / +0.32% +0.45% +0.45%] index_select linear : Elapsed 0.022 ms (2.201 ms / 100) 2.203 -> 2.207 ( +0.18%) [ +0.09% +0.00% +0.27% / +0.18% +0.41% +0.18%] index_select reverse : Elapsed 0.022 ms (2.205 ms / 100) 2.201 -> 2.201 ( +0.00%) [ +0.14% +0.05% +0.00% / +0.00% +0.73% +0.64%] index_select skip64 : Elapsed 0.022 ms (2.204 ms / 100) 2.200 -> 2.208 ( +0.36%) [ +0.14% +0.00% +0.14% / +0.36% +0.55% +0.55%] index_select skip256 : Elapsed 0.022 ms (2.203 ms / 100) 2.261 -> 2.268 ( +0.31%) [ +0.62% +0.00% +0.00% / +0.31% +0.71% +0.93%] index_select spread : Elapsed 0.023 ms (2.275 ms / 100) 2.249 -> 2.257 ( +0.36%) [ +0.13% +0.00% +0.04% / +0.36% +0.44% +0.36%] index_select strided 3 : Elapsed 0.023 ms (2.252 ms / 100) 2.268 -> 2.265 ( -0.13%) [ +0.31% +0.00% +0.13% / -0.13% +0.53% +0.31%] index_select strided 5 : Elapsed 0.023 ms (2.275 ms / 100) 2.251 -> 2.256 ( +0.22%) [ +0.00% +0.04% +0.18% / +0.22% +0.58% +0.49%] index_select strided 7 : Elapsed 0.023 ms (2.251 ms / 100) 2.240 -> 2.240 ( +0.00%) [ +0.00% +0.09% +0.13% / +0.00% +0.67% +0.45%] index_select strided 8 : Elapsed 0.022 ms (2.240 ms / 100) 2.259 -> 2.268 ( +0.40%) [ +0.09% +0.00% +0.31% / +0.40% +0.80% +0.71%] index_select strided 16 : Elapsed 0.023 ms (2.261 ms / 100) 2.223 -> 2.230 ( +0.31%) [ +0.13% +0.13% +0.00% / +0.31% +0.63% +0.76%] index_select random : Elapsed 0.022 ms (2.226 ms / 100) 2.222 -> 2.226 ( +0.18%) [ +0.00% +0.23% +0.18% / +0.18% +0.68% +0.77%] index_select random_sorted : Elapsed 0.022 ms (2.222 ms / 100) 2.221 -> 2.231 ( +0.45%) [ +0.09% +0.00% +0.50% / +0.45% +0.99% +0.63%] index_select perm : Elapsed 0.022 ms (2.223 ms / 100) 2.223 -> 2.228 ( +0.22%) [ +0.00% +0.31% +0.27% / +0.22% +0.54% +0.67%] index_select perm_sorted : Elapsed 0.022 ms (2.223 ms / 100) B = [5, 4, 40, 16] (stride (2560, 1, 64, 4)) A = [5, 20, 40, 16] (stride (320, 16, 1600, 1)) dim = 1 1.902 -> 1.904 ( +0.11%) [ +0.00% +0.00% +0.21% / +0.11% +0.42% +0.84%] index_select const : Elapsed 0.019 ms (1.902 ms / 100) 1.956 -> 1.952 ( -0.20%) [ +0.05% +0.05% +0.00% / +0.00% -0.20% +0.05%] index_select wrap : Elapsed 0.020 ms (1.957 ms / 100) 1.956 -> 1.955 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.00% -0.05% +0.05%] index_select linear : Elapsed 0.020 ms (1.956 ms / 100) 1.958 -> 1.961 ( +0.15%) [ +0.10% +0.00% +0.00% / +0.15% +0.15% +0.20%] index_select reverse : Elapsed 0.020 ms (1.960 ms / 100) 1.903 -> 1.903 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.21% +0.00%] index_select skip64 : Elapsed 0.019 ms (1.903 ms / 100) 1.901 -> 1.903 ( +0.11%) [ +0.16% +0.11% +0.00% / +0.11% +0.11% +0.11%] index_select skip256 : Elapsed 0.019 ms (1.904 ms / 100) 1.956 -> 1.954 ( -0.10%) [ +0.00% +0.10% +0.41% / -0.10% +0.31% +0.56%] index_select spread : Elapsed 0.020 ms (1.956 ms / 100) 1.957 -> 1.959 ( +0.10%) [ +0.20% +0.00% +0.05% / +0.41% +0.26% +0.10%] index_select strided 3 : Elapsed 0.020 ms (1.961 ms / 100) 1.954 -> 1.958 ( +0.20%) [ +0.26% +0.15% +0.00% / +0.31% +0.20% +0.46%] index_select strided 5 : Elapsed 0.020 ms (1.959 ms / 100) 1.958 -> 1.954 ( -0.20%) [ +0.10% +0.46% +0.00% / -0.20% +0.10% +0.36%] index_select strided 7 : Elapsed 0.020 ms (1.960 ms / 100) 1.956 -> 1.954 ( -0.10%) [ +0.05% +0.00% +0.20% / -0.10% +0.26% +0.61%] index_select strided 8 : Elapsed 0.020 ms (1.957 ms / 100) 1.952 -> 1.956 ( +0.20%) [ +0.61% +0.00% +0.46% / +0.20% +0.61% +0.51%] index_select strided 16 : Elapsed 0.020 ms (1.964 ms / 100) 1.955 -> 1.961 ( +0.31%) [ +0.10% +0.15% +0.00% / +0.31% +0.36% +0.51%] index_select random : Elapsed 0.020 ms (1.957 ms / 100) 1.961 -> 1.959 ( -0.10%) [ +0.00% +0.10% +0.36% / +0.00% +0.00% -0.10%] index_select random_sorted : Elapsed 0.020 ms (1.961 ms / 100) 1.955 -> 1.955 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.26% +0.36%] index_select perm : Elapsed 0.020 ms (1.961 ms / 100) 1.956 -> 1.953 ( -0.15%) [ +0.20% +0.15% +0.00% / -0.15% +0.10% +0.31%] index_select perm_sorted : Elapsed 0.020 ms (1.960 ms / 100) B = [5, 4, 40, 16] (stride (16, 3200, 80, 1)) A = [5, 20, 40, 16] (stride (800, 40, 1, 4000)) dim = 1 2.105 -> 2.109 ( +0.19%) [ +0.00% +0.24% +0.24% / +0.29% +0.24% +0.19%] index_select const : Elapsed 0.021 ms (2.105 ms / 100) 2.097 -> 2.102 ( +0.24%) [ +0.10% +0.14% +0.00% / +0.24% +0.57% +0.52%] index_select wrap : Elapsed 0.021 ms (2.099 ms / 100) 2.105 -> 2.110 ( +0.24%) [ +0.10% +0.10% +0.00% / +0.29% +0.43% +0.24%] index_select linear : Elapsed 0.021 ms (2.107 ms / 100) 2.104 -> 2.105 ( +0.05%) [ +0.29% +0.19% +0.00% / +0.05% +0.05% +0.19%] index_select reverse : Elapsed 0.021 ms (2.110 ms / 100) 2.115 -> 2.116 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.14% +0.09%] index_select skip64 : Elapsed 0.021 ms (2.115 ms / 100) 2.114 -> 2.114 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.28% +0.19%] index_select skip256 : Elapsed 0.021 ms (2.116 ms / 100) 2.122 -> 2.120 ( -0.09%) [ +0.05% +0.00% +0.14% / -0.09% +0.09% +0.38%] index_select spread : Elapsed 0.021 ms (2.123 ms / 100) 2.118 -> 2.115 ( -0.14%) [ +0.05% +0.00% +0.33% / -0.14% +0.14% +0.19%] index_select strided 3 : Elapsed 0.021 ms (2.119 ms / 100) 2.104 -> 2.105 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.52% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.105 ms / 100) 2.111 -> 2.115 ( +0.19%) [ +0.14% +0.14% +0.00% / +0.19% +0.19% +0.24%] index_select strided 7 : Elapsed 0.021 ms (2.114 ms / 100) 2.125 -> 2.125 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.24% +0.05%] index_select strided 8 : Elapsed 0.021 ms (2.126 ms / 100) 2.104 -> 2.107 ( +0.14%) [ +0.19% +0.24% +0.00% / +0.14% +0.52% +0.38%] index_select strided 16 : Elapsed 0.021 ms (2.108 ms / 100) 2.123 -> 2.126 ( +0.14%) [ +0.14% +0.05% +0.00% / +0.19% +0.38% +0.14%] index_select random : Elapsed 0.021 ms (2.126 ms / 100) 2.129 -> 2.133 ( +0.19%) [ +0.19% +0.00% +0.14% / +0.19% +0.38% +0.47%] index_select random_sorted : Elapsed 0.021 ms (2.133 ms / 100) 2.133 -> 2.132 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.33% +0.05%] index_select perm : Elapsed 0.021 ms (2.135 ms / 100) 2.124 -> 2.127 ( +0.14%) [ +0.00% +0.24% +0.00% / +0.14% +0.38% +0.42%] index_select perm_sorted : Elapsed 0.021 ms (2.124 ms / 100) B = [5, 4, 40, 16] (stride (40, 3200, 1, 200)) A = [5, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 1 2.090 -> 2.089 ( -0.05%) [ +0.19% +0.00% +0.14% / -0.05% +1.00% +0.91%] index_select const : Elapsed 0.021 ms (2.094 ms / 100) 2.096 -> 2.095 ( -0.05%) [ +0.19% +0.05% +0.00% / -0.05% +0.38% +0.62%] index_select wrap : Elapsed 0.021 ms (2.100 ms / 100) 2.105 -> 2.108 ( +0.14%) [ +0.33% +0.05% +0.00% / +0.14% +0.62% +0.76%] index_select linear : Elapsed 0.021 ms (2.112 ms / 100) 2.113 -> 2.117 ( +0.19%) [ +0.24% +0.14% +0.00% / +0.19% +0.90% +0.85%] index_select reverse : Elapsed 0.021 ms (2.118 ms / 100) 2.093 -> 2.093 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.76% +0.72%] index_select skip64 : Elapsed 0.021 ms (2.093 ms / 100) 2.091 -> 2.091 ( +0.00%) [ +0.00% +0.05% +0.14% / +0.00% +0.24% +0.43%] index_select skip256 : Elapsed 0.021 ms (2.091 ms / 100) 2.097 -> 2.098 ( +0.05%) [ +0.19% +0.00% +0.14% / +0.05% +0.38% +0.52%] index_select spread : Elapsed 0.021 ms (2.101 ms / 100) 2.118 -> 2.121 ( +0.14%) [ +0.05% +0.00% +0.28% / +0.14% +0.24% +0.42%] index_select strided 3 : Elapsed 0.021 ms (2.119 ms / 100) 2.120 -> 2.117 ( -0.14%) [ +0.00% +0.19% +0.00% / -0.14% +0.09% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.120 ms / 100) 2.111 -> 2.114 ( +0.14%) [ +0.19% +0.24% +0.00% / +0.14% +0.38% +0.28%] index_select strided 7 : Elapsed 0.021 ms (2.115 ms / 100) 2.124 -> 2.128 ( +0.19%) [ +0.05% +0.05% +0.00% / +0.19% +0.24% +0.24%] index_select strided 8 : Elapsed 0.021 ms (2.125 ms / 100) 2.105 -> 2.109 ( +0.19%) [ +0.29% +0.00% +0.14% / +0.19% +0.38% +0.24%] index_select strided 16 : Elapsed 0.021 ms (2.111 ms / 100) 2.107 -> 2.108 ( +0.05%) [ +0.33% +0.05% +0.00% / +0.05% +0.43% +0.24%] index_select random : Elapsed 0.021 ms (2.114 ms / 100) 2.127 -> 2.126 ( -0.05%) [ +0.05% +0.00% +0.09% / -0.05% +0.56% +0.33%] index_select random_sorted : Elapsed 0.021 ms (2.128 ms / 100) 2.120 -> 2.121 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.38% +0.52%] index_select perm : Elapsed 0.021 ms (2.122 ms / 100) 2.112 -> 2.113 ( +0.05%) [ +0.19% +0.00% +0.14% / +0.05% +0.57% +0.71%] index_select perm_sorted : Elapsed 0.021 ms (2.116 ms / 100) B = [5, 4, 40, 16] (stride (40, 3200, 1, 200)) A = [5, 20, 40, 16] (stride (800, 40, 1, 4000)) dim = 1 2.118 -> 2.122 ( +0.19%) [ +0.00% +0.14% +0.09% / +0.19% +0.80% +0.80%] index_select const : Elapsed 0.021 ms (2.118 ms / 100) 2.131 -> 2.133 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.99% +0.70%] index_select wrap : Elapsed 0.021 ms (2.131 ms / 100) 2.122 -> 2.128 ( +0.28%) [ +0.00% +0.05% +0.05% / +0.28% +0.85% +0.80%] index_select linear : Elapsed 0.021 ms (2.122 ms / 100) 2.132 -> 2.129 ( -0.14%) [ +0.09% +0.00% +0.05% / -0.14% +0.61% +0.38%] index_select reverse : Elapsed 0.021 ms (2.134 ms / 100) 2.115 -> 2.114 ( -0.05%) [ +0.00% +0.09% +0.00% / -0.05% +0.47% +0.43%] index_select skip64 : Elapsed 0.021 ms (2.115 ms / 100) 2.117 -> 2.117 ( +0.00%) [ +0.00% +0.05% +0.09% / +0.00% +0.85% +0.66%] index_select skip256 : Elapsed 0.021 ms (2.117 ms / 100) 2.143 -> 2.139 ( -0.19%) [ +0.05% +0.09% +0.00% / -0.19% +0.37% +0.37%] index_select spread : Elapsed 0.021 ms (2.144 ms / 100) 2.140 -> 2.143 ( +0.14%) [ +0.14% +0.09% +0.00% / +0.14% +0.37% +0.28%] index_select strided 3 : Elapsed 0.021 ms (2.143 ms / 100) 2.135 -> 2.134 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.37% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.135 ms / 100) 2.137 -> 2.139 ( +0.09%) [ +0.14% +0.14% +0.00% / +0.09% +0.61% +0.61%] index_select strided 7 : Elapsed 0.021 ms (2.140 ms / 100) 2.138 -> 2.141 ( +0.14%) [ +0.14% +0.05% +0.00% / +0.14% +0.75% +0.89%] index_select strided 8 : Elapsed 0.021 ms (2.141 ms / 100) 2.132 -> 2.136 ( +0.19%) [ +0.14% +0.23% +0.00% / +0.19% +0.80% +0.89%] index_select strided 16 : Elapsed 0.021 ms (2.135 ms / 100) 2.121 -> 2.123 ( +0.09%) [ +0.00% +0.05% +0.09% / +0.09% +0.71% +0.57%] index_select random : Elapsed 0.021 ms (2.121 ms / 100) 2.130 -> 2.133 ( +0.14%) [ +0.14% +0.00% +0.19% / +0.14% +0.66% +0.66%] index_select random_sorted : Elapsed 0.021 ms (2.133 ms / 100) 2.145 -> 2.144 ( -0.05%) [ +0.00% +0.05% +0.09% / -0.05% +0.47% +0.47%] index_select perm : Elapsed 0.021 ms (2.145 ms / 100) 2.143 -> 2.146 ( +0.14%) [ +0.28% +0.00% +0.23% / +0.14% +0.61% +0.42%] index_select perm_sorted : Elapsed 0.021 ms (2.149 ms / 100) B = [5, 4, 40, 16] (stride (160, 40, 1, 800)) A = [5, 20, 40, 16] (stride (1, 80, 1600, 5)) dim = 1 2.035 -> 2.040 ( +0.25%) [ +0.25% +0.00% +0.00% / +0.25% +0.44% +0.59%] index_select const : Elapsed 0.020 ms (2.040 ms / 100) 2.026 -> 2.030 ( +0.20%) [ +0.10% +0.00% +0.10% / +0.20% +0.44% +0.35%] index_select wrap : Elapsed 0.020 ms (2.028 ms / 100) 2.030 -> 2.032 ( +0.10%) [ +0.10% +0.00% +0.15% / +0.10% +0.30% +0.20%] index_select linear : Elapsed 0.020 ms (2.032 ms / 100) 2.033 -> 2.034 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.05% +0.25% +0.39%] index_select reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.035 -> 2.037 ( +0.10%) [ +0.05% +0.15% +0.00% / +0.10% +0.39% +0.29%] index_select skip64 : Elapsed 0.020 ms (2.036 ms / 100) 2.033 -> 2.035 ( +0.10%) [ +0.00% +0.15% +0.05% / +0.10% +0.20% +0.10%] index_select skip256 : Elapsed 0.020 ms (2.033 ms / 100) 2.029 -> 2.029 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.30% +0.30%] index_select spread : Elapsed 0.020 ms (2.030 ms / 100) 2.028 -> 2.032 ( +0.20%) [ +0.20% +0.00% +0.20% / +0.25% +0.20% +0.39%] index_select strided 3 : Elapsed 0.020 ms (2.032 ms / 100) 2.032 -> 2.032 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.00% +0.05% +0.30%] index_select strided 5 : Elapsed 0.020 ms (2.036 ms / 100) 2.034 -> 2.038 ( +0.20%) [ +0.05% +0.00% +0.05% / +0.20% +0.39% +0.29%] index_select strided 7 : Elapsed 0.020 ms (2.035 ms / 100) 2.030 -> 2.030 ( +0.00%) [ +0.00% +0.20% +0.05% / +0.00% +0.30% +0.30%] index_select strided 8 : Elapsed 0.020 ms (2.030 ms / 100) 2.029 -> 2.035 ( +0.30%) [ +0.05% +0.00% +0.20% / +0.30% +0.54% +0.30%] index_select strided 16 : Elapsed 0.020 ms (2.030 ms / 100) 2.027 -> 2.032 ( +0.25%) [ +0.25% +0.00% +0.20% / +0.25% +0.44% +0.44%] index_select random : Elapsed 0.020 ms (2.032 ms / 100) 2.032 -> 2.032 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.54%] index_select random_sorted : Elapsed 0.020 ms (2.032 ms / 100) 2.031 -> 2.032 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.69% +0.54%] index_select perm : Elapsed 0.020 ms (2.032 ms / 100) 2.028 -> 2.030 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.30% +0.35%] index_select perm_sorted : Elapsed 0.020 ms (2.029 ms / 100) B = [5, 4, 40, 16] (stride (160, 1, 4, 800)) A = [5, 20, 40, 16] (stride (20, 1, 1600, 100)) dim = 1 2.143 -> 2.140 ( -0.14%) [ +0.00% +0.05% +0.14% / +0.28% -0.14% -0.09%] index_select const : Elapsed 0.021 ms (2.143 ms / 100) 2.142 -> 2.139 ( -0.14%) [ +0.14% +0.19% +0.00% / +0.14% -0.14% -0.05%] index_select wrap : Elapsed 0.021 ms (2.145 ms / 100) 2.140 -> 2.137 ( -0.14%) [ +0.14% +0.00% +0.05% / +0.09% +0.05% -0.14%] index_select linear : Elapsed 0.021 ms (2.143 ms / 100) 2.140 -> 2.139 ( -0.05%) [ +0.19% +0.00% +0.23% / +0.33% +0.23% -0.05%] index_select reverse : Elapsed 0.021 ms (2.144 ms / 100) 2.139 -> 2.135 ( -0.19%) [ +0.00% +0.09% +0.00% / +0.23% +0.05% -0.19%] index_select skip64 : Elapsed 0.021 ms (2.139 ms / 100) 2.139 -> 2.137 ( -0.09%) [ +0.00% +0.05% +0.00% / +0.09% +0.05% -0.09%] index_select skip256 : Elapsed 0.021 ms (2.139 ms / 100) 2.201 -> 2.201 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.14% +0.14% +0.00%] index_select spread : Elapsed 0.022 ms (2.204 ms / 100) 2.177 -> 2.175 ( -0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.09% -0.09%] index_select strided 3 : Elapsed 0.022 ms (2.179 ms / 100) 2.198 -> 2.201 ( +0.14%) [ +0.23% +0.32% +0.00% / +0.32% +0.14% +0.14%] index_select strided 5 : Elapsed 0.022 ms (2.203 ms / 100) 2.184 -> 2.175 ( -0.41%) [ +0.32% +0.14% +0.00% / +0.05% -0.23% -0.41%] index_select strided 7 : Elapsed 0.022 ms (2.191 ms / 100) 2.181 -> 2.177 ( -0.18%) [ +0.00% +0.05% +0.09% / +0.09% -0.18% +0.05%] index_select strided 8 : Elapsed 0.022 ms (2.181 ms / 100) 2.203 -> 2.199 ( -0.18%) [ +0.05% +0.09% +0.00% / +0.09% -0.18% +0.05%] index_select strided 16 : Elapsed 0.022 ms (2.204 ms / 100) 2.180 -> 2.183 ( +0.14%) [ +0.09% +0.23% +0.00% / +0.23% +0.14% +0.14%] index_select random : Elapsed 0.022 ms (2.182 ms / 100) 2.180 -> 2.178 ( -0.09%) [ +0.00% +0.18% +0.00% / +0.37% +0.05% -0.09%] index_select random_sorted : Elapsed 0.022 ms (2.180 ms / 100) 2.182 -> 2.179 ( -0.14%) [ +0.05% +0.00% +0.09% / -0.14% +0.05% +0.14%] index_select perm : Elapsed 0.022 ms (2.183 ms / 100) 2.179 -> 2.181 ( +0.09%) [ +0.09% +0.18% +0.00% / +0.23% +0.14% +0.09%] index_select perm_sorted : Elapsed 0.022 ms (2.181 ms / 100) out_shape = [5, 20, 4, 16] in_shape = [5, 20, 40, 16] idx_dim = 2 B = [5, 20, 4, 16] (stride (1280, 1, 20, 80)) A = [5, 20, 40, 16] (stride (1, 5, 1600, 100)) dim = 2 1.321 -> 1.322 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.53% +0.53%] index_select const : Elapsed 0.013 ms (1.323 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.61% +0.53%] index_select wrap : Elapsed 0.013 ms (1.322 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.61% +0.61%] index_select linear : Elapsed 0.013 ms (1.321 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.76% +0.76%] index_select reverse : Elapsed 0.013 ms (1.320 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.76% +0.68%] index_select skip64 : Elapsed 0.013 ms (1.321 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.76% +0.68%] index_select skip256 : Elapsed 0.013 ms (1.326 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.38% +0.08% +0.00% / +0.23% +0.61% +0.53%] index_select spread : Elapsed 0.013 ms (1.320 ms / 100) 1.310 -> 1.314 ( +0.31%) [ +0.53% +0.31% +0.00% / +0.31% +0.84% +0.76%] index_select strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.306 -> 1.307 ( +0.08%) [ +0.23% +0.38% +0.00% / +0.08% +0.92% +1.07%] index_select strided 5 : Elapsed 0.013 ms (1.309 ms / 100) 1.320 -> 1.319 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.45% +0.45%] index_select strided 7 : Elapsed 0.013 ms (1.320 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.53% +0.53%] index_select strided 8 : Elapsed 0.013 ms (1.323 ms / 100) 1.322 -> 1.324 ( +0.15%) [ +0.23% +0.00% +0.00% / +0.15% +0.53% +0.45%] index_select strided 16 : Elapsed 0.013 ms (1.325 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.23% +0.15%] index_select random : Elapsed 0.013 ms (1.321 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +0.53% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_select perm : Elapsed 0.013 ms (1.323 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.38% +0.38%] index_select perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) B = [5, 20, 4, 16] (stride (1280, 1, 20, 80)) A = [5, 20, 40, 16] (stride (800, 1, 20, 4000)) dim = 2 1.313 -> 1.316 ( +0.23%) [ +0.08% +0.00% +0.30% / +0.23% +0.38% +0.53%] index_select const : Elapsed 0.013 ms (1.314 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.15% +0.08% +0.00% / +0.23% +0.30% +0.68%] index_select wrap : Elapsed 0.013 ms (1.317 ms / 100) 1.307 -> 1.306 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.38% +0.84%] index_select linear : Elapsed 0.013 ms (1.307 ms / 100) 1.317 -> 1.314 ( -0.23%) [ +0.00% +0.08% +0.00% / -0.23% +0.23% +0.15%] index_select reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.00% +0.08% +0.15% / +0.23% +0.30% +0.30%] index_select skip64 : Elapsed 0.013 ms (1.315 ms / 100) 1.303 -> 1.304 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.69% +1.15%] index_select skip256 : Elapsed 0.013 ms (1.304 ms / 100) 1.297 -> 1.296 ( -0.08%) [ +0.54% +0.00% +0.39% / -0.08% +0.39% +0.46%] index_select spread : Elapsed 0.013 ms (1.304 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.76% +0.69%] index_select strided 3 : Elapsed 0.013 ms (1.310 ms / 100) 1.313 -> 1.314 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.46% +0.53%] index_select strided 5 : Elapsed 0.013 ms (1.313 ms / 100) 1.289 -> 1.290 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.54% +0.62%] index_select strided 7 : Elapsed 0.013 ms (1.290 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.47% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.282 ms / 100) 1.307 -> 1.307 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_select strided 16 : Elapsed 0.013 ms (1.308 ms / 100) 1.301 -> 1.305 ( +0.31%) [ +0.08% +0.00% +0.15% / +0.31% +0.85% +0.54%] index_select random : Elapsed 0.013 ms (1.302 ms / 100) 1.304 -> 1.306 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.46% +0.46%] index_select random_sorted : Elapsed 0.013 ms (1.305 ms / 100) 1.301 -> 1.302 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.61% +0.61%] index_select perm : Elapsed 0.013 ms (1.302 ms / 100) 1.302 -> 1.302 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.61% +0.54%] index_select perm_sorted : Elapsed 0.013 ms (1.304 ms / 100) B = [5, 20, 4, 16] (stride (16, 320, 80, 1)) A = [5, 20, 40, 16] (stride (12800, 40, 1, 800)) dim = 2 1.367 -> 1.366 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.44% +0.37%] index_select const : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.66% +0.66%] index_select wrap : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.365 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.44% +0.37%] index_select linear : Elapsed 0.014 ms (1.365 ms / 100) 1.365 -> 1.365 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.59% +0.44%] index_select reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.51% +0.44%] index_select skip64 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.29% +0.29%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.51% +0.44%] index_select spread : Elapsed 0.014 ms (1.370 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_select strided 3 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.44% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.15% +0.00% / +0.07% +0.44% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.51% +0.59%] index_select strided 8 : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.370 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.22% +0.36%] index_select strided 16 : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.367 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.37% +0.29%] index_select random : Elapsed 0.014 ms (1.368 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.44% +0.44%] index_select perm : Elapsed 0.014 ms (1.370 ms / 100) 1.365 -> 1.365 ( +0.00%) [ +0.22% +0.00% +0.07% / +0.00% +0.66% +0.59%] index_select perm_sorted : Elapsed 0.014 ms (1.368 ms / 100) B = [5, 20, 4, 16] (stride (16, 320, 80, 1)) A = [5, 20, 40, 16] (stride (1, 3200, 80, 5)) dim = 2 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.47% +0.39%] index_select const : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.31% +0.31%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.63% +0.00% +0.00% / +0.16% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.282 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.71% +0.71%] index_select strided 5 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.79%] index_select strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.71% +0.79%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.71% +0.71%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.79% +0.71%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.79% +0.79%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.71% +0.71%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [5, 20, 4, 16] (stride (1, 320, 80, 5)) A = [5, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 2 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.16% +0.16% +0.00% / +0.23% +0.63% +0.47%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.55% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.55% +0.39%] index_select spread : Elapsed 0.013 ms (1.271 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.47% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.275 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.39% +0.31%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.23% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.39% +0.31%] index_select perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) B = [5, 20, 4, 16] (stride (4, 320, 1, 20)) A = [5, 20, 40, 16] (stride (12800, 640, 16, 1)) dim = 2 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.34% +0.34%] index_select const : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.50% +0.42%] index_select wrap : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.42% +0.34%] index_select linear : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.42% +0.42%] index_select skip64 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.59% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.67% +0.59%] index_select spread : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select strided 5 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.42% +0.34%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_select strided 8 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.25% +0.00% +0.00% / +0.08% +0.25% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.196 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select random : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.34% +0.34%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.34% +0.50%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) B = [5, 20, 4, 16] (stride (4, 320, 1, 20)) A = [5, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 2 1.277 -> 1.279 ( +0.16%) [ +0.23% +0.00% +0.08% / +0.16% +0.70% +0.70%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.55% +0.63%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.63% +0.47%] index_select linear : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.70% +0.78%] index_select reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.23% +0.16% +0.00% / +0.08% +0.86% +0.78%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.31% +0.00% +0.00% / +0.08% +0.55% +0.71%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.78% +0.63%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.70% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.279 ( +0.31%) [ +0.16% +0.24% +0.00% / +0.31% +0.71% +0.63%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.63% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.23% +0.16% +0.00% / +0.16% +0.47% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.287 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.23%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.78% +0.55%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.31% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.280 -> 1.283 ( +0.23%) [ +0.08% +0.00% +0.00% / +0.23% +0.63% +0.31%] index_select perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) B = [5, 20, 4, 16] (stride (320, 1, 1600, 20)) A = [5, 20, 40, 16] (stride (1, 200, 5, 4000)) dim = 2 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.44% +0.37%] index_select const : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.59% +0.44%] index_select wrap : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.370 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.51% +1.17%] index_select linear : Elapsed 0.014 ms (1.371 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.59% +0.66%] index_select reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.372 ( +0.22%) [ +0.00% +0.22% +0.22% / +0.22% +0.66% +0.37%] index_select skip64 : Elapsed 0.014 ms (1.369 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.29% +0.22% +0.00% / +0.29% +0.80% +0.66%] index_select skip256 : Elapsed 0.014 ms (1.372 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.29% +0.07% +0.00% / +0.07% +0.59% +0.66%] index_select spread : Elapsed 0.014 ms (1.371 ms / 100) 1.369 -> 1.373 ( +0.29%) [ +0.22% +0.07% +0.00% / +0.29% +0.73% +0.66%] index_select strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.58% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.375 ( +0.44%) [ +0.22% +0.22% +0.00% / +0.44% +0.44% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.372 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.22% +0.00% +0.15% / +0.07% +0.44% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.374 ms / 100) 1.372 -> 1.375 ( +0.22%) [ +0.07% +0.00% +0.00% / +0.22% +0.58% +0.66%] index_select strided 16 : Elapsed 0.014 ms (1.373 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.44% +0.37%] index_select random : Elapsed 0.014 ms (1.371 ms / 100) 1.371 -> 1.371 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.29% +0.29%] index_select random_sorted : Elapsed 0.014 ms (1.371 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.51% +0.66%] index_select perm : Elapsed 0.014 ms (1.374 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.44% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.370 ms / 100) B = [5, 20, 4, 16] (stride (16, 80, 1600, 1)) A = [5, 20, 40, 16] (stride (12800, 1, 320, 20)) dim = 2 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.78% +0.78%] index_select const : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select spread : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.16% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.23% +0.23%] index_select strided 5 : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.86%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.55% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select random : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.71% +0.63%] index_select perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) B = [5, 20, 4, 16] (stride (20, 1, 1600, 100)) dim = 2 fill_cnt = 40 1.600 -> 1.596 ( -0.25%) [ +0.06% +0.00% +0.19% / -0.25% +0.19% +0.13%] index_fill_ const : Elapsed 0.016 ms (1.601 ms / 100) 1.574 -> 1.567 ( -0.44%) [ +0.00% +0.25% +0.44% / -0.25% +0.06% -0.44%] index_fill_ linear : Elapsed 0.016 ms (1.574 ms / 100) 1.568 -> 1.555 ( -0.83%) [ +0.32% +0.19% +0.00% / -0.51% -0.77% -0.83%] index_fill_ reverse : Elapsed 0.016 ms (1.573 ms / 100) 1.595 -> 1.592 ( -0.19%) [ +0.00% +0.25% +0.25% / -0.19% +0.69% +0.82%] index_fill_ skip64 : Elapsed 0.016 ms (1.595 ms / 100) 1.597 -> 1.592 ( -0.31%) [ +0.13% +0.00% +0.00% / -0.31% +0.50% +0.63%] index_fill_ skip256 : Elapsed 0.016 ms (1.599 ms / 100) 1.263 -> 1.258 ( -0.40%) [ +0.00% +0.16% +0.55% / -0.40% +0.95% +1.43%] index_fill_ spread : Elapsed 0.013 ms (1.263 ms / 100) 1.208 -> 1.191 ( -1.41%) [ +0.08% +0.00% +0.50% / -1.41% +0.17% -0.33%] index_fill_ strided 3 : Elapsed 0.012 ms (1.209 ms / 100) 1.296 -> 1.288 ( -0.62%) [ +0.54% +0.00% +0.46% / -0.62% +0.31% +0.08%] index_fill_ random : Elapsed 0.013 ms (1.303 ms / 100) 1.311 -> 1.302 ( -0.69%) [ +0.38% +0.00% +0.76% / -0.69% +0.84% +0.76%] index_fill_ random_sorted : Elapsed 0.013 ms (1.316 ms / 100) B = [5, 20, 4, 16] (stride (20, 1, 1600, 100)) A = [5, 20, 40, 16] (stride (20, 1, 1600, 100)) dim = 2 0.595 -> 0.595 ( +0.00%) [ +0.17% +0.67% +0.00% / +0.17% +0.00% +0.00%] index_select const : Elapsed 0.006 ms (0.596 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.51% +4.79%] index_select wrap : Elapsed 0.006 ms (0.585 ms / 100) 0.585 -> 0.585 ( +0.00%) [ +2.05% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select linear : Elapsed 0.006 ms (0.597 ms / 100) 0.588 -> 0.586 ( -0.34%) [ +1.36% +0.00% +0.00% / -0.17% -0.17% -0.34%] index_select reverse : Elapsed 0.006 ms (0.596 ms / 100) 0.592 -> 0.593 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +1.01% +0.51%] index_select skip64 : Elapsed 0.006 ms (0.593 ms / 100) 0.594 -> 0.596 ( +0.34%) [ +0.00% +0.17% +0.00% / +0.34% +1.35% +0.51%] index_select skip256 : Elapsed 0.006 ms (0.594 ms / 100) 0.584 -> 0.586 ( +0.34%) [ +0.34% +0.17% +0.00% / +0.34% +0.86% +0.86%] index_select spread : Elapsed 0.006 ms (0.586 ms / 100) 0.591 -> 0.595 ( +0.68%) [ +0.00% +0.17% +0.17% / +5.92% +1.02% +0.68%] index_select strided 3 : Elapsed 0.006 ms (0.591 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.34% +0.51% +0.17%] index_select strided 5 : Elapsed 0.006 ms (0.585 ms / 100) 0.592 -> 0.591 ( -0.17%) [ +0.51% +0.17% +0.00% / +0.34% +0.17% -0.17%] index_select strided 7 : Elapsed 0.006 ms (0.595 ms / 100) 0.589 -> 0.591 ( +0.34%) [ +0.00% +0.17% +0.00% / +0.34% +1.19% +1.02%] index_select strided 8 : Elapsed 0.006 ms (0.589 ms / 100) 0.593 -> 0.594 ( +0.17%) [ +0.17% +0.34% +0.00% / +0.17% +0.51% +0.34%] index_select strided 16 : Elapsed 0.006 ms (0.594 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.17% +0.68% +0.00% / +0.34% +0.00% +0.00%] index_select random : Elapsed 0.006 ms (0.587 ms / 100) 0.592 -> 0.591 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.17% +1.18% +0.84%] index_select random_sorted : Elapsed 0.006 ms (0.592 ms / 100) 0.587 -> 0.588 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.51% +0.51%] index_select perm : Elapsed 0.006 ms (0.587 ms / 100) 0.597 -> 0.599 ( +0.34%) [ +0.84% +0.34% +0.00% / +0.34% +0.84% +0.34%] index_select perm_sorted : Elapsed 0.006 ms (0.602 ms / 100) B = [5, 20, 4, 16] (stride (1, 20, 5, 400)) A = [5, 20, 40, 16] (stride (12800, 1, 20, 800)) dim = 2 1.320 -> 1.322 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.76% +0.68%] index_select const : Elapsed 0.013 ms (1.322 ms / 100) 1.302 -> 1.304 ( +0.15%) [ +0.08% +0.15% +0.00% / +0.15% +0.77% +0.77%] index_select wrap : Elapsed 0.013 ms (1.303 ms / 100) 1.297 -> 1.296 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.39%] index_select linear : Elapsed 0.013 ms (1.297 ms / 100) 1.289 -> 1.294 ( +0.39%) [ +0.08% +0.47% +0.00% / +0.39% +0.70% +0.62%] index_select reverse : Elapsed 0.013 ms (1.290 ms / 100) 1.321 -> 1.323 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.53% +0.68%] index_select skip64 : Elapsed 0.013 ms (1.322 ms / 100) 1.306 -> 1.308 ( +0.15%) [ +0.08% +0.15% +0.00% / +0.15% +1.07% +1.00%] index_select skip256 : Elapsed 0.013 ms (1.307 ms / 100) 1.308 -> 1.307 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.61% +0.84%] index_select spread : Elapsed 0.013 ms (1.309 ms / 100) 1.305 -> 1.305 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.38% +0.38%] index_select strided 3 : Elapsed 0.013 ms (1.305 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.55% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.303 -> 1.304 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.46% +0.54%] index_select strided 8 : Elapsed 0.013 ms (1.304 ms / 100) 1.309 -> 1.311 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.76% +0.84%] index_select strided 16 : Elapsed 0.013 ms (1.310 ms / 100) 1.304 -> 1.307 ( +0.23%) [ +0.15% +0.15% +0.00% / +0.23% +0.61% +0.92%] index_select random : Elapsed 0.013 ms (1.306 ms / 100) 1.303 -> 1.304 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.54% +0.69%] index_select random_sorted : Elapsed 0.013 ms (1.305 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.38% +0.23%] index_select perm : Elapsed 0.013 ms (1.316 ms / 100) 1.307 -> 1.307 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.84% +0.99%] index_select perm_sorted : Elapsed 0.013 ms (1.308 ms / 100) B = [5, 20, 4, 16] (stride (1, 20, 5, 400)) A = [5, 20, 40, 16] (stride (800, 40, 1, 4000)) dim = 2 1.284 -> 1.285 ( +0.08%) [ +0.47% +0.16% +0.00% / +0.08% +0.55% +0.31%] index_select const : Elapsed 0.013 ms (1.290 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select wrap : Elapsed 0.013 ms (1.287 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.39% +0.47%] index_select linear : Elapsed 0.013 ms (1.288 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.31% +0.23%] index_select reverse : Elapsed 0.013 ms (1.287 ms / 100) 1.291 -> 1.292 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.39% +0.31%] index_select skip64 : Elapsed 0.013 ms (1.292 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.54%] index_select skip256 : Elapsed 0.013 ms (1.288 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.281 ms / 100) 1.289 -> 1.289 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.62%] index_select strided 3 : Elapsed 0.013 ms (1.289 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.78% +0.70%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.285 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.47% +0.62%] index_select strided 7 : Elapsed 0.013 ms (1.288 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_select strided 16 : Elapsed 0.013 ms (1.283 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.70% +0.78%] index_select random : Elapsed 0.013 ms (1.288 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.70% +0.62%] index_select random_sorted : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.70% +0.62%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.54% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.287 ms / 100) B = [5, 20, 4, 16] (stride (20, 1, 100, 400)) A = [5, 20, 40, 16] (stride (320, 1, 1600, 20)) dim = 2 1.198 -> 1.199 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.33% +0.33%] index_select const : Elapsed 0.012 ms (1.199 ms / 100) 1.199 -> 1.199 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select wrap : Elapsed 0.012 ms (1.199 ms / 100) 1.198 -> 1.199 ( +0.08%) [ +0.33% +0.00% +0.08% / +0.08% +0.50% +0.42%] index_select linear : Elapsed 0.012 ms (1.202 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.58% +0.50%] index_select reverse : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.58% +0.58%] index_select skip64 : Elapsed 0.012 ms (1.199 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.58% +0.58%] index_select skip256 : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.58% +0.58%] index_select spread : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.33%] index_select strided 3 : Elapsed 0.012 ms (1.197 ms / 100) 1.198 -> 1.198 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.50%] index_select strided 5 : Elapsed 0.012 ms (1.199 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.42% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.199 ms / 100) 1.198 -> 1.199 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.33% +0.25%] index_select strided 8 : Elapsed 0.012 ms (1.199 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.33%] index_select strided 16 : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.33% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select random : Elapsed 0.012 ms (1.201 ms / 100) 1.199 -> 1.200 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.33%] index_select random_sorted : Elapsed 0.012 ms (1.199 ms / 100) 1.200 -> 1.199 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.33% +0.25%] index_select perm : Elapsed 0.012 ms (1.200 ms / 100) 1.198 -> 1.198 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.50% +0.50%] index_select perm_sorted : Elapsed 0.012 ms (1.199 ms / 100) B = [5, 20, 4, 16] (stride (20, 1, 100, 400)) A = [5, 20, 40, 16] (stride (800, 40, 1, 4000)) dim = 2 0.609 -> 0.607 ( -0.33%) [ +0.16% +0.00% +0.00% / +0.33% -0.16% -0.33%] index_select const : Elapsed 0.006 ms (0.610 ms / 100) 0.604 -> 0.604 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.33% +0.33%] index_select wrap : Elapsed 0.006 ms (0.604 ms / 100) 0.603 -> 0.606 ( +0.50%) [ +0.17% +0.00% +0.33% / +0.50% +0.66% +0.50%] index_select linear : Elapsed 0.006 ms (0.604 ms / 100) 0.609 -> 0.611 ( +0.33%) [ +0.49% +0.33% +0.00% / +0.49% +0.66% +0.33%] index_select reverse : Elapsed 0.006 ms (0.612 ms / 100) 0.610 -> 0.612 ( +0.33%) [ +0.00% +0.16% +0.00% / +3.28% +0.49% +0.33%] index_select skip64 : Elapsed 0.006 ms (0.610 ms / 100) 0.602 -> 0.603 ( +0.17%) [ +0.00% +0.33% +0.17% / +0.17% +0.83% +1.00%] index_select skip256 : Elapsed 0.006 ms (0.602 ms / 100) 0.605 -> 0.607 ( +0.33%) [ +0.17% +0.00% +0.17% / +0.33% +0.50% +0.66%] index_select spread : Elapsed 0.006 ms (0.606 ms / 100) 0.603 -> 0.605 ( +0.33%) [ +0.17% +0.33% +0.00% / +0.33% +0.66% +0.66%] index_select strided 3 : Elapsed 0.006 ms (0.604 ms / 100) 0.605 -> 0.606 ( +0.17%) [ +0.00% +0.33% +0.17% / +0.17% +0.50% +0.17%] index_select strided 5 : Elapsed 0.006 ms (0.605 ms / 100) 0.609 -> 0.609 ( +0.00%) [ +0.00% +0.49% +0.00% / +0.33% +0.16% +0.00%] index_select strided 7 : Elapsed 0.006 ms (0.609 ms / 100) 0.608 -> 0.606 ( -0.33%) [ +0.00% +0.00% +0.00% / -0.16% -0.16% -0.33%] index_select strided 8 : Elapsed 0.006 ms (0.608 ms / 100) 0.603 -> 0.604 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.33% +0.17% +0.17%] index_select strided 16 : Elapsed 0.006 ms (0.603 ms / 100) 0.608 -> 0.607 ( -0.16%) [ +0.33% +0.16% +0.00% / +0.49% +0.16% -0.16%] index_select random : Elapsed 0.006 ms (0.610 ms / 100) 0.613 -> 0.614 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.33% +0.33%] index_select random_sorted : Elapsed 0.006 ms (0.613 ms / 100) 0.612 -> 0.611 ( -0.16%) [ +3.43% +0.00% +0.00% / -0.16% +0.00% +0.49%] index_select perm : Elapsed 0.006 ms (0.633 ms / 100) 0.612 -> 0.613 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.65% +0.33%] index_select perm_sorted : Elapsed 0.006 ms (0.613 ms / 100) out_shape = [5, 20, 40, 4] in_shape = [5, 20, 40, 16] idx_dim = 3 B = [5, 20, 40, 4] (stride (3200, 160, 1, 40)) A = [5, 20, 40, 16] (stride (12800, 16, 320, 1)) dim = 3 2.465 -> 2.470 ( +0.20%) [ +0.00% +0.08% +0.08% / +0.20% +0.28% +0.28%] index_select const : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.32% +0.32%] index_select wrap : Elapsed 0.025 ms (2.466 ms / 100) 2.467 -> 2.468 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.28% +0.24%] index_select linear : Elapsed 0.025 ms (2.467 ms / 100) 2.467 -> 2.465 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.24% +0.24%] index_select reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.24% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.470 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.12% +0.00% +0.04% / +0.12% +0.16% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.469 ms / 100) 2.496 -> 2.496 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.28% +0.28%] index_select spread : Elapsed 0.025 ms (2.497 ms / 100) 2.498 -> 2.498 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.00% +0.20% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.502 ms / 100) 2.489 -> 2.492 ( +0.12%) [ +0.00% +0.16% +0.28% / +0.12% +0.48% +0.84%] index_select strided 5 : Elapsed 0.025 ms (2.489 ms / 100) 2.492 -> 2.497 ( +0.20%) [ +0.28% +0.00% +0.20% / +0.20% +0.36% +0.40%] index_select strided 7 : Elapsed 0.025 ms (2.499 ms / 100) 2.506 -> 2.507 ( +0.04%) [ +0.00% +0.00% +0.08% / +0.04% +0.44% +0.60%] index_select strided 8 : Elapsed 0.025 ms (2.506 ms / 100) 2.503 -> 2.504 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.48% +0.40%] index_select random : Elapsed 0.025 ms (2.504 ms / 100) 2.496 -> 2.502 ( +0.24%) [ +0.12% +0.16% +0.00% / +0.32% +0.24% +0.32%] index_select random_sorted : Elapsed 0.025 ms (2.499 ms / 100) 2.498 -> 2.500 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.12% +0.24%] index_select perm : Elapsed 0.025 ms (2.498 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +0.44% +0.56%] index_select perm_sorted : Elapsed 0.025 ms (2.506 ms / 100) B = [5, 20, 40, 4] (stride (80, 1, 400, 20)) A = [5, 20, 40, 16] (stride (40, 200, 1, 4000)) dim = 3 2.382 -> 2.382 ( +0.00%) [ +0.25% +0.04% +0.00% / +0.00% +0.13% +0.04%] index_select const : Elapsed 0.024 ms (2.388 ms / 100) 2.438 -> 2.440 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.16% +0.08%] index_select wrap : Elapsed 0.024 ms (2.440 ms / 100) 2.440 -> 2.439 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.00% -0.04%] index_select linear : Elapsed 0.024 ms (2.440 ms / 100) 2.439 -> 2.439 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.12% +0.12% +0.00%] index_select reverse : Elapsed 0.024 ms (2.441 ms / 100) 2.388 -> 2.386 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.04% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.388 ms / 100) 2.379 -> 2.383 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.21% +0.17% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.381 ms / 100) 2.437 -> 2.437 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.08% +0.16% +0.00%] index_select spread : Elapsed 0.024 ms (2.437 ms / 100) 2.438 -> 2.439 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.08% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.441 ms / 100) 2.436 -> 2.442 ( +0.25%) [ +0.00% +0.04% +0.04% / +0.25% +0.33% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.436 ms / 100) 2.439 -> 2.440 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.04% +0.04%] index_select strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.401 -> 2.404 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.12% +0.37% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.404 ms / 100) 2.433 -> 2.435 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.25% +0.21%] index_select random : Elapsed 0.024 ms (2.436 ms / 100) 2.431 -> 2.434 ( +0.12%) [ +0.00% +0.16% +0.04% / +0.12% +0.37% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.431 ms / 100) 2.442 -> 2.443 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.41% +0.41%] index_select perm : Elapsed 0.024 ms (2.443 ms / 100) 2.436 -> 2.435 ( -0.04%) [ +0.12% +0.16% +0.00% / -0.04% +0.25% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) out_shape = [4, 40, 16, 20] in_shape = [5, 40, 16, 20] idx_dim = 0 B = [4, 40, 16, 20] (stride (12800, 320, 20, 1)) A = [5, 40, 16, 20] (stride (20, 1600, 100, 1)) dim = 0 4.969 -> 4.980 ( +0.22%) [ +0.16% +0.00% +0.18% / +0.24% +0.22% +0.26%] index_select const : Elapsed 0.050 ms (4.977 ms / 100) 5.105 -> 5.106 ( +0.02%) [ +0.06% +0.00% +0.00% / +0.12% +0.22% +0.02%] index_select wrap : Elapsed 0.051 ms (5.108 ms / 100) 5.107 -> 5.111 ( +0.08%) [ +0.00% +0.02% +0.04% / +0.08% +0.16% +0.18%] index_select linear : Elapsed 0.051 ms (5.107 ms / 100) 5.104 -> 5.105 ( +0.02%) [ +0.00% +0.00% +0.14% / +0.02% +0.10% +0.14%] index_select reverse : Elapsed 0.051 ms (5.104 ms / 100) 4.970 -> 4.976 ( +0.12%) [ +0.00% +0.00% +0.16% / +0.12% +0.26% +0.14%] index_select skip64 : Elapsed 0.050 ms (4.970 ms / 100) 4.978 -> 4.980 ( +0.04%) [ +0.00% +0.06% +0.20% / +0.04% +0.10% +0.10%] index_select skip256 : Elapsed 0.050 ms (4.978 ms / 100) 5.102 -> 5.104 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.10% +0.10% +0.04%] index_select spread : Elapsed 0.051 ms (5.104 ms / 100) 5.093 -> 5.101 ( +0.16%) [ +0.18% +0.12% +0.00% / +0.16% +0.31% +0.43%] index_select strided 3 : Elapsed 0.051 ms (5.102 ms / 100) 5.023 -> 5.018 ( -0.10%) [ +0.00% +0.06% +0.16% / -0.10% +0.24% +0.24%] index_select random : Elapsed 0.050 ms (5.023 ms / 100) 5.011 -> 5.015 ( +0.08%) [ +0.02% +0.10% +0.00% / +0.08% +0.24% +0.34%] index_select random_sorted : Elapsed 0.050 ms (5.012 ms / 100) 5.110 -> 5.111 ( +0.02%) [ +0.06% +0.00% +0.00% / +0.02% +0.18% +0.18%] index_select perm : Elapsed 0.051 ms (5.113 ms / 100) 5.094 -> 5.104 ( +0.20%) [ +0.14% +0.00% +0.18% / +0.20% +0.33% +0.31%] index_select perm_sorted : Elapsed 0.051 ms (5.101 ms / 100) B = [4, 40, 16, 20] (stride (1, 1280, 4, 64)) A = [5, 40, 16, 20] (stride (800, 20, 4000, 1)) dim = 0 5.805 -> 5.801 ( -0.07%) [ +0.09% +0.00% +0.16% / +0.02% -0.03% -0.07%] index_select const : Elapsed 0.058 ms (5.810 ms / 100) 5.901 -> 5.894 ( -0.12%) [ +0.03% +0.03% +0.00% / +0.05% -0.12% -0.08%] index_select wrap : Elapsed 0.059 ms (5.903 ms / 100) 5.899 -> 5.880 ( -0.32%) [ +0.07% +0.03% +0.00% / +0.00% -0.32% -0.19%] index_select linear : Elapsed 0.059 ms (5.903 ms / 100) 5.928 -> 5.896 ( -0.54%) [ +0.03% +0.05% +0.00% / +0.13% -0.52% -0.54%] index_select reverse : Elapsed 0.059 ms (5.930 ms / 100) 5.799 -> 5.787 ( -0.21%) [ +0.05% +0.10% +0.00% / +0.02% -0.02% -0.21%] index_select skip64 : Elapsed 0.058 ms (5.802 ms / 100) 5.802 -> 5.796 ( -0.10%) [ +0.02% +0.00% +0.10% / +0.05% +0.00% -0.10%] index_select skip256 : Elapsed 0.058 ms (5.803 ms / 100) 5.899 -> 5.891 ( -0.14%) [ +0.07% +0.14% +0.00% / +0.19% -0.14% -0.12%] index_select spread : Elapsed 0.059 ms (5.903 ms / 100) 5.905 -> 5.886 ( -0.32%) [ +0.00% +0.00% +0.05% / -0.07% -0.29% -0.32%] index_select strided 3 : Elapsed 0.059 ms (5.905 ms / 100) 5.825 -> 5.829 ( +0.07%) [ +0.05% +0.00% +0.10% / +0.07% +0.39% +0.38%] index_select random : Elapsed 0.058 ms (5.828 ms / 100) 5.835 -> 5.838 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.43% +0.41%] index_select random_sorted : Elapsed 0.058 ms (5.835 ms / 100) 5.893 -> 5.890 ( -0.05%) [ +0.10% +0.00% +0.10% / +0.14% +0.02% -0.05%] index_select perm : Elapsed 0.059 ms (5.899 ms / 100) 5.892 -> 5.885 ( -0.12%) [ +0.00% +0.20% +0.14% / +0.03% -0.02% -0.12%] index_select perm_sorted : Elapsed 0.059 ms (5.892 ms / 100) B = [4, 40, 16, 20] (stride (20, 80, 3200, 1)) A = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) dim = 0 5.979 -> 5.988 ( +0.15%) [ +0.13% +0.00% +0.12% / +0.15% +0.15% +0.20%] index_select const : Elapsed 0.060 ms (5.987 ms / 100) 5.986 -> 5.991 ( +0.08%) [ +0.07% +0.00% +0.12% / +0.10% +0.15% +0.08%] index_select wrap : Elapsed 0.060 ms (5.990 ms / 100) 5.982 -> 5.985 ( +0.05%) [ +0.12% +0.00% +0.02% / +0.18% +0.05% +0.13%] index_select linear : Elapsed 0.060 ms (5.989 ms / 100) 5.986 -> 5.989 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.08% +0.20% +0.05%] index_select reverse : Elapsed 0.060 ms (5.986 ms / 100) 5.983 -> 5.989 ( +0.10%) [ +0.00% +0.15% +0.15% / +0.22% +0.10% +0.25%] index_select skip64 : Elapsed 0.060 ms (5.983 ms / 100) 5.988 -> 5.988 ( +0.00%) [ +0.08% +0.00% +0.05% / +0.08% +0.10% +0.00%] index_select skip256 : Elapsed 0.060 ms (5.993 ms / 100) 5.987 -> 5.987 ( +0.00%) [ +0.05% +0.00% +0.08% / +0.03% +0.10% +0.00%] index_select spread : Elapsed 0.060 ms (5.990 ms / 100) 5.988 -> 5.989 ( +0.02%) [ +0.00% +0.03% +0.02% / +0.08% +0.02% +0.05%] index_select strided 3 : Elapsed 0.060 ms (5.988 ms / 100) 5.989 -> 5.991 ( +0.03%) [ +0.00% +0.00% +0.02% / +0.03% +0.08% +0.05%] index_select random : Elapsed 0.060 ms (5.989 ms / 100) 5.992 -> 5.987 ( -0.08%) [ +0.00% +0.02% +0.03% / -0.03% +0.02% -0.08%] index_select random_sorted : Elapsed 0.060 ms (5.992 ms / 100) 5.986 -> 5.987 ( +0.02%) [ +0.00% +0.08% +0.08% / +0.02% +0.10% +0.03%] index_select perm : Elapsed 0.060 ms (5.986 ms / 100) 5.984 -> 5.990 ( +0.10%) [ +0.08% +0.00% +0.10% / +0.10% +0.13% +0.12%] index_select perm_sorted : Elapsed 0.060 ms (5.989 ms / 100) B = [4, 40, 16, 20] (stride (1, 80, 3200, 4)) A = [5, 40, 16, 20] (stride (12800, 16, 1, 640)) dim = 0 5.579 -> 5.551 ( -0.50%) [ +0.00% +0.04% +0.07% / -0.07% -0.48% -0.50%] index_select const : Elapsed 0.056 ms (5.579 ms / 100) 5.612 -> 5.602 ( -0.18%) [ +0.04% +0.00% +0.07% / +0.12% -0.18% -0.18%] index_select wrap : Elapsed 0.056 ms (5.614 ms / 100) 5.606 -> 5.598 ( -0.14%) [ +0.00% +0.21% +0.32% / +0.18% -0.05% -0.14%] index_select linear : Elapsed 0.056 ms (5.606 ms / 100) 5.608 -> 5.600 ( -0.14%) [ +0.04% +0.04% +0.00% / +0.05% -0.14% -0.14%] index_select reverse : Elapsed 0.056 ms (5.610 ms / 100) 5.583 -> 5.555 ( -0.50%) [ +0.00% +0.02% +0.05% / +0.07% -0.50% -0.45%] index_select skip64 : Elapsed 0.056 ms (5.583 ms / 100) 5.578 -> 5.551 ( -0.48%) [ +0.04% +0.00% +0.05% / +0.23% -0.48% -0.38%] index_select skip256 : Elapsed 0.056 ms (5.580 ms / 100) 5.613 -> 5.598 ( -0.27%) [ +0.00% +0.00% +0.11% / +0.04% -0.27% -0.11%] index_select spread : Elapsed 0.056 ms (5.613 ms / 100) 5.621 -> 5.596 ( -0.44%) [ +0.02% +0.00% +0.12% / +0.18% -0.37% -0.44%] index_select strided 3 : Elapsed 0.056 ms (5.622 ms / 100) 5.608 -> 5.570 ( -0.68%) [ +0.00% +0.05% +0.14% / +0.07% -0.52% -0.68%] index_select random : Elapsed 0.056 ms (5.608 ms / 100) 5.595 -> 5.562 ( -0.59%) [ +0.07% +0.00% +0.13% / +0.21% -0.59% -0.59%] index_select random_sorted : Elapsed 0.056 ms (5.599 ms / 100) 5.619 -> 5.607 ( -0.21%) [ +0.00% +0.07% +0.11% / -0.07% -0.16% -0.21%] index_select perm : Elapsed 0.056 ms (5.619 ms / 100) 5.619 -> 5.596 ( -0.41%) [ +0.00% +0.04% +0.07% / +0.09% -0.41% -0.36%] index_select perm_sorted : Elapsed 0.056 ms (5.619 ms / 100) out_shape = [5, 4, 16, 20] in_shape = [5, 40, 16, 20] idx_dim = 1 B = [5, 4, 16, 20] (stride (1280, 320, 1, 16)) A = [5, 40, 16, 20] (stride (16, 1600, 1, 80)) dim = 1 1.326 -> 1.327 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.60% +0.60%] index_select const : Elapsed 0.013 ms (1.327 ms / 100) 1.315 -> 1.319 ( +0.30%) [ +0.00% +0.00% +0.46% / +0.30% +0.38% +0.38%] index_select wrap : Elapsed 0.013 ms (1.315 ms / 100) 1.312 -> 1.318 ( +0.46%) [ +0.38% +0.30% +0.00% / +0.46% +0.61% +0.61%] index_select linear : Elapsed 0.013 ms (1.317 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.60% +0.60%] index_select reverse : Elapsed 0.013 ms (1.330 ms / 100) 1.325 -> 1.326 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +0.83% +0.83%] index_select skip64 : Elapsed 0.013 ms (1.328 ms / 100) 1.328 -> 1.329 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.68% +0.60%] index_select skip256 : Elapsed 0.013 ms (1.329 ms / 100) 1.310 -> 1.313 ( +0.23%) [ +0.23% +0.00% +0.15% / +0.23% +0.61% +0.61%] index_select spread : Elapsed 0.013 ms (1.313 ms / 100) 1.315 -> 1.320 ( +0.38%) [ +0.30% +0.08% +0.00% / +0.38% +0.38% +0.38%] index_select strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.76% +0.76%] index_select strided 5 : Elapsed 0.013 ms (1.321 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.08% +0.08%] index_select strided 7 : Elapsed 0.013 ms (1.326 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.61% +0.53%] index_select strided 8 : Elapsed 0.013 ms (1.324 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.76% +0.91%] index_select strided 16 : Elapsed 0.013 ms (1.323 ms / 100) 1.314 -> 1.311 ( -0.23%) [ +0.15% +0.00% +0.08% / -0.23% +0.46% +0.30%] index_select random : Elapsed 0.013 ms (1.316 ms / 100) 1.332 -> 1.333 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.45% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.332 ms / 100) 1.329 -> 1.332 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.38% +0.38%] index_select perm : Elapsed 0.013 ms (1.329 ms / 100) 1.327 -> 1.328 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.15% +0.15%] index_select perm_sorted : Elapsed 0.013 ms (1.328 ms / 100) B = [5, 4, 16, 20] (stride (1280, 16, 1, 64)) A = [5, 40, 16, 20] (stride (12800, 320, 1, 16)) dim = 1 1.280 -> 1.280 ( +0.00%) [ +0.31% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select const : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.63%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select linear : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.283 ( +0.23%) [ +0.08% +0.00% +0.00% / +0.23% +0.55% +0.63%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.63% +0.55%] index_select spread : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.284 ( +0.31%) [ +0.08% +0.08% +0.00% / +0.31% +0.63% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.63% +0.63%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.08% +0.00% +0.00% / +0.23% +0.47% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.31% +0.00% +0.08% / +0.08% +0.47% +0.62%] index_select perm : Elapsed 0.013 ms (1.286 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) B = [5, 4, 16, 20] (stride (320, 1600, 1, 16)) A = [5, 40, 16, 20] (stride (40, 1, 4000, 200)) dim = 1 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.94% +0.86%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.279 -> 1.277 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.55% +0.39%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.16% +0.00% +0.00% / +0.24% +0.55% +0.47%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.71% +0.47%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.16% +0.08% +0.00% / +0.24% +0.86% +0.71%] index_select skip64 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.31% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.47% +0.39%] index_select spread : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.23%] index_select strided 5 : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.55% +0.70%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.55% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.63% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.39%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.47% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_select perm : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.70% +0.78%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [5, 4, 16, 20] (stride (80, 20, 400, 1)) A = [5, 40, 16, 20] (stride (16, 80, 1, 3200)) dim = 1 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.45% +0.53%] index_select const : Elapsed 0.013 ms (1.322 ms / 100) 1.332 -> 1.333 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.45% +0.45%] index_select wrap : Elapsed 0.013 ms (1.332 ms / 100) 1.346 -> 1.347 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.52% +0.59%] index_select linear : Elapsed 0.013 ms (1.347 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.38% +0.30%] index_select reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.45% +0.53%] index_select skip64 : Elapsed 0.013 ms (1.321 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.61% +0.53%] index_select skip256 : Elapsed 0.013 ms (1.321 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.61% +0.68%] index_select spread : Elapsed 0.013 ms (1.319 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.53% +0.53%] index_select strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.346 -> 1.346 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.52% +0.52%] index_select strided 5 : Elapsed 0.013 ms (1.347 ms / 100) 1.330 -> 1.331 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.45% +0.60%] index_select strided 7 : Elapsed 0.013 ms (1.331 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.45% +0.38%] index_select strided 8 : Elapsed 0.013 ms (1.322 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.45% +0.45%] index_select strided 16 : Elapsed 0.013 ms (1.321 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_select random : Elapsed 0.013 ms (1.325 ms / 100) 1.326 -> 1.328 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.38% +0.38%] index_select random_sorted : Elapsed 0.013 ms (1.328 ms / 100) 1.336 -> 1.338 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.75% +0.60%] index_select perm : Elapsed 0.013 ms (1.338 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.45% +0.45%] index_select perm_sorted : Elapsed 0.013 ms (1.323 ms / 100) B = [5, 4, 16, 20] (stride (64, 1, 4, 320)) A = [5, 40, 16, 20] (stride (320, 1600, 20, 1)) dim = 1 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.76% +0.76%] index_select const : Elapsed 0.012 ms (1.191 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_select wrap : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.59% +0.50%] index_select linear : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.59%] index_select reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.59% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select spread : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.195 ( +0.25%) [ +0.17% +0.08% +0.00% / +0.25% +0.42% +0.34%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.34% +0.42%] index_select strided 5 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.59% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.59%] index_select strided 16 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.34%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select random_sorted : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.50%] index_select perm : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select perm_sorted : Elapsed 0.012 ms (1.191 ms / 100) B = [5, 4, 16, 20] (stride (64, 1, 4, 320)) A = [5, 40, 16, 20] (stride (1, 1600, 100, 5)) dim = 1 1.194 -> 1.195 ( +0.08%) [ +0.08% +0.25% +0.00% / +0.08% +0.59% +0.59%] index_select const : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.25% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select wrap : Elapsed 0.012 ms (1.198 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.25% +0.00% +0.08% / +0.08% +0.33% +0.33%] index_select linear : Elapsed 0.012 ms (1.198 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.33% +0.00% +0.00% / +0.00% +0.33% +0.25%] index_select reverse : Elapsed 0.012 ms (1.200 ms / 100) 1.196 -> 1.195 ( -0.08%) [ +0.00% +0.08% +0.25% / -0.08% +0.33% +0.25%] index_select skip64 : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.67% +0.75%] index_select skip256 : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.67% +0.75%] index_select spread : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.34% +0.00% +0.00% / +0.08% +0.67% +0.67%] index_select strided 5 : Elapsed 0.012 ms (1.198 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.42% +0.08% +0.00% / +0.17% +0.92% +0.92%] index_select strided 7 : Elapsed 0.012 ms (1.197 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.92% +0.84%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.84% +0.75%] index_select strided 16 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.195 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.75% +0.75%] index_select random : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select random_sorted : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.59% +0.67%] index_select perm_sorted : Elapsed 0.012 ms (1.196 ms / 100) B = [5, 4, 16, 20] (stride (1, 5, 20, 320)) A = [5, 40, 16, 20] (stride (1, 80, 5, 3200)) dim = 1 1.333 -> 1.334 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.53% +0.53%] index_select const : Elapsed 0.013 ms (1.335 ms / 100) 1.337 -> 1.340 ( +0.22%) [ +0.22% +0.00% +0.07% / +0.22% +0.75% +0.52%] index_select wrap : Elapsed 0.013 ms (1.340 ms / 100) 1.340 -> 1.340 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.22% +0.37%] index_select linear : Elapsed 0.013 ms (1.341 ms / 100) 1.342 -> 1.343 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.60% +0.45%] index_select reverse : Elapsed 0.013 ms (1.343 ms / 100) 1.344 -> 1.345 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.45% +0.52%] index_select skip64 : Elapsed 0.013 ms (1.345 ms / 100) 1.339 -> 1.339 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.52% +0.45%] index_select skip256 : Elapsed 0.013 ms (1.340 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.37% +0.37%] index_select spread : Elapsed 0.013 ms (1.346 ms / 100) 1.330 -> 1.333 ( +0.23%) [ +0.15% +0.23% +0.00% / +0.23% +0.75% +0.60%] index_select strided 3 : Elapsed 0.013 ms (1.332 ms / 100) 1.333 -> 1.334 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.60% +0.75%] index_select strided 5 : Elapsed 0.013 ms (1.335 ms / 100) 1.329 -> 1.332 ( +0.23%) [ +0.30% +0.15% +0.00% / +0.23% +0.60% +0.60%] index_select strided 7 : Elapsed 0.013 ms (1.333 ms / 100) 1.350 -> 1.353 ( +0.22%) [ +0.07% +0.00% +0.00% / +0.22% +0.52% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.351 ms / 100) 1.343 -> 1.344 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.45% +0.52%] index_select strided 16 : Elapsed 0.013 ms (1.344 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.45% +0.45%] index_select random : Elapsed 0.013 ms (1.335 ms / 100) 1.338 -> 1.339 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.52% +0.37%] index_select random_sorted : Elapsed 0.013 ms (1.339 ms / 100) 1.347 -> 1.349 ( +0.15%) [ +0.30% +0.00% +0.15% / +0.15% +0.67% +0.74%] index_select perm : Elapsed 0.014 ms (1.351 ms / 100) 1.338 -> 1.338 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.52% +0.37%] index_select perm_sorted : Elapsed 0.013 ms (1.340 ms / 100) out_shape = [5, 40, 4, 20] in_shape = [5, 40, 16, 20] idx_dim = 2 B = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) A = [5, 40, 16, 20] (stride (12800, 16, 1, 640)) dim = 2 2.449 -> 2.452 ( +0.12%) [ +0.00% +0.20% +0.20% / +0.12% +0.65% +0.65%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.16% +0.16% +0.00% / -0.04% +0.29% +0.29%] index_select wrap : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.57% +0.53%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.452 -> 2.457 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.45% +0.37%] index_select reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.00% +0.00% +0.20% / +0.08% +0.37% +0.33%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.00% +0.12% +0.24% / +0.00% +0.37% +0.33%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.473 -> 2.481 ( +0.32%) [ +0.36% +0.00% +0.08% / +0.32% +0.57% +0.53%] index_select spread : Elapsed 0.025 ms (2.482 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.16% +0.00% +0.28% / +0.28% +0.77% +0.40%] index_select strided 3 : Elapsed 0.025 ms (2.474 ms / 100) 2.477 -> 2.479 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.48% +0.48%] index_select strided 5 : Elapsed 0.025 ms (2.480 ms / 100) 2.474 -> 2.481 ( +0.28%) [ +0.24% +0.00% +0.12% / +0.28% +0.73% +0.32%] index_select strided 7 : Elapsed 0.025 ms (2.480 ms / 100) 2.487 -> 2.487 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.84% +0.48%] index_select strided 8 : Elapsed 0.025 ms (2.489 ms / 100) 2.449 -> 2.454 ( +0.20%) [ +0.00% +0.29% +0.08% / +0.20% +0.53% +0.37%] index_select random : Elapsed 0.024 ms (2.449 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.16% +0.04% +0.00% / -0.12% +0.24% +0.29%] index_select random_sorted : Elapsed 0.025 ms (2.457 ms / 100) 2.477 -> 2.477 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.00% +0.44% +0.40%] index_select perm : Elapsed 0.025 ms (2.480 ms / 100) 2.478 -> 2.480 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.40% +0.48%] index_select perm_sorted : Elapsed 0.025 ms (2.478 ms / 100) B = [5, 40, 4, 20] (stride (3200, 80, 1, 4)) A = [5, 40, 16, 20] (stride (12800, 20, 800, 1)) dim = 2 2.259 -> 2.258 ( -0.04%) [ +0.13% +0.18% +0.00% / -0.04% +0.35% +0.27%] index_select const : Elapsed 0.023 ms (2.262 ms / 100) 2.318 -> 2.320 ( +0.09%) [ +0.00% +0.17% +0.13% / +0.09% +0.39% +0.26%] index_select wrap : Elapsed 0.023 ms (2.318 ms / 100) 2.319 -> 2.322 ( +0.13%) [ +0.00% +0.09% +0.04% / +0.13% +0.39% +0.22%] index_select linear : Elapsed 0.023 ms (2.319 ms / 100) 2.321 -> 2.323 ( +0.09%) [ +0.00% +0.13% +0.26% / +0.09% +0.17% +0.30%] index_select reverse : Elapsed 0.023 ms (2.321 ms / 100) 2.260 -> 2.264 ( +0.18%) [ +0.13% +0.00% +0.13% / +0.18% +0.18% +0.27%] index_select skip64 : Elapsed 0.023 ms (2.263 ms / 100) 2.260 -> 2.259 ( -0.04%) [ +0.00% +0.13% +0.04% / -0.04% +0.18% +0.00%] index_select skip256 : Elapsed 0.023 ms (2.260 ms / 100) 2.321 -> 2.322 ( +0.04%) [ +0.00% +0.30% +0.00% / +0.04% +0.26% +0.17%] index_select spread : Elapsed 0.023 ms (2.321 ms / 100) 2.318 -> 2.323 ( +0.22%) [ +0.13% +0.00% +0.26% / +0.22% +0.39% +0.52%] index_select strided 3 : Elapsed 0.023 ms (2.321 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.09% +0.04% +0.04%] index_select strided 5 : Elapsed 0.023 ms (2.324 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.47% +0.17%] index_select strided 7 : Elapsed 0.023 ms (2.323 ms / 100) 2.277 -> 2.280 ( +0.13%) [ +0.00% +0.00% +0.18% / +0.18% +0.22% +0.13%] index_select strided 8 : Elapsed 0.023 ms (2.277 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.09% +0.00% +0.17% / +0.09% +0.13% +0.26%] index_select random : Elapsed 0.023 ms (2.325 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.04% +0.00% +0.17% / +0.04% +0.43% +0.43%] index_select random_sorted : Elapsed 0.023 ms (2.321 ms / 100) 2.318 -> 2.321 ( +0.13%) [ +0.09% +0.13% +0.00% / +0.13% +0.47% +0.73%] index_select perm : Elapsed 0.023 ms (2.320 ms / 100) 2.317 -> 2.320 ( +0.13%) [ +0.26% +0.00% +0.09% / +0.13% +0.47% +0.56%] index_select perm_sorted : Elapsed 0.023 ms (2.323 ms / 100) B = [5, 40, 4, 20] (stride (3200, 1, 800, 40)) A = [5, 40, 16, 20] (stride (1, 5, 200, 3200)) dim = 2 2.516 -> 2.517 ( +0.04%) [ +0.00% +0.08% +0.12% / +0.04% +0.32% +0.20%] index_select const : Elapsed 0.025 ms (2.516 ms / 100) 2.504 -> 2.510 ( +0.24%) [ +0.00% +0.12% +0.04% / +0.32% +0.32% +0.24%] index_select wrap : Elapsed 0.025 ms (2.504 ms / 100) 2.506 -> 2.506 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.00% +0.32% +0.12%] index_select linear : Elapsed 0.025 ms (2.509 ms / 100) 2.502 -> 2.503 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.64% +0.72%] index_select reverse : Elapsed 0.025 ms (2.503 ms / 100) 2.509 -> 2.514 ( +0.20%) [ +0.12% +0.00% +0.12% / +0.20% +0.40% +0.56%] index_select skip64 : Elapsed 0.025 ms (2.512 ms / 100) 2.515 -> 2.518 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.36% +0.32%] index_select skip256 : Elapsed 0.025 ms (2.517 ms / 100) 2.506 -> 2.510 ( +0.16%) [ +0.24% +0.12% +0.00% / +0.16% +0.32% +0.52%] index_select spread : Elapsed 0.025 ms (2.512 ms / 100) 2.508 -> 2.507 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.04% +0.32%] index_select strided 3 : Elapsed 0.025 ms (2.511 ms / 100) 2.505 -> 2.506 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.48% +0.32%] index_select strided 5 : Elapsed 0.025 ms (2.507 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.44% +0.48%] index_select strided 7 : Elapsed 0.025 ms (2.503 ms / 100) 2.507 -> 2.508 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.28% +0.32%] index_select strided 8 : Elapsed 0.025 ms (2.510 ms / 100) 2.507 -> 2.513 ( +0.24%) [ +0.00% +0.04% +0.12% / +0.24% +0.60% +0.52%] index_select random : Elapsed 0.025 ms (2.507 ms / 100) 2.511 -> 2.511 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.08% +0.28% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.515 ms / 100) 2.504 -> 2.509 ( +0.20%) [ +0.12% +0.00% +0.24% / +0.20% +0.48% +0.28%] index_select perm : Elapsed 0.025 ms (2.507 ms / 100) 2.507 -> 2.508 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.16% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.509 ms / 100) B = [5, 40, 4, 20] (stride (4, 400, 1, 20)) A = [5, 40, 16, 20] (stride (1, 80, 5, 3200)) dim = 2 2.568 -> 2.572 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.23% +0.39%] index_select const : Elapsed 0.026 ms (2.572 ms / 100) 2.573 -> 2.570 ( -0.12%) [ +0.04% +0.00% +0.00% / -0.12% +0.23% +0.19%] index_select wrap : Elapsed 0.026 ms (2.574 ms / 100) 2.573 -> 2.573 ( +0.00%) [ +0.19% +0.00% +0.08% / +0.00% +0.12% +0.16%] index_select linear : Elapsed 0.026 ms (2.578 ms / 100) 2.588 -> 2.590 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.27% +0.12%] index_select reverse : Elapsed 0.026 ms (2.588 ms / 100) 2.570 -> 2.570 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.31% +0.16%] index_select skip64 : Elapsed 0.026 ms (2.571 ms / 100) 2.574 -> 2.573 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% -0.04% +0.08%] index_select skip256 : Elapsed 0.026 ms (2.576 ms / 100) 2.592 -> 2.599 ( +0.27%) [ +0.27% +0.00% +0.15% / +0.27% +0.39% +0.31%] index_select spread : Elapsed 0.026 ms (2.599 ms / 100) 2.590 -> 2.589 ( -0.04%) [ +0.08% +0.15% +0.00% / -0.04% +0.39% +0.19%] index_select strided 3 : Elapsed 0.026 ms (2.592 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.23% +0.12% +0.00% / +0.08% +0.55% +0.51%] index_select strided 5 : Elapsed 0.026 ms (2.567 ms / 100) 2.566 -> 2.574 ( +0.31%) [ +0.19% +0.19% +0.00% / +0.31% +0.47% +0.55%] index_select strided 7 : Elapsed 0.026 ms (2.571 ms / 100) 2.563 -> 2.568 ( +0.20%) [ +0.00% +0.23% +0.12% / +0.20% +0.43% +0.59%] index_select strided 8 : Elapsed 0.026 ms (2.563 ms / 100) 2.577 -> 2.577 ( +0.00%) [ +0.16% +0.04% +0.00% / +0.00% +0.31% +0.19%] index_select random : Elapsed 0.026 ms (2.581 ms / 100) 2.578 -> 2.583 ( +0.19%) [ +0.04% +0.08% +0.00% / +0.19% +0.35% +0.27%] index_select random_sorted : Elapsed 0.026 ms (2.579 ms / 100) 2.575 -> 2.579 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.43% +0.27%] index_select perm : Elapsed 0.026 ms (2.578 ms / 100) 2.574 -> 2.572 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.27% +0.23%] index_select perm_sorted : Elapsed 0.026 ms (2.574 ms / 100) B = [5, 40, 4, 20] (stride (1, 5, 4000, 200)) A = [5, 40, 16, 20] (stride (12800, 320, 20, 1)) dim = 2 2.279 -> 2.285 ( +0.26%) [ +0.00% +0.04% +0.13% / +0.26% +0.39% +0.53%] index_select const : Elapsed 0.023 ms (2.279 ms / 100) 2.341 -> 2.346 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.47% +0.34%] index_select wrap : Elapsed 0.023 ms (2.341 ms / 100) 2.353 -> 2.353 ( +0.00%) [ +0.21% +0.00% +0.04% / +0.00% +0.21% +0.30%] index_select linear : Elapsed 0.024 ms (2.358 ms / 100) 2.354 -> 2.356 ( +0.08%) [ +0.00% +0.13% +0.00% / +0.08% +0.30% +0.25%] index_select reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.276 -> 2.281 ( +0.22%) [ +0.04% +0.22% +0.00% / +0.22% +0.57% +0.70%] index_select skip64 : Elapsed 0.023 ms (2.277 ms / 100) 2.278 -> 2.280 ( +0.09%) [ +0.22% +0.04% +0.00% / +0.09% +0.31% +0.40%] index_select skip256 : Elapsed 0.023 ms (2.283 ms / 100) 2.347 -> 2.352 ( +0.21%) [ +0.17% +0.00% +0.30% / +0.21% +0.47% +0.38%] index_select spread : Elapsed 0.024 ms (2.351 ms / 100) 2.346 -> 2.352 ( +0.26%) [ +0.09% +0.00% +0.09% / +0.26% +0.34% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.348 ms / 100) 2.340 -> 2.340 ( +0.00%) [ +0.43% +0.00% +0.04% / +0.00% +0.51% +0.56%] index_select strided 5 : Elapsed 0.023 ms (2.350 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.00% +0.26% +0.21% / +0.13% +0.55% +0.55%] index_select strided 7 : Elapsed 0.024 ms (2.352 ms / 100) 2.295 -> 2.299 ( +0.17%) [ +0.26% +0.00% +0.00% / +0.17% +0.48% +0.70%] index_select strided 8 : Elapsed 0.023 ms (2.301 ms / 100) 2.339 -> 2.343 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.47% +0.30%] index_select random : Elapsed 0.023 ms (2.343 ms / 100) 2.346 -> 2.350 ( +0.17%) [ +0.26% +0.00% +0.04% / +0.17% +0.38% +0.55%] index_select random_sorted : Elapsed 0.024 ms (2.352 ms / 100) 2.346 -> 2.349 ( +0.13%) [ +0.00% +0.30% +0.26% / +0.13% +0.43% +0.38%] index_select perm : Elapsed 0.023 ms (2.346 ms / 100) 2.348 -> 2.349 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.30% +0.38%] index_select perm_sorted : Elapsed 0.023 ms (2.349 ms / 100) B = [5, 40, 4, 20] (stride (160, 4, 1, 800)) A = [5, 40, 16, 20] (stride (20, 100, 4000, 1)) dim = 2 2.300 -> 2.300 ( +0.00%) [ +0.22% +0.17% +0.00% / +0.00% +0.48% +0.30%] index_select const : Elapsed 0.023 ms (2.305 ms / 100) 2.345 -> 2.345 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.26% +0.30%] index_select wrap : Elapsed 0.023 ms (2.348 ms / 100) 2.340 -> 2.348 ( +0.34%) [ +0.38% +0.09% +0.00% / +0.34% +0.34% +0.43%] index_select linear : Elapsed 0.023 ms (2.349 ms / 100) 2.342 -> 2.349 ( +0.30%) [ +0.21% +0.04% +0.00% / +0.30% +0.30% +0.30%] index_select reverse : Elapsed 0.023 ms (2.347 ms / 100) 2.284 -> 2.284 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +0.22% +0.39%] index_select skip64 : Elapsed 0.023 ms (2.288 ms / 100) 2.297 -> 2.303 ( +0.26%) [ +0.13% +0.09% +0.00% / +0.26% +0.48% +0.30%] index_select skip256 : Elapsed 0.023 ms (2.300 ms / 100) 2.340 -> 2.343 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.13% +0.34% +0.30%] index_select spread : Elapsed 0.023 ms (2.341 ms / 100) 2.341 -> 2.340 ( -0.04%) [ +0.17% +0.00% +0.04% / -0.04% +0.30% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.345 ms / 100) 2.342 -> 2.346 ( +0.17%) [ +0.00% +0.09% +0.17% / +0.17% +0.34% +0.34%] index_select strided 5 : Elapsed 0.023 ms (2.342 ms / 100) 2.344 -> 2.350 ( +0.26%) [ +0.00% +0.13% +0.17% / +0.26% +0.55% +0.30%] index_select strided 7 : Elapsed 0.023 ms (2.344 ms / 100) 2.296 -> 2.302 ( +0.26%) [ +0.13% +0.00% +0.09% / +0.26% +0.48% +0.65%] index_select strided 8 : Elapsed 0.023 ms (2.299 ms / 100) 2.315 -> 2.316 ( +0.04%) [ +0.00% +0.00% +0.17% / +0.04% +0.52% +0.43%] index_select random : Elapsed 0.023 ms (2.315 ms / 100) 2.317 -> 2.313 ( -0.17%) [ +0.00% +0.13% +0.17% / -0.17% +0.35% +0.52%] index_select random_sorted : Elapsed 0.023 ms (2.317 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.17% +0.26% +0.47%] index_select perm : Elapsed 0.024 ms (2.351 ms / 100) 2.344 -> 2.347 ( +0.13%) [ +0.00% +0.09% +0.00% / +0.13% +0.26% +0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.344 ms / 100) B = [5, 40, 4, 20] (stride (1, 20, 5, 800)) A = [5, 40, 16, 20] (stride (12800, 1, 40, 640)) dim = 2 2.518 -> 2.518 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.36% +0.40%] index_select const : Elapsed 0.025 ms (2.522 ms / 100) 2.532 -> 2.534 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.32% +0.36%] index_select wrap : Elapsed 0.025 ms (2.534 ms / 100) 2.531 -> 2.535 ( +0.16%) [ +0.20% +0.08% +0.00% / +0.16% +0.43% +0.51%] index_select linear : Elapsed 0.025 ms (2.536 ms / 100) 2.530 -> 2.532 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.43% +0.63%] index_select reverse : Elapsed 0.025 ms (2.533 ms / 100) 2.515 -> 2.518 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.12% +0.64% +0.44%] index_select skip64 : Elapsed 0.025 ms (2.515 ms / 100) 2.514 -> 2.519 ( +0.20%) [ +0.00% +0.04% +0.00% / +0.20% +0.52% +0.56%] index_select skip256 : Elapsed 0.025 ms (2.514 ms / 100) 2.524 -> 2.527 ( +0.12%) [ +0.24% +0.00% +0.04% / +0.12% +0.52% +0.63%] index_select spread : Elapsed 0.025 ms (2.530 ms / 100) 2.527 -> 2.532 ( +0.20%) [ +0.12% +0.00% +0.04% / +0.20% +0.44% +0.51%] index_select strided 3 : Elapsed 0.025 ms (2.530 ms / 100) 2.525 -> 2.530 ( +0.20%) [ +0.28% +0.16% +0.00% / +0.20% +0.48% +0.36%] index_select strided 5 : Elapsed 0.025 ms (2.532 ms / 100) 2.535 -> 2.537 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.39% +0.43%] index_select strided 7 : Elapsed 0.025 ms (2.538 ms / 100) 2.510 -> 2.515 ( +0.20%) [ +0.20% +0.00% +0.08% / +0.20% +0.48% +0.48%] index_select strided 8 : Elapsed 0.025 ms (2.515 ms / 100) 2.531 -> 2.527 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.24% +0.24%] index_select random : Elapsed 0.025 ms (2.531 ms / 100) 2.528 -> 2.537 ( +0.36%) [ +0.36% +0.32% +0.00% / +0.36% +0.59% +0.51%] index_select random_sorted : Elapsed 0.025 ms (2.537 ms / 100) 2.533 -> 2.537 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.28% +0.36%] index_select perm : Elapsed 0.025 ms (2.537 ms / 100) 2.528 -> 2.533 ( +0.20%) [ +0.04% +0.04% +0.00% / +0.32% +0.20% +0.40%] index_select perm_sorted : Elapsed 0.025 ms (2.529 ms / 100) B = [5, 40, 4, 20] (stride (40, 1, 200, 800)) A = [5, 40, 16, 20] (stride (16, 1600, 1, 80)) dim = 2 2.471 -> 2.471 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.16% +0.16%] index_select const : Elapsed 0.025 ms (2.472 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.04% +0.00% +0.12% / -0.04% +0.28% +0.04%] index_select wrap : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.00% +0.28% / +0.12% +0.00% +0.08%] index_select linear : Elapsed 0.025 ms (2.470 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.32% +0.00% +0.16% / +0.20% +0.16% +0.20%] index_select reverse : Elapsed 0.025 ms (2.475 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.08% +0.00% +0.20% / +0.04% +0.24% +0.00%] index_select skip64 : Elapsed 0.025 ms (2.472 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.20% +0.12% +0.00% / +0.12% +0.20% +0.04%] index_select skip256 : Elapsed 0.025 ms (2.475 ms / 100) 2.489 -> 2.495 ( +0.24%) [ +0.20% +0.12% +0.00% / +0.24% +0.36% +0.44%] index_select spread : Elapsed 0.025 ms (2.494 ms / 100) 2.492 -> 2.499 ( +0.28%) [ +0.12% +0.00% +0.16% / +0.36% +0.28% +0.44%] index_select strided 3 : Elapsed 0.025 ms (2.495 ms / 100) 2.489 -> 2.492 ( +0.12%) [ +0.00% +0.20% +0.24% / +0.12% +0.40% +0.52%] index_select strided 5 : Elapsed 0.025 ms (2.489 ms / 100) 2.491 -> 2.495 ( +0.16%) [ +0.28% +0.00% +0.04% / +0.16% +0.52% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.498 ms / 100) 2.505 -> 2.507 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.28% +0.32%] index_select strided 8 : Elapsed 0.025 ms (2.508 ms / 100) 2.493 -> 2.493 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.24% +0.28%] index_select random : Elapsed 0.025 ms (2.493 ms / 100) 2.493 -> 2.493 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.32% +0.24%] index_select random_sorted : Elapsed 0.025 ms (2.493 ms / 100) 2.503 -> 2.506 ( +0.12%) [ +0.12% +0.00% +0.04% / +0.12% +0.20% +0.36%] index_select perm : Elapsed 0.025 ms (2.506 ms / 100) 2.505 -> 2.505 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.28% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.506 ms / 100) out_shape = [5, 40, 16, 4] in_shape = [5, 40, 16, 20] idx_dim = 3 B = [5, 40, 16, 4] (stride (2560, 4, 160, 1)) A = [5, 40, 16, 20] (stride (40, 1, 200, 3200)) dim = 3 2.024 -> 2.028 ( +0.20%) [ +0.00% +0.05% +0.00% / +0.20% +0.84% +0.84%] index_select const : Elapsed 0.020 ms (2.024 ms / 100) 2.035 -> 2.039 ( +0.20%) [ +0.25% +0.29% +0.00% / +0.20% +1.23% +1.03%] index_select wrap : Elapsed 0.020 ms (2.040 ms / 100) 2.043 -> 2.040 ( -0.15%) [ +0.05% +0.00% +0.00% / -0.15% +0.78% +0.78%] index_select linear : Elapsed 0.020 ms (2.044 ms / 100) 2.024 -> 2.028 ( +0.20%) [ +0.54% +0.25% +0.00% / +0.20% +0.64% +0.74%] index_select reverse : Elapsed 0.020 ms (2.035 ms / 100) 2.018 -> 2.022 ( +0.20%) [ +0.00% +0.00% +0.10% / +0.20% +0.89% +0.99%] index_select skip64 : Elapsed 0.020 ms (2.018 ms / 100) 2.026 -> 2.025 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.79% +0.84%] index_select skip256 : Elapsed 0.020 ms (2.028 ms / 100) 2.031 -> 2.034 ( +0.15%) [ +0.05% +0.00% +0.05% / +0.15% +0.94% +0.69%] index_select spread : Elapsed 0.020 ms (2.032 ms / 100) 2.042 -> 2.041 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.44% +0.39%] index_select strided 3 : Elapsed 0.020 ms (2.044 ms / 100) 2.040 -> 2.044 ( +0.20%) [ +0.10% +0.05% +0.00% / +0.20% +0.64% +0.54%] index_select strided 5 : Elapsed 0.020 ms (2.042 ms / 100) 2.047 -> 2.048 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.73% +0.93%] index_select strided 7 : Elapsed 0.020 ms (2.048 ms / 100) 2.034 -> 2.037 ( +0.15%) [ +0.00% +0.10% +0.15% / +0.15% +0.98% +1.03%] index_select strided 8 : Elapsed 0.020 ms (2.034 ms / 100) 2.035 -> 2.037 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +1.33% +1.33%] index_select strided 16 : Elapsed 0.020 ms (2.035 ms / 100) 2.036 -> 2.036 ( +0.00%) [ +0.29% +0.15% +0.00% / +0.05% +0.15% +0.00%] index_select random : Elapsed 0.020 ms (2.042 ms / 100) 2.040 -> 2.040 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.25% +0.00% +0.15%] index_select random_sorted : Elapsed 0.020 ms (2.044 ms / 100) 2.027 -> 2.025 ( -0.10%) [ +0.05% +0.20% +0.00% / -0.10% +0.64% +0.69%] index_select perm : Elapsed 0.020 ms (2.028 ms / 100) 2.029 -> 2.033 ( +0.20%) [ +0.34% +0.00% +0.05% / +0.20% +0.79% +0.79%] index_select perm_sorted : Elapsed 0.020 ms (2.036 ms / 100) B = [5, 40, 16, 4] (stride (2560, 16, 1, 640)) A = [5, 40, 16, 20] (stride (12800, 16, 1, 640)) dim = 3 0.725 -> 0.724 ( -0.14%) [ +0.28% +0.14% +0.00% / -0.14% +1.79% +2.07%] index_select const : Elapsed 0.007 ms (0.727 ms / 100) 0.731 -> 0.732 ( +0.14%) [ +0.14% +0.00% +0.27% / +0.14% +2.19% +2.33%] index_select wrap : Elapsed 0.007 ms (0.732 ms / 100) 0.729 -> 0.731 ( +0.27%) [ +0.27% +0.14% +0.00% / +0.27% +2.74% +2.61%] index_select linear : Elapsed 0.007 ms (0.731 ms / 100) 0.727 -> 0.728 ( +0.14%) [ +0.41% +0.28% +0.00% / +0.14% +3.16% +2.89%] index_select reverse : Elapsed 0.007 ms (0.730 ms / 100) 0.726 -> 0.726 ( +0.00%) [ +0.00% +0.41% +0.69% / +0.00% +0.83% +0.69%] index_select skip64 : Elapsed 0.007 ms (0.726 ms / 100) 0.726 -> 0.722 ( -0.55%) [ +0.00% +0.00% +0.14% / -0.55% +1.93% +1.52%] index_select skip256 : Elapsed 0.007 ms (0.726 ms / 100) 0.724 -> 0.725 ( +0.14%) [ +0.00% +0.00% +0.69% / +0.14% +1.80% +1.52%] index_select spread : Elapsed 0.007 ms (0.724 ms / 100) 0.723 -> 0.724 ( +0.14%) [ +0.41% +0.00% +0.55% / +0.14% +0.97% +1.24%] index_select strided 3 : Elapsed 0.007 ms (0.726 ms / 100) 0.724 -> 0.726 ( +0.28%) [ +0.28% +0.28% +0.00% / +0.28% +1.66% +0.69%] index_select strided 5 : Elapsed 0.007 ms (0.726 ms / 100) 0.726 -> 0.727 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +3.44% +2.75%] index_select strided 7 : Elapsed 0.007 ms (0.726 ms / 100) 0.722 -> 0.725 ( +0.42%) [ +0.55% +0.00% +0.55% / +0.42% +3.88% +4.02%] index_select strided 8 : Elapsed 0.007 ms (0.726 ms / 100) 0.722 -> 0.725 ( +0.42%) [ +0.28% +0.00% +0.14% / +0.42% +3.19% +3.19%] index_select strided 16 : Elapsed 0.007 ms (0.724 ms / 100) 0.722 -> 0.723 ( +0.14%) [ +0.42% +0.28% +0.00% / +0.14% +3.32% +3.05%] index_select random : Elapsed 0.007 ms (0.725 ms / 100) 0.724 -> 0.725 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +3.73% +4.01%] index_select random_sorted : Elapsed 0.007 ms (0.725 ms / 100) 0.737 -> 0.732 ( -0.68%) [ +0.27% +0.41% +0.00% / +0.27% -0.68% -0.68%] index_select perm : Elapsed 0.007 ms (0.739 ms / 100) 0.728 -> 0.730 ( +0.27%) [ +0.00% +0.27% +0.14% / +0.27% +0.82% +0.41%] index_select perm_sorted : Elapsed 0.007 ms (0.728 ms / 100) B = [5, 40, 16, 4] (stride (4, 320, 20, 1)) A = [5, 40, 16, 20] (stride (12800, 20, 800, 1)) dim = 3 2.085 -> 2.083 ( -0.10%) [ +0.14% +0.00% +0.10% / +0.05% +0.58% -0.10%] index_select const : Elapsed 0.021 ms (2.088 ms / 100) 2.082 -> 2.083 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.10% +0.34% +0.05%] index_select wrap : Elapsed 0.021 ms (2.085 ms / 100) 2.084 -> 2.084 ( +0.00%) [ +0.19% +0.05% +0.00% / +0.14% +0.00% +0.10%] index_select linear : Elapsed 0.021 ms (2.088 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.00% +0.29% +0.29% / +0.24% +0.48% +0.19%] index_select reverse : Elapsed 0.021 ms (2.080 ms / 100) 2.085 -> 2.084 ( -0.05%) [ +0.00% +0.00% +0.14% / +0.14% +0.05% -0.05%] index_select skip64 : Elapsed 0.021 ms (2.085 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.14% +0.19% +0.00% / +0.10% +0.24% +0.24%] index_select skip256 : Elapsed 0.021 ms (2.088 ms / 100) 2.141 -> 2.147 ( +0.28%) [ +0.00% +0.61% +0.37% / +0.28% +0.75% +0.37%] index_select spread : Elapsed 0.021 ms (2.141 ms / 100) 2.122 -> 2.124 ( +0.09%) [ +0.00% +0.19% +0.00% / +0.09% +0.14% +0.14%] index_select strided 3 : Elapsed 0.021 ms (2.122 ms / 100) 2.142 -> 2.143 ( +0.05%) [ +0.00% +0.56% +0.61% / +0.51% +0.05% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.142 ms / 100) 2.123 -> 2.128 ( +0.24%) [ +0.00% +0.05% +0.14% / +0.33% +0.24% +0.38%] index_select strided 7 : Elapsed 0.021 ms (2.123 ms / 100) 2.110 -> 2.114 ( +0.19%) [ +0.28% +0.14% +0.00% / +0.38% +0.19% +0.33%] index_select strided 8 : Elapsed 0.021 ms (2.116 ms / 100) 2.144 -> 2.144 ( +0.00%) [ +0.14% +0.05% +0.00% / +0.00% +0.05% +0.05%] index_select strided 16 : Elapsed 0.021 ms (2.147 ms / 100) 2.131 -> 2.136 ( +0.23%) [ +0.00% +0.23% +0.19% / +0.23% +0.23% +0.28%] index_select random : Elapsed 0.021 ms (2.131 ms / 100) 2.133 -> 2.137 ( +0.19%) [ +0.00% +0.19% +0.28% / +0.19% +0.28% +0.52%] index_select random_sorted : Elapsed 0.021 ms (2.133 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.29% +0.10% +0.00% / +0.19% +0.00% +0.19%] index_select perm : Elapsed 0.021 ms (2.101 ms / 100) 2.111 -> 2.110 ( -0.05%) [ +0.09% +0.00% +0.33% / -0.05% -0.05% +0.05%] index_select perm_sorted : Elapsed 0.021 ms (2.113 ms / 100) B = [5, 40, 16, 4] (stride (16, 320, 1, 80)) A = [5, 40, 16, 20] (stride (1, 5, 200, 3200)) dim = 3 2.108 -> 2.113 ( +0.24%) [ +0.00% +0.09% +0.00% / +0.24% +0.62% +0.76%] index_select const : Elapsed 0.021 ms (2.108 ms / 100) 2.109 -> 2.115 ( +0.28%) [ +0.00% +0.24% +0.05% / +0.28% +0.43% +0.43%] index_select wrap : Elapsed 0.021 ms (2.109 ms / 100) 2.105 -> 2.107 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.48% +0.48%] index_select linear : Elapsed 0.021 ms (2.105 ms / 100) 2.107 -> 2.106 ( -0.05%) [ +0.00% +0.00% +0.14% / -0.05% +0.19% +0.33%] index_select reverse : Elapsed 0.021 ms (2.107 ms / 100) 2.110 -> 2.110 ( +0.00%) [ +0.00% +0.24% +0.14% / +0.00% +0.43% +0.43%] index_select skip64 : Elapsed 0.021 ms (2.110 ms / 100) 2.105 -> 2.109 ( +0.19%) [ +0.00% +0.29% +0.19% / +0.19% +0.43% +0.86%] index_select skip256 : Elapsed 0.021 ms (2.105 ms / 100) 2.104 -> 2.108 ( +0.19%) [ +0.14% +0.00% +0.05% / +0.19% +0.71% +0.52%] index_select spread : Elapsed 0.021 ms (2.107 ms / 100) 2.106 -> 2.112 ( +0.28%) [ +0.33% +0.38% +0.00% / +0.33% +0.28% +0.38%] index_select strided 3 : Elapsed 0.021 ms (2.113 ms / 100) 2.113 -> 2.114 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.24% +0.33%] index_select strided 5 : Elapsed 0.021 ms (2.113 ms / 100) 2.107 -> 2.109 ( +0.09%) [ +0.24% +0.00% +0.14% / +0.09% +0.57% +0.66%] index_select strided 7 : Elapsed 0.021 ms (2.112 ms / 100) 2.119 -> 2.120 ( +0.05%) [ +0.09% +0.19% +0.00% / +0.05% +0.33% +0.14%] index_select strided 8 : Elapsed 0.021 ms (2.121 ms / 100) 2.110 -> 2.111 ( +0.05%) [ +0.24% +0.00% +0.00% / +0.05% +0.19% +0.33%] index_select strided 16 : Elapsed 0.021 ms (2.115 ms / 100) 2.110 -> 2.110 ( +0.00%) [ +0.05% +0.00% +0.19% / +0.00% +0.76% +0.76%] index_select random : Elapsed 0.021 ms (2.111 ms / 100) 2.118 -> 2.114 ( -0.19%) [ +0.05% +0.00% +0.05% / -0.19% +0.47% +0.47%] index_select random_sorted : Elapsed 0.021 ms (2.119 ms / 100) 2.111 -> 2.117 ( +0.28%) [ +0.28% +0.28% +0.00% / +0.28% +0.43% +0.38%] index_select perm : Elapsed 0.021 ms (2.117 ms / 100) 2.106 -> 2.103 ( -0.14%) [ +0.05% +0.00% +0.09% / -0.14% +0.33% +0.24%] index_select perm_sorted : Elapsed 0.021 ms (2.107 ms / 100) B = [5, 40, 16, 4] (stride (1, 320, 5, 80)) A = [5, 40, 16, 20] (stride (16, 1600, 1, 80)) dim = 3 2.002 -> 2.003 ( +0.05%) [ +0.50% +0.10% +0.00% / +0.50% +0.05% +0.25%] index_select const : Elapsed 0.020 ms (2.012 ms / 100) 2.045 -> 2.052 ( +0.34%) [ +0.10% +0.24% +0.00% / +0.34% +0.34% +0.34%] index_select wrap : Elapsed 0.020 ms (2.047 ms / 100) 2.049 -> 2.051 ( +0.10%) [ +0.29% +0.10% +0.00% / +0.20% +0.10% +0.10%] index_select linear : Elapsed 0.021 ms (2.055 ms / 100) 2.050 -> 2.050 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.10% +0.10%] index_select reverse : Elapsed 0.021 ms (2.051 ms / 100) 2.004 -> 2.002 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +0.05% +0.05%] index_select skip64 : Elapsed 0.020 ms (2.006 ms / 100) 2.004 -> 2.004 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.05% +0.05% +0.00%] index_select skip256 : Elapsed 0.020 ms (2.005 ms / 100) 2.044 -> 2.048 ( +0.20%) [ +0.00% +0.34% +0.34% / +0.29% +0.24% +0.20%] index_select spread : Elapsed 0.020 ms (2.044 ms / 100) 2.049 -> 2.050 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.15% +0.39%] index_select strided 3 : Elapsed 0.020 ms (2.049 ms / 100) 2.048 -> 2.047 ( -0.05%) [ +0.10% +0.05% +0.00% / +0.00% -0.05% +0.00%] index_select strided 5 : Elapsed 0.020 ms (2.050 ms / 100) 2.046 -> 2.052 ( +0.29%) [ +0.20% +0.15% +0.00% / +0.39% +0.34% +0.29%] index_select strided 7 : Elapsed 0.021 ms (2.050 ms / 100) 2.047 -> 2.048 ( +0.05%) [ +0.15% +0.10% +0.00% / +0.10% +0.05% +0.59%] index_select strided 8 : Elapsed 0.020 ms (2.050 ms / 100) 2.048 -> 2.049 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.15% +0.10%] index_select strided 16 : Elapsed 0.020 ms (2.049 ms / 100) 2.028 -> 2.031 ( +0.15%) [ +0.05% +0.00% +0.20% / +0.30% +0.15% +0.30%] index_select random : Elapsed 0.020 ms (2.029 ms / 100) 2.028 -> 2.031 ( +0.15%) [ +0.05% +0.00% +0.05% / +0.15% +0.35% +0.30%] index_select random_sorted : Elapsed 0.020 ms (2.029 ms / 100) 2.049 -> 2.049 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.15% +0.10% +0.00%] index_select perm : Elapsed 0.020 ms (2.049 ms / 100) 2.050 -> 2.047 ( -0.15%) [ +0.15% +0.10% +0.00% / -0.15% +0.29% +0.39%] index_select perm_sorted : Elapsed 0.021 ms (2.053 ms / 100) B = [5, 40, 16, 4] (stride (160, 4, 800, 1)) A = [5, 40, 16, 20] (stride (12800, 1, 800, 40)) dim = 3 1.998 -> 2.001 ( +0.15%) [ +0.05% +0.00% +0.00% / +0.15% +0.35% +0.35%] index_select const : Elapsed 0.020 ms (1.999 ms / 100) 2.015 -> 2.020 ( +0.25%) [ +0.20% +0.15% +0.00% / +0.25% +0.40% +0.50%] index_select wrap : Elapsed 0.020 ms (2.019 ms / 100) 2.025 -> 2.027 ( +0.10%) [ +0.00% +0.10% +0.20% / +0.10% +0.15% +0.15%] index_select linear : Elapsed 0.020 ms (2.025 ms / 100) 2.025 -> 2.025 ( +0.00%) [ +0.15% +0.05% +0.00% / +0.00% +0.40% +0.40%] index_select reverse : Elapsed 0.020 ms (2.028 ms / 100) 1.999 -> 1.999 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.40% +0.35%] index_select skip64 : Elapsed 0.020 ms (1.999 ms / 100) 2.007 -> 2.008 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.25% +0.25%] index_select skip256 : Elapsed 0.020 ms (2.007 ms / 100) 2.020 -> 2.021 ( +0.05%) [ +0.10% +0.00% +0.20% / +0.05% +0.30% +0.45%] index_select spread : Elapsed 0.020 ms (2.022 ms / 100) 2.024 -> 2.027 ( +0.15%) [ +0.25% +0.30% +0.00% / +0.15% +0.54% +0.44%] index_select strided 3 : Elapsed 0.020 ms (2.029 ms / 100) 2.031 -> 2.036 ( +0.25%) [ +0.00% +0.10% +0.25% / +0.25% +0.30% +0.54%] index_select strided 5 : Elapsed 0.020 ms (2.031 ms / 100) 2.031 -> 2.039 ( +0.39%) [ +0.10% +0.30% +0.00% / +0.39% +0.44% +0.39%] index_select strided 7 : Elapsed 0.020 ms (2.033 ms / 100) 2.025 -> 2.025 ( +0.00%) [ +0.10% +0.25% +0.00% / +0.00% +0.35% +0.25%] index_select strided 8 : Elapsed 0.020 ms (2.027 ms / 100) 2.024 -> 2.029 ( +0.25%) [ +0.00% +0.05% +0.10% / +0.25% +0.44% +0.35%] index_select strided 16 : Elapsed 0.020 ms (2.024 ms / 100) 2.002 -> 2.004 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.20% +0.25%] index_select random : Elapsed 0.020 ms (2.002 ms / 100) 2.035 -> 2.037 ( +0.10%) [ +0.15% +0.34% +0.00% / +0.10% +0.25% +0.49%] index_select random_sorted : Elapsed 0.020 ms (2.038 ms / 100) 2.032 -> 2.035 ( +0.15%) [ +0.05% +0.05% +0.00% / +0.15% +0.34% +0.25%] index_select perm : Elapsed 0.020 ms (2.033 ms / 100) 2.031 -> 2.033 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.10% +0.30% +0.44%] index_select perm_sorted : Elapsed 0.020 ms (2.031 ms / 100) B = [5, 40, 16, 4] (stride (160, 4, 800, 1)) A = [5, 40, 16, 20] (stride (40, 1, 4000, 200)) dim = 3 0.816 -> 0.798 ( -2.21%) [ +0.25% +0.25% +0.00% / +0.25% -1.72% -2.21%] index_select const : Elapsed 0.008 ms (0.818 ms / 100) 0.839 -> 0.817 ( -2.62%) [ +0.00% +0.72% +0.00% / +0.00% -2.62% -2.62%] index_select wrap : Elapsed 0.008 ms (0.839 ms / 100) 0.839 -> 0.818 ( -2.50%) [ +0.00% +0.12% +0.12% / +0.12% -2.50% -2.50%] index_select linear : Elapsed 0.008 ms (0.839 ms / 100) 0.825 -> 0.816 ( -1.09%) [ +0.00% +0.73% +0.36% / +0.48% -1.09% -1.09%] index_select reverse : Elapsed 0.008 ms (0.825 ms / 100) 0.817 -> 0.788 ( -3.55%) [ +0.24% +0.00% +0.12% / +0.24% -2.94% -3.55%] index_select skip64 : Elapsed 0.008 ms (0.819 ms / 100) 0.817 -> 0.799 ( -2.20%) [ +0.12% +0.12% +0.00% / +0.37% -2.20% -2.08%] index_select skip256 : Elapsed 0.008 ms (0.818 ms / 100) 0.833 -> 0.804 ( -3.48%) [ +0.00% +0.12% +0.24% / +0.12% -3.48% -2.88%] index_select spread : Elapsed 0.008 ms (0.833 ms / 100) 0.839 -> 0.799 ( -4.77%) [ +0.48% +0.00% +0.00% / +0.36% -4.77% -4.65%] index_select strided 3 : Elapsed 0.008 ms (0.843 ms / 100) 0.835 -> 0.799 ( -4.31%) [ +0.00% +0.24% +0.12% / +0.36% -4.31% -4.07%] index_select strided 5 : Elapsed 0.008 ms (0.835 ms / 100) 0.824 -> 0.793 ( -3.76%) [ +0.49% +0.36% +0.00% / +0.24% -3.76% -3.40%] index_select strided 7 : Elapsed 0.008 ms (0.828 ms / 100) 0.819 -> 0.797 ( -2.69%) [ +0.73% +0.37% +0.00% / +0.24% -2.56% -2.69%] index_select strided 8 : Elapsed 0.008 ms (0.825 ms / 100) 0.819 -> 0.800 ( -2.32%) [ +0.37% +0.49% +0.00% / +0.49% -2.20% -2.32%] index_select strided 16 : Elapsed 0.008 ms (0.822 ms / 100) 0.810 -> 0.806 ( -0.49%) [ +0.25% +0.25% +0.00% / +0.25% -0.49% -0.12%] index_select random : Elapsed 0.008 ms (0.812 ms / 100) 0.819 -> 0.816 ( -0.37%) [ +0.12% +0.37% +0.00% / +0.12% -0.37% +1.34%] index_select random_sorted : Elapsed 0.008 ms (0.820 ms / 100) 0.826 -> 0.797 ( -3.51%) [ +0.00% +0.24% +0.12% / +0.00% -3.51% -3.03%] index_select perm : Elapsed 0.008 ms (0.826 ms / 100) 0.827 -> 0.799 ( -3.39%) [ +0.00% +0.12% +0.24% / +0.24% -3.14% -3.39%] index_select perm_sorted : Elapsed 0.008 ms (0.827 ms / 100) B = [5, 40, 16, 4] (stride (640, 1, 40, 3200)) A = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) dim = 3 2.032 -> 2.035 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.15% +0.44% +0.39%] index_select const : Elapsed 0.020 ms (2.032 ms / 100) 2.025 -> 2.024 ( -0.05%) [ +0.20% +0.00% +0.00% / -0.05% +0.35% +0.25%] index_select wrap : Elapsed 0.020 ms (2.029 ms / 100) 2.024 -> 2.024 ( +0.00%) [ +0.10% +0.20% +0.00% / +0.00% +0.20% +0.35%] index_select linear : Elapsed 0.020 ms (2.026 ms / 100) 2.027 -> 2.030 ( +0.15%) [ +0.20% +0.35% +0.00% / +0.15% +0.35% +0.44%] index_select reverse : Elapsed 0.020 ms (2.031 ms / 100) 2.032 -> 2.034 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.39% +0.44%] index_select skip64 : Elapsed 0.020 ms (2.034 ms / 100) 2.030 -> 2.030 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.39% +0.15%] index_select skip256 : Elapsed 0.020 ms (2.032 ms / 100) 2.027 -> 2.029 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.30% +0.49%] index_select spread : Elapsed 0.020 ms (2.027 ms / 100) 2.024 -> 2.026 ( +0.10%) [ +0.00% +0.10% +0.25% / +0.15% +0.10% +0.15%] index_select strided 3 : Elapsed 0.020 ms (2.024 ms / 100) 2.027 -> 2.030 ( +0.15%) [ +0.05% +0.00% +0.05% / +0.30% +0.35% +0.15%] index_select strided 5 : Elapsed 0.020 ms (2.028 ms / 100) 2.031 -> 2.031 ( +0.00%) [ +0.20% +0.00% +0.10% / +0.00% +0.20% +0.25%] index_select strided 7 : Elapsed 0.020 ms (2.035 ms / 100) 2.024 -> 2.031 ( +0.35%) [ +0.15% +0.25% +0.00% / +0.35% +0.49% +0.49%] index_select strided 8 : Elapsed 0.020 ms (2.027 ms / 100) 2.030 -> 2.028 ( -0.10%) [ +0.05% +0.00% +0.15% / -0.10% +0.20% +0.54%] index_select strided 16 : Elapsed 0.020 ms (2.031 ms / 100) 2.023 -> 2.021 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.25% +0.30%] index_select random : Elapsed 0.020 ms (2.024 ms / 100) 2.032 -> 2.031 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.34% +0.30%] index_select random_sorted : Elapsed 0.020 ms (2.032 ms / 100) 2.028 -> 2.028 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.44% +0.20%] index_select perm : Elapsed 0.020 ms (2.030 ms / 100) 2.021 -> 2.021 ( +0.00%) [ +0.00% +0.15% +0.10% / +0.00% +0.49% +0.25%] index_select perm_sorted : Elapsed 0.020 ms (2.021 ms / 100) out_shape = [4, 40, 20, 16] in_shape = [5, 40, 20, 16] idx_dim = 0 B = [4, 40, 20, 16] (stride (12800, 1, 640, 40)) dim = 0 fill_cnt = 5 1.902 -> 1.906 ( +0.21%) [ +0.11% +0.00% +0.21% / +0.37% +0.79% +0.21%] index_fill_ const : Elapsed 0.019 ms (1.904 ms / 100) 1.922 -> 1.924 ( +0.10%) [ +0.05% +0.26% +0.00% / +0.16% +0.16% +0.10%] index_fill_ linear : Elapsed 0.019 ms (1.923 ms / 100) 1.914 -> 1.921 ( +0.37%) [ +0.00% +0.10% +0.21% / +0.37% +0.57% +0.42%] index_fill_ reverse : Elapsed 0.019 ms (1.914 ms / 100) 1.901 -> 1.907 ( +0.32%) [ +0.11% +0.00% +0.32% / +0.32% +0.58% +0.37%] index_fill_ skip64 : Elapsed 0.019 ms (1.903 ms / 100) 1.901 -> 1.908 ( +0.37%) [ +0.00% +0.42% +0.21% / +0.37% +0.47% +0.37%] index_fill_ skip256 : Elapsed 0.019 ms (1.901 ms / 100) 1.929 -> 1.933 ( +0.21%) [ +0.26% +0.00% +0.31% / +0.41% +0.21% +0.26%] index_fill_ spread : Elapsed 0.019 ms (1.934 ms / 100) 1.919 -> 1.919 ( +0.00%) [ +0.10% +0.00% +0.36% / +0.00% +0.68% +0.26%] index_fill_ strided 3 : Elapsed 0.019 ms (1.921 ms / 100) 1.912 -> 1.907 ( -0.26%) [ +0.10% +0.10% +0.00% / +0.10% -0.26% -0.21%] index_fill_ random : Elapsed 0.019 ms (1.914 ms / 100) 1.909 -> 1.907 ( -0.10%) [ +0.00% +0.26% +0.05% / +0.42% -0.10% -0.10%] index_fill_ random_sorted : Elapsed 0.019 ms (1.909 ms / 100) B = [4, 40, 20, 16] (stride (12800, 20, 1, 800)) A = [5, 40, 20, 16] (stride (12800, 320, 16, 1)) dim = 0 4.991 -> 4.994 ( +0.06%) [ +0.12% +0.20% +0.00% / +0.06% +0.44% +0.28%] index_select const : Elapsed 0.050 ms (4.997 ms / 100) 5.074 -> 5.075 ( +0.02%) [ +0.14% +0.00% +0.00% / +0.02% +0.24% +0.20%] index_select wrap : Elapsed 0.051 ms (5.081 ms / 100) 5.068 -> 5.080 ( +0.24%) [ +0.18% +0.00% +0.24% / +0.24% +0.36% +0.24%] index_select linear : Elapsed 0.051 ms (5.077 ms / 100) 5.077 -> 5.077 ( +0.00%) [ +0.06% +0.00% +0.04% / +0.18% +0.18% +0.00%] index_select reverse : Elapsed 0.051 ms (5.080 ms / 100) 4.996 -> 4.997 ( +0.02%) [ +0.04% +0.00% +0.18% / +0.02% +0.28% +0.24%] index_select skip64 : Elapsed 0.050 ms (4.998 ms / 100) 4.991 -> 4.996 ( +0.10%) [ +0.16% +0.00% +0.20% / +0.10% +0.40% +0.32%] index_select skip256 : Elapsed 0.050 ms (4.999 ms / 100) 5.076 -> 5.078 ( +0.04%) [ +0.02% +0.00% +0.00% / +0.10% +0.04% +0.06%] index_select spread : Elapsed 0.051 ms (5.077 ms / 100) 5.072 -> 5.071 ( -0.02%) [ +0.00% +0.04% +0.10% / -0.02% +0.26% +0.30%] index_select strided 3 : Elapsed 0.051 ms (5.072 ms / 100) 5.056 -> 5.050 ( -0.12%) [ +0.06% +0.06% +0.00% / +0.02% -0.12% +0.10%] index_select random : Elapsed 0.051 ms (5.059 ms / 100) 5.049 -> 5.048 ( -0.02%) [ +0.10% +0.00% +0.14% / +0.20% +0.08% -0.02%] index_select random_sorted : Elapsed 0.051 ms (5.054 ms / 100) 5.076 -> 5.079 ( +0.06%) [ +0.00% +0.20% +0.14% / +0.06% +0.16% +0.16%] index_select perm : Elapsed 0.051 ms (5.076 ms / 100) 5.077 -> 5.070 ( -0.14%) [ +0.06% +0.06% +0.00% / -0.14% +0.10% +0.22%] index_select perm_sorted : Elapsed 0.051 ms (5.080 ms / 100) B = [4, 40, 20, 16] (stride (320, 1280, 16, 1)) A = [5, 40, 20, 16] (stride (1, 100, 5, 4000)) dim = 0 3.402 -> 3.393 ( -0.26%) [ +0.21% +0.06% +0.00% / +0.15% -0.26% -0.21%] index_select const : Elapsed 0.034 ms (3.409 ms / 100) 3.402 -> 3.397 ( -0.15%) [ +0.15% +0.12% +0.00% / +0.26% +0.03% -0.15%] index_select wrap : Elapsed 0.034 ms (3.407 ms / 100) 3.401 -> 3.400 ( -0.03%) [ +0.00% +0.15% +0.26% / +0.29% -0.03% +0.09%] index_select linear : Elapsed 0.034 ms (3.401 ms / 100) 3.402 -> 3.401 ( -0.03%) [ +0.00% +0.12% +0.09% / +0.12% +0.06% -0.03%] index_select reverse : Elapsed 0.034 ms (3.402 ms / 100) 3.404 -> 3.396 ( -0.24%) [ +0.00% +0.03% +0.06% / -0.06% -0.15% -0.24%] index_select skip64 : Elapsed 0.034 ms (3.404 ms / 100) 3.405 -> 3.399 ( -0.18%) [ +0.15% +0.00% +0.03% / +0.03% -0.18% -0.15%] index_select skip256 : Elapsed 0.034 ms (3.410 ms / 100) 3.406 -> 3.395 ( -0.32%) [ +0.03% +0.00% +0.00% / +0.15% -0.32% -0.32%] index_select spread : Elapsed 0.034 ms (3.407 ms / 100) 3.402 -> 3.395 ( -0.21%) [ +0.21% +0.29% +0.00% / +0.18% -0.12% -0.21%] index_select strided 3 : Elapsed 0.034 ms (3.409 ms / 100) 3.403 -> 3.400 ( -0.09%) [ +0.00% +0.12% +0.06% / +0.06% -0.09% -0.03%] index_select random : Elapsed 0.034 ms (3.403 ms / 100) 3.396 -> 3.395 ( -0.03%) [ +0.00% +0.32% +0.21% / +0.09% -0.03% +0.00%] index_select random_sorted : Elapsed 0.034 ms (3.396 ms / 100) 3.404 -> 3.398 ( -0.18%) [ +0.06% +0.12% +0.00% / +0.00% +0.00% -0.18%] index_select perm : Elapsed 0.034 ms (3.406 ms / 100) 3.404 -> 3.402 ( -0.06%) [ +0.09% +0.00% +0.15% / +0.00% -0.06% +0.03%] index_select perm_sorted : Elapsed 0.034 ms (3.407 ms / 100) B = [4, 40, 20, 16] (stride (640, 16, 2560, 1)) dim = 0 fill_cnt = 5 3.650 -> 3.660 ( +0.27%) [ +0.00% +0.11% +0.16% / +0.27% +0.47% +0.41%] index_fill_ const : Elapsed 0.037 ms (3.650 ms / 100) 3.678 -> 3.678 ( +0.00%) [ +0.08% +0.11% +0.00% / +0.00% +0.14% +0.05%] index_fill_ linear : Elapsed 0.037 ms (3.681 ms / 100) 3.685 -> 3.685 ( +0.00%) [ +0.14% +0.08% +0.00% / +0.16% +0.00% +0.00%] index_fill_ reverse : Elapsed 0.037 ms (3.690 ms / 100) 3.654 -> 3.658 ( +0.11%) [ +0.05% +0.00% +0.08% / +0.11% +0.11% +0.33%] index_fill_ skip64 : Elapsed 0.037 ms (3.656 ms / 100) 3.653 -> 3.650 ( -0.08%) [ +0.00% +0.00% +0.03% / -0.08% +0.08% +0.00%] index_fill_ skip256 : Elapsed 0.037 ms (3.653 ms / 100) 3.656 -> 3.659 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.25% +0.22%] index_fill_ spread : Elapsed 0.037 ms (3.659 ms / 100) 3.661 -> 3.662 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.08% +0.03% +0.41%] index_fill_ strided 3 : Elapsed 0.037 ms (3.661 ms / 100) 3.672 -> 3.672 ( +0.00%) [ +0.00% +0.19% +0.22% / +0.00% +0.30% +0.33%] index_fill_ random : Elapsed 0.037 ms (3.672 ms / 100) 3.676 -> 3.678 ( +0.05%) [ +0.00% +0.22% +0.00% / +0.05% +0.49% +0.27%] index_fill_ random_sorted : Elapsed 0.037 ms (3.676 ms / 100) B = [4, 40, 20, 16] (stride (640, 16, 2560, 1)) A = [5, 40, 20, 16] (stride (12800, 20, 1, 800)) dim = 0 5.448 -> 5.456 ( +0.15%) [ +0.09% +0.06% +0.00% / +0.15% +0.55% +0.46%] index_select const : Elapsed 0.055 ms (5.453 ms / 100) 5.535 -> 5.528 ( -0.13%) [ +0.00% +0.09% +0.05% / +0.14% -0.13% -0.09%] index_select wrap : Elapsed 0.055 ms (5.535 ms / 100) 5.535 -> 5.525 ( -0.18%) [ +0.07% +0.00% +0.11% / +0.13% -0.18% -0.13%] index_select linear : Elapsed 0.055 ms (5.539 ms / 100) 5.544 -> 5.527 ( -0.31%) [ +0.00% +0.00% +0.13% / +0.04% -0.31% -0.27%] index_select reverse : Elapsed 0.055 ms (5.544 ms / 100) 5.448 -> 5.450 ( +0.04%) [ +0.00% +0.09% +0.20% / +0.04% +0.57% +0.42%] index_select skip64 : Elapsed 0.054 ms (5.448 ms / 100) 5.446 -> 5.449 ( +0.06%) [ +0.09% +0.09% +0.00% / +0.06% +0.46% +0.59%] index_select skip256 : Elapsed 0.055 ms (5.451 ms / 100) 5.537 -> 5.525 ( -0.22%) [ +0.11% +0.09% +0.00% / +0.00% -0.22% -0.18%] index_select spread : Elapsed 0.055 ms (5.543 ms / 100) 5.529 -> 5.529 ( +0.00%) [ +0.04% +0.00% +0.11% / +0.11% +0.07% +0.00%] index_select strided 3 : Elapsed 0.055 ms (5.531 ms / 100) 5.508 -> 5.509 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.02% +0.07% +0.04%] index_select random : Elapsed 0.055 ms (5.513 ms / 100) 5.509 -> 5.505 ( -0.07%) [ +0.15% +0.00% +0.02% / -0.07% -0.07% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.517 ms / 100) 5.535 -> 5.527 ( -0.14%) [ +0.00% +0.05% +0.02% / +0.04% -0.02% -0.14%] index_select perm : Elapsed 0.055 ms (5.535 ms / 100) 5.548 -> 5.525 ( -0.41%) [ +0.05% +0.14% +0.00% / -0.11% -0.34% -0.41%] index_select perm_sorted : Elapsed 0.056 ms (5.551 ms / 100) B = [4, 40, 20, 16] (stride (640, 1, 2560, 40)) A = [5, 40, 20, 16] (stride (40, 1, 3200, 200)) dim = 0 5.538 -> 5.541 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +0.25% +0.27%] index_select const : Elapsed 0.055 ms (5.538 ms / 100) 5.584 -> 5.589 ( +0.09%) [ +0.00% +0.02% +0.11% / +0.09% +0.18% +0.29%] index_select wrap : Elapsed 0.056 ms (5.584 ms / 100) 5.578 -> 5.585 ( +0.13%) [ +0.29% +0.00% +0.23% / +0.13% +0.38% +0.48%] index_select linear : Elapsed 0.056 ms (5.594 ms / 100) 5.586 -> 5.587 ( +0.02%) [ +0.05% +0.00% +0.20% / +0.02% +0.25% +0.29%] index_select reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.535 -> 5.544 ( +0.16%) [ +0.16% +0.09% +0.00% / +0.16% +0.31% +0.34%] index_select skip64 : Elapsed 0.055 ms (5.544 ms / 100) 5.540 -> 5.544 ( +0.07%) [ +0.14% +0.00% +0.09% / +0.07% +0.29% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.548 ms / 100) 5.581 -> 5.587 ( +0.11%) [ +0.14% +0.00% +0.05% / +0.11% +0.20% +0.34%] index_select spread : Elapsed 0.056 ms (5.589 ms / 100) 5.602 -> 5.610 ( +0.14%) [ +0.02% +0.04% +0.00% / +0.14% +0.16% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.603 ms / 100) 5.571 -> 5.579 ( +0.14%) [ +0.09% +0.00% +0.18% / +0.14% +0.36% +0.31%] index_select random : Elapsed 0.056 ms (5.576 ms / 100) 5.576 -> 5.577 ( +0.02%) [ +0.11% +0.00% +0.20% / +0.02% +0.20% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) 5.587 -> 5.598 ( +0.20%) [ +0.09% +0.00% +0.11% / +0.20% +0.25% +0.27%] index_select perm : Elapsed 0.056 ms (5.592 ms / 100) 5.585 -> 5.587 ( +0.04%) [ +0.00% +0.07% +0.16% / +0.04% +0.25% +0.16%] index_select perm_sorted : Elapsed 0.056 ms (5.585 ms / 100) B = [4, 40, 20, 16] (stride (40, 1, 160, 3200)) A = [5, 40, 20, 16] (stride (40, 1, 3200, 200)) dim = 0 5.541 -> 5.549 ( +0.14%) [ +0.00% +0.05% +0.13% / +0.14% +0.18% +0.16%] index_select const : Elapsed 0.055 ms (5.541 ms / 100) 5.587 -> 5.598 ( +0.20%) [ +0.00% +0.11% +0.16% / +0.27% +0.20% +0.29%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.593 -> 5.598 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.16% +0.13%] index_select linear : Elapsed 0.056 ms (5.598 ms / 100) 5.589 -> 5.593 ( +0.07%) [ +0.07% +0.18% +0.00% / +0.16% +0.07% +0.07%] index_select reverse : Elapsed 0.056 ms (5.593 ms / 100) 5.542 -> 5.542 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.02% +0.04% +0.00%] index_select skip64 : Elapsed 0.055 ms (5.547 ms / 100) 5.545 -> 5.545 ( +0.00%) [ +0.00% +0.05% +0.07% / +0.07% +0.00% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.545 ms / 100) 5.594 -> 5.594 ( +0.00%) [ +0.00% +0.13% +0.20% / +0.00% +0.11% +0.11%] index_select spread : Elapsed 0.056 ms (5.594 ms / 100) 5.609 -> 5.607 ( -0.04%) [ +0.05% +0.00% +0.11% / +0.07% -0.04% +0.00%] index_select strided 3 : Elapsed 0.056 ms (5.612 ms / 100) 5.595 -> 5.599 ( +0.07%) [ +0.00% +0.05% +0.09% / +0.14% +0.09% +0.07%] index_select random : Elapsed 0.056 ms (5.595 ms / 100) 5.594 -> 5.587 ( -0.13%) [ +0.00% +0.07% +0.21% / +0.09% -0.13% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.594 ms / 100) 5.600 -> 5.595 ( -0.09%) [ +0.05% +0.00% +0.05% / +0.14% +0.04% -0.09%] index_select perm : Elapsed 0.056 ms (5.603 ms / 100) 5.596 -> 5.597 ( +0.02%) [ +0.00% +0.07% +0.04% / +0.21% +0.18% +0.02%] index_select perm_sorted : Elapsed 0.056 ms (5.596 ms / 100) out_shape = [5, 4, 20, 16] in_shape = [5, 40, 20, 16] idx_dim = 1 B = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) A = [5, 40, 20, 16] (stride (12800, 320, 16, 1)) dim = 1 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.50%] index_select const : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.50%] index_select wrap : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.50%] index_select linear : Elapsed 0.012 ms (1.194 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.67%] index_select reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_select skip64 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.76% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select spread : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.67% +0.59%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.67% +0.67%] index_select strided 5 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.195 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.50% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.59% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.195 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.42%] index_select perm : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.34% +0.50%] index_select perm_sorted : Elapsed 0.012 ms (1.195 ms / 100) B = [5, 4, 20, 16] (stride (64, 16, 320, 1)) A = [5, 40, 20, 16] (stride (1, 5, 200, 4000)) dim = 1 1.363 -> 1.363 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.29% +0.44%] index_select const : Elapsed 0.014 ms (1.365 ms / 100) 1.363 -> 1.364 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.44% +0.51%] index_select wrap : Elapsed 0.014 ms (1.364 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.51% +0.44%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.362 -> 1.364 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.59% +0.51%] index_select reverse : Elapsed 0.014 ms (1.363 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.59% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.370 ( +0.29%) [ +0.07% +0.00% +0.00% / +0.29% +0.51% +0.44%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.44%] index_select spread : Elapsed 0.014 ms (1.366 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.44% +0.59%] index_select strided 3 : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.365 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.51% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.51% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.370 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.44% +0.44%] index_select random : Elapsed 0.014 ms (1.368 ms / 100) 1.365 -> 1.368 ( +0.22%) [ +0.22% +0.07% +0.00% / +0.22% +0.29% +0.37%] index_select random_sorted : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.372 ( +0.22%) [ +0.22% +0.00% +0.07% / +0.22% +0.58% +0.44%] index_select perm : Elapsed 0.014 ms (1.372 ms / 100) 1.365 -> 1.368 ( +0.22%) [ +0.15% +0.15% +0.00% / +0.22% +0.59% +0.59%] index_select perm_sorted : Elapsed 0.014 ms (1.367 ms / 100) B = [5, 4, 20, 16] (stride (1, 80, 320, 5)) A = [5, 40, 20, 16] (stride (16, 1600, 80, 1)) dim = 1 1.270 -> 1.271 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.79% +0.79%] index_select const : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.24% +0.00% +0.08% / +0.08% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.272 ( -0.16%) [ +0.00% +0.16% +0.31% / -0.16% +0.31% +0.39%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.63% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.55% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.275 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.47%] index_select spread : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.24%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.24% +0.00% +0.08% / +0.16% +0.55% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.271 -> 1.274 ( +0.24%) [ +0.16% +0.00% +0.08% / +0.24% +0.71% +0.63%] index_select random_sorted : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) B = [5, 4, 20, 16] (stride (20, 100, 1, 400)) A = [5, 40, 20, 16] (stride (1, 5, 3200, 200)) dim = 1 1.285 -> 1.287 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.54% +0.70%] index_select const : Elapsed 0.013 ms (1.287 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.283 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.23% +0.00% +0.00% / +0.16% +0.70% +0.70%] index_select linear : Elapsed 0.013 ms (1.286 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.55%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.285 -> 1.284 ( -0.08%) [ +0.00% +0.08% +0.16% / -0.08% +0.47% +0.62%] index_select skip64 : Elapsed 0.013 ms (1.285 ms / 100) 1.284 -> 1.287 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.62% +0.62%] index_select skip256 : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.62% +0.62%] index_select spread : Elapsed 0.013 ms (1.288 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.288 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.285 -> 1.287 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.62% +0.62%] index_select strided 7 : Elapsed 0.013 ms (1.287 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.62% +0.78%] index_select strided 8 : Elapsed 0.013 ms (1.288 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.62% +0.62%] index_select strided 16 : Elapsed 0.013 ms (1.288 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.63% +0.55%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.47% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.285 -> 1.288 ( +0.23%) [ +0.23% +0.16% +0.00% / +0.23% +0.39% +0.62%] index_select perm : Elapsed 0.013 ms (1.288 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.70% +0.54%] index_select perm_sorted : Elapsed 0.013 ms (1.287 ms / 100) out_shape = [5, 40, 4, 16] in_shape = [5, 40, 20, 16] idx_dim = 2 B = [5, 40, 4, 16] (stride (16, 320, 80, 1)) A = [5, 40, 20, 16] (stride (20, 100, 1, 4000)) dim = 2 2.211 -> 2.210 ( -0.05%) [ +0.09% +0.00% +0.18% / -0.05% +0.59% +0.63%] index_select const : Elapsed 0.022 ms (2.213 ms / 100) 2.212 -> 2.213 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.63% +0.68%] index_select wrap : Elapsed 0.022 ms (2.212 ms / 100) 2.213 -> 2.214 ( +0.05%) [ +0.00% +0.05% +0.18% / +0.05% +0.72% +0.90%] index_select linear : Elapsed 0.022 ms (2.213 ms / 100) 2.209 -> 2.208 ( -0.05%) [ +0.00% +0.05% +0.45% / -0.05% +0.91% +0.77%] index_select reverse : Elapsed 0.022 ms (2.209 ms / 100) 2.207 -> 2.213 ( +0.27%) [ +0.05% +0.00% +0.18% / +0.27% +0.82% +0.95%] index_select skip64 : Elapsed 0.022 ms (2.208 ms / 100) 2.207 -> 2.212 ( +0.23%) [ +0.14% +0.18% +0.00% / +0.23% +0.63% +0.77%] index_select skip256 : Elapsed 0.022 ms (2.210 ms / 100) 2.283 -> 2.287 ( +0.18%) [ +0.22% +0.09% +0.00% / +0.18% +0.53% +0.74%] index_select spread : Elapsed 0.023 ms (2.288 ms / 100) 2.263 -> 2.264 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.04% +0.53% +0.49%] index_select strided 3 : Elapsed 0.023 ms (2.263 ms / 100) 2.289 -> 2.289 ( +0.00%) [ +0.00% +0.31% +0.17% / +0.00% +0.61% +0.57%] index_select strided 5 : Elapsed 0.023 ms (2.289 ms / 100) 2.262 -> 2.269 ( +0.31%) [ +0.18% +0.00% +0.04% / +0.31% +0.75% +0.62%] index_select strided 7 : Elapsed 0.023 ms (2.266 ms / 100) 2.263 -> 2.268 ( +0.22%) [ +0.13% +0.13% +0.00% / +0.22% +0.66% +0.53%] index_select strided 8 : Elapsed 0.023 ms (2.266 ms / 100) 2.289 -> 2.293 ( +0.17%) [ +0.00% +0.09% +0.00% / +0.17% +0.74% +0.66%] index_select strided 16 : Elapsed 0.023 ms (2.289 ms / 100) 2.261 -> 2.260 ( -0.04%) [ +0.44% +0.00% +0.18% / -0.04% +0.80% +0.84%] index_select random : Elapsed 0.023 ms (2.271 ms / 100) 2.288 -> 2.293 ( +0.22%) [ +0.13% +0.09% +0.00% / +0.22% +0.57% +0.61%] index_select random_sorted : Elapsed 0.023 ms (2.291 ms / 100) 2.254 -> 2.266 ( +0.53%) [ +0.00% +0.04% +0.49% / +0.53% +0.62% +0.98%] index_select perm : Elapsed 0.023 ms (2.254 ms / 100) 2.256 -> 2.259 ( +0.13%) [ +0.04% +0.00% +0.04% / +0.13% +0.49% +0.62%] index_select perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 80, 5)) A = [5, 40, 20, 16] (stride (20, 100, 1, 4000)) dim = 2 2.223 -> 2.225 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.27% +0.31%] index_select const : Elapsed 0.022 ms (2.223 ms / 100) 2.225 -> 2.224 ( -0.04%) [ +0.13% +0.00% +0.09% / -0.04% +0.04% +0.13%] index_select wrap : Elapsed 0.022 ms (2.228 ms / 100) 2.227 -> 2.225 ( -0.09%) [ +0.13% +0.00% +0.09% / +0.04% +0.04% -0.09%] index_select linear : Elapsed 0.022 ms (2.230 ms / 100) 2.224 -> 2.226 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.09% +0.36%] index_select reverse : Elapsed 0.022 ms (2.224 ms / 100) 2.225 -> 2.228 ( +0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.13% +0.18%] index_select skip64 : Elapsed 0.022 ms (2.225 ms / 100) 2.226 -> 2.228 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.36% +0.09% +0.27%] index_select skip256 : Elapsed 0.022 ms (2.226 ms / 100) 2.298 -> 2.297 ( -0.04%) [ +0.13% +0.22% +0.00% / +0.30% -0.04% +0.17%] index_select spread : Elapsed 0.023 ms (2.301 ms / 100) 2.270 -> 2.267 ( -0.13%) [ +0.31% +0.00% +0.00% / +0.35% -0.13% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.277 ms / 100) 2.302 -> 2.296 ( -0.26%) [ +0.39% +0.17% +0.00% / +0.09% -0.04% -0.26%] index_select strided 5 : Elapsed 0.023 ms (2.311 ms / 100) 2.280 -> 2.276 ( -0.18%) [ +0.00% +0.00% +0.00% / +0.04% -0.18% -0.18%] index_select strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.276 -> 2.278 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.13% +0.09% +0.18%] index_select strided 8 : Elapsed 0.023 ms (2.278 ms / 100) 2.293 -> 2.297 ( +0.17%) [ +0.48% +0.13% +0.00% / +0.57% +0.31% +0.17%] index_select strided 16 : Elapsed 0.023 ms (2.304 ms / 100) 2.270 -> 2.273 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.18% +0.44% +0.13%] index_select random : Elapsed 0.023 ms (2.270 ms / 100) 2.275 -> 2.275 ( +0.00%) [ +0.00% +0.09% +0.18% / +0.09% +0.00% +0.00%] index_select random_sorted : Elapsed 0.023 ms (2.275 ms / 100) 2.275 -> 2.278 ( +0.13%) [ +0.22% +0.31% +0.00% / +0.31% +0.22% +0.13%] index_select perm : Elapsed 0.023 ms (2.280 ms / 100) 2.276 -> 2.270 ( -0.26%) [ +0.00% +0.22% +0.18% / -0.26% +0.00% -0.04%] index_select perm_sorted : Elapsed 0.023 ms (2.276 ms / 100) B = [5, 40, 4, 16] (stride (160, 4, 1, 800)) dim = 2 fill_cnt = 20 good 1.957 -> 1.805 ( -7.77%) [ +0.00% +0.20% +0.46% / -6.69% -7.77% -7.56%] index_fill_ const : Elapsed 0.020 ms (1.957 ms / 100) good 1.899 -> 1.753 ( -7.69%) [ +0.00% +0.11% +0.16% / -4.48% -7.53% -7.69%] index_fill_ linear : Elapsed 0.019 ms (1.899 ms / 100) good 1.902 -> 1.755 ( -7.73%) [ +0.21% +0.00% +0.00% / -4.99% -7.73% -7.68%] index_fill_ reverse : Elapsed 0.019 ms (1.906 ms / 100) good 1.917 -> 1.799 ( -6.16%) [ +0.05% +0.00% +0.16% / -6.16% -5.74% -5.32%] index_fill_ skip64 : Elapsed 0.019 ms (1.918 ms / 100) good 1.918 -> 1.800 ( -6.15%) [ +0.00% +0.26% +0.21% / -6.15% -5.53% -5.53%] index_fill_ skip256 : Elapsed 0.019 ms (1.918 ms / 100) good 1.902 -> 1.753 ( -7.83%) [ +0.00% +0.05% +0.16% / -7.83% -6.78% -6.73%] index_fill_ spread : Elapsed 0.019 ms (1.902 ms / 100) good 1.901 -> 1.750 ( -7.94%) [ +0.21% +0.00% +0.26% / -7.94% -6.58% -6.58%] index_fill_ strided 3 : Elapsed 0.019 ms (1.905 ms / 100) good 1.897 -> 1.755 ( -7.49%) [ +0.11% +0.00% +0.05% / -6.27% -7.27% -7.49%] index_fill_ random : Elapsed 0.019 ms (1.899 ms / 100) good 1.896 -> 1.759 ( -7.23%) [ +0.00% +0.21% +0.11% / -6.12% -7.23% -7.23%] index_fill_ random_sorted : Elapsed 0.019 ms (1.896 ms / 100) out_shape = [5, 40, 20, 4] in_shape = [5, 40, 20, 16] idx_dim = 3 B = [5, 40, 20, 4] (stride (3200, 80, 1, 20)) A = [5, 40, 20, 16] (stride (1, 1600, 5, 100)) dim = 3 2.284 -> 2.287 ( +0.13%) [ +0.09% +0.00% +0.35% / +0.13% +0.26% +0.22%] index_select const : Elapsed 0.023 ms (2.286 ms / 100) 2.281 -> 2.282 ( +0.04%) [ +0.09% +0.00% +0.13% / +0.04% +0.31% +0.26%] index_select wrap : Elapsed 0.023 ms (2.283 ms / 100) 2.280 -> 2.285 ( +0.22%) [ +0.00% +0.04% +0.04% / +0.22% +0.44% +0.57%] index_select linear : Elapsed 0.023 ms (2.280 ms / 100) 2.280 -> 2.277 ( -0.13%) [ +0.04% +0.00% +0.00% / -0.13% +0.35% +0.31%] index_select reverse : Elapsed 0.023 ms (2.281 ms / 100) 2.281 -> 2.285 ( +0.18%) [ +0.00% +0.13% +0.00% / +0.18% +0.35% +0.18%] index_select skip64 : Elapsed 0.023 ms (2.281 ms / 100) 2.279 -> 2.288 ( +0.39%) [ +0.35% +0.00% +0.31% / +0.39% +0.39% +0.48%] index_select skip256 : Elapsed 0.023 ms (2.287 ms / 100) 2.280 -> 2.282 ( +0.09%) [ +0.18% +0.00% +0.13% / +0.09% +0.18% +0.22%] index_select spread : Elapsed 0.023 ms (2.284 ms / 100) 2.282 -> 2.281 ( -0.04%) [ +0.22% +0.00% +0.00% / +0.13% +0.13% -0.04%] index_select strided 3 : Elapsed 0.023 ms (2.287 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.00% +0.13% +0.18% / +0.09% +0.39% +0.39%] index_select strided 5 : Elapsed 0.023 ms (2.279 ms / 100) 2.280 -> 2.279 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.31% +0.35%] index_select strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.276 -> 2.277 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.35% +0.40%] index_select strided 8 : Elapsed 0.023 ms (2.278 ms / 100) 2.278 -> 2.283 ( +0.22%) [ +0.13% +0.00% +0.13% / +0.22% +0.31% +0.26%] index_select random : Elapsed 0.023 ms (2.281 ms / 100) 2.277 -> 2.278 ( +0.04%) [ +0.00% +0.22% +0.18% / +0.04% +0.35% +0.35%] index_select random_sorted : Elapsed 0.023 ms (2.277 ms / 100) 2.282 -> 2.282 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.31% +0.22%] index_select perm : Elapsed 0.023 ms (2.283 ms / 100) 2.281 -> 2.282 ( +0.04%) [ +0.18% +0.04% +0.00% / +0.04% +0.39% +0.44%] index_select perm_sorted : Elapsed 0.023 ms (2.285 ms / 100) B = [5, 40, 20, 4] (stride (800, 1, 40, 4000)) A = [5, 40, 20, 16] (stride (800, 1, 40, 4000)) dim = 3 2.490 -> 2.490 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.00% +0.12% +0.16%] index_select const : Elapsed 0.025 ms (2.494 ms / 100) 2.498 -> 2.499 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.12% +0.24% +0.04%] index_select wrap : Elapsed 0.025 ms (2.501 ms / 100) 2.497 -> 2.498 ( +0.04%) [ +0.24% +0.00% +0.12% / +0.20% +0.20% +0.04%] index_select linear : Elapsed 0.025 ms (2.503 ms / 100) 2.494 -> 2.494 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.08% +0.00% +0.12%] index_select reverse : Elapsed 0.025 ms (2.494 ms / 100) 2.486 -> 2.483 ( -0.12%) [ +0.00% +0.08% +0.00% / -0.12% +0.00% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.486 ms / 100) 2.487 -> 2.494 ( +0.28%) [ +0.28% +0.00% +0.12% / +0.28% +0.32% +0.36%] index_select skip256 : Elapsed 0.025 ms (2.494 ms / 100) 2.500 -> 2.503 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.20% +0.16% +0.12%] index_select spread : Elapsed 0.025 ms (2.501 ms / 100) 2.496 -> 2.498 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.12% +0.20% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.499 ms / 100) 2.499 -> 2.504 ( +0.20%) [ +0.08% +0.00% +0.00% / +0.20% +0.28% +0.28%] index_select strided 5 : Elapsed 0.025 ms (2.501 ms / 100) 2.493 -> 2.494 ( +0.04%) [ +0.24% +0.28% +0.00% / +0.04% +0.40% +0.40%] index_select strided 7 : Elapsed 0.025 ms (2.499 ms / 100) 2.487 -> 2.484 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.32% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.488 ms / 100) 2.496 -> 2.497 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.28% +0.24%] index_select random : Elapsed 0.025 ms (2.496 ms / 100) 2.496 -> 2.498 ( +0.08%) [ +0.40% +0.20% +0.00% / +0.08% +0.24% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.506 ms / 100) 2.499 -> 2.498 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.12% +0.28%] index_select perm : Elapsed 0.025 ms (2.500 ms / 100) 2.494 -> 2.495 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.20% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.494 ms / 100) B = [5, 40, 20, 4] (stride (20, 100, 1, 4000)) A = [5, 40, 20, 16] (stride (12800, 16, 640, 1)) dim = 3 2.569 -> 2.574 ( +0.19%) [ +0.00% +0.00% +0.04% / +0.19% +0.35% +0.35%] index_select const : Elapsed 0.026 ms (2.569 ms / 100) 2.571 -> 2.575 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.23% +0.43%] index_select wrap : Elapsed 0.026 ms (2.572 ms / 100) 2.569 -> 2.576 ( +0.27%) [ +0.00% +0.00% +0.12% / +0.27% +0.51% +0.31%] index_select linear : Elapsed 0.026 ms (2.569 ms / 100) 2.561 -> 2.572 ( +0.43%) [ +0.00% +0.04% +0.23% / +0.43% +0.74% +0.74%] index_select reverse : Elapsed 0.026 ms (2.561 ms / 100) 2.562 -> 2.570 ( +0.31%) [ +0.35% +0.00% +0.23% / +0.31% +0.82% +0.78%] index_select skip64 : Elapsed 0.026 ms (2.571 ms / 100) 2.565 -> 2.567 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.58% +0.55%] index_select skip256 : Elapsed 0.026 ms (2.567 ms / 100) 2.599 -> 2.601 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.54% +0.50%] index_select spread : Elapsed 0.026 ms (2.599 ms / 100) 2.598 -> 2.602 ( +0.15%) [ +0.04% +0.15% +0.00% / +0.15% +0.46% +0.23%] index_select strided 3 : Elapsed 0.026 ms (2.599 ms / 100) 2.601 -> 2.600 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.58% +0.38%] index_select strided 5 : Elapsed 0.026 ms (2.604 ms / 100) 2.601 -> 2.603 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.23% +0.38%] index_select strided 7 : Elapsed 0.026 ms (2.602 ms / 100) 2.607 -> 2.613 ( +0.23%) [ +0.15% +0.08% +0.00% / +0.23% +0.38% +0.58%] index_select strided 8 : Elapsed 0.026 ms (2.611 ms / 100) 2.609 -> 2.615 ( +0.23%) [ +0.15% +0.00% +0.15% / +0.23% +0.38% +0.61%] index_select random : Elapsed 0.026 ms (2.613 ms / 100) 2.598 -> 2.601 ( +0.12%) [ +0.35% +0.00% +0.08% / +0.12% +0.62% +0.50%] index_select random_sorted : Elapsed 0.026 ms (2.607 ms / 100) 2.601 -> 2.601 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.50%] index_select perm : Elapsed 0.026 ms (2.601 ms / 100) 2.610 -> 2.613 ( +0.11%) [ +0.15% +0.00% +0.34% / +0.11% +0.46% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.614 ms / 100) out_shape = [4, 5, 20, 40] in_shape = [16, 5, 20, 40] idx_dim = 0 B = [4, 5, 20, 40] (stride (4000, 800, 1, 20)) A = [16, 5, 20, 40] (stride (800, 12800, 40, 1)) dim = 0 2.269 -> 2.270 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.13% +0.18%] index_select const : Elapsed 0.023 ms (2.270 ms / 100) 2.331 -> 2.334 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.30% +0.34%] index_select wrap : Elapsed 0.023 ms (2.334 ms / 100) 2.328 -> 2.332 ( +0.17%) [ +0.26% +0.13% +0.00% / +0.17% +0.30% +0.43%] index_select linear : Elapsed 0.023 ms (2.334 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.51% +0.30%] index_select reverse : Elapsed 0.023 ms (2.331 ms / 100) 2.268 -> 2.269 ( +0.04%) [ +0.22% +0.09% +0.00% / +0.04% +0.13% +0.04%] index_select skip64 : Elapsed 0.023 ms (2.273 ms / 100) 2.270 -> 2.269 ( -0.04%) [ +0.22% +0.00% +0.00% / +0.00% +0.04% -0.04%] index_select skip256 : Elapsed 0.023 ms (2.275 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.34% +0.00% +0.09% / +0.13% +0.30% +0.34%] index_select spread : Elapsed 0.023 ms (2.335 ms / 100) 2.331 -> 2.331 ( +0.00%) [ +0.00% +0.04% +0.09% / +0.00% +0.17% +0.00%] index_select strided 3 : Elapsed 0.023 ms (2.331 ms / 100) 2.330 -> 2.334 ( +0.17%) [ +0.13% +0.09% +0.00% / +0.17% +0.26% +0.17%] index_select strided 5 : Elapsed 0.023 ms (2.333 ms / 100) 2.328 -> 2.330 ( +0.09%) [ +0.21% +0.26% +0.00% / +0.09% +0.26% +0.26%] index_select strided 7 : Elapsed 0.023 ms (2.333 ms / 100) 2.283 -> 2.285 ( +0.09%) [ +0.00% +0.22% +0.31% / +0.09% +0.26% +0.09%] index_select strided 8 : Elapsed 0.023 ms (2.283 ms / 100) 2.327 -> 2.334 ( +0.30%) [ +0.00% +0.09% +0.17% / +0.30% +0.30% +0.34%] index_select random : Elapsed 0.023 ms (2.327 ms / 100) 2.329 -> 2.328 ( -0.04%) [ +0.00% +0.09% +0.00% / -0.04% +0.34% +0.21%] index_select random_sorted : Elapsed 0.023 ms (2.329 ms / 100) 2.330 -> 2.333 ( +0.13%) [ +0.26% +0.09% +0.00% / +0.13% +0.13% +0.13%] index_select perm : Elapsed 0.023 ms (2.336 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.21% +0.21% +0.00% / +0.09% +0.04% +0.39%] index_select perm_sorted : Elapsed 0.023 ms (2.337 ms / 100) B = [4, 5, 20, 40] (stride (4000, 20, 1, 100)) A = [16, 5, 20, 40] (stride (800, 12800, 40, 1)) dim = 0 1.090 -> 1.098 ( +0.73%) [ +0.46% +0.28% +0.00% / +0.73% +1.19% +1.10%] index_select const : Elapsed 0.011 ms (1.095 ms / 100) 1.118 -> 1.118 ( +0.00%) [ +0.27% +0.45% +0.00% / +0.00% +1.88% +1.61%] index_select wrap : Elapsed 0.011 ms (1.121 ms / 100) 1.119 -> 1.122 ( +0.27%) [ +0.18% +0.09% +0.00% / +0.27% +1.07% +1.97%] index_select linear : Elapsed 0.011 ms (1.121 ms / 100) 1.120 -> 1.120 ( +0.00%) [ +0.27% +0.00% +0.09% / +0.00% +1.79% +1.87%] index_select reverse : Elapsed 0.011 ms (1.123 ms / 100) 1.093 -> 1.096 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +1.01% +0.64%] index_select skip64 : Elapsed 0.011 ms (1.093 ms / 100) 1.092 -> 1.094 ( +0.18%) [ +0.37% +0.00% +0.37% / +0.18% +1.19% +1.01%] index_select skip256 : Elapsed 0.011 ms (1.096 ms / 100) 1.109 -> 1.114 ( +0.45%) [ +0.00% +0.00% +0.27% / +0.45% +4.51% +5.14%] index_select spread : Elapsed 0.011 ms (1.109 ms / 100) 1.106 -> 1.113 ( +0.63%) [ +0.09% +0.00% +0.81% / +0.63% +1.45% +1.72%] index_select strided 3 : Elapsed 0.011 ms (1.107 ms / 100) 1.127 -> 1.126 ( -0.09%) [ +0.00% +0.35% +0.18% / -0.09% +1.06% +1.33%] index_select strided 5 : Elapsed 0.011 ms (1.127 ms / 100) 1.127 -> 1.119 ( -0.71%) [ +0.62% +0.00% +0.98% / +0.09% -0.44% -0.71%] index_select strided 7 : Elapsed 0.011 ms (1.134 ms / 100) 1.094 -> 1.095 ( +0.09%) [ +0.00% +0.09% +0.46% / +0.09% +2.38% +2.83%] index_select strided 8 : Elapsed 0.011 ms (1.094 ms / 100) 1.101 -> 1.104 ( +0.27%) [ +0.18% +0.00% +0.36% / +0.27% +2.63% +2.18%] index_select random : Elapsed 0.011 ms (1.103 ms / 100) 1.107 -> 1.110 ( +0.27%) [ +0.00% +0.09% +0.09% / +0.27% +1.99% +2.35%] index_select random_sorted : Elapsed 0.011 ms (1.107 ms / 100) 1.105 -> 1.106 ( +0.09%) [ +0.09% +0.00% +0.27% / +0.09% +3.98% +3.44%] index_select perm : Elapsed 0.011 ms (1.106 ms / 100) 1.107 -> 1.110 ( +0.27%) [ +0.00% +0.27% +0.81% / +0.27% +2.53% +2.71%] index_select perm_sorted : Elapsed 0.011 ms (1.107 ms / 100) B = [4, 5, 20, 40] (stride (40, 160, 800, 1)) A = [16, 5, 20, 40] (stride (4000, 800, 40, 1)) dim = 0 2.167 -> 2.168 ( +0.05%) [ +0.00% +0.18% +0.05% / +0.05% +0.37% +0.42%] index_select const : Elapsed 0.022 ms (2.167 ms / 100) 2.228 -> 2.229 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.04% +0.40% +0.40%] index_select wrap : Elapsed 0.022 ms (2.231 ms / 100) 2.226 -> 2.228 ( +0.09%) [ +0.27% +0.18% +0.00% / +0.09% +0.36% +0.31%] index_select linear : Elapsed 0.022 ms (2.232 ms / 100) 2.227 -> 2.228 ( +0.04%) [ +0.18% +0.00% +0.04% / +0.04% +0.18% +0.40%] index_select reverse : Elapsed 0.022 ms (2.231 ms / 100) 2.166 -> 2.167 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.46% +0.65%] index_select skip64 : Elapsed 0.022 ms (2.167 ms / 100) 2.167 -> 2.172 ( +0.23%) [ +0.05% +0.00% +0.14% / +0.23% +0.42% +0.51%] index_select skip256 : Elapsed 0.022 ms (2.168 ms / 100) 2.226 -> 2.227 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.54% +0.54%] index_select spread : Elapsed 0.022 ms (2.227 ms / 100) 2.229 -> 2.233 ( +0.18%) [ +0.13% +0.00% +0.09% / +0.18% +0.36% +0.40%] index_select strided 3 : Elapsed 0.022 ms (2.232 ms / 100) 2.231 -> 2.233 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +0.22% +0.31%] index_select strided 5 : Elapsed 0.022 ms (2.233 ms / 100) 2.231 -> 2.235 ( +0.18%) [ +0.00% +0.09% +0.09% / +0.18% +0.40% +0.54%] index_select strided 7 : Elapsed 0.022 ms (2.231 ms / 100) 2.183 -> 2.185 ( +0.09%) [ +0.23% +0.00% +0.14% / +0.09% +0.55% +0.55%] index_select strided 8 : Elapsed 0.022 ms (2.188 ms / 100) 2.208 -> 2.207 ( -0.05%) [ +0.23% +0.09% +0.00% / -0.05% +0.32% +0.27%] index_select random : Elapsed 0.022 ms (2.213 ms / 100) 2.206 -> 2.208 ( +0.09%) [ +0.23% +0.00% +0.09% / +0.09% +0.41% +0.54%] index_select random_sorted : Elapsed 0.022 ms (2.211 ms / 100) 2.225 -> 2.228 ( +0.13%) [ +0.22% +0.00% +0.13% / +0.13% +0.49% +0.54%] index_select perm : Elapsed 0.022 ms (2.230 ms / 100) 2.227 -> 2.235 ( +0.36%) [ +0.04% +0.22% +0.00% / +0.36% +0.36% +0.40%] index_select perm_sorted : Elapsed 0.022 ms (2.228 ms / 100) out_shape = [16, 4, 20, 40] in_shape = [16, 5, 20, 40] idx_dim = 1 B = [16, 4, 20, 40] (stride (3200, 800, 40, 1)) A = [16, 5, 20, 40] (stride (40, 12800, 640, 1)) dim = 1 5.375 -> 5.384 ( +0.17%) [ +0.07% +0.00% +0.06% / +0.17% +0.82% +0.71%] index_select const : Elapsed 0.054 ms (5.379 ms / 100) 5.498 -> 5.504 ( +0.11%) [ +0.02% +0.09% +0.00% / +0.11% +0.31% +0.33%] index_select wrap : Elapsed 0.055 ms (5.499 ms / 100) 5.496 -> 5.505 ( +0.16%) [ +0.00% +0.11% +0.09% / +0.16% +0.38% +0.31%] index_select linear : Elapsed 0.055 ms (5.496 ms / 100) 5.492 -> 5.494 ( +0.04%) [ +0.13% +0.18% +0.00% / +0.04% +0.13% +0.20%] index_select reverse : Elapsed 0.055 ms (5.499 ms / 100) 5.365 -> 5.374 ( +0.17%) [ +0.02% +0.00% +0.15% / +0.17% +0.71% +0.78%] index_select skip64 : Elapsed 0.054 ms (5.366 ms / 100) 5.370 -> 5.375 ( +0.09%) [ +0.02% +0.00% +0.20% / +0.09% +0.65% +0.67%] index_select skip256 : Elapsed 0.054 ms (5.371 ms / 100) 5.500 -> 5.505 ( +0.09%) [ +0.00% +0.07% +0.05% / +0.09% +0.18% +0.33%] index_select spread : Elapsed 0.055 ms (5.500 ms / 100) 5.488 -> 5.496 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.20% +0.31%] index_select strided 3 : Elapsed 0.055 ms (5.488 ms / 100) 5.478 -> 5.459 ( -0.35%) [ +0.00% +0.04% +0.15% / +0.24% -0.35% -0.31%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.470 -> 5.462 ( -0.15%) [ +0.07% +0.04% +0.00% / +0.24% -0.15% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.474 ms / 100) 5.486 -> 5.495 ( +0.16%) [ +0.00% +0.02% +0.15% / +0.16% +0.51% +0.62%] index_select perm : Elapsed 0.055 ms (5.486 ms / 100) 5.488 -> 5.488 ( +0.00%) [ +0.04% +0.00% +0.05% / +0.00% +0.18% +0.26%] index_select perm_sorted : Elapsed 0.055 ms (5.490 ms / 100) B = [16, 4, 20, 40] (stride (3200, 800, 40, 1)) A = [16, 5, 20, 40] (stride (200, 1, 3200, 5)) dim = 1 5.614 -> 5.625 ( +0.20%) [ +0.18% +0.00% +0.02% / +0.20% +0.53% +0.48%] index_select const : Elapsed 0.056 ms (5.624 ms / 100) 5.618 -> 5.630 ( +0.21%) [ +0.00% +0.00% +0.14% / +0.21% +0.34% +0.39%] index_select wrap : Elapsed 0.056 ms (5.618 ms / 100) 5.615 -> 5.634 ( +0.34%) [ +0.07% +0.00% +0.11% / +0.34% +0.46% +0.39%] index_select linear : Elapsed 0.056 ms (5.619 ms / 100) 5.620 -> 5.627 ( +0.12%) [ +0.11% +0.00% +0.09% / +0.12% +0.46% +0.36%] index_select reverse : Elapsed 0.056 ms (5.626 ms / 100) 5.616 -> 5.618 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.37% +0.32%] index_select skip64 : Elapsed 0.056 ms (5.618 ms / 100) 5.620 -> 5.623 ( +0.05%) [ +0.00% +0.11% +0.14% / +0.05% +0.37% +0.39%] index_select skip256 : Elapsed 0.056 ms (5.620 ms / 100) 5.617 -> 5.620 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.45% +0.37%] index_select spread : Elapsed 0.056 ms (5.617 ms / 100) 5.615 -> 5.620 ( +0.09%) [ +0.11% +0.00% +0.11% / +0.09% +0.32% +0.48%] index_select strided 3 : Elapsed 0.056 ms (5.621 ms / 100) 5.616 -> 5.626 ( +0.18%) [ +0.04% +0.04% +0.00% / +0.18% +0.37% +0.45%] index_select random : Elapsed 0.056 ms (5.618 ms / 100) 5.613 -> 5.618 ( +0.09%) [ +0.14% +0.00% +0.18% / +0.09% +0.45% +0.36%] index_select random_sorted : Elapsed 0.056 ms (5.621 ms / 100) 5.618 -> 5.634 ( +0.28%) [ +0.00% +0.04% +0.09% / +0.28% +0.34% +0.37%] index_select perm : Elapsed 0.056 ms (5.618 ms / 100) 5.610 -> 5.621 ( +0.20%) [ +0.00% +0.23% +0.23% / +0.20% +0.64% +0.48%] index_select perm_sorted : Elapsed 0.056 ms (5.610 ms / 100) B = [16, 4, 20, 40] (stride (3200, 800, 1, 20)) A = [16, 5, 20, 40] (stride (1, 320, 16, 1600)) dim = 1 5.705 -> 5.713 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.47% +0.25%] index_select const : Elapsed 0.057 ms (5.713 ms / 100) 5.770 -> 5.780 ( +0.17%) [ +0.12% +0.00% +0.16% / +0.17% +0.19% +0.19%] index_select wrap : Elapsed 0.058 ms (5.777 ms / 100) 5.777 -> 5.780 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.12% +0.12%] index_select linear : Elapsed 0.058 ms (5.780 ms / 100) 5.766 -> 5.777 ( +0.19%) [ +0.17% +0.00% +0.00% / +0.21% +0.19% +0.23%] index_select reverse : Elapsed 0.058 ms (5.776 ms / 100) 5.708 -> 5.706 ( -0.04%) [ +0.00% +0.11% +0.02% / -0.04% +0.37% +0.39%] index_select skip64 : Elapsed 0.057 ms (5.708 ms / 100) 5.703 -> 5.711 ( +0.14%) [ +0.00% +0.14% +0.02% / +0.14% +0.37% +0.42%] index_select skip256 : Elapsed 0.057 ms (5.703 ms / 100) 5.776 -> 5.778 ( +0.03%) [ +0.09% +0.00% +0.05% / +0.05% +0.16% +0.03%] index_select spread : Elapsed 0.058 ms (5.781 ms / 100) 5.766 -> 5.777 ( +0.19%) [ +0.26% +0.00% +0.17% / +0.23% +0.19% +0.28%] index_select strided 3 : Elapsed 0.058 ms (5.781 ms / 100) 5.764 -> 5.767 ( +0.05%) [ +0.03% +0.00% +0.14% / +0.05% +0.10% +0.21%] index_select random : Elapsed 0.058 ms (5.766 ms / 100) 5.758 -> 5.758 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.12% +0.12%] index_select random_sorted : Elapsed 0.058 ms (5.763 ms / 100) 5.776 -> 5.780 ( +0.07%) [ +0.17% +0.07% +0.00% / +0.14% +0.14% +0.07%] index_select perm : Elapsed 0.058 ms (5.786 ms / 100) 5.783 -> 5.786 ( +0.05%) [ +0.10% +0.16% +0.00% / +0.05% +0.16% +0.21%] index_select perm_sorted : Elapsed 0.058 ms (5.789 ms / 100) B = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) A = [16, 5, 20, 40] (stride (1, 12800, 640, 16)) dim = 1 5.462 -> 5.455 ( -0.13%) [ +0.05% +0.00% +0.13% / +0.13% -0.05% -0.13%] index_select const : Elapsed 0.055 ms (5.465 ms / 100) 5.474 -> 5.474 ( +0.00%) [ +0.15% +0.00% +0.09% / +0.00% +0.44% +0.42%] index_select wrap : Elapsed 0.055 ms (5.482 ms / 100) 5.475 -> 5.480 ( +0.09%) [ +0.00% +0.02% +0.20% / +0.09% +0.42% +0.31%] index_select linear : Elapsed 0.055 ms (5.475 ms / 100) 5.474 -> 5.477 ( +0.05%) [ +0.00% +0.04% +0.11% / +0.05% +0.31% +0.42%] index_select reverse : Elapsed 0.055 ms (5.474 ms / 100) 5.462 -> 5.462 ( +0.00%) [ +0.00% +0.11% +0.13% / +0.00% +0.04% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.462 ms / 100) 5.462 -> 5.461 ( -0.02%) [ +0.00% +0.09% +0.13% / +0.18% +0.05% -0.02%] index_select skip256 : Elapsed 0.055 ms (5.462 ms / 100) 5.481 -> 5.479 ( -0.04%) [ +0.02% +0.05% +0.00% / -0.04% +0.11% +0.24%] index_select spread : Elapsed 0.055 ms (5.482 ms / 100) 5.473 -> 5.486 ( +0.24%) [ +0.26% +0.00% +0.15% / +0.24% +0.31% +0.42%] index_select strided 3 : Elapsed 0.055 ms (5.487 ms / 100) 5.493 -> 5.475 ( -0.33%) [ +0.04% +0.00% +0.02% / -0.02% -0.33% -0.25%] index_select random : Elapsed 0.055 ms (5.495 ms / 100) 5.486 -> 5.473 ( -0.24%) [ +0.04% +0.00% +0.00% / +0.16% -0.18% -0.24%] index_select random_sorted : Elapsed 0.055 ms (5.488 ms / 100) 5.473 -> 5.486 ( +0.24%) [ +0.00% +0.15% +0.04% / +0.24% +0.49% +0.27%] index_select perm : Elapsed 0.055 ms (5.473 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.00% +0.09% +0.04% / +0.02% +0.40% +0.35%] index_select perm_sorted : Elapsed 0.055 ms (5.475 ms / 100) B = [16, 4, 20, 40] (stride (3200, 20, 1, 80)) dim = 1 fill_cnt = 5 3.706 -> 3.710 ( +0.11%) [ +0.05% +0.00% +0.11% / +0.11% +0.35% +0.22%] index_fill_ const : Elapsed 0.037 ms (3.708 ms / 100) 3.748 -> 3.744 ( -0.11%) [ +0.03% +0.08% +0.00% / -0.11% -0.08% -0.08%] index_fill_ linear : Elapsed 0.037 ms (3.749 ms / 100) 3.748 -> 3.748 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.11% +0.21% +0.00%] index_fill_ reverse : Elapsed 0.037 ms (3.748 ms / 100) 3.711 -> 3.708 ( -0.08%) [ +0.00% +0.11% +0.13% / -0.08% +0.05% +0.16%] index_fill_ skip64 : Elapsed 0.037 ms (3.711 ms / 100) 3.711 -> 3.710 ( -0.03%) [ +0.00% +0.11% +0.11% / +0.03% +0.13% -0.03%] index_fill_ skip256 : Elapsed 0.037 ms (3.711 ms / 100) 3.719 -> 3.723 ( +0.11%) [ +0.05% +0.13% +0.00% / +0.11% +0.35% +0.13%] index_fill_ spread : Elapsed 0.037 ms (3.721 ms / 100) 3.725 -> 3.730 ( +0.13%) [ +0.00% +0.19% +0.16% / +0.13% +0.16% +0.21%] index_fill_ strided 3 : Elapsed 0.037 ms (3.725 ms / 100) 3.749 -> 3.749 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.24% +0.21%] index_fill_ random : Elapsed 0.038 ms (3.751 ms / 100) 3.763 -> 3.760 ( -0.08%) [ +0.05% +0.00% +0.00% / -0.08% +0.24% +0.27%] index_fill_ random_sorted : Elapsed 0.038 ms (3.765 ms / 100) B = [16, 4, 20, 40] (stride (3200, 1, 4, 80)) A = [16, 5, 20, 40] (stride (4000, 40, 200, 1)) dim = 1 5.804 -> 5.791 ( -0.22%) [ +0.10% +0.03% +0.00% / +0.07% -0.22% -0.21%] index_select const : Elapsed 0.058 ms (5.810 ms / 100) 5.882 -> 5.882 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.02% +0.05% +0.00%] index_select wrap : Elapsed 0.059 ms (5.887 ms / 100) 5.869 -> 5.861 ( -0.14%) [ +0.00% +0.02% +0.05% / +0.03% -0.07% -0.14%] index_select linear : Elapsed 0.059 ms (5.869 ms / 100) 5.877 -> 5.872 ( -0.09%) [ +0.00% +0.05% +0.07% / +0.03% +0.00% -0.09%] index_select reverse : Elapsed 0.059 ms (5.877 ms / 100) 5.778 -> 5.774 ( -0.07%) [ +0.12% +0.00% +0.09% / +0.19% -0.05% -0.07%] index_select skip64 : Elapsed 0.058 ms (5.785 ms / 100) 5.792 -> 5.775 ( -0.29%) [ +0.00% +0.10% +0.16% / +0.00% -0.19% -0.29%] index_select skip256 : Elapsed 0.058 ms (5.792 ms / 100) 5.884 -> 5.883 ( -0.02%) [ +0.10% +0.00% +0.03% / +0.05% -0.02% +0.05%] index_select spread : Elapsed 0.059 ms (5.890 ms / 100) 5.868 -> 5.861 ( -0.12%) [ +0.00% +0.03% +0.03% / -0.05% -0.03% -0.12%] index_select strided 3 : Elapsed 0.059 ms (5.868 ms / 100) 5.853 -> 5.852 ( -0.02%) [ +0.00% +0.14% +0.12% / +0.03% -0.02% +0.00%] index_select random : Elapsed 0.059 ms (5.853 ms / 100) 5.841 -> 5.834 ( -0.12%) [ +0.12% +0.00% +0.05% / +0.07% -0.12% -0.09%] index_select random_sorted : Elapsed 0.058 ms (5.848 ms / 100) 5.880 -> 5.876 ( -0.07%) [ +0.00% +0.10% +0.07% / -0.07% +0.02% -0.05%] index_select perm : Elapsed 0.059 ms (5.880 ms / 100) 5.865 -> 5.853 ( -0.20%) [ +0.09% +0.00% +0.00% / -0.02% -0.20% -0.05%] index_select perm_sorted : Elapsed 0.059 ms (5.870 ms / 100) B = [16, 4, 20, 40] (stride (1, 12800, 640, 16)) A = [16, 5, 20, 40] (stride (4000, 40, 200, 1)) dim = 1 5.427 -> 5.432 ( +0.09%) [ +0.11% +0.00% +0.04% / +0.35% +0.20% +0.09%] index_select const : Elapsed 0.054 ms (5.433 ms / 100) 5.509 -> 5.505 ( -0.07%) [ +0.02% +0.00% +0.02% / -0.07% +0.00% +0.05%] index_select wrap : Elapsed 0.055 ms (5.510 ms / 100) 5.507 -> 5.506 ( -0.02%) [ +0.04% +0.04% +0.00% / +0.11% -0.02% +0.07%] index_select linear : Elapsed 0.055 ms (5.509 ms / 100) 5.537 -> 5.527 ( -0.18%) [ +0.04% +0.02% +0.00% / +0.02% -0.18% -0.11%] index_select reverse : Elapsed 0.055 ms (5.539 ms / 100) 5.405 -> 5.412 ( +0.13%) [ +0.04% +0.00% +0.17% / +0.13% +0.15% +0.13%] index_select skip64 : Elapsed 0.054 ms (5.407 ms / 100) 5.426 -> 5.432 ( +0.11%) [ +0.17% +0.00% +0.17% / +0.24% +0.11% +0.11%] index_select skip256 : Elapsed 0.054 ms (5.435 ms / 100) 5.507 -> 5.507 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.09% +0.11% +0.00%] index_select spread : Elapsed 0.055 ms (5.507 ms / 100) 5.519 -> 5.515 ( -0.07%) [ +0.00% +0.00% +0.05% / +0.05% -0.07% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.519 ms / 100) 5.518 -> 5.519 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.14% +0.09% +0.02%] index_select random : Elapsed 0.055 ms (5.523 ms / 100) 5.526 -> 5.523 ( -0.05%) [ +0.04% +0.00% +0.05% / +0.16% +0.00% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.528 ms / 100) 5.522 -> 5.517 ( -0.09%) [ +0.00% +0.09% +0.13% / +0.13% +0.05% -0.09%] index_select perm : Elapsed 0.055 ms (5.522 ms / 100) 5.512 -> 5.509 ( -0.05%) [ +0.00% +0.15% +0.05% / +0.02% -0.05% -0.04%] index_select perm_sorted : Elapsed 0.055 ms (5.512 ms / 100) B = [16, 4, 20, 40] (stride (160, 1, 2560, 4)) A = [16, 5, 20, 40] (stride (5, 1, 3200, 80)) dim = 1 5.827 -> 5.811 ( -0.27%) [ +0.03% +0.05% +0.00% / +0.09% -0.27% -0.09%] index_select const : Elapsed 0.058 ms (5.829 ms / 100) 5.823 -> 5.810 ( -0.22%) [ +0.02% +0.00% +0.21% / +0.12% -0.22% -0.10%] index_select wrap : Elapsed 0.058 ms (5.824 ms / 100) 5.824 -> 5.814 ( -0.17%) [ +0.00% +0.02% +0.10% / -0.02% -0.17% -0.17%] index_select linear : Elapsed 0.058 ms (5.824 ms / 100) 5.825 -> 5.808 ( -0.29%) [ +0.02% +0.00% +0.12% / +0.07% -0.29% -0.14%] index_select reverse : Elapsed 0.058 ms (5.826 ms / 100) 5.825 -> 5.813 ( -0.21%) [ +0.03% +0.00% +0.03% / +0.17% -0.07% -0.21%] index_select skip64 : Elapsed 0.058 ms (5.827 ms / 100) 5.821 -> 5.811 ( -0.17%) [ +0.07% +0.00% +0.12% / +0.17% -0.17% -0.09%] index_select skip256 : Elapsed 0.058 ms (5.825 ms / 100) 5.822 -> 5.815 ( -0.12%) [ +0.00% +0.00% +0.15% / +0.14% +0.05% -0.12%] index_select spread : Elapsed 0.058 ms (5.822 ms / 100) 5.826 -> 5.817 ( -0.15%) [ +0.03% +0.00% +0.09% / -0.02% -0.15% -0.14%] index_select strided 3 : Elapsed 0.058 ms (5.828 ms / 100) 5.822 -> 5.818 ( -0.07%) [ +0.00% +0.09% +0.14% / +0.12% -0.05% -0.07%] index_select random : Elapsed 0.058 ms (5.822 ms / 100) 5.826 -> 5.813 ( -0.22%) [ +0.05% +0.00% +0.10% / +0.07% -0.22% -0.19%] index_select random_sorted : Elapsed 0.058 ms (5.829 ms / 100) 5.823 -> 5.810 ( -0.22%) [ +0.03% +0.00% +0.12% / +0.22% -0.22% -0.22%] index_select perm : Elapsed 0.058 ms (5.825 ms / 100) 5.824 -> 5.815 ( -0.15%) [ +0.00% +0.02% +0.17% / +0.15% -0.15% -0.15%] index_select perm_sorted : Elapsed 0.058 ms (5.824 ms / 100) B = [16, 4, 20, 40] (stride (80, 20, 1, 1280)) A = [16, 5, 20, 40] (stride (800, 12800, 40, 1)) dim = 1 5.457 -> 5.455 ( -0.04%) [ +0.00% +0.04% +0.05% / +0.00% -0.04% +0.00%] index_select const : Elapsed 0.055 ms (5.457 ms / 100) 5.516 -> 5.517 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.05% +0.02%] index_select wrap : Elapsed 0.055 ms (5.520 ms / 100) 5.508 -> 5.513 ( +0.09%) [ +0.05% +0.00% +0.00% / +0.11% +0.16% +0.09%] index_select linear : Elapsed 0.055 ms (5.511 ms / 100) 5.512 -> 5.515 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.38% +0.38%] index_select reverse : Elapsed 0.055 ms (5.515 ms / 100) 5.452 -> 5.442 ( -0.18%) [ +0.02% +0.00% +0.00% / +0.06% -0.09% -0.18%] index_select skip64 : Elapsed 0.055 ms (5.453 ms / 100) 5.464 -> 5.450 ( -0.26%) [ +0.11% +0.00% +0.07% / -0.05% -0.26% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.470 ms / 100) 5.509 -> 5.519 ( +0.18%) [ +0.27% +0.00% +0.13% / +0.18% +0.24% +0.18%] index_select spread : Elapsed 0.055 ms (5.524 ms / 100) 5.517 -> 5.519 ( +0.04%) [ +0.00% +0.09% +0.18% / +0.18% +0.04% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.517 ms / 100) 5.478 -> 5.479 ( +0.02%) [ +0.00% +0.16% +0.18% / +0.02% +0.33% +0.35%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.489 -> 5.493 ( +0.07%) [ +0.04% +0.00% +0.13% / +0.07% +0.24% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.491 ms / 100) 5.523 -> 5.528 ( +0.09%) [ +0.02% +0.04% +0.00% / +0.22% +0.09% +0.09%] index_select perm : Elapsed 0.055 ms (5.524 ms / 100) 5.511 -> 5.518 ( +0.13%) [ +0.00% +0.07% +0.00% / +0.13% +0.22% +0.22%] index_select perm_sorted : Elapsed 0.055 ms (5.511 ms / 100) B = [16, 4, 20, 40] (stride (4, 1, 64, 1280)) A = [16, 5, 20, 40] (stride (4000, 40, 200, 1)) dim = 1 5.807 -> 5.789 ( -0.31%) [ +0.07% +0.00% +0.05% / +0.02% -0.31% -0.29%] index_select const : Elapsed 0.058 ms (5.811 ms / 100) 5.879 -> 5.878 ( -0.02%) [ +0.00% +0.10% +0.02% / +0.00% +0.02% -0.02%] index_select wrap : Elapsed 0.059 ms (5.879 ms / 100) 5.866 -> 5.864 ( -0.03%) [ +0.02% +0.00% +0.09% / +0.07% +0.00% -0.03%] index_select linear : Elapsed 0.059 ms (5.867 ms / 100) 5.872 -> 5.870 ( -0.03%) [ +0.00% +0.00% +0.02% / +0.07% -0.03% +0.09%] index_select reverse : Elapsed 0.059 ms (5.872 ms / 100) 5.784 -> 5.771 ( -0.22%) [ +0.00% +0.10% +0.02% / +0.05% -0.21% -0.22%] index_select skip64 : Elapsed 0.058 ms (5.784 ms / 100) 5.787 -> 5.761 ( -0.45%) [ +0.00% +0.05% +0.22% / +0.03% -0.16% -0.45%] index_select skip256 : Elapsed 0.058 ms (5.787 ms / 100) 5.884 -> 5.879 ( -0.08%) [ +0.02% +0.00% +0.07% / -0.02% -0.08% -0.03%] index_select spread : Elapsed 0.059 ms (5.885 ms / 100) 5.863 -> 5.858 ( -0.09%) [ +0.07% +0.07% +0.00% / +0.14% -0.09% +0.02%] index_select strided 3 : Elapsed 0.059 ms (5.867 ms / 100) 5.847 -> 5.841 ( -0.10%) [ +0.00% +0.03% +0.09% / +0.03% -0.10% -0.10%] index_select random : Elapsed 0.058 ms (5.847 ms / 100) 5.847 -> 5.839 ( -0.14%) [ +0.05% +0.00% +0.14% / +0.03% -0.10% -0.14%] index_select random_sorted : Elapsed 0.058 ms (5.850 ms / 100) 5.876 -> 5.868 ( -0.14%) [ +0.09% +0.00% +0.07% / -0.05% -0.10% -0.14%] index_select perm : Elapsed 0.059 ms (5.881 ms / 100) 5.860 -> 5.859 ( -0.02%) [ +0.07% +0.07% +0.00% / +0.05% +0.02% -0.02%] index_select perm_sorted : Elapsed 0.059 ms (5.864 ms / 100) out_shape = [16, 5, 4, 40] in_shape = [16, 5, 20, 40] idx_dim = 2 B = [16, 5, 4, 40] (stride (800, 40, 200, 1)) A = [16, 5, 20, 40] (stride (1, 12800, 16, 320)) dim = 2 2.177 -> 2.179 ( +0.09%) [ +0.23% +0.14% +0.00% / +0.09% +0.51% +0.14%] index_select const : Elapsed 0.022 ms (2.182 ms / 100) 2.176 -> 2.176 ( +0.00%) [ +0.18% +0.00% +0.18% / +0.00% +0.46% +0.32%] index_select wrap : Elapsed 0.022 ms (2.180 ms / 100) 2.173 -> 2.175 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.41% +0.60%] index_select linear : Elapsed 0.022 ms (2.175 ms / 100) 2.172 -> 2.175 ( +0.14%) [ +0.18% +0.00% +0.18% / +0.14% +0.69% +0.55%] index_select reverse : Elapsed 0.022 ms (2.176 ms / 100) 2.176 -> 2.178 ( +0.09%) [ +0.18% +0.14% +0.00% / +0.09% +0.69% +0.51%] index_select skip64 : Elapsed 0.022 ms (2.180 ms / 100) 2.174 -> 2.176 ( +0.09%) [ +0.37% +0.00% +0.00% / +0.09% +0.41% +0.55%] index_select skip256 : Elapsed 0.022 ms (2.182 ms / 100) 2.163 -> 2.167 ( +0.18%) [ +0.00% +0.14% +0.05% / +0.18% +0.55% +0.42%] index_select spread : Elapsed 0.022 ms (2.163 ms / 100) 2.167 -> 2.170 ( +0.14%) [ +0.23% +0.09% +0.00% / +0.14% +0.46% +0.14%] index_select strided 3 : Elapsed 0.022 ms (2.172 ms / 100) 2.170 -> 2.168 ( -0.09%) [ +0.23% +0.09% +0.00% / -0.09% +0.28% +0.14%] index_select strided 5 : Elapsed 0.022 ms (2.175 ms / 100) 2.163 -> 2.168 ( +0.23%) [ +0.32% +0.00% +0.14% / +0.23% +0.51% +0.37%] index_select strided 7 : Elapsed 0.022 ms (2.170 ms / 100) 2.167 -> 2.172 ( +0.23%) [ +0.05% +0.23% +0.00% / +0.23% +0.28% +0.23%] index_select strided 8 : Elapsed 0.022 ms (2.168 ms / 100) 2.165 -> 2.162 ( -0.14%) [ +0.09% +0.00% +0.18% / -0.14% +0.37% +0.18%] index_select strided 16 : Elapsed 0.022 ms (2.167 ms / 100) 2.161 -> 2.160 ( -0.05%) [ +0.23% +0.28% +0.00% / -0.05% +0.51% +0.65%] index_select random : Elapsed 0.022 ms (2.166 ms / 100) 2.167 -> 2.169 ( +0.09%) [ +0.00% +0.05% +0.14% / +0.09% +0.65% +0.37%] index_select random_sorted : Elapsed 0.022 ms (2.167 ms / 100) 2.165 -> 2.165 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.69% +0.46%] index_select perm : Elapsed 0.022 ms (2.168 ms / 100) 2.162 -> 2.168 ( +0.28%) [ +0.05% +0.00% +0.32% / +0.28% +0.69% +0.51%] index_select perm_sorted : Elapsed 0.022 ms (2.163 ms / 100) B = [16, 5, 4, 40] (stride (800, 1, 5, 20)) A = [16, 5, 20, 40] (stride (1, 640, 3200, 16)) dim = 2 2.084 -> 2.083 ( -0.05%) [ +0.10% +0.00% +0.19% / -0.05% +0.43% +0.53%] index_select const : Elapsed 0.021 ms (2.086 ms / 100) 2.073 -> 2.069 ( -0.19%) [ +0.05% +0.05% +0.00% / -0.19% +0.39% +0.39%] index_select wrap : Elapsed 0.021 ms (2.074 ms / 100) 2.077 -> 2.079 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.10% +0.34% +0.39%] index_select linear : Elapsed 0.021 ms (2.079 ms / 100) 2.083 -> 2.086 ( +0.14%) [ +0.19% +0.19% +0.00% / +0.14% +0.38% +0.24%] index_select reverse : Elapsed 0.021 ms (2.087 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.19% +0.24% +0.00% / +0.14% +1.10% +0.48%] index_select skip64 : Elapsed 0.021 ms (2.088 ms / 100) 2.077 -> 2.079 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.96% +0.48%] index_select skip256 : Elapsed 0.021 ms (2.077 ms / 100) 2.069 -> 2.068 ( -0.05%) [ +0.14% +0.48% +0.00% / -0.05% +0.43% +0.43%] index_select spread : Elapsed 0.021 ms (2.072 ms / 100) 2.085 -> 2.090 ( +0.24%) [ +0.14% +0.19% +0.00% / +0.24% +0.24% +0.38%] index_select strided 3 : Elapsed 0.021 ms (2.088 ms / 100) 2.078 -> 2.081 ( +0.14%) [ +0.10% +0.00% +0.29% / +0.34% +0.43% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.080 ms / 100) 2.079 -> 2.085 ( +0.29%) [ +0.00% +0.05% +0.19% / +0.29% +0.34% +0.34%] index_select strided 7 : Elapsed 0.021 ms (2.079 ms / 100) 2.084 -> 2.085 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.43% +0.29%] index_select strided 8 : Elapsed 0.021 ms (2.084 ms / 100) 2.070 -> 2.073 ( +0.14%) [ +0.05% +0.24% +0.00% / +0.14% +0.63% +0.53%] index_select strided 16 : Elapsed 0.021 ms (2.071 ms / 100) 2.079 -> 2.080 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.53% +0.34%] index_select random : Elapsed 0.021 ms (2.080 ms / 100) 2.096 -> 2.092 ( -0.19%) [ +0.24% +0.00% +0.10% / -0.05% -0.05% -0.19%] index_select random_sorted : Elapsed 0.021 ms (2.101 ms / 100) 2.085 -> 2.086 ( +0.05%) [ +0.00% +0.24% +0.10% / +0.14% +0.05% +0.43%] index_select perm : Elapsed 0.021 ms (2.085 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.14% +0.05% +0.00% / +0.19% +0.14% +0.29%] index_select perm_sorted : Elapsed 0.021 ms (2.087 ms / 100) B = [16, 5, 4, 40] (stride (5, 1, 80, 320)) A = [16, 5, 20, 40] (stride (20, 320, 1, 1600)) dim = 2 2.116 -> 2.118 ( +0.09%) [ +0.38% +0.14% +0.00% / +0.09% +0.38% +0.52%] index_select const : Elapsed 0.021 ms (2.124 ms / 100) 2.114 -> 2.117 ( +0.14%) [ +0.00% +0.14% +0.09% / +0.14% +0.61% +0.57%] index_select wrap : Elapsed 0.021 ms (2.114 ms / 100) 2.118 -> 2.119 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.38% +0.33%] index_select linear : Elapsed 0.021 ms (2.119 ms / 100) 2.114 -> 2.115 ( +0.05%) [ +0.28% +0.33% +0.00% / +0.05% +0.43% +0.57%] index_select reverse : Elapsed 0.021 ms (2.120 ms / 100) 2.117 -> 2.120 ( +0.14%) [ +0.09% +0.14% +0.00% / +0.14% +0.47% +0.14%] index_select skip64 : Elapsed 0.021 ms (2.119 ms / 100) 2.113 -> 2.120 ( +0.33%) [ +0.00% +0.09% +0.09% / +0.38% +0.33% +0.47%] index_select skip256 : Elapsed 0.021 ms (2.113 ms / 100) 2.182 -> 2.180 ( -0.09%) [ +0.05% +0.27% +0.00% / +0.09% -0.09% +0.09%] index_select spread : Elapsed 0.022 ms (2.183 ms / 100) 2.156 -> 2.153 ( -0.14%) [ +0.09% +0.00% +0.05% / -0.14% +0.32% +0.19%] index_select strided 3 : Elapsed 0.022 ms (2.158 ms / 100) 2.179 -> 2.182 ( +0.14%) [ +0.37% +0.05% +0.00% / +0.18% +0.18% +0.14%] index_select strided 5 : Elapsed 0.022 ms (2.187 ms / 100) 2.171 -> 2.173 ( +0.09%) [ +0.05% +0.23% +0.00% / +0.09% +0.28% +0.32%] index_select strided 7 : Elapsed 0.022 ms (2.172 ms / 100) 2.167 -> 2.170 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.18% +0.14% +0.28%] index_select strided 8 : Elapsed 0.022 ms (2.170 ms / 100) 2.180 -> 2.180 ( +0.00%) [ +0.18% +0.00% +0.18% / +0.00% +0.28% +0.46%] index_select strided 16 : Elapsed 0.022 ms (2.184 ms / 100) 2.181 -> 2.184 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.32% +0.18%] index_select random : Elapsed 0.022 ms (2.184 ms / 100) 2.180 -> 2.181 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.05% +0.23%] index_select random_sorted : Elapsed 0.022 ms (2.182 ms / 100) 2.186 -> 2.185 ( -0.05%) [ +0.09% +0.00% +0.09% / +0.18% +0.05% -0.05%] index_select perm : Elapsed 0.022 ms (2.188 ms / 100) 2.171 -> 2.172 ( +0.05%) [ +0.00% +0.09% +0.09% / +0.05% +0.09% +0.09%] index_select perm_sorted : Elapsed 0.022 ms (2.171 ms / 100) out_shape = [16, 5, 20, 4] in_shape = [16, 5, 20, 40] idx_dim = 3 B = [16, 5, 20, 4] (stride (4, 1280, 64, 1)) A = [16, 5, 20, 40] (stride (5, 1, 3200, 80)) dim = 3 0.599 -> 0.600 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.33% +0.50%] index_select const : Elapsed 0.006 ms (0.600 ms / 100) 0.594 -> 0.596 ( +0.34%) [ +0.17% +0.00% +0.17% / +0.34% +1.01% +1.35%] index_select wrap : Elapsed 0.006 ms (0.595 ms / 100) 0.596 -> 0.602 ( +1.01%) [ +0.00% +1.01% +0.17% / +1.01% +1.01% +1.17%] index_select linear : Elapsed 0.006 ms (0.596 ms / 100) 0.600 -> 0.600 ( +0.00%) [ +0.17% +0.33% +0.00% / +0.00% +0.33% +0.17%] index_select reverse : Elapsed 0.006 ms (0.601 ms / 100) 0.597 -> 0.601 ( +0.67%) [ +0.34% +0.00% +0.50% / +0.67% +0.67% +0.67%] index_select skip64 : Elapsed 0.006 ms (0.599 ms / 100) 0.597 -> 0.599 ( +0.34%) [ +0.67% +0.34% +0.00% / +0.34% +0.84% +0.84%] index_select skip256 : Elapsed 0.006 ms (0.601 ms / 100) 0.601 -> 0.600 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.17% +0.17%] index_select spread : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.602 ( +0.50%) [ +0.33% +0.50% +0.00% / +0.67% +0.50% +0.50%] index_select strided 3 : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.599 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.17% +0.00% +6.18%] index_select strided 5 : Elapsed 0.006 ms (0.600 ms / 100) 0.601 -> 0.599 ( -0.33%) [ +0.17% +0.00% +0.00% / +0.00% -0.33% +0.67%] index_select strided 7 : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.17% +0.17% +0.00%] index_select strided 8 : Elapsed 0.006 ms (0.602 ms / 100) 0.599 -> 0.602 ( +0.50%) [ +0.50% +0.00% +0.50% / +0.67% +0.50% +0.50%] index_select strided 16 : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.599 ( -0.33%) [ +0.00% +0.17% +0.00% / -0.33% +0.17% +0.17%] index_select random : Elapsed 0.006 ms (0.601 ms / 100) 0.601 -> 0.599 ( -0.33%) [ +0.17% +0.33% +0.00% / -0.33% +0.00% +1.00%] index_select random_sorted : Elapsed 0.006 ms (0.602 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.50% +0.50% +0.00% / +0.67% +0.50% +0.33%] index_select perm : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.602 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.17% +0.17%] index_select perm_sorted : Elapsed 0.006 ms (0.602 ms / 100) B = [16, 5, 20, 4] (stride (20, 1, 320, 5)) A = [16, 5, 20, 40] (stride (1, 320, 16, 1600)) dim = 3 1.202 -> 1.203 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.42% +0.33%] index_select const : Elapsed 0.012 ms (1.204 ms / 100) 1.202 -> 1.202 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select wrap : Elapsed 0.012 ms (1.203 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select linear : Elapsed 0.012 ms (1.203 ms / 100) 1.202 -> 1.201 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.50% +0.42%] index_select reverse : Elapsed 0.012 ms (1.203 ms / 100) 1.201 -> 1.202 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.58% +0.67%] index_select skip64 : Elapsed 0.012 ms (1.202 ms / 100) 1.201 -> 1.204 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.58% +0.58%] index_select skip256 : Elapsed 0.012 ms (1.202 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.58% +0.50%] index_select spread : Elapsed 0.012 ms (1.202 ms / 100) 1.202 -> 1.202 ( +0.00%) [ +0.08% +0.25% +0.00% / +0.00% +0.50% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.203 ms / 100) 1.202 -> 1.202 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.50% +0.42%] index_select strided 5 : Elapsed 0.012 ms (1.204 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.204 ms / 100) 1.201 -> 1.202 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.203 ms / 100) 1.203 -> 1.206 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.42% +0.25%] index_select strided 16 : Elapsed 0.012 ms (1.204 ms / 100) 1.203 -> 1.205 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.33% +0.25%] index_select random : Elapsed 0.012 ms (1.204 ms / 100) 1.204 -> 1.204 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.25%] index_select random_sorted : Elapsed 0.012 ms (1.204 ms / 100) 1.204 -> 1.203 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.25% +0.17%] index_select perm : Elapsed 0.012 ms (1.204 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.50% +0.42%] index_select perm_sorted : Elapsed 0.012 ms (1.203 ms / 100) out_shape = [4, 5, 40, 20] in_shape = [16, 5, 40, 20] idx_dim = 0 B = [4, 5, 40, 20] (stride (4000, 1, 100, 5)) A = [16, 5, 40, 20] (stride (4000, 20, 100, 1)) dim = 0 2.264 -> 2.268 ( +0.18%) [ +0.04% +0.00% +0.04% / +0.18% +0.27% +0.31%] index_select const : Elapsed 0.023 ms (2.265 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.00% +0.21% +0.21% / +0.04% +0.17% +0.43%] index_select wrap : Elapsed 0.023 ms (2.330 ms / 100) 2.326 -> 2.331 ( +0.21%) [ +0.00% +0.13% +0.13% / +0.21% +0.43% +0.39%] index_select linear : Elapsed 0.023 ms (2.326 ms / 100) 2.332 -> 2.334 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.26% +0.26%] index_select reverse : Elapsed 0.023 ms (2.334 ms / 100) 2.275 -> 2.277 ( +0.09%) [ +0.00% +0.13% +0.09% / +0.09% +0.18% +0.13%] index_select skip64 : Elapsed 0.023 ms (2.275 ms / 100) 2.262 -> 2.262 ( +0.00%) [ +0.13% +0.09% +0.00% / +0.00% +0.49% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.265 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.17% +0.30%] index_select spread : Elapsed 0.023 ms (2.323 ms / 100) 2.328 -> 2.330 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.17% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.324 -> 2.323 ( -0.04%) [ +0.26% +0.13% +0.00% / -0.04% +0.56% +0.34%] index_select strided 5 : Elapsed 0.023 ms (2.330 ms / 100) 2.332 -> 2.338 ( +0.26%) [ +0.00% +0.21% +0.09% / +0.26% +0.26% +0.34%] index_select strided 7 : Elapsed 0.023 ms (2.332 ms / 100) 2.281 -> 2.281 ( +0.00%) [ +0.18% +0.00% +0.09% / +0.00% +0.18% +0.04%] index_select strided 8 : Elapsed 0.023 ms (2.285 ms / 100) 2.328 -> 2.330 ( +0.09%) [ +0.17% +0.00% +0.13% / +0.09% +0.43% +0.39%] index_select random : Elapsed 0.023 ms (2.332 ms / 100) 2.328 -> 2.329 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.17% +0.26%] index_select random_sorted : Elapsed 0.023 ms (2.330 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.21% +0.00% +0.26% / +0.17% +0.17% +0.30%] index_select perm : Elapsed 0.023 ms (2.332 ms / 100) 2.332 -> 2.335 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.13% +0.17% +0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) B = [4, 5, 40, 20] (stride (800, 3200, 1, 40)) dim = 0 fill_cnt = 16 2.736 -> 2.730 ( -0.22%) [ +0.00% +0.22% +0.07% / -0.07% +0.15% -0.22%] index_fill_ const : Elapsed 0.027 ms (2.736 ms / 100) 2.756 -> 2.757 ( +0.04%) [ +0.00% +0.15% +0.04% / +0.04% +0.33% +0.44%] index_fill_ linear : Elapsed 0.028 ms (2.756 ms / 100) 2.747 -> 2.752 ( +0.18%) [ +0.15% +0.00% +0.11% / +0.18% +0.98% +0.73%] index_fill_ reverse : Elapsed 0.028 ms (2.751 ms / 100) 2.736 -> 2.736 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.04% +0.04%] index_fill_ skip64 : Elapsed 0.027 ms (2.738 ms / 100) 2.738 -> 2.733 ( -0.18%) [ +0.26% +0.07% +0.00% / +0.00% -0.11% -0.18%] index_fill_ skip256 : Elapsed 0.027 ms (2.745 ms / 100) 2.754 -> 2.755 ( +0.04%) [ +0.11% +0.04% +0.00% / +0.04% +0.07% +0.29%] index_fill_ spread : Elapsed 0.028 ms (2.757 ms / 100) 2.758 -> 2.761 ( +0.11%) [ +0.00% +0.00% +0.07% / +0.11% +0.40% +0.44%] index_fill_ strided 3 : Elapsed 0.028 ms (2.758 ms / 100) 2.748 -> 2.752 ( +0.15%) [ +0.15% +0.25% +0.00% / +0.15% +0.51% +0.47%] index_fill_ random : Elapsed 0.028 ms (2.752 ms / 100) 2.748 -> 2.749 ( +0.04%) [ +0.07% +0.00% +0.18% / +0.04% +0.66% +0.58%] index_fill_ random_sorted : Elapsed 0.028 ms (2.750 ms / 100) B = [4, 5, 40, 20] (stride (40, 160, 1, 800)) dim = 0 fill_cnt = 16 2.743 -> 2.745 ( +0.07%) [ +0.04% +0.00% +0.15% / +0.07% +0.26% +0.33%] index_fill_ const : Elapsed 0.027 ms (2.744 ms / 100) 2.763 -> 2.761 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.29% +0.47%] index_fill_ linear : Elapsed 0.028 ms (2.765 ms / 100) 2.756 -> 2.758 ( +0.07%) [ +0.00% +0.22% +0.11% / +0.07% +0.29% +0.33%] index_fill_ reverse : Elapsed 0.028 ms (2.756 ms / 100) 2.742 -> 2.742 ( +0.00%) [ +0.04% +0.18% +0.00% / +0.00% +0.55% +0.36%] index_fill_ skip64 : Elapsed 0.027 ms (2.743 ms / 100) 2.745 -> 2.748 ( +0.11%) [ +0.11% +0.00% +0.07% / +0.40% +0.22% +0.11%] index_fill_ skip256 : Elapsed 0.027 ms (2.748 ms / 100) 2.759 -> 2.761 ( +0.07%) [ +0.18% +0.04% +0.00% / +0.18% +0.29% +0.07%] index_fill_ spread : Elapsed 0.028 ms (2.764 ms / 100) 2.763 -> 2.770 ( +0.25%) [ +0.22% +0.14% +0.00% / +0.25% +0.43% +0.40%] index_fill_ strided 3 : Elapsed 0.028 ms (2.769 ms / 100) 2.757 -> 2.756 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.36% +0.36%] index_fill_ random : Elapsed 0.028 ms (2.757 ms / 100) 2.749 -> 2.750 ( +0.04%) [ +0.15% +0.00% +0.15% / +0.04% +0.69% +0.65%] index_fill_ random_sorted : Elapsed 0.028 ms (2.753 ms / 100) B = [4, 5, 40, 20] (stride (1, 160, 4, 800)) A = [16, 5, 40, 20] (stride (4000, 800, 20, 1)) dim = 0 2.097 -> 2.098 ( +0.05%) [ +0.14% +0.00% +0.10% / +0.24% +0.10% +0.05%] index_select const : Elapsed 0.021 ms (2.100 ms / 100) 2.153 -> 2.155 ( +0.09%) [ +0.28% +0.19% +0.00% / +0.28% +0.23% +0.09%] index_select wrap : Elapsed 0.022 ms (2.159 ms / 100) 2.157 -> 2.154 ( -0.14%) [ +0.00% +0.19% +0.00% / +0.14% +0.05% -0.14%] index_select linear : Elapsed 0.022 ms (2.157 ms / 100) 2.153 -> 2.155 ( +0.09%) [ +0.19% +0.00% +0.00% / +0.09% +0.14% +0.19%] index_select reverse : Elapsed 0.022 ms (2.157 ms / 100) 2.100 -> 2.096 ( -0.19%) [ +0.00% +0.05% +0.00% / -0.19% -0.10% -0.10%] index_select skip64 : Elapsed 0.021 ms (2.100 ms / 100) 2.100 -> 2.098 ( -0.10%) [ +0.10% +0.00% +0.00% / +0.00% -0.10% +0.00%] index_select skip256 : Elapsed 0.021 ms (2.102 ms / 100) 2.150 -> 2.155 ( +0.23%) [ +0.37% +0.00% +0.14% / +0.37% +0.33% +0.23%] index_select spread : Elapsed 0.022 ms (2.158 ms / 100) 2.156 -> 2.151 ( -0.23%) [ +0.00% +0.05% +0.05% / +0.09% +0.28% -0.23%] index_select strided 3 : Elapsed 0.022 ms (2.156 ms / 100) 2.151 -> 2.149 ( -0.09%) [ +0.00% +0.19% +0.14% / -0.09% +0.14% +0.23%] index_select strided 5 : Elapsed 0.022 ms (2.151 ms / 100) 2.152 -> 2.152 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.14% +0.14%] index_select strided 7 : Elapsed 0.022 ms (2.155 ms / 100) 2.113 -> 2.112 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.62% +0.05%] index_select strided 8 : Elapsed 0.021 ms (2.113 ms / 100) 2.154 -> 2.153 ( -0.05%) [ +0.09% +0.37% +0.00% / +0.05% -0.05% +0.05%] index_select random : Elapsed 0.022 ms (2.156 ms / 100) 2.151 -> 2.152 ( +0.05%) [ +0.23% +0.00% +0.14% / +0.23% +0.05% +0.33%] index_select random_sorted : Elapsed 0.022 ms (2.156 ms / 100) 2.148 -> 2.151 ( +0.14%) [ +0.00% +0.33% +0.14% / +0.14% +0.51% +0.37%] index_select perm : Elapsed 0.021 ms (2.148 ms / 100) 2.149 -> 2.153 ( +0.19%) [ +0.23% +0.00% +0.23% / +0.19% +0.33% +0.42%] index_select perm_sorted : Elapsed 0.022 ms (2.154 ms / 100) out_shape = [16, 4, 40, 20] in_shape = [16, 5, 40, 20] idx_dim = 1 B = [16, 4, 40, 20] (stride (800, 12800, 1, 40)) A = [16, 5, 40, 20] (stride (4000, 800, 1, 40)) dim = 1 5.718 -> 5.722 ( +0.07%) [ +0.00% +0.09% +0.12% / +0.07% +0.14% +0.24%] index_select const : Elapsed 0.057 ms (5.718 ms / 100) 5.785 -> 5.755 ( -0.52%) [ +0.00% +0.19% +0.07% / +0.21% -0.52% -0.48%] index_select wrap : Elapsed 0.058 ms (5.785 ms / 100) 5.782 -> 5.754 ( -0.48%) [ +0.02% +0.00% +0.16% / +0.10% -0.47% -0.48%] index_select linear : Elapsed 0.058 ms (5.783 ms / 100) 5.795 -> 5.750 ( -0.78%) [ +0.00% +0.00% +0.05% / +0.14% -0.47% -0.78%] index_select reverse : Elapsed 0.058 ms (5.795 ms / 100) 5.717 -> 5.721 ( +0.07%) [ +0.10% +0.00% +0.19% / +0.07% +0.35% +0.23%] index_select skip64 : Elapsed 0.057 ms (5.723 ms / 100) 5.713 -> 5.721 ( +0.14%) [ +0.00% +0.19% +0.21% / +0.14% +0.23% +0.23%] index_select skip256 : Elapsed 0.057 ms (5.713 ms / 100) 5.778 -> 5.753 ( -0.43%) [ +0.00% +0.03% +0.21% / +0.12% -0.31% -0.43%] index_select spread : Elapsed 0.058 ms (5.778 ms / 100) 5.772 -> 5.735 ( -0.64%) [ +0.00% +0.05% +0.02% / -0.03% -0.64% -0.57%] index_select strided 3 : Elapsed 0.058 ms (5.772 ms / 100) 5.773 -> 5.750 ( -0.40%) [ +0.07% +0.00% +0.10% / +0.09% -0.40% -0.24%] index_select random : Elapsed 0.058 ms (5.777 ms / 100) 5.782 -> 5.753 ( -0.50%) [ +0.00% +0.03% +0.14% / +0.10% -0.50% -0.50%] index_select random_sorted : Elapsed 0.058 ms (5.782 ms / 100) 5.754 -> 5.740 ( -0.24%) [ +0.07% +0.00% +0.07% / +0.16% -0.24% -0.02%] index_select perm : Elapsed 0.058 ms (5.758 ms / 100) 5.775 -> 5.744 ( -0.54%) [ +0.00% +0.09% +0.09% / -0.02% -0.54% -0.54%] index_select perm_sorted : Elapsed 0.058 ms (5.775 ms / 100) B = [16, 4, 40, 20] (stride (800, 12800, 1, 40)) A = [16, 5, 40, 20] (stride (1, 16, 1600, 80)) dim = 1 5.528 -> 5.525 ( -0.05%) [ +0.04% +0.09% +0.00% / -0.02% -0.05% -0.02%] index_select const : Elapsed 0.055 ms (5.530 ms / 100) 5.567 -> 5.570 ( +0.05%) [ +0.05% +0.00% +0.13% / +0.05% +0.11% +0.14%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.00% +0.13% +0.11% / +0.09% +0.18% +0.09%] index_select linear : Elapsed 0.056 ms (5.567 ms / 100) 5.571 -> 5.567 ( -0.07%) [ +0.07% +0.00% +0.05% / -0.07% +0.14% +0.00%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.523 -> 5.523 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.13% +0.00% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.527 ms / 100) 5.523 -> 5.529 ( +0.11%) [ +0.14% +0.00% +0.13% / +0.16% +0.11% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.531 ms / 100) 5.572 -> 5.577 ( +0.09%) [ +0.02% +0.00% +0.02% / +0.09% +0.11% +0.16%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.587 -> 5.587 ( +0.00%) [ +0.16% +0.00% +0.13% / +0.00% +0.02% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.596 ms / 100) 5.554 -> 5.557 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.13% +0.11% +0.05%] index_select random : Elapsed 0.056 ms (5.555 ms / 100) 5.549 -> 5.554 ( +0.09%) [ +0.00% +0.16% +0.09% / +0.18% +0.09% +0.25%] index_select random_sorted : Elapsed 0.055 ms (5.549 ms / 100) 5.579 -> 5.577 ( -0.04%) [ +0.07% +0.00% +0.09% / +0.20% -0.04% -0.02%] index_select perm : Elapsed 0.056 ms (5.583 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.00% +0.02% / -0.04% +0.02% +0.04%] index_select perm_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [16, 4, 40, 20] (stride (800, 12800, 1, 40)) A = [16, 5, 40, 20] (stride (1, 16, 80, 3200)) dim = 1 5.782 -> 5.764 ( -0.31%) [ +0.00% +0.00% +0.07% / +0.10% -0.28% -0.31%] index_select const : Elapsed 0.058 ms (5.782 ms / 100) 5.839 -> 5.830 ( -0.15%) [ +0.15% +0.00% +0.10% / +0.10% -0.10% -0.15%] index_select wrap : Elapsed 0.058 ms (5.848 ms / 100) 5.843 -> 5.831 ( -0.21%) [ +0.00% +0.09% +0.10% / +0.15% -0.21% -0.15%] index_select linear : Elapsed 0.058 ms (5.843 ms / 100) 5.845 -> 5.834 ( -0.19%) [ +0.00% +0.09% +0.03% / +0.02% -0.19% -0.12%] index_select reverse : Elapsed 0.058 ms (5.845 ms / 100) 5.779 -> 5.762 ( -0.29%) [ +0.10% +0.02% +0.00% / +0.00% -0.29% -0.17%] index_select skip64 : Elapsed 0.058 ms (5.785 ms / 100) 5.779 -> 5.766 ( -0.22%) [ +0.00% +0.07% +0.09% / +0.10% -0.21% -0.22%] index_select skip256 : Elapsed 0.058 ms (5.779 ms / 100) 5.838 -> 5.831 ( -0.12%) [ +0.12% +0.00% +0.12% / +0.14% -0.12% -0.05%] index_select spread : Elapsed 0.058 ms (5.845 ms / 100) 5.870 -> 5.852 ( -0.31%) [ +0.07% +0.00% +0.09% / +0.07% -0.24% -0.31%] index_select strided 3 : Elapsed 0.059 ms (5.874 ms / 100) 5.841 -> 5.820 ( -0.36%) [ +0.00% +0.00% +0.00% / -0.07% -0.33% -0.36%] index_select random : Elapsed 0.058 ms (5.841 ms / 100) 5.832 -> 5.824 ( -0.14%) [ +0.00% +0.10% +0.10% / +0.07% -0.14% -0.10%] index_select random_sorted : Elapsed 0.058 ms (5.832 ms / 100) 5.851 -> 5.839 ( -0.21%) [ +0.09% +0.00% +0.19% / +0.15% -0.21% -0.09%] index_select perm : Elapsed 0.059 ms (5.856 ms / 100) 5.847 -> 5.837 ( -0.17%) [ +0.05% +0.03% +0.00% / +0.19% -0.17% -0.10%] index_select perm_sorted : Elapsed 0.059 ms (5.850 ms / 100) B = [16, 4, 40, 20] (stride (1, 640, 16, 2560)) A = [16, 5, 40, 20] (stride (200, 40, 1, 3200)) dim = 1 5.788 -> 5.787 ( -0.02%) [ +0.00% +0.17% +0.21% / +0.21% +0.07% -0.02%] index_select const : Elapsed 0.058 ms (5.788 ms / 100) 5.855 -> 5.842 ( -0.22%) [ +0.12% +0.00% +0.15% / +0.22% -0.07% -0.22%] index_select wrap : Elapsed 0.059 ms (5.862 ms / 100) 5.856 -> 5.848 ( -0.14%) [ +0.02% +0.19% +0.00% / +0.22% -0.02% -0.14%] index_select linear : Elapsed 0.059 ms (5.857 ms / 100) 5.853 -> 5.851 ( -0.03%) [ +0.00% +0.09% +0.21% / +0.10% -0.03% -0.03%] index_select reverse : Elapsed 0.059 ms (5.853 ms / 100) 5.793 -> 5.784 ( -0.16%) [ +0.00% +0.09% +0.00% / +0.12% -0.02% -0.16%] index_select skip64 : Elapsed 0.058 ms (5.793 ms / 100) 5.789 -> 5.790 ( +0.02%) [ +0.26% +0.00% +0.10% / +0.03% +0.02% +0.03%] index_select skip256 : Elapsed 0.058 ms (5.804 ms / 100) 5.857 -> 5.847 ( -0.17%) [ +0.03% +0.00% +0.00% / +0.09% -0.17% -0.07%] index_select spread : Elapsed 0.059 ms (5.859 ms / 100) 5.865 -> 5.856 ( -0.15%) [ +0.00% +0.12% +0.09% / +0.17% -0.03% -0.15%] index_select strided 3 : Elapsed 0.059 ms (5.865 ms / 100) 5.832 -> 5.817 ( -0.26%) [ +0.15% +0.03% +0.00% / +0.05% -0.02% -0.26%] index_select random : Elapsed 0.058 ms (5.841 ms / 100) 5.819 -> 5.805 ( -0.24%) [ +0.00% +0.07% +0.02% / +0.05% -0.19% -0.24%] index_select random_sorted : Elapsed 0.058 ms (5.819 ms / 100) 5.863 -> 5.850 ( -0.22%) [ +0.00% +0.12% +0.10% / +0.19% -0.22% -0.02%] index_select perm : Elapsed 0.059 ms (5.863 ms / 100) 5.866 -> 5.857 ( -0.15%) [ +0.00% +0.05% +0.20% / +0.03% -0.15% -0.15%] index_select perm_sorted : Elapsed 0.059 ms (5.866 ms / 100) B = [16, 4, 40, 20] (stride (1, 16, 64, 2560)) A = [16, 5, 40, 20] (stride (100, 1, 1600, 5)) dim = 1 5.966 -> 5.969 ( +0.05%) [ +0.12% +0.00% +0.07% / +0.07% +0.05% +0.15%] index_select const : Elapsed 0.060 ms (5.973 ms / 100) 5.960 -> 5.963 ( +0.05%) [ +0.12% +0.00% +0.18% / +0.08% +0.05% +0.13%] index_select wrap : Elapsed 0.060 ms (5.967 ms / 100) 5.962 -> 5.965 ( +0.05%) [ +0.00% +0.13% +0.07% / +0.05% +0.17% +0.05%] index_select linear : Elapsed 0.060 ms (5.962 ms / 100) 5.965 -> 5.965 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.07% +0.08%] index_select reverse : Elapsed 0.060 ms (5.971 ms / 100) 5.965 -> 5.963 ( -0.03%) [ +0.03% +0.05% +0.00% / +0.07% +0.03% -0.03%] index_select skip64 : Elapsed 0.060 ms (5.967 ms / 100) 5.960 -> 5.964 ( +0.07%) [ +0.07% +0.00% +0.08% / +0.15% +0.20% +0.07%] index_select skip256 : Elapsed 0.060 ms (5.964 ms / 100) 5.963 -> 5.963 ( +0.00%) [ +0.08% +0.00% +0.12% / +0.10% +0.17% +0.00%] index_select spread : Elapsed 0.060 ms (5.968 ms / 100) 5.964 -> 5.963 ( -0.02%) [ +0.00% +0.02% +0.17% / -0.02% +0.07% +0.10%] index_select strided 3 : Elapsed 0.060 ms (5.964 ms / 100) 5.962 -> 5.963 ( +0.02%) [ +0.07% +0.00% +0.12% / +0.02% +0.18% +0.18%] index_select random : Elapsed 0.060 ms (5.966 ms / 100) 5.966 -> 5.962 ( -0.07%) [ +0.08% +0.02% +0.00% / -0.07% -0.05% +0.07%] index_select random_sorted : Elapsed 0.060 ms (5.971 ms / 100) 5.966 -> 5.968 ( +0.03%) [ +0.00% +0.03% +0.07% / +0.07% +0.08% +0.03%] index_select perm : Elapsed 0.060 ms (5.966 ms / 100) 5.962 -> 5.966 ( +0.07%) [ +0.12% +0.08% +0.00% / +0.20% +0.15% +0.07%] index_select perm_sorted : Elapsed 0.060 ms (5.969 ms / 100) out_shape = [16, 5, 4, 20] in_shape = [16, 5, 40, 20] idx_dim = 2 B = [16, 5, 4, 20] (stride (400, 80, 20, 1)) A = [16, 5, 40, 20] (stride (4000, 1, 5, 200)) dim = 2 1.252 -> 1.252 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.48% +0.72%] index_select const : Elapsed 0.013 ms (1.253 ms / 100) 1.254 -> 1.252 ( -0.16%) [ +0.08% +0.00% +0.08% / -0.16% +0.40% +0.64%] index_select wrap : Elapsed 0.013 ms (1.255 ms / 100) 1.249 -> 1.251 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.72% +0.56%] index_select linear : Elapsed 0.013 ms (1.251 ms / 100) 1.251 -> 1.250 ( -0.08%) [ +0.40% +0.00% +0.00% / -0.08% +1.20% +0.64%] index_select reverse : Elapsed 0.013 ms (1.256 ms / 100) 1.256 -> 1.257 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.80% +0.80%] index_select skip64 : Elapsed 0.013 ms (1.258 ms / 100) 1.248 -> 1.252 ( +0.32%) [ +0.08% +0.08% +0.00% / +0.32% +0.72% +0.48%] index_select skip256 : Elapsed 0.012 ms (1.249 ms / 100) 1.258 -> 1.260 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.79% +0.64%] index_select spread : Elapsed 0.013 ms (1.260 ms / 100) 1.252 -> 1.253 ( +0.08%) [ +0.00% +0.16% +0.24% / +0.08% +0.64% +0.80%] index_select strided 3 : Elapsed 0.013 ms (1.252 ms / 100) 1.258 -> 1.262 ( +0.32%) [ +0.00% +0.40% +0.16% / +0.32% +1.03% +0.72%] index_select strided 5 : Elapsed 0.013 ms (1.258 ms / 100) 1.255 -> 1.257 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.32% +0.80%] index_select strided 7 : Elapsed 0.013 ms (1.256 ms / 100) 1.254 -> 1.255 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.40% +0.48%] index_select strided 8 : Elapsed 0.013 ms (1.255 ms / 100) 1.258 -> 1.261 ( +0.24%) [ +0.16% +0.40% +0.00% / +0.24% +0.79% +0.72%] index_select strided 16 : Elapsed 0.013 ms (1.260 ms / 100) 1.255 -> 1.255 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.48% +0.40%] index_select random : Elapsed 0.013 ms (1.255 ms / 100) 1.250 -> 1.252 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.64% +0.40%] index_select random_sorted : Elapsed 0.012 ms (1.250 ms / 100) 1.266 -> 1.266 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.47% +0.32%] index_select perm : Elapsed 0.013 ms (1.268 ms / 100) 1.260 -> 1.261 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.48% +0.32%] index_select perm_sorted : Elapsed 0.013 ms (1.261 ms / 100) B = [16, 5, 4, 20] (stride (400, 20, 100, 1)) A = [16, 5, 40, 20] (stride (4000, 1, 5, 200)) dim = 2 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.44%] index_select const : Elapsed 0.014 ms (1.351 ms / 100) 1.348 -> 1.350 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.82% +0.67%] index_select wrap : Elapsed 0.013 ms (1.348 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.59% +0.67%] index_select linear : Elapsed 0.013 ms (1.347 ms / 100) 1.345 -> 1.348 ( +0.22%) [ +0.15% +0.00% +0.22% / +0.22% +0.59% +0.82%] index_select reverse : Elapsed 0.013 ms (1.347 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_select skip64 : Elapsed 0.013 ms (1.346 ms / 100) 1.342 -> 1.346 ( +0.30%) [ +0.00% +0.15% +0.15% / +0.30% +0.82% +0.60%] index_select skip256 : Elapsed 0.013 ms (1.342 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.67% +0.52%] index_select spread : Elapsed 0.013 ms (1.346 ms / 100) 1.346 -> 1.346 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.37% +0.37%] index_select strided 3 : Elapsed 0.013 ms (1.346 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.00% +0.22% +0.15% / +0.07% +0.60% +0.60%] index_select strided 5 : Elapsed 0.013 ms (1.340 ms / 100) 1.347 -> 1.346 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.52% +0.37%] index_select strided 7 : Elapsed 0.013 ms (1.347 ms / 100) 1.346 -> 1.345 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.59% +0.67%] index_select strided 8 : Elapsed 0.013 ms (1.347 ms / 100) 1.352 -> 1.353 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.44% +0.37%] index_select strided 16 : Elapsed 0.014 ms (1.353 ms / 100) 1.346 -> 1.349 ( +0.22%) [ +0.15% +0.22% +0.00% / +0.22% +0.45% +0.67%] index_select random : Elapsed 0.013 ms (1.348 ms / 100) 1.349 -> 1.350 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.52% +0.44%] index_select random_sorted : Elapsed 0.014 ms (1.352 ms / 100) 1.351 -> 1.352 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.52% +0.52%] index_select perm : Elapsed 0.014 ms (1.353 ms / 100) 1.355 -> 1.353 ( -0.15%) [ +0.15% +0.07% +0.00% / -0.15% +0.37% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.357 ms / 100) B = [16, 5, 4, 20] (stride (400, 20, 100, 1)) A = [16, 5, 40, 20] (stride (20, 12800, 320, 1)) dim = 2 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.63%] index_select const : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.63% +0.55%] index_select linear : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.79%] index_select reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.16% +0.00% +0.39% / +0.00% +0.79% +0.71%] index_select skip64 : Elapsed 0.013 ms (1.273 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_select spread : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.63% +0.63%] index_select strided 5 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.63% +0.79%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.31% +0.24%] index_select perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) B = [16, 5, 4, 20] (stride (400, 4, 1, 20)) A = [16, 5, 40, 20] (stride (4000, 20, 100, 1)) dim = 2 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.39% +0.00% / +0.16% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.39% +0.08% +0.00% / +0.16% +0.71% +0.71%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.55% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.31%] index_select random : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.24%] index_select random_sorted : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select perm : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.276 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 20, 1)) dim = 2 fill_cnt = 40 2.688 -> 2.686 ( -0.07%) [ +0.00% +0.11% +0.15% / -0.07% +0.33% +0.33%] index_fill_ const : Elapsed 0.027 ms (2.688 ms / 100) 2.698 -> 2.692 ( -0.22%) [ +0.00% +0.26% +0.11% / -0.22% -0.19% +0.07%] index_fill_ linear : Elapsed 0.027 ms (2.698 ms / 100) 2.696 -> 2.687 ( -0.33%) [ +0.00% +0.00% +0.07% / -0.33% -0.26% -0.30%] index_fill_ reverse : Elapsed 0.027 ms (2.696 ms / 100) 2.698 -> 2.691 ( -0.26%) [ +0.00% +0.26% +0.07% / -0.26% -0.15% -0.04%] index_fill_ skip64 : Elapsed 0.027 ms (2.698 ms / 100) 2.697 -> 2.688 ( -0.33%) [ +0.11% +0.11% +0.00% / -0.33% -0.30% -0.19%] index_fill_ skip256 : Elapsed 0.027 ms (2.700 ms / 100) 2.695 -> 2.690 ( -0.19%) [ +0.00% +0.04% +0.04% / -0.19% -0.11% +0.07%] index_fill_ spread : Elapsed 0.027 ms (2.695 ms / 100) 2.696 -> 2.694 ( -0.07%) [ +0.11% +0.00% +0.07% / -0.07% +0.04% +0.04%] index_fill_ strided 3 : Elapsed 0.027 ms (2.699 ms / 100) 2.699 -> 2.690 ( -0.33%) [ +0.00% +0.00% +0.04% / -0.33% -0.26% -0.15%] index_fill_ random : Elapsed 0.027 ms (2.699 ms / 100) 2.695 -> 2.690 ( -0.19%) [ +0.26% +0.00% +0.22% / -0.19% +0.04% +0.04%] index_fill_ random_sorted : Elapsed 0.027 ms (2.702 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 20, 1)) A = [16, 5, 40, 20] (stride (200, 1, 5, 3200)) dim = 2 1.338 -> 1.339 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.97% +0.75%] index_select const : Elapsed 0.013 ms (1.340 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.45% +0.67%] index_select wrap : Elapsed 0.013 ms (1.346 ms / 100) 1.348 -> 1.353 ( +0.37%) [ +0.00% +0.15% +0.22% / +0.37% +0.82% +0.52%] index_select linear : Elapsed 0.013 ms (1.348 ms / 100) 1.344 -> 1.342 ( -0.15%) [ +0.00% +0.00% +0.37% / -0.15% +0.67% +0.52%] index_select reverse : Elapsed 0.013 ms (1.344 ms / 100) 1.340 -> 1.342 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.52% +0.52%] index_select skip64 : Elapsed 0.013 ms (1.340 ms / 100) 1.345 -> 1.347 ( +0.15%) [ +0.45% +0.89% +0.00% / +0.15% +0.89% +0.37%] index_select skip256 : Elapsed 0.014 ms (1.351 ms / 100) 1.348 -> 1.353 ( +0.37%) [ +0.37% +0.00% +0.37% / +0.37% +0.45% +0.52%] index_select spread : Elapsed 0.014 ms (1.353 ms / 100) 1.354 -> 1.349 ( -0.37%) [ +0.00% +0.15% +0.30% / -0.37% +0.30% +0.22%] index_select strided 3 : Elapsed 0.014 ms (1.354 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.44% +0.37%] index_select strided 5 : Elapsed 0.014 ms (1.353 ms / 100) 1.348 -> 1.351 ( +0.22%) [ +0.30% +0.22% +0.00% / +0.22% +0.74% +0.67%] index_select strided 7 : Elapsed 0.014 ms (1.352 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.52% +0.59%] index_select strided 8 : Elapsed 0.014 ms (1.352 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.52% +0.52%] index_select strided 16 : Elapsed 0.013 ms (1.342 ms / 100) 1.347 -> 1.352 ( +0.37%) [ +0.22% +0.30% +0.00% / +0.37% +0.67% +0.82%] index_select random : Elapsed 0.014 ms (1.350 ms / 100) 1.355 -> 1.353 ( -0.15%) [ +0.00% +0.15% +0.07% / -0.15% +0.52% +0.15%] index_select random_sorted : Elapsed 0.014 ms (1.355 ms / 100) 1.342 -> 1.346 ( +0.30%) [ +0.00% +0.45% +0.37% / +0.30% +0.52% +0.52%] index_select perm : Elapsed 0.013 ms (1.342 ms / 100) 1.342 -> 1.345 ( +0.22%) [ +0.30% +0.07% +0.00% / +0.22% +0.97% +0.89%] index_select perm_sorted : Elapsed 0.013 ms (1.346 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 1, 4)) A = [16, 5, 40, 20] (stride (100, 20, 1600, 1)) dim = 2 1.191 -> 1.193 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.50% +0.42%] index_select const : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select wrap : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.67% +0.50%] index_select linear : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.76%] index_select reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.50% +0.50%] index_select skip64 : Elapsed 0.012 ms (1.192 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.191 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select spread : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_select strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.50% +0.42%] index_select strided 5 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.25% +0.00% +0.00% / +0.08% +0.42% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.195 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.34% +0.00% / +0.08% +0.50% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select strided 16 : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.59%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.34%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.34% +0.34%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.50% +0.42%] index_select perm_sorted : Elapsed 0.012 ms (1.192 ms / 100) B = [16, 5, 4, 20] (stride (20, 1280, 320, 1)) A = [16, 5, 40, 20] (stride (4000, 1, 5, 200)) dim = 2 1.339 -> 1.342 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.67% +0.52%] index_select const : Elapsed 0.013 ms (1.339 ms / 100) 1.334 -> 1.334 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.60% +0.45%] index_select wrap : Elapsed 0.013 ms (1.336 ms / 100) 1.336 -> 1.335 ( -0.07%) [ +0.07% +0.15% +0.00% / -0.07% +0.52% +0.45%] index_select linear : Elapsed 0.013 ms (1.337 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.15% +0.00% +0.22% / +0.07% +0.75% +0.75%] index_select reverse : Elapsed 0.013 ms (1.338 ms / 100) 1.339 -> 1.343 ( +0.30%) [ +0.30% +0.00% +0.00% / +0.30% +0.60% +0.67%] index_select skip64 : Elapsed 0.013 ms (1.343 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.45% +0.59%] index_select skip256 : Elapsed 0.013 ms (1.345 ms / 100) 1.346 -> 1.347 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.45% +0.52%] index_select spread : Elapsed 0.013 ms (1.349 ms / 100) 1.345 -> 1.347 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.67% +0.52%] index_select strided 3 : Elapsed 0.013 ms (1.346 ms / 100) 1.348 -> 1.349 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.67% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.348 ms / 100) 1.348 -> 1.354 ( +0.45%) [ +0.22% +0.37% +0.00% / +0.45% +0.82% +0.74%] index_select strided 7 : Elapsed 0.014 ms (1.351 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.59% +0.52%] index_select strided 8 : Elapsed 0.013 ms (1.345 ms / 100) 1.350 -> 1.350 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.59% +0.52%] index_select strided 16 : Elapsed 0.014 ms (1.351 ms / 100) 1.343 -> 1.346 ( +0.22%) [ +0.30% +0.22% +0.00% / +0.22% +0.60% +0.37%] index_select random : Elapsed 0.013 ms (1.347 ms / 100) 1.344 -> 1.350 ( +0.45%) [ +0.07% +0.45% +0.00% / +0.45% +0.89% +0.45%] index_select random_sorted : Elapsed 0.013 ms (1.345 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.52% +0.59%] index_select perm : Elapsed 0.013 ms (1.346 ms / 100) 1.336 -> 1.339 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +0.67% +0.60%] index_select perm_sorted : Elapsed 0.013 ms (1.339 ms / 100) B = [16, 5, 4, 20] (stride (1, 1280, 320, 16)) A = [16, 5, 40, 20] (stride (4000, 800, 1, 40)) dim = 2 1.279 -> 1.282 ( +0.23%) [ +0.23% +0.16% +0.00% / +0.23% +0.70% +0.55%] index_select const : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.55% +0.55%] index_select wrap : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.62%] index_select linear : Elapsed 0.013 ms (1.283 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.63% +0.63%] index_select reverse : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.78% +0.63%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.62%] index_select strided 5 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.276 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.278 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.47% +0.55%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.23% +0.31% +0.00% / +0.08% +0.39% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.283 ms / 100) 1.286 -> 1.285 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.23% +0.31%] index_select perm : Elapsed 0.013 ms (1.286 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.63% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) B = [16, 5, 4, 20] (stride (100, 20, 1600, 1)) A = [16, 5, 40, 20] (stride (800, 12800, 1, 40)) dim = 2 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.63%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.16% +0.39% +0.55%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.23% +0.08% +0.00% / -0.08% +0.55% +0.47%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.287 ( +0.63%) [ +0.08% +0.08% +0.00% / +0.63% +0.63% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.47% +0.31%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.55% +0.39%] index_select spread : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.47% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.278 -> 1.276 ( -0.16%) [ +0.00% +0.16% +0.16% / -0.16% +0.55% +0.31%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.31% +0.23%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.63% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.55% +0.47%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.71% +0.63%] index_select perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) out_shape = [16, 5, 40, 4] in_shape = [16, 5, 40, 20] idx_dim = 3 B = [16, 5, 40, 4] (stride (40, 2560, 1, 640)) A = [16, 5, 40, 20] (stride (20, 12800, 320, 1)) dim = 3 2.103 -> 2.112 ( +0.43%) [ +0.33% +0.00% +0.19% / +0.43% +0.95% +0.76%] index_select const : Elapsed 0.021 ms (2.110 ms / 100) 2.102 -> 2.106 ( +0.19%) [ +0.00% +0.10% +0.29% / +0.19% +1.19% +1.00%] index_select wrap : Elapsed 0.021 ms (2.102 ms / 100) 2.106 -> 2.110 ( +0.19%) [ +0.14% +0.00% +0.14% / +0.19% +1.09% +0.95%] index_select linear : Elapsed 0.021 ms (2.109 ms / 100) 2.105 -> 2.108 ( +0.14%) [ +0.00% +0.33% +0.24% / +0.14% +0.62% +1.09%] index_select reverse : Elapsed 0.021 ms (2.105 ms / 100) 2.104 -> 2.113 ( +0.43%) [ +0.38% +0.00% +0.24% / +0.43% +1.00% +0.95%] index_select skip64 : Elapsed 0.021 ms (2.112 ms / 100) 2.104 -> 2.111 ( +0.33%) [ +0.10% +0.00% +0.19% / +0.33% +0.62% +1.14%] index_select skip256 : Elapsed 0.021 ms (2.106 ms / 100) 2.163 -> 2.172 ( +0.42%) [ +0.28% +0.00% +0.14% / +0.42% +0.88% +0.83%] index_select spread : Elapsed 0.022 ms (2.169 ms / 100) 2.148 -> 2.155 ( +0.33%) [ +0.09% +0.09% +0.00% / +0.33% +0.65% +0.51%] index_select strided 3 : Elapsed 0.022 ms (2.150 ms / 100) 2.167 -> 2.176 ( +0.42%) [ +0.28% +0.00% +0.05% / +0.42% +0.69% +0.55%] index_select strided 5 : Elapsed 0.022 ms (2.173 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.93% +0.93%] index_select strided 7 : Elapsed 0.022 ms (2.161 ms / 100) 2.157 -> 2.155 ( -0.09%) [ +0.14% +0.05% +0.00% / -0.09% +0.79% +0.79%] index_select strided 8 : Elapsed 0.022 ms (2.160 ms / 100) 2.166 -> 2.171 ( +0.23%) [ +0.23% +0.00% +0.05% / +0.23% +0.92% +0.78%] index_select strided 16 : Elapsed 0.022 ms (2.171 ms / 100) 2.142 -> 2.147 ( +0.23%) [ +0.14% +0.19% +0.00% / +0.23% +1.21% +1.12%] index_select random : Elapsed 0.021 ms (2.145 ms / 100) 2.145 -> 2.143 ( -0.09%) [ +0.05% +0.00% +0.09% / -0.09% +0.51% +0.79%] index_select random_sorted : Elapsed 0.021 ms (2.146 ms / 100) 2.156 -> 2.160 ( +0.19%) [ +0.00% +0.09% +0.19% / +0.19% +0.79% +0.79%] index_select perm : Elapsed 0.022 ms (2.156 ms / 100) 2.149 -> 2.155 ( +0.28%) [ +0.23% +0.00% +0.23% / +0.28% +1.16% +1.16%] index_select perm_sorted : Elapsed 0.022 ms (2.154 ms / 100) B = [16, 5, 40, 4] (stride (1, 16, 320, 80)) A = [16, 5, 40, 20] (stride (1, 320, 1600, 16)) dim = 3 2.183 -> 2.182 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.50% +0.32%] index_select const : Elapsed 0.022 ms (2.184 ms / 100) 2.183 -> 2.180 ( -0.14%) [ +0.00% +0.18% +0.00% / -0.14% +0.27% +0.32%] index_select wrap : Elapsed 0.022 ms (2.183 ms / 100) 2.184 -> 2.186 ( +0.09%) [ +0.00% +0.05% +0.14% / +0.09% +0.46% +0.37%] index_select linear : Elapsed 0.022 ms (2.184 ms / 100) 2.187 -> 2.186 ( -0.05%) [ +0.14% +0.00% +0.09% / -0.05% +0.64% +0.41%] index_select reverse : Elapsed 0.022 ms (2.190 ms / 100) 2.181 -> 2.183 ( +0.09%) [ +0.14% +0.14% +0.00% / +0.09% +0.46% +0.50%] index_select skip64 : Elapsed 0.022 ms (2.184 ms / 100) 2.183 -> 2.182 ( -0.05%) [ +0.00% +0.23% +0.14% / -0.05% +0.37% +0.27%] index_select skip256 : Elapsed 0.022 ms (2.183 ms / 100) 2.172 -> 2.177 ( +0.23%) [ +0.18% +0.23% +0.00% / +0.23% +0.46% +0.41%] index_select spread : Elapsed 0.022 ms (2.176 ms / 100) 2.172 -> 2.176 ( +0.18%) [ +0.09% +0.05% +0.00% / +0.18% +0.32% +0.37%] index_select strided 3 : Elapsed 0.022 ms (2.174 ms / 100) 2.177 -> 2.177 ( +0.00%) [ +0.18% +0.05% +0.00% / +0.00% +0.23% +0.14%] index_select strided 5 : Elapsed 0.022 ms (2.181 ms / 100) 2.184 -> 2.187 ( +0.14%) [ +0.00% +0.09% +0.14% / +0.14% +0.32% +0.37%] index_select strided 7 : Elapsed 0.022 ms (2.184 ms / 100) 2.166 -> 2.168 ( +0.09%) [ +0.00% +0.14% +0.00% / +0.09% +0.42% +0.32%] index_select strided 8 : Elapsed 0.022 ms (2.166 ms / 100) 2.184 -> 2.185 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.50% +0.41%] index_select strided 16 : Elapsed 0.022 ms (2.184 ms / 100) 2.162 -> 2.166 ( +0.19%) [ +0.00% +0.19% +0.09% / +0.19% +0.42% +0.37%] index_select random : Elapsed 0.022 ms (2.162 ms / 100) 2.168 -> 2.172 ( +0.18%) [ +0.09% +0.00% +0.09% / +0.18% +0.46% +0.46%] index_select random_sorted : Elapsed 0.022 ms (2.170 ms / 100) 2.176 -> 2.179 ( +0.14%) [ +0.46% +0.00% +0.14% / +0.14% +0.28% +0.37%] index_select perm : Elapsed 0.022 ms (2.186 ms / 100) 2.174 -> 2.177 ( +0.14%) [ +0.00% +0.05% +0.09% / +0.14% +0.23% +0.37%] index_select perm_sorted : Elapsed 0.022 ms (2.174 ms / 100) B = [16, 5, 40, 4] (stride (200, 40, 1, 3200)) A = [16, 5, 40, 20] (stride (100, 20, 1600, 1)) dim = 3 1.875 -> 1.872 ( -0.16%) [ +0.21% +0.00% +0.16% / -0.11% -0.11% -0.16%] index_select const : Elapsed 0.019 ms (1.879 ms / 100) 1.869 -> 1.874 ( +0.27%) [ +0.54% +0.00% +0.21% / +0.27% +0.70% +0.54%] index_select wrap : Elapsed 0.019 ms (1.879 ms / 100) 1.876 -> 1.876 ( +0.00%) [ +0.21% +0.11% +0.00% / +0.00% +0.00% +0.00%] index_select linear : Elapsed 0.019 ms (1.880 ms / 100) 1.872 -> 1.874 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.11% +0.37%] index_select reverse : Elapsed 0.019 ms (1.874 ms / 100) 1.872 -> 1.874 ( +0.11%) [ +0.21% +0.27% +0.00% / +0.37% +0.48% +0.11%] index_select skip64 : Elapsed 0.019 ms (1.876 ms / 100) 1.877 -> 1.871 ( -0.32%) [ +0.11% +0.00% +0.05% / -0.32% +0.05% -0.11%] index_select skip256 : Elapsed 0.019 ms (1.879 ms / 100) 1.942 -> 1.942 ( +0.00%) [ +0.00% +0.31% +0.05% / +0.00% +0.57% +0.77%] index_select spread : Elapsed 0.019 ms (1.942 ms / 100) 1.909 -> 1.910 ( +0.05%) [ +0.21% +0.00% +0.26% / +0.05% +0.37% +0.47%] index_select strided 3 : Elapsed 0.019 ms (1.913 ms / 100) 1.939 -> 1.945 ( +0.31%) [ +0.26% +0.00% +0.26% / +0.31% +0.93% +1.08%] index_select strided 5 : Elapsed 0.019 ms (1.944 ms / 100) 1.921 -> 1.921 ( +0.00%) [ +0.31% +0.21% +0.00% / +0.00% +0.78% +0.73%] index_select strided 7 : Elapsed 0.019 ms (1.927 ms / 100) 1.923 -> 1.928 ( +0.26%) [ +0.31% +0.00% +0.10% / +0.26% +0.42% +0.62%] index_select strided 8 : Elapsed 0.019 ms (1.929 ms / 100) 1.944 -> 1.947 ( +0.15%) [ +0.00% +0.21% +0.15% / +0.15% +0.46% +0.62%] index_select strided 16 : Elapsed 0.019 ms (1.944 ms / 100) 1.927 -> 1.932 ( +0.26%) [ +0.26% +0.00% +0.36% / +0.26% +0.26% +0.26%] index_select random : Elapsed 0.019 ms (1.932 ms / 100) 1.937 -> 1.947 ( +0.52%) [ +0.57% +0.00% +0.57% / +0.52% +0.62% +0.77%] index_select random_sorted : Elapsed 0.019 ms (1.948 ms / 100) 1.927 -> 1.933 ( +0.31%) [ +0.21% +0.00% +0.05% / +0.47% +0.31% +0.62%] index_select perm : Elapsed 0.019 ms (1.931 ms / 100) 1.939 -> 1.941 ( +0.10%) [ +0.26% +0.00% +0.10% / +0.10% +0.57% +0.36%] index_select perm_sorted : Elapsed 0.019 ms (1.944 ms / 100) B = [16, 5, 40, 4] (stride (1, 640, 16, 3200)) A = [16, 5, 40, 20] (stride (20, 12800, 320, 1)) dim = 3 0.819 -> 0.820 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +2.44% +2.20%] index_select const : Elapsed 0.008 ms (0.820 ms / 100) 0.821 -> 0.826 ( +0.61%) [ +1.22% +0.85% +0.00% / +0.61% +1.22% +1.58%] index_select wrap : Elapsed 0.008 ms (0.831 ms / 100) 0.827 -> 0.826 ( -0.12%) [ +0.00% +0.12% +0.24% / -0.12% +0.85% +0.60%] index_select linear : Elapsed 0.008 ms (0.827 ms / 100) 0.825 -> 0.823 ( -0.24%) [ +0.12% +0.00% +0.00% / -0.24% +1.09% +1.33%] index_select reverse : Elapsed 0.008 ms (0.826 ms / 100) 0.824 -> 0.827 ( +0.36%) [ +0.00% +0.12% +0.12% / +0.36% +1.33% +1.21%] index_select skip64 : Elapsed 0.008 ms (0.824 ms / 100) 0.818 -> 0.819 ( +0.12%) [ +0.49% +0.37% +0.00% / +0.12% +2.69% +3.06%] index_select skip256 : Elapsed 0.008 ms (0.822 ms / 100) 0.837 -> 0.839 ( +0.24%) [ +0.00% +0.36% +0.12% / +0.24% +3.35% +3.70%] index_select spread : Elapsed 0.008 ms (0.837 ms / 100) 0.831 -> 0.833 ( +0.24%) [ +0.24% +0.00% +0.00% / +0.24% +3.37% +2.41%] index_select strided 3 : Elapsed 0.008 ms (0.833 ms / 100) 0.838 -> 0.843 ( +0.60%) [ +0.12% +0.00% +0.12% / +0.60% +3.10% +2.86%] index_select strided 5 : Elapsed 0.008 ms (0.839 ms / 100) 0.837 -> 0.842 ( +0.60%) [ +0.00% +0.00% +0.00% / +0.60% +2.03% +2.03%] index_select strided 7 : Elapsed 0.008 ms (0.837 ms / 100) 0.834 -> 0.834 ( +0.00%) [ +0.00% +0.00% +0.36% / +0.00% +2.64% +2.04%] index_select strided 8 : Elapsed 0.008 ms (0.834 ms / 100) 0.831 -> 0.834 ( +0.36%) [ +0.84% +0.00% +0.72% / +0.36% +3.13% +3.25%] index_select strided 16 : Elapsed 0.008 ms (0.838 ms / 100) 0.841 -> 0.840 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +1.43% +1.31%] index_select random : Elapsed 0.008 ms (0.841 ms / 100) 0.836 -> 0.838 ( +0.24%) [ +0.00% +0.36% +0.12% / +0.24% +2.99% +2.99%] index_select random_sorted : Elapsed 0.008 ms (0.836 ms / 100) 0.836 -> 0.834 ( -0.24%) [ +0.00% +0.00% +0.12% / -0.24% +1.91% +2.03%] index_select perm : Elapsed 0.008 ms (0.836 ms / 100) 0.834 -> 0.834 ( +0.00%) [ +0.00% +0.24% +0.00% / +0.00% +2.40% +2.52%] index_select perm_sorted : Elapsed 0.008 ms (0.834 ms / 100) out_shape = [4, 20, 5, 40] in_shape = [16, 20, 5, 40] idx_dim = 0 B = [4, 20, 5, 40] (stride (4000, 1, 20, 100)) A = [16, 20, 5, 40] (stride (4000, 1, 800, 20)) dim = 0 2.373 -> 2.375 ( +0.08%) [ +0.13% +0.00% +0.17% / +0.08% +0.13% +0.17%] index_select const : Elapsed 0.024 ms (2.376 ms / 100) 2.369 -> 2.378 ( +0.38%) [ +0.04% +0.00% +0.08% / +0.38% +0.38% +0.42%] index_select wrap : Elapsed 0.024 ms (2.370 ms / 100) 2.370 -> 2.373 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.34% +0.17%] index_select linear : Elapsed 0.024 ms (2.370 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.00% +0.13% +0.04% / +0.08% +0.38% +0.59%] index_select reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.384 -> 2.384 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.13% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.385 ms / 100) 2.369 -> 2.372 ( +0.13%) [ +0.00% +0.17% +0.08% / +0.38% +0.13% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.369 ms / 100) 2.365 -> 2.369 ( +0.17%) [ +0.25% +0.34% +0.00% / +0.25% +0.34% +0.17%] index_select spread : Elapsed 0.024 ms (2.371 ms / 100) 2.373 -> 2.372 ( -0.04%) [ +0.00% +0.08% +0.04% / +0.17% -0.04% +0.13%] index_select strided 3 : Elapsed 0.024 ms (2.373 ms / 100) 2.371 -> 2.372 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +0.21% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.374 ms / 100) 2.383 -> 2.384 ( +0.04%) [ +0.21% +0.04% +0.00% / +0.04% +0.17% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.388 ms / 100) 2.368 -> 2.369 ( +0.04%) [ +0.13% +0.00% +0.21% / +0.04% +0.21% +0.13%] index_select strided 8 : Elapsed 0.024 ms (2.371 ms / 100) 2.367 -> 2.370 ( +0.13%) [ +0.00% +0.17% +0.25% / +0.17% +0.46% +0.13%] index_select random : Elapsed 0.024 ms (2.367 ms / 100) 2.374 -> 2.369 ( -0.21%) [ +0.04% +0.00% +0.13% / -0.04% +0.17% -0.21%] index_select random_sorted : Elapsed 0.024 ms (2.375 ms / 100) 2.372 -> 2.372 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.08% +0.00% +0.04%] index_select perm : Elapsed 0.024 ms (2.375 ms / 100) 2.373 -> 2.372 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.04% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.374 ms / 100) B = [4, 20, 5, 40] (stride (1, 800, 4, 20)) A = [16, 20, 5, 40] (stride (1, 16, 12800, 320)) dim = 0 2.501 -> 2.504 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.36% +0.16%] index_select const : Elapsed 0.025 ms (2.501 ms / 100) 2.504 -> 2.505 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.32% +0.20%] index_select wrap : Elapsed 0.025 ms (2.506 ms / 100) 2.501 -> 2.502 ( +0.04%) [ +0.20% +0.00% +0.12% / +0.20% +0.08% +0.04%] index_select linear : Elapsed 0.025 ms (2.506 ms / 100) 2.499 -> 2.505 ( +0.24%) [ +0.08% +0.00% +0.12% / +0.24% +0.36% +0.32%] index_select reverse : Elapsed 0.025 ms (2.501 ms / 100) 2.502 -> 2.505 ( +0.12%) [ +0.12% +0.00% +0.16% / +0.12% +0.48% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.505 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.12% +0.20%] index_select skip256 : Elapsed 0.025 ms (2.504 ms / 100) 2.526 -> 2.527 ( +0.04%) [ +0.16% +0.20% +0.00% / +0.04% +0.48% +0.40%] index_select spread : Elapsed 0.025 ms (2.530 ms / 100) 2.528 -> 2.526 ( -0.08%) [ +0.00% +0.24% +0.08% / -0.08% +0.20% +0.36%] index_select strided 3 : Elapsed 0.025 ms (2.528 ms / 100) 2.527 -> 2.527 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.24% +0.36%] index_select strided 5 : Elapsed 0.025 ms (2.530 ms / 100) 2.525 -> 2.533 ( +0.32%) [ +0.32% +0.00% +0.04% / +0.32% +0.63% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.533 ms / 100) 2.537 -> 2.540 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.39% +0.32%] index_select strided 8 : Elapsed 0.025 ms (2.539 ms / 100) 2.537 -> 2.542 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.32% +0.43%] index_select random : Elapsed 0.025 ms (2.537 ms / 100) 2.528 -> 2.526 ( -0.08%) [ +0.12% +0.00% +0.20% / -0.08% +0.40% +0.16%] index_select random_sorted : Elapsed 0.025 ms (2.531 ms / 100) 2.527 -> 2.527 ( +0.00%) [ +0.00% +0.16% +0.04% / +0.00% +0.28% +0.40%] index_select perm : Elapsed 0.025 ms (2.527 ms / 100) 2.536 -> 2.540 ( +0.16%) [ +0.08% +0.00% +0.04% / +0.16% +0.32% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.538 ms / 100) B = [4, 20, 5, 40] (stride (800, 40, 3200, 1)) A = [16, 20, 5, 40] (stride (4000, 200, 1, 5)) dim = 0 2.386 -> 2.385 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.25% +0.34%] index_select const : Elapsed 0.024 ms (2.388 ms / 100) 2.394 -> 2.397 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.13% +0.46% +0.38%] index_select wrap : Elapsed 0.024 ms (2.399 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.13% +0.08% +0.00% / +0.04% +0.42% +0.38%] index_select linear : Elapsed 0.024 ms (2.395 ms / 100) 2.391 -> 2.394 ( +0.13%) [ +0.00% +0.17% +0.08% / +0.13% +0.67% +0.63%] index_select reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.13% +0.00% +0.04% / +0.08% +0.38% +0.54%] index_select skip64 : Elapsed 0.024 ms (2.389 ms / 100) 2.382 -> 2.385 ( +0.13%) [ +0.29% +0.17% +0.00% / +0.13% +0.63% +0.46%] index_select skip256 : Elapsed 0.024 ms (2.389 ms / 100) 2.392 -> 2.394 ( +0.08%) [ +0.04% +0.00% +0.13% / +0.08% +0.54% +0.42%] index_select spread : Elapsed 0.024 ms (2.393 ms / 100) 2.397 -> 2.402 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.21% +0.42% +0.42%] index_select strided 3 : Elapsed 0.024 ms (2.397 ms / 100) 2.391 -> 2.393 ( +0.08%) [ +0.08% +0.21% +0.00% / +0.08% +0.50% +0.38%] index_select strided 5 : Elapsed 0.024 ms (2.393 ms / 100) 2.397 -> 2.401 ( +0.17%) [ +0.13% +0.00% +0.13% / +0.17% +0.38% +0.58%] index_select strided 7 : Elapsed 0.024 ms (2.400 ms / 100) 2.384 -> 2.388 ( +0.17%) [ +0.00% +0.04% +0.13% / +0.17% +0.46% +0.46%] index_select strided 8 : Elapsed 0.024 ms (2.384 ms / 100) 2.388 -> 2.390 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.50% +0.25%] index_select random : Elapsed 0.024 ms (2.392 ms / 100) 2.389 -> 2.388 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.33% +0.17%] index_select random_sorted : Elapsed 0.024 ms (2.389 ms / 100) 2.396 -> 2.396 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.33% +0.33%] index_select perm : Elapsed 0.024 ms (2.398 ms / 100) 2.394 -> 2.397 ( +0.13%) [ +0.08% +0.00% +0.04% / +0.13% +0.38% +0.38%] index_select perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) out_shape = [16, 4, 5, 40] in_shape = [16, 20, 5, 40] idx_dim = 1 B = [16, 4, 5, 40] (stride (800, 1, 160, 4)) A = [16, 20, 5, 40] (stride (20, 1, 320, 1600)) dim = 1 2.092 -> 2.094 ( +0.10%) [ +0.14% +0.00% +0.19% / +0.10% +0.43% +0.72%] index_select const : Elapsed 0.021 ms (2.095 ms / 100) 2.094 -> 2.093 ( -0.05%) [ +0.00% +0.14% +0.24% / -0.05% +0.96% +0.86%] index_select wrap : Elapsed 0.021 ms (2.094 ms / 100) 2.096 -> 2.093 ( -0.14%) [ +0.14% +0.14% +0.00% / -0.14% +0.72% +0.72%] index_select linear : Elapsed 0.021 ms (2.099 ms / 100) 2.093 -> 2.095 ( +0.10%) [ +0.19% +0.14% +0.00% / +0.10% +0.72% +0.62%] index_select reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.095 -> 2.102 ( +0.33%) [ +0.14% +0.00% +0.05% / +0.33% +0.95% +0.91%] index_select skip64 : Elapsed 0.021 ms (2.098 ms / 100) 2.093 -> 2.096 ( +0.14%) [ +0.24% +0.00% +0.00% / +0.14% +0.53% +0.67%] index_select skip256 : Elapsed 0.021 ms (2.098 ms / 100) 2.156 -> 2.158 ( +0.09%) [ +0.05% +0.00% +0.09% / +0.09% +0.83% +0.83%] index_select spread : Elapsed 0.022 ms (2.157 ms / 100) 2.129 -> 2.135 ( +0.28%) [ +0.19% +0.14% +0.00% / +0.28% +0.70% +0.80%] index_select strided 3 : Elapsed 0.021 ms (2.133 ms / 100) 2.155 -> 2.162 ( +0.32%) [ +0.32% +0.09% +0.00% / +0.32% +0.74% +0.60%] index_select strided 5 : Elapsed 0.022 ms (2.162 ms / 100) 2.149 -> 2.154 ( +0.23%) [ +0.05% +0.14% +0.00% / +0.23% +0.51% +0.74%] index_select strided 7 : Elapsed 0.021 ms (2.150 ms / 100) 2.146 -> 2.146 ( +0.00%) [ +0.09% +0.00% +0.14% / +0.00% +0.56% +0.75%] index_select strided 8 : Elapsed 0.021 ms (2.148 ms / 100) 2.155 -> 2.159 ( +0.19%) [ +0.09% +0.23% +0.00% / +0.19% +0.74% +0.84%] index_select strided 16 : Elapsed 0.022 ms (2.157 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.80% +0.80%] index_select random : Elapsed 0.021 ms (2.139 ms / 100) 2.136 -> 2.138 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +0.70% +0.61%] index_select random_sorted : Elapsed 0.021 ms (2.140 ms / 100) 2.131 -> 2.135 ( +0.19%) [ +0.00% +0.09% +0.14% / +0.19% +0.94% +0.80%] index_select perm : Elapsed 0.021 ms (2.131 ms / 100) 2.121 -> 2.126 ( +0.24%) [ +0.05% +0.28% +0.00% / +0.24% +0.80% +0.71%] index_select perm_sorted : Elapsed 0.021 ms (2.122 ms / 100) B = [16, 4, 5, 40] (stride (160, 40, 2560, 1)) A = [16, 20, 5, 40] (stride (100, 1, 20, 1600)) dim = 1 2.228 -> 2.225 ( -0.13%) [ +0.09% +0.04% +0.00% / -0.04% -0.09% -0.13%] index_select const : Elapsed 0.022 ms (2.230 ms / 100) 2.226 -> 2.230 ( +0.18%) [ +0.00% +0.00% +0.22% / +0.31% +0.18% +0.36%] index_select wrap : Elapsed 0.022 ms (2.226 ms / 100) 2.228 -> 2.225 ( -0.13%) [ +0.00% +0.04% +0.09% / +0.09% +0.04% -0.13%] index_select linear : Elapsed 0.022 ms (2.228 ms / 100) 2.229 -> 2.229 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.36% +0.00% +0.22%] index_select reverse : Elapsed 0.022 ms (2.229 ms / 100) 2.226 -> 2.228 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.09% +0.13%] index_select skip64 : Elapsed 0.022 ms (2.230 ms / 100) 2.229 -> 2.231 ( +0.09%) [ +0.00% +0.22% +0.09% / +0.27% +0.09% +0.18%] index_select skip256 : Elapsed 0.022 ms (2.229 ms / 100) 2.291 -> 2.287 ( -0.17%) [ +0.44% +0.00% +0.09% / +0.44% -0.17% +0.04%] index_select spread : Elapsed 0.023 ms (2.301 ms / 100) 2.271 -> 2.266 ( -0.22%) [ +0.09% +0.26% +0.00% / +0.04% -0.22% -0.22%] index_select strided 3 : Elapsed 0.023 ms (2.273 ms / 100) 2.298 -> 2.290 ( -0.35%) [ +0.17% +0.00% +0.00% / +0.26% -0.30% -0.35%] index_select strided 5 : Elapsed 0.023 ms (2.302 ms / 100) 2.280 -> 2.274 ( -0.26%) [ +0.26% +0.00% +0.00% / +0.26% -0.26% -0.09%] index_select strided 7 : Elapsed 0.023 ms (2.286 ms / 100) 2.280 -> 2.275 ( -0.22%) [ +0.09% +0.00% +0.13% / +0.00% -0.22% -0.22%] index_select strided 8 : Elapsed 0.023 ms (2.282 ms / 100) 2.294 -> 2.292 ( -0.09%) [ +0.09% +0.09% +0.00% / +0.09% -0.09% +0.13%] index_select strided 16 : Elapsed 0.023 ms (2.296 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.22% +0.31% +0.00% / +0.13% -0.04% +0.18%] index_select random : Elapsed 0.023 ms (2.280 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.13% +0.13% +0.00% / +0.35% -0.04% +0.18%] index_select random_sorted : Elapsed 0.023 ms (2.278 ms / 100) 2.296 -> 2.287 ( -0.39%) [ +0.09% +0.30% +0.00% / +0.09% -0.39% -0.26%] index_select perm : Elapsed 0.023 ms (2.298 ms / 100) 2.290 -> 2.289 ( -0.04%) [ +0.17% +0.52% +0.00% / +0.31% -0.04% +0.00%] index_select perm_sorted : Elapsed 0.023 ms (2.294 ms / 100) B = [16, 4, 5, 40] (stride (5, 80, 1, 320)) A = [16, 20, 5, 40] (stride (1, 16, 320, 1600)) dim = 1 2.186 -> 2.186 ( +0.00%) [ +0.05% +0.18% +0.00% / +0.05% +0.05% +0.00%] index_select const : Elapsed 0.022 ms (2.187 ms / 100) 2.187 -> 2.185 ( -0.09%) [ +0.18% +0.05% +0.00% / -0.09% +0.09% -0.05%] index_select wrap : Elapsed 0.022 ms (2.191 ms / 100) 2.183 -> 2.184 ( +0.05%) [ +0.18% +0.00% +0.14% / +0.18% +0.14% +0.05%] index_select linear : Elapsed 0.022 ms (2.187 ms / 100) 2.182 -> 2.186 ( +0.18%) [ +0.00% +0.14% +0.00% / +0.18% +0.32% +0.18%] index_select reverse : Elapsed 0.022 ms (2.182 ms / 100) 2.183 -> 2.185 ( +0.09%) [ +0.32% +0.37% +0.00% / +0.32% +0.27% +0.09%] index_select skip64 : Elapsed 0.022 ms (2.190 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.18% +0.00% +0.05%] index_select skip256 : Elapsed 0.022 ms (2.189 ms / 100) 2.182 -> 2.185 ( +0.14%) [ +0.00% +0.05% +0.09% / +0.23% +0.14% +0.18%] index_select spread : Elapsed 0.022 ms (2.182 ms / 100) 2.179 -> 2.177 ( -0.09%) [ +0.14% +0.09% +0.00% / +0.09% -0.09% +0.05%] index_select strided 3 : Elapsed 0.022 ms (2.182 ms / 100) 2.182 -> 2.183 ( +0.05%) [ +0.00% +0.18% +0.23% / +0.05% +0.23% +0.18%] index_select strided 5 : Elapsed 0.022 ms (2.182 ms / 100) 2.163 -> 2.162 ( -0.05%) [ +0.00% +0.42% +0.14% / +0.32% +0.09% -0.05%] index_select strided 7 : Elapsed 0.022 ms (2.163 ms / 100) 2.180 -> 2.183 ( +0.14%) [ +0.00% +0.18% +0.18% / +0.28% +0.14% +0.18%] index_select strided 8 : Elapsed 0.022 ms (2.180 ms / 100) 2.180 -> 2.177 ( -0.14%) [ +0.09% +0.00% +0.00% / -0.05% -0.14% -0.09%] index_select strided 16 : Elapsed 0.022 ms (2.182 ms / 100) 2.181 -> 2.181 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.09% +0.05% +0.00%] index_select random : Elapsed 0.022 ms (2.184 ms / 100) 2.181 -> 2.180 ( -0.05%) [ +0.23% +0.09% +0.00% / +0.05% +0.05% -0.05%] index_select random_sorted : Elapsed 0.022 ms (2.186 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.05% +0.28% +0.00% / +0.23% +0.09% +0.18%] index_select perm : Elapsed 0.022 ms (2.173 ms / 100) 2.185 -> 2.185 ( +0.00%) [ +0.18% +0.00% +0.09% / +0.05% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.022 ms (2.189 ms / 100) out_shape = [16, 20, 4, 40] in_shape = [16, 20, 5, 40] idx_dim = 2 B = [16, 20, 4, 40] (stride (160, 2560, 1, 4)) A = [16, 20, 5, 40] (stride (40, 640, 12800, 1)) dim = 2 5.769 -> 5.769 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.24% +0.16%] index_select const : Elapsed 0.058 ms (5.769 ms / 100) 5.872 -> 5.871 ( -0.02%) [ +0.09% +0.17% +0.00% / +0.05% -0.02% +0.03%] index_select wrap : Elapsed 0.059 ms (5.877 ms / 100) 5.857 -> 5.859 ( +0.03%) [ +0.19% +0.10% +0.00% / +0.17% +0.03% +0.05%] index_select linear : Elapsed 0.059 ms (5.868 ms / 100) 5.864 -> 5.864 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.09% +0.05% +0.00%] index_select reverse : Elapsed 0.059 ms (5.865 ms / 100) 5.746 -> 5.752 ( +0.10%) [ +0.00% +0.07% +0.07% / +0.10% +0.16% +0.17%] index_select skip64 : Elapsed 0.057 ms (5.746 ms / 100) 5.749 -> 5.757 ( +0.14%) [ +0.05% +0.00% +0.03% / +0.14% +0.23% +0.35%] index_select skip256 : Elapsed 0.058 ms (5.752 ms / 100) 5.877 -> 5.866 ( -0.19%) [ +0.05% +0.03% +0.00% / +0.09% -0.07% -0.19%] index_select spread : Elapsed 0.059 ms (5.880 ms / 100) 5.853 -> 5.852 ( -0.02%) [ +0.19% +0.14% +0.00% / +0.27% +0.03% -0.02%] index_select strided 3 : Elapsed 0.059 ms (5.864 ms / 100) 5.847 -> 5.848 ( +0.02%) [ +0.14% +0.02% +0.00% / +0.17% +0.02% +0.07%] index_select random : Elapsed 0.059 ms (5.855 ms / 100) 5.858 -> 5.849 ( -0.15%) [ +0.07% +0.07% +0.00% / +0.07% -0.15% -0.03%] index_select random_sorted : Elapsed 0.059 ms (5.862 ms / 100) 5.882 -> 5.872 ( -0.17%) [ +0.02% +0.00% +0.03% / +0.05% -0.14% -0.17%] index_select perm : Elapsed 0.059 ms (5.883 ms / 100) 5.874 -> 5.848 ( -0.44%) [ +0.02% +0.00% +0.03% / +0.07% -0.44% -0.19%] index_select perm_sorted : Elapsed 0.059 ms (5.875 ms / 100) B = [16, 20, 4, 40] (stride (80, 4, 1, 1280)) A = [16, 20, 5, 40] (stride (40, 3200, 640, 1)) dim = 2 5.503 -> 5.511 ( +0.15%) [ +0.00% +0.02% +0.07% / +0.15% +0.24% +0.35%] index_select const : Elapsed 0.055 ms (5.503 ms / 100) 5.582 -> 5.595 ( +0.23%) [ +0.25% +0.00% +0.29% / +0.23% +0.39% +0.30%] index_select wrap : Elapsed 0.056 ms (5.596 ms / 100) 5.589 -> 5.592 ( +0.05%) [ +0.04% +0.02% +0.00% / +0.11% +0.09% +0.05%] index_select linear : Elapsed 0.056 ms (5.591 ms / 100) 5.592 -> 5.594 ( +0.04%) [ +0.05% +0.00% +0.21% / +0.04% +0.09% +0.04%] index_select reverse : Elapsed 0.056 ms (5.595 ms / 100) 5.503 -> 5.515 ( +0.22%) [ +0.05% +0.13% +0.00% / +0.24% +0.22% +0.24%] index_select skip64 : Elapsed 0.055 ms (5.506 ms / 100) 5.504 -> 5.509 ( +0.09%) [ +0.00% +0.00% +0.11% / +0.09% +0.40% +0.47%] index_select skip256 : Elapsed 0.055 ms (5.504 ms / 100) 5.592 -> 5.592 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.13% +0.00% +0.07%] index_select spread : Elapsed 0.056 ms (5.599 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.00% +0.14% +0.05% / +0.09% +0.13% +0.25%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.542 -> 5.550 ( +0.14%) [ +0.00% +0.09% +0.04% / +0.14% +0.16% +0.29%] index_select random : Elapsed 0.055 ms (5.542 ms / 100) 5.554 -> 5.556 ( +0.04%) [ +0.05% +0.00% +0.09% / +0.04% +0.16% +0.22%] index_select random_sorted : Elapsed 0.056 ms (5.557 ms / 100) 5.599 -> 5.600 ( +0.02%) [ +0.07% +0.00% +0.09% / +0.09% +0.14% +0.02%] index_select perm : Elapsed 0.056 ms (5.603 ms / 100) 5.584 -> 5.580 ( -0.07%) [ +0.00% +0.13% +0.18% / +0.07% +0.25% -0.07%] index_select perm_sorted : Elapsed 0.056 ms (5.584 ms / 100) out_shape = [16, 20, 5, 4] in_shape = [16, 20, 5, 40] idx_dim = 3 B = [16, 20, 5, 4] (stride (400, 20, 4, 1)) dim = 3 fill_cnt = 40 BAD 0.985 -> 1.196 (+21.42%) [ +0.71% +0.00% +0.10% / +21.73% +21.42% +21.73%] index_fill_ const : Elapsed 0.010 ms (0.992 ms / 100) BAD 0.974 -> 1.171 (+20.23%) [ +0.51% +0.21% +0.00% / +22.18% +20.23% +20.64%] index_fill_ linear : Elapsed 0.010 ms (0.979 ms / 100) BAD 0.975 -> 1.172 (+20.21%) [ +0.21% +0.00% +0.00% / +22.46% +20.31% +20.21%] index_fill_ reverse : Elapsed 0.010 ms (0.977 ms / 100) Bad 0.980 -> 1.174 (+19.80%) [ +0.00% +0.10% +0.20% / +19.80% +20.41% +20.31%] index_fill_ skip64 : Elapsed 0.010 ms (0.980 ms / 100) Bad 0.982 -> 1.177 (+19.86%) [ +0.10% +0.00% +0.10% / +19.86% +20.67% +19.86%] index_fill_ skip256 : Elapsed 0.010 ms (0.983 ms / 100) Bad 0.975 -> 1.148 (+17.74%) [ +0.00% +0.31% +0.10% / +17.74% +19.28% +18.87%] index_fill_ spread : Elapsed 0.010 ms (0.975 ms / 100) BAD 0.973 -> 1.169 (+20.14%) [ +0.31% +0.10% +0.00% / +20.14% +21.38% +21.38%] index_fill_ strided 3 : Elapsed 0.010 ms (0.976 ms / 100) BAD 0.973 -> 1.175 (+20.76%) [ +0.21% +0.00% +0.21% / +20.76% +20.76% +20.86%] index_fill_ random : Elapsed 0.010 ms (0.975 ms / 100) BAD 0.970 -> 1.172 (+20.82%) [ +0.52% +0.00% +0.82% / +20.82% +21.34% +21.13%] index_fill_ random_sorted : Elapsed 0.010 ms (0.975 ms / 100) B = [16, 20, 5, 4] (stride (400, 1, 80, 20)) A = [16, 20, 5, 40] (stride (1, 3200, 16, 80)) dim = 3 1.322 -> 1.324 ( +0.15%) [ +0.23% +0.15% +0.00% / +0.15% +0.68% +0.53%] index_select const : Elapsed 0.013 ms (1.325 ms / 100) 1.310 -> 1.312 ( +0.15%) [ +0.23% +0.00% +0.15% / +0.15% +0.69% +0.69%] index_select wrap : Elapsed 0.013 ms (1.313 ms / 100) 1.305 -> 1.308 ( +0.23%) [ +0.31% +0.15% +0.00% / +0.23% +1.07% +1.07%] index_select linear : Elapsed 0.013 ms (1.309 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.68% +0.61%] index_select reverse : Elapsed 0.013 ms (1.320 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.76% +0.68%] index_select skip64 : Elapsed 0.013 ms (1.321 ms / 100) 1.315 -> 1.319 ( +0.30%) [ +0.38% +0.00% +0.30% / +0.30% +0.61% +0.68%] index_select skip256 : Elapsed 0.013 ms (1.320 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.08% +0.23% +0.00% / +0.15% +0.68% +0.61%] index_select spread : Elapsed 0.013 ms (1.318 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.68% +0.68%] index_select strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.311 -> 1.312 ( +0.08%) [ +0.46% +0.31% +0.00% / +0.08% +0.76% +0.53%] index_select strided 5 : Elapsed 0.013 ms (1.317 ms / 100) 1.323 -> 1.325 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.60% +0.60%] index_select strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.323 -> 1.325 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.45% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.324 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.38% +0.38%] index_select strided 16 : Elapsed 0.013 ms (1.323 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_select random : Elapsed 0.013 ms (1.322 ms / 100) 1.321 -> 1.323 ( +0.15%) [ +0.23% +0.00% +0.00% / +0.15% +0.53% +0.53%] index_select random_sorted : Elapsed 0.013 ms (1.324 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_select perm : Elapsed 0.013 ms (1.320 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +0.38% +0.30%] index_select perm_sorted : Elapsed 0.013 ms (1.323 ms / 100) B = [16, 20, 5, 4] (stride (20, 320, 4, 1)) A = [16, 20, 5, 40] (stride (40, 640, 12800, 1)) dim = 3 1.383 -> 1.382 ( -0.07%) [ +0.22% +0.07% +0.00% / -0.07% +0.14% +0.07%] index_select const : Elapsed 0.014 ms (1.386 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.29% +0.22%] index_select wrap : Elapsed 0.014 ms (1.382 ms / 100) 1.389 -> 1.399 ( +0.72%) [ +0.36% +0.00% +0.58% / +0.72% +1.15% +0.79%] index_select linear : Elapsed 0.014 ms (1.394 ms / 100) 1.371 -> 1.380 ( +0.66%) [ +1.09% +0.00% +0.66% / +0.95% +0.95% +0.66%] index_select reverse : Elapsed 0.014 ms (1.386 ms / 100) 1.390 -> 1.394 ( +0.29%) [ +0.00% +0.00% +0.22% / +0.29% +0.58% +0.43%] index_select skip64 : Elapsed 0.014 ms (1.390 ms / 100) 1.381 -> 1.376 ( -0.36%) [ +0.36% +0.07% +0.00% / -0.36% +0.87% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.386 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.87% +0.87%] index_select spread : Elapsed 0.014 ms (1.375 ms / 100) 1.390 -> 1.390 ( +0.00%) [ +0.00% +0.00% +0.22% / +0.00% +0.50% +1.08%] index_select strided 3 : Elapsed 0.014 ms (1.390 ms / 100) 1.372 -> 1.381 ( +0.66%) [ +0.58% +0.44% +0.00% / +0.66% +0.95% +0.87%] index_select strided 5 : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.383 ( +0.36%) [ +0.36% +0.00% +0.29% / +0.36% +0.58% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.58% +0.00% +0.44% / +0.07% +1.17% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.379 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.15% +0.07% +0.00% / +0.29% +0.51% +0.29%] index_select strided 16 : Elapsed 0.014 ms (1.370 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.36% +0.07% +0.00% / +0.07% +0.14% -0.07%] index_select random : Elapsed 0.014 ms (1.387 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.66% +0.73% +0.00% / +0.22% +0.87% +1.02%] index_select random_sorted : Elapsed 0.014 ms (1.383 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.58% +0.73%] index_select perm : Elapsed 0.014 ms (1.377 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.51% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.374 ms / 100) B = [16, 20, 5, 4] (stride (1, 320, 64, 16)) A = [16, 20, 5, 40] (stride (1, 640, 12800, 16)) dim = 3 1.295 -> 1.292 ( -0.23%) [ +0.54% +0.00% +0.62% / -0.23% +0.00% +0.15%] index_select const : Elapsed 0.013 ms (1.302 ms / 100) 1.290 -> 1.290 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.31% +0.39%] index_select wrap : Elapsed 0.013 ms (1.292 ms / 100) 1.290 -> 1.292 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.54% +0.54%] index_select linear : Elapsed 0.013 ms (1.291 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select reverse : Elapsed 0.013 ms (1.287 ms / 100) 1.294 -> 1.296 ( +0.15%) [ +0.62% +0.00% +0.54% / +0.15% +0.70% +1.08%] index_select skip64 : Elapsed 0.013 ms (1.302 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.24% +0.00% / +0.16% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.23% +0.16% +0.00% / +0.08% +0.62% +0.47%] index_select spread : Elapsed 0.013 ms (1.286 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.23% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.283 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.08% +0.23% +0.00% / +0.16% +0.39% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.289 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.54% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.288 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.23% +0.16% +0.00% / +0.16% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +0.39% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.288 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select perm : Elapsed 0.013 ms (1.282 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.78% +0.70%] index_select perm_sorted : Elapsed 0.013 ms (1.287 ms / 100) B = [16, 20, 5, 4] (stride (100, 5, 1, 1600)) A = [16, 20, 5, 40] (stride (1, 16, 12800, 320)) dim = 3 1.149 -> 1.150 ( +0.09%) [ +0.00% +0.26% +0.09% / +0.09% +0.44% +0.35%] index_select const : Elapsed 0.011 ms (1.149 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.61% +0.61%] index_select wrap : Elapsed 0.011 ms (1.149 ms / 100) 1.151 -> 1.151 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.35% +0.43%] index_select linear : Elapsed 0.012 ms (1.151 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.52% +0.52%] index_select reverse : Elapsed 0.012 ms (1.150 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.44% +0.52%] index_select skip64 : Elapsed 0.011 ms (1.148 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.61% +0.61%] index_select skip256 : Elapsed 0.011 ms (1.149 ms / 100) 1.153 -> 1.154 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.52% +0.52%] index_select spread : Elapsed 0.012 ms (1.154 ms / 100) 1.153 -> 1.154 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.61% +0.61%] index_select strided 3 : Elapsed 0.012 ms (1.155 ms / 100) 1.146 -> 1.147 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +1.40% +0.52%] index_select strided 5 : Elapsed 0.011 ms (1.146 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.17% +0.61%] index_select strided 7 : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.70%] index_select strided 8 : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.44% +0.44%] index_select strided 16 : Elapsed 0.011 ms (1.149 ms / 100) 1.149 -> 1.152 ( +0.26%) [ +0.26% +0.00% +0.09% / +0.26% +0.52% +0.35%] index_select random : Elapsed 0.012 ms (1.152 ms / 100) 1.154 -> 1.156 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.61% +0.61%] index_select random_sorted : Elapsed 0.012 ms (1.156 ms / 100) 1.147 -> 1.147 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.44% +0.35%] index_select perm : Elapsed 0.011 ms (1.147 ms / 100) 1.138 -> 1.144 ( +0.53%) [ +0.35% +0.62% +0.00% / +0.53% +0.79% +0.88%] index_select perm_sorted : Elapsed 0.011 ms (1.142 ms / 100) B = [16, 20, 5, 4] (stride (1, 16, 320, 1600)) A = [16, 20, 5, 40] (stride (4000, 40, 800, 1)) dim = 3 1.369 -> 1.372 ( +0.22%) [ +0.15% +0.07% +0.00% / +0.22% +0.95% +0.88%] index_select const : Elapsed 0.014 ms (1.371 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.51% +0.51%] index_select wrap : Elapsed 0.014 ms (1.371 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.22% +0.00% +0.00% / +0.15% +0.58% +0.66%] index_select linear : Elapsed 0.014 ms (1.373 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.73% +0.36%] index_select reverse : Elapsed 0.014 ms (1.372 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.58% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.372 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.73% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.372 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.51% +0.36%] index_select spread : Elapsed 0.014 ms (1.374 ms / 100) 1.371 -> 1.376 ( +0.36%) [ +0.07% +0.00% +0.07% / +0.36% +0.51% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.29% +0.29%] index_select strided 5 : Elapsed 0.014 ms (1.373 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.73% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.95% +0.95%] index_select strided 8 : Elapsed 0.014 ms (1.375 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.51% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.374 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.22% +0.00% +0.07% / +0.07% +0.58% +0.36%] index_select random : Elapsed 0.014 ms (1.373 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.80% +0.80%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.29% +0.07% +0.00% / +0.00% +1.02% +0.66%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.87% +0.87%] index_select perm_sorted : Elapsed 0.014 ms (1.374 ms / 100) B = [16, 20, 5, 4] (stride (1, 16, 320, 1600)) A = [16, 20, 5, 40] (stride (20, 1, 12800, 320)) dim = 3 1.195 -> 1.198 ( +0.25%) [ +0.08% +0.00% +0.00% / +0.25% +0.75% +0.59%] index_select const : Elapsed 0.012 ms (1.196 ms / 100) 1.198 -> 1.199 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_select wrap : Elapsed 0.012 ms (1.199 ms / 100) 1.198 -> 1.199 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select linear : Elapsed 0.012 ms (1.199 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.42% +0.25%] index_select reverse : Elapsed 0.012 ms (1.199 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.50%] index_select skip64 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.67%] index_select skip256 : Elapsed 0.012 ms (1.196 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.58% +0.50%] index_select spread : Elapsed 0.012 ms (1.198 ms / 100) 1.196 -> 1.195 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.67% +0.59%] index_select strided 3 : Elapsed 0.012 ms (1.196 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.67% +0.67%] index_select strided 5 : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.196 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.84% +0.84%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.196 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +1.01% +0.75%] index_select strided 8 : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.17% +0.00% +0.92% / +0.08% +0.75% +0.84%] index_select strided 16 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.75% +0.75%] index_select random : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.84%] index_select random_sorted : Elapsed 0.012 ms (1.195 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.67%] index_select perm : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.42% +0.08% +0.00% / +0.08% +0.59% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.200 ms / 100) out_shape = [4, 20, 40, 5] in_shape = [16, 20, 40, 5] idx_dim = 0 B = [4, 20, 40, 5] (stride (4000, 200, 5, 1)) A = [16, 20, 40, 5] (stride (200, 3200, 5, 1)) dim = 0 2.063 -> 2.061 ( -0.10%) [ +0.10% +0.00% +0.05% / -0.10% +0.48% +0.44%] index_select const : Elapsed 0.021 ms (2.065 ms / 100) 2.135 -> 2.136 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.09% +0.23%] index_select wrap : Elapsed 0.021 ms (2.135 ms / 100) 2.134 -> 2.135 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.28% +0.19%] index_select linear : Elapsed 0.021 ms (2.137 ms / 100) 2.130 -> 2.133 ( +0.14%) [ +0.09% +0.05% +0.00% / +0.14% +0.80% +0.70%] index_select reverse : Elapsed 0.021 ms (2.132 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.00% +0.05% +0.15% / +0.10% +0.44% +0.39%] index_select skip64 : Elapsed 0.021 ms (2.063 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.48% +0.53%] index_select skip256 : Elapsed 0.021 ms (2.063 ms / 100) 2.131 -> 2.134 ( +0.14%) [ +0.05% +0.19% +0.00% / +0.14% +0.38% +0.28%] index_select spread : Elapsed 0.021 ms (2.132 ms / 100) 2.134 -> 2.132 ( -0.09%) [ +0.14% +0.09% +0.00% / -0.09% +0.05% +0.00%] index_select strided 3 : Elapsed 0.021 ms (2.137 ms / 100) 2.131 -> 2.133 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.47% +0.52%] index_select strided 5 : Elapsed 0.021 ms (2.131 ms / 100) 2.132 -> 2.132 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.28% +0.28%] index_select strided 7 : Elapsed 0.021 ms (2.132 ms / 100) 2.083 -> 2.085 ( +0.10%) [ +0.24% +0.29% +0.00% / +0.10% +0.58% +0.24%] index_select strided 8 : Elapsed 0.021 ms (2.088 ms / 100) 2.130 -> 2.129 ( -0.05%) [ +0.00% +0.14% +0.00% / -0.05% +0.19% +0.28%] index_select random : Elapsed 0.021 ms (2.130 ms / 100) 2.129 -> 2.131 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.33% +0.14%] index_select random_sorted : Elapsed 0.021 ms (2.129 ms / 100) 2.128 -> 2.132 ( +0.19%) [ +0.19% +0.00% +0.14% / +0.19% +0.47% +0.28%] index_select perm : Elapsed 0.021 ms (2.132 ms / 100) 2.126 -> 2.131 ( +0.24%) [ +0.00% +0.19% +0.24% / +0.24% +0.42% +0.61%] index_select perm_sorted : Elapsed 0.021 ms (2.126 ms / 100) B = [4, 20, 40, 5] (stride (4000, 5, 100, 1)) A = [16, 20, 40, 5] (stride (1, 640, 16, 12800)) dim = 0 2.408 -> 2.409 ( +0.04%) [ +0.08% +0.25% +0.00% / +0.04% +0.17% +0.29%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.395 -> 2.399 ( +0.17%) [ +0.04% +0.00% +0.13% / +0.33% +0.17% +0.33%] index_select wrap : Elapsed 0.024 ms (2.396 ms / 100) 2.401 -> 2.399 ( -0.08%) [ +0.08% +0.04% +0.00% / +0.21% -0.08% -0.08%] index_select linear : Elapsed 0.024 ms (2.403 ms / 100) 2.396 -> 2.391 ( -0.21%) [ +0.13% +0.00% +0.25% / +0.33% -0.21% -0.04%] index_select reverse : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.29% +0.08% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.401 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.17% +0.00% +0.00% / +0.17% +0.21% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.412 ms / 100) 2.419 -> 2.423 ( +0.17%) [ +0.08% +0.00% +0.08% / +0.17% +0.29% +0.37%] index_select spread : Elapsed 0.024 ms (2.421 ms / 100) 2.419 -> 2.424 ( +0.21%) [ +0.12% +0.12% +0.00% / +0.21% +0.25% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.431 -> 2.431 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.12% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.433 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.00% +0.08% +0.33% / -0.04% +0.29% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.424 -> 2.428 ( +0.17%) [ +0.00% +0.25% +0.41% / +0.29% +0.17% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.424 ms / 100) 2.424 -> 2.428 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.29% +0.21% +0.17%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.21% +0.12% +0.00% / +0.21% +0.29% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.423 ms / 100) 2.428 -> 2.431 ( +0.12%) [ +0.21% +0.00% +0.08% / +0.12% +0.25% +0.29%] index_select perm : Elapsed 0.024 ms (2.433 ms / 100) 2.416 -> 2.421 ( +0.21%) [ +0.12% +0.00% +0.08% / +0.21% +0.33% +0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [4, 20, 40, 5] (stride (4000, 40, 1, 800)) A = [16, 20, 40, 5] (stride (5, 80, 1600, 1)) dim = 0 2.317 -> 2.314 ( -0.13%) [ +0.09% +0.00% +0.04% / -0.13% +0.47% +0.52%] index_select const : Elapsed 0.023 ms (2.319 ms / 100) 2.368 -> 2.368 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.59% +0.42%] index_select wrap : Elapsed 0.024 ms (2.370 ms / 100) 2.371 -> 2.373 ( +0.08%) [ +0.04% +0.13% +0.00% / +0.08% +0.42% +0.46%] index_select linear : Elapsed 0.024 ms (2.372 ms / 100) 2.348 -> 2.353 ( +0.21%) [ +0.00% +0.09% +0.09% / +0.21% +0.60% +0.60%] index_select reverse : Elapsed 0.023 ms (2.348 ms / 100) 2.310 -> 2.310 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.61% +0.65%] index_select skip64 : Elapsed 0.023 ms (2.310 ms / 100) 2.314 -> 2.315 ( +0.04%) [ +0.00% +0.00% +0.26% / +0.04% +0.61% +0.56%] index_select skip256 : Elapsed 0.023 ms (2.314 ms / 100) 2.403 -> 2.402 ( -0.04%) [ +0.12% +0.08% +0.00% / -0.04% +0.25% +0.54%] index_select spread : Elapsed 0.024 ms (2.406 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.08% +0.00% +0.17% / +0.12% +0.33% +0.41%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.399 -> 2.405 ( +0.25%) [ +0.00% +0.13% +0.00% / +0.25% +0.42% +0.50%] index_select strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.42% +0.46%] index_select strided 7 : Elapsed 0.024 ms (2.401 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.13% +0.13% +0.00% / +0.26% +0.64% +0.94%] index_select strided 8 : Elapsed 0.023 ms (2.340 ms / 100) 2.387 -> 2.387 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.00% +0.29% +0.38%] index_select random : Elapsed 0.024 ms (2.387 ms / 100) 2.370 -> 2.371 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.25% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.371 ms / 100) 2.401 -> 2.407 ( +0.25%) [ +0.12% +0.17% +0.00% / +0.25% +0.37% +0.33%] index_select perm : Elapsed 0.024 ms (2.404 ms / 100) 2.402 -> 2.406 ( +0.17%) [ +0.08% +0.12% +0.00% / +0.17% +0.42% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) B = [4, 20, 40, 5] (stride (4000, 40, 1, 800)) A = [16, 20, 40, 5] (stride (20, 1, 320, 12800)) dim = 0 2.416 -> 2.422 ( +0.25%) [ +0.29% +0.00% +0.25% / +0.25% +0.54% +0.41%] index_select const : Elapsed 0.024 ms (2.423 ms / 100) 2.420 -> 2.424 ( +0.17%) [ +0.21% +0.00% +0.04% / +0.17% +0.58% +0.45%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.425 -> 2.424 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.29% +0.33%] index_select linear : Elapsed 0.024 ms (2.425 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.04% +0.16% +0.00% / +0.08% +0.41% +0.29%] index_select reverse : Elapsed 0.024 ms (2.431 ms / 100) 2.415 -> 2.420 ( +0.21%) [ +0.12% +0.17% +0.00% / +0.21% +0.66% +0.54%] index_select skip64 : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.54% +0.54%] index_select skip256 : Elapsed 0.024 ms (2.419 ms / 100) 2.433 -> 2.436 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.62% +0.41%] index_select spread : Elapsed 0.024 ms (2.435 ms / 100) 2.425 -> 2.433 ( +0.33%) [ +0.16% +0.00% +0.21% / +0.33% +0.62% +0.62%] index_select strided 3 : Elapsed 0.024 ms (2.429 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.21% +0.00% +0.12% / +0.04% +0.54% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.434 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.21% +0.00% +0.04% / +0.04% +0.49% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.00% +0.00% +0.25% / +0.12% +0.46% +0.46%] index_select strided 8 : Elapsed 0.024 ms (2.417 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.41% +0.45%] index_select random : Elapsed 0.024 ms (2.423 ms / 100) 2.428 -> 2.431 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.37% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.431 ms / 100) 2.440 -> 2.444 ( +0.16%) [ +0.00% +0.04% +0.12% / +0.16% +0.37% +0.49%] index_select perm : Elapsed 0.024 ms (2.440 ms / 100) 2.427 -> 2.432 ( +0.21%) [ +0.12% +0.25% +0.00% / +0.21% +0.41% +0.74%] index_select perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) B = [4, 20, 40, 5] (stride (4000, 1, 20, 800)) A = [16, 20, 40, 5] (stride (4000, 1, 100, 20)) dim = 0 2.378 -> 2.380 ( +0.08%) [ +0.08% +0.00% +0.13% / +0.08% +0.42% +0.42%] index_select const : Elapsed 0.024 ms (2.380 ms / 100) 2.368 -> 2.371 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.13% +0.30%] index_select wrap : Elapsed 0.024 ms (2.371 ms / 100) 2.368 -> 2.368 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.13% +0.21%] index_select linear : Elapsed 0.024 ms (2.368 ms / 100) 2.369 -> 2.368 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.21% +0.00%] index_select reverse : Elapsed 0.024 ms (2.370 ms / 100) 2.368 -> 2.371 ( +0.13%) [ +0.13% +0.04% +0.00% / +0.13% +0.21% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.371 ms / 100) 2.379 -> 2.383 ( +0.17%) [ +0.04% +0.13% +0.00% / +0.17% +0.38% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.380 ms / 100) 2.365 -> 2.367 ( +0.08%) [ +0.00% +0.00% +0.21% / +0.08% +0.38% +0.25%] index_select spread : Elapsed 0.024 ms (2.365 ms / 100) 2.364 -> 2.370 ( +0.25%) [ +0.17% +0.00% +0.13% / +0.25% +0.42% +0.38%] index_select strided 3 : Elapsed 0.024 ms (2.368 ms / 100) 2.378 -> 2.381 ( +0.13%) [ +0.21% +0.13% +0.00% / +0.13% +0.59% +0.67%] index_select strided 5 : Elapsed 0.024 ms (2.383 ms / 100) 2.370 -> 2.370 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.21% +0.30%] index_select strided 7 : Elapsed 0.024 ms (2.372 ms / 100) 2.365 -> 2.366 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.30% +0.51%] index_select strided 8 : Elapsed 0.024 ms (2.368 ms / 100) 2.369 -> 2.364 ( -0.21%) [ +0.04% +0.13% +0.00% / -0.21% +0.04% +0.51%] index_select random : Elapsed 0.024 ms (2.370 ms / 100) 2.370 -> 2.372 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.34% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.370 ms / 100) 2.383 -> 2.383 ( +0.00%) [ +0.04% +0.00% +0.13% / +0.00% +0.42% +0.25%] index_select perm : Elapsed 0.024 ms (2.384 ms / 100) 2.366 -> 2.368 ( +0.08%) [ +0.13% +0.08% +0.00% / +0.08% +0.46% +0.38%] index_select perm_sorted : Elapsed 0.024 ms (2.369 ms / 100) B = [4, 20, 40, 5] (stride (4000, 1, 20, 800)) A = [16, 20, 40, 5] (stride (4000, 40, 1, 800)) dim = 0 2.315 -> 2.317 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.13% +0.39%] index_select const : Elapsed 0.023 ms (2.317 ms / 100) 2.350 -> 2.354 ( +0.17%) [ +0.21% +0.21% +0.00% / +0.17% +0.21% +0.47%] index_select wrap : Elapsed 0.024 ms (2.355 ms / 100) 2.361 -> 2.361 ( +0.00%) [ +0.08% +0.00% +0.13% / +0.04% +0.00% +0.04%] index_select linear : Elapsed 0.024 ms (2.363 ms / 100) 2.353 -> 2.355 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.21% +0.17%] index_select reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.309 -> 2.311 ( +0.09%) [ +0.00% +0.22% +0.09% / +0.09% +0.56% +0.35%] index_select skip64 : Elapsed 0.023 ms (2.309 ms / 100) 2.315 -> 2.318 ( +0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.17% +0.13%] index_select skip256 : Elapsed 0.023 ms (2.315 ms / 100) 2.350 -> 2.354 ( +0.17%) [ +0.34% +0.13% +0.00% / +0.17% +0.30% +0.38%] index_select spread : Elapsed 0.024 ms (2.358 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.21% +0.04% +0.00% / +0.08% +0.34% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.363 ms / 100) 2.354 -> 2.359 ( +0.21%) [ +0.00% +0.13% +0.25% / +0.21% +0.55% +0.30%] index_select strided 5 : Elapsed 0.024 ms (2.354 ms / 100) 2.349 -> 2.350 ( +0.04%) [ +0.04% +0.34% +0.00% / +0.04% +0.34% +0.43%] index_select strided 7 : Elapsed 0.024 ms (2.350 ms / 100) 2.325 -> 2.326 ( +0.04%) [ +0.04% +0.00% +0.17% / +0.04% +0.26% +0.09%] index_select strided 8 : Elapsed 0.023 ms (2.326 ms / 100) 2.347 -> 2.351 ( +0.17%) [ +0.04% +0.13% +0.00% / +0.17% +0.43% +0.55%] index_select random : Elapsed 0.023 ms (2.348 ms / 100) 2.347 -> 2.350 ( +0.13%) [ +0.00% +0.26% +0.00% / +0.13% +0.43% +0.43%] index_select random_sorted : Elapsed 0.023 ms (2.347 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.00% +0.13% +0.00% / +0.08% +0.21% +0.17%] index_select perm : Elapsed 0.024 ms (2.356 ms / 100) 2.348 -> 2.354 ( +0.26%) [ +0.00% +0.13% +0.00% / +0.26% +0.55% +0.38%] index_select perm_sorted : Elapsed 0.023 ms (2.348 ms / 100) B = [4, 20, 40, 5] (stride (100, 1, 400, 20)) A = [16, 20, 40, 5] (stride (100, 1, 1600, 20)) dim = 0 2.501 -> 2.506 ( +0.20%) [ +0.24% +0.20% +0.00% / +0.20% +0.44% +0.40%] index_select const : Elapsed 0.025 ms (2.507 ms / 100) 2.515 -> 2.510 ( -0.20%) [ +0.04% +0.00% +0.08% / -0.20% +0.16% +0.04%] index_select wrap : Elapsed 0.025 ms (2.516 ms / 100) 2.509 -> 2.511 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.20% +0.20%] index_select linear : Elapsed 0.025 ms (2.512 ms / 100) 2.511 -> 2.513 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.16% +0.08% +0.20%] index_select reverse : Elapsed 0.025 ms (2.511 ms / 100) 2.510 -> 2.508 ( -0.08%) [ +0.16% +0.00% +0.00% / -0.08% +0.28% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.514 ms / 100) 2.507 -> 2.504 ( -0.12%) [ +0.04% +0.00% +0.00% / -0.12% +0.20% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.508 ms / 100) 2.512 -> 2.513 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.20% +0.32%] index_select spread : Elapsed 0.025 ms (2.516 ms / 100) 2.506 -> 2.510 ( +0.16%) [ +0.04% +0.20% +0.00% / +0.16% +0.44% +0.32%] index_select strided 3 : Elapsed 0.025 ms (2.507 ms / 100) 2.500 -> 2.500 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.60% +0.40%] index_select strided 5 : Elapsed 0.025 ms (2.500 ms / 100) 2.513 -> 2.513 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.44% +0.28%] index_select strided 7 : Elapsed 0.025 ms (2.513 ms / 100) 2.497 -> 2.499 ( +0.08%) [ +0.08% +0.20% +0.00% / +0.08% +0.40% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.499 ms / 100) 2.511 -> 2.514 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.32% +0.28%] index_select random : Elapsed 0.025 ms (2.512 ms / 100) 2.503 -> 2.506 ( +0.12%) [ +0.08% +0.00% +0.20% / +0.12% +0.28% +0.40%] index_select random_sorted : Elapsed 0.025 ms (2.505 ms / 100) 2.511 -> 2.510 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.24% +0.20%] index_select perm : Elapsed 0.025 ms (2.513 ms / 100) 2.514 -> 2.508 ( -0.24%) [ +0.04% +0.04% +0.00% / -0.24% +0.24% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.515 ms / 100) B = [4, 20, 40, 5] (stride (1, 4, 400, 80)) A = [16, 20, 40, 5] (stride (4000, 200, 5, 1)) dim = 0 2.090 -> 2.090 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.14% +0.00% +0.05%] index_select const : Elapsed 0.021 ms (2.092 ms / 100) 2.147 -> 2.152 ( +0.23%) [ +0.00% +0.14% +0.19% / +0.23% +0.28% +0.28%] index_select wrap : Elapsed 0.021 ms (2.147 ms / 100) 2.149 -> 2.149 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.05% +0.00% +0.23%] index_select linear : Elapsed 0.021 ms (2.150 ms / 100) 2.148 -> 2.147 ( -0.05%) [ +0.00% +0.05% +0.09% / +0.19% +0.00% -0.05%] index_select reverse : Elapsed 0.021 ms (2.148 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.05% +0.00% +0.10% / +0.10% +0.34% +0.34%] index_select skip64 : Elapsed 0.021 ms (2.089 ms / 100) 2.088 -> 2.089 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.29% +0.34%] index_select skip256 : Elapsed 0.021 ms (2.091 ms / 100) 2.145 -> 2.142 ( -0.14%) [ +0.00% +0.05% +0.05% / -0.14% +0.28% +0.33%] index_select spread : Elapsed 0.021 ms (2.145 ms / 100) 2.147 -> 2.146 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.14% +0.19%] index_select strided 3 : Elapsed 0.021 ms (2.147 ms / 100) 2.145 -> 2.148 ( +0.14%) [ +0.09% +0.00% +0.00% / +0.14% +0.28% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.147 ms / 100) 2.148 -> 2.149 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.09% +0.19%] index_select strided 7 : Elapsed 0.021 ms (2.149 ms / 100) 2.102 -> 2.104 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.33% +0.43%] index_select strided 8 : Elapsed 0.021 ms (2.102 ms / 100) 2.149 -> 2.148 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.05% +0.19%] index_select random : Elapsed 0.021 ms (2.150 ms / 100) 2.151 -> 2.150 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.05% +0.00%] index_select random_sorted : Elapsed 0.022 ms (2.152 ms / 100) 2.146 -> 2.149 ( +0.14%) [ +0.19% +0.05% +0.00% / +0.14% +0.28% +0.28%] index_select perm : Elapsed 0.021 ms (2.150 ms / 100) 2.145 -> 2.148 ( +0.14%) [ +0.09% +0.05% +0.00% / +0.14% +0.23% +0.33%] index_select perm_sorted : Elapsed 0.021 ms (2.147 ms / 100) B = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) A = [16, 20, 40, 5] (stride (1, 16, 1600, 320)) dim = 0 2.485 -> 2.491 ( +0.24%) [ +0.00% +0.40% +0.08% / +0.24% +0.52% +0.40%] index_select const : Elapsed 0.025 ms (2.485 ms / 100) 2.485 -> 2.490 ( +0.20%) [ +0.20% +0.24% +0.00% / +0.20% +0.44% +0.56%] index_select wrap : Elapsed 0.025 ms (2.490 ms / 100) 2.484 -> 2.486 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.40% +0.44%] index_select linear : Elapsed 0.025 ms (2.484 ms / 100) 2.481 -> 2.486 ( +0.20%) [ +0.24% +0.04% +0.00% / +0.20% +0.60% +0.64%] index_select reverse : Elapsed 0.025 ms (2.487 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.12% +0.20% +0.00% / +0.12% +0.69% +0.77%] index_select skip64 : Elapsed 0.025 ms (2.484 ms / 100) 2.480 -> 2.486 ( +0.24%) [ +0.20% +0.36% +0.00% / +0.24% +0.81% +0.77%] index_select skip256 : Elapsed 0.025 ms (2.485 ms / 100) 2.511 -> 2.520 ( +0.36%) [ +0.40% +0.00% +0.16% / +0.36% +0.88% +0.76%] index_select spread : Elapsed 0.025 ms (2.521 ms / 100) 2.517 -> 2.522 ( +0.20%) [ +0.00% +0.08% +0.16% / +0.20% +0.48% +0.44%] index_select strided 3 : Elapsed 0.025 ms (2.517 ms / 100) 2.515 -> 2.522 ( +0.28%) [ +0.12% +0.16% +0.00% / +0.28% +0.68% +0.80%] index_select strided 5 : Elapsed 0.025 ms (2.518 ms / 100) 2.515 -> 2.518 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.44% +0.68%] index_select strided 7 : Elapsed 0.025 ms (2.516 ms / 100) 2.529 -> 2.533 ( +0.16%) [ +0.04% +0.04% +0.00% / +0.16% +0.47% +0.59%] index_select strided 8 : Elapsed 0.025 ms (2.530 ms / 100) 2.520 -> 2.521 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.40% +0.28%] index_select random : Elapsed 0.025 ms (2.522 ms / 100) 2.527 -> 2.533 ( +0.24%) [ +0.24% +0.08% +0.00% / +0.24% +0.59% +0.59%] index_select random_sorted : Elapsed 0.025 ms (2.533 ms / 100) 2.486 -> 2.486 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.44% +0.44%] index_select perm : Elapsed 0.025 ms (2.486 ms / 100) 2.482 -> 2.479 ( -0.12%) [ +0.08% +0.12% +0.00% / -0.12% +0.48% +0.36%] index_select perm_sorted : Elapsed 0.025 ms (2.484 ms / 100) out_shape = [16, 4, 40, 5] in_shape = [16, 20, 40, 5] idx_dim = 1 B = [16, 4, 40, 5] (stride (800, 200, 5, 1)) A = [16, 20, 40, 5] (stride (1, 3200, 80, 16)) dim = 1 0.749 -> 0.749 ( +0.00%) [ +0.40% +0.53% +0.00% / +0.00% +2.40% +2.27%] index_select const : Elapsed 0.008 ms (0.752 ms / 100) 0.751 -> 0.750 ( -0.13%) [ +0.00% +0.27% +0.13% / -0.13% +1.33% +0.80%] index_select wrap : Elapsed 0.008 ms (0.751 ms / 100) 0.751 -> 0.750 ( -0.13%) [ +0.40% +0.00% +0.13% / -0.13% +1.20% +1.07%] index_select linear : Elapsed 0.008 ms (0.754 ms / 100) 0.757 -> 0.756 ( -0.13%) [ +0.00% +0.26% +0.13% / -0.13% +0.13% +0.40%] index_select reverse : Elapsed 0.008 ms (0.757 ms / 100) 0.752 -> 0.756 ( +0.53%) [ +0.00% +0.40% +0.40% / +0.93% +0.53% +0.80%] index_select skip64 : Elapsed 0.008 ms (0.752 ms / 100) 0.748 -> 0.750 ( +0.27%) [ +0.13% +0.00% +0.67% / +0.27% +1.87% +2.27%] index_select skip256 : Elapsed 0.007 ms (0.749 ms / 100) 0.746 -> 0.751 ( +0.67%) [ +0.00% +0.27% +0.54% / +0.67% +2.95% +2.55%] index_select spread : Elapsed 0.007 ms (0.746 ms / 100) 0.745 -> 0.745 ( +0.00%) [ +0.40% +0.13% +0.00% / +0.00% +2.28% +2.01%] index_select strided 3 : Elapsed 0.007 ms (0.748 ms / 100) 0.749 -> 0.748 ( -0.13%) [ +0.13% +0.00% +0.27% / -0.13% +1.74% +2.00%] index_select strided 5 : Elapsed 0.008 ms (0.750 ms / 100) 0.752 -> 0.751 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.93% +0.93%] index_select strided 7 : Elapsed 0.008 ms (0.752 ms / 100) 0.752 -> 0.755 ( +0.40%) [ +0.53% +0.40% +0.00% / +0.40% +0.66% +0.40%] index_select strided 8 : Elapsed 0.008 ms (0.756 ms / 100) 0.752 -> 0.752 ( +0.00%) [ +0.27% +0.27% +0.00% / +0.00% +0.40% +1.20%] index_select strided 16 : Elapsed 0.008 ms (0.754 ms / 100) 0.751 -> 0.754 ( +0.40%) [ +0.00% +0.00% +0.00% / +0.40% +1.20% +1.33%] index_select random : Elapsed 0.008 ms (0.751 ms / 100) 0.750 -> 0.751 ( +0.13%) [ +0.27% +0.00% +0.13% / +0.13% +1.07% +1.47%] index_select random_sorted : Elapsed 0.008 ms (0.752 ms / 100) 0.748 -> 0.751 ( +0.40%) [ +0.53% +0.13% +0.00% / +0.40% +1.34% +2.01%] index_select perm : Elapsed 0.008 ms (0.752 ms / 100) 0.751 -> 0.752 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +1.07% +1.20%] index_select perm_sorted : Elapsed 0.008 ms (0.751 ms / 100) B = [16, 4, 40, 5] (stride (800, 5, 20, 1)) A = [16, 20, 40, 5] (stride (1, 3200, 16, 640)) dim = 1 2.106 -> 2.114 ( +0.38%) [ +0.28% +0.28% +0.00% / +0.38% +0.81% +0.71%] index_select const : Elapsed 0.021 ms (2.112 ms / 100) 2.094 -> 2.100 ( +0.29%) [ +0.19% +0.05% +0.00% / +0.33% +0.33% +0.29%] index_select wrap : Elapsed 0.021 ms (2.098 ms / 100) 2.098 -> 2.096 ( -0.10%) [ +0.00% +0.19% +0.05% / -0.10% +0.14% +0.33%] index_select linear : Elapsed 0.021 ms (2.098 ms / 100) 2.108 -> 2.108 ( +0.00%) [ +0.19% +0.00% +0.09% / +0.00% +0.19% +0.33%] index_select reverse : Elapsed 0.021 ms (2.112 ms / 100) 2.111 -> 2.111 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.76% +0.52%] index_select skip64 : Elapsed 0.021 ms (2.112 ms / 100) 2.098 -> 2.104 ( +0.29%) [ +0.00% +0.33% +0.05% / +0.29% +0.57% +0.76%] index_select skip256 : Elapsed 0.021 ms (2.098 ms / 100) 2.093 -> 2.099 ( +0.29%) [ +0.19% +0.10% +0.00% / +0.29% +0.57% +0.29%] index_select spread : Elapsed 0.021 ms (2.097 ms / 100) 2.111 -> 2.112 ( +0.05%) [ +0.19% +0.00% +0.09% / +0.05% +0.33% +0.14%] index_select strided 3 : Elapsed 0.021 ms (2.115 ms / 100) 2.105 -> 2.104 ( -0.05%) [ +0.05% +0.00% +0.19% / -0.05% +0.57% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.106 ms / 100) 2.103 -> 2.104 ( +0.05%) [ +0.00% +0.10% +0.14% / +0.05% +0.38% +0.57%] index_select strided 7 : Elapsed 0.021 ms (2.103 ms / 100) 2.103 -> 2.110 ( +0.33%) [ +0.33% +0.29% +0.00% / +0.33% +0.81% +0.57%] index_select strided 8 : Elapsed 0.021 ms (2.110 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.57% +0.38%] index_select strided 16 : Elapsed 0.021 ms (2.095 ms / 100) 2.092 -> 2.097 ( +0.24%) [ +0.00% +0.24% +0.14% / +0.24% +0.38% +0.43%] index_select random : Elapsed 0.021 ms (2.092 ms / 100) 2.104 -> 2.111 ( +0.33%) [ +0.33% +0.52% +0.00% / +0.33% +0.86% +0.76%] index_select random_sorted : Elapsed 0.021 ms (2.111 ms / 100) 2.102 -> 2.108 ( +0.29%) [ +0.19% +0.33% +0.00% / +0.29% +0.67% +0.67%] index_select perm : Elapsed 0.021 ms (2.106 ms / 100) 2.093 -> 2.096 ( +0.14%) [ +0.19% +0.00% +0.10% / +0.14% +0.81% +0.57%] index_select perm_sorted : Elapsed 0.021 ms (2.097 ms / 100) B = [16, 4, 40, 5] (stride (800, 1, 20, 4)) A = [16, 20, 40, 5] (stride (800, 1, 20, 12800)) dim = 1 2.046 -> 2.040 ( -0.29%) [ +0.05% +0.64% +0.00% / +0.15% -0.29% -0.24%] index_select const : Elapsed 0.020 ms (2.047 ms / 100) 2.039 -> 2.037 ( -0.10%) [ +0.00% +0.20% +0.05% / -0.10% +0.20% -0.05%] index_select wrap : Elapsed 0.020 ms (2.039 ms / 100) 2.041 -> 2.042 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.05% +0.24%] index_select linear : Elapsed 0.020 ms (2.043 ms / 100) 2.043 -> 2.039 ( -0.20%) [ +0.00% +0.15% +0.10% / +0.29% +0.10% -0.20%] index_select reverse : Elapsed 0.020 ms (2.043 ms / 100) 2.049 -> 2.048 ( -0.05%) [ +0.10% +0.05% +0.00% / +0.00% +0.20% -0.05%] index_select skip64 : Elapsed 0.021 ms (2.051 ms / 100) 2.051 -> 2.047 ( -0.20%) [ +0.00% +0.05% +0.15% / +0.05% -0.15% -0.20%] index_select skip256 : Elapsed 0.021 ms (2.051 ms / 100) 2.102 -> 2.101 ( -0.05%) [ +0.33% +0.05% +0.00% / +0.05% -0.05% +0.48%] index_select spread : Elapsed 0.021 ms (2.109 ms / 100) 2.081 -> 2.081 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.38% +0.62%] index_select strided 3 : Elapsed 0.021 ms (2.083 ms / 100) 2.094 -> 2.098 ( +0.19%) [ +0.53% +0.00% +0.14% / +0.19% +0.29% +0.43%] index_select strided 5 : Elapsed 0.021 ms (2.105 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.14% +0.00% +0.29% / +0.10% +0.14% +0.19%] index_select strided 7 : Elapsed 0.021 ms (2.088 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.00% +0.14% +0.00% / +0.29% +0.05% +0.10%] index_select strided 8 : Elapsed 0.021 ms (2.090 ms / 100) 2.097 -> 2.100 ( +0.14%) [ +0.14% +0.57% +0.00% / +0.14% +0.19% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.100 ms / 100) 2.081 -> 2.079 ( -0.10%) [ +0.48% +0.00% +0.14% / -0.10% +0.10% +0.14%] index_select random : Elapsed 0.021 ms (2.091 ms / 100) 2.082 -> 2.083 ( +0.05%) [ +0.19% +0.00% +0.00% / +0.05% +0.05% +0.19%] index_select random_sorted : Elapsed 0.021 ms (2.086 ms / 100) 2.081 -> 2.084 ( +0.14%) [ +0.00% +0.24% +0.24% / +0.14% +0.43% +0.48%] index_select perm : Elapsed 0.021 ms (2.081 ms / 100) 2.086 -> 2.085 ( -0.05%) [ +0.19% +0.00% +0.00% / -0.05% +0.34% -0.05%] index_select perm_sorted : Elapsed 0.021 ms (2.090 ms / 100) B = [16, 4, 40, 5] (stride (20, 5, 320, 1)) A = [16, 20, 40, 5] (stride (20, 1, 320, 12800)) dim = 1 2.231 -> 2.228 ( -0.13%) [ +0.09% +0.09% +0.00% / +0.04% -0.13% +0.22%] index_select const : Elapsed 0.022 ms (2.233 ms / 100) 2.230 -> 2.228 ( -0.09%) [ +0.00% +0.22% +0.00% / +0.40% -0.09% +0.18%] index_select wrap : Elapsed 0.022 ms (2.230 ms / 100) 2.229 -> 2.228 ( -0.04%) [ +0.18% +0.04% +0.00% / +0.04% +0.13% -0.04%] index_select linear : Elapsed 0.022 ms (2.233 ms / 100) 2.238 -> 2.235 ( -0.13%) [ +0.04% +0.00% +0.04% / -0.09% -0.13% -0.09%] index_select reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.228 -> 2.231 ( +0.13%) [ +0.09% +0.18% +0.00% / +0.13% +0.22% +0.40%] index_select skip64 : Elapsed 0.022 ms (2.230 ms / 100) 2.236 -> 2.233 ( -0.13%) [ +0.00% +0.09% +0.04% / -0.04% +0.13% -0.13%] index_select skip256 : Elapsed 0.022 ms (2.236 ms / 100) 2.286 -> 2.283 ( -0.13%) [ +0.35% +0.13% +0.00% / +0.09% -0.13% +0.04%] index_select spread : Elapsed 0.023 ms (2.294 ms / 100) 2.263 -> 2.257 ( -0.27%) [ +0.00% +0.13% +0.04% / +0.18% -0.27% +0.00%] index_select strided 3 : Elapsed 0.023 ms (2.263 ms / 100) 2.289 -> 2.283 ( -0.26%) [ +0.09% +0.00% +0.00% / +0.04% +0.13% -0.26%] index_select strided 5 : Elapsed 0.023 ms (2.291 ms / 100) 2.279 -> 2.280 ( +0.04%) [ +0.39% +0.35% +0.00% / +0.22% +0.09% +0.04%] index_select strided 7 : Elapsed 0.023 ms (2.288 ms / 100) 2.275 -> 2.271 ( -0.18%) [ +0.09% +0.00% +0.13% / +0.22% +0.13% -0.18%] index_select strided 8 : Elapsed 0.023 ms (2.277 ms / 100) 2.283 -> 2.286 ( +0.13%) [ +0.39% +0.48% +0.00% / +0.13% +0.39% +0.26%] index_select strided 16 : Elapsed 0.023 ms (2.292 ms / 100) 2.254 -> 2.260 ( +0.27%) [ +0.27% +0.22% +0.00% / +0.27% +0.27% +0.35%] index_select random : Elapsed 0.023 ms (2.260 ms / 100) 2.266 -> 2.264 ( -0.09%) [ +0.00% +0.04% +0.04% / +0.04% -0.09% +0.18%] index_select random_sorted : Elapsed 0.023 ms (2.266 ms / 100) 2.279 -> 2.280 ( +0.04%) [ +0.13% +0.22% +0.00% / +0.13% +0.22% +0.04%] index_select perm : Elapsed 0.023 ms (2.282 ms / 100) 2.283 -> 2.284 ( +0.04%) [ +0.00% +0.13% +0.09% / +0.09% +0.04% +0.18%] index_select perm_sorted : Elapsed 0.023 ms (2.283 ms / 100) B = [16, 4, 40, 5] (stride (5, 80, 320, 1)) A = [16, 20, 40, 5] (stride (800, 1, 20, 12800)) dim = 1 2.169 -> 2.163 ( -0.28%) [ +0.00% +0.09% +0.05% / -0.18% -0.23% -0.28%] index_select const : Elapsed 0.022 ms (2.169 ms / 100) 2.157 -> 2.156 ( -0.05%) [ +0.00% +0.14% +0.09% / +0.23% +0.23% -0.05%] index_select wrap : Elapsed 0.022 ms (2.157 ms / 100) 2.158 -> 2.163 ( +0.23%) [ +0.37% +0.09% +0.00% / +0.42% +0.32% +0.23%] index_select linear : Elapsed 0.022 ms (2.166 ms / 100) 2.162 -> 2.162 ( +0.00%) [ +0.19% +0.00% +0.23% / +0.32% +0.00% +0.32%] index_select reverse : Elapsed 0.022 ms (2.166 ms / 100) 2.165 -> 2.164 ( -0.05%) [ +0.00% +0.05% +0.09% / +0.05% +0.00% -0.05%] index_select skip64 : Elapsed 0.022 ms (2.165 ms / 100) 2.165 -> 2.166 ( +0.05%) [ +0.18% +0.18% +0.00% / +0.14% +0.09% +0.05%] index_select skip256 : Elapsed 0.022 ms (2.169 ms / 100) 2.227 -> 2.231 ( +0.18%) [ +0.22% +0.22% +0.00% / +0.18% +0.18% +0.40%] index_select spread : Elapsed 0.022 ms (2.232 ms / 100) 2.202 -> 2.206 ( +0.18%) [ +0.14% +0.09% +0.00% / +0.18% +0.36% +0.41%] index_select strided 3 : Elapsed 0.022 ms (2.205 ms / 100) 2.227 -> 2.232 ( +0.22%) [ +0.09% +0.09% +0.00% / +0.22% +0.40% +0.36%] index_select strided 5 : Elapsed 0.022 ms (2.229 ms / 100) 2.210 -> 2.211 ( +0.05%) [ +0.23% +0.00% +0.05% / +0.05% +0.09% +0.14%] index_select strided 7 : Elapsed 0.022 ms (2.215 ms / 100) 2.211 -> 2.216 ( +0.23%) [ +0.18% +0.00% +0.18% / +0.27% +0.23% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.215 ms / 100) 2.230 -> 2.229 ( -0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.18% -0.04%] index_select strided 16 : Elapsed 0.022 ms (2.232 ms / 100) 2.176 -> 2.175 ( -0.05%) [ +0.00% +0.05% +0.23% / +0.23% +0.14% -0.05%] index_select random : Elapsed 0.022 ms (2.176 ms / 100) 2.179 -> 2.176 ( -0.14%) [ +0.28% +0.00% +0.14% / -0.05% -0.09% -0.14%] index_select random_sorted : Elapsed 0.022 ms (2.185 ms / 100) 2.176 -> 2.180 ( +0.18%) [ +0.46% +0.23% +0.00% / +0.18% +0.18% +0.23%] index_select perm : Elapsed 0.022 ms (2.186 ms / 100) 2.215 -> 2.210 ( -0.23%) [ +0.05% +0.00% +0.05% / -0.18% -0.23% -0.05%] index_select perm_sorted : Elapsed 0.022 ms (2.216 ms / 100) B = [16, 4, 40, 5] (stride (4, 1, 320, 64)) A = [16, 20, 40, 5] (stride (4000, 200, 1, 40)) dim = 1 1.942 -> 1.945 ( +0.15%) [ +0.36% +0.21% +0.00% / +0.15% +0.46% +0.21%] index_select const : Elapsed 0.019 ms (1.949 ms / 100) 1.960 -> 1.968 ( +0.41%) [ +0.41% +0.41% +0.00% / +0.46% +0.41% +0.71%] index_select wrap : Elapsed 0.020 ms (1.968 ms / 100) 1.967 -> 1.969 ( +0.10%) [ +0.36% +0.20% +0.00% / +0.10% +0.51% +0.31%] index_select linear : Elapsed 0.020 ms (1.974 ms / 100) 1.966 -> 1.968 ( +0.10%) [ +0.31% +0.00% +0.31% / +0.10% +0.41% +0.51%] index_select reverse : Elapsed 0.020 ms (1.972 ms / 100) 1.943 -> 1.944 ( +0.05%) [ +0.31% +0.00% +0.00% / +0.05% +0.46% +0.31%] index_select skip64 : Elapsed 0.019 ms (1.949 ms / 100) 1.939 -> 1.942 ( +0.15%) [ +0.00% +0.36% +0.10% / +0.15% +0.52% +0.62%] index_select skip256 : Elapsed 0.019 ms (1.939 ms / 100) 1.964 -> 1.958 ( -0.31%) [ +0.10% +0.10% +0.00% / -0.31% +0.31% +0.46%] index_select spread : Elapsed 0.020 ms (1.966 ms / 100) 1.971 -> 1.972 ( +0.05%) [ +0.46% +0.15% +0.00% / +0.05% +0.41% +0.15%] index_select strided 3 : Elapsed 0.020 ms (1.980 ms / 100) 1.968 -> 1.972 ( +0.20%) [ +0.10% +0.00% +0.10% / +0.20% +0.41% +0.41%] index_select strided 5 : Elapsed 0.020 ms (1.970 ms / 100) 1.969 -> 1.972 ( +0.15%) [ +0.05% +0.00% +0.00% / +0.15% +0.51% +0.61%] index_select strided 7 : Elapsed 0.020 ms (1.970 ms / 100) 1.971 -> 1.971 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.30% +0.30%] index_select strided 8 : Elapsed 0.020 ms (1.971 ms / 100) 1.969 -> 1.967 ( -0.10%) [ +0.20% +0.20% +0.00% / -0.10% +0.30% +0.20%] index_select strided 16 : Elapsed 0.020 ms (1.973 ms / 100) 1.966 -> 1.972 ( +0.31%) [ +0.25% +0.00% +0.05% / +0.31% +0.46% +0.46%] index_select random : Elapsed 0.020 ms (1.971 ms / 100) 1.966 -> 1.970 ( +0.20%) [ +0.10% +0.00% +0.10% / +0.20% +0.51% +0.36%] index_select random_sorted : Elapsed 0.020 ms (1.968 ms / 100) 1.965 -> 1.971 ( +0.31%) [ +0.00% +0.10% +0.05% / +0.31% +0.56% +0.71%] index_select perm : Elapsed 0.020 ms (1.965 ms / 100) 1.961 -> 1.965 ( +0.20%) [ +0.10% +0.10% +0.00% / +0.20% +0.61% +0.76%] index_select perm_sorted : Elapsed 0.020 ms (1.963 ms / 100) B = [16, 4, 40, 5] (stride (1, 640, 16, 2560)) A = [16, 20, 40, 5] (stride (200, 3200, 1, 40)) dim = 1 2.046 -> 2.048 ( +0.10%) [ +0.39% +0.00% +0.10% / +0.20% +0.15% +0.10%] index_select const : Elapsed 0.021 ms (2.054 ms / 100) 2.065 -> 2.072 ( +0.34%) [ +0.34% +0.53% +0.00% / +0.34% +0.39% +0.34%] index_select wrap : Elapsed 0.021 ms (2.072 ms / 100) 2.068 -> 2.069 ( +0.05%) [ +0.00% +0.05% +0.15% / +0.05% +0.10% +0.10%] index_select linear : Elapsed 0.021 ms (2.068 ms / 100) 2.070 -> 2.068 ( -0.10%) [ +0.00% +0.19% +0.00% / -0.10% -0.05% +0.00%] index_select reverse : Elapsed 0.021 ms (2.070 ms / 100) 2.051 -> 2.048 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.00% -0.05%] index_select skip64 : Elapsed 0.021 ms (2.051 ms / 100) 2.050 -> 2.048 ( -0.10%) [ +0.00% +0.00% +0.00% / -0.10% +0.15% +0.10%] index_select skip256 : Elapsed 0.021 ms (2.050 ms / 100) 2.054 -> 2.065 ( +0.54%) [ +0.39% +0.24% +0.00% / +0.54% +0.97% +1.07%] index_select spread : Elapsed 0.021 ms (2.062 ms / 100) 2.066 -> 2.071 ( +0.24%) [ +0.05% +0.24% +0.00% / +0.24% +0.34% +0.29%] index_select strided 3 : Elapsed 0.021 ms (2.067 ms / 100) 2.053 -> 2.058 ( +0.24%) [ +0.00% +0.05% +0.05% / +0.24% +0.88% +1.02%] index_select strided 5 : Elapsed 0.021 ms (2.053 ms / 100) 2.061 -> 2.067 ( +0.29%) [ +0.24% +0.00% +0.34% / +0.29% +0.68% +0.63%] index_select strided 7 : Elapsed 0.021 ms (2.066 ms / 100) 2.057 -> 2.055 ( -0.10%) [ +0.00% +0.00% +0.05% / -0.10% +1.17% +1.07%] index_select strided 8 : Elapsed 0.021 ms (2.057 ms / 100) 2.064 -> 2.071 ( +0.34%) [ +0.29% +0.29% +0.00% / +0.34% +0.48% +0.53%] index_select strided 16 : Elapsed 0.021 ms (2.070 ms / 100) 2.057 -> 2.061 ( +0.19%) [ +0.29% +0.10% +0.00% / +0.19% +0.68% +0.63%] index_select random : Elapsed 0.021 ms (2.063 ms / 100) 2.053 -> 2.058 ( +0.24%) [ +0.19% +0.34% +0.00% / +0.24% +0.63% +0.68%] index_select random_sorted : Elapsed 0.021 ms (2.057 ms / 100) 2.069 -> 2.067 ( -0.10%) [ +0.10% +0.19% +0.00% / +0.29% -0.05% -0.10%] index_select perm : Elapsed 0.021 ms (2.071 ms / 100) 2.075 -> 2.075 ( +0.00%) [ +0.00% +0.05% +0.10% / +0.24% +0.24% +0.00%] index_select perm_sorted : Elapsed 0.021 ms (2.075 ms / 100) B = [16, 4, 40, 5] (stride (4, 1, 64, 2560)) A = [16, 20, 40, 5] (stride (40, 3200, 1, 640)) dim = 1 2.037 -> 2.039 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.83% +0.64%] index_select const : Elapsed 0.020 ms (2.038 ms / 100) 2.075 -> 2.073 ( -0.10%) [ +0.14% +0.00% +0.24% / -0.10% +1.11% +1.06%] index_select wrap : Elapsed 0.021 ms (2.078 ms / 100) 2.078 -> 2.080 ( +0.10%) [ +0.19% +0.10% +0.00% / +0.10% +1.01% +1.06%] index_select linear : Elapsed 0.021 ms (2.082 ms / 100) 2.075 -> 2.080 ( +0.24%) [ +0.19% +0.24% +0.00% / +0.24% +0.82% +0.87%] index_select reverse : Elapsed 0.021 ms (2.079 ms / 100) 2.038 -> 2.037 ( -0.05%) [ +0.25% +0.05% +0.00% / -0.05% +0.69% +1.08%] index_select skip64 : Elapsed 0.020 ms (2.043 ms / 100) 2.039 -> 2.043 ( +0.20%) [ +0.25% +0.20% +0.00% / +0.20% +0.74% +1.03%] index_select skip256 : Elapsed 0.020 ms (2.044 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.05% +0.00% +0.14% / +0.10% +0.87% +0.77%] index_select spread : Elapsed 0.021 ms (2.072 ms / 100) 2.072 -> 2.081 ( +0.43%) [ +0.24% +0.48% +0.00% / +0.43% +0.77% +0.97%] index_select strided 3 : Elapsed 0.021 ms (2.077 ms / 100) 2.080 -> 2.085 ( +0.24%) [ +0.14% +0.14% +0.00% / +0.24% +0.96% +0.62%] index_select strided 5 : Elapsed 0.021 ms (2.083 ms / 100) 2.096 -> 2.100 ( +0.19%) [ +0.10% +0.33% +0.00% / +0.19% +0.24% +0.29%] index_select strided 7 : Elapsed 0.021 ms (2.098 ms / 100) 2.076 -> 2.080 ( +0.19%) [ +0.39% +0.24% +0.00% / +0.19% +0.29% +0.72%] index_select strided 8 : Elapsed 0.021 ms (2.084 ms / 100) 2.075 -> 2.078 ( +0.14%) [ +0.24% +0.34% +0.00% / +0.14% +0.48% +0.43%] index_select strided 16 : Elapsed 0.021 ms (2.080 ms / 100) 2.076 -> 2.079 ( +0.14%) [ +0.10% +0.00% +0.05% / +0.14% +0.34% +0.48%] index_select random : Elapsed 0.021 ms (2.078 ms / 100) 2.080 -> 2.080 ( +0.00%) [ +0.24% +0.05% +0.00% / +0.00% +0.24% +0.19%] index_select random_sorted : Elapsed 0.021 ms (2.085 ms / 100) 2.099 -> 2.098 ( -0.05%) [ +0.00% +0.00% +0.05% / +0.10% +0.05% -0.05%] index_select perm : Elapsed 0.021 ms (2.099 ms / 100) 2.075 -> 2.075 ( +0.00%) [ +0.00% +0.10% +0.19% / +0.00% +0.00% +0.14%] index_select perm_sorted : Elapsed 0.021 ms (2.075 ms / 100) out_shape = [16, 20, 4, 5] in_shape = [16, 20, 40, 5] idx_dim = 2 B = [16, 20, 4, 5] (stride (400, 20, 1, 4)) A = [16, 20, 40, 5] (stride (100, 5, 1600, 1)) dim = 2 0.555 -> 0.556 ( +0.18%) [ +1.80% +0.36% +0.00% / +0.18% +0.18% +0.18%] index_select const : Elapsed 0.006 ms (0.565 ms / 100) 0.554 -> 0.553 ( -0.18%) [+15.70% +0.00% +0.00% / -0.18% +0.90% +0.72%] index_select wrap : Elapsed 0.006 ms (0.641 ms / 100) 0.553 -> 0.554 ( +0.18%) [ +5.42% +0.36% +0.00% / +0.18% +1.08% +1.08%] index_select linear : Elapsed 0.006 ms (0.583 ms / 100) 0.554 -> 0.554 ( +0.00%) [+13.36% +0.18% +0.00% / +0.00% +0.54% +0.54%] index_select reverse : Elapsed 0.006 ms (0.628 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +0.00% +0.18% +9.57% / +0.18% +0.54% +0.54%] index_select skip64 : Elapsed 0.006 ms (0.554 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +2.35% +0.18% +0.00% / +0.18% +0.72% +0.36%] index_select skip256 : Elapsed 0.006 ms (0.567 ms / 100) 0.554 -> 0.554 ( +0.00%) [+10.11% +0.18% +0.00% / +0.00% +0.54% +0.54%] index_select spread : Elapsed 0.006 ms (0.610 ms / 100) 0.554 -> 0.554 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.72% +0.54%] index_select strided 3 : Elapsed 0.006 ms (0.554 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.00% +0.18% +0.00% / +0.36% +0.54% +0.54%] index_select strided 5 : Elapsed 0.006 ms (0.554 ms / 100) 0.557 -> 0.556 ( -0.18%) [ +0.18% +0.18% +0.00% / +0.00% +0.54% -0.18%] index_select strided 7 : Elapsed 0.006 ms (0.558 ms / 100) 0.556 -> 0.557 ( +0.18%) [ +0.18% +0.36% +0.00% / +1.44% +0.18% +0.18%] index_select strided 8 : Elapsed 0.006 ms (0.557 ms / 100) 0.556 -> 0.557 ( +0.18%) [ +0.00% +0.18% +0.00% / +4.86% +0.18% +0.18%] index_select strided 16 : Elapsed 0.006 ms (0.556 ms / 100) 0.555 -> 0.556 ( +0.18%) [ +5.77% +1.62% +0.00% / +0.18% +0.54% +0.18%] index_select random : Elapsed 0.006 ms (0.587 ms / 100) 0.553 -> 0.558 ( +0.90%) [+18.26% +0.36% +0.00% / +0.90% +0.90% +1.08%] index_select random_sorted : Elapsed 0.007 ms (0.654 ms / 100) 0.553 -> 0.567 ( +2.53%) [ +0.36% +0.18% +0.00% / +12.30% +2.53% +5.42%] index_select perm : Elapsed 0.006 ms (0.555 ms / 100) 0.557 -> 0.558 ( +0.18%) [ +0.00% +0.00% +0.00% / +6.10% +0.18% +6.82%] index_select perm_sorted : Elapsed 0.006 ms (0.557 ms / 100) B = [16, 20, 4, 5] (stride (400, 5, 100, 1)) A = [16, 20, 40, 5] (stride (20, 1, 1600, 320)) dim = 2 1.194 -> 1.195 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select const : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.33% +0.08% +0.00% / +0.08% +0.42% +0.59%] index_select wrap : Elapsed 0.012 ms (1.199 ms / 100) 1.195 -> 1.197 ( +0.17%) [ +0.25% +0.00% +0.00% / +0.17% +0.59% +0.50%] index_select linear : Elapsed 0.012 ms (1.198 ms / 100) 1.195 -> 1.198 ( +0.25%) [ +0.08% +0.00% +0.00% / +0.25% +0.50% +0.50%] index_select reverse : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select skip64 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.67% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.75% +0.59%] index_select spread : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.00% +0.59% +0.42%] index_select strided 5 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.33% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.199 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.50% +0.42%] index_select strided 8 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.59%] index_select strided 16 : Elapsed 0.012 ms (1.196 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.33% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.198 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.42% +0.33%] index_select perm : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.42% +0.42%] index_select perm_sorted : Elapsed 0.012 ms (1.197 ms / 100) B = [16, 20, 4, 5] (stride (400, 5, 100, 1)) A = [16, 20, 40, 5] (stride (40, 640, 1, 12800)) dim = 2 1.368 -> 1.367 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.73% +0.51%] index_select const : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.58% +0.07%] index_select wrap : Elapsed 0.014 ms (1.372 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.73% +0.66%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.59% +0.59%] index_select reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.22% +0.00% +0.22% / +0.29% +1.32% +0.58%] index_select skip64 : Elapsed 0.014 ms (1.371 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.00% +0.15% +0.22% / +0.15% +0.73% +0.66%] index_select skip256 : Elapsed 0.014 ms (1.365 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_select spread : Elapsed 0.014 ms (1.369 ms / 100) 1.369 -> 1.372 ( +0.22%) [ +0.37% +0.15% +0.00% / +0.22% +0.73% +0.44%] index_select strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.29% +0.00% +0.07% / +0.15% +0.51% +0.37%] index_select strided 5 : Elapsed 0.014 ms (1.371 ms / 100) 1.366 -> 1.371 ( +0.37%) [ +0.00% +0.37% +0.51% / +0.37% +0.73% +0.59%] index_select strided 7 : Elapsed 0.014 ms (1.366 ms / 100) 1.368 -> 1.371 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.51% +0.73%] index_select strided 8 : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.374 ( +0.37%) [ +0.37% +0.44% +0.00% / +0.37% +1.10% +1.17%] index_select strided 16 : Elapsed 0.014 ms (1.374 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.81% +0.66%] index_select random : Elapsed 0.014 ms (1.366 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.00% +0.00% +0.22% / +0.15% +0.80% +0.37%] index_select random_sorted : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.365 ( -0.15%) [ +0.00% +0.07% +0.00% / -0.15% +0.37% +0.44%] index_select perm : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.368 ( +0.22%) [ +0.00% +0.22% +0.15% / +0.22% +0.73% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.365 ms / 100) B = [16, 20, 4, 5] (stride (400, 4, 1, 80)) dim = 2 fill_cnt = 40 2.727 -> 2.720 ( -0.26%) [ +0.04% +0.04% +0.00% / -0.26% +0.33% +0.11%] index_fill_ const : Elapsed 0.027 ms (2.728 ms / 100) 2.717 -> 2.710 ( -0.26%) [ +0.00% +0.00% +0.00% / -0.26% +0.22% +0.04%] index_fill_ linear : Elapsed 0.027 ms (2.717 ms / 100) 2.712 -> 2.712 ( +0.00%) [ +0.22% +0.33% +0.00% / +0.00% +0.52% +0.41%] index_fill_ reverse : Elapsed 0.027 ms (2.718 ms / 100) 2.728 -> 2.723 ( -0.18%) [ +0.18% +0.00% +0.00% / -0.18% +0.22% +0.11%] index_fill_ skip64 : Elapsed 0.027 ms (2.733 ms / 100) 2.736 -> 2.730 ( -0.22%) [ +0.00% +0.04% +0.04% / -0.22% -0.15% -0.07%] index_fill_ skip256 : Elapsed 0.027 ms (2.736 ms / 100) 2.721 -> 2.709 ( -0.44%) [ +0.07% +0.00% +0.00% / -0.44% +0.07% -0.04%] index_fill_ spread : Elapsed 0.027 ms (2.723 ms / 100) 2.718 -> 2.712 ( -0.22%) [ +0.11% +0.11% +0.00% / -0.22% +0.00% +0.15%] index_fill_ strided 3 : Elapsed 0.027 ms (2.721 ms / 100) 2.719 -> 2.715 ( -0.15%) [ +0.18% +0.15% +0.00% / -0.15% +0.00% +0.07%] index_fill_ random : Elapsed 0.027 ms (2.724 ms / 100) 2.722 -> 2.712 ( -0.37%) [ +0.37% +0.22% +0.00% / -0.37% +0.04% -0.11%] index_fill_ random_sorted : Elapsed 0.027 ms (2.732 ms / 100) B = [16, 20, 4, 5] (stride (20, 320, 5, 1)) A = [16, 20, 40, 5] (stride (1, 3200, 16, 640)) dim = 2 1.404 -> 1.406 ( +0.14%) [ +0.43% +0.00% +0.36% / +0.43% +0.14% +0.71%] index_select const : Elapsed 0.014 ms (1.410 ms / 100) 1.388 -> 1.392 ( +0.29%) [ +0.29% +0.22% +0.00% / +0.29% +0.65% +1.08%] index_select wrap : Elapsed 0.014 ms (1.392 ms / 100) 1.385 -> 1.390 ( +0.36%) [ +0.36% +0.00% +0.07% / +0.36% +0.58% +0.43%] index_select linear : Elapsed 0.014 ms (1.390 ms / 100) 1.364 -> 1.367 ( +0.22%) [ +0.00% +0.07% +0.15% / +0.22% +0.66% +0.44%] index_select reverse : Elapsed 0.014 ms (1.364 ms / 100) 1.406 -> 1.405 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.50% +0.28%] index_select skip64 : Elapsed 0.014 ms (1.406 ms / 100) 1.406 -> 1.406 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.50% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.407 ms / 100) 1.375 -> 1.378 ( +0.22%) [ +0.29% +0.29% +0.00% / +0.22% +0.73% +1.02%] index_select spread : Elapsed 0.014 ms (1.379 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.51% +0.37%] index_select strided 3 : Elapsed 0.014 ms (1.370 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.29% +0.00% +0.22% / +0.00% +0.88% +0.58%] index_select strided 5 : Elapsed 0.014 ms (1.373 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.369 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.58% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.372 ms / 100) 1.377 -> 1.381 ( +0.29%) [ +0.15% +0.00% +0.07% / +0.29% +0.73% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.379 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.44% +0.37%] index_select random : Elapsed 0.014 ms (1.369 ms / 100) 1.358 -> 1.357 ( -0.07%) [ +0.22% +0.07% +0.00% / -0.07% +0.52% +0.22%] index_select random_sorted : Elapsed 0.014 ms (1.361 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.44%] index_select perm : Elapsed 0.014 ms (1.369 ms / 100) 1.363 -> 1.363 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.37% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.364 ms / 100) B = [16, 20, 4, 5] (stride (80, 4, 1, 1280)) A = [16, 20, 40, 5] (stride (800, 40, 1, 12800)) dim = 2 0.594 -> 0.598 ( +0.67%) [ +2.19% +0.17% +0.00% / +0.67% +1.01% +1.18%] index_select const : Elapsed 0.006 ms (0.607 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.33% +0.00% +0.33% / +0.17% +0.17% +0.50%] index_select wrap : Elapsed 0.006 ms (0.601 ms / 100) 0.601 -> 0.599 ( -0.33%) [ +0.00% +0.00% +1.33% / -0.33% +0.00% +0.00%] index_select linear : Elapsed 0.006 ms (0.601 ms / 100) 0.598 -> 0.601 ( +0.50%) [ +0.33% +0.67% +0.00% / +0.50% +0.67% +0.50%] index_select reverse : Elapsed 0.006 ms (0.600 ms / 100) 0.600 -> 0.600 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.17% +0.17% +0.00%] index_select skip64 : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.601 ( +0.50%) [ +0.50% +0.00% +0.67% / +0.50% +0.50% +0.50%] index_select skip256 : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.599 ( +0.00%) [ +0.17% +0.00% +0.33% / +0.17% +1.67% +0.00%] index_select spread : Elapsed 0.006 ms (0.600 ms / 100) 0.601 -> 0.600 ( -0.17%) [ +0.17% +0.00% +0.00% / +0.33% +0.00% -0.17%] index_select strided 3 : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.33% +0.00% +0.17%] index_select strided 5 : Elapsed 0.006 ms (0.602 ms / 100) 0.599 -> 0.599 ( +0.00%) [ +0.50% +0.00% +0.17% / +0.00% +0.50% +0.33%] index_select strided 7 : Elapsed 0.006 ms (0.602 ms / 100) 0.595 -> 0.597 ( +0.34%) [ +0.34% +0.17% +0.00% / +0.34% +0.84% +0.84%] index_select strided 8 : Elapsed 0.006 ms (0.597 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.84% +0.33% +0.00% / +0.17% +0.17% +0.50%] index_select strided 16 : Elapsed 0.006 ms (0.603 ms / 100) 0.602 -> 0.600 ( -0.33%) [ +0.00% +0.00% +0.00% / -0.33% -0.17% +0.00%] index_select random : Elapsed 0.006 ms (0.602 ms / 100) 0.600 -> 0.598 ( -0.33%) [ +0.33% +0.00% +0.00% / +0.17% -0.33% +1.83%] index_select random_sorted : Elapsed 0.006 ms (0.602 ms / 100) 0.597 -> 0.597 ( +0.00%) [ +0.34% +0.50% +0.00% / +0.00% +0.50% +0.50%] index_select perm : Elapsed 0.006 ms (0.599 ms / 100) 0.595 -> 0.595 ( +0.00%) [ +0.34% +0.50% +0.00% / +0.00% +1.18% +1.34%] index_select perm_sorted : Elapsed 0.006 ms (0.597 ms / 100) B = [16, 20, 4, 5] (stride (1, 16, 320, 1280)) A = [16, 20, 40, 5] (stride (40, 3200, 1, 640)) dim = 2 1.395 -> 1.399 ( +0.29%) [ +0.14% +0.00% +0.14% / +0.29% +0.57% +0.50%] index_select const : Elapsed 0.014 ms (1.397 ms / 100) 1.399 -> 1.399 ( +0.00%) [ +0.07% +0.36% +0.00% / +0.00% +1.14% +1.50%] index_select wrap : Elapsed 0.014 ms (1.400 ms / 100) 1.389 -> 1.394 ( +0.36%) [ +0.36% +0.00% +0.50% / +0.36% +1.15% +1.08%] index_select linear : Elapsed 0.014 ms (1.394 ms / 100) 1.396 -> 1.400 ( +0.29%) [ +0.21% +0.21% +0.00% / +0.29% +1.15% +0.86%] index_select reverse : Elapsed 0.014 ms (1.399 ms / 100) 1.389 -> 1.396 ( +0.50%) [ +0.29% +0.00% +0.58% / +0.50% +1.37% +1.30%] index_select skip64 : Elapsed 0.014 ms (1.393 ms / 100) 1.397 -> 1.402 ( +0.36%) [ +0.21% +0.00% +0.36% / +0.36% +0.79% +0.79%] index_select skip256 : Elapsed 0.014 ms (1.400 ms / 100) 1.397 -> 1.398 ( +0.07%) [ +0.29% +0.14% +0.00% / +0.07% +0.72% +0.57%] index_select spread : Elapsed 0.014 ms (1.401 ms / 100) 1.392 -> 1.394 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +1.08% +0.86%] index_select strided 3 : Elapsed 0.014 ms (1.393 ms / 100) 1.400 -> 1.401 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.50% +1.21%] index_select strided 5 : Elapsed 0.014 ms (1.402 ms / 100) 1.393 -> 1.396 ( +0.22%) [ +0.29% +0.00% +0.29% / +0.22% +0.72% +1.01%] index_select strided 7 : Elapsed 0.014 ms (1.397 ms / 100) 1.390 -> 1.390 ( +0.00%) [ +0.00% +0.14% +0.14% / +0.00% +0.58% +0.65%] index_select strided 8 : Elapsed 0.014 ms (1.390 ms / 100) 1.385 -> 1.386 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.22% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.387 ms / 100) 1.395 -> 1.397 ( +0.14%) [ +0.22% +0.00% +0.07% / +0.14% +0.36% +0.50%] index_select random : Elapsed 0.014 ms (1.398 ms / 100) 1.398 -> 1.399 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.57% +0.43%] index_select random_sorted : Elapsed 0.014 ms (1.400 ms / 100) 1.391 -> 1.392 ( +0.07%) [ +0.00% +0.29% +0.07% / +0.07% +0.65% +0.43%] index_select perm : Elapsed 0.014 ms (1.391 ms / 100) 1.396 -> 1.400 ( +0.29%) [ +0.29% +0.00% +0.21% / +0.29% +0.64% +0.29%] index_select perm_sorted : Elapsed 0.014 ms (1.400 ms / 100) out_shape = [16, 20, 40, 4] in_shape = [16, 20, 40, 5] idx_dim = 3 B = [16, 20, 40, 4] (stride (3200, 160, 1, 40)) A = [16, 20, 40, 5] (stride (4000, 40, 1, 800)) dim = 3 3.216 -> 3.220 ( +0.12%) [ +0.03% +0.00% +0.03% / +0.12% +0.28% +0.34%] index_select const : Elapsed 0.032 ms (3.217 ms / 100) 3.315 -> 3.313 ( -0.06%) [ +0.21% +0.00% +0.09% / -0.06% +0.21% +0.21%] index_select wrap : Elapsed 0.033 ms (3.322 ms / 100) 3.315 -> 3.318 ( +0.09%) [ +0.27% +0.00% +0.18% / +0.09% +0.27% +0.36%] index_select linear : Elapsed 0.033 ms (3.324 ms / 100) 3.272 -> 3.265 ( -0.21%) [ +0.03% +0.00% +0.06% / -0.21% +0.24% +0.31%] index_select reverse : Elapsed 0.033 ms (3.273 ms / 100) 3.216 -> 3.216 ( +0.00%) [ +0.09% +0.00% +0.28% / +0.00% +0.31% +0.37%] index_select skip64 : Elapsed 0.032 ms (3.219 ms / 100) 3.222 -> 3.220 ( -0.06%) [ +0.12% +0.12% +0.00% / +0.12% +0.03% -0.06%] index_select skip256 : Elapsed 0.032 ms (3.226 ms / 100) 3.320 -> 3.309 ( -0.33%) [ +0.00% +0.06% +0.15% / +0.00% -0.33% -0.21%] index_select spread : Elapsed 0.033 ms (3.320 ms / 100) 3.313 -> 3.305 ( -0.24%) [ +0.00% +0.18% +0.21% / -0.24% +0.00% +0.09%] index_select strided 3 : Elapsed 0.033 ms (3.313 ms / 100) 3.282 -> 3.263 ( -0.58%) [ +0.00% +0.21% +0.27% / +0.37% -0.15% -0.58%] index_select random : Elapsed 0.033 ms (3.282 ms / 100) 3.287 -> 3.293 ( +0.18%) [ +0.24% +0.00% +0.27% / +0.18% +0.27% +0.37%] index_select random_sorted : Elapsed 0.033 ms (3.295 ms / 100) 3.311 -> 3.303 ( -0.24%) [ +0.00% +0.12% +0.09% / +0.00% -0.24% -0.12%] index_select perm : Elapsed 0.033 ms (3.311 ms / 100) 3.326 -> 3.323 ( -0.09%) [ +0.00% +0.12% +0.12% / +0.06% +0.06% -0.09%] index_select perm_sorted : Elapsed 0.033 ms (3.326 ms / 100) B = [16, 20, 40, 4] (stride (3200, 4, 80, 1)) A = [16, 20, 40, 5] (stride (1, 3200, 16, 640)) dim = 3 5.788 -> 5.779 ( -0.16%) [ +0.05% +0.00% +0.12% / +0.03% -0.16% -0.09%] index_select const : Elapsed 0.058 ms (5.791 ms / 100) 5.832 -> 5.817 ( -0.26%) [ +0.09% +0.00% +0.15% / +0.10% -0.26% -0.19%] index_select wrap : Elapsed 0.058 ms (5.837 ms / 100) 5.830 -> 5.811 ( -0.33%) [ +0.00% +0.07% +0.14% / +0.00% -0.33% -0.07%] index_select linear : Elapsed 0.058 ms (5.830 ms / 100) 5.832 -> 5.813 ( -0.33%) [ +0.12% +0.00% +0.10% / +0.09% -0.33% -0.29%] index_select reverse : Elapsed 0.058 ms (5.839 ms / 100) 5.787 -> 5.780 ( -0.12%) [ +0.00% +0.02% +0.07% / +0.14% -0.07% -0.12%] index_select skip64 : Elapsed 0.058 ms (5.787 ms / 100) 5.787 -> 5.779 ( -0.14%) [ +0.00% +0.02% +0.17% / +0.09% -0.14% -0.03%] index_select skip256 : Elapsed 0.058 ms (5.787 ms / 100) 5.831 -> 5.824 ( -0.12%) [ +0.00% +0.05% +0.00% / +0.15% -0.12% -0.09%] index_select spread : Elapsed 0.058 ms (5.831 ms / 100) 5.820 -> 5.821 ( +0.02%) [ +0.00% +0.05% +0.29% / +0.15% +0.02% +0.05%] index_select strided 3 : Elapsed 0.058 ms (5.820 ms / 100) 5.818 -> 5.808 ( -0.17%) [ +0.00% +0.12% +0.15% / +0.26% -0.17% -0.15%] index_select random : Elapsed 0.058 ms (5.818 ms / 100) 5.819 -> 5.800 ( -0.33%) [ +0.00% +0.07% +0.17% / +0.14% -0.31% -0.33%] index_select random_sorted : Elapsed 0.058 ms (5.819 ms / 100) 5.831 -> 5.810 ( -0.36%) [ +0.00% +0.07% +0.24% / +0.03% -0.34% -0.36%] index_select perm : Elapsed 0.058 ms (5.831 ms / 100) 5.828 -> 5.816 ( -0.21%) [ +0.00% +0.00% +0.05% / +0.02% -0.21% -0.17%] index_select perm_sorted : Elapsed 0.058 ms (5.828 ms / 100) B = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 3 fill_cnt = 5 3.699 -> 3.708 ( +0.24%) [ +0.00% +0.24% +0.11% / +0.24% +0.32% +0.41%] index_fill_ const : Elapsed 0.037 ms (3.699 ms / 100) 3.739 -> 3.744 ( +0.13%) [ +0.00% +0.24% +0.24% / +0.29% +0.35% +0.13%] index_fill_ linear : Elapsed 0.037 ms (3.739 ms / 100) 3.745 -> 3.747 ( +0.05%) [ +0.00% +0.11% +0.16% / +0.19% +0.32% +0.05%] index_fill_ reverse : Elapsed 0.037 ms (3.745 ms / 100) 3.701 -> 3.707 ( +0.16%) [ +0.00% +0.16% +0.24% / +0.19% +0.16% +0.16%] index_fill_ skip64 : Elapsed 0.037 ms (3.701 ms / 100) 3.703 -> 3.700 ( -0.08%) [ +0.00% +0.05% +0.30% / -0.08% +0.00% +0.16%] index_fill_ skip256 : Elapsed 0.037 ms (3.703 ms / 100) 3.713 -> 3.712 ( -0.03%) [ +0.00% +0.11% +0.03% / -0.03% +0.11% +0.19%] index_fill_ spread : Elapsed 0.037 ms (3.713 ms / 100) 3.719 -> 3.719 ( +0.00%) [ +0.08% +0.00% +0.11% / +0.00% +0.22% +0.11%] index_fill_ strided 3 : Elapsed 0.037 ms (3.722 ms / 100) 3.737 -> 3.741 ( +0.11%) [ +0.03% +0.00% +0.40% / +0.11% +0.27% +0.35%] index_fill_ random : Elapsed 0.037 ms (3.738 ms / 100) 3.743 -> 3.744 ( +0.03%) [ +0.11% +0.00% +0.21% / +0.03% +0.13% +0.11%] index_fill_ random_sorted : Elapsed 0.037 ms (3.747 ms / 100) B = [16, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [16, 20, 40, 5] (stride (1, 80, 1600, 16)) dim = 3 5.754 -> 5.754 ( +0.00%) [ +0.02% +0.03% +0.00% / +0.00% +0.09% +0.03%] index_select const : Elapsed 0.058 ms (5.755 ms / 100) 5.823 -> 5.831 ( +0.14%) [ +0.09% +0.00% +0.07% / +0.14% +0.39% +0.38%] index_select wrap : Elapsed 0.058 ms (5.828 ms / 100) 5.826 -> 5.833 ( +0.12%) [ +0.00% +0.00% +0.07% / +0.12% +0.26% +0.34%] index_select linear : Elapsed 0.058 ms (5.826 ms / 100) 5.832 -> 5.833 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.12% +0.02% +0.10%] index_select reverse : Elapsed 0.058 ms (5.835 ms / 100) 5.756 -> 5.758 ( +0.03%) [ +0.00% +0.02% +0.21% / +0.07% +0.03% +0.10%] index_select skip64 : Elapsed 0.058 ms (5.756 ms / 100) 5.758 -> 5.755 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.02% +0.03%] index_select skip256 : Elapsed 0.058 ms (5.758 ms / 100) 5.830 -> 5.826 ( -0.07%) [ +0.15% +0.00% +0.05% / -0.07% +0.19% +0.24%] index_select spread : Elapsed 0.058 ms (5.839 ms / 100) 5.856 -> 5.866 ( +0.17%) [ +0.05% +0.00% +0.09% / +0.17% +0.27% +0.26%] index_select strided 3 : Elapsed 0.059 ms (5.859 ms / 100) 5.859 -> 5.869 ( +0.17%) [ +0.05% +0.15% +0.00% / +0.38% +0.17% +0.20%] index_select random : Elapsed 0.059 ms (5.862 ms / 100) 5.821 -> 5.836 ( +0.26%) [ +0.26% +0.00% +0.29% / +0.26% +0.40% +0.43%] index_select random_sorted : Elapsed 0.058 ms (5.836 ms / 100) 5.844 -> 5.852 ( +0.14%) [ +0.10% +0.00% +0.07% / +0.14% +0.34% +0.26%] index_select perm : Elapsed 0.059 ms (5.850 ms / 100) 5.828 -> 5.827 ( -0.02%) [ +0.00% +0.03% +0.21% / -0.02% +0.34% +0.36%] index_select perm_sorted : Elapsed 0.058 ms (5.828 ms / 100) B = [16, 20, 40, 4] (stride (80, 4, 1280, 1)) A = [16, 20, 40, 5] (stride (40, 640, 1, 12800)) dim = 3 5.512 -> 5.510 ( -0.04%) [ +0.05% +0.00% +0.02% / +0.00% +0.05% -0.04%] index_select const : Elapsed 0.055 ms (5.515 ms / 100) 5.569 -> 5.574 ( +0.09%) [ +0.16% +0.00% +0.23% / +0.23% +0.09% +0.13%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.576 -> 5.574 ( -0.04%) [ +0.00% +0.02% +0.07% / -0.04% +0.07% +0.13%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.577 -> 5.578 ( +0.02%) [ +0.00% +0.09% +0.16% / +0.02% +0.18% +0.23%] index_select reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.500 -> 5.507 ( +0.13%) [ +0.13% +0.00% +0.31% / +0.22% +0.15% +0.13%] index_select skip64 : Elapsed 0.055 ms (5.507 ms / 100) 5.514 -> 5.507 ( -0.13%) [ +0.00% +0.04% +0.00% / +0.04% -0.13% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.514 ms / 100) 5.580 -> 5.580 ( +0.00%) [ +0.04% +0.00% +0.16% / +0.16% +0.04% +0.00%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.580 ( +0.02%) [ +0.05% +0.00% +0.22% / +0.02% +0.04% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.545 -> 5.543 ( -0.04%) [ +0.04% +0.00% +0.05% / +0.22% -0.04% +0.05%] index_select random : Elapsed 0.055 ms (5.547 ms / 100) 5.548 -> 5.547 ( -0.02%) [ +0.02% +0.00% +0.20% / -0.02% +0.09% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.549 ms / 100) 5.580 -> 5.579 ( -0.02%) [ +0.07% +0.00% +0.09% / -0.02% +0.25% +0.25%] index_select perm : Elapsed 0.056 ms (5.584 ms / 100) 5.575 -> 5.579 ( +0.07%) [ +0.09% +0.00% +0.05% / +0.20% +0.18% +0.07%] index_select perm_sorted : Elapsed 0.056 ms (5.580 ms / 100) B = [16, 20, 40, 4] (stride (1, 64, 1280, 16)) A = [16, 20, 40, 5] (stride (5, 3200, 80, 1)) dim = 3 5.749 -> 5.738 ( -0.19%) [ +0.10% +0.00% +0.17% / +0.10% +0.00% -0.19%] index_select const : Elapsed 0.058 ms (5.755 ms / 100) 5.752 -> 5.741 ( -0.19%) [ +0.00% +0.05% +0.10% / +0.17% -0.19% -0.17%] index_select wrap : Elapsed 0.058 ms (5.752 ms / 100) 5.750 -> 5.749 ( -0.02%) [ +0.00% +0.14% +0.10% / +0.10% -0.02% +0.00%] index_select linear : Elapsed 0.057 ms (5.750 ms / 100) 5.747 -> 5.740 ( -0.12%) [ +0.14% +0.00% +0.14% / +0.10% -0.07% -0.12%] index_select reverse : Elapsed 0.058 ms (5.755 ms / 100) 5.753 -> 5.740 ( -0.23%) [ +0.02% +0.00% +0.07% / +0.03% -0.21% -0.23%] index_select skip64 : Elapsed 0.058 ms (5.754 ms / 100) 5.750 -> 5.739 ( -0.19%) [ +0.09% +0.00% +0.07% / +0.19% -0.19% -0.16%] index_select skip256 : Elapsed 0.058 ms (5.755 ms / 100) 5.748 -> 5.742 ( -0.10%) [ +0.12% +0.00% +0.28% / +0.14% -0.10% +0.02%] index_select spread : Elapsed 0.058 ms (5.755 ms / 100) 5.754 -> 5.745 ( -0.16%) [ +0.00% +0.10% +0.03% / -0.03% -0.16% -0.16%] index_select strided 3 : Elapsed 0.058 ms (5.754 ms / 100) 5.748 -> 5.743 ( -0.09%) [ +0.00% +0.12% +0.12% / +0.09% -0.09% -0.09%] index_select random : Elapsed 0.057 ms (5.748 ms / 100) 5.751 -> 5.742 ( -0.16%) [ +0.00% +0.12% +0.16% / +0.03% -0.16% -0.12%] index_select random_sorted : Elapsed 0.058 ms (5.751 ms / 100) 5.750 -> 5.740 ( -0.17%) [ +0.00% +0.07% +0.09% / +0.19% -0.09% -0.17%] index_select perm : Elapsed 0.058 ms (5.750 ms / 100) 5.750 -> 5.746 ( -0.07%) [ +0.14% +0.00% +0.03% / +0.10% -0.07% -0.02%] index_select perm_sorted : Elapsed 0.058 ms (5.758 ms / 100) B = [16, 20, 40, 4] (stride (1, 64, 1280, 16)) A = [16, 20, 40, 5] (stride (800, 1, 20, 12800)) dim = 3 5.725 -> 5.723 ( -0.03%) [ +0.07% +0.03% +0.00% / +0.12% +0.03% -0.03%] index_select const : Elapsed 0.057 ms (5.729 ms / 100) 5.756 -> 5.748 ( -0.14%) [ +0.10% +0.00% +0.17% / +0.19% +0.09% -0.14%] index_select wrap : Elapsed 0.058 ms (5.762 ms / 100) 5.763 -> 5.755 ( -0.14%) [ +0.07% +0.00% +0.07% / +0.07% +0.09% -0.14%] index_select linear : Elapsed 0.058 ms (5.767 ms / 100) 5.756 -> 5.755 ( -0.02%) [ +0.00% +0.19% +0.14% / +0.19% -0.02% +0.00%] index_select reverse : Elapsed 0.058 ms (5.756 ms / 100) 5.728 -> 5.717 ( -0.19%) [ +0.09% +0.05% +0.00% / +0.07% -0.19% -0.03%] index_select skip64 : Elapsed 0.057 ms (5.733 ms / 100) 5.722 -> 5.727 ( +0.09%) [ +0.14% +0.00% +0.30% / +0.19% +0.09% +0.12%] index_select skip256 : Elapsed 0.057 ms (5.730 ms / 100) 5.761 -> 5.751 ( -0.17%) [ +0.00% +0.05% +0.16% / +0.05% +0.00% -0.17%] index_select spread : Elapsed 0.058 ms (5.761 ms / 100) 5.767 -> 5.751 ( -0.28%) [ +0.14% +0.00% +0.03% / +0.12% -0.17% -0.28%] index_select strided 3 : Elapsed 0.058 ms (5.775 ms / 100) 5.755 -> 5.761 ( +0.10%) [ +0.00% +0.12% +0.10% / +0.10% +0.16% +0.17%] index_select random : Elapsed 0.058 ms (5.755 ms / 100) 5.751 -> 5.761 ( +0.17%) [ +0.07% +0.00% +0.12% / +0.17% +0.30% +0.21%] index_select random_sorted : Elapsed 0.058 ms (5.755 ms / 100) 5.767 -> 5.759 ( -0.14%) [ +0.12% +0.00% +0.10% / +0.12% -0.09% -0.14%] index_select perm : Elapsed 0.058 ms (5.774 ms / 100) 5.751 -> 5.763 ( +0.21%) [ +0.07% +0.00% +0.07% / +0.24% +0.21% +0.21%] index_select perm_sorted : Elapsed 0.058 ms (5.755 ms / 100) out_shape = [4, 40, 5, 20] in_shape = [16, 40, 5, 20] idx_dim = 0 B = [4, 40, 5, 20] (stride (4000, 20, 800, 1)) A = [16, 40, 5, 20] (stride (100, 1600, 20, 1)) dim = 0 2.266 -> 2.270 ( +0.18%) [ +0.31% +0.00% +0.09% / +0.22% +0.26% +0.18%] index_select const : Elapsed 0.023 ms (2.273 ms / 100) 2.334 -> 2.332 ( -0.09%) [ +0.09% +0.09% +0.00% / -0.09% +0.17% +0.39%] index_select wrap : Elapsed 0.023 ms (2.336 ms / 100) 2.333 -> 2.335 ( +0.09%) [ +0.21% +0.17% +0.00% / +0.09% +0.34% +0.21%] index_select linear : Elapsed 0.023 ms (2.338 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.00% +0.21% +0.04% / +0.00% +0.34% +0.39%] index_select reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.273 -> 2.274 ( +0.04%) [ +0.18% +0.04% +0.00% / +0.18% +0.04% +0.22%] index_select skip64 : Elapsed 0.023 ms (2.277 ms / 100) 2.269 -> 2.270 ( +0.04%) [ +0.26% +0.13% +0.00% / +0.04% +0.13% +0.18%] index_select skip256 : Elapsed 0.023 ms (2.275 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.21% +0.09% +0.00% / +0.04% +0.17% +0.00%] index_select spread : Elapsed 0.023 ms (2.342 ms / 100) 2.336 -> 2.336 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.17% +0.00% +0.00%] index_select strided 3 : Elapsed 0.023 ms (2.339 ms / 100) 2.332 -> 2.338 ( +0.26%) [ +0.04% +0.04% +0.00% / +0.26% +0.26% +0.26%] index_select strided 5 : Elapsed 0.023 ms (2.333 ms / 100) 2.340 -> 2.338 ( -0.09%) [ +0.00% +0.26% +0.13% / +0.13% -0.04% -0.09%] index_select strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.284 -> 2.285 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.18% +0.22% +0.04%] index_select strided 8 : Elapsed 0.023 ms (2.285 ms / 100) 2.331 -> 2.334 ( +0.13%) [ +0.00% +0.13% +0.09% / +0.13% +0.47% +0.34%] index_select random : Elapsed 0.023 ms (2.331 ms / 100) 2.332 -> 2.337 ( +0.21%) [ +0.34% +0.13% +0.00% / +0.43% +0.30% +0.21%] index_select random_sorted : Elapsed 0.023 ms (2.340 ms / 100) 2.334 -> 2.334 ( +0.00%) [ +0.34% +0.17% +0.00% / +0.30% +0.00% +0.17%] index_select perm : Elapsed 0.023 ms (2.342 ms / 100) 2.337 -> 2.334 ( -0.13%) [ +0.00% +0.00% +0.09% / +0.09% -0.13% +0.09%] index_select perm_sorted : Elapsed 0.023 ms (2.337 ms / 100) B = [4, 40, 5, 20] (stride (20, 80, 3200, 1)) A = [16, 40, 5, 20] (stride (1, 1600, 16, 80)) dim = 0 2.574 -> 2.577 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.12% +0.16%] index_select const : Elapsed 0.026 ms (2.577 ms / 100) 2.573 -> 2.573 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.16% +0.16%] index_select wrap : Elapsed 0.026 ms (2.577 ms / 100) 2.574 -> 2.574 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.23% +0.08%] index_select linear : Elapsed 0.026 ms (2.576 ms / 100) 2.573 -> 2.573 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.04% +0.23%] index_select reverse : Elapsed 0.026 ms (2.574 ms / 100) 2.574 -> 2.576 ( +0.08%) [ +0.31% +0.04% +0.00% / +0.08% +0.16% +0.16%] index_select skip64 : Elapsed 0.026 ms (2.582 ms / 100) 2.573 -> 2.574 ( +0.04%) [ +0.23% +0.00% +0.04% / +0.04% +0.23% +0.27%] index_select skip256 : Elapsed 0.026 ms (2.579 ms / 100) 2.593 -> 2.600 ( +0.27%) [ +0.39% +0.31% +0.00% / +0.27% +0.46% +0.50%] index_select spread : Elapsed 0.026 ms (2.603 ms / 100) 2.593 -> 2.597 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.35% +0.27%] index_select strided 3 : Elapsed 0.026 ms (2.597 ms / 100) 2.598 -> 2.599 ( +0.04%) [ +0.08% +0.00% +0.15% / +0.04% +0.46% +0.35%] index_select strided 5 : Elapsed 0.026 ms (2.600 ms / 100) 2.599 -> 2.595 ( -0.15%) [ +0.04% +0.00% +0.15% / -0.15% +0.35% +0.50%] index_select strided 7 : Elapsed 0.026 ms (2.600 ms / 100) 2.605 -> 2.612 ( +0.27%) [ +0.00% +0.12% +0.08% / +0.27% +0.35% +0.42%] index_select strided 8 : Elapsed 0.026 ms (2.605 ms / 100) 2.607 -> 2.606 ( -0.04%) [ +0.15% +0.00% +0.00% / +0.04% +0.38% -0.04%] index_select random : Elapsed 0.026 ms (2.611 ms / 100) 2.598 -> 2.604 ( +0.23%) [ +0.12% +0.04% +0.00% / +0.23% +0.31% +0.27%] index_select random_sorted : Elapsed 0.026 ms (2.601 ms / 100) 2.604 -> 2.606 ( +0.08%) [ +0.15% +0.19% +0.00% / +0.08% +0.23% +0.27%] index_select perm : Elapsed 0.026 ms (2.608 ms / 100) 2.606 -> 2.609 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.23% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.609 ms / 100) B = [4, 40, 5, 20] (stride (200, 5, 1, 800)) A = [16, 40, 5, 20] (stride (200, 1, 40, 3200)) dim = 0 2.433 -> 2.437 ( +0.16%) [ +0.00% +0.12% +0.16% / +0.16% +0.41% +0.37%] index_select const : Elapsed 0.024 ms (2.433 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.12% +0.08% +0.00% / +0.20% +0.16% +0.16%] index_select wrap : Elapsed 0.024 ms (2.449 ms / 100) 2.443 -> 2.450 ( +0.29%) [ +0.20% +0.25% +0.00% / +0.29% +0.33% +0.41%] index_select linear : Elapsed 0.024 ms (2.448 ms / 100) 2.442 -> 2.444 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.66% +0.61%] index_select reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.438 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.37% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.437 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.00% +0.12% +0.16% / +0.21% +0.41% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.434 ms / 100) 2.437 -> 2.441 ( +0.16%) [ +0.00% +0.12% +0.25% / +0.16% +0.33% +0.29%] index_select spread : Elapsed 0.024 ms (2.437 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.24% +0.20% +0.00% / +0.16% +0.53% +0.37%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.448 -> 2.454 ( +0.25%) [ +0.08% +0.04% +0.00% / +0.25% +0.37% +0.37%] index_select strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.33% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.436 -> 2.436 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.21% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.436 ms / 100) 2.440 -> 2.440 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.29% +0.29%] index_select random : Elapsed 0.024 ms (2.440 ms / 100) 2.441 -> 2.441 ( +0.00%) [ +0.16% +0.04% +0.00% / +0.00% +0.08% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.29% +0.16%] index_select perm : Elapsed 0.024 ms (2.450 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.29% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.446 ms / 100) B = [4, 40, 5, 20] (stride (1, 4, 160, 800)) A = [16, 40, 5, 20] (stride (4000, 20, 800, 1)) dim = 0 2.406 -> 2.406 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.25% +0.42%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.12% +0.16%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.33% +0.16% +0.00% / +0.20% +0.08% +0.24%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.00% +0.24% +0.00% / +0.33% +0.16% +0.29%] index_select reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.405 -> 2.414 ( +0.37%) [ +0.12% +0.00% +0.08% / +0.37% +0.42% +0.37%] index_select skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.406 -> 2.406 ( +0.00%) [ +0.21% +0.00% +0.08% / +0.00% +0.29% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.411 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.16% +0.00% +0.12% / +0.04% +0.37% +0.04%] index_select spread : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.29% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.452 ( -0.12%) [ +0.08% +0.00% +0.16% / -0.12% +0.41% +0.37%] index_select strided 5 : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.04% +0.24% +0.00% / +0.12% +0.45% +0.37%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.00% +0.08% +0.04% / +0.17% +0.21% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.415 ms / 100) 2.457 -> 2.456 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.12% +0.24%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.12% +0.24% +0.00% / +0.12% +0.24% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.24% +0.28%] index_select perm : Elapsed 0.025 ms (2.461 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.37% +0.37%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) out_shape = [16, 4, 5, 20] in_shape = [16, 40, 5, 20] idx_dim = 1 B = [16, 4, 5, 20] (stride (400, 100, 20, 1)) A = [16, 40, 5, 20] (stride (4000, 100, 1, 5)) dim = 1 1.192 -> 1.195 ( +0.25%) [ +0.17% +0.17% +0.00% / +0.25% +0.50% +0.59%] index_select const : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_select wrap : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.50%] index_select linear : Elapsed 0.012 ms (1.194 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.00% +0.25% / +0.17% +0.84% +0.76%] index_select reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.76% +0.67%] index_select spread : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.59% +0.59%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select strided 5 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.195 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.59% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.196 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.42% +0.50%] index_select strided 16 : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.34%] index_select random : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.59% +0.42%] index_select perm : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.34% +0.34%] index_select perm_sorted : Elapsed 0.012 ms (1.196 ms / 100) B = [16, 4, 5, 20] (stride (400, 20, 80, 1)) A = [16, 40, 5, 20] (stride (800, 20, 12800, 1)) dim = 1 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.55%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.277 ( +0.31%) [ +0.08% +0.00% +0.00% / +0.31% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.276 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.47% +0.63%] index_select reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.00% +0.08% +0.00% / +0.24% +0.63% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.272 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.55% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.276 ( +0.31%) [ +0.00% +0.00% +0.00% / +0.31% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.272 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.276 ( +0.31%) [ +0.16% +0.00% +0.16% / +0.31% +0.55% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.63% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [16, 4, 5, 20] (stride (400, 20, 80, 1)) A = [16, 40, 5, 20] (stride (20, 320, 12800, 1)) dim = 1 1.272 -> 1.272 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.71% +0.55%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select wrap : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.63%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.271 -> 1.275 ( +0.31%) [ +0.16% +0.00% +0.00% / +0.31% +0.87% +0.71%] index_select reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_select skip64 : Elapsed 0.013 ms (1.272 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.79% +0.71%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.86% +0.00% +0.00% / +0.08% +0.71% +0.71%] index_select spread : Elapsed 0.013 ms (1.283 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.63%] index_select strided 5 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.63% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.39%] index_select random : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.63%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.39% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) B = [16, 4, 5, 20] (stride (400, 5, 1, 20)) A = [16, 40, 5, 20] (stride (1, 1600, 16, 80)) dim = 1 1.346 -> 1.348 ( +0.15%) [ +0.15% +0.22% +0.00% / +0.15% +0.67% +0.67%] index_select const : Elapsed 0.013 ms (1.348 ms / 100) 1.346 -> 1.347 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.52% +0.67%] index_select wrap : Elapsed 0.013 ms (1.348 ms / 100) 1.354 -> 1.356 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.52% +0.52%] index_select linear : Elapsed 0.014 ms (1.357 ms / 100) 1.346 -> 1.348 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.67% +0.45%] index_select reverse : Elapsed 0.013 ms (1.349 ms / 100) 1.350 -> 1.350 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.52% +0.52%] index_select skip64 : Elapsed 0.014 ms (1.352 ms / 100) 1.346 -> 1.349 ( +0.22%) [ +0.15% +0.00% +0.15% / +0.22% +0.82% +0.82%] index_select skip256 : Elapsed 0.013 ms (1.348 ms / 100) 1.349 -> 1.354 ( +0.37%) [ +0.22% +0.15% +0.00% / +0.37% +0.59% +0.59%] index_select spread : Elapsed 0.014 ms (1.352 ms / 100) 1.355 -> 1.356 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.44% +0.30%] index_select strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.350 -> 1.352 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.59% +0.52%] index_select strided 5 : Elapsed 0.014 ms (1.351 ms / 100) 1.351 -> 1.353 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.67% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.353 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.44% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.351 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.59%] index_select strided 16 : Elapsed 0.014 ms (1.351 ms / 100) 1.351 -> 1.353 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.44% +0.44%] index_select random : Elapsed 0.014 ms (1.352 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.37% +0.30%] index_select random_sorted : Elapsed 0.014 ms (1.351 ms / 100) 1.352 -> 1.353 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.44% +0.37%] index_select perm : Elapsed 0.014 ms (1.352 ms / 100) 1.350 -> 1.351 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.52% +0.52%] index_select perm_sorted : Elapsed 0.014 ms (1.352 ms / 100) B = [16, 4, 5, 20] (stride (400, 1, 4, 20)) A = [16, 40, 5, 20] (stride (1, 80, 16, 3200)) dim = 1 1.357 -> 1.361 ( +0.29%) [ +0.29% +0.00% +0.15% / +0.29% +0.81% +0.81%] index_select const : Elapsed 0.014 ms (1.361 ms / 100) 1.361 -> 1.365 ( +0.29%) [ +0.22% +0.22% +0.00% / +0.29% +0.59% +0.44%] index_select wrap : Elapsed 0.014 ms (1.364 ms / 100) 1.360 -> 1.365 ( +0.37%) [ +0.00% +0.37% +0.07% / +0.37% +0.74% +0.66%] index_select linear : Elapsed 0.014 ms (1.360 ms / 100) 1.362 -> 1.362 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.44% +0.37%] index_select reverse : Elapsed 0.014 ms (1.363 ms / 100) 1.362 -> 1.359 ( -0.22%) [ +0.07% +0.00% +0.00% / -0.22% +0.29% +0.44%] index_select skip64 : Elapsed 0.014 ms (1.363 ms / 100) 1.363 -> 1.363 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.59% +0.37%] index_select skip256 : Elapsed 0.014 ms (1.364 ms / 100) 1.371 -> 1.371 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.07% +0.51%] index_select spread : Elapsed 0.014 ms (1.372 ms / 100) 1.350 -> 1.351 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.37% +0.52%] index_select strided 3 : Elapsed 0.014 ms (1.352 ms / 100) 1.358 -> 1.360 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.52% +0.52%] index_select strided 5 : Elapsed 0.014 ms (1.360 ms / 100) 1.359 -> 1.363 ( +0.29%) [ +0.07% +0.22% +0.00% / +0.29% +0.66% +0.52%] index_select strided 7 : Elapsed 0.014 ms (1.360 ms / 100) 1.362 -> 1.368 ( +0.44%) [ +0.07% +0.22% +0.00% / +0.44% +0.59% +0.81%] index_select strided 8 : Elapsed 0.014 ms (1.363 ms / 100) 1.361 -> 1.363 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.59% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.364 ms / 100) 1.357 -> 1.359 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.59% +0.74%] index_select random : Elapsed 0.014 ms (1.359 ms / 100) 1.360 -> 1.361 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.59% +0.59%] index_select random_sorted : Elapsed 0.014 ms (1.363 ms / 100) 1.360 -> 1.362 ( +0.15%) [ +0.29% +0.07% +0.00% / +0.15% +0.59% +0.51%] index_select perm : Elapsed 0.014 ms (1.364 ms / 100) 1.361 -> 1.358 ( -0.22%) [ +0.00% +0.15% +0.00% / -0.22% +0.22% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.361 ms / 100) B = [16, 4, 5, 20] (stride (80, 1, 1280, 4)) A = [16, 40, 5, 20] (stride (5, 80, 1, 3200)) dim = 1 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.53% +0.53%] index_select const : Elapsed 0.013 ms (1.322 ms / 100) 1.332 -> 1.332 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_select wrap : Elapsed 0.013 ms (1.332 ms / 100) 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.52% +0.52%] index_select linear : Elapsed 0.013 ms (1.348 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.38% +0.30%] index_select reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_select skip64 : Elapsed 0.013 ms (1.322 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.23% +0.08% +0.00% / +0.15% +0.61% +0.61%] index_select skip256 : Elapsed 0.013 ms (1.323 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select spread : Elapsed 0.013 ms (1.321 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.53% +0.45%] index_select strided 3 : Elapsed 0.013 ms (1.325 ms / 100) 1.346 -> 1.348 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.59% +0.59%] index_select strided 5 : Elapsed 0.013 ms (1.347 ms / 100) 1.331 -> 1.332 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.45% +0.60%] index_select strided 7 : Elapsed 0.013 ms (1.333 ms / 100) 1.321 -> 1.323 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.45% +0.61%] index_select strided 8 : Elapsed 0.013 ms (1.322 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.38% +0.38%] index_select strided 16 : Elapsed 0.013 ms (1.323 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.53% +0.46%] index_select random : Elapsed 0.013 ms (1.319 ms / 100) 1.335 -> 1.336 ( +0.07%) [ +0.30% +0.00% +0.07% / +0.07% +0.67% +0.60%] index_select random_sorted : Elapsed 0.013 ms (1.339 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.15% +0.00% +0.08% / +0.00% +0.30% +0.23%] index_select perm : Elapsed 0.013 ms (1.332 ms / 100) 1.329 -> 1.330 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.45% +0.45%] index_select perm_sorted : Elapsed 0.013 ms (1.330 ms / 100) B = [16, 4, 5, 20] (stride (4, 1, 1280, 64)) A = [16, 40, 5, 20] (stride (800, 1, 12800, 40)) dim = 1 1.282 -> 1.283 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.62% +0.62%] index_select const : Elapsed 0.013 ms (1.285 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select linear : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.279 ( -0.16%) [ +0.00% +0.08% +0.00% / -0.16% +0.47% +0.47%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.31% +0.23%] index_select skip64 : Elapsed 0.013 ms (1.285 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.08% +0.00% +0.23% / +0.16% +0.55% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.39% +0.31%] index_select spread : Elapsed 0.013 ms (1.285 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.16% +0.08%] index_select strided 3 : Elapsed 0.013 ms (1.286 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.47% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.285 ( +0.39%) [ +0.08% +0.00% +0.16% / +0.39% +0.70% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.288 -> 1.288 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.23% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.288 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.39% +0.31%] index_select random : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select random_sorted : Elapsed 0.013 ms (1.283 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_select perm : Elapsed 0.013 ms (1.281 ms / 100) 1.283 -> 1.282 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) B = [16, 4, 5, 20] (stride (1, 80, 16, 320)) A = [16, 40, 5, 20] (stride (200, 5, 1, 3200)) dim = 1 1.345 -> 1.346 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.59% +0.45%] index_select const : Elapsed 0.013 ms (1.347 ms / 100) 1.354 -> 1.356 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.44% +0.37%] index_select wrap : Elapsed 0.014 ms (1.356 ms / 100) 1.353 -> 1.352 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.44% +0.30%] index_select linear : Elapsed 0.014 ms (1.353 ms / 100) 1.352 -> 1.353 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.59% +0.52%] index_select reverse : Elapsed 0.014 ms (1.353 ms / 100) 1.344 -> 1.345 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.45% +0.60%] index_select skip64 : Elapsed 0.013 ms (1.346 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.59% +0.74%] index_select skip256 : Elapsed 0.013 ms (1.346 ms / 100) 1.352 -> 1.352 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.52% +0.44%] index_select spread : Elapsed 0.014 ms (1.353 ms / 100) 1.357 -> 1.357 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.59% +0.52%] index_select strided 3 : Elapsed 0.014 ms (1.359 ms / 100) 1.357 -> 1.357 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.44% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.359 ms / 100) 1.351 -> 1.355 ( +0.30%) [ +0.22% +0.15% +0.00% / +0.30% +0.52% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.354 ms / 100) 1.349 -> 1.353 ( +0.30%) [ +0.22% +0.07% +0.00% / +0.30% +0.67% +0.59%] index_select strided 8 : Elapsed 0.014 ms (1.352 ms / 100) 1.351 -> 1.354 ( +0.22%) [ +0.22% +0.37% +0.00% / +0.22% +0.67% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.354 ms / 100) 1.356 -> 1.358 ( +0.15%) [ +0.22% +0.44% +0.00% / +0.15% +0.88% +0.59%] index_select random : Elapsed 0.014 ms (1.359 ms / 100) 1.357 -> 1.354 ( -0.22%) [ +0.07% +0.07% +0.00% / -0.22% +0.07% +0.52%] index_select random_sorted : Elapsed 0.014 ms (1.358 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.60% +0.52%] index_select perm : Elapsed 0.013 ms (1.346 ms / 100) 1.345 -> 1.347 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.59% +0.45%] index_select perm_sorted : Elapsed 0.013 ms (1.347 ms / 100) B = [16, 4, 5, 20] (stride (4, 1, 64, 320)) A = [16, 40, 5, 20] (stride (1, 1600, 320, 16)) dim = 1 1.201 -> 1.202 ( +0.08%) [ +0.17% +0.00% +0.25% / +0.08% +0.58% +0.58%] index_select const : Elapsed 0.012 ms (1.203 ms / 100) 1.203 -> 1.202 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.33% +0.25%] index_select wrap : Elapsed 0.012 ms (1.203 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.42%] index_select linear : Elapsed 0.012 ms (1.202 ms / 100) 1.205 -> 1.206 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_select reverse : Elapsed 0.012 ms (1.206 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.42%] index_select skip64 : Elapsed 0.012 ms (1.203 ms / 100) 1.209 -> 1.210 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.33% +0.33%] index_select skip256 : Elapsed 0.012 ms (1.209 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.50%] index_select spread : Elapsed 0.012 ms (1.203 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.00% +0.25% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.204 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.33% +0.25%] index_select strided 5 : Elapsed 0.012 ms (1.206 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.203 ms / 100) 1.202 -> 1.204 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.42% +0.42%] index_select strided 8 : Elapsed 0.012 ms (1.204 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.25% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select strided 16 : Elapsed 0.012 ms (1.205 ms / 100) 1.202 -> 1.204 ( +0.17%) [ +0.33% +0.08% +0.00% / +0.17% +0.33% +0.33%] index_select random : Elapsed 0.012 ms (1.206 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.203 ms / 100) 1.204 -> 1.205 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_select perm : Elapsed 0.012 ms (1.205 ms / 100) 1.201 -> 1.204 ( +0.25%) [ +0.00% +0.08% +0.00% / +0.25% +0.67% +0.58%] index_select perm_sorted : Elapsed 0.012 ms (1.201 ms / 100) B = [16, 4, 5, 20] (stride (1, 16, 64, 320)) dim = 1 fill_cnt = 40 2.738 -> 2.732 ( -0.22%) [ +0.15% +0.04% +0.00% / -0.22% +0.22% +0.33%] index_fill_ const : Elapsed 0.027 ms (2.742 ms / 100) 2.737 -> 2.739 ( +0.07%) [ +0.22% +0.26% +0.00% / +0.07% +0.40% +0.47%] index_fill_ linear : Elapsed 0.027 ms (2.743 ms / 100) 2.739 -> 2.735 ( -0.15%) [ +0.00% +0.07% +0.15% / -0.15% +0.29% +0.29%] index_fill_ reverse : Elapsed 0.027 ms (2.739 ms / 100) 2.739 -> 2.734 ( -0.18%) [ +0.00% +0.11% +0.11% / -0.18% +0.15% +0.22%] index_fill_ skip64 : Elapsed 0.027 ms (2.739 ms / 100) 2.742 -> 2.741 ( -0.04%) [ +0.11% +0.07% +0.00% / -0.04% +0.04% +0.18%] index_fill_ skip256 : Elapsed 0.027 ms (2.745 ms / 100) 2.740 -> 2.739 ( -0.04%) [ +0.18% +0.00% +0.04% / -0.04% +0.18% -0.04%] index_fill_ spread : Elapsed 0.027 ms (2.745 ms / 100) 2.743 -> 2.740 ( -0.11%) [ +0.00% +0.00% +0.07% / -0.11% +0.15% +0.11%] index_fill_ strided 3 : Elapsed 0.027 ms (2.743 ms / 100) 2.748 -> 2.742 ( -0.22%) [ +0.11% +0.00% +0.07% / -0.15% -0.11% -0.22%] index_fill_ random : Elapsed 0.028 ms (2.751 ms / 100) 2.741 -> 2.740 ( -0.04%) [ +0.00% +0.26% +0.18% / -0.04% +0.18% -0.04%] index_fill_ random_sorted : Elapsed 0.027 ms (2.741 ms / 100) out_shape = [16, 40, 4, 20] in_shape = [16, 40, 5, 20] idx_dim = 2 B = [16, 40, 4, 20] (stride (3200, 80, 1, 4)) A = [16, 40, 5, 20] (stride (4000, 20, 800, 1)) dim = 2 5.429 -> 5.426 ( -0.06%) [ +0.11% +0.06% +0.00% / -0.06% +0.53% +0.59%] index_select const : Elapsed 0.054 ms (5.435 ms / 100) 5.524 -> 5.526 ( +0.04%) [ +0.11% +0.11% +0.00% / +0.04% +0.04% +0.05%] index_select wrap : Elapsed 0.055 ms (5.530 ms / 100) 5.515 -> 5.508 ( -0.13%) [ +0.02% +0.00% +0.07% / +0.04% -0.13% -0.11%] index_select linear : Elapsed 0.055 ms (5.516 ms / 100) 5.516 -> 5.497 ( -0.34%) [ +0.13% +0.00% +0.07% / +0.13% -0.25% -0.34%] index_select reverse : Elapsed 0.055 ms (5.523 ms / 100) 5.401 -> 5.404 ( +0.06%) [ +0.00% +0.02% +0.02% / +0.06% +0.50% +0.54%] index_select skip64 : Elapsed 0.054 ms (5.401 ms / 100) 5.410 -> 5.409 ( -0.02%) [ +0.11% +0.02% +0.00% / -0.02% +0.63% +0.68%] index_select skip256 : Elapsed 0.054 ms (5.416 ms / 100) 5.527 -> 5.521 ( -0.11%) [ +0.00% +0.13% +0.02% / +0.02% +0.00% -0.11%] index_select spread : Elapsed 0.055 ms (5.527 ms / 100) 5.488 -> 5.495 ( +0.13%) [ +0.16% +0.00% +0.05% / +0.13% +0.18% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.497 ms / 100) 5.469 -> 5.476 ( +0.13%) [ +0.00% +0.04% +0.07% / +0.13% +0.33% +0.35%] index_select random : Elapsed 0.055 ms (5.469 ms / 100) 5.467 -> 5.459 ( -0.15%) [ +0.05% +0.00% +0.07% / -0.15% +0.37% +0.29%] index_select random_sorted : Elapsed 0.055 ms (5.470 ms / 100) 5.513 -> 5.514 ( +0.02%) [ +0.07% +0.00% +0.20% / +0.02% +0.27% +0.29%] index_select perm : Elapsed 0.055 ms (5.517 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.33% +0.40%] index_select perm_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [16, 40, 4, 20] (stride (3200, 1, 800, 40)) A = [16, 40, 5, 20] (stride (1, 16, 640, 3200)) dim = 2 5.710 -> 5.716 ( +0.11%) [ +0.14% +0.00% +0.19% / +0.11% +0.23% +0.32%] index_select const : Elapsed 0.057 ms (5.718 ms / 100) 5.774 -> 5.774 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.14% +0.07% +0.00%] index_select wrap : Elapsed 0.058 ms (5.777 ms / 100) 5.771 -> 5.773 ( +0.03%) [ +0.00% +0.00% +0.17% / +0.03% +0.12% +0.10%] index_select linear : Elapsed 0.058 ms (5.771 ms / 100) 5.771 -> 5.770 ( -0.02%) [ +0.05% +0.00% +0.24% / +0.28% -0.02% +0.07%] index_select reverse : Elapsed 0.058 ms (5.774 ms / 100) 5.709 -> 5.709 ( +0.00%) [ +0.00% +0.02% +0.18% / +0.00% +0.28% +0.26%] index_select skip64 : Elapsed 0.057 ms (5.709 ms / 100) 5.706 -> 5.711 ( +0.09%) [ +0.00% +0.02% +0.12% / +0.09% +0.39% +0.40%] index_select skip256 : Elapsed 0.057 ms (5.706 ms / 100) 5.773 -> 5.774 ( +0.02%) [ +0.00% +0.23% +0.07% / +0.12% +0.12% +0.02%] index_select spread : Elapsed 0.058 ms (5.773 ms / 100) 5.759 -> 5.768 ( +0.16%) [ +0.21% +0.10% +0.00% / +0.16% +0.31% +0.35%] index_select strided 3 : Elapsed 0.058 ms (5.771 ms / 100) 5.767 -> 5.765 ( -0.03%) [ +0.03% +0.05% +0.00% / -0.03% +0.14% +0.03%] index_select random : Elapsed 0.058 ms (5.769 ms / 100) 5.765 -> 5.771 ( +0.10%) [ +0.12% +0.00% +0.12% / +0.10% +0.19% +0.24%] index_select random_sorted : Elapsed 0.058 ms (5.772 ms / 100) 5.762 -> 5.772 ( +0.17%) [ +0.00% +0.02% +0.12% / +0.21% +0.23% +0.17%] index_select perm : Elapsed 0.058 ms (5.762 ms / 100) 5.759 -> 5.766 ( +0.12%) [ +0.16% +0.00% +0.10% / +0.12% +0.24% +0.38%] index_select perm_sorted : Elapsed 0.058 ms (5.768 ms / 100) B = [16, 40, 4, 20] (stride (3200, 4, 1, 160)) A = [16, 40, 5, 20] (stride (100, 1600, 1, 5)) dim = 2 6.038 -> 6.045 ( +0.12%) [ +0.10% +0.00% +0.18% / +0.12% +0.25% +0.20%] index_select const : Elapsed 0.060 ms (6.044 ms / 100) 6.041 -> 6.048 ( +0.12%) [ +0.02% +0.00% +0.12% / +0.12% +0.22% +0.17%] index_select wrap : Elapsed 0.060 ms (6.042 ms / 100) 6.040 -> 6.046 ( +0.10%) [ +0.08% +0.00% +0.13% / +0.10% +0.12% +0.17%] index_select linear : Elapsed 0.060 ms (6.045 ms / 100) 6.040 -> 6.048 ( +0.13%) [ +0.02% +0.00% +0.10% / +0.15% +0.13% +0.13%] index_select reverse : Elapsed 0.060 ms (6.041 ms / 100) 6.042 -> 6.043 ( +0.02%) [ +0.00% +0.02% +0.10% / +0.02% +0.13% +0.20%] index_select skip64 : Elapsed 0.060 ms (6.042 ms / 100) 6.037 -> 6.043 ( +0.10%) [ +0.00% +0.10% +0.08% / +0.10% +0.28% +0.20%] index_select skip256 : Elapsed 0.060 ms (6.037 ms / 100) 6.042 -> 6.048 ( +0.10%) [ +0.00% +0.03% +0.12% / +0.10% +0.12% +0.13%] index_select spread : Elapsed 0.060 ms (6.042 ms / 100) 6.037 -> 6.046 ( +0.15%) [ +0.05% +0.00% +0.07% / +0.15% +0.23% +0.23%] index_select strided 3 : Elapsed 0.060 ms (6.040 ms / 100) 6.042 -> 6.048 ( +0.10%) [ +0.02% +0.00% +0.02% / +0.10% +0.18% +0.17%] index_select random : Elapsed 0.060 ms (6.043 ms / 100) 6.039 -> 6.046 ( +0.12%) [ +0.07% +0.07% +0.00% / +0.12% +0.25% +0.22%] index_select random_sorted : Elapsed 0.060 ms (6.043 ms / 100) 6.042 -> 6.044 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.18% +0.10%] index_select perm : Elapsed 0.060 ms (6.042 ms / 100) 6.038 -> 6.048 ( +0.17%) [ +0.00% +0.03% +0.20% / +0.20% +0.26% +0.17%] index_select perm_sorted : Elapsed 0.060 ms (6.038 ms / 100) B = [16, 40, 4, 20] (stride (80, 1280, 1, 4)) A = [16, 40, 5, 20] (stride (800, 20, 12800, 1)) dim = 2 5.466 -> 5.460 ( -0.11%) [ +0.00% +0.16% +0.13% / +0.20% -0.09% -0.11%] index_select const : Elapsed 0.055 ms (5.466 ms / 100) 5.522 -> 5.532 ( +0.18%) [ +0.16% +0.00% +0.18% / +0.18% +0.20% +0.18%] index_select wrap : Elapsed 0.055 ms (5.531 ms / 100) 5.523 -> 5.525 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.13% +0.04% +0.09%] index_select linear : Elapsed 0.055 ms (5.527 ms / 100) 5.522 -> 5.527 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.09% +0.14%] index_select reverse : Elapsed 0.055 ms (5.527 ms / 100) 5.456 -> 5.451 ( -0.09%) [ +0.00% +0.09% +0.13% / +0.15% -0.05% -0.09%] index_select skip64 : Elapsed 0.055 ms (5.456 ms / 100) 5.467 -> 5.463 ( -0.07%) [ +0.02% +0.00% +0.13% / +0.13% -0.02% -0.07%] index_select skip256 : Elapsed 0.055 ms (5.468 ms / 100) 5.521 -> 5.527 ( +0.11%) [ +0.00% +0.04% +0.22% / +0.18% +0.11% +0.14%] index_select spread : Elapsed 0.055 ms (5.521 ms / 100) 5.533 -> 5.528 ( -0.09%) [ +0.02% +0.00% +0.05% / +0.11% -0.09% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.534 ms / 100) 5.494 -> 5.501 ( +0.13%) [ +0.00% +0.05% +0.00% / +0.13% +0.16% +0.15%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.489 -> 5.494 ( +0.09%) [ +0.00% +0.07% +0.04% / +0.09% +0.42% +0.44%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) 5.527 -> 5.528 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.11% +0.02% +0.16%] index_select perm : Elapsed 0.055 ms (5.528 ms / 100) 5.519 -> 5.527 ( +0.14%) [ +0.00% +0.02% +0.16% / +0.16% +0.29% +0.14%] index_select perm_sorted : Elapsed 0.055 ms (5.519 ms / 100) B = [16, 40, 4, 20] (stride (1, 320, 12800, 16)) A = [16, 40, 5, 20] (stride (4000, 100, 1, 5)) dim = 2 5.664 -> 5.644 ( -0.35%) [ +0.00% +0.14% +0.11% / +0.21% -0.30% -0.35%] index_select const : Elapsed 0.057 ms (5.664 ms / 100) 5.666 -> 5.639 ( -0.48%) [ +0.11% +0.00% +0.04% / +0.18% -0.48% -0.41%] index_select wrap : Elapsed 0.057 ms (5.672 ms / 100) 5.661 -> 5.631 ( -0.53%) [ +0.09% +0.00% +0.16% / +0.14% -0.53% -0.41%] index_select linear : Elapsed 0.057 ms (5.666 ms / 100) 5.666 -> 5.636 ( -0.53%) [ +0.07% +0.11% +0.00% / +0.04% -0.49% -0.53%] index_select reverse : Elapsed 0.057 ms (5.670 ms / 100) 5.665 -> 5.636 ( -0.51%) [ +0.11% +0.00% +0.16% / +0.09% -0.51% -0.49%] index_select skip64 : Elapsed 0.057 ms (5.671 ms / 100) 5.662 -> 5.638 ( -0.42%) [ +0.04% +0.00% +0.16% / +0.05% -0.42% -0.42%] index_select skip256 : Elapsed 0.057 ms (5.664 ms / 100) 5.661 -> 5.640 ( -0.37%) [ +0.00% +0.28% +0.23% / +0.18% -0.37% -0.35%] index_select spread : Elapsed 0.057 ms (5.661 ms / 100) 5.664 -> 5.635 ( -0.51%) [ +0.00% +0.04% +0.18% / +0.04% -0.51% -0.32%] index_select strided 3 : Elapsed 0.057 ms (5.664 ms / 100) 5.657 -> 5.638 ( -0.34%) [ +0.11% +0.00% +0.21% / +0.16% -0.25% -0.34%] index_select random : Elapsed 0.057 ms (5.663 ms / 100) 5.662 -> 5.637 ( -0.44%) [ +0.00% +0.07% +0.07% / +0.12% -0.37% -0.44%] index_select random_sorted : Elapsed 0.057 ms (5.662 ms / 100) 5.669 -> 5.642 ( -0.48%) [ +0.11% +0.05% +0.00% / +0.12% -0.46% -0.48%] index_select perm : Elapsed 0.057 ms (5.675 ms / 100) 5.660 -> 5.636 ( -0.42%) [ +0.07% +0.00% +0.04% / +0.11% -0.35% -0.42%] index_select perm_sorted : Elapsed 0.057 ms (5.664 ms / 100) B = [16, 40, 4, 20] (stride (40, 1, 12800, 640)) A = [16, 40, 5, 20] (stride (200, 1, 40, 3200)) dim = 2 5.563 -> 5.553 ( -0.18%) [ +0.00% +0.04% +0.07% / +0.09% -0.05% -0.18%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.612 -> 5.607 ( -0.09%) [ +0.16% +0.00% +0.25% / +0.23% -0.02% -0.09%] index_select wrap : Elapsed 0.056 ms (5.621 ms / 100) 5.610 -> 5.601 ( -0.16%) [ +0.05% +0.00% +0.32% / +0.32% -0.16% -0.04%] index_select linear : Elapsed 0.056 ms (5.613 ms / 100) 5.611 -> 5.609 ( -0.04%) [ +0.05% +0.00% +0.16% / +0.23% -0.04% -0.04%] index_select reverse : Elapsed 0.056 ms (5.614 ms / 100) 5.554 -> 5.547 ( -0.13%) [ +0.02% +0.00% +0.31% / +0.07% -0.13% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.556 -> 5.546 ( -0.18%) [ +0.13% +0.00% +0.14% / +0.18% -0.18% -0.02%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.616 -> 5.605 ( -0.20%) [ +0.00% +0.05% +0.09% / +0.21% -0.20% -0.07%] index_select spread : Elapsed 0.056 ms (5.616 ms / 100) 5.635 -> 5.608 ( -0.48%) [ +0.02% +0.02% +0.00% / +0.05% -0.48% -0.48%] index_select strided 3 : Elapsed 0.056 ms (5.636 ms / 100) 5.596 -> 5.580 ( -0.29%) [ +0.00% +0.05% +0.25% / +0.20% -0.27% -0.29%] index_select random : Elapsed 0.056 ms (5.596 ms / 100) 5.589 -> 5.569 ( -0.36%) [ +0.00% +0.11% +0.13% / +0.23% -0.36% -0.20%] index_select random_sorted : Elapsed 0.056 ms (5.589 ms / 100) 5.636 -> 5.610 ( -0.46%) [ +0.00% +0.04% +0.04% / -0.05% -0.41% -0.46%] index_select perm : Elapsed 0.056 ms (5.636 ms / 100) 5.617 -> 5.614 ( -0.05%) [ +0.00% +0.23% +0.23% / +0.02% +0.00% -0.05%] index_select perm_sorted : Elapsed 0.056 ms (5.617 ms / 100) B = [16, 40, 4, 20] (stride (4, 64, 1, 2560)) A = [16, 40, 5, 20] (stride (1, 1600, 320, 16)) dim = 2 5.785 -> 5.785 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.28% +0.17%] index_select const : Elapsed 0.058 ms (5.785 ms / 100) 5.832 -> 5.822 ( -0.17%) [ +0.12% +0.15% +0.00% / +0.10% -0.17% -0.03%] index_select wrap : Elapsed 0.058 ms (5.839 ms / 100) 5.837 -> 5.825 ( -0.21%) [ +0.00% +0.00% +0.07% / +0.14% -0.12% -0.21%] index_select linear : Elapsed 0.058 ms (5.837 ms / 100) 5.835 -> 5.825 ( -0.17%) [ +0.00% +0.02% +0.14% / +0.15% -0.17% -0.15%] index_select reverse : Elapsed 0.058 ms (5.835 ms / 100) 5.784 -> 5.784 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.29% +0.26%] index_select skip64 : Elapsed 0.058 ms (5.785 ms / 100) 5.784 -> 5.791 ( +0.12%) [ +0.00% +0.00% +0.02% / +0.12% +0.28% +0.28%] index_select skip256 : Elapsed 0.058 ms (5.784 ms / 100) 5.836 -> 5.819 ( -0.29%) [ +0.07% +0.02% +0.00% / +0.12% -0.24% -0.29%] index_select spread : Elapsed 0.058 ms (5.840 ms / 100) 5.838 -> 5.825 ( -0.22%) [ +0.00% +0.03% +0.15% / +0.05% -0.22% -0.22%] index_select strided 3 : Elapsed 0.058 ms (5.838 ms / 100) 5.834 -> 5.822 ( -0.21%) [ +0.07% +0.00% +0.15% / -0.05% -0.21% -0.17%] index_select random : Elapsed 0.058 ms (5.838 ms / 100) 5.818 -> 5.810 ( -0.14%) [ +0.09% +0.00% +0.12% / +0.07% -0.14% -0.03%] index_select random_sorted : Elapsed 0.058 ms (5.823 ms / 100) 5.838 -> 5.819 ( -0.33%) [ +0.00% +0.03% +0.14% / +0.10% -0.31% -0.33%] index_select perm : Elapsed 0.058 ms (5.838 ms / 100) 5.840 -> 5.827 ( -0.22%) [ +0.03% +0.00% +0.05% / -0.02% -0.22% -0.12%] index_select perm_sorted : Elapsed 0.058 ms (5.842 ms / 100) out_shape = [16, 40, 5, 4] in_shape = [16, 40, 5, 20] idx_dim = 3 B = [16, 40, 5, 4] (stride (800, 20, 4, 1)) A = [16, 40, 5, 20] (stride (1, 16, 640, 3200)) dim = 3 1.859 -> 1.863 ( +0.22%) [ +0.16% +0.05% +0.00% / +0.22% +0.48% +0.48%] index_select const : Elapsed 0.019 ms (1.862 ms / 100) 1.852 -> 1.858 ( +0.32%) [ +0.49% +0.22% +0.00% / +0.32% +0.86% +0.86%] index_select wrap : Elapsed 0.019 ms (1.861 ms / 100) 1.852 -> 1.852 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.70% +0.49%] index_select linear : Elapsed 0.019 ms (1.853 ms / 100) 1.861 -> 1.861 ( +0.00%) [ +0.00% +0.27% +0.27% / +0.00% +0.97% +0.11%] index_select reverse : Elapsed 0.019 ms (1.861 ms / 100) 1.851 -> 1.852 ( +0.05%) [ +0.27% +0.49% +0.00% / +0.05% +0.86% +0.49%] index_select skip64 : Elapsed 0.019 ms (1.856 ms / 100) 1.860 -> 1.868 ( +0.43%) [ +0.16% +0.16% +0.00% / +0.43% +0.48% +0.59%] index_select skip256 : Elapsed 0.019 ms (1.863 ms / 100) 1.846 -> 1.853 ( +0.38%) [ +0.11% +0.00% +0.05% / +0.38% +0.49% +0.70%] index_select spread : Elapsed 0.018 ms (1.848 ms / 100) 1.853 -> 1.854 ( +0.05%) [ +0.43% +0.00% +0.38% / +0.05% +0.65% +0.49%] index_select strided 3 : Elapsed 0.019 ms (1.861 ms / 100) 1.845 -> 1.842 ( -0.16%) [ +0.00% +0.11% +0.49% / -0.16% +0.38% +0.70%] index_select strided 5 : Elapsed 0.018 ms (1.845 ms / 100) 1.849 -> 1.850 ( +0.05%) [ +0.38% +0.00% +0.27% / +0.05% +0.76% +0.43%] index_select strided 7 : Elapsed 0.019 ms (1.856 ms / 100) 1.850 -> 1.856 ( +0.32%) [ +0.43% +0.43% +0.00% / +0.32% +0.49% +0.92%] index_select strided 8 : Elapsed 0.019 ms (1.858 ms / 100) 1.852 -> 1.852 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.59% +0.65%] index_select strided 16 : Elapsed 0.019 ms (1.852 ms / 100) 1.855 -> 1.855 ( +0.00%) [ +0.22% +0.00% +0.16% / +0.00% +0.86% +0.81%] index_select random : Elapsed 0.019 ms (1.859 ms / 100) 1.853 -> 1.861 ( +0.43%) [ +0.22% +0.00% +0.32% / +0.43% +0.97% +1.08%] index_select random_sorted : Elapsed 0.019 ms (1.857 ms / 100) 1.841 -> 1.847 ( +0.33%) [ +0.60% +0.00% +0.16% / +0.33% +0.49% +0.49%] index_select perm : Elapsed 0.019 ms (1.852 ms / 100) 1.861 -> 1.860 ( -0.05%) [ +0.16% +0.00% +0.11% / -0.05% +0.05% +0.16%] index_select perm_sorted : Elapsed 0.019 ms (1.864 ms / 100) B = [16, 40, 5, 4] (stride (800, 1, 160, 40)) A = [16, 40, 5, 20] (stride (1, 1600, 16, 80)) dim = 3 2.113 -> 2.118 ( +0.24%) [ +0.00% +0.38% +0.19% / +0.24% +0.28% +0.62%] index_select const : Elapsed 0.021 ms (2.113 ms / 100) 2.105 -> 2.108 ( +0.14%) [ +0.00% +0.10% +0.14% / +0.14% +0.57% +0.24%] index_select wrap : Elapsed 0.021 ms (2.105 ms / 100) 2.105 -> 2.108 ( +0.14%) [ +0.29% +0.10% +0.00% / +0.14% +0.48% +0.57%] index_select linear : Elapsed 0.021 ms (2.111 ms / 100) 2.104 -> 2.110 ( +0.29%) [ +0.24% +0.00% +0.33% / +0.29% +0.38% +0.71%] index_select reverse : Elapsed 0.021 ms (2.109 ms / 100) 2.115 -> 2.121 ( +0.28%) [ +0.24% +0.28% +0.00% / +0.28% +0.43% +0.33%] index_select skip64 : Elapsed 0.021 ms (2.120 ms / 100) 2.114 -> 2.119 ( +0.24%) [ +0.05% +0.00% +0.19% / +0.24% +0.38% +0.33%] index_select skip256 : Elapsed 0.021 ms (2.115 ms / 100) 2.109 -> 2.116 ( +0.33%) [ +0.24% +0.00% +0.14% / +0.33% +0.43% +0.33%] index_select spread : Elapsed 0.021 ms (2.114 ms / 100) 2.112 -> 2.115 ( +0.14%) [ +0.14% +0.24% +0.00% / +0.14% +0.47% +0.47%] index_select strided 3 : Elapsed 0.021 ms (2.115 ms / 100) 2.121 -> 2.119 ( -0.09%) [ +0.00% +0.09% +0.14% / -0.09% +0.42% +0.33%] index_select strided 5 : Elapsed 0.021 ms (2.121 ms / 100) 2.111 -> 2.115 ( +0.19%) [ +0.19% +0.00% +0.24% / +0.19% +0.52% +0.43%] index_select strided 7 : Elapsed 0.021 ms (2.115 ms / 100) 2.114 -> 2.119 ( +0.24%) [ +0.05% +0.00% +0.00% / +0.28% +0.57% +0.24%] index_select strided 8 : Elapsed 0.021 ms (2.115 ms / 100) 2.106 -> 2.112 ( +0.28%) [ +0.38% +0.00% +0.19% / +0.28% +0.43% +0.57%] index_select strided 16 : Elapsed 0.021 ms (2.114 ms / 100) 2.112 -> 2.115 ( +0.14%) [ +0.33% +0.00% +0.38% / +0.14% +0.47% +0.33%] index_select random : Elapsed 0.021 ms (2.119 ms / 100) 2.116 -> 2.117 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.33% +0.33%] index_select random_sorted : Elapsed 0.021 ms (2.116 ms / 100) 2.109 -> 2.110 ( +0.05%) [ +0.05% +0.28% +0.00% / +0.05% +0.52% +0.57%] index_select perm : Elapsed 0.021 ms (2.110 ms / 100) 2.106 -> 2.110 ( +0.19%) [ +0.00% +0.05% +0.00% / +0.19% +0.33% +0.38%] index_select perm_sorted : Elapsed 0.021 ms (2.106 ms / 100) B = [16, 40, 5, 4] (stride (800, 5, 1, 200)) A = [16, 40, 5, 20] (stride (4000, 100, 20, 1)) dim = 3 1.816 -> 1.815 ( -0.06%) [ +0.11% +0.06% +0.00% / +0.28% +0.00% -0.06%] index_select const : Elapsed 0.018 ms (1.818 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.22% +0.00% +0.22% / +0.17% +0.06% +0.06%] index_select wrap : Elapsed 0.018 ms (1.801 ms / 100) 1.816 -> 1.814 ( -0.11%) [ +0.00% +0.06% +0.06% / -0.06% +0.00% -0.11%] index_select linear : Elapsed 0.018 ms (1.816 ms / 100) 1.818 -> 1.813 ( -0.28%) [ +0.00% +0.11% +0.22% / -0.06% -0.28% -0.17%] index_select reverse : Elapsed 0.018 ms (1.818 ms / 100) 1.822 -> 1.819 ( -0.16%) [ +0.00% +0.05% +0.27% / +0.05% +0.16% -0.16%] index_select skip64 : Elapsed 0.018 ms (1.822 ms / 100) 1.821 -> 1.817 ( -0.22%) [ +0.00% +0.16% +0.00% / +0.05% -0.22% +0.16%] index_select skip256 : Elapsed 0.018 ms (1.821 ms / 100) 1.877 -> 1.876 ( -0.05%) [ +0.11% +0.27% +0.00% / +0.37% +0.00% -0.05%] index_select spread : Elapsed 0.019 ms (1.879 ms / 100) 1.851 -> 1.847 ( -0.22%) [ +0.11% +0.00% +0.05% / +0.05% -0.22% -0.22%] index_select strided 3 : Elapsed 0.019 ms (1.853 ms / 100) 1.865 -> 1.861 ( -0.21%) [ +0.16% +0.00% +0.27% / -0.21% +0.11% +0.11%] index_select strided 5 : Elapsed 0.019 ms (1.868 ms / 100) 1.860 -> 1.861 ( +0.05%) [ +0.00% +0.11% +0.11% / +0.27% +0.05% +0.11%] index_select strided 7 : Elapsed 0.019 ms (1.860 ms / 100) 1.857 -> 1.858 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.22% +0.22% +0.05%] index_select strided 8 : Elapsed 0.019 ms (1.858 ms / 100) 1.864 -> 1.856 ( -0.43%) [ +0.21% +0.00% +0.00% / +0.16% -0.43% -0.21%] index_select strided 16 : Elapsed 0.019 ms (1.868 ms / 100) 1.855 -> 1.854 ( -0.05%) [ +0.22% +0.05% +0.00% / +0.38% +0.05% -0.05%] index_select random : Elapsed 0.019 ms (1.859 ms / 100) 1.837 -> 1.840 ( +0.16%) [ +0.22% +0.00% +0.11% / +0.16% +0.60% +0.16%] index_select random_sorted : Elapsed 0.018 ms (1.841 ms / 100) 1.855 -> 1.853 ( -0.11%) [ +0.16% +0.00% +0.11% / -0.11% +0.11% -0.05%] index_select perm : Elapsed 0.019 ms (1.858 ms / 100) 1.850 -> 1.853 ( +0.16%) [ +0.00% +0.38% +0.05% / +0.32% +0.16% +0.16%] index_select perm_sorted : Elapsed 0.019 ms (1.850 ms / 100) B = [16, 40, 5, 4] (stride (1, 320, 64, 16)) A = [16, 40, 5, 20] (stride (4000, 1, 40, 200)) dim = 3 1.940 -> 1.944 ( +0.21%) [ +0.10% +0.00% +0.00% / +0.21% +0.52% +0.36%] index_select const : Elapsed 0.019 ms (1.942 ms / 100) 1.959 -> 1.961 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.26% +0.46% +0.10%] index_select wrap : Elapsed 0.020 ms (1.959 ms / 100) 1.960 -> 1.966 ( +0.31%) [ +0.26% +0.00% +0.15% / +0.31% +0.31% +0.31%] index_select linear : Elapsed 0.020 ms (1.965 ms / 100) 1.966 -> 1.966 ( +0.00%) [ +0.10% +0.15% +0.00% / +0.00% +0.36% +0.41%] index_select reverse : Elapsed 0.020 ms (1.968 ms / 100) 1.940 -> 1.944 ( +0.21%) [ +0.26% +0.21% +0.00% / +0.21% +0.21% +0.26%] index_select skip64 : Elapsed 0.019 ms (1.945 ms / 100) 1.938 -> 1.942 ( +0.21%) [ +0.00% +0.21% +0.10% / +0.26% +0.21% +0.21%] index_select skip256 : Elapsed 0.019 ms (1.938 ms / 100) 1.966 -> 1.960 ( -0.31%) [ +0.10% +0.00% +0.05% / -0.31% +0.25% +0.25%] index_select spread : Elapsed 0.020 ms (1.968 ms / 100) 1.974 -> 1.971 ( -0.15%) [ +0.05% +0.00% +0.46% / -0.15% -0.05% +0.15%] index_select strided 3 : Elapsed 0.020 ms (1.975 ms / 100) 1.968 -> 1.966 ( -0.10%) [ +0.20% +0.05% +0.00% / -0.10% +0.15% +0.10%] index_select strided 5 : Elapsed 0.020 ms (1.972 ms / 100) 1.967 -> 1.969 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.71% +0.71%] index_select strided 7 : Elapsed 0.020 ms (1.967 ms / 100) 1.971 -> 1.973 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.15% +0.25% +0.10%] index_select strided 8 : Elapsed 0.020 ms (1.971 ms / 100) 1.971 -> 1.971 ( +0.00%) [ +0.15% +0.10% +0.00% / +0.30% +0.10% +0.00%] index_select strided 16 : Elapsed 0.020 ms (1.974 ms / 100) 1.967 -> 1.965 ( -0.10%) [ +0.00% +0.05% +0.10% / -0.10% +0.31% +0.25%] index_select random : Elapsed 0.020 ms (1.967 ms / 100) 1.976 -> 1.975 ( -0.05%) [ +0.10% +0.00% +0.00% / -0.05% +0.30% +0.00%] index_select random_sorted : Elapsed 0.020 ms (1.978 ms / 100) 1.967 -> 1.975 ( +0.41%) [ +0.00% +0.25% +0.20% / +0.41% +0.56% +0.51%] index_select perm : Elapsed 0.020 ms (1.967 ms / 100) 1.966 -> 1.965 ( -0.05%) [ +0.00% +0.10% +0.05% / -0.05% +0.36% +0.41%] index_select perm_sorted : Elapsed 0.020 ms (1.966 ms / 100) B = [16, 40, 5, 4] (stride (4, 64, 2560, 1)) A = [16, 40, 5, 20] (stride (20, 320, 12800, 1)) dim = 3 2.242 -> 2.250 ( +0.36%) [ +0.00% +0.40% +0.13% / +0.36% +0.54% +0.62%] index_select const : Elapsed 0.022 ms (2.242 ms / 100) 2.242 -> 2.244 ( +0.09%) [ +0.36% +0.00% +0.04% / +0.09% +0.58% +0.71%] index_select wrap : Elapsed 0.023 ms (2.250 ms / 100) 2.245 -> 2.249 ( +0.18%) [ +0.00% +0.09% +0.04% / +0.18% +0.58% +0.62%] index_select linear : Elapsed 0.022 ms (2.245 ms / 100) 2.243 -> 2.243 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.40% +0.36%] index_select reverse : Elapsed 0.022 ms (2.246 ms / 100) 2.242 -> 2.248 ( +0.27%) [ +0.22% +0.00% +0.27% / +0.27% +0.80% +0.58%] index_select skip64 : Elapsed 0.022 ms (2.247 ms / 100) 2.245 -> 2.246 ( +0.04%) [ +0.22% +0.09% +0.00% / +0.04% +0.53% +0.49%] index_select skip256 : Elapsed 0.022 ms (2.250 ms / 100) 2.298 -> 2.301 ( +0.13%) [ +0.00% +0.39% +0.30% / +0.13% +0.44% +0.44%] index_select spread : Elapsed 0.023 ms (2.298 ms / 100) 2.278 -> 2.281 ( +0.13%) [ +0.00% +0.22% +0.04% / +0.13% +0.53% +0.57%] index_select strided 3 : Elapsed 0.023 ms (2.278 ms / 100) 2.295 -> 2.298 ( +0.13%) [ +0.22% +0.52% +0.00% / +0.13% +0.78% +0.26%] index_select strided 5 : Elapsed 0.023 ms (2.300 ms / 100) 2.289 -> 2.294 ( +0.22%) [ +0.00% +0.26% +0.04% / +0.22% +0.22% +0.66%] index_select strided 7 : Elapsed 0.023 ms (2.289 ms / 100) 2.285 -> 2.291 ( +0.26%) [ +0.00% +0.31% +0.44% / +0.26% +0.48% +0.57%] index_select strided 8 : Elapsed 0.023 ms (2.285 ms / 100) 2.301 -> 2.299 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.22% +0.09%] index_select strided 16 : Elapsed 0.023 ms (2.301 ms / 100) 2.278 -> 2.280 ( +0.09%) [ +0.00% +0.18% +0.04% / +0.18% +0.09% +0.18%] index_select random : Elapsed 0.023 ms (2.278 ms / 100) 2.274 -> 2.280 ( +0.26%) [ +0.09% +0.00% +0.09% / +0.26% +0.35% +0.31%] index_select random_sorted : Elapsed 0.023 ms (2.276 ms / 100) 2.291 -> 2.291 ( +0.00%) [ +0.09% +0.00% +0.35% / +0.00% +0.17% +0.52%] index_select perm : Elapsed 0.023 ms (2.293 ms / 100) 2.283 -> 2.289 ( +0.26%) [ +0.00% +0.04% +0.18% / +0.26% +0.39% +0.66%] index_select perm_sorted : Elapsed 0.023 ms (2.283 ms / 100) B = [16, 40, 5, 4] (stride (1, 64, 2560, 16)) A = [16, 40, 5, 20] (stride (5, 80, 1, 3200)) dim = 3 2.053 -> 2.059 ( +0.29%) [ +0.15% +0.00% +0.05% / +0.29% +0.63% +0.68%] index_select const : Elapsed 0.021 ms (2.056 ms / 100) 2.058 -> 2.059 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.44% +0.63%] index_select wrap : Elapsed 0.021 ms (2.059 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.00% +0.19% +0.24% / +0.00% +0.78% +0.97%] index_select linear : Elapsed 0.021 ms (2.059 ms / 100) 2.063 -> 2.067 ( +0.19%) [ +0.00% +0.15% +0.19% / +0.19% +0.48% +0.68%] index_select reverse : Elapsed 0.021 ms (2.063 ms / 100) 2.056 -> 2.057 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +0.54% +0.54%] index_select skip64 : Elapsed 0.021 ms (2.059 ms / 100) 2.054 -> 2.059 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.24% +0.58% +0.49%] index_select skip256 : Elapsed 0.021 ms (2.059 ms / 100) 2.060 -> 2.058 ( -0.10%) [ +0.00% +0.05% +0.15% / -0.10% +0.68% +0.53%] index_select spread : Elapsed 0.021 ms (2.060 ms / 100) 2.066 -> 2.069 ( +0.15%) [ +0.34% +0.00% +0.29% / +0.15% +0.39% +0.48%] index_select strided 3 : Elapsed 0.021 ms (2.073 ms / 100) 2.057 -> 2.059 ( +0.10%) [ +0.19% +0.10% +0.00% / +0.10% +0.39% +0.53%] index_select strided 5 : Elapsed 0.021 ms (2.061 ms / 100) 2.060 -> 2.062 ( +0.10%) [ +0.00% +0.10% +0.15% / +0.10% +0.73% +0.58%] index_select strided 7 : Elapsed 0.021 ms (2.060 ms / 100) 2.069 -> 2.071 ( +0.10%) [ +0.00% +0.14% +0.05% / +0.10% +0.68% +0.43%] index_select strided 8 : Elapsed 0.021 ms (2.069 ms / 100) 2.061 -> 2.069 ( +0.39%) [ +0.24% +0.00% +0.15% / +0.39% +0.58% +0.53%] index_select strided 16 : Elapsed 0.021 ms (2.066 ms / 100) 2.058 -> 2.055 ( -0.15%) [ +0.00% +0.19% +0.00% / -0.15% +0.53% +0.39%] index_select random : Elapsed 0.021 ms (2.058 ms / 100) 2.057 -> 2.058 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.34% +0.49%] index_select random_sorted : Elapsed 0.021 ms (2.058 ms / 100) 2.064 -> 2.067 ( +0.15%) [ +0.24% +0.10% +0.00% / +0.19% +0.34% +0.15%] index_select perm : Elapsed 0.021 ms (2.069 ms / 100) 2.065 -> 2.070 ( +0.24%) [ +0.00% +0.44% +0.24% / +0.24% +0.44% +0.29%] index_select perm_sorted : Elapsed 0.021 ms (2.065 ms / 100) out_shape = [4, 40, 20, 5] in_shape = [16, 40, 20, 5] idx_dim = 0 B = [4, 40, 20, 5] (stride (4000, 1, 200, 40)) A = [16, 40, 20, 5] (stride (4000, 20, 1, 800)) dim = 0 1.113 -> 1.113 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +1.71% +1.53%] index_select const : Elapsed 0.011 ms (1.113 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.00% +0.26% +0.00% / +0.09% +2.61% +2.53%] index_select wrap : Elapsed 0.011 ms (1.148 ms / 100) 1.145 -> 1.147 ( +0.17%) [ +0.09% +0.00% +0.00% / +0.17% +2.53% +3.14%] index_select linear : Elapsed 0.011 ms (1.146 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.09% +0.26% +0.00% / -0.09% +2.87% +2.18%] index_select reverse : Elapsed 0.011 ms (1.150 ms / 100) 1.107 -> 1.109 ( +0.18%) [ +0.18% +0.00% +0.36% / +0.18% +1.54% +1.63%] index_select skip64 : Elapsed 0.011 ms (1.109 ms / 100) 1.108 -> 1.110 ( +0.18%) [ +0.36% +0.00% +0.63% / +0.18% +1.26% +1.62%] index_select skip256 : Elapsed 0.011 ms (1.112 ms / 100) 1.153 -> 1.153 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +1.13% +1.04%] index_select spread : Elapsed 0.012 ms (1.153 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +2.53% +2.44%] index_select strided 3 : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.26% +0.26% +0.00% / +0.17% +2.18% +1.74%] index_select strided 5 : Elapsed 0.012 ms (1.151 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.17% +0.00% +0.17% / +0.09% +1.22% +1.31%] index_select strided 7 : Elapsed 0.011 ms (1.150 ms / 100) 1.126 -> 1.127 ( +0.09%) [ +0.00% +0.27% +0.00% / +0.09% +0.62% +0.80%] index_select strided 8 : Elapsed 0.011 ms (1.126 ms / 100) 1.158 -> 1.157 ( -0.09%) [ +0.09% +0.00% +0.09% / +0.17% -0.09% +0.09%] index_select random : Elapsed 0.012 ms (1.159 ms / 100) 1.150 -> 1.153 ( +0.26%) [ +0.26% +0.09% +0.00% / +0.26% +0.78% +1.39%] index_select random_sorted : Elapsed 0.012 ms (1.153 ms / 100) 1.135 -> 1.146 ( +0.97%) [ +0.79% +0.00% +0.79% / +0.97% +2.20% +2.20%] index_select perm : Elapsed 0.011 ms (1.144 ms / 100) 1.143 -> 1.147 ( +0.35%) [ +0.26% +0.17% +0.00% / +0.35% +1.49% +1.66%] index_select perm_sorted : Elapsed 0.011 ms (1.146 ms / 100) B = [4, 40, 20, 5] (stride (4000, 1, 40, 800)) A = [16, 40, 20, 5] (stride (1, 16, 3200, 640)) dim = 0 2.495 -> 2.498 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.12% +0.28% +0.12%] index_select const : Elapsed 0.025 ms (2.496 ms / 100) 2.497 -> 2.494 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.12% +0.16% +0.20%] index_select wrap : Elapsed 0.025 ms (2.499 ms / 100) 2.498 -> 2.496 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.04% -0.04% -0.08%] index_select linear : Elapsed 0.025 ms (2.498 ms / 100) 2.495 -> 2.497 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.12% +0.24% +0.08%] index_select reverse : Elapsed 0.025 ms (2.495 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.24% +0.44% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.496 ms / 100) 2.491 -> 2.492 ( +0.04%) [ +0.40% +0.08% +0.00% / +0.04% +0.36% +0.28%] index_select skip256 : Elapsed 0.025 ms (2.501 ms / 100) 2.523 -> 2.524 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.28% +0.20%] index_select spread : Elapsed 0.025 ms (2.525 ms / 100) 2.517 -> 2.523 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.24% +0.36% +0.44%] index_select strided 3 : Elapsed 0.025 ms (2.521 ms / 100) 2.517 -> 2.519 ( +0.08%) [ +0.40% +0.00% +0.24% / +0.08% +0.32% +0.48%] index_select strided 5 : Elapsed 0.025 ms (2.527 ms / 100) 2.518 -> 2.520 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.48% +0.44%] index_select strided 7 : Elapsed 0.025 ms (2.524 ms / 100) 2.532 -> 2.539 ( +0.28%) [ +0.20% +0.36% +0.00% / +0.28% +0.63% +0.39%] index_select strided 8 : Elapsed 0.025 ms (2.537 ms / 100) 2.534 -> 2.538 ( +0.16%) [ +0.00% +0.12% +0.00% / +0.20% +0.16% +0.39%] index_select random : Elapsed 0.025 ms (2.534 ms / 100) 2.532 -> 2.535 ( +0.12%) [ +0.24% +0.00% +0.08% / +0.12% +0.39% +0.39%] index_select random_sorted : Elapsed 0.025 ms (2.538 ms / 100) 2.532 -> 2.536 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.32% +0.43%] index_select perm : Elapsed 0.025 ms (2.537 ms / 100) 2.521 -> 2.524 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.12% +0.32% +0.36%] index_select perm_sorted : Elapsed 0.025 ms (2.523 ms / 100) B = [4, 40, 20, 5] (stride (5, 400, 20, 1)) A = [16, 40, 20, 5] (stride (1, 1600, 16, 320)) dim = 0 2.394 -> 2.399 ( +0.21%) [ +0.00% +0.25% +0.29% / +0.21% +0.42% +0.38%] index_select const : Elapsed 0.024 ms (2.394 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.00% +0.13% +0.21% / +0.04% +0.54% +0.42%] index_select wrap : Elapsed 0.024 ms (2.391 ms / 100) 2.396 -> 2.395 ( -0.04%) [ +0.21% +0.00% +0.13% / -0.04% +0.50% +0.54%] index_select linear : Elapsed 0.024 ms (2.401 ms / 100) 2.392 -> 2.389 ( -0.13%) [ +0.38% +0.33% +0.00% / -0.13% +0.75% +0.67%] index_select reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.388 -> 2.391 ( +0.13%) [ +0.46% +0.00% +0.25% / +0.13% +0.50% +0.59%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.394 -> 2.394 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.54% +0.67%] index_select skip256 : Elapsed 0.024 ms (2.394 ms / 100) 2.415 -> 2.422 ( +0.29%) [ +0.04% +0.00% +0.00% / +0.29% +0.41% +0.33%] index_select spread : Elapsed 0.024 ms (2.416 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.00% +0.08% +0.12% / +0.00% +0.37% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.417 -> 2.416 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.41% +0.50%] index_select strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.413 -> 2.411 ( -0.08%) [ +0.00% +0.17% +0.17% / -0.08% +0.54% +0.62%] index_select strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.419 -> 2.423 ( +0.17%) [ +0.21% +0.00% +0.29% / +0.17% +0.66% +0.66%] index_select strided 8 : Elapsed 0.024 ms (2.424 ms / 100) 2.412 -> 2.423 ( +0.46%) [ +0.50% +0.00% +0.21% / +0.46% +0.66% +0.50%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.416 -> 2.413 ( -0.12%) [ +0.08% +0.04% +0.00% / -0.12% +0.00% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.04% +0.17% +0.00% / +0.00% +0.33% +0.33%] index_select perm : Elapsed 0.024 ms (2.425 ms / 100) 2.417 -> 2.418 ( +0.04%) [ +0.08% +0.17% +0.00% / +0.04% +0.41% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [4, 40, 20, 5] (stride (1, 400, 4, 80)) A = [16, 40, 20, 5] (stride (800, 20, 1, 12800)) dim = 0 2.312 -> 2.315 ( +0.13%) [ +0.17% +0.17% +0.00% / +0.13% +0.22% +0.30%] index_select const : Elapsed 0.023 ms (2.316 ms / 100) 2.352 -> 2.352 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.09% +0.00% +0.17%] index_select wrap : Elapsed 0.024 ms (2.352 ms / 100) 2.350 -> 2.351 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.09% +0.09% +0.04%] index_select linear : Elapsed 0.024 ms (2.352 ms / 100) 2.347 -> 2.349 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.26% +0.09% +0.09%] index_select reverse : Elapsed 0.024 ms (2.350 ms / 100) 2.313 -> 2.312 ( -0.04%) [ +0.26% +0.13% +0.00% / +0.13% +0.04% -0.04%] index_select skip64 : Elapsed 0.023 ms (2.319 ms / 100) 2.318 -> 2.313 ( -0.22%) [ +0.13% +0.04% +0.00% / -0.04% -0.22% -0.13%] index_select skip256 : Elapsed 0.023 ms (2.321 ms / 100) 2.346 -> 2.351 ( +0.21%) [ +0.34% +0.04% +0.00% / +0.43% +0.21% +0.30%] index_select spread : Elapsed 0.024 ms (2.354 ms / 100) 2.344 -> 2.349 ( +0.21%) [ +0.17% +0.17% +0.00% / +0.21% +0.47% +0.51%] index_select strided 3 : Elapsed 0.023 ms (2.348 ms / 100) 2.353 -> 2.355 ( +0.08%) [ +0.30% +0.00% +0.08% / +0.08% +0.38% +0.38%] index_select strided 5 : Elapsed 0.024 ms (2.360 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.21% +0.04% +0.00% / +0.26% +0.17% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.356 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +0.22% +0.17%] index_select strided 8 : Elapsed 0.023 ms (2.326 ms / 100) 2.352 -> 2.354 ( +0.09%) [ +0.26% +0.26% +0.00% / +0.21% +0.17% +0.09%] index_select random : Elapsed 0.024 ms (2.358 ms / 100) 2.357 -> 2.355 ( -0.08%) [ +0.08% +0.17% +0.00% / -0.08% +0.17% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.359 ms / 100) 2.357 -> 2.358 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.08% +0.34% +0.04%] index_select perm : Elapsed 0.024 ms (2.360 ms / 100) 2.348 -> 2.353 ( +0.21%) [ +0.04% +0.00% +0.09% / +0.21% +0.38% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.349 ms / 100) B = [4, 40, 20, 5] (stride (200, 5, 800, 1)) A = [16, 40, 20, 5] (stride (4000, 1, 200, 40)) dim = 0 2.428 -> 2.427 ( -0.04%) [ +0.00% +0.00% +0.12% / -0.04% +0.25% +0.37%] index_select const : Elapsed 0.024 ms (2.428 ms / 100) 2.431 -> 2.433 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.08% +0.37% +0.37%] index_select wrap : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.45% +0.53%] index_select linear : Elapsed 0.024 ms (2.433 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.25% +0.21% +0.00% / +0.12% +0.54% +0.54%] index_select reverse : Elapsed 0.024 ms (2.432 ms / 100) 2.425 -> 2.424 ( -0.04%) [ +0.00% +0.16% +0.08% / -0.04% +0.58% +0.37%] index_select skip64 : Elapsed 0.024 ms (2.425 ms / 100) 2.423 -> 2.427 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.54% +0.45%] index_select skip256 : Elapsed 0.024 ms (2.425 ms / 100) 2.430 -> 2.429 ( -0.04%) [ +0.16% +0.00% +0.04% / -0.04% +0.29% +0.45%] index_select spread : Elapsed 0.024 ms (2.434 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.00% +0.12% +0.16% / +0.04% +0.45% +0.49%] index_select strided 3 : Elapsed 0.024 ms (2.429 ms / 100) 2.430 -> 2.428 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.29% +0.37%] index_select strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.428 -> 2.428 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.45% +0.41%] index_select strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.54% +0.62%] index_select strided 8 : Elapsed 0.024 ms (2.422 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.25% +0.41%] index_select random : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.33% +0.37%] index_select random_sorted : Elapsed 0.024 ms (2.430 ms / 100) 2.430 -> 2.433 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.12% +0.21% +0.21%] index_select perm : Elapsed 0.024 ms (2.432 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.33% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) B = [4, 40, 20, 5] (stride (200, 5, 800, 1)) A = [16, 40, 20, 5] (stride (800, 1, 40, 12800)) dim = 0 2.544 -> 2.550 ( +0.24%) [ +0.04% +0.04% +0.00% / +0.24% +0.39% +0.43%] index_select const : Elapsed 0.025 ms (2.545 ms / 100) 2.549 -> 2.549 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.43% +0.31%] index_select wrap : Elapsed 0.026 ms (2.550 ms / 100) 2.548 -> 2.550 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.55% +0.43%] index_select linear : Elapsed 0.026 ms (2.551 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.00% +0.16% +0.12% / +0.04% +0.31% +0.39%] index_select reverse : Elapsed 0.025 ms (2.548 ms / 100) 2.544 -> 2.544 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.43% +0.31%] index_select skip64 : Elapsed 0.025 ms (2.546 ms / 100) 2.542 -> 2.545 ( +0.12%) [ +0.00% +0.04% +0.16% / +0.12% +0.43% +0.47%] index_select skip256 : Elapsed 0.025 ms (2.542 ms / 100) 2.552 -> 2.557 ( +0.20%) [ +0.20% +0.08% +0.00% / +0.20% +0.67% +0.47%] index_select spread : Elapsed 0.026 ms (2.557 ms / 100) 2.552 -> 2.552 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.43% +0.35%] index_select strided 3 : Elapsed 0.026 ms (2.553 ms / 100) 2.555 -> 2.553 ( -0.08%) [ +0.00% +0.04% +0.12% / -0.08% +0.55% +0.55%] index_select strided 5 : Elapsed 0.026 ms (2.555 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.51% +0.27%] index_select strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.546 -> 2.545 ( -0.04%) [ +0.16% +0.00% +0.08% / -0.04% +0.31% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.550 ms / 100) 2.553 -> 2.557 ( +0.16%) [ +0.00% +0.12% +0.00% / +0.24% +0.31% +0.16%] index_select random : Elapsed 0.026 ms (2.553 ms / 100) 2.553 -> 2.553 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.31% +0.27%] index_select random_sorted : Elapsed 0.026 ms (2.553 ms / 100) 2.546 -> 2.549 ( +0.12%) [ +0.00% +0.20% +0.04% / +0.12% +0.35% +0.35%] index_select perm : Elapsed 0.025 ms (2.546 ms / 100) 2.552 -> 2.551 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.39% +0.35%] index_select perm_sorted : Elapsed 0.026 ms (2.553 ms / 100) B = [4, 40, 20, 5] (stride (40, 1, 800, 160)) A = [16, 40, 20, 5] (stride (1, 1600, 80, 16)) dim = 0 2.180 -> 2.179 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.28% +0.18%] index_select const : Elapsed 0.022 ms (2.181 ms / 100) 2.182 -> 2.182 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.09% +0.00% +0.09%] index_select wrap : Elapsed 0.022 ms (2.184 ms / 100) 2.184 -> 2.181 ( -0.14%) [ +0.05% +0.09% +0.00% / +0.00% -0.14% -0.09%] index_select linear : Elapsed 0.022 ms (2.185 ms / 100) 2.180 -> 2.179 ( -0.05%) [ +0.37% +0.09% +0.00% / -0.05% +0.00% +0.05%] index_select reverse : Elapsed 0.022 ms (2.188 ms / 100) 2.182 -> 2.183 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.18% +0.18%] index_select skip64 : Elapsed 0.022 ms (2.183 ms / 100) 2.181 -> 2.184 ( +0.14%) [ +0.14% +0.09% +0.00% / +0.28% +0.14% +0.14%] index_select skip256 : Elapsed 0.022 ms (2.184 ms / 100) 2.192 -> 2.192 ( +0.00%) [ +0.00% +0.14% +0.18% / +0.00% +0.36% +0.27%] index_select spread : Elapsed 0.022 ms (2.192 ms / 100) 2.198 -> 2.204 ( +0.27%) [ +0.18% +0.00% +0.32% / +0.27% +0.50% +0.45%] index_select strided 3 : Elapsed 0.022 ms (2.202 ms / 100) 2.194 -> 2.199 ( +0.23%) [ +0.00% +0.05% +0.05% / +0.23% +0.32% +0.41%] index_select strided 5 : Elapsed 0.022 ms (2.194 ms / 100) 2.192 -> 2.193 ( +0.05%) [ +0.23% +0.00% +0.18% / +0.05% +0.50% +0.41%] index_select strided 7 : Elapsed 0.022 ms (2.197 ms / 100) 2.208 -> 2.212 ( +0.18%) [ +0.27% +0.05% +0.00% / +0.27% +0.18% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.214 ms / 100) 2.208 -> 2.208 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.36% +0.32%] index_select random : Elapsed 0.022 ms (2.208 ms / 100) 2.200 -> 2.203 ( +0.14%) [ +0.00% +0.18% +0.09% / +0.14% +0.18% +0.14%] index_select random_sorted : Elapsed 0.022 ms (2.200 ms / 100) 2.198 -> 2.196 ( -0.09%) [ +0.09% +0.05% +0.00% / -0.09% +0.18% +0.14%] index_select perm : Elapsed 0.022 ms (2.200 ms / 100) 2.208 -> 2.208 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.23% +0.45%] index_select perm_sorted : Elapsed 0.022 ms (2.208 ms / 100) out_shape = [16, 4, 20, 5] in_shape = [16, 40, 20, 5] idx_dim = 1 B = [16, 4, 20, 5] (stride (400, 5, 20, 1)) A = [16, 40, 20, 5] (stride (40, 1, 3200, 640)) dim = 1 1.276 -> 1.275 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.31%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.277 ( +0.24%) [ +0.39% +0.16% +0.00% / +0.24% +0.47% +0.55%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.272 ( -0.24%) [ +0.00% +0.08% +0.16% / -0.24% +0.55% +0.63%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.55% +0.86%] index_select reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.08% +0.24% +0.00% / -0.08% +0.39% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.277 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.24% +0.31% +0.00% / +0.08% +0.71% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.94%] index_select spread : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.00% +0.24% +0.16% / +0.16% +0.71% +0.71%] index_select strided 3 : Elapsed 0.013 ms (1.275 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.63% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.273 -> 1.277 ( +0.31%) [ +0.39% +0.00% +0.31% / +0.31% +0.86% +0.79%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.16% +0.00% +0.08% / -0.08% +0.47% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.31% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.39% +0.00% +0.63% / +0.16% +0.71% +0.55%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.47% +0.39%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select perm : Elapsed 0.013 ms (1.275 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.00% +0.23% +0.00% / +0.23% +0.55% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) B = [16, 4, 20, 5] (stride (400, 20, 1, 80)) A = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) dim = 1 1.323 -> 1.324 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.53% +0.38%] index_select const : Elapsed 0.013 ms (1.324 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.53% +0.61%] index_select wrap : Elapsed 0.013 ms (1.320 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_select linear : Elapsed 0.013 ms (1.323 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.38% +0.53%] index_select reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.325 -> 1.326 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.60% +0.45%] index_select skip64 : Elapsed 0.013 ms (1.325 ms / 100) 1.328 -> 1.327 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.60% +0.38%] index_select skip256 : Elapsed 0.013 ms (1.329 ms / 100) 1.317 -> 1.321 ( +0.30%) [ +0.15% +0.15% +0.00% / +0.30% +0.84% +0.76%] index_select spread : Elapsed 0.013 ms (1.319 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.53% +0.60%] index_select strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.325 -> 1.326 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.53% +0.45%] index_select strided 5 : Elapsed 0.013 ms (1.325 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.38% +0.15%] index_select strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.328 -> 1.329 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.38% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.328 ms / 100) 1.325 -> 1.328 ( +0.23%) [ +0.15% +0.08% +0.00% / +0.23% +0.53% +0.60%] index_select strided 16 : Elapsed 0.013 ms (1.327 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.46% +0.30%] index_select random : Elapsed 0.013 ms (1.320 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.38% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.320 ms / 100) 1.326 -> 1.326 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.30% +0.38%] index_select perm : Elapsed 0.013 ms (1.327 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.53% +0.76%] index_select perm_sorted : Elapsed 0.013 ms (1.322 ms / 100) B = [16, 4, 20, 5] (stride (1, 1600, 80, 16)) A = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) dim = 1 1.242 -> 1.243 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.40% +0.56%] index_select const : Elapsed 0.012 ms (1.242 ms / 100) 1.220 -> 1.225 ( +0.41%) [ +0.00% +0.33% +0.00% / +0.41% +0.49% +0.74%] index_select wrap : Elapsed 0.012 ms (1.220 ms / 100) 1.224 -> 1.229 ( +0.41%) [ +0.08% +0.08% +0.00% / +0.41% +0.74% +0.65%] index_select linear : Elapsed 0.012 ms (1.225 ms / 100) 1.235 -> 1.234 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.32% +0.40%] index_select reverse : Elapsed 0.012 ms (1.235 ms / 100) 1.243 -> 1.244 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.48% +0.32%] index_select skip64 : Elapsed 0.012 ms (1.244 ms / 100) 1.240 -> 1.241 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.08% +0.16%] index_select skip256 : Elapsed 0.012 ms (1.240 ms / 100) 1.232 -> 1.235 ( +0.24%) [ +0.16% +0.00% +0.08% / +0.24% +0.41% +0.32%] index_select spread : Elapsed 0.012 ms (1.234 ms / 100) 1.236 -> 1.237 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.49% +0.65%] index_select strided 3 : Elapsed 0.012 ms (1.238 ms / 100) 1.233 -> 1.234 ( +0.08%) [ +0.16% +0.41% +0.00% / +0.08% +0.24% +0.49%] index_select strided 5 : Elapsed 0.012 ms (1.235 ms / 100) 1.236 -> 1.236 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.32% +0.32%] index_select strided 7 : Elapsed 0.012 ms (1.238 ms / 100) 1.244 -> 1.246 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.40% +0.32%] index_select strided 8 : Elapsed 0.012 ms (1.246 ms / 100) 1.239 -> 1.240 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.56% +0.24%] index_select strided 16 : Elapsed 0.012 ms (1.240 ms / 100) 1.236 -> 1.237 ( +0.08%) [ +0.32% +0.00% +0.16% / +0.08% +0.49% +0.49%] index_select random : Elapsed 0.012 ms (1.240 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.65% +0.73%] index_select random_sorted : Elapsed 0.012 ms (1.237 ms / 100) 1.235 -> 1.239 ( +0.32%) [ +0.24% +0.16% +0.00% / +0.32% +0.57% +0.57%] index_select perm : Elapsed 0.012 ms (1.238 ms / 100) 1.233 -> 1.233 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.65% +0.73%] index_select perm_sorted : Elapsed 0.012 ms (1.234 ms / 100) B = [16, 4, 20, 5] (stride (20, 1600, 1, 320)) A = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) dim = 1 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.38% +0.38%] index_select const : Elapsed 0.013 ms (1.323 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.38% +0.45%] index_select wrap : Elapsed 0.013 ms (1.321 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.61% +0.45%] index_select linear : Elapsed 0.013 ms (1.323 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.76% +0.61%] index_select reverse : Elapsed 0.013 ms (1.320 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.60% +0.60%] index_select skip64 : Elapsed 0.013 ms (1.326 ms / 100) 1.327 -> 1.328 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.60% +0.45%] index_select skip256 : Elapsed 0.013 ms (1.327 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.53%] index_select spread : Elapsed 0.013 ms (1.320 ms / 100) 1.322 -> 1.324 ( +0.15%) [ +0.15% +0.23% +0.00% / +0.15% +0.68% +0.61%] index_select strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.325 -> 1.325 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select strided 5 : Elapsed 0.013 ms (1.326 ms / 100) 1.317 -> 1.321 ( +0.30%) [ +0.23% +0.23% +0.00% / +0.46% +0.38% +0.30%] index_select strided 7 : Elapsed 0.013 ms (1.320 ms / 100) 1.327 -> 1.330 ( +0.23%) [ +0.08% +0.08% +0.00% / +0.23% +0.45% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.328 ms / 100) 1.326 -> 1.327 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.53% +0.45%] index_select strided 16 : Elapsed 0.013 ms (1.327 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.53% +0.45%] index_select random : Elapsed 0.013 ms (1.319 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.30% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.325 ms / 100) 1.321 -> 1.320 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.30% +0.38%] index_select perm : Elapsed 0.013 ms (1.321 ms / 100) 1.326 -> 1.325 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.45% +0.38%] index_select perm_sorted : Elapsed 0.013 ms (1.326 ms / 100) B = [16, 4, 20, 5] (stride (1, 1600, 16, 320)) A = [16, 40, 20, 5] (stride (1, 1600, 16, 320)) dim = 1 1.323 -> 1.325 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.15% +0.38% +0.45%] index_select const : Elapsed 0.013 ms (1.324 ms / 100) 1.318 -> 1.323 ( +0.38%) [ +0.38% +0.00% +0.38% / +0.38% +0.53% +0.61%] index_select wrap : Elapsed 0.013 ms (1.323 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.30% +0.08% +0.00% / +0.08% +0.61% +0.53%] index_select linear : Elapsed 0.013 ms (1.326 ms / 100) 1.327 -> 1.329 ( +0.15%) [ +0.30% +0.00% +0.08% / +0.15% +0.30% +0.38%] index_select reverse : Elapsed 0.013 ms (1.331 ms / 100) 1.325 -> 1.325 ( +0.00%) [ +0.23% +0.00% +0.15% / +0.00% +0.23% +0.15%] index_select skip64 : Elapsed 0.013 ms (1.328 ms / 100) 1.328 -> 1.327 ( -0.08%) [ +0.38% +0.00% +0.00% / -0.08% +0.23% +0.15%] index_select skip256 : Elapsed 0.013 ms (1.333 ms / 100) 1.320 -> 1.324 ( +0.30%) [ +0.23% +0.15% +0.00% / +0.30% +0.45% +0.45%] index_select spread : Elapsed 0.013 ms (1.323 ms / 100) 1.324 -> 1.326 ( +0.15%) [ +0.23% +0.00% +0.15% / +0.15% +0.30% +0.38%] index_select strided 3 : Elapsed 0.013 ms (1.327 ms / 100) 1.324 -> 1.328 ( +0.30%) [ +0.23% +0.15% +0.00% / +0.30% +0.45% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.327 ms / 100) 1.332 -> 1.328 ( -0.30%) [ +0.00% +0.00% +0.00% / -0.30% +0.30% +0.45%] index_select strided 7 : Elapsed 0.013 ms (1.332 ms / 100) 1.328 -> 1.332 ( +0.30%) [ +0.00% +0.00% +0.00% / +0.30% +0.53% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.328 ms / 100) 1.318 -> 1.316 ( -0.15%) [ +0.15% +0.00% +0.15% / -0.15% +0.30% +0.23%] index_select strided 16 : Elapsed 0.013 ms (1.320 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.38% +0.38%] index_select random : Elapsed 0.013 ms (1.321 ms / 100) 1.321 -> 1.323 ( +0.15%) [ +0.23% +0.00% +0.15% / +0.15% +0.61% +0.68%] index_select random_sorted : Elapsed 0.013 ms (1.324 ms / 100) 1.320 -> 1.323 ( +0.23%) [ +0.08% +0.00% +0.23% / +0.23% +0.38% +0.38%] index_select perm : Elapsed 0.013 ms (1.321 ms / 100) 1.320 -> 1.323 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.45% +0.45%] index_select perm_sorted : Elapsed 0.013 ms (1.320 ms / 100) B = [16, 4, 20, 5] (stride (1, 80, 320, 16)) A = [16, 40, 20, 5] (stride (4000, 100, 1, 20)) dim = 1 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.31% +0.39%] index_select const : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.39%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.16% +0.00% +0.00% / +0.24% +0.39% +0.39%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.39% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.47% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.55%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.31% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.39% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.55%] index_select strided 16 : Elapsed 0.013 ms (1.277 ms / 100) 1.278 -> 1.281 ( +0.23%) [ +0.16% +0.00% +0.08% / +0.23% +0.39% +0.47%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.31% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.31% +0.23%] index_select perm : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [16, 4, 20, 5] (stride (1, 80, 320, 16)) A = [16, 40, 20, 5] (stride (800, 1, 40, 12800)) dim = 1 1.365 -> 1.367 ( +0.15%) [ +0.22% +0.00% +0.07% / +0.15% +0.59% +0.73%] index_select const : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.364 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.59% +0.66%] index_select wrap : Elapsed 0.014 ms (1.364 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.51%] index_select linear : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.66% +0.59%] index_select reverse : Elapsed 0.014 ms (1.367 ms / 100) 1.363 -> 1.365 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.66% +0.73%] index_select skip64 : Elapsed 0.014 ms (1.365 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.66% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.366 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.66% +0.58%] index_select spread : Elapsed 0.014 ms (1.370 ms / 100) 1.366 -> 1.364 ( -0.15%) [ +0.15% +0.15% +0.00% / -0.15% +0.51% +0.44%] index_select strided 3 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.44% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.44% +0.37%] index_select strided 7 : Elapsed 0.014 ms (1.368 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.65% +0.80%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.22% +0.15%] index_select strided 16 : Elapsed 0.014 ms (1.368 ms / 100) 1.371 -> 1.371 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.51% +0.36%] index_select random : Elapsed 0.014 ms (1.373 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.66% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.29%] index_select perm : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.370 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.29% +0.22%] index_select perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) B = [16, 4, 20, 5] (stride (1, 16, 320, 64)) dim = 1 fill_cnt = 40 1.220 -> 1.207 ( -1.07%) [ +0.41% +0.16% +0.00% / -1.07% -0.66% -0.33%] index_fill_ const : Elapsed 0.012 ms (1.225 ms / 100) 1.210 -> 1.196 ( -1.16%) [ +0.00% +0.50% +0.33% / -0.74% -1.16% -0.58%] index_fill_ linear : Elapsed 0.012 ms (1.210 ms / 100) 1.208 -> 1.198 ( -0.83%) [ +0.00% +0.08% +0.50% / -0.83% -0.83% -0.66%] index_fill_ reverse : Elapsed 0.012 ms (1.208 ms / 100) 1.221 -> 1.205 ( -1.31%) [ +0.08% +0.08% +0.00% / -1.31% -0.98% -0.66%] index_fill_ skip64 : Elapsed 0.012 ms (1.222 ms / 100) 1.216 -> 1.204 ( -0.99%) [ +0.00% +0.49% +0.49% / -0.99% -0.49% -0.33%] index_fill_ skip256 : Elapsed 0.012 ms (1.216 ms / 100) 1.205 -> 1.192 ( -1.08%) [ +0.00% +0.08% +0.25% / -1.08% +0.00% +0.00%] index_fill_ spread : Elapsed 0.012 ms (1.205 ms / 100) 1.212 -> 1.196 ( -1.32%) [ +0.00% +0.08% +0.17% / -1.32% -0.58% -0.33%] index_fill_ strided 3 : Elapsed 0.012 ms (1.212 ms / 100) 1.233 -> 1.219 ( -1.14%) [ +0.24% +0.00% +0.08% / -1.14% -0.89% -0.81%] index_fill_ random : Elapsed 0.012 ms (1.236 ms / 100) 1.228 -> 1.216 ( -0.98%) [ +0.41% +0.08% +0.00% / -0.98% -0.90% -0.49%] index_fill_ random_sorted : Elapsed 0.012 ms (1.233 ms / 100) B = [16, 4, 20, 5] (stride (1, 320, 16, 1280)) A = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) dim = 1 1.327 -> 1.329 ( +0.15%) [ +0.23% +0.00% +0.08% / +0.15% +0.45% +0.45%] index_select const : Elapsed 0.013 ms (1.330 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.45% +0.30%] index_select wrap : Elapsed 0.013 ms (1.325 ms / 100) 1.324 -> 1.326 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.60% +0.60%] index_select linear : Elapsed 0.013 ms (1.326 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.23% +0.30%] index_select reverse : Elapsed 0.013 ms (1.324 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.75% +0.68%] index_select skip64 : Elapsed 0.013 ms (1.330 ms / 100) 1.347 -> 1.347 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.22% +0.37%] index_select skip256 : Elapsed 0.013 ms (1.348 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.37% +0.37%] index_select spread : Elapsed 0.013 ms (1.336 ms / 100) 1.327 -> 1.327 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.23% +0.68%] index_select strided 3 : Elapsed 0.013 ms (1.327 ms / 100) 1.330 -> 1.329 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.53% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.331 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.23% +0.53%] index_select strided 7 : Elapsed 0.013 ms (1.321 ms / 100) 1.332 -> 1.333 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.45% +0.38%] index_select strided 8 : Elapsed 0.013 ms (1.332 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.45% +0.38%] index_select strided 16 : Elapsed 0.013 ms (1.333 ms / 100) 1.327 -> 1.327 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.15% +0.00% +0.15%] index_select random : Elapsed 0.013 ms (1.328 ms / 100) 1.325 -> 1.325 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.23% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.325 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.15% +0.08% +0.00% / +0.00% +0.30% +0.38%] index_select perm : Elapsed 0.013 ms (1.323 ms / 100) 1.327 -> 1.333 ( +0.45%) [ +0.38% +0.30% +0.00% / +0.53% +0.45% +0.45%] index_select perm_sorted : Elapsed 0.013 ms (1.332 ms / 100) out_shape = [16, 40, 4, 5] in_shape = [16, 40, 20, 5] idx_dim = 2 B = [16, 40, 4, 5] (stride (800, 5, 200, 1)) A = [16, 40, 20, 5] (stride (4000, 1, 200, 40)) dim = 2 2.036 -> 2.038 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.34% +0.25%] index_select const : Elapsed 0.020 ms (2.038 ms / 100) 2.048 -> 2.054 ( +0.29%) [ +0.20% +0.00% +0.05% / +0.29% +0.59% +0.39%] index_select wrap : Elapsed 0.021 ms (2.052 ms / 100) 2.051 -> 2.059 ( +0.39%) [ +0.00% +0.29% +0.15% / +0.39% +0.73% +0.39%] index_select linear : Elapsed 0.021 ms (2.051 ms / 100) 2.046 -> 2.052 ( +0.29%) [ +0.24% +0.00% +0.15% / +0.29% +0.68% +0.68%] index_select reverse : Elapsed 0.021 ms (2.051 ms / 100) 2.029 -> 2.033 ( +0.20%) [ +0.59% +0.49% +0.00% / +0.20% +0.79% +0.74%] index_select skip64 : Elapsed 0.020 ms (2.041 ms / 100) 2.036 -> 2.038 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.59% +0.44%] index_select skip256 : Elapsed 0.020 ms (2.036 ms / 100) 2.053 -> 2.052 ( -0.05%) [ +0.00% +0.19% +0.15% / -0.05% +0.68% +0.78%] index_select spread : Elapsed 0.021 ms (2.053 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.00% +0.20% +0.15% / +0.05% +0.34% +0.44%] index_select strided 3 : Elapsed 0.020 ms (2.046 ms / 100) 2.048 -> 2.052 ( +0.20%) [ +0.05% +0.00% +0.05% / +0.20% +0.54% +0.78%] index_select strided 5 : Elapsed 0.020 ms (2.049 ms / 100) 2.048 -> 2.054 ( +0.29%) [ +0.34% +0.24% +0.00% / +0.29% +0.73% +0.68%] index_select strided 7 : Elapsed 0.021 ms (2.055 ms / 100) 2.048 -> 2.051 ( +0.15%) [ +0.00% +0.24% +0.05% / +0.15% +1.17% +0.83%] index_select strided 8 : Elapsed 0.020 ms (2.048 ms / 100) 2.042 -> 2.045 ( +0.15%) [ +0.05% +0.20% +0.00% / +0.15% +1.27% +0.73%] index_select strided 16 : Elapsed 0.020 ms (2.043 ms / 100) 2.052 -> 2.055 ( +0.15%) [ +0.19% +0.00% +0.05% / +0.15% +0.49% +0.19%] index_select random : Elapsed 0.021 ms (2.056 ms / 100) 2.044 -> 2.042 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +0.54% +0.64%] index_select random_sorted : Elapsed 0.020 ms (2.046 ms / 100) 2.042 -> 2.045 ( +0.15%) [ +0.10% +0.05% +0.00% / +0.15% +0.69% +0.64%] index_select perm : Elapsed 0.020 ms (2.044 ms / 100) 2.058 -> 2.060 ( +0.10%) [ +0.00% +0.00% +0.19% / +0.10% +0.10% +0.39%] index_select perm_sorted : Elapsed 0.021 ms (2.058 ms / 100) B = [16, 40, 4, 5] (stride (1, 320, 80, 16)) A = [16, 40, 20, 5] (stride (5, 80, 3200, 1)) dim = 2 2.046 -> 2.047 ( +0.05%) [ +0.29% +0.24% +0.00% / +0.05% +0.83% +0.78%] index_select const : Elapsed 0.021 ms (2.052 ms / 100) 2.051 -> 2.055 ( +0.20%) [ +0.24% +0.34% +0.00% / +0.20% +0.93% +0.93%] index_select wrap : Elapsed 0.021 ms (2.056 ms / 100) 2.052 -> 2.060 ( +0.39%) [ +0.29% +0.29% +0.00% / +0.39% +1.12% +1.07%] index_select linear : Elapsed 0.021 ms (2.058 ms / 100) 2.064 -> 2.066 ( +0.10%) [ +0.05% +0.00% +0.15% / +0.10% +0.78% +0.73%] index_select reverse : Elapsed 0.021 ms (2.065 ms / 100) 2.047 -> 2.049 ( +0.10%) [ +0.20% +0.05% +0.00% / +0.10% +0.59% +0.78%] index_select skip64 : Elapsed 0.021 ms (2.051 ms / 100) 2.044 -> 2.048 ( +0.20%) [ +0.00% +0.24% +0.20% / +0.20% +0.83% +0.73%] index_select skip256 : Elapsed 0.020 ms (2.044 ms / 100) 2.053 -> 2.056 ( +0.15%) [ +0.05% +0.00% +0.19% / +0.15% +0.19% +0.29%] index_select spread : Elapsed 0.021 ms (2.054 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.29% +0.39%] index_select strided 3 : Elapsed 0.021 ms (2.059 ms / 100) 2.054 -> 2.057 ( +0.15%) [ +0.00% +0.34% +0.05% / +0.15% +0.29% +0.34%] index_select strided 5 : Elapsed 0.021 ms (2.054 ms / 100) 2.054 -> 2.056 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.73% +0.68%] index_select strided 7 : Elapsed 0.021 ms (2.055 ms / 100) 2.062 -> 2.059 ( -0.15%) [ +0.39% +0.00% +0.05% / -0.15% +0.73% +0.48%] index_select strided 8 : Elapsed 0.021 ms (2.070 ms / 100) 2.056 -> 2.059 ( +0.15%) [ +0.24% +0.19% +0.00% / +0.15% +0.88% +0.63%] index_select strided 16 : Elapsed 0.021 ms (2.061 ms / 100) 2.057 -> 2.057 ( +0.00%) [ +0.00% +0.05% +0.10% / +0.00% +0.29% +0.24%] index_select random : Elapsed 0.021 ms (2.057 ms / 100) 2.066 -> 2.065 ( -0.05%) [ +0.15% +0.00% +0.00% / -0.05% +0.48% +0.39%] index_select random_sorted : Elapsed 0.021 ms (2.069 ms / 100) 2.051 -> 2.051 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.49%] index_select perm : Elapsed 0.021 ms (2.051 ms / 100) 2.056 -> 2.056 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.63% +0.68%] index_select perm_sorted : Elapsed 0.021 ms (2.057 ms / 100) B = [16, 40, 4, 5] (stride (4, 320, 1, 64)) A = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) dim = 2 2.097 -> 2.093 ( -0.19%) [ +0.00% +0.05% +0.00% / -0.19% +0.10% +0.24%] index_select const : Elapsed 0.021 ms (2.097 ms / 100) 2.090 -> 2.085 ( -0.24%) [ +0.10% +0.00% +0.14% / -0.05% -0.24% -0.24%] index_select wrap : Elapsed 0.021 ms (2.092 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.14% +0.00% +0.00% / +0.14% +0.00% -0.05%] index_select linear : Elapsed 0.021 ms (2.093 ms / 100) 2.092 -> 2.090 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.10% +0.10% +0.05%] index_select reverse : Elapsed 0.021 ms (2.093 ms / 100) 2.094 -> 2.100 ( +0.29%) [ +0.38% +0.29% +0.00% / +0.29% +0.33% +0.33%] index_select skip64 : Elapsed 0.021 ms (2.102 ms / 100) 2.096 -> 2.093 ( -0.14%) [ +0.19% +0.10% +0.00% / +0.05% -0.14% +0.05%] index_select skip256 : Elapsed 0.021 ms (2.100 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.24% +0.10% +0.00% / +0.10% +0.33% +0.14%] index_select spread : Elapsed 0.021 ms (2.095 ms / 100) 2.089 -> 2.084 ( -0.24%) [ +0.05% +0.00% +0.05% / +0.00% -0.05% -0.24%] index_select strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.085 -> 2.088 ( +0.14%) [ +0.24% +0.00% +0.00% / +0.14% +0.14% +0.14%] index_select strided 5 : Elapsed 0.021 ms (2.090 ms / 100) 2.094 -> 2.094 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.14% +0.14%] index_select strided 7 : Elapsed 0.021 ms (2.095 ms / 100) 2.093 -> 2.092 ( -0.05%) [ +0.00% +0.05% +0.10% / -0.05% +0.33% +0.62%] index_select strided 8 : Elapsed 0.021 ms (2.093 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.10% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.087 ms / 100) 2.091 -> 2.089 ( -0.10%) [ +0.10% +0.00% +0.05% / +0.19% -0.05% -0.10%] index_select random : Elapsed 0.021 ms (2.093 ms / 100) 2.087 -> 2.087 ( +0.00%) [ +0.19% +0.00% +0.14% / +0.19% +0.14% +0.00%] index_select random_sorted : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.10% -0.05% +0.14%] index_select perm : Elapsed 0.021 ms (2.091 ms / 100) 2.094 -> 2.093 ( -0.05%) [ +0.00% +0.10% +0.05% / +0.14% -0.05% +0.14%] index_select perm_sorted : Elapsed 0.021 ms (2.094 ms / 100) B = [16, 40, 4, 5] (stride (200, 1, 3200, 40)) A = [16, 40, 20, 5] (stride (4000, 5, 200, 1)) dim = 2 1.993 -> 1.995 ( +0.10%) [ +0.55% +0.00% +0.10% / +0.10% +0.80% +1.20%] index_select const : Elapsed 0.020 ms (2.004 ms / 100) 2.041 -> 2.040 ( -0.05%) [ +0.15% +0.29% +0.00% / +0.24% +0.10% -0.05%] index_select wrap : Elapsed 0.020 ms (2.044 ms / 100) 2.040 -> 2.044 ( +0.20%) [ +0.00% +0.34% +0.25% / +0.20% +0.20% +0.34%] index_select linear : Elapsed 0.020 ms (2.040 ms / 100) 2.045 -> 2.045 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.24% +0.10% +0.00%] index_select reverse : Elapsed 0.020 ms (2.045 ms / 100) 1.992 -> 1.995 ( +0.15%) [ +0.50% +0.00% +0.40% / +0.15% +1.05% +0.50%] index_select skip64 : Elapsed 0.020 ms (2.002 ms / 100) 1.989 -> 1.993 ( +0.20%) [ +0.30% +0.00% +0.20% / +0.20% +0.50% +0.45%] index_select skip256 : Elapsed 0.020 ms (1.995 ms / 100) 2.032 -> 2.045 ( +0.64%) [ +0.39% +0.20% +0.00% / +0.64% +0.84% +0.89%] index_select spread : Elapsed 0.020 ms (2.040 ms / 100) 2.044 -> 2.042 ( -0.10%) [ +0.20% +0.24% +0.00% / -0.10% +0.44% +0.64%] index_select strided 3 : Elapsed 0.020 ms (2.048 ms / 100) 2.043 -> 2.047 ( +0.20%) [ +0.00% +0.00% +0.05% / +0.24% +0.20% +0.34%] index_select strided 5 : Elapsed 0.020 ms (2.043 ms / 100) 2.051 -> 2.049 ( -0.10%) [ +0.00% +0.05% +0.10% / -0.10% +0.20% +0.15%] index_select strided 7 : Elapsed 0.021 ms (2.051 ms / 100) 2.042 -> 2.045 ( +0.15%) [ +0.34% +0.00% +0.10% / +0.15% +0.24% +0.34%] index_select strided 8 : Elapsed 0.020 ms (2.049 ms / 100) 2.040 -> 2.044 ( +0.20%) [ +0.00% +0.10% +0.10% / +0.20% +0.39% +0.39%] index_select strided 16 : Elapsed 0.020 ms (2.040 ms / 100) 2.040 -> 2.040 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.69%] index_select random : Elapsed 0.020 ms (2.040 ms / 100) 2.041 -> 2.042 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.44% +0.20%] index_select random_sorted : Elapsed 0.020 ms (2.041 ms / 100) 2.038 -> 2.042 ( +0.20%) [ +0.15% +0.00% +0.05% / +0.20% +0.64% +0.49%] index_select perm : Elapsed 0.020 ms (2.041 ms / 100) 2.039 -> 2.038 ( -0.05%) [ +0.00% +0.00% +0.20% / -0.05% +0.34% +0.49%] index_select perm_sorted : Elapsed 0.020 ms (2.039 ms / 100) out_shape = [16, 40, 20, 4] in_shape = [16, 40, 20, 5] idx_dim = 3 B = [16, 40, 20, 4] (stride (3200, 4, 160, 1)) A = [16, 40, 20, 5] (stride (1, 16, 640, 12800)) dim = 3 5.784 -> 5.793 ( +0.16%) [ +0.02% +0.00% +0.09% / +0.16% +0.21% +0.19%] index_select const : Elapsed 0.058 ms (5.785 ms / 100) 5.864 -> 5.861 ( -0.05%) [ +0.00% +0.20% +0.10% / +0.15% -0.05% -0.02%] index_select wrap : Elapsed 0.059 ms (5.864 ms / 100) 5.865 -> 5.859 ( -0.10%) [ +0.02% +0.00% +0.03% / +0.09% -0.07% -0.10%] index_select linear : Elapsed 0.059 ms (5.866 ms / 100) 5.868 -> 5.857 ( -0.19%) [ +0.02% +0.00% +0.07% / +0.10% -0.15% -0.19%] index_select reverse : Elapsed 0.059 ms (5.869 ms / 100) 5.785 -> 5.780 ( -0.09%) [ +0.00% +0.02% +0.12% / -0.09% +0.12% +0.14%] index_select skip64 : Elapsed 0.058 ms (5.785 ms / 100) 5.785 -> 5.789 ( +0.07%) [ +0.03% +0.00% +0.09% / +0.07% +0.10% +0.14%] index_select skip256 : Elapsed 0.058 ms (5.787 ms / 100) 5.868 -> 5.857 ( -0.19%) [ +0.05% +0.00% +0.00% / +0.12% -0.12% -0.19%] index_select spread : Elapsed 0.059 ms (5.871 ms / 100) 5.856 -> 5.850 ( -0.10%) [ +0.00% +0.02% +0.19% / +0.17% -0.05% -0.10%] index_select strided 3 : Elapsed 0.059 ms (5.856 ms / 100) 5.815 -> 5.802 ( -0.22%) [ +0.00% +0.03% +0.07% / +0.10% -0.22% -0.17%] index_select random : Elapsed 0.058 ms (5.815 ms / 100) 5.812 -> 5.804 ( -0.14%) [ +0.07% +0.00% +0.07% / +0.21% -0.10% -0.14%] index_select random_sorted : Elapsed 0.058 ms (5.816 ms / 100) 5.870 -> 5.861 ( -0.15%) [ +0.00% +0.12% +0.07% / +0.09% -0.15% -0.12%] index_select perm : Elapsed 0.059 ms (5.870 ms / 100) 5.867 -> 5.857 ( -0.17%) [ +0.00% +0.05% +0.12% / +0.09% -0.17% -0.14%] index_select perm_sorted : Elapsed 0.059 ms (5.867 ms / 100) B = [16, 40, 20, 4] (stride (3200, 20, 1, 800)) A = [16, 40, 20, 5] (stride (4000, 1, 200, 40)) dim = 3 5.469 -> 5.479 ( +0.18%) [ +0.00% +0.07% +0.20% / +0.18% +0.33% +0.37%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.523 -> 5.529 ( +0.11%) [ +0.00% +0.00% +0.13% / +0.11% +0.45% +0.40%] index_select wrap : Elapsed 0.055 ms (5.523 ms / 100) 5.517 -> 5.532 ( +0.27%) [ +0.00% +0.02% +0.09% / +0.27% +0.58% +0.51%] index_select linear : Elapsed 0.055 ms (5.517 ms / 100) 5.517 -> 5.527 ( +0.18%) [ +0.05% +0.00% +0.16% / +0.18% +0.49% +0.56%] index_select reverse : Elapsed 0.055 ms (5.520 ms / 100) 5.468 -> 5.477 ( +0.16%) [ +0.13% +0.00% +0.26% / +0.16% +0.49% +0.42%] index_select skip64 : Elapsed 0.055 ms (5.475 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.00% +0.11% +0.13% / +0.07% +0.31% +0.26%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.518 -> 5.530 ( +0.22%) [ +0.13% +0.00% +0.13% / +0.22% +0.58% +0.67%] index_select spread : Elapsed 0.055 ms (5.525 ms / 100) 5.534 -> 5.542 ( +0.14%) [ +0.00% +0.07% +0.13% / +0.14% +0.16% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.534 ms / 100) 5.514 -> 5.527 ( +0.24%) [ +0.00% +0.09% +0.24% / +0.25% +0.25% +0.24%] index_select random : Elapsed 0.055 ms (5.514 ms / 100) 5.511 -> 5.520 ( +0.16%) [ +0.00% +0.02% +0.15% / +0.16% +0.31% +0.40%] index_select random_sorted : Elapsed 0.055 ms (5.511 ms / 100) 5.538 -> 5.536 ( -0.04%) [ +0.04% +0.00% +0.11% / +0.00% -0.04% +0.31%] index_select perm : Elapsed 0.055 ms (5.540 ms / 100) 5.521 -> 5.530 ( +0.16%) [ +0.00% +0.13% +0.22% / +0.16% +0.29% +0.40%] index_select perm_sorted : Elapsed 0.055 ms (5.521 ms / 100) B = [16, 40, 20, 4] (stride (3200, 1, 40, 800)) A = [16, 40, 20, 5] (stride (40, 1, 3200, 640)) dim = 3 5.503 -> 5.481 ( -0.40%) [ +0.00% +0.05% +0.07% / +0.02% -0.38% -0.40%] index_select const : Elapsed 0.055 ms (5.503 ms / 100) 5.540 -> 5.542 ( +0.04%) [ +0.13% +0.00% +0.02% / +0.11% +0.04% +0.13%] index_select wrap : Elapsed 0.055 ms (5.547 ms / 100) 5.540 -> 5.536 ( -0.07%) [ +0.00% +0.00% +0.05% / -0.07% +0.04% +0.14%] index_select linear : Elapsed 0.055 ms (5.540 ms / 100) 5.553 -> 5.559 ( +0.11%) [ +0.04% +0.00% +0.05% / +0.11% +0.11% +0.13%] index_select reverse : Elapsed 0.056 ms (5.555 ms / 100) 5.502 -> 5.481 ( -0.38%) [ +0.00% +0.07% +0.18% / +0.24% -0.38% -0.29%] index_select skip64 : Elapsed 0.055 ms (5.502 ms / 100) 5.506 -> 5.476 ( -0.54%) [ +0.00% +0.11% +0.16% / +0.00% -0.33% -0.54%] index_select skip256 : Elapsed 0.055 ms (5.506 ms / 100) 5.538 -> 5.542 ( +0.07%) [ +0.05% +0.00% +0.00% / +0.07% +0.13% +0.25%] index_select spread : Elapsed 0.055 ms (5.541 ms / 100) 5.552 -> 5.548 ( -0.07%) [ +0.00% +0.25% +0.23% / +0.22% -0.07% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.552 ms / 100) 5.513 -> 5.522 ( +0.16%) [ +0.00% +0.07% +0.07% / +0.16% +0.49% +0.58%] index_select random : Elapsed 0.055 ms (5.513 ms / 100) 5.508 -> 5.522 ( +0.25%) [ +0.15% +0.00% +0.16% / +0.25% +0.53% +0.67%] index_select random_sorted : Elapsed 0.055 ms (5.516 ms / 100) 5.534 -> 5.546 ( +0.22%) [ +0.07% +0.00% +0.16% / +0.22% +0.27% +0.52%] index_select perm : Elapsed 0.055 ms (5.538 ms / 100) 5.546 -> 5.552 ( +0.11%) [ +0.09% +0.00% +0.14% / +0.11% +0.32% +0.11%] index_select perm_sorted : Elapsed 0.056 ms (5.551 ms / 100) B = [16, 40, 20, 4] (stride (80, 1280, 4, 1)) A = [16, 40, 20, 5] (stride (1, 1600, 80, 16)) dim = 3 5.601 -> 5.590 ( -0.20%) [ +0.00% +0.00% +0.07% / +0.16% -0.20% -0.05%] index_select const : Elapsed 0.056 ms (5.601 ms / 100) 5.663 -> 5.657 ( -0.11%) [ +0.09% +0.00% +0.12% / +0.14% -0.09% -0.11%] index_select wrap : Elapsed 0.057 ms (5.668 ms / 100) 5.663 -> 5.657 ( -0.11%) [ +0.00% +0.16% +0.11% / +0.05% -0.11% -0.11%] index_select linear : Elapsed 0.057 ms (5.663 ms / 100) 5.663 -> 5.660 ( -0.05%) [ +0.00% +0.05% +0.21% / +0.16% -0.04% -0.05%] index_select reverse : Elapsed 0.057 ms (5.663 ms / 100) 5.600 -> 5.589 ( -0.20%) [ +0.00% +0.02% +0.04% / -0.04% -0.20% -0.14%] index_select skip64 : Elapsed 0.056 ms (5.600 ms / 100) 5.595 -> 5.595 ( +0.00%) [ +0.05% +0.00% +0.13% / +0.16% +0.00% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.598 ms / 100) 5.666 -> 5.656 ( -0.18%) [ +0.04% +0.00% +0.11% / +0.12% -0.12% -0.18%] index_select spread : Elapsed 0.057 ms (5.668 ms / 100) 5.682 -> 5.672 ( -0.18%) [ +0.00% +0.05% +0.12% / +0.18% -0.05% -0.18%] index_select strided 3 : Elapsed 0.057 ms (5.682 ms / 100) 5.686 -> 5.668 ( -0.32%) [ +0.04% +0.02% +0.00% / -0.07% -0.32% -0.25%] index_select random : Elapsed 0.057 ms (5.688 ms / 100) 5.661 -> 5.649 ( -0.21%) [ +0.05% +0.00% +0.19% / +0.23% -0.18% -0.21%] index_select random_sorted : Elapsed 0.057 ms (5.664 ms / 100) 5.676 -> 5.658 ( -0.32%) [ +0.09% +0.00% +0.33% / +0.23% -0.28% -0.32%] index_select perm : Elapsed 0.057 ms (5.681 ms / 100) 5.674 -> 5.656 ( -0.32%) [ +0.04% +0.00% +0.07% / +0.00% -0.32% -0.30%] index_select perm_sorted : Elapsed 0.057 ms (5.676 ms / 100) B = [16, 40, 20, 4] (stride (4, 64, 2560, 1)) A = [16, 40, 20, 5] (stride (100, 1600, 5, 1)) dim = 3 6.017 -> 6.023 ( +0.10%) [ +0.13% +0.00% +0.08% / +0.17% +0.15% +0.10%] index_select const : Elapsed 0.060 ms (6.025 ms / 100) 6.020 -> 6.021 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.10% +0.10%] index_select wrap : Elapsed 0.060 ms (6.024 ms / 100) 6.017 -> 6.016 ( -0.02%) [ +0.00% +0.07% +0.07% / -0.02% +0.18% +0.28%] index_select linear : Elapsed 0.060 ms (6.017 ms / 100) 6.016 -> 6.021 ( +0.08%) [ +0.02% +0.00% +0.05% / +0.10% +0.15% +0.08%] index_select reverse : Elapsed 0.060 ms (6.017 ms / 100) 6.014 -> 6.020 ( +0.10%) [ +0.10% +0.00% +0.22% / +0.10% +0.27% +0.18%] index_select skip64 : Elapsed 0.060 ms (6.020 ms / 100) 6.014 -> 6.026 ( +0.20%) [ +0.00% +0.08% +0.12% / +0.20% +0.25% +0.30%] index_select skip256 : Elapsed 0.060 ms (6.014 ms / 100) 6.016 -> 6.018 ( +0.03%) [ +0.00% +0.02% +0.07% / +0.03% +0.27% +0.23%] index_select spread : Elapsed 0.060 ms (6.016 ms / 100) 6.013 -> 6.020 ( +0.12%) [ +0.12% +0.00% +0.03% / +0.12% +0.25% +0.30%] index_select strided 3 : Elapsed 0.060 ms (6.020 ms / 100) 6.015 -> 6.022 ( +0.12%) [ +0.08% +0.00% +0.10% / +0.12% +0.18% +0.23%] index_select random : Elapsed 0.060 ms (6.020 ms / 100) 6.015 -> 6.022 ( +0.12%) [ +0.17% +0.10% +0.00% / +0.12% +0.18% +0.15%] index_select random_sorted : Elapsed 0.060 ms (6.025 ms / 100) 6.018 -> 6.019 ( +0.02%) [ +0.00% +0.05% +0.08% / +0.07% +0.03% +0.02%] index_select perm : Elapsed 0.060 ms (6.018 ms / 100) 6.013 -> 6.024 ( +0.18%) [ +0.03% +0.00% +0.12% / +0.20% +0.28% +0.18%] index_select perm_sorted : Elapsed 0.060 ms (6.015 ms / 100) B = [16, 40, 20, 4] (stride (1, 320, 16, 12800)) A = [16, 40, 20, 5] (stride (800, 20, 1, 12800)) dim = 3 4.974 -> 4.973 ( -0.02%) [ +0.08% +0.00% +0.06% / -0.02% +0.12% +0.14%] index_select const : Elapsed 0.050 ms (4.978 ms / 100) 5.055 -> 5.051 ( -0.08%) [ +0.00% +0.02% +0.06% / -0.08% +0.08% +0.10%] index_select wrap : Elapsed 0.051 ms (5.055 ms / 100) 5.048 -> 5.054 ( +0.12%) [ +0.18% +0.00% +0.12% / +0.20% +0.12% +0.26%] index_select linear : Elapsed 0.051 ms (5.057 ms / 100) 5.045 -> 5.054 ( +0.18%) [ +0.04% +0.20% +0.00% / +0.18% +0.18% +0.20%] index_select reverse : Elapsed 0.050 ms (5.047 ms / 100) 4.970 -> 4.971 ( +0.02%) [ +0.08% +0.00% +0.04% / +0.22% +0.02% +0.04%] index_select skip64 : Elapsed 0.050 ms (4.974 ms / 100) 4.972 -> 4.976 ( +0.08%) [ +0.00% +0.06% +0.08% / +0.08% +0.16% +0.08%] index_select skip256 : Elapsed 0.050 ms (4.972 ms / 100) 5.053 -> 5.057 ( +0.08%) [ +0.04% +0.00% +0.06% / +0.08% +0.24% +0.22%] index_select spread : Elapsed 0.051 ms (5.055 ms / 100) 5.049 -> 5.052 ( +0.06%) [ +0.08% +0.00% +0.10% / +0.06% +0.06% +0.10%] index_select strided 3 : Elapsed 0.051 ms (5.053 ms / 100) 5.020 -> 5.029 ( +0.18%) [ +0.24% +0.00% +0.30% / +0.18% +0.24% +0.18%] index_select random : Elapsed 0.050 ms (5.032 ms / 100) 5.026 -> 5.027 ( +0.02%) [ +0.32% +0.18% +0.00% / +0.02% +0.06% +0.08%] index_select random_sorted : Elapsed 0.050 ms (5.042 ms / 100) 5.048 -> 5.047 ( -0.02%) [ +0.24% +0.02% +0.00% / -0.02% +0.24% +0.06%] index_select perm : Elapsed 0.051 ms (5.060 ms / 100) 5.049 -> 5.052 ( +0.06%) [ +0.08% +0.00% +0.10% / +0.06% +0.22% +0.08%] index_select perm_sorted : Elapsed 0.051 ms (5.053 ms / 100) B = [16, 40, 20, 4] (stride (40, 1, 640, 12800)) A = [16, 40, 20, 5] (stride (100, 1600, 5, 1)) dim = 3 5.666 -> 5.649 ( -0.30%) [ +0.09% +0.00% +0.19% / +0.07% -0.30% -0.28%] index_select const : Elapsed 0.057 ms (5.671 ms / 100) 5.669 -> 5.651 ( -0.32%) [ +0.00% +0.04% +0.18% / +0.05% -0.32% -0.26%] index_select wrap : Elapsed 0.057 ms (5.669 ms / 100) 5.665 -> 5.643 ( -0.39%) [ +0.00% +0.07% +0.07% / +0.02% -0.25% -0.39%] index_select linear : Elapsed 0.057 ms (5.665 ms / 100) 5.664 -> 5.651 ( -0.23%) [ +0.00% +0.12% +0.07% / +0.12% -0.23% -0.21%] index_select reverse : Elapsed 0.057 ms (5.664 ms / 100) 5.665 -> 5.649 ( -0.28%) [ +0.02% +0.00% +0.07% / +0.12% -0.28% -0.26%] index_select skip64 : Elapsed 0.057 ms (5.666 ms / 100) 5.664 -> 5.648 ( -0.28%) [ +0.00% +0.02% +0.09% / +0.09% -0.28% -0.25%] index_select skip256 : Elapsed 0.057 ms (5.664 ms / 100) 5.670 -> 5.656 ( -0.25%) [ +0.02% +0.00% +0.00% / +0.07% -0.25% -0.25%] index_select spread : Elapsed 0.057 ms (5.671 ms / 100) 5.659 -> 5.644 ( -0.27%) [ +0.00% +0.07% +0.14% / +0.25% -0.25% -0.27%] index_select strided 3 : Elapsed 0.057 ms (5.659 ms / 100) 5.663 -> 5.648 ( -0.26%) [ +0.11% +0.00% +0.23% / +0.18% -0.26% -0.23%] index_select random : Elapsed 0.057 ms (5.669 ms / 100) 5.669 -> 5.646 ( -0.41%) [ +0.00% +0.02% +0.05% / +0.09% -0.41% -0.41%] index_select random_sorted : Elapsed 0.057 ms (5.669 ms / 100) 5.668 -> 5.652 ( -0.28%) [ +0.07% +0.00% +0.09% / +0.11% -0.21% -0.28%] index_select perm : Elapsed 0.057 ms (5.672 ms / 100) 5.665 -> 5.645 ( -0.35%) [ +0.00% +0.02% +0.02% / +0.02% -0.35% -0.25%] index_select perm_sorted : Elapsed 0.057 ms (5.665 ms / 100) out_shape = [4, 5, 16, 40] in_shape = [20, 5, 16, 40] idx_dim = 0 B = [4, 5, 16, 40] (stride (3200, 640, 1, 16)) A = [20, 5, 16, 40] (stride (640, 12800, 40, 1)) dim = 0 1.900 -> 1.904 ( +0.21%) [ +0.11% +0.00% +0.21% / +0.21% +0.79% +0.84%] index_select const : Elapsed 0.019 ms (1.902 ms / 100) 1.907 -> 1.909 ( +0.10%) [ +0.16% +0.10% +0.00% / +0.10% +3.62% +3.62%] index_select wrap : Elapsed 0.019 ms (1.910 ms / 100) 1.905 -> 1.913 ( +0.42%) [ +0.26% +0.00% +0.21% / +0.42% +3.62% +3.67%] index_select linear : Elapsed 0.019 ms (1.910 ms / 100) 1.911 -> 1.913 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +3.24% +3.45%] index_select reverse : Elapsed 0.019 ms (1.911 ms / 100) 1.901 -> 1.905 ( +0.21%) [ +0.16% +0.11% +0.00% / +0.21% +0.89% +0.89%] index_select skip64 : Elapsed 0.019 ms (1.904 ms / 100) 1.900 -> 1.903 ( +0.16%) [ +0.21% +0.00% +0.21% / +0.16% +0.79% +0.84%] index_select skip256 : Elapsed 0.019 ms (1.904 ms / 100) 1.945 -> 1.941 ( -0.21%) [ +0.00% +0.00% +0.21% / +0.05% -0.05% -0.21%] index_select spread : Elapsed 0.019 ms (1.945 ms / 100) 1.931 -> 1.936 ( +0.26%) [ +0.00% +0.36% +0.21% / +0.26% +0.98% +0.73%] index_select strided 3 : Elapsed 0.019 ms (1.931 ms / 100) 1.944 -> 1.940 ( -0.21%) [ +0.10% +0.00% +0.00% / +0.10% -0.10% -0.21%] index_select strided 5 : Elapsed 0.019 ms (1.946 ms / 100) 1.931 -> 1.934 ( +0.16%) [ +0.00% +0.05% +0.16% / +0.16% +1.24% +1.09%] index_select strided 7 : Elapsed 0.019 ms (1.931 ms / 100) 1.927 -> 1.931 ( +0.21%) [ +0.00% +0.05% +0.00% / +0.21% +1.09% +0.99%] index_select strided 8 : Elapsed 0.019 ms (1.927 ms / 100) 1.940 -> 1.941 ( +0.05%) [ +0.21% +0.00% +0.26% / +0.05% +0.52% +0.31%] index_select strided 16 : Elapsed 0.019 ms (1.944 ms / 100) 1.924 -> 1.924 ( +0.00%) [ +0.00% +0.10% +0.16% / +0.00% +1.98% +1.77%] index_select random : Elapsed 0.019 ms (1.924 ms / 100) 1.925 -> 1.932 ( +0.36%) [ +0.00% +0.00% +0.10% / +0.36% +1.71% +1.97%] index_select random_sorted : Elapsed 0.019 ms (1.925 ms / 100) 1.942 -> 1.941 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.00% +0.05%] index_select perm : Elapsed 0.019 ms (1.942 ms / 100) 1.952 -> 1.930 ( -1.13%) [ +0.20% +0.00% +0.15% / -0.10% -0.92% -1.13%] index_select perm_sorted : Elapsed 0.020 ms (1.956 ms / 100) B = [4, 5, 16, 40] (stride (40, 160, 800, 1)) A = [20, 5, 16, 40] (stride (200, 1, 4000, 5)) dim = 0 2.020 -> 2.027 ( +0.35%) [ +0.00% +0.15% +0.20% / +0.35% +0.84% +0.69%] index_select const : Elapsed 0.020 ms (2.020 ms / 100) 2.021 -> 2.024 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +0.30% +0.25%] index_select wrap : Elapsed 0.020 ms (2.021 ms / 100) 2.017 -> 2.021 ( +0.20%) [ +0.30% +0.00% +0.10% / +0.20% +0.55% +0.40%] index_select linear : Elapsed 0.020 ms (2.023 ms / 100) 2.016 -> 2.020 ( +0.20%) [ +0.20% +0.00% +0.30% / +0.20% +0.40% +0.55%] index_select reverse : Elapsed 0.020 ms (2.020 ms / 100) 2.024 -> 2.023 ( -0.05%) [ +0.00% +0.15% +0.00% / -0.05% +0.54% +0.44%] index_select skip64 : Elapsed 0.020 ms (2.024 ms / 100) 2.020 -> 2.020 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.35% +0.30%] index_select skip256 : Elapsed 0.020 ms (2.022 ms / 100) 2.014 -> 2.017 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.40% +0.55%] index_select spread : Elapsed 0.020 ms (2.016 ms / 100) 2.022 -> 2.022 ( +0.00%) [ +0.15% +0.00% +0.05% / +0.00% +0.40% +0.15%] index_select strided 3 : Elapsed 0.020 ms (2.025 ms / 100) 2.019 -> 2.024 ( +0.25%) [ +0.15% +0.00% +0.15% / +0.25% +0.45% +0.45%] index_select strided 5 : Elapsed 0.020 ms (2.022 ms / 100) 2.019 -> 2.017 ( -0.10%) [ +0.10% +0.30% +0.00% / -0.10% +0.45% +0.40%] index_select strided 7 : Elapsed 0.020 ms (2.021 ms / 100) 2.018 -> 2.022 ( +0.20%) [ +0.20% +0.00% +0.00% / +0.20% +0.64% +0.69%] index_select strided 8 : Elapsed 0.020 ms (2.022 ms / 100) 2.021 -> 2.023 ( +0.10%) [ +0.10% +0.25% +0.00% / +0.10% +0.59% +1.04%] index_select strided 16 : Elapsed 0.020 ms (2.023 ms / 100) 2.014 -> 2.014 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.45% +0.70%] index_select random : Elapsed 0.020 ms (2.014 ms / 100) 2.018 -> 2.018 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.35% +0.30%] index_select random_sorted : Elapsed 0.020 ms (2.020 ms / 100) 2.017 -> 2.018 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.59% +0.45%] index_select perm : Elapsed 0.020 ms (2.018 ms / 100) 2.011 -> 2.013 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.55% +0.45%] index_select perm_sorted : Elapsed 0.020 ms (2.013 ms / 100) B = [4, 5, 16, 40] (stride (5, 1, 800, 20)) A = [20, 5, 16, 40] (stride (16, 320, 1, 1600)) dim = 0 2.074 -> 2.075 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.10% +0.39%] index_select const : Elapsed 0.021 ms (2.076 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.05% +0.00%] index_select wrap : Elapsed 0.021 ms (2.089 ms / 100) 2.086 -> 2.089 ( +0.14%) [ +0.00% +0.24% +0.19% / +0.14% +0.29% +0.19%] index_select linear : Elapsed 0.021 ms (2.086 ms / 100) 2.088 -> 2.085 ( -0.14%) [ +0.10% +0.00% +0.05% / +0.05% -0.14% +0.05%] index_select reverse : Elapsed 0.021 ms (2.090 ms / 100) 2.074 -> 2.073 ( -0.05%) [ +0.19% +0.19% +0.00% / +0.05% -0.05% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.078 ms / 100) 2.073 -> 2.075 ( +0.10%) [ +0.19% +0.00% +0.14% / +0.24% +0.10% +0.34%] index_select skip256 : Elapsed 0.021 ms (2.077 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.24% +0.00% +0.19% / +0.24% +0.19% +0.29%] index_select spread : Elapsed 0.021 ms (2.085 ms / 100) 2.081 -> 2.087 ( +0.29%) [ +0.19% +0.14% +0.00% / +0.29% +0.34% +0.48%] index_select strided 3 : Elapsed 0.021 ms (2.085 ms / 100) 2.084 -> 2.081 ( -0.14%) [ +0.19% +0.00% +0.14% / +0.00% +0.19% -0.14%] index_select strided 5 : Elapsed 0.021 ms (2.088 ms / 100) 2.067 -> 2.068 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.19% +0.15%] index_select strided 7 : Elapsed 0.021 ms (2.069 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.00% +0.00% +0.00% / +0.10% +0.34% -0.05%] index_select strided 8 : Elapsed 0.021 ms (2.079 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.05% +0.00% +0.14% / +0.29% +0.14% +0.00%] index_select strided 16 : Elapsed 0.021 ms (2.080 ms / 100) 2.070 -> 2.068 ( -0.10%) [ +0.00% +0.19% +0.00% / +0.10% -0.10% -0.10%] index_select random : Elapsed 0.021 ms (2.070 ms / 100) 2.083 -> 2.084 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.14% +0.10% +0.05%] index_select random_sorted : Elapsed 0.021 ms (2.083 ms / 100) 2.076 -> 2.078 ( +0.10%) [ +0.14% +0.14% +0.00% / +0.10% +0.10% +0.19%] index_select perm : Elapsed 0.021 ms (2.079 ms / 100) 2.074 -> 2.078 ( +0.19%) [ +0.00% +0.05% +0.10% / +0.19% +0.24% +0.29%] index_select perm_sorted : Elapsed 0.021 ms (2.074 ms / 100) B = [4, 5, 16, 40] (stride (80, 16, 1, 320)) A = [20, 5, 16, 40] (stride (1, 20, 4000, 100)) dim = 0 0.823 -> 0.820 ( -0.36%) [ +0.61% +0.49% +0.00% / -0.36% +3.04% +3.16%] index_select const : Elapsed 0.008 ms (0.828 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.00% +0.97% +0.36% / +0.24% +1.69% +1.69%] index_select wrap : Elapsed 0.008 ms (0.829 ms / 100) 0.830 -> 0.834 ( +0.48%) [ +0.12% +0.00% +0.24% / +0.48% +1.33% +1.20%] index_select linear : Elapsed 0.008 ms (0.831 ms / 100) 0.830 -> 0.836 ( +0.72%) [ +0.12% +0.00% +0.12% / +0.72% +1.81% +1.20%] index_select reverse : Elapsed 0.008 ms (0.831 ms / 100) 0.830 -> 0.836 ( +0.72%) [ +0.36% +0.00% +0.24% / +0.72% +1.45% +1.57%] index_select skip64 : Elapsed 0.008 ms (0.833 ms / 100) 0.819 -> 0.827 ( +0.98%) [ +1.22% +0.73% +0.00% / +0.98% +3.42% +4.03%] index_select skip256 : Elapsed 0.008 ms (0.829 ms / 100) 0.841 -> 0.844 ( +0.36%) [ +0.83% +0.12% +0.00% / +0.36% +3.92% +3.45%] index_select spread : Elapsed 0.008 ms (0.848 ms / 100) 0.835 -> 0.836 ( +0.12%) [ +0.12% +0.00% +0.36% / +0.12% +3.23% +2.99%] index_select strided 3 : Elapsed 0.008 ms (0.836 ms / 100) 0.843 -> 0.844 ( +0.12%) [ +0.12% +0.24% +0.00% / +0.12% +2.73% +3.08%] index_select strided 5 : Elapsed 0.008 ms (0.844 ms / 100) 0.835 -> 0.837 ( +0.24%) [ +0.24% +0.36% +0.00% / +0.24% +2.99% +2.75%] index_select strided 7 : Elapsed 0.008 ms (0.837 ms / 100) 0.831 -> 0.837 ( +0.72%) [ +0.36% +0.24% +0.00% / +0.72% +3.49% +3.01%] index_select strided 8 : Elapsed 0.008 ms (0.834 ms / 100) 0.838 -> 0.841 ( +0.36%) [ +0.00% +0.72% +0.48% / +0.36% +3.22% +3.22%] index_select strided 16 : Elapsed 0.008 ms (0.838 ms / 100) 0.839 -> 0.837 ( -0.24%) [ +0.48% +0.00% +0.60% / -0.24% +3.22% +2.98%] index_select random : Elapsed 0.008 ms (0.843 ms / 100) 0.840 -> 0.840 ( +0.00%) [ +0.12% +0.00% +0.36% / +0.00% +3.21% +3.21%] index_select random_sorted : Elapsed 0.008 ms (0.841 ms / 100) 0.835 -> 0.830 ( -0.60%) [ +0.00% +0.12% +0.36% / -0.60% +2.75% +2.51%] index_select perm : Elapsed 0.008 ms (0.835 ms / 100) 0.835 -> 0.838 ( +0.36%) [ +0.00% +0.60% +0.60% / +0.36% +2.63% +2.63%] index_select perm_sorted : Elapsed 0.008 ms (0.835 ms / 100) out_shape = [20, 4, 16, 40] in_shape = [20, 5, 16, 40] idx_dim = 1 B = [20, 4, 16, 40] (stride (2560, 640, 1, 16)) A = [20, 5, 16, 40] (stride (1, 20, 4000, 100)) dim = 1 5.552 -> 5.539 ( -0.23%) [ +0.00% +0.05% +0.00% / +0.00% -0.23% -0.23%] index_select const : Elapsed 0.056 ms (5.552 ms / 100) 5.598 -> 5.584 ( -0.25%) [ +0.00% +0.00% +0.13% / +0.11% -0.25% -0.20%] index_select wrap : Elapsed 0.056 ms (5.598 ms / 100) 5.600 -> 5.577 ( -0.41%) [ +0.00% +0.04% +0.18% / -0.05% -0.41% -0.18%] index_select linear : Elapsed 0.056 ms (5.600 ms / 100) 5.591 -> 5.584 ( -0.13%) [ +0.05% +0.00% +0.23% / +0.14% -0.13% +0.07%] index_select reverse : Elapsed 0.056 ms (5.594 ms / 100) 5.550 -> 5.538 ( -0.22%) [ +0.13% +0.00% +0.11% / +0.04% -0.16% -0.22%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.549 -> 5.536 ( -0.23%) [ +0.18% +0.00% +0.11% / +0.02% -0.23% -0.23%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.593 -> 5.584 ( -0.16%) [ +0.00% +0.07% +0.23% / +0.21% -0.13% -0.16%] index_select spread : Elapsed 0.056 ms (5.593 ms / 100) 5.617 -> 5.598 ( -0.34%) [ +0.20% +0.00% +0.20% / +0.11% -0.34% -0.28%] index_select strided 3 : Elapsed 0.056 ms (5.628 ms / 100) 5.582 -> 5.576 ( -0.11%) [ +0.00% +0.16% +0.14% / +0.04% -0.11% +0.11%] index_select random : Elapsed 0.056 ms (5.582 ms / 100) 5.582 -> 5.583 ( +0.02%) [ +0.00% +0.07% +0.14% / +0.13% +0.02% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) 5.609 -> 5.596 ( -0.23%) [ +0.02% +0.07% +0.00% / -0.02% -0.09% -0.23%] index_select perm : Elapsed 0.056 ms (5.610 ms / 100) 5.597 -> 5.596 ( -0.02%) [ +0.00% +0.05% +0.11% / +0.30% -0.02% +0.09%] index_select perm_sorted : Elapsed 0.056 ms (5.597 ms / 100) B = [20, 4, 16, 40] (stride (16, 12800, 1, 320)) A = [20, 5, 16, 40] (stride (5, 1, 4000, 100)) dim = 1 5.736 -> 5.730 ( -0.10%) [ +0.02% +0.00% +0.00% / +0.10% +0.05% -0.10%] index_select const : Elapsed 0.057 ms (5.737 ms / 100) 5.729 -> 5.730 ( +0.02%) [ +0.03% +0.00% +0.10% / +0.02% +0.12% +0.19%] index_select wrap : Elapsed 0.057 ms (5.731 ms / 100) 5.729 -> 5.731 ( +0.03%) [ +0.00% +0.07% +0.02% / +0.12% +0.03% +0.07%] index_select linear : Elapsed 0.057 ms (5.729 ms / 100) 5.732 -> 5.730 ( -0.03%) [ +0.03% +0.00% +0.00% / +0.07% -0.03% -0.02%] index_select reverse : Elapsed 0.057 ms (5.734 ms / 100) 5.730 -> 5.732 ( +0.03%) [ +0.07% +0.00% +0.10% / +0.05% +0.12% +0.03%] index_select skip64 : Elapsed 0.057 ms (5.734 ms / 100) 5.733 -> 5.737 ( +0.07%) [ +0.00% +0.05% +0.03% / +0.09% +0.10% +0.07%] index_select skip256 : Elapsed 0.057 ms (5.733 ms / 100) 5.729 -> 5.733 ( +0.07%) [ +0.07% +0.00% +0.16% / +0.07% +0.07% +0.07%] index_select spread : Elapsed 0.057 ms (5.733 ms / 100) 5.727 -> 5.731 ( +0.07%) [ +0.02% +0.09% +0.00% / +0.12% +0.12% +0.07%] index_select strided 3 : Elapsed 0.057 ms (5.728 ms / 100) 5.728 -> 5.726 ( -0.03%) [ +0.00% +0.02% +0.05% / +0.10% -0.02% -0.03%] index_select random : Elapsed 0.057 ms (5.728 ms / 100) 5.731 -> 5.730 ( -0.02%) [ +0.00% +0.00% +0.02% / +0.09% +0.02% -0.02%] index_select random_sorted : Elapsed 0.057 ms (5.731 ms / 100) 5.731 -> 5.731 ( +0.00%) [ +0.00% +0.03% +0.12% / +0.03% +0.00% +0.21%] index_select perm : Elapsed 0.057 ms (5.731 ms / 100) 5.733 -> 5.732 ( -0.02%) [ +0.00% +0.16% +0.07% / -0.02% +0.12% +0.00%] index_select perm_sorted : Elapsed 0.057 ms (5.733 ms / 100) B = [20, 4, 16, 40] (stride (160, 40, 3200, 1)) A = [20, 5, 16, 40] (stride (40, 800, 4000, 1)) dim = 1 5.694 -> 5.696 ( +0.04%) [ +0.05% +0.00% +0.05% / +0.04% +0.30% +0.26%] index_select const : Elapsed 0.057 ms (5.697 ms / 100) 5.814 -> 5.800 ( -0.24%) [ +0.24% +0.00% +0.12% / +0.07% -0.21% -0.24%] index_select wrap : Elapsed 0.058 ms (5.828 ms / 100) 5.803 -> 5.799 ( -0.07%) [ +0.00% +0.07% +0.00% / +0.03% +0.00% -0.07%] index_select linear : Elapsed 0.058 ms (5.803 ms / 100) 5.808 -> 5.784 ( -0.41%) [ +0.17% +0.00% +0.07% / +0.09% -0.34% -0.41%] index_select reverse : Elapsed 0.058 ms (5.818 ms / 100) 5.675 -> 5.675 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.33% +0.48%] index_select skip64 : Elapsed 0.057 ms (5.676 ms / 100) 5.676 -> 5.682 ( +0.11%) [ +0.00% +0.11% +0.04% / +0.11% +0.51% +0.56%] index_select skip256 : Elapsed 0.057 ms (5.676 ms / 100) 5.814 -> 5.800 ( -0.24%) [ +0.12% +0.14% +0.00% / +0.05% -0.10% -0.24%] index_select spread : Elapsed 0.058 ms (5.821 ms / 100) 5.777 -> 5.772 ( -0.09%) [ +0.19% +0.00% +0.05% / +0.31% -0.09% +0.00%] index_select strided 3 : Elapsed 0.058 ms (5.788 ms / 100) 5.751 -> 5.756 ( +0.09%) [ +0.12% +0.05% +0.00% / +0.09% +0.38% +0.42%] index_select random : Elapsed 0.058 ms (5.758 ms / 100) 5.746 -> 5.744 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.12% +0.14%] index_select random_sorted : Elapsed 0.057 ms (5.746 ms / 100) 5.798 -> 5.787 ( -0.19%) [ +0.07% +0.00% +0.10% / -0.07% -0.14% -0.19%] index_select perm : Elapsed 0.058 ms (5.802 ms / 100) 5.802 -> 5.789 ( -0.22%) [ +0.02% +0.00% +0.14% / +0.10% -0.17% -0.22%] index_select perm_sorted : Elapsed 0.058 ms (5.803 ms / 100) B = [20, 4, 16, 40] (stride (40, 800, 3200, 1)) A = [20, 5, 16, 40] (stride (3200, 1, 5, 80)) dim = 1 5.919 -> 5.925 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.20% +0.10% +0.34%] index_select const : Elapsed 0.059 ms (5.922 ms / 100) 5.919 -> 5.919 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.14% +0.25%] index_select wrap : Elapsed 0.059 ms (5.920 ms / 100) 5.920 -> 5.919 ( -0.02%) [ +0.07% +0.00% +0.08% / -0.02% +0.10% +0.03%] index_select linear : Elapsed 0.059 ms (5.924 ms / 100) 5.916 -> 5.924 ( +0.14%) [ +0.12% +0.17% +0.00% / +0.14% +0.14% +0.15%] index_select reverse : Elapsed 0.059 ms (5.923 ms / 100) 5.917 -> 5.925 ( +0.14%) [ +0.00% +0.03% +0.17% / +0.22% +0.14% +0.14%] index_select skip64 : Elapsed 0.059 ms (5.917 ms / 100) 5.917 -> 5.924 ( +0.12%) [ +0.05% +0.00% +0.00% / +0.20% +0.17% +0.12%] index_select skip256 : Elapsed 0.059 ms (5.920 ms / 100) 5.917 -> 5.922 ( +0.08%) [ +0.05% +0.00% +0.10% / +0.08% +0.22% +0.15%] index_select spread : Elapsed 0.059 ms (5.920 ms / 100) 5.916 -> 5.920 ( +0.07%) [ +0.02% +0.00% +0.08% / +0.07% +0.29% +0.27%] index_select strided 3 : Elapsed 0.059 ms (5.917 ms / 100) 5.920 -> 5.917 ( -0.05%) [ +0.07% +0.00% +0.02% / -0.05% +0.15% +0.19%] index_select random : Elapsed 0.059 ms (5.924 ms / 100) 5.916 -> 5.924 ( +0.14%) [ +0.05% +0.00% +0.22% / +0.14% +0.17% +0.22%] index_select random_sorted : Elapsed 0.059 ms (5.919 ms / 100) 5.914 -> 5.921 ( +0.12%) [ +0.15% +0.00% +0.24% / +0.12% +0.17% +0.20%] index_select perm : Elapsed 0.059 ms (5.923 ms / 100) 5.918 -> 5.922 ( +0.07%) [ +0.08% +0.00% +0.14% / +0.25% +0.07% +0.25%] index_select perm_sorted : Elapsed 0.059 ms (5.923 ms / 100) B = [20, 4, 16, 40] (stride (4, 1, 3200, 80)) A = [20, 5, 16, 40] (stride (80, 16, 1, 1600)) dim = 1 5.647 -> 5.625 ( -0.39%) [ +0.05% +0.00% +0.00% / -0.04% -0.39% -0.37%] index_select const : Elapsed 0.057 ms (5.650 ms / 100) 5.716 -> 5.704 ( -0.21%) [ +0.00% +0.02% +0.05% / +0.23% -0.17% -0.21%] index_select wrap : Elapsed 0.057 ms (5.716 ms / 100) 5.715 -> 5.703 ( -0.21%) [ +0.00% +0.02% +0.12% / +0.26% -0.09% -0.21%] index_select linear : Elapsed 0.057 ms (5.715 ms / 100) 5.721 -> 5.706 ( -0.26%) [ +0.00% +0.09% +0.00% / +0.14% -0.26% -0.23%] index_select reverse : Elapsed 0.057 ms (5.721 ms / 100) 5.642 -> 5.629 ( -0.23%) [ +0.23% +0.00% +0.16% / +0.16% -0.11% -0.23%] index_select skip64 : Elapsed 0.057 ms (5.655 ms / 100) 5.647 -> 5.623 ( -0.43%) [ +0.00% +0.04% +0.07% / +0.05% -0.32% -0.43%] index_select skip256 : Elapsed 0.056 ms (5.647 ms / 100) 5.710 -> 5.704 ( -0.11%) [ +0.00% +0.16% +0.21% / +0.21% -0.11% -0.04%] index_select spread : Elapsed 0.057 ms (5.710 ms / 100) 5.731 -> 5.716 ( -0.26%) [ +0.07% +0.00% +0.02% / +0.05% -0.26% -0.26%] index_select strided 3 : Elapsed 0.057 ms (5.735 ms / 100) 5.689 -> 5.677 ( -0.21%) [ +0.04% +0.05% +0.00% / +0.25% -0.18% -0.21%] index_select random : Elapsed 0.057 ms (5.691 ms / 100) 5.673 -> 5.662 ( -0.19%) [ +0.02% +0.00% +0.07% / +0.09% -0.12% -0.19%] index_select random_sorted : Elapsed 0.057 ms (5.674 ms / 100) 5.718 -> 5.708 ( -0.17%) [ +0.00% +0.03% +0.07% / +0.05% -0.12% -0.17%] index_select perm : Elapsed 0.057 ms (5.718 ms / 100) 5.715 -> 5.703 ( -0.21%) [ +0.05% +0.00% +0.14% / +0.02% -0.21% -0.02%] index_select perm_sorted : Elapsed 0.057 ms (5.718 ms / 100) B = [20, 4, 16, 40] (stride (1, 20, 3200, 80)) A = [20, 5, 16, 40] (stride (3200, 1, 5, 80)) dim = 1 5.758 -> 5.755 ( -0.05%) [ +0.05% +0.00% +0.02% / +0.07% -0.05% +0.02%] index_select const : Elapsed 0.058 ms (5.761 ms / 100) 5.752 -> 5.752 ( +0.00%) [ +0.14% +0.00% +0.12% / +0.12% +0.02% +0.00%] index_select wrap : Elapsed 0.058 ms (5.760 ms / 100) 5.758 -> 5.755 ( -0.05%) [ +0.02% +0.00% +0.07% / +0.05% -0.05% +0.00%] index_select linear : Elapsed 0.058 ms (5.759 ms / 100) 5.758 -> 5.752 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.02% -0.10% -0.10%] index_select reverse : Elapsed 0.058 ms (5.761 ms / 100) 5.755 -> 5.751 ( -0.07%) [ +0.00% +0.00% +0.12% / +0.10% -0.07% +0.05%] index_select skip64 : Elapsed 0.058 ms (5.755 ms / 100) 5.751 -> 5.751 ( +0.00%) [ +0.16% +0.00% +0.17% / +0.21% +0.00% +0.02%] index_select skip256 : Elapsed 0.058 ms (5.760 ms / 100) 5.755 -> 5.746 ( -0.16%) [ +0.02% +0.00% +0.05% / +0.14% -0.16% -0.10%] index_select spread : Elapsed 0.058 ms (5.756 ms / 100) 5.753 -> 5.755 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.17% +0.03% +0.07%] index_select strided 3 : Elapsed 0.058 ms (5.756 ms / 100) 5.754 -> 5.754 ( +0.00%) [ +0.00% +0.09% +0.12% / +0.09% +0.03% +0.00%] index_select random : Elapsed 0.058 ms (5.754 ms / 100) 5.758 -> 5.753 ( -0.09%) [ +0.00% +0.03% +0.02% / +0.12% -0.07% -0.09%] index_select random_sorted : Elapsed 0.058 ms (5.758 ms / 100) 5.757 -> 5.754 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.03% -0.05% +0.05%] index_select perm : Elapsed 0.058 ms (5.757 ms / 100) 5.756 -> 5.747 ( -0.16%) [ +0.05% +0.00% +0.00% / +0.12% +0.03% -0.16%] index_select perm_sorted : Elapsed 0.058 ms (5.759 ms / 100) B = [20, 4, 16, 40] (stride (64, 16, 1, 1280)) A = [20, 5, 16, 40] (stride (1, 800, 4000, 20)) dim = 1 5.743 -> 5.754 ( +0.19%) [ +0.17% +0.00% +0.10% / +0.19% +0.44% +0.54%] index_select const : Elapsed 0.058 ms (5.753 ms / 100) 5.823 -> 5.808 ( -0.26%) [ +0.00% +0.09% +0.14% / +0.15% -0.26% -0.26%] index_select wrap : Elapsed 0.058 ms (5.823 ms / 100) 5.828 -> 5.807 ( -0.36%) [ +0.02% +0.03% +0.00% / +0.00% -0.36% -0.19%] index_select linear : Elapsed 0.058 ms (5.829 ms / 100) 5.831 -> 5.799 ( -0.55%) [ +0.00% +0.17% +0.15% / +0.15% -0.55% -0.36%] index_select reverse : Elapsed 0.058 ms (5.831 ms / 100) 5.745 -> 5.752 ( +0.12%) [ +0.09% +0.00% +0.09% / +0.12% +0.44% +0.42%] index_select skip64 : Elapsed 0.057 ms (5.750 ms / 100) 5.753 -> 5.752 ( -0.02%) [ +0.02% +0.03% +0.00% / -0.02% +0.24% +0.28%] index_select skip256 : Elapsed 0.058 ms (5.754 ms / 100) 5.826 -> 5.809 ( -0.29%) [ +0.09% +0.00% +0.02% / +0.03% -0.29% -0.26%] index_select spread : Elapsed 0.058 ms (5.831 ms / 100) 5.804 -> 5.790 ( -0.24%) [ +0.00% +0.10% +0.03% / -0.10% -0.24% -0.24%] index_select strided 3 : Elapsed 0.058 ms (5.804 ms / 100) 5.796 -> 5.796 ( +0.00%) [ +0.00% +0.02% +0.10% / +0.10% +0.00% +0.02%] index_select random : Elapsed 0.058 ms (5.796 ms / 100) 5.773 -> 5.778 ( +0.09%) [ +0.05% +0.00% +0.07% / +0.09% +0.14% +0.21%] index_select random_sorted : Elapsed 0.058 ms (5.776 ms / 100) 5.797 -> 5.795 ( -0.03%) [ +0.05% +0.09% +0.00% / +0.12% -0.03% +0.05%] index_select perm : Elapsed 0.058 ms (5.800 ms / 100) 5.810 -> 5.799 ( -0.19%) [ +0.00% +0.09% +0.14% / -0.05% -0.19% -0.14%] index_select perm_sorted : Elapsed 0.058 ms (5.810 ms / 100) out_shape = [20, 5, 4, 40] in_shape = [20, 5, 16, 40] idx_dim = 2 B = [20, 5, 4, 40] (stride (800, 160, 1, 4)) A = [20, 5, 16, 40] (stride (40, 800, 4000, 1)) dim = 2 2.260 -> 2.263 ( +0.13%) [ +0.13% +0.22% +0.00% / +0.13% +0.58% +0.40%] index_select const : Elapsed 0.023 ms (2.263 ms / 100) 2.330 -> 2.329 ( -0.04%) [ +0.04% +0.09% +0.00% / -0.04% +0.34% +0.60%] index_select wrap : Elapsed 0.023 ms (2.331 ms / 100) 2.330 -> 2.332 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.39% +0.69%] index_select linear : Elapsed 0.023 ms (2.331 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.00% +0.43% +0.17% / +0.13% +0.60% +0.86%] index_select reverse : Elapsed 0.023 ms (2.327 ms / 100) 2.263 -> 2.266 ( +0.13%) [ +0.22% +0.00% +0.00% / +0.13% +0.40% +0.84%] index_select skip64 : Elapsed 0.023 ms (2.268 ms / 100) 2.265 -> 2.266 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.04% +0.09% +0.18%] index_select skip256 : Elapsed 0.023 ms (2.268 ms / 100) 2.331 -> 2.334 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +0.26% +0.30%] index_select spread : Elapsed 0.023 ms (2.335 ms / 100) 2.327 -> 2.329 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.56% +0.47%] index_select strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.327 -> 2.332 ( +0.21%) [ +0.13% +0.09% +0.00% / +0.21% +0.34% +0.64%] index_select strided 5 : Elapsed 0.023 ms (2.330 ms / 100) 2.329 -> 2.326 ( -0.13%) [ +0.00% +0.13% +0.21% / -0.13% +0.34% +0.21%] index_select strided 7 : Elapsed 0.023 ms (2.329 ms / 100) 2.281 -> 2.281 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.35% +0.09%] index_select strided 8 : Elapsed 0.023 ms (2.283 ms / 100) 2.330 -> 2.332 ( +0.09%) [ +0.00% +0.17% +0.04% / +0.09% +0.39% +0.13%] index_select random : Elapsed 0.023 ms (2.330 ms / 100) 2.329 -> 2.333 ( +0.17%) [ +0.04% +0.00% +0.34% / +0.17% +0.39% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.330 ms / 100) 2.328 -> 2.334 ( +0.26%) [ +0.09% +0.00% +0.26% / +0.26% +0.60% +0.39%] index_select perm : Elapsed 0.023 ms (2.330 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.21% +0.21% +0.00% / +0.26% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.023 ms (2.331 ms / 100) B = [20, 5, 4, 40] (stride (800, 4, 1, 20)) A = [20, 5, 16, 40] (stride (16, 12800, 1, 320)) dim = 2 2.616 -> 2.617 ( +0.04%) [ +0.19% +0.00% +0.04% / +0.08% +0.27% +0.04%] index_select const : Elapsed 0.026 ms (2.621 ms / 100) 2.617 -> 2.621 ( +0.15%) [ +0.00% +0.08% +0.08% / +0.15% +0.31% +0.46%] index_select wrap : Elapsed 0.026 ms (2.617 ms / 100) 2.618 -> 2.617 ( -0.04%) [ +0.15% +0.08% +0.00% / -0.04% +0.11% +0.23%] index_select linear : Elapsed 0.026 ms (2.622 ms / 100) 2.617 -> 2.615 ( -0.08%) [ +0.00% +0.00% +0.19% / -0.08% +0.23% +0.15%] index_select reverse : Elapsed 0.026 ms (2.617 ms / 100) 2.619 -> 2.621 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.08% +0.23% +0.23%] index_select skip64 : Elapsed 0.026 ms (2.625 ms / 100) 2.616 -> 2.620 ( +0.15%) [ +0.00% +0.04% +0.00% / +0.15% +0.19% +0.15%] index_select skip256 : Elapsed 0.026 ms (2.616 ms / 100) 2.640 -> 2.644 ( +0.15%) [ +0.19% +0.00% +0.08% / +0.15% +0.15% +0.80%] index_select spread : Elapsed 0.026 ms (2.645 ms / 100) 2.645 -> 2.647 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.19% +0.30%] index_select strided 3 : Elapsed 0.026 ms (2.646 ms / 100) 2.635 -> 2.643 ( +0.30%) [ +0.23% +0.19% +0.00% / +0.30% +0.49% +0.46%] index_select strided 5 : Elapsed 0.026 ms (2.641 ms / 100) 2.645 -> 2.643 ( -0.08%) [ +0.15% +0.15% +0.00% / -0.08% +0.30% +0.38%] index_select strided 7 : Elapsed 0.026 ms (2.649 ms / 100) 2.648 -> 2.657 ( +0.34%) [ +0.00% +0.30% +0.19% / +0.42% +0.34% +0.38%] index_select strided 8 : Elapsed 0.026 ms (2.648 ms / 100) 2.649 -> 2.655 ( +0.23%) [ +0.19% +0.04% +0.00% / +0.23% +0.30% +0.30%] index_select random : Elapsed 0.027 ms (2.654 ms / 100) 2.642 -> 2.645 ( +0.11%) [ +0.26% +0.08% +0.00% / +0.11% +0.38% +0.30%] index_select random_sorted : Elapsed 0.026 ms (2.649 ms / 100) 2.652 -> 2.652 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.30% +0.23%] index_select perm : Elapsed 0.027 ms (2.654 ms / 100) 2.653 -> 2.654 ( +0.04%) [ +0.11% +0.00% +0.08% / +0.04% +0.19% +0.30%] index_select perm_sorted : Elapsed 0.027 ms (2.656 ms / 100) B = [20, 5, 4, 40] (stride (160, 3200, 40, 1)) A = [20, 5, 16, 40] (stride (1, 12800, 20, 320)) dim = 2 2.529 -> 2.529 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.32% +0.43%] index_select const : Elapsed 0.025 ms (2.529 ms / 100) 2.533 -> 2.539 ( +0.24%) [ +0.12% +0.00% +0.08% / +0.24% +0.32% +0.47%] index_select wrap : Elapsed 0.025 ms (2.536 ms / 100) 2.532 -> 2.534 ( +0.08%) [ +0.04% +0.16% +0.00% / +0.08% +0.43% +0.36%] index_select linear : Elapsed 0.025 ms (2.533 ms / 100) 2.530 -> 2.534 ( +0.16%) [ +0.00% +0.04% +0.00% / +0.16% +0.67% +0.59%] index_select reverse : Elapsed 0.025 ms (2.530 ms / 100) 2.525 -> 2.525 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.59% +0.67%] index_select skip64 : Elapsed 0.025 ms (2.525 ms / 100) 2.527 -> 2.529 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.51% +0.40%] index_select skip256 : Elapsed 0.025 ms (2.533 ms / 100) 2.540 -> 2.541 ( +0.04%) [ +0.00% +0.16% +0.12% / +0.04% +0.47% +0.47%] index_select spread : Elapsed 0.025 ms (2.540 ms / 100) 2.536 -> 2.537 ( +0.04%) [ +0.00% +0.00% +0.12% / +0.04% +0.28% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.536 ms / 100) 2.525 -> 2.528 ( +0.12%) [ +0.28% +0.00% +0.08% / +0.12% +0.40% +0.44%] index_select strided 5 : Elapsed 0.025 ms (2.532 ms / 100) 2.534 -> 2.535 ( +0.04%) [ +0.00% +0.32% +0.16% / +0.04% +0.39% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.534 ms / 100) 2.526 -> 2.528 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.48% +0.44%] index_select strided 8 : Elapsed 0.025 ms (2.526 ms / 100) 2.530 -> 2.536 ( +0.24%) [ +0.00% +0.20% +0.12% / +0.24% +0.59% +0.51%] index_select random : Elapsed 0.025 ms (2.530 ms / 100) 2.535 -> 2.534 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.39% +0.36%] index_select random_sorted : Elapsed 0.025 ms (2.536 ms / 100) 2.540 -> 2.541 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.08% +0.16%] index_select perm : Elapsed 0.025 ms (2.540 ms / 100) 2.546 -> 2.545 ( -0.04%) [ +0.00% +0.08% +0.16% / -0.04% +0.16% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.546 ms / 100) B = [20, 5, 4, 40] (stride (160, 3200, 1, 4)) A = [20, 5, 16, 40] (stride (1, 12800, 800, 20)) dim = 2 2.478 -> 2.480 ( +0.08%) [ +0.04% +0.00% +0.16% / +0.08% +0.20% +0.24%] index_select const : Elapsed 0.025 ms (2.479 ms / 100) 2.474 -> 2.473 ( -0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.16% -0.04%] index_select wrap : Elapsed 0.025 ms (2.478 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.08% +0.24% +0.00% / +0.08% +0.04% +0.20%] index_select linear : Elapsed 0.025 ms (2.473 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.08% +0.12%] index_select reverse : Elapsed 0.025 ms (2.473 ms / 100) 2.474 -> 2.472 ( -0.08%) [ +0.08% +0.00% +0.24% / +0.00% +0.00% -0.08%] index_select skip64 : Elapsed 0.025 ms (2.476 ms / 100) 2.474 -> 2.482 ( +0.32%) [ +0.28% +0.32% +0.00% / +0.32% +0.36% +0.49%] index_select skip256 : Elapsed 0.025 ms (2.481 ms / 100) 2.475 -> 2.475 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.00% +0.16%] index_select spread : Elapsed 0.025 ms (2.478 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.00% +0.04% +0.12% / +0.04% +0.36% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.480 -> 2.478 ( -0.08%) [ +0.20% +0.00% +0.08% / -0.08% +0.24% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.485 ms / 100) 2.469 -> 2.473 ( +0.16%) [ +0.00% +0.20% +0.04% / +0.16% +0.36% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.24% +0.32% +0.00% / +0.12% +0.36% +0.45%] index_select strided 8 : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.28% +0.36%] index_select random : Elapsed 0.025 ms (2.471 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.28% +0.24%] index_select random_sorted : Elapsed 0.025 ms (2.475 ms / 100) 2.477 -> 2.478 ( +0.04%) [ +0.20% +0.00% +0.12% / +0.04% +0.32% +0.36%] index_select perm : Elapsed 0.025 ms (2.482 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.28% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) B = [20, 5, 4, 40] (stride (40, 3200, 800, 1)) A = [20, 5, 16, 40] (stride (5, 1, 4000, 100)) dim = 2 2.421 -> 2.423 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.37% +0.25%] index_select const : Elapsed 0.024 ms (2.421 ms / 100) 2.429 -> 2.434 ( +0.21%) [ +0.37% +0.21% +0.00% / +0.21% +0.45% +0.37%] index_select wrap : Elapsed 0.024 ms (2.438 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.45% +0.49%] index_select linear : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.431 ( +0.16%) [ +0.12% +0.00% +0.00% / +0.16% +0.41% +0.45%] index_select reverse : Elapsed 0.024 ms (2.430 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.66% +0.58%] index_select skip64 : Elapsed 0.024 ms (2.417 ms / 100) 2.419 -> 2.421 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.41%] index_select skip256 : Elapsed 0.024 ms (2.421 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.12% +0.00% +0.08% / +0.16% +0.58% +0.41%] index_select spread : Elapsed 0.024 ms (2.433 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.00% +0.41% +0.33%] index_select strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.435 -> 2.436 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.41% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.429 -> 2.432 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.33% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.432 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.50% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.425 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.25% +0.29%] index_select random : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.434 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.29% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.430 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.12% +0.25%] index_select perm : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.12% +0.00% +0.12% / +0.16% +0.45% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.433 ms / 100) B = [20, 5, 4, 40] (stride (1, 3200, 800, 20)) A = [20, 5, 16, 40] (stride (1, 800, 4000, 20)) dim = 2 2.482 -> 2.485 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.20% +0.24%] index_select const : Elapsed 0.025 ms (2.482 ms / 100) 2.479 -> 2.479 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.04% +0.08%] index_select wrap : Elapsed 0.025 ms (2.481 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.28% +0.12%] index_select linear : Elapsed 0.025 ms (2.473 ms / 100) 2.473 -> 2.477 ( +0.16%) [ +0.04% +0.00% +0.12% / +0.16% +0.16% +0.16%] index_select reverse : Elapsed 0.025 ms (2.474 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.20% +0.16% +0.00% / +0.04% +0.40% +0.36%] index_select skip64 : Elapsed 0.025 ms (2.476 ms / 100) 2.481 -> 2.485 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.16% +0.28%] index_select skip256 : Elapsed 0.025 ms (2.483 ms / 100) 2.475 -> 2.476 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.20% +0.24%] index_select spread : Elapsed 0.025 ms (2.476 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.00% +0.16% +0.04% / +0.04% +0.24% +0.41%] index_select strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.478 -> 2.485 ( +0.28%) [ +0.20% +0.00% +0.20% / +0.28% +0.61% +0.44%] index_select strided 5 : Elapsed 0.025 ms (2.483 ms / 100) 2.468 -> 2.473 ( +0.20%) [ +0.32% +0.00% +0.12% / +0.20% +0.61% +0.69%] index_select strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.469 -> 2.474 ( +0.20%) [ +0.12% +0.00% +0.12% / +0.20% +0.57% +0.57%] index_select strided 8 : Elapsed 0.025 ms (2.472 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.57% +0.57%] index_select random : Elapsed 0.025 ms (2.473 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.00% +0.12% +0.16% / +0.04% +0.49% +0.40%] index_select random_sorted : Elapsed 0.025 ms (2.472 ms / 100) 2.480 -> 2.480 ( +0.00%) [ +0.00% +0.32% +0.16% / +0.00% +0.44% +0.44%] index_select perm : Elapsed 0.025 ms (2.480 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.00% +0.61% +0.69%] index_select perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) B = [20, 5, 4, 40] (stride (1, 3200, 20, 80)) A = [20, 5, 16, 40] (stride (1, 12800, 20, 320)) dim = 2 2.550 -> 2.555 ( +0.20%) [ +0.31% +0.00% +0.12% / +0.20% +0.39% +0.55%] index_select const : Elapsed 0.026 ms (2.558 ms / 100) 2.554 -> 2.556 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.55% +0.23%] index_select wrap : Elapsed 0.026 ms (2.555 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.43% +0.24%] index_select linear : Elapsed 0.026 ms (2.557 ms / 100) 2.551 -> 2.556 ( +0.20%) [ +0.08% +0.00% +0.16% / +0.20% +0.55% +0.63%] index_select reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.550 -> 2.551 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.55% +0.35%] index_select skip64 : Elapsed 0.026 ms (2.553 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.51% +0.59%] index_select skip256 : Elapsed 0.026 ms (2.555 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.51% +0.39%] index_select spread : Elapsed 0.026 ms (2.558 ms / 100) 2.554 -> 2.556 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.16% +0.08% +0.31%] index_select strided 3 : Elapsed 0.026 ms (2.557 ms / 100) 2.545 -> 2.549 ( +0.16%) [ +0.12% +0.08% +0.00% / +0.16% +0.39% +0.35%] index_select strided 5 : Elapsed 0.025 ms (2.548 ms / 100) 2.554 -> 2.559 ( +0.20%) [ +0.04% +0.23% +0.00% / +0.20% +0.27% +0.51%] index_select strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.549 -> 2.550 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.43% +0.27%] index_select strided 8 : Elapsed 0.026 ms (2.552 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.39%] index_select random : Elapsed 0.026 ms (2.552 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.39% +0.39%] index_select random_sorted : Elapsed 0.026 ms (2.553 ms / 100) 2.556 -> 2.556 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.23% +0.31%] index_select perm : Elapsed 0.026 ms (2.558 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.23% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.558 ms / 100) B = [20, 5, 4, 40] (stride (40, 800, 4000, 1)) A = [20, 5, 16, 40] (stride (1, 20, 4000, 100)) dim = 2 2.529 -> 2.531 ( +0.08%) [ +0.00% +0.16% +0.24% / +0.08% +0.51% +0.28%] index_select const : Elapsed 0.025 ms (2.529 ms / 100) 2.542 -> 2.542 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +0.16% +0.12%] index_select wrap : Elapsed 0.025 ms (2.549 ms / 100) 2.543 -> 2.543 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.04% +0.08%] index_select linear : Elapsed 0.025 ms (2.543 ms / 100) 2.540 -> 2.541 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.04% +0.20% +0.24%] index_select reverse : Elapsed 0.025 ms (2.540 ms / 100) 2.531 -> 2.535 ( +0.16%) [ +0.12% +0.24% +0.00% / +0.16% +0.36% +0.32%] index_select skip64 : Elapsed 0.025 ms (2.534 ms / 100) 2.532 -> 2.532 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.00%] index_select skip256 : Elapsed 0.025 ms (2.532 ms / 100) 2.533 -> 2.538 ( +0.20%) [ +0.16% +0.00% +0.24% / +0.20% +0.39% +0.47%] index_select spread : Elapsed 0.025 ms (2.537 ms / 100) 2.537 -> 2.536 ( -0.04%) [ +0.20% +0.00% +0.00% / -0.04% +0.28% +0.32%] index_select strided 3 : Elapsed 0.025 ms (2.542 ms / 100) 2.536 -> 2.537 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.47% +0.28%] index_select strided 5 : Elapsed 0.025 ms (2.536 ms / 100) 2.537 -> 2.537 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.32% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.537 ms / 100) 2.530 -> 2.533 ( +0.12%) [ +0.04% +0.00% +0.28% / +0.12% +0.16% +0.36%] index_select strided 8 : Elapsed 0.025 ms (2.531 ms / 100) 2.535 -> 2.535 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.00% +0.28% +0.43%] index_select random : Elapsed 0.025 ms (2.535 ms / 100) 2.537 -> 2.539 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.39% +0.28%] index_select random_sorted : Elapsed 0.025 ms (2.539 ms / 100) 2.535 -> 2.538 ( +0.12%) [ +0.00% +0.28% +0.08% / +0.12% +0.32% +0.43%] index_select perm : Elapsed 0.025 ms (2.535 ms / 100) 2.538 -> 2.541 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.24% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.539 ms / 100) out_shape = [20, 5, 16, 4] in_shape = [20, 5, 16, 40] idx_dim = 3 B = [20, 5, 16, 4] (stride (320, 1, 5, 80)) A = [20, 5, 16, 40] (stride (3200, 1, 200, 5)) dim = 3 1.338 -> 1.338 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.52% +0.67%] index_select const : Elapsed 0.013 ms (1.338 ms / 100) 1.335 -> 1.337 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.52% +0.75%] index_select wrap : Elapsed 0.013 ms (1.335 ms / 100) 1.339 -> 1.341 ( +0.15%) [ +0.07% +0.00% +0.22% / +0.15% +0.52% +0.60%] index_select linear : Elapsed 0.013 ms (1.340 ms / 100) 1.338 -> 1.339 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.60% +0.67%] index_select reverse : Elapsed 0.013 ms (1.338 ms / 100) 1.333 -> 1.337 ( +0.30%) [ +0.53% +0.00% +0.23% / +0.30% +0.90% +1.20%] index_select skip64 : Elapsed 0.013 ms (1.340 ms / 100) 1.339 -> 1.341 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.52% +0.52%] index_select skip256 : Elapsed 0.013 ms (1.339 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.75% +0.90%] index_select spread : Elapsed 0.013 ms (1.337 ms / 100) 1.339 -> 1.340 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.37% +0.52%] index_select strided 3 : Elapsed 0.013 ms (1.340 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.00% +0.22% +0.15% / +0.00% +0.60% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.337 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.60% +0.52%] index_select strided 7 : Elapsed 0.013 ms (1.336 ms / 100) 1.337 -> 1.340 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.52% +0.30%] index_select strided 8 : Elapsed 0.013 ms (1.340 ms / 100) 1.342 -> 1.342 ( +0.00%) [ +0.00% +0.30% +0.07% / +0.00% +0.15% +0.30%] index_select strided 16 : Elapsed 0.013 ms (1.342 ms / 100) 1.333 -> 1.332 ( -0.08%) [ +0.00% +0.15% +0.00% / -0.08% +0.60% +0.38%] index_select random : Elapsed 0.013 ms (1.333 ms / 100) 1.332 -> 1.335 ( +0.23%) [ +0.00% +0.23% +0.15% / +0.23% +0.68% +0.68%] index_select random_sorted : Elapsed 0.013 ms (1.332 ms / 100) 1.329 -> 1.329 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.53% +0.60%] index_select perm : Elapsed 0.013 ms (1.330 ms / 100) 1.339 -> 1.346 ( +0.52%) [ +0.22% +0.22% +0.00% / +0.52% +0.67% +0.67%] index_select perm_sorted : Elapsed 0.013 ms (1.342 ms / 100) B = [20, 5, 16, 4] (stride (320, 1, 5, 80)) A = [20, 5, 16, 40] (stride (1, 12800, 800, 20)) dim = 3 1.267 -> 1.267 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_select const : Elapsed 0.013 ms (1.268 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.24% +0.00% +0.08% / +0.16% +0.71% +0.55%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.31% +0.47%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.270 -> 1.271 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.270 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.16% +0.00% +0.24% / +0.00% +0.55% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.63%] index_select spread : Elapsed 0.013 ms (1.280 ms / 100) 1.282 -> 1.281 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.47% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.55% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.16% +0.08% +0.00% / +0.23% +0.55% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.283 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.47% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.23%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.279 ms / 100) B = [20, 5, 16, 4] (stride (64, 1280, 1, 16)) A = [20, 5, 16, 40] (stride (3200, 40, 200, 1)) dim = 3 1.365 -> 1.367 ( +0.15%) [ +0.00% +0.15% +0.07% / +0.15% +0.59% +0.59%] index_select const : Elapsed 0.014 ms (1.365 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.44%] index_select wrap : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.44% +0.37%] index_select linear : Elapsed 0.014 ms (1.367 ms / 100) 1.362 -> 1.364 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.44% +0.59%] index_select reverse : Elapsed 0.014 ms (1.364 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.59%] index_select skip64 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.29% +0.15%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.29% +0.44%] index_select spread : Elapsed 0.014 ms (1.369 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.00% +0.00% +0.07% / +0.15% +0.44% +0.37%] index_select strided 3 : Elapsed 0.014 ms (1.366 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.29% +0.37%] index_select strided 5 : Elapsed 0.014 ms (1.366 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.37% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.365 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.370 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.29% +0.29%] index_select strided 16 : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.370 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.37% +0.29%] index_select random : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.58% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.370 ms / 100) 1.365 -> 1.369 ( +0.29%) [ +0.15% +0.00% +0.22% / +0.29% +0.59% +0.51%] index_select perm : Elapsed 0.014 ms (1.367 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.66% +0.66%] index_select perm_sorted : Elapsed 0.014 ms (1.364 ms / 100) B = [20, 5, 16, 4] (stride (64, 1280, 1, 16)) A = [20, 5, 16, 40] (stride (640, 12800, 40, 1)) dim = 3 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.37% +0.22%] index_select const : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.44%] index_select wrap : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.44% +0.29%] index_select linear : Elapsed 0.014 ms (1.366 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.15% +0.37%] index_select reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.37% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.73% +0.59%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.66% +0.59%] index_select spread : Elapsed 0.014 ms (1.368 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.07% +0.00% +0.22% / +0.07% +0.66% +0.66%] index_select strided 3 : Elapsed 0.014 ms (1.366 ms / 100) 1.364 -> 1.364 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.66% +0.73%] index_select strided 5 : Elapsed 0.014 ms (1.364 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.66% +0.59%] index_select strided 7 : Elapsed 0.014 ms (1.366 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.59% +0.73%] index_select strided 8 : Elapsed 0.014 ms (1.365 ms / 100) 1.364 -> 1.368 ( +0.29%) [ +0.29% +0.00% +0.07% / +0.29% +0.88% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.66%] index_select random : Elapsed 0.014 ms (1.365 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.73% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.367 ms / 100) 1.363 -> 1.363 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.73%] index_select perm : Elapsed 0.014 ms (1.363 ms / 100) 1.364 -> 1.364 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.66% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.366 ms / 100) B = [20, 5, 16, 4] (stride (1, 1280, 80, 20)) A = [20, 5, 16, 40] (stride (1, 20, 100, 1600)) dim = 3 1.258 -> 1.259 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.72% +0.48%] index_select const : Elapsed 0.013 ms (1.260 ms / 100) 1.265 -> 1.266 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.32% +0.24%] index_select wrap : Elapsed 0.013 ms (1.267 ms / 100) 1.261 -> 1.262 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.56% +0.40%] index_select linear : Elapsed 0.013 ms (1.264 ms / 100) 1.267 -> 1.267 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select reverse : Elapsed 0.013 ms (1.268 ms / 100) 1.257 -> 1.259 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.88% +0.72%] index_select skip64 : Elapsed 0.013 ms (1.259 ms / 100) 1.256 -> 1.258 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.96% +0.88%] index_select skip256 : Elapsed 0.013 ms (1.258 ms / 100) 1.260 -> 1.262 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.63% +0.56%] index_select spread : Elapsed 0.013 ms (1.261 ms / 100) 1.264 -> 1.265 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.55% +0.40%] index_select strided 3 : Elapsed 0.013 ms (1.267 ms / 100) 1.260 -> 1.259 ( -0.08%) [ +0.16% +0.00% +0.00% / -0.08% +0.56% +0.56%] index_select strided 5 : Elapsed 0.013 ms (1.262 ms / 100) 1.261 -> 1.263 ( +0.16%) [ +0.40% +0.00% +0.00% / +0.16% +0.56% +0.56%] index_select strided 7 : Elapsed 0.013 ms (1.266 ms / 100) 1.266 -> 1.268 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.24% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.268 ms / 100) 1.266 -> 1.268 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.55% +0.63%] index_select strided 16 : Elapsed 0.013 ms (1.266 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.24% +0.16% +0.00% / +0.16% +0.31% +0.39%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.266 -> 1.267 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.08% +0.24%] index_select random_sorted : Elapsed 0.013 ms (1.268 ms / 100) 1.270 -> 1.270 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.31% +0.24%] index_select perm : Elapsed 0.013 ms (1.271 ms / 100) 1.266 -> 1.270 ( +0.32%) [ +0.08% +0.08% +0.00% / +0.32% +0.32% +0.32%] index_select perm_sorted : Elapsed 0.013 ms (1.267 ms / 100) B = [20, 5, 16, 4] (stride (80, 16, 1, 1600)) A = [20, 5, 16, 40] (stride (200, 40, 4000, 1)) dim = 3 1.096 -> 1.099 ( +0.27%) [ +0.27% +0.00% +0.09% / +0.27% +0.64% +0.46%] index_select const : Elapsed 0.011 ms (1.099 ms / 100) 1.100 -> 1.103 ( +0.27%) [ +0.00% +0.09% +0.00% / +0.27% +1.00% +0.64%] index_select wrap : Elapsed 0.011 ms (1.100 ms / 100) 1.098 -> 1.100 ( +0.18%) [ +0.09% +0.00% +0.00% / +0.18% +0.46% +0.55%] index_select linear : Elapsed 0.011 ms (1.099 ms / 100) 1.101 -> 1.100 ( -0.09%) [ +0.09% +0.18% +0.00% / -0.09% +0.45% +0.64%] index_select reverse : Elapsed 0.011 ms (1.102 ms / 100) 1.099 -> 1.099 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.55% +0.36%] index_select skip64 : Elapsed 0.011 ms (1.100 ms / 100) 1.098 -> 1.100 ( +0.18%) [ +0.00% +0.18% +0.09% / +0.18% +0.36% +0.64%] index_select skip256 : Elapsed 0.011 ms (1.098 ms / 100) 1.098 -> 1.100 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +0.73% +0.64%] index_select spread : Elapsed 0.011 ms (1.100 ms / 100) 1.099 -> 1.099 ( +0.00%) [ +0.36% +0.09% +0.00% / +0.00% +0.55% +0.45%] index_select strided 3 : Elapsed 0.011 ms (1.103 ms / 100) 1.099 -> 1.104 ( +0.45%) [ +0.00% +0.09% +0.27% / +0.45% +0.82% +0.64%] index_select strided 5 : Elapsed 0.011 ms (1.099 ms / 100) 1.098 -> 1.099 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.36% +0.55%] index_select strided 7 : Elapsed 0.011 ms (1.100 ms / 100) 1.099 -> 1.100 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.45% +0.36%] index_select strided 8 : Elapsed 0.011 ms (1.099 ms / 100) 1.100 -> 1.101 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.45% +0.09%] index_select strided 16 : Elapsed 0.011 ms (1.100 ms / 100) 1.101 -> 1.103 ( +0.18%) [ +0.00% +0.09% +0.09% / +0.18% +0.54% +0.36%] index_select random : Elapsed 0.011 ms (1.101 ms / 100) 1.101 -> 1.102 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.73% +0.45%] index_select random_sorted : Elapsed 0.011 ms (1.102 ms / 100) 1.103 -> 1.104 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.45% +0.18%] index_select perm : Elapsed 0.011 ms (1.103 ms / 100) 1.102 -> 1.102 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.64% +0.45%] index_select perm_sorted : Elapsed 0.011 ms (1.103 ms / 100) out_shape = [4, 5, 40, 16] in_shape = [20, 5, 40, 16] idx_dim = 0 B = [4, 5, 40, 16] (stride (3200, 640, 1, 40)) A = [20, 5, 40, 16] (stride (1, 12800, 320, 20)) dim = 0 1.943 -> 1.947 ( +0.21%) [ +0.26% +0.10% +0.00% / +0.21% +1.03% +1.03%] index_select const : Elapsed 0.019 ms (1.948 ms / 100) 1.926 -> 1.934 ( +0.42%) [ +0.00% +0.16% +0.10% / +0.42% +0.52% +0.62%] index_select wrap : Elapsed 0.019 ms (1.926 ms / 100) 1.906 -> 1.914 ( +0.42%) [ +0.26% +0.00% +0.21% / +0.42% +1.15% +2.10%] index_select linear : Elapsed 0.019 ms (1.911 ms / 100) 1.947 -> 1.948 ( +0.05%) [ +0.21% +0.00% +0.05% / +0.05% +0.98% +0.82%] index_select reverse : Elapsed 0.020 ms (1.951 ms / 100) 1.930 -> 1.940 ( +0.52%) [ +0.00% +0.00% +0.47% / +0.52% +0.57% +0.98%] index_select skip64 : Elapsed 0.019 ms (1.930 ms / 100) 1.945 -> 1.949 ( +0.21%) [ +0.15% +0.05% +0.00% / +0.21% +1.39% +0.82%] index_select skip256 : Elapsed 0.019 ms (1.948 ms / 100) 1.971 -> 1.982 ( +0.56%) [ +0.00% +0.00% +0.41% / +0.56% +1.47% +1.22%] index_select spread : Elapsed 0.020 ms (1.971 ms / 100) 1.957 -> 1.961 ( +0.20%) [ +0.10% +0.00% +0.00% / +0.20% +0.46% +0.46%] index_select strided 3 : Elapsed 0.020 ms (1.959 ms / 100) 1.956 -> 1.958 ( +0.10%) [ +0.31% +0.00% +0.26% / +0.10% +1.12% +1.23%] index_select strided 5 : Elapsed 0.020 ms (1.962 ms / 100) 1.958 -> 1.968 ( +0.51%) [ +0.46% +0.56% +0.00% / +0.51% +1.17% +1.53%] index_select strided 7 : Elapsed 0.020 ms (1.967 ms / 100) 1.975 -> 1.978 ( +0.15%) [ +0.05% +0.00% +0.30% / +0.15% +1.16% +1.11%] index_select strided 8 : Elapsed 0.020 ms (1.976 ms / 100) 1.953 -> 1.959 ( +0.31%) [ +0.36% +0.41% +0.00% / +0.31% +1.43% +1.74%] index_select strided 16 : Elapsed 0.020 ms (1.960 ms / 100) 1.947 -> 1.944 ( -0.15%) [ +0.00% +0.05% +0.00% / -0.15% +0.46% +0.56%] index_select random : Elapsed 0.019 ms (1.947 ms / 100) 1.940 -> 1.943 ( +0.15%) [ +0.52% +0.21% +0.00% / +0.15% +0.77% +0.62%] index_select random_sorted : Elapsed 0.019 ms (1.950 ms / 100) 1.944 -> 1.944 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +1.34% +1.03%] index_select perm : Elapsed 0.019 ms (1.946 ms / 100) 1.977 -> 1.975 ( -0.10%) [ +0.05% +0.00% +0.20% / -0.10% +1.32% +1.32%] index_select perm_sorted : Elapsed 0.020 ms (1.978 ms / 100) B = [4, 5, 40, 16] (stride (3200, 16, 80, 1)) A = [20, 5, 40, 16] (stride (640, 12800, 1, 40)) dim = 0 2.081 -> 2.083 ( +0.10%) [ +0.34% +0.14% +0.00% / +0.10% +0.62% +0.53%] index_select const : Elapsed 0.021 ms (2.088 ms / 100) 2.080 -> 2.085 ( +0.24%) [ +0.05% +0.10% +0.00% / +0.24% +0.48% +0.34%] index_select wrap : Elapsed 0.021 ms (2.081 ms / 100) 2.090 -> 2.094 ( +0.19%) [ +0.19% +0.10% +0.00% / +0.19% +0.33% +0.19%] index_select linear : Elapsed 0.021 ms (2.094 ms / 100) 2.091 -> 2.095 ( +0.19%) [ +0.00% +0.00% +0.29% / +0.19% +0.72% +0.62%] index_select reverse : Elapsed 0.021 ms (2.091 ms / 100) 2.085 -> 2.090 ( +0.24%) [ +0.19% +0.00% +0.05% / +0.24% +0.53% +0.53%] index_select skip64 : Elapsed 0.021 ms (2.089 ms / 100) 2.083 -> 2.085 ( +0.10%) [ +0.24% +0.00% +0.19% / +0.10% +0.34% +0.34%] index_select skip256 : Elapsed 0.021 ms (2.088 ms / 100) 2.084 -> 2.083 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.14% +0.10%] index_select spread : Elapsed 0.021 ms (2.085 ms / 100) 2.095 -> 2.092 ( -0.14%) [ +0.05% +0.00% +0.00% / -0.14% +0.29% +0.29%] index_select strided 3 : Elapsed 0.021 ms (2.096 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.33% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.090 ms / 100) 2.091 -> 2.089 ( -0.10%) [ +0.19% +0.05% +0.00% / +0.29% -0.05% -0.10%] index_select strided 7 : Elapsed 0.021 ms (2.095 ms / 100) 2.093 -> 2.096 ( +0.14%) [ +0.19% +0.05% +0.00% / +0.14% +0.14% +0.43%] index_select strided 8 : Elapsed 0.021 ms (2.097 ms / 100) 2.089 -> 2.091 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.14% +0.14% +0.10%] index_select strided 16 : Elapsed 0.021 ms (2.089 ms / 100) 2.073 -> 2.075 ( +0.10%) [ +0.00% +0.24% +0.24% / +0.10% +0.96% +1.06%] index_select random : Elapsed 0.021 ms (2.073 ms / 100) 2.076 -> 2.076 ( +0.00%) [ +0.14% +0.00% +0.10% / +0.00% +0.72% +0.77%] index_select random_sorted : Elapsed 0.021 ms (2.079 ms / 100) 2.092 -> 2.091 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.19% +0.29%] index_select perm : Elapsed 0.021 ms (2.093 ms / 100) 2.087 -> 2.091 ( +0.19%) [ +0.14% +0.05% +0.00% / +0.19% +0.34% +0.38%] index_select perm_sorted : Elapsed 0.021 ms (2.090 ms / 100) B = [4, 5, 40, 16] (stride (3200, 40, 1, 200)) A = [20, 5, 40, 16] (stride (1, 320, 1600, 20)) dim = 0 2.029 -> 2.026 ( -0.15%) [ +0.25% +0.00% +0.05% / -0.15% +0.49% +0.25%] index_select const : Elapsed 0.020 ms (2.034 ms / 100) 2.023 -> 2.023 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.10% +0.00% +0.20%] index_select wrap : Elapsed 0.020 ms (2.023 ms / 100) 2.022 -> 2.025 ( +0.15%) [ +0.15% +0.00% +0.05% / +0.15% +0.30% +0.35%] index_select linear : Elapsed 0.020 ms (2.025 ms / 100) 2.025 -> 2.028 ( +0.15%) [ +0.00% +0.40% +0.15% / +0.20% +0.15% +0.74%] index_select reverse : Elapsed 0.020 ms (2.025 ms / 100) 2.037 -> 2.039 ( +0.10%) [ +0.00% +0.10% +0.15% / +0.29% +0.25% +0.10%] index_select skip64 : Elapsed 0.020 ms (2.037 ms / 100) 2.040 -> 2.040 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.00% +0.05%] index_select skip256 : Elapsed 0.020 ms (2.043 ms / 100) 2.084 -> 2.085 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.29% +0.24%] index_select spread : Elapsed 0.021 ms (2.085 ms / 100) 2.056 -> 2.060 ( +0.19%) [ +0.00% +0.15% +0.24% / +0.19% +0.29% +0.34%] index_select strided 3 : Elapsed 0.021 ms (2.056 ms / 100) 2.078 -> 2.084 ( +0.29%) [ +0.05% +0.00% +0.10% / +0.29% +0.34% +0.43%] index_select strided 5 : Elapsed 0.021 ms (2.079 ms / 100) 2.064 -> 2.066 ( +0.10%) [ +0.24% +0.29% +0.00% / +0.10% +0.68% +0.63%] index_select strided 7 : Elapsed 0.021 ms (2.069 ms / 100) 2.075 -> 2.076 ( +0.05%) [ +0.19% +0.24% +0.00% / +0.34% +0.05% +0.10%] index_select strided 8 : Elapsed 0.021 ms (2.079 ms / 100) 2.076 -> 2.082 ( +0.29%) [ +0.24% +0.19% +0.00% / +0.43% +0.29% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.081 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.00% +0.39% +0.19% / +0.00% +0.15% +0.15%] index_select random : Elapsed 0.021 ms (2.059 ms / 100) 2.059 -> 2.061 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.29% +0.10%] index_select random_sorted : Elapsed 0.021 ms (2.060 ms / 100) 2.061 -> 2.060 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.24% +0.19%] index_select perm : Elapsed 0.021 ms (2.062 ms / 100) 2.065 -> 2.069 ( +0.19%) [ +0.10% +0.00% +0.15% / +0.19% +0.63% +0.39%] index_select perm_sorted : Elapsed 0.021 ms (2.067 ms / 100) B = [4, 5, 40, 16] (stride (16, 2560, 64, 1)) A = [20, 5, 40, 16] (stride (640, 12800, 1, 40)) dim = 0 1.976 -> 1.977 ( +0.05%) [ +0.20% +0.25% +0.00% / +0.05% +0.56% +0.40%] index_select const : Elapsed 0.020 ms (1.980 ms / 100) 1.977 -> 1.979 ( +0.10%) [ +0.25% +0.00% +0.10% / +0.20% +0.35% +0.10%] index_select wrap : Elapsed 0.020 ms (1.982 ms / 100) 1.981 -> 1.982 ( +0.05%) [ +0.00% +0.15% +0.05% / +0.05% +0.35% +0.20%] index_select linear : Elapsed 0.020 ms (1.981 ms / 100) 1.984 -> 1.982 ( -0.10%) [ +0.05% +0.00% +0.10% / -0.10% +0.66% +0.25%] index_select reverse : Elapsed 0.020 ms (1.985 ms / 100) 1.979 -> 1.977 ( -0.10%) [ +0.00% +0.00% +0.00% / -0.10% +0.30% +0.20%] index_select skip64 : Elapsed 0.020 ms (1.979 ms / 100) 1.974 -> 1.974 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.10% +0.20%] index_select skip256 : Elapsed 0.020 ms (1.975 ms / 100) 1.978 -> 1.980 ( +0.10%) [ +0.15% +0.00% +0.20% / +0.10% +0.30% +0.35%] index_select spread : Elapsed 0.020 ms (1.981 ms / 100) 1.986 -> 1.988 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.30% +0.20%] index_select strided 3 : Elapsed 0.020 ms (1.986 ms / 100) 1.986 -> 1.985 ( -0.05%) [ +0.20% +0.10% +0.00% / +0.05% -0.05% +0.25%] index_select strided 5 : Elapsed 0.020 ms (1.990 ms / 100) 1.987 -> 1.984 ( -0.15%) [ +0.10% +0.00% +0.05% / +0.05% -0.15% -0.05%] index_select strided 7 : Elapsed 0.020 ms (1.989 ms / 100) 1.993 -> 1.992 ( -0.05%) [ +0.00% +0.25% +0.05% / -0.05% +0.20% +0.00%] index_select strided 8 : Elapsed 0.020 ms (1.993 ms / 100) 1.981 -> 1.978 ( -0.15%) [ +0.25% +0.00% +0.10% / +0.10% -0.15% +0.10%] index_select strided 16 : Elapsed 0.020 ms (1.986 ms / 100) 1.974 -> 1.973 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.61% +0.61%] index_select random : Elapsed 0.020 ms (1.975 ms / 100) 1.973 -> 1.977 ( +0.20%) [ +0.15% +0.10% +0.00% / +0.20% +1.06% +1.12%] index_select random_sorted : Elapsed 0.020 ms (1.976 ms / 100) 1.982 -> 1.986 ( +0.20%) [ +0.15% +0.00% +0.10% / +0.25% +0.30% +0.20%] index_select perm : Elapsed 0.020 ms (1.985 ms / 100) 1.988 -> 1.983 ( -0.25%) [ +0.10% +0.00% +0.20% / -0.10% -0.25% -0.25%] index_select perm_sorted : Elapsed 0.020 ms (1.990 ms / 100) B = [4, 5, 40, 16] (stride (1, 2560, 4, 160)) dim = 0 fill_cnt = 20 3.743 -> 3.717 ( -0.69%) [ +0.00% +0.05% +0.08% / -0.59% -0.69% -0.64%] index_fill_ const : Elapsed 0.037 ms (3.743 ms / 100) 3.735 -> 3.711 ( -0.64%) [ +0.00% +0.16% +0.03% / -0.62% -0.64% -0.62%] index_fill_ linear : Elapsed 0.037 ms (3.735 ms / 100) 3.719 -> 3.704 ( -0.40%) [ +0.16% +0.00% +0.43% / -0.32% -0.40% -0.35%] index_fill_ reverse : Elapsed 0.037 ms (3.725 ms / 100) 3.739 -> 3.711 ( -0.75%) [ +0.00% +0.16% +0.16% / -0.51% -0.75% -0.48%] index_fill_ skip64 : Elapsed 0.037 ms (3.739 ms / 100) 3.735 -> 3.719 ( -0.43%) [ +0.46% +0.00% +0.16% / -0.35% -0.37% -0.43%] index_fill_ skip256 : Elapsed 0.038 ms (3.752 ms / 100) 3.721 -> 3.700 ( -0.56%) [ +0.08% +0.05% +0.00% / -0.54% -0.56% -0.46%] index_fill_ spread : Elapsed 0.037 ms (3.724 ms / 100) 3.716 -> 3.699 ( -0.46%) [ +0.16% +0.00% +0.11% / -0.46% -0.43% -0.46%] index_fill_ strided 3 : Elapsed 0.037 ms (3.722 ms / 100) 3.725 -> 3.706 ( -0.51%) [ +0.00% +0.03% +0.05% / -0.43% -0.51% -0.38%] index_fill_ random : Elapsed 0.037 ms (3.725 ms / 100) 3.727 -> 3.709 ( -0.48%) [ +0.00% +0.05% +0.00% / -0.48% -0.35% -0.40%] index_fill_ random_sorted : Elapsed 0.037 ms (3.727 ms / 100) B = [4, 5, 40, 16] (stride (200, 40, 1, 800)) A = [20, 5, 40, 16] (stride (200, 1, 5, 4000)) dim = 0 2.015 -> 2.013 ( -0.10%) [ +0.05% +0.00% +0.05% / +0.05% +0.00% -0.10%] index_select const : Elapsed 0.020 ms (2.016 ms / 100) 2.002 -> 2.005 ( +0.15%) [ +0.00% +0.35% +0.40% / +0.20% +0.15% +0.20%] index_select wrap : Elapsed 0.020 ms (2.002 ms / 100) 2.005 -> 2.006 ( +0.05%) [ +0.20% +0.10% +0.00% / +0.10% +0.05% +0.20%] index_select linear : Elapsed 0.020 ms (2.009 ms / 100) 2.012 -> 2.007 ( -0.25%) [ +0.00% +0.40% +0.00% / -0.25% +0.20% -0.15%] index_select reverse : Elapsed 0.020 ms (2.012 ms / 100) 2.015 -> 2.011 ( -0.20%) [ +0.20% +0.00% +0.15% / +0.10% -0.10% -0.20%] index_select skip64 : Elapsed 0.020 ms (2.019 ms / 100) 2.013 -> 2.013 ( +0.00%) [ +0.20% +0.00% +0.15% / +0.30% +0.00% +0.05%] index_select skip256 : Elapsed 0.020 ms (2.017 ms / 100) 2.002 -> 2.003 ( +0.05%) [ +0.00% +0.00% +0.20% / +0.15% +0.05% +0.20%] index_select spread : Elapsed 0.020 ms (2.002 ms / 100) 2.010 -> 2.009 ( -0.05%) [ +0.00% +0.00% +0.10% / +0.25% +0.00% -0.05%] index_select strided 3 : Elapsed 0.020 ms (2.010 ms / 100) 2.002 -> 2.001 ( -0.05%) [ +0.25% +0.00% +0.05% / +0.10% +0.00% -0.05%] index_select strided 5 : Elapsed 0.020 ms (2.007 ms / 100) 2.008 -> 2.007 ( -0.05%) [ +0.00% +0.10% +0.20% / +0.15% +0.05% -0.05%] index_select strided 7 : Elapsed 0.020 ms (2.008 ms / 100) 2.005 -> 2.003 ( -0.10%) [ +0.25% +0.40% +0.00% / +0.05% -0.05% -0.10%] index_select strided 8 : Elapsed 0.020 ms (2.010 ms / 100) 2.003 -> 2.002 ( -0.05%) [ +0.00% +0.15% +0.25% / +0.40% -0.05% +0.00%] index_select strided 16 : Elapsed 0.020 ms (2.003 ms / 100) 2.017 -> 2.011 ( -0.30%) [ +0.00% +0.10% +0.00% / -0.25% -0.30% -0.20%] index_select random : Elapsed 0.020 ms (2.017 ms / 100) 2.002 -> 2.006 ( +0.20%) [ +0.15% +0.00% +0.30% / +0.25% +0.30% +0.20%] index_select random_sorted : Elapsed 0.020 ms (2.005 ms / 100) 2.002 -> 2.001 ( -0.05%) [ +0.00% +0.20% +0.05% / +0.20% -0.05% +0.00%] index_select perm : Elapsed 0.020 ms (2.002 ms / 100) 2.005 -> 2.003 ( -0.10%) [ +0.05% +0.00% +0.10% / +0.10% -0.10% +0.10%] index_select perm_sorted : Elapsed 0.020 ms (2.006 ms / 100) B = [4, 5, 40, 16] (stride (1, 160, 4, 800)) A = [20, 5, 40, 16] (stride (3200, 1, 5, 200)) dim = 0 2.006 -> 2.008 ( +0.10%) [ +0.30% +0.15% +0.00% / +0.10% +0.45% +0.30%] index_select const : Elapsed 0.020 ms (2.012 ms / 100) 2.005 -> 2.009 ( +0.20%) [ +0.10% +0.00% +0.25% / +0.20% +0.60% +0.25%] index_select wrap : Elapsed 0.020 ms (2.007 ms / 100) 2.006 -> 2.007 ( +0.05%) [ +0.10% +0.00% +0.15% / +0.05% +0.05% +0.45%] index_select linear : Elapsed 0.020 ms (2.008 ms / 100) 2.006 -> 2.007 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.15% +0.30%] index_select reverse : Elapsed 0.020 ms (2.006 ms / 100) 2.005 -> 2.007 ( +0.10%) [ +0.35% +0.00% +0.15% / +0.10% +0.55% +0.40%] index_select skip64 : Elapsed 0.020 ms (2.012 ms / 100) 2.006 -> 2.007 ( +0.05%) [ +0.00% +0.15% +0.15% / +0.05% +0.20% +0.25%] index_select skip256 : Elapsed 0.020 ms (2.006 ms / 100) 2.007 -> 2.005 ( -0.10%) [ +0.20% +0.30% +0.00% / -0.10% +0.30% +0.45%] index_select spread : Elapsed 0.020 ms (2.011 ms / 100) 2.015 -> 2.013 ( -0.10%) [ +0.10% +0.00% +0.05% / +0.00% +0.00% -0.10%] index_select strided 3 : Elapsed 0.020 ms (2.017 ms / 100) 2.013 -> 2.016 ( +0.15%) [ +0.05% +0.00% +0.20% / +0.15% +0.20% +0.30%] index_select strided 5 : Elapsed 0.020 ms (2.014 ms / 100) 2.005 -> 2.010 ( +0.25%) [ +0.05% +0.15% +0.00% / +0.25% +0.60% +0.30%] index_select strided 7 : Elapsed 0.020 ms (2.006 ms / 100) 2.009 -> 2.014 ( +0.25%) [ +0.15% +0.20% +0.00% / +0.25% +0.55% +0.55%] index_select strided 8 : Elapsed 0.020 ms (2.012 ms / 100) 2.001 -> 2.009 ( +0.40%) [ +0.10% +0.35% +0.00% / +0.40% +0.45% +0.50%] index_select strided 16 : Elapsed 0.020 ms (2.003 ms / 100) 2.009 -> 2.015 ( +0.30%) [ +0.30% +0.10% +0.00% / +0.30% +0.45% +0.40%] index_select random : Elapsed 0.020 ms (2.015 ms / 100) 2.008 -> 2.012 ( +0.20%) [ +0.20% +0.25% +0.00% / +0.20% +0.45% +0.45%] index_select random_sorted : Elapsed 0.020 ms (2.012 ms / 100) 2.008 -> 2.015 ( +0.35%) [ +0.25% +0.45% +0.00% / +0.45% +0.75% +0.35%] index_select perm : Elapsed 0.020 ms (2.013 ms / 100) 2.009 -> 2.005 ( -0.20%) [ +0.05% +0.15% +0.00% / -0.20% +0.20% +0.05%] index_select perm_sorted : Elapsed 0.020 ms (2.010 ms / 100) B = [4, 5, 40, 16] (stride (1, 4, 20, 800)) A = [20, 5, 40, 16] (stride (3200, 40, 1, 200)) dim = 0 2.021 -> 2.014 ( -0.35%) [ +0.00% +0.10% +0.00% / -0.15% -0.35% -0.30%] index_select const : Elapsed 0.020 ms (2.021 ms / 100) 2.045 -> 2.044 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.39% +0.24%] index_select wrap : Elapsed 0.020 ms (2.046 ms / 100) 2.049 -> 2.053 ( +0.20%) [ +0.24% +0.00% +0.10% / +0.29% +0.29% +0.20%] index_select linear : Elapsed 0.021 ms (2.054 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.15% +0.00%] index_select reverse : Elapsed 0.021 ms (2.061 ms / 100) 2.018 -> 2.014 ( -0.20%) [ +0.00% +0.00% +0.00% / +0.10% -0.20% -0.15%] index_select skip64 : Elapsed 0.020 ms (2.018 ms / 100) 2.017 -> 2.017 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.05% +0.10%] index_select skip256 : Elapsed 0.020 ms (2.019 ms / 100) 2.036 -> 2.040 ( +0.20%) [ +0.20% +0.15% +0.00% / +0.20% +0.54% +0.49%] index_select spread : Elapsed 0.020 ms (2.040 ms / 100) 2.049 -> 2.049 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.10% +0.20%] index_select strided 3 : Elapsed 0.021 ms (2.050 ms / 100) 2.038 -> 2.038 ( +0.00%) [ +0.25% +0.20% +0.00% / +0.00% +0.54% +0.44%] index_select strided 5 : Elapsed 0.020 ms (2.043 ms / 100) 2.035 -> 2.038 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.59% +0.59%] index_select strided 7 : Elapsed 0.020 ms (2.038 ms / 100) 2.037 -> 2.040 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.20% +0.34%] index_select strided 8 : Elapsed 0.020 ms (2.040 ms / 100) 2.053 -> 2.055 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.19% +0.15% +0.10%] index_select strided 16 : Elapsed 0.021 ms (2.055 ms / 100) 2.039 -> 2.042 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.20% +0.15% +0.20%] index_select random : Elapsed 0.020 ms (2.039 ms / 100) 2.045 -> 2.047 ( +0.10%) [ +0.20% +0.15% +0.00% / +0.10% +0.44% +0.34%] index_select random_sorted : Elapsed 0.020 ms (2.049 ms / 100) 2.053 -> 2.047 ( -0.29%) [ +0.19% +0.29% +0.00% / +0.00% -0.29% +0.10%] index_select perm : Elapsed 0.021 ms (2.057 ms / 100) 2.049 -> 2.048 ( -0.05%) [ +0.00% +0.20% +0.24% / -0.05% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.020 ms (2.049 ms / 100) out_shape = [20, 4, 40, 16] in_shape = [20, 5, 40, 16] idx_dim = 1 B = [20, 4, 40, 16] (stride (2560, 640, 1, 40)) A = [20, 5, 40, 16] (stride (200, 40, 1, 4000)) dim = 1 5.764 -> 5.762 ( -0.03%) [ +0.12% +0.07% +0.00% / +0.05% +0.10% -0.03%] index_select const : Elapsed 0.058 ms (5.771 ms / 100) 5.835 -> 5.840 ( +0.09%) [ +0.15% +0.00% +0.21% / +0.10% +0.09% +0.17%] index_select wrap : Elapsed 0.058 ms (5.844 ms / 100) 5.834 -> 5.830 ( -0.07%) [ +0.29% +0.00% +0.17% / +0.19% -0.07% +0.10%] index_select linear : Elapsed 0.059 ms (5.851 ms / 100) 5.836 -> 5.838 ( +0.03%) [ +0.12% +0.00% +0.17% / +0.27% +0.03% +0.09%] index_select reverse : Elapsed 0.058 ms (5.843 ms / 100) 5.768 -> 5.764 ( -0.07%) [ +0.02% +0.00% +0.07% / +0.07% -0.07% -0.03%] index_select skip64 : Elapsed 0.058 ms (5.769 ms / 100) 5.763 -> 5.761 ( -0.03%) [ +0.02% +0.00% +0.35% / +0.26% -0.03% +0.02%] index_select skip256 : Elapsed 0.058 ms (5.764 ms / 100) 5.846 -> 5.833 ( -0.22%) [ +0.00% +0.09% +0.05% / +0.02% -0.22% -0.21%] index_select spread : Elapsed 0.058 ms (5.846 ms / 100) 5.844 -> 5.840 ( -0.07%) [ +0.02% +0.00% +0.02% / +0.12% -0.07% +0.05%] index_select strided 3 : Elapsed 0.058 ms (5.845 ms / 100) 5.849 -> 5.835 ( -0.24%) [ +0.00% +0.02% +0.00% / +0.03% -0.12% -0.24%] index_select random : Elapsed 0.058 ms (5.849 ms / 100) 5.840 -> 5.835 ( -0.09%) [ +0.00% +0.07% +0.17% / +0.07% -0.02% -0.09%] index_select random_sorted : Elapsed 0.058 ms (5.840 ms / 100) 5.843 -> 5.836 ( -0.12%) [ +0.00% +0.09% +0.07% / -0.03% -0.03% -0.12%] index_select perm : Elapsed 0.058 ms (5.843 ms / 100) 5.841 -> 5.841 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.09% +0.00% +0.07%] index_select perm_sorted : Elapsed 0.058 ms (5.849 ms / 100) B = [20, 4, 40, 16] (stride (2560, 640, 1, 40)) A = [20, 5, 40, 16] (stride (1, 20, 100, 4000)) dim = 1 5.787 -> 5.790 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.14% +0.07%] index_select const : Elapsed 0.058 ms (5.787 ms / 100) 5.847 -> 5.846 ( -0.02%) [ +0.00% +0.02% +0.26% / +0.15% -0.02% +0.00%] index_select wrap : Elapsed 0.058 ms (5.847 ms / 100) 5.846 -> 5.836 ( -0.17%) [ +0.00% +0.09% +0.17% / +0.14% -0.17% +0.02%] index_select linear : Elapsed 0.058 ms (5.846 ms / 100) 5.849 -> 5.838 ( -0.19%) [ +0.10% +0.00% +0.10% / +0.10% -0.19% -0.14%] index_select reverse : Elapsed 0.059 ms (5.855 ms / 100) 5.792 -> 5.789 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.19% -0.05% -0.03%] index_select skip64 : Elapsed 0.058 ms (5.792 ms / 100) 5.792 -> 5.788 ( -0.07%) [ +0.02% +0.00% +0.07% / +0.00% -0.07% +0.05%] index_select skip256 : Elapsed 0.058 ms (5.793 ms / 100) 5.855 -> 5.841 ( -0.24%) [ +0.03% +0.00% +0.20% / +0.07% -0.22% -0.24%] index_select spread : Elapsed 0.059 ms (5.857 ms / 100) 5.861 -> 5.864 ( +0.05%) [ +0.00% +0.15% +0.07% / +0.26% +0.09% +0.05%] index_select strided 3 : Elapsed 0.059 ms (5.861 ms / 100) 5.856 -> 5.852 ( -0.07%) [ +0.17% +0.00% +0.24% / +0.19% +0.03% -0.07%] index_select random : Elapsed 0.059 ms (5.866 ms / 100) 5.852 -> 5.842 ( -0.17%) [ +0.05% +0.00% +0.02% / +0.10% -0.17% -0.07%] index_select random_sorted : Elapsed 0.059 ms (5.855 ms / 100) 5.865 -> 5.860 ( -0.09%) [ +0.02% +0.00% +0.09% / +0.00% -0.09% +0.00%] index_select perm : Elapsed 0.059 ms (5.866 ms / 100) 5.853 -> 5.849 ( -0.07%) [ +0.00% +0.09% +0.07% / +0.14% +0.05% -0.07%] index_select perm_sorted : Elapsed 0.059 ms (5.853 ms / 100) B = [20, 4, 40, 16] (stride (16, 12800, 320, 1)) A = [20, 5, 40, 16] (stride (16, 12800, 320, 1)) dim = 1 5.647 -> 5.660 ( +0.23%) [ +0.00% +0.23% +0.39% / +0.23% +0.51% +0.55%] index_select const : Elapsed 0.056 ms (5.647 ms / 100) 5.777 -> 5.769 ( -0.14%) [ +0.09% +0.00% +0.21% / +0.24% -0.14% -0.05%] index_select wrap : Elapsed 0.058 ms (5.782 ms / 100) 5.775 -> 5.770 ( -0.09%) [ +0.19% +0.12% +0.00% / +0.12% -0.05% -0.09%] index_select linear : Elapsed 0.058 ms (5.786 ms / 100) 5.771 -> 5.757 ( -0.24%) [ +0.16% +0.00% +0.28% / +0.19% -0.17% -0.24%] index_select reverse : Elapsed 0.058 ms (5.780 ms / 100) 5.641 -> 5.652 ( +0.20%) [ +0.14% +0.00% +0.20% / +0.20% +0.25% +0.41%] index_select skip64 : Elapsed 0.056 ms (5.649 ms / 100) 5.640 -> 5.649 ( +0.16%) [ +0.00% +0.12% +0.18% / +0.16% +0.37% +0.53%] index_select skip256 : Elapsed 0.056 ms (5.640 ms / 100) 5.780 -> 5.777 ( -0.05%) [ +0.00% +0.17% +0.19% / +0.03% -0.05% -0.02%] index_select spread : Elapsed 0.058 ms (5.780 ms / 100) 5.764 -> 5.765 ( +0.02%) [ +0.00% +0.23% +0.16% / +0.23% +0.02% +0.03%] index_select strided 3 : Elapsed 0.058 ms (5.764 ms / 100) 5.727 -> 5.719 ( -0.14%) [ +0.00% +0.03% +0.23% / +0.19% -0.14% -0.12%] index_select random : Elapsed 0.057 ms (5.727 ms / 100) 5.725 -> 5.714 ( -0.19%) [ +0.12% +0.00% +0.07% / +0.05% -0.19% -0.10%] index_select random_sorted : Elapsed 0.057 ms (5.732 ms / 100) 5.765 -> 5.767 ( +0.03%) [ +0.00% +0.07% +0.12% / +0.03% +0.17% +0.23%] index_select perm : Elapsed 0.058 ms (5.765 ms / 100) 5.769 -> 5.763 ( -0.10%) [ +0.03% +0.00% +0.26% / +0.07% -0.10% +0.03%] index_select perm_sorted : Elapsed 0.058 ms (5.771 ms / 100) B = [20, 4, 40, 16] (stride (64, 1, 1280, 4)) A = [20, 5, 40, 16] (stride (1, 320, 1600, 20)) dim = 1 5.812 -> 5.787 ( -0.43%) [ +0.00% +0.03% +0.00% / -0.03% -0.43% -0.34%] index_select const : Elapsed 0.058 ms (5.812 ms / 100) 5.861 -> 5.843 ( -0.31%) [ +0.00% +0.02% +0.14% / +0.12% -0.31% -0.24%] index_select wrap : Elapsed 0.059 ms (5.861 ms / 100) 5.859 -> 5.844 ( -0.26%) [ +0.02% +0.00% +0.14% / +0.14% -0.26% -0.26%] index_select linear : Elapsed 0.059 ms (5.860 ms / 100) 5.848 -> 5.847 ( -0.02%) [ +0.15% +0.00% +0.19% / +0.21% -0.02% +0.05%] index_select reverse : Elapsed 0.059 ms (5.857 ms / 100) 5.809 -> 5.786 ( -0.40%) [ +0.00% +0.00% +0.07% / +0.00% -0.26% -0.40%] index_select skip64 : Elapsed 0.058 ms (5.809 ms / 100) 5.808 -> 5.792 ( -0.28%) [ +0.03% +0.02% +0.00% / +0.14% -0.19% -0.28%] index_select skip256 : Elapsed 0.058 ms (5.810 ms / 100) 5.857 -> 5.845 ( -0.20%) [ +0.00% +0.03% +0.22% / +0.24% -0.20% -0.17%] index_select spread : Elapsed 0.059 ms (5.857 ms / 100) 5.853 -> 5.839 ( -0.24%) [ +0.00% +0.09% +0.00% / +0.10% -0.24% -0.22%] index_select strided 3 : Elapsed 0.059 ms (5.853 ms / 100) 5.840 -> 5.828 ( -0.21%) [ +0.09% +0.00% +0.10% / +0.21% -0.21% -0.10%] index_select random : Elapsed 0.058 ms (5.845 ms / 100) 5.845 -> 5.822 ( -0.39%) [ +0.00% +0.12% +0.12% / +0.07% -0.26% -0.39%] index_select random_sorted : Elapsed 0.058 ms (5.845 ms / 100) 5.857 -> 5.844 ( -0.22%) [ +0.00% +0.00% +0.00% / -0.02% -0.22% -0.22%] index_select perm : Elapsed 0.059 ms (5.857 ms / 100) 5.859 -> 5.845 ( -0.24%) [ +0.00% +0.15% +0.05% / +0.00% -0.24% -0.24%] index_select perm_sorted : Elapsed 0.059 ms (5.859 ms / 100) B = [20, 4, 40, 16] (stride (16, 320, 1280, 1)) A = [20, 5, 40, 16] (stride (3200, 40, 1, 200)) dim = 1 5.737 -> 5.725 ( -0.21%) [ +0.10% +0.00% +0.16% / +0.05% -0.17% -0.21%] index_select const : Elapsed 0.057 ms (5.743 ms / 100) 5.827 -> 5.803 ( -0.41%) [ +0.12% +0.00% +0.03% / +0.15% -0.27% -0.41%] index_select wrap : Elapsed 0.058 ms (5.834 ms / 100) 5.822 -> 5.803 ( -0.33%) [ +0.19% +0.00% +0.09% / +0.12% -0.33% -0.29%] index_select linear : Elapsed 0.058 ms (5.833 ms / 100) 5.823 -> 5.803 ( -0.34%) [ +0.09% +0.07% +0.00% / +0.24% -0.34% -0.31%] index_select reverse : Elapsed 0.058 ms (5.828 ms / 100) 5.736 -> 5.724 ( -0.21%) [ +0.02% +0.00% +0.12% / +0.17% -0.07% -0.21%] index_select skip64 : Elapsed 0.057 ms (5.737 ms / 100) 5.738 -> 5.724 ( -0.24%) [ +0.00% +0.00% +0.10% / +0.17% -0.24% -0.21%] index_select skip256 : Elapsed 0.057 ms (5.738 ms / 100) 5.824 -> 5.799 ( -0.43%) [ +0.00% +0.12% +0.09% / +0.22% -0.41% -0.43%] index_select spread : Elapsed 0.058 ms (5.824 ms / 100) 5.836 -> 5.810 ( -0.45%) [ +0.03% +0.03% +0.00% / +0.00% -0.45% -0.45%] index_select strided 3 : Elapsed 0.058 ms (5.838 ms / 100) 5.802 -> 5.783 ( -0.33%) [ +0.00% +0.00% +0.00% / -0.10% -0.31% -0.33%] index_select random : Elapsed 0.058 ms (5.802 ms / 100) 5.796 -> 5.779 ( -0.29%) [ +0.00% +0.05% +0.17% / +0.07% -0.29% -0.28%] index_select random_sorted : Elapsed 0.058 ms (5.796 ms / 100) 5.814 -> 5.806 ( -0.14%) [ +0.15% +0.00% +0.15% / +0.12% -0.12% -0.14%] index_select perm : Elapsed 0.058 ms (5.823 ms / 100) 5.829 -> 5.802 ( -0.46%) [ +0.03% +0.00% +0.17% / +0.12% -0.46% -0.31%] index_select perm_sorted : Elapsed 0.058 ms (5.831 ms / 100) B = [20, 4, 40, 16] (stride (160, 1, 4, 3200)) A = [20, 5, 40, 16] (stride (200, 40, 1, 4000)) dim = 1 5.886 -> 5.876 ( -0.17%) [ +0.03% +0.00% +0.12% / +0.08% -0.17% -0.15%] index_select const : Elapsed 0.059 ms (5.888 ms / 100) 5.966 -> 5.951 ( -0.25%) [ +0.12% +0.00% +0.13% / +0.22% -0.20% -0.25%] index_select wrap : Elapsed 0.060 ms (5.973 ms / 100) 5.971 -> 5.946 ( -0.42%) [ +0.05% +0.03% +0.00% / +0.07% -0.42% -0.27%] index_select linear : Elapsed 0.060 ms (5.974 ms / 100) 5.967 -> 5.949 ( -0.30%) [ +0.00% +0.00% +0.18% / +0.17% -0.15% -0.30%] index_select reverse : Elapsed 0.060 ms (5.967 ms / 100) 5.886 -> 5.878 ( -0.14%) [ +0.10% +0.00% +0.10% / +0.08% -0.12% -0.14%] index_select skip64 : Elapsed 0.059 ms (5.892 ms / 100) 5.885 -> 5.877 ( -0.14%) [ +0.14% +0.00% +0.19% / +0.19% -0.14% -0.12%] index_select skip256 : Elapsed 0.059 ms (5.893 ms / 100) 5.968 -> 5.946 ( -0.37%) [ +0.00% +0.03% +0.12% / +0.07% -0.28% -0.37%] index_select spread : Elapsed 0.060 ms (5.968 ms / 100) 5.980 -> 5.948 ( -0.54%) [ +0.10% +0.08% +0.00% / -0.02% -0.35% -0.54%] index_select strided 3 : Elapsed 0.060 ms (5.986 ms / 100) 5.960 -> 5.928 ( -0.54%) [ +0.05% +0.00% +0.07% / +0.03% -0.45% -0.54%] index_select random : Elapsed 0.060 ms (5.963 ms / 100) 5.949 -> 5.915 ( -0.57%) [ +0.00% +0.05% +0.03% / +0.22% -0.54% -0.57%] index_select random_sorted : Elapsed 0.059 ms (5.949 ms / 100) 5.968 -> 5.952 ( -0.27%) [ +0.00% +0.00% +0.07% / +0.10% -0.27% -0.27%] index_select perm : Elapsed 0.060 ms (5.968 ms / 100) 5.970 -> 5.952 ( -0.30%) [ +0.02% +0.00% +0.13% / +0.08% -0.30% -0.28%] index_select perm_sorted : Elapsed 0.060 ms (5.971 ms / 100) B = [20, 4, 40, 16] (stride (1, 800, 20, 3200)) A = [20, 5, 40, 16] (stride (40, 12800, 1, 800)) dim = 1 5.711 -> 5.710 ( -0.02%) [ +0.05% +0.04% +0.00% / -0.02% +0.37% +0.54%] index_select const : Elapsed 0.057 ms (5.714 ms / 100) 5.783 -> 5.785 ( +0.03%) [ +0.00% +0.09% +0.00% / +0.03% +0.12% +0.17%] index_select wrap : Elapsed 0.058 ms (5.783 ms / 100) 5.785 -> 5.788 ( +0.05%) [ +0.00% +0.00% +0.19% / +0.05% +0.05% +0.16%] index_select linear : Elapsed 0.058 ms (5.785 ms / 100) 5.776 -> 5.780 ( +0.07%) [ +0.00% +0.19% +0.19% / +0.24% +0.07% +0.12%] index_select reverse : Elapsed 0.058 ms (5.776 ms / 100) 5.706 -> 5.706 ( +0.00%) [ +0.00% +0.05% +0.09% / +0.00% +0.49% +0.46%] index_select skip64 : Elapsed 0.057 ms (5.706 ms / 100) 5.711 -> 5.710 ( -0.02%) [ +0.02% +0.00% +0.07% / -0.02% +0.42% +0.28%] index_select skip256 : Elapsed 0.057 ms (5.712 ms / 100) 5.783 -> 5.783 ( +0.00%) [ +0.16% +0.10% +0.00% / +0.00% +0.16% +0.21%] index_select spread : Elapsed 0.058 ms (5.792 ms / 100) 5.775 -> 5.780 ( +0.09%) [ +0.05% +0.00% +0.38% / +0.09% +0.29% +0.38%] index_select strided 3 : Elapsed 0.058 ms (5.778 ms / 100) 5.782 -> 5.770 ( -0.21%) [ +0.00% +0.17% +0.12% / +0.12% -0.03% -0.21%] index_select random : Elapsed 0.058 ms (5.782 ms / 100) 5.773 -> 5.772 ( -0.02%) [ +0.03% +0.00% +0.10% / +0.03% -0.02% -0.02%] index_select random_sorted : Elapsed 0.058 ms (5.775 ms / 100) 5.780 -> 5.788 ( +0.14%) [ +0.10% +0.03% +0.00% / +0.21% +0.14% +0.28%] index_select perm : Elapsed 0.058 ms (5.786 ms / 100) 5.777 -> 5.793 ( +0.28%) [ +0.09% +0.00% +0.16% / +0.28% +0.33% +0.40%] index_select perm_sorted : Elapsed 0.058 ms (5.782 ms / 100) B = [20, 4, 40, 16] (stride (1, 20, 80, 3200)) A = [20, 5, 40, 16] (stride (1, 320, 1600, 20)) dim = 1 5.767 -> 5.758 ( -0.16%) [ +0.03% +0.00% +0.12% / +0.14% -0.16% -0.09%] index_select const : Elapsed 0.058 ms (5.769 ms / 100) 5.811 -> 5.799 ( -0.21%) [ +0.02% +0.00% +0.05% / +0.24% +0.02% -0.21%] index_select wrap : Elapsed 0.058 ms (5.812 ms / 100) 5.817 -> 5.797 ( -0.34%) [ +0.02% +0.00% +0.02% / +0.03% -0.34% -0.22%] index_select linear : Elapsed 0.058 ms (5.818 ms / 100) 5.802 -> 5.800 ( -0.03%) [ +0.05% +0.07% +0.00% / +0.19% -0.03% +0.16%] index_select reverse : Elapsed 0.058 ms (5.805 ms / 100) 5.761 -> 5.754 ( -0.12%) [ +0.19% +0.00% +0.09% / +0.28% -0.12% -0.03%] index_select skip64 : Elapsed 0.058 ms (5.772 ms / 100) 5.764 -> 5.759 ( -0.09%) [ +0.00% +0.02% +0.07% / +0.05% -0.07% -0.09%] index_select skip256 : Elapsed 0.058 ms (5.764 ms / 100) 5.811 -> 5.802 ( -0.15%) [ +0.07% +0.07% +0.00% / -0.03% -0.15% -0.15%] index_select spread : Elapsed 0.058 ms (5.815 ms / 100) 5.805 -> 5.793 ( -0.21%) [ +0.16% +0.00% +0.14% / +0.12% -0.21% -0.10%] index_select strided 3 : Elapsed 0.058 ms (5.814 ms / 100) 5.797 -> 5.785 ( -0.21%) [ +0.05% +0.00% +0.07% / +0.21% -0.21% -0.21%] index_select random : Elapsed 0.058 ms (5.800 ms / 100) 5.787 -> 5.769 ( -0.31%) [ +0.07% +0.00% +0.09% / +0.14% -0.31% -0.21%] index_select random_sorted : Elapsed 0.058 ms (5.791 ms / 100) 5.813 -> 5.801 ( -0.21%) [ +0.00% +0.02% +0.09% / -0.07% -0.15% -0.21%] index_select perm : Elapsed 0.058 ms (5.813 ms / 100) 5.809 -> 5.799 ( -0.17%) [ +0.07% +0.00% +0.19% / +0.03% -0.14% -0.17%] index_select perm_sorted : Elapsed 0.058 ms (5.813 ms / 100) out_shape = [20, 5, 4, 16] in_shape = [20, 5, 40, 16] idx_dim = 2 B = [20, 5, 4, 16] (stride (320, 1, 80, 5)) A = [20, 5, 40, 16] (stride (40, 800, 1, 4000)) dim = 2 1.371 -> 1.374 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.58% +0.51%] index_select const : Elapsed 0.014 ms (1.372 ms / 100) 1.370 -> 1.372 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.66% +0.80%] index_select wrap : Elapsed 0.014 ms (1.372 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.80% +0.66%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.95% +0.80%] index_select reverse : Elapsed 0.014 ms (1.370 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_select skip64 : Elapsed 0.014 ms (1.369 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.37% +0.07% +0.00% / +0.15% +0.66% +0.66%] index_select skip256 : Elapsed 0.014 ms (1.371 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.95% +1.02%] index_select spread : Elapsed 0.014 ms (1.373 ms / 100) 1.370 -> 1.373 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.88% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.371 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.58% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.368 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.95% +0.66%] index_select strided 7 : Elapsed 0.014 ms (1.369 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.374 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.44% +0.36%] index_select strided 16 : Elapsed 0.014 ms (1.370 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.51% +0.36%] index_select random : Elapsed 0.014 ms (1.374 ms / 100) 1.373 -> 1.372 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.51% +0.44%] index_select random_sorted : Elapsed 0.014 ms (1.373 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.51% +0.44%] index_select perm : Elapsed 0.014 ms (1.369 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.59% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) B = [20, 5, 4, 16] (stride (64, 1280, 16, 1)) A = [20, 5, 40, 16] (stride (3200, 16, 80, 1)) dim = 2 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.24%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.39%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.63% +0.39%] index_select linear : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.55% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select spread : Elapsed 0.013 ms (1.273 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.63% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.55% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.39% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.39% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.39% +0.39%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.24% +0.24%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.24% +0.16%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [20, 5, 4, 16] (stride (16, 1280, 320, 1)) A = [20, 5, 40, 16] (stride (200, 1, 5, 4000)) dim = 2 1.335 -> 1.336 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.75% +0.82%] index_select const : Elapsed 0.013 ms (1.336 ms / 100) 1.336 -> 1.340 ( +0.30%) [ +0.30% +0.00% +0.00% / +0.30% +0.75% +0.60%] index_select wrap : Elapsed 0.013 ms (1.340 ms / 100) 1.339 -> 1.338 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.37% +0.30%] index_select linear : Elapsed 0.013 ms (1.339 ms / 100) 1.336 -> 1.340 ( +0.30%) [ +0.30% +0.07% +0.00% / +0.30% +0.75% +0.75%] index_select reverse : Elapsed 0.013 ms (1.340 ms / 100) 1.336 -> 1.336 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.52% +0.52%] index_select skip64 : Elapsed 0.013 ms (1.338 ms / 100) 1.338 -> 1.340 ( +0.15%) [ +0.00% +0.15% +0.22% / +0.15% +0.37% +0.30%] index_select skip256 : Elapsed 0.013 ms (1.338 ms / 100) 1.328 -> 1.332 ( +0.30%) [ +0.38% +0.00% +0.30% / +0.30% +0.60% +0.68%] index_select spread : Elapsed 0.013 ms (1.333 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.30% +0.37%] index_select strided 3 : Elapsed 0.013 ms (1.336 ms / 100) 1.335 -> 1.335 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.45% +0.37%] index_select strided 5 : Elapsed 0.013 ms (1.337 ms / 100) 1.336 -> 1.334 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.60% +0.52%] index_select strided 7 : Elapsed 0.013 ms (1.336 ms / 100) 1.330 -> 1.332 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.15% +0.53% +0.53%] index_select strided 8 : Elapsed 0.013 ms (1.331 ms / 100) 1.335 -> 1.335 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.37% +0.37%] index_select strided 16 : Elapsed 0.013 ms (1.335 ms / 100) 1.343 -> 1.346 ( +0.22%) [ +0.00% +0.00% +0.15% / +0.22% +0.52% +0.30%] index_select random : Elapsed 0.013 ms (1.343 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.60% +0.53%] index_select random_sorted : Elapsed 0.013 ms (1.335 ms / 100) 1.345 -> 1.347 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.59% +0.82%] index_select perm : Elapsed 0.013 ms (1.346 ms / 100) 1.334 -> 1.338 ( +0.30%) [ +0.30% +0.22% +0.00% / +0.30% +0.67% +0.67%] index_select perm_sorted : Elapsed 0.013 ms (1.338 ms / 100) B = [20, 5, 4, 16] (stride (4, 1280, 1, 80)) A = [20, 5, 40, 16] (stride (16, 12800, 320, 1)) dim = 2 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.276 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.63% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select spread : Elapsed 0.013 ms (1.274 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.47% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.39% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select perm : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) B = [20, 5, 4, 16] (stride (1, 320, 1600, 20)) A = [20, 5, 40, 16] (stride (3200, 16, 80, 1)) dim = 2 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.55% +0.55%] index_select const : Elapsed 0.013 ms (1.273 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select wrap : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.31%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.71% +0.71%] index_select reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.47% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.31% +0.31%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.39%] index_select strided 3 : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select strided 5 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.55% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.47% +0.55%] index_select random : Elapsed 0.013 ms (1.275 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.63% +0.63%] index_select random_sorted : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.24% +0.00% / +0.08% +0.47% +0.55%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.79% +0.71%] index_select perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) B = [20, 5, 4, 16] (stride (1, 320, 1600, 20)) A = [20, 5, 40, 16] (stride (1, 20, 100, 4000)) dim = 2 1.366 -> 1.369 ( +0.22%) [ +0.15% +0.07% +0.00% / +0.22% +0.59% +0.66%] index_select const : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.44% +0.29%] index_select wrap : Elapsed 0.014 ms (1.367 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.22% +0.00% +0.07% / +0.07% +0.44% +0.44%] index_select linear : Elapsed 0.014 ms (1.367 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.00% +0.07% +0.07% / +0.29% +0.44% +0.37%] index_select reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.369 ( +0.22%) [ +0.00% +0.00% +0.07% / +0.22% +0.51% +0.44%] index_select skip64 : Elapsed 0.014 ms (1.366 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.66% +0.44%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_select spread : Elapsed 0.014 ms (1.368 ms / 100) 1.357 -> 1.360 ( +0.22%) [ +0.15% +0.29% +0.00% / +0.22% +0.74% +0.88%] index_select strided 3 : Elapsed 0.014 ms (1.359 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.66% +0.59%] index_select strided 5 : Elapsed 0.014 ms (1.368 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.80% +0.80%] index_select strided 7 : Elapsed 0.014 ms (1.369 ms / 100) 1.372 -> 1.374 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.80% +0.87%] index_select strided 8 : Elapsed 0.014 ms (1.372 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.80% +0.66%] index_select strided 16 : Elapsed 0.014 ms (1.367 ms / 100) 1.363 -> 1.365 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.81% +0.66%] index_select random : Elapsed 0.014 ms (1.365 ms / 100) 1.364 -> 1.367 ( +0.22%) [ +0.00% +0.00% +0.07% / +0.22% +0.59% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.364 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.80% +0.73%] index_select perm : Elapsed 0.014 ms (1.368 ms / 100) 1.371 -> 1.376 ( +0.36%) [ +0.00% +0.29% +0.07% / +0.36% +0.95% +0.95%] index_select perm_sorted : Elapsed 0.014 ms (1.371 ms / 100) B = [20, 5, 4, 16] (stride (20, 1, 5, 400)) A = [20, 5, 40, 16] (stride (5, 1, 1600, 100)) dim = 2 1.230 -> 1.233 ( +0.24%) [ +0.24% +0.16% +0.00% / +0.33% +0.24% +0.24%] index_select const : Elapsed 0.012 ms (1.233 ms / 100) 1.215 -> 1.215 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.49%] index_select wrap : Elapsed 0.012 ms (1.215 ms / 100) 1.207 -> 1.210 ( +0.25%) [ +0.33% +0.17% +0.00% / +0.25% +0.50% +0.50%] index_select linear : Elapsed 0.012 ms (1.211 ms / 100) 1.236 -> 1.237 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.81% +0.57%] index_select reverse : Elapsed 0.012 ms (1.237 ms / 100) 1.227 -> 1.229 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.49% +0.49%] index_select skip64 : Elapsed 0.012 ms (1.229 ms / 100) 1.229 -> 1.228 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.24% +0.49%] index_select skip256 : Elapsed 0.012 ms (1.229 ms / 100) 1.204 -> 1.204 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.66%] index_select spread : Elapsed 0.012 ms (1.204 ms / 100) 1.210 -> 1.212 ( +0.17%) [ +0.08% +0.17% +0.00% / +0.17% +0.58% +0.74%] index_select strided 3 : Elapsed 0.012 ms (1.211 ms / 100) 1.210 -> 1.212 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.50% +0.58%] index_select strided 5 : Elapsed 0.012 ms (1.210 ms / 100) 1.220 -> 1.220 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select strided 7 : Elapsed 0.012 ms (1.220 ms / 100) 1.222 -> 1.223 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.41% +0.41%] index_select strided 8 : Elapsed 0.012 ms (1.224 ms / 100) 1.222 -> 1.223 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.25%] index_select strided 16 : Elapsed 0.012 ms (1.223 ms / 100) 1.233 -> 1.234 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_select random : Elapsed 0.012 ms (1.234 ms / 100) 1.228 -> 1.230 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.33% +0.33%] index_select random_sorted : Elapsed 0.012 ms (1.229 ms / 100) 1.237 -> 1.240 ( +0.24%) [ +0.16% +0.16% +0.00% / +0.24% +0.57% +0.57%] index_select perm : Elapsed 0.012 ms (1.239 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.33%] index_select perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) B = [20, 5, 4, 16] (stride (5, 1, 100, 400)) A = [20, 5, 40, 16] (stride (40, 800, 1, 4000)) dim = 2 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.285 ( +0.39%) [ +0.08% +0.00% +0.00% / +0.55% +0.47% +0.39%] index_select wrap : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.55%] index_select linear : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.47% +0.39%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.70% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.283 ( +0.31%) [ +0.39% +0.08% +0.00% / +0.31% +0.70% +0.70%] index_select skip256 : Elapsed 0.013 ms (1.284 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.00% +0.00% +0.62% / -0.08% +0.62% +0.55%] index_select spread : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.283 -> 1.282 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.47% +0.23%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.55% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.23% +0.16% +0.00% / +0.16% +0.70% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.285 ms / 100) 1.284 -> 1.283 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.31% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select random : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.08% +0.08%] index_select random_sorted : Elapsed 0.013 ms (1.287 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.39% +0.47%] index_select perm : Elapsed 0.013 ms (1.287 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) out_shape = [20, 5, 40, 4] in_shape = [20, 5, 40, 16] idx_dim = 3 B = [20, 5, 40, 4] (stride (160, 3200, 4, 1)) A = [20, 5, 40, 16] (stride (1, 20, 100, 4000)) dim = 3 2.544 -> 2.548 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.39% +0.31%] index_select const : Elapsed 0.025 ms (2.548 ms / 100) 2.548 -> 2.547 ( -0.04%) [ +0.00% +0.16% +0.08% / -0.04% +0.43% +0.55%] index_select wrap : Elapsed 0.025 ms (2.548 ms / 100) 2.548 -> 2.553 ( +0.20%) [ +0.20% +0.00% +0.00% / +0.20% +0.55% +0.55%] index_select linear : Elapsed 0.026 ms (2.553 ms / 100) 2.547 -> 2.553 ( +0.24%) [ +0.24% +0.08% +0.00% / +0.24% +0.47% +0.67%] index_select reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.545 -> 2.545 ( +0.00%) [ +0.00% +0.00% +0.24% / +0.00% +0.28% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.545 ms / 100) 2.548 -> 2.545 ( -0.12%) [ +0.08% +0.00% +0.20% / -0.12% +0.24% +0.08%] index_select skip256 : Elapsed 0.026 ms (2.550 ms / 100) 2.551 -> 2.551 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.27% +0.16%] index_select spread : Elapsed 0.026 ms (2.554 ms / 100) 2.546 -> 2.553 ( +0.27%) [ +0.24% +0.00% +0.16% / +0.27% +0.35% +0.55%] index_select strided 3 : Elapsed 0.026 ms (2.552 ms / 100) 2.549 -> 2.555 ( +0.24%) [ +0.08% +0.04% +0.00% / +0.24% +0.31% +0.51%] index_select strided 5 : Elapsed 0.026 ms (2.551 ms / 100) 2.546 -> 2.545 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.20% +0.39%] index_select strided 7 : Elapsed 0.025 ms (2.547 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.20% +0.24% +0.00% / +0.04% +0.55% +0.35%] index_select strided 8 : Elapsed 0.026 ms (2.551 ms / 100) 2.547 -> 2.551 ( +0.16%) [ +0.08% +0.12% +0.00% / +0.16% +0.31% +0.35%] index_select random : Elapsed 0.025 ms (2.549 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.31% +0.16% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.548 ms / 100) 2.546 -> 2.548 ( +0.08%) [ +0.20% +0.04% +0.00% / +0.08% +0.16% +0.20%] index_select perm : Elapsed 0.026 ms (2.551 ms / 100) 2.548 -> 2.551 ( +0.12%) [ +0.24% +0.27% +0.00% / +0.12% +0.35% +0.27%] index_select perm_sorted : Elapsed 0.026 ms (2.554 ms / 100) B = [20, 5, 40, 4] (stride (20, 4, 400, 1)) A = [20, 5, 40, 16] (stride (1, 320, 1600, 20)) dim = 3 2.446 -> 2.447 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.29% +0.08%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.41% +0.37%] index_select wrap : Elapsed 0.025 ms (2.452 ms / 100) 2.456 -> 2.457 ( +0.04%) [ +0.16% +0.04% +0.00% / +0.08% +0.12% +0.04%] index_select linear : Elapsed 0.025 ms (2.460 ms / 100) 2.457 -> 2.448 ( -0.37%) [ +0.12% +0.00% +0.00% / -0.08% -0.37% -0.04%] index_select reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.00% +0.16% +0.25% / +0.12% +0.33% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.449 -> 2.448 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.08% -0.04% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.24% +0.04% +0.00% / +0.12% +0.08% +0.41%] index_select spread : Elapsed 0.025 ms (2.457 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.41% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.450 -> 2.454 ( +0.16%) [ +0.00% +0.12% +0.24% / +0.16% +0.53% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.442 -> 2.441 ( -0.04%) [ +0.00% +0.04% +0.12% / -0.04% +0.45% +0.53%] index_select strided 7 : Elapsed 0.024 ms (2.442 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.45% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.450 -> 2.455 ( +0.20%) [ +0.00% +0.16% +0.00% / +0.20% +0.29% +0.20%] index_select random : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.454 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.20% +0.53%] index_select random_sorted : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.12% +0.16% +0.00% / +0.16% +0.25% +0.20%] index_select perm : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.12% +0.00% +0.08% / -0.08% +0.08% +0.29%] index_select perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) B = [20, 5, 40, 4] (stride (1, 20, 400, 100)) A = [20, 5, 40, 16] (stride (3200, 16, 80, 1)) dim = 3 2.595 -> 2.596 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.15% +0.23%] index_select const : Elapsed 0.026 ms (2.598 ms / 100) 2.596 -> 2.596 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.35% +0.42%] index_select wrap : Elapsed 0.026 ms (2.598 ms / 100) 2.595 -> 2.601 ( +0.23%) [ +0.19% +0.08% +0.00% / +0.23% +0.31% +0.85%] index_select linear : Elapsed 0.026 ms (2.600 ms / 100) 2.590 -> 2.592 ( +0.08%) [ +0.23% +0.12% +0.00% / +0.08% +0.50% +0.50%] index_select reverse : Elapsed 0.026 ms (2.596 ms / 100) 2.589 -> 2.594 ( +0.19%) [ +0.19% +0.12% +0.00% / +0.19% +0.54% +0.62%] index_select skip64 : Elapsed 0.026 ms (2.594 ms / 100) 2.590 -> 2.590 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.42% +0.46%] index_select skip256 : Elapsed 0.026 ms (2.593 ms / 100) 2.612 -> 2.613 ( +0.04%) [ +0.27% +0.00% +0.23% / +0.04% +0.50% +0.61%] index_select spread : Elapsed 0.026 ms (2.619 ms / 100) 2.614 -> 2.620 ( +0.23%) [ +0.11% +0.11% +0.00% / +0.23% +0.46% +0.50%] index_select strided 3 : Elapsed 0.026 ms (2.617 ms / 100) 2.618 -> 2.618 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.27% +0.42%] index_select strided 5 : Elapsed 0.026 ms (2.620 ms / 100) 2.615 -> 2.614 ( -0.04%) [ +0.08% +0.00% +0.15% / -0.04% +0.38% +0.54%] index_select strided 7 : Elapsed 0.026 ms (2.617 ms / 100) 2.624 -> 2.629 ( +0.19%) [ +0.00% +0.11% +0.19% / +0.19% +0.50% +0.57%] index_select strided 8 : Elapsed 0.026 ms (2.624 ms / 100) 2.631 -> 2.636 ( +0.19%) [ +0.11% +0.04% +0.00% / +0.19% +0.27% +0.19%] index_select random : Elapsed 0.026 ms (2.634 ms / 100) 2.628 -> 2.630 ( +0.08%) [ +0.15% +0.00% +0.11% / +0.08% +0.27% +0.23%] index_select random_sorted : Elapsed 0.026 ms (2.632 ms / 100) 2.627 -> 2.629 ( +0.08%) [ +0.15% +0.23% +0.00% / +0.08% +0.34% +0.30%] index_select perm : Elapsed 0.026 ms (2.631 ms / 100) 2.619 -> 2.616 ( -0.11%) [ +0.00% +0.04% +0.00% / -0.11% +0.27% +0.34%] index_select perm_sorted : Elapsed 0.026 ms (2.619 ms / 100) B = [20, 5, 40, 4] (stride (200, 40, 1, 4000)) dim = 3 fill_cnt = 16 0.934 -> 0.935 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.75% +0.86%] index_fill_ const : Elapsed 0.009 ms (0.935 ms / 100) 0.939 -> 0.940 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.64% +0.64%] index_fill_ linear : Elapsed 0.009 ms (0.940 ms / 100) 0.942 -> 0.941 ( -0.11%) [ +0.00% +0.11% +0.11% / +0.00% +0.00% -0.11%] index_fill_ reverse : Elapsed 0.009 ms (0.942 ms / 100) 0.942 -> 0.942 ( +0.00%) [ +0.21% +0.00% +0.32% / +0.21% +0.00% +0.00%] index_fill_ skip64 : Elapsed 0.009 ms (0.944 ms / 100) 0.936 -> 0.938 ( +0.21%) [ +0.21% +0.11% +0.00% / +0.21% +0.75% +0.64%] index_fill_ skip256 : Elapsed 0.009 ms (0.938 ms / 100) 0.937 -> 0.938 ( +0.11%) [ +0.00% +0.00% +0.11% / +0.11% +0.43% +0.53%] index_fill_ spread : Elapsed 0.009 ms (0.937 ms / 100) 0.941 -> 0.940 ( -0.11%) [ +0.11% +0.11% +0.00% / +0.21% -0.11% +0.85%] index_fill_ strided 3 : Elapsed 0.009 ms (0.942 ms / 100) 0.936 -> 0.937 ( +0.11%) [ +0.21% +0.11% +0.00% / +0.11% +0.21% +0.11%] index_fill_ random : Elapsed 0.009 ms (0.938 ms / 100) 0.943 -> 0.936 ( -0.74%) [ +0.11% +0.00% +0.00% / +0.00% -0.74% -0.74%] index_fill_ random_sorted : Elapsed 0.009 ms (0.944 ms / 100) B = [20, 5, 40, 4] (stride (200, 40, 1, 4000)) A = [20, 5, 40, 16] (stride (80, 1, 1600, 5)) dim = 3 2.295 -> 2.297 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.26% +0.35%] index_select const : Elapsed 0.023 ms (2.298 ms / 100) 2.314 -> 2.314 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.09% +0.09%] index_select wrap : Elapsed 0.023 ms (2.315 ms / 100) 2.313 -> 2.315 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.13% +0.09% +0.26%] index_select linear : Elapsed 0.023 ms (2.316 ms / 100) 2.324 -> 2.327 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.17% +0.13% +0.22%] index_select reverse : Elapsed 0.023 ms (2.325 ms / 100) 2.296 -> 2.302 ( +0.26%) [ +0.00% +0.00% +0.17% / +0.26% +0.48% +0.44%] index_select skip64 : Elapsed 0.023 ms (2.296 ms / 100) 2.294 -> 2.299 ( +0.22%) [ +0.00% +0.09% +0.22% / +0.22% +0.39% +0.26%] index_select skip256 : Elapsed 0.023 ms (2.294 ms / 100) 2.353 -> 2.354 ( +0.04%) [ +0.00% +0.21% +0.00% / +0.04% +0.17% +0.25%] index_select spread : Elapsed 0.024 ms (2.353 ms / 100) 2.357 -> 2.355 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.21% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.359 ms / 100) 2.328 -> 2.326 ( -0.09%) [ +0.04% +0.04% +0.00% / -0.09% +0.30% +0.43%] index_select strided 5 : Elapsed 0.023 ms (2.329 ms / 100) 2.329 -> 2.335 ( +0.26%) [ +0.00% +0.09% +0.00% / +0.26% +0.47% +0.56%] index_select strided 7 : Elapsed 0.023 ms (2.329 ms / 100) 2.299 -> 2.299 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.52% +0.57%] index_select strided 8 : Elapsed 0.023 ms (2.303 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.00% +0.21% +0.17% / +0.13% +0.17% +0.34%] index_select random : Elapsed 0.024 ms (2.352 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.00% +0.04% +0.17% / +0.17% +0.38% +0.38%] index_select random_sorted : Elapsed 0.024 ms (2.351 ms / 100) 2.359 -> 2.360 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.04% +0.47% +0.51%] index_select perm : Elapsed 0.024 ms (2.363 ms / 100) 2.362 -> 2.363 ( +0.04%) [ +0.21% +0.13% +0.00% / +0.04% +0.30% +0.38%] index_select perm_sorted : Elapsed 0.024 ms (2.367 ms / 100) B = [20, 5, 40, 4] (stride (1, 20, 100, 4000)) A = [20, 5, 40, 16] (stride (3200, 640, 1, 40)) dim = 3 2.275 -> 2.279 ( +0.18%) [ +0.26% +0.00% +0.18% / +0.18% +0.31% +0.48%] index_select const : Elapsed 0.023 ms (2.281 ms / 100) 2.336 -> 2.343 ( +0.30%) [ +0.09% +0.09% +0.00% / +0.30% +0.43% +0.64%] index_select wrap : Elapsed 0.023 ms (2.338 ms / 100) 2.336 -> 2.342 ( +0.26%) [ +0.09% +0.00% +0.13% / +0.26% +0.60% +0.39%] index_select linear : Elapsed 0.023 ms (2.338 ms / 100) 2.339 -> 2.340 ( +0.04%) [ +0.21% +0.00% +0.13% / +0.04% +0.38% +0.51%] index_select reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.40% +0.31%] index_select skip64 : Elapsed 0.023 ms (2.276 ms / 100) 2.274 -> 2.278 ( +0.18%) [ +0.09% +0.13% +0.00% / +0.18% +0.40% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.276 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.00% +0.13% +0.26% / +0.26% +0.51% +0.64%] index_select spread : Elapsed 0.023 ms (2.337 ms / 100) 2.340 -> 2.342 ( +0.09%) [ +0.04% +0.21% +0.00% / +0.09% +0.47% +0.17%] index_select strided 3 : Elapsed 0.023 ms (2.341 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.26% +0.00% +0.04% / +0.04% +0.47% +0.47%] index_select strided 5 : Elapsed 0.023 ms (2.338 ms / 100) 2.348 -> 2.354 ( +0.26%) [ +0.00% +0.34% +0.04% / +0.26% +0.55% +0.68%] index_select strided 7 : Elapsed 0.023 ms (2.348 ms / 100) 2.285 -> 2.292 ( +0.31%) [ +0.39% +0.39% +0.00% / +0.31% +0.66% +0.53%] index_select strided 8 : Elapsed 0.023 ms (2.294 ms / 100) 2.335 -> 2.340 ( +0.21%) [ +0.13% +0.17% +0.00% / +0.21% +0.26% +0.64%] index_select random : Elapsed 0.023 ms (2.338 ms / 100) 2.334 -> 2.341 ( +0.30%) [ +0.17% +0.26% +0.00% / +0.34% +0.56% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.338 ms / 100) 2.335 -> 2.340 ( +0.21%) [ +0.00% +0.04% +0.00% / +0.21% +0.30% +0.30%] index_select perm : Elapsed 0.023 ms (2.335 ms / 100) 2.340 -> 2.340 ( +0.00%) [ +0.00% +0.00% +0.30% / +0.00% +0.17% +0.43%] index_select perm_sorted : Elapsed 0.023 ms (2.340 ms / 100) out_shape = [4, 16, 5, 40] in_shape = [20, 16, 5, 40] idx_dim = 0 B = [4, 16, 5, 40] (stride (3200, 200, 1, 5)) A = [20, 16, 5, 40] (stride (1, 100, 20, 1600)) dim = 0 2.112 -> 2.118 ( +0.28%) [ +0.24% +0.00% +0.33% / +0.28% +1.04% +0.90%] index_select const : Elapsed 0.021 ms (2.117 ms / 100) 2.115 -> 2.115 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.95% +0.95%] index_select wrap : Elapsed 0.021 ms (2.115 ms / 100) 2.110 -> 2.112 ( +0.09%) [ +0.00% +0.19% +0.28% / +0.09% +1.00% +1.23%] index_select linear : Elapsed 0.021 ms (2.110 ms / 100) 2.111 -> 2.116 ( +0.24%) [ +0.00% +0.38% +0.09% / +0.24% +0.85% +1.09%] index_select reverse : Elapsed 0.021 ms (2.111 ms / 100) 2.119 -> 2.119 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.61% +0.71%] index_select skip64 : Elapsed 0.021 ms (2.121 ms / 100) 2.112 -> 2.114 ( +0.09%) [ +0.38% +0.00% +0.28% / +0.09% +0.99% +1.14%] index_select skip256 : Elapsed 0.021 ms (2.120 ms / 100) 2.186 -> 2.195 ( +0.41%) [ +0.09% +0.00% +0.27% / +0.41% +0.91% +0.78%] index_select spread : Elapsed 0.022 ms (2.188 ms / 100) 2.158 -> 2.163 ( +0.23%) [ +0.00% +0.56% +0.09% / +0.23% +0.93% +0.79%] index_select strided 3 : Elapsed 0.022 ms (2.158 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.37% +0.00% +0.41% / +0.23% +0.78% +1.01%] index_select strided 5 : Elapsed 0.022 ms (2.194 ms / 100) 2.169 -> 2.179 ( +0.46%) [ +0.00% +0.55% +0.28% / +0.46% +1.01% +1.15%] index_select strided 7 : Elapsed 0.022 ms (2.169 ms / 100) 2.174 -> 2.174 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.69% +0.87%] index_select strided 8 : Elapsed 0.022 ms (2.174 ms / 100) 2.188 -> 2.185 ( -0.14%) [ +0.09% +0.00% +0.09% / -0.14% +1.10% +0.87%] index_select strided 16 : Elapsed 0.022 ms (2.190 ms / 100) 2.142 -> 2.150 ( +0.37%) [ +0.19% +0.09% +0.00% / +0.37% +0.84% +1.03%] index_select random : Elapsed 0.021 ms (2.146 ms / 100) 2.142 -> 2.146 ( +0.19%) [ +0.14% +0.14% +0.00% / +0.19% +0.84% +0.79%] index_select random_sorted : Elapsed 0.021 ms (2.145 ms / 100) 2.139 -> 2.139 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.89% +0.79%] index_select perm : Elapsed 0.021 ms (2.140 ms / 100) 2.134 -> 2.140 ( +0.28%) [ +0.23% +0.23% +0.00% / +0.28% +1.12% +0.84%] index_select perm_sorted : Elapsed 0.021 ms (2.139 ms / 100) B = [4, 16, 5, 40] (stride (40, 800, 160, 1)) A = [20, 16, 5, 40] (stride (3200, 200, 40, 1)) dim = 0 1.713 -> 1.713 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +1.23% +1.11%] index_select const : Elapsed 0.017 ms (1.715 ms / 100) 1.759 -> 1.765 ( +0.34%) [ +0.06% +0.00% +0.11% / +0.34% +1.88% +1.93%] index_select wrap : Elapsed 0.018 ms (1.760 ms / 100) 1.760 -> 1.760 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +1.76% +1.70%] index_select linear : Elapsed 0.018 ms (1.760 ms / 100) 1.751 -> 1.753 ( +0.11%) [ +0.11% +0.00% +0.23% / +0.11% +2.28% +2.23%] index_select reverse : Elapsed 0.018 ms (1.753 ms / 100) 1.716 -> 1.714 ( -0.12%) [ +0.12% +0.00% +0.12% / -0.12% +0.76% +0.70%] index_select skip64 : Elapsed 0.017 ms (1.718 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.00% +0.23% +0.12% / +0.06% +0.99% +1.28%] index_select skip256 : Elapsed 0.017 ms (1.713 ms / 100) 1.776 -> 1.766 ( -0.56%) [ +0.00% +0.00% +0.23% / +0.23% -0.34% -0.56%] index_select spread : Elapsed 0.018 ms (1.776 ms / 100) 1.773 -> 1.765 ( -0.45%) [ +0.00% +0.17% +0.06% / +0.51% -0.45% -0.39%] index_select strided 3 : Elapsed 0.018 ms (1.773 ms / 100) 1.776 -> 1.766 ( -0.56%) [ +0.23% +0.00% +0.11% / +0.17% -0.23% -0.56%] index_select strided 5 : Elapsed 0.018 ms (1.780 ms / 100) 1.742 -> 1.740 ( -0.11%) [ +0.00% +0.00% +0.11% / -0.11% +2.64% +2.64%] index_select strided 7 : Elapsed 0.017 ms (1.742 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.17% +0.23% +0.00% / +0.06% +0.34% +0.40%] index_select strided 8 : Elapsed 0.018 ms (1.773 ms / 100) 1.774 -> 1.774 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.11% +0.34%] index_select strided 16 : Elapsed 0.018 ms (1.775 ms / 100) 1.763 -> 1.747 ( -0.91%) [ +0.00% +0.00% +0.28% / +0.11% -0.91% -0.68%] index_select random : Elapsed 0.018 ms (1.763 ms / 100) 1.763 -> 1.748 ( -0.85%) [ +0.06% +0.00% +0.06% / +0.06% -0.79% -0.85%] index_select random_sorted : Elapsed 0.018 ms (1.764 ms / 100) 1.774 -> 1.775 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.06% +1.01% +1.01%] index_select perm : Elapsed 0.018 ms (1.775 ms / 100) 1.777 -> 1.778 ( +0.06%) [ +0.23% +0.17% +0.00% / +0.06% +0.51% +0.56%] index_select perm_sorted : Elapsed 0.018 ms (1.781 ms / 100) B = [4, 16, 5, 40] (stride (40, 800, 160, 1)) A = [20, 16, 5, 40] (stride (40, 800, 12800, 1)) dim = 0 1.895 -> 1.898 ( +0.16%) [ +0.00% +0.05% +0.11% / +0.16% +0.37% +0.37%] index_select const : Elapsed 0.019 ms (1.895 ms / 100) 1.945 -> 1.939 ( -0.31%) [ +0.10% +0.00% +0.05% / -0.31% +0.77% +0.57%] index_select wrap : Elapsed 0.019 ms (1.947 ms / 100) 1.941 -> 1.944 ( +0.15%) [ +0.05% +0.00% +0.15% / +0.15% +0.98% +0.72%] index_select linear : Elapsed 0.019 ms (1.942 ms / 100) 1.947 -> 1.951 ( +0.21%) [ +0.00% +0.21% +0.00% / +0.21% +0.87% +0.56%] index_select reverse : Elapsed 0.019 ms (1.947 ms / 100) 1.903 -> 1.907 ( +0.21%) [ +0.00% +0.11% +0.00% / +0.21% +0.58% +0.89%] index_select skip64 : Elapsed 0.019 ms (1.903 ms / 100) 1.894 -> 1.896 ( +0.11%) [ +0.21% +0.00% +0.00% / +0.11% +0.42% +0.48%] index_select skip256 : Elapsed 0.019 ms (1.898 ms / 100) 1.944 -> 1.944 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.82% +0.77%] index_select spread : Elapsed 0.019 ms (1.944 ms / 100) 1.938 -> 1.942 ( +0.21%) [ +0.00% +0.26% +0.00% / +0.21% +0.98% +0.98%] index_select strided 3 : Elapsed 0.019 ms (1.938 ms / 100) 1.940 -> 1.939 ( -0.05%) [ +0.15% +0.21% +0.00% / -0.05% +0.98% +0.77%] index_select strided 5 : Elapsed 0.019 ms (1.943 ms / 100) 1.941 -> 1.942 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +1.29% +1.18%] index_select strided 7 : Elapsed 0.019 ms (1.942 ms / 100) 1.939 -> 1.948 ( +0.46%) [ +0.36% +0.00% +0.10% / +0.46% +1.24% +1.08%] index_select strided 8 : Elapsed 0.019 ms (1.946 ms / 100) 1.935 -> 1.939 ( +0.21%) [ +0.00% +0.00% +0.21% / +0.21% +1.14% +1.50%] index_select strided 16 : Elapsed 0.019 ms (1.935 ms / 100) 1.937 -> 1.939 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +1.29% +1.14%] index_select random : Elapsed 0.019 ms (1.938 ms / 100) 1.938 -> 1.939 ( +0.05%) [ +0.21% +0.00% +0.05% / +0.05% +0.67% +1.03%] index_select random_sorted : Elapsed 0.019 ms (1.942 ms / 100) 1.939 -> 1.940 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +1.13% +1.24%] index_select perm : Elapsed 0.019 ms (1.940 ms / 100) 1.942 -> 1.948 ( +0.31%) [ +0.26% +0.00% +0.46% / +0.31% +0.88% +0.72%] index_select perm_sorted : Elapsed 0.019 ms (1.947 ms / 100) B = [4, 16, 5, 40] (stride (1, 800, 160, 4)) A = [20, 16, 5, 40] (stride (3200, 200, 40, 1)) dim = 0 0.690 -> 0.690 ( +0.00%) [ +0.58% +0.29% +0.00% / +0.00% +2.32% +2.32%] index_select const : Elapsed 0.007 ms (0.694 ms / 100) 0.697 -> 0.699 ( +0.29%) [ +0.72% +0.57% +0.00% / +0.29% +0.57% +0.57%] index_select wrap : Elapsed 0.007 ms (0.702 ms / 100) 0.699 -> 0.698 ( -0.14%) [ +0.00% +0.00% +0.29% / -0.14% +0.57% +0.57%] index_select linear : Elapsed 0.007 ms (0.699 ms / 100) 0.696 -> 0.694 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.29% +0.14% +0.29%] index_select reverse : Elapsed 0.007 ms (0.696 ms / 100) 0.695 -> 0.693 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.29% +0.29% +0.72%] index_select skip64 : Elapsed 0.007 ms (0.695 ms / 100) 0.689 -> 0.691 ( +0.29%) [ +0.00% +0.73% +0.29% / +0.29% +1.60% +2.32%] index_select skip256 : Elapsed 0.007 ms (0.689 ms / 100) 0.689 -> 0.690 ( +0.15%) [ +0.00% +0.87% +0.58% / +0.15% +3.92% +4.50%] index_select spread : Elapsed 0.007 ms (0.689 ms / 100) 0.692 -> 0.693 ( +0.14%) [ +0.14% +0.00% +0.29% / +0.14% +1.16% +1.88%] index_select strided 3 : Elapsed 0.007 ms (0.693 ms / 100) 0.692 -> 0.695 ( +0.43%) [ +0.00% +0.14% +0.43% / +0.43% +3.32% +3.47%] index_select strided 5 : Elapsed 0.007 ms (0.692 ms / 100) 0.690 -> 0.690 ( +0.00%) [ +0.29% +0.58% +0.00% / +0.00% +7.68% +8.12%] index_select strided 7 : Elapsed 0.007 ms (0.692 ms / 100) 0.690 -> 0.692 ( +0.29%) [ +0.43% +0.72% +0.00% / +0.29% +5.94% +5.94%] index_select strided 8 : Elapsed 0.007 ms (0.693 ms / 100) 0.694 -> 0.695 ( +0.14%) [ +0.14% +0.43% +0.00% / +0.14% +3.03% +2.88%] index_select strided 16 : Elapsed 0.007 ms (0.695 ms / 100) 0.692 -> 0.693 ( +0.14%) [ +0.00% +0.72% +0.43% / +0.14% +5.64% +5.64%] index_select random : Elapsed 0.007 ms (0.692 ms / 100) 0.692 -> 0.691 ( -0.14%) [ +0.00% +0.43% +0.29% / -0.14% +6.07% +5.64%] index_select random_sorted : Elapsed 0.007 ms (0.692 ms / 100) 0.691 -> 0.694 ( +0.43%) [ +0.29% +0.00% +0.29% / +0.43% +1.59% +1.45%] index_select perm : Elapsed 0.007 ms (0.693 ms / 100) 0.692 -> 0.694 ( +0.29%) [ +0.29% +0.29% +0.00% / +0.29% +1.45% +1.88%] index_select perm_sorted : Elapsed 0.007 ms (0.694 ms / 100) B = [4, 16, 5, 40] (stride (80, 5, 1, 320)) A = [20, 16, 5, 40] (stride (1, 20, 12800, 320)) dim = 0 0.839 -> 0.844 ( +0.60%) [ +0.12% +0.00% +0.72% / +0.60% +2.38% +2.38%] index_select const : Elapsed 0.008 ms (0.840 ms / 100) 0.847 -> 0.849 ( +0.24%) [ +0.59% +0.00% +0.35% / +0.47% +0.71% +0.24%] index_select wrap : Elapsed 0.009 ms (0.852 ms / 100) 0.848 -> 0.849 ( +0.12%) [ +0.24% +0.00% +0.47% / +0.12% +0.59% +0.24%] index_select linear : Elapsed 0.009 ms (0.850 ms / 100) 0.848 -> 0.844 ( -0.47%) [ +0.35% +0.00% +0.24% / +0.59% +0.00% -0.47%] index_select reverse : Elapsed 0.009 ms (0.851 ms / 100) 0.848 -> 0.847 ( -0.12%) [ +0.35% +0.24% +0.00% / +0.59% +0.47% -0.12%] index_select skip64 : Elapsed 0.009 ms (0.851 ms / 100) 0.841 -> 0.843 ( +0.24%) [ +0.24% +0.00% +0.00% / +0.24% +0.83% +1.07%] index_select skip256 : Elapsed 0.008 ms (0.843 ms / 100) 0.862 -> 0.865 ( +0.35%) [ +0.00% +0.00% +0.23% / +0.58% +0.70% +0.35%] index_select spread : Elapsed 0.009 ms (0.862 ms / 100) 0.849 -> 0.854 ( +0.59%) [ +0.59% +0.00% +0.47% / +0.59% +0.71% +1.06%] index_select strided 3 : Elapsed 0.009 ms (0.854 ms / 100) 0.863 -> 0.866 ( +0.35%) [ +0.00% +0.00% +0.35% / +0.35% +0.81% +0.35%] index_select strided 5 : Elapsed 0.009 ms (0.863 ms / 100) 0.855 -> 0.859 ( +0.47%) [ +0.23% +0.00% +0.70% / +0.47% +1.64% +0.94%] index_select strided 7 : Elapsed 0.009 ms (0.857 ms / 100) 0.858 -> 0.859 ( +0.12%) [ +0.00% +0.12% +0.47% / +0.12% +0.93% +1.28%] index_select strided 8 : Elapsed 0.009 ms (0.858 ms / 100) 0.866 -> 0.865 ( -0.12%) [ +0.00% +0.12% +0.35% / -0.12% -0.12% +0.12%] index_select strided 16 : Elapsed 0.009 ms (0.866 ms / 100) 0.861 -> 0.862 ( +0.12%) [ +0.12% +0.35% +0.00% / +0.12% +0.81% +0.81%] index_select random : Elapsed 0.009 ms (0.862 ms / 100) 0.856 -> 0.861 ( +0.58%) [ +0.00% +0.00% +0.82% / +0.58% +1.17% +1.05%] index_select random_sorted : Elapsed 0.009 ms (0.856 ms / 100) 0.858 -> 0.864 ( +0.70%) [ +0.23% +0.23% +0.00% / +0.70% +0.82% +1.05%] index_select perm : Elapsed 0.009 ms (0.860 ms / 100) 0.859 -> 0.860 ( +0.12%) [ +0.35% +0.00% +0.58% / +0.12% +1.28% +1.05%] index_select perm_sorted : Elapsed 0.009 ms (0.862 ms / 100) B = [4, 16, 5, 40] (stride (16, 1, 64, 320)) A = [20, 16, 5, 40] (stride (16, 1, 320, 1600)) dim = 0 2.185 -> 2.187 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.32% +0.18%] index_select const : Elapsed 0.022 ms (2.185 ms / 100) 2.185 -> 2.186 ( +0.05%) [ +0.18% +0.14% +0.00% / +0.05% +0.32% +0.23%] index_select wrap : Elapsed 0.022 ms (2.189 ms / 100) 2.188 -> 2.189 ( +0.05%) [ +0.00% +0.00% +0.14% / +0.05% +0.37% +0.05%] index_select linear : Elapsed 0.022 ms (2.188 ms / 100) 2.189 -> 2.193 ( +0.18%) [ +0.37% +0.14% +0.00% / +0.18% +0.46% +0.73%] index_select reverse : Elapsed 0.022 ms (2.197 ms / 100) 2.186 -> 2.187 ( +0.05%) [ +0.00% +0.00% +0.18% / +0.05% +0.41% +0.37%] index_select skip64 : Elapsed 0.022 ms (2.186 ms / 100) 2.187 -> 2.188 ( +0.05%) [ +0.23% +0.23% +0.00% / +0.05% +0.27% +0.18%] index_select skip256 : Elapsed 0.022 ms (2.192 ms / 100) 2.178 -> 2.181 ( +0.14%) [ +0.14% +0.00% +0.18% / +0.14% +0.23% +0.14%] index_select spread : Elapsed 0.022 ms (2.181 ms / 100) 2.178 -> 2.180 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.14% +0.09%] index_select strided 3 : Elapsed 0.022 ms (2.180 ms / 100) 2.181 -> 2.181 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.00% +0.05% +0.05%] index_select strided 5 : Elapsed 0.022 ms (2.184 ms / 100) 2.187 -> 2.190 ( +0.14%) [ +0.00% +0.23% +0.05% / +0.14% +0.23% +0.73%] index_select strided 7 : Elapsed 0.022 ms (2.187 ms / 100) 2.171 -> 2.173 ( +0.09%) [ +0.00% +0.18% +0.05% / +0.09% +0.23% +0.28%] index_select strided 8 : Elapsed 0.022 ms (2.171 ms / 100) 2.190 -> 2.190 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.05% +0.09%] index_select strided 16 : Elapsed 0.022 ms (2.191 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.23% +0.09% +0.00% / +0.09% +0.46% +0.51%] index_select random : Elapsed 0.022 ms (2.177 ms / 100) 2.175 -> 2.179 ( +0.18%) [ +0.05% +0.00% +0.05% / +0.18% +0.41% +0.55%] index_select random_sorted : Elapsed 0.022 ms (2.176 ms / 100) 2.173 -> 2.177 ( +0.18%) [ +0.23% +0.00% +0.18% / +0.18% +0.55% +0.51%] index_select perm : Elapsed 0.022 ms (2.178 ms / 100) 2.177 -> 2.181 ( +0.18%) [ +0.00% +0.32% +0.28% / +0.18% +0.60% +0.60%] index_select perm_sorted : Elapsed 0.022 ms (2.177 ms / 100) out_shape = [20, 4, 5, 40] in_shape = [20, 16, 5, 40] idx_dim = 1 B = [20, 4, 5, 40] (stride (800, 40, 160, 1)) A = [20, 16, 5, 40] (stride (40, 4000, 800, 1)) dim = 1 2.354 -> 2.361 ( +0.30%) [ +0.17% +0.00% +0.13% / +0.30% +0.51% +0.51%] index_select const : Elapsed 0.024 ms (2.358 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.25% +0.25%] index_select wrap : Elapsed 0.024 ms (2.431 ms / 100) 2.427 -> 2.430 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.45% +0.49%] index_select linear : Elapsed 0.024 ms (2.427 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.00% +0.04% +0.25% / +0.08% +0.41% +0.41%] index_select reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.358 -> 2.362 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.51% +0.34%] index_select skip64 : Elapsed 0.024 ms (2.362 ms / 100) 2.356 -> 2.362 ( +0.25%) [ +0.00% +0.04% +0.13% / +0.25% +0.47% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.356 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.04% +0.12% +0.00%] index_select spread : Elapsed 0.024 ms (2.433 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.00% +0.21% +0.00% / +0.16% +0.45% +0.37%] index_select strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.426 -> 2.428 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.54% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.425 -> 2.430 ( +0.21%) [ +0.12% +0.33% +0.00% / +0.21% +0.45% +0.70%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.25% +0.08% +0.00% / +0.08% +0.38% +0.55%] index_select strided 8 : Elapsed 0.024 ms (2.382 ms / 100) 2.401 -> 2.405 ( +0.17%) [ +0.37% +0.21% +0.00% / +0.17% +0.54% +0.54%] index_select random : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.00% +0.08% +0.12% / +0.12% +0.12% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.431 -> 2.429 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.21% +0.21%] index_select perm : Elapsed 0.024 ms (2.431 ms / 100) 2.434 -> 2.430 ( -0.16%) [ +0.00% +0.00% +0.08% / -0.16% +0.12% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.434 ms / 100) B = [20, 4, 5, 40] (stride (200, 4000, 40, 1)) A = [20, 16, 5, 40] (stride (1, 100, 20, 1600)) dim = 1 2.419 -> 2.419 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.25% +0.21%] index_select const : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.421 ( +0.08%) [ +0.12% +0.17% +0.00% / +0.08% +0.33% +0.21%] index_select wrap : Elapsed 0.024 ms (2.422 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.25% +0.00% +0.12% / +0.12% +0.25% +0.25%] index_select linear : Elapsed 0.024 ms (2.425 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.04% +0.17% +0.00% / +0.21% +0.25% +0.25%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.420 -> 2.417 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.12% +0.21% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.420 ms / 100) 2.419 -> 2.417 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% +0.21% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.422 ms / 100) 2.421 -> 2.420 ( -0.04%) [ +0.00% +0.21% +0.08% / -0.04% +0.50% +0.41%] index_select spread : Elapsed 0.024 ms (2.421 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.41% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.417 -> 2.425 ( +0.33%) [ +0.21% +0.00% +0.12% / +0.33% +0.50% +0.46%] index_select strided 5 : Elapsed 0.024 ms (2.422 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.46% +0.41%] index_select strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.420 -> 2.425 ( +0.21%) [ +0.00% +0.08% +0.17% / +0.21% +0.45% +0.50%] index_select strided 8 : Elapsed 0.024 ms (2.420 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.21% +0.00% +0.08% / +0.21% +0.62% +0.74%] index_select random : Elapsed 0.024 ms (2.423 ms / 100) 2.419 -> 2.416 ( -0.12%) [ +0.08% +0.00% +0.12% / -0.12% +0.25% +0.50%] index_select random_sorted : Elapsed 0.024 ms (2.421 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.12% +0.21% +0.00% / +0.17% +0.46% +0.62%] index_select perm : Elapsed 0.024 ms (2.418 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.45% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.420 ms / 100) B = [20, 4, 5, 40] (stride (200, 4000, 40, 1)) A = [20, 16, 5, 40] (stride (1, 20, 320, 1600)) dim = 1 2.415 -> 2.417 ( +0.08%) [ +0.21% +0.00% +0.17% / +0.08% +0.54% +0.50%] index_select const : Elapsed 0.024 ms (2.420 ms / 100) 2.424 -> 2.428 ( +0.17%) [ +0.04% +0.17% +0.00% / +0.17% +0.45% +0.50%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.418 -> 2.422 ( +0.17%) [ +0.00% +0.08% +0.17% / +0.17% +0.70% +0.66%] index_select linear : Elapsed 0.024 ms (2.418 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.41% +0.54%] index_select reverse : Elapsed 0.024 ms (2.426 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.25% +0.33%] index_select skip64 : Elapsed 0.024 ms (2.421 ms / 100) 2.422 -> 2.420 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.29% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.422 ms / 100) 2.428 -> 2.431 ( +0.12%) [ +0.08% +0.25% +0.00% / +0.12% +0.37% +0.45%] index_select spread : Elapsed 0.024 ms (2.430 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.29% +0.00% +0.12% / +0.16% +0.49% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.432 ms / 100) 2.420 -> 2.424 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.50% +0.62%] index_select strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.421 -> 2.427 ( +0.25%) [ +0.04% +0.00% +0.17% / +0.25% +0.58% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.422 ms / 100) 2.415 -> 2.417 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.62% +0.75%] index_select strided 8 : Elapsed 0.024 ms (2.418 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.58% +0.62%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.37% +0.33%] index_select random_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.41% +0.21%] index_select perm : Elapsed 0.024 ms (2.430 ms / 100) 2.425 -> 2.427 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.45% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) B = [20, 4, 5, 40] (stride (200, 4000, 1, 5)) A = [20, 16, 5, 40] (stride (3200, 1, 16, 80)) dim = 1 2.579 -> 2.585 ( +0.23%) [ +0.19% +0.16% +0.00% / +0.23% +0.27% +0.35%] index_select const : Elapsed 0.026 ms (2.584 ms / 100) 2.582 -> 2.584 ( +0.08%) [ +0.15% +0.12% +0.00% / +0.12% +0.08% +0.08%] index_select wrap : Elapsed 0.026 ms (2.586 ms / 100) 2.585 -> 2.586 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.12% +0.04%] index_select linear : Elapsed 0.026 ms (2.585 ms / 100) 2.583 -> 2.585 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.15% +0.19% +0.08%] index_select reverse : Elapsed 0.026 ms (2.583 ms / 100) 2.584 -> 2.584 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.23% +0.08% +0.00%] index_select skip64 : Elapsed 0.026 ms (2.586 ms / 100) 2.583 -> 2.584 ( +0.04%) [ +0.19% +0.00% +0.15% / +0.15% +0.19% +0.04%] index_select skip256 : Elapsed 0.026 ms (2.588 ms / 100) 2.609 -> 2.611 ( +0.08%) [ +0.00% +0.08% +0.11% / +0.15% +0.19% +0.08%] index_select spread : Elapsed 0.026 ms (2.609 ms / 100) 2.610 -> 2.614 ( +0.15%) [ +0.04% +0.00% +0.08% / +0.15% +0.19% +0.15%] index_select strided 3 : Elapsed 0.026 ms (2.611 ms / 100) 2.606 -> 2.608 ( +0.08%) [ +0.04% +0.15% +0.00% / +0.08% +0.31% +0.23%] index_select strided 5 : Elapsed 0.026 ms (2.607 ms / 100) 2.606 -> 2.613 ( +0.27%) [ +0.15% +0.04% +0.00% / +0.27% +0.38% +0.31%] index_select strided 7 : Elapsed 0.026 ms (2.610 ms / 100) 2.618 -> 2.620 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.11% +0.15%] index_select strided 8 : Elapsed 0.026 ms (2.618 ms / 100) 2.606 -> 2.608 ( +0.08%) [ +0.19% +0.12% +0.00% / +0.08% +0.31% +0.31%] index_select random : Elapsed 0.026 ms (2.611 ms / 100) 2.609 -> 2.609 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.08% +0.15%] index_select random_sorted : Elapsed 0.026 ms (2.613 ms / 100) 2.618 -> 2.615 ( -0.11%) [ +0.11% +0.00% +0.00% / -0.11% +0.19% -0.08%] index_select perm : Elapsed 0.026 ms (2.621 ms / 100) 2.616 -> 2.618 ( +0.08%) [ +0.19% +0.04% +0.00% / +0.08% +0.11% +0.08%] index_select perm_sorted : Elapsed 0.026 ms (2.621 ms / 100) B = [20, 4, 5, 40] (stride (1, 4000, 20, 100)) A = [20, 16, 5, 40] (stride (640, 40, 12800, 1)) dim = 1 2.376 -> 2.384 ( +0.34%) [ +0.08% +0.38% +0.00% / +0.34% +0.67% +0.63%] index_select const : Elapsed 0.024 ms (2.378 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.37% +0.45%] index_select wrap : Elapsed 0.024 ms (2.431 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.00% +0.21% +0.00% / +0.16% +0.45% +0.45%] index_select linear : Elapsed 0.024 ms (2.430 ms / 100) 2.434 -> 2.434 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.49% +0.70%] index_select reverse : Elapsed 0.024 ms (2.436 ms / 100) 2.366 -> 2.365 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.80% +0.59%] index_select skip64 : Elapsed 0.024 ms (2.366 ms / 100) 2.376 -> 2.382 ( +0.25%) [ +0.21% +0.00% +0.21% / +0.25% +0.76% +0.84%] index_select skip256 : Elapsed 0.024 ms (2.381 ms / 100) 2.426 -> 2.431 ( +0.21%) [ +0.00% +0.29% +0.04% / +0.21% +0.66% +0.62%] index_select spread : Elapsed 0.024 ms (2.426 ms / 100) 2.434 -> 2.434 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.58% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.434 ms / 100) 2.432 -> 2.434 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.37% +0.45%] index_select strided 5 : Elapsed 0.024 ms (2.434 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.16% +0.33% +0.00% / +0.08% +0.53% +0.57%] index_select strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.394 -> 2.402 ( +0.33%) [ +0.33% +0.42% +0.00% / +0.33% +0.58% +0.46%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.400 -> 2.402 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.08% +0.50% +0.46%] index_select random : Elapsed 0.024 ms (2.404 ms / 100) 2.390 -> 2.392 ( +0.08%) [ +0.00% +0.13% +0.04% / +0.08% +0.33% +0.33%] index_select random_sorted : Elapsed 0.024 ms (2.390 ms / 100) 2.434 -> 2.438 ( +0.16%) [ +0.21% +0.00% +0.04% / +0.16% +0.29% +0.45%] index_select perm : Elapsed 0.024 ms (2.439 ms / 100) 2.434 -> 2.437 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.12% +0.58% +0.45%] index_select perm_sorted : Elapsed 0.024 ms (2.437 ms / 100) B = [20, 4, 5, 40] (stride (160, 40, 3200, 1)) A = [20, 16, 5, 40] (stride (80, 5, 1, 1600)) dim = 1 2.547 -> 2.550 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.16% +0.24%] index_select const : Elapsed 0.025 ms (2.548 ms / 100) 2.561 -> 2.564 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.20% +0.27%] index_select wrap : Elapsed 0.026 ms (2.564 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.16% +0.23% +0.00% / +0.20% +0.23% +0.27%] index_select linear : Elapsed 0.026 ms (2.562 ms / 100) 2.578 -> 2.577 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.19% +0.16%] index_select reverse : Elapsed 0.026 ms (2.581 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.12% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.547 ms / 100) 2.544 -> 2.548 ( +0.16%) [ +0.00% +0.12% +0.12% / +0.16% +0.16% +0.31%] index_select skip256 : Elapsed 0.025 ms (2.544 ms / 100) 2.601 -> 2.601 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.12% +0.31% +0.00%] index_select spread : Elapsed 0.026 ms (2.603 ms / 100) 2.605 -> 2.608 ( +0.12%) [ +0.00% +0.15% +0.00% / +0.12% +0.31% +0.35%] index_select strided 3 : Elapsed 0.026 ms (2.605 ms / 100) 2.571 -> 2.569 ( -0.08%) [ +0.00% +0.08% +0.12% / -0.08% +0.23% +0.27%] index_select strided 5 : Elapsed 0.026 ms (2.571 ms / 100) 2.572 -> 2.574 ( +0.08%) [ +0.12% +0.31% +0.00% / +0.08% +0.54% +0.47%] index_select strided 7 : Elapsed 0.026 ms (2.575 ms / 100) 2.552 -> 2.551 ( -0.04%) [ +0.00% +0.24% +0.12% / -0.04% +0.31% +0.31%] index_select strided 8 : Elapsed 0.026 ms (2.552 ms / 100) 2.597 -> 2.599 ( +0.08%) [ +0.00% +0.23% +0.12% / +0.08% +0.39% +0.27%] index_select random : Elapsed 0.026 ms (2.597 ms / 100) 2.585 -> 2.586 ( +0.04%) [ +0.00% +0.15% +0.04% / +0.04% +0.43% +0.27%] index_select random_sorted : Elapsed 0.026 ms (2.585 ms / 100) 2.569 -> 2.574 ( +0.19%) [ +0.12% +0.08% +0.00% / +0.19% +0.31% +0.35%] index_select perm : Elapsed 0.026 ms (2.572 ms / 100) 2.573 -> 2.570 ( -0.12%) [ +0.00% +0.12% +0.04% / -0.12% +0.39% +0.47%] index_select perm_sorted : Elapsed 0.026 ms (2.573 ms / 100) B = [20, 4, 5, 40] (stride (160, 1, 3200, 4)) A = [20, 16, 5, 40] (stride (80, 5, 1, 1600)) dim = 1 2.547 -> 2.551 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.51% +0.35%] index_select const : Elapsed 0.026 ms (2.552 ms / 100) 2.560 -> 2.563 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.43% +0.47%] index_select wrap : Elapsed 0.026 ms (2.560 ms / 100) 2.558 -> 2.557 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.43% +0.51%] index_select linear : Elapsed 0.026 ms (2.562 ms / 100) 2.577 -> 2.578 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.54% +0.47%] index_select reverse : Elapsed 0.026 ms (2.581 ms / 100) 2.545 -> 2.545 ( +0.00%) [ +0.00% +0.20% +0.20% / +0.00% +0.55% +0.47%] index_select skip64 : Elapsed 0.025 ms (2.545 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.55% +0.43%] index_select skip256 : Elapsed 0.025 ms (2.549 ms / 100) 2.592 -> 2.596 ( +0.15%) [ +0.12% +0.04% +0.00% / +0.15% +0.46% +0.58%] index_select spread : Elapsed 0.026 ms (2.595 ms / 100) 2.604 -> 2.605 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.38% +0.38%] index_select strided 3 : Elapsed 0.026 ms (2.605 ms / 100) 2.572 -> 2.573 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.43% +0.51%] index_select strided 5 : Elapsed 0.026 ms (2.576 ms / 100) 2.574 -> 2.576 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.47% +0.43%] index_select strided 7 : Elapsed 0.026 ms (2.574 ms / 100) 2.550 -> 2.552 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.43% +0.39%] index_select strided 8 : Elapsed 0.025 ms (2.550 ms / 100) 2.590 -> 2.590 ( +0.00%) [ +0.31% +0.00% +0.08% / +0.00% +0.19% +0.35%] index_select random : Elapsed 0.026 ms (2.598 ms / 100) 2.583 -> 2.588 ( +0.19%) [ +0.04% +0.04% +0.00% / +0.19% +0.35% +0.23%] index_select random_sorted : Elapsed 0.026 ms (2.584 ms / 100) 2.586 -> 2.589 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.31% +0.39%] index_select perm : Elapsed 0.026 ms (2.588 ms / 100) 2.581 -> 2.579 ( -0.08%) [ +0.23% +0.08% +0.00% / -0.08% +0.46% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.587 ms / 100) B = [20, 4, 5, 40] (stride (160, 1, 3200, 4)) A = [20, 16, 5, 40] (stride (1, 20, 320, 1600)) dim = 1 2.540 -> 2.541 ( +0.04%) [ +0.20% +0.00% +0.16% / +0.04% +0.39% +0.51%] index_select const : Elapsed 0.025 ms (2.545 ms / 100) 2.542 -> 2.546 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.47% +0.43%] index_select wrap : Elapsed 0.025 ms (2.547 ms / 100) 2.544 -> 2.543 ( -0.04%) [ +0.00% +0.08% +0.12% / -0.04% +0.47% +0.43%] index_select linear : Elapsed 0.025 ms (2.544 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.24% +0.00% +0.16% / +0.04% +0.55% +0.35%] index_select reverse : Elapsed 0.026 ms (2.554 ms / 100) 2.538 -> 2.540 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.79% +0.51%] index_select skip64 : Elapsed 0.025 ms (2.540 ms / 100) 2.536 -> 2.540 ( +0.16%) [ +0.00% +0.20% +0.12% / +0.16% +0.55% +0.55%] index_select skip256 : Elapsed 0.025 ms (2.536 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.00% +0.20% +0.20% / +0.04% +0.39% +0.47%] index_select spread : Elapsed 0.025 ms (2.546 ms / 100) 2.543 -> 2.548 ( +0.20%) [ +0.28% +0.00% +0.08% / +0.20% +0.43% +0.35%] index_select strided 3 : Elapsed 0.026 ms (2.550 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.35% +0.35%] index_select strided 5 : Elapsed 0.025 ms (2.547 ms / 100) 2.538 -> 2.542 ( +0.16%) [ +0.04% +0.08% +0.00% / +0.16% +0.43% +0.39%] index_select strided 7 : Elapsed 0.025 ms (2.539 ms / 100) 2.535 -> 2.537 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.39% +0.43%] index_select strided 8 : Elapsed 0.025 ms (2.536 ms / 100) 2.547 -> 2.551 ( +0.16%) [ +0.08% +0.04% +0.00% / +0.16% +0.43% +0.35%] index_select random : Elapsed 0.025 ms (2.549 ms / 100) 2.547 -> 2.547 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.20% +0.43%] index_select random_sorted : Elapsed 0.025 ms (2.547 ms / 100) 2.542 -> 2.540 ( -0.08%) [ +0.08% +0.00% +0.16% / -0.08% +0.35% +0.28%] index_select perm : Elapsed 0.025 ms (2.544 ms / 100) 2.544 -> 2.543 ( -0.04%) [ +0.20% +0.00% +0.04% / -0.04% +0.24% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.549 ms / 100) B = [20, 4, 5, 40] (stride (1, 800, 3200, 20)) A = [20, 16, 5, 40] (stride (80, 5, 1, 1600)) dim = 1 2.553 -> 2.554 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.20% +0.16%] index_select const : Elapsed 0.026 ms (2.554 ms / 100) 2.567 -> 2.569 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.23% +0.27%] index_select wrap : Elapsed 0.026 ms (2.570 ms / 100) 2.573 -> 2.572 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.12% +0.12%] index_select linear : Elapsed 0.026 ms (2.575 ms / 100) 2.576 -> 2.583 ( +0.27%) [ +0.23% +0.16% +0.00% / +0.31% +0.39% +0.27%] index_select reverse : Elapsed 0.026 ms (2.582 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.27% +0.24%] index_select skip64 : Elapsed 0.026 ms (2.558 ms / 100) 2.553 -> 2.556 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.20% +0.24% +0.12%] index_select skip256 : Elapsed 0.026 ms (2.555 ms / 100) 2.603 -> 2.607 ( +0.15%) [ +0.15% +0.04% +0.00% / +0.15% +0.46% +0.42%] index_select spread : Elapsed 0.026 ms (2.607 ms / 100) 2.614 -> 2.612 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.34% +0.23%] index_select strided 3 : Elapsed 0.026 ms (2.615 ms / 100) 2.580 -> 2.582 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.50% +0.47%] index_select strided 5 : Elapsed 0.026 ms (2.583 ms / 100) 2.582 -> 2.584 ( +0.08%) [ +0.19% +0.12% +0.00% / +0.08% +0.50% +0.43%] index_select strided 7 : Elapsed 0.026 ms (2.587 ms / 100) 2.559 -> 2.564 ( +0.20%) [ +0.08% +0.00% +0.00% / +0.20% +0.35% +0.39%] index_select strided 8 : Elapsed 0.026 ms (2.561 ms / 100) 2.601 -> 2.605 ( +0.15%) [ +0.23% +0.31% +0.00% / +0.15% +0.62% +0.62%] index_select random : Elapsed 0.026 ms (2.607 ms / 100) 2.599 -> 2.600 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.38% +0.38%] index_select random_sorted : Elapsed 0.026 ms (2.601 ms / 100) 2.576 -> 2.577 ( +0.04%) [ +0.19% +0.12% +0.00% / +0.04% +0.39% +0.39%] index_select perm : Elapsed 0.026 ms (2.581 ms / 100) 2.575 -> 2.576 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.39% +0.43%] index_select perm_sorted : Elapsed 0.026 ms (2.575 ms / 100) B = [20, 4, 5, 40] (stride (20, 5, 1, 400)) A = [20, 16, 5, 40] (stride (3200, 5, 1, 80)) dim = 1 2.571 -> 2.573 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.27% +0.23%] index_select const : Elapsed 0.026 ms (2.573 ms / 100) 2.577 -> 2.584 ( +0.27%) [ +0.16% +0.23% +0.00% / +0.31% +0.35% +0.27%] index_select wrap : Elapsed 0.026 ms (2.581 ms / 100) 2.577 -> 2.584 ( +0.27%) [ +0.16% +0.27% +0.00% / +0.27% +0.31% +0.27%] index_select linear : Elapsed 0.026 ms (2.581 ms / 100) 2.589 -> 2.592 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.15% +0.12% +0.23%] index_select reverse : Elapsed 0.026 ms (2.592 ms / 100) 2.571 -> 2.572 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.23% +0.19%] index_select skip64 : Elapsed 0.026 ms (2.571 ms / 100) 2.570 -> 2.573 ( +0.12%) [ +0.16% +0.31% +0.00% / +0.12% +0.19% +0.19%] index_select skip256 : Elapsed 0.026 ms (2.574 ms / 100) 2.599 -> 2.601 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.19% +0.65%] index_select spread : Elapsed 0.026 ms (2.601 ms / 100) 2.603 -> 2.603 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.27% +0.27%] index_select strided 3 : Elapsed 0.026 ms (2.604 ms / 100) 2.581 -> 2.583 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.43% +0.31%] index_select strided 5 : Elapsed 0.026 ms (2.581 ms / 100) 2.585 -> 2.591 ( +0.23%) [ +0.00% +0.04% +0.04% / +0.23% +0.31% +0.46%] index_select strided 7 : Elapsed 0.026 ms (2.585 ms / 100) 2.572 -> 2.578 ( +0.23%) [ +0.16% +0.00% +0.12% / +0.23% +0.43% +0.23%] index_select strided 8 : Elapsed 0.026 ms (2.576 ms / 100) 2.584 -> 2.583 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.43% +0.31%] index_select random : Elapsed 0.026 ms (2.585 ms / 100) 2.583 -> 2.586 ( +0.12%) [ +0.00% +0.15% +0.00% / +0.12% +0.27% +0.39%] index_select random_sorted : Elapsed 0.026 ms (2.583 ms / 100) 2.592 -> 2.594 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.42% +0.46%] index_select perm : Elapsed 0.026 ms (2.592 ms / 100) 2.590 -> 2.595 ( +0.19%) [ +0.00% +0.12% +0.00% / +0.19% +0.19% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.590 ms / 100) B = [20, 4, 5, 40] (stride (20, 5, 1, 400)) A = [20, 16, 5, 40] (stride (40, 800, 12800, 1)) dim = 1 2.388 -> 2.389 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.38% +0.25%] index_select const : Elapsed 0.024 ms (2.388 ms / 100) 2.442 -> 2.445 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.12% +0.16%] index_select wrap : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.37% +0.45%] index_select linear : Elapsed 0.024 ms (2.448 ms / 100) 2.443 -> 2.446 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.45% +0.49%] index_select reverse : Elapsed 0.024 ms (2.445 ms / 100) 2.393 -> 2.393 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.13% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.393 ms / 100) 2.392 -> 2.397 ( +0.21%) [ +0.17% +0.08% +0.00% / +0.25% +0.21% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.396 ms / 100) 2.442 -> 2.442 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.12% +0.33%] index_select spread : Elapsed 0.024 ms (2.443 ms / 100) 2.439 -> 2.439 ( +0.00%) [ +0.00% +0.00% +0.25% / +0.00% +0.29% +0.33%] index_select strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.439 -> 2.442 ( +0.12%) [ +0.21% +0.00% +0.25% / +0.12% +0.25% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.444 ms / 100) 2.440 -> 2.445 ( +0.20%) [ +0.20% +0.00% +0.20% / +0.20% +0.20% +0.53%] index_select strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.04% +0.21% +0.00% / +0.00% +0.29% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.404 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.12% +0.21%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.419 -> 2.426 ( +0.29%) [ +0.12% +0.25% +0.00% / +0.29% +0.29% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.422 ms / 100) 2.443 -> 2.444 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.12% +0.25% +0.04%] index_select perm : Elapsed 0.024 ms (2.443 ms / 100) 2.438 -> 2.441 ( +0.12%) [ +0.16% +0.00% +0.21% / +0.12% +0.25% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.442 ms / 100) B = [20, 4, 5, 40] (stride (1, 100, 20, 400)) A = [20, 16, 5, 40] (stride (3200, 200, 1, 5)) dim = 1 2.405 -> 2.408 ( +0.12%) [ +0.08% +0.17% +0.00% / +0.17% +0.12% +0.12%] index_select const : Elapsed 0.024 ms (2.407 ms / 100) 2.420 -> 2.424 ( +0.17%) [ +0.21% +0.04% +0.00% / +0.21% +0.33% +0.17%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.21% +0.25% +0.08%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.12% +0.12%] index_select reverse : Elapsed 0.024 ms (2.424 ms / 100) 2.400 -> 2.408 ( +0.33%) [ +0.00% +0.33% +0.42% / +0.33% +0.50% +0.58%] index_select skip64 : Elapsed 0.024 ms (2.400 ms / 100) 2.405 -> 2.406 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.17% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.408 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.25% +0.21%] index_select spread : Elapsed 0.024 ms (2.420 ms / 100) 2.418 -> 2.422 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.17% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.00% +0.04% +0.08% / +0.17% +0.46% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.419 -> 2.418 ( -0.04%) [ +0.25% +0.00% +0.04% / -0.04% +0.37% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.37% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.415 ( +0.21%) [ +0.25% +0.17% +0.00% / +0.21% +0.62% +0.58%] index_select random : Elapsed 0.024 ms (2.416 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.25% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.415 -> 2.421 ( +0.25%) [ +0.29% +0.00% +0.12% / +0.25% +0.50% +0.37%] index_select perm : Elapsed 0.024 ms (2.422 ms / 100) 2.412 -> 2.417 ( +0.21%) [ +0.00% +0.08% +0.21% / +0.21% +0.29% +0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) B = [20, 4, 5, 40] (stride (4, 1, 80, 400)) dim = 1 fill_cnt = 16 2.804 -> 2.802 ( -0.07%) [ +0.11% +0.00% +0.14% / +0.21% -0.07% -0.07%] index_fill_ const : Elapsed 0.028 ms (2.807 ms / 100) 2.781 -> 2.777 ( -0.14%) [ +0.00% +0.18% +0.07% / +0.07% -0.04% -0.14%] index_fill_ linear : Elapsed 0.028 ms (2.781 ms / 100) 2.783 -> 2.774 ( -0.32%) [ +0.00% +0.14% +0.04% / +0.04% -0.32% -0.25%] index_fill_ reverse : Elapsed 0.028 ms (2.783 ms / 100) 2.803 -> 2.796 ( -0.25%) [ +0.21% +0.11% +0.00% / -0.14% -0.21% -0.25%] index_fill_ skip64 : Elapsed 0.028 ms (2.809 ms / 100) 2.806 -> 2.803 ( -0.11%) [ +0.04% +0.00% +0.11% / -0.11% -0.04% -0.07%] index_fill_ skip256 : Elapsed 0.028 ms (2.807 ms / 100) 2.781 -> 2.776 ( -0.18%) [ +0.04% +0.11% +0.00% / +0.07% -0.18% -0.07%] index_fill_ spread : Elapsed 0.028 ms (2.782 ms / 100) 2.779 -> 2.777 ( -0.07%) [ +0.00% +0.00% +0.04% / +0.18% -0.07% +0.07%] index_fill_ strided 3 : Elapsed 0.028 ms (2.779 ms / 100) 2.783 -> 2.775 ( -0.29%) [ +0.14% +0.00% +0.00% / +0.11% -0.22% -0.29%] index_fill_ random : Elapsed 0.028 ms (2.787 ms / 100) 2.783 -> 2.769 ( -0.50%) [ +0.22% +0.00% +0.22% / +0.04% -0.50% -0.29%] index_fill_ random_sorted : Elapsed 0.028 ms (2.789 ms / 100) B = [20, 4, 5, 40] (stride (4, 1, 80, 400)) A = [20, 16, 5, 40] (stride (80, 5, 1, 1600)) dim = 1 2.573 -> 2.576 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.43% +0.31%] index_select const : Elapsed 0.026 ms (2.573 ms / 100) 2.589 -> 2.590 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.31% +0.19%] index_select wrap : Elapsed 0.026 ms (2.590 ms / 100) 2.584 -> 2.584 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.27% +0.27%] index_select linear : Elapsed 0.026 ms (2.584 ms / 100) 2.595 -> 2.594 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.42% +0.50%] index_select reverse : Elapsed 0.026 ms (2.597 ms / 100) 2.571 -> 2.576 ( +0.19%) [ +0.12% +0.00% +0.00% / +0.19% +0.43% +0.51%] index_select skip64 : Elapsed 0.026 ms (2.574 ms / 100) 2.571 -> 2.572 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.58% +0.51%] index_select skip256 : Elapsed 0.026 ms (2.574 ms / 100) 2.613 -> 2.617 ( +0.15%) [ +0.00% +0.23% +0.11% / +0.15% +0.50% +0.54%] index_select spread : Elapsed 0.026 ms (2.613 ms / 100) 2.620 -> 2.628 ( +0.31%) [ +0.11% +0.11% +0.00% / +0.31% +0.50% +0.53%] index_select strided 3 : Elapsed 0.026 ms (2.623 ms / 100) 2.594 -> 2.603 ( +0.35%) [ +0.00% +0.12% +0.04% / +0.35% +0.42% +0.54%] index_select strided 5 : Elapsed 0.026 ms (2.594 ms / 100) 2.597 -> 2.599 ( +0.08%) [ +0.15% +0.04% +0.00% / +0.08% +0.39% +0.39%] index_select strided 7 : Elapsed 0.026 ms (2.601 ms / 100) 2.571 -> 2.574 ( +0.12%) [ +0.16% +0.16% +0.00% / +0.12% +0.43% +0.43%] index_select strided 8 : Elapsed 0.026 ms (2.575 ms / 100) 2.599 -> 2.601 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.31% +0.46%] index_select random : Elapsed 0.026 ms (2.599 ms / 100) 2.598 -> 2.600 ( +0.08%) [ +0.00% +0.23% +0.00% / +0.08% +0.31% +0.23%] index_select random_sorted : Elapsed 0.026 ms (2.598 ms / 100) 2.609 -> 2.613 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.23% +0.23%] index_select perm : Elapsed 0.026 ms (2.611 ms / 100) 2.604 -> 2.607 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.50% +0.42%] index_select perm_sorted : Elapsed 0.026 ms (2.604 ms / 100) out_shape = [20, 16, 4, 40] in_shape = [20, 16, 5, 40] idx_dim = 2 B = [20, 16, 4, 40] (stride (160, 3200, 40, 1)) A = [20, 16, 5, 40] (stride (1, 20, 12800, 320)) dim = 2 5.682 -> 5.684 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.26% +0.19%] index_select const : Elapsed 0.057 ms (5.682 ms / 100) 5.754 -> 5.750 ( -0.07%) [ +0.05% +0.00% +0.09% / +0.16% -0.07% -0.05%] index_select wrap : Elapsed 0.058 ms (5.757 ms / 100) 5.748 -> 5.742 ( -0.10%) [ +0.21% +0.09% +0.00% / +0.17% +0.05% -0.10%] index_select linear : Elapsed 0.058 ms (5.760 ms / 100) 5.744 -> 5.749 ( +0.09%) [ +0.00% +0.05% +0.03% / +0.14% +0.10% +0.09%] index_select reverse : Elapsed 0.057 ms (5.744 ms / 100) 5.679 -> 5.686 ( +0.12%) [ +0.00% +0.00% +0.11% / +0.12% +0.25% +0.21%] index_select skip64 : Elapsed 0.057 ms (5.679 ms / 100) 5.681 -> 5.682 ( +0.02%) [ +0.02% +0.00% +0.11% / +0.02% +0.23% +0.18%] index_select skip256 : Elapsed 0.057 ms (5.682 ms / 100) 5.753 -> 5.746 ( -0.12%) [ +0.00% +0.10% +0.14% / +0.05% -0.07% -0.12%] index_select spread : Elapsed 0.058 ms (5.753 ms / 100) 5.747 -> 5.748 ( +0.02%) [ +0.00% +0.02% +0.12% / +0.02% +0.07% +0.02%] index_select strided 3 : Elapsed 0.057 ms (5.747 ms / 100) 5.701 -> 5.706 ( +0.09%) [ +0.00% +0.07% +0.07% / +0.09% +0.32% +0.26%] index_select random : Elapsed 0.057 ms (5.701 ms / 100) 5.693 -> 5.699 ( +0.11%) [ +0.07% +0.00% +0.09% / +0.11% +0.39% +0.37%] index_select random_sorted : Elapsed 0.057 ms (5.697 ms / 100) 5.744 -> 5.751 ( +0.12%) [ +0.14% +0.16% +0.00% / +0.14% +0.12% +0.24%] index_select perm : Elapsed 0.058 ms (5.752 ms / 100) 5.745 -> 5.739 ( -0.10%) [ +0.10% +0.00% +0.14% / +0.19% -0.10% -0.07%] index_select perm_sorted : Elapsed 0.058 ms (5.751 ms / 100) B = [20, 16, 4, 40] (stride (160, 3200, 1, 4)) A = [20, 16, 5, 40] (stride (640, 1, 12800, 16)) dim = 2 5.792 -> 5.786 ( -0.10%) [ +0.07% +0.00% +0.24% / +0.33% -0.10% -0.02%] index_select const : Elapsed 0.058 ms (5.796 ms / 100) 5.829 -> 5.807 ( -0.38%) [ +0.00% +0.03% +0.14% / +0.09% -0.19% -0.38%] index_select wrap : Elapsed 0.058 ms (5.829 ms / 100) 5.831 -> 5.807 ( -0.41%) [ +0.09% +0.00% +0.03% / +0.10% -0.41% -0.17%] index_select linear : Elapsed 0.058 ms (5.836 ms / 100) 5.830 -> 5.816 ( -0.24%) [ +0.00% +0.02% +0.05% / +0.14% -0.22% -0.24%] index_select reverse : Elapsed 0.058 ms (5.830 ms / 100) 5.794 -> 5.782 ( -0.21%) [ +0.14% +0.00% +0.09% / +0.16% -0.21% -0.03%] index_select skip64 : Elapsed 0.058 ms (5.802 ms / 100) 5.794 -> 5.787 ( -0.12%) [ +0.07% +0.05% +0.00% / +0.09% -0.07% -0.12%] index_select skip256 : Elapsed 0.058 ms (5.798 ms / 100) 5.830 -> 5.813 ( -0.29%) [ +0.00% +0.02% +0.14% / +0.15% -0.29% -0.24%] index_select spread : Elapsed 0.058 ms (5.830 ms / 100) 5.834 -> 5.813 ( -0.36%) [ +0.07% +0.03% +0.00% / +0.03% -0.36% -0.19%] index_select strided 3 : Elapsed 0.058 ms (5.838 ms / 100) 5.830 -> 5.810 ( -0.34%) [ +0.00% +0.05% +0.02% / +0.15% -0.34% -0.33%] index_select random : Elapsed 0.058 ms (5.830 ms / 100) 5.821 -> 5.808 ( -0.22%) [ +0.00% +0.09% +0.09% / +0.03% -0.22% -0.10%] index_select random_sorted : Elapsed 0.058 ms (5.821 ms / 100) 5.830 -> 5.812 ( -0.31%) [ +0.00% +0.03% +0.17% / +0.02% -0.03% -0.31%] index_select perm : Elapsed 0.058 ms (5.830 ms / 100) 5.830 -> 5.821 ( -0.15%) [ +0.12% +0.00% +0.05% / +0.10% -0.09% -0.15%] index_select perm_sorted : Elapsed 0.058 ms (5.837 ms / 100) B = [20, 16, 4, 40] (stride (40, 3200, 800, 1)) A = [20, 16, 5, 40] (stride (200, 4000, 40, 1)) dim = 2 5.688 -> 5.694 ( +0.11%) [ +0.09% +0.00% +0.05% / +0.19% +0.11% +0.25%] index_select const : Elapsed 0.057 ms (5.693 ms / 100) 5.780 -> 5.781 ( +0.02%) [ +0.02% +0.00% +0.09% / +0.17% +0.02% +0.10%] index_select wrap : Elapsed 0.058 ms (5.781 ms / 100) 5.779 -> 5.782 ( +0.05%) [ +0.09% +0.00% +0.10% / +0.05% +0.09% +0.07%] index_select linear : Elapsed 0.058 ms (5.784 ms / 100) 5.777 -> 5.771 ( -0.10%) [ +0.00% +0.00% +0.19% / +0.14% +0.05% -0.10%] index_select reverse : Elapsed 0.058 ms (5.777 ms / 100) 5.678 -> 5.681 ( +0.05%) [ +0.00% +0.07% +0.25% / +0.05% +0.12% +0.05%] index_select skip64 : Elapsed 0.057 ms (5.678 ms / 100) 5.683 -> 5.688 ( +0.09%) [ +0.00% +0.07% +0.21% / +0.11% +0.14% +0.09%] index_select skip256 : Elapsed 0.057 ms (5.683 ms / 100) 5.785 -> 5.790 ( +0.09%) [ +0.00% +0.05% +0.17% / +0.12% +0.09% +0.10%] index_select spread : Elapsed 0.058 ms (5.785 ms / 100) 5.780 -> 5.778 ( -0.03%) [ +0.02% +0.00% +0.10% / +0.00% +0.12% -0.03%] index_select strided 3 : Elapsed 0.058 ms (5.781 ms / 100) 5.718 -> 5.724 ( +0.10%) [ +0.24% +0.00% +0.19% / +0.38% +0.16% +0.10%] index_select random : Elapsed 0.057 ms (5.732 ms / 100) 5.721 -> 5.717 ( -0.07%) [ +0.03% +0.00% +0.16% / +0.03% +0.00% -0.07%] index_select random_sorted : Elapsed 0.057 ms (5.723 ms / 100) 5.779 -> 5.786 ( +0.12%) [ +0.00% +0.03% +0.09% / +0.22% +0.12% +0.17%] index_select perm : Elapsed 0.058 ms (5.779 ms / 100) 5.773 -> 5.779 ( +0.10%) [ +0.19% +0.00% +0.07% / +0.10% +0.19% +0.23%] index_select perm_sorted : Elapsed 0.058 ms (5.784 ms / 100) B = [20, 16, 4, 40] (stride (640, 1, 12800, 16)) A = [20, 16, 5, 40] (stride (200, 4000, 1, 5)) dim = 2 5.912 -> 5.905 ( -0.12%) [ +0.10% +0.05% +0.00% / +0.19% -0.12% -0.12%] index_select const : Elapsed 0.059 ms (5.918 ms / 100) 5.915 -> 5.905 ( -0.17%) [ +0.12% +0.00% +0.19% / +0.03% -0.14% -0.17%] index_select wrap : Elapsed 0.059 ms (5.922 ms / 100) 5.913 -> 5.900 ( -0.22%) [ +0.10% +0.00% +0.19% / +0.10% -0.22% -0.08%] index_select linear : Elapsed 0.059 ms (5.919 ms / 100) 5.915 -> 5.899 ( -0.27%) [ +0.00% +0.02% +0.15% / +0.10% -0.19% -0.27%] index_select reverse : Elapsed 0.059 ms (5.915 ms / 100) 5.917 -> 5.904 ( -0.22%) [ +0.00% +0.03% +0.15% / +0.19% -0.22% -0.20%] index_select skip64 : Elapsed 0.059 ms (5.917 ms / 100) 5.914 -> 5.899 ( -0.25%) [ +0.00% +0.07% +0.07% / +0.32% -0.20% -0.25%] index_select skip256 : Elapsed 0.059 ms (5.914 ms / 100) 5.916 -> 5.901 ( -0.25%) [ +0.02% +0.05% +0.00% / +0.14% -0.14% -0.25%] index_select spread : Elapsed 0.059 ms (5.917 ms / 100) 5.916 -> 5.902 ( -0.24%) [ +0.00% +0.00% +0.10% / +0.05% -0.05% -0.24%] index_select strided 3 : Elapsed 0.059 ms (5.916 ms / 100) 5.911 -> 5.903 ( -0.14%) [ +0.08% +0.00% +0.17% / +0.14% -0.10% -0.14%] index_select random : Elapsed 0.059 ms (5.916 ms / 100) 5.914 -> 5.903 ( -0.19%) [ +0.00% +0.05% +0.12% / +0.10% -0.17% -0.19%] index_select random_sorted : Elapsed 0.059 ms (5.914 ms / 100) 5.913 -> 5.899 ( -0.24%) [ +0.07% +0.00% +0.08% / +0.19% -0.17% -0.24%] index_select perm : Elapsed 0.059 ms (5.917 ms / 100) 5.916 -> 5.902 ( -0.24%) [ +0.00% +0.02% +0.07% / +0.02% -0.24% -0.14%] index_select perm_sorted : Elapsed 0.059 ms (5.916 ms / 100) B = [20, 16, 4, 40] (stride (640, 1, 12800, 16)) A = [20, 16, 5, 40] (stride (5, 100, 1, 1600)) dim = 2 5.994 -> 5.992 ( -0.03%) [ +0.00% +0.05% +0.13% / +0.03% +0.02% -0.03%] index_select const : Elapsed 0.060 ms (5.994 ms / 100) 5.993 -> 5.989 ( -0.07%) [ +0.00% +0.10% +0.18% / +0.12% -0.07% -0.05%] index_select wrap : Elapsed 0.060 ms (5.993 ms / 100) 5.989 -> 5.995 ( +0.10%) [ +0.17% +0.25% +0.00% / +0.13% +0.10% +0.20%] index_select linear : Elapsed 0.060 ms (5.999 ms / 100) 5.991 -> 5.991 ( +0.00%) [ +0.13% +0.00% +0.12% / +0.07% +0.07% +0.00%] index_select reverse : Elapsed 0.060 ms (5.999 ms / 100) 5.995 -> 5.992 ( -0.05%) [ +0.05% +0.08% +0.00% / +0.15% -0.05% +0.03%] index_select skip64 : Elapsed 0.060 ms (5.998 ms / 100) 5.991 -> 5.995 ( +0.07%) [ +0.12% +0.00% +0.20% / +0.17% +0.07% +0.10%] index_select skip256 : Elapsed 0.060 ms (5.998 ms / 100) 5.995 -> 5.996 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.10% +0.07% +0.02%] index_select spread : Elapsed 0.060 ms (5.996 ms / 100) 5.991 -> 5.996 ( +0.08%) [ +0.15% +0.00% +0.17% / +0.15% +0.08% +0.08%] index_select strided 3 : Elapsed 0.060 ms (6.000 ms / 100) 5.990 -> 5.990 ( +0.00%) [ +0.00% +0.05% +0.15% / +0.10% +0.03% +0.00%] index_select random : Elapsed 0.060 ms (5.990 ms / 100) 5.993 -> 5.989 ( -0.07%) [ +0.10% +0.17% +0.00% / +0.07% -0.07% +0.07%] index_select random_sorted : Elapsed 0.060 ms (5.999 ms / 100) 5.988 -> 5.986 ( -0.03%) [ +0.00% +0.10% +0.08% / +0.22% -0.03% +0.17%] index_select perm : Elapsed 0.060 ms (5.988 ms / 100) 5.991 -> 5.991 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.15% +0.00% +0.08%] index_select perm_sorted : Elapsed 0.060 ms (5.994 ms / 100) B = [20, 16, 4, 40] (stride (16, 1, 12800, 320)) A = [20, 16, 5, 40] (stride (200, 4000, 1, 5)) dim = 2 5.675 -> 5.669 ( -0.11%) [ +0.04% +0.00% +0.12% / +0.04% -0.05% -0.11%] index_select const : Elapsed 0.057 ms (5.677 ms / 100) 5.669 -> 5.666 ( -0.05%) [ +0.11% +0.00% +0.07% / +0.00% -0.05% -0.02%] index_select wrap : Elapsed 0.057 ms (5.675 ms / 100) 5.670 -> 5.667 ( -0.05%) [ +0.05% +0.00% +0.09% / +0.00% -0.05% +0.04%] index_select linear : Elapsed 0.057 ms (5.673 ms / 100) 5.676 -> 5.668 ( -0.14%) [ +0.07% +0.00% +0.05% / +0.02% -0.14% -0.12%] index_select reverse : Elapsed 0.057 ms (5.680 ms / 100) 5.672 -> 5.664 ( -0.14%) [ +0.11% +0.04% +0.00% / +0.18% +0.00% -0.14%] index_select skip64 : Elapsed 0.057 ms (5.678 ms / 100) 5.677 -> 5.670 ( -0.12%) [ +0.00% +0.00% +0.09% / +0.00% -0.11% -0.12%] index_select skip256 : Elapsed 0.057 ms (5.677 ms / 100) 5.668 -> 5.666 ( -0.04%) [ +0.12% +0.00% +0.09% / +0.12% -0.04% +0.00%] index_select spread : Elapsed 0.057 ms (5.675 ms / 100) 5.671 -> 5.666 ( -0.09%) [ +0.02% +0.00% +0.05% / +0.07% -0.09% -0.05%] index_select strided 3 : Elapsed 0.057 ms (5.672 ms / 100) 5.667 -> 5.665 ( -0.04%) [ +0.04% +0.00% +0.23% / +0.14% -0.04% +0.02%] index_select random : Elapsed 0.057 ms (5.669 ms / 100) 5.674 -> 5.663 ( -0.19%) [ +0.07% +0.00% +0.05% / +0.07% -0.19% -0.18%] index_select random_sorted : Elapsed 0.057 ms (5.678 ms / 100) 5.676 -> 5.670 ( -0.11%) [ +0.07% +0.00% +0.04% / +0.14% -0.09% -0.11%] index_select perm : Elapsed 0.057 ms (5.680 ms / 100) 5.667 -> 5.666 ( -0.02%) [ +0.18% +0.00% +0.16% / +0.21% -0.02% +0.04%] index_select perm_sorted : Elapsed 0.057 ms (5.677 ms / 100) B = [20, 16, 4, 40] (stride (1, 20, 12800, 320)) A = [20, 16, 5, 40] (stride (5, 4000, 1, 100)) dim = 2 5.976 -> 5.970 ( -0.10%) [ +0.00% +0.10% +0.07% / +0.08% -0.10% -0.05%] index_select const : Elapsed 0.060 ms (5.976 ms / 100) 5.971 -> 5.978 ( +0.12%) [ +0.13% +0.00% +0.17% / +0.15% +0.13% +0.12%] index_select wrap : Elapsed 0.060 ms (5.979 ms / 100) 5.977 -> 5.975 ( -0.03%) [ +0.00% +0.05% +0.12% / +0.12% -0.03% -0.02%] index_select linear : Elapsed 0.060 ms (5.977 ms / 100) 5.976 -> 5.972 ( -0.07%) [ +0.05% +0.00% +0.05% / +0.17% -0.05% -0.07%] index_select reverse : Elapsed 0.060 ms (5.979 ms / 100) 5.978 -> 5.973 ( -0.08%) [ +0.00% +0.02% +0.02% / +0.10% -0.08% -0.02%] index_select skip64 : Elapsed 0.060 ms (5.978 ms / 100) 5.974 -> 5.974 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.02% +0.05%] index_select skip256 : Elapsed 0.060 ms (5.977 ms / 100) 5.979 -> 5.973 ( -0.10%) [ +0.02% +0.00% +0.03% / +0.10% -0.10% -0.03%] index_select spread : Elapsed 0.060 ms (5.980 ms / 100) 5.979 -> 5.973 ( -0.10%) [ +0.00% +0.00% +0.12% / -0.10% -0.07% -0.02%] index_select strided 3 : Elapsed 0.060 ms (5.979 ms / 100) 5.972 -> 5.973 ( +0.02%) [ +0.12% +0.00% +0.00% / +0.12% +0.03% +0.02%] index_select random : Elapsed 0.060 ms (5.979 ms / 100) 5.975 -> 5.972 ( -0.05%) [ +0.00% +0.03% +0.05% / +0.03% +0.03% -0.05%] index_select random_sorted : Elapsed 0.060 ms (5.975 ms / 100) 5.976 -> 5.976 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.00% +0.08%] index_select perm : Elapsed 0.060 ms (5.977 ms / 100) 5.978 -> 5.975 ( -0.05%) [ +0.00% +0.03% +0.10% / +0.02% -0.05% -0.05%] index_select perm_sorted : Elapsed 0.060 ms (5.978 ms / 100) B = [20, 16, 4, 40] (stride (64, 1, 16, 1280)) A = [20, 16, 5, 40] (stride (1, 20, 12800, 320)) dim = 2 5.741 -> 5.735 ( -0.10%) [ +0.07% +0.00% +0.14% / +0.26% -0.10% +0.05%] index_select const : Elapsed 0.057 ms (5.745 ms / 100) 5.788 -> 5.784 ( -0.07%) [ +0.00% +0.07% +0.29% / +0.12% -0.05% -0.07%] index_select wrap : Elapsed 0.058 ms (5.788 ms / 100) 5.790 -> 5.782 ( -0.14%) [ +0.05% +0.09% +0.00% / +0.16% -0.14% +0.00%] index_select linear : Elapsed 0.058 ms (5.793 ms / 100) 5.786 -> 5.780 ( -0.10%) [ +0.00% +0.14% +0.09% / +0.00% +0.00% -0.10%] index_select reverse : Elapsed 0.058 ms (5.786 ms / 100) 5.745 -> 5.736 ( -0.16%) [ +0.00% +0.03% +0.12% / +0.19% -0.05% -0.16%] index_select skip64 : Elapsed 0.057 ms (5.745 ms / 100) 5.748 -> 5.735 ( -0.23%) [ +0.00% +0.02% +0.02% / +0.00% -0.23% -0.14%] index_select skip256 : Elapsed 0.057 ms (5.748 ms / 100) 5.785 -> 5.782 ( -0.05%) [ +0.17% +0.00% +0.21% / +0.17% -0.05% +0.02%] index_select spread : Elapsed 0.058 ms (5.795 ms / 100) 5.796 -> 5.787 ( -0.16%) [ +0.00% +0.03% +0.16% / +0.14% -0.05% -0.16%] index_select strided 3 : Elapsed 0.058 ms (5.796 ms / 100) 5.789 -> 5.783 ( -0.10%) [ +0.09% +0.00% +0.09% / +0.17% -0.10% -0.07%] index_select random : Elapsed 0.058 ms (5.794 ms / 100) 5.761 -> 5.767 ( +0.10%) [ +0.12% +0.00% +0.07% / +0.10% +0.14% +0.26%] index_select random_sorted : Elapsed 0.058 ms (5.768 ms / 100) 5.789 -> 5.788 ( -0.02%) [ +0.00% +0.09% +0.10% / +0.09% +0.05% -0.02%] index_select perm : Elapsed 0.058 ms (5.789 ms / 100) 5.779 -> 5.793 ( +0.24%) [ +0.00% +0.21% +0.16% / +0.28% +0.36% +0.24%] index_select perm_sorted : Elapsed 0.058 ms (5.779 ms / 100) B = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) A = [20, 16, 5, 40] (stride (40, 800, 12800, 1)) dim = 2 5.772 -> 5.778 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.31% +0.31%] index_select const : Elapsed 0.058 ms (5.772 ms / 100) 5.866 -> 5.868 ( +0.03%) [ +0.07% +0.00% +0.03% / +0.17% +0.12% +0.03%] index_select wrap : Elapsed 0.059 ms (5.870 ms / 100) 5.858 -> 5.855 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.02% -0.05% +0.02%] index_select linear : Elapsed 0.059 ms (5.859 ms / 100) 5.854 -> 5.850 ( -0.07%) [ +0.15% +0.00% +0.12% / +0.07% -0.07% -0.07%] index_select reverse : Elapsed 0.059 ms (5.863 ms / 100) 5.755 -> 5.759 ( +0.07%) [ +0.17% +0.00% +0.14% / +0.07% +0.30% +0.38%] index_select skip64 : Elapsed 0.058 ms (5.765 ms / 100) 5.756 -> 5.764 ( +0.14%) [ +0.00% +0.03% +0.05% / +0.14% +0.33% +0.36%] index_select skip256 : Elapsed 0.058 ms (5.756 ms / 100) 5.870 -> 5.874 ( +0.07%) [ +0.03% +0.03% +0.00% / +0.07% +0.07% +0.10%] index_select spread : Elapsed 0.059 ms (5.872 ms / 100) 5.846 -> 5.849 ( +0.05%) [ +0.00% +0.14% +0.03% / +0.12% +0.05% +0.17%] index_select strided 3 : Elapsed 0.058 ms (5.846 ms / 100) 5.840 -> 5.823 ( -0.29%) [ +0.02% +0.00% +0.12% / +0.10% -0.19% -0.29%] index_select random : Elapsed 0.058 ms (5.841 ms / 100) 5.835 -> 5.826 ( -0.15%) [ +0.21% +0.00% +0.07% / +0.15% -0.10% -0.15%] index_select random_sorted : Elapsed 0.058 ms (5.847 ms / 100) 5.862 -> 5.857 ( -0.09%) [ +0.05% +0.00% +0.15% / +0.07% -0.07% -0.09%] index_select perm : Elapsed 0.059 ms (5.865 ms / 100) 5.858 -> 5.849 ( -0.15%) [ +0.09% +0.00% +0.29% / +0.14% -0.10% -0.15%] index_select perm_sorted : Elapsed 0.059 ms (5.863 ms / 100) B = [20, 16, 4, 40] (stride (1, 20, 320, 1280)) A = [20, 16, 5, 40] (stride (3200, 5, 1, 80)) dim = 2 5.972 -> 5.960 ( -0.20%) [ +0.10% +0.00% +0.40% / +0.30% -0.20% -0.15%] index_select const : Elapsed 0.060 ms (5.978 ms / 100) 5.978 -> 5.961 ( -0.28%) [ +0.12% +0.08% +0.00% / +0.03% -0.23% -0.28%] index_select wrap : Elapsed 0.060 ms (5.985 ms / 100) 5.978 -> 5.960 ( -0.30%) [ +0.05% +0.00% +0.08% / +0.10% -0.22% -0.30%] index_select linear : Elapsed 0.060 ms (5.981 ms / 100) 5.977 -> 5.951 ( -0.44%) [ +0.00% +0.20% +0.10% / +0.15% -0.30% -0.44%] index_select reverse : Elapsed 0.060 ms (5.977 ms / 100) 5.977 -> 5.958 ( -0.32%) [ +0.08% +0.00% +0.20% / +0.05% -0.32% -0.30%] index_select skip64 : Elapsed 0.060 ms (5.982 ms / 100) 5.982 -> 5.960 ( -0.37%) [ +0.00% +0.05% +0.05% / +0.07% -0.37% -0.35%] index_select skip256 : Elapsed 0.060 ms (5.982 ms / 100) 5.975 -> 5.959 ( -0.27%) [ +0.00% +0.08% +0.20% / +0.12% -0.27% -0.07%] index_select spread : Elapsed 0.060 ms (5.975 ms / 100) 5.979 -> 5.960 ( -0.32%) [ +0.05% +0.05% +0.00% / +0.08% -0.18% -0.32%] index_select strided 3 : Elapsed 0.060 ms (5.982 ms / 100) 5.977 -> 5.963 ( -0.23%) [ +0.00% +0.03% +0.13% / +0.12% -0.23% -0.18%] index_select random : Elapsed 0.060 ms (5.977 ms / 100) 5.976 -> 5.959 ( -0.28%) [ +0.00% +0.02% +0.12% / +0.13% -0.28% -0.25%] index_select random_sorted : Elapsed 0.060 ms (5.976 ms / 100) 5.977 -> 5.965 ( -0.20%) [ +0.00% +0.05% +0.12% / +0.28% -0.18% -0.20%] index_select perm : Elapsed 0.060 ms (5.977 ms / 100) 5.978 -> 5.961 ( -0.28%) [ +0.00% +0.12% +0.03% / +0.12% -0.28% -0.23%] index_select perm_sorted : Elapsed 0.060 ms (5.978 ms / 100) out_shape = [20, 16, 5, 4] in_shape = [20, 16, 5, 40] idx_dim = 3 B = [20, 16, 5, 4] (stride (320, 20, 1, 5)) A = [20, 16, 5, 40] (stride (1, 20, 12800, 320)) dim = 3 1.252 -> 1.254 ( +0.16%) [ +0.32% +0.00% +0.08% / +0.16% +0.56% +0.72%] index_select const : Elapsed 0.013 ms (1.256 ms / 100) 1.245 -> 1.247 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.64% +0.48%] index_select wrap : Elapsed 0.012 ms (1.247 ms / 100) 1.245 -> 1.246 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.72% +0.64%] index_select linear : Elapsed 0.012 ms (1.246 ms / 100) 1.246 -> 1.248 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.72% +0.64%] index_select reverse : Elapsed 0.012 ms (1.247 ms / 100) 1.250 -> 1.251 ( +0.08%) [ +0.40% +0.00% +0.16% / +0.08% +0.56% +0.56%] index_select skip64 : Elapsed 0.013 ms (1.255 ms / 100) 1.249 -> 1.250 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.96% +1.52%] index_select skip256 : Elapsed 0.013 ms (1.252 ms / 100) 1.240 -> 1.243 ( +0.24%) [ +0.40% +0.00% +0.16% / +0.24% +0.81% +0.65%] index_select spread : Elapsed 0.012 ms (1.245 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.56% +0.40%] index_select strided 3 : Elapsed 0.012 ms (1.246 ms / 100) 1.245 -> 1.246 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.72% +0.48%] index_select strided 5 : Elapsed 0.012 ms (1.247 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.32% +0.48%] index_select strided 7 : Elapsed 0.012 ms (1.246 ms / 100) 1.248 -> 1.249 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.56% +0.56%] index_select strided 8 : Elapsed 0.012 ms (1.250 ms / 100) 1.247 -> 1.248 ( +0.08%) [ +0.24% +0.00% +0.24% / +0.08% +0.64% +0.48%] index_select strided 16 : Elapsed 0.013 ms (1.250 ms / 100) 1.245 -> 1.244 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.24% +0.24%] index_select random : Elapsed 0.012 ms (1.246 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.56% +0.32%] index_select random_sorted : Elapsed 0.012 ms (1.246 ms / 100) 1.245 -> 1.247 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.72% +0.64%] index_select perm : Elapsed 0.012 ms (1.245 ms / 100) 1.246 -> 1.248 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.40% +0.56%] index_select perm_sorted : Elapsed 0.012 ms (1.247 ms / 100) B = [20, 16, 5, 4] (stride (320, 1, 64, 16)) dim = 3 fill_cnt = 40 2.698 -> 2.688 ( -0.37%) [ +0.00% +0.11% +0.00% / -0.37% -0.04% +0.30%] index_fill_ const : Elapsed 0.027 ms (2.698 ms / 100) 2.700 -> 2.695 ( -0.19%) [ +0.11% +0.15% +0.00% / -0.19% +0.37% +0.26%] index_fill_ linear : Elapsed 0.027 ms (2.703 ms / 100) 2.699 -> 2.691 ( -0.30%) [ +0.00% +0.22% +0.11% / -0.30% +0.15% +0.15%] index_fill_ reverse : Elapsed 0.027 ms (2.699 ms / 100) 2.701 -> 2.695 ( -0.22%) [ +0.00% +0.00% +0.19% / -0.22% +0.00% +0.00%] index_fill_ skip64 : Elapsed 0.027 ms (2.701 ms / 100) 2.705 -> 2.699 ( -0.22%) [ +0.00% +0.26% +0.15% / -0.22% -0.18% +0.00%] index_fill_ skip256 : Elapsed 0.027 ms (2.705 ms / 100) 2.699 -> 2.693 ( -0.22%) [ +0.04% +0.15% +0.00% / -0.22% +0.15% +0.07%] index_fill_ spread : Elapsed 0.027 ms (2.700 ms / 100) 2.702 -> 2.698 ( -0.15%) [ +0.04% +0.11% +0.00% / -0.15% +0.19% +0.07%] index_fill_ strided 3 : Elapsed 0.027 ms (2.703 ms / 100) 2.707 -> 2.699 ( -0.30%) [ +0.18% +0.18% +0.00% / -0.30% -0.04% +0.00%] index_fill_ random : Elapsed 0.027 ms (2.712 ms / 100) 2.703 -> 2.697 ( -0.22%) [ +0.00% +0.04% +0.11% / -0.22% -0.11% -0.15%] index_fill_ random_sorted : Elapsed 0.027 ms (2.703 ms / 100) B = [20, 16, 5, 4] (stride (1, 80, 1280, 20)) A = [20, 16, 5, 40] (stride (3200, 1, 640, 16)) dim = 3 1.289 -> 1.287 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.23% +0.23%] index_select const : Elapsed 0.013 ms (1.290 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.47% +0.47%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.69% +0.69%] index_select skip64 : Elapsed 0.013 ms (1.309 ms / 100) 1.287 -> 1.286 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.39% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.288 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.71%] index_select spread : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.55% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.39% +0.31%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.31% +0.23%] index_select perm : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.16% +0.08% +0.00% / +0.23% +0.55% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.279 ms / 100) B = [20, 16, 5, 4] (stride (1, 80, 1280, 20)) A = [20, 16, 5, 40] (stride (16, 1, 12800, 320)) dim = 3 1.195 -> 1.195 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.50% +0.59%] index_select const : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select wrap : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.59% +0.75%] index_select linear : Elapsed 0.012 ms (1.197 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.84% +0.75%] index_select reverse : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_select skip64 : Elapsed 0.012 ms (1.195 ms / 100) 1.194 -> 1.196 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.67% +0.67%] index_select skip256 : Elapsed 0.012 ms (1.194 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.67%] index_select spread : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.67% +0.67%] index_select strided 3 : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.59%] index_select strided 5 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.197 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.59% +0.59%] index_select strided 7 : Elapsed 0.012 ms (1.196 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.196 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.33%] index_select strided 16 : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.199 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.59%] index_select random_sorted : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.198 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.59% +0.67%] index_select perm : Elapsed 0.012 ms (1.197 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.25% +0.25%] index_select perm_sorted : Elapsed 0.012 ms (1.199 ms / 100) B = [20, 16, 5, 4] (stride (5, 100, 1, 1600)) A = [20, 16, 5, 40] (stride (200, 4000, 1, 5)) dim = 3 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.39%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.55% +0.55%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.47% +0.39%] index_select reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.78%] index_select spread : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.47% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.280 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.23% +0.31%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.31% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.31% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) out_shape = [4, 16, 40, 5] in_shape = [20, 16, 40, 5] idx_dim = 0 B = [4, 16, 40, 5] (stride (3200, 1, 80, 16)) dim = 0 fill_cnt = 20 1.812 -> 1.793 ( -1.05%) [ +0.00% +0.17% +0.11% / -0.83% -0.88% -1.05%] index_fill_ const : Elapsed 0.018 ms (1.812 ms / 100) 1.819 -> 1.808 ( -0.60%) [ +0.22% +0.00% +0.16% / -0.60% -0.33% -0.60%] index_fill_ linear : Elapsed 0.018 ms (1.823 ms / 100) 1.807 -> 1.796 ( -0.61%) [ +0.06% +0.00% +0.00% / -0.61% -0.33% -0.44%] index_fill_ reverse : Elapsed 0.018 ms (1.808 ms / 100) 1.853 -> 1.836 ( -0.92%) [ +0.05% +0.00% +0.05% / -0.92% -0.49% -0.59%] index_fill_ skip64 : Elapsed 0.019 ms (1.854 ms / 100) 1.854 -> 1.837 ( -0.92%) [ +0.05% +0.16% +0.00% / -0.92% -0.76% -0.81%] index_fill_ skip256 : Elapsed 0.019 ms (1.855 ms / 100) 1.800 -> 1.791 ( -0.50%) [ +0.17% +0.00% +0.11% / -0.33% -0.39% -0.50%] index_fill_ spread : Elapsed 0.018 ms (1.803 ms / 100) 1.807 -> 1.795 ( -0.66%) [ +0.00% +0.17% +0.00% / -0.66% -0.55% -0.33%] index_fill_ strided 3 : Elapsed 0.018 ms (1.807 ms / 100) 1.809 -> 1.798 ( -0.61%) [ +0.11% +0.33% +0.00% / -0.50% -0.61% -0.55%] index_fill_ random : Elapsed 0.018 ms (1.811 ms / 100) 1.811 -> 1.797 ( -0.77%) [ +0.00% +0.17% +0.06% / -0.77% -0.28% -0.66%] index_fill_ random_sorted : Elapsed 0.018 ms (1.811 ms / 100) B = [4, 16, 40, 5] (stride (640, 40, 1, 2560)) A = [20, 16, 40, 5] (stride (3200, 1, 80, 16)) dim = 0 0.763 -> 0.770 ( +0.92%) [ +0.00% +0.39% +0.13% / +0.92% +2.36% +2.62%] index_select const : Elapsed 0.008 ms (0.763 ms / 100) 0.764 -> 0.772 ( +1.05%) [ +0.79% +0.00% +0.52% / +1.18% +1.44% +1.05%] index_select wrap : Elapsed 0.008 ms (0.770 ms / 100) 0.767 -> 0.770 ( +0.39%) [ +0.52% +0.00% +0.78% / +0.39% +2.22% +0.91%] index_select linear : Elapsed 0.008 ms (0.771 ms / 100) 0.771 -> 0.774 ( +0.39%) [ +0.65% +0.26% +0.00% / +0.65% +0.39% +0.52%] index_select reverse : Elapsed 0.008 ms (0.776 ms / 100) 0.771 -> 0.775 ( +0.52%) [ +0.00% +0.13% +0.26% / +0.52% +0.78% +0.52%] index_select skip64 : Elapsed 0.008 ms (0.771 ms / 100) 0.767 -> 0.764 ( -0.39%) [ +0.00% +0.13% +0.00% / -0.39% +1.96% +1.43%] index_select skip256 : Elapsed 0.008 ms (0.767 ms / 100) 0.764 -> 0.766 ( +0.26%) [ +0.39% +0.00% +0.52% / +0.26% +3.93% +2.49%] index_select spread : Elapsed 0.008 ms (0.767 ms / 100) 0.764 -> 0.769 ( +0.65%) [ +0.52% +0.00% +0.13% / +0.65% +1.70% +1.44%] index_select strided 3 : Elapsed 0.008 ms (0.768 ms / 100) 0.765 -> 0.770 ( +0.65%) [ +0.78% +0.00% +0.78% / +0.65% +1.57% +1.05%] index_select strided 5 : Elapsed 0.008 ms (0.771 ms / 100) 0.770 -> 0.773 ( +0.39%) [ +0.65% +0.00% +0.65% / +0.65% +0.39% +0.39%] index_select strided 7 : Elapsed 0.008 ms (0.775 ms / 100) 0.772 -> 0.771 ( -0.13%) [ +0.65% +0.26% +0.00% / +0.00% +0.13% -0.13%] index_select strided 8 : Elapsed 0.008 ms (0.777 ms / 100) 0.769 -> 0.768 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.39% +0.91%] index_select strided 16 : Elapsed 0.008 ms (0.769 ms / 100) 0.773 -> 0.771 ( -0.26%) [ +0.00% +1.03% +0.00% / -0.26% +0.26% +0.13%] index_select random : Elapsed 0.008 ms (0.773 ms / 100) 0.767 -> 0.770 ( +0.39%) [ +0.39% +0.91% +0.00% / +0.39% +0.65% +1.04%] index_select random_sorted : Elapsed 0.008 ms (0.770 ms / 100) 0.767 -> 0.770 ( +0.39%) [ +0.00% +0.39% +0.00% / +0.39% +0.65% +0.78%] index_select perm : Elapsed 0.008 ms (0.767 ms / 100) 0.765 -> 0.767 ( +0.26%) [ +0.00% +0.92% +0.52% / +0.26% +1.05% +0.92%] index_select perm_sorted : Elapsed 0.008 ms (0.765 ms / 100) B = [4, 16, 40, 5] (stride (1, 160, 4, 2560)) A = [20, 16, 40, 5] (stride (80, 1, 1600, 16)) dim = 0 2.001 -> 2.005 ( +0.20%) [ +0.00% +0.05% +0.05% / +0.20% +0.75% +0.90%] index_select const : Elapsed 0.020 ms (2.001 ms / 100) 1.991 -> 1.993 ( +0.10%) [ +0.00% +0.05% +0.25% / +0.10% +0.15% +0.45%] index_select wrap : Elapsed 0.020 ms (1.991 ms / 100) 1.996 -> 1.994 ( -0.10%) [ +0.30% +0.25% +0.00% / -0.10% +0.25% +0.15%] index_select linear : Elapsed 0.020 ms (2.002 ms / 100) 1.997 -> 1.997 ( +0.00%) [ +0.30% +0.00% +0.05% / +0.00% +0.15% +0.30%] index_select reverse : Elapsed 0.020 ms (2.003 ms / 100) 2.003 -> 2.002 ( -0.05%) [ +0.20% +0.05% +0.00% / -0.05% +0.70% +0.50%] index_select skip64 : Elapsed 0.020 ms (2.007 ms / 100) 1.999 -> 2.003 ( +0.20%) [ +0.45% +0.00% +0.30% / +0.20% +0.55% +0.70%] index_select skip256 : Elapsed 0.020 ms (2.008 ms / 100) 1.994 -> 1.998 ( +0.20%) [ +0.00% +0.05% +0.20% / +0.20% +0.70% +0.80%] index_select spread : Elapsed 0.020 ms (1.994 ms / 100) 1.995 -> 1.999 ( +0.20%) [ +0.15% +0.00% +0.15% / +0.20% +0.50% +0.35%] index_select strided 3 : Elapsed 0.020 ms (1.998 ms / 100) 2.009 -> 2.006 ( -0.15%) [ +0.10% +0.00% +0.05% / -0.15% -0.05% +0.20%] index_select strided 5 : Elapsed 0.020 ms (2.011 ms / 100) 2.000 -> 1.994 ( -0.30%) [ +0.15% +0.00% +0.05% / -0.30% +0.25% +0.25%] index_select strided 7 : Elapsed 0.020 ms (2.003 ms / 100) 2.001 -> 2.007 ( +0.30%) [ +0.20% +0.20% +0.00% / +0.30% +0.30% +0.40%] index_select strided 8 : Elapsed 0.020 ms (2.005 ms / 100) 1.997 -> 2.001 ( +0.20%) [ +0.05% +0.00% +0.05% / +0.20% +0.40% +0.30%] index_select strided 16 : Elapsed 0.020 ms (1.998 ms / 100) 1.995 -> 2.001 ( +0.30%) [ +0.05% +0.30% +0.00% / +0.45% +0.55% +0.30%] index_select random : Elapsed 0.020 ms (1.996 ms / 100) 2.004 -> 2.006 ( +0.10%) [ +0.00% +0.05% +0.15% / +0.10% +0.30% +0.35%] index_select random_sorted : Elapsed 0.020 ms (2.004 ms / 100) 1.999 -> 1.999 ( +0.00%) [ +0.00% +0.05% +0.15% / +0.00% +0.50% +0.70%] index_select perm : Elapsed 0.020 ms (1.999 ms / 100) 1.996 -> 1.998 ( +0.10%) [ +0.25% +0.00% +0.10% / +0.10% +0.55% +0.30%] index_select perm_sorted : Elapsed 0.020 ms (2.001 ms / 100) out_shape = [20, 4, 40, 5] in_shape = [20, 16, 40, 5] idx_dim = 1 B = [20, 4, 40, 5] (stride (800, 200, 5, 1)) A = [20, 16, 40, 5] (stride (3200, 40, 1, 640)) dim = 1 2.299 -> 2.298 ( -0.04%) [ +0.00% +0.09% +0.04% / -0.04% +0.22% +0.22%] index_select const : Elapsed 0.023 ms (2.299 ms / 100) 2.346 -> 2.350 ( +0.17%) [ +0.21% +0.13% +0.00% / +0.21% +0.30% +0.17%] index_select wrap : Elapsed 0.024 ms (2.351 ms / 100) 2.348 -> 2.352 ( +0.17%) [ +0.43% +0.13% +0.00% / +0.26% +0.26% +0.17%] index_select linear : Elapsed 0.024 ms (2.358 ms / 100) 2.350 -> 2.356 ( +0.26%) [ +0.38% +0.04% +0.00% / +0.26% +0.30% +0.34%] index_select reverse : Elapsed 0.024 ms (2.359 ms / 100) 2.301 -> 2.307 ( +0.26%) [ +0.17% +0.13% +0.00% / +0.26% +0.65% +0.43%] index_select skip64 : Elapsed 0.023 ms (2.305 ms / 100) 2.299 -> 2.304 ( +0.22%) [ +0.00% +0.17% +0.17% / +0.22% +0.35% +0.39%] index_select skip256 : Elapsed 0.023 ms (2.299 ms / 100) 2.352 -> 2.351 ( -0.04%) [ +0.21% +0.00% +0.04% / -0.04% +0.38% +0.04%] index_select spread : Elapsed 0.024 ms (2.357 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.09% +0.00% +0.04% / +0.13% +0.13% +0.13%] index_select strided 3 : Elapsed 0.024 ms (2.354 ms / 100) 2.352 -> 2.357 ( +0.21%) [ +0.00% +0.13% +0.34% / +0.21% +0.30% +0.30%] index_select strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.350 -> 2.350 ( +0.00%) [ +0.09% +0.17% +0.00% / +0.00% +0.21% +0.13%] index_select strided 7 : Elapsed 0.024 ms (2.352 ms / 100) 2.308 -> 2.313 ( +0.22%) [ +0.35% +0.00% +0.26% / +0.22% +0.56% +0.65%] index_select strided 8 : Elapsed 0.023 ms (2.316 ms / 100) 2.326 -> 2.334 ( +0.34%) [ +0.00% +0.17% +0.30% / +0.34% +0.47% +0.43%] index_select random : Elapsed 0.023 ms (2.326 ms / 100) 2.327 -> 2.328 ( +0.04%) [ +0.04% +0.00% +0.26% / +0.17% +0.09% +0.04%] index_select random_sorted : Elapsed 0.023 ms (2.328 ms / 100) 2.353 -> 2.356 ( +0.13%) [ +0.17% +0.00% +0.17% / +0.13% +0.25% +0.42%] index_select perm : Elapsed 0.024 ms (2.357 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.00% +0.04% +0.09% / +0.13% +0.26% +0.38%] index_select perm_sorted : Elapsed 0.024 ms (2.350 ms / 100) B = [20, 4, 40, 5] (stride (800, 40, 1, 160)) A = [20, 16, 40, 5] (stride (200, 4000, 1, 40)) dim = 1 2.401 -> 2.399 ( -0.08%) [ +0.04% +0.17% +0.00% / -0.08% +0.17% +0.04%] index_select const : Elapsed 0.024 ms (2.402 ms / 100) 2.444 -> 2.447 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.12% +0.20% +0.16%] index_select wrap : Elapsed 0.024 ms (2.448 ms / 100) 2.445 -> 2.442 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.04% -0.12%] index_select linear : Elapsed 0.024 ms (2.445 ms / 100) 2.441 -> 2.439 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% -0.08% +0.08%] index_select reverse : Elapsed 0.024 ms (2.441 ms / 100) 2.399 -> 2.405 ( +0.25%) [ +0.29% +0.21% +0.00% / +0.25% +0.33% +0.42%] index_select skip64 : Elapsed 0.024 ms (2.406 ms / 100) 2.398 -> 2.401 ( +0.13%) [ +0.00% +0.08% +0.17% / +0.13% +0.42% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.438 -> 2.436 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.41% +0.16%] index_select spread : Elapsed 0.024 ms (2.440 ms / 100) 2.442 -> 2.444 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.12% +0.16%] index_select strided 3 : Elapsed 0.024 ms (2.442 ms / 100) 2.436 -> 2.436 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.45% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.04% +0.25% +0.00% / +0.12% +0.37% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.33% +0.00% +0.25% / +0.17% +0.50% +0.58%] index_select strided 8 : Elapsed 0.024 ms (2.418 ms / 100) 2.435 -> 2.435 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.37% +0.45%] index_select random : Elapsed 0.024 ms (2.439 ms / 100) 2.441 -> 2.439 ( -0.08%) [ +0.00% +0.00% +0.33% / -0.08% +0.25% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.441 ms / 100) 2.440 -> 2.440 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.41% +0.08%] index_select perm : Elapsed 0.024 ms (2.441 ms / 100) 2.440 -> 2.438 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.20% +0.16%] index_select perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) B = [20, 4, 40, 5] (stride (1, 4000, 20, 800)) A = [20, 16, 40, 5] (stride (3200, 200, 5, 1)) dim = 1 2.275 -> 2.276 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.44% +0.44%] index_select const : Elapsed 0.023 ms (2.276 ms / 100) 2.329 -> 2.334 ( +0.21%) [ +0.30% +0.00% +0.00% / +0.21% +0.52% +0.64%] index_select wrap : Elapsed 0.023 ms (2.336 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.52% +0.52%] index_select linear : Elapsed 0.023 ms (2.333 ms / 100) 2.331 -> 2.329 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.43% +0.34%] index_select reverse : Elapsed 0.023 ms (2.331 ms / 100) 2.269 -> 2.272 ( +0.13%) [ +0.04% +0.00% +0.09% / +0.13% +0.48% +0.62%] index_select skip64 : Elapsed 0.023 ms (2.270 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.09% +0.00% +0.09% / -0.04% +0.53% +0.40%] index_select skip256 : Elapsed 0.023 ms (2.277 ms / 100) 2.330 -> 2.333 ( +0.13%) [ +0.00% +0.26% +0.00% / +0.13% +0.39% +0.43%] index_select spread : Elapsed 0.023 ms (2.330 ms / 100) 2.332 -> 2.331 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.30% +0.34%] index_select strided 3 : Elapsed 0.023 ms (2.333 ms / 100) 2.331 -> 2.330 ( -0.04%) [ +0.00% +0.13% +0.04% / -0.04% +0.69% +0.60%] index_select strided 5 : Elapsed 0.023 ms (2.331 ms / 100) 2.329 -> 2.337 ( +0.34%) [ +0.00% +0.13% +0.17% / +0.34% +0.60% +0.60%] index_select strided 7 : Elapsed 0.023 ms (2.329 ms / 100) 2.289 -> 2.289 ( +0.00%) [ +0.31% +0.00% +0.04% / +0.00% +0.57% +0.48%] index_select strided 8 : Elapsed 0.023 ms (2.296 ms / 100) 2.334 -> 2.334 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.34% +0.30%] index_select random : Elapsed 0.023 ms (2.334 ms / 100) 2.328 -> 2.331 ( +0.13%) [ +0.09% +0.00% +0.21% / +0.13% +0.43% +0.60%] index_select random_sorted : Elapsed 0.023 ms (2.330 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.00% +0.17% +0.21% / +0.13% +0.47% +0.47%] index_select perm : Elapsed 0.023 ms (2.327 ms / 100) 2.333 -> 2.333 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.30% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) B = [20, 4, 40, 5] (stride (20, 1, 400, 4)) A = [20, 16, 40, 5] (stride (200, 4000, 1, 40)) dim = 1 2.406 -> 2.406 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.00% +0.12% +0.08%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.04% +0.00% +0.20% / +0.20% -0.04% +0.08%] index_select wrap : Elapsed 0.024 ms (2.447 ms / 100) 2.439 -> 2.444 ( +0.21%) [ +0.21% +0.00% +0.25% / +0.21% +0.21% +0.29%] index_select linear : Elapsed 0.024 ms (2.444 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.25% +0.29%] index_select reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.25% +0.37%] index_select skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.405 -> 2.404 ( -0.04%) [ +0.00% +0.17% +0.04% / -0.04% +0.25% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.405 ms / 100) 2.435 -> 2.439 ( +0.16%) [ +0.21% +0.00% +0.04% / +0.16% +0.33% +0.33%] index_select spread : Elapsed 0.024 ms (2.440 ms / 100) 2.439 -> 2.437 ( -0.08%) [ +0.00% +0.12% +0.08% / -0.08% +0.29% +0.16%] index_select strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.440 -> 2.438 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.29% +0.20%] index_select strided 5 : Elapsed 0.024 ms (2.440 ms / 100) 2.439 -> 2.445 ( +0.25%) [ +0.04% +0.00% +0.21% / +0.25% +0.45% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.440 ms / 100) 2.415 -> 2.417 ( +0.08%) [ +0.00% +0.21% +0.12% / +0.08% +0.54% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.415 ms / 100) 2.440 -> 2.441 ( +0.04%) [ +0.00% +0.08% +0.20% / +0.04% +0.37% +0.45%] index_select random : Elapsed 0.024 ms (2.440 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.25% +0.00% +0.16% / +0.12% +0.25% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.447 ms / 100) 2.440 -> 2.440 ( +0.00%) [ +0.00% +0.12% +0.20% / +0.00% +0.16% +0.20%] index_select perm : Elapsed 0.024 ms (2.440 ms / 100) 2.434 -> 2.435 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.49% +0.45%] index_select perm_sorted : Elapsed 0.024 ms (2.438 ms / 100) B = [20, 4, 40, 5] (stride (20, 1, 400, 4)) A = [20, 16, 40, 5] (stride (16, 1, 320, 12800)) dim = 1 2.615 -> 2.620 ( +0.19%) [ +0.19% +0.08% +0.00% / +0.19% +0.57% +0.38%] index_select const : Elapsed 0.026 ms (2.620 ms / 100) 2.619 -> 2.621 ( +0.08%) [ +0.11% +0.00% +0.04% / +0.08% +0.11% +0.34%] index_select wrap : Elapsed 0.026 ms (2.622 ms / 100) 2.619 -> 2.620 ( +0.04%) [ +0.00% +0.23% +0.04% / +0.04% +0.31% +0.38%] index_select linear : Elapsed 0.026 ms (2.619 ms / 100) 2.616 -> 2.620 ( +0.15%) [ +0.00% +0.04% +0.19% / +0.15% +0.46% +0.50%] index_select reverse : Elapsed 0.026 ms (2.616 ms / 100) 2.617 -> 2.621 ( +0.15%) [ +0.15% +0.11% +0.00% / +0.15% +0.27% +0.27%] index_select skip64 : Elapsed 0.026 ms (2.621 ms / 100) 2.616 -> 2.615 ( -0.04%) [ +0.15% +0.00% +0.11% / -0.04% +0.57% +0.42%] index_select skip256 : Elapsed 0.026 ms (2.620 ms / 100) 2.647 -> 2.648 ( +0.04%) [ +0.26% +0.08% +0.00% / +0.04% +0.45% +0.49%] index_select spread : Elapsed 0.027 ms (2.654 ms / 100) 2.651 -> 2.654 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.30% +0.34%] index_select strided 3 : Elapsed 0.027 ms (2.651 ms / 100) 2.647 -> 2.653 ( +0.23%) [ +0.11% +0.00% +0.08% / +0.23% +0.49% +0.53%] index_select strided 5 : Elapsed 0.026 ms (2.650 ms / 100) 2.649 -> 2.648 ( -0.04%) [ +0.19% +0.08% +0.00% / -0.04% +0.38% +0.30%] index_select strided 7 : Elapsed 0.027 ms (2.654 ms / 100) 2.656 -> 2.659 ( +0.11%) [ +0.08% +0.00% +0.00% / +0.11% +0.34% +0.38%] index_select strided 8 : Elapsed 0.027 ms (2.658 ms / 100) 2.655 -> 2.659 ( +0.15%) [ +0.00% +0.04% +0.15% / +0.15% +0.53% +0.26%] index_select random : Elapsed 0.027 ms (2.655 ms / 100) 2.656 -> 2.660 ( +0.15%) [ +0.19% +0.00% +0.11% / +0.15% +0.53% +0.38%] index_select random_sorted : Elapsed 0.027 ms (2.661 ms / 100) 2.658 -> 2.658 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select perm : Elapsed 0.027 ms (2.659 ms / 100) 2.650 -> 2.647 ( -0.11%) [ +0.04% +0.00% +0.08% / -0.11% +0.38% +0.30%] index_select perm_sorted : Elapsed 0.027 ms (2.651 ms / 100) B = [20, 4, 40, 5] (stride (5, 100, 400, 1)) A = [20, 16, 40, 5] (stride (3200, 1, 16, 640)) dim = 1 2.518 -> 2.523 ( +0.20%) [ +0.24% +0.12% +0.00% / +0.24% +0.36% +0.20%] index_select const : Elapsed 0.025 ms (2.524 ms / 100) 2.517 -> 2.520 ( +0.12%) [ +0.08% +0.00% +0.16% / +0.12% +0.32% +0.40%] index_select wrap : Elapsed 0.025 ms (2.519 ms / 100) 2.513 -> 2.517 ( +0.16%) [ +0.24% +0.04% +0.00% / +0.16% +0.24% +0.16%] index_select linear : Elapsed 0.025 ms (2.519 ms / 100) 2.510 -> 2.514 ( +0.16%) [ +0.00% +0.28% +0.20% / +0.68% +0.44% +0.16%] index_select reverse : Elapsed 0.025 ms (2.510 ms / 100) 2.518 -> 2.513 ( -0.20%) [ +0.20% +0.00% +0.00% / -0.20% +0.24% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.523 ms / 100) 2.520 -> 2.523 ( +0.12%) [ +0.24% +0.00% +0.20% / +0.24% +0.28% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.526 ms / 100) 2.534 -> 2.535 ( +0.04%) [ +0.36% +0.00% +0.51% / +0.04% +0.36% +0.24%] index_select spread : Elapsed 0.025 ms (2.543 ms / 100) 2.537 -> 2.536 ( -0.04%) [ +0.08% +0.00% +0.00% / +0.08% +0.35% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.539 ms / 100) 2.541 -> 2.544 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.39% +0.47%] index_select strided 5 : Elapsed 0.025 ms (2.541 ms / 100) 2.542 -> 2.544 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.08% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.545 ms / 100) 2.545 -> 2.538 ( -0.28%) [ +0.00% +0.00% +0.08% / -0.28% +0.24% +0.31%] index_select strided 8 : Elapsed 0.025 ms (2.545 ms / 100) 2.532 -> 2.540 ( +0.32%) [ +0.12% +0.20% +0.00% / +0.32% +0.51% +0.47%] index_select random : Elapsed 0.025 ms (2.535 ms / 100) 2.548 -> 2.551 ( +0.12%) [ +0.00% +0.20% +0.08% / +0.20% +0.12% +0.39%] index_select random_sorted : Elapsed 0.025 ms (2.548 ms / 100) 2.552 -> 2.552 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.16% +0.08%] index_select perm : Elapsed 0.026 ms (2.552 ms / 100) 2.537 -> 2.541 ( +0.16%) [ +0.00% +0.16% +0.04% / +0.24% +0.16% +0.32%] index_select perm_sorted : Elapsed 0.025 ms (2.537 ms / 100) B = [20, 4, 40, 5] (stride (5, 100, 400, 1)) A = [20, 16, 40, 5] (stride (1, 4000, 100, 20)) dim = 1 2.348 -> 2.346 ( -0.09%) [ +0.04% +0.26% +0.00% / -0.09% +0.60% +0.51%] index_select const : Elapsed 0.023 ms (2.349 ms / 100) 2.353 -> 2.358 ( +0.21%) [ +0.38% +0.21% +0.00% / +0.21% +0.21% +0.21%] index_select wrap : Elapsed 0.024 ms (2.362 ms / 100) 2.352 -> 2.356 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.47% +0.47%] index_select linear : Elapsed 0.024 ms (2.353 ms / 100) 2.368 -> 2.372 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.17% +0.42% +0.21%] index_select reverse : Elapsed 0.024 ms (2.370 ms / 100) 2.362 -> 2.364 ( +0.08%) [ +0.25% +0.34% +0.00% / +0.08% +0.42% +0.51%] index_select skip64 : Elapsed 0.024 ms (2.368 ms / 100) 2.349 -> 2.354 ( +0.21%) [ +0.21% +0.00% +0.26% / +0.21% +0.51% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.354 ms / 100) 2.354 -> 2.360 ( +0.25%) [ +0.08% +0.04% +0.00% / +0.25% +0.30% +0.25%] index_select spread : Elapsed 0.024 ms (2.356 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.13% +0.13% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.352 -> 2.358 ( +0.26%) [ +0.26% +0.00% +0.13% / +0.26% +0.38% +0.51%] index_select strided 5 : Elapsed 0.024 ms (2.358 ms / 100) 2.365 -> 2.367 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.08% +0.51% +0.38%] index_select strided 7 : Elapsed 0.024 ms (2.368 ms / 100) 2.346 -> 2.352 ( +0.26%) [ +0.30% +0.13% +0.00% / +0.26% +0.43% +0.43%] index_select strided 8 : Elapsed 0.024 ms (2.353 ms / 100) 2.354 -> 2.357 ( +0.13%) [ +0.00% +0.04% +0.17% / +0.13% +0.55% +0.34%] index_select random : Elapsed 0.024 ms (2.354 ms / 100) 2.354 -> 2.356 ( +0.08%) [ +0.00% +0.34% +0.08% / +0.08% +0.34% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.354 ms / 100) 2.353 -> 2.353 ( +0.00%) [ +0.00% +0.25% +0.04% / +0.00% +0.34% +0.17%] index_select perm : Elapsed 0.024 ms (2.353 ms / 100) 2.354 -> 2.354 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.13% +0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.356 ms / 100) B = [20, 4, 40, 5] (stride (1, 20, 400, 80)) dim = 1 fill_cnt = 16 1.306 -> 1.308 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.31% +0.23%] index_fill_ const : Elapsed 0.013 ms (1.307 ms / 100) 1.299 -> 1.299 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.54% +0.62%] index_fill_ linear : Elapsed 0.013 ms (1.300 ms / 100) 1.315 -> 1.294 ( -1.60%) [ +0.00% +0.08% +0.00% / -0.23% -1.60% -1.14%] index_fill_ reverse : Elapsed 0.013 ms (1.315 ms / 100) 1.303 -> 1.288 ( -1.15%) [ +0.00% +0.31% +0.23% / +0.38% -0.84% -1.15%] index_fill_ skip64 : Elapsed 0.013 ms (1.303 ms / 100) 1.311 -> 1.301 ( -0.76%) [ +0.23% +0.00% +0.38% / +0.31% -0.76% -0.53%] index_fill_ skip256 : Elapsed 0.013 ms (1.314 ms / 100) 1.309 -> 1.299 ( -0.76%) [ +0.23% +0.00% +0.23% / +0.23% -0.76% -0.31%] index_fill_ spread : Elapsed 0.013 ms (1.312 ms / 100) 1.302 -> 1.302 ( +0.00%) [ +0.00% +0.08% +0.23% / +0.00% +0.54% +0.54%] index_fill_ strided 3 : Elapsed 0.013 ms (1.302 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.00% +0.23% +0.15% / +0.08% +0.61% +0.92%] index_fill_ random : Elapsed 0.013 ms (1.309 ms / 100) 1.308 -> 1.312 ( +0.31%) [ +0.00% +0.00% +0.00% / +0.31% +0.46% +0.46%] index_fill_ random_sorted : Elapsed 0.013 ms (1.308 ms / 100) B = [20, 4, 40, 5] (stride (40, 800, 1, 3200)) A = [20, 16, 40, 5] (stride (40, 4000, 1, 800)) dim = 1 2.390 -> 2.392 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.38% +0.54%] index_select const : Elapsed 0.024 ms (2.392 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.00% +0.12% +0.24% / +0.00% +0.04% +0.20%] index_select wrap : Elapsed 0.025 ms (2.453 ms / 100) 2.454 -> 2.453 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.04% -0.04%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.20% +0.20%] index_select reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.395 -> 2.398 ( +0.13%) [ +0.08% +0.08% +0.00% / +0.13% +0.38% +0.46%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.17% +0.08% +0.00% / +0.04% +0.42% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.395 ms / 100) 2.443 -> 2.448 ( +0.20%) [ +0.04% +0.00% +0.12% / +0.20% +0.20% +0.37%] index_select spread : Elapsed 0.024 ms (2.444 ms / 100) 2.447 -> 2.440 ( -0.29%) [ +0.00% +0.08% +0.08% / -0.29% +0.16% +0.37%] index_select strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.443 -> 2.443 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.53%] index_select strided 5 : Elapsed 0.024 ms (2.445 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.25% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.21% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.409 ms / 100) 2.436 -> 2.443 ( +0.29%) [ +0.12% +0.00% +0.25% / +0.29% +0.62% +0.53%] index_select random : Elapsed 0.024 ms (2.439 ms / 100) 2.445 -> 2.451 ( +0.25%) [ +0.04% +0.00% +0.04% / +0.25% +0.33% +0.37%] index_select random_sorted : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.00% +0.04% +0.04% / +0.16% +0.29% +0.25%] index_select perm : Elapsed 0.024 ms (2.445 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.29% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.449 ms / 100) out_shape = [20, 16, 4, 5] in_shape = [20, 16, 40, 5] idx_dim = 2 B = [20, 16, 4, 5] (stride (320, 5, 80, 1)) A = [20, 16, 40, 5] (stride (3200, 40, 1, 640)) dim = 2 1.365 -> 1.365 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.59% +0.44%] index_select const : Elapsed 0.014 ms (1.366 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.59% +0.59%] index_select wrap : Elapsed 0.014 ms (1.366 ms / 100) 1.362 -> 1.362 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.73% +0.81%] index_select linear : Elapsed 0.014 ms (1.362 ms / 100) 1.363 -> 1.364 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.73% +0.73%] index_select reverse : Elapsed 0.014 ms (1.365 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.22% +0.00% +0.29% / +0.15% +0.81% +0.81%] index_select skip64 : Elapsed 0.014 ms (1.367 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.81% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.364 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.59% +0.66%] index_select spread : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.44% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.367 ms / 100) 1.364 -> 1.363 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.364 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.51% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.51% +0.59%] index_select strided 8 : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.369 ( +0.22%) [ +0.15% +0.07% +0.00% / +0.22% +0.37% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.29% / +0.00% +0.44% +0.44%] index_select random : Elapsed 0.014 ms (1.370 ms / 100) 1.369 -> 1.370 ( +0.07%) [ +0.07% +0.00% +0.22% / +0.07% +0.51% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.370 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.51% +0.37%] index_select perm : Elapsed 0.014 ms (1.366 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.44% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.368 ms / 100) B = [20, 16, 4, 5] (stride (320, 1, 16, 64)) A = [20, 16, 40, 5] (stride (1, 20, 320, 12800)) dim = 2 1.336 -> 1.336 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.45% +0.52%] index_select const : Elapsed 0.013 ms (1.337 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.30% +0.37%] index_select wrap : Elapsed 0.013 ms (1.335 ms / 100) 1.337 -> 1.339 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.37% +0.45%] index_select linear : Elapsed 0.013 ms (1.338 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.60% +0.52%] index_select reverse : Elapsed 0.013 ms (1.334 ms / 100) 1.331 -> 1.332 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.60% +0.53%] index_select skip64 : Elapsed 0.013 ms (1.332 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +0.45% +0.68%] index_select skip256 : Elapsed 0.013 ms (1.333 ms / 100) 1.333 -> 1.333 ( +0.00%) [ +0.00% +0.15% +0.08% / +0.00% +0.45% +0.45%] index_select spread : Elapsed 0.013 ms (1.333 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.52% +0.45%] index_select strided 3 : Elapsed 0.013 ms (1.339 ms / 100) 1.329 -> 1.330 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.53% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.329 ms / 100) 1.334 -> 1.334 ( +0.00%) [ +0.67% +0.22% +0.00% / +0.00% +0.52% +0.67%] index_select strided 7 : Elapsed 0.013 ms (1.343 ms / 100) 1.334 -> 1.334 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.67% +0.30%] index_select strided 8 : Elapsed 0.013 ms (1.335 ms / 100) 1.333 -> 1.332 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.45% +0.68%] index_select strided 16 : Elapsed 0.013 ms (1.333 ms / 100) 1.333 -> 1.337 ( +0.30%) [ +0.15% +0.15% +0.00% / +0.30% +0.75% +0.68%] index_select random : Elapsed 0.013 ms (1.335 ms / 100) 1.333 -> 1.334 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.53% +0.38%] index_select random_sorted : Elapsed 0.013 ms (1.335 ms / 100) 1.332 -> 1.332 ( +0.00%) [ +0.15% +0.00% +0.08% / +0.00% +0.30% +0.38%] index_select perm : Elapsed 0.013 ms (1.334 ms / 100) 1.328 -> 1.329 ( +0.08%) [ +0.30% +0.30% +0.00% / +0.08% +0.60% +0.68%] index_select perm_sorted : Elapsed 0.013 ms (1.332 ms / 100) B = [20, 16, 4, 5] (stride (1, 400, 100, 20)) A = [20, 16, 40, 5] (stride (40, 800, 1, 12800)) dim = 2 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.95% +0.66%] index_select const : Elapsed 0.014 ms (1.373 ms / 100) 1.373 -> 1.372 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.58% +0.44%] index_select wrap : Elapsed 0.014 ms (1.374 ms / 100) 1.368 -> 1.367 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.44% +0.37%] index_select linear : Elapsed 0.014 ms (1.369 ms / 100) 1.371 -> 1.371 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.51% +0.22%] index_select reverse : Elapsed 0.014 ms (1.371 ms / 100) 1.372 -> 1.374 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.51% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.373 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.15% +0.00% / +0.07% +0.44% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.29% +0.00% +0.00% / +0.15% +0.51% +0.58%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.44% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.58% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.374 ms / 100) 1.371 -> 1.371 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.51% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.373 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.80% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.374 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.87% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.375 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.51% +0.44%] index_select random : Elapsed 0.014 ms (1.373 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.36% +0.07% +0.00% / +0.07% +0.51% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.377 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.44% +0.58%] index_select perm : Elapsed 0.014 ms (1.373 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.66% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.370 ms / 100) B = [20, 16, 4, 5] (stride (4, 400, 1, 80)) A = [20, 16, 40, 5] (stride (1, 100, 1600, 20)) dim = 2 1.316 -> 1.318 ( +0.15%) [ +0.00% +0.23% +0.23% / +0.15% +1.06% +0.30%] index_select const : Elapsed 0.013 ms (1.316 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.46% +0.53%] index_select wrap : Elapsed 0.013 ms (1.319 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.45% +0.38%] index_select linear : Elapsed 0.013 ms (1.323 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.53% +0.53%] index_select reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.53%] index_select skip64 : Elapsed 0.013 ms (1.322 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.30% +0.00% +0.08% / +0.15% +0.46% +0.46%] index_select skip256 : Elapsed 0.013 ms (1.321 ms / 100) 1.314 -> 1.318 ( +0.30%) [ +0.15% +0.00% +0.00% / +0.38% +0.30% +0.38%] index_select spread : Elapsed 0.013 ms (1.316 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.45% +0.45%] index_select strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.38% +0.30%] index_select strided 5 : Elapsed 0.013 ms (1.319 ms / 100) 1.314 -> 1.314 ( +0.00%) [ +0.15% +0.38% +0.00% / +0.00% +0.46% +0.30%] index_select strided 7 : Elapsed 0.013 ms (1.316 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.30% +0.00% +0.15% / +0.08% +0.15% +0.15%] index_select strided 8 : Elapsed 0.013 ms (1.322 ms / 100) 1.312 -> 1.318 ( +0.46%) [ +0.30% +0.30% +0.00% / +0.46% +0.53% +0.46%] index_select strided 16 : Elapsed 0.013 ms (1.316 ms / 100) 1.312 -> 1.312 ( +0.00%) [ +0.00% +0.15% +0.08% / +0.00% +0.46% +0.61%] index_select random : Elapsed 0.013 ms (1.312 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.08% +0.30% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.38%] index_select perm : Elapsed 0.013 ms (1.323 ms / 100) 1.315 -> 1.314 ( -0.08%) [ +0.15% +0.00% +0.00% / -0.08% +0.38% +0.30%] index_select perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) B = [20, 16, 4, 5] (stride (64, 1, 16, 1280)) A = [20, 16, 40, 5] (stride (16, 1, 1600, 320)) dim = 2 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.67% +0.67%] index_select const : Elapsed 0.012 ms (1.194 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select wrap : Elapsed 0.012 ms (1.197 ms / 100) 1.197 -> 1.196 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.50% +0.50%] index_select linear : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select reverse : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.197 ( +0.25%) [ +0.08% +0.00% +0.08% / +0.25% +0.67% +0.67%] index_select skip64 : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.42% +0.50%] index_select skip256 : Elapsed 0.012 ms (1.195 ms / 100) 1.195 -> 1.198 ( +0.25%) [ +0.17% +0.08% +0.00% / +0.25% +0.50% +0.42%] index_select spread : Elapsed 0.012 ms (1.197 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_select strided 3 : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_select strided 5 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.59% +0.59%] index_select strided 7 : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.50%] index_select strided 8 : Elapsed 0.012 ms (1.196 ms / 100) 1.197 -> 1.196 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.33% +0.33%] index_select strided 16 : Elapsed 0.012 ms (1.197 ms / 100) 1.195 -> 1.198 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.50% +0.59%] index_select random : Elapsed 0.012 ms (1.196 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select random_sorted : Elapsed 0.012 ms (1.196 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_select perm : Elapsed 0.012 ms (1.196 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.75% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.196 ms / 100) B = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) A = [20, 16, 40, 5] (stride (1, 4000, 20, 800)) dim = 2 1.359 -> 1.359 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.59% +0.59%] index_select const : Elapsed 0.014 ms (1.360 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select wrap : Elapsed 0.014 ms (1.369 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.66% +0.58%] index_select linear : Elapsed 0.014 ms (1.371 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.66% +0.44%] index_select reverse : Elapsed 0.014 ms (1.373 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.59% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.364 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.51% +0.59%] index_select skip256 : Elapsed 0.014 ms (1.368 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.80% +0.80%] index_select spread : Elapsed 0.014 ms (1.375 ms / 100) 1.371 -> 1.374 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +0.80% +0.95%] index_select strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.370 -> 1.370 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.51% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.371 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.00% +0.15% +0.00% / +0.07% +0.58% +0.66%] index_select strided 7 : Elapsed 0.014 ms (1.372 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.65% +0.65%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.44% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.375 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.36% +0.36%] index_select random : Elapsed 0.014 ms (1.373 ms / 100) 1.372 -> 1.377 ( +0.36%) [ +0.22% +0.00% +0.22% / +0.36% +0.66% +0.66%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.36% +0.87%] index_select perm : Elapsed 0.014 ms (1.372 ms / 100) 1.370 -> 1.370 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.36% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.371 ms / 100) out_shape = [20, 16, 40, 4] in_shape = [20, 16, 40, 5] idx_dim = 3 B = [20, 16, 40, 4] (stride (2560, 1, 64, 16)) A = [20, 16, 40, 5] (stride (3200, 200, 5, 1)) dim = 3 5.405 -> 5.390 ( -0.28%) [ +0.13% +0.00% +0.07% / +0.04% -0.28% -0.28%] index_select const : Elapsed 0.054 ms (5.412 ms / 100) 5.406 -> 5.387 ( -0.35%) [ +0.00% +0.06% +0.06% / +0.06% -0.22% -0.35%] index_select wrap : Elapsed 0.054 ms (5.406 ms / 100) 5.398 -> 5.377 ( -0.39%) [ +0.00% +0.13% +0.11% / +0.04% -0.20% -0.39%] index_select linear : Elapsed 0.054 ms (5.398 ms / 100) 5.403 -> 5.384 ( -0.35%) [ +0.11% +0.00% +0.13% / +0.02% -0.26% -0.35%] index_select reverse : Elapsed 0.054 ms (5.409 ms / 100) 5.398 -> 5.385 ( -0.24%) [ +0.13% +0.00% +0.07% / +0.07% -0.19% -0.24%] index_select skip64 : Elapsed 0.054 ms (5.405 ms / 100) 5.402 -> 5.383 ( -0.35%) [ +0.00% +0.06% +0.07% / +0.09% -0.33% -0.35%] index_select skip256 : Elapsed 0.054 ms (5.402 ms / 100) 5.408 -> 5.389 ( -0.35%) [ +0.09% +0.00% +0.28% / +0.02% -0.35% -0.35%] index_select spread : Elapsed 0.054 ms (5.413 ms / 100) 5.400 -> 5.387 ( -0.24%) [ +0.04% +0.07% +0.00% / +0.07% -0.17% -0.24%] index_select strided 3 : Elapsed 0.054 ms (5.402 ms / 100) 5.405 -> 5.389 ( -0.30%) [ +0.00% +0.06% +0.02% / +0.02% -0.26% -0.30%] index_select random : Elapsed 0.054 ms (5.405 ms / 100) 5.406 -> 5.383 ( -0.43%) [ +0.00% +0.00% +0.04% / +0.04% -0.43% -0.35%] index_select random_sorted : Elapsed 0.054 ms (5.406 ms / 100) 5.405 -> 5.386 ( -0.35%) [ +0.04% +0.09% +0.00% / +0.19% -0.35% -0.20%] index_select perm : Elapsed 0.054 ms (5.407 ms / 100) 5.403 -> 5.384 ( -0.35%) [ +0.00% +0.06% +0.11% / +0.11% -0.35% -0.33%] index_select perm_sorted : Elapsed 0.054 ms (5.403 ms / 100) B = [20, 16, 40, 4] (stride (4, 3200, 80, 1)) A = [20, 16, 40, 5] (stride (1, 20, 1600, 320)) dim = 3 5.591 -> 5.581 ( -0.18%) [ +0.00% +0.09% +0.09% / +0.14% -0.11% -0.18%] index_select const : Elapsed 0.056 ms (5.591 ms / 100) 5.649 -> 5.643 ( -0.11%) [ +0.07% +0.00% +0.14% / +0.11% -0.04% -0.11%] index_select wrap : Elapsed 0.057 ms (5.653 ms / 100) 5.651 -> 5.644 ( -0.12%) [ +0.00% +0.00% +0.11% / -0.02% -0.12% -0.11%] index_select linear : Elapsed 0.057 ms (5.651 ms / 100) 5.639 -> 5.642 ( +0.05%) [ +0.00% +0.11% +0.14% / +0.05% +0.25% +0.28%] index_select reverse : Elapsed 0.056 ms (5.639 ms / 100) 5.593 -> 5.578 ( -0.27%) [ +0.18% +0.00% +0.07% / +0.04% -0.27% -0.20%] index_select skip64 : Elapsed 0.056 ms (5.603 ms / 100) 5.594 -> 5.572 ( -0.39%) [ +0.00% +0.04% +0.16% / +0.13% -0.39% -0.29%] index_select skip256 : Elapsed 0.056 ms (5.594 ms / 100) 5.648 -> 5.639 ( -0.16%) [ +0.12% +0.00% +0.05% / +0.00% -0.16% -0.11%] index_select spread : Elapsed 0.057 ms (5.655 ms / 100) 5.644 -> 5.639 ( -0.09%) [ +0.14% +0.00% +0.05% / +0.07% +0.04% -0.09%] index_select strided 3 : Elapsed 0.057 ms (5.652 ms / 100) 5.632 -> 5.625 ( -0.12%) [ +0.00% +0.05% +0.14% / +0.09% -0.12% -0.04%] index_select random : Elapsed 0.056 ms (5.632 ms / 100) 5.623 -> 5.622 ( -0.02%) [ +0.00% +0.14% +0.12% / +0.02% +0.20% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.623 ms / 100) 5.642 -> 5.643 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.07% +0.02% +0.04%] index_select perm : Elapsed 0.056 ms (5.642 ms / 100) 5.641 -> 5.634 ( -0.12%) [ +0.12% +0.00% +0.16% / +0.18% -0.12% +0.04%] index_select perm_sorted : Elapsed 0.056 ms (5.648 ms / 100) B = [20, 16, 40, 4] (stride (64, 1, 1280, 16)) A = [20, 16, 40, 5] (stride (5, 4000, 100, 1)) dim = 3 5.761 -> 5.755 ( -0.10%) [ +0.00% +0.07% +0.10% / +0.10% -0.07% -0.10%] index_select const : Elapsed 0.058 ms (5.761 ms / 100) 5.759 -> 5.755 ( -0.07%) [ +0.00% +0.10% +0.02% / +0.12% -0.02% -0.07%] index_select wrap : Elapsed 0.058 ms (5.759 ms / 100) 5.759 -> 5.751 ( -0.14%) [ +0.00% +0.03% +0.10% / +0.17% -0.14% -0.10%] index_select linear : Elapsed 0.058 ms (5.759 ms / 100) 5.761 -> 5.749 ( -0.21%) [ +0.09% +0.00% +0.03% / +0.09% -0.21% -0.21%] index_select reverse : Elapsed 0.058 ms (5.766 ms / 100) 5.759 -> 5.753 ( -0.10%) [ +0.09% +0.00% +0.14% / +0.17% -0.05% -0.10%] index_select skip64 : Elapsed 0.058 ms (5.764 ms / 100) 5.759 -> 5.750 ( -0.16%) [ +0.03% +0.05% +0.00% / +0.23% -0.16% -0.02%] index_select skip256 : Elapsed 0.058 ms (5.761 ms / 100) 5.760 -> 5.752 ( -0.14%) [ +0.02% +0.00% +0.07% / +0.19% -0.14% -0.05%] index_select spread : Elapsed 0.058 ms (5.761 ms / 100) 5.762 -> 5.756 ( -0.10%) [ +0.02% +0.00% +0.05% / +0.02% -0.09% -0.10%] index_select strided 3 : Elapsed 0.058 ms (5.763 ms / 100) 5.755 -> 5.749 ( -0.10%) [ +0.12% +0.00% +0.17% / +0.09% -0.10% -0.05%] index_select random : Elapsed 0.058 ms (5.762 ms / 100) 5.761 -> 5.749 ( -0.21%) [ +0.05% +0.00% +0.00% / +0.09% -0.21% -0.10%] index_select random_sorted : Elapsed 0.058 ms (5.764 ms / 100) 5.757 -> 5.752 ( -0.09%) [ +0.00% +0.02% +0.10% / +0.12% -0.09% -0.09%] index_select perm : Elapsed 0.058 ms (5.757 ms / 100) 5.760 -> 5.757 ( -0.05%) [ +0.10% +0.00% +0.12% / +0.02% +0.00% -0.05%] index_select perm_sorted : Elapsed 0.058 ms (5.766 ms / 100) B = [20, 16, 40, 4] (stride (1, 20, 1280, 320)) dim = 3 fill_cnt = 5 3.712 -> 3.714 ( +0.05%) [ +0.00% +0.13% +0.08% / +0.08% +0.30% +0.05%] index_fill_ const : Elapsed 0.037 ms (3.712 ms / 100) 3.732 -> 3.726 ( -0.16%) [ +0.03% +0.03% +0.00% / -0.16% +0.11% +0.03%] index_fill_ linear : Elapsed 0.037 ms (3.733 ms / 100) 3.716 -> 3.721 ( +0.13%) [ +0.05% +0.00% +0.19% / +0.13% +0.35% +0.32%] index_fill_ reverse : Elapsed 0.037 ms (3.718 ms / 100) 3.745 -> 3.741 ( -0.11%) [ +0.00% +0.08% +0.19% / -0.11% +0.24% +0.37%] index_fill_ skip64 : Elapsed 0.037 ms (3.745 ms / 100) 3.747 -> 3.753 ( +0.16%) [ +0.13% +0.00% +0.11% / +0.16% +0.21% +0.40%] index_fill_ skip256 : Elapsed 0.038 ms (3.752 ms / 100) 3.705 -> 3.703 ( -0.05%) [ +0.00% +0.19% +0.05% / -0.05% +0.08% +0.16%] index_fill_ spread : Elapsed 0.037 ms (3.705 ms / 100) 3.714 -> 3.712 ( -0.05%) [ +0.05% +0.03% +0.00% / -0.05% +0.30% +0.24%] index_fill_ strided 3 : Elapsed 0.037 ms (3.716 ms / 100) 3.743 -> 3.748 ( +0.13%) [ +0.11% +0.00% +0.05% / +0.13% +0.19% +0.19%] index_fill_ random : Elapsed 0.037 ms (3.747 ms / 100) 3.770 -> 3.765 ( -0.13%) [ +0.00% +0.03% +0.08% / +0.00% +0.16% -0.13%] index_fill_ random_sorted : Elapsed 0.038 ms (3.770 ms / 100) B = [20, 16, 40, 4] (stride (640, 40, 1, 12800)) A = [20, 16, 40, 5] (stride (3200, 1, 16, 640)) dim = 3 5.199 -> 5.211 ( +0.23%) [ +0.00% +0.19% +0.17% / +0.23% +0.46% +0.54%] index_select const : Elapsed 0.052 ms (5.199 ms / 100) 5.237 -> 5.220 ( -0.32%) [ +0.00% +0.06% +0.10% / +0.11% -0.32% -0.21%] index_select wrap : Elapsed 0.052 ms (5.237 ms / 100) 5.242 -> 5.218 ( -0.46%) [ +0.04% +0.00% +0.10% / +0.06% -0.38% -0.46%] index_select linear : Elapsed 0.052 ms (5.244 ms / 100) 5.221 -> 5.219 ( -0.04%) [ +0.21% +0.00% +0.40% / +0.29% -0.04% +0.04%] index_select reverse : Elapsed 0.052 ms (5.232 ms / 100) 5.195 -> 5.214 ( +0.37%) [ +0.12% +0.00% +0.31% / +0.37% +0.60% +0.48%] index_select skip64 : Elapsed 0.052 ms (5.201 ms / 100) 5.198 -> 5.212 ( +0.27%) [ +0.00% +0.00% +0.19% / +0.27% +0.54% +0.58%] index_select skip256 : Elapsed 0.052 ms (5.198 ms / 100) 5.235 -> 5.222 ( -0.25%) [ +0.00% +0.11% +0.25% / +0.25% -0.15% -0.25%] index_select spread : Elapsed 0.052 ms (5.235 ms / 100) 5.213 -> 5.228 ( +0.29%) [ +0.13% +0.00% +0.25% / +0.36% +0.29% +0.36%] index_select strided 3 : Elapsed 0.052 ms (5.220 ms / 100) 5.225 -> 5.220 ( -0.10%) [ +0.00% +0.08% +0.13% / +0.38% +0.10% -0.10%] index_select random : Elapsed 0.052 ms (5.225 ms / 100) 5.229 -> 5.231 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.08% +0.19%] index_select random_sorted : Elapsed 0.052 ms (5.231 ms / 100) 5.235 -> 5.231 ( -0.08%) [ +0.00% +0.04% +0.17% / +0.25% -0.08% -0.06%] index_select perm : Elapsed 0.052 ms (5.235 ms / 100) 5.239 -> 5.217 ( -0.42%) [ +0.00% +0.08% +0.11% / +0.06% -0.36% -0.42%] index_select perm_sorted : Elapsed 0.052 ms (5.239 ms / 100) B = [20, 16, 40, 4] (stride (640, 1, 16, 12800)) A = [20, 16, 40, 5] (stride (1, 4000, 20, 800)) dim = 3 5.698 -> 5.699 ( +0.02%) [ +0.05% +0.00% +0.07% / +0.02% +0.07% +0.18%] index_select const : Elapsed 0.057 ms (5.701 ms / 100) 5.769 -> 5.743 ( -0.45%) [ +0.02% +0.00% +0.12% / +0.07% -0.31% -0.45%] index_select wrap : Elapsed 0.058 ms (5.770 ms / 100) 5.770 -> 5.739 ( -0.54%) [ +0.09% +0.00% +0.21% / +0.03% -0.54% -0.33%] index_select linear : Elapsed 0.058 ms (5.775 ms / 100) 5.791 -> 5.744 ( -0.81%) [ +0.00% +0.10% +0.10% / +0.10% -0.78% -0.81%] index_select reverse : Elapsed 0.058 ms (5.791 ms / 100) 5.692 -> 5.704 ( +0.21%) [ +0.07% +0.11% +0.00% / +0.21% +0.32% +0.26%] index_select skip64 : Elapsed 0.057 ms (5.696 ms / 100) 5.694 -> 5.702 ( +0.14%) [ +0.00% +0.07% +0.05% / +0.14% +0.26% +0.19%] index_select skip256 : Elapsed 0.057 ms (5.694 ms / 100) 5.773 -> 5.741 ( -0.55%) [ +0.03% +0.00% +0.16% / +0.00% -0.43% -0.55%] index_select spread : Elapsed 0.058 ms (5.775 ms / 100) 5.753 -> 5.725 ( -0.49%) [ +0.10% +0.00% +0.10% / +0.23% -0.42% -0.49%] index_select strided 3 : Elapsed 0.058 ms (5.759 ms / 100) 5.749 -> 5.761 ( +0.21%) [ +0.00% +0.23% +0.24% / +0.21% +0.23% +0.28%] index_select random : Elapsed 0.057 ms (5.749 ms / 100) 5.760 -> 5.755 ( -0.09%) [ +0.05% +0.00% +0.03% / +0.10% +0.09% -0.09%] index_select random_sorted : Elapsed 0.058 ms (5.763 ms / 100) 5.761 -> 5.727 ( -0.59%) [ +0.00% +0.00% +0.14% / +0.05% -0.45% -0.59%] index_select perm : Elapsed 0.058 ms (5.761 ms / 100) 5.770 -> 5.742 ( -0.49%) [ +0.00% +0.03% +0.17% / +0.14% -0.49% -0.36%] index_select perm_sorted : Elapsed 0.058 ms (5.770 ms / 100) B = [20, 16, 40, 4] (stride (1, 800, 20, 12800)) A = [20, 16, 40, 5] (stride (200, 4000, 5, 1)) dim = 3 5.653 -> 5.644 ( -0.16%) [ +0.02% +0.00% +0.07% / +0.28% -0.04% -0.16%] index_select const : Elapsed 0.057 ms (5.654 ms / 100) 5.651 -> 5.645 ( -0.11%) [ +0.00% +0.07% +0.11% / +0.12% -0.11% -0.11%] index_select wrap : Elapsed 0.057 ms (5.651 ms / 100) 5.647 -> 5.647 ( +0.00%) [ +0.02% +0.00% +0.18% / +0.05% +0.00% +0.07%] index_select linear : Elapsed 0.056 ms (5.648 ms / 100) 5.654 -> 5.644 ( -0.18%) [ +0.11% +0.00% +0.21% / +0.07% -0.18% -0.04%] index_select reverse : Elapsed 0.057 ms (5.660 ms / 100) 5.654 -> 5.646 ( -0.14%) [ +0.02% +0.09% +0.00% / +0.04% -0.14% -0.11%] index_select skip64 : Elapsed 0.057 ms (5.655 ms / 100) 5.654 -> 5.651 ( -0.05%) [ +0.12% +0.00% +0.04% / +0.23% +0.05% -0.05%] index_select skip256 : Elapsed 0.057 ms (5.661 ms / 100) 5.652 -> 5.647 ( -0.09%) [ +0.14% +0.00% +0.00% / +0.07% -0.07% -0.09%] index_select spread : Elapsed 0.057 ms (5.660 ms / 100) 5.652 -> 5.643 ( -0.16%) [ +0.04% +0.00% +0.02% / +0.00% -0.16% -0.11%] index_select strided 3 : Elapsed 0.057 ms (5.654 ms / 100) 5.649 -> 5.648 ( -0.02%) [ +0.07% +0.00% +0.12% / +0.12% -0.02% -0.02%] index_select random : Elapsed 0.057 ms (5.653 ms / 100) 5.648 -> 5.643 ( -0.09%) [ +0.00% +0.14% +0.21% / +0.16% +0.05% -0.09%] index_select random_sorted : Elapsed 0.056 ms (5.648 ms / 100) 5.657 -> 5.650 ( -0.12%) [ +0.00% +0.14% +0.12% / +0.18% -0.12% -0.05%] index_select perm : Elapsed 0.057 ms (5.657 ms / 100) 5.648 -> 5.645 ( -0.05%) [ +0.07% +0.00% +0.11% / +0.02% -0.05% +0.05%] index_select perm_sorted : Elapsed 0.057 ms (5.652 ms / 100) B = [20, 16, 40, 4] (stride (1, 800, 20, 12800)) A = [20, 16, 40, 5] (stride (40, 4000, 1, 800)) dim = 3 5.426 -> 5.430 ( +0.07%) [ +0.13% +0.00% +0.02% / +0.07% +0.33% +0.22%] index_select const : Elapsed 0.054 ms (5.433 ms / 100) 5.543 -> 5.527 ( -0.29%) [ +0.04% +0.00% +0.16% / +0.16% -0.14% -0.29%] index_select wrap : Elapsed 0.055 ms (5.545 ms / 100) 5.537 -> 5.516 ( -0.38%) [ +0.11% +0.00% +0.18% / +0.23% -0.07% -0.38%] index_select linear : Elapsed 0.055 ms (5.543 ms / 100) 5.551 -> 5.518 ( -0.59%) [ +0.00% +0.04% +0.04% / +0.04% -0.34% -0.59%] index_select reverse : Elapsed 0.056 ms (5.551 ms / 100) 5.407 -> 5.414 ( +0.13%) [ +0.00% +0.09% +0.20% / +0.13% +0.41% +0.43%] index_select skip64 : Elapsed 0.054 ms (5.407 ms / 100) 5.419 -> 5.420 ( +0.02%) [ +0.04% +0.00% +0.17% / +0.02% +0.28% +0.31%] index_select skip256 : Elapsed 0.054 ms (5.421 ms / 100) 5.543 -> 5.532 ( -0.20%) [ +0.00% +0.14% +0.09% / +0.05% -0.09% -0.20%] index_select spread : Elapsed 0.055 ms (5.543 ms / 100) 5.523 -> 5.514 ( -0.16%) [ +0.02% +0.00% +0.02% / -0.02% +0.04% -0.16%] index_select strided 3 : Elapsed 0.055 ms (5.524 ms / 100) 5.542 -> 5.519 ( -0.42%) [ +0.00% +0.16% +0.18% / +0.22% -0.42% -0.31%] index_select random : Elapsed 0.055 ms (5.542 ms / 100) 5.542 -> 5.514 ( -0.51%) [ +0.00% +0.23% +0.13% / +0.14% -0.47% -0.51%] index_select random_sorted : Elapsed 0.055 ms (5.542 ms / 100) 5.521 -> 5.522 ( +0.02%) [ +0.00% +0.24% +0.29% / +0.18% +0.02% +0.16%] index_select perm : Elapsed 0.055 ms (5.521 ms / 100) 5.520 -> 5.509 ( -0.20%) [ +0.07% +0.00% +0.02% / -0.09% -0.20% +0.02%] index_select perm_sorted : Elapsed 0.055 ms (5.524 ms / 100) out_shape = [4, 40, 5, 16] in_shape = [20, 40, 5, 16] idx_dim = 0 B = [4, 40, 5, 16] (stride (3200, 80, 1, 5)) A = [20, 40, 5, 16] (stride (3200, 16, 640, 1)) dim = 0 1.991 -> 1.992 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +1.16% +0.90%] index_select const : Elapsed 0.020 ms (1.991 ms / 100) 2.039 -> 2.034 ( -0.25%) [ +0.10% +0.00% +0.10% / -0.25% +1.23% +1.18%] index_select wrap : Elapsed 0.020 ms (2.041 ms / 100) 2.038 -> 2.035 ( -0.15%) [ +0.15% +0.00% +0.05% / -0.15% +1.13% +1.28%] index_select linear : Elapsed 0.020 ms (2.041 ms / 100) 2.051 -> 2.048 ( -0.15%) [ +0.00% +0.05% +0.15% / -0.15% +0.59% +0.63%] index_select reverse : Elapsed 0.021 ms (2.051 ms / 100) 1.984 -> 1.986 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.96% +1.01%] index_select skip64 : Elapsed 0.020 ms (1.984 ms / 100) 1.991 -> 1.992 ( +0.05%) [ +0.15% +0.00% +0.05% / +0.05% +1.16% +1.05%] index_select skip256 : Elapsed 0.020 ms (1.994 ms / 100) 2.044 -> 2.049 ( +0.24%) [ +0.00% +0.20% +0.10% / +0.24% +0.93% +0.59%] index_select spread : Elapsed 0.020 ms (2.044 ms / 100) 2.043 -> 2.050 ( +0.34%) [ +0.00% +0.00% +0.29% / +0.34% +1.27% +1.13%] index_select strided 3 : Elapsed 0.020 ms (2.043 ms / 100) 2.043 -> 2.042 ( -0.05%) [ +0.10% +0.44% +0.00% / -0.05% +0.73% +0.59%] index_select strided 5 : Elapsed 0.020 ms (2.045 ms / 100) 2.037 -> 2.042 ( +0.25%) [ +0.05% +0.74% +0.00% / +0.25% +1.03% +1.13%] index_select strided 7 : Elapsed 0.020 ms (2.038 ms / 100) 2.028 -> 2.033 ( +0.25%) [ +0.10% +0.15% +0.00% / +0.25% +0.89% +0.79%] index_select strided 8 : Elapsed 0.020 ms (2.030 ms / 100) 2.018 -> 2.019 ( +0.05%) [ +0.10% +0.00% +0.15% / +0.05% +2.23% +2.23%] index_select strided 16 : Elapsed 0.020 ms (2.020 ms / 100) 2.009 -> 2.012 ( +0.15%) [ +0.00% +0.20% +0.00% / +0.15% +0.25% +0.45%] index_select random : Elapsed 0.020 ms (2.009 ms / 100) 2.003 -> 2.013 ( +0.50%) [ +0.35% +0.40% +0.00% / +0.50% +0.85% +0.75%] index_select random_sorted : Elapsed 0.020 ms (2.010 ms / 100) 2.051 -> 2.040 ( -0.54%) [ +0.00% +0.05% +0.00% / +0.24% -0.54% -0.44%] index_select perm : Elapsed 0.021 ms (2.051 ms / 100) 2.057 -> 2.048 ( -0.44%) [ +0.39% +0.00% +0.19% / +0.00% -0.29% -0.44%] index_select perm_sorted : Elapsed 0.021 ms (2.065 ms / 100) B = [4, 40, 5, 16] (stride (3200, 1, 640, 40)) A = [20, 40, 5, 16] (stride (3200, 80, 16, 1)) dim = 0 1.725 -> 1.723 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +1.16% +0.99%] index_select const : Elapsed 0.017 ms (1.725 ms / 100) 1.766 -> 1.770 ( +0.23%) [ +0.45% +0.11% +0.00% / +0.23% +2.04% +1.98%] index_select wrap : Elapsed 0.018 ms (1.774 ms / 100) 1.768 -> 1.770 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +2.21% +2.04%] index_select linear : Elapsed 0.018 ms (1.770 ms / 100) 1.763 -> 1.764 ( +0.06%) [ +0.17% +0.23% +0.00% / +0.06% +2.21% +2.33%] index_select reverse : Elapsed 0.018 ms (1.766 ms / 100) 1.725 -> 1.728 ( +0.17%) [ +0.23% +0.00% +0.00% / +0.17% +1.28% +0.93%] index_select skip64 : Elapsed 0.017 ms (1.729 ms / 100) 1.725 -> 1.727 ( +0.12%) [ +0.00% +0.23% +0.29% / +0.12% +0.87% +0.99%] index_select skip256 : Elapsed 0.017 ms (1.725 ms / 100) 1.782 -> 1.780 ( -0.11%) [ +0.17% +0.06% +0.00% / -0.06% -0.11% -0.06%] index_select spread : Elapsed 0.018 ms (1.785 ms / 100) 1.779 -> 1.778 ( -0.06%) [ +0.00% +0.17% +0.11% / +0.11% +0.06% -0.06%] index_select strided 3 : Elapsed 0.018 ms (1.779 ms / 100) 1.783 -> 1.783 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.11% +0.00% +0.00%] index_select strided 5 : Elapsed 0.018 ms (1.786 ms / 100) 1.751 -> 1.753 ( +0.11%) [ +0.11% +0.23% +0.00% / +0.11% +2.57% +2.91%] index_select strided 7 : Elapsed 0.018 ms (1.753 ms / 100) 1.777 -> 1.778 ( +0.06%) [ +0.00% +0.28% +0.00% / +0.06% +0.84% +0.84%] index_select strided 8 : Elapsed 0.018 ms (1.777 ms / 100) 1.781 -> 1.782 ( +0.06%) [ +0.00% +0.34% +0.28% / +0.06% +0.62% +0.34%] index_select strided 16 : Elapsed 0.018 ms (1.781 ms / 100) 1.734 -> 1.732 ( -0.12%) [ +0.29% +0.17% +0.00% / +0.35% -0.12% +0.17%] index_select random : Elapsed 0.017 ms (1.739 ms / 100) 1.734 -> 1.734 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.29% +0.00% +0.06%] index_select random_sorted : Elapsed 0.017 ms (1.736 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.06% +0.06% / +0.11% +0.28% +0.17%] index_select perm : Elapsed 0.018 ms (1.770 ms / 100) 1.781 -> 1.772 ( -0.51%) [ +0.00% +0.22% +0.11% / +0.17% -0.17% -0.51%] index_select perm_sorted : Elapsed 0.018 ms (1.781 ms / 100) B = [4, 40, 5, 16] (stride (3200, 1, 640, 40)) A = [20, 40, 5, 16] (stride (80, 1600, 1, 5)) dim = 0 1.923 -> 1.924 ( +0.05%) [ +0.16% +0.00% +0.10% / +0.05% +0.62% +0.62%] index_select const : Elapsed 0.019 ms (1.926 ms / 100) 1.919 -> 1.921 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.94% +0.99%] index_select wrap : Elapsed 0.019 ms (1.921 ms / 100) 1.914 -> 1.914 ( +0.00%) [ +0.26% +0.37% +0.00% / +0.00% +1.20% +1.10%] index_select linear : Elapsed 0.019 ms (1.919 ms / 100) 1.924 -> 1.930 ( +0.31%) [ +0.57% +0.00% +0.36% / +0.31% +0.83% +0.83%] index_select reverse : Elapsed 0.019 ms (1.935 ms / 100) 1.910 -> 1.924 ( +0.73%) [ +0.42% +0.00% +0.16% / +0.73% +0.84% +1.31%] index_select skip64 : Elapsed 0.019 ms (1.918 ms / 100) 1.925 -> 1.925 ( +0.00%) [ +0.00% +0.16% +0.05% / +0.00% +0.57% +0.47%] index_select skip256 : Elapsed 0.019 ms (1.925 ms / 100) 1.920 -> 1.915 ( -0.26%) [ +0.00% +0.05% +0.00% / -0.26% +1.61% +1.04%] index_select spread : Elapsed 0.019 ms (1.920 ms / 100) 1.923 -> 1.926 ( +0.16%) [ +0.42% +0.36% +0.00% / +0.16% +0.78% +0.62%] index_select strided 3 : Elapsed 0.019 ms (1.931 ms / 100) 1.912 -> 1.910 ( -0.10%) [ +0.21% +0.21% +0.00% / -0.10% +1.36% +1.41%] index_select strided 5 : Elapsed 0.019 ms (1.916 ms / 100) 1.920 -> 1.927 ( +0.36%) [ +0.21% +0.00% +0.10% / +0.36% +1.56% +1.30%] index_select strided 7 : Elapsed 0.019 ms (1.924 ms / 100) 1.933 -> 1.929 ( -0.21%) [ +0.10% +0.05% +0.00% / -0.21% +0.83% +0.98%] index_select strided 8 : Elapsed 0.019 ms (1.935 ms / 100) 1.922 -> 1.932 ( +0.52%) [ +0.26% +0.00% +0.21% / +0.52% +1.40% +1.40%] index_select strided 16 : Elapsed 0.019 ms (1.927 ms / 100) 1.904 -> 1.903 ( -0.05%) [ +0.00% +0.00% +0.26% / -0.05% +0.63% +0.58%] index_select random : Elapsed 0.019 ms (1.904 ms / 100) 1.906 -> 1.914 ( +0.42%) [ +0.26% +0.31% +0.00% / +0.42% +0.89% +1.00%] index_select random_sorted : Elapsed 0.019 ms (1.911 ms / 100) 1.909 -> 1.910 ( +0.05%) [ +0.00% +0.31% +0.26% / +0.05% +1.52% +1.52%] index_select perm : Elapsed 0.019 ms (1.909 ms / 100) 1.925 -> 1.925 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.83% +0.94%] index_select perm_sorted : Elapsed 0.019 ms (1.928 ms / 100) B = [4, 40, 5, 16] (stride (40, 1, 2560, 160)) A = [20, 40, 5, 16] (stride (200, 5, 1, 4000)) dim = 0 0.796 -> 0.795 ( -0.13%) [ +0.25% +0.00% +0.38% / -0.13% +4.40% +4.52%] index_select const : Elapsed 0.008 ms (0.798 ms / 100) 0.804 -> 0.804 ( +0.00%) [ +0.25% +0.00% +0.37% / +0.00% +3.98% +4.10%] index_select wrap : Elapsed 0.008 ms (0.806 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.50% +0.12% +0.00% / +0.12% +4.61% +4.49%] index_select linear : Elapsed 0.008 ms (0.806 ms / 100) 0.798 -> 0.798 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +4.76% +5.39%] index_select reverse : Elapsed 0.008 ms (0.798 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.00% +0.38% +0.00% / +0.00% +2.62% +3.00%] index_select skip64 : Elapsed 0.008 ms (0.800 ms / 100) 0.797 -> 0.799 ( +0.25%) [ +0.00% +0.13% +0.00% / +0.25% +4.14% +3.89%] index_select skip256 : Elapsed 0.008 ms (0.797 ms / 100) 0.799 -> 0.803 ( +0.50%) [ +0.13% +0.38% +0.00% / +0.50% +5.76% +6.38%] index_select spread : Elapsed 0.008 ms (0.800 ms / 100) 0.796 -> 0.798 ( +0.25%) [ +0.13% +0.00% +0.38% / +0.25% +7.16% +7.54%] index_select strided 3 : Elapsed 0.008 ms (0.797 ms / 100) 0.799 -> 0.799 ( +0.00%) [ +0.13% +0.50% +0.00% / +0.00% +5.51% +5.63%] index_select strided 5 : Elapsed 0.008 ms (0.800 ms / 100) 0.813 -> 0.815 ( +0.25%) [ +0.00% +0.62% +0.25% / +0.25% +3.32% +3.32%] index_select strided 7 : Elapsed 0.008 ms (0.813 ms / 100) 0.813 -> 0.817 ( +0.49%) [ +0.00% +0.49% +0.62% / +0.49% +1.85% +1.85%] index_select strided 8 : Elapsed 0.008 ms (0.813 ms / 100) 0.798 -> 0.797 ( -0.13%) [ +0.13% +0.13% +0.00% / -0.13% +3.51% +3.76%] index_select strided 16 : Elapsed 0.008 ms (0.799 ms / 100) 0.793 -> 0.798 ( +0.63%) [ +0.00% +0.63% +0.25% / +0.63% +3.53% +4.04%] index_select random : Elapsed 0.008 ms (0.793 ms / 100) 0.794 -> 0.794 ( +0.00%) [ +0.25% +0.50% +0.00% / +0.00% +3.02% +2.90%] index_select random_sorted : Elapsed 0.008 ms (0.796 ms / 100) 0.788 -> 0.798 ( +1.27%) [ +0.89% +0.89% +0.00% / +1.27% +1.40% +1.65%] index_select perm : Elapsed 0.008 ms (0.795 ms / 100) 0.813 -> 0.799 ( -1.72%) [ +0.37% +0.00% +0.12% / +0.74% -1.48% -1.72%] index_select perm_sorted : Elapsed 0.008 ms (0.816 ms / 100) B = [4, 40, 5, 16] (stride (1, 20, 4, 800)) A = [20, 40, 5, 16] (stride (16, 1600, 320, 1)) dim = 0 0.768 -> 0.772 ( +0.52%) [ +0.00% +0.52% +0.65% / +0.52% +2.08% +2.08%] index_select const : Elapsed 0.008 ms (0.768 ms / 100) 0.800 -> 0.795 ( -0.63%) [ +0.00% +0.38% +0.00% / +0.00% -0.63% -0.63%] index_select wrap : Elapsed 0.008 ms (0.800 ms / 100) 0.798 -> 0.796 ( -0.25%) [ +0.38% +0.00% +0.75% / +0.38% -0.25% -0.13%] index_select linear : Elapsed 0.008 ms (0.801 ms / 100) 0.797 -> 0.788 ( -1.13%) [ +0.38% +0.00% +0.63% / +0.38% +0.13% -1.13%] index_select reverse : Elapsed 0.008 ms (0.800 ms / 100) 0.778 -> 0.773 ( -0.64%) [ +0.13% +0.00% +0.00% / -0.26% -0.13% -0.64%] index_select skip64 : Elapsed 0.008 ms (0.779 ms / 100) 0.770 -> 0.773 ( +0.39%) [ +0.65% +0.00% +0.65% / +0.39% +0.91% +0.65%] index_select skip256 : Elapsed 0.008 ms (0.775 ms / 100) 0.798 -> 0.784 ( -1.75%) [ +0.25% +0.00% +0.13% / +0.00% -1.75% -1.25%] index_select spread : Elapsed 0.008 ms (0.800 ms / 100) 0.794 -> 0.797 ( +0.38%) [ +0.00% +0.50% +0.63% / +0.38% +0.88% +1.39%] index_select strided 3 : Elapsed 0.008 ms (0.794 ms / 100) 0.795 -> 0.788 ( -0.88%) [ +0.00% +0.25% +0.38% / +0.13% -0.88% -0.88%] index_select strided 5 : Elapsed 0.008 ms (0.795 ms / 100) 0.794 -> 0.792 ( -0.25%) [ +0.00% +0.25% +0.13% / -0.25% +0.50% +0.25%] index_select strided 7 : Elapsed 0.008 ms (0.794 ms / 100) 0.792 -> 0.781 ( -1.39%) [ +0.00% +0.38% +0.25% / +0.88% -1.39% -1.14%] index_select strided 8 : Elapsed 0.008 ms (0.792 ms / 100) 0.792 -> 0.778 ( -1.77%) [ +1.14% +0.88% +0.00% / +0.63% -0.63% -1.77%] index_select strided 16 : Elapsed 0.008 ms (0.801 ms / 100) 0.792 -> 0.776 ( -2.02%) [ +0.00% +0.25% +0.63% / -1.26% -1.64% -2.02%] index_select random : Elapsed 0.008 ms (0.792 ms / 100) 0.785 -> 0.773 ( -1.53%) [ +1.15% +0.76% +0.00% / +1.15% -0.89% -1.53%] index_select random_sorted : Elapsed 0.008 ms (0.794 ms / 100) 0.798 -> 0.799 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.38% +0.13% +0.38%] index_select perm : Elapsed 0.008 ms (0.799 ms / 100) 0.794 -> 0.792 ( -0.25%) [ +0.38% +0.00% +0.25% / +0.00% -0.25% +0.00%] index_select perm_sorted : Elapsed 0.008 ms (0.797 ms / 100) B = [4, 40, 5, 16] (stride (1, 20, 4, 800)) A = [20, 40, 5, 16] (stride (1, 20, 12800, 800)) dim = 0 0.830 -> 0.835 ( +0.60%) [ +0.00% +0.24% +0.36% / +0.60% +3.61% +3.73%] index_select const : Elapsed 0.008 ms (0.830 ms / 100) 0.824 -> 0.822 ( -0.24%) [ +0.36% +0.73% +0.00% / -0.24% +4.85% +5.22%] index_select wrap : Elapsed 0.008 ms (0.827 ms / 100) 0.824 -> 0.822 ( -0.24%) [ +0.00% +0.36% +0.12% / -0.24% +5.22% +4.85%] index_select linear : Elapsed 0.008 ms (0.824 ms / 100) 0.827 -> 0.830 ( +0.36%) [ +0.36% +0.73% +0.00% / +0.36% +4.84% +4.59%] index_select reverse : Elapsed 0.008 ms (0.830 ms / 100) 0.825 -> 0.829 ( +0.48%) [ +0.00% +0.48% +0.24% / +0.48% +4.97% +4.73%] index_select skip64 : Elapsed 0.008 ms (0.825 ms / 100) 0.826 -> 0.828 ( +0.24%) [ +0.00% +0.36% +0.00% / +0.24% +4.84% +4.96%] index_select skip256 : Elapsed 0.008 ms (0.826 ms / 100) 0.840 -> 0.841 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +5.36% +5.95%] index_select spread : Elapsed 0.008 ms (0.840 ms / 100) 0.834 -> 0.835 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +5.52% +5.76%] index_select strided 3 : Elapsed 0.008 ms (0.835 ms / 100) 0.841 -> 0.839 ( -0.24%) [ +0.00% +0.00% +0.24% / -0.24% +5.71% +5.71%] index_select strided 5 : Elapsed 0.008 ms (0.841 ms / 100) 0.840 -> 0.845 ( +0.60%) [ +0.71% +0.00% +0.00% / +0.60% +4.76% +4.88%] index_select strided 7 : Elapsed 0.008 ms (0.846 ms / 100) 0.842 -> 0.847 ( +0.59%) [ +0.24% +0.00% +0.83% / +0.59% +5.23% +4.39%] index_select strided 8 : Elapsed 0.008 ms (0.844 ms / 100) 0.842 -> 0.843 ( +0.12%) [ +0.59% +0.00% +0.59% / +0.12% +4.99% +4.28%] index_select strided 16 : Elapsed 0.008 ms (0.847 ms / 100) 0.836 -> 0.837 ( +0.12%) [ +0.24% +0.00% +0.24% / +0.12% +3.71% +3.95%] index_select random : Elapsed 0.008 ms (0.838 ms / 100) 0.832 -> 0.832 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +4.57% +4.93%] index_select random_sorted : Elapsed 0.008 ms (0.833 ms / 100) 0.838 -> 0.841 ( +0.36%) [ +0.36% +0.12% +0.00% / +0.36% +5.25% +5.37%] index_select perm : Elapsed 0.008 ms (0.841 ms / 100) 0.839 -> 0.840 ( +0.12%) [ +0.24% +0.24% +0.00% / +0.12% +5.36% +5.60%] index_select perm_sorted : Elapsed 0.008 ms (0.841 ms / 100) out_shape = [20, 4, 5, 16] in_shape = [20, 40, 5, 16] idx_dim = 1 B = [20, 4, 5, 16] (stride (320, 80, 1, 5)) A = [20, 40, 5, 16] (stride (80, 1600, 16, 1)) dim = 1 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.50%] index_select const : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.67%] index_select wrap : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.67% +0.67%] index_select linear : Elapsed 0.012 ms (1.192 ms / 100) 1.190 -> 1.192 ( +0.17%) [ +0.17% +0.00% +0.08% / +0.17% +0.76% +0.76%] index_select reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.190 -> 1.192 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.76% +0.84%] index_select skip64 : Elapsed 0.012 ms (1.190 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select skip256 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.59% +1.01%] index_select spread : Elapsed 0.012 ms (1.191 ms / 100) 1.192 -> 1.191 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.59% +0.50%] index_select strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.190 -> 1.192 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.67% +0.67%] index_select strided 5 : Elapsed 0.012 ms (1.191 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.59% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.42% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.42% +0.50%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.17% / +0.00% +0.42% +0.50%] index_select perm : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.42% +0.42%] index_select perm_sorted : Elapsed 0.012 ms (1.194 ms / 100) B = [20, 4, 5, 16] (stride (320, 80, 1, 5)) A = [20, 40, 5, 16] (stride (40, 1, 12800, 800)) dim = 1 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.31%] index_select const : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.24% +0.24% +0.00% / +0.16% +0.55% +0.55%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.63% +0.71%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.24% +0.00% +0.24% / +0.16% +0.86% +0.71%] index_select skip64 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.55% +0.86%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.78% +0.63%] index_select strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.47% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.278 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.39% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.23% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.278 ms / 100) 1.285 -> 1.284 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.31% +0.23%] index_select perm : Elapsed 0.013 ms (1.286 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [20, 4, 5, 16] (stride (80, 1600, 1, 5)) A = [20, 40, 5, 16] (stride (1, 20, 12800, 800)) dim = 1 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select const : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.70% +0.55%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.63%] index_select linear : Elapsed 0.013 ms (1.280 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.282 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.281 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.23% +0.16%] index_select strided 3 : Elapsed 0.013 ms (1.287 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.283 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.63% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.63% +0.70%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.47% +0.31%] index_select random : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.276 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.39% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select perm : Elapsed 0.013 ms (1.284 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.70% +0.78%] index_select perm_sorted : Elapsed 0.013 ms (1.279 ms / 100) B = [20, 4, 5, 16] (stride (16, 1600, 320, 1)) A = [20, 40, 5, 16] (stride (3200, 1, 40, 200)) dim = 1 1.364 -> 1.364 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_select const : Elapsed 0.014 ms (1.364 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.37% +0.07% +0.00% / +0.07% +0.29% +0.29%] index_select wrap : Elapsed 0.014 ms (1.371 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.51% +0.51%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.365 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.44% +0.95%] index_select reverse : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.51% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.66% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.366 ( +0.15%) [ +0.29% +0.00% +0.07% / +0.15% +0.59% +0.59%] index_select spread : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.44% +0.44%] index_select strided 3 : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.44% +0.37%] index_select strided 5 : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.364 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.59% +0.37%] index_select strided 7 : Elapsed 0.014 ms (1.365 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.51% +0.37%] index_select strided 8 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.07% +0.22% / +0.07% +0.37% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.367 ms / 100) 1.364 -> 1.363 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.29% +0.29%] index_select random : Elapsed 0.014 ms (1.364 ms / 100) 1.364 -> 1.367 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.29% +0.22%] index_select random_sorted : Elapsed 0.014 ms (1.365 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.15% +0.00% +0.22% / +0.07% +0.36% +0.29%] index_select perm : Elapsed 0.014 ms (1.372 ms / 100) 1.364 -> 1.364 ( +0.00%) [ +0.15% +0.00% +0.22% / +0.00% +0.51% +0.59%] index_select perm_sorted : Elapsed 0.014 ms (1.366 ms / 100) B = [20, 4, 5, 16] (stride (5, 1600, 1, 100)) A = [20, 40, 5, 16] (stride (3200, 1, 640, 40)) dim = 1 1.279 -> 1.280 ( +0.08%) [ +0.55% +0.16% +0.00% / +0.08% +1.02% +0.86%] index_select const : Elapsed 0.013 ms (1.286 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.16% +0.16%] index_select wrap : Elapsed 0.013 ms (1.288 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.16% +0.00% +0.16% / -0.08% +0.47% +0.31%] index_select linear : Elapsed 0.013 ms (1.283 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.31% +0.31%] index_select reverse : Elapsed 0.013 ms (1.286 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.39% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.08% +0.23% +0.00% / +0.16% +0.39% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.282 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.16%] index_select spread : Elapsed 0.013 ms (1.285 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.23% +0.00% +0.16% / +0.08% +0.62% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.284 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.23% +0.23%] index_select strided 5 : Elapsed 0.013 ms (1.288 ms / 100) 1.287 -> 1.286 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.39% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.287 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +1.24% +1.17%] index_select strided 8 : Elapsed 0.013 ms (1.288 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.16% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.31%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +1.01% +0.93%] index_select random_sorted : Elapsed 0.013 ms (1.288 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.62%] index_select perm : Elapsed 0.013 ms (1.284 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.55% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.282 ms / 100) B = [20, 4, 5, 16] (stride (20, 1, 4, 400)) A = [20, 40, 5, 16] (stride (3200, 5, 1, 200)) dim = 1 1.339 -> 1.344 ( +0.37%) [ +0.52% +0.15% +0.00% / +0.37% +0.67% +0.75%] index_select const : Elapsed 0.013 ms (1.346 ms / 100) 1.348 -> 1.346 ( -0.15%) [ +0.00% +0.07% +0.15% / -0.15% +0.74% +0.45%] index_select wrap : Elapsed 0.013 ms (1.348 ms / 100) 1.345 -> 1.347 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.59% +0.52%] index_select linear : Elapsed 0.013 ms (1.347 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.23% +0.15% +0.00% / +0.15% +0.53% +0.60%] index_select reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.339 -> 1.343 ( +0.30%) [ +0.22% +0.15% +0.00% / +0.30% +0.52% +0.52%] index_select skip64 : Elapsed 0.013 ms (1.342 ms / 100) 1.339 -> 1.344 ( +0.37%) [ +0.30% +0.00% +0.30% / +0.37% +0.90% +0.75%] index_select skip256 : Elapsed 0.013 ms (1.343 ms / 100) 1.346 -> 1.346 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.30% +0.74%] index_select spread : Elapsed 0.013 ms (1.348 ms / 100) 1.342 -> 1.341 ( -0.07%) [ +0.22% +0.00% +0.22% / -0.07% +0.52% +0.37%] index_select strided 3 : Elapsed 0.013 ms (1.345 ms / 100) 1.338 -> 1.336 ( -0.15%) [ +0.07% +0.00% +0.00% / -0.15% +0.45% +0.45%] index_select strided 5 : Elapsed 0.013 ms (1.339 ms / 100) 1.339 -> 1.339 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.45% +0.52%] index_select strided 7 : Elapsed 0.013 ms (1.339 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.22% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.339 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.00% +0.52% +0.45%] index_select strided 16 : Elapsed 0.013 ms (1.340 ms / 100) 1.336 -> 1.338 ( +0.15%) [ +0.07% +0.22% +0.00% / +0.15% +0.60% +0.67%] index_select random : Elapsed 0.013 ms (1.337 ms / 100) 1.336 -> 1.338 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.45% +0.52%] index_select random_sorted : Elapsed 0.013 ms (1.338 ms / 100) 1.340 -> 1.342 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.22% +0.22%] index_select perm : Elapsed 0.013 ms (1.340 ms / 100) 1.335 -> 1.335 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +1.12% +0.52%] index_select perm_sorted : Elapsed 0.013 ms (1.336 ms / 100) B = [20, 4, 5, 16] (stride (4, 1, 80, 400)) A = [20, 40, 5, 16] (stride (3200, 1, 640, 40)) dim = 1 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.70% +0.63%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select wrap : Elapsed 0.013 ms (1.281 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.00% +0.16% +0.08% / +0.23% +0.63% +0.55%] index_select linear : Elapsed 0.013 ms (1.277 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.277 ( -0.16%) [ +0.00% +0.08% +0.00% / -0.16% +0.23% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.31% +0.00% / +0.08% +0.55% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.47% +0.62%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.283 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.288 ms / 100) 1.280 -> 1.278 ( -0.16%) [ +0.08% +0.00% +0.08% / -0.16% +0.63% +0.55%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.55% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select perm : Elapsed 0.013 ms (1.284 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) out_shape = [20, 40, 4, 16] in_shape = [20, 40, 5, 16] idx_dim = 2 B = [20, 40, 4, 16] (stride (1, 1280, 320, 20)) A = [20, 40, 5, 16] (stride (80, 1600, 1, 5)) dim = 2 5.929 -> 5.914 ( -0.25%) [ +0.05% +0.00% +0.08% / +0.10% -0.19% -0.25%] index_select const : Elapsed 0.059 ms (5.932 ms / 100) 5.931 -> 5.916 ( -0.25%) [ +0.10% +0.00% +0.07% / +0.08% -0.22% -0.25%] index_select wrap : Elapsed 0.059 ms (5.937 ms / 100) 5.930 -> 5.913 ( -0.29%) [ +0.00% +0.05% +0.15% / +0.12% -0.29% -0.17%] index_select linear : Elapsed 0.059 ms (5.930 ms / 100) 5.932 -> 5.908 ( -0.40%) [ +0.00% +0.00% +0.12% / +0.05% -0.40% -0.29%] index_select reverse : Elapsed 0.059 ms (5.932 ms / 100) 5.929 -> 5.911 ( -0.30%) [ +0.13% +0.00% +0.12% / +0.05% -0.30% -0.24%] index_select skip64 : Elapsed 0.059 ms (5.937 ms / 100) 5.926 -> 5.915 ( -0.19%) [ +0.10% +0.00% +0.08% / +0.10% -0.19% -0.12%] index_select skip256 : Elapsed 0.059 ms (5.932 ms / 100) 5.934 -> 5.913 ( -0.35%) [ +0.00% +0.00% +0.02% / +0.00% -0.35% -0.34%] index_select spread : Elapsed 0.059 ms (5.934 ms / 100) 5.927 -> 5.914 ( -0.22%) [ +0.08% +0.00% +0.08% / +0.13% -0.19% -0.22%] index_select strided 3 : Elapsed 0.059 ms (5.932 ms / 100) 5.927 -> 5.913 ( -0.24%) [ +0.12% +0.00% +0.19% / +0.15% -0.22% -0.24%] index_select random : Elapsed 0.059 ms (5.934 ms / 100) 5.926 -> 5.908 ( -0.30%) [ +0.08% +0.00% +0.13% / +0.25% -0.30% -0.24%] index_select random_sorted : Elapsed 0.059 ms (5.931 ms / 100) 5.928 -> 5.909 ( -0.32%) [ +0.00% +0.07% +0.07% / +0.08% -0.25% -0.32%] index_select perm : Elapsed 0.059 ms (5.928 ms / 100) 5.929 -> 5.914 ( -0.25%) [ +0.00% +0.03% +0.05% / +0.12% -0.22% -0.25%] index_select perm_sorted : Elapsed 0.059 ms (5.929 ms / 100) B = [20, 40, 4, 16] (stride (4, 1280, 1, 80)) A = [20, 40, 5, 16] (stride (640, 1, 12800, 40)) dim = 2 5.795 -> 5.793 ( -0.03%) [ +0.17% +0.00% +0.14% / +0.02% -0.03% +0.00%] index_select const : Elapsed 0.058 ms (5.805 ms / 100) 5.839 -> 5.825 ( -0.24%) [ +0.00% +0.09% +0.09% / +0.12% +0.00% -0.24%] index_select wrap : Elapsed 0.058 ms (5.839 ms / 100) 5.842 -> 5.828 ( -0.24%) [ +0.00% +0.07% +0.02% / +0.09% -0.05% -0.24%] index_select linear : Elapsed 0.058 ms (5.842 ms / 100) 5.845 -> 5.825 ( -0.34%) [ +0.00% +0.02% +0.03% / -0.05% -0.27% -0.34%] index_select reverse : Elapsed 0.058 ms (5.845 ms / 100) 5.801 -> 5.794 ( -0.12%) [ +0.03% +0.03% +0.00% / +0.03% -0.12% -0.12%] index_select skip64 : Elapsed 0.058 ms (5.803 ms / 100) 5.799 -> 5.791 ( -0.14%) [ +0.03% +0.00% +0.09% / +0.00% -0.07% -0.14%] index_select skip256 : Elapsed 0.058 ms (5.801 ms / 100) 5.839 -> 5.833 ( -0.10%) [ +0.03% +0.00% +0.12% / +0.09% -0.10% -0.10%] index_select spread : Elapsed 0.058 ms (5.841 ms / 100) 5.843 -> 5.828 ( -0.26%) [ +0.03% +0.03% +0.00% / +0.17% -0.24% -0.26%] index_select strided 3 : Elapsed 0.058 ms (5.845 ms / 100) 5.839 -> 5.825 ( -0.24%) [ +0.07% +0.00% +0.03% / +0.05% -0.17% -0.24%] index_select random : Elapsed 0.058 ms (5.843 ms / 100) 5.830 -> 5.816 ( -0.24%) [ +0.00% +0.09% +0.15% / +0.14% -0.15% -0.24%] index_select random_sorted : Elapsed 0.058 ms (5.830 ms / 100) 5.833 -> 5.831 ( -0.03%) [ +0.00% +0.07% +0.05% / +0.10% -0.03% +0.09%] index_select perm : Elapsed 0.058 ms (5.833 ms / 100) 5.834 -> 5.834 ( +0.00%) [ +0.00% +0.09% +0.03% / +0.03% +0.02% +0.00%] index_select perm_sorted : Elapsed 0.058 ms (5.834 ms / 100) B = [20, 40, 4, 16] (stride (1, 1280, 20, 80)) A = [20, 40, 5, 16] (stride (5, 100, 1, 4000)) dim = 2 6.023 -> 6.028 ( +0.08%) [ +0.10% +0.00% +0.12% / +0.08% +0.25% +0.30%] index_select const : Elapsed 0.060 ms (6.029 ms / 100) 6.022 -> 6.027 ( +0.08%) [ +0.05% +0.00% +0.17% / +0.08% +0.25% +0.23%] index_select wrap : Elapsed 0.060 ms (6.025 ms / 100) 6.024 -> 6.031 ( +0.12%) [ +0.00% +0.17% +0.10% / +0.12% +0.23% +0.22%] index_select linear : Elapsed 0.060 ms (6.024 ms / 100) 6.025 -> 6.022 ( -0.05%) [ +0.07% +0.00% +0.12% / -0.05% +0.10% +0.22%] index_select reverse : Elapsed 0.060 ms (6.029 ms / 100) 6.026 -> 6.025 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.17% +0.17%] index_select skip64 : Elapsed 0.060 ms (6.029 ms / 100) 6.026 -> 6.028 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.20% +0.23%] index_select skip256 : Elapsed 0.060 ms (6.031 ms / 100) 6.023 -> 6.030 ( +0.12%) [ +0.15% +0.00% +0.17% / +0.13% +0.25% +0.12%] index_select spread : Elapsed 0.060 ms (6.032 ms / 100) 6.022 -> 6.025 ( +0.05%) [ +0.03% +0.00% +0.15% / +0.05% +0.35% +0.27%] index_select strided 3 : Elapsed 0.060 ms (6.024 ms / 100) 6.025 -> 6.025 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.25% +0.27%] index_select random : Elapsed 0.060 ms (6.025 ms / 100) 6.022 -> 6.028 ( +0.10%) [ +0.00% +0.10% +0.12% / +0.10% +0.22% +0.20%] index_select random_sorted : Elapsed 0.060 ms (6.022 ms / 100) 6.023 -> 6.029 ( +0.10%) [ +0.03% +0.00% +0.07% / +0.18% +0.10% +0.20%] index_select perm : Elapsed 0.060 ms (6.025 ms / 100) 6.024 -> 6.031 ( +0.12%) [ +0.05% +0.00% +0.02% / +0.12% +0.20% +0.25%] index_select perm_sorted : Elapsed 0.060 ms (6.027 ms / 100) B = [20, 40, 4, 16] (stride (640, 1, 12800, 40)) A = [20, 40, 5, 16] (stride (1, 20, 800, 4000)) dim = 2 5.723 -> 5.708 ( -0.26%) [ +0.00% +0.05% +0.02% / +0.09% -0.02% -0.26%] index_select const : Elapsed 0.057 ms (5.723 ms / 100) 5.777 -> 5.772 ( -0.09%) [ +0.14% +0.00% +0.03% / +0.12% -0.09% -0.05%] index_select wrap : Elapsed 0.058 ms (5.785 ms / 100) 5.776 -> 5.769 ( -0.12%) [ +0.14% +0.00% +0.14% / +0.03% -0.05% -0.12%] index_select linear : Elapsed 0.058 ms (5.784 ms / 100) 5.806 -> 5.781 ( -0.43%) [ +0.00% +0.10% +0.07% / +0.14% -0.43% -0.34%] index_select reverse : Elapsed 0.058 ms (5.806 ms / 100) 5.722 -> 5.705 ( -0.30%) [ +0.09% +0.12% +0.00% / +0.16% -0.30% -0.24%] index_select skip64 : Elapsed 0.057 ms (5.727 ms / 100) 5.721 -> 5.708 ( -0.23%) [ +0.12% +0.00% +0.05% / +0.16% -0.14% -0.23%] index_select skip256 : Elapsed 0.057 ms (5.728 ms / 100) 5.777 -> 5.773 ( -0.07%) [ +0.05% +0.00% +0.24% / +0.07% +0.02% -0.07%] index_select spread : Elapsed 0.058 ms (5.780 ms / 100) 5.785 -> 5.758 ( -0.47%) [ +0.16% +0.03% +0.00% / +0.03% -0.47% -0.43%] index_select strided 3 : Elapsed 0.058 ms (5.794 ms / 100) 5.742 -> 5.753 ( +0.19%) [ +0.26% +0.00% +0.24% / +0.19% +0.30% +0.35%] index_select random : Elapsed 0.058 ms (5.757 ms / 100) 5.747 -> 5.749 ( +0.03%) [ +0.03% +0.00% +0.12% / +0.03% +0.14% +0.14%] index_select random_sorted : Elapsed 0.057 ms (5.749 ms / 100) 5.793 -> 5.760 ( -0.57%) [ +0.02% +0.00% +0.10% / -0.03% -0.57% -0.48%] index_select perm : Elapsed 0.058 ms (5.794 ms / 100) 5.783 -> 5.762 ( -0.36%) [ +0.00% +0.00% +0.05% / +0.17% -0.36% -0.33%] index_select perm_sorted : Elapsed 0.058 ms (5.783 ms / 100) B = [20, 40, 4, 16] (stride (16, 320, 12800, 1)) A = [20, 40, 5, 16] (stride (80, 1600, 1, 5)) dim = 2 5.900 -> 5.883 ( -0.29%) [ +0.00% +0.07% +0.00% / +0.05% -0.29% -0.24%] index_select const : Elapsed 0.059 ms (5.900 ms / 100) 5.894 -> 5.890 ( -0.07%) [ +0.00% +0.05% +0.05% / +0.05% -0.07% +0.02%] index_select wrap : Elapsed 0.059 ms (5.894 ms / 100) 5.895 -> 5.883 ( -0.20%) [ +0.08% +0.00% +0.17% / +0.17% -0.10% -0.20%] index_select linear : Elapsed 0.059 ms (5.900 ms / 100) 5.895 -> 5.889 ( -0.10%) [ +0.05% +0.00% +0.07% / +0.15% -0.10% -0.05%] index_select reverse : Elapsed 0.059 ms (5.898 ms / 100) 5.898 -> 5.886 ( -0.20%) [ +0.00% +0.02% +0.03% / +0.02% -0.15% -0.20%] index_select skip64 : Elapsed 0.059 ms (5.898 ms / 100) 5.889 -> 5.886 ( -0.05%) [ +0.15% +0.00% +0.08% / +0.14% -0.03% -0.05%] index_select skip256 : Elapsed 0.059 ms (5.898 ms / 100) 5.894 -> 5.888 ( -0.10%) [ +0.00% +0.05% +0.12% / +0.05% -0.10% +0.08%] index_select spread : Elapsed 0.059 ms (5.894 ms / 100) 5.897 -> 5.889 ( -0.14%) [ +0.00% +0.02% +0.05% / +0.10% -0.14% -0.07%] index_select strided 3 : Elapsed 0.059 ms (5.897 ms / 100) 5.900 -> 5.883 ( -0.29%) [ +0.00% +0.00% +0.03% / +0.00% -0.03% -0.29%] index_select random : Elapsed 0.059 ms (5.900 ms / 100) 5.891 -> 5.892 ( +0.02%) [ +0.00% +0.15% +0.22% / +0.15% +0.05% +0.02%] index_select random_sorted : Elapsed 0.059 ms (5.891 ms / 100) 5.895 -> 5.889 ( -0.10%) [ +0.12% +0.00% +0.19% / +0.22% -0.10% -0.07%] index_select perm : Elapsed 0.059 ms (5.902 ms / 100) 5.897 -> 5.889 ( -0.14%) [ +0.03% +0.00% +0.05% / +0.05% -0.14% -0.12%] index_select perm_sorted : Elapsed 0.059 ms (5.899 ms / 100) B = [20, 40, 4, 16] (stride (160, 4, 1, 3200)) A = [20, 40, 5, 16] (stride (5, 100, 1, 4000)) dim = 2 5.838 -> 5.829 ( -0.15%) [ +0.00% +0.10% +0.10% / +0.05% -0.05% -0.15%] index_select const : Elapsed 0.058 ms (5.838 ms / 100) 5.837 -> 5.824 ( -0.22%) [ +0.02% +0.14% +0.00% / -0.05% -0.22% -0.22%] index_select wrap : Elapsed 0.058 ms (5.838 ms / 100) 5.842 -> 5.827 ( -0.26%) [ +0.03% +0.00% +0.03% / +0.14% -0.26% -0.26%] index_select linear : Elapsed 0.058 ms (5.844 ms / 100) 5.836 -> 5.823 ( -0.22%) [ +0.07% +0.00% +0.02% / +0.02% -0.17% -0.22%] index_select reverse : Elapsed 0.058 ms (5.840 ms / 100) 5.838 -> 5.822 ( -0.27%) [ +0.00% +0.17% +0.15% / +0.09% -0.21% -0.27%] index_select skip64 : Elapsed 0.058 ms (5.838 ms / 100) 5.839 -> 5.830 ( -0.15%) [ +0.07% +0.00% +0.12% / -0.02% -0.15% -0.14%] index_select skip256 : Elapsed 0.058 ms (5.843 ms / 100) 5.844 -> 5.826 ( -0.31%) [ +0.02% +0.00% +0.03% / -0.02% -0.22% -0.31%] index_select spread : Elapsed 0.058 ms (5.845 ms / 100) 5.837 -> 5.824 ( -0.22%) [ +0.00% +0.07% +0.10% / +0.07% -0.17% -0.22%] index_select strided 3 : Elapsed 0.058 ms (5.837 ms / 100) 5.838 -> 5.828 ( -0.17%) [ +0.00% +0.02% +0.12% / +0.14% -0.17% -0.10%] index_select random : Elapsed 0.058 ms (5.838 ms / 100) 5.836 -> 5.826 ( -0.17%) [ +0.03% +0.00% +0.14% / +0.14% -0.15% -0.17%] index_select random_sorted : Elapsed 0.058 ms (5.838 ms / 100) 5.844 -> 5.822 ( -0.38%) [ +0.02% +0.00% +0.03% / +0.09% -0.34% -0.38%] index_select perm : Elapsed 0.058 ms (5.845 ms / 100) 5.841 -> 5.824 ( -0.29%) [ +0.00% +0.15% +0.00% / +0.05% -0.29% -0.24%] index_select perm_sorted : Elapsed 0.058 ms (5.841 ms / 100) B = [20, 40, 4, 16] (stride (4, 80, 1, 3200)) A = [20, 40, 5, 16] (stride (16, 1600, 320, 1)) dim = 2 5.785 -> 5.792 ( +0.12%) [ +0.09% +0.00% +0.14% / +0.12% +0.43% +0.28%] index_select const : Elapsed 0.058 ms (5.790 ms / 100) 5.883 -> 5.871 ( -0.20%) [ +0.00% +0.03% +0.03% / -0.02% -0.12% -0.20%] index_select wrap : Elapsed 0.059 ms (5.883 ms / 100) 5.869 -> 5.854 ( -0.26%) [ +0.07% +0.00% +0.14% / +0.12% -0.15% -0.26%] index_select linear : Elapsed 0.059 ms (5.873 ms / 100) 5.873 -> 5.864 ( -0.15%) [ +0.17% +0.05% +0.00% / +0.09% -0.15% -0.15%] index_select reverse : Elapsed 0.059 ms (5.883 ms / 100) 5.770 -> 5.772 ( +0.03%) [ +0.00% +0.17% +0.05% / +0.03% +0.31% +0.17%] index_select skip64 : Elapsed 0.058 ms (5.770 ms / 100) 5.770 -> 5.773 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.29% +0.29%] index_select skip256 : Elapsed 0.058 ms (5.774 ms / 100) 5.879 -> 5.868 ( -0.19%) [ +0.00% +0.15% +0.17% / +0.02% -0.12% -0.19%] index_select spread : Elapsed 0.059 ms (5.879 ms / 100) 5.872 -> 5.859 ( -0.22%) [ +0.02% +0.03% +0.00% / -0.02% -0.22% -0.20%] index_select strided 3 : Elapsed 0.059 ms (5.873 ms / 100) 5.796 -> 5.803 ( +0.12%) [ +0.09% +0.00% +0.16% / +0.12% +0.36% +0.24%] index_select random : Elapsed 0.058 ms (5.801 ms / 100) 5.804 -> 5.816 ( +0.21%) [ +0.00% +0.17% +0.07% / +0.21% +0.38% +0.40%] index_select random_sorted : Elapsed 0.058 ms (5.804 ms / 100) 5.887 -> 5.884 ( -0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.05% -0.05%] index_select perm : Elapsed 0.059 ms (5.888 ms / 100) 5.870 -> 5.867 ( -0.05%) [ +0.00% +0.09% +0.10% / -0.03% -0.05% -0.03%] index_select perm_sorted : Elapsed 0.059 ms (5.870 ms / 100) out_shape = [20, 40, 5, 4] in_shape = [20, 40, 5, 16] idx_dim = 3 B = [20, 40, 5, 4] (stride (800, 4, 160, 1)) A = [20, 40, 5, 16] (stride (200, 5, 1, 4000)) dim = 3 2.184 -> 2.187 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.32% +0.37%] index_select const : Elapsed 0.022 ms (2.186 ms / 100) 2.238 -> 2.239 ( +0.04%) [ +0.00% +0.18% +0.09% / +0.04% +0.36% +0.45%] index_select wrap : Elapsed 0.022 ms (2.238 ms / 100) 2.236 -> 2.243 ( +0.31%) [ +0.22% +0.09% +0.00% / +0.31% +0.49% +0.40%] index_select linear : Elapsed 0.022 ms (2.241 ms / 100) 2.239 -> 2.238 ( -0.04%) [ +0.13% +0.13% +0.00% / -0.04% +0.45% +0.45%] index_select reverse : Elapsed 0.022 ms (2.242 ms / 100) 2.185 -> 2.185 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.14% +0.37%] index_select skip64 : Elapsed 0.022 ms (2.187 ms / 100) 2.185 -> 2.186 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.18% +0.14%] index_select skip256 : Elapsed 0.022 ms (2.185 ms / 100) 2.240 -> 2.242 ( +0.09%) [ +0.00% +0.00% +0.13% / +0.09% +0.22% +0.13%] index_select spread : Elapsed 0.022 ms (2.240 ms / 100) 2.239 -> 2.242 ( +0.13%) [ +0.00% +0.13% +0.18% / +0.13% +0.13% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.239 ms / 100) 2.239 -> 2.237 ( -0.09%) [ +0.18% +0.18% +0.00% / -0.09% +0.63% +0.40%] index_select strided 5 : Elapsed 0.022 ms (2.243 ms / 100) 2.238 -> 2.243 ( +0.22%) [ +0.00% +0.13% +0.00% / +0.22% +0.36% +0.36%] index_select strided 7 : Elapsed 0.022 ms (2.238 ms / 100) 2.198 -> 2.200 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.36% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.202 ms / 100) 2.239 -> 2.237 ( -0.09%) [ +0.04% +0.13% +0.00% / -0.09% +0.45% +0.36%] index_select random : Elapsed 0.022 ms (2.240 ms / 100) 2.240 -> 2.245 ( +0.22%) [ +0.18% +0.00% +0.09% / +0.22% +0.27% +0.31%] index_select random_sorted : Elapsed 0.022 ms (2.244 ms / 100) 2.239 -> 2.242 ( +0.13%) [ +0.13% +0.00% +0.04% / +0.13% +0.18% +0.27%] index_select perm : Elapsed 0.022 ms (2.242 ms / 100) 2.239 -> 2.243 ( +0.18%) [ +0.18% +0.13% +0.00% / +0.18% +0.31% +0.45%] index_select perm_sorted : Elapsed 0.022 ms (2.243 ms / 100) B = [20, 40, 5, 4] (stride (800, 1, 160, 40)) A = [20, 40, 5, 16] (stride (3200, 1, 40, 200)) dim = 3 2.398 -> 2.401 ( +0.13%) [ +0.00% +0.21% +0.17% / +0.13% +0.33% +0.38%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.16% +0.12% +0.08%] index_select wrap : Elapsed 0.024 ms (2.445 ms / 100) 2.444 -> 2.446 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.12% +0.25%] index_select linear : Elapsed 0.024 ms (2.444 ms / 100) 2.436 -> 2.445 ( +0.37%) [ +0.21% +0.00% +0.16% / +0.41% +0.37% +0.37%] index_select reverse : Elapsed 0.024 ms (2.441 ms / 100) 2.405 -> 2.400 ( -0.21%) [ +0.00% +0.00% +0.12% / -0.21% +0.08% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.405 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.17% +0.25% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.404 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.00% +0.25% +0.04% / +0.12% +0.33% +0.16%] index_select spread : Elapsed 0.024 ms (2.441 ms / 100) 2.444 -> 2.444 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.12% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.437 -> 2.441 ( +0.16%) [ +0.21% +0.08% +0.00% / +0.16% +0.37% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.442 ms / 100) 2.442 -> 2.444 ( +0.08%) [ +0.33% +0.33% +0.00% / +0.08% +0.57% +0.53%] index_select strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.413 -> 2.418 ( +0.21%) [ +0.12% +0.04% +0.00% / +0.21% +0.54% +0.50%] index_select strided 8 : Elapsed 0.024 ms (2.416 ms / 100) 2.439 -> 2.440 ( +0.04%) [ +0.00% +0.21% +0.21% / +0.04% +0.21% +0.45%] index_select random : Elapsed 0.024 ms (2.439 ms / 100) 2.444 -> 2.447 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.25% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.438 -> 2.440 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.33% +0.33%] index_select perm : Elapsed 0.024 ms (2.441 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.00% +0.04% +0.12% / +0.12% +0.41% +0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.441 ms / 100) B = [20, 40, 5, 4] (stride (20, 400, 4, 1)) A = [20, 40, 5, 16] (stride (1, 100, 20, 4000)) dim = 3 2.366 -> 2.369 ( +0.13%) [ +0.00% +0.21% +0.25% / +0.13% +0.55% +0.72%] index_select const : Elapsed 0.024 ms (2.366 ms / 100) 2.364 -> 2.368 ( +0.17%) [ +0.30% +0.00% +0.30% / +0.17% +0.47% +0.47%] index_select wrap : Elapsed 0.024 ms (2.371 ms / 100) 2.367 -> 2.365 ( -0.08%) [ +0.17% +0.00% +0.08% / -0.08% +0.55% +0.42%] index_select linear : Elapsed 0.024 ms (2.371 ms / 100) 2.368 -> 2.366 ( -0.08%) [ +0.00% +0.04% +0.13% / -0.08% +0.42% +0.55%] index_select reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.364 -> 2.368 ( +0.17%) [ +0.13% +0.17% +0.00% / +0.17% +0.76% +0.59%] index_select skip64 : Elapsed 0.024 ms (2.367 ms / 100) 2.363 -> 2.365 ( +0.08%) [ +0.30% +0.25% +0.00% / +0.08% +0.80% +0.85%] index_select skip256 : Elapsed 0.024 ms (2.370 ms / 100) 2.381 -> 2.385 ( +0.17%) [ +0.00% +0.21% +0.08% / +0.17% +0.59% +0.63%] index_select spread : Elapsed 0.024 ms (2.381 ms / 100) 2.364 -> 2.364 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.59% +0.38%] index_select strided 3 : Elapsed 0.024 ms (2.364 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.59% +0.68%] index_select strided 5 : Elapsed 0.024 ms (2.365 ms / 100) 2.383 -> 2.378 ( -0.21%) [ +0.17% +0.17% +0.00% / -0.21% +0.50% +0.50%] index_select strided 7 : Elapsed 0.024 ms (2.387 ms / 100) 2.365 -> 2.373 ( +0.34%) [ +0.21% +0.08% +0.00% / +0.34% +0.80% +0.85%] index_select strided 8 : Elapsed 0.024 ms (2.370 ms / 100) 2.367 -> 2.367 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.51% +0.30%] index_select random : Elapsed 0.024 ms (2.367 ms / 100) 2.367 -> 2.372 ( +0.21%) [ +0.08% +0.08% +0.00% / +0.21% +0.42% +0.34%] index_select random_sorted : Elapsed 0.024 ms (2.369 ms / 100) 2.373 -> 2.374 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.46% +0.46%] index_select perm : Elapsed 0.024 ms (2.377 ms / 100) 2.385 -> 2.388 ( +0.13%) [ +0.34% +0.00% +0.13% / +0.13% +0.50% +0.34%] index_select perm_sorted : Elapsed 0.024 ms (2.393 ms / 100) B = [20, 40, 5, 4] (stride (5, 400, 1, 100)) A = [20, 40, 5, 16] (stride (1, 20, 800, 4000)) dim = 3 2.487 -> 2.490 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.12% +0.16% +0.24%] index_select const : Elapsed 0.025 ms (2.491 ms / 100) 2.498 -> 2.499 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.20% +0.04% +0.08%] index_select wrap : Elapsed 0.025 ms (2.500 ms / 100) 2.496 -> 2.492 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% -0.08% +0.04%] index_select linear : Elapsed 0.025 ms (2.498 ms / 100) 2.490 -> 2.488 ( -0.08%) [ +0.00% +0.00% +0.12% / -0.08% +0.12% +0.04%] index_select reverse : Elapsed 0.025 ms (2.490 ms / 100) 2.491 -> 2.488 ( -0.12%) [ +0.00% +0.08% +0.00% / -0.12% +0.24% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.491 ms / 100) 2.486 -> 2.486 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.28% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.486 ms / 100) 2.489 -> 2.492 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.12% +0.36% +0.40%] index_select spread : Elapsed 0.025 ms (2.490 ms / 100) 2.487 -> 2.492 ( +0.20%) [ +0.28% +0.00% +0.16% / +0.28% +0.20% +0.40%] index_select strided 3 : Elapsed 0.025 ms (2.494 ms / 100) 2.490 -> 2.492 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.44% +0.32%] index_select strided 5 : Elapsed 0.025 ms (2.490 ms / 100) 2.490 -> 2.496 ( +0.24%) [ +0.08% +0.04% +0.00% / +0.24% +0.76% +0.80%] index_select strided 7 : Elapsed 0.025 ms (2.492 ms / 100) 2.486 -> 2.487 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.36% +0.32%] index_select strided 8 : Elapsed 0.025 ms (2.489 ms / 100) 2.486 -> 2.486 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.24% +0.36%] index_select random : Elapsed 0.025 ms (2.488 ms / 100) 2.491 -> 2.494 ( +0.12%) [ +0.08% +0.28% +0.00% / +0.12% +0.40% +0.32%] index_select random_sorted : Elapsed 0.025 ms (2.493 ms / 100) 2.493 -> 2.493 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.24% +0.12%] index_select perm : Elapsed 0.025 ms (2.493 ms / 100) 2.486 -> 2.494 ( +0.32%) [ +0.32% +0.00% +0.16% / +0.32% +0.48% +0.64%] index_select perm_sorted : Elapsed 0.025 ms (2.494 ms / 100) B = [20, 40, 5, 4] (stride (1, 20, 3200, 800)) A = [20, 40, 5, 16] (stride (5, 1600, 1, 100)) dim = 3 2.440 -> 2.442 ( +0.08%) [ +0.29% +0.00% +0.08% / +0.08% +0.45% +0.57%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.461 -> 2.467 ( +0.24%) [ +0.00% +0.04% +0.24% / +0.24% +0.28% +0.37%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.24% +0.32%] index_select linear : Elapsed 0.025 ms (2.468 ms / 100) 2.461 -> 2.466 ( +0.20%) [ +0.00% +0.04% +0.16% / +0.20% +0.37% +0.33%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.430 -> 2.437 ( +0.29%) [ +0.12% +0.16% +0.00% / +0.29% +0.66% +0.95%] index_select skip64 : Elapsed 0.024 ms (2.433 ms / 100) 2.439 -> 2.441 ( +0.08%) [ +0.00% +0.12% +0.21% / +0.08% +0.49% +0.57%] index_select skip256 : Elapsed 0.024 ms (2.439 ms / 100) 2.472 -> 2.476 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.40% +0.44%] index_select spread : Elapsed 0.025 ms (2.474 ms / 100) 2.463 -> 2.467 ( +0.16%) [ +0.00% +0.16% +0.04% / +0.16% +0.16% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.08% +0.04%] index_select strided 5 : Elapsed 0.025 ms (2.471 ms / 100) 2.473 -> 2.475 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.20% +0.44%] index_select strided 7 : Elapsed 0.025 ms (2.473 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.12% +0.49% +0.57%] index_select strided 8 : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.466 ( +0.20%) [ +0.24% +0.00% +0.00% / +0.20% +0.41% +0.33%] index_select random : Elapsed 0.025 ms (2.467 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.00% +0.04% +0.33% / +0.00% +0.37% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.16% +0.16%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.20% +0.00% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) out_shape = [4, 40, 16, 5] in_shape = [20, 40, 16, 5] idx_dim = 0 B = [4, 40, 16, 5] (stride (3200, 5, 200, 1)) A = [20, 40, 16, 5] (stride (80, 1600, 1, 16)) dim = 0 2.021 -> 2.017 ( -0.20%) [ +0.00% +0.00% +0.15% / -0.20% +0.45% +0.40%] index_select const : Elapsed 0.020 ms (2.021 ms / 100) 2.024 -> 2.028 ( +0.20%) [ +0.25% +0.00% +0.10% / +0.20% +0.74% +0.44%] index_select wrap : Elapsed 0.020 ms (2.029 ms / 100) 2.019 -> 2.015 ( -0.20%) [ +0.10% +0.00% +0.25% / -0.20% +1.29% +1.09%] index_select linear : Elapsed 0.020 ms (2.021 ms / 100) 2.035 -> 2.038 ( +0.15%) [ +0.05% +0.10% +0.00% / +0.15% +0.59% +0.49%] index_select reverse : Elapsed 0.020 ms (2.036 ms / 100) 2.007 -> 2.011 ( +0.20%) [ +0.10% +0.00% +0.15% / +0.20% +0.75% +0.90%] index_select skip64 : Elapsed 0.020 ms (2.009 ms / 100) 2.019 -> 2.021 ( +0.10%) [ +0.00% +0.05% +0.20% / +0.10% +0.45% +0.45%] index_select skip256 : Elapsed 0.020 ms (2.019 ms / 100) 2.029 -> 2.028 ( -0.05%) [ +0.25% +0.20% +0.00% / -0.05% +1.13% +0.99%] index_select spread : Elapsed 0.020 ms (2.034 ms / 100) 2.029 -> 2.035 ( +0.30%) [ +0.25% +0.20% +0.00% / +0.30% +0.69% +0.49%] index_select strided 3 : Elapsed 0.020 ms (2.034 ms / 100) 2.015 -> 2.021 ( +0.30%) [ +0.20% +0.00% +0.25% / +0.30% +0.89% +1.14%] index_select strided 5 : Elapsed 0.020 ms (2.019 ms / 100) 2.027 -> 2.027 ( +0.00%) [ +0.10% +0.00% +0.25% / +0.00% +1.58% +1.13%] index_select strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 2.037 -> 2.041 ( +0.20%) [ +0.00% +0.20% +0.10% / +0.20% +0.74% +0.74%] index_select strided 8 : Elapsed 0.020 ms (2.037 ms / 100) 2.017 -> 2.023 ( +0.30%) [ +0.00% +0.40% +0.30% / +0.30% +1.19% +1.19%] index_select strided 16 : Elapsed 0.020 ms (2.017 ms / 100) 2.005 -> 2.006 ( +0.05%) [ +0.25% +0.50% +0.00% / +0.05% +1.10% +1.25%] index_select random : Elapsed 0.020 ms (2.010 ms / 100) 2.006 -> 1.999 ( -0.35%) [ +0.00% +0.10% +0.00% / -0.35% +1.00% +1.00%] index_select random_sorted : Elapsed 0.020 ms (2.006 ms / 100) 2.019 -> 2.020 ( +0.05%) [ +0.20% +0.10% +0.00% / +0.05% +1.14% +1.09%] index_select perm : Elapsed 0.020 ms (2.023 ms / 100) 2.038 -> 2.035 ( -0.15%) [ +0.20% +0.00% +0.10% / -0.15% +0.69% +0.64%] index_select perm_sorted : Elapsed 0.020 ms (2.042 ms / 100) B = [4, 40, 16, 5] (stride (3200, 1, 200, 40)) A = [20, 40, 16, 5] (stride (1, 1600, 100, 20)) dim = 0 1.833 -> 1.830 ( -0.16%) [ +0.00% +0.27% +0.05% / +0.00% -0.16% -0.05%] index_select const : Elapsed 0.018 ms (1.833 ms / 100) 1.816 -> 1.816 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.11% +0.11% +0.00%] index_select wrap : Elapsed 0.018 ms (1.817 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.33% +0.00% +0.11% / +0.22% +0.11% +0.38%] index_select linear : Elapsed 0.018 ms (1.828 ms / 100) 1.825 -> 1.826 ( +0.05%) [ +0.05% +0.16% +0.00% / +0.33% +0.05% +0.16%] index_select reverse : Elapsed 0.018 ms (1.826 ms / 100) 1.836 -> 1.832 ( -0.22%) [ +0.05% +0.00% +0.11% / -0.22% -0.22% -0.22%] index_select skip64 : Elapsed 0.018 ms (1.837 ms / 100) 1.824 -> 1.821 ( -0.16%) [ +0.16% +0.00% +0.05% / +0.00% -0.16% +0.27%] index_select skip256 : Elapsed 0.018 ms (1.827 ms / 100) 1.872 -> 1.865 ( -0.37%) [ +0.48% +0.00% +0.16% / +0.05% +0.00% -0.37%] index_select spread : Elapsed 0.019 ms (1.881 ms / 100) 1.861 -> 1.858 ( -0.16%) [ +0.00% +0.05% +0.11% / +0.32% +0.21% -0.16%] index_select strided 3 : Elapsed 0.019 ms (1.861 ms / 100) 1.881 -> 1.873 ( -0.43%) [ +0.00% +0.05% +0.16% / +0.16% -0.32% -0.43%] index_select strided 5 : Elapsed 0.019 ms (1.881 ms / 100) 1.862 -> 1.861 ( -0.05%) [ +0.21% +0.00% +0.21% / +0.43% +0.00% -0.05%] index_select strided 7 : Elapsed 0.019 ms (1.866 ms / 100) 1.872 -> 1.871 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.00% +0.16%] index_select strided 8 : Elapsed 0.019 ms (1.872 ms / 100) 1.869 -> 1.866 ( -0.16%) [ +0.00% +0.16% +0.05% / +0.64% -0.16% -0.11%] index_select strided 16 : Elapsed 0.019 ms (1.869 ms / 100) 1.833 -> 1.837 ( +0.22%) [ +0.22% +0.00% +0.11% / +0.22% +0.22% +0.27%] index_select random : Elapsed 0.018 ms (1.837 ms / 100) 1.865 -> 1.866 ( +0.05%) [ +0.32% +0.00% +0.38% / +0.43% +0.16% +0.05%] index_select random_sorted : Elapsed 0.019 ms (1.871 ms / 100) 1.864 -> 1.860 ( -0.21%) [ +0.16% +0.00% +0.21% / -0.11% -0.21% -0.05%] index_select perm : Elapsed 0.019 ms (1.867 ms / 100) 1.860 -> 1.859 ( -0.05%) [ +0.11% +0.00% +0.05% / +0.16% -0.05% -0.05%] index_select perm_sorted : Elapsed 0.019 ms (1.862 ms / 100) B = [4, 40, 16, 5] (stride (3200, 1, 200, 40)) A = [20, 40, 16, 5] (stride (40, 1, 800, 12800)) dim = 0 2.069 -> 2.069 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.77% +0.48%] index_select const : Elapsed 0.021 ms (2.070 ms / 100) 2.082 -> 2.087 ( +0.24%) [ +0.29% +0.24% +0.00% / +0.24% +0.48% +0.53%] index_select wrap : Elapsed 0.021 ms (2.088 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.00% +0.00% +0.10% / +0.14% +0.48% +0.24%] index_select linear : Elapsed 0.021 ms (2.084 ms / 100) 2.086 -> 2.087 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +0.29% +0.34%] index_select reverse : Elapsed 0.021 ms (2.089 ms / 100) 2.069 -> 2.073 ( +0.19%) [ +0.00% +0.05% +0.00% / +0.19% +0.58% +0.48%] index_select skip64 : Elapsed 0.021 ms (2.069 ms / 100) 2.066 -> 2.065 ( -0.05%) [ +0.15% +0.15% +0.00% / -0.05% +0.68% +0.58%] index_select skip256 : Elapsed 0.021 ms (2.069 ms / 100) 2.087 -> 2.087 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.34% +0.24%] index_select spread : Elapsed 0.021 ms (2.088 ms / 100) 2.086 -> 2.083 ( -0.14%) [ +0.14% +0.19% +0.00% / -0.14% +0.10% +0.29%] index_select strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.085 -> 2.085 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.34% +0.58%] index_select strided 5 : Elapsed 0.021 ms (2.087 ms / 100) 2.086 -> 2.086 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.29% +0.29%] index_select strided 7 : Elapsed 0.021 ms (2.088 ms / 100) 2.090 -> 2.088 ( -0.10%) [ +0.14% +0.00% +0.00% / -0.10% +0.10% +0.00%] index_select strided 8 : Elapsed 0.021 ms (2.093 ms / 100) 2.082 -> 2.089 ( +0.34%) [ +0.19% +0.19% +0.00% / +0.34% +0.53% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.086 ms / 100) 2.077 -> 2.080 ( +0.14%) [ +0.34% +0.00% +0.05% / +0.14% +0.67% +0.67%] index_select random : Elapsed 0.021 ms (2.084 ms / 100) 2.084 -> 2.089 ( +0.24%) [ +0.29% +0.05% +0.00% / +0.29% +0.24% +0.53%] index_select random_sorted : Elapsed 0.021 ms (2.090 ms / 100) 2.087 -> 2.086 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.24% +0.24%] index_select perm : Elapsed 0.021 ms (2.088 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.19% +0.05% +0.00% / +0.14% +0.38% +0.38%] index_select perm_sorted : Elapsed 0.021 ms (2.088 ms / 100) B = [4, 40, 16, 5] (stride (5, 320, 20, 1)) A = [20, 40, 16, 5] (stride (3200, 80, 1, 16)) dim = 0 1.904 -> 1.902 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.53% +0.47%] index_select const : Elapsed 0.019 ms (1.904 ms / 100) 1.918 -> 1.923 ( +0.26%) [ +0.10% +0.00% +0.10% / +0.26% +0.89% +0.83%] index_select wrap : Elapsed 0.019 ms (1.920 ms / 100) 1.927 -> 1.925 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.99% +0.88%] index_select linear : Elapsed 0.019 ms (1.928 ms / 100) 1.922 -> 1.922 ( +0.00%) [ +0.05% +0.00% +0.36% / +0.00% +1.35% +1.40%] index_select reverse : Elapsed 0.019 ms (1.923 ms / 100) 1.902 -> 1.904 ( +0.11%) [ +0.00% +0.16% +0.05% / +0.11% +0.89% +0.79%] index_select skip64 : Elapsed 0.019 ms (1.902 ms / 100) 1.902 -> 1.902 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.58% +0.42%] index_select skip256 : Elapsed 0.019 ms (1.902 ms / 100) 1.929 -> 1.926 ( -0.16%) [ +0.21% +0.21% +0.00% / +0.21% -0.16% -0.10%] index_select spread : Elapsed 0.019 ms (1.933 ms / 100) 1.939 -> 1.937 ( -0.10%) [ +0.15% +0.10% +0.00% / +0.10% -0.10% -0.05%] index_select strided 3 : Elapsed 0.019 ms (1.942 ms / 100) 1.936 -> 1.928 ( -0.41%) [ +0.00% +0.36% +0.26% / +0.26% -0.41% -0.21%] index_select strided 5 : Elapsed 0.019 ms (1.936 ms / 100) 1.909 -> 1.917 ( +0.42%) [ +0.21% +0.05% +0.00% / +0.42% +1.52% +1.78%] index_select strided 7 : Elapsed 0.019 ms (1.913 ms / 100) 1.939 -> 1.945 ( +0.31%) [ +0.10% +0.15% +0.00% / +0.31% +0.77% +0.36%] index_select strided 8 : Elapsed 0.019 ms (1.941 ms / 100) 1.928 -> 1.929 ( +0.05%) [ +0.21% +0.21% +0.00% / +0.05% +0.36% +0.26%] index_select strided 16 : Elapsed 0.019 ms (1.932 ms / 100) 1.922 -> 1.918 ( -0.21%) [ +0.05% +0.00% +0.05% / -0.21% +0.21% +0.16%] index_select random : Elapsed 0.019 ms (1.923 ms / 100) 1.929 -> 1.934 ( +0.26%) [ +0.00% +0.31% +0.26% / +0.26% +0.41% +0.41%] index_select random_sorted : Elapsed 0.019 ms (1.929 ms / 100) 1.942 -> 1.934 ( -0.41%) [ +0.26% +0.15% +0.00% / +0.46% -0.26% -0.41%] index_select perm : Elapsed 0.019 ms (1.947 ms / 100) 1.940 -> 1.923 ( -0.88%) [ +0.26% +0.00% +0.15% / +0.00% -0.72% -0.88%] index_select perm_sorted : Elapsed 0.019 ms (1.945 ms / 100) B = [4, 40, 16, 5] (stride (1, 4, 800, 160)) A = [20, 40, 16, 5] (stride (1, 1600, 100, 20)) dim = 0 1.837 -> 1.834 ( -0.16%) [ +0.27% +0.05% +0.00% / +0.11% -0.16% -0.11%] index_select const : Elapsed 0.018 ms (1.842 ms / 100) 1.820 -> 1.824 ( +0.22%) [ +0.00% +0.27% +0.05% / +0.22% +0.38% +0.49%] index_select wrap : Elapsed 0.018 ms (1.820 ms / 100) 1.832 -> 1.834 ( +0.11%) [ +0.16% +0.05% +0.00% / +0.16% +0.11% +0.11%] index_select linear : Elapsed 0.018 ms (1.835 ms / 100) 1.836 -> 1.831 ( -0.27%) [ +0.11% +0.11% +0.00% / +0.33% -0.27% -0.16%] index_select reverse : Elapsed 0.018 ms (1.838 ms / 100) 1.839 -> 1.841 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.11% +0.33%] index_select skip64 : Elapsed 0.018 ms (1.839 ms / 100) 1.839 -> 1.843 ( +0.22%) [ +0.11% +0.00% +0.16% / +0.44% +0.38% +0.22%] index_select skip256 : Elapsed 0.018 ms (1.841 ms / 100) 1.886 -> 1.884 ( -0.11%) [ +0.00% +0.00% +0.16% / +0.11% -0.11% +0.11%] index_select spread : Elapsed 0.019 ms (1.886 ms / 100) 1.860 -> 1.861 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.11% +0.05% +0.70%] index_select strided 3 : Elapsed 0.019 ms (1.863 ms / 100) 1.878 -> 1.875 ( -0.16%) [ +0.00% +0.21% +0.00% / +0.00% -0.16% +0.16%] index_select strided 5 : Elapsed 0.019 ms (1.878 ms / 100) 1.872 -> 1.871 ( -0.05%) [ +0.16% +0.11% +0.00% / -0.05% +0.00% +0.00%] index_select strided 7 : Elapsed 0.019 ms (1.875 ms / 100) 1.870 -> 1.872 ( +0.11%) [ +0.00% +0.21% +0.27% / +0.16% +0.11% +0.37%] index_select strided 8 : Elapsed 0.019 ms (1.870 ms / 100) 1.875 -> 1.874 ( -0.05%) [ +0.27% +0.53% +0.00% / -0.05% +0.27% +0.48%] index_select strided 16 : Elapsed 0.019 ms (1.880 ms / 100) 1.863 -> 1.862 ( -0.05%) [ +0.21% +0.00% +0.21% / +0.00% -0.05% +0.00%] index_select random : Elapsed 0.019 ms (1.867 ms / 100) 1.845 -> 1.853 ( +0.43%) [ +0.00% +0.33% +0.27% / +0.43% +0.65% +0.43%] index_select random_sorted : Elapsed 0.018 ms (1.845 ms / 100) 1.870 -> 1.868 ( -0.11%) [ +0.16% +0.00% +0.21% / -0.11% +0.05% +0.05%] index_select perm : Elapsed 0.019 ms (1.873 ms / 100) 1.872 -> 1.868 ( -0.21%) [ +0.05% +0.00% +0.00% / +0.32% +0.05% -0.21%] index_select perm_sorted : Elapsed 0.019 ms (1.873 ms / 100) B = [4, 40, 16, 5] (stride (1, 4, 800, 160)) A = [20, 40, 16, 5] (stride (1, 20, 4000, 800)) dim = 0 0.832 -> 0.833 ( +0.12%) [ +0.36% +0.36% +0.00% / +0.12% +3.61% +3.85%] index_select const : Elapsed 0.008 ms (0.835 ms / 100) 0.827 -> 0.825 ( -0.24%) [ +0.00% +0.12% +0.12% / -0.24% +4.59% +4.72%] index_select wrap : Elapsed 0.008 ms (0.827 ms / 100) 0.825 -> 0.827 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +4.97% +5.09%] index_select linear : Elapsed 0.008 ms (0.826 ms / 100) 0.829 -> 0.833 ( +0.48%) [ +0.00% +0.12% +0.12% / +0.48% +4.46% +4.83%] index_select reverse : Elapsed 0.008 ms (0.829 ms / 100) 0.828 -> 0.832 ( +0.48%) [ +0.00% +0.60% +0.12% / +0.48% +4.47% +4.47%] index_select skip64 : Elapsed 0.008 ms (0.828 ms / 100) 0.829 -> 0.828 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +4.83% +5.19%] index_select skip256 : Elapsed 0.008 ms (0.829 ms / 100) 0.841 -> 0.842 ( +0.12%) [ +0.36% +0.00% +0.24% / +0.12% +5.11% +6.06%] index_select spread : Elapsed 0.008 ms (0.844 ms / 100) 0.834 -> 0.837 ( +0.36%) [ +0.36% +0.00% +0.36% / +0.36% +5.52% +5.76%] index_select strided 3 : Elapsed 0.008 ms (0.837 ms / 100) 0.839 -> 0.841 ( +0.24%) [ +0.36% +0.00% +0.00% / +0.24% +6.20% +7.03%] index_select strided 5 : Elapsed 0.008 ms (0.842 ms / 100) 0.843 -> 0.842 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +4.98% +4.63%] index_select strided 7 : Elapsed 0.008 ms (0.843 ms / 100) 0.844 -> 0.841 ( -0.36%) [ +0.00% +0.24% +0.00% / -0.36% +4.50% +4.74%] index_select strided 8 : Elapsed 0.008 ms (0.844 ms / 100) 0.844 -> 0.843 ( -0.12%) [ +0.12% +0.00% +0.12% / -0.12% +4.38% +4.50%] index_select strided 16 : Elapsed 0.008 ms (0.845 ms / 100) 0.839 -> 0.838 ( -0.12%) [ +0.48% +0.24% +0.00% / -0.12% +4.05% +4.17%] index_select random : Elapsed 0.008 ms (0.843 ms / 100) 0.831 -> 0.833 ( +0.24%) [ +0.36% +0.00% +0.00% / +0.24% +4.45% +5.05%] index_select random_sorted : Elapsed 0.008 ms (0.834 ms / 100) 0.841 -> 0.842 ( +0.12%) [ +0.24% +0.00% +0.24% / +0.12% +5.23% +4.76%] index_select perm : Elapsed 0.008 ms (0.843 ms / 100) 0.838 -> 0.843 ( +0.60%) [ +0.24% +0.72% +0.00% / +0.60% +5.13% +5.13%] index_select perm_sorted : Elapsed 0.008 ms (0.840 ms / 100) B = [4, 40, 16, 5] (stride (1, 4, 800, 160)) A = [20, 40, 16, 5] (stride (40, 1, 800, 12800)) dim = 0 2.077 -> 2.080 ( +0.14%) [ +0.10% +0.05% +0.00% / +0.14% +0.29% +0.39%] index_select const : Elapsed 0.021 ms (2.079 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.00% +0.00% +0.05% / +0.10% +0.05% -0.05%] index_select wrap : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.090 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.05% +0.05% +0.00%] index_select linear : Elapsed 0.021 ms (2.090 ms / 100) 2.091 -> 2.089 ( -0.10%) [ +0.05% +0.00% +0.05% / +0.00% -0.10% -0.10%] index_select reverse : Elapsed 0.021 ms (2.092 ms / 100) 2.079 -> 2.075 ( -0.19%) [ +0.10% +0.19% +0.00% / +0.19% +0.24% -0.19%] index_select skip64 : Elapsed 0.021 ms (2.081 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.29% +0.00% +0.05% / -0.05% +0.10% +0.05%] index_select skip256 : Elapsed 0.021 ms (2.085 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.10% +0.14%] index_select spread : Elapsed 0.021 ms (2.092 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.05% +0.00% +0.43% / +0.05% +0.10% +0.14%] index_select strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.092 -> 2.091 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.00% +0.10%] index_select strided 5 : Elapsed 0.021 ms (2.092 ms / 100) 2.090 -> 2.090 ( +0.00%) [ +0.19% +0.10% +0.00% / +0.00% +0.05% +0.05%] index_select strided 7 : Elapsed 0.021 ms (2.094 ms / 100) 2.088 -> 2.091 ( +0.14%) [ +0.24% +0.14% +0.00% / +0.19% +0.14% +0.24%] index_select strided 8 : Elapsed 0.021 ms (2.093 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.10% +0.10%] index_select strided 16 : Elapsed 0.021 ms (2.090 ms / 100) 2.088 -> 2.088 ( +0.00%) [ +0.14% +0.10% +0.00% / +0.00% +0.19% +0.14%] index_select random : Elapsed 0.021 ms (2.091 ms / 100) 2.089 -> 2.092 ( +0.14%) [ +0.10% +0.00% +0.00% / +0.14% +0.24% +0.19%] index_select random_sorted : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.088 ( -0.10%) [ +0.10% +0.00% +0.10% / +0.14% -0.10% +0.00%] index_select perm : Elapsed 0.021 ms (2.092 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.14% +0.10% +0.00% / +0.14% +0.14% +0.10%] index_select perm_sorted : Elapsed 0.021 ms (2.091 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) A = [20, 40, 16, 5] (stride (1, 20, 4000, 800)) dim = 0 2.139 -> 2.134 ( -0.23%) [ +0.05% +0.00% +0.09% / +0.19% -0.14% -0.23%] index_select const : Elapsed 0.021 ms (2.140 ms / 100) 2.140 -> 2.139 ( -0.05%) [ +0.00% +0.00% +0.23% / +0.23% -0.05% -0.05%] index_select wrap : Elapsed 0.021 ms (2.140 ms / 100) 2.141 -> 2.136 ( -0.23%) [ +0.00% +0.05% +0.09% / -0.23% -0.19% -0.19%] index_select linear : Elapsed 0.021 ms (2.141 ms / 100) 2.137 -> 2.136 ( -0.05%) [ +0.00% +0.00% +0.00% / +0.14% -0.05% +0.37%] index_select reverse : Elapsed 0.021 ms (2.137 ms / 100) 2.138 -> 2.134 ( -0.19%) [ +0.00% +0.05% +0.09% / +0.09% -0.19% +0.00%] index_select skip64 : Elapsed 0.021 ms (2.138 ms / 100) 2.141 -> 2.135 ( -0.28%) [ +0.19% +0.00% +0.09% / +0.09% -0.19% -0.28%] index_select skip256 : Elapsed 0.021 ms (2.145 ms / 100) 2.200 -> 2.189 ( -0.50%) [ +0.00% +0.18% +0.05% / +0.00% -0.45% -0.50%] index_select spread : Elapsed 0.022 ms (2.200 ms / 100) 2.178 -> 2.175 ( -0.14%) [ +0.00% +0.18% +0.09% / +0.23% +0.00% -0.14%] index_select strided 3 : Elapsed 0.022 ms (2.178 ms / 100) 2.199 -> 2.194 ( -0.23%) [ +0.00% +0.09% +0.27% / +0.18% -0.23% -0.14%] index_select strided 5 : Elapsed 0.022 ms (2.199 ms / 100) 2.191 -> 2.188 ( -0.14%) [ +0.05% +0.09% +0.00% / +0.05% -0.14% -0.14%] index_select strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.187 -> 2.184 ( -0.14%) [ +0.14% +0.18% +0.00% / +0.23% -0.14% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.190 ms / 100) 2.197 -> 2.187 ( -0.46%) [ +0.18% +0.00% +0.09% / +0.23% -0.14% -0.46%] index_select strided 16 : Elapsed 0.022 ms (2.201 ms / 100) 2.193 -> 2.188 ( -0.23%) [ +0.23% +0.00% +0.00% / +0.32% -0.14% -0.23%] index_select random : Elapsed 0.022 ms (2.198 ms / 100) 2.203 -> 2.196 ( -0.32%) [ +0.27% +0.00% +0.05% / -0.14% -0.27% -0.32%] index_select random_sorted : Elapsed 0.022 ms (2.209 ms / 100) 2.186 -> 2.183 ( -0.14%) [ +0.00% +0.14% +0.18% / +0.14% -0.14% +0.14%] index_select perm : Elapsed 0.022 ms (2.186 ms / 100) 2.198 -> 2.193 ( -0.23%) [ +0.00% +0.05% +0.00% / +0.23% +0.14% -0.23%] index_select perm_sorted : Elapsed 0.022 ms (2.198 ms / 100) out_shape = [20, 4, 16, 5] in_shape = [20, 40, 16, 5] idx_dim = 1 B = [20, 4, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 40, 16, 5] (stride (3200, 80, 1, 16)) dim = 1 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.55% +0.47%] index_select const : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.55% +0.55%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_select reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.71% +0.71%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +1.25%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_select perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) B = [20, 4, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 40, 16, 5] (stride (16, 1600, 1, 320)) dim = 1 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_select const : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.55% +0.55%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.63%] index_select reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select spread : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +1.17%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.39% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.47% +0.47%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.23% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.23% +0.39%] index_select perm : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [20, 4, 16, 5] (stride (4, 1, 400, 80)) dim = 1 fill_cnt = 40 good 1.220 -> 1.158 ( -5.08%) [ +0.00% +0.33% +0.16% / -5.08% -4.10% -4.02%] index_fill_ const : Elapsed 0.012 ms (1.220 ms / 100) 1.216 -> 1.179 ( -3.04%) [ +0.00% +0.25% +0.00% / -1.81% -3.04% -2.96%] index_fill_ linear : Elapsed 0.012 ms (1.216 ms / 100) 1.216 -> 1.181 ( -2.88%) [ +0.00% +0.33% +0.33% / -2.30% -2.63% -2.88%] index_fill_ reverse : Elapsed 0.012 ms (1.216 ms / 100) 1.217 -> 1.194 ( -1.89%) [ +0.00% +0.66% +0.08% / -1.89% -1.81% -1.81%] index_fill_ skip64 : Elapsed 0.012 ms (1.217 ms / 100) 1.218 -> 1.174 ( -3.61%) [ +0.41% +0.33% +0.00% / -3.61% -3.37% -3.12%] index_fill_ skip256 : Elapsed 0.012 ms (1.223 ms / 100) 1.217 -> 1.179 ( -3.12%) [ +0.00% +0.25% +0.16% / -3.12% -2.63% -2.71%] index_fill_ spread : Elapsed 0.012 ms (1.217 ms / 100) 1.218 -> 1.178 ( -3.28%) [ +0.00% +0.25% +0.16% / -3.28% -2.87% -2.87%] index_fill_ strided 3 : Elapsed 0.012 ms (1.218 ms / 100) 1.225 -> 1.169 ( -4.57%) [ +0.00% +0.00% +0.08% / -4.08% -4.33% -4.57%] index_fill_ random : Elapsed 0.012 ms (1.225 ms / 100) 1.220 -> 1.187 ( -2.70%) [ +0.00% +0.41% +0.08% / -2.13% -2.30% -2.70%] index_fill_ random_sorted : Elapsed 0.012 ms (1.220 ms / 100) B = [20, 4, 16, 5] (stride (64, 1, 4, 1280)) A = [20, 40, 16, 5] (stride (3200, 80, 5, 1)) dim = 1 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select const : Elapsed 0.012 ms (1.191 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.42% +0.50%] index_select wrap : Elapsed 0.012 ms (1.195 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.42% +0.42%] index_select linear : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.25% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select reverse : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.50% +0.67%] index_select skip64 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.42% +0.42%] index_select skip256 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.25% +0.17% +0.00% / +0.17% +0.50% +0.67%] index_select spread : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.25%] index_select strided 3 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select strided 5 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.50% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.34% +0.34%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.34% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.34%] index_select random : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.194 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.59% +0.59%] index_select random_sorted : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.195 ( +0.25%) [ +0.08% +0.00% +0.17% / +0.25% +0.50% +0.42%] index_select perm : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.59% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.192 ms / 100) out_shape = [20, 40, 4, 5] in_shape = [20, 40, 16, 5] idx_dim = 2 B = [20, 40, 4, 5] (stride (800, 20, 5, 1)) A = [20, 40, 16, 5] (stride (640, 16, 1, 12800)) dim = 2 1.149 -> 1.156 ( +0.61%) [ +0.17% +0.26% +0.00% / +0.61% +2.09% +2.09%] index_select const : Elapsed 0.012 ms (1.151 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.09% +0.00% +0.35% / +0.17% +2.18% +1.92%] index_select wrap : Elapsed 0.011 ms (1.149 ms / 100) 1.149 -> 1.150 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +2.09% +2.09%] index_select linear : Elapsed 0.012 ms (1.150 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.00% +0.09% +0.09% / -0.09% +1.65% +2.00%] index_select reverse : Elapsed 0.011 ms (1.149 ms / 100) 1.147 -> 1.155 ( +0.70%) [ +0.09% +0.35% +0.00% / +0.70% +1.74% +1.74%] index_select skip64 : Elapsed 0.011 ms (1.148 ms / 100) 1.149 -> 1.152 ( +0.26%) [ +0.00% +0.09% +0.17% / +0.26% +1.39% +1.65%] index_select skip256 : Elapsed 0.011 ms (1.149 ms / 100) 1.164 -> 1.171 ( +0.60%) [ +0.00% +0.00% +0.17% / +0.60% +1.89% +2.23%] index_select spread : Elapsed 0.012 ms (1.164 ms / 100) 1.163 -> 1.169 ( +0.52%) [ +0.52% +0.00% +0.52% / +0.52% +1.63% +1.81%] index_select strided 3 : Elapsed 0.012 ms (1.169 ms / 100) 1.161 -> 1.167 ( +0.52%) [ +0.26% +0.00% +0.26% / +0.52% +1.81% +2.41%] index_select strided 5 : Elapsed 0.012 ms (1.164 ms / 100) 1.163 -> 1.164 ( +0.09%) [ +0.26% +0.00% +0.09% / +0.09% +2.15% +2.06%] index_select strided 7 : Elapsed 0.012 ms (1.166 ms / 100) 1.180 -> 1.184 ( +0.34%) [ +0.08% +0.00% +0.51% / +0.34% +1.78% +2.29%] index_select strided 8 : Elapsed 0.012 ms (1.181 ms / 100) 1.178 -> 1.180 ( +0.17%) [ +0.00% +0.25% +0.34% / +0.17% +1.95% +2.12%] index_select random : Elapsed 0.012 ms (1.178 ms / 100) 1.164 -> 1.160 ( -0.34%) [ +0.00% +0.17% +0.09% / -0.34% +1.98% +2.15%] index_select random_sorted : Elapsed 0.012 ms (1.164 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.17% +0.00% +0.35% / +0.09% +1.66% +1.92%] index_select perm : Elapsed 0.011 ms (1.150 ms / 100) 1.148 -> 1.151 ( +0.26%) [ +0.17% +0.00% +0.17% / +0.26% +2.09% +2.00%] index_select perm_sorted : Elapsed 0.012 ms (1.150 ms / 100) B = [20, 40, 4, 5] (stride (1, 400, 100, 20)) A = [20, 40, 16, 5] (stride (640, 1, 40, 12800)) dim = 2 2.421 -> 2.417 ( -0.17%) [ +0.12% +0.00% +0.00% / -0.17% +0.21% +0.12%] index_select const : Elapsed 0.024 ms (2.424 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.04% +0.04% +0.00% / +0.16% +0.32% +0.16%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.465 ( +0.08%) [ +0.24% +0.12% +0.00% / +0.12% +0.24% +0.08%] index_select linear : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.00% +0.04% +0.20% / +0.08% -0.04% +0.20%] index_select reverse : Elapsed 0.025 ms (2.470 ms / 100) 2.428 -> 2.423 ( -0.21%) [ +0.00% +0.12% +0.00% / -0.21% +0.25% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.428 ms / 100) 2.419 -> 2.419 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.41% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.420 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.16% +0.12%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.468 -> 2.470 ( +0.08%) [ +0.00% +0.12% +0.12% / +0.12% +0.08% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.467 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.32% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.40% +0.57%] index_select strided 7 : Elapsed 0.025 ms (2.475 ms / 100) 2.438 -> 2.442 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.37% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.441 ms / 100) 2.448 -> 2.452 ( +0.16%) [ +0.12% +0.16% +0.00% / +0.25% +0.16% +0.25%] index_select random : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.453 ( +0.25%) [ +0.00% +0.20% +0.08% / +0.25% +0.25% +0.37%] index_select random_sorted : Elapsed 0.024 ms (2.447 ms / 100) 2.464 -> 2.471 ( +0.28%) [ +0.28% +0.24% +0.00% / +0.28% +0.32% +0.37%] index_select perm : Elapsed 0.025 ms (2.471 ms / 100) 2.460 -> 2.466 ( +0.24%) [ +0.33% +0.16% +0.00% / +0.24% +0.37% +0.53%] index_select perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) B = [20, 40, 4, 5] (stride (1, 400, 20, 80)) A = [20, 40, 16, 5] (stride (40, 1, 4000, 800)) dim = 2 2.315 -> 2.315 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.26% +0.39%] index_select const : Elapsed 0.023 ms (2.316 ms / 100) 2.355 -> 2.356 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.34% +0.38%] index_select wrap : Elapsed 0.024 ms (2.356 ms / 100) 2.357 -> 2.359 ( +0.08%) [ +0.08% +0.00% +0.21% / +0.08% +0.42% +0.42%] index_select linear : Elapsed 0.024 ms (2.359 ms / 100) 2.355 -> 2.355 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.04% +0.17%] index_select reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.308 -> 2.310 ( +0.09%) [ +0.00% +0.43% +0.13% / +0.09% +0.65% +0.65%] index_select skip64 : Elapsed 0.023 ms (2.308 ms / 100) 2.308 -> 2.311 ( +0.13%) [ +0.35% +0.00% +0.04% / +0.13% +0.65% +0.65%] index_select skip256 : Elapsed 0.023 ms (2.316 ms / 100) 2.359 -> 2.360 ( +0.04%) [ +0.00% +0.04% +0.13% / +0.04% +0.38% +0.68%] index_select spread : Elapsed 0.024 ms (2.359 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.00% +0.13% +0.08% / +0.08% +0.51% +0.72%] index_select strided 3 : Elapsed 0.024 ms (2.356 ms / 100) 2.363 -> 2.363 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.42%] index_select strided 5 : Elapsed 0.024 ms (2.363 ms / 100) 2.361 -> 2.365 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.51% +0.68%] index_select strided 7 : Elapsed 0.024 ms (2.363 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.00% +0.13% +0.04% / +0.04% +0.52% +0.65%] index_select strided 8 : Elapsed 0.023 ms (2.324 ms / 100) 2.347 -> 2.351 ( +0.17%) [ +0.30% +0.13% +0.00% / +0.17% +0.43% +0.30%] index_select random : Elapsed 0.024 ms (2.354 ms / 100) 2.339 -> 2.342 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.17% +0.13% +0.26%] index_select random_sorted : Elapsed 0.023 ms (2.342 ms / 100) 2.354 -> 2.357 ( +0.13%) [ +0.08% +0.13% +0.00% / +0.21% +0.13% +0.38%] index_select perm : Elapsed 0.024 ms (2.356 ms / 100) 2.359 -> 2.362 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.13% +0.47% +0.42%] index_select perm_sorted : Elapsed 0.024 ms (2.364 ms / 100) B = [20, 40, 4, 5] (stride (200, 1, 4000, 40)) A = [20, 40, 16, 5] (stride (5, 1600, 100, 1)) dim = 2 2.421 -> 2.423 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.25%] index_select const : Elapsed 0.024 ms (2.423 ms / 100) 2.455 -> 2.459 ( +0.16%) [ +0.00% +0.20% +0.16% / +0.16% +0.20% +0.20%] index_select wrap : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.12% +0.08% +0.00% / -0.04% +0.16% +0.08%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.28% +0.08%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.426 -> 2.422 ( -0.16%) [ +0.00% +0.16% +0.04% / -0.16% +0.29% +0.41%] index_select skip64 : Elapsed 0.024 ms (2.426 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.04% +0.00% +0.21% / +0.08% +0.41% +0.62%] index_select skip256 : Elapsed 0.024 ms (2.419 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.20% +0.00% +0.08% / +0.12% +0.49% +0.33%] index_select spread : Elapsed 0.025 ms (2.466 ms / 100) 2.451 -> 2.455 ( +0.16%) [ +0.00% +0.08% +0.12% / +0.16% +0.29% +0.33%] index_select strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.12% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.456 ms / 100) 2.451 -> 2.455 ( +0.16%) [ +0.04% +0.00% +0.04% / +0.16% +0.37% +0.57%] index_select strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.436 -> 2.441 ( +0.21%) [ +0.04% +0.21% +0.00% / +0.21% +0.70% +0.49%] index_select strided 8 : Elapsed 0.024 ms (2.437 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.37% +0.45%] index_select random : Elapsed 0.025 ms (2.451 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.33% +0.04% +0.00% / +0.00% +0.33% +0.28%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.466 ( +0.24%) [ +0.08% +0.16% +0.00% / +0.24% +0.33% +0.24%] index_select perm : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.461 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.12% +0.41%] index_select perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) B = [20, 40, 4, 5] (stride (40, 1, 4000, 800)) A = [20, 40, 16, 5] (stride (40, 1, 800, 12800)) dim = 2 1.108 -> 1.082 ( -2.35%) [ +0.18% +0.00% +0.18% / +0.18% -2.35% -1.99%] index_select const : Elapsed 0.011 ms (1.110 ms / 100) 1.149 -> 1.136 ( -1.13%) [ +0.09% +0.00% +0.00% / +0.70% -1.13% -0.70%] index_select wrap : Elapsed 0.012 ms (1.150 ms / 100) 1.149 -> 1.136 ( -1.13%) [ +0.09% +0.35% +0.00% / -0.26% -1.13% -0.78%] index_select linear : Elapsed 0.011 ms (1.150 ms / 100) 1.128 -> 1.132 ( +0.35%) [ +0.00% +0.18% +0.35% / +0.35% +0.80% +1.15%] index_select reverse : Elapsed 0.011 ms (1.128 ms / 100) 1.107 -> 1.083 ( -2.17%) [ +0.00% +0.27% +0.45% / +0.09% -2.17% -1.45%] index_select skip64 : Elapsed 0.011 ms (1.107 ms / 100) 1.104 -> 1.086 ( -1.63%) [ +0.27% +0.00% +0.18% / +0.09% -1.63% -1.45%] index_select skip256 : Elapsed 0.011 ms (1.107 ms / 100) 1.156 -> 1.125 ( -2.68%) [ +0.17% +0.52% +0.00% / -0.17% -2.68% -2.60%] index_select spread : Elapsed 0.012 ms (1.158 ms / 100) 1.157 -> 1.130 ( -2.33%) [ +0.09% +0.00% +0.26% / -0.35% -2.33% -2.25%] index_select strided 3 : Elapsed 0.012 ms (1.158 ms / 100) 1.157 -> 1.138 ( -1.64%) [ +0.00% +0.00% +0.17% / -0.09% -1.64% -1.56%] index_select strided 5 : Elapsed 0.012 ms (1.157 ms / 100) 1.154 -> 1.140 ( -1.21%) [ +0.69% +0.35% +0.00% / -0.09% -1.21% -1.04%] index_select strided 7 : Elapsed 0.012 ms (1.162 ms / 100) 1.121 -> 1.083 ( -3.39%) [ +0.18% +0.27% +0.00% / +0.09% -3.39% -2.68%] index_select strided 8 : Elapsed 0.011 ms (1.123 ms / 100) 1.148 -> 1.142 ( -0.52%) [ +0.09% +0.00% +0.09% / +0.09% -0.52% -0.52%] index_select random : Elapsed 0.011 ms (1.149 ms / 100) 1.150 -> 1.140 ( -0.87%) [ +0.26% +0.26% +0.00% / +0.00% -0.87% -0.70%] index_select random_sorted : Elapsed 0.012 ms (1.153 ms / 100) 1.135 -> 1.136 ( +0.09%) [ +0.26% +0.26% +0.00% / +0.35% +0.09% +0.53%] index_select perm : Elapsed 0.011 ms (1.138 ms / 100) 1.141 -> 1.146 ( +0.44%) [ +0.26% +0.00% +0.26% / +0.44% +0.53% +0.44%] index_select perm_sorted : Elapsed 0.011 ms (1.144 ms / 100) B = [20, 40, 4, 5] (stride (160, 1, 40, 3200)) A = [20, 40, 16, 5] (stride (1, 320, 20, 12800)) dim = 2 2.542 -> 2.545 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.47% +0.43%] index_select const : Elapsed 0.025 ms (2.545 ms / 100) 2.557 -> 2.553 ( -0.16%) [ +0.00% +0.00% +0.00% / +0.00% -0.16% -0.04%] index_select wrap : Elapsed 0.026 ms (2.557 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.20% +0.12% +0.00% / +0.16% +0.23% +0.12%] index_select linear : Elapsed 0.026 ms (2.559 ms / 100) 2.551 -> 2.549 ( -0.08%) [ +0.12% +0.20% +0.00% / +0.04% -0.08% +0.39%] index_select reverse : Elapsed 0.026 ms (2.554 ms / 100) 2.551 -> 2.550 ( -0.04%) [ +0.00% +0.04% +0.12% / -0.04% +0.24% +0.08%] index_select skip64 : Elapsed 0.026 ms (2.551 ms / 100) 2.542 -> 2.547 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.24% +0.20%] index_select skip256 : Elapsed 0.025 ms (2.547 ms / 100) 2.555 -> 2.553 ( -0.08%) [ +0.00% +0.20% +0.00% / -0.08% +0.23% +0.12%] index_select spread : Elapsed 0.026 ms (2.555 ms / 100) 2.555 -> 2.555 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.12% +0.27%] index_select strided 3 : Elapsed 0.026 ms (2.555 ms / 100) 2.549 -> 2.552 ( +0.12%) [ +0.39% +0.00% +0.08% / +0.12% +0.47% +0.51%] index_select strided 5 : Elapsed 0.026 ms (2.559 ms / 100) 2.549 -> 2.549 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.35% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.549 ms / 100) 2.542 -> 2.540 ( -0.08%) [ +0.12% +0.00% +0.16% / -0.08% +0.43% +0.43%] index_select strided 8 : Elapsed 0.025 ms (2.545 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.20% +0.00% +0.12% / +0.00% +0.35% +0.35%] index_select random : Elapsed 0.026 ms (2.553 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.16% +0.24%] index_select random_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.556 -> 2.557 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.20% +0.35%] index_select perm : Elapsed 0.026 ms (2.556 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.24% +0.12% +0.00% / +0.08% +0.31% +0.43%] index_select perm_sorted : Elapsed 0.026 ms (2.557 ms / 100) out_shape = [20, 40, 16, 4] in_shape = [20, 40, 16, 5] idx_dim = 3 B = [20, 40, 16, 4] (stride (1, 1280, 20, 320)) A = [20, 40, 16, 5] (stride (16, 320, 1, 12800)) dim = 3 5.673 -> 5.687 ( +0.25%) [ +0.18% +0.00% +0.12% / +0.25% +0.33% +0.49%] index_select const : Elapsed 0.057 ms (5.683 ms / 100) 5.789 -> 5.780 ( -0.16%) [ +0.09% +0.00% +0.03% / +0.00% -0.16% -0.16%] index_select wrap : Elapsed 0.058 ms (5.794 ms / 100) 5.775 -> 5.773 ( -0.03%) [ +0.00% +0.09% +0.07% / +0.02% +0.10% -0.03%] index_select linear : Elapsed 0.058 ms (5.775 ms / 100) 5.785 -> 5.764 ( -0.36%) [ +0.09% +0.02% +0.00% / +0.14% -0.35% -0.36%] index_select reverse : Elapsed 0.058 ms (5.790 ms / 100) 5.660 -> 5.672 ( +0.21%) [ +0.00% +0.02% +0.28% / +0.21% +0.57% +0.48%] index_select skip64 : Elapsed 0.057 ms (5.660 ms / 100) 5.668 -> 5.675 ( +0.12%) [ +0.00% +0.14% +0.05% / +0.12% +0.23% +0.39%] index_select skip256 : Elapsed 0.057 ms (5.668 ms / 100) 5.789 -> 5.773 ( -0.28%) [ +0.05% +0.00% +0.09% / +0.05% -0.28% -0.26%] index_select spread : Elapsed 0.058 ms (5.792 ms / 100) 5.775 -> 5.769 ( -0.10%) [ +0.00% +0.03% +0.02% / +0.12% -0.10% -0.05%] index_select strided 3 : Elapsed 0.058 ms (5.775 ms / 100) 5.748 -> 5.745 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.05% +0.02% +0.14%] index_select random : Elapsed 0.058 ms (5.752 ms / 100) 5.743 -> 5.746 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.09% +0.10%] index_select random_sorted : Elapsed 0.057 ms (5.743 ms / 100) 5.798 -> 5.766 ( -0.55%) [ +0.00% +0.00% +0.02% / +0.00% -0.53% -0.55%] index_select perm : Elapsed 0.058 ms (5.798 ms / 100) 5.788 -> 5.759 ( -0.50%) [ +0.07% +0.03% +0.00% / +0.10% -0.50% -0.33%] index_select perm_sorted : Elapsed 0.058 ms (5.792 ms / 100) B = [20, 40, 16, 4] (stride (160, 4, 3200, 1)) A = [20, 40, 16, 5] (stride (200, 5, 4000, 1)) dim = 3 3.562 -> 3.564 ( +0.06%) [ +0.20% +0.00% +0.03% / +0.11% +0.06% +0.06%] index_select const : Elapsed 0.036 ms (3.569 ms / 100) 3.572 -> 3.579 ( +0.20%) [ +0.11% +0.00% +0.11% / +0.20% +0.59% +0.42%] index_select wrap : Elapsed 0.036 ms (3.576 ms / 100) 3.571 -> 3.576 ( +0.14%) [ +0.08% +0.22% +0.00% / +0.14% +0.53% +0.73%] index_select linear : Elapsed 0.036 ms (3.574 ms / 100) 3.576 -> 3.574 ( -0.06%) [ +0.11% +0.08% +0.00% / -0.06% +0.06% +0.03%] index_select reverse : Elapsed 0.036 ms (3.580 ms / 100) 3.568 -> 3.572 ( +0.11%) [ +0.17% +0.11% +0.00% / +0.17% +0.11% +0.39%] index_select skip64 : Elapsed 0.036 ms (3.574 ms / 100) 3.559 -> 3.565 ( +0.17%) [ +0.14% +0.25% +0.00% / +0.25% +0.17% +0.31%] index_select skip256 : Elapsed 0.036 ms (3.564 ms / 100) 3.571 -> 3.567 ( -0.11%) [ +0.08% +0.03% +0.00% / -0.08% -0.11% -0.06%] index_select spread : Elapsed 0.036 ms (3.574 ms / 100) 3.570 -> 3.569 ( -0.03%) [ +0.08% +0.22% +0.00% / +0.17% +0.20% -0.03%] index_select strided 3 : Elapsed 0.036 ms (3.573 ms / 100) 3.568 -> 3.567 ( -0.03%) [ +0.00% +0.11% +0.03% / -0.03% +0.11% +0.14%] index_select random : Elapsed 0.036 ms (3.568 ms / 100) 3.566 -> 3.571 ( +0.14%) [ +0.22% +0.00% +0.17% / +0.14% +0.25% +0.31%] index_select random_sorted : Elapsed 0.036 ms (3.574 ms / 100) 3.577 -> 3.573 ( -0.11%) [ +0.00% +0.11% +0.17% / -0.06% -0.11% +0.06%] index_select perm : Elapsed 0.036 ms (3.577 ms / 100) 3.565 -> 3.568 ( +0.08%) [ +0.03% +0.00% +0.17% / +0.08% +0.67% +0.70%] index_select perm_sorted : Elapsed 0.036 ms (3.566 ms / 100) B = [20, 40, 16, 4] (stride (4, 80, 3200, 1)) A = [20, 40, 16, 5] (stride (80, 1600, 1, 16)) dim = 3 5.841 -> 5.827 ( -0.24%) [ +0.07% +0.00% +0.07% / +0.02% -0.09% -0.24%] index_select const : Elapsed 0.058 ms (5.845 ms / 100) 5.901 -> 5.900 ( -0.02%) [ +0.00% +0.12% +0.15% / +0.25% -0.02% +0.34%] index_select wrap : Elapsed 0.059 ms (5.901 ms / 100) 5.899 -> 5.891 ( -0.14%) [ +0.03% +0.02% +0.00% / +0.10% -0.08% -0.14%] index_select linear : Elapsed 0.059 ms (5.901 ms / 100) 5.907 -> 5.898 ( -0.15%) [ +0.00% +0.02% +0.07% / +0.07% -0.14% -0.15%] index_select reverse : Elapsed 0.059 ms (5.907 ms / 100) 5.820 -> 5.812 ( -0.14%) [ +0.05% +0.00% +0.09% / +0.14% -0.14% -0.07%] index_select skip64 : Elapsed 0.058 ms (5.823 ms / 100) 5.825 -> 5.816 ( -0.15%) [ +0.07% +0.00% +0.03% / +0.15% -0.10% -0.15%] index_select skip256 : Elapsed 0.058 ms (5.829 ms / 100) 5.908 -> 5.905 ( -0.05%) [ +0.00% +0.07% +0.00% / +0.17% -0.05% +0.02%] index_select spread : Elapsed 0.059 ms (5.908 ms / 100) 5.899 -> 5.896 ( -0.05%) [ +0.03% +0.10% +0.00% / +0.15% +0.03% -0.05%] index_select strided 3 : Elapsed 0.059 ms (5.901 ms / 100) 5.875 -> 5.876 ( +0.02%) [ +0.02% +0.00% +0.09% / +0.02% +0.10% +0.07%] index_select random : Elapsed 0.059 ms (5.876 ms / 100) 5.882 -> 5.883 ( +0.02%) [ +0.00% +0.00% +0.09% / +0.07% +0.02% +0.05%] index_select random_sorted : Elapsed 0.059 ms (5.882 ms / 100) 5.911 -> 5.908 ( -0.05%) [ +0.20% +0.00% +0.15% / +0.22% +0.03% -0.05%] index_select perm : Elapsed 0.059 ms (5.923 ms / 100) 5.905 -> 5.896 ( -0.15%) [ +0.00% +0.00% +0.02% / -0.10% -0.08% -0.15%] index_select perm_sorted : Elapsed 0.059 ms (5.905 ms / 100) B = [20, 40, 16, 4] (stride (4, 80, 3200, 1)) A = [20, 40, 16, 5] (stride (1, 100, 4000, 20)) dim = 3 5.884 -> 5.874 ( -0.17%) [ +0.02% +0.00% +0.07% / +0.07% -0.08% -0.17%] index_select const : Elapsed 0.059 ms (5.885 ms / 100) 5.953 -> 5.939 ( -0.24%) [ +0.00% +0.03% +0.17% / +0.32% -0.12% -0.24%] index_select wrap : Elapsed 0.060 ms (5.953 ms / 100) 5.957 -> 5.938 ( -0.32%) [ +0.17% +0.00% +0.08% / +0.05% -0.30% -0.32%] index_select linear : Elapsed 0.060 ms (5.967 ms / 100) 5.952 -> 5.930 ( -0.37%) [ +0.13% +0.00% +0.15% / +0.29% -0.37% -0.29%] index_select reverse : Elapsed 0.060 ms (5.960 ms / 100) 5.887 -> 5.874 ( -0.22%) [ +0.02% +0.03% +0.00% / +0.02% -0.15% -0.22%] index_select skip64 : Elapsed 0.059 ms (5.888 ms / 100) 5.881 -> 5.877 ( -0.07%) [ +0.00% +0.10% +0.10% / -0.02% -0.07% -0.03%] index_select skip256 : Elapsed 0.059 ms (5.881 ms / 100) 5.960 -> 5.939 ( -0.35%) [ +0.02% +0.02% +0.00% / +0.02% -0.35% -0.22%] index_select spread : Elapsed 0.060 ms (5.961 ms / 100) 5.976 -> 5.961 ( -0.25%) [ +0.03% +0.00% +0.07% / -0.02% -0.25% -0.23%] index_select strided 3 : Elapsed 0.060 ms (5.978 ms / 100) 5.954 -> 5.944 ( -0.17%) [ +0.00% +0.02% +0.18% / +0.08% -0.12% -0.17%] index_select random : Elapsed 0.060 ms (5.954 ms / 100) 5.954 -> 5.940 ( -0.24%) [ +0.00% +0.02% +0.08% / +0.02% -0.24% -0.24%] index_select random_sorted : Elapsed 0.060 ms (5.954 ms / 100) 5.961 -> 5.943 ( -0.30%) [ +0.02% +0.00% +0.03% / +0.10% -0.30% -0.20%] index_select perm : Elapsed 0.060 ms (5.962 ms / 100) 5.960 -> 5.937 ( -0.39%) [ +0.00% +0.13% +0.13% / +0.07% -0.39% -0.35%] index_select perm_sorted : Elapsed 0.060 ms (5.960 ms / 100) B = [20, 40, 16, 4] (stride (1, 20, 3200, 800)) A = [20, 40, 16, 5] (stride (1, 20, 4000, 800)) dim = 3 5.714 -> 5.716 ( +0.04%) [ +0.00% +0.04% +0.05% / +0.04% +0.33% +0.39%] index_select const : Elapsed 0.057 ms (5.714 ms / 100) 5.784 -> 5.771 ( -0.22%) [ +0.05% +0.00% +0.09% / +0.12% -0.22% -0.10%] index_select wrap : Elapsed 0.058 ms (5.787 ms / 100) 5.786 -> 5.779 ( -0.12%) [ +0.00% +0.02% +0.02% / -0.12% -0.10% -0.02%] index_select linear : Elapsed 0.058 ms (5.786 ms / 100) 5.810 -> 5.785 ( -0.43%) [ +0.00% +0.12% +0.10% / +0.12% -0.41% -0.43%] index_select reverse : Elapsed 0.058 ms (5.810 ms / 100) 5.709 -> 5.713 ( +0.07%) [ +0.16% +0.00% +0.14% / +0.07% +0.44% +0.46%] index_select skip64 : Elapsed 0.057 ms (5.718 ms / 100) 5.715 -> 5.723 ( +0.14%) [ +0.00% +0.00% +0.09% / +0.14% +0.28% +0.33%] index_select skip256 : Elapsed 0.057 ms (5.715 ms / 100) 5.780 -> 5.775 ( -0.09%) [ +0.07% +0.00% +0.19% / +0.05% -0.09% -0.07%] index_select spread : Elapsed 0.058 ms (5.784 ms / 100) 5.774 -> 5.754 ( -0.35%) [ +0.00% +0.02% +0.10% / -0.02% -0.35% -0.33%] index_select strided 3 : Elapsed 0.058 ms (5.774 ms / 100) 5.778 -> 5.769 ( -0.16%) [ +0.05% +0.12% +0.00% / +0.17% -0.16% -0.05%] index_select random : Elapsed 0.058 ms (5.781 ms / 100) 5.764 -> 5.764 ( +0.00%) [ +0.00% +0.05% +0.02% / +0.10% +0.00% +0.02%] index_select random_sorted : Elapsed 0.058 ms (5.764 ms / 100) 5.773 -> 5.772 ( -0.02%) [ +0.03% +0.00% +0.12% / +0.09% +0.14% -0.02%] index_select perm : Elapsed 0.058 ms (5.775 ms / 100) 5.767 -> 5.765 ( -0.03%) [ +0.00% +0.00% +0.12% / +0.10% -0.03% +0.03%] index_select perm_sorted : Elapsed 0.058 ms (5.767 ms / 100) out_shape = [4, 5, 16, 20] in_shape = [40, 5, 16, 20] idx_dim = 0 B = [4, 5, 16, 20] (stride (1600, 320, 1, 16)) A = [40, 5, 16, 20] (stride (1, 800, 4000, 40)) dim = 0 1.366 -> 1.366 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.59% +0.51%] index_select const : Elapsed 0.014 ms (1.366 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.51% +0.44%] index_select wrap : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.59% +0.44%] index_select linear : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.59% +0.59%] index_select reverse : Elapsed 0.014 ms (1.366 ms / 100) 1.363 -> 1.364 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.66% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.364 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.59% +0.59%] index_select skip256 : Elapsed 0.014 ms (1.365 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.66% +0.44%] index_select spread : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.51% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.366 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.367 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.66% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.367 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.44% +0.37%] index_select strided 8 : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.44% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.365 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.59% +0.59%] index_select random : Elapsed 0.014 ms (1.367 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.44% +0.37%] index_select random_sorted : Elapsed 0.014 ms (1.368 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.44% +0.51%] index_select perm : Elapsed 0.014 ms (1.366 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.00% +0.22% +0.07% / +0.07% +0.37% +0.37%] index_select perm_sorted : Elapsed 0.014 ms (1.366 ms / 100) B = [4, 5, 16, 20] (stride (1600, 1, 5, 80)) A = [40, 5, 16, 20] (stride (320, 12800, 1, 16)) dim = 0 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select const : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.55% +0.55%] index_select wrap : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +0.39%] index_select linear : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.23% +0.08% +0.00% / +0.00% +0.70% +0.55%] index_select reverse : Elapsed 0.013 ms (1.284 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select spread : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.39% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.283 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.70% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.62%] index_select random : Elapsed 0.013 ms (1.285 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.39% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.78% +0.47%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.282 ms / 100) B = [4, 5, 16, 20] (stride (1, 1280, 80, 4)) A = [40, 5, 16, 20] (stride (16, 12800, 1, 640)) dim = 0 1.145 -> 1.145 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.79% +0.87%] index_select const : Elapsed 0.011 ms (1.146 ms / 100) 1.165 -> 1.169 ( +0.34%) [ +0.26% +0.09% +0.00% / +0.34% +0.60% +0.69%] index_select wrap : Elapsed 0.012 ms (1.168 ms / 100) 1.147 -> 1.146 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.44% +0.52%] index_select linear : Elapsed 0.011 ms (1.147 ms / 100) 1.164 -> 1.164 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.52%] index_select reverse : Elapsed 0.012 ms (1.164 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.17% +0.00% +0.09% / +0.09% +0.61% +0.61%] index_select skip64 : Elapsed 0.011 ms (1.147 ms / 100) 1.146 -> 1.146 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.52%] index_select skip256 : Elapsed 0.011 ms (1.146 ms / 100) 1.146 -> 1.147 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.52% +0.52%] index_select spread : Elapsed 0.011 ms (1.147 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.52% +0.44%] index_select strided 3 : Elapsed 0.012 ms (1.150 ms / 100) 1.158 -> 1.158 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.60%] index_select strided 5 : Elapsed 0.012 ms (1.158 ms / 100) 1.151 -> 1.153 ( +0.17%) [ +0.17% +0.26% +0.00% / +0.17% +0.52% +0.61%] index_select strided 7 : Elapsed 0.012 ms (1.153 ms / 100) 1.151 -> 1.151 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.52% +0.52%] index_select strided 8 : Elapsed 0.012 ms (1.152 ms / 100) 1.154 -> 1.154 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.52% +0.35%] index_select strided 16 : Elapsed 0.012 ms (1.155 ms / 100) 1.146 -> 1.147 ( +0.09%) [ +0.00% +0.00% +0.17% / +0.09% +0.44% +0.52%] index_select random : Elapsed 0.011 ms (1.146 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.52% +0.52%] index_select random_sorted : Elapsed 0.011 ms (1.146 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.78% +0.87%] index_select perm : Elapsed 0.011 ms (1.149 ms / 100) 1.171 -> 1.170 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +1.62% +1.54%] index_select perm_sorted : Elapsed 0.012 ms (1.171 ms / 100) B = [4, 5, 16, 20] (stride (16, 1280, 1, 64)) A = [40, 5, 16, 20] (stride (20, 800, 4000, 1)) dim = 0 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.63%] index_select const : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.47% +0.47%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.55%] index_select reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.55% +0.63%] index_select skip64 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.55% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.273 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select spread : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.55% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.279 ( +0.47%) [ +0.39% +0.00% +0.08% / +0.94% +0.55% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.39% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.39% +0.47%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.31% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.275 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.31% +0.31%] index_select perm : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) B = [4, 5, 16, 20] (stride (80, 16, 1, 320)) A = [40, 5, 16, 20] (stride (1, 640, 40, 3200)) dim = 0 0.605 -> 0.606 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.50% +0.66%] index_select const : Elapsed 0.006 ms (0.605 ms / 100) 0.604 -> 0.606 ( +0.33%) [ +0.17% +0.33% +0.00% / +2.98% +0.33% +0.33%] index_select wrap : Elapsed 0.006 ms (0.605 ms / 100) 0.604 -> 0.604 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.00% +0.33% +0.33%] index_select linear : Elapsed 0.006 ms (0.605 ms / 100) 0.605 -> 0.607 ( +0.33%) [ +0.33% +0.00% +0.50% / +0.33% +0.33% +3.80%] index_select reverse : Elapsed 0.006 ms (0.607 ms / 100) 0.605 -> 0.607 ( +0.33%) [ +0.33% +0.17% +0.00% / +0.33% +0.50% +0.66%] index_select skip64 : Elapsed 0.006 ms (0.607 ms / 100) 0.605 -> 0.604 ( -0.17%) [ +0.33% +0.00% +0.00% / +0.33% -0.17% -0.17%] index_select skip256 : Elapsed 0.006 ms (0.607 ms / 100) 0.606 -> 0.604 ( -0.33%) [ +0.00% +0.17% +0.17% / +0.17% -0.33% +0.00%] index_select spread : Elapsed 0.006 ms (0.606 ms / 100) 0.608 -> 0.609 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.33% +0.49%] index_select strided 3 : Elapsed 0.006 ms (0.609 ms / 100) 0.607 -> 0.610 ( +0.49%) [ +0.33% +0.16% +0.00% / +4.61% +0.49% +0.49%] index_select strided 5 : Elapsed 0.006 ms (0.609 ms / 100) 0.607 -> 0.608 ( +0.16%) [ +4.94% +0.00% +0.00% / +0.16% +0.49% +0.49%] index_select strided 7 : Elapsed 0.006 ms (0.637 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +0.17% +0.00% +0.17% / +3.47% +0.66% +0.66%] index_select strided 8 : Elapsed 0.006 ms (0.606 ms / 100) 0.609 -> 0.611 ( +0.33%) [ +0.16% +0.00% +1.64% / +0.33% +0.49% +0.66%] index_select strided 16 : Elapsed 0.006 ms (0.610 ms / 100) 0.606 -> 0.607 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.17% +0.17%] index_select random : Elapsed 0.006 ms (0.606 ms / 100) 0.610 -> 0.609 ( -0.16%) [ +0.00% +0.16% +0.00% / -0.16% +0.00% +0.49%] index_select random_sorted : Elapsed 0.006 ms (0.610 ms / 100) 0.607 -> 0.607 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.33% +0.00% +0.16%] index_select perm : Elapsed 0.006 ms (0.608 ms / 100) 0.603 -> 0.604 ( +0.17%) [ +0.17% +0.33% +0.00% / +0.17% +0.66% +0.83%] index_select perm_sorted : Elapsed 0.006 ms (0.604 ms / 100) B = [4, 5, 16, 20] (stride (16, 64, 1, 320)) A = [40, 5, 16, 20] (stride (5, 1, 200, 3200)) dim = 0 1.363 -> 1.364 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.37% +0.29%] index_select const : Elapsed 0.014 ms (1.363 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.59% +0.59%] index_select wrap : Elapsed 0.014 ms (1.364 ms / 100) 1.361 -> 1.364 ( +0.22%) [ +0.22% +0.00% +0.07% / +0.22% +0.73% +0.59%] index_select linear : Elapsed 0.014 ms (1.364 ms / 100) 1.362 -> 1.366 ( +0.29%) [ +0.00% +0.07% +0.15% / +0.29% +0.66% +0.59%] index_select reverse : Elapsed 0.014 ms (1.362 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.59% +0.74%] index_select skip64 : Elapsed 0.014 ms (1.362 ms / 100) 1.362 -> 1.362 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.51% +0.59%] index_select skip256 : Elapsed 0.014 ms (1.364 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.80% +0.95%] index_select spread : Elapsed 0.014 ms (1.374 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.66% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.80% +0.66%] index_select strided 5 : Elapsed 0.014 ms (1.374 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.44% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.65% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.66% +0.87%] index_select strided 16 : Elapsed 0.014 ms (1.374 ms / 100) 1.371 -> 1.372 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.44% +0.51%] index_select random : Elapsed 0.014 ms (1.372 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.73% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.372 -> 1.371 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.29% +0.36%] index_select perm : Elapsed 0.014 ms (1.373 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.371 ms / 100) out_shape = [40, 4, 16, 20] in_shape = [40, 5, 16, 20] idx_dim = 1 B = [40, 4, 16, 20] (stride (1280, 16, 1, 64)) A = [40, 5, 16, 20] (stride (1600, 320, 20, 1)) dim = 1 5.419 -> 5.419 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.37% +0.61%] index_select const : Elapsed 0.054 ms (5.426 ms / 100) 5.509 -> 5.506 ( -0.05%) [ +0.09% +0.00% +0.04% / +0.11% -0.05% +0.04%] index_select wrap : Elapsed 0.055 ms (5.514 ms / 100) 5.501 -> 5.506 ( +0.09%) [ +0.00% +0.07% +0.09% / +0.13% +0.16% +0.09%] index_select linear : Elapsed 0.055 ms (5.501 ms / 100) 5.509 -> 5.506 ( -0.05%) [ +0.00% +0.13% +0.11% / +0.16% -0.05% -0.04%] index_select reverse : Elapsed 0.055 ms (5.509 ms / 100) 5.412 -> 5.410 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.24% +0.18%] index_select skip64 : Elapsed 0.054 ms (5.414 ms / 100) 5.415 -> 5.423 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +0.22% +0.18%] index_select skip256 : Elapsed 0.054 ms (5.415 ms / 100) 5.510 -> 5.510 ( +0.00%) [ +0.00% +0.13% +0.18% / +0.09% +0.02% +0.00%] index_select spread : Elapsed 0.055 ms (5.510 ms / 100) 5.502 -> 5.496 ( -0.11%) [ +0.00% +0.11% +0.09% / +0.00% +0.04% -0.11%] index_select strided 3 : Elapsed 0.055 ms (5.502 ms / 100) 5.475 -> 5.475 ( +0.00%) [ +0.18% +0.11% +0.00% / +0.00% +0.11% +0.16%] index_select random : Elapsed 0.055 ms (5.485 ms / 100) 5.470 -> 5.481 ( +0.20%) [ +0.15% +0.00% +0.18% / +0.20% +0.22% +0.26%] index_select random_sorted : Elapsed 0.055 ms (5.478 ms / 100) 5.512 -> 5.517 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.11% +0.09% +0.36%] index_select perm : Elapsed 0.055 ms (5.517 ms / 100) 5.503 -> 5.501 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.31% +0.05%] index_select perm_sorted : Elapsed 0.055 ms (5.512 ms / 100) B = [40, 4, 16, 20] (stride (16, 12800, 1, 640)) A = [40, 5, 16, 20] (stride (1600, 16, 1, 80)) dim = 1 5.750 -> 5.751 ( +0.02%) [ +0.16% +0.00% +0.23% / +0.16% +0.02% +0.09%] index_select const : Elapsed 0.058 ms (5.759 ms / 100) 5.805 -> 5.799 ( -0.10%) [ +0.00% +0.00% +0.10% / +0.14% -0.09% -0.10%] index_select wrap : Elapsed 0.058 ms (5.805 ms / 100) 5.800 -> 5.791 ( -0.16%) [ +0.21% +0.12% +0.00% / +0.03% -0.16% +0.12%] index_select linear : Elapsed 0.058 ms (5.812 ms / 100) 5.810 -> 5.805 ( -0.09%) [ +0.07% +0.00% +0.05% / +0.05% -0.09% -0.03%] index_select reverse : Elapsed 0.058 ms (5.814 ms / 100) 5.761 -> 5.748 ( -0.23%) [ +0.00% +0.05% +0.07% / -0.03% -0.23% -0.17%] index_select skip64 : Elapsed 0.058 ms (5.761 ms / 100) 5.749 -> 5.753 ( +0.07%) [ +0.21% +0.00% +0.21% / +0.21% +0.07% +0.09%] index_select skip256 : Elapsed 0.058 ms (5.761 ms / 100) 5.804 -> 5.787 ( -0.29%) [ +0.00% +0.02% +0.12% / +0.19% -0.12% -0.29%] index_select spread : Elapsed 0.058 ms (5.804 ms / 100) 5.817 -> 5.807 ( -0.17%) [ +0.00% +0.05% +0.00% / +0.09% -0.17% -0.12%] index_select strided 3 : Elapsed 0.058 ms (5.817 ms / 100) 5.810 -> 5.795 ( -0.26%) [ +0.00% +0.10% +0.12% / +0.07% -0.09% -0.26%] index_select random : Elapsed 0.058 ms (5.810 ms / 100) 5.811 -> 5.793 ( -0.31%) [ +0.00% +0.09% +0.09% / +0.10% -0.26% -0.31%] index_select random_sorted : Elapsed 0.058 ms (5.811 ms / 100) 5.806 -> 5.806 ( +0.00%) [ +0.03% +0.00% +0.10% / +0.09% +0.00% +0.03%] index_select perm : Elapsed 0.058 ms (5.808 ms / 100) 5.808 -> 5.796 ( -0.21%) [ +0.03% +0.00% +0.03% / +0.05% -0.21% -0.14%] index_select perm_sorted : Elapsed 0.058 ms (5.810 ms / 100) B = [40, 4, 16, 20] (stride (20, 800, 3200, 1)) A = [40, 5, 16, 20] (stride (1, 800, 4000, 40)) dim = 1 5.679 -> 5.683 ( +0.07%) [ +0.00% +0.05% +0.12% / +0.07% +0.41% +0.41%] index_select const : Elapsed 0.057 ms (5.679 ms / 100) 5.750 -> 5.738 ( -0.21%) [ +0.00% +0.00% +0.16% / +0.02% -0.12% -0.21%] index_select wrap : Elapsed 0.057 ms (5.750 ms / 100) 5.747 -> 5.737 ( -0.17%) [ +0.03% +0.00% +0.26% / +0.21% -0.17% -0.17%] index_select linear : Elapsed 0.057 ms (5.749 ms / 100) 5.768 -> 5.743 ( -0.43%) [ +0.05% +0.00% +0.16% / +0.17% -0.38% -0.43%] index_select reverse : Elapsed 0.058 ms (5.771 ms / 100) 5.680 -> 5.684 ( +0.07%) [ +0.00% +0.00% +0.16% / +0.07% +0.25% +0.39%] index_select skip64 : Elapsed 0.057 ms (5.680 ms / 100) 5.679 -> 5.683 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.44% +0.30%] index_select skip256 : Elapsed 0.057 ms (5.679 ms / 100) 5.748 -> 5.743 ( -0.09%) [ +0.16% +0.00% +0.10% / +0.03% -0.09% -0.09%] index_select spread : Elapsed 0.058 ms (5.757 ms / 100) 5.734 -> 5.716 ( -0.31%) [ +0.09% +0.17% +0.00% / +0.17% -0.24% -0.31%] index_select strided 3 : Elapsed 0.057 ms (5.739 ms / 100) 5.738 -> 5.735 ( -0.05%) [ +0.12% +0.07% +0.00% / -0.05% +0.00% +0.07%] index_select random : Elapsed 0.057 ms (5.745 ms / 100) 5.734 -> 5.719 ( -0.26%) [ +0.09% +0.00% +0.09% / -0.02% -0.26% -0.26%] index_select random_sorted : Elapsed 0.057 ms (5.739 ms / 100) 5.752 -> 5.733 ( -0.33%) [ +0.10% +0.00% +0.00% / +0.00% -0.33% -0.24%] index_select perm : Elapsed 0.058 ms (5.758 ms / 100) 5.751 -> 5.739 ( -0.21%) [ +0.07% +0.00% +0.09% / -0.03% -0.21% -0.14%] index_select perm_sorted : Elapsed 0.058 ms (5.755 ms / 100) B = [40, 4, 16, 20] (stride (1, 800, 3200, 40)) A = [40, 5, 16, 20] (stride (5, 1, 4000, 200)) dim = 1 5.729 -> 5.736 ( +0.12%) [ +0.00% +0.02% +0.14% / +0.12% +0.59% +0.47%] index_select const : Elapsed 0.057 ms (5.729 ms / 100) 5.739 -> 5.742 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.30% +0.24%] index_select wrap : Elapsed 0.057 ms (5.739 ms / 100) 5.733 -> 5.742 ( +0.16%) [ +0.03% +0.00% +0.14% / +0.16% +0.35% +0.45%] index_select linear : Elapsed 0.057 ms (5.735 ms / 100) 5.734 -> 5.737 ( +0.05%) [ +0.03% +0.07% +0.00% / +0.05% +0.35% +0.28%] index_select reverse : Elapsed 0.057 ms (5.736 ms / 100) 5.734 -> 5.739 ( +0.09%) [ +0.05% +0.09% +0.00% / +0.09% +0.26% +0.26%] index_select skip64 : Elapsed 0.057 ms (5.737 ms / 100) 5.734 -> 5.741 ( +0.12%) [ +0.00% +0.05% +0.14% / +0.12% +0.33% +0.33%] index_select skip256 : Elapsed 0.057 ms (5.734 ms / 100) 5.730 -> 5.740 ( +0.17%) [ +0.07% +0.00% +0.10% / +0.17% +0.45% +0.45%] index_select spread : Elapsed 0.057 ms (5.734 ms / 100) 5.733 -> 5.735 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.44% +0.40%] index_select strided 3 : Elapsed 0.057 ms (5.733 ms / 100) 5.725 -> 5.734 ( +0.16%) [ +0.09% +0.00% +0.17% / +0.16% +0.45% +0.49%] index_select random : Elapsed 0.057 ms (5.730 ms / 100) 5.732 -> 5.736 ( +0.07%) [ +0.10% +0.00% +0.09% / +0.07% +0.35% +0.42%] index_select random_sorted : Elapsed 0.057 ms (5.738 ms / 100) 5.728 -> 5.744 ( +0.28%) [ +0.16% +0.00% +0.24% / +0.28% +0.42% +0.58%] index_select perm : Elapsed 0.057 ms (5.737 ms / 100) 5.735 -> 5.750 ( +0.26%) [ +0.00% +0.03% +0.16% / +0.26% +0.28% +0.42%] index_select perm_sorted : Elapsed 0.057 ms (5.735 ms / 100) B = [40, 4, 16, 20] (stride (1, 800, 3200, 40)) A = [40, 5, 16, 20] (stride (16, 640, 1, 3200)) dim = 1 5.734 -> 5.708 ( -0.45%) [ +0.05% +0.00% +0.12% / +0.12% -0.45% -0.33%] index_select const : Elapsed 0.057 ms (5.737 ms / 100) 5.782 -> 5.775 ( -0.12%) [ +0.07% +0.00% +0.05% / -0.03% -0.12% -0.09%] index_select wrap : Elapsed 0.058 ms (5.786 ms / 100) 5.770 -> 5.777 ( +0.12%) [ +0.00% +0.17% +0.36% / +0.28% +0.12% +0.23%] index_select linear : Elapsed 0.058 ms (5.770 ms / 100) 5.801 -> 5.796 ( -0.09%) [ +0.00% +0.12% +0.02% / -0.09% +0.28% +0.31%] index_select reverse : Elapsed 0.058 ms (5.801 ms / 100) 5.729 -> 5.710 ( -0.33%) [ +0.00% +0.07% +0.19% / +0.14% -0.33% -0.26%] index_select skip64 : Elapsed 0.057 ms (5.729 ms / 100) 5.733 -> 5.708 ( -0.44%) [ +0.00% +0.07% +0.24% / +0.12% -0.42% -0.44%] index_select skip256 : Elapsed 0.057 ms (5.733 ms / 100) 5.780 -> 5.780 ( +0.00%) [ +0.10% +0.00% +0.07% / +0.10% +0.00% +0.09%] index_select spread : Elapsed 0.058 ms (5.786 ms / 100) 5.795 -> 5.788 ( -0.12%) [ +0.00% +0.28% +0.10% / +0.19% -0.12% +0.03%] index_select strided 3 : Elapsed 0.058 ms (5.795 ms / 100) 5.769 -> 5.781 ( +0.21%) [ +0.00% +0.09% +0.17% / +0.29% +0.23% +0.21%] index_select random : Elapsed 0.058 ms (5.769 ms / 100) 5.771 -> 5.759 ( -0.21%) [ +0.00% +0.07% +0.07% / +0.00% +0.05% -0.21%] index_select random_sorted : Elapsed 0.058 ms (5.771 ms / 100) 5.783 -> 5.789 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.31% +0.38%] index_select perm : Elapsed 0.058 ms (5.789 ms / 100) 5.775 -> 5.781 ( +0.10%) [ +0.07% +0.21% +0.00% / +0.17% +0.17% +0.10%] index_select perm_sorted : Elapsed 0.058 ms (5.779 ms / 100) B = [40, 4, 16, 20] (stride (64, 1, 4, 2560)) A = [40, 5, 16, 20] (stride (1, 12800, 800, 40)) dim = 1 5.572 -> 5.558 ( -0.25%) [ +0.13% +0.00% +0.20% / +0.11% -0.25% -0.25%] index_select const : Elapsed 0.056 ms (5.579 ms / 100) 5.596 -> 5.599 ( +0.05%) [ +0.00% +0.09% +0.23% / +0.16% +0.16% +0.05%] index_select wrap : Elapsed 0.056 ms (5.596 ms / 100) 5.597 -> 5.600 ( +0.05%) [ +0.04% +0.00% +0.02% / +0.09% +0.05% +0.16%] index_select linear : Elapsed 0.056 ms (5.599 ms / 100) 5.598 -> 5.599 ( +0.02%) [ +0.00% +0.09% +0.16% / +0.18% +0.02% +0.09%] index_select reverse : Elapsed 0.056 ms (5.598 ms / 100) 5.573 -> 5.553 ( -0.36%) [ +0.00% +0.09% +0.07% / +0.16% -0.27% -0.36%] index_select skip64 : Elapsed 0.056 ms (5.573 ms / 100) 5.575 -> 5.555 ( -0.36%) [ +0.00% +0.02% +0.07% / +0.09% -0.23% -0.36%] index_select skip256 : Elapsed 0.056 ms (5.575 ms / 100) 5.596 -> 5.594 ( -0.04%) [ +0.00% +0.07% +0.09% / +0.14% +0.13% -0.04%] index_select spread : Elapsed 0.056 ms (5.596 ms / 100) 5.606 -> 5.601 ( -0.09%) [ +0.00% +0.04% +0.12% / +0.11% -0.05% -0.09%] index_select strided 3 : Elapsed 0.056 ms (5.606 ms / 100) 5.616 -> 5.594 ( -0.39%) [ +0.07% +0.00% +0.12% / +0.14% -0.34% -0.39%] index_select random : Elapsed 0.056 ms (5.620 ms / 100) 5.589 -> 5.581 ( -0.14%) [ +0.27% +0.00% +0.16% / +0.21% -0.07% -0.14%] index_select random_sorted : Elapsed 0.056 ms (5.604 ms / 100) 5.615 -> 5.602 ( -0.23%) [ +0.07% +0.00% +0.14% / +0.14% -0.21% -0.23%] index_select perm : Elapsed 0.056 ms (5.619 ms / 100) 5.597 -> 5.590 ( -0.13%) [ +0.04% +0.00% +0.29% / +0.23% -0.05% -0.13%] index_select perm_sorted : Elapsed 0.056 ms (5.599 ms / 100) out_shape = [40, 5, 4, 20] in_shape = [40, 5, 16, 20] idx_dim = 2 B = [40, 5, 4, 20] (stride (400, 80, 20, 1)) A = [40, 5, 16, 20] (stride (1, 12800, 800, 40)) dim = 2 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.33% +0.46%] index_select const : Elapsed 0.024 ms (2.412 ms / 100) 2.413 -> 2.417 ( +0.17%) [ +0.25% +0.25% +0.00% / +0.17% +0.46% +0.50%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.418 -> 2.421 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.12% +0.37% +0.45%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.25% +0.17%] index_select reverse : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.08% +0.37% +0.00% / +0.04% +0.29% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.416 ( +0.21%) [ +0.00% +0.08% +0.08% / +0.21% +0.25% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.411 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.29% +0.17%] index_select spread : Elapsed 0.024 ms (2.420 ms / 100) 2.418 -> 2.422 ( +0.17%) [ +0.08% +0.12% +0.00% / +0.17% +0.25% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.416 -> 2.421 ( +0.21%) [ +0.12% +0.00% +0.08% / +0.21% +0.29% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.419 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.08% +0.12% +0.00% / +0.17% +0.21% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.417 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.21% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.21% +0.25%] index_select random : Elapsed 0.024 ms (2.411 ms / 100) 2.409 -> 2.415 ( +0.25%) [ +0.29% +0.17% +0.00% / +0.25% +0.46% +0.37%] index_select random_sorted : Elapsed 0.024 ms (2.416 ms / 100) 2.417 -> 2.417 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.21% +0.50%] index_select perm : Elapsed 0.024 ms (2.419 ms / 100) 2.414 -> 2.423 ( +0.37%) [ +0.08% +0.00% +0.12% / +0.37% +0.46% +0.50%] index_select perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) B = [40, 5, 4, 20] (stride (400, 1, 100, 5)) A = [40, 5, 16, 20] (stride (80, 1, 5, 3200)) dim = 2 2.522 -> 2.530 ( +0.32%) [ +0.08% +0.08% +0.00% / +0.32% +0.52% +0.32%] index_select const : Elapsed 0.025 ms (2.524 ms / 100) 2.544 -> 2.545 ( +0.04%) [ +0.28% +0.16% +0.00% / +0.04% +0.35% +0.31%] index_select wrap : Elapsed 0.026 ms (2.551 ms / 100) 2.549 -> 2.549 ( +0.00%) [ +0.00% +0.04% +0.20% / +0.16% +0.00% +0.12%] index_select linear : Elapsed 0.025 ms (2.549 ms / 100) 2.549 -> 2.553 ( +0.16%) [ +0.00% +0.20% +0.20% / +0.16% +0.16% +0.31%] index_select reverse : Elapsed 0.025 ms (2.549 ms / 100) 2.522 -> 2.524 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.28% +0.24%] index_select skip64 : Elapsed 0.025 ms (2.524 ms / 100) 2.524 -> 2.525 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.20% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.526 ms / 100) 2.580 -> 2.579 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.16% +0.08%] index_select spread : Elapsed 0.026 ms (2.580 ms / 100) 2.594 -> 2.593 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.12% +0.04%] index_select strided 3 : Elapsed 0.026 ms (2.596 ms / 100) 2.543 -> 2.549 ( +0.24%) [ +0.12% +0.20% +0.00% / +0.24% +0.39% +0.39%] index_select strided 5 : Elapsed 0.025 ms (2.546 ms / 100) 2.559 -> 2.562 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.23% +0.23%] index_select strided 7 : Elapsed 0.026 ms (2.562 ms / 100) 2.526 -> 2.528 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.40% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.528 ms / 100) 2.540 -> 2.544 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.47% +0.35%] index_select random : Elapsed 0.025 ms (2.540 ms / 100) 2.536 -> 2.536 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.24% +0.28%] index_select random_sorted : Elapsed 0.025 ms (2.536 ms / 100) 2.568 -> 2.566 ( -0.08%) [ +0.12% +0.00% +0.08% / -0.08% +0.19% +0.35%] index_select perm : Elapsed 0.026 ms (2.571 ms / 100) 2.562 -> 2.570 ( +0.31%) [ +0.20% +0.16% +0.00% / +0.31% +0.43% +0.35%] index_select perm_sorted : Elapsed 0.026 ms (2.567 ms / 100) B = [40, 5, 4, 20] (stride (1, 40, 4000, 200)) A = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) dim = 2 2.393 -> 2.395 ( +0.08%) [ +0.13% +0.04% +0.00% / +0.08% +0.42% +0.38%] index_select const : Elapsed 0.024 ms (2.396 ms / 100) 2.412 -> 2.410 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.21% +0.29%] index_select wrap : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.12% +0.00% +0.17% / +0.29% +0.33% +0.58%] index_select linear : Elapsed 0.024 ms (2.413 ms / 100) 2.407 -> 2.410 ( +0.12%) [ +0.17% +0.00% +0.08% / +0.12% +0.62% +0.50%] index_select reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.382 -> 2.392 ( +0.42%) [ +0.00% +0.17% +0.17% / +0.42% +0.71% +0.67%] index_select skip64 : Elapsed 0.024 ms (2.382 ms / 100) 2.388 -> 2.390 ( +0.08%) [ +0.00% +0.21% +0.13% / +0.08% +0.63% +0.63%] index_select skip256 : Elapsed 0.024 ms (2.388 ms / 100) 2.412 -> 2.417 ( +0.21%) [ +0.25% +0.17% +0.00% / +0.21% +0.58% +0.83%] index_select spread : Elapsed 0.024 ms (2.418 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.37% +0.41%] index_select strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.37% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.412 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.58% +0.54%] index_select strided 7 : Elapsed 0.024 ms (2.421 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.46% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.415 -> 2.415 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.08% +0.29%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.00% +0.08% +0.21% / -0.08% +0.29% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.17% +0.04% +0.00% / -0.04% +0.17% +0.25%] index_select perm : Elapsed 0.024 ms (2.416 ms / 100) 2.421 -> 2.418 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.29% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.421 ms / 100) B = [40, 5, 4, 20] (stride (20, 1, 5, 800)) A = [40, 5, 16, 20] (stride (1, 640, 40, 3200)) dim = 2 2.547 -> 2.548 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.35% +0.47%] index_select const : Elapsed 0.025 ms (2.548 ms / 100) 2.551 -> 2.551 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.04% +0.08% +0.00%] index_select wrap : Elapsed 0.026 ms (2.552 ms / 100) 2.547 -> 2.552 ( +0.20%) [ +0.00% +0.16% +0.20% / +0.20% +0.27% +0.20%] index_select linear : Elapsed 0.025 ms (2.547 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.16% +0.12%] index_select reverse : Elapsed 0.026 ms (2.561 ms / 100) 2.550 -> 2.551 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.39% +0.27%] index_select skip64 : Elapsed 0.026 ms (2.553 ms / 100) 2.550 -> 2.549 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.12% +0.27%] index_select skip256 : Elapsed 0.026 ms (2.552 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.12% +0.00% +0.20% / +0.08% +0.27% +0.39%] index_select spread : Elapsed 0.026 ms (2.555 ms / 100) 2.550 -> 2.553 ( +0.12%) [ +0.20% +0.00% +0.31% / +0.12% +0.35% +0.20%] index_select strided 3 : Elapsed 0.026 ms (2.555 ms / 100) 2.565 -> 2.563 ( -0.08%) [ +0.00% +0.00% +0.12% / -0.08% +0.23% +0.23%] index_select strided 5 : Elapsed 0.026 ms (2.565 ms / 100) 2.559 -> 2.558 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.23% +0.31%] index_select strided 7 : Elapsed 0.026 ms (2.561 ms / 100) 2.549 -> 2.557 ( +0.31%) [ +0.12% +0.00% +0.16% / +0.35% +0.39% +0.31%] index_select strided 8 : Elapsed 0.026 ms (2.552 ms / 100) 2.557 -> 2.559 ( +0.08%) [ +0.16% +0.00% +0.04% / +0.08% +0.43% +0.43%] index_select random : Elapsed 0.026 ms (2.561 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.27% +0.55%] index_select random_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.564 -> 2.563 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.47% +0.39%] index_select perm : Elapsed 0.026 ms (2.564 ms / 100) 2.564 -> 2.567 ( +0.12%) [ +0.00% +0.27% +0.16% / +0.12% +0.51% +0.62%] index_select perm_sorted : Elapsed 0.026 ms (2.564 ms / 100) B = [40, 5, 4, 20] (stride (4, 160, 1, 800)) A = [40, 5, 16, 20] (stride (1, 12800, 800, 40)) dim = 2 2.570 -> 2.571 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.39% +0.23%] index_select const : Elapsed 0.026 ms (2.572 ms / 100) 2.570 -> 2.570 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.31% +0.39%] index_select wrap : Elapsed 0.026 ms (2.572 ms / 100) 2.569 -> 2.573 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.16% +0.43% +0.47%] index_select linear : Elapsed 0.026 ms (2.569 ms / 100) 2.568 -> 2.569 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.51% +0.62%] index_select reverse : Elapsed 0.026 ms (2.568 ms / 100) 2.567 -> 2.569 ( +0.08%) [ +0.16% +0.04% +0.00% / +0.08% +0.58% +0.43%] index_select skip64 : Elapsed 0.026 ms (2.571 ms / 100) 2.567 -> 2.569 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.47% +0.43%] index_select skip256 : Elapsed 0.026 ms (2.570 ms / 100) 2.568 -> 2.571 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.43% +0.43%] index_select spread : Elapsed 0.026 ms (2.568 ms / 100) 2.570 -> 2.574 ( +0.16%) [ +0.00% +0.08% +0.12% / +0.16% +0.54% +0.35%] index_select strided 3 : Elapsed 0.026 ms (2.570 ms / 100) 2.570 -> 2.573 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.43% +0.51%] index_select strided 5 : Elapsed 0.026 ms (2.571 ms / 100) 2.569 -> 2.572 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.12% +0.47% +0.35%] index_select strided 7 : Elapsed 0.026 ms (2.571 ms / 100) 2.567 -> 2.570 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.39% +0.39%] index_select strided 8 : Elapsed 0.026 ms (2.569 ms / 100) 2.569 -> 2.573 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.62% +0.31%] index_select random : Elapsed 0.026 ms (2.572 ms / 100) 2.568 -> 2.571 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.39% +0.51%] index_select random_sorted : Elapsed 0.026 ms (2.570 ms / 100) 2.568 -> 2.567 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.39% +0.47%] index_select perm : Elapsed 0.026 ms (2.568 ms / 100) 2.567 -> 2.570 ( +0.12%) [ +0.04% +0.00% +0.16% / +0.12% +0.39% +0.39%] index_select perm_sorted : Elapsed 0.026 ms (2.568 ms / 100) B = [40, 5, 4, 20] (stride (1, 160, 40, 800)) A = [40, 5, 16, 20] (stride (20, 800, 4000, 1)) dim = 2 2.396 -> 2.395 ( -0.04%) [ +0.17% +0.21% +0.00% / -0.04% +0.21% +0.25%] index_select const : Elapsed 0.024 ms (2.400 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.08% +0.04% +0.00%] index_select wrap : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.08% +0.16% +0.00%] index_select linear : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.20% +0.08%] index_select reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.17% +0.13% +0.00% / +0.04% +0.04% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.396 -> 2.398 ( +0.08%) [ +0.00% +0.21% +0.08% / +0.08% +0.17% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.396 ms / 100) 2.440 -> 2.448 ( +0.33%) [ +0.16% +0.00% +0.25% / +0.33% +0.49% +0.45%] index_select spread : Elapsed 0.024 ms (2.444 ms / 100) 2.442 -> 2.449 ( +0.29%) [ +0.25% +0.00% +0.20% / +0.29% +0.33% +0.41%] index_select strided 3 : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.447 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.25% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.448 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.49% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.398 -> 2.404 ( +0.25%) [ +0.17% +0.33% +0.00% / +0.25% +0.42% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.00% +0.00% +0.12% / +0.25% +0.33% +0.16%] index_select random : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.00% +0.33% +0.16% / +0.16% +0.20% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.04% +0.16%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.12% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) out_shape = [40, 5, 16, 4] in_shape = [40, 5, 16, 20] idx_dim = 3 B = [40, 5, 16, 4] (stride (320, 64, 4, 1)) A = [40, 5, 16, 20] (stride (1600, 1, 5, 80)) dim = 3 1.803 -> 1.806 ( +0.17%) [ +0.06% +0.00% +0.28% / +0.17% +0.94% +0.83%] index_select const : Elapsed 0.018 ms (1.804 ms / 100) 1.801 -> 1.802 ( +0.06%) [ +0.28% +0.00% +0.00% / +0.06% +1.00% +0.61%] index_select wrap : Elapsed 0.018 ms (1.806 ms / 100) 1.799 -> 1.803 ( +0.22%) [ +0.17% +0.00% +0.06% / +0.22% +0.61% +1.06%] index_select linear : Elapsed 0.018 ms (1.802 ms / 100) 1.810 -> 1.807 ( -0.17%) [ +0.17% +0.00% +0.06% / -0.17% +0.17% +0.39%] index_select reverse : Elapsed 0.018 ms (1.813 ms / 100) 1.786 -> 1.791 ( +0.28%) [ +0.17% +0.00% +0.73% / +0.28% +0.90% +0.84%] index_select skip64 : Elapsed 0.018 ms (1.789 ms / 100) 1.802 -> 1.803 ( +0.06%) [ +0.00% +0.33% +0.22% / +0.06% +0.61% +0.83%] index_select skip256 : Elapsed 0.018 ms (1.802 ms / 100) 1.800 -> 1.799 ( -0.06%) [ +0.33% +0.06% +0.00% / -0.06% +1.06% +0.89%] index_select spread : Elapsed 0.018 ms (1.806 ms / 100) 1.805 -> 1.804 ( -0.06%) [ +0.06% +0.00% +0.17% / -0.06% +0.72% +0.83%] index_select strided 3 : Elapsed 0.018 ms (1.806 ms / 100) 1.799 -> 1.799 ( +0.00%) [ +0.17% +0.22% +0.00% / +0.00% +0.89% +0.72%] index_select strided 5 : Elapsed 0.018 ms (1.802 ms / 100) 1.798 -> 1.805 ( +0.39%) [ +0.28% +0.44% +0.00% / +0.39% +0.78% +0.83%] index_select strided 7 : Elapsed 0.018 ms (1.803 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.06% +0.00% +0.06% / +0.11% +1.10% +0.94%] index_select strided 8 : Elapsed 0.018 ms (1.813 ms / 100) 1.801 -> 1.807 ( +0.33%) [ +0.00% +0.22% +0.17% / +0.33% +0.83% +0.67%] index_select strided 16 : Elapsed 0.018 ms (1.801 ms / 100) 1.795 -> 1.801 ( +0.33%) [ +0.00% +0.45% +0.33% / +0.33% +1.00% +1.28%] index_select random : Elapsed 0.018 ms (1.795 ms / 100) 1.804 -> 1.805 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.06% +0.61% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.805 ms / 100) 1.798 -> 1.806 ( +0.44%) [ +0.00% +0.39% +0.28% / +0.44% +0.78% +0.83%] index_select perm : Elapsed 0.018 ms (1.798 ms / 100) 1.805 -> 1.809 ( +0.22%) [ +0.11% +0.00% +0.00% / +0.22% +1.16% +0.89%] index_select perm_sorted : Elapsed 0.018 ms (1.807 ms / 100) B = [40, 5, 16, 4] (stride (320, 64, 1, 16)) A = [40, 5, 16, 20] (stride (1600, 1, 100, 5)) dim = 3 2.020 -> 2.022 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.45% +0.89%] index_select const : Elapsed 0.020 ms (2.022 ms / 100) 2.041 -> 2.046 ( +0.24%) [ +0.00% +0.00% +0.20% / +0.24% +0.39% +0.34%] index_select wrap : Elapsed 0.020 ms (2.041 ms / 100) 2.037 -> 2.036 ( -0.05%) [ +0.00% +0.44% +0.05% / -0.05% +0.59% +0.15%] index_select linear : Elapsed 0.020 ms (2.037 ms / 100) 2.035 -> 2.038 ( +0.15%) [ +0.00% +0.15% +0.29% / +0.15% +0.29% +0.25%] index_select reverse : Elapsed 0.020 ms (2.035 ms / 100) 2.025 -> 2.022 ( -0.15%) [ +0.15% +0.05% +0.00% / -0.15% +0.15% +0.25%] index_select skip64 : Elapsed 0.020 ms (2.028 ms / 100) 2.018 -> 2.021 ( +0.15%) [ +0.20% +0.05% +0.00% / +0.25% +0.15% +0.59%] index_select skip256 : Elapsed 0.020 ms (2.022 ms / 100) 2.057 -> 2.059 ( +0.10%) [ +0.05% +0.00% +0.34% / +0.34% +0.15% +0.10%] index_select spread : Elapsed 0.021 ms (2.058 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.15% +0.15%] index_select strided 3 : Elapsed 0.021 ms (2.063 ms / 100) 2.063 -> 2.058 ( -0.24%) [ +0.05% +0.05% +0.00% / -0.19% -0.24% +0.48%] index_select strided 5 : Elapsed 0.021 ms (2.064 ms / 100) 2.047 -> 2.054 ( +0.34%) [ +0.05% +0.00% +0.24% / +0.73% +0.34% +0.83%] index_select strided 7 : Elapsed 0.020 ms (2.048 ms / 100) 2.063 -> 2.069 ( +0.29%) [ +0.00% +0.34% +0.44% / +0.29% +0.68% +0.87%] index_select strided 8 : Elapsed 0.021 ms (2.063 ms / 100) 2.070 -> 2.064 ( -0.29%) [ +0.53% +0.00% +0.34% / +0.24% -0.29% -0.05%] index_select strided 16 : Elapsed 0.021 ms (2.081 ms / 100) 2.030 -> 2.031 ( +0.05%) [ +0.34% +0.05% +0.00% / +0.05% +0.34% +0.05%] index_select random : Elapsed 0.020 ms (2.037 ms / 100) 2.035 -> 2.038 ( +0.15%) [ +0.29% +0.20% +0.00% / +0.15% +0.39% +0.64%] index_select random_sorted : Elapsed 0.020 ms (2.041 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.44% +0.00% +0.49% / +0.29% +0.00% +0.10%] index_select perm : Elapsed 0.021 ms (2.068 ms / 100) 2.054 -> 2.055 ( +0.05%) [ +0.73% +0.44% +0.00% / +0.63% +0.05% +0.44%] index_select perm_sorted : Elapsed 0.021 ms (2.069 ms / 100) B = [40, 5, 16, 4] (stride (320, 16, 1, 80)) A = [40, 5, 16, 20] (stride (1600, 16, 1, 80)) dim = 3 0.735 -> 0.736 ( +0.14%) [ +0.41% +0.54% +0.00% / +0.27% +0.54% +0.14%] index_select const : Elapsed 0.007 ms (0.738 ms / 100) 0.774 -> 0.741 ( -4.26%) [ +0.39% +0.39% +0.00% / +0.39% -4.01% -4.26%] index_select wrap : Elapsed 0.008 ms (0.777 ms / 100) 0.777 -> 0.741 ( -4.63%) [ +0.26% +0.00% +0.13% / -0.64% -4.63% -4.25%] index_select linear : Elapsed 0.008 ms (0.779 ms / 100) 0.775 -> 0.754 ( -2.71%) [ +0.00% +0.52% +0.13% / +0.90% -2.71% -2.45%] index_select reverse : Elapsed 0.008 ms (0.775 ms / 100) 0.741 -> 0.730 ( -1.48%) [ +0.27% +0.00% +0.00% / +0.27% -1.48% -1.21%] index_select skip64 : Elapsed 0.007 ms (0.743 ms / 100) 0.738 -> 0.733 ( -0.68%) [ +0.00% +0.41% +0.00% / +0.00% -0.68% -0.68%] index_select skip256 : Elapsed 0.007 ms (0.738 ms / 100) 0.756 -> 0.736 ( -2.65%) [ +0.00% +0.00% +0.00% / +0.26% -2.65% -2.65%] index_select spread : Elapsed 0.008 ms (0.756 ms / 100) 0.760 -> 0.738 ( -2.89%) [ +0.13% +0.26% +0.00% / +0.26% -2.76% -2.89%] index_select strided 3 : Elapsed 0.008 ms (0.761 ms / 100) 0.755 -> 0.737 ( -2.38%) [ +0.00% +0.13% +0.13% / -0.13% -2.12% -2.38%] index_select strided 5 : Elapsed 0.008 ms (0.755 ms / 100) 0.751 -> 0.729 ( -2.93%) [ +0.27% +0.00% +0.40% / +0.80% -2.93% -2.40%] index_select strided 7 : Elapsed 0.008 ms (0.753 ms / 100) 0.757 -> 0.746 ( -1.45%) [ +0.00% +0.13% +0.26% / +0.13% -1.45% -0.79%] index_select strided 8 : Elapsed 0.008 ms (0.757 ms / 100) 0.756 -> 0.731 ( -3.31%) [ +0.26% +0.00% +1.06% / +0.79% -3.31% -3.31%] index_select strided 16 : Elapsed 0.008 ms (0.758 ms / 100) 0.742 -> 0.733 ( -1.21%) [ +0.27% +0.13% +0.00% / +0.27% -1.21% -0.67%] index_select random : Elapsed 0.007 ms (0.744 ms / 100) 0.746 -> 0.739 ( -0.94%) [ +0.40% +0.27% +0.00% / +0.00% -0.80% -0.94%] index_select random_sorted : Elapsed 0.007 ms (0.749 ms / 100) 0.747 -> 0.745 ( -0.27%) [ +0.13% +0.00% +0.54% / +1.20% -0.27% +1.34%] index_select perm : Elapsed 0.007 ms (0.748 ms / 100) 0.743 -> 0.749 ( +0.81%) [ +0.54% +0.00% +0.67% / +0.81% +1.88% +2.42%] index_select perm_sorted : Elapsed 0.007 ms (0.747 ms / 100) B = [40, 5, 16, 4] (stride (1, 2560, 40, 640)) A = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) dim = 3 2.155 -> 2.163 ( +0.37%) [ +0.00% +0.23% +0.19% / +0.70% +0.37% +0.74%] index_select const : Elapsed 0.022 ms (2.155 ms / 100) 2.164 -> 2.172 ( +0.37%) [ +0.00% +0.46% +0.28% / +0.37% +0.74% +0.51%] index_select wrap : Elapsed 0.022 ms (2.164 ms / 100) 2.171 -> 2.176 ( +0.23%) [ +0.37% +0.14% +0.00% / +0.23% +0.32% +0.41%] index_select linear : Elapsed 0.022 ms (2.179 ms / 100) 2.169 -> 2.172 ( +0.14%) [ +0.23% +0.32% +0.00% / +0.14% +0.92% +0.83%] index_select reverse : Elapsed 0.022 ms (2.174 ms / 100) 2.159 -> 2.157 ( -0.09%) [ +0.28% +0.00% +0.09% / -0.09% +0.60% +0.56%] index_select skip64 : Elapsed 0.022 ms (2.165 ms / 100) 2.154 -> 2.159 ( +0.23%) [ +0.19% +0.00% +0.28% / +0.23% +0.46% +0.23%] index_select skip256 : Elapsed 0.022 ms (2.158 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.46% +0.00% +0.32% / +0.09% +0.41% +0.05%] index_select spread : Elapsed 0.022 ms (2.203 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.00% +0.23%] index_select strided 3 : Elapsed 0.022 ms (2.190 ms / 100) 2.196 -> 2.191 ( -0.23%) [ +0.00% +0.23% +0.09% / -0.23% +0.00% +0.41%] index_select strided 5 : Elapsed 0.022 ms (2.196 ms / 100) 2.192 -> 2.189 ( -0.14%) [ +0.27% +0.05% +0.00% / -0.14% +0.23% +0.05%] index_select strided 7 : Elapsed 0.022 ms (2.198 ms / 100) 2.195 -> 2.199 ( +0.18%) [ +0.05% +0.18% +0.00% / +0.18% +0.46% +0.50%] index_select strided 8 : Elapsed 0.022 ms (2.196 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.09% +0.27% +0.00% / +0.23% +0.55% +0.55%] index_select strided 16 : Elapsed 0.022 ms (2.188 ms / 100) 2.181 -> 2.193 ( +0.55%) [ +0.41% +0.00% +0.09% / +0.55% +0.64% +0.64%] index_select random : Elapsed 0.022 ms (2.190 ms / 100) 2.184 -> 2.189 ( +0.23%) [ +0.27% +0.18% +0.00% / +0.23% +0.78% +0.46%] index_select random_sorted : Elapsed 0.022 ms (2.190 ms / 100) 2.192 -> 2.196 ( +0.18%) [ +0.00% +0.41% +0.14% / +0.27% +0.50% +0.18%] index_select perm : Elapsed 0.022 ms (2.192 ms / 100) 2.188 -> 2.189 ( +0.05%) [ +0.41% +0.00% +0.18% / +0.05% +0.23% +0.59%] index_select perm_sorted : Elapsed 0.022 ms (2.197 ms / 100) B = [40, 5, 16, 4] (stride (20, 4, 800, 1)) A = [40, 5, 16, 20] (stride (1600, 16, 1, 80)) dim = 3 0.774 -> 0.780 ( +0.78%) [ +0.00% +0.90% +0.00% / +0.78% +1.42% +1.03%] index_select const : Elapsed 0.008 ms (0.774 ms / 100) 0.799 -> 0.777 ( -2.75%) [ +0.25% +0.25% +0.00% / +1.13% -2.00% -2.75%] index_select wrap : Elapsed 0.008 ms (0.801 ms / 100) 0.799 -> 0.778 ( -2.63%) [ +0.00% +1.25% +1.25% / +0.00% -2.63% -2.00%] index_select linear : Elapsed 0.008 ms (0.799 ms / 100) 0.797 -> 0.790 ( -0.88%) [ +0.00% +0.25% +1.38% / +1.63% -0.88% -0.88%] index_select reverse : Elapsed 0.008 ms (0.797 ms / 100) 0.777 -> 0.772 ( -0.64%) [ +0.13% +0.00% +0.39% / +1.03% +0.26% -0.64%] index_select skip64 : Elapsed 0.008 ms (0.778 ms / 100) 0.772 -> 0.774 ( +0.26%) [ +0.26% +0.00% +0.65% / +0.26% +0.65% +1.68%] index_select skip256 : Elapsed 0.008 ms (0.774 ms / 100) 0.787 -> 0.774 ( -1.65%) [ +0.64% +0.00% +0.76% / +0.64% -0.76% -1.65%] index_select spread : Elapsed 0.008 ms (0.792 ms / 100) 0.789 -> 0.777 ( -1.52%) [ +0.00% +0.25% +0.51% / +0.63% -1.52% -1.14%] index_select strided 3 : Elapsed 0.008 ms (0.789 ms / 100) 0.778 -> 0.777 ( -0.13%) [ +0.64% +0.26% +0.00% / +2.06% -0.13% +0.26%] index_select strided 5 : Elapsed 0.008 ms (0.783 ms / 100) 0.787 -> 0.778 ( -1.14%) [ +0.00% +0.51% +0.51% / -0.13% -1.14% -1.14%] index_select strided 7 : Elapsed 0.008 ms (0.787 ms / 100) 0.786 -> 0.787 ( +0.13%) [ +0.13% +0.00% +0.38% / +0.13% +0.64% +0.51%] index_select strided 8 : Elapsed 0.008 ms (0.787 ms / 100) 0.782 -> 0.777 ( -0.64%) [ +0.51% +0.13% +0.00% / +0.13% -0.64% -0.64%] index_select strided 16 : Elapsed 0.008 ms (0.786 ms / 100) 0.770 -> 0.777 ( +0.91%) [ +0.91% +1.17% +0.00% / +1.69% +1.04% +0.91%] index_select random : Elapsed 0.008 ms (0.777 ms / 100) 0.776 -> 0.775 ( -0.13%) [ +1.03% +0.00% +1.42% / +0.77% -0.13% +0.13%] index_select random_sorted : Elapsed 0.008 ms (0.784 ms / 100) 0.787 -> 0.776 ( -1.40%) [ +0.51% +0.89% +0.00% / +1.14% -1.40% -1.27%] index_select perm : Elapsed 0.008 ms (0.791 ms / 100) 0.778 -> 0.774 ( -0.51%) [ +0.00% +0.77% +0.51% / +0.13% -0.13% -0.51%] index_select perm_sorted : Elapsed 0.008 ms (0.778 ms / 100) B = [40, 5, 16, 4] (stride (20, 4, 800, 1)) A = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) dim = 3 2.055 -> 2.047 ( -0.39%) [ +0.10% +0.05% +0.00% / +0.05% +0.19% -0.39%] index_select const : Elapsed 0.021 ms (2.057 ms / 100) 2.065 -> 2.056 ( -0.44%) [ +0.10% +0.05% +0.00% / +0.19% -0.44% +0.34%] index_select wrap : Elapsed 0.021 ms (2.067 ms / 100) 2.063 -> 2.059 ( -0.19%) [ +0.00% +0.00% +0.05% / +0.05% +0.05% -0.19%] index_select linear : Elapsed 0.021 ms (2.063 ms / 100) 2.054 -> 2.050 ( -0.19%) [ +0.88% +0.00% +0.58% / +0.44% -0.19% -0.15%] index_select reverse : Elapsed 0.021 ms (2.072 ms / 100) 2.053 -> 2.056 ( +0.15%) [ +0.00% +0.24% +0.29% / +0.19% +0.34% +0.15%] index_select skip64 : Elapsed 0.021 ms (2.053 ms / 100) 2.053 -> 2.052 ( -0.05%) [ +0.00% +0.39% +0.00% / +0.10% +0.29% -0.05%] index_select skip256 : Elapsed 0.021 ms (2.053 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.63% +0.19% +0.00% / +0.58% -0.05% +0.00%] index_select spread : Elapsed 0.021 ms (2.092 ms / 100) 2.074 -> 2.082 ( +0.39%) [ +0.24% +0.34% +0.00% / +0.39% +0.48% +0.53%] index_select strided 3 : Elapsed 0.021 ms (2.079 ms / 100) 2.086 -> 2.085 ( -0.05%) [ +0.38% +0.10% +0.00% / +0.24% +0.29% -0.05%] index_select strided 5 : Elapsed 0.021 ms (2.094 ms / 100) 2.074 -> 2.072 ( -0.10%) [ +0.00% +0.05% +0.05% / -0.10% +0.29% +0.53%] index_select strided 7 : Elapsed 0.021 ms (2.074 ms / 100) 2.077 -> 2.082 ( +0.24%) [ +0.10% +0.00% +0.00% / +0.24% +0.39% +0.67%] index_select strided 8 : Elapsed 0.021 ms (2.079 ms / 100) 2.079 -> 2.084 ( +0.24%) [ +0.00% +0.10% +0.19% / +0.24% +0.38% +0.24%] index_select strided 16 : Elapsed 0.021 ms (2.079 ms / 100) 2.057 -> 2.057 ( +0.00%) [ +0.00% +0.15% +0.10% / +0.00% +0.00% +0.44%] index_select random : Elapsed 0.021 ms (2.057 ms / 100) 2.069 -> 2.068 ( -0.05%) [ +0.19% +0.29% +0.00% / +0.19% -0.05% +0.14%] index_select random_sorted : Elapsed 0.021 ms (2.073 ms / 100) 2.076 -> 2.070 ( -0.29%) [ +0.39% +0.34% +0.00% / +0.29% +0.10% -0.29%] index_select perm : Elapsed 0.021 ms (2.084 ms / 100) 2.080 -> 2.078 ( -0.10%) [ +0.19% +0.19% +0.00% / +0.43% -0.10% +0.53%] index_select perm_sorted : Elapsed 0.021 ms (2.084 ms / 100) B = [40, 5, 16, 4] (stride (5, 1, 200, 3200)) A = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) dim = 3 2.033 -> 2.037 ( +0.20%) [ +0.30% +0.20% +0.00% / +0.20% +0.64% +1.03%] index_select const : Elapsed 0.020 ms (2.039 ms / 100) 2.052 -> 2.061 ( +0.44%) [ +0.63% +0.34% +0.00% / +0.44% +0.78% +0.97%] index_select wrap : Elapsed 0.021 ms (2.065 ms / 100) 2.046 -> 2.052 ( +0.29%) [ +0.24% +0.00% +0.29% / +0.29% +0.73% +0.64%] index_select linear : Elapsed 0.021 ms (2.051 ms / 100) 2.043 -> 2.051 ( +0.39%) [ +0.78% +0.00% +0.54% / +0.39% +0.98% +1.27%] index_select reverse : Elapsed 0.021 ms (2.059 ms / 100) 2.038 -> 2.039 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.44% +0.20%] index_select skip64 : Elapsed 0.020 ms (2.040 ms / 100) 2.032 -> 2.036 ( +0.20%) [ +0.00% +0.30% +0.05% / +0.20% +0.44% +0.79%] index_select skip256 : Elapsed 0.020 ms (2.032 ms / 100) 2.080 -> 2.077 ( -0.14%) [ +0.14% +0.29% +0.00% / -0.14% +0.34% +0.48%] index_select spread : Elapsed 0.021 ms (2.083 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.38% +0.14% +0.00% / +0.00% +0.24% +0.24%] index_select strided 3 : Elapsed 0.021 ms (2.087 ms / 100) 2.077 -> 2.082 ( +0.24%) [ +0.63% +0.48% +0.00% / +0.58% +0.24% +0.53%] index_select strided 5 : Elapsed 0.021 ms (2.090 ms / 100) 2.067 -> 2.074 ( +0.34%) [ +0.19% +0.34% +0.00% / +0.44% +0.34% +0.73%] index_select strided 7 : Elapsed 0.021 ms (2.071 ms / 100) 2.077 -> 2.080 ( +0.14%) [ +0.00% +0.10% +0.19% / +0.14% +0.53% +0.43%] index_select strided 8 : Elapsed 0.021 ms (2.077 ms / 100) 2.076 -> 2.069 ( -0.34%) [ +0.00% +0.24% +0.14% / -0.34% +0.39% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.076 ms / 100) 2.071 -> 2.079 ( +0.39%) [ +0.53% +0.05% +0.00% / +0.39% +0.72% +0.63%] index_select random : Elapsed 0.021 ms (2.082 ms / 100) 2.075 -> 2.079 ( +0.19%) [ +0.53% +0.00% +0.14% / +0.19% +0.53% +0.58%] index_select random_sorted : Elapsed 0.021 ms (2.086 ms / 100) 2.065 -> 2.076 ( +0.53%) [ +0.19% +0.48% +0.00% / +0.53% +0.92% +0.92%] index_select perm : Elapsed 0.021 ms (2.069 ms / 100) 2.062 -> 2.071 ( +0.44%) [ +0.44% +0.53% +0.00% / +0.44% +1.36% +1.36%] index_select perm_sorted : Elapsed 0.021 ms (2.071 ms / 100) out_shape = [4, 5, 20, 16] in_shape = [40, 5, 20, 16] idx_dim = 0 B = [4, 5, 20, 16] (stride (1600, 320, 1, 20)) A = [40, 5, 20, 16] (stride (80, 1, 3200, 5)) dim = 0 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_select const : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.71% +0.78%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.70% +0.55%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.78%] index_select reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.78%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.70% +0.70%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.71% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.31% +0.16% +0.00% / +0.23% +0.63% +0.63%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.39% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.47% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select perm : Elapsed 0.013 ms (1.279 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.23% +0.16%] index_select perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) B = [4, 5, 20, 16] (stride (1600, 16, 80, 1)) dim = 0 fill_cnt = 40 2.690 -> 2.678 ( -0.45%) [ +0.11% +0.00% +0.15% / -0.45% +0.15% +0.07%] index_fill_ const : Elapsed 0.027 ms (2.693 ms / 100) 2.693 -> 2.687 ( -0.22%) [ +0.15% +0.11% +0.00% / -0.22% +0.19% +0.15%] index_fill_ linear : Elapsed 0.027 ms (2.697 ms / 100) 2.684 -> 2.678 ( -0.22%) [ +0.15% +0.00% +0.19% / -0.22% +0.26% +0.26%] index_fill_ reverse : Elapsed 0.027 ms (2.688 ms / 100) 2.690 -> 2.682 ( -0.30%) [ +0.00% +0.07% +0.07% / -0.30% +0.04% -0.19%] index_fill_ skip64 : Elapsed 0.027 ms (2.690 ms / 100) 2.696 -> 2.684 ( -0.45%) [ +0.22% +0.00% +0.19% / -0.45% -0.11% -0.26%] index_fill_ skip256 : Elapsed 0.027 ms (2.702 ms / 100) 2.689 -> 2.679 ( -0.37%) [ +0.11% +0.11% +0.00% / -0.37% -0.07% -0.15%] index_fill_ spread : Elapsed 0.027 ms (2.692 ms / 100) 2.692 -> 2.687 ( -0.19%) [ +0.22% +0.19% +0.00% / -0.19% +0.11% +0.00%] index_fill_ strided 3 : Elapsed 0.027 ms (2.698 ms / 100) 2.697 -> 2.692 ( -0.19%) [ +0.00% +0.11% +0.00% / -0.19% -0.15% -0.15%] index_fill_ random : Elapsed 0.027 ms (2.697 ms / 100) 2.690 -> 2.684 ( -0.22%) [ +0.00% +0.26% +0.04% / -0.22% -0.07% -0.04%] index_fill_ random_sorted : Elapsed 0.027 ms (2.690 ms / 100) B = [4, 5, 20, 16] (stride (16, 64, 320, 1)) A = [40, 5, 20, 16] (stride (1, 800, 40, 4000)) dim = 0 1.273 -> 1.276 ( +0.24%) [ +0.16% +0.08% +0.00% / +0.24% +0.63% +0.55%] index_select const : Elapsed 0.013 ms (1.275 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.78% +0.63%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.282 ( +0.23%) [ +0.16% +0.00% +0.00% / +0.23% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.63% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.63% +0.39%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.31% +0.00% +0.00% / +0.16% +0.23% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.31% +0.31%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.278 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.31%] index_select perm : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) B = [4, 5, 20, 16] (stride (1, 64, 320, 4)) A = [40, 5, 20, 16] (stride (16, 640, 3200, 1)) dim = 0 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_select const : Elapsed 0.013 ms (1.273 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_select wrap : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.55%] index_select linear : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_select reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.39%] index_select skip256 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.39%] index_select spread : Elapsed 0.013 ms (1.274 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.31% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.47% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.275 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.55% +0.47%] index_select random : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.55% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select perm : Elapsed 0.013 ms (1.274 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.55% / +0.08% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) B = [4, 5, 20, 16] (stride (1, 4, 320, 20)) A = [40, 5, 20, 16] (stride (5, 1, 3200, 200)) dim = 0 0.601 -> 0.600 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.17% +0.00%] index_select const : Elapsed 0.006 ms (0.601 ms / 100) 0.598 -> 0.601 ( +0.50%) [ +4.68% +0.00% +0.00% / +0.50% +0.50% +0.50%] index_select wrap : Elapsed 0.006 ms (0.626 ms / 100) 0.601 -> 0.600 ( -0.17%) [ +0.00% +0.17% +0.17% / +0.17% -0.17% +0.00%] index_select linear : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.17% +0.00% +0.17% / +0.33% +0.33% +0.33%] index_select reverse : Elapsed 0.006 ms (0.600 ms / 100) 0.601 -> 0.602 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.17% +0.17%] index_select skip64 : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.597 ( -0.33%) [ +0.00% +0.00% +0.83% / -0.33% +0.33% +0.33%] index_select skip256 : Elapsed 0.006 ms (0.599 ms / 100) 0.598 -> 0.602 ( +0.67%) [ +0.00% +0.17% +0.00% / +0.67% +0.67% +0.67%] index_select spread : Elapsed 0.006 ms (0.598 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.17% +0.17% +0.00% / +3.66% +0.00% +0.17%] index_select strided 3 : Elapsed 0.006 ms (0.602 ms / 100) 0.597 -> 0.602 ( +0.84%) [ +0.67% +0.84% +0.00% / +0.84% +0.84% +0.84%] index_select strided 5 : Elapsed 0.006 ms (0.601 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.17% +0.00%] index_select strided 7 : Elapsed 0.006 ms (0.601 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.67% +0.33% +0.00% / +0.67% +0.33% +0.67%] index_select strided 8 : Elapsed 0.006 ms (0.602 ms / 100) 0.599 -> 0.602 ( +0.50%) [ +0.50% +0.50% +0.00% / +4.17% +0.67% +0.50%] index_select strided 16 : Elapsed 0.006 ms (0.602 ms / 100) 0.598 -> 0.597 ( -0.17%) [ +0.50% +0.00% +0.33% / +0.67% -0.17% +0.17%] index_select random : Elapsed 0.006 ms (0.601 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.17% +0.00% +0.17%] index_select random_sorted : Elapsed 0.006 ms (0.602 ms / 100) 0.600 -> 0.600 ( +0.00%) [ +0.33% +0.00% +0.33% / +0.00% +0.50% +0.33%] index_select perm : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.601 ( +0.00%) [ +0.83% +0.00% +3.16% / +0.00% +0.17% +0.00%] index_select perm_sorted : Elapsed 0.006 ms (0.606 ms / 100) B = [4, 5, 20, 16] (stride (100, 1, 5, 400)) A = [40, 5, 20, 16] (stride (100, 1, 5, 4000)) dim = 0 1.350 -> 1.352 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.74% +0.74%] index_select const : Elapsed 0.014 ms (1.352 ms / 100) 1.330 -> 1.329 ( -0.08%) [ +0.38% +0.00% +0.30% / -0.08% +0.60% +0.60%] index_select wrap : Elapsed 0.013 ms (1.335 ms / 100) 1.331 -> 1.330 ( -0.08%) [ +0.30% +0.00% +0.15% / -0.08% +0.75% +0.38%] index_select linear : Elapsed 0.013 ms (1.335 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.52% +0.52%] index_select reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.351 -> 1.354 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.67% +0.67%] index_select skip64 : Elapsed 0.014 ms (1.352 ms / 100) 1.354 -> 1.352 ( -0.15%) [ +0.07% +0.00% +0.00% / -0.15% +0.30% +0.52%] index_select skip256 : Elapsed 0.014 ms (1.355 ms / 100) 1.331 -> 1.333 ( +0.15%) [ +0.00% +0.00% +0.08% / +0.15% +0.60% +0.45%] index_select spread : Elapsed 0.013 ms (1.331 ms / 100) 1.339 -> 1.339 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.30%] index_select strided 3 : Elapsed 0.013 ms (1.339 ms / 100) 1.342 -> 1.344 ( +0.15%) [ +0.22% +0.00% +0.30% / +0.15% +0.30% +0.37%] index_select strided 5 : Elapsed 0.013 ms (1.345 ms / 100) 1.337 -> 1.338 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.67% +0.75%] index_select strided 7 : Elapsed 0.013 ms (1.338 ms / 100) 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.59%] index_select strided 8 : Elapsed 0.013 ms (1.348 ms / 100) 1.345 -> 1.349 ( +0.30%) [ +0.15% +0.07% +0.00% / +0.30% +0.67% +0.89%] index_select strided 16 : Elapsed 0.013 ms (1.347 ms / 100) 1.338 -> 1.340 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.90% +0.90%] index_select random : Elapsed 0.013 ms (1.341 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.60% +0.60%] index_select random_sorted : Elapsed 0.013 ms (1.335 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.60%] index_select perm : Elapsed 0.013 ms (1.323 ms / 100) 1.320 -> 1.319 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.15% +0.15%] index_select perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) B = [4, 5, 20, 16] (stride (20, 80, 1, 400)) A = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) dim = 0 1.315 -> 1.319 ( +0.30%) [ +0.61% +0.23% +0.00% / +0.38% +0.30% +0.38%] index_select const : Elapsed 0.013 ms (1.323 ms / 100) 1.302 -> 1.305 ( +0.23%) [ +0.15% +0.23% +0.00% / +0.23% +0.92% +0.61%] index_select wrap : Elapsed 0.013 ms (1.304 ms / 100) 1.308 -> 1.311 ( +0.23%) [ +0.31% +0.23% +0.00% / +0.23% +0.84% +0.76%] index_select linear : Elapsed 0.013 ms (1.312 ms / 100) 1.309 -> 1.311 ( +0.15%) [ +0.00% +0.15% +0.08% / +0.15% +0.92% +0.84%] index_select reverse : Elapsed 0.013 ms (1.309 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.08% +0.00% +0.15% / +0.08% +0.69% +0.76%] index_select skip64 : Elapsed 0.013 ms (1.310 ms / 100) 1.309 -> 1.312 ( +0.23%) [ +0.15% +0.00% +0.08% / +0.23% +0.84% +0.76%] index_select skip256 : Elapsed 0.013 ms (1.311 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.00% +0.00% +0.15% / +0.08% +0.92% +0.76%] index_select spread : Elapsed 0.013 ms (1.308 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.68% +0.61%] index_select strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.23% +0.15% +0.15%] index_select strided 5 : Elapsed 0.013 ms (1.318 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.54% +0.62%] index_select strided 7 : Elapsed 0.013 ms (1.288 ms / 100) 1.297 -> 1.298 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.62% +0.46%] index_select strided 8 : Elapsed 0.013 ms (1.298 ms / 100) 1.293 -> 1.294 ( +0.08%) [ +0.31% +0.00% +0.00% / +0.08% +0.62% +0.62%] index_select strided 16 : Elapsed 0.013 ms (1.297 ms / 100) 1.304 -> 1.306 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.54% +0.54%] index_select random : Elapsed 0.013 ms (1.305 ms / 100) 1.297 -> 1.300 ( +0.23%) [ +0.23% +0.00% +0.00% / +0.23% +0.39% +0.54%] index_select random_sorted : Elapsed 0.013 ms (1.300 ms / 100) 1.310 -> 1.311 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.46% +0.46%] index_select perm : Elapsed 0.013 ms (1.312 ms / 100) 1.304 -> 1.306 ( +0.15%) [ +0.00% +0.38% +0.23% / +0.15% +1.07% +0.69%] index_select perm_sorted : Elapsed 0.013 ms (1.304 ms / 100) B = [4, 5, 20, 16] (stride (20, 80, 1, 400)) A = [40, 5, 20, 16] (stride (5, 1, 3200, 200)) dim = 0 1.278 -> 1.280 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.70% +0.63%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +0.70%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.78% +0.70%] index_select reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.78%] index_select skip64 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.280 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.00% +0.23% / +0.08% +0.70% +0.63%] index_select strided 5 : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_select strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.23% +0.23% +0.00% / +0.23% +0.55% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.284 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.63%] index_select perm : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_select perm_sorted : Elapsed 0.013 ms (1.282 ms / 100) B = [4, 5, 20, 16] (stride (1, 80, 4, 400)) A = [40, 5, 20, 16] (stride (100, 20, 1, 4000)) dim = 0 0.587 -> 0.587 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.17% +0.00%] index_select const : Elapsed 0.006 ms (0.587 ms / 100) 0.586 -> 0.585 ( -0.17%) [ +0.00% +0.17% +0.85% / -0.17% +0.51% +0.51%] index_select wrap : Elapsed 0.006 ms (0.586 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.17% +0.34% +0.00% / +0.17% +0.68% +0.51%] index_select linear : Elapsed 0.006 ms (0.586 ms / 100) 0.588 -> 0.587 ( -0.17%) [ +0.34% +0.17% +0.00% / +0.00% +0.00% -0.17%] index_select reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.587 -> 0.587 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.17% +0.00%] index_select skip64 : Elapsed 0.006 ms (0.587 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +1.71% +0.51%] index_select skip256 : Elapsed 0.006 ms (0.585 ms / 100) 0.599 -> 0.596 ( -0.50%) [ +0.00% +0.17% +0.00% / -0.50% +0.33% +0.17%] index_select spread : Elapsed 0.006 ms (0.599 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.17% +0.17% +0.00% / +0.34% +0.85% +0.68%] index_select strided 3 : Elapsed 0.006 ms (0.586 ms / 100) 0.587 -> 0.589 ( +0.34%) [ +0.00% +0.17% +0.17% / +0.34% +0.68% +0.51%] index_select strided 5 : Elapsed 0.006 ms (0.587 ms / 100) 0.588 -> 0.588 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select strided 7 : Elapsed 0.006 ms (0.588 ms / 100) 0.590 -> 0.588 ( -0.34%) [ +0.00% +0.68% +0.00% / +0.00% -0.34% -0.17%] index_select strided 8 : Elapsed 0.006 ms (0.590 ms / 100) 0.590 -> 0.589 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.00% -0.17%] index_select strided 16 : Elapsed 0.006 ms (0.590 ms / 100) 0.588 -> 0.588 ( +0.00%) [ +0.17% +0.51% +0.00% / +0.17% +3.57% +0.00%] index_select random : Elapsed 0.006 ms (0.589 ms / 100) 0.589 -> 0.589 ( +0.00%) [ +0.00% +5.09% +0.00% / +0.00% +0.17% +0.17%] index_select random_sorted : Elapsed 0.006 ms (0.589 ms / 100) 0.589 -> 0.590 ( +0.17%) [ +0.00% +0.34% +0.00% / +0.17% +0.51% +0.34%] index_select perm : Elapsed 0.006 ms (0.589 ms / 100) 0.586 -> 0.585 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.34% +0.51%] index_select perm_sorted : Elapsed 0.006 ms (0.586 ms / 100) B = [4, 5, 20, 16] (stride (1, 80, 4, 400)) A = [40, 5, 20, 16] (stride (1, 40, 200, 4000)) dim = 0 1.281 -> 1.280 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.55% +0.47%] index_select const : Elapsed 0.013 ms (1.283 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.70% +0.70%] index_select wrap : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.78%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.78% +0.78%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.24% +0.00% +0.08% / +0.24% +0.71% +0.78%] index_select skip64 : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.62% +0.62%] index_select spread : Elapsed 0.013 ms (1.283 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.63% +0.78%] index_select strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.63% +0.78%] index_select strided 5 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.279 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.55% +0.47%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.08% +0.70% +0.63%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.63% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.282 ms / 100) out_shape = [40, 4, 20, 16] in_shape = [40, 5, 20, 16] idx_dim = 1 B = [40, 4, 20, 16] (stride (1280, 320, 16, 1)) A = [40, 5, 20, 16] (stride (100, 20, 1, 4000)) dim = 1 5.541 -> 5.517 ( -0.43%) [ +0.07% +0.00% +0.05% / +0.07% -0.36% -0.43%] index_select const : Elapsed 0.055 ms (5.545 ms / 100) 5.594 -> 5.588 ( -0.11%) [ +0.00% +0.11% +0.20% / +0.16% -0.11% -0.09%] index_select wrap : Elapsed 0.056 ms (5.594 ms / 100) 5.589 -> 5.589 ( +0.00%) [ +0.20% +0.00% +0.14% / +0.25% +0.00% +0.00%] index_select linear : Elapsed 0.056 ms (5.600 ms / 100) 5.596 -> 5.584 ( -0.21%) [ +0.00% +0.00% +0.07% / -0.02% -0.21% -0.07%] index_select reverse : Elapsed 0.056 ms (5.596 ms / 100) 5.538 -> 5.518 ( -0.36%) [ +0.02% +0.00% +0.13% / +0.05% -0.36% -0.33%] index_select skip64 : Elapsed 0.055 ms (5.539 ms / 100) 5.537 -> 5.514 ( -0.42%) [ +0.00% +0.16% +0.09% / +0.11% -0.05% -0.42%] index_select skip256 : Elapsed 0.055 ms (5.537 ms / 100) 5.592 -> 5.587 ( -0.09%) [ +0.00% +0.13% +0.09% / +0.07% -0.09% -0.07%] index_select spread : Elapsed 0.056 ms (5.592 ms / 100) 5.606 -> 5.593 ( -0.23%) [ +0.04% +0.00% +0.11% / +0.27% -0.23% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.608 ms / 100) 5.578 -> 5.571 ( -0.13%) [ +0.00% +0.09% +0.07% / +0.16% -0.13% -0.11%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.00% +0.14% +0.14% / +0.11% -0.07% -0.11%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) 5.600 -> 5.594 ( -0.11%) [ +0.00% +0.02% +0.04% / +0.07% -0.11% -0.04%] index_select perm : Elapsed 0.056 ms (5.600 ms / 100) 5.595 -> 5.591 ( -0.07%) [ +0.00% +0.05% +0.07% / +0.00% -0.04% -0.07%] index_select perm_sorted : Elapsed 0.056 ms (5.595 ms / 100) B = [40, 4, 20, 16] (stride (1280, 20, 1, 80)) A = [40, 5, 20, 16] (stride (1600, 20, 1, 100)) dim = 1 5.848 -> 5.837 ( -0.19%) [ +0.07% +0.02% +0.00% / -0.02% -0.03% -0.19%] index_select const : Elapsed 0.059 ms (5.852 ms / 100) 5.907 -> 5.894 ( -0.22%) [ +0.05% +0.00% +0.14% / +0.02% -0.07% -0.22%] index_select wrap : Elapsed 0.059 ms (5.910 ms / 100) 5.914 -> 5.897 ( -0.29%) [ +0.00% +0.07% +0.02% / +0.03% -0.27% -0.29%] index_select linear : Elapsed 0.059 ms (5.914 ms / 100) 5.903 -> 5.891 ( -0.20%) [ +0.22% +0.00% +0.12% / +0.17% -0.08% -0.20%] index_select reverse : Elapsed 0.059 ms (5.916 ms / 100) 5.850 -> 5.844 ( -0.10%) [ +0.02% +0.00% +0.07% / +0.14% -0.10% -0.10%] index_select skip64 : Elapsed 0.059 ms (5.851 ms / 100) 5.849 -> 5.837 ( -0.21%) [ +0.03% +0.00% +0.07% / +0.10% -0.21% -0.07%] index_select skip256 : Elapsed 0.059 ms (5.851 ms / 100) 5.906 -> 5.895 ( -0.19%) [ +0.00% +0.05% +0.08% / +0.12% -0.10% -0.19%] index_select spread : Elapsed 0.059 ms (5.906 ms / 100) 5.917 -> 5.910 ( -0.12%) [ +0.07% +0.00% +0.00% / +0.14% -0.10% -0.12%] index_select strided 3 : Elapsed 0.059 ms (5.921 ms / 100) 5.891 -> 5.882 ( -0.15%) [ +0.17% +0.10% +0.00% / +0.14% -0.15% -0.14%] index_select random : Elapsed 0.059 ms (5.901 ms / 100) 5.891 -> 5.883 ( -0.14%) [ +0.00% +0.03% +0.00% / +0.10% -0.10% -0.14%] index_select random_sorted : Elapsed 0.059 ms (5.891 ms / 100) 5.908 -> 5.906 ( -0.03%) [ +0.20% +0.00% +0.25% / +0.10% +0.02% -0.03%] index_select perm : Elapsed 0.059 ms (5.920 ms / 100) 5.909 -> 5.896 ( -0.22%) [ +0.05% +0.00% +0.24% / +0.20% -0.15% -0.22%] index_select perm_sorted : Elapsed 0.059 ms (5.912 ms / 100) B = [40, 4, 20, 16] (stride (1, 12800, 40, 800)) A = [40, 5, 20, 16] (stride (1600, 320, 16, 1)) dim = 1 5.414 -> 5.409 ( -0.09%) [ +0.09% +0.09% +0.00% / +0.04% +0.00% -0.09%] index_select const : Elapsed 0.054 ms (5.419 ms / 100) 5.510 -> 5.492 ( -0.33%) [ +0.09% +0.02% +0.00% / -0.05% -0.31% -0.33%] index_select wrap : Elapsed 0.055 ms (5.515 ms / 100) 5.494 -> 5.498 ( +0.07%) [ +0.11% +0.00% +0.09% / +0.09% +0.27% +0.07%] index_select linear : Elapsed 0.055 ms (5.500 ms / 100) 5.504 -> 5.486 ( -0.33%) [ +0.09% +0.00% +0.05% / +0.13% -0.33% -0.18%] index_select reverse : Elapsed 0.055 ms (5.509 ms / 100) 5.402 -> 5.408 ( +0.11%) [ +0.07% +0.15% +0.00% / +0.17% +0.22% +0.11%] index_select skip64 : Elapsed 0.054 ms (5.406 ms / 100) 5.406 -> 5.412 ( +0.11%) [ +0.15% +0.00% +0.09% / +0.11% +0.15% +0.11%] index_select skip256 : Elapsed 0.054 ms (5.414 ms / 100) 5.508 -> 5.492 ( -0.29%) [ +0.22% +0.00% +0.13% / +0.13% -0.22% -0.29%] index_select spread : Elapsed 0.055 ms (5.520 ms / 100) 5.499 -> 5.489 ( -0.18%) [ +0.02% +0.00% +0.00% / +0.05% -0.18% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.500 ms / 100) 5.450 -> 5.447 ( -0.06%) [ +0.00% +0.20% +0.17% / +0.26% +0.06% -0.06%] index_select random : Elapsed 0.055 ms (5.450 ms / 100) 5.457 -> 5.436 ( -0.38%) [ +0.02% +0.00% +0.05% / +0.18% -0.38% -0.15%] index_select random_sorted : Elapsed 0.055 ms (5.458 ms / 100) 5.503 -> 5.498 ( -0.09%) [ +0.13% +0.16% +0.00% / +0.18% -0.09% +0.00%] index_select perm : Elapsed 0.055 ms (5.510 ms / 100) 5.498 -> 5.492 ( -0.11%) [ +0.11% +0.15% +0.00% / +0.15% -0.09% -0.11%] index_select perm_sorted : Elapsed 0.055 ms (5.504 ms / 100) B = [40, 4, 20, 16] (stride (1, 640, 2560, 40)) A = [40, 5, 20, 16] (stride (20, 800, 1, 4000)) dim = 1 5.742 -> 5.744 ( +0.03%) [ +0.00% +0.09% +0.03% / +0.03% +0.12% +0.05%] index_select const : Elapsed 0.057 ms (5.742 ms / 100) 5.805 -> 5.814 ( +0.16%) [ +0.31% +0.00% +0.26% / +0.21% +0.17% +0.16%] index_select wrap : Elapsed 0.058 ms (5.823 ms / 100) 5.815 -> 5.815 ( +0.00%) [ +0.05% +0.00% +0.14% / +0.15% +0.00% +0.00%] index_select linear : Elapsed 0.058 ms (5.818 ms / 100) 5.848 -> 5.826 ( -0.38%) [ +0.00% +0.03% +0.14% / +0.07% -0.38% -0.26%] index_select reverse : Elapsed 0.058 ms (5.848 ms / 100) 5.742 -> 5.738 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.17% -0.02%] index_select skip64 : Elapsed 0.057 ms (5.746 ms / 100) 5.747 -> 5.746 ( -0.02%) [ +0.03% +0.00% +0.09% / +0.03% +0.02% -0.02%] index_select skip256 : Elapsed 0.057 ms (5.749 ms / 100) 5.813 -> 5.813 ( +0.00%) [ +0.00% +0.07% +0.29% / +0.00% +0.02% +0.12%] index_select spread : Elapsed 0.058 ms (5.813 ms / 100) 5.817 -> 5.810 ( -0.12%) [ +0.03% +0.00% +0.15% / +0.15% -0.12% -0.02%] index_select strided 3 : Elapsed 0.058 ms (5.819 ms / 100) 5.812 -> 5.827 ( +0.26%) [ +0.15% +0.00% +0.29% / +0.28% +0.34% +0.26%] index_select random : Elapsed 0.058 ms (5.821 ms / 100) 5.817 -> 5.818 ( +0.02%) [ +0.12% +0.00% +0.12% / +0.05% +0.02% +0.03%] index_select random_sorted : Elapsed 0.058 ms (5.824 ms / 100) 5.840 -> 5.842 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.09% +0.03% +0.15%] index_select perm : Elapsed 0.058 ms (5.843 ms / 100) 5.814 -> 5.816 ( +0.03%) [ +0.02% +0.00% +0.03% / +0.12% +0.12% +0.03%] index_select perm_sorted : Elapsed 0.058 ms (5.815 ms / 100) B = [40, 4, 20, 16] (stride (80, 20, 1, 3200)) A = [40, 5, 20, 16] (stride (16, 640, 3200, 1)) dim = 1 5.761 -> 5.728 ( -0.57%) [ +0.00% +0.05% +0.09% / +0.10% -0.56% -0.57%] index_select const : Elapsed 0.058 ms (5.761 ms / 100) 5.841 -> 5.830 ( -0.19%) [ +0.07% +0.07% +0.00% / +0.12% -0.19% +0.00%] index_select wrap : Elapsed 0.058 ms (5.845 ms / 100) 5.830 -> 5.823 ( -0.12%) [ +0.05% +0.00% +0.10% / +0.19% -0.12% -0.03%] index_select linear : Elapsed 0.058 ms (5.833 ms / 100) 5.836 -> 5.833 ( -0.05%) [ +0.03% +0.00% +0.14% / -0.05% +0.09% +0.05%] index_select reverse : Elapsed 0.058 ms (5.838 ms / 100) 5.740 -> 5.711 ( -0.51%) [ +0.09% +0.00% +0.23% / +0.21% -0.51% -0.37%] index_select skip64 : Elapsed 0.057 ms (5.745 ms / 100) 5.743 -> 5.715 ( -0.49%) [ +0.00% +0.14% +0.16% / +0.10% -0.42% -0.49%] index_select skip256 : Elapsed 0.057 ms (5.743 ms / 100) 5.840 -> 5.834 ( -0.10%) [ +0.09% +0.02% +0.00% / +0.00% -0.07% -0.10%] index_select spread : Elapsed 0.058 ms (5.845 ms / 100) 5.836 -> 5.820 ( -0.27%) [ +0.00% +0.00% +0.02% / +0.14% -0.24% -0.27%] index_select strided 3 : Elapsed 0.058 ms (5.836 ms / 100) 5.828 -> 5.826 ( -0.03%) [ +0.00% +0.00% +0.15% / +0.02% -0.03% +0.02%] index_select random : Elapsed 0.058 ms (5.828 ms / 100) 5.823 -> 5.825 ( +0.03%) [ +0.03% +0.00% +0.27% / +0.15% +0.03% +0.14%] index_select random_sorted : Elapsed 0.058 ms (5.825 ms / 100) 5.846 -> 5.844 ( -0.03%) [ +0.07% +0.10% +0.00% / +0.02% -0.03% -0.03%] index_select perm : Elapsed 0.058 ms (5.850 ms / 100) 5.831 -> 5.817 ( -0.24%) [ +0.00% +0.09% +0.09% / +0.19% -0.12% -0.24%] index_select perm_sorted : Elapsed 0.058 ms (5.831 ms / 100) B = [40, 4, 20, 16] (stride (80, 1, 4, 3200)) A = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) dim = 1 5.831 -> 5.822 ( -0.15%) [ +0.03% +0.00% +0.14% / +0.03% -0.09% -0.15%] index_select const : Elapsed 0.058 ms (5.833 ms / 100) 5.905 -> 5.883 ( -0.37%) [ +0.17% +0.00% +0.03% / +0.10% -0.37% -0.36%] index_select wrap : Elapsed 0.059 ms (5.915 ms / 100) 5.905 -> 5.884 ( -0.36%) [ +0.02% +0.00% +0.10% / -0.05% -0.29% -0.36%] index_select linear : Elapsed 0.059 ms (5.906 ms / 100) 5.896 -> 5.882 ( -0.24%) [ +0.00% +0.15% +0.02% / +0.15% -0.20% -0.24%] index_select reverse : Elapsed 0.059 ms (5.896 ms / 100) 5.831 -> 5.822 ( -0.15%) [ +0.00% +0.00% +0.10% / +0.05% +0.00% -0.15%] index_select skip64 : Elapsed 0.058 ms (5.831 ms / 100) 5.827 -> 5.827 ( +0.00%) [ +0.03% +0.00% +0.07% / +0.10% +0.03% +0.00%] index_select skip256 : Elapsed 0.058 ms (5.829 ms / 100) 5.905 -> 5.887 ( -0.30%) [ +0.00% +0.03% +0.02% / +0.22% -0.30% -0.22%] index_select spread : Elapsed 0.059 ms (5.905 ms / 100) 5.901 -> 5.888 ( -0.22%) [ +0.03% +0.00% +0.20% / +0.14% -0.22% -0.22%] index_select strided 3 : Elapsed 0.059 ms (5.903 ms / 100) 5.863 -> 5.868 ( +0.09%) [ +0.00% +0.10% +0.22% / +0.20% +0.09% +0.27%] index_select random : Elapsed 0.059 ms (5.863 ms / 100) 5.872 -> 5.866 ( -0.10%) [ +0.05% +0.00% +0.05% / -0.07% -0.10% -0.03%] index_select random_sorted : Elapsed 0.059 ms (5.875 ms / 100) 5.903 -> 5.883 ( -0.34%) [ +0.00% +0.02% +0.05% / +0.00% -0.34% -0.32%] index_select perm : Elapsed 0.059 ms (5.903 ms / 100) 5.893 -> 5.892 ( -0.02%) [ +0.00% +0.05% +0.14% / +0.17% -0.02% +0.00%] index_select perm_sorted : Elapsed 0.059 ms (5.893 ms / 100) out_shape = [40, 5, 4, 16] in_shape = [40, 5, 20, 16] idx_dim = 2 B = [40, 5, 4, 16] (stride (64, 2560, 1, 4)) A = [40, 5, 20, 16] (stride (1, 12800, 640, 40)) dim = 2 2.171 -> 2.174 ( +0.14%) [ +0.00% +0.18% +0.05% / +0.14% +0.74% +0.51%] index_select const : Elapsed 0.022 ms (2.171 ms / 100) 2.178 -> 2.180 ( +0.09%) [ +0.05% +0.14% +0.00% / +0.09% +0.83% +0.69%] index_select wrap : Elapsed 0.022 ms (2.179 ms / 100) 2.178 -> 2.176 ( -0.09%) [ +0.00% +0.14% +0.18% / -0.09% +0.78% +0.64%] index_select linear : Elapsed 0.022 ms (2.178 ms / 100) 2.178 -> 2.181 ( +0.14%) [ +0.05% +0.00% +0.09% / +0.14% +0.78% +0.64%] index_select reverse : Elapsed 0.022 ms (2.179 ms / 100) 2.177 -> 2.176 ( -0.05%) [ +0.09% +0.18% +0.00% / -0.05% +0.55% +0.64%] index_select skip64 : Elapsed 0.022 ms (2.179 ms / 100) 2.173 -> 2.175 ( +0.09%) [ +0.28% +0.00% +0.09% / +0.09% +0.64% +0.64%] index_select skip256 : Elapsed 0.022 ms (2.179 ms / 100) 2.184 -> 2.183 ( -0.05%) [ +0.00% +0.09% +0.05% / -0.05% +0.46% +0.78%] index_select spread : Elapsed 0.022 ms (2.184 ms / 100) 2.182 -> 2.181 ( -0.05%) [ +0.23% +0.00% +0.05% / -0.05% +0.55% +0.69%] index_select strided 3 : Elapsed 0.022 ms (2.187 ms / 100) 2.179 -> 2.183 ( +0.18%) [ +0.18% +0.00% +0.05% / +0.18% +0.69% +0.64%] index_select strided 5 : Elapsed 0.022 ms (2.183 ms / 100) 2.184 -> 2.188 ( +0.18%) [ +0.00% +0.05% +0.05% / +0.18% +0.64% +0.50%] index_select strided 7 : Elapsed 0.022 ms (2.184 ms / 100) 2.179 -> 2.183 ( +0.18%) [ +0.00% +0.14% +0.00% / +0.18% +0.92% +0.87%] index_select strided 8 : Elapsed 0.022 ms (2.179 ms / 100) 2.177 -> 2.180 ( +0.14%) [ +0.28% +0.28% +0.00% / +0.14% +0.60% +0.92%] index_select strided 16 : Elapsed 0.022 ms (2.183 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.37% +0.27%] index_select random : Elapsed 0.022 ms (2.188 ms / 100) 2.188 -> 2.188 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.00% +0.55% +0.50%] index_select random_sorted : Elapsed 0.022 ms (2.189 ms / 100) 2.186 -> 2.186 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.37% +0.55%] index_select perm : Elapsed 0.022 ms (2.188 ms / 100) 2.186 -> 2.187 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.46% +0.32%] index_select perm_sorted : Elapsed 0.022 ms (2.187 ms / 100) B = [40, 5, 4, 16] (stride (80, 1, 3200, 5)) A = [40, 5, 20, 16] (stride (320, 12800, 1, 20)) dim = 2 2.148 -> 2.146 ( -0.09%) [ +0.05% +0.14% +0.00% / +0.14% -0.05% -0.09%] index_select const : Elapsed 0.021 ms (2.149 ms / 100) 2.141 -> 2.138 ( -0.14%) [ +0.00% +0.37% +0.19% / +0.00% -0.05% -0.14%] index_select wrap : Elapsed 0.021 ms (2.141 ms / 100) 2.142 -> 2.137 ( -0.23%) [ +0.00% +0.05% +0.00% / +0.19% -0.23% -0.14%] index_select linear : Elapsed 0.021 ms (2.142 ms / 100) 2.148 -> 2.145 ( -0.14%) [ +0.19% +0.00% +0.14% / +0.14% +0.00% -0.14%] index_select reverse : Elapsed 0.022 ms (2.152 ms / 100) 2.149 -> 2.147 ( -0.09%) [ +0.23% +0.00% +0.00% / +0.14% -0.09% -0.05%] index_select skip64 : Elapsed 0.022 ms (2.154 ms / 100) 2.142 -> 2.136 ( -0.28%) [ +0.19% +0.00% +0.28% / +0.09% -0.28% -0.14%] index_select skip256 : Elapsed 0.021 ms (2.146 ms / 100) 2.199 -> 2.194 ( -0.23%) [ +0.18% +0.00% +0.05% / +0.00% -0.23% -0.09%] index_select spread : Elapsed 0.022 ms (2.203 ms / 100) 2.185 -> 2.183 ( -0.09%) [ +0.05% +0.18% +0.00% / +0.18% -0.09% -0.05%] index_select strided 3 : Elapsed 0.022 ms (2.186 ms / 100) 2.205 -> 2.205 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.00% +0.09%] index_select strided 5 : Elapsed 0.022 ms (2.210 ms / 100) 2.189 -> 2.186 ( -0.14%) [ +0.18% +0.14% +0.00% / +0.23% -0.14% -0.05%] index_select strided 7 : Elapsed 0.022 ms (2.193 ms / 100) 2.185 -> 2.185 ( +0.00%) [ +0.18% +0.46% +0.00% / +0.50% +0.00% +0.23%] index_select strided 8 : Elapsed 0.022 ms (2.189 ms / 100) 2.196 -> 2.196 ( +0.00%) [ +0.00% +0.09% +0.27% / +0.18% +0.00% +0.00%] index_select strided 16 : Elapsed 0.022 ms (2.196 ms / 100) 2.173 -> 2.173 ( +0.00%) [ +0.37% +0.00% +0.00% / +0.18% +0.00% +0.32%] index_select random : Elapsed 0.022 ms (2.181 ms / 100) 2.182 -> 2.184 ( +0.09%) [ +0.18% +0.23% +0.00% / +0.09% +0.32% +0.18%] index_select random_sorted : Elapsed 0.022 ms (2.186 ms / 100) 2.184 -> 2.186 ( +0.09%) [ +0.00% +0.09% +0.23% / +0.27% +0.09% +0.14%] index_select perm : Elapsed 0.022 ms (2.184 ms / 100) 2.177 -> 2.172 ( -0.23%) [ +0.00% +0.09% +0.14% / +0.05% -0.23% -0.18%] index_select perm_sorted : Elapsed 0.022 ms (2.177 ms / 100) B = [40, 5, 4, 16] (stride (16, 640, 3200, 1)) A = [40, 5, 20, 16] (stride (320, 12800, 16, 1)) dim = 2 2.015 -> 2.015 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.25% +0.10% +0.00%] index_select const : Elapsed 0.020 ms (2.016 ms / 100) 2.048 -> 2.047 ( -0.05%) [ +0.05% +0.15% +0.00% / -0.05% +0.29% +0.20%] index_select wrap : Elapsed 0.020 ms (2.049 ms / 100) 2.045 -> 2.045 ( +0.00%) [ +0.00% +0.34% +0.29% / +0.00% +0.44% +0.54%] index_select linear : Elapsed 0.020 ms (2.045 ms / 100) 2.051 -> 2.062 ( +0.54%) [ +0.00% +0.05% +0.10% / +0.54% +0.63% +0.63%] index_select reverse : Elapsed 0.021 ms (2.051 ms / 100) 2.007 -> 2.006 ( -0.05%) [ +0.15% +0.10% +0.00% / +0.10% -0.05% +0.10%] index_select skip64 : Elapsed 0.020 ms (2.010 ms / 100) 2.005 -> 2.008 ( +0.15%) [ +0.25% +0.15% +0.00% / +0.40% +0.15% +0.20%] index_select skip256 : Elapsed 0.020 ms (2.010 ms / 100) 2.049 -> 2.054 ( +0.24%) [ +0.00% +0.15% +0.05% / +0.29% +0.24% +0.39%] index_select spread : Elapsed 0.020 ms (2.049 ms / 100) 2.042 -> 2.051 ( +0.44%) [ +0.29% +0.00% +0.44% / +0.44% +0.59% +0.59%] index_select strided 3 : Elapsed 0.020 ms (2.048 ms / 100) 2.046 -> 2.050 ( +0.20%) [ +0.10% +0.15% +0.00% / +0.20% +0.34% +0.59%] index_select strided 5 : Elapsed 0.020 ms (2.048 ms / 100) 2.047 -> 2.050 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.15% +0.54%] index_select strided 7 : Elapsed 0.021 ms (2.050 ms / 100) 2.046 -> 2.050 ( +0.20%) [ +0.44% +0.00% +0.10% / +0.20% +0.44% +0.44%] index_select strided 8 : Elapsed 0.021 ms (2.055 ms / 100) 2.046 -> 2.049 ( +0.15%) [ +0.20% +0.00% +0.10% / +0.15% +0.34% +0.39%] index_select strided 16 : Elapsed 0.021 ms (2.050 ms / 100) 2.050 -> 2.052 ( +0.10%) [ +0.15% +0.00% +0.15% / +0.34% +0.29% +0.10%] index_select random : Elapsed 0.021 ms (2.053 ms / 100) 2.049 -> 2.051 ( +0.10%) [ +0.15% +0.15% +0.00% / +0.10% +0.24% +0.20%] index_select random_sorted : Elapsed 0.021 ms (2.052 ms / 100) 2.048 -> 2.050 ( +0.10%) [ +0.15% +0.00% +0.15% / +0.10% +0.49% +0.34%] index_select perm : Elapsed 0.021 ms (2.051 ms / 100) 2.045 -> 2.052 ( +0.34%) [ +0.00% +0.29% +0.39% / +0.49% +0.49% +0.34%] index_select perm_sorted : Elapsed 0.020 ms (2.045 ms / 100) B = [40, 5, 4, 16] (stride (4, 160, 1, 800)) A = [40, 5, 20, 16] (stride (1600, 16, 80, 1)) dim = 2 2.009 -> 2.008 ( -0.05%) [ +0.00% +0.10% +0.00% / -0.05% +0.35% +0.50%] index_select const : Elapsed 0.020 ms (2.009 ms / 100) 2.057 -> 2.056 ( -0.05%) [ +0.19% +0.00% +0.00% / +0.05% -0.05% +0.10%] index_select wrap : Elapsed 0.021 ms (2.061 ms / 100) 2.053 -> 2.057 ( +0.19%) [ +0.34% +0.15% +0.00% / +0.29% +0.34% +0.19%] index_select linear : Elapsed 0.021 ms (2.060 ms / 100) 2.053 -> 2.060 ( +0.34%) [ +0.00% +0.39% +0.34% / +0.34% +0.39% +0.49%] index_select reverse : Elapsed 0.021 ms (2.053 ms / 100) 2.006 -> 2.012 ( +0.30%) [ +0.25% +0.00% +0.05% / +0.30% +0.70% +0.60%] index_select skip64 : Elapsed 0.020 ms (2.011 ms / 100) 2.011 -> 2.014 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.50% +0.60%] index_select skip256 : Elapsed 0.020 ms (2.017 ms / 100) 2.051 -> 2.060 ( +0.44%) [ +0.24% +0.00% +0.00% / +0.44% +0.83% +0.54%] index_select spread : Elapsed 0.021 ms (2.056 ms / 100) 2.059 -> 2.054 ( -0.24%) [ +0.29% +0.00% +0.05% / -0.24% +0.00% +0.39%] index_select strided 3 : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.061 ( -0.15%) [ +0.00% +0.44% +0.10% / -0.15% +0.19% +0.15%] index_select strided 5 : Elapsed 0.021 ms (2.064 ms / 100) 2.057 -> 2.062 ( +0.24%) [ +0.05% +0.00% +0.05% / +0.29% +0.34% +0.24%] index_select strided 7 : Elapsed 0.021 ms (2.058 ms / 100) 2.060 -> 2.059 ( -0.05%) [ +0.29% +0.00% +0.19% / -0.05% +0.53% +0.34%] index_select strided 8 : Elapsed 0.021 ms (2.066 ms / 100) 2.055 -> 2.056 ( +0.05%) [ +0.34% +0.44% +0.00% / +0.24% +0.49% +0.05%] index_select strided 16 : Elapsed 0.021 ms (2.062 ms / 100) 2.056 -> 2.058 ( +0.10%) [ +0.15% +0.10% +0.00% / +0.10% +0.10% +0.34%] index_select random : Elapsed 0.021 ms (2.059 ms / 100) 2.053 -> 2.055 ( +0.10%) [ +0.24% +0.00% +0.29% / +0.10% +0.39% +0.39%] index_select random_sorted : Elapsed 0.021 ms (2.058 ms / 100) 2.055 -> 2.059 ( +0.19%) [ +0.00% +0.00% +0.05% / +0.19% +0.29% +0.49%] index_select perm : Elapsed 0.021 ms (2.055 ms / 100) 2.058 -> 2.059 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +0.15% +0.19%] index_select perm_sorted : Elapsed 0.021 ms (2.061 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 200, 800)) A = [40, 5, 20, 16] (stride (1600, 1, 5, 100)) dim = 2 2.041 -> 2.038 ( -0.15%) [ +0.10% +0.00% +0.10% / +0.00% +0.00% -0.15%] index_select const : Elapsed 0.020 ms (2.043 ms / 100) 2.056 -> 2.063 ( +0.34%) [ +0.00% +0.34% +0.15% / +0.34% +0.44% +0.34%] index_select wrap : Elapsed 0.021 ms (2.056 ms / 100) 2.054 -> 2.059 ( +0.24%) [ +0.44% +0.24% +0.00% / +0.24% +0.44% +0.49%] index_select linear : Elapsed 0.021 ms (2.063 ms / 100) 2.052 -> 2.056 ( +0.19%) [ +0.73% +0.34% +0.00% / +0.19% +0.54% +0.78%] index_select reverse : Elapsed 0.021 ms (2.067 ms / 100) 2.046 -> 2.045 ( -0.05%) [ +0.10% +0.00% +0.34% / -0.05% -0.05% +0.10%] index_select skip64 : Elapsed 0.020 ms (2.048 ms / 100) 2.043 -> 2.038 ( -0.24%) [ +0.00% +0.34% +0.44% / +0.34% -0.24% +0.24%] index_select skip256 : Elapsed 0.020 ms (2.043 ms / 100) 2.073 -> 2.084 ( +0.53%) [ +0.63% +0.63% +0.00% / +0.53% +0.53% +0.72%] index_select spread : Elapsed 0.021 ms (2.086 ms / 100) 2.072 -> 2.076 ( +0.19%) [ +0.53% +0.00% +0.29% / +0.58% +0.19% +0.24%] index_select strided 3 : Elapsed 0.021 ms (2.083 ms / 100) 2.070 -> 2.082 ( +0.58%) [ +0.39% +0.29% +0.00% / +1.06% +0.58% +0.72%] index_select strided 5 : Elapsed 0.021 ms (2.078 ms / 100) 2.072 -> 2.070 ( -0.10%) [ +0.00% +0.19% +0.00% / +0.29% -0.05% -0.10%] index_select strided 7 : Elapsed 0.021 ms (2.072 ms / 100) 2.088 -> 2.086 ( -0.10%) [ +0.14% +0.10% +0.00% / +0.24% -0.10% -0.05%] index_select strided 8 : Elapsed 0.021 ms (2.091 ms / 100) 2.083 -> 2.078 ( -0.24%) [ +0.38% +0.48% +0.00% / -0.19% +0.38% -0.24%] index_select strided 16 : Elapsed 0.021 ms (2.091 ms / 100) 2.065 -> 2.062 ( -0.15%) [ +0.00% +0.15% +0.19% / +0.00% +0.63% -0.15%] index_select random : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.064 ( +0.00%) [ +0.00% +0.15% +0.10% / +0.24% +0.05% +0.00%] index_select random_sorted : Elapsed 0.021 ms (2.064 ms / 100) 2.065 -> 2.067 ( +0.10%) [ +0.00% +0.39% +0.00% / +0.34% +0.10% +0.39%] index_select perm : Elapsed 0.021 ms (2.065 ms / 100) 2.062 -> 2.072 ( +0.48%) [ +0.19% +0.19% +0.00% / +0.53% +0.87% +0.48%] index_select perm_sorted : Elapsed 0.021 ms (2.066 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 200, 800)) A = [40, 5, 20, 16] (stride (1, 12800, 640, 40)) dim = 2 2.072 -> 2.076 ( +0.19%) [ +0.14% +0.00% +0.05% / +0.19% +0.24% +0.19%] index_select const : Elapsed 0.021 ms (2.075 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.24% +0.05% +0.00% / -0.05% +0.10% +0.19%] index_select wrap : Elapsed 0.021 ms (2.084 ms / 100) 2.081 -> 2.082 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.14% +0.05% +0.14%] index_select linear : Elapsed 0.021 ms (2.082 ms / 100) 2.078 -> 2.080 ( +0.10%) [ +0.05% +0.14% +0.00% / +0.29% +0.10% +0.14%] index_select reverse : Elapsed 0.021 ms (2.079 ms / 100) 2.073 -> 2.073 ( +0.00%) [ +0.00% +0.19% +0.14% / +0.19% +0.00% +0.05%] index_select skip64 : Elapsed 0.021 ms (2.073 ms / 100) 2.075 -> 2.075 ( +0.00%) [ +0.34% +0.34% +0.00% / +0.00% +0.19% +0.10%] index_select skip256 : Elapsed 0.021 ms (2.082 ms / 100) 2.079 -> 2.080 ( +0.05%) [ +0.19% +0.00% +0.43% / +0.24% +0.05% +0.14%] index_select spread : Elapsed 0.021 ms (2.083 ms / 100) 2.078 -> 2.075 ( -0.14%) [ +0.14% +0.05% +0.00% / -0.10% -0.14% +0.10%] index_select strided 3 : Elapsed 0.021 ms (2.081 ms / 100) 2.078 -> 2.078 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +0.43% +0.29%] index_select strided 5 : Elapsed 0.021 ms (2.080 ms / 100) 2.078 -> 2.077 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.19% +0.14%] index_select strided 7 : Elapsed 0.021 ms (2.078 ms / 100) 2.082 -> 2.082 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.05% +0.00% +0.19%] index_select strided 8 : Elapsed 0.021 ms (2.084 ms / 100) 2.078 -> 2.076 ( -0.10%) [ +0.00% +0.19% +0.05% / -0.10% +0.10% +0.05%] index_select strided 16 : Elapsed 0.021 ms (2.078 ms / 100) 2.077 -> 2.071 ( -0.29%) [ +0.00% +0.19% +0.05% / -0.29% +0.00% +0.14%] index_select random : Elapsed 0.021 ms (2.077 ms / 100) 2.072 -> 2.071 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.19% +0.24% -0.05%] index_select random_sorted : Elapsed 0.021 ms (2.072 ms / 100) 2.076 -> 2.077 ( +0.05%) [ +0.00% +0.10% +0.24% / +0.05% +0.48% +0.48%] index_select perm : Elapsed 0.021 ms (2.076 ms / 100) 2.083 -> 2.083 ( +0.00%) [ +0.00% +0.05% +0.10% / +0.05% +0.00% +0.19%] index_select perm_sorted : Elapsed 0.021 ms (2.083 ms / 100) out_shape = [40, 5, 20, 4] in_shape = [40, 5, 20, 16] idx_dim = 3 B = [40, 5, 20, 4] (stride (400, 4, 20, 1)) A = [40, 5, 20, 16] (stride (100, 20, 1, 4000)) dim = 3 2.184 -> 2.183 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.18% +0.14%] index_select const : Elapsed 0.022 ms (2.184 ms / 100) 2.238 -> 2.240 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.27% +0.22%] index_select wrap : Elapsed 0.022 ms (2.240 ms / 100) 2.234 -> 2.238 ( +0.18%) [ +0.09% +0.13% +0.00% / +0.18% +0.49% +0.49%] index_select linear : Elapsed 0.022 ms (2.236 ms / 100) 2.236 -> 2.238 ( +0.09%) [ +0.04% +0.13% +0.00% / +0.09% +0.45% +0.45%] index_select reverse : Elapsed 0.022 ms (2.237 ms / 100) 2.180 -> 2.187 ( +0.32%) [ +0.05% +0.00% +0.09% / +0.32% +0.32% +0.32%] index_select skip64 : Elapsed 0.022 ms (2.181 ms / 100) 2.179 -> 2.181 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.23% +0.37%] index_select skip256 : Elapsed 0.022 ms (2.183 ms / 100) 2.237 -> 2.240 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +0.13% +0.18%] index_select spread : Elapsed 0.022 ms (2.239 ms / 100) 2.237 -> 2.237 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.18% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.237 ms / 100) 2.239 -> 2.240 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.27% +0.36%] index_select strided 5 : Elapsed 0.022 ms (2.239 ms / 100) 2.236 -> 2.236 ( +0.00%) [ +0.04% +0.22% +0.00% / +0.00% +0.27% +0.36%] index_select strided 7 : Elapsed 0.022 ms (2.237 ms / 100) 2.196 -> 2.198 ( +0.09%) [ +0.23% +0.00% +0.27% / +0.09% +0.36% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.201 ms / 100) 2.237 -> 2.238 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.22% +0.36%] index_select random : Elapsed 0.022 ms (2.237 ms / 100) 2.236 -> 2.236 ( +0.00%) [ +0.04% +0.13% +0.00% / +0.00% +0.49% +0.18%] index_select random_sorted : Elapsed 0.022 ms (2.237 ms / 100) 2.239 -> 2.242 ( +0.13%) [ +0.00% +0.18% +0.00% / +0.13% +0.18% +0.27%] index_select perm : Elapsed 0.022 ms (2.239 ms / 100) 2.237 -> 2.241 ( +0.18%) [ +0.04% +0.13% +0.00% / +0.18% +0.18% +0.18%] index_select perm_sorted : Elapsed 0.022 ms (2.238 ms / 100) B = [40, 5, 20, 4] (stride (400, 1, 20, 5)) A = [40, 5, 20, 16] (stride (1600, 1, 80, 5)) dim = 3 2.526 -> 2.527 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.24% +0.20%] index_select const : Elapsed 0.025 ms (2.529 ms / 100) 2.545 -> 2.550 ( +0.20%) [ +0.00% +0.00% +0.20% / +0.24% +0.20% +0.24%] index_select wrap : Elapsed 0.025 ms (2.545 ms / 100) 2.543 -> 2.543 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.24% +0.35%] index_select linear : Elapsed 0.025 ms (2.543 ms / 100) 2.553 -> 2.549 ( -0.16%) [ +0.04% +0.04% +0.00% / -0.16% +0.16% +0.16%] index_select reverse : Elapsed 0.026 ms (2.554 ms / 100) 2.528 -> 2.527 ( -0.04%) [ +0.00% +0.08% +0.16% / -0.04% +0.28% +0.32%] index_select skip64 : Elapsed 0.025 ms (2.528 ms / 100) 2.526 -> 2.532 ( +0.24%) [ +0.00% +0.08% +0.20% / +0.24% +0.48% +0.28%] index_select skip256 : Elapsed 0.025 ms (2.526 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.55% +0.23%] index_select spread : Elapsed 0.026 ms (2.563 ms / 100) 2.572 -> 2.572 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.35% +0.47%] index_select strided 3 : Elapsed 0.026 ms (2.575 ms / 100) 2.540 -> 2.543 ( +0.12%) [ +0.00% +0.16% +0.28% / +0.12% +0.39% +0.47%] index_select strided 5 : Elapsed 0.025 ms (2.540 ms / 100) 2.547 -> 2.549 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.27% +0.39%] index_select strided 7 : Elapsed 0.025 ms (2.548 ms / 100) 2.522 -> 2.525 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.12% +0.59% +0.52%] index_select strided 8 : Elapsed 0.025 ms (2.522 ms / 100) 2.553 -> 2.556 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.31% +0.39%] index_select random : Elapsed 0.026 ms (2.555 ms / 100) 2.558 -> 2.562 ( +0.16%) [ +0.16% +0.23% +0.00% / +0.16% +0.23% +0.39%] index_select random_sorted : Elapsed 0.026 ms (2.562 ms / 100) 2.554 -> 2.555 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.39% +0.35%] index_select perm : Elapsed 0.026 ms (2.555 ms / 100) 2.556 -> 2.555 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.27% +0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.558 ms / 100) B = [40, 5, 20, 4] (stride (1, 3200, 160, 40)) A = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) dim = 3 2.289 -> 2.291 ( +0.09%) [ +0.04% +0.13% +0.00% / +0.09% +0.26% +0.17%] index_select const : Elapsed 0.023 ms (2.290 ms / 100) 2.341 -> 2.347 ( +0.26%) [ +0.09% +0.00% +0.13% / +0.26% +0.73% +0.56%] index_select wrap : Elapsed 0.023 ms (2.343 ms / 100) 2.340 -> 2.337 ( -0.13%) [ +0.09% +0.00% +0.09% / -0.13% +0.38% +0.47%] index_select linear : Elapsed 0.023 ms (2.342 ms / 100) 2.338 -> 2.340 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.47% +0.47%] index_select reverse : Elapsed 0.023 ms (2.342 ms / 100) 2.282 -> 2.282 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.66% +0.92%] index_select skip64 : Elapsed 0.023 ms (2.289 ms / 100) 2.285 -> 2.294 ( +0.39%) [ +0.39% +0.00% +0.04% / +0.39% +0.61% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.294 ms / 100) 2.351 -> 2.352 ( +0.04%) [ +0.21% +0.09% +0.00% / +0.04% +0.13% +0.26%] index_select spread : Elapsed 0.024 ms (2.356 ms / 100) 2.346 -> 2.357 ( +0.47%) [ +0.04% +0.04% +0.00% / +0.47% +0.47% +0.55%] index_select strided 3 : Elapsed 0.023 ms (2.347 ms / 100) 2.345 -> 2.350 ( +0.21%) [ +0.09% +0.00% +0.13% / +0.21% +0.26% +0.38%] index_select strided 5 : Elapsed 0.023 ms (2.347 ms / 100) 2.346 -> 2.346 ( +0.00%) [ +0.00% +0.00% +0.21% / +0.00% +0.26% +0.21%] index_select strided 7 : Elapsed 0.023 ms (2.346 ms / 100) 2.299 -> 2.302 ( +0.13%) [ +0.00% +0.09% +0.00% / +0.13% +0.52% +0.52%] index_select strided 8 : Elapsed 0.023 ms (2.299 ms / 100) 2.320 -> 2.325 ( +0.22%) [ +0.00% +0.09% +0.22% / +0.22% +0.26% +0.34%] index_select random : Elapsed 0.023 ms (2.320 ms / 100) 2.328 -> 2.332 ( +0.17%) [ +0.00% +0.21% +0.21% / +0.17% +0.17% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.328 ms / 100) 2.340 -> 2.347 ( +0.30%) [ +0.47% +0.00% +0.26% / +0.30% +0.56% +0.56%] index_select perm : Elapsed 0.024 ms (2.351 ms / 100) 2.345 -> 2.349 ( +0.17%) [ +0.00% +0.09% +0.09% / +0.17% +0.21% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.345 ms / 100) B = [40, 5, 20, 4] (stride (4, 160, 800, 1)) A = [40, 5, 20, 16] (stride (1600, 1, 5, 100)) dim = 3 2.409 -> 2.412 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.29% +0.17%] index_select const : Elapsed 0.024 ms (2.413 ms / 100) 2.423 -> 2.430 ( +0.29%) [ +0.25% +0.25% +0.00% / +0.29% +0.54% +0.33%] index_select wrap : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.428 ( +0.08%) [ +0.00% +0.33% +0.00% / +0.08% +0.21% +0.29%] index_select linear : Elapsed 0.024 ms (2.426 ms / 100) 2.429 -> 2.428 ( -0.04%) [ +0.04% +0.00% +0.04% / +0.08% +0.12% -0.04%] index_select reverse : Elapsed 0.024 ms (2.430 ms / 100) 2.409 -> 2.417 ( +0.33%) [ +0.00% +0.21% +0.37% / +0.33% +0.46% +0.46%] index_select skip64 : Elapsed 0.024 ms (2.409 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.04% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.415 ms / 100) 2.419 -> 2.418 ( -0.04%) [ +0.17% +0.04% +0.00% / -0.04% +0.29% +0.25%] index_select spread : Elapsed 0.024 ms (2.423 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.29% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.424 -> 2.429 ( +0.21%) [ +0.25% +0.25% +0.00% / +0.21% +0.45% +0.45%] index_select strided 5 : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.431 ( +0.16%) [ +0.12% +0.00% +0.21% / +0.16% +0.29% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.37% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.427 -> 2.429 ( +0.08%) [ +0.08% +0.25% +0.00% / +0.08% +0.29% +0.21%] index_select random : Elapsed 0.024 ms (2.429 ms / 100) 2.428 -> 2.429 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.37% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.426 ( -0.21%) [ +0.04% +0.00% +0.00% / -0.21% +0.21% +0.21%] index_select perm : Elapsed 0.024 ms (2.432 ms / 100) 2.424 -> 2.432 ( +0.33%) [ +0.21% +0.04% +0.00% / +0.33% +0.37% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) out_shape = [4, 16, 5, 20] in_shape = [40, 16, 5, 20] idx_dim = 0 B = [4, 16, 5, 20] (stride (1600, 5, 1, 80)) dim = 0 fill_cnt = 40 1.248 -> 1.238 ( -0.80%) [ +0.24% +0.00% +0.40% / -0.80% -0.32% -0.32%] index_fill_ const : Elapsed 0.013 ms (1.251 ms / 100) 1.581 -> 1.567 ( -0.89%) [ +0.13% +0.19% +0.00% / -0.51% -0.89% -0.70%] index_fill_ linear : Elapsed 0.016 ms (1.583 ms / 100) 1.589 -> 1.579 ( -0.63%) [ +0.00% +0.06% +0.00% / -0.57% -0.63% -0.57%] index_fill_ reverse : Elapsed 0.016 ms (1.589 ms / 100) 1.232 -> 1.220 ( -0.97%) [ +0.00% +0.24% +0.08% / -0.97% -0.97% -0.65%] index_fill_ skip64 : Elapsed 0.012 ms (1.232 ms / 100) 1.230 -> 1.218 ( -0.98%) [ +0.16% +0.00% +0.57% / -0.98% -0.65% -0.33%] index_fill_ skip256 : Elapsed 0.012 ms (1.232 ms / 100) 1.226 -> 1.220 ( -0.49%) [ +0.00% +0.16% +0.65% / -0.49% +0.00% -0.08%] index_fill_ spread : Elapsed 0.012 ms (1.226 ms / 100) 1.208 -> 1.194 ( -1.16%) [ +0.58% +0.00% +0.41% / -1.16% -0.33% -0.25%] index_fill_ strided 3 : Elapsed 0.012 ms (1.215 ms / 100) 1.227 -> 1.216 ( -0.90%) [ +0.00% +0.24% +0.08% / -0.90% -0.73% -0.81%] index_fill_ random : Elapsed 0.012 ms (1.227 ms / 100) 1.226 -> 1.216 ( -0.82%) [ +0.08% +0.08% +0.00% / -0.82% -0.08% -0.08%] index_fill_ random_sorted : Elapsed 0.012 ms (1.227 ms / 100) B = [4, 16, 5, 20] (stride (1600, 5, 1, 80)) A = [40, 16, 5, 20] (stride (20, 800, 12800, 1)) dim = 0 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select const : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +1.51% +0.76%] index_select wrap : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.67% +0.67%] index_select linear : Elapsed 0.012 ms (1.191 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.84% +0.84%] index_select reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.84%] index_select skip64 : Elapsed 0.012 ms (1.190 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.17% +0.00% +0.42% / +0.00% +0.76% +0.76%] index_select skip256 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.00% +0.76% / +0.17% +0.76% +0.59%] index_select spread : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.08% +0.00% +0.67% / +0.17% +0.67% +0.67%] index_select strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.34% +0.00% +0.67% / +0.08% +0.67% +0.59%] index_select strided 5 : Elapsed 0.012 ms (1.196 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.59% / +0.00% +0.67% +0.50%] index_select strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.59% / +0.08% +0.59% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.59% / +0.00% +0.34% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.194 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.59% / +0.00% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.193 ( -0.08%) [ +0.00% +0.00% +0.50% / -0.08% +0.42% +0.42%] index_select random_sorted : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.59% +0.59%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.34% +0.25%] index_select perm_sorted : Elapsed 0.012 ms (1.194 ms / 100) B = [4, 16, 5, 20] (stride (1, 4, 64, 320)) A = [40, 16, 5, 20] (stride (320, 1, 12800, 16)) dim = 0 1.307 -> 1.307 ( +0.00%) [ +0.46% +0.46% +0.00% / +0.00% +0.69% +1.30%] index_select const : Elapsed 0.013 ms (1.313 ms / 100) 1.293 -> 1.295 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.23% +0.62%] index_select wrap : Elapsed 0.013 ms (1.293 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.54% +0.54%] index_select linear : Elapsed 0.013 ms (1.288 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.23% +0.16% +0.00% / +0.23% +0.62% +0.78%] index_select reverse : Elapsed 0.013 ms (1.284 ms / 100) 1.318 -> 1.313 ( -0.38%) [ +0.00% +0.08% +0.08% / -0.38% +0.15% +0.30%] index_select skip64 : Elapsed 0.013 ms (1.318 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.78% +0.78%] index_select skip256 : Elapsed 0.013 ms (1.287 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.23% +0.39% +0.00% / +0.23% +0.70% +0.70%] index_select spread : Elapsed 0.013 ms (1.284 ms / 100) 1.312 -> 1.312 ( +0.00%) [ +0.00% +0.46% +0.00% / +0.00% +0.00% +0.46%] index_select strided 3 : Elapsed 0.013 ms (1.312 ms / 100) 1.284 -> 1.283 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.31% +0.39%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.31% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.47% +0.39%] index_select strided 8 : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.39% +0.39%] index_select strided 16 : Elapsed 0.013 ms (1.285 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.31%] index_select random : Elapsed 0.013 ms (1.285 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_select random_sorted : Elapsed 0.013 ms (1.285 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.23% +0.23%] index_select perm : Elapsed 0.013 ms (1.318 ms / 100) 1.293 -> 1.292 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.62% +0.46%] index_select perm_sorted : Elapsed 0.013 ms (1.293 ms / 100) out_shape = [40, 4, 5, 20] in_shape = [40, 16, 5, 20] idx_dim = 1 B = [40, 4, 5, 20] (stride (400, 1, 4, 20)) A = [40, 16, 5, 20] (stride (1, 4000, 800, 40)) dim = 1 2.439 -> 2.441 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.33% +0.25%] index_select const : Elapsed 0.024 ms (2.439 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.49% +0.20%] index_select wrap : Elapsed 0.024 ms (2.444 ms / 100) 2.440 -> 2.440 ( +0.00%) [ +0.33% +0.04% +0.00% / +0.00% +0.61% +0.53%] index_select linear : Elapsed 0.024 ms (2.448 ms / 100) 2.441 -> 2.440 ( -0.04%) [ +0.20% +0.00% +0.16% / -0.04% +0.37% +0.41%] index_select reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.436 -> 2.441 ( +0.21%) [ +0.00% +0.12% +0.08% / +0.21% +0.33% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.436 ms / 100) 2.438 -> 2.438 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.21% +0.62%] index_select skip256 : Elapsed 0.024 ms (2.439 ms / 100) 2.439 -> 2.445 ( +0.25%) [ +0.16% +0.00% +0.21% / +0.25% +0.29% +0.29%] index_select spread : Elapsed 0.024 ms (2.443 ms / 100) 2.439 -> 2.443 ( +0.16%) [ +0.25% +0.00% +0.04% / +0.16% +0.29% +0.49%] index_select strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.442 -> 2.440 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.41% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.442 ms / 100) 2.439 -> 2.439 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.25% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.441 -> 2.437 ( -0.16%) [ +0.00% +0.08% +0.00% / -0.16% +0.29% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.441 ms / 100) 2.437 -> 2.440 ( +0.12%) [ +0.00% +0.25% +0.12% / +0.12% +0.37% +0.37%] index_select random : Elapsed 0.024 ms (2.437 ms / 100) 2.437 -> 2.439 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.41% +0.37%] index_select random_sorted : Elapsed 0.024 ms (2.440 ms / 100) 2.444 -> 2.443 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.33% +0.20%] index_select perm : Elapsed 0.024 ms (2.445 ms / 100) 2.441 -> 2.446 ( +0.20%) [ +0.20% +0.00% +0.25% / +0.20% +0.49% +0.45%] index_select perm_sorted : Elapsed 0.024 ms (2.446 ms / 100) B = [40, 4, 5, 20] (stride (400, 1, 4, 20)) A = [40, 16, 5, 20] (stride (80, 1, 16, 3200)) dim = 1 2.596 -> 2.598 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.23% +0.08% +0.23%] index_select const : Elapsed 0.026 ms (2.597 ms / 100) 2.598 -> 2.596 ( -0.08%) [ +0.04% +0.08% +0.00% / -0.08% +0.23% +0.12%] index_select wrap : Elapsed 0.026 ms (2.599 ms / 100) 2.600 -> 2.601 ( +0.04%) [ +0.00% +0.08% +0.19% / +0.27% +0.04% +0.15%] index_select linear : Elapsed 0.026 ms (2.600 ms / 100) 2.598 -> 2.601 ( +0.12%) [ +0.00% +0.15% +0.35% / +0.27% +0.12% +0.15%] index_select reverse : Elapsed 0.026 ms (2.598 ms / 100) 2.600 -> 2.600 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.15% +0.00% +0.04%] index_select skip64 : Elapsed 0.026 ms (2.601 ms / 100) 2.599 -> 2.602 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.12% +0.31%] index_select skip256 : Elapsed 0.026 ms (2.599 ms / 100) 2.620 -> 2.624 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.15% +0.42%] index_select spread : Elapsed 0.026 ms (2.624 ms / 100) 2.620 -> 2.622 ( +0.08%) [ +0.23% +0.19% +0.00% / +0.42% +0.42% +0.08%] index_select strided 3 : Elapsed 0.026 ms (2.626 ms / 100) 2.620 -> 2.623 ( +0.11%) [ +0.11% +0.23% +0.00% / +0.11% +0.23% +0.11%] index_select strided 5 : Elapsed 0.026 ms (2.623 ms / 100) 2.615 -> 2.621 ( +0.23%) [ +0.23% +0.31% +0.00% / +0.27% +0.31% +0.23%] index_select strided 7 : Elapsed 0.026 ms (2.621 ms / 100) 2.632 -> 2.632 ( +0.00%) [ +0.11% +0.04% +0.00% / +0.00% +0.30% +0.34%] index_select strided 8 : Elapsed 0.026 ms (2.635 ms / 100) 2.621 -> 2.624 ( +0.11%) [ +0.04% +0.04% +0.00% / +0.15% +0.11% +0.31%] index_select random : Elapsed 0.026 ms (2.622 ms / 100) 2.618 -> 2.618 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.19% +0.19%] index_select random_sorted : Elapsed 0.026 ms (2.618 ms / 100) 2.630 -> 2.631 ( +0.04%) [ +0.30% +0.00% +0.19% / +0.04% +0.34% +0.27%] index_select perm : Elapsed 0.026 ms (2.638 ms / 100) 2.626 -> 2.624 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.27% +0.30%] index_select perm_sorted : Elapsed 0.026 ms (2.626 ms / 100) B = [40, 4, 5, 20] (stride (100, 4000, 20, 1)) A = [40, 16, 5, 20] (stride (320, 1, 12800, 16)) dim = 1 2.361 -> 2.360 ( -0.04%) [ +0.17% +0.08% +0.00% / -0.04% +0.51% +0.34%] index_select const : Elapsed 0.024 ms (2.365 ms / 100) 2.361 -> 2.362 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.59% +0.42%] index_select wrap : Elapsed 0.024 ms (2.361 ms / 100) 2.363 -> 2.365 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.08% +0.30% +0.34%] index_select linear : Elapsed 0.024 ms (2.366 ms / 100) 2.363 -> 2.363 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.51%] index_select reverse : Elapsed 0.024 ms (2.363 ms / 100) 2.359 -> 2.360 ( +0.04%) [ +0.04% +0.21% +0.00% / +0.04% +0.55% +0.42%] index_select skip64 : Elapsed 0.024 ms (2.360 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.21% +0.00% +0.00% / +0.08% +0.64% +0.68%] index_select skip256 : Elapsed 0.024 ms (2.361 ms / 100) 2.389 -> 2.395 ( +0.25%) [ +0.04% +0.08% +0.00% / +0.25% +0.59% +0.63%] index_select spread : Elapsed 0.024 ms (2.390 ms / 100) 2.386 -> 2.386 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.59% +0.38%] index_select strided 3 : Elapsed 0.024 ms (2.389 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.71% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.389 ms / 100) 2.388 -> 2.389 ( +0.04%) [ +0.25% +0.00% +0.08% / +0.04% +0.84% +0.71%] index_select strided 7 : Elapsed 0.024 ms (2.394 ms / 100) 2.390 -> 2.395 ( +0.21%) [ +0.13% +0.00% +0.13% / +0.21% +0.46% +0.50%] index_select strided 8 : Elapsed 0.024 ms (2.393 ms / 100) 2.395 -> 2.400 ( +0.21%) [ +0.00% +0.17% +0.25% / +0.21% +0.38% +0.33%] index_select random : Elapsed 0.024 ms (2.395 ms / 100) 2.388 -> 2.388 ( +0.00%) [ +0.00% +0.04% +0.17% / +0.00% +0.38% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.388 ms / 100) 2.384 -> 2.386 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.29% +0.46%] index_select perm : Elapsed 0.024 ms (2.384 ms / 100) 2.401 -> 2.404 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.46% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.401 ms / 100) B = [40, 4, 5, 20] (stride (1, 4000, 40, 200)) A = [40, 16, 5, 20] (stride (1, 200, 40, 3200)) dim = 1 2.550 -> 2.554 ( +0.16%) [ +0.31% +0.08% +0.00% / +0.16% +0.35% +0.27%] index_select const : Elapsed 0.026 ms (2.558 ms / 100) 2.555 -> 2.556 ( +0.04%) [ +0.31% +0.08% +0.00% / +0.04% +0.39% +0.39%] index_select wrap : Elapsed 0.026 ms (2.563 ms / 100) 2.558 -> 2.556 ( -0.08%) [ +0.16% +0.00% +0.00% / +0.04% -0.08% +0.08%] index_select linear : Elapsed 0.026 ms (2.562 ms / 100) 2.559 -> 2.558 ( -0.04%) [ +0.00% +0.00% +0.08% / +0.08% -0.04% +0.16%] index_select reverse : Elapsed 0.026 ms (2.559 ms / 100) 2.550 -> 2.548 ( -0.08%) [ +0.00% +0.20% +0.12% / -0.08% +0.24% +0.24%] index_select skip64 : Elapsed 0.025 ms (2.550 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.04% +0.16% +0.00% / +0.08% +0.35% +0.47%] index_select skip256 : Elapsed 0.026 ms (2.553 ms / 100) 2.552 -> 2.557 ( +0.20%) [ +0.24% +0.27% +0.00% / +0.20% +0.63% +0.63%] index_select spread : Elapsed 0.026 ms (2.558 ms / 100) 2.556 -> 2.556 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.23% +0.27%] index_select strided 3 : Elapsed 0.026 ms (2.558 ms / 100) 2.557 -> 2.561 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.16% +0.47% +0.39%] index_select strided 5 : Elapsed 0.026 ms (2.557 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.16% +0.04% +0.00% / +0.08% +0.39% +0.39%] index_select strided 7 : Elapsed 0.026 ms (2.557 ms / 100) 2.549 -> 2.547 ( -0.08%) [ +0.31% +0.00% +0.12% / -0.08% +0.35% +0.43%] index_select strided 8 : Elapsed 0.026 ms (2.557 ms / 100) 2.557 -> 2.561 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.43% +0.43%] index_select random : Elapsed 0.026 ms (2.561 ms / 100) 2.555 -> 2.555 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.20% +0.35%] index_select random_sorted : Elapsed 0.026 ms (2.557 ms / 100) 2.552 -> 2.557 ( +0.20%) [ +0.08% +0.20% +0.00% / +0.20% +0.43% +0.51%] index_select perm : Elapsed 0.026 ms (2.554 ms / 100) 2.548 -> 2.558 ( +0.39%) [ +0.16% +0.00% +0.16% / +0.39% +0.63% +0.75%] index_select perm_sorted : Elapsed 0.026 ms (2.552 ms / 100) B = [40, 4, 5, 20] (stride (20, 1, 4, 800)) A = [40, 16, 5, 20] (stride (1600, 20, 320, 1)) dim = 1 2.393 -> 2.395 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.42% +0.38%] index_select const : Elapsed 0.024 ms (2.397 ms / 100) 2.442 -> 2.443 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.04% +0.49% +0.45%] index_select wrap : Elapsed 0.024 ms (2.444 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.08% +0.24% / +0.00% +0.41% +0.37%] index_select linear : Elapsed 0.025 ms (2.460 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.16% +0.45%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.388 -> 2.391 ( +0.13%) [ +0.04% +0.00% +0.17% / +0.13% +0.50% +0.63%] index_select skip64 : Elapsed 0.024 ms (2.389 ms / 100) 2.389 -> 2.393 ( +0.17%) [ +0.08% +0.00% +0.04% / +0.17% +0.59% +0.80%] index_select skip256 : Elapsed 0.024 ms (2.391 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.41% +0.53%] index_select spread : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.453 ( +0.25%) [ +0.16% +0.00% +0.04% / +0.25% +0.57% +0.49%] index_select strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.33% +0.37%] index_select strided 5 : Elapsed 0.024 ms (2.443 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.00% +0.00% +0.08% / -0.04% +0.41% +0.41%] index_select strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.37% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.437 -> 2.436 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.21% +0.21%] index_select random : Elapsed 0.024 ms (2.437 ms / 100) 2.424 -> 2.430 ( +0.25%) [ +0.12% +0.29% +0.00% / +0.25% +0.29% +0.45%] index_select random_sorted : Elapsed 0.024 ms (2.427 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.41% +0.41%] index_select perm : Elapsed 0.024 ms (2.445 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.00% +0.20% +0.08% / +0.04% +0.24% +0.49%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [40, 4, 5, 20] (stride (1, 200, 40, 800)) dim = 1 fill_cnt = 16 2.742 -> 2.742 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.18% +0.15%] index_fill_ const : Elapsed 0.027 ms (2.742 ms / 100) 2.778 -> 2.772 ( -0.22%) [ +0.11% +0.18% +0.00% / +0.04% -0.22% -0.07%] index_fill_ linear : Elapsed 0.028 ms (2.781 ms / 100) 2.760 -> 2.758 ( -0.07%) [ +0.07% +0.00% +0.14% / -0.07% +0.54% +0.69%] index_fill_ reverse : Elapsed 0.028 ms (2.762 ms / 100) 2.743 -> 2.744 ( +0.04%) [ +0.04% +0.15% +0.00% / +0.04% +0.22% +0.29%] index_fill_ skip64 : Elapsed 0.027 ms (2.744 ms / 100) 2.748 -> 2.745 ( -0.11%) [ +0.04% +0.11% +0.00% / +0.18% +0.04% -0.11%] index_fill_ skip256 : Elapsed 0.027 ms (2.749 ms / 100) 2.770 -> 2.761 ( -0.32%) [ +0.00% +0.04% +0.00% / +0.07% -0.32% -0.25%] index_fill_ spread : Elapsed 0.028 ms (2.770 ms / 100) 2.776 -> 2.777 ( +0.04%) [ +0.04% +0.14% +0.00% / +0.04% +0.14% +0.04%] index_fill_ strided 3 : Elapsed 0.028 ms (2.777 ms / 100) 2.767 -> 2.764 ( -0.11%) [ +0.11% +0.00% +0.07% / +0.25% -0.11% -0.11%] index_fill_ random : Elapsed 0.028 ms (2.770 ms / 100) 2.759 -> 2.763 ( +0.14%) [ +0.11% +0.00% +0.00% / +0.14% +0.47% +0.36%] index_fill_ random_sorted : Elapsed 0.028 ms (2.762 ms / 100) out_shape = [40, 16, 4, 20] in_shape = [40, 16, 5, 20] idx_dim = 2 B = [40, 16, 4, 20] (stride (1280, 1, 16, 64)) A = [40, 16, 5, 20] (stride (1, 4000, 800, 40)) dim = 2 5.742 -> 5.744 ( +0.03%) [ +0.00% +0.07% +0.03% / +0.03% +0.56% +0.30%] index_select const : Elapsed 0.057 ms (5.742 ms / 100) 5.816 -> 5.795 ( -0.36%) [ +0.15% +0.14% +0.00% / +0.17% -0.36% -0.28%] index_select wrap : Elapsed 0.058 ms (5.825 ms / 100) 5.811 -> 5.796 ( -0.26%) [ +0.12% +0.14% +0.00% / +0.09% -0.26% -0.22%] index_select linear : Elapsed 0.058 ms (5.818 ms / 100) 5.824 -> 5.791 ( -0.57%) [ +0.10% +0.00% +0.05% / +0.10% -0.57% -0.53%] index_select reverse : Elapsed 0.058 ms (5.830 ms / 100) 5.744 -> 5.744 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.44% +0.35%] index_select skip64 : Elapsed 0.057 ms (5.749 ms / 100) 5.743 -> 5.741 ( -0.03%) [ +0.07% +0.03% +0.00% / -0.03% +0.52% +0.40%] index_select skip256 : Elapsed 0.057 ms (5.747 ms / 100) 5.809 -> 5.797 ( -0.21%) [ +0.00% +0.12% +0.15% / +0.19% -0.07% -0.21%] index_select spread : Elapsed 0.058 ms (5.809 ms / 100) 5.787 -> 5.774 ( -0.22%) [ +0.07% +0.00% +0.12% / +0.10% -0.09% -0.22%] index_select strided 3 : Elapsed 0.058 ms (5.791 ms / 100) 5.784 -> 5.764 ( -0.35%) [ +0.00% +0.05% +0.03% / +0.28% -0.35% -0.24%] index_select random : Elapsed 0.058 ms (5.784 ms / 100) 5.774 -> 5.750 ( -0.42%) [ +0.00% +0.03% +0.02% / +0.02% -0.42% -0.26%] index_select random_sorted : Elapsed 0.058 ms (5.774 ms / 100) 5.782 -> 5.784 ( +0.03%) [ +0.00% +0.05% +0.21% / +0.22% +0.03% +0.21%] index_select perm : Elapsed 0.058 ms (5.782 ms / 100) 5.803 -> 5.793 ( -0.17%) [ +0.09% +0.03% +0.00% / +0.02% -0.16% -0.17%] index_select perm_sorted : Elapsed 0.058 ms (5.808 ms / 100) B = [40, 16, 4, 20] (stride (1280, 1, 16, 64)) A = [40, 16, 5, 20] (stride (16, 1, 640, 3200)) dim = 2 5.527 -> 5.524 ( -0.05%) [ +0.04% +0.00% +0.02% / -0.05% +0.18% +0.00%] index_select const : Elapsed 0.055 ms (5.529 ms / 100) 5.576 -> 5.579 ( +0.05%) [ +0.00% +0.14% +0.14% / +0.18% +0.14% +0.05%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.00% +0.02% +0.13% / +0.04% +0.13% +0.11%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.573 -> 5.576 ( +0.05%) [ +0.00% +0.02% +0.11% / +0.20% +0.14% +0.05%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.522 -> 5.532 ( +0.18%) [ +0.22% +0.00% +0.04% / +0.18% +0.22% +0.27%] index_select skip64 : Elapsed 0.055 ms (5.534 ms / 100) 5.521 -> 5.527 ( +0.11%) [ +0.07% +0.04% +0.00% / +0.11% +0.31% +0.36%] index_select skip256 : Elapsed 0.055 ms (5.525 ms / 100) 5.577 -> 5.582 ( +0.09%) [ +0.00% +0.07% +0.04% / +0.09% +0.09% +0.20%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.571 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.11% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.576 -> 5.560 ( -0.29%) [ +0.25% +0.00% +0.07% / +0.13% -0.29% -0.18%] index_select random : Elapsed 0.056 ms (5.590 ms / 100) 5.555 -> 5.548 ( -0.13%) [ +0.02% +0.00% +0.20% / +0.16% -0.13% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.556 ms / 100) 5.576 -> 5.572 ( -0.07%) [ +0.13% +0.00% +0.07% / +0.09% -0.07% +0.05%] index_select perm : Elapsed 0.056 ms (5.583 ms / 100) 5.576 -> 5.576 ( +0.00%) [ +0.00% +0.02% +0.25% / +0.05% +0.00% +0.14%] index_select perm_sorted : Elapsed 0.056 ms (5.576 ms / 100) B = [40, 16, 4, 20] (stride (1, 160, 40, 2560)) A = [40, 16, 5, 20] (stride (5, 4000, 1, 200)) dim = 2 5.994 -> 6.000 ( +0.10%) [ +0.00% +0.03% +0.15% / +0.15% +0.10% +0.23%] index_select const : Elapsed 0.060 ms (5.994 ms / 100) 5.993 -> 6.001 ( +0.13%) [ +0.12% +0.00% +0.17% / +0.13% +0.17% +0.18%] index_select wrap : Elapsed 0.060 ms (6.000 ms / 100) 5.998 -> 5.994 ( -0.07%) [ +0.05% +0.00% +0.00% / -0.07% +0.12% +0.23%] index_select linear : Elapsed 0.060 ms (6.001 ms / 100) 5.993 -> 5.995 ( +0.03%) [ +0.00% +0.03% +0.07% / +0.03% +0.15% +0.05%] index_select reverse : Elapsed 0.060 ms (5.993 ms / 100) 5.995 -> 5.999 ( +0.07%) [ +0.00% +0.05% +0.20% / +0.07% +0.28% +0.12%] index_select skip64 : Elapsed 0.060 ms (5.995 ms / 100) 5.997 -> 6.001 ( +0.07%) [ +0.08% +0.05% +0.00% / +0.07% +0.12% +0.18%] index_select skip256 : Elapsed 0.060 ms (6.002 ms / 100) 5.997 -> 6.004 ( +0.12%) [ +0.13% +0.08% +0.00% / +0.22% +0.12% +0.17%] index_select spread : Elapsed 0.060 ms (6.005 ms / 100) 5.998 -> 5.999 ( +0.02%) [ +0.02% +0.13% +0.00% / +0.02% +0.15% +0.13%] index_select strided 3 : Elapsed 0.060 ms (5.999 ms / 100) 5.989 -> 5.997 ( +0.13%) [ +0.07% +0.00% +0.15% / +0.13% +0.22% +0.27%] index_select random : Elapsed 0.060 ms (5.993 ms / 100) 5.989 -> 5.995 ( +0.10%) [ +0.10% +0.00% +0.23% / +0.10% +0.38% +0.30%] index_select random_sorted : Elapsed 0.060 ms (5.995 ms / 100) 5.995 -> 6.001 ( +0.10%) [ +0.02% +0.00% +0.03% / +0.10% +0.13% +0.23%] index_select perm : Elapsed 0.060 ms (5.996 ms / 100) 6.002 -> 6.000 ( -0.03%) [ +0.07% +0.00% +0.15% / -0.03% +0.13% +0.02%] index_select perm_sorted : Elapsed 0.060 ms (6.006 ms / 100) out_shape = [40, 16, 5, 4] in_shape = [40, 16, 5, 20] idx_dim = 3 B = [40, 16, 5, 4] (stride (320, 5, 1, 80)) A = [40, 16, 5, 20] (stride (1, 800, 12800, 40)) dim = 3 2.047 -> 2.051 ( +0.20%) [ +0.15% +0.00% +0.15% / +0.20% +0.68% +0.73%] index_select const : Elapsed 0.021 ms (2.050 ms / 100) 2.067 -> 2.071 ( +0.19%) [ +0.00% +0.05% +0.15% / +0.19% +0.39% +0.48%] index_select wrap : Elapsed 0.021 ms (2.067 ms / 100) 2.063 -> 2.069 ( +0.29%) [ +0.24% +0.29% +0.00% / +0.29% +0.73% +0.63%] index_select linear : Elapsed 0.021 ms (2.068 ms / 100) 2.067 -> 2.071 ( +0.19%) [ +0.29% +0.34% +0.00% / +0.19% +0.53% +0.77%] index_select reverse : Elapsed 0.021 ms (2.073 ms / 100) 2.051 -> 2.050 ( -0.05%) [ +0.10% +0.00% +0.05% / -0.05% +0.34% +0.34%] index_select skip64 : Elapsed 0.021 ms (2.053 ms / 100) 2.050 -> 2.051 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.44% +0.59%] index_select skip256 : Elapsed 0.021 ms (2.050 ms / 100) 2.068 -> 2.066 ( -0.10%) [ +0.29% +0.00% +0.00% / -0.10% +0.48% +0.48%] index_select spread : Elapsed 0.021 ms (2.074 ms / 100) 2.068 -> 2.067 ( -0.05%) [ +0.19% +0.00% +0.15% / -0.05% +0.53% +0.48%] index_select strided 3 : Elapsed 0.021 ms (2.072 ms / 100) 2.063 -> 2.070 ( +0.34%) [ +0.29% +0.24% +0.00% / +0.34% +0.53% +0.68%] index_select strided 5 : Elapsed 0.021 ms (2.069 ms / 100) 2.064 -> 2.071 ( +0.34%) [ +0.24% +0.19% +0.00% / +0.34% +0.73% +0.68%] index_select strided 7 : Elapsed 0.021 ms (2.069 ms / 100) 2.072 -> 2.075 ( +0.14%) [ +0.05% +0.00% +0.05% / +0.14% +0.39% +0.53%] index_select strided 8 : Elapsed 0.021 ms (2.073 ms / 100) 2.065 -> 2.068 ( +0.15%) [ +0.19% +0.19% +0.00% / +0.15% +1.02% +0.82%] index_select strided 16 : Elapsed 0.021 ms (2.069 ms / 100) 2.059 -> 2.062 ( +0.15%) [ +0.24% +0.00% +0.05% / +0.15% +0.78% +0.87%] index_select random : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.069 ( +0.24%) [ +0.10% +0.00% +0.00% / +0.24% +0.63% +0.58%] index_select random_sorted : Elapsed 0.021 ms (2.066 ms / 100) 2.068 -> 2.070 ( +0.10%) [ +0.15% +0.00% +0.10% / +0.10% +0.53% +0.39%] index_select perm : Elapsed 0.021 ms (2.071 ms / 100) 2.064 -> 2.061 ( -0.15%) [ +0.00% +0.00% +0.10% / -0.15% +0.44% +0.34%] index_select perm_sorted : Elapsed 0.021 ms (2.064 ms / 100) B = [40, 16, 5, 4] (stride (20, 800, 1, 5)) A = [40, 16, 5, 20] (stride (1600, 20, 320, 1)) dim = 3 2.174 -> 2.170 ( -0.18%) [ +0.00% +0.46% +0.05% / +0.46% -0.18% +0.00%] index_select const : Elapsed 0.022 ms (2.174 ms / 100) 2.176 -> 2.169 ( -0.32%) [ +0.23% +0.00% +0.05% / +0.14% -0.18% -0.32%] index_select wrap : Elapsed 0.022 ms (2.181 ms / 100) 2.170 -> 2.165 ( -0.23%) [ +0.14% +0.00% +0.23% / +0.32% -0.23% +0.05%] index_select linear : Elapsed 0.022 ms (2.173 ms / 100) 2.172 -> 2.167 ( -0.23%) [ +0.09% +0.64% +0.00% / +0.64% +0.00% -0.23%] index_select reverse : Elapsed 0.022 ms (2.174 ms / 100) 2.177 -> 2.175 ( -0.09%) [ +0.14% +0.00% +0.28% / +0.05% -0.09% -0.05%] index_select skip64 : Elapsed 0.022 ms (2.180 ms / 100) 2.173 -> 2.170 ( -0.14%) [ +0.32% +0.32% +0.00% / +0.23% -0.14% -0.05%] index_select skip256 : Elapsed 0.022 ms (2.180 ms / 100) 2.240 -> 2.229 ( -0.49%) [ +0.09% +0.00% +0.04% / +0.13% -0.49% -0.45%] index_select spread : Elapsed 0.022 ms (2.242 ms / 100) 2.218 -> 2.208 ( -0.45%) [ +0.32% +0.00% +0.05% / +0.32% -0.45% -0.32%] index_select strided 3 : Elapsed 0.022 ms (2.225 ms / 100) 2.240 -> 2.226 ( -0.63%) [ +0.22% +0.04% +0.00% / -0.13% -0.63% -0.58%] index_select strided 5 : Elapsed 0.022 ms (2.245 ms / 100) 2.213 -> 2.201 ( -0.54%) [ +0.23% +0.00% +0.05% / +0.32% -0.45% -0.54%] index_select strided 7 : Elapsed 0.022 ms (2.218 ms / 100) 2.222 -> 2.208 ( -0.63%) [ +0.05% +0.00% +0.18% / +0.00% -0.45% -0.63%] index_select strided 8 : Elapsed 0.022 ms (2.223 ms / 100) 2.237 -> 2.230 ( -0.31%) [ +0.27% +0.00% +0.18% / +0.09% -0.31% -0.27%] index_select strided 16 : Elapsed 0.022 ms (2.243 ms / 100) 2.210 -> 2.208 ( -0.09%) [ +0.23% +0.00% +0.14% / +0.41% -0.09% +0.05%] index_select random : Elapsed 0.022 ms (2.215 ms / 100) 2.214 -> 2.208 ( -0.27%) [ +0.23% +0.09% +0.00% / +0.23% +0.09% -0.27%] index_select random_sorted : Elapsed 0.022 ms (2.219 ms / 100) 2.212 -> 2.208 ( -0.18%) [ +0.50% +0.00% +0.36% / +0.59% -0.18% +0.00%] index_select perm : Elapsed 0.022 ms (2.223 ms / 100) 2.215 -> 2.201 ( -0.63%) [ +0.18% +0.00% +0.05% / +0.14% -0.27% -0.63%] index_select perm_sorted : Elapsed 0.022 ms (2.219 ms / 100) B = [40, 16, 5, 4] (stride (1, 160, 2560, 40)) A = [40, 16, 5, 20] (stride (16, 1, 12800, 640)) dim = 3 1.957 -> 1.956 ( -0.05%) [ +0.00% +0.26% +0.15% / +0.26% -0.05% +0.00%] index_select const : Elapsed 0.020 ms (1.957 ms / 100) 1.993 -> 1.989 ( -0.20%) [ +0.00% +0.05% +0.00% / -0.20% +0.30% +0.20%] index_select wrap : Elapsed 0.020 ms (1.993 ms / 100) 1.988 -> 1.992 ( +0.20%) [ +0.20% +0.00% +0.10% / +0.20% +0.45% +0.50%] index_select linear : Elapsed 0.020 ms (1.992 ms / 100) 1.988 -> 1.992 ( +0.20%) [ +0.00% +0.00% +0.15% / +0.20% +0.45% +0.35%] index_select reverse : Elapsed 0.020 ms (1.988 ms / 100) 1.952 -> 1.960 ( +0.41%) [ +0.41% +0.46% +0.00% / +0.41% +0.51% +0.51%] index_select skip64 : Elapsed 0.020 ms (1.960 ms / 100) 1.958 -> 1.957 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.05% +0.00%] index_select skip256 : Elapsed 0.020 ms (1.959 ms / 100) 1.992 -> 1.988 ( -0.20%) [ +0.00% +0.15% +0.15% / +0.25% -0.20% -0.20%] index_select spread : Elapsed 0.020 ms (1.992 ms / 100) 1.993 -> 1.984 ( -0.45%) [ +0.00% +0.05% +0.10% / +0.10% -0.40% -0.45%] index_select strided 3 : Elapsed 0.020 ms (1.993 ms / 100) 1.993 -> 1.988 ( -0.25%) [ +0.00% +0.05% +0.00% / +0.20% -0.25% +0.00%] index_select strided 5 : Elapsed 0.020 ms (1.993 ms / 100) 1.997 -> 1.991 ( -0.30%) [ +0.00% +0.05% +0.00% / +0.10% -0.15% -0.30%] index_select strided 7 : Elapsed 0.020 ms (1.997 ms / 100) 1.999 -> 1.996 ( -0.15%) [ +0.20% +0.00% +0.00% / -0.10% -0.05% -0.15%] index_select strided 8 : Elapsed 0.020 ms (2.003 ms / 100) 1.995 -> 1.992 ( -0.15%) [ +0.10% +0.00% +0.05% / -0.05% -0.15% -0.10%] index_select strided 16 : Elapsed 0.020 ms (1.997 ms / 100) 1.991 -> 1.995 ( +0.20%) [ +0.00% +0.20% +0.15% / +0.20% +0.25% +0.45%] index_select random : Elapsed 0.020 ms (1.991 ms / 100) 1.990 -> 1.993 ( +0.15%) [ +0.35% +0.00% +0.20% / +0.15% +0.35% +0.35%] index_select random_sorted : Elapsed 0.020 ms (1.997 ms / 100) 1.997 -> 1.996 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.35% +0.15%] index_select perm : Elapsed 0.020 ms (1.998 ms / 100) 1.993 -> 1.997 ( +0.20%) [ +0.00% +0.10% +0.00% / +0.30% +0.25% +0.20%] index_select perm_sorted : Elapsed 0.020 ms (1.993 ms / 100) B = [40, 16, 5, 4] (stride (1, 200, 40, 3200)) A = [40, 16, 5, 20] (stride (80, 1, 16, 3200)) dim = 3 1.903 -> 1.908 ( +0.26%) [ +0.00% +0.68% +0.21% / +0.26% +1.31% +1.26%] index_select const : Elapsed 0.019 ms (1.903 ms / 100) 1.915 -> 1.925 ( +0.52%) [ +0.21% +0.21% +0.00% / +0.52% +1.31% +1.15%] index_select wrap : Elapsed 0.019 ms (1.919 ms / 100) 1.928 -> 1.929 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.99% +1.04%] index_select linear : Elapsed 0.019 ms (1.928 ms / 100) 1.921 -> 1.928 ( +0.36%) [ +0.73% +0.57% +0.00% / +0.36% +1.77% +1.77%] index_select reverse : Elapsed 0.019 ms (1.935 ms / 100) 1.907 -> 1.913 ( +0.31%) [ +0.31% +0.26% +0.00% / +0.31% +1.21% +1.31%] index_select skip64 : Elapsed 0.019 ms (1.913 ms / 100) 1.908 -> 1.914 ( +0.31%) [ +0.26% +0.00% +0.10% / +0.31% +0.84% +0.94%] index_select skip256 : Elapsed 0.019 ms (1.913 ms / 100) 1.931 -> 1.933 ( +0.10%) [ +0.26% +0.00% +0.31% / +0.10% +0.21% +0.21%] index_select spread : Elapsed 0.019 ms (1.936 ms / 100) 1.947 -> 1.948 ( +0.05%) [ +0.15% +0.00% +0.05% / +0.05% +0.31% +0.26%] index_select strided 3 : Elapsed 0.020 ms (1.950 ms / 100) 1.947 -> 1.948 ( +0.05%) [ +0.00% +0.15% +0.15% / +0.41% +0.36% +0.05%] index_select strided 5 : Elapsed 0.019 ms (1.947 ms / 100) 1.928 -> 1.927 ( -0.05%) [ +0.31% +0.00% +0.26% / -0.05% +0.93% +0.83%] index_select strided 7 : Elapsed 0.019 ms (1.934 ms / 100) 1.952 -> 1.954 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.26% +0.46%] index_select strided 8 : Elapsed 0.020 ms (1.952 ms / 100) 1.933 -> 1.934 ( +0.05%) [ +0.00% +0.16% +0.31% / +0.05% +0.26% +0.10%] index_select strided 16 : Elapsed 0.019 ms (1.933 ms / 100) 1.941 -> 1.939 ( -0.10%) [ +0.41% +0.26% +0.00% / +0.72% +0.21% -0.10%] index_select random : Elapsed 0.019 ms (1.949 ms / 100) 1.951 -> 1.950 ( -0.05%) [ +0.15% +0.10% +0.00% / -0.05% +0.56% +0.46%] index_select random_sorted : Elapsed 0.020 ms (1.954 ms / 100) 1.938 -> 1.944 ( +0.31%) [ +0.10% +0.00% +0.15% / +0.31% +0.88% +1.03%] index_select perm : Elapsed 0.019 ms (1.940 ms / 100) 1.924 -> 1.925 ( +0.05%) [ +0.00% +0.21% +0.05% / +0.05% +1.04% +1.25%] index_select perm_sorted : Elapsed 0.019 ms (1.924 ms / 100) out_shape = [4, 16, 20, 5] in_shape = [40, 16, 20, 5] idx_dim = 0 B = [4, 16, 20, 5] (stride (1600, 1, 16, 320)) A = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) dim = 0 1.365 -> 1.365 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.81% +0.66%] index_select const : Elapsed 0.014 ms (1.367 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_select wrap : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.66% +0.59%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.66% +0.59%] index_select reverse : Elapsed 0.014 ms (1.366 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.81% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.59%] index_select skip256 : Elapsed 0.014 ms (1.367 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.73% +0.80%] index_select spread : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.370 ( +0.22%) [ +0.07% +0.00% +0.00% / +0.22% +0.66% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.368 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.51% +0.59%] index_select strided 5 : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.66% +0.59%] index_select strided 7 : Elapsed 0.014 ms (1.368 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.369 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.44% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.369 ms / 100) 1.369 -> 1.376 ( +0.51%) [ +0.00% +0.00% +0.00% / +0.51% +0.51% +0.58%] index_select random : Elapsed 0.014 ms (1.369 ms / 100) 1.367 -> 1.368 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.368 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.66%] index_select perm : Elapsed 0.014 ms (1.367 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.368 ms / 100) B = [4, 16, 20, 5] (stride (1, 400, 4, 80)) A = [40, 16, 20, 5] (stride (80, 1, 3200, 16)) dim = 0 1.322 -> 1.322 ( +0.00%) [ +0.23% +0.15% +0.00% / +0.00% +0.45% +0.45%] index_select const : Elapsed 0.013 ms (1.325 ms / 100) 1.317 -> 1.320 ( +0.23%) [ +0.23% +0.15% +0.00% / +0.23% +0.23% +0.23%] index_select wrap : Elapsed 0.013 ms (1.320 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.30% +0.00% / +0.15% +0.53% +0.53%] index_select linear : Elapsed 0.013 ms (1.319 ms / 100) 1.315 -> 1.319 ( +0.30%) [ +0.15% +0.23% +0.00% / +0.30% +0.38% +0.46%] index_select reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.325 -> 1.324 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.75% +0.45%] index_select skip64 : Elapsed 0.013 ms (1.326 ms / 100) 1.326 -> 1.330 ( +0.30%) [ +0.00% +0.08% +0.08% / +0.30% +0.68% +0.68%] index_select skip256 : Elapsed 0.013 ms (1.326 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.00% +1.14% / +0.08% +0.46% +0.68%] index_select spread : Elapsed 0.013 ms (1.319 ms / 100) 1.320 -> 1.323 ( +0.23%) [ +0.30% +0.00% +0.23% / +0.23% +0.61% +0.91%] index_select strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.324 -> 1.326 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.45% +0.38%] index_select strided 5 : Elapsed 0.013 ms (1.326 ms / 100) 1.313 -> 1.317 ( +0.30%) [ +0.30% +0.38% +0.00% / +0.30% +0.53% +0.38%] index_select strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.325 -> 1.326 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_select strided 8 : Elapsed 0.013 ms (1.326 ms / 100) 1.325 -> 1.325 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_select strided 16 : Elapsed 0.013 ms (1.326 ms / 100) 1.320 -> 1.318 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.15% +0.08%] index_select random : Elapsed 0.013 ms (1.320 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.30% +0.30%] index_select random_sorted : Elapsed 0.013 ms (1.320 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.00% +0.23% +0.00% / +0.15% +0.30% +0.30%] index_select perm : Elapsed 0.013 ms (1.316 ms / 100) 1.326 -> 1.327 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +0.53% +0.68%] index_select perm_sorted : Elapsed 0.013 ms (1.329 ms / 100) B = [4, 16, 20, 5] (stride (80, 1, 320, 16)) A = [40, 16, 20, 5] (stride (320, 20, 1, 12800)) dim = 0 1.194 -> 1.194 ( +0.00%) [ +0.34% +0.08% +0.00% / +0.00% +0.75% +0.84%] index_select const : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select wrap : Elapsed 0.012 ms (1.198 ms / 100) 1.196 -> 1.198 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.50% +0.50%] index_select linear : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_select reverse : Elapsed 0.012 ms (1.198 ms / 100) 1.194 -> 1.195 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.75% +0.67%] index_select skip64 : Elapsed 0.012 ms (1.196 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.42% +0.50%] index_select skip256 : Elapsed 0.012 ms (1.197 ms / 100) 1.197 -> 1.198 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.33%] index_select spread : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.199 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.33% +0.33%] index_select strided 3 : Elapsed 0.012 ms (1.198 ms / 100) 1.198 -> 1.198 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.42% +0.25%] index_select strided 5 : Elapsed 0.012 ms (1.200 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.59% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.197 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.33% +0.42%] index_select strided 16 : Elapsed 0.012 ms (1.198 ms / 100) 1.197 -> 1.199 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.42% +0.50%] index_select random : Elapsed 0.012 ms (1.199 ms / 100) 1.196 -> 1.199 ( +0.25%) [ +0.17% +0.08% +0.00% / +0.25% +0.59% +0.59%] index_select random_sorted : Elapsed 0.012 ms (1.198 ms / 100) 1.195 -> 1.197 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.59% +0.59%] index_select perm : Elapsed 0.012 ms (1.197 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.08% +0.00% +0.17% / +0.00% +0.59% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.197 ms / 100) out_shape = [40, 4, 20, 5] in_shape = [40, 16, 20, 5] idx_dim = 1 B = [40, 4, 20, 5] (stride (400, 20, 1, 80)) A = [40, 16, 20, 5] (stride (1600, 20, 1, 320)) dim = 1 2.428 -> 2.434 ( +0.25%) [ +0.00% +0.00% +0.04% / +0.25% +0.25% +0.33%] index_select const : Elapsed 0.024 ms (2.428 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.28% +0.08% +0.00% / +0.12% +0.28% +0.28%] index_select wrap : Elapsed 0.025 ms (2.474 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.28% +0.36%] index_select linear : Elapsed 0.025 ms (2.474 ms / 100) 2.474 -> 2.479 ( +0.20%) [ +0.00% +0.04% +0.08% / +0.20% +0.44% +0.28%] index_select reverse : Elapsed 0.025 ms (2.474 ms / 100) 2.417 -> 2.424 ( +0.29%) [ +0.29% +0.00% +0.37% / +0.29% +0.54% +0.33%] index_select skip64 : Elapsed 0.024 ms (2.424 ms / 100) 2.423 -> 2.428 ( +0.21%) [ +0.29% +0.00% +0.17% / +0.21% +0.33% +0.50%] index_select skip256 : Elapsed 0.024 ms (2.430 ms / 100) 2.475 -> 2.483 ( +0.32%) [ +0.08% +0.00% +0.12% / +0.40% +0.44% +0.32%] index_select spread : Elapsed 0.025 ms (2.477 ms / 100) 2.480 -> 2.475 ( -0.20%) [ +0.00% +0.04% +0.00% / -0.20% +0.36% +0.44%] index_select strided 3 : Elapsed 0.025 ms (2.480 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.08% +0.00% +0.24% / +0.08% +0.61% +0.53%] index_select strided 5 : Elapsed 0.025 ms (2.473 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.08% +0.00% +0.28% / +0.08% +0.44% +0.49%] index_select strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.438 -> 2.441 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.78% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.439 ms / 100) 2.470 -> 2.467 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.28% +0.04%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.16% +0.00% +0.24% / +0.16% +0.53% +0.32%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.28% +0.00% +0.24% / +0.00% +0.24% +0.32%] index_select perm : Elapsed 0.025 ms (2.478 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.28% +0.04% +0.00% / -0.04% +0.28% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.479 ms / 100) B = [40, 4, 20, 5] (stride (400, 20, 1, 80)) A = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) dim = 1 2.543 -> 2.545 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.51% +0.39%] index_select const : Elapsed 0.025 ms (2.543 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.16% +0.39%] index_select wrap : Elapsed 0.025 ms (2.550 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.27% +0.24%] index_select linear : Elapsed 0.025 ms (2.549 ms / 100) 2.547 -> 2.549 ( +0.08%) [ +0.00% +0.20% +0.04% / +0.08% +0.24% +0.31%] index_select reverse : Elapsed 0.025 ms (2.547 ms / 100) 2.541 -> 2.545 ( +0.16%) [ +0.08% +0.12% +0.00% / +0.16% +0.35% +0.47%] index_select skip64 : Elapsed 0.025 ms (2.543 ms / 100) 2.542 -> 2.546 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.55% +0.47%] index_select skip256 : Elapsed 0.025 ms (2.548 ms / 100) 2.549 -> 2.552 ( +0.12%) [ +0.20% +0.00% +0.00% / +0.12% +0.43% +0.55%] index_select spread : Elapsed 0.026 ms (2.554 ms / 100) 2.547 -> 2.547 ( +0.00%) [ +0.20% +0.27% +0.00% / +0.00% +0.27% +0.47%] index_select strided 3 : Elapsed 0.026 ms (2.552 ms / 100) 2.550 -> 2.555 ( +0.20%) [ +0.04% +0.12% +0.00% / +0.20% +0.43% +0.35%] index_select strided 5 : Elapsed 0.026 ms (2.551 ms / 100) 2.545 -> 2.547 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.43% +0.59%] index_select strided 7 : Elapsed 0.025 ms (2.548 ms / 100) 2.542 -> 2.544 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.55% +0.59%] index_select strided 8 : Elapsed 0.025 ms (2.543 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.16% +0.04% +0.00% / +0.00% +0.27% +0.31%] index_select random : Elapsed 0.026 ms (2.552 ms / 100) 2.546 -> 2.548 ( +0.08%) [ +0.20% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_select random_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.547 -> 2.550 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.55% +0.43%] index_select perm : Elapsed 0.025 ms (2.547 ms / 100) 2.548 -> 2.553 ( +0.20%) [ +0.20% +0.08% +0.00% / +0.20% +0.43% +0.47%] index_select perm_sorted : Elapsed 0.026 ms (2.553 ms / 100) B = [40, 4, 20, 5] (stride (100, 4000, 1, 20)) A = [40, 16, 20, 5] (stride (16, 1, 3200, 640)) dim = 1 2.472 -> 2.469 ( -0.12%) [ +0.04% +0.00% +0.04% / -0.12% +0.40% +0.44%] index_select const : Elapsed 0.025 ms (2.473 ms / 100) 2.473 -> 2.478 ( +0.20%) [ +0.00% +0.04% +0.00% / +0.20% +0.65% +0.53%] index_select wrap : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.00% +0.04% +0.12% / +0.12% +0.77% +0.53%] index_select linear : Elapsed 0.025 ms (2.470 ms / 100) 2.468 -> 2.471 ( +0.12%) [ +0.16% +0.20% +0.00% / +0.12% +0.85% +1.09%] index_select reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.466 -> 2.472 ( +0.24%) [ +0.24% +0.12% +0.00% / +0.24% +0.77% +1.01%] index_select skip64 : Elapsed 0.025 ms (2.472 ms / 100) 2.470 -> 2.472 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.57% +0.73%] index_select skip256 : Elapsed 0.025 ms (2.470 ms / 100) 2.503 -> 2.511 ( +0.32%) [ +0.12% +0.00% +0.08% / +0.32% +0.84% +0.80%] index_select spread : Elapsed 0.025 ms (2.506 ms / 100) 2.506 -> 2.512 ( +0.24%) [ +0.08% +0.28% +0.00% / +0.24% +0.76% +0.72%] index_select strided 3 : Elapsed 0.025 ms (2.508 ms / 100) 2.506 -> 2.509 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.72% +0.52%] index_select strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.509 -> 2.506 ( -0.12%) [ +0.12% +0.08% +0.00% / -0.12% +0.52% +0.60%] index_select strided 7 : Elapsed 0.025 ms (2.512 ms / 100) 2.517 -> 2.518 ( +0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.56% +0.64%] index_select strided 8 : Elapsed 0.025 ms (2.521 ms / 100) 2.507 -> 2.507 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.64% +0.72%] index_select random : Elapsed 0.025 ms (2.509 ms / 100) 2.510 -> 2.511 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.56% +0.48%] index_select random_sorted : Elapsed 0.025 ms (2.510 ms / 100) 2.517 -> 2.523 ( +0.24%) [ +0.12% +0.12% +0.00% / +0.24% +0.56% +0.72%] index_select perm : Elapsed 0.025 ms (2.520 ms / 100) 2.507 -> 2.509 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.68% +0.44%] index_select perm_sorted : Elapsed 0.025 ms (2.510 ms / 100) B = [40, 4, 20, 5] (stride (5, 4000, 200, 1)) A = [40, 16, 20, 5] (stride (20, 800, 1, 12800)) dim = 1 2.391 -> 2.390 ( -0.04%) [ +0.04% +0.00% +0.13% / -0.04% +0.08% +0.25%] index_select const : Elapsed 0.024 ms (2.392 ms / 100) 2.445 -> 2.439 ( -0.25%) [ +0.00% +0.00% +0.04% / -0.25% +0.20% +0.29%] index_select wrap : Elapsed 0.024 ms (2.445 ms / 100) 2.433 -> 2.440 ( +0.29%) [ +0.00% +0.21% +0.08% / +0.29% +0.41% +0.49%] index_select linear : Elapsed 0.024 ms (2.433 ms / 100) 2.436 -> 2.440 ( +0.16%) [ +0.04% +0.29% +0.00% / +0.25% +0.16% +0.25%] index_select reverse : Elapsed 0.024 ms (2.437 ms / 100) 2.393 -> 2.391 ( -0.08%) [ +0.04% +0.13% +0.00% / -0.08% +0.17% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.394 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.08% +0.21% +0.04%] index_select skip256 : Elapsed 0.024 ms (2.394 ms / 100) 2.435 -> 2.439 ( +0.16%) [ +0.25% +0.00% +0.21% / +0.16% +0.37% +0.41%] index_select spread : Elapsed 0.024 ms (2.441 ms / 100) 2.435 -> 2.437 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.41% +0.45%] index_select strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.439 -> 2.445 ( +0.25%) [ +0.16% +0.21% +0.00% / +0.25% +0.37% +0.45%] index_select strided 5 : Elapsed 0.024 ms (2.443 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.16% +0.00% +0.08% / +0.12% +0.41% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.444 ms / 100) 2.402 -> 2.400 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.33% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.405 ms / 100) 2.440 -> 2.444 ( +0.16%) [ +0.00% +0.04% +0.12% / +0.16% +0.33% +0.49%] index_select random : Elapsed 0.024 ms (2.440 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.16% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.450 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.16% +0.16%] index_select perm : Elapsed 0.024 ms (2.447 ms / 100) 2.440 -> 2.442 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.37% +0.20%] index_select perm_sorted : Elapsed 0.024 ms (2.443 ms / 100) B = [40, 4, 20, 5] (stride (20, 4000, 1, 800)) A = [40, 16, 20, 5] (stride (100, 4000, 1, 20)) dim = 1 2.382 -> 2.384 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.46% +0.42%] index_select const : Elapsed 0.024 ms (2.382 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.29% +0.54%] index_select wrap : Elapsed 0.024 ms (2.421 ms / 100) 2.420 -> 2.424 ( +0.17%) [ +0.12% +0.00% +0.12% / +0.17% +0.50% +0.54%] index_select linear : Elapsed 0.024 ms (2.423 ms / 100) 2.416 -> 2.416 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.54% +0.66%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.371 -> 2.375 ( +0.17%) [ +0.08% +0.17% +0.00% / +0.17% +0.63% +0.72%] index_select skip64 : Elapsed 0.024 ms (2.373 ms / 100) 2.375 -> 2.383 ( +0.34%) [ +0.13% +0.29% +0.00% / +0.34% +0.80% +0.76%] index_select skip256 : Elapsed 0.024 ms (2.378 ms / 100) 2.420 -> 2.422 ( +0.08%) [ +0.17% +0.12% +0.00% / +0.08% +0.50% +0.54%] index_select spread : Elapsed 0.024 ms (2.424 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.21% +0.21% +0.00% / +0.17% +0.62% +0.66%] index_select strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.416 -> 2.421 ( +0.21%) [ +0.12% +0.17% +0.00% / +0.21% +0.50% +0.58%] index_select strided 5 : Elapsed 0.024 ms (2.419 ms / 100) 2.428 -> 2.431 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.49% +0.49%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.54% +0.63%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.421 -> 2.422 ( +0.04%) [ +0.17% +0.00% +0.00% / +0.04% +0.50% +0.45%] index_select random : Elapsed 0.024 ms (2.425 ms / 100) 2.416 -> 2.422 ( +0.25%) [ +0.00% +0.12% +0.04% / +0.25% +0.46% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.416 ms / 100) 2.422 -> 2.427 ( +0.21%) [ +0.04% +0.12% +0.00% / +0.21% +0.29% +0.37%] index_select perm : Elapsed 0.024 ms (2.423 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.37% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [40, 4, 20, 5] (stride (5, 200, 800, 1)) A = [40, 16, 20, 5] (stride (1600, 100, 1, 20)) dim = 1 2.384 -> 2.382 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.04% +0.08%] index_select const : Elapsed 0.024 ms (2.384 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.17% +0.08% +0.00% / -0.04% +0.12% +0.21%] index_select wrap : Elapsed 0.024 ms (2.428 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.12% +0.21% +0.00% / +0.08% +0.21% +0.12%] index_select linear : Elapsed 0.024 ms (2.427 ms / 100) 2.423 -> 2.426 ( +0.12%) [ +0.21% +0.12% +0.00% / +0.17% +0.12% +0.37%] index_select reverse : Elapsed 0.024 ms (2.428 ms / 100) 2.382 -> 2.387 ( +0.21%) [ +0.00% +0.21% +0.17% / +0.21% +0.25% +0.42%] index_select skip64 : Elapsed 0.024 ms (2.382 ms / 100) 2.383 -> 2.383 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.21% +0.04%] index_select skip256 : Elapsed 0.024 ms (2.384 ms / 100) 2.419 -> 2.423 ( +0.17%) [ +0.17% +0.33% +0.00% / +0.17% +0.45% +0.54%] index_select spread : Elapsed 0.024 ms (2.423 ms / 100) 2.427 -> 2.429 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.21% +0.08% +0.16%] index_select strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.00% +0.21% +0.08% / +0.08% +0.21% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.37% +0.21% +0.00% / +0.12% +0.33% +0.49%] index_select strided 7 : Elapsed 0.024 ms (2.435 ms / 100) 2.391 -> 2.395 ( +0.17%) [ +0.17% +0.13% +0.00% / +0.17% +0.46% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.395 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.08% +0.12%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.21% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.430 -> 2.429 ( -0.04%) [ +0.29% +0.12% +0.00% / -0.04% +0.37% +0.33%] index_select perm : Elapsed 0.024 ms (2.437 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.54% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.427 ms / 100) B = [40, 4, 20, 5] (stride (1, 40, 160, 3200)) A = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) dim = 1 2.552 -> 2.558 ( +0.24%) [ +0.12% +0.00% +0.12% / +0.27% +0.43% +0.24%] index_select const : Elapsed 0.026 ms (2.555 ms / 100) 2.554 -> 2.560 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.35% +0.27%] index_select wrap : Elapsed 0.026 ms (2.554 ms / 100) 2.557 -> 2.556 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.23% +0.23%] index_select linear : Elapsed 0.026 ms (2.559 ms / 100) 2.553 -> 2.552 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.47% +0.39%] index_select reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.550 -> 2.556 ( +0.24%) [ +0.00% +0.12% +0.04% / +0.24% +0.51% +0.47%] index_select skip64 : Elapsed 0.026 ms (2.550 ms / 100) 2.552 -> 2.552 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.43% +0.27%] index_select skip256 : Elapsed 0.026 ms (2.552 ms / 100) 2.547 -> 2.550 ( +0.12%) [ +0.00% +0.08% +0.27% / +0.12% +0.47% +0.67%] index_select spread : Elapsed 0.025 ms (2.547 ms / 100) 2.557 -> 2.561 ( +0.16%) [ +0.00% +0.04% +0.04% / +0.16% +0.55% +0.39%] index_select strided 3 : Elapsed 0.026 ms (2.557 ms / 100) 2.553 -> 2.561 ( +0.31%) [ +0.27% +0.16% +0.00% / +0.31% +0.55% +0.47%] index_select strided 5 : Elapsed 0.026 ms (2.560 ms / 100) 2.550 -> 2.553 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.12% +0.27% +0.51%] index_select strided 7 : Elapsed 0.026 ms (2.554 ms / 100) 2.549 -> 2.552 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.43% +0.39%] index_select strided 8 : Elapsed 0.026 ms (2.551 ms / 100) 2.557 -> 2.556 ( -0.04%) [ +0.04% +0.00% +0.12% / -0.04% +0.23% +0.39%] index_select random : Elapsed 0.026 ms (2.558 ms / 100) 2.553 -> 2.553 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.39% +0.27%] index_select random_sorted : Elapsed 0.026 ms (2.555 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.16% +0.23%] index_select perm : Elapsed 0.026 ms (2.562 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.27% +0.24%] index_select perm_sorted : Elapsed 0.026 ms (2.555 ms / 100) out_shape = [40, 16, 4, 5] in_shape = [40, 16, 20, 5] idx_dim = 2 B = [40, 16, 4, 5] (stride (16, 1, 3200, 640)) A = [40, 16, 20, 5] (stride (80, 5, 3200, 1)) dim = 2 0.737 -> 0.741 ( +0.54%) [ +0.27% +0.14% +0.00% / +0.54% +2.44% +2.04%] index_select const : Elapsed 0.007 ms (0.739 ms / 100) 0.741 -> 0.742 ( +0.13%) [ +0.13% +0.94% +0.00% / +0.13% +4.32% +5.13%] index_select wrap : Elapsed 0.007 ms (0.742 ms / 100) 0.741 -> 0.742 ( +0.13%) [ +0.27% +0.40% +0.00% / +0.13% +4.05% +3.64%] index_select linear : Elapsed 0.007 ms (0.743 ms / 100) 0.748 -> 0.748 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.80%] index_select reverse : Elapsed 0.007 ms (0.748 ms / 100) 0.747 -> 0.741 ( -0.80%) [ +0.40% +0.27% +0.00% / +0.54% -0.54% -0.80%] index_select skip64 : Elapsed 0.007 ms (0.750 ms / 100) 0.738 -> 0.741 ( +0.41%) [ +0.54% +0.00% +0.00% / +0.41% +2.17% +2.03%] index_select skip256 : Elapsed 0.007 ms (0.742 ms / 100) 0.736 -> 0.738 ( +0.27%) [ +0.41% +0.82% +0.00% / +0.27% +4.08% +3.67%] index_select spread : Elapsed 0.007 ms (0.739 ms / 100) 0.749 -> 0.751 ( +0.27%) [ +0.80% +0.53% +0.00% / +0.27% +2.27% +2.40%] index_select strided 3 : Elapsed 0.008 ms (0.755 ms / 100) 0.739 -> 0.746 ( +0.95%) [ +0.68% +0.68% +0.00% / +0.95% +1.89% +1.62%] index_select strided 5 : Elapsed 0.007 ms (0.744 ms / 100) 0.750 -> 0.746 ( -0.53%) [ +0.27% +0.27% +0.00% / +0.00% -0.53% -0.53%] index_select strided 7 : Elapsed 0.008 ms (0.752 ms / 100) 0.748 -> 0.747 ( -0.13%) [ +0.80% +0.00% +0.67% / +0.00% -0.13% -0.13%] index_select strided 8 : Elapsed 0.008 ms (0.754 ms / 100) 0.744 -> 0.740 ( -0.54%) [ +0.13% +0.27% +0.00% / -0.54% +0.27% +0.54%] index_select strided 16 : Elapsed 0.007 ms (0.745 ms / 100) 0.745 -> 0.745 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.13% +0.00% +0.13%] index_select random : Elapsed 0.007 ms (0.745 ms / 100) 0.741 -> 0.741 ( +0.00%) [ +0.00% +0.40% +0.00% / +0.00% +0.81% +0.67%] index_select random_sorted : Elapsed 0.007 ms (0.741 ms / 100) 0.740 -> 0.744 ( +0.54%) [ +0.27% +0.27% +0.00% / +0.54% +1.22% +1.22%] index_select perm : Elapsed 0.007 ms (0.742 ms / 100) 0.741 -> 0.743 ( +0.27%) [ +0.13% +0.00% +0.67% / +0.27% +1.08% +1.08%] index_select perm_sorted : Elapsed 0.007 ms (0.742 ms / 100) B = [40, 16, 4, 5] (stride (4, 160, 1, 2560)) A = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) dim = 2 2.195 -> 2.197 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.50% +0.46%] index_select const : Elapsed 0.022 ms (2.195 ms / 100) 2.196 -> 2.202 ( +0.27%) [ +0.00% +0.14% +0.05% / +0.27% +0.50% +0.73%] index_select wrap : Elapsed 0.022 ms (2.196 ms / 100) 2.201 -> 2.203 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.55% +0.45%] index_select linear : Elapsed 0.022 ms (2.201 ms / 100) 2.202 -> 2.208 ( +0.27%) [ +0.23% +0.14% +0.00% / +0.27% +0.27% +0.41%] index_select reverse : Elapsed 0.022 ms (2.207 ms / 100) 2.194 -> 2.197 ( +0.14%) [ +0.09% +0.23% +0.00% / +0.14% +0.50% +0.64%] index_select skip64 : Elapsed 0.022 ms (2.196 ms / 100) 2.195 -> 2.197 ( +0.09%) [ +0.00% +0.18% +0.00% / +0.09% +0.55% +0.64%] index_select skip256 : Elapsed 0.022 ms (2.195 ms / 100) 2.201 -> 2.205 ( +0.18%) [ +0.00% +0.05% +0.00% / +0.18% +0.32% +0.41%] index_select spread : Elapsed 0.022 ms (2.201 ms / 100) 2.195 -> 2.196 ( +0.05%) [ +0.32% +0.14% +0.00% / +0.05% +0.41% +0.46%] index_select strided 3 : Elapsed 0.022 ms (2.202 ms / 100) 2.203 -> 2.206 ( +0.14%) [ +0.09% +0.14% +0.00% / +0.14% +0.14% +0.23%] index_select strided 5 : Elapsed 0.022 ms (2.205 ms / 100) 2.196 -> 2.198 ( +0.09%) [ +0.23% +0.23% +0.00% / +0.09% +0.64% +0.59%] index_select strided 7 : Elapsed 0.022 ms (2.201 ms / 100) 2.199 -> 2.206 ( +0.32%) [ +0.23% +0.36% +0.00% / +0.32% +0.50% +0.41%] index_select strided 8 : Elapsed 0.022 ms (2.204 ms / 100) 2.201 -> 2.205 ( +0.18%) [ +0.00% +0.14% +0.00% / +0.18% +0.50% +0.32%] index_select strided 16 : Elapsed 0.022 ms (2.201 ms / 100) 2.205 -> 2.204 ( -0.05%) [ +0.32% +0.00% +0.05% / +0.14% -0.05% +0.00%] index_select random : Elapsed 0.022 ms (2.212 ms / 100) 2.205 -> 2.207 ( +0.09%) [ +0.00% +0.14% +0.14% / +0.18% +0.09% +0.32%] index_select random_sorted : Elapsed 0.022 ms (2.205 ms / 100) 2.200 -> 2.201 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.27% +0.27%] index_select perm : Elapsed 0.022 ms (2.201 ms / 100) 2.202 -> 2.204 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.32% +0.23%] index_select perm_sorted : Elapsed 0.022 ms (2.202 ms / 100) out_shape = [40, 16, 20, 4] in_shape = [40, 16, 20, 5] idx_dim = 3 B = [40, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [40, 16, 20, 5] (stride (100, 4000, 1, 20)) dim = 3 5.266 -> 5.258 ( -0.15%) [ +0.09% +0.00% +0.08% / +0.13% -0.08% -0.15%] index_select const : Elapsed 0.053 ms (5.271 ms / 100) 5.357 -> 5.351 ( -0.11%) [ +0.09% +0.00% +0.06% / +0.06% +0.04% -0.11%] index_select wrap : Elapsed 0.054 ms (5.362 ms / 100) 5.363 -> 5.358 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.09% +0.06%] index_select linear : Elapsed 0.054 ms (5.363 ms / 100) 5.351 -> 5.353 ( +0.04%) [ +0.11% +0.00% +0.06% / +0.04% +0.17% +0.21%] index_select reverse : Elapsed 0.054 ms (5.357 ms / 100) 5.261 -> 5.258 ( -0.06%) [ +0.00% +0.00% +0.04% / +0.02% -0.06% -0.06%] index_select skip64 : Elapsed 0.053 ms (5.261 ms / 100) 5.268 -> 5.266 ( -0.04%) [ +0.00% +0.00% +0.00% / +0.17% -0.04% +0.09%] index_select skip256 : Elapsed 0.053 ms (5.268 ms / 100) 5.348 -> 5.356 ( +0.15%) [ +0.00% +0.07% +0.21% / +0.22% +0.32% +0.15%] index_select spread : Elapsed 0.053 ms (5.348 ms / 100) 5.353 -> 5.354 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.22% +0.11%] index_select strided 3 : Elapsed 0.054 ms (5.353 ms / 100) 5.350 -> 5.356 ( +0.11%) [ +0.00% +0.00% +0.13% / +0.11% +0.22% +0.17%] index_select random : Elapsed 0.053 ms (5.350 ms / 100) 5.356 -> 5.363 ( +0.13%) [ +0.00% +0.11% +0.06% / +0.13% +0.19% +0.24%] index_select random_sorted : Elapsed 0.054 ms (5.356 ms / 100) 5.358 -> 5.361 ( +0.06%) [ +0.09% +0.00% +0.11% / +0.19% +0.06% +0.19%] index_select perm : Elapsed 0.054 ms (5.363 ms / 100) 5.352 -> 5.351 ( -0.02%) [ +0.00% +0.07% +0.02% / +0.19% +0.07% -0.02%] index_select perm_sorted : Elapsed 0.054 ms (5.352 ms / 100) B = [40, 16, 20, 4] (stride (1280, 20, 1, 320)) A = [40, 16, 20, 5] (stride (5, 4000, 200, 1)) dim = 3 3.427 -> 3.409 ( -0.53%) [ +0.03% +0.06% +0.00% / -0.09% -0.09% -0.53%] index_select const : Elapsed 0.034 ms (3.428 ms / 100) 3.425 -> 3.420 ( -0.15%) [ +0.00% +0.12% +0.12% / -0.15% +0.20% +0.03%] index_select wrap : Elapsed 0.034 ms (3.425 ms / 100) 3.424 -> 3.426 ( +0.06%) [ +0.03% +0.12% +0.00% / +0.09% +0.06% +0.12%] index_select linear : Elapsed 0.034 ms (3.425 ms / 100) 3.430 -> 3.418 ( -0.35%) [ +0.00% +0.06% +0.00% / +0.15% -0.35% -0.12%] index_select reverse : Elapsed 0.034 ms (3.430 ms / 100) 3.423 -> 3.413 ( -0.29%) [ +0.00% +0.06% +0.06% / -0.03% -0.18% -0.29%] index_select skip64 : Elapsed 0.034 ms (3.423 ms / 100) 3.423 -> 3.409 ( -0.41%) [ +0.06% +0.00% +0.06% / +0.09% -0.41% -0.12%] index_select skip256 : Elapsed 0.034 ms (3.425 ms / 100) 3.430 -> 3.414 ( -0.47%) [ +0.09% +0.06% +0.00% / +0.00% -0.41% -0.47%] index_select spread : Elapsed 0.034 ms (3.433 ms / 100) 3.416 -> 3.409 ( -0.20%) [ +0.00% +0.41% +0.32% / +0.41% -0.20% +0.06%] index_select strided 3 : Elapsed 0.034 ms (3.416 ms / 100) 3.433 -> 3.415 ( -0.52%) [ +0.06% +0.00% +0.15% / +0.09% -0.52% -0.38%] index_select random : Elapsed 0.034 ms (3.435 ms / 100) 3.420 -> 3.424 ( +0.12%) [ +0.00% +0.18% +0.06% / +0.20% +0.12% +0.15%] index_select random_sorted : Elapsed 0.034 ms (3.420 ms / 100) 3.425 -> 3.418 ( -0.20%) [ +0.00% +0.06% +0.06% / +0.23% -0.18% -0.20%] index_select perm : Elapsed 0.034 ms (3.425 ms / 100) 3.429 -> 3.427 ( -0.06%) [ +0.06% +0.09% +0.00% / +0.06% +0.03% -0.06%] index_select perm_sorted : Elapsed 0.034 ms (3.431 ms / 100) B = [40, 16, 20, 4] (stride (1280, 1, 16, 320)) A = [40, 16, 20, 5] (stride (5, 4000, 200, 1)) dim = 3 5.728 -> 5.699 ( -0.51%) [ +0.00% +0.03% +0.14% / +0.05% -0.42% -0.51%] index_select const : Elapsed 0.057 ms (5.728 ms / 100) 5.721 -> 5.701 ( -0.35%) [ +0.00% +0.10% +0.16% / +0.24% -0.35% -0.35%] index_select wrap : Elapsed 0.057 ms (5.721 ms / 100) 5.727 -> 5.698 ( -0.51%) [ +0.00% +0.09% +0.03% / +0.07% -0.33% -0.51%] index_select linear : Elapsed 0.057 ms (5.727 ms / 100) 5.723 -> 5.693 ( -0.52%) [ +0.03% +0.02% +0.00% / +0.09% -0.52% -0.49%] index_select reverse : Elapsed 0.057 ms (5.725 ms / 100) 5.727 -> 5.700 ( -0.47%) [ +0.00% +0.03% +0.09% / +0.17% -0.47% -0.44%] index_select skip64 : Elapsed 0.057 ms (5.727 ms / 100) 5.727 -> 5.695 ( -0.56%) [ +0.14% +0.00% +0.05% / +0.16% -0.56% -0.42%] index_select skip256 : Elapsed 0.057 ms (5.735 ms / 100) 5.721 -> 5.703 ( -0.31%) [ +0.00% +0.03% +0.16% / +0.14% -0.28% -0.31%] index_select spread : Elapsed 0.057 ms (5.721 ms / 100) 5.728 -> 5.697 ( -0.54%) [ +0.00% +0.03% +0.07% / +0.02% -0.54% -0.42%] index_select strided 3 : Elapsed 0.057 ms (5.728 ms / 100) 5.717 -> 5.690 ( -0.47%) [ +0.00% +0.12% +0.16% / +0.16% -0.42% -0.47%] index_select random : Elapsed 0.057 ms (5.717 ms / 100) 5.729 -> 5.701 ( -0.49%) [ +0.00% +0.00% +0.10% / +0.05% -0.40% -0.49%] index_select random_sorted : Elapsed 0.057 ms (5.729 ms / 100) 5.717 -> 5.690 ( -0.47%) [ +0.10% +0.00% +0.26% / +0.00% -0.47% +0.05%] index_select perm : Elapsed 0.057 ms (5.723 ms / 100) 5.719 -> 5.700 ( -0.33%) [ +0.00% +0.17% +0.24% / +0.19% -0.33% -0.23%] index_select perm_sorted : Elapsed 0.057 ms (5.719 ms / 100) B = [40, 16, 20, 4] (stride (1, 160, 2560, 40)) A = [40, 16, 20, 5] (stride (1600, 100, 1, 20)) dim = 3 5.528 -> 5.529 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.25% +0.27%] index_select const : Elapsed 0.055 ms (5.529 ms / 100) 5.612 -> 5.618 ( +0.11%) [ +0.00% +0.04% +0.07% / +0.11% +0.36% +0.37%] index_select wrap : Elapsed 0.056 ms (5.612 ms / 100) 5.614 -> 5.620 ( +0.11%) [ +0.11% +0.00% +0.14% / +0.11% +0.25% +0.45%] index_select linear : Elapsed 0.056 ms (5.620 ms / 100) 5.620 -> 5.623 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.41% +0.32%] index_select reverse : Elapsed 0.056 ms (5.620 ms / 100) 5.520 -> 5.530 ( +0.18%) [ +0.09% +0.00% +0.13% / +0.18% +0.31% +0.33%] index_select skip64 : Elapsed 0.055 ms (5.525 ms / 100) 5.525 -> 5.530 ( +0.09%) [ +0.00% +0.05% +0.24% / +0.09% +0.24% +0.36%] index_select skip256 : Elapsed 0.055 ms (5.525 ms / 100) 5.612 -> 5.614 ( +0.04%) [ +0.00% +0.02% +0.05% / +0.04% +0.27% +0.45%] index_select spread : Elapsed 0.056 ms (5.612 ms / 100) 5.614 -> 5.623 ( +0.16%) [ +0.00% +0.04% +0.09% / +0.16% +0.34% +0.45%] index_select strided 3 : Elapsed 0.056 ms (5.614 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.00% +0.11% +0.22% / +0.07% +0.27% +0.27%] index_select random : Elapsed 0.056 ms (5.563 ms / 100) 5.553 -> 5.563 ( +0.18%) [ +0.00% +0.05% +0.09% / +0.18% +0.36% +0.54%] index_select random_sorted : Elapsed 0.056 ms (5.553 ms / 100) 5.619 -> 5.619 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.16% +0.25%] index_select perm : Elapsed 0.056 ms (5.619 ms / 100) 5.620 -> 5.620 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.16% +0.21%] index_select perm_sorted : Elapsed 0.056 ms (5.625 ms / 100) B = [40, 16, 20, 4] (stride (1, 40, 640, 12800)) A = [40, 16, 20, 5] (stride (80, 1, 3200, 16)) dim = 3 5.793 -> 5.775 ( -0.31%) [ +0.09% +0.00% +0.10% / +0.24% -0.31% -0.12%] index_select const : Elapsed 0.058 ms (5.798 ms / 100) 5.863 -> 5.848 ( -0.26%) [ +0.02% +0.07% +0.00% / +0.15% -0.17% -0.26%] index_select wrap : Elapsed 0.059 ms (5.864 ms / 100) 5.864 -> 5.850 ( -0.24%) [ +0.03% +0.00% +0.20% / +0.17% -0.24% -0.24%] index_select linear : Elapsed 0.059 ms (5.866 ms / 100) 5.868 -> 5.850 ( -0.31%) [ +0.09% +0.00% +0.03% / +0.00% -0.31% -0.26%] index_select reverse : Elapsed 0.059 ms (5.873 ms / 100) 5.794 -> 5.776 ( -0.31%) [ +0.14% +0.14% +0.00% / +0.10% -0.31% -0.29%] index_select skip64 : Elapsed 0.058 ms (5.802 ms / 100) 5.794 -> 5.768 ( -0.45%) [ +0.05% +0.00% +0.00% / +0.05% -0.45% -0.26%] index_select skip256 : Elapsed 0.058 ms (5.797 ms / 100) 5.868 -> 5.847 ( -0.36%) [ +0.05% +0.00% +0.07% / +0.03% -0.34% -0.36%] index_select spread : Elapsed 0.059 ms (5.871 ms / 100) 5.886 -> 5.874 ( -0.20%) [ +0.05% +0.00% +0.10% / +0.12% -0.20% -0.19%] index_select strided 3 : Elapsed 0.059 ms (5.889 ms / 100) 5.848 -> 5.826 ( -0.38%) [ +0.00% +0.05% +0.05% / +0.15% -0.38% -0.34%] index_select random : Elapsed 0.058 ms (5.848 ms / 100) 5.841 -> 5.827 ( -0.24%) [ +0.00% +0.09% +0.15% / +0.12% -0.19% -0.24%] index_select random_sorted : Elapsed 0.058 ms (5.841 ms / 100) 5.877 -> 5.858 ( -0.32%) [ +0.00% +0.20% +0.19% / +0.17% -0.32% -0.27%] index_select perm : Elapsed 0.059 ms (5.877 ms / 100) 5.874 -> 5.858 ( -0.27%) [ +0.19% +0.00% +0.14% / +0.02% -0.27% -0.22%] index_select perm_sorted : Elapsed 0.059 ms (5.885 ms / 100) out_shape = [4, 20, 5, 16] in_shape = [40, 20, 5, 16] idx_dim = 0 B = [4, 20, 5, 16] (stride (1, 320, 4, 20)) A = [40, 20, 5, 16] (stride (16, 3200, 640, 1)) dim = 0 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.76% +0.59%] index_select const : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.76%] index_select wrap : Elapsed 0.012 ms (1.192 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_select linear : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.00% +0.42% / +0.00% +0.76% +0.76%] index_select reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.76% +0.76%] index_select skip64 : Elapsed 0.012 ms (1.191 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_select spread : Elapsed 0.012 ms (1.191 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.67%] index_select strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_select strided 5 : Elapsed 0.012 ms (1.191 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_select strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.194 ( +0.25%) [ +0.17% +0.17% +0.00% / +0.25% +0.84% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.193 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.59% +0.50%] index_select random_sorted : Elapsed 0.012 ms (1.193 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select perm : Elapsed 0.012 ms (1.194 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.25% +0.08% +0.00% / +0.08% +0.50% +0.34%] index_select perm_sorted : Elapsed 0.012 ms (1.195 ms / 100) B = [4, 20, 5, 16] (stride (20, 1, 80, 400)) A = [40, 20, 5, 16] (stride (1600, 5, 1, 100)) dim = 0 1.237 -> 1.238 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.57% +0.73%] index_select const : Elapsed 0.012 ms (1.238 ms / 100) 1.205 -> 1.206 ( +0.08%) [ +0.25% +0.00% +0.00% / +0.08% +0.50% +0.58%] index_select wrap : Elapsed 0.012 ms (1.208 ms / 100) 1.213 -> 1.214 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.74% +0.49%] index_select linear : Elapsed 0.012 ms (1.215 ms / 100) 1.202 -> 1.202 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.42% +0.50%] index_select reverse : Elapsed 0.012 ms (1.202 ms / 100) 1.236 -> 1.238 ( +0.16%) [ +0.24% +0.16% +0.00% / +0.16% +0.73% +0.81%] index_select skip64 : Elapsed 0.012 ms (1.239 ms / 100) 1.237 -> 1.237 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select skip256 : Elapsed 0.012 ms (1.237 ms / 100) 1.228 -> 1.229 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.65% +0.57%] index_select spread : Elapsed 0.012 ms (1.231 ms / 100) 1.237 -> 1.239 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.57% +0.65%] index_select strided 3 : Elapsed 0.012 ms (1.239 ms / 100) 1.217 -> 1.217 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.41% +0.33%] index_select strided 5 : Elapsed 0.012 ms (1.218 ms / 100) 1.239 -> 1.239 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.73% +0.40%] index_select strided 7 : Elapsed 0.012 ms (1.240 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.49% +0.41%] index_select strided 8 : Elapsed 0.012 ms (1.226 ms / 100) 1.236 -> 1.236 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.40% +0.57%] index_select strided 16 : Elapsed 0.012 ms (1.236 ms / 100) 1.202 -> 1.203 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.33% +0.42%] index_select random : Elapsed 0.012 ms (1.203 ms / 100) 1.212 -> 1.212 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.66% +0.33%] index_select random_sorted : Elapsed 0.012 ms (1.215 ms / 100) 1.218 -> 1.219 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_select perm : Elapsed 0.012 ms (1.219 ms / 100) 1.211 -> 1.213 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.33% +0.41%] index_select perm_sorted : Elapsed 0.012 ms (1.211 ms / 100) out_shape = [40, 4, 5, 16] in_shape = [40, 20, 5, 16] idx_dim = 1 B = [40, 4, 5, 16] (stride (16, 3200, 640, 1)) A = [40, 20, 5, 16] (stride (80, 3200, 1, 5)) dim = 1 2.005 -> 2.006 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.85% +0.75%] index_select const : Elapsed 0.020 ms (2.007 ms / 100) 2.003 -> 2.007 ( +0.20%) [ +0.05% +0.15% +0.00% / +0.20% +0.55% +0.55%] index_select wrap : Elapsed 0.020 ms (2.004 ms / 100) 2.002 -> 2.001 ( -0.05%) [ +0.00% +0.20% +0.00% / -0.05% +0.60% +0.55%] index_select linear : Elapsed 0.020 ms (2.002 ms / 100) 2.017 -> 2.014 ( -0.15%) [ +0.25% +0.00% +0.10% / -0.15% +0.64% +0.50%] index_select reverse : Elapsed 0.020 ms (2.022 ms / 100) 1.997 -> 2.000 ( +0.15%) [ +0.25% +0.05% +0.00% / +0.15% +0.75% +0.65%] index_select skip64 : Elapsed 0.020 ms (2.002 ms / 100) 2.005 -> 2.004 ( -0.05%) [ +0.20% +0.00% +0.10% / -0.05% +0.80% +0.70%] index_select skip256 : Elapsed 0.020 ms (2.009 ms / 100) 2.009 -> 2.010 ( +0.05%) [ +0.00% +0.40% +0.05% / +0.05% +0.25% +0.25%] index_select spread : Elapsed 0.020 ms (2.009 ms / 100) 2.002 -> 2.003 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.90% +1.05%] index_select strided 3 : Elapsed 0.020 ms (2.003 ms / 100) 2.003 -> 2.006 ( +0.15%) [ +0.20% +0.00% +0.05% / +0.15% +0.30% +0.35%] index_select strided 5 : Elapsed 0.020 ms (2.007 ms / 100) 2.011 -> 2.011 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.50% +0.70%] index_select strided 7 : Elapsed 0.020 ms (2.012 ms / 100) 2.010 -> 2.012 ( +0.10%) [ +0.25% +0.15% +0.00% / +0.10% +1.00% +1.00%] index_select strided 8 : Elapsed 0.020 ms (2.015 ms / 100) 1.997 -> 2.001 ( +0.20%) [ +0.00% +0.00% +0.05% / +0.20% +1.40% +1.25%] index_select strided 16 : Elapsed 0.020 ms (1.997 ms / 100) 2.002 -> 2.004 ( +0.10%) [ +0.00% +0.25% +0.00% / +0.10% +0.65% +0.65%] index_select random : Elapsed 0.020 ms (2.002 ms / 100) 2.008 -> 2.011 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.15% +0.20%] index_select random_sorted : Elapsed 0.020 ms (2.011 ms / 100) 2.002 -> 2.007 ( +0.25%) [ +0.25% +0.05% +0.00% / +0.25% +0.80% +0.90%] index_select perm : Elapsed 0.020 ms (2.007 ms / 100) 2.022 -> 2.025 ( +0.15%) [ +0.10% +0.35% +0.00% / +0.15% +0.54% +0.54%] index_select perm_sorted : Elapsed 0.020 ms (2.024 ms / 100) B = [40, 4, 5, 16] (stride (1, 3200, 40, 200)) A = [40, 20, 5, 16] (stride (16, 640, 12800, 1)) dim = 1 2.023 -> 2.024 ( +0.05%) [ +0.20% +0.00% +0.05% / +0.05% +0.35% +0.54%] index_select const : Elapsed 0.020 ms (2.027 ms / 100) 2.053 -> 2.055 ( +0.10%) [ +0.19% +0.15% +0.00% / +0.10% +1.17% +1.22%] index_select wrap : Elapsed 0.021 ms (2.057 ms / 100) 2.056 -> 2.058 ( +0.10%) [ +0.00% +0.15% +0.00% / +0.10% +1.02% +1.12%] index_select linear : Elapsed 0.021 ms (2.056 ms / 100) 2.054 -> 2.057 ( +0.15%) [ +0.29% +0.24% +0.00% / +0.15% +0.88% +1.27%] index_select reverse : Elapsed 0.021 ms (2.060 ms / 100) 2.023 -> 2.028 ( +0.25%) [ +0.20% +0.15% +0.00% / +0.25% +0.30% +0.40%] index_select skip64 : Elapsed 0.020 ms (2.027 ms / 100) 2.012 -> 2.013 ( +0.05%) [ +0.30% +0.00% +0.20% / +0.05% +0.60% +0.40%] index_select skip256 : Elapsed 0.020 ms (2.018 ms / 100) 2.063 -> 2.064 ( +0.05%) [ +0.00% +0.15% +0.24% / +0.05% +0.29% +0.29%] index_select spread : Elapsed 0.021 ms (2.063 ms / 100) 2.070 -> 2.067 ( -0.14%) [ +0.00% +0.10% +0.05% / -0.14% +0.48% +0.53%] index_select strided 3 : Elapsed 0.021 ms (2.070 ms / 100) 2.064 -> 2.059 ( -0.24%) [ +0.19% +0.00% +0.19% / +0.34% -0.24% +0.15%] index_select strided 5 : Elapsed 0.021 ms (2.068 ms / 100) 2.053 -> 2.055 ( +0.10%) [ +0.15% +0.39% +0.00% / +0.10% +0.68% +0.63%] index_select strided 7 : Elapsed 0.021 ms (2.056 ms / 100) 2.072 -> 2.069 ( -0.14%) [ +0.05% +0.05% +0.00% / -0.14% +0.29% +0.43%] index_select strided 8 : Elapsed 0.021 ms (2.073 ms / 100) 2.064 -> 2.058 ( -0.29%) [ +0.05% +0.05% +0.00% / -0.15% -0.24% -0.29%] index_select strided 16 : Elapsed 0.021 ms (2.065 ms / 100) 2.054 -> 2.055 ( +0.05%) [ +0.19% +0.19% +0.00% / +0.05% +0.97% +0.78%] index_select random : Elapsed 0.021 ms (2.058 ms / 100) 2.059 -> 2.062 ( +0.15%) [ +0.29% +0.00% +0.19% / +0.15% +0.78% +1.02%] index_select random_sorted : Elapsed 0.021 ms (2.065 ms / 100) 2.068 -> 2.064 ( -0.19%) [ +0.10% +0.05% +0.00% / +0.19% -0.19% -0.10%] index_select perm : Elapsed 0.021 ms (2.070 ms / 100) 2.069 -> 2.062 ( -0.34%) [ +0.05% +0.00% +0.05% / +0.19% -0.34% -0.34%] index_select perm_sorted : Elapsed 0.021 ms (2.070 ms / 100) B = [40, 4, 5, 16] (stride (16, 640, 2560, 1)) A = [40, 20, 5, 16] (stride (1600, 1, 320, 20)) dim = 1 2.041 -> 2.038 ( -0.15%) [ +0.00% +0.15% +0.10% / +0.29% -0.15% -0.15%] index_select const : Elapsed 0.020 ms (2.041 ms / 100) 2.022 -> 2.024 ( +0.10%) [ +0.00% +0.00% +0.25% / +0.10% +0.49% +0.59%] index_select wrap : Elapsed 0.020 ms (2.022 ms / 100) 2.027 -> 2.030 ( +0.15%) [ +0.20% +0.00% +0.10% / +0.35% +0.15% +0.25%] index_select linear : Elapsed 0.020 ms (2.031 ms / 100) 2.040 -> 2.038 ( -0.10%) [ +0.00% +0.25% +0.10% / -0.10% +0.00% +0.05%] index_select reverse : Elapsed 0.020 ms (2.040 ms / 100) 2.044 -> 2.047 ( +0.15%) [ +0.00% +0.29% +0.10% / +0.15% +0.59% +0.54%] index_select skip64 : Elapsed 0.020 ms (2.044 ms / 100) 2.044 -> 2.046 ( +0.10%) [ +0.15% +0.00% +0.10% / +0.10% +0.73% +0.44%] index_select skip256 : Elapsed 0.020 ms (2.047 ms / 100) 2.092 -> 2.093 ( +0.05%) [ +0.14% +0.19% +0.00% / +0.14% +0.10% +0.05%] index_select spread : Elapsed 0.021 ms (2.095 ms / 100) 2.062 -> 2.061 ( -0.05%) [ +0.10% +0.19% +0.00% / +0.15% -0.05% +0.05%] index_select strided 3 : Elapsed 0.021 ms (2.064 ms / 100) 2.080 -> 2.079 ( -0.05%) [ +0.14% +0.00% +0.00% / -0.05% +0.38% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.083 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.10% +0.38% +0.00% / +0.00% +0.00% +0.14%] index_select strided 7 : Elapsed 0.021 ms (2.081 ms / 100) 2.084 -> 2.086 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.14% +0.29% +0.10%] index_select strided 8 : Elapsed 0.021 ms (2.086 ms / 100) 2.079 -> 2.081 ( +0.10%) [ +0.24% +0.10% +0.00% / +0.10% +0.29% +0.34%] index_select strided 16 : Elapsed 0.021 ms (2.084 ms / 100) 2.049 -> 2.050 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.10% +0.05% +0.15%] index_select random : Elapsed 0.021 ms (2.051 ms / 100) 2.040 -> 2.039 ( -0.05%) [ +0.15% +0.00% +0.25% / -0.05% +0.44% +0.39%] index_select random_sorted : Elapsed 0.020 ms (2.043 ms / 100) 2.045 -> 2.049 ( +0.20%) [ +0.20% +0.24% +0.00% / +0.20% +0.39% +0.44%] index_select perm : Elapsed 0.020 ms (2.049 ms / 100) 2.072 -> 2.078 ( +0.29%) [ +0.00% +0.39% +0.29% / +0.39% +0.29% +0.58%] index_select perm_sorted : Elapsed 0.021 ms (2.072 ms / 100) B = [40, 4, 5, 16] (stride (4, 1, 2560, 160)) A = [40, 20, 5, 16] (stride (1, 40, 800, 4000)) dim = 1 2.073 -> 2.073 ( +0.00%) [ +0.19% +0.34% +0.00% / +0.00% +0.34% +0.58%] index_select const : Elapsed 0.021 ms (2.077 ms / 100) 2.085 -> 2.083 ( -0.10%) [ +0.14% +0.10% +0.00% / -0.10% +0.24% +0.19%] index_select wrap : Elapsed 0.021 ms (2.088 ms / 100) 2.078 -> 2.084 ( +0.29%) [ +0.00% +0.38% +0.29% / +0.29% +0.29% +0.48%] index_select linear : Elapsed 0.021 ms (2.078 ms / 100) 2.080 -> 2.081 ( +0.05%) [ +0.19% +0.05% +0.00% / +0.05% +0.48% +0.53%] index_select reverse : Elapsed 0.021 ms (2.084 ms / 100) 2.074 -> 2.075 ( +0.05%) [ +0.00% +0.14% +0.10% / +0.05% +0.43% +0.48%] index_select skip64 : Elapsed 0.021 ms (2.074 ms / 100) 2.077 -> 2.076 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.14% +0.29%] index_select skip256 : Elapsed 0.021 ms (2.078 ms / 100) 2.083 -> 2.085 ( +0.10%) [ +0.00% +0.05% +0.14% / +0.10% +0.34% +0.38%] index_select spread : Elapsed 0.021 ms (2.083 ms / 100) 2.081 -> 2.084 ( +0.14%) [ +0.24% +0.19% +0.00% / +0.14% +0.38% +0.24%] index_select strided 3 : Elapsed 0.021 ms (2.086 ms / 100) 2.079 -> 2.081 ( +0.10%) [ +0.14% +0.14% +0.00% / +0.10% +0.34% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.082 ms / 100) 2.083 -> 2.085 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.43% +0.24%] index_select strided 7 : Elapsed 0.021 ms (2.085 ms / 100) 2.085 -> 2.086 ( +0.05%) [ +0.19% +0.24% +0.00% / +0.05% +0.29% +0.29%] index_select strided 8 : Elapsed 0.021 ms (2.089 ms / 100) 2.088 -> 2.087 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.19% +0.19%] index_select strided 16 : Elapsed 0.021 ms (2.089 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.24% +0.43% +0.00% / +0.19% +0.29% +0.29%] index_select random : Elapsed 0.021 ms (2.085 ms / 100) 2.079 -> 2.082 ( +0.14%) [ +0.29% +0.00% +0.19% / +0.14% +0.58% +0.58%] index_select random_sorted : Elapsed 0.021 ms (2.085 ms / 100) 2.087 -> 2.085 ( -0.10%) [ +0.14% +0.00% +0.00% / -0.10% +0.24% +0.24%] index_select perm : Elapsed 0.021 ms (2.090 ms / 100) 2.082 -> 2.081 ( -0.05%) [ +0.29% +0.00% +0.10% / -0.05% +0.58% +0.38%] index_select perm_sorted : Elapsed 0.021 ms (2.088 ms / 100) B = [40, 4, 5, 16] (stride (1, 40, 2560, 160)) A = [40, 20, 5, 16] (stride (100, 1, 20, 4000)) dim = 1 2.088 -> 2.086 ( -0.10%) [ +0.24% +0.05% +0.00% / +0.14% -0.10% -0.05%] index_select const : Elapsed 0.021 ms (2.093 ms / 100) 2.084 -> 2.083 ( -0.05%) [ +0.19% +0.00% +0.00% / -0.05% +0.19% +0.05%] index_select wrap : Elapsed 0.021 ms (2.088 ms / 100) 2.091 -> 2.091 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.05% +0.00% +0.00%] index_select linear : Elapsed 0.021 ms (2.093 ms / 100) 2.087 -> 2.087 ( +0.00%) [ +0.00% +0.14% +0.19% / +0.29% +0.14% +0.00%] index_select reverse : Elapsed 0.021 ms (2.087 ms / 100) 2.088 -> 2.085 ( -0.14%) [ +0.19% +0.10% +0.00% / +0.14% +0.10% -0.14%] index_select skip64 : Elapsed 0.021 ms (2.092 ms / 100) 2.090 -> 2.083 ( -0.33%) [ +0.00% +0.00% +0.00% / +0.10% -0.33% -0.05%] index_select skip256 : Elapsed 0.021 ms (2.090 ms / 100) 2.153 -> 2.151 ( -0.09%) [ +0.14% +0.00% +0.05% / +0.09% +0.23% -0.09%] index_select spread : Elapsed 0.022 ms (2.156 ms / 100) 2.130 -> 2.131 ( +0.05%) [ +0.00% +0.09% +0.09% / +0.05% +0.09% +0.05%] index_select strided 3 : Elapsed 0.021 ms (2.130 ms / 100) 2.148 -> 2.151 ( +0.14%) [ +0.47% +0.00% +0.05% / +0.33% +0.14% +0.28%] index_select strided 5 : Elapsed 0.022 ms (2.158 ms / 100) 2.141 -> 2.134 ( -0.33%) [ +0.05% +0.05% +0.00% / -0.09% -0.33% -0.33%] index_select strided 7 : Elapsed 0.021 ms (2.142 ms / 100) 2.127 -> 2.126 ( -0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.00% -0.05%] index_select strided 8 : Elapsed 0.021 ms (2.128 ms / 100) 2.150 -> 2.152 ( +0.09%) [ +0.14% +0.00% +0.28% / +0.14% +0.37% +0.09%] index_select strided 16 : Elapsed 0.022 ms (2.153 ms / 100) 2.119 -> 2.113 ( -0.28%) [ +0.14% +0.00% +0.09% / +0.05% +0.24% -0.28%] index_select random : Elapsed 0.021 ms (2.122 ms / 100) 2.101 -> 2.100 ( -0.05%) [ +0.10% +0.00% +0.14% / +0.76% +0.00% -0.05%] index_select random_sorted : Elapsed 0.021 ms (2.103 ms / 100) 2.156 -> 2.154 ( -0.09%) [ +0.28% +0.00% +0.32% / +0.19% -0.09% +0.28%] index_select perm : Elapsed 0.022 ms (2.162 ms / 100) 2.155 -> 2.148 ( -0.32%) [ +0.23% +0.00% +0.14% / -0.19% -0.32% -0.05%] index_select perm_sorted : Elapsed 0.022 ms (2.160 ms / 100) B = [40, 4, 5, 16] (stride (20, 5, 1, 800)) A = [40, 20, 5, 16] (stride (1, 40, 12800, 800)) dim = 1 2.088 -> 2.088 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.29% +0.48%] index_select const : Elapsed 0.021 ms (2.088 ms / 100) 2.096 -> 2.100 ( +0.19%) [ +0.19% +0.33% +0.00% / +0.19% +0.29% +0.33%] index_select wrap : Elapsed 0.021 ms (2.100 ms / 100) 2.095 -> 2.097 ( +0.10%) [ +0.00% +0.19% +0.00% / +0.10% +0.33% +0.33%] index_select linear : Elapsed 0.021 ms (2.095 ms / 100) 2.092 -> 2.097 ( +0.24%) [ +0.24% +0.14% +0.00% / +0.24% +0.72% +0.43%] index_select reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.24% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.091 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.24% +0.57%] index_select skip256 : Elapsed 0.021 ms (2.090 ms / 100) 2.091 -> 2.096 ( +0.24%) [ +0.24% +0.00% +0.19% / +0.24% +0.57% +0.43%] index_select spread : Elapsed 0.021 ms (2.096 ms / 100) 2.094 -> 2.092 ( -0.10%) [ +0.00% +0.14% +0.10% / -0.10% +0.00% +0.29%] index_select strided 3 : Elapsed 0.021 ms (2.094 ms / 100) 2.091 -> 2.090 ( -0.05%) [ +0.14% +0.14% +0.00% / -0.05% +0.05% +0.19%] index_select strided 5 : Elapsed 0.021 ms (2.094 ms / 100) 2.097 -> 2.097 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.19% +0.38%] index_select strided 7 : Elapsed 0.021 ms (2.098 ms / 100) 2.092 -> 2.097 ( +0.24%) [ +0.29% +0.14% +0.00% / +0.24% +0.48% +0.33%] index_select strided 8 : Elapsed 0.021 ms (2.098 ms / 100) 2.093 -> 2.096 ( +0.14%) [ +0.29% +0.14% +0.00% / +0.14% +0.29% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.099 ms / 100) 2.092 -> 2.094 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.48% +0.53%] index_select random : Elapsed 0.021 ms (2.093 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.33% +0.48%] index_select random_sorted : Elapsed 0.021 ms (2.095 ms / 100) 2.093 -> 2.094 ( +0.05%) [ +0.14% +0.29% +0.00% / +0.05% +0.43% +0.38%] index_select perm : Elapsed 0.021 ms (2.096 ms / 100) 2.093 -> 2.094 ( +0.05%) [ +0.24% +0.00% +0.19% / +0.05% +0.57% +0.38%] index_select perm_sorted : Elapsed 0.021 ms (2.098 ms / 100) B = [40, 4, 5, 16] (stride (4, 1, 160, 800)) A = [40, 20, 5, 16] (stride (1, 640, 12800, 40)) dim = 1 2.204 -> 2.204 ( +0.00%) [ +0.27% +0.09% +0.00% / +0.41% +0.00% +0.14%] index_select const : Elapsed 0.022 ms (2.210 ms / 100) 2.200 -> 2.204 ( +0.18%) [ +0.18% +0.14% +0.00% / +0.18% +0.18% +0.27%] index_select wrap : Elapsed 0.022 ms (2.204 ms / 100) 2.204 -> 2.203 ( -0.05%) [ +0.00% +0.05% +0.09% / -0.05% +0.09% +0.18%] index_select linear : Elapsed 0.022 ms (2.204 ms / 100) 2.205 -> 2.205 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.14% +0.14% +0.00%] index_select reverse : Elapsed 0.022 ms (2.205 ms / 100) 2.200 -> 2.200 ( +0.00%) [ +0.27% +0.14% +0.00% / +0.14% +0.05% +0.00%] index_select skip64 : Elapsed 0.022 ms (2.206 ms / 100) 2.201 -> 2.200 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.00% -0.05% +0.23%] index_select skip256 : Elapsed 0.022 ms (2.201 ms / 100) 2.205 -> 2.208 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.18% +0.14% +0.18%] index_select spread : Elapsed 0.022 ms (2.205 ms / 100) 2.203 -> 2.202 ( -0.05%) [ +0.00% +0.09% +0.09% / -0.05% +0.18% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.203 ms / 100) 2.206 -> 2.205 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.05% +0.14%] index_select strided 5 : Elapsed 0.022 ms (2.208 ms / 100) 2.204 -> 2.206 ( +0.09%) [ +0.18% +0.36% +0.00% / +0.09% +0.18% +0.36%] index_select strided 7 : Elapsed 0.022 ms (2.208 ms / 100) 2.203 -> 2.204 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.23% +0.27%] index_select strided 8 : Elapsed 0.022 ms (2.205 ms / 100) 2.207 -> 2.207 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.09% +0.00%] index_select strided 16 : Elapsed 0.022 ms (2.207 ms / 100) 2.203 -> 2.200 ( -0.14%) [ +0.27% +0.00% +0.09% / +0.14% -0.14% +0.09%] index_select random : Elapsed 0.022 ms (2.209 ms / 100) 2.205 -> 2.207 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.23% +0.09% +0.09%] index_select random_sorted : Elapsed 0.022 ms (2.208 ms / 100) 2.205 -> 2.207 ( +0.09%) [ +0.18% +0.00% +0.05% / +0.09% +0.09% +0.09%] index_select perm : Elapsed 0.022 ms (2.209 ms / 100) 2.200 -> 2.206 ( +0.27%) [ +0.00% +0.23% +0.18% / +0.27% +0.36% +0.36%] index_select perm_sorted : Elapsed 0.022 ms (2.200 ms / 100) out_shape = [40, 20, 4, 16] in_shape = [40, 20, 5, 16] idx_dim = 2 B = [40, 20, 4, 16] (stride (1280, 64, 16, 1)) A = [40, 20, 5, 16] (stride (1, 3200, 40, 200)) dim = 2 5.509 -> 5.506 ( -0.05%) [ +0.00% +0.05% +0.00% / +0.11% -0.05% +0.07%] index_select const : Elapsed 0.055 ms (5.509 ms / 100) 5.565 -> 5.553 ( -0.22%) [ +0.05% +0.00% +0.07% / +0.14% -0.22% -0.11%] index_select wrap : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.00% +0.11% +0.09% / +0.07% +0.04% -0.05%] index_select linear : Elapsed 0.056 ms (5.564 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.09% +0.00% +0.18% / +0.20% +0.04% +0.04%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.510 -> 5.504 ( -0.11%) [ +0.00% +0.04% +0.13% / -0.05% -0.11% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.510 ms / 100) 5.511 -> 5.507 ( -0.07%) [ +0.02% +0.00% +0.07% / +0.05% -0.07% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.512 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.00% +0.23% +0.20% / +0.11% +0.07% +0.05%] index_select spread : Elapsed 0.056 ms (5.561 ms / 100) 5.573 -> 5.567 ( -0.11%) [ +0.00% +0.02% +0.23% / +0.38% -0.11% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.573 ms / 100) 5.550 -> 5.539 ( -0.20%) [ +0.07% +0.00% +0.09% / +0.27% -0.20% -0.16%] index_select random : Elapsed 0.056 ms (5.554 ms / 100) 5.552 -> 5.539 ( -0.23%) [ +0.07% +0.00% +0.14% / +0.22% -0.20% -0.23%] index_select random_sorted : Elapsed 0.056 ms (5.556 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.00% +0.09% +0.18% / +0.14% -0.05% +0.11%] index_select perm : Elapsed 0.056 ms (5.571 ms / 100) 5.568 -> 5.559 ( -0.16%) [ +0.00% +0.13% +0.11% / +0.18% -0.02% -0.16%] index_select perm_sorted : Elapsed 0.056 ms (5.568 ms / 100) B = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) A = [40, 20, 5, 16] (stride (1600, 80, 16, 1)) dim = 2 5.450 -> 5.454 ( +0.07%) [ +0.13% +0.06% +0.00% / +0.18% +0.07% +0.11%] index_select const : Elapsed 0.055 ms (5.457 ms / 100) 5.540 -> 5.547 ( +0.13%) [ +0.04% +0.00% +0.04% / +0.14% +0.22% +0.13%] index_select wrap : Elapsed 0.055 ms (5.542 ms / 100) 5.542 -> 5.544 ( +0.04%) [ +0.02% +0.00% +0.14% / +0.04% +0.29% +0.13%] index_select linear : Elapsed 0.055 ms (5.543 ms / 100) 5.543 -> 5.546 ( +0.05%) [ +0.00% +0.14% +0.16% / +0.05% +0.23% +0.16%] index_select reverse : Elapsed 0.055 ms (5.543 ms / 100) 5.442 -> 5.449 ( +0.13%) [ +0.00% +0.11% +0.00% / +0.17% +0.20% +0.13%] index_select skip64 : Elapsed 0.054 ms (5.442 ms / 100) 5.451 -> 5.449 ( -0.04%) [ +0.00% +0.00% +0.07% / +0.07% -0.02% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.451 ms / 100) 5.536 -> 5.538 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.04% +0.20% +0.20%] index_select spread : Elapsed 0.055 ms (5.536 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.04% +0.00% +0.07% / +0.00% -0.07% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.557 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.04% +0.11% +0.00% / +0.11% +0.00% -0.04%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.475 -> 5.480 ( +0.09%) [ +0.00% +0.07% +0.11% / +0.09% +0.16% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.475 ms / 100) 5.546 -> 5.543 ( -0.05%) [ +0.04% +0.04% +0.00% / +0.18% +0.13% -0.05%] index_select perm : Elapsed 0.055 ms (5.548 ms / 100) 5.549 -> 5.544 ( -0.09%) [ +0.00% +0.04% +0.04% / +0.14% +0.25% -0.09%] index_select perm_sorted : Elapsed 0.055 ms (5.549 ms / 100) B = [40, 20, 4, 16] (stride (1, 160, 40, 3200)) A = [40, 20, 5, 16] (stride (1600, 1, 20, 100)) dim = 2 5.807 -> 5.801 ( -0.10%) [ +0.10% +0.00% +0.14% / +0.24% -0.03% -0.10%] index_select const : Elapsed 0.058 ms (5.813 ms / 100) 5.865 -> 5.875 ( +0.17%) [ +0.05% +0.07% +0.00% / +0.26% +0.17% +0.22%] index_select wrap : Elapsed 0.059 ms (5.868 ms / 100) 5.871 -> 5.876 ( +0.09%) [ +0.00% +0.02% +0.00% / +0.09% +0.22% +0.17%] index_select linear : Elapsed 0.059 ms (5.871 ms / 100) 5.869 -> 5.873 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +0.24% +0.12%] index_select reverse : Elapsed 0.059 ms (5.873 ms / 100) 5.809 -> 5.802 ( -0.12%) [ +0.00% +0.09% +0.19% / +0.10% -0.12% -0.02%] index_select skip64 : Elapsed 0.058 ms (5.809 ms / 100) 5.813 -> 5.799 ( -0.24%) [ +0.02% +0.00% +0.09% / +0.14% -0.05% -0.24%] index_select skip256 : Elapsed 0.058 ms (5.814 ms / 100) 5.866 -> 5.876 ( +0.17%) [ +0.00% +0.02% +0.17% / +0.20% +0.26% +0.17%] index_select spread : Elapsed 0.059 ms (5.866 ms / 100) 5.878 -> 5.882 ( +0.07%) [ +0.00% +0.02% +0.05% / +0.07% +0.26% +0.26%] index_select strided 3 : Elapsed 0.059 ms (5.878 ms / 100) 5.839 -> 5.843 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.17% +0.07% +0.26%] index_select random : Elapsed 0.058 ms (5.839 ms / 100) 5.826 -> 5.838 ( +0.21%) [ +0.15% +0.00% +0.19% / +0.21% +0.31% +0.29%] index_select random_sorted : Elapsed 0.058 ms (5.835 ms / 100) 5.880 -> 5.884 ( +0.07%) [ +0.00% +0.00% +0.02% / +0.07% +0.07% +0.10%] index_select perm : Elapsed 0.059 ms (5.880 ms / 100) 5.863 -> 5.872 ( +0.15%) [ +0.00% +0.09% +0.12% / +0.15% +0.43% +0.19%] index_select perm_sorted : Elapsed 0.059 ms (5.863 ms / 100) B = [40, 20, 4, 16] (stride (20, 1, 800, 3200)) A = [40, 20, 5, 16] (stride (1600, 16, 320, 1)) dim = 2 5.405 -> 5.404 ( -0.02%) [ +0.02% +0.00% +0.15% / +0.13% -0.02% +0.09%] index_select const : Elapsed 0.054 ms (5.406 ms / 100) 5.447 -> 5.462 ( +0.28%) [ +0.00% +0.09% +0.20% / +0.28% +0.42% +0.53%] index_select wrap : Elapsed 0.054 ms (5.447 ms / 100) 5.450 -> 5.458 ( +0.15%) [ +0.07% +0.06% +0.00% / +0.22% +0.29% +0.15%] index_select linear : Elapsed 0.055 ms (5.454 ms / 100) 5.458 -> 5.467 ( +0.16%) [ +0.00% +0.11% +0.24% / +0.16% +0.33% +0.68%] index_select reverse : Elapsed 0.055 ms (5.458 ms / 100) 5.387 -> 5.387 ( +0.00%) [ +0.02% +0.00% +0.11% / +0.02% +0.13% +0.00%] index_select skip64 : Elapsed 0.054 ms (5.388 ms / 100) 5.404 -> 5.405 ( +0.02%) [ +0.00% +0.15% +0.22% / +0.11% +0.11% +0.02%] index_select skip256 : Elapsed 0.054 ms (5.404 ms / 100) 5.454 -> 5.462 ( +0.15%) [ +0.00% +0.06% +0.13% / +0.15% +0.31% +0.20%] index_select spread : Elapsed 0.055 ms (5.454 ms / 100) 5.450 -> 5.458 ( +0.15%) [ +0.06% +0.00% +0.17% / +0.15% +0.35% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.453 ms / 100) 5.422 -> 5.430 ( +0.15%) [ +0.04% +0.04% +0.00% / +0.17% +0.15% +0.15%] index_select random : Elapsed 0.054 ms (5.424 ms / 100) 5.427 -> 5.435 ( +0.15%) [ +0.06% +0.00% +0.02% / +0.15% +0.15% +0.15%] index_select random_sorted : Elapsed 0.054 ms (5.430 ms / 100) 5.469 -> 5.472 ( +0.05%) [ +0.07% +0.00% +0.05% / +0.05% +0.26% +0.26%] index_select perm : Elapsed 0.055 ms (5.473 ms / 100) 5.449 -> 5.453 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.18% +0.31%] index_select perm_sorted : Elapsed 0.054 ms (5.449 ms / 100) out_shape = [40, 20, 5, 4] in_shape = [40, 20, 5, 16] idx_dim = 3 B = [40, 20, 5, 4] (stride (400, 20, 1, 5)) A = [40, 20, 5, 16] (stride (1, 200, 40, 4000)) dim = 3 1.094 -> 1.095 ( +0.09%) [ +0.18% +0.27% +0.00% / +0.09% +2.38% +2.29%] index_select const : Elapsed 0.011 ms (1.096 ms / 100) 1.099 -> 1.101 ( +0.18%) [ +0.00% +0.73% +0.55% / +0.18% +3.00% +3.00%] index_select wrap : Elapsed 0.011 ms (1.099 ms / 100) 1.100 -> 1.101 ( +0.09%) [ +0.00% +0.27% +0.09% / +0.09% +2.91% +3.36%] index_select linear : Elapsed 0.011 ms (1.100 ms / 100) 1.096 -> 1.096 ( +0.00%) [ +0.00% +0.18% +0.09% / +0.00% +3.56% +3.56%] index_select reverse : Elapsed 0.011 ms (1.096 ms / 100) 1.098 -> 1.098 ( +0.00%) [ +0.00% +0.27% +0.36% / +0.00% +1.73% +2.37%] index_select skip64 : Elapsed 0.011 ms (1.098 ms / 100) 1.100 -> 1.101 ( +0.09%) [ +0.00% +0.36% +0.09% / +0.09% +1.82% +2.00%] index_select skip256 : Elapsed 0.011 ms (1.100 ms / 100) 1.105 -> 1.111 ( +0.54%) [ +0.18% +0.27% +0.00% / +0.54% +2.26% +2.35%] index_select spread : Elapsed 0.011 ms (1.107 ms / 100) 1.105 -> 1.100 ( -0.45%) [ +0.00% +0.09% +0.09% / -0.45% +2.71% +2.81%] index_select strided 3 : Elapsed 0.011 ms (1.105 ms / 100) 1.102 -> 1.106 ( +0.36%) [ +0.18% +0.00% +0.36% / +0.36% +2.27% +2.45%] index_select strided 5 : Elapsed 0.011 ms (1.104 ms / 100) 1.106 -> 1.105 ( -0.09%) [ +0.18% +0.00% +0.45% / -0.09% +1.90% +1.90%] index_select strided 7 : Elapsed 0.011 ms (1.108 ms / 100) 1.102 -> 1.105 ( +0.27%) [ +0.00% +0.27% +0.36% / +0.27% +1.81% +2.09%] index_select strided 8 : Elapsed 0.011 ms (1.102 ms / 100) 1.096 -> 1.103 ( +0.64%) [ +0.36% +0.00% +0.36% / +0.64% +1.00% +1.19%] index_select random : Elapsed 0.011 ms (1.100 ms / 100) 1.100 -> 1.103 ( +0.27%) [ +0.27% +0.00% +0.73% / +0.27% +0.73% +0.45%] index_select random_sorted : Elapsed 0.011 ms (1.103 ms / 100) 1.106 -> 1.106 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +1.45% +1.90%] index_select perm : Elapsed 0.011 ms (1.107 ms / 100) 1.098 -> 1.101 ( +0.27%) [ +0.00% +0.09% +0.36% / +0.27% +1.46% +1.82%] index_select perm_sorted : Elapsed 0.011 ms (1.098 ms / 100) B = [40, 20, 5, 4] (stride (80, 4, 3200, 1)) A = [40, 20, 5, 16] (stride (320, 16, 12800, 1)) dim = 3 1.183 -> 1.179 ( -0.34%) [ +0.25% +0.00% +0.08% / +0.00% -0.34% -0.25%] index_select const : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.181 ( -0.08%) [ +0.34% +0.59% +0.00% / +0.00% -0.08% +0.00%] index_select wrap : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.176 ( -0.51%) [ +0.51% +0.17% +0.00% / +0.25% -0.08% -0.51%] index_select linear : Elapsed 0.012 ms (1.188 ms / 100) 1.183 -> 1.182 ( -0.08%) [ +0.17% +0.08% +0.00% / +0.42% +0.00% -0.08%] index_select reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.177 -> 1.180 ( +0.25%) [ +0.68% +0.00% +0.25% / +1.02% +0.25% +0.51%] index_select skip64 : Elapsed 0.012 ms (1.185 ms / 100) 1.180 -> 1.179 ( -0.08%) [ +0.08% +0.34% +0.00% / -0.08% +0.00% +0.00%] index_select skip256 : Elapsed 0.012 ms (1.181 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.00% +0.42% +0.42% / +0.00% +0.17% +0.50%] index_select spread : Elapsed 0.012 ms (1.196 ms / 100) 1.199 -> 1.196 ( -0.25%) [ +0.00% +0.50% +0.33% / +0.00% -0.25% +0.00%] index_select strided 3 : Elapsed 0.012 ms (1.199 ms / 100) 1.199 -> 1.198 ( -0.08%) [ +0.00% +0.17% +0.00% / +0.00% -0.08% +0.17%] index_select strided 5 : Elapsed 0.012 ms (1.199 ms / 100) 1.198 -> 1.193 ( -0.42%) [ +0.50% +0.00% +0.17% / +0.00% -0.42% +0.17%] index_select strided 7 : Elapsed 0.012 ms (1.204 ms / 100) 1.215 -> 1.208 ( -0.58%) [ +0.41% +0.08% +0.00% / -0.16% -0.41% -0.58%] index_select strided 8 : Elapsed 0.012 ms (1.220 ms / 100) 1.196 -> 1.193 ( -0.25%) [ +0.42% +0.00% +0.59% / +0.75% -0.25% +0.25%] index_select random : Elapsed 0.012 ms (1.201 ms / 100) 1.199 -> 1.192 ( -0.58%) [ +0.08% +0.00% +0.00% / +0.00% -0.58% +0.00%] index_select random_sorted : Elapsed 0.012 ms (1.200 ms / 100) 1.217 -> 1.214 ( -0.25%) [ +0.33% +0.00% +0.16% / +0.33% -0.16% -0.25%] index_select perm : Elapsed 0.012 ms (1.221 ms / 100) 1.200 -> 1.193 ( -0.58%) [ +0.00% +0.00% +0.33% / +0.00% +0.08% -0.58%] index_select perm_sorted : Elapsed 0.012 ms (1.200 ms / 100) B = [40, 20, 5, 4] (stride (4, 160, 3200, 1)) A = [40, 20, 5, 16] (stride (320, 1, 12800, 20)) dim = 3 2.445 -> 2.445 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.45% +0.41%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.482 -> 2.486 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.44% +0.60%] index_select wrap : Elapsed 0.025 ms (2.485 ms / 100) 2.485 -> 2.489 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.32% +0.44%] index_select linear : Elapsed 0.025 ms (2.485 ms / 100) 2.487 -> 2.484 ( -0.12%) [ +0.00% +0.12% +0.12% / -0.12% +0.52% +0.64%] index_select reverse : Elapsed 0.025 ms (2.487 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.45% +0.00% +0.04% / +0.21% +0.78% +0.58%] index_select skip64 : Elapsed 0.024 ms (2.445 ms / 100) 2.444 -> 2.444 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.37% +0.45%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.495 -> 2.495 ( +0.00%) [ +0.00% +0.04% +0.20% / +0.00% +0.56% +0.48%] index_select spread : Elapsed 0.025 ms (2.495 ms / 100) 2.495 -> 2.503 ( +0.32%) [ +0.28% +0.16% +0.00% / +0.32% +0.56% +0.56%] index_select strided 3 : Elapsed 0.025 ms (2.502 ms / 100) 2.482 -> 2.487 ( +0.20%) [ +0.20% +0.16% +0.00% / +0.20% +0.56% +0.52%] index_select strided 5 : Elapsed 0.025 ms (2.487 ms / 100) 2.490 -> 2.495 ( +0.20%) [ +0.04% +0.04% +0.00% / +0.20% +0.44% +0.48%] index_select strided 7 : Elapsed 0.025 ms (2.491 ms / 100) 2.454 -> 2.459 ( +0.20%) [ +0.29% +0.00% +0.20% / +0.20% +0.69% +0.77%] index_select strided 8 : Elapsed 0.025 ms (2.461 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.12% +0.68% +0.56%] index_select random : Elapsed 0.025 ms (2.499 ms / 100) 2.500 -> 2.504 ( +0.16%) [ +0.16% +0.00% +0.32% / +0.16% +0.40% +0.56%] index_select random_sorted : Elapsed 0.025 ms (2.504 ms / 100) 2.487 -> 2.491 ( +0.16%) [ +0.20% +0.24% +0.00% / +0.28% +0.72% +0.16%] index_select perm : Elapsed 0.025 ms (2.492 ms / 100) 2.484 -> 2.488 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.36% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) B = [40, 20, 5, 4] (stride (100, 5, 1, 4000)) A = [40, 20, 5, 16] (stride (1600, 16, 320, 1)) dim = 3 2.261 -> 2.269 ( +0.35%) [ +0.57% +0.27% +0.00% / +0.35% +0.40% +0.49%] index_select const : Elapsed 0.023 ms (2.274 ms / 100) 2.253 -> 2.254 ( +0.04%) [ +0.44% +0.44% +0.00% / +0.62% +0.67% +0.04%] index_select wrap : Elapsed 0.023 ms (2.263 ms / 100) 2.262 -> 2.261 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.00% +0.31%] index_select linear : Elapsed 0.023 ms (2.263 ms / 100) 2.257 -> 2.253 ( -0.18%) [ +0.18% +0.22% +0.00% / +0.09% -0.18% -0.04%] index_select reverse : Elapsed 0.023 ms (2.261 ms / 100) 2.251 -> 2.260 ( +0.40%) [ +0.00% +0.67% +0.18% / +0.40% +0.84% +0.71%] index_select skip64 : Elapsed 0.023 ms (2.251 ms / 100) 2.263 -> 2.265 ( +0.09%) [ +0.27% +0.27% +0.00% / +0.31% +0.49% +0.09%] index_select skip256 : Elapsed 0.023 ms (2.269 ms / 100) 2.284 -> 2.284 ( +0.00%) [ +0.00% +0.18% +0.04% / +0.00% +0.31% +0.35%] index_select spread : Elapsed 0.023 ms (2.284 ms / 100) 2.285 -> 2.287 ( +0.09%) [ +0.00% +0.09% +0.18% / +0.18% +0.09% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.285 ms / 100) 2.289 -> 2.286 ( -0.13%) [ +0.04% +0.04% +0.00% / -0.13% +0.39% +0.52%] index_select strided 5 : Elapsed 0.023 ms (2.290 ms / 100) 2.278 -> 2.285 ( +0.31%) [ +0.00% +0.31% +0.35% / +0.31% +0.70% +0.75%] index_select strided 7 : Elapsed 0.023 ms (2.278 ms / 100) 2.292 -> 2.296 ( +0.17%) [ +0.00% +0.13% +0.00% / +0.17% +0.61% +0.44%] index_select strided 8 : Elapsed 0.023 ms (2.292 ms / 100) 2.288 -> 2.289 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.39% +0.39%] index_select random : Elapsed 0.023 ms (2.290 ms / 100) 2.280 -> 2.281 ( +0.04%) [ +0.13% +0.00% +0.09% / +0.04% +0.53% +0.75%] index_select random_sorted : Elapsed 0.023 ms (2.283 ms / 100) 2.293 -> 2.294 ( +0.04%) [ +0.00% +0.39% +0.09% / +0.04% +0.48% +0.48%] index_select perm : Elapsed 0.023 ms (2.293 ms / 100) 2.281 -> 2.284 ( +0.13%) [ +0.00% +0.13% +0.13% / +0.35% +0.35% +0.13%] index_select perm_sorted : Elapsed 0.023 ms (2.281 ms / 100) out_shape = [4, 20, 16, 5] in_shape = [40, 20, 16, 5] idx_dim = 0 B = [4, 20, 16, 5] (stride (1600, 80, 5, 1)) A = [40, 20, 16, 5] (stride (1600, 80, 5, 1)) dim = 0 0.528 -> 0.529 ( +0.19%) [ +3.03% +0.00% +0.19% / +2.27% +18.56% +0.19%] index_select const : Elapsed 0.005 ms (0.544 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +4.22% +0.00% +0.19% / +7.68% +1.54% +1.92%] index_select wrap : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.520 ( -0.38%) [ +4.60% +0.57% +0.00% / +5.56% +2.11% -0.38%] index_select linear : Elapsed 0.005 ms (0.546 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +5.38% +1.54% +0.00% / +4.42% +6.35% +1.35%] index_select reverse : Elapsed 0.005 ms (0.548 ms / 100) 0.521 -> 0.535 ( +2.69%) [ +3.84% +0.77% +0.00% / +3.45% +6.53% +2.69%] index_select skip64 : Elapsed 0.005 ms (0.541 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +3.64% +0.19% +0.00% / +3.45% +9.00% +0.38%] index_select skip256 : Elapsed 0.005 ms (0.541 ms / 100) 0.522 -> 0.529 ( +1.34%) [+10.34% +1.53% +0.00% / +3.45% +1.34% +1.72%] index_select spread : Elapsed 0.006 ms (0.576 ms / 100) 0.522 -> 0.543 ( +4.02%) [+14.18% +0.00% +0.00% / +4.02% +7.28% +7.85%] index_select strided 3 : Elapsed 0.006 ms (0.596 ms / 100) 0.518 -> 0.539 ( +4.05%) [+22.01% +3.09% +0.00% / +4.05% +16.41% +14.29%] index_select strided 5 : Elapsed 0.006 ms (0.632 ms / 100) 0.522 -> 0.532 ( +1.92%) [+20.88% +1.92% +0.00% / +6.70% +1.92% +5.36%] index_select strided 7 : Elapsed 0.006 ms (0.631 ms / 100) 0.541 -> 0.535 ( -1.11%) [ +0.00% +4.44% +0.00% / -1.11% -1.11% -1.11%] index_select strided 8 : Elapsed 0.005 ms (0.541 ms / 100) 0.523 -> 0.535 ( +2.29%) [ +3.82% +14.15% +0.00% / +5.54% +2.29% +2.29%] index_select strided 16 : Elapsed 0.005 ms (0.543 ms / 100) 0.524 -> 0.532 ( +1.53%) [+12.02% +2.10% +0.00% / +3.44% +6.68% +1.53%] index_select random : Elapsed 0.006 ms (0.587 ms / 100) 0.563 -> 0.576 ( +2.31%) [ +6.22% +0.53% +0.00% / +2.31% +2.31% +2.31%] index_select random_sorted : Elapsed 0.006 ms (0.598 ms / 100) 0.523 -> 0.526 ( +0.57%) [ +5.93% +14.91% +0.00% / +3.44% +0.57% +24.67%] index_select perm : Elapsed 0.006 ms (0.554 ms / 100) 0.524 -> 0.528 ( +0.76%) [ +3.82% +0.00% +0.76% / +2.86% +0.76% +5.92%] index_select perm_sorted : Elapsed 0.005 ms (0.544 ms / 100) B = [4, 20, 16, 5] (stride (1600, 80, 5, 1)) A = [40, 20, 16, 5] (stride (20, 1, 800, 12800)) dim = 0 1.186 -> 1.186 ( +0.00%) [ +0.25% +0.25% +0.00% / +0.00% +0.42% +0.25%] index_select const : Elapsed 0.012 ms (1.189 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.00% +0.25% +0.00% / +0.00% +0.17% +0.08%] index_select wrap : Elapsed 0.012 ms (1.187 ms / 100) 1.187 -> 1.186 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.42% +0.17%] index_select linear : Elapsed 0.012 ms (1.187 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.17% +0.17%] index_select reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.187 -> 1.188 ( +0.08%) [ +0.17% +0.25% +0.00% / +0.08% +0.25% +0.34%] index_select skip64 : Elapsed 0.012 ms (1.189 ms / 100) 1.187 -> 1.186 ( -0.08%) [ +0.08% +0.00% +0.17% / -0.08% +0.17% +0.08%] index_select skip256 : Elapsed 0.012 ms (1.188 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.17% +0.17%] index_select spread : Elapsed 0.012 ms (1.187 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.42% +0.34%] index_select strided 3 : Elapsed 0.012 ms (1.188 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.00% +0.25% +0.25%] index_select strided 5 : Elapsed 0.012 ms (1.189 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.34% +0.34%] index_select strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.187 -> 1.188 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.42% +0.34%] index_select strided 8 : Elapsed 0.012 ms (1.188 ms / 100) 1.188 -> 1.187 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.25% +0.25%] index_select strided 16 : Elapsed 0.012 ms (1.188 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.08% +0.59% / +0.00% +0.42% +0.42%] index_select random : Elapsed 0.012 ms (1.189 ms / 100) 1.188 -> 1.191 ( +0.25%) [ +0.00% +0.17% +0.17% / +0.25% +0.25% +0.25%] index_select random_sorted : Elapsed 0.012 ms (1.188 ms / 100) 1.188 -> 1.190 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.34% +0.25%] index_select perm : Elapsed 0.012 ms (1.188 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.42%] index_select perm_sorted : Elapsed 0.012 ms (1.188 ms / 100) B = [4, 20, 16, 5] (stride (1600, 16, 1, 320)) A = [40, 20, 16, 5] (stride (1600, 16, 1, 320)) dim = 0 0.571 -> 0.571 ( +0.00%) [ +0.35% +0.53% +0.00% / +0.00% +0.53% +0.70%] index_select const : Elapsed 0.006 ms (0.573 ms / 100) 0.574 -> 0.574 ( +0.00%) [ +1.22% +0.00% +0.00% / +0.00% +0.17% +0.17%] index_select wrap : Elapsed 0.006 ms (0.581 ms / 100) 0.575 -> 0.575 ( +0.00%) [+12.70% +1.04% +0.00% / +0.00% +0.00% +2.09%] index_select linear : Elapsed 0.006 ms (0.648 ms / 100) 0.574 -> 0.574 ( +0.00%) [ +0.17% +0.00% +4.18% / +0.00% +0.35% +5.05%] index_select reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.574 -> 0.574 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.17%] index_select skip64 : Elapsed 0.006 ms (0.574 ms / 100) 0.569 -> 0.568 ( -0.18%) [ +0.18% +0.70% +0.00% / +0.18% +0.00% -0.18%] index_select skip256 : Elapsed 0.006 ms (0.570 ms / 100) 0.571 -> 0.570 ( -0.18%) [ +0.00% +0.18% +0.00% / +0.00% +0.00% -0.18%] index_select spread : Elapsed 0.006 ms (0.571 ms / 100) 0.573 -> 0.573 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.17% +0.00% +0.17%] index_select strided 3 : Elapsed 0.006 ms (0.573 ms / 100) 0.574 -> 0.575 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.35% +0.17% +0.17%] index_select strided 5 : Elapsed 0.006 ms (0.574 ms / 100) 0.573 -> 0.574 ( +0.17%) [ +0.17% +7.16% +0.00% / +0.17% +0.52% +5.93%] index_select strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.574 -> 0.575 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.35% +1.05%] index_select strided 8 : Elapsed 0.006 ms (0.575 ms / 100) 0.574 -> 0.574 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.17% +0.17% +0.00%] index_select strided 16 : Elapsed 0.006 ms (0.575 ms / 100) 0.574 -> 0.575 ( +0.17%) [ +0.00% +0.35% +0.17% / +0.17% +0.17% +0.17%] index_select random : Elapsed 0.006 ms (0.574 ms / 100) 0.570 -> 0.570 ( +0.00%) [ +0.70% +0.18% +0.00% / +0.00% +0.53% +0.18%] index_select random_sorted : Elapsed 0.006 ms (0.574 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +3.50% +0.00% +0.00% / +0.00% +6.13% +0.53%] index_select perm : Elapsed 0.006 ms (0.591 ms / 100) 0.568 -> 0.568 ( +0.00%) [ +7.39% +0.00% +6.69% / +0.00% +1.23% +1.06%] index_select perm_sorted : Elapsed 0.006 ms (0.610 ms / 100) B = [4, 20, 16, 5] (stride (1600, 16, 1, 320)) A = [40, 20, 16, 5] (stride (1600, 1, 20, 320)) dim = 0 1.241 -> 1.244 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.24% +0.48% +0.48%] index_select const : Elapsed 0.012 ms (1.244 ms / 100) 1.241 -> 1.240 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.40% +0.40%] index_select wrap : Elapsed 0.012 ms (1.241 ms / 100) 1.239 -> 1.238 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.48% +0.56%] index_select linear : Elapsed 0.012 ms (1.240 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.32% +0.40%] index_select reverse : Elapsed 0.012 ms (1.238 ms / 100) 1.242 -> 1.242 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.24% +0.24%] index_select skip64 : Elapsed 0.012 ms (1.244 ms / 100) 1.239 -> 1.239 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.65% +0.48%] index_select skip256 : Elapsed 0.012 ms (1.239 ms / 100) 1.237 -> 1.237 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.73% +0.65%] index_select spread : Elapsed 0.012 ms (1.239 ms / 100) 1.237 -> 1.238 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.73% +0.73%] index_select strided 3 : Elapsed 0.012 ms (1.240 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.73% +0.65%] index_select strided 5 : Elapsed 0.012 ms (1.235 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.00% +0.00% +0.08% / +0.16% +0.73% +0.64%] index_select strided 7 : Elapsed 0.012 ms (1.241 ms / 100) 1.244 -> 1.242 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.56% +0.48%] index_select strided 8 : Elapsed 0.012 ms (1.245 ms / 100) 1.236 -> 1.239 ( +0.24%) [ +0.24% +0.16% +0.00% / +0.24% +0.89% +0.81%] index_select strided 16 : Elapsed 0.012 ms (1.239 ms / 100) 1.241 -> 1.241 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.56% +0.48%] index_select random : Elapsed 0.012 ms (1.242 ms / 100) 1.238 -> 1.241 ( +0.24%) [ +0.16% +0.32% +0.00% / +0.24% +0.57% +0.81%] index_select random_sorted : Elapsed 0.012 ms (1.240 ms / 100) 1.234 -> 1.239 ( +0.41%) [ +0.16% +0.24% +0.00% / +0.41% +0.97% +0.89%] index_select perm : Elapsed 0.012 ms (1.236 ms / 100) 1.244 -> 1.245 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.72% +0.56%] index_select perm_sorted : Elapsed 0.012 ms (1.245 ms / 100) B = [4, 20, 16, 5] (stride (1600, 1, 20, 320)) A = [40, 20, 16, 5] (stride (1600, 1, 100, 20)) dim = 0 1.230 -> 1.230 ( +0.00%) [ +0.24% +0.16% +0.00% / +0.00% +0.49% +0.49%] index_select const : Elapsed 0.012 ms (1.233 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.73% +0.49%] index_select wrap : Elapsed 0.012 ms (1.233 ms / 100) 1.239 -> 1.240 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.56% +0.65%] index_select linear : Elapsed 0.012 ms (1.239 ms / 100) 1.233 -> 1.235 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.49% +0.49%] index_select reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.241 -> 1.241 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.32% +0.32%] index_select skip64 : Elapsed 0.012 ms (1.241 ms / 100) 1.225 -> 1.225 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.57% +0.65%] index_select skip256 : Elapsed 0.012 ms (1.225 ms / 100) 1.226 -> 1.227 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.98% +0.65%] index_select spread : Elapsed 0.012 ms (1.227 ms / 100) 1.238 -> 1.239 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.57% +0.65%] index_select strided 3 : Elapsed 0.012 ms (1.240 ms / 100) 1.233 -> 1.234 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.41% +0.49%] index_select strided 5 : Elapsed 0.012 ms (1.233 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_select strided 7 : Elapsed 0.012 ms (1.230 ms / 100) 1.225 -> 1.225 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.57% +0.33%] index_select strided 8 : Elapsed 0.012 ms (1.226 ms / 100) 1.226 -> 1.227 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.57% +0.57%] index_select strided 16 : Elapsed 0.012 ms (1.228 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.49% +0.57%] index_select random : Elapsed 0.012 ms (1.229 ms / 100) 1.235 -> 1.238 ( +0.24%) [ +0.08% +0.00% +0.32% / +0.24% +0.24% +0.40%] index_select random_sorted : Elapsed 0.012 ms (1.236 ms / 100) 1.240 -> 1.243 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.24% +0.24%] index_select perm : Elapsed 0.012 ms (1.241 ms / 100) 1.228 -> 1.229 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.49% +0.57%] index_select perm_sorted : Elapsed 0.012 ms (1.229 ms / 100) B = [4, 20, 16, 5] (stride (16, 320, 1, 64)) A = [40, 20, 16, 5] (stride (1, 40, 4000, 800)) dim = 0 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.47%] index_select const : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.16% +0.00% +0.00% / -0.08% +0.47% +0.55%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.00% +0.24% +0.16% / +0.16% +0.55% +0.78%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.278 -> 1.276 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.47% +0.47%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.71%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.31%] index_select strided 8 : Elapsed 0.013 ms (1.278 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.23%] index_select strided 16 : Elapsed 0.013 ms (1.280 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.47% +0.31%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.39% +0.31%] index_select random_sorted : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.47% +0.31%] index_select perm : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [4, 20, 16, 5] (stride (20, 1, 400, 80)) A = [40, 20, 16, 5] (stride (1600, 1, 20, 320)) dim = 0 1.239 -> 1.243 ( +0.32%) [ +0.32% +0.00% +0.16% / +0.32% +0.81% +0.73%] index_select const : Elapsed 0.012 ms (1.243 ms / 100) 1.238 -> 1.239 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.73% +0.65%] index_select wrap : Elapsed 0.012 ms (1.239 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.56% +0.40%] index_select linear : Elapsed 0.012 ms (1.246 ms / 100) 1.239 -> 1.239 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.65% +0.48%] index_select reverse : Elapsed 0.012 ms (1.241 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.48% +0.32%] index_select skip64 : Elapsed 0.012 ms (1.244 ms / 100) 1.245 -> 1.243 ( -0.16%) [ +0.24% +0.00% +0.00% / -0.16% +0.16% +0.16%] index_select skip256 : Elapsed 0.012 ms (1.248 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.48% +0.40%] index_select spread : Elapsed 0.012 ms (1.242 ms / 100) 1.239 -> 1.239 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.48% +0.48%] index_select strided 3 : Elapsed 0.012 ms (1.240 ms / 100) 1.239 -> 1.241 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.56% +0.65%] index_select strided 5 : Elapsed 0.012 ms (1.240 ms / 100) 1.238 -> 1.239 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.73% +0.57%] index_select strided 7 : Elapsed 0.012 ms (1.241 ms / 100) 1.233 -> 1.236 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.24% +0.73% +0.89%] index_select strided 8 : Elapsed 0.012 ms (1.236 ms / 100) 1.233 -> 1.233 ( +0.00%) [ +0.24% +0.00% +0.08% / +0.00% +0.57% +0.49%] index_select strided 16 : Elapsed 0.012 ms (1.236 ms / 100) 1.248 -> 1.247 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.40% +0.24%] index_select random : Elapsed 0.012 ms (1.248 ms / 100) 1.245 -> 1.246 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.40% +0.48%] index_select random_sorted : Elapsed 0.012 ms (1.246 ms / 100) 1.244 -> 1.246 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.72% +0.88%] index_select perm : Elapsed 0.012 ms (1.246 ms / 100) 1.241 -> 1.244 ( +0.24%) [ +0.16% +0.32% +0.00% / +0.24% +0.48% +0.73%] index_select perm_sorted : Elapsed 0.012 ms (1.243 ms / 100) out_shape = [40, 4, 16, 5] in_shape = [40, 20, 16, 5] idx_dim = 1 B = [40, 4, 16, 5] (stride (320, 80, 5, 1)) A = [40, 20, 16, 5] (stride (1, 3200, 200, 40)) dim = 1 0.754 -> 0.756 ( +0.27%) [ +0.00% +0.53% +0.40% / +0.27% +2.65% +2.25%] index_select const : Elapsed 0.008 ms (0.754 ms / 100) 0.759 -> 0.759 ( +0.00%) [ +0.00% +0.00% +0.26% / +0.00% +1.32% +1.05%] index_select wrap : Elapsed 0.008 ms (0.759 ms / 100) 0.758 -> 0.762 ( +0.53%) [ +0.13% +0.00% +0.13% / +0.53% +1.32% +1.19%] index_select linear : Elapsed 0.008 ms (0.759 ms / 100) 0.759 -> 0.761 ( +0.26%) [ +0.13% +0.00% +1.45% / +0.26% +1.58% +1.71%] index_select reverse : Elapsed 0.008 ms (0.760 ms / 100) 0.759 -> 0.759 ( +0.00%) [ +0.13% +0.00% +0.26% / +0.00% +0.92% +0.92%] index_select skip64 : Elapsed 0.008 ms (0.760 ms / 100) 0.753 -> 0.756 ( +0.40%) [ +0.00% +0.00% +0.53% / +0.40% +2.79% +2.52%] index_select skip256 : Elapsed 0.008 ms (0.753 ms / 100) 0.754 -> 0.758 ( +0.53%) [ +0.00% +0.13% +0.13% / +0.53% +2.79% +2.39%] index_select spread : Elapsed 0.008 ms (0.754 ms / 100) 0.754 -> 0.754 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +3.18% +2.79%] index_select strided 3 : Elapsed 0.008 ms (0.754 ms / 100) 0.753 -> 0.755 ( +0.27%) [ +0.80% +0.53% +0.00% / +0.27% +2.79% +2.12%] index_select strided 5 : Elapsed 0.008 ms (0.759 ms / 100) 0.760 -> 0.761 ( +0.13%) [ +0.53% +0.00% +0.13% / +0.13% +1.05% +0.79%] index_select strided 7 : Elapsed 0.008 ms (0.764 ms / 100) 0.759 -> 0.759 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +1.05% +0.92%] index_select strided 8 : Elapsed 0.008 ms (0.760 ms / 100) 0.757 -> 0.757 ( +0.00%) [ +0.00% +0.40% +0.00% / +0.00% +1.72% +1.32%] index_select strided 16 : Elapsed 0.008 ms (0.757 ms / 100) 0.765 -> 0.767 ( +0.26%) [ +0.39% +0.26% +0.00% / +0.39% +0.78% +0.26%] index_select random : Elapsed 0.008 ms (0.768 ms / 100) 0.758 -> 0.762 ( +0.53%) [ +0.79% +0.40% +0.00% / +0.53% +1.32% +1.58%] index_select random_sorted : Elapsed 0.008 ms (0.764 ms / 100) 0.757 -> 0.762 ( +0.66%) [ +0.66% +0.92% +0.00% / +0.66% +1.32% +1.19%] index_select perm : Elapsed 0.008 ms (0.762 ms / 100) 0.752 -> 0.754 ( +0.27%) [ +0.66% +0.66% +0.00% / +0.27% +2.13% +1.86%] index_select perm_sorted : Elapsed 0.008 ms (0.757 ms / 100) B = [40, 4, 16, 5] (stride (320, 16, 1, 64)) A = [40, 20, 16, 5] (stride (1600, 1, 100, 20)) dim = 1 2.058 -> 2.054 ( -0.19%) [ +0.00% +0.15% +0.00% / -0.05% -0.19% -0.05%] index_select const : Elapsed 0.021 ms (2.058 ms / 100) 2.031 -> 2.035 ( +0.20%) [ +0.10% +0.00% +0.30% / +0.30% +0.20% +0.30%] index_select wrap : Elapsed 0.020 ms (2.033 ms / 100) 2.035 -> 2.034 ( -0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.29% -0.05%] index_select linear : Elapsed 0.020 ms (2.036 ms / 100) 2.044 -> 2.044 ( +0.00%) [ +0.00% +0.15% +0.10% / +0.15% +0.00% +0.15%] index_select reverse : Elapsed 0.020 ms (2.044 ms / 100) 2.056 -> 2.053 ( -0.15%) [ +0.15% +0.00% +0.05% / +0.15% -0.15% +0.15%] index_select skip64 : Elapsed 0.021 ms (2.059 ms / 100) 2.037 -> 2.033 ( -0.20%) [ +0.05% +0.00% +0.00% / +0.00% +0.00% -0.20%] index_select skip256 : Elapsed 0.020 ms (2.038 ms / 100) 2.091 -> 2.082 ( -0.43%) [ +0.00% +0.05% +0.00% / +0.00% -0.43% -0.38%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.089 -> 2.088 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.00% -0.05% -0.05%] index_select strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.099 -> 2.089 ( -0.48%) [ +0.05% +0.00% +0.29% / +0.24% -0.48% -0.24%] index_select strided 5 : Elapsed 0.021 ms (2.100 ms / 100) 2.089 -> 2.078 ( -0.53%) [ +0.14% +0.14% +0.00% / +0.19% -0.53% -0.19%] index_select strided 7 : Elapsed 0.021 ms (2.092 ms / 100) 2.096 -> 2.088 ( -0.38%) [ +0.10% +0.00% +0.05% / -0.10% -0.10% -0.38%] index_select strided 8 : Elapsed 0.021 ms (2.098 ms / 100) 2.088 -> 2.085 ( -0.14%) [ +0.00% +0.24% +0.24% / +0.10% -0.14% +0.05%] index_select strided 16 : Elapsed 0.021 ms (2.088 ms / 100) 2.053 -> 2.052 ( -0.05%) [ +0.00% +0.19% +0.15% / -0.05% +0.00% +0.00%] index_select random : Elapsed 0.021 ms (2.053 ms / 100) 2.078 -> 2.074 ( -0.19%) [ +0.05% +0.10% +0.00% / +0.00% -0.05% -0.19%] index_select random_sorted : Elapsed 0.021 ms (2.079 ms / 100) 2.064 -> 2.061 ( -0.15%) [ +0.15% +0.29% +0.00% / +0.10% -0.05% -0.15%] index_select perm : Elapsed 0.021 ms (2.067 ms / 100) 2.074 -> 2.072 ( -0.10%) [ +0.34% +0.00% +0.14% / -0.10% -0.05% -0.10%] index_select perm_sorted : Elapsed 0.021 ms (2.081 ms / 100) B = [40, 4, 16, 5] (stride (80, 3200, 5, 1)) A = [40, 20, 16, 5] (stride (1, 3200, 200, 40)) dim = 1 0.759 -> 0.763 ( +0.53%) [ +0.13% +0.00% +0.40% / +0.53% +1.98% +1.58%] index_select const : Elapsed 0.008 ms (0.760 ms / 100) 0.761 -> 0.765 ( +0.53%) [ +0.79% +0.00% +0.53% / +0.53% +1.18% +1.45%] index_select wrap : Elapsed 0.008 ms (0.767 ms / 100) 0.766 -> 0.767 ( +0.13%) [ +0.00% +0.39% +0.00% / +0.13% +0.78% +0.78%] index_select linear : Elapsed 0.008 ms (0.766 ms / 100) 0.762 -> 0.767 ( +0.66%) [ +0.00% +0.39% +0.13% / +0.66% +1.84% +1.18%] index_select reverse : Elapsed 0.008 ms (0.762 ms / 100) 0.764 -> 0.767 ( +0.39%) [ +0.52% +0.26% +0.00% / +0.39% +1.18% +0.65%] index_select skip64 : Elapsed 0.008 ms (0.768 ms / 100) 0.757 -> 0.766 ( +1.19%) [ +0.00% +0.40% +0.13% / +1.19% +1.85% +1.72%] index_select skip256 : Elapsed 0.008 ms (0.757 ms / 100) 0.756 -> 0.757 ( +0.13%) [ +0.26% +0.00% +0.53% / +0.13% +2.38% +2.12%] index_select spread : Elapsed 0.008 ms (0.758 ms / 100) 0.758 -> 0.761 ( +0.40%) [ +0.13% +0.13% +0.00% / +0.40% +2.24% +2.24%] index_select strided 3 : Elapsed 0.008 ms (0.759 ms / 100) 0.755 -> 0.759 ( +0.53%) [ +0.53% +0.53% +0.00% / +0.53% +2.38% +2.25%] index_select strided 5 : Elapsed 0.008 ms (0.759 ms / 100) 0.758 -> 0.763 ( +0.66%) [ +0.13% +0.13% +0.00% / +0.66% +1.85% +1.85%] index_select strided 7 : Elapsed 0.008 ms (0.759 ms / 100) 0.757 -> 0.759 ( +0.26%) [ +0.00% +0.13% +0.66% / +0.26% +1.98% +1.59%] index_select strided 8 : Elapsed 0.008 ms (0.757 ms / 100) 0.758 -> 0.759 ( +0.13%) [ +0.40% +0.26% +0.00% / +0.13% +1.85% +1.85%] index_select strided 16 : Elapsed 0.008 ms (0.761 ms / 100) 0.758 -> 0.758 ( +0.00%) [ +0.53% +0.40% +0.00% / +0.00% +2.11% +1.98%] index_select random : Elapsed 0.008 ms (0.762 ms / 100) 0.755 -> 0.757 ( +0.26%) [ +0.53% +0.66% +0.00% / +0.26% +2.78% +2.52%] index_select random_sorted : Elapsed 0.008 ms (0.759 ms / 100) 0.758 -> 0.761 ( +0.40%) [ +0.13% +0.40% +0.00% / +0.40% +1.98% +1.98%] index_select perm : Elapsed 0.008 ms (0.759 ms / 100) 0.763 -> 0.767 ( +0.52%) [ +0.66% +0.39% +0.00% / +0.52% +1.70% +1.31%] index_select perm_sorted : Elapsed 0.008 ms (0.768 ms / 100) B = [40, 4, 16, 5] (stride (1, 3200, 200, 40)) A = [40, 20, 16, 5] (stride (1, 3200, 40, 640)) dim = 1 2.063 -> 2.068 ( +0.24%) [ +0.24% +0.19% +0.00% / +0.24% +0.58% +0.63%] index_select const : Elapsed 0.021 ms (2.068 ms / 100) 2.075 -> 2.076 ( +0.05%) [ +0.00% +0.14% +0.05% / +0.05% +0.53% +0.53%] index_select wrap : Elapsed 0.021 ms (2.075 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.29% +0.14% +0.00% / +0.10% +0.53% +0.53%] index_select linear : Elapsed 0.021 ms (2.081 ms / 100) 2.074 -> 2.077 ( +0.14%) [ +0.39% +0.19% +0.00% / +0.14% +0.58% +0.63%] index_select reverse : Elapsed 0.021 ms (2.082 ms / 100) 2.066 -> 2.063 ( -0.15%) [ +0.15% +0.19% +0.00% / -0.15% +0.53% +0.58%] index_select skip64 : Elapsed 0.021 ms (2.069 ms / 100) 2.065 -> 2.067 ( +0.10%) [ +0.00% +0.05% +0.24% / +0.10% +0.53% +0.58%] index_select skip256 : Elapsed 0.021 ms (2.065 ms / 100) 2.073 -> 2.075 ( +0.10%) [ +0.05% +0.14% +0.00% / +0.10% +0.58% +0.63%] index_select spread : Elapsed 0.021 ms (2.074 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.43% +0.34%] index_select strided 3 : Elapsed 0.021 ms (2.079 ms / 100) 2.075 -> 2.082 ( +0.34%) [ +0.24% +0.00% +0.05% / +0.34% +0.48% +0.63%] index_select strided 5 : Elapsed 0.021 ms (2.080 ms / 100) 2.074 -> 2.078 ( +0.19%) [ +0.10% +0.19% +0.00% / +0.19% +0.77% +0.72%] index_select strided 7 : Elapsed 0.021 ms (2.076 ms / 100) 2.077 -> 2.081 ( +0.19%) [ +0.29% +0.00% +0.00% / +0.19% +0.58% +0.63%] index_select strided 8 : Elapsed 0.021 ms (2.083 ms / 100) 2.076 -> 2.078 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.58% +0.48%] index_select strided 16 : Elapsed 0.021 ms (2.077 ms / 100) 2.073 -> 2.077 ( +0.19%) [ +0.10% +0.00% +0.14% / +0.19% +0.43% +0.68%] index_select random : Elapsed 0.021 ms (2.075 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.10% +0.19% +0.00% / +0.10% +0.77% +0.72%] index_select random_sorted : Elapsed 0.021 ms (2.073 ms / 100) 2.078 -> 2.083 ( +0.24%) [ +0.24% +0.19% +0.00% / +0.29% +0.29% +0.24%] index_select perm : Elapsed 0.021 ms (2.083 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.58% +0.29%] index_select perm_sorted : Elapsed 0.021 ms (2.080 ms / 100) B = [40, 4, 16, 5] (stride (1, 3200, 200, 40)) A = [40, 20, 16, 5] (stride (1, 640, 40, 12800)) dim = 1 2.058 -> 2.063 ( +0.24%) [ +0.44% +0.00% +0.24% / +0.24% +0.63% +0.63%] index_select const : Elapsed 0.021 ms (2.067 ms / 100) 2.069 -> 2.074 ( +0.24%) [ +0.05% +0.10% +0.00% / +0.24% +1.01% +0.87%] index_select wrap : Elapsed 0.021 ms (2.070 ms / 100) 2.073 -> 2.073 ( +0.00%) [ +0.05% +0.14% +0.00% / +0.00% +0.63% +0.68%] index_select linear : Elapsed 0.021 ms (2.074 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.24% +0.00% +0.29% / +0.10% +0.58% +0.53%] index_select reverse : Elapsed 0.021 ms (2.080 ms / 100) 2.066 -> 2.067 ( +0.05%) [ +0.15% +0.00% +0.00% / +0.05% +0.44% +0.48%] index_select skip64 : Elapsed 0.021 ms (2.069 ms / 100) 2.060 -> 2.064 ( +0.19%) [ +0.00% +0.34% +0.24% / +0.19% +0.53% +0.63%] index_select skip256 : Elapsed 0.021 ms (2.060 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.58% +0.67%] index_select spread : Elapsed 0.021 ms (2.077 ms / 100) 2.074 -> 2.079 ( +0.24%) [ +0.48% +0.19% +0.00% / +0.24% +0.77% +0.68%] index_select strided 3 : Elapsed 0.021 ms (2.084 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.05% +0.05% +0.00% / +0.29% +0.19% +0.43%] index_select strided 5 : Elapsed 0.021 ms (2.081 ms / 100) 2.076 -> 2.079 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.53% +0.77%] index_select strided 7 : Elapsed 0.021 ms (2.079 ms / 100) 2.072 -> 2.072 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.87% +0.77%] index_select strided 8 : Elapsed 0.021 ms (2.074 ms / 100) 2.072 -> 2.073 ( +0.05%) [ +0.34% +0.00% +0.14% / +0.05% +0.72% +0.87%] index_select strided 16 : Elapsed 0.021 ms (2.079 ms / 100) 2.068 -> 2.076 ( +0.39%) [ +0.15% +0.15% +0.00% / +0.39% +0.44% +0.73%] index_select random : Elapsed 0.021 ms (2.071 ms / 100) 2.069 -> 2.072 ( +0.14%) [ +0.19% +0.14% +0.00% / +0.29% +0.29% +0.14%] index_select random_sorted : Elapsed 0.021 ms (2.073 ms / 100) 2.077 -> 2.079 ( +0.10%) [ +0.19% +0.29% +0.00% / +0.10% +0.58% +0.67%] index_select perm : Elapsed 0.021 ms (2.081 ms / 100) 2.082 -> 2.080 ( -0.10%) [ +0.19% +0.00% +0.05% / -0.10% +0.43% +0.48%] index_select perm_sorted : Elapsed 0.021 ms (2.086 ms / 100) B = [40, 4, 16, 5] (stride (1, 3200, 40, 640)) A = [40, 20, 16, 5] (stride (80, 3200, 1, 16)) dim = 1 2.012 -> 2.021 ( +0.45%) [ +0.35% +0.50% +0.00% / +0.45% +1.19% +1.04%] index_select const : Elapsed 0.020 ms (2.019 ms / 100) 2.022 -> 2.022 ( +0.00%) [ +0.15% +0.25% +0.00% / +0.00% +1.04% +1.24%] index_select wrap : Elapsed 0.020 ms (2.025 ms / 100) 2.029 -> 2.034 ( +0.25%) [ +0.30% +0.00% +0.30% / +0.25% +1.03% +1.23%] index_select linear : Elapsed 0.020 ms (2.035 ms / 100) 2.032 -> 2.031 ( -0.05%) [ +0.20% +0.05% +0.00% / -0.05% +1.18% +1.13%] index_select reverse : Elapsed 0.020 ms (2.036 ms / 100) 2.020 -> 2.018 ( -0.10%) [ +0.05% +0.00% +0.05% / -0.10% +0.69% +0.84%] index_select skip64 : Elapsed 0.020 ms (2.021 ms / 100) 2.010 -> 2.013 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.00% +0.95%] index_select skip256 : Elapsed 0.020 ms (2.013 ms / 100) 2.035 -> 2.037 ( +0.10%) [ +0.00% +0.05% +0.34% / +0.10% +0.39% +0.59%] index_select spread : Elapsed 0.020 ms (2.035 ms / 100) 2.049 -> 2.053 ( +0.20%) [ +0.20% +0.10% +0.00% / +0.20% +0.73% +0.78%] index_select strided 3 : Elapsed 0.021 ms (2.053 ms / 100) 2.053 -> 2.056 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.29% +0.58%] index_select strided 5 : Elapsed 0.021 ms (2.053 ms / 100) 2.029 -> 2.037 ( +0.39%) [ +0.00% +0.59% +0.10% / +0.39% +1.38% +1.33%] index_select strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 2.057 -> 2.056 ( -0.05%) [ +0.10% +0.19% +0.00% / -0.05% +0.49% +0.44%] index_select strided 8 : Elapsed 0.021 ms (2.059 ms / 100) 2.034 -> 2.042 ( +0.39%) [ +0.54% +0.15% +0.00% / +0.39% +0.54% +0.39%] index_select strided 16 : Elapsed 0.020 ms (2.045 ms / 100) 2.040 -> 2.040 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.29% +0.34%] index_select random : Elapsed 0.020 ms (2.045 ms / 100) 2.051 -> 2.054 ( +0.15%) [ +0.00% +0.15% +0.20% / +0.15% +0.44% +0.54%] index_select random_sorted : Elapsed 0.021 ms (2.051 ms / 100) 2.055 -> 2.052 ( -0.15%) [ +0.24% +0.00% +0.10% / +0.05% -0.05% -0.15%] index_select perm : Elapsed 0.021 ms (2.060 ms / 100) 2.045 -> 2.046 ( +0.05%) [ +0.29% +0.00% +0.05% / +0.10% +0.20% +0.05%] index_select perm_sorted : Elapsed 0.021 ms (2.051 ms / 100) B = [40, 4, 16, 5] (stride (1, 3200, 40, 640)) A = [40, 20, 16, 5] (stride (100, 5, 4000, 1)) dim = 1 2.074 -> 2.079 ( +0.24%) [ +0.19% +0.39% +0.00% / +0.24% +0.63% +0.53%] index_select const : Elapsed 0.021 ms (2.078 ms / 100) 2.115 -> 2.119 ( +0.19%) [ +0.05% +0.19% +0.00% / +0.19% +0.90% +0.38%] index_select wrap : Elapsed 0.021 ms (2.116 ms / 100) 2.110 -> 2.116 ( +0.28%) [ +0.19% +0.24% +0.00% / +0.28% +0.76% +0.66%] index_select linear : Elapsed 0.021 ms (2.114 ms / 100) 2.117 -> 2.111 ( -0.28%) [ +0.05% +0.00% +0.14% / -0.28% +0.14% +0.19%] index_select reverse : Elapsed 0.021 ms (2.118 ms / 100) 2.084 -> 2.082 ( -0.10%) [ +0.19% +0.05% +0.00% / -0.10% +0.67% +0.58%] index_select skip64 : Elapsed 0.021 ms (2.088 ms / 100) 2.079 -> 2.077 ( -0.10%) [ +0.00% +0.24% +0.19% / -0.10% +0.53% +0.63%] index_select skip256 : Elapsed 0.021 ms (2.079 ms / 100) 2.129 -> 2.131 ( +0.09%) [ +0.19% +0.28% +0.00% / +0.09% +0.42% +0.52%] index_select spread : Elapsed 0.021 ms (2.133 ms / 100) 2.134 -> 2.135 ( +0.05%) [ +0.23% +0.14% +0.00% / +0.33% +0.47% +0.05%] index_select strided 3 : Elapsed 0.021 ms (2.139 ms / 100) 2.130 -> 2.136 ( +0.28%) [ +0.28% +0.00% +0.00% / +0.28% +0.52% +0.66%] index_select strided 5 : Elapsed 0.021 ms (2.136 ms / 100) 2.124 -> 2.129 ( +0.24%) [ +0.24% +0.19% +0.00% / +0.24% +0.66% +0.61%] index_select strided 7 : Elapsed 0.021 ms (2.129 ms / 100) 2.133 -> 2.134 ( +0.05%) [ +0.14% +0.14% +0.00% / +0.05% +0.42% +0.38%] index_select strided 8 : Elapsed 0.021 ms (2.136 ms / 100) 2.132 -> 2.133 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.56% +0.70%] index_select strided 16 : Elapsed 0.021 ms (2.133 ms / 100) 2.127 -> 2.132 ( +0.24%) [ +0.14% +0.28% +0.00% / +0.24% +0.89% +0.75%] index_select random : Elapsed 0.021 ms (2.130 ms / 100) 2.129 -> 2.135 ( +0.28%) [ +0.00% +0.28% +0.05% / +0.28% +0.70% +0.99%] index_select random_sorted : Elapsed 0.021 ms (2.129 ms / 100) 2.131 -> 2.129 ( -0.09%) [ +0.09% +0.00% +0.05% / -0.09% +0.52% +0.47%] index_select perm : Elapsed 0.021 ms (2.133 ms / 100) 2.139 -> 2.143 ( +0.19%) [ +0.19% +0.05% +0.00% / +0.33% +0.19% +0.33%] index_select perm_sorted : Elapsed 0.021 ms (2.143 ms / 100) B = [40, 4, 16, 5] (stride (4, 1, 800, 160)) A = [40, 20, 16, 5] (stride (20, 1, 800, 12800)) dim = 1 2.117 -> 2.120 ( +0.14%) [ +0.00% +0.09% +0.05% / +0.24% +0.38% +0.14%] index_select const : Elapsed 0.021 ms (2.117 ms / 100) 2.115 -> 2.114 ( -0.05%) [ +0.38% +0.00% +0.24% / +0.24% +0.24% -0.05%] index_select wrap : Elapsed 0.021 ms (2.123 ms / 100) 2.118 -> 2.116 ( -0.09%) [ +0.24% +0.28% +0.00% / +0.19% -0.09% +0.05%] index_select linear : Elapsed 0.021 ms (2.123 ms / 100) 2.119 -> 2.118 ( -0.05%) [ +0.14% +0.00% +0.00% / +0.00% +0.14% -0.05%] index_select reverse : Elapsed 0.021 ms (2.122 ms / 100) 2.116 -> 2.120 ( +0.19%) [ +0.19% +0.14% +0.00% / +0.19% +0.24% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.120 ms / 100) 2.121 -> 2.120 ( -0.05%) [ +0.19% +0.00% +0.19% / +0.19% -0.05% +0.09%] index_select skip256 : Elapsed 0.021 ms (2.125 ms / 100) 2.177 -> 2.176 ( -0.05%) [ +0.00% +0.09% +0.00% / +0.00% -0.05% +0.00%] index_select spread : Elapsed 0.022 ms (2.177 ms / 100) 2.154 -> 2.148 ( -0.28%) [ +0.23% +0.19% +0.00% / +0.37% -0.23% -0.28%] index_select strided 3 : Elapsed 0.022 ms (2.159 ms / 100) 2.177 -> 2.174 ( -0.14%) [ +0.23% +0.00% +0.18% / +0.05% -0.14% +0.05%] index_select strided 5 : Elapsed 0.022 ms (2.182 ms / 100) 2.166 -> 2.163 ( -0.14%) [ +0.00% +0.37% +0.05% / -0.09% +0.09% -0.14%] index_select strided 7 : Elapsed 0.022 ms (2.166 ms / 100) 2.164 -> 2.160 ( -0.18%) [ +0.09% +0.14% +0.00% / +0.14% -0.18% -0.05%] index_select strided 8 : Elapsed 0.022 ms (2.166 ms / 100) 2.171 -> 2.170 ( -0.05%) [ +0.37% +0.14% +0.00% / +0.28% +0.37% -0.05%] index_select strided 16 : Elapsed 0.022 ms (2.179 ms / 100) 2.157 -> 2.152 ( -0.23%) [ +0.00% +0.09% +0.09% / -0.09% -0.23% -0.05%] index_select random : Elapsed 0.022 ms (2.157 ms / 100) 2.142 -> 2.145 ( +0.14%) [ +0.00% +0.14% +0.19% / +0.14% +0.14% +0.14%] index_select random_sorted : Elapsed 0.021 ms (2.142 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.00% +0.19% +0.09% / +0.05% +0.28% +0.37%] index_select perm : Elapsed 0.022 ms (2.156 ms / 100) 2.153 -> 2.152 ( -0.05%) [ +0.33% +0.28% +0.00% / +0.09% -0.05% +0.19%] index_select perm_sorted : Elapsed 0.022 ms (2.160 ms / 100) out_shape = [40, 20, 4, 5] in_shape = [40, 20, 16, 5] idx_dim = 2 B = [40, 20, 4, 5] (stride (400, 1, 100, 20)) A = [40, 20, 16, 5] (stride (1600, 1, 100, 20)) dim = 2 2.379 -> 2.382 ( +0.13%) [ +0.00% +0.21% +0.17% / +0.13% +0.42% +0.38%] index_select const : Elapsed 0.024 ms (2.379 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.25% +0.45%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.422 -> 2.420 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.29% +0.29%] index_select linear : Elapsed 0.024 ms (2.424 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.45% +0.54%] index_select reverse : Elapsed 0.024 ms (2.431 ms / 100) 2.385 -> 2.385 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.42% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.385 ms / 100) 2.377 -> 2.378 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.34% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.381 ms / 100) 2.428 -> 2.427 ( -0.04%) [ +0.00% +0.12% +0.21% / -0.04% +0.29% +0.29%] index_select spread : Elapsed 0.024 ms (2.428 ms / 100) 2.432 -> 2.433 ( +0.04%) [ +0.12% +0.25% +0.00% / +0.04% +0.21% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.435 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.04% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.428 ms / 100) 2.431 -> 2.429 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.21% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.393 -> 2.393 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.29% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.393 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.29% +0.16% +0.00% / +0.16% +0.37% +0.41%] index_select random : Elapsed 0.024 ms (2.437 ms / 100) 2.431 -> 2.430 ( -0.04%) [ +0.12% +0.00% +0.00% / -0.04% +0.29% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.434 ms / 100) 2.427 -> 2.429 ( +0.08%) [ +0.21% +0.21% +0.00% / +0.08% +0.29% +0.21%] index_select perm : Elapsed 0.024 ms (2.432 ms / 100) 2.425 -> 2.430 ( +0.21%) [ +0.08% +0.08% +0.00% / +0.21% +0.29% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.427 ms / 100) B = [40, 20, 4, 5] (stride (400, 1, 20, 80)) A = [40, 20, 16, 5] (stride (100, 1, 4000, 20)) dim = 2 2.394 -> 2.398 ( +0.17%) [ +0.21% +0.00% +0.17% / +0.17% +0.17% +0.38%] index_select const : Elapsed 0.024 ms (2.399 ms / 100) 2.431 -> 2.438 ( +0.29%) [ +0.00% +0.21% +0.00% / +0.37% +0.29% +0.29%] index_select wrap : Elapsed 0.024 ms (2.431 ms / 100) 2.428 -> 2.429 ( +0.04%) [ +0.16% +0.21% +0.00% / +0.12% +0.12% +0.04%] index_select linear : Elapsed 0.024 ms (2.432 ms / 100) 2.430 -> 2.431 ( +0.04%) [ +0.21% +0.00% +0.00% / +0.04% +0.08% +0.08%] index_select reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.387 -> 2.388 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.17% +0.17% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.390 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.00% +0.00% +0.21% / +0.17% +0.29% +0.46%] index_select skip256 : Elapsed 0.024 ms (2.394 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.33% +0.16%] index_select spread : Elapsed 0.024 ms (2.431 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.12% +0.04% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.432 ms / 100) 2.431 -> 2.436 ( +0.21%) [ +0.00% +0.16% +0.04% / +0.21% +0.45% +0.45%] index_select strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.428 -> 2.430 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.37% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.17% +0.08% +0.00% / +0.04% +0.42% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.427 -> 2.430 ( +0.12%) [ +0.00% +0.16% +0.00% / +0.12% +0.33% +0.33%] index_select random : Elapsed 0.024 ms (2.427 ms / 100) 2.426 -> 2.428 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.21% +0.49%] index_select random_sorted : Elapsed 0.024 ms (2.426 ms / 100) 2.427 -> 2.425 ( -0.08%) [ +0.12% +0.00% +0.00% / -0.08% +0.29% +0.25%] index_select perm : Elapsed 0.024 ms (2.430 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.33% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [40, 20, 4, 5] (stride (100, 5, 4000, 1)) A = [40, 20, 16, 5] (stride (320, 16, 1, 12800)) dim = 2 1.150 -> 1.152 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +1.04% +1.57%] index_select const : Elapsed 0.012 ms (1.152 ms / 100) 1.149 -> 1.151 ( +0.17%) [ +0.35% +0.00% +0.17% / +0.17% +0.96% +1.48%] index_select wrap : Elapsed 0.012 ms (1.153 ms / 100) 1.150 -> 1.153 ( +0.26%) [ +0.17% +0.00% +0.09% / +0.26% +1.91% +1.13%] index_select linear : Elapsed 0.012 ms (1.152 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.44% +0.00% +0.44% / +0.00% +1.31% +1.48%] index_select reverse : Elapsed 0.012 ms (1.153 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.00% +0.17% +0.35% / +0.17% +1.39% +1.74%] index_select skip64 : Elapsed 0.011 ms (1.148 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.09% +0.00% +0.26% / -0.09% +1.13% +1.39%] index_select skip256 : Elapsed 0.011 ms (1.150 ms / 100) 1.167 -> 1.170 ( +0.26%) [ +0.51% +0.00% +0.69% / +0.26% +1.63% +1.37%] index_select spread : Elapsed 0.012 ms (1.173 ms / 100) 1.164 -> 1.168 ( +0.34%) [ +0.00% +0.52% +0.43% / +0.34% +1.98% +1.89%] index_select strided 3 : Elapsed 0.012 ms (1.164 ms / 100) 1.168 -> 1.174 ( +0.51%) [ +0.43% +0.00% +0.34% / +0.51% +1.63% +1.97%] index_select strided 5 : Elapsed 0.012 ms (1.173 ms / 100) 1.171 -> 1.165 ( -0.51%) [ +0.43% +0.00% +0.26% / -0.51% +1.79% +1.45%] index_select strided 7 : Elapsed 0.012 ms (1.176 ms / 100) 1.183 -> 1.180 ( -0.25%) [ +0.08% +0.25% +0.00% / -0.25% +1.94% +1.86%] index_select strided 8 : Elapsed 0.012 ms (1.184 ms / 100) 1.183 -> 1.185 ( +0.17%) [ +0.17% +0.00% +0.25% / +0.17% +1.86% +1.69%] index_select random : Elapsed 0.012 ms (1.185 ms / 100) 1.173 -> 1.171 ( -0.17%) [ +0.00% +0.09% +0.09% / -0.17% +1.36% +1.11%] index_select random_sorted : Elapsed 0.012 ms (1.173 ms / 100) 1.172 -> 1.175 ( +0.26%) [ +0.60% +0.00% +0.17% / +0.26% +1.62% +1.28%] index_select perm : Elapsed 0.012 ms (1.179 ms / 100) 1.171 -> 1.177 ( +0.51%) [ +0.00% +0.60% +0.34% / +0.51% +1.54% +1.54%] index_select perm_sorted : Elapsed 0.012 ms (1.171 ms / 100) B = [40, 20, 4, 5] (stride (4, 160, 1, 3200)) A = [40, 20, 16, 5] (stride (1600, 5, 100, 1)) dim = 2 2.402 -> 2.405 ( +0.12%) [ +0.21% +0.00% +0.17% / +0.12% +0.12% +0.25%] index_select const : Elapsed 0.024 ms (2.407 ms / 100) 2.438 -> 2.443 ( +0.21%) [ +0.12% +0.25% +0.00% / +0.21% +0.29% +0.49%] index_select wrap : Elapsed 0.024 ms (2.441 ms / 100) 2.442 -> 2.440 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.08% +0.08% +0.16%] index_select linear : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.441 ( +0.16%) [ +0.29% +0.21% +0.00% / +0.16% +0.21% +0.25%] index_select reverse : Elapsed 0.024 ms (2.444 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +0.33% +0.46%] index_select skip64 : Elapsed 0.024 ms (2.396 ms / 100) 2.402 -> 2.400 ( -0.08%) [ +0.00% +0.00% +0.17% / +0.00% +0.08% -0.08%] index_select skip256 : Elapsed 0.024 ms (2.402 ms / 100) 2.437 -> 2.442 ( +0.21%) [ +0.25% +0.04% +0.00% / +0.21% +0.29% +0.21%] index_select spread : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.439 ( +0.08%) [ +0.16% +0.04% +0.00% / +0.08% +0.29% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.441 ms / 100) 2.444 -> 2.442 ( -0.08%) [ +0.16% +0.00% +0.00% / -0.08% +0.20% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.448 ms / 100) 2.444 -> 2.442 ( -0.08%) [ +0.20% +0.04% +0.00% / -0.08% +0.08% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.12% +0.00% +0.00% / -0.08% +0.33% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.438 -> 2.440 ( +0.08%) [ +0.00% +0.21% +0.12% / +0.08% +0.16% +0.37%] index_select random : Elapsed 0.024 ms (2.438 ms / 100) 2.435 -> 2.438 ( +0.12%) [ +0.00% +0.04% +0.12% / +0.12% +0.49% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.435 ms / 100) 2.442 -> 2.441 ( -0.04%) [ +0.16% +0.00% +0.12% / -0.04% +0.08% +0.20%] index_select perm : Elapsed 0.024 ms (2.446 ms / 100) 2.440 -> 2.444 ( +0.16%) [ +0.00% +0.12% +0.20% / +0.16% +0.29% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) B = [40, 20, 4, 5] (stride (20, 1, 800, 3200)) A = [40, 20, 16, 5] (stride (1, 40, 4000, 800)) dim = 2 2.417 -> 2.419 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.54% +0.41%] index_select const : Elapsed 0.024 ms (2.418 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.37% +0.29%] index_select wrap : Elapsed 0.024 ms (2.427 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.08% +0.00% +0.21% / +0.12% +0.45% +0.54%] index_select linear : Elapsed 0.024 ms (2.428 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.29% +0.08% +0.00% / +0.08% +0.58% +0.54%] index_select reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.415 -> 2.415 ( +0.00%) [ +0.00% +0.21% +0.08% / +0.00% +0.54% +0.66%] index_select skip64 : Elapsed 0.024 ms (2.415 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.25% +0.21% +0.00% / +0.08% +0.54% +0.58%] index_select skip256 : Elapsed 0.024 ms (2.422 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.45% +0.50%] index_select spread : Elapsed 0.024 ms (2.423 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.37% +0.54%] index_select strided 3 : Elapsed 0.024 ms (2.428 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.45% +0.58%] index_select strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.00% +0.17% +0.04% / +0.04% +0.29% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.424 ms / 100) 2.417 -> 2.416 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.29% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.417 ms / 100) 2.423 -> 2.422 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.33% +0.25%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.29% +0.00% +0.17% / +0.17% +0.50% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.423 -> 2.427 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.58% +0.41%] index_select perm : Elapsed 0.024 ms (2.427 ms / 100) 2.424 -> 2.427 ( +0.12%) [ +0.08% +0.17% +0.00% / +0.12% +0.41% +0.45%] index_select perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) out_shape = [40, 20, 16, 4] in_shape = [40, 20, 16, 5] idx_dim = 3 B = [40, 20, 16, 4] (stride (1280, 1, 80, 20)) A = [40, 20, 16, 5] (stride (5, 200, 4000, 1)) dim = 3 6.016 -> 6.019 ( +0.05%) [ +0.00% +0.07% +0.10% / +0.05% +0.15% +0.23%] index_select const : Elapsed 0.060 ms (6.016 ms / 100) 6.019 -> 6.023 ( +0.07%) [ +0.02% +0.00% +0.08% / +0.10% +0.08% +0.07%] index_select wrap : Elapsed 0.060 ms (6.020 ms / 100) 6.018 -> 6.022 ( +0.07%) [ +0.00% +0.08% +0.00% / +0.07% +0.15% +0.25%] index_select linear : Elapsed 0.060 ms (6.018 ms / 100) 6.011 -> 6.020 ( +0.15%) [ +0.07% +0.00% +0.05% / +0.15% +0.15% +0.22%] index_select reverse : Elapsed 0.060 ms (6.015 ms / 100) 6.017 -> 6.021 ( +0.07%) [ +0.13% +0.00% +0.05% / +0.07% +0.07% +0.15%] index_select skip64 : Elapsed 0.060 ms (6.025 ms / 100) 6.019 -> 6.024 ( +0.08%) [ +0.13% +0.00% +0.07% / +0.12% +0.08% +0.22%] index_select skip256 : Elapsed 0.060 ms (6.027 ms / 100) 6.015 -> 6.019 ( +0.07%) [ +0.15% +0.00% +0.18% / +0.07% +0.25% +0.20%] index_select spread : Elapsed 0.060 ms (6.024 ms / 100) 6.016 -> 6.023 ( +0.12%) [ +0.07% +0.00% +0.08% / +0.12% +0.23% +0.27%] index_select strided 3 : Elapsed 0.060 ms (6.020 ms / 100) 6.016 -> 6.020 ( +0.07%) [ +0.00% +0.07% +0.10% / +0.12% +0.07% +0.15%] index_select random : Elapsed 0.060 ms (6.016 ms / 100) 6.015 -> 6.025 ( +0.17%) [ +0.05% +0.00% +0.12% / +0.18% +0.17% +0.30%] index_select random_sorted : Elapsed 0.060 ms (6.018 ms / 100) 6.015 -> 6.015 ( +0.00%) [ +0.00% +0.08% +0.17% / +0.00% +0.08% +0.22%] index_select perm : Elapsed 0.060 ms (6.015 ms / 100) 6.017 -> 6.016 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.20% +0.25%] index_select perm_sorted : Elapsed 0.060 ms (6.020 ms / 100) B = [40, 20, 16, 4] (stride (1280, 1, 80, 20)) A = [40, 20, 16, 5] (stride (1, 40, 4000, 800)) dim = 3 5.752 -> 5.747 ( -0.09%) [ +0.14% +0.00% +0.05% / -0.02% -0.09% -0.02%] index_select const : Elapsed 0.058 ms (5.760 ms / 100) 5.814 -> 5.813 ( -0.02%) [ +0.22% +0.12% +0.00% / +0.19% +0.03% -0.02%] index_select wrap : Elapsed 0.058 ms (5.827 ms / 100) 5.818 -> 5.812 ( -0.10%) [ +0.12% +0.00% +0.12% / +0.09% -0.03% -0.10%] index_select linear : Elapsed 0.058 ms (5.825 ms / 100) 5.836 -> 5.818 ( -0.31%) [ +0.07% +0.00% +0.17% / +0.22% -0.31% -0.29%] index_select reverse : Elapsed 0.058 ms (5.840 ms / 100) 5.756 -> 5.745 ( -0.19%) [ +0.00% +0.00% +0.10% / +0.00% -0.12% -0.19%] index_select skip64 : Elapsed 0.058 ms (5.756 ms / 100) 5.746 -> 5.749 ( +0.05%) [ +0.14% +0.17% +0.00% / +0.10% +0.12% +0.05%] index_select skip256 : Elapsed 0.058 ms (5.754 ms / 100) 5.820 -> 5.804 ( -0.27%) [ +0.03% +0.00% +0.05% / +0.07% -0.27% -0.03%] index_select spread : Elapsed 0.058 ms (5.822 ms / 100) 5.820 -> 5.801 ( -0.33%) [ +0.00% +0.10% +0.12% / +0.14% -0.24% -0.33%] index_select strided 3 : Elapsed 0.058 ms (5.820 ms / 100) 5.797 -> 5.776 ( -0.36%) [ +0.14% +0.00% +0.14% / +0.14% -0.36% -0.35%] index_select random : Elapsed 0.058 ms (5.805 ms / 100) 5.783 -> 5.765 ( -0.31%) [ +0.00% +0.03% +0.17% / +0.22% -0.31% -0.17%] index_select random_sorted : Elapsed 0.058 ms (5.783 ms / 100) 5.817 -> 5.805 ( -0.21%) [ +0.03% +0.09% +0.00% / +0.07% -0.21% -0.09%] index_select perm : Elapsed 0.058 ms (5.819 ms / 100) 5.816 -> 5.802 ( -0.24%) [ +0.00% +0.17% +0.10% / +0.05% -0.22% -0.24%] index_select perm_sorted : Elapsed 0.058 ms (5.816 ms / 100) B = [40, 20, 16, 4] (stride (1280, 1, 20, 320)) A = [40, 20, 16, 5] (stride (100, 5, 4000, 1)) dim = 3 5.736 -> 5.706 ( -0.52%) [ +0.00% +0.02% +0.02% / +0.03% -0.37% -0.52%] index_select const : Elapsed 0.057 ms (5.736 ms / 100) 5.732 -> 5.707 ( -0.44%) [ +0.17% +0.00% +0.12% / +0.00% -0.44% -0.37%] index_select wrap : Elapsed 0.057 ms (5.742 ms / 100) 5.732 -> 5.706 ( -0.45%) [ +0.00% +0.12% +0.17% / +0.14% -0.45% -0.42%] index_select linear : Elapsed 0.057 ms (5.732 ms / 100) 5.724 -> 5.707 ( -0.30%) [ +0.24% +0.00% +0.19% / +0.24% -0.30% -0.19%] index_select reverse : Elapsed 0.057 ms (5.738 ms / 100) 5.731 -> 5.704 ( -0.47%) [ +0.12% +0.00% +0.07% / +0.14% -0.47% -0.37%] index_select skip64 : Elapsed 0.057 ms (5.738 ms / 100) 5.734 -> 5.709 ( -0.44%) [ +0.14% +0.05% +0.00% / +0.12% -0.42% -0.44%] index_select skip256 : Elapsed 0.057 ms (5.742 ms / 100) 5.735 -> 5.708 ( -0.47%) [ +0.03% +0.14% +0.00% / +0.02% -0.47% -0.42%] index_select spread : Elapsed 0.057 ms (5.737 ms / 100) 5.735 -> 5.707 ( -0.49%) [ +0.02% +0.00% +0.07% / +0.14% -0.47% -0.49%] index_select strided 3 : Elapsed 0.057 ms (5.736 ms / 100) 5.733 -> 5.704 ( -0.51%) [ +0.02% +0.00% +0.12% / +0.05% -0.51% -0.38%] index_select random : Elapsed 0.057 ms (5.734 ms / 100) 5.724 -> 5.702 ( -0.38%) [ +0.00% +0.12% +0.07% / +0.14% -0.23% -0.38%] index_select random_sorted : Elapsed 0.057 ms (5.724 ms / 100) 5.730 -> 5.705 ( -0.44%) [ +0.00% +0.03% +0.14% / +0.00% -0.44% -0.37%] index_select perm : Elapsed 0.057 ms (5.730 ms / 100) 5.733 -> 5.706 ( -0.47%) [ +0.00% +0.07% +0.05% / +0.17% -0.35% -0.47%] index_select perm_sorted : Elapsed 0.057 ms (5.733 ms / 100) B = [40, 20, 16, 4] (stride (64, 2560, 1, 16)) A = [40, 20, 16, 5] (stride (16, 3200, 1, 640)) dim = 3 5.697 -> 5.704 ( +0.12%) [ +0.00% +0.02% +0.00% / +0.12% +0.32% +0.18%] index_select const : Elapsed 0.057 ms (5.697 ms / 100) 5.805 -> 5.801 ( -0.07%) [ +0.00% +0.05% +0.03% / -0.02% +0.00% -0.07%] index_select wrap : Elapsed 0.058 ms (5.805 ms / 100) 5.802 -> 5.794 ( -0.14%) [ +0.03% +0.00% +0.02% / +0.05% -0.09% -0.14%] index_select linear : Elapsed 0.058 ms (5.804 ms / 100) 5.808 -> 5.793 ( -0.26%) [ +0.03% +0.07% +0.00% / +0.12% -0.26% -0.17%] index_select reverse : Elapsed 0.058 ms (5.810 ms / 100) 5.687 -> 5.690 ( +0.05%) [ +0.00% +0.07% +0.16% / +0.14% +0.14% +0.05%] index_select skip64 : Elapsed 0.057 ms (5.687 ms / 100) 5.692 -> 5.707 ( +0.26%) [ +0.00% +0.09% +0.00% / +0.26% +0.26% +0.30%] index_select skip256 : Elapsed 0.057 ms (5.692 ms / 100) 5.797 -> 5.789 ( -0.14%) [ +0.00% +0.05% +0.02% / +0.21% +0.00% -0.14%] index_select spread : Elapsed 0.058 ms (5.797 ms / 100) 5.791 -> 5.787 ( -0.07%) [ +0.00% +0.05% +0.02% / +0.07% -0.07% +0.05%] index_select strided 3 : Elapsed 0.058 ms (5.791 ms / 100) 5.795 -> 5.782 ( -0.22%) [ +0.00% +0.09% +0.03% / +0.12% -0.22% -0.09%] index_select random : Elapsed 0.058 ms (5.795 ms / 100) 5.799 -> 5.788 ( -0.19%) [ +0.00% +0.14% +0.10% / +0.14% -0.16% -0.19%] index_select random_sorted : Elapsed 0.058 ms (5.799 ms / 100) 5.795 -> 5.795 ( +0.00%) [ +0.03% +0.00% +0.09% / +0.00% +0.05% +0.05%] index_select perm : Elapsed 0.058 ms (5.797 ms / 100) 5.791 -> 5.793 ( +0.03%) [ +0.14% +0.09% +0.00% / +0.16% +0.07% +0.03%] index_select perm_sorted : Elapsed 0.058 ms (5.799 ms / 100) B = [40, 20, 16, 4] (stride (4, 2560, 160, 1)) A = [40, 20, 16, 5] (stride (16, 640, 1, 12800)) dim = 3 5.484 -> 5.488 ( +0.07%) [ +0.07% +0.00% +0.04% / +0.07% +0.47% +0.51%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.575 -> 5.586 ( +0.20%) [ +0.11% +0.00% +0.14% / +0.30% +0.20% +0.29%] index_select wrap : Elapsed 0.056 ms (5.581 ms / 100) 5.580 -> 5.583 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.14% +0.13% +0.05%] index_select linear : Elapsed 0.056 ms (5.583 ms / 100) 5.577 -> 5.572 ( -0.09%) [ +0.13% +0.00% +0.13% / +0.25% +0.09% -0.09%] index_select reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.473 -> 5.484 ( +0.20%) [ +0.05% +0.00% +0.13% / +0.20% +0.42% +0.55%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.35% +0.38%] index_select skip256 : Elapsed 0.055 ms (5.488 ms / 100) 5.581 -> 5.584 ( +0.05%) [ +0.07% +0.00% +0.02% / +0.07% +0.05% +0.18%] index_select spread : Elapsed 0.056 ms (5.585 ms / 100) 5.568 -> 5.571 ( +0.05%) [ +0.00% +0.04% +0.04% / +0.05% +0.11% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.556 -> 5.534 ( -0.40%) [ +0.04% +0.00% +0.09% / +0.00% -0.40% -0.27%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.556 -> 5.546 ( -0.18%) [ +0.02% +0.00% +0.18% / +0.05% -0.18% -0.18%] index_select random_sorted : Elapsed 0.056 ms (5.557 ms / 100) 5.580 -> 5.579 ( -0.02%) [ +0.09% +0.11% +0.00% / +0.07% -0.02% +0.16%] index_select perm : Elapsed 0.056 ms (5.585 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.00% +0.02% +0.09% / +0.04% +0.27% +0.16%] index_select perm_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [40, 20, 16, 4] (stride (16, 2560, 1, 640)) A = [40, 20, 16, 5] (stride (1, 40, 4000, 800)) dim = 3 5.676 -> 5.669 ( -0.12%) [ +0.07% +0.00% +0.04% / +0.11% -0.12% -0.07%] index_select const : Elapsed 0.057 ms (5.680 ms / 100) 5.736 -> 5.735 ( -0.02%) [ +0.05% +0.14% +0.00% / +0.03% +0.03% -0.02%] index_select wrap : Elapsed 0.057 ms (5.739 ms / 100) 5.727 -> 5.743 ( +0.28%) [ +0.26% +0.00% +0.33% / +0.35% +0.30% +0.28%] index_select linear : Elapsed 0.057 ms (5.742 ms / 100) 5.765 -> 5.741 ( -0.42%) [ +0.12% +0.00% +0.10% / +0.17% -0.42% -0.31%] index_select reverse : Elapsed 0.058 ms (5.772 ms / 100) 5.676 -> 5.665 ( -0.19%) [ +0.05% +0.00% +0.05% / +0.00% +0.02% -0.19%] index_select skip64 : Elapsed 0.057 ms (5.679 ms / 100) 5.675 -> 5.671 ( -0.07%) [ +0.00% +0.04% +0.05% / -0.02% -0.07% -0.02%] index_select skip256 : Elapsed 0.057 ms (5.675 ms / 100) 5.738 -> 5.733 ( -0.09%) [ +0.02% +0.00% +0.12% / +0.03% +0.00% -0.09%] index_select spread : Elapsed 0.057 ms (5.739 ms / 100) 5.738 -> 5.733 ( -0.09%) [ +0.02% +0.00% +0.05% / +0.16% -0.09% -0.09%] index_select strided 3 : Elapsed 0.057 ms (5.739 ms / 100) 5.712 -> 5.701 ( -0.19%) [ +0.00% +0.02% +0.02% / +0.12% -0.14% -0.19%] index_select random : Elapsed 0.057 ms (5.712 ms / 100) 5.699 -> 5.680 ( -0.33%) [ +0.00% +0.00% +0.05% / -0.07% -0.33% -0.32%] index_select random_sorted : Elapsed 0.057 ms (5.699 ms / 100) 5.754 -> 5.760 ( +0.10%) [ +0.00% +0.05% +0.09% / +0.23% +0.10% +0.26%] index_select perm : Elapsed 0.058 ms (5.754 ms / 100) 5.741 -> 5.741 ( +0.00%) [ +0.00% +0.03% +0.09% / +0.00% +0.05% +0.07%] index_select perm_sorted : Elapsed 0.057 ms (5.741 ms / 100) B = [40, 20, 16, 4] (stride (20, 1, 3200, 800)) A = [40, 20, 16, 5] (stride (1600, 1, 20, 320)) dim = 3 5.449 -> 5.460 ( +0.20%) [ +0.04% +0.00% +0.24% / +0.20% +0.53% +0.46%] index_select const : Elapsed 0.055 ms (5.451 ms / 100) 5.508 -> 5.509 ( +0.02%) [ +0.05% +0.00% +0.16% / +0.04% +0.02% +0.07%] index_select wrap : Elapsed 0.055 ms (5.511 ms / 100) 5.507 -> 5.509 ( +0.04%) [ +0.00% +0.20% +0.07% / +0.04% +0.05% +0.07%] index_select linear : Elapsed 0.055 ms (5.507 ms / 100) 5.505 -> 5.503 ( -0.04%) [ +0.05% +0.00% +0.11% / +0.22% -0.04% +0.02%] index_select reverse : Elapsed 0.055 ms (5.508 ms / 100) 5.459 -> 5.460 ( +0.02%) [ +0.04% +0.00% +0.05% / +0.02% +0.33% +0.31%] index_select skip64 : Elapsed 0.055 ms (5.461 ms / 100) 5.455 -> 5.456 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.02% +0.46% +0.31%] index_select skip256 : Elapsed 0.055 ms (5.457 ms / 100) 5.510 -> 5.515 ( +0.09%) [ +0.00% +0.13% +0.16% / +0.09% +0.13% +0.13%] index_select spread : Elapsed 0.055 ms (5.510 ms / 100) 5.510 -> 5.512 ( +0.04%) [ +0.15% +0.00% +0.09% / +0.11% +0.05% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.518 ms / 100) 5.509 -> 5.503 ( -0.11%) [ +0.00% +0.07% +0.09% / +0.16% -0.04% -0.11%] index_select random : Elapsed 0.055 ms (5.509 ms / 100) 5.506 -> 5.504 ( -0.04%) [ +0.09% +0.00% +0.22% / +0.11% +0.16% -0.04%] index_select random_sorted : Elapsed 0.055 ms (5.511 ms / 100) 5.517 -> 5.510 ( -0.13%) [ +0.11% +0.00% +0.05% / +0.11% -0.13% -0.09%] index_select perm : Elapsed 0.055 ms (5.523 ms / 100) 5.513 -> 5.521 ( +0.15%) [ +0.04% +0.00% +0.20% / +0.16% +0.15% +0.15%] index_select perm_sorted : Elapsed 0.055 ms (5.515 ms / 100) B = [40, 20, 16, 4] (stride (320, 1, 20, 12800)) A = [40, 20, 16, 5] (stride (20, 1, 4000, 800)) dim = 3 5.511 -> 5.489 ( -0.40%) [ +0.00% +0.05% +0.15% / +0.05% -0.33% -0.40%] index_select const : Elapsed 0.055 ms (5.511 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.18% +0.04% +0.00% / +0.02% -0.04% -0.02%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.569 -> 5.565 ( -0.07%) [ +0.09% +0.00% +0.00% / +0.00% -0.07% +0.02%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.595 -> 5.586 ( -0.16%) [ +0.18% +0.00% +0.13% / +0.13% -0.11% -0.16%] index_select reverse : Elapsed 0.056 ms (5.605 ms / 100) 5.511 -> 5.494 ( -0.31%) [ +0.00% +0.09% +0.04% / +0.07% -0.31% -0.25%] index_select skip64 : Elapsed 0.055 ms (5.511 ms / 100) 5.507 -> 5.495 ( -0.22%) [ +0.00% +0.24% +0.07% / +0.22% -0.22% -0.20%] index_select skip256 : Elapsed 0.055 ms (5.507 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.00% +0.09% +0.02% / +0.05% -0.11% -0.13%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.581 -> 5.558 ( -0.41%) [ +0.07% +0.02% +0.00% / +0.04% -0.41% -0.39%] index_select strided 3 : Elapsed 0.056 ms (5.585 ms / 100) 5.536 -> 5.541 ( +0.09%) [ +0.00% +0.02% +0.14% / +0.09% +0.76% +0.83%] index_select random : Elapsed 0.055 ms (5.536 ms / 100) 5.525 -> 5.528 ( +0.05%) [ +0.07% +0.00% +0.04% / +0.05% +0.42% +0.38%] index_select random_sorted : Elapsed 0.055 ms (5.529 ms / 100) 5.566 -> 5.558 ( -0.14%) [ +0.00% +0.13% +0.13% / +0.22% -0.14% -0.14%] index_select perm : Elapsed 0.056 ms (5.566 ms / 100) 5.571 -> 5.553 ( -0.32%) [ +0.02% +0.25% +0.00% / +0.04% -0.20% -0.32%] index_select perm_sorted : Elapsed 0.056 ms (5.572 ms / 100) out_shape = [5, 16, 20, 40] in_shape = [4, 16, 20, 40] idx_dim = 0 B = [5, 16, 20, 40] (stride (12800, 40, 640, 1)) A = [4, 16, 20, 40] (stride (40, 3200, 160, 1)) dim = 0 5.621 -> 5.592 ( -0.52%) [ +0.09% +0.00% +0.09% / +0.16% -0.52% -0.52%] index_add_ linear : Elapsed 0.056 ms (5.626 ms / 100) 5.552 -> 5.522 ( -0.54%) [ +0.18% +0.00% +0.22% / +0.16% -0.54% -0.41%] index_copy_ linear : Elapsed 0.056 ms (5.562 ms / 100) 5.607 -> 5.592 ( -0.27%) [ +0.00% +0.16% +0.20% / +0.23% -0.27% -0.20%] index_add_ reverse : Elapsed 0.056 ms (5.607 ms / 100) 5.553 -> 5.526 ( -0.49%) [ +0.00% +0.00% +0.04% / +0.04% -0.36% -0.49%] index_copy_ reverse : Elapsed 0.056 ms (5.553 ms / 100) 5.619 -> 5.584 ( -0.62%) [ +0.00% +0.04% +0.09% / -0.02% -0.53% -0.62%] index_add_ spread : Elapsed 0.056 ms (5.619 ms / 100) 5.550 -> 5.518 ( -0.58%) [ +0.07% +0.05% +0.00% / +0.02% -0.34% -0.58%] index_copy_ spread : Elapsed 0.056 ms (5.554 ms / 100) 5.609 -> 5.602 ( -0.12%) [ +0.04% +0.00% +0.18% / +0.11% -0.12% -0.11%] index_add_ strided 3 : Elapsed 0.056 ms (5.611 ms / 100) 5.550 -> 5.526 ( -0.43%) [ +0.00% +0.00% +0.00% / -0.07% -0.31% -0.43%] index_copy_ strided 3 : Elapsed 0.055 ms (5.550 ms / 100) 5.608 -> 5.596 ( -0.21%) [ +0.07% +0.00% +0.00% / +0.00% -0.16% -0.21%] index_add_ perm : Elapsed 0.056 ms (5.612 ms / 100) 5.547 -> 5.537 ( -0.18%) [ +0.09% +0.00% +0.04% / +0.05% -0.18% -0.16%] index_copy_ perm : Elapsed 0.056 ms (5.552 ms / 100) 5.616 -> 5.586 ( -0.53%) [ +0.04% +0.00% +0.20% / +0.02% -0.50% -0.53%] index_add_ perm_sorted : Elapsed 0.056 ms (5.618 ms / 100) 5.556 -> 5.527 ( -0.52%) [ +0.07% +0.00% +0.07% / +0.04% -0.52% -0.41%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.560 ms / 100) 5.755 -> 5.739 ( -0.28%) [ +0.03% +0.00% +0.03% / -0.02% -0.28% -0.23%] index_select const : Elapsed 0.058 ms (5.757 ms / 100) 5.863 -> 5.827 ( -0.61%) [ +0.00% +0.14% +0.05% / +0.05% -0.55% -0.61%] index_select wrap : Elapsed 0.059 ms (5.863 ms / 100) 5.848 -> 5.822 ( -0.44%) [ +0.00% +0.07% +0.07% / -0.03% -0.29% -0.44%] index_select linear : Elapsed 0.058 ms (5.848 ms / 100) 5.858 -> 5.832 ( -0.44%) [ +0.00% +0.03% +0.10% / +0.14% -0.44% -0.31%] index_select reverse : Elapsed 0.059 ms (5.858 ms / 100) 5.753 -> 5.727 ( -0.45%) [ +0.03% +0.00% +0.09% / -0.02% -0.37% -0.45%] index_select skip64 : Elapsed 0.058 ms (5.755 ms / 100) 5.757 -> 5.744 ( -0.23%) [ +0.03% +0.00% +0.02% / +0.07% -0.09% -0.23%] index_select skip256 : Elapsed 0.058 ms (5.759 ms / 100) 5.863 -> 5.828 ( -0.60%) [ +0.05% +0.00% +0.09% / -0.05% -0.60% -0.46%] index_select spread : Elapsed 0.059 ms (5.866 ms / 100) 5.861 -> 5.837 ( -0.41%) [ +0.00% +0.22% +0.15% / +0.15% -0.41% -0.36%] index_select strided 3 : Elapsed 0.059 ms (5.861 ms / 100) 5.859 -> 5.829 ( -0.51%) [ +0.00% +0.03% +0.14% / -0.02% -0.36% -0.51%] index_select random : Elapsed 0.059 ms (5.859 ms / 100) 5.850 -> 5.827 ( -0.39%) [ +0.21% +0.10% +0.00% / +0.02% -0.34% -0.39%] index_select random_sorted : Elapsed 0.059 ms (5.862 ms / 100) B = [5, 16, 20, 40] (stride (12800, 20, 1, 320)) A = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) dim = 0 5.636 -> 5.644 ( +0.14%) [ +0.00% +0.12% +0.23% / +0.20% +0.25% +0.14%] index_add_ linear : Elapsed 0.056 ms (5.636 ms / 100) 5.589 -> 5.602 ( +0.23%) [ +0.00% +0.18% +0.18% / +0.25% +0.32% +0.23%] index_copy_ linear : Elapsed 0.056 ms (5.589 ms / 100) 5.634 -> 5.641 ( +0.12%) [ +0.05% +0.00% +0.30% / +0.12% +0.37% +0.32%] index_add_ reverse : Elapsed 0.056 ms (5.637 ms / 100) 5.579 -> 5.590 ( +0.20%) [ +0.18% +0.00% +0.32% / +0.20% +0.45% +0.61%] index_copy_ reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.635 -> 5.640 ( +0.09%) [ +0.00% +0.16% +0.28% / +0.11% +0.20% +0.09%] index_add_ spread : Elapsed 0.056 ms (5.635 ms / 100) 5.595 -> 5.598 ( +0.05%) [ +0.00% +0.04% +0.16% / +0.05% +0.11% +0.14%] index_copy_ spread : Elapsed 0.056 ms (5.595 ms / 100) 5.637 -> 5.651 ( +0.25%) [ +0.00% +0.18% +0.20% / +0.25% +0.44% +0.59%] index_add_ strided 3 : Elapsed 0.056 ms (5.637 ms / 100) 5.590 -> 5.610 ( +0.36%) [ +0.00% +0.02% +0.13% / +0.36% +0.59% +0.45%] index_copy_ strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.649 -> 5.656 ( +0.12%) [ +0.00% +0.09% +0.14% / +0.12% +0.21% +0.35%] index_add_ perm : Elapsed 0.056 ms (5.649 ms / 100) 5.601 -> 5.602 ( +0.02%) [ +0.00% +0.09% +0.23% / +0.02% +0.36% +0.30%] index_copy_ perm : Elapsed 0.056 ms (5.601 ms / 100) 5.641 -> 5.663 ( +0.39%) [ +0.00% +0.14% +0.41% / +0.39% +0.48% +0.73%] index_add_ perm_sorted : Elapsed 0.056 ms (5.641 ms / 100) 5.600 -> 5.615 ( +0.27%) [ +0.13% +0.00% +0.41% / +0.27% +0.57% +0.34%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.607 ms / 100) 5.877 -> 5.888 ( +0.19%) [ +0.00% +0.02% +0.00% / +0.19% +0.27% +0.37%] index_select const : Elapsed 0.059 ms (5.877 ms / 100) 5.940 -> 5.950 ( +0.17%) [ +0.13% +0.00% +0.25% / +0.17% +0.59% +0.61%] index_select wrap : Elapsed 0.059 ms (5.948 ms / 100) 5.932 -> 5.940 ( +0.13%) [ +0.05% +0.00% +0.05% / +0.13% +0.37% +0.47%] index_select linear : Elapsed 0.059 ms (5.935 ms / 100) 5.946 -> 5.954 ( +0.13%) [ +0.12% +0.00% +0.24% / +0.13% +0.35% +0.25%] index_select reverse : Elapsed 0.060 ms (5.953 ms / 100) 5.878 -> 5.887 ( +0.15%) [ +0.03% +0.00% +0.31% / +0.15% +0.31% +0.31%] index_select skip64 : Elapsed 0.059 ms (5.880 ms / 100) 5.876 -> 5.885 ( +0.15%) [ +0.00% +0.12% +0.29% / +0.26% +0.39% +0.15%] index_select skip256 : Elapsed 0.059 ms (5.876 ms / 100) 5.929 -> 5.943 ( +0.24%) [ +0.19% +0.00% +0.34% / +0.24% +0.57% +0.51%] index_select spread : Elapsed 0.059 ms (5.940 ms / 100) 5.949 -> 5.947 ( -0.03%) [ +0.08% +0.00% +0.13% / -0.03% +0.35% +0.29%] index_select strided 3 : Elapsed 0.060 ms (5.954 ms / 100) 5.924 -> 5.940 ( +0.27%) [ +0.15% +0.00% +0.20% / +0.27% +0.47% +0.71%] index_select random : Elapsed 0.059 ms (5.933 ms / 100) 5.916 -> 5.927 ( +0.19%) [ +0.20% +0.00% +0.29% / +0.19% +0.56% +0.42%] index_select random_sorted : Elapsed 0.059 ms (5.928 ms / 100) B = [5, 16, 20, 40] (stride (12800, 1, 16, 320)) A = [4, 16, 20, 40] (stride (320, 20, 1, 1280)) dim = 0 5.656 -> 5.605 ( -0.90%) [ +0.05% +0.00% +0.00% / +0.12% -0.72% -0.90%] index_add_ linear : Elapsed 0.057 ms (5.659 ms / 100) 5.601 -> 5.549 ( -0.93%) [ +0.00% +0.02% +0.25% / +0.12% -0.75% -0.93%] index_copy_ linear : Elapsed 0.056 ms (5.601 ms / 100) 5.656 -> 5.620 ( -0.64%) [ +0.32% +0.00% +0.16% / +0.25% -0.64% -0.57%] index_add_ reverse : Elapsed 0.057 ms (5.674 ms / 100) 5.594 -> 5.550 ( -0.79%) [ +0.00% +0.14% +0.23% / +0.16% -0.79% -0.70%] index_copy_ reverse : Elapsed 0.056 ms (5.594 ms / 100) 5.660 -> 5.602 ( -1.02%) [ +0.00% +0.02% +0.09% / +0.07% -1.02% -0.95%] index_add_ spread : Elapsed 0.057 ms (5.660 ms / 100) 5.607 -> 5.557 ( -0.89%) [ +0.00% +0.00% +0.04% / +0.16% -0.89% -0.84%] index_copy_ spread : Elapsed 0.056 ms (5.607 ms / 100) 5.635 -> 5.615 ( -0.35%) [ +0.00% +0.07% +0.07% / +0.07% -0.25% -0.35%] index_add_ strided 3 : Elapsed 0.056 ms (5.635 ms / 100) 5.570 -> 5.564 ( -0.11%) [ +0.22% +0.00% +0.23% / +0.29% -0.11% -0.05%] index_copy_ strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.641 -> 5.619 ( -0.39%) [ +0.00% +0.11% +0.11% / +0.16% -0.37% -0.39%] index_add_ perm : Elapsed 0.056 ms (5.641 ms / 100) 5.594 -> 5.562 ( -0.57%) [ +0.00% +0.16% +0.20% / +0.07% -0.57% -0.45%] index_copy_ perm : Elapsed 0.056 ms (5.594 ms / 100) 5.658 -> 5.607 ( -0.90%) [ +0.00% +0.07% +0.16% / +0.09% -0.71% -0.90%] index_add_ perm_sorted : Elapsed 0.057 ms (5.658 ms / 100) 5.605 -> 5.552 ( -0.95%) [ +0.05% +0.00% +0.07% / +0.00% -0.95% -0.93%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.608 ms / 100) 5.849 -> 5.851 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.09% +0.03% +0.10%] index_select const : Elapsed 0.058 ms (5.849 ms / 100) 5.932 -> 5.889 ( -0.72%) [ +0.07% +0.00% +0.07% / +0.15% -0.72% -0.66%] index_select wrap : Elapsed 0.059 ms (5.936 ms / 100) 5.923 -> 5.884 ( -0.66%) [ +0.05% +0.05% +0.00% / +0.14% -0.56% -0.66%] index_select linear : Elapsed 0.059 ms (5.926 ms / 100) 5.925 -> 5.889 ( -0.61%) [ +0.00% +0.05% +0.08% / +0.12% -0.51% -0.61%] index_select reverse : Elapsed 0.059 ms (5.925 ms / 100) 5.842 -> 5.839 ( -0.05%) [ +0.03% +0.00% +0.09% / +0.21% -0.02% -0.05%] index_select skip64 : Elapsed 0.058 ms (5.844 ms / 100) 5.847 -> 5.841 ( -0.10%) [ +0.03% +0.00% +0.07% / +0.10% +0.00% -0.10%] index_select skip256 : Elapsed 0.058 ms (5.849 ms / 100) 5.927 -> 5.881 ( -0.78%) [ +0.00% +0.00% +0.07% / +0.13% -0.78% -0.76%] index_select spread : Elapsed 0.059 ms (5.927 ms / 100) 5.931 -> 5.885 ( -0.78%) [ +0.08% +0.00% +0.13% / +0.02% -0.67% -0.78%] index_select strided 3 : Elapsed 0.059 ms (5.936 ms / 100) 5.917 -> 5.887 ( -0.51%) [ +0.35% +0.20% +0.00% / +0.22% -0.51% -0.44%] index_select random : Elapsed 0.059 ms (5.938 ms / 100) 5.923 -> 5.880 ( -0.73%) [ +0.02% +0.08% +0.00% / +0.10% -0.68% -0.73%] index_select random_sorted : Elapsed 0.059 ms (5.924 ms / 100) B = [5, 16, 20, 40] (stride (40, 4000, 200, 1)) A = [4, 16, 20, 40] (stride (12800, 800, 1, 20)) dim = 0 5.544 -> 5.547 ( +0.05%) [ +0.00% +0.05% +0.20% / +0.05% +0.14% +0.09%] index_add_ linear : Elapsed 0.055 ms (5.544 ms / 100) 5.481 -> 5.496 ( +0.27%) [ +0.24% +0.00% +0.18% / +0.27% +0.31% +0.40%] index_copy_ linear : Elapsed 0.055 ms (5.494 ms / 100) 5.541 -> 5.540 ( -0.02%) [ +0.04% +0.00% +0.07% / +0.14% +0.04% -0.02%] index_add_ reverse : Elapsed 0.055 ms (5.543 ms / 100) 5.470 -> 5.481 ( +0.20%) [ +0.00% +0.16% +0.16% / +0.20% +0.27% +0.29%] index_copy_ reverse : Elapsed 0.055 ms (5.470 ms / 100) 5.541 -> 5.550 ( +0.16%) [ +0.00% +0.04% +0.23% / +0.16% +0.16% +0.20%] index_add_ spread : Elapsed 0.055 ms (5.541 ms / 100) 5.487 -> 5.492 ( +0.09%) [ +0.00% +0.00% +0.26% / +0.18% +0.24% +0.09%] index_copy_ spread : Elapsed 0.055 ms (5.487 ms / 100) 5.541 -> 5.549 ( +0.14%) [ +0.04% +0.00% +0.04% / +0.14% +0.31% +0.23%] index_add_ strided 3 : Elapsed 0.055 ms (5.543 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.09% +0.00% +0.05% / +0.07% +0.38% +0.29%] index_copy_ strided 3 : Elapsed 0.055 ms (5.488 ms / 100) 5.541 -> 5.546 ( +0.09%) [ +0.20% +0.00% +0.20% / +0.09% +0.14% +0.29%] index_add_ perm : Elapsed 0.056 ms (5.552 ms / 100) 5.481 -> 5.486 ( +0.09%) [ +0.07% +0.00% +0.24% / +0.09% +0.31% +0.27%] index_copy_ perm : Elapsed 0.055 ms (5.485 ms / 100) 5.545 -> 5.543 ( -0.04%) [ +0.00% +0.23% +0.09% / +0.18% -0.04% +0.09%] index_add_ perm_sorted : Elapsed 0.055 ms (5.545 ms / 100) 5.483 -> 5.495 ( +0.22%) [ +0.00% +0.15% +0.24% / +0.22% +0.22% +0.35%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.483 ms / 100) 5.776 -> 5.792 ( +0.28%) [ +0.00% +0.00% +0.14% / +0.28% +0.40% +0.28%] index_select const : Elapsed 0.058 ms (5.776 ms / 100) 5.813 -> 5.827 ( +0.24%) [ +0.17% +0.00% +0.17% / +0.24% +0.53% +0.46%] index_select wrap : Elapsed 0.058 ms (5.823 ms / 100) 5.812 -> 5.818 ( +0.10%) [ +0.00% +0.03% +0.19% / +0.10% +0.33% +0.46%] index_select linear : Elapsed 0.058 ms (5.812 ms / 100) 5.791 -> 5.802 ( +0.19%) [ +0.14% +0.00% +0.21% / +0.19% +0.59% +0.69%] index_select reverse : Elapsed 0.058 ms (5.799 ms / 100) 5.781 -> 5.796 ( +0.26%) [ +0.00% +0.02% +0.16% / +0.26% +0.29% +0.31%] index_select skip64 : Elapsed 0.058 ms (5.781 ms / 100) 5.780 -> 5.791 ( +0.19%) [ +0.00% +0.00% +0.07% / +0.22% +0.31% +0.19%] index_select skip256 : Elapsed 0.058 ms (5.780 ms / 100) 5.797 -> 5.812 ( +0.26%) [ +0.09% +0.00% +0.22% / +0.26% +0.52% +0.53%] index_select spread : Elapsed 0.058 ms (5.802 ms / 100) 5.807 -> 5.813 ( +0.10%) [ +0.05% +0.00% +0.33% / +0.10% +0.52% +0.67%] index_select strided 3 : Elapsed 0.058 ms (5.810 ms / 100) 5.806 -> 5.804 ( -0.03%) [ +0.02% +0.00% +0.07% / +0.19% +0.17% -0.03%] index_select random : Elapsed 0.058 ms (5.807 ms / 100) 5.788 -> 5.797 ( +0.16%) [ +0.00% +0.12% +0.16% / +0.16% +0.29% +0.38%] index_select random_sorted : Elapsed 0.058 ms (5.788 ms / 100) B = [5, 16, 20, 40] (stride (1, 4000, 200, 5)) A = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) dim = 0 5.582 -> 5.560 ( -0.39%) [ +0.04% +0.00% +0.07% / +0.09% -0.34% -0.39%] index_add_ linear : Elapsed 0.056 ms (5.584 ms / 100) 5.549 -> 5.523 ( -0.47%) [ +0.04% +0.00% +0.07% / +0.22% -0.47% -0.40%] index_copy_ linear : Elapsed 0.056 ms (5.551 ms / 100) 5.574 -> 5.557 ( -0.30%) [ +0.09% +0.00% +0.18% / +0.13% -0.30% -0.30%] index_add_ reverse : Elapsed 0.056 ms (5.579 ms / 100) 5.547 -> 5.525 ( -0.40%) [ +0.00% +0.11% +0.23% / +0.16% -0.40% -0.32%] index_copy_ reverse : Elapsed 0.055 ms (5.547 ms / 100) 5.579 -> 5.563 ( -0.29%) [ +0.02% +0.00% +0.23% / +0.25% -0.27% -0.29%] index_add_ spread : Elapsed 0.056 ms (5.580 ms / 100) 5.546 -> 5.526 ( -0.36%) [ +0.00% +0.05% +0.13% / +0.23% -0.36% -0.36%] index_copy_ spread : Elapsed 0.055 ms (5.546 ms / 100) 5.579 -> 5.560 ( -0.34%) [ +0.04% +0.02% +0.00% / +0.13% -0.30% -0.34%] index_add_ strided 3 : Elapsed 0.056 ms (5.581 ms / 100) 5.547 -> 5.526 ( -0.38%) [ +0.16% +0.00% +0.14% / +0.25% -0.38% -0.25%] index_copy_ strided 3 : Elapsed 0.056 ms (5.556 ms / 100) 5.578 -> 5.560 ( -0.32%) [ +0.00% +0.02% +0.00% / +0.27% -0.32% -0.30%] index_add_ perm : Elapsed 0.056 ms (5.578 ms / 100) 5.548 -> 5.518 ( -0.54%) [ +0.00% +0.04% +0.02% / +0.11% -0.47% -0.54%] index_copy_ perm : Elapsed 0.055 ms (5.548 ms / 100) 5.577 -> 5.563 ( -0.25%) [ +0.02% +0.04% +0.00% / +0.22% -0.25% -0.23%] index_add_ perm_sorted : Elapsed 0.056 ms (5.578 ms / 100) 5.549 -> 5.527 ( -0.40%) [ +0.00% +0.00% +0.20% / +0.02% -0.40% -0.40%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.549 ms / 100) 5.694 -> 5.690 ( -0.07%) [ +0.00% +0.14% +0.02% / +0.07% -0.05% -0.07%] index_select const : Elapsed 0.057 ms (5.694 ms / 100) 5.780 -> 5.752 ( -0.48%) [ +0.07% +0.00% +0.17% / +0.10% -0.48% -0.42%] index_select wrap : Elapsed 0.058 ms (5.784 ms / 100) 5.762 -> 5.741 ( -0.36%) [ +0.16% +0.00% +0.12% / +0.10% -0.31% -0.36%] index_select linear : Elapsed 0.058 ms (5.771 ms / 100) 5.754 -> 5.727 ( -0.47%) [ +0.00% +0.17% +0.12% / +0.10% -0.47% -0.31%] index_select reverse : Elapsed 0.058 ms (5.754 ms / 100) 5.692 -> 5.686 ( -0.11%) [ +0.04% +0.00% +0.09% / +0.18% -0.11% -0.05%] index_select skip64 : Elapsed 0.057 ms (5.694 ms / 100) 5.692 -> 5.691 ( -0.02%) [ +0.00% +0.09% +0.25% / +0.21% -0.02% +0.05%] index_select skip256 : Elapsed 0.057 ms (5.692 ms / 100) 5.754 -> 5.731 ( -0.40%) [ +0.00% +0.07% +0.02% / +0.12% -0.40% -0.33%] index_select spread : Elapsed 0.058 ms (5.754 ms / 100) 5.782 -> 5.747 ( -0.61%) [ +0.09% +0.00% +0.21% / +0.14% -0.61% -0.50%] index_select strided 3 : Elapsed 0.058 ms (5.787 ms / 100) 5.795 -> 5.761 ( -0.59%) [ +0.00% +0.26% +0.05% / +0.19% -0.59% -0.47%] index_select random : Elapsed 0.058 ms (5.795 ms / 100) 5.762 -> 5.744 ( -0.31%) [ +0.07% +0.00% +0.17% / +0.19% -0.26% -0.31%] index_select random_sorted : Elapsed 0.058 ms (5.766 ms / 100) B = [5, 16, 20, 40] (stride (320, 1, 16, 1600)) dim = 0 fill_cnt = 4 3.548 -> 3.547 ( -0.03%) [ +0.08% +0.06% +0.00% / -0.03% +0.20% +0.08%] index_fill_ const : Elapsed 0.036 ms (3.551 ms / 100) 3.571 -> 3.567 ( -0.11%) [ +0.08% +0.08% +0.00% / -0.06% -0.11% +0.14%] index_fill_ linear : Elapsed 0.036 ms (3.574 ms / 100) 3.567 -> 3.573 ( +0.17%) [ +0.14% +0.00% +0.00% / +0.22% +0.25% +0.17%] index_fill_ reverse : Elapsed 0.036 ms (3.572 ms / 100) 3.549 -> 3.554 ( +0.14%) [ +0.00% +0.31% +0.28% / +0.14% +0.42% +0.25%] index_fill_ skip64 : Elapsed 0.035 ms (3.549 ms / 100) 3.560 -> 3.561 ( +0.03%) [ +0.00% +0.03% +0.11% / +0.11% +0.17% +0.03%] index_fill_ skip256 : Elapsed 0.036 ms (3.560 ms / 100) 3.561 -> 3.564 ( +0.08%) [ +0.08% +0.00% +0.20% / +0.08% +0.28% +0.42%] index_fill_ spread : Elapsed 0.036 ms (3.564 ms / 100) 3.568 -> 3.568 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.08% +0.06%] index_fill_ strided 3 : Elapsed 0.036 ms (3.572 ms / 100) 3.576 -> 3.579 ( +0.08%) [ +0.00% +0.00% +0.20% / +0.14% +0.08% +0.31%] index_fill_ random : Elapsed 0.036 ms (3.576 ms / 100) 3.569 -> 3.571 ( +0.06%) [ +0.14% +0.11% +0.00% / +0.06% +0.20% +0.28%] index_fill_ random_sorted : Elapsed 0.036 ms (3.574 ms / 100) 3.566 -> 3.569 ( +0.08%) [ +0.00% +0.11% +0.11% / +0.08% +0.20% +0.08%] index_fill_ perm : Elapsed 0.036 ms (3.566 ms / 100) 3.567 -> 3.572 ( +0.14%) [ +0.00% +0.17% +0.20% / +0.17% +0.20% +0.14%] index_fill_ perm_sorted : Elapsed 0.036 ms (3.567 ms / 100) B = [5, 16, 20, 40] (stride (320, 1, 16, 1600)) A = [4, 16, 20, 40] (stride (12800, 800, 40, 1)) dim = 0 5.307 -> 5.317 ( +0.19%) [ +0.17% +0.24% +0.00% / +0.19% +0.41% +0.49%] index_add_ linear : Elapsed 0.053 ms (5.316 ms / 100) 5.260 -> 5.264 ( +0.08%) [ +0.00% +0.15% +0.06% / +0.08% +0.42% +0.34%] index_copy_ linear : Elapsed 0.053 ms (5.260 ms / 100) 5.312 -> 5.313 ( +0.02%) [ +0.00% +0.09% +0.15% / +0.02% +0.34% +0.45%] index_add_ reverse : Elapsed 0.053 ms (5.312 ms / 100) 5.266 -> 5.262 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.08% +0.28% +0.21%] index_copy_ reverse : Elapsed 0.053 ms (5.268 ms / 100) 5.314 -> 5.322 ( +0.15%) [ +0.00% +0.08% +0.02% / +0.15% +0.24% +0.41%] index_add_ spread : Elapsed 0.053 ms (5.314 ms / 100) 5.258 -> 5.267 ( +0.17%) [ +0.13% +0.00% +0.13% / +0.17% +0.36% +0.38%] index_copy_ spread : Elapsed 0.053 ms (5.265 ms / 100) 5.314 -> 5.318 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.41% +0.23%] index_add_ strided 3 : Elapsed 0.053 ms (5.314 ms / 100) 5.262 -> 5.261 ( -0.02%) [ +0.06% +0.00% +0.02% / -0.02% +0.25% +0.11%] index_copy_ strided 3 : Elapsed 0.053 ms (5.265 ms / 100) 5.311 -> 5.317 ( +0.11%) [ +0.04% +0.06% +0.00% / +0.11% +0.53% +0.43%] index_add_ perm : Elapsed 0.053 ms (5.313 ms / 100) 5.260 -> 5.258 ( -0.04%) [ +0.00% +0.00% +0.15% / -0.04% +0.29% +0.40%] index_copy_ perm : Elapsed 0.053 ms (5.260 ms / 100) 5.313 -> 5.311 ( -0.04%) [ +0.00% +0.09% +0.06% / -0.04% +0.47% +0.36%] index_add_ perm_sorted : Elapsed 0.053 ms (5.313 ms / 100) 5.258 -> 5.259 ( +0.02%) [ +0.10% +0.00% +0.08% / +0.02% +0.36% +0.36%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.263 ms / 100) 5.442 -> 5.444 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.04% +0.31% +0.24%] index_select const : Elapsed 0.054 ms (5.446 ms / 100) 5.525 -> 5.526 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.02% +0.42% +0.25%] index_select wrap : Elapsed 0.055 ms (5.527 ms / 100) 5.520 -> 5.529 ( +0.16%) [ +0.16% +0.00% +0.20% / +0.16% +0.47% +0.42%] index_select linear : Elapsed 0.055 ms (5.529 ms / 100) 5.522 -> 5.524 ( +0.04%) [ +0.07% +0.14% +0.00% / +0.04% +0.53% +0.47%] index_select reverse : Elapsed 0.055 ms (5.526 ms / 100) 5.446 -> 5.446 ( +0.00%) [ +0.07% +0.00% +0.02% / +0.00% +0.26% +0.17%] index_select skip64 : Elapsed 0.054 ms (5.450 ms / 100) 5.436 -> 5.439 ( +0.06%) [ +0.09% +0.00% +0.13% / +0.06% +0.31% +0.44%] index_select skip256 : Elapsed 0.054 ms (5.441 ms / 100) 5.524 -> 5.525 ( +0.02%) [ +0.00% +0.05% +0.13% / +0.02% +0.45% +0.49%] index_select spread : Elapsed 0.055 ms (5.524 ms / 100) 5.524 -> 5.532 ( +0.14%) [ +0.05% +0.00% +0.05% / +0.14% +0.56% +0.52%] index_select strided 3 : Elapsed 0.055 ms (5.527 ms / 100) 5.517 -> 5.527 ( +0.18%) [ +0.15% +0.00% +0.20% / +0.18% +0.63% +0.76%] index_select random : Elapsed 0.055 ms (5.525 ms / 100) 5.522 -> 5.532 ( +0.18%) [ +0.07% +0.09% +0.00% / +0.18% +0.43% +0.38%] index_select random_sorted : Elapsed 0.055 ms (5.526 ms / 100) B = [5, 16, 20, 40] (stride (320, 1, 16, 1600)) A = [4, 16, 20, 40] (stride (12800, 1, 640, 16)) dim = 0 5.591 -> 5.573 ( -0.32%) [ +0.18% +0.04% +0.00% / +0.16% -0.23% -0.32%] index_add_ linear : Elapsed 0.056 ms (5.601 ms / 100) 5.520 -> 5.508 ( -0.22%) [ +0.33% +0.00% +0.20% / +0.22% -0.22% -0.04%] index_copy_ linear : Elapsed 0.055 ms (5.538 ms / 100) 5.593 -> 5.577 ( -0.29%) [ +0.09% +0.05% +0.00% / +0.11% -0.16% -0.29%] index_add_ reverse : Elapsed 0.056 ms (5.598 ms / 100) 5.523 -> 5.517 ( -0.11%) [ +0.00% +0.07% +0.05% / +0.14% -0.05% -0.11%] index_copy_ reverse : Elapsed 0.055 ms (5.523 ms / 100) 5.590 -> 5.580 ( -0.18%) [ +0.14% +0.16% +0.00% / +0.07% -0.18% -0.14%] index_add_ spread : Elapsed 0.056 ms (5.598 ms / 100) 5.526 -> 5.510 ( -0.29%) [ +0.00% +0.07% +0.07% / +0.13% -0.27% -0.29%] index_copy_ spread : Elapsed 0.055 ms (5.526 ms / 100) 5.590 -> 5.567 ( -0.41%) [ +0.07% +0.00% +0.16% / +0.11% -0.41% -0.29%] index_add_ strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.525 -> 5.511 ( -0.25%) [ +0.00% +0.09% +0.14% / +0.14% -0.25% -0.09%] index_copy_ strided 3 : Elapsed 0.055 ms (5.525 ms / 100) 5.596 -> 5.558 ( -0.68%) [ +0.00% +0.00% +0.05% / +0.11% -0.39% -0.68%] index_add_ perm : Elapsed 0.056 ms (5.596 ms / 100) 5.525 -> 5.500 ( -0.45%) [ +0.14% +0.00% +0.13% / +0.24% -0.45% -0.38%] index_copy_ perm : Elapsed 0.055 ms (5.533 ms / 100) 5.593 -> 5.578 ( -0.27%) [ +0.07% +0.02% +0.00% / -0.07% -0.27% -0.14%] index_add_ perm_sorted : Elapsed 0.056 ms (5.597 ms / 100) 5.525 -> 5.502 ( -0.42%) [ +0.11% +0.00% +0.04% / +0.02% -0.42% -0.24%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.531 ms / 100) 5.798 -> 5.802 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +0.48% +0.50%] index_select const : Elapsed 0.058 ms (5.802 ms / 100) 5.851 -> 5.848 ( -0.05%) [ +0.14% +0.05% +0.00% / +0.09% -0.05% +0.05%] index_select wrap : Elapsed 0.059 ms (5.859 ms / 100) 5.847 -> 5.845 ( -0.03%) [ +0.00% +0.12% +0.19% / +0.19% -0.03% -0.03%] index_select linear : Elapsed 0.058 ms (5.847 ms / 100) 5.858 -> 5.850 ( -0.14%) [ +0.00% +0.02% +0.12% / +0.00% +0.00% -0.14%] index_select reverse : Elapsed 0.059 ms (5.858 ms / 100) 5.804 -> 5.804 ( +0.00%) [ +0.00% +0.02% +0.03% / +0.00% +0.40% +0.36%] index_select skip64 : Elapsed 0.058 ms (5.804 ms / 100) 5.799 -> 5.806 ( +0.12%) [ +0.00% +0.09% +0.05% / +0.12% +0.55% +0.47%] index_select skip256 : Elapsed 0.058 ms (5.799 ms / 100) 5.856 -> 5.856 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.07% +0.00% +0.02%] index_select spread : Elapsed 0.059 ms (5.857 ms / 100) 5.856 -> 5.857 ( +0.02%) [ +0.00% +0.09% +0.03% / +0.02% +0.07% +0.09%] index_select strided 3 : Elapsed 0.059 ms (5.856 ms / 100) 5.826 -> 5.839 ( +0.22%) [ +0.00% +0.19% +0.21% / +0.22% +0.26% +0.45%] index_select random : Elapsed 0.058 ms (5.826 ms / 100) 5.840 -> 5.839 ( -0.02%) [ +0.00% +0.00% +0.12% / +0.07% -0.02% +0.07%] index_select random_sorted : Elapsed 0.058 ms (5.840 ms / 100) B = [5, 16, 20, 40] (stride (1, 100, 5, 1600)) A = [4, 16, 20, 40] (stride (1, 4, 2560, 64)) dim = 0 3.645 -> 3.648 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.63% +0.55%] index_add_ linear : Elapsed 0.036 ms (3.648 ms / 100) 3.646 -> 3.648 ( +0.05%) [ +0.00% +0.14% +0.11% / +0.05% +0.60% +0.49%] index_copy_ linear : Elapsed 0.036 ms (3.646 ms / 100) 3.662 -> 3.667 ( +0.14%) [ +0.14% +0.00% +0.16% / +0.14% +0.49% +0.44%] index_add_ reverse : Elapsed 0.037 ms (3.667 ms / 100) 3.655 -> 3.659 ( +0.11%) [ +0.00% +0.25% +0.11% / +0.11% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.037 ms (3.655 ms / 100) 3.666 -> 3.667 ( +0.03%) [ +0.00% +0.05% +0.11% / +0.03% +0.44% +0.44%] index_add_ spread : Elapsed 0.037 ms (3.666 ms / 100) 3.661 -> 3.663 ( +0.05%) [ +0.22% +0.00% +0.27% / +0.05% +0.36% +0.46%] index_copy_ spread : Elapsed 0.037 ms (3.669 ms / 100) 3.679 -> 3.664 ( -0.41%) [ +0.24% +0.03% +0.00% / +0.22% -0.41% -0.41%] index_add_ strided 3 : Elapsed 0.037 ms (3.688 ms / 100) 3.669 -> 3.663 ( -0.16%) [ +0.25% +0.22% +0.00% / +0.19% -0.08% -0.16%] index_copy_ strided 3 : Elapsed 0.037 ms (3.678 ms / 100) 3.679 -> 3.660 ( -0.52%) [ +0.08% +0.00% +0.03% / +0.00% -0.49% -0.52%] index_add_ perm : Elapsed 0.037 ms (3.682 ms / 100) 3.670 -> 3.663 ( -0.19%) [ +0.00% +0.19% +0.19% / +0.30% +0.08% -0.19%] index_copy_ perm : Elapsed 0.037 ms (3.670 ms / 100) 3.649 -> 3.661 ( +0.33%) [ +0.14% +0.00% +0.19% / +0.33% +0.58% +0.38%] index_add_ perm_sorted : Elapsed 0.037 ms (3.654 ms / 100) 3.657 -> 3.660 ( +0.08%) [ +0.03% +0.00% +0.19% / +0.16% +0.36% +0.08%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.658 ms / 100) 3.702 -> 3.701 ( -0.03%) [ +0.00% +0.03% +0.08% / -0.03% +0.81% +0.65%] index_select const : Elapsed 0.037 ms (3.702 ms / 100) 3.730 -> 3.729 ( -0.03%) [ +0.16% +0.00% +0.08% / -0.03% +0.38% +0.46%] index_select wrap : Elapsed 0.037 ms (3.736 ms / 100) 3.729 -> 3.731 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.56% +0.38%] index_select linear : Elapsed 0.037 ms (3.732 ms / 100) 3.743 -> 3.731 ( -0.32%) [ +0.00% +0.08% +0.03% / +0.00% -0.29% -0.32%] index_select reverse : Elapsed 0.037 ms (3.743 ms / 100) 3.741 -> 3.728 ( -0.35%) [ +0.24% +0.11% +0.00% / +0.13% -0.27% -0.35%] index_select skip64 : Elapsed 0.037 ms (3.750 ms / 100) 3.717 -> 3.721 ( +0.11%) [ +0.24% +0.00% +0.05% / +0.11% +0.38% +0.35%] index_select skip256 : Elapsed 0.037 ms (3.726 ms / 100) 3.708 -> 3.714 ( +0.16%) [ +0.00% +0.19% +0.19% / +0.16% +0.67% +0.86%] index_select spread : Elapsed 0.037 ms (3.708 ms / 100) 3.711 -> 3.708 ( -0.08%) [ +0.00% +0.05% +0.00% / -0.08% +0.67% +0.62%] index_select strided 3 : Elapsed 0.037 ms (3.711 ms / 100) 3.742 -> 3.730 ( -0.32%) [ +0.16% +0.03% +0.00% / +0.29% -0.32% -0.21%] index_select random : Elapsed 0.037 ms (3.748 ms / 100) 3.736 -> 3.730 ( -0.16%) [ +0.21% +0.00% +0.13% / +0.35% +0.03% -0.16%] index_select random_sorted : Elapsed 0.037 ms (3.744 ms / 100) B = [5, 16, 20, 40] (stride (1, 100, 5, 1600)) A = [4, 16, 20, 40] (stride (320, 20, 1, 1280)) dim = 0 3.600 -> 3.601 ( +0.03%) [ +0.11% +0.25% +0.00% / +0.03% +0.78% +0.78%] index_add_ linear : Elapsed 0.036 ms (3.604 ms / 100) 3.590 -> 3.600 ( +0.28%) [ +0.03% +0.00% +0.14% / +0.28% +0.97% +0.72%] index_copy_ linear : Elapsed 0.036 ms (3.591 ms / 100) 3.598 -> 3.603 ( +0.14%) [ +0.00% +0.19% +0.11% / +0.14% +0.92% +0.61%] index_add_ reverse : Elapsed 0.036 ms (3.598 ms / 100) 3.590 -> 3.592 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.67% +0.45%] index_copy_ reverse : Elapsed 0.036 ms (3.591 ms / 100) 3.603 -> 3.610 ( +0.19%) [ +0.00% +0.06% +0.25% / +0.19% +0.47% +0.53%] index_add_ spread : Elapsed 0.036 ms (3.603 ms / 100) 3.593 -> 3.595 ( +0.06%) [ +0.14% +0.00% +0.06% / +0.06% +0.47% +0.83%] index_copy_ spread : Elapsed 0.036 ms (3.598 ms / 100) 3.601 -> 3.602 ( +0.03%) [ +0.19% +0.00% +0.14% / +0.03% +0.61% +0.58%] index_add_ strided 3 : Elapsed 0.036 ms (3.608 ms / 100) 3.591 -> 3.593 ( +0.06%) [ +0.14% +0.00% +0.22% / +0.06% +0.61% +0.75%] index_copy_ strided 3 : Elapsed 0.036 ms (3.596 ms / 100) 3.601 -> 3.599 ( -0.06%) [ +0.03% +0.17% +0.00% / -0.06% +0.42% +0.47%] index_add_ perm : Elapsed 0.036 ms (3.602 ms / 100) 3.592 -> 3.593 ( +0.03%) [ +0.08% +0.14% +0.00% / +0.03% +0.58% +0.67%] index_copy_ perm : Elapsed 0.036 ms (3.595 ms / 100) 3.612 -> 3.615 ( +0.08%) [ +0.11% +0.14% +0.00% / +0.08% +0.36% +0.44%] index_add_ perm_sorted : Elapsed 0.036 ms (3.616 ms / 100) 3.605 -> 3.610 ( +0.14%) [ +0.11% +0.03% +0.00% / +0.14% +0.14% +0.25%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.609 ms / 100) 3.713 -> 3.719 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +1.51% +1.35%] index_select const : Elapsed 0.037 ms (3.716 ms / 100) 3.655 -> 3.654 ( -0.03%) [ +0.16% +0.03% +0.00% / -0.03% +0.96% +0.74%] index_select wrap : Elapsed 0.037 ms (3.661 ms / 100) 3.662 -> 3.665 ( +0.08%) [ +0.00% +0.35% +0.22% / +0.08% +1.09% +0.90%] index_select linear : Elapsed 0.037 ms (3.662 ms / 100) 3.644 -> 3.645 ( +0.03%) [ +0.00% +0.19% +0.05% / +0.22% +0.03% +0.30%] index_select reverse : Elapsed 0.036 ms (3.644 ms / 100) 3.648 -> 3.653 ( +0.14%) [ +0.00% +0.14% +0.19% / +0.14% +1.62% +1.70%] index_select skip64 : Elapsed 0.036 ms (3.648 ms / 100) 3.648 -> 3.658 ( +0.27%) [ +0.25% +0.25% +0.00% / +0.27% +1.67% +1.59%] index_select skip256 : Elapsed 0.037 ms (3.657 ms / 100) 3.625 -> 3.631 ( +0.17%) [ +0.33% +0.33% +0.00% / +0.17% +1.08% +0.88%] index_select spread : Elapsed 0.036 ms (3.637 ms / 100) 3.654 -> 3.658 ( +0.11%) [ +0.00% +0.19% +0.08% / +0.11% +1.20% +0.88%] index_select strided 3 : Elapsed 0.037 ms (3.654 ms / 100) 3.673 -> 3.658 ( -0.41%) [ +0.00% +0.11% +0.00% / +0.00% +0.90% -0.41%] index_select random : Elapsed 0.037 ms (3.673 ms / 100) 3.674 -> 3.679 ( +0.14%) [ +0.11% +0.03% +0.00% / +0.14% +0.73% +0.63%] index_select random_sorted : Elapsed 0.037 ms (3.678 ms / 100) out_shape = [4, 5, 20, 40] in_shape = [4, 16, 20, 40] idx_dim = 1 B = [4, 5, 20, 40] (stride (40, 3200, 160, 1)) A = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) dim = 1 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.16% +0.16% / +0.08% +0.82% +0.57%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.16% +0.04% +0.00% / +0.16% +0.73% +0.57%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.49% +0.49%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.49% +0.45%] index_select reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.441 -> 2.440 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.49% +0.57%] index_select skip64 : Elapsed 0.024 ms (2.444 ms / 100) 2.450 -> 2.456 ( +0.24%) [ +0.12% +0.08% +0.00% / +0.24% +0.82% +0.78%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.456 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.00% +0.41% +0.49%] index_select spread : Elapsed 0.025 ms (2.459 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.37% +0.49%] index_select strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.12% +0.41% +0.57%] index_select strided 5 : Elapsed 0.024 ms (2.448 ms / 100) 2.454 -> 2.453 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.24% +0.29%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.441 -> 2.441 ( +0.00%) [ +0.33% +0.00% +0.04% / +0.00% +0.78% +0.70%] index_select strided 8 : Elapsed 0.024 ms (2.449 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.61% +0.57%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.461 -> 2.462 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.53% +0.45%] index_select random_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.12% +0.45% +0.49%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.04% +0.33% +0.00% / +0.04% +0.49% +0.49%] index_select perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) B = [4, 5, 20, 40] (stride (1, 3200, 160, 4)) A = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) dim = 1 2.339 -> 2.338 ( -0.04%) [ +0.00% +0.09% +0.09% / -0.04% +0.43% +0.13%] index_select const : Elapsed 0.023 ms (2.339 ms / 100) 2.341 -> 2.342 ( +0.04%) [ +0.13% +0.00% +0.21% / +0.26% +0.30% +0.04%] index_select wrap : Elapsed 0.023 ms (2.344 ms / 100) 2.341 -> 2.340 ( -0.04%) [ +0.04% +0.17% +0.00% / +0.26% +0.30% -0.04%] index_select linear : Elapsed 0.023 ms (2.342 ms / 100) 2.340 -> 2.341 ( +0.04%) [ +0.00% +0.00% +0.13% / +0.09% +0.47% +0.04%] index_select reverse : Elapsed 0.023 ms (2.340 ms / 100) 2.338 -> 2.342 ( +0.17%) [ +0.26% +0.04% +0.00% / +0.17% +0.34% +0.38%] index_select skip64 : Elapsed 0.023 ms (2.344 ms / 100) 2.338 -> 2.335 ( -0.13%) [ +0.21% +0.00% +0.17% / -0.13% +0.21% +0.38%] index_select skip256 : Elapsed 0.023 ms (2.343 ms / 100) 2.368 -> 2.363 ( -0.21%) [ +0.00% +0.25% +0.17% / +0.17% -0.04% -0.21%] index_select spread : Elapsed 0.024 ms (2.368 ms / 100) 2.363 -> 2.366 ( +0.13%) [ +0.21% +0.25% +0.00% / +0.13% +0.47% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.368 ms / 100) 2.365 -> 2.365 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.17% +0.00% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.366 ms / 100) 2.367 -> 2.364 ( -0.13%) [ +0.00% +0.08% +0.13% / -0.13% +0.30% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.367 ms / 100) 2.372 -> 2.368 ( -0.17%) [ +0.00% +0.04% +0.04% / -0.04% -0.17% -0.17%] index_select strided 8 : Elapsed 0.024 ms (2.372 ms / 100) 2.369 -> 2.371 ( +0.08%) [ +0.04% +0.21% +0.00% / +0.30% +0.08% +0.17%] index_select random : Elapsed 0.024 ms (2.370 ms / 100) 2.364 -> 2.366 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.08% +0.55% +0.13%] index_select random_sorted : Elapsed 0.024 ms (2.367 ms / 100) 2.358 -> 2.365 ( +0.30%) [ +0.25% +0.51% +0.00% / +0.30% +0.38% +0.34%] index_select perm : Elapsed 0.024 ms (2.364 ms / 100) 2.368 -> 2.369 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +0.34% +0.34%] index_select perm_sorted : Elapsed 0.024 ms (2.368 ms / 100) B = [4, 5, 20, 40] (stride (5, 1, 800, 20)) A = [4, 16, 20, 40] (stride (1, 160, 2560, 4)) dim = 1 2.127 -> 2.127 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.80% +0.52%] index_select const : Elapsed 0.021 ms (2.130 ms / 100) 2.114 -> 2.117 ( +0.14%) [ +0.24% +0.05% +0.00% / +0.14% +0.57% +0.52%] index_select wrap : Elapsed 0.021 ms (2.119 ms / 100) 2.124 -> 2.128 ( +0.19%) [ +0.00% +0.14% +0.09% / +0.19% +0.52% +0.47%] index_select linear : Elapsed 0.021 ms (2.124 ms / 100) 2.126 -> 2.125 ( -0.05%) [ +0.19% +0.00% +0.00% / -0.05% +0.28% +0.47%] index_select reverse : Elapsed 0.021 ms (2.130 ms / 100) 2.126 -> 2.122 ( -0.19%) [ +0.00% +0.00% +0.09% / -0.19% +0.61% +0.28%] index_select skip64 : Elapsed 0.021 ms (2.126 ms / 100) 2.128 -> 2.130 ( +0.09%) [ +0.00% +0.38% +0.05% / +0.09% +0.28% +0.56%] index_select skip256 : Elapsed 0.021 ms (2.128 ms / 100) 2.120 -> 2.126 ( +0.28%) [ +0.38% +0.33% +0.00% / +0.28% +0.66% +0.99%] index_select spread : Elapsed 0.021 ms (2.128 ms / 100) 2.125 -> 2.131 ( +0.28%) [ +0.38% +0.05% +0.00% / +0.28% +0.66% +0.52%] index_select strided 3 : Elapsed 0.021 ms (2.133 ms / 100) 2.126 -> 2.129 ( +0.14%) [ +0.00% +0.09% +0.28% / +0.14% +0.80% +0.52%] index_select strided 5 : Elapsed 0.021 ms (2.126 ms / 100) 2.128 -> 2.133 ( +0.23%) [ +0.00% +0.23% +0.09% / +0.23% +0.61% +0.38%] index_select strided 7 : Elapsed 0.021 ms (2.128 ms / 100) 2.121 -> 2.129 ( +0.38%) [ +0.09% +0.19% +0.00% / +0.38% +0.85% +0.99%] index_select strided 8 : Elapsed 0.021 ms (2.123 ms / 100) 2.112 -> 2.115 ( +0.14%) [ +0.00% +0.38% +0.52% / +0.14% +0.90% +0.80%] index_select random : Elapsed 0.021 ms (2.112 ms / 100) 2.123 -> 2.130 ( +0.33%) [ +0.05% +0.19% +0.00% / +0.33% +0.99% +0.94%] index_select random_sorted : Elapsed 0.021 ms (2.124 ms / 100) 2.131 -> 2.131 ( +0.00%) [ +0.00% +0.19% +0.00% / +0.00% +0.33% +0.47%] index_select perm : Elapsed 0.021 ms (2.131 ms / 100) 2.117 -> 2.114 ( -0.14%) [ +0.00% +0.09% +0.09% / -0.14% +0.52% +0.33%] index_select perm_sorted : Elapsed 0.021 ms (2.117 ms / 100) B = [4, 5, 20, 40] (stride (100, 1, 5, 400)) A = [4, 16, 20, 40] (stride (1, 160, 2560, 4)) dim = 1 2.270 -> 2.277 ( +0.31%) [ +0.04% +0.13% +0.00% / +0.31% +0.53% +0.35%] index_select const : Elapsed 0.023 ms (2.271 ms / 100) 2.258 -> 2.257 ( -0.04%) [ +0.18% +0.22% +0.00% / -0.04% +0.71% +0.58%] index_select wrap : Elapsed 0.023 ms (2.262 ms / 100) 2.252 -> 2.253 ( +0.04%) [ +0.00% +0.31% +0.00% / +0.04% +0.53% +0.67%] index_select linear : Elapsed 0.023 ms (2.252 ms / 100) 2.254 -> 2.253 ( -0.04%) [ +0.00% +0.04% +0.09% / -0.04% +0.71% +0.67%] index_select reverse : Elapsed 0.023 ms (2.254 ms / 100) 2.273 -> 2.279 ( +0.26%) [ +0.00% +0.04% +0.00% / +0.26% +0.31% +0.48%] index_select skip64 : Elapsed 0.023 ms (2.273 ms / 100) 2.270 -> 2.270 ( +0.00%) [ +0.00% +0.00% +0.22% / +0.00% +0.40% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.270 ms / 100) 2.255 -> 2.256 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.04% +0.35% +0.27%] index_select spread : Elapsed 0.023 ms (2.257 ms / 100) 2.253 -> 2.255 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +0.40% +0.71%] index_select strided 3 : Elapsed 0.023 ms (2.254 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.00% +0.18% +0.00% / +0.04% +0.58% +0.44%] index_select strided 5 : Elapsed 0.023 ms (2.256 ms / 100) 2.251 -> 2.254 ( +0.13%) [ +0.00% +0.00% +0.09% / +0.13% +0.44% +0.44%] index_select strided 7 : Elapsed 0.023 ms (2.251 ms / 100) 2.270 -> 2.275 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.40% +0.44%] index_select strided 8 : Elapsed 0.023 ms (2.270 ms / 100) 2.261 -> 2.260 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.44% +0.40%] index_select random : Elapsed 0.023 ms (2.262 ms / 100) 2.250 -> 2.256 ( +0.27%) [ +0.27% +0.00% +0.04% / +0.27% +0.49% +0.49%] index_select random_sorted : Elapsed 0.023 ms (2.256 ms / 100) 2.257 -> 2.258 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.44% +0.44%] index_select perm : Elapsed 0.023 ms (2.259 ms / 100) 2.261 -> 2.263 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.09% +0.53% +0.49%] index_select perm_sorted : Elapsed 0.023 ms (2.264 ms / 100) B = [4, 5, 20, 40] (stride (1, 80, 4, 400)) A = [4, 16, 20, 40] (stride (40, 3200, 160, 1)) dim = 1 2.239 -> 2.237 ( -0.09%) [ +0.00% +0.04% +0.18% / -0.09% +0.85% +0.63%] index_select const : Elapsed 0.022 ms (2.239 ms / 100) 2.296 -> 2.297 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.04% +0.13%] index_select wrap : Elapsed 0.023 ms (2.296 ms / 100) 2.306 -> 2.309 ( +0.13%) [ +0.00% +0.09% +0.22% / +0.13% +0.17% +0.26%] index_select linear : Elapsed 0.023 ms (2.306 ms / 100) 2.302 -> 2.302 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.22% +0.26%] index_select reverse : Elapsed 0.023 ms (2.303 ms / 100) 2.235 -> 2.232 ( -0.13%) [ +0.00% +0.09% +0.09% / -0.13% +0.89% +0.81%] index_select skip64 : Elapsed 0.022 ms (2.235 ms / 100) 2.233 -> 2.235 ( +0.09%) [ +0.27% +0.36% +0.00% / +0.09% +0.99% +0.99%] index_select skip256 : Elapsed 0.022 ms (2.239 ms / 100) 2.307 -> 2.303 ( -0.17%) [ +0.04% +0.00% +0.17% / +0.09% -0.17% -0.09%] index_select spread : Elapsed 0.023 ms (2.308 ms / 100) 2.309 -> 2.311 ( +0.09%) [ +0.26% +0.43% +0.00% / +0.09% +0.17% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.315 ms / 100) 2.293 -> 2.296 ( +0.13%) [ +0.13% +0.00% +0.09% / +0.13% +0.17% +0.17%] index_select strided 5 : Elapsed 0.023 ms (2.296 ms / 100) 2.307 -> 2.311 ( +0.17%) [ +0.00% +0.04% +0.13% / +0.17% +0.65% +0.65%] index_select strided 7 : Elapsed 0.023 ms (2.307 ms / 100) 2.248 -> 2.250 ( +0.09%) [ +0.04% +0.13% +0.00% / +0.09% +0.67% +0.58%] index_select strided 8 : Elapsed 0.022 ms (2.249 ms / 100) 2.255 -> 2.257 ( +0.09%) [ +0.18% +0.00% +0.31% / +0.09% +1.64% +1.42%] index_select random : Elapsed 0.023 ms (2.259 ms / 100) 2.266 -> 2.272 ( +0.26%) [ +0.00% +0.04% +0.35% / +0.26% +1.24% +1.37%] index_select random_sorted : Elapsed 0.023 ms (2.266 ms / 100) 2.295 -> 2.282 ( -0.57%) [ +0.00% +0.09% +0.13% / +0.13% -0.44% -0.57%] index_select perm : Elapsed 0.023 ms (2.295 ms / 100) 2.286 -> 2.269 ( -0.74%) [ +0.22% +0.26% +0.00% / +0.31% -0.22% -0.74%] index_select perm_sorted : Elapsed 0.023 ms (2.291 ms / 100) out_shape = [4, 16, 5, 40] in_shape = [4, 16, 20, 40] idx_dim = 2 B = [4, 16, 5, 40] (stride (200, 800, 40, 1)) A = [4, 16, 20, 40] (stride (20, 3200, 1, 80)) dim = 2 1.912 -> 1.911 ( -0.05%) [ +0.21% +0.26% +0.00% / -0.05% +0.58% +0.47%] index_select const : Elapsed 0.019 ms (1.916 ms / 100) 1.917 -> 1.914 ( -0.16%) [ +0.10% +0.00% +0.21% / -0.16% +0.47% +0.63%] index_select wrap : Elapsed 0.019 ms (1.919 ms / 100) 1.918 -> 1.915 ( -0.16%) [ +0.00% +0.16% +0.00% / -0.16% +0.47% +0.21%] index_select linear : Elapsed 0.019 ms (1.918 ms / 100) 1.918 -> 1.921 ( +0.16%) [ +0.26% +0.10% +0.00% / +0.16% +0.47% +0.31%] index_select reverse : Elapsed 0.019 ms (1.923 ms / 100) 1.913 -> 1.913 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.26% +0.21%] index_select skip64 : Elapsed 0.019 ms (1.914 ms / 100) 1.911 -> 1.914 ( +0.16%) [ +0.26% +0.05% +0.00% / +0.16% +0.31% +0.21%] index_select skip256 : Elapsed 0.019 ms (1.916 ms / 100) 1.931 -> 1.932 ( +0.05%) [ +0.05% +0.16% +0.00% / +0.05% +0.10% +0.31%] index_select spread : Elapsed 0.019 ms (1.932 ms / 100) 1.928 -> 1.928 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.41% +0.47%] index_select strided 3 : Elapsed 0.019 ms (1.930 ms / 100) 1.932 -> 1.936 ( +0.21%) [ +0.00% +0.10% +0.05% / +0.26% +0.47% +0.21%] index_select strided 5 : Elapsed 0.019 ms (1.932 ms / 100) 1.930 -> 1.934 ( +0.21%) [ +0.10% +0.16% +0.00% / +0.26% +0.57% +0.21%] index_select strided 7 : Elapsed 0.019 ms (1.932 ms / 100) 1.934 -> 1.934 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.31% +0.31%] index_select strided 8 : Elapsed 0.019 ms (1.937 ms / 100) 1.934 -> 1.937 ( +0.16%) [ +0.10% +0.00% +0.05% / +0.26% +0.16% +0.26%] index_select strided 16 : Elapsed 0.019 ms (1.936 ms / 100) 1.930 -> 1.929 ( -0.05%) [ +0.05% +0.00% +0.16% / -0.05% +0.26% +0.16%] index_select random : Elapsed 0.019 ms (1.931 ms / 100) 1.930 -> 1.933 ( +0.16%) [ +0.26% +0.26% +0.00% / +0.16% +0.36% +0.26%] index_select random_sorted : Elapsed 0.019 ms (1.935 ms / 100) 1.924 -> 1.928 ( +0.21%) [ +0.21% +0.47% +0.00% / +0.21% +0.57% +0.42%] index_select perm : Elapsed 0.019 ms (1.928 ms / 100) 1.933 -> 1.931 ( -0.10%) [ +0.10% +0.00% +0.10% / -0.10% +0.16% +0.36%] index_select perm_sorted : Elapsed 0.019 ms (1.935 ms / 100) B = [4, 16, 5, 40] (stride (1, 800, 160, 4)) A = [4, 16, 20, 40] (stride (40, 160, 2560, 1)) dim = 2 1.771 -> 1.771 ( +0.00%) [ +0.23% +0.40% +0.00% / +0.28% +0.06% +0.00%] index_select const : Elapsed 0.018 ms (1.775 ms / 100) 1.769 -> 1.771 ( +0.11%) [ +0.23% +0.00% +0.06% / +0.11% +0.40% +0.11%] index_select wrap : Elapsed 0.018 ms (1.773 ms / 100) 1.771 -> 1.771 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +0.34% +0.17%] index_select linear : Elapsed 0.018 ms (1.771 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.06% +0.06% / +0.11% +0.28% +0.34%] index_select reverse : Elapsed 0.018 ms (1.770 ms / 100) 1.769 -> 1.771 ( +0.11%) [ +0.23% +0.28% +0.00% / +0.17% +0.11% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.773 ms / 100) 1.769 -> 1.771 ( +0.11%) [ +0.17% +0.00% +0.06% / +0.11% +0.51% +0.57%] index_select skip256 : Elapsed 0.018 ms (1.772 ms / 100) 1.781 -> 1.786 ( +0.28%) [ +0.00% +0.22% +0.22% / +0.28% +0.39% +0.56%] index_select spread : Elapsed 0.018 ms (1.781 ms / 100) 1.797 -> 1.785 ( -0.67%) [ +0.06% +0.00% +0.06% / +0.00% -0.67% -0.56%] index_select strided 3 : Elapsed 0.018 ms (1.798 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.17% +0.06% +0.00% / +0.11% +0.22% +0.22%] index_select strided 5 : Elapsed 0.018 ms (1.786 ms / 100) 1.789 -> 1.790 ( +0.06%) [ +0.00% +0.17% +0.17% / +0.06% +0.50% +0.39%] index_select strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.00% +0.06% +0.00% / +0.11% +0.28% +0.22%] index_select strided 8 : Elapsed 0.018 ms (1.783 ms / 100) 1.784 -> 1.782 ( -0.11%) [ +0.11% +0.00% +0.22% / -0.11% +0.28% +0.22%] index_select strided 16 : Elapsed 0.018 ms (1.786 ms / 100) 1.791 -> 1.786 ( -0.28%) [ +0.00% +0.00% +0.28% / +0.11% -0.28% -0.17%] index_select random : Elapsed 0.018 ms (1.791 ms / 100) 1.788 -> 1.784 ( -0.22%) [ +0.39% +0.00% +0.39% / +0.06% -0.22% -0.06%] index_select random_sorted : Elapsed 0.018 ms (1.795 ms / 100) 1.795 -> 1.798 ( +0.17%) [ +0.22% +0.11% +0.00% / +0.22% +0.17% +0.22%] index_select perm : Elapsed 0.018 ms (1.799 ms / 100) 1.795 -> 1.793 ( -0.11%) [ +0.00% +0.00% +0.06% / -0.11% +0.33% +0.17%] index_select perm_sorted : Elapsed 0.018 ms (1.795 ms / 100) B = [4, 16, 5, 40] (stride (1, 800, 160, 4)) A = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) dim = 2 1.763 -> 1.767 ( +0.23%) [ +0.11% +0.23% +0.00% / +0.23% +0.68% +0.45%] index_select const : Elapsed 0.018 ms (1.765 ms / 100) 1.791 -> 1.795 ( +0.22%) [ +0.22% +0.00% +0.17% / +0.22% +0.78% +0.45%] index_select wrap : Elapsed 0.018 ms (1.795 ms / 100) 1.793 -> 1.795 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.78% +0.61%] index_select linear : Elapsed 0.018 ms (1.793 ms / 100) 1.796 -> 1.799 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.84% +0.67%] index_select reverse : Elapsed 0.018 ms (1.799 ms / 100) 1.768 -> 1.770 ( +0.11%) [ +0.06% +0.00% +0.23% / +0.11% +0.74% +0.79%] index_select skip64 : Elapsed 0.018 ms (1.769 ms / 100) 1.769 -> 1.770 ( +0.06%) [ +0.28% +0.28% +0.00% / +0.06% +0.68% +0.68%] index_select skip256 : Elapsed 0.018 ms (1.774 ms / 100) 1.793 -> 1.796 ( +0.17%) [ +0.17% +0.00% +0.11% / +0.17% +0.56% +0.67%] index_select spread : Elapsed 0.018 ms (1.796 ms / 100) 1.784 -> 1.782 ( -0.11%) [ +0.34% +0.11% +0.00% / -0.11% +0.78% +0.84%] index_select strided 3 : Elapsed 0.018 ms (1.790 ms / 100) 1.795 -> 1.792 ( -0.17%) [ +0.17% +0.00% +0.06% / -0.17% +0.39% +0.33%] index_select strided 5 : Elapsed 0.018 ms (1.798 ms / 100) 1.789 -> 1.792 ( +0.17%) [ +0.00% +0.06% +0.00% / +0.17% +0.73% +0.61%] index_select strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.794 -> 1.796 ( +0.11%) [ +0.00% +0.17% +0.00% / +0.11% +0.67% +0.61%] index_select strided 8 : Elapsed 0.018 ms (1.794 ms / 100) 1.794 -> 1.797 ( +0.17%) [ +0.00% +0.00% +0.11% / +0.17% +0.50% +0.45%] index_select strided 16 : Elapsed 0.018 ms (1.794 ms / 100) 1.771 -> 1.774 ( +0.17%) [ +0.00% +0.17% +0.11% / +0.17% +0.62% +0.62%] index_select random : Elapsed 0.018 ms (1.771 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.06% +0.00% +0.22% / +0.06% +0.95% +0.73%] index_select random_sorted : Elapsed 0.018 ms (1.787 ms / 100) 1.808 -> 1.810 ( +0.11%) [ +0.28% +0.00% +0.00% / +0.11% +0.33% +0.22%] index_select perm : Elapsed 0.018 ms (1.813 ms / 100) 1.802 -> 1.808 ( +0.33%) [ +0.06% +0.00% +0.11% / +0.33% +0.55% +0.72%] index_select perm_sorted : Elapsed 0.018 ms (1.803 ms / 100) B = [4, 16, 5, 40] (stride (640, 1, 2560, 16)) A = [4, 16, 20, 40] (stride (800, 3200, 40, 1)) dim = 2 1.778 -> 1.779 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.62% +0.51%] index_select const : Elapsed 0.018 ms (1.780 ms / 100) 1.812 -> 1.817 ( +0.28%) [ +0.44% +0.11% +0.00% / +0.28% +0.99% +0.83%] index_select wrap : Elapsed 0.018 ms (1.820 ms / 100) 1.816 -> 1.813 ( -0.17%) [ +0.00% +0.17% +0.06% / -0.17% +0.44% +0.22%] index_select linear : Elapsed 0.018 ms (1.816 ms / 100) 1.817 -> 1.815 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.28% +0.33%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.774 -> 1.775 ( +0.06%) [ +0.39% +0.06% +0.00% / +0.06% +0.62% +0.79%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.777 -> 1.774 ( -0.17%) [ +0.11% +0.23% +0.00% / -0.17% +0.39% +0.62%] index_select skip256 : Elapsed 0.018 ms (1.779 ms / 100) 1.816 -> 1.819 ( +0.17%) [ +0.06% +0.00% +0.28% / +0.33% +0.17% +0.17%] index_select spread : Elapsed 0.018 ms (1.817 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.17% +0.06% +0.00% / +0.11% +0.33% +0.06%] index_select strided 3 : Elapsed 0.018 ms (1.820 ms / 100) 1.805 -> 1.805 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.39% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.805 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.22% +0.00% +0.06% / +0.17% +0.28% +0.11%] index_select strided 7 : Elapsed 0.018 ms (1.821 ms / 100) 1.817 -> 1.820 ( +0.17%) [ +0.17% +0.22% +0.00% / +0.17% +0.17% +0.17%] index_select strided 8 : Elapsed 0.018 ms (1.820 ms / 100) 1.818 -> 1.816 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.44% +0.11%] index_select strided 16 : Elapsed 0.018 ms (1.819 ms / 100) 1.814 -> 1.819 ( +0.28%) [ +0.00% +0.11% +0.17% / +0.28% +0.44% +0.28%] index_select random : Elapsed 0.018 ms (1.814 ms / 100) 1.816 -> 1.822 ( +0.33%) [ +0.39% +0.22% +0.00% / +0.33% +0.33% +0.39%] index_select random_sorted : Elapsed 0.018 ms (1.823 ms / 100) 1.815 -> 1.820 ( +0.28%) [ +0.22% +0.00% +0.33% / +0.28% +0.33% +0.33%] index_select perm : Elapsed 0.018 ms (1.819 ms / 100) 1.817 -> 1.821 ( +0.22%) [ +0.33% +0.00% +0.00% / +0.28% +0.22% +0.22%] index_select perm_sorted : Elapsed 0.018 ms (1.823 ms / 100) B = [4, 16, 5, 40] (stride (640, 1, 2560, 16)) A = [4, 16, 20, 40] (stride (16, 1, 2560, 64)) dim = 2 1.777 -> 1.776 ( -0.06%) [ +0.11% +0.11% +0.00% / +0.17% -0.06% +0.39%] index_select const : Elapsed 0.018 ms (1.779 ms / 100) 1.775 -> 1.778 ( +0.17%) [ +0.06% +0.28% +0.00% / +0.17% +0.17% +0.45%] index_select wrap : Elapsed 0.018 ms (1.776 ms / 100) 1.772 -> 1.773 ( +0.06%) [ +0.00% +0.06% +0.23% / +0.06% +0.11% +0.17%] index_select linear : Elapsed 0.018 ms (1.772 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.45% +0.34%] index_select reverse : Elapsed 0.018 ms (1.779 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.00% +0.17% +0.22% / +0.00% +0.28% +0.06%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.771 -> 1.776 ( +0.28%) [ +0.00% +0.23% +0.17% / +0.28% +0.45% +0.34%] index_select skip256 : Elapsed 0.018 ms (1.771 ms / 100) 1.785 -> 1.786 ( +0.06%) [ +0.00% +0.11% +0.22% / +0.06% +0.45% +0.17%] index_select spread : Elapsed 0.018 ms (1.785 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.62% +0.34%] index_select strided 3 : Elapsed 0.018 ms (1.779 ms / 100) 1.776 -> 1.778 ( +0.11%) [ +0.17% +0.00% +0.11% / +0.11% +0.51% +0.34%] index_select strided 5 : Elapsed 0.018 ms (1.779 ms / 100) 1.783 -> 1.784 ( +0.06%) [ +0.06% +0.00% +0.11% / +0.06% +0.34% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.784 ms / 100) 1.789 -> 1.791 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.17% +0.17%] index_select strided 8 : Elapsed 0.018 ms (1.793 ms / 100) 1.790 -> 1.789 ( -0.06%) [ +0.11% +0.28% +0.00% / +0.06% -0.06% -0.06%] index_select strided 16 : Elapsed 0.018 ms (1.792 ms / 100) 1.784 -> 1.784 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.00% +0.34% +0.00%] index_select random : Elapsed 0.018 ms (1.787 ms / 100) 1.792 -> 1.792 ( +0.00%) [ +0.28% +0.22% +0.00% / +0.00% +0.11% +0.00%] index_select random_sorted : Elapsed 0.018 ms (1.797 ms / 100) 1.772 -> 1.775 ( +0.17%) [ +0.23% +0.00% +0.00% / +0.17% +0.56% +0.68%] index_select perm : Elapsed 0.018 ms (1.776 ms / 100) 1.773 -> 1.775 ( +0.11%) [ +0.23% +0.00% +0.06% / +0.11% +0.39% +0.34%] index_select perm_sorted : Elapsed 0.018 ms (1.777 ms / 100) B = [4, 16, 5, 40] (stride (1, 160, 2560, 4)) A = [4, 16, 20, 40] (stride (1, 4, 2560, 64)) dim = 2 1.893 -> 1.893 ( +0.00%) [ +0.21% +0.11% +0.00% / +0.00% +0.37% +0.21%] index_select const : Elapsed 0.019 ms (1.897 ms / 100) 1.874 -> 1.870 ( -0.21%) [ +0.00% +0.05% +0.00% / -0.21% +0.32% +0.43%] index_select wrap : Elapsed 0.019 ms (1.874 ms / 100) 1.883 -> 1.883 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.21% +0.16%] index_select linear : Elapsed 0.019 ms (1.883 ms / 100) 1.878 -> 1.881 ( +0.16%) [ +0.00% +0.11% +0.11% / +0.16% +0.32% +0.21%] index_select reverse : Elapsed 0.019 ms (1.878 ms / 100) 1.888 -> 1.894 ( +0.32%) [ +0.21% +0.00% +0.00% / +0.32% +0.42% +0.53%] index_select skip64 : Elapsed 0.019 ms (1.892 ms / 100) 1.885 -> 1.886 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.64% +0.64%] index_select skip256 : Elapsed 0.019 ms (1.887 ms / 100) 1.883 -> 1.884 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.05% +0.48% +0.37%] index_select spread : Elapsed 0.019 ms (1.886 ms / 100) 1.882 -> 1.880 ( -0.11%) [ +0.00% +0.05% +0.16% / -0.11% +0.16% +0.16%] index_select strided 3 : Elapsed 0.019 ms (1.882 ms / 100) 1.873 -> 1.876 ( +0.16%) [ +0.16% +0.00% +0.05% / +0.16% +0.48% +0.64%] index_select strided 5 : Elapsed 0.019 ms (1.876 ms / 100) 1.877 -> 1.881 ( +0.21%) [ +0.37% +0.27% +0.00% / +0.21% +0.59% +0.75%] index_select strided 7 : Elapsed 0.019 ms (1.884 ms / 100) 1.878 -> 1.878 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.43% +0.64%] index_select strided 8 : Elapsed 0.019 ms (1.879 ms / 100) 1.873 -> 1.875 ( +0.11%) [ +0.21% +0.05% +0.00% / +0.11% +0.53% +0.69%] index_select strided 16 : Elapsed 0.019 ms (1.877 ms / 100) 1.872 -> 1.872 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.11% +0.00% +0.16%] index_select random : Elapsed 0.019 ms (1.872 ms / 100) 1.884 -> 1.882 ( -0.11%) [ +0.11% +0.00% +0.32% / -0.11% +0.58% +0.48%] index_select random_sorted : Elapsed 0.019 ms (1.886 ms / 100) 1.878 -> 1.882 ( +0.21%) [ +0.00% +0.21% +0.16% / +0.21% +0.69% +0.59%] index_select perm : Elapsed 0.019 ms (1.878 ms / 100) 1.891 -> 1.890 ( -0.05%) [ +0.00% +0.05% +0.11% / +0.32% +0.16% -0.05%] index_select perm_sorted : Elapsed 0.019 ms (1.891 ms / 100) B = [4, 16, 5, 40] (stride (1, 4, 2560, 64)) A = [4, 16, 20, 40] (stride (320, 1, 16, 1280)) dim = 2 1.917 -> 1.918 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.68% +0.63%] index_select const : Elapsed 0.019 ms (1.919 ms / 100) 1.915 -> 1.916 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.63% +0.63%] index_select wrap : Elapsed 0.019 ms (1.915 ms / 100) 1.917 -> 1.920 ( +0.16%) [ +0.00% +0.21% +0.00% / +0.16% +0.68% +0.52%] index_select linear : Elapsed 0.019 ms (1.917 ms / 100) 1.922 -> 1.925 ( +0.16%) [ +0.16% +0.36% +0.00% / +0.16% +0.68% +0.73%] index_select reverse : Elapsed 0.019 ms (1.925 ms / 100) 1.917 -> 1.915 ( -0.10%) [ +0.42% +0.16% +0.00% / -0.10% +0.68% +0.68%] index_select skip64 : Elapsed 0.019 ms (1.925 ms / 100) 1.914 -> 1.921 ( +0.37%) [ +0.16% +0.00% +0.16% / +0.37% +0.68% +0.68%] index_select skip256 : Elapsed 0.019 ms (1.917 ms / 100) 1.913 -> 1.913 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.42% +0.47%] index_select spread : Elapsed 0.019 ms (1.914 ms / 100) 1.913 -> 1.915 ( +0.10%) [ +0.21% +0.10% +0.00% / +0.10% +0.42% +0.58%] index_select strided 3 : Elapsed 0.019 ms (1.917 ms / 100) 1.917 -> 1.922 ( +0.26%) [ +0.16% +0.21% +0.00% / +0.26% +0.57% +0.52%] index_select strided 5 : Elapsed 0.019 ms (1.920 ms / 100) 1.910 -> 1.914 ( +0.21%) [ +0.00% +0.05% +0.10% / +0.21% +0.58% +0.58%] index_select strided 7 : Elapsed 0.019 ms (1.910 ms / 100) 1.918 -> 1.925 ( +0.36%) [ +0.10% +0.10% +0.00% / +0.36% +0.47% +0.36%] index_select strided 8 : Elapsed 0.019 ms (1.920 ms / 100) 1.908 -> 1.917 ( +0.47%) [ +0.00% +0.16% +0.10% / +0.52% +0.58% +0.47%] index_select strided 16 : Elapsed 0.019 ms (1.908 ms / 100) 1.925 -> 1.930 ( +0.26%) [ +0.00% +0.21% +0.26% / +0.26% +0.52% +0.57%] index_select random : Elapsed 0.019 ms (1.925 ms / 100) 1.920 -> 1.921 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +0.21% +0.47%] index_select random_sorted : Elapsed 0.019 ms (1.920 ms / 100) 1.915 -> 1.919 ( +0.21%) [ +0.31% +0.21% +0.00% / +0.21% +0.84% +0.68%] index_select perm : Elapsed 0.019 ms (1.921 ms / 100) 1.916 -> 1.920 ( +0.21%) [ +0.05% +0.05% +0.00% / +0.21% +0.57% +0.63%] index_select perm_sorted : Elapsed 0.019 ms (1.917 ms / 100) B = [4, 16, 5, 40] (stride (1, 20, 4, 320)) A = [4, 16, 20, 40] (stride (640, 1, 2560, 16)) dim = 2 1.816 -> 1.816 ( +0.00%) [ +0.22% +0.06% +0.00% / +0.17% +0.00% +0.00%] index_select const : Elapsed 0.018 ms (1.820 ms / 100) 1.812 -> 1.813 ( +0.06%) [ +0.17% +0.00% +0.22% / +0.06% +0.11% +0.39%] index_select wrap : Elapsed 0.018 ms (1.815 ms / 100) 1.815 -> 1.813 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.44% +0.44%] index_select linear : Elapsed 0.018 ms (1.816 ms / 100) 1.819 -> 1.815 ( -0.22%) [ +0.00% +0.05% +0.11% / -0.22% +0.44% +0.27%] index_select reverse : Elapsed 0.018 ms (1.819 ms / 100) 1.820 -> 1.818 ( -0.11%) [ +0.00% +0.11% +0.16% / +0.05% -0.11% +0.16%] index_select skip64 : Elapsed 0.018 ms (1.820 ms / 100) 1.816 -> 1.815 ( -0.06%) [ +0.28% +0.00% +0.00% / +0.06% +0.28% -0.06%] index_select skip256 : Elapsed 0.018 ms (1.821 ms / 100) 1.814 -> 1.815 ( +0.06%) [ +0.00% +0.11% +0.00% / +0.06% +0.66% +0.28%] index_select spread : Elapsed 0.018 ms (1.814 ms / 100) 1.825 -> 1.823 ( -0.11%) [ +0.00% +0.05% +0.11% / -0.11% -0.05% -0.05%] index_select strided 3 : Elapsed 0.018 ms (1.825 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.28% +0.22% +0.00% / +0.06% +0.55% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.822 ms / 100) 1.814 -> 1.816 ( +0.11%) [ +0.06% +0.00% +0.11% / +0.11% +0.77% +0.39%] index_select strided 7 : Elapsed 0.018 ms (1.815 ms / 100) 1.817 -> 1.817 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.50% +0.17%] index_select strided 8 : Elapsed 0.018 ms (1.817 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.00% +0.17% +0.06% / +0.11% +0.28% +0.39%] index_select strided 16 : Elapsed 0.018 ms (1.817 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.00% +0.16% +0.49% / +0.11% +0.38% +0.16%] index_select random : Elapsed 0.018 ms (1.822 ms / 100) 1.819 -> 1.821 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.22% +0.11%] index_select random_sorted : Elapsed 0.018 ms (1.819 ms / 100) 1.810 -> 1.811 ( +0.06%) [ +0.22% +0.00% +0.06% / +0.06% +0.50% +0.55%] index_select perm : Elapsed 0.018 ms (1.814 ms / 100) 1.817 -> 1.817 ( +0.00%) [ +0.22% +0.06% +0.00% / +0.00% +0.50% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) out_shape = [4, 16, 20, 5] in_shape = [4, 16, 20, 40] idx_dim = 3 B = [4, 16, 20, 5] (stride (1600, 5, 80, 1)) A = [4, 16, 20, 40] (stride (12800, 800, 40, 1)) dim = 3 1.396 -> 1.399 ( +0.21%) [ +0.07% +0.00% +0.00% / +0.21% +0.36% +0.29%] index_select const : Elapsed 0.014 ms (1.397 ms / 100) 1.393 -> 1.396 ( +0.22%) [ +0.22% +0.14% +0.00% / +0.22% +0.57% +0.36%] index_select wrap : Elapsed 0.014 ms (1.396 ms / 100) 1.396 -> 1.400 ( +0.29%) [ +0.00% +0.21% +0.21% / +0.29% +0.64% +0.64%] index_select linear : Elapsed 0.014 ms (1.396 ms / 100) 1.396 -> 1.399 ( +0.21%) [ +0.14% +0.00% +0.07% / +0.21% +0.57% +0.86%] index_select reverse : Elapsed 0.014 ms (1.398 ms / 100) 1.396 -> 1.397 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.36% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.396 ms / 100) 1.395 -> 1.396 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.57% +0.57%] index_select skip256 : Elapsed 0.014 ms (1.396 ms / 100) 1.392 -> 1.393 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.43% +0.36%] index_select spread : Elapsed 0.014 ms (1.393 ms / 100) 1.376 -> 1.381 ( +0.36%) [ +0.58% +0.44% +0.00% / +0.36% +0.65% +0.65%] index_select strided 3 : Elapsed 0.014 ms (1.384 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.74% +0.59%] index_select strided 5 : Elapsed 0.014 ms (1.359 ms / 100) 1.391 -> 1.392 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.58% +0.50%] index_select strided 7 : Elapsed 0.014 ms (1.393 ms / 100) 1.397 -> 1.399 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.50% +0.50%] index_select strided 8 : Elapsed 0.014 ms (1.398 ms / 100) 1.354 -> 1.356 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.59% +0.89%] index_select strided 16 : Elapsed 0.014 ms (1.357 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.65% +0.65%] index_select random : Elapsed 0.014 ms (1.385 ms / 100) 1.392 -> 1.393 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.93% +0.65%] index_select random_sorted : Elapsed 0.014 ms (1.394 ms / 100) 1.396 -> 1.397 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.86% +0.72%] index_select perm : Elapsed 0.014 ms (1.398 ms / 100) 1.357 -> 1.358 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.59% +0.66%] index_select perm_sorted : Elapsed 0.014 ms (1.359 ms / 100) B = [4, 16, 20, 5] (stride (1600, 1, 80, 16)) A = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) dim = 3 1.374 -> 1.377 ( +0.22%) [ +0.22% +0.15% +0.00% / +0.22% +0.73% +0.73%] index_select const : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +1.09% +0.73%] index_select wrap : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.58% +0.65%] index_select linear : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.80% +0.73%] index_select reverse : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.80% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.87% +0.80%] index_select spread : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.80% +0.66%] index_select strided 3 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.65% +0.73%] index_select strided 5 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +1.02% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.80% +0.73%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.95%] index_select strided 16 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.73% +0.80%] index_select random : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.73% +0.73%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [4, 16, 20, 5] (stride (1600, 20, 1, 320)) A = [4, 16, 20, 40] (stride (320, 20, 1, 1280)) dim = 3 1.180 -> 1.182 ( +0.17%) [ +0.42% +0.00% +0.59% / +0.17% +0.68% +0.68%] index_select const : Elapsed 0.012 ms (1.185 ms / 100) 1.183 -> 1.186 ( +0.25%) [ +0.34% +0.00% +0.00% / +0.25% +0.51% +0.42%] index_select wrap : Elapsed 0.012 ms (1.187 ms / 100) 1.181 -> 1.188 ( +0.59%) [ +0.51% +0.00% +0.17% / +0.68% +0.59% +0.68%] index_select linear : Elapsed 0.012 ms (1.187 ms / 100) 1.183 -> 1.182 ( -0.08%) [ +0.17% +0.17% +0.00% / -0.08% +0.59% +0.25%] index_select reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.180 -> 1.179 ( -0.08%) [ +0.59% +0.34% +0.00% / -0.08% +0.51% +0.68%] index_select skip64 : Elapsed 0.012 ms (1.187 ms / 100) 1.181 -> 1.183 ( +0.17%) [ +0.51% +0.34% +0.00% / +0.17% +0.34% +0.42%] index_select skip256 : Elapsed 0.012 ms (1.187 ms / 100) 1.181 -> 1.186 ( +0.42%) [ +0.59% +0.00% +0.34% / +0.42% +0.59% +0.59%] index_select spread : Elapsed 0.012 ms (1.188 ms / 100) 1.182 -> 1.182 ( +0.00%) [ +0.17% +0.00% +0.25% / +0.00% +0.51% +0.59%] index_select strided 3 : Elapsed 0.012 ms (1.184 ms / 100) 1.179 -> 1.185 ( +0.51%) [ +0.68% +0.00% +0.42% / +0.51% +0.76% +0.76%] index_select strided 5 : Elapsed 0.012 ms (1.187 ms / 100) 1.182 -> 1.183 ( +0.08%) [ +0.51% +0.00% +0.17% / +0.08% +0.42% +0.59%] index_select strided 7 : Elapsed 0.012 ms (1.188 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.51% +0.17% +0.00% / +0.51% +0.42% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.188 ms / 100) 1.180 -> 1.182 ( +0.17%) [ +0.42% +0.00% +0.42% / +0.17% +0.68% +0.68%] index_select strided 16 : Elapsed 0.012 ms (1.185 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.00% +0.17% / +0.59% +0.68% +0.76%] index_select random : Elapsed 0.012 ms (1.187 ms / 100) 1.183 -> 1.186 ( +0.25%) [ +0.08% +0.00% +0.25% / +0.25% +0.59% +1.01%] index_select random_sorted : Elapsed 0.012 ms (1.184 ms / 100) 1.182 -> 1.183 ( +0.08%) [ +0.34% +0.17% +0.00% / +0.08% +0.51% +0.59%] index_select perm : Elapsed 0.012 ms (1.186 ms / 100) 1.179 -> 1.181 ( +0.17%) [ +0.68% +0.25% +0.00% / +0.17% +0.85% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.187 ms / 100) B = [4, 16, 20, 5] (stride (100, 400, 5, 1)) A = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) dim = 3 1.376 -> 1.378 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.58% +0.51%] index_select const : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +0.65%] index_select wrap : Elapsed 0.014 ms (1.375 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.58% +0.58%] index_select linear : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.73% +0.80%] index_select reverse : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.374 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.80% +0.65%] index_select skip256 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.73% +0.80%] index_select spread : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.73% +0.73%] index_select strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.376 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.22% +0.00% +0.15% / +0.07% +0.80% +0.87%] index_select strided 7 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.36% +0.00% +0.00% / +0.00% +0.66% +0.73%] index_select strided 8 : Elapsed 0.014 ms (1.379 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.80% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.374 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.65% +0.73%] index_select random : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.87% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.80% +0.73%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [4, 16, 20, 5] (stride (80, 1, 320, 16)) A = [4, 16, 20, 40] (stride (12800, 1, 16, 320)) dim = 3 1.487 -> 1.488 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.34%] index_select const : Elapsed 0.015 ms (1.488 ms / 100) 1.487 -> 1.489 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.47% +0.40%] index_select wrap : Elapsed 0.015 ms (1.488 ms / 100) 1.487 -> 1.487 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.47% +0.61%] index_select linear : Elapsed 0.015 ms (1.488 ms / 100) 1.486 -> 1.486 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.47% +0.54%] index_select reverse : Elapsed 0.015 ms (1.487 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.54% +0.47%] index_select skip64 : Elapsed 0.015 ms (1.487 ms / 100) 1.487 -> 1.488 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.40%] index_select skip256 : Elapsed 0.015 ms (1.487 ms / 100) 1.487 -> 1.486 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.47% +0.40%] index_select spread : Elapsed 0.015 ms (1.487 ms / 100) 1.487 -> 1.487 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.40% +0.40%] index_select strided 3 : Elapsed 0.015 ms (1.488 ms / 100) 1.487 -> 1.488 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.54% +0.40%] index_select strided 5 : Elapsed 0.015 ms (1.489 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.487 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.487 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select strided 16 : Elapsed 0.015 ms (1.487 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.74% +0.61%] index_select random : Elapsed 0.015 ms (1.487 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.67%] index_select random_sorted : Elapsed 0.015 ms (1.487 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.61% +0.54%] index_select perm : Elapsed 0.015 ms (1.487 ms / 100) 1.487 -> 1.489 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.61% +0.67%] index_select perm_sorted : Elapsed 0.015 ms (1.489 ms / 100) B = [4, 16, 20, 5] (stride (1, 4, 64, 1280)) A = [4, 16, 20, 40] (stride (12800, 1, 640, 16)) dim = 3 1.590 -> 1.592 ( +0.13%) [ +0.06% +0.06% +0.00% / +0.13% +0.94% +0.94%] index_select const : Elapsed 0.016 ms (1.591 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.13% +0.00% +0.19%] index_select wrap : Elapsed 0.016 ms (1.575 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.25% +0.13% +0.00% / +0.19% -0.06% +0.51%] index_select linear : Elapsed 0.016 ms (1.580 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.25% +0.38% +0.00% / +0.38% +0.38% +0.13%] index_select reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.578 -> 1.579 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.38% +0.51%] index_select skip64 : Elapsed 0.016 ms (1.579 ms / 100) 1.589 -> 1.590 ( +0.06%) [ +0.13% +0.06% +0.00% / +0.06% +1.07% +1.01%] index_select skip256 : Elapsed 0.016 ms (1.591 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.32% +0.19% +0.00% / +0.00% +0.70% +0.57%] index_select spread : Elapsed 0.016 ms (1.580 ms / 100) 1.572 -> 1.572 ( +0.00%) [ +0.45% +0.00% +0.19% / +0.00% +0.19% +0.19%] index_select strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.554 -> 1.556 ( +0.13%) [ +0.19% +0.19% +0.00% / +0.13% +0.90% +1.54%] index_select strided 5 : Elapsed 0.016 ms (1.557 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.19% +0.06% +0.13%] index_select strided 7 : Elapsed 0.016 ms (1.576 ms / 100) 1.578 -> 1.574 ( -0.25%) [ +0.13% +0.00% +0.00% / +0.19% +0.25% -0.25%] index_select strided 8 : Elapsed 0.016 ms (1.580 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.00% +0.19% +0.13% / +0.19% +0.32% +0.38%] index_select strided 16 : Elapsed 0.016 ms (1.573 ms / 100) 1.568 -> 1.578 ( +0.64%) [ +0.70% +0.00% +0.51% / +0.77% +0.64% +0.70%] index_select random : Elapsed 0.016 ms (1.579 ms / 100) 1.548 -> 1.549 ( +0.06%) [ +0.00% +0.13% +0.39% / +0.06% +0.78% +1.61%] index_select random_sorted : Elapsed 0.015 ms (1.548 ms / 100) 1.565 -> 1.567 ( +0.13%) [ +0.89% +0.06% +0.00% / +0.13% +0.89% +0.77%] index_select perm : Elapsed 0.016 ms (1.579 ms / 100) 1.566 -> 1.573 ( +0.45%) [ +0.57% +0.00% +0.64% / +0.51% +0.57% +0.45%] index_select perm_sorted : Elapsed 0.016 ms (1.575 ms / 100) out_shape = [5, 16, 40, 20] in_shape = [4, 16, 40, 20] idx_dim = 0 B = [5, 16, 40, 20] (stride (12800, 1, 16, 640)) A = [4, 16, 40, 20] (stride (12800, 800, 20, 1)) dim = 0 5.315 -> 5.306 ( -0.17%) [ +0.23% +0.00% +0.23% / +0.23% -0.17% +0.00%] index_add_ linear : Elapsed 0.053 ms (5.327 ms / 100) 5.276 -> 5.272 ( -0.08%) [ +0.00% +0.04% +0.00% / +0.06% -0.08% -0.02%] index_copy_ linear : Elapsed 0.053 ms (5.276 ms / 100) 5.304 -> 5.306 ( +0.04%) [ +0.15% +0.00% +0.02% / +0.17% +0.11% +0.04%] index_add_ reverse : Elapsed 0.053 ms (5.312 ms / 100) 5.271 -> 5.264 ( -0.13%) [ +0.11% +0.00% +0.17% / +0.08% -0.08% -0.13%] index_copy_ reverse : Elapsed 0.053 ms (5.277 ms / 100) 5.318 -> 5.313 ( -0.09%) [ +0.13% +0.00% +0.13% / -0.02% -0.09% -0.08%] index_add_ spread : Elapsed 0.053 ms (5.325 ms / 100) 5.271 -> 5.263 ( -0.15%) [ +0.06% +0.04% +0.00% / +0.11% +0.09% -0.15%] index_copy_ spread : Elapsed 0.053 ms (5.274 ms / 100) 5.308 -> 5.305 ( -0.06%) [ +0.13% +0.13% +0.00% / -0.06% -0.02% -0.04%] index_add_ strided 3 : Elapsed 0.053 ms (5.315 ms / 100) 5.265 -> 5.260 ( -0.09%) [ +0.08% +0.00% +0.28% / +0.17% -0.09% +0.02%] index_copy_ strided 3 : Elapsed 0.053 ms (5.269 ms / 100) 5.301 -> 5.293 ( -0.15%) [ +0.00% +0.08% +0.11% / +0.04% -0.15% +0.08%] index_add_ perm : Elapsed 0.053 ms (5.301 ms / 100) 5.269 -> 5.263 ( -0.11%) [ +0.09% +0.00% +0.04% / +0.02% +0.09% -0.11%] index_copy_ perm : Elapsed 0.053 ms (5.274 ms / 100) 5.321 -> 5.307 ( -0.26%) [ +0.06% +0.00% +0.08% / -0.06% -0.26% -0.21%] index_add_ perm_sorted : Elapsed 0.053 ms (5.324 ms / 100) 5.275 -> 5.266 ( -0.17%) [ +0.06% +0.02% +0.00% / +0.00% -0.17% -0.11%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.278 ms / 100) 5.438 -> 5.436 ( -0.04%) [ +0.04% +0.00% +0.26% / -0.04% +0.13% +0.13%] index_select const : Elapsed 0.054 ms (5.440 ms / 100) 5.540 -> 5.527 ( -0.23%) [ +0.00% +0.02% +0.02% / +0.13% -0.11% -0.23%] index_select wrap : Elapsed 0.055 ms (5.540 ms / 100) 5.533 -> 5.521 ( -0.22%) [ +0.00% +0.09% +0.09% / +0.13% -0.02% -0.22%] index_select linear : Elapsed 0.055 ms (5.533 ms / 100) 5.531 -> 5.524 ( -0.13%) [ +0.11% +0.00% +0.09% / +0.09% +0.00% -0.13%] index_select reverse : Elapsed 0.055 ms (5.537 ms / 100) 5.437 -> 5.438 ( +0.02%) [ +0.00% +0.11% +0.02% / +0.02% +0.15% +0.09%] index_select skip64 : Elapsed 0.054 ms (5.437 ms / 100) 5.440 -> 5.443 ( +0.06%) [ +0.20% +0.00% +0.15% / +0.06% +0.09% +0.28%] index_select skip256 : Elapsed 0.055 ms (5.451 ms / 100) 5.531 -> 5.528 ( -0.05%) [ +0.00% +0.02% +0.07% / +0.07% -0.05% -0.04%] index_select spread : Elapsed 0.055 ms (5.531 ms / 100) 5.531 -> 5.521 ( -0.18%) [ +0.14% +0.00% +0.04% / +0.13% -0.18% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.539 ms / 100) 5.507 -> 5.492 ( -0.27%) [ +0.02% +0.11% +0.00% / +0.04% -0.27% -0.25%] index_select random : Elapsed 0.055 ms (5.508 ms / 100) 5.513 -> 5.499 ( -0.25%) [ +0.02% +0.11% +0.00% / +0.09% -0.25% -0.22%] index_select random_sorted : Elapsed 0.055 ms (5.514 ms / 100) B = [5, 16, 40, 20] (stride (800, 4000, 1, 40)) A = [4, 16, 40, 20] (stride (12800, 1, 16, 640)) dim = 0 5.908 -> 5.923 ( +0.25%) [ +0.08% +0.00% +0.20% / +0.25% +0.46% +0.49%] index_add_ linear : Elapsed 0.059 ms (5.913 ms / 100) 5.794 -> 5.799 ( +0.09%) [ +0.10% +0.00% +0.05% / +0.09% +0.09% +0.10%] index_copy_ linear : Elapsed 0.058 ms (5.800 ms / 100) 5.884 -> 5.895 ( +0.19%) [ +0.00% +0.02% +0.07% / +0.19% +0.44% +0.36%] index_add_ reverse : Elapsed 0.059 ms (5.884 ms / 100) 5.782 -> 5.783 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.02% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.058 ms (5.782 ms / 100) 5.904 -> 5.917 ( +0.22%) [ +0.05% +0.17% +0.00% / +0.22% +0.42% +0.44%] index_add_ spread : Elapsed 0.059 ms (5.907 ms / 100) 5.791 -> 5.798 ( +0.12%) [ +0.02% +0.00% +0.12% / +0.12% +0.21% +0.14%] index_copy_ spread : Elapsed 0.058 ms (5.792 ms / 100) 5.878 -> 5.872 ( -0.10%) [ +0.05% +0.00% +0.09% / +0.09% -0.10% -0.09%] index_add_ strided 3 : Elapsed 0.059 ms (5.881 ms / 100) 5.769 -> 5.768 ( -0.02%) [ +0.16% +0.00% +0.17% / +0.19% +0.10% -0.02%] index_copy_ strided 3 : Elapsed 0.058 ms (5.778 ms / 100) 5.859 -> 5.854 ( -0.09%) [ +0.05% +0.00% +0.00% / -0.09% +0.36% +0.26%] index_add_ perm : Elapsed 0.059 ms (5.862 ms / 100) 5.786 -> 5.784 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.35% +0.21%] index_copy_ perm : Elapsed 0.058 ms (5.788 ms / 100) 5.852 -> 5.851 ( -0.02%) [ +0.05% +0.00% +0.07% / -0.02% +0.48% +0.34%] index_add_ perm_sorted : Elapsed 0.059 ms (5.855 ms / 100) 5.783 -> 5.786 ( +0.05%) [ +0.12% +0.02% +0.00% / +0.05% +0.26% +0.16%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.790 ms / 100) 6.135 -> 6.118 ( -0.28%) [ +0.00% +0.07% +0.16% / +0.00% -0.11% -0.28%] index_select const : Elapsed 0.061 ms (6.135 ms / 100) 6.170 -> 6.184 ( +0.23%) [ +0.29% +0.00% +0.29% / +0.31% +0.34% +0.23%] index_select wrap : Elapsed 0.062 ms (6.188 ms / 100) 6.169 -> 6.164 ( -0.08%) [ +0.00% +0.05% +0.11% / -0.08% +0.18% +0.11%] index_select linear : Elapsed 0.062 ms (6.169 ms / 100) 6.171 -> 6.186 ( +0.24%) [ +0.15% +0.00% +0.13% / +0.24% +0.32% +0.24%] index_select reverse : Elapsed 0.062 ms (6.180 ms / 100) 6.137 -> 6.116 ( -0.34%) [ +0.00% +0.00% +0.13% / -0.02% -0.05% -0.34%] index_select skip64 : Elapsed 0.061 ms (6.137 ms / 100) 6.138 -> 6.120 ( -0.29%) [ +0.00% +0.03% +0.10% / +0.02% -0.29% -0.24%] index_select skip256 : Elapsed 0.061 ms (6.138 ms / 100) 6.166 -> 6.172 ( +0.10%) [ +0.06% +0.00% +0.16% / +0.19% +0.21% +0.10%] index_select spread : Elapsed 0.062 ms (6.170 ms / 100) 6.185 -> 6.184 ( -0.02%) [ +0.02% +0.08% +0.00% / -0.02% +0.06% +0.05%] index_select strided 3 : Elapsed 0.062 ms (6.186 ms / 100) 6.151 -> 6.153 ( +0.03%) [ +0.07% +0.00% +0.13% / +0.10% +0.07% +0.03%] index_select random : Elapsed 0.062 ms (6.155 ms / 100) 6.149 -> 6.141 ( -0.13%) [ +0.21% +0.00% +0.13% / +0.29% -0.05% -0.13%] index_select random_sorted : Elapsed 0.062 ms (6.162 ms / 100) B = [5, 16, 40, 20] (stride (20, 4000, 100, 1)) A = [4, 16, 40, 20] (stride (320, 20, 1280, 1)) dim = 0 5.652 -> 5.640 ( -0.21%) [ +0.00% +0.11% +0.16% / +0.21% -0.19% -0.21%] index_add_ linear : Elapsed 0.057 ms (5.652 ms / 100) 5.604 -> 5.587 ( -0.30%) [ +0.14% +0.00% +0.16% / +0.23% -0.30% -0.21%] index_copy_ linear : Elapsed 0.056 ms (5.612 ms / 100) 5.653 -> 5.645 ( -0.14%) [ +0.11% +0.00% +0.19% / +0.07% +0.11% -0.14%] index_add_ reverse : Elapsed 0.057 ms (5.659 ms / 100) 5.596 -> 5.592 ( -0.07%) [ +0.00% +0.14% +0.23% / +0.16% -0.02% -0.07%] index_copy_ reverse : Elapsed 0.056 ms (5.596 ms / 100) 5.655 -> 5.635 ( -0.35%) [ +0.14% +0.00% +0.04% / +0.09% -0.18% -0.35%] index_add_ spread : Elapsed 0.057 ms (5.663 ms / 100) 5.601 -> 5.589 ( -0.21%) [ +0.00% +0.00% +0.27% / +0.00% -0.14% -0.21%] index_copy_ spread : Elapsed 0.056 ms (5.601 ms / 100) 5.646 -> 5.645 ( -0.02%) [ +0.00% +0.00% +0.04% / +0.09% +0.04% -0.02%] index_add_ strided 3 : Elapsed 0.056 ms (5.646 ms / 100) 5.585 -> 5.598 ( +0.23%) [ +0.00% +0.27% +0.30% / +0.34% +0.23% +0.48%] index_copy_ strided 3 : Elapsed 0.056 ms (5.585 ms / 100) 5.641 -> 5.638 ( -0.05%) [ +0.00% +0.20% +0.00% / +0.12% +0.09% -0.05%] index_add_ perm : Elapsed 0.056 ms (5.641 ms / 100) 5.584 -> 5.586 ( +0.04%) [ +0.00% +0.14% +0.20% / +0.13% +0.05% +0.04%] index_copy_ perm : Elapsed 0.056 ms (5.584 ms / 100) 5.647 -> 5.636 ( -0.19%) [ +0.07% +0.00% +0.07% / +0.18% -0.16% -0.19%] index_add_ perm_sorted : Elapsed 0.057 ms (5.651 ms / 100) 5.601 -> 5.589 ( -0.21%) [ +0.05% +0.00% +0.02% / +0.02% -0.18% -0.21%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.604 ms / 100) 5.774 -> 5.780 ( +0.10%) [ +0.10% +0.00% +0.07% / +0.10% +0.19% +0.28%] index_select const : Elapsed 0.058 ms (5.780 ms / 100) 5.908 -> 5.905 ( -0.05%) [ +0.07% +0.00% +0.20% / +0.15% -0.05% +0.08%] index_select wrap : Elapsed 0.059 ms (5.912 ms / 100) 5.895 -> 5.884 ( -0.19%) [ +0.19% +0.00% +0.02% / +0.20% -0.19% -0.19%] index_select linear : Elapsed 0.059 ms (5.906 ms / 100) 5.895 -> 5.892 ( -0.05%) [ +0.00% +0.03% +0.02% / -0.03% -0.05% +0.24%] index_select reverse : Elapsed 0.059 ms (5.895 ms / 100) 5.760 -> 5.771 ( +0.19%) [ +0.09% +0.00% +0.24% / +0.35% +0.19% +0.26%] index_select skip64 : Elapsed 0.058 ms (5.765 ms / 100) 5.769 -> 5.776 ( +0.12%) [ +0.10% +0.00% +0.19% / +0.12% +0.28% +0.28%] index_select skip256 : Elapsed 0.058 ms (5.775 ms / 100) 5.885 -> 5.877 ( -0.14%) [ +0.00% +0.12% +0.19% / +0.22% -0.14% -0.03%] index_select spread : Elapsed 0.059 ms (5.885 ms / 100) 5.899 -> 5.890 ( -0.15%) [ +0.00% +0.14% +0.03% / +0.05% -0.14% -0.15%] index_select strided 3 : Elapsed 0.059 ms (5.899 ms / 100) 5.901 -> 5.892 ( -0.15%) [ +0.02% +0.10% +0.00% / +0.03% -0.15% -0.07%] index_select random : Elapsed 0.059 ms (5.902 ms / 100) 5.903 -> 5.887 ( -0.27%) [ +0.02% +0.00% +0.07% / +0.03% -0.27% -0.25%] index_select random_sorted : Elapsed 0.059 ms (5.904 ms / 100) B = [5, 16, 40, 20] (stride (1, 4000, 5, 200)) A = [4, 16, 40, 20] (stride (800, 3200, 20, 1)) dim = 0 5.699 -> 5.705 ( +0.11%) [ +0.05% +0.00% +0.04% / +0.11% +0.53% +0.46%] index_add_ linear : Elapsed 0.057 ms (5.702 ms / 100) 5.673 -> 5.678 ( +0.09%) [ +0.00% +0.05% +0.12% / +0.09% +0.23% +0.37%] index_copy_ linear : Elapsed 0.057 ms (5.673 ms / 100) 5.705 -> 5.707 ( +0.04%) [ +0.07% +0.00% +0.07% / +0.04% +0.47% +0.37%] index_add_ reverse : Elapsed 0.057 ms (5.709 ms / 100) 5.678 -> 5.681 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.26% +0.30%] index_copy_ reverse : Elapsed 0.057 ms (5.679 ms / 100) 5.702 -> 5.712 ( +0.18%) [ +0.07% +0.04% +0.00% / +0.18% +0.67% +0.65%] index_add_ spread : Elapsed 0.057 ms (5.706 ms / 100) 5.682 -> 5.685 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.28% +0.33%] index_copy_ spread : Elapsed 0.057 ms (5.682 ms / 100) 5.699 -> 5.705 ( +0.11%) [ +0.00% +0.05% +0.05% / +0.11% +0.53% +0.49%] index_add_ strided 3 : Elapsed 0.057 ms (5.699 ms / 100) 5.676 -> 5.680 ( +0.07%) [ +0.04% +0.02% +0.00% / +0.07% +0.26% +0.39%] index_copy_ strided 3 : Elapsed 0.057 ms (5.678 ms / 100) 5.712 -> 5.713 ( +0.02%) [ +0.00% +0.11% +0.02% / +0.02% +0.39% +0.39%] index_add_ perm : Elapsed 0.057 ms (5.712 ms / 100) 5.687 -> 5.692 ( +0.09%) [ +0.00% +0.18% +0.26% / +0.09% +0.28% +0.19%] index_copy_ perm : Elapsed 0.057 ms (5.687 ms / 100) 5.700 -> 5.712 ( +0.21%) [ +0.00% +0.04% +0.19% / +0.21% +0.56% +0.54%] index_add_ perm_sorted : Elapsed 0.057 ms (5.700 ms / 100) 5.679 -> 5.689 ( +0.18%) [ +0.09% +0.00% +0.09% / +0.18% +0.39% +0.32%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.684 ms / 100) 5.803 -> 5.812 ( +0.16%) [ +0.00% +0.03% +0.09% / +0.16% +0.40% +0.38%] index_select const : Elapsed 0.058 ms (5.803 ms / 100) 5.886 -> 5.892 ( +0.10%) [ +0.03% +0.00% +0.02% / +0.10% +0.41% +0.37%] index_select wrap : Elapsed 0.059 ms (5.888 ms / 100) 5.885 -> 5.887 ( +0.03%) [ +0.00% +0.14% +0.17% / +0.03% +0.29% +0.37%] index_select linear : Elapsed 0.059 ms (5.885 ms / 100) 5.881 -> 5.886 ( +0.09%) [ +0.10% +0.00% +0.07% / +0.09% +0.63% +0.51%] index_select reverse : Elapsed 0.059 ms (5.887 ms / 100) 5.817 -> 5.822 ( +0.09%) [ +0.00% +0.03% +0.09% / +0.09% +0.48% +0.46%] index_select skip64 : Elapsed 0.058 ms (5.817 ms / 100) 5.800 -> 5.807 ( +0.12%) [ +0.02% +0.00% +0.10% / +0.12% +0.45% +0.34%] index_select skip256 : Elapsed 0.058 ms (5.801 ms / 100) 5.894 -> 5.897 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.37% +0.37%] index_select spread : Elapsed 0.059 ms (5.894 ms / 100) 5.884 -> 5.881 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.49% +0.66%] index_select strided 3 : Elapsed 0.059 ms (5.884 ms / 100) 5.860 -> 5.877 ( +0.29%) [ +0.09% +0.00% +0.09% / +0.29% +0.63% +0.60%] index_select random : Elapsed 0.059 ms (5.865 ms / 100) 5.849 -> 5.850 ( +0.02%) [ +0.00% +0.14% +0.24% / +0.02% +0.62% +0.65%] index_select random_sorted : Elapsed 0.058 ms (5.849 ms / 100) B = [5, 16, 40, 20] (stride (640, 1, 16, 3200)) A = [4, 16, 40, 20] (stride (1, 160, 4, 2560)) dim = 0 5.747 -> 5.721 ( -0.45%) [ +0.00% +0.02% +0.07% / +0.10% -0.45% -0.40%] index_add_ linear : Elapsed 0.057 ms (5.747 ms / 100) 5.691 -> 5.671 ( -0.35%) [ +0.02% +0.00% +0.21% / +0.16% -0.23% -0.35%] index_copy_ linear : Elapsed 0.057 ms (5.692 ms / 100) 5.756 -> 5.716 ( -0.69%) [ +0.03% +0.00% +0.14% / +0.07% -0.66% -0.69%] index_add_ reverse : Elapsed 0.058 ms (5.758 ms / 100) 5.701 -> 5.669 ( -0.56%) [ +0.00% +0.02% +0.02% / -0.02% -0.21% -0.56%] index_copy_ reverse : Elapsed 0.057 ms (5.701 ms / 100) 5.744 -> 5.724 ( -0.35%) [ +0.00% +0.03% +0.10% / +0.17% -0.19% -0.35%] index_add_ spread : Elapsed 0.057 ms (5.744 ms / 100) 5.694 -> 5.674 ( -0.35%) [ +0.02% +0.00% +0.05% / +0.07% -0.16% -0.35%] index_copy_ spread : Elapsed 0.057 ms (5.695 ms / 100) 5.749 -> 5.724 ( -0.43%) [ +0.00% +0.05% +0.07% / +0.07% -0.43% -0.43%] index_add_ strided 3 : Elapsed 0.057 ms (5.749 ms / 100) 5.691 -> 5.675 ( -0.28%) [ +0.00% +0.12% +0.18% / +0.26% -0.16% -0.28%] index_copy_ strided 3 : Elapsed 0.057 ms (5.691 ms / 100) 5.744 -> 5.738 ( -0.10%) [ +0.05% +0.00% +0.03% / +0.16% -0.10% -0.07%] index_add_ perm : Elapsed 0.057 ms (5.747 ms / 100) 5.687 -> 5.681 ( -0.11%) [ +0.09% +0.00% +0.05% / +0.19% -0.11% +0.04%] index_copy_ perm : Elapsed 0.057 ms (5.692 ms / 100) 5.739 -> 5.728 ( -0.19%) [ +0.00% +0.02% +0.21% / +0.44% -0.14% -0.19%] index_add_ perm_sorted : Elapsed 0.057 ms (5.739 ms / 100) 5.689 -> 5.679 ( -0.18%) [ +0.00% +0.05% +0.18% / +0.02% -0.18% -0.18%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.689 ms / 100) 6.048 -> 6.023 ( -0.41%) [ +0.05% +0.07% +0.00% / +0.17% -0.41% -0.41%] index_select const : Elapsed 0.061 ms (6.051 ms / 100) 6.042 -> 6.017 ( -0.41%) [ +0.00% +0.00% +0.15% / +0.17% -0.41% -0.40%] index_select wrap : Elapsed 0.060 ms (6.042 ms / 100) 6.045 -> 6.013 ( -0.53%) [ +0.00% +0.02% +0.15% / +0.03% -0.53% -0.43%] index_select linear : Elapsed 0.060 ms (6.045 ms / 100) 6.046 -> 6.011 ( -0.58%) [ +0.00% +0.00% +0.07% / +0.07% -0.58% -0.46%] index_select reverse : Elapsed 0.060 ms (6.046 ms / 100) 6.046 -> 6.019 ( -0.45%) [ +0.00% +0.00% +0.08% / -0.02% -0.36% -0.45%] index_select skip64 : Elapsed 0.060 ms (6.046 ms / 100) 6.042 -> 6.021 ( -0.35%) [ +0.10% +0.00% +0.18% / +0.10% -0.35% -0.35%] index_select skip256 : Elapsed 0.060 ms (6.048 ms / 100) 6.037 -> 6.016 ( -0.35%) [ +0.18% +0.00% +0.30% / +0.22% -0.35% -0.35%] index_select spread : Elapsed 0.060 ms (6.048 ms / 100) 6.042 -> 6.013 ( -0.48%) [ +0.05% +0.00% +0.07% / +0.05% -0.33% -0.48%] index_select strided 3 : Elapsed 0.060 ms (6.045 ms / 100) 6.042 -> 6.009 ( -0.55%) [ +0.00% +0.08% +0.17% / +0.15% -0.48% -0.55%] index_select random : Elapsed 0.060 ms (6.042 ms / 100) 6.039 -> 6.018 ( -0.35%) [ +0.00% +0.15% +0.18% / +0.26% -0.31% -0.35%] index_select random_sorted : Elapsed 0.060 ms (6.039 ms / 100) out_shape = [4, 5, 40, 20] in_shape = [4, 16, 40, 20] idx_dim = 1 B = [4, 5, 40, 20] (stride (4000, 800, 20, 1)) A = [4, 16, 40, 20] (stride (12800, 40, 1, 640)) dim = 1 2.261 -> 2.263 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.53% +0.49%] index_select const : Elapsed 0.023 ms (2.265 ms / 100) 2.273 -> 2.278 ( +0.22%) [ +0.22% +0.18% +0.00% / +0.22% +0.31% +0.31%] index_select wrap : Elapsed 0.023 ms (2.278 ms / 100) 2.279 -> 2.280 ( +0.04%) [ +0.00% +0.00% +0.09% / +0.04% +0.04% +0.13%] index_select linear : Elapsed 0.023 ms (2.279 ms / 100) 2.275 -> 2.277 ( +0.09%) [ +0.13% +0.22% +0.00% / +0.09% +0.13% +0.13%] index_select reverse : Elapsed 0.023 ms (2.278 ms / 100) 2.264 -> 2.265 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.44% +0.44%] index_select skip64 : Elapsed 0.023 ms (2.264 ms / 100) 2.260 -> 2.264 ( +0.18%) [ +0.13% +0.00% +0.04% / +0.18% +0.80% +0.53%] index_select skip256 : Elapsed 0.023 ms (2.263 ms / 100) 2.277 -> 2.276 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.26% +0.18%] index_select spread : Elapsed 0.023 ms (2.277 ms / 100) 2.277 -> 2.279 ( +0.09%) [ +0.22% +0.13% +0.00% / +0.09% +0.61% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.282 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.22% +0.22%] index_select strided 5 : Elapsed 0.023 ms (2.280 ms / 100) 2.279 -> 2.279 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.48% +0.66%] index_select strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.255 -> 2.259 ( +0.18%) [ +0.13% +0.00% +0.04% / +0.18% +0.71% +0.58%] index_select strided 8 : Elapsed 0.023 ms (2.258 ms / 100) 2.277 -> 2.280 ( +0.13%) [ +0.22% +0.00% +0.00% / +0.13% +0.31% +0.40%] index_select random : Elapsed 0.023 ms (2.282 ms / 100) 2.273 -> 2.280 ( +0.31%) [ +0.22% +0.22% +0.00% / +0.31% +0.70% +0.48%] index_select random_sorted : Elapsed 0.023 ms (2.278 ms / 100) 2.270 -> 2.275 ( +0.22%) [ +0.00% +0.40% +0.09% / +0.22% +0.48% +0.48%] index_select perm : Elapsed 0.023 ms (2.270 ms / 100) 2.275 -> 2.273 ( -0.09%) [ +0.09% +0.22% +0.00% / -0.09% +0.31% +0.31%] index_select perm_sorted : Elapsed 0.023 ms (2.277 ms / 100) B = [4, 5, 40, 20] (stride (4000, 20, 100, 1)) A = [4, 16, 40, 20] (stride (1, 4, 64, 2560)) dim = 1 2.460 -> 2.467 ( +0.28%) [ +0.24% +0.08% +0.00% / +0.28% +0.53% +0.41%] index_select const : Elapsed 0.025 ms (2.466 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.16% +0.00% +0.20% / -0.04% +0.53% +0.08%] index_select wrap : Elapsed 0.025 ms (2.469 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.24% +0.37%] index_select linear : Elapsed 0.025 ms (2.466 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.37% +0.41%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.464 -> 2.460 ( -0.16%) [ +0.12% +0.04% +0.00% / -0.16% +0.28% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.467 ms / 100) 2.459 -> 2.463 ( +0.16%) [ +0.37% +0.20% +0.00% / +0.16% +0.45% +0.41%] index_select skip256 : Elapsed 0.025 ms (2.468 ms / 100) 2.459 -> 2.461 ( +0.08%) [ +0.37% +0.00% +0.24% / +0.08% +0.16% +0.12%] index_select spread : Elapsed 0.025 ms (2.468 ms / 100) 2.461 -> 2.459 ( -0.08%) [ +0.16% +0.12% +0.00% / -0.08% +0.20% +0.33%] index_select strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.33% +0.28% +0.00% / +0.00% +0.45% +0.41%] index_select strided 5 : Elapsed 0.025 ms (2.468 ms / 100) 2.454 -> 2.462 ( +0.33%) [ +0.00% +0.33% +0.20% / +0.33% +0.41% +0.37%] index_select strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.451 ( -0.04%) [ +0.24% +0.16% +0.00% / -0.04% +0.16% +0.29%] index_select strided 8 : Elapsed 0.025 ms (2.458 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.00% +0.04% +0.08%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.04% +0.20% +0.00% / +0.00% +0.16% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.00% +0.41% +0.33% / +0.08% +0.37% +0.41%] index_select perm : Elapsed 0.025 ms (2.458 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.16% +0.16% +0.00% / +0.04% +0.04% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) B = [4, 5, 40, 20] (stride (100, 20, 400, 1)) A = [4, 16, 40, 20] (stride (20, 80, 1280, 1)) dim = 1 2.240 -> 2.243 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.18% +0.13%] index_select const : Elapsed 0.022 ms (2.240 ms / 100) 2.304 -> 2.305 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.22% +0.17% +0.04%] index_select wrap : Elapsed 0.023 ms (2.304 ms / 100) 2.308 -> 2.313 ( +0.22%) [ +0.00% +0.09% +0.13% / +0.22% +0.43% +0.26%] index_select linear : Elapsed 0.023 ms (2.308 ms / 100) 2.306 -> 2.306 ( +0.00%) [ +0.17% +0.00% +0.04% / +0.00% +0.43% +0.69%] index_select reverse : Elapsed 0.023 ms (2.310 ms / 100) 2.235 -> 2.233 ( -0.09%) [ +0.13% +0.00% +0.09% / -0.09% +0.40% +0.54%] index_select skip64 : Elapsed 0.022 ms (2.238 ms / 100) 2.239 -> 2.239 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.13% +0.40%] index_select skip256 : Elapsed 0.022 ms (2.240 ms / 100) 2.305 -> 2.310 ( +0.22%) [ +0.17% +0.00% +0.39% / +0.22% +0.30% +0.30%] index_select spread : Elapsed 0.023 ms (2.309 ms / 100) 2.320 -> 2.323 ( +0.13%) [ +0.13% +0.00% +0.22% / +0.22% +0.13% +0.34%] index_select strided 3 : Elapsed 0.023 ms (2.323 ms / 100) 2.311 -> 2.314 ( +0.13%) [ +0.09% +0.13% +0.00% / +0.13% +0.61% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.313 ms / 100) 2.301 -> 2.303 ( +0.09%) [ +0.00% +0.00% +0.35% / +0.09% +0.26% +0.43%] index_select strided 7 : Elapsed 0.023 ms (2.301 ms / 100) 2.251 -> 2.248 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.53% +0.49%] index_select strided 8 : Elapsed 0.023 ms (2.251 ms / 100) 2.310 -> 2.316 ( +0.26%) [ +0.17% +0.00% +0.00% / +0.26% +0.56% +0.52%] index_select random : Elapsed 0.023 ms (2.314 ms / 100) 2.302 -> 2.301 ( -0.04%) [ +0.13% +0.04% +0.00% / -0.04% +0.39% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.305 ms / 100) 2.311 -> 2.311 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.26% +0.22%] index_select perm : Elapsed 0.023 ms (2.312 ms / 100) 2.309 -> 2.313 ( +0.17%) [ +0.30% +0.00% +0.17% / +0.17% +0.30% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.316 ms / 100) B = [4, 5, 40, 20] (stride (20, 80, 400, 1)) A = [4, 16, 40, 20] (stride (320, 20, 1280, 1)) dim = 1 2.234 -> 2.231 ( -0.13%) [ +0.04% +0.00% +0.09% / -0.13% +0.49% +0.54%] index_select const : Elapsed 0.022 ms (2.235 ms / 100) 2.311 -> 2.307 ( -0.17%) [ +0.00% +0.22% +0.04% / +0.17% -0.17% -0.09%] index_select wrap : Elapsed 0.023 ms (2.311 ms / 100) 2.301 -> 2.301 ( +0.00%) [ +0.00% +0.26% +0.04% / +0.00% +0.09% +0.22%] index_select linear : Elapsed 0.023 ms (2.301 ms / 100) 2.307 -> 2.314 ( +0.30%) [ +0.39% +0.09% +0.00% / +0.30% +0.30% +0.30%] index_select reverse : Elapsed 0.023 ms (2.316 ms / 100) 2.230 -> 2.229 ( -0.04%) [ +0.00% +0.00% +0.27% / -0.04% +0.45% +0.45%] index_select skip64 : Elapsed 0.022 ms (2.230 ms / 100) 2.236 -> 2.237 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.31% +0.31%] index_select skip256 : Elapsed 0.022 ms (2.236 ms / 100) 2.310 -> 2.313 ( +0.13%) [ +0.17% +0.00% +0.09% / +0.13% +0.17% +0.17%] index_select spread : Elapsed 0.023 ms (2.314 ms / 100) 2.313 -> 2.311 ( -0.09%) [ +0.00% +0.13% +0.26% / -0.09% +0.00% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.313 ms / 100) 2.290 -> 2.295 ( +0.22%) [ +0.13% +0.04% +0.00% / +0.22% +0.61% +0.44%] index_select strided 5 : Elapsed 0.023 ms (2.293 ms / 100) 2.302 -> 2.307 ( +0.22%) [ +0.13% +0.09% +0.00% / +0.22% +0.39% +0.65%] index_select strided 7 : Elapsed 0.023 ms (2.305 ms / 100) 2.246 -> 2.250 ( +0.18%) [ +0.22% +0.09% +0.00% / +0.22% +0.18% +0.45%] index_select strided 8 : Elapsed 0.023 ms (2.251 ms / 100) 2.271 -> 2.278 ( +0.31%) [ +0.00% +0.13% +0.00% / +0.31% +0.40% +0.44%] index_select random : Elapsed 0.023 ms (2.271 ms / 100) 2.254 -> 2.254 ( +0.00%) [ +0.22% +0.00% +0.27% / +0.00% +0.22% +0.27%] index_select random_sorted : Elapsed 0.023 ms (2.259 ms / 100) 2.307 -> 2.310 ( +0.13%) [ +0.17% +0.22% +0.00% / +0.22% +0.30% +0.13%] index_select perm : Elapsed 0.023 ms (2.311 ms / 100) 2.315 -> 2.310 ( -0.22%) [ +0.04% +0.09% +0.00% / -0.22% +0.13% +0.17%] index_select perm_sorted : Elapsed 0.023 ms (2.316 ms / 100) B = [4, 5, 40, 20] (stride (20, 80, 400, 1)) A = [4, 16, 40, 20] (stride (640, 1, 16, 2560)) dim = 1 2.474 -> 2.481 ( +0.28%) [ +0.00% +0.08% +0.12% / +0.28% +0.44% +0.61%] index_select const : Elapsed 0.025 ms (2.474 ms / 100) 2.474 -> 2.473 ( -0.04%) [ +0.00% +0.28% +0.04% / -0.04% +0.40% +0.53%] index_select wrap : Elapsed 0.025 ms (2.474 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.44% +0.24%] index_select linear : Elapsed 0.025 ms (2.476 ms / 100) 2.475 -> 2.471 ( -0.16%) [ +0.00% +0.04% +0.48% / -0.16% +0.24% +0.44%] index_select reverse : Elapsed 0.025 ms (2.475 ms / 100) 2.468 -> 2.476 ( +0.32%) [ +0.12% +0.00% +0.08% / +0.32% +0.69% +0.57%] index_select skip64 : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.473 ( +0.20%) [ +0.00% +0.20% +0.20% / +0.20% +0.61% +0.61%] index_select skip256 : Elapsed 0.025 ms (2.468 ms / 100) 2.494 -> 2.503 ( +0.36%) [ +0.00% +0.24% +0.12% / +0.36% +0.84% +0.60%] index_select spread : Elapsed 0.025 ms (2.494 ms / 100) 2.496 -> 2.502 ( +0.24%) [ +0.00% +0.28% +0.24% / +0.24% +0.68% +0.56%] index_select strided 3 : Elapsed 0.025 ms (2.496 ms / 100) 2.498 -> 2.503 ( +0.20%) [ +0.00% +0.16% +0.24% / +0.20% +0.52% +0.60%] index_select strided 5 : Elapsed 0.025 ms (2.498 ms / 100) 2.495 -> 2.501 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.24% +0.56% +0.76%] index_select strided 7 : Elapsed 0.025 ms (2.499 ms / 100) 2.492 -> 2.501 ( +0.36%) [ +0.00% +0.40% +0.24% / +0.36% +0.84% +1.00%] index_select strided 8 : Elapsed 0.025 ms (2.492 ms / 100) 2.497 -> 2.503 ( +0.24%) [ +0.12% +0.08% +0.00% / +0.24% +0.52% +0.68%] index_select random : Elapsed 0.025 ms (2.500 ms / 100) 2.498 -> 2.497 ( -0.04%) [ +0.32% +0.08% +0.00% / -0.04% +0.60% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.506 ms / 100) 2.499 -> 2.501 ( +0.08%) [ +0.12% +0.00% +0.16% / +0.08% +0.40% +0.36%] index_select perm : Elapsed 0.025 ms (2.502 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.60% +0.84%] index_select perm_sorted : Elapsed 0.025 ms (2.496 ms / 100) B = [4, 5, 40, 20] (stride (200, 40, 1, 800)) A = [4, 16, 40, 20] (stride (16, 1, 64, 2560)) dim = 1 2.516 -> 2.515 ( -0.04%) [ +0.12% +0.32% +0.00% / +0.36% +0.04% -0.04%] index_select const : Elapsed 0.025 ms (2.519 ms / 100) 2.520 -> 2.514 ( -0.24%) [ +0.00% +0.12% +0.12% / +0.28% -0.04% -0.24%] index_select wrap : Elapsed 0.025 ms (2.520 ms / 100) 2.516 -> 2.516 ( +0.00%) [ +0.28% +0.08% +0.00% / +0.08% +0.04% +0.00%] index_select linear : Elapsed 0.025 ms (2.523 ms / 100) 2.515 -> 2.511 ( -0.16%) [ +0.00% +0.16% +0.20% / -0.04% +0.08% -0.16%] index_select reverse : Elapsed 0.025 ms (2.515 ms / 100) 2.521 -> 2.510 ( -0.44%) [ +0.16% +0.04% +0.00% / +0.16% -0.28% -0.44%] index_select skip64 : Elapsed 0.025 ms (2.525 ms / 100) 2.515 -> 2.515 ( +0.00%) [ +0.40% +0.00% +0.08% / +0.20% +0.00% +0.08%] index_select skip256 : Elapsed 0.025 ms (2.525 ms / 100) 2.543 -> 2.535 ( -0.31%) [ +0.24% +0.00% +0.08% / +0.08% -0.31% -0.12%] index_select spread : Elapsed 0.025 ms (2.549 ms / 100) 2.537 -> 2.529 ( -0.32%) [ +0.00% +0.39% +0.16% / +0.28% -0.32% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.537 ms / 100) 2.546 -> 2.538 ( -0.31%) [ +0.00% +0.08% +0.12% / -0.12% -0.31% -0.20%] index_select strided 5 : Elapsed 0.025 ms (2.546 ms / 100) 2.547 -> 2.539 ( -0.31%) [ +0.20% +0.00% +0.00% / -0.16% -0.31% -0.12%] index_select strided 7 : Elapsed 0.026 ms (2.552 ms / 100) 2.539 -> 2.539 ( +0.00%) [ +0.59% +0.00% +0.47% / +0.28% +0.24% +0.00%] index_select strided 8 : Elapsed 0.026 ms (2.554 ms / 100) 2.544 -> 2.540 ( -0.16%) [ +0.16% +0.00% +0.16% / -0.12% -0.16% -0.16%] index_select random : Elapsed 0.025 ms (2.548 ms / 100) 2.542 -> 2.540 ( -0.08%) [ +0.47% +0.16% +0.00% / -0.08% -0.04% -0.04%] index_select random_sorted : Elapsed 0.026 ms (2.554 ms / 100) 2.546 -> 2.538 ( -0.31%) [ +0.20% +0.08% +0.00% / -0.08% -0.31% -0.31%] index_select perm : Elapsed 0.026 ms (2.551 ms / 100) 2.543 -> 2.538 ( -0.20%) [ +0.00% +0.24% +0.35% / -0.16% -0.20% +0.00%] index_select perm_sorted : Elapsed 0.025 ms (2.543 ms / 100) B = [4, 5, 40, 20] (stride (40, 160, 1, 800)) A = [4, 16, 40, 20] (stride (16, 1, 64, 2560)) dim = 1 2.510 -> 2.522 ( +0.48%) [ +0.04% +0.08% +0.00% / +0.48% +0.72% +0.72%] index_select const : Elapsed 0.025 ms (2.511 ms / 100) 2.516 -> 2.503 ( -0.52%) [ +0.00% +0.00% +0.08% / -0.52% +0.64% +0.16%] index_select wrap : Elapsed 0.025 ms (2.516 ms / 100) 2.515 -> 2.517 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.20% +0.12%] index_select linear : Elapsed 0.025 ms (2.515 ms / 100) 2.510 -> 2.512 ( +0.08%) [ +0.12% +0.00% +0.16% / +0.08% +0.84% +0.68%] index_select reverse : Elapsed 0.025 ms (2.513 ms / 100) 2.504 -> 2.515 ( +0.44%) [ +0.60% +0.00% +0.44% / +0.44% +0.80% +0.64%] index_select skip64 : Elapsed 0.025 ms (2.519 ms / 100) 2.513 -> 2.505 ( -0.32%) [ +0.08% +0.00% +0.08% / -0.32% +0.80% +0.64%] index_select skip256 : Elapsed 0.025 ms (2.515 ms / 100) 2.531 -> 2.538 ( +0.28%) [ +0.47% +0.00% +0.32% / +0.28% +0.67% +0.75%] index_select spread : Elapsed 0.025 ms (2.543 ms / 100) 2.529 -> 2.538 ( +0.36%) [ +0.04% +0.00% +0.32% / +0.36% +1.19% +0.43%] index_select strided 3 : Elapsed 0.025 ms (2.530 ms / 100) 2.534 -> 2.541 ( +0.28%) [ +0.00% +0.16% +0.39% / +0.28% +0.83% +1.07%] index_select strided 5 : Elapsed 0.025 ms (2.534 ms / 100) 2.537 -> 2.542 ( +0.20%) [ +0.20% +0.00% +0.20% / +0.20% +0.71% +0.47%] index_select strided 7 : Elapsed 0.025 ms (2.542 ms / 100) 2.535 -> 2.536 ( +0.04%) [ +0.08% +0.00% +0.28% / +0.04% +0.95% +0.79%] index_select strided 8 : Elapsed 0.025 ms (2.537 ms / 100) 2.535 -> 2.535 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +1.03% +0.39%] index_select random : Elapsed 0.025 ms (2.542 ms / 100) 2.534 -> 2.540 ( +0.24%) [ +0.00% +0.16% +0.32% / +0.24% +0.75% +0.55%] index_select random_sorted : Elapsed 0.025 ms (2.534 ms / 100) 2.536 -> 2.540 ( +0.16%) [ +0.20% +0.00% +0.00% / +0.16% +0.83% +0.91%] index_select perm : Elapsed 0.025 ms (2.541 ms / 100) 2.529 -> 2.540 ( +0.43%) [ +0.67% +0.00% +0.16% / +0.43% +0.91% +0.95%] index_select perm_sorted : Elapsed 0.025 ms (2.546 ms / 100) B = [4, 5, 40, 20] (stride (1, 4, 20, 800)) A = [4, 16, 40, 20] (stride (320, 20, 1280, 1)) dim = 1 2.269 -> 2.268 ( -0.04%) [ +0.26% +0.00% +0.00% / -0.04% +0.26% +0.40%] index_select const : Elapsed 0.023 ms (2.275 ms / 100) 2.330 -> 2.335 ( +0.21%) [ +0.04% +0.34% +0.00% / +0.21% +0.34% +0.21%] index_select wrap : Elapsed 0.023 ms (2.331 ms / 100) 2.329 -> 2.325 ( -0.17%) [ +0.04% +0.13% +0.00% / +0.09% -0.09% -0.17%] index_select linear : Elapsed 0.023 ms (2.330 ms / 100) 2.329 -> 2.334 ( +0.21%) [ +0.13% +0.26% +0.00% / +0.21% +0.60% +0.39%] index_select reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.260 -> 2.261 ( +0.04%) [ +0.22% +0.09% +0.00% / +0.04% +0.35% +0.22%] index_select skip64 : Elapsed 0.023 ms (2.265 ms / 100) 2.267 -> 2.268 ( +0.04%) [ +0.31% +0.09% +0.00% / +0.04% +0.31% +0.40%] index_select skip256 : Elapsed 0.023 ms (2.274 ms / 100) 2.331 -> 2.331 ( +0.00%) [ +0.04% +0.00% +0.09% / +0.17% +0.00% +0.21%] index_select spread : Elapsed 0.023 ms (2.332 ms / 100) 2.335 -> 2.334 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.13% +0.00%] index_select strided 3 : Elapsed 0.023 ms (2.336 ms / 100) 2.319 -> 2.317 ( -0.09%) [ +0.09% +0.09% +0.00% / +0.13% -0.09% +0.04%] index_select strided 5 : Elapsed 0.023 ms (2.321 ms / 100) 2.328 -> 2.326 ( -0.09%) [ +0.00% +0.04% +0.09% / -0.09% +0.09% +0.00%] index_select strided 7 : Elapsed 0.023 ms (2.328 ms / 100) 2.275 -> 2.280 ( +0.22%) [ +0.18% +0.09% +0.00% / +0.22% +0.22% +0.44%] index_select strided 8 : Elapsed 0.023 ms (2.279 ms / 100) 2.335 -> 2.333 ( -0.09%) [ +0.00% +0.00% +0.34% / -0.09% +0.09% -0.09%] index_select random : Elapsed 0.023 ms (2.335 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.00% +0.39% +0.04% / +0.47% +0.21% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.327 ms / 100) 2.330 -> 2.326 ( -0.17%) [ +0.00% +0.09% +0.04% / -0.17% +0.21% +0.09%] index_select perm : Elapsed 0.023 ms (2.330 ms / 100) 2.328 -> 2.333 ( +0.21%) [ +0.09% +0.21% +0.00% / +0.21% +0.26% +0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.330 ms / 100) out_shape = [4, 16, 5, 20] in_shape = [4, 16, 40, 20] idx_dim = 2 B = [4, 16, 5, 20] (stride (20, 400, 80, 1)) A = [4, 16, 40, 20] (stride (800, 3200, 1, 40)) dim = 2 1.607 -> 1.606 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.44% +0.44%] index_select const : Elapsed 0.016 ms (1.607 ms / 100) 1.609 -> 1.609 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.44% +0.56%] index_select wrap : Elapsed 0.016 ms (1.609 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.37% +0.37%] index_select linear : Elapsed 0.016 ms (1.607 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select reverse : Elapsed 0.016 ms (1.607 ms / 100) 1.608 -> 1.607 ( -0.06%) [ +0.00% +0.12% +0.06% / -0.06% +0.56% +0.50%] index_select skip64 : Elapsed 0.016 ms (1.608 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select skip256 : Elapsed 0.016 ms (1.606 ms / 100) 1.602 -> 1.602 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.56% +0.44%] index_select spread : Elapsed 0.016 ms (1.604 ms / 100) 1.601 -> 1.605 ( +0.25%) [ +0.56% +0.00% +0.19% / +0.25% +0.69% +0.69%] index_select strided 3 : Elapsed 0.016 ms (1.610 ms / 100) 1.598 -> 1.603 ( +0.31%) [ +0.31% +0.31% +0.00% / +0.31% +0.69% +1.25%] index_select strided 5 : Elapsed 0.016 ms (1.603 ms / 100) 1.601 -> 1.602 ( +0.06%) [ +0.25% +0.00% +0.00% / +0.06% +0.50% +0.50%] index_select strided 7 : Elapsed 0.016 ms (1.605 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.44% +0.44%] index_select strided 8 : Elapsed 0.016 ms (1.606 ms / 100) 1.598 -> 1.599 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.56% +0.56%] index_select strided 16 : Elapsed 0.016 ms (1.598 ms / 100) 1.598 -> 1.598 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.75% +0.69%] index_select random : Elapsed 0.016 ms (1.600 ms / 100) 1.597 -> 1.599 ( +0.13%) [ +0.06% +0.00% +0.06% / +0.13% +0.69% +0.56%] index_select random_sorted : Elapsed 0.016 ms (1.598 ms / 100) 1.603 -> 1.605 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.69% +0.62%] index_select perm : Elapsed 0.016 ms (1.604 ms / 100) 1.596 -> 1.596 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.63% +0.69%] index_select perm_sorted : Elapsed 0.016 ms (1.597 ms / 100) B = [4, 16, 5, 20] (stride (320, 20, 1280, 1)) A = [4, 16, 40, 20] (stride (640, 1, 16, 2560)) dim = 2 1.467 -> 1.468 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.55% +0.55%] index_select const : Elapsed 0.015 ms (1.468 ms / 100) 1.460 -> 1.460 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.62% +0.55%] index_select wrap : Elapsed 0.015 ms (1.461 ms / 100) 1.460 -> 1.460 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.62% +0.55%] index_select linear : Elapsed 0.015 ms (1.461 ms / 100) 1.440 -> 1.441 ( +0.07%) [ +0.28% +0.00% +0.00% / +0.07% +0.63% +0.63%] index_select reverse : Elapsed 0.014 ms (1.444 ms / 100) 1.467 -> 1.468 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.468 ms / 100) 1.467 -> 1.468 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.68%] index_select skip256 : Elapsed 0.015 ms (1.468 ms / 100) 1.446 -> 1.447 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.76% +0.69%] index_select spread : Elapsed 0.014 ms (1.447 ms / 100) 1.460 -> 1.462 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.75% +0.89%] index_select strided 3 : Elapsed 0.015 ms (1.462 ms / 100) 1.471 -> 1.472 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.75% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.472 ms / 100) 1.446 -> 1.451 ( +0.35%) [ +0.21% +0.07% +0.00% / +0.35% +0.62% +0.69%] index_select strided 7 : Elapsed 0.014 ms (1.449 ms / 100) 1.446 -> 1.449 ( +0.21%) [ +0.21% +0.21% +0.00% / +0.21% +0.62% +0.83%] index_select strided 8 : Elapsed 0.014 ms (1.449 ms / 100) 1.465 -> 1.467 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.75% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.466 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.89% +0.82%] index_select random : Elapsed 0.015 ms (1.457 ms / 100) 1.462 -> 1.463 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.463 ms / 100) 1.454 -> 1.454 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.89% +0.69%] index_select perm : Elapsed 0.015 ms (1.454 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.89% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.468 ms / 100) out_shape = [4, 16, 40, 5] in_shape = [4, 16, 40, 20] idx_dim = 3 B = [4, 16, 40, 5] (stride (3200, 200, 5, 1)) A = [4, 16, 40, 20] (stride (320, 20, 1280, 1)) dim = 3 1.588 -> 1.587 ( -0.06%) [ +0.00% +0.19% +0.06% / -0.06% +0.63% +0.94%] index_select const : Elapsed 0.016 ms (1.588 ms / 100) 1.598 -> 1.599 ( +0.06%) [ +0.06% +0.00% +0.13% / +0.06% +0.81% +0.88%] index_select wrap : Elapsed 0.016 ms (1.599 ms / 100) 1.594 -> 1.598 ( +0.25%) [ +0.19% +0.13% +0.00% / +0.25% +1.13% +1.07%] index_select linear : Elapsed 0.016 ms (1.597 ms / 100) 1.599 -> 1.601 ( +0.13%) [ +0.06% +0.00% +0.19% / +0.13% +0.75% +0.63%] index_select reverse : Elapsed 0.016 ms (1.600 ms / 100) 1.589 -> 1.586 ( -0.19%) [ +0.19% +0.00% +0.00% / -0.19% +0.50% +0.50%] index_select skip64 : Elapsed 0.016 ms (1.592 ms / 100) 1.586 -> 1.587 ( +0.06%) [ +0.00% +0.13% +0.13% / +0.06% +0.95% +0.69%] index_select skip256 : Elapsed 0.016 ms (1.586 ms / 100) 1.621 -> 1.618 ( -0.19%) [ +0.12% +0.12% +0.00% / -0.19% +0.43% +0.62%] index_select spread : Elapsed 0.016 ms (1.623 ms / 100) 1.618 -> 1.619 ( +0.06%) [ +0.19% +0.19% +0.00% / +0.06% +0.68% +0.62%] index_select strided 3 : Elapsed 0.016 ms (1.621 ms / 100) 1.621 -> 1.624 ( +0.19%) [ +0.19% +0.00% +0.00% / +0.19% +0.93% +0.49%] index_select strided 5 : Elapsed 0.016 ms (1.624 ms / 100) 1.613 -> 1.611 ( -0.12%) [ +0.00% +0.00% +0.25% / -0.12% +0.62% +0.74%] index_select strided 7 : Elapsed 0.016 ms (1.613 ms / 100) 1.613 -> 1.624 ( +0.68%) [ +0.37% +0.00% +0.68% / +1.05% +0.81% +0.68%] index_select strided 8 : Elapsed 0.016 ms (1.619 ms / 100) 1.619 -> 1.621 ( +0.12%) [ +0.31% +0.00% +0.12% / +0.12% +0.56% +0.99%] index_select strided 16 : Elapsed 0.016 ms (1.624 ms / 100) 1.614 -> 1.616 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.81% +0.68%] index_select random : Elapsed 0.016 ms (1.616 ms / 100) 1.614 -> 1.621 ( +0.43%) [ +0.12% +0.25% +0.00% / +0.43% +0.87% +1.05%] index_select random_sorted : Elapsed 0.016 ms (1.616 ms / 100) 1.616 -> 1.620 ( +0.25%) [ +0.00% +0.00% +0.00% / +0.25% +0.62% +0.74%] index_select perm : Elapsed 0.016 ms (1.616 ms / 100) 1.620 -> 1.621 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.56%] index_select perm_sorted : Elapsed 0.016 ms (1.621 ms / 100) B = [4, 16, 40, 5] (stride (3200, 200, 1, 40)) A = [4, 16, 40, 20] (stride (20, 80, 1280, 1)) dim = 3 1.805 -> 1.802 ( -0.17%) [ +0.00% +0.06% +0.00% / +0.00% -0.06% -0.17%] index_select const : Elapsed 0.018 ms (1.805 ms / 100) 1.804 -> 1.805 ( +0.06%) [ +0.11% +0.00% +0.28% / +0.06% +0.55% +0.94%] index_select wrap : Elapsed 0.018 ms (1.806 ms / 100) 1.806 -> 1.811 ( +0.28%) [ +0.17% +0.22% +0.00% / +0.28% +0.66% +0.66%] index_select linear : Elapsed 0.018 ms (1.809 ms / 100) 1.813 -> 1.813 ( +0.00%) [ +0.22% +0.00% +0.06% / +0.00% +0.44% +0.50%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.804 -> 1.806 ( +0.11%) [ +0.17% +0.00% +0.11% / +0.11% +0.33% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.807 ms / 100) 1.800 -> 1.804 ( +0.22%) [ +0.06% +0.28% +0.00% / +0.22% +0.39% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.801 ms / 100) 1.823 -> 1.827 ( +0.22%) [ +0.22% +0.27% +0.00% / +0.22% +0.66% +0.55%] index_select spread : Elapsed 0.018 ms (1.827 ms / 100) 1.820 -> 1.827 ( +0.38%) [ +0.33% +0.22% +0.00% / +0.38% +0.93% +0.77%] index_select strided 3 : Elapsed 0.018 ms (1.826 ms / 100) 1.825 -> 1.828 ( +0.16%) [ +0.00% +0.27% +0.05% / +0.16% +0.77% +0.44%] index_select strided 5 : Elapsed 0.018 ms (1.825 ms / 100) 1.828 -> 1.829 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.38% +0.11% +0.05%] index_select strided 7 : Elapsed 0.018 ms (1.830 ms / 100) 1.828 -> 1.827 ( -0.05%) [ +0.00% +0.00% +0.11% / -0.05% +0.22% +0.38%] index_select strided 8 : Elapsed 0.018 ms (1.828 ms / 100) 1.826 -> 1.829 ( +0.16%) [ +0.05% +0.22% +0.00% / +0.16% +0.16% +0.27%] index_select strided 16 : Elapsed 0.018 ms (1.827 ms / 100) 1.826 -> 1.828 ( +0.11%) [ +0.16% +0.33% +0.00% / +0.27% +0.11% +0.16%] index_select random : Elapsed 0.018 ms (1.829 ms / 100) 1.825 -> 1.827 ( +0.11%) [ +0.22% +0.00% +0.11% / +0.22% +0.44% +0.11%] index_select random_sorted : Elapsed 0.018 ms (1.829 ms / 100) 1.829 -> 1.828 ( -0.05%) [ +0.22% +0.33% +0.00% / +0.22% -0.05% +0.22%] index_select perm : Elapsed 0.018 ms (1.833 ms / 100) 1.825 -> 1.825 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.05% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.018 ms (1.825 ms / 100) B = [4, 16, 40, 5] (stride (3200, 1, 16, 640)) dim = 3 fill_cnt = 20 3.533 -> 3.518 ( -0.42%) [ +0.00% +0.11% +0.11% / -0.31% -0.42% -0.40%] index_fill_ const : Elapsed 0.035 ms (3.533 ms / 100) 3.534 -> 3.515 ( -0.54%) [ +0.08% +0.00% +0.17% / -0.48% -0.51% -0.54%] index_fill_ linear : Elapsed 0.035 ms (3.537 ms / 100) 3.525 -> 3.506 ( -0.54%) [ +0.00% +0.00% +0.06% / -0.54% -0.45% -0.43%] index_fill_ reverse : Elapsed 0.035 ms (3.525 ms / 100) 3.553 -> 3.532 ( -0.59%) [ +0.20% +0.00% +0.08% / -0.59% -0.59% -0.45%] index_fill_ skip64 : Elapsed 0.036 ms (3.560 ms / 100) 3.546 -> 3.529 ( -0.48%) [ +0.03% +0.00% +0.00% / -0.42% -0.48% -0.42%] index_fill_ skip256 : Elapsed 0.035 ms (3.547 ms / 100) 3.526 -> 3.508 ( -0.51%) [ +0.03% +0.14% +0.00% / -0.51% -0.45% -0.51%] index_fill_ spread : Elapsed 0.035 ms (3.527 ms / 100) 3.529 -> 3.510 ( -0.54%) [ +0.11% +0.11% +0.00% / -0.37% -0.45% -0.54%] index_fill_ strided 3 : Elapsed 0.035 ms (3.533 ms / 100) 3.537 -> 3.518 ( -0.54%) [ +0.03% +0.00% +0.08% / -0.54% -0.42% -0.28%] index_fill_ random : Elapsed 0.035 ms (3.538 ms / 100) 3.538 -> 3.524 ( -0.40%) [ +0.25% +0.00% +0.17% / -0.40% -0.34% -0.23%] index_fill_ random_sorted : Elapsed 0.035 ms (3.547 ms / 100) B = [4, 16, 40, 5] (stride (1, 800, 4, 160)) A = [4, 16, 40, 20] (stride (1, 3200, 4, 160)) dim = 3 1.774 -> 1.774 ( +0.00%) [ +0.06% +0.00% +0.23% / +0.00% +0.23% +0.00%] index_select const : Elapsed 0.018 ms (1.775 ms / 100) 1.781 -> 1.786 ( +0.28%) [ +0.00% +0.11% +0.00% / +0.28% +1.18% +0.84%] index_select wrap : Elapsed 0.018 ms (1.781 ms / 100) 1.781 -> 1.783 ( +0.11%) [ +0.00% +0.28% +0.00% / +0.11% +1.01% +1.12%] index_select linear : Elapsed 0.018 ms (1.781 ms / 100) 1.784 -> 1.786 ( +0.11%) [ +0.11% +0.00% +0.17% / +0.11% +1.07% +0.73%] index_select reverse : Elapsed 0.018 ms (1.786 ms / 100) 1.772 -> 1.775 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.28% +0.23%] index_select skip64 : Elapsed 0.018 ms (1.775 ms / 100) 1.772 -> 1.776 ( +0.23%) [ +0.06% +0.06% +0.00% / +0.23% +0.40% +0.23%] index_select skip256 : Elapsed 0.018 ms (1.773 ms / 100) 1.782 -> 1.781 ( -0.06%) [ +0.11% +0.00% +0.17% / -0.06% +0.28% +0.11%] index_select spread : Elapsed 0.018 ms (1.784 ms / 100) 1.785 -> 1.783 ( -0.11%) [ +0.22% +0.06% +0.00% / +0.22% -0.11% +0.17%] index_select strided 3 : Elapsed 0.018 ms (1.789 ms / 100) 1.776 -> 1.775 ( -0.06%) [ +0.17% +0.06% +0.00% / -0.06% +0.56% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.779 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.00% +0.06% +0.22% / +0.11% +0.28% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.783 ms / 100) 1.779 -> 1.782 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.39% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.782 ms / 100) 1.778 -> 1.776 ( -0.11%) [ +0.17% +0.00% +0.22% / -0.11% +0.73% +0.62%] index_select strided 16 : Elapsed 0.018 ms (1.781 ms / 100) 1.782 -> 1.784 ( +0.11%) [ +0.06% +0.00% +0.06% / +0.11% +0.67% +0.67%] index_select random : Elapsed 0.018 ms (1.783 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.06% +0.00% +0.17% / +0.00% +0.51% +0.56%] index_select random_sorted : Elapsed 0.018 ms (1.782 ms / 100) 1.779 -> 1.782 ( +0.17%) [ +0.11% +0.00% +0.11% / +0.17% +0.62% +0.67%] index_select perm : Elapsed 0.018 ms (1.781 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.17% +0.00% +0.28% / +0.11% +0.67% +0.62%] index_select perm_sorted : Elapsed 0.018 ms (1.782 ms / 100) B = [4, 16, 40, 5] (stride (80, 5, 320, 1)) A = [4, 16, 40, 20] (stride (640, 40, 1, 2560)) dim = 3 1.475 -> 1.478 ( +0.20%) [ +0.20% +0.00% +0.27% / +0.20% +0.61% +0.34%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.474 -> 1.477 ( +0.20%) [ +0.41% +0.07% +0.00% / +0.20% +0.54% +0.54%] index_select wrap : Elapsed 0.015 ms (1.480 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.61% +0.20% +0.14%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.480 -> 1.474 ( -0.41%) [ +0.00% +0.00% +0.14% / -0.41% +0.27% +0.14%] index_select reverse : Elapsed 0.015 ms (1.480 ms / 100) 1.475 -> 1.474 ( -0.07%) [ +0.00% +0.41% +0.07% / -0.07% +0.41% +0.54%] index_select skip64 : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.27% +0.00% +0.20% / +0.00% +0.47% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.479 ms / 100) 1.487 -> 1.489 ( +0.13%) [ +0.00% +0.00% +0.07% / +0.27% +0.61% +0.13%] index_select spread : Elapsed 0.015 ms (1.487 ms / 100) 1.503 -> 1.487 ( -1.06%) [ +0.00% +0.00% +0.20% / +0.33% -0.86% -1.06%] index_select strided 3 : Elapsed 0.015 ms (1.503 ms / 100) 1.486 -> 1.490 ( +0.27%) [ +0.00% +0.07% +0.07% / +0.27% +0.34% +0.40%] index_select strided 5 : Elapsed 0.015 ms (1.486 ms / 100) 1.489 -> 1.488 ( -0.07%) [ +0.34% +0.20% +0.00% / -0.07% +0.27% +0.40%] index_select strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.488 -> 1.487 ( -0.07%) [ +0.20% +0.07% +0.00% / -0.07% +0.40% +0.40%] index_select strided 8 : Elapsed 0.015 ms (1.491 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.54% +0.47%] index_select strided 16 : Elapsed 0.015 ms (1.488 ms / 100) 1.503 -> 1.505 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.33% +0.27%] index_select random : Elapsed 0.015 ms (1.505 ms / 100) 1.498 -> 1.500 ( +0.13%) [ +0.00% +0.13% +0.07% / +0.13% +0.20% +0.13%] index_select random_sorted : Elapsed 0.015 ms (1.498 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.34% +0.00% +0.41% / -0.07% +0.14% +0.20%] index_select perm : Elapsed 0.015 ms (1.485 ms / 100) 1.479 -> 1.482 ( +0.20%) [ +0.54% +0.00% +0.07% / +0.27% +0.27% +0.20%] index_select perm_sorted : Elapsed 0.015 ms (1.487 ms / 100) B = [4, 16, 40, 5] (stride (640, 40, 1, 2560)) A = [4, 16, 40, 20] (stride (800, 3200, 20, 1)) dim = 3 1.638 -> 1.641 ( +0.18%) [ +0.00% +0.18% +0.12% / +0.18% +1.04% +0.85%] index_select const : Elapsed 0.016 ms (1.638 ms / 100) 1.647 -> 1.647 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.79% +0.85%] index_select wrap : Elapsed 0.016 ms (1.647 ms / 100) 1.647 -> 1.647 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.79% +0.79%] index_select linear : Elapsed 0.016 ms (1.647 ms / 100) 1.648 -> 1.650 ( +0.12%) [ +0.00% +0.24% +0.00% / +0.12% +0.97% +1.03%] index_select reverse : Elapsed 0.016 ms (1.648 ms / 100) 1.638 -> 1.641 ( +0.18%) [ +0.06% +0.12% +0.00% / +0.18% +0.61% +0.61%] index_select skip64 : Elapsed 0.016 ms (1.639 ms / 100) 1.637 -> 1.637 ( +0.00%) [ +0.31% +0.31% +0.00% / +0.00% +1.10% +0.92%] index_select skip256 : Elapsed 0.016 ms (1.642 ms / 100) 1.671 -> 1.670 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +1.32% +1.26%] index_select spread : Elapsed 0.017 ms (1.672 ms / 100) 1.671 -> 1.675 ( +0.24%) [ +0.36% +0.00% +0.06% / +0.24% +1.02% +1.08%] index_select strided 3 : Elapsed 0.017 ms (1.677 ms / 100) 1.671 -> 1.671 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +1.14% +1.38%] index_select strided 5 : Elapsed 0.017 ms (1.673 ms / 100) 1.660 -> 1.664 ( +0.24%) [ +0.00% +0.06% +0.12% / +0.24% +1.02% +1.08%] index_select strided 7 : Elapsed 0.017 ms (1.660 ms / 100) 1.658 -> 1.659 ( +0.06%) [ +0.00% +0.06% +0.24% / +0.06% +1.21% +1.15%] index_select strided 8 : Elapsed 0.017 ms (1.658 ms / 100) 1.669 -> 1.669 ( +0.00%) [ +0.00% +0.24% +0.06% / +0.00% +1.14% +0.84%] index_select strided 16 : Elapsed 0.017 ms (1.669 ms / 100) 1.648 -> 1.652 ( +0.24%) [ +0.24% +0.30% +0.00% / +0.24% +1.27% +1.09%] index_select random : Elapsed 0.017 ms (1.652 ms / 100) 1.658 -> 1.660 ( +0.12%) [ +0.06% +0.00% +0.06% / +0.12% +1.09% +1.03%] index_select random_sorted : Elapsed 0.017 ms (1.659 ms / 100) 1.661 -> 1.662 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +1.02% +0.72%] index_select perm : Elapsed 0.017 ms (1.661 ms / 100) 1.662 -> 1.662 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.32% +1.14%] index_select perm_sorted : Elapsed 0.017 ms (1.662 ms / 100) out_shape = [5, 20, 16, 40] in_shape = [4, 20, 16, 40] idx_dim = 0 B = [5, 20, 16, 40] (stride (12800, 640, 1, 16)) A = [4, 20, 16, 40] (stride (12800, 16, 1, 320)) dim = 0 5.596 -> 5.550 ( -0.82%) [ +0.09% +0.13% +0.00% / +0.00% -0.68% -0.82%] index_add_ linear : Elapsed 0.056 ms (5.601 ms / 100) 5.543 -> 5.491 ( -0.94%) [ +0.09% +0.00% +0.16% / +0.05% -0.83% -0.94%] index_copy_ linear : Elapsed 0.055 ms (5.548 ms / 100) 5.587 -> 5.544 ( -0.77%) [ +0.11% +0.00% +0.13% / +0.05% -0.66% -0.77%] index_add_ reverse : Elapsed 0.056 ms (5.593 ms / 100) 5.537 -> 5.495 ( -0.76%) [ +0.13% +0.00% +0.36% / +0.11% -0.76% -0.72%] index_copy_ reverse : Elapsed 0.055 ms (5.544 ms / 100) 5.593 -> 5.555 ( -0.68%) [ +0.00% +0.07% +0.05% / +0.16% -0.64% -0.68%] index_add_ spread : Elapsed 0.056 ms (5.593 ms / 100) 5.544 -> 5.493 ( -0.92%) [ +0.07% +0.00% +0.05% / +0.05% -0.85% -0.92%] index_copy_ spread : Elapsed 0.055 ms (5.548 ms / 100) 5.581 -> 5.554 ( -0.48%) [ +0.00% +0.02% +0.02% / +0.11% -0.45% -0.48%] index_add_ strided 3 : Elapsed 0.056 ms (5.581 ms / 100) 5.522 -> 5.503 ( -0.34%) [ +0.05% +0.00% +0.02% / +0.09% -0.31% -0.34%] index_copy_ strided 3 : Elapsed 0.055 ms (5.525 ms / 100) 5.569 -> 5.545 ( -0.43%) [ +0.00% +0.05% +0.11% / +0.04% -0.32% -0.43%] index_add_ perm : Elapsed 0.056 ms (5.569 ms / 100) 5.524 -> 5.493 ( -0.56%) [ +0.00% +0.00% +0.02% / +0.13% -0.42% -0.56%] index_copy_ perm : Elapsed 0.055 ms (5.524 ms / 100) 5.585 -> 5.552 ( -0.59%) [ +0.21% +0.00% +0.18% / +0.14% -0.59% -0.48%] index_add_ perm_sorted : Elapsed 0.056 ms (5.597 ms / 100) 5.536 -> 5.498 ( -0.69%) [ +0.04% +0.00% +0.09% / +0.11% -0.69% -0.67%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.538 ms / 100) 5.797 -> 5.793 ( -0.07%) [ +0.02% +0.00% +0.09% / +0.19% -0.07% +0.00%] index_select const : Elapsed 0.058 ms (5.798 ms / 100) 5.858 -> 5.825 ( -0.56%) [ +0.00% +0.15% +0.36% / +0.12% -0.50% -0.56%] index_select wrap : Elapsed 0.059 ms (5.858 ms / 100) 5.856 -> 5.820 ( -0.61%) [ +0.07% +0.17% +0.00% / +0.10% -0.61% -0.50%] index_select linear : Elapsed 0.059 ms (5.860 ms / 100) 5.858 -> 5.810 ( -0.82%) [ +0.10% +0.00% +0.09% / +0.14% -0.68% -0.82%] index_select reverse : Elapsed 0.059 ms (5.864 ms / 100) 5.794 -> 5.786 ( -0.14%) [ +0.10% +0.00% +0.19% / +0.24% -0.14% -0.03%] index_select skip64 : Elapsed 0.058 ms (5.800 ms / 100) 5.795 -> 5.793 ( -0.03%) [ +0.07% +0.00% +0.03% / +0.17% +0.02% -0.03%] index_select skip256 : Elapsed 0.058 ms (5.799 ms / 100) 5.864 -> 5.816 ( -0.82%) [ +0.07% +0.00% +0.05% / +0.09% -0.82% -0.77%] index_select spread : Elapsed 0.059 ms (5.868 ms / 100) 5.866 -> 5.816 ( -0.85%) [ +0.03% +0.00% +0.02% / +0.02% -0.66% -0.85%] index_select strided 3 : Elapsed 0.059 ms (5.868 ms / 100) 5.838 -> 5.798 ( -0.69%) [ +0.21% +0.00% +0.24% / +0.17% -0.69% -0.63%] index_select random : Elapsed 0.058 ms (5.850 ms / 100) 5.860 -> 5.816 ( -0.75%) [ +0.00% +0.03% +0.27% / +0.14% -0.75% -0.67%] index_select random_sorted : Elapsed 0.059 ms (5.860 ms / 100) B = [5, 20, 16, 40] (stride (12800, 16, 1, 320)) A = [4, 20, 16, 40] (stride (12800, 1, 800, 20)) dim = 0 3.457 -> 3.451 ( -0.17%) [ +0.17% +0.00% +0.12% / -0.03% +0.00% -0.17%] index_add_ linear : Elapsed 0.035 ms (3.463 ms / 100) 3.401 -> 3.397 ( -0.12%) [ +0.24% +0.03% +0.00% / +0.09% +0.03% -0.12%] index_copy_ linear : Elapsed 0.034 ms (3.409 ms / 100) 3.456 -> 3.463 ( +0.20%) [ +0.00% +0.09% +0.23% / +0.20% +0.35% +0.26%] index_add_ reverse : Elapsed 0.035 ms (3.456 ms / 100) 3.410 -> 3.406 ( -0.12%) [ +0.00% +0.09% +0.18% / -0.12% -0.06% +0.15%] index_copy_ reverse : Elapsed 0.034 ms (3.410 ms / 100) 3.462 -> 3.448 ( -0.40%) [ +0.06% +0.12% +0.00% / -0.06% -0.40% -0.32%] index_add_ spread : Elapsed 0.035 ms (3.464 ms / 100) 3.404 -> 3.402 ( -0.06%) [ +0.00% +0.15% +0.26% / +0.03% +0.26% -0.06%] index_copy_ spread : Elapsed 0.034 ms (3.404 ms / 100) 3.473 -> 3.464 ( -0.26%) [ +0.00% +0.03% +0.03% / -0.20% -0.26% +0.00%] index_add_ strided 3 : Elapsed 0.035 ms (3.473 ms / 100) 3.418 -> 3.402 ( -0.47%) [ +0.03% +0.09% +0.00% / +0.15% -0.47% -0.32%] index_copy_ strided 3 : Elapsed 0.034 ms (3.419 ms / 100) 3.458 -> 3.462 ( +0.12%) [ +0.35% +0.32% +0.00% / +0.12% +0.81% +0.84%] index_add_ perm : Elapsed 0.035 ms (3.470 ms / 100) 3.417 -> 3.410 ( -0.20%) [ +0.06% +0.18% +0.00% / +0.18% -0.15% -0.20%] index_copy_ perm : Elapsed 0.034 ms (3.419 ms / 100) 3.460 -> 3.463 ( +0.09%) [ +0.14% +0.00% +0.32% / +0.09% +0.32% +0.14%] index_add_ perm_sorted : Elapsed 0.035 ms (3.465 ms / 100) 3.418 -> 3.412 ( -0.18%) [ +0.03% +0.00% +0.00% / -0.18% -0.06% +0.06%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.419 ms / 100) 3.535 -> 3.534 ( -0.03%) [ +0.06% +0.17% +0.00% / +0.03% +0.06% -0.03%] index_select const : Elapsed 0.035 ms (3.537 ms / 100) 3.560 -> 3.564 ( +0.11%) [ +0.11% +0.00% +0.20% / +0.17% +0.11% +0.25%] index_select wrap : Elapsed 0.036 ms (3.564 ms / 100) 3.563 -> 3.552 ( -0.31%) [ +0.00% +0.06% +0.08% / +0.03% -0.06% -0.31%] index_select linear : Elapsed 0.036 ms (3.563 ms / 100) 3.544 -> 3.524 ( -0.56%) [ +0.00% +0.11% +0.06% / +0.08% -0.37% -0.56%] index_select reverse : Elapsed 0.035 ms (3.544 ms / 100) 3.575 -> 3.528 ( -1.31%) [ +0.08% +0.06% +0.00% / +0.03% -1.15% -1.31%] index_select skip64 : Elapsed 0.036 ms (3.578 ms / 100) 3.563 -> 3.558 ( -0.14%) [ +0.00% +0.22% +0.14% / +0.20% -0.14% -0.08%] index_select skip256 : Elapsed 0.036 ms (3.563 ms / 100) 3.526 -> 3.535 ( +0.26%) [ +0.20% +0.17% +0.00% / +0.28% +0.26% +0.43%] index_select spread : Elapsed 0.035 ms (3.533 ms / 100) 3.532 -> 3.539 ( +0.20%) [ +0.17% +0.11% +0.00% / +0.20% +0.48% +0.54%] index_select strided 3 : Elapsed 0.035 ms (3.538 ms / 100) 3.568 -> 3.539 ( -0.81%) [ +0.00% +0.36% +0.08% / +0.22% -0.64% -0.81%] index_select random : Elapsed 0.036 ms (3.568 ms / 100) 3.564 -> 3.527 ( -1.04%) [ +0.17% +0.00% +0.08% / +0.34% -0.93% -1.04%] index_select random_sorted : Elapsed 0.036 ms (3.570 ms / 100) B = [5, 20, 16, 40] (stride (20, 1, 4000, 100)) A = [4, 20, 16, 40] (stride (12800, 40, 800, 1)) dim = 0 5.625 -> 5.620 ( -0.09%) [ +0.27% +0.00% +0.14% / +0.18% -0.02% -0.09%] index_add_ linear : Elapsed 0.056 ms (5.640 ms / 100) 5.575 -> 5.571 ( -0.07%) [ +0.00% +0.09% +0.16% / +0.16% -0.05% -0.07%] index_copy_ linear : Elapsed 0.056 ms (5.575 ms / 100) 5.635 -> 5.617 ( -0.32%) [ +0.00% +0.00% +0.07% / +0.20% -0.14% -0.32%] index_add_ reverse : Elapsed 0.056 ms (5.635 ms / 100) 5.577 -> 5.564 ( -0.23%) [ +0.00% +0.11% +0.27% / +0.27% -0.23% -0.22%] index_copy_ reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.622 -> 5.619 ( -0.05%) [ +0.16% +0.00% +0.25% / +0.18% +0.00% -0.05%] index_add_ spread : Elapsed 0.056 ms (5.631 ms / 100) 5.565 -> 5.564 ( -0.02%) [ +0.09% +0.00% +0.29% / +0.14% +0.00% -0.02%] index_copy_ spread : Elapsed 0.056 ms (5.570 ms / 100) 5.632 -> 5.621 ( -0.20%) [ +0.02% +0.00% +0.07% / +0.07% -0.20% -0.05%] index_add_ strided 3 : Elapsed 0.056 ms (5.633 ms / 100) 5.574 -> 5.568 ( -0.11%) [ +0.00% +0.02% +0.07% / -0.02% -0.11% -0.04%] index_copy_ strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.614 -> 5.613 ( -0.02%) [ +0.07% +0.00% +0.02% / +0.20% +0.12% -0.02%] index_add_ perm : Elapsed 0.056 ms (5.618 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.00% +0.05% +0.18% / +0.13% +0.02% +0.07%] index_copy_ perm : Elapsed 0.056 ms (5.564 ms / 100) 5.630 -> 5.620 ( -0.18%) [ +0.12% +0.00% +0.16% / +0.00% -0.07% -0.18%] index_add_ perm_sorted : Elapsed 0.056 ms (5.637 ms / 100) 5.574 -> 5.565 ( -0.16%) [ +0.00% +0.00% +0.18% / +0.11% -0.16% -0.05%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.574 ms / 100) 5.771 -> 5.775 ( +0.07%) [ +0.00% +0.02% +0.07% / +0.07% +0.50% +0.42%] index_select const : Elapsed 0.058 ms (5.771 ms / 100) 5.894 -> 5.882 ( -0.20%) [ +0.07% +0.00% +0.00% / +0.07% -0.20% -0.17%] index_select wrap : Elapsed 0.059 ms (5.898 ms / 100) 5.870 -> 5.859 ( -0.19%) [ +0.00% +0.03% +0.09% / +0.07% -0.09% -0.19%] index_select linear : Elapsed 0.059 ms (5.870 ms / 100) 5.866 -> 5.855 ( -0.19%) [ +0.00% +0.07% +0.07% / +0.17% -0.17% -0.19%] index_select reverse : Elapsed 0.059 ms (5.866 ms / 100) 5.758 -> 5.759 ( +0.02%) [ +0.02% +0.00% +0.12% / +0.02% +0.38% +0.31%] index_select skip64 : Elapsed 0.058 ms (5.759 ms / 100) 5.771 -> 5.778 ( +0.12%) [ +0.00% +0.07% +0.17% / +0.12% +0.40% +0.43%] index_select skip256 : Elapsed 0.058 ms (5.771 ms / 100) 5.866 -> 5.861 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% -0.09% -0.09%] index_select spread : Elapsed 0.059 ms (5.866 ms / 100) 5.872 -> 5.858 ( -0.24%) [ +0.10% +0.00% +0.12% / +0.22% -0.03% -0.24%] index_select strided 3 : Elapsed 0.059 ms (5.878 ms / 100) 5.839 -> 5.829 ( -0.17%) [ +0.00% +0.02% +0.07% / +0.05% -0.17% +0.03%] index_select random : Elapsed 0.058 ms (5.839 ms / 100) 5.865 -> 5.838 ( -0.46%) [ +0.07% +0.00% +0.12% / +0.12% -0.24% -0.46%] index_select random_sorted : Elapsed 0.059 ms (5.869 ms / 100) B = [5, 20, 16, 40] (stride (320, 16, 1, 1600)) A = [4, 20, 16, 40] (stride (1, 64, 4, 1280)) dim = 0 3.554 -> 3.563 ( +0.25%) [ +0.28% +0.23% +0.00% / +0.25% +1.07% +0.93%] index_add_ linear : Elapsed 0.036 ms (3.564 ms / 100) 3.528 -> 3.527 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.60% +0.60%] index_copy_ linear : Elapsed 0.035 ms (3.528 ms / 100) 3.573 -> 3.571 ( -0.06%) [ +0.22% +0.20% +0.00% / +0.31% +0.08% -0.06%] index_add_ reverse : Elapsed 0.036 ms (3.581 ms / 100) 3.536 -> 3.538 ( +0.06%) [ +0.00% +0.14% +0.03% / +0.06% +0.51% +0.57%] index_copy_ reverse : Elapsed 0.035 ms (3.536 ms / 100) 3.570 -> 3.569 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +1.01% +0.84%] index_add_ spread : Elapsed 0.036 ms (3.571 ms / 100) 3.536 -> 3.539 ( +0.08%) [ +0.00% +0.14% +0.14% / +0.08% +0.93% +0.76%] index_copy_ spread : Elapsed 0.035 ms (3.536 ms / 100) 3.555 -> 3.565 ( +0.28%) [ +0.25% +0.14% +0.00% / +0.28% +0.84% +0.90%] index_add_ strided 3 : Elapsed 0.036 ms (3.564 ms / 100) 3.532 -> 3.536 ( +0.11%) [ +0.00% +0.31% +0.00% / +0.11% +0.68% +0.65%] index_copy_ strided 3 : Elapsed 0.035 ms (3.532 ms / 100) 3.565 -> 3.570 ( +0.14%) [ +0.00% +0.22% +0.08% / +0.14% +0.62% +0.45%] index_add_ perm : Elapsed 0.036 ms (3.565 ms / 100) 3.534 -> 3.534 ( +0.00%) [ +0.17% +0.06% +0.00% / +0.00% +0.34% +0.40%] index_copy_ perm : Elapsed 0.035 ms (3.540 ms / 100) 3.560 -> 3.562 ( +0.06%) [ +0.14% +0.00% +0.03% / +0.06% +1.10% +1.04%] index_add_ perm_sorted : Elapsed 0.036 ms (3.565 ms / 100) 3.544 -> 3.552 ( +0.23%) [ +0.11% +0.23% +0.00% / +0.23% +0.65% +0.73%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.548 ms / 100) 3.682 -> 3.685 ( +0.08%) [ +0.05% +0.00% +0.03% / +0.08% +0.81% +0.84%] index_select const : Elapsed 0.037 ms (3.684 ms / 100) 3.700 -> 3.709 ( +0.24%) [ +0.27% +0.08% +0.00% / +0.24% +0.92% +0.86%] index_select wrap : Elapsed 0.037 ms (3.710 ms / 100) 3.706 -> 3.707 ( +0.03%) [ +0.05% +0.00% +0.13% / +0.03% +0.73% +0.67%] index_select linear : Elapsed 0.037 ms (3.708 ms / 100) 3.716 -> 3.712 ( -0.11%) [ +0.11% +0.00% +0.03% / -0.11% +0.13% +0.13%] index_select reverse : Elapsed 0.037 ms (3.720 ms / 100) 3.713 -> 3.719 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.38% +0.16%] index_select skip64 : Elapsed 0.037 ms (3.719 ms / 100) 3.705 -> 3.702 ( -0.08%) [ +0.13% +0.03% +0.00% / -0.08% +0.40% +0.32%] index_select skip256 : Elapsed 0.037 ms (3.710 ms / 100) 3.689 -> 3.690 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.95% +0.76%] index_select spread : Elapsed 0.037 ms (3.692 ms / 100) 3.689 -> 3.690 ( +0.03%) [ +0.16% +0.08% +0.00% / +0.03% +0.70% +0.92%] index_select strided 3 : Elapsed 0.037 ms (3.695 ms / 100) 3.716 -> 3.718 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.38% +0.11%] index_select random : Elapsed 0.037 ms (3.716 ms / 100) 3.709 -> 3.720 ( +0.30%) [ +0.22% +0.16% +0.00% / +0.30% +0.49% +0.49%] index_select random_sorted : Elapsed 0.037 ms (3.717 ms / 100) B = [5, 20, 16, 40] (stride (1, 80, 5, 1600)) A = [4, 20, 16, 40] (stride (1, 2560, 160, 4)) dim = 0 5.315 -> 5.307 ( -0.15%) [ +0.11% +0.00% +0.02% / +0.09% -0.11% -0.15%] index_add_ linear : Elapsed 0.053 ms (5.321 ms / 100) 5.267 -> 5.253 ( -0.27%) [ +0.06% +0.00% +0.17% / +0.02% -0.23% -0.27%] index_copy_ linear : Elapsed 0.053 ms (5.270 ms / 100) 5.314 -> 5.305 ( -0.17%) [ +0.02% +0.00% +0.02% / +0.02% -0.09% -0.17%] index_add_ reverse : Elapsed 0.053 ms (5.315 ms / 100) 5.265 -> 5.252 ( -0.25%) [ +0.08% +0.00% +0.15% / +0.19% -0.25% -0.09%] index_copy_ reverse : Elapsed 0.053 ms (5.269 ms / 100) 5.316 -> 5.303 ( -0.24%) [ +0.02% +0.00% +0.08% / +0.11% -0.24% -0.24%] index_add_ spread : Elapsed 0.053 ms (5.317 ms / 100) 5.266 -> 5.247 ( -0.36%) [ +0.02% +0.00% +0.21% / +0.00% -0.36% -0.27%] index_copy_ spread : Elapsed 0.053 ms (5.267 ms / 100) 5.321 -> 5.309 ( -0.23%) [ +0.04% +0.00% +0.04% / -0.04% -0.19% -0.23%] index_add_ strided 3 : Elapsed 0.053 ms (5.323 ms / 100) 5.275 -> 5.251 ( -0.45%) [ +0.00% +0.00% +0.00% / -0.06% -0.45% -0.34%] index_copy_ strided 3 : Elapsed 0.053 ms (5.275 ms / 100) 5.308 -> 5.302 ( -0.11%) [ +0.06% +0.21% +0.00% / +0.17% -0.04% -0.11%] index_add_ perm : Elapsed 0.053 ms (5.311 ms / 100) 5.265 -> 5.247 ( -0.34%) [ +0.02% +0.00% +0.00% / +0.04% -0.32% -0.34%] index_copy_ perm : Elapsed 0.053 ms (5.266 ms / 100) 5.317 -> 5.304 ( -0.24%) [ +0.02% +0.06% +0.00% / -0.02% -0.15% -0.24%] index_add_ perm_sorted : Elapsed 0.053 ms (5.318 ms / 100) 5.265 -> 5.247 ( -0.34%) [ +0.06% +0.00% +0.21% / +0.09% -0.34% -0.25%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.268 ms / 100) 5.406 -> 5.398 ( -0.15%) [ +0.17% +0.00% +0.13% / +0.26% -0.13% -0.15%] index_select const : Elapsed 0.054 ms (5.415 ms / 100) 5.410 -> 5.396 ( -0.26%) [ +0.00% +0.06% +0.17% / +0.00% -0.26% -0.26%] index_select wrap : Elapsed 0.054 ms (5.410 ms / 100) 5.409 -> 5.395 ( -0.26%) [ +0.17% +0.00% +0.09% / +0.04% -0.26% -0.17%] index_select linear : Elapsed 0.054 ms (5.418 ms / 100) 5.416 -> 5.397 ( -0.35%) [ +0.00% +0.00% +0.17% / +0.11% -0.31% -0.35%] index_select reverse : Elapsed 0.054 ms (5.416 ms / 100) 5.405 -> 5.394 ( -0.20%) [ +0.00% +0.09% +0.17% / +0.26% -0.11% -0.20%] index_select skip64 : Elapsed 0.054 ms (5.405 ms / 100) 5.413 -> 5.388 ( -0.46%) [ +0.06% +0.00% +0.09% / +0.09% -0.46% -0.26%] index_select skip256 : Elapsed 0.054 ms (5.416 ms / 100) 5.408 -> 5.395 ( -0.24%) [ +0.00% +0.02% +0.11% / +0.13% -0.24% -0.20%] index_select spread : Elapsed 0.054 ms (5.408 ms / 100) 5.414 -> 5.401 ( -0.24%) [ +0.00% +0.02% +0.04% / +0.11% -0.13% -0.24%] index_select strided 3 : Elapsed 0.054 ms (5.414 ms / 100) 5.410 -> 5.392 ( -0.33%) [ +0.00% +0.09% +0.04% / +0.09% -0.28% -0.33%] index_select random : Elapsed 0.054 ms (5.410 ms / 100) 5.412 -> 5.397 ( -0.28%) [ +0.00% +0.11% +0.13% / +0.13% -0.28% -0.28%] index_select random_sorted : Elapsed 0.054 ms (5.412 ms / 100) B = [5, 20, 16, 40] (stride (1, 80, 5, 1600)) A = [4, 20, 16, 40] (stride (20, 1, 80, 1280)) dim = 0 5.893 -> 5.898 ( +0.08%) [ +0.00% +0.02% +0.12% / +0.14% +0.19% +0.08%] index_add_ linear : Elapsed 0.059 ms (5.893 ms / 100) 5.877 -> 5.862 ( -0.26%) [ +0.00% +0.07% +0.10% / +0.05% -0.26% -0.17%] index_copy_ linear : Elapsed 0.059 ms (5.877 ms / 100) 5.894 -> 5.902 ( +0.14%) [ +0.08% +0.00% +0.27% / +0.14% +0.15% +0.24%] index_add_ reverse : Elapsed 0.059 ms (5.899 ms / 100) 5.873 -> 5.865 ( -0.14%) [ +0.02% +0.00% +0.09% / +0.29% +0.05% -0.14%] index_copy_ reverse : Elapsed 0.059 ms (5.874 ms / 100) 5.899 -> 5.899 ( +0.00%) [ +0.03% +0.00% +0.10% / +0.08% +0.05% +0.00%] index_add_ spread : Elapsed 0.059 ms (5.901 ms / 100) 5.876 -> 5.866 ( -0.17%) [ +0.00% +0.07% +0.29% / +0.20% -0.15% -0.17%] index_copy_ spread : Elapsed 0.059 ms (5.876 ms / 100) 5.897 -> 5.897 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.22% +0.10% +0.00%] index_add_ strided 3 : Elapsed 0.059 ms (5.903 ms / 100) 5.874 -> 5.867 ( -0.12%) [ +0.00% +0.07% +0.24% / +0.15% -0.12% -0.12%] index_copy_ strided 3 : Elapsed 0.059 ms (5.874 ms / 100) 5.904 -> 5.900 ( -0.07%) [ +0.02% +0.00% +0.07% / -0.02% -0.07% -0.03%] index_add_ perm : Elapsed 0.059 ms (5.905 ms / 100) 5.874 -> 5.863 ( -0.19%) [ +0.05% +0.00% +0.22% / +0.22% -0.19% -0.10%] index_copy_ perm : Elapsed 0.059 ms (5.877 ms / 100) 5.893 -> 5.902 ( +0.15%) [ +0.05% +0.00% +0.07% / +0.15% +0.19% +0.17%] index_add_ perm_sorted : Elapsed 0.059 ms (5.896 ms / 100) 5.876 -> 5.868 ( -0.14%) [ +0.10% +0.00% +0.05% / +0.20% -0.14% -0.14%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.882 ms / 100) 6.064 -> 6.059 ( -0.08%) [ +0.00% +0.05% +0.10% / +0.23% -0.08% -0.02%] index_select const : Elapsed 0.061 ms (6.064 ms / 100) 6.142 -> 6.137 ( -0.08%) [ +0.00% +0.11% +0.34% / +0.29% -0.08% +0.05%] index_select wrap : Elapsed 0.061 ms (6.142 ms / 100) 6.139 -> 6.128 ( -0.18%) [ +0.00% +0.02% +0.13% / +0.18% -0.18% -0.13%] index_select linear : Elapsed 0.061 ms (6.139 ms / 100) 6.128 -> 6.124 ( -0.07%) [ +0.10% +0.00% +0.23% / +0.11% +0.03% -0.07%] index_select reverse : Elapsed 0.061 ms (6.134 ms / 100) 6.062 -> 6.058 ( -0.07%) [ +0.00% +0.13% +0.28% / +0.20% -0.07% +0.02%] index_select skip64 : Elapsed 0.061 ms (6.062 ms / 100) 6.062 -> 6.057 ( -0.08%) [ +0.10% +0.00% +0.36% / +0.15% -0.08% +0.02%] index_select skip256 : Elapsed 0.061 ms (6.068 ms / 100) 6.128 -> 6.117 ( -0.18%) [ +0.00% +0.16% +0.20% / +0.36% -0.08% -0.18%] index_select spread : Elapsed 0.061 ms (6.128 ms / 100) 6.145 -> 6.131 ( -0.23%) [ +0.00% +0.05% +0.08% / +0.34% -0.08% -0.23%] index_select strided 3 : Elapsed 0.061 ms (6.145 ms / 100) 6.143 -> 6.141 ( -0.03%) [ +0.11% +0.00% +0.16% / +0.10% -0.03% +0.00%] index_select random : Elapsed 0.061 ms (6.150 ms / 100) 6.115 -> 6.112 ( -0.05%) [ +0.00% +0.02% +0.02% / +0.02% -0.05% -0.02%] index_select random_sorted : Elapsed 0.061 ms (6.115 ms / 100) out_shape = [4, 5, 16, 40] in_shape = [4, 20, 16, 40] idx_dim = 1 B = [4, 5, 16, 40] (stride (3200, 40, 200, 1)) dim = 1 fill_cnt = 20 3.509 -> 3.489 ( -0.57%) [ +0.11% +0.00% +0.00% / -0.46% -0.51% -0.57%] index_fill_ const : Elapsed 0.035 ms (3.513 ms / 100) 3.525 -> 3.500 ( -0.71%) [ +0.00% +0.03% +0.03% / -0.45% -0.40% -0.71%] index_fill_ linear : Elapsed 0.035 ms (3.525 ms / 100) 3.511 -> 3.495 ( -0.46%) [ +0.00% +0.17% +0.23% / -0.43% -0.40% -0.46%] index_fill_ reverse : Elapsed 0.035 ms (3.511 ms / 100) 3.518 -> 3.499 ( -0.54%) [ +0.09% +0.00% +0.06% / -0.45% -0.54% -0.31%] index_fill_ skip64 : Elapsed 0.035 ms (3.521 ms / 100) 3.522 -> 3.496 ( -0.74%) [ +0.03% +0.00% +0.06% / -0.54% -0.74% -0.62%] index_fill_ skip256 : Elapsed 0.035 ms (3.523 ms / 100) 3.510 -> 3.497 ( -0.37%) [ +0.31% +0.17% +0.00% / -0.37% -0.14% -0.28%] index_fill_ spread : Elapsed 0.035 ms (3.521 ms / 100) 3.521 -> 3.501 ( -0.57%) [ +0.06% +0.00% +0.11% / -0.57% -0.43% -0.45%] index_fill_ strided 3 : Elapsed 0.035 ms (3.523 ms / 100) 3.529 -> 3.514 ( -0.43%) [ +0.09% +0.11% +0.00% / -0.43% -0.31% -0.28%] index_fill_ random : Elapsed 0.035 ms (3.532 ms / 100) 3.523 -> 3.508 ( -0.43%) [ +0.06% +0.00% +0.11% / -0.43% -0.23% -0.37%] index_fill_ random_sorted : Elapsed 0.035 ms (3.525 ms / 100) B = [4, 5, 16, 40] (stride (1, 2560, 160, 4)) A = [4, 20, 16, 40] (stride (800, 1, 3200, 20)) dim = 1 1.749 -> 1.758 ( +0.51%) [ +0.00% +0.06% +0.34% / +0.51% +0.69% +0.69%] index_select const : Elapsed 0.017 ms (1.749 ms / 100) 1.755 -> 1.755 ( +0.00%) [ +0.11% +0.23% +0.00% / +0.00% +0.85% +1.14%] index_select wrap : Elapsed 0.018 ms (1.757 ms / 100) 1.761 -> 1.764 ( +0.17%) [ +0.06% +0.00% +0.17% / +0.17% +0.68% +0.57%] index_select linear : Elapsed 0.018 ms (1.762 ms / 100) 1.760 -> 1.759 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.68% +0.62%] index_select reverse : Elapsed 0.018 ms (1.761 ms / 100) 1.751 -> 1.750 ( -0.06%) [ +0.00% +0.11% +0.17% / -0.06% +0.46% +1.03%] index_select skip64 : Elapsed 0.018 ms (1.751 ms / 100) 1.757 -> 1.755 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.40% +0.57%] index_select skip256 : Elapsed 0.018 ms (1.757 ms / 100) 1.776 -> 1.781 ( +0.28%) [ +0.28% +0.00% +0.06% / +0.28% +0.96% +1.24%] index_select spread : Elapsed 0.018 ms (1.781 ms / 100) 1.779 -> 1.779 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.96% +0.84%] index_select strided 3 : Elapsed 0.018 ms (1.780 ms / 100) 1.782 -> 1.782 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.90% +0.84%] index_select strided 5 : Elapsed 0.018 ms (1.782 ms / 100) 1.770 -> 1.768 ( -0.11%) [ +0.06% +0.00% +0.11% / -0.11% +1.07% +0.96%] index_select strided 7 : Elapsed 0.018 ms (1.771 ms / 100) 1.770 -> 1.773 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.96% +0.90%] index_select strided 8 : Elapsed 0.018 ms (1.773 ms / 100) 1.784 -> 1.784 ( +0.00%) [ +0.00% +0.06% +0.17% / +0.00% +0.95% +1.01%] index_select strided 16 : Elapsed 0.018 ms (1.784 ms / 100) 1.770 -> 1.770 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.96% +0.73%] index_select random : Elapsed 0.018 ms (1.771 ms / 100) 1.770 -> 1.773 ( +0.17%) [ +0.00% +0.23% +0.11% / +0.17% +1.02% +0.68%] index_select random_sorted : Elapsed 0.018 ms (1.770 ms / 100) 1.769 -> 1.770 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +1.07% +0.90%] index_select perm : Elapsed 0.018 ms (1.770 ms / 100) 1.776 -> 1.777 ( +0.06%) [ +0.11% +0.23% +0.00% / +0.06% +1.01% +1.13%] index_select perm_sorted : Elapsed 0.018 ms (1.778 ms / 100) B = [4, 5, 16, 40] (stride (1, 160, 800, 4)) A = [4, 20, 16, 40] (stride (320, 16, 1, 1280)) dim = 1 1.947 -> 1.940 ( -0.36%) [ +0.21% +0.15% +0.00% / +0.05% -0.26% -0.36%] index_select const : Elapsed 0.020 ms (1.951 ms / 100) 1.911 -> 1.910 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.26% +0.47%] index_select wrap : Elapsed 0.019 ms (1.911 ms / 100) 1.911 -> 1.912 ( +0.05%) [ +0.16% +0.00% +0.00% / +0.05% +0.52% +0.47%] index_select linear : Elapsed 0.019 ms (1.914 ms / 100) 1.910 -> 1.910 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select reverse : Elapsed 0.019 ms (1.912 ms / 100) 1.943 -> 1.944 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.26% +0.26%] index_select skip64 : Elapsed 0.019 ms (1.943 ms / 100) 1.941 -> 1.942 ( +0.05%) [ +0.15% +0.00% +0.00% / +0.05% +0.05% +0.05%] index_select skip256 : Elapsed 0.019 ms (1.944 ms / 100) 1.908 -> 1.909 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.37% +0.21%] index_select spread : Elapsed 0.019 ms (1.910 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.11% +0.00% +0.16% / +0.11% +0.42% +0.16%] index_select strided 3 : Elapsed 0.019 ms (1.906 ms / 100) 1.902 -> 1.907 ( +0.26%) [ +0.00% +0.16% +0.21% / +0.26% +0.32% +0.53%] index_select strided 5 : Elapsed 0.019 ms (1.902 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.26% +0.42%] index_select strided 7 : Elapsed 0.019 ms (1.906 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.21% +0.00% +0.05% / +0.11% +0.16% +0.37%] index_select strided 8 : Elapsed 0.019 ms (1.908 ms / 100) 1.900 -> 1.904 ( +0.21%) [ +0.32% +0.26% +0.00% / +0.21% +0.26% +0.32%] index_select strided 16 : Elapsed 0.019 ms (1.906 ms / 100) 1.907 -> 1.909 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.42% +0.47%] index_select random : Elapsed 0.019 ms (1.907 ms / 100) 1.905 -> 1.906 ( +0.05%) [ +0.21% +0.16% +0.00% / +0.05% +0.52% +0.52%] index_select random_sorted : Elapsed 0.019 ms (1.909 ms / 100) 1.904 -> 1.904 ( +0.00%) [ +0.21% +0.05% +0.00% / +0.00% +0.21% +0.47%] index_select perm : Elapsed 0.019 ms (1.908 ms / 100) 1.909 -> 1.912 ( +0.16%) [ +0.05% +0.00% +0.10% / +0.21% +0.16% +0.26%] index_select perm_sorted : Elapsed 0.019 ms (1.910 ms / 100) B = [4, 5, 16, 40] (stride (80, 16, 1, 320)) A = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) dim = 1 1.932 -> 1.934 ( +0.10%) [ +0.26% +0.00% +0.26% / +0.10% +0.67% +0.72%] index_select const : Elapsed 0.019 ms (1.937 ms / 100) 1.941 -> 1.941 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.77% +0.82%] index_select wrap : Elapsed 0.019 ms (1.942 ms / 100) 1.944 -> 1.942 ( -0.10%) [ +0.15% +0.00% +0.10% / -0.10% +0.51% +0.57%] index_select linear : Elapsed 0.019 ms (1.947 ms / 100) 1.943 -> 1.943 ( +0.00%) [ +0.26% +0.00% +0.31% / +0.00% +0.67% +0.31%] index_select reverse : Elapsed 0.019 ms (1.948 ms / 100) 1.935 -> 1.932 ( -0.16%) [ +0.05% +0.00% +0.10% / -0.16% +0.52% +0.21%] index_select skip64 : Elapsed 0.019 ms (1.936 ms / 100) 1.932 -> 1.930 ( -0.10%) [ +0.21% +0.00% +0.10% / -0.10% +0.67% +0.57%] index_select skip256 : Elapsed 0.019 ms (1.936 ms / 100) 1.957 -> 1.958 ( +0.05%) [ +0.20% +0.15% +0.00% / +0.05% +0.61% +0.61%] index_select spread : Elapsed 0.020 ms (1.961 ms / 100) 1.959 -> 1.961 ( +0.10%) [ +0.20% +0.00% +0.15% / +0.10% +0.56% +0.31%] index_select strided 3 : Elapsed 0.020 ms (1.963 ms / 100) 1.957 -> 1.960 ( +0.15%) [ +0.05% +0.00% +0.10% / +0.15% +0.36% +0.51%] index_select strided 5 : Elapsed 0.020 ms (1.958 ms / 100) 1.952 -> 1.950 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.10% +0.46% +0.51%] index_select strided 7 : Elapsed 0.020 ms (1.953 ms / 100) 1.949 -> 1.951 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.67% +0.67%] index_select strided 8 : Elapsed 0.020 ms (1.951 ms / 100) 1.955 -> 1.955 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.61% +0.82%] index_select strided 16 : Elapsed 0.020 ms (1.957 ms / 100) 1.952 -> 1.956 ( +0.20%) [ +0.05% +0.10% +0.00% / +0.20% +0.36% +0.41%] index_select random : Elapsed 0.020 ms (1.953 ms / 100) 1.947 -> 1.948 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.72% +0.36%] index_select random_sorted : Elapsed 0.019 ms (1.947 ms / 100) 1.950 -> 1.951 ( +0.05%) [ +0.15% +0.00% +0.05% / +0.05% +0.72% +0.72%] index_select perm : Elapsed 0.020 ms (1.953 ms / 100) 1.958 -> 1.964 ( +0.31%) [ +0.20% +0.00% +0.00% / +0.31% +0.56% +0.61%] index_select perm_sorted : Elapsed 0.020 ms (1.962 ms / 100) B = [4, 5, 16, 40] (stride (5, 1, 20, 320)) A = [4, 20, 16, 40] (stride (1, 160, 3200, 4)) dim = 1 1.783 -> 1.784 ( +0.06%) [ +0.39% +0.00% +0.28% / +0.28% +0.50% +0.06%] index_select const : Elapsed 0.018 ms (1.790 ms / 100) 1.799 -> 1.792 ( -0.39%) [ +0.06% +0.00% +0.06% / -0.06% -0.33% -0.39%] index_select wrap : Elapsed 0.018 ms (1.800 ms / 100) 1.798 -> 1.787 ( -0.61%) [ +0.11% +0.39% +0.00% / +0.22% -0.44% -0.61%] index_select linear : Elapsed 0.018 ms (1.800 ms / 100) 1.799 -> 1.787 ( -0.67%) [ +0.11% +0.00% +0.00% / +0.33% -0.67% -0.50%] index_select reverse : Elapsed 0.018 ms (1.801 ms / 100) 1.783 -> 1.783 ( +0.00%) [ +0.28% +0.00% +0.11% / +0.00% +0.39% +0.22%] index_select skip64 : Elapsed 0.018 ms (1.788 ms / 100) 1.784 -> 1.786 ( +0.11%) [ +0.00% +0.17% +0.06% / +0.11% +0.17% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.784 ms / 100) 1.787 -> 1.790 ( +0.17%) [ +0.11% +0.00% +0.00% / +0.17% +0.22% +0.28%] index_select spread : Elapsed 0.018 ms (1.789 ms / 100) 1.789 -> 1.794 ( +0.28%) [ +0.17% +0.00% +0.06% / +0.28% +0.61% +0.50%] index_select strided 3 : Elapsed 0.018 ms (1.792 ms / 100) 1.788 -> 1.791 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.22% +0.17% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.788 ms / 100) 1.793 -> 1.790 ( -0.17%) [ +0.06% +0.00% +0.17% / -0.06% +0.00% -0.17%] index_select strided 7 : Elapsed 0.018 ms (1.794 ms / 100) 1.786 -> 1.789 ( +0.17%) [ +0.11% +0.11% +0.00% / +0.17% +0.34% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.788 ms / 100) 1.786 -> 1.788 ( +0.11%) [ +0.22% +0.00% +0.06% / +0.28% +0.11% +0.28%] index_select strided 16 : Elapsed 0.018 ms (1.790 ms / 100) 1.789 -> 1.788 ( -0.06%) [ +0.28% +0.56% +0.00% / +0.28% -0.06% -0.06%] index_select random : Elapsed 0.018 ms (1.794 ms / 100) 1.793 -> 1.789 ( -0.22%) [ +0.00% +0.11% +0.22% / +0.11% -0.11% -0.22%] index_select random_sorted : Elapsed 0.018 ms (1.793 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.11% +0.00% +0.17% / +0.11% +0.28% +0.06%] index_select perm : Elapsed 0.018 ms (1.792 ms / 100) 1.791 -> 1.791 ( +0.00%) [ +0.06% +0.00% +0.17% / +0.34% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.018 ms (1.792 ms / 100) out_shape = [4, 20, 5, 40] in_shape = [4, 20, 16, 40] idx_dim = 2 B = [4, 20, 5, 40] (stride (4000, 200, 40, 1)) A = [4, 20, 16, 40] (stride (40, 2560, 160, 1)) dim = 2 2.104 -> 2.112 ( +0.38%) [ +0.00% +0.29% +0.29% / +0.38% +0.43% +0.43%] index_select const : Elapsed 0.021 ms (2.104 ms / 100) 2.175 -> 2.179 ( +0.18%) [ +0.00% +0.18% +0.28% / +0.37% +0.18% +0.78%] index_select wrap : Elapsed 0.022 ms (2.175 ms / 100) 2.190 -> 2.191 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.32% +0.05% +0.18%] index_select linear : Elapsed 0.022 ms (2.191 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.46% +0.23% +0.00% / +0.09% +0.51% +0.74%] index_select reverse : Elapsed 0.022 ms (2.182 ms / 100) 2.101 -> 2.103 ( +0.10%) [ +0.10% +0.00% +0.14% / +0.10% +0.43% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.103 ms / 100) 2.105 -> 2.106 ( +0.05%) [ +0.10% +0.14% +0.00% / +0.05% +0.14% +0.81%] index_select skip256 : Elapsed 0.021 ms (2.107 ms / 100) 2.178 -> 2.172 ( -0.28%) [ +0.09% +0.00% +0.00% / -0.28% +0.00% +0.32%] index_select spread : Elapsed 0.022 ms (2.180 ms / 100) 2.185 -> 2.190 ( +0.23%) [ +0.23% +0.18% +0.00% / +0.23% +0.27% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.190 ms / 100) 2.185 -> 2.184 ( -0.05%) [ +0.05% +0.00% +0.09% / -0.05% +0.23% +0.32%] index_select strided 5 : Elapsed 0.022 ms (2.186 ms / 100) 2.179 -> 2.185 ( +0.28%) [ +0.23% +0.00% +0.28% / +0.28% +0.55% +0.37%] index_select strided 7 : Elapsed 0.022 ms (2.184 ms / 100) 2.122 -> 2.119 ( -0.14%) [ +0.19% +0.05% +0.00% / -0.14% +0.09% +0.19%] index_select strided 8 : Elapsed 0.021 ms (2.126 ms / 100) 2.155 -> 2.152 ( -0.14%) [ +0.19% +0.00% +0.09% / -0.14% +0.42% +0.42%] index_select random : Elapsed 0.022 ms (2.159 ms / 100) 2.160 -> 2.160 ( +0.00%) [ +0.00% +0.09% +0.05% / +0.00% +0.46% +0.65%] index_select random_sorted : Elapsed 0.022 ms (2.160 ms / 100) 2.185 -> 2.182 ( -0.14%) [ +0.00% +0.14% +0.05% / -0.14% +0.05% +0.18%] index_select perm : Elapsed 0.022 ms (2.185 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.00% +0.23% +0.18% / +0.09% +0.46% +0.55%] index_select perm_sorted : Elapsed 0.022 ms (2.172 ms / 100) B = [4, 20, 5, 40] (stride (4000, 200, 1, 5)) A = [4, 20, 16, 40] (stride (12800, 1, 800, 20)) dim = 2 2.255 -> 2.259 ( +0.18%) [ +0.27% +0.00% +0.18% / +0.18% +0.49% +0.40%] index_select const : Elapsed 0.023 ms (2.261 ms / 100) 2.254 -> 2.260 ( +0.27%) [ +0.00% +0.04% +0.13% / +0.31% +0.27% +0.40%] index_select wrap : Elapsed 0.023 ms (2.254 ms / 100) 2.249 -> 2.251 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.27% +0.18%] index_select linear : Elapsed 0.022 ms (2.249 ms / 100) 2.245 -> 2.243 ( -0.09%) [ +0.04% +0.00% +0.00% / +0.04% -0.09% -0.09%] index_select reverse : Elapsed 0.022 ms (2.246 ms / 100) 2.256 -> 2.259 ( +0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.35% +0.40%] index_select skip64 : Elapsed 0.023 ms (2.256 ms / 100) 2.260 -> 2.262 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.18% +0.18% +0.09%] index_select skip256 : Elapsed 0.023 ms (2.260 ms / 100) 2.245 -> 2.242 ( -0.13%) [ +0.18% +0.00% +0.22% / +0.00% -0.13% +0.22%] index_select spread : Elapsed 0.022 ms (2.249 ms / 100) 2.249 -> 2.254 ( +0.22%) [ +0.09% +0.27% +0.00% / +0.22% +0.53% +0.31%] index_select strided 3 : Elapsed 0.023 ms (2.251 ms / 100) 2.252 -> 2.254 ( +0.09%) [ +0.22% +0.00% +0.27% / +0.27% +0.09% +0.40%] index_select strided 5 : Elapsed 0.023 ms (2.257 ms / 100) 2.242 -> 2.243 ( +0.04%) [ +0.00% +0.00% +0.09% / +0.04% +0.31% +0.31%] index_select strided 7 : Elapsed 0.022 ms (2.242 ms / 100) 2.255 -> 2.260 ( +0.22%) [ +0.31% +0.00% +0.22% / +0.22% +0.44% +0.58%] index_select strided 8 : Elapsed 0.023 ms (2.262 ms / 100) 2.256 -> 2.252 ( -0.18%) [ +0.00% +0.13% +0.09% / -0.18% -0.04% -0.13%] index_select random : Elapsed 0.023 ms (2.256 ms / 100) 2.242 -> 2.250 ( +0.36%) [ +0.31% +0.54% +0.00% / +0.40% +0.36% +0.54%] index_select random_sorted : Elapsed 0.022 ms (2.249 ms / 100) 2.246 -> 2.248 ( +0.09%) [ +0.18% +0.00% +0.18% / +0.09% +0.45% +0.22%] index_select perm : Elapsed 0.023 ms (2.250 ms / 100) 2.251 -> 2.249 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +0.27% +0.22%] index_select perm_sorted : Elapsed 0.023 ms (2.252 ms / 100) B = [4, 20, 5, 40] (stride (200, 800, 40, 1)) A = [4, 20, 16, 40] (stride (1, 4, 3200, 80)) dim = 2 2.449 -> 2.455 ( +0.24%) [ +0.00% +0.16% +0.12% / +0.24% +0.78% +0.94%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.12% +0.57% +0.78%] index_select wrap : Elapsed 0.024 ms (2.448 ms / 100) 2.444 -> 2.447 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.53% +0.70%] index_select linear : Elapsed 0.024 ms (2.444 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.08% +0.04% +0.12%] index_select reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.455 ( +0.20%) [ +0.00% +0.20% +0.08% / +0.20% +0.61% +0.69%] index_select skip64 : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.61% +0.61%] index_select skip256 : Elapsed 0.025 ms (2.454 ms / 100) 2.444 -> 2.444 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.45% +0.61%] index_select spread : Elapsed 0.024 ms (2.447 ms / 100) 2.440 -> 2.442 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.78% +0.78%] index_select strided 3 : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.443 ( +0.25%) [ +0.12% +0.25% +0.00% / +0.25% +0.53% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.440 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.65% +0.74%] index_select strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.57% +0.65%] index_select strided 8 : Elapsed 0.024 ms (2.446 ms / 100) 2.452 -> 2.450 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.24% +0.41%] index_select random : Elapsed 0.025 ms (2.452 ms / 100) 2.443 -> 2.444 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.61% +0.53%] index_select random_sorted : Elapsed 0.024 ms (2.446 ms / 100) 2.442 -> 2.447 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.20% +0.49% +0.41%] index_select perm : Elapsed 0.024 ms (2.446 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.74% +0.65%] index_select perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) B = [4, 20, 5, 40] (stride (100, 5, 1, 400)) A = [4, 20, 16, 40] (stride (320, 16, 1, 1280)) dim = 2 0.931 -> 0.932 ( +0.11%) [ +0.54% +0.00% +0.43% / +0.11% +1.40% +1.40%] index_select const : Elapsed 0.009 ms (0.936 ms / 100) 0.932 -> 0.932 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.97% +1.50%] index_select wrap : Elapsed 0.009 ms (0.932 ms / 100) 0.934 -> 0.936 ( +0.21%) [ +0.21% +0.11% +0.00% / +0.21% +1.07% +1.39%] index_select linear : Elapsed 0.009 ms (0.936 ms / 100) 0.934 -> 0.932 ( -0.21%) [ +0.11% +0.00% +0.00% / -0.21% +1.18% +1.50%] index_select reverse : Elapsed 0.009 ms (0.935 ms / 100) 0.929 -> 0.926 ( -0.32%) [ +0.11% +0.11% +0.00% / -0.32% +2.69% +3.12%] index_select skip64 : Elapsed 0.009 ms (0.930 ms / 100) 0.925 -> 0.926 ( +0.11%) [ +0.43% +0.00% +0.65% / +0.11% +3.24% +3.57%] index_select skip256 : Elapsed 0.009 ms (0.929 ms / 100) 0.940 -> 0.941 ( +0.11%) [ +0.64% +0.21% +0.00% / +0.11% +2.34% +2.34%] index_select spread : Elapsed 0.009 ms (0.946 ms / 100) 0.941 -> 0.942 ( +0.11%) [ +0.00% +0.32% +0.00% / +0.11% +2.02% +2.02%] index_select strided 3 : Elapsed 0.009 ms (0.941 ms / 100) 0.938 -> 0.942 ( +0.43%) [ +0.32% +0.32% +0.00% / +0.43% +2.03% +1.81%] index_select strided 5 : Elapsed 0.009 ms (0.941 ms / 100) 0.941 -> 0.948 ( +0.74%) [ +0.11% +0.00% +0.00% / +0.74% +1.70% +1.59%] index_select strided 7 : Elapsed 0.009 ms (0.942 ms / 100) 0.943 -> 0.943 ( +0.00%) [ +0.74% +0.42% +0.00% / +0.00% +1.27% +1.70%] index_select strided 8 : Elapsed 0.009 ms (0.950 ms / 100) 0.944 -> 0.944 ( +0.00%) [ +0.11% +0.64% +0.00% / +0.00% +1.59% +1.38%] index_select random : Elapsed 0.009 ms (0.945 ms / 100) 0.940 -> 0.941 ( +0.11%) [ +0.21% +0.00% +0.00% / +0.11% +1.70% +1.28%] index_select random_sorted : Elapsed 0.009 ms (0.942 ms / 100) 0.943 -> 0.943 ( +0.00%) [ +0.11% +0.21% +0.00% / +0.00% +1.91% +1.70%] index_select perm : Elapsed 0.009 ms (0.944 ms / 100) 0.938 -> 0.945 ( +0.75%) [ +0.00% +0.11% +0.11% / +0.75% +1.81% +1.81%] index_select perm_sorted : Elapsed 0.009 ms (0.938 ms / 100) B = [4, 20, 5, 40] (stride (20, 1, 80, 400)) A = [4, 20, 16, 40] (stride (20, 1, 80, 1280)) dim = 2 0.906 -> 0.905 ( -0.11%) [ +0.33% +0.22% +0.00% / -0.11% +1.10% +1.43%] index_select const : Elapsed 0.009 ms (0.909 ms / 100) 0.941 -> 0.922 ( -2.02%) [ +0.11% +0.11% +0.00% / -0.11% -2.02% -1.70%] index_select wrap : Elapsed 0.009 ms (0.942 ms / 100) 0.937 -> 0.923 ( -1.49%) [ +0.11% +0.00% +0.21% / +0.21% -1.17% -1.49%] index_select linear : Elapsed 0.009 ms (0.938 ms / 100) 0.930 -> 0.901 ( -3.12%) [ +0.00% +0.00% +0.00% / +0.32% -3.12% -2.90%] index_select reverse : Elapsed 0.009 ms (0.930 ms / 100) 0.904 -> 0.906 ( +0.22%) [ +0.44% +0.22% +0.00% / +0.22% +1.33% +2.21%] index_select skip64 : Elapsed 0.009 ms (0.908 ms / 100) 0.905 -> 0.906 ( +0.11%) [ +0.11% +0.00% +0.33% / +0.11% +1.66% +1.77%] index_select skip256 : Elapsed 0.009 ms (0.906 ms / 100) 0.934 -> 0.917 ( -1.82%) [ +0.43% +0.00% +0.21% / +0.32% -1.50% -1.82%] index_select spread : Elapsed 0.009 ms (0.938 ms / 100) 0.937 -> 0.918 ( -2.03%) [ +0.21% +0.00% +0.11% / +0.11% -2.03% -2.03%] index_select strided 3 : Elapsed 0.009 ms (0.939 ms / 100) 0.932 -> 0.927 ( -0.54%) [ +0.32% +0.32% +0.00% / -0.21% -0.54% -0.43%] index_select strided 5 : Elapsed 0.009 ms (0.935 ms / 100) 0.927 -> 0.920 ( -0.76%) [ +0.00% +0.00% +0.32% / -0.11% -0.76% -0.22%] index_select strided 7 : Elapsed 0.009 ms (0.927 ms / 100) 0.909 -> 0.908 ( -0.11%) [ +0.11% +0.00% +0.11% / -0.11% +0.88% +1.21%] index_select strided 8 : Elapsed 0.009 ms (0.910 ms / 100) 0.911 -> 0.901 ( -1.10%) [ +0.55% +0.00% +0.11% / +0.11% -1.10% -1.10%] index_select random : Elapsed 0.009 ms (0.916 ms / 100) 0.920 -> 0.911 ( -0.98%) [ +0.11% +0.11% +0.00% / +0.11% -0.98% -0.98%] index_select random_sorted : Elapsed 0.009 ms (0.921 ms / 100) 0.924 -> 0.924 ( +0.00%) [ +0.32% +0.11% +0.00% / +0.00% +0.54% +0.11%] index_select perm : Elapsed 0.009 ms (0.927 ms / 100) 0.909 -> 0.902 ( -0.77%) [ +0.22% +0.33% +0.00% / +0.33% +0.00% -0.77%] index_select perm_sorted : Elapsed 0.009 ms (0.911 ms / 100) out_shape = [4, 20, 16, 5] in_shape = [4, 20, 16, 40] idx_dim = 3 B = [4, 20, 16, 5] (stride (1, 320, 20, 4)) A = [4, 20, 16, 40] (stride (12800, 640, 1, 16)) dim = 3 0.639 -> 0.638 ( -0.16%) [ +0.31% +0.00% +0.00% / -0.16% -0.16% +0.00%] index_select const : Elapsed 0.006 ms (0.641 ms / 100) 0.638 -> 0.640 ( +0.31%) [ +0.47% +0.63% +0.00% / +0.31% +0.63% +0.31%] index_select wrap : Elapsed 0.006 ms (0.641 ms / 100) 0.639 -> 0.641 ( +0.31%) [ +0.00% +0.31% +0.31% / +0.31% +0.31% +0.31%] index_select linear : Elapsed 0.006 ms (0.639 ms / 100) 0.639 -> 0.640 ( +0.16%) [ +0.31% +0.00% +0.00% / +0.31% +0.16% +0.31%] index_select reverse : Elapsed 0.006 ms (0.641 ms / 100) 0.637 -> 0.638 ( +0.16%) [ +0.00% +0.31% +0.16% / +0.16% +0.16% +0.63%] index_select skip64 : Elapsed 0.006 ms (0.637 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.31% +0.31% +0.00% / +0.31% +0.16% +0.47%] index_select skip256 : Elapsed 0.006 ms (0.640 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.31% +0.31% +0.00% / +0.16% +0.16% +0.31%] index_select spread : Elapsed 0.006 ms (0.640 ms / 100) 0.639 -> 0.640 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.31% +0.31%] index_select strided 3 : Elapsed 0.006 ms (0.640 ms / 100) 0.638 -> 0.640 ( +0.31%) [ +0.31% +0.00% +0.47% / +0.31% +0.31% +0.31%] index_select strided 5 : Elapsed 0.006 ms (0.640 ms / 100) 0.637 -> 0.638 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.47% +0.16% +0.78%] index_select strided 7 : Elapsed 0.006 ms (0.638 ms / 100) 0.636 -> 0.640 ( +0.63%) [ +0.63% +0.94% +0.00% / +0.63% +0.79% +0.79%] index_select strided 8 : Elapsed 0.006 ms (0.640 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.00% +0.00% / +0.16% +0.47% +0.31%] index_select strided 16 : Elapsed 0.006 ms (0.643 ms / 100) 0.636 -> 0.637 ( +0.16%) [ +0.31% +0.79% +0.00% / +0.16% +0.79% +0.63%] index_select random : Elapsed 0.006 ms (0.638 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.47% +0.47%] index_select random_sorted : Elapsed 0.006 ms (0.638 ms / 100) 0.638 -> 0.641 ( +0.47%) [ +0.31% +0.00% +0.31% / +0.47% +0.47% +0.47%] index_select perm : Elapsed 0.006 ms (0.640 ms / 100) 0.638 -> 0.638 ( +0.00%) [ +0.00% +0.63% +0.31% / +0.00% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.006 ms (0.638 ms / 100) B = [4, 20, 16, 5] (stride (1, 320, 20, 4)) A = [4, 20, 16, 40] (stride (20, 1, 80, 1280)) dim = 3 0.654 -> 0.654 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.15% +0.15%] index_select const : Elapsed 0.007 ms (0.655 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.76% +1.22%] index_select wrap : Elapsed 0.007 ms (0.655 ms / 100) 0.653 -> 0.653 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.61% +0.61%] index_select linear : Elapsed 0.007 ms (0.653 ms / 100) 0.652 -> 0.652 ( +0.00%) [ +0.31% +0.46% +0.00% / +0.00% +0.92% +0.92%] index_select reverse : Elapsed 0.007 ms (0.654 ms / 100) 0.651 -> 0.653 ( +0.31%) [ +0.31% +0.31% +0.00% / +0.31% +0.92% +0.77%] index_select skip64 : Elapsed 0.007 ms (0.653 ms / 100) 0.654 -> 0.654 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.46% +0.31%] index_select skip256 : Elapsed 0.007 ms (0.655 ms / 100) 0.654 -> 0.654 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.61% +0.76%] index_select spread : Elapsed 0.007 ms (0.654 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.61% +1.37%] index_select strided 3 : Elapsed 0.007 ms (0.655 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.61% +0.76%] index_select strided 5 : Elapsed 0.007 ms (0.655 ms / 100) 0.654 -> 0.653 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.92% +0.76%] index_select strided 7 : Elapsed 0.007 ms (0.654 ms / 100) 0.652 -> 0.653 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +1.07% +0.92%] index_select strided 8 : Elapsed 0.007 ms (0.652 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.15% +0.31% +0.00% / +0.15% +0.61% +0.77%] index_select strided 16 : Elapsed 0.007 ms (0.654 ms / 100) 0.654 -> 0.657 ( +0.46%) [ +0.15% +0.15% +0.00% / +0.46% +0.61% +0.61%] index_select random : Elapsed 0.007 ms (0.655 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.92% +0.77%] index_select random_sorted : Elapsed 0.007 ms (0.654 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.77% +0.77%] index_select perm : Elapsed 0.007 ms (0.653 ms / 100) 0.652 -> 0.653 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.38% +1.07%] index_select perm_sorted : Elapsed 0.007 ms (0.653 ms / 100) B = [4, 20, 16, 5] (stride (100, 5, 400, 1)) A = [4, 20, 16, 40] (stride (1, 64, 4, 1280)) dim = 3 0.648 -> 0.648 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.46% +1.70%] index_select const : Elapsed 0.006 ms (0.649 ms / 100) 0.649 -> 0.649 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.31% +0.77%] index_select wrap : Elapsed 0.006 ms (0.649 ms / 100) 0.649 -> 0.648 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.31% +0.31%] index_select linear : Elapsed 0.006 ms (0.649 ms / 100) 0.649 -> 0.649 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.46% +0.46%] index_select reverse : Elapsed 0.006 ms (0.649 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.46% +0.62%] index_select skip64 : Elapsed 0.006 ms (0.648 ms / 100) 0.650 -> 0.649 ( -0.15%) [ +0.15% +0.00% +0.00% / -0.15% +0.15% +0.31%] index_select skip256 : Elapsed 0.007 ms (0.651 ms / 100) 0.650 -> 0.651 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.15% +0.31%] index_select spread : Elapsed 0.007 ms (0.651 ms / 100) 0.649 -> 0.649 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.46% +0.31%] index_select strided 3 : Elapsed 0.006 ms (0.649 ms / 100) 0.650 -> 0.649 ( -0.15%) [ +0.92% +0.31% +0.00% / -0.15% +0.00% +0.00%] index_select strided 5 : Elapsed 0.007 ms (0.656 ms / 100) 0.648 -> 0.650 ( +0.31%) [ +0.00% +0.00% +0.15% / +0.93% +0.31% +0.77%] index_select strided 7 : Elapsed 0.006 ms (0.648 ms / 100) 0.647 -> 0.649 ( +0.31%) [ +0.31% +0.15% +0.00% / +0.31% +0.46% +0.77%] index_select strided 8 : Elapsed 0.006 ms (0.649 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.62% +0.62%] index_select strided 16 : Elapsed 0.006 ms (0.648 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.62% +0.46%] index_select random : Elapsed 0.006 ms (0.649 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.93% +0.93%] index_select random_sorted : Elapsed 0.006 ms (0.649 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.93% +1.08%] index_select perm : Elapsed 0.006 ms (0.649 ms / 100) 0.646 -> 0.648 ( +0.31%) [ +0.31% +0.15% +0.00% / +0.31% +1.08% +0.93%] index_select perm_sorted : Elapsed 0.006 ms (0.648 ms / 100) B = [4, 20, 16, 5] (stride (1, 4, 400, 80)) A = [4, 20, 16, 40] (stride (12800, 1, 20, 320)) dim = 3 1.481 -> 1.482 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.47% +0.54%] index_select const : Elapsed 0.015 ms (1.481 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.67% +0.54%] index_select wrap : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_select linear : Elapsed 0.015 ms (1.482 ms / 100) 1.482 -> 1.485 ( +0.20%) [ +0.07% +0.00% +0.00% / +0.20% +0.81% +0.61%] index_select reverse : Elapsed 0.015 ms (1.483 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.61% +0.00% / +0.07% +0.61% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.61%] index_select spread : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.484 ( +0.20%) [ +0.14% +0.07% +0.00% / +0.20% +0.68% +0.68%] index_select strided 3 : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.68% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_select strided 7 : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.482 ms / 100) 1.479 -> 1.482 ( +0.20%) [ +0.14% +0.00% +0.07% / +0.20% +0.88% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.481 ms / 100) 1.481 -> 1.483 ( +0.14%) [ +0.27% +0.07% +0.00% / +0.14% +0.88% +0.81%] index_select random : Elapsed 0.015 ms (1.485 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.74%] index_select random_sorted : Elapsed 0.015 ms (1.483 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.67% +0.74%] index_select perm : Elapsed 0.015 ms (1.483 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.74% +0.81%] index_select perm_sorted : Elapsed 0.015 ms (1.484 ms / 100) B = [4, 20, 16, 5] (stride (320, 16, 1, 1280)) A = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) dim = 3 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_select const : Elapsed 0.013 ms (1.282 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.39% +0.39%] index_select wrap : Elapsed 0.013 ms (1.285 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.47% +0.39%] index_select linear : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select reverse : Elapsed 0.013 ms (1.283 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.55% +0.47%] index_select skip64 : Elapsed 0.013 ms (1.282 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select skip256 : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select spread : Elapsed 0.013 ms (1.282 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select strided 5 : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.55% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.281 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.55% +0.62%] index_select strided 16 : Elapsed 0.013 ms (1.282 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_select random : Elapsed 0.013 ms (1.283 ms / 100) 1.283 -> 1.282 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.55% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.16% +0.23% +0.00% / +0.08% +0.70% +0.70%] index_select perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) B = [4, 20, 16, 5] (stride (320, 1, 20, 1280)) A = [4, 20, 16, 40] (stride (40, 160, 3200, 1)) dim = 3 1.617 -> 1.617 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.56% +0.56%] index_select const : Elapsed 0.016 ms (1.618 ms / 100) 1.613 -> 1.616 ( +0.19%) [ +0.19% +0.06% +0.00% / +0.19% +0.68% +0.56%] index_select wrap : Elapsed 0.016 ms (1.616 ms / 100) 1.616 -> 1.614 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.62% +0.62%] index_select linear : Elapsed 0.016 ms (1.616 ms / 100) 1.613 -> 1.615 ( +0.12%) [ +0.19% +0.00% +0.19% / +0.12% +0.81% +0.93%] index_select reverse : Elapsed 0.016 ms (1.616 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.68% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.615 ms / 100) 1.616 -> 1.617 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.62%] index_select skip256 : Elapsed 0.016 ms (1.617 ms / 100) 1.616 -> 1.615 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.74% +0.68%] index_select spread : Elapsed 0.016 ms (1.616 ms / 100) 1.613 -> 1.615 ( +0.12%) [ +0.19% +0.00% +0.00% / +0.12% +0.68% +0.62%] index_select strided 3 : Elapsed 0.016 ms (1.616 ms / 100) 1.613 -> 1.621 ( +0.50%) [ +0.00% +0.12% +0.19% / +0.50% +0.93% +0.87%] index_select strided 5 : Elapsed 0.016 ms (1.613 ms / 100) 1.616 -> 1.618 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.74% +0.74%] index_select strided 7 : Elapsed 0.016 ms (1.617 ms / 100) 1.616 -> 1.615 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.74% +0.74%] index_select strided 8 : Elapsed 0.016 ms (1.616 ms / 100) 1.612 -> 1.614 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.81% +0.68%] index_select strided 16 : Elapsed 0.016 ms (1.614 ms / 100) 1.615 -> 1.615 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.74% +0.80%] index_select random : Elapsed 0.016 ms (1.616 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.81% +0.68%] index_select random_sorted : Elapsed 0.016 ms (1.612 ms / 100) 1.606 -> 1.612 ( +0.37%) [ +0.12% +0.19% +0.00% / +0.37% +0.68% +0.68%] index_select perm : Elapsed 0.016 ms (1.608 ms / 100) 1.615 -> 1.617 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.74% +0.80%] index_select perm_sorted : Elapsed 0.016 ms (1.617 ms / 100) B = [4, 20, 16, 5] (stride (320, 1, 20, 1280)) A = [4, 20, 16, 40] (stride (20, 1, 80, 1280)) dim = 3 1.434 -> 1.435 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.63% +0.56%] index_select const : Elapsed 0.014 ms (1.436 ms / 100) 1.428 -> 1.428 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.49% +0.42%] index_select wrap : Elapsed 0.014 ms (1.429 ms / 100) 1.434 -> 1.434 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.49% +0.49%] index_select linear : Elapsed 0.014 ms (1.434 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.56% +0.49%] index_select reverse : Elapsed 0.014 ms (1.441 ms / 100) 1.426 -> 1.427 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.77% +0.84%] index_select skip64 : Elapsed 0.014 ms (1.427 ms / 100) 1.434 -> 1.436 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.56% +0.49%] index_select skip256 : Elapsed 0.014 ms (1.436 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.49% +0.49%] index_select spread : Elapsed 0.014 ms (1.424 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.42% +0.35%] index_select strided 3 : Elapsed 0.014 ms (1.424 ms / 100) 1.428 -> 1.429 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.56% +0.56%] index_select strided 5 : Elapsed 0.014 ms (1.430 ms / 100) 1.422 -> 1.425 ( +0.21%) [ +0.07% +0.14% +0.00% / +0.21% +0.63% +0.63%] index_select strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.56% +0.49%] index_select strided 8 : Elapsed 0.014 ms (1.423 ms / 100) 1.423 -> 1.425 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.63% +0.63%] index_select strided 16 : Elapsed 0.014 ms (1.424 ms / 100) 1.439 -> 1.439 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.69% +0.76%] index_select random : Elapsed 0.014 ms (1.439 ms / 100) 1.439 -> 1.441 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.76% +0.69%] index_select random_sorted : Elapsed 0.014 ms (1.440 ms / 100) 1.443 -> 1.444 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.90% +0.83%] index_select perm : Elapsed 0.014 ms (1.445 ms / 100) 1.424 -> 1.426 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.70% +0.56%] index_select perm_sorted : Elapsed 0.014 ms (1.424 ms / 100) B = [4, 20, 16, 5] (stride (20, 1, 80, 1280)) A = [4, 20, 16, 40] (stride (640, 2560, 1, 16)) dim = 3 1.377 -> 1.379 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.73% +0.58%] index_select const : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.07% +0.29% +0.00% / +0.07% +0.65% +0.58%] index_select wrap : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.58% +0.58%] index_select linear : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.58%] index_select reverse : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.73% +0.73%] index_select skip64 : Elapsed 0.014 ms (1.378 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.80% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.376 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.80% +0.58%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.15% +0.22% +0.00% / +0.15% +0.65% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.00% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.382 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.80% +1.16%] index_select strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.65% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.73% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.377 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.51% +0.65%] index_select random : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.65% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.87% +0.73%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.378 ms / 100) out_shape = [5, 20, 40, 16] in_shape = [4, 20, 40, 16] idx_dim = 0 B = [5, 20, 40, 16] (stride (12800, 640, 16, 1)) A = [4, 20, 40, 16] (stride (16, 2560, 64, 1)) dim = 0 5.153 -> 5.147 ( -0.12%) [ +0.00% +0.10% +0.04% / +0.06% -0.02% -0.12%] index_add_ linear : Elapsed 0.052 ms (5.153 ms / 100) 5.075 -> 5.074 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.12% +0.00%] index_copy_ linear : Elapsed 0.051 ms (5.075 ms / 100) 5.147 -> 5.143 ( -0.08%) [ +0.08% +0.00% +0.06% / +0.19% +0.14% -0.08%] index_add_ reverse : Elapsed 0.052 ms (5.151 ms / 100) 5.067 -> 5.068 ( +0.02%) [ +0.12% +0.00% +0.04% / +0.10% +0.02% +0.14%] index_copy_ reverse : Elapsed 0.051 ms (5.073 ms / 100) 5.152 -> 5.152 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.08% +0.00% +0.10%] index_add_ spread : Elapsed 0.052 ms (5.152 ms / 100) 5.073 -> 5.074 ( +0.02%) [ +0.00% +0.12% +0.02% / +0.06% +0.02% +0.12%] index_copy_ spread : Elapsed 0.051 ms (5.073 ms / 100) 5.146 -> 5.141 ( -0.10%) [ +0.16% +0.00% +0.06% / +0.10% -0.10% +0.10%] index_add_ strided 3 : Elapsed 0.052 ms (5.154 ms / 100) 5.070 -> 5.070 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.06% +0.00% +0.02%] index_copy_ strided 3 : Elapsed 0.051 ms (5.070 ms / 100) 5.151 -> 5.141 ( -0.19%) [ +0.02% +0.00% +0.10% / +0.12% -0.10% -0.19%] index_add_ perm : Elapsed 0.052 ms (5.152 ms / 100) 5.073 -> 5.068 ( -0.10%) [ +0.02% +0.04% +0.00% / +0.28% -0.06% -0.10%] index_copy_ perm : Elapsed 0.051 ms (5.074 ms / 100) 5.149 -> 5.144 ( -0.10%) [ +0.04% +0.23% +0.00% / +0.17% -0.10% +0.00%] index_add_ perm_sorted : Elapsed 0.052 ms (5.151 ms / 100) 5.071 -> 5.068 ( -0.06%) [ +0.00% +0.18% +0.10% / +0.16% -0.06% +0.00%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.071 ms / 100) 5.179 -> 5.182 ( +0.06%) [ +0.00% +0.04% +0.19% / +0.06% +0.17% +0.06%] index_select const : Elapsed 0.052 ms (5.179 ms / 100) 5.301 -> 5.296 ( -0.09%) [ +0.00% +0.08% +0.09% / +0.17% -0.09% -0.04%] index_select wrap : Elapsed 0.053 ms (5.301 ms / 100) 5.308 -> 5.299 ( -0.17%) [ +0.02% +0.00% +0.00% / -0.04% -0.13% -0.17%] index_select linear : Elapsed 0.053 ms (5.309 ms / 100) 5.299 -> 5.293 ( -0.11%) [ +0.04% +0.08% +0.00% / +0.17% -0.11% +0.04%] index_select reverse : Elapsed 0.053 ms (5.301 ms / 100) 5.178 -> 5.177 ( -0.02%) [ +0.00% +0.04% +0.10% / +0.06% -0.02% +0.08%] index_select skip64 : Elapsed 0.052 ms (5.178 ms / 100) 5.184 -> 5.184 ( +0.00%) [ +0.00% +0.12% +0.02% / +0.04% +0.08% +0.00%] index_select skip256 : Elapsed 0.052 ms (5.184 ms / 100) 5.295 -> 5.292 ( -0.06%) [ +0.00% +0.06% +0.04% / +0.06% -0.04% -0.06%] index_select spread : Elapsed 0.053 ms (5.295 ms / 100) 5.311 -> 5.302 ( -0.17%) [ +0.15% +0.00% +0.00% / +0.00% -0.17% -0.08%] index_select strided 3 : Elapsed 0.053 ms (5.319 ms / 100) 5.271 -> 5.263 ( -0.15%) [ +0.06% +0.00% +0.13% / +0.28% -0.13% -0.15%] index_select random : Elapsed 0.053 ms (5.274 ms / 100) 5.251 -> 5.247 ( -0.08%) [ +0.11% +0.00% +0.25% / +0.11% +0.02% -0.08%] index_select random_sorted : Elapsed 0.053 ms (5.257 ms / 100) B = [5, 20, 40, 16] (stride (12800, 16, 320, 1)) A = [4, 20, 40, 16] (stride (1, 4, 1280, 80)) dim = 0 5.653 -> 5.663 ( +0.18%) [ +0.00% +0.00% +0.23% / +0.18% +0.69% +0.65%] index_add_ linear : Elapsed 0.057 ms (5.653 ms / 100) 5.589 -> 5.601 ( +0.21%) [ +0.02% +0.00% +0.05% / +0.21% +0.59% +0.75%] index_copy_ linear : Elapsed 0.056 ms (5.590 ms / 100) 5.642 -> 5.657 ( +0.27%) [ +0.09% +0.00% +0.19% / +0.27% +0.90% +0.96%] index_add_ reverse : Elapsed 0.056 ms (5.647 ms / 100) 5.582 -> 5.586 ( +0.07%) [ +0.00% +0.05% +0.11% / +0.07% +0.90% +0.88%] index_copy_ reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.658 -> 5.658 ( +0.00%) [ +0.04% +0.00% +0.14% / +0.00% +0.60% +0.60%] index_add_ spread : Elapsed 0.057 ms (5.660 ms / 100) 5.584 -> 5.599 ( +0.27%) [ +0.07% +0.00% +0.11% / +0.27% +0.82% +0.72%] index_copy_ spread : Elapsed 0.056 ms (5.588 ms / 100) 5.662 -> 5.676 ( +0.25%) [ +0.00% +0.05% +0.18% / +0.25% +0.90% +0.92%] index_add_ strided 3 : Elapsed 0.057 ms (5.662 ms / 100) 5.592 -> 5.602 ( +0.18%) [ +0.00% +0.07% +0.16% / +0.18% +0.91% +1.06%] index_copy_ strided 3 : Elapsed 0.056 ms (5.592 ms / 100) 5.657 -> 5.665 ( +0.14%) [ +0.00% +0.09% +0.21% / +0.14% +1.04% +1.06%] index_add_ perm : Elapsed 0.057 ms (5.657 ms / 100) 5.596 -> 5.600 ( +0.07%) [ +0.00% +0.02% +0.18% / +0.07% +1.05% +0.89%] index_copy_ perm : Elapsed 0.056 ms (5.596 ms / 100) 5.667 -> 5.677 ( +0.18%) [ +0.07% +0.00% +0.32% / +0.18% +0.71% +0.72%] index_add_ perm_sorted : Elapsed 0.057 ms (5.671 ms / 100) 5.604 -> 5.608 ( +0.07%) [ +0.00% +0.00% +0.02% / +0.07% +0.75% +0.66%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.604 ms / 100) 5.923 -> 5.935 ( +0.20%) [ +0.00% +0.14% +0.07% / +0.20% +0.98% +1.01%] index_select const : Elapsed 0.059 ms (5.923 ms / 100) 5.928 -> 5.941 ( +0.22%) [ +0.00% +0.02% +0.12% / +0.22% +0.94% +0.94%] index_select wrap : Elapsed 0.059 ms (5.928 ms / 100) 5.927 -> 5.933 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.98% +0.89%] index_select linear : Elapsed 0.059 ms (5.930 ms / 100) 5.925 -> 5.928 ( +0.05%) [ +0.00% +0.12% +0.30% / +0.05% +1.01% +0.96%] index_select reverse : Elapsed 0.059 ms (5.925 ms / 100) 5.925 -> 5.933 ( +0.14%) [ +0.02% +0.00% +0.15% / +0.14% +1.10% +0.93%] index_select skip64 : Elapsed 0.059 ms (5.926 ms / 100) 5.921 -> 5.941 ( +0.34%) [ +0.17% +0.00% +0.19% / +0.34% +1.01% +0.98%] index_select skip256 : Elapsed 0.059 ms (5.931 ms / 100) 5.929 -> 5.929 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +1.05% +0.84%] index_select spread : Elapsed 0.059 ms (5.929 ms / 100) 5.923 -> 5.936 ( +0.22%) [ +0.12% +0.00% +0.20% / +0.22% +1.05% +1.13%] index_select strided 3 : Elapsed 0.059 ms (5.930 ms / 100) 5.923 -> 5.930 ( +0.12%) [ +0.00% +0.08% +0.22% / +0.12% +1.00% +1.10%] index_select random : Elapsed 0.059 ms (5.923 ms / 100) 5.928 -> 5.934 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +1.00% +1.01%] index_select random_sorted : Elapsed 0.059 ms (5.934 ms / 100) B = [5, 20, 40, 16] (stride (16, 3200, 80, 1)) A = [4, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 0 5.567 -> 5.556 ( -0.20%) [ +0.02% +0.04% +0.00% / -0.02% -0.20% -0.11%] index_add_ linear : Elapsed 0.056 ms (5.568 ms / 100) 5.510 -> 5.499 ( -0.20%) [ +0.05% +0.05% +0.00% / +0.07% -0.20% -0.13%] index_copy_ linear : Elapsed 0.055 ms (5.513 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.18% +0.00% +0.27% / +0.14% +0.09% +0.05%] index_add_ reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.512 -> 5.501 ( -0.20%) [ +0.00% +0.11% +0.11% / +0.05% -0.20% -0.16%] index_copy_ reverse : Elapsed 0.055 ms (5.512 ms / 100) 5.562 -> 5.551 ( -0.20%) [ +0.00% +0.16% +0.14% / +0.14% -0.20% +0.02%] index_add_ spread : Elapsed 0.056 ms (5.562 ms / 100) 5.512 -> 5.498 ( -0.25%) [ +0.15% +0.09% +0.00% / +0.02% -0.09% -0.25%] index_copy_ spread : Elapsed 0.055 ms (5.520 ms / 100) 5.566 -> 5.553 ( -0.23%) [ +0.14% +0.00% +0.11% / +0.04% -0.09% -0.23%] index_add_ strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.522 -> 5.502 ( -0.36%) [ +0.00% +0.09% +0.24% / +0.16% -0.13% -0.36%] index_copy_ strided 3 : Elapsed 0.055 ms (5.522 ms / 100) 5.568 -> 5.549 ( -0.34%) [ +0.13% +0.11% +0.00% / +0.22% -0.31% -0.34%] index_add_ perm : Elapsed 0.056 ms (5.575 ms / 100) 5.513 -> 5.496 ( -0.31%) [ +0.24% +0.00% +0.07% / +0.18% -0.31% -0.20%] index_copy_ perm : Elapsed 0.055 ms (5.526 ms / 100) 5.569 -> 5.547 ( -0.40%) [ +0.00% +0.02% +0.13% / -0.02% -0.25% -0.40%] index_add_ perm_sorted : Elapsed 0.056 ms (5.569 ms / 100) 5.518 -> 5.507 ( -0.20%) [ +0.16% +0.00% +0.11% / +0.18% -0.14% -0.20%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.527 ms / 100) 5.774 -> 5.788 ( +0.24%) [ +0.00% +0.14% +0.24% / +0.24% +0.28% +0.50%] index_select const : Elapsed 0.058 ms (5.774 ms / 100) 5.849 -> 5.842 ( -0.12%) [ +0.19% +0.00% +0.05% / +0.17% -0.12% +0.00%] index_select wrap : Elapsed 0.059 ms (5.860 ms / 100) 5.842 -> 5.835 ( -0.12%) [ +0.12% +0.02% +0.00% / +0.14% -0.07% -0.12%] index_select linear : Elapsed 0.058 ms (5.849 ms / 100) 5.843 -> 5.833 ( -0.17%) [ +0.00% +0.03% +0.09% / +0.14% -0.12% -0.17%] index_select reverse : Elapsed 0.058 ms (5.843 ms / 100) 5.781 -> 5.788 ( +0.12%) [ +0.00% +0.00% +0.02% / +0.12% +0.14% +0.28%] index_select skip64 : Elapsed 0.058 ms (5.781 ms / 100) 5.774 -> 5.784 ( +0.17%) [ +0.00% +0.05% +0.02% / +0.17% +0.47% +0.40%] index_select skip256 : Elapsed 0.058 ms (5.774 ms / 100) 5.832 -> 5.830 ( -0.03%) [ +0.00% +0.10% +0.17% / +0.29% -0.03% +0.02%] index_select spread : Elapsed 0.058 ms (5.832 ms / 100) 5.851 -> 5.845 ( -0.10%) [ +0.03% +0.00% +0.09% / +0.14% -0.10% +0.10%] index_select strided 3 : Elapsed 0.059 ms (5.853 ms / 100) 5.821 -> 5.820 ( -0.02%) [ +0.02% +0.00% +0.00% / +0.09% -0.02% -0.02%] index_select random : Elapsed 0.058 ms (5.822 ms / 100) 5.804 -> 5.809 ( +0.09%) [ +0.00% +0.12% +0.09% / +0.09% +0.10% +0.10%] index_select random_sorted : Elapsed 0.058 ms (5.804 ms / 100) B = [5, 20, 40, 16] (stride (16, 80, 1600, 1)) A = [4, 20, 40, 16] (stride (40, 160, 1, 3200)) dim = 0 5.934 -> 5.936 ( +0.03%) [ +0.00% +0.00% +0.20% / +0.03% +0.34% +0.51%] index_add_ linear : Elapsed 0.059 ms (5.934 ms / 100) 5.843 -> 5.852 ( +0.15%) [ +0.03% +0.00% +0.14% / +0.17% +0.31% +0.15%] index_copy_ linear : Elapsed 0.058 ms (5.845 ms / 100) 5.941 -> 5.945 ( +0.07%) [ +0.03% +0.00% +0.05% / +0.07% +0.22% +0.13%] index_add_ reverse : Elapsed 0.059 ms (5.943 ms / 100) 5.852 -> 5.859 ( +0.12%) [ +0.00% +0.00% +0.10% / +0.12% +0.38% +0.19%] index_copy_ reverse : Elapsed 0.059 ms (5.852 ms / 100) 5.936 -> 5.946 ( +0.17%) [ +0.07% +0.13% +0.00% / +0.17% +0.42% +0.40%] index_add_ spread : Elapsed 0.059 ms (5.940 ms / 100) 5.852 -> 5.854 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.12% +0.12% +0.03%] index_copy_ spread : Elapsed 0.059 ms (5.852 ms / 100) 5.940 -> 5.944 ( +0.07%) [ +0.00% +0.00% +0.08% / +0.07% +0.10% +0.27%] index_add_ strided 3 : Elapsed 0.059 ms (5.940 ms / 100) 5.862 -> 5.859 ( -0.05%) [ +0.07% +0.00% +0.00% / +0.02% +0.09% -0.05%] index_copy_ strided 3 : Elapsed 0.059 ms (5.866 ms / 100) 5.935 -> 5.942 ( +0.12%) [ +0.02% +0.00% +0.10% / +0.12% +0.34% +0.47%] index_add_ perm : Elapsed 0.059 ms (5.936 ms / 100) 5.846 -> 5.854 ( +0.14%) [ +0.00% +0.02% +0.07% / +0.27% +0.19% +0.14%] index_copy_ perm : Elapsed 0.058 ms (5.846 ms / 100) 5.935 -> 5.939 ( +0.07%) [ +0.00% +0.08% +0.10% / +0.07% +0.37% +0.54%] index_add_ perm_sorted : Elapsed 0.059 ms (5.935 ms / 100) 5.851 -> 5.857 ( +0.10%) [ +0.07% +0.00% +0.12% / +0.15% +0.10% +0.10%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.855 ms / 100) 6.136 -> 6.141 ( +0.08%) [ +0.00% +0.00% +0.02% / +0.08% +0.34% +0.24%] index_select const : Elapsed 0.061 ms (6.136 ms / 100) 6.245 -> 6.260 ( +0.24%) [ +0.21% +0.00% +0.27% / +0.29% +0.24% +0.29%] index_select wrap : Elapsed 0.063 ms (6.258 ms / 100) 6.236 -> 6.245 ( +0.14%) [ +0.03% +0.00% +0.03% / +0.14% +0.27% +0.22%] index_select linear : Elapsed 0.062 ms (6.238 ms / 100) 6.222 -> 6.240 ( +0.29%) [ +0.16% +0.00% +0.14% / +0.31% +0.29% +0.47%] index_select reverse : Elapsed 0.062 ms (6.232 ms / 100) 6.135 -> 6.142 ( +0.11%) [ +0.10% +0.00% +0.10% / +0.11% +0.37% +0.34%] index_select skip64 : Elapsed 0.061 ms (6.141 ms / 100) 6.132 -> 6.151 ( +0.31%) [ +0.16% +0.00% +0.21% / +0.31% +0.46% +0.31%] index_select skip256 : Elapsed 0.061 ms (6.142 ms / 100) 6.227 -> 6.238 ( +0.18%) [ +0.03% +0.00% +0.13% / +0.19% +0.29% +0.18%] index_select spread : Elapsed 0.062 ms (6.229 ms / 100) 6.243 -> 6.257 ( +0.22%) [ +0.11% +0.00% +0.22% / +0.29% +0.22% +0.29%] index_select strided 3 : Elapsed 0.063 ms (6.250 ms / 100) 6.229 -> 6.235 ( +0.10%) [ +0.22% +0.00% +0.16% / +0.10% +0.29% +0.29%] index_select random : Elapsed 0.062 ms (6.243 ms / 100) 6.236 -> 6.246 ( +0.16%) [ +0.00% +0.18% +0.18% / +0.16% +0.16% +0.19%] index_select random_sorted : Elapsed 0.062 ms (6.236 ms / 100) B = [5, 20, 40, 16] (stride (800, 1, 20, 4000)) A = [4, 20, 40, 16] (stride (12800, 16, 320, 1)) dim = 0 5.918 -> 5.915 ( -0.05%) [ +0.08% +0.00% +0.10% / +0.15% +0.05% -0.05%] index_add_ linear : Elapsed 0.059 ms (5.923 ms / 100) 5.825 -> 5.817 ( -0.14%) [ +0.00% +0.07% +0.22% / +0.02% -0.14% +0.05%] index_copy_ linear : Elapsed 0.058 ms (5.825 ms / 100) 5.883 -> 5.888 ( +0.08%) [ +0.14% +0.00% +0.03% / +0.22% +0.08% +0.34%] index_add_ reverse : Elapsed 0.059 ms (5.891 ms / 100) 5.808 -> 5.798 ( -0.17%) [ +0.00% +0.05% +0.03% / +0.03% -0.17% -0.07%] index_copy_ reverse : Elapsed 0.058 ms (5.808 ms / 100) 5.922 -> 5.913 ( -0.15%) [ +0.00% +0.10% +0.02% / -0.03% -0.08% -0.15%] index_add_ spread : Elapsed 0.059 ms (5.922 ms / 100) 5.817 -> 5.811 ( -0.10%) [ +0.00% +0.05% +0.07% / +0.17% +0.03% -0.10%] index_copy_ spread : Elapsed 0.058 ms (5.817 ms / 100) 5.901 -> 5.856 ( -0.76%) [ +0.00% +0.02% +0.10% / +0.10% -0.68% -0.76%] index_add_ strided 3 : Elapsed 0.059 ms (5.901 ms / 100) 5.805 -> 5.777 ( -0.48%) [ +0.09% +0.02% +0.00% / +0.03% -0.43% -0.48%] index_copy_ strided 3 : Elapsed 0.058 ms (5.810 ms / 100) 5.866 -> 5.867 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.10% +0.29% +0.02%] index_add_ perm : Elapsed 0.059 ms (5.870 ms / 100) 5.784 -> 5.793 ( +0.16%) [ +0.24% +0.00% +0.26% / +0.24% +0.22% +0.16%] index_copy_ perm : Elapsed 0.058 ms (5.798 ms / 100) 5.892 -> 5.891 ( -0.02%) [ +0.00% +0.05% +0.14% / +0.17% -0.02% +0.00%] index_add_ perm_sorted : Elapsed 0.059 ms (5.892 ms / 100) 5.800 -> 5.798 ( -0.03%) [ +0.05% +0.00% +0.12% / -0.02% -0.03% +0.07%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.803 ms / 100) 6.085 -> 6.089 ( +0.07%) [ +0.02% +0.00% +0.02% / +0.07% +0.12% +0.20%] index_select const : Elapsed 0.061 ms (6.086 ms / 100) 6.203 -> 6.188 ( -0.24%) [ +0.06% +0.00% +0.00% / +0.11% -0.16% -0.24%] index_select wrap : Elapsed 0.062 ms (6.207 ms / 100) 6.171 -> 6.168 ( -0.05%) [ +0.03% +0.00% +0.08% / -0.05% +0.06% -0.03%] index_select linear : Elapsed 0.062 ms (6.173 ms / 100) 6.187 -> 6.179 ( -0.13%) [ +0.08% +0.00% +0.05% / +0.23% -0.13% -0.02%] index_select reverse : Elapsed 0.062 ms (6.192 ms / 100) 6.071 -> 6.069 ( -0.03%) [ +0.00% +0.00% +0.03% / +0.13% -0.03% +0.07%] index_select skip64 : Elapsed 0.061 ms (6.071 ms / 100) 6.086 -> 6.090 ( +0.07%) [ +0.05% +0.00% +0.07% / +0.07% +0.15% +0.18%] index_select skip256 : Elapsed 0.061 ms (6.089 ms / 100) 6.187 -> 6.169 ( -0.29%) [ +0.00% +0.03% +0.19% / +0.05% -0.18% -0.29%] index_select spread : Elapsed 0.062 ms (6.187 ms / 100) 6.189 -> 6.179 ( -0.16%) [ +0.02% +0.00% +0.10% / +0.10% -0.06% -0.16%] index_select strided 3 : Elapsed 0.062 ms (6.190 ms / 100) 6.188 -> 6.179 ( -0.15%) [ +0.16% +0.00% +0.13% / +0.15% -0.15% -0.13%] index_select random : Elapsed 0.062 ms (6.198 ms / 100) 6.199 -> 6.181 ( -0.29%) [ +0.05% +0.05% +0.00% / +0.05% -0.29% -0.24%] index_select random_sorted : Elapsed 0.062 ms (6.202 ms / 100) B = [5, 20, 40, 16] (stride (1, 5, 100, 4000)) A = [4, 20, 40, 16] (stride (16, 2560, 64, 1)) dim = 0 5.770 -> 5.776 ( +0.10%) [ +0.00% +0.09% +0.07% / +0.10% +0.52% +0.31%] index_add_ linear : Elapsed 0.058 ms (5.770 ms / 100) 5.751 -> 5.752 ( +0.02%) [ +0.00% +0.03% +0.21% / +0.02% +0.23% +0.24%] index_copy_ linear : Elapsed 0.058 ms (5.751 ms / 100) 5.768 -> 5.779 ( +0.19%) [ +0.09% +0.00% +0.03% / +0.19% +0.45% +0.47%] index_add_ reverse : Elapsed 0.058 ms (5.773 ms / 100) 5.751 -> 5.752 ( +0.02%) [ +0.00% +0.07% +0.10% / +0.02% +0.31% +0.30%] index_copy_ reverse : Elapsed 0.058 ms (5.751 ms / 100) 5.773 -> 5.778 ( +0.09%) [ +0.00% +0.00% +0.14% / +0.09% +0.38% +0.49%] index_add_ spread : Elapsed 0.058 ms (5.773 ms / 100) 5.754 -> 5.758 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.14% +0.17%] index_copy_ spread : Elapsed 0.058 ms (5.754 ms / 100) 5.767 -> 5.779 ( +0.21%) [ +0.10% +0.00% +0.17% / +0.21% +0.40% +0.40%] index_add_ strided 3 : Elapsed 0.058 ms (5.773 ms / 100) 5.749 -> 5.759 ( +0.17%) [ +0.02% +0.00% +0.12% / +0.17% +0.26% +0.24%] index_copy_ strided 3 : Elapsed 0.058 ms (5.750 ms / 100) 5.779 -> 5.782 ( +0.05%) [ +0.05% +0.12% +0.00% / +0.05% +0.40% +0.36%] index_add_ perm : Elapsed 0.058 ms (5.782 ms / 100) 5.765 -> 5.767 ( +0.03%) [ +0.00% +0.00% +0.07% / +0.03% +0.21% +0.16%] index_copy_ perm : Elapsed 0.058 ms (5.765 ms / 100) 5.770 -> 5.780 ( +0.17%) [ +0.07% +0.00% +0.09% / +0.17% +0.35% +0.33%] index_add_ perm_sorted : Elapsed 0.058 ms (5.774 ms / 100) 5.748 -> 5.754 ( +0.10%) [ +0.00% +0.23% +0.10% / +0.10% +0.26% +0.28%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.748 ms / 100) 5.910 -> 5.919 ( +0.15%) [ +0.00% +0.05% +0.08% / +0.15% +0.41% +0.49%] index_select const : Elapsed 0.059 ms (5.910 ms / 100) 5.976 -> 5.988 ( +0.20%) [ +0.00% +0.05% +0.05% / +0.20% +0.28% +0.47%] index_select wrap : Elapsed 0.060 ms (5.976 ms / 100) 5.965 -> 5.966 ( +0.02%) [ +0.00% +0.08% +0.13% / +0.02% +0.40% +0.39%] index_select linear : Elapsed 0.060 ms (5.965 ms / 100) 5.978 -> 5.993 ( +0.25%) [ +0.00% +0.07% +0.12% / +0.25% +0.57% +0.64%] index_select reverse : Elapsed 0.060 ms (5.978 ms / 100) 5.939 -> 5.943 ( +0.07%) [ +0.03% +0.00% +0.00% / +0.07% +0.08% +0.12%] index_select skip64 : Elapsed 0.059 ms (5.941 ms / 100) 5.910 -> 5.922 ( +0.20%) [ +0.00% +0.02% +0.15% / +0.20% +0.47% +0.34%] index_select skip256 : Elapsed 0.059 ms (5.910 ms / 100) 5.990 -> 6.000 ( +0.17%) [ +0.00% +0.05% +0.18% / +0.17% +0.27% +0.35%] index_select spread : Elapsed 0.060 ms (5.990 ms / 100) 5.972 -> 5.980 ( +0.13%) [ +0.00% +0.12% +0.10% / +0.13% +0.47% +0.52%] index_select strided 3 : Elapsed 0.060 ms (5.972 ms / 100) 5.969 -> 5.976 ( +0.12%) [ +0.02% +0.00% +0.03% / +0.12% +0.35% +0.37%] index_select random : Elapsed 0.060 ms (5.970 ms / 100) 5.941 -> 5.950 ( +0.15%) [ +0.00% +0.08% +0.12% / +0.15% +0.56% +0.52%] index_select random_sorted : Elapsed 0.059 ms (5.941 ms / 100) out_shape = [4, 5, 40, 16] in_shape = [4, 20, 40, 16] idx_dim = 1 B = [4, 5, 40, 16] (stride (3200, 640, 1, 40)) A = [4, 20, 40, 16] (stride (12800, 1, 20, 800)) dim = 1 1.904 -> 1.905 ( +0.05%) [ +0.00% +0.21% +0.00% / +0.05% +0.58% +0.42%] index_select const : Elapsed 0.019 ms (1.904 ms / 100) 1.913 -> 1.917 ( +0.21%) [ +0.10% +0.31% +0.00% / +0.21% +0.42% +0.63%] index_select wrap : Elapsed 0.019 ms (1.915 ms / 100) 1.911 -> 1.917 ( +0.31%) [ +0.10% +0.31% +0.00% / +0.31% +0.94% +0.37%] index_select linear : Elapsed 0.019 ms (1.913 ms / 100) 1.908 -> 1.913 ( +0.26%) [ +0.37% +0.00% +0.26% / +0.26% +0.52% +0.42%] index_select reverse : Elapsed 0.019 ms (1.915 ms / 100) 1.905 -> 1.901 ( -0.21%) [ +0.16% +0.00% +0.05% / +0.05% -0.21% -0.16%] index_select skip64 : Elapsed 0.019 ms (1.908 ms / 100) 1.905 -> 1.909 ( +0.21%) [ +0.05% +0.00% +0.05% / +0.21% +0.58% +0.42%] index_select skip256 : Elapsed 0.019 ms (1.906 ms / 100) 1.932 -> 1.935 ( +0.16%) [ +0.05% +0.05% +0.00% / +0.31% +0.26% +0.16%] index_select spread : Elapsed 0.019 ms (1.933 ms / 100) 1.927 -> 1.928 ( +0.05%) [ +0.36% +0.05% +0.00% / +0.05% +0.21% +0.21%] index_select strided 3 : Elapsed 0.019 ms (1.934 ms / 100) 1.932 -> 1.934 ( +0.10%) [ +0.21% +0.00% +0.31% / +0.10% +0.16% +0.21%] index_select strided 5 : Elapsed 0.019 ms (1.936 ms / 100) 1.924 -> 1.925 ( +0.05%) [ +0.16% +0.26% +0.00% / +0.05% +0.26% +0.10%] index_select strided 7 : Elapsed 0.019 ms (1.927 ms / 100) 1.921 -> 1.919 ( -0.10%) [ +0.00% +0.00% +0.26% / -0.10% +0.21% +0.16%] index_select strided 8 : Elapsed 0.019 ms (1.921 ms / 100) 1.923 -> 1.926 ( +0.16%) [ +0.00% +0.21% +0.10% / +0.16% +0.62% +0.62%] index_select strided 16 : Elapsed 0.019 ms (1.923 ms / 100) 1.930 -> 1.931 ( +0.05%) [ +0.00% +0.31% +0.21% / +0.05% +0.21% +0.05%] index_select random : Elapsed 0.019 ms (1.930 ms / 100) 1.930 -> 1.930 ( +0.00%) [ +0.26% +0.00% +0.16% / +0.16% +0.00% +0.36%] index_select random_sorted : Elapsed 0.019 ms (1.935 ms / 100) 1.923 -> 1.927 ( +0.21%) [ +0.31% +0.00% +0.16% / +0.21% +0.57% +0.62%] index_select perm : Elapsed 0.019 ms (1.929 ms / 100) 1.920 -> 1.916 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.21% -0.10% -0.05%] index_select perm_sorted : Elapsed 0.019 ms (1.920 ms / 100) B = [4, 5, 40, 16] (stride (16, 2560, 64, 1)) A = [4, 20, 40, 16] (stride (1, 64, 1280, 4)) dim = 1 1.779 -> 1.775 ( -0.22%) [ +0.06% +0.00% +0.00% / -0.22% +0.22% +0.34%] index_select const : Elapsed 0.018 ms (1.780 ms / 100) 1.795 -> 1.795 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.33% +0.50%] index_select wrap : Elapsed 0.018 ms (1.796 ms / 100) 1.794 -> 1.797 ( +0.17%) [ +0.00% +0.11% +0.17% / +0.17% +0.56% +0.39%] index_select linear : Elapsed 0.018 ms (1.794 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.22% +0.11% +0.00% / +0.06% +0.56% +0.22%] index_select reverse : Elapsed 0.018 ms (1.801 ms / 100) 1.772 -> 1.776 ( +0.23%) [ +0.34% +0.23% +0.00% / +0.23% +0.73% +0.85%] index_select skip64 : Elapsed 0.018 ms (1.778 ms / 100) 1.770 -> 1.777 ( +0.40%) [ +0.34% +0.40% +0.00% / +0.40% +1.07% +0.73%] index_select skip256 : Elapsed 0.018 ms (1.776 ms / 100) 1.793 -> 1.795 ( +0.11%) [ +0.00% +0.06% +0.17% / +0.11% +0.39% +0.17%] index_select spread : Elapsed 0.018 ms (1.793 ms / 100) 1.793 -> 1.795 ( +0.11%) [ +0.28% +0.11% +0.00% / +0.11% +0.61% +0.45%] index_select strided 3 : Elapsed 0.018 ms (1.798 ms / 100) 1.782 -> 1.786 ( +0.22%) [ +0.00% +0.00% +0.17% / +0.22% +0.51% +0.51%] index_select strided 5 : Elapsed 0.018 ms (1.782 ms / 100) 1.792 -> 1.796 ( +0.22%) [ +0.00% +0.22% +0.11% / +0.22% +0.22% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.792 ms / 100) 1.792 -> 1.792 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.33% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.792 ms / 100) 1.792 -> 1.793 ( +0.06%) [ +0.22% +0.00% +0.06% / +0.06% +0.45% +0.28%] index_select strided 16 : Elapsed 0.018 ms (1.796 ms / 100) 1.777 -> 1.781 ( +0.23%) [ +0.17% +0.00% +0.06% / +0.23% +0.39% +0.51%] index_select random : Elapsed 0.018 ms (1.780 ms / 100) 1.779 -> 1.780 ( +0.06%) [ +0.11% +0.22% +0.00% / +0.06% +0.39% +0.45%] index_select random_sorted : Elapsed 0.018 ms (1.781 ms / 100) 1.792 -> 1.791 ( -0.06%) [ +0.22% +0.00% +0.22% / -0.06% +0.33% +0.61%] index_select perm : Elapsed 0.018 ms (1.796 ms / 100) 1.792 -> 1.791 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.45% +0.61%] index_select perm_sorted : Elapsed 0.018 ms (1.793 ms / 100) B = [4, 5, 40, 16] (stride (1, 2560, 64, 4)) dim = 1 fill_cnt = 20 1.644 -> 1.625 ( -1.16%) [ +0.43% +0.24% +0.00% / -0.79% -1.16% -1.09%] index_fill_ const : Elapsed 0.017 ms (1.651 ms / 100) 1.658 -> 1.638 ( -1.21%) [ +0.36% +0.24% +0.00% / -1.21% -0.97% -1.09%] index_fill_ linear : Elapsed 0.017 ms (1.664 ms / 100) 1.643 -> 1.624 ( -1.16%) [ +0.12% +0.06% +0.00% / -1.16% -0.91% -0.97%] index_fill_ reverse : Elapsed 0.016 ms (1.645 ms / 100) 1.648 -> 1.631 ( -1.03%) [ +0.12% +0.00% +0.18% / -1.03% -0.91% -1.03%] index_fill_ skip64 : Elapsed 0.017 ms (1.650 ms / 100) 1.648 -> 1.629 ( -1.15%) [ +0.00% +0.18% +0.18% / -1.15% -0.85% -1.03%] index_fill_ skip256 : Elapsed 0.016 ms (1.648 ms / 100) 1.650 -> 1.631 ( -1.15%) [ +0.18% +0.00% +0.18% / -0.73% -1.15% -1.15%] index_fill_ spread : Elapsed 0.017 ms (1.653 ms / 100) 1.658 -> 1.642 ( -0.97%) [ +0.42% +0.12% +0.00% / -0.97% -0.97% -0.97%] index_fill_ strided 3 : Elapsed 0.017 ms (1.665 ms / 100) 1.657 -> 1.636 ( -1.27%) [ +0.12% +0.12% +0.00% / -1.27% -0.91% -1.09%] index_fill_ random : Elapsed 0.017 ms (1.659 ms / 100) 1.655 -> 1.635 ( -1.21%) [ +0.00% +0.00% +0.12% / -1.09% -1.09% -1.21%] index_fill_ random_sorted : Elapsed 0.017 ms (1.655 ms / 100) B = [4, 5, 40, 16] (stride (40, 2560, 1, 160)) A = [4, 20, 40, 16] (stride (20, 1, 80, 3200)) dim = 1 1.918 -> 1.918 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.47% +0.63%] index_select const : Elapsed 0.019 ms (1.920 ms / 100) 1.926 -> 1.923 ( -0.16%) [ +0.10% +0.00% +0.10% / -0.16% +0.42% +0.62%] index_select wrap : Elapsed 0.019 ms (1.928 ms / 100) 1.924 -> 1.925 ( +0.05%) [ +0.36% +0.00% +0.42% / +0.05% +0.94% +0.68%] index_select linear : Elapsed 0.019 ms (1.931 ms / 100) 1.923 -> 1.929 ( +0.31%) [ +0.16% +0.16% +0.00% / +0.31% +0.68% +0.68%] index_select reverse : Elapsed 0.019 ms (1.926 ms / 100) 1.918 -> 1.917 ( -0.05%) [ +0.16% +0.10% +0.00% / -0.05% +0.52% +0.52%] index_select skip64 : Elapsed 0.019 ms (1.921 ms / 100) 1.920 -> 1.920 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.52%] index_select skip256 : Elapsed 0.019 ms (1.920 ms / 100) 1.939 -> 1.937 ( -0.10%) [ +0.00% +0.15% +0.05% / -0.10% +0.67% +0.62%] index_select spread : Elapsed 0.019 ms (1.939 ms / 100) 1.938 -> 1.938 ( +0.00%) [ +0.21% +0.15% +0.00% / +0.00% +0.57% +0.62%] index_select strided 3 : Elapsed 0.019 ms (1.942 ms / 100) 1.933 -> 1.936 ( +0.16%) [ +0.16% +0.47% +0.00% / +0.16% +0.88% +0.67%] index_select strided 5 : Elapsed 0.019 ms (1.936 ms / 100) 1.937 -> 1.938 ( +0.05%) [ +0.15% +0.00% +0.26% / +0.05% +0.72% +0.98%] index_select strided 7 : Elapsed 0.019 ms (1.940 ms / 100) 1.938 -> 1.947 ( +0.46%) [ +0.05% +0.31% +0.00% / +0.46% +0.67% +0.77%] index_select strided 8 : Elapsed 0.019 ms (1.939 ms / 100) 1.942 -> 1.943 ( +0.05%) [ +0.00% +0.10% +0.15% / +0.05% +0.57% +0.51%] index_select strided 16 : Elapsed 0.019 ms (1.942 ms / 100) 1.938 -> 1.937 ( -0.05%) [ +0.15% +0.05% +0.00% / -0.05% +0.52% +0.77%] index_select random : Elapsed 0.019 ms (1.941 ms / 100) 1.938 -> 1.941 ( +0.15%) [ +0.00% +0.00% +0.10% / +0.15% +0.77% +0.57%] index_select random_sorted : Elapsed 0.019 ms (1.938 ms / 100) 1.942 -> 1.944 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.51% +0.62%] index_select perm : Elapsed 0.019 ms (1.944 ms / 100) 1.932 -> 1.937 ( +0.26%) [ +0.26% +0.00% +0.26% / +0.26% +0.83% +0.98%] index_select perm_sorted : Elapsed 0.019 ms (1.937 ms / 100) B = [4, 5, 40, 16] (stride (80, 16, 320, 1)) A = [4, 20, 40, 16] (stride (40, 2560, 1, 160)) dim = 1 1.823 -> 1.825 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.22% +0.22%] index_select const : Elapsed 0.018 ms (1.823 ms / 100) 1.834 -> 1.836 ( +0.11%) [ +0.22% +0.00% +0.16% / +0.16% +0.44% +0.11%] index_select wrap : Elapsed 0.018 ms (1.838 ms / 100) 1.816 -> 1.817 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +0.44% +0.39%] index_select linear : Elapsed 0.018 ms (1.819 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.05% +0.22% +0.00% / +0.16% +0.11% +0.44%] index_select reverse : Elapsed 0.018 ms (1.823 ms / 100) 1.819 -> 1.819 ( +0.00%) [ +0.22% +0.00% +0.05% / +0.00% +0.82% +0.55%] index_select skip64 : Elapsed 0.018 ms (1.823 ms / 100) 1.831 -> 1.836 ( +0.27%) [ +0.05% +0.16% +0.00% / +0.27% +0.27% +0.27%] index_select skip256 : Elapsed 0.018 ms (1.832 ms / 100) 1.843 -> 1.842 ( -0.05%) [ +0.05% +0.11% +0.00% / -0.05% +0.16% +0.05%] index_select spread : Elapsed 0.018 ms (1.844 ms / 100) 1.848 -> 1.851 ( +0.16%) [ +0.00% +0.05% +0.16% / +0.16% +0.16% +0.22%] index_select strided 3 : Elapsed 0.018 ms (1.848 ms / 100) 1.824 -> 1.825 ( +0.05%) [ +0.11% +0.00% +0.16% / +0.05% +0.38% +0.55%] index_select strided 5 : Elapsed 0.018 ms (1.826 ms / 100) 1.832 -> 1.833 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.49% +0.38%] index_select strided 7 : Elapsed 0.018 ms (1.832 ms / 100) 1.830 -> 1.829 ( -0.05%) [ +0.00% +0.11% +0.16% / -0.05% +0.60% +0.66%] index_select strided 8 : Elapsed 0.018 ms (1.830 ms / 100) 1.833 -> 1.832 ( -0.05%) [ +0.11% +0.00% +0.00% / -0.05% +0.76% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.835 ms / 100) 1.821 -> 1.823 ( +0.11%) [ +0.00% +0.22% +0.11% / +0.11% +0.66% +0.49%] index_select random : Elapsed 0.018 ms (1.821 ms / 100) 1.833 -> 1.833 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.33% +0.27%] index_select random_sorted : Elapsed 0.018 ms (1.834 ms / 100) 1.834 -> 1.836 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.38% +0.44%] index_select perm : Elapsed 0.018 ms (1.836 ms / 100) 1.825 -> 1.821 ( -0.22%) [ +0.11% +0.00% +0.16% / -0.22% +0.49% +0.44%] index_select perm_sorted : Elapsed 0.018 ms (1.827 ms / 100) B = [4, 5, 40, 16] (stride (1, 64, 320, 4)) A = [4, 20, 40, 16] (stride (1, 64, 1280, 4)) dim = 1 1.776 -> 1.776 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.39% +0.28%] index_select const : Elapsed 0.018 ms (1.776 ms / 100) 1.790 -> 1.795 ( +0.28%) [ +0.06% +0.00% +0.17% / +0.28% +0.61% +0.73%] index_select wrap : Elapsed 0.018 ms (1.791 ms / 100) 1.795 -> 1.796 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.33% +0.45%] index_select linear : Elapsed 0.018 ms (1.795 ms / 100) 1.791 -> 1.795 ( +0.22%) [ +0.06% +0.00% +0.06% / +0.22% +0.56% +0.39%] index_select reverse : Elapsed 0.018 ms (1.792 ms / 100) 1.774 -> 1.776 ( +0.11%) [ +0.00% +0.23% +0.11% / +0.11% +0.73% +0.51%] index_select skip64 : Elapsed 0.018 ms (1.774 ms / 100) 1.775 -> 1.777 ( +0.11%) [ +0.06% +0.00% +0.11% / +0.11% +0.23% +0.34%] index_select skip256 : Elapsed 0.018 ms (1.776 ms / 100) 1.791 -> 1.790 ( -0.06%) [ +0.00% +0.11% +0.00% / -0.06% +0.28% +0.22%] index_select spread : Elapsed 0.018 ms (1.791 ms / 100) 1.788 -> 1.788 ( +0.00%) [ +0.22% +0.00% +0.11% / +0.00% +0.62% +0.50%] index_select strided 3 : Elapsed 0.018 ms (1.792 ms / 100) 1.782 -> 1.784 ( +0.11%) [ +0.00% +0.06% +0.22% / +0.11% +0.73% +0.51%] index_select strided 5 : Elapsed 0.018 ms (1.782 ms / 100) 1.790 -> 1.789 ( -0.06%) [ +0.11% +0.00% +0.11% / -0.06% +0.28% +0.39%] index_select strided 7 : Elapsed 0.018 ms (1.792 ms / 100) 1.788 -> 1.793 ( +0.28%) [ +0.50% +0.00% +0.11% / +0.28% +0.45% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.797 ms / 100) 1.794 -> 1.791 ( -0.17%) [ +0.11% +0.00% +0.00% / -0.17% +0.22% +0.11%] index_select strided 16 : Elapsed 0.018 ms (1.796 ms / 100) 1.793 -> 1.791 ( -0.11%) [ +0.22% +0.00% +0.33% / +0.17% +0.00% -0.11%] index_select random : Elapsed 0.018 ms (1.797 ms / 100) 1.795 -> 1.792 ( -0.17%) [ +0.00% +0.00% +0.22% / +0.17% -0.17% +0.00%] index_select random_sorted : Elapsed 0.018 ms (1.795 ms / 100) 1.793 -> 1.794 ( +0.06%) [ +0.28% +0.11% +0.00% / +0.06% +0.28% +0.22%] index_select perm : Elapsed 0.018 ms (1.798 ms / 100) 1.794 -> 1.799 ( +0.28%) [ +0.06% +0.00% +0.11% / +0.33% +0.28% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.795 ms / 100) B = [4, 5, 40, 16] (stride (1, 4, 320, 20)) A = [4, 20, 40, 16] (stride (20, 1, 80, 3200)) dim = 1 1.817 -> 1.817 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.00% +0.06%] index_select const : Elapsed 0.018 ms (1.817 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.06% +0.22% +0.00% / +0.06% +0.55% +0.72%] index_select wrap : Elapsed 0.018 ms (1.818 ms / 100) 1.820 -> 1.823 ( +0.16%) [ +0.00% +0.16% +0.05% / +0.16% +0.55% +0.71%] index_select linear : Elapsed 0.018 ms (1.820 ms / 100) 1.815 -> 1.819 ( +0.22%) [ +0.00% +0.28% +0.39% / +0.22% +0.66% +0.77%] index_select reverse : Elapsed 0.018 ms (1.815 ms / 100) 1.816 -> 1.814 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.33% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.817 ms / 100) 1.815 -> 1.814 ( -0.06%) [ +0.11% +0.00% +0.06% / -0.06% +0.55% +0.33%] index_select skip256 : Elapsed 0.018 ms (1.817 ms / 100) 1.830 -> 1.830 ( +0.00%) [ +0.05% +0.00% +0.38% / +0.00% +0.60% +0.38%] index_select spread : Elapsed 0.018 ms (1.831 ms / 100) 1.828 -> 1.830 ( +0.11%) [ +0.22% +0.00% +0.11% / +0.11% +0.71% +0.71%] index_select strided 3 : Elapsed 0.018 ms (1.832 ms / 100) 1.832 -> 1.836 ( +0.22%) [ +0.00% +0.16% +0.11% / +0.22% +0.55% +0.38%] index_select strided 5 : Elapsed 0.018 ms (1.832 ms / 100) 1.828 -> 1.832 ( +0.22%) [ +0.22% +0.00% +0.11% / +0.22% +0.55% +0.49%] index_select strided 7 : Elapsed 0.018 ms (1.832 ms / 100) 1.831 -> 1.832 ( +0.05%) [ +0.11% +0.16% +0.00% / +0.05% +0.27% +0.38%] index_select strided 8 : Elapsed 0.018 ms (1.833 ms / 100) 1.827 -> 1.832 ( +0.27%) [ +0.27% +0.38% +0.00% / +0.27% +0.60% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.832 ms / 100) 1.840 -> 1.835 ( -0.27%) [ +0.00% +0.11% +0.27% / -0.27% +0.05% +0.05%] index_select random : Elapsed 0.018 ms (1.840 ms / 100) 1.837 -> 1.837 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.27% +0.00% +0.22%] index_select random_sorted : Elapsed 0.018 ms (1.838 ms / 100) 1.836 -> 1.837 ( +0.05%) [ +0.11% +0.00% +0.11% / +0.33% +0.05% +0.11%] index_select perm : Elapsed 0.018 ms (1.838 ms / 100) 1.829 -> 1.828 ( -0.05%) [ +0.27% +0.00% +0.27% / -0.05% +0.38% +0.44%] index_select perm_sorted : Elapsed 0.018 ms (1.834 ms / 100) B = [4, 5, 40, 16] (stride (1, 160, 4, 800)) dim = 1 fill_cnt = 20 3.638 -> 3.621 ( -0.47%) [ +0.05% +0.16% +0.00% / -0.47% -0.25% -0.16%] index_fill_ const : Elapsed 0.036 ms (3.640 ms / 100) 3.586 -> 3.569 ( -0.47%) [ +0.08% +0.00% +0.25% / -0.47% -0.25% -0.36%] index_fill_ linear : Elapsed 0.036 ms (3.589 ms / 100) 3.570 -> 3.556 ( -0.39%) [ +0.06% +0.14% +0.00% / -0.36% -0.39% -0.25%] index_fill_ reverse : Elapsed 0.036 ms (3.572 ms / 100) 3.661 -> 3.642 ( -0.52%) [ +0.00% +0.08% +0.03% / -0.52% -0.49% -0.27%] index_fill_ skip64 : Elapsed 0.037 ms (3.661 ms / 100) 3.657 -> 3.638 ( -0.52%) [ +0.19% +0.00% +0.03% / -0.52% -0.46% -0.44%] index_fill_ skip256 : Elapsed 0.037 ms (3.664 ms / 100) 3.554 -> 3.534 ( -0.56%) [ +0.00% +0.03% +0.06% / -0.51% -0.42% -0.56%] index_fill_ spread : Elapsed 0.036 ms (3.554 ms / 100) 3.552 -> 3.527 ( -0.70%) [ +0.06% +0.00% +0.08% / -0.70% -0.48% -0.51%] index_fill_ strided 3 : Elapsed 0.036 ms (3.554 ms / 100) 3.563 -> 3.549 ( -0.39%) [ +0.00% +0.06% +0.14% / -0.31% -0.39% -0.25%] index_fill_ random : Elapsed 0.036 ms (3.563 ms / 100) 3.567 -> 3.556 ( -0.31%) [ +0.14% +0.06% +0.00% / -0.31% -0.25% -0.25%] index_fill_ random_sorted : Elapsed 0.036 ms (3.572 ms / 100) B = [4, 5, 40, 16] (stride (1, 160, 4, 800)) A = [4, 20, 40, 16] (stride (1, 2560, 4, 160)) dim = 1 1.837 -> 1.839 ( +0.11%) [ +0.05% +0.00% +0.22% / +0.11% +0.22% +0.22%] index_select const : Elapsed 0.018 ms (1.838 ms / 100) 1.820 -> 1.823 ( +0.16%) [ +0.22% +0.00% +0.33% / +0.16% +0.27% +0.16%] index_select wrap : Elapsed 0.018 ms (1.824 ms / 100) 1.823 -> 1.820 ( -0.16%) [ +0.00% +0.00% +0.27% / +0.11% -0.05% -0.16%] index_select linear : Elapsed 0.018 ms (1.823 ms / 100) 1.831 -> 1.833 ( +0.11%) [ +0.00% +0.05% +0.00% / +0.11% +0.11% +0.22%] index_select reverse : Elapsed 0.018 ms (1.831 ms / 100) 1.826 -> 1.828 ( +0.11%) [ +0.33% +0.11% +0.00% / +0.11% +0.33% +0.27%] index_select skip64 : Elapsed 0.018 ms (1.832 ms / 100) 1.832 -> 1.832 ( +0.00%) [ +0.05% +0.22% +0.00% / +0.05% +0.00% +0.00%] index_select skip256 : Elapsed 0.018 ms (1.833 ms / 100) 1.847 -> 1.847 ( +0.00%) [ +0.05% +0.22% +0.00% / +0.00% +0.54% +0.11%] index_select spread : Elapsed 0.018 ms (1.848 ms / 100) 1.849 -> 1.842 ( -0.38%) [ +0.05% +0.00% +0.05% / +0.11% -0.32% -0.38%] index_select strided 3 : Elapsed 0.018 ms (1.850 ms / 100) 1.835 -> 1.833 ( -0.11%) [ +0.00% +0.11% +0.38% / +0.27% -0.11% +0.05%] index_select strided 5 : Elapsed 0.018 ms (1.835 ms / 100) 1.843 -> 1.846 ( +0.16%) [ +0.43% +0.00% +0.22% / +0.27% +0.33% +0.16%] index_select strided 7 : Elapsed 0.019 ms (1.851 ms / 100) 1.854 -> 1.849 ( -0.27%) [ +0.16% +0.00% +0.11% / +0.00% -0.27% -0.22%] index_select strided 8 : Elapsed 0.019 ms (1.857 ms / 100) 1.853 -> 1.853 ( +0.00%) [ +0.16% +0.00% +0.11% / +0.22% +0.11% +0.00%] index_select strided 16 : Elapsed 0.019 ms (1.856 ms / 100) 1.825 -> 1.832 ( +0.38%) [ +0.11% +0.00% +0.82% / +0.60% +0.38% +0.38%] index_select random : Elapsed 0.018 ms (1.827 ms / 100) 1.834 -> 1.836 ( +0.11%) [ +0.11% +0.00% +0.33% / +0.11% +0.16% +0.16%] index_select random_sorted : Elapsed 0.018 ms (1.836 ms / 100) 1.852 -> 1.849 ( -0.16%) [ +0.05% +0.00% +0.97% / +0.70% -0.16% -0.05%] index_select perm : Elapsed 0.019 ms (1.853 ms / 100) 1.856 -> 1.852 ( -0.22%) [ +0.16% +0.00% +1.24% / +0.86% -0.05% -0.22%] index_select perm_sorted : Elapsed 0.019 ms (1.859 ms / 100) B = [4, 5, 40, 16] (stride (1, 4, 20, 800)) A = [4, 20, 40, 16] (stride (20, 1, 80, 3200)) dim = 1 1.930 -> 1.927 ( -0.16%) [ +0.05% +0.16% +0.00% / +0.00% +0.05% -0.16%] index_select const : Elapsed 0.019 ms (1.931 ms / 100) 1.925 -> 1.930 ( +0.26%) [ +0.05% +0.00% +0.26% / +0.26% +0.83% +0.83%] index_select wrap : Elapsed 0.019 ms (1.926 ms / 100) 1.931 -> 1.933 ( +0.10%) [ +0.21% +0.05% +0.00% / +0.10% +0.57% +0.78%] index_select linear : Elapsed 0.019 ms (1.935 ms / 100) 1.932 -> 1.931 ( -0.05%) [ +0.00% +0.21% +0.10% / -0.05% +0.36% +0.26%] index_select reverse : Elapsed 0.019 ms (1.932 ms / 100) 1.926 -> 1.924 ( -0.10%) [ +0.26% +0.00% +0.21% / -0.10% +0.52% +0.57%] index_select skip64 : Elapsed 0.019 ms (1.931 ms / 100) 1.925 -> 1.923 ( -0.10%) [ +0.00% +0.00% +0.21% / -0.10% +0.47% +0.47%] index_select skip256 : Elapsed 0.019 ms (1.925 ms / 100) 1.941 -> 1.943 ( +0.10%) [ +0.00% +0.21% +0.05% / +0.10% +0.62% +0.67%] index_select spread : Elapsed 0.019 ms (1.941 ms / 100) 1.944 -> 1.945 ( +0.05%) [ +0.15% +0.10% +0.00% / +0.05% +0.62% +0.67%] index_select strided 3 : Elapsed 0.019 ms (1.947 ms / 100) 1.944 -> 1.945 ( +0.05%) [ +0.15% +0.26% +0.00% / +0.05% +0.67% +0.87%] index_select strided 5 : Elapsed 0.019 ms (1.947 ms / 100) 1.946 -> 1.945 ( -0.05%) [ +0.26% +0.00% +0.00% / -0.05% +0.26% +0.31%] index_select strided 7 : Elapsed 0.020 ms (1.951 ms / 100) 1.945 -> 1.946 ( +0.05%) [ +0.41% +0.00% +0.10% / +0.05% +0.46% +0.31%] index_select strided 8 : Elapsed 0.020 ms (1.953 ms / 100) 1.944 -> 1.947 ( +0.15%) [ +0.26% +0.10% +0.00% / +0.15% +0.57% +0.51%] index_select strided 16 : Elapsed 0.019 ms (1.949 ms / 100) 1.950 -> 1.948 ( -0.10%) [ +0.00% +0.15% +0.15% / -0.10% +0.15% +0.26%] index_select random : Elapsed 0.020 ms (1.950 ms / 100) 1.949 -> 1.951 ( +0.10%) [ +0.26% +0.00% +0.10% / +0.10% +0.21% +0.10%] index_select random_sorted : Elapsed 0.020 ms (1.954 ms / 100) 1.943 -> 1.947 ( +0.21%) [ +0.05% +0.31% +0.00% / +0.21% +0.46% +0.62%] index_select perm : Elapsed 0.019 ms (1.944 ms / 100) 1.942 -> 1.947 ( +0.26%) [ +0.15% +0.46% +0.00% / +0.26% +0.26% +0.26%] index_select perm_sorted : Elapsed 0.019 ms (1.945 ms / 100) out_shape = [4, 20, 5, 16] in_shape = [4, 20, 40, 16] idx_dim = 2 B = [4, 20, 5, 16] (stride (1600, 16, 320, 1)) A = [4, 20, 40, 16] (stride (16, 64, 1280, 1)) dim = 2 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select const : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.47% +0.47%] index_select wrap : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.47% +0.61%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.47% +0.41%] index_select reverse : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.54% +0.41%] index_select skip64 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select skip256 : Elapsed 0.015 ms (1.478 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.47% +0.68%] index_select spread : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.47%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.47% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.27% / +0.07% +0.54% +0.47%] index_select strided 8 : Elapsed 0.015 ms (1.476 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.00% +0.00% +0.27% / +0.07% +0.61% +0.54%] index_select strided 16 : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.54%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.61% +0.54%] index_select random_sorted : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.476 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.61% +0.61%] index_select perm : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [4, 20, 5, 16] (stride (1600, 1, 20, 100)) A = [4, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 2 1.501 -> 1.502 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.73% +0.53%] index_select const : Elapsed 0.015 ms (1.503 ms / 100) 1.497 -> 1.499 ( +0.13%) [ +0.00% +0.00% +0.07% / +0.13% +0.60% +0.67%] index_select wrap : Elapsed 0.015 ms (1.497 ms / 100) 1.501 -> 1.502 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.67%] index_select linear : Elapsed 0.015 ms (1.502 ms / 100) 1.501 -> 1.501 ( +0.00%) [ +0.00% +0.00% +0.47% / +0.00% +0.80% +0.73%] index_select reverse : Elapsed 0.015 ms (1.501 ms / 100) 1.498 -> 1.498 ( +0.00%) [ +0.00% +0.00% +0.27% / +0.00% +0.67% +0.60%] index_select skip64 : Elapsed 0.015 ms (1.498 ms / 100) 1.501 -> 1.501 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +0.67%] index_select skip256 : Elapsed 0.015 ms (1.501 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.60% +0.00% +0.07% / +0.07% +0.74% +0.67%] index_select spread : Elapsed 0.015 ms (1.505 ms / 100) 1.479 -> 1.484 ( +0.34%) [ +0.20% +0.00% +0.07% / +0.34% +0.81% +0.68%] index_select strided 3 : Elapsed 0.015 ms (1.482 ms / 100) 1.492 -> 1.492 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.67% +0.67%] index_select strided 5 : Elapsed 0.015 ms (1.493 ms / 100) 1.494 -> 1.494 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.80% +0.74%] index_select strided 7 : Elapsed 0.015 ms (1.495 ms / 100) 1.502 -> 1.503 ( +0.07%) [ +0.07% +0.00% +0.20% / +0.07% +0.67% +0.67%] index_select strided 8 : Elapsed 0.015 ms (1.503 ms / 100) 1.489 -> 1.489 ( +0.00%) [ +0.00% +0.40% +0.60% / +0.00% +0.67% +1.07%] index_select strided 16 : Elapsed 0.015 ms (1.489 ms / 100) 1.501 -> 1.502 ( +0.07%) [ +0.13% +0.00% +0.13% / +0.07% +0.67% +0.67%] index_select random : Elapsed 0.015 ms (1.503 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.81% +0.74%] index_select random_sorted : Elapsed 0.015 ms (1.483 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.80% +0.74%] index_select perm : Elapsed 0.015 ms (1.493 ms / 100) 1.502 -> 1.503 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.67% +0.73%] index_select perm_sorted : Elapsed 0.015 ms (1.502 ms / 100) B = [4, 20, 5, 16] (stride (1, 320, 64, 4)) A = [4, 20, 40, 16] (stride (640, 2560, 1, 40)) dim = 2 1.608 -> 1.609 ( +0.06%) [ +0.00% +0.12% +0.00% / +0.06% +0.56% +0.44%] index_select const : Elapsed 0.016 ms (1.608 ms / 100) 1.609 -> 1.612 ( +0.19%) [ +0.00% +0.06% +0.00% / +0.19% +0.56% +0.44%] index_select wrap : Elapsed 0.016 ms (1.609 ms / 100) 1.609 -> 1.611 ( +0.12%) [ +0.06% +0.00% +0.12% / +0.12% +0.44% +0.56%] index_select linear : Elapsed 0.016 ms (1.610 ms / 100) 1.609 -> 1.610 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.44% +0.37%] index_select reverse : Elapsed 0.016 ms (1.610 ms / 100) 1.608 -> 1.608 ( +0.00%) [ +0.19% +0.00% +0.12% / +0.00% +0.68% +0.50%] index_select skip64 : Elapsed 0.016 ms (1.611 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.56% +0.56%] index_select skip256 : Elapsed 0.016 ms (1.609 ms / 100) 1.600 -> 1.601 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.63% +0.56%] index_select spread : Elapsed 0.016 ms (1.601 ms / 100) 1.601 -> 1.601 ( +0.00%) [ +0.00% +0.19% +0.06% / +0.00% +0.62% +0.50%] index_select strided 3 : Elapsed 0.016 ms (1.601 ms / 100) 1.597 -> 1.600 ( +0.19%) [ +0.25% +0.13% +0.00% / +0.19% +0.63% +0.63%] index_select strided 5 : Elapsed 0.016 ms (1.601 ms / 100) 1.602 -> 1.602 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +0.56% +0.50%] index_select strided 7 : Elapsed 0.016 ms (1.604 ms / 100) 1.605 -> 1.606 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.50% +0.50%] index_select strided 8 : Elapsed 0.016 ms (1.606 ms / 100) 1.585 -> 1.591 ( +0.38%) [ +0.00% +0.38% +0.38% / +0.38% +1.01% +1.14%] index_select strided 16 : Elapsed 0.016 ms (1.585 ms / 100) 1.580 -> 1.581 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.63% +0.70%] index_select random : Elapsed 0.016 ms (1.581 ms / 100) 1.599 -> 1.600 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.63% +0.63%] index_select random_sorted : Elapsed 0.016 ms (1.600 ms / 100) 1.604 -> 1.605 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.62% +0.75%] index_select perm : Elapsed 0.016 ms (1.605 ms / 100) 1.594 -> 1.599 ( +0.31%) [ +0.00% +0.13% +0.00% / +0.31% +0.63% +0.69%] index_select perm_sorted : Elapsed 0.016 ms (1.594 ms / 100) B = [4, 20, 5, 16] (stride (20, 1, 1280, 80)) A = [4, 20, 40, 16] (stride (12800, 16, 320, 1)) dim = 2 1.377 -> 1.377 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select const : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.22% +0.00% / +0.15% +0.73% +0.58%] index_select wrap : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.73% +0.58%] index_select linear : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.87% +0.65%] index_select reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.378 ( +0.29%) [ +0.07% +0.15% +0.00% / +0.29% +0.73% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.65% +0.73%] index_select spread : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.73% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.73% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.65% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.65%] index_select strided 8 : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.15% +0.22% +0.00% / +0.00% +0.73% +0.66%] index_select strided 16 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.87% +0.65%] index_select random : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.65%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.383 ( +0.58%) [ +0.07% +0.07% +0.00% / +0.58% +0.73% +0.80%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [4, 20, 5, 16] (stride (100, 5, 1, 400)) A = [4, 20, 40, 16] (stride (1, 4, 80, 3200)) dim = 2 1.442 -> 1.443 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.55% +0.42%] index_select const : Elapsed 0.014 ms (1.444 ms / 100) 1.431 -> 1.430 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.49% +0.49%] index_select wrap : Elapsed 0.014 ms (1.431 ms / 100) 1.423 -> 1.427 ( +0.28%) [ +0.14% +0.35% +0.00% / +0.28% +0.56% +0.77%] index_select linear : Elapsed 0.014 ms (1.425 ms / 100) 1.441 -> 1.442 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.69% +0.62%] index_select reverse : Elapsed 0.014 ms (1.443 ms / 100) 1.447 -> 1.447 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.48% +0.41%] index_select skip64 : Elapsed 0.014 ms (1.449 ms / 100) 1.441 -> 1.442 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.62% +0.56%] index_select skip256 : Elapsed 0.014 ms (1.443 ms / 100) 1.437 -> 1.437 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.49% +0.42%] index_select spread : Elapsed 0.014 ms (1.438 ms / 100) 1.436 -> 1.439 ( +0.21%) [ +0.00% +0.07% +0.07% / +0.21% +0.63% +0.49%] index_select strided 3 : Elapsed 0.014 ms (1.436 ms / 100) 1.425 -> 1.434 ( +0.63%) [ +0.00% +0.07% +0.28% / +0.70% +0.63% +0.63%] index_select strided 5 : Elapsed 0.014 ms (1.425 ms / 100) 1.445 -> 1.449 ( +0.28%) [ +0.14% +0.21% +0.00% / +0.28% +0.55% +0.55%] index_select strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.439 -> 1.440 ( +0.07%) [ +0.35% +0.00% +0.00% / +0.07% +0.69% +0.76%] index_select strided 8 : Elapsed 0.014 ms (1.444 ms / 100) 1.448 -> 1.449 ( +0.07%) [ +0.35% +0.14% +0.00% / +0.07% +0.62% +0.62%] index_select strided 16 : Elapsed 0.015 ms (1.453 ms / 100) 1.448 -> 1.450 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.62% +0.48%] index_select random : Elapsed 0.014 ms (1.449 ms / 100) 1.433 -> 1.434 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.56% +0.42%] index_select random_sorted : Elapsed 0.014 ms (1.434 ms / 100) 1.424 -> 1.425 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.63% +0.63%] index_select perm : Elapsed 0.014 ms (1.424 ms / 100) 1.433 -> 1.435 ( +0.14%) [ +0.21% +0.00% +0.14% / +0.14% +0.98% +0.70%] index_select perm_sorted : Elapsed 0.014 ms (1.436 ms / 100) out_shape = [4, 20, 40, 5] in_shape = [4, 20, 40, 16] idx_dim = 3 B = [4, 20, 40, 5] (stride (4000, 200, 5, 1)) A = [4, 20, 40, 16] (stride (1, 160, 4, 3200)) dim = 3 1.886 -> 1.884 ( -0.11%) [ +0.00% +0.00% +0.11% / -0.11% +0.48% +0.27%] index_select const : Elapsed 0.019 ms (1.886 ms / 100) 1.878 -> 1.883 ( +0.27%) [ +0.16% +0.00% +0.32% / +0.27% +0.37% +0.48%] index_select wrap : Elapsed 0.019 ms (1.881 ms / 100) 1.890 -> 1.891 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.21% +0.42%] index_select linear : Elapsed 0.019 ms (1.890 ms / 100) 1.884 -> 1.884 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.00% +0.21% +0.05%] index_select reverse : Elapsed 0.019 ms (1.886 ms / 100) 1.877 -> 1.883 ( +0.32%) [ +0.00% +0.11% +0.21% / +0.32% +0.75% +0.75%] index_select skip64 : Elapsed 0.019 ms (1.877 ms / 100) 1.885 -> 1.887 ( +0.11%) [ +0.11% +0.00% +0.21% / +0.11% +0.64% +0.69%] index_select skip256 : Elapsed 0.019 ms (1.887 ms / 100) 1.876 -> 1.887 ( +0.59%) [ +0.11% +0.11% +0.00% / +0.59% +0.75% +0.91%] index_select spread : Elapsed 0.019 ms (1.878 ms / 100) 1.886 -> 1.893 ( +0.37%) [ +0.16% +0.00% +0.16% / +0.37% +0.64% +0.69%] index_select strided 3 : Elapsed 0.019 ms (1.889 ms / 100) 1.884 -> 1.887 ( +0.16%) [ +0.27% +0.27% +0.00% / +0.21% +0.37% +0.16%] index_select strided 5 : Elapsed 0.019 ms (1.889 ms / 100) 1.876 -> 1.878 ( +0.11%) [ +0.00% +0.27% +0.16% / +0.11% +0.80% +0.96%] index_select strided 7 : Elapsed 0.019 ms (1.876 ms / 100) 1.882 -> 1.885 ( +0.16%) [ +0.00% +0.11% +0.00% / +0.16% +0.69% +0.69%] index_select strided 8 : Elapsed 0.019 ms (1.882 ms / 100) 1.878 -> 1.876 ( -0.11%) [ +0.00% +0.11% +0.05% / -0.11% +0.53% +0.69%] index_select random : Elapsed 0.019 ms (1.878 ms / 100) 1.876 -> 1.877 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.75% +0.64%] index_select random_sorted : Elapsed 0.019 ms (1.878 ms / 100) 1.884 -> 1.888 ( +0.21%) [ +0.11% +0.00% +0.11% / +0.21% +0.53% +0.69%] index_select perm : Elapsed 0.019 ms (1.886 ms / 100) 1.877 -> 1.883 ( +0.32%) [ +0.11% +0.00% +0.11% / +0.32% +0.48% +0.32%] index_select perm_sorted : Elapsed 0.019 ms (1.879 ms / 100) B = [4, 20, 40, 5] (stride (200, 800, 1, 40)) A = [4, 20, 40, 16] (stride (20, 1, 1280, 80)) dim = 3 2.305 -> 2.306 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.43% +0.43%] index_select const : Elapsed 0.023 ms (2.309 ms / 100) 2.319 -> 2.318 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.26% +0.17%] index_select wrap : Elapsed 0.023 ms (2.319 ms / 100) 2.317 -> 2.319 ( +0.09%) [ +0.22% +0.17% +0.00% / +0.09% +0.30% +0.47%] index_select linear : Elapsed 0.023 ms (2.322 ms / 100) 2.308 -> 2.311 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +0.26% +0.39%] index_select reverse : Elapsed 0.023 ms (2.311 ms / 100) 2.303 -> 2.306 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +0.52% +0.52%] index_select skip64 : Elapsed 0.023 ms (2.305 ms / 100) 2.304 -> 2.303 ( -0.04%) [ +0.00% +0.00% +0.13% / -0.04% +0.35% +0.39%] index_select skip256 : Elapsed 0.023 ms (2.304 ms / 100) 2.317 -> 2.318 ( +0.04%) [ +0.13% +0.17% +0.00% / +0.04% +0.35% +0.35%] index_select spread : Elapsed 0.023 ms (2.320 ms / 100) 2.319 -> 2.323 ( +0.17%) [ +0.26% +0.04% +0.00% / +0.26% +0.34% +0.17%] index_select strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.310 -> 2.307 ( -0.13%) [ +0.13% +0.04% +0.00% / -0.13% +0.30% +0.35%] index_select strided 5 : Elapsed 0.023 ms (2.313 ms / 100) 2.303 -> 2.301 ( -0.09%) [ +0.13% +0.00% +0.00% / -0.09% +0.39% +0.22%] index_select strided 7 : Elapsed 0.023 ms (2.306 ms / 100) 2.304 -> 2.306 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.43% +0.43%] index_select strided 8 : Elapsed 0.023 ms (2.307 ms / 100) 2.300 -> 2.302 ( +0.09%) [ +0.13% +0.26% +0.00% / +0.09% +0.35% +0.35%] index_select random : Elapsed 0.023 ms (2.303 ms / 100) 2.289 -> 2.291 ( +0.09%) [ +0.00% +0.17% +0.04% / +0.09% +0.57% +0.57%] index_select random_sorted : Elapsed 0.023 ms (2.289 ms / 100) 2.321 -> 2.325 ( +0.17%) [ +0.17% +0.04% +0.00% / +0.17% +0.26% +0.39%] index_select perm : Elapsed 0.023 ms (2.325 ms / 100) 2.308 -> 2.311 ( +0.13%) [ +0.17% +0.13% +0.00% / +0.13% +0.26% +0.30%] index_select perm_sorted : Elapsed 0.023 ms (2.312 ms / 100) B = [4, 20, 40, 5] (stride (40, 800, 1, 160)) A = [4, 20, 40, 16] (stride (20, 1, 1280, 80)) dim = 3 2.300 -> 2.299 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.43% +0.43%] index_select const : Elapsed 0.023 ms (2.302 ms / 100) 2.304 -> 2.307 ( +0.13%) [ +0.00% +0.26% +0.00% / +0.13% +0.43% +0.78%] index_select wrap : Elapsed 0.023 ms (2.304 ms / 100) 2.305 -> 2.305 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.69% +0.48%] index_select linear : Elapsed 0.023 ms (2.305 ms / 100) 2.304 -> 2.304 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.43% +0.39%] index_select reverse : Elapsed 0.023 ms (2.305 ms / 100) 2.293 -> 2.299 ( +0.26%) [ +0.35% +0.26% +0.00% / +0.26% +0.61% +0.65%] index_select skip64 : Elapsed 0.023 ms (2.301 ms / 100) 2.297 -> 2.296 ( -0.04%) [ +0.30% +0.04% +0.00% / -0.04% +0.48% +0.57%] index_select skip256 : Elapsed 0.023 ms (2.304 ms / 100) 2.322 -> 2.324 ( +0.09%) [ +0.13% +0.00% +0.04% / +0.09% +0.60% +0.65%] index_select spread : Elapsed 0.023 ms (2.325 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.13% +0.39% +0.00% / +0.04% +0.43% +0.56%] index_select strided 3 : Elapsed 0.023 ms (2.327 ms / 100) 2.311 -> 2.316 ( +0.22%) [ +0.04% +0.09% +0.00% / +0.22% +0.69% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.312 ms / 100) 2.295 -> 2.295 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.61% +0.70%] index_select strided 7 : Elapsed 0.023 ms (2.295 ms / 100) 2.299 -> 2.305 ( +0.26%) [ +0.26% +0.09% +0.00% / +0.26% +0.70% +0.61%] index_select strided 8 : Elapsed 0.023 ms (2.305 ms / 100) 2.307 -> 2.305 ( -0.09%) [ +0.09% +0.17% +0.00% / -0.09% +0.65% +0.61%] index_select random : Elapsed 0.023 ms (2.309 ms / 100) 2.306 -> 2.306 ( +0.00%) [ +0.09% +0.35% +0.00% / +0.00% +0.56% +0.65%] index_select random_sorted : Elapsed 0.023 ms (2.308 ms / 100) 2.318 -> 2.322 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.17% +0.56% +0.52%] index_select perm : Elapsed 0.023 ms (2.323 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.26% +0.00% +0.17% / +0.04% +0.43% +0.30%] index_select perm_sorted : Elapsed 0.023 ms (2.330 ms / 100) B = [4, 20, 40, 5] (stride (1, 4, 400, 80)) A = [4, 20, 40, 16] (stride (1, 160, 4, 3200)) dim = 3 2.146 -> 2.146 ( +0.00%) [ +0.00% +0.19% +0.09% / +0.00% +0.42% +0.47%] index_select const : Elapsed 0.021 ms (2.146 ms / 100) 2.135 -> 2.140 ( +0.23%) [ +0.28% +0.00% +0.19% / +0.23% +0.47% +0.66%] index_select wrap : Elapsed 0.021 ms (2.141 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.23% +0.00% +0.09% / +0.05% +0.66% +0.51%] index_select linear : Elapsed 0.021 ms (2.142 ms / 100) 2.140 -> 2.142 ( +0.09%) [ +0.00% +0.05% +0.09% / +0.09% +0.75% +0.51%] index_select reverse : Elapsed 0.021 ms (2.140 ms / 100) 2.140 -> 2.145 ( +0.23%) [ +0.09% +0.00% +0.09% / +0.23% +0.42% +0.33%] index_select skip64 : Elapsed 0.021 ms (2.142 ms / 100) 2.145 -> 2.147 ( +0.09%) [ +0.00% +0.09% +0.05% / +0.09% +0.51% +0.33%] index_select skip256 : Elapsed 0.021 ms (2.145 ms / 100) 2.140 -> 2.140 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +0.19% +0.28%] index_select spread : Elapsed 0.021 ms (2.140 ms / 100) 2.139 -> 2.141 ( +0.09%) [ +0.09% +0.00% +0.19% / +0.09% +0.28% +0.33%] index_select strided 3 : Elapsed 0.021 ms (2.141 ms / 100) 2.141 -> 2.140 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.28% +0.42%] index_select strided 5 : Elapsed 0.021 ms (2.142 ms / 100) 2.144 -> 2.143 ( -0.05%) [ +0.00% +0.05% +0.09% / -0.05% +0.09% +0.14%] index_select strided 7 : Elapsed 0.021 ms (2.144 ms / 100) 2.146 -> 2.150 ( +0.19%) [ +0.23% +0.00% +0.14% / +0.19% +0.33% +0.23%] index_select strided 8 : Elapsed 0.022 ms (2.151 ms / 100) 2.142 -> 2.142 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.00% +0.28% +0.37%] index_select random : Elapsed 0.021 ms (2.143 ms / 100) 2.138 -> 2.142 ( +0.19%) [ +0.28% +0.19% +0.00% / +0.19% +0.89% +0.61%] index_select random_sorted : Elapsed 0.021 ms (2.144 ms / 100) 2.136 -> 2.140 ( +0.19%) [ +0.00% +0.19% +0.14% / +0.19% +0.56% +0.33%] index_select perm : Elapsed 0.021 ms (2.136 ms / 100) 2.137 -> 2.139 ( +0.09%) [ +0.00% +0.14% +0.00% / +0.09% +0.33% +0.42%] index_select perm_sorted : Elapsed 0.021 ms (2.137 ms / 100) B = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) A = [4, 20, 40, 16] (stride (12800, 640, 1, 40)) dim = 3 2.123 -> 2.125 ( +0.09%) [ +0.00% +0.14% +0.09% / +0.09% +0.28% +0.24%] index_select const : Elapsed 0.021 ms (2.123 ms / 100) 2.181 -> 2.184 ( +0.14%) [ +0.00% +0.00% +0.09% / +0.14% +0.64% +0.50%] index_select wrap : Elapsed 0.022 ms (2.181 ms / 100) 2.192 -> 2.195 ( +0.14%) [ +0.00% +0.27% +0.09% / +0.41% +0.14% +0.36%] index_select linear : Elapsed 0.022 ms (2.192 ms / 100) 2.177 -> 2.182 ( +0.23%) [ +0.23% +0.41% +0.00% / +0.23% +1.10% +1.06%] index_select reverse : Elapsed 0.022 ms (2.182 ms / 100) 2.123 -> 2.125 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.33% +0.71%] index_select skip64 : Elapsed 0.021 ms (2.125 ms / 100) 2.122 -> 2.126 ( +0.19%) [ +0.24% +0.00% +0.14% / +0.38% +0.47% +0.19%] index_select skip256 : Elapsed 0.021 ms (2.127 ms / 100) 2.183 -> 2.182 ( -0.05%) [ +0.09% +0.00% +0.05% / -0.05% +0.92% +1.01%] index_select spread : Elapsed 0.022 ms (2.185 ms / 100) 2.198 -> 2.201 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.18% +0.41%] index_select strided 3 : Elapsed 0.022 ms (2.201 ms / 100) 2.199 -> 2.203 ( +0.18%) [ +0.27% +0.00% +0.27% / +0.41% +0.23% +0.18%] index_select strided 5 : Elapsed 0.022 ms (2.205 ms / 100) 2.173 -> 2.172 ( -0.05%) [ +0.23% +0.00% +0.14% / -0.05% +0.97% +1.06%] index_select strided 7 : Elapsed 0.022 ms (2.178 ms / 100) 2.121 -> 2.124 ( +0.14%) [ +0.28% +0.00% +0.19% / +0.14% +1.08% +0.99%] index_select strided 8 : Elapsed 0.021 ms (2.127 ms / 100) 2.154 -> 2.155 ( +0.05%) [ +0.14% +0.00% +0.09% / +0.05% +0.70% +0.84%] index_select random : Elapsed 0.022 ms (2.157 ms / 100) 2.160 -> 2.166 ( +0.28%) [ +0.32% +0.42% +0.00% / +0.28% +0.65% +0.93%] index_select random_sorted : Elapsed 0.022 ms (2.167 ms / 100) 2.176 -> 2.184 ( +0.37%) [ +0.00% +0.87% +0.78% / +0.37% +0.87% +1.42%] index_select perm : Elapsed 0.022 ms (2.176 ms / 100) 2.176 -> 2.172 ( -0.18%) [ +0.14% +0.00% +0.00% / -0.18% +0.60% +0.64%] index_select perm_sorted : Elapsed 0.022 ms (2.179 ms / 100) B = [4, 20, 40, 5] (stride (20, 1, 80, 3200)) dim = 3 fill_cnt = 16 1.296 -> 1.296 ( +0.00%) [ +0.00% +0.39% +0.15% / +0.00% +0.85% +0.77%] index_fill_ const : Elapsed 0.013 ms (1.296 ms / 100) 1.302 -> 1.303 ( +0.08%) [ +0.15% +0.31% +0.00% / +0.08% +0.92% +0.61%] index_fill_ linear : Elapsed 0.013 ms (1.304 ms / 100) 1.304 -> 1.304 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.08% +0.00% +0.23%] index_fill_ reverse : Elapsed 0.013 ms (1.306 ms / 100) 1.303 -> 1.301 ( -0.15%) [ +0.00% +0.08% +0.00% / -0.15% +0.46% +0.61%] index_fill_ skip64 : Elapsed 0.013 ms (1.303 ms / 100) 1.299 -> 1.299 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +1.23% +1.00%] index_fill_ skip256 : Elapsed 0.013 ms (1.300 ms / 100) 1.297 -> 1.299 ( +0.15%) [ +0.00% +0.15% +0.23% / +0.15% +1.08% +0.77%] index_fill_ spread : Elapsed 0.013 ms (1.297 ms / 100) 1.292 -> 1.296 ( +0.31%) [ +0.15% +0.00% +0.15% / +0.31% +1.08% +1.16%] index_fill_ strided 3 : Elapsed 0.013 ms (1.294 ms / 100) 1.292 -> 1.291 ( -0.08%) [ +0.31% +0.15% +0.00% / -0.08% +0.93% +0.93%] index_fill_ random : Elapsed 0.013 ms (1.296 ms / 100) 1.297 -> 1.301 ( +0.31%) [ +0.00% +0.08% +0.15% / +0.31% +1.16% +1.39%] index_fill_ random_sorted : Elapsed 0.013 ms (1.297 ms / 100) out_shape = [5, 40, 16, 20] in_shape = [4, 40, 16, 20] idx_dim = 0 B = [5, 40, 16, 20] (stride (12800, 320, 1, 16)) A = [4, 40, 16, 20] (stride (1, 1280, 80, 4)) dim = 0 5.379 -> 5.325 ( -1.00%) [ +0.11% +0.09% +0.00% / +0.17% -1.00% -0.86%] index_add_ linear : Elapsed 0.054 ms (5.385 ms / 100) 5.316 -> 5.272 ( -0.83%) [ +0.00% +0.08% +0.08% / +0.09% -0.83% -0.66%] index_copy_ linear : Elapsed 0.053 ms (5.316 ms / 100) 5.381 -> 5.329 ( -0.97%) [ +0.06% +0.00% +0.02% / +0.06% -0.95% -0.97%] index_add_ reverse : Elapsed 0.054 ms (5.384 ms / 100) 5.314 -> 5.274 ( -0.75%) [ +0.00% +0.02% +0.00% / +0.06% -0.75% -0.66%] index_copy_ reverse : Elapsed 0.053 ms (5.314 ms / 100) 5.381 -> 5.324 ( -1.06%) [ +0.00% +0.04% +0.00% / +0.04% -0.98% -1.06%] index_add_ spread : Elapsed 0.054 ms (5.381 ms / 100) 5.312 -> 5.270 ( -0.79%) [ +0.00% +0.15% +0.09% / +0.17% -0.79% -0.77%] index_copy_ spread : Elapsed 0.053 ms (5.312 ms / 100) 5.364 -> 5.335 ( -0.54%) [ +0.04% +0.04% +0.00% / -0.07% -0.50% -0.54%] index_add_ strided 3 : Elapsed 0.054 ms (5.366 ms / 100) 5.299 -> 5.273 ( -0.49%) [ +0.00% +0.06% +0.02% / +0.08% -0.49% -0.34%] index_copy_ strided 3 : Elapsed 0.053 ms (5.299 ms / 100) 5.364 -> 5.322 ( -0.78%) [ +0.00% +0.17% +0.07% / +0.07% -0.78% -0.71%] index_add_ perm : Elapsed 0.054 ms (5.364 ms / 100) 5.298 -> 5.273 ( -0.47%) [ +0.00% +0.06% +0.17% / +0.06% -0.47% -0.42%] index_copy_ perm : Elapsed 0.053 ms (5.298 ms / 100) 5.362 -> 5.327 ( -0.65%) [ +0.00% +0.11% +0.21% / +0.07% -0.62% -0.65%] index_add_ perm_sorted : Elapsed 0.054 ms (5.362 ms / 100) 5.295 -> 5.271 ( -0.45%) [ +0.19% +0.00% +0.15% / +0.09% -0.45% -0.42%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.305 ms / 100) 5.587 -> 5.533 ( -0.97%) [ +0.00% +0.09% +0.13% / +0.14% -0.97% -0.84%] index_select const : Elapsed 0.056 ms (5.587 ms / 100) 5.591 -> 5.532 ( -1.06%) [ +0.07% +0.00% +0.00% / +0.14% -1.06% -0.93%] index_select wrap : Elapsed 0.056 ms (5.595 ms / 100) 5.585 -> 5.534 ( -0.91%) [ +0.04% +0.00% +0.14% / +0.14% -0.91% -0.84%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.593 -> 5.533 ( -1.07%) [ +0.13% +0.09% +0.00% / +0.18% -0.98% -1.07%] index_select reverse : Elapsed 0.056 ms (5.600 ms / 100) 5.588 -> 5.527 ( -1.09%) [ +0.09% +0.00% +0.02% / +0.00% -1.09% -0.88%] index_select skip64 : Elapsed 0.056 ms (5.593 ms / 100) 5.588 -> 5.532 ( -1.00%) [ +0.00% +0.04% +0.14% / +0.00% -0.84% -1.00%] index_select skip256 : Elapsed 0.056 ms (5.588 ms / 100) 5.587 -> 5.536 ( -0.91%) [ +0.00% +0.04% +0.00% / +0.07% -0.89% -0.91%] index_select spread : Elapsed 0.056 ms (5.587 ms / 100) 5.590 -> 5.534 ( -1.00%) [ +0.00% +0.11% +0.00% / +0.16% -0.79% -1.00%] index_select strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.585 -> 5.536 ( -0.88%) [ +0.00% +0.04% +0.16% / +0.02% -0.88% -0.84%] index_select random : Elapsed 0.056 ms (5.585 ms / 100) 5.590 -> 5.536 ( -0.97%) [ +0.09% +0.00% +0.13% / +0.13% -0.97% -0.89%] index_select random_sorted : Elapsed 0.056 ms (5.595 ms / 100) B = [5, 40, 16, 20] (stride (320, 1600, 20, 1)) A = [4, 40, 16, 20] (stride (12800, 20, 800, 1)) dim = 0 5.554 -> 5.568 ( +0.25%) [ +0.16% +0.00% +0.18% / +0.25% +0.52% +0.43%] index_add_ linear : Elapsed 0.056 ms (5.563 ms / 100) 5.509 -> 5.519 ( +0.18%) [ +0.09% +0.00% +0.13% / +0.18% +0.36% +0.31%] index_copy_ linear : Elapsed 0.055 ms (5.514 ms / 100) 5.556 -> 5.563 ( +0.13%) [ +0.02% +0.00% +0.13% / +0.13% +0.32% +0.40%] index_add_ reverse : Elapsed 0.056 ms (5.557 ms / 100) 5.510 -> 5.517 ( +0.13%) [ +0.09% +0.00% +0.13% / +0.13% +0.33% +0.29%] index_copy_ reverse : Elapsed 0.055 ms (5.515 ms / 100) 5.561 -> 5.565 ( +0.07%) [ +0.14% +0.00% +0.13% / +0.07% +0.38% +0.41%] index_add_ spread : Elapsed 0.056 ms (5.569 ms / 100) 5.511 -> 5.515 ( +0.07%) [ +0.04% +0.00% +0.22% / +0.07% +0.49% +0.33%] index_copy_ spread : Elapsed 0.055 ms (5.513 ms / 100) 5.565 -> 5.572 ( +0.13%) [ +0.05% +0.00% +0.07% / +0.16% +0.20% +0.13%] index_add_ strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.510 -> 5.510 ( +0.00%) [ +0.05% +0.00% +0.22% / +0.00% +0.07% +0.24%] index_copy_ strided 3 : Elapsed 0.055 ms (5.513 ms / 100) 5.558 -> 5.564 ( +0.11%) [ +0.09% +0.00% +0.20% / +0.11% +0.16% +0.20%] index_add_ perm : Elapsed 0.056 ms (5.563 ms / 100) 5.513 -> 5.507 ( -0.11%) [ +0.00% +0.04% +0.02% / -0.11% +0.15% +0.11%] index_copy_ perm : Elapsed 0.055 ms (5.513 ms / 100) 5.553 -> 5.566 ( +0.23%) [ +0.00% +0.11% +0.22% / +0.27% +0.23% +0.34%] index_add_ perm_sorted : Elapsed 0.056 ms (5.553 ms / 100) 5.517 -> 5.518 ( +0.02%) [ +0.02% +0.00% +0.11% / +0.11% +0.02% +0.02%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.518 ms / 100) 5.709 -> 5.714 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.40% +0.46%] index_select const : Elapsed 0.057 ms (5.709 ms / 100) 5.818 -> 5.836 ( +0.31%) [ +0.00% +0.19% +0.10% / +0.31% +0.52% +0.64%] index_select wrap : Elapsed 0.058 ms (5.818 ms / 100) 5.808 -> 5.823 ( +0.26%) [ +0.03% +0.00% +0.22% / +0.26% +0.59% +0.65%] index_select linear : Elapsed 0.058 ms (5.810 ms / 100) 5.798 -> 5.818 ( +0.34%) [ +0.00% +0.16% +0.22% / +0.34% +0.74% +0.76%] index_select reverse : Elapsed 0.058 ms (5.798 ms / 100) 5.719 -> 5.723 ( +0.07%) [ +0.05% +0.00% +0.05% / +0.07% +0.44% +0.38%] index_select skip64 : Elapsed 0.057 ms (5.722 ms / 100) 5.702 -> 5.720 ( +0.32%) [ +0.00% +0.05% +0.26% / +0.32% +0.60% +0.63%] index_select skip256 : Elapsed 0.057 ms (5.702 ms / 100) 5.816 -> 5.826 ( +0.17%) [ +0.09% +0.00% +0.14% / +0.17% +0.65% +0.50%] index_select spread : Elapsed 0.058 ms (5.821 ms / 100) 5.820 -> 5.836 ( +0.27%) [ +0.03% +0.00% +0.19% / +0.27% +0.50% +0.50%] index_select strided 3 : Elapsed 0.058 ms (5.822 ms / 100) 5.745 -> 5.754 ( +0.16%) [ +0.00% +0.05% +0.19% / +0.16% +0.85% +0.87%] index_select random : Elapsed 0.057 ms (5.745 ms / 100) 5.727 -> 5.751 ( +0.42%) [ +0.12% +0.00% +0.40% / +0.42% +0.87% +0.73%] index_select random_sorted : Elapsed 0.057 ms (5.734 ms / 100) B = [5, 40, 16, 20] (stride (800, 1, 4000, 40)) A = [4, 40, 16, 20] (stride (800, 20, 3200, 1)) dim = 0 5.912 -> 5.915 ( +0.05%) [ +0.25% +0.00% +0.29% / +0.36% +0.12% +0.05%] index_add_ linear : Elapsed 0.059 ms (5.927 ms / 100) 5.840 -> 5.837 ( -0.05%) [ +0.00% +0.09% +0.05% / +0.09% +0.00% -0.05%] index_copy_ linear : Elapsed 0.058 ms (5.840 ms / 100) 5.898 -> 5.897 ( -0.02%) [ +0.02% +0.05% +0.00% / -0.02% +0.25% +0.08%] index_add_ reverse : Elapsed 0.059 ms (5.899 ms / 100) 5.825 -> 5.827 ( +0.03%) [ +0.07% +0.10% +0.00% / +0.03% +0.14% +0.10%] index_copy_ reverse : Elapsed 0.058 ms (5.829 ms / 100) 5.912 -> 5.909 ( -0.05%) [ +0.12% +0.00% +0.22% / +0.19% +0.07% -0.05%] index_add_ spread : Elapsed 0.059 ms (5.919 ms / 100) 5.832 -> 5.817 ( -0.26%) [ +0.00% +0.09% +0.17% / +0.15% -0.26% +0.00%] index_copy_ spread : Elapsed 0.058 ms (5.832 ms / 100) 5.904 -> 5.874 ( -0.51%) [ +0.00% +0.00% +0.08% / +0.15% -0.49% -0.51%] index_add_ strided 3 : Elapsed 0.059 ms (5.904 ms / 100) 5.821 -> 5.793 ( -0.48%) [ +0.05% +0.03% +0.00% / +0.12% -0.48% -0.22%] index_copy_ strided 3 : Elapsed 0.058 ms (5.824 ms / 100) 5.909 -> 5.893 ( -0.27%) [ +0.08% +0.00% +0.03% / +0.22% -0.14% -0.27%] index_add_ perm : Elapsed 0.059 ms (5.914 ms / 100) 5.828 -> 5.812 ( -0.27%) [ +0.00% +0.00% +0.15% / +0.03% -0.17% -0.27%] index_copy_ perm : Elapsed 0.058 ms (5.828 ms / 100) 5.920 -> 5.904 ( -0.27%) [ +0.03% +0.00% +0.08% / -0.07% -0.14% -0.27%] index_add_ perm_sorted : Elapsed 0.059 ms (5.922 ms / 100) 5.833 -> 5.822 ( -0.19%) [ +0.00% +0.05% +0.05% / +0.17% -0.10% -0.19%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.833 ms / 100) 6.110 -> 6.091 ( -0.31%) [ +0.00% +0.00% +0.13% / +0.15% -0.25% -0.31%] index_select const : Elapsed 0.061 ms (6.110 ms / 100) 6.227 -> 6.208 ( -0.31%) [ +0.02% +0.02% +0.00% / +0.06% -0.26% -0.31%] index_select wrap : Elapsed 0.062 ms (6.228 ms / 100) 6.198 -> 6.182 ( -0.26%) [ +0.15% +0.00% +0.13% / +0.26% -0.26% -0.24%] index_select linear : Elapsed 0.062 ms (6.207 ms / 100) 6.207 -> 6.186 ( -0.34%) [ +0.24% +0.16% +0.00% / +0.19% -0.34% -0.11%] index_select reverse : Elapsed 0.062 ms (6.222 ms / 100) 6.087 -> 6.072 ( -0.25%) [ +0.02% +0.08% +0.00% / +0.13% -0.16% -0.25%] index_select skip64 : Elapsed 0.061 ms (6.088 ms / 100) 6.107 -> 6.092 ( -0.25%) [ +0.28% +0.00% +0.26% / +0.20% -0.16% -0.25%] index_select skip256 : Elapsed 0.061 ms (6.124 ms / 100) 6.198 -> 6.182 ( -0.26%) [ +0.00% +0.05% +0.05% / -0.03% -0.23% -0.26%] index_select spread : Elapsed 0.062 ms (6.198 ms / 100) 6.214 -> 6.191 ( -0.37%) [ +0.02% +0.00% +0.02% / -0.10% -0.37% -0.26%] index_select strided 3 : Elapsed 0.062 ms (6.215 ms / 100) 6.139 -> 6.121 ( -0.29%) [ +0.07% +0.00% +0.05% / +0.16% -0.21% -0.29%] index_select random : Elapsed 0.061 ms (6.143 ms / 100) 6.147 -> 6.131 ( -0.26%) [ +0.07% +0.00% +0.02% / +0.11% -0.26% -0.24%] index_select random_sorted : Elapsed 0.062 ms (6.151 ms / 100) B = [5, 40, 16, 20] (stride (1, 5, 4000, 200)) A = [4, 40, 16, 20] (stride (20, 1280, 80, 1)) dim = 0 3.557 -> 3.557 ( +0.00%) [ +0.06% +0.00% +0.17% / +0.00% +0.11% +0.25%] index_add_ linear : Elapsed 0.036 ms (3.559 ms / 100) 3.535 -> 3.530 ( -0.14%) [ +0.06% +0.00% +0.23% / -0.14% +0.20% +0.20%] index_copy_ linear : Elapsed 0.035 ms (3.537 ms / 100) 3.556 -> 3.562 ( +0.17%) [ +0.14% +0.06% +0.00% / +0.17% +0.22% +0.20%] index_add_ reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.529 -> 3.531 ( +0.06%) [ +0.03% +0.17% +0.00% / +0.06% +0.37% +0.31%] index_copy_ reverse : Elapsed 0.035 ms (3.530 ms / 100) 3.559 -> 3.561 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.08% +0.22% +0.06%] index_add_ spread : Elapsed 0.036 ms (3.561 ms / 100) 3.536 -> 3.531 ( -0.14%) [ +0.00% +0.08% +0.03% / +0.11% -0.14% +0.11%] index_copy_ spread : Elapsed 0.035 ms (3.536 ms / 100) 3.558 -> 3.558 ( +0.00%) [ +0.17% +0.00% +0.22% / +0.00% +0.34% +0.22%] index_add_ strided 3 : Elapsed 0.036 ms (3.564 ms / 100) 3.538 -> 3.528 ( -0.28%) [ +0.06% +0.00% +0.11% / +0.14% -0.06% -0.28%] index_copy_ strided 3 : Elapsed 0.035 ms (3.540 ms / 100) 3.558 -> 3.560 ( +0.06%) [ +0.28% +0.06% +0.00% / +0.06% +0.11% +0.08%] index_add_ perm : Elapsed 0.036 ms (3.568 ms / 100) 3.539 -> 3.531 ( -0.23%) [ +0.00% +0.08% +0.08% / +0.14% +0.14% -0.23%] index_copy_ perm : Elapsed 0.035 ms (3.539 ms / 100) 3.556 -> 3.557 ( +0.03%) [ +0.00% +0.14% +0.08% / +0.03% +0.31% +0.48%] index_add_ perm_sorted : Elapsed 0.036 ms (3.556 ms / 100) 3.531 -> 3.532 ( +0.03%) [ +0.08% +0.34% +0.00% / +0.03% +0.28% +0.57%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.534 ms / 100) 3.498 -> 3.496 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.34% +0.23%] index_select const : Elapsed 0.035 ms (3.504 ms / 100) 3.547 -> 3.545 ( -0.06%) [ +0.00% +0.11% +0.20% / -0.06% +0.20% +0.28%] index_select wrap : Elapsed 0.035 ms (3.547 ms / 100) 3.544 -> 3.549 ( +0.14%) [ +0.14% +0.08% +0.00% / +0.17% +0.28% +0.14%] index_select linear : Elapsed 0.035 ms (3.549 ms / 100) 3.546 -> 3.551 ( +0.14%) [ +0.00% +0.11% +0.17% / +0.14% +0.20% +0.20%] index_select reverse : Elapsed 0.035 ms (3.546 ms / 100) 3.520 -> 3.505 ( -0.43%) [ +0.20% +0.11% +0.00% / -0.11% -0.31% -0.43%] index_select skip64 : Elapsed 0.035 ms (3.527 ms / 100) 3.502 -> 3.504 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.14% +0.20% +0.06%] index_select skip256 : Elapsed 0.035 ms (3.504 ms / 100) 3.546 -> 3.548 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.06% +0.45% +0.65%] index_select spread : Elapsed 0.035 ms (3.548 ms / 100) 3.540 -> 3.547 ( +0.20%) [ +0.31% +0.17% +0.00% / +0.20% +0.71% +0.51%] index_select strided 3 : Elapsed 0.036 ms (3.551 ms / 100) 3.530 -> 3.525 ( -0.14%) [ +0.08% +0.06% +0.00% / +0.06% -0.14% +0.06%] index_select random : Elapsed 0.035 ms (3.533 ms / 100) 3.528 -> 3.522 ( -0.17%) [ +0.00% +0.11% +0.06% / +0.06% -0.06% -0.17%] index_select random_sorted : Elapsed 0.035 ms (3.528 ms / 100) B = [5, 40, 16, 20] (stride (640, 1, 40, 3200)) A = [4, 40, 16, 20] (stride (1, 80, 3200, 4)) dim = 0 5.946 -> 5.927 ( -0.32%) [ +0.00% +0.00% +0.00% / +0.05% -0.25% -0.32%] index_add_ linear : Elapsed 0.059 ms (5.946 ms / 100) 5.868 -> 5.862 ( -0.10%) [ +0.00% +0.02% +0.12% / +0.15% -0.10% -0.07%] index_copy_ linear : Elapsed 0.059 ms (5.868 ms / 100) 5.945 -> 5.914 ( -0.52%) [ +0.03% +0.00% +0.20% / +0.17% -0.52% -0.42%] index_add_ reverse : Elapsed 0.059 ms (5.947 ms / 100) 5.877 -> 5.856 ( -0.36%) [ +0.00% +0.10% +0.10% / +0.03% -0.26% -0.36%] index_copy_ reverse : Elapsed 0.059 ms (5.877 ms / 100) 5.945 -> 5.924 ( -0.35%) [ +0.00% +0.02% +0.00% / +0.05% -0.27% -0.35%] index_add_ spread : Elapsed 0.059 ms (5.945 ms / 100) 5.863 -> 5.852 ( -0.19%) [ +0.10% +0.00% +0.19% / +0.15% -0.19% -0.05%] index_copy_ spread : Elapsed 0.059 ms (5.869 ms / 100) 5.941 -> 5.932 ( -0.15%) [ +0.00% +0.07% +0.07% / +0.03% -0.13% -0.15%] index_add_ strided 3 : Elapsed 0.059 ms (5.941 ms / 100) 5.868 -> 5.864 ( -0.07%) [ +0.05% +0.00% +0.00% / +0.12% -0.07% -0.05%] index_copy_ strided 3 : Elapsed 0.059 ms (5.871 ms / 100) 5.944 -> 5.930 ( -0.24%) [ +0.20% +0.05% +0.00% / +0.17% -0.22% -0.24%] index_add_ perm : Elapsed 0.060 ms (5.956 ms / 100) 5.876 -> 5.859 ( -0.29%) [ +0.12% +0.00% +0.15% / +0.09% -0.27% -0.29%] index_copy_ perm : Elapsed 0.059 ms (5.883 ms / 100) 5.942 -> 5.925 ( -0.29%) [ +0.00% +0.02% +0.00% / +0.03% -0.29% -0.15%] index_add_ perm_sorted : Elapsed 0.059 ms (5.942 ms / 100) 5.871 -> 5.857 ( -0.24%) [ +0.00% +0.03% +0.02% / +0.14% -0.24% -0.20%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.871 ms / 100) 6.276 -> 6.265 ( -0.18%) [ +0.00% +0.05% +0.24% / +0.11% -0.06% -0.18%] index_select const : Elapsed 0.063 ms (6.276 ms / 100) 6.274 -> 6.270 ( -0.06%) [ +0.08% +0.00% +0.16% / +0.21% -0.02% -0.06%] index_select wrap : Elapsed 0.063 ms (6.279 ms / 100) 6.270 -> 6.266 ( -0.06%) [ +0.00% +0.03% +0.11% / +0.10% -0.06% -0.06%] index_select linear : Elapsed 0.063 ms (6.270 ms / 100) 6.269 -> 6.263 ( -0.10%) [ +0.00% +0.11% +0.27% / +0.13% +0.00% -0.10%] index_select reverse : Elapsed 0.063 ms (6.269 ms / 100) 6.272 -> 6.266 ( -0.10%) [ +0.02% +0.00% +0.18% / +0.13% -0.10% -0.08%] index_select skip64 : Elapsed 0.063 ms (6.273 ms / 100) 6.279 -> 6.277 ( -0.03%) [ +0.08% +0.06% +0.00% / +0.05% -0.02% -0.03%] index_select skip256 : Elapsed 0.063 ms (6.284 ms / 100) 6.269 -> 6.264 ( -0.08%) [ +0.00% +0.10% +0.13% / +0.13% -0.08% -0.05%] index_select spread : Elapsed 0.063 ms (6.269 ms / 100) 6.271 -> 6.269 ( -0.03%) [ +0.00% +0.00% +0.21% / +0.03% -0.03% +0.02%] index_select strided 3 : Elapsed 0.063 ms (6.271 ms / 100) 6.274 -> 6.267 ( -0.11%) [ +0.06% +0.00% +0.05% / +0.19% -0.10% -0.11%] index_select random : Elapsed 0.063 ms (6.278 ms / 100) 6.277 -> 6.269 ( -0.13%) [ +0.08% +0.00% +0.30% / +0.11% -0.08% -0.13%] index_select random_sorted : Elapsed 0.063 ms (6.282 ms / 100) B = [5, 40, 16, 20] (stride (1, 80, 5, 3200)) A = [4, 40, 16, 20] (stride (16, 1280, 1, 64)) dim = 0 5.813 -> 5.818 ( +0.09%) [ +0.00% +0.14% +0.14% / +0.09% +0.29% +0.17%] index_add_ linear : Elapsed 0.058 ms (5.813 ms / 100) 5.788 -> 5.793 ( +0.09%) [ +0.00% +0.09% +0.31% / +0.38% +0.09% +0.14%] index_copy_ linear : Elapsed 0.058 ms (5.788 ms / 100) 5.816 -> 5.819 ( +0.05%) [ +0.09% +0.00% +0.22% / +0.05% +0.22% +0.26%] index_add_ reverse : Elapsed 0.058 ms (5.821 ms / 100) 5.790 -> 5.794 ( +0.07%) [ +0.07% +0.00% +0.16% / +0.21% +0.14% +0.07%] index_copy_ reverse : Elapsed 0.058 ms (5.794 ms / 100) 5.819 -> 5.824 ( +0.09%) [ +0.00% +0.02% +0.29% / +0.38% +0.19% +0.09%] index_add_ spread : Elapsed 0.058 ms (5.819 ms / 100) 5.795 -> 5.790 ( -0.09%) [ +0.00% +0.07% +0.52% / +0.12% -0.09% -0.07%] index_copy_ spread : Elapsed 0.058 ms (5.795 ms / 100) 5.818 -> 5.828 ( +0.17%) [ +0.09% +0.00% +0.12% / +0.28% +0.34% +0.17%] index_add_ strided 3 : Elapsed 0.058 ms (5.823 ms / 100) 5.794 -> 5.789 ( -0.09%) [ +0.00% +0.09% +0.28% / +0.03% +0.03% -0.09%] index_copy_ strided 3 : Elapsed 0.058 ms (5.794 ms / 100) 5.815 -> 5.824 ( +0.15%) [ +0.00% +0.10% +0.07% / +0.15% +0.31% +0.33%] index_add_ perm : Elapsed 0.058 ms (5.815 ms / 100) 5.797 -> 5.789 ( -0.14%) [ +0.00% +0.02% +0.12% / +0.03% +0.05% -0.14%] index_copy_ perm : Elapsed 0.058 ms (5.797 ms / 100) 5.821 -> 5.818 ( -0.05%) [ +0.02% +0.00% +0.02% / -0.05% +0.12% +0.14%] index_add_ perm_sorted : Elapsed 0.058 ms (5.822 ms / 100) 5.791 -> 5.794 ( +0.05%) [ +0.00% +0.09% +0.24% / +0.31% +0.12% +0.05%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.791 ms / 100) 6.013 -> 6.019 ( +0.10%) [ +0.00% +0.05% +0.25% / +0.17% +0.10% +0.15%] index_select const : Elapsed 0.060 ms (6.013 ms / 100) 6.069 -> 6.071 ( +0.03%) [ +0.03% +0.07% +0.00% / +0.12% +0.03% +0.05%] index_select wrap : Elapsed 0.061 ms (6.071 ms / 100) 6.061 -> 6.062 ( +0.02%) [ +0.00% +0.00% +0.07% / +0.07% +0.02% +0.07%] index_select linear : Elapsed 0.061 ms (6.061 ms / 100) 6.049 -> 6.060 ( +0.18%) [ +0.05% +0.00% +0.23% / +0.23% +0.18% +0.18%] index_select reverse : Elapsed 0.061 ms (6.052 ms / 100) 6.018 -> 6.018 ( +0.00%) [ +0.07% +0.00% +0.15% / +0.07% +0.02% +0.00%] index_select skip64 : Elapsed 0.060 ms (6.022 ms / 100) 6.011 -> 6.020 ( +0.15%) [ +0.00% +0.17% +0.15% / +0.27% +0.17% +0.15%] index_select skip256 : Elapsed 0.060 ms (6.011 ms / 100) 6.053 -> 6.051 ( -0.03%) [ +0.17% +0.00% +0.08% / +0.15% -0.03% +0.02%] index_select spread : Elapsed 0.061 ms (6.063 ms / 100) 6.061 -> 6.062 ( +0.02%) [ +0.08% +0.00% +0.13% / +0.12% +0.10% +0.02%] index_select strided 3 : Elapsed 0.061 ms (6.066 ms / 100) 6.047 -> 6.051 ( +0.07%) [ +0.15% +0.00% +0.20% / +0.18% +0.07% +0.07%] index_select random : Elapsed 0.061 ms (6.056 ms / 100) 6.052 -> 6.050 ( -0.03%) [ +0.10% +0.00% +0.02% / -0.02% -0.02% -0.03%] index_select random_sorted : Elapsed 0.061 ms (6.058 ms / 100) B = [5, 40, 16, 20] (stride (1, 5, 200, 3200)) A = [4, 40, 16, 20] (stride (800, 1, 3200, 40)) dim = 0 6.035 -> 6.028 ( -0.12%) [ +0.00% +0.02% +0.02% / +0.07% -0.05% -0.12%] index_add_ linear : Elapsed 0.060 ms (6.035 ms / 100) 6.003 -> 5.988 ( -0.25%) [ +0.02% +0.00% +0.13% / +0.05% -0.25% -0.18%] index_copy_ linear : Elapsed 0.060 ms (6.004 ms / 100) 6.030 -> 6.028 ( -0.03%) [ +0.03% +0.00% +0.12% / +0.20% -0.03% +0.05%] index_add_ reverse : Elapsed 0.060 ms (6.032 ms / 100) 6.001 -> 5.989 ( -0.20%) [ +0.00% +0.10% +0.03% / +0.07% -0.17% -0.20%] index_copy_ reverse : Elapsed 0.060 ms (6.001 ms / 100) 6.034 -> 6.027 ( -0.12%) [ +0.00% +0.03% +0.02% / +0.07% -0.10% -0.12%] index_add_ spread : Elapsed 0.060 ms (6.034 ms / 100) 6.001 -> 5.990 ( -0.18%) [ +0.03% +0.00% +0.12% / +0.12% -0.18% -0.13%] index_copy_ spread : Elapsed 0.060 ms (6.003 ms / 100) 6.037 -> 6.029 ( -0.13%) [ +0.05% +0.00% +0.12% / +0.05% -0.07% -0.13%] index_add_ strided 3 : Elapsed 0.060 ms (6.040 ms / 100) 6.003 -> 5.989 ( -0.23%) [ +0.03% +0.00% +0.02% / +0.08% -0.22% -0.23%] index_copy_ strided 3 : Elapsed 0.060 ms (6.005 ms / 100) 6.036 -> 6.024 ( -0.20%) [ +0.00% +0.03% +0.07% / +0.08% -0.20% +0.00%] index_add_ perm : Elapsed 0.060 ms (6.036 ms / 100) 6.001 -> 5.989 ( -0.20%) [ +0.00% +0.10% +0.15% / +0.15% -0.10% -0.20%] index_copy_ perm : Elapsed 0.060 ms (6.001 ms / 100) 6.033 -> 6.023 ( -0.17%) [ +0.15% +0.00% +0.05% / +0.07% +0.02% -0.17%] index_add_ perm_sorted : Elapsed 0.060 ms (6.042 ms / 100) 5.999 -> 5.989 ( -0.17%) [ +0.02% +0.00% +0.05% / +0.20% -0.17% -0.08%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.000 ms / 100) 6.242 -> 6.222 ( -0.32%) [ +0.02% +0.00% +0.10% / +0.11% -0.21% -0.32%] index_select const : Elapsed 0.062 ms (6.243 ms / 100) 6.299 -> 6.281 ( -0.29%) [ +0.08% +0.00% +0.05% / +0.00% -0.29% -0.24%] index_select wrap : Elapsed 0.063 ms (6.304 ms / 100) 6.288 -> 6.278 ( -0.16%) [ +0.06% +0.00% +0.14% / +0.16% +0.00% -0.16%] index_select linear : Elapsed 0.063 ms (6.292 ms / 100) 6.291 -> 6.271 ( -0.32%) [ +0.02% +0.00% +0.05% / +0.05% -0.22% -0.32%] index_select reverse : Elapsed 0.063 ms (6.292 ms / 100) 6.242 -> 6.226 ( -0.26%) [ +0.11% +0.00% +0.02% / +0.00% -0.26% -0.26%] index_select skip64 : Elapsed 0.062 ms (6.249 ms / 100) 6.245 -> 6.219 ( -0.42%) [ +0.05% +0.13% +0.00% / -0.05% -0.34% -0.42%] index_select skip256 : Elapsed 0.062 ms (6.248 ms / 100) 6.295 -> 6.279 ( -0.25%) [ +0.03% +0.00% +0.11% / -0.05% -0.24% -0.25%] index_select spread : Elapsed 0.063 ms (6.297 ms / 100) 6.300 -> 6.285 ( -0.24%) [ +0.00% +0.00% +0.08% / -0.05% -0.19% -0.24%] index_select strided 3 : Elapsed 0.063 ms (6.300 ms / 100) 6.274 -> 6.258 ( -0.26%) [ +0.10% +0.00% +0.08% / +0.06% -0.26% -0.19%] index_select random : Elapsed 0.063 ms (6.280 ms / 100) 6.275 -> 6.250 ( -0.40%) [ +0.00% +0.11% +0.03% / +0.05% -0.30% -0.40%] index_select random_sorted : Elapsed 0.063 ms (6.275 ms / 100) out_shape = [4, 5, 16, 20] in_shape = [4, 40, 16, 20] idx_dim = 1 B = [4, 5, 16, 20] (stride (1600, 320, 1, 16)) A = [4, 40, 16, 20] (stride (640, 1, 40, 2560)) dim = 1 1.613 -> 1.612 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.37% +0.50%] index_select const : Elapsed 0.016 ms (1.613 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.06% +0.00% +0.06% / +0.12% +0.56% +0.56%] index_select wrap : Elapsed 0.016 ms (1.612 ms / 100) 1.616 -> 1.617 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.50% +0.43%] index_select linear : Elapsed 0.016 ms (1.618 ms / 100) 1.615 -> 1.614 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.62% +0.50%] index_select reverse : Elapsed 0.016 ms (1.615 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.43% +0.37%] index_select skip64 : Elapsed 0.016 ms (1.613 ms / 100) 1.613 -> 1.611 ( -0.12%) [ +0.12% +0.00% +0.00% / -0.12% +0.43% +0.43%] index_select skip256 : Elapsed 0.016 ms (1.615 ms / 100) 1.612 -> 1.611 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.56% +0.43%] index_select spread : Elapsed 0.016 ms (1.612 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.19% +0.06% +0.00% / +0.06% +0.56% +0.56%] index_select strided 3 : Elapsed 0.016 ms (1.610 ms / 100) 1.611 -> 1.615 ( +0.25%) [ +0.00% +0.12% +0.00% / +0.25% +0.31% +0.37%] index_select strided 5 : Elapsed 0.016 ms (1.611 ms / 100) 1.605 -> 1.607 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.62% +0.62%] index_select strided 7 : Elapsed 0.016 ms (1.606 ms / 100) 1.613 -> 1.613 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.43%] index_select strided 8 : Elapsed 0.016 ms (1.613 ms / 100) 1.605 -> 1.604 ( -0.06%) [ +0.12% +0.00% +0.00% / -0.06% +0.62% +0.62%] index_select strided 16 : Elapsed 0.016 ms (1.607 ms / 100) 1.605 -> 1.606 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.62% +0.50%] index_select random : Elapsed 0.016 ms (1.605 ms / 100) 1.605 -> 1.604 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.56% +0.44%] index_select random_sorted : Elapsed 0.016 ms (1.605 ms / 100) 1.607 -> 1.612 ( +0.31%) [ +0.19% +0.25% +0.00% / +0.31% +0.81% +0.87%] index_select perm : Elapsed 0.016 ms (1.610 ms / 100) 1.606 -> 1.605 ( -0.06%) [ +0.25% +0.25% +0.00% / -0.06% +0.75% +0.62%] index_select perm_sorted : Elapsed 0.016 ms (1.610 ms / 100) B = [4, 5, 16, 20] (stride (20, 80, 400, 1)) A = [4, 40, 16, 20] (stride (640, 1, 40, 2560)) dim = 1 1.609 -> 1.608 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.75% +0.75%] index_select const : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.610 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.56% +0.56%] index_select wrap : Elapsed 0.016 ms (1.611 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.75% +0.68%] index_select linear : Elapsed 0.016 ms (1.609 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.81% +0.68%] index_select reverse : Elapsed 0.016 ms (1.607 ms / 100) 1.609 -> 1.610 ( +0.06%) [ +0.06% +0.00% +0.12% / +0.06% +0.81% +0.75%] index_select skip64 : Elapsed 0.016 ms (1.610 ms / 100) 1.609 -> 1.608 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +0.50% +0.99%] index_select skip256 : Elapsed 0.016 ms (1.610 ms / 100) 1.602 -> 1.607 ( +0.31%) [ +0.31% +0.00% +0.25% / +0.31% +0.94% +0.94%] index_select spread : Elapsed 0.016 ms (1.607 ms / 100) 1.604 -> 1.605 ( +0.06%) [ +0.00% +0.12% +0.06% / +0.06% +0.62% +0.75%] index_select strided 3 : Elapsed 0.016 ms (1.604 ms / 100) 1.604 -> 1.603 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.62% +0.69%] index_select strided 5 : Elapsed 0.016 ms (1.604 ms / 100) 1.604 -> 1.606 ( +0.12%) [ +0.00% +0.06% +0.00% / +0.12% +0.75% +0.69%] index_select strided 7 : Elapsed 0.016 ms (1.604 ms / 100) 1.605 -> 1.606 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.62% +0.81%] index_select strided 8 : Elapsed 0.016 ms (1.607 ms / 100) 1.604 -> 1.607 ( +0.19%) [ +0.00% +0.25% +0.06% / +0.19% +0.44% +0.81%] index_select strided 16 : Elapsed 0.016 ms (1.604 ms / 100) 1.606 -> 1.610 ( +0.25%) [ +0.19% +0.06% +0.00% / +0.25% +0.81% +0.68%] index_select random : Elapsed 0.016 ms (1.609 ms / 100) 1.608 -> 1.608 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.68% +0.56%] index_select random_sorted : Elapsed 0.016 ms (1.608 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.00% +0.00% +0.06% / +0.12% +0.75% +0.50%] index_select perm : Elapsed 0.016 ms (1.606 ms / 100) 1.603 -> 1.600 ( -0.19%) [ +0.06% +0.00% +0.06% / -0.19% +0.56% +0.56%] index_select perm_sorted : Elapsed 0.016 ms (1.604 ms / 100) B = [4, 5, 16, 20] (stride (1, 4, 400, 20)) A = [4, 40, 16, 20] (stride (640, 16, 1, 2560)) dim = 1 1.467 -> 1.468 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.55% +0.55%] index_select const : Elapsed 0.015 ms (1.468 ms / 100) 1.460 -> 1.461 ( +0.07%) [ +0.21% +0.00% +0.07% / +0.07% +0.48% +0.55%] index_select wrap : Elapsed 0.015 ms (1.463 ms / 100) 1.451 -> 1.453 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.62% +0.41%] index_select linear : Elapsed 0.015 ms (1.453 ms / 100) 1.451 -> 1.450 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.55% +0.41%] index_select reverse : Elapsed 0.015 ms (1.451 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.48% +0.55%] index_select skip64 : Elapsed 0.015 ms (1.463 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.55% +0.55%] index_select skip256 : Elapsed 0.015 ms (1.468 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select spread : Elapsed 0.015 ms (1.478 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.27% +0.34%] index_select strided 3 : Elapsed 0.015 ms (1.469 ms / 100) 1.464 -> 1.467 ( +0.20%) [ +0.14% +0.14% +0.00% / +0.20% +0.41% +0.27%] index_select strided 5 : Elapsed 0.015 ms (1.466 ms / 100) 1.450 -> 1.449 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.34% +0.34%] index_select strided 7 : Elapsed 0.014 ms (1.450 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.48% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.469 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.82% +0.76%] index_select strided 16 : Elapsed 0.015 ms (1.457 ms / 100) 1.449 -> 1.450 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.55% +0.48%] index_select random : Elapsed 0.015 ms (1.450 ms / 100) 1.454 -> 1.453 ( -0.07%) [ +0.21% +0.00% +0.00% / -0.07% +0.62% +0.55%] index_select random_sorted : Elapsed 0.015 ms (1.457 ms / 100) 1.448 -> 1.449 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.90% +0.76%] index_select perm : Elapsed 0.014 ms (1.450 ms / 100) 1.444 -> 1.446 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.55% +0.48%] index_select perm_sorted : Elapsed 0.014 ms (1.444 ms / 100) B = [4, 5, 16, 20] (stride (1, 64, 4, 320)) A = [4, 40, 16, 20] (stride (320, 1280, 1, 16)) dim = 1 1.486 -> 1.486 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_select const : Elapsed 0.015 ms (1.486 ms / 100) 1.486 -> 1.486 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_select wrap : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.61% +0.54%] index_select linear : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.74% +0.67%] index_select reverse : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.74% +0.61%] index_select skip64 : Elapsed 0.015 ms (1.485 ms / 100) 1.486 -> 1.485 ( -0.07%) [ +0.00% +0.00% +0.47% / -0.07% +0.67% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.61%] index_select spread : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_select strided 3 : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.74% +0.74%] index_select strided 5 : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.07% +0.13% / +0.07% +0.74% +0.74%] index_select strided 7 : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.67% +0.67%] index_select strided 8 : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.74% +0.67%] index_select strided 16 : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.74% +0.61%] index_select random : Elapsed 0.015 ms (1.486 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.07% +0.00% +0.67% / +0.07% +0.67% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.487 ms / 100) 1.485 -> 1.487 ( +0.13%) [ +0.07% +0.00% +0.13% / +0.13% +0.61% +0.61%] index_select perm : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.74% +0.67%] index_select perm_sorted : Elapsed 0.015 ms (1.486 ms / 100) out_shape = [4, 40, 5, 20] in_shape = [4, 40, 16, 20] idx_dim = 2 B = [4, 40, 5, 20] (stride (4000, 100, 20, 1)) A = [4, 40, 16, 20] (stride (320, 1280, 20, 1)) dim = 2 2.098 -> 2.108 ( +0.48%) [ +0.00% +0.19% +0.10% / +0.48% +0.57% +0.71%] index_select const : Elapsed 0.021 ms (2.098 ms / 100) 2.188 -> 2.184 ( -0.18%) [ +0.05% +0.00% +0.00% / -0.18% +0.32% +0.41%] index_select wrap : Elapsed 0.022 ms (2.189 ms / 100) 2.187 -> 2.191 ( +0.18%) [ +0.32% +0.09% +0.00% / +0.18% +0.23% +0.55%] index_select linear : Elapsed 0.022 ms (2.194 ms / 100) 2.193 -> 2.192 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.73% +0.87%] index_select reverse : Elapsed 0.022 ms (2.193 ms / 100) 2.111 -> 2.110 ( -0.05%) [ +0.00% +0.09% +0.09% / -0.05% +0.66% +0.62%] index_select skip64 : Elapsed 0.021 ms (2.111 ms / 100) 2.098 -> 2.102 ( +0.19%) [ +0.10% +0.00% +0.19% / +0.19% +0.38% +0.62%] index_select skip256 : Elapsed 0.021 ms (2.100 ms / 100) 2.196 -> 2.198 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.32% +0.23%] index_select spread : Elapsed 0.022 ms (2.200 ms / 100) 2.197 -> 2.200 ( +0.14%) [ +0.14% +0.00% +0.09% / +0.14% +0.23% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.200 ms / 100) 2.187 -> 2.191 ( +0.18%) [ +0.14% +0.00% +0.00% / +0.27% +0.27% +0.18%] index_select strided 5 : Elapsed 0.022 ms (2.190 ms / 100) 2.189 -> 2.188 ( -0.05%) [ +0.00% +0.14% +0.27% / -0.05% +0.41% +0.32%] index_select strided 7 : Elapsed 0.022 ms (2.189 ms / 100) 2.121 -> 2.124 ( +0.14%) [ +0.00% +0.05% +0.00% / +0.14% +0.52% +0.57%] index_select strided 8 : Elapsed 0.021 ms (2.121 ms / 100) 2.165 -> 2.167 ( +0.09%) [ +0.32% +0.37% +0.00% / +0.09% +0.60% +0.69%] index_select random : Elapsed 0.022 ms (2.172 ms / 100) 2.165 -> 2.172 ( +0.32%) [ +0.09% +0.23% +0.00% / +0.32% +0.60% +0.74%] index_select random_sorted : Elapsed 0.022 ms (2.167 ms / 100) 2.184 -> 2.191 ( +0.32%) [ +0.00% +0.00% +0.05% / +0.32% +0.37% +0.50%] index_select perm : Elapsed 0.022 ms (2.184 ms / 100) 2.182 -> 2.185 ( +0.14%) [ +0.23% +0.27% +0.00% / +0.14% +0.96% +0.69%] index_select perm_sorted : Elapsed 0.022 ms (2.187 ms / 100) B = [4, 40, 5, 20] (stride (100, 400, 20, 1)) A = [4, 40, 16, 20] (stride (12800, 320, 20, 1)) dim = 2 2.134 -> 2.137 ( +0.14%) [ +0.00% +0.14% +0.28% / +0.19% +0.14% +0.28%] index_select const : Elapsed 0.021 ms (2.134 ms / 100) 2.200 -> 2.195 ( -0.23%) [ +0.09% +0.18% +0.00% / +0.14% -0.23% -0.14%] index_select wrap : Elapsed 0.022 ms (2.202 ms / 100) 2.209 -> 2.201 ( -0.36%) [ +0.36% +0.00% +0.23% / +0.05% -0.09% -0.36%] index_select linear : Elapsed 0.022 ms (2.217 ms / 100) 2.212 -> 2.200 ( -0.54%) [ +0.00% +0.00% +0.14% / +0.09% -0.41% -0.54%] index_select reverse : Elapsed 0.022 ms (2.212 ms / 100) 2.115 -> 2.115 ( +0.00%) [ +0.38% +0.09% +0.00% / +0.24% +0.76% +0.00%] index_select skip64 : Elapsed 0.021 ms (2.123 ms / 100) 2.135 -> 2.133 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.05% +0.14%] index_select skip256 : Elapsed 0.021 ms (2.137 ms / 100) 2.207 -> 2.203 ( -0.18%) [ +0.00% +0.05% +0.05% / -0.14% -0.05% -0.18%] index_select spread : Elapsed 0.022 ms (2.207 ms / 100) 2.208 -> 2.202 ( -0.27%) [ +0.32% +0.00% +0.05% / +0.18% -0.27% -0.23%] index_select strided 3 : Elapsed 0.022 ms (2.215 ms / 100) 2.199 -> 2.187 ( -0.55%) [ +0.05% +0.32% +0.00% / +0.00% -0.32% -0.55%] index_select strided 5 : Elapsed 0.022 ms (2.200 ms / 100) 2.199 -> 2.192 ( -0.32%) [ +0.00% +0.23% +0.09% / +0.18% -0.14% -0.32%] index_select strided 7 : Elapsed 0.022 ms (2.199 ms / 100) 2.151 -> 2.136 ( -0.70%) [ +0.00% +0.14% +0.14% / +0.19% -0.70% -0.65%] index_select strided 8 : Elapsed 0.022 ms (2.151 ms / 100) 2.193 -> 2.191 ( -0.09%) [ +0.14% +0.00% +0.23% / +0.18% -0.09% +0.32%] index_select random : Elapsed 0.022 ms (2.196 ms / 100) 2.197 -> 2.198 ( +0.05%) [ +0.00% +0.18% +0.14% / +0.05% +0.05% +0.05%] index_select random_sorted : Elapsed 0.022 ms (2.197 ms / 100) 2.211 -> 2.202 ( -0.41%) [ +0.00% +0.05% +0.14% / +0.32% -0.41% -0.41%] index_select perm : Elapsed 0.022 ms (2.211 ms / 100) 2.195 -> 2.192 ( -0.14%) [ +0.18% +0.00% +0.36% / +0.09% -0.14% +0.14%] index_select perm_sorted : Elapsed 0.022 ms (2.199 ms / 100) B = [4, 40, 5, 20] (stride (800, 1, 3200, 40)) A = [4, 40, 16, 20] (stride (12800, 320, 1, 16)) dim = 2 2.265 -> 2.267 ( +0.09%) [ +0.09% +0.13% +0.00% / +0.09% +0.53% +0.44%] index_select const : Elapsed 0.023 ms (2.267 ms / 100) 2.260 -> 2.265 ( +0.22%) [ +0.22% +0.00% +0.09% / +0.22% +0.53% +0.62%] index_select wrap : Elapsed 0.023 ms (2.265 ms / 100) 2.268 -> 2.272 ( +0.18%) [ +0.00% +0.13% +0.00% / +0.18% +0.44% +0.62%] index_select linear : Elapsed 0.023 ms (2.268 ms / 100) 2.261 -> 2.259 ( -0.09%) [ +0.04% +0.00% +0.49% / -0.09% +0.75% +0.44%] index_select reverse : Elapsed 0.023 ms (2.262 ms / 100) 2.259 -> 2.263 ( +0.18%) [ +0.00% +0.22% +0.35% / +0.18% +0.49% +0.93%] index_select skip64 : Elapsed 0.023 ms (2.259 ms / 100) 2.263 -> 2.265 ( +0.09%) [ +0.04% +0.00% +0.22% / +0.09% +0.66% +0.66%] index_select skip256 : Elapsed 0.023 ms (2.264 ms / 100) 2.274 -> 2.286 ( +0.53%) [ +0.00% +0.18% +0.00% / +0.92% +0.62% +0.53%] index_select spread : Elapsed 0.023 ms (2.274 ms / 100) 2.280 -> 2.282 ( +0.09%) [ +0.13% +0.00% +0.00% / +0.09% +0.66% +0.53%] index_select strided 3 : Elapsed 0.023 ms (2.283 ms / 100) 2.280 -> 2.284 ( +0.18%) [ +0.04% +0.00% +0.13% / +0.18% +0.75% +0.79%] index_select strided 5 : Elapsed 0.023 ms (2.281 ms / 100) 2.273 -> 2.274 ( +0.04%) [ +0.13% +0.00% +0.18% / +0.04% +0.48% +0.57%] index_select strided 7 : Elapsed 0.023 ms (2.276 ms / 100) 2.279 -> 2.283 ( +0.18%) [ +0.00% +0.13% +0.00% / +0.18% +0.83% +0.88%] index_select strided 8 : Elapsed 0.023 ms (2.279 ms / 100) 2.272 -> 2.275 ( +0.13%) [ +0.13% +0.00% +0.09% / +0.13% +0.75% +0.70%] index_select random : Elapsed 0.023 ms (2.275 ms / 100) 2.268 -> 2.272 ( +0.18%) [ +0.13% +0.26% +0.00% / +0.18% +0.79% +0.79%] index_select random_sorted : Elapsed 0.023 ms (2.271 ms / 100) 2.279 -> 2.279 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.66% +0.61%] index_select perm : Elapsed 0.023 ms (2.282 ms / 100) 2.274 -> 2.279 ( +0.22%) [ +0.09% +0.00% +0.09% / +0.22% +0.48% +0.57%] index_select perm_sorted : Elapsed 0.023 ms (2.276 ms / 100) B = [4, 40, 5, 20] (stride (5, 20, 1, 800)) A = [4, 40, 16, 20] (stride (20, 1280, 80, 1)) dim = 2 2.268 -> 2.275 ( +0.31%) [ +0.31% +0.22% +0.00% / +0.31% +0.49% +0.31%] index_select const : Elapsed 0.023 ms (2.275 ms / 100) 2.335 -> 2.334 ( -0.04%) [ +0.21% +0.09% +0.00% / -0.04% +0.26% +0.26%] index_select wrap : Elapsed 0.023 ms (2.340 ms / 100) 2.321 -> 2.323 ( +0.09%) [ +0.00% +0.00% +0.13% / +0.13% +0.09% +0.13%] index_select linear : Elapsed 0.023 ms (2.321 ms / 100) 2.324 -> 2.330 ( +0.26%) [ +0.34% +0.00% +0.22% / +0.26% +0.30% +0.30%] index_select reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.272 -> 2.275 ( +0.13%) [ +0.44% +0.22% +0.00% / +0.13% +0.44% +0.40%] index_select skip64 : Elapsed 0.023 ms (2.282 ms / 100) 2.270 -> 2.273 ( +0.13%) [ +0.18% +0.00% +0.18% / +0.13% +0.26% +0.26%] index_select skip256 : Elapsed 0.023 ms (2.274 ms / 100) 2.327 -> 2.326 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.13% -0.04% -0.04%] index_select spread : Elapsed 0.023 ms (2.327 ms / 100) 2.325 -> 2.322 ( -0.13%) [ +0.00% +0.13% +0.00% / -0.13% +0.26% +0.04%] index_select strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.326 -> 2.324 ( -0.09%) [ +0.17% +0.26% +0.00% / +0.13% +0.13% -0.09%] index_select strided 5 : Elapsed 0.023 ms (2.330 ms / 100) 2.330 -> 2.327 ( -0.13%) [ +0.04% +0.00% +0.00% / -0.09% -0.13% +0.04%] index_select strided 7 : Elapsed 0.023 ms (2.331 ms / 100) 2.281 -> 2.290 ( +0.39%) [ +0.00% +0.48% +0.35% / +0.39% +0.66% +0.48%] index_select strided 8 : Elapsed 0.023 ms (2.281 ms / 100) 2.278 -> 2.283 ( +0.22%) [ +0.26% +0.09% +0.00% / +0.31% +0.22% +0.35%] index_select random : Elapsed 0.023 ms (2.284 ms / 100) 2.278 -> 2.282 ( +0.18%) [ +0.13% +0.00% +0.13% / +0.18% +0.26% +0.22%] index_select random_sorted : Elapsed 0.023 ms (2.281 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.17% +0.04% +0.04%] index_select perm : Elapsed 0.023 ms (2.333 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.26% +0.00% +0.17%] index_select perm_sorted : Elapsed 0.023 ms (2.335 ms / 100) out_shape = [4, 40, 16, 5] in_shape = [4, 40, 16, 20] idx_dim = 3 B = [4, 40, 16, 5] (stride (3200, 80, 5, 1)) A = [4, 40, 16, 20] (stride (12800, 1, 800, 40)) dim = 3 1.612 -> 1.609 ( -0.19%) [ +0.00% +0.00% +0.06% / -0.19% +0.43% +0.06%] index_select const : Elapsed 0.016 ms (1.612 ms / 100) 1.643 -> 1.646 ( +0.18%) [ +0.30% +0.00% +0.43% / +0.18% +0.49% +0.37%] index_select wrap : Elapsed 0.016 ms (1.648 ms / 100) 1.641 -> 1.646 ( +0.30%) [ +0.12% +0.00% +0.30% / +0.30% +0.67% +0.55%] index_select linear : Elapsed 0.016 ms (1.643 ms / 100) 1.642 -> 1.644 ( +0.12%) [ +0.24% +0.43% +0.00% / +0.12% +0.85% +0.79%] index_select reverse : Elapsed 0.016 ms (1.646 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.19% +0.00% +0.00% / +0.06% +0.43% +0.50%] index_select skip64 : Elapsed 0.016 ms (1.617 ms / 100) 1.612 -> 1.616 ( +0.25%) [ +0.00% +0.12% +0.12% / +0.25% +0.74% +0.87%] index_select skip256 : Elapsed 0.016 ms (1.612 ms / 100) 1.643 -> 1.647 ( +0.24%) [ +0.06% +0.24% +0.00% / +0.24% +0.37% +0.55%] index_select spread : Elapsed 0.016 ms (1.644 ms / 100) 1.642 -> 1.645 ( +0.18%) [ +0.06% +0.12% +0.00% / +0.18% +0.67% +0.55%] index_select strided 3 : Elapsed 0.016 ms (1.643 ms / 100) 1.631 -> 1.637 ( +0.37%) [ +0.00% +0.12% +0.06% / +0.37% +0.67% +0.92%] index_select strided 5 : Elapsed 0.016 ms (1.631 ms / 100) 1.642 -> 1.644 ( +0.12%) [ +0.00% +0.06% +0.06% / +0.12% +0.24% +0.18%] index_select strided 7 : Elapsed 0.016 ms (1.642 ms / 100) 1.650 -> 1.653 ( +0.18%) [ +0.24% +0.12% +0.00% / +0.18% +0.24% +0.30%] index_select strided 8 : Elapsed 0.017 ms (1.654 ms / 100) 1.651 -> 1.649 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.24% +0.12%] index_select strided 16 : Elapsed 0.017 ms (1.652 ms / 100) 1.641 -> 1.640 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.30% +0.37%] index_select random : Elapsed 0.016 ms (1.642 ms / 100) 1.639 -> 1.641 ( +0.12%) [ +0.06% +0.00% +0.12% / +0.12% +0.73% +0.73%] index_select random_sorted : Elapsed 0.016 ms (1.640 ms / 100) 1.636 -> 1.640 ( +0.24%) [ +0.18% +0.00% +0.12% / +0.24% +1.10% +0.73%] index_select perm : Elapsed 0.016 ms (1.639 ms / 100) 1.644 -> 1.645 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.49% +0.36%] index_select perm_sorted : Elapsed 0.016 ms (1.646 ms / 100) B = [4, 40, 16, 5] (stride (3200, 80, 1, 16)) A = [4, 40, 16, 20] (stride (1, 1280, 80, 4)) dim = 3 0.683 -> 0.684 ( +0.15%) [ +0.15% +0.00% +0.29% / +0.15% +0.73% +0.59%] index_select const : Elapsed 0.007 ms (0.684 ms / 100) 0.681 -> 0.682 ( +0.15%) [ +0.44% +0.00% +0.44% / +0.15% +1.32% +1.17%] index_select wrap : Elapsed 0.007 ms (0.684 ms / 100) 0.682 -> 0.681 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +1.61% +1.32%] index_select linear : Elapsed 0.007 ms (0.682 ms / 100) 0.683 -> 0.682 ( -0.15%) [ +0.29% +0.00% +0.00% / -0.15% +1.02% +1.46%] index_select reverse : Elapsed 0.007 ms (0.685 ms / 100) 0.683 -> 0.683 ( +0.00%) [ +0.44% +0.15% +0.00% / +0.00% +0.88% +1.02%] index_select skip64 : Elapsed 0.007 ms (0.686 ms / 100) 0.686 -> 0.684 ( -0.29%) [ +0.15% +0.00% +0.15% / +0.15% +0.00% -0.29%] index_select skip256 : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.684 ( -0.44%) [ +0.00% +0.15% +0.00% / +0.00% -0.29% -0.44%] index_select spread : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.686 ( -0.15%) [ +0.29% +0.15% +0.00% / +0.00% -0.15% +0.44%] index_select strided 3 : Elapsed 0.007 ms (0.689 ms / 100) 0.684 -> 0.683 ( -0.15%) [ +0.29% +0.00% +0.15% / +0.29% -0.15% +0.00%] index_select strided 5 : Elapsed 0.007 ms (0.686 ms / 100) 0.679 -> 0.679 ( +0.00%) [ +0.44% +0.00% +0.00% / +0.00% +0.00% +0.00%] index_select strided 7 : Elapsed 0.007 ms (0.682 ms / 100) 0.675 -> 0.675 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.44% +0.00% +0.15%] index_select strided 8 : Elapsed 0.007 ms (0.676 ms / 100) 0.687 -> 0.684 ( -0.44%) [ +0.29% +0.00% +0.00% / +0.00% -0.29% -0.44%] index_select strided 16 : Elapsed 0.007 ms (0.689 ms / 100) 0.676 -> 0.674 ( -0.30%) [ +0.44% +0.15% +0.00% / +0.15% -0.30% -0.30%] index_select random : Elapsed 0.007 ms (0.679 ms / 100) 0.682 -> 0.682 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.00% +1.03% +0.88%] index_select random_sorted : Elapsed 0.007 ms (0.682 ms / 100) 0.683 -> 0.682 ( -0.15%) [ +0.00% +0.00% +0.15% / -0.15% +0.88% +0.88%] index_select perm : Elapsed 0.007 ms (0.683 ms / 100) 0.682 -> 0.690 ( +1.17%) [ +0.00% +0.00% +0.00% / +1.17% +1.47% +1.47%] index_select perm_sorted : Elapsed 0.007 ms (0.682 ms / 100) B = [4, 40, 16, 5] (stride (80, 320, 5, 1)) A = [4, 40, 16, 20] (stride (12800, 16, 1, 640)) dim = 3 1.673 -> 1.675 ( +0.12%) [ +0.30% +0.00% +0.18% / +0.12% +0.24% +0.60%] index_select const : Elapsed 0.017 ms (1.678 ms / 100) 1.675 -> 1.677 ( +0.12%) [ +0.00% +0.24% +0.18% / +0.78% +0.12% +0.36%] index_select wrap : Elapsed 0.017 ms (1.675 ms / 100) 1.676 -> 1.677 ( +0.06%) [ +0.06% +0.00% +0.24% / +0.12% +0.30% +0.06%] index_select linear : Elapsed 0.017 ms (1.677 ms / 100) 1.675 -> 1.677 ( +0.12%) [ +0.00% +0.00% +0.06% / +0.12% +0.24% +0.18%] index_select reverse : Elapsed 0.017 ms (1.675 ms / 100) 1.674 -> 1.672 ( -0.12%) [ +0.24% +0.12% +0.00% / +0.06% +0.48% -0.12%] index_select skip64 : Elapsed 0.017 ms (1.678 ms / 100) 1.673 -> 1.673 ( +0.00%) [ +0.24% +0.24% +0.00% / +0.30% +0.30% +0.00%] index_select skip256 : Elapsed 0.017 ms (1.677 ms / 100) 1.691 -> 1.692 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.18% +0.18%] index_select spread : Elapsed 0.017 ms (1.693 ms / 100) 1.691 -> 1.691 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.00% +0.00%] index_select strided 3 : Elapsed 0.017 ms (1.692 ms / 100) 1.676 -> 1.679 ( +0.18%) [ +0.18% +0.24% +0.00% / +0.18% +1.25% +1.07%] index_select strided 5 : Elapsed 0.017 ms (1.679 ms / 100) 1.687 -> 1.689 ( +0.12%) [ +0.00% +0.00% +0.24% / +0.12% +0.47% +0.59%] index_select strided 7 : Elapsed 0.017 ms (1.687 ms / 100) 1.692 -> 1.690 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.35% +0.06%] index_select strided 8 : Elapsed 0.017 ms (1.693 ms / 100) 1.689 -> 1.689 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.06% +0.00% +0.24%] index_select strided 16 : Elapsed 0.017 ms (1.689 ms / 100) 1.684 -> 1.686 ( +0.12%) [ +0.00% +0.06% +0.24% / +0.12% +0.59% +0.77%] index_select random : Elapsed 0.017 ms (1.684 ms / 100) 1.680 -> 1.688 ( +0.48%) [ +0.00% +0.18% +0.42% / +0.48% +1.01% +1.31%] index_select random_sorted : Elapsed 0.017 ms (1.680 ms / 100) 1.697 -> 1.697 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.00% +0.29% +0.47%] index_select perm : Elapsed 0.017 ms (1.698 ms / 100) 1.692 -> 1.695 ( +0.18%) [ +0.00% +0.18% +0.35% / +0.18% +0.41% +0.35%] index_select perm_sorted : Elapsed 0.017 ms (1.692 ms / 100) B = [4, 40, 16, 5] (stride (80, 320, 1, 16)) A = [4, 40, 16, 20] (stride (12800, 16, 1, 640)) dim = 3 1.678 -> 1.678 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.06% +0.00% +0.12%] index_select const : Elapsed 0.017 ms (1.678 ms / 100) 1.701 -> 1.689 ( -0.71%) [ +0.00% +0.12% +0.00% / +0.18% -0.71% -0.41%] index_select wrap : Elapsed 0.017 ms (1.701 ms / 100) 1.700 -> 1.690 ( -0.59%) [ +0.29% +0.00% +0.29% / +0.24% -0.41% -0.59%] index_select linear : Elapsed 0.017 ms (1.705 ms / 100) 1.703 -> 1.690 ( -0.76%) [ +0.12% +0.00% +0.06% / +0.00% -0.76% -0.65%] index_select reverse : Elapsed 0.017 ms (1.705 ms / 100) 1.673 -> 1.674 ( +0.06%) [ +0.30% +0.18% +0.00% / +0.06% +0.24% +0.24%] index_select skip64 : Elapsed 0.017 ms (1.678 ms / 100) 1.675 -> 1.673 ( -0.12%) [ +0.36% +0.12% +0.00% / -0.12% +0.12% +0.18%] index_select skip256 : Elapsed 0.017 ms (1.681 ms / 100) 1.697 -> 1.677 ( -1.18%) [ +0.00% +0.06% +0.18% / -0.06% -1.12% -1.18%] index_select spread : Elapsed 0.017 ms (1.697 ms / 100) 1.687 -> 1.685 ( -0.12%) [ +0.36% +0.00% +0.12% / +0.30% +0.06% -0.12%] index_select strided 3 : Elapsed 0.017 ms (1.693 ms / 100) 1.678 -> 1.678 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +1.01%] index_select strided 5 : Elapsed 0.017 ms (1.678 ms / 100) 1.690 -> 1.691 ( +0.06%) [ +0.30% +0.18% +0.00% / +0.06% +0.41% +0.71%] index_select strided 7 : Elapsed 0.017 ms (1.695 ms / 100) 1.695 -> 1.678 ( -1.00%) [ +0.00% +0.12% +0.24% / +0.06% -0.88% -1.00%] index_select strided 8 : Elapsed 0.017 ms (1.695 ms / 100) 1.694 -> 1.679 ( -0.89%) [ +0.00% +0.35% +0.12% / +0.18% -0.59% -0.89%] index_select strided 16 : Elapsed 0.017 ms (1.694 ms / 100) 1.679 -> 1.680 ( +0.06%) [ +0.06% +0.00% +0.30% / +0.06% +0.77% +0.83%] index_select random : Elapsed 0.017 ms (1.680 ms / 100) 1.679 -> 1.683 ( +0.24%) [ +0.30% +0.24% +0.00% / +0.24% +0.89% +0.66%] index_select random_sorted : Elapsed 0.017 ms (1.684 ms / 100) 1.686 -> 1.687 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.42% +0.24%] index_select perm : Elapsed 0.017 ms (1.688 ms / 100) 1.683 -> 1.683 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.77% +0.53%] index_select perm_sorted : Elapsed 0.017 ms (1.684 ms / 100) B = [4, 40, 16, 5] (stride (80, 320, 1, 16)) A = [4, 40, 16, 20] (stride (1, 4, 160, 2560)) dim = 3 1.831 -> 1.833 ( +0.11%) [ +0.05% +0.00% +0.05% / +0.11% +0.60% +0.33%] index_select const : Elapsed 0.018 ms (1.832 ms / 100) 1.819 -> 1.817 ( -0.11%) [ +0.05% +0.11% +0.00% / -0.11% +0.33% +0.11%] index_select wrap : Elapsed 0.018 ms (1.820 ms / 100) 1.814 -> 1.817 ( +0.17%) [ +0.11% +0.17% +0.00% / +0.17% +0.61% +0.55%] index_select linear : Elapsed 0.018 ms (1.816 ms / 100) 1.824 -> 1.824 ( +0.00%) [ +0.00% +0.16% +0.27% / +0.00% +0.44% +0.49%] index_select reverse : Elapsed 0.018 ms (1.824 ms / 100) 1.821 -> 1.826 ( +0.27%) [ +0.00% +0.16% +0.00% / +0.27% +0.33% +0.27%] index_select skip64 : Elapsed 0.018 ms (1.821 ms / 100) 1.819 -> 1.822 ( +0.16%) [ +0.27% +0.11% +0.00% / +0.16% +0.60% +0.49%] index_select skip256 : Elapsed 0.018 ms (1.824 ms / 100) 1.837 -> 1.837 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.22%] index_select spread : Elapsed 0.018 ms (1.837 ms / 100) 1.840 -> 1.831 ( -0.49%) [ +0.00% +0.05% +0.00% / +0.00% -0.38% -0.49%] index_select strided 3 : Elapsed 0.018 ms (1.840 ms / 100) 1.824 -> 1.830 ( +0.33%) [ +0.11% +0.00% +0.27% / +0.33% +0.33% +0.44%] index_select strided 5 : Elapsed 0.018 ms (1.826 ms / 100) 1.832 -> 1.837 ( +0.27%) [ +0.38% +0.05% +0.00% / +0.27% +0.55% +0.27%] index_select strided 7 : Elapsed 0.018 ms (1.839 ms / 100) 1.843 -> 1.842 ( -0.05%) [ +0.22% +0.11% +0.00% / +0.00% +0.05% -0.05%] index_select strided 8 : Elapsed 0.018 ms (1.847 ms / 100) 1.843 -> 1.844 ( +0.05%) [ +0.22% +0.00% +0.16% / +0.05% +0.16% +0.27%] index_select strided 16 : Elapsed 0.018 ms (1.847 ms / 100) 1.853 -> 1.852 ( -0.05%) [ +0.16% +0.00% +0.05% / +0.11% +0.00% -0.05%] index_select random : Elapsed 0.019 ms (1.856 ms / 100) 1.846 -> 1.836 ( -0.54%) [ +0.00% +0.00% +0.16% / +0.16% -0.43% -0.54%] index_select random_sorted : Elapsed 0.018 ms (1.846 ms / 100) 1.817 -> 1.824 ( +0.39%) [ +0.22% +0.00% +0.39% / +0.39% +0.39% +0.44%] index_select perm : Elapsed 0.018 ms (1.821 ms / 100) 1.823 -> 1.827 ( +0.22%) [ +0.44% +0.00% +0.00% / +0.22% +0.66% +0.55%] index_select perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) B = [4, 40, 16, 5] (stride (1, 320, 4, 64)) A = [4, 40, 16, 20] (stride (640, 1, 40, 2560)) dim = 3 1.817 -> 1.818 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.28% +0.22%] index_select const : Elapsed 0.018 ms (1.819 ms / 100) 1.807 -> 1.812 ( +0.28%) [ +0.28% +0.00% +0.11% / +0.28% +0.50% +0.50%] index_select wrap : Elapsed 0.018 ms (1.812 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.00% +0.17% +0.06% / +0.06% +0.33% +0.17%] index_select linear : Elapsed 0.018 ms (1.809 ms / 100) 1.814 -> 1.813 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% -0.06% -0.06%] index_select reverse : Elapsed 0.018 ms (1.814 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.00% +0.28% +0.17% / +0.06% +0.33% +0.22%] index_select skip64 : Elapsed 0.018 ms (1.809 ms / 100) 1.811 -> 1.812 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.61% +0.22%] index_select skip256 : Elapsed 0.018 ms (1.811 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.05% +0.11% +0.00% / +0.05% +0.22% +0.05%] index_select spread : Elapsed 0.018 ms (1.820 ms / 100) 1.827 -> 1.827 ( +0.00%) [ +0.27% +0.00% +0.05% / +0.00% +0.11% +0.16%] index_select strided 3 : Elapsed 0.018 ms (1.832 ms / 100) 1.822 -> 1.820 ( -0.11%) [ +0.11% +0.05% +0.00% / -0.05% -0.11% +0.00%] index_select strided 5 : Elapsed 0.018 ms (1.824 ms / 100) 1.817 -> 1.816 ( -0.06%) [ +0.22% +0.06% +0.00% / +0.06% -0.06% +0.06%] index_select strided 7 : Elapsed 0.018 ms (1.821 ms / 100) 1.814 -> 1.818 ( +0.22%) [ +0.39% +0.28% +0.00% / +0.22% +0.50% +0.50%] index_select strided 8 : Elapsed 0.018 ms (1.821 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.00% +0.22% +0.05% / +0.11% +0.05% +0.05%] index_select strided 16 : Elapsed 0.018 ms (1.821 ms / 100) 1.815 -> 1.818 ( +0.17%) [ +0.00% +0.28% +0.33% / +0.28% +0.22% +0.17%] index_select random : Elapsed 0.018 ms (1.815 ms / 100) 1.811 -> 1.816 ( +0.28%) [ +0.00% +0.33% +0.11% / +0.33% +0.28% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.811 ms / 100) 1.811 -> 1.811 ( +0.00%) [ +0.33% +0.11% +0.00% / +0.00% +0.77% +0.83%] index_select perm : Elapsed 0.018 ms (1.817 ms / 100) 1.812 -> 1.816 ( +0.22%) [ +0.00% +0.17% +0.06% / +0.22% +0.44% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.812 ms / 100) B = [4, 40, 16, 5] (stride (5, 20, 800, 1)) A = [4, 40, 16, 20] (stride (16, 64, 1, 2560)) dim = 3 1.783 -> 1.781 ( -0.11%) [ +0.06% +0.11% +0.00% / +0.22% +0.00% -0.11%] index_select const : Elapsed 0.018 ms (1.784 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.00% +0.11% +0.22% / +0.00% +0.06% +0.28%] index_select wrap : Elapsed 0.018 ms (1.781 ms / 100) 1.779 -> 1.784 ( +0.28%) [ +0.00% +0.17% +0.17% / +0.28% +0.67% +0.34%] index_select linear : Elapsed 0.018 ms (1.779 ms / 100) 1.783 -> 1.782 ( -0.06%) [ +0.17% +0.00% +0.00% / +0.00% +0.00% -0.06%] index_select reverse : Elapsed 0.018 ms (1.786 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.11% +0.00% +0.17% / +0.00% +0.06% +0.22%] index_select skip64 : Elapsed 0.018 ms (1.783 ms / 100) 1.780 -> 1.779 ( -0.06%) [ +0.28% +0.00% +0.45% / -0.06% +0.00% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.785 ms / 100) 1.800 -> 1.797 ( -0.17%) [ +0.28% +0.06% +0.00% / -0.17% +0.44% +0.28%] index_select spread : Elapsed 0.018 ms (1.805 ms / 100) 1.796 -> 1.801 ( +0.28%) [ +0.50% +0.33% +0.00% / +0.28% +0.33% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.805 ms / 100) 1.779 -> 1.784 ( +0.28%) [ +0.28% +0.00% +0.17% / +0.51% +0.28% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.784 ms / 100) 1.797 -> 1.797 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.45% +0.56%] index_select strided 7 : Elapsed 0.018 ms (1.800 ms / 100) 1.800 -> 1.800 ( +0.00%) [ +0.28% +0.06% +0.00% / +0.00% +0.67% +0.50%] index_select strided 8 : Elapsed 0.018 ms (1.805 ms / 100) 1.802 -> 1.805 ( +0.17%) [ +0.00% +0.22% +0.17% / +0.17% +0.17% +0.44%] index_select strided 16 : Elapsed 0.018 ms (1.802 ms / 100) 1.788 -> 1.786 ( -0.11%) [ +0.06% +0.00% +0.17% / -0.11% +0.67% +0.34%] index_select random : Elapsed 0.018 ms (1.789 ms / 100) 1.789 -> 1.796 ( +0.39%) [ +0.00% +0.17% +0.28% / +0.56% +0.39% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.789 ms / 100) 1.804 -> 1.802 ( -0.11%) [ +0.22% +0.00% +0.00% / -0.11% +0.11% +0.00%] index_select perm : Elapsed 0.018 ms (1.808 ms / 100) 1.803 -> 1.801 ( -0.11%) [ +0.17% +0.00% +0.33% / -0.11% -0.06% -0.06%] index_select perm_sorted : Elapsed 0.018 ms (1.806 ms / 100) B = [4, 40, 16, 5] (stride (1, 20, 800, 4)) A = [4, 40, 16, 20] (stride (800, 20, 3200, 1)) dim = 3 1.806 -> 1.807 ( +0.06%) [ +0.33% +0.28% +0.00% / +0.17% +0.06% +0.22%] index_select const : Elapsed 0.018 ms (1.812 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.00% +0.28% +0.22% / +0.06% +0.44% +0.55%] index_select wrap : Elapsed 0.018 ms (1.817 ms / 100) 1.813 -> 1.819 ( +0.33%) [ +0.11% +0.22% +0.00% / +0.33% +0.88% +0.83%] index_select linear : Elapsed 0.018 ms (1.815 ms / 100) 1.820 -> 1.816 ( -0.22%) [ +0.00% +0.00% +0.11% / -0.22% +0.44% +0.05%] index_select reverse : Elapsed 0.018 ms (1.820 ms / 100) 1.804 -> 1.805 ( +0.06%) [ +0.00% +0.06% +0.28% / +0.06% +0.55% +0.83%] index_select skip64 : Elapsed 0.018 ms (1.804 ms / 100) 1.803 -> 1.803 ( +0.00%) [ +0.28% +0.06% +0.00% / +0.00% +0.55% +0.55%] index_select skip256 : Elapsed 0.018 ms (1.808 ms / 100) 1.835 -> 1.839 ( +0.22%) [ +0.11% +0.05% +0.00% / +0.22% +0.44% +0.71%] index_select spread : Elapsed 0.018 ms (1.837 ms / 100) 1.832 -> 1.833 ( +0.05%) [ +0.33% +0.05% +0.00% / +0.05% +0.44% +0.60%] index_select strided 3 : Elapsed 0.018 ms (1.838 ms / 100) 1.833 -> 1.834 ( +0.05%) [ +0.44% +0.27% +0.00% / +0.05% +0.49% +0.55%] index_select strided 5 : Elapsed 0.018 ms (1.841 ms / 100) 1.824 -> 1.828 ( +0.22%) [ +0.11% +0.16% +0.00% / +0.22% +0.38% +0.49%] index_select strided 7 : Elapsed 0.018 ms (1.826 ms / 100) 1.820 -> 1.819 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.38% +0.27%] index_select strided 8 : Elapsed 0.018 ms (1.820 ms / 100) 1.828 -> 1.828 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.11% +0.00%] index_select strided 16 : Elapsed 0.018 ms (1.831 ms / 100) 1.803 -> 1.806 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.55% +0.39%] index_select random : Elapsed 0.018 ms (1.806 ms / 100) 1.807 -> 1.807 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.39% +0.17%] index_select random_sorted : Elapsed 0.018 ms (1.808 ms / 100) 1.826 -> 1.822 ( -0.22%) [ +0.05% +0.05% +0.00% / -0.22% +0.38% +0.27%] index_select perm : Elapsed 0.018 ms (1.827 ms / 100) 1.832 -> 1.832 ( +0.00%) [ +0.22% +0.05% +0.00% / +0.00% +0.66% +0.22%] index_select perm_sorted : Elapsed 0.018 ms (1.836 ms / 100) out_shape = [5, 40, 20, 16] in_shape = [4, 40, 20, 16] idx_dim = 0 B = [5, 40, 20, 16] (stride (12800, 320, 16, 1)) A = [4, 40, 20, 16] (stride (320, 1280, 1, 20)) dim = 0 5.342 -> 5.314 ( -0.52%) [ +0.15% +0.00% +0.17% / +0.13% -0.49% -0.52%] index_add_ linear : Elapsed 0.053 ms (5.350 ms / 100) 5.269 -> 5.238 ( -0.59%) [ +0.00% +0.13% +0.09% / +0.13% -0.49% -0.59%] index_copy_ linear : Elapsed 0.053 ms (5.269 ms / 100) 5.340 -> 5.311 ( -0.54%) [ +0.00% +0.02% +0.09% / +0.17% -0.47% -0.54%] index_add_ reverse : Elapsed 0.053 ms (5.340 ms / 100) 5.270 -> 5.242 ( -0.53%) [ +0.00% +0.08% +0.00% / +0.13% -0.49% -0.53%] index_copy_ reverse : Elapsed 0.053 ms (5.270 ms / 100) 5.354 -> 5.314 ( -0.75%) [ +0.00% +0.00% +0.00% / -0.17% -0.65% -0.75%] index_add_ spread : Elapsed 0.054 ms (5.354 ms / 100) 5.274 -> 5.244 ( -0.57%) [ +0.00% +0.02% +0.02% / +0.11% -0.55% -0.57%] index_copy_ spread : Elapsed 0.053 ms (5.274 ms / 100) 5.331 -> 5.324 ( -0.13%) [ +0.00% +0.04% +0.19% / +0.02% -0.13% -0.11%] index_add_ strided 3 : Elapsed 0.053 ms (5.331 ms / 100) 5.254 -> 5.243 ( -0.21%) [ +0.08% +0.00% +0.00% / +0.19% -0.21% -0.21%] index_copy_ strided 3 : Elapsed 0.053 ms (5.258 ms / 100) 5.325 -> 5.326 ( +0.02%) [ +0.00% +0.11% +0.13% / +0.09% +0.04% +0.02%] index_add_ perm : Elapsed 0.053 ms (5.325 ms / 100) 5.243 -> 5.250 ( +0.13%) [ +0.04% +0.00% +0.19% / +0.13% +0.34% +0.15%] index_copy_ perm : Elapsed 0.052 ms (5.245 ms / 100) 5.337 -> 5.321 ( -0.30%) [ +0.07% +0.00% +0.04% / +0.07% -0.30% -0.28%] index_add_ perm_sorted : Elapsed 0.053 ms (5.341 ms / 100) 5.253 -> 5.248 ( -0.10%) [ +0.17% +0.00% +0.19% / +0.32% -0.06% -0.10%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.262 ms / 100) 5.465 -> 5.470 ( +0.09%) [ +0.37% +0.00% +0.22% / +0.26% +0.09% +0.11%] index_select const : Elapsed 0.055 ms (5.485 ms / 100) 5.554 -> 5.519 ( -0.63%) [ +0.00% +0.09% +0.16% / +0.14% -0.59% -0.63%] index_select wrap : Elapsed 0.056 ms (5.554 ms / 100) 5.552 -> 5.513 ( -0.70%) [ +0.09% +0.07% +0.00% / +0.07% -0.58% -0.70%] index_select linear : Elapsed 0.056 ms (5.557 ms / 100) 5.543 -> 5.532 ( -0.20%) [ +0.04% +0.00% +0.22% / +0.18% -0.20% -0.20%] index_select reverse : Elapsed 0.055 ms (5.545 ms / 100) 5.466 -> 5.471 ( +0.09%) [ +0.09% +0.00% +0.22% / +0.26% +0.09% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.471 ms / 100) 5.469 -> 5.465 ( -0.07%) [ +0.11% +0.00% +0.04% / +0.16% -0.07% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.540 -> 5.513 ( -0.49%) [ +0.11% +0.00% +0.23% / +0.18% -0.49% -0.43%] index_select spread : Elapsed 0.055 ms (5.546 ms / 100) 5.542 -> 5.530 ( -0.22%) [ +0.00% +0.04% +0.25% / +0.32% -0.22% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.542 ms / 100) 5.488 -> 5.476 ( -0.22%) [ +0.13% +0.00% +0.11% / +0.00% -0.15% -0.22%] index_select random : Elapsed 0.055 ms (5.495 ms / 100) 5.483 -> 5.484 ( +0.02%) [ +0.02% +0.00% +0.18% / +0.18% +0.13% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [5, 40, 20, 16] (stride (12800, 16, 640, 1)) A = [4, 40, 20, 16] (stride (20, 1280, 1, 80)) dim = 0 5.837 -> 5.840 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.17% +0.05% +0.07%] index_add_ linear : Elapsed 0.058 ms (5.837 ms / 100) 5.778 -> 5.784 ( +0.10%) [ +0.07% +0.09% +0.00% / +0.12% +0.12% +0.10%] index_copy_ linear : Elapsed 0.058 ms (5.782 ms / 100) 5.826 -> 5.831 ( +0.09%) [ +0.00% +0.00% +0.14% / +0.09% +0.29% +0.27%] index_add_ reverse : Elapsed 0.058 ms (5.826 ms / 100) 5.770 -> 5.776 ( +0.10%) [ +0.00% +0.19% +0.09% / +0.10% +0.28% +0.26%] index_copy_ reverse : Elapsed 0.058 ms (5.770 ms / 100) 5.832 -> 5.841 ( +0.15%) [ +0.00% +0.07% +0.19% / +0.15% +0.17% +0.15%] index_add_ spread : Elapsed 0.058 ms (5.832 ms / 100) 5.779 -> 5.779 ( +0.00%) [ +0.00% +0.09% +0.16% / +0.02% +0.10% +0.00%] index_copy_ spread : Elapsed 0.058 ms (5.779 ms / 100) 5.842 -> 5.845 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.12% +0.12%] index_add_ strided 3 : Elapsed 0.059 ms (5.850 ms / 100) 5.775 -> 5.785 ( +0.17%) [ +0.10% +0.10% +0.00% / +0.17% +0.26% +0.40%] index_copy_ strided 3 : Elapsed 0.058 ms (5.781 ms / 100) 5.849 -> 5.845 ( -0.07%) [ +0.00% +0.02% +0.02% / +0.02% +0.02% -0.07%] index_add_ perm : Elapsed 0.058 ms (5.849 ms / 100) 5.781 -> 5.789 ( +0.14%) [ +0.00% +0.07% +0.24% / +0.14% +0.22% +0.38%] index_copy_ perm : Elapsed 0.058 ms (5.781 ms / 100) 5.847 -> 5.851 ( +0.07%) [ +0.00% +0.03% +0.00% / +0.09% +0.07% +0.26%] index_add_ perm_sorted : Elapsed 0.058 ms (5.847 ms / 100) 5.788 -> 5.790 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.10% +0.19% +0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.788 ms / 100) 6.115 -> 6.129 ( +0.23%) [ +0.00% +0.07% +0.15% / +0.26% +0.25% +0.23%] index_select const : Elapsed 0.061 ms (6.115 ms / 100) 6.167 -> 6.173 ( +0.10%) [ +0.00% +0.08% +0.15% / +0.10% +0.24% +0.15%] index_select wrap : Elapsed 0.062 ms (6.167 ms / 100) 6.159 -> 6.160 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.16% +0.02% +0.11%] index_select linear : Elapsed 0.062 ms (6.159 ms / 100) 6.156 -> 6.162 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.24% +0.16%] index_select reverse : Elapsed 0.062 ms (6.156 ms / 100) 6.119 -> 6.129 ( +0.16%) [ +0.00% +0.05% +0.25% / +0.16% +0.29% +0.21%] index_select skip64 : Elapsed 0.061 ms (6.119 ms / 100) 6.116 -> 6.125 ( +0.15%) [ +0.03% +0.00% +0.15% / +0.16% +0.29% +0.15%] index_select skip256 : Elapsed 0.061 ms (6.118 ms / 100) 6.160 -> 6.169 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.34% +0.28%] index_select spread : Elapsed 0.062 ms (6.160 ms / 100) 6.161 -> 6.169 ( +0.13%) [ +0.00% +0.03% +0.06% / +0.13% +0.23% +0.24%] index_select strided 3 : Elapsed 0.062 ms (6.161 ms / 100) 6.147 -> 6.150 ( +0.05%) [ +0.10% +0.00% +0.13% / +0.05% +0.16% +0.23%] index_select random : Elapsed 0.062 ms (6.153 ms / 100) 6.155 -> 6.152 ( -0.05%) [ +0.02% +0.00% +0.11% / +0.05% -0.05% +0.19%] index_select random_sorted : Elapsed 0.062 ms (6.156 ms / 100) B = [5, 40, 20, 16] (stride (12800, 1, 40, 800)) A = [4, 40, 20, 16] (stride (12800, 320, 16, 1)) dim = 0 5.317 -> 5.303 ( -0.26%) [ +0.00% +0.13% +0.06% / -0.06% -0.15% -0.26%] index_add_ linear : Elapsed 0.053 ms (5.317 ms / 100) 5.275 -> 5.262 ( -0.25%) [ +0.00% +0.11% +0.08% / +0.04% -0.09% -0.25%] index_copy_ linear : Elapsed 0.053 ms (5.275 ms / 100) 5.303 -> 5.297 ( -0.11%) [ +0.00% +0.09% +0.13% / +0.02% -0.08% -0.11%] index_add_ reverse : Elapsed 0.053 ms (5.303 ms / 100) 5.272 -> 5.260 ( -0.23%) [ +0.08% +0.00% +0.09% / -0.08% -0.09% -0.23%] index_copy_ reverse : Elapsed 0.053 ms (5.276 ms / 100) 5.316 -> 5.305 ( -0.21%) [ +0.06% +0.00% +0.02% / -0.02% -0.21% -0.21%] index_add_ spread : Elapsed 0.053 ms (5.319 ms / 100) 5.274 -> 5.261 ( -0.25%) [ +0.00% +0.08% +0.02% / +0.08% -0.25% -0.13%] index_copy_ spread : Elapsed 0.053 ms (5.274 ms / 100) 5.308 -> 5.305 ( -0.06%) [ +0.04% +0.17% +0.00% / +0.08% +0.02% -0.06%] index_add_ strided 3 : Elapsed 0.053 ms (5.310 ms / 100) 5.262 -> 5.267 ( +0.10%) [ +0.10% +0.00% +0.29% / +0.21% +0.10% +0.13%] index_copy_ strided 3 : Elapsed 0.053 ms (5.267 ms / 100) 5.296 -> 5.295 ( -0.02%) [ +0.17% +0.13% +0.00% / +0.06% -0.02% -0.02%] index_add_ perm : Elapsed 0.053 ms (5.305 ms / 100) 5.270 -> 5.257 ( -0.25%) [ +0.04% +0.06% +0.00% / +0.00% -0.25% -0.21%] index_copy_ perm : Elapsed 0.053 ms (5.272 ms / 100) 5.303 -> 5.299 ( -0.08%) [ +0.00% +0.17% +0.23% / +0.21% -0.08% +0.04%] index_add_ perm_sorted : Elapsed 0.053 ms (5.303 ms / 100) 5.270 -> 5.260 ( -0.19%) [ +0.09% +0.00% +0.09% / +0.13% -0.17% -0.19%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.275 ms / 100) 5.438 -> 5.448 ( +0.18%) [ +0.00% +0.04% +0.15% / +0.20% +0.18% +0.18%] index_select const : Elapsed 0.054 ms (5.438 ms / 100) 5.541 -> 5.522 ( -0.34%) [ +0.07% +0.00% +0.09% / +0.09% -0.23% -0.34%] index_select wrap : Elapsed 0.055 ms (5.545 ms / 100) 5.541 -> 5.520 ( -0.38%) [ +0.07% +0.00% +0.14% / +0.13% -0.16% -0.38%] index_select linear : Elapsed 0.055 ms (5.545 ms / 100) 5.534 -> 5.526 ( -0.14%) [ +0.07% +0.02% +0.00% / -0.02% -0.14% -0.13%] index_select reverse : Elapsed 0.055 ms (5.538 ms / 100) 5.438 -> 5.442 ( +0.07%) [ +0.04% +0.00% +0.04% / +0.13% +0.15% +0.07%] index_select skip64 : Elapsed 0.054 ms (5.440 ms / 100) 5.438 -> 5.441 ( +0.06%) [ +0.00% +0.11% +0.15% / +0.06% +0.09% +0.20%] index_select skip256 : Elapsed 0.054 ms (5.438 ms / 100) 5.540 -> 5.521 ( -0.34%) [ +0.00% +0.14% +0.00% / -0.05% -0.34% -0.18%] index_select spread : Elapsed 0.055 ms (5.540 ms / 100) 5.532 -> 5.520 ( -0.22%) [ +0.00% +0.09% +0.14% / +0.11% -0.07% -0.22%] index_select strided 3 : Elapsed 0.055 ms (5.532 ms / 100) 5.499 -> 5.501 ( +0.04%) [ +0.02% +0.00% +0.13% / +0.11% +0.04% +0.05%] index_select random : Elapsed 0.055 ms (5.500 ms / 100) 5.503 -> 5.505 ( +0.04%) [ +0.09% +0.00% +0.05% / +0.20% +0.07% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.508 ms / 100) B = [5, 40, 20, 16] (stride (12800, 1, 40, 800)) A = [4, 40, 20, 16] (stride (800, 20, 1, 3200)) dim = 0 5.609 -> 5.616 ( +0.12%) [ +0.00% +0.05% +0.12% / +0.27% +0.12% +0.14%] index_add_ linear : Elapsed 0.056 ms (5.609 ms / 100) 5.581 -> 5.575 ( -0.11%) [ +0.05% +0.00% +0.16% / -0.11% +0.07% -0.05%] index_copy_ linear : Elapsed 0.056 ms (5.584 ms / 100) 5.608 -> 5.606 ( -0.04%) [ +0.00% +0.02% +0.12% / +0.04% +0.02% -0.04%] index_add_ reverse : Elapsed 0.056 ms (5.608 ms / 100) 5.576 -> 5.576 ( +0.00%) [ +0.14% +0.00% +0.13% / +0.09% +0.05% +0.00%] index_copy_ reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.610 -> 5.611 ( +0.02%) [ +0.00% +0.12% +0.14% / +0.18% +0.04% +0.02%] index_add_ spread : Elapsed 0.056 ms (5.610 ms / 100) 5.584 -> 5.583 ( -0.02%) [ +0.07% +0.00% +0.09% / +0.11% +0.04% -0.02%] index_copy_ spread : Elapsed 0.056 ms (5.588 ms / 100) 5.614 -> 5.614 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.53% +0.48%] index_add_ strided 3 : Elapsed 0.056 ms (5.617 ms / 100) 5.578 -> 5.586 ( +0.14%) [ +0.00% +0.11% +0.00% / +0.14% +0.48% +0.66%] index_copy_ strided 3 : Elapsed 0.056 ms (5.578 ms / 100) 5.606 -> 5.612 ( +0.11%) [ +0.00% +0.21% +0.12% / +0.11% +0.32% +0.46%] index_add_ perm : Elapsed 0.056 ms (5.606 ms / 100) 5.578 -> 5.584 ( +0.11%) [ +0.00% +0.02% +0.09% / +0.11% +0.36% +0.38%] index_copy_ perm : Elapsed 0.056 ms (5.578 ms / 100) 5.602 -> 5.610 ( +0.14%) [ +0.00% +0.00% +0.18% / +0.14% +0.55% +0.43%] index_add_ perm_sorted : Elapsed 0.056 ms (5.602 ms / 100) 5.575 -> 5.579 ( +0.07%) [ +0.00% +0.04% +0.29% / +0.07% +0.14% +0.14%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.575 ms / 100) 5.836 -> 5.847 ( +0.19%) [ +0.03% +0.00% +0.07% / +0.19% +0.31% +0.45%] index_select const : Elapsed 0.058 ms (5.838 ms / 100) 5.909 -> 5.916 ( +0.12%) [ +0.00% +0.07% +0.10% / +0.12% +0.36% +0.32%] index_select wrap : Elapsed 0.059 ms (5.909 ms / 100) 5.904 -> 5.916 ( +0.20%) [ +0.10% +0.10% +0.00% / +0.20% +0.25% +0.44%] index_select linear : Elapsed 0.059 ms (5.910 ms / 100) 5.907 -> 5.920 ( +0.22%) [ +0.00% +0.07% +0.36% / +0.22% +0.36% +0.34%] index_select reverse : Elapsed 0.059 ms (5.907 ms / 100) 5.831 -> 5.851 ( +0.34%) [ +0.10% +0.00% +0.27% / +0.34% +0.45% +0.50%] index_select skip64 : Elapsed 0.058 ms (5.837 ms / 100) 5.831 -> 5.845 ( +0.24%) [ +0.09% +0.00% +0.24% / +0.24% +0.41% +0.45%] index_select skip256 : Elapsed 0.058 ms (5.836 ms / 100) 5.905 -> 5.916 ( +0.19%) [ +0.15% +0.00% +0.17% / +0.19% +0.37% +0.20%] index_select spread : Elapsed 0.059 ms (5.914 ms / 100) 5.910 -> 5.926 ( +0.27%) [ +0.25% +0.00% +0.12% / +0.27% +0.37% +0.52%] index_select strided 3 : Elapsed 0.059 ms (5.925 ms / 100) 5.898 -> 5.909 ( +0.19%) [ +0.02% +0.24% +0.00% / +0.19% +0.53% +0.49%] index_select random : Elapsed 0.059 ms (5.899 ms / 100) 5.895 -> 5.894 ( -0.02%) [ +0.00% +0.08% +0.00% / -0.02% +0.10% +0.05%] index_select random_sorted : Elapsed 0.059 ms (5.895 ms / 100) B = [5, 40, 20, 16] (stride (1, 1600, 80, 5)) A = [4, 40, 20, 16] (stride (1, 4, 160, 3200)) dim = 0 5.666 -> 5.638 ( -0.49%) [ +0.09% +0.05% +0.00% / +0.16% -0.49% -0.49%] index_add_ linear : Elapsed 0.057 ms (5.671 ms / 100) 5.627 -> 5.591 ( -0.64%) [ +0.00% +0.04% +0.11% / +0.11% -0.50% -0.64%] index_copy_ linear : Elapsed 0.056 ms (5.627 ms / 100) 5.666 -> 5.635 ( -0.55%) [ +0.00% +0.05% +0.14% / +0.12% -0.55% -0.55%] index_add_ reverse : Elapsed 0.057 ms (5.666 ms / 100) 5.628 -> 5.597 ( -0.55%) [ +0.07% +0.00% +0.11% / +0.11% -0.55% -0.52%] index_copy_ reverse : Elapsed 0.056 ms (5.632 ms / 100) 5.666 -> 5.636 ( -0.53%) [ +0.02% +0.09% +0.00% / +0.07% -0.51% -0.53%] index_add_ spread : Elapsed 0.057 ms (5.667 ms / 100) 5.629 -> 5.598 ( -0.55%) [ +0.09% +0.00% +0.21% / +0.09% -0.53% -0.55%] index_copy_ spread : Elapsed 0.056 ms (5.634 ms / 100) 5.669 -> 5.633 ( -0.64%) [ +0.00% +0.00% +0.09% / +0.02% -0.53% -0.64%] index_add_ strided 3 : Elapsed 0.057 ms (5.669 ms / 100) 5.624 -> 5.589 ( -0.62%) [ +0.09% +0.00% +0.18% / +0.14% -0.57% -0.62%] index_copy_ strided 3 : Elapsed 0.056 ms (5.629 ms / 100) 5.661 -> 5.630 ( -0.55%) [ +0.00% +0.11% +0.26% / +0.07% -0.55% -0.42%] index_add_ perm : Elapsed 0.057 ms (5.661 ms / 100) 5.628 -> 5.596 ( -0.57%) [ +0.00% +0.09% +0.12% / +0.20% -0.57% -0.57%] index_copy_ perm : Elapsed 0.056 ms (5.628 ms / 100) 5.663 -> 5.632 ( -0.55%) [ +0.04% +0.00% +0.05% / +0.16% -0.55% -0.39%] index_add_ perm_sorted : Elapsed 0.057 ms (5.665 ms / 100) 5.630 -> 5.592 ( -0.67%) [ +0.18% +0.00% +0.00% / +0.00% -0.59% -0.67%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.640 ms / 100) 5.862 -> 5.822 ( -0.68%) [ +0.00% +0.02% +0.12% / +0.05% -0.68% -0.68%] index_select const : Elapsed 0.059 ms (5.862 ms / 100) 5.862 -> 5.817 ( -0.77%) [ +0.00% +0.03% +0.19% / +0.00% -0.77% -0.67%] index_select wrap : Elapsed 0.059 ms (5.862 ms / 100) 5.855 -> 5.819 ( -0.61%) [ +0.03% +0.00% +0.29% / +0.19% -0.55% -0.61%] index_select linear : Elapsed 0.059 ms (5.857 ms / 100) 5.860 -> 5.815 ( -0.77%) [ +0.20% +0.00% +0.03% / +0.10% -0.70% -0.77%] index_select reverse : Elapsed 0.059 ms (5.872 ms / 100) 5.863 -> 5.823 ( -0.68%) [ +0.02% +0.00% +0.10% / +0.15% -0.68% -0.68%] index_select skip64 : Elapsed 0.059 ms (5.864 ms / 100) 5.862 -> 5.818 ( -0.75%) [ +0.03% +0.03% +0.00% / +0.17% -0.68% -0.75%] index_select skip256 : Elapsed 0.059 ms (5.864 ms / 100) 5.859 -> 5.816 ( -0.73%) [ +0.00% +0.03% +0.22% / +0.12% -0.73% -0.63%] index_select spread : Elapsed 0.059 ms (5.859 ms / 100) 5.859 -> 5.816 ( -0.73%) [ +0.00% +0.02% +0.14% / +0.19% -0.72% -0.73%] index_select strided 3 : Elapsed 0.059 ms (5.859 ms / 100) 5.861 -> 5.823 ( -0.65%) [ +0.00% +0.10% +0.10% / +0.17% -0.65% -0.65%] index_select random : Elapsed 0.059 ms (5.861 ms / 100) 5.866 -> 5.817 ( -0.84%) [ +0.00% +0.00% +0.02% / +0.15% -0.84% -0.72%] index_select random_sorted : Elapsed 0.059 ms (5.866 ms / 100) B = [5, 40, 20, 16] (stride (40, 1, 3200, 200)) A = [4, 40, 20, 16] (stride (1, 1280, 64, 4)) dim = 0 5.139 -> 5.142 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.90% +0.74%] index_add_ linear : Elapsed 0.051 ms (5.139 ms / 100) 5.080 -> 5.088 ( +0.16%) [ +0.00% +0.06% +0.00% / +0.16% +0.73% +0.73%] index_copy_ linear : Elapsed 0.051 ms (5.080 ms / 100) 5.143 -> 5.147 ( +0.08%) [ +0.10% +0.04% +0.00% / +0.08% +0.68% +0.60%] index_add_ reverse : Elapsed 0.051 ms (5.148 ms / 100) 5.082 -> 5.087 ( +0.10%) [ +0.00% +0.04% +0.00% / +0.10% +0.63% +0.49%] index_copy_ reverse : Elapsed 0.051 ms (5.082 ms / 100) 5.145 -> 5.152 ( +0.14%) [ +0.14% +0.00% +0.02% / +0.14% +0.84% +0.87%] index_add_ spread : Elapsed 0.052 ms (5.152 ms / 100) 5.081 -> 5.095 ( +0.28%) [ +0.00% +0.16% +0.26% / +0.28% +0.71% +0.85%] index_copy_ spread : Elapsed 0.051 ms (5.081 ms / 100) 5.149 -> 5.161 ( +0.23%) [ +0.14% +0.00% +0.21% / +0.23% +0.80% +0.91%] index_add_ strided 3 : Elapsed 0.052 ms (5.156 ms / 100) 5.091 -> 5.094 ( +0.06%) [ +0.00% +0.00% +0.04% / +0.06% +0.71% +0.75%] index_copy_ strided 3 : Elapsed 0.051 ms (5.091 ms / 100) 5.147 -> 5.156 ( +0.17%) [ +0.00% +0.04% +0.25% / +0.17% +0.93% +0.91%] index_add_ perm : Elapsed 0.051 ms (5.147 ms / 100) 5.093 -> 5.098 ( +0.10%) [ +0.04% +0.00% +0.08% / +0.10% +0.79% +0.69%] index_copy_ perm : Elapsed 0.051 ms (5.095 ms / 100) 5.154 -> 5.163 ( +0.17%) [ +0.00% +0.06% +0.10% / +0.17% +0.64% +0.93%] index_add_ perm_sorted : Elapsed 0.052 ms (5.154 ms / 100) 5.095 -> 5.098 ( +0.06%) [ +0.00% +0.00% +0.08% / +0.06% +0.57% +0.75%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.095 ms / 100) 5.327 -> 5.332 ( +0.09%) [ +0.00% +0.09% +0.17% / +0.09% +1.01% +0.88%] index_select const : Elapsed 0.053 ms (5.327 ms / 100) 5.327 -> 5.331 ( +0.08%) [ +0.06% +0.00% +0.15% / +0.08% +1.01% +0.83%] index_select wrap : Elapsed 0.053 ms (5.330 ms / 100) 5.334 -> 5.341 ( +0.13%) [ +0.02% +0.00% +0.02% / +0.13% +0.94% +1.12%] index_select linear : Elapsed 0.053 ms (5.335 ms / 100) 5.329 -> 5.339 ( +0.19%) [ +0.04% +0.00% +0.08% / +0.19% +0.99% +0.99%] index_select reverse : Elapsed 0.053 ms (5.331 ms / 100) 5.327 -> 5.340 ( +0.24%) [ +0.00% +0.13% +0.15% / +0.24% +0.92% +0.88%] index_select skip64 : Elapsed 0.053 ms (5.327 ms / 100) 5.332 -> 5.327 ( -0.09%) [ +0.06% +0.00% +0.11% / -0.09% +0.94% +0.79%] index_select skip256 : Elapsed 0.053 ms (5.335 ms / 100) 5.330 -> 5.332 ( +0.04%) [ +0.00% +0.02% +0.21% / +0.04% +0.86% +0.86%] index_select spread : Elapsed 0.053 ms (5.330 ms / 100) 5.322 -> 5.340 ( +0.34%) [ +0.00% +0.15% +0.28% / +0.34% +1.16% +1.15%] index_select strided 3 : Elapsed 0.053 ms (5.322 ms / 100) 5.332 -> 5.340 ( +0.15%) [ +0.00% +0.06% +0.09% / +0.15% +0.86% +0.79%] index_select random : Elapsed 0.053 ms (5.332 ms / 100) 5.332 -> 5.333 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.94% +0.98%] index_select random_sorted : Elapsed 0.053 ms (5.332 ms / 100) B = [5, 40, 20, 16] (stride (40, 1, 3200, 200)) A = [4, 40, 20, 16] (stride (1, 1280, 4, 80)) dim = 0 5.754 -> 5.732 ( -0.38%) [ +0.09% +0.00% +0.10% / +0.00% -0.38% -0.35%] index_add_ linear : Elapsed 0.058 ms (5.759 ms / 100) 5.676 -> 5.656 ( -0.35%) [ +0.19% +0.00% +0.11% / +0.16% -0.21% -0.35%] index_copy_ linear : Elapsed 0.057 ms (5.687 ms / 100) 5.749 -> 5.727 ( -0.38%) [ +0.00% +0.05% +0.10% / +0.21% -0.37% -0.38%] index_add_ reverse : Elapsed 0.057 ms (5.749 ms / 100) 5.677 -> 5.654 ( -0.41%) [ +0.07% +0.00% +0.02% / +0.05% -0.39% -0.41%] index_copy_ reverse : Elapsed 0.057 ms (5.681 ms / 100) 5.754 -> 5.727 ( -0.47%) [ +0.00% +0.02% +0.14% / +0.02% -0.38% -0.47%] index_add_ spread : Elapsed 0.058 ms (5.754 ms / 100) 5.679 -> 5.655 ( -0.42%) [ +0.02% +0.00% +0.12% / +0.12% -0.42% -0.42%] index_copy_ spread : Elapsed 0.057 ms (5.680 ms / 100) 5.761 -> 5.749 ( -0.21%) [ +0.05% +0.00% +0.19% / +0.19% -0.16% -0.21%] index_add_ strided 3 : Elapsed 0.058 ms (5.764 ms / 100) 5.685 -> 5.678 ( -0.12%) [ +0.00% +0.09% +0.16% / +0.16% -0.09% -0.12%] index_copy_ strided 3 : Elapsed 0.057 ms (5.685 ms / 100) 5.756 -> 5.736 ( -0.35%) [ +0.03% +0.00% +0.09% / +0.00% -0.35% -0.23%] index_add_ perm : Elapsed 0.058 ms (5.758 ms / 100) 5.681 -> 5.668 ( -0.23%) [ +0.05% +0.07% +0.00% / +0.09% -0.18% -0.23%] index_copy_ perm : Elapsed 0.057 ms (5.684 ms / 100) 5.746 -> 5.730 ( -0.28%) [ +0.16% +0.00% +0.00% / +0.17% -0.28% -0.14%] index_add_ perm_sorted : Elapsed 0.058 ms (5.755 ms / 100) 5.678 -> 5.665 ( -0.23%) [ +0.12% +0.00% +0.09% / +0.09% -0.19% -0.23%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.685 ms / 100) 6.042 -> 6.018 ( -0.40%) [ +0.00% +0.00% +0.10% / +0.12% -0.38% -0.40%] index_select const : Elapsed 0.060 ms (6.042 ms / 100) 6.039 -> 6.013 ( -0.43%) [ +0.00% +0.18% +0.13% / +0.13% -0.43% -0.41%] index_select wrap : Elapsed 0.060 ms (6.039 ms / 100) 6.041 -> 6.013 ( -0.46%) [ +0.00% +0.05% +0.07% / +0.07% -0.36% -0.46%] index_select linear : Elapsed 0.060 ms (6.041 ms / 100) 6.041 -> 6.014 ( -0.45%) [ +0.13% +0.00% +0.13% / +0.10% -0.45% -0.41%] index_select reverse : Elapsed 0.060 ms (6.049 ms / 100) 6.045 -> 6.012 ( -0.55%) [ +0.08% +0.00% +0.07% / -0.05% -0.46% -0.55%] index_select skip64 : Elapsed 0.061 ms (6.050 ms / 100) 6.038 -> 6.021 ( -0.28%) [ +0.12% +0.00% +0.18% / +0.12% -0.23% -0.28%] index_select skip256 : Elapsed 0.060 ms (6.045 ms / 100) 6.043 -> 6.013 ( -0.50%) [ +0.05% +0.03% +0.00% / +0.03% -0.50% -0.28%] index_select spread : Elapsed 0.060 ms (6.046 ms / 100) 6.037 -> 6.018 ( -0.31%) [ +0.17% +0.00% +0.15% / +0.17% -0.31% -0.31%] index_select strided 3 : Elapsed 0.060 ms (6.047 ms / 100) 6.040 -> 6.013 ( -0.45%) [ +0.00% +0.12% +0.23% / +0.12% -0.45% -0.31%] index_select random : Elapsed 0.060 ms (6.040 ms / 100) 6.038 -> 6.019 ( -0.31%) [ +0.00% +0.08% +0.28% / +0.15% -0.31% -0.30%] index_select random_sorted : Elapsed 0.060 ms (6.038 ms / 100) B = [5, 40, 20, 16] (stride (1, 100, 5, 4000)) A = [4, 40, 20, 16] (stride (12800, 320, 1, 20)) dim = 0 5.757 -> 5.770 ( +0.23%) [ +0.00% +0.02% +0.12% / +0.23% +0.26% +0.43%] index_add_ linear : Elapsed 0.058 ms (5.757 ms / 100) 5.749 -> 5.751 ( +0.03%) [ +0.10% +0.00% +0.17% / +0.09% +0.03% +0.07%] index_copy_ linear : Elapsed 0.058 ms (5.755 ms / 100) 5.755 -> 5.769 ( +0.24%) [ +0.17% +0.00% +0.05% / +0.24% +0.43% +0.28%] index_add_ reverse : Elapsed 0.058 ms (5.765 ms / 100) 5.751 -> 5.748 ( -0.05%) [ +0.00% +0.10% +0.14% / +0.14% +0.05% -0.05%] index_copy_ reverse : Elapsed 0.058 ms (5.751 ms / 100) 5.766 -> 5.766 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.10% +0.19%] index_add_ spread : Elapsed 0.058 ms (5.766 ms / 100) 5.754 -> 5.752 ( -0.03%) [ +0.00% +0.03% +0.19% / +0.10% -0.03% +0.05%] index_copy_ spread : Elapsed 0.058 ms (5.754 ms / 100) 5.761 -> 5.766 ( +0.09%) [ +0.03% +0.00% +0.09% / +0.09% +0.19% +0.24%] index_add_ strided 3 : Elapsed 0.058 ms (5.763 ms / 100) 5.754 -> 5.753 ( -0.02%) [ +0.00% +0.09% +0.12% / +0.02% +0.12% -0.02%] index_copy_ strided 3 : Elapsed 0.058 ms (5.754 ms / 100) 5.767 -> 5.771 ( +0.07%) [ +0.03% +0.03% +0.00% / +0.07% +0.16% +0.16%] index_add_ perm : Elapsed 0.058 ms (5.769 ms / 100) 5.751 -> 5.755 ( +0.07%) [ +0.02% +0.00% +0.19% / +0.12% +0.10% +0.07%] index_copy_ perm : Elapsed 0.058 ms (5.752 ms / 100) 5.764 -> 5.767 ( +0.05%) [ +0.00% +0.02% +0.12% / +0.05% +0.17% +0.17%] index_add_ perm_sorted : Elapsed 0.058 ms (5.764 ms / 100) 5.754 -> 5.756 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.03% +0.09%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.754 ms / 100) 5.965 -> 5.970 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.10% +0.08% +0.08%] index_select const : Elapsed 0.060 ms (5.965 ms / 100) 6.014 -> 6.013 ( -0.02%) [ +0.08% +0.00% +0.08% / -0.02% +0.17% +0.12%] index_select wrap : Elapsed 0.060 ms (6.019 ms / 100) 6.001 -> 6.006 ( +0.08%) [ +0.12% +0.00% +0.18% / +0.08% +0.20% +0.32%] index_select linear : Elapsed 0.060 ms (6.008 ms / 100) 5.997 -> 6.004 ( +0.12%) [ +0.00% +0.00% +0.03% / +0.12% +0.32% +0.20%] index_select reverse : Elapsed 0.060 ms (5.997 ms / 100) 5.969 -> 5.967 ( -0.03%) [ +0.05% +0.03% +0.00% / +0.17% -0.03% +0.00%] index_select skip64 : Elapsed 0.060 ms (5.972 ms / 100) 5.973 -> 5.968 ( -0.08%) [ +0.00% +0.02% +0.03% / +0.08% -0.05% -0.08%] index_select skip256 : Elapsed 0.060 ms (5.973 ms / 100) 6.007 -> 6.006 ( -0.02%) [ +0.00% +0.10% +0.05% / +0.03% -0.02% +0.07%] index_select spread : Elapsed 0.060 ms (6.007 ms / 100) 6.011 -> 6.017 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.18% +0.10% +0.37%] index_select strided 3 : Elapsed 0.060 ms (6.011 ms / 100) 5.994 -> 6.006 ( +0.20%) [ +0.00% +0.07% +0.08% / +0.20% +0.35% +0.40%] index_select random : Elapsed 0.060 ms (5.994 ms / 100) 5.984 -> 5.990 ( +0.10%) [ +0.00% +0.05% +0.12% / +0.10% +0.30% +0.25%] index_select random_sorted : Elapsed 0.060 ms (5.984 ms / 100) B = [5, 40, 20, 16] (stride (40, 1, 200, 4000)) A = [4, 40, 20, 16] (stride (12800, 1, 640, 40)) dim = 0 5.600 -> 5.572 ( -0.50%) [ +0.00% +0.20% +0.43% / +0.21% -0.50% -0.39%] index_add_ linear : Elapsed 0.056 ms (5.600 ms / 100) 5.552 -> 5.524 ( -0.50%) [ +0.13% +0.00% +0.05% / +0.20% -0.50% -0.49%] index_copy_ linear : Elapsed 0.056 ms (5.559 ms / 100) 5.605 -> 5.572 ( -0.59%) [ +0.00% +0.02% +0.04% / +0.18% -0.59% -0.54%] index_add_ reverse : Elapsed 0.056 ms (5.605 ms / 100) 5.542 -> 5.522 ( -0.36%) [ +0.20% +0.00% +0.16% / +0.23% -0.36% -0.29%] index_copy_ reverse : Elapsed 0.056 ms (5.553 ms / 100) 5.601 -> 5.569 ( -0.57%) [ +0.00% +0.21% +0.21% / +0.05% -0.57% -0.43%] index_add_ spread : Elapsed 0.056 ms (5.601 ms / 100) 5.555 -> 5.521 ( -0.61%) [ +0.00% +0.04% +0.11% / +0.02% -0.59% -0.61%] index_copy_ spread : Elapsed 0.056 ms (5.555 ms / 100) 5.615 -> 5.588 ( -0.48%) [ +0.00% +0.07% +0.05% / +0.20% -0.34% -0.48%] index_add_ strided 3 : Elapsed 0.056 ms (5.615 ms / 100) 5.559 -> 5.537 ( -0.40%) [ +0.11% +0.07% +0.00% / +0.07% -0.40% -0.32%] index_copy_ strided 3 : Elapsed 0.056 ms (5.565 ms / 100) 5.610 -> 5.585 ( -0.45%) [ +0.05% +0.05% +0.00% / +0.00% -0.37% -0.45%] index_add_ perm : Elapsed 0.056 ms (5.613 ms / 100) 5.556 -> 5.532 ( -0.43%) [ +0.00% +0.00% +0.00% / +0.11% -0.32% -0.43%] index_copy_ perm : Elapsed 0.056 ms (5.556 ms / 100) 5.610 -> 5.571 ( -0.70%) [ +0.02% +0.16% +0.00% / +0.04% -0.70% -0.57%] index_add_ perm_sorted : Elapsed 0.056 ms (5.611 ms / 100) 5.553 -> 5.515 ( -0.68%) [ +0.00% +0.04% +0.20% / +0.18% -0.68% -0.61%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.553 ms / 100) 5.814 -> 5.820 ( +0.10%) [ +0.12% +0.09% +0.00% / +0.10% +0.28% +0.33%] index_select const : Elapsed 0.058 ms (5.821 ms / 100) 5.878 -> 5.858 ( -0.34%) [ +0.00% +0.00% +0.17% / +0.26% -0.34% -0.24%] index_select wrap : Elapsed 0.059 ms (5.878 ms / 100) 5.882 -> 5.856 ( -0.44%) [ +0.14% +0.00% +0.12% / +0.07% -0.39% -0.44%] index_select linear : Elapsed 0.059 ms (5.890 ms / 100) 5.876 -> 5.857 ( -0.32%) [ +0.02% +0.05% +0.00% / +0.05% -0.20% -0.32%] index_select reverse : Elapsed 0.059 ms (5.877 ms / 100) 5.820 -> 5.820 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.24% +0.21%] index_select skip64 : Elapsed 0.058 ms (5.824 ms / 100) 5.818 -> 5.826 ( +0.14%) [ +0.00% +0.09% +0.07% / +0.14% +0.22% +0.40%] index_select skip256 : Elapsed 0.058 ms (5.818 ms / 100) 5.878 -> 5.853 ( -0.43%) [ +0.03% +0.05% +0.00% / -0.05% -0.43% -0.37%] index_select spread : Elapsed 0.059 ms (5.880 ms / 100) 5.872 -> 5.863 ( -0.15%) [ +0.03% +0.00% +0.10% / +0.20% -0.03% -0.15%] index_select strided 3 : Elapsed 0.059 ms (5.874 ms / 100) 5.874 -> 5.863 ( -0.19%) [ +0.02% +0.00% +0.15% / +0.03% -0.19% -0.14%] index_select random : Elapsed 0.059 ms (5.875 ms / 100) 5.888 -> 5.853 ( -0.59%) [ +0.10% +0.00% +0.10% / +0.15% -0.54% -0.59%] index_select random_sorted : Elapsed 0.059 ms (5.894 ms / 100) B = [5, 40, 20, 16] (stride (1, 5, 200, 4000)) A = [4, 40, 20, 16] (stride (20, 1280, 1, 80)) dim = 0 6.085 -> 6.079 ( -0.10%) [ +0.00% +0.12% +0.18% / +0.25% -0.10% -0.03%] index_add_ linear : Elapsed 0.061 ms (6.085 ms / 100) 6.079 -> 6.061 ( -0.30%) [ +0.00% +0.02% +0.03% / +0.08% -0.28% -0.30%] index_copy_ linear : Elapsed 0.061 ms (6.079 ms / 100) 6.088 -> 6.086 ( -0.03%) [ +0.13% +0.00% +0.10% / +0.16% -0.03% +0.03%] index_add_ reverse : Elapsed 0.061 ms (6.096 ms / 100) 6.076 -> 6.063 ( -0.21%) [ +0.00% +0.00% +0.20% / +0.08% -0.20% -0.21%] index_copy_ reverse : Elapsed 0.061 ms (6.076 ms / 100) 6.087 -> 6.083 ( -0.07%) [ +0.00% +0.07% +0.13% / +0.23% +0.03% -0.07%] index_add_ spread : Elapsed 0.061 ms (6.087 ms / 100) 6.079 -> 6.059 ( -0.33%) [ +0.00% +0.02% +0.03% / +0.03% -0.33% -0.21%] index_copy_ spread : Elapsed 0.061 ms (6.079 ms / 100) 6.090 -> 6.080 ( -0.16%) [ +0.02% +0.00% +0.11% / +0.15% -0.16% -0.10%] index_add_ strided 3 : Elapsed 0.061 ms (6.091 ms / 100) 6.077 -> 6.059 ( -0.30%) [ +0.00% +0.07% +0.08% / +0.02% -0.21% -0.30%] index_copy_ strided 3 : Elapsed 0.061 ms (6.077 ms / 100) 6.091 -> 6.080 ( -0.18%) [ +0.00% +0.11% +0.03% / +0.11% -0.15% -0.18%] index_add_ perm : Elapsed 0.061 ms (6.091 ms / 100) 6.076 -> 6.066 ( -0.16%) [ +0.02% +0.00% +0.08% / +0.15% -0.16% -0.07%] index_copy_ perm : Elapsed 0.061 ms (6.077 ms / 100) 6.086 -> 6.082 ( -0.07%) [ +0.00% +0.05% +0.20% / +0.21% -0.07% -0.03%] index_add_ perm_sorted : Elapsed 0.061 ms (6.086 ms / 100) 6.079 -> 6.060 ( -0.31%) [ +0.00% +0.07% +0.13% / +0.12% -0.20% -0.31%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.079 ms / 100) 6.344 -> 6.326 ( -0.28%) [ +0.00% +0.03% +0.03% / +0.11% -0.24% -0.28%] index_select const : Elapsed 0.063 ms (6.344 ms / 100) 6.396 -> 6.375 ( -0.33%) [ +0.09% +0.00% +0.09% / +0.08% -0.23% -0.33%] index_select wrap : Elapsed 0.064 ms (6.402 ms / 100) 6.384 -> 6.374 ( -0.16%) [ +0.03% +0.00% +0.08% / +0.09% -0.16% -0.14%] index_select linear : Elapsed 0.064 ms (6.386 ms / 100) 6.385 -> 6.371 ( -0.22%) [ +0.06% +0.06% +0.00% / +0.16% -0.22% -0.22%] index_select reverse : Elapsed 0.064 ms (6.389 ms / 100) 6.345 -> 6.320 ( -0.39%) [ +0.06% +0.00% +0.13% / +0.09% -0.39% -0.35%] index_select skip64 : Elapsed 0.063 ms (6.349 ms / 100) 6.344 -> 6.325 ( -0.30%) [ +0.06% +0.00% +0.09% / +0.13% -0.30% -0.28%] index_select skip256 : Elapsed 0.063 ms (6.348 ms / 100) 6.388 -> 6.362 ( -0.41%) [ +0.11% +0.00% +0.13% / +0.17% -0.31% -0.41%] index_select spread : Elapsed 0.064 ms (6.395 ms / 100) 6.394 -> 6.371 ( -0.36%) [ +0.00% +0.00% +0.03% / +0.03% -0.36% -0.31%] index_select strided 3 : Elapsed 0.064 ms (6.394 ms / 100) 6.388 -> 6.374 ( -0.22%) [ +0.02% +0.16% +0.00% / +0.14% -0.14% -0.22%] index_select random : Elapsed 0.064 ms (6.389 ms / 100) 6.376 -> 6.361 ( -0.24%) [ +0.00% +0.13% +0.19% / +0.16% -0.24% -0.16%] index_select random_sorted : Elapsed 0.064 ms (6.376 ms / 100) out_shape = [4, 5, 20, 16] in_shape = [4, 40, 20, 16] idx_dim = 1 B = [4, 5, 20, 16] (stride (1600, 320, 16, 1)) A = [4, 40, 20, 16] (stride (12800, 1, 40, 800)) dim = 1 1.499 -> 1.500 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.47% +0.40%] index_select const : Elapsed 0.015 ms (1.499 ms / 100) 1.498 -> 1.497 ( -0.07%) [ +0.07% +0.13% +0.00% / -0.07% +0.40% +0.20%] index_select wrap : Elapsed 0.015 ms (1.499 ms / 100) 1.500 -> 1.502 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.60% +0.33%] index_select linear : Elapsed 0.015 ms (1.501 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.07% +0.20% +0.00% / +0.20% +0.60% +0.73%] index_select reverse : Elapsed 0.015 ms (1.499 ms / 100) 1.497 -> 1.496 ( -0.07%) [ +0.07% +0.13% +0.00% / -0.07% +0.60% +0.40%] index_select skip64 : Elapsed 0.015 ms (1.498 ms / 100) 1.498 -> 1.499 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.53% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.498 ms / 100) 1.494 -> 1.496 ( +0.13%) [ +0.33% +0.27% +0.00% / +0.13% +0.60% +0.60%] index_select spread : Elapsed 0.015 ms (1.499 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.60% +0.67%] index_select strided 3 : Elapsed 0.015 ms (1.494 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.33% +0.33%] index_select strided 5 : Elapsed 0.015 ms (1.494 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.67% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.496 -> 1.496 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.53% +0.47%] index_select strided 8 : Elapsed 0.015 ms (1.496 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.74% +0.60%] index_select strided 16 : Elapsed 0.015 ms (1.494 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.60% +0.60%] index_select random : Elapsed 0.015 ms (1.494 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.67% +0.60%] index_select random_sorted : Elapsed 0.015 ms (1.493 ms / 100) 1.494 -> 1.496 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.67% +0.80%] index_select perm : Elapsed 0.015 ms (1.496 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.60% +0.60%] index_select perm_sorted : Elapsed 0.015 ms (1.498 ms / 100) B = [4, 5, 20, 16] (stride (1600, 1, 5, 100)) A = [4, 40, 20, 16] (stride (12800, 320, 1, 20)) dim = 1 1.482 -> 1.483 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.40% +0.40%] index_select const : Elapsed 0.015 ms (1.482 ms / 100) 1.482 -> 1.481 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.54% +0.54%] index_select wrap : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select linear : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.20% +0.00% +0.00% / +0.14% +0.95% +0.68%] index_select reverse : Elapsed 0.015 ms (1.483 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.68% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.481 ms / 100) 1.481 -> 1.480 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.481 ms / 100) 1.481 -> 1.483 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.68% +0.61%] index_select spread : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.81% +0.74%] index_select strided 3 : Elapsed 0.015 ms (1.482 ms / 100) 1.482 -> 1.482 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +1.01% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.483 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.20% +0.20% +0.00% / +0.14% +0.74% +0.81%] index_select strided 7 : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.481 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_select strided 8 : Elapsed 0.015 ms (1.481 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.68% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.481 ms / 100) 1.482 -> 1.481 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.67% +0.54%] index_select random : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.481 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.74% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.483 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.81% +0.68%] index_select perm : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.74% +0.74%] index_select perm_sorted : Elapsed 0.015 ms (1.482 ms / 100) B = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) A = [4, 40, 20, 16] (stride (640, 16, 2560, 1)) dim = 1 1.378 -> 1.383 ( +0.36%) [ +0.29% +0.00% +0.44% / +0.36% +0.44% +0.44%] index_select const : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.44% +0.07% +0.00% / +0.15% +0.36% +0.36%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.29% +0.29% +0.00% / +0.29% +0.22% +0.22%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.379 ( -0.14%) [ +0.14% +0.07% +0.00% / -0.14% +0.22% +0.22%] index_select reverse : Elapsed 0.014 ms (1.383 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.44% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.36% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.15% +0.36% +0.00% / +0.15% +0.44% +0.36%] index_select spread : Elapsed 0.014 ms (1.380 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.36% +0.29%] index_select strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.29% +0.00% +0.07% / -0.07% +0.36% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.44% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.22% +0.22% +0.00% / +0.07% +0.44% +0.29%] index_select strided 8 : Elapsed 0.014 ms (1.382 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.58% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.58% +0.51%] index_select random : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.22% +0.29% +0.00% / +0.15% +0.58% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.44% +0.51%] index_select perm : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.00% +0.58% +0.00% / +0.00% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) B = [4, 5, 20, 16] (stride (20, 1280, 1, 80)) dim = 1 fill_cnt = 40 2.486 -> 2.484 ( -0.08%) [ +0.28% +0.00% +0.08% / -0.08% +0.00% +0.08%] index_fill_ const : Elapsed 0.025 ms (2.493 ms / 100) 2.624 -> 2.622 ( -0.08%) [ +0.00% +0.19% +0.15% / -0.08% +0.04% +0.23%] index_fill_ linear : Elapsed 0.026 ms (2.624 ms / 100) 2.625 -> 2.623 ( -0.08%) [ +0.04% +0.27% +0.00% / +0.34% -0.08% +0.38%] index_fill_ reverse : Elapsed 0.026 ms (2.626 ms / 100) 2.490 -> 2.481 ( -0.36%) [ +0.04% +0.00% +0.00% / -0.28% +0.08% -0.36%] index_fill_ skip64 : Elapsed 0.025 ms (2.491 ms / 100) 2.486 -> 2.484 ( -0.08%) [ +0.12% +0.16% +0.00% / +0.04% -0.08% +0.12%] index_fill_ skip256 : Elapsed 0.025 ms (2.489 ms / 100) 2.513 -> 2.506 ( -0.28%) [ +0.04% +0.00% +0.04% / -0.28% -0.16% +0.00%] index_fill_ spread : Elapsed 0.025 ms (2.514 ms / 100) 2.495 -> 2.492 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.12% -0.08% -0.12%] index_fill_ strided 3 : Elapsed 0.025 ms (2.495 ms / 100) 2.491 -> 2.489 ( -0.08%) [ +0.00% +0.08% +0.04% / -0.08% +0.12% +0.12%] index_fill_ random : Elapsed 0.025 ms (2.491 ms / 100) 2.481 -> 2.482 ( +0.04%) [ +0.04% +0.00% +0.16% / +0.04% +0.24% +0.44%] index_fill_ random_sorted : Elapsed 0.025 ms (2.482 ms / 100) B = [4, 5, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 40, 20, 16] (stride (16, 1280, 64, 1)) dim = 1 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.61% +0.54%] index_select const : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_select wrap : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.476 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.61% +0.54%] index_select linear : Elapsed 0.015 ms (1.476 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.75%] index_select reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.54%] index_select skip64 : Elapsed 0.015 ms (1.474 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.68% +0.75%] index_select skip256 : Elapsed 0.015 ms (1.475 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.75% +0.75%] index_select spread : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.68% +0.68%] index_select strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.81% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.476 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.75%] index_select strided 16 : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select random : Elapsed 0.015 ms (1.474 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.20% +0.07% +0.00% / +0.07% +0.75% +0.75%] index_select random_sorted : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.61% +0.54%] index_select perm : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) B = [4, 5, 20, 16] (stride (100, 1, 5, 400)) dim = 1 fill_cnt = 40 2.494 -> 2.491 ( -0.12%) [ +0.20% +0.04% +0.00% / -0.12% +0.36% +0.36%] index_fill_ const : Elapsed 0.025 ms (2.499 ms / 100) 2.486 -> 2.483 ( -0.12%) [ +0.00% +0.04% +0.04% / -0.12% +0.36% +0.36%] index_fill_ linear : Elapsed 0.025 ms (2.486 ms / 100) 2.484 -> 2.480 ( -0.16%) [ +0.04% +0.12% +0.00% / -0.16% +0.28% +0.20%] index_fill_ reverse : Elapsed 0.025 ms (2.485 ms / 100) 2.494 -> 2.495 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.40% +0.40%] index_fill_ skip64 : Elapsed 0.025 ms (2.497 ms / 100) 2.495 -> 2.501 ( +0.24%) [ +0.00% +0.04% +0.04% / +0.28% +0.28% +0.24%] index_fill_ skip256 : Elapsed 0.025 ms (2.495 ms / 100) 2.487 -> 2.489 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.36% +0.48%] index_fill_ spread : Elapsed 0.025 ms (2.487 ms / 100) 2.485 -> 2.480 ( -0.20%) [ +0.00% +0.00% +0.16% / -0.20% +0.20% +0.48%] index_fill_ strided 3 : Elapsed 0.025 ms (2.485 ms / 100) 2.484 -> 2.484 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.40% +0.36%] index_fill_ random : Elapsed 0.025 ms (2.484 ms / 100) 2.486 -> 2.480 ( -0.24%) [ +0.00% +0.08% +0.00% / -0.24% +0.40% +0.16%] index_fill_ random_sorted : Elapsed 0.025 ms (2.486 ms / 100) B = [4, 5, 20, 16] (stride (20, 80, 1, 400)) A = [4, 40, 20, 16] (stride (16, 1280, 64, 1)) dim = 1 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.47%] index_select const : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.41% +0.54%] index_select wrap : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select linear : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.61% +0.54%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.54% +0.47%] index_select skip64 : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.47% +0.47%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.47% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.61% +0.54%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.47%] index_select random : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.61% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.475 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.54% +0.47%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.75% +1.02%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [4, 5, 20, 16] (stride (1, 4, 20, 400)) A = [4, 40, 20, 16] (stride (800, 20, 1, 3200)) dim = 1 1.570 -> 1.573 ( +0.19%) [ +0.38% +0.25% +0.00% / +0.19% +0.25% +0.25%] index_select const : Elapsed 0.016 ms (1.576 ms / 100) 1.548 -> 1.549 ( +0.06%) [ +0.19% +0.13% +0.00% / +0.06% +0.65% +0.97%] index_select wrap : Elapsed 0.016 ms (1.551 ms / 100) 1.541 -> 1.542 ( +0.06%) [ +0.06% +0.13% +0.00% / +0.06% +0.58% +0.58%] index_select linear : Elapsed 0.015 ms (1.542 ms / 100) 1.535 -> 1.540 ( +0.33%) [ +0.33% +0.00% +0.33% / +0.33% +0.98% +0.65%] index_select reverse : Elapsed 0.015 ms (1.540 ms / 100) 1.549 -> 1.555 ( +0.39%) [ +0.58% +0.13% +0.00% / +0.39% +1.36% +1.55%] index_select skip64 : Elapsed 0.016 ms (1.558 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.06% +0.00% +0.00% / +0.38% +0.19% +0.13%] index_select skip256 : Elapsed 0.016 ms (1.574 ms / 100) 1.538 -> 1.541 ( +0.20%) [ +0.13% +0.00% +0.13% / +0.20% +0.78% +0.78%] index_select spread : Elapsed 0.015 ms (1.540 ms / 100) 1.536 -> 1.539 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.85% +0.59%] index_select strided 3 : Elapsed 0.015 ms (1.537 ms / 100) 1.545 -> 1.546 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +1.88% +2.20%] index_select strided 5 : Elapsed 0.015 ms (1.546 ms / 100) 1.548 -> 1.548 ( +0.00%) [ +0.06% +0.19% +0.00% / +0.00% +1.36% +1.74%] index_select strided 7 : Elapsed 0.015 ms (1.549 ms / 100) 1.531 -> 1.531 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.72% +0.65%] index_select strided 8 : Elapsed 0.015 ms (1.531 ms / 100) 1.551 -> 1.556 ( +0.32%) [ +0.00% +0.00% +0.13% / +0.32% +1.48% +1.35%] index_select strided 16 : Elapsed 0.016 ms (1.551 ms / 100) 1.545 -> 1.545 ( +0.00%) [ +0.13% +0.06% +0.00% / +0.00% +0.78% +0.58%] index_select random : Elapsed 0.015 ms (1.547 ms / 100) 1.532 -> 1.532 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.78% +0.85%] index_select random_sorted : Elapsed 0.015 ms (1.533 ms / 100) 1.545 -> 1.546 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +1.29% +1.75%] index_select perm : Elapsed 0.015 ms (1.545 ms / 100) 1.548 -> 1.550 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +1.10% +1.68%] index_select perm_sorted : Elapsed 0.015 ms (1.548 ms / 100) out_shape = [4, 40, 5, 16] in_shape = [4, 40, 20, 16] idx_dim = 2 B = [4, 40, 5, 16] (stride (3200, 16, 640, 1)) A = [4, 40, 20, 16] (stride (12800, 320, 16, 1)) dim = 2 1.674 -> 1.682 ( +0.48%) [ +0.24% +0.18% +0.00% / +0.48% +0.54% +0.72%] index_select const : Elapsed 0.017 ms (1.678 ms / 100) 1.712 -> 1.716 ( +0.23%) [ +0.00% +0.06% +0.00% / +0.23% +0.41% +0.58%] index_select wrap : Elapsed 0.017 ms (1.712 ms / 100) 1.712 -> 1.715 ( +0.18%) [ +0.06% +0.00% +0.00% / +0.18% +0.41% +0.41%] index_select linear : Elapsed 0.017 ms (1.713 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.41% +0.29%] index_select reverse : Elapsed 0.017 ms (1.714 ms / 100) 1.674 -> 1.680 ( +0.36%) [ +0.24% +0.00% +0.30% / +0.36% +0.60% +0.42%] index_select skip64 : Elapsed 0.017 ms (1.678 ms / 100) 1.680 -> 1.673 ( -0.42%) [ +0.06% +0.18% +0.00% / -0.42% +0.42% +0.18%] index_select skip256 : Elapsed 0.017 ms (1.681 ms / 100) 1.711 -> 1.715 ( +0.23%) [ +0.00% +0.06% +0.06% / +0.23% +0.53% +0.47%] index_select spread : Elapsed 0.017 ms (1.711 ms / 100) 1.713 -> 1.716 ( +0.18%) [ +0.23% +0.00% +0.12% / +0.18% +0.53% +0.53%] index_select strided 3 : Elapsed 0.017 ms (1.717 ms / 100) 1.698 -> 1.698 ( +0.00%) [ +0.06% +0.00% +0.18% / +0.00% +0.71% +0.65%] index_select strided 5 : Elapsed 0.017 ms (1.699 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.41% +0.12% +0.00% / +0.12% +0.70% +0.76%] index_select strided 7 : Elapsed 0.017 ms (1.715 ms / 100) 1.711 -> 1.713 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.47% +0.53%] index_select strided 8 : Elapsed 0.017 ms (1.713 ms / 100) 1.711 -> 1.712 ( +0.06%) [ +0.06% +0.00% +0.35% / +0.06% +0.53% +0.53%] index_select strided 16 : Elapsed 0.017 ms (1.712 ms / 100) 1.690 -> 1.692 ( +0.12%) [ +0.06% +0.00% +0.18% / +0.12% +0.71% +0.59%] index_select random : Elapsed 0.017 ms (1.691 ms / 100) 1.689 -> 1.692 ( +0.18%) [ +0.06% +0.00% +0.18% / +0.18% +0.71% +0.59%] index_select random_sorted : Elapsed 0.017 ms (1.690 ms / 100) 1.712 -> 1.710 ( -0.12%) [ +0.00% +0.18% +0.00% / -0.12% +0.70% +0.47%] index_select perm : Elapsed 0.017 ms (1.712 ms / 100) 1.709 -> 1.714 ( +0.29%) [ +0.00% +0.00% +0.18% / +0.29% +0.70% +0.82%] index_select perm_sorted : Elapsed 0.017 ms (1.709 ms / 100) B = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) A = [4, 40, 20, 16] (stride (640, 16, 2560, 1)) dim = 2 1.671 -> 1.671 ( +0.00%) [ +0.36% +0.00% +0.42% / +0.42% +0.06% +0.00%] index_select const : Elapsed 0.017 ms (1.677 ms / 100) 1.669 -> 1.675 ( +0.36%) [ +0.18% +0.06% +0.00% / +0.60% +0.42% +0.36%] index_select wrap : Elapsed 0.017 ms (1.672 ms / 100) 1.667 -> 1.673 ( +0.36%) [ +0.24% +0.30% +0.00% / +0.36% +0.60% +0.36%] index_select linear : Elapsed 0.017 ms (1.671 ms / 100) 1.670 -> 1.673 ( +0.18%) [ +0.06% +0.00% +0.12% / +0.18% +1.14% +0.36%] index_select reverse : Elapsed 0.017 ms (1.671 ms / 100) 1.672 -> 1.673 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.24% +0.06% +0.06%] index_select skip64 : Elapsed 0.017 ms (1.673 ms / 100) 1.671 -> 1.673 ( +0.12%) [ +0.12% +0.18% +0.00% / +0.24% +0.12% +0.24%] index_select skip256 : Elapsed 0.017 ms (1.673 ms / 100) 1.684 -> 1.683 ( -0.06%) [ +0.00% +0.12% +0.30% / -0.06% +0.12% -0.06%] index_select spread : Elapsed 0.017 ms (1.684 ms / 100) 1.697 -> 1.684 ( -0.77%) [ +0.24% +0.00% +0.12% / +0.24% -0.65% -0.77%] index_select strided 3 : Elapsed 0.017 ms (1.701 ms / 100) 1.684 -> 1.680 ( -0.24%) [ +0.06% +0.00% +0.12% / -0.06% -0.06% -0.24%] index_select strided 5 : Elapsed 0.017 ms (1.685 ms / 100) 1.689 -> 1.688 ( -0.06%) [ +0.06% +0.18% +0.00% / +0.36% +0.06% -0.06%] index_select strided 7 : Elapsed 0.017 ms (1.690 ms / 100) 1.687 -> 1.684 ( -0.18%) [ +0.00% +0.00% +0.18% / +0.00% +0.00% -0.18%] index_select strided 8 : Elapsed 0.017 ms (1.687 ms / 100) 1.683 -> 1.683 ( +0.00%) [ +0.24% +0.00% +0.12% / +0.00% +0.36% +0.12%] index_select strided 16 : Elapsed 0.017 ms (1.687 ms / 100) 1.696 -> 1.682 ( -0.83%) [ +0.00% +0.18% +0.00% / +0.06% -0.77% -0.83%] index_select random : Elapsed 0.017 ms (1.696 ms / 100) 1.701 -> 1.680 ( -1.23%) [ +0.06% +0.00% +0.00% / +0.06% -1.23% -1.00%] index_select random_sorted : Elapsed 0.017 ms (1.702 ms / 100) 1.680 -> 1.681 ( +0.06%) [ +0.30% +0.06% +0.00% / +0.06% +1.13% +1.19%] index_select perm : Elapsed 0.017 ms (1.685 ms / 100) 1.682 -> 1.687 ( +0.30%) [ +0.00% +0.12% +0.18% / +0.30% +0.77% +0.89%] index_select perm_sorted : Elapsed 0.017 ms (1.682 ms / 100) B = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) A = [4, 40, 20, 16] (stride (1, 4, 2560, 160)) dim = 2 1.830 -> 1.836 ( +0.33%) [ +0.44% +0.33% +0.00% / +0.38% +0.33% +0.33%] index_select const : Elapsed 0.018 ms (1.838 ms / 100) 1.823 -> 1.820 ( -0.16%) [ +0.00% +0.05% +0.00% / -0.16% +0.11% -0.11%] index_select wrap : Elapsed 0.018 ms (1.823 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.00% +0.27% +0.05% / +0.11% +0.05% +0.16%] index_select linear : Elapsed 0.018 ms (1.821 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.22% +0.00% +0.05% / +0.05% +0.49% +0.33%] index_select reverse : Elapsed 0.018 ms (1.825 ms / 100) 1.827 -> 1.828 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.05% +0.27%] index_select skip64 : Elapsed 0.018 ms (1.827 ms / 100) 1.826 -> 1.829 ( +0.16%) [ +0.16% +0.22% +0.00% / +0.16% +0.27% +0.16%] index_select skip256 : Elapsed 0.018 ms (1.829 ms / 100) 1.844 -> 1.843 ( -0.05%) [ +0.00% +0.11% +0.27% / +0.11% +0.11% -0.05%] index_select spread : Elapsed 0.018 ms (1.844 ms / 100) 1.848 -> 1.836 ( -0.65%) [ +0.16% +0.00% +0.00% / +0.05% -0.49% -0.65%] index_select strided 3 : Elapsed 0.019 ms (1.851 ms / 100) 1.832 -> 1.832 ( +0.00%) [ +0.05% +0.00% +0.33% / +0.27% +0.05% +0.00%] index_select strided 5 : Elapsed 0.018 ms (1.833 ms / 100) 1.838 -> 1.837 ( -0.05%) [ +0.11% +0.27% +0.00% / +0.16% -0.05% +0.00%] index_select strided 7 : Elapsed 0.018 ms (1.840 ms / 100) 1.853 -> 1.846 ( -0.38%) [ +0.05% +0.16% +0.00% / +0.16% -0.38% -0.38%] index_select strided 8 : Elapsed 0.019 ms (1.854 ms / 100) 1.854 -> 1.849 ( -0.27%) [ +0.05% +0.00% +0.11% / +0.16% -0.22% -0.27%] index_select strided 16 : Elapsed 0.019 ms (1.855 ms / 100) 1.827 -> 1.825 ( -0.11%) [ +0.00% +0.16% +0.00% / +0.00% -0.11% +0.16%] index_select random : Elapsed 0.018 ms (1.827 ms / 100) 1.832 -> 1.835 ( +0.16%) [ +0.27% +0.22% +0.00% / +0.22% +0.22% +0.16%] index_select random_sorted : Elapsed 0.018 ms (1.837 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.00% +0.55% / +0.22% +0.11% +0.11%] index_select perm : Elapsed 0.018 ms (1.824 ms / 100) 1.825 -> 1.825 ( +0.00%) [ +0.00% +0.11% +0.77% / +0.66% +0.33% +0.00%] index_select perm_sorted : Elapsed 0.018 ms (1.825 ms / 100) B = [4, 40, 5, 16] (stride (80, 320, 16, 1)) A = [4, 40, 20, 16] (stride (320, 1280, 16, 1)) dim = 2 1.777 -> 1.777 ( +0.00%) [ +0.45% +0.00% +0.34% / +0.00% +0.34% +0.06%] index_select const : Elapsed 0.018 ms (1.785 ms / 100) 1.823 -> 1.819 ( -0.22%) [ +0.16% +0.05% +0.00% / -0.22% +0.16% +0.11%] index_select wrap : Elapsed 0.018 ms (1.826 ms / 100) 1.823 -> 1.818 ( -0.27%) [ +0.05% +0.00% +0.05% / -0.27% +0.16% +0.22%] index_select linear : Elapsed 0.018 ms (1.824 ms / 100) 1.825 -> 1.822 ( -0.16%) [ +0.05% +0.00% +0.16% / -0.16% +0.05% +0.05%] index_select reverse : Elapsed 0.018 ms (1.826 ms / 100) 1.774 -> 1.779 ( +0.28%) [ +0.28% +0.06% +0.00% / +0.28% +0.51% +0.56%] index_select skip64 : Elapsed 0.018 ms (1.779 ms / 100) 1.773 -> 1.778 ( +0.28%) [ +0.51% +0.39% +0.00% / +0.39% +0.28% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.782 ms / 100) 1.822 -> 1.819 ( -0.16%) [ +0.16% +0.00% +0.22% / -0.16% +0.16% +0.16%] index_select spread : Elapsed 0.018 ms (1.825 ms / 100) 1.822 -> 1.826 ( +0.22%) [ +0.00% +0.00% +0.22% / +0.33% +0.22% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.822 ms / 100) 1.810 -> 1.811 ( +0.06%) [ +0.11% +0.17% +0.00% / +0.06% +0.06% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.812 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.00% +0.05% +0.00% / +0.11% +0.22% +0.22%] index_select strided 7 : Elapsed 0.018 ms (1.822 ms / 100) 1.821 -> 1.819 ( -0.11%) [ +0.05% +0.00% +0.05% / -0.11% +0.33% +0.27%] index_select strided 8 : Elapsed 0.018 ms (1.822 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.11% +0.22%] index_select strided 16 : Elapsed 0.018 ms (1.826 ms / 100) 1.821 -> 1.821 ( +0.00%) [ +0.22% +0.00% +0.16% / +0.00% +0.49% +0.49%] index_select random : Elapsed 0.018 ms (1.825 ms / 100) 1.820 -> 1.824 ( +0.22%) [ +0.27% +0.00% +0.11% / +0.22% +0.44% +0.49%] index_select random_sorted : Elapsed 0.018 ms (1.825 ms / 100) 1.820 -> 1.820 ( +0.00%) [ +0.16% +0.00% +0.22% / +0.00% +0.38% +0.38%] index_select perm : Elapsed 0.018 ms (1.823 ms / 100) 1.820 -> 1.824 ( +0.22%) [ +0.22% +0.00% +0.44% / +0.22% +0.44% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.824 ms / 100) B = [4, 40, 5, 16] (stride (16, 320, 64, 1)) dim = 2 fill_cnt = 20 3.509 -> 3.489 ( -0.57%) [ +0.14% +0.03% +0.00% / -0.57% -0.46% -0.40%] index_fill_ const : Elapsed 0.035 ms (3.514 ms / 100) 3.527 -> 3.503 ( -0.68%) [ +0.03% +0.00% +0.03% / -0.54% -0.48% -0.68%] index_fill_ linear : Elapsed 0.035 ms (3.528 ms / 100) 3.509 -> 3.490 ( -0.54%) [ +0.00% +0.11% +0.17% / -0.54% -0.51% -0.48%] index_fill_ reverse : Elapsed 0.035 ms (3.509 ms / 100) 3.517 -> 3.499 ( -0.51%) [ +0.17% +0.00% +0.06% / -0.51% -0.40% -0.40%] index_fill_ skip64 : Elapsed 0.035 ms (3.523 ms / 100) 3.519 -> 3.497 ( -0.63%) [ +0.00% +0.06% +0.06% / -0.60% -0.63% -0.51%] index_fill_ skip256 : Elapsed 0.035 ms (3.519 ms / 100) 3.513 -> 3.495 ( -0.51%) [ +0.20% +0.06% +0.00% / -0.46% -0.37% -0.51%] index_fill_ spread : Elapsed 0.035 ms (3.520 ms / 100) 3.522 -> 3.503 ( -0.54%) [ +0.26% +0.00% +0.14% / -0.40% -0.54% -0.43%] index_fill_ strided 3 : Elapsed 0.035 ms (3.531 ms / 100) 3.533 -> 3.512 ( -0.59%) [ +0.11% +0.06% +0.00% / -0.40% -0.57% -0.59%] index_fill_ random : Elapsed 0.035 ms (3.537 ms / 100) 3.523 -> 3.504 ( -0.54%) [ +0.06% +0.06% +0.00% / -0.45% -0.54% -0.37%] index_fill_ random_sorted : Elapsed 0.035 ms (3.525 ms / 100) B = [4, 40, 5, 16] (stride (1, 320, 64, 4)) A = [4, 40, 20, 16] (stride (1, 64, 2560, 4)) dim = 2 1.772 -> 1.771 ( -0.06%) [ +0.00% +0.17% +0.17% / -0.06% +0.45% +0.23%] index_select const : Elapsed 0.018 ms (1.772 ms / 100) 1.772 -> 1.775 ( +0.17%) [ +0.28% +0.00% +0.00% / +0.40% +0.17% +0.17%] index_select wrap : Elapsed 0.018 ms (1.777 ms / 100) 1.777 -> 1.775 ( -0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.11% -0.11%] index_select linear : Elapsed 0.018 ms (1.777 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.00% +0.11% +0.11% / +0.17% +0.23% +0.28%] index_select reverse : Elapsed 0.018 ms (1.773 ms / 100) 1.773 -> 1.774 ( +0.06%) [ +0.17% +0.11% +0.00% / +0.06% +0.11% +0.23%] index_select skip64 : Elapsed 0.018 ms (1.776 ms / 100) 1.774 -> 1.773 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +0.28% +0.23%] index_select skip256 : Elapsed 0.018 ms (1.776 ms / 100) 1.776 -> 1.775 ( -0.06%) [ +0.11% +0.00% +0.06% / -0.06% +0.06% +0.23%] index_select spread : Elapsed 0.018 ms (1.778 ms / 100) 1.775 -> 1.778 ( +0.17%) [ +0.28% +0.00% +0.17% / +0.17% +0.39% +0.28%] index_select strided 3 : Elapsed 0.018 ms (1.780 ms / 100) 1.772 -> 1.776 ( +0.23%) [ +0.00% +0.00% +0.06% / +0.23% +0.34% +0.34%] index_select strided 5 : Elapsed 0.018 ms (1.772 ms / 100) 1.775 -> 1.774 ( -0.06%) [ +0.11% +0.00% +0.00% / -0.06% +0.45% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.777 ms / 100) 1.777 -> 1.777 ( +0.00%) [ +0.17% +0.00% +0.06% / +0.06% +0.06% +0.00%] index_select strided 8 : Elapsed 0.018 ms (1.780 ms / 100) 1.775 -> 1.777 ( +0.11%) [ +0.00% +0.28% +0.23% / +0.17% +0.17% +0.11%] index_select strided 16 : Elapsed 0.018 ms (1.775 ms / 100) 1.776 -> 1.776 ( +0.00%) [ +0.11% +0.45% +0.00% / +0.00% +0.39% +0.39%] index_select random : Elapsed 0.018 ms (1.778 ms / 100) 1.776 -> 1.776 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.39% +0.23%] index_select random_sorted : Elapsed 0.018 ms (1.777 ms / 100) 1.774 -> 1.778 ( +0.23%) [ +0.28% +0.11% +0.00% / +0.23% +0.39% +0.62%] index_select perm : Elapsed 0.018 ms (1.779 ms / 100) 1.776 -> 1.778 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.28% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.776 ms / 100) B = [4, 40, 5, 16] (stride (5, 320, 1, 20)) A = [4, 40, 20, 16] (stride (20, 1280, 1, 80)) dim = 2 1.921 -> 1.920 ( -0.05%) [ +0.00% +0.31% +0.10% / -0.05% +0.21% +0.26%] index_select const : Elapsed 0.019 ms (1.921 ms / 100) 1.922 -> 1.923 ( +0.05%) [ +0.05% +0.31% +0.00% / +0.05% +0.73% +0.68%] index_select wrap : Elapsed 0.019 ms (1.923 ms / 100) 1.918 -> 1.921 ( +0.16%) [ +0.00% +0.31% +0.26% / +0.16% +0.83% +0.63%] index_select linear : Elapsed 0.019 ms (1.918 ms / 100) 1.924 -> 1.921 ( -0.16%) [ +0.10% +0.00% +0.05% / -0.16% +0.36% +0.26%] index_select reverse : Elapsed 0.019 ms (1.926 ms / 100) 1.917 -> 1.920 ( +0.16%) [ +0.21% +0.00% +0.10% / +0.16% +0.52% +0.42%] index_select skip64 : Elapsed 0.019 ms (1.921 ms / 100) 1.916 -> 1.917 ( +0.05%) [ +0.37% +0.00% +0.00% / +0.05% +0.47% +0.42%] index_select skip256 : Elapsed 0.019 ms (1.923 ms / 100) 1.932 -> 1.937 ( +0.26%) [ +0.26% +0.00% +0.21% / +0.26% +0.67% +0.36%] index_select spread : Elapsed 0.019 ms (1.937 ms / 100) 1.932 -> 1.932 ( +0.00%) [ +0.16% +0.10% +0.00% / +0.00% +0.57% +0.52%] index_select strided 3 : Elapsed 0.019 ms (1.935 ms / 100) 1.931 -> 1.936 ( +0.26%) [ +0.21% +0.00% +0.00% / +0.26% +0.83% +0.78%] index_select strided 5 : Elapsed 0.019 ms (1.935 ms / 100) 1.937 -> 1.937 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.15% +0.21%] index_select strided 7 : Elapsed 0.019 ms (1.939 ms / 100) 1.932 -> 1.937 ( +0.26%) [ +0.00% +0.10% +0.05% / +0.31% +0.26% +0.62%] index_select strided 8 : Elapsed 0.019 ms (1.932 ms / 100) 1.933 -> 1.937 ( +0.21%) [ +0.00% +0.10% +0.00% / +0.21% +0.57% +0.36%] index_select strided 16 : Elapsed 0.019 ms (1.933 ms / 100) 1.935 -> 1.940 ( +0.26%) [ +0.31% +0.00% +0.00% / +0.47% +0.26% +0.36%] index_select random : Elapsed 0.019 ms (1.941 ms / 100) 1.928 -> 1.931 ( +0.16%) [ +0.36% +0.16% +0.00% / +0.16% +0.52% +0.67%] index_select random_sorted : Elapsed 0.019 ms (1.935 ms / 100) 1.934 -> 1.932 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.10% +0.16%] index_select perm : Elapsed 0.019 ms (1.935 ms / 100) 1.930 -> 1.931 ( +0.05%) [ +0.31% +0.05% +0.00% / +0.05% +0.47% +0.52%] index_select perm_sorted : Elapsed 0.019 ms (1.936 ms / 100) B = [4, 40, 5, 16] (stride (16, 64, 2560, 1)) A = [4, 40, 20, 16] (stride (12800, 1, 640, 40)) dim = 2 1.818 -> 1.819 ( +0.06%) [ +0.28% +0.00% +0.17% / +0.06% +0.22% +0.22%] index_select const : Elapsed 0.018 ms (1.823 ms / 100) 1.814 -> 1.820 ( +0.33%) [ +0.17% +0.00% +0.28% / +0.33% +0.39% +0.39%] index_select wrap : Elapsed 0.018 ms (1.817 ms / 100) 1.807 -> 1.811 ( +0.22%) [ +0.00% +0.06% +0.44% / +0.22% +0.44% +0.28%] index_select linear : Elapsed 0.018 ms (1.807 ms / 100) 1.802 -> 1.810 ( +0.44%) [ +0.22% +0.00% +0.33% / +0.44% +0.61% +0.67%] index_select reverse : Elapsed 0.018 ms (1.806 ms / 100) 1.815 -> 1.819 ( +0.22%) [ +0.22% +0.39% +0.00% / +0.22% +0.39% +0.50%] index_select skip64 : Elapsed 0.018 ms (1.819 ms / 100) 1.819 -> 1.823 ( +0.22%) [ +0.00% +0.22% +0.00% / +0.22% +0.44% +0.38%] index_select skip256 : Elapsed 0.018 ms (1.819 ms / 100) 1.816 -> 1.819 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +0.22% +0.22%] index_select spread : Elapsed 0.018 ms (1.817 ms / 100) 1.816 -> 1.817 ( +0.06%) [ +0.11% +0.17% +0.00% / +0.06% +0.55% +0.66%] index_select strided 3 : Elapsed 0.018 ms (1.818 ms / 100) 1.808 -> 1.810 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.88% +0.72%] index_select strided 5 : Elapsed 0.018 ms (1.810 ms / 100) 1.811 -> 1.815 ( +0.22%) [ +0.06% +0.11% +0.00% / +0.22% +0.39% +0.33%] index_select strided 7 : Elapsed 0.018 ms (1.812 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.33% +0.44%] index_select strided 8 : Elapsed 0.018 ms (1.820 ms / 100) 1.838 -> 1.832 ( -0.33%) [ +0.05% +0.00% +0.00% / -0.22% -0.05% -0.33%] index_select strided 16 : Elapsed 0.018 ms (1.839 ms / 100) 1.823 -> 1.825 ( +0.11%) [ +0.33% +0.00% +0.38% / +0.11% +0.27% +0.49%] index_select random : Elapsed 0.018 ms (1.829 ms / 100) 1.813 -> 1.821 ( +0.44%) [ +0.28% +0.00% +0.44% / +0.44% +0.66% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.818 ms / 100) 1.824 -> 1.822 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.71% +0.66%] index_select perm : Elapsed 0.018 ms (1.824 ms / 100) 1.829 -> 1.828 ( -0.05%) [ +0.11% +0.33% +0.00% / -0.05% +0.60% +0.71%] index_select perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) B = [4, 40, 5, 16] (stride (1, 4, 2560, 160)) A = [4, 40, 20, 16] (stride (16, 64, 2560, 1)) dim = 2 1.778 -> 1.780 ( +0.11%) [ +0.22% +0.22% +0.00% / +0.11% +0.39% +0.34%] index_select const : Elapsed 0.018 ms (1.782 ms / 100) 1.771 -> 1.770 ( -0.06%) [ +0.23% +0.11% +0.00% / -0.06% +0.90% +0.90%] index_select wrap : Elapsed 0.018 ms (1.775 ms / 100) 1.771 -> 1.771 ( +0.00%) [ +0.06% +0.23% +0.00% / +0.00% +0.79% +0.73%] index_select linear : Elapsed 0.018 ms (1.772 ms / 100) 1.773 -> 1.775 ( +0.11%) [ +0.06% +0.11% +0.00% / +0.11% +0.85% +0.68%] index_select reverse : Elapsed 0.018 ms (1.774 ms / 100) 1.770 -> 1.774 ( +0.23%) [ +0.11% +0.23% +0.00% / +0.23% +0.73% +1.07%] index_select skip64 : Elapsed 0.018 ms (1.772 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.34% +0.06% / +0.11% +0.90% +1.02%] index_select skip256 : Elapsed 0.018 ms (1.770 ms / 100) 1.781 -> 1.782 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.73% +0.79%] index_select spread : Elapsed 0.018 ms (1.781 ms / 100) 1.792 -> 1.797 ( +0.28%) [ +0.17% +0.00% +0.22% / +0.39% +0.61% +0.28%] index_select strided 3 : Elapsed 0.018 ms (1.795 ms / 100) 1.780 -> 1.780 ( +0.00%) [ +0.22% +0.00% +0.06% / +0.00% +0.84% +0.67%] index_select strided 5 : Elapsed 0.018 ms (1.784 ms / 100) 1.790 -> 1.786 ( -0.22%) [ +0.00% +0.00% +0.00% / -0.22% +0.67% +0.84%] index_select strided 7 : Elapsed 0.018 ms (1.790 ms / 100) 1.780 -> 1.781 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +1.07% +0.73%] index_select strided 8 : Elapsed 0.018 ms (1.781 ms / 100) 1.782 -> 1.779 ( -0.17%) [ +0.22% +0.00% +0.00% / -0.17% +0.73% +1.01%] index_select strided 16 : Elapsed 0.018 ms (1.786 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.00% +0.23% +0.06% / +0.17% +2.37% +1.64%] index_select random : Elapsed 0.018 ms (1.773 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.00% +0.06% +0.17% / +0.11% +1.58% +1.74%] index_select random_sorted : Elapsed 0.018 ms (1.777 ms / 100) 1.792 -> 1.789 ( -0.17%) [ +0.06% +0.00% +0.22% / -0.17% +0.84% +0.89%] index_select perm : Elapsed 0.018 ms (1.793 ms / 100) 1.792 -> 1.793 ( +0.06%) [ +0.17% +0.00% +0.06% / +0.06% +0.84% +0.89%] index_select perm_sorted : Elapsed 0.018 ms (1.795 ms / 100) B = [4, 40, 5, 16] (stride (200, 1, 40, 800)) A = [4, 40, 20, 16] (stride (16, 64, 2560, 1)) dim = 2 1.772 -> 1.770 ( -0.11%) [ +0.17% +0.11% +0.00% / -0.11% +0.23% +0.11%] index_select const : Elapsed 0.018 ms (1.775 ms / 100) 1.772 -> 1.774 ( +0.11%) [ +0.17% +0.00% +0.06% / +0.11% +0.11% +0.11%] index_select wrap : Elapsed 0.018 ms (1.775 ms / 100) 1.772 -> 1.773 ( +0.06%) [ +0.00% +0.23% +0.28% / +0.23% +0.17% +0.06%] index_select linear : Elapsed 0.018 ms (1.772 ms / 100) 1.772 -> 1.772 ( +0.00%) [ +0.00% +0.17% +0.23% / +0.00% +0.11% +0.00%] index_select reverse : Elapsed 0.018 ms (1.772 ms / 100) 1.772 -> 1.771 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.06% +0.11%] index_select skip64 : Elapsed 0.018 ms (1.772 ms / 100) 1.768 -> 1.770 ( +0.11%) [ +0.17% +0.00% +0.40% / +0.23% +0.11% +0.23%] index_select skip256 : Elapsed 0.018 ms (1.771 ms / 100) 1.792 -> 1.794 ( +0.11%) [ +0.22% +0.00% +0.28% / +0.11% +0.56% +0.50%] index_select spread : Elapsed 0.018 ms (1.796 ms / 100) 1.792 -> 1.792 ( +0.00%) [ +0.06% +0.00% +0.11% / +0.00% +0.22% +0.00%] index_select strided 3 : Elapsed 0.018 ms (1.793 ms / 100) 1.775 -> 1.776 ( +0.06%) [ +0.00% +0.00% +0.28% / +0.39% +0.28% +0.06%] index_select strided 5 : Elapsed 0.018 ms (1.775 ms / 100) 1.788 -> 1.788 ( +0.00%) [ +0.39% +0.00% +0.06% / +0.00% +0.45% +0.39%] index_select strided 7 : Elapsed 0.018 ms (1.795 ms / 100) 1.790 -> 1.792 ( +0.11%) [ +0.00% +0.00% +0.06% / +0.11% +0.45% +0.34%] index_select strided 8 : Elapsed 0.018 ms (1.790 ms / 100) 1.792 -> 1.798 ( +0.33%) [ +0.00% +0.33% +0.00% / +0.33% +0.45% +0.39%] index_select strided 16 : Elapsed 0.018 ms (1.792 ms / 100) 1.782 -> 1.782 ( +0.00%) [ +0.11% +0.00% +0.06% / +0.00% +0.34% +0.62%] index_select random : Elapsed 0.018 ms (1.784 ms / 100) 1.785 -> 1.785 ( +0.00%) [ +0.11% +0.00% +0.06% / +0.00% +0.34% +0.34%] index_select random_sorted : Elapsed 0.018 ms (1.787 ms / 100) 1.790 -> 1.794 ( +0.22%) [ +0.00% +0.00% +0.34% / +0.39% +0.22% +0.22%] index_select perm : Elapsed 0.018 ms (1.790 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.00% +0.17% +0.45% / +0.39% +0.06% +0.11%] index_select perm_sorted : Elapsed 0.018 ms (1.790 ms / 100) B = [4, 40, 5, 16] (stride (5, 20, 1, 800)) A = [4, 40, 20, 16] (stride (1, 4, 2560, 160)) dim = 2 1.836 -> 1.833 ( -0.16%) [ +0.05% +0.27% +0.00% / +0.00% -0.16% +0.22%] index_select const : Elapsed 0.018 ms (1.837 ms / 100) 1.830 -> 1.831 ( +0.05%) [ +0.16% +0.27% +0.00% / +0.05% +0.71% +0.49%] index_select wrap : Elapsed 0.018 ms (1.833 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.33% +0.00% +0.00% / +0.00% +0.38% +0.33%] index_select linear : Elapsed 0.018 ms (1.832 ms / 100) 1.831 -> 1.831 ( +0.00%) [ +0.16% +0.27% +0.00% / +0.00% +0.00% +0.16%] index_select reverse : Elapsed 0.018 ms (1.834 ms / 100) 1.830 -> 1.834 ( +0.22%) [ +0.05% +0.11% +0.00% / +0.22% +0.44% +0.27%] index_select skip64 : Elapsed 0.018 ms (1.831 ms / 100) 1.832 -> 1.834 ( +0.11%) [ +0.05% +0.00% +0.22% / +0.11% +0.55% +0.38%] index_select skip256 : Elapsed 0.018 ms (1.833 ms / 100) 1.838 -> 1.840 ( +0.11%) [ +0.27% +0.27% +0.00% / +0.11% +0.60% +0.49%] index_select spread : Elapsed 0.018 ms (1.843 ms / 100) 1.848 -> 1.847 ( -0.05%) [ +0.11% +0.16% +0.00% / +0.05% +0.05% -0.05%] index_select strided 3 : Elapsed 0.018 ms (1.850 ms / 100) 1.828 -> 1.830 ( +0.11%) [ +0.16% +0.00% +0.00% / +0.11% +0.55% +0.49%] index_select strided 5 : Elapsed 0.018 ms (1.831 ms / 100) 1.829 -> 1.827 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.16% +0.27%] index_select strided 7 : Elapsed 0.018 ms (1.829 ms / 100) 1.833 -> 1.835 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.44% +0.27%] index_select strided 8 : Elapsed 0.018 ms (1.834 ms / 100) 1.831 -> 1.833 ( +0.11%) [ +0.11% +0.33% +0.00% / +0.11% +0.55% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.833 ms / 100) 1.836 -> 1.835 ( -0.05%) [ +0.00% +0.11% +0.33% / -0.05% +0.16% +0.33%] index_select random : Elapsed 0.018 ms (1.836 ms / 100) 1.835 -> 1.837 ( +0.11%) [ +0.16% +0.11% +0.00% / +0.11% +0.60% +0.60%] index_select random_sorted : Elapsed 0.018 ms (1.838 ms / 100) 1.841 -> 1.842 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.54% +0.54%] index_select perm : Elapsed 0.018 ms (1.843 ms / 100) 1.832 -> 1.833 ( +0.05%) [ +0.38% +0.00% +0.05% / +0.05% +0.49% +0.49%] index_select perm_sorted : Elapsed 0.018 ms (1.839 ms / 100) B = [4, 40, 5, 16] (stride (1, 4, 160, 800)) dim = 2 fill_cnt = 20 3.640 -> 3.622 ( -0.49%) [ +0.16% +0.00% +0.22% / -0.47% -0.38% -0.49%] index_fill_ const : Elapsed 0.036 ms (3.646 ms / 100) 3.582 -> 3.569 ( -0.36%) [ +0.00% +0.31% +0.17% / -0.36% -0.22% -0.14%] index_fill_ linear : Elapsed 0.036 ms (3.582 ms / 100) 3.569 -> 3.556 ( -0.36%) [ +0.00% +0.00% +0.00% / -0.36% -0.25% -0.20%] index_fill_ reverse : Elapsed 0.036 ms (3.569 ms / 100) 3.658 -> 3.642 ( -0.44%) [ +0.11% +0.05% +0.00% / -0.44% -0.27% -0.36%] index_fill_ skip64 : Elapsed 0.037 ms (3.662 ms / 100) 3.651 -> 3.640 ( -0.30%) [ +0.22% +0.19% +0.00% / -0.30% -0.05% -0.27%] index_fill_ skip256 : Elapsed 0.037 ms (3.659 ms / 100) 3.552 -> 3.537 ( -0.42%) [ +0.06% +0.00% +0.00% / -0.42% -0.31% -0.23%] index_fill_ spread : Elapsed 0.036 ms (3.554 ms / 100) 3.546 -> 3.529 ( -0.48%) [ +0.03% +0.17% +0.00% / -0.48% -0.39% -0.34%] index_fill_ strided 3 : Elapsed 0.035 ms (3.547 ms / 100) 3.568 -> 3.545 ( -0.64%) [ +0.11% +0.08% +0.00% / -0.64% -0.56% -0.48%] index_fill_ random : Elapsed 0.036 ms (3.572 ms / 100) 3.561 -> 3.545 ( -0.45%) [ +0.00% +0.20% +0.11% / -0.39% -0.28% -0.45%] index_fill_ random_sorted : Elapsed 0.036 ms (3.561 ms / 100) out_shape = [4, 40, 20, 5] in_shape = [4, 40, 20, 16] idx_dim = 3 B = [4, 40, 20, 5] (stride (4000, 100, 1, 20)) A = [4, 40, 20, 16] (stride (800, 20, 1, 3200)) dim = 3 1.882 -> 1.885 ( +0.16%) [ +0.21% +0.00% +0.11% / +0.16% +0.32% +0.37%] index_select const : Elapsed 0.019 ms (1.886 ms / 100) 1.950 -> 1.945 ( -0.26%) [ +0.00% +0.26% +0.10% / -0.05% -0.26% +0.05%] index_select wrap : Elapsed 0.020 ms (1.950 ms / 100) 1.950 -> 1.950 ( +0.00%) [ +0.05% +0.36% +0.00% / +0.05% +0.05% +0.00%] index_select linear : Elapsed 0.020 ms (1.951 ms / 100) 1.944 -> 1.944 ( +0.00%) [ +0.00% +0.10% +0.26% / +0.00% +0.05% +0.21%] index_select reverse : Elapsed 0.019 ms (1.944 ms / 100) 1.880 -> 1.884 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.43% +0.43%] index_select skip64 : Elapsed 0.019 ms (1.880 ms / 100) 1.881 -> 1.884 ( +0.16%) [ +0.00% +0.05% +0.00% / +0.16% +0.43% +0.48%] index_select skip256 : Elapsed 0.019 ms (1.881 ms / 100) 1.943 -> 1.947 ( +0.21%) [ +0.10% +0.26% +0.00% / +0.21% +0.41% +0.36%] index_select spread : Elapsed 0.019 ms (1.945 ms / 100) 1.945 -> 1.949 ( +0.21%) [ +0.36% +0.00% +0.00% / +0.21% +0.51% +0.21%] index_select strided 3 : Elapsed 0.020 ms (1.952 ms / 100) 1.930 -> 1.935 ( +0.26%) [ +0.05% +0.36% +0.00% / +0.26% +0.93% +1.09%] index_select strided 5 : Elapsed 0.019 ms (1.931 ms / 100) 1.954 -> 1.951 ( -0.15%) [ +0.26% +0.10% +0.00% / -0.15% +0.72% +0.36%] index_select strided 7 : Elapsed 0.020 ms (1.959 ms / 100) 1.897 -> 1.900 ( +0.16%) [ +0.00% +0.21% +0.05% / +0.16% +0.37% +0.32%] index_select strided 8 : Elapsed 0.019 ms (1.897 ms / 100) 1.936 -> 1.936 ( +0.00%) [ +0.26% +0.31% +0.00% / +0.00% +0.21% +0.05%] index_select random : Elapsed 0.019 ms (1.941 ms / 100) 1.941 -> 1.934 ( -0.36%) [ +0.00% +0.26% +0.00% / -0.05% -0.10% -0.36%] index_select random_sorted : Elapsed 0.019 ms (1.941 ms / 100) 1.937 -> 1.940 ( +0.15%) [ +0.26% +0.15% +0.00% / +0.15% +0.46% +0.41%] index_select perm : Elapsed 0.019 ms (1.942 ms / 100) 1.930 -> 1.930 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.88% +0.67%] index_select perm_sorted : Elapsed 0.019 ms (1.930 ms / 100) B = [4, 40, 20, 5] (stride (1, 400, 20, 4)) A = [4, 40, 20, 16] (stride (16, 1280, 64, 1)) dim = 3 0.892 -> 0.897 ( +0.56%) [ +0.00% +1.23% +0.22% / +0.56% +2.80% +3.03%] index_select const : Elapsed 0.009 ms (0.892 ms / 100) 0.896 -> 0.894 ( -0.22%) [ +0.22% +0.22% +0.00% / -0.22% +2.46% +2.46%] index_select wrap : Elapsed 0.009 ms (0.898 ms / 100) 0.893 -> 0.896 ( +0.34%) [ +0.00% +0.56% +0.34% / +0.34% +2.91% +2.91%] index_select linear : Elapsed 0.009 ms (0.893 ms / 100) 0.891 -> 0.895 ( +0.45%) [ +0.45% +0.56% +0.00% / +0.45% +2.81% +2.81%] index_select reverse : Elapsed 0.009 ms (0.895 ms / 100) 0.890 -> 0.893 ( +0.34%) [ +0.34% +0.00% +0.34% / +0.34% +3.93% +3.93%] index_select skip64 : Elapsed 0.009 ms (0.893 ms / 100) 0.888 -> 0.892 ( +0.45%) [ +0.00% +0.56% +0.23% / +0.45% +4.17% +4.28%] index_select skip256 : Elapsed 0.009 ms (0.888 ms / 100) 0.895 -> 0.896 ( +0.11%) [ +0.00% +0.11% +0.22% / +0.11% +3.80% +4.25%] index_select spread : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.899 ( +0.56%) [ +0.22% +0.00% +0.22% / +0.56% +4.25% +4.25%] index_select strided 3 : Elapsed 0.009 ms (0.896 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.56% +0.00% +0.56% / +0.00% +2.68% +3.13%] index_select strided 5 : Elapsed 0.009 ms (0.899 ms / 100) 0.891 -> 0.895 ( +0.45%) [ +0.79% +0.34% +0.00% / +0.45% +3.59% +3.59%] index_select strided 7 : Elapsed 0.009 ms (0.898 ms / 100) 0.893 -> 0.897 ( +0.45%) [ +0.00% +0.00% +0.22% / +0.45% +3.81% +3.70%] index_select strided 8 : Elapsed 0.009 ms (0.893 ms / 100) 0.898 -> 0.897 ( -0.11%) [ +0.00% +0.33% +0.00% / -0.11% +3.01% +3.01%] index_select random : Elapsed 0.009 ms (0.898 ms / 100) 0.891 -> 0.891 ( +0.00%) [ +0.34% +0.22% +0.00% / +0.00% +3.48% +3.70%] index_select random_sorted : Elapsed 0.009 ms (0.894 ms / 100) 0.891 -> 0.897 ( +0.67%) [ +0.34% +0.79% +0.00% / +0.67% +3.82% +3.82%] index_select perm : Elapsed 0.009 ms (0.894 ms / 100) 0.892 -> 0.897 ( +0.56%) [ +0.11% +0.11% +0.00% / +0.56% +3.25% +3.25%] index_select perm_sorted : Elapsed 0.009 ms (0.893 ms / 100) B = [4, 40, 20, 5] (stride (200, 1, 800, 40)) dim = 3 fill_cnt = 16 2.412 -> 2.415 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.87% +0.75%] index_fill_ const : Elapsed 0.024 ms (2.415 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.16% +0.25% +0.00% / +0.16% +0.82% +0.91%] index_fill_ linear : Elapsed 0.024 ms (2.434 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.99% +0.78%] index_fill_ reverse : Elapsed 0.024 ms (2.431 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.87% +0.91%] index_fill_ skip64 : Elapsed 0.024 ms (2.414 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.70% +0.87%] index_fill_ skip256 : Elapsed 0.024 ms (2.423 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.00% +0.29% +0.04% / +0.04% +0.78% +0.95%] index_fill_ spread : Elapsed 0.024 ms (2.423 ms / 100) 2.424 -> 2.422 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.87% +0.70%] index_fill_ strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +0.58%] index_fill_ random : Elapsed 0.024 ms (2.433 ms / 100) 2.432 -> 2.432 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.66% +0.58%] index_fill_ random_sorted : Elapsed 0.024 ms (2.435 ms / 100) B = [4, 40, 20, 5] (stride (800, 20, 1, 3200)) A = [4, 40, 20, 16] (stride (1, 80, 4, 3200)) dim = 3 1.893 -> 1.898 ( +0.26%) [ +0.26% +0.00% +0.21% / +0.26% +0.53% +0.48%] index_select const : Elapsed 0.019 ms (1.898 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.32% +0.64%] index_select wrap : Elapsed 0.019 ms (1.891 ms / 100) 1.899 -> 1.897 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.16% +0.32%] index_select linear : Elapsed 0.019 ms (1.899 ms / 100) 1.894 -> 1.895 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.32% +0.16%] index_select reverse : Elapsed 0.019 ms (1.894 ms / 100) 1.893 -> 1.897 ( +0.21%) [ +0.00% +0.16% +0.26% / +0.21% +0.26% +0.42%] index_select skip64 : Elapsed 0.019 ms (1.893 ms / 100) 1.896 -> 1.897 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.47% +0.37%] index_select skip256 : Elapsed 0.019 ms (1.897 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.00% +0.05% +0.11% / +0.00% +0.42% +0.48%] index_select spread : Elapsed 0.019 ms (1.889 ms / 100) 1.894 -> 1.890 ( -0.21%) [ +0.00% +0.16% +0.00% / -0.21% +0.53% +0.63%] index_select strided 3 : Elapsed 0.019 ms (1.894 ms / 100) 1.893 -> 1.898 ( +0.26%) [ +0.05% +0.11% +0.00% / +0.26% +0.42% +0.42%] index_select strided 5 : Elapsed 0.019 ms (1.894 ms / 100) 1.884 -> 1.885 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.37% +0.37%] index_select strided 7 : Elapsed 0.019 ms (1.885 ms / 100) 1.891 -> 1.892 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.69% +0.63%] index_select strided 8 : Elapsed 0.019 ms (1.891 ms / 100) 1.885 -> 1.885 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.27% +0.64%] index_select random : Elapsed 0.019 ms (1.885 ms / 100) 1.889 -> 1.891 ( +0.11%) [ +0.00% +0.00% +0.16% / +0.11% +0.11% +0.21%] index_select random_sorted : Elapsed 0.019 ms (1.889 ms / 100) 1.893 -> 1.892 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.58% +0.32%] index_select perm : Elapsed 0.019 ms (1.894 ms / 100) 1.886 -> 1.885 ( -0.05%) [ +0.00% +0.05% +0.11% / -0.05% +1.06% +0.27%] index_select perm_sorted : Elapsed 0.019 ms (1.886 ms / 100) B = [4, 40, 20, 5] (stride (800, 1, 40, 3200)) A = [4, 40, 20, 16] (stride (12800, 320, 16, 1)) dim = 3 2.161 -> 2.161 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.23% +0.00% +0.19%] index_select const : Elapsed 0.022 ms (2.164 ms / 100) 2.159 -> 2.159 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.00% +0.09%] index_select wrap : Elapsed 0.022 ms (2.159 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.14% +0.14% +0.05%] index_select linear : Elapsed 0.022 ms (2.160 ms / 100) 2.156 -> 2.154 ( -0.09%) [ +0.19% +0.00% +0.14% / -0.09% +0.37% +0.09%] index_select reverse : Elapsed 0.022 ms (2.160 ms / 100) 2.158 -> 2.159 ( +0.05%) [ +0.00% +0.19% +0.09% / +0.32% +0.14% +0.05%] index_select skip64 : Elapsed 0.022 ms (2.158 ms / 100) 2.163 -> 2.159 ( -0.18%) [ +0.09% +0.00% +0.18% / -0.09% +0.00% -0.18%] index_select skip256 : Elapsed 0.022 ms (2.165 ms / 100) 2.170 -> 2.172 ( +0.09%) [ +0.14% +0.18% +0.00% / +0.09% +0.14% +0.09%] index_select spread : Elapsed 0.022 ms (2.173 ms / 100) 2.177 -> 2.173 ( -0.18%) [ +0.14% +0.14% +0.00% / +0.18% -0.09% -0.18%] index_select strided 3 : Elapsed 0.022 ms (2.180 ms / 100) 2.176 -> 2.164 ( -0.55%) [ +0.00% +0.00% +0.05% / -0.14% -0.28% -0.55%] index_select strided 5 : Elapsed 0.022 ms (2.176 ms / 100) 2.170 -> 2.166 ( -0.18%) [ +0.09% +0.00% +0.05% / -0.05% -0.05% -0.18%] index_select strided 7 : Elapsed 0.022 ms (2.172 ms / 100) 2.179 -> 2.174 ( -0.23%) [ +0.14% +0.00% +0.18% / +0.05% -0.18% -0.23%] index_select strided 8 : Elapsed 0.022 ms (2.182 ms / 100) 2.175 -> 2.174 ( -0.05%) [ +0.18% +0.00% +0.23% / +0.18% +0.09% -0.05%] index_select random : Elapsed 0.022 ms (2.179 ms / 100) 2.169 -> 2.172 ( +0.14%) [ +0.09% +0.28% +0.00% / +0.18% +0.14% +0.18%] index_select random_sorted : Elapsed 0.022 ms (2.171 ms / 100) 2.174 -> 2.169 ( -0.23%) [ +0.00% +0.05% +0.05% / -0.09% -0.23% -0.23%] index_select perm : Elapsed 0.022 ms (2.174 ms / 100) 2.171 -> 2.169 ( -0.09%) [ +0.18% +0.00% +0.05% / +0.09% +0.23% -0.09%] index_select perm_sorted : Elapsed 0.022 ms (2.175 ms / 100) B = [4, 40, 20, 5] (stride (40, 1, 160, 3200)) A = [4, 40, 20, 16] (stride (12800, 320, 1, 20)) dim = 3 0.843 -> 0.844 ( +0.12%) [ +0.00% +0.12% +0.24% / +0.12% +0.47% +0.83%] index_select const : Elapsed 0.008 ms (0.843 ms / 100) 0.894 -> 0.869 ( -2.80%) [ +0.78% +0.56% +0.00% / +0.22% -2.57% -2.80%] index_select wrap : Elapsed 0.009 ms (0.901 ms / 100) 0.898 -> 0.867 ( -3.45%) [ +0.00% +0.11% +0.22% / +0.00% -3.45% -3.23%] index_select linear : Elapsed 0.009 ms (0.898 ms / 100) 0.884 -> 0.870 ( -1.58%) [ +0.45% +0.45% +0.00% / +0.34% -1.58% -1.36%] index_select reverse : Elapsed 0.009 ms (0.888 ms / 100) 0.852 -> 0.852 ( +0.00%) [ +0.23% +0.00% +0.59% / +0.00% +0.23% +0.00%] index_select skip64 : Elapsed 0.009 ms (0.854 ms / 100) 0.842 -> 0.849 ( +0.83%) [ +1.31% +0.00% +0.36% / +1.19% +1.07% +0.83%] index_select skip256 : Elapsed 0.009 ms (0.853 ms / 100) 0.895 -> 0.864 ( -3.46%) [ +0.34% +0.00% +0.00% / -0.22% -3.46% -3.35%] index_select spread : Elapsed 0.009 ms (0.898 ms / 100) 0.896 -> 0.855 ( -4.58%) [ +0.89% +0.56% +0.00% / +0.45% -4.58% -4.58%] index_select strided 3 : Elapsed 0.009 ms (0.904 ms / 100) 0.889 -> 0.853 ( -4.05%) [ +1.69% +1.91% +0.00% / +1.12% -3.71% -4.05%] index_select strided 5 : Elapsed 0.009 ms (0.904 ms / 100) good 0.900 -> 0.849 ( -5.67%) [ +0.00% +0.22% +0.78% / +0.33% -5.33% -5.67%] index_select strided 7 : Elapsed 0.009 ms (0.900 ms / 100) 0.849 -> 0.846 ( -0.35%) [ +0.00% +1.06% +0.59% / +1.18% +0.00% -0.35%] index_select strided 8 : Elapsed 0.008 ms (0.849 ms / 100) 0.894 -> 0.853 ( -4.59%) [ +0.11% +0.00% +0.00% / +0.34% -4.47% -4.59%] index_select random : Elapsed 0.009 ms (0.895 ms / 100) 0.892 -> 0.861 ( -3.48%) [ +0.22% +0.00% +0.00% / +0.00% -3.48% -3.48%] index_select random_sorted : Elapsed 0.009 ms (0.894 ms / 100) 0.890 -> 0.862 ( -3.15%) [ +0.79% +0.00% +0.45% / +0.11% -2.81% -3.15%] index_select perm : Elapsed 0.009 ms (0.897 ms / 100) 0.892 -> 0.862 ( -3.36%) [ +0.00% +0.34% +0.22% / +0.45% -3.14% -3.36%] index_select perm_sorted : Elapsed 0.009 ms (0.892 ms / 100) out_shape = [5, 4, 20, 40] in_shape = [16, 4, 20, 40] idx_dim = 0 B = [5, 4, 20, 40] (stride (3200, 1, 160, 4)) A = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) dim = 0 2.096 -> 2.097 ( +0.05%) [ +0.14% +0.33% +0.00% / +0.05% +0.52% +0.57%] index_select const : Elapsed 0.021 ms (2.099 ms / 100) 2.163 -> 2.162 ( -0.05%) [ +0.32% +0.09% +0.00% / +0.05% +0.18% -0.05%] index_select wrap : Elapsed 0.022 ms (2.170 ms / 100) 2.168 -> 2.169 ( +0.05%) [ +0.42% +0.37% +0.00% / +0.05% +0.46% +0.42%] index_select linear : Elapsed 0.022 ms (2.177 ms / 100) 2.163 -> 2.163 ( +0.00%) [ +0.32% +0.14% +0.00% / +0.00% +0.32% +0.28%] index_select reverse : Elapsed 0.022 ms (2.170 ms / 100) 2.096 -> 2.104 ( +0.38%) [ +0.29% +0.00% +0.19% / +0.38% +0.86% +0.76%] index_select skip64 : Elapsed 0.021 ms (2.102 ms / 100) 2.093 -> 2.094 ( +0.05%) [ +0.14% +0.00% +0.38% / +0.05% +1.10% +0.29%] index_select skip256 : Elapsed 0.021 ms (2.096 ms / 100) 2.172 -> 2.169 ( -0.14%) [ +0.05% +0.09% +0.00% / +0.14% -0.14% +0.00%] index_select spread : Elapsed 0.022 ms (2.173 ms / 100) 2.175 -> 2.177 ( +0.09%) [ +0.14% +0.00% +0.18% / +0.09% +0.28% +0.28%] index_select strided 3 : Elapsed 0.022 ms (2.178 ms / 100) 2.156 -> 2.155 ( -0.05%) [ +0.19% +0.19% +0.00% / -0.05% +0.51% +0.28%] index_select strided 5 : Elapsed 0.022 ms (2.160 ms / 100) 2.171 -> 2.180 ( +0.41%) [ +0.00% +0.51% +0.51% / +0.41% +0.55% +0.78%] index_select strided 7 : Elapsed 0.022 ms (2.171 ms / 100) 2.107 -> 2.114 ( +0.33%) [ +0.19% +0.14% +0.00% / +0.33% +0.76% +0.66%] index_select strided 8 : Elapsed 0.021 ms (2.111 ms / 100) 2.143 -> 2.146 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.89% +0.79%] index_select random : Elapsed 0.021 ms (2.146 ms / 100) 2.160 -> 2.168 ( +0.37%) [ +0.23% +0.23% +0.00% / +0.37% +0.56% +0.51%] index_select random_sorted : Elapsed 0.022 ms (2.165 ms / 100) 2.157 -> 2.156 ( -0.05%) [ +0.19% +0.00% +0.28% / -0.05% +1.16% +0.70%] index_select perm : Elapsed 0.022 ms (2.161 ms / 100) 2.150 -> 2.156 ( +0.28%) [ +0.00% +0.19% +0.28% / +0.28% +0.65% +0.56%] index_select perm_sorted : Elapsed 0.021 ms (2.150 ms / 100) B = [5, 4, 20, 40] (stride (1, 4000, 200, 5)) A = [16, 4, 20, 40] (stride (1, 16, 64, 1280)) dim = 0 2.204 -> 2.207 ( +0.14%) [ +0.14% +0.59% +0.00% / +0.18% +0.36% +0.14%] index_select const : Elapsed 0.022 ms (2.207 ms / 100) 2.208 -> 2.204 ( -0.18%) [ +0.32% +0.05% +0.00% / +0.27% -0.18% +0.23%] index_select wrap : Elapsed 0.022 ms (2.215 ms / 100) 2.211 -> 2.204 ( -0.32%) [ +0.09% +0.00% +0.18% / +0.14% -0.23% -0.32%] index_select linear : Elapsed 0.022 ms (2.213 ms / 100) 2.208 -> 2.205 ( -0.14%) [ +0.18% +0.00% +0.18% / -0.09% -0.14% +0.00%] index_select reverse : Elapsed 0.022 ms (2.212 ms / 100) 2.210 -> 2.207 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +0.32% -0.14%] index_select skip64 : Elapsed 0.022 ms (2.210 ms / 100) 2.205 -> 2.210 ( +0.23%) [ +0.00% +0.14% +0.27% / +0.23% +0.27% +0.41%] index_select skip256 : Elapsed 0.022 ms (2.205 ms / 100) 2.233 -> 2.228 ( -0.22%) [ +0.04% +0.00% +0.09% / +0.36% -0.22% -0.22%] index_select spread : Elapsed 0.022 ms (2.234 ms / 100) 2.237 -> 2.223 ( -0.63%) [ +0.04% +0.00% +0.04% / -0.40% -0.31% -0.63%] index_select strided 3 : Elapsed 0.022 ms (2.238 ms / 100) 2.233 -> 2.227 ( -0.27%) [ +0.09% +0.00% +0.22% / +0.09% -0.27% -0.27%] index_select strided 5 : Elapsed 0.022 ms (2.235 ms / 100) 2.235 -> 2.231 ( -0.18%) [ +0.00% +0.27% +0.04% / +0.18% -0.13% -0.18%] index_select strided 7 : Elapsed 0.022 ms (2.235 ms / 100) 2.236 -> 2.227 ( -0.40%) [ +0.13% +0.04% +0.00% / +0.04% -0.40% +0.00%] index_select strided 8 : Elapsed 0.022 ms (2.239 ms / 100) 2.237 -> 2.229 ( -0.36%) [ +0.13% +0.04% +0.00% / +0.09% -0.36% -0.13%] index_select random : Elapsed 0.022 ms (2.240 ms / 100) 2.232 -> 2.228 ( -0.18%) [ +0.22% +0.00% +0.00% / +0.18% -0.18% +0.27%] index_select random_sorted : Elapsed 0.022 ms (2.237 ms / 100) 2.236 -> 2.234 ( -0.09%) [ +0.09% +0.27% +0.00% / +0.22% -0.09% -0.09%] index_select perm : Elapsed 0.022 ms (2.238 ms / 100) 2.230 -> 2.227 ( -0.13%) [ +0.31% +0.00% +0.18% / +0.13% +0.40% -0.13%] index_select perm_sorted : Elapsed 0.022 ms (2.237 ms / 100) out_shape = [16, 5, 20, 40] in_shape = [16, 4, 20, 40] idx_dim = 1 B = [16, 5, 20, 40] (stride (4000, 800, 1, 20)) A = [16, 4, 20, 40] (stride (160, 40, 2560, 1)) dim = 1 5.942 -> 5.931 ( -0.19%) [ +0.00% +0.02% +0.02% / -0.07% -0.15% -0.19%] index_add_ linear : Elapsed 0.059 ms (5.942 ms / 100) 5.834 -> 5.828 ( -0.10%) [ +0.02% +0.05% +0.00% / +0.19% -0.07% -0.10%] index_copy_ linear : Elapsed 0.058 ms (5.835 ms / 100) 5.906 -> 5.902 ( -0.07%) [ +0.02% +0.00% +0.03% / +0.07% -0.07% +0.03%] index_add_ reverse : Elapsed 0.059 ms (5.907 ms / 100) 5.813 -> 5.814 ( +0.02%) [ +0.00% +0.10% +0.15% / +0.09% +0.02% +0.09%] index_copy_ reverse : Elapsed 0.058 ms (5.813 ms / 100) 5.932 -> 5.919 ( -0.22%) [ +0.00% +0.02% +0.10% / +0.17% -0.22% -0.17%] index_add_ spread : Elapsed 0.059 ms (5.932 ms / 100) 5.826 -> 5.817 ( -0.15%) [ +0.09% +0.00% +0.12% / +0.09% -0.15% -0.15%] index_copy_ spread : Elapsed 0.058 ms (5.831 ms / 100) 5.916 -> 5.887 ( -0.49%) [ +0.05% +0.00% +0.15% / +0.10% -0.49% -0.41%] index_add_ strided 3 : Elapsed 0.059 ms (5.919 ms / 100) 5.809 -> 5.792 ( -0.29%) [ +0.19% +0.00% +0.21% / +0.24% -0.26% -0.29%] index_copy_ strided 3 : Elapsed 0.058 ms (5.820 ms / 100) 5.886 -> 5.881 ( -0.08%) [ +0.12% +0.00% +0.10% / -0.08% -0.07% +0.03%] index_add_ perm : Elapsed 0.059 ms (5.893 ms / 100) 5.810 -> 5.809 ( -0.02%) [ +0.00% +0.05% +0.05% / +0.05% +0.05% -0.02%] index_copy_ perm : Elapsed 0.058 ms (5.810 ms / 100) 5.893 -> 5.880 ( -0.22%) [ +0.00% +0.05% +0.05% / +0.19% -0.05% -0.22%] index_add_ perm_sorted : Elapsed 0.059 ms (5.893 ms / 100) 5.811 -> 5.807 ( -0.07%) [ +0.02% +0.21% +0.00% / +0.24% -0.07% -0.07%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.812 ms / 100) 6.123 -> 6.102 ( -0.34%) [ +0.05% +0.00% +0.02% / +0.11% -0.34% -0.20%] index_select const : Elapsed 0.061 ms (6.126 ms / 100) 6.210 -> 6.193 ( -0.27%) [ +0.06% +0.00% +0.03% / +0.06% -0.16% -0.27%] index_select wrap : Elapsed 0.062 ms (6.214 ms / 100) 6.189 -> 6.175 ( -0.23%) [ +0.00% +0.10% +0.08% / +0.05% -0.18% -0.23%] index_select linear : Elapsed 0.062 ms (6.189 ms / 100) 6.203 -> 6.170 ( -0.53%) [ +0.00% +0.08% +0.06% / -0.03% -0.35% -0.53%] index_select reverse : Elapsed 0.062 ms (6.203 ms / 100) 6.109 -> 6.091 ( -0.29%) [ +0.00% +0.13% +0.07% / +0.16% -0.25% -0.29%] index_select skip64 : Elapsed 0.061 ms (6.109 ms / 100) 6.122 -> 6.105 ( -0.28%) [ +0.00% +0.03% +0.16% / +0.05% -0.28% -0.20%] index_select skip256 : Elapsed 0.061 ms (6.122 ms / 100) 6.201 -> 6.179 ( -0.35%) [ +0.00% +0.03% +0.00% / +0.13% -0.26% -0.35%] index_select spread : Elapsed 0.062 ms (6.201 ms / 100) 6.201 -> 6.180 ( -0.34%) [ +0.00% +0.05% +0.00% / +0.06% -0.32% -0.34%] index_select strided 3 : Elapsed 0.062 ms (6.201 ms / 100) 6.197 -> 6.181 ( -0.26%) [ +0.08% +0.00% +0.08% / +0.10% -0.26% -0.06%] index_select random : Elapsed 0.062 ms (6.202 ms / 100) 6.212 -> 6.191 ( -0.34%) [ +0.00% +0.00% +0.06% / +0.19% -0.31% -0.34%] index_select random_sorted : Elapsed 0.062 ms (6.212 ms / 100) B = [16, 5, 20, 40] (stride (800, 12800, 1, 20)) A = [16, 4, 20, 40] (stride (80, 20, 1, 1280)) dim = 1 5.890 -> 5.883 ( -0.12%) [ +0.00% +0.03% +0.22% / +0.31% +0.07% -0.12%] index_add_ linear : Elapsed 0.059 ms (5.890 ms / 100) 5.862 -> 5.854 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.02% -0.14% -0.10%] index_copy_ linear : Elapsed 0.059 ms (5.862 ms / 100) 5.879 -> 5.885 ( +0.10%) [ +0.27% +0.00% +0.22% / +0.22% +0.24% +0.10%] index_add_ reverse : Elapsed 0.059 ms (5.895 ms / 100) 5.852 -> 5.852 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.05% +0.09%] index_copy_ reverse : Elapsed 0.059 ms (5.852 ms / 100) 5.897 -> 5.889 ( -0.14%) [ +0.00% +0.05% +0.07% / +0.03% -0.14% -0.10%] index_add_ spread : Elapsed 0.059 ms (5.897 ms / 100) 5.852 -> 5.850 ( -0.03%) [ +0.10% +0.00% +0.31% / +0.21% -0.03% -0.02%] index_copy_ spread : Elapsed 0.059 ms (5.858 ms / 100) 5.887 -> 5.898 ( +0.19%) [ +0.00% +0.15% +0.34% / +0.19% +0.36% +0.42%] index_add_ strided 3 : Elapsed 0.059 ms (5.887 ms / 100) 5.853 -> 5.871 ( +0.31%) [ +0.07% +0.00% +0.07% / +0.31% +0.32% +0.44%] index_copy_ strided 3 : Elapsed 0.059 ms (5.857 ms / 100) 5.893 -> 5.903 ( +0.17%) [ +0.02% +0.00% +0.03% / +0.29% +0.17% +0.24%] index_add_ perm : Elapsed 0.059 ms (5.894 ms / 100) 5.857 -> 5.867 ( +0.17%) [ +0.00% +0.00% +0.19% / +0.17% +0.29% +0.22%] index_copy_ perm : Elapsed 0.059 ms (5.857 ms / 100) 5.893 -> 5.898 ( +0.08%) [ +0.00% +0.03% +0.17% / +0.20% +0.27% +0.08%] index_add_ perm_sorted : Elapsed 0.059 ms (5.893 ms / 100) 5.855 -> 5.866 ( +0.19%) [ +0.00% +0.03% +0.24% / +0.19% +0.19% +0.22%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.855 ms / 100) 6.188 -> 6.194 ( +0.10%) [ +0.00% +0.05% +0.08% / +0.11% +0.16% +0.10%] index_select const : Elapsed 0.062 ms (6.188 ms / 100) 6.251 -> 6.252 ( +0.02%) [ +0.11% +0.00% +0.27% / +0.34% +0.02% +0.18%] index_select wrap : Elapsed 0.063 ms (6.258 ms / 100) 6.253 -> 6.242 ( -0.18%) [ +0.00% +0.05% +0.10% / +0.10% -0.11% -0.18%] index_select linear : Elapsed 0.063 ms (6.253 ms / 100) 6.246 -> 6.245 ( -0.02%) [ +0.13% +0.00% +0.21% / +0.00% +0.10% -0.02%] index_select reverse : Elapsed 0.063 ms (6.254 ms / 100) 6.187 -> 6.193 ( +0.10%) [ +0.06% +0.00% +0.11% / +0.10% +0.15% +0.11%] index_select skip64 : Elapsed 0.062 ms (6.191 ms / 100) 6.187 -> 6.192 ( +0.08%) [ +0.02% +0.00% +0.27% / +0.13% +0.18% +0.08%] index_select skip256 : Elapsed 0.062 ms (6.188 ms / 100) 6.245 -> 6.247 ( +0.03%) [ +0.00% +0.10% +0.29% / +0.29% +0.14% +0.03%] index_select spread : Elapsed 0.062 ms (6.245 ms / 100) 6.253 -> 6.248 ( -0.08%) [ +0.03% +0.00% +0.21% / +0.08% -0.08% -0.08%] index_select strided 3 : Elapsed 0.063 ms (6.255 ms / 100) 6.243 -> 6.235 ( -0.13%) [ +0.10% +0.08% +0.00% / +0.08% +0.05% -0.13%] index_select random : Elapsed 0.062 ms (6.249 ms / 100) 6.232 -> 6.228 ( -0.06%) [ +0.00% +0.05% +0.18% / +0.08% +0.00% -0.06%] index_select random_sorted : Elapsed 0.062 ms (6.232 ms / 100) B = [16, 5, 20, 40] (stride (1, 12800, 640, 16)) A = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) dim = 1 5.596 -> 5.577 ( -0.34%) [ +0.05% +0.05% +0.00% / +0.07% -0.34% -0.32%] index_add_ linear : Elapsed 0.056 ms (5.599 ms / 100) 5.543 -> 5.515 ( -0.51%) [ +0.04% +0.02% +0.00% / +0.16% -0.51% -0.34%] index_copy_ linear : Elapsed 0.055 ms (5.545 ms / 100) 5.581 -> 5.573 ( -0.14%) [ +0.36% +0.00% +0.20% / +0.29% -0.14% -0.13%] index_add_ reverse : Elapsed 0.056 ms (5.601 ms / 100) 5.537 -> 5.520 ( -0.31%) [ +0.11% +0.13% +0.00% / +0.04% -0.31% -0.23%] index_copy_ reverse : Elapsed 0.055 ms (5.543 ms / 100) 5.581 -> 5.563 ( -0.32%) [ +0.05% +0.00% +0.20% / +0.27% -0.22% -0.32%] index_add_ spread : Elapsed 0.056 ms (5.584 ms / 100) 5.534 -> 5.519 ( -0.27%) [ +0.05% +0.00% +0.14% / +0.14% -0.13% -0.27%] index_copy_ spread : Elapsed 0.055 ms (5.537 ms / 100) 5.560 -> 5.566 ( +0.11%) [ +0.14% +0.00% +0.13% / +0.34% +0.11% +0.25%] index_add_ strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.513 -> 5.515 ( +0.04%) [ +0.13% +0.00% +0.11% / +0.15% +0.13% +0.04%] index_copy_ strided 3 : Elapsed 0.055 ms (5.520 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.09% +0.00% +0.32% / +0.16% +0.05% +0.09%] index_add_ perm : Elapsed 0.056 ms (5.563 ms / 100) 5.515 -> 5.518 ( +0.05%) [ +0.11% +0.00% +0.29% / +0.29% +0.09% +0.05%] index_copy_ perm : Elapsed 0.055 ms (5.521 ms / 100) 5.572 -> 5.560 ( -0.22%) [ +0.00% +0.13% +0.05% / +0.07% -0.18% -0.22%] index_add_ perm_sorted : Elapsed 0.056 ms (5.572 ms / 100) 5.516 -> 5.511 ( -0.09%) [ +0.18% +0.00% +0.13% / +0.16% -0.09% +0.11%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.526 ms / 100) 5.769 -> 5.757 ( -0.21%) [ +0.09% +0.00% +0.07% / -0.02% -0.21% -0.17%] index_select const : Elapsed 0.058 ms (5.774 ms / 100) 5.873 -> 5.830 ( -0.73%) [ +0.00% +0.03% +0.03% / +0.05% -0.73% -0.72%] index_select wrap : Elapsed 0.059 ms (5.873 ms / 100) 5.838 -> 5.818 ( -0.34%) [ +0.03% +0.02% +0.00% / +0.09% -0.34% -0.34%] index_select linear : Elapsed 0.058 ms (5.840 ms / 100) 5.845 -> 5.821 ( -0.41%) [ +0.00% +0.02% +0.05% / +0.03% -0.41% -0.38%] index_select reverse : Elapsed 0.058 ms (5.845 ms / 100) 5.750 -> 5.736 ( -0.24%) [ +0.09% +0.00% +0.23% / +0.17% -0.24% -0.24%] index_select skip64 : Elapsed 0.058 ms (5.755 ms / 100) 5.768 -> 5.747 ( -0.36%) [ +0.00% +0.14% +0.07% / +0.03% -0.29% -0.36%] index_select skip256 : Elapsed 0.058 ms (5.768 ms / 100) 5.846 -> 5.823 ( -0.39%) [ +0.02% +0.00% +0.12% / +0.27% -0.39% -0.26%] index_select spread : Elapsed 0.058 ms (5.847 ms / 100) 5.853 -> 5.830 ( -0.39%) [ +0.09% +0.00% +0.07% / +0.24% -0.39% -0.34%] index_select strided 3 : Elapsed 0.059 ms (5.858 ms / 100) 5.787 -> 5.773 ( -0.24%) [ +0.14% +0.00% +0.19% / +0.05% -0.21% -0.24%] index_select random : Elapsed 0.058 ms (5.795 ms / 100) 5.803 -> 5.769 ( -0.59%) [ +0.07% +0.00% +0.12% / +0.09% -0.50% -0.59%] index_select random_sorted : Elapsed 0.058 ms (5.807 ms / 100) B = [16, 5, 20, 40] (stride (100, 20, 1, 1600)) A = [16, 4, 20, 40] (stride (40, 12800, 640, 1)) dim = 1 5.944 -> 5.944 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.22% +0.00% +0.17%] index_add_ linear : Elapsed 0.059 ms (5.947 ms / 100) 5.845 -> 5.838 ( -0.12%) [ +0.00% +0.02% +0.07% / +0.07% -0.05% -0.12%] index_copy_ linear : Elapsed 0.058 ms (5.845 ms / 100) 5.946 -> 5.953 ( +0.12%) [ +0.00% +0.10% +0.12% / +0.20% +0.12% +0.13%] index_add_ reverse : Elapsed 0.059 ms (5.946 ms / 100) 5.847 -> 5.847 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.09% +0.03% +0.00%] index_copy_ reverse : Elapsed 0.058 ms (5.847 ms / 100) 5.944 -> 5.953 ( +0.15%) [ +0.00% +0.08% +0.22% / +0.25% +0.15% +0.15%] index_add_ spread : Elapsed 0.059 ms (5.944 ms / 100) 5.836 -> 5.838 ( +0.03%) [ +0.09% +0.00% +0.19% / +0.31% +0.03% +0.03%] index_copy_ spread : Elapsed 0.058 ms (5.841 ms / 100) 5.958 -> 5.965 ( +0.12%) [ +0.00% +0.08% +0.18% / +0.27% +0.20% +0.12%] index_add_ strided 3 : Elapsed 0.060 ms (5.958 ms / 100) 5.856 -> 5.855 ( -0.02%) [ +0.20% +0.00% +0.03% / +0.19% -0.02% +0.07%] index_copy_ strided 3 : Elapsed 0.059 ms (5.868 ms / 100) 5.958 -> 5.961 ( +0.05%) [ +0.00% +0.20% +0.13% / +0.05% +0.30% +0.29%] index_add_ perm : Elapsed 0.060 ms (5.958 ms / 100) 5.855 -> 5.863 ( +0.14%) [ +0.20% +0.00% +0.12% / +0.29% +0.22% +0.14%] index_copy_ perm : Elapsed 0.059 ms (5.867 ms / 100) 5.940 -> 5.947 ( +0.12%) [ +0.15% +0.00% +0.22% / +0.12% +0.15% +0.17%] index_add_ perm_sorted : Elapsed 0.059 ms (5.949 ms / 100) 5.845 -> 5.841 ( -0.07%) [ +0.03% +0.00% +0.05% / +0.10% -0.07% -0.05%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.847 ms / 100) 6.114 -> 6.115 ( +0.02%) [ +0.07% +0.00% +0.08% / +0.21% +0.29% +0.02%] index_select const : Elapsed 0.061 ms (6.118 ms / 100) 6.209 -> 6.209 ( +0.00%) [ +0.00% +0.13% +0.08% / +0.18% +0.05% +0.00%] index_select wrap : Elapsed 0.062 ms (6.209 ms / 100) 6.192 -> 6.189 ( -0.05%) [ +0.00% +0.00% +0.15% / +0.10% -0.05% +0.06%] index_select linear : Elapsed 0.062 ms (6.192 ms / 100) 6.201 -> 6.204 ( +0.05%) [ +0.11% +0.00% +0.02% / +0.05% +0.16% +0.21%] index_select reverse : Elapsed 0.062 ms (6.208 ms / 100) 6.136 -> 6.134 ( -0.03%) [ +0.00% +0.02% +0.15% / +0.16% +0.13% -0.03%] index_select skip64 : Elapsed 0.061 ms (6.136 ms / 100) 6.117 -> 6.121 ( +0.07%) [ +0.00% +0.03% +0.03% / +0.07% +0.11% +0.07%] index_select skip256 : Elapsed 0.061 ms (6.117 ms / 100) 6.221 -> 6.225 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.21% +0.08% +0.06%] index_select spread : Elapsed 0.062 ms (6.221 ms / 100) 6.208 -> 6.213 ( +0.08%) [ +0.08% +0.00% +0.11% / +0.13% +0.08% +0.10%] index_select strided 3 : Elapsed 0.062 ms (6.213 ms / 100) 6.185 -> 6.176 ( -0.15%) [ +0.03% +0.00% +0.03% / +0.11% -0.15% -0.05%] index_select random : Elapsed 0.062 ms (6.187 ms / 100) 6.166 -> 6.167 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.11% +0.02% +0.08%] index_select random_sorted : Elapsed 0.062 ms (6.169 ms / 100) B = [16, 5, 20, 40] (stride (100, 1, 5, 1600)) A = [16, 4, 20, 40] (stride (3200, 1, 160, 4)) dim = 1 5.909 -> 5.889 ( -0.34%) [ +0.05% +0.00% +0.19% / +0.08% -0.34% -0.29%] index_add_ linear : Elapsed 0.059 ms (5.912 ms / 100) 5.859 -> 5.844 ( -0.26%) [ +0.14% +0.00% +0.26% / +0.15% -0.24% -0.26%] index_copy_ linear : Elapsed 0.059 ms (5.867 ms / 100) 5.910 -> 5.890 ( -0.34%) [ +0.00% +0.10% +0.08% / +0.10% -0.27% -0.34%] index_add_ reverse : Elapsed 0.059 ms (5.910 ms / 100) 5.861 -> 5.838 ( -0.39%) [ +0.09% +0.00% +0.10% / +0.05% -0.39% -0.34%] index_copy_ reverse : Elapsed 0.059 ms (5.866 ms / 100) 5.900 -> 5.880 ( -0.34%) [ +0.00% +0.07% +0.12% / +0.31% -0.34% -0.34%] index_add_ spread : Elapsed 0.059 ms (5.900 ms / 100) 5.849 -> 5.827 ( -0.38%) [ +0.05% +0.00% +0.12% / +0.14% -0.38% -0.38%] index_copy_ spread : Elapsed 0.059 ms (5.852 ms / 100) 5.902 -> 5.882 ( -0.34%) [ +0.05% +0.00% +0.07% / +0.14% -0.32% -0.34%] index_add_ strided 3 : Elapsed 0.059 ms (5.905 ms / 100) 5.854 -> 5.826 ( -0.48%) [ +0.02% +0.00% +0.05% / +0.03% -0.39% -0.48%] index_copy_ strided 3 : Elapsed 0.059 ms (5.855 ms / 100) 5.905 -> 5.875 ( -0.51%) [ +0.12% +0.05% +0.00% / +0.02% -0.51% -0.46%] index_add_ perm : Elapsed 0.059 ms (5.912 ms / 100) 5.854 -> 5.827 ( -0.46%) [ +0.00% +0.02% +0.12% / +0.17% -0.44% -0.46%] index_copy_ perm : Elapsed 0.059 ms (5.854 ms / 100) 5.900 -> 5.877 ( -0.39%) [ +0.08% +0.00% +0.10% / +0.08% -0.39% -0.37%] index_add_ perm_sorted : Elapsed 0.059 ms (5.905 ms / 100) 5.855 -> 5.824 ( -0.53%) [ +0.03% +0.00% +0.03% / +0.12% -0.50% -0.53%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.857 ms / 100) 6.115 -> 6.096 ( -0.31%) [ +0.18% +0.00% +0.25% / +0.29% -0.31% -0.23%] index_select const : Elapsed 0.061 ms (6.126 ms / 100) 6.127 -> 6.094 ( -0.54%) [ +0.02% +0.00% +0.00% / -0.07% -0.54% -0.54%] index_select wrap : Elapsed 0.061 ms (6.128 ms / 100) 6.110 -> 6.076 ( -0.56%) [ +0.10% +0.05% +0.00% / +0.10% -0.52% -0.56%] index_select linear : Elapsed 0.061 ms (6.116 ms / 100) 6.109 -> 6.073 ( -0.59%) [ +0.08% +0.00% +0.15% / +0.18% -0.57% -0.59%] index_select reverse : Elapsed 0.061 ms (6.114 ms / 100) 6.109 -> 6.072 ( -0.61%) [ +0.00% +0.02% +0.08% / +0.13% -0.61% -0.52%] index_select skip64 : Elapsed 0.061 ms (6.109 ms / 100) 6.120 -> 6.093 ( -0.44%) [ +0.10% +0.00% +0.16% / +0.16% -0.44% -0.38%] index_select skip256 : Elapsed 0.061 ms (6.126 ms / 100) 6.109 -> 6.074 ( -0.57%) [ +0.00% +0.00% +0.07% / +0.10% -0.51% -0.57%] index_select spread : Elapsed 0.061 ms (6.109 ms / 100) 6.109 -> 6.077 ( -0.52%) [ +0.07% +0.00% +0.05% / +0.05% -0.49% -0.52%] index_select strided 3 : Elapsed 0.061 ms (6.113 ms / 100) 6.106 -> 6.069 ( -0.61%) [ +0.02% +0.00% +0.07% / +0.16% -0.61% -0.51%] index_select random : Elapsed 0.061 ms (6.107 ms / 100) 6.124 -> 6.092 ( -0.52%) [ +0.00% +0.11% +0.11% / +0.11% -0.52% -0.52%] index_select random_sorted : Elapsed 0.061 ms (6.124 ms / 100) B = [16, 5, 20, 40] (stride (1, 16, 80, 1600)) A = [16, 4, 20, 40] (stride (4, 1, 64, 1280)) dim = 1 6.060 -> 6.072 ( +0.20%) [ +0.00% +0.02% +0.13% / +0.20% +0.20% +0.28%] index_add_ linear : Elapsed 0.061 ms (6.060 ms / 100) 5.964 -> 5.972 ( +0.13%) [ +0.10% +0.00% +0.17% / +0.34% +0.13% +0.22%] index_copy_ linear : Elapsed 0.060 ms (5.970 ms / 100) 6.055 -> 6.070 ( +0.25%) [ +0.18% +0.00% +0.18% / +0.25% +0.26% +0.26%] index_add_ reverse : Elapsed 0.061 ms (6.066 ms / 100) 5.963 -> 5.972 ( +0.15%) [ +0.00% +0.07% +0.20% / +0.18% +0.27% +0.15%] index_copy_ reverse : Elapsed 0.060 ms (5.963 ms / 100) 6.061 -> 6.073 ( +0.20%) [ +0.00% +0.03% +0.18% / +0.20% +0.31% +0.20%] index_add_ spread : Elapsed 0.061 ms (6.061 ms / 100) 5.967 -> 5.963 ( -0.07%) [ +0.00% +0.05% +0.12% / +0.27% -0.07% +0.08%] index_copy_ spread : Elapsed 0.060 ms (5.967 ms / 100) 6.092 -> 6.090 ( -0.03%) [ +0.03% +0.00% +0.13% / +0.05% -0.03% +0.20%] index_add_ strided 3 : Elapsed 0.061 ms (6.094 ms / 100) 5.999 -> 5.992 ( -0.12%) [ +0.00% +0.07% +0.08% / +0.03% -0.12% -0.12%] index_copy_ strided 3 : Elapsed 0.060 ms (5.999 ms / 100) 6.070 -> 6.080 ( +0.16%) [ +0.12% +0.00% +0.13% / +0.18% +0.21% +0.16%] index_add_ perm : Elapsed 0.061 ms (6.077 ms / 100) 5.988 -> 5.972 ( -0.27%) [ +0.03% +0.00% +0.13% / +0.18% -0.27% -0.22%] index_copy_ perm : Elapsed 0.060 ms (5.990 ms / 100) 6.063 -> 6.075 ( +0.20%) [ +0.00% +0.12% +0.18% / +0.20% +0.36% +0.28%] index_add_ perm_sorted : Elapsed 0.061 ms (6.063 ms / 100) 5.974 -> 5.977 ( +0.05%) [ +0.08% +0.00% +0.22% / +0.27% +0.05% +0.12%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.979 ms / 100) 6.388 -> 6.392 ( +0.06%) [ +0.03% +0.00% +0.02% / +0.14% +0.06% +0.06%] index_select const : Elapsed 0.064 ms (6.390 ms / 100) 6.389 -> 6.387 ( -0.03%) [ +0.02% +0.00% +0.02% / +0.11% +0.13% -0.03%] index_select wrap : Elapsed 0.064 ms (6.390 ms / 100) 6.386 -> 6.381 ( -0.08%) [ +0.00% +0.08% +0.23% / +0.13% -0.08% +0.06%] index_select linear : Elapsed 0.064 ms (6.386 ms / 100) 6.385 -> 6.385 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.25% +0.00% +0.06%] index_select reverse : Elapsed 0.064 ms (6.387 ms / 100) 6.385 -> 6.387 ( +0.03%) [ +0.14% +0.00% +0.27% / +0.25% +0.03% +0.06%] index_select skip64 : Elapsed 0.064 ms (6.394 ms / 100) 6.382 -> 6.389 ( +0.11%) [ +0.13% +0.00% +0.24% / +0.14% +0.11% +0.17%] index_select skip256 : Elapsed 0.064 ms (6.390 ms / 100) 6.379 -> 6.390 ( +0.17%) [ +0.13% +0.00% +0.36% / +0.30% +0.17% +0.24%] index_select spread : Elapsed 0.064 ms (6.387 ms / 100) 6.390 -> 6.388 ( -0.03%) [ +0.02% +0.00% +0.14% / +0.20% -0.03% +0.14%] index_select strided 3 : Elapsed 0.064 ms (6.391 ms / 100) 6.383 -> 6.389 ( +0.09%) [ +0.03% +0.00% +0.09% / +0.19% +0.11% +0.09%] index_select random : Elapsed 0.064 ms (6.385 ms / 100) 6.377 -> 6.390 ( +0.20%) [ +0.00% +0.09% +0.17% / +0.28% +0.20% +0.24%] index_select random_sorted : Elapsed 0.064 ms (6.377 ms / 100) out_shape = [16, 4, 5, 40] in_shape = [16, 4, 20, 40] idx_dim = 2 B = [16, 4, 5, 40] (stride (800, 40, 160, 1)) A = [16, 4, 20, 40] (stride (20, 320, 1, 1280)) dim = 2 1.913 -> 1.915 ( +0.10%) [ +0.00% +0.00% +0.16% / +0.10% +0.42% +0.21%] index_select const : Elapsed 0.019 ms (1.913 ms / 100) 1.921 -> 1.918 ( -0.16%) [ +0.26% +0.00% +0.10% / -0.16% +0.36% +0.57%] index_select wrap : Elapsed 0.019 ms (1.926 ms / 100) 1.921 -> 1.919 ( -0.10%) [ +0.21% +0.00% +0.26% / -0.10% +0.36% +0.42%] index_select linear : Elapsed 0.019 ms (1.925 ms / 100) 1.926 -> 1.930 ( +0.21%) [ +0.16% +0.00% +0.05% / +0.21% +0.57% +0.36%] index_select reverse : Elapsed 0.019 ms (1.929 ms / 100) 1.911 -> 1.912 ( +0.05%) [ +0.16% +0.00% +0.21% / +0.05% +0.37% +0.58%] index_select skip64 : Elapsed 0.019 ms (1.914 ms / 100) 1.914 -> 1.913 ( -0.05%) [ +0.00% +0.10% +0.10% / -0.05% +0.42% +0.26%] index_select skip256 : Elapsed 0.019 ms (1.914 ms / 100) 1.940 -> 1.945 ( +0.26%) [ +0.00% +0.26% +0.00% / +0.26% +0.36% +0.67%] index_select spread : Elapsed 0.019 ms (1.940 ms / 100) 1.935 -> 1.936 ( +0.05%) [ +0.00% +0.10% +0.21% / +0.05% +0.52% +0.47%] index_select strided 3 : Elapsed 0.019 ms (1.935 ms / 100) 1.939 -> 1.942 ( +0.15%) [ +0.10% +0.00% +0.15% / +0.15% +0.41% +0.36%] index_select strided 5 : Elapsed 0.019 ms (1.941 ms / 100) 1.940 -> 1.945 ( +0.26%) [ +0.26% +0.31% +0.00% / +0.26% +0.77% +0.72%] index_select strided 7 : Elapsed 0.019 ms (1.945 ms / 100) 1.941 -> 1.942 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.31% +0.46%] index_select strided 8 : Elapsed 0.019 ms (1.941 ms / 100) 1.938 -> 1.943 ( +0.26%) [ +0.31% +0.00% +0.26% / +0.26% +0.57% +0.67%] index_select strided 16 : Elapsed 0.019 ms (1.944 ms / 100) 1.925 -> 1.929 ( +0.21%) [ +0.26% +0.00% +0.10% / +0.21% +0.62% +0.57%] index_select random : Elapsed 0.019 ms (1.930 ms / 100) 1.930 -> 1.934 ( +0.21%) [ +0.21% +0.05% +0.00% / +0.21% +0.57% +0.73%] index_select random_sorted : Elapsed 0.019 ms (1.934 ms / 100) 1.929 -> 1.928 ( -0.05%) [ +0.26% +0.05% +0.00% / -0.05% +0.31% +0.26%] index_select perm : Elapsed 0.019 ms (1.934 ms / 100) 1.926 -> 1.929 ( +0.16%) [ +0.00% +0.10% +0.00% / +0.16% +0.52% +0.47%] index_select perm_sorted : Elapsed 0.019 ms (1.926 ms / 100) B = [16, 4, 5, 40] (stride (800, 1, 160, 4)) A = [16, 4, 20, 40] (stride (1, 320, 16, 1280)) dim = 2 1.907 -> 1.905 ( -0.10%) [ +0.05% +0.00% +0.10% / +0.00% -0.10% +0.16%] index_select const : Elapsed 0.019 ms (1.908 ms / 100) 1.897 -> 1.897 ( +0.00%) [ +0.37% +0.00% +0.05% / +0.00% +0.32% +0.47%] index_select wrap : Elapsed 0.019 ms (1.904 ms / 100) 1.895 -> 1.899 ( +0.21%) [ +0.21% +0.26% +0.00% / +0.21% +0.42% +0.63%] index_select linear : Elapsed 0.019 ms (1.899 ms / 100) 1.901 -> 1.903 ( +0.11%) [ +0.26% +0.11% +0.00% / +0.11% +0.21% +0.16%] index_select reverse : Elapsed 0.019 ms (1.906 ms / 100) 1.901 -> 1.903 ( +0.11%) [ +0.00% +0.00% +0.16% / +0.11% +0.53% +0.58%] index_select skip64 : Elapsed 0.019 ms (1.901 ms / 100) 1.896 -> 1.902 ( +0.32%) [ +0.16% +0.32% +0.00% / +0.32% +0.32% +0.53%] index_select skip256 : Elapsed 0.019 ms (1.899 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.26% +0.05% +0.00% / -0.05% +0.37% +0.00%] index_select spread : Elapsed 0.019 ms (1.899 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.11% +0.11% +0.00% / +0.05% +0.32% +0.48%] index_select strided 3 : Elapsed 0.019 ms (1.895 ms / 100) 1.902 -> 1.900 ( -0.11%) [ +0.21% +0.00% +0.32% / -0.11% +0.26% +0.26%] index_select strided 5 : Elapsed 0.019 ms (1.906 ms / 100) 1.901 -> 1.904 ( +0.16%) [ +0.21% +0.11% +0.00% / +0.16% +0.37% +0.26%] index_select strided 7 : Elapsed 0.019 ms (1.905 ms / 100) 1.892 -> 1.893 ( +0.05%) [ +0.16% +0.05% +0.00% / +0.05% +0.53% +0.37%] index_select strided 8 : Elapsed 0.019 ms (1.895 ms / 100) 1.898 -> 1.899 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.32% +0.37%] index_select strided 16 : Elapsed 0.019 ms (1.899 ms / 100) 1.882 -> 1.887 ( +0.27%) [ +0.32% +0.05% +0.00% / +0.27% +0.64% +0.58%] index_select random : Elapsed 0.019 ms (1.888 ms / 100) 1.890 -> 1.890 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.32% +0.48%] index_select random_sorted : Elapsed 0.019 ms (1.893 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.05% +0.58% +0.53%] index_select perm : Elapsed 0.019 ms (1.896 ms / 100) 1.889 -> 1.890 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.64% +0.53%] index_select perm_sorted : Elapsed 0.019 ms (1.891 ms / 100) B = [16, 4, 5, 40] (stride (800, 1, 4, 20)) A = [16, 4, 20, 40] (stride (80, 20, 1, 1280)) dim = 2 1.819 -> 1.822 ( +0.16%) [ +0.00% +0.05% +0.11% / +0.16% +0.44% +0.49%] index_select const : Elapsed 0.018 ms (1.819 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.00% +0.33% +0.05% / +0.05% +0.49% +0.77%] index_select wrap : Elapsed 0.018 ms (1.826 ms / 100) 1.825 -> 1.824 ( -0.05%) [ +0.00% +0.22% +0.11% / -0.05% +0.82% +0.71%] index_select linear : Elapsed 0.018 ms (1.825 ms / 100) 1.828 -> 1.825 ( -0.16%) [ +0.05% +0.00% +0.11% / -0.16% +0.66% +0.49%] index_select reverse : Elapsed 0.018 ms (1.829 ms / 100) 1.818 -> 1.817 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.55% +0.50%] index_select skip64 : Elapsed 0.018 ms (1.819 ms / 100) 1.817 -> 1.820 ( +0.17%) [ +0.11% +0.00% +0.22% / +0.17% +0.66% +0.33%] index_select skip256 : Elapsed 0.018 ms (1.819 ms / 100) 1.846 -> 1.848 ( +0.11%) [ +0.22% +0.00% +0.11% / +0.11% +0.54% +0.38%] index_select spread : Elapsed 0.019 ms (1.850 ms / 100) 1.839 -> 1.846 ( +0.38%) [ +0.27% +0.11% +0.00% / +0.38% +0.92% +0.87%] index_select strided 3 : Elapsed 0.018 ms (1.844 ms / 100) 1.842 -> 1.848 ( +0.33%) [ +0.05% +0.00% +0.22% / +0.33% +0.92% +0.54%] index_select strided 5 : Elapsed 0.018 ms (1.843 ms / 100) 1.839 -> 1.838 ( -0.05%) [ +0.11% +0.00% +0.00% / -0.05% +0.44% +0.49%] index_select strided 7 : Elapsed 0.018 ms (1.841 ms / 100) 1.834 -> 1.837 ( +0.16%) [ +0.16% +0.00% +0.22% / +0.16% +0.38% +0.55%] index_select strided 8 : Elapsed 0.018 ms (1.837 ms / 100) 1.844 -> 1.846 ( +0.11%) [ +0.38% +0.11% +0.00% / +0.11% +0.54% +0.60%] index_select strided 16 : Elapsed 0.019 ms (1.851 ms / 100) 1.840 -> 1.843 ( +0.16%) [ +0.00% +0.05% +0.11% / +0.16% +0.49% +0.71%] index_select random : Elapsed 0.018 ms (1.840 ms / 100) 1.836 -> 1.838 ( +0.11%) [ +0.00% +0.11% +0.22% / +0.11% +0.54% +0.33%] index_select random_sorted : Elapsed 0.018 ms (1.836 ms / 100) 1.830 -> 1.841 ( +0.60%) [ +0.38% +0.05% +0.00% / +0.60% +0.60% +0.93%] index_select perm : Elapsed 0.018 ms (1.837 ms / 100) 1.846 -> 1.844 ( -0.11%) [ +0.00% +0.11% +0.05% / -0.11% +0.43% +0.38%] index_select perm_sorted : Elapsed 0.018 ms (1.846 ms / 100) B = [16, 4, 5, 40] (stride (40, 3200, 640, 1)) A = [16, 4, 20, 40] (stride (1, 640, 2560, 16)) dim = 2 1.811 -> 1.804 ( -0.39%) [ +0.22% +0.17% +0.00% / -0.11% -0.11% -0.39%] index_select const : Elapsed 0.018 ms (1.815 ms / 100) 1.804 -> 1.802 ( -0.11%) [ +0.17% +0.00% +0.17% / -0.11% +0.44% +0.22%] index_select wrap : Elapsed 0.018 ms (1.807 ms / 100) 1.806 -> 1.811 ( +0.28%) [ +0.22% +0.00% +0.11% / +0.28% +0.55% +0.39%] index_select linear : Elapsed 0.018 ms (1.810 ms / 100) 1.807 -> 1.807 ( +0.00%) [ +0.22% +0.00% +0.22% / +0.00% +0.11% +0.33%] index_select reverse : Elapsed 0.018 ms (1.811 ms / 100) 1.815 -> 1.810 ( -0.28%) [ +0.06% +0.00% +0.00% / +0.06% -0.06% -0.28%] index_select skip64 : Elapsed 0.018 ms (1.816 ms / 100) 1.810 -> 1.808 ( -0.11%) [ +0.11% +0.06% +0.00% / -0.11% -0.06% +0.00%] index_select skip256 : Elapsed 0.018 ms (1.812 ms / 100) 1.802 -> 1.807 ( +0.28%) [ +0.28% +0.00% +0.17% / +0.28% +0.55% +0.78%] index_select spread : Elapsed 0.018 ms (1.807 ms / 100) 1.812 -> 1.812 ( +0.00%) [ +0.00% +0.06% +0.17% / +0.00% +0.11% +0.00%] index_select strided 3 : Elapsed 0.018 ms (1.812 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.28% +0.00% +0.00% / +0.33% +0.06% +0.28%] index_select strided 5 : Elapsed 0.018 ms (1.814 ms / 100) 1.803 -> 1.807 ( +0.22%) [ +0.28% +0.00% +0.33% / +0.22% +0.44% +0.39%] index_select strided 7 : Elapsed 0.018 ms (1.808 ms / 100) 1.807 -> 1.805 ( -0.11%) [ +0.17% +0.00% +0.00% / -0.11% +0.22% +0.50%] index_select strided 8 : Elapsed 0.018 ms (1.810 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.17% +0.00% +0.06% / +0.06% +0.33% +0.28%] index_select strided 16 : Elapsed 0.018 ms (1.812 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.39% +0.39% +0.00% / +0.39% +0.55% +0.66%] index_select random : Elapsed 0.018 ms (1.812 ms / 100) 1.804 -> 1.804 ( +0.00%) [ +0.28% +0.00% +0.33% / +0.00% +0.55% +0.39%] index_select random_sorted : Elapsed 0.018 ms (1.809 ms / 100) 1.804 -> 1.807 ( +0.17%) [ +0.28% +0.00% +0.00% / +0.17% +0.50% +0.28%] index_select perm : Elapsed 0.018 ms (1.809 ms / 100) 1.812 -> 1.810 ( -0.11%) [ +0.11% +0.22% +0.00% / +0.28% -0.11% +0.11%] index_select perm_sorted : Elapsed 0.018 ms (1.814 ms / 100) B = [16, 4, 5, 40] (stride (40, 640, 2560, 1)) A = [16, 4, 20, 40] (stride (3200, 1, 4, 80)) dim = 2 1.899 -> 1.906 ( +0.37%) [ +0.16% +0.00% +0.11% / +0.37% +0.42% +0.37%] index_select const : Elapsed 0.019 ms (1.902 ms / 100) 1.905 -> 1.907 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.26% +0.21%] index_select wrap : Elapsed 0.019 ms (1.906 ms / 100) 1.905 -> 1.902 ( -0.16%) [ +0.16% +0.00% +0.05% / -0.16% +0.26% +0.21%] index_select linear : Elapsed 0.019 ms (1.908 ms / 100) 1.908 -> 1.908 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.26% +0.47%] index_select reverse : Elapsed 0.019 ms (1.908 ms / 100) 1.897 -> 1.902 ( +0.26%) [ +0.00% +0.21% +0.32% / +0.37% +0.53% +0.26%] index_select skip64 : Elapsed 0.019 ms (1.897 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.16% +0.11%] index_select skip256 : Elapsed 0.019 ms (1.904 ms / 100) 1.905 -> 1.908 ( +0.16%) [ +0.05% +0.10% +0.00% / +0.16% +0.26% +0.42%] index_select spread : Elapsed 0.019 ms (1.906 ms / 100) 1.908 -> 1.907 ( -0.05%) [ +0.00% +0.05% +0.10% / +0.16% -0.05% +0.10%] index_select strided 3 : Elapsed 0.019 ms (1.908 ms / 100) 1.905 -> 1.905 ( +0.00%) [ +0.00% +0.05% +0.21% / +0.10% +0.10% +0.00%] index_select strided 5 : Elapsed 0.019 ms (1.905 ms / 100) 1.897 -> 1.897 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.32% +0.37%] index_select strided 7 : Elapsed 0.019 ms (1.898 ms / 100) 1.908 -> 1.907 ( -0.05%) [ +0.16% +0.16% +0.00% / -0.05% +0.26% +0.52%] index_select strided 8 : Elapsed 0.019 ms (1.911 ms / 100) 1.904 -> 1.908 ( +0.21%) [ +0.11% +0.05% +0.00% / +0.21% +0.37% +0.26%] index_select strided 16 : Elapsed 0.019 ms (1.906 ms / 100) 1.905 -> 1.904 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.26% +0.31%] index_select random : Elapsed 0.019 ms (1.906 ms / 100) 1.908 -> 1.911 ( +0.16%) [ +0.10% +0.00% +0.47% / +0.16% +0.37% +0.31%] index_select random_sorted : Elapsed 0.019 ms (1.910 ms / 100) 1.901 -> 1.906 ( +0.26%) [ +0.21% +0.00% +0.21% / +0.26% +0.47% +0.42%] index_select perm : Elapsed 0.019 ms (1.905 ms / 100) 1.911 -> 1.911 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.37% +0.47%] index_select perm_sorted : Elapsed 0.019 ms (1.911 ms / 100) B = [16, 4, 5, 40] (stride (4, 1, 2560, 64)) A = [16, 4, 20, 40] (stride (80, 20, 1, 1280)) dim = 2 0.717 -> 0.717 ( +0.00%) [ +0.14% +0.00% +0.28% / +0.00% +0.42% +0.42%] index_select const : Elapsed 0.007 ms (0.718 ms / 100) 0.719 -> 0.720 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +1.39% +1.39%] index_select wrap : Elapsed 0.007 ms (0.721 ms / 100) 0.719 -> 0.720 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +1.39% +1.25%] index_select linear : Elapsed 0.007 ms (0.721 ms / 100) 0.716 -> 0.717 ( +0.14%) [ +0.00% +0.28% +0.00% / +0.14% +0.84% +0.84%] index_select reverse : Elapsed 0.007 ms (0.716 ms / 100) 0.716 -> 0.716 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.70% +0.84%] index_select skip64 : Elapsed 0.007 ms (0.717 ms / 100) 0.728 -> 0.721 ( -0.96%) [ +0.14% +0.14% +0.00% / +0.00% -0.96% -0.82%] index_select skip256 : Elapsed 0.007 ms (0.729 ms / 100) 0.728 -> 0.721 ( -0.96%) [ +0.14% +0.00% +0.00% / +0.00% -0.96% -0.96%] index_select spread : Elapsed 0.007 ms (0.729 ms / 100) 0.723 -> 0.723 ( +0.00%) [ +0.00% +0.14% +0.41% / +0.00% +4.43% +0.14%] index_select strided 3 : Elapsed 0.007 ms (0.723 ms / 100) 0.722 -> 0.721 ( -0.14%) [ +0.00% +0.14% +0.00% / +0.00% -0.14% +0.00%] index_select strided 5 : Elapsed 0.007 ms (0.722 ms / 100) 0.722 -> 0.722 ( +0.00%) [ +0.14% +0.42% +0.00% / +0.55% +0.00% +0.14%] index_select strided 7 : Elapsed 0.007 ms (0.723 ms / 100) 0.723 -> 0.722 ( -0.14%) [ +0.00% +0.00% +0.14% / +0.14% -0.14% +0.00%] index_select strided 8 : Elapsed 0.007 ms (0.723 ms / 100) 0.723 -> 0.721 ( -0.28%) [ +0.28% +0.28% +0.00% / +0.28% -0.28% -0.28%] index_select strided 16 : Elapsed 0.007 ms (0.725 ms / 100) 0.726 -> 0.722 ( -0.55%) [ +0.28% +0.00% +0.00% / +0.14% -0.28% -0.55%] index_select random : Elapsed 0.007 ms (0.728 ms / 100) 0.716 -> 0.716 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.12% +0.98%] index_select random_sorted : Elapsed 0.007 ms (0.716 ms / 100) 0.714 -> 0.714 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +1.26% +1.26%] index_select perm : Elapsed 0.007 ms (0.715 ms / 100) 0.717 -> 0.718 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +2.23% +2.37%] index_select perm_sorted : Elapsed 0.007 ms (0.719 ms / 100) out_shape = [16, 4, 20, 5] in_shape = [16, 4, 20, 40] idx_dim = 3 B = [16, 4, 20, 5] (stride (400, 5, 20, 1)) A = [16, 4, 20, 40] (stride (3200, 1, 4, 80)) dim = 3 1.480 -> 1.480 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.27% +0.34%] index_select const : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.481 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.27% +0.27%] index_select wrap : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.20% +0.07% +0.00% / +0.14% +0.34% +0.34%] index_select linear : Elapsed 0.015 ms (1.483 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.47% +0.41%] index_select reverse : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.41% +0.27%] index_select skip64 : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.480 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.41% +0.27%] index_select skip256 : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.47% +0.41%] index_select spread : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.54% +0.47%] index_select strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.54% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.480 -> 1.480 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.47% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.482 ms / 100) 1.478 -> 1.480 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.68% +0.68%] index_select strided 16 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.68% +0.54%] index_select random : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.68%] index_select perm : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) B = [16, 4, 20, 5] (stride (5, 1600, 80, 1)) A = [16, 4, 20, 40] (stride (40, 12800, 640, 1)) dim = 3 0.678 -> 0.678 ( +0.00%) [ +0.44% +0.15% +0.00% / +0.00% +0.74% +0.44%] index_select const : Elapsed 0.007 ms (0.681 ms / 100) 0.677 -> 0.679 ( +0.30%) [ +0.30% +0.15% +0.00% / +0.30% +1.03% +0.74%] index_select wrap : Elapsed 0.007 ms (0.679 ms / 100) 0.678 -> 0.678 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.88% +0.74%] index_select linear : Elapsed 0.007 ms (0.678 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.00% +0.00% / +0.15% +1.03% +1.03%] index_select reverse : Elapsed 0.007 ms (0.679 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.89% +0.89%] index_select skip64 : Elapsed 0.007 ms (0.679 ms / 100) 0.677 -> 0.679 ( +0.30%) [ +0.44% +0.00% +0.00% / +0.30% +0.89% +1.03%] index_select skip256 : Elapsed 0.007 ms (0.680 ms / 100) 0.677 -> 0.680 ( +0.44%) [ +0.30% +0.15% +0.00% / +0.44% +0.74% +0.44%] index_select spread : Elapsed 0.007 ms (0.679 ms / 100) 0.677 -> 0.677 ( +0.00%) [ +0.30% +0.00% +0.00% / +0.00% +1.33% +0.89%] index_select strided 3 : Elapsed 0.007 ms (0.679 ms / 100) 0.678 -> 0.677 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +1.03% +1.03%] index_select strided 5 : Elapsed 0.007 ms (0.678 ms / 100) 0.677 -> 0.682 ( +0.74%) [ +0.74% +0.15% +0.00% / +0.74% +0.89% +0.89%] index_select strided 7 : Elapsed 0.007 ms (0.682 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.74% +0.89%] index_select strided 8 : Elapsed 0.007 ms (0.679 ms / 100) 0.679 -> 0.679 ( +0.00%) [ +0.29% +0.15% +0.00% / +0.00% +0.74% +0.74%] index_select strided 16 : Elapsed 0.007 ms (0.681 ms / 100) 0.678 -> 0.679 ( +0.15%) [ +0.29% +0.15% +0.00% / +0.15% +0.74% +1.33%] index_select random : Elapsed 0.007 ms (0.680 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.74% +0.59%] index_select random_sorted : Elapsed 0.007 ms (0.679 ms / 100) 0.679 -> 0.680 ( +0.15%) [ +1.18% +0.00% +0.00% / +0.15% +0.88% +0.74%] index_select perm : Elapsed 0.007 ms (0.687 ms / 100) 0.678 -> 0.677 ( -0.15%) [ +0.00% +0.15% +0.15% / -0.15% +1.18% +0.88%] index_select perm_sorted : Elapsed 0.007 ms (0.678 ms / 100) B = [16, 4, 20, 5] (stride (20, 1, 320, 4)) A = [16, 4, 20, 40] (stride (1, 16, 64, 1280)) dim = 3 1.593 -> 1.595 ( +0.13%) [ +0.56% +0.00% +0.13% / +0.13% +0.56% +0.94%] index_select const : Elapsed 0.016 ms (1.602 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.19% +0.00% +0.00% / +0.13% +0.57% +0.38%] index_select wrap : Elapsed 0.016 ms (1.577 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.32% +0.06% +0.00% / +0.13% +0.45% +0.45%] index_select linear : Elapsed 0.016 ms (1.578 ms / 100) 1.573 -> 1.573 ( +0.00%) [ +0.45% +0.00% +0.32% / +0.00% +0.64% +0.51%] index_select reverse : Elapsed 0.016 ms (1.580 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.19% +0.13% +0.00% / +0.06% +0.25% +0.25%] index_select skip64 : Elapsed 0.016 ms (1.578 ms / 100) 1.591 -> 1.597 ( +0.38%) [ +0.31% +0.25% +0.00% / +0.38% +1.32% +0.57%] index_select skip256 : Elapsed 0.016 ms (1.596 ms / 100) 1.595 -> 1.596 ( +0.06%) [ +0.00% +0.50% +0.50% / +0.06% +0.94% +0.56%] index_select spread : Elapsed 0.016 ms (1.595 ms / 100) 1.575 -> 1.574 ( -0.06%) [ +0.13% +0.00% +0.00% / -0.06% +0.32% +0.32%] index_select strided 3 : Elapsed 0.016 ms (1.577 ms / 100) 1.578 -> 1.573 ( -0.32%) [ +0.00% +0.00% +0.00% / -0.32% +0.19% +0.13%] index_select strided 5 : Elapsed 0.016 ms (1.578 ms / 100) 1.596 -> 1.596 ( +0.00%) [ +0.06% +0.50% +0.00% / +0.00% +0.56% +0.88%] index_select strided 7 : Elapsed 0.016 ms (1.597 ms / 100) 1.594 -> 1.601 ( +0.44%) [ +0.25% +0.56% +0.00% / +0.44% +1.07% +0.75%] index_select strided 8 : Elapsed 0.016 ms (1.598 ms / 100) 1.574 -> 1.578 ( +0.25%) [ +0.32% +0.19% +0.00% / +0.25% +0.44% +0.38%] index_select strided 16 : Elapsed 0.016 ms (1.579 ms / 100) 1.573 -> 1.579 ( +0.38%) [ +0.13% +0.13% +0.00% / +0.38% +0.45% +0.51%] index_select random : Elapsed 0.016 ms (1.575 ms / 100) 1.593 -> 1.595 ( +0.13%) [ +0.56% +0.00% +0.00% / +0.63% +0.25% +0.13%] index_select random_sorted : Elapsed 0.016 ms (1.602 ms / 100) 1.596 -> 1.603 ( +0.44%) [ +0.00% +0.13% +0.31% / +0.44% +0.94% +0.69%] index_select perm : Elapsed 0.016 ms (1.596 ms / 100) 1.575 -> 1.573 ( -0.13%) [ +0.13% +0.00% +0.06% / -0.13% +0.32% +0.25%] index_select perm_sorted : Elapsed 0.016 ms (1.577 ms / 100) B = [16, 4, 20, 5] (stride (80, 20, 1, 1280)) A = [16, 4, 20, 40] (stride (3200, 1, 160, 4)) dim = 3 1.364 -> 1.367 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.73% +0.73%] index_select const : Elapsed 0.014 ms (1.367 ms / 100) 1.359 -> 1.361 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.81% +0.74%] index_select wrap : Elapsed 0.014 ms (1.359 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.00% +0.00% +0.22% / +0.29% +0.80% +0.66%] index_select linear : Elapsed 0.014 ms (1.368 ms / 100) 1.369 -> 1.367 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +1.02% +1.02%] index_select reverse : Elapsed 0.014 ms (1.369 ms / 100) 1.356 -> 1.358 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.96% +1.11%] index_select skip64 : Elapsed 0.014 ms (1.359 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.364 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.88% +0.66%] index_select spread : Elapsed 0.014 ms (1.357 ms / 100) 1.357 -> 1.356 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.74% +0.88%] index_select strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.361 -> 1.364 ( +0.22%) [ +0.37% +0.00% +0.15% / +0.22% +0.81% +0.81%] index_select strided 5 : Elapsed 0.014 ms (1.366 ms / 100) 1.354 -> 1.357 ( +0.22%) [ +0.22% +0.30% +0.00% / +0.22% +0.89% +0.89%] index_select strided 7 : Elapsed 0.014 ms (1.357 ms / 100) 1.356 -> 1.358 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.81% +0.88%] index_select strided 8 : Elapsed 0.014 ms (1.357 ms / 100) 1.360 -> 1.361 ( +0.07%) [ +0.07% +0.00% +0.29% / +0.07% +0.88% +0.74%] index_select strided 16 : Elapsed 0.014 ms (1.361 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.59%] index_select random : Elapsed 0.014 ms (1.357 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.81%] index_select random_sorted : Elapsed 0.014 ms (1.357 ms / 100) 1.358 -> 1.357 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.88% +0.74%] index_select perm : Elapsed 0.014 ms (1.359 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.81% +0.66%] index_select perm_sorted : Elapsed 0.014 ms (1.357 ms / 100) B = [16, 4, 20, 5] (stride (80, 1, 4, 1280)) A = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) dim = 3 1.609 -> 1.609 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.25% +0.25%] index_select const : Elapsed 0.016 ms (1.610 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.25% +0.00% +0.00% / +0.06% +0.56% +0.50%] index_select wrap : Elapsed 0.016 ms (1.615 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.56% +0.50%] index_select linear : Elapsed 0.016 ms (1.611 ms / 100) 1.609 -> 1.608 ( -0.06%) [ +1.31% +0.00% +0.00% / -0.06% +0.56% +0.37%] index_select reverse : Elapsed 0.016 ms (1.630 ms / 100) 1.610 -> 1.612 ( +0.12%) [ +0.12% +0.19% +0.00% / +0.12% +0.68% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.612 ms / 100) 1.606 -> 1.609 ( +0.19%) [ +0.19% +0.00% +0.12% / +0.19% +0.50% +0.56%] index_select skip256 : Elapsed 0.016 ms (1.609 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.56% +0.56%] index_select spread : Elapsed 0.016 ms (1.607 ms / 100) 1.609 -> 1.609 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.50% +0.44%] index_select strided 3 : Elapsed 0.016 ms (1.609 ms / 100) 1.603 -> 1.602 ( -0.06%) [ +0.06% +0.19% +0.00% / -0.06% +0.50% +0.44%] index_select strided 5 : Elapsed 0.016 ms (1.604 ms / 100) 1.603 -> 1.604 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.44% +0.44%] index_select strided 7 : Elapsed 0.016 ms (1.604 ms / 100) 1.608 -> 1.613 ( +0.31%) [ +0.06% +0.00% +0.06% / +0.31% +0.62% +0.50%] index_select strided 8 : Elapsed 0.016 ms (1.609 ms / 100) 1.601 -> 1.602 ( +0.06%) [ +0.00% +0.25% +0.19% / +0.06% +0.56% +0.75%] index_select strided 16 : Elapsed 0.016 ms (1.601 ms / 100) 1.603 -> 1.605 ( +0.12%) [ +0.00% +0.19% +0.06% / +0.12% +0.69% +0.62%] index_select random : Elapsed 0.016 ms (1.603 ms / 100) 1.600 -> 1.604 ( +0.25%) [ +0.38% +0.00% +0.13% / +0.25% +0.81% +0.75%] index_select random_sorted : Elapsed 0.016 ms (1.606 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.68% +0.87%] index_select perm : Elapsed 0.016 ms (1.608 ms / 100) 1.600 -> 1.601 ( +0.06%) [ +0.19% +0.00% +0.13% / +0.06% +0.88% +0.56%] index_select perm_sorted : Elapsed 0.016 ms (1.603 ms / 100) B = [16, 4, 20, 5] (stride (1, 16, 64, 1280)) A = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) dim = 3 1.616 -> 1.616 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select const : Elapsed 0.016 ms (1.617 ms / 100) 1.616 -> 1.616 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.50%] index_select wrap : Elapsed 0.016 ms (1.616 ms / 100) 1.618 -> 1.623 ( +0.31%) [ +0.12% +0.00% +0.12% / +0.31% +0.43% +0.43%] index_select linear : Elapsed 0.016 ms (1.620 ms / 100) 1.618 -> 1.623 ( +0.31%) [ +0.00% +0.19% +0.06% / +0.31% +0.62% +0.49%] index_select reverse : Elapsed 0.016 ms (1.618 ms / 100) 1.614 -> 1.616 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.68% +0.68%] index_select skip64 : Elapsed 0.016 ms (1.616 ms / 100) 1.615 -> 1.616 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.56% +0.68%] index_select skip256 : Elapsed 0.016 ms (1.616 ms / 100) 1.609 -> 1.611 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.37% +0.31%] index_select spread : Elapsed 0.016 ms (1.611 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.19% +0.00% +0.00% / +0.12% +0.68% +0.62%] index_select strided 3 : Elapsed 0.016 ms (1.614 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.19% +0.06% +0.00% / +0.00% +0.81% +0.62%] index_select strided 5 : Elapsed 0.016 ms (1.614 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.44% +0.31%] index_select strided 7 : Elapsed 0.016 ms (1.608 ms / 100) 1.612 -> 1.610 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.62% +0.50%] index_select strided 8 : Elapsed 0.016 ms (1.613 ms / 100) 1.609 -> 1.609 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.50% +0.44%] index_select strided 16 : Elapsed 0.016 ms (1.611 ms / 100) 1.613 -> 1.614 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.50% +0.62%] index_select random : Elapsed 0.016 ms (1.613 ms / 100) 1.607 -> 1.611 ( +0.25%) [ +0.12% +0.00% +0.06% / +0.25% +0.68% +0.68%] index_select random_sorted : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.62% +0.50%] index_select perm : Elapsed 0.016 ms (1.612 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select perm_sorted : Elapsed 0.016 ms (1.612 ms / 100) B = [16, 4, 20, 5] (stride (1, 16, 64, 1280)) A = [16, 4, 20, 40] (stride (160, 1, 2560, 4)) dim = 3 1.597 -> 1.597 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.56% +0.69%] index_select const : Elapsed 0.016 ms (1.597 ms / 100) 1.599 -> 1.601 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.13% +0.75% +0.56%] index_select wrap : Elapsed 0.016 ms (1.599 ms / 100) 1.597 -> 1.597 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.56% +0.56%] index_select linear : Elapsed 0.016 ms (1.598 ms / 100) 1.594 -> 1.597 ( +0.19%) [ +0.00% +0.19% +0.13% / +0.19% +0.82% +0.69%] index_select reverse : Elapsed 0.016 ms (1.594 ms / 100) 1.598 -> 1.598 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_select skip64 : Elapsed 0.016 ms (1.599 ms / 100) 1.595 -> 1.598 ( +0.19%) [ +0.00% +0.00% +0.06% / +0.19% +0.82% +1.00%] index_select skip256 : Elapsed 0.016 ms (1.595 ms / 100) 1.589 -> 1.590 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.38% +0.31%] index_select spread : Elapsed 0.016 ms (1.589 ms / 100) 1.586 -> 1.587 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.57% +0.63%] index_select strided 3 : Elapsed 0.016 ms (1.586 ms / 100) 1.580 -> 1.580 ( +0.00%) [ +0.06% +0.00% +0.13% / +0.00% +0.63% +0.63%] index_select strided 5 : Elapsed 0.016 ms (1.581 ms / 100) 1.584 -> 1.585 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.51% +0.51%] index_select strided 7 : Elapsed 0.016 ms (1.585 ms / 100) 1.586 -> 1.585 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.32% +0.06%] index_select strided 8 : Elapsed 0.016 ms (1.586 ms / 100) 1.587 -> 1.586 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.50% +0.50%] index_select strided 16 : Elapsed 0.016 ms (1.587 ms / 100) 1.587 -> 1.591 ( +0.25%) [ +0.32% +0.00% +0.32% / +0.25% +0.38% +0.69%] index_select random : Elapsed 0.016 ms (1.592 ms / 100) 1.585 -> 1.589 ( +0.25%) [ +0.00% +0.25% +0.19% / +0.25% +0.88% +1.01%] index_select random_sorted : Elapsed 0.016 ms (1.585 ms / 100) 1.590 -> 1.587 ( -0.19%) [ +0.00% +0.13% +0.00% / -0.19% +0.57% +0.57%] index_select perm : Elapsed 0.016 ms (1.590 ms / 100) 1.584 -> 1.584 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.69% +0.69%] index_select perm_sorted : Elapsed 0.016 ms (1.584 ms / 100) out_shape = [5, 4, 40, 20] in_shape = [16, 4, 40, 20] idx_dim = 0 B = [5, 4, 40, 20] (stride (3200, 800, 20, 1)) dim = 0 fill_cnt = 16 0.925 -> 0.925 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +1.41% +1.30%] index_fill_ const : Elapsed 0.009 ms (0.925 ms / 100) 0.925 -> 0.926 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +1.73% +1.41%] index_fill_ linear : Elapsed 0.009 ms (0.925 ms / 100) 0.924 -> 0.926 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +1.30% +1.41%] index_fill_ reverse : Elapsed 0.009 ms (0.926 ms / 100) 0.918 -> 0.918 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +1.53% +1.63%] index_fill_ skip64 : Elapsed 0.009 ms (0.918 ms / 100) 0.925 -> 0.927 ( +0.22%) [ +0.00% +0.00% +0.11% / +0.22% +1.30% +1.30%] index_fill_ skip256 : Elapsed 0.009 ms (0.925 ms / 100) 0.926 -> 0.926 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +1.19% +1.19%] index_fill_ spread : Elapsed 0.009 ms (0.926 ms / 100) 0.924 -> 0.923 ( -0.11%) [ +0.00% +0.00% +0.11% / -0.11% +0.76% +0.65%] index_fill_ strided 3 : Elapsed 0.009 ms (0.924 ms / 100) 0.924 -> 0.925 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.97% +0.97%] index_fill_ random : Elapsed 0.009 ms (0.925 ms / 100) 0.925 -> 0.926 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.65% +0.65%] index_fill_ random_sorted : Elapsed 0.009 ms (0.926 ms / 100) B = [5, 4, 40, 20] (stride (3200, 800, 20, 1)) A = [16, 4, 40, 20] (stride (40, 12800, 1, 640)) dim = 0 2.118 -> 2.118 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.66% +0.47%] index_select const : Elapsed 0.021 ms (2.121 ms / 100) 2.134 -> 2.141 ( +0.33%) [ +0.19% +0.33% +0.00% / +0.33% +0.61% +0.66%] index_select wrap : Elapsed 0.021 ms (2.138 ms / 100) 2.139 -> 2.138 ( -0.05%) [ +0.09% +0.09% +0.00% / -0.05% +0.33% +0.33%] index_select linear : Elapsed 0.021 ms (2.141 ms / 100) 2.130 -> 2.133 ( +0.14%) [ +0.19% +0.28% +0.00% / +0.14% +0.42% +0.28%] index_select reverse : Elapsed 0.021 ms (2.134 ms / 100) 2.124 -> 2.125 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +0.47% +0.38%] index_select skip64 : Elapsed 0.021 ms (2.126 ms / 100) 2.117 -> 2.118 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.61% +0.71%] index_select skip256 : Elapsed 0.021 ms (2.119 ms / 100) 2.134 -> 2.134 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +0.33% +0.52%] index_select spread : Elapsed 0.021 ms (2.134 ms / 100) 2.131 -> 2.135 ( +0.19%) [ +0.23% +0.19% +0.00% / +0.19% +0.66% +0.61%] index_select strided 3 : Elapsed 0.021 ms (2.136 ms / 100) 2.137 -> 2.140 ( +0.14%) [ +0.00% +0.05% +0.09% / +0.14% +0.42% +0.66%] index_select strided 5 : Elapsed 0.021 ms (2.137 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.19% +0.14% +0.00% / +0.05% +0.89% +0.89%] index_select strided 7 : Elapsed 0.021 ms (2.141 ms / 100) 2.116 -> 2.117 ( +0.05%) [ +0.00% +0.19% +0.24% / +0.05% +0.61% +0.85%] index_select strided 8 : Elapsed 0.021 ms (2.116 ms / 100) 2.123 -> 2.122 ( -0.05%) [ +0.09% +0.00% +0.05% / -0.05% +0.75% +0.89%] index_select random : Elapsed 0.021 ms (2.125 ms / 100) 2.122 -> 2.125 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.52% +0.66%] index_select random_sorted : Elapsed 0.021 ms (2.122 ms / 100) 2.134 -> 2.139 ( +0.23%) [ +0.23% +0.14% +0.00% / +0.23% +0.56% +0.61%] index_select perm : Elapsed 0.021 ms (2.139 ms / 100) 2.132 -> 2.135 ( +0.14%) [ +0.19% +0.00% +0.19% / +0.14% +0.56% +0.61%] index_select perm_sorted : Elapsed 0.021 ms (2.136 ms / 100) B = [5, 4, 40, 20] (stride (3200, 800, 1, 40)) A = [16, 4, 40, 20] (stride (800, 12800, 20, 1)) dim = 0 2.117 -> 2.116 ( -0.05%) [ +0.00% +0.14% +0.05% / -0.05% +0.47% +0.52%] index_select const : Elapsed 0.021 ms (2.117 ms / 100) 2.172 -> 2.141 ( -1.43%) [ +0.05% +0.00% +0.00% / -0.18% -1.29% -1.43%] index_select wrap : Elapsed 0.022 ms (2.173 ms / 100) 2.171 -> 2.142 ( -1.34%) [ +0.09% +0.00% +0.00% / -0.05% -1.34% -1.29%] index_select linear : Elapsed 0.022 ms (2.173 ms / 100) 2.172 -> 2.138 ( -1.57%) [ +0.37% +0.23% +0.00% / +0.09% -1.57% -1.47%] index_select reverse : Elapsed 0.022 ms (2.180 ms / 100) 2.119 -> 2.120 ( +0.05%) [ +0.24% +0.05% +0.00% / +0.05% +0.76% +0.76%] index_select skip64 : Elapsed 0.021 ms (2.124 ms / 100) 2.116 -> 2.119 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.47% +0.52%] index_select skip256 : Elapsed 0.021 ms (2.118 ms / 100) 2.172 -> 2.141 ( -1.43%) [ +0.23% +0.18% +0.00% / +0.46% -1.43% -1.29%] index_select spread : Elapsed 0.022 ms (2.177 ms / 100) 2.176 -> 2.142 ( -1.56%) [ +0.05% +0.00% +0.00% / +0.09% -1.47% -1.56%] index_select strided 3 : Elapsed 0.022 ms (2.177 ms / 100) 2.177 -> 2.159 ( -0.83%) [ +0.00% +0.28% +0.00% / +0.18% -0.83% -0.73%] index_select strided 5 : Elapsed 0.022 ms (2.177 ms / 100) 2.174 -> 2.169 ( -0.23%) [ +0.00% +0.09% +0.00% / -0.05% -0.23% -0.23%] index_select strided 7 : Elapsed 0.022 ms (2.174 ms / 100) 2.133 -> 2.125 ( -0.38%) [ +0.00% +0.09% +0.00% / -0.05% -0.28% -0.38%] index_select strided 8 : Elapsed 0.021 ms (2.133 ms / 100) 2.163 -> 2.152 ( -0.51%) [ +0.14% +0.00% +0.00% / +0.00% -0.42% -0.51%] index_select random : Elapsed 0.022 ms (2.166 ms / 100) 2.154 -> 2.141 ( -0.60%) [ +0.28% +0.32% +0.00% / +0.23% -0.51% -0.60%] index_select random_sorted : Elapsed 0.022 ms (2.160 ms / 100) 2.168 -> 2.152 ( -0.74%) [ +0.23% +0.00% +0.23% / -0.05% -0.65% -0.74%] index_select perm : Elapsed 0.022 ms (2.173 ms / 100) 2.172 -> 2.168 ( -0.18%) [ +0.00% +0.05% +0.18% / +0.18% -0.09% -0.18%] index_select perm_sorted : Elapsed 0.022 ms (2.172 ms / 100) B = [5, 4, 40, 20] (stride (3200, 800, 1, 40)) A = [16, 4, 40, 20] (stride (1, 12800, 320, 16)) dim = 0 2.151 -> 2.154 ( +0.14%) [ +0.23% +0.00% +0.00% / +0.14% +0.46% +0.51%] index_select const : Elapsed 0.022 ms (2.156 ms / 100) 2.150 -> 2.151 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.05% +0.51% +0.42%] index_select wrap : Elapsed 0.021 ms (2.150 ms / 100) 2.153 -> 2.159 ( +0.28%) [ +0.19% +0.28% +0.00% / +0.28% +0.65% +0.74%] index_select linear : Elapsed 0.022 ms (2.157 ms / 100) 2.152 -> 2.154 ( +0.09%) [ +0.28% +0.00% +0.09% / +0.09% +0.74% +0.70%] index_select reverse : Elapsed 0.022 ms (2.158 ms / 100) 2.147 -> 2.150 ( +0.14%) [ +0.19% +0.00% +0.09% / +0.14% +0.79% +0.61%] index_select skip64 : Elapsed 0.022 ms (2.151 ms / 100) 2.149 -> 2.155 ( +0.28%) [ +0.09% +0.28% +0.00% / +0.28% +0.88% +0.74%] index_select skip256 : Elapsed 0.022 ms (2.151 ms / 100) 2.162 -> 2.163 ( +0.05%) [ +0.05% +0.00% +0.23% / +0.05% +0.97% +1.11%] index_select spread : Elapsed 0.022 ms (2.163 ms / 100) 2.165 -> 2.168 ( +0.14%) [ +0.23% +0.00% +0.18% / +0.14% +1.02% +0.92%] index_select strided 3 : Elapsed 0.022 ms (2.170 ms / 100) 2.163 -> 2.165 ( +0.09%) [ +0.23% +0.09% +0.00% / +0.09% +0.92% +0.69%] index_select strided 5 : Elapsed 0.022 ms (2.168 ms / 100) 2.162 -> 2.165 ( +0.14%) [ +0.00% +0.14% +0.05% / +0.14% +0.69% +0.79%] index_select strided 7 : Elapsed 0.022 ms (2.162 ms / 100) 2.162 -> 2.167 ( +0.23%) [ +0.23% +0.00% +0.19% / +0.23% +1.16% +0.97%] index_select strided 8 : Elapsed 0.022 ms (2.167 ms / 100) 2.162 -> 2.165 ( +0.14%) [ +0.00% +0.00% +0.09% / +0.14% +0.97% +0.93%] index_select random : Elapsed 0.022 ms (2.162 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.32% +0.00% +0.09% / +0.05% +0.88% +0.83%] index_select random_sorted : Elapsed 0.022 ms (2.166 ms / 100) 2.171 -> 2.165 ( -0.28%) [ +0.00% +0.00% +0.00% / -0.28% +0.60% +0.83%] index_select perm : Elapsed 0.022 ms (2.171 ms / 100) 2.160 -> 2.163 ( +0.14%) [ +0.19% +0.14% +0.00% / +0.14% +0.79% +0.83%] index_select perm_sorted : Elapsed 0.022 ms (2.164 ms / 100) B = [5, 4, 40, 20] (stride (1, 100, 400, 5)) A = [16, 4, 40, 20] (stride (1, 12800, 320, 16)) dim = 0 2.158 -> 2.160 ( +0.09%) [ +0.14% +0.23% +0.00% / +0.09% +0.23% +0.09%] index_select const : Elapsed 0.022 ms (2.161 ms / 100) 2.156 -> 2.156 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.32% +0.19%] index_select wrap : Elapsed 0.022 ms (2.158 ms / 100) 2.154 -> 2.156 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.19% +0.37%] index_select linear : Elapsed 0.022 ms (2.156 ms / 100) 2.153 -> 2.152 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.33% +0.14%] index_select reverse : Elapsed 0.022 ms (2.153 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.14% +0.00% +0.09% / +0.09% +0.14% +0.05%] index_select skip64 : Elapsed 0.022 ms (2.159 ms / 100) 2.157 -> 2.159 ( +0.09%) [ +0.23% +0.09% +0.00% / +0.28% +0.14% +0.09%] index_select skip256 : Elapsed 0.022 ms (2.162 ms / 100) 2.167 -> 2.165 ( -0.09%) [ +0.18% +0.18% +0.00% / -0.09% +0.23% +0.00%] index_select spread : Elapsed 0.022 ms (2.171 ms / 100) 2.175 -> 2.168 ( -0.32%) [ +0.00% +0.18% +0.14% / -0.09% -0.32% -0.32%] index_select strided 3 : Elapsed 0.022 ms (2.175 ms / 100) 2.175 -> 2.166 ( -0.41%) [ +0.05% +0.09% +0.00% / -0.05% -0.32% -0.41%] index_select strided 5 : Elapsed 0.022 ms (2.176 ms / 100) 2.168 -> 2.166 ( -0.09%) [ +0.00% +0.05% +0.14% / +0.09% -0.05% -0.09%] index_select strided 7 : Elapsed 0.022 ms (2.168 ms / 100) 2.178 -> 2.174 ( -0.18%) [ +0.32% +0.00% +0.14% / +0.14% -0.18% +0.00%] index_select strided 8 : Elapsed 0.022 ms (2.185 ms / 100) 2.174 -> 2.174 ( +0.00%) [ +0.14% +0.18% +0.00% / +0.00% +0.05% +0.05%] index_select random : Elapsed 0.022 ms (2.177 ms / 100) 2.170 -> 2.169 ( -0.05%) [ +0.00% +0.00% +0.00% / +0.00% -0.05% +0.05%] index_select random_sorted : Elapsed 0.022 ms (2.170 ms / 100) 2.174 -> 2.167 ( -0.32%) [ +0.00% +0.05% +0.00% / +0.14% -0.32% -0.28%] index_select perm : Elapsed 0.022 ms (2.174 ms / 100) 2.169 -> 2.172 ( +0.14%) [ +0.09% +0.05% +0.00% / +0.14% +0.14% +0.23%] index_select perm_sorted : Elapsed 0.022 ms (2.171 ms / 100) B = [5, 4, 40, 20] (stride (1, 100, 400, 5)) A = [16, 4, 40, 20] (stride (1, 16, 1280, 64)) dim = 0 2.379 -> 2.377 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.59% +0.50%] index_select const : Elapsed 0.024 ms (2.380 ms / 100) 2.376 -> 2.375 ( -0.04%) [ +0.13% +0.13% +0.00% / -0.04% +0.51% +0.59%] index_select wrap : Elapsed 0.024 ms (2.379 ms / 100) 2.375 -> 2.379 ( +0.17%) [ +0.08% +0.34% +0.00% / +0.17% +0.51% +0.63%] index_select linear : Elapsed 0.024 ms (2.377 ms / 100) 2.377 -> 2.382 ( +0.21%) [ +0.13% +0.00% +0.00% / +0.21% +0.67% +0.55%] index_select reverse : Elapsed 0.024 ms (2.380 ms / 100) 2.374 -> 2.380 ( +0.25%) [ +0.25% +0.29% +0.00% / +0.25% +0.67% +0.67%] index_select skip64 : Elapsed 0.024 ms (2.380 ms / 100) 2.374 -> 2.380 ( +0.25%) [ +0.21% +0.17% +0.00% / +0.25% +0.80% +0.84%] index_select skip256 : Elapsed 0.024 ms (2.379 ms / 100) 2.392 -> 2.392 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.71% +0.67%] index_select spread : Elapsed 0.024 ms (2.393 ms / 100) 2.390 -> 2.392 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.75% +0.79%] index_select strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.387 -> 2.391 ( +0.17%) [ +0.08% +0.00% +0.08% / +0.17% +0.88% +0.88%] index_select strided 5 : Elapsed 0.024 ms (2.389 ms / 100) 2.390 -> 2.391 ( +0.04%) [ +0.25% +0.00% +0.17% / +0.04% +0.79% +0.67%] index_select strided 7 : Elapsed 0.024 ms (2.396 ms / 100) 2.393 -> 2.393 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.67% +0.67%] index_select strided 8 : Elapsed 0.024 ms (2.393 ms / 100) 2.390 -> 2.388 ( -0.08%) [ +0.00% +0.00% +0.17% / -0.08% +0.79% +0.75%] index_select random : Elapsed 0.024 ms (2.390 ms / 100) 2.392 -> 2.397 ( +0.21%) [ +0.13% +0.00% +0.08% / +0.21% +0.46% +0.46%] index_select random_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.390 -> 2.389 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.54% +0.75%] index_select perm : Elapsed 0.024 ms (2.393 ms / 100) 2.393 -> 2.391 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.33% +0.46%] index_select perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) B = [5, 4, 40, 20] (stride (4, 1, 400, 20)) A = [16, 4, 40, 20] (stride (20, 320, 1280, 1)) dim = 0 2.134 -> 2.140 ( +0.28%) [ +0.09% +0.00% +0.33% / +0.33% +0.28% +0.37%] index_select const : Elapsed 0.021 ms (2.136 ms / 100) 2.202 -> 2.201 ( -0.05%) [ +0.00% +0.05% +0.27% / -0.05% +0.09% +0.05%] index_select wrap : Elapsed 0.022 ms (2.202 ms / 100) 2.200 -> 2.197 ( -0.14%) [ +0.14% +0.05% +0.00% / -0.05% +0.09% -0.14%] index_select linear : Elapsed 0.022 ms (2.203 ms / 100) 2.199 -> 2.204 ( +0.23%) [ +0.09% +0.00% +0.18% / +0.27% +0.41% +0.23%] index_select reverse : Elapsed 0.022 ms (2.201 ms / 100) 2.127 -> 2.127 ( +0.00%) [ +0.00% +0.28% +0.19% / +0.00% +0.38% +0.38%] index_select skip64 : Elapsed 0.021 ms (2.127 ms / 100) 2.135 -> 2.135 ( +0.00%) [ +0.00% +0.09% +0.14% / +0.00% +0.33% +0.33%] index_select skip256 : Elapsed 0.021 ms (2.135 ms / 100) 2.203 -> 2.202 ( -0.05%) [ +0.00% +0.14% +0.00% / -0.05% +0.50% +0.41%] index_select spread : Elapsed 0.022 ms (2.203 ms / 100) 2.200 -> 2.207 ( +0.32%) [ +0.23% +0.36% +0.00% / +0.32% +0.59% +0.59%] index_select strided 3 : Elapsed 0.022 ms (2.205 ms / 100) 2.189 -> 2.196 ( +0.32%) [ +0.23% +0.14% +0.00% / +0.32% +0.55% +0.46%] index_select strided 5 : Elapsed 0.022 ms (2.194 ms / 100) 2.197 -> 2.194 ( -0.14%) [ +0.05% +0.05% +0.00% / -0.14% +0.27% +0.50%] index_select strided 7 : Elapsed 0.022 ms (2.198 ms / 100) 2.141 -> 2.146 ( +0.23%) [ +0.00% +0.00% +0.37% / +0.33% +0.23% +0.33%] index_select strided 8 : Elapsed 0.021 ms (2.141 ms / 100) 2.179 -> 2.186 ( +0.32%) [ +0.18% +0.00% +0.46% / +0.32% +0.60% +0.64%] index_select random : Elapsed 0.022 ms (2.183 ms / 100) 2.184 -> 2.185 ( +0.05%) [ +0.09% +0.00% +0.18% / +0.05% +0.50% +0.32%] index_select random_sorted : Elapsed 0.022 ms (2.186 ms / 100) 2.200 -> 2.201 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.05% +0.36% +0.23%] index_select perm : Elapsed 0.022 ms (2.200 ms / 100) 2.195 -> 2.196 ( +0.05%) [ +0.27% +0.00% +0.14% / +0.05% +0.36% +0.46%] index_select perm_sorted : Elapsed 0.022 ms (2.201 ms / 100) B = [5, 4, 40, 20] (stride (1, 5, 400, 20)) A = [16, 4, 40, 20] (stride (20, 12800, 320, 1)) dim = 0 0.839 -> 0.839 ( +0.00%) [ +1.55% +0.00% +0.60% / +0.95% +0.36% +0.00%] index_select const : Elapsed 0.009 ms (0.852 ms / 100) 0.891 -> 0.864 ( -3.03%) [ +0.00% +0.67% +0.56% / +0.34% -1.91% -3.03%] index_select wrap : Elapsed 0.009 ms (0.891 ms / 100) 0.895 -> 0.872 ( -2.57%) [ +0.00% +0.00% +0.00% / -0.34% -2.57% -2.46%] index_select linear : Elapsed 0.009 ms (0.895 ms / 100) 0.881 -> 0.868 ( -1.48%) [ +0.00% +0.23% +0.23% / +0.00% -1.48% -1.48%] index_select reverse : Elapsed 0.009 ms (0.881 ms / 100) 0.850 -> 0.841 ( -1.06%) [ +0.12% +0.35% +0.00% / -0.71% -0.59% -1.06%] index_select skip64 : Elapsed 0.009 ms (0.851 ms / 100) 0.836 -> 0.839 ( +0.36%) [ +0.24% +0.00% +0.12% / +0.96% +0.48% +0.36%] index_select skip256 : Elapsed 0.008 ms (0.838 ms / 100) 0.888 -> 0.853 ( -3.94%) [ +0.56% +0.11% +0.00% / -0.34% -3.38% -3.94%] index_select spread : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.857 ( -4.03%) [ +0.22% +0.00% +0.78% / -0.11% -3.70% -4.03%] index_select strided 3 : Elapsed 0.009 ms (0.895 ms / 100) 0.893 -> 0.850 ( -4.82%) [ +0.11% +0.34% +0.00% / +0.22% -4.82% -4.70%] index_select strided 5 : Elapsed 0.009 ms (0.894 ms / 100) good 0.896 -> 0.848 ( -5.36%) [ +0.22% +0.11% +0.00% / -0.22% -5.25% -5.36%] index_select strided 7 : Elapsed 0.009 ms (0.898 ms / 100) 0.848 -> 0.842 ( -0.71%) [ +0.00% +0.12% +0.94% / +0.35% -0.59% -0.71%] index_select strided 8 : Elapsed 0.008 ms (0.848 ms / 100) 0.885 -> 0.844 ( -4.63%) [ +0.45% +0.23% +0.00% / +0.00% -4.63% -4.41%] index_select random : Elapsed 0.009 ms (0.889 ms / 100) 0.879 -> 0.853 ( -2.96%) [ +0.00% +0.11% +0.00% / +0.68% -2.96% -2.84%] index_select random_sorted : Elapsed 0.009 ms (0.879 ms / 100) 0.887 -> 0.854 ( -3.72%) [ +0.11% +0.00% +0.34% / +0.23% -3.72% -3.72%] index_select perm : Elapsed 0.009 ms (0.888 ms / 100) 0.886 -> 0.850 ( -4.06%) [ +0.00% +0.79% +0.34% / -0.11% -4.06% -3.84%] index_select perm_sorted : Elapsed 0.009 ms (0.886 ms / 100) B = [5, 4, 40, 20] (stride (40, 200, 1, 800)) A = [16, 4, 40, 20] (stride (20, 320, 1280, 1)) dim = 0 2.252 -> 2.251 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.62% +0.67%] index_select const : Elapsed 0.023 ms (2.252 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.04% +0.22% +0.17%] index_select wrap : Elapsed 0.023 ms (2.320 ms / 100) 2.322 -> 2.320 ( -0.09%) [ +0.04% +0.00% +0.04% / +0.09% -0.09% -0.04%] index_select linear : Elapsed 0.023 ms (2.323 ms / 100) 2.325 -> 2.323 ( -0.09%) [ +0.00% +0.17% +0.00% / -0.09% +0.47% +0.17%] index_select reverse : Elapsed 0.023 ms (2.325 ms / 100) 2.250 -> 2.250 ( +0.00%) [ +0.22% +0.09% +0.00% / +0.00% +0.44% +0.40%] index_select skip64 : Elapsed 0.023 ms (2.255 ms / 100) 2.251 -> 2.253 ( +0.09%) [ +0.18% +0.00% +0.18% / +0.09% +0.71% +0.40%] index_select skip256 : Elapsed 0.023 ms (2.255 ms / 100) 2.320 -> 2.323 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +0.39% +0.43%] index_select spread : Elapsed 0.023 ms (2.323 ms / 100) 2.321 -> 2.324 ( +0.13%) [ +0.00% +0.22% +0.17% / +0.13% +0.56% +0.60%] index_select strided 3 : Elapsed 0.023 ms (2.321 ms / 100) 2.306 -> 2.311 ( +0.22%) [ +0.09% +0.30% +0.00% / +0.22% +0.35% +0.56%] index_select strided 5 : Elapsed 0.023 ms (2.308 ms / 100) 2.313 -> 2.316 ( +0.13%) [ +0.22% +0.00% +0.04% / +0.13% +0.22% +0.61%] index_select strided 7 : Elapsed 0.023 ms (2.318 ms / 100) 2.261 -> 2.261 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.49% +0.40%] index_select strided 8 : Elapsed 0.023 ms (2.261 ms / 100) 2.325 -> 2.321 ( -0.17%) [ +0.00% +0.17% +0.04% / -0.17% +0.13% +0.26%] index_select random : Elapsed 0.023 ms (2.325 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.00% +0.17% +0.04% / +0.09% +0.30% +0.04%] index_select random_sorted : Elapsed 0.023 ms (2.324 ms / 100) 2.317 -> 2.321 ( +0.17%) [ +0.00% +0.09% +0.22% / +0.17% +0.73% +0.39%] index_select perm : Elapsed 0.023 ms (2.317 ms / 100) 2.320 -> 2.325 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.39% +0.47%] index_select perm_sorted : Elapsed 0.023 ms (2.325 ms / 100) B = [5, 4, 40, 20] (stride (40, 200, 1, 800)) A = [16, 4, 40, 20] (stride (4, 1, 64, 2560)) dim = 0 2.487 -> 2.482 ( -0.20%) [ +0.04% +0.00% +0.08% / -0.20% +0.04% +0.08%] index_select const : Elapsed 0.025 ms (2.488 ms / 100) 2.480 -> 2.479 ( -0.04%) [ +0.00% +0.04% +0.24% / -0.04% +0.24% +0.28%] index_select wrap : Elapsed 0.025 ms (2.480 ms / 100) 2.482 -> 2.483 ( +0.04%) [ +0.00% +0.16% +0.24% / +0.04% +0.08% +0.44%] index_select linear : Elapsed 0.025 ms (2.482 ms / 100) 2.482 -> 2.486 ( +0.16%) [ +0.16% +0.00% +0.20% / +0.16% +0.40% +0.28%] index_select reverse : Elapsed 0.025 ms (2.486 ms / 100) 2.481 -> 2.482 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.36% +0.36%] index_select skip64 : Elapsed 0.025 ms (2.484 ms / 100) 2.482 -> 2.480 ( -0.08%) [ +0.08% +0.28% +0.00% / -0.08% +0.24% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.484 ms / 100) 2.475 -> 2.480 ( +0.20%) [ +0.28% +0.24% +0.00% / +0.20% +0.24% +0.28%] index_select spread : Elapsed 0.025 ms (2.482 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.40% +0.28% +0.00% / +0.28% +0.57% +0.49%] index_select strided 3 : Elapsed 0.025 ms (2.480 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.20% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.481 ms / 100) 2.471 -> 2.478 ( +0.28%) [ +0.00% +0.28% +0.20% / +0.28% +0.61% +0.40%] index_select strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.00% +0.16% +0.24% / +0.08% +0.57% +0.57%] index_select strided 8 : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.476 ( +0.28%) [ +0.49% +0.12% +0.00% / +0.28% +0.57% +0.77%] index_select random : Elapsed 0.025 ms (2.481 ms / 100) 2.475 -> 2.478 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.28% +0.36%] index_select random_sorted : Elapsed 0.025 ms (2.475 ms / 100) 2.475 -> 2.474 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% -0.04% +0.04%] index_select perm : Elapsed 0.025 ms (2.476 ms / 100) 2.468 -> 2.473 ( +0.20%) [ +0.61% +0.00% +0.41% / +0.20% +0.65% +0.77%] index_select perm_sorted : Elapsed 0.025 ms (2.483 ms / 100) out_shape = [16, 5, 40, 20] in_shape = [16, 4, 40, 20] idx_dim = 1 B = [16, 5, 40, 20] (stride (4000, 40, 1, 200)) A = [16, 4, 40, 20] (stride (3200, 40, 1, 160)) dim = 1 5.911 -> 5.902 ( -0.15%) [ +0.03% +0.00% +0.14% / +0.07% -0.12% -0.15%] index_add_ linear : Elapsed 0.059 ms (5.913 ms / 100) 5.832 -> 5.812 ( -0.34%) [ +0.00% +0.05% +0.05% / -0.07% -0.34% -0.34%] index_copy_ linear : Elapsed 0.058 ms (5.832 ms / 100) 5.906 -> 5.903 ( -0.05%) [ +0.15% +0.17% +0.00% / +0.30% -0.02% -0.05%] index_add_ reverse : Elapsed 0.059 ms (5.915 ms / 100) 5.825 -> 5.816 ( -0.15%) [ +0.00% +0.10% +0.19% / +0.14% -0.15% -0.10%] index_copy_ reverse : Elapsed 0.058 ms (5.825 ms / 100) 5.905 -> 5.905 ( +0.00%) [ +0.17% +0.00% +0.25% / +0.32% +0.00% +0.07%] index_add_ spread : Elapsed 0.059 ms (5.915 ms / 100) 5.831 -> 5.815 ( -0.27%) [ +0.05% +0.00% +0.00% / +0.03% -0.27% -0.19%] index_copy_ spread : Elapsed 0.058 ms (5.834 ms / 100) 5.920 -> 5.915 ( -0.08%) [ +0.02% +0.00% +0.15% / +0.12% +0.00% -0.08%] index_add_ strided 3 : Elapsed 0.059 ms (5.921 ms / 100) 5.832 -> 5.831 ( -0.02%) [ +0.14% +0.15% +0.00% / +0.21% +0.02% -0.02%] index_copy_ strided 3 : Elapsed 0.058 ms (5.840 ms / 100) 5.926 -> 5.915 ( -0.19%) [ +0.00% +0.02% +0.07% / +0.02% -0.19% -0.02%] index_add_ perm : Elapsed 0.059 ms (5.926 ms / 100) 5.840 -> 5.831 ( -0.15%) [ +0.00% +0.09% +0.02% / -0.05% -0.10% -0.15%] index_copy_ perm : Elapsed 0.058 ms (5.840 ms / 100) 5.921 -> 5.907 ( -0.24%) [ +0.00% +0.05% +0.03% / -0.14% -0.10% -0.24%] index_add_ perm_sorted : Elapsed 0.059 ms (5.921 ms / 100) 5.825 -> 5.823 ( -0.03%) [ +0.19% +0.00% +0.10% / +0.14% -0.02% -0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.836 ms / 100) 6.146 -> 6.141 ( -0.08%) [ +0.00% +0.00% +0.21% / +0.10% -0.02% -0.08%] index_select const : Elapsed 0.061 ms (6.146 ms / 100) 6.218 -> 6.203 ( -0.24%) [ +0.24% +0.00% +0.23% / +0.23% -0.21% -0.24%] index_select wrap : Elapsed 0.062 ms (6.233 ms / 100) 6.208 -> 6.196 ( -0.19%) [ +0.19% +0.00% +0.23% / +0.16% -0.14% -0.19%] index_select linear : Elapsed 0.062 ms (6.220 ms / 100) 6.211 -> 6.196 ( -0.24%) [ +0.02% +0.03% +0.00% / -0.08% -0.24% -0.24%] index_select reverse : Elapsed 0.062 ms (6.212 ms / 100) 6.151 -> 6.142 ( -0.15%) [ +0.15% +0.03% +0.00% / +0.18% -0.15% -0.10%] index_select skip64 : Elapsed 0.062 ms (6.160 ms / 100) 6.150 -> 6.136 ( -0.23%) [ +0.03% +0.00% +0.10% / +0.07% -0.23% -0.20%] index_select skip256 : Elapsed 0.062 ms (6.152 ms / 100) 6.208 -> 6.187 ( -0.34%) [ +0.02% +0.00% +0.03% / +0.08% -0.13% -0.34%] index_select spread : Elapsed 0.062 ms (6.209 ms / 100) 6.220 -> 6.206 ( -0.23%) [ +0.00% +0.00% +0.05% / +0.13% -0.21% -0.23%] index_select strided 3 : Elapsed 0.062 ms (6.220 ms / 100) 6.201 -> 6.188 ( -0.21%) [ +0.21% +0.00% +0.15% / +0.06% -0.21% -0.13%] index_select random : Elapsed 0.062 ms (6.214 ms / 100) 6.199 -> 6.174 ( -0.40%) [ +0.08% +0.11% +0.00% / +0.16% -0.40% -0.29%] index_select random_sorted : Elapsed 0.062 ms (6.204 ms / 100) B = [16, 5, 40, 20] (stride (800, 12800, 20, 1)) A = [16, 4, 40, 20] (stride (80, 1, 1280, 4)) dim = 1 5.607 -> 5.616 ( +0.16%) [ +0.07% +0.00% +0.16% / +0.16% +0.50% +0.54%] index_add_ linear : Elapsed 0.056 ms (5.611 ms / 100) 5.547 -> 5.552 ( +0.09%) [ +0.25% +0.00% +0.18% / +0.09% +0.47% +0.56%] index_copy_ linear : Elapsed 0.056 ms (5.561 ms / 100) 5.600 -> 5.615 ( +0.27%) [ +0.05% +0.00% +0.16% / +0.27% +0.55% +0.59%] index_add_ reverse : Elapsed 0.056 ms (5.603 ms / 100) 5.542 -> 5.542 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.00% +0.67% +0.81%] index_copy_ reverse : Elapsed 0.055 ms (5.547 ms / 100) 5.608 -> 5.613 ( +0.09%) [ +0.12% +0.00% +0.18% / +0.09% +0.53% +0.37%] index_add_ spread : Elapsed 0.056 ms (5.615 ms / 100) 5.545 -> 5.554 ( +0.16%) [ +0.13% +0.00% +0.05% / +0.16% +0.63% +0.67%] index_copy_ spread : Elapsed 0.056 ms (5.552 ms / 100) 5.619 -> 5.624 ( +0.09%) [ +0.11% +0.00% +0.12% / +0.09% +0.64% +0.66%] index_add_ strided 3 : Elapsed 0.056 ms (5.625 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.00% +0.07% +0.18% / +0.02% +0.85% +0.70%] index_copy_ strided 3 : Elapsed 0.056 ms (5.557 ms / 100) 5.615 -> 5.628 ( +0.23%) [ +0.25% +0.00% +0.36% / +0.23% +0.55% +0.68%] index_add_ perm : Elapsed 0.056 ms (5.629 ms / 100) 5.560 -> 5.571 ( +0.20%) [ +0.05% +0.11% +0.00% / +0.20% +0.68% +0.61%] index_copy_ perm : Elapsed 0.056 ms (5.563 ms / 100) 5.620 -> 5.622 ( +0.04%) [ +0.05% +0.00% +0.14% / +0.04% +0.55% +0.50%] index_add_ perm_sorted : Elapsed 0.056 ms (5.623 ms / 100) 5.553 -> 5.563 ( +0.18%) [ +0.14% +0.00% +0.18% / +0.18% +0.79% +0.70%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.561 ms / 100) 5.891 -> 5.896 ( +0.08%) [ +0.00% +0.10% +0.03% / +0.08% +0.63% +0.78%] index_select const : Elapsed 0.059 ms (5.891 ms / 100) 5.888 -> 5.891 ( +0.05%) [ +0.00% +0.05% +0.19% / +0.05% +0.83% +0.75%] index_select wrap : Elapsed 0.059 ms (5.888 ms / 100) 5.883 -> 5.891 ( +0.14%) [ +0.03% +0.00% +0.20% / +0.14% +0.87% +0.83%] index_select linear : Elapsed 0.059 ms (5.885 ms / 100) 5.892 -> 5.899 ( +0.12%) [ +0.00% +0.05% +0.29% / +0.12% +0.83% +0.78%] index_select reverse : Elapsed 0.059 ms (5.892 ms / 100) 5.893 -> 5.898 ( +0.08%) [ +0.00% +0.05% +0.17% / +0.08% +0.83% +0.63%] index_select skip64 : Elapsed 0.059 ms (5.893 ms / 100) 5.883 -> 5.896 ( +0.22%) [ +0.00% +0.10% +0.19% / +0.22% +0.82% +0.82%] index_select skip256 : Elapsed 0.059 ms (5.883 ms / 100) 5.892 -> 5.907 ( +0.25%) [ +0.00% +0.10% +0.19% / +0.25% +0.80% +0.73%] index_select spread : Elapsed 0.059 ms (5.892 ms / 100) 5.889 -> 5.900 ( +0.19%) [ +0.00% +0.08% +0.24% / +0.19% +0.83% +0.83%] index_select strided 3 : Elapsed 0.059 ms (5.889 ms / 100) 5.893 -> 5.899 ( +0.10%) [ +0.00% +0.02% +0.12% / +0.10% +0.76% +0.87%] index_select random : Elapsed 0.059 ms (5.893 ms / 100) 5.894 -> 5.901 ( +0.12%) [ +0.00% +0.05% +0.08% / +0.12% +0.75% +0.63%] index_select random_sorted : Elapsed 0.059 ms (5.894 ms / 100) B = [16, 5, 40, 20] (stride (800, 12800, 20, 1)) A = [16, 4, 40, 20] (stride (4, 1, 1280, 64)) dim = 1 3.401 -> 3.363 ( -1.12%) [ +0.15% +0.06% +0.00% / +0.06% -1.12% -1.00%] index_add_ linear : Elapsed 0.034 ms (3.406 ms / 100) 3.349 -> 3.324 ( -0.75%) [ +0.00% +0.24% +0.36% / +0.12% -0.75% -0.75%] index_copy_ linear : Elapsed 0.033 ms (3.349 ms / 100) 3.398 -> 3.368 ( -0.88%) [ +0.03% +0.00% +0.03% / -0.09% -0.88% -0.88%] index_add_ reverse : Elapsed 0.034 ms (3.399 ms / 100) 3.346 -> 3.314 ( -0.96%) [ +0.03% +0.00% +0.12% / +0.00% -0.57% -0.96%] index_copy_ reverse : Elapsed 0.033 ms (3.347 ms / 100) 3.401 -> 3.350 ( -1.50%) [ +0.00% +0.06% +0.09% / +0.12% -1.50% -1.41%] index_add_ spread : Elapsed 0.034 ms (3.401 ms / 100) 3.348 -> 3.314 ( -1.02%) [ +0.00% +0.09% +0.12% / +0.18% -1.02% -0.63%] index_copy_ spread : Elapsed 0.033 ms (3.348 ms / 100) 3.393 -> 3.362 ( -0.91%) [ +0.06% +0.03% +0.00% / -0.09% -0.74% -0.91%] index_add_ strided 3 : Elapsed 0.034 ms (3.395 ms / 100) 3.343 -> 3.310 ( -0.99%) [ +0.00% +0.03% +0.18% / -0.09% -0.99% -0.69%] index_copy_ strided 3 : Elapsed 0.033 ms (3.343 ms / 100) 3.396 -> 3.365 ( -0.91%) [ +0.09% +0.00% +0.03% / +0.06% -0.77% -0.91%] index_add_ perm : Elapsed 0.034 ms (3.399 ms / 100) 3.345 -> 3.316 ( -0.87%) [ +0.09% +0.27% +0.00% / +0.12% -0.72% -0.87%] index_copy_ perm : Elapsed 0.033 ms (3.348 ms / 100) 3.404 -> 3.363 ( -1.20%) [ +0.15% +0.00% +0.18% / +0.26% -1.20% -0.97%] index_add_ perm_sorted : Elapsed 0.034 ms (3.409 ms / 100) 3.356 -> 3.321 ( -1.04%) [ +0.36% +0.00% +0.42% / +0.27% -1.04% -0.89%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.368 ms / 100) 3.458 -> 3.438 ( -0.58%) [ +0.00% +0.20% +0.06% / +0.20% -0.58% -0.46%] index_select const : Elapsed 0.035 ms (3.458 ms / 100) 3.454 -> 3.425 ( -0.84%) [ +0.00% +0.17% +0.14% / +0.23% -0.84% -0.64%] index_select wrap : Elapsed 0.035 ms (3.454 ms / 100) 3.453 -> 3.426 ( -0.78%) [ +0.23% +0.17% +0.00% / +0.17% -0.78% -0.72%] index_select linear : Elapsed 0.035 ms (3.461 ms / 100) 3.459 -> 3.418 ( -1.19%) [ +0.00% +0.06% +0.12% / +0.14% -1.16% -1.19%] index_select reverse : Elapsed 0.035 ms (3.459 ms / 100) 3.457 -> 3.416 ( -1.19%) [ +0.29% +0.00% +0.20% / +0.29% -0.95% -1.19%] index_select skip64 : Elapsed 0.035 ms (3.467 ms / 100) 3.482 -> 3.418 ( -1.84%) [ +0.00% +0.23% +0.09% / +0.00% -1.72% -1.84%] index_select skip256 : Elapsed 0.035 ms (3.482 ms / 100) 3.460 -> 3.443 ( -0.49%) [ +0.00% +0.20% +0.12% / +0.20% -0.49% -0.46%] index_select spread : Elapsed 0.035 ms (3.460 ms / 100) 3.461 -> 3.437 ( -0.69%) [ +0.26% +0.00% +0.17% / +0.23% -0.69% -0.55%] index_select strided 3 : Elapsed 0.035 ms (3.470 ms / 100) 3.464 -> 3.443 ( -0.61%) [ +0.00% +0.06% +0.09% / +0.14% -0.61% -0.61%] index_select random : Elapsed 0.035 ms (3.464 ms / 100) 3.461 -> 3.439 ( -0.64%) [ +0.00% +0.12% +0.23% / +0.09% -0.52% -0.64%] index_select random_sorted : Elapsed 0.035 ms (3.461 ms / 100) B = [16, 5, 40, 20] (stride (1, 12800, 320, 16)) A = [16, 4, 40, 20] (stride (160, 40, 1, 2560)) dim = 1 5.631 -> 5.627 ( -0.07%) [ +0.02% +0.00% +0.05% / +0.09% -0.07% +0.11%] index_add_ linear : Elapsed 0.056 ms (5.632 ms / 100) 5.597 -> 5.584 ( -0.23%) [ +0.13% +0.00% +0.04% / -0.05% -0.21% -0.23%] index_copy_ linear : Elapsed 0.056 ms (5.604 ms / 100) 5.620 -> 5.621 ( +0.02%) [ +0.05% +0.00% +0.30% / +0.07% +0.11% +0.02%] index_add_ reverse : Elapsed 0.056 ms (5.623 ms / 100) 5.587 -> 5.589 ( +0.04%) [ +0.00% +0.02% +0.20% / +0.07% +0.04% +0.04%] index_copy_ reverse : Elapsed 0.056 ms (5.587 ms / 100) 5.633 -> 5.619 ( -0.25%) [ +0.04% +0.00% +0.04% / -0.05% -0.02% -0.25%] index_add_ spread : Elapsed 0.056 ms (5.635 ms / 100) 5.594 -> 5.587 ( -0.13%) [ +0.00% +0.05% +0.20% / +0.09% -0.11% -0.13%] index_copy_ spread : Elapsed 0.056 ms (5.594 ms / 100) 5.621 -> 5.630 ( +0.16%) [ +0.00% +0.00% +0.05% / +0.16% +0.66% +0.68%] index_add_ strided 3 : Elapsed 0.056 ms (5.621 ms / 100) 5.584 -> 5.590 ( +0.11%) [ +0.18% +0.00% +0.13% / +0.11% +0.56% +0.54%] index_copy_ strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.621 -> 5.625 ( +0.07%) [ +0.12% +0.00% +0.11% / +0.09% +0.07% +0.11%] index_add_ perm : Elapsed 0.056 ms (5.628 ms / 100) 5.579 -> 5.582 ( +0.05%) [ +0.16% +0.00% +0.09% / +0.05% +0.09% +0.13%] index_copy_ perm : Elapsed 0.056 ms (5.588 ms / 100) 5.626 -> 5.620 ( -0.11%) [ +0.09% +0.00% +0.12% / +0.18% -0.09% -0.11%] index_add_ perm_sorted : Elapsed 0.056 ms (5.631 ms / 100) 5.588 -> 5.586 ( -0.04%) [ +0.00% +0.16% +0.38% / +0.16% -0.04% -0.04%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.588 ms / 100) 5.855 -> 5.861 ( +0.10%) [ +0.00% +0.03% +0.07% / +0.10% +0.15% +0.36%] index_select const : Elapsed 0.059 ms (5.855 ms / 100) 5.931 -> 5.942 ( +0.19%) [ +0.17% +0.00% +0.25% / +0.24% +0.22% +0.19%] index_select wrap : Elapsed 0.059 ms (5.941 ms / 100) 5.926 -> 5.932 ( +0.10%) [ +0.19% +0.00% +0.15% / +0.19% +0.10% +0.15%] index_select linear : Elapsed 0.059 ms (5.937 ms / 100) 5.922 -> 5.927 ( +0.08%) [ +0.00% +0.12% +0.25% / +0.08% +0.22% +0.29%] index_select reverse : Elapsed 0.059 ms (5.922 ms / 100) 5.861 -> 5.866 ( +0.09%) [ +0.00% +0.00% +0.12% / +0.09% +0.19% +0.17%] index_select skip64 : Elapsed 0.059 ms (5.861 ms / 100) 5.860 -> 5.858 ( -0.03%) [ +0.02% +0.05% +0.00% / -0.03% +0.12% +0.15%] index_select skip256 : Elapsed 0.059 ms (5.861 ms / 100) 5.924 -> 5.924 ( +0.00%) [ +0.00% +0.07% +0.22% / +0.00% +0.32% +0.20%] index_select spread : Elapsed 0.059 ms (5.924 ms / 100) 5.923 -> 5.934 ( +0.19%) [ +0.07% +0.00% +0.00% / +0.19% +0.49% +0.35%] index_select strided 3 : Elapsed 0.059 ms (5.927 ms / 100) 5.929 -> 5.942 ( +0.22%) [ +0.00% +0.05% +0.24% / +0.25% +0.30% +0.22%] index_select random : Elapsed 0.059 ms (5.929 ms / 100) 5.924 -> 5.936 ( +0.20%) [ +0.10% +0.00% +0.12% / +0.24% +0.20% +0.27%] index_select random_sorted : Elapsed 0.059 ms (5.930 ms / 100) B = [16, 5, 40, 20] (stride (40, 12800, 1, 640)) A = [16, 4, 40, 20] (stride (3200, 20, 80, 1)) dim = 1 5.921 -> 5.896 ( -0.42%) [ +0.00% +0.02% +0.17% / +0.12% -0.42% -0.42%] index_add_ linear : Elapsed 0.059 ms (5.921 ms / 100) 5.853 -> 5.832 ( -0.36%) [ +0.07% +0.00% +0.31% / +0.09% -0.32% -0.36%] index_copy_ linear : Elapsed 0.059 ms (5.857 ms / 100) 5.912 -> 5.898 ( -0.24%) [ +0.00% +0.19% +0.10% / +0.12% -0.24% -0.15%] index_add_ reverse : Elapsed 0.059 ms (5.912 ms / 100) 5.852 -> 5.831 ( -0.36%) [ +0.03% +0.00% +0.09% / +0.05% -0.29% -0.36%] index_copy_ reverse : Elapsed 0.059 ms (5.854 ms / 100) 5.918 -> 5.893 ( -0.42%) [ +0.07% +0.00% +0.00% / +0.00% -0.35% -0.42%] index_add_ spread : Elapsed 0.059 ms (5.922 ms / 100) 5.849 -> 5.828 ( -0.36%) [ +0.09% +0.00% +0.21% / +0.21% -0.36% -0.27%] index_copy_ spread : Elapsed 0.059 ms (5.854 ms / 100) 5.896 -> 5.890 ( -0.10%) [ +0.00% +0.12% +0.03% / +0.05% -0.10% +0.02%] index_add_ strided 3 : Elapsed 0.059 ms (5.896 ms / 100) 5.836 -> 5.825 ( -0.19%) [ +0.03% +0.00% +0.00% / +0.03% -0.17% -0.19%] index_copy_ strided 3 : Elapsed 0.058 ms (5.838 ms / 100) 5.914 -> 5.888 ( -0.44%) [ +0.00% +0.00% +0.25% / +0.20% -0.19% -0.44%] index_add_ perm : Elapsed 0.059 ms (5.914 ms / 100) 5.845 -> 5.832 ( -0.22%) [ +0.12% +0.00% +0.14% / +0.05% -0.17% -0.22%] index_copy_ perm : Elapsed 0.059 ms (5.852 ms / 100) 5.916 -> 5.895 ( -0.35%) [ +0.05% +0.00% +0.17% / +0.10% -0.30% -0.35%] index_add_ perm_sorted : Elapsed 0.059 ms (5.919 ms / 100) 5.845 -> 5.828 ( -0.29%) [ +0.15% +0.00% +0.31% / +0.15% -0.29% -0.27%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.854 ms / 100) 6.174 -> 6.147 ( -0.44%) [ +0.03% +0.00% +0.03% / +0.10% -0.36% -0.44%] index_select const : Elapsed 0.062 ms (6.176 ms / 100) 6.235 -> 6.210 ( -0.40%) [ +0.13% +0.00% +0.21% / +0.18% -0.40% -0.35%] index_select wrap : Elapsed 0.062 ms (6.243 ms / 100) 6.216 -> 6.190 ( -0.42%) [ +0.00% +0.00% +0.18% / +0.10% -0.32% -0.42%] index_select linear : Elapsed 0.062 ms (6.216 ms / 100) 6.227 -> 6.195 ( -0.51%) [ +0.19% +0.08% +0.00% / -0.02% -0.51% -0.40%] index_select reverse : Elapsed 0.062 ms (6.239 ms / 100) 6.166 -> 6.147 ( -0.31%) [ +0.00% +0.11% +0.13% / +0.02% -0.28% -0.31%] index_select skip64 : Elapsed 0.062 ms (6.166 ms / 100) 6.176 -> 6.149 ( -0.44%) [ +0.03% +0.03% +0.00% / +0.18% -0.42% -0.44%] index_select skip256 : Elapsed 0.062 ms (6.178 ms / 100) 6.240 -> 6.212 ( -0.45%) [ +0.02% +0.00% +0.03% / +0.10% -0.42% -0.45%] index_select spread : Elapsed 0.062 ms (6.241 ms / 100) 6.226 -> 6.197 ( -0.47%) [ +0.08% +0.02% +0.00% / +0.00% -0.47% -0.42%] index_select strided 3 : Elapsed 0.062 ms (6.231 ms / 100) 6.211 -> 6.178 ( -0.53%) [ +0.02% +0.00% +0.03% / +0.13% -0.45% -0.53%] index_select random : Elapsed 0.062 ms (6.212 ms / 100) 6.203 -> 6.178 ( -0.40%) [ +0.13% +0.00% +0.10% / +0.16% -0.40% -0.35%] index_select random_sorted : Elapsed 0.062 ms (6.211 ms / 100) B = [16, 5, 40, 20] (stride (1, 320, 1600, 16)) A = [16, 4, 40, 20] (stride (40, 640, 1, 2560)) dim = 1 5.812 -> 5.821 ( +0.15%) [ +0.00% +0.03% +0.10% / +0.15% +0.71% +0.50%] index_add_ linear : Elapsed 0.058 ms (5.812 ms / 100) 5.775 -> 5.788 ( +0.23%) [ +0.03% +0.00% +0.07% / +0.23% +0.48% +0.48%] index_copy_ linear : Elapsed 0.058 ms (5.777 ms / 100) 5.819 -> 5.827 ( +0.14%) [ +0.00% +0.12% +0.15% / +0.14% +0.46% +0.34%] index_add_ reverse : Elapsed 0.058 ms (5.819 ms / 100) 5.780 -> 5.786 ( +0.10%) [ +0.09% +0.00% +0.00% / +0.10% +0.26% +0.35%] index_copy_ reverse : Elapsed 0.058 ms (5.785 ms / 100) 5.818 -> 5.831 ( +0.22%) [ +0.00% +0.07% +0.03% / +0.22% +0.45% +0.52%] index_add_ spread : Elapsed 0.058 ms (5.818 ms / 100) 5.780 -> 5.783 ( +0.05%) [ +0.00% +0.10% +0.07% / +0.05% +0.43% +0.31%] index_copy_ spread : Elapsed 0.058 ms (5.780 ms / 100) 5.822 -> 5.829 ( +0.12%) [ +0.07% +0.05% +0.00% / +0.12% +0.17% +0.19%] index_add_ strided 3 : Elapsed 0.058 ms (5.826 ms / 100) 5.781 -> 5.790 ( +0.16%) [ +0.00% +0.09% +0.24% / +0.26% +0.21% +0.16%] index_copy_ strided 3 : Elapsed 0.058 ms (5.781 ms / 100) 5.819 -> 5.831 ( +0.21%) [ +0.12% +0.00% +0.31% / +0.21% +0.53% +0.31%] index_add_ perm : Elapsed 0.058 ms (5.826 ms / 100) 5.785 -> 5.786 ( +0.02%) [ +0.00% +0.00% +0.09% / +0.02% +0.33% +0.26%] index_copy_ perm : Elapsed 0.058 ms (5.785 ms / 100) 5.811 -> 5.825 ( +0.24%) [ +0.00% +0.09% +0.19% / +0.24% +0.69% +0.41%] index_add_ perm_sorted : Elapsed 0.058 ms (5.811 ms / 100) 5.769 -> 5.774 ( +0.09%) [ +0.00% +0.00% +0.28% / +0.09% +0.57% +0.59%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.769 ms / 100) 6.107 -> 6.113 ( +0.10%) [ +0.13% +0.00% +0.20% / +0.29% +0.10% +0.18%] index_select const : Elapsed 0.061 ms (6.115 ms / 100) 6.166 -> 6.173 ( +0.11%) [ +0.11% +0.00% +0.28% / +0.11% +0.19% +0.23%] index_select wrap : Elapsed 0.062 ms (6.173 ms / 100) 6.150 -> 6.166 ( +0.26%) [ +0.00% +0.05% +0.16% / +0.26% +0.28% +0.47%] index_select linear : Elapsed 0.062 ms (6.150 ms / 100) 6.150 -> 6.154 ( +0.07%) [ +0.00% +0.08% +0.15% / +0.07% +0.31% +0.28%] index_select reverse : Elapsed 0.062 ms (6.150 ms / 100) 6.115 -> 6.109 ( -0.10%) [ +0.08% +0.00% +0.02% / +0.10% +0.08% -0.10%] index_select skip64 : Elapsed 0.061 ms (6.120 ms / 100) 6.111 -> 6.114 ( +0.05%) [ +0.11% +0.00% +0.11% / +0.11% +0.16% +0.05%] index_select skip256 : Elapsed 0.061 ms (6.118 ms / 100) 6.143 -> 6.174 ( +0.50%) [ +0.00% +0.15% +0.26% / +0.50% +0.93% +0.70%] index_select spread : Elapsed 0.061 ms (6.143 ms / 100) 6.166 -> 6.158 ( -0.13%) [ +0.15% +0.00% +0.05% / -0.13% +0.26% +0.16%] index_select strided 3 : Elapsed 0.062 ms (6.175 ms / 100) 6.136 -> 6.150 ( +0.23%) [ +0.00% +0.05% +0.15% / +0.23% +0.31% +0.34%] index_select random : Elapsed 0.061 ms (6.136 ms / 100) 6.116 -> 6.123 ( +0.11%) [ +0.00% +0.07% +0.15% / +0.11% +0.70% +0.65%] index_select random_sorted : Elapsed 0.061 ms (6.116 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 1600, 80)) A = [16, 4, 40, 20] (stride (1, 12800, 320, 16)) dim = 1 3.448 -> 3.434 ( -0.41%) [ +0.00% +0.17% +0.23% / +0.15% -0.20% -0.41%] index_add_ linear : Elapsed 0.034 ms (3.448 ms / 100) 3.380 -> 3.377 ( -0.09%) [ +0.18% +0.00% +0.03% / +0.06% +0.12% -0.09%] index_copy_ linear : Elapsed 0.034 ms (3.386 ms / 100) 3.450 -> 3.443 ( -0.20%) [ +0.12% +0.12% +0.00% / +0.14% -0.20% -0.17%] index_add_ reverse : Elapsed 0.035 ms (3.454 ms / 100) 3.376 -> 3.372 ( -0.12%) [ +0.21% +0.12% +0.00% / +0.36% +0.15% -0.12%] index_copy_ reverse : Elapsed 0.034 ms (3.383 ms / 100) 3.447 -> 3.431 ( -0.46%) [ +0.32% +0.00% +0.00% / +0.06% -0.32% -0.46%] index_add_ spread : Elapsed 0.035 ms (3.458 ms / 100) 3.378 -> 3.378 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.15% +0.09%] index_copy_ spread : Elapsed 0.034 ms (3.382 ms / 100) 3.454 -> 3.422 ( -0.93%) [ +0.20% +0.26% +0.00% / +0.35% -0.69% -0.93%] index_add_ strided 3 : Elapsed 0.035 ms (3.461 ms / 100) 3.387 -> 3.378 ( -0.27%) [ +0.00% +0.21% +0.32% / +0.12% -0.18% -0.27%] index_copy_ strided 3 : Elapsed 0.034 ms (3.387 ms / 100) 3.450 -> 3.434 ( -0.46%) [ +0.00% +0.09% +0.12% / +0.14% -0.12% -0.46%] index_add_ perm : Elapsed 0.035 ms (3.450 ms / 100) 3.384 -> 3.373 ( -0.33%) [ +0.00% +0.18% +0.15% / +0.00% -0.33% +0.06%] index_copy_ perm : Elapsed 0.034 ms (3.384 ms / 100) 3.450 -> 3.434 ( -0.46%) [ +0.29% +0.26% +0.00% / +0.26% -0.43% -0.46%] index_add_ perm_sorted : Elapsed 0.035 ms (3.460 ms / 100) 3.406 -> 3.381 ( -0.73%) [ +0.09% +0.03% +0.00% / +0.15% -0.73% -0.68%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.409 ms / 100) 3.514 -> 3.518 ( +0.11%) [ +0.00% +0.14% +0.11% / +0.11% +0.34% +0.51%] index_select const : Elapsed 0.035 ms (3.514 ms / 100) 3.530 -> 3.528 ( -0.06%) [ +0.20% +0.00% +0.20% / +0.20% -0.06% +0.17%] index_select wrap : Elapsed 0.035 ms (3.537 ms / 100) 3.531 -> 3.534 ( +0.08%) [ +0.17% +0.14% +0.00% / +0.11% +0.20% +0.08%] index_select linear : Elapsed 0.035 ms (3.537 ms / 100) 3.524 -> 3.519 ( -0.14%) [ +0.06% +0.11% +0.00% / +0.23% -0.14% -0.06%] index_select reverse : Elapsed 0.035 ms (3.526 ms / 100) 3.532 -> 3.533 ( +0.03%) [ +0.20% +0.00% +0.23% / +0.14% +0.08% +0.03%] index_select skip64 : Elapsed 0.035 ms (3.539 ms / 100) 3.545 -> 3.519 ( -0.73%) [ +0.25% +0.17% +0.00% / +0.31% -0.62% -0.73%] index_select skip256 : Elapsed 0.036 ms (3.554 ms / 100) 3.542 -> 3.534 ( -0.23%) [ +0.00% +0.06% +0.00% / -0.14% -0.23% -0.08%] index_select spread : Elapsed 0.035 ms (3.542 ms / 100) 3.550 -> 3.538 ( -0.34%) [ +0.00% +0.06% +0.00% / -0.17% -0.17% -0.34%] index_select strided 3 : Elapsed 0.035 ms (3.550 ms / 100) 3.545 -> 3.541 ( -0.11%) [ +0.23% +0.00% +0.11% / +0.17% -0.08% -0.11%] index_select random : Elapsed 0.036 ms (3.553 ms / 100) 3.552 -> 3.536 ( -0.45%) [ +0.03% +0.00% +0.11% / +0.06% -0.45% -0.25%] index_select random_sorted : Elapsed 0.036 ms (3.553 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 1600, 80)) A = [16, 4, 40, 20] (stride (80, 20, 1280, 1)) dim = 1 5.653 -> 5.672 ( +0.34%) [ +0.34% +0.00% +0.23% / +0.34% +0.46% +0.41%] index_add_ linear : Elapsed 0.057 ms (5.672 ms / 100) 5.608 -> 5.613 ( +0.09%) [ +0.11% +0.00% +0.16% / +0.09% +0.21% +0.30%] index_copy_ linear : Elapsed 0.056 ms (5.614 ms / 100) 5.665 -> 5.668 ( +0.05%) [ +0.04% +0.02% +0.00% / +0.05% +0.26% +0.35%] index_add_ reverse : Elapsed 0.057 ms (5.667 ms / 100) 5.607 -> 5.617 ( +0.18%) [ +0.20% +0.00% +0.09% / +0.18% +0.27% +0.36%] index_copy_ reverse : Elapsed 0.056 ms (5.618 ms / 100) 5.666 -> 5.663 ( -0.05%) [ +0.00% +0.04% +0.00% / -0.05% +0.21% +0.11%] index_add_ spread : Elapsed 0.057 ms (5.666 ms / 100) 5.607 -> 5.608 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.27% +0.32%] index_copy_ spread : Elapsed 0.056 ms (5.607 ms / 100) 5.687 -> 5.693 ( +0.11%) [ +0.00% +0.19% +0.16% / +0.26% +0.28% +0.11%] index_add_ strided 3 : Elapsed 0.057 ms (5.687 ms / 100) 5.631 -> 5.635 ( +0.07%) [ +0.00% +0.11% +0.00% / +0.18% +0.07% +0.25%] index_copy_ strided 3 : Elapsed 0.056 ms (5.631 ms / 100) 5.685 -> 5.685 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.14% +0.14% +0.00%] index_add_ perm : Elapsed 0.057 ms (5.685 ms / 100) 5.630 -> 5.630 ( +0.00%) [ +0.07% +0.00% +0.04% / +0.07% +0.09% +0.00%] index_copy_ perm : Elapsed 0.056 ms (5.634 ms / 100) 5.664 -> 5.668 ( +0.07%) [ +0.02% +0.00% +0.04% / +0.07% +0.23% +0.14%] index_add_ perm_sorted : Elapsed 0.057 ms (5.665 ms / 100) 5.609 -> 5.613 ( +0.07%) [ +0.00% +0.05% +0.20% / +0.07% +0.21% +0.11%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.609 ms / 100) 5.846 -> 5.859 ( +0.22%) [ +0.10% +0.00% +0.33% / +0.22% +0.53% +0.63%] index_select const : Elapsed 0.059 ms (5.852 ms / 100) 5.944 -> 5.958 ( +0.24%) [ +0.05% +0.00% +0.24% / +0.24% +0.44% +0.40%] index_select wrap : Elapsed 0.059 ms (5.947 ms / 100) 5.937 -> 5.945 ( +0.13%) [ +0.02% +0.02% +0.00% / +0.13% +0.35% +0.30%] index_select linear : Elapsed 0.059 ms (5.938 ms / 100) 5.956 -> 5.960 ( +0.07%) [ +0.00% +0.07% +0.08% / +0.07% +0.40% +0.30%] index_select reverse : Elapsed 0.060 ms (5.956 ms / 100) 5.880 -> 5.893 ( +0.22%) [ +0.14% +0.00% +0.17% / +0.22% +0.36% +0.41%] index_select skip64 : Elapsed 0.059 ms (5.888 ms / 100) 5.848 -> 5.862 ( +0.24%) [ +0.09% +0.00% +0.27% / +0.24% +0.53% +0.75%] index_select skip256 : Elapsed 0.059 ms (5.853 ms / 100) 5.956 -> 5.972 ( +0.27%) [ +0.00% +0.15% +0.22% / +0.27% +0.39% +0.45%] index_select spread : Elapsed 0.060 ms (5.956 ms / 100) 5.956 -> 5.968 ( +0.20%) [ +0.05% +0.12% +0.00% / +0.20% +0.37% +0.39%] index_select strided 3 : Elapsed 0.060 ms (5.959 ms / 100) 5.924 -> 5.941 ( +0.29%) [ +0.00% +0.12% +0.25% / +0.29% +0.57% +0.57%] index_select random : Elapsed 0.059 ms (5.924 ms / 100) 5.932 -> 5.945 ( +0.22%) [ +0.00% +0.05% +0.07% / +0.22% +0.37% +0.34%] index_select random_sorted : Elapsed 0.059 ms (5.932 ms / 100) B = [16, 5, 40, 20] (stride (200, 1, 5, 3200)) A = [16, 4, 40, 20] (stride (160, 40, 1, 2560)) dim = 1 6.135 -> 6.120 ( -0.24%) [ +0.00% +0.02% +0.03% / +0.07% -0.21% -0.24%] index_add_ linear : Elapsed 0.061 ms (6.135 ms / 100) 6.116 -> 6.098 ( -0.29%) [ +0.00% +0.15% +0.13% / +0.11% -0.20% -0.29%] index_copy_ linear : Elapsed 0.061 ms (6.116 ms / 100) 6.135 -> 6.127 ( -0.13%) [ +0.00% +0.10% +0.02% / +0.08% -0.08% -0.13%] index_add_ reverse : Elapsed 0.061 ms (6.135 ms / 100) 6.116 -> 6.104 ( -0.20%) [ +0.13% +0.10% +0.00% / +0.08% -0.20% -0.13%] index_copy_ reverse : Elapsed 0.061 ms (6.124 ms / 100) 6.128 -> 6.127 ( -0.02%) [ +0.00% +0.15% +0.26% / +0.11% +0.07% -0.02%] index_add_ spread : Elapsed 0.061 ms (6.128 ms / 100) 6.116 -> 6.100 ( -0.26%) [ +0.10% +0.00% +0.15% / +0.25% -0.26% -0.25%] index_copy_ spread : Elapsed 0.061 ms (6.122 ms / 100) 6.136 -> 6.118 ( -0.29%) [ +0.02% +0.08% +0.00% / +0.05% -0.26% -0.29%] index_add_ strided 3 : Elapsed 0.061 ms (6.137 ms / 100) 6.116 -> 6.101 ( -0.25%) [ +0.00% +0.08% +0.08% / +0.13% -0.25% -0.18%] index_copy_ strided 3 : Elapsed 0.061 ms (6.116 ms / 100) 6.139 -> 6.130 ( -0.15%) [ +0.00% +0.02% +0.03% / +0.10% -0.15% -0.13%] index_add_ perm : Elapsed 0.061 ms (6.139 ms / 100) 6.121 -> 6.100 ( -0.34%) [ +0.00% +0.10% +0.05% / -0.02% -0.26% -0.34%] index_copy_ perm : Elapsed 0.061 ms (6.121 ms / 100) 6.134 -> 6.128 ( -0.10%) [ +0.00% +0.08% +0.05% / +0.20% -0.05% -0.10%] index_add_ perm_sorted : Elapsed 0.061 ms (6.134 ms / 100) 6.118 -> 6.096 ( -0.36%) [ +0.03% +0.00% +0.11% / +0.15% -0.33% -0.36%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.120 ms / 100) 6.325 -> 6.306 ( -0.30%) [ +0.00% +0.08% +0.11% / +0.14% -0.21% -0.30%] index_select const : Elapsed 0.063 ms (6.325 ms / 100) 6.439 -> 6.418 ( -0.33%) [ +0.00% +0.02% +0.06% / +0.02% -0.30% -0.33%] index_select wrap : Elapsed 0.064 ms (6.439 ms / 100) 6.431 -> 6.406 ( -0.39%) [ +0.00% +0.00% +0.05% / +0.02% -0.30% -0.39%] index_select linear : Elapsed 0.064 ms (6.431 ms / 100) 6.413 -> 6.391 ( -0.34%) [ +0.00% +0.03% +0.19% / +0.19% -0.33% -0.34%] index_select reverse : Elapsed 0.064 ms (6.413 ms / 100) 6.330 -> 6.308 ( -0.35%) [ +0.02% +0.00% +0.08% / -0.05% -0.24% -0.35%] index_select skip64 : Elapsed 0.063 ms (6.331 ms / 100) 6.323 -> 6.308 ( -0.24%) [ +0.00% +0.14% +0.03% / +0.19% -0.24% -0.16%] index_select skip256 : Elapsed 0.063 ms (6.323 ms / 100) 6.417 -> 6.394 ( -0.36%) [ +0.16% +0.00% +0.08% / +0.08% -0.34% -0.36%] index_select spread : Elapsed 0.064 ms (6.427 ms / 100) 6.439 -> 6.411 ( -0.43%) [ +0.02% +0.00% +0.09% / +0.09% -0.40% -0.43%] index_select strided 3 : Elapsed 0.064 ms (6.440 ms / 100) 6.405 -> 6.378 ( -0.42%) [ +0.05% +0.00% +0.03% / +0.09% -0.22% -0.42%] index_select random : Elapsed 0.064 ms (6.408 ms / 100) 6.403 -> 6.374 ( -0.45%) [ +0.06% +0.00% +0.00% / +0.11% -0.45% -0.39%] index_select random_sorted : Elapsed 0.064 ms (6.407 ms / 100) out_shape = [16, 4, 5, 20] in_shape = [16, 4, 40, 20] idx_dim = 2 B = [16, 4, 5, 20] (stride (400, 5, 1, 20)) A = [16, 4, 40, 20] (stride (20, 12800, 320, 1)) dim = 2 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.41%] index_select const : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.41%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.61% +0.61%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.47% +0.47%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_select skip64 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.54% +0.54%] index_select skip256 : Elapsed 0.015 ms (1.475 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.47% +0.47%] index_select spread : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.61% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.61% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.61% +0.61%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.61% +0.68%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.54% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [16, 4, 5, 20] (stride (400, 1, 4, 20)) A = [16, 4, 40, 20] (stride (1, 640, 16, 2560)) dim = 2 1.666 -> 1.667 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.60% +0.72%] index_select const : Elapsed 0.017 ms (1.667 ms / 100) 1.638 -> 1.640 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.55% +0.49%] index_select wrap : Elapsed 0.016 ms (1.640 ms / 100) 1.637 -> 1.638 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.73% +0.61%] index_select linear : Elapsed 0.016 ms (1.637 ms / 100) 1.614 -> 1.618 ( +0.25%) [ +0.19% +0.00% +0.12% / +0.25% +0.93% +0.87%] index_select reverse : Elapsed 0.016 ms (1.617 ms / 100) 1.666 -> 1.666 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.72% +0.72%] index_select skip64 : Elapsed 0.017 ms (1.666 ms / 100) 1.666 -> 1.666 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.72% +0.66%] index_select skip256 : Elapsed 0.017 ms (1.667 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.68% +0.68%] index_select spread : Elapsed 0.016 ms (1.613 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.00% +0.12% +0.06% / +0.06% +0.81% +0.75%] index_select strided 3 : Elapsed 0.016 ms (1.606 ms / 100) 1.628 -> 1.629 ( +0.06%) [ +0.06% +0.25% +0.00% / +0.06% +0.98% +0.74%] index_select strided 5 : Elapsed 0.016 ms (1.629 ms / 100) 1.617 -> 1.620 ( +0.19%) [ +0.06% +0.00% +0.06% / +0.19% +0.74% +0.68%] index_select strided 7 : Elapsed 0.016 ms (1.618 ms / 100) 1.608 -> 1.609 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.81% +0.81%] index_select strided 8 : Elapsed 0.016 ms (1.609 ms / 100) 1.594 -> 1.595 ( +0.06%) [ +0.19% +0.25% +0.00% / +0.06% +0.69% +0.88%] index_select strided 16 : Elapsed 0.016 ms (1.597 ms / 100) 1.616 -> 1.617 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.62% +0.62%] index_select random : Elapsed 0.016 ms (1.616 ms / 100) 1.610 -> 1.612 ( +0.12%) [ +0.19% +0.00% +0.06% / +0.12% +0.81% +0.75%] index_select random_sorted : Elapsed 0.016 ms (1.613 ms / 100) 1.614 -> 1.617 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.62% +0.74%] index_select perm : Elapsed 0.016 ms (1.617 ms / 100) 1.617 -> 1.617 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.56% +0.68%] index_select perm_sorted : Elapsed 0.016 ms (1.618 ms / 100) B = [16, 4, 5, 20] (stride (20, 1600, 320, 1)) A = [16, 4, 40, 20] (stride (3200, 1, 80, 4)) dim = 2 1.481 -> 1.481 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.27% +0.20%] index_select const : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.34% +0.07% +0.00% / +0.07% +0.41% +0.34%] index_select wrap : Elapsed 0.015 ms (1.485 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.34% +0.34%] index_select linear : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.47% +0.47%] index_select reverse : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.41% +0.34%] index_select skip64 : Elapsed 0.015 ms (1.481 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.41% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.480 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select spread : Elapsed 0.015 ms (1.480 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.481 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.54% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.54% +0.34%] index_select strided 8 : Elapsed 0.015 ms (1.481 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.68% +0.68%] index_select random : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_select perm : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.478 ms / 100) B = [16, 4, 5, 20] (stride (80, 1, 1280, 4)) A = [16, 4, 40, 20] (stride (3200, 40, 1, 160)) dim = 2 1.608 -> 1.610 ( +0.12%) [ +0.19% +0.12% +0.00% / +0.12% +0.62% +0.68%] index_select const : Elapsed 0.016 ms (1.611 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.50% +0.56%] index_select wrap : Elapsed 0.016 ms (1.613 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.62% +0.68%] index_select linear : Elapsed 0.016 ms (1.612 ms / 100) 1.609 -> 1.611 ( +0.12%) [ +0.00% +0.25% +0.06% / +0.12% +0.81% +0.68%] index_select reverse : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.614 ms / 100) 1.608 -> 1.609 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.62% +0.68%] index_select skip256 : Elapsed 0.016 ms (1.610 ms / 100) 1.599 -> 1.599 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.81% +0.75%] index_select spread : Elapsed 0.016 ms (1.599 ms / 100) 1.604 -> 1.609 ( +0.31%) [ +0.31% +0.19% +0.00% / +0.31% +0.87% +0.87%] index_select strided 3 : Elapsed 0.016 ms (1.609 ms / 100) 1.606 -> 1.609 ( +0.19%) [ +0.06% +0.00% +0.06% / +0.19% +0.75% +0.68%] index_select strided 5 : Elapsed 0.016 ms (1.607 ms / 100) 1.599 -> 1.601 ( +0.13%) [ +0.19% +0.19% +0.00% / +0.13% +0.75% +0.69%] index_select strided 7 : Elapsed 0.016 ms (1.602 ms / 100) 1.605 -> 1.606 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.75% +0.75%] index_select strided 8 : Elapsed 0.016 ms (1.607 ms / 100) 1.602 -> 1.604 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.69% +0.62%] index_select strided 16 : Elapsed 0.016 ms (1.603 ms / 100) 1.607 -> 1.607 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.68% +0.93%] index_select random : Elapsed 0.016 ms (1.608 ms / 100) 1.599 -> 1.600 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.56% +0.69%] index_select random_sorted : Elapsed 0.016 ms (1.599 ms / 100) 1.604 -> 1.607 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.81% +0.87%] index_select perm : Elapsed 0.016 ms (1.607 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.56% +0.75%] index_select perm_sorted : Elapsed 0.016 ms (1.609 ms / 100) out_shape = [16, 4, 40, 5] in_shape = [16, 4, 40, 20] idx_dim = 3 B = [16, 4, 40, 5] (stride (800, 200, 1, 40)) A = [16, 4, 40, 20] (stride (80, 1, 1280, 4)) dim = 3 1.797 -> 1.800 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +0.67% +0.83%] index_select const : Elapsed 0.018 ms (1.801 ms / 100) 1.805 -> 1.806 ( +0.06%) [ +0.28% +0.00% +0.06% / +0.06% +0.44% +0.39%] index_select wrap : Elapsed 0.018 ms (1.810 ms / 100) 1.803 -> 1.805 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.50% +0.39%] index_select linear : Elapsed 0.018 ms (1.804 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.39% +0.28%] index_select reverse : Elapsed 0.018 ms (1.814 ms / 100) 1.798 -> 1.799 ( +0.06%) [ +0.22% +0.22% +0.00% / +0.06% +0.67% +0.78%] index_select skip64 : Elapsed 0.018 ms (1.802 ms / 100) 1.797 -> 1.799 ( +0.11%) [ +0.00% +0.17% +0.00% / +0.11% +0.45% +0.78%] index_select skip256 : Elapsed 0.018 ms (1.797 ms / 100) 1.815 -> 1.815 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.50% +0.44%] index_select spread : Elapsed 0.018 ms (1.817 ms / 100) 1.814 -> 1.819 ( +0.28%) [ +0.00% +0.22% +0.17% / +0.28% +0.55% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.814 ms / 100) 1.816 -> 1.814 ( -0.11%) [ +0.00% +0.11% +0.11% / -0.11% +0.61% +0.44%] index_select strided 5 : Elapsed 0.018 ms (1.816 ms / 100) 1.806 -> 1.811 ( +0.28%) [ +0.22% +0.00% +0.28% / +0.28% +0.83% +0.72%] index_select strided 7 : Elapsed 0.018 ms (1.810 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.61% +0.61%] index_select strided 8 : Elapsed 0.018 ms (1.817 ms / 100) 1.814 -> 1.815 ( +0.06%) [ +0.22% +0.17% +0.00% / +0.06% +0.39% +0.33%] index_select strided 16 : Elapsed 0.018 ms (1.818 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.33% +0.00% +0.06% / -0.06% +0.50% +0.39%] index_select random : Elapsed 0.018 ms (1.819 ms / 100) 1.811 -> 1.814 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +0.50% +0.55%] index_select random_sorted : Elapsed 0.018 ms (1.815 ms / 100) 1.809 -> 1.813 ( +0.22%) [ +0.28% +0.00% +0.11% / +0.22% +0.88% +0.83%] index_select perm : Elapsed 0.018 ms (1.814 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.00% +0.11% +0.17% / +0.11% +0.66% +0.72%] index_select perm_sorted : Elapsed 0.018 ms (1.818 ms / 100) B = [16, 4, 40, 5] (stride (800, 40, 1, 160)) A = [16, 4, 40, 20] (stride (3200, 20, 80, 1)) dim = 3 1.797 -> 1.796 ( -0.06%) [ +0.17% +0.22% +0.00% / -0.06% -0.06% +0.00%] index_select const : Elapsed 0.018 ms (1.800 ms / 100) 1.800 -> 1.804 ( +0.22%) [ +0.00% +0.22% +0.06% / +0.22% +0.50% +0.67%] index_select wrap : Elapsed 0.018 ms (1.800 ms / 100) 1.802 -> 1.806 ( +0.22%) [ +0.00% +0.06% +0.11% / +0.22% +0.67% +0.78%] index_select linear : Elapsed 0.018 ms (1.802 ms / 100) 1.801 -> 1.803 ( +0.11%) [ +0.44% +0.28% +0.00% / +0.11% +0.67% +0.50%] index_select reverse : Elapsed 0.018 ms (1.809 ms / 100) 1.798 -> 1.800 ( +0.11%) [ +0.00% +0.28% +0.00% / +0.11% +0.56% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.798 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.00% +0.22% +0.11% / +0.06% +0.56% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.797 ms / 100) 1.821 -> 1.820 ( -0.05%) [ +0.00% +0.00% +0.16% / -0.05% +0.66% +0.44%] index_select spread : Elapsed 0.018 ms (1.821 ms / 100) 1.818 -> 1.818 ( +0.00%) [ +0.28% +0.00% +0.11% / +0.00% +0.77% +0.72%] index_select strided 3 : Elapsed 0.018 ms (1.823 ms / 100) 1.821 -> 1.819 ( -0.11%) [ +0.00% +0.05% +0.05% / -0.11% +0.55% +0.38%] index_select strided 5 : Elapsed 0.018 ms (1.821 ms / 100) 1.813 -> 1.815 ( +0.11%) [ +0.28% +0.17% +0.00% / +0.11% +0.44% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.818 ms / 100) 1.813 -> 1.815 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.33% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.815 ms / 100) 1.821 -> 1.825 ( +0.22%) [ +0.05% +0.16% +0.00% / +0.22% +0.27% +0.38%] index_select strided 16 : Elapsed 0.018 ms (1.822 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.06% +0.22% +0.00% / +0.17% +0.17% +0.28%] index_select random : Elapsed 0.018 ms (1.819 ms / 100) 1.811 -> 1.816 ( +0.28%) [ +0.06% +0.33% +0.00% / +0.28% +0.50% +0.33%] index_select random_sorted : Elapsed 0.018 ms (1.812 ms / 100) 1.811 -> 1.812 ( +0.06%) [ +0.28% +0.11% +0.00% / +0.06% +0.50% +0.50%] index_select perm : Elapsed 0.018 ms (1.816 ms / 100) 1.814 -> 1.818 ( +0.22%) [ +0.39% +0.22% +0.00% / +0.22% +0.66% +0.66%] index_select perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) B = [16, 4, 40, 5] (stride (800, 1, 4, 160)) A = [16, 4, 40, 20] (stride (800, 12800, 20, 1)) dim = 3 1.861 -> 1.865 ( +0.21%) [ +0.11% +0.00% +0.16% / +0.21% +0.59% +0.48%] index_select const : Elapsed 0.019 ms (1.863 ms / 100) 1.869 -> 1.874 ( +0.27%) [ +0.11% +0.16% +0.00% / +0.27% +0.64% +0.59%] index_select wrap : Elapsed 0.019 ms (1.871 ms / 100) 1.872 -> 1.871 ( -0.05%) [ +0.05% +0.21% +0.00% / -0.05% +0.48% +0.37%] index_select linear : Elapsed 0.019 ms (1.873 ms / 100) 1.873 -> 1.875 ( +0.11%) [ +0.16% +0.00% +0.00% / +0.11% +0.69% +0.53%] index_select reverse : Elapsed 0.019 ms (1.876 ms / 100) 1.862 -> 1.862 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.43% +0.43%] index_select skip64 : Elapsed 0.019 ms (1.863 ms / 100) 1.866 -> 1.862 ( -0.21%) [ +0.00% +0.05% +0.00% / -0.21% +0.32% +0.48%] index_select skip256 : Elapsed 0.019 ms (1.866 ms / 100) 1.896 -> 1.899 ( +0.16%) [ +0.00% +0.05% +0.00% / +0.16% +0.42% +0.53%] index_select spread : Elapsed 0.019 ms (1.896 ms / 100) 1.889 -> 1.892 ( +0.16%) [ +0.26% +0.26% +0.00% / +0.16% +0.69% +0.74%] index_select strided 3 : Elapsed 0.019 ms (1.894 ms / 100) 1.894 -> 1.895 ( +0.05%) [ +0.21% +0.05% +0.00% / +0.05% +0.42% +0.42%] index_select strided 5 : Elapsed 0.019 ms (1.898 ms / 100) 1.882 -> 1.887 ( +0.27%) [ +0.21% +0.32% +0.00% / +0.27% +0.69% +0.53%] index_select strided 7 : Elapsed 0.019 ms (1.886 ms / 100) 1.884 -> 1.885 ( +0.05%) [ +0.21% +0.00% +0.11% / +0.05% +0.64% +0.53%] index_select strided 8 : Elapsed 0.019 ms (1.888 ms / 100) 1.895 -> 1.896 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.58% +0.58%] index_select strided 16 : Elapsed 0.019 ms (1.897 ms / 100) 1.888 -> 1.889 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.11% +0.21%] index_select random : Elapsed 0.019 ms (1.888 ms / 100) 1.880 -> 1.884 ( +0.21%) [ +0.16% +0.21% +0.00% / +0.21% +0.69% +0.69%] index_select random_sorted : Elapsed 0.019 ms (1.883 ms / 100) 1.887 -> 1.887 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.37% +0.58%] index_select perm : Elapsed 0.019 ms (1.888 ms / 100) 1.888 -> 1.888 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.21% +0.32%] index_select perm_sorted : Elapsed 0.019 ms (1.888 ms / 100) B = [16, 4, 40, 5] (stride (200, 3200, 5, 1)) A = [16, 4, 40, 20] (stride (1, 12800, 320, 16)) dim = 3 1.797 -> 1.793 ( -0.22%) [ +0.06% +0.06% +0.00% / +0.06% -0.17% -0.22%] index_select const : Elapsed 0.018 ms (1.798 ms / 100) 1.783 -> 1.784 ( +0.06%) [ +0.45% +0.11% +0.00% / +0.06% +0.39% +0.50%] index_select wrap : Elapsed 0.018 ms (1.791 ms / 100) 1.787 -> 1.790 ( +0.17%) [ +0.22% +0.11% +0.00% / +0.22% +0.17% +0.28%] index_select linear : Elapsed 0.018 ms (1.791 ms / 100) 1.786 -> 1.790 ( +0.22%) [ +0.28% +0.22% +0.00% / +0.22% +0.67% +0.56%] index_select reverse : Elapsed 0.018 ms (1.791 ms / 100) 1.784 -> 1.789 ( +0.28%) [ +0.34% +0.17% +0.00% / +0.28% +0.39% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.790 ms / 100) 1.785 -> 1.786 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.28% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.785 ms / 100) 1.774 -> 1.774 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_select spread : Elapsed 0.018 ms (1.777 ms / 100) 1.777 -> 1.778 ( +0.06%) [ +0.28% +0.11% +0.00% / +0.06% +0.51% +0.56%] index_select strided 3 : Elapsed 0.018 ms (1.782 ms / 100) 1.777 -> 1.781 ( +0.23%) [ +0.06% +0.00% +0.06% / +0.23% +0.45% +0.73%] index_select strided 5 : Elapsed 0.018 ms (1.778 ms / 100) 1.789 -> 1.789 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.39% +0.34%] index_select strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.771 -> 1.774 ( +0.17%) [ +0.28% +0.40% +0.00% / +0.17% +0.45% +0.62%] index_select strided 8 : Elapsed 0.018 ms (1.776 ms / 100) 1.773 -> 1.769 ( -0.23%) [ +0.06% +0.00% +0.00% / -0.23% +0.23% +0.17%] index_select strided 16 : Elapsed 0.018 ms (1.774 ms / 100) 1.774 -> 1.776 ( +0.11%) [ +0.00% +0.17% +0.23% / +0.11% +0.73% +0.79%] index_select random : Elapsed 0.018 ms (1.774 ms / 100) 1.779 -> 1.778 ( -0.06%) [ +0.00% +0.06% +0.17% / -0.06% +0.34% +0.11%] index_select random_sorted : Elapsed 0.018 ms (1.779 ms / 100) 1.785 -> 1.787 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +0.39% +0.34%] index_select perm : Elapsed 0.018 ms (1.787 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.62% +0.56%] index_select perm_sorted : Elapsed 0.018 ms (1.784 ms / 100) B = [16, 4, 40, 5] (stride (5, 80, 320, 1)) A = [16, 4, 40, 20] (stride (3200, 1, 4, 160)) dim = 3 1.784 -> 1.787 ( +0.17%) [ +0.28% +0.17% +0.00% / +0.17% +0.39% +0.45%] index_select const : Elapsed 0.018 ms (1.789 ms / 100) 1.789 -> 1.793 ( +0.22%) [ +0.11% +0.22% +0.00% / +0.22% +0.95% +1.12%] index_select wrap : Elapsed 0.018 ms (1.791 ms / 100) 1.790 -> 1.796 ( +0.34%) [ +0.11% +0.00% +0.06% / +0.34% +0.89% +1.12%] index_select linear : Elapsed 0.018 ms (1.792 ms / 100) 1.797 -> 1.800 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.56% +0.50%] index_select reverse : Elapsed 0.018 ms (1.799 ms / 100) 1.784 -> 1.790 ( +0.34%) [ +0.34% +0.28% +0.00% / +0.34% +0.39% +0.62%] index_select skip64 : Elapsed 0.018 ms (1.790 ms / 100) 1.787 -> 1.790 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.28% +0.17% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.789 ms / 100) 1.788 -> 1.788 ( +0.00%) [ +0.34% +0.06% +0.00% / +0.00% +0.62% +0.67%] index_select spread : Elapsed 0.018 ms (1.794 ms / 100) 1.791 -> 1.795 ( +0.22%) [ +0.06% +0.22% +0.00% / +0.22% +0.34% +0.34%] index_select strided 3 : Elapsed 0.018 ms (1.792 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.34% +0.45%] index_select strided 5 : Elapsed 0.018 ms (1.790 ms / 100) 1.789 -> 1.791 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.67% +0.61%] index_select strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.789 -> 1.785 ( -0.22%) [ +0.22% +0.11% +0.00% / -0.22% +0.78% +0.45%] index_select strided 8 : Elapsed 0.018 ms (1.793 ms / 100) 1.789 -> 1.793 ( +0.22%) [ +0.22% +0.00% +0.17% / +0.22% +0.50% +0.50%] index_select strided 16 : Elapsed 0.018 ms (1.793 ms / 100) 1.787 -> 1.790 ( +0.17%) [ +0.11% +0.22% +0.00% / +0.17% +0.50% +0.45%] index_select random : Elapsed 0.018 ms (1.789 ms / 100) 1.786 -> 1.786 ( +0.00%) [ +0.17% +0.00% +0.11% / +0.00% +0.62% +0.73%] index_select random_sorted : Elapsed 0.018 ms (1.789 ms / 100) 1.785 -> 1.793 ( +0.45%) [ +0.50% +0.00% +0.22% / +0.62% +0.45% +0.50%] index_select perm : Elapsed 0.018 ms (1.794 ms / 100) 1.797 -> 1.796 ( -0.06%) [ +0.17% +0.06% +0.00% / +0.17% +0.06% -0.06%] index_select perm_sorted : Elapsed 0.018 ms (1.800 ms / 100) B = [16, 4, 40, 5] (stride (160, 1, 4, 2560)) A = [16, 4, 40, 20] (stride (80, 1, 1280, 4)) dim = 3 1.946 -> 1.944 ( -0.10%) [ +0.00% +0.05% +0.05% / +0.15% -0.10% -0.10%] index_select const : Elapsed 0.019 ms (1.946 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +1.19% +1.09%] index_select wrap : Elapsed 0.019 ms (1.936 ms / 100) 1.937 -> 1.937 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.52% +0.36%] index_select linear : Elapsed 0.019 ms (1.937 ms / 100) 1.934 -> 1.939 ( +0.26%) [ +0.00% +0.36% +0.16% / +0.26% +0.72% +0.36%] index_select reverse : Elapsed 0.019 ms (1.934 ms / 100) 1.940 -> 1.942 ( +0.10%) [ +0.00% +0.26% +0.26% / +0.10% +0.52% +0.57%] index_select skip64 : Elapsed 0.019 ms (1.940 ms / 100) 1.936 -> 1.936 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +1.08% +0.88%] index_select skip256 : Elapsed 0.019 ms (1.936 ms / 100) 1.924 -> 1.925 ( +0.05%) [ +0.31% +0.21% +0.00% / +0.05% +0.47% +0.31%] index_select spread : Elapsed 0.019 ms (1.930 ms / 100) 1.925 -> 1.925 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.99% +0.94%] index_select strided 3 : Elapsed 0.019 ms (1.925 ms / 100) 1.920 -> 1.920 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.31% +0.10%] index_select strided 5 : Elapsed 0.019 ms (1.923 ms / 100) 1.920 -> 1.923 ( +0.16%) [ +0.21% +0.16% +0.00% / +0.16% +0.68% +0.78%] index_select strided 7 : Elapsed 0.019 ms (1.924 ms / 100) 1.927 -> 1.926 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.26% +0.26%] index_select strided 8 : Elapsed 0.019 ms (1.928 ms / 100) 1.925 -> 1.928 ( +0.16%) [ +0.00% +0.26% +0.00% / +0.16% +0.78% +0.73%] index_select strided 16 : Elapsed 0.019 ms (1.925 ms / 100) 1.922 -> 1.918 ( -0.21%) [ +0.10% +0.21% +0.00% / -0.21% +0.42% +0.57%] index_select random : Elapsed 0.019 ms (1.924 ms / 100) 1.911 -> 1.915 ( +0.21%) [ +0.26% +0.26% +0.00% / +0.21% +0.73% +0.58%] index_select random_sorted : Elapsed 0.019 ms (1.916 ms / 100) 1.925 -> 1.923 ( -0.10%) [ +0.00% +0.05% +0.21% / -0.10% +0.47% +0.36%] index_select perm : Elapsed 0.019 ms (1.925 ms / 100) 1.925 -> 1.926 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.52% +0.52%] index_select perm_sorted : Elapsed 0.019 ms (1.925 ms / 100) out_shape = [5, 20, 4, 40] in_shape = [16, 20, 4, 40] idx_dim = 0 B = [5, 20, 4, 40] (stride (160, 800, 40, 1)) A = [16, 20, 4, 40] (stride (3200, 160, 1, 4)) dim = 0 2.125 -> 2.128 ( +0.14%) [ +0.00% +0.05% +0.33% / +0.14% +0.61% +0.80%] index_select const : Elapsed 0.021 ms (2.125 ms / 100) 2.143 -> 2.146 ( +0.14%) [ +0.23% +0.00% +0.23% / +0.14% +0.33% +0.42%] index_select wrap : Elapsed 0.021 ms (2.148 ms / 100) 2.143 -> 2.147 ( +0.19%) [ +0.19% +0.00% +0.19% / +0.19% +0.42% +0.47%] index_select linear : Elapsed 0.021 ms (2.147 ms / 100) 2.146 -> 2.147 ( +0.05%) [ +0.14% +0.19% +0.00% / +0.05% +0.42% +0.33%] index_select reverse : Elapsed 0.021 ms (2.149 ms / 100) 2.121 -> 2.123 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +0.47% +0.47%] index_select skip64 : Elapsed 0.021 ms (2.125 ms / 100) 2.128 -> 2.131 ( +0.14%) [ +0.09% +0.00% +0.23% / +0.14% +0.66% +0.52%] index_select skip256 : Elapsed 0.021 ms (2.130 ms / 100) 2.139 -> 2.141 ( +0.09%) [ +0.14% +0.00% +0.14% / +0.09% +0.51% +0.61%] index_select spread : Elapsed 0.021 ms (2.142 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.05% +0.61% +0.61%] index_select strided 3 : Elapsed 0.021 ms (2.137 ms / 100) 2.134 -> 2.134 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.80% +0.37%] index_select strided 5 : Elapsed 0.021 ms (2.136 ms / 100) 2.145 -> 2.144 ( -0.05%) [ +0.00% +0.00% +0.14% / -0.05% +0.51% +0.47%] index_select strided 7 : Elapsed 0.021 ms (2.145 ms / 100) 2.127 -> 2.124 ( -0.14%) [ +0.05% +0.00% +0.14% / -0.14% +0.71% +0.42%] index_select strided 8 : Elapsed 0.021 ms (2.128 ms / 100) 2.142 -> 2.137 ( -0.23%) [ +0.00% +0.05% +0.05% / -0.23% +0.09% +0.19%] index_select random : Elapsed 0.021 ms (2.142 ms / 100) 2.134 -> 2.134 ( +0.00%) [ +0.00% +0.14% +0.33% / +0.00% +0.66% +0.56%] index_select random_sorted : Elapsed 0.021 ms (2.134 ms / 100) 2.134 -> 2.138 ( +0.19%) [ +0.05% +0.14% +0.00% / +0.19% +0.52% +0.52%] index_select perm : Elapsed 0.021 ms (2.135 ms / 100) 2.140 -> 2.138 ( -0.09%) [ +0.00% +0.00% +0.19% / -0.09% +0.14% +0.19%] index_select perm_sorted : Elapsed 0.021 ms (2.140 ms / 100) B = [5, 20, 4, 40] (stride (1, 800, 200, 5)) A = [16, 20, 4, 40] (stride (20, 1, 12800, 320)) dim = 0 2.034 -> 2.038 ( +0.20%) [ +0.10% +0.15% +0.00% / +0.20% +0.64% +0.54%] index_select const : Elapsed 0.020 ms (2.036 ms / 100) 2.035 -> 2.036 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.05% +0.39%] index_select wrap : Elapsed 0.020 ms (2.035 ms / 100) 2.031 -> 2.036 ( +0.25%) [ +0.34% +0.05% +0.00% / +0.25% +0.44% +0.54%] index_select linear : Elapsed 0.020 ms (2.038 ms / 100) 2.042 -> 2.046 ( +0.20%) [ +0.05% +0.00% +0.24% / +0.20% +0.34% +0.44%] index_select reverse : Elapsed 0.020 ms (2.043 ms / 100) 2.037 -> 2.041 ( +0.20%) [ +0.00% +0.05% +0.25% / +0.25% +0.20% +0.44%] index_select skip64 : Elapsed 0.020 ms (2.037 ms / 100) 2.035 -> 2.040 ( +0.25%) [ +0.20% +0.05% +0.00% / +0.25% +0.29% +0.49%] index_select skip256 : Elapsed 0.020 ms (2.039 ms / 100) 2.040 -> 2.041 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.10% +0.34%] index_select spread : Elapsed 0.020 ms (2.040 ms / 100) 2.038 -> 2.043 ( +0.25%) [ +0.25% +0.34% +0.00% / +0.29% +0.25% +0.29%] index_select strided 3 : Elapsed 0.020 ms (2.043 ms / 100) 2.038 -> 2.043 ( +0.25%) [ +0.34% +0.00% +0.25% / +0.25% +0.25% +0.54%] index_select strided 5 : Elapsed 0.020 ms (2.045 ms / 100) 2.042 -> 2.041 ( -0.05%) [ +0.15% +0.00% +0.05% / -0.05% +0.15% +0.20%] index_select strided 7 : Elapsed 0.020 ms (2.045 ms / 100) 2.032 -> 2.041 ( +0.44%) [ +0.00% +0.30% +0.64% / +0.44% +0.54% +0.44%] index_select strided 8 : Elapsed 0.020 ms (2.032 ms / 100) 2.039 -> 2.044 ( +0.25%) [ +0.15% +0.00% +0.15% / +1.13% +0.39% +0.25%] index_select random : Elapsed 0.020 ms (2.042 ms / 100) 2.037 -> 2.041 ( +0.20%) [ +0.29% +0.25% +0.00% / +0.20% +0.49% +0.49%] index_select random_sorted : Elapsed 0.020 ms (2.043 ms / 100) 2.038 -> 2.041 ( +0.15%) [ +0.15% +0.00% +0.10% / +0.15% +0.44% +0.69%] index_select perm : Elapsed 0.020 ms (2.041 ms / 100) 2.041 -> 2.040 ( -0.05%) [ +0.29% +0.00% +0.05% / -0.05% +0.34% +0.15%] index_select perm_sorted : Elapsed 0.020 ms (2.047 ms / 100) B = [5, 20, 4, 40] (stride (4, 800, 1, 20)) A = [16, 20, 4, 40] (stride (3200, 4, 1, 80)) dim = 0 2.343 -> 2.349 ( +0.26%) [ +0.30% +0.00% +0.38% / +0.26% +0.51% +0.38%] index_select const : Elapsed 0.024 ms (2.350 ms / 100) 2.341 -> 2.339 ( -0.09%) [ +0.00% +0.13% +0.04% / -0.09% +0.09% +0.09%] index_select wrap : Elapsed 0.023 ms (2.341 ms / 100) 2.342 -> 2.344 ( +0.09%) [ +0.00% +0.04% +0.09% / +0.09% +0.30% +0.30%] index_select linear : Elapsed 0.023 ms (2.342 ms / 100) 2.349 -> 2.353 ( +0.17%) [ +0.34% +0.13% +0.00% / +0.17% +0.21% +0.38%] index_select reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.335 -> 2.340 ( +0.21%) [ +0.13% +0.00% +0.26% / +0.21% +0.56% +0.30%] index_select skip64 : Elapsed 0.023 ms (2.338 ms / 100) 2.343 -> 2.347 ( +0.17%) [ +0.26% +0.00% +0.17% / +0.17% +0.55% +0.34%] index_select skip256 : Elapsed 0.023 ms (2.349 ms / 100) 2.352 -> 2.354 ( +0.09%) [ +0.34% +0.00% +0.00% / +0.09% +0.17% +0.26%] index_select spread : Elapsed 0.024 ms (2.360 ms / 100) 2.349 -> 2.354 ( +0.21%) [ +0.00% +0.13% +0.68% / +0.21% +0.89% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.349 ms / 100) 2.333 -> 2.337 ( +0.17%) [ +0.30% +0.00% +0.09% / +0.17% +0.47% +0.47%] index_select strided 5 : Elapsed 0.023 ms (2.340 ms / 100) 2.347 -> 2.346 ( -0.04%) [ +0.04% +0.09% +0.00% / -0.04% +0.51% +0.21%] index_select strided 7 : Elapsed 0.023 ms (2.348 ms / 100) 2.330 -> 2.334 ( +0.17%) [ +0.34% +0.00% +0.00% / +0.17% +0.52% +0.82%] index_select strided 8 : Elapsed 0.023 ms (2.338 ms / 100) 2.344 -> 2.347 ( +0.13%) [ +0.04% +0.26% +0.00% / +0.13% +0.34% +0.17%] index_select random : Elapsed 0.023 ms (2.345 ms / 100) 2.339 -> 2.344 ( +0.21%) [ +0.00% +0.17% +0.04% / +0.30% +0.21% +0.38%] index_select random_sorted : Elapsed 0.023 ms (2.339 ms / 100) 2.334 -> 2.337 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.13% +0.34% +0.39%] index_select perm : Elapsed 0.023 ms (2.334 ms / 100) 2.339 -> 2.341 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.34% +0.30%] index_select perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) B = [5, 20, 4, 40] (stride (1, 800, 5, 20)) A = [16, 20, 4, 40] (stride (1, 640, 12800, 16)) dim = 0 2.417 -> 2.418 ( +0.04%) [ +0.17% +0.00% +0.33% / +0.25% +0.04% +0.12%] index_select const : Elapsed 0.024 ms (2.421 ms / 100) 2.404 -> 2.401 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% +0.46% +0.37%] index_select wrap : Elapsed 0.024 ms (2.405 ms / 100) 2.389 -> 2.392 ( +0.13%) [ +0.33% +0.00% +0.13% / +0.13% +0.54% +0.46%] index_select linear : Elapsed 0.024 ms (2.397 ms / 100) 2.383 -> 2.387 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.25% +0.29%] index_select reverse : Elapsed 0.024 ms (2.387 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.00% +0.21% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.403 ms / 100) 2.425 -> 2.415 ( -0.41%) [ +0.04% +0.12% +0.00% / -0.08% -0.41% -0.12%] index_select skip256 : Elapsed 0.024 ms (2.426 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.12% +0.00%] index_select spread : Elapsed 0.024 ms (2.408 ms / 100) 2.408 -> 2.408 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.21% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.21% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.402 -> 2.401 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.00% +0.08% -0.04%] index_select strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.439 -> 2.426 ( -0.53%) [ +0.00% +0.00% +0.16% / +0.00% -0.53% -0.16%] index_select strided 8 : Elapsed 0.024 ms (2.439 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.00% +0.25% +0.08% / +0.08% +0.37% +0.42%] index_select random : Elapsed 0.024 ms (2.402 ms / 100) 2.386 -> 2.385 ( -0.04%) [ +0.08% +0.13% +0.00% / -0.04% +0.00% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.388 ms / 100) 2.407 -> 2.407 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.33% +0.33%] index_select perm : Elapsed 0.024 ms (2.408 ms / 100) 2.415 -> 2.421 ( +0.25%) [ +0.00% +0.04% +0.50% / +0.25% +0.25% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) B = [5, 20, 4, 40] (stride (20, 1, 4000, 100)) A = [16, 20, 4, 40] (stride (800, 1, 12800, 20)) dim = 0 2.261 -> 2.263 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.09% +0.27% +0.57%] index_select const : Elapsed 0.023 ms (2.262 ms / 100) 2.259 -> 2.257 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.27% +0.18%] index_select wrap : Elapsed 0.023 ms (2.261 ms / 100) 2.270 -> 2.269 ( -0.04%) [ +0.18% +0.18% +0.00% / -0.04% +0.35% +0.31%] index_select linear : Elapsed 0.023 ms (2.274 ms / 100) 2.265 -> 2.266 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.26% +0.18%] index_select reverse : Elapsed 0.023 ms (2.268 ms / 100) 2.253 -> 2.253 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.67% +0.40%] index_select skip64 : Elapsed 0.023 ms (2.253 ms / 100) 2.257 -> 2.260 ( +0.13%) [ +0.09% +0.04% +0.00% / +0.13% +0.35% +0.62%] index_select skip256 : Elapsed 0.023 ms (2.259 ms / 100) 2.260 -> 2.262 ( +0.09%) [ +0.13% +0.00% +0.13% / +0.09% +0.27% +0.44%] index_select spread : Elapsed 0.023 ms (2.263 ms / 100) 2.259 -> 2.259 ( +0.00%) [ +0.35% +0.18% +0.00% / +0.00% +0.66% +0.66%] index_select strided 3 : Elapsed 0.023 ms (2.267 ms / 100) 2.268 -> 2.268 ( +0.00%) [ +0.18% +0.00% +0.09% / +0.00% +0.57% +0.53%] index_select strided 5 : Elapsed 0.023 ms (2.272 ms / 100) 2.261 -> 2.265 ( +0.18%) [ +0.18% +0.00% +0.09% / +0.18% +0.62% +0.31%] index_select strided 7 : Elapsed 0.023 ms (2.265 ms / 100) 2.257 -> 2.262 ( +0.22%) [ +0.13% +0.00% +0.18% / +0.22% +0.66% +0.31%] index_select strided 8 : Elapsed 0.023 ms (2.260 ms / 100) 2.254 -> 2.253 ( -0.04%) [ +0.18% +0.00% +0.04% / -0.04% +0.35% +0.27%] index_select random : Elapsed 0.023 ms (2.258 ms / 100) 2.263 -> 2.268 ( +0.22%) [ +0.09% +0.00% +0.09% / +0.27% +0.31% +0.22%] index_select random_sorted : Elapsed 0.023 ms (2.265 ms / 100) 2.267 -> 2.272 ( +0.22%) [ +0.31% +0.13% +0.00% / +0.22% +0.49% +0.84%] index_select perm : Elapsed 0.023 ms (2.274 ms / 100) 2.254 -> 2.254 ( +0.00%) [ +0.18% +0.22% +0.00% / +0.00% +0.49% +0.35%] index_select perm_sorted : Elapsed 0.023 ms (2.258 ms / 100) B = [5, 20, 4, 40] (stride (4, 20, 1, 400)) A = [16, 20, 4, 40] (stride (1, 640, 12800, 16)) dim = 0 2.424 -> 2.415 ( -0.37%) [ +0.00% +0.29% +0.21% / +0.33% +0.17% -0.37%] index_select const : Elapsed 0.024 ms (2.424 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.00% +0.08% +0.37% / -0.04% +0.41% +0.25%] index_select wrap : Elapsed 0.024 ms (2.410 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.63% +0.29%] index_select linear : Elapsed 0.024 ms (2.397 ms / 100) 2.388 -> 2.389 ( +0.04%) [ +0.21% +0.08% +0.00% / +0.04% +0.29% +0.34%] index_select reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.408 -> 2.409 ( +0.04%) [ +0.21% +0.17% +0.00% / +0.04% +0.37% +0.33%] index_select skip64 : Elapsed 0.024 ms (2.413 ms / 100) 2.428 -> 2.423 ( -0.21%) [ +0.04% +0.00% +0.04% / +0.25% -0.21% -0.21%] index_select skip256 : Elapsed 0.024 ms (2.429 ms / 100) 2.407 -> 2.412 ( +0.21%) [ +0.17% +0.00% +0.29% / +0.21% +0.29% +0.25%] index_select spread : Elapsed 0.024 ms (2.411 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.46% +0.46%] index_select strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.08% +0.25% / +0.00% +0.50% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.412 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.12% +0.04% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.445 -> 2.430 ( -0.61%) [ +0.00% +0.00% +0.16% / +0.04% -0.61% -0.25%] index_select strided 8 : Elapsed 0.024 ms (2.445 ms / 100) 2.428 -> 2.429 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.29% +0.21%] index_select random : Elapsed 0.024 ms (2.428 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.04% +0.00% +0.42% / +0.12% +0.08% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.29% +0.37%] index_select perm : Elapsed 0.024 ms (2.416 ms / 100) 2.424 -> 2.422 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.45% +0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) B = [5, 20, 4, 40] (stride (4, 20, 1, 400)) A = [16, 20, 4, 40] (stride (20, 1, 320, 1280)) dim = 0 2.481 -> 2.481 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.00% +0.20% +0.40%] index_select const : Elapsed 0.025 ms (2.484 ms / 100) 2.476 -> 2.477 ( +0.04%) [ +0.28% +0.04% +0.00% / +0.04% +0.44% +0.20%] index_select wrap : Elapsed 0.025 ms (2.483 ms / 100) 2.477 -> 2.475 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% +0.20% +0.36%] index_select linear : Elapsed 0.025 ms (2.480 ms / 100) 2.476 -> 2.482 ( +0.24%) [ +0.04% +0.00% +0.08% / +0.24% +0.61% +0.61%] index_select reverse : Elapsed 0.025 ms (2.477 ms / 100) 2.476 -> 2.483 ( +0.28%) [ +0.16% +0.32% +0.00% / +0.28% +0.53% +0.40%] index_select skip64 : Elapsed 0.025 ms (2.480 ms / 100) 2.474 -> 2.479 ( +0.20%) [ +0.28% +0.16% +0.00% / +0.20% +0.44% +0.44%] index_select skip256 : Elapsed 0.025 ms (2.481 ms / 100) 2.476 -> 2.478 ( +0.08%) [ +0.00% +0.04% +0.16% / +0.08% +0.40% +0.36%] index_select spread : Elapsed 0.025 ms (2.476 ms / 100) 2.478 -> 2.481 ( +0.12%) [ +0.12% +0.00% +0.24% / +0.12% +0.28% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.481 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.40% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.475 ( +0.12%) [ +0.16% +0.16% +0.00% / +0.16% +0.32% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.469 -> 2.475 ( +0.24%) [ +0.00% +0.16% +0.04% / +0.24% +0.32% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.49% +0.53%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.40% +0.45%] index_select random_sorted : Elapsed 0.025 ms (2.470 ms / 100) 2.473 -> 2.477 ( +0.16%) [ +0.16% +0.00% +0.04% / +0.16% +0.40% +0.44%] index_select perm : Elapsed 0.025 ms (2.477 ms / 100) 2.474 -> 2.477 ( +0.12%) [ +0.00% +0.00% +0.20% / +0.12% +0.36% +0.61%] index_select perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) out_shape = [16, 5, 4, 40] in_shape = [16, 20, 4, 40] idx_dim = 1 B = [16, 5, 4, 40] (stride (160, 2560, 1, 4)) A = [16, 20, 4, 40] (stride (800, 1, 12800, 20)) dim = 1 1.860 -> 1.866 ( +0.32%) [ +0.16% +0.11% +0.00% / +0.32% +0.70% +0.81%] index_select const : Elapsed 0.019 ms (1.863 ms / 100) 1.871 -> 1.873 ( +0.11%) [ +0.11% +0.00% +0.27% / +0.11% +0.69% +0.53%] index_select wrap : Elapsed 0.019 ms (1.873 ms / 100) 1.870 -> 1.874 ( +0.21%) [ +0.37% +0.27% +0.00% / +0.21% +0.70% +0.75%] index_select linear : Elapsed 0.019 ms (1.877 ms / 100) 1.870 -> 1.871 ( +0.05%) [ +0.21% +0.00% +0.11% / +0.05% +0.91% +0.75%] index_select reverse : Elapsed 0.019 ms (1.874 ms / 100) 1.860 -> 1.863 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.16% +0.65% +0.75%] index_select skip64 : Elapsed 0.019 ms (1.863 ms / 100) 1.867 -> 1.868 ( +0.05%) [ +0.21% +0.05% +0.00% / +0.05% +0.32% +0.43%] index_select skip256 : Elapsed 0.019 ms (1.871 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.42% +0.00% +0.05% / +0.05% +0.58% +0.58%] index_select spread : Elapsed 0.019 ms (1.901 ms / 100) 1.891 -> 1.893 ( +0.11%) [ +0.21% +0.00% +0.11% / +0.11% +0.58% +0.53%] index_select strided 3 : Elapsed 0.019 ms (1.895 ms / 100) 1.894 -> 1.896 ( +0.11%) [ +0.05% +0.32% +0.00% / +0.11% +0.42% +0.37%] index_select strided 5 : Elapsed 0.019 ms (1.895 ms / 100) 1.883 -> 1.885 ( +0.11%) [ +0.16% +0.00% +0.05% / +0.11% +0.53% +0.58%] index_select strided 7 : Elapsed 0.019 ms (1.886 ms / 100) 1.882 -> 1.880 ( -0.11%) [ +0.00% +0.16% +0.00% / -0.11% +0.58% +0.69%] index_select strided 8 : Elapsed 0.019 ms (1.882 ms / 100) 1.893 -> 1.898 ( +0.26%) [ +0.00% +0.11% +0.26% / +0.26% +0.74% +0.58%] index_select strided 16 : Elapsed 0.019 ms (1.893 ms / 100) 1.884 -> 1.884 ( +0.00%) [ +0.11% +0.16% +0.00% / +0.00% +0.58% +0.32%] index_select random : Elapsed 0.019 ms (1.886 ms / 100) 1.884 -> 1.883 ( -0.05%) [ +0.11% +0.05% +0.00% / -0.05% +0.53% +0.53%] index_select random_sorted : Elapsed 0.019 ms (1.886 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.11% +0.16% +0.00% / +0.16% +0.64% +0.59%] index_select perm : Elapsed 0.019 ms (1.876 ms / 100) 1.883 -> 1.880 ( -0.16%) [ +0.11% +0.00% +0.00% / -0.16% +0.48% +0.69%] index_select perm_sorted : Elapsed 0.019 ms (1.885 ms / 100) B = [16, 5, 4, 40] (stride (40, 2560, 640, 1)) A = [16, 20, 4, 40] (stride (1, 640, 12800, 16)) dim = 1 1.816 -> 1.813 ( -0.17%) [ +0.28% +0.06% +0.00% / +0.00% +0.11% -0.17%] index_select const : Elapsed 0.018 ms (1.821 ms / 100) 1.818 -> 1.816 ( -0.11%) [ +0.11% +0.11% +0.00% / -0.06% -0.11% +0.17%] index_select wrap : Elapsed 0.018 ms (1.820 ms / 100) 1.811 -> 1.813 ( +0.11%) [ +0.28% +0.22% +0.00% / +0.11% +0.33% +0.28%] index_select linear : Elapsed 0.018 ms (1.816 ms / 100) 1.812 -> 1.817 ( +0.28%) [ +0.22% +0.11% +0.00% / +0.28% +0.28% +0.28%] index_select reverse : Elapsed 0.018 ms (1.816 ms / 100) 1.809 -> 1.809 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.55% +0.28%] index_select skip64 : Elapsed 0.018 ms (1.809 ms / 100) 1.807 -> 1.811 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.33% +0.50%] index_select skip256 : Elapsed 0.018 ms (1.807 ms / 100) 1.819 -> 1.814 ( -0.27%) [ +0.00% +0.05% +0.16% / +0.16% -0.16% -0.27%] index_select spread : Elapsed 0.018 ms (1.819 ms / 100) 1.812 -> 1.818 ( +0.33%) [ +0.17% +0.00% +0.06% / +0.33% +0.61% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.815 ms / 100) 1.815 -> 1.814 ( -0.06%) [ +0.00% +0.17% +0.17% / +0.06% +0.11% -0.06%] index_select strided 5 : Elapsed 0.018 ms (1.815 ms / 100) 1.814 -> 1.814 ( +0.00%) [ +0.00% +0.22% +0.11% / +0.00% +0.50% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.814 ms / 100) 1.815 -> 1.816 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.17% +0.11%] index_select strided 8 : Elapsed 0.018 ms (1.817 ms / 100) 1.813 -> 1.815 ( +0.11%) [ +0.17% +0.11% +0.00% / +0.11% +0.17% +0.17%] index_select strided 16 : Elapsed 0.018 ms (1.816 ms / 100) 1.811 -> 1.810 ( -0.06%) [ +0.00% +0.11% +0.06% / -0.06% +0.33% +0.22%] index_select random : Elapsed 0.018 ms (1.811 ms / 100) 1.808 -> 1.812 ( +0.22%) [ +0.17% +0.06% +0.00% / +0.22% +0.39% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.811 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.44% +0.33% +0.00% / +0.44% +0.17% +0.17%] index_select perm : Elapsed 0.018 ms (1.821 ms / 100) 1.817 -> 1.817 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.06% +0.00% +0.06%] index_select perm_sorted : Elapsed 0.018 ms (1.819 ms / 100) B = [16, 5, 4, 40] (stride (40, 640, 3200, 1)) A = [16, 20, 4, 40] (stride (1, 64, 16, 1280)) dim = 1 1.892 -> 1.891 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.69% +0.42%] index_select const : Elapsed 0.019 ms (1.893 ms / 100) 1.886 -> 1.891 ( +0.27%) [ +0.00% +0.27% +0.16% / +0.27% +0.37% +0.42%] index_select wrap : Elapsed 0.019 ms (1.886 ms / 100) 1.884 -> 1.887 ( +0.16%) [ +0.27% +0.21% +0.00% / +0.16% +0.37% +0.64%] index_select linear : Elapsed 0.019 ms (1.889 ms / 100) 1.892 -> 1.892 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.53% +0.26%] index_select reverse : Elapsed 0.019 ms (1.892 ms / 100) 1.890 -> 1.890 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.58% +0.42%] index_select skip64 : Elapsed 0.019 ms (1.890 ms / 100) 1.890 -> 1.889 ( -0.05%) [ +0.11% +0.16% +0.00% / -0.05% +0.26% +0.42%] index_select skip256 : Elapsed 0.019 ms (1.892 ms / 100) 1.892 -> 1.895 ( +0.16%) [ +0.21% +0.21% +0.00% / +0.16% +0.42% +0.53%] index_select spread : Elapsed 0.019 ms (1.896 ms / 100) 1.880 -> 1.878 ( -0.11%) [ +0.00% +0.16% +0.05% / -0.11% +0.64% +0.59%] index_select strided 3 : Elapsed 0.019 ms (1.880 ms / 100) 1.890 -> 1.893 ( +0.16%) [ +0.11% +0.00% +0.11% / +0.16% +0.53% +0.69%] index_select strided 5 : Elapsed 0.019 ms (1.892 ms / 100) 1.892 -> 1.896 ( +0.21%) [ +0.37% +0.00% +0.26% / +0.21% +0.58% +0.53%] index_select strided 7 : Elapsed 0.019 ms (1.899 ms / 100) 1.895 -> 1.899 ( +0.21%) [ +0.16% +0.00% +0.26% / +0.21% +0.58% +0.53%] index_select strided 8 : Elapsed 0.019 ms (1.898 ms / 100) 1.888 -> 1.889 ( +0.05%) [ +0.11% +0.00% +0.16% / +0.05% +0.42% +0.42%] index_select strided 16 : Elapsed 0.019 ms (1.890 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.16% +0.26%] index_select random : Elapsed 0.019 ms (1.906 ms / 100) 1.902 -> 1.906 ( +0.21%) [ +0.00% +0.05% +0.16% / +0.21% +0.21% +0.37%] index_select random_sorted : Elapsed 0.019 ms (1.902 ms / 100) 1.894 -> 1.897 ( +0.16%) [ +0.00% +0.05% +0.00% / +0.16% +0.48% +0.63%] index_select perm : Elapsed 0.019 ms (1.894 ms / 100) 1.893 -> 1.895 ( +0.11%) [ +0.05% +0.05% +0.00% / +0.11% +0.37% +0.48%] index_select perm_sorted : Elapsed 0.019 ms (1.894 ms / 100) B = [16, 5, 4, 40] (stride (20, 4, 1, 320)) A = [16, 20, 4, 40] (stride (20, 1, 320, 1280)) dim = 1 1.938 -> 1.940 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.46% +0.46%] index_select const : Elapsed 0.019 ms (1.938 ms / 100) 1.939 -> 1.943 ( +0.21%) [ +0.00% +0.31% +0.26% / +0.21% +0.77% +1.08%] index_select wrap : Elapsed 0.019 ms (1.939 ms / 100) 1.943 -> 1.945 ( +0.10%) [ +0.05% +0.00% +0.31% / +0.10% +0.82% +0.72%] index_select linear : Elapsed 0.019 ms (1.944 ms / 100) 1.947 -> 1.946 ( -0.05%) [ +0.21% +0.05% +0.00% / -0.05% +0.98% +0.46%] index_select reverse : Elapsed 0.020 ms (1.951 ms / 100) 1.938 -> 1.940 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.57% +0.41%] index_select skip64 : Elapsed 0.019 ms (1.940 ms / 100) 1.933 -> 1.936 ( +0.16%) [ +0.21% +0.00% +0.26% / +0.16% +0.88% +0.98%] index_select skip256 : Elapsed 0.019 ms (1.937 ms / 100) 1.955 -> 1.956 ( +0.05%) [ +0.46% +0.20% +0.00% / +0.05% +0.82% +0.87%] index_select spread : Elapsed 0.020 ms (1.964 ms / 100) 1.954 -> 1.956 ( +0.10%) [ +0.41% +0.15% +0.00% / +0.10% +0.92% +0.92%] index_select strided 3 : Elapsed 0.020 ms (1.962 ms / 100) 1.960 -> 1.957 ( -0.15%) [ +0.05% +0.00% +0.10% / -0.15% +0.71% +0.41%] index_select strided 5 : Elapsed 0.020 ms (1.961 ms / 100) 1.958 -> 1.958 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.26% +0.51%] index_select strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.954 -> 1.954 ( +0.00%) [ +0.10% +0.20% +0.00% / +0.00% +0.61% +0.51%] index_select strided 8 : Elapsed 0.020 ms (1.956 ms / 100) 1.951 -> 1.960 ( +0.46%) [ +0.31% +0.41% +0.00% / +0.46% +0.87% +0.46%] index_select strided 16 : Elapsed 0.020 ms (1.957 ms / 100) 1.959 -> 1.963 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.71% +0.51%] index_select random : Elapsed 0.020 ms (1.963 ms / 100) 1.957 -> 1.956 ( -0.05%) [ +0.00% +0.00% +0.20% / -0.05% +0.31% +0.41%] index_select random_sorted : Elapsed 0.020 ms (1.957 ms / 100) 1.950 -> 1.952 ( +0.10%) [ +0.36% +0.26% +0.00% / +0.10% +0.72% +0.51%] index_select perm : Elapsed 0.020 ms (1.957 ms / 100) 1.957 -> 1.957 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.77% +0.41%] index_select perm_sorted : Elapsed 0.020 ms (1.957 ms / 100) out_shape = [16, 20, 5, 40] in_shape = [16, 20, 4, 40] idx_dim = 2 B = [16, 20, 5, 40] (stride (4000, 200, 40, 1)) A = [16, 20, 4, 40] (stride (3200, 40, 800, 1)) dim = 2 5.534 -> 5.523 ( -0.20%) [ +0.04% +0.00% +0.04% / +0.07% -0.13% -0.20%] index_add_ linear : Elapsed 0.055 ms (5.536 ms / 100) 5.465 -> 5.459 ( -0.11%) [ +0.00% +0.13% +0.13% / +0.05% -0.04% -0.11%] index_copy_ linear : Elapsed 0.055 ms (5.465 ms / 100) 5.526 -> 5.520 ( -0.11%) [ +0.00% +0.14% +0.18% / +0.14% +0.00% -0.11%] index_add_ reverse : Elapsed 0.055 ms (5.526 ms / 100) 5.460 -> 5.451 ( -0.16%) [ +0.05% +0.00% +0.16% / +0.13% -0.16% -0.09%] index_copy_ reverse : Elapsed 0.055 ms (5.463 ms / 100) 5.522 -> 5.513 ( -0.16%) [ +0.00% +0.11% +0.09% / +0.05% -0.16% -0.09%] index_add_ spread : Elapsed 0.055 ms (5.522 ms / 100) 5.455 -> 5.439 ( -0.29%) [ +0.00% +0.02% +0.05% / +0.16% -0.29% -0.20%] index_copy_ spread : Elapsed 0.055 ms (5.455 ms / 100) 5.521 -> 5.507 ( -0.25%) [ +0.16% +0.00% +0.13% / +0.09% -0.13% -0.25%] index_add_ strided 3 : Elapsed 0.055 ms (5.530 ms / 100) 5.455 -> 5.452 ( -0.05%) [ +0.24% +0.00% +0.13% / +0.13% -0.05% -0.02%] index_copy_ strided 3 : Elapsed 0.055 ms (5.468 ms / 100) 5.514 -> 5.504 ( -0.18%) [ +0.13% +0.00% +0.13% / +0.18% -0.18% -0.07%] index_add_ perm : Elapsed 0.055 ms (5.521 ms / 100) 5.455 -> 5.446 ( -0.16%) [ +0.00% +0.11% +0.15% / +0.02% -0.15% -0.16%] index_copy_ perm : Elapsed 0.055 ms (5.455 ms / 100) 5.517 -> 5.512 ( -0.09%) [ +0.07% +0.00% +0.16% / +0.15% -0.02% -0.09%] index_add_ perm_sorted : Elapsed 0.055 ms (5.521 ms / 100) 5.461 -> 5.441 ( -0.37%) [ +0.07% +0.00% +0.24% / +0.09% -0.26% -0.37%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.465 ms / 100) 5.658 -> 5.659 ( +0.02%) [ +0.00% +0.05% +0.19% / +0.14% +0.02% +0.05%] index_select const : Elapsed 0.057 ms (5.658 ms / 100) 5.766 -> 5.759 ( -0.12%) [ +0.03% +0.00% +0.14% / +0.03% -0.12% -0.05%] index_select wrap : Elapsed 0.058 ms (5.768 ms / 100) 5.753 -> 5.736 ( -0.30%) [ +0.09% +0.05% +0.00% / +0.12% -0.30% -0.17%] index_select linear : Elapsed 0.058 ms (5.758 ms / 100) 5.744 -> 5.741 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.26% +0.03% -0.05%] index_select reverse : Elapsed 0.057 ms (5.747 ms / 100) 5.646 -> 5.640 ( -0.11%) [ +0.00% +0.00% +0.19% / +0.07% -0.11% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.646 ms / 100) 5.659 -> 5.656 ( -0.05%) [ +0.00% +0.04% +0.18% / +0.00% -0.05% +0.04%] index_select skip256 : Elapsed 0.057 ms (5.659 ms / 100) 5.745 -> 5.735 ( -0.17%) [ +0.19% +0.00% +0.21% / +0.00% -0.17% -0.16%] index_select spread : Elapsed 0.058 ms (5.756 ms / 100) 5.753 -> 5.751 ( -0.03%) [ +0.09% +0.00% +0.10% / +0.19% -0.02% -0.03%] index_select strided 3 : Elapsed 0.058 ms (5.758 ms / 100) 5.727 -> 5.720 ( -0.12%) [ +0.00% +0.16% +0.17% / +0.09% -0.03% -0.12%] index_select random : Elapsed 0.057 ms (5.727 ms / 100) 5.733 -> 5.716 ( -0.30%) [ +0.14% +0.02% +0.00% / +0.12% -0.26% -0.30%] index_select random_sorted : Elapsed 0.057 ms (5.741 ms / 100) B = [16, 20, 5, 40] (stride (4000, 1, 20, 100)) A = [16, 20, 4, 40] (stride (40, 640, 12800, 1)) dim = 2 5.915 -> 5.907 ( -0.14%) [ +0.03% +0.00% +0.12% / +0.10% -0.12% -0.14%] index_add_ linear : Elapsed 0.059 ms (5.917 ms / 100) 5.851 -> 5.839 ( -0.21%) [ +0.00% +0.02% +0.10% / -0.07% -0.14% -0.21%] index_copy_ linear : Elapsed 0.059 ms (5.851 ms / 100) 5.911 -> 5.909 ( -0.03%) [ +0.07% +0.00% +0.14% / +0.03% -0.03% +0.17%] index_add_ reverse : Elapsed 0.059 ms (5.915 ms / 100) 5.850 -> 5.845 ( -0.09%) [ +0.03% +0.10% +0.00% / +0.10% -0.05% -0.09%] index_copy_ reverse : Elapsed 0.059 ms (5.852 ms / 100) 5.911 -> 5.910 ( -0.02%) [ +0.14% +0.00% +0.03% / +0.24% +0.10% -0.02%] index_add_ spread : Elapsed 0.059 ms (5.919 ms / 100) 5.846 -> 5.839 ( -0.12%) [ +0.00% +0.09% +0.26% / +0.00% -0.09% -0.12%] index_copy_ spread : Elapsed 0.058 ms (5.846 ms / 100) 5.922 -> 5.928 ( +0.10%) [ +0.25% +0.00% +0.19% / +0.19% +0.14% +0.10%] index_add_ strided 3 : Elapsed 0.059 ms (5.937 ms / 100) 5.856 -> 5.856 ( +0.00%) [ +0.00% +0.07% +0.27% / +0.24% +0.14% +0.00%] index_copy_ strided 3 : Elapsed 0.059 ms (5.856 ms / 100) 5.920 -> 5.923 ( +0.05%) [ +0.00% +0.14% +0.08% / +0.05% +0.05% +0.14%] index_add_ perm : Elapsed 0.059 ms (5.920 ms / 100) 5.857 -> 5.853 ( -0.07%) [ +0.02% +0.00% +0.02% / +0.07% +0.00% -0.07%] index_copy_ perm : Elapsed 0.059 ms (5.858 ms / 100) 5.914 -> 5.920 ( +0.10%) [ +0.00% +0.07% +0.05% / +0.24% +0.10% +0.19%] index_add_ perm_sorted : Elapsed 0.059 ms (5.914 ms / 100) 5.853 -> 5.857 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.12% +0.07% +0.07%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.857 ms / 100) 6.121 -> 6.117 ( -0.07%) [ +0.00% +0.00% +0.08% / +0.16% +0.03% -0.07%] index_select const : Elapsed 0.061 ms (6.121 ms / 100) 6.219 -> 6.207 ( -0.19%) [ +0.05% +0.00% +0.05% / +0.08% +0.00% -0.19%] index_select wrap : Elapsed 0.062 ms (6.222 ms / 100) 6.195 -> 6.195 ( +0.00%) [ +0.11% +0.00% +0.15% / +0.06% +0.03% +0.00%] index_select linear : Elapsed 0.062 ms (6.202 ms / 100) 6.198 -> 6.211 ( +0.21%) [ +0.06% +0.00% +0.21% / +0.21% +0.24% +0.31%] index_select reverse : Elapsed 0.062 ms (6.202 ms / 100) 6.140 -> 6.129 ( -0.18%) [ +0.02% +0.02% +0.00% / -0.08% -0.18% -0.02%] index_select skip64 : Elapsed 0.061 ms (6.141 ms / 100) 6.113 -> 6.115 ( +0.03%) [ +0.28% +0.00% +0.18% / +0.25% +0.03% +0.05%] index_select skip256 : Elapsed 0.061 ms (6.130 ms / 100) 6.222 -> 6.222 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.02% +0.00% +0.08%] index_select spread : Elapsed 0.062 ms (6.226 ms / 100) 6.210 -> 6.210 ( +0.00%) [ +0.00% +0.13% +0.02% / +0.00% +0.03% +0.21%] index_select strided 3 : Elapsed 0.062 ms (6.210 ms / 100) 6.192 -> 6.198 ( +0.10%) [ +0.10% +0.00% +0.16% / +0.13% +0.15% +0.10%] index_select random : Elapsed 0.062 ms (6.198 ms / 100) 6.163 -> 6.162 ( -0.02%) [ +0.00% +0.10% +0.21% / +0.11% -0.02% +0.03%] index_select random_sorted : Elapsed 0.062 ms (6.163 ms / 100) B = [16, 20, 5, 40] (stride (800, 40, 12800, 1)) A = [16, 20, 4, 40] (stride (80, 1, 20, 1280)) dim = 2 5.637 -> 5.615 ( -0.39%) [ +0.00% +0.07% +0.18% / +0.09% -0.39% -0.39%] index_add_ linear : Elapsed 0.056 ms (5.637 ms / 100) 5.598 -> 5.579 ( -0.34%) [ +0.00% +0.04% +0.02% / +0.14% -0.34% -0.34%] index_copy_ linear : Elapsed 0.056 ms (5.598 ms / 100) 5.639 -> 5.610 ( -0.51%) [ +0.00% +0.02% +0.07% / +0.05% -0.51% -0.48%] index_add_ reverse : Elapsed 0.056 ms (5.639 ms / 100) 5.592 -> 5.569 ( -0.41%) [ +0.00% +0.13% +0.11% / +0.13% -0.30% -0.41%] index_copy_ reverse : Elapsed 0.056 ms (5.592 ms / 100) 5.638 -> 5.611 ( -0.48%) [ +0.16% +0.05% +0.00% / +0.04% -0.48% -0.34%] index_add_ spread : Elapsed 0.056 ms (5.647 ms / 100) 5.594 -> 5.572 ( -0.39%) [ +0.02% +0.00% +0.18% / +0.20% -0.39% -0.38%] index_copy_ spread : Elapsed 0.056 ms (5.595 ms / 100) 5.625 -> 5.612 ( -0.23%) [ +0.09% +0.00% +0.04% / +0.16% -0.16% -0.23%] index_add_ strided 3 : Elapsed 0.056 ms (5.630 ms / 100) 5.579 -> 5.580 ( +0.02%) [ +0.00% +0.05% +0.25% / +0.23% +0.02% +0.02%] index_copy_ strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.637 -> 5.606 ( -0.55%) [ +0.11% +0.07% +0.00% / +0.14% -0.35% -0.55%] index_add_ perm : Elapsed 0.056 ms (5.643 ms / 100) 5.602 -> 5.573 ( -0.52%) [ +0.05% +0.00% +0.00% / -0.07% -0.52% -0.52%] index_copy_ perm : Elapsed 0.056 ms (5.605 ms / 100) 5.640 -> 5.609 ( -0.55%) [ +0.16% +0.00% +0.09% / +0.05% -0.27% -0.55%] index_add_ perm_sorted : Elapsed 0.056 ms (5.649 ms / 100) 5.601 -> 5.568 ( -0.59%) [ +0.09% +0.02% +0.00% / -0.12% -0.37% -0.59%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.606 ms / 100) 5.871 -> 5.852 ( -0.32%) [ +0.00% +0.00% +0.10% / +0.12% -0.32% -0.17%] index_select const : Elapsed 0.059 ms (5.871 ms / 100) 5.928 -> 5.902 ( -0.44%) [ +0.00% +0.13% +0.22% / +0.22% -0.44% -0.34%] index_select wrap : Elapsed 0.059 ms (5.928 ms / 100) 5.923 -> 5.897 ( -0.44%) [ +0.07% +0.00% +0.14% / +0.17% -0.41% -0.44%] index_select linear : Elapsed 0.059 ms (5.927 ms / 100) 5.927 -> 5.879 ( -0.81%) [ +0.00% +0.00% +0.10% / +0.05% -0.73% -0.81%] index_select reverse : Elapsed 0.059 ms (5.927 ms / 100) 5.873 -> 5.859 ( -0.24%) [ +0.00% +0.14% +0.03% / +0.03% -0.24% -0.22%] index_select skip64 : Elapsed 0.059 ms (5.873 ms / 100) 5.871 -> 5.862 ( -0.15%) [ +0.15% +0.00% +0.22% / +0.09% -0.07% -0.15%] index_select skip256 : Elapsed 0.059 ms (5.880 ms / 100) 5.925 -> 5.891 ( -0.57%) [ +0.10% +0.00% +0.05% / +0.12% -0.57% -0.49%] index_select spread : Elapsed 0.059 ms (5.931 ms / 100) 5.939 -> 5.896 ( -0.72%) [ +0.02% +0.00% +0.13% / -0.02% -0.47% -0.72%] index_select strided 3 : Elapsed 0.059 ms (5.940 ms / 100) 5.917 -> 5.880 ( -0.63%) [ +0.00% +0.05% +0.12% / +0.03% -0.63% -0.61%] index_select random : Elapsed 0.059 ms (5.917 ms / 100) 5.906 -> 5.879 ( -0.46%) [ +0.12% +0.00% +0.08% / -0.03% -0.41% -0.46%] index_select random_sorted : Elapsed 0.059 ms (5.913 ms / 100) B = [16, 20, 5, 40] (stride (800, 1, 12800, 20)) A = [16, 20, 4, 40] (stride (3200, 4, 1, 80)) dim = 2 5.935 -> 5.929 ( -0.10%) [ +0.00% +0.00% +0.13% / +0.20% -0.10% +0.08%] index_add_ linear : Elapsed 0.059 ms (5.935 ms / 100) 5.877 -> 5.881 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.15% +0.07% +0.09%] index_copy_ linear : Elapsed 0.059 ms (5.877 ms / 100) 5.918 -> 5.931 ( +0.22%) [ +0.00% +0.15% +0.17% / +0.22% +0.35% +0.24%] index_add_ reverse : Elapsed 0.059 ms (5.918 ms / 100) 5.873 -> 5.878 ( +0.09%) [ +0.12% +0.00% +0.12% / +0.09% +0.12% +0.20%] index_copy_ reverse : Elapsed 0.059 ms (5.880 ms / 100) 5.931 -> 5.934 ( +0.05%) [ +0.03% +0.00% +0.19% / +0.13% +0.05% +0.05%] index_add_ spread : Elapsed 0.059 ms (5.933 ms / 100) 5.876 -> 5.872 ( -0.07%) [ +0.00% +0.15% +0.44% / +0.03% -0.02% -0.07%] index_copy_ spread : Elapsed 0.059 ms (5.876 ms / 100) 5.943 -> 5.949 ( +0.10%) [ +0.00% +0.02% +0.05% / +0.10% +0.20% +0.19%] index_add_ strided 3 : Elapsed 0.059 ms (5.943 ms / 100) 5.883 -> 5.892 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.19% +0.15% +0.22%] index_copy_ strided 3 : Elapsed 0.059 ms (5.888 ms / 100) 5.934 -> 5.939 ( +0.08%) [ +0.05% +0.00% +0.13% / +0.08% +0.44% +0.34%] index_add_ perm : Elapsed 0.059 ms (5.937 ms / 100) 5.879 -> 5.884 ( +0.09%) [ +0.05% +0.00% +0.15% / +0.09% +0.41% +0.29%] index_copy_ perm : Elapsed 0.059 ms (5.882 ms / 100) 5.945 -> 5.942 ( -0.05%) [ +0.00% +0.05% +0.22% / +0.17% +0.12% -0.05%] index_add_ perm_sorted : Elapsed 0.059 ms (5.945 ms / 100) 5.889 -> 5.883 ( -0.10%) [ +0.00% +0.05% +0.17% / -0.02% -0.03% -0.10%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.889 ms / 100) 6.285 -> 6.287 ( +0.03%) [ +0.00% +0.05% +0.14% / +0.08% +0.03% +0.05%] index_select const : Elapsed 0.063 ms (6.285 ms / 100) 6.286 -> 6.283 ( -0.05%) [ +0.13% +0.00% +0.21% / +0.17% -0.02% -0.05%] index_select wrap : Elapsed 0.063 ms (6.294 ms / 100) 6.289 -> 6.283 ( -0.10%) [ +0.00% +0.08% +0.06% / +0.06% +0.03% -0.10%] index_select linear : Elapsed 0.063 ms (6.289 ms / 100) 6.279 -> 6.283 ( +0.06%) [ +0.06% +0.00% +0.38% / +0.30% +0.08% +0.06%] index_select reverse : Elapsed 0.063 ms (6.283 ms / 100) 6.282 -> 6.287 ( +0.08%) [ +0.16% +0.00% +0.19% / +0.19% +0.08% +0.08%] index_select skip64 : Elapsed 0.063 ms (6.292 ms / 100) 6.288 -> 6.288 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.10% +0.00% +0.17%] index_select skip256 : Elapsed 0.063 ms (6.288 ms / 100) 6.286 -> 6.279 ( -0.11%) [ +0.05% +0.00% +0.10% / +0.19% -0.11% +0.03%] index_select spread : Elapsed 0.063 ms (6.289 ms / 100) 6.288 -> 6.288 ( +0.00%) [ +0.00% +0.06% +0.16% / +0.05% +0.00% +0.03%] index_select strided 3 : Elapsed 0.063 ms (6.288 ms / 100) 6.282 -> 6.290 ( +0.13%) [ +0.00% +0.10% +0.16% / +0.25% +0.13% +0.16%] index_select random : Elapsed 0.063 ms (6.282 ms / 100) 6.285 -> 6.281 ( -0.06%) [ +0.00% +0.05% +0.06% / +0.11% +0.06% -0.06%] index_select random_sorted : Elapsed 0.063 ms (6.285 ms / 100) B = [16, 20, 5, 40] (stride (40, 640, 12800, 1)) A = [16, 20, 4, 40] (stride (3200, 40, 800, 1)) dim = 2 5.790 -> 5.768 ( -0.38%) [ +0.12% +0.00% +0.17% / +0.22% -0.38% -0.28%] index_add_ linear : Elapsed 0.058 ms (5.797 ms / 100) 5.729 -> 5.710 ( -0.33%) [ +0.00% +0.02% +0.12% / +0.19% -0.28% -0.33%] index_copy_ linear : Elapsed 0.057 ms (5.729 ms / 100) 5.787 -> 5.776 ( -0.19%) [ +0.16% +0.00% +0.00% / +0.14% -0.19% -0.16%] index_add_ reverse : Elapsed 0.058 ms (5.796 ms / 100) 5.723 -> 5.713 ( -0.17%) [ +0.00% +0.02% +0.09% / +0.02% -0.10% -0.17%] index_copy_ reverse : Elapsed 0.057 ms (5.723 ms / 100) 5.781 -> 5.765 ( -0.28%) [ +0.02% +0.00% +0.16% / +0.12% -0.03% -0.28%] index_add_ spread : Elapsed 0.058 ms (5.782 ms / 100) 5.718 -> 5.701 ( -0.30%) [ +0.00% +0.03% +0.09% / +0.02% -0.30% -0.24%] index_copy_ spread : Elapsed 0.057 ms (5.718 ms / 100) 5.773 -> 5.765 ( -0.14%) [ +0.12% +0.10% +0.00% / +0.17% -0.02% -0.14%] index_add_ strided 3 : Elapsed 0.058 ms (5.780 ms / 100) 5.705 -> 5.701 ( -0.07%) [ +0.12% +0.00% +0.14% / +0.18% -0.04% -0.07%] index_copy_ strided 3 : Elapsed 0.057 ms (5.712 ms / 100) 5.769 -> 5.769 ( +0.00%) [ +0.00% +0.07% +0.10% / +0.14% +0.16% +0.00%] index_add_ perm : Elapsed 0.058 ms (5.769 ms / 100) 5.706 -> 5.699 ( -0.12%) [ +0.04% +0.00% +0.28% / +0.12% -0.12% +0.09%] index_copy_ perm : Elapsed 0.057 ms (5.708 ms / 100) 5.781 -> 5.768 ( -0.22%) [ +0.07% +0.00% +0.16% / +0.26% -0.22% -0.07%] index_add_ perm_sorted : Elapsed 0.058 ms (5.785 ms / 100) 5.718 -> 5.694 ( -0.42%) [ +0.10% +0.02% +0.00% / +0.12% -0.42% -0.30%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.724 ms / 100) 5.991 -> 5.968 ( -0.38%) [ +0.15% +0.00% +0.12% / +0.25% -0.32% -0.38%] index_select const : Elapsed 0.060 ms (6.000 ms / 100) 6.079 -> 6.062 ( -0.28%) [ +0.00% +0.03% +0.00% / +0.13% -0.28% -0.26%] index_select wrap : Elapsed 0.061 ms (6.079 ms / 100) 6.051 -> 6.038 ( -0.21%) [ +0.00% +0.18% +0.21% / +0.17% -0.20% -0.21%] index_select linear : Elapsed 0.061 ms (6.051 ms / 100) 6.061 -> 6.038 ( -0.38%) [ +0.00% +0.07% +0.10% / +0.12% -0.38% -0.35%] index_select reverse : Elapsed 0.061 ms (6.061 ms / 100) 5.973 -> 5.944 ( -0.49%) [ +0.05% +0.00% +0.08% / +0.13% -0.49% -0.44%] index_select skip64 : Elapsed 0.060 ms (5.976 ms / 100) 5.995 -> 5.960 ( -0.58%) [ +0.08% +0.00% +0.18% / +0.18% -0.42% -0.58%] index_select skip256 : Elapsed 0.060 ms (6.000 ms / 100) 6.066 -> 6.034 ( -0.53%) [ +0.08% +0.00% +0.08% / +0.05% -0.53% -0.49%] index_select spread : Elapsed 0.061 ms (6.071 ms / 100) 6.063 -> 6.043 ( -0.33%) [ +0.05% +0.00% +0.15% / +0.25% -0.33% -0.33%] index_select strided 3 : Elapsed 0.061 ms (6.066 ms / 100) 6.058 -> 6.039 ( -0.31%) [ +0.10% +0.00% +0.17% / +0.23% -0.31% -0.26%] index_select random : Elapsed 0.061 ms (6.064 ms / 100) 6.075 -> 6.055 ( -0.33%) [ +0.00% +0.03% +0.02% / +0.02% -0.25% -0.33%] index_select random_sorted : Elapsed 0.061 ms (6.075 ms / 100) B = [16, 20, 5, 40] (stride (1, 16, 12800, 320)) A = [16, 20, 4, 40] (stride (3200, 160, 40, 1)) dim = 2 5.601 -> 5.603 ( +0.04%) [ +0.09% +0.00% +0.20% / +0.04% +0.43% +0.43%] index_add_ linear : Elapsed 0.056 ms (5.606 ms / 100) 5.555 -> 5.558 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.23% +0.25%] index_copy_ linear : Elapsed 0.056 ms (5.555 ms / 100) 5.600 -> 5.606 ( +0.11%) [ +0.00% +0.07% +0.05% / +0.11% +0.25% +0.29%] index_add_ reverse : Elapsed 0.056 ms (5.600 ms / 100) 5.548 -> 5.546 ( -0.04%) [ +0.04% +0.09% +0.00% / -0.04% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.055 ms (5.550 ms / 100) 5.605 -> 5.603 ( -0.04%) [ +0.16% +0.02% +0.00% / -0.04% +0.34% +0.11%] index_add_ spread : Elapsed 0.056 ms (5.614 ms / 100) 5.557 -> 5.552 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.09% +0.09% +0.18%] index_copy_ spread : Elapsed 0.056 ms (5.558 ms / 100) 5.594 -> 5.599 ( +0.09%) [ +0.09% +0.00% +0.20% / +0.09% +0.84% +0.77%] index_add_ strided 3 : Elapsed 0.056 ms (5.599 ms / 100) 5.545 -> 5.546 ( +0.02%) [ +0.02% +0.16% +0.00% / +0.02% +0.78% +0.61%] index_copy_ strided 3 : Elapsed 0.055 ms (5.546 ms / 100) 5.596 -> 5.599 ( +0.05%) [ +0.11% +0.00% +0.13% / +0.05% +0.63% +0.59%] index_add_ perm : Elapsed 0.056 ms (5.602 ms / 100) 5.550 -> 5.554 ( +0.07%) [ +0.13% +0.00% +0.16% / +0.07% +0.67% +0.68%] index_copy_ perm : Elapsed 0.056 ms (5.557 ms / 100) 5.597 -> 5.609 ( +0.21%) [ +0.21% +0.00% +0.30% / +0.21% +0.64% +0.61%] index_add_ perm_sorted : Elapsed 0.056 ms (5.609 ms / 100) 5.543 -> 5.560 ( +0.31%) [ +0.07% +0.00% +0.14% / +0.31% +0.58% +0.45%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.547 ms / 100) 5.778 -> 5.790 ( +0.21%) [ +0.05% +0.00% +0.12% / +0.21% +0.28% +0.42%] index_select const : Elapsed 0.058 ms (5.781 ms / 100) 5.876 -> 5.880 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.44% +0.44%] index_select wrap : Elapsed 0.059 ms (5.881 ms / 100) 5.868 -> 5.874 ( +0.10%) [ +0.09% +0.09% +0.00% / +0.10% +0.27% +0.34%] index_select linear : Elapsed 0.059 ms (5.873 ms / 100) 5.875 -> 5.888 ( +0.22%) [ +0.14% +0.00% +0.19% / +0.22% +0.60% +0.44%] index_select reverse : Elapsed 0.059 ms (5.883 ms / 100) 5.785 -> 5.793 ( +0.14%) [ +0.00% +0.10% +0.17% / +0.14% +0.69% +0.52%] index_select skip64 : Elapsed 0.058 ms (5.785 ms / 100) 5.780 -> 5.790 ( +0.17%) [ +0.00% +0.00% +0.02% / +0.17% +0.33% +0.29%] index_select skip256 : Elapsed 0.058 ms (5.780 ms / 100) 5.875 -> 5.892 ( +0.29%) [ +0.00% +0.07% +0.12% / +0.29% +0.36% +0.46%] index_select spread : Elapsed 0.059 ms (5.875 ms / 100) 5.880 -> 5.878 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.41% +0.34%] index_select strided 3 : Elapsed 0.059 ms (5.880 ms / 100) 5.871 -> 5.877 ( +0.10%) [ +0.00% +0.19% +0.07% / +0.10% +0.37% +0.32%] index_select random : Elapsed 0.059 ms (5.871 ms / 100) 5.859 -> 5.862 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.48% +0.29%] index_select random_sorted : Elapsed 0.059 ms (5.867 ms / 100) B = [16, 20, 5, 40] (stride (100, 1, 20, 1600)) A = [16, 20, 4, 40] (stride (40, 640, 12800, 1)) dim = 2 5.964 -> 5.948 ( -0.27%) [ +0.00% +0.17% +0.00% / -0.07% -0.23% -0.27%] index_add_ linear : Elapsed 0.060 ms (5.964 ms / 100) 5.854 -> 5.851 ( -0.05%) [ +0.09% +0.00% +0.02% / +0.17% -0.05% -0.05%] index_copy_ linear : Elapsed 0.059 ms (5.859 ms / 100) 5.968 -> 5.954 ( -0.23%) [ +0.00% +0.02% +0.00% / -0.02% -0.20% -0.23%] index_add_ reverse : Elapsed 0.060 ms (5.968 ms / 100) 5.857 -> 5.853 ( -0.07%) [ +0.00% +0.14% +0.05% / +0.12% -0.07% -0.02%] index_copy_ reverse : Elapsed 0.059 ms (5.857 ms / 100) 5.950 -> 5.938 ( -0.20%) [ +0.15% +0.00% +0.24% / +0.05% -0.08% -0.20%] index_add_ spread : Elapsed 0.060 ms (5.959 ms / 100) 5.842 -> 5.836 ( -0.10%) [ +0.19% +0.15% +0.00% / +0.21% -0.10% -0.07%] index_copy_ spread : Elapsed 0.059 ms (5.853 ms / 100) 5.966 -> 5.954 ( -0.20%) [ +0.18% +0.00% +0.02% / +0.25% -0.13% -0.20%] index_add_ strided 3 : Elapsed 0.060 ms (5.977 ms / 100) 5.861 -> 5.848 ( -0.22%) [ +0.00% +0.00% +0.12% / +0.12% -0.22% -0.10%] index_copy_ strided 3 : Elapsed 0.059 ms (5.861 ms / 100) 5.965 -> 5.957 ( -0.13%) [ +0.02% +0.00% +0.12% / -0.07% -0.13% -0.10%] index_add_ perm : Elapsed 0.060 ms (5.966 ms / 100) 5.861 -> 5.852 ( -0.15%) [ +0.00% +0.09% +0.19% / +0.09% -0.09% -0.15%] index_copy_ perm : Elapsed 0.059 ms (5.861 ms / 100) 5.944 -> 5.943 ( -0.02%) [ +0.00% +0.10% +0.15% / +0.13% +0.27% -0.02%] index_add_ perm_sorted : Elapsed 0.059 ms (5.944 ms / 100) 5.847 -> 5.836 ( -0.19%) [ +0.00% +0.00% +0.03% / +0.07% -0.19% -0.19%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.847 ms / 100) 6.110 -> 6.117 ( +0.11%) [ +0.00% +0.03% +0.16% / +0.11% +0.26% +0.29%] index_select const : Elapsed 0.061 ms (6.110 ms / 100) 6.216 -> 6.217 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.05% +0.05%] index_select wrap : Elapsed 0.062 ms (6.216 ms / 100) 6.192 -> 6.189 ( -0.05%) [ +0.00% +0.16% +0.05% / +0.10% -0.05% +0.08%] index_select linear : Elapsed 0.062 ms (6.192 ms / 100) 6.207 -> 6.204 ( -0.05%) [ +0.10% +0.00% +0.02% / +0.05% +0.00% -0.05%] index_select reverse : Elapsed 0.062 ms (6.213 ms / 100) 6.100 -> 6.103 ( +0.05%) [ +0.02% +0.00% +0.13% / +0.05% +0.10% +0.08%] index_select skip64 : Elapsed 0.061 ms (6.101 ms / 100) 6.113 -> 6.122 ( +0.15%) [ +0.18% +0.00% +0.02% / +0.15% +0.18% +0.15%] index_select skip256 : Elapsed 0.061 ms (6.124 ms / 100) 6.202 -> 6.191 ( -0.18%) [ +0.05% +0.06% +0.00% / +0.15% -0.10% -0.18%] index_select spread : Elapsed 0.062 ms (6.205 ms / 100) 6.207 -> 6.205 ( -0.03%) [ +0.00% +0.03% +0.13% / +0.05% -0.03% +0.05%] index_select strided 3 : Elapsed 0.062 ms (6.207 ms / 100) 6.178 -> 6.163 ( -0.24%) [ +0.02% +0.05% +0.00% / +0.05% -0.24% -0.18%] index_select random : Elapsed 0.062 ms (6.179 ms / 100) 6.183 -> 6.174 ( -0.15%) [ +0.11% +0.00% +0.02% / +0.39% -0.10% -0.15%] index_select random_sorted : Elapsed 0.062 ms (6.190 ms / 100) B = [16, 20, 5, 40] (stride (5, 80, 1, 1600)) A = [16, 20, 4, 40] (stride (3200, 1, 800, 20)) dim = 2 6.042 -> 6.042 ( +0.00%) [ +0.08% +0.00% +0.07% / +0.38% +0.00% +0.03%] index_add_ linear : Elapsed 0.060 ms (6.047 ms / 100) 6.001 -> 5.993 ( -0.13%) [ +0.17% +0.00% +0.13% / +0.38% -0.08% -0.13%] index_copy_ linear : Elapsed 0.060 ms (6.011 ms / 100) 6.045 -> 6.047 ( +0.03%) [ +0.02% +0.03% +0.00% / +0.25% +0.03% +0.07%] index_add_ reverse : Elapsed 0.060 ms (6.046 ms / 100) 6.005 -> 5.996 ( -0.15%) [ +0.02% +0.00% +0.03% / +0.13% -0.13% -0.15%] index_copy_ reverse : Elapsed 0.060 ms (6.006 ms / 100) 6.042 -> 6.036 ( -0.10%) [ +0.15% +0.08% +0.00% / +0.35% -0.10% +0.08%] index_add_ spread : Elapsed 0.061 ms (6.051 ms / 100) 6.005 -> 5.993 ( -0.20%) [ +0.02% +0.00% +0.08% / +0.12% -0.17% -0.20%] index_copy_ spread : Elapsed 0.060 ms (6.006 ms / 100) 6.043 -> 6.039 ( -0.07%) [ +0.08% +0.00% +0.03% / +0.02% +0.05% -0.07%] index_add_ strided 3 : Elapsed 0.060 ms (6.048 ms / 100) 6.006 -> 5.993 ( -0.22%) [ +0.00% +0.03% +0.02% / +0.07% -0.17% -0.22%] index_copy_ strided 3 : Elapsed 0.060 ms (6.006 ms / 100) 6.044 -> 6.039 ( -0.08%) [ +0.02% +0.00% +0.07% / +0.18% +0.13% -0.08%] index_add_ perm : Elapsed 0.060 ms (6.045 ms / 100) 6.003 -> 5.988 ( -0.25%) [ +0.00% +0.03% +0.10% / +0.17% -0.25% -0.13%] index_copy_ perm : Elapsed 0.060 ms (6.003 ms / 100) 6.042 -> 6.035 ( -0.12%) [ +0.07% +0.00% +0.10% / -0.03% +0.07% -0.12%] index_add_ perm_sorted : Elapsed 0.060 ms (6.046 ms / 100) 6.004 -> 5.996 ( -0.13%) [ +0.02% +0.00% +0.13% / +0.10% -0.13% -0.12%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.005 ms / 100) 6.245 -> 6.227 ( -0.29%) [ +0.05% +0.00% +0.03% / +0.11% -0.14% -0.29%] index_select const : Elapsed 0.062 ms (6.248 ms / 100) 6.299 -> 6.291 ( -0.13%) [ +0.10% +0.00% +0.17% / +0.19% -0.10% -0.13%] index_select wrap : Elapsed 0.063 ms (6.305 ms / 100) 6.294 -> 6.275 ( -0.30%) [ +0.02% +0.02% +0.00% / +0.05% -0.25% -0.30%] index_select linear : Elapsed 0.063 ms (6.295 ms / 100) 6.294 -> 6.277 ( -0.27%) [ +0.00% +0.08% +0.10% / +0.19% -0.25% -0.27%] index_select reverse : Elapsed 0.063 ms (6.294 ms / 100) 6.243 -> 6.230 ( -0.21%) [ +0.11% +0.00% +0.16% / +0.10% -0.14% -0.21%] index_select skip64 : Elapsed 0.063 ms (6.250 ms / 100) 6.243 -> 6.225 ( -0.29%) [ +0.00% +0.13% +0.18% / +0.08% -0.29% -0.27%] index_select skip256 : Elapsed 0.062 ms (6.243 ms / 100) 6.299 -> 6.279 ( -0.32%) [ +0.00% +0.00% +0.24% / +0.16% -0.24% -0.32%] index_select spread : Elapsed 0.063 ms (6.299 ms / 100) 6.302 -> 6.284 ( -0.29%) [ +0.08% +0.00% +0.06% / +0.05% -0.29% -0.24%] index_select strided 3 : Elapsed 0.063 ms (6.307 ms / 100) 6.285 -> 6.280 ( -0.08%) [ +0.03% +0.00% +0.19% / +0.21% +0.00% -0.08%] index_select random : Elapsed 0.063 ms (6.287 ms / 100) 6.290 -> 6.285 ( -0.08%) [ +0.00% +0.05% +0.11% / +0.17% -0.06% -0.08%] index_select random_sorted : Elapsed 0.063 ms (6.290 ms / 100) B = [16, 20, 5, 40] (stride (1, 80, 16, 1600)) A = [16, 20, 4, 40] (stride (160, 2560, 40, 1)) dim = 2 5.953 -> 5.946 ( -0.12%) [ +0.02% +0.00% +0.17% / +0.15% -0.12% -0.08%] index_add_ linear : Elapsed 0.060 ms (5.954 ms / 100) 5.846 -> 5.836 ( -0.17%) [ +0.00% +0.03% +0.02% / +0.07% -0.14% -0.17%] index_copy_ linear : Elapsed 0.058 ms (5.846 ms / 100) 5.954 -> 5.939 ( -0.25%) [ +0.05% +0.00% +0.22% / +0.08% -0.20% -0.25%] index_add_ reverse : Elapsed 0.060 ms (5.957 ms / 100) 5.842 -> 5.833 ( -0.15%) [ +0.15% +0.00% +0.09% / +0.09% -0.02% -0.15%] index_copy_ reverse : Elapsed 0.059 ms (5.851 ms / 100) 5.942 -> 5.943 ( +0.02%) [ +0.02% +0.00% +0.03% / +0.20% +0.03% +0.02%] index_add_ spread : Elapsed 0.059 ms (5.943 ms / 100) 5.831 -> 5.830 ( -0.02%) [ +0.00% +0.05% +0.10% / +0.19% -0.02% +0.00%] index_copy_ spread : Elapsed 0.058 ms (5.831 ms / 100) 5.964 -> 5.957 ( -0.12%) [ +0.07% +0.00% +0.37% / +0.23% -0.03% -0.12%] index_add_ strided 3 : Elapsed 0.060 ms (5.968 ms / 100) 5.858 -> 5.855 ( -0.05%) [ +0.02% +0.00% +0.05% / +0.09% -0.05% -0.05%] index_copy_ strided 3 : Elapsed 0.059 ms (5.859 ms / 100) 5.964 -> 5.948 ( -0.27%) [ +0.08% +0.02% +0.00% / +0.02% -0.27% -0.23%] index_add_ perm : Elapsed 0.060 ms (5.969 ms / 100) 5.852 -> 5.839 ( -0.22%) [ +0.00% +0.07% +0.02% / +0.03% -0.10% -0.22%] index_copy_ perm : Elapsed 0.059 ms (5.852 ms / 100) 5.949 -> 5.938 ( -0.18%) [ +0.00% +0.25% +0.08% / +0.17% -0.10% -0.18%] index_add_ perm_sorted : Elapsed 0.059 ms (5.949 ms / 100) 5.837 -> 5.835 ( -0.03%) [ +0.10% +0.00% +0.05% / +0.22% +0.07% -0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.843 ms / 100) 6.150 -> 6.141 ( -0.15%) [ +0.00% +0.02% +0.05% / +0.08% -0.11% -0.15%] index_select const : Elapsed 0.062 ms (6.150 ms / 100) 6.237 -> 6.233 ( -0.06%) [ +0.02% +0.00% +0.05% / -0.06% -0.06% -0.05%] index_select wrap : Elapsed 0.062 ms (6.238 ms / 100) 6.209 -> 6.203 ( -0.10%) [ +0.00% +0.03% +0.06% / +0.08% -0.10% -0.08%] index_select linear : Elapsed 0.062 ms (6.209 ms / 100) 6.221 -> 6.211 ( -0.16%) [ +0.06% +0.00% +0.10% / +0.18% +0.03% -0.16%] index_select reverse : Elapsed 0.062 ms (6.225 ms / 100) 6.143 -> 6.141 ( -0.03%) [ +0.08% +0.00% +0.05% / +0.03% -0.03% +0.02%] index_select skip64 : Elapsed 0.061 ms (6.148 ms / 100) 6.142 -> 6.145 ( +0.05%) [ +0.11% +0.00% +0.15% / +0.26% +0.05% +0.07%] index_select skip256 : Elapsed 0.061 ms (6.149 ms / 100) 6.228 -> 6.221 ( -0.11%) [ +0.05% +0.00% +0.10% / +0.11% -0.05% -0.11%] index_select spread : Elapsed 0.062 ms (6.231 ms / 100) 6.225 -> 6.214 ( -0.18%) [ +0.00% +0.08% +0.08% / +0.03% -0.18% -0.14%] index_select strided 3 : Elapsed 0.062 ms (6.225 ms / 100) 6.192 -> 6.187 ( -0.08%) [ +0.00% +0.03% +0.02% / +0.11% +0.10% -0.08%] index_select random : Elapsed 0.062 ms (6.192 ms / 100) 6.202 -> 6.197 ( -0.08%) [ +0.00% +0.00% +0.10% / +0.10% +0.08% -0.08%] index_select random_sorted : Elapsed 0.062 ms (6.202 ms / 100) B = [16, 20, 5, 40] (stride (1, 80, 16, 1600)) A = [16, 20, 4, 40] (stride (800, 1, 12800, 20)) dim = 2 5.865 -> 5.874 ( +0.15%) [ +0.03% +0.00% +0.20% / +0.26% +0.17% +0.15%] index_add_ linear : Elapsed 0.059 ms (5.867 ms / 100) 5.775 -> 5.777 ( +0.03%) [ +0.24% +0.00% +0.19% / +0.23% +0.17% +0.03%] index_copy_ linear : Elapsed 0.058 ms (5.789 ms / 100) 5.871 -> 5.869 ( -0.03%) [ +0.00% +0.09% +0.00% / -0.03% +0.05% +0.05%] index_add_ reverse : Elapsed 0.059 ms (5.871 ms / 100) 5.776 -> 5.783 ( +0.12%) [ +0.00% +0.09% +0.07% / +0.14% +0.12% +0.16%] index_copy_ reverse : Elapsed 0.058 ms (5.776 ms / 100) 5.871 -> 5.873 ( +0.03%) [ +0.00% +0.09% +0.07% / +0.12% +0.03% +0.10%] index_add_ spread : Elapsed 0.059 ms (5.871 ms / 100) 5.778 -> 5.777 ( -0.02%) [ +0.00% +0.02% +0.03% / -0.02% +0.12% +0.12%] index_copy_ spread : Elapsed 0.058 ms (5.778 ms / 100) 5.906 -> 5.897 ( -0.15%) [ +0.08% +0.00% +0.00% / +0.03% -0.08% -0.15%] index_add_ strided 3 : Elapsed 0.059 ms (5.911 ms / 100) 5.810 -> 5.801 ( -0.15%) [ +0.07% +0.00% +0.00% / +0.15% -0.09% -0.15%] index_copy_ strided 3 : Elapsed 0.058 ms (5.814 ms / 100) 5.886 -> 5.882 ( -0.07%) [ +0.03% +0.00% +0.07% / -0.05% +0.12% -0.07%] index_add_ perm : Elapsed 0.059 ms (5.888 ms / 100) 5.791 -> 5.783 ( -0.14%) [ +0.07% +0.00% +0.00% / +0.22% -0.14% +0.16%] index_copy_ perm : Elapsed 0.058 ms (5.795 ms / 100) 5.867 -> 5.879 ( +0.20%) [ +0.00% +0.12% +0.17% / +0.29% +0.26% +0.20%] index_add_ perm_sorted : Elapsed 0.059 ms (5.867 ms / 100) 5.785 -> 5.784 ( -0.02%) [ +0.05% +0.00% +0.02% / +0.03% -0.02% +0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.788 ms / 100) 6.130 -> 6.128 ( -0.03%) [ +0.05% +0.00% +0.05% / +0.23% -0.03% +0.02%] index_select const : Elapsed 0.061 ms (6.133 ms / 100) 6.177 -> 6.183 ( +0.10%) [ +0.08% +0.00% +0.05% / +0.26% +0.10% +0.10%] index_select wrap : Elapsed 0.062 ms (6.182 ms / 100) 6.169 -> 6.170 ( +0.02%) [ +0.00% +0.08% +0.08% / +0.05% +0.10% +0.02%] index_select linear : Elapsed 0.062 ms (6.169 ms / 100) 6.167 -> 6.181 ( +0.23%) [ +0.00% +0.00% +0.13% / +0.24% +0.31% +0.23%] index_select reverse : Elapsed 0.062 ms (6.167 ms / 100) 6.123 -> 6.125 ( +0.03%) [ +0.00% +0.10% +0.20% / +0.34% +0.13% +0.03%] index_select skip64 : Elapsed 0.061 ms (6.123 ms / 100) 6.131 -> 6.127 ( -0.07%) [ +0.00% +0.02% +0.20% / +0.13% -0.07% -0.02%] index_select skip256 : Elapsed 0.061 ms (6.131 ms / 100) 6.173 -> 6.171 ( -0.03%) [ +0.00% +0.05% +0.08% / +0.18% +0.05% -0.03%] index_select spread : Elapsed 0.062 ms (6.173 ms / 100) 6.171 -> 6.182 ( +0.18%) [ +0.03% +0.00% +0.26% / +0.26% +0.32% +0.18%] index_select strided 3 : Elapsed 0.062 ms (6.173 ms / 100) 6.157 -> 6.164 ( +0.11%) [ +0.10% +0.00% +0.23% / +0.29% +0.11% +0.16%] index_select random : Elapsed 0.062 ms (6.163 ms / 100) 6.149 -> 6.152 ( +0.05%) [ +0.00% +0.11% +0.21% / +0.08% +0.20% +0.05%] index_select random_sorted : Elapsed 0.061 ms (6.149 ms / 100) B = [16, 20, 5, 40] (stride (20, 1, 320, 1600)) A = [16, 20, 4, 40] (stride (800, 1, 12800, 20)) dim = 2 5.589 -> 5.581 ( -0.14%) [ +0.00% +0.09% +0.04% / +0.00% -0.09% -0.14%] index_add_ linear : Elapsed 0.056 ms (5.589 ms / 100) 5.515 -> 5.504 ( -0.20%) [ +0.18% +0.00% +0.24% / +0.29% +0.07% -0.20%] index_copy_ linear : Elapsed 0.055 ms (5.525 ms / 100) 5.590 -> 5.574 ( -0.29%) [ +0.00% +0.07% +0.00% / +0.21% -0.02% -0.29%] index_add_ reverse : Elapsed 0.056 ms (5.590 ms / 100) 5.520 -> 5.510 ( -0.18%) [ +0.11% +0.09% +0.00% / +0.18% -0.18% -0.18%] index_copy_ reverse : Elapsed 0.055 ms (5.526 ms / 100) 5.582 -> 5.578 ( -0.07%) [ +0.00% +0.09% +0.16% / +0.09% +0.04% -0.07%] index_add_ spread : Elapsed 0.056 ms (5.582 ms / 100) 5.518 -> 5.513 ( -0.09%) [ +0.00% +0.14% +0.16% / +0.11% -0.09% -0.05%] index_copy_ spread : Elapsed 0.055 ms (5.518 ms / 100) 5.592 -> 5.586 ( -0.11%) [ +0.04% +0.00% +0.04% / +0.05% -0.09% -0.11%] index_add_ strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.529 -> 5.513 ( -0.29%) [ +0.16% +0.00% +0.18% / +0.07% -0.27% -0.29%] index_copy_ strided 3 : Elapsed 0.055 ms (5.538 ms / 100) 5.590 -> 5.581 ( -0.16%) [ +0.00% +0.23% +0.18% / +0.11% -0.16% -0.02%] index_add_ perm : Elapsed 0.056 ms (5.590 ms / 100) 5.520 -> 5.505 ( -0.27%) [ +0.00% +0.02% +0.16% / +0.18% -0.13% -0.27%] index_copy_ perm : Elapsed 0.055 ms (5.520 ms / 100) 5.586 -> 5.580 ( -0.11%) [ +0.14% +0.00% +0.11% / +0.13% -0.02% -0.11%] index_add_ perm_sorted : Elapsed 0.056 ms (5.594 ms / 100) 5.522 -> 5.509 ( -0.24%) [ +0.02% +0.05% +0.00% / +0.25% -0.24% -0.18%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.523 ms / 100) 5.791 -> 5.800 ( +0.16%) [ +0.00% +0.10% +0.16% / +0.16% +0.76% +0.78%] index_select const : Elapsed 0.058 ms (5.791 ms / 100) 5.857 -> 5.852 ( -0.09%) [ +0.00% +0.03% +0.22% / -0.03% -0.09% +0.07%] index_select wrap : Elapsed 0.059 ms (5.857 ms / 100) 5.852 -> 5.841 ( -0.19%) [ +0.00% +0.07% +0.14% / +0.07% -0.14% -0.19%] index_select linear : Elapsed 0.059 ms (5.852 ms / 100) 5.853 -> 5.855 ( +0.03%) [ +0.00% +0.12% +0.31% / +0.03% +0.07% +0.12%] index_select reverse : Elapsed 0.059 ms (5.853 ms / 100) 5.790 -> 5.803 ( +0.22%) [ +0.00% +0.09% +0.07% / +0.22% +0.69% +0.67%] index_select skip64 : Elapsed 0.058 ms (5.790 ms / 100) 5.790 -> 5.801 ( +0.19%) [ +0.00% +0.16% +0.07% / +0.19% +0.71% +0.79%] index_select skip256 : Elapsed 0.058 ms (5.790 ms / 100) 5.854 -> 5.860 ( +0.10%) [ +0.00% +0.09% +0.15% / +0.19% +0.15% +0.10%] index_select spread : Elapsed 0.059 ms (5.854 ms / 100) 5.849 -> 5.855 ( +0.10%) [ +0.09% +0.00% +0.10% / +0.29% +0.10% +0.19%] index_select strided 3 : Elapsed 0.059 ms (5.854 ms / 100) 5.811 -> 5.820 ( +0.15%) [ +0.00% +0.09% +0.15% / +0.15% +0.62% +0.60%] index_select random : Elapsed 0.058 ms (5.811 ms / 100) 5.814 -> 5.827 ( +0.22%) [ +0.00% +0.03% +0.19% / +0.22% +0.55% +0.62%] index_select random_sorted : Elapsed 0.058 ms (5.814 ms / 100) out_shape = [16, 20, 4, 5] in_shape = [16, 20, 4, 40] idx_dim = 3 B = [16, 20, 4, 5] (stride (400, 1, 20, 80)) A = [16, 20, 4, 40] (stride (20, 1, 320, 1280)) dim = 3 1.383 -> 1.384 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.29% +0.36%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.00% +0.07% / +0.14% +0.29% +0.29%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.29% +0.22%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.36% +0.36% +0.00% / +0.29% +0.58% +0.51%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.58% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.36% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.14% +0.00% +0.29% / +0.00% +0.43% +0.36%] index_select spread : Elapsed 0.014 ms (1.382 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.36% +0.29%] index_select strided 3 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.36% +0.00% +0.15% / +0.22% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.385 ( +0.36%) [ +0.29% +0.22% +0.00% / +0.36% +0.36% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.29% +0.36%] index_select strided 8 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.22% +0.15% +0.00% / +0.36% +0.58% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.22% +0.29% +0.00% / +0.36% +0.44% +0.58%] index_select random : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.29% +0.14%] index_select random_sorted : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.36% +0.43%] index_select perm : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.379 ( -0.07%) [ +0.00% +0.07% +0.14% / -0.07% +0.51% +0.43%] index_select perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) B = [16, 20, 4, 5] (stride (4, 320, 1, 64)) A = [16, 20, 4, 40] (stride (4, 64, 1, 1280)) dim = 3 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.61% +0.61%] index_select const : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.61% +0.68%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.68% +0.61%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.75% +0.75%] index_select reverse : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.75% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.68% +0.68%] index_select skip256 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.75% +0.68%] index_select spread : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.75% +0.68%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.75%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.81% +0.75%] index_select random : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.75% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.61%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) B = [16, 20, 4, 5] (stride (1, 320, 16, 64)) A = [16, 20, 4, 40] (stride (160, 2560, 40, 1)) dim = 3 1.609 -> 1.610 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.56% +0.37%] index_select const : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.06% +0.00% +0.06% / +0.12% +0.56% +0.56%] index_select wrap : Elapsed 0.016 ms (1.612 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.44% +0.50%] index_select linear : Elapsed 0.016 ms (1.610 ms / 100) 1.608 -> 1.608 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.44% +0.50%] index_select reverse : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.12% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.611 ms / 100) 1.608 -> 1.609 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +0.56% +0.50%] index_select skip256 : Elapsed 0.016 ms (1.610 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.00% +0.00% +0.12% / +0.06% +0.43% +0.56%] index_select spread : Elapsed 0.016 ms (1.611 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.19% +0.12% +0.00% / +0.06% +0.50% +0.31%] index_select strided 3 : Elapsed 0.016 ms (1.615 ms / 100) 1.601 -> 1.603 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.56% +0.62%] index_select strided 5 : Elapsed 0.016 ms (1.603 ms / 100) 1.602 -> 1.604 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.44% +0.50%] index_select strided 7 : Elapsed 0.016 ms (1.602 ms / 100) 1.608 -> 1.612 ( +0.25%) [ +0.25% +0.00% +0.25% / +0.25% +0.68% +0.62%] index_select strided 8 : Elapsed 0.016 ms (1.612 ms / 100) 1.607 -> 1.605 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.44% +0.31%] index_select strided 16 : Elapsed 0.016 ms (1.608 ms / 100) 1.604 -> 1.606 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.69% +0.75%] index_select random : Elapsed 0.016 ms (1.606 ms / 100) 1.611 -> 1.610 ( -0.06%) [ +0.12% +0.12% +0.00% / -0.06% +0.56% +0.56%] index_select random_sorted : Elapsed 0.016 ms (1.613 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.00% +0.12% +0.06% / +0.00% +0.75% +0.75%] index_select perm : Elapsed 0.016 ms (1.610 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.50% +0.44%] index_select perm_sorted : Elapsed 0.016 ms (1.607 ms / 100) B = [16, 20, 4, 5] (stride (100, 5, 1600, 1)) A = [16, 20, 4, 40] (stride (3200, 160, 1, 4)) dim = 3 0.635 -> 0.636 ( +0.16%) [ +0.00% +0.63% +0.16% / +0.16% +0.94% +1.10%] index_select const : Elapsed 0.006 ms (0.635 ms / 100) 0.636 -> 0.641 ( +0.79%) [ +0.16% +0.31% +0.00% / +0.79% +0.79% +1.10%] index_select wrap : Elapsed 0.006 ms (0.637 ms / 100) 0.639 -> 0.638 ( -0.16%) [ +0.31% +0.16% +0.00% / -0.16% +0.47% +0.31%] index_select linear : Elapsed 0.006 ms (0.641 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.31% +0.00% +0.00% / +0.16% +0.63% +0.63%] index_select reverse : Elapsed 0.006 ms (0.640 ms / 100) 0.633 -> 0.633 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +1.11% +1.26%] index_select skip64 : Elapsed 0.006 ms (0.634 ms / 100) 0.635 -> 0.635 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.79% +0.63%] index_select skip256 : Elapsed 0.006 ms (0.635 ms / 100) 0.636 -> 0.636 ( +0.00%) [ +1.57% +0.00% +0.16% / +0.00% +0.94% +0.63%] index_select spread : Elapsed 0.006 ms (0.646 ms / 100) 0.635 -> 0.636 ( +0.16%) [ +0.31% +0.16% +0.00% / +0.16% +0.94% +1.10%] index_select strided 3 : Elapsed 0.006 ms (0.637 ms / 100) 0.636 -> 0.636 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.79% +0.63%] index_select strided 5 : Elapsed 0.006 ms (0.637 ms / 100) 0.634 -> 0.634 ( +0.00%) [ +0.16% +0.00% +0.32% / +0.00% +0.95% +1.10%] index_select strided 7 : Elapsed 0.006 ms (0.635 ms / 100) 0.633 -> 0.634 ( +0.16%) [ +0.32% +0.16% +0.00% / +0.16% +1.42% +1.26%] index_select strided 8 : Elapsed 0.006 ms (0.635 ms / 100) 0.635 -> 0.636 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +1.10% +0.79%] index_select strided 16 : Elapsed 0.006 ms (0.636 ms / 100) 0.636 -> 0.635 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.79% +0.63%] index_select random : Elapsed 0.006 ms (0.636 ms / 100) 0.634 -> 0.635 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.95% +1.42%] index_select random_sorted : Elapsed 0.006 ms (0.635 ms / 100) 0.635 -> 0.635 ( +0.00%) [ +0.00% +0.47% +0.16% / +0.00% +0.94% +0.94%] index_select perm : Elapsed 0.006 ms (0.635 ms / 100) 0.634 -> 0.633 ( -0.16%) [ +0.00% +1.58% +0.00% / -0.16% +1.10% +1.10%] index_select perm_sorted : Elapsed 0.006 ms (0.634 ms / 100) B = [16, 20, 4, 5] (stride (100, 5, 1600, 1)) A = [16, 20, 4, 40] (stride (4, 64, 1, 1280)) dim = 3 1.380 -> 1.383 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.29% +0.29%] index_select const : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.07% +0.00% +0.22% / +0.00% +0.22% +0.22%] index_select wrap : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.29% +0.15% +0.00% / +0.22% +0.36% +0.51%] index_select linear : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.00% +0.14% / +0.22% +0.29% +0.14%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.07% +0.07%] index_select skip64 : Elapsed 0.014 ms (1.382 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.29% +0.22% +0.00% / +0.22% +0.51% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.382 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.22% +0.14%] index_select spread : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.14% +0.14%] index_select strided 3 : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.22% +0.00% +0.07% / +0.00% +0.36% +0.36%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.22% +0.29%] index_select strided 7 : Elapsed 0.014 ms (1.382 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.22% +0.29% +0.00% / +0.22% +0.51% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.00% +0.00% +0.07% / +0.29% +0.44% +0.36%] index_select strided 16 : Elapsed 0.014 ms (1.379 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.15% +0.07% / +0.15% +0.44% +0.44%] index_select random : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.51% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.44% +0.36%] index_select perm : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.51% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) B = [16, 20, 4, 5] (stride (1, 16, 1600, 320)) A = [16, 20, 4, 40] (stride (1, 2560, 640, 16)) dim = 3 1.574 -> 1.578 ( +0.25%) [ +0.32% +0.13% +0.00% / +0.25% +0.25% +0.38%] index_select const : Elapsed 0.016 ms (1.579 ms / 100) 1.523 -> 1.524 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.53% +0.53%] index_select wrap : Elapsed 0.015 ms (1.523 ms / 100) 1.515 -> 1.519 ( +0.26%) [ +0.07% +0.00% +0.26% / +0.26% +0.53% +0.53%] index_select linear : Elapsed 0.015 ms (1.516 ms / 100) 1.505 -> 1.507 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.73% +0.93%] index_select reverse : Elapsed 0.015 ms (1.506 ms / 100) 1.552 -> 1.558 ( +0.39%) [ +0.00% +0.26% +0.00% / +0.39% +1.74% +0.64%] index_select skip64 : Elapsed 0.016 ms (1.552 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.32% +0.00% +0.00% / +0.06% +0.38% +0.13%] index_select skip256 : Elapsed 0.016 ms (1.577 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.00% +0.13% / +0.00% +0.74% +0.60%] index_select spread : Elapsed 0.015 ms (1.494 ms / 100) 1.504 -> 1.505 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.60% +0.53%] index_select strided 3 : Elapsed 0.015 ms (1.504 ms / 100) 1.514 -> 1.514 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.79% +0.86%] index_select strided 5 : Elapsed 0.015 ms (1.515 ms / 100) 1.508 -> 1.509 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.66% +0.73%] index_select strided 7 : Elapsed 0.015 ms (1.509 ms / 100) 1.507 -> 1.508 ( +0.07%) [ +0.13% +0.33% +0.00% / +0.07% +0.73% +0.66%] index_select strided 8 : Elapsed 0.015 ms (1.509 ms / 100) 1.487 -> 1.488 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.61% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.487 ms / 100) 1.487 -> 1.490 ( +0.20%) [ +0.34% +0.27% +0.00% / +0.20% +0.87% +0.94%] index_select random : Elapsed 0.015 ms (1.492 ms / 100) 1.497 -> 1.500 ( +0.20%) [ +0.07% +0.00% +0.07% / +0.20% +0.73% +0.53%] index_select random_sorted : Elapsed 0.015 ms (1.498 ms / 100) 1.509 -> 1.509 ( +0.00%) [ +0.20% +0.00% +0.07% / +0.00% +0.66% +0.53%] index_select perm : Elapsed 0.015 ms (1.512 ms / 100) 1.524 -> 1.526 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.66% +0.66%] index_select perm_sorted : Elapsed 0.015 ms (1.525 ms / 100) B = [16, 20, 4, 5] (stride (1, 16, 1600, 320)) A = [16, 20, 4, 40] (stride (80, 1, 20, 1280)) dim = 3 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.41%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.41% +0.34%] index_select wrap : Elapsed 0.015 ms (1.479 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.41%] index_select linear : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.47% +0.47%] index_select reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.68% +0.61%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.20% +0.00% / +0.07% +0.47% +0.61%] index_select spread : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.14% +0.00% +0.00% / +0.20% +0.47% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.61% +0.95%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.75% +0.68%] index_select strided 16 : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select random : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.61% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.61% +0.54%] index_select perm : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) out_shape = [5, 20, 40, 4] in_shape = [16, 20, 40, 4] idx_dim = 0 B = [5, 20, 40, 4] (stride (3200, 160, 1, 40)) A = [16, 20, 40, 4] (stride (20, 1, 1280, 320)) dim = 0 2.322 -> 2.321 ( -0.04%) [ +0.17% +0.00% +0.17% / -0.04% +0.30% +0.60%] index_select const : Elapsed 0.023 ms (2.326 ms / 100) 2.320 -> 2.317 ( -0.13%) [ +0.00% +0.09% +0.09% / -0.13% +0.43% +0.73%] index_select wrap : Elapsed 0.023 ms (2.320 ms / 100) 2.324 -> 2.326 ( +0.09%) [ +0.00% +0.22% +0.00% / +0.09% +0.26% +0.30%] index_select linear : Elapsed 0.023 ms (2.324 ms / 100) 2.321 -> 2.318 ( -0.13%) [ +0.04% +0.17% +0.00% / -0.13% +0.56% +0.43%] index_select reverse : Elapsed 0.023 ms (2.322 ms / 100) 2.318 -> 2.323 ( +0.22%) [ +0.00% +0.00% +0.09% / +0.22% +0.65% +0.73%] index_select skip64 : Elapsed 0.023 ms (2.318 ms / 100) 2.323 -> 2.322 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.52% +0.39%] index_select skip256 : Elapsed 0.023 ms (2.323 ms / 100) 2.325 -> 2.329 ( +0.17%) [ +0.34% +0.26% +0.00% / +0.17% +0.60% +0.65%] index_select spread : Elapsed 0.023 ms (2.333 ms / 100) 2.329 -> 2.329 ( +0.00%) [ +0.00% +0.09% +0.04% / +0.00% +0.56% +0.47%] index_select strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +0.43% +0.26%] index_select strided 5 : Elapsed 0.023 ms (2.321 ms / 100) 2.321 -> 2.319 ( -0.09%) [ +0.04% +0.34% +0.00% / -0.09% +0.47% +0.60%] index_select strided 7 : Elapsed 0.023 ms (2.322 ms / 100) 2.312 -> 2.311 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.48% +0.65%] index_select strided 8 : Elapsed 0.023 ms (2.313 ms / 100) 2.322 -> 2.327 ( +0.22%) [ +0.00% +0.22% +0.04% / +0.22% +0.56% +0.43%] index_select random : Elapsed 0.023 ms (2.322 ms / 100) 2.321 -> 2.323 ( +0.09%) [ +0.00% +0.22% +0.09% / +0.09% +0.47% +0.39%] index_select random_sorted : Elapsed 0.023 ms (2.321 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.30% +0.39%] index_select perm : Elapsed 0.023 ms (2.336 ms / 100) 2.327 -> 2.328 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.39% +0.64%] index_select perm_sorted : Elapsed 0.023 ms (2.328 ms / 100) B = [5, 20, 40, 4] (stride (3200, 4, 80, 1)) A = [16, 20, 40, 4] (stride (160, 2560, 4, 1)) dim = 0 2.112 -> 2.112 ( +0.00%) [ +0.19% +0.24% +0.00% / +0.05% +0.19% +0.00%] index_select const : Elapsed 0.021 ms (2.116 ms / 100) 2.172 -> 2.151 ( -0.97%) [ +0.00% +0.18% +0.28% / +0.32% -0.97% -0.74%] index_select wrap : Elapsed 0.022 ms (2.172 ms / 100) 2.177 -> 2.154 ( -1.06%) [ +0.37% +0.00% +0.14% / -0.05% -1.06% -1.01%] index_select linear : Elapsed 0.022 ms (2.185 ms / 100) 2.177 -> 2.143 ( -1.56%) [ +0.00% +0.05% +0.18% / -0.05% -1.56% -1.33%] index_select reverse : Elapsed 0.022 ms (2.177 ms / 100) 2.101 -> 2.103 ( +0.10%) [ +0.19% +0.00% +0.19% / +0.10% +0.14% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.105 ms / 100) 2.114 -> 2.111 ( -0.14%) [ +0.14% +0.00% +0.14% / -0.09% -0.14% -0.05%] index_select skip256 : Elapsed 0.021 ms (2.117 ms / 100) 2.179 -> 2.168 ( -0.50%) [ +0.00% +0.00% +0.09% / -0.18% -0.50% -0.28%] index_select spread : Elapsed 0.022 ms (2.179 ms / 100) 2.176 -> 2.172 ( -0.18%) [ +0.00% +0.32% +0.23% / -0.09% -0.14% -0.18%] index_select strided 3 : Elapsed 0.022 ms (2.176 ms / 100) 2.175 -> 2.163 ( -0.55%) [ +0.00% +0.00% +0.09% / +0.00% -0.55% -0.46%] index_select strided 5 : Elapsed 0.022 ms (2.175 ms / 100) 2.176 -> 2.166 ( -0.46%) [ +0.32% +0.00% +0.09% / +0.14% -0.46% -0.41%] index_select strided 7 : Elapsed 0.022 ms (2.183 ms / 100) 2.125 -> 2.119 ( -0.28%) [ +0.00% +0.09% +0.19% / +0.28% -0.28% -0.19%] index_select strided 8 : Elapsed 0.021 ms (2.125 ms / 100) 2.161 -> 2.159 ( -0.09%) [ +0.00% +0.14% +0.23% / -0.05% -0.05% -0.09%] index_select random : Elapsed 0.022 ms (2.161 ms / 100) 2.160 -> 2.139 ( -0.97%) [ +0.09% +0.14% +0.00% / -0.14% -0.74% -0.97%] index_select random_sorted : Elapsed 0.022 ms (2.162 ms / 100) 2.179 -> 2.160 ( -0.87%) [ +0.00% +0.09% +0.09% / -0.09% -0.78% -0.87%] index_select perm : Elapsed 0.022 ms (2.179 ms / 100) 2.174 -> 2.170 ( -0.18%) [ +0.00% +0.32% +0.28% / +0.32% -0.18% -0.09%] index_select perm_sorted : Elapsed 0.022 ms (2.174 ms / 100) B = [5, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [16, 20, 40, 4] (stride (1, 2560, 16, 640)) dim = 0 2.390 -> 2.392 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.29% +0.33%] index_select const : Elapsed 0.024 ms (2.391 ms / 100) 2.380 -> 2.381 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.42% +0.34%] index_select wrap : Elapsed 0.024 ms (2.382 ms / 100) 2.412 -> 2.410 ( -0.08%) [ +0.04% +0.00% +0.17% / -0.08% +0.46% +0.33%] index_select linear : Elapsed 0.024 ms (2.413 ms / 100) 2.390 -> 2.392 ( +0.08%) [ +0.21% +0.00% +0.33% / +0.08% +0.71% +0.75%] index_select reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.377 -> 2.376 ( -0.04%) [ +0.21% +0.21% +0.00% / -0.04% +0.67% +0.67%] index_select skip64 : Elapsed 0.024 ms (2.382 ms / 100) 2.390 -> 2.393 ( +0.13%) [ +0.08% +0.13% +0.00% / +0.13% +0.46% +0.13%] index_select skip256 : Elapsed 0.024 ms (2.392 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.96% +1.04%] index_select spread : Elapsed 0.024 ms (2.404 ms / 100) 2.426 -> 2.424 ( -0.08%) [ +0.04% +0.25% +0.00% / -0.08% +0.70% +0.45%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.425 -> 2.422 ( -0.12%) [ +0.08% +0.00% +0.21% / -0.12% +0.49% +0.49%] index_select strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.87% +0.96%] index_select strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.406 -> 2.412 ( +0.25%) [ +0.25% +0.00% +0.29% / +0.25% +0.37% +0.50%] index_select strided 8 : Elapsed 0.024 ms (2.412 ms / 100) 2.399 -> 2.401 ( +0.08%) [ +0.00% +0.13% +0.00% / +0.08% +0.50% +0.58%] index_select random : Elapsed 0.024 ms (2.399 ms / 100) 2.403 -> 2.408 ( +0.21%) [ +0.21% +0.04% +0.00% / +0.21% +1.00% +1.00%] index_select random_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.427 -> 2.426 ( -0.04%) [ +0.21% +0.00% +0.00% / -0.04% +0.45% +0.58%] index_select perm : Elapsed 0.024 ms (2.432 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.17% +0.00% +0.08% / +0.17% +0.54% +0.63%] index_select perm_sorted : Elapsed 0.024 ms (2.398 ms / 100) B = [5, 20, 40, 4] (stride (800, 40, 1, 4000)) A = [16, 20, 40, 4] (stride (80, 1, 1280, 20)) dim = 0 2.258 -> 2.264 ( +0.27%) [ +0.09% +0.04% +0.00% / +0.27% +0.58% +0.44%] index_select const : Elapsed 0.023 ms (2.260 ms / 100) 2.255 -> 2.255 ( +0.00%) [ +0.18% +0.04% +0.00% / +0.00% +0.00% +0.27%] index_select wrap : Elapsed 0.023 ms (2.259 ms / 100) 2.255 -> 2.257 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +0.35% +0.49%] index_select linear : Elapsed 0.023 ms (2.256 ms / 100) 2.249 -> 2.249 ( +0.00%) [ +0.18% +0.00% +0.31% / +0.00% +0.53% +0.44%] index_select reverse : Elapsed 0.023 ms (2.253 ms / 100) 2.257 -> 2.256 ( -0.04%) [ +0.04% +0.09% +0.00% / -0.04% +0.40% +0.35%] index_select skip64 : Elapsed 0.023 ms (2.258 ms / 100) 2.256 -> 2.260 ( +0.18%) [ +0.00% +0.31% +0.09% / +0.18% +0.62% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.256 ms / 100) 2.253 -> 2.256 ( +0.13%) [ +0.27% +0.04% +0.00% / +0.18% +0.13% +0.40%] index_select spread : Elapsed 0.023 ms (2.259 ms / 100) 2.250 -> 2.252 ( +0.09%) [ +0.27% +0.00% +0.09% / +0.09% +0.49% +0.58%] index_select strided 3 : Elapsed 0.023 ms (2.256 ms / 100) 2.259 -> 2.256 ( -0.13%) [ +0.00% +0.04% +0.00% / -0.13% +0.13% +0.09%] index_select strided 5 : Elapsed 0.023 ms (2.259 ms / 100) 2.248 -> 2.253 ( +0.22%) [ +0.18% +0.49% +0.00% / +0.22% +0.36% +0.53%] index_select strided 7 : Elapsed 0.023 ms (2.252 ms / 100) 2.257 -> 2.257 ( +0.00%) [ +0.18% +0.27% +0.00% / +0.00% +0.44% +0.27%] index_select strided 8 : Elapsed 0.023 ms (2.261 ms / 100) 2.249 -> 2.244 ( -0.22%) [ +0.00% +0.49% +0.09% / -0.22% +0.22% +0.18%] index_select random : Elapsed 0.022 ms (2.249 ms / 100) 2.249 -> 2.252 ( +0.13%) [ +0.00% +0.44% +0.09% / +0.13% +0.22% +0.44%] index_select random_sorted : Elapsed 0.022 ms (2.249 ms / 100) 2.250 -> 2.256 ( +0.27%) [ +0.18% +0.62% +0.00% / +0.40% +0.31% +0.27%] index_select perm : Elapsed 0.023 ms (2.254 ms / 100) 2.251 -> 2.245 ( -0.27%) [ +0.13% +0.40% +0.00% / -0.27% +0.49% +0.13%] index_select perm_sorted : Elapsed 0.023 ms (2.254 ms / 100) B = [5, 20, 40, 4] (stride (1, 200, 5, 4000)) dim = 0 fill_cnt = 16 1.184 -> 1.190 ( +0.51%) [ +0.17% +1.01% +0.00% / +0.51% +2.20% +1.27%] index_fill_ const : Elapsed 0.012 ms (1.186 ms / 100) 1.192 -> 1.201 ( +0.76%) [ +0.25% +0.00% +0.59% / +0.76% +1.68% +1.26%] index_fill_ linear : Elapsed 0.012 ms (1.195 ms / 100) 1.188 -> 1.195 ( +0.59%) [ +0.17% +0.67% +0.00% / +0.59% +1.77% +1.85%] index_fill_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.195 -> 1.188 ( -0.59%) [ +0.25% +0.00% +0.50% / -0.59% +0.75% +1.84%] index_fill_ skip64 : Elapsed 0.012 ms (1.198 ms / 100) 1.190 -> 1.200 ( +0.84%) [ +0.00% +1.26% +1.18% / +1.60% +0.84% +1.76%] index_fill_ skip256 : Elapsed 0.012 ms (1.190 ms / 100) 1.206 -> 1.210 ( +0.33%) [ +0.00% +0.33% +0.08% / +0.33% +0.75% +1.08%] index_fill_ spread : Elapsed 0.012 ms (1.206 ms / 100) 1.189 -> 1.200 ( +0.93%) [ +2.10% +0.84% +0.00% / +0.93% +2.02% +2.86%] index_fill_ strided 3 : Elapsed 0.012 ms (1.214 ms / 100) 1.197 -> 1.209 ( +1.00%) [ +1.17% +0.00% +0.17% / +1.00% +2.26% +2.34%] index_fill_ random : Elapsed 0.012 ms (1.211 ms / 100) 1.194 -> 1.192 ( -0.17%) [ +0.59% +0.59% +0.00% / -0.17% +1.34% +1.76%] index_fill_ random_sorted : Elapsed 0.012 ms (1.201 ms / 100) B = [5, 20, 40, 4] (stride (20, 1, 100, 4000)) A = [16, 20, 40, 4] (stride (3200, 40, 1, 800)) dim = 0 2.157 -> 2.158 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.88% +0.88%] index_select const : Elapsed 0.022 ms (2.157 ms / 100) 2.239 -> 2.245 ( +0.27%) [ +0.22% +0.00% +0.04% / +0.27% +0.54% +0.49%] index_select wrap : Elapsed 0.022 ms (2.244 ms / 100) 2.238 -> 2.241 ( +0.13%) [ +0.09% +0.27% +0.00% / +0.13% +0.31% +0.22%] index_select linear : Elapsed 0.022 ms (2.240 ms / 100) 2.239 -> 2.240 ( +0.04%) [ +0.00% +0.13% +0.04% / +0.22% +0.04% +0.40%] index_select reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.153 -> 2.158 ( +0.23%) [ +0.14% +0.23% +0.00% / +0.23% +0.88% +1.11%] index_select skip64 : Elapsed 0.022 ms (2.156 ms / 100) 2.157 -> 2.156 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.93% +0.97%] index_select skip256 : Elapsed 0.022 ms (2.157 ms / 100) 2.232 -> 2.237 ( +0.22%) [ +0.18% +0.09% +0.00% / +0.22% +0.22% +0.36%] index_select spread : Elapsed 0.022 ms (2.236 ms / 100) 2.241 -> 2.243 ( +0.09%) [ +0.09% +0.13% +0.00% / +0.22% +0.09% +0.09%] index_select strided 3 : Elapsed 0.022 ms (2.243 ms / 100) 2.231 -> 2.232 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.22% +0.09% +0.04%] index_select strided 5 : Elapsed 0.022 ms (2.231 ms / 100) 2.239 -> 2.243 ( +0.18%) [ +0.00% +0.00% +0.09% / +0.18% +0.58% +0.31%] index_select strided 7 : Elapsed 0.022 ms (2.239 ms / 100) 2.171 -> 2.172 ( +0.05%) [ +0.23% +0.00% +0.09% / +0.05% +1.01% +0.92%] index_select strided 8 : Elapsed 0.022 ms (2.176 ms / 100) 2.212 -> 2.197 ( -0.68%) [ +0.14% +0.00% +0.05% / +0.00% -0.50% -0.68%] index_select random : Elapsed 0.022 ms (2.215 ms / 100) 2.208 -> 2.203 ( -0.23%) [ +0.18% +0.14% +0.00% / +0.00% -0.23% -0.18%] index_select random_sorted : Elapsed 0.022 ms (2.212 ms / 100) 2.243 -> 2.244 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.36% +0.27%] index_select perm : Elapsed 0.022 ms (2.243 ms / 100) 2.230 -> 2.233 ( +0.13%) [ +0.18% +0.22% +0.00% / +0.40% +0.40% +0.13%] index_select perm_sorted : Elapsed 0.022 ms (2.234 ms / 100) out_shape = [16, 5, 40, 4] in_shape = [16, 20, 40, 4] idx_dim = 1 B = [16, 5, 40, 4] (stride (4, 64, 320, 1)) A = [16, 20, 40, 4] (stride (4, 64, 1280, 1)) dim = 1 1.796 -> 1.797 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.78% +0.50%] index_select const : Elapsed 0.018 ms (1.797 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.06% +0.22% +0.00% / +0.11% +0.83% +0.94%] index_select wrap : Elapsed 0.018 ms (1.813 ms / 100) 1.812 -> 1.815 ( +0.17%) [ +0.17% +0.00% +0.11% / +0.17% +0.88% +0.94%] index_select linear : Elapsed 0.018 ms (1.815 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.11% +0.33% / +0.11% +0.60% +0.55%] index_select reverse : Elapsed 0.018 ms (1.824 ms / 100) 1.796 -> 1.800 ( +0.22%) [ +0.00% +0.00% +0.11% / +0.22% +0.67% +0.56%] index_select skip64 : Elapsed 0.018 ms (1.796 ms / 100) 1.794 -> 1.798 ( +0.22%) [ +0.06% +0.11% +0.00% / +0.22% +0.84% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.795 ms / 100) 1.812 -> 1.812 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +0.72% +0.66%] index_select spread : Elapsed 0.018 ms (1.812 ms / 100) 1.816 -> 1.821 ( +0.28%) [ +0.17% +0.22% +0.00% / +0.28% +0.61% +0.61%] index_select strided 3 : Elapsed 0.018 ms (1.819 ms / 100) 1.797 -> 1.801 ( +0.22%) [ +0.28% +0.00% +0.17% / +0.22% +0.89% +0.78%] index_select strided 5 : Elapsed 0.018 ms (1.802 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.00% +0.11% +0.33% / +0.22% +0.55% +0.44%] index_select strided 7 : Elapsed 0.018 ms (1.813 ms / 100) 1.812 -> 1.815 ( +0.17%) [ +0.06% +0.00% +0.06% / +0.17% +1.55% +0.83%] index_select strided 8 : Elapsed 0.018 ms (1.813 ms / 100) 1.814 -> 1.817 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +1.21% +0.77%] index_select strided 16 : Elapsed 0.018 ms (1.815 ms / 100) 1.815 -> 1.819 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.83% +0.50%] index_select random : Elapsed 0.018 ms (1.815 ms / 100) 1.824 -> 1.830 ( +0.33%) [ +0.00% +0.33% +0.27% / +0.33% +1.32% +0.99%] index_select random_sorted : Elapsed 0.018 ms (1.824 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +1.16% +0.72%] index_select perm : Elapsed 0.018 ms (1.817 ms / 100) 1.817 -> 1.820 ( +0.17%) [ +0.00% +0.06% +0.00% / +0.17% +0.55% +0.44%] index_select perm_sorted : Elapsed 0.018 ms (1.817 ms / 100) B = [16, 5, 40, 4] (stride (1, 64, 320, 16)) dim = 1 fill_cnt = 20 3.643 -> 3.615 ( -0.77%) [ +0.00% +0.14% +0.22% / -0.27% -0.63% -0.77%] index_fill_ const : Elapsed 0.036 ms (3.643 ms / 100) 3.551 -> 3.537 ( -0.39%) [ +0.23% +0.00% +0.17% / -0.39% -0.34% -0.28%] index_fill_ linear : Elapsed 0.036 ms (3.559 ms / 100) 3.544 -> 3.527 ( -0.48%) [ +0.17% +0.00% +0.11% / -0.48% -0.37% -0.42%] index_fill_ reverse : Elapsed 0.035 ms (3.550 ms / 100) 3.613 -> 3.598 ( -0.42%) [ +0.00% +0.22% +0.39% / -0.25% -0.42% -0.36%] index_fill_ skip64 : Elapsed 0.036 ms (3.613 ms / 100) 3.619 -> 3.601 ( -0.50%) [ +0.06% +0.00% +0.14% / -0.41% -0.50% -0.47%] index_fill_ skip256 : Elapsed 0.036 ms (3.621 ms / 100) 3.537 -> 3.520 ( -0.48%) [ +0.11% +0.11% +0.00% / -0.48% -0.42% -0.42%] index_fill_ spread : Elapsed 0.035 ms (3.541 ms / 100) 3.541 -> 3.521 ( -0.56%) [ +0.23% +0.17% +0.00% / -0.54% -0.40% -0.56%] index_fill_ strided 3 : Elapsed 0.035 ms (3.549 ms / 100) 3.531 -> 3.511 ( -0.57%) [ +0.00% +0.06% +0.00% / -0.37% -0.57% -0.45%] index_fill_ random : Elapsed 0.035 ms (3.531 ms / 100) 3.527 -> 3.509 ( -0.51%) [ +0.11% +0.11% +0.00% / -0.51% -0.48% -0.37%] index_fill_ random_sorted : Elapsed 0.035 ms (3.531 ms / 100) B = [16, 5, 40, 4] (stride (1, 64, 320, 16)) A = [16, 20, 40, 4] (stride (3200, 40, 1, 800)) dim = 1 1.783 -> 1.778 ( -0.28%) [ +0.00% +0.34% +0.11% / +0.11% -0.28% -0.28%] index_select const : Elapsed 0.018 ms (1.783 ms / 100) 1.815 -> 1.818 ( +0.17%) [ +0.11% +0.00% +0.06% / +0.17% +0.39% +0.28%] index_select wrap : Elapsed 0.018 ms (1.817 ms / 100) 1.818 -> 1.817 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.22% +0.11%] index_select linear : Elapsed 0.018 ms (1.818 ms / 100) 1.816 -> 1.816 ( +0.00%) [ +0.28% +0.28% +0.00% / +0.00% +0.28% +0.28%] index_select reverse : Elapsed 0.018 ms (1.821 ms / 100) 1.780 -> 1.778 ( -0.11%) [ +0.22% +0.11% +0.00% / -0.11% +0.06% +0.06%] index_select skip64 : Elapsed 0.018 ms (1.784 ms / 100) 1.775 -> 1.777 ( +0.11%) [ +0.23% +0.00% +0.34% / +0.11% +0.34% +0.34%] index_select skip256 : Elapsed 0.018 ms (1.779 ms / 100) 1.814 -> 1.818 ( +0.22%) [ +0.17% +0.17% +0.00% / +0.22% +0.44% +0.50%] index_select spread : Elapsed 0.018 ms (1.817 ms / 100) 1.815 -> 1.819 ( +0.22%) [ +0.06% +0.17% +0.00% / +0.22% +0.28% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.816 ms / 100) 1.801 -> 1.806 ( +0.28%) [ +0.28% +0.28% +0.00% / +0.28% +0.61% +0.78%] index_select strided 5 : Elapsed 0.018 ms (1.806 ms / 100) 1.816 -> 1.815 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +0.39% +0.50%] index_select strided 7 : Elapsed 0.018 ms (1.818 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.50% +0.28%] index_select strided 8 : Elapsed 0.018 ms (1.817 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.00% +0.06% +0.06% / +0.11% +0.55% +0.44%] index_select strided 16 : Elapsed 0.018 ms (1.815 ms / 100) 1.816 -> 1.815 ( -0.06%) [ +0.00% +0.11% +0.06% / -0.06% +0.55% +0.50%] index_select random : Elapsed 0.018 ms (1.816 ms / 100) 1.818 -> 1.816 ( -0.11%) [ +0.00% +0.06% +0.00% / -0.11% +0.50% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.818 ms / 100) 1.817 -> 1.814 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.55% +0.55%] index_select perm : Elapsed 0.018 ms (1.817 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.66% +0.50%] index_select perm_sorted : Elapsed 0.018 ms (1.817 ms / 100) B = [16, 5, 40, 4] (stride (1, 16, 80, 3200)) A = [16, 20, 40, 4] (stride (1, 640, 16, 12800)) dim = 1 1.810 -> 1.814 ( +0.22%) [ +0.11% +0.00% +0.39% / +0.28% +0.33% +0.22%] index_select const : Elapsed 0.018 ms (1.812 ms / 100) 1.812 -> 1.816 ( +0.22%) [ +0.06% +0.00% +0.11% / +0.28% +0.22% +0.39%] index_select wrap : Elapsed 0.018 ms (1.813 ms / 100) 1.810 -> 1.815 ( +0.28%) [ +0.28% +0.06% +0.00% / +0.28% +0.61% +0.61%] index_select linear : Elapsed 0.018 ms (1.815 ms / 100) 1.807 -> 1.809 ( +0.11%) [ +0.00% +0.28% +0.22% / +0.11% +0.44% +0.61%] index_select reverse : Elapsed 0.018 ms (1.807 ms / 100) 1.812 -> 1.811 ( -0.06%) [ +0.00% +0.00% +0.00% / +0.11% -0.06% +0.00%] index_select skip64 : Elapsed 0.018 ms (1.812 ms / 100) 1.809 -> 1.814 ( +0.28%) [ +0.00% +0.28% +0.11% / +0.28% +0.61% +0.44%] index_select skip256 : Elapsed 0.018 ms (1.809 ms / 100) 1.816 -> 1.818 ( +0.11%) [ +0.00% +0.11% +0.17% / +0.11% +0.22% +0.11%] index_select spread : Elapsed 0.018 ms (1.816 ms / 100) 1.815 -> 1.816 ( +0.06%) [ +0.17% +0.17% +0.00% / +0.06% +0.33% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.818 ms / 100) 1.811 -> 1.813 ( +0.11%) [ +0.00% +0.17% +0.44% / +0.11% +0.88% +0.88%] index_select strided 5 : Elapsed 0.018 ms (1.811 ms / 100) 1.815 -> 1.814 ( -0.06%) [ +0.00% +0.22% +0.06% / -0.06% +0.39% +0.22%] index_select strided 7 : Elapsed 0.018 ms (1.815 ms / 100) 1.820 -> 1.818 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% +0.05% +0.11%] index_select strided 8 : Elapsed 0.018 ms (1.820 ms / 100) 1.822 -> 1.820 ( -0.11%) [ +0.00% +0.11% +0.11% / +0.22% -0.11% +0.05%] index_select strided 16 : Elapsed 0.018 ms (1.822 ms / 100) 1.818 -> 1.817 ( -0.06%) [ +0.22% +0.00% +0.06% / -0.06% +0.28% +0.22%] index_select random : Elapsed 0.018 ms (1.822 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.44% +0.00% +0.00% / +0.11% +1.16% +0.99%] index_select random_sorted : Elapsed 0.018 ms (1.825 ms / 100) 1.816 -> 1.821 ( +0.28%) [ +0.28% +0.22% +0.00% / +0.33% +0.28% +0.33%] index_select perm : Elapsed 0.018 ms (1.821 ms / 100) 1.818 -> 1.817 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.72% +0.66%] index_select perm_sorted : Elapsed 0.018 ms (1.819 ms / 100) out_shape = [16, 20, 5, 4] in_shape = [16, 20, 40, 4] idx_dim = 2 B = [16, 20, 5, 4] (stride (400, 5, 1, 100)) A = [16, 20, 40, 4] (stride (3200, 4, 80, 1)) dim = 2 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.34%] index_select const : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.41% +0.41%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.41%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.41% +0.34%] index_select skip64 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.34% / +0.00% +0.34% +0.34%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.41%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.61%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.47% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.47% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.61% +0.54%] index_select strided 16 : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.54%] index_select perm : Elapsed 0.015 ms (1.475 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.54% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [16, 20, 5, 4] (stride (80, 4, 1280, 1)) A = [16, 20, 40, 4] (stride (1, 2560, 64, 16)) dim = 2 1.451 -> 1.454 ( +0.21%) [ +0.00% +0.28% +0.28% / +0.21% +0.90% +0.41%] index_select const : Elapsed 0.015 ms (1.451 ms / 100) 1.413 -> 1.413 ( +0.00%) [ +0.00% +0.57% +0.14% / +0.00% +0.78% +0.64%] index_select wrap : Elapsed 0.014 ms (1.413 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.42% +0.35%] index_select linear : Elapsed 0.014 ms (1.423 ms / 100) 1.417 -> 1.418 ( +0.07%) [ +0.21% +0.21% +0.00% / +0.07% +0.71% +0.64%] index_select reverse : Elapsed 0.014 ms (1.420 ms / 100) 1.437 -> 1.438 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.77% +0.70%] index_select skip64 : Elapsed 0.014 ms (1.437 ms / 100) 1.449 -> 1.451 ( +0.14%) [ +0.21% +0.00% +0.35% / +0.14% +1.10% +1.24%] index_select skip256 : Elapsed 0.015 ms (1.452 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.77% +0.84%] index_select spread : Elapsed 0.014 ms (1.425 ms / 100) 1.437 -> 1.438 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.90% +0.84%] index_select strided 3 : Elapsed 0.014 ms (1.438 ms / 100) 1.428 -> 1.428 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.63% +0.70%] index_select strided 5 : Elapsed 0.014 ms (1.429 ms / 100) 1.423 -> 1.422 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.49% +0.56%] index_select strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.422 -> 1.424 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.70% +0.63%] index_select strided 8 : Elapsed 0.014 ms (1.424 ms / 100) 1.426 -> 1.426 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.77% +0.77%] index_select strided 16 : Elapsed 0.014 ms (1.426 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.00% +0.00% +0.21% / +0.07% +0.70% +0.70%] index_select random : Elapsed 0.014 ms (1.423 ms / 100) 1.426 -> 1.426 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.56%] index_select random_sorted : Elapsed 0.014 ms (1.426 ms / 100) 1.425 -> 1.426 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.77% +0.70%] index_select perm : Elapsed 0.014 ms (1.425 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.77% +0.63%] index_select perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) B = [16, 20, 5, 4] (stride (100, 1, 20, 1600)) A = [16, 20, 40, 4] (stride (20, 1, 1280, 320)) dim = 2 1.382 -> 1.384 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.36% +0.36%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.14% +0.14%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.22% +0.00% +0.22% / +0.07% +0.36% +0.80%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.381 ( -0.14%) [ +0.07% +0.00% +0.00% / -0.14% +0.29% +0.22%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.51% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.381 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.14% +0.14% +0.00% / +0.22% +0.94% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.29% +0.00% +0.36% / +0.14% +0.43% +0.43%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.29% +0.29%] index_select strided 3 : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.29% +0.00% +0.14% / +0.14% +0.29% +0.29%] index_select strided 5 : Elapsed 0.014 ms (1.386 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.36% +0.22% +0.00% / +0.29% +0.43% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.00% +0.07% +0.29% / +0.07% +0.51% +0.36%] index_select strided 8 : Elapsed 0.014 ms (1.381 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.43% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.14% +0.00% +0.07% / +0.22% +0.43% +0.36%] index_select random : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.43% +0.36%] index_select random_sorted : Elapsed 0.014 ms (1.382 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.22% +0.00% / +0.14% +0.51% +0.43%] index_select perm : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.36% +0.36%] index_select perm_sorted : Elapsed 0.014 ms (1.383 ms / 100) B = [16, 20, 5, 4] (stride (5, 80, 1, 1600)) A = [16, 20, 40, 4] (stride (1, 16, 1280, 320)) dim = 2 1.533 -> 1.538 ( +0.33%) [ +0.26% +0.13% +0.00% / +0.33% +0.72% +0.98%] index_select const : Elapsed 0.015 ms (1.537 ms / 100) 1.540 -> 1.540 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.65% +0.65%] index_select wrap : Elapsed 0.015 ms (1.540 ms / 100) 1.540 -> 1.541 ( +0.06%) [ +0.13% +0.06% +0.00% / +0.06% +0.78% +0.78%] index_select linear : Elapsed 0.015 ms (1.542 ms / 100) 1.539 -> 1.539 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +2.14% +0.91%] index_select reverse : Elapsed 0.015 ms (1.539 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +1.30% +1.17%] index_select skip64 : Elapsed 0.015 ms (1.539 ms / 100) 1.533 -> 1.535 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.78% +0.72%] index_select skip256 : Elapsed 0.015 ms (1.535 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.85% +0.85%] index_select spread : Elapsed 0.015 ms (1.531 ms / 100) 1.534 -> 1.536 ( +0.13%) [ +0.26% +0.20% +0.00% / +0.13% +1.17% +0.91%] index_select strided 3 : Elapsed 0.015 ms (1.538 ms / 100) 1.533 -> 1.534 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.78% +0.78%] index_select strided 5 : Elapsed 0.015 ms (1.534 ms / 100) 1.529 -> 1.526 ( -0.20%) [ +0.13% +0.13% +0.00% / -0.20% +0.78% +0.85%] index_select strided 7 : Elapsed 0.015 ms (1.531 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.20% +0.07% +0.00% / +0.13% +0.72% +0.59%] index_select strided 8 : Elapsed 0.015 ms (1.532 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.92% +0.65%] index_select strided 16 : Elapsed 0.015 ms (1.530 ms / 100) 1.527 -> 1.527 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.72% +0.79%] index_select random : Elapsed 0.015 ms (1.528 ms / 100) 1.534 -> 1.536 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.91% +0.85%] index_select random_sorted : Elapsed 0.015 ms (1.536 ms / 100) 1.531 -> 1.532 ( +0.07%) [ +0.20% +0.07% +0.00% / +0.07% +0.85% +0.72%] index_select perm : Elapsed 0.015 ms (1.534 ms / 100) 1.539 -> 1.542 ( +0.19%) [ +0.00% +0.00% +0.00% / +0.19% +1.23% +0.65%] index_select perm_sorted : Elapsed 0.015 ms (1.539 ms / 100) B = [16, 20, 5, 4] (stride (1, 16, 320, 1600)) A = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 2 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.54% +0.41%] index_select const : Elapsed 0.015 ms (1.479 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.34% +0.34%] index_select wrap : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.14% +0.00% +0.07% / -0.07% +0.27% +0.27%] index_select linear : Elapsed 0.015 ms (1.482 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.61%] index_select reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.47% +0.47%] index_select skip64 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select skip256 : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.54% +0.54%] index_select spread : Elapsed 0.015 ms (1.477 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.41%] index_select strided 3 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.482 ( +0.27%) [ +0.00% +0.00% +0.14% / +0.27% +0.54% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.54% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.54% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.81%] index_select random : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.75% +0.75%] index_select random_sorted : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.74% +0.61%] index_select perm : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) out_shape = [16, 20, 40, 5] in_shape = [16, 20, 40, 4] idx_dim = 3 B = [16, 20, 40, 5] (stride (100, 5, 1600, 1)) A = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 3 5.838 -> 5.817 ( -0.36%) [ +0.00% +0.05% +0.19% / +0.00% -0.36% -0.34%] index_add_ linear : Elapsed 0.058 ms (5.838 ms / 100) 5.805 -> 5.788 ( -0.29%) [ +0.00% +0.10% +0.17% / +0.14% -0.28% -0.29%] index_copy_ linear : Elapsed 0.058 ms (5.805 ms / 100) 5.845 -> 5.823 ( -0.38%) [ +0.00% +0.00% +0.05% / -0.09% -0.38% -0.33%] index_add_ reverse : Elapsed 0.058 ms (5.845 ms / 100) 5.805 -> 5.784 ( -0.36%) [ +0.07% +0.00% +0.22% / +0.07% -0.34% -0.36%] index_copy_ reverse : Elapsed 0.058 ms (5.809 ms / 100) 5.843 -> 5.815 ( -0.48%) [ +0.00% +0.07% +0.03% / +0.10% -0.48% -0.31%] index_add_ spread : Elapsed 0.058 ms (5.843 ms / 100) 5.803 -> 5.789 ( -0.24%) [ +0.12% +0.00% +0.16% / +0.21% -0.12% -0.24%] index_copy_ spread : Elapsed 0.058 ms (5.810 ms / 100) 5.838 -> 5.826 ( -0.21%) [ +0.03% +0.12% +0.00% / +0.12% -0.19% -0.21%] index_add_ strided 3 : Elapsed 0.058 ms (5.840 ms / 100) 5.804 -> 5.784 ( -0.34%) [ +0.00% +0.12% +0.09% / +0.19% -0.28% -0.34%] index_copy_ strided 3 : Elapsed 0.058 ms (5.804 ms / 100) 5.841 -> 5.823 ( -0.31%) [ +0.10% +0.00% +0.09% / +0.22% -0.31% -0.27%] index_add_ perm : Elapsed 0.058 ms (5.847 ms / 100) 5.806 -> 5.785 ( -0.36%) [ +0.00% +0.00% +0.12% / +0.07% -0.34% -0.36%] index_copy_ perm : Elapsed 0.058 ms (5.806 ms / 100) 5.840 -> 5.816 ( -0.41%) [ +0.07% +0.00% +0.05% / +0.02% -0.41% -0.36%] index_add_ perm_sorted : Elapsed 0.058 ms (5.844 ms / 100) 5.805 -> 5.788 ( -0.29%) [ +0.14% +0.07% +0.00% / +0.10% -0.26% -0.29%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.813 ms / 100) 6.033 -> 6.013 ( -0.33%) [ +0.12% +0.00% +0.13% / +0.12% -0.28% -0.33%] index_select const : Elapsed 0.060 ms (6.040 ms / 100) 6.069 -> 6.053 ( -0.26%) [ +0.00% +0.21% +0.21% / +0.20% -0.26% -0.25%] index_select wrap : Elapsed 0.061 ms (6.069 ms / 100) 6.057 -> 6.048 ( -0.15%) [ +0.00% +0.10% +0.18% / +0.20% -0.13% -0.15%] index_select linear : Elapsed 0.061 ms (6.057 ms / 100) 6.056 -> 6.034 ( -0.36%) [ +0.08% +0.00% +0.05% / +0.18% -0.36% -0.33%] index_select reverse : Elapsed 0.061 ms (6.061 ms / 100) 6.037 -> 6.010 ( -0.45%) [ +0.00% +0.02% +0.05% / +0.07% -0.45% -0.38%] index_select skip64 : Elapsed 0.060 ms (6.037 ms / 100) 6.033 -> 6.010 ( -0.38%) [ +0.07% +0.00% +0.03% / +0.08% -0.38% -0.30%] index_select skip256 : Elapsed 0.060 ms (6.037 ms / 100) 6.065 -> 6.044 ( -0.35%) [ +0.07% +0.07% +0.00% / +0.05% -0.30% -0.35%] index_select spread : Elapsed 0.061 ms (6.069 ms / 100) 6.070 -> 6.050 ( -0.33%) [ +0.00% +0.15% +0.23% / +0.08% -0.26% -0.33%] index_select strided 3 : Elapsed 0.061 ms (6.070 ms / 100) 6.044 -> 6.033 ( -0.18%) [ +0.00% +0.07% +0.07% / +0.13% -0.13% -0.18%] index_select random : Elapsed 0.060 ms (6.044 ms / 100) 6.059 -> 6.033 ( -0.43%) [ +0.00% +0.00% +0.05% / -0.02% -0.33% -0.43%] index_select random_sorted : Elapsed 0.061 ms (6.059 ms / 100) B = [16, 20, 40, 5] (stride (800, 40, 1, 12800)) A = [16, 20, 40, 4] (stride (40, 2560, 1, 640)) dim = 3 5.332 -> 5.343 ( +0.21%) [ +0.17% +0.00% +0.19% / +0.24% +0.21% +0.23%] index_add_ linear : Elapsed 0.053 ms (5.341 ms / 100) 5.263 -> 5.262 ( -0.02%) [ +0.00% +0.06% +0.15% / -0.02% +0.23% +0.13%] index_copy_ linear : Elapsed 0.053 ms (5.263 ms / 100) 5.336 -> 5.334 ( -0.04%) [ +0.09% +0.00% +0.04% / -0.04% +0.09% +0.26%] index_add_ reverse : Elapsed 0.053 ms (5.341 ms / 100) 5.261 -> 5.268 ( +0.13%) [ +0.00% +0.06% +0.32% / +0.13% +0.19% +0.17%] index_copy_ reverse : Elapsed 0.053 ms (5.261 ms / 100) 5.344 -> 5.349 ( +0.09%) [ +0.07% +0.00% +0.07% / +0.09% +0.09% +0.15%] index_add_ spread : Elapsed 0.053 ms (5.348 ms / 100) 5.266 -> 5.274 ( +0.15%) [ +0.06% +0.00% +0.15% / +0.15% +0.17% +0.28%] index_copy_ spread : Elapsed 0.053 ms (5.269 ms / 100) 5.342 -> 5.353 ( +0.21%) [ +0.00% +0.17% +0.13% / +0.21% +0.28% +0.36%] index_add_ strided 3 : Elapsed 0.053 ms (5.342 ms / 100) 5.271 -> 5.275 ( +0.08%) [ +0.00% +0.11% +0.08% / +0.08% +0.09% +0.09%] index_copy_ strided 3 : Elapsed 0.053 ms (5.271 ms / 100) 5.337 -> 5.347 ( +0.19%) [ +0.00% +0.00% +0.07% / +0.24% +0.19% +0.22%] index_add_ perm : Elapsed 0.053 ms (5.337 ms / 100) 5.259 -> 5.269 ( +0.19%) [ +0.00% +0.15% +0.32% / +0.19% +0.19% +0.25%] index_copy_ perm : Elapsed 0.053 ms (5.259 ms / 100) 5.337 -> 5.343 ( +0.11%) [ +0.07% +0.00% +0.19% / +0.11% +0.22% +0.21%] index_add_ perm_sorted : Elapsed 0.053 ms (5.341 ms / 100) 5.270 -> 5.267 ( -0.06%) [ +0.00% +0.08% +0.06% / +0.15% +0.11% -0.06%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.270 ms / 100) 5.401 -> 5.393 ( -0.15%) [ +0.00% +0.04% +0.15% / +0.02% -0.15% -0.15%] index_select const : Elapsed 0.054 ms (5.401 ms / 100) 5.537 -> 5.535 ( -0.04%) [ +0.00% +0.02% +0.20% / -0.04% +0.16% +0.09%] index_select wrap : Elapsed 0.055 ms (5.537 ms / 100) 5.527 -> 5.534 ( +0.13%) [ +0.20% +0.00% +0.02% / +0.29% +0.16% +0.13%] index_select linear : Elapsed 0.055 ms (5.538 ms / 100) 5.527 -> 5.509 ( -0.33%) [ +0.05% +0.00% +0.18% / -0.02% -0.33% -0.13%] index_select reverse : Elapsed 0.055 ms (5.530 ms / 100) 5.411 -> 5.398 ( -0.24%) [ +0.00% +0.06% +0.02% / +0.11% -0.24% -0.07%] index_select skip64 : Elapsed 0.054 ms (5.411 ms / 100) 5.403 -> 5.384 ( -0.35%) [ +0.00% +0.02% +0.19% / +0.13% -0.35% -0.22%] index_select skip256 : Elapsed 0.054 ms (5.403 ms / 100) 5.527 -> 5.526 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.05% +0.11%] index_select spread : Elapsed 0.055 ms (5.527 ms / 100) 5.539 -> 5.537 ( -0.04%) [ +0.13% +0.00% +0.02% / +0.11% -0.04% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.546 ms / 100) 5.460 -> 5.466 ( +0.11%) [ +0.00% +0.09% +0.20% / +0.11% +0.46% +0.55%] index_select random : Elapsed 0.055 ms (5.460 ms / 100) 5.470 -> 5.481 ( +0.20%) [ +0.00% +0.11% +0.15% / +0.20% +0.51% +0.68%] index_select random_sorted : Elapsed 0.055 ms (5.470 ms / 100) B = [16, 20, 40, 5] (stride (800, 1, 20, 12800)) A = [16, 20, 40, 4] (stride (1, 16, 1280, 320)) dim = 3 5.863 -> 5.825 ( -0.65%) [ +0.00% +0.14% +0.14% / +0.17% -0.65% -0.55%] index_add_ linear : Elapsed 0.059 ms (5.863 ms / 100) 5.815 -> 5.765 ( -0.86%) [ +0.03% +0.00% +0.26% / +0.07% -0.72% -0.86%] index_copy_ linear : Elapsed 0.058 ms (5.817 ms / 100) 5.850 -> 5.816 ( -0.58%) [ +0.00% +0.07% +0.29% / +0.07% -0.48% -0.58%] index_add_ reverse : Elapsed 0.058 ms (5.850 ms / 100) 5.796 -> 5.763 ( -0.57%) [ +0.00% +0.14% +0.31% / +0.03% -0.48% -0.57%] index_copy_ reverse : Elapsed 0.058 ms (5.796 ms / 100) 5.866 -> 5.831 ( -0.60%) [ +0.00% +0.05% +0.27% / +0.07% -0.53% -0.60%] index_add_ spread : Elapsed 0.059 ms (5.866 ms / 100) 5.805 -> 5.761 ( -0.76%) [ +0.00% +0.05% +0.19% / +0.05% -0.60% -0.76%] index_copy_ spread : Elapsed 0.058 ms (5.805 ms / 100) 5.844 -> 5.819 ( -0.43%) [ +0.12% +0.03% +0.00% / -0.10% -0.31% -0.43%] index_add_ strided 3 : Elapsed 0.059 ms (5.851 ms / 100) 5.783 -> 5.761 ( -0.38%) [ +0.00% +0.07% +0.09% / +0.09% -0.38% -0.24%] index_copy_ strided 3 : Elapsed 0.058 ms (5.783 ms / 100) 5.840 -> 5.822 ( -0.31%) [ +0.02% +0.02% +0.00% / +0.10% -0.31% -0.29%] index_add_ perm : Elapsed 0.058 ms (5.841 ms / 100) 5.784 -> 5.756 ( -0.48%) [ +0.00% +0.12% +0.17% / +0.07% -0.48% -0.19%] index_copy_ perm : Elapsed 0.058 ms (5.784 ms / 100) 5.866 -> 5.825 ( -0.70%) [ +0.03% +0.00% +0.24% / +0.17% -0.70% -0.56%] index_add_ perm_sorted : Elapsed 0.059 ms (5.868 ms / 100) 5.806 -> 5.770 ( -0.62%) [ +0.09% +0.00% +0.29% / +0.12% -0.50% -0.62%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.811 ms / 100) 6.074 -> 6.067 ( -0.12%) [ +0.00% +0.02% +0.18% / +0.16% -0.12% -0.12%] index_select const : Elapsed 0.061 ms (6.074 ms / 100) 6.185 -> 6.144 ( -0.66%) [ +0.00% +0.02% +0.29% / +0.10% -0.57% -0.66%] index_select wrap : Elapsed 0.062 ms (6.185 ms / 100) 6.176 -> 6.137 ( -0.63%) [ +0.11% +0.00% +0.10% / +0.13% -0.52% -0.63%] index_select linear : Elapsed 0.062 ms (6.183 ms / 100) 6.164 -> 6.159 ( -0.08%) [ +0.05% +0.00% +0.19% / +0.13% -0.03% -0.08%] index_select reverse : Elapsed 0.062 ms (6.167 ms / 100) 6.073 -> 6.066 ( -0.12%) [ +0.00% +0.13% +0.28% / +0.16% -0.08% -0.12%] index_select skip64 : Elapsed 0.061 ms (6.073 ms / 100) 6.075 -> 6.058 ( -0.28%) [ +0.16% +0.00% +0.25% / +0.12% -0.07% -0.28%] index_select skip256 : Elapsed 0.061 ms (6.085 ms / 100) 6.177 -> 6.145 ( -0.52%) [ +0.10% +0.00% +0.18% / +0.06% -0.52% -0.44%] index_select spread : Elapsed 0.062 ms (6.183 ms / 100) 6.176 -> 6.150 ( -0.42%) [ +0.00% +0.06% +0.18% / +0.19% -0.28% -0.42%] index_select strided 3 : Elapsed 0.062 ms (6.176 ms / 100) 6.129 -> 6.113 ( -0.26%) [ +0.00% +0.03% +0.11% / +0.15% -0.23% -0.26%] index_select random : Elapsed 0.061 ms (6.129 ms / 100) 6.130 -> 6.121 ( -0.15%) [ +0.00% +0.08% +0.18% / +0.16% -0.15% -0.10%] index_select random_sorted : Elapsed 0.061 ms (6.130 ms / 100) B = [16, 20, 40, 5] (stride (40, 640, 1, 12800)) A = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 3 5.821 -> 5.818 ( -0.05%) [ +0.03% +0.00% +0.17% / +0.07% -0.05% +0.02%] index_add_ linear : Elapsed 0.058 ms (5.823 ms / 100) 5.763 -> 5.756 ( -0.12%) [ +0.00% +0.09% +0.09% / +0.14% +0.03% -0.12%] index_copy_ linear : Elapsed 0.058 ms (5.763 ms / 100) 5.806 -> 5.814 ( +0.14%) [ +0.16% +0.00% +0.10% / +0.14% +0.29% +0.48%] index_add_ reverse : Elapsed 0.058 ms (5.815 ms / 100) 5.752 -> 5.758 ( +0.10%) [ +0.24% +0.00% +0.24% / +0.10% +0.31% +0.21%] index_copy_ reverse : Elapsed 0.058 ms (5.766 ms / 100) 5.824 -> 5.823 ( -0.02%) [ +0.00% +0.17% +0.15% / +0.05% +0.02% -0.02%] index_add_ spread : Elapsed 0.058 ms (5.824 ms / 100) 5.762 -> 5.761 ( -0.02%) [ +0.02% +0.12% +0.00% / +0.03% +0.02% -0.02%] index_copy_ spread : Elapsed 0.058 ms (5.763 ms / 100) 5.828 -> 5.832 ( +0.07%) [ +0.00% +0.02% +0.00% / +0.07% +0.19% +0.22%] index_add_ strided 3 : Elapsed 0.058 ms (5.828 ms / 100) 5.767 -> 5.772 ( +0.09%) [ +0.02% +0.02% +0.00% / +0.23% +0.17% +0.09%] index_copy_ strided 3 : Elapsed 0.058 ms (5.768 ms / 100) 5.815 -> 5.820 ( +0.09%) [ +0.00% +0.02% +0.00% / +0.09% +0.55% +0.43%] index_add_ perm : Elapsed 0.058 ms (5.815 ms / 100) 5.756 -> 5.769 ( +0.23%) [ +0.05% +0.00% +0.05% / +0.23% +0.59% +0.47%] index_copy_ perm : Elapsed 0.058 ms (5.759 ms / 100) 5.819 -> 5.822 ( +0.05%) [ +0.03% +0.00% +0.14% / +0.05% +0.17% +0.33%] index_add_ perm_sorted : Elapsed 0.058 ms (5.821 ms / 100) 5.763 -> 5.770 ( +0.12%) [ +0.14% +0.00% +0.05% / +0.12% +0.19% +0.23%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.771 ms / 100) 6.105 -> 6.113 ( +0.13%) [ +0.05% +0.00% +0.13% / +0.13% +0.29% +0.16%] index_select const : Elapsed 0.061 ms (6.108 ms / 100) 6.157 -> 6.156 ( -0.02%) [ +0.00% +0.05% +0.00% / +0.00% -0.02% +0.03%] index_select wrap : Elapsed 0.062 ms (6.157 ms / 100) 6.143 -> 6.147 ( +0.07%) [ +0.02% +0.00% +0.03% / +0.08% +0.07% +0.15%] index_select linear : Elapsed 0.061 ms (6.144 ms / 100) 6.141 -> 6.148 ( +0.11%) [ +0.10% +0.00% +0.08% / +0.11% +0.29% +0.23%] index_select reverse : Elapsed 0.061 ms (6.147 ms / 100) 6.106 -> 6.114 ( +0.13%) [ +0.00% +0.03% +0.10% / +0.20% +0.21% +0.13%] index_select skip64 : Elapsed 0.061 ms (6.106 ms / 100) 6.103 -> 6.117 ( +0.23%) [ +0.00% +0.16% +0.16% / +0.25% +0.36% +0.23%] index_select skip256 : Elapsed 0.061 ms (6.103 ms / 100) 6.148 -> 6.152 ( +0.07%) [ +0.05% +0.08% +0.00% / +0.07% +0.20% +0.21%] index_select spread : Elapsed 0.062 ms (6.151 ms / 100) 6.145 -> 6.156 ( +0.18%) [ +0.10% +0.00% +0.15% / +0.18% +0.26% +0.24%] index_select strided 3 : Elapsed 0.062 ms (6.151 ms / 100) 6.154 -> 6.159 ( +0.08%) [ +0.06% +0.13% +0.00% / +0.08% +0.08% +0.18%] index_select random : Elapsed 0.062 ms (6.158 ms / 100) 6.140 -> 6.143 ( +0.05%) [ +0.16% +0.08% +0.00% / +0.16% +0.05% +0.26%] index_select random_sorted : Elapsed 0.062 ms (6.150 ms / 100) out_shape = [5, 40, 4, 20] in_shape = [16, 40, 4, 20] idx_dim = 0 B = [5, 40, 4, 20] (stride (3200, 80, 1, 4)) A = [16, 40, 4, 20] (stride (40, 1, 12800, 640)) dim = 0 2.324 -> 2.323 ( -0.04%) [ +0.13% +0.04% +0.00% / -0.04% +0.39% +0.39%] index_select const : Elapsed 0.023 ms (2.327 ms / 100) 2.329 -> 2.334 ( +0.21%) [ +0.21% +0.26% +0.00% / +0.21% +0.43% +0.43%] index_select wrap : Elapsed 0.023 ms (2.334 ms / 100) 2.333 -> 2.339 ( +0.26%) [ +0.00% +0.04% +0.00% / +0.26% +0.56% +0.77%] index_select linear : Elapsed 0.023 ms (2.333 ms / 100) 2.333 -> 2.332 ( -0.04%) [ +0.00% +0.17% +0.04% / -0.04% +0.34% +0.21%] index_select reverse : Elapsed 0.023 ms (2.333 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.34% +0.17%] index_select skip64 : Elapsed 0.023 ms (2.327 ms / 100) 2.321 -> 2.325 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.60% +0.52%] index_select skip256 : Elapsed 0.023 ms (2.325 ms / 100) 2.325 -> 2.326 ( +0.04%) [ +0.22% +0.00% +0.13% / +0.04% +0.26% +0.30%] index_select spread : Elapsed 0.023 ms (2.330 ms / 100) 2.325 -> 2.322 ( -0.13%) [ +0.00% +0.00% +0.09% / -0.13% +0.34% +0.39%] index_select strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.332 -> 2.336 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.21% +0.39%] index_select strided 5 : Elapsed 0.023 ms (2.333 ms / 100) 2.332 -> 2.340 ( +0.34%) [ +0.13% +0.21% +0.00% / +0.34% +0.56% +0.56%] index_select strided 7 : Elapsed 0.023 ms (2.335 ms / 100) 2.303 -> 2.307 ( +0.17%) [ +0.35% +0.26% +0.00% / +0.17% +0.83% +0.74%] index_select strided 8 : Elapsed 0.023 ms (2.311 ms / 100) 2.316 -> 2.316 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.43% +0.65%] index_select random : Elapsed 0.023 ms (2.319 ms / 100) 2.316 -> 2.319 ( +0.13%) [ +0.09% +0.00% +0.13% / +0.13% +0.30% +0.47%] index_select random_sorted : Elapsed 0.023 ms (2.318 ms / 100) 2.326 -> 2.330 ( +0.17%) [ +0.09% +0.04% +0.00% / +0.17% +0.69% +0.52%] index_select perm : Elapsed 0.023 ms (2.328 ms / 100) 2.323 -> 2.330 ( +0.30%) [ +0.00% +0.09% +0.26% / +0.30% +0.30% +0.39%] index_select perm_sorted : Elapsed 0.023 ms (2.323 ms / 100) B = [5, 40, 4, 20] (stride (3200, 1, 800, 40)) A = [16, 40, 4, 20] (stride (3200, 20, 800, 1)) dim = 0 2.138 -> 2.127 ( -0.51%) [ +0.00% +0.19% +0.05% / +0.05% -0.33% -0.51%] index_select const : Elapsed 0.021 ms (2.138 ms / 100) 2.194 -> 2.186 ( -0.36%) [ +0.09% +0.00% +0.23% / +0.09% -0.36% -0.36%] index_select wrap : Elapsed 0.022 ms (2.196 ms / 100) 2.189 -> 2.183 ( -0.27%) [ +0.23% +0.00% +0.09% / +0.09% -0.23% -0.27%] index_select linear : Elapsed 0.022 ms (2.194 ms / 100) 2.185 -> 2.181 ( -0.18%) [ +0.27% +0.14% +0.00% / +0.27% -0.18% -0.14%] index_select reverse : Elapsed 0.022 ms (2.191 ms / 100) 2.139 -> 2.135 ( -0.19%) [ +0.09% +0.09% +0.00% / -0.09% -0.19% -0.14%] index_select skip64 : Elapsed 0.021 ms (2.141 ms / 100) 2.134 -> 2.129 ( -0.23%) [ +0.23% +0.09% +0.00% / +0.23% -0.19% -0.23%] index_select skip256 : Elapsed 0.021 ms (2.139 ms / 100) 2.193 -> 2.184 ( -0.41%) [ +0.23% +0.32% +0.00% / +0.27% -0.05% -0.41%] index_select spread : Elapsed 0.022 ms (2.198 ms / 100) 2.195 -> 2.188 ( -0.32%) [ +0.00% +0.05% +0.05% / -0.23% -0.32% -0.05%] index_select strided 3 : Elapsed 0.022 ms (2.195 ms / 100) 2.196 -> 2.203 ( +0.32%) [ +0.27% +0.27% +0.00% / +0.36% +0.41% +0.32%] index_select strided 5 : Elapsed 0.022 ms (2.202 ms / 100) 2.197 -> 2.185 ( -0.55%) [ +0.36% +0.27% +0.00% / +0.32% -0.27% -0.55%] index_select strided 7 : Elapsed 0.022 ms (2.205 ms / 100) 2.155 -> 2.145 ( -0.46%) [ +0.00% +0.05% +0.28% / +0.14% -0.19% -0.46%] index_select strided 8 : Elapsed 0.022 ms (2.155 ms / 100) 2.181 -> 2.177 ( -0.18%) [ +0.00% +0.14% +0.18% / -0.18% +0.05% +0.23%] index_select random : Elapsed 0.022 ms (2.181 ms / 100) 2.167 -> 2.169 ( +0.09%) [ +0.37% +0.32% +0.00% / +0.09% +0.60% +0.65%] index_select random_sorted : Elapsed 0.022 ms (2.175 ms / 100) 2.188 -> 2.191 ( +0.14%) [ +0.05% +0.00% +0.05% / +0.14% +0.82% +1.01%] index_select perm : Elapsed 0.022 ms (2.189 ms / 100) 2.189 -> 2.188 ( -0.05%) [ +0.32% +0.09% +0.00% / -0.05% +0.55% +0.64%] index_select perm_sorted : Elapsed 0.022 ms (2.196 ms / 100) B = [5, 40, 4, 20] (stride (3200, 1, 40, 160)) A = [16, 40, 4, 20] (stride (3200, 20, 800, 1)) dim = 0 2.247 -> 2.247 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.62% +0.76%] index_select const : Elapsed 0.022 ms (2.248 ms / 100) 2.316 -> 2.322 ( +0.26%) [ +0.17% +0.00% +0.22% / +0.26% +0.47% +0.26%] index_select wrap : Elapsed 0.023 ms (2.320 ms / 100) 2.323 -> 2.327 ( +0.17%) [ +0.09% +0.13% +0.00% / +0.17% +0.17% +0.22%] index_select linear : Elapsed 0.023 ms (2.325 ms / 100) 2.318 -> 2.323 ( +0.22%) [ +0.00% +0.22% +0.13% / +0.22% +0.30% +0.43%] index_select reverse : Elapsed 0.023 ms (2.318 ms / 100) 2.248 -> 2.248 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.85% +0.89%] index_select skip64 : Elapsed 0.022 ms (2.250 ms / 100) 2.246 -> 2.253 ( +0.31%) [ +0.13% +0.00% +0.22% / +0.31% +0.80% +0.93%] index_select skip256 : Elapsed 0.022 ms (2.249 ms / 100) 2.326 -> 2.327 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.13% +0.43%] index_select spread : Elapsed 0.023 ms (2.329 ms / 100) 2.329 -> 2.334 ( +0.21%) [ +0.04% +0.00% +0.09% / +0.21% +0.39% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.330 ms / 100) 2.312 -> 2.316 ( +0.17%) [ +0.26% +0.26% +0.00% / +0.17% +0.65% +0.48%] index_select strided 5 : Elapsed 0.023 ms (2.318 ms / 100) 2.319 -> 2.325 ( +0.26%) [ +0.04% +0.00% +0.09% / +0.26% +0.52% +0.47%] index_select strided 7 : Elapsed 0.023 ms (2.320 ms / 100) 2.261 -> 2.262 ( +0.04%) [ +0.00% +0.09% +0.13% / +0.04% +1.02% +0.97%] index_select strided 8 : Elapsed 0.023 ms (2.261 ms / 100) 2.313 -> 2.321 ( +0.35%) [ +0.39% +0.26% +0.00% / +0.39% +0.35% +0.39%] index_select random : Elapsed 0.023 ms (2.322 ms / 100) 2.317 -> 2.321 ( +0.17%) [ +0.52% +0.30% +0.00% / +0.52% +0.35% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.329 ms / 100) 2.317 -> 2.312 ( -0.22%) [ +0.00% +0.30% +0.52% / +0.35% -0.04% -0.22%] index_select perm : Elapsed 0.023 ms (2.317 ms / 100) 2.318 -> 2.309 ( -0.39%) [ +0.17% +0.00% +0.09% / +0.04% -0.39% -0.35%] index_select perm_sorted : Elapsed 0.023 ms (2.322 ms / 100) B = [5, 40, 4, 20] (stride (1, 400, 100, 5)) A = [16, 40, 4, 20] (stride (40, 1, 12800, 640)) dim = 0 2.023 -> 2.032 ( +0.44%) [ +0.00% +0.20% +0.15% / +0.44% +0.84% +0.69%] index_select const : Elapsed 0.020 ms (2.023 ms / 100) 2.044 -> 2.041 ( -0.15%) [ +0.05% +0.00% +0.05% / -0.15% +0.34% +0.29%] index_select wrap : Elapsed 0.020 ms (2.045 ms / 100) 2.038 -> 2.037 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.49% +0.44%] index_select linear : Elapsed 0.020 ms (2.040 ms / 100) 2.043 -> 2.042 ( -0.05%) [ +0.10% +0.24% +0.00% / -0.05% +0.44% +0.29%] index_select reverse : Elapsed 0.020 ms (2.045 ms / 100) 2.030 -> 2.033 ( +0.15%) [ +0.25% +0.10% +0.00% / +0.15% +0.34% +0.39%] index_select skip64 : Elapsed 0.020 ms (2.035 ms / 100) 2.027 -> 2.028 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.05% +0.44% +0.25%] index_select skip256 : Elapsed 0.020 ms (2.027 ms / 100) 2.043 -> 2.043 ( +0.00%) [ +0.20% +0.00% +0.20% / +0.00% +0.15% +0.24%] index_select spread : Elapsed 0.020 ms (2.047 ms / 100) 2.044 -> 2.050 ( +0.29%) [ +0.00% +0.05% +0.00% / +0.34% +0.29% +0.39%] index_select strided 3 : Elapsed 0.020 ms (2.044 ms / 100) 2.048 -> 2.049 ( +0.05%) [ +0.00% +0.10% +0.20% / +0.05% +0.24% +0.39%] index_select strided 5 : Elapsed 0.020 ms (2.048 ms / 100) 2.043 -> 2.046 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.29% +0.44%] index_select strided 7 : Elapsed 0.020 ms (2.045 ms / 100) 2.024 -> 2.028 ( +0.20%) [ +0.15% +0.15% +0.00% / +0.20% +0.35% +0.54%] index_select strided 8 : Elapsed 0.020 ms (2.027 ms / 100) 2.034 -> 2.034 ( +0.00%) [ +0.05% +0.20% +0.00% / +0.00% +0.20% +0.29%] index_select random : Elapsed 0.020 ms (2.035 ms / 100) 2.038 -> 2.040 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.39% +0.49%] index_select random_sorted : Elapsed 0.020 ms (2.039 ms / 100) 2.042 -> 2.045 ( +0.15%) [ +0.24% +0.24% +0.00% / +0.15% +0.54% +0.29%] index_select perm : Elapsed 0.020 ms (2.047 ms / 100) 2.045 -> 2.043 ( -0.10%) [ +0.15% +0.00% +0.05% / -0.10% +0.15% +0.29%] index_select perm_sorted : Elapsed 0.020 ms (2.048 ms / 100) B = [5, 40, 4, 20] (stride (800, 20, 4000, 1)) A = [16, 40, 4, 20] (stride (160, 4, 1, 2560)) dim = 0 2.283 -> 2.284 ( +0.04%) [ +0.00% +0.13% +0.04% / +0.04% +0.70% +0.53%] index_select const : Elapsed 0.023 ms (2.283 ms / 100) 2.305 -> 2.305 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.48% +0.56%] index_select wrap : Elapsed 0.023 ms (2.308 ms / 100) 2.301 -> 2.302 ( +0.04%) [ +0.09% +0.00% +0.09% / +0.04% +0.48% +0.74%] index_select linear : Elapsed 0.023 ms (2.303 ms / 100) 2.293 -> 2.295 ( +0.09%) [ +0.35% +0.00% +0.26% / +0.09% +0.22% +0.26%] index_select reverse : Elapsed 0.023 ms (2.301 ms / 100) 2.281 -> 2.280 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.61% +0.66%] index_select skip64 : Elapsed 0.023 ms (2.284 ms / 100) 2.282 -> 2.281 ( -0.04%) [ +0.00% +0.00% +0.13% / -0.04% +0.66% +0.70%] index_select skip256 : Elapsed 0.023 ms (2.282 ms / 100) 2.292 -> 2.292 ( +0.00%) [ +0.13% +0.09% +0.00% / +0.00% +0.52% +0.35%] index_select spread : Elapsed 0.023 ms (2.295 ms / 100) 2.291 -> 2.295 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.52% +0.44%] index_select strided 3 : Elapsed 0.023 ms (2.295 ms / 100) 2.282 -> 2.285 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +0.83% +0.66%] index_select strided 5 : Elapsed 0.023 ms (2.284 ms / 100) 2.289 -> 2.294 ( +0.22%) [ +0.31% +0.00% +0.09% / +0.22% +0.74% +0.87%] index_select strided 7 : Elapsed 0.023 ms (2.296 ms / 100) 2.278 -> 2.279 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.61% +0.57%] index_select strided 8 : Elapsed 0.023 ms (2.279 ms / 100) 2.279 -> 2.283 ( +0.18%) [ +0.04% +0.09% +0.00% / +0.18% +0.35% +0.53%] index_select random : Elapsed 0.023 ms (2.280 ms / 100) 2.288 -> 2.289 ( +0.04%) [ +0.17% +0.26% +0.00% / +0.04% +0.22% +0.35%] index_select random_sorted : Elapsed 0.023 ms (2.292 ms / 100) 2.284 -> 2.287 ( +0.13%) [ +0.09% +0.00% +0.18% / +0.13% +0.57% +0.66%] index_select perm : Elapsed 0.023 ms (2.286 ms / 100) 2.296 -> 2.298 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.30% +0.26%] index_select perm_sorted : Elapsed 0.023 ms (2.296 ms / 100) B = [5, 40, 4, 20] (stride (1, 100, 4000, 5)) A = [16, 40, 4, 20] (stride (40, 1, 12800, 640)) dim = 0 2.314 -> 2.319 ( +0.22%) [ +0.00% +0.13% +0.13% / +0.22% +0.35% +0.39%] index_select const : Elapsed 0.023 ms (2.314 ms / 100) 2.326 -> 2.327 ( +0.04%) [ +0.21% +0.21% +0.00% / +0.04% +0.30% +0.26%] index_select wrap : Elapsed 0.023 ms (2.331 ms / 100) 2.327 -> 2.327 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.34% +0.21%] index_select linear : Elapsed 0.023 ms (2.327 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.00% +0.04% +0.17% / +0.00% +0.21% +0.21%] index_select reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.319 -> 2.318 ( -0.04%) [ +0.09% +0.39% +0.00% / -0.04% +0.52% +0.34%] index_select skip64 : Elapsed 0.023 ms (2.321 ms / 100) 2.317 -> 2.321 ( +0.17%) [ +0.09% +0.00% +0.13% / +0.17% +0.17% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.319 ms / 100) 2.325 -> 2.328 ( +0.13%) [ +0.30% +0.26% +0.00% / +0.13% +0.22% +0.22%] index_select spread : Elapsed 0.023 ms (2.332 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +0.26% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.332 ms / 100) 2.339 -> 2.338 ( -0.04%) [ +0.13% +0.13% +0.00% / -0.04% +0.26% +0.60%] index_select strided 5 : Elapsed 0.023 ms (2.342 ms / 100) 2.333 -> 2.336 ( +0.13%) [ +0.09% +0.00% +0.21% / +0.26% +0.13% +0.26%] index_select strided 7 : Elapsed 0.023 ms (2.335 ms / 100) 2.319 -> 2.324 ( +0.22%) [ +0.00% +0.17% +0.13% / +0.22% +0.30% +0.39%] index_select strided 8 : Elapsed 0.023 ms (2.319 ms / 100) 2.319 -> 2.322 ( +0.13%) [ +0.00% +0.13% +0.17% / +0.13% +0.47% +0.39%] index_select random : Elapsed 0.023 ms (2.319 ms / 100) 2.329 -> 2.327 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.17% +0.21%] index_select random_sorted : Elapsed 0.023 ms (2.329 ms / 100) 2.334 -> 2.335 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.13% +0.39%] index_select perm : Elapsed 0.023 ms (2.337 ms / 100) 2.332 -> 2.338 ( +0.26%) [ +0.04% +0.00% +0.04% / +0.26% +0.43% +0.39%] index_select perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) out_shape = [16, 5, 4, 20] in_shape = [16, 40, 4, 20] idx_dim = 1 B = [16, 5, 4, 20] (stride (400, 80, 1, 4)) A = [16, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 1 1.380 -> 1.378 ( -0.14%) [ +0.00% +0.00% +0.14% / -0.14% +0.29% +0.29%] index_select const : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.36% +0.51%] index_select wrap : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.44% +0.44%] index_select linear : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.51% +0.58%] index_select reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.00% +0.15% +0.00% / -0.07% +0.36% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.51% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.380 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.51% +0.51%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.73% +0.58%] index_select strided 5 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.51% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.00% +0.15% +0.00% / -0.07% +0.51% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select random : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.58% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.80% +0.65%] index_select perm : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 100, 5)) A = [16, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 1 0.619 -> 0.618 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.48% +0.65%] index_select const : Elapsed 0.006 ms (0.619 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +1.13% +1.13%] index_select wrap : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.97% +0.97%] index_select linear : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.97% +1.13%] index_select reverse : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.97% +0.81%] index_select skip64 : Elapsed 0.006 ms (0.617 ms / 100) 0.616 -> 0.618 ( +0.32%) [ +0.16% +0.32% +0.00% / +0.32% +1.14% +1.14%] index_select skip256 : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +1.30% +1.13%] index_select spread : Elapsed 0.006 ms (0.617 ms / 100) 0.618 -> 0.618 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.81% +0.97%] index_select strided 3 : Elapsed 0.006 ms (0.618 ms / 100) 0.618 -> 0.618 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.81% +0.81%] index_select strided 5 : Elapsed 0.006 ms (0.618 ms / 100) 0.618 -> 0.623 ( +0.81%) [ +0.00% +0.00% +0.97% / +2.91% +0.81% +0.97%] index_select strided 7 : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.81% +0.97%] index_select strided 8 : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.81% +0.97%] index_select strided 16 : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.13% +0.97%] index_select random : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.97% +1.46%] index_select random_sorted : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +1.13% +0.97%] index_select perm : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.97% +0.97%] index_select perm_sorted : Elapsed 0.006 ms (0.618 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 5, 20)) A = [16, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 1 1.380 -> 1.379 ( -0.07%) [ +0.22% +0.00% +0.07% / -0.07% +0.29% +0.29%] index_select const : Elapsed 0.014 ms (1.383 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.44% +0.44%] index_select wrap : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.44% +0.58%] index_select linear : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_select reverse : Elapsed 0.014 ms (1.378 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.36% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.381 ms / 100) 1.377 -> 1.380 ( +0.22%) [ +0.22% +0.00% +0.07% / +0.22% +0.51% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.380 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.65% +0.51%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.51% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.51% +0.58%] index_select strided 5 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.15% +0.00% +0.00% / -0.07% +0.51% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.29% +0.00% / +0.07% +0.51% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.65% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.73% +0.65%] index_select random : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.374 ( -0.15%) [ +0.00% +0.07% +0.44% / -0.15% +0.73% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.58% +0.73%] index_select perm : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.07% +0.15% / +0.07% +0.58% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [16, 5, 4, 20] (stride (4, 1280, 1, 64)) A = [16, 40, 4, 20] (stride (3200, 1, 40, 160)) dim = 1 1.615 -> 1.616 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.62% +0.50%] index_select const : Elapsed 0.016 ms (1.617 ms / 100) 1.615 -> 1.614 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.56% +0.50%] index_select wrap : Elapsed 0.016 ms (1.615 ms / 100) 1.618 -> 1.621 ( +0.19%) [ +0.19% +0.00% +0.31% / +0.19% +0.56% +0.56%] index_select linear : Elapsed 0.016 ms (1.621 ms / 100) 1.619 -> 1.620 ( +0.06%) [ +0.00% +0.19% +0.12% / +0.06% +0.49% +0.43%] index_select reverse : Elapsed 0.016 ms (1.619 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.68% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.614 ms / 100) 1.615 -> 1.616 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.62% +0.50%] index_select skip256 : Elapsed 0.016 ms (1.616 ms / 100) 1.607 -> 1.609 ( +0.12%) [ +0.00% +0.19% +0.12% / +0.12% +0.44% +0.50%] index_select spread : Elapsed 0.016 ms (1.607 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.56% +0.43%] index_select strided 3 : Elapsed 0.016 ms (1.611 ms / 100) 1.610 -> 1.613 ( +0.19%) [ +0.25% +0.25% +0.00% / +0.19% +0.87% +0.68%] index_select strided 5 : Elapsed 0.016 ms (1.614 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.12% +0.00% +0.00% / +0.06% +0.44% +0.50%] index_select strided 7 : Elapsed 0.016 ms (1.608 ms / 100) 1.613 -> 1.614 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.43%] index_select strided 8 : Elapsed 0.016 ms (1.614 ms / 100) 1.607 -> 1.609 ( +0.12%) [ +0.00% +0.12% +0.06% / +0.12% +0.62% +0.68%] index_select strided 16 : Elapsed 0.016 ms (1.607 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.87% +0.81%] index_select random : Elapsed 0.016 ms (1.613 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.25% +0.00% +0.19% / +0.06% +0.56% +0.68%] index_select random_sorted : Elapsed 0.016 ms (1.616 ms / 100) 1.607 -> 1.609 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.50% +0.50%] index_select perm : Elapsed 0.016 ms (1.607 ms / 100) 1.610 -> 1.609 ( -0.06%) [ +0.12% +0.06% +0.00% / -0.06% +0.68% +0.50%] index_select perm_sorted : Elapsed 0.016 ms (1.612 ms / 100) B = [16, 5, 4, 20] (stride (1, 1280, 16, 64)) A = [16, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 1 1.381 -> 1.383 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.22% +0.14% +0.22%] index_select const : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.44% +0.51%] index_select wrap : Elapsed 0.014 ms (1.380 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.22% +0.00% +0.22% / +0.15% +0.58% +0.94%] index_select linear : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.00% +0.22% +0.15% / +0.00% +0.51% +0.44%] index_select reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.380 ( +0.22%) [ +0.15% +0.22% +0.00% / +0.22% +0.51% +0.44%] index_select skip64 : Elapsed 0.014 ms (1.379 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.22% +0.14% +0.00% / +0.14% +0.36% +0.29%] index_select skip256 : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +0.51% +0.51%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.51% +0.65%] index_select strided 3 : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.44% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.51% +0.65%] index_select strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.58% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.379 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.43% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.29% +0.00% +0.14% / +0.07% +0.51% +0.51%] index_select random : Elapsed 0.014 ms (1.385 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.73% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.378 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.29% +0.29% +0.00% / +0.07% +0.87% +0.65%] index_select perm_sorted : Elapsed 0.014 ms (1.379 ms / 100) B = [16, 5, 4, 20] (stride (100, 20, 1600, 1)) A = [16, 40, 4, 20] (stride (80, 1280, 1, 4)) dim = 1 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.20% / +0.07% +0.68% +0.68%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.54% +1.08%] index_select wrap : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.20% +0.07% +0.00% / +0.14% +0.74% +0.74%] index_select linear : Elapsed 0.015 ms (1.480 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.68% +0.61%] index_select reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.75% +0.81%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.81% +0.75%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.479 ( +0.20%) [ +0.20% +0.14% +0.00% / +0.20% +0.88% +0.81%] index_select spread : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.482 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +0.74% +0.74%] index_select strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.74% +0.74%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.74% +0.68%] index_select strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.74% +0.74%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.74% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select random : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.81% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.68% +0.68%] index_select perm : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.88% +0.74%] index_select perm_sorted : Elapsed 0.015 ms (1.478 ms / 100) B = [16, 5, 4, 20] (stride (100, 20, 1600, 1)) A = [16, 40, 4, 20] (stride (1, 16, 12800, 640)) dim = 1 1.528 -> 1.540 ( +0.79%) [ +0.59% +0.00% +0.92% / +1.05% +1.44% +0.79%] index_select const : Elapsed 0.015 ms (1.537 ms / 100) 1.494 -> 1.494 ( +0.00%) [ +0.07% +0.27% +0.00% / +0.00% +0.40% +0.33%] index_select wrap : Elapsed 0.015 ms (1.495 ms / 100) 1.483 -> 1.486 ( +0.20%) [ +0.00% +0.20% +0.00% / +0.20% +0.54% +0.67%] index_select linear : Elapsed 0.015 ms (1.483 ms / 100) 1.488 -> 1.488 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.54%] index_select reverse : Elapsed 0.015 ms (1.488 ms / 100) 1.503 -> 1.502 ( -0.07%) [ +0.33% +0.00% +0.33% / +0.27% -0.07% +0.73%] index_select skip64 : Elapsed 0.015 ms (1.508 ms / 100) 1.527 -> 1.535 ( +0.52%) [ +0.26% +0.65% +0.00% / +0.52% +1.31% +1.24%] index_select skip256 : Elapsed 0.015 ms (1.531 ms / 100) 1.485 -> 1.488 ( +0.20%) [ +0.27% +0.20% +0.00% / +0.20% +0.61% +0.54%] index_select spread : Elapsed 0.015 ms (1.489 ms / 100) 1.493 -> 1.492 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.60% +0.47%] index_select strided 3 : Elapsed 0.015 ms (1.494 ms / 100) 1.489 -> 1.493 ( +0.27%) [ +0.27% +0.20% +0.00% / +0.27% +0.60% +0.67%] index_select strided 5 : Elapsed 0.015 ms (1.493 ms / 100) 1.485 -> 1.489 ( +0.27%) [ +0.27% +0.27% +0.00% / +0.27% +0.61% +0.81%] index_select strided 7 : Elapsed 0.015 ms (1.489 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.00% +0.07% +0.20% / +0.20% +0.73% +0.67%] index_select strided 8 : Elapsed 0.015 ms (1.498 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.80% +0.60%] index_select strided 16 : Elapsed 0.015 ms (1.493 ms / 100) 1.508 -> 1.509 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.46% +0.46%] index_select random : Elapsed 0.015 ms (1.509 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.94% +0.67%] index_select random_sorted : Elapsed 0.015 ms (1.495 ms / 100) 1.515 -> 1.516 ( +0.07%) [ +0.20% +0.13% +0.00% / +0.07% +0.73% +0.59%] index_select perm : Elapsed 0.015 ms (1.518 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.67% +0.60%] index_select perm_sorted : Elapsed 0.015 ms (1.493 ms / 100) B = [16, 5, 4, 20] (stride (100, 1, 1600, 5)) A = [16, 40, 4, 20] (stride (3200, 1, 40, 160)) dim = 1 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.00% +0.43% / +0.00% +0.56% +0.56%] index_select const : Elapsed 0.016 ms (1.612 ms / 100) 1.610 -> 1.611 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.56% +0.56%] index_select wrap : Elapsed 0.016 ms (1.611 ms / 100) 1.612 -> 1.612 ( +0.00%) [ +0.19% +0.00% +0.06% / +0.00% +0.62% +0.56%] index_select linear : Elapsed 0.016 ms (1.615 ms / 100) 1.610 -> 1.612 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.81% +0.75%] index_select reverse : Elapsed 0.016 ms (1.610 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.62% +0.68%] index_select skip64 : Elapsed 0.016 ms (1.611 ms / 100) 1.609 -> 1.610 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.75% +0.62%] index_select skip256 : Elapsed 0.016 ms (1.610 ms / 100) 1.602 -> 1.602 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.56%] index_select spread : Elapsed 0.016 ms (1.602 ms / 100) 1.605 -> 1.606 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.75% +0.81%] index_select strided 3 : Elapsed 0.016 ms (1.606 ms / 100) 1.607 -> 1.609 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.50% +0.68%] index_select strided 5 : Elapsed 0.016 ms (1.609 ms / 100) 1.597 -> 1.602 ( +0.31%) [ +0.25% +0.19% +0.00% / +0.31% +0.88% +0.75%] index_select strided 7 : Elapsed 0.016 ms (1.601 ms / 100) 1.606 -> 1.610 ( +0.25%) [ +0.06% +0.12% +0.00% / +0.25% +0.81% +0.81%] index_select strided 8 : Elapsed 0.016 ms (1.607 ms / 100) 1.603 -> 1.605 ( +0.12%) [ +0.25% +0.00% +0.06% / +0.12% +0.81% +0.75%] index_select strided 16 : Elapsed 0.016 ms (1.607 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.00% +0.06% +0.00% / +0.12% +0.68% +0.87%] index_select random : Elapsed 0.016 ms (1.606 ms / 100) 1.602 -> 1.603 ( +0.06%) [ +0.12% +0.00% +0.00% / +0.06% +0.75% +0.69%] index_select random_sorted : Elapsed 0.016 ms (1.604 ms / 100) 1.606 -> 1.605 ( -0.06%) [ +0.00% +0.12% +0.12% / -0.06% +0.93% +0.75%] index_select perm : Elapsed 0.016 ms (1.606 ms / 100) 1.599 -> 1.605 ( +0.38%) [ +0.44% +0.81% +0.00% / +0.38% +0.94% +1.00%] index_select perm_sorted : Elapsed 0.016 ms (1.606 ms / 100) B = [16, 5, 4, 20] (stride (100, 1, 1600, 5)) A = [16, 40, 4, 20] (stride (1, 1280, 16, 64)) dim = 1 1.592 -> 1.589 ( -0.19%) [ +0.19% +0.50% +0.00% / -0.19% +1.07% +0.88%] index_select const : Elapsed 0.016 ms (1.595 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.06% +0.00% +0.13% / +0.06% +0.38% +0.32%] index_select wrap : Elapsed 0.016 ms (1.575 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.00% +0.32% +0.13% / +0.32% +0.13% +0.32%] index_select linear : Elapsed 0.016 ms (1.574 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.00% +0.13% +0.25% / +0.32% +0.19% +0.25%] index_select reverse : Elapsed 0.016 ms (1.573 ms / 100) 1.571 -> 1.576 ( +0.32%) [ +0.45% +0.45% +0.00% / +0.45% +0.45% +0.32%] index_select skip64 : Elapsed 0.016 ms (1.578 ms / 100) 1.587 -> 1.598 ( +0.69%) [ +0.69% +0.00% +0.82% / +0.69% +1.26% +0.82%] index_select skip256 : Elapsed 0.016 ms (1.598 ms / 100) 1.585 -> 1.594 ( +0.57%) [ +1.07% +0.57% +0.00% / +0.57% +1.07% +1.51%] index_select spread : Elapsed 0.016 ms (1.602 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.32% +0.32% +0.00% / +0.38% +0.13% +0.25%] index_select strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.576 ( -0.06%) [ +0.19% +0.06% +0.00% / +0.13% +0.19% -0.06%] index_select strided 5 : Elapsed 0.016 ms (1.580 ms / 100) 1.586 -> 1.592 ( +0.38%) [ +0.00% +1.07% +1.01% / +1.07% +1.58% +0.38%] index_select strided 7 : Elapsed 0.016 ms (1.586 ms / 100) 1.594 -> 1.593 ( -0.06%) [ +0.50% +0.56% +0.00% / -0.06% +0.31% +0.88%] index_select strided 8 : Elapsed 0.016 ms (1.602 ms / 100) 1.573 -> 1.577 ( +0.25%) [ +0.06% +0.45% +0.00% / +0.32% +0.45% +0.25%] index_select strided 16 : Elapsed 0.016 ms (1.574 ms / 100) 1.576 -> 1.574 ( -0.13%) [ +0.00% +0.13% +0.25% / -0.13% +0.00% +0.00%] index_select random : Elapsed 0.016 ms (1.576 ms / 100) 1.586 -> 1.597 ( +0.69%) [ +0.44% +0.06% +0.00% / +0.69% +1.07% +1.07%] index_select random_sorted : Elapsed 0.016 ms (1.593 ms / 100) 1.578 -> 1.584 ( +0.38%) [ +0.44% +0.38% +0.00% / +0.38% +1.14% +0.70%] index_select perm : Elapsed 0.016 ms (1.585 ms / 100) 1.575 -> 1.577 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.25% +0.13% +0.19%] index_select perm_sorted : Elapsed 0.016 ms (1.575 ms / 100) B = [16, 5, 4, 20] (stride (100, 1, 1600, 5)) A = [16, 40, 4, 20] (stride (160, 4, 1, 2560)) dim = 1 1.581 -> 1.585 ( +0.25%) [ +0.25% +0.19% +0.00% / +0.25% +0.76% +0.76%] index_select const : Elapsed 0.016 ms (1.585 ms / 100) 1.583 -> 1.582 ( -0.06%) [ +0.06% +0.13% +0.00% / -0.06% +0.51% +0.44%] index_select wrap : Elapsed 0.016 ms (1.584 ms / 100) 1.582 -> 1.582 ( +0.00%) [ +0.06% +0.63% +0.00% / +0.00% +0.57% +0.63%] index_select linear : Elapsed 0.016 ms (1.583 ms / 100) 1.583 -> 1.583 ( +0.00%) [ +0.00% +0.69% +0.06% / +0.00% +0.63% +0.63%] index_select reverse : Elapsed 0.016 ms (1.583 ms / 100) 1.580 -> 1.581 ( +0.06%) [ +0.13% +0.19% +0.00% / +0.06% +0.63% +0.76%] index_select skip64 : Elapsed 0.016 ms (1.582 ms / 100) 1.581 -> 1.580 ( -0.06%) [ +0.00% +0.25% +0.13% / -0.06% +0.82% +0.82%] index_select skip256 : Elapsed 0.016 ms (1.581 ms / 100) 1.578 -> 1.580 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.76% +0.76%] index_select spread : Elapsed 0.016 ms (1.580 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.00% +0.19% +0.13% / +0.00% +0.95% +0.89%] index_select strided 3 : Elapsed 0.016 ms (1.578 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.38% +0.38% +0.00% / +0.00% +0.57% +0.51%] index_select strided 5 : Elapsed 0.016 ms (1.581 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.06% +0.25% +0.00% / +0.13% +0.76% +0.70%] index_select strided 7 : Elapsed 0.016 ms (1.577 ms / 100) 1.575 -> 1.579 ( +0.25%) [ +0.00% +0.38% +0.25% / +0.25% +0.83% +0.63%] index_select strided 8 : Elapsed 0.016 ms (1.575 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.19% +0.00% +0.19% / +0.00% +0.57% +0.57%] index_select strided 16 : Elapsed 0.016 ms (1.578 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.32% +0.13% +0.00% / +0.13% +1.02% +0.63%] index_select random : Elapsed 0.016 ms (1.581 ms / 100) 1.573 -> 1.578 ( +0.32%) [ +0.00% +0.45% +0.25% / +0.32% +0.38% +0.32%] index_select random_sorted : Elapsed 0.016 ms (1.573 ms / 100) 1.579 -> 1.577 ( -0.13%) [ +0.00% +0.06% +0.00% / -0.13% +0.06% -0.06%] index_select perm : Elapsed 0.016 ms (1.579 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.25% +0.13% +0.00% / +0.13% +0.89% +0.82%] index_select perm_sorted : Elapsed 0.016 ms (1.580 ms / 100) B = [16, 5, 4, 20] (stride (20, 320, 1600, 1)) A = [16, 40, 4, 20] (stride (4, 64, 1, 2560)) dim = 1 1.575 -> 1.574 ( -0.06%) [ +0.00% +0.19% +0.00% / +0.32% +0.13% -0.06%] index_select const : Elapsed 0.016 ms (1.575 ms / 100) 1.574 -> 1.579 ( +0.32%) [ +0.25% +0.13% +0.00% / +0.32% +0.38% +0.38%] index_select wrap : Elapsed 0.016 ms (1.578 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.00% +0.19% +0.32% / +0.32% +0.32% +0.13%] index_select linear : Elapsed 0.016 ms (1.573 ms / 100) 1.568 -> 1.573 ( +0.32%) [ +0.57% +0.32% +0.00% / +0.45% +0.64% +0.32%] index_select reverse : Elapsed 0.016 ms (1.577 ms / 100) 1.574 -> 1.574 ( +0.00%) [ +0.32% +0.13% +0.00% / +0.00% +0.25% +0.25%] index_select skip64 : Elapsed 0.016 ms (1.579 ms / 100) 1.569 -> 1.574 ( +0.32%) [ +0.38% +0.00% +0.32% / +0.70% +0.32% +0.64%] index_select skip256 : Elapsed 0.016 ms (1.575 ms / 100) 1.565 -> 1.560 ( -0.32%) [ +0.38% +0.00% +0.26% / -0.32% +0.83% +0.64%] index_select spread : Elapsed 0.016 ms (1.571 ms / 100) 1.558 -> 1.570 ( +0.77%) [ +0.13% +0.39% +0.00% / +1.41% +1.16% +0.77%] index_select strided 3 : Elapsed 0.016 ms (1.560 ms / 100) 1.574 -> 1.577 ( +0.19%) [ +0.38% +0.00% +0.32% / +0.19% +0.32% +0.38%] index_select strided 5 : Elapsed 0.016 ms (1.580 ms / 100) 1.558 -> 1.563 ( +0.32%) [ +0.39% +0.13% +0.00% / +0.32% +0.90% +1.16%] index_select strided 7 : Elapsed 0.016 ms (1.564 ms / 100) 1.559 -> 1.570 ( +0.71%) [ +0.51% +0.71% +0.00% / +0.71% +0.71% +1.22%] index_select strided 8 : Elapsed 0.016 ms (1.567 ms / 100) 1.569 -> 1.572 ( +0.19%) [ +0.38% +0.00% +0.45% / +0.19% +0.70% +0.25%] index_select strided 16 : Elapsed 0.016 ms (1.575 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.00% +0.13% +0.13% / -0.06% +0.06% +0.00%] index_select random : Elapsed 0.016 ms (1.576 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.82% +0.76%] index_select random_sorted : Elapsed 0.016 ms (1.577 ms / 100) 1.576 -> 1.573 ( -0.19%) [ +0.00% +0.25% +0.06% / -0.19% +0.32% +0.00%] index_select perm : Elapsed 0.016 ms (1.576 ms / 100) 1.568 -> 1.559 ( -0.57%) [ +0.77% +0.00% +0.57% / -0.57% +0.77% +0.70%] index_select perm_sorted : Elapsed 0.016 ms (1.580 ms / 100) B = [16, 5, 4, 20] (stride (20, 1, 5, 320)) A = [16, 40, 4, 20] (stride (80, 1280, 20, 1)) dim = 1 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.58% +0.44%] index_select const : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.65% +0.73%] index_select wrap : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.73% +0.73%] index_select linear : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.73% +0.73%] index_select reverse : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.65% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.374 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.65% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.73% +0.80%] index_select spread : Elapsed 0.014 ms (1.374 ms / 100) 1.375 -> 1.374 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.65% +0.80%] index_select strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.73% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.80% +0.73%] index_select strided 7 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.73% +1.02%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.66%] index_select strided 16 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.29% +0.07% +0.00% / +0.00% +0.73% +0.73%] index_select random : Elapsed 0.014 ms (1.378 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.73% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.73% +0.73%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.73%] index_select perm_sorted : Elapsed 0.014 ms (1.375 ms / 100) out_shape = [16, 40, 5, 20] in_shape = [16, 40, 4, 20] idx_dim = 2 B = [16, 40, 5, 20] (stride (4000, 100, 1, 5)) A = [16, 40, 4, 20] (stride (40, 1, 640, 2560)) dim = 2 3.501 -> 3.496 ( -0.14%) [ +0.03% +0.00% +0.06% / -0.03% -0.11% -0.14%] index_add_ linear : Elapsed 0.035 ms (3.502 ms / 100) 3.475 -> 3.478 ( +0.09%) [ +0.20% +0.29% +0.00% / +0.17% +0.09% +0.17%] index_copy_ linear : Elapsed 0.035 ms (3.482 ms / 100) 3.499 -> 3.489 ( -0.29%) [ +0.11% +0.14% +0.00% / +0.03% -0.29% -0.14%] index_add_ reverse : Elapsed 0.035 ms (3.503 ms / 100) 3.471 -> 3.473 ( +0.06%) [ +0.00% +0.32% +0.23% / +0.17% +0.26% +0.06%] index_copy_ reverse : Elapsed 0.035 ms (3.471 ms / 100) 3.499 -> 3.494 ( -0.14%) [ +0.00% +0.03% +0.11% / +0.06% +0.00% -0.14%] index_add_ spread : Elapsed 0.035 ms (3.499 ms / 100) 3.480 -> 3.479 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.03% +0.00%] index_copy_ spread : Elapsed 0.035 ms (3.481 ms / 100) 3.498 -> 3.487 ( -0.31%) [ +0.00% +0.26% +0.11% / +0.03% -0.31% +0.09%] index_add_ strided 3 : Elapsed 0.035 ms (3.498 ms / 100) 3.474 -> 3.470 ( -0.12%) [ +0.20% +0.00% +0.14% / +0.06% +0.46% -0.12%] index_copy_ strided 3 : Elapsed 0.035 ms (3.481 ms / 100) 3.499 -> 3.489 ( -0.29%) [ +0.11% +0.00% +0.09% / +0.11% -0.29% -0.06%] index_add_ perm : Elapsed 0.035 ms (3.503 ms / 100) 3.479 -> 3.477 ( -0.06%) [ +0.14% +0.00% +0.09% / -0.03% -0.06% +0.09%] index_copy_ perm : Elapsed 0.035 ms (3.484 ms / 100) 3.503 -> 3.490 ( -0.37%) [ +0.20% +0.26% +0.00% / +0.14% -0.09% -0.37%] index_add_ perm_sorted : Elapsed 0.035 ms (3.510 ms / 100) 3.485 -> 3.474 ( -0.32%) [ +0.09% +0.23% +0.00% / -0.20% -0.32% -0.23%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.488 ms / 100) 3.447 -> 3.456 ( +0.26%) [ +0.00% +0.15% +0.20% / +0.26% +0.99% +1.16%] index_select const : Elapsed 0.034 ms (3.447 ms / 100) 3.495 -> 3.495 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.03% +0.11% +0.00%] index_select wrap : Elapsed 0.035 ms (3.495 ms / 100) 3.486 -> 3.493 ( +0.20%) [ +0.40% +0.00% +0.23% / +0.20% +0.40% +0.43%] index_select linear : Elapsed 0.035 ms (3.500 ms / 100) 3.476 -> 3.458 ( -0.52%) [ +0.17% +0.06% +0.00% / +0.12% -0.52% -0.40%] index_select reverse : Elapsed 0.035 ms (3.482 ms / 100) 3.437 -> 3.447 ( +0.29%) [ +0.15% +0.09% +0.00% / +0.29% +0.73% +0.76%] index_select skip64 : Elapsed 0.034 ms (3.442 ms / 100) 3.452 -> 3.450 ( -0.06%) [ +0.03% +0.03% +0.00% / -0.06% +0.32% +0.35%] index_select skip256 : Elapsed 0.035 ms (3.453 ms / 100) 3.471 -> 3.470 ( -0.03%) [ +0.00% +0.09% +0.09% / -0.03% +0.06% +0.14%] index_select spread : Elapsed 0.035 ms (3.471 ms / 100) 3.495 -> 3.490 ( -0.14%) [ +0.17% +0.00% +0.14% / +0.26% -0.14% +0.06%] index_select strided 3 : Elapsed 0.035 ms (3.501 ms / 100) 3.474 -> 3.469 ( -0.14%) [ +0.03% +0.00% +0.14% / -0.09% +0.00% -0.14%] index_select random : Elapsed 0.035 ms (3.475 ms / 100) 3.441 -> 3.436 ( -0.15%) [ +0.09% +0.03% +0.00% / +0.20% -0.15% +0.17%] index_select random_sorted : Elapsed 0.034 ms (3.444 ms / 100) B = [16, 40, 5, 20] (stride (4000, 1, 40, 200)) A = [16, 40, 4, 20] (stride (160, 1, 40, 2560)) dim = 2 5.950 -> 5.960 ( +0.17%) [ +0.00% +0.07% +0.12% / +0.17% +0.22% +0.18%] index_add_ linear : Elapsed 0.059 ms (5.950 ms / 100) 5.852 -> 5.857 ( +0.09%) [ +0.00% +0.26% +0.14% / +0.26% +0.12% +0.09%] index_copy_ linear : Elapsed 0.059 ms (5.852 ms / 100) 5.946 -> 5.947 ( +0.02%) [ +0.00% +0.02% +0.10% / +0.02% +0.07% +0.15%] index_add_ reverse : Elapsed 0.059 ms (5.946 ms / 100) 5.846 -> 5.850 ( +0.07%) [ +0.00% +0.17% +0.17% / +0.09% +0.07% +0.17%] index_copy_ reverse : Elapsed 0.058 ms (5.846 ms / 100) 5.951 -> 5.949 ( -0.03%) [ +0.00% +0.10% +0.00% / -0.03% +0.15% +0.15%] index_add_ spread : Elapsed 0.060 ms (5.951 ms / 100) 5.859 -> 5.855 ( -0.07%) [ +0.05% +0.00% +0.10% / +0.12% -0.07% +0.09%] index_copy_ spread : Elapsed 0.059 ms (5.862 ms / 100) 5.956 -> 5.958 ( +0.03%) [ +0.00% +0.10% +0.30% / +0.03% +0.35% +0.32%] index_add_ strided 3 : Elapsed 0.060 ms (5.956 ms / 100) 5.864 -> 5.867 ( +0.05%) [ +0.00% +0.05% +0.24% / +0.15% +0.17% +0.05%] index_copy_ strided 3 : Elapsed 0.059 ms (5.864 ms / 100) 5.951 -> 5.961 ( +0.17%) [ +0.29% +0.00% +0.30% / +0.17% +0.32% +0.34%] index_add_ perm : Elapsed 0.060 ms (5.968 ms / 100) 5.862 -> 5.865 ( +0.05%) [ +0.07% +0.00% +0.27% / +0.10% +0.09% +0.05%] index_copy_ perm : Elapsed 0.059 ms (5.866 ms / 100) 5.951 -> 5.957 ( +0.10%) [ +0.00% +0.10% +0.13% / +0.10% +0.32% +0.30%] index_add_ perm_sorted : Elapsed 0.060 ms (5.951 ms / 100) 5.864 -> 5.862 ( -0.03%) [ +0.05% +0.00% +0.15% / +0.02% -0.03% +0.03%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.867 ms / 100) 6.157 -> 6.168 ( +0.18%) [ +0.00% +0.18% +0.15% / +0.18% +0.26% +0.23%] index_select const : Elapsed 0.062 ms (6.157 ms / 100) 6.258 -> 6.255 ( -0.05%) [ +0.00% +0.00% +0.00% / +0.10% -0.05% -0.03%] index_select wrap : Elapsed 0.063 ms (6.258 ms / 100) 6.242 -> 6.244 ( +0.03%) [ +0.10% +0.00% +0.02% / +0.22% +0.03% +0.26%] index_select linear : Elapsed 0.062 ms (6.248 ms / 100) 6.238 -> 6.247 ( +0.14%) [ +0.00% +0.05% +0.14% / +0.24% +0.16% +0.14%] index_select reverse : Elapsed 0.062 ms (6.238 ms / 100) 6.161 -> 6.165 ( +0.06%) [ +0.00% +0.02% +0.11% / +0.24% +0.06% +0.08%] index_select skip64 : Elapsed 0.062 ms (6.161 ms / 100) 6.160 -> 6.168 ( +0.13%) [ +0.03% +0.00% +0.03% / +0.13% +0.18% +0.18%] index_select skip256 : Elapsed 0.062 ms (6.162 ms / 100) 6.232 -> 6.233 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.13% +0.16%] index_select spread : Elapsed 0.062 ms (6.232 ms / 100) 6.253 -> 6.253 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.18% +0.03% +0.00%] index_select strided 3 : Elapsed 0.063 ms (6.253 ms / 100) 6.220 -> 6.219 ( -0.02%) [ +0.03% +0.00% +0.21% / +0.26% +0.10% -0.02%] index_select random : Elapsed 0.062 ms (6.222 ms / 100) 6.216 -> 6.222 ( +0.10%) [ +0.00% +0.06% +0.05% / +0.10% +0.11% +0.13%] index_select random_sorted : Elapsed 0.062 ms (6.216 ms / 100) B = [16, 40, 5, 20] (stride (100, 1600, 1, 5)) A = [16, 40, 4, 20] (stride (1, 16, 640, 2560)) dim = 2 6.036 -> 6.033 ( -0.05%) [ +0.08% +0.08% +0.00% / +0.07% -0.03% -0.05%] index_add_ linear : Elapsed 0.060 ms (6.041 ms / 100) 6.039 -> 6.023 ( -0.26%) [ +0.00% +0.02% +0.17% / +0.05% -0.26% -0.15%] index_copy_ linear : Elapsed 0.060 ms (6.039 ms / 100) 6.033 -> 6.028 ( -0.08%) [ +0.15% +0.00% +0.10% / +0.05% +0.02% -0.08%] index_add_ reverse : Elapsed 0.060 ms (6.042 ms / 100) 6.033 -> 6.027 ( -0.10%) [ +0.00% +0.17% +0.08% / +0.28% -0.10% -0.07%] index_copy_ reverse : Elapsed 0.060 ms (6.033 ms / 100) 6.036 -> 6.031 ( -0.08%) [ +0.00% +0.05% +0.05% / +0.07% -0.08% -0.02%] index_add_ spread : Elapsed 0.060 ms (6.036 ms / 100) 6.041 -> 6.032 ( -0.15%) [ +0.02% +0.00% +0.13% / +0.05% -0.15% -0.10%] index_copy_ spread : Elapsed 0.060 ms (6.042 ms / 100) 6.038 -> 6.026 ( -0.20%) [ +0.00% +0.02% +0.00% / +0.00% -0.08% -0.20%] index_add_ strided 3 : Elapsed 0.060 ms (6.038 ms / 100) 6.039 -> 6.032 ( -0.12%) [ +0.00% +0.18% +0.15% / +0.10% -0.08% -0.12%] index_copy_ strided 3 : Elapsed 0.060 ms (6.039 ms / 100) 6.034 -> 6.032 ( -0.03%) [ +0.00% +0.00% +0.02% / +0.20% -0.02% -0.03%] index_add_ perm : Elapsed 0.060 ms (6.034 ms / 100) 6.032 -> 6.023 ( -0.15%) [ +0.15% +0.00% +0.28% / +0.28% -0.15% -0.15%] index_copy_ perm : Elapsed 0.060 ms (6.041 ms / 100) 6.034 -> 6.027 ( -0.12%) [ +0.05% +0.00% +0.13% / +0.15% -0.12% -0.05%] index_add_ perm_sorted : Elapsed 0.060 ms (6.037 ms / 100) 6.036 -> 6.027 ( -0.15%) [ +0.02% +0.00% +0.25% / +0.08% -0.13% -0.15%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.037 ms / 100) 6.281 -> 6.242 ( -0.62%) [ +0.13% +0.00% +0.16% / +0.08% -0.54% -0.62%] index_select const : Elapsed 0.063 ms (6.289 ms / 100) 6.357 -> 6.343 ( -0.22%) [ +0.00% +0.13% +0.06% / +0.19% -0.22% -0.14%] index_select wrap : Elapsed 0.064 ms (6.357 ms / 100) 6.348 -> 6.330 ( -0.28%) [ +0.06% +0.06% +0.00% / +0.08% -0.16% -0.28%] index_select linear : Elapsed 0.064 ms (6.352 ms / 100) 6.351 -> 6.318 ( -0.52%) [ +0.00% +0.08% +0.03% / -0.05% -0.35% -0.52%] index_select reverse : Elapsed 0.064 ms (6.351 ms / 100) 6.285 -> 6.249 ( -0.57%) [ +0.00% +0.00% +0.11% / +0.10% -0.57% -0.46%] index_select skip64 : Elapsed 0.063 ms (6.285 ms / 100) 6.277 -> 6.250 ( -0.43%) [ +0.10% +0.00% +0.30% / +0.14% -0.40% -0.43%] index_select skip256 : Elapsed 0.063 ms (6.283 ms / 100) 6.350 -> 6.328 ( -0.35%) [ +0.00% +0.02% +0.13% / +0.11% -0.30% -0.35%] index_select spread : Elapsed 0.063 ms (6.350 ms / 100) 6.369 -> 6.332 ( -0.58%) [ +0.00% +0.00% +0.02% / +0.13% -0.58% -0.55%] index_select strided 3 : Elapsed 0.064 ms (6.369 ms / 100) 6.318 -> 6.298 ( -0.32%) [ +0.00% +0.00% +0.16% / +0.13% -0.27% -0.32%] index_select random : Elapsed 0.063 ms (6.318 ms / 100) 6.299 -> 6.305 ( +0.10%) [ +0.08% +0.02% +0.00% / +0.10% +0.43% +0.30%] index_select random_sorted : Elapsed 0.063 ms (6.304 ms / 100) B = [16, 40, 5, 20] (stride (1, 320, 12800, 16)) A = [16, 40, 4, 20] (stride (1, 16, 640, 2560)) dim = 2 5.813 -> 5.806 ( -0.12%) [ +0.09% +0.12% +0.00% / +0.07% +0.02% -0.12%] index_add_ linear : Elapsed 0.058 ms (5.818 ms / 100) 5.771 -> 5.761 ( -0.17%) [ +0.00% +0.42% +0.10% / +0.12% -0.17% -0.16%] index_copy_ linear : Elapsed 0.058 ms (5.771 ms / 100) 5.799 -> 5.804 ( +0.09%) [ +0.05% +0.00% +0.10% / +0.09% +0.36% +0.31%] index_add_ reverse : Elapsed 0.058 ms (5.802 ms / 100) 5.750 -> 5.763 ( +0.23%) [ +0.17% +0.00% +0.28% / +0.23% +0.38% +0.40%] index_copy_ reverse : Elapsed 0.058 ms (5.760 ms / 100) 5.804 -> 5.809 ( +0.09%) [ +0.00% +0.22% +0.21% / +0.19% +0.09% +0.21%] index_add_ spread : Elapsed 0.058 ms (5.804 ms / 100) 5.762 -> 5.757 ( -0.09%) [ +0.10% +0.05% +0.00% / +0.21% -0.09% +0.16%] index_copy_ spread : Elapsed 0.058 ms (5.768 ms / 100) 5.810 -> 5.817 ( +0.12%) [ +0.05% +0.00% +0.09% / +0.12% +0.29% +0.45%] index_add_ strided 3 : Elapsed 0.058 ms (5.813 ms / 100) 5.764 -> 5.771 ( +0.12%) [ +0.00% +0.09% +0.03% / +0.12% +0.40% +0.36%] index_copy_ strided 3 : Elapsed 0.058 ms (5.764 ms / 100) 5.795 -> 5.805 ( +0.17%) [ +0.00% +0.17% +0.02% / +0.38% +0.28% +0.17%] index_add_ perm : Elapsed 0.058 ms (5.795 ms / 100) 5.756 -> 5.764 ( +0.14%) [ +0.00% +0.12% +0.19% / +0.31% +0.14% +0.42%] index_copy_ perm : Elapsed 0.058 ms (5.756 ms / 100) 5.798 -> 5.798 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.26% +0.14%] index_add_ perm_sorted : Elapsed 0.058 ms (5.806 ms / 100) 5.756 -> 5.759 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.17% +0.14%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.756 ms / 100) 6.100 -> 6.074 ( -0.43%) [ +0.07% +0.00% +0.05% / +0.25% -0.28% -0.43%] index_select const : Elapsed 0.061 ms (6.104 ms / 100) 6.159 -> 6.148 ( -0.18%) [ +0.00% +0.00% +0.02% / +0.11% -0.16% -0.18%] index_select wrap : Elapsed 0.062 ms (6.159 ms / 100) 6.141 -> 6.133 ( -0.13%) [ +0.00% +0.07% +0.03% / +0.07% -0.11% -0.13%] index_select linear : Elapsed 0.061 ms (6.141 ms / 100) 6.145 -> 6.139 ( -0.10%) [ +0.00% +0.08% +0.11% / +0.07% -0.03% -0.10%] index_select reverse : Elapsed 0.061 ms (6.145 ms / 100) 6.107 -> 6.078 ( -0.47%) [ +0.00% +0.02% +0.02% / -0.05% -0.41% -0.47%] index_select skip64 : Elapsed 0.061 ms (6.107 ms / 100) 6.101 -> 6.080 ( -0.34%) [ +0.00% +0.00% +0.13% / +0.21% -0.26% -0.34%] index_select skip256 : Elapsed 0.061 ms (6.101 ms / 100) 6.148 -> 6.149 ( +0.02%) [ +0.00% +0.08% +0.05% / +0.02% +0.08% +0.08%] index_select spread : Elapsed 0.061 ms (6.148 ms / 100) 6.148 -> 6.144 ( -0.07%) [ +0.15% +0.00% +0.18% / +0.18% -0.07% -0.05%] index_select strided 3 : Elapsed 0.062 ms (6.157 ms / 100) 6.115 -> 6.106 ( -0.15%) [ +0.00% +0.08% +0.08% / +0.08% -0.15% -0.13%] index_select random : Elapsed 0.061 ms (6.115 ms / 100) 6.084 -> 6.087 ( +0.05%) [ +0.13% +0.10% +0.00% / +0.13% +0.05% +0.10%] index_select random_sorted : Elapsed 0.061 ms (6.092 ms / 100) B = [16, 40, 5, 20] (stride (200, 5, 1, 3200)) A = [16, 40, 4, 20] (stride (1, 1280, 16, 64)) dim = 2 5.818 -> 5.797 ( -0.36%) [ +0.05% +0.00% +0.19% / +0.09% -0.36% -0.34%] index_add_ linear : Elapsed 0.058 ms (5.821 ms / 100) 5.789 -> 5.769 ( -0.35%) [ +0.09% +0.00% +0.33% / +0.10% -0.35% -0.35%] index_copy_ linear : Elapsed 0.058 ms (5.794 ms / 100) 5.817 -> 5.803 ( -0.24%) [ +0.00% +0.02% +0.15% / +0.05% -0.24% -0.19%] index_add_ reverse : Elapsed 0.058 ms (5.817 ms / 100) 5.784 -> 5.759 ( -0.43%) [ +0.03% +0.00% +0.26% / +0.17% -0.21% -0.43%] index_copy_ reverse : Elapsed 0.058 ms (5.786 ms / 100) 5.816 -> 5.798 ( -0.31%) [ +0.05% +0.05% +0.00% / +0.15% -0.31% -0.22%] index_add_ spread : Elapsed 0.058 ms (5.819 ms / 100) 5.790 -> 5.768 ( -0.38%) [ +0.02% +0.00% +0.10% / +0.07% -0.38% -0.36%] index_copy_ spread : Elapsed 0.058 ms (5.791 ms / 100) 5.816 -> 5.795 ( -0.36%) [ +0.10% +0.00% +0.12% / +0.00% -0.36% -0.36%] index_add_ strided 3 : Elapsed 0.058 ms (5.822 ms / 100) 5.792 -> 5.766 ( -0.45%) [ +0.07% +0.09% +0.00% / -0.07% -0.45% -0.43%] index_copy_ strided 3 : Elapsed 0.058 ms (5.796 ms / 100) 5.818 -> 5.800 ( -0.31%) [ +0.03% +0.21% +0.00% / +0.02% -0.31% -0.28%] index_add_ perm : Elapsed 0.058 ms (5.820 ms / 100) 5.789 -> 5.756 ( -0.57%) [ +0.07% +0.00% +0.07% / +0.10% -0.57% -0.36%] index_copy_ perm : Elapsed 0.058 ms (5.793 ms / 100) 5.816 -> 5.803 ( -0.22%) [ +0.00% +0.00% +0.10% / +0.09% -0.22% -0.22%] index_add_ perm_sorted : Elapsed 0.058 ms (5.816 ms / 100) 5.789 -> 5.763 ( -0.45%) [ +0.00% +0.07% +0.07% / +0.14% -0.45% -0.41%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.789 ms / 100) 6.006 -> 5.982 ( -0.40%) [ +0.00% +0.08% +0.12% / +0.02% -0.40% -0.33%] index_select const : Elapsed 0.060 ms (6.006 ms / 100) 6.062 -> 6.035 ( -0.45%) [ +0.00% +0.05% +0.08% / -0.03% -0.45% -0.43%] index_select wrap : Elapsed 0.061 ms (6.062 ms / 100) 6.050 -> 6.021 ( -0.48%) [ +0.02% +0.00% +0.03% / +0.05% -0.43% -0.48%] index_select linear : Elapsed 0.061 ms (6.051 ms / 100) 6.042 -> 6.014 ( -0.46%) [ +0.00% +0.10% +0.17% / +0.08% -0.46% -0.40%] index_select reverse : Elapsed 0.060 ms (6.042 ms / 100) 6.004 -> 5.986 ( -0.30%) [ +0.00% +0.02% +0.03% / +0.12% -0.30% -0.25%] index_select skip64 : Elapsed 0.060 ms (6.004 ms / 100) 6.004 -> 5.983 ( -0.35%) [ +0.02% +0.00% +0.08% / +0.05% -0.25% -0.35%] index_select skip256 : Elapsed 0.060 ms (6.005 ms / 100) 6.038 -> 6.014 ( -0.40%) [ +0.12% +0.02% +0.00% / +0.12% -0.40% -0.30%] index_select spread : Elapsed 0.060 ms (6.045 ms / 100) 6.057 -> 6.028 ( -0.48%) [ +0.00% +0.03% +0.08% / +0.07% -0.48% -0.36%] index_select strided 3 : Elapsed 0.061 ms (6.057 ms / 100) 6.053 -> 6.035 ( -0.30%) [ +0.21% +0.12% +0.00% / +0.21% -0.30% -0.23%] index_select random : Elapsed 0.061 ms (6.066 ms / 100) 6.032 -> 6.017 ( -0.25%) [ +0.10% +0.00% +0.23% / +0.20% -0.23% -0.25%] index_select random_sorted : Elapsed 0.060 ms (6.038 ms / 100) out_shape = [16, 40, 4, 5] in_shape = [16, 40, 4, 20] idx_dim = 3 B = [16, 40, 4, 5] (stride (800, 1, 200, 40)) A = [16, 40, 4, 20] (stride (1, 64, 16, 2560)) dim = 3 1.705 -> 1.709 ( +0.23%) [ +0.12% +0.00% +0.18% / +0.23% +0.29% +0.35%] index_select const : Elapsed 0.017 ms (1.707 ms / 100) 1.708 -> 1.708 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.12% +0.18% +0.00%] index_select wrap : Elapsed 0.017 ms (1.711 ms / 100) 1.704 -> 1.702 ( -0.12%) [ +0.00% +0.12% +0.06% / -0.12% +0.29% +0.12%] index_select linear : Elapsed 0.017 ms (1.704 ms / 100) 1.707 -> 1.708 ( +0.06%) [ +0.23% +0.18% +0.00% / +0.06% +0.06% +0.35%] index_select reverse : Elapsed 0.017 ms (1.711 ms / 100) 1.702 -> 1.707 ( +0.29%) [ +0.29% +0.24% +0.00% / +0.29% +0.35% +0.29%] index_select skip64 : Elapsed 0.017 ms (1.707 ms / 100) 1.705 -> 1.705 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.12%] index_select skip256 : Elapsed 0.017 ms (1.705 ms / 100) 1.711 -> 1.706 ( -0.29%) [ +0.00% +0.12% +0.12% / -0.29% +0.12% +0.06%] index_select spread : Elapsed 0.017 ms (1.711 ms / 100) 1.704 -> 1.707 ( +0.18%) [ +0.23% +0.23% +0.00% / +0.18% +0.41% +0.88%] index_select strided 3 : Elapsed 0.017 ms (1.708 ms / 100) 1.707 -> 1.706 ( -0.06%) [ +0.00% +0.23% +0.00% / -0.06% +0.41% +0.12%] index_select strided 5 : Elapsed 0.017 ms (1.707 ms / 100) 1.704 -> 1.708 ( +0.23%) [ +0.29% +0.29% +0.00% / +0.23% +0.29% +0.35%] index_select strided 7 : Elapsed 0.017 ms (1.709 ms / 100) 1.710 -> 1.710 ( +0.00%) [ +0.35% +0.00% +0.29% / +0.00% +0.00% +0.29%] index_select strided 8 : Elapsed 0.017 ms (1.716 ms / 100) 1.708 -> 1.707 ( -0.06%) [ +0.35% +0.00% +0.06% / +0.23% -0.06% +0.18%] index_select strided 16 : Elapsed 0.017 ms (1.714 ms / 100) 1.713 -> 1.710 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.06% -0.18% +0.00%] index_select random : Elapsed 0.017 ms (1.713 ms / 100) 1.709 -> 1.711 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.29% +0.35%] index_select random_sorted : Elapsed 0.017 ms (1.711 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.23% +0.12% +0.23%] index_select perm : Elapsed 0.017 ms (1.710 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.06% +0.18% +0.00% / +0.12% +0.18% +0.12%] index_select perm_sorted : Elapsed 0.017 ms (1.709 ms / 100) B = [16, 40, 4, 5] (stride (1, 16, 640, 2560)) A = [16, 40, 4, 20] (stride (1, 16, 640, 2560)) dim = 3 1.817 -> 1.816 ( -0.06%) [ +0.00% +0.11% +0.11% / +0.06% +0.00% -0.06%] index_select const : Elapsed 0.018 ms (1.817 ms / 100) 1.810 -> 1.807 ( -0.17%) [ +0.06% +0.00% +0.00% / -0.17% +0.39% +0.22%] index_select wrap : Elapsed 0.018 ms (1.811 ms / 100) 1.811 -> 1.813 ( +0.11%) [ +0.00% +0.00% +0.06% / +0.11% +0.55% +0.50%] index_select linear : Elapsed 0.018 ms (1.811 ms / 100) 1.813 -> 1.818 ( +0.28%) [ +0.00% +0.22% +0.00% / +0.33% +0.50% +0.28%] index_select reverse : Elapsed 0.018 ms (1.813 ms / 100) 1.813 -> 1.818 ( +0.28%) [ +0.00% +0.00% +0.00% / +0.33% +0.39% +0.28%] index_select skip64 : Elapsed 0.018 ms (1.813 ms / 100) 1.810 -> 1.810 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.72% +0.55%] index_select skip256 : Elapsed 0.018 ms (1.811 ms / 100) 1.812 -> 1.811 ( -0.06%) [ +0.17% +0.22% +0.00% / -0.06% +0.55% +0.66%] index_select spread : Elapsed 0.018 ms (1.815 ms / 100) 1.815 -> 1.819 ( +0.22%) [ +0.17% +0.00% +0.06% / +0.22% +0.39% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.818 ms / 100) 1.817 -> 1.816 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.39% +0.39%] index_select strided 5 : Elapsed 0.018 ms (1.818 ms / 100) 1.811 -> 1.812 ( +0.06%) [ +0.22% +0.11% +0.00% / +0.06% +0.55% +0.72%] index_select strided 7 : Elapsed 0.018 ms (1.815 ms / 100) 1.811 -> 1.814 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.83% +0.72%] index_select strided 8 : Elapsed 0.018 ms (1.814 ms / 100) 1.816 -> 1.820 ( +0.22%) [ +0.00% +0.17% +0.00% / +0.22% +0.55% +0.44%] index_select strided 16 : Elapsed 0.018 ms (1.816 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.11% +0.33% +0.00% / +0.11% +0.72% +0.88%] index_select random : Elapsed 0.018 ms (1.820 ms / 100) 1.822 -> 1.823 ( +0.05%) [ +0.00% +0.22% +0.00% / +0.05% +0.55% +0.33%] index_select random_sorted : Elapsed 0.018 ms (1.822 ms / 100) 1.811 -> 1.812 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.33% +0.33%] index_select perm : Elapsed 0.018 ms (1.811 ms / 100) 1.814 -> 1.814 ( +0.00%) [ +0.00% +0.22% +0.06% / +0.00% +0.50% +0.55%] index_select perm_sorted : Elapsed 0.018 ms (1.814 ms / 100) out_shape = [5, 40, 20, 4] in_shape = [16, 40, 20, 4] idx_dim = 0 B = [5, 40, 20, 4] (stride (3200, 80, 4, 1)) A = [16, 40, 20, 4] (stride (20, 320, 1, 12800)) dim = 0 2.022 -> 2.023 ( +0.05%) [ +0.30% +0.00% +0.25% / +0.05% +0.40% +0.64%] index_select const : Elapsed 0.020 ms (2.028 ms / 100) 2.081 -> 2.083 ( +0.10%) [ +0.19% +0.24% +0.00% / +0.10% +0.53% +0.58%] index_select wrap : Elapsed 0.021 ms (2.085 ms / 100) 2.081 -> 2.085 ( +0.19%) [ +0.10% +0.05% +0.00% / +0.19% +0.48% +0.91%] index_select linear : Elapsed 0.021 ms (2.083 ms / 100) 2.078 -> 2.084 ( +0.29%) [ +0.00% +0.05% +0.58% / +0.29% +0.82% +0.87%] index_select reverse : Elapsed 0.021 ms (2.078 ms / 100) 2.021 -> 2.022 ( +0.05%) [ +0.25% +0.00% +0.05% / +0.05% +0.59% +0.35%] index_select skip64 : Elapsed 0.020 ms (2.026 ms / 100) 2.027 -> 2.023 ( -0.20%) [ +0.05% +0.05% +0.00% / -0.20% +0.35% +0.25%] index_select skip256 : Elapsed 0.020 ms (2.028 ms / 100) 2.090 -> 2.091 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.19% +0.19%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.24% +0.43%] index_select strided 3 : Elapsed 0.021 ms (2.091 ms / 100) 2.076 -> 2.084 ( +0.39%) [ +0.14% +0.14% +0.00% / +0.39% +0.77% +0.72%] index_select strided 5 : Elapsed 0.021 ms (2.079 ms / 100) 2.086 -> 2.091 ( +0.24%) [ +0.19% +0.00% +0.00% / +0.24% +0.29% +0.38%] index_select strided 7 : Elapsed 0.021 ms (2.090 ms / 100) 2.032 -> 2.031 ( -0.05%) [ +0.00% +0.05% +0.25% / -0.05% +0.54% +0.64%] index_select strided 8 : Elapsed 0.020 ms (2.032 ms / 100) 2.067 -> 2.076 ( +0.44%) [ +0.29% +0.29% +0.00% / +0.44% +1.02% +0.73%] index_select random : Elapsed 0.021 ms (2.073 ms / 100) 2.076 -> 2.076 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.14% +0.43%] index_select random_sorted : Elapsed 0.021 ms (2.077 ms / 100) 2.089 -> 2.092 ( +0.14%) [ +0.00% +0.38% +0.10% / +0.14% +0.24% +0.29%] index_select perm : Elapsed 0.021 ms (2.089 ms / 100) 2.088 -> 2.092 ( +0.19%) [ +0.10% +0.19% +0.00% / +0.38% +0.34% +0.19%] index_select perm_sorted : Elapsed 0.021 ms (2.090 ms / 100) B = [5, 40, 20, 4] (stride (80, 400, 1, 20)) A = [16, 40, 20, 4] (stride (20, 320, 1, 12800)) dim = 0 2.276 -> 2.277 ( +0.04%) [ +0.09% +0.00% +0.18% / +0.04% +0.22% +0.35%] index_select const : Elapsed 0.023 ms (2.278 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.00% +0.17% +0.21% / +0.04% +0.43% +0.39%] index_select wrap : Elapsed 0.023 ms (2.330 ms / 100) 2.334 -> 2.339 ( +0.21%) [ +0.43% +0.00% +0.47% / +0.21% +0.77% +0.51%] index_select linear : Elapsed 0.023 ms (2.344 ms / 100) 2.340 -> 2.344 ( +0.17%) [ +0.04% +0.00% +0.04% / +0.17% +0.21% +0.34%] index_select reverse : Elapsed 0.023 ms (2.341 ms / 100) 2.266 -> 2.266 ( +0.00%) [ +0.22% +0.04% +0.00% / +0.00% +0.62% +0.49%] index_select skip64 : Elapsed 0.023 ms (2.271 ms / 100) 2.274 -> 2.280 ( +0.26%) [ +0.04% +0.00% +0.09% / +0.26% +0.44% +0.57%] index_select skip256 : Elapsed 0.023 ms (2.275 ms / 100) 2.344 -> 2.353 ( +0.38%) [ +0.00% +0.55% +0.17% / +0.60% +0.38% +0.38%] index_select spread : Elapsed 0.023 ms (2.344 ms / 100) 2.342 -> 2.347 ( +0.21%) [ +0.21% +0.00% +0.43% / +0.30% +0.21% +0.34%] index_select strided 3 : Elapsed 0.023 ms (2.347 ms / 100) 2.331 -> 2.334 ( +0.13%) [ +0.00% +0.39% +0.09% / +0.13% +0.34% +0.51%] index_select strided 5 : Elapsed 0.023 ms (2.331 ms / 100) 2.340 -> 2.347 ( +0.30%) [ +0.00% +0.17% +0.00% / +0.43% +0.30% +0.51%] index_select strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.291 -> 2.293 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.17% +0.48%] index_select strided 8 : Elapsed 0.023 ms (2.291 ms / 100) 2.328 -> 2.332 ( +0.17%) [ +0.09% +0.13% +0.00% / +0.26% +0.17% +0.17%] index_select random : Elapsed 0.023 ms (2.330 ms / 100) 2.321 -> 2.319 ( -0.09%) [ +0.04% +0.00% +0.17% / -0.09% +0.34% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.322 ms / 100) 2.338 -> 2.334 ( -0.17%) [ +0.00% +0.17% +0.17% / -0.17% +0.30% +0.43%] index_select perm : Elapsed 0.023 ms (2.338 ms / 100) 2.340 -> 2.336 ( -0.17%) [ +0.00% +0.38% +0.00% / +0.43% -0.17% -0.09%] index_select perm_sorted : Elapsed 0.023 ms (2.340 ms / 100) B = [5, 40, 20, 4] (stride (20, 400, 1, 100)) A = [16, 40, 20, 4] (stride (3200, 80, 4, 1)) dim = 0 1.999 -> 2.006 ( +0.35%) [ +0.00% +0.05% +0.00% / +0.35% +0.35% +0.35%] index_select const : Elapsed 0.020 ms (1.999 ms / 100) 2.066 -> 2.063 ( -0.15%) [ +0.00% +0.00% +0.05% / +0.00% -0.15% +0.05%] index_select wrap : Elapsed 0.021 ms (2.066 ms / 100) 2.066 -> 2.065 ( -0.05%) [ +0.15% +0.19% +0.00% / +0.19% +0.00% -0.05%] index_select linear : Elapsed 0.021 ms (2.069 ms / 100) 2.062 -> 2.059 ( -0.15%) [ +0.00% +0.05% +0.05% / -0.15% +0.00% -0.05%] index_select reverse : Elapsed 0.021 ms (2.062 ms / 100) 1.998 -> 1.997 ( -0.05%) [ +0.00% +0.20% +0.10% / -0.05% +0.35% +0.45%] index_select skip64 : Elapsed 0.020 ms (1.998 ms / 100) 1.996 -> 2.001 ( +0.25%) [ +0.10% +0.20% +0.00% / +0.25% +0.40% +0.50%] index_select skip256 : Elapsed 0.020 ms (1.998 ms / 100) 2.058 -> 2.066 ( +0.39%) [ +0.19% +0.00% +0.15% / +0.39% +0.53% +0.49%] index_select spread : Elapsed 0.021 ms (2.062 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.15% +0.34% +0.00% / +0.10% +0.10% +0.24%] index_select strided 3 : Elapsed 0.021 ms (2.066 ms / 100) 2.046 -> 2.050 ( +0.20%) [ +0.20% +0.15% +0.00% / +0.20% +0.88% +0.93%] index_select strided 5 : Elapsed 0.021 ms (2.050 ms / 100) 2.069 -> 2.070 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.39% +0.53%] index_select strided 7 : Elapsed 0.021 ms (2.070 ms / 100) 2.010 -> 2.016 ( +0.30%) [ +0.15% +0.10% +0.00% / +0.30% +0.60% +0.80%] index_select strided 8 : Elapsed 0.020 ms (2.013 ms / 100) 2.032 -> 2.035 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.74% +0.59%] index_select random : Elapsed 0.020 ms (2.035 ms / 100) 2.037 -> 2.042 ( +0.25%) [ +0.25% +0.15% +0.00% / +0.25% +0.98% +0.69%] index_select random_sorted : Elapsed 0.020 ms (2.042 ms / 100) 2.050 -> 2.051 ( +0.05%) [ +0.15% +0.00% +0.15% / +0.05% +0.54% +0.54%] index_select perm : Elapsed 0.021 ms (2.053 ms / 100) 2.045 -> 2.049 ( +0.20%) [ +0.00% +0.34% +0.20% / +0.24% +0.34% +0.20%] index_select perm_sorted : Elapsed 0.020 ms (2.045 ms / 100) B = [5, 40, 20, 4] (stride (160, 1, 800, 40)) A = [16, 40, 20, 4] (stride (1, 320, 16, 12800)) dim = 0 2.283 -> 2.282 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.09% -0.04% +0.13%] index_select const : Elapsed 0.023 ms (2.283 ms / 100) 2.270 -> 2.270 ( +0.00%) [ +0.22% +0.26% +0.00% / +0.00% +0.22% +0.31%] index_select wrap : Elapsed 0.023 ms (2.275 ms / 100) 2.273 -> 2.275 ( +0.09%) [ +0.00% +0.00% +0.13% / +0.09% +0.22% +0.22%] index_select linear : Elapsed 0.023 ms (2.273 ms / 100) 2.271 -> 2.272 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.26% +0.26%] index_select reverse : Elapsed 0.023 ms (2.271 ms / 100) 2.271 -> 2.271 ( +0.00%) [ +0.26% +0.13% +0.00% / +0.13% +0.04% +0.00%] index_select skip64 : Elapsed 0.023 ms (2.277 ms / 100) 2.281 -> 2.282 ( +0.04%) [ +0.26% +0.26% +0.00% / +0.04% +0.13% +0.22%] index_select skip256 : Elapsed 0.023 ms (2.287 ms / 100) 2.282 -> 2.283 ( +0.04%) [ +0.22% +0.00% +0.09% / +0.18% +0.39% +0.04%] index_select spread : Elapsed 0.023 ms (2.287 ms / 100) 2.288 -> 2.287 ( -0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.17% -0.04%] index_select strided 3 : Elapsed 0.023 ms (2.288 ms / 100) 2.288 -> 2.286 ( -0.09%) [ +0.00% +0.04% +0.00% / +0.09% -0.09% +0.17%] index_select strided 5 : Elapsed 0.023 ms (2.288 ms / 100) 2.287 -> 2.284 ( -0.13%) [ +0.00% +0.22% +0.09% / +0.13% -0.04% -0.13%] index_select strided 7 : Elapsed 0.023 ms (2.287 ms / 100) 2.302 -> 2.300 ( -0.09%) [ +0.09% +0.13% +0.00% / +0.39% -0.09% +0.04%] index_select strided 8 : Elapsed 0.023 ms (2.304 ms / 100) 2.284 -> 2.286 ( +0.09%) [ +0.13% +0.00% +0.18% / +0.09% +0.35% +0.22%] index_select random : Elapsed 0.023 ms (2.287 ms / 100) 2.282 -> 2.281 ( -0.04%) [ +0.44% +0.35% +0.00% / +0.31% -0.04% +0.09%] index_select random_sorted : Elapsed 0.023 ms (2.292 ms / 100) 2.286 -> 2.289 ( +0.13%) [ +0.17% +0.00% +0.22% / +0.13% +0.17% +0.17%] index_select perm : Elapsed 0.023 ms (2.290 ms / 100) 2.287 -> 2.287 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.04% +0.04%] index_select perm_sorted : Elapsed 0.023 ms (2.289 ms / 100) B = [5, 40, 20, 4] (stride (4, 20, 800, 1)) dim = 0 fill_cnt = 16 2.343 -> 2.351 ( +0.34%) [ +0.51% +0.00% +0.13% / +0.34% +0.77% +0.77%] index_fill_ const : Elapsed 0.024 ms (2.355 ms / 100) 2.354 -> 2.352 ( -0.08%) [ +0.04% +0.00% +0.17% / -0.08% +0.68% +0.81%] index_fill_ linear : Elapsed 0.024 ms (2.355 ms / 100) 2.353 -> 2.360 ( +0.30%) [ +0.25% +0.13% +0.00% / +0.30% +0.72% +0.85%] index_fill_ reverse : Elapsed 0.024 ms (2.359 ms / 100) 2.347 -> 2.347 ( +0.00%) [ +0.04% +0.00% +0.17% / +0.00% +0.51% +0.64%] index_fill_ skip64 : Elapsed 0.023 ms (2.348 ms / 100) 2.350 -> 2.355 ( +0.21%) [ +0.17% +0.13% +0.00% / +0.21% +0.81% +0.77%] index_fill_ skip256 : Elapsed 0.024 ms (2.354 ms / 100) 2.350 -> 2.347 ( -0.13%) [ +0.04% +0.00% +0.21% / -0.13% +0.68% +0.55%] index_fill_ spread : Elapsed 0.024 ms (2.351 ms / 100) 2.351 -> 2.350 ( -0.04%) [ +0.26% +0.00% +0.17% / -0.04% +0.77% +0.60%] index_fill_ strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.356 -> 2.360 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.17% +0.68% +0.76%] index_fill_ random : Elapsed 0.024 ms (2.356 ms / 100) 2.357 -> 2.357 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_fill_ random_sorted : Elapsed 0.024 ms (2.359 ms / 100) B = [5, 40, 20, 4] (stride (1, 5, 800, 200)) A = [16, 40, 20, 4] (stride (3200, 1, 160, 40)) dim = 0 0.889 -> 0.892 ( +0.34%) [ +0.56% +0.00% +0.11% / +0.34% +1.24% +1.01%] index_select const : Elapsed 0.009 ms (0.894 ms / 100) 0.890 -> 0.891 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.79% +1.01%] index_select wrap : Elapsed 0.009 ms (0.892 ms / 100) 0.892 -> 0.894 ( +0.22%) [ +0.11% +0.00% +0.00% / +0.22% +0.78% +1.12%] index_select linear : Elapsed 0.009 ms (0.893 ms / 100) 0.890 -> 0.887 ( -0.34%) [ +0.00% +0.22% +0.00% / -0.34% +1.35% +1.12%] index_select reverse : Elapsed 0.009 ms (0.890 ms / 100) 0.888 -> 0.891 ( +0.34%) [ +0.79% +0.23% +0.00% / +0.34% +1.13% +1.01%] index_select skip64 : Elapsed 0.009 ms (0.895 ms / 100) 0.891 -> 0.893 ( +0.22%) [ +0.34% +0.11% +0.00% / +0.22% +0.56% +0.90%] index_select skip256 : Elapsed 0.009 ms (0.894 ms / 100) 0.891 -> 0.893 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.67% +0.90%] index_select spread : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.895 ( +0.34%) [ +0.00% +0.11% +0.34% / +0.34% +0.90% +1.01%] index_select strided 3 : Elapsed 0.009 ms (0.892 ms / 100) 0.896 -> 0.897 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.33% +0.11% +0.78%] index_select strided 5 : Elapsed 0.009 ms (0.898 ms / 100) 0.896 -> 0.894 ( -0.22%) [ +0.00% +0.22% +0.22% / +0.22% -0.22% +0.11%] index_select strided 7 : Elapsed 0.009 ms (0.896 ms / 100) 0.897 -> 0.898 ( +0.11%) [ +0.00% +0.56% +0.11% / +0.11% +0.45% +0.11%] index_select strided 8 : Elapsed 0.009 ms (0.897 ms / 100) 0.897 -> 0.899 ( +0.22%) [ +0.00% +0.11% +0.00% / +0.22% +0.67% +0.56%] index_select random : Elapsed 0.009 ms (0.897 ms / 100) 0.888 -> 0.893 ( +0.56%) [ +0.45% +0.34% +0.00% / +0.56% +2.25% +2.25%] index_select random_sorted : Elapsed 0.009 ms (0.892 ms / 100) 0.893 -> 0.899 ( +0.67%) [ +0.11% +0.11% +0.00% / +0.67% +2.13% +1.79%] index_select perm : Elapsed 0.009 ms (0.894 ms / 100) 0.890 -> 0.895 ( +0.56%) [ +0.00% +0.22% +0.45% / +0.56% +2.02% +1.35%] index_select perm_sorted : Elapsed 0.009 ms (0.890 ms / 100) B = [5, 40, 20, 4] (stride (1, 5, 200, 4000)) A = [16, 40, 20, 4] (stride (1, 1280, 16, 320)) dim = 0 2.431 -> 2.424 ( -0.29%) [ +0.00% +0.12% +0.00% / +0.12% +0.25% -0.29%] index_select const : Elapsed 0.024 ms (2.431 ms / 100) 2.425 -> 2.420 ( -0.21%) [ +0.00% +0.16% +0.21% / +0.33% +0.74% -0.21%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.416 -> 2.424 ( +0.33%) [ +0.00% +0.41% +0.29% / +0.33% +0.91% +0.41%] index_select linear : Elapsed 0.024 ms (2.416 ms / 100) 2.418 -> 2.410 ( -0.33%) [ +0.17% +0.08% +0.00% / +0.21% -0.33% -0.17%] index_select reverse : Elapsed 0.024 ms (2.422 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.00% +0.50% +0.58% / +0.00% +0.45% +0.58%] index_select skip64 : Elapsed 0.024 ms (2.424 ms / 100) 2.423 -> 2.422 ( -0.04%) [ +0.50% +0.50% +0.00% / +0.58% +0.66% -0.04%] index_select skip256 : Elapsed 0.024 ms (2.435 ms / 100) 2.434 -> 2.429 ( -0.21%) [ +0.00% +0.58% +0.41% / +0.49% -0.21% +0.70%] index_select spread : Elapsed 0.024 ms (2.434 ms / 100) 2.439 -> 2.444 ( +0.21%) [ +0.00% +0.66% +0.00% / +0.49% +0.25% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.444 -> 2.437 ( -0.29%) [ +0.00% +0.08% +0.20% / -0.12% -0.08% -0.29%] index_select strided 5 : Elapsed 0.024 ms (2.444 ms / 100) 2.441 -> 2.426 ( -0.61%) [ +0.00% +0.12% +0.20% / +0.00% -0.49% -0.61%] index_select strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.446 -> 2.442 ( -0.16%) [ +0.25% +0.00% +0.37% / +0.37% -0.16% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.452 ms / 100) 2.442 -> 2.439 ( -0.12%) [ +0.16% +0.08% +0.00% / -0.12% +0.12% +0.04%] index_select random : Elapsed 0.024 ms (2.446 ms / 100) 2.435 -> 2.429 ( -0.25%) [ +0.00% +0.00% +0.25% / +0.33% -0.25% +0.16%] index_select random_sorted : Elapsed 0.024 ms (2.435 ms / 100) 2.439 -> 2.439 ( +0.00%) [ +0.16% +0.37% +0.00% / +0.45% +0.78% +0.00%] index_select perm : Elapsed 0.024 ms (2.443 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.33% +0.49% +0.00% / +0.20% +0.04% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.452 ms / 100) out_shape = [16, 5, 20, 4] in_shape = [16, 40, 20, 4] idx_dim = 1 B = [16, 5, 20, 4] (stride (1, 1280, 64, 16)) A = [16, 40, 20, 4] (stride (800, 1, 40, 12800)) dim = 1 1.499 -> 1.501 ( +0.13%) [ +0.20% +0.13% +0.00% / +0.13% +0.40% +0.40%] index_select const : Elapsed 0.015 ms (1.502 ms / 100) 1.498 -> 1.502 ( +0.27%) [ +0.13% +0.27% +0.00% / +0.27% +0.73% +0.67%] index_select wrap : Elapsed 0.015 ms (1.500 ms / 100) 1.499 -> 1.500 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.47% +0.60%] index_select linear : Elapsed 0.015 ms (1.501 ms / 100) 1.499 -> 1.501 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.67% +0.60%] index_select reverse : Elapsed 0.015 ms (1.500 ms / 100) 1.496 -> 1.499 ( +0.20%) [ +0.20% +0.40% +0.00% / +0.20% +0.74% +0.80%] index_select skip64 : Elapsed 0.015 ms (1.499 ms / 100) 1.499 -> 1.501 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.60% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.501 ms / 100) 1.497 -> 1.498 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.27% +0.33%] index_select spread : Elapsed 0.015 ms (1.497 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +1.21% +0.60%] index_select strided 3 : Elapsed 0.015 ms (1.493 ms / 100) 1.483 -> 1.487 ( +0.27%) [ +0.13% +0.27% +0.00% / +0.27% +0.74% +0.67%] index_select strided 5 : Elapsed 0.015 ms (1.485 ms / 100) 1.489 -> 1.491 ( +0.13%) [ +0.34% +0.27% +0.00% / +0.13% +0.74% +0.60%] index_select strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.498 -> 1.497 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.40% +0.33%] index_select strided 8 : Elapsed 0.015 ms (1.498 ms / 100) 1.482 -> 1.482 ( +0.00%) [ +0.34% +0.20% +0.00% / +0.00% +0.67% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.487 ms / 100) 1.490 -> 1.493 ( +0.20%) [ +0.00% +0.13% +0.07% / +0.20% +0.54% +0.67%] index_select random : Elapsed 0.015 ms (1.490 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.74% +0.33%] index_select random_sorted : Elapsed 0.015 ms (1.494 ms / 100) 1.495 -> 1.495 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.67% +0.54%] index_select perm : Elapsed 0.015 ms (1.495 ms / 100) 1.494 -> 1.495 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.60% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.495 ms / 100) B = [16, 5, 20, 4] (stride (20, 1280, 1, 320)) A = [16, 40, 20, 4] (stride (1, 1280, 16, 320)) dim = 1 1.533 -> 1.532 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.78% +0.65%] index_select const : Elapsed 0.015 ms (1.534 ms / 100) 1.534 -> 1.535 ( +0.07%) [ +0.20% +0.13% +0.00% / +0.07% +0.91% +0.59%] index_select wrap : Elapsed 0.015 ms (1.537 ms / 100) 1.539 -> 1.538 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.97% +0.65%] index_select linear : Elapsed 0.015 ms (1.540 ms / 100) 1.536 -> 1.539 ( +0.20%) [ +0.13% +0.00% +0.13% / +0.20% +1.24% +0.78%] index_select reverse : Elapsed 0.015 ms (1.538 ms / 100) 1.536 -> 1.537 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.91% +0.98%] index_select skip64 : Elapsed 0.015 ms (1.537 ms / 100) 1.531 -> 1.532 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.72% +0.85%] index_select skip256 : Elapsed 0.015 ms (1.533 ms / 100) 1.525 -> 1.525 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.92% +0.59%] index_select spread : Elapsed 0.015 ms (1.526 ms / 100) 1.531 -> 1.532 ( +0.07%) [ +0.39% +0.07% +0.00% / +0.07% +0.78% +0.72%] index_select strided 3 : Elapsed 0.015 ms (1.537 ms / 100) 1.532 -> 1.533 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.65%] index_select strided 5 : Elapsed 0.015 ms (1.533 ms / 100) 1.532 -> 1.536 ( +0.26%) [ +0.20% +0.46% +0.00% / +0.26% +0.98% +0.98%] index_select strided 7 : Elapsed 0.015 ms (1.535 ms / 100) 1.525 -> 1.526 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.79%] index_select strided 8 : Elapsed 0.015 ms (1.526 ms / 100) 1.524 -> 1.525 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.79% +0.72%] index_select strided 16 : Elapsed 0.015 ms (1.525 ms / 100) 1.540 -> 1.538 ( -0.13%) [ +0.13% +0.06% +0.00% / -0.13% +0.78% +0.71%] index_select random : Elapsed 0.015 ms (1.542 ms / 100) 1.537 -> 1.539 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +1.89% +1.04%] index_select random_sorted : Elapsed 0.015 ms (1.539 ms / 100) 1.536 -> 1.536 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.04% +0.85%] index_select perm : Elapsed 0.015 ms (1.536 ms / 100) 1.529 -> 1.529 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.65%] index_select perm_sorted : Elapsed 0.015 ms (1.529 ms / 100) B = [16, 5, 20, 4] (stride (5, 1, 320, 80)) A = [16, 40, 20, 4] (stride (80, 1280, 4, 1)) dim = 1 0.641 -> 0.643 ( +0.31%) [ +0.47% +0.31% +0.00% / +0.31% +0.47% +0.47%] index_select const : Elapsed 0.006 ms (0.644 ms / 100) 0.643 -> 0.643 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.16% +0.31%] index_select wrap : Elapsed 0.006 ms (0.643 ms / 100) 0.643 -> 0.644 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.62% +0.31% +0.16%] index_select linear : Elapsed 0.006 ms (0.643 ms / 100) 0.642 -> 0.642 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_select reverse : Elapsed 0.006 ms (0.642 ms / 100) 0.640 -> 0.641 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.63% +0.63%] index_select skip64 : Elapsed 0.006 ms (0.641 ms / 100) 0.644 -> 0.643 ( -0.16%) [ +0.16% +0.00% +0.62% / -0.16% +0.16% +0.16%] index_select skip256 : Elapsed 0.006 ms (0.645 ms / 100) 0.644 -> 0.644 ( +0.00%) [ +0.16% +0.00% +0.31% / +0.00% +0.31% +0.16%] index_select spread : Elapsed 0.006 ms (0.645 ms / 100) 0.643 -> 0.644 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.47% +2.02%] index_select strided 3 : Elapsed 0.006 ms (0.644 ms / 100) 0.644 -> 0.644 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.31% +0.31%] index_select strided 5 : Elapsed 0.006 ms (0.644 ms / 100) 0.642 -> 0.643 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.47% +0.31%] index_select strided 7 : Elapsed 0.006 ms (0.643 ms / 100) 0.642 -> 0.643 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.47% +0.47%] index_select strided 8 : Elapsed 0.006 ms (0.643 ms / 100) 0.641 -> 0.643 ( +0.31%) [ +0.00% +0.31% +0.16% / +0.31% +1.72% +0.94%] index_select strided 16 : Elapsed 0.006 ms (0.641 ms / 100) 0.642 -> 0.642 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.62% +0.78%] index_select random : Elapsed 0.006 ms (0.642 ms / 100) 0.642 -> 0.642 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.78% +0.78%] index_select random_sorted : Elapsed 0.006 ms (0.643 ms / 100) 0.641 -> 0.642 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.94% +1.09%] index_select perm : Elapsed 0.006 ms (0.642 ms / 100) 0.642 -> 0.641 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.16% +0.78% +0.93%] index_select perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) B = [16, 5, 20, 4] (stride (1, 16, 320, 80)) A = [16, 40, 20, 4] (stride (1, 16, 2560, 640)) dim = 1 0.677 -> 0.677 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.44%] index_select const : Elapsed 0.007 ms (0.677 ms / 100) 0.674 -> 0.675 ( +0.15%) [ +0.00% +0.30% +0.00% / +0.15% +1.04% +1.34%] index_select wrap : Elapsed 0.007 ms (0.674 ms / 100) 0.674 -> 0.673 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.89% +0.89%] index_select linear : Elapsed 0.007 ms (0.674 ms / 100) 0.673 -> 0.673 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +1.04% +1.04%] index_select reverse : Elapsed 0.007 ms (0.674 ms / 100) 0.676 -> 0.677 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +1.18% +0.74%] index_select skip64 : Elapsed 0.007 ms (0.678 ms / 100) 0.677 -> 0.677 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +1.03% +0.89%] index_select skip256 : Elapsed 0.007 ms (0.678 ms / 100) 0.669 -> 0.669 ( +0.00%) [ +0.30% +0.45% +0.00% / +0.00% +1.35% +1.05%] index_select spread : Elapsed 0.007 ms (0.671 ms / 100) 0.678 -> 0.677 ( -0.15%) [ +0.15% +0.29% +0.00% / -0.15% +0.74% +0.59%] index_select strided 3 : Elapsed 0.007 ms (0.679 ms / 100) 0.681 -> 0.682 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.59% +0.44%] index_select strided 5 : Elapsed 0.007 ms (0.681 ms / 100) 0.681 -> 0.681 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.73% +0.44%] index_select strided 7 : Elapsed 0.007 ms (0.681 ms / 100) 0.672 -> 0.674 ( +0.30%) [ +0.00% +0.30% +0.15% / +0.30% +1.34% +1.19%] index_select strided 8 : Elapsed 0.007 ms (0.672 ms / 100) 0.672 -> 0.671 ( -0.15%) [ +0.00% +0.89% +0.00% / -0.15% +0.89% +1.04%] index_select strided 16 : Elapsed 0.007 ms (0.672 ms / 100) 0.674 -> 0.675 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.89% +0.89%] index_select random : Elapsed 0.007 ms (0.674 ms / 100) 0.676 -> 0.676 ( +0.00%) [ +0.44% +0.00% +0.00% / +0.00% +0.74% +0.30%] index_select random_sorted : Elapsed 0.007 ms (0.679 ms / 100) 0.670 -> 0.674 ( +0.60%) [ +0.15% +0.15% +0.00% / +0.60% +0.90% +1.04%] index_select perm : Elapsed 0.007 ms (0.671 ms / 100) 0.671 -> 0.672 ( +0.15%) [ +0.60% +0.45% +0.00% / +0.15% +1.19% +1.64%] index_select perm_sorted : Elapsed 0.007 ms (0.675 ms / 100) B = [16, 5, 20, 4] (stride (100, 20, 1, 1600)) A = [16, 40, 20, 4] (stride (3200, 20, 1, 800)) dim = 1 1.481 -> 1.482 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.41% +0.34%] index_select const : Elapsed 0.015 ms (1.481 ms / 100) 1.482 -> 1.482 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select wrap : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.47% +0.41%] index_select linear : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.47% +0.47%] index_select reverse : Elapsed 0.015 ms (1.482 ms / 100) 1.491 -> 1.492 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.47% +0.54%] index_select skip64 : Elapsed 0.015 ms (1.491 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.47% +0.54%] index_select skip256 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.481 ( +0.20%) [ +0.20% +0.34% +0.00% / +0.20% +0.54% +0.61%] index_select spread : Elapsed 0.015 ms (1.481 ms / 100) 1.481 -> 1.483 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.47% +0.41%] index_select strided 3 : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.480 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.54% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.482 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.47% +0.41%] index_select strided 7 : Elapsed 0.015 ms (1.482 ms / 100) 1.478 -> 1.481 ( +0.20%) [ +0.27% +0.14% +0.00% / +0.20% +0.61% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.482 ms / 100) 1.478 -> 1.480 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.68% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.61% +0.61%] index_select random : Elapsed 0.015 ms (1.480 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.61% +0.54%] index_select random_sorted : Elapsed 0.015 ms (1.481 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.54% +0.54%] index_select perm : Elapsed 0.015 ms (1.480 ms / 100) 1.478 -> 1.480 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.480 ms / 100) B = [16, 5, 20, 4] (stride (5, 1, 80, 1600)) A = [16, 40, 20, 4] (stride (1, 16, 640, 12800)) dim = 1 1.668 -> 1.669 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.60% +0.72%] index_select const : Elapsed 0.017 ms (1.668 ms / 100) 1.636 -> 1.638 ( +0.12%) [ +0.12% +0.18% +0.00% / +0.12% +0.67% +0.67%] index_select wrap : Elapsed 0.016 ms (1.638 ms / 100) 1.639 -> 1.638 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +0.67% +0.61%] index_select linear : Elapsed 0.016 ms (1.640 ms / 100) 1.627 -> 1.627 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.74% +0.49%] index_select reverse : Elapsed 0.016 ms (1.627 ms / 100) 1.667 -> 1.667 ( +0.00%) [ +0.06% +0.00% +0.18% / +0.00% +0.66% +0.78%] index_select skip64 : Elapsed 0.017 ms (1.668 ms / 100) 1.666 -> 1.668 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.78% +0.84%] index_select skip256 : Elapsed 0.017 ms (1.668 ms / 100) 1.604 -> 1.606 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.87% +0.87%] index_select spread : Elapsed 0.016 ms (1.606 ms / 100) 1.614 -> 1.614 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.81% +0.74%] index_select strided 3 : Elapsed 0.016 ms (1.615 ms / 100) 1.632 -> 1.633 ( +0.06%) [ +0.25% +0.12% +0.00% / +0.06% +0.80% +0.80%] index_select strided 5 : Elapsed 0.016 ms (1.636 ms / 100) 1.625 -> 1.625 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_select strided 7 : Elapsed 0.016 ms (1.626 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.68% +0.93%] index_select strided 8 : Elapsed 0.016 ms (1.612 ms / 100) 1.598 -> 1.600 ( +0.13%) [ +0.13% +0.06% +0.00% / +0.13% +0.75% +0.81%] index_select strided 16 : Elapsed 0.016 ms (1.600 ms / 100) 1.651 -> 1.651 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.73% +0.67%] index_select random : Elapsed 0.017 ms (1.652 ms / 100) 1.644 -> 1.645 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.67% +0.67%] index_select random_sorted : Elapsed 0.016 ms (1.645 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.62%] index_select perm : Elapsed 0.016 ms (1.612 ms / 100) 1.613 -> 1.615 ( +0.12%) [ +0.00% +0.06% +0.00% / +0.12% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.016 ms (1.613 ms / 100) out_shape = [16, 40, 5, 4] in_shape = [16, 40, 20, 4] idx_dim = 2 B = [16, 40, 5, 4] (stride (20, 320, 1, 5)) A = [16, 40, 20, 4] (stride (3200, 1, 160, 40)) dim = 2 1.778 -> 1.778 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.56% +0.67%] index_select const : Elapsed 0.018 ms (1.780 ms / 100) 1.789 -> 1.790 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +1.57% +1.62%] index_select wrap : Elapsed 0.018 ms (1.789 ms / 100) 1.787 -> 1.788 ( +0.06%) [ +0.06% +0.22% +0.00% / +0.06% +1.68% +1.68%] index_select linear : Elapsed 0.018 ms (1.788 ms / 100) 1.793 -> 1.795 ( +0.11%) [ +0.28% +0.17% +0.00% / +0.11% +1.56% +1.45%] index_select reverse : Elapsed 0.018 ms (1.798 ms / 100) 1.778 -> 1.778 ( +0.00%) [ +0.00% +0.28% +0.11% / +0.00% +0.39% +0.51%] index_select skip64 : Elapsed 0.018 ms (1.778 ms / 100) 1.781 -> 1.783 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.51% +0.22%] index_select skip256 : Elapsed 0.018 ms (1.782 ms / 100) 1.783 -> 1.787 ( +0.22%) [ +0.17% +0.00% +0.22% / +0.22% +1.23% +1.29%] index_select spread : Elapsed 0.018 ms (1.786 ms / 100) 1.787 -> 1.789 ( +0.11%) [ +0.00% +0.06% +0.00% / +0.11% +0.95% +1.12%] index_select strided 3 : Elapsed 0.018 ms (1.787 ms / 100) 1.785 -> 1.789 ( +0.22%) [ +0.00% +0.00% +0.06% / +0.22% +0.90% +1.18%] index_select strided 5 : Elapsed 0.018 ms (1.785 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +1.40% +1.40%] index_select strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.786 -> 1.784 ( -0.11%) [ +0.00% +0.00% +0.06% / -0.11% +1.23% +1.29%] index_select strided 8 : Elapsed 0.018 ms (1.786 ms / 100) 1.785 -> 1.784 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +1.18% +1.29%] index_select strided 16 : Elapsed 0.018 ms (1.787 ms / 100) 1.801 -> 1.799 ( -0.11%) [ +0.00% +0.22% +0.00% / -0.11% +0.78% +0.61%] index_select random : Elapsed 0.018 ms (1.801 ms / 100) 1.797 -> 1.799 ( +0.11%) [ +0.28% +0.00% +0.00% / +0.11% +0.83% +0.78%] index_select random_sorted : Elapsed 0.018 ms (1.802 ms / 100) 1.782 -> 1.788 ( +0.34%) [ +0.28% +0.28% +0.00% / +0.34% +1.40% +1.40%] index_select perm : Elapsed 0.018 ms (1.787 ms / 100) 1.786 -> 1.790 ( +0.22%) [ +0.28% +0.00% +0.06% / +0.22% +1.06% +1.06%] index_select perm_sorted : Elapsed 0.018 ms (1.791 ms / 100) B = [16, 40, 5, 4] (stride (4, 320, 64, 1)) A = [16, 40, 20, 4] (stride (3200, 20, 1, 800)) dim = 2 1.864 -> 1.866 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.21% +0.11% +0.21%] index_select const : Elapsed 0.019 ms (1.866 ms / 100) 1.866 -> 1.868 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.91% +1.07%] index_select wrap : Elapsed 0.019 ms (1.868 ms / 100) 1.867 -> 1.866 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.70% +0.80%] index_select linear : Elapsed 0.019 ms (1.867 ms / 100) 1.875 -> 1.872 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.75% +0.53%] index_select reverse : Elapsed 0.019 ms (1.875 ms / 100) 1.858 -> 1.859 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.16% +0.38%] index_select skip64 : Elapsed 0.019 ms (1.860 ms / 100) 1.858 -> 1.859 ( +0.05%) [ +0.22% +0.11% +0.00% / +0.05% +0.59% +0.75%] index_select skip256 : Elapsed 0.019 ms (1.862 ms / 100) 1.891 -> 1.890 ( -0.05%) [ +0.00% +0.16% +0.11% / -0.05% +0.90% +0.74%] index_select spread : Elapsed 0.019 ms (1.891 ms / 100) 1.884 -> 1.885 ( +0.05%) [ +0.11% +0.27% +0.00% / +0.05% +0.90% +0.80%] index_select strided 3 : Elapsed 0.019 ms (1.886 ms / 100) 1.894 -> 1.895 ( +0.05%) [ +0.21% +0.00% +0.21% / +0.05% +0.69% +0.69%] index_select strided 5 : Elapsed 0.019 ms (1.898 ms / 100) 1.880 -> 1.878 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.64% +0.37%] index_select strided 7 : Elapsed 0.019 ms (1.880 ms / 100) 1.878 -> 1.878 ( +0.00%) [ +0.21% +0.21% +0.00% / +0.00% +0.53% +0.64%] index_select strided 8 : Elapsed 0.019 ms (1.882 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.21% +0.11% +0.00% / -0.05% +0.53% +0.63%] index_select strided 16 : Elapsed 0.019 ms (1.898 ms / 100) 1.873 -> 1.874 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.53% +0.69%] index_select random : Elapsed 0.019 ms (1.873 ms / 100) 1.885 -> 1.888 ( +0.16%) [ +0.05% +0.11% +0.00% / +0.16% +0.69% +0.69%] index_select random_sorted : Elapsed 0.019 ms (1.886 ms / 100) 1.875 -> 1.878 ( +0.16%) [ +0.21% +0.00% +0.16% / +0.16% +0.75% +0.80%] index_select perm : Elapsed 0.019 ms (1.879 ms / 100) 1.874 -> 1.874 ( +0.00%) [ +0.11% +0.16% +0.00% / +0.00% +0.64% +0.53%] index_select perm_sorted : Elapsed 0.019 ms (1.876 ms / 100) B = [16, 40, 5, 4] (stride (1, 320, 16, 80)) A = [16, 40, 20, 4] (stride (800, 1, 40, 12800)) dim = 2 1.788 -> 1.785 ( -0.17%) [ +0.06% +0.17% +0.00% / -0.17% +0.17% +0.17%] index_select const : Elapsed 0.018 ms (1.789 ms / 100) 1.825 -> 1.827 ( +0.11%) [ +0.00% +0.11% +0.16% / +0.11% +0.77% +0.88%] index_select wrap : Elapsed 0.018 ms (1.825 ms / 100) 1.823 -> 1.828 ( +0.27%) [ +0.00% +0.05% +0.16% / +0.27% +1.26% +1.10%] index_select linear : Elapsed 0.018 ms (1.823 ms / 100) 1.825 -> 1.825 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +1.10% +1.15%] index_select reverse : Elapsed 0.018 ms (1.825 ms / 100) 1.785 -> 1.784 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.34% +0.22%] index_select skip64 : Elapsed 0.018 ms (1.786 ms / 100) 1.784 -> 1.787 ( +0.17%) [ +0.17% +0.00% +0.39% / +0.17% +0.50% +0.39%] index_select skip256 : Elapsed 0.018 ms (1.787 ms / 100) 1.825 -> 1.826 ( +0.05%) [ +0.11% +0.00% +0.16% / +0.05% +0.77% +0.82%] index_select spread : Elapsed 0.018 ms (1.827 ms / 100) 1.828 -> 1.826 ( -0.11%) [ +0.05% +0.00% +0.05% / -0.11% +0.44% +0.49%] index_select strided 3 : Elapsed 0.018 ms (1.829 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.00% +0.06% +0.28% / +0.11% +0.83% +0.99%] index_select strided 5 : Elapsed 0.018 ms (1.812 ms / 100) 1.828 -> 1.832 ( +0.22%) [ +0.33% +0.00% +0.05% / +0.22% +0.88% +0.60%] index_select strided 7 : Elapsed 0.018 ms (1.834 ms / 100) 1.827 -> 1.827 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.71% +0.49%] index_select strided 8 : Elapsed 0.018 ms (1.827 ms / 100) 1.825 -> 1.829 ( +0.22%) [ +0.00% +0.16% +0.00% / +0.22% +0.82% +0.77%] index_select strided 16 : Elapsed 0.018 ms (1.825 ms / 100) 1.824 -> 1.830 ( +0.33%) [ +0.00% +0.33% +0.16% / +0.33% +0.99% +0.99%] index_select random : Elapsed 0.018 ms (1.824 ms / 100) 1.830 -> 1.834 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.38% +0.60%] index_select random_sorted : Elapsed 0.018 ms (1.830 ms / 100) 1.832 -> 1.835 ( +0.16%) [ +0.22% +0.00% +0.00% / +0.16% +0.44% +0.44%] index_select perm : Elapsed 0.018 ms (1.836 ms / 100) 1.833 -> 1.835 ( +0.11%) [ +0.05% +0.00% +0.16% / +0.22% +0.11% +0.22%] index_select perm_sorted : Elapsed 0.018 ms (1.834 ms / 100) B = [16, 40, 5, 4] (stride (160, 4, 2560, 1)) A = [16, 40, 20, 4] (stride (80, 1280, 4, 1)) dim = 2 1.712 -> 1.714 ( +0.12%) [ +0.29% +0.18% +0.00% / +0.12% +0.12% +0.12%] index_select const : Elapsed 0.017 ms (1.717 ms / 100) 1.730 -> 1.729 ( -0.06%) [ +0.00% +0.17% +0.00% / -0.06% +0.58% +0.35%] index_select wrap : Elapsed 0.017 ms (1.730 ms / 100) 1.727 -> 1.723 ( -0.23%) [ +0.00% +0.00% +0.06% / -0.23% +0.35% +0.35%] index_select linear : Elapsed 0.017 ms (1.727 ms / 100) 1.729 -> 1.729 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.12% +0.29%] index_select reverse : Elapsed 0.017 ms (1.731 ms / 100) 1.706 -> 1.708 ( +0.12%) [ +0.23% +0.12% +0.00% / +0.12% +0.41% +0.59%] index_select skip64 : Elapsed 0.017 ms (1.710 ms / 100) 1.708 -> 1.708 ( +0.00%) [ +0.18% +0.00% +0.12% / +0.00% +0.47% +0.53%] index_select skip256 : Elapsed 0.017 ms (1.711 ms / 100) 1.740 -> 1.745 ( +0.29%) [ +0.06% +0.00% +0.00% / +0.29% +0.34% +0.34%] index_select spread : Elapsed 0.017 ms (1.741 ms / 100) 1.745 -> 1.750 ( +0.29%) [ +0.00% +0.11% +0.17% / +0.29% +0.97% +1.09%] index_select strided 3 : Elapsed 0.017 ms (1.745 ms / 100) 1.726 -> 1.724 ( -0.12%) [ +0.17% +0.23% +0.00% / -0.12% +0.06% +0.41%] index_select strided 5 : Elapsed 0.017 ms (1.729 ms / 100) 1.728 -> 1.733 ( +0.29%) [ +0.00% +0.12% +0.12% / +0.29% +0.87% +1.04%] index_select strided 7 : Elapsed 0.017 ms (1.728 ms / 100) 1.741 -> 1.743 ( +0.11%) [ +0.11% +0.23% +0.00% / +0.11% +0.57% +0.69%] index_select strided 8 : Elapsed 0.017 ms (1.743 ms / 100) 1.736 -> 1.737 ( +0.06%) [ +0.00% +0.23% +0.17% / +0.06% +0.81% +0.81%] index_select strided 16 : Elapsed 0.017 ms (1.736 ms / 100) 1.732 -> 1.732 ( +0.00%) [ +0.23% +0.00% +0.29% / +0.00% +0.46% +0.29%] index_select random : Elapsed 0.017 ms (1.736 ms / 100) 1.737 -> 1.740 ( +0.17%) [ +0.23% +0.23% +0.00% / +0.17% +0.46% +0.40%] index_select random_sorted : Elapsed 0.017 ms (1.741 ms / 100) 1.740 -> 1.745 ( +0.29%) [ +0.29% +0.17% +0.00% / +0.29% +0.46% +0.46%] index_select perm : Elapsed 0.017 ms (1.745 ms / 100) 1.740 -> 1.743 ( +0.17%) [ +0.23% +0.00% +0.29% / +0.17% +0.69% +0.63%] index_select perm_sorted : Elapsed 0.017 ms (1.744 ms / 100) B = [16, 40, 5, 4] (stride (160, 1, 2560, 40)) dim = 2 fill_cnt = 20 3.512 -> 3.491 ( -0.60%) [ +0.00% +0.09% +0.11% / -0.46% -0.60% -0.60%] index_fill_ const : Elapsed 0.035 ms (3.512 ms / 100) 3.523 -> 3.506 ( -0.48%) [ +0.00% +0.17% +0.06% / -0.43% -0.48% -0.48%] index_fill_ linear : Elapsed 0.035 ms (3.523 ms / 100) 3.512 -> 3.497 ( -0.43%) [ +0.00% +0.09% +0.17% / -0.40% -0.43% -0.37%] index_fill_ reverse : Elapsed 0.035 ms (3.512 ms / 100) 3.516 -> 3.498 ( -0.51%) [ +0.20% +0.09% +0.00% / -0.26% -0.40% -0.51%] index_fill_ skip64 : Elapsed 0.035 ms (3.523 ms / 100) 3.520 -> 3.502 ( -0.51%) [ +0.14% +0.00% +0.09% / -0.51% -0.40% -0.45%] index_fill_ skip256 : Elapsed 0.035 ms (3.525 ms / 100) 3.516 -> 3.496 ( -0.57%) [ +0.03% +0.06% +0.00% / -0.48% -0.37% -0.57%] index_fill_ spread : Elapsed 0.035 ms (3.517 ms / 100) 3.525 -> 3.502 ( -0.65%) [ +0.03% +0.11% +0.00% / -0.65% -0.54% -0.51%] index_fill_ strided 3 : Elapsed 0.035 ms (3.526 ms / 100) 3.531 -> 3.508 ( -0.65%) [ +0.00% +0.08% +0.06% / -0.34% -0.34% -0.65%] index_fill_ random : Elapsed 0.035 ms (3.531 ms / 100) 3.524 -> 3.505 ( -0.54%) [ +0.14% +0.00% +0.03% / -0.54% -0.43% -0.43%] index_fill_ random_sorted : Elapsed 0.035 ms (3.529 ms / 100) B = [16, 40, 5, 4] (stride (40, 1, 2560, 640)) A = [16, 40, 20, 4] (stride (3200, 1, 40, 800)) dim = 2 1.678 -> 1.678 ( +0.00%) [ +0.30% +0.12% +0.00% / +0.00% +0.48% +0.42%] index_select const : Elapsed 0.017 ms (1.683 ms / 100) 1.716 -> 1.719 ( +0.17%) [ +0.17% +0.12% +0.00% / +0.17% +0.64% +0.99%] index_select wrap : Elapsed 0.017 ms (1.719 ms / 100) 1.718 -> 1.722 ( +0.23%) [ +0.00% +0.06% +0.06% / +0.23% +0.70% +0.93%] index_select linear : Elapsed 0.017 ms (1.718 ms / 100) 1.716 -> 1.719 ( +0.17%) [ +0.12% +0.17% +0.00% / +0.17% +0.76% +1.05%] index_select reverse : Elapsed 0.017 ms (1.718 ms / 100) 1.676 -> 1.679 ( +0.18%) [ +0.00% +0.48% +0.24% / +0.18% +0.60% +0.60%] index_select skip64 : Elapsed 0.017 ms (1.676 ms / 100) 1.681 -> 1.680 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.36% +0.30%] index_select skip256 : Elapsed 0.017 ms (1.681 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +1.05% +0.64%] index_select spread : Elapsed 0.017 ms (1.718 ms / 100) 1.717 -> 1.719 ( +0.12%) [ +0.00% +0.12% +0.06% / +0.12% +0.93% +0.93%] index_select strided 3 : Elapsed 0.017 ms (1.717 ms / 100) 1.709 -> 1.709 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.47% +0.41%] index_select strided 5 : Elapsed 0.017 ms (1.711 ms / 100) 1.718 -> 1.718 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.47%] index_select strided 7 : Elapsed 0.017 ms (1.718 ms / 100) 1.716 -> 1.719 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +0.82% +0.99%] index_select strided 8 : Elapsed 0.017 ms (1.716 ms / 100) 1.717 -> 1.720 ( +0.17%) [ +0.00% +0.00% +0.12% / +0.17% +0.82% +0.87%] index_select strided 16 : Elapsed 0.017 ms (1.717 ms / 100) 1.716 -> 1.719 ( +0.17%) [ +0.23% +0.06% +0.00% / +0.17% +0.87% +1.05%] index_select random : Elapsed 0.017 ms (1.720 ms / 100) 1.715 -> 1.718 ( +0.17%) [ +0.35% +0.23% +0.00% / +0.17% +0.64% +0.76%] index_select random_sorted : Elapsed 0.017 ms (1.721 ms / 100) 1.717 -> 1.722 ( +0.29%) [ +0.00% +0.00% +0.12% / +0.29% +0.76% +1.11%] index_select perm : Elapsed 0.017 ms (1.717 ms / 100) 1.717 -> 1.718 ( +0.06%) [ +0.06% +0.41% +0.00% / +0.06% +0.58% +0.70%] index_select perm_sorted : Elapsed 0.017 ms (1.718 ms / 100) B = [16, 40, 5, 4] (stride (40, 1, 2560, 640)) A = [16, 40, 20, 4] (stride (160, 1, 2560, 40)) dim = 2 1.673 -> 1.676 ( +0.18%) [ +0.00% +0.12% +0.12% / +0.18% +0.48% +0.36%] index_select const : Elapsed 0.017 ms (1.673 ms / 100) 1.668 -> 1.672 ( +0.24%) [ +0.30% +0.30% +0.00% / +0.24% +0.96% +0.60%] index_select wrap : Elapsed 0.017 ms (1.673 ms / 100) 1.670 -> 1.673 ( +0.18%) [ +0.00% +0.24% +0.06% / +0.18% +0.60% +0.66%] index_select linear : Elapsed 0.017 ms (1.670 ms / 100) 1.667 -> 1.674 ( +0.42%) [ +0.00% +0.48% +0.00% / +0.42% +0.66% +0.90%] index_select reverse : Elapsed 0.017 ms (1.667 ms / 100) 1.664 -> 1.670 ( +0.36%) [ +0.48% +0.42% +0.00% / +0.36% +1.08% +0.72%] index_select skip64 : Elapsed 0.017 ms (1.672 ms / 100) 1.669 -> 1.668 ( -0.06%) [ +0.18% +0.00% +0.18% / -0.06% +0.66% +0.72%] index_select skip256 : Elapsed 0.017 ms (1.672 ms / 100) 1.686 -> 1.688 ( +0.12%) [ +0.24% +0.24% +0.00% / +0.18% +0.42% +0.12%] index_select spread : Elapsed 0.017 ms (1.690 ms / 100) 1.696 -> 1.692 ( -0.24%) [ +0.00% +0.41% +0.35% / +0.41% -0.12% -0.24%] index_select strided 3 : Elapsed 0.017 ms (1.696 ms / 100) 1.682 -> 1.685 ( +0.18%) [ +0.24% +0.36% +0.00% / +0.18% +0.36% +0.71%] index_select strided 5 : Elapsed 0.017 ms (1.686 ms / 100) 1.695 -> 1.694 ( -0.06%) [ +0.18% +0.06% +0.00% / +0.12% -0.06% +0.12%] index_select strided 7 : Elapsed 0.017 ms (1.698 ms / 100) 1.687 -> 1.688 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +0.53% +0.36%] index_select strided 8 : Elapsed 0.017 ms (1.689 ms / 100) 1.682 -> 1.687 ( +0.30%) [ +0.36% +0.00% +0.30% / +0.30% +0.59% +0.77%] index_select strided 16 : Elapsed 0.017 ms (1.688 ms / 100) 1.698 -> 1.697 ( -0.06%) [ +0.00% +0.18% +0.06% / +0.18% -0.06% -0.06%] index_select random : Elapsed 0.017 ms (1.698 ms / 100) 1.704 -> 1.693 ( -0.65%) [ +0.29% +0.00% +0.12% / -0.29% -0.65% -0.53%] index_select random_sorted : Elapsed 0.017 ms (1.709 ms / 100) 1.684 -> 1.681 ( -0.18%) [ +0.24% +0.36% +0.00% / +0.24% -0.18% -0.06%] index_select perm : Elapsed 0.017 ms (1.688 ms / 100) 1.689 -> 1.683 ( -0.36%) [ +0.06% +0.06% +0.00% / -0.18% -0.18% -0.36%] index_select perm_sorted : Elapsed 0.017 ms (1.690 ms / 100) B = [16, 40, 5, 4] (stride (40, 1, 2560, 640)) A = [16, 40, 20, 4] (stride (800, 1, 40, 12800)) dim = 2 1.680 -> 1.682 ( +0.12%) [ +0.00% +0.06% +0.12% / +0.12% +0.48% +0.30%] index_select const : Elapsed 0.017 ms (1.680 ms / 100) 1.717 -> 1.719 ( +0.12%) [ +0.00% +0.17% +0.00% / +0.12% +0.99% +1.05%] index_select wrap : Elapsed 0.017 ms (1.717 ms / 100) 1.718 -> 1.717 ( -0.06%) [ +0.00% +0.17% +0.00% / -0.06% +0.70% +0.76%] index_select linear : Elapsed 0.017 ms (1.718 ms / 100) 1.716 -> 1.719 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.93% +1.11%] index_select reverse : Elapsed 0.017 ms (1.719 ms / 100) 1.678 -> 1.681 ( +0.18%) [ +0.00% +0.12% +0.12% / +0.18% +0.48% +0.54%] index_select skip64 : Elapsed 0.017 ms (1.678 ms / 100) 1.679 -> 1.680 ( +0.06%) [ +0.00% +0.00% +0.18% / +0.06% +0.48% +0.30%] index_select skip256 : Elapsed 0.017 ms (1.679 ms / 100) 1.716 -> 1.718 ( +0.12%) [ +0.00% +0.17% +0.12% / +0.12% +0.82% +0.93%] index_select spread : Elapsed 0.017 ms (1.716 ms / 100) 1.717 -> 1.723 ( +0.35%) [ +0.06% +0.00% +0.29% / +0.35% +0.64% +0.64%] index_select strided 3 : Elapsed 0.017 ms (1.718 ms / 100) 1.709 -> 1.711 ( +0.12%) [ +0.00% +0.23% +0.41% / +0.12% +0.59% +0.59%] index_select strided 5 : Elapsed 0.017 ms (1.709 ms / 100) 1.716 -> 1.724 ( +0.47%) [ +0.29% +0.29% +0.00% / +0.47% +0.76% +0.93%] index_select strided 7 : Elapsed 0.017 ms (1.721 ms / 100) 1.719 -> 1.720 ( +0.06%) [ +0.17% +0.17% +0.00% / +0.06% +0.64% +0.70%] index_select strided 8 : Elapsed 0.017 ms (1.722 ms / 100) 1.720 -> 1.721 ( +0.06%) [ +0.00% +0.12% +0.17% / +0.06% +0.64% +0.35%] index_select strided 16 : Elapsed 0.017 ms (1.720 ms / 100) 1.717 -> 1.719 ( +0.12%) [ +0.00% +0.35% +0.12% / +0.12% +0.76% +0.82%] index_select random : Elapsed 0.017 ms (1.717 ms / 100) 1.719 -> 1.718 ( -0.06%) [ +0.12% +0.00% +0.12% / -0.06% +0.76% +0.64%] index_select random_sorted : Elapsed 0.017 ms (1.721 ms / 100) 1.719 -> 1.722 ( +0.17%) [ +0.12% +0.00% +0.17% / +0.17% +0.58% +0.76%] index_select perm : Elapsed 0.017 ms (1.721 ms / 100) 1.716 -> 1.722 ( +0.35%) [ +0.23% +0.00% +0.23% / +0.35% +0.93% +0.99%] index_select perm_sorted : Elapsed 0.017 ms (1.720 ms / 100) out_shape = [16, 40, 20, 5] in_shape = [16, 40, 20, 4] idx_dim = 3 B = [16, 40, 20, 5] (stride (4000, 100, 5, 1)) A = [16, 40, 20, 4] (stride (3200, 20, 1, 800)) dim = 3 5.171 -> 5.170 ( -0.02%) [ +0.04% +0.08% +0.00% / +0.10% +0.10% -0.02%] index_add_ linear : Elapsed 0.052 ms (5.173 ms / 100) 5.144 -> 5.139 ( -0.10%) [ +0.14% +0.00% +0.19% / +0.41% -0.02% -0.10%] index_copy_ linear : Elapsed 0.052 ms (5.151 ms / 100) 5.169 -> 5.166 ( -0.06%) [ +0.00% +0.08% +0.08% / +0.10% -0.06% -0.04%] index_add_ reverse : Elapsed 0.052 ms (5.169 ms / 100) 5.153 -> 5.145 ( -0.16%) [ +0.02% +0.00% +0.02% / +0.00% -0.16% -0.04%] index_copy_ reverse : Elapsed 0.052 ms (5.154 ms / 100) 5.167 -> 5.167 ( +0.00%) [ +0.15% +0.00% +0.12% / +0.21% +0.00% +0.25%] index_add_ spread : Elapsed 0.052 ms (5.175 ms / 100) 5.152 -> 5.145 ( -0.14%) [ +0.14% +0.02% +0.00% / +0.10% -0.14% -0.10%] index_copy_ spread : Elapsed 0.052 ms (5.159 ms / 100) 5.168 -> 5.164 ( -0.08%) [ +0.00% +0.14% +0.04% / +0.06% +0.06% -0.08%] index_add_ strided 3 : Elapsed 0.052 ms (5.168 ms / 100) 5.145 -> 5.142 ( -0.06%) [ +0.00% +0.14% +0.19% / +0.23% +0.04% -0.06%] index_copy_ strided 3 : Elapsed 0.051 ms (5.145 ms / 100) 5.168 -> 5.165 ( -0.06%) [ +0.00% +0.08% +0.00% / -0.06% +0.02% -0.02%] index_add_ perm : Elapsed 0.052 ms (5.168 ms / 100) 5.147 -> 5.134 ( -0.25%) [ +0.12% +0.08% +0.00% / +0.06% -0.23% -0.25%] index_copy_ perm : Elapsed 0.052 ms (5.153 ms / 100) 5.177 -> 5.173 ( -0.08%) [ +0.00% +0.10% +0.06% / +0.02% -0.08% -0.08%] index_add_ perm_sorted : Elapsed 0.052 ms (5.177 ms / 100) 5.155 -> 5.141 ( -0.27%) [ +0.02% +0.02% +0.00% / +0.02% -0.27% -0.17%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.156 ms / 100) 5.187 -> 5.190 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.08% +0.19% +0.06%] index_select const : Elapsed 0.052 ms (5.193 ms / 100) 5.278 -> 5.266 ( -0.23%) [ +0.04% +0.11% +0.00% / -0.11% -0.09% -0.23%] index_select wrap : Elapsed 0.053 ms (5.280 ms / 100) 5.273 -> 5.264 ( -0.17%) [ +0.04% +0.00% +0.02% / -0.09% -0.17% -0.09%] index_select linear : Elapsed 0.053 ms (5.275 ms / 100) 5.273 -> 5.273 ( +0.00%) [ +0.19% +0.04% +0.00% / +0.02% +0.17% +0.00%] index_select reverse : Elapsed 0.053 ms (5.283 ms / 100) 5.186 -> 5.183 ( -0.06%) [ +0.21% +0.00% +0.02% / +0.12% -0.06% -0.06%] index_select skip64 : Elapsed 0.052 ms (5.197 ms / 100) 5.187 -> 5.189 ( +0.04%) [ +0.13% +0.19% +0.00% / +0.12% +0.08% +0.04%] index_select skip256 : Elapsed 0.052 ms (5.194 ms / 100) 5.265 -> 5.261 ( -0.08%) [ +0.06% +0.09% +0.00% / +0.17% -0.08% +0.11%] index_select spread : Elapsed 0.053 ms (5.268 ms / 100) 5.269 -> 5.267 ( -0.04%) [ +0.00% +0.02% +0.00% / +0.02% -0.04% +0.06%] index_select strided 3 : Elapsed 0.053 ms (5.269 ms / 100) 5.241 -> 5.243 ( +0.04%) [ +0.13% +0.21% +0.00% / +0.13% +0.08% +0.04%] index_select random : Elapsed 0.052 ms (5.248 ms / 100) 5.250 -> 5.239 ( -0.21%) [ +0.06% +0.00% +0.00% / +0.08% -0.21% -0.21%] index_select random_sorted : Elapsed 0.053 ms (5.253 ms / 100) B = [16, 40, 20, 5] (stride (100, 1600, 1, 20)) A = [16, 40, 20, 4] (stride (1, 16, 640, 12800)) dim = 3 5.860 -> 5.865 ( +0.09%) [ +0.03% +0.00% +0.20% / +0.10% +0.09% +0.12%] index_add_ linear : Elapsed 0.059 ms (5.862 ms / 100) 5.790 -> 5.777 ( -0.22%) [ +0.16% +0.00% +0.22% / +0.19% -0.22% -0.07%] index_copy_ linear : Elapsed 0.058 ms (5.799 ms / 100) 5.860 -> 5.867 ( +0.12%) [ +0.12% +0.00% +0.22% / +0.19% +0.12% +0.20%] index_add_ reverse : Elapsed 0.059 ms (5.867 ms / 100) 5.783 -> 5.789 ( +0.10%) [ +0.00% +0.09% +0.19% / +0.38% +0.16% +0.10%] index_copy_ reverse : Elapsed 0.058 ms (5.783 ms / 100) 5.865 -> 5.862 ( -0.05%) [ +0.00% +0.05% +0.10% / +0.15% -0.05% +0.00%] index_add_ spread : Elapsed 0.059 ms (5.865 ms / 100) 5.785 -> 5.785 ( +0.00%) [ +0.09% +0.00% +0.17% / +0.26% +0.09% +0.00%] index_copy_ spread : Elapsed 0.058 ms (5.790 ms / 100) 5.860 -> 5.872 ( +0.20%) [ +0.00% +0.12% +0.19% / +0.22% +0.20% +0.24%] index_add_ strided 3 : Elapsed 0.059 ms (5.860 ms / 100) 5.788 -> 5.795 ( +0.12%) [ +0.24% +0.00% +0.16% / +0.35% +0.12% +0.17%] index_copy_ strided 3 : Elapsed 0.058 ms (5.802 ms / 100) 5.856 -> 5.863 ( +0.12%) [ +0.09% +0.00% +0.29% / +0.24% +0.12% +0.24%] index_add_ perm : Elapsed 0.059 ms (5.861 ms / 100) 5.788 -> 5.795 ( +0.12%) [ +0.00% +0.02% +0.31% / +0.22% +0.14% +0.12%] index_copy_ perm : Elapsed 0.058 ms (5.788 ms / 100) 5.860 -> 5.866 ( +0.10%) [ +0.07% +0.00% +0.17% / +0.22% +0.10% +0.19%] index_add_ perm_sorted : Elapsed 0.059 ms (5.864 ms / 100) 5.795 -> 5.787 ( -0.14%) [ +0.00% +0.02% +0.10% / -0.02% -0.14% +0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.795 ms / 100) 6.091 -> 6.098 ( +0.11%) [ +0.00% +0.23% +0.25% / +0.16% +0.11% +0.11%] index_select const : Elapsed 0.061 ms (6.091 ms / 100) 6.178 -> 6.162 ( -0.26%) [ +0.00% +0.03% +0.00% / +0.18% -0.26% -0.24%] index_select wrap : Elapsed 0.062 ms (6.178 ms / 100) 6.158 -> 6.162 ( +0.06%) [ +0.00% +0.06% +0.23% / +0.10% +0.08% +0.06%] index_select linear : Elapsed 0.062 ms (6.158 ms / 100) 6.155 -> 6.164 ( +0.15%) [ +0.00% +0.02% +0.08% / +0.18% +0.21% +0.15%] index_select reverse : Elapsed 0.062 ms (6.155 ms / 100) 6.090 -> 6.090 ( +0.00%) [ +0.00% +0.03% +0.25% / +0.20% +0.00% +0.05%] index_select skip64 : Elapsed 0.061 ms (6.090 ms / 100) 6.091 -> 6.093 ( +0.03%) [ +0.00% +0.08% +0.23% / +0.16% +0.03% +0.13%] index_select skip256 : Elapsed 0.061 ms (6.091 ms / 100) 6.148 -> 6.157 ( +0.15%) [ +0.07% +0.00% +0.03% / +0.23% +0.15% +0.18%] index_select spread : Elapsed 0.062 ms (6.152 ms / 100) 6.163 -> 6.165 ( +0.03%) [ +0.02% +0.00% +0.15% / +0.11% +0.11% +0.03%] index_select strided 3 : Elapsed 0.062 ms (6.164 ms / 100) 6.135 -> 6.145 ( +0.16%) [ +0.02% +0.00% +0.16% / +0.16% +0.28% +0.24%] index_select random : Elapsed 0.061 ms (6.136 ms / 100) 6.133 -> 6.131 ( -0.03%) [ +0.08% +0.00% +0.10% / -0.03% +0.03% +0.10%] index_select random_sorted : Elapsed 0.061 ms (6.138 ms / 100) B = [16, 40, 20, 5] (stride (200, 5, 3200, 1)) A = [16, 40, 20, 4] (stride (4, 64, 2560, 1)) dim = 3 5.929 -> 5.906 ( -0.39%) [ +0.00% +0.00% +0.08% / +0.10% -0.29% -0.39%] index_add_ linear : Elapsed 0.059 ms (5.929 ms / 100) 5.906 -> 5.882 ( -0.41%) [ +0.00% +0.07% +0.02% / +0.14% -0.41% -0.30%] index_copy_ linear : Elapsed 0.059 ms (5.906 ms / 100) 5.929 -> 5.907 ( -0.37%) [ +0.00% +0.08% +0.10% / +0.00% -0.37% -0.24%] index_add_ reverse : Elapsed 0.059 ms (5.929 ms / 100) 5.904 -> 5.884 ( -0.34%) [ +0.00% +0.12% +0.19% / +0.03% -0.34% -0.29%] index_copy_ reverse : Elapsed 0.059 ms (5.904 ms / 100) 5.930 -> 5.909 ( -0.35%) [ +0.00% +0.00% +0.12% / +0.07% -0.34% -0.35%] index_add_ spread : Elapsed 0.059 ms (5.930 ms / 100) 5.909 -> 5.887 ( -0.37%) [ +0.00% +0.02% +0.15% / -0.03% -0.37% -0.24%] index_copy_ spread : Elapsed 0.059 ms (5.909 ms / 100) 5.932 -> 5.902 ( -0.51%) [ +0.02% +0.00% +0.02% / +0.03% -0.51% -0.32%] index_add_ strided 3 : Elapsed 0.059 ms (5.933 ms / 100) 5.906 -> 5.888 ( -0.30%) [ +0.00% +0.02% +0.14% / +0.12% -0.30% -0.30%] index_copy_ strided 3 : Elapsed 0.059 ms (5.906 ms / 100) 5.929 -> 5.907 ( -0.37%) [ +0.05% +0.05% +0.00% / +0.19% -0.37% -0.37%] index_add_ perm : Elapsed 0.059 ms (5.932 ms / 100) 5.906 -> 5.884 ( -0.37%) [ +0.00% +0.05% +0.02% / +0.07% -0.30% -0.37%] index_copy_ perm : Elapsed 0.059 ms (5.906 ms / 100) 5.929 -> 5.904 ( -0.42%) [ +0.10% +0.00% +0.10% / +0.17% -0.42% -0.35%] index_add_ perm_sorted : Elapsed 0.059 ms (5.935 ms / 100) 5.908 -> 5.882 ( -0.44%) [ +0.00% +0.08% +0.08% / +0.12% -0.44% -0.29%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.908 ms / 100) 6.184 -> 6.163 ( -0.34%) [ +0.00% +0.03% +0.11% / +0.06% -0.31% -0.34%] index_select const : Elapsed 0.062 ms (6.184 ms / 100) 6.177 -> 6.159 ( -0.29%) [ +0.18% +0.00% +0.23% / +0.23% -0.29% -0.23%] index_select wrap : Elapsed 0.062 ms (6.188 ms / 100) 6.182 -> 6.160 ( -0.36%) [ +0.00% +0.00% +0.18% / +0.00% -0.36% -0.31%] index_select linear : Elapsed 0.062 ms (6.182 ms / 100) 6.188 -> 6.159 ( -0.47%) [ +0.02% +0.00% +0.06% / +0.02% -0.45% -0.47%] index_select reverse : Elapsed 0.062 ms (6.189 ms / 100) 6.189 -> 6.164 ( -0.40%) [ +0.03% +0.00% +0.06% / -0.02% -0.40% -0.39%] index_select skip64 : Elapsed 0.062 ms (6.191 ms / 100) 6.180 -> 6.159 ( -0.34%) [ +0.06% +0.00% +0.23% / +0.24% -0.31% -0.34%] index_select skip256 : Elapsed 0.062 ms (6.184 ms / 100) 6.186 -> 6.160 ( -0.42%) [ +0.00% +0.00% +0.08% / -0.05% -0.42% -0.31%] index_select spread : Elapsed 0.062 ms (6.186 ms / 100) 6.181 -> 6.165 ( -0.26%) [ +0.00% +0.13% +0.13% / +0.15% -0.23% -0.26%] index_select strided 3 : Elapsed 0.062 ms (6.181 ms / 100) 6.186 -> 6.160 ( -0.42%) [ +0.00% +0.00% +0.03% / +0.13% -0.37% -0.42%] index_select random : Elapsed 0.062 ms (6.186 ms / 100) 6.181 -> 6.157 ( -0.39%) [ +0.00% +0.08% +0.13% / +0.19% -0.24% -0.39%] index_select random_sorted : Elapsed 0.062 ms (6.181 ms / 100) B = [16, 40, 20, 5] (stride (800, 20, 1, 12800)) A = [16, 40, 20, 4] (stride (3200, 4, 160, 1)) dim = 3 5.438 -> 5.445 ( +0.13%) [ +0.04% +0.00% +0.04% / +0.13% +0.63% +0.75%] index_add_ linear : Elapsed 0.054 ms (5.440 ms / 100) 5.351 -> 5.355 ( +0.07%) [ +0.00% +0.15% +0.09% / +0.07% +0.75% +0.73%] index_copy_ linear : Elapsed 0.054 ms (5.351 ms / 100) 5.432 -> 5.440 ( +0.15%) [ +0.00% +0.00% +0.09% / +0.15% +0.72% +0.74%] index_add_ reverse : Elapsed 0.054 ms (5.432 ms / 100) 5.344 -> 5.351 ( +0.13%) [ +0.00% +0.07% +0.26% / +0.13% +0.73% +0.79%] index_copy_ reverse : Elapsed 0.053 ms (5.344 ms / 100) 5.435 -> 5.441 ( +0.11%) [ +0.09% +0.04% +0.00% / +0.11% +0.68% +0.64%] index_add_ spread : Elapsed 0.054 ms (5.440 ms / 100) 5.351 -> 5.359 ( +0.15%) [ +0.17% +0.00% +0.13% / +0.15% +0.62% +0.71%] index_copy_ spread : Elapsed 0.054 ms (5.360 ms / 100) 5.433 -> 5.449 ( +0.29%) [ +0.20% +0.00% +0.24% / +0.29% +0.87% +0.98%] index_add_ strided 3 : Elapsed 0.054 ms (5.444 ms / 100) 5.351 -> 5.360 ( +0.17%) [ +0.02% +0.00% +0.15% / +0.17% +0.77% +0.95%] index_copy_ strided 3 : Elapsed 0.054 ms (5.352 ms / 100) 5.430 -> 5.437 ( +0.13%) [ +0.17% +0.00% +0.15% / +0.13% +0.76% +0.79%] index_add_ perm : Elapsed 0.054 ms (5.439 ms / 100) 5.346 -> 5.358 ( +0.22%) [ +0.04% +0.00% +0.11% / +0.22% +0.84% +0.82%] index_copy_ perm : Elapsed 0.053 ms (5.348 ms / 100) 5.431 -> 5.447 ( +0.29%) [ +0.09% +0.00% +0.15% / +0.29% +0.83% +0.98%] index_add_ perm_sorted : Elapsed 0.054 ms (5.436 ms / 100) 5.349 -> 5.362 ( +0.24%) [ +0.13% +0.00% +0.24% / +0.24% +0.79% +0.77%] index_copy_ perm_sorted : Elapsed 0.054 ms (5.356 ms / 100) 5.644 -> 5.653 ( +0.16%) [ +0.14% +0.00% +0.21% / +0.16% +1.19% +1.19%] index_select const : Elapsed 0.057 ms (5.652 ms / 100) 5.648 -> 5.655 ( +0.12%) [ +0.11% +0.00% +0.05% / +0.12% +1.12% +1.27%] index_select wrap : Elapsed 0.057 ms (5.654 ms / 100) 5.647 -> 5.652 ( +0.09%) [ +0.16% +0.00% +0.16% / +0.09% +1.29% +1.17%] index_select linear : Elapsed 0.057 ms (5.656 ms / 100) 5.650 -> 5.659 ( +0.16%) [ +0.11% +0.00% +0.05% / +0.16% +1.20% +1.08%] index_select reverse : Elapsed 0.057 ms (5.656 ms / 100) 5.647 -> 5.648 ( +0.02%) [ +0.00% +0.02% +0.18% / +0.02% +1.17% +1.24%] index_select skip64 : Elapsed 0.056 ms (5.647 ms / 100) 5.648 -> 5.661 ( +0.23%) [ +0.04% +0.00% +0.05% / +0.23% +1.03% +1.19%] index_select skip256 : Elapsed 0.056 ms (5.650 ms / 100) 5.645 -> 5.656 ( +0.19%) [ +0.02% +0.09% +0.00% / +0.19% +1.17% +1.19%] index_select spread : Elapsed 0.056 ms (5.646 ms / 100) 5.648 -> 5.658 ( +0.18%) [ +0.00% +0.02% +0.09% / +0.18% +1.08% +1.13%] index_select strided 3 : Elapsed 0.056 ms (5.648 ms / 100) 5.641 -> 5.656 ( +0.27%) [ +0.14% +0.00% +0.25% / +0.27% +1.24% +1.28%] index_select random : Elapsed 0.056 ms (5.649 ms / 100) 5.648 -> 5.649 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +1.19% +1.13%] index_select random_sorted : Elapsed 0.056 ms (5.648 ms / 100) B = [16, 40, 20, 5] (stride (20, 320, 1, 12800)) A = [16, 40, 20, 4] (stride (1, 16, 2560, 640)) dim = 3 5.817 -> 5.794 ( -0.40%) [ +0.00% +0.15% +0.17% / +0.05% -0.33% -0.40%] index_add_ linear : Elapsed 0.058 ms (5.817 ms / 100) 5.749 -> 5.724 ( -0.43%) [ +0.05% +0.14% +0.00% / +0.16% -0.43% -0.31%] index_copy_ linear : Elapsed 0.058 ms (5.752 ms / 100) 5.805 -> 5.796 ( -0.16%) [ +0.14% +0.00% +0.14% / +0.19% -0.14% -0.16%] index_add_ reverse : Elapsed 0.058 ms (5.813 ms / 100) 5.738 -> 5.730 ( -0.14%) [ +0.00% +0.09% +0.30% / +0.21% -0.14% -0.03%] index_copy_ reverse : Elapsed 0.057 ms (5.738 ms / 100) 5.813 -> 5.798 ( -0.26%) [ +0.00% +0.15% +0.02% / +0.10% -0.26% -0.22%] index_add_ spread : Elapsed 0.058 ms (5.813 ms / 100) 5.751 -> 5.730 ( -0.37%) [ +0.02% +0.09% +0.00% / +0.23% -0.37% -0.33%] index_copy_ spread : Elapsed 0.058 ms (5.752 ms / 100) 5.804 -> 5.791 ( -0.22%) [ +0.00% +0.12% +0.10% / +0.00% -0.19% -0.22%] index_add_ strided 3 : Elapsed 0.058 ms (5.804 ms / 100) 5.738 -> 5.727 ( -0.19%) [ +0.02% +0.00% +0.05% / +0.07% -0.14% -0.19%] index_copy_ strided 3 : Elapsed 0.057 ms (5.739 ms / 100) 5.798 -> 5.793 ( -0.09%) [ +0.02% +0.00% +0.03% / +0.05% -0.09% -0.05%] index_add_ perm : Elapsed 0.058 ms (5.799 ms / 100) 5.724 -> 5.724 ( +0.00%) [ +0.00% +0.05% +0.17% / +0.24% +0.00% +0.03%] index_copy_ perm : Elapsed 0.057 ms (5.724 ms / 100) 5.799 -> 5.783 ( -0.28%) [ +0.12% +0.00% +0.17% / +0.03% -0.28% -0.21%] index_add_ perm_sorted : Elapsed 0.058 ms (5.806 ms / 100) 5.733 -> 5.718 ( -0.26%) [ +0.14% +0.00% +0.09% / +0.14% -0.26% -0.21%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.741 ms / 100) 6.061 -> 6.025 ( -0.59%) [ +0.16% +0.00% +0.20% / +0.25% -0.59% -0.41%] index_select const : Elapsed 0.061 ms (6.071 ms / 100) 6.129 -> 6.096 ( -0.54%) [ +0.00% +0.00% +0.11% / +0.00% -0.54% -0.46%] index_select wrap : Elapsed 0.061 ms (6.129 ms / 100) 6.116 -> 6.087 ( -0.47%) [ +0.07% +0.00% +0.11% / +0.05% -0.47% -0.43%] index_select linear : Elapsed 0.061 ms (6.120 ms / 100) 6.113 -> 6.089 ( -0.39%) [ +0.00% +0.11% +0.16% / +0.20% -0.39% -0.18%] index_select reverse : Elapsed 0.061 ms (6.113 ms / 100) 6.065 -> 6.027 ( -0.63%) [ +0.07% +0.00% +0.30% / +0.12% -0.54% -0.63%] index_select skip64 : Elapsed 0.061 ms (6.069 ms / 100) 6.062 -> 6.030 ( -0.53%) [ +0.00% +0.10% +0.18% / +0.10% -0.48% -0.53%] index_select skip256 : Elapsed 0.061 ms (6.062 ms / 100) 6.117 -> 6.083 ( -0.56%) [ +0.00% +0.07% +0.05% / +0.13% -0.56% -0.51%] index_select spread : Elapsed 0.061 ms (6.117 ms / 100) 6.127 -> 6.091 ( -0.59%) [ +0.00% +0.18% +0.20% / +0.08% -0.55% -0.59%] index_select strided 3 : Elapsed 0.061 ms (6.127 ms / 100) 6.119 -> 6.094 ( -0.41%) [ +0.00% +0.03% +0.07% / -0.02% -0.39% -0.41%] index_select random : Elapsed 0.061 ms (6.119 ms / 100) 6.104 -> 6.083 ( -0.34%) [ +0.00% +0.10% +0.11% / +0.00% -0.34% -0.34%] index_select random_sorted : Elapsed 0.061 ms (6.104 ms / 100) B = [16, 40, 20, 5] (stride (1, 320, 16, 12800)) A = [16, 40, 20, 4] (stride (800, 20, 1, 12800)) dim = 3 5.088 -> 5.092 ( +0.08%) [ +0.00% +0.31% +0.06% / +0.08% +0.31% +0.24%] index_add_ linear : Elapsed 0.051 ms (5.088 ms / 100) 5.049 -> 5.048 ( -0.02%) [ +0.06% +0.00% +0.18% / -0.02% +0.10% +0.16%] index_copy_ linear : Elapsed 0.051 ms (5.052 ms / 100) 5.088 -> 5.092 ( +0.08%) [ +0.10% +0.00% +0.14% / +0.08% +0.26% +0.39%] index_add_ reverse : Elapsed 0.051 ms (5.093 ms / 100) 5.049 -> 5.058 ( +0.18%) [ +0.00% +0.02% +0.14% / +0.18% +0.24% +0.32%] index_copy_ reverse : Elapsed 0.050 ms (5.049 ms / 100) 5.090 -> 5.090 ( +0.00%) [ +0.00% +0.14% +0.16% / +0.00% +0.31% +0.24%] index_add_ spread : Elapsed 0.051 ms (5.090 ms / 100) 5.042 -> 5.054 ( +0.24%) [ +0.16% +0.00% +0.40% / +0.24% +0.48% +0.26%] index_copy_ spread : Elapsed 0.050 ms (5.050 ms / 100) 5.090 -> 5.088 ( -0.04%) [ +0.08% +0.00% +0.12% / -0.04% +0.31% +0.35%] index_add_ strided 3 : Elapsed 0.051 ms (5.094 ms / 100) 5.048 -> 5.049 ( +0.02%) [ +0.06% +0.00% +0.00% / +0.02% +0.24% +0.26%] index_copy_ strided 3 : Elapsed 0.051 ms (5.051 ms / 100) 5.092 -> 5.092 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.27% +0.29%] index_add_ perm : Elapsed 0.051 ms (5.092 ms / 100) 5.044 -> 5.051 ( +0.14%) [ +0.20% +0.00% +0.16% / +0.14% +0.30% +0.26%] index_copy_ perm : Elapsed 0.051 ms (5.054 ms / 100) 5.090 -> 5.102 ( +0.24%) [ +0.08% +0.00% +0.06% / +0.24% +0.35% +0.41%] index_add_ perm_sorted : Elapsed 0.051 ms (5.094 ms / 100) 5.053 -> 5.055 ( +0.04%) [ +0.00% +0.08% +0.10% / +0.04% +0.20% +0.16%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.053 ms / 100) 5.188 -> 5.192 ( +0.08%) [ +0.15% +0.13% +0.00% / +0.08% +0.17% +0.23%] index_select const : Elapsed 0.052 ms (5.196 ms / 100) 5.269 -> 5.283 ( +0.27%) [ +0.00% +0.40% +0.17% / +0.27% +0.27% +0.36%] index_select wrap : Elapsed 0.053 ms (5.269 ms / 100) 5.276 -> 5.279 ( +0.06%) [ +0.00% +0.11% +0.13% / +0.06% +0.27% +0.09%] index_select linear : Elapsed 0.053 ms (5.276 ms / 100) 5.268 -> 5.277 ( +0.17%) [ +0.00% +0.23% +0.11% / +0.17% +0.38% +0.32%] index_select reverse : Elapsed 0.053 ms (5.268 ms / 100) 5.194 -> 5.193 ( -0.02%) [ +0.04% +0.00% +0.12% / +0.08% +0.13% -0.02%] index_select skip64 : Elapsed 0.052 ms (5.196 ms / 100) 5.190 -> 5.191 ( +0.02%) [ +0.00% +0.04% +0.10% / +0.10% +0.02% +0.13%] index_select skip256 : Elapsed 0.052 ms (5.190 ms / 100) 5.280 -> 5.278 ( -0.04%) [ +0.00% +0.11% +0.06% / -0.04% +0.11% +0.11%] index_select spread : Elapsed 0.053 ms (5.280 ms / 100) 5.273 -> 5.283 ( +0.19%) [ +0.00% +0.02% +0.21% / +0.19% +0.19% +0.40%] index_select strided 3 : Elapsed 0.053 ms (5.273 ms / 100) 5.251 -> 5.255 ( +0.08%) [ +0.04% +0.13% +0.00% / +0.08% +0.32% +0.27%] index_select random : Elapsed 0.053 ms (5.253 ms / 100) 5.242 -> 5.249 ( +0.13%) [ +0.21% +0.15% +0.00% / +0.13% +0.32% +0.42%] index_select random_sorted : Elapsed 0.053 ms (5.253 ms / 100) B = [16, 40, 20, 5] (stride (1, 16, 640, 12800)) A = [16, 40, 20, 4] (stride (1, 16, 640, 12800)) dim = 3 5.836 -> 5.793 ( -0.74%) [ +0.00% +0.02% +0.14% / -0.07% -0.65% -0.74%] index_add_ linear : Elapsed 0.058 ms (5.836 ms / 100) 5.778 -> 5.743 ( -0.61%) [ +0.02% +0.00% +0.03% / -0.03% -0.55% -0.61%] index_copy_ linear : Elapsed 0.058 ms (5.779 ms / 100) 5.815 -> 5.794 ( -0.36%) [ +0.10% +0.00% +0.17% / +0.22% -0.34% -0.36%] index_add_ reverse : Elapsed 0.058 ms (5.821 ms / 100) 5.772 -> 5.746 ( -0.45%) [ +0.14% +0.00% +0.28% / +0.10% -0.43% -0.45%] index_copy_ reverse : Elapsed 0.058 ms (5.780 ms / 100) 5.839 -> 5.792 ( -0.80%) [ +0.00% +0.00% +0.03% / +0.07% -0.80% -0.55%] index_add_ spread : Elapsed 0.058 ms (5.839 ms / 100) 5.773 -> 5.746 ( -0.47%) [ +0.12% +0.00% +0.24% / +0.29% -0.47% -0.45%] index_copy_ spread : Elapsed 0.058 ms (5.780 ms / 100) 5.809 -> 5.783 ( -0.45%) [ +0.07% +0.00% +0.10% / +0.19% -0.38% -0.45%] index_add_ strided 3 : Elapsed 0.058 ms (5.813 ms / 100) 5.749 -> 5.732 ( -0.30%) [ +0.00% +0.03% +0.02% / +0.10% -0.23% -0.30%] index_copy_ strided 3 : Elapsed 0.057 ms (5.749 ms / 100) 5.826 -> 5.786 ( -0.69%) [ +0.00% +0.12% +0.00% / +0.17% -0.64% -0.69%] index_add_ perm : Elapsed 0.058 ms (5.826 ms / 100) 5.774 -> 5.742 ( -0.55%) [ +0.09% +0.09% +0.00% / +0.14% -0.55% -0.48%] index_copy_ perm : Elapsed 0.058 ms (5.779 ms / 100) 5.823 -> 5.778 ( -0.77%) [ +0.00% +0.09% +0.05% / +0.14% -0.62% -0.77%] index_add_ perm_sorted : Elapsed 0.058 ms (5.823 ms / 100) 5.771 -> 5.733 ( -0.66%) [ +0.00% +0.03% +0.12% / +0.10% -0.66% -0.52%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.771 ms / 100) 6.073 -> 6.077 ( +0.07%) [ +0.00% +0.05% +0.18% / +0.16% +0.08% +0.07%] index_select const : Elapsed 0.061 ms (6.073 ms / 100) 6.161 -> 6.128 ( -0.54%) [ +0.03% +0.00% +0.08% / +0.03% -0.50% -0.54%] index_select wrap : Elapsed 0.062 ms (6.163 ms / 100) 6.148 -> 6.117 ( -0.50%) [ +0.11% +0.00% +0.26% / +0.15% -0.50% -0.46%] index_select linear : Elapsed 0.062 ms (6.155 ms / 100) 6.157 -> 6.116 ( -0.67%) [ +0.16% +0.08% +0.00% / +0.11% -0.67% -0.67%] index_select reverse : Elapsed 0.062 ms (6.167 ms / 100) 6.076 -> 6.069 ( -0.12%) [ +0.13% +0.00% +0.18% / +0.08% -0.10% -0.12%] index_select skip64 : Elapsed 0.061 ms (6.084 ms / 100) 6.077 -> 6.073 ( -0.07%) [ +0.08% +0.00% +0.15% / +0.03% +0.00% -0.07%] index_select skip256 : Elapsed 0.061 ms (6.082 ms / 100) 6.157 -> 6.117 ( -0.65%) [ +0.00% +0.03% +0.10% / +0.18% -0.65% -0.65%] index_select spread : Elapsed 0.062 ms (6.157 ms / 100) 6.154 -> 6.113 ( -0.67%) [ +0.00% +0.02% +0.24% / +0.26% -0.62% -0.67%] index_select strided 3 : Elapsed 0.062 ms (6.154 ms / 100) 6.133 -> 6.092 ( -0.67%) [ +0.00% +0.05% +0.03% / +0.05% -0.64% -0.67%] index_select random : Elapsed 0.061 ms (6.133 ms / 100) 6.133 -> 6.096 ( -0.60%) [ +0.00% +0.08% +0.24% / +0.11% -0.60% -0.57%] index_select random_sorted : Elapsed 0.061 ms (6.133 ms / 100) out_shape = [5, 4, 16, 40] in_shape = [20, 4, 16, 40] idx_dim = 0 B = [5, 4, 16, 40] (stride (2560, 640, 1, 16)) A = [20, 4, 16, 40] (stride (1, 12800, 20, 320)) dim = 0 1.914 -> 1.916 ( +0.10%) [ +0.00% +0.16% +0.10% / +0.10% +0.47% +0.37%] index_select const : Elapsed 0.019 ms (1.914 ms / 100) 1.923 -> 1.924 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.05% +0.57% +0.62%] index_select wrap : Elapsed 0.019 ms (1.926 ms / 100) 1.923 -> 1.926 ( +0.16%) [ +0.00% +0.21% +0.00% / +0.16% +0.47% +0.36%] index_select linear : Elapsed 0.019 ms (1.923 ms / 100) 1.921 -> 1.927 ( +0.31%) [ +0.52% +0.00% +0.16% / +0.31% +0.83% +0.68%] index_select reverse : Elapsed 0.019 ms (1.931 ms / 100) 1.914 -> 1.919 ( +0.26%) [ +0.26% +0.00% +0.00% / +0.26% +0.57% +0.63%] index_select skip64 : Elapsed 0.019 ms (1.919 ms / 100) 1.915 -> 1.919 ( +0.21%) [ +0.05% +0.00% +0.10% / +0.21% +0.31% +0.52%] index_select skip256 : Elapsed 0.019 ms (1.916 ms / 100) 1.949 -> 1.950 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.31% +0.67%] index_select spread : Elapsed 0.020 ms (1.950 ms / 100) 1.944 -> 1.945 ( +0.05%) [ +0.26% +0.00% +0.00% / +0.05% +0.72% +0.62%] index_select strided 3 : Elapsed 0.019 ms (1.949 ms / 100) 1.950 -> 1.953 ( +0.15%) [ +0.05% +0.21% +0.00% / +0.15% +0.51% +0.41%] index_select strided 5 : Elapsed 0.020 ms (1.951 ms / 100) 1.943 -> 1.940 ( -0.15%) [ +0.00% +0.00% +0.15% / -0.15% +0.26% +0.26%] index_select strided 7 : Elapsed 0.019 ms (1.943 ms / 100) 1.945 -> 1.947 ( +0.10%) [ +0.05% +0.00% +0.10% / +0.10% +0.41% +0.41%] index_select strided 8 : Elapsed 0.019 ms (1.946 ms / 100) 1.949 -> 1.946 ( -0.15%) [ +0.31% +0.00% +0.10% / -0.15% +0.56% +0.36%] index_select strided 16 : Elapsed 0.020 ms (1.955 ms / 100) 1.945 -> 1.945 ( +0.00%) [ +0.15% +0.00% +0.21% / +0.00% +0.51% +0.72%] index_select random : Elapsed 0.019 ms (1.948 ms / 100) 1.938 -> 1.940 ( +0.10%) [ +0.21% +0.00% +0.00% / +0.10% +0.52% +0.46%] index_select random_sorted : Elapsed 0.019 ms (1.942 ms / 100) 1.941 -> 1.943 ( +0.10%) [ +0.00% +0.00% +0.15% / +0.10% +0.72% +0.36%] index_select perm : Elapsed 0.019 ms (1.941 ms / 100) 1.937 -> 1.939 ( +0.10%) [ +0.21% +0.15% +0.00% / +0.10% +0.26% +0.52%] index_select perm_sorted : Elapsed 0.019 ms (1.941 ms / 100) B = [5, 4, 16, 40] (stride (2560, 1, 160, 4)) A = [20, 4, 16, 40] (stride (4, 1, 3200, 80)) dim = 0 0.686 -> 0.689 ( +0.44%) [ +0.15% +0.00% +0.00% / +0.44% +0.58% +0.58%] index_select const : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.687 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +1.16% +1.16%] index_select wrap : Elapsed 0.007 ms (0.687 ms / 100) 0.686 -> 0.687 ( +0.15%) [ +0.29% +0.29% +0.00% / +0.15% +1.31% +1.17%] index_select linear : Elapsed 0.007 ms (0.688 ms / 100) 0.686 -> 0.687 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.87% +1.17%] index_select reverse : Elapsed 0.007 ms (0.687 ms / 100) 0.686 -> 0.687 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.87% +1.02%] index_select skip64 : Elapsed 0.007 ms (0.687 ms / 100) 0.694 -> 0.688 ( -0.86%) [ +0.00% +0.00% +0.14% / -0.14% -0.86% -0.86%] index_select skip256 : Elapsed 0.007 ms (0.694 ms / 100) 0.692 -> 0.688 ( -0.58%) [ +0.29% +0.00% +0.00% / +0.14% +0.72% -0.58%] index_select spread : Elapsed 0.007 ms (0.694 ms / 100) 0.689 -> 0.689 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.15% +0.00% +0.15%] index_select strided 3 : Elapsed 0.007 ms (0.690 ms / 100) 0.686 -> 0.685 ( -0.15%) [ +0.29% +0.00% +0.15% / +0.15% -0.15% +0.15%] index_select strided 5 : Elapsed 0.007 ms (0.688 ms / 100) 0.684 -> 0.685 ( +0.15%) [ +0.00% +0.29% +0.00% / +0.15% +0.15% +0.15%] index_select strided 7 : Elapsed 0.007 ms (0.684 ms / 100) 0.679 -> 0.680 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.29% +0.29%] index_select strided 8 : Elapsed 0.007 ms (0.680 ms / 100) 0.689 -> 0.687 ( -0.29%) [ +0.15% +0.00% +0.15% / +0.15% -0.29% -0.29%] index_select strided 16 : Elapsed 0.007 ms (0.690 ms / 100) 0.691 -> 0.688 ( -0.43%) [ +0.00% +0.14% +0.00% / +0.14% -0.43% -0.43%] index_select random : Elapsed 0.007 ms (0.691 ms / 100) 0.685 -> 0.686 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +1.02% +1.02%] index_select random_sorted : Elapsed 0.007 ms (0.685 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.00% +0.30% / +0.15% +1.33% +1.33%] index_select perm : Elapsed 0.007 ms (0.679 ms / 100) 0.687 -> 0.688 ( +0.15%) [ +0.00% +0.00% +0.29% / +0.15% +1.60% +1.60%] index_select perm_sorted : Elapsed 0.007 ms (0.687 ms / 100) B = [5, 4, 16, 40] (stride (640, 3200, 1, 16)) A = [20, 4, 16, 40] (stride (1, 800, 3200, 20)) dim = 0 1.856 -> 1.861 ( +0.27%) [ +0.22% +0.22% +0.00% / +0.27% +0.86% +0.92%] index_select const : Elapsed 0.019 ms (1.860 ms / 100) 1.867 -> 1.867 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.91% +0.75%] index_select wrap : Elapsed 0.019 ms (1.867 ms / 100) 1.872 -> 1.873 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.85% +0.80%] index_select linear : Elapsed 0.019 ms (1.872 ms / 100) 1.864 -> 1.868 ( +0.21%) [ +0.11% +0.48% +0.00% / +0.21% +0.70% +0.75%] index_select reverse : Elapsed 0.019 ms (1.866 ms / 100) 1.858 -> 1.857 ( -0.05%) [ +0.22% +0.00% +0.05% / -0.05% +0.70% +0.81%] index_select skip64 : Elapsed 0.019 ms (1.862 ms / 100) 1.865 -> 1.870 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +0.54% +0.64%] index_select skip256 : Elapsed 0.019 ms (1.865 ms / 100) 1.889 -> 1.890 ( +0.05%) [ +0.05% +0.11% +0.00% / +0.05% +1.11% +1.06%] index_select spread : Elapsed 0.019 ms (1.890 ms / 100) 1.886 -> 1.890 ( +0.21%) [ +0.00% +0.00% +0.05% / +0.21% +0.95% +1.06%] index_select strided 3 : Elapsed 0.019 ms (1.886 ms / 100) 1.890 -> 1.890 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.58% +0.69%] index_select strided 5 : Elapsed 0.019 ms (1.890 ms / 100) 1.878 -> 1.877 ( -0.05%) [ +0.00% +0.05% +0.21% / -0.05% +1.01% +1.06%] index_select strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.877 -> 1.876 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.75% +0.59%] index_select strided 8 : Elapsed 0.019 ms (1.877 ms / 100) 1.896 -> 1.897 ( +0.05%) [ +0.16% +0.21% +0.00% / +0.05% +0.74% +0.74%] index_select strided 16 : Elapsed 0.019 ms (1.899 ms / 100) 1.880 -> 1.876 ( -0.21%) [ +0.00% +0.00% +0.05% / -0.21% +1.01% +0.80%] index_select random : Elapsed 0.019 ms (1.880 ms / 100) 1.875 -> 1.877 ( +0.11%) [ +0.11% +0.05% +0.00% / +0.11% +0.80% +0.96%] index_select random_sorted : Elapsed 0.019 ms (1.877 ms / 100) 1.889 -> 1.888 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.85% +0.90%] index_select perm : Elapsed 0.019 ms (1.889 ms / 100) 1.885 -> 1.890 ( +0.27%) [ +0.21% +0.00% +0.21% / +0.27% +1.06% +1.43%] index_select perm_sorted : Elapsed 0.019 ms (1.889 ms / 100) B = [5, 4, 16, 40] (stride (1, 3200, 5, 80)) A = [20, 4, 16, 40] (stride (2560, 640, 1, 16)) dim = 0 1.818 -> 1.812 ( -0.33%) [ +0.22% +0.06% +0.00% / +0.06% -0.11% -0.33%] index_select const : Elapsed 0.018 ms (1.822 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.17% +0.28%] index_select wrap : Elapsed 0.018 ms (1.813 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.17% +0.17% +0.00% / +0.11% +0.17% +0.22%] index_select linear : Elapsed 0.018 ms (1.818 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.16% +0.00% +0.05% / +0.05% +0.38% +0.44%] index_select reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.822 -> 1.817 ( -0.27%) [ +0.11% +0.00% +0.00% / +0.16% -0.27% -0.05%] index_select skip64 : Elapsed 0.018 ms (1.824 ms / 100) 1.817 -> 1.816 ( -0.06%) [ +0.00% +0.22% +0.39% / -0.06% +0.11% +0.17%] index_select skip256 : Elapsed 0.018 ms (1.817 ms / 100) 1.813 -> 1.813 ( +0.00%) [ +0.00% +0.17% +0.11% / +0.00% +0.33% +0.39%] index_select spread : Elapsed 0.018 ms (1.813 ms / 100) 1.820 -> 1.822 ( +0.11%) [ +0.00% +0.33% +0.22% / +0.22% +0.11% +0.22%] index_select strided 3 : Elapsed 0.018 ms (1.820 ms / 100) 1.819 -> 1.822 ( +0.16%) [ +0.16% +0.27% +0.00% / +0.16% +0.44% +0.33%] index_select strided 5 : Elapsed 0.018 ms (1.822 ms / 100) 1.816 -> 1.816 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.17% +0.11%] index_select strided 7 : Elapsed 0.018 ms (1.817 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.55% +0.44%] index_select strided 8 : Elapsed 0.018 ms (1.819 ms / 100) 1.820 -> 1.820 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.22% +0.49%] index_select strided 16 : Elapsed 0.018 ms (1.824 ms / 100) 1.818 -> 1.819 ( +0.06%) [ +0.22% +0.00% +0.11% / +0.06% +0.22% +0.17%] index_select random : Elapsed 0.018 ms (1.822 ms / 100) 1.814 -> 1.819 ( +0.28%) [ +0.33% +0.28% +0.00% / +0.28% +0.44% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.820 ms / 100) 1.813 -> 1.813 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.17%] index_select perm : Elapsed 0.018 ms (1.813 ms / 100) 1.815 -> 1.818 ( +0.17%) [ +0.28% +0.00% +0.11% / +0.17% +0.39% +0.50%] index_select perm_sorted : Elapsed 0.018 ms (1.820 ms / 100) B = [5, 4, 16, 40] (stride (1, 3200, 5, 80)) A = [20, 4, 16, 40] (stride (2560, 1, 4, 64)) dim = 0 1.888 -> 1.893 ( +0.26%) [ +0.21% +0.32% +0.00% / +0.48% +0.32% +0.26%] index_select const : Elapsed 0.019 ms (1.892 ms / 100) 1.887 -> 1.889 ( +0.11%) [ +0.16% +0.21% +0.00% / +0.11% +0.32% +0.42%] index_select wrap : Elapsed 0.019 ms (1.890 ms / 100) 1.890 -> 1.892 ( +0.11%) [ +0.00% +0.26% +0.21% / +0.16% +0.11% +0.16%] index_select linear : Elapsed 0.019 ms (1.890 ms / 100) 1.890 -> 1.888 ( -0.11%) [ +0.05% +0.05% +0.00% / -0.11% +0.26% +0.21%] index_select reverse : Elapsed 0.019 ms (1.891 ms / 100) 1.892 -> 1.894 ( +0.11%) [ +0.00% +0.16% +0.00% / +0.21% +0.11% +0.21%] index_select skip64 : Elapsed 0.019 ms (1.892 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.00% +0.00% +0.11% / -0.05% +0.21% +0.00%] index_select skip256 : Elapsed 0.019 ms (1.894 ms / 100) 1.896 -> 1.898 ( +0.11%) [ +0.00% +0.16% +0.32% / +0.16% +0.21% +0.11%] index_select spread : Elapsed 0.019 ms (1.896 ms / 100) 1.889 -> 1.888 ( -0.05%) [ +0.11% +0.00% +0.11% / -0.05% +0.37% +0.32%] index_select strided 3 : Elapsed 0.019 ms (1.891 ms / 100) 1.892 -> 1.892 ( +0.00%) [ +0.00% +0.11% +0.16% / +0.00% +0.21% +0.05%] index_select strided 5 : Elapsed 0.019 ms (1.892 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.26% +0.11% +0.00% / +0.11% +0.05% +0.11%] index_select strided 7 : Elapsed 0.019 ms (1.898 ms / 100) 1.896 -> 1.897 ( +0.05%) [ +0.42% +0.05% +0.00% / +0.11% +0.05% +0.11%] index_select strided 8 : Elapsed 0.019 ms (1.904 ms / 100) 1.897 -> 1.896 ( -0.05%) [ +0.26% +0.00% +0.16% / +0.26% +0.11% -0.05%] index_select strided 16 : Elapsed 0.019 ms (1.902 ms / 100) 1.894 -> 1.892 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% -0.11% +0.00%] index_select random : Elapsed 0.019 ms (1.894 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.42% +0.37% +0.00% / +0.05% +0.21% +0.26%] index_select random_sorted : Elapsed 0.019 ms (1.901 ms / 100) 1.897 -> 1.893 ( -0.21%) [ +0.05% +0.00% +0.05% / +0.11% -0.21% -0.16%] index_select perm : Elapsed 0.019 ms (1.898 ms / 100) 1.895 -> 1.890 ( -0.26%) [ +0.21% +0.16% +0.00% / +0.05% -0.26% -0.21%] index_select perm_sorted : Elapsed 0.019 ms (1.899 ms / 100) B = [5, 4, 16, 40] (stride (1, 200, 800, 5)) A = [20, 4, 16, 40] (stride (1, 20, 3200, 80)) dim = 0 1.812 -> 1.807 ( -0.28%) [ +0.00% +0.17% +0.28% / +0.17% -0.28% +0.06%] index_select const : Elapsed 0.018 ms (1.812 ms / 100) 1.811 -> 1.811 ( +0.00%) [ +0.00% +0.28% +0.06% / +0.00% +0.83% +0.66%] index_select wrap : Elapsed 0.018 ms (1.811 ms / 100) 1.811 -> 1.812 ( +0.06%) [ +0.22% +0.33% +0.00% / +0.06% +0.66% +0.66%] index_select linear : Elapsed 0.018 ms (1.815 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.17% +0.50% +0.39%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.33% +0.39% +0.00% / +0.39% +0.55% +0.55%] index_select skip64 : Elapsed 0.018 ms (1.811 ms / 100) 1.808 -> 1.812 ( +0.22%) [ +0.11% +0.11% +0.00% / +0.22% +0.50% +0.66%] index_select skip256 : Elapsed 0.018 ms (1.810 ms / 100) 1.822 -> 1.829 ( +0.38%) [ +0.16% +0.00% +0.11% / +0.38% +0.49% +0.60%] index_select spread : Elapsed 0.018 ms (1.825 ms / 100) 1.822 -> 1.822 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.55% +0.55%] index_select strided 3 : Elapsed 0.018 ms (1.822 ms / 100) 1.822 -> 1.823 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.55% +0.55%] index_select strided 5 : Elapsed 0.018 ms (1.824 ms / 100) 1.821 -> 1.820 ( -0.05%) [ +0.11% +0.11% +0.00% / -0.05% +0.44% +0.49%] index_select strided 7 : Elapsed 0.018 ms (1.823 ms / 100) 1.823 -> 1.826 ( +0.16%) [ +0.00% +0.33% +0.00% / +0.27% +0.27% +0.16%] index_select strided 8 : Elapsed 0.018 ms (1.823 ms / 100) 1.820 -> 1.822 ( +0.11%) [ +0.05% +0.16% +0.00% / +0.11% +0.60% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.821 ms / 100) 1.829 -> 1.830 ( +0.05%) [ +0.27% +0.00% +0.27% / +0.05% +0.27% +0.22%] index_select random : Elapsed 0.018 ms (1.834 ms / 100) 1.821 -> 1.825 ( +0.22%) [ +0.22% +0.00% +0.05% / +0.22% +0.33% +0.27%] index_select random_sorted : Elapsed 0.018 ms (1.825 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.28% +0.00% +0.11% / +0.11% +0.22% +0.33%] index_select perm : Elapsed 0.018 ms (1.822 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.22% +0.17% +0.22%] index_select perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) B = [5, 4, 16, 40] (stride (4, 1, 800, 20)) A = [20, 4, 16, 40] (stride (2560, 16, 1, 64)) dim = 0 0.694 -> 0.694 ( +0.00%) [ +0.14% +0.00% +0.58% / +0.00% +1.01% +1.01%] index_select const : Elapsed 0.007 ms (0.695 ms / 100) 0.701 -> 0.696 ( -0.71%) [ +0.14% +0.14% +0.00% / +0.00% -0.57% -0.71%] index_select wrap : Elapsed 0.007 ms (0.702 ms / 100) 0.701 -> 0.696 ( -0.71%) [ +0.14% +0.14% +0.00% / +0.14% -0.57% -0.71%] index_select linear : Elapsed 0.007 ms (0.702 ms / 100) 0.697 -> 0.698 ( +0.14%) [ +0.29% +0.00% +0.00% / +0.43% +0.14% +0.14%] index_select reverse : Elapsed 0.007 ms (0.699 ms / 100) 0.695 -> 0.696 ( +0.14%) [ +0.43% +0.29% +0.00% / +0.14% +0.43% +0.43%] index_select skip64 : Elapsed 0.007 ms (0.698 ms / 100) 0.695 -> 0.695 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.86% +1.01%] index_select skip256 : Elapsed 0.007 ms (0.695 ms / 100) 0.696 -> 0.695 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +1.01% +1.15%] index_select spread : Elapsed 0.007 ms (0.696 ms / 100) 0.698 -> 0.700 ( +0.29%) [ +0.00% +0.29% +0.14% / +0.43% +0.29% +0.43%] index_select strided 3 : Elapsed 0.007 ms (0.698 ms / 100) 0.697 -> 0.697 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.00% +0.57% +0.29%] index_select strided 5 : Elapsed 0.007 ms (0.697 ms / 100) 0.695 -> 0.699 ( +0.58%) [ +0.29% +0.43% +0.00% / +0.58% +0.58% +0.86%] index_select strided 7 : Elapsed 0.007 ms (0.697 ms / 100) 0.698 -> 0.698 ( +0.00%) [ +0.29% +1.15% +0.00% / +0.00% +0.00% +0.57%] index_select strided 8 : Elapsed 0.007 ms (0.700 ms / 100) 0.701 -> 0.696 ( -0.71%) [ +0.14% +0.14% +0.00% / +0.00% -0.71% -0.71%] index_select strided 16 : Elapsed 0.007 ms (0.702 ms / 100) 0.704 -> 0.702 ( -0.28%) [ +0.00% +0.00% +0.00% / +0.28% -0.28% +0.00%] index_select random : Elapsed 0.007 ms (0.704 ms / 100) 0.700 -> 0.697 ( -0.43%) [ +0.00% +0.29% +0.14% / -0.43% +0.14% -0.14%] index_select random_sorted : Elapsed 0.007 ms (0.700 ms / 100) 0.699 -> 0.700 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.57% +0.43%] index_select perm : Elapsed 0.007 ms (0.699 ms / 100) 0.696 -> 0.696 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.86% +0.86%] index_select perm_sorted : Elapsed 0.007 ms (0.697 ms / 100) B = [5, 4, 16, 40] (stride (64, 1, 4, 320)) A = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) dim = 0 1.687 -> 1.674 ( -0.77%) [ +0.00% +0.06% +0.18% / +0.00% -0.59% -0.77%] index_select const : Elapsed 0.017 ms (1.687 ms / 100) 1.702 -> 1.688 ( -0.82%) [ +0.00% +0.06% +0.12% / +0.12% -0.82% -0.76%] index_select wrap : Elapsed 0.017 ms (1.702 ms / 100) 1.703 -> 1.687 ( -0.94%) [ +0.12% +0.18% +0.00% / +0.23% -0.94% -0.59%] index_select linear : Elapsed 0.017 ms (1.705 ms / 100) 1.701 -> 1.688 ( -0.76%) [ +0.18% +0.00% +0.00% / -0.12% -0.53% -0.76%] index_select reverse : Elapsed 0.017 ms (1.704 ms / 100) 1.678 -> 1.674 ( -0.24%) [ +0.00% +0.06% +0.00% / +0.00% -0.24% -0.06%] index_select skip64 : Elapsed 0.017 ms (1.678 ms / 100) 1.678 -> 1.674 ( -0.24%) [ +0.12% +0.12% +0.00% / -0.18% -0.24% +0.00%] index_select skip256 : Elapsed 0.017 ms (1.680 ms / 100) 1.699 -> 1.677 ( -1.29%) [ +0.00% +0.06% +0.06% / -0.18% -1.06% -1.29%] index_select spread : Elapsed 0.017 ms (1.699 ms / 100) 1.689 -> 1.687 ( -0.12%) [ +0.24% +0.24% +0.00% / +0.12% -0.06% -0.12%] index_select strided 3 : Elapsed 0.017 ms (1.693 ms / 100) 1.679 -> 1.681 ( +0.12%) [ +0.00% +0.06% +0.00% / +0.12% +0.66% +0.66%] index_select strided 5 : Elapsed 0.017 ms (1.679 ms / 100) 1.692 -> 1.697 ( +0.30%) [ +0.18% +0.00% +0.06% / +0.30% +0.41% +0.59%] index_select strided 7 : Elapsed 0.017 ms (1.695 ms / 100) 1.695 -> 1.678 ( -1.00%) [ +0.18% +0.18% +0.00% / +0.18% -1.00% -1.00%] index_select strided 8 : Elapsed 0.017 ms (1.698 ms / 100) 1.698 -> 1.680 ( -1.06%) [ +0.00% +0.00% +0.00% / +0.12% -1.06% -1.06%] index_select strided 16 : Elapsed 0.017 ms (1.698 ms / 100) 1.683 -> 1.682 ( -0.06%) [ +0.30% +0.00% +0.06% / +0.00% +0.24% -0.06%] index_select random : Elapsed 0.017 ms (1.688 ms / 100) 1.686 -> 1.685 ( -0.06%) [ +0.00% +0.00% +0.12% / -0.06% +0.06% +0.06%] index_select random_sorted : Elapsed 0.017 ms (1.686 ms / 100) 1.682 -> 1.685 ( +0.18%) [ +0.00% +0.12% +0.00% / +0.18% +0.95% +0.65%] index_select perm : Elapsed 0.017 ms (1.682 ms / 100) 1.680 -> 1.682 ( +0.12%) [ +0.30% +0.18% +0.00% / +0.12% +0.95% +0.95%] index_select perm_sorted : Elapsed 0.017 ms (1.685 ms / 100) B = [5, 4, 16, 40] (stride (4, 1, 20, 320)) A = [20, 4, 16, 40] (stride (160, 40, 3200, 1)) dim = 0 1.782 -> 1.785 ( +0.17%) [ +0.06% +0.00% +0.34% / +0.17% +0.73% +0.67%] index_select const : Elapsed 0.018 ms (1.783 ms / 100) 1.804 -> 1.806 ( +0.11%) [ +0.00% +0.17% +0.22% / +0.11% +1.83% +1.50%] index_select wrap : Elapsed 0.018 ms (1.804 ms / 100) 1.803 -> 1.806 ( +0.17%) [ +0.11% +0.22% +0.00% / +0.17% +1.83% +1.77%] index_select linear : Elapsed 0.018 ms (1.805 ms / 100) 1.809 -> 1.807 ( -0.11%) [ +0.00% +0.06% +0.17% / -0.11% +1.55% +1.71%] index_select reverse : Elapsed 0.018 ms (1.809 ms / 100) 1.781 -> 1.780 ( -0.06%) [ +0.00% +0.17% +0.11% / -0.06% +0.67% +0.73%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.782 -> 1.781 ( -0.06%) [ +0.00% +0.17% +0.06% / -0.06% +0.73% +0.73%] index_select skip256 : Elapsed 0.018 ms (1.782 ms / 100) 1.806 -> 1.808 ( +0.11%) [ +0.17% +0.06% +0.00% / +0.11% +0.66% +0.78%] index_select spread : Elapsed 0.018 ms (1.809 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.50% +0.17%] index_select strided 3 : Elapsed 0.018 ms (1.817 ms / 100) 1.796 -> 1.801 ( +0.28%) [ +0.00% +0.28% +0.11% / +0.28% +0.84% +0.95%] index_select strided 5 : Elapsed 0.018 ms (1.796 ms / 100) 1.804 -> 1.812 ( +0.44%) [ +0.17% +0.00% +0.17% / +0.44% +0.67% +0.83%] index_select strided 7 : Elapsed 0.018 ms (1.807 ms / 100) 1.808 -> 1.811 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +0.72% +0.77%] index_select strided 8 : Elapsed 0.018 ms (1.809 ms / 100) 1.806 -> 1.812 ( +0.33%) [ +0.17% +0.00% +0.39% / +0.33% +0.78% +0.66%] index_select strided 16 : Elapsed 0.018 ms (1.809 ms / 100) 1.803 -> 1.804 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.50% +0.83%] index_select random : Elapsed 0.018 ms (1.803 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.28% +0.39% +0.00% / +0.06% +1.06% +0.95%] index_select random_sorted : Elapsed 0.018 ms (1.802 ms / 100) 1.806 -> 1.807 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +1.22% +0.89%] index_select perm : Elapsed 0.018 ms (1.807 ms / 100) 1.805 -> 1.809 ( +0.22%) [ +0.06% +0.06% +0.00% / +0.22% +1.11% +0.94%] index_select perm_sorted : Elapsed 0.018 ms (1.806 ms / 100) out_shape = [20, 5, 16, 40] in_shape = [20, 4, 16, 40] idx_dim = 1 B = [20, 5, 16, 40] (stride (640, 12800, 1, 16)) A = [20, 4, 16, 40] (stride (16, 12800, 1, 320)) dim = 1 5.815 -> 5.788 ( -0.46%) [ +0.05% +0.14% +0.00% / +0.10% -0.36% -0.46%] index_add_ linear : Elapsed 0.058 ms (5.818 ms / 100) 5.775 -> 5.733 ( -0.73%) [ +0.00% +0.03% +0.02% / -0.03% -0.73% -0.64%] index_copy_ linear : Elapsed 0.058 ms (5.775 ms / 100) 5.805 -> 5.784 ( -0.36%) [ +0.00% +0.22% +0.09% / +0.09% -0.26% -0.36%] index_add_ reverse : Elapsed 0.058 ms (5.805 ms / 100) 5.767 -> 5.744 ( -0.40%) [ +0.09% +0.05% +0.00% / +0.17% -0.40% -0.36%] index_copy_ reverse : Elapsed 0.058 ms (5.772 ms / 100) 5.816 -> 5.789 ( -0.46%) [ +0.02% +0.07% +0.00% / +0.02% -0.46% -0.36%] index_add_ spread : Elapsed 0.058 ms (5.817 ms / 100) 5.774 -> 5.740 ( -0.59%) [ +0.00% +0.03% +0.02% / +0.14% -0.59% -0.54%] index_copy_ spread : Elapsed 0.058 ms (5.774 ms / 100) 5.797 -> 5.777 ( -0.35%) [ +0.00% +0.10% +0.00% / +0.02% -0.35% -0.33%] index_add_ strided 3 : Elapsed 0.058 ms (5.797 ms / 100) 5.749 -> 5.722 ( -0.47%) [ +0.00% +0.02% +0.10% / +0.24% -0.47% -0.42%] index_copy_ strided 3 : Elapsed 0.057 ms (5.749 ms / 100) 5.790 -> 5.763 ( -0.47%) [ +0.00% +0.00% +0.07% / +0.16% -0.38% -0.47%] index_add_ perm : Elapsed 0.058 ms (5.790 ms / 100) 5.756 -> 5.728 ( -0.49%) [ +0.17% +0.00% +0.24% / +0.12% -0.49% -0.31%] index_copy_ perm : Elapsed 0.058 ms (5.766 ms / 100) 5.814 -> 5.792 ( -0.38%) [ +0.00% +0.14% +0.26% / +0.12% -0.33% -0.38%] index_add_ perm_sorted : Elapsed 0.058 ms (5.814 ms / 100) 5.772 -> 5.740 ( -0.55%) [ +0.07% +0.00% +0.05% / +0.12% -0.55% -0.49%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.776 ms / 100) 6.064 -> 6.064 ( +0.00%) [ +0.00% +0.08% +0.23% / +0.12% +0.00% +0.02%] index_select const : Elapsed 0.061 ms (6.064 ms / 100) 6.142 -> 6.111 ( -0.50%) [ +0.02% +0.00% +0.08% / +0.13% -0.46% -0.50%] index_select wrap : Elapsed 0.061 ms (6.143 ms / 100) 6.136 -> 6.108 ( -0.46%) [ +0.08% +0.08% +0.00% / +0.08% -0.42% -0.46%] index_select linear : Elapsed 0.061 ms (6.141 ms / 100) 6.148 -> 6.109 ( -0.63%) [ +0.00% +0.03% +0.03% / +0.16% -0.63% -0.57%] index_select reverse : Elapsed 0.061 ms (6.148 ms / 100) 6.069 -> 6.066 ( -0.05%) [ +0.00% +0.13% +0.08% / +0.20% -0.05% -0.03%] index_select skip64 : Elapsed 0.061 ms (6.069 ms / 100) 6.071 -> 6.065 ( -0.10%) [ +0.05% +0.03% +0.00% / +0.07% -0.10% -0.03%] index_select skip256 : Elapsed 0.061 ms (6.074 ms / 100) 6.143 -> 6.105 ( -0.62%) [ +0.02% +0.00% +0.11% / +0.08% -0.57% -0.62%] index_select spread : Elapsed 0.061 ms (6.144 ms / 100) 6.141 -> 6.098 ( -0.70%) [ +0.02% +0.00% +0.08% / +0.20% -0.70% -0.65%] index_select strided 3 : Elapsed 0.061 ms (6.142 ms / 100) 6.129 -> 6.092 ( -0.60%) [ +0.11% +0.00% +0.16% / +0.28% -0.60% -0.54%] index_select random : Elapsed 0.061 ms (6.136 ms / 100) 6.136 -> 6.098 ( -0.62%) [ +0.00% +0.05% +0.24% / +0.08% -0.46% -0.62%] index_select random_sorted : Elapsed 0.061 ms (6.136 ms / 100) B = [20, 5, 16, 40] (stride (1, 12800, 800, 20)) A = [20, 4, 16, 40] (stride (2560, 640, 40, 1)) dim = 1 3.422 -> 3.423 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.73% +0.85%] index_add_ linear : Elapsed 0.034 ms (3.424 ms / 100) 3.338 -> 3.342 ( +0.12%) [ +0.09% +0.15% +0.00% / +0.12% +0.75% +0.93%] index_copy_ linear : Elapsed 0.033 ms (3.341 ms / 100) 3.421 -> 3.427 ( +0.18%) [ +0.09% +0.20% +0.00% / +0.18% +1.02% +1.08%] index_add_ reverse : Elapsed 0.034 ms (3.424 ms / 100) 3.337 -> 3.339 ( +0.06%) [ +0.00% +0.12% +0.09% / +0.06% +0.66% +0.81%] index_copy_ reverse : Elapsed 0.033 ms (3.337 ms / 100) 3.418 -> 3.421 ( +0.09%) [ +0.20% +0.06% +0.00% / +0.09% +0.85% +0.79%] index_add_ spread : Elapsed 0.034 ms (3.425 ms / 100) 3.338 -> 3.335 ( -0.09%) [ +0.15% +0.06% +0.00% / -0.09% +0.81% +0.51%] index_copy_ spread : Elapsed 0.033 ms (3.343 ms / 100) 3.429 -> 3.426 ( -0.09%) [ +0.00% +0.12% +0.23% / -0.09% +0.61% +0.67%] index_add_ strided 3 : Elapsed 0.034 ms (3.429 ms / 100) 3.342 -> 3.344 ( +0.06%) [ +0.00% +0.18% +0.15% / +0.06% +0.75% +0.75%] index_copy_ strided 3 : Elapsed 0.033 ms (3.342 ms / 100) 3.426 -> 3.424 ( -0.06%) [ +0.00% +0.12% +0.06% / -0.06% +0.93% +0.85%] index_add_ perm : Elapsed 0.034 ms (3.426 ms / 100) 3.341 -> 3.344 ( +0.09%) [ +0.00% +0.21% +0.21% / +0.09% +0.42% +0.54%] index_copy_ perm : Elapsed 0.033 ms (3.341 ms / 100) 3.417 -> 3.421 ( +0.12%) [ +0.00% +0.12% +0.29% / +0.12% +1.32% +1.02%] index_add_ perm_sorted : Elapsed 0.034 ms (3.417 ms / 100) 3.335 -> 3.342 ( +0.21%) [ +0.09% +0.00% +0.15% / +0.21% +0.81% +0.90%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.338 ms / 100) 3.371 -> 3.371 ( +0.00%) [ +0.00% +0.09% +0.12% / +0.00% +0.77% +0.89%] index_select const : Elapsed 0.034 ms (3.371 ms / 100) 3.445 -> 3.452 ( +0.20%) [ +0.06% +0.15% +0.00% / +0.20% +0.70% +0.58%] index_select wrap : Elapsed 0.034 ms (3.447 ms / 100) 3.449 -> 3.438 ( -0.32%) [ +0.00% +0.03% +0.00% / -0.32% +0.41% +0.29%] index_select linear : Elapsed 0.034 ms (3.449 ms / 100) 3.464 -> 3.455 ( -0.26%) [ +0.03% +0.00% +0.00% / +0.03% -0.23% -0.26%] index_select reverse : Elapsed 0.035 ms (3.465 ms / 100) 3.367 -> 3.372 ( +0.15%) [ +0.03% +0.00% +0.12% / +0.15% +0.77% +0.80%] index_select skip64 : Elapsed 0.034 ms (3.368 ms / 100) 3.371 -> 3.379 ( +0.24%) [ +0.00% +0.33% +0.00% / +0.24% +0.83% +0.80%] index_select skip256 : Elapsed 0.034 ms (3.371 ms / 100) 3.433 -> 3.445 ( +0.35%) [ +0.12% +0.00% +0.12% / +0.35% +1.19% +1.05%] index_select spread : Elapsed 0.034 ms (3.437 ms / 100) 3.451 -> 3.452 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.55% +0.58%] index_select strided 3 : Elapsed 0.035 ms (3.453 ms / 100) 3.454 -> 3.454 ( +0.00%) [ +0.12% +0.00% +0.03% / +0.00% +0.46% +0.46%] index_select random : Elapsed 0.035 ms (3.458 ms / 100) 3.441 -> 3.443 ( +0.06%) [ +0.00% +0.00% +0.09% / +0.06% +0.93% +0.64%] index_select random_sorted : Elapsed 0.034 ms (3.441 ms / 100) B = [20, 5, 16, 40] (stride (80, 1, 5, 1600)) dim = 1 fill_cnt = 4 3.708 -> 3.706 ( -0.05%) [ +0.16% +0.11% +0.00% / +0.00% -0.05% +0.19%] index_fill_ const : Elapsed 0.037 ms (3.714 ms / 100) 3.710 -> 3.708 ( -0.05%) [ +0.00% +0.05% +0.00% / +0.08% -0.05% +0.03%] index_fill_ linear : Elapsed 0.037 ms (3.710 ms / 100) 3.691 -> 3.695 ( +0.11%) [ +0.03% +0.08% +0.00% / +0.11% +0.35% +0.33%] index_fill_ reverse : Elapsed 0.037 ms (3.692 ms / 100) 3.690 -> 3.696 ( +0.16%) [ +0.19% +0.03% +0.00% / +0.30% +0.16% +0.19%] index_fill_ skip64 : Elapsed 0.037 ms (3.697 ms / 100) 3.691 -> 3.696 ( +0.14%) [ +0.14% +0.05% +0.00% / +0.14% +0.24% +0.41%] index_fill_ skip256 : Elapsed 0.037 ms (3.696 ms / 100) 3.695 -> 3.694 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.08% +0.11%] index_fill_ spread : Elapsed 0.037 ms (3.698 ms / 100) 3.707 -> 3.711 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.11% +0.19%] index_fill_ strided 3 : Elapsed 0.037 ms (3.711 ms / 100) 3.694 -> 3.699 ( +0.14%) [ +0.19% +0.27% +0.00% / +0.16% +0.14% +0.14%] index_fill_ random : Elapsed 0.037 ms (3.701 ms / 100) 3.691 -> 3.698 ( +0.19%) [ +0.30% +0.14% +0.00% / +0.19% +0.43% +0.30%] index_fill_ random_sorted : Elapsed 0.037 ms (3.702 ms / 100) 3.694 -> 3.689 ( -0.14%) [ +0.03% +0.00% +0.05% / -0.14% +0.24% +0.14%] index_fill_ perm : Elapsed 0.037 ms (3.695 ms / 100) 3.688 -> 3.698 ( +0.27%) [ +0.27% +0.00% +0.00% / +0.27% +0.27% +0.33%] index_fill_ perm_sorted : Elapsed 0.037 ms (3.698 ms / 100) out_shape = [20, 4, 5, 40] in_shape = [20, 4, 16, 40] idx_dim = 2 B = [20, 4, 5, 40] (stride (40, 4000, 800, 1)) A = [20, 4, 16, 40] (stride (2560, 40, 160, 1)) dim = 2 2.207 -> 2.208 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.54% +0.63%] index_select const : Elapsed 0.022 ms (2.207 ms / 100) 2.261 -> 2.269 ( +0.35%) [ +0.18% +0.27% +0.00% / +0.35% +1.19% +1.11%] index_select wrap : Elapsed 0.023 ms (2.265 ms / 100) 2.272 -> 2.275 ( +0.13%) [ +0.13% +0.44% +0.00% / +0.13% +1.19% +1.01%] index_select linear : Elapsed 0.023 ms (2.275 ms / 100) 2.262 -> 2.265 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +1.24% +1.19%] index_select reverse : Elapsed 0.023 ms (2.265 ms / 100) 2.207 -> 2.206 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.54% +0.23%] index_select skip64 : Elapsed 0.022 ms (2.207 ms / 100) 2.205 -> 2.208 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.59% +0.50%] index_select skip256 : Elapsed 0.022 ms (2.208 ms / 100) 2.263 -> 2.261 ( -0.09%) [ +0.04% +0.04% +0.00% / -0.09% +1.02% +1.10%] index_select spread : Elapsed 0.023 ms (2.264 ms / 100) 2.274 -> 2.273 ( -0.04%) [ +0.09% +0.00% +0.04% / -0.04% +0.44% +0.53%] index_select strided 3 : Elapsed 0.023 ms (2.276 ms / 100) 2.277 -> 2.279 ( +0.09%) [ +0.18% +0.04% +0.00% / +0.09% +0.83% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.281 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +1.33% +1.64%] index_select strided 7 : Elapsed 0.023 ms (2.257 ms / 100) 2.219 -> 2.218 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.59% +0.41%] index_select strided 8 : Elapsed 0.022 ms (2.222 ms / 100) 2.238 -> 2.235 ( -0.13%) [ +0.18% +0.18% +0.00% / -0.13% +0.27% +0.31%] index_select random : Elapsed 0.022 ms (2.242 ms / 100) 2.235 -> 2.239 ( +0.18%) [ +0.00% +0.27% +0.22% / +0.18% +0.85% +0.81%] index_select random_sorted : Elapsed 0.022 ms (2.235 ms / 100) 2.264 -> 2.268 ( +0.18%) [ +0.04% +0.13% +0.00% / +0.18% +0.71% +0.62%] index_select perm : Elapsed 0.023 ms (2.265 ms / 100) 2.259 -> 2.266 ( +0.31%) [ +0.09% +0.22% +0.00% / +0.31% +0.75% +0.71%] index_select perm_sorted : Elapsed 0.023 ms (2.261 ms / 100) B = [20, 4, 5, 40] (stride (1, 4000, 800, 20)) A = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) dim = 2 2.245 -> 2.244 ( -0.04%) [ +0.31% +0.00% +0.00% / -0.04% +0.53% +0.49%] index_select const : Elapsed 0.023 ms (2.252 ms / 100) 2.307 -> 2.310 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +0.26% +0.43%] index_select wrap : Elapsed 0.023 ms (2.311 ms / 100) 2.294 -> 2.297 ( +0.13%) [ +0.00% +0.17% +0.04% / +0.13% +0.48% +0.70%] index_select linear : Elapsed 0.023 ms (2.294 ms / 100) 2.297 -> 2.298 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.48% +0.39%] index_select reverse : Elapsed 0.023 ms (2.299 ms / 100) 2.234 -> 2.238 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +0.40% +0.76%] index_select skip64 : Elapsed 0.022 ms (2.238 ms / 100) 2.243 -> 2.243 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.40% +0.45%] index_select skip256 : Elapsed 0.022 ms (2.246 ms / 100) 2.297 -> 2.301 ( +0.17%) [ +0.04% +0.17% +0.00% / +0.17% +0.44% +0.44%] index_select spread : Elapsed 0.023 ms (2.298 ms / 100) 2.295 -> 2.302 ( +0.31%) [ +0.00% +0.39% +0.22% / +0.31% +0.44% +0.57%] index_select strided 3 : Elapsed 0.023 ms (2.295 ms / 100) 2.290 -> 2.298 ( +0.35%) [ +0.22% +0.31% +0.00% / +0.35% +0.66% +0.74%] index_select strided 5 : Elapsed 0.023 ms (2.295 ms / 100) 2.288 -> 2.291 ( +0.13%) [ +0.04% +0.17% +0.00% / +0.13% +0.48% +0.44%] index_select strided 7 : Elapsed 0.023 ms (2.289 ms / 100) 2.261 -> 2.257 ( -0.18%) [ +0.22% +0.00% +0.40% / -0.18% +0.44% +0.35%] index_select strided 8 : Elapsed 0.023 ms (2.266 ms / 100) 2.301 -> 2.298 ( -0.13%) [ +0.04% +0.00% +0.13% / -0.13% +0.70% +0.39%] index_select random : Elapsed 0.023 ms (2.302 ms / 100) 2.290 -> 2.289 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.52% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.290 ms / 100) 2.299 -> 2.304 ( +0.22%) [ +0.26% +0.30% +0.00% / +0.22% +0.43% +0.39%] index_select perm : Elapsed 0.023 ms (2.305 ms / 100) 2.304 -> 2.306 ( +0.09%) [ +0.00% +0.22% +0.43% / +0.09% +0.48% +0.30%] index_select perm_sorted : Elapsed 0.023 ms (2.304 ms / 100) B = [20, 4, 5, 40] (stride (5, 100, 1, 400)) A = [20, 4, 16, 40] (stride (1, 20, 80, 1280)) dim = 2 2.473 -> 2.474 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.16% +0.32% +0.04%] index_select const : Elapsed 0.025 ms (2.475 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.04% +0.00% +0.08% / +0.12% +0.16% +0.24%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.465 -> 2.468 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.20% +0.20%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.464 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.28% +0.28%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.469 -> 2.475 ( +0.24%) [ +0.00% +0.16% +0.04% / +0.24% +0.28% +0.36%] index_select skip64 : Elapsed 0.025 ms (2.469 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.32% +0.45% +0.00% / +0.16% +0.53% +0.77%] index_select skip256 : Elapsed 0.025 ms (2.476 ms / 100) 2.467 -> 2.465 ( -0.08%) [ +0.00% +0.12% +0.12% / -0.08% +0.28% +0.12%] index_select spread : Elapsed 0.025 ms (2.467 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.00% +0.24% +0.04% / +0.00% +0.41% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.465 -> 2.472 ( +0.28%) [ +0.12% +0.20% +0.00% / +0.32% +0.28% +0.49%] index_select strided 5 : Elapsed 0.025 ms (2.468 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.20% +0.41%] index_select strided 7 : Elapsed 0.025 ms (2.468 ms / 100) 2.475 -> 2.472 ( -0.12%) [ +0.12% +0.16% +0.00% / -0.12% +0.24% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.478 ms / 100) 2.460 -> 2.469 ( +0.37%) [ +0.28% +0.33% +0.00% / +0.41% +0.49% +0.37%] index_select random : Elapsed 0.025 ms (2.467 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.33% +0.04% +0.00% / -0.04% +0.53% +0.33%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.41% +0.45%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.33% +0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) out_shape = [20, 4, 16, 5] in_shape = [20, 4, 16, 40] idx_dim = 3 B = [20, 4, 16, 5] (stride (320, 80, 1, 16)) A = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) dim = 3 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.54% +0.54%] index_select const : Elapsed 0.015 ms (1.494 ms / 100) 1.492 -> 1.492 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.40% +0.60%] index_select wrap : Elapsed 0.015 ms (1.493 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.60%] index_select linear : Elapsed 0.015 ms (1.494 ms / 100) 1.490 -> 1.493 ( +0.20%) [ +0.27% +0.34% +0.00% / +0.20% +0.74% +0.74%] index_select reverse : Elapsed 0.015 ms (1.494 ms / 100) 1.490 -> 1.493 ( +0.20%) [ +0.00% +0.20% +0.27% / +0.20% +0.67% +0.74%] index_select skip64 : Elapsed 0.015 ms (1.490 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.60% +0.60%] index_select skip256 : Elapsed 0.015 ms (1.494 ms / 100) 1.491 -> 1.491 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.47% +0.40%] index_select spread : Elapsed 0.015 ms (1.492 ms / 100) 1.490 -> 1.491 ( +0.07%) [ +0.13% +0.20% +0.00% / +0.07% +0.47% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.492 ms / 100) 1.482 -> 1.485 ( +0.20%) [ +0.27% +0.00% +0.13% / +0.40% +0.88% +0.20%] index_select strided 5 : Elapsed 0.015 ms (1.486 ms / 100) 1.489 -> 1.491 ( +0.13%) [ +0.07% +0.00% +0.13% / +0.13% +0.60% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.490 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.67% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.494 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.20% +0.14% +0.00% / +0.07% +0.54% +0.81%] index_select strided 16 : Elapsed 0.015 ms (1.484 ms / 100) 1.479 -> 1.483 ( +0.27%) [ +0.61% +0.27% +0.00% / +0.27% +0.95% +0.47%] index_select random : Elapsed 0.015 ms (1.488 ms / 100) 1.487 -> 1.489 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.67% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.488 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.67% +0.67%] index_select perm : Elapsed 0.015 ms (1.493 ms / 100) 1.491 -> 1.485 ( -0.40%) [ +0.00% +0.13% +0.07% / -0.40% +0.67% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.491 ms / 100) B = [20, 4, 16, 5] (stride (320, 16, 1, 64)) A = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) dim = 3 1.493 -> 1.493 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.60%] index_select const : Elapsed 0.015 ms (1.493 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.60%] index_select wrap : Elapsed 0.015 ms (1.493 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.67% +0.67%] index_select linear : Elapsed 0.015 ms (1.496 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.00% +0.27% +0.00% / +0.07% +0.60% +0.60%] index_select reverse : Elapsed 0.015 ms (1.496 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.60% +0.67%] index_select skip64 : Elapsed 0.015 ms (1.493 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.54%] index_select skip256 : Elapsed 0.015 ms (1.493 ms / 100) 1.480 -> 1.488 ( +0.54%) [ +0.61% +0.47% +0.00% / +0.54% +1.22% +1.35%] index_select spread : Elapsed 0.015 ms (1.489 ms / 100) 1.489 -> 1.489 ( +0.00%) [ +0.00% +0.20% +0.27% / +0.00% +0.81% +0.87%] index_select strided 3 : Elapsed 0.015 ms (1.489 ms / 100) 1.491 -> 1.491 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.60% +0.60%] index_select strided 5 : Elapsed 0.015 ms (1.492 ms / 100) 1.488 -> 1.488 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.60% +0.67%] index_select strided 7 : Elapsed 0.015 ms (1.488 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.67% +0.87%] index_select strided 8 : Elapsed 0.015 ms (1.494 ms / 100) 1.487 -> 1.487 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.67% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.488 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.54%] index_select random : Elapsed 0.015 ms (1.494 ms / 100) 1.489 -> 1.490 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.67% +0.60%] index_select random_sorted : Elapsed 0.015 ms (1.489 ms / 100) 1.488 -> 1.491 ( +0.20%) [ +0.34% +0.07% +0.00% / +0.20% +0.94% +0.81%] index_select perm : Elapsed 0.015 ms (1.493 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.67% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.493 ms / 100) B = [20, 4, 16, 5] (stride (5, 1600, 100, 1)) A = [20, 4, 16, 40] (stride (64, 16, 1, 1280)) dim = 3 1.182 -> 1.184 ( +0.17%) [ +0.42% +0.00% +0.51% / +0.17% +0.59% +0.51%] index_select const : Elapsed 0.012 ms (1.187 ms / 100) 1.182 -> 1.185 ( +0.25%) [ +0.51% +0.00% +0.00% / +0.25% +0.25% +0.34%] index_select wrap : Elapsed 0.012 ms (1.188 ms / 100) 1.183 -> 1.183 ( +0.00%) [ +0.08% +0.00% +0.51% / +0.00% +0.42% +0.51%] index_select linear : Elapsed 0.012 ms (1.184 ms / 100) 1.182 -> 1.186 ( +0.34%) [ +0.34% +0.00% +0.00% / +0.42% +0.51% +0.34%] index_select reverse : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.184 ( +0.17%) [ +0.51% +0.00% +0.08% / +0.17% +0.42% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.188 ms / 100) 1.181 -> 1.188 ( +0.59%) [ +0.25% +0.59% +0.00% / +0.59% +0.59% +0.59%] index_select skip256 : Elapsed 0.012 ms (1.184 ms / 100) 1.180 -> 1.187 ( +0.59%) [ +0.59% +0.00% +0.42% / +0.68% +0.68% +0.59%] index_select spread : Elapsed 0.012 ms (1.187 ms / 100) 1.184 -> 1.182 ( -0.17%) [ +0.00% +0.34% +0.25% / -0.17% +0.42% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.184 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.34% +0.00% +0.34% / +0.42% +0.42% +0.51%] index_select strided 5 : Elapsed 0.012 ms (1.186 ms / 100) 1.180 -> 1.181 ( +0.08%) [ +0.76% +0.42% +0.00% / +0.08% +0.76% +0.42%] index_select strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.184 -> 1.188 ( +0.34%) [ +0.00% +0.17% +0.00% / +0.34% +0.51% +0.42%] index_select strided 8 : Elapsed 0.012 ms (1.184 ms / 100) 1.179 -> 1.182 ( +0.25%) [ +0.25% +0.00% +0.59% / +0.25% +0.85% +0.51%] index_select strided 16 : Elapsed 0.012 ms (1.182 ms / 100) 1.179 -> 1.185 ( +0.51%) [ +0.76% +0.51% +0.00% / +0.51% +0.93% +0.59%] index_select random : Elapsed 0.012 ms (1.188 ms / 100) 1.182 -> 1.185 ( +0.25%) [ +0.08% +0.00% +0.42% / +0.25% +0.59% +0.68%] index_select random_sorted : Elapsed 0.012 ms (1.183 ms / 100) 1.182 -> 1.186 ( +0.34%) [ +0.59% +0.42% +0.00% / +0.34% +0.59% +0.51%] index_select perm : Elapsed 0.012 ms (1.189 ms / 100) 1.177 -> 1.182 ( +0.42%) [ +0.25% +0.34% +0.00% / +0.42% +0.85% +0.76%] index_select perm_sorted : Elapsed 0.012 ms (1.180 ms / 100) B = [20, 4, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 4, 16, 40] (stride (2560, 1, 160, 4)) dim = 3 1.539 -> 1.540 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.78% +0.84%] index_select const : Elapsed 0.015 ms (1.539 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.26% +0.00% +0.00% / +0.06% +1.75% +0.91%] index_select wrap : Elapsed 0.015 ms (1.543 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.13% +0.00% +0.00% / +0.06% +1.56% +1.04%] index_select linear : Elapsed 0.015 ms (1.541 ms / 100) 1.540 -> 1.540 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.62% +0.97%] index_select reverse : Elapsed 0.015 ms (1.540 ms / 100) 1.538 -> 1.539 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +2.28% +1.04%] index_select skip64 : Elapsed 0.015 ms (1.540 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +1.95% +0.78%] index_select skip256 : Elapsed 0.015 ms (1.540 ms / 100) 1.530 -> 1.531 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.65% +0.65%] index_select spread : Elapsed 0.015 ms (1.530 ms / 100) 1.537 -> 1.537 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.78% +1.50%] index_select strided 3 : Elapsed 0.015 ms (1.539 ms / 100) 1.531 -> 1.533 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.72% +0.72%] index_select strided 5 : Elapsed 0.015 ms (1.532 ms / 100) 1.539 -> 1.538 ( -0.06%) [ +0.32% +0.13% +0.00% / -0.06% +1.04% +1.10%] index_select strided 7 : Elapsed 0.015 ms (1.544 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.72%] index_select strided 8 : Elapsed 0.015 ms (1.523 ms / 100) 1.527 -> 1.528 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.79%] index_select strided 16 : Elapsed 0.015 ms (1.528 ms / 100) 1.534 -> 1.536 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.72% +0.72%] index_select random : Elapsed 0.015 ms (1.534 ms / 100) 1.529 -> 1.529 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.72%] index_select random_sorted : Elapsed 0.015 ms (1.529 ms / 100) 1.528 -> 1.528 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.72% +0.52%] index_select perm : Elapsed 0.015 ms (1.528 ms / 100) 1.533 -> 1.537 ( +0.26%) [ +0.26% +0.26% +0.00% / +0.26% +0.98% +0.91%] index_select perm_sorted : Elapsed 0.015 ms (1.537 ms / 100) B = [20, 4, 16, 5] (stride (1, 20, 400, 80)) A = [20, 4, 16, 40] (stride (64, 16, 1, 1280)) dim = 3 1.276 -> 1.279 ( +0.24%) [ +0.16% +0.16% +0.00% / +0.24% +0.47% +0.55%] index_select const : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.78%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.47%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.71% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.55% +0.55%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.47% +0.70%] index_select strided 5 : Elapsed 0.013 ms (1.281 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.39%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_select strided 16 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +1.02%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.71%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.71%] index_select perm : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.78% +0.63%] index_select perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) B = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) A = [20, 4, 16, 40] (stride (640, 12800, 1, 16)) dim = 3 1.277 -> 1.277 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.78% +0.70%] index_select const : Elapsed 0.013 ms (1.279 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.78% +0.86%] index_select wrap : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.78%] index_select linear : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.86% +0.86%] index_select reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +0.78%] index_select skip64 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.86% +0.94%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.86% +0.78%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.70% +0.78%] index_select strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +1.02% +0.86%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.70% +0.78%] index_select strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.31% +0.08% +0.00% / +0.08% +0.86% +0.86%] index_select strided 8 : Elapsed 0.013 ms (1.280 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.94% +0.78%] index_select strided 16 : Elapsed 0.013 ms (1.278 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +0.78%] index_select random : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.86% +0.86%] index_select random_sorted : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.86%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.78%] index_select perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) B = [20, 4, 16, 5] (stride (1, 320, 20, 1280)) A = [20, 4, 16, 40] (stride (4, 1, 80, 1280)) dim = 3 0.658 -> 0.659 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.30% +0.30% +0.15%] index_select const : Elapsed 0.007 ms (0.658 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.15%] index_select wrap : Elapsed 0.007 ms (0.655 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.15% +0.00%] index_select linear : Elapsed 0.007 ms (0.655 ms / 100) 0.656 -> 0.657 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.30% +0.15% +0.46%] index_select reverse : Elapsed 0.007 ms (0.656 ms / 100) 0.654 -> 0.654 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.31% +0.31%] index_select skip64 : Elapsed 0.007 ms (0.654 ms / 100) 0.665 -> 0.666 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.75% +0.15% +0.15%] index_select skip256 : Elapsed 0.007 ms (0.665 ms / 100) 0.660 -> 0.660 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.15% +0.15%] index_select spread : Elapsed 0.007 ms (0.660 ms / 100) 0.659 -> 0.660 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.46% +0.15%] index_select strided 3 : Elapsed 0.007 ms (0.659 ms / 100) 0.658 -> 0.659 ( +0.15%) [ +0.00% +0.30% +0.15% / +0.46% +0.15% +0.15%] index_select strided 5 : Elapsed 0.007 ms (0.658 ms / 100) 0.659 -> 0.659 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.00% +0.46%] index_select strided 7 : Elapsed 0.007 ms (0.660 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.46% +0.31%] index_select strided 8 : Elapsed 0.007 ms (0.656 ms / 100) 0.661 -> 0.661 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.91% +1.06%] index_select strided 16 : Elapsed 0.007 ms (0.662 ms / 100) 0.660 -> 0.660 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +1.06% +0.91%] index_select random : Elapsed 0.007 ms (0.660 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +6.26% +0.15% +0.00% / +0.00% +0.46% +0.76%] index_select random_sorted : Elapsed 0.007 ms (0.696 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.76% +0.92%] index_select perm : Elapsed 0.007 ms (0.656 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.15% +0.31% +0.00% / +0.00% +1.07% +0.92%] index_select perm_sorted : Elapsed 0.007 ms (0.656 ms / 100) out_shape = [5, 4, 40, 16] in_shape = [20, 4, 40, 16] idx_dim = 0 B = [5, 4, 40, 16] (stride (2560, 640, 16, 1)) A = [20, 4, 40, 16] (stride (4, 1, 1280, 80)) dim = 0 1.558 -> 1.569 ( +0.71%) [ +0.64% +0.51% +0.00% / +0.71% +0.83% +1.09%] index_select const : Elapsed 0.016 ms (1.568 ms / 100) 1.568 -> 1.572 ( +0.26%) [ +0.38% +0.00% +0.32% / +0.26% +0.45% +0.70%] index_select wrap : Elapsed 0.016 ms (1.574 ms / 100) 1.568 -> 1.573 ( +0.32%) [ +0.57% +0.00% +0.26% / +0.38% +0.32% +0.32%] index_select linear : Elapsed 0.016 ms (1.577 ms / 100) 1.572 -> 1.570 ( -0.13%) [ +0.00% +0.13% +0.06% / -0.13% +0.51% +0.00%] index_select reverse : Elapsed 0.016 ms (1.572 ms / 100) 1.560 -> 1.569 ( +0.58%) [ +0.38% +0.32% +0.00% / +0.71% +1.03% +0.58%] index_select skip64 : Elapsed 0.016 ms (1.566 ms / 100) 1.561 -> 1.565 ( +0.26%) [ +0.19% +0.00% +0.06% / +0.26% +0.51% +0.58%] index_select skip256 : Elapsed 0.016 ms (1.564 ms / 100) 1.575 -> 1.578 ( +0.19%) [ +0.00% +0.32% +0.25% / +0.32% +0.19% +0.19%] index_select spread : Elapsed 0.016 ms (1.575 ms / 100) 1.579 -> 1.577 ( -0.13%) [ +0.06% +0.06% +0.00% / -0.13% +0.44% +0.38%] index_select strided 3 : Elapsed 0.016 ms (1.580 ms / 100) 1.566 -> 1.568 ( +0.13%) [ +0.38% +0.00% +0.70% / +0.45% +0.38% +0.13%] index_select strided 5 : Elapsed 0.016 ms (1.572 ms / 100) 1.561 -> 1.562 ( +0.06%) [ +0.13% +0.19% +0.00% / +0.06% +0.38% +0.19%] index_select strided 7 : Elapsed 0.016 ms (1.563 ms / 100) 1.571 -> 1.570 ( -0.06%) [ +0.32% +0.25% +0.00% / -0.06% +0.45% +0.45%] index_select strided 8 : Elapsed 0.016 ms (1.576 ms / 100) 1.576 -> 1.580 ( +0.25%) [ +0.13% +0.00% +0.06% / +0.25% +0.44% +0.32%] index_select strided 16 : Elapsed 0.016 ms (1.578 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.19% +0.06%] index_select random : Elapsed 0.016 ms (1.576 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.06% +0.00% +0.06% / +0.13% +0.25% +0.38%] index_select random_sorted : Elapsed 0.016 ms (1.577 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.00% +0.19% +0.19% / +0.06% +0.13% +0.51%] index_select perm : Elapsed 0.016 ms (1.576 ms / 100) 1.585 -> 1.590 ( +0.32%) [ +0.00% +0.00% +0.00% / +0.32% +0.38% +0.38%] index_select perm_sorted : Elapsed 0.016 ms (1.585 ms / 100) B = [5, 4, 40, 16] (stride (2560, 1, 64, 4)) A = [20, 4, 40, 16] (stride (4, 1, 1280, 80)) dim = 0 0.686 -> 0.687 ( +0.15%) [ +0.29% +0.15% +0.00% / +0.15% +0.58% +0.58%] index_select const : Elapsed 0.007 ms (0.688 ms / 100) 0.687 -> 0.687 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +1.31% +1.16%] index_select wrap : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.688 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +1.16% +1.31%] index_select linear : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.686 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.73% +1.02%] index_select reverse : Elapsed 0.007 ms (0.687 ms / 100) 0.687 -> 0.687 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +1.31%] index_select skip64 : Elapsed 0.007 ms (0.687 ms / 100) 0.692 -> 0.687 ( -0.72%) [ +0.14% +0.43% +0.00% / +0.14% -0.58% -0.72%] index_select skip256 : Elapsed 0.007 ms (0.693 ms / 100) 0.692 -> 0.688 ( -0.58%) [ +0.14% +0.43% +0.00% / +0.00% -0.43% -0.58%] index_select spread : Elapsed 0.007 ms (0.693 ms / 100) 0.690 -> 0.690 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.00% +0.00%] index_select strided 3 : Elapsed 0.007 ms (0.691 ms / 100) 0.687 -> 0.686 ( -0.15%) [ +0.00% +0.44% +0.15% / +0.44% -0.15% -0.15%] index_select strided 5 : Elapsed 0.007 ms (0.687 ms / 100) 0.684 -> 0.684 ( +0.00%) [ +0.15% +0.29% +0.00% / +0.00% +0.00% +0.15%] index_select strided 7 : Elapsed 0.007 ms (0.685 ms / 100) 0.680 -> 0.679 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% -0.15% -0.15%] index_select strided 8 : Elapsed 0.007 ms (0.680 ms / 100) 0.689 -> 0.687 ( -0.29%) [ +0.00% +0.15% +0.00% / +0.15% -0.29% -0.29%] index_select strided 16 : Elapsed 0.007 ms (0.689 ms / 100) 0.688 -> 0.687 ( -0.15%) [ +0.44% +0.15% +0.00% / -0.15% +0.00% -0.15%] index_select random : Elapsed 0.007 ms (0.691 ms / 100) 0.684 -> 0.683 ( -0.15%) [ +0.00% +0.15% +0.00% / -0.15% +0.88% +0.58%] index_select random_sorted : Elapsed 0.007 ms (0.684 ms / 100) 0.678 -> 0.679 ( +0.15%) [ +0.15% +0.29% +0.00% / +0.15% +1.33% +1.33%] index_select perm : Elapsed 0.007 ms (0.679 ms / 100) 0.677 -> 0.679 ( +0.30%) [ +0.15% +0.59% +0.00% / +0.30% +1.62% +1.48%] index_select perm_sorted : Elapsed 0.007 ms (0.678 ms / 100) B = [5, 4, 40, 16] (stride (16, 3200, 80, 1)) A = [20, 4, 40, 16] (stride (1, 12800, 20, 800)) dim = 0 1.786 -> 1.793 ( +0.39%) [ +0.39% +0.28% +0.00% / +0.39% +0.62% +0.73%] index_select const : Elapsed 0.018 ms (1.793 ms / 100) 1.799 -> 1.800 ( +0.06%) [ +0.17% +0.11% +0.00% / +0.06% +0.50% +0.22%] index_select wrap : Elapsed 0.018 ms (1.802 ms / 100) 1.800 -> 1.802 ( +0.11%) [ +0.22% +0.00% +0.06% / +0.11% +0.44% +0.28%] index_select linear : Elapsed 0.018 ms (1.804 ms / 100) 1.800 -> 1.804 ( +0.22%) [ +0.17% +0.17% +0.00% / +0.22% +0.61% +0.72%] index_select reverse : Elapsed 0.018 ms (1.803 ms / 100) 1.786 -> 1.788 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +0.45% +0.50%] index_select skip64 : Elapsed 0.018 ms (1.788 ms / 100) 1.790 -> 1.790 ( +0.00%) [ +0.17% +0.06% +0.00% / +0.00% +0.22% +0.34%] index_select skip256 : Elapsed 0.018 ms (1.793 ms / 100) 1.820 -> 1.826 ( +0.33%) [ +0.22% +0.22% +0.00% / +0.33% +0.88% +0.93%] index_select spread : Elapsed 0.018 ms (1.824 ms / 100) 1.815 -> 1.816 ( +0.06%) [ +0.00% +0.22% +0.06% / +0.06% +0.55% +0.66%] index_select strided 3 : Elapsed 0.018 ms (1.815 ms / 100) 1.824 -> 1.827 ( +0.16%) [ +0.05% +0.00% +0.27% / +0.16% +0.44% +0.27%] index_select strided 5 : Elapsed 0.018 ms (1.825 ms / 100) 1.812 -> 1.811 ( -0.06%) [ +0.11% +0.28% +0.00% / -0.06% +0.39% +0.83%] index_select strided 7 : Elapsed 0.018 ms (1.814 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.33% +0.00% +0.17% / +0.06% +0.50% +0.44%] index_select strided 8 : Elapsed 0.018 ms (1.815 ms / 100) 1.814 -> 1.817 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +0.66% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.814 ms / 100) 1.811 -> 1.809 ( -0.11%) [ +0.28% +0.00% +0.11% / -0.11% +0.44% +0.61%] index_select random : Elapsed 0.018 ms (1.816 ms / 100) 1.819 -> 1.823 ( +0.22%) [ +0.11% +0.00% +0.11% / +0.22% +0.49% +0.55%] index_select random_sorted : Elapsed 0.018 ms (1.821 ms / 100) 1.800 -> 1.800 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.44% +0.56%] index_select perm : Elapsed 0.018 ms (1.801 ms / 100) 1.814 -> 1.813 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.44% +0.66%] index_select perm_sorted : Elapsed 0.018 ms (1.817 ms / 100) B = [5, 4, 40, 16] (stride (64, 1, 320, 4)) A = [20, 4, 40, 16] (stride (1, 20, 80, 3200)) dim = 0 1.916 -> 1.917 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.37% +0.05% +0.10%] index_select const : Elapsed 0.019 ms (1.916 ms / 100) 1.919 -> 1.921 ( +0.10%) [ +0.16% +0.10% +0.00% / +0.10% +0.52% +0.63%] index_select wrap : Elapsed 0.019 ms (1.922 ms / 100) 1.923 -> 1.920 ( -0.16%) [ +0.10% +0.05% +0.00% / -0.16% +0.52% +0.62%] index_select linear : Elapsed 0.019 ms (1.925 ms / 100) 1.920 -> 1.926 ( +0.31%) [ +0.05% +0.10% +0.00% / +0.36% +0.31% +0.31%] index_select reverse : Elapsed 0.019 ms (1.921 ms / 100) 1.917 -> 1.916 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.52% +0.31%] index_select skip64 : Elapsed 0.019 ms (1.918 ms / 100) 1.916 -> 1.915 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.52% +0.37%] index_select skip256 : Elapsed 0.019 ms (1.917 ms / 100) 1.934 -> 1.939 ( +0.26%) [ +0.05% +0.00% +0.16% / +0.26% +0.47% +0.41%] index_select spread : Elapsed 0.019 ms (1.935 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.26% +0.00% +0.10% / +0.00% +0.57% +0.47%] index_select strided 3 : Elapsed 0.019 ms (1.940 ms / 100) 1.940 -> 1.936 ( -0.21%) [ +0.10% +0.00% +0.10% / -0.21% +0.36% +0.36%] index_select strided 5 : Elapsed 0.019 ms (1.942 ms / 100) 1.938 -> 1.942 ( +0.21%) [ +0.21% +0.26% +0.00% / +0.26% +0.21% +0.26%] index_select strided 7 : Elapsed 0.019 ms (1.942 ms / 100) 1.938 -> 1.940 ( +0.10%) [ +0.00% +0.31% +0.10% / +0.15% +0.10% +0.26%] index_select strided 8 : Elapsed 0.019 ms (1.938 ms / 100) 1.938 -> 1.935 ( -0.15%) [ +0.15% +0.10% +0.00% / -0.15% +0.31% +0.36%] index_select strided 16 : Elapsed 0.019 ms (1.941 ms / 100) 1.937 -> 1.938 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.10% +0.26%] index_select random : Elapsed 0.019 ms (1.939 ms / 100) 1.938 -> 1.943 ( +0.26%) [ +0.31% +0.21% +0.00% / +0.52% +0.41% +0.26%] index_select random_sorted : Elapsed 0.019 ms (1.944 ms / 100) 1.943 -> 1.947 ( +0.21%) [ +0.00% +0.10% +0.26% / +0.21% +0.31% +0.31%] index_select perm : Elapsed 0.019 ms (1.943 ms / 100) 1.938 -> 1.937 ( -0.05%) [ +0.10% +0.15% +0.00% / -0.05% +0.10% +0.00%] index_select perm_sorted : Elapsed 0.019 ms (1.940 ms / 100) B = [5, 4, 40, 16] (stride (1, 80, 320, 5)) A = [20, 4, 40, 16] (stride (1, 800, 20, 3200)) dim = 0 1.800 -> 1.800 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.61% +0.50%] index_select const : Elapsed 0.018 ms (1.801 ms / 100) 1.810 -> 1.815 ( +0.28%) [ +0.00% +0.06% +0.28% / +0.28% +0.50% +0.39%] index_select wrap : Elapsed 0.018 ms (1.810 ms / 100) 1.813 -> 1.814 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.55% +0.55%] index_select linear : Elapsed 0.018 ms (1.813 ms / 100) 1.812 -> 1.818 ( +0.33%) [ +0.00% +0.11% +0.28% / +0.33% +0.55% +0.55%] index_select reverse : Elapsed 0.018 ms (1.812 ms / 100) 1.800 -> 1.801 ( +0.06%) [ +0.06% +0.00% +0.17% / +0.06% +0.44% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.801 ms / 100) 1.799 -> 1.801 ( +0.11%) [ +0.00% +0.06% +0.06% / +0.11% +0.67% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.799 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.22% +0.11% +0.00% / +0.05% +0.77% +0.82%] index_select spread : Elapsed 0.018 ms (1.830 ms / 100) 1.823 -> 1.826 ( +0.16%) [ +0.33% +0.22% +0.00% / +0.16% +0.77% +0.93%] index_select strided 3 : Elapsed 0.018 ms (1.829 ms / 100) 1.830 -> 1.834 ( +0.22%) [ +0.05% +0.00% +0.05% / +0.22% +0.71% +0.77%] index_select strided 5 : Elapsed 0.018 ms (1.831 ms / 100) 1.819 -> 1.819 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.55% +0.82%] index_select strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.818 -> 1.818 ( +0.00%) [ +0.28% +0.06% +0.00% / +0.00% +0.50% +0.50%] index_select strided 8 : Elapsed 0.018 ms (1.823 ms / 100) 1.825 -> 1.825 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.77% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.828 ms / 100) 1.832 -> 1.837 ( +0.27%) [ +0.00% +0.22% +0.00% / +0.27% +0.71% +0.71%] index_select random : Elapsed 0.018 ms (1.832 ms / 100) 1.825 -> 1.833 ( +0.44%) [ +0.55% +0.27% +0.00% / +0.44% +1.10% +0.93%] index_select random_sorted : Elapsed 0.018 ms (1.835 ms / 100) 1.816 -> 1.818 ( +0.11%) [ +0.06% +0.00% +0.22% / +0.11% +0.77% +0.66%] index_select perm : Elapsed 0.018 ms (1.817 ms / 100) 1.833 -> 1.832 ( -0.05%) [ +0.00% +0.16% +0.11% / -0.05% +0.82% +0.65%] index_select perm_sorted : Elapsed 0.018 ms (1.833 ms / 100) B = [5, 4, 40, 16] (stride (1, 5, 320, 20)) A = [20, 4, 40, 16] (stride (2560, 1, 4, 160)) dim = 0 1.724 -> 1.722 ( -0.12%) [ +0.23% +0.17% +0.00% / -0.12% +0.58% +0.17%] index_select const : Elapsed 0.017 ms (1.728 ms / 100) 1.729 -> 1.727 ( -0.12%) [ +0.06% +0.29% +0.00% / -0.12% +0.29% +0.29%] index_select wrap : Elapsed 0.017 ms (1.730 ms / 100) 1.716 -> 1.716 ( +0.00%) [ +0.70% +0.00% +0.12% / +0.00% +0.52% +0.35%] index_select linear : Elapsed 0.017 ms (1.728 ms / 100) 1.721 -> 1.717 ( -0.23%) [ +0.23% +0.06% +0.00% / -0.23% +0.29% +0.46%] index_select reverse : Elapsed 0.017 ms (1.725 ms / 100) 1.719 -> 1.723 ( +0.23%) [ +0.17% +0.17% +0.00% / +0.23% +0.81% +0.81%] index_select skip64 : Elapsed 0.017 ms (1.722 ms / 100) 1.726 -> 1.727 ( +0.06%) [ +0.17% +0.06% +0.00% / +0.06% +0.35% +0.35%] index_select skip256 : Elapsed 0.017 ms (1.729 ms / 100) 1.731 -> 1.733 ( +0.12%) [ +0.35% +0.12% +0.00% / +0.12% +0.35% +0.52%] index_select spread : Elapsed 0.017 ms (1.737 ms / 100) 1.734 -> 1.738 ( +0.23%) [ +0.29% +0.00% +0.12% / +0.23% +0.29% +0.40%] index_select strided 3 : Elapsed 0.017 ms (1.739 ms / 100) 1.718 -> 1.721 ( +0.17%) [ +0.00% +0.00% +0.06% / +0.17% +0.35% +0.47%] index_select strided 5 : Elapsed 0.017 ms (1.718 ms / 100) 1.722 -> 1.725 ( +0.17%) [ +0.23% +0.00% +0.00% / +0.17% +0.41% +0.70%] index_select strided 7 : Elapsed 0.017 ms (1.726 ms / 100) 1.723 -> 1.725 ( +0.12%) [ +0.00% +0.00% +0.12% / +0.12% +0.29% +0.46%] index_select strided 8 : Elapsed 0.017 ms (1.723 ms / 100) 1.724 -> 1.727 ( +0.17%) [ +0.17% +0.00% +0.12% / +0.17% +0.52% +0.29%] index_select strided 16 : Elapsed 0.017 ms (1.727 ms / 100) 1.723 -> 1.723 ( +0.00%) [ +0.00% +0.12% +0.23% / +0.06% +0.00% +0.00%] index_select random : Elapsed 0.017 ms (1.723 ms / 100) 1.722 -> 1.723 ( +0.06%) [ +0.00% +0.17% +0.00% / +0.29% +0.23% +0.06%] index_select random_sorted : Elapsed 0.017 ms (1.722 ms / 100) 1.726 -> 1.728 ( +0.12%) [ +0.00% +0.23% +0.12% / +0.12% +0.17% +0.23%] index_select perm : Elapsed 0.017 ms (1.726 ms / 100) 1.723 -> 1.725 ( +0.12%) [ +0.12% +0.00% +0.35% / +0.12% +0.41% +0.35%] index_select perm_sorted : Elapsed 0.017 ms (1.725 ms / 100) out_shape = [20, 5, 40, 16] in_shape = [20, 4, 40, 16] idx_dim = 1 B = [20, 5, 40, 16] (stride (3200, 40, 1, 200)) A = [20, 4, 40, 16] (stride (160, 40, 1, 3200)) dim = 1 5.973 -> 5.962 ( -0.18%) [ +0.17% +0.12% +0.00% / +0.25% -0.18% -0.15%] index_add_ linear : Elapsed 0.060 ms (5.983 ms / 100) 5.875 -> 5.869 ( -0.10%) [ +0.00% +0.03% +0.05% / +0.12% -0.10% -0.09%] index_copy_ linear : Elapsed 0.059 ms (5.875 ms / 100) 5.970 -> 5.957 ( -0.22%) [ +0.00% +0.10% +0.08% / +0.12% -0.22% -0.07%] index_add_ reverse : Elapsed 0.060 ms (5.970 ms / 100) 5.874 -> 5.861 ( -0.22%) [ +0.00% +0.05% +0.17% / +0.14% -0.22% -0.14%] index_copy_ reverse : Elapsed 0.059 ms (5.874 ms / 100) 5.972 -> 5.963 ( -0.15%) [ +0.00% +0.00% +0.05% / +0.03% -0.13% -0.15%] index_add_ spread : Elapsed 0.060 ms (5.972 ms / 100) 5.872 -> 5.857 ( -0.26%) [ +0.00% +0.17% +0.20% / +0.09% -0.26% -0.20%] index_copy_ spread : Elapsed 0.059 ms (5.872 ms / 100) 5.980 -> 5.968 ( -0.20%) [ +0.00% +0.08% +0.00% / +0.02% -0.18% -0.20%] index_add_ strided 3 : Elapsed 0.060 ms (5.980 ms / 100) 5.884 -> 5.871 ( -0.22%) [ +0.00% +0.07% +0.14% / +0.07% -0.19% -0.22%] index_copy_ strided 3 : Elapsed 0.059 ms (5.884 ms / 100) 5.965 -> 5.965 ( +0.00%) [ +0.08% +0.00% +0.10% / +0.07% +0.10% +0.00%] index_add_ perm : Elapsed 0.060 ms (5.970 ms / 100) 5.866 -> 5.863 ( -0.05%) [ +0.14% +0.00% +0.07% / +0.09% +0.09% -0.05%] index_copy_ perm : Elapsed 0.059 ms (5.874 ms / 100) 5.967 -> 5.964 ( -0.05%) [ +0.15% +0.00% +0.08% / +0.00% -0.05% +0.03%] index_add_ perm_sorted : Elapsed 0.060 ms (5.976 ms / 100) 5.866 -> 5.862 ( -0.07%) [ +0.00% +0.10% +0.12% / +0.14% -0.07% -0.05%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.866 ms / 100) 6.170 -> 6.156 ( -0.23%) [ +0.13% +0.00% +0.18% / -0.03% -0.23% -0.18%] index_select const : Elapsed 0.062 ms (6.178 ms / 100) 6.272 -> 6.260 ( -0.19%) [ +0.00% +0.18% +0.13% / +0.11% -0.18% -0.19%] index_select wrap : Elapsed 0.063 ms (6.272 ms / 100) 6.260 -> 6.238 ( -0.35%) [ +0.00% +0.06% +0.06% / +0.00% -0.19% -0.35%] index_select linear : Elapsed 0.063 ms (6.260 ms / 100) 6.256 -> 6.238 ( -0.29%) [ +0.00% +0.00% +0.05% / +0.21% -0.19% -0.29%] index_select reverse : Elapsed 0.063 ms (6.256 ms / 100) 6.170 -> 6.155 ( -0.24%) [ +0.00% +0.11% +0.00% / +0.05% -0.16% -0.24%] index_select skip64 : Elapsed 0.062 ms (6.170 ms / 100) 6.166 -> 6.150 ( -0.26%) [ +0.00% +0.19% +0.21% / +0.15% -0.18% -0.26%] index_select skip256 : Elapsed 0.062 ms (6.166 ms / 100) 6.248 -> 6.231 ( -0.27%) [ +0.00% +0.06% +0.10% / +0.16% -0.18% -0.27%] index_select spread : Elapsed 0.062 ms (6.248 ms / 100) 6.270 -> 6.246 ( -0.38%) [ +0.08% +0.00% +0.02% / +0.19% -0.38% -0.27%] index_select strided 3 : Elapsed 0.063 ms (6.275 ms / 100) 6.264 -> 6.249 ( -0.24%) [ +0.11% +0.00% +0.10% / +0.06% -0.24% -0.19%] index_select random : Elapsed 0.063 ms (6.271 ms / 100) 6.252 -> 6.220 ( -0.51%) [ +0.03% +0.02% +0.00% / +0.14% -0.51% -0.22%] index_select random_sorted : Elapsed 0.063 ms (6.254 ms / 100) B = [20, 5, 40, 16] (stride (640, 12800, 1, 40)) A = [20, 4, 40, 16] (stride (1, 12800, 320, 20)) dim = 1 5.539 -> 5.546 ( +0.13%) [ +0.22% +0.00% +0.07% / +0.23% +0.13% +0.14%] index_add_ linear : Elapsed 0.056 ms (5.551 ms / 100) 5.514 -> 5.514 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.13% +0.00%] index_copy_ linear : Elapsed 0.055 ms (5.514 ms / 100) 5.528 -> 5.529 ( +0.02%) [ +0.16% +0.00% +0.00% / +0.02% +0.09% +0.11%] index_add_ reverse : Elapsed 0.055 ms (5.537 ms / 100) 5.496 -> 5.506 ( +0.18%) [ +0.00% +0.05% +0.15% / +0.18% +0.22% +0.35%] index_copy_ reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.540 -> 5.541 ( +0.02%) [ +0.14% +0.00% +0.05% / +0.29% +0.05% +0.02%] index_add_ spread : Elapsed 0.055 ms (5.548 ms / 100) 5.515 -> 5.512 ( -0.05%) [ +0.00% +0.13% +0.04% / +0.05% -0.05% +0.05%] index_copy_ spread : Elapsed 0.055 ms (5.515 ms / 100) 5.520 -> 5.521 ( +0.02%) [ +0.13% +0.00% +0.16% / +0.02% +0.56% +0.60%] index_add_ strided 3 : Elapsed 0.055 ms (5.527 ms / 100) 5.494 -> 5.497 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.60% +0.66%] index_copy_ strided 3 : Elapsed 0.055 ms (5.497 ms / 100) 5.525 -> 5.529 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.27% +0.18%] index_add_ perm : Elapsed 0.055 ms (5.525 ms / 100) 5.496 -> 5.507 ( +0.20%) [ +0.00% +0.13% +0.29% / +0.20% +0.35% +0.33%] index_copy_ perm : Elapsed 0.055 ms (5.496 ms / 100) 5.532 -> 5.541 ( +0.16%) [ +0.00% +0.09% +0.25% / +0.16% +0.43% +0.31%] index_add_ perm_sorted : Elapsed 0.055 ms (5.532 ms / 100) 5.502 -> 5.518 ( +0.29%) [ +0.02% +0.00% +0.00% / +0.29% +0.31% +0.42%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.503 ms / 100) 5.811 -> 5.815 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.09% +0.07% +0.12%] index_select const : Elapsed 0.058 ms (5.811 ms / 100) 5.840 -> 5.843 ( +0.05%) [ +0.02% +0.00% +0.03% / +0.05% +0.27% +0.21%] index_select wrap : Elapsed 0.058 ms (5.841 ms / 100) 5.829 -> 5.837 ( +0.14%) [ +0.09% +0.00% +0.12% / +0.14% +0.53% +0.45%] index_select linear : Elapsed 0.058 ms (5.834 ms / 100) 5.812 -> 5.821 ( +0.15%) [ +0.09% +0.00% +0.09% / +0.15% +0.48% +0.41%] index_select reverse : Elapsed 0.058 ms (5.817 ms / 100) 5.809 -> 5.810 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.14% +0.02% +0.10%] index_select skip64 : Elapsed 0.058 ms (5.809 ms / 100) 5.804 -> 5.811 ( +0.12%) [ +0.07% +0.02% +0.00% / +0.12% +0.14% +0.24%] index_select skip256 : Elapsed 0.058 ms (5.808 ms / 100) 5.830 -> 5.835 ( +0.09%) [ +0.00% +0.02% +0.10% / +0.09% +0.46% +0.22%] index_select spread : Elapsed 0.058 ms (5.830 ms / 100) 5.821 -> 5.827 ( +0.10%) [ +0.00% +0.00% +0.02% / +0.10% +0.26% +0.27%] index_select strided 3 : Elapsed 0.058 ms (5.821 ms / 100) 5.807 -> 5.813 ( +0.10%) [ +0.00% +0.00% +0.09% / +0.10% +1.07% +1.02%] index_select random : Elapsed 0.058 ms (5.807 ms / 100) 5.787 -> 5.800 ( +0.22%) [ +0.00% +0.09% +0.14% / +0.22% +0.97% +0.81%] index_select random_sorted : Elapsed 0.058 ms (5.787 ms / 100) B = [20, 5, 40, 16] (stride (1, 12800, 320, 20)) dim = 1 fill_cnt = 4 1.782 -> 1.787 ( +0.28%) [ +0.00% +0.06% +0.00% / +0.34% +0.34% +0.28%] index_fill_ const : Elapsed 0.018 ms (1.782 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.17% +0.00% +0.28% / +0.17% +0.34% +0.28%] index_fill_ linear : Elapsed 0.018 ms (1.776 ms / 100) 1.764 -> 1.765 ( +0.06%) [ +0.00% +0.17% +0.28% / +0.06% +0.57% +0.79%] index_fill_ reverse : Elapsed 0.018 ms (1.764 ms / 100) 1.752 -> 1.751 ( -0.06%) [ +0.23% +0.00% +0.06% / -0.06% +0.11% +0.29%] index_fill_ skip64 : Elapsed 0.018 ms (1.756 ms / 100) 1.753 -> 1.754 ( +0.06%) [ +0.17% +0.11% +0.00% / +0.06% +0.40% +0.11%] index_fill_ skip256 : Elapsed 0.018 ms (1.756 ms / 100) 1.785 -> 1.783 ( -0.11%) [ +0.00% +0.17% +0.06% / +0.22% +0.22% -0.11%] index_fill_ spread : Elapsed 0.018 ms (1.785 ms / 100) 1.774 -> 1.775 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.51% +0.23%] index_fill_ strided 3 : Elapsed 0.018 ms (1.776 ms / 100) 1.763 -> 1.769 ( +0.34%) [ +0.00% +0.28% +0.11% / +0.34% +0.51% +0.79%] index_fill_ random : Elapsed 0.018 ms (1.763 ms / 100) 1.762 -> 1.765 ( +0.17%) [ +0.06% +0.00% +0.17% / +0.17% +0.45% +0.40%] index_fill_ random_sorted : Elapsed 0.018 ms (1.763 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.17% +0.28%] index_fill_ perm : Elapsed 0.018 ms (1.770 ms / 100) 1.766 -> 1.769 ( +0.17%) [ +0.40% +0.00% +0.40% / +0.45% +0.34% +0.17%] index_fill_ perm_sorted : Elapsed 0.018 ms (1.773 ms / 100) B = [20, 5, 40, 16] (stride (1, 320, 1600, 20)) A = [20, 4, 40, 16] (stride (16, 320, 1280, 1)) dim = 1 5.851 -> 5.855 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.36% +0.07% +0.10%] index_add_ linear : Elapsed 0.059 ms (5.851 ms / 100) 5.806 -> 5.802 ( -0.07%) [ +0.14% +0.00% +0.12% / +0.10% -0.07% +0.02%] index_copy_ linear : Elapsed 0.058 ms (5.814 ms / 100) 5.853 -> 5.858 ( +0.09%) [ +0.09% +0.07% +0.00% / +0.12% +0.09% +0.09%] index_add_ reverse : Elapsed 0.059 ms (5.858 ms / 100) 5.805 -> 5.800 ( -0.09%) [ +0.09% +0.00% +0.02% / -0.09% -0.02% +0.05%] index_copy_ reverse : Elapsed 0.058 ms (5.810 ms / 100) 5.842 -> 5.843 ( +0.02%) [ +0.19% +0.00% +0.19% / +0.02% +0.19% +0.14%] index_add_ spread : Elapsed 0.059 ms (5.853 ms / 100) 5.794 -> 5.789 ( -0.09%) [ +0.22% +0.00% +0.00% / +0.21% -0.09% -0.09%] index_copy_ spread : Elapsed 0.058 ms (5.807 ms / 100) 5.847 -> 5.840 ( -0.12%) [ +0.03% +0.00% +0.12% / +0.12% -0.12% -0.12%] index_add_ strided 3 : Elapsed 0.058 ms (5.849 ms / 100) 5.793 -> 5.777 ( -0.28%) [ +0.12% +0.00% +0.05% / +0.26% -0.14% -0.28%] index_copy_ strided 3 : Elapsed 0.058 ms (5.800 ms / 100) 5.846 -> 5.838 ( -0.14%) [ +0.03% +0.00% +0.10% / +0.15% -0.14% -0.09%] index_add_ perm : Elapsed 0.058 ms (5.848 ms / 100) 5.791 -> 5.784 ( -0.12%) [ +0.00% +0.10% +0.05% / +0.12% -0.12% -0.03%] index_copy_ perm : Elapsed 0.058 ms (5.791 ms / 100) 5.839 -> 5.842 ( +0.05%) [ +0.12% +0.00% +0.05% / +0.05% +0.31% +0.22%] index_add_ perm_sorted : Elapsed 0.058 ms (5.846 ms / 100) 5.783 -> 5.795 ( +0.21%) [ +0.00% +0.17% +0.29% / +0.26% +0.21% +0.45%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.783 ms / 100) 6.047 -> 6.051 ( +0.07%) [ +0.08% +0.00% +0.07% / +0.07% +0.51% +0.50%] index_select const : Elapsed 0.061 ms (6.052 ms / 100) 6.163 -> 6.180 ( +0.28%) [ +0.16% +0.08% +0.00% / +0.28% +0.39% +0.39%] index_select wrap : Elapsed 0.062 ms (6.173 ms / 100) 6.146 -> 6.150 ( +0.07%) [ +0.02% +0.05% +0.00% / +0.11% +0.21% +0.07%] index_select linear : Elapsed 0.061 ms (6.147 ms / 100) 6.152 -> 6.168 ( +0.26%) [ +0.11% +0.02% +0.00% / +0.28% +0.29% +0.26%] index_select reverse : Elapsed 0.062 ms (6.159 ms / 100) 6.035 -> 6.042 ( +0.12%) [ +0.00% +0.02% +0.12% / +0.12% +0.43% +0.36%] index_select skip64 : Elapsed 0.060 ms (6.035 ms / 100) 6.048 -> 6.063 ( +0.25%) [ +0.07% +0.00% +0.12% / +0.25% +0.50% +0.53%] index_select skip256 : Elapsed 0.061 ms (6.052 ms / 100) 6.159 -> 6.153 ( -0.10%) [ +0.00% +0.08% +0.03% / -0.10% +0.08% +0.06%] index_select spread : Elapsed 0.062 ms (6.159 ms / 100) 6.141 -> 6.160 ( +0.31%) [ +0.00% +0.23% +0.21% / +0.31% +0.41% +0.49%] index_select strided 3 : Elapsed 0.061 ms (6.141 ms / 100) 6.117 -> 6.102 ( -0.25%) [ +0.05% +0.00% +0.11% / +0.20% -0.25% -0.23%] index_select random : Elapsed 0.061 ms (6.120 ms / 100) 6.123 -> 6.101 ( -0.36%) [ +0.00% +0.13% +0.03% / +0.24% -0.28% -0.36%] index_select random_sorted : Elapsed 0.061 ms (6.123 ms / 100) B = [20, 5, 40, 16] (stride (40, 800, 1, 4000)) A = [20, 4, 40, 16] (stride (640, 12800, 1, 40)) dim = 1 5.887 -> 5.893 ( +0.10%) [ +0.00% +0.14% +0.15% / +0.10% +0.44% +0.37%] index_add_ linear : Elapsed 0.059 ms (5.887 ms / 100) 5.770 -> 5.771 ( +0.02%) [ +0.00% +0.05% +0.17% / +0.16% +0.02% +0.17%] index_copy_ linear : Elapsed 0.058 ms (5.770 ms / 100) 5.872 -> 5.879 ( +0.12%) [ +0.09% +0.00% +0.05% / +0.12% +0.12% +0.19%] index_add_ reverse : Elapsed 0.059 ms (5.877 ms / 100) 5.756 -> 5.763 ( +0.12%) [ +0.05% +0.16% +0.00% / +0.14% +0.16% +0.12%] index_copy_ reverse : Elapsed 0.058 ms (5.759 ms / 100) 5.893 -> 5.900 ( +0.12%) [ +0.07% +0.00% +0.10% / +0.12% +0.27% +0.14%] index_add_ spread : Elapsed 0.059 ms (5.897 ms / 100) 5.768 -> 5.770 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.23% +0.07% +0.03%] index_copy_ spread : Elapsed 0.058 ms (5.770 ms / 100) 5.870 -> 5.870 ( +0.00%) [ +0.05% +0.00% +0.12% / +0.05% +0.02% +0.00%] index_add_ strided 3 : Elapsed 0.059 ms (5.873 ms / 100) 5.759 -> 5.761 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.09% +0.09% +0.03%] index_copy_ strided 3 : Elapsed 0.058 ms (5.762 ms / 100) 5.907 -> 5.907 ( +0.00%) [ +0.14% +0.00% +0.05% / +0.00% +0.29% +0.27%] index_add_ perm : Elapsed 0.059 ms (5.915 ms / 100) 5.786 -> 5.781 ( -0.09%) [ +0.00% +0.05% +0.02% / +0.09% -0.09% +0.02%] index_copy_ perm : Elapsed 0.058 ms (5.786 ms / 100) 5.898 -> 5.905 ( +0.12%) [ +0.07% +0.00% +0.14% / +0.12% +0.20% +0.19%] index_add_ perm_sorted : Elapsed 0.059 ms (5.902 ms / 100) 5.777 -> 5.773 ( -0.07%) [ +0.00% +0.00% +0.03% / +0.12% -0.07% -0.02%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.777 ms / 100) 6.121 -> 6.107 ( -0.23%) [ +0.00% +0.05% +0.05% / +0.07% -0.20% -0.23%] index_select const : Elapsed 0.061 ms (6.121 ms / 100) 6.155 -> 6.157 ( +0.03%) [ +0.00% +0.03% +0.13% / +0.13% +0.06% +0.03%] index_select wrap : Elapsed 0.062 ms (6.155 ms / 100) 6.142 -> 6.147 ( +0.08%) [ +0.00% +0.03% +0.13% / +0.10% +0.29% +0.08%] index_select linear : Elapsed 0.061 ms (6.142 ms / 100) 6.155 -> 6.152 ( -0.05%) [ +0.05% +0.02% +0.00% / +0.13% -0.03% -0.05%] index_select reverse : Elapsed 0.062 ms (6.158 ms / 100) 6.126 -> 6.110 ( -0.26%) [ +0.03% +0.00% +0.05% / -0.13% -0.26% -0.20%] index_select skip64 : Elapsed 0.061 ms (6.128 ms / 100) 6.118 -> 6.106 ( -0.20%) [ +0.21% +0.00% +0.23% / +0.33% -0.15% -0.20%] index_select skip256 : Elapsed 0.061 ms (6.131 ms / 100) 6.146 -> 6.150 ( +0.07%) [ +0.00% +0.05% +0.20% / +0.07% +0.15% +0.07%] index_select spread : Elapsed 0.061 ms (6.146 ms / 100) 6.159 -> 6.163 ( +0.06%) [ +0.00% +0.11% +0.03% / +0.08% +0.06% +0.08%] index_select strided 3 : Elapsed 0.062 ms (6.159 ms / 100) 6.158 -> 6.157 ( -0.02%) [ +0.08% +0.00% +0.05% / +0.21% -0.02% +0.06%] index_select random : Elapsed 0.062 ms (6.163 ms / 100) 6.141 -> 6.140 ( -0.02%) [ +0.16% +0.00% +0.13% / +0.15% +0.11% -0.02%] index_select random_sorted : Elapsed 0.062 ms (6.151 ms / 100) out_shape = [20, 4, 5, 16] in_shape = [20, 4, 40, 16] idx_dim = 2 B = [20, 4, 5, 16] (stride (320, 80, 1, 5)) A = [20, 4, 40, 16] (stride (40, 800, 1, 3200)) dim = 2 1.502 -> 1.503 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.53% +0.47%] index_select const : Elapsed 0.015 ms (1.504 ms / 100) 1.501 -> 1.501 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.53% +0.47%] index_select wrap : Elapsed 0.015 ms (1.501 ms / 100) 1.501 -> 1.501 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.33% +0.40%] index_select linear : Elapsed 0.015 ms (1.501 ms / 100) 1.501 -> 1.505 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.33% +0.27% +0.40%] index_select reverse : Elapsed 0.015 ms (1.501 ms / 100) 1.500 -> 1.501 ( +0.07%) [ +0.33% +0.07% +0.00% / +0.07% +0.53% +0.60%] index_select skip64 : Elapsed 0.015 ms (1.505 ms / 100) 1.501 -> 1.501 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.47% +0.53%] index_select skip256 : Elapsed 0.015 ms (1.503 ms / 100) 1.493 -> 1.496 ( +0.20%) [ +0.27% +0.27% +0.00% / +0.20% +0.67% +0.67%] index_select spread : Elapsed 0.015 ms (1.497 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.27% +0.00% +0.13% / +0.20% +0.67% +0.53%] index_select strided 3 : Elapsed 0.015 ms (1.502 ms / 100) 1.498 -> 1.499 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.47% +0.53%] index_select strided 5 : Elapsed 0.015 ms (1.500 ms / 100) 1.499 -> 1.497 ( -0.13%) [ +0.00% +0.13% +0.00% / -0.13% +0.47% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.499 ms / 100) 1.498 -> 1.498 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.47% +0.40%] index_select strided 8 : Elapsed 0.015 ms (1.498 ms / 100) 1.494 -> 1.496 ( +0.13%) [ +0.33% +0.13% +0.00% / +0.13% +0.87% +0.80%] index_select strided 16 : Elapsed 0.015 ms (1.499 ms / 100) 1.495 -> 1.499 ( +0.27%) [ +0.20% +0.20% +0.00% / +0.27% +0.80% +0.80%] index_select random : Elapsed 0.015 ms (1.498 ms / 100) 1.497 -> 1.498 ( +0.07%) [ +0.20% +0.07% +0.00% / +0.07% +0.80% +0.67%] index_select random_sorted : Elapsed 0.015 ms (1.500 ms / 100) 1.495 -> 1.498 ( +0.20%) [ +0.13% +0.13% +0.00% / +0.20% +0.74% +0.67%] index_select perm : Elapsed 0.015 ms (1.497 ms / 100) 1.496 -> 1.496 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.74% +0.67%] index_select perm_sorted : Elapsed 0.015 ms (1.498 ms / 100) B = [20, 4, 5, 16] (stride (320, 1, 4, 20)) A = [20, 4, 40, 16] (stride (40, 800, 1, 3200)) dim = 2 1.615 -> 1.616 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.50% +0.50%] index_select const : Elapsed 0.016 ms (1.616 ms / 100) 1.613 -> 1.613 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.56% +0.74%] index_select wrap : Elapsed 0.016 ms (1.615 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.25% +0.12% +0.00% / +0.06% +0.74% +0.74%] index_select linear : Elapsed 0.016 ms (1.616 ms / 100) 1.612 -> 1.614 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.68% +0.74%] index_select reverse : Elapsed 0.016 ms (1.612 ms / 100) 1.612 -> 1.614 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.68% +0.62%] index_select skip64 : Elapsed 0.016 ms (1.614 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.06% +0.00% +0.12% / +0.06% +0.68% +0.68%] index_select skip256 : Elapsed 0.016 ms (1.615 ms / 100) 1.610 -> 1.612 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.81% +0.75%] index_select spread : Elapsed 0.016 ms (1.611 ms / 100) 1.608 -> 1.609 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.68% +0.68%] index_select strided 3 : Elapsed 0.016 ms (1.610 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select strided 5 : Elapsed 0.016 ms (1.610 ms / 100) 1.610 -> 1.613 ( +0.19%) [ +0.06% +0.19% +0.00% / +0.19% +0.87% +0.93%] index_select strided 7 : Elapsed 0.016 ms (1.611 ms / 100) 1.612 -> 1.612 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.74% +0.56%] index_select strided 8 : Elapsed 0.016 ms (1.613 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.81% +0.62%] index_select strided 16 : Elapsed 0.016 ms (1.612 ms / 100) 1.614 -> 1.614 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select random : Elapsed 0.016 ms (1.615 ms / 100) 1.612 -> 1.614 ( +0.12%) [ +0.19% +0.12% +0.00% / +0.12% +0.68% +0.62%] index_select random_sorted : Elapsed 0.016 ms (1.615 ms / 100) 1.606 -> 1.610 ( +0.25%) [ +0.00% +0.12% +0.00% / +0.25% +0.81% +0.75%] index_select perm : Elapsed 0.016 ms (1.606 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.016 ms (1.614 ms / 100) B = [20, 4, 5, 16] (stride (1, 1600, 320, 20)) A = [20, 4, 40, 16] (stride (2560, 1, 4, 160)) dim = 2 1.543 -> 1.546 ( +0.19%) [ +0.00% +0.00% +0.19% / +0.19% +1.04% +0.65%] index_select const : Elapsed 0.015 ms (1.543 ms / 100) 1.539 -> 1.538 ( -0.06%) [ +0.06% +0.13% +0.00% / -0.06% +0.65% +0.65%] index_select wrap : Elapsed 0.015 ms (1.540 ms / 100) 1.547 -> 1.546 ( -0.06%) [ +0.13% +0.00% +0.19% / -0.06% +1.42% +0.84%] index_select linear : Elapsed 0.015 ms (1.549 ms / 100) 1.547 -> 1.546 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.39% +1.29%] index_select reverse : Elapsed 0.015 ms (1.547 ms / 100) 1.538 -> 1.542 ( +0.26%) [ +0.00% +0.33% +0.13% / +0.26% +1.63% +0.59%] index_select skip64 : Elapsed 0.015 ms (1.538 ms / 100) 1.542 -> 1.544 ( +0.13%) [ +0.00% +0.26% +0.26% / +0.13% +0.65% +0.71%] index_select skip256 : Elapsed 0.015 ms (1.542 ms / 100) 1.531 -> 1.532 ( +0.07%) [ +0.13% +0.26% +0.00% / +0.07% +0.91% +0.78%] index_select spread : Elapsed 0.015 ms (1.533 ms / 100) 1.537 -> 1.536 ( -0.07%) [ +0.13% +0.07% +0.00% / -0.07% +0.72% +0.59%] index_select strided 3 : Elapsed 0.015 ms (1.539 ms / 100) 1.535 -> 1.535 ( +0.00%) [ +0.13% +0.26% +0.00% / +0.00% +1.95% +1.43%] index_select strided 5 : Elapsed 0.015 ms (1.537 ms / 100) 1.530 -> 1.533 ( +0.20%) [ +0.13% +0.07% +0.00% / +0.20% +1.05% +0.92%] index_select strided 7 : Elapsed 0.015 ms (1.532 ms / 100) 1.528 -> 1.530 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.79% +0.59%] index_select strided 8 : Elapsed 0.015 ms (1.529 ms / 100) 1.536 -> 1.537 ( +0.07%) [ +0.00% +0.07% +0.20% / +0.07% +0.78% +1.04%] index_select strided 16 : Elapsed 0.015 ms (1.536 ms / 100) 1.536 -> 1.539 ( +0.20%) [ +0.13% +0.07% +0.00% / +0.20% +0.85% +0.91%] index_select random : Elapsed 0.015 ms (1.538 ms / 100) 1.555 -> 1.563 ( +0.51%) [ +0.00% +0.06% +0.00% / +0.51% +1.03% +1.29%] index_select random_sorted : Elapsed 0.016 ms (1.555 ms / 100) 1.542 -> 1.543 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.65% +0.84%] index_select perm : Elapsed 0.015 ms (1.542 ms / 100) 1.544 -> 1.547 ( +0.19%) [ +0.00% +0.45% +0.13% / +0.19% +0.91% +0.84%] index_select perm_sorted : Elapsed 0.015 ms (1.544 ms / 100) B = [20, 4, 5, 16] (stride (5, 1600, 1, 100)) A = [20, 4, 40, 16] (stride (16, 12800, 320, 1)) dim = 2 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.68% +0.61%] index_select const : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.68% +0.54%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.61% +0.54%] index_select linear : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.81% +0.88%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.478 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.75% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.95% +0.61%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.61% +0.75%] index_select strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.68% +0.68%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.20% +0.00% +0.00% / +0.07% +0.68% +0.81%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select random : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.68% +0.61%] index_select perm : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) out_shape = [20, 4, 40, 5] in_shape = [20, 4, 40, 16] idx_dim = 3 B = [20, 4, 40, 5] (stride (800, 5, 20, 1)) A = [20, 4, 40, 16] (stride (2560, 40, 1, 160)) dim = 3 2.119 -> 2.124 ( +0.24%) [ +0.00% +0.00% +0.14% / +0.24% +0.66% +0.52%] index_select const : Elapsed 0.021 ms (2.119 ms / 100) 2.166 -> 2.173 ( +0.32%) [ +0.37% +0.18% +0.00% / +0.32% +1.39% +1.11%] index_select wrap : Elapsed 0.022 ms (2.174 ms / 100) 2.178 -> 2.179 ( +0.05%) [ +0.46% +0.05% +0.00% / +0.05% +1.06% +1.01%] index_select linear : Elapsed 0.022 ms (2.188 ms / 100) 2.171 -> 2.173 ( +0.09%) [ +0.00% +0.23% +0.00% / +0.09% +1.20% +1.01%] index_select reverse : Elapsed 0.022 ms (2.171 ms / 100) 2.114 -> 2.120 ( +0.28%) [ +0.43% +0.28% +0.00% / +0.28% +0.80% +0.57%] index_select skip64 : Elapsed 0.021 ms (2.123 ms / 100) 2.117 -> 2.122 ( +0.24%) [ +0.00% +0.09% +0.09% / +0.24% +0.52% +0.66%] index_select skip256 : Elapsed 0.021 ms (2.117 ms / 100) 2.168 -> 2.171 ( +0.14%) [ +0.09% +0.00% +0.14% / +0.14% +0.83% +1.15%] index_select spread : Elapsed 0.022 ms (2.170 ms / 100) 2.177 -> 2.180 ( +0.14%) [ +0.00% +0.09% +0.09% / +0.14% +0.60% +0.64%] index_select strided 3 : Elapsed 0.022 ms (2.177 ms / 100) 2.173 -> 2.175 ( +0.09%) [ +0.37% +0.05% +0.00% / +0.09% +0.74% +0.83%] index_select strided 5 : Elapsed 0.022 ms (2.181 ms / 100) 2.160 -> 2.162 ( +0.09%) [ +0.23% +0.00% +0.23% / +0.09% +1.44% +1.48%] index_select strided 7 : Elapsed 0.022 ms (2.165 ms / 100) 2.124 -> 2.126 ( +0.09%) [ +0.33% +0.24% +0.00% / +0.09% +0.52% +0.61%] index_select strided 8 : Elapsed 0.021 ms (2.131 ms / 100) 2.157 -> 2.161 ( +0.19%) [ +0.14% +0.00% +0.23% / +0.19% +1.11% +1.25%] index_select random : Elapsed 0.022 ms (2.160 ms / 100) 2.155 -> 2.159 ( +0.19%) [ +0.00% +0.09% +0.09% / +0.19% +1.30% +1.44%] index_select random_sorted : Elapsed 0.022 ms (2.155 ms / 100) 2.172 -> 2.175 ( +0.14%) [ +0.00% +0.09% +0.00% / +0.14% +1.10% +0.87%] index_select perm : Elapsed 0.022 ms (2.172 ms / 100) 2.168 -> 2.168 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.74% +0.83%] index_select perm_sorted : Elapsed 0.022 ms (2.169 ms / 100) B = [20, 4, 40, 5] (stride (800, 40, 1, 160)) A = [20, 4, 40, 16] (stride (2560, 640, 16, 1)) dim = 3 2.013 -> 2.013 ( +0.00%) [ +0.20% +0.15% +0.00% / +0.05% +0.05% +0.00%] index_select const : Elapsed 0.020 ms (2.017 ms / 100) 2.012 -> 2.012 ( +0.00%) [ +0.25% +0.00% +0.25% / +0.00% +0.30% +0.10%] index_select wrap : Elapsed 0.020 ms (2.017 ms / 100) 2.013 -> 2.015 ( +0.10%) [ +0.25% +0.00% +0.10% / +0.10% +0.15% +0.15%] index_select linear : Elapsed 0.020 ms (2.018 ms / 100) 2.014 -> 2.012 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% -0.05% +0.20%] index_select reverse : Elapsed 0.020 ms (2.016 ms / 100) 2.010 -> 2.013 ( +0.15%) [ +0.20% +0.00% +0.20% / +0.30% +0.25% +0.15%] index_select skip64 : Elapsed 0.020 ms (2.014 ms / 100) 2.013 -> 2.010 ( -0.15%) [ +0.00% +0.05% +0.00% / +0.05% -0.15% +0.10%] index_select skip256 : Elapsed 0.020 ms (2.013 ms / 100) 2.030 -> 2.028 ( -0.10%) [ +0.00% +0.05% +0.05% / +0.05% -0.10% +0.10%] index_select spread : Elapsed 0.020 ms (2.030 ms / 100) 2.032 -> 2.025 ( -0.34%) [ +0.59% +0.30% +0.00% / +0.30% -0.34% -0.20%] index_select strided 3 : Elapsed 0.020 ms (2.044 ms / 100) 2.031 -> 2.026 ( -0.25%) [ +0.05% +0.15% +0.00% / +0.05% -0.25% -0.20%] index_select strided 5 : Elapsed 0.020 ms (2.032 ms / 100) 2.029 -> 2.023 ( -0.30%) [ +0.00% +0.10% +0.05% / +0.05% -0.15% -0.30%] index_select strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 2.038 -> 2.031 ( -0.34%) [ +0.10% +0.00% +0.10% / +0.15% -0.25% -0.34%] index_select strided 8 : Elapsed 0.020 ms (2.040 ms / 100) 2.036 -> 2.034 ( -0.10%) [ +0.05% +0.10% +0.00% / -0.10% -0.10% -0.10%] index_select random : Elapsed 0.020 ms (2.037 ms / 100) 2.027 -> 2.028 ( +0.05%) [ +0.00% +0.20% +0.25% / +0.30% +0.25% +0.05%] index_select random_sorted : Elapsed 0.020 ms (2.027 ms / 100) 2.030 -> 2.027 ( -0.15%) [ +0.05% +0.00% +0.25% / +0.00% -0.15% -0.15%] index_select perm : Elapsed 0.020 ms (2.031 ms / 100) 2.029 -> 2.027 ( -0.10%) [ +0.20% +0.00% +0.00% / +0.00% -0.10% +0.05%] index_select perm_sorted : Elapsed 0.020 ms (2.033 ms / 100) B = [20, 4, 40, 5] (stride (800, 40, 1, 160)) A = [20, 4, 40, 16] (stride (2560, 1, 4, 160)) dim = 3 2.126 -> 2.124 ( -0.09%) [ +0.05% +0.00% +0.00% / -0.09% +0.24% +0.19%] index_select const : Elapsed 0.021 ms (2.127 ms / 100) 2.135 -> 2.134 ( -0.05%) [ +0.23% +0.14% +0.00% / -0.05% +0.75% +0.61%] index_select wrap : Elapsed 0.021 ms (2.140 ms / 100) 2.139 -> 2.139 ( +0.00%) [ +0.47% +0.00% +0.05% / +0.00% +0.56% +0.56%] index_select linear : Elapsed 0.021 ms (2.149 ms / 100) 2.135 -> 2.137 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +0.80% +0.66%] index_select reverse : Elapsed 0.021 ms (2.139 ms / 100) 2.117 -> 2.122 ( +0.24%) [ +0.09% +0.00% +0.14% / +0.24% +0.47% +0.33%] index_select skip64 : Elapsed 0.021 ms (2.119 ms / 100) 2.123 -> 2.125 ( +0.09%) [ +0.00% +0.19% +0.19% / +0.09% +0.47% +0.47%] index_select skip256 : Elapsed 0.021 ms (2.123 ms / 100) 2.142 -> 2.141 ( -0.05%) [ +0.00% +0.09% +0.00% / -0.05% +0.61% +0.56%] index_select spread : Elapsed 0.021 ms (2.142 ms / 100) 2.139 -> 2.143 ( +0.19%) [ +0.14% +0.05% +0.00% / +0.19% +0.61% +0.61%] index_select strided 3 : Elapsed 0.021 ms (2.142 ms / 100) 2.136 -> 2.140 ( +0.19%) [ +0.00% +0.00% +0.14% / +0.19% +0.89% +0.94%] index_select strided 5 : Elapsed 0.021 ms (2.136 ms / 100) 2.136 -> 2.137 ( +0.05%) [ +0.19% +0.00% +0.09% / +0.05% +0.98% +0.94%] index_select strided 7 : Elapsed 0.021 ms (2.140 ms / 100) 2.123 -> 2.125 ( +0.09%) [ +0.00% +0.00% +0.14% / +0.09% +0.42% +0.38%] index_select strided 8 : Elapsed 0.021 ms (2.123 ms / 100) 2.130 -> 2.131 ( +0.05%) [ +0.00% +0.14% +0.14% / +0.05% +0.61% +0.66%] index_select random : Elapsed 0.021 ms (2.130 ms / 100) 2.137 -> 2.137 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.51% +0.66%] index_select random_sorted : Elapsed 0.021 ms (2.137 ms / 100) 2.139 -> 2.138 ( -0.05%) [ +0.19% +0.09% +0.00% / -0.05% +0.94% +0.51%] index_select perm : Elapsed 0.021 ms (2.143 ms / 100) 2.131 -> 2.133 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +1.03% +1.03%] index_select perm_sorted : Elapsed 0.021 ms (2.131 ms / 100) B = [20, 4, 40, 5] (stride (800, 40, 1, 160)) A = [20, 4, 40, 16] (stride (1, 12800, 20, 800)) dim = 3 2.246 -> 2.252 ( +0.27%) [ +0.00% +0.00% +0.09% / +0.27% +1.02% +0.49%] index_select const : Elapsed 0.022 ms (2.246 ms / 100) 2.247 -> 2.247 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.45% +0.36%] index_select wrap : Elapsed 0.022 ms (2.247 ms / 100) 2.242 -> 2.246 ( +0.18%) [ +0.18% +0.04% +0.00% / +0.18% +0.62% +0.31%] index_select linear : Elapsed 0.022 ms (2.246 ms / 100) 2.237 -> 2.239 ( +0.09%) [ +0.13% +0.09% +0.00% / +0.09% +0.31% +0.31%] index_select reverse : Elapsed 0.022 ms (2.240 ms / 100) 2.243 -> 2.249 ( +0.27%) [ +0.04% +0.00% +0.00% / +0.27% +0.49% +0.71%] index_select skip64 : Elapsed 0.022 ms (2.244 ms / 100) 2.244 -> 2.248 ( +0.18%) [ +0.00% +0.09% +0.22% / +0.18% +0.58% +0.89%] index_select skip256 : Elapsed 0.022 ms (2.244 ms / 100) 2.231 -> 2.233 ( +0.09%) [ +0.27% +0.04% +0.00% / +0.09% +0.54% +0.36%] index_select spread : Elapsed 0.022 ms (2.237 ms / 100) 2.241 -> 2.241 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.40% +0.45%] index_select strided 3 : Elapsed 0.022 ms (2.241 ms / 100) 2.240 -> 2.244 ( +0.18%) [ +0.31% +0.18% +0.00% / +0.18% +0.58% +0.45%] index_select strided 5 : Elapsed 0.022 ms (2.247 ms / 100) 2.226 -> 2.234 ( +0.36%) [ +0.18% +0.18% +0.00% / +0.36% +0.49% +0.63%] index_select strided 7 : Elapsed 0.022 ms (2.230 ms / 100) 2.245 -> 2.242 ( -0.13%) [ +0.18% +0.00% +0.00% / -0.13% +0.53% +0.67%] index_select strided 8 : Elapsed 0.022 ms (2.249 ms / 100) 2.248 -> 2.250 ( +0.09%) [ +0.00% +0.13% +0.00% / +0.09% +0.31% +0.22%] index_select random : Elapsed 0.022 ms (2.248 ms / 100) 2.234 -> 2.239 ( +0.22%) [ +0.22% +0.00% +0.27% / +0.22% +0.45% +0.63%] index_select random_sorted : Elapsed 0.022 ms (2.239 ms / 100) 2.241 -> 2.242 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.45% +0.40%] index_select perm : Elapsed 0.022 ms (2.244 ms / 100) 2.249 -> 2.245 ( -0.18%) [ +0.00% +0.09% +0.13% / -0.18% +0.31% +0.36%] index_select perm_sorted : Elapsed 0.022 ms (2.249 ms / 100) B = [20, 4, 40, 5] (stride (200, 4000, 1, 40)) A = [20, 4, 40, 16] (stride (2560, 1, 4, 160)) dim = 3 2.244 -> 2.241 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.31% +0.13%] index_select const : Elapsed 0.022 ms (2.244 ms / 100) 2.246 -> 2.246 ( +0.00%) [ +0.49% +0.09% +0.00% / +0.00% +0.71% +0.76%] index_select wrap : Elapsed 0.023 ms (2.257 ms / 100) 2.248 -> 2.251 ( +0.13%) [ +0.00% +0.00% +0.18% / +0.13% +0.67% +0.76%] index_select linear : Elapsed 0.022 ms (2.248 ms / 100) 2.248 -> 2.251 ( +0.13%) [ +0.04% +0.22% +0.00% / +0.13% +0.58% +0.58%] index_select reverse : Elapsed 0.022 ms (2.249 ms / 100) 2.232 -> 2.235 ( +0.13%) [ +0.22% +0.04% +0.00% / +0.13% +0.36% +0.45%] index_select skip64 : Elapsed 0.022 ms (2.237 ms / 100) 2.239 -> 2.244 ( +0.22%) [ +0.00% +0.18% +0.22% / +0.22% +0.45% +0.36%] index_select skip256 : Elapsed 0.022 ms (2.239 ms / 100) 2.250 -> 2.250 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.62% +0.62%] index_select spread : Elapsed 0.023 ms (2.251 ms / 100) 2.255 -> 2.255 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.58% +0.40%] index_select strided 3 : Elapsed 0.023 ms (2.257 ms / 100) 2.249 -> 2.251 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.84% +0.84%] index_select strided 5 : Elapsed 0.023 ms (2.251 ms / 100) 2.247 -> 2.250 ( +0.13%) [ +0.09% +0.00% +0.09% / +0.13% +0.93% +0.89%] index_select strided 7 : Elapsed 0.022 ms (2.249 ms / 100) 2.237 -> 2.240 ( +0.13%) [ +0.04% +0.00% +0.04% / +0.13% +0.31% +0.49%] index_select strided 8 : Elapsed 0.022 ms (2.238 ms / 100) 2.247 -> 2.253 ( +0.27%) [ +0.13% +0.13% +0.00% / +0.27% +1.02% +0.80%] index_select random : Elapsed 0.022 ms (2.250 ms / 100) 2.250 -> 2.250 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.71% +0.67%] index_select random_sorted : Elapsed 0.023 ms (2.252 ms / 100) 2.252 -> 2.250 ( -0.09%) [ +0.13% +0.00% +0.04% / -0.09% +0.62% +0.62%] index_select perm : Elapsed 0.023 ms (2.255 ms / 100) 2.250 -> 2.246 ( -0.18%) [ +0.04% +0.00% +0.09% / -0.18% +0.62% +0.62%] index_select perm_sorted : Elapsed 0.023 ms (2.251 ms / 100) B = [20, 4, 40, 5] (stride (20, 1, 400, 4)) A = [20, 4, 40, 16] (stride (2560, 1, 64, 4)) dim = 3 2.504 -> 2.502 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.08% +0.12%] index_select const : Elapsed 0.025 ms (2.506 ms / 100) 2.516 -> 2.510 ( -0.24%) [ +0.08% +0.16% +0.00% / +0.08% -0.24% +0.04%] index_select wrap : Elapsed 0.025 ms (2.518 ms / 100) 2.515 -> 2.516 ( +0.04%) [ +0.00% +0.16% +0.04% / +0.16% +0.12% +0.04%] index_select linear : Elapsed 0.025 ms (2.515 ms / 100) 2.524 -> 2.518 ( -0.24%) [ +0.00% +0.08% +0.08% / -0.16% -0.20% -0.24%] index_select reverse : Elapsed 0.025 ms (2.524 ms / 100) 2.498 -> 2.504 ( +0.24%) [ +0.00% +0.16% +0.32% / +0.44% +0.36% +0.24%] index_select skip64 : Elapsed 0.025 ms (2.498 ms / 100) 2.502 -> 2.501 ( -0.04%) [ +0.08% +0.16% +0.00% / +0.04% -0.04% +0.04%] index_select skip256 : Elapsed 0.025 ms (2.504 ms / 100) 2.547 -> 2.531 ( -0.63%) [ +0.20% +0.16% +0.00% / -0.12% -0.47% -0.63%] index_select spread : Elapsed 0.026 ms (2.552 ms / 100) 2.541 -> 2.536 ( -0.20%) [ +0.35% +0.20% +0.00% / +0.24% -0.16% -0.20%] index_select strided 3 : Elapsed 0.025 ms (2.550 ms / 100) 2.533 -> 2.528 ( -0.20%) [ +0.16% +0.36% +0.00% / +0.20% -0.20% -0.20%] index_select strided 5 : Elapsed 0.025 ms (2.537 ms / 100) 2.545 -> 2.534 ( -0.43%) [ +0.00% +0.12% +0.04% / -0.08% -0.31% -0.43%] index_select strided 7 : Elapsed 0.025 ms (2.545 ms / 100) 2.510 -> 2.509 ( -0.04%) [ +0.16% +0.00% +0.16% / +0.28% +0.04% -0.04%] index_select strided 8 : Elapsed 0.025 ms (2.514 ms / 100) 2.519 -> 2.510 ( -0.36%) [ +0.08% +0.16% +0.00% / -0.04% -0.16% -0.36%] index_select random : Elapsed 0.025 ms (2.521 ms / 100) 2.527 -> 2.516 ( -0.44%) [ +0.12% +0.04% +0.00% / +0.24% -0.44% -0.28%] index_select random_sorted : Elapsed 0.025 ms (2.530 ms / 100) 2.533 -> 2.527 ( -0.24%) [ +0.04% +0.00% +0.04% / +0.24% -0.24% -0.16%] index_select perm : Elapsed 0.025 ms (2.534 ms / 100) 2.524 -> 2.523 ( -0.04%) [ +0.32% +0.20% +0.00% / +0.20% -0.04% +0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.532 ms / 100) B = [20, 4, 40, 5] (stride (20, 1, 400, 4)) A = [20, 4, 40, 16] (stride (1, 12800, 320, 20)) dim = 3 2.352 -> 2.351 ( -0.04%) [ +0.00% +0.26% +0.00% / -0.04% +0.21% +0.13%] index_select const : Elapsed 0.024 ms (2.352 ms / 100) 2.345 -> 2.346 ( +0.04%) [ +0.26% +0.09% +0.00% / +0.04% +0.38% +0.38%] index_select wrap : Elapsed 0.024 ms (2.351 ms / 100) 2.347 -> 2.355 ( +0.34%) [ +0.21% +0.30% +0.00% / +0.38% +0.34% +0.51%] index_select linear : Elapsed 0.024 ms (2.352 ms / 100) 2.341 -> 2.347 ( +0.26%) [ +0.34% +0.34% +0.00% / +0.26% +0.73% +0.85%] index_select reverse : Elapsed 0.023 ms (2.349 ms / 100) 2.346 -> 2.357 ( +0.47%) [ +0.13% +0.13% +0.00% / +0.51% +0.51% +0.47%] index_select skip64 : Elapsed 0.023 ms (2.349 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.00% +0.04% +0.00% / +0.13% +0.21% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.352 ms / 100) 2.347 -> 2.352 ( +0.21%) [ +0.60% +0.13% +0.00% / +0.21% +0.30% +0.38%] index_select spread : Elapsed 0.024 ms (2.361 ms / 100) 2.350 -> 2.354 ( +0.17%) [ +0.00% +0.09% +0.26% / +0.17% +0.38% +0.34%] index_select strided 3 : Elapsed 0.024 ms (2.350 ms / 100) 2.343 -> 2.344 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.47% +0.34%] index_select strided 5 : Elapsed 0.023 ms (2.344 ms / 100) 2.344 -> 2.350 ( +0.26%) [ +0.00% +0.13% +0.26% / +0.26% +0.47% +0.43%] index_select strided 7 : Elapsed 0.023 ms (2.344 ms / 100) 2.340 -> 2.345 ( +0.21%) [ +0.04% +0.04% +0.00% / +0.21% +0.51% +0.47%] index_select strided 8 : Elapsed 0.023 ms (2.341 ms / 100) 2.356 -> 2.355 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.42% +0.42%] index_select random : Elapsed 0.024 ms (2.357 ms / 100) 2.350 -> 2.357 ( +0.30%) [ +0.00% +0.00% +0.04% / +0.43% +0.38% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.350 ms / 100) 2.344 -> 2.349 ( +0.21%) [ +0.13% +0.21% +0.00% / +0.21% +0.47% +0.51%] index_select perm : Elapsed 0.023 ms (2.347 ms / 100) 2.345 -> 2.349 ( +0.17%) [ +0.00% +0.13% +0.38% / +0.17% +0.38% +0.60%] index_select perm_sorted : Elapsed 0.023 ms (2.345 ms / 100) B = [20, 4, 40, 5] (stride (1, 20, 400, 80)) A = [20, 4, 40, 16] (stride (16, 320, 1280, 1)) dim = 3 2.511 -> 2.510 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.04% +0.08%] index_select const : Elapsed 0.025 ms (2.511 ms / 100) 2.512 -> 2.511 ( -0.04%) [ +0.12% +0.08% +0.00% / +0.08% -0.04% +0.16%] index_select wrap : Elapsed 0.025 ms (2.515 ms / 100) 2.507 -> 2.510 ( +0.12%) [ +0.24% +0.16% +0.00% / +0.12% +0.20% +0.12%] index_select linear : Elapsed 0.025 ms (2.513 ms / 100) 2.510 -> 2.511 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.08% +0.20%] index_select reverse : Elapsed 0.025 ms (2.510 ms / 100) 2.511 -> 2.511 ( +0.00%) [ +0.20% +0.00% +0.16% / +0.00% +0.08% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.516 ms / 100) 2.508 -> 2.511 ( +0.12%) [ +0.00% +0.16% +0.08% / +0.12% +0.12% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.508 ms / 100) 2.532 -> 2.534 ( +0.08%) [ +0.00% +0.20% +0.20% / +0.20% +0.20% +0.08%] index_select spread : Elapsed 0.025 ms (2.532 ms / 100) 2.532 -> 2.530 ( -0.08%) [ +0.04% +0.00% +0.04% / +0.08% -0.08% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.533 ms / 100) 2.535 -> 2.532 ( -0.12%) [ +0.00% +0.00% +0.12% / +0.16% -0.12% -0.08%] index_select strided 5 : Elapsed 0.025 ms (2.535 ms / 100) 2.536 -> 2.536 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.04% +0.00% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.536 ms / 100) 2.536 -> 2.536 ( +0.00%) [ +0.35% +0.04% +0.00% / +0.04% +0.00% +0.08%] index_select strided 8 : Elapsed 0.025 ms (2.545 ms / 100) 2.541 -> 2.540 ( -0.04%) [ +0.12% +0.00% +0.04% / +0.12% +0.08% -0.04%] index_select random : Elapsed 0.025 ms (2.544 ms / 100) 2.534 -> 2.534 ( +0.00%) [ +0.00% +0.20% +0.08% / +0.04% +0.04% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.534 ms / 100) 2.532 -> 2.531 ( -0.04%) [ +0.04% +0.08% +0.00% / +0.04% -0.04% +0.04%] index_select perm : Elapsed 0.025 ms (2.533 ms / 100) 2.535 -> 2.536 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.12% +0.04% +0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.535 ms / 100) B = [20, 4, 40, 5] (stride (4, 1, 80, 3200)) A = [20, 4, 40, 16] (stride (1, 20, 1280, 80)) dim = 3 2.320 -> 2.323 ( +0.13%) [ +0.17% +0.00% +0.22% / +0.13% +0.39% +0.47%] index_select const : Elapsed 0.023 ms (2.324 ms / 100) 2.321 -> 2.319 ( -0.09%) [ +0.17% +0.00% +0.13% / -0.09% +0.26% +0.22%] index_select wrap : Elapsed 0.023 ms (2.325 ms / 100) 2.317 -> 2.324 ( +0.30%) [ +0.04% +0.09% +0.00% / +0.30% +0.52% +0.39%] index_select linear : Elapsed 0.023 ms (2.318 ms / 100) 2.311 -> 2.312 ( +0.04%) [ +0.26% +0.00% +0.22% / +0.04% +0.48% +0.26%] index_select reverse : Elapsed 0.023 ms (2.317 ms / 100) 2.315 -> 2.316 ( +0.04%) [ +0.22% +0.00% +0.17% / +0.04% +0.09% +0.65%] index_select skip64 : Elapsed 0.023 ms (2.320 ms / 100) 2.321 -> 2.326 ( +0.22%) [ +0.09% +0.00% +0.09% / +0.22% +0.22% +0.43%] index_select skip256 : Elapsed 0.023 ms (2.323 ms / 100) 2.314 -> 2.316 ( +0.09%) [ +0.13% +0.00% +0.13% / +0.09% +0.35% +0.26%] index_select spread : Elapsed 0.023 ms (2.317 ms / 100) 2.312 -> 2.318 ( +0.26%) [ +0.04% +0.04% +0.00% / +0.26% +0.43% +0.52%] index_select strided 3 : Elapsed 0.023 ms (2.313 ms / 100) 2.312 -> 2.321 ( +0.39%) [ +0.39% +0.30% +0.00% / +0.39% +0.74% +0.69%] index_select strided 5 : Elapsed 0.023 ms (2.321 ms / 100) 2.313 -> 2.314 ( +0.04%) [ +0.09% +0.00% +0.17% / +0.04% +0.48% +0.35%] index_select strided 7 : Elapsed 0.023 ms (2.315 ms / 100) 2.320 -> 2.324 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.52% +0.65%] index_select strided 8 : Elapsed 0.023 ms (2.324 ms / 100) 2.317 -> 2.321 ( +0.17%) [ +0.00% +0.00% +0.09% / +0.17% +0.43% +0.39%] index_select random : Elapsed 0.023 ms (2.317 ms / 100) 2.314 -> 2.316 ( +0.09%) [ +0.26% +0.00% +0.22% / +0.09% +0.35% +0.48%] index_select random_sorted : Elapsed 0.023 ms (2.320 ms / 100) 2.311 -> 2.314 ( +0.13%) [ +0.30% +0.09% +0.00% / +0.13% +0.30% +0.56%] index_select perm : Elapsed 0.023 ms (2.318 ms / 100) 2.312 -> 2.323 ( +0.48%) [ +0.35% +0.17% +0.00% / +0.56% +0.65% +0.48%] index_select perm_sorted : Elapsed 0.023 ms (2.320 ms / 100) out_shape = [5, 16, 4, 40] in_shape = [20, 16, 4, 40] idx_dim = 0 B = [5, 16, 4, 40] (stride (2560, 160, 1, 4)) A = [20, 16, 4, 40] (stride (2560, 160, 40, 1)) dim = 0 1.571 -> 1.572 ( +0.06%) [ +0.00% +0.06% +0.25% / +0.06% +0.57% +0.57%] index_select const : Elapsed 0.016 ms (1.571 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.38% +0.25% +0.00% / +0.06% +0.44% +0.38%] index_select wrap : Elapsed 0.016 ms (1.580 ms / 100) 1.576 -> 1.573 ( -0.19%) [ +0.00% +0.19% +0.13% / -0.19% +0.13% +0.32%] index_select linear : Elapsed 0.016 ms (1.576 ms / 100) 1.578 -> 1.576 ( -0.13%) [ +0.00% +0.13% +0.00% / -0.13% +0.19% +0.06%] index_select reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.06% +0.19% +0.00% / +0.13% +0.45% +0.38%] index_select skip64 : Elapsed 0.016 ms (1.574 ms / 100) 1.571 -> 1.576 ( +0.32%) [ +0.06% +0.06% +0.00% / +0.32% +0.51% +0.64%] index_select skip256 : Elapsed 0.016 ms (1.572 ms / 100) 1.588 -> 1.592 ( +0.25%) [ +0.00% +0.19% +0.06% / +0.25% +0.94% +0.88%] index_select spread : Elapsed 0.016 ms (1.588 ms / 100) 1.591 -> 1.593 ( +0.13%) [ +0.00% +0.06% +0.00% / +0.13% +0.44% +0.44%] index_select strided 3 : Elapsed 0.016 ms (1.591 ms / 100) 1.570 -> 1.575 ( +0.32%) [ +0.19% +0.25% +0.00% / +0.32% +0.70% +0.57%] index_select strided 5 : Elapsed 0.016 ms (1.573 ms / 100) 1.595 -> 1.592 ( -0.19%) [ +0.44% +0.19% +0.00% / -0.13% -0.19% -0.13%] index_select strided 7 : Elapsed 0.016 ms (1.602 ms / 100) 1.592 -> 1.593 ( +0.06%) [ +0.13% +0.00% +0.13% / +0.06% +0.88% +0.69%] index_select strided 8 : Elapsed 0.016 ms (1.594 ms / 100) 1.586 -> 1.590 ( +0.25%) [ +0.19% +0.32% +0.00% / +0.25% +0.95% +0.88%] index_select strided 16 : Elapsed 0.016 ms (1.589 ms / 100) 1.580 -> 1.586 ( +0.38%) [ +0.00% +0.00% +0.32% / +0.38% +0.89% +0.63%] index_select random : Elapsed 0.016 ms (1.580 ms / 100) 1.577 -> 1.585 ( +0.51%) [ +0.06% +0.00% +0.32% / +0.51% +0.76% +0.89%] index_select random_sorted : Elapsed 0.016 ms (1.578 ms / 100) 1.585 -> 1.592 ( +0.44%) [ +0.25% +0.00% +0.13% / +0.44% +0.76% +0.88%] index_select perm : Elapsed 0.016 ms (1.589 ms / 100) 1.588 -> 1.590 ( +0.13%) [ +0.00% +0.06% +0.06% / +0.13% +0.50% +0.69%] index_select perm_sorted : Elapsed 0.016 ms (1.588 ms / 100) B = [5, 16, 4, 40] (stride (160, 800, 40, 1)) A = [20, 16, 4, 40] (stride (64, 1, 16, 1280)) dim = 0 1.817 -> 1.810 ( -0.39%) [ +0.00% +0.06% +0.00% / +0.11% -0.28% -0.39%] index_select const : Elapsed 0.018 ms (1.817 ms / 100) 1.773 -> 1.775 ( +0.11%) [ +0.23% +0.00% +0.00% / +0.11% +0.45% +0.51%] index_select wrap : Elapsed 0.018 ms (1.777 ms / 100) 1.779 -> 1.780 ( +0.06%) [ +0.00% +0.34% +0.22% / +0.06% +0.51% +0.51%] index_select linear : Elapsed 0.018 ms (1.779 ms / 100) 1.777 -> 1.780 ( +0.17%) [ +0.00% +0.06% +0.00% / +0.17% +0.51% +0.45%] index_select reverse : Elapsed 0.018 ms (1.777 ms / 100) 1.808 -> 1.808 ( +0.00%) [ +0.11% +0.17% +0.00% / +0.00% +0.22% +0.77%] index_select skip64 : Elapsed 0.018 ms (1.810 ms / 100) 1.807 -> 1.809 ( +0.11%) [ +0.22% +0.28% +0.00% / +0.11% +0.83% +0.55%] index_select skip256 : Elapsed 0.018 ms (1.811 ms / 100) 1.778 -> 1.779 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.39% +0.06%] index_select spread : Elapsed 0.018 ms (1.778 ms / 100) 1.789 -> 1.789 ( +0.00%) [ +0.28% +0.22% +0.00% / +0.00% +0.39% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.794 ms / 100) 1.788 -> 1.790 ( +0.11%) [ +0.17% +0.00% +0.11% / +0.11% +0.22% +0.56%] index_select strided 5 : Elapsed 0.018 ms (1.791 ms / 100) 1.784 -> 1.786 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.73% +0.62%] index_select strided 7 : Elapsed 0.018 ms (1.788 ms / 100) 1.784 -> 1.785 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.62% +0.50%] index_select strided 8 : Elapsed 0.018 ms (1.786 ms / 100) 1.772 -> 1.776 ( +0.23%) [ +0.28% +0.06% +0.00% / +0.23% +0.45% +0.62%] index_select strided 16 : Elapsed 0.018 ms (1.777 ms / 100) 1.787 -> 1.789 ( +0.11%) [ +0.17% +0.00% +0.06% / +0.11% +0.45% +0.50%] index_select random : Elapsed 0.018 ms (1.790 ms / 100) 1.791 -> 1.794 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.45% +0.50%] index_select random_sorted : Elapsed 0.018 ms (1.793 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.28% +0.00% +0.06% / +0.06% +0.62% +0.68%] index_select perm : Elapsed 0.018 ms (1.775 ms / 100) 1.779 -> 1.786 ( +0.39%) [ +0.17% +0.11% +0.00% / +0.39% +0.73% +0.67%] index_select perm_sorted : Elapsed 0.018 ms (1.782 ms / 100) B = [5, 16, 4, 40] (stride (40, 800, 200, 1)) A = [20, 16, 4, 40] (stride (2560, 4, 1, 64)) dim = 0 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.90% +1.05%] index_select const : Elapsed 0.007 ms (0.667 ms / 100) 0.674 -> 0.668 ( -0.89%) [ +0.15% +0.15% +0.00% / +0.15% -0.89% -0.74%] index_select wrap : Elapsed 0.007 ms (0.675 ms / 100) 0.675 -> 0.669 ( -0.89%) [ +0.15% +0.00% +0.00% / +0.00% -0.89% -0.89%] index_select linear : Elapsed 0.007 ms (0.676 ms / 100) 0.675 -> 0.674 ( -0.15%) [ +0.15% +0.00% +0.00% / -0.15% +0.59% +0.59%] index_select reverse : Elapsed 0.007 ms (0.676 ms / 100) 0.675 -> 0.675 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.44% +0.59%] index_select skip64 : Elapsed 0.007 ms (0.676 ms / 100) 0.668 -> 0.668 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +1.35% +1.35%] index_select skip256 : Elapsed 0.007 ms (0.669 ms / 100) 0.669 -> 0.670 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +1.05% +1.20%] index_select spread : Elapsed 0.007 ms (0.669 ms / 100) 0.676 -> 0.676 ( +0.00%) [ +0.00% +0.44% +0.00% / +0.00% +0.30% +0.30%] index_select strided 3 : Elapsed 0.007 ms (0.676 ms / 100) 0.671 -> 0.672 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.30% +0.15%] index_select strided 5 : Elapsed 0.007 ms (0.671 ms / 100) 0.676 -> 0.676 ( +0.00%) [ +0.30% +0.30% +0.00% / +0.00% +0.30% +0.44%] index_select strided 7 : Elapsed 0.007 ms (0.678 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.74% +0.59%] index_select strided 8 : Elapsed 0.007 ms (0.675 ms / 100) 0.675 -> 0.670 ( -0.74%) [ +0.00% +0.15% +0.00% / +0.00% -0.74% -0.74%] index_select strided 16 : Elapsed 0.007 ms (0.675 ms / 100) 0.675 -> 0.669 ( -0.89%) [ +0.00% +0.15% +0.15% / +0.00% -0.89% -0.89%] index_select random : Elapsed 0.007 ms (0.675 ms / 100) 0.674 -> 0.675 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.30% +0.15%] index_select random_sorted : Elapsed 0.007 ms (0.675 ms / 100) 0.676 -> 0.678 ( +0.30%) [ +0.44% +0.44% +0.00% / +0.44% +0.30% +0.30%] index_select perm : Elapsed 0.007 ms (0.679 ms / 100) 0.668 -> 0.669 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +1.20% +1.50%] index_select perm_sorted : Elapsed 0.007 ms (0.668 ms / 100) B = [5, 16, 4, 40] (stride (40, 800, 200, 1)) A = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) dim = 0 1.831 -> 1.826 ( -0.27%) [ +0.05% +0.00% +0.11% / +0.05% -0.27% -0.16%] index_select const : Elapsed 0.018 ms (1.832 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.00% +8.09% +0.00% / +0.17% +1.27% +1.27%] index_select wrap : Elapsed 0.018 ms (1.818 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.17% +0.22% +0.00% / +0.17% +0.61% +0.55%] index_select linear : Elapsed 0.018 ms (1.821 ms / 100) 1.818 -> 1.822 ( +0.22%) [ +0.17% +0.11% +0.00% / +0.22% +0.72% +0.55%] index_select reverse : Elapsed 0.018 ms (1.821 ms / 100) 1.819 -> 1.821 ( +0.11%) [ +0.05% +0.00% +0.16% / +0.11% +0.49% +0.55%] index_select skip64 : Elapsed 0.018 ms (1.820 ms / 100) 1.820 -> 1.821 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.99% +1.10%] index_select skip256 : Elapsed 0.018 ms (1.820 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +0.50% +0.44%] index_select spread : Elapsed 0.018 ms (1.814 ms / 100) 1.810 -> 1.808 ( -0.11%) [ +0.11% +0.17% +0.00% / -0.11% +1.16% +1.16%] index_select strided 3 : Elapsed 0.018 ms (1.812 ms / 100) 1.810 -> 1.812 ( +0.11%) [ +0.06% +0.00% +0.06% / +0.11% +0.44% +0.66%] index_select strided 5 : Elapsed 0.018 ms (1.811 ms / 100) 1.807 -> 1.812 ( +0.28%) [ +0.22% +0.00% +0.17% / +0.28% +0.66% +0.66%] index_select strided 7 : Elapsed 0.018 ms (1.811 ms / 100) 1.812 -> 1.812 ( +0.00%) [ +0.06% +0.11% +0.00% / +0.00% +0.55% +0.55%] index_select strided 8 : Elapsed 0.018 ms (1.813 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.77% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.813 ms / 100) 1.807 -> 1.807 ( +0.00%) [ +0.33% +0.00% +0.06% / +0.00% +0.61% +0.66%] index_select random : Elapsed 0.018 ms (1.813 ms / 100) 1.815 -> 1.814 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.55% +0.61%] index_select random_sorted : Elapsed 0.018 ms (1.818 ms / 100) 1.801 -> 1.804 ( +0.17%) [ +0.22% +0.39% +0.00% / +0.17% +0.67% +0.72%] index_select perm : Elapsed 0.018 ms (1.805 ms / 100) 1.804 -> 1.807 ( +0.17%) [ +0.28% +0.06% +0.00% / +0.17% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.018 ms (1.809 ms / 100) B = [5, 16, 4, 40] (stride (4, 800, 1, 20)) A = [20, 16, 4, 40] (stride (160, 3200, 1, 4)) dim = 0 1.777 -> 1.781 ( +0.23%) [ +0.00% +0.34% +0.11% / +0.23% +0.45% +0.28%] index_select const : Elapsed 0.018 ms (1.777 ms / 100) 1.783 -> 1.787 ( +0.22%) [ +0.00% +0.06% +0.34% / +0.22% +1.01% +1.12%] index_select wrap : Elapsed 0.018 ms (1.783 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.28% +0.11% +0.00% / +0.11% +0.84% +0.90%] index_select linear : Elapsed 0.018 ms (1.788 ms / 100) 1.790 -> 1.789 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.73% +0.89%] index_select reverse : Elapsed 0.018 ms (1.791 ms / 100) 1.780 -> 1.783 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +0.22% +0.45%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.778 -> 1.778 ( +0.00%) [ +0.06% +0.00% +0.22% / +0.00% +0.28% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.779 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.17% +0.00% +0.22% / +0.00% +0.56% +0.62%] index_select spread : Elapsed 0.018 ms (1.784 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +0.39% +0.34%] index_select strided 3 : Elapsed 0.018 ms (1.785 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.22% +0.17% +0.00% / +0.11% +0.34% +0.56%] index_select strided 5 : Elapsed 0.018 ms (1.783 ms / 100) 1.782 -> 1.782 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +0.84% +0.56%] index_select strided 7 : Elapsed 0.018 ms (1.787 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.34% +0.00% +0.11% / +0.11% +0.62% +0.56%] index_select strided 8 : Elapsed 0.018 ms (1.785 ms / 100) 1.781 -> 1.784 ( +0.17%) [ +0.28% +0.00% +0.00% / +0.17% +0.39% +0.56%] index_select strided 16 : Elapsed 0.018 ms (1.786 ms / 100) 1.780 -> 1.781 ( +0.06%) [ +0.17% +0.17% +0.00% / +0.06% +0.34% +0.34%] index_select random : Elapsed 0.018 ms (1.783 ms / 100) 1.781 -> 1.784 ( +0.17%) [ +0.06% +0.06% +0.00% / +0.17% +0.56% +0.51%] index_select random_sorted : Elapsed 0.018 ms (1.782 ms / 100) 1.784 -> 1.786 ( +0.11%) [ +0.00% +0.17% +0.22% / +0.11% +0.17% +0.11%] index_select perm : Elapsed 0.018 ms (1.784 ms / 100) 1.787 -> 1.787 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.06% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.788 ms / 100) B = [5, 16, 4, 40] (stride (640, 40, 3200, 1)) A = [20, 16, 4, 40] (stride (1, 3200, 20, 80)) dim = 0 1.906 -> 1.908 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.26% +0.10% +0.10%] index_select const : Elapsed 0.019 ms (1.906 ms / 100) 1.908 -> 1.910 ( +0.10%) [ +0.16% +0.21% +0.00% / +0.10% +0.89% +0.68%] index_select wrap : Elapsed 0.019 ms (1.911 ms / 100) 1.913 -> 1.911 ( -0.10%) [ +0.21% +0.16% +0.00% / -0.10% +0.68% +0.58%] index_select linear : Elapsed 0.019 ms (1.917 ms / 100) 1.913 -> 1.914 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.52% +0.63%] index_select reverse : Elapsed 0.019 ms (1.914 ms / 100) 1.909 -> 1.911 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.16% +0.16%] index_select skip64 : Elapsed 0.019 ms (1.909 ms / 100) 1.906 -> 1.907 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.31% +0.47%] index_select skip256 : Elapsed 0.019 ms (1.908 ms / 100) 1.927 -> 1.929 ( +0.10%) [ +0.16% +0.00% +0.00% / +0.10% +0.62% +0.57%] index_select spread : Elapsed 0.019 ms (1.930 ms / 100) 1.929 -> 1.928 ( -0.05%) [ +0.16% +0.21% +0.00% / -0.05% +0.98% +0.93%] index_select strided 3 : Elapsed 0.019 ms (1.932 ms / 100) 1.929 -> 1.935 ( +0.31%) [ +0.00% +0.21% +0.10% / +0.31% +0.57% +0.67%] index_select strided 5 : Elapsed 0.019 ms (1.929 ms / 100) 1.926 -> 1.928 ( +0.10%) [ +0.05% +0.16% +0.00% / +0.10% +0.31% +0.10%] index_select strided 7 : Elapsed 0.019 ms (1.927 ms / 100) 1.925 -> 1.927 ( +0.10%) [ +0.21% +0.00% +0.16% / +0.10% +0.47% +0.36%] index_select strided 8 : Elapsed 0.019 ms (1.929 ms / 100) 1.933 -> 1.932 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.10% -0.05%] index_select strided 16 : Elapsed 0.019 ms (1.936 ms / 100) 1.914 -> 1.913 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.31% +0.26%] index_select random : Elapsed 0.019 ms (1.914 ms / 100) 1.913 -> 1.913 ( +0.00%) [ +0.00% +0.47% +0.26% / +0.00% +0.31% +0.16%] index_select random_sorted : Elapsed 0.019 ms (1.913 ms / 100) 1.923 -> 1.923 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.26% +0.21%] index_select perm : Elapsed 0.019 ms (1.923 ms / 100) 1.917 -> 1.915 ( -0.10%) [ +0.00% +0.05% +0.21% / -0.10% +0.26% +0.37%] index_select perm_sorted : Elapsed 0.019 ms (1.917 ms / 100) B = [5, 16, 4, 40] (stride (1, 200, 3200, 5)) A = [20, 16, 4, 40] (stride (2560, 160, 40, 1)) dim = 0 1.577 -> 1.572 ( -0.32%) [ +0.00% +0.25% +0.00% / -0.32% +0.13% +0.19%] index_select const : Elapsed 0.016 ms (1.577 ms / 100) 1.577 -> 1.578 ( +0.06%) [ +0.13% +0.00% +0.25% / +0.06% +0.06% +0.38%] index_select wrap : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.577 ( +0.00%) [ +0.00% +0.19% +0.00% / +0.00% +0.00% +0.06%] index_select linear : Elapsed 0.016 ms (1.577 ms / 100) 1.573 -> 1.579 ( +0.38%) [ +0.00% +0.13% +0.19% / +0.38% +0.38% +0.45%] index_select reverse : Elapsed 0.016 ms (1.573 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.06% +0.19% +0.00% / -0.06% +0.00% +0.25%] index_select skip64 : Elapsed 0.016 ms (1.577 ms / 100) 1.575 -> 1.574 ( -0.06%) [ +0.06% +0.19% +0.00% / +0.32% -0.06% +0.13%] index_select skip256 : Elapsed 0.016 ms (1.576 ms / 100) 1.591 -> 1.589 ( -0.13%) [ +0.13% +0.00% +0.19% / -0.13% +0.31% +0.75%] index_select spread : Elapsed 0.016 ms (1.593 ms / 100) 1.592 -> 1.593 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.38% +0.06%] index_select strided 3 : Elapsed 0.016 ms (1.593 ms / 100) 1.574 -> 1.578 ( +0.25%) [ +0.19% +0.00% +0.00% / +0.25% +0.57% +0.44%] index_select strided 5 : Elapsed 0.016 ms (1.577 ms / 100) 1.592 -> 1.588 ( -0.25%) [ +0.00% +0.31% +0.44% / +0.13% -0.25% +0.19%] index_select strided 7 : Elapsed 0.016 ms (1.592 ms / 100) 1.590 -> 1.590 ( +0.00%) [ +0.00% +0.19% +0.00% / +0.00% +0.44% +0.69%] index_select strided 8 : Elapsed 0.016 ms (1.590 ms / 100) 1.591 -> 1.595 ( +0.25%) [ +0.00% +0.06% +0.06% / +0.25% +0.50% +0.44%] index_select strided 16 : Elapsed 0.016 ms (1.591 ms / 100) 1.577 -> 1.573 ( -0.25%) [ +0.06% +0.19% +0.00% / -0.25% +0.25% +0.32%] index_select random : Elapsed 0.016 ms (1.578 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.00% +0.13% +0.00% / +0.25% +0.06% +0.38%] index_select random_sorted : Elapsed 0.016 ms (1.576 ms / 100) 1.577 -> 1.592 ( +0.95%) [ +0.00% +0.06% +0.57% / +0.95% +1.20% +1.46%] index_select perm : Elapsed 0.016 ms (1.577 ms / 100) 1.579 -> 1.589 ( +0.63%) [ +0.13% +0.00% +0.89% / +0.63% +1.08% +1.08%] index_select perm_sorted : Elapsed 0.016 ms (1.581 ms / 100) B = [5, 16, 4, 40] (stride (16, 1, 80, 320)) A = [20, 16, 4, 40] (stride (1, 3200, 800, 20)) dim = 0 1.649 -> 1.648 ( -0.06%) [ +0.00% +0.18% +0.00% / +0.00% -0.06% +0.12%] index_select const : Elapsed 0.016 ms (1.649 ms / 100) 1.650 -> 1.652 ( +0.12%) [ +0.30% +0.00% +0.00% / +0.12% +0.61% +0.97%] index_select wrap : Elapsed 0.017 ms (1.655 ms / 100) 1.652 -> 1.652 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +0.91% +0.97%] index_select linear : Elapsed 0.017 ms (1.654 ms / 100) 1.652 -> 1.659 ( +0.42%) [ +0.12% +0.06% +0.00% / +0.42% +0.54% +0.85%] index_select reverse : Elapsed 0.017 ms (1.654 ms / 100) 1.645 -> 1.647 ( +0.12%) [ +0.30% +0.00% +0.12% / +0.12% +0.91% +0.67%] index_select skip64 : Elapsed 0.016 ms (1.650 ms / 100) 1.649 -> 1.648 ( -0.06%) [ +0.12% +0.00% +0.00% / -0.06% +0.61% +0.55%] index_select skip256 : Elapsed 0.017 ms (1.651 ms / 100) 1.671 -> 1.672 ( +0.06%) [ +0.00% +0.18% +0.06% / +0.06% +0.84% +0.72%] index_select spread : Elapsed 0.017 ms (1.671 ms / 100) 1.676 -> 1.678 ( +0.12%) [ +0.18% +0.00% +0.06% / +0.12% +1.01% +0.78%] index_select strided 3 : Elapsed 0.017 ms (1.679 ms / 100) 1.672 -> 1.673 ( +0.06%) [ +0.06% +0.24% +0.00% / +0.06% +0.78% +0.90%] index_select strided 5 : Elapsed 0.017 ms (1.673 ms / 100) 1.660 -> 1.662 ( +0.12%) [ +0.06% +0.18% +0.00% / +0.12% +0.30% +0.36%] index_select strided 7 : Elapsed 0.017 ms (1.661 ms / 100) 1.666 -> 1.664 ( -0.12%) [ +0.06% +0.00% +0.06% / -0.12% +0.06% +0.18%] index_select strided 8 : Elapsed 0.017 ms (1.667 ms / 100) 1.670 -> 1.675 ( +0.30%) [ +0.00% +0.06% +0.30% / +0.30% +0.36% +0.48%] index_select strided 16 : Elapsed 0.017 ms (1.670 ms / 100) 1.665 -> 1.665 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.00% +0.24% +0.24%] index_select random : Elapsed 0.017 ms (1.667 ms / 100) 1.666 -> 1.667 ( +0.06%) [ +0.18% +0.06% +0.00% / +0.06% +0.42% +0.30%] index_select random_sorted : Elapsed 0.017 ms (1.669 ms / 100) 1.662 -> 1.662 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.18% +0.36%] index_select perm : Elapsed 0.017 ms (1.665 ms / 100) 1.662 -> 1.663 ( +0.06%) [ +0.36% +0.00% +0.18% / +0.06% +0.36% +0.48%] index_select perm_sorted : Elapsed 0.017 ms (1.668 ms / 100) out_shape = [20, 5, 4, 40] in_shape = [20, 16, 4, 40] idx_dim = 1 B = [20, 5, 4, 40] (stride (800, 160, 40, 1)) A = [20, 16, 4, 40] (stride (4, 3200, 1, 80)) dim = 1 2.306 -> 2.310 ( +0.17%) [ +0.00% +0.13% +0.13% / +0.17% +0.61% +0.69%] index_select const : Elapsed 0.023 ms (2.306 ms / 100) 2.307 -> 2.312 ( +0.22%) [ +0.13% +0.09% +0.00% / +0.22% +0.48% +0.43%] index_select wrap : Elapsed 0.023 ms (2.310 ms / 100) 2.310 -> 2.311 ( +0.04%) [ +0.22% +0.17% +0.00% / +0.04% +0.43% +0.39%] index_select linear : Elapsed 0.023 ms (2.315 ms / 100) 2.319 -> 2.322 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.13% +0.52% +0.34%] index_select reverse : Elapsed 0.023 ms (2.319 ms / 100) 2.301 -> 2.303 ( +0.09%) [ +0.00% +0.17% +0.13% / +0.09% +0.65% +0.61%] index_select skip64 : Elapsed 0.023 ms (2.301 ms / 100) 2.308 -> 2.307 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.56% +0.48%] index_select skip256 : Elapsed 0.023 ms (2.311 ms / 100) 2.324 -> 2.326 ( +0.09%) [ +0.22% +0.00% +0.00% / +0.09% +0.26% +0.39%] index_select spread : Elapsed 0.023 ms (2.329 ms / 100) 2.321 -> 2.325 ( +0.17%) [ +0.00% +0.04% +0.04% / +0.22% +0.17% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.321 ms / 100) 2.303 -> 2.307 ( +0.17%) [ +0.13% +0.09% +0.00% / +0.17% +0.39% +0.48%] index_select strided 5 : Elapsed 0.023 ms (2.306 ms / 100) 2.316 -> 2.315 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.60% +0.60%] index_select strided 7 : Elapsed 0.023 ms (2.316 ms / 100) 2.302 -> 2.304 ( +0.09%) [ +0.22% +0.00% +0.04% / +0.09% +0.61% +0.48%] index_select strided 8 : Elapsed 0.023 ms (2.307 ms / 100) 2.295 -> 2.299 ( +0.17%) [ +0.00% +0.13% +0.13% / +0.17% +0.87% +0.83%] index_select random : Elapsed 0.023 ms (2.295 ms / 100) 2.298 -> 2.306 ( +0.35%) [ +0.09% +0.22% +0.00% / +0.35% +0.57% +0.35%] index_select random_sorted : Elapsed 0.023 ms (2.300 ms / 100) 2.298 -> 2.299 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.70% +0.61%] index_select perm : Elapsed 0.023 ms (2.301 ms / 100) 2.305 -> 2.311 ( +0.26%) [ +0.00% +0.13% +0.00% / +0.26% +0.52% +0.65%] index_select perm_sorted : Elapsed 0.023 ms (2.305 ms / 100) B = [20, 5, 4, 40] (stride (4, 3200, 1, 80)) A = [20, 16, 4, 40] (stride (1, 800, 12800, 20)) dim = 1 2.402 -> 2.403 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.50% +0.50%] index_select const : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.00% +0.17% +0.12% / +0.00% +0.33% +0.46%] index_select wrap : Elapsed 0.024 ms (2.403 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.12% +0.08% +0.25%] index_select linear : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.399 ( -0.17%) [ +0.00% +0.00% +0.04% / -0.17% +0.12% +0.21%] index_select reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.29% +0.04% +0.00% / +0.08% +0.54% +0.46%] index_select skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.46% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.405 ms / 100) 2.396 -> 2.400 ( +0.17%) [ +0.00% +0.04% +0.08% / +0.17% +0.42% +0.29%] index_select spread : Elapsed 0.024 ms (2.396 ms / 100) 2.404 -> 2.402 ( -0.08%) [ +0.12% +0.00% +0.00% / -0.08% +0.25% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.406 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.21% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.405 ms / 100) 2.391 -> 2.391 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.54% +0.42%] index_select strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.17% +0.08% +0.00% / -0.04% +0.50% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.405 ms / 100) 2.405 -> 2.405 ( +0.00%) [ +0.00% +0.17% +0.04% / +0.00% +0.33% +0.29%] index_select random : Elapsed 0.024 ms (2.405 ms / 100) 2.395 -> 2.397 ( +0.08%) [ +0.00% +0.33% +0.04% / +0.08% +0.33% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.402 -> 2.405 ( +0.12%) [ +0.21% +0.00% +0.08% / +0.12% +0.29% +0.37%] index_select perm : Elapsed 0.024 ms (2.407 ms / 100) 2.399 -> 2.404 ( +0.21%) [ +0.17% +0.21% +0.00% / +0.21% +0.42% +0.54%] index_select perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) B = [20, 5, 4, 40] (stride (1, 20, 4000, 100)) A = [20, 16, 4, 40] (stride (2560, 160, 1, 4)) dim = 1 2.138 -> 2.138 ( +0.00%) [ +0.14% +0.05% +0.00% / +0.00% +0.51% +0.42%] index_select const : Elapsed 0.021 ms (2.141 ms / 100) 2.150 -> 2.152 ( +0.09%) [ +0.05% +0.05% +0.00% / +0.09% +0.88% +0.79%] index_select wrap : Elapsed 0.022 ms (2.151 ms / 100) 2.155 -> 2.154 ( -0.05%) [ +0.14% +0.05% +0.00% / -0.05% +0.65% +0.56%] index_select linear : Elapsed 0.022 ms (2.158 ms / 100) 2.153 -> 2.154 ( +0.05%) [ +0.00% +0.09% +0.14% / +0.05% +1.02% +0.98%] index_select reverse : Elapsed 0.022 ms (2.153 ms / 100) 2.126 -> 2.126 ( +0.00%) [ +0.24% +0.00% +0.14% / +0.00% +0.61% +0.66%] index_select skip64 : Elapsed 0.021 ms (2.131 ms / 100) 2.133 -> 2.139 ( +0.28%) [ +0.28% +0.19% +0.00% / +0.28% +0.70% +0.66%] index_select skip256 : Elapsed 0.021 ms (2.139 ms / 100) 2.153 -> 2.157 ( +0.19%) [ +0.37% +0.19% +0.00% / +0.19% +0.93% +1.11%] index_select spread : Elapsed 0.022 ms (2.161 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.05% +0.19% +0.00% / +0.05% +0.51% +0.83%] index_select strided 3 : Elapsed 0.022 ms (2.157 ms / 100) 2.152 -> 2.153 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +1.02% +0.98%] index_select strided 5 : Elapsed 0.022 ms (2.154 ms / 100) 2.151 -> 2.152 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.93% +1.12%] index_select strided 7 : Elapsed 0.022 ms (2.152 ms / 100) 2.136 -> 2.144 ( +0.37%) [ +0.37% +0.28% +0.00% / +0.37% +0.61% +0.61%] index_select strided 8 : Elapsed 0.021 ms (2.144 ms / 100) 2.139 -> 2.141 ( +0.09%) [ +0.14% +0.19% +0.00% / +0.09% +1.03% +1.03%] index_select random : Elapsed 0.021 ms (2.142 ms / 100) 2.156 -> 2.158 ( +0.09%) [ +0.23% +0.05% +0.00% / +0.09% +0.56% +0.37%] index_select random_sorted : Elapsed 0.022 ms (2.161 ms / 100) 2.150 -> 2.149 ( -0.05%) [ +0.14% +0.05% +0.00% / -0.05% +0.79% +0.98%] index_select perm : Elapsed 0.022 ms (2.153 ms / 100) 2.151 -> 2.152 ( +0.05%) [ +0.05% +0.00% +0.37% / +0.05% +1.21% +0.84%] index_select perm_sorted : Elapsed 0.022 ms (2.152 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 100, 400)) A = [20, 16, 4, 40] (stride (40, 800, 12800, 1)) dim = 1 2.263 -> 2.262 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.09% +0.13%] index_select const : Elapsed 0.023 ms (2.265 ms / 100) 2.329 -> 2.318 ( -0.47%) [ +0.00% +0.09% +0.04% / -0.26% -0.21% -0.47%] index_select wrap : Elapsed 0.023 ms (2.329 ms / 100) 2.316 -> 2.312 ( -0.17%) [ +0.22% +0.26% +0.00% / +0.22% -0.17% -0.09%] index_select linear : Elapsed 0.023 ms (2.321 ms / 100) 2.319 -> 2.317 ( -0.09%) [ +0.17% +0.13% +0.00% / -0.09% +0.17% +0.00%] index_select reverse : Elapsed 0.023 ms (2.323 ms / 100) 2.261 -> 2.268 ( +0.31%) [ +0.04% +0.18% +0.00% / +0.31% +0.35% +0.57%] index_select skip64 : Elapsed 0.023 ms (2.262 ms / 100) 2.259 -> 2.263 ( +0.18%) [ +0.27% +0.04% +0.00% / +0.18% +0.53% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.265 ms / 100) 2.320 -> 2.317 ( -0.13%) [ +0.13% +0.04% +0.00% / +0.00% -0.13% -0.09%] index_select spread : Elapsed 0.023 ms (2.323 ms / 100) 2.318 -> 2.315 ( -0.13%) [ +0.04% +0.04% +0.00% / -0.04% -0.04% -0.13%] index_select strided 3 : Elapsed 0.023 ms (2.319 ms / 100) 2.321 -> 2.309 ( -0.52%) [ +0.09% +0.09% +0.00% / +0.43% -0.39% -0.52%] index_select strided 5 : Elapsed 0.023 ms (2.323 ms / 100) 2.325 -> 2.314 ( -0.47%) [ +0.09% +0.04% +0.00% / -0.17% -0.30% -0.47%] index_select strided 7 : Elapsed 0.023 ms (2.327 ms / 100) 2.278 -> 2.272 ( -0.26%) [ +0.35% +0.00% +0.04% / +0.09% -0.26% +0.22%] index_select strided 8 : Elapsed 0.023 ms (2.286 ms / 100) 2.305 -> 2.300 ( -0.22%) [ +0.00% +0.04% +0.09% / +0.17% -0.13% -0.22%] index_select random : Elapsed 0.023 ms (2.305 ms / 100) 2.303 -> 2.297 ( -0.26%) [ +0.00% +0.13% +0.00% / +0.43% +0.04% -0.26%] index_select random_sorted : Elapsed 0.023 ms (2.303 ms / 100) 2.311 -> 2.317 ( +0.26%) [ +0.43% +0.30% +0.00% / +0.35% +0.43% +0.26%] index_select perm : Elapsed 0.023 ms (2.321 ms / 100) 2.332 -> 2.326 ( -0.26%) [ +0.30% +0.17% +0.00% / +0.30% -0.26% -0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) out_shape = [20, 16, 5, 40] in_shape = [20, 16, 4, 40] idx_dim = 2 B = [20, 16, 5, 40] (stride (3200, 1, 16, 80)) A = [20, 16, 4, 40] (stride (160, 3200, 40, 1)) dim = 2 5.897 -> 5.884 ( -0.22%) [ +0.05% +0.00% +0.05% / +0.07% -0.15% -0.22%] index_add_ linear : Elapsed 0.059 ms (5.900 ms / 100) 5.836 -> 5.824 ( -0.21%) [ +0.12% +0.00% +0.19% / +0.15% -0.21% -0.05%] index_copy_ linear : Elapsed 0.058 ms (5.843 ms / 100) 5.899 -> 5.881 ( -0.31%) [ +0.14% +0.10% +0.00% / +0.00% -0.31% -0.24%] index_add_ reverse : Elapsed 0.059 ms (5.907 ms / 100) 5.832 -> 5.831 ( -0.02%) [ +0.12% +0.00% +0.22% / +0.31% -0.02% -0.02%] index_copy_ reverse : Elapsed 0.058 ms (5.839 ms / 100) 5.885 -> 5.877 ( -0.14%) [ +0.00% +0.10% +0.10% / +0.05% -0.02% -0.14%] index_add_ spread : Elapsed 0.059 ms (5.885 ms / 100) 5.830 -> 5.817 ( -0.22%) [ +0.12% +0.00% +0.00% / +0.07% -0.03% -0.22%] index_copy_ spread : Elapsed 0.058 ms (5.837 ms / 100) 5.906 -> 5.889 ( -0.29%) [ +0.03% +0.00% +0.08% / +0.05% -0.29% -0.20%] index_add_ strided 3 : Elapsed 0.059 ms (5.908 ms / 100) 5.849 -> 5.839 ( -0.17%) [ +0.05% +0.02% +0.00% / +0.14% -0.17% -0.12%] index_copy_ strided 3 : Elapsed 0.059 ms (5.852 ms / 100) 5.891 -> 5.883 ( -0.14%) [ +0.07% +0.00% +0.07% / +0.12% -0.14% -0.08%] index_add_ perm : Elapsed 0.059 ms (5.895 ms / 100) 5.829 -> 5.821 ( -0.14%) [ +0.00% +0.10% +0.07% / +0.17% +0.00% -0.14%] index_copy_ perm : Elapsed 0.058 ms (5.829 ms / 100) 5.890 -> 5.881 ( -0.15%) [ +0.03% +0.00% +0.10% / +0.19% -0.15% -0.15%] index_add_ perm_sorted : Elapsed 0.059 ms (5.892 ms / 100) 5.839 -> 5.834 ( -0.09%) [ +0.00% +0.10% +0.10% / +0.10% -0.09% -0.05%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.839 ms / 100) 6.136 -> 6.132 ( -0.07%) [ +0.00% +0.08% +0.16% / +0.24% +0.07% -0.07%] index_select const : Elapsed 0.061 ms (6.136 ms / 100) 6.238 -> 6.225 ( -0.21%) [ +0.02% +0.03% +0.00% / +0.02% -0.21% -0.21%] index_select wrap : Elapsed 0.062 ms (6.239 ms / 100) 6.196 -> 6.191 ( -0.08%) [ +0.00% +0.23% +0.15% / +0.24% -0.08% -0.02%] index_select linear : Elapsed 0.062 ms (6.196 ms / 100) 6.219 -> 6.210 ( -0.14%) [ +0.00% +0.14% +0.14% / +0.13% -0.14% -0.05%] index_select reverse : Elapsed 0.062 ms (6.219 ms / 100) 6.130 -> 6.122 ( -0.13%) [ +0.02% +0.00% +0.11% / +0.13% -0.05% -0.13%] index_select skip64 : Elapsed 0.061 ms (6.131 ms / 100) 6.142 -> 6.132 ( -0.16%) [ +0.03% +0.00% +0.07% / +0.05% -0.08% -0.16%] index_select skip256 : Elapsed 0.061 ms (6.144 ms / 100) 6.220 -> 6.209 ( -0.18%) [ +0.00% +0.03% +0.02% / +0.06% -0.18% -0.14%] index_select spread : Elapsed 0.062 ms (6.220 ms / 100) 6.221 -> 6.205 ( -0.26%) [ +0.00% +0.00% +0.10% / +0.03% -0.21% -0.26%] index_select strided 3 : Elapsed 0.062 ms (6.221 ms / 100) 6.193 -> 6.180 ( -0.21%) [ +0.03% +0.00% +0.06% / +0.08% -0.21% -0.16%] index_select random : Elapsed 0.062 ms (6.195 ms / 100) 6.201 -> 6.193 ( -0.13%) [ +0.00% +0.13% +0.11% / +0.16% -0.13% -0.08%] index_select random_sorted : Elapsed 0.062 ms (6.201 ms / 100) B = [20, 16, 5, 40] (stride (1, 4000, 20, 100)) A = [20, 16, 4, 40] (stride (1, 20, 320, 1280)) dim = 2 5.911 -> 5.918 ( +0.12%) [ +0.00% +0.03% +0.08% / +0.12% +0.61% +0.64%] index_add_ linear : Elapsed 0.059 ms (5.911 ms / 100) 5.853 -> 5.860 ( +0.12%) [ +0.10% +0.00% +0.09% / +0.12% +0.48% +0.58%] index_copy_ linear : Elapsed 0.059 ms (5.859 ms / 100) 5.910 -> 5.923 ( +0.22%) [ +0.07% +0.00% +0.20% / +0.22% +0.51% +0.58%] index_add_ reverse : Elapsed 0.059 ms (5.914 ms / 100) 5.846 -> 5.853 ( +0.12%) [ +0.00% +0.19% +0.38% / +0.12% +0.60% +0.74%] index_copy_ reverse : Elapsed 0.058 ms (5.846 ms / 100) 5.914 -> 5.920 ( +0.10%) [ +0.00% +0.07% +0.07% / +0.10% +0.66% +0.64%] index_add_ spread : Elapsed 0.059 ms (5.914 ms / 100) 5.852 -> 5.859 ( +0.12%) [ +0.00% +0.00% +0.07% / +0.12% +0.62% +0.50%] index_copy_ spread : Elapsed 0.059 ms (5.852 ms / 100) 5.944 -> 5.948 ( +0.07%) [ +0.02% +0.03% +0.00% / +0.07% +0.42% +0.45%] index_add_ strided 3 : Elapsed 0.059 ms (5.945 ms / 100) 5.882 -> 5.892 ( +0.17%) [ +0.00% +0.02% +0.00% / +0.17% +0.24% +0.19%] index_copy_ strided 3 : Elapsed 0.059 ms (5.882 ms / 100) 5.918 -> 5.926 ( +0.14%) [ +0.03% +0.00% +0.15% / +0.14% +0.66% +0.64%] index_add_ perm : Elapsed 0.059 ms (5.920 ms / 100) 5.858 -> 5.852 ( -0.10%) [ +0.14% +0.00% +0.00% / -0.10% +0.63% +0.56%] index_copy_ perm : Elapsed 0.059 ms (5.866 ms / 100) 5.922 -> 5.937 ( +0.25%) [ +0.03% +0.00% +0.08% / +0.25% +0.52% +0.56%] index_add_ perm_sorted : Elapsed 0.059 ms (5.924 ms / 100) 5.859 -> 5.869 ( +0.17%) [ +0.00% +0.12% +0.09% / +0.17% +0.51% +0.58%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.859 ms / 100) 6.166 -> 6.168 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.39% +0.23%] index_select const : Elapsed 0.062 ms (6.168 ms / 100) 6.233 -> 6.247 ( +0.22%) [ +0.14% +0.02% +0.00% / +0.22% +0.58% +0.56%] index_select wrap : Elapsed 0.062 ms (6.242 ms / 100) 6.227 -> 6.220 ( -0.11%) [ +0.00% +0.05% +0.08% / -0.11% +0.53% +0.45%] index_select linear : Elapsed 0.062 ms (6.227 ms / 100) 6.230 -> 6.240 ( +0.16%) [ +0.10% +0.00% +0.19% / +0.16% +0.34% +0.47%] index_select reverse : Elapsed 0.062 ms (6.236 ms / 100) 6.161 -> 6.171 ( +0.16%) [ +0.10% +0.11% +0.00% / +0.16% +0.45% +0.36%] index_select skip64 : Elapsed 0.062 ms (6.167 ms / 100) 6.160 -> 6.166 ( +0.10%) [ +0.05% +0.00% +0.36% / +0.10% +0.36% +0.34%] index_select skip256 : Elapsed 0.062 ms (6.163 ms / 100) 6.230 -> 6.248 ( +0.29%) [ +0.00% +0.10% +0.21% / +0.29% +0.32% +0.35%] index_select spread : Elapsed 0.062 ms (6.230 ms / 100) 6.237 -> 6.241 ( +0.06%) [ +0.03% +0.00% +0.02% / +0.06% +0.43% +0.46%] index_select strided 3 : Elapsed 0.062 ms (6.239 ms / 100) 6.247 -> 6.246 ( -0.02%) [ +0.00% +0.05% +0.06% / -0.02% +0.14% +0.05%] index_select random : Elapsed 0.062 ms (6.247 ms / 100) 6.208 -> 6.218 ( +0.16%) [ +0.05% +0.05% +0.00% / +0.16% +0.74% +0.64%] index_select random_sorted : Elapsed 0.062 ms (6.211 ms / 100) B = [20, 16, 5, 40] (stride (5, 100, 1, 1600)) A = [20, 16, 4, 40] (stride (640, 40, 12800, 1)) dim = 2 5.732 -> 5.735 ( +0.05%) [ +0.00% +0.09% +0.10% / +0.12% +0.05% +0.05%] index_add_ linear : Elapsed 0.057 ms (5.732 ms / 100) 5.686 -> 5.684 ( -0.04%) [ +0.04% +0.11% +0.00% / +0.11% +0.02% -0.04%] index_copy_ linear : Elapsed 0.057 ms (5.688 ms / 100) 5.728 -> 5.728 ( +0.00%) [ +0.21% +0.00% +0.12% / +0.07% +0.00% +0.03%] index_add_ reverse : Elapsed 0.057 ms (5.740 ms / 100) 5.685 -> 5.677 ( -0.14%) [ +0.00% +0.04% +0.09% / +0.12% -0.14% +0.02%] index_copy_ reverse : Elapsed 0.057 ms (5.685 ms / 100) 5.721 -> 5.721 ( +0.00%) [ +0.00% +0.17% +0.02% / +0.14% +0.09% +0.00%] index_add_ spread : Elapsed 0.057 ms (5.721 ms / 100) 5.677 -> 5.672 ( -0.09%) [ +0.02% +0.00% +0.07% / +0.18% -0.09% -0.07%] index_copy_ spread : Elapsed 0.057 ms (5.678 ms / 100) 5.722 -> 5.721 ( -0.02%) [ +0.05% +0.00% +0.10% / +0.09% +0.07% -0.02%] index_add_ strided 3 : Elapsed 0.057 ms (5.725 ms / 100) 5.681 -> 5.674 ( -0.12%) [ +0.11% +0.00% +0.02% / +0.07% -0.12% -0.07%] index_copy_ strided 3 : Elapsed 0.057 ms (5.687 ms / 100) 5.721 -> 5.724 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.14% +0.05% +0.12%] index_add_ perm : Elapsed 0.057 ms (5.724 ms / 100) 5.681 -> 5.671 ( -0.18%) [ +0.04% +0.00% +0.07% / -0.05% -0.04% -0.18%] index_copy_ perm : Elapsed 0.057 ms (5.683 ms / 100) 5.724 -> 5.717 ( -0.12%) [ +0.03% +0.00% +0.16% / -0.02% +0.00% -0.12%] index_add_ perm_sorted : Elapsed 0.057 ms (5.726 ms / 100) 5.679 -> 5.673 ( -0.11%) [ +0.04% +0.00% +0.02% / +0.14% -0.11% -0.07%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.681 ms / 100) 5.797 -> 5.803 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.53% +0.50%] index_select const : Elapsed 0.058 ms (5.800 ms / 100) 5.891 -> 5.889 ( -0.03%) [ +0.00% +0.05% +0.02% / +0.02% -0.03% +0.03%] index_select wrap : Elapsed 0.059 ms (5.891 ms / 100) 5.875 -> 5.879 ( +0.07%) [ +0.00% +0.14% +0.10% / +0.19% +0.14% +0.07%] index_select linear : Elapsed 0.059 ms (5.875 ms / 100) 5.886 -> 5.889 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.10% +0.05% +0.10%] index_select reverse : Elapsed 0.059 ms (5.889 ms / 100) 5.790 -> 5.798 ( +0.14%) [ +0.03% +0.05% +0.00% / +0.14% +0.17% +0.28%] index_select skip64 : Elapsed 0.058 ms (5.792 ms / 100) 5.798 -> 5.803 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.43% +0.40%] index_select skip256 : Elapsed 0.058 ms (5.798 ms / 100) 5.882 -> 5.877 ( -0.09%) [ +0.00% +0.07% +0.02% / +0.10% +0.00% -0.09%] index_select spread : Elapsed 0.059 ms (5.882 ms / 100) 5.885 -> 5.887 ( +0.03%) [ +0.00% +0.10% +0.08% / +0.03% +0.14% +0.08%] index_select strided 3 : Elapsed 0.059 ms (5.885 ms / 100) 5.865 -> 5.863 ( -0.03%) [ +0.03% +0.00% +0.02% / -0.02% -0.02% -0.03%] index_select random : Elapsed 0.059 ms (5.867 ms / 100) 5.867 -> 5.858 ( -0.15%) [ +0.00% +0.05% +0.07% / +0.05% +0.00% -0.15%] index_select random_sorted : Elapsed 0.059 ms (5.867 ms / 100) B = [20, 16, 5, 40] (stride (5, 100, 1, 1600)) A = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) dim = 2 6.220 -> 6.225 ( +0.08%) [ +0.00% +0.00% +0.10% / +0.10% +0.08% +0.08%] index_add_ linear : Elapsed 0.062 ms (6.220 ms / 100) 6.188 -> 6.181 ( -0.11%) [ +0.00% +0.03% +0.15% / +0.16% -0.11% -0.06%] index_copy_ linear : Elapsed 0.062 ms (6.188 ms / 100) 6.215 -> 6.221 ( +0.10%) [ +0.11% +0.00% +0.18% / +0.14% +0.18% +0.10%] index_add_ reverse : Elapsed 0.062 ms (6.222 ms / 100) 6.189 -> 6.183 ( -0.10%) [ +0.03% +0.00% +0.18% / +0.13% +0.03% -0.10%] index_copy_ reverse : Elapsed 0.062 ms (6.191 ms / 100) 6.214 -> 6.223 ( +0.14%) [ +0.00% +0.11% +0.11% / +0.19% +0.18% +0.14%] index_add_ spread : Elapsed 0.062 ms (6.214 ms / 100) 6.190 -> 6.183 ( -0.11%) [ +0.00% +0.05% +0.15% / +0.08% -0.05% -0.11%] index_copy_ spread : Elapsed 0.062 ms (6.190 ms / 100) 6.214 -> 6.220 ( +0.10%) [ +0.05% +0.00% +0.11% / +0.14% +0.10% +0.11%] index_add_ strided 3 : Elapsed 0.062 ms (6.217 ms / 100) 6.189 -> 6.184 ( -0.08%) [ +0.02% +0.00% +0.06% / +0.05% -0.08% -0.06%] index_copy_ strided 3 : Elapsed 0.062 ms (6.190 ms / 100) 6.217 -> 6.222 ( +0.08%) [ +0.00% +0.03% +0.18% / +0.18% +0.08% +0.08%] index_add_ perm : Elapsed 0.062 ms (6.217 ms / 100) 6.190 -> 6.181 ( -0.15%) [ +0.00% +0.10% +0.00% / +0.02% -0.15% -0.15%] index_copy_ perm : Elapsed 0.062 ms (6.190 ms / 100) 6.221 -> 6.222 ( +0.02%) [ +0.03% +0.00% +0.06% / +0.08% +0.02% +0.02%] index_add_ perm_sorted : Elapsed 0.062 ms (6.223 ms / 100) 6.187 -> 6.178 ( -0.15%) [ +0.00% +0.03% +0.13% / +0.23% -0.15% -0.11%] index_copy_ perm_sorted : Elapsed 0.062 ms (6.187 ms / 100) 6.511 -> 6.502 ( -0.14%) [ +0.00% +0.05% +0.28% / +0.28% -0.14% -0.11%] index_select const : Elapsed 0.065 ms (6.511 ms / 100) 6.510 -> 6.503 ( -0.11%) [ +0.00% +0.17% +0.23% / +0.22% -0.11% +0.09%] index_select wrap : Elapsed 0.065 ms (6.510 ms / 100) 6.515 -> 6.510 ( -0.08%) [ +0.02% +0.00% +0.12% / +0.08% -0.06% -0.08%] index_select linear : Elapsed 0.065 ms (6.516 ms / 100) 6.514 -> 6.506 ( -0.12%) [ +0.08% +0.00% +0.08% / +0.25% -0.11% -0.12%] index_select reverse : Elapsed 0.065 ms (6.519 ms / 100) 6.508 -> 6.503 ( -0.08%) [ +0.00% +0.17% +0.14% / +0.23% -0.08% -0.05%] index_select skip64 : Elapsed 0.065 ms (6.508 ms / 100) 6.514 -> 6.504 ( -0.15%) [ +0.03% +0.00% +0.18% / +0.09% -0.06% -0.15%] index_select skip256 : Elapsed 0.065 ms (6.516 ms / 100) 6.512 -> 6.506 ( -0.09%) [ +0.09% +0.00% +0.18% / +0.29% -0.08% -0.09%] index_select spread : Elapsed 0.065 ms (6.518 ms / 100) 6.514 -> 6.506 ( -0.12%) [ +0.00% +0.11% +0.12% / +0.09% -0.06% -0.12%] index_select strided 3 : Elapsed 0.065 ms (6.514 ms / 100) 6.517 -> 6.505 ( -0.18%) [ +0.00% +0.00% +0.11% / +0.21% -0.18% -0.17%] index_select random : Elapsed 0.065 ms (6.517 ms / 100) 6.511 -> 6.508 ( -0.05%) [ +0.09% +0.00% +0.20% / +0.22% +0.05% -0.05%] index_select random_sorted : Elapsed 0.065 ms (6.517 ms / 100) out_shape = [20, 16, 4, 5] in_shape = [20, 16, 4, 40] idx_dim = 3 B = [20, 16, 4, 5] (stride (320, 20, 1, 4)) A = [20, 16, 4, 40] (stride (160, 3200, 40, 1)) dim = 3 1.500 -> 1.502 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.53% +0.27%] index_select const : Elapsed 0.015 ms (1.502 ms / 100) 1.497 -> 1.497 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.33% +0.33%] index_select wrap : Elapsed 0.015 ms (1.498 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.60% +0.54%] index_select linear : Elapsed 0.015 ms (1.494 ms / 100) 1.492 -> 1.495 ( +0.20%) [ +0.13% +0.00% +0.13% / +0.20% +0.67% +0.67%] index_select reverse : Elapsed 0.015 ms (1.494 ms / 100) 1.494 -> 1.497 ( +0.20%) [ +0.13% +0.13% +0.00% / +0.20% +0.60% +0.60%] index_select skip64 : Elapsed 0.015 ms (1.496 ms / 100) 1.500 -> 1.501 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.53% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.502 ms / 100) 1.497 -> 1.497 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.00% +0.40% +0.40%] index_select spread : Elapsed 0.015 ms (1.500 ms / 100) 1.495 -> 1.495 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.47% +0.60%] index_select strided 3 : Elapsed 0.015 ms (1.496 ms / 100) 1.490 -> 1.491 ( +0.07%) [ +0.20% +0.00% +0.20% / +0.07% +0.40% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.493 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.33% +0.40%] index_select strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.47% +0.53%] index_select strided 8 : Elapsed 0.015 ms (1.497 ms / 100) 1.492 -> 1.491 ( -0.07%) [ +0.13% +0.00% +0.07% / -0.07% +0.54% +0.67%] index_select strided 16 : Elapsed 0.015 ms (1.494 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.13% +0.20% +0.00% / +0.00% +0.74% +0.74%] index_select random : Elapsed 0.015 ms (1.495 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.74% +0.67%] index_select random_sorted : Elapsed 0.015 ms (1.493 ms / 100) 1.490 -> 1.493 ( +0.20%) [ +0.27% +0.07% +0.00% / +0.20% +0.81% +0.74%] index_select perm : Elapsed 0.015 ms (1.494 ms / 100) 1.490 -> 1.491 ( +0.07%) [ +0.20% +0.00% +0.07% / +0.07% +0.81% +0.60%] index_select perm_sorted : Elapsed 0.015 ms (1.493 ms / 100) B = [20, 16, 4, 5] (stride (320, 1, 80, 16)) A = [20, 16, 4, 40] (stride (1, 3200, 20, 80)) dim = 3 1.573 -> 1.574 ( +0.06%) [ +0.06% +0.00% +0.32% / +0.06% +0.06% +0.32%] index_select const : Elapsed 0.016 ms (1.574 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.00% +0.00% +0.45% / +0.45% +0.13% +0.45%] index_select wrap : Elapsed 0.016 ms (1.572 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.25% +0.00% +0.45% / +0.32% +0.57% +0.13%] index_select linear : Elapsed 0.016 ms (1.576 ms / 100) 1.574 -> 1.577 ( +0.19%) [ +0.38% +0.00% +0.00% / +0.19% +0.32% +0.25%] index_select reverse : Elapsed 0.016 ms (1.580 ms / 100) 1.553 -> 1.554 ( +0.06%) [ +0.32% +0.00% +1.09% / +0.06% +0.39% +1.55%] index_select skip64 : Elapsed 0.016 ms (1.558 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.00% +0.19% +0.00% / +0.19% +0.25% +0.06%] index_select skip256 : Elapsed 0.016 ms (1.576 ms / 100) 1.544 -> 1.544 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +1.04% +1.81%] index_select spread : Elapsed 0.015 ms (1.545 ms / 100) 1.569 -> 1.574 ( +0.32%) [ +0.45% +0.76% +0.00% / +0.32% +0.64% +0.70%] index_select strided 3 : Elapsed 0.016 ms (1.576 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.06% +0.32% +0.00% / +0.13% +0.51% +0.38%] index_select strided 5 : Elapsed 0.016 ms (1.574 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.00% +0.00% +0.32% / +0.06% +0.13% +0.19%] index_select strided 7 : Elapsed 0.016 ms (1.574 ms / 100) 1.581 -> 1.583 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.76% +0.76%] index_select strided 8 : Elapsed 0.016 ms (1.583 ms / 100) 1.539 -> 1.539 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.43% +0.84%] index_select strided 16 : Elapsed 0.015 ms (1.539 ms / 100) 1.577 -> 1.570 ( -0.44%) [ +0.00% +0.00% +0.06% / -0.44% +0.06% -0.13%] index_select random : Elapsed 0.016 ms (1.577 ms / 100) 1.567 -> 1.571 ( +0.26%) [ +0.51% +0.06% +0.00% / +0.26% +0.83% +0.57%] index_select random_sorted : Elapsed 0.016 ms (1.575 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.00% +0.25% +0.00% / +0.06% +0.51% +0.51%] index_select perm : Elapsed 0.016 ms (1.572 ms / 100) 1.575 -> 1.574 ( -0.06%) [ +0.00% +0.13% +0.06% / +0.38% +0.06% -0.06%] index_select perm_sorted : Elapsed 0.016 ms (1.575 ms / 100) B = [20, 16, 4, 5] (stride (20, 400, 5, 1)) A = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) dim = 3 1.476 -> 1.479 ( +0.20%) [ +0.14% +0.14% +0.00% / +0.20% +0.61% +0.41%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.54% +0.41%] index_select wrap : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.41% +0.34%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.54% +0.47%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.505 -> 1.508 ( +0.20%) [ +0.07% +0.00% +0.00% / +0.20% +0.66% +0.53%] index_select skip64 : Elapsed 0.015 ms (1.506 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.61% +0.47%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.61% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.68% +0.47%] index_select strided 8 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.81% +0.68%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.68% +0.54%] index_select random : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.54% +0.75%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [20, 16, 4, 5] (stride (20, 400, 1, 4)) A = [20, 16, 4, 40] (stride (2560, 1, 640, 16)) dim = 3 1.485 -> 1.488 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.81% +0.81%] index_select const : Elapsed 0.015 ms (1.486 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_select wrap : Elapsed 0.015 ms (1.480 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.81% +0.74%] index_select linear : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.20% +0.20% +0.00% / +0.14% +0.88% +0.81%] index_select reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.486 -> 1.486 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.74%] index_select skip256 : Elapsed 0.015 ms (1.486 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.95% +0.68%] index_select spread : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.74% +0.74%] index_select strided 3 : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.27% +0.07% +0.00% / +0.14% +0.68% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.481 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.74% +0.74%] index_select strided 7 : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.480 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.81% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.74% +0.81%] index_select random : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.74% +0.74%] index_select random_sorted : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.81% +0.81%] index_select perm : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.20% +0.00% +0.00% / +0.07% +0.81% +0.74%] index_select perm_sorted : Elapsed 0.015 ms (1.480 ms / 100) B = [20, 16, 4, 5] (stride (64, 1, 16, 1280)) A = [20, 16, 4, 40] (stride (16, 1, 12800, 320)) dim = 3 1.383 -> 1.384 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.14% +0.14% +0.07%] index_select const : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.22% +0.22% +0.00% / +0.14% +0.22% +0.29%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.14% +0.22%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.29% +0.07% +0.00% / +0.07% +0.29% +0.29%] index_select reverse : Elapsed 0.014 ms (1.386 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.22% +0.00% +0.22% / +0.14% +0.43% +0.43%] index_select skip64 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.29% +0.29%] index_select skip256 : Elapsed 0.014 ms (1.383 ms / 100) 1.383 -> 1.381 ( -0.14%) [ +0.29% +0.00% +0.00% / -0.14% +0.14% +0.14%] index_select spread : Elapsed 0.014 ms (1.387 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.22% +0.00% +0.07% / +0.14% +0.29% +0.43%] index_select strided 3 : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.00% +0.14% / +0.14% +0.29% +0.29%] index_select strided 5 : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.386 ( +0.36%) [ +0.22% +0.00% +0.00% / +0.36% +0.43% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.22% +0.22%] index_select strided 8 : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.22% +0.00% +0.14% / +0.07% +0.43% +0.43%] index_select strided 16 : Elapsed 0.014 ms (1.383 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.15% +0.22% +0.00% / -0.07% +0.51% +0.51%] index_select random : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.385 ( +0.22%) [ +0.00% +0.00% +0.14% / +0.22% +0.43% +0.36%] index_select random_sorted : Elapsed 0.014 ms (1.382 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.29% +0.29%] index_select perm : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.36% +0.29%] index_select perm_sorted : Elapsed 0.014 ms (1.383 ms / 100) B = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) A = [20, 16, 4, 40] (stride (64, 1, 16, 1280)) dim = 3 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.74% +0.74%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.74%] index_select wrap : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.07% +0.00% +0.07% / +0.20% +0.68% +0.68%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.480 ( +0.27%) [ +0.14% +0.07% +0.00% / +0.27% +0.88% +0.95%] index_select reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.81% +0.74%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.20% +0.07% +0.00% / +0.07% +0.68% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.481 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select spread : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.61% +0.74%] index_select strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.61% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.487 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.74% +0.81%] index_select strided 7 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.68% +0.68%] index_select strided 8 : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +1.29%] index_select strided 16 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.81% +0.74%] index_select random : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.81%] index_select random_sorted : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.20% +0.14% +0.00% / +0.07% +0.74% +0.74%] index_select perm : Elapsed 0.015 ms (1.480 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.74% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.478 ms / 100) out_shape = [5, 16, 40, 4] in_shape = [20, 16, 40, 4] idx_dim = 0 B = [5, 16, 40, 4] (stride (2560, 160, 1, 40)) A = [20, 16, 40, 4] (stride (64, 4, 1280, 1)) dim = 0 1.794 -> 1.796 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.95% +0.95%] index_select const : Elapsed 0.018 ms (1.796 ms / 100) 1.815 -> 1.818 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.50% +0.72%] index_select wrap : Elapsed 0.018 ms (1.817 ms / 100) 1.812 -> 1.817 ( +0.28%) [ +0.11% +0.00% +0.22% / +0.28% +0.99% +0.77%] index_select linear : Elapsed 0.018 ms (1.814 ms / 100) 1.823 -> 1.824 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.66% +0.55%] index_select reverse : Elapsed 0.018 ms (1.823 ms / 100) 1.793 -> 1.796 ( +0.17%) [ +0.11% +0.28% +0.00% / +0.17% +0.78% +0.73%] index_select skip64 : Elapsed 0.018 ms (1.795 ms / 100) 1.794 -> 1.796 ( +0.11%) [ +0.00% +0.06% +0.17% / +0.11% +1.00% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.794 ms / 100) 1.810 -> 1.814 ( +0.22%) [ +0.00% +0.11% +0.17% / +0.22% +1.05% +0.77%] index_select spread : Elapsed 0.018 ms (1.810 ms / 100) 1.817 -> 1.816 ( -0.06%) [ +0.22% +0.00% +0.06% / -0.06% +0.72% +0.61%] index_select strided 3 : Elapsed 0.018 ms (1.821 ms / 100) 1.798 -> 1.799 ( +0.06%) [ +0.00% +0.17% +0.06% / +0.06% +0.95% +0.61%] index_select strided 5 : Elapsed 0.018 ms (1.798 ms / 100) 1.812 -> 1.816 ( +0.22%) [ +0.00% +0.00% +0.06% / +0.22% +0.66% +0.55%] index_select strided 7 : Elapsed 0.018 ms (1.812 ms / 100) 1.810 -> 1.815 ( +0.28%) [ +0.11% +0.00% +0.17% / +0.28% +0.88% +1.05%] index_select strided 8 : Elapsed 0.018 ms (1.812 ms / 100) 1.810 -> 1.813 ( +0.17%) [ +0.00% +0.00% +0.11% / +0.17% +0.77% +0.83%] index_select strided 16 : Elapsed 0.018 ms (1.810 ms / 100) 1.815 -> 1.815 ( +0.00%) [ +0.17% +0.00% +0.06% / +0.00% +0.83% +0.50%] index_select random : Elapsed 0.018 ms (1.818 ms / 100) 1.812 -> 1.817 ( +0.28%) [ +0.22% +0.00% +0.17% / +0.28% +0.83% +1.10%] index_select random_sorted : Elapsed 0.018 ms (1.816 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.11% +0.00% +0.22% / +0.22% +0.55% +0.50%] index_select perm : Elapsed 0.018 ms (1.815 ms / 100) 1.812 -> 1.813 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.72% +0.61%] index_select perm_sorted : Elapsed 0.018 ms (1.812 ms / 100) B = [5, 16, 40, 4] (stride (2560, 1, 64, 16)) A = [20, 16, 40, 4] (stride (160, 3200, 1, 40)) dim = 0 1.674 -> 1.676 ( +0.12%) [ +0.24% +0.00% +0.00% / +0.12% +0.60% +0.78%] index_select const : Elapsed 0.017 ms (1.678 ms / 100) 1.694 -> 1.693 ( -0.06%) [ +0.06% +0.06% +0.00% / +0.06% -0.06% -0.06%] index_select wrap : Elapsed 0.017 ms (1.695 ms / 100) 1.695 -> 1.692 ( -0.18%) [ +0.00% +0.18% +0.18% / +0.29% +0.12% -0.18%] index_select linear : Elapsed 0.017 ms (1.695 ms / 100) 1.698 -> 1.695 ( -0.18%) [ +0.00% +0.59% +0.18% / +0.18% -0.12% -0.18%] index_select reverse : Elapsed 0.017 ms (1.698 ms / 100) 1.670 -> 1.674 ( +0.24%) [ +0.12% +0.06% +0.00% / +0.24% +1.02% +0.84%] index_select skip64 : Elapsed 0.017 ms (1.672 ms / 100) 1.670 -> 1.671 ( +0.06%) [ +0.12% +0.30% +0.00% / +0.06% +0.96% +1.08%] index_select skip256 : Elapsed 0.017 ms (1.672 ms / 100) 1.693 -> 1.691 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.18% +0.06%] index_select spread : Elapsed 0.017 ms (1.694 ms / 100) 1.702 -> 1.695 ( -0.41%) [ +0.18% +0.00% +0.18% / +0.24% -0.41% -0.06%] index_select strided 3 : Elapsed 0.017 ms (1.705 ms / 100) 1.690 -> 1.687 ( -0.18%) [ +0.18% +0.00% +0.12% / +0.12% -0.18% -0.18%] index_select strided 5 : Elapsed 0.017 ms (1.693 ms / 100) 1.692 -> 1.692 ( +0.00%) [ +0.24% +0.12% +0.00% / +0.18% +0.00% +0.06%] index_select strided 7 : Elapsed 0.017 ms (1.696 ms / 100) 1.688 -> 1.689 ( +0.06%) [ +0.18% +0.00% +0.00% / +0.06% +0.41% +0.47%] index_select strided 8 : Elapsed 0.017 ms (1.691 ms / 100) 1.688 -> 1.687 ( -0.06%) [ +0.36% +0.00% +0.00% / -0.06% +0.36% +0.59%] index_select strided 16 : Elapsed 0.017 ms (1.694 ms / 100) 1.688 -> 1.691 ( +0.18%) [ +0.30% +0.12% +0.00% / +0.24% +0.18% +0.24%] index_select random : Elapsed 0.017 ms (1.693 ms / 100) 1.689 -> 1.689 ( +0.00%) [ +0.12% +0.18% +0.00% / +0.36% +0.18% +0.00%] index_select random_sorted : Elapsed 0.017 ms (1.691 ms / 100) 1.690 -> 1.694 ( +0.24%) [ +0.18% +0.18% +0.00% / +0.24% +0.47% +0.77%] index_select perm : Elapsed 0.017 ms (1.693 ms / 100) 1.692 -> 1.693 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.35% +0.30%] index_select perm_sorted : Elapsed 0.017 ms (1.693 ms / 100) B = [5, 16, 40, 4] (stride (64, 4, 320, 1)) A = [20, 16, 40, 4] (stride (40, 3200, 1, 800)) dim = 0 1.778 -> 1.781 ( +0.17%) [ +0.06% +0.28% +0.00% / +0.22% +0.28% +0.17%] index_select const : Elapsed 0.018 ms (1.779 ms / 100) 1.822 -> 1.819 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.16% +0.38% +0.22%] index_select wrap : Elapsed 0.018 ms (1.825 ms / 100) 1.823 -> 1.824 ( +0.05%) [ +0.00% +0.00% +0.16% / +0.05% +0.38% +0.49%] index_select linear : Elapsed 0.018 ms (1.823 ms / 100) 1.817 -> 1.822 ( +0.28%) [ +0.00% +0.06% +0.17% / +0.28% +0.55% +0.94%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.34% +0.00% +0.11% / +0.11% +0.28% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.783 ms / 100) 1.776 -> 1.780 ( +0.23%) [ +0.28% +0.00% +0.17% / +0.23% +0.51% +0.68%] index_select skip256 : Elapsed 0.018 ms (1.781 ms / 100) 1.820 -> 1.819 ( -0.05%) [ +0.16% +0.00% +0.00% / -0.05% +0.60% +0.38%] index_select spread : Elapsed 0.018 ms (1.823 ms / 100) 1.819 -> 1.821 ( +0.11%) [ +0.27% +0.00% +0.16% / +0.11% +0.33% +0.44%] index_select strided 3 : Elapsed 0.018 ms (1.824 ms / 100) 1.802 -> 1.806 ( +0.22%) [ +0.17% +0.00% +0.17% / +0.22% +0.83% +0.83%] index_select strided 5 : Elapsed 0.018 ms (1.805 ms / 100) 1.817 -> 1.821 ( +0.22%) [ +0.06% +0.17% +0.00% / +0.22% +0.72% +0.55%] index_select strided 7 : Elapsed 0.018 ms (1.818 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.00% +0.22% +0.06% / +0.17% +0.66% +0.77%] index_select strided 8 : Elapsed 0.018 ms (1.818 ms / 100) 1.819 -> 1.822 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.60% +0.38%] index_select strided 16 : Elapsed 0.018 ms (1.819 ms / 100) 1.817 -> 1.819 ( +0.11%) [ +0.17% +0.06% +0.00% / +0.11% +0.66% +0.72%] index_select random : Elapsed 0.018 ms (1.820 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.00% +0.22% +0.11% / +0.11% +0.83% +0.72%] index_select random_sorted : Elapsed 0.018 ms (1.818 ms / 100) 1.819 -> 1.822 ( +0.16%) [ +0.00% +0.11% +0.00% / +0.16% +0.60% +0.55%] index_select perm : Elapsed 0.018 ms (1.819 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.00% +0.17% +0.11% / +0.11% +0.55% +0.83%] index_select perm_sorted : Elapsed 0.018 ms (1.818 ms / 100) B = [5, 16, 40, 4] (stride (640, 1, 16, 3200)) A = [20, 16, 40, 4] (stride (64, 1, 1280, 16)) dim = 0 1.845 -> 1.841 ( -0.22%) [ +0.00% +0.16% +0.27% / +0.11% -0.22% -0.22%] index_select const : Elapsed 0.018 ms (1.845 ms / 100) 1.827 -> 1.826 ( -0.05%) [ +0.11% +0.00% +0.00% / -0.05% +0.16% +0.05%] index_select wrap : Elapsed 0.018 ms (1.829 ms / 100) 1.829 -> 1.830 ( +0.05%) [ +0.16% +0.00% +0.05% / +0.05% +0.16% +0.44%] index_select linear : Elapsed 0.018 ms (1.832 ms / 100) 1.841 -> 1.839 ( -0.11%) [ +0.05% +0.00% +0.00% / -0.11% +0.16% +0.16%] index_select reverse : Elapsed 0.018 ms (1.842 ms / 100) 1.840 -> 1.842 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.33% +0.38%] index_select skip64 : Elapsed 0.018 ms (1.842 ms / 100) 1.837 -> 1.841 ( +0.22%) [ +0.16% +0.16% +0.00% / +0.22% +0.27% +0.27%] index_select skip256 : Elapsed 0.018 ms (1.840 ms / 100) 1.830 -> 1.837 ( +0.38%) [ +0.11% +0.00% +0.16% / +0.38% +0.66% +0.38%] index_select spread : Elapsed 0.018 ms (1.832 ms / 100) 1.847 -> 1.845 ( -0.11%) [ +0.00% +0.16% +0.00% / -0.11% +0.27% +0.32%] index_select strided 3 : Elapsed 0.018 ms (1.847 ms / 100) 1.840 -> 1.844 ( +0.22%) [ +0.00% +0.11% +0.11% / +0.22% +0.38% +0.49%] index_select strided 5 : Elapsed 0.018 ms (1.840 ms / 100) 1.836 -> 1.839 ( +0.16%) [ +0.05% +0.00% +0.05% / +0.16% +0.38% +0.33%] index_select strided 7 : Elapsed 0.018 ms (1.837 ms / 100) 1.832 -> 1.832 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.27% +0.16%] index_select strided 8 : Elapsed 0.018 ms (1.835 ms / 100) 1.839 -> 1.837 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.22% +0.38%] index_select strided 16 : Elapsed 0.018 ms (1.839 ms / 100) 1.835 -> 1.838 ( +0.16%) [ +0.33% +0.00% +0.33% / +0.16% +0.33% +0.33%] index_select random : Elapsed 0.018 ms (1.841 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.22% +0.11% / +0.11% +0.27% +0.27%] index_select random_sorted : Elapsed 0.018 ms (1.824 ms / 100) 1.839 -> 1.844 ( +0.27%) [ +0.27% +0.16% +0.00% / +0.27% +0.38% +0.54%] index_select perm : Elapsed 0.018 ms (1.844 ms / 100) 1.839 -> 1.840 ( +0.05%) [ +0.33% +0.00% +0.22% / +0.05% +0.54% +0.49%] index_select perm_sorted : Elapsed 0.018 ms (1.845 ms / 100) out_shape = [20, 5, 40, 4] in_shape = [20, 16, 40, 4] idx_dim = 1 B = [20, 5, 40, 4] (stride (800, 160, 1, 40)) A = [20, 16, 40, 4] (stride (1, 800, 20, 12800)) dim = 1 2.384 -> 2.385 ( +0.04%) [ +0.00% +0.29% +0.00% / +0.04% +0.55% +0.46%] index_select const : Elapsed 0.024 ms (2.384 ms / 100) 2.382 -> 2.383 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +0.42% +0.46%] index_select wrap : Elapsed 0.024 ms (2.382 ms / 100) 2.393 -> 2.391 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.25% +0.50%] index_select linear : Elapsed 0.024 ms (2.394 ms / 100) 2.391 -> 2.393 ( +0.08%) [ +0.00% +0.13% +0.13% / +0.08% +0.46% +0.29%] index_select reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.381 -> 2.385 ( +0.17%) [ +0.17% +0.21% +0.00% / +0.17% +0.46% +0.88%] index_select skip64 : Elapsed 0.024 ms (2.385 ms / 100) 2.382 -> 2.385 ( +0.13%) [ +0.25% +0.00% +0.13% / +0.13% +0.46% +0.42%] index_select skip256 : Elapsed 0.024 ms (2.388 ms / 100) 2.387 -> 2.392 ( +0.21%) [ +0.08% +0.00% +0.13% / +0.21% +0.29% +0.54%] index_select spread : Elapsed 0.024 ms (2.389 ms / 100) 2.386 -> 2.391 ( +0.21%) [ +0.34% +0.08% +0.00% / +0.21% +0.46% +0.50%] index_select strided 3 : Elapsed 0.024 ms (2.394 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.38% +0.50%] index_select strided 5 : Elapsed 0.024 ms (2.391 ms / 100) 2.385 -> 2.390 ( +0.21%) [ +0.04% +0.21% +0.00% / +0.21% +0.63% +0.59%] index_select strided 7 : Elapsed 0.024 ms (2.386 ms / 100) 2.386 -> 2.389 ( +0.13%) [ +0.00% +0.13% +0.08% / +0.13% +0.38% +0.54%] index_select strided 8 : Elapsed 0.024 ms (2.386 ms / 100) 2.386 -> 2.383 ( -0.13%) [ +0.00% +0.21% +0.04% / -0.13% +0.46% +0.46%] index_select random : Elapsed 0.024 ms (2.386 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.67% +0.67%] index_select random_sorted : Elapsed 0.024 ms (2.392 ms / 100) 2.388 -> 2.388 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.38% +0.50%] index_select perm : Elapsed 0.024 ms (2.388 ms / 100) 2.384 -> 2.384 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.50% +0.38%] index_select perm_sorted : Elapsed 0.024 ms (2.385 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 20, 5)) A = [20, 16, 40, 4] (stride (1, 20, 320, 12800)) dim = 1 2.304 -> 2.303 ( -0.04%) [ +0.09% +0.13% +0.00% / -0.04% +0.48% +0.39%] index_select const : Elapsed 0.023 ms (2.306 ms / 100) 2.306 -> 2.310 ( +0.17%) [ +0.00% +0.13% +0.22% / +0.17% +0.39% +0.35%] index_select wrap : Elapsed 0.023 ms (2.306 ms / 100) 2.303 -> 2.303 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.61% +0.56%] index_select linear : Elapsed 0.023 ms (2.305 ms / 100) 2.312 -> 2.313 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.52% +0.43%] index_select reverse : Elapsed 0.023 ms (2.313 ms / 100) 2.304 -> 2.306 ( +0.09%) [ +0.22% +0.00% +0.00% / +0.09% +0.52% +0.39%] index_select skip64 : Elapsed 0.023 ms (2.309 ms / 100) 2.302 -> 2.305 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +0.52% +0.52%] index_select skip256 : Elapsed 0.023 ms (2.303 ms / 100) 2.312 -> 2.313 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.17% +0.17%] index_select spread : Elapsed 0.023 ms (2.315 ms / 100) 2.309 -> 2.316 ( +0.30%) [ +0.09% +0.26% +0.00% / +0.30% +0.30% +0.30%] index_select strided 3 : Elapsed 0.023 ms (2.311 ms / 100) 2.311 -> 2.315 ( +0.17%) [ +0.26% +0.04% +0.00% / +0.17% +0.43% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.317 ms / 100) 2.305 -> 2.304 ( -0.04%) [ +0.17% +0.00% +0.17% / -0.04% +0.48% +0.30%] index_select strided 7 : Elapsed 0.023 ms (2.309 ms / 100) 2.302 -> 2.308 ( +0.26%) [ +0.00% +0.22% +0.13% / +0.26% +0.43% +0.39%] index_select strided 8 : Elapsed 0.023 ms (2.302 ms / 100) 2.316 -> 2.319 ( +0.13%) [ +0.22% +0.04% +0.00% / +0.13% +0.26% +0.26%] index_select random : Elapsed 0.023 ms (2.321 ms / 100) 2.322 -> 2.326 ( +0.17%) [ +0.17% +0.00% +0.30% / +0.17% +0.34% +0.30%] index_select random_sorted : Elapsed 0.023 ms (2.326 ms / 100) 2.314 -> 2.317 ( +0.13%) [ +0.22% +0.22% +0.00% / +0.13% +0.52% +0.35%] index_select perm : Elapsed 0.023 ms (2.319 ms / 100) 2.312 -> 2.317 ( +0.22%) [ +0.00% +0.22% +0.09% / +0.22% +0.43% +0.48%] index_select perm_sorted : Elapsed 0.023 ms (2.312 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 5, 200)) A = [20, 16, 40, 4] (stride (2560, 40, 1, 640)) dim = 1 2.284 -> 2.289 ( +0.22%) [ +0.26% +0.22% +0.00% / +0.22% +0.53% +0.44%] index_select const : Elapsed 0.023 ms (2.290 ms / 100) 2.331 -> 2.338 ( +0.30%) [ +0.00% +0.13% +0.26% / +0.30% +0.64% +0.47%] index_select wrap : Elapsed 0.023 ms (2.331 ms / 100) 2.336 -> 2.346 ( +0.43%) [ +0.00% +0.17% +0.04% / +0.43% +0.43% +0.47%] index_select linear : Elapsed 0.023 ms (2.336 ms / 100) 2.335 -> 2.341 ( +0.26%) [ +0.21% +0.43% +0.00% / +0.26% +0.51% +0.60%] index_select reverse : Elapsed 0.023 ms (2.340 ms / 100) 2.282 -> 2.281 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.53% +0.57%] index_select skip64 : Elapsed 0.023 ms (2.283 ms / 100) 2.284 -> 2.286 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +0.31% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.285 ms / 100) 2.352 -> 2.352 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.13% +0.00% +0.34%] index_select spread : Elapsed 0.024 ms (2.352 ms / 100) 2.347 -> 2.346 ( -0.04%) [ +0.00% +0.09% +0.09% / -0.04% +0.38% +0.34%] index_select strided 3 : Elapsed 0.023 ms (2.347 ms / 100) 2.346 -> 2.353 ( +0.30%) [ +0.09% +0.00% +0.34% / +0.30% +0.60% +0.77%] index_select strided 5 : Elapsed 0.023 ms (2.348 ms / 100) 2.335 -> 2.338 ( +0.13%) [ +0.30% +0.00% +0.09% / +0.13% +0.81% +0.99%] index_select strided 7 : Elapsed 0.023 ms (2.342 ms / 100) 2.285 -> 2.286 ( +0.04%) [ +0.00% +0.18% +0.00% / +0.04% +0.57% +0.53%] index_select strided 8 : Elapsed 0.023 ms (2.285 ms / 100) 2.336 -> 2.339 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.13% +0.68% +0.60%] index_select random : Elapsed 0.023 ms (2.341 ms / 100) 2.330 -> 2.338 ( +0.34%) [ +0.26% +0.13% +0.00% / +0.34% +1.12% +0.86%] index_select random_sorted : Elapsed 0.023 ms (2.336 ms / 100) 2.341 -> 2.343 ( +0.09%) [ +0.17% +0.00% +0.17% / +0.09% +0.56% +0.60%] index_select perm : Elapsed 0.023 ms (2.345 ms / 100) 2.328 -> 2.332 ( +0.17%) [ +0.04% +0.13% +0.00% / +0.17% +0.60% +0.56%] index_select perm_sorted : Elapsed 0.023 ms (2.329 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 5, 200)) A = [20, 16, 40, 4] (stride (1, 3200, 20, 800)) dim = 1 2.408 -> 2.412 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.17% +0.37% +0.17%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.405 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.21% +0.25%] index_select wrap : Elapsed 0.024 ms (2.406 ms / 100) 2.399 -> 2.402 ( +0.13%) [ +0.00% +0.25% +0.17% / +0.13% +0.29% +0.33%] index_select linear : Elapsed 0.024 ms (2.399 ms / 100) 2.396 -> 2.400 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.42% +0.50%] index_select reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.17% +0.00% +0.04% / +0.12% +0.17% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.407 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.29% +0.00% +0.04% / +0.08% +0.29% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.415 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.33% +0.00% +0.00% / +0.13% +0.21% -0.04%] index_select spread : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.21% +0.04% +0.00% / +0.17% +0.04% +0.00%] index_select strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.403 -> 2.400 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.04% +0.25% -0.12%] index_select strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.21% +0.00% +0.04% / +0.21% +0.08% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.00% +0.17% +0.12% / +0.12% +0.04% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.407 ms / 100) 2.401 -> 2.402 ( +0.04%) [ +0.17% +0.00% +0.29% / +0.12% +0.04% +0.21%] index_select random : Elapsed 0.024 ms (2.405 ms / 100) 2.396 -> 2.401 ( +0.21%) [ +0.00% +0.25% +0.17% / +0.21% +0.25% +0.42%] index_select random_sorted : Elapsed 0.024 ms (2.396 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.29% +0.25%] index_select perm : Elapsed 0.024 ms (2.401 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.25% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) B = [20, 5, 40, 4] (stride (160, 3200, 4, 1)) A = [20, 16, 40, 4] (stride (1, 80, 1280, 20)) dim = 1 2.260 -> 2.263 ( +0.13%) [ +0.00% +0.35% +0.00% / +0.13% +0.35% +0.27%] index_select const : Elapsed 0.023 ms (2.260 ms / 100) 2.247 -> 2.247 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.45% +0.40%] index_select wrap : Elapsed 0.022 ms (2.249 ms / 100) 2.253 -> 2.253 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.80% +0.44%] index_select linear : Elapsed 0.023 ms (2.254 ms / 100) 2.263 -> 2.268 ( +0.22%) [ +0.22% +0.40% +0.00% / +0.22% +0.53% +0.53%] index_select reverse : Elapsed 0.023 ms (2.268 ms / 100) 2.256 -> 2.265 ( +0.40%) [ +0.00% +0.35% +0.22% / +0.40% +0.84% +0.71%] index_select skip64 : Elapsed 0.023 ms (2.256 ms / 100) 2.258 -> 2.259 ( +0.04%) [ +0.18% +0.22% +0.00% / +0.04% +0.53% +0.53%] index_select skip256 : Elapsed 0.023 ms (2.262 ms / 100) 2.259 -> 2.259 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.27% +0.31%] index_select spread : Elapsed 0.023 ms (2.260 ms / 100) 2.258 -> 2.264 ( +0.27%) [ +0.09% +0.04% +0.00% / +0.40% +0.27% +0.53%] index_select strided 3 : Elapsed 0.023 ms (2.260 ms / 100) 2.258 -> 2.257 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.31% +0.31%] index_select strided 5 : Elapsed 0.023 ms (2.258 ms / 100) 2.249 -> 2.250 ( +0.04%) [ +0.13% +0.22% +0.00% / +0.04% +0.44% +0.40%] index_select strided 7 : Elapsed 0.023 ms (2.252 ms / 100) 2.254 -> 2.259 ( +0.22%) [ +0.27% +0.00% +0.04% / +0.22% +0.75% +0.40%] index_select strided 8 : Elapsed 0.023 ms (2.260 ms / 100) 2.249 -> 2.247 ( -0.09%) [ +0.00% +0.27% +0.00% / -0.09% +0.36% +0.49%] index_select random : Elapsed 0.022 ms (2.249 ms / 100) 2.257 -> 2.257 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.40%] index_select random_sorted : Elapsed 0.023 ms (2.257 ms / 100) 2.261 -> 2.269 ( +0.35%) [ +0.00% +0.18% +0.09% / +0.35% +0.35% +0.66%] index_select perm : Elapsed 0.023 ms (2.261 ms / 100) 2.255 -> 2.255 ( +0.00%) [ +0.09% +0.00% +0.13% / +0.00% +0.49% +0.67%] index_select perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) B = [20, 5, 40, 4] (stride (160, 3200, 1, 40)) dim = 1 fill_cnt = 16 2.353 -> 2.354 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.68% +0.81%] index_fill_ const : Elapsed 0.024 ms (2.355 ms / 100) 2.351 -> 2.352 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.04% +0.81% +0.68%] index_fill_ linear : Elapsed 0.024 ms (2.351 ms / 100) 2.360 -> 2.364 ( +0.17%) [ +0.00% +0.13% +0.08% / +0.17% +0.68% +0.47%] index_fill_ reverse : Elapsed 0.024 ms (2.360 ms / 100) 2.353 -> 2.360 ( +0.30%) [ +0.17% +0.34% +0.00% / +0.30% +0.51% +0.55%] index_fill_ skip64 : Elapsed 0.024 ms (2.357 ms / 100) 2.339 -> 2.342 ( +0.13%) [ +0.04% +0.00% +0.17% / +0.13% +0.60% +0.60%] index_fill_ skip256 : Elapsed 0.023 ms (2.340 ms / 100) 2.365 -> 2.366 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.63% +0.59%] index_fill_ spread : Elapsed 0.024 ms (2.365 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.21% +0.21% +0.00% / +0.08% +0.81% +0.98%] index_fill_ strided 3 : Elapsed 0.024 ms (2.363 ms / 100) 2.360 -> 2.359 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.59% +0.72%] index_fill_ random : Elapsed 0.024 ms (2.360 ms / 100) 2.356 -> 2.361 ( +0.21%) [ +0.08% +0.21% +0.00% / +0.21% +0.72% +0.93%] index_fill_ random_sorted : Elapsed 0.024 ms (2.358 ms / 100) B = [20, 5, 40, 4] (stride (20, 1, 400, 5)) A = [20, 16, 40, 4] (stride (160, 3200, 4, 1)) dim = 1 2.122 -> 2.110 ( -0.57%) [ +0.00% +0.33% +0.14% / -0.09% -0.57% -0.28%] index_select const : Elapsed 0.021 ms (2.122 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.19% +0.09% +0.05%] index_select wrap : Elapsed 0.022 ms (2.156 ms / 100) 2.150 -> 2.157 ( +0.33%) [ +0.37% +0.09% +0.00% / +0.37% +0.42% +0.33%] index_select linear : Elapsed 0.022 ms (2.158 ms / 100) 2.158 -> 2.146 ( -0.56%) [ +0.00% +0.00% +0.14% / -0.05% -0.46% -0.56%] index_select reverse : Elapsed 0.022 ms (2.158 ms / 100) 2.121 -> 2.115 ( -0.28%) [ +0.00% +0.14% +0.00% / -0.05% -0.28% -0.24%] index_select skip64 : Elapsed 0.021 ms (2.121 ms / 100) 2.118 -> 2.109 ( -0.42%) [ +0.52% +0.38% +0.00% / +0.38% -0.28% -0.42%] index_select skip256 : Elapsed 0.021 ms (2.129 ms / 100) 2.183 -> 2.156 ( -1.24%) [ +0.14% +0.18% +0.00% / +0.05% -1.24% -1.05%] index_select spread : Elapsed 0.022 ms (2.186 ms / 100) 2.182 -> 2.156 ( -1.19%) [ +0.18% +0.00% +0.37% / +0.14% -1.10% -1.19%] index_select strided 3 : Elapsed 0.022 ms (2.186 ms / 100) 2.151 -> 2.156 ( +0.23%) [ +0.14% +0.09% +0.00% / +0.23% +0.74% +0.70%] index_select strided 5 : Elapsed 0.022 ms (2.154 ms / 100) 2.188 -> 2.151 ( -1.69%) [ +0.00% +0.09% +0.09% / -0.09% -1.60% -1.69%] index_select strided 7 : Elapsed 0.022 ms (2.188 ms / 100) 2.140 -> 2.130 ( -0.47%) [ +0.00% +0.19% +0.00% / +0.00% -0.47% -0.47%] index_select strided 8 : Elapsed 0.021 ms (2.140 ms / 100) 2.147 -> 2.149 ( +0.09%) [ +0.09% +0.61% +0.00% / +0.09% +0.37% +0.61%] index_select random : Elapsed 0.021 ms (2.149 ms / 100) 2.151 -> 2.150 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.23% +0.00% -0.05%] index_select random_sorted : Elapsed 0.022 ms (2.152 ms / 100) 2.164 -> 2.142 ( -1.02%) [ +0.00% +0.00% +0.09% / +0.14% -1.02% -0.79%] index_select perm : Elapsed 0.022 ms (2.164 ms / 100) 2.165 -> 2.144 ( -0.97%) [ +0.00% +0.23% +0.18% / +0.14% -0.74% -0.97%] index_select perm_sorted : Elapsed 0.022 ms (2.165 ms / 100) B = [20, 5, 40, 4] (stride (20, 1, 400, 5)) A = [20, 16, 40, 4] (stride (160, 3200, 1, 40)) dim = 1 2.245 -> 2.246 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.04% +0.85% +0.58%] index_select const : Elapsed 0.022 ms (2.245 ms / 100) 2.318 -> 2.315 ( -0.13%) [ +0.13% +0.22% +0.00% / -0.04% +0.17% -0.13%] index_select wrap : Elapsed 0.023 ms (2.321 ms / 100) 2.318 -> 2.318 ( +0.00%) [ +0.43% +0.35% +0.00% / +0.52% +0.00% +0.04%] index_select linear : Elapsed 0.023 ms (2.328 ms / 100) 2.335 -> 2.324 ( -0.47%) [ +0.39% +0.00% +0.26% / +0.09% -0.47% -0.34%] index_select reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.241 -> 2.242 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.40% +0.58%] index_select skip64 : Elapsed 0.022 ms (2.242 ms / 100) 2.244 -> 2.251 ( +0.31%) [ +0.36% +0.00% +0.45% / +0.31% +0.94% +0.53%] index_select skip256 : Elapsed 0.023 ms (2.252 ms / 100) 2.310 -> 2.308 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +0.69% +0.61%] index_select spread : Elapsed 0.023 ms (2.311 ms / 100) 2.311 -> 2.310 ( -0.04%) [ +0.13% +0.22% +0.00% / -0.04% +0.43% +0.39%] index_select strided 3 : Elapsed 0.023 ms (2.314 ms / 100) 2.294 -> 2.298 ( +0.17%) [ +0.39% +0.22% +0.00% / +0.17% +0.61% +0.92%] index_select strided 5 : Elapsed 0.023 ms (2.303 ms / 100) 2.324 -> 2.318 ( -0.26%) [ +0.00% +0.04% +0.04% / -0.26% +0.34% +0.47%] index_select strided 7 : Elapsed 0.023 ms (2.324 ms / 100) 2.258 -> 2.260 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.71% +0.75%] index_select strided 8 : Elapsed 0.023 ms (2.258 ms / 100) 2.307 -> 2.303 ( -0.17%) [ +0.22% +0.09% +0.00% / +0.22% -0.09% -0.17%] index_select random : Elapsed 0.023 ms (2.312 ms / 100) 2.302 -> 2.310 ( +0.35%) [ +0.56% +0.26% +0.00% / +0.35% +0.78% +1.22%] index_select random_sorted : Elapsed 0.023 ms (2.315 ms / 100) 2.314 -> 2.314 ( +0.00%) [ +0.13% +0.00% +0.17% / +0.00% +0.65% +0.82%] index_select perm : Elapsed 0.023 ms (2.317 ms / 100) 2.304 -> 2.305 ( +0.04%) [ +0.13% +0.30% +0.00% / +0.04% +0.95% +1.00%] index_select perm_sorted : Elapsed 0.023 ms (2.307 ms / 100) B = [20, 5, 40, 4] (stride (5, 1, 400, 100)) A = [20, 16, 40, 4] (stride (2560, 1, 16, 640)) dim = 1 2.280 -> 2.274 ( -0.26%) [ +0.22% +0.00% +0.00% / +0.18% -0.18% -0.26%] index_select const : Elapsed 0.023 ms (2.285 ms / 100) 2.276 -> 2.273 ( -0.13%) [ +0.18% +0.13% +0.00% / +0.00% +0.04% -0.13%] index_select wrap : Elapsed 0.023 ms (2.280 ms / 100) 2.274 -> 2.267 ( -0.31%) [ +0.00% +0.09% +0.00% / +0.00% -0.22% -0.31%] index_select linear : Elapsed 0.023 ms (2.274 ms / 100) 2.267 -> 2.271 ( +0.18%) [ +0.13% +0.13% +0.00% / +0.18% +0.26% +0.35%] index_select reverse : Elapsed 0.023 ms (2.270 ms / 100) 2.274 -> 2.273 ( -0.04%) [ +0.00% +0.13% +0.04% / +0.09% -0.04% +0.04%] index_select skip64 : Elapsed 0.023 ms (2.274 ms / 100) 2.282 -> 2.272 ( -0.44%) [ +0.00% +0.22% +0.09% / +0.26% -0.26% -0.44%] index_select skip256 : Elapsed 0.023 ms (2.282 ms / 100) 2.286 -> 2.280 ( -0.26%) [ +0.00% +0.00% +0.00% / +0.04% -0.26% -0.04%] index_select spread : Elapsed 0.023 ms (2.286 ms / 100) 2.289 -> 2.281 ( -0.35%) [ +0.17% +0.00% +0.26% / +0.13% -0.35% -0.35%] index_select strided 3 : Elapsed 0.023 ms (2.293 ms / 100) 2.282 -> 2.284 ( +0.09%) [ +0.00% +0.22% +0.13% / +0.26% +0.09% +0.13%] index_select strided 5 : Elapsed 0.023 ms (2.282 ms / 100) 2.281 -> 2.279 ( -0.09%) [ +0.26% +0.09% +0.00% / +0.18% -0.09% +0.13%] index_select strided 7 : Elapsed 0.023 ms (2.287 ms / 100) 2.300 -> 2.289 ( -0.48%) [ +0.04% +0.00% +0.09% / +0.00% -0.48% -0.35%] index_select strided 8 : Elapsed 0.023 ms (2.301 ms / 100) 2.285 -> 2.283 ( -0.09%) [ +0.00% +0.18% +0.22% / +0.44% +0.13% -0.09%] index_select random : Elapsed 0.023 ms (2.285 ms / 100) 2.281 -> 2.280 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.22% -0.04%] index_select random_sorted : Elapsed 0.023 ms (2.283 ms / 100) 2.287 -> 2.285 ( -0.09%) [ +0.39% +0.31% +0.00% / +0.35% -0.09% +0.00%] index_select perm : Elapsed 0.023 ms (2.296 ms / 100) 2.292 -> 2.286 ( -0.26%) [ +0.00% +0.09% +0.09% / +0.04% -0.26% -0.17%] index_select perm_sorted : Elapsed 0.023 ms (2.292 ms / 100) out_shape = [20, 16, 5, 4] in_shape = [20, 16, 40, 4] idx_dim = 2 B = [20, 16, 5, 4] (stride (320, 4, 64, 1)) A = [20, 16, 40, 4] (stride (4, 80, 1280, 1)) dim = 2 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.41%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.41% +0.41%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.47%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.504 -> 1.505 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.66% +0.60%] index_select skip64 : Elapsed 0.015 ms (1.506 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.54% +0.54%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.47% +0.47%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.61%] index_select strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.47%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.47% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.47% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.54%] index_select strided 16 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.27% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_select random : Elapsed 0.015 ms (1.479 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.68%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [20, 16, 5, 4] (stride (320, 5, 1, 80)) A = [20, 16, 40, 4] (stride (16, 1, 320, 12800)) dim = 2 1.381 -> 1.382 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.36% +0.29%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.36% +0.29% +0.00% / +0.29% +0.43% +0.58%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.385 ( +0.36%) [ +0.22% +0.22% +0.00% / +0.36% +0.58% +0.51%] index_select linear : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +0.51% +0.43%] index_select reverse : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.43% +0.43%] index_select skip64 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.15% +0.00% +0.15% / +0.22% +0.51% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.381 ms / 100) 1.414 -> 1.415 ( +0.07%) [ +0.28% +0.14% +0.00% / +0.07% +0.71% +0.71%] index_select spread : Elapsed 0.014 ms (1.418 ms / 100) 1.382 -> 1.385 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.43% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.29% +0.00% +0.29% / +0.00% +0.51% +0.80%] index_select strided 5 : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.29% +0.00% +0.07% / +0.15% +0.65% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.56% +0.56%] index_select strided 8 : Elapsed 0.014 ms (1.419 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.07% +0.22% +0.00% / +0.22% +0.51% +0.43%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.58% +0.65%] index_select random : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +0.43% +0.43%] index_select random_sorted : Elapsed 0.014 ms (1.382 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.58% +0.00% +0.00% / +0.14% +0.43% +0.36%] index_select perm : Elapsed 0.014 ms (1.390 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.22% +0.00% +0.07% / +0.15% +0.73% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.382 ms / 100) B = [20, 16, 5, 4] (stride (16, 1, 1280, 320)) A = [20, 16, 40, 4] (stride (4, 3200, 80, 1)) dim = 2 1.412 -> 1.413 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.85% +0.64%] index_select const : Elapsed 0.014 ms (1.413 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.44% +0.44% +0.00% / +0.15% +0.44% +0.36%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.14% +0.14% +0.00% / +0.36% +0.22% +0.22%] index_select linear : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.29% +0.00% +0.00% / +0.22% +0.43% +0.29%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.400 -> 1.400 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.64% +0.50%] index_select skip64 : Elapsed 0.014 ms (1.400 ms / 100) 1.411 -> 1.412 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.85% +0.71%] index_select skip256 : Elapsed 0.014 ms (1.412 ms / 100) 1.393 -> 1.394 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.50% +0.43%] index_select spread : Elapsed 0.014 ms (1.394 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.36% +0.36%] index_select strided 3 : Elapsed 0.014 ms (1.382 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.29% +0.14% +0.00% / +0.14% +0.29% +0.29%] index_select strided 5 : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.07% +0.00% +0.14% / +0.22% +0.36% +0.29%] index_select strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.412 -> 1.413 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.78% +0.64%] index_select strided 8 : Elapsed 0.014 ms (1.413 ms / 100) 1.400 -> 1.401 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.57% +0.64%] index_select strided 16 : Elapsed 0.014 ms (1.401 ms / 100) 1.377 -> 1.381 ( +0.29%) [ +0.51% +0.29% +0.00% / +0.29% +0.58% +0.51%] index_select random : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.00% +0.00% +0.22% / +0.22% +0.58% +0.36%] index_select random_sorted : Elapsed 0.014 ms (1.380 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.29% +0.29%] index_select perm : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.29% +0.22% +0.00% / +0.00% +0.51% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.382 ms / 100) B = [20, 16, 5, 4] (stride (5, 100, 1, 1600)) A = [20, 16, 40, 4] (stride (1, 20, 1280, 320)) dim = 2 1.576 -> 1.578 ( +0.13%) [ +0.38% +0.00% +0.00% / +0.13% +0.89% +0.82%] index_select const : Elapsed 0.016 ms (1.582 ms / 100) 1.576 -> 1.580 ( +0.25%) [ +0.13% +0.00% +0.00% / +0.25% +0.51% +0.63%] index_select wrap : Elapsed 0.016 ms (1.578 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.38% +0.89%] index_select linear : Elapsed 0.016 ms (1.575 ms / 100) 1.575 -> 1.574 ( -0.06%) [ +0.25% +0.13% +0.00% / -0.06% +0.38% +0.51%] index_select reverse : Elapsed 0.016 ms (1.579 ms / 100) 1.578 -> 1.577 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.32% +0.32%] index_select skip64 : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.579 ( +0.13%) [ +0.25% +0.06% +0.00% / +0.13% +0.70% +0.57%] index_select skip256 : Elapsed 0.016 ms (1.581 ms / 100) 1.574 -> 1.573 ( -0.06%) [ +0.00% +0.44% +0.19% / -0.06% +0.51% +0.44%] index_select spread : Elapsed 0.016 ms (1.574 ms / 100) 1.574 -> 1.580 ( +0.38%) [ +0.06% +0.00% +0.32% / +0.38% +0.57% +0.64%] index_select strided 3 : Elapsed 0.016 ms (1.575 ms / 100) 1.573 -> 1.578 ( +0.32%) [ +0.00% +0.32% +0.38% / +0.32% +0.51% +0.70%] index_select strided 5 : Elapsed 0.016 ms (1.573 ms / 100) 1.572 -> 1.575 ( +0.19%) [ +0.38% +0.38% +0.00% / +0.19% +0.70% +0.64%] index_select strided 7 : Elapsed 0.016 ms (1.578 ms / 100) 1.576 -> 1.581 ( +0.32%) [ +0.19% +0.06% +0.00% / +0.32% +1.02% +0.70%] index_select strided 8 : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.581 ( +0.25%) [ +0.00% +0.06% +0.00% / +0.25% +0.38% +0.25%] index_select strided 16 : Elapsed 0.016 ms (1.577 ms / 100) 1.585 -> 1.582 ( -0.19%) [ +0.19% +0.00% +0.06% / -0.19% +1.14% +1.07%] index_select random : Elapsed 0.016 ms (1.588 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.06% +0.00% +0.32% / +0.13% +0.64% +0.70%] index_select random_sorted : Elapsed 0.016 ms (1.575 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.45% +0.38%] index_select perm : Elapsed 0.016 ms (1.574 ms / 100) 1.577 -> 1.579 ( +0.13%) [ +0.06% +0.13% +0.00% / +0.13% +0.95% +0.82%] index_select perm_sorted : Elapsed 0.016 ms (1.578 ms / 100) B = [20, 16, 5, 4] (stride (1, 100, 20, 1600)) A = [20, 16, 40, 4] (stride (40, 3200, 1, 800)) dim = 2 1.619 -> 1.620 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.43% +0.43%] index_select const : Elapsed 0.016 ms (1.619 ms / 100) 1.618 -> 1.619 ( +0.06%) [ +0.00% +0.06% +0.12% / +0.06% +0.31% +0.37%] index_select wrap : Elapsed 0.016 ms (1.618 ms / 100) 1.618 -> 1.617 ( -0.06%) [ +0.00% +0.12% +0.00% / -0.06% +0.43% +0.49%] index_select linear : Elapsed 0.016 ms (1.618 ms / 100) 1.617 -> 1.619 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.49% +0.49%] index_select reverse : Elapsed 0.016 ms (1.619 ms / 100) 1.619 -> 1.621 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.37% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.619 ms / 100) 1.619 -> 1.617 ( -0.12%) [ +0.00% +0.06% +0.00% / -0.12% +0.43% +0.43%] index_select skip256 : Elapsed 0.016 ms (1.619 ms / 100) 1.607 -> 1.609 ( +0.12%) [ +0.31% +0.19% +0.00% / +0.12% +0.68% +0.56%] index_select spread : Elapsed 0.016 ms (1.612 ms / 100) 1.616 -> 1.618 ( +0.12%) [ +0.31% +0.31% +0.00% / +0.12% +0.62% +0.74%] index_select strided 3 : Elapsed 0.016 ms (1.621 ms / 100) 1.616 -> 1.618 ( +0.12%) [ +0.06% +0.00% +0.19% / +0.12% +0.62% +0.56%] index_select strided 5 : Elapsed 0.016 ms (1.617 ms / 100) 1.617 -> 1.617 ( +0.00%) [ +0.00% +0.19% +0.06% / +0.00% +0.37% +0.43%] index_select strided 7 : Elapsed 0.016 ms (1.617 ms / 100) 1.612 -> 1.616 ( +0.25%) [ +0.12% +0.06% +0.00% / +0.25% +0.37% +0.50%] index_select strided 8 : Elapsed 0.016 ms (1.614 ms / 100) 1.606 -> 1.609 ( +0.19%) [ +0.12% +0.06% +0.00% / +0.19% +0.75% +0.81%] index_select strided 16 : Elapsed 0.016 ms (1.608 ms / 100) 1.613 -> 1.614 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.62% +0.50%] index_select random : Elapsed 0.016 ms (1.613 ms / 100) 1.615 -> 1.618 ( +0.19%) [ +0.06% +0.00% +0.12% / +0.19% +0.74% +0.62%] index_select random_sorted : Elapsed 0.016 ms (1.616 ms / 100) 1.613 -> 1.614 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.68% +0.62%] index_select perm : Elapsed 0.016 ms (1.614 ms / 100) 1.612 -> 1.612 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.62% +0.68%] index_select perm_sorted : Elapsed 0.016 ms (1.614 ms / 100) out_shape = [20, 16, 40, 5] in_shape = [20, 16, 40, 4] idx_dim = 3 B = [20, 16, 40, 5] (stride (3200, 200, 1, 40)) A = [20, 16, 40, 4] (stride (160, 3200, 1, 40)) dim = 3 5.608 -> 5.595 ( -0.23%) [ +0.07% +0.00% +0.05% / +0.05% -0.23% -0.21%] index_add_ linear : Elapsed 0.056 ms (5.612 ms / 100) 5.533 -> 5.518 ( -0.27%) [ +0.00% +0.14% +0.14% / +0.05% -0.27% -0.25%] index_copy_ linear : Elapsed 0.055 ms (5.533 ms / 100) 5.605 -> 5.590 ( -0.27%) [ +0.05% +0.00% +0.07% / +0.09% -0.27% -0.16%] index_add_ reverse : Elapsed 0.056 ms (5.608 ms / 100) 5.521 -> 5.524 ( +0.05%) [ +0.24% +0.11% +0.00% / +0.24% +0.11% +0.05%] index_copy_ reverse : Elapsed 0.055 ms (5.534 ms / 100) 5.604 -> 5.591 ( -0.23%) [ +0.02% +0.05% +0.00% / +0.07% -0.09% -0.23%] index_add_ spread : Elapsed 0.056 ms (5.605 ms / 100) 5.533 -> 5.522 ( -0.20%) [ +0.00% +0.14% +0.09% / +0.20% -0.20% -0.20%] index_copy_ spread : Elapsed 0.055 ms (5.533 ms / 100) 5.601 -> 5.591 ( -0.18%) [ +0.09% +0.00% +0.05% / +0.12% -0.12% -0.18%] index_add_ strided 3 : Elapsed 0.056 ms (5.606 ms / 100) 5.532 -> 5.521 ( -0.20%) [ +0.00% +0.13% +0.20% / -0.09% -0.20% +0.02%] index_copy_ strided 3 : Elapsed 0.055 ms (5.532 ms / 100) 5.601 -> 5.590 ( -0.20%) [ +0.14% +0.00% +0.11% / +0.05% -0.11% -0.20%] index_add_ perm : Elapsed 0.056 ms (5.609 ms / 100) 5.531 -> 5.523 ( -0.14%) [ +0.20% +0.00% +0.05% / +0.04% -0.14% -0.13%] index_copy_ perm : Elapsed 0.055 ms (5.542 ms / 100) 5.606 -> 5.599 ( -0.12%) [ +0.00% +0.16% +0.02% / +0.12% -0.12% -0.11%] index_add_ perm_sorted : Elapsed 0.056 ms (5.606 ms / 100) 5.536 -> 5.529 ( -0.13%) [ +0.13% +0.00% +0.04% / -0.02% -0.13% -0.09%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.543 ms / 100) 5.728 -> 5.722 ( -0.10%) [ +0.07% +0.00% +0.05% / -0.10% +0.10% +0.03%] index_select const : Elapsed 0.057 ms (5.732 ms / 100) 5.852 -> 5.833 ( -0.32%) [ +0.09% +0.00% +0.00% / +0.05% -0.31% -0.32%] index_select wrap : Elapsed 0.059 ms (5.857 ms / 100) 5.837 -> 5.824 ( -0.22%) [ +0.09% +0.00% +0.12% / +0.10% -0.10% -0.22%] index_select linear : Elapsed 0.058 ms (5.842 ms / 100) 5.826 -> 5.829 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.21% +0.07% +0.05%] index_select reverse : Elapsed 0.058 ms (5.829 ms / 100) 5.724 -> 5.719 ( -0.09%) [ +0.00% +0.12% +0.23% / +0.19% -0.09% -0.07%] index_select skip64 : Elapsed 0.057 ms (5.724 ms / 100) 5.729 -> 5.734 ( +0.09%) [ +0.00% +0.00% +0.07% / +0.12% +0.09% +0.16%] index_select skip256 : Elapsed 0.057 ms (5.729 ms / 100) 5.832 -> 5.811 ( -0.36%) [ +0.00% +0.12% +0.03% / +0.12% -0.36% -0.31%] index_select spread : Elapsed 0.058 ms (5.832 ms / 100) 5.847 -> 5.831 ( -0.27%) [ +0.07% +0.00% +0.03% / +0.22% -0.21% -0.27%] index_select strided 3 : Elapsed 0.059 ms (5.851 ms / 100) 5.757 -> 5.759 ( +0.03%) [ +0.19% +0.00% +0.05% / +0.16% +0.03% +0.07%] index_select random : Elapsed 0.058 ms (5.768 ms / 100) 5.770 -> 5.764 ( -0.10%) [ +0.09% +0.00% +0.05% / -0.02% -0.05% -0.10%] index_select random_sorted : Elapsed 0.058 ms (5.775 ms / 100) B = [20, 16, 40, 5] (stride (3200, 200, 1, 40)) A = [20, 16, 40, 4] (stride (640, 40, 1, 12800)) dim = 3 5.098 -> 5.107 ( +0.18%) [ +0.10% +0.22% +0.00% / +0.18% +0.35% +0.33%] index_add_ linear : Elapsed 0.051 ms (5.103 ms / 100) 5.047 -> 5.056 ( +0.18%) [ +0.00% +0.14% +0.08% / +0.50% +0.18% +0.18%] index_copy_ linear : Elapsed 0.050 ms (5.047 ms / 100) 5.100 -> 5.106 ( +0.12%) [ +0.02% +0.16% +0.00% / +0.12% +0.27% +0.24%] index_add_ reverse : Elapsed 0.051 ms (5.101 ms / 100) 5.045 -> 5.048 ( +0.06%) [ +0.08% +0.00% +0.06% / +0.06% +0.20% +0.22%] index_copy_ reverse : Elapsed 0.050 ms (5.049 ms / 100) 5.104 -> 5.104 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.25% +0.20%] index_add_ spread : Elapsed 0.051 ms (5.104 ms / 100) 5.049 -> 5.053 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.24% +0.36%] index_copy_ spread : Elapsed 0.051 ms (5.051 ms / 100) 5.102 -> 5.110 ( +0.16%) [ +0.16% +0.10% +0.00% / +0.16% +0.16% +0.27%] index_add_ strided 3 : Elapsed 0.051 ms (5.110 ms / 100) 5.048 -> 5.051 ( +0.06%) [ +0.12% +0.14% +0.00% / +0.06% +0.22% +0.18%] index_copy_ strided 3 : Elapsed 0.051 ms (5.054 ms / 100) 5.103 -> 5.115 ( +0.24%) [ +0.02% +0.08% +0.00% / +0.24% +0.29% +0.35%] index_add_ perm : Elapsed 0.051 ms (5.104 ms / 100) 5.049 -> 5.052 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.20% +0.06%] index_copy_ perm : Elapsed 0.050 ms (5.049 ms / 100) 5.103 -> 5.106 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.24% +0.27%] index_add_ perm_sorted : Elapsed 0.051 ms (5.106 ms / 100) 5.050 -> 5.055 ( +0.10%) [ +0.18% +0.06% +0.00% / +0.12% +0.22% +0.10%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.059 ms / 100) 5.172 -> 5.170 ( -0.04%) [ +0.04% +0.14% +0.00% / -0.04% +0.14% +0.12%] index_select const : Elapsed 0.052 ms (5.174 ms / 100) 5.273 -> 5.274 ( +0.02%) [ +0.00% +0.11% +0.00% / +0.02% +0.40% +0.25%] index_select wrap : Elapsed 0.053 ms (5.273 ms / 100) 5.272 -> 5.278 ( +0.11%) [ +0.00% +0.15% +0.13% / +0.11% +0.42% +0.23%] index_select linear : Elapsed 0.053 ms (5.272 ms / 100) 5.270 -> 5.272 ( +0.04%) [ +0.00% +0.04% +0.02% / +0.04% +0.34% +0.27%] index_select reverse : Elapsed 0.053 ms (5.270 ms / 100) 5.175 -> 5.181 ( +0.12%) [ +0.08% +0.06% +0.00% / +0.15% +0.23% +0.12%] index_select skip64 : Elapsed 0.052 ms (5.179 ms / 100) 5.174 -> 5.172 ( -0.04%) [ +0.02% +0.00% +0.17% / -0.04% +0.23% +0.08%] index_select skip256 : Elapsed 0.052 ms (5.175 ms / 100) 5.269 -> 5.266 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.47% +0.25%] index_select spread : Elapsed 0.053 ms (5.272 ms / 100) 5.270 -> 5.274 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.08% +0.49% +0.44%] index_select strided 3 : Elapsed 0.053 ms (5.279 ms / 100) 5.237 -> 5.237 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.34% +0.42%] index_select random : Elapsed 0.052 ms (5.244 ms / 100) 5.231 -> 5.233 ( +0.04%) [ +0.02% +0.06% +0.00% / +0.04% +0.50% +0.40%] index_select random_sorted : Elapsed 0.052 ms (5.232 ms / 100) B = [20, 16, 40, 5] (stride (3200, 40, 1, 640)) A = [20, 16, 40, 4] (stride (2560, 160, 1, 40)) dim = 3 3.382 -> 3.356 ( -0.77%) [ +0.00% +0.12% +0.00% / +0.00% -0.77% -0.56%] index_add_ linear : Elapsed 0.034 ms (3.382 ms / 100) 3.309 -> 3.307 ( -0.06%) [ +0.33% +0.12% +0.00% / +0.18% -0.06% +0.06%] index_copy_ linear : Elapsed 0.033 ms (3.320 ms / 100) 3.371 -> 3.373 ( +0.06%) [ +0.27% +0.00% +0.15% / +0.21% +0.21% +0.06%] index_add_ reverse : Elapsed 0.034 ms (3.380 ms / 100) 3.317 -> 3.313 ( -0.12%) [ +0.00% +0.03% +0.00% / +0.00% -0.12% +0.15%] index_copy_ reverse : Elapsed 0.033 ms (3.317 ms / 100) 3.380 -> 3.361 ( -0.56%) [ +0.03% +0.12% +0.00% / +0.12% -0.38% -0.56%] index_add_ spread : Elapsed 0.034 ms (3.381 ms / 100) 3.307 -> 3.309 ( +0.06%) [ +0.24% +0.24% +0.00% / +0.12% +0.18% +0.06%] index_copy_ spread : Elapsed 0.033 ms (3.315 ms / 100) 3.375 -> 3.360 ( -0.44%) [ +0.00% +0.06% +0.00% / -0.12% -0.44% -0.44%] index_add_ strided 3 : Elapsed 0.034 ms (3.375 ms / 100) 3.311 -> 3.310 ( -0.03%) [ +0.27% +0.21% +0.00% / +0.27% +0.03% -0.03%] index_copy_ strided 3 : Elapsed 0.033 ms (3.320 ms / 100) 3.360 -> 3.363 ( +0.09%) [ +0.24% +0.24% +0.00% / +0.09% +0.18% +0.15%] index_add_ perm : Elapsed 0.034 ms (3.368 ms / 100) 3.309 -> 3.304 ( -0.15%) [ +0.00% +0.03% +0.12% / +0.18% -0.12% -0.15%] index_copy_ perm : Elapsed 0.033 ms (3.309 ms / 100) 3.385 -> 3.354 ( -0.92%) [ +0.00% +0.00% +0.03% / +0.00% -0.77% -0.92%] index_add_ perm_sorted : Elapsed 0.034 ms (3.385 ms / 100) 3.318 -> 3.299 ( -0.57%) [ +0.03% +0.00% +0.03% / +0.09% -0.57% -0.48%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.319 ms / 100) 3.292 -> 3.286 ( -0.18%) [ +0.00% +0.24% +0.09% / +0.12% -0.06% -0.18%] index_select const : Elapsed 0.033 ms (3.292 ms / 100) 3.417 -> 3.409 ( -0.23%) [ +0.26% +0.00% +0.20% / +0.09% -0.23% -0.18%] index_select wrap : Elapsed 0.034 ms (3.426 ms / 100) 3.416 -> 3.399 ( -0.50%) [ +0.20% +0.00% +0.00% / +0.03% -0.50% +0.44%] index_select linear : Elapsed 0.034 ms (3.423 ms / 100) 3.414 -> 3.402 ( -0.35%) [ +0.00% +0.06% +0.00% / +0.09% -0.35% -0.15%] index_select reverse : Elapsed 0.034 ms (3.414 ms / 100) 3.297 -> 3.284 ( -0.39%) [ +0.00% +0.06% +0.06% / +0.12% -0.39% -0.21%] index_select skip64 : Elapsed 0.033 ms (3.297 ms / 100) 3.294 -> 3.287 ( -0.21%) [ +0.15% +0.00% +0.18% / +0.03% -0.09% -0.21%] index_select skip256 : Elapsed 0.033 ms (3.299 ms / 100) 3.413 -> 3.402 ( -0.32%) [ +0.00% +0.09% +0.09% / +0.21% -0.32% -0.29%] index_select spread : Elapsed 0.034 ms (3.413 ms / 100) 3.418 -> 3.404 ( -0.41%) [ +0.00% +0.18% +0.03% / +0.12% -0.29% -0.41%] index_select strided 3 : Elapsed 0.034 ms (3.418 ms / 100) 3.380 -> 3.372 ( -0.24%) [ +0.00% +0.09% +0.00% / -0.09% -0.18% -0.24%] index_select random : Elapsed 0.034 ms (3.380 ms / 100) 3.379 -> 3.369 ( -0.30%) [ +0.18% +0.09% +0.00% / +0.06% -0.30% -0.18%] index_select random_sorted : Elapsed 0.034 ms (3.385 ms / 100) B = [20, 16, 40, 5] (stride (5, 4000, 100, 1)) A = [20, 16, 40, 4] (stride (4, 80, 1280, 1)) dim = 3 5.921 -> 5.921 ( +0.00%) [ +0.00% +0.07% +0.08% / +0.24% +0.00% +0.05%] index_add_ linear : Elapsed 0.059 ms (5.921 ms / 100) 5.908 -> 5.901 ( -0.12%) [ +0.00% +0.05% +0.20% / +0.25% -0.02% -0.12%] index_copy_ linear : Elapsed 0.059 ms (5.908 ms / 100) 5.919 -> 5.924 ( +0.08%) [ +0.00% +0.08% +0.14% / +0.25% +0.08% +0.10%] index_add_ reverse : Elapsed 0.059 ms (5.919 ms / 100) 5.904 -> 5.903 ( -0.02%) [ +0.00% +0.12% +0.25% / +0.29% -0.02% +0.00%] index_copy_ reverse : Elapsed 0.059 ms (5.904 ms / 100) 5.916 -> 5.921 ( +0.08%) [ +0.00% +0.19% +0.22% / +0.22% +0.20% +0.08%] index_add_ spread : Elapsed 0.059 ms (5.916 ms / 100) 5.908 -> 5.903 ( -0.08%) [ +0.00% +0.14% +0.20% / +0.14% +0.00% -0.08%] index_copy_ spread : Elapsed 0.059 ms (5.908 ms / 100) 5.924 -> 5.929 ( +0.08%) [ +0.02% +0.02% +0.00% / +0.12% +0.08% +0.08%] index_add_ strided 3 : Elapsed 0.059 ms (5.925 ms / 100) 5.911 -> 5.895 ( -0.27%) [ +0.00% +0.00% +0.10% / +0.22% -0.17% -0.27%] index_copy_ strided 3 : Elapsed 0.059 ms (5.911 ms / 100) 5.921 -> 5.914 ( -0.12%) [ +0.10% +0.00% +0.12% / +0.19% -0.12% -0.10%] index_add_ perm : Elapsed 0.059 ms (5.927 ms / 100) 5.915 -> 5.902 ( -0.22%) [ +0.00% +0.02% +0.10% / +0.10% -0.22% -0.15%] index_copy_ perm : Elapsed 0.059 ms (5.915 ms / 100) 5.926 -> 5.924 ( -0.03%) [ +0.02% +0.02% +0.00% / +0.05% -0.03% -0.02%] index_add_ perm_sorted : Elapsed 0.059 ms (5.927 ms / 100) 5.904 -> 5.904 ( +0.00%) [ +0.00% +0.22% +0.30% / +0.36% +0.00% +0.00%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.904 ms / 100) 6.183 -> 6.182 ( -0.02%) [ +0.10% +0.00% +0.10% / +0.24% -0.02% +0.00%] index_select const : Elapsed 0.062 ms (6.189 ms / 100) 6.188 -> 6.180 ( -0.13%) [ +0.00% +0.00% +0.11% / +0.16% -0.11% -0.13%] index_select wrap : Elapsed 0.062 ms (6.188 ms / 100) 6.183 -> 6.181 ( -0.03%) [ +0.05% +0.00% +0.21% / +0.18% -0.03% -0.03%] index_select linear : Elapsed 0.062 ms (6.186 ms / 100) 6.184 -> 6.174 ( -0.16%) [ +0.10% +0.00% +0.18% / +0.18% -0.02% -0.16%] index_select reverse : Elapsed 0.062 ms (6.190 ms / 100) 6.187 -> 6.174 ( -0.21%) [ +0.00% +0.00% +0.15% / +0.24% -0.21% -0.10%] index_select skip64 : Elapsed 0.062 ms (6.187 ms / 100) 6.181 -> 6.179 ( -0.03%) [ +0.00% +0.08% +0.18% / +0.23% -0.03% -0.02%] index_select skip256 : Elapsed 0.062 ms (6.181 ms / 100) 6.187 -> 6.174 ( -0.21%) [ +0.00% +0.00% +0.15% / +0.24% -0.21% -0.10%] index_select spread : Elapsed 0.062 ms (6.187 ms / 100) 6.187 -> 6.175 ( -0.19%) [ +0.13% +0.00% +0.10% / +0.16% -0.06% -0.19%] index_select strided 3 : Elapsed 0.062 ms (6.195 ms / 100) 6.181 -> 6.178 ( -0.05%) [ +0.00% +0.10% +0.26% / +0.26% -0.05% -0.05%] index_select random : Elapsed 0.062 ms (6.181 ms / 100) 6.184 -> 6.175 ( -0.15%) [ +0.13% +0.00% +0.26% / +0.29% -0.15% -0.11%] index_select random_sorted : Elapsed 0.062 ms (6.192 ms / 100) B = [20, 16, 40, 5] (stride (40, 4000, 1, 800)) A = [20, 16, 40, 4] (stride (640, 1, 16, 12800)) dim = 3 5.794 -> 5.774 ( -0.35%) [ +0.16% +0.00% +0.09% / +0.12% -0.35% -0.28%] index_add_ linear : Elapsed 0.058 ms (5.803 ms / 100) 5.718 -> 5.695 ( -0.40%) [ +0.00% +0.00% +0.14% / +0.10% -0.21% -0.40%] index_copy_ linear : Elapsed 0.057 ms (5.718 ms / 100) 5.760 -> 5.738 ( -0.38%) [ +0.00% +0.23% +0.21% / +0.21% -0.24% -0.38%] index_add_ reverse : Elapsed 0.058 ms (5.760 ms / 100) 5.698 -> 5.675 ( -0.40%) [ +0.23% +0.00% +0.16% / +0.09% -0.33% -0.40%] index_copy_ reverse : Elapsed 0.057 ms (5.711 ms / 100) 5.798 -> 5.772 ( -0.45%) [ +0.09% +0.03% +0.00% / +0.00% -0.26% -0.45%] index_add_ spread : Elapsed 0.058 ms (5.803 ms / 100) 5.720 -> 5.702 ( -0.31%) [ +0.02% +0.00% +0.14% / +0.09% -0.31% -0.28%] index_copy_ spread : Elapsed 0.057 ms (5.721 ms / 100) 5.784 -> 5.739 ( -0.78%) [ +0.00% +0.00% +0.02% / +0.17% -0.73% -0.78%] index_add_ strided 3 : Elapsed 0.058 ms (5.784 ms / 100) 5.706 -> 5.662 ( -0.77%) [ +0.00% +0.07% +0.11% / +0.23% -0.70% -0.77%] index_copy_ strided 3 : Elapsed 0.057 ms (5.706 ms / 100) 5.800 -> 5.761 ( -0.67%) [ +0.03% +0.00% +0.09% / +0.07% -0.64% -0.67%] index_add_ perm : Elapsed 0.058 ms (5.802 ms / 100) 5.717 -> 5.680 ( -0.65%) [ +0.02% +0.09% +0.00% / +0.37% -0.65% -0.63%] index_copy_ perm : Elapsed 0.057 ms (5.718 ms / 100) 5.800 -> 5.761 ( -0.67%) [ +0.00% +0.17% +0.02% / +0.07% -0.62% -0.67%] index_add_ perm_sorted : Elapsed 0.058 ms (5.800 ms / 100) 5.719 -> 5.682 ( -0.65%) [ +0.00% +0.21% +0.03% / +0.17% -0.52% -0.65%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.719 ms / 100) 6.028 -> 6.034 ( +0.10%) [ +0.00% +0.05% +0.12% / +0.10% +0.23% +0.23%] index_select const : Elapsed 0.060 ms (6.028 ms / 100) 6.086 -> 6.060 ( -0.43%) [ +0.00% +0.13% +0.10% / +0.16% -0.43% -0.38%] index_select wrap : Elapsed 0.061 ms (6.086 ms / 100) 6.078 -> 6.053 ( -0.41%) [ +0.12% +0.00% +0.10% / +0.16% -0.41% -0.20%] index_select linear : Elapsed 0.061 ms (6.085 ms / 100) 6.089 -> 6.064 ( -0.41%) [ +0.03% +0.00% +0.08% / +0.23% -0.36% -0.41%] index_select reverse : Elapsed 0.061 ms (6.091 ms / 100) 6.024 -> 6.035 ( +0.18%) [ +0.00% +0.10% +0.17% / +0.18% +0.30% +0.23%] index_select skip64 : Elapsed 0.060 ms (6.024 ms / 100) 6.027 -> 6.033 ( +0.10%) [ +0.00% +0.02% +0.13% / +0.10% +0.10% +0.17%] index_select skip256 : Elapsed 0.060 ms (6.027 ms / 100) 6.090 -> 6.057 ( -0.54%) [ +0.07% +0.03% +0.00% / +0.00% -0.53% -0.54%] index_select spread : Elapsed 0.061 ms (6.094 ms / 100) 6.081 -> 6.066 ( -0.25%) [ +0.00% +0.02% +0.10% / +0.23% -0.23% -0.25%] index_select strided 3 : Elapsed 0.061 ms (6.081 ms / 100) 6.098 -> 6.063 ( -0.57%) [ +0.08% +0.05% +0.00% / +0.10% -0.57% -0.52%] index_select random : Elapsed 0.061 ms (6.103 ms / 100) 6.080 -> 6.053 ( -0.44%) [ +0.00% +0.18% +0.23% / +0.05% -0.44% -0.41%] index_select random_sorted : Elapsed 0.061 ms (6.080 ms / 100) B = [20, 16, 40, 5] (stride (80, 5, 1600, 1)) A = [20, 16, 40, 4] (stride (16, 1, 1280, 320)) dim = 3 3.584 -> 3.587 ( +0.08%) [ +0.06% +0.00% +0.03% / +0.08% +1.51% +1.42%] index_add_ linear : Elapsed 0.036 ms (3.586 ms / 100) 3.587 -> 3.591 ( +0.11%) [ +0.00% +0.20% +0.22% / +0.11% +1.17% +1.39%] index_copy_ linear : Elapsed 0.036 ms (3.587 ms / 100) 3.581 -> 3.586 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +1.51% +1.45%] index_add_ reverse : Elapsed 0.036 ms (3.581 ms / 100) 3.591 -> 3.596 ( +0.14%) [ +0.25% +0.00% +0.08% / +0.14% +1.14% +1.23%] index_copy_ reverse : Elapsed 0.036 ms (3.600 ms / 100) 3.583 -> 3.582 ( -0.03%) [ +0.00% +0.20% +0.20% / -0.03% +1.42% +1.45%] index_add_ spread : Elapsed 0.036 ms (3.583 ms / 100) 3.592 -> 3.600 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +1.36% +1.09%] index_copy_ spread : Elapsed 0.036 ms (3.592 ms / 100) 3.606 -> 3.596 ( -0.28%) [ +0.08% +0.00% +0.03% / -0.28% +1.08% +1.05%] index_add_ strided 3 : Elapsed 0.036 ms (3.609 ms / 100) 3.608 -> 3.612 ( +0.11%) [ +0.00% +0.08% +0.03% / +0.11% +0.86% +0.89%] index_copy_ strided 3 : Elapsed 0.036 ms (3.608 ms / 100) 3.602 -> 3.606 ( +0.11%) [ +0.06% +0.00% +0.25% / +0.11% +0.86% +1.25%] index_add_ perm : Elapsed 0.036 ms (3.604 ms / 100) 3.604 -> 3.615 ( +0.31%) [ +0.06% +0.00% +0.00% / +0.31% +1.00% +0.86%] index_copy_ perm : Elapsed 0.036 ms (3.606 ms / 100) 3.603 -> 3.603 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.00% +1.28% +1.39%] index_add_ perm_sorted : Elapsed 0.036 ms (3.609 ms / 100) 3.601 -> 3.606 ( +0.14%) [ +0.28% +0.00% +0.11% / +0.14% +1.08% +1.03%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.611 ms / 100) 3.656 -> 3.662 ( +0.16%) [ +0.03% +0.03% +0.00% / +0.16% +0.96% +0.90%] index_select const : Elapsed 0.037 ms (3.657 ms / 100) 3.663 -> 3.675 ( +0.33%) [ +0.25% +0.00% +0.22% / +0.33% +1.47% +1.53%] index_select wrap : Elapsed 0.037 ms (3.672 ms / 100) 3.663 -> 3.675 ( +0.33%) [ +0.35% +0.00% +0.08% / +0.33% +1.31% +1.20%] index_select linear : Elapsed 0.037 ms (3.676 ms / 100) 3.686 -> 3.648 ( -1.03%) [ +0.08% +0.00% +0.05% / +0.16% -1.00% -1.03%] index_select reverse : Elapsed 0.037 ms (3.689 ms / 100) 3.659 -> 3.660 ( +0.03%) [ +0.14% +0.00% +0.14% / +0.03% +0.60% +0.77%] index_select skip64 : Elapsed 0.037 ms (3.664 ms / 100) 3.657 -> 3.648 ( -0.25%) [ +0.08% +0.11% +0.00% / -0.25% +0.79% +0.88%] index_select skip256 : Elapsed 0.037 ms (3.660 ms / 100) 3.642 -> 3.643 ( +0.03%) [ +0.11% +0.00% +0.11% / +0.03% +1.07% +0.96%] index_select spread : Elapsed 0.036 ms (3.646 ms / 100) 3.675 -> 3.672 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.44% +0.52%] index_select strided 3 : Elapsed 0.037 ms (3.678 ms / 100) 3.681 -> 3.685 ( +0.11%) [ +0.22% +0.00% +0.08% / +0.11% +0.46% +0.60%] index_select random : Elapsed 0.037 ms (3.689 ms / 100) 3.658 -> 3.668 ( +0.27%) [ +0.25% +0.16% +0.00% / +0.27% +0.79% +0.85%] index_select random_sorted : Elapsed 0.037 ms (3.667 ms / 100) B = [20, 16, 40, 5] (stride (5, 100, 1600, 1)) A = [20, 16, 40, 4] (stride (2560, 4, 64, 1)) dim = 3 6.171 -> 6.169 ( -0.03%) [ +0.05% +0.02% +0.00% / -0.03% -0.02% +0.05%] index_add_ linear : Elapsed 0.062 ms (6.174 ms / 100) 6.130 -> 6.128 ( -0.03%) [ +0.00% +0.00% +0.03% / +0.03% -0.03% +0.02%] index_copy_ linear : Elapsed 0.061 ms (6.130 ms / 100) 6.165 -> 6.166 ( +0.02%) [ +0.06% +0.10% +0.00% / +0.10% +0.08% +0.02%] index_add_ reverse : Elapsed 0.062 ms (6.169 ms / 100) 6.128 -> 6.133 ( +0.08%) [ +0.11% +0.00% +0.03% / +0.23% +0.08% +0.11%] index_copy_ reverse : Elapsed 0.061 ms (6.135 ms / 100) 6.164 -> 6.168 ( +0.06%) [ +0.00% +0.16% +0.00% / +0.26% +0.13% +0.06%] index_add_ spread : Elapsed 0.062 ms (6.164 ms / 100) 6.129 -> 6.135 ( +0.10%) [ +0.00% +0.11% +0.00% / +0.10% +0.13% +0.10%] index_copy_ spread : Elapsed 0.061 ms (6.129 ms / 100) 6.163 -> 6.168 ( +0.08%) [ +0.06% +0.00% +0.11% / +0.08% +0.16% +0.08%] index_add_ strided 3 : Elapsed 0.062 ms (6.167 ms / 100) 6.129 -> 6.131 ( +0.03%) [ +0.00% +0.13% +0.03% / +0.08% +0.03% +0.05%] index_copy_ strided 3 : Elapsed 0.061 ms (6.129 ms / 100) 6.170 -> 6.169 ( -0.02%) [ +0.00% +0.02% +0.16% / +0.10% +0.02% -0.02%] index_add_ perm : Elapsed 0.062 ms (6.170 ms / 100) 6.132 -> 6.127 ( -0.08%) [ +0.00% +0.07% +0.07% / +0.00% -0.08% -0.03%] index_copy_ perm : Elapsed 0.061 ms (6.132 ms / 100) 6.171 -> 6.170 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.13% +0.00%] index_add_ perm_sorted : Elapsed 0.062 ms (6.171 ms / 100) 6.133 -> 6.129 ( -0.07%) [ +0.02% +0.03% +0.00% / +0.05% -0.02% -0.07%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.134 ms / 100) 6.448 -> 6.449 ( +0.02%) [ +0.00% +0.08% +0.05% / +0.02% +0.12% +0.03%] index_select const : Elapsed 0.064 ms (6.448 ms / 100) 6.443 -> 6.452 ( +0.14%) [ +0.06% +0.00% +0.03% / +0.14% +0.19% +0.20%] index_select wrap : Elapsed 0.064 ms (6.447 ms / 100) 6.449 -> 6.447 ( -0.03%) [ +0.14% +0.00% +0.08% / -0.02% +0.06% -0.03%] index_select linear : Elapsed 0.065 ms (6.458 ms / 100) 6.443 -> 6.446 ( +0.05%) [ +0.02% +0.00% +0.14% / +0.12% +0.05% +0.09%] index_select reverse : Elapsed 0.064 ms (6.444 ms / 100) 6.446 -> 6.447 ( +0.02%) [ +0.00% +0.06% +0.00% / +0.11% +0.08% +0.02%] index_select skip64 : Elapsed 0.064 ms (6.446 ms / 100) 6.442 -> 6.451 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.20% +0.14% +0.16%] index_select skip256 : Elapsed 0.064 ms (6.442 ms / 100) 6.446 -> 6.442 ( -0.06%) [ +0.00% +0.03% +0.11% / -0.06% +0.11% +0.08%] index_select spread : Elapsed 0.064 ms (6.446 ms / 100) 6.443 -> 6.446 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.09% +0.19%] index_select strided 3 : Elapsed 0.064 ms (6.443 ms / 100) 6.444 -> 6.449 ( +0.08%) [ +0.00% +0.05% +0.11% / +0.14% +0.12% +0.08%] index_select random : Elapsed 0.064 ms (6.444 ms / 100) 6.448 -> 6.447 ( -0.02%) [ +0.06% +0.02% +0.00% / +0.09% +0.03% -0.02%] index_select random_sorted : Elapsed 0.065 ms (6.452 ms / 100) B = [20, 16, 40, 5] (stride (1, 100, 1600, 20)) A = [20, 16, 40, 4] (stride (2560, 160, 4, 1)) dim = 3 5.446 -> 5.456 ( +0.18%) [ +0.09% +0.00% +0.28% / +0.18% +1.03% +1.05%] index_add_ linear : Elapsed 0.055 ms (5.451 ms / 100) 5.357 -> 5.366 ( +0.17%) [ +0.00% +0.02% +0.21% / +0.17% +1.01% +0.91%] index_copy_ linear : Elapsed 0.054 ms (5.357 ms / 100) 5.447 -> 5.456 ( +0.17%) [ +0.07% +0.00% +0.07% / +0.17% +1.08% +1.03%] index_add_ reverse : Elapsed 0.055 ms (5.451 ms / 100) 5.357 -> 5.360 ( +0.06%) [ +0.06% +0.00% +0.04% / +0.06% +1.29% +1.03%] index_copy_ reverse : Elapsed 0.054 ms (5.360 ms / 100) 5.448 -> 5.458 ( +0.18%) [ +0.20% +0.00% +0.26% / +0.18% +0.97% +1.10%] index_add_ spread : Elapsed 0.055 ms (5.459 ms / 100) 5.357 -> 5.365 ( +0.15%) [ +0.00% +0.21% +0.17% / +0.15% +1.06% +1.06%] index_copy_ spread : Elapsed 0.054 ms (5.357 ms / 100) 5.463 -> 5.476 ( +0.24%) [ +0.13% +0.00% +0.31% / +0.24% +1.06% +1.03%] index_add_ strided 3 : Elapsed 0.055 ms (5.470 ms / 100) 5.373 -> 5.379 ( +0.11%) [ +0.09% +0.00% +0.09% / +0.11% +1.01% +1.08%] index_copy_ strided 3 : Elapsed 0.054 ms (5.378 ms / 100) 5.443 -> 5.454 ( +0.20%) [ +0.00% +0.13% +0.35% / +0.20% +1.51% +1.40%] index_add_ perm : Elapsed 0.054 ms (5.443 ms / 100) 5.357 -> 5.364 ( +0.13%) [ +0.15% +0.00% +0.15% / +0.13% +1.16% +1.23%] index_copy_ perm : Elapsed 0.054 ms (5.365 ms / 100) 5.440 -> 5.447 ( +0.13%) [ +0.00% +0.04% +0.11% / +0.13% +1.25% +1.21%] index_add_ perm_sorted : Elapsed 0.054 ms (5.440 ms / 100) 5.345 -> 5.356 ( +0.21%) [ +0.00% +0.06% +0.19% / +0.21% +1.23% +1.29%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.345 ms / 100) 5.633 -> 5.642 ( +0.16%) [ +0.00% +0.00% +0.11% / +0.16% +1.22% +1.22%] index_select const : Elapsed 0.056 ms (5.633 ms / 100) 5.629 -> 5.649 ( +0.36%) [ +0.00% +0.02% +0.21% / +0.36% +1.46% +1.42%] index_select wrap : Elapsed 0.056 ms (5.629 ms / 100) 5.631 -> 5.638 ( +0.12%) [ +0.00% +0.00% +0.18% / +0.12% +1.39% +1.49%] index_select linear : Elapsed 0.056 ms (5.631 ms / 100) 5.629 -> 5.640 ( +0.20%) [ +0.12% +0.00% +0.30% / +0.20% +1.40% +1.44%] index_select reverse : Elapsed 0.056 ms (5.636 ms / 100) 5.634 -> 5.645 ( +0.20%) [ +0.04% +0.00% +0.23% / +0.20% +1.51% +1.30%] index_select skip64 : Elapsed 0.056 ms (5.636 ms / 100) 5.626 -> 5.641 ( +0.27%) [ +0.00% +0.04% +0.30% / +0.27% +1.32% +1.23%] index_select skip256 : Elapsed 0.056 ms (5.626 ms / 100) 5.638 -> 5.647 ( +0.16%) [ +0.00% +0.02% +0.11% / +0.16% +1.29% +1.24%] index_select spread : Elapsed 0.056 ms (5.638 ms / 100) 5.627 -> 5.634 ( +0.12%) [ +0.11% +0.00% +0.25% / +0.12% +1.39% +1.44%] index_select strided 3 : Elapsed 0.056 ms (5.633 ms / 100) 5.637 -> 5.645 ( +0.14%) [ +0.00% +0.05% +0.11% / +0.14% +1.26% +1.35%] index_select random : Elapsed 0.056 ms (5.637 ms / 100) 5.628 -> 5.641 ( +0.23%) [ +0.00% +0.14% +0.23% / +0.23% +1.40% +1.35%] index_select random_sorted : Elapsed 0.056 ms (5.628 ms / 100) B = [20, 16, 40, 5] (stride (16, 1, 1600, 320)) A = [20, 16, 40, 4] (stride (160, 3200, 4, 1)) dim = 3 5.673 -> 5.654 ( -0.33%) [ +0.00% +0.09% +0.14% / +0.09% -0.23% -0.33%] index_add_ linear : Elapsed 0.057 ms (5.673 ms / 100) 5.589 -> 5.584 ( -0.09%) [ +0.00% +0.09% +0.09% / +0.16% -0.09% -0.05%] index_copy_ linear : Elapsed 0.056 ms (5.589 ms / 100) 5.671 -> 5.655 ( -0.28%) [ +0.00% +0.02% +0.09% / +0.14% -0.28% -0.25%] index_add_ reverse : Elapsed 0.057 ms (5.671 ms / 100) 5.589 -> 5.576 ( -0.23%) [ +0.09% +0.00% +0.16% / +0.07% -0.23% -0.23%] index_copy_ reverse : Elapsed 0.056 ms (5.594 ms / 100) 5.671 -> 5.648 ( -0.41%) [ +0.02% +0.00% +0.02% / +0.00% -0.26% -0.41%] index_add_ spread : Elapsed 0.057 ms (5.672 ms / 100) 5.583 -> 5.571 ( -0.21%) [ +0.00% +0.07% +0.02% / +0.07% -0.21% -0.16%] index_copy_ spread : Elapsed 0.056 ms (5.583 ms / 100) 5.673 -> 5.645 ( -0.49%) [ +0.02% +0.02% +0.00% / +0.05% -0.49% -0.48%] index_add_ strided 3 : Elapsed 0.057 ms (5.674 ms / 100) 5.580 -> 5.567 ( -0.23%) [ +0.20% +0.00% +0.00% / +0.07% -0.20% -0.23%] index_copy_ strided 3 : Elapsed 0.056 ms (5.591 ms / 100) 5.676 -> 5.651 ( -0.44%) [ +0.00% +0.00% +0.05% / +0.07% -0.44% -0.33%] index_add_ perm : Elapsed 0.057 ms (5.676 ms / 100) 5.585 -> 5.566 ( -0.34%) [ +0.11% +0.00% +0.16% / +0.13% -0.34% -0.29%] index_copy_ perm : Elapsed 0.056 ms (5.591 ms / 100) 5.674 -> 5.663 ( -0.19%) [ +0.00% +0.07% +0.04% / +0.09% -0.02% -0.19%] index_add_ perm_sorted : Elapsed 0.057 ms (5.674 ms / 100) 5.581 -> 5.585 ( +0.07%) [ +0.09% +0.00% +0.18% / +0.18% +0.18% +0.07%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.586 ms / 100) 5.943 -> 5.929 ( -0.24%) [ +0.07% +0.00% +0.07% / +0.03% -0.24% -0.15%] index_select const : Elapsed 0.059 ms (5.947 ms / 100) 5.943 -> 5.932 ( -0.19%) [ +0.00% +0.08% +0.05% / +0.02% -0.19% -0.08%] index_select wrap : Elapsed 0.059 ms (5.943 ms / 100) 5.937 -> 5.930 ( -0.12%) [ +0.08% +0.00% +0.12% / +0.13% -0.10% -0.12%] index_select linear : Elapsed 0.059 ms (5.942 ms / 100) 5.936 -> 5.921 ( -0.25%) [ +0.05% +0.05% +0.00% / +0.07% -0.20% -0.25%] index_select reverse : Elapsed 0.059 ms (5.939 ms / 100) 5.934 -> 5.915 ( -0.32%) [ +0.08% +0.05% +0.00% / +0.07% -0.32% -0.30%] index_select skip64 : Elapsed 0.059 ms (5.939 ms / 100) 5.945 -> 5.931 ( -0.24%) [ +0.00% +0.02% +0.03% / +0.08% -0.24% -0.24%] index_select skip256 : Elapsed 0.059 ms (5.945 ms / 100) 5.929 -> 5.912 ( -0.29%) [ +0.05% +0.00% +0.10% / +0.13% -0.29% -0.27%] index_select spread : Elapsed 0.059 ms (5.932 ms / 100) 5.930 -> 5.921 ( -0.15%) [ +0.03% +0.00% +0.10% / +0.13% -0.13% -0.15%] index_select strided 3 : Elapsed 0.059 ms (5.932 ms / 100) 5.940 -> 5.930 ( -0.17%) [ +0.03% +0.00% +0.05% / +0.05% -0.15% -0.17%] index_select random : Elapsed 0.059 ms (5.942 ms / 100) 5.947 -> 5.927 ( -0.34%) [ +0.02% +0.00% +0.07% / +0.08% -0.30% -0.34%] index_select random_sorted : Elapsed 0.059 ms (5.948 ms / 100) B = [20, 16, 40, 5] (stride (640, 1, 16, 12800)) A = [20, 16, 40, 4] (stride (40, 800, 1, 12800)) dim = 3 5.821 -> 5.803 ( -0.31%) [ +0.00% +0.02% +0.00% / -0.05% -0.29% -0.31%] index_add_ linear : Elapsed 0.058 ms (5.821 ms / 100) 5.763 -> 5.751 ( -0.21%) [ +0.00% +0.10% +0.14% / +0.07% -0.03% -0.21%] index_copy_ linear : Elapsed 0.058 ms (5.763 ms / 100) 5.809 -> 5.801 ( -0.14%) [ +0.10% +0.02% +0.00% / +0.12% -0.09% -0.14%] index_add_ reverse : Elapsed 0.058 ms (5.815 ms / 100) 5.752 -> 5.742 ( -0.17%) [ +0.07% +0.00% +0.05% / +0.21% -0.17% -0.07%] index_copy_ reverse : Elapsed 0.058 ms (5.756 ms / 100) 5.818 -> 5.801 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.07% -0.19% -0.29%] index_add_ spread : Elapsed 0.058 ms (5.818 ms / 100) 5.766 -> 5.754 ( -0.21%) [ +0.00% +0.12% +0.03% / +0.05% -0.21% -0.21%] index_copy_ spread : Elapsed 0.058 ms (5.766 ms / 100) 5.804 -> 5.810 ( +0.10%) [ +0.02% +0.00% +0.09% / +0.17% +0.10% +0.17%] index_add_ strided 3 : Elapsed 0.058 ms (5.805 ms / 100) 5.749 -> 5.759 ( +0.17%) [ +0.10% +0.00% +0.10% / +0.17% +0.28% +0.26%] index_copy_ strided 3 : Elapsed 0.058 ms (5.755 ms / 100) 5.799 -> 5.807 ( +0.14%) [ +0.12% +0.02% +0.00% / +0.14% +0.43% +0.47%] index_add_ perm : Elapsed 0.058 ms (5.806 ms / 100) 5.748 -> 5.749 ( +0.02%) [ +0.00% +0.09% +0.07% / +0.02% +0.35% +0.38%] index_copy_ perm : Elapsed 0.057 ms (5.748 ms / 100) 5.815 -> 5.815 ( +0.00%) [ +0.00% +0.03% +0.15% / +0.07% +0.00% +0.00%] index_add_ perm_sorted : Elapsed 0.058 ms (5.815 ms / 100) 5.764 -> 5.761 ( -0.05%) [ +0.16% +0.16% +0.00% / +0.29% +0.07% -0.05%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.773 ms / 100) 6.038 -> 6.027 ( -0.18%) [ +0.07% +0.00% +0.13% / +0.10% -0.18% -0.08%] index_select const : Elapsed 0.060 ms (6.042 ms / 100) 6.130 -> 6.120 ( -0.16%) [ +0.21% +0.02% +0.00% / +0.15% -0.10% -0.16%] index_select wrap : Elapsed 0.061 ms (6.143 ms / 100) 6.113 -> 6.110 ( -0.05%) [ +0.00% +0.05% +0.13% / +0.15% +0.02% -0.05%] index_select linear : Elapsed 0.061 ms (6.113 ms / 100) 6.104 -> 6.108 ( +0.07%) [ +0.00% +0.00% +0.10% / +0.07% +0.10% +0.08%] index_select reverse : Elapsed 0.061 ms (6.104 ms / 100) 6.048 -> 6.047 ( -0.02%) [ +0.00% +0.13% +0.05% / +0.05% -0.02% +0.08%] index_select skip64 : Elapsed 0.060 ms (6.048 ms / 100) 6.031 -> 6.028 ( -0.05%) [ +0.00% +0.15% +0.22% / +0.13% +0.02% -0.05%] index_select skip256 : Elapsed 0.060 ms (6.031 ms / 100) 6.149 -> 6.131 ( -0.29%) [ +0.18% +0.00% +0.03% / +0.13% -0.20% -0.29%] index_select spread : Elapsed 0.062 ms (6.160 ms / 100) 6.117 -> 6.105 ( -0.20%) [ +0.11% +0.02% +0.00% / +0.11% -0.20% -0.16%] index_select strided 3 : Elapsed 0.061 ms (6.124 ms / 100) 6.148 -> 6.133 ( -0.24%) [ +0.05% +0.05% +0.00% / +0.13% -0.03% -0.24%] index_select random : Elapsed 0.062 ms (6.151 ms / 100) 6.139 -> 6.119 ( -0.33%) [ +0.05% +0.02% +0.00% / +0.02% -0.33% -0.31%] index_select random_sorted : Elapsed 0.061 ms (6.142 ms / 100) B = [20, 16, 40, 5] (stride (40, 800, 1, 12800)) A = [20, 16, 40, 4] (stride (64, 1, 1280, 16)) dim = 3 5.852 -> 5.829 ( -0.39%) [ +0.09% +0.00% +0.05% / +0.17% -0.31% -0.39%] index_add_ linear : Elapsed 0.059 ms (5.857 ms / 100) 5.800 -> 5.769 ( -0.53%) [ +0.09% +0.00% +0.03% / +0.09% -0.52% -0.53%] index_copy_ linear : Elapsed 0.058 ms (5.805 ms / 100) 5.847 -> 5.833 ( -0.24%) [ +0.00% +0.14% +0.17% / +0.15% -0.24% -0.22%] index_add_ reverse : Elapsed 0.058 ms (5.847 ms / 100) 5.792 -> 5.776 ( -0.28%) [ +0.00% +0.02% +0.09% / +0.19% -0.22% -0.28%] index_copy_ reverse : Elapsed 0.058 ms (5.792 ms / 100) 5.858 -> 5.830 ( -0.48%) [ +0.07% +0.03% +0.00% / +0.02% -0.39% -0.48%] index_add_ spread : Elapsed 0.059 ms (5.862 ms / 100) 5.801 -> 5.776 ( -0.43%) [ +0.00% +0.12% +0.05% / +0.10% -0.43% -0.40%] index_copy_ spread : Elapsed 0.058 ms (5.801 ms / 100) 5.847 -> 5.832 ( -0.26%) [ +0.10% +0.10% +0.00% / -0.02% -0.26% -0.24%] index_add_ strided 3 : Elapsed 0.059 ms (5.853 ms / 100) 5.790 -> 5.779 ( -0.19%) [ +0.00% +0.07% +0.00% / +0.03% -0.19% -0.17%] index_copy_ strided 3 : Elapsed 0.058 ms (5.790 ms / 100) 5.851 -> 5.836 ( -0.26%) [ +0.00% +0.02% +0.09% / +0.03% -0.14% -0.26%] index_add_ perm : Elapsed 0.059 ms (5.851 ms / 100) 5.794 -> 5.768 ( -0.45%) [ +0.03% +0.00% +0.16% / +0.14% -0.45% -0.41%] index_copy_ perm : Elapsed 0.058 ms (5.796 ms / 100) 5.856 -> 5.832 ( -0.41%) [ +0.03% +0.00% +0.27% / +0.07% -0.29% -0.41%] index_add_ perm_sorted : Elapsed 0.059 ms (5.858 ms / 100) 5.799 -> 5.776 ( -0.40%) [ +0.00% +0.05% +0.21% / +0.28% -0.40% -0.38%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.799 ms / 100) 6.106 -> 6.082 ( -0.39%) [ +0.03% +0.00% +0.15% / +0.07% -0.39% -0.31%] index_select const : Elapsed 0.061 ms (6.108 ms / 100) 6.190 -> 6.154 ( -0.58%) [ +0.00% +0.02% +0.10% / +0.08% -0.55% -0.58%] index_select wrap : Elapsed 0.062 ms (6.190 ms / 100) 6.179 -> 6.136 ( -0.70%) [ +0.02% +0.05% +0.00% / -0.03% -0.70% -0.63%] index_select linear : Elapsed 0.062 ms (6.180 ms / 100) 6.171 -> 6.132 ( -0.63%) [ +0.13% +0.00% +0.03% / +0.11% -0.50% -0.63%] index_select reverse : Elapsed 0.062 ms (6.179 ms / 100) 6.107 -> 6.077 ( -0.49%) [ +0.03% +0.02% +0.00% / +0.07% -0.36% -0.49%] index_select skip64 : Elapsed 0.061 ms (6.109 ms / 100) 6.106 -> 6.084 ( -0.36%) [ +0.02% +0.00% +0.05% / -0.05% -0.34% -0.36%] index_select skip256 : Elapsed 0.061 ms (6.107 ms / 100) 6.175 -> 6.136 ( -0.63%) [ +0.02% +0.00% +0.08% / +0.18% -0.60% -0.63%] index_select spread : Elapsed 0.062 ms (6.176 ms / 100) 6.181 -> 6.158 ( -0.37%) [ +0.21% +0.00% +0.15% / +0.21% -0.36% -0.37%] index_select strided 3 : Elapsed 0.062 ms (6.194 ms / 100) 6.150 -> 6.119 ( -0.50%) [ +0.00% +0.02% +0.20% / +0.26% -0.50% -0.46%] index_select random : Elapsed 0.062 ms (6.150 ms / 100) 6.148 -> 6.123 ( -0.41%) [ +0.00% +0.05% +0.11% / +0.21% -0.41% -0.34%] index_select random_sorted : Elapsed 0.061 ms (6.148 ms / 100) B = [20, 16, 40, 5] (stride (1, 800, 20, 12800)) A = [20, 16, 40, 4] (stride (160, 3200, 4, 1)) dim = 3 5.616 -> 5.623 ( +0.12%) [ +0.00% +0.21% +0.23% / +0.12% +0.37% +0.45%] index_add_ linear : Elapsed 0.056 ms (5.616 ms / 100) 5.556 -> 5.560 ( +0.07%) [ +0.13% +0.11% +0.00% / +0.07% +0.32% +0.43%] index_copy_ linear : Elapsed 0.056 ms (5.563 ms / 100) 5.613 -> 5.613 ( +0.00%) [ +0.07% +0.04% +0.00% / +0.00% +0.48% +0.43%] index_add_ reverse : Elapsed 0.056 ms (5.617 ms / 100) 5.552 -> 5.558 ( +0.11%) [ +0.00% +0.04% +0.13% / +0.11% +0.38% +0.45%] index_copy_ reverse : Elapsed 0.056 ms (5.552 ms / 100) 5.618 -> 5.627 ( +0.16%) [ +0.11% +0.00% +0.18% / +0.16% +0.32% +0.34%] index_add_ spread : Elapsed 0.056 ms (5.624 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.38% +0.23%] index_copy_ spread : Elapsed 0.056 ms (5.559 ms / 100) 5.633 -> 5.647 ( +0.25%) [ +0.00% +0.05% +0.21% / +0.25% +0.51% +0.53%] index_add_ strided 3 : Elapsed 0.056 ms (5.633 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.07% +0.00% +0.14% / +0.04% +0.56% +0.48%] index_copy_ strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.638 -> 5.647 ( +0.16%) [ +0.00% +0.04% +0.27% / +0.16% +0.69% +0.69%] index_add_ perm : Elapsed 0.056 ms (5.638 ms / 100) 5.585 -> 5.591 ( +0.11%) [ +0.00% +0.05% +0.11% / +0.11% +0.54% +0.50%] index_copy_ perm : Elapsed 0.056 ms (5.585 ms / 100) 5.631 -> 5.641 ( +0.18%) [ +0.00% +0.05% +0.07% / +0.18% +0.43% +0.27%] index_add_ perm_sorted : Elapsed 0.056 ms (5.631 ms / 100) 5.566 -> 5.574 ( +0.14%) [ +0.00% +0.04% +0.20% / +0.14% +0.41% +0.43%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.566 ms / 100) 5.909 -> 5.919 ( +0.17%) [ +0.05% +0.00% +0.37% / +0.17% +0.59% +0.54%] index_select const : Elapsed 0.059 ms (5.912 ms / 100) 5.908 -> 5.918 ( +0.17%) [ +0.03% +0.00% +0.25% / +0.17% +0.54% +0.58%] index_select wrap : Elapsed 0.059 ms (5.910 ms / 100) 5.908 -> 5.923 ( +0.25%) [ +0.10% +0.00% +0.15% / +0.25% +0.54% +0.59%] index_select linear : Elapsed 0.059 ms (5.914 ms / 100) 5.908 -> 5.917 ( +0.15%) [ +0.00% +0.14% +0.17% / +0.15% +0.47% +0.61%] index_select reverse : Elapsed 0.059 ms (5.908 ms / 100) 5.930 -> 5.933 ( +0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.40% +0.46%] index_select skip64 : Elapsed 0.059 ms (5.931 ms / 100) 5.907 -> 5.923 ( +0.27%) [ +0.00% +0.22% +0.27% / +0.27% +0.58% +0.52%] index_select skip256 : Elapsed 0.059 ms (5.907 ms / 100) 5.925 -> 5.938 ( +0.22%) [ +0.12% +0.00% +0.30% / +0.22% +0.46% +0.56%] index_select spread : Elapsed 0.059 ms (5.932 ms / 100) 5.909 -> 5.917 ( +0.14%) [ +0.00% +0.03% +0.17% / +0.14% +0.59% +0.69%] index_select strided 3 : Elapsed 0.059 ms (5.909 ms / 100) 5.930 -> 5.935 ( +0.08%) [ +0.02% +0.00% +0.08% / +0.08% +0.46% +0.34%] index_select random : Elapsed 0.059 ms (5.931 ms / 100) 5.913 -> 5.924 ( +0.19%) [ +0.03% +0.00% +0.25% / +0.19% +0.52% +0.58%] index_select random_sorted : Elapsed 0.059 ms (5.915 ms / 100) B = [20, 16, 40, 5] (stride (1, 20, 320, 12800)) dim = 3 fill_cnt = 4 3.549 -> 3.555 ( +0.17%) [ +0.00% +0.28% +0.08% / +0.17% +0.25% +0.42%] index_fill_ const : Elapsed 0.035 ms (3.549 ms / 100) 3.571 -> 3.572 ( +0.03%) [ +0.14% +0.08% +0.00% / +0.03% +0.14% +0.31%] index_fill_ linear : Elapsed 0.036 ms (3.576 ms / 100) 3.554 -> 3.564 ( +0.28%) [ +0.20% +0.00% +0.08% / +0.31% +0.28% +0.51%] index_fill_ reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.532 -> 3.536 ( +0.11%) [ +0.17% +0.14% +0.00% / +0.20% +0.11% +0.37%] index_fill_ skip64 : Elapsed 0.035 ms (3.538 ms / 100) 3.536 -> 3.545 ( +0.25%) [ +0.08% +0.23% +0.00% / +0.25% +0.42% +0.34%] index_fill_ skip256 : Elapsed 0.035 ms (3.539 ms / 100) 3.563 -> 3.562 ( -0.03%) [ +0.06% +0.08% +0.00% / -0.03% +0.34% +0.14%] index_fill_ spread : Elapsed 0.036 ms (3.565 ms / 100) 3.573 -> 3.577 ( +0.11%) [ +0.00% +0.17% +0.11% / +0.11% +0.20% +0.20%] index_fill_ strided 3 : Elapsed 0.036 ms (3.573 ms / 100) 3.552 -> 3.556 ( +0.11%) [ +0.11% +0.00% +0.14% / +0.11% +0.20% +0.11%] index_fill_ random : Elapsed 0.036 ms (3.556 ms / 100) 3.536 -> 3.540 ( +0.11%) [ +0.00% +0.08% +0.08% / +0.11% +0.23% +0.25%] index_fill_ random_sorted : Elapsed 0.035 ms (3.536 ms / 100) 3.554 -> 3.553 ( -0.03%) [ +0.08% +0.00% +0.28% / -0.03% +0.25% +0.31%] index_fill_ perm : Elapsed 0.036 ms (3.557 ms / 100) 3.555 -> 3.555 ( +0.00%) [ +0.00% +0.20% +0.03% / +0.00% +0.23% +0.08%] index_fill_ perm_sorted : Elapsed 0.036 ms (3.555 ms / 100) B = [20, 16, 40, 5] (stride (1, 20, 320, 12800)) A = [20, 16, 40, 4] (stride (1, 800, 20, 12800)) dim = 3 5.601 -> 5.548 ( -0.95%) [ +0.00% +0.11% +0.30% / -0.02% -0.95% -0.79%] index_add_ linear : Elapsed 0.056 ms (5.601 ms / 100) 5.552 -> 5.504 ( -0.86%) [ +0.09% +0.02% +0.00% / -0.04% -0.77% -0.86%] index_copy_ linear : Elapsed 0.056 ms (5.557 ms / 100) 5.591 -> 5.546 ( -0.80%) [ +0.16% +0.30% +0.00% / +0.18% -0.79% -0.80%] index_add_ reverse : Elapsed 0.056 ms (5.600 ms / 100) 5.547 -> 5.503 ( -0.79%) [ +0.00% +0.14% +0.04% / -0.05% -0.79% -0.79%] index_copy_ reverse : Elapsed 0.055 ms (5.547 ms / 100) 5.610 -> 5.554 ( -1.00%) [ +0.00% +0.09% +0.09% / +0.00% -1.00% -0.91%] index_add_ spread : Elapsed 0.056 ms (5.610 ms / 100) 5.549 -> 5.512 ( -0.67%) [ +0.00% +0.07% +0.05% / +0.16% -0.67% -0.63%] index_copy_ spread : Elapsed 0.055 ms (5.549 ms / 100) 5.577 -> 5.554 ( -0.41%) [ +0.00% +0.04% +0.05% / +0.02% -0.41% -0.20%] index_add_ strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.525 -> 5.498 ( -0.49%) [ +0.00% +0.11% +0.09% / -0.07% -0.49% +0.13%] index_copy_ strided 3 : Elapsed 0.055 ms (5.525 ms / 100) 5.607 -> 5.563 ( -0.78%) [ +0.16% +0.07% +0.00% / +0.11% -0.78% -0.61%] index_add_ perm : Elapsed 0.056 ms (5.616 ms / 100) 5.555 -> 5.505 ( -0.90%) [ +0.14% +0.00% +0.00% / -0.07% -0.90% -0.67%] index_copy_ perm : Elapsed 0.056 ms (5.563 ms / 100) 5.602 -> 5.555 ( -0.84%) [ +0.02% +0.12% +0.00% / +0.12% -0.84% -0.70%] index_add_ perm_sorted : Elapsed 0.056 ms (5.603 ms / 100) 5.553 -> 5.500 ( -0.95%) [ +0.14% +0.00% +0.14% / +0.07% -0.85% -0.95%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.561 ms / 100) 5.811 -> 5.804 ( -0.12%) [ +0.09% +0.00% +0.00% / +0.03% +0.03% -0.12%] index_select const : Elapsed 0.058 ms (5.816 ms / 100) 5.878 -> 5.830 ( -0.82%) [ +0.07% +0.10% +0.00% / +0.10% -0.82% -0.70%] index_select wrap : Elapsed 0.059 ms (5.882 ms / 100) 5.872 -> 5.829 ( -0.73%) [ +0.00% +0.00% +0.03% / +0.09% -0.73% -0.72%] index_select linear : Elapsed 0.059 ms (5.872 ms / 100) 5.869 -> 5.820 ( -0.83%) [ +0.00% +0.09% +0.05% / +0.05% -0.83% -0.66%] index_select reverse : Elapsed 0.059 ms (5.869 ms / 100) 5.811 -> 5.803 ( -0.14%) [ +0.02% +0.00% +0.09% / +0.00% -0.14% -0.12%] index_select skip64 : Elapsed 0.058 ms (5.812 ms / 100) 5.809 -> 5.801 ( -0.14%) [ +0.07% +0.02% +0.00% / +0.10% -0.14% -0.14%] index_select skip256 : Elapsed 0.058 ms (5.813 ms / 100) 5.873 -> 5.826 ( -0.80%) [ +0.10% +0.00% +0.12% / +0.12% -0.63% -0.80%] index_select spread : Elapsed 0.059 ms (5.879 ms / 100) 5.875 -> 5.828 ( -0.80%) [ +0.05% +0.00% +0.05% / +0.02% -0.80% -0.75%] index_select strided 3 : Elapsed 0.059 ms (5.878 ms / 100) 5.863 -> 5.819 ( -0.75%) [ +0.20% +0.00% +0.03% / +0.09% -0.75% -0.70%] index_select random : Elapsed 0.059 ms (5.875 ms / 100) 5.878 -> 5.823 ( -0.94%) [ +0.10% +0.07% +0.00% / +0.03% -0.94% -0.88%] index_select random_sorted : Elapsed 0.059 ms (5.884 ms / 100) out_shape = [5, 40, 4, 16] in_shape = [20, 40, 4, 16] idx_dim = 0 B = [5, 40, 4, 16] (stride (2560, 64, 16, 1)) A = [20, 40, 4, 16] (stride (1, 20, 800, 3200)) dim = 0 1.691 -> 1.690 ( -0.06%) [ +0.18% +0.00% +0.41% / -0.06% +0.77% +0.71%] index_select const : Elapsed 0.017 ms (1.694 ms / 100) 1.702 -> 1.705 ( +0.18%) [ +0.00% +0.24% +0.18% / +0.18% +0.65% +0.71%] index_select wrap : Elapsed 0.017 ms (1.702 ms / 100) 1.704 -> 1.706 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.59% +0.59%] index_select linear : Elapsed 0.017 ms (1.706 ms / 100) 1.707 -> 1.709 ( +0.12%) [ +0.00% +0.18% +0.12% / +0.12% +0.53% +0.47%] index_select reverse : Elapsed 0.017 ms (1.707 ms / 100) 1.693 -> 1.696 ( +0.18%) [ +0.06% +0.18% +0.00% / +0.18% +0.59% +0.41%] index_select skip64 : Elapsed 0.017 ms (1.694 ms / 100) 1.693 -> 1.692 ( -0.06%) [ +0.12% +0.06% +0.00% / -0.06% +0.59% +0.71%] index_select skip256 : Elapsed 0.017 ms (1.695 ms / 100) 1.718 -> 1.719 ( +0.06%) [ +0.29% +0.00% +0.29% / +0.06% +0.58% +0.64%] index_select spread : Elapsed 0.017 ms (1.723 ms / 100) 1.719 -> 1.721 ( +0.12%) [ +0.17% +0.00% +0.06% / +0.12% +0.76% +0.35%] index_select strided 3 : Elapsed 0.017 ms (1.722 ms / 100) 1.720 -> 1.721 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.70% +0.70%] index_select strided 5 : Elapsed 0.017 ms (1.720 ms / 100) 1.717 -> 1.717 ( +0.00%) [ +0.00% +0.12% +0.06% / +0.00% +0.47% +0.58%] index_select strided 7 : Elapsed 0.017 ms (1.717 ms / 100) 1.716 -> 1.718 ( +0.12%) [ +0.17% +0.00% +0.06% / +0.12% +0.76% +0.93%] index_select strided 8 : Elapsed 0.017 ms (1.719 ms / 100) 1.720 -> 1.722 ( +0.12%) [ +0.23% +0.00% +0.00% / +0.12% +0.70% +0.99%] index_select strided 16 : Elapsed 0.017 ms (1.724 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.23% +0.47%] index_select random : Elapsed 0.017 ms (1.716 ms / 100) 1.719 -> 1.720 ( +0.06%) [ +0.00% +0.17% +0.23% / +0.06% +0.41% +0.58%] index_select random_sorted : Elapsed 0.017 ms (1.719 ms / 100) 1.724 -> 1.727 ( +0.17%) [ +0.23% +0.06% +0.00% / +0.17% +0.52% +0.41%] index_select perm : Elapsed 0.017 ms (1.728 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.41% +0.47%] index_select perm_sorted : Elapsed 0.017 ms (1.717 ms / 100) B = [5, 40, 4, 16] (stride (2560, 16, 640, 1)) A = [20, 40, 4, 16] (stride (4, 1280, 1, 80)) dim = 0 1.838 -> 1.836 ( -0.11%) [ +0.16% +0.27% +0.00% / +0.27% +0.05% -0.11%] index_select const : Elapsed 0.018 ms (1.841 ms / 100) 1.847 -> 1.847 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.00% +0.65% +0.43%] index_select wrap : Elapsed 0.018 ms (1.848 ms / 100) 1.846 -> 1.847 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +0.33% +0.38%] index_select linear : Elapsed 0.018 ms (1.846 ms / 100) 1.843 -> 1.842 ( -0.05%) [ +0.00% +0.22% +0.00% / +0.05% +0.27% -0.05%] index_select reverse : Elapsed 0.018 ms (1.843 ms / 100) 1.828 -> 1.829 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.05% +0.66% +0.38%] index_select skip64 : Elapsed 0.018 ms (1.831 ms / 100) 1.836 -> 1.837 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.54% +0.44%] index_select skip256 : Elapsed 0.018 ms (1.836 ms / 100) 1.853 -> 1.859 ( +0.32%) [ +0.27% +0.00% +0.22% / +0.32% +0.59% +0.54%] index_select spread : Elapsed 0.019 ms (1.858 ms / 100) 1.864 -> 1.860 ( -0.21%) [ +0.00% +0.11% +0.00% / -0.21% +0.59% +0.70%] index_select strided 3 : Elapsed 0.019 ms (1.864 ms / 100) 1.857 -> 1.860 ( +0.16%) [ +0.16% +0.11% +0.00% / +0.16% +0.16% +0.59%] index_select strided 5 : Elapsed 0.019 ms (1.860 ms / 100) 1.849 -> 1.848 ( -0.05%) [ +0.16% +0.00% +0.05% / -0.05% +0.22% +0.38%] index_select strided 7 : Elapsed 0.019 ms (1.852 ms / 100) 1.850 -> 1.848 ( -0.11%) [ +0.11% +0.11% +0.00% / -0.11% +0.49% +0.54%] index_select strided 8 : Elapsed 0.019 ms (1.852 ms / 100) 1.855 -> 1.853 ( -0.11%) [ +0.16% +0.11% +0.00% / -0.11% +0.38% +0.43%] index_select strided 16 : Elapsed 0.019 ms (1.858 ms / 100) 1.857 -> 1.858 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.27% +0.22%] index_select random : Elapsed 0.019 ms (1.859 ms / 100) 1.858 -> 1.859 ( +0.05%) [ +0.22% +0.00% +0.05% / +0.11% +0.05% +0.22%] index_select random_sorted : Elapsed 0.019 ms (1.862 ms / 100) 1.859 -> 1.863 ( +0.22%) [ +0.16% +0.32% +0.00% / +0.22% +0.54% +0.43%] index_select perm : Elapsed 0.019 ms (1.862 ms / 100) 1.865 -> 1.872 ( +0.38%) [ +0.27% +0.05% +0.00% / +0.38% +0.38% +0.48%] index_select perm_sorted : Elapsed 0.019 ms (1.870 ms / 100) B = [5, 40, 4, 16] (stride (2560, 1, 640, 40)) A = [20, 40, 4, 16] (stride (1, 20, 800, 3200)) dim = 0 1.810 -> 1.813 ( +0.17%) [ +0.22% +0.39% +0.00% / +0.17% +0.66% +0.44%] index_select const : Elapsed 0.018 ms (1.814 ms / 100) 1.820 -> 1.819 ( -0.05%) [ +0.00% +0.27% +0.05% / -0.05% +0.60% +0.44%] index_select wrap : Elapsed 0.018 ms (1.820 ms / 100) 1.818 -> 1.824 ( +0.33%) [ +0.17% +0.11% +0.00% / +0.33% +0.55% +0.55%] index_select linear : Elapsed 0.018 ms (1.821 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.16% +0.00% +0.05% / +0.05% +0.33% +0.27%] index_select reverse : Elapsed 0.018 ms (1.829 ms / 100) 1.812 -> 1.817 ( +0.28%) [ +0.06% +0.28% +0.00% / +0.28% +0.55% +0.50%] index_select skip64 : Elapsed 0.018 ms (1.813 ms / 100) 1.812 -> 1.812 ( +0.00%) [ +0.06% +0.11% +0.00% / +0.00% +0.33% +0.61%] index_select skip256 : Elapsed 0.018 ms (1.813 ms / 100) 1.838 -> 1.839 ( +0.05%) [ +0.11% +0.00% +0.11% / +0.05% +0.44% +0.44%] index_select spread : Elapsed 0.018 ms (1.840 ms / 100) 1.839 -> 1.842 ( +0.16%) [ +0.11% +0.05% +0.00% / +0.16% +0.38% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.841 ms / 100) 1.840 -> 1.839 ( -0.05%) [ +0.11% +0.00% +0.11% / -0.05% +0.54% +0.43%] index_select strided 5 : Elapsed 0.018 ms (1.842 ms / 100) 1.837 -> 1.837 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.27% +0.38%] index_select strided 7 : Elapsed 0.018 ms (1.838 ms / 100) 1.839 -> 1.839 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.22% +0.33%] index_select strided 8 : Elapsed 0.018 ms (1.840 ms / 100) 1.839 -> 1.838 ( -0.05%) [ +0.05% +0.00% +0.11% / -0.05% +0.33% +0.49%] index_select strided 16 : Elapsed 0.018 ms (1.840 ms / 100) 1.827 -> 1.830 ( +0.16%) [ +0.00% +0.16% +0.05% / +0.16% +0.44% +0.38%] index_select random : Elapsed 0.018 ms (1.827 ms / 100) 1.830 -> 1.830 ( +0.00%) [ +0.00% +0.11% +0.05% / +0.00% +0.55% +0.44%] index_select random_sorted : Elapsed 0.018 ms (1.830 ms / 100) 1.838 -> 1.840 ( +0.11%) [ +0.05% +0.00% +0.00% / +0.11% +0.49% +0.44%] index_select perm : Elapsed 0.018 ms (1.839 ms / 100) 1.835 -> 1.836 ( +0.05%) [ +0.00% +0.27% +0.00% / +0.05% +0.49% +0.60%] index_select perm_sorted : Elapsed 0.018 ms (1.835 ms / 100) B = [5, 40, 4, 16] (stride (64, 320, 1, 4)) A = [20, 40, 4, 16] (stride (2560, 1, 640, 40)) dim = 0 1.785 -> 1.788 ( +0.17%) [ +0.22% +0.06% +0.00% / +0.17% +0.17% +0.17%] index_select const : Elapsed 0.018 ms (1.789 ms / 100) 1.770 -> 1.768 ( -0.11%) [ +0.00% +0.00% +0.06% / -0.11% +0.23% +0.56%] index_select wrap : Elapsed 0.018 ms (1.770 ms / 100) 1.776 -> 1.780 ( +0.23%) [ +0.06% +0.00% +0.11% / +0.23% +0.39% +0.56%] index_select linear : Elapsed 0.018 ms (1.777 ms / 100) 1.772 -> 1.768 ( -0.23%) [ +0.17% +0.00% +0.00% / -0.23% +0.34% +0.45%] index_select reverse : Elapsed 0.018 ms (1.775 ms / 100) 1.786 -> 1.785 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +0.11% +0.22%] index_select skip64 : Elapsed 0.018 ms (1.787 ms / 100) 1.781 -> 1.785 ( +0.22%) [ +0.06% +0.00% +0.22% / +0.22% +0.28% +0.45%] index_select skip256 : Elapsed 0.018 ms (1.782 ms / 100) 1.774 -> 1.778 ( +0.23%) [ +0.11% +0.00% +0.11% / +0.23% +0.39% +0.34%] index_select spread : Elapsed 0.018 ms (1.776 ms / 100) 1.778 -> 1.779 ( +0.06%) [ +0.00% +0.22% +0.00% / +0.06% +0.45% +0.28%] index_select strided 3 : Elapsed 0.018 ms (1.778 ms / 100) 1.758 -> 1.761 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +0.63% +0.57%] index_select strided 5 : Elapsed 0.018 ms (1.759 ms / 100) 1.761 -> 1.768 ( +0.40%) [ +0.17% +0.17% +0.00% / +0.40% +0.74% +1.02%] index_select strided 7 : Elapsed 0.018 ms (1.764 ms / 100) 1.770 -> 1.774 ( +0.23%) [ +0.00% +0.23% +0.06% / +0.23% +0.73% +0.56%] index_select strided 8 : Elapsed 0.018 ms (1.770 ms / 100) 1.760 -> 1.762 ( +0.11%) [ +0.06% +0.11% +0.00% / +0.11% +0.74% +0.57%] index_select strided 16 : Elapsed 0.018 ms (1.761 ms / 100) 1.784 -> 1.780 ( -0.22%) [ +0.17% +0.11% +0.00% / +0.22% +0.00% -0.22%] index_select random : Elapsed 0.018 ms (1.787 ms / 100) 1.776 -> 1.777 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.06% +0.51% +0.45%] index_select random_sorted : Elapsed 0.018 ms (1.776 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.00% +0.28% / +0.11% +0.34% +0.34%] index_select perm : Elapsed 0.018 ms (1.770 ms / 100) 1.783 -> 1.784 ( +0.06%) [ +0.17% +0.00% +0.00% / +0.06% +0.34% +0.39%] index_select perm_sorted : Elapsed 0.018 ms (1.786 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 5, 20)) A = [20, 40, 4, 16] (stride (1, 1280, 320, 20)) dim = 0 1.647 -> 1.651 ( +0.24%) [ +0.30% +0.00% +0.00% / +0.24% +0.30% +0.55%] index_select const : Elapsed 0.017 ms (1.652 ms / 100) 1.652 -> 1.654 ( +0.12%) [ +0.30% +0.24% +0.00% / +0.12% +0.79% +0.67%] index_select wrap : Elapsed 0.017 ms (1.657 ms / 100) 1.651 -> 1.653 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.30% +0.55%] index_select linear : Elapsed 0.017 ms (1.652 ms / 100) 1.656 -> 1.659 ( +0.18%) [ +0.12% +0.12% +0.00% / +0.18% +0.60% +0.85%] index_select reverse : Elapsed 0.017 ms (1.658 ms / 100) 1.644 -> 1.649 ( +0.30%) [ +0.06% +0.00% +0.36% / +0.30% +0.49% +0.43%] index_select skip64 : Elapsed 0.016 ms (1.645 ms / 100) 1.645 -> 1.649 ( +0.24%) [ +0.24% +0.18% +0.00% / +0.24% +0.49% +0.36%] index_select skip256 : Elapsed 0.016 ms (1.649 ms / 100) 1.670 -> 1.672 ( +0.12%) [ +0.24% +0.30% +0.00% / +0.12% +0.78% +0.66%] index_select spread : Elapsed 0.017 ms (1.674 ms / 100) 1.673 -> 1.676 ( +0.18%) [ +0.00% +0.06% +0.00% / +0.18% +0.48% +0.60%] index_select strided 3 : Elapsed 0.017 ms (1.673 ms / 100) 1.675 -> 1.673 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.72% +0.78%] index_select strided 5 : Elapsed 0.017 ms (1.676 ms / 100) 1.661 -> 1.664 ( +0.18%) [ +0.06% +0.06% +0.00% / +0.18% +0.66% +0.54%] index_select strided 7 : Elapsed 0.017 ms (1.662 ms / 100) 1.665 -> 1.663 ( -0.12%) [ +0.06% +0.12% +0.00% / -0.12% +0.60% +0.60%] index_select strided 8 : Elapsed 0.017 ms (1.666 ms / 100) 1.671 -> 1.672 ( +0.06%) [ +0.00% +0.12% +0.24% / +0.06% +0.48% +0.48%] index_select strided 16 : Elapsed 0.017 ms (1.671 ms / 100) 1.665 -> 1.664 ( -0.06%) [ +0.18% +0.18% +0.00% / -0.06% +0.42% +0.42%] index_select random : Elapsed 0.017 ms (1.668 ms / 100) 1.665 -> 1.663 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +0.66% +0.78%] index_select random_sorted : Elapsed 0.017 ms (1.665 ms / 100) 1.661 -> 1.661 ( +0.00%) [ +0.30% +0.00% +0.24% / +0.00% +0.60% +0.84%] index_select perm : Elapsed 0.017 ms (1.666 ms / 100) 1.673 -> 1.674 ( +0.06%) [ +0.18% +0.30% +0.00% / +0.06% +0.60% +0.66%] index_select perm_sorted : Elapsed 0.017 ms (1.676 ms / 100) B = [5, 40, 4, 16] (stride (640, 1, 3200, 40)) A = [20, 40, 4, 16] (stride (40, 1, 12800, 800)) dim = 0 1.812 -> 1.801 ( -0.61%) [ +0.11% +0.06% +0.00% / +0.00% -0.55% -0.61%] index_select const : Elapsed 0.018 ms (1.814 ms / 100) 1.796 -> 1.795 ( -0.06%) [ +0.28% +0.00% +0.11% / -0.06% +0.33% +0.39%] index_select wrap : Elapsed 0.018 ms (1.801 ms / 100) 1.810 -> 1.811 ( +0.06%) [ +0.39% +0.00% +0.00% / +0.06% +0.39% +0.17%] index_select linear : Elapsed 0.018 ms (1.817 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.06% +0.22% +0.00% / +0.17% +0.56% +0.51%] index_select reverse : Elapsed 0.018 ms (1.783 ms / 100) 1.799 -> 1.802 ( +0.17%) [ +0.00% +0.22% +0.28% / +0.28% +0.33% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.799 ms / 100) 1.792 -> 1.795 ( +0.17%) [ +0.00% +0.33% +0.28% / +0.17% +0.45% +0.39%] index_select skip256 : Elapsed 0.018 ms (1.792 ms / 100) 1.812 -> 1.813 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.28% +0.28%] index_select spread : Elapsed 0.018 ms (1.812 ms / 100) 1.803 -> 1.804 ( +0.06%) [ +0.06% +0.28% +0.00% / +0.06% +0.39% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.804 ms / 100) 1.805 -> 1.805 ( +0.00%) [ +0.06% +0.17% +0.00% / +0.00% +1.00% +1.27%] index_select strided 5 : Elapsed 0.018 ms (1.806 ms / 100) 1.796 -> 1.801 ( +0.28%) [ +0.33% +0.50% +0.00% / +0.28% +0.50% +1.00%] index_select strided 7 : Elapsed 0.018 ms (1.802 ms / 100) 1.814 -> 1.815 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +0.50% +0.61%] index_select strided 8 : Elapsed 0.018 ms (1.817 ms / 100) 1.807 -> 1.812 ( +0.28%) [ +0.33% +0.00% +0.11% / +0.28% +0.33% +0.44%] index_select strided 16 : Elapsed 0.018 ms (1.813 ms / 100) 1.804 -> 1.802 ( -0.11%) [ +0.22% +0.11% +0.00% / -0.11% +0.39% +0.61%] index_select random : Elapsed 0.018 ms (1.808 ms / 100) 1.803 -> 1.806 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +0.50% +0.67%] index_select random_sorted : Elapsed 0.018 ms (1.807 ms / 100) 1.823 -> 1.823 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.49% +0.49%] index_select perm : Elapsed 0.018 ms (1.823 ms / 100) 1.811 -> 1.815 ( +0.22%) [ +0.11% +0.00% +0.17% / +0.28% +0.22% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.813 ms / 100) B = [5, 40, 4, 16] (stride (4, 20, 1, 800)) A = [20, 40, 4, 16] (stride (1, 20, 12800, 800)) dim = 0 1.818 -> 1.821 ( +0.17%) [ +0.00% +0.22% +0.11% / +0.17% +0.50% +0.72%] index_select const : Elapsed 0.018 ms (1.818 ms / 100) 1.829 -> 1.831 ( +0.11%) [ +0.00% +0.16% +0.00% / +0.11% +0.49% +0.38%] index_select wrap : Elapsed 0.018 ms (1.829 ms / 100) 1.824 -> 1.827 ( +0.16%) [ +0.11% +0.22% +0.00% / +0.16% +0.66% +0.44%] index_select linear : Elapsed 0.018 ms (1.826 ms / 100) 1.828 -> 1.834 ( +0.33%) [ +0.00% +0.05% +0.16% / +0.33% +0.44% +0.44%] index_select reverse : Elapsed 0.018 ms (1.828 ms / 100) 1.823 -> 1.821 ( -0.11%) [ +0.00% +0.16% +0.00% / -0.11% +0.55% +0.55%] index_select skip64 : Elapsed 0.018 ms (1.823 ms / 100) 1.817 -> 1.817 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.39% +0.39%] index_select skip256 : Elapsed 0.018 ms (1.817 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.33% +0.43%] index_select spread : Elapsed 0.018 ms (1.842 ms / 100) 1.845 -> 1.844 ( -0.05%) [ +0.00% +0.11% +0.11% / -0.05% +0.43% +0.49%] index_select strided 3 : Elapsed 0.018 ms (1.845 ms / 100) 1.844 -> 1.843 ( -0.05%) [ +0.00% +0.22% +0.05% / -0.05% +0.38% +0.33%] index_select strided 5 : Elapsed 0.018 ms (1.844 ms / 100) 1.842 -> 1.841 ( -0.05%) [ +0.00% +0.22% +0.00% / -0.05% +0.38% +0.27%] index_select strided 7 : Elapsed 0.018 ms (1.842 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.05% +0.05% +0.00% / +0.11% +0.33% +0.38%] index_select strided 8 : Elapsed 0.018 ms (1.843 ms / 100) 1.834 -> 1.840 ( +0.33%) [ +0.00% +0.11% +0.38% / +0.33% +0.38% +0.49%] index_select strided 16 : Elapsed 0.018 ms (1.834 ms / 100) 1.839 -> 1.843 ( +0.22%) [ +0.22% +0.05% +0.00% / +0.22% +0.60% +0.60%] index_select random : Elapsed 0.018 ms (1.843 ms / 100) 1.837 -> 1.841 ( +0.22%) [ +0.22% +0.27% +0.00% / +0.22% +0.54% +0.38%] index_select random_sorted : Elapsed 0.018 ms (1.841 ms / 100) 1.835 -> 1.836 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.49% +0.44%] index_select perm : Elapsed 0.018 ms (1.836 ms / 100) 1.834 -> 1.839 ( +0.27%) [ +0.16% +0.22% +0.00% / +0.27% +0.38% +0.55%] index_select perm_sorted : Elapsed 0.018 ms (1.837 ms / 100) B = [5, 40, 4, 16] (stride (40, 1, 200, 800)) A = [20, 40, 4, 16] (stride (2560, 4, 1, 160)) dim = 0 1.724 -> 1.724 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.12% +0.06% +0.00%] index_select const : Elapsed 0.017 ms (1.725 ms / 100) 1.730 -> 1.726 ( -0.23%) [ +0.12% +0.17% +0.00% / -0.23% +0.35% +0.35%] index_select wrap : Elapsed 0.017 ms (1.732 ms / 100) 1.718 -> 1.717 ( -0.06%) [ +0.17% +0.06% +0.00% / -0.06% +0.17% +0.12%] index_select linear : Elapsed 0.017 ms (1.721 ms / 100) 1.721 -> 1.725 ( +0.23%) [ +0.00% +0.23% +0.29% / +0.35% +0.29% +0.23%] index_select reverse : Elapsed 0.017 ms (1.721 ms / 100) 1.718 -> 1.719 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.23% +0.52%] index_select skip64 : Elapsed 0.017 ms (1.720 ms / 100) 1.729 -> 1.731 ( +0.12%) [ +0.17% +0.00% +0.06% / +0.12% +0.35% +0.40%] index_select skip256 : Elapsed 0.017 ms (1.732 ms / 100) 1.742 -> 1.743 ( +0.06%) [ +0.11% +0.00% +0.17% / +0.06% +0.69% +0.80%] index_select spread : Elapsed 0.017 ms (1.744 ms / 100) 1.760 -> 1.758 ( -0.11%) [ +0.11% +0.06% +0.00% / +0.00% -0.06% -0.11%] index_select strided 3 : Elapsed 0.018 ms (1.762 ms / 100) 1.733 -> 1.735 ( +0.12%) [ +0.35% +0.00% +0.06% / +0.12% +0.46% +0.58%] index_select strided 5 : Elapsed 0.017 ms (1.739 ms / 100) 1.736 -> 1.734 ( -0.12%) [ +0.00% +0.06% +0.00% / -0.12% +0.17% +0.29%] index_select strided 7 : Elapsed 0.017 ms (1.736 ms / 100) 1.736 -> 1.737 ( +0.06%) [ +0.29% +0.23% +0.00% / +0.06% +0.81% +0.81%] index_select strided 8 : Elapsed 0.017 ms (1.741 ms / 100) 1.747 -> 1.745 ( -0.11%) [ +0.00% +0.06% +0.00% / -0.11% +0.46% +0.52%] index_select strided 16 : Elapsed 0.017 ms (1.747 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.29% +0.47% +0.00% / +0.06% +0.47% +0.35%] index_select random : Elapsed 0.017 ms (1.718 ms / 100) 1.725 -> 1.726 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +0.41% +0.12%] index_select random_sorted : Elapsed 0.017 ms (1.727 ms / 100) 1.746 -> 1.746 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.74% +0.74%] index_select perm : Elapsed 0.017 ms (1.746 ms / 100) 1.753 -> 1.756 ( +0.17%) [ +0.17% +0.46% +0.00% / +0.17% +0.86% +0.80%] index_select perm_sorted : Elapsed 0.018 ms (1.756 ms / 100) out_shape = [20, 5, 4, 16] in_shape = [20, 40, 4, 16] idx_dim = 1 B = [20, 5, 4, 16] (stride (320, 1, 80, 5)) A = [20, 40, 4, 16] (stride (2560, 64, 16, 1)) dim = 1 0.618 -> 0.619 ( +0.16%) [ +2.43% +0.16% +0.00% / +0.16% +0.16% +4.69%] index_select const : Elapsed 0.006 ms (0.633 ms / 100) 0.619 -> 0.620 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.48% +0.65%] index_select wrap : Elapsed 0.006 ms (0.619 ms / 100) 0.619 -> 0.620 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.32% +4.20%] index_select linear : Elapsed 0.006 ms (0.620 ms / 100) 0.618 -> 0.618 ( +0.00%) [ +7.28% +0.16% +0.00% / +0.00% +0.65% +0.97%] index_select reverse : Elapsed 0.007 ms (0.663 ms / 100) 0.618 -> 0.618 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.65%] index_select skip64 : Elapsed 0.006 ms (0.618 ms / 100) 0.619 -> 0.620 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.16% +0.32%] index_select skip256 : Elapsed 0.006 ms (0.619 ms / 100) 0.619 -> 0.619 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.16% +0.32%] index_select spread : Elapsed 0.006 ms (0.620 ms / 100) 0.619 -> 0.621 ( +0.32%) [ +0.32% +0.48% +0.00% / +0.32% +0.65% +0.32%] index_select strided 3 : Elapsed 0.006 ms (0.621 ms / 100) 0.619 -> 0.620 ( +0.16%) [ +0.32% +0.32% +0.00% / +0.32% +0.16% +0.48%] index_select strided 5 : Elapsed 0.006 ms (0.621 ms / 100) 0.618 -> 0.619 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.32% +0.65%] index_select strided 7 : Elapsed 0.006 ms (0.618 ms / 100) 0.618 -> 0.618 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.65% +0.32%] index_select strided 8 : Elapsed 0.006 ms (0.619 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +1.13% +1.94%] index_select strided 16 : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +1.30% +1.13%] index_select random : Elapsed 0.006 ms (0.618 ms / 100) 0.617 -> 0.617 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.13% +1.13%] index_select random_sorted : Elapsed 0.006 ms (0.617 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.32% +0.16% +0.00% / +0.16% +0.97% +1.30%] index_select perm : Elapsed 0.006 ms (0.619 ms / 100) 0.617 -> 0.618 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.97% +1.30%] index_select perm_sorted : Elapsed 0.006 ms (0.618 ms / 100) B = [20, 5, 4, 16] (stride (64, 1280, 1, 4)) dim = 1 fill_cnt = 40 2.448 -> 2.439 ( -0.37%) [ +0.04% +0.08% +0.00% / -0.37% -0.25% -0.08%] index_fill_ const : Elapsed 0.024 ms (2.449 ms / 100) 2.452 -> 2.444 ( -0.33%) [ +0.00% +0.04% +0.12% / -0.33% -0.29% -0.12%] index_fill_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.443 -> 2.439 ( -0.16%) [ +0.12% +0.20% +0.00% / -0.16% +0.08% -0.08%] index_fill_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.446 -> 2.438 ( -0.33%) [ +0.08% +0.25% +0.00% / -0.33% -0.12% -0.29%] index_fill_ skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.446 -> 2.438 ( -0.33%) [ +0.00% +0.16% +0.00% / -0.33% -0.16% -0.12%] index_fill_ skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.437 ( -0.29%) [ +0.00% +0.12% +0.12% / -0.29% -0.20% +0.08%] index_fill_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.450 -> 2.443 ( -0.29%) [ +0.00% +0.08% +0.00% / -0.29% -0.12% -0.12%] index_fill_ strided 3 : Elapsed 0.025 ms (2.450 ms / 100) 2.445 -> 2.441 ( -0.16%) [ +0.04% +0.04% +0.00% / -0.16% +0.12% +0.12%] index_fill_ random : Elapsed 0.024 ms (2.446 ms / 100) 2.440 -> 2.432 ( -0.33%) [ +0.04% +0.00% +0.00% / -0.33% -0.04% +0.08%] index_fill_ random_sorted : Elapsed 0.024 ms (2.441 ms / 100) B = [20, 5, 4, 16] (stride (80, 1, 1600, 5)) A = [20, 40, 4, 16] (stride (40, 1, 800, 3200)) dim = 1 1.614 -> 1.614 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.56% +0.56%] index_select const : Elapsed 0.016 ms (1.616 ms / 100) 1.612 -> 1.609 ( -0.19%) [ +0.19% +0.00% +0.00% / -0.19% +0.56% +0.50%] index_select wrap : Elapsed 0.016 ms (1.615 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.19% +0.12% +0.00% / +0.06% +0.74% +0.68%] index_select linear : Elapsed 0.016 ms (1.614 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.56% +0.93%] index_select reverse : Elapsed 0.016 ms (1.612 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.56% +0.62%] index_select skip64 : Elapsed 0.016 ms (1.613 ms / 100) 1.615 -> 1.614 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.62% +0.62%] index_select skip256 : Elapsed 0.016 ms (1.615 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.00% +0.06% +0.12% / +0.06% +0.81% +0.81%] index_select spread : Elapsed 0.016 ms (1.606 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.19% +0.12% +0.00% / +0.12% +0.75% +0.81%] index_select strided 3 : Elapsed 0.016 ms (1.609 ms / 100) 1.609 -> 1.611 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.75% +0.62%] index_select strided 5 : Elapsed 0.016 ms (1.609 ms / 100) 1.610 -> 1.610 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.75% +0.68%] index_select strided 7 : Elapsed 0.016 ms (1.611 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.68% +0.68%] index_select strided 8 : Elapsed 0.016 ms (1.612 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.75% +0.87%] index_select strided 16 : Elapsed 0.016 ms (1.610 ms / 100) 1.609 -> 1.611 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.37% +0.87%] index_select random : Elapsed 0.016 ms (1.610 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.75% +0.75%] index_select random_sorted : Elapsed 0.016 ms (1.606 ms / 100) 1.606 -> 1.608 ( +0.12%) [ +0.06% +0.19% +0.00% / +0.12% +0.81% +0.81%] index_select perm : Elapsed 0.016 ms (1.607 ms / 100) 1.613 -> 1.613 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.68% +0.81%] index_select perm_sorted : Elapsed 0.016 ms (1.614 ms / 100) B = [20, 5, 4, 16] (stride (5, 1, 100, 400)) A = [20, 40, 4, 16] (stride (640, 16, 12800, 1)) dim = 1 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.41% +0.34%] index_select const : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.41% +0.34%] index_select wrap : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.41% +1.42%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.61%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.54% +0.61%] index_select skip64 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.47% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.478 ( +0.20%) [ +0.34% +0.14% +0.00% / +0.20% +0.54% +0.54%] index_select spread : Elapsed 0.015 ms (1.480 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.61%] index_select strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.477 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.478 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.47% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.75%] index_select strided 16 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select random : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.68% +0.68%] index_select random_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.483 ( +0.47%) [ +0.00% +0.00% +0.07% / +0.47% +0.61% +0.68%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) out_shape = [20, 40, 5, 16] in_shape = [20, 40, 4, 16] idx_dim = 2 B = [20, 40, 5, 16] (stride (640, 1, 12800, 40)) A = [20, 40, 4, 16] (stride (40, 1, 12800, 800)) dim = 2 5.598 -> 5.573 ( -0.45%) [ +0.16% +0.00% +0.13% / +0.21% -0.45% -0.45%] index_add_ linear : Elapsed 0.056 ms (5.607 ms / 100) 5.572 -> 5.535 ( -0.66%) [ +0.00% +0.18% +0.07% / +0.14% -0.66% -0.39%] index_copy_ linear : Elapsed 0.056 ms (5.572 ms / 100) 5.582 -> 5.556 ( -0.47%) [ +0.21% +0.14% +0.00% / +0.20% -0.47% -0.34%] index_add_ reverse : Elapsed 0.056 ms (5.594 ms / 100) 5.570 -> 5.538 ( -0.57%) [ +0.05% +0.02% +0.00% / +0.00% -0.50% -0.57%] index_copy_ reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.603 -> 5.570 ( -0.59%) [ +0.00% +0.00% +0.18% / +0.11% -0.43% -0.59%] index_add_ spread : Elapsed 0.056 ms (5.603 ms / 100) 5.574 -> 5.537 ( -0.66%) [ +0.04% +0.00% +0.09% / +0.09% -0.66% -0.65%] index_copy_ spread : Elapsed 0.056 ms (5.576 ms / 100) 5.581 -> 5.564 ( -0.30%) [ +0.09% +0.14% +0.00% / +0.00% -0.30% -0.11%] index_add_ strided 3 : Elapsed 0.056 ms (5.586 ms / 100) 5.550 -> 5.548 ( -0.04%) [ +0.00% +0.16% +0.16% / +0.22% -0.02% -0.04%] index_copy_ strided 3 : Elapsed 0.056 ms (5.550 ms / 100) 5.598 -> 5.572 ( -0.46%) [ +0.05% +0.00% +0.11% / +0.07% -0.46% -0.46%] index_add_ perm : Elapsed 0.056 ms (5.601 ms / 100) 5.555 -> 5.538 ( -0.31%) [ +0.07% +0.00% +0.11% / +0.27% -0.31% -0.22%] index_copy_ perm : Elapsed 0.056 ms (5.559 ms / 100) 5.593 -> 5.569 ( -0.43%) [ +0.00% +0.20% +0.09% / +0.13% -0.25% -0.43%] index_add_ perm_sorted : Elapsed 0.056 ms (5.593 ms / 100) 5.564 -> 5.539 ( -0.45%) [ +0.13% +0.09% +0.00% / +0.00% -0.45% -0.38%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.571 ms / 100) 5.809 -> 5.811 ( +0.03%) [ +0.00% +0.03% +0.10% / +0.14% +0.03% +0.09%] index_select const : Elapsed 0.058 ms (5.809 ms / 100) 5.903 -> 5.876 ( -0.46%) [ +0.00% +0.07% +0.19% / -0.03% -0.42% -0.46%] index_select wrap : Elapsed 0.059 ms (5.903 ms / 100) 5.882 -> 5.861 ( -0.36%) [ +0.15% +0.00% +0.02% / +0.10% -0.36% -0.26%] index_select linear : Elapsed 0.059 ms (5.891 ms / 100) 5.895 -> 5.856 ( -0.66%) [ +0.00% +0.10% +0.08% / +0.00% -0.66% -0.58%] index_select reverse : Elapsed 0.059 ms (5.895 ms / 100) 5.813 -> 5.809 ( -0.07%) [ +0.09% +0.00% +0.02% / +0.05% -0.07% -0.05%] index_select skip64 : Elapsed 0.058 ms (5.818 ms / 100) 5.808 -> 5.813 ( +0.09%) [ +0.10% +0.00% +0.09% / +0.22% +0.17% +0.09%] index_select skip256 : Elapsed 0.058 ms (5.814 ms / 100) 5.888 -> 5.859 ( -0.49%) [ +0.14% +0.00% +0.10% / +0.17% -0.44% -0.49%] index_select spread : Elapsed 0.059 ms (5.896 ms / 100) 5.905 -> 5.870 ( -0.59%) [ +0.00% +0.05% +0.08% / +0.15% -0.59% -0.56%] index_select strided 3 : Elapsed 0.059 ms (5.905 ms / 100) 5.886 -> 5.859 ( -0.46%) [ +0.10% +0.00% +0.17% / +0.27% -0.34% -0.46%] index_select random : Elapsed 0.059 ms (5.892 ms / 100) 5.873 -> 5.846 ( -0.46%) [ +0.00% +0.15% +0.15% / +0.10% -0.46% -0.39%] index_select random_sorted : Elapsed 0.059 ms (5.873 ms / 100) B = [20, 40, 5, 16] (stride (1, 20, 12800, 800)) A = [20, 40, 4, 16] (stride (2560, 4, 1, 160)) dim = 2 5.964 -> 5.965 ( +0.02%) [ +0.00% +0.10% +0.20% / +0.30% +0.02% +0.15%] index_add_ linear : Elapsed 0.060 ms (5.964 ms / 100) 5.913 -> 5.920 ( +0.12%) [ +0.02% +0.00% +0.07% / +0.12% +0.12% +0.22%] index_copy_ linear : Elapsed 0.059 ms (5.914 ms / 100) 5.960 -> 5.968 ( +0.13%) [ +0.02% +0.00% +0.02% / +0.13% +0.25% +0.27%] index_add_ reverse : Elapsed 0.060 ms (5.961 ms / 100) 5.908 -> 5.919 ( +0.19%) [ +0.08% +0.08% +0.00% / +0.19% +0.22% +0.29%] index_copy_ reverse : Elapsed 0.059 ms (5.913 ms / 100) 5.970 -> 5.972 ( +0.03%) [ +0.03% +0.00% +0.18% / +0.17% +0.12% +0.03%] index_add_ spread : Elapsed 0.060 ms (5.972 ms / 100) 5.913 -> 5.915 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.14% +0.10% +0.03%] index_copy_ spread : Elapsed 0.059 ms (5.915 ms / 100) 5.979 -> 5.987 ( +0.13%) [ +0.03% +0.00% +0.12% / +0.13% +0.32% +0.30%] index_add_ strided 3 : Elapsed 0.060 ms (5.981 ms / 100) 5.917 -> 5.936 ( +0.32%) [ +0.15% +0.00% +0.25% / +0.32% +0.47% +0.41%] index_copy_ strided 3 : Elapsed 0.059 ms (5.926 ms / 100) 5.968 -> 5.974 ( +0.10%) [ +0.03% +0.00% +0.05% / +0.10% +0.64% +0.69%] index_add_ perm : Elapsed 0.060 ms (5.970 ms / 100) 5.916 -> 5.922 ( +0.10%) [ +0.00% +0.02% +0.12% / +0.10% +0.46% +0.47%] index_copy_ perm : Elapsed 0.059 ms (5.916 ms / 100) 5.973 -> 5.978 ( +0.08%) [ +0.00% +0.07% +0.22% / +0.08% +0.28% +0.18%] index_add_ perm_sorted : Elapsed 0.060 ms (5.973 ms / 100) 5.919 -> 5.925 ( +0.10%) [ +0.00% +0.12% +0.17% / +0.20% +0.12% +0.10%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.919 ms / 100) 6.327 -> 6.337 ( +0.16%) [ +0.00% +0.17% +0.17% / +0.16% +0.27% +0.25%] index_select const : Elapsed 0.063 ms (6.327 ms / 100) 6.334 -> 6.334 ( +0.00%) [ +0.00% +0.14% +0.22% / +0.16% +0.00% +0.02%] index_select wrap : Elapsed 0.063 ms (6.334 ms / 100) 6.325 -> 6.333 ( +0.13%) [ +0.00% +0.13% +0.27% / +0.21% +0.13% +0.21%] index_select linear : Elapsed 0.063 ms (6.325 ms / 100) 6.329 -> 6.341 ( +0.19%) [ +0.05% +0.00% +0.17% / +0.24% +0.19% +0.24%] index_select reverse : Elapsed 0.063 ms (6.332 ms / 100) 6.325 -> 6.339 ( +0.22%) [ +0.17% +0.00% +0.14% / +0.32% +0.25% +0.22%] index_select skip64 : Elapsed 0.063 ms (6.336 ms / 100) 6.336 -> 6.338 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.08% +0.05% +0.03%] index_select skip256 : Elapsed 0.063 ms (6.336 ms / 100) 6.326 -> 6.332 ( +0.09%) [ +0.00% +0.08% +0.17% / +0.09% +0.27% +0.11%] index_select spread : Elapsed 0.063 ms (6.326 ms / 100) 6.325 -> 6.330 ( +0.08%) [ +0.14% +0.00% +0.24% / +0.13% +0.08% +0.19%] index_select strided 3 : Elapsed 0.063 ms (6.334 ms / 100) 6.331 -> 6.332 ( +0.02%) [ +0.00% +0.06% +0.16% / +0.02% +0.13% +0.06%] index_select random : Elapsed 0.063 ms (6.331 ms / 100) 6.326 -> 6.328 ( +0.03%) [ +0.06% +0.00% +0.14% / +0.03% +0.06% +0.21%] index_select random_sorted : Elapsed 0.063 ms (6.330 ms / 100) B = [20, 40, 5, 16] (stride (1, 20, 12800, 800)) A = [20, 40, 4, 16] (stride (4, 80, 1, 3200)) dim = 2 5.973 -> 5.953 ( -0.33%) [ +0.07% +0.00% +0.13% / -0.02% -0.33% -0.28%] index_add_ linear : Elapsed 0.060 ms (5.977 ms / 100) 5.923 -> 5.910 ( -0.22%) [ +0.00% +0.00% +0.03% / +0.10% -0.22% -0.22%] index_copy_ linear : Elapsed 0.059 ms (5.923 ms / 100) 5.969 -> 5.954 ( -0.25%) [ +0.00% +0.08% +0.03% / +0.05% -0.18% -0.25%] index_add_ reverse : Elapsed 0.060 ms (5.969 ms / 100) 5.922 -> 5.898 ( -0.41%) [ +0.03% +0.05% +0.00% / +0.08% -0.41% -0.20%] index_copy_ reverse : Elapsed 0.059 ms (5.924 ms / 100) 5.974 -> 5.955 ( -0.32%) [ +0.00% +0.02% +0.03% / +0.08% -0.32% -0.22%] index_add_ spread : Elapsed 0.060 ms (5.974 ms / 100) 5.924 -> 5.906 ( -0.30%) [ +0.05% +0.00% +0.00% / +0.00% -0.27% -0.30%] index_copy_ spread : Elapsed 0.059 ms (5.927 ms / 100) 5.953 -> 5.946 ( -0.12%) [ +0.00% +0.03% +0.02% / +0.17% -0.03% -0.12%] index_add_ strided 3 : Elapsed 0.060 ms (5.953 ms / 100) 5.899 -> 5.901 ( +0.03%) [ +0.00% +0.10% +0.12% / +0.17% +0.08% +0.03%] index_copy_ strided 3 : Elapsed 0.059 ms (5.899 ms / 100) 5.955 -> 5.943 ( -0.20%) [ +0.02% +0.00% +0.00% / +0.05% -0.05% -0.20%] index_add_ perm : Elapsed 0.060 ms (5.956 ms / 100) 5.893 -> 5.901 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.17% +0.14% +0.17%] index_copy_ perm : Elapsed 0.059 ms (5.901 ms / 100) 5.960 -> 5.945 ( -0.25%) [ +0.02% +0.00% +0.07% / +0.18% -0.25% -0.22%] index_add_ perm_sorted : Elapsed 0.060 ms (5.961 ms / 100) 5.910 -> 5.896 ( -0.24%) [ +0.08% +0.00% +0.05% / +0.22% -0.24% -0.24%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.915 ms / 100) 6.326 -> 6.303 ( -0.36%) [ +0.00% +0.11% +0.06% / +0.11% -0.33% -0.36%] index_select const : Elapsed 0.063 ms (6.326 ms / 100) 6.322 -> 6.304 ( -0.28%) [ +0.08% +0.05% +0.00% / +0.24% -0.27% -0.28%] index_select wrap : Elapsed 0.063 ms (6.327 ms / 100) 6.325 -> 6.301 ( -0.38%) [ +0.00% +0.13% +0.06% / +0.05% -0.38% -0.35%] index_select linear : Elapsed 0.063 ms (6.325 ms / 100) 6.322 -> 6.297 ( -0.40%) [ +0.13% +0.00% +0.06% / +0.21% -0.40% -0.14%] index_select reverse : Elapsed 0.063 ms (6.330 ms / 100) 6.327 -> 6.302 ( -0.40%) [ +0.05% +0.00% +0.16% / +0.00% -0.36% -0.40%] index_select skip64 : Elapsed 0.063 ms (6.330 ms / 100) 6.331 -> 6.299 ( -0.51%) [ +0.05% +0.02% +0.00% / +0.03% -0.36% -0.51%] index_select skip256 : Elapsed 0.063 ms (6.334 ms / 100) 6.329 -> 6.302 ( -0.43%) [ +0.02% +0.00% +0.02% / -0.02% -0.33% -0.43%] index_select spread : Elapsed 0.063 ms (6.330 ms / 100) 6.330 -> 6.297 ( -0.52%) [ +0.00% +0.06% +0.03% / +0.03% -0.52% -0.46%] index_select strided 3 : Elapsed 0.063 ms (6.330 ms / 100) 6.326 -> 6.303 ( -0.36%) [ +0.08% +0.11% +0.00% / +0.17% -0.35% -0.36%] index_select random : Elapsed 0.063 ms (6.331 ms / 100) 6.328 -> 6.305 ( -0.36%) [ +0.08% +0.03% +0.00% / +0.13% -0.36% -0.35%] index_select random_sorted : Elapsed 0.063 ms (6.333 ms / 100) out_shape = [20, 40, 4, 5] in_shape = [20, 40, 4, 16] idx_dim = 3 B = [20, 40, 4, 5] (stride (800, 20, 5, 1)) A = [20, 40, 4, 16] (stride (640, 16, 12800, 1)) dim = 3 2.003 -> 2.003 ( +0.00%) [ +0.10% +0.00% +0.20% / +0.00% +0.45% +0.25%] index_select const : Elapsed 0.020 ms (2.005 ms / 100) 2.001 -> 2.001 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.40% +0.45%] index_select wrap : Elapsed 0.020 ms (2.001 ms / 100) 1.999 -> 2.002 ( +0.15%) [ +0.10% +0.20% +0.00% / +0.15% +0.50% +0.60%] index_select linear : Elapsed 0.020 ms (2.001 ms / 100) 1.999 -> 1.997 ( -0.10%) [ +0.05% +0.20% +0.00% / -0.10% +0.35% +0.30%] index_select reverse : Elapsed 0.020 ms (2.000 ms / 100) 2.001 -> 2.004 ( +0.15%) [ +0.10% +0.00% +0.20% / +0.15% +0.20% +0.45%] index_select skip64 : Elapsed 0.020 ms (2.003 ms / 100) 2.003 -> 2.003 ( +0.00%) [ +0.25% +0.00% +0.05% / +0.00% +0.40% +0.20%] index_select skip256 : Elapsed 0.020 ms (2.008 ms / 100) 2.017 -> 2.018 ( +0.05%) [ +0.00% +0.15% +0.25% / +0.05% +0.55% +0.45%] index_select spread : Elapsed 0.020 ms (2.017 ms / 100) 2.018 -> 2.017 ( -0.05%) [ +0.20% +0.15% +0.00% / -0.05% +0.45% +0.50%] index_select strided 3 : Elapsed 0.020 ms (2.022 ms / 100) 2.016 -> 2.014 ( -0.10%) [ +0.10% +0.20% +0.00% / -0.10% +0.40% +0.60%] index_select strided 5 : Elapsed 0.020 ms (2.018 ms / 100) 2.011 -> 2.015 ( +0.20%) [ +0.10% +0.15% +0.00% / +0.20% +0.65% +0.60%] index_select strided 7 : Elapsed 0.020 ms (2.013 ms / 100) 2.020 -> 2.023 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.54% +0.89%] index_select strided 8 : Elapsed 0.020 ms (2.023 ms / 100) 2.023 -> 2.019 ( -0.20%) [ +0.00% +0.00% +0.00% / -0.20% +0.59% +0.49%] index_select random : Elapsed 0.020 ms (2.023 ms / 100) 2.013 -> 2.016 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.50% +0.50%] index_select random_sorted : Elapsed 0.020 ms (2.016 ms / 100) 2.019 -> 2.021 ( +0.10%) [ +0.15% +0.00% +0.20% / +0.10% +0.59% +0.64%] index_select perm : Elapsed 0.020 ms (2.022 ms / 100) 2.020 -> 2.020 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.45% +0.15%] index_select perm_sorted : Elapsed 0.020 ms (2.021 ms / 100) B = [20, 40, 4, 5] (stride (800, 20, 1, 4)) A = [20, 40, 4, 16] (stride (640, 1, 12800, 40)) dim = 3 2.150 -> 2.150 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.56% +0.42%] index_select const : Elapsed 0.022 ms (2.154 ms / 100) 2.218 -> 2.221 ( +0.14%) [ +0.09% +0.14% +0.00% / +0.14% +0.18% +0.14%] index_select wrap : Elapsed 0.022 ms (2.220 ms / 100) 2.207 -> 2.208 ( +0.05%) [ +0.36% +0.36% +0.00% / +0.05% +0.54% +0.68%] index_select linear : Elapsed 0.022 ms (2.215 ms / 100) 2.208 -> 2.210 ( +0.09%) [ +0.00% +0.05% +0.18% / +0.09% +0.63% +0.45%] index_select reverse : Elapsed 0.022 ms (2.208 ms / 100) 2.144 -> 2.145 ( +0.05%) [ +0.19% +0.00% +0.09% / +0.05% +0.33% +0.42%] index_select skip64 : Elapsed 0.021 ms (2.148 ms / 100) 2.150 -> 2.150 ( +0.00%) [ +0.23% +0.00% +0.14% / +0.00% +0.60% +0.60%] index_select skip256 : Elapsed 0.022 ms (2.155 ms / 100) 2.213 -> 2.215 ( +0.09%) [ +0.23% +0.05% +0.00% / +0.09% +0.45% +0.41%] index_select spread : Elapsed 0.022 ms (2.218 ms / 100) 2.215 -> 2.215 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.32% +0.27%] index_select strided 3 : Elapsed 0.022 ms (2.219 ms / 100) 2.215 -> 2.218 ( +0.14%) [ +0.00% +0.18% +0.32% / +0.14% +0.54% +0.45%] index_select strided 5 : Elapsed 0.022 ms (2.215 ms / 100) 2.208 -> 2.210 ( +0.09%) [ +0.05% +0.00% +0.14% / +0.09% +0.59% +0.50%] index_select strided 7 : Elapsed 0.022 ms (2.209 ms / 100) 2.154 -> 2.157 ( +0.14%) [ +0.14% +0.09% +0.00% / +0.14% +0.46% +0.65%] index_select strided 8 : Elapsed 0.022 ms (2.157 ms / 100) 2.189 -> 2.196 ( +0.32%) [ +0.00% +0.46% +0.23% / +0.32% +0.64% +0.69%] index_select random : Elapsed 0.022 ms (2.189 ms / 100) 2.188 -> 2.191 ( +0.14%) [ +0.05% +0.05% +0.00% / +0.14% +0.23% +0.27%] index_select random_sorted : Elapsed 0.022 ms (2.189 ms / 100) 2.216 -> 2.218 ( +0.09%) [ +0.18% +0.23% +0.00% / +0.09% +0.36% +0.23%] index_select perm : Elapsed 0.022 ms (2.220 ms / 100) 2.212 -> 2.218 ( +0.27%) [ +0.27% +0.18% +0.00% / +0.27% +0.36% +0.50%] index_select perm_sorted : Elapsed 0.022 ms (2.218 ms / 100) B = [20, 40, 4, 5] (stride (5, 100, 4000, 1)) A = [20, 40, 4, 16] (stride (1, 20, 800, 3200)) dim = 3 2.410 -> 2.409 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.50% +0.37%] index_select const : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.42% +0.25% +0.00% / +0.08% +0.29% +0.50%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.29% +0.37%] index_select linear : Elapsed 0.024 ms (2.422 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.50% +0.21% / +0.46% +0.46% +0.08%] index_select reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.411 ( +0.21%) [ +0.00% +0.17% +0.00% / +0.21% +0.54% +0.46%] index_select skip64 : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.411 ( +0.29%) [ +0.00% +0.21% +0.25% / +0.29% +0.50% +0.62%] index_select skip256 : Elapsed 0.024 ms (2.404 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.54% +0.50%] index_select spread : Elapsed 0.024 ms (2.415 ms / 100) 2.417 -> 2.419 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.54% +0.46%] index_select strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.418 -> 2.421 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.25% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.418 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.46% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.405 -> 2.410 ( +0.21%) [ +0.25% +0.21% +0.00% / +0.21% +0.50% +0.62%] index_select strided 8 : Elapsed 0.024 ms (2.411 ms / 100) 2.403 -> 2.402 ( -0.04%) [ +0.21% +0.00% +0.08% / -0.04% +0.25% +0.46%] index_select random : Elapsed 0.024 ms (2.408 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.37% +0.41%] index_select random_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.00% +0.29% +0.12% / +0.12% +0.71% +0.41%] index_select perm : Elapsed 0.024 ms (2.411 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.37% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.405 ms / 100) B = [20, 40, 4, 5] (stride (160, 4, 1, 3200)) A = [20, 40, 4, 16] (stride (1, 1280, 320, 20)) dim = 3 2.029 -> 2.030 ( +0.05%) [ +0.20% +0.15% +0.00% / +0.05% +0.64% +0.44%] index_select const : Elapsed 0.020 ms (2.033 ms / 100) 2.030 -> 2.031 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.39% +0.39%] index_select wrap : Elapsed 0.020 ms (2.030 ms / 100) 2.030 -> 2.030 ( +0.00%) [ +0.15% +0.20% +0.00% / +0.00% +0.34% +0.49%] index_select linear : Elapsed 0.020 ms (2.033 ms / 100) 2.035 -> 2.039 ( +0.20%) [ +0.10% +0.00% +0.29% / +0.20% +0.54% +0.64%] index_select reverse : Elapsed 0.020 ms (2.037 ms / 100) 2.030 -> 2.025 ( -0.25%) [ +0.10% +0.05% +0.00% / -0.25% +0.44% +0.39%] index_select skip64 : Elapsed 0.020 ms (2.032 ms / 100) 2.029 -> 2.032 ( +0.15%) [ +0.05% +0.15% +0.00% / +0.15% +0.44% +0.59%] index_select skip256 : Elapsed 0.020 ms (2.030 ms / 100) 2.039 -> 2.038 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.05% +0.25%] index_select spread : Elapsed 0.020 ms (2.040 ms / 100) 2.040 -> 2.040 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.10% +0.29%] index_select strided 3 : Elapsed 0.020 ms (2.042 ms / 100) 2.038 -> 2.039 ( +0.05%) [ +0.00% +0.20% +0.10% / +0.05% +0.20% +0.34%] index_select strided 5 : Elapsed 0.020 ms (2.038 ms / 100) 2.036 -> 2.041 ( +0.25%) [ +0.00% +0.39% +0.34% / +0.25% +0.49% +0.25%] index_select strided 7 : Elapsed 0.020 ms (2.036 ms / 100) 2.024 -> 2.031 ( +0.35%) [ +0.00% +0.15% +0.15% / +0.35% +0.59% +0.44%] index_select strided 8 : Elapsed 0.020 ms (2.024 ms / 100) 2.037 -> 2.041 ( +0.20%) [ +0.05% +0.00% +0.05% / +0.25% +0.20% +0.20%] index_select random : Elapsed 0.020 ms (2.038 ms / 100) 2.029 -> 2.034 ( +0.25%) [ +0.15% +0.15% +0.00% / +0.25% +0.54% +0.54%] index_select random_sorted : Elapsed 0.020 ms (2.032 ms / 100) 2.042 -> 2.041 ( -0.05%) [ +0.10% +0.00% +0.10% / -0.05% +0.34% +0.15%] index_select perm : Elapsed 0.020 ms (2.044 ms / 100) 2.032 -> 2.035 ( +0.15%) [ +0.00% +0.15% +0.30% / +0.20% +0.25% +0.15%] index_select perm_sorted : Elapsed 0.020 ms (2.032 ms / 100) B = [20, 40, 4, 5] (stride (160, 4, 1, 3200)) A = [20, 40, 4, 16] (stride (40, 1, 12800, 800)) dim = 3 1.909 -> 1.911 ( +0.10%) [ +0.00% +0.16% +0.00% / +0.10% +0.31% +0.26%] index_select const : Elapsed 0.019 ms (1.909 ms / 100) 1.983 -> 1.990 ( +0.35%) [ +0.25% +0.20% +0.00% / +0.35% +0.81% +0.66%] index_select wrap : Elapsed 0.020 ms (1.988 ms / 100) 1.997 -> 1.996 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.25% +0.40%] index_select linear : Elapsed 0.020 ms (1.998 ms / 100) 1.997 -> 1.993 ( -0.20%) [ +0.00% +0.30% +0.05% / +0.00% -0.20% -0.05%] index_select reverse : Elapsed 0.020 ms (1.997 ms / 100) 1.905 -> 1.908 ( +0.16%) [ +0.10% +0.05% +0.00% / +0.16% +0.79% +0.63%] index_select skip64 : Elapsed 0.019 ms (1.907 ms / 100) 1.909 -> 1.909 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.52% +0.58%] index_select skip256 : Elapsed 0.019 ms (1.909 ms / 100) 1.991 -> 1.995 ( +0.20%) [ +0.00% +0.05% +0.20% / +0.20% +0.80% +0.80%] index_select spread : Elapsed 0.020 ms (1.991 ms / 100) 1.989 -> 1.991 ( +0.10%) [ +0.10% +0.00% +0.15% / +0.10% +0.60% +0.85%] index_select strided 3 : Elapsed 0.020 ms (1.991 ms / 100) 1.991 -> 1.992 ( +0.05%) [ +0.30% +0.05% +0.00% / +0.05% +0.60% +0.40%] index_select strided 5 : Elapsed 0.020 ms (1.997 ms / 100) 1.975 -> 1.978 ( +0.15%) [ +0.00% +0.10% +0.15% / +0.15% +1.22% +1.27%] index_select strided 7 : Elapsed 0.020 ms (1.975 ms / 100) 1.919 -> 1.923 ( +0.21%) [ +0.00% +0.21% +0.10% / +0.21% +0.94% +0.89%] index_select strided 8 : Elapsed 0.019 ms (1.919 ms / 100) 1.959 -> 1.961 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.51% +0.56%] index_select random : Elapsed 0.020 ms (1.961 ms / 100) 1.957 -> 1.956 ( -0.05%) [ +0.31% +0.10% +0.00% / +0.00% -0.05% +0.10%] index_select random_sorted : Elapsed 0.020 ms (1.963 ms / 100) 1.990 -> 1.994 ( +0.20%) [ +0.25% +0.00% +0.00% / +0.20% +0.35% +0.65%] index_select perm : Elapsed 0.020 ms (1.995 ms / 100) 1.989 -> 1.989 ( +0.00%) [ +0.20% +0.15% +0.00% / +0.20% +0.15% +0.00%] index_select perm_sorted : Elapsed 0.020 ms (1.993 ms / 100) out_shape = [5, 40, 16, 4] in_shape = [20, 40, 16, 4] idx_dim = 0 B = [5, 40, 16, 4] (stride (2560, 64, 1, 16)) dim = 0 fill_cnt = 20 3.509 -> 3.486 ( -0.66%) [ +0.00% +0.11% +0.09% / -0.66% -0.43% -0.46%] index_fill_ const : Elapsed 0.035 ms (3.509 ms / 100) 3.517 -> 3.504 ( -0.37%) [ +0.00% +0.09% +0.23% / -0.34% -0.37% -0.26%] index_fill_ linear : Elapsed 0.035 ms (3.517 ms / 100) 3.506 -> 3.489 ( -0.48%) [ +0.23% +0.20% +0.00% / -0.48% -0.43% -0.26%] index_fill_ reverse : Elapsed 0.035 ms (3.514 ms / 100) 3.519 -> 3.500 ( -0.54%) [ +0.00% +0.00% +0.11% / -0.54% -0.51% -0.54%] index_fill_ skip64 : Elapsed 0.035 ms (3.519 ms / 100) 3.515 -> 3.494 ( -0.60%) [ +0.14% +0.00% +0.09% / -0.60% -0.46% -0.26%] index_fill_ skip256 : Elapsed 0.035 ms (3.520 ms / 100) 3.512 -> 3.491 ( -0.60%) [ +0.00% +0.09% +0.03% / -0.54% -0.60% -0.34%] index_fill_ spread : Elapsed 0.035 ms (3.512 ms / 100) 3.516 -> 3.500 ( -0.46%) [ +0.00% +0.11% +0.17% / -0.46% -0.40% -0.43%] index_fill_ strided 3 : Elapsed 0.035 ms (3.516 ms / 100) 3.525 -> 3.498 ( -0.77%) [ +0.06% +0.00% +0.17% / -0.77% -0.48% -0.43%] index_fill_ random : Elapsed 0.035 ms (3.527 ms / 100) 3.516 -> 3.501 ( -0.43%) [ +0.20% +0.11% +0.00% / -0.43% -0.31% -0.31%] index_fill_ random_sorted : Elapsed 0.035 ms (3.523 ms / 100) B = [5, 40, 16, 4] (stride (2560, 16, 1, 640)) A = [20, 40, 16, 4] (stride (1, 20, 3200, 800)) dim = 0 0.692 -> 0.693 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.87% +1.45%] index_select const : Elapsed 0.007 ms (0.693 ms / 100) 0.700 -> 0.693 ( -1.00%) [ +0.14% +0.00% +0.00% / +0.00% -1.00% -0.86%] index_select wrap : Elapsed 0.007 ms (0.701 ms / 100) 0.700 -> 0.693 ( -1.00%) [ +0.14% +0.00% +0.00% / -0.14% -1.00% -0.86%] index_select linear : Elapsed 0.007 ms (0.701 ms / 100) 0.692 -> 0.693 ( +0.14%) [ +0.00% +0.29% +0.00% / +0.14% +0.43% +0.58%] index_select reverse : Elapsed 0.007 ms (0.692 ms / 100) 0.692 -> 0.692 ( +0.00%) [ +0.14% +0.29% +0.00% / +0.00% +0.43% +0.58%] index_select skip64 : Elapsed 0.007 ms (0.693 ms / 100) 0.693 -> 0.692 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +1.15% +1.15%] index_select skip256 : Elapsed 0.007 ms (0.693 ms / 100) 0.691 -> 0.693 ( +0.29%) [ +0.00% +0.00% +0.14% / +0.29% +1.16% +1.45%] index_select spread : Elapsed 0.007 ms (0.691 ms / 100) 0.693 -> 0.692 ( -0.14%) [ +0.14% +0.00% +0.00% / -0.14% +0.43% +0.43%] index_select strided 3 : Elapsed 0.007 ms (0.694 ms / 100) 0.692 -> 0.692 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.43% +1.01%] index_select strided 5 : Elapsed 0.007 ms (0.693 ms / 100) 0.692 -> 0.691 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.43% +0.14%] index_select strided 7 : Elapsed 0.007 ms (0.692 ms / 100) 0.690 -> 0.692 ( +0.29%) [ +0.14% +0.29% +0.00% / +0.29% +0.43% +0.58%] index_select strided 8 : Elapsed 0.007 ms (0.691 ms / 100) 0.696 -> 0.691 ( -0.72%) [ +0.14% +0.29% +0.00% / +0.29% -0.72% -0.72%] index_select strided 16 : Elapsed 0.007 ms (0.697 ms / 100) 0.696 -> 0.693 ( -0.43%) [ +0.29% +0.43% +0.00% / +0.14% -0.43% +2.59%] index_select random : Elapsed 0.007 ms (0.698 ms / 100) 0.691 -> 0.693 ( +0.29%) [ +0.00% +0.29% +0.14% / +0.29% +0.58% +0.58%] index_select random_sorted : Elapsed 0.007 ms (0.691 ms / 100) 0.689 -> 0.690 ( +0.15%) [ +0.15% +0.44% +0.00% / +0.15% +0.73% +0.87%] index_select perm : Elapsed 0.007 ms (0.690 ms / 100) 0.690 -> 0.691 ( +0.14%) [ +0.00% +0.29% +0.14% / +0.14% +1.16% +1.30%] index_select perm_sorted : Elapsed 0.007 ms (0.690 ms / 100) B = [5, 40, 16, 4] (stride (2560, 16, 1, 640)) A = [20, 40, 16, 4] (stride (40, 1, 800, 12800)) dim = 0 1.796 -> 1.794 ( -0.11%) [ +0.00% +0.17% +0.00% / +0.11% -0.11% -0.11%] index_select const : Elapsed 0.018 ms (1.796 ms / 100) 1.776 -> 1.776 ( +0.00%) [ +0.11% +0.17% +0.00% / +0.00% +0.34% +0.56%] index_select wrap : Elapsed 0.018 ms (1.778 ms / 100) 1.790 -> 1.793 ( +0.17%) [ +0.17% +0.22% +0.00% / +0.17% +0.73% +0.50%] index_select linear : Elapsed 0.018 ms (1.793 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.00% +0.34% +0.11% / +0.17% +0.51% +0.51%] index_select reverse : Elapsed 0.018 ms (1.782 ms / 100) 1.788 -> 1.787 ( -0.06%) [ +0.17% +0.17% +0.00% / -0.06% +0.06% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.791 ms / 100) 1.782 -> 1.780 ( -0.11%) [ +0.17% +0.22% +0.00% / -0.11% +0.28% +0.67%] index_select skip256 : Elapsed 0.018 ms (1.785 ms / 100) 1.791 -> 1.794 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.39% +0.45%] index_select spread : Elapsed 0.018 ms (1.794 ms / 100) 1.789 -> 1.789 ( +0.00%) [ +0.06% +0.11% +0.00% / +0.00% +0.45% +0.67%] index_select strided 3 : Elapsed 0.018 ms (1.790 ms / 100) 1.788 -> 1.788 ( +0.00%) [ +0.28% +0.17% +0.00% / +0.00% +1.01% +1.17%] index_select strided 5 : Elapsed 0.018 ms (1.793 ms / 100) 1.786 -> 1.789 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.45% +0.73%] index_select strided 7 : Elapsed 0.018 ms (1.788 ms / 100) 1.798 -> 1.799 ( +0.06%) [ +0.00% +0.17% +0.06% / +0.06% +0.33% +0.28%] index_select strided 8 : Elapsed 0.018 ms (1.798 ms / 100) 1.796 -> 1.795 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.45% +0.45%] index_select strided 16 : Elapsed 0.018 ms (1.799 ms / 100) 1.796 -> 1.796 ( +0.00%) [ +0.06% +0.45% +0.00% / +0.00% +0.06% +1.34%] index_select random : Elapsed 0.018 ms (1.797 ms / 100) 1.796 -> 1.796 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.33% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.796 ms / 100) 1.788 -> 1.788 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.56% +0.50%] index_select perm : Elapsed 0.018 ms (1.791 ms / 100) 1.788 -> 1.790 ( +0.11%) [ +0.22% +0.17% +0.00% / +0.11% +0.95% +0.78%] index_select perm_sorted : Elapsed 0.018 ms (1.792 ms / 100) out_shape = [20, 5, 16, 4] in_shape = [20, 40, 16, 4] idx_dim = 1 B = [20, 5, 16, 4] (stride (320, 64, 1, 16)) A = [20, 40, 16, 4] (stride (64, 1280, 4, 1)) dim = 1 1.377 -> 1.376 ( -0.07%) [ +0.15% +0.07% +0.00% / -0.07% +0.44% +0.51%] index_select const : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.379 ( +0.22%) [ +0.22% +0.15% +0.00% / +0.22% +0.58% +0.58%] index_select wrap : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.384 ( +0.58%) [ +0.07% +0.07% +0.00% / +0.80% +0.58% +0.65%] index_select linear : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_select reverse : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.80%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.58% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.379 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.58% +0.58%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.382 ( +0.36%) [ +0.07% +0.07% +0.00% / +0.36% +0.87% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.44% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.379 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.65% +0.65%] index_select strided 7 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.58% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.15% +0.22% +0.00% / +0.22% +0.80% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.07% +0.15% +0.00% / -0.07% +0.58% +0.73%] index_select random : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.73% +0.73%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.378 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.378 ( +0.29%) [ +0.07% +0.15% +0.00% / +0.29% +0.80% +0.73%] index_select perm_sorted : Elapsed 0.014 ms (1.375 ms / 100) B = [20, 5, 16, 4] (stride (320, 4, 20, 1)) A = [20, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 1 1.375 -> 1.377 ( +0.15%) [ +0.36% +0.22% +0.00% / +0.15% +0.65% +0.51%] index_select const : Elapsed 0.014 ms (1.380 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.73% +0.65%] index_select wrap : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.65% +0.65%] index_select linear : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.65% +0.65%] index_select reverse : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.29% +0.07% +0.00% / +0.07% +0.66% +0.73%] index_select skip64 : Elapsed 0.014 ms (1.378 ms / 100) 1.375 -> 1.382 ( +0.51%) [ +0.00% +0.00% +0.00% / +0.51% +0.58% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.65% +0.73%] index_select spread : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.65% +0.65%] index_select strided 3 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_select strided 5 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.22% +0.00% +0.07% / +0.07% +0.80% +0.66%] index_select strided 7 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.375 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.22% +0.22% +0.00% / +0.07% +0.87% +0.80%] index_select strided 16 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.73% +0.66%] index_select random : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.65% +0.65%] index_select random_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.65% +0.58%] index_select perm : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.374 ( -0.07%) [ +0.15% +0.00% +0.00% / -0.07% +0.73% +0.65%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) B = [20, 5, 16, 4] (stride (64, 1280, 4, 1)) A = [20, 40, 16, 4] (stride (2560, 16, 1, 640)) dim = 1 1.395 -> 1.397 ( +0.14%) [ +0.00% +0.22% +0.07% / +0.14% +0.43% +0.72%] index_select const : Elapsed 0.014 ms (1.395 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.22% +0.14%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.385 ( +0.22%) [ +0.14% +0.14% +0.00% / +0.29% +0.22% +0.29%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.22% +0.14%] index_select reverse : Elapsed 0.014 ms (1.387 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.29% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.384 ms / 100) 1.394 -> 1.394 ( +0.00%) [ +0.00% +0.22% +0.22% / +0.00% +0.79% +0.79%] index_select skip256 : Elapsed 0.014 ms (1.394 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.22% +0.00% +0.00% / +0.07% +0.36% +0.36%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.29% +0.22%] index_select strided 3 : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.385 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.29% +0.36%] index_select strided 5 : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.36% +0.14% +0.00% / +0.07% +0.51% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.22% +0.22%] index_select strided 8 : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.29% +0.22%] index_select strided 16 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.22% +0.22% +0.00% / +0.00% +0.43% +0.43%] index_select random : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.29% +0.22% +0.00% / +0.36% +0.58% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.36% +0.51%] index_select perm : Elapsed 0.014 ms (1.383 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.36% +0.00% +0.07% / +0.15% +0.65% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.384 ms / 100) B = [20, 5, 16, 4] (stride (1, 1280, 80, 20)) A = [20, 40, 16, 4] (stride (1, 1280, 20, 320)) dim = 1 1.459 -> 1.459 ( +0.00%) [ +0.21% +0.07% +0.00% / +0.00% +0.55% +0.89%] index_select const : Elapsed 0.015 ms (1.462 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.55% +0.55%] index_select wrap : Elapsed 0.015 ms (1.451 ms / 100) 1.452 -> 1.456 ( +0.28%) [ +0.14% +0.14% +0.00% / +0.28% +0.62% +0.48%] index_select linear : Elapsed 0.015 ms (1.454 ms / 100) 1.456 -> 1.459 ( +0.21%) [ +0.00% +0.07% +0.00% / +0.21% +0.76% +0.76%] index_select reverse : Elapsed 0.015 ms (1.456 ms / 100) 1.449 -> 1.450 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.69% +0.69%] index_select skip64 : Elapsed 0.015 ms (1.450 ms / 100) 1.458 -> 1.459 ( +0.07%) [ +0.21% +0.00% +0.00% / +0.07% +0.75% +0.69%] index_select skip256 : Elapsed 0.015 ms (1.461 ms / 100) 1.450 -> 1.449 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.76% +0.48%] index_select spread : Elapsed 0.015 ms (1.451 ms / 100) 1.447 -> 1.449 ( +0.14%) [ +0.21% +0.28% +0.00% / +0.14% +0.69% +0.83%] index_select strided 3 : Elapsed 0.014 ms (1.450 ms / 100) 1.456 -> 1.457 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.82% +0.82%] index_select strided 5 : Elapsed 0.015 ms (1.458 ms / 100) 1.449 -> 1.449 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.62% +0.62%] index_select strided 7 : Elapsed 0.015 ms (1.450 ms / 100) 1.459 -> 1.461 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.69% +0.55%] index_select strided 8 : Elapsed 0.015 ms (1.460 ms / 100) 1.449 -> 1.450 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.62% +0.55%] index_select strided 16 : Elapsed 0.014 ms (1.449 ms / 100) 1.458 -> 1.460 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.69% +0.69%] index_select random : Elapsed 0.015 ms (1.460 ms / 100) 1.451 -> 1.451 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.62%] index_select random_sorted : Elapsed 0.015 ms (1.451 ms / 100) 1.451 -> 1.454 ( +0.21%) [ +0.14% +0.07% +0.00% / +0.21% +0.96% +0.83%] index_select perm : Elapsed 0.015 ms (1.453 ms / 100) 1.459 -> 1.459 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.62% +0.69%] index_select perm_sorted : Elapsed 0.015 ms (1.460 ms / 100) B = [20, 5, 16, 4] (stride (1, 1280, 80, 20)) A = [20, 40, 16, 4] (stride (1, 20, 3200, 800)) dim = 1 0.670 -> 0.669 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.30% +0.30%] index_select const : Elapsed 0.007 ms (0.670 ms / 100) 0.663 -> 0.663 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.00% +0.30%] index_select wrap : Elapsed 0.007 ms (0.663 ms / 100) 0.666 -> 0.666 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.45% +0.15%] index_select linear : Elapsed 0.007 ms (0.667 ms / 100) 0.666 -> 0.666 ( +0.00%) [ +0.45% +0.30% +0.00% / +0.00% +0.15% +0.60%] index_select reverse : Elapsed 0.007 ms (0.669 ms / 100) 0.670 -> 0.670 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.60%] index_select skip64 : Elapsed 0.007 ms (0.670 ms / 100) 0.676 -> 0.676 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.15% +0.00% +0.30%] index_select skip256 : Elapsed 0.007 ms (0.676 ms / 100) 0.674 -> 0.676 ( +0.30%) [ +0.45% +0.59% +0.00% / +0.30% +0.45% +0.30%] index_select spread : Elapsed 0.007 ms (0.677 ms / 100) 0.665 -> 0.667 ( +0.30%) [ +0.30% +0.45% +0.00% / +0.45% +0.45% +0.30%] index_select strided 3 : Elapsed 0.007 ms (0.667 ms / 100) 0.668 -> 0.666 ( -0.30%) [ +0.00% +0.00% +0.00% / -0.15% -0.30% -0.15%] index_select strided 5 : Elapsed 0.007 ms (0.668 ms / 100) 0.660 -> 0.662 ( +0.30%) [ +0.45% +0.45% +0.00% / +0.30% +0.61% +0.61%] index_select strided 7 : Elapsed 0.007 ms (0.663 ms / 100) 0.673 -> 0.673 ( +0.00%) [ +0.30% +0.15% +0.00% / +0.00% +0.30% +0.30%] index_select strided 8 : Elapsed 0.007 ms (0.675 ms / 100) 0.675 -> 0.675 ( +0.00%) [ +1.63% +0.00% +0.00% / +0.00% +0.89% +0.59%] index_select strided 16 : Elapsed 0.007 ms (0.686 ms / 100) 0.672 -> 0.672 ( +0.00%) [ +0.00% +0.30% +0.00% / +0.00% +0.74% +0.45%] index_select random : Elapsed 0.007 ms (0.672 ms / 100) 0.672 -> 0.673 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.74% +0.74%] index_select random_sorted : Elapsed 0.007 ms (0.673 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.75% +0.60%] index_select perm : Elapsed 0.007 ms (0.667 ms / 100) 0.666 -> 0.668 ( +0.30%) [ +0.45% +0.15% +0.00% / +0.30% +0.90% +1.05%] index_select perm_sorted : Elapsed 0.007 ms (0.669 ms / 100) B = [20, 5, 16, 4] (stride (1, 1280, 20, 320)) A = [20, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 1 1.376 -> 1.376 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.58%] index_select const : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.65% +0.58%] index_select wrap : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.73% +0.80%] index_select linear : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.65% +0.73%] index_select reverse : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.73% +0.66%] index_select skip64 : Elapsed 0.014 ms (1.375 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_select spread : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.73% +0.80%] index_select strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.65% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.73% +0.80%] index_select strided 7 : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.65% +0.73%] index_select strided 8 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.15% +0.22% +0.00% / +0.15% +1.53% +0.73%] index_select strided 16 : Elapsed 0.014 ms (1.376 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +1.16% +0.80%] index_select random : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.65% +0.65%] index_select random_sorted : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.379 ( +0.29%) [ +0.29% +0.07% +0.00% / +0.29% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.379 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.65% +0.65%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [20, 5, 16, 4] (stride (80, 16, 1, 1600)) A = [20, 40, 16, 4] (stride (1, 80, 3200, 20)) dim = 1 1.573 -> 1.573 ( +0.00%) [ +0.00% +0.32% +0.00% / +0.13% +0.45% +0.00%] index_select const : Elapsed 0.016 ms (1.573 ms / 100) 1.573 -> 1.571 ( -0.13%) [ +0.13% +0.32% +0.00% / -0.13% +0.19% +0.13%] index_select wrap : Elapsed 0.016 ms (1.575 ms / 100) 1.575 -> 1.574 ( -0.06%) [ +0.38% +0.00% +0.00% / +0.06% +0.19% -0.06%] index_select linear : Elapsed 0.016 ms (1.581 ms / 100) 1.574 -> 1.579 ( +0.32%) [ +0.25% +0.06% +0.00% / +0.51% +0.32% +0.38%] index_select reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.00% +0.19% +0.32% / +0.00% +0.13% +0.19%] index_select skip64 : Elapsed 0.016 ms (1.575 ms / 100) 1.573 -> 1.573 ( +0.00%) [ +0.06% +0.00% +0.32% / +0.38% +0.00% +0.32%] index_select skip256 : Elapsed 0.016 ms (1.574 ms / 100) 1.574 -> 1.573 ( -0.06%) [ +0.13% +0.38% +0.00% / -0.06% +0.38% +0.32%] index_select spread : Elapsed 0.016 ms (1.576 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.00% +0.13% +0.06% / +0.25% +0.13% +0.00%] index_select strided 3 : Elapsed 0.016 ms (1.575 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.13% +0.00% +0.25% / +0.25% +0.13% +0.19%] index_select strided 5 : Elapsed 0.016 ms (1.575 ms / 100) 1.572 -> 1.575 ( +0.19%) [ +0.38% +0.00% +0.51% / +0.19% +0.45% +0.89%] index_select strided 7 : Elapsed 0.016 ms (1.578 ms / 100) 1.565 -> 1.574 ( +0.58%) [ +0.77% +0.64% +0.00% / +0.77% +0.58% +0.89%] index_select strided 8 : Elapsed 0.016 ms (1.577 ms / 100) 1.574 -> 1.573 ( -0.06%) [ +0.00% +0.06% +0.19% / -0.06% +0.00% +0.38%] index_select strided 16 : Elapsed 0.016 ms (1.574 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.38% +0.19% +0.00% / +0.13% +0.06% +0.57%] index_select random : Elapsed 0.016 ms (1.579 ms / 100) 1.575 -> 1.573 ( -0.13%) [ +0.32% +0.25% +0.00% / -0.13% +0.32% -0.13%] index_select random_sorted : Elapsed 0.016 ms (1.580 ms / 100) 1.578 -> 1.574 ( -0.25%) [ +0.00% +0.00% +0.06% / -0.25% +0.06% -0.19%] index_select perm : Elapsed 0.016 ms (1.578 ms / 100) 1.574 -> 1.573 ( -0.06%) [ +0.32% +0.00% +0.32% / +0.00% +0.00% -0.06%] index_select perm_sorted : Elapsed 0.016 ms (1.579 ms / 100) B = [20, 5, 16, 4] (stride (80, 1, 5, 1600)) A = [20, 40, 16, 4] (stride (40, 1, 800, 12800)) dim = 1 1.615 -> 1.618 ( +0.19%) [ +0.00% +0.06% +0.00% / +0.19% +0.62% +0.74%] index_select const : Elapsed 0.016 ms (1.615 ms / 100) 1.612 -> 1.613 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.74% +0.56%] index_select wrap : Elapsed 0.016 ms (1.612 ms / 100) 1.613 -> 1.616 ( +0.19%) [ +0.25% +0.00% +0.06% / +0.19% +0.68% +0.68%] index_select linear : Elapsed 0.016 ms (1.617 ms / 100) 1.612 -> 1.615 ( +0.19%) [ +0.12% +0.00% +0.06% / +0.19% +0.87% +0.81%] index_select reverse : Elapsed 0.016 ms (1.614 ms / 100) 1.613 -> 1.616 ( +0.19%) [ +0.06% +0.06% +0.00% / +0.19% +0.68% +0.62%] index_select skip64 : Elapsed 0.016 ms (1.614 ms / 100) 1.614 -> 1.616 ( +0.12%) [ +0.06% +0.00% +0.06% / +0.12% +0.62% +0.62%] index_select skip256 : Elapsed 0.016 ms (1.615 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.19% +0.00% +0.00% / +0.12% +0.68% +0.75%] index_select spread : Elapsed 0.016 ms (1.611 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.68% +0.68%] index_select strided 3 : Elapsed 0.016 ms (1.612 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.19% +0.19% +0.00% / +0.12% +0.75% +0.68%] index_select strided 5 : Elapsed 0.016 ms (1.611 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.12% +0.19% +0.00% / +0.06% +0.81% +0.74%] index_select strided 7 : Elapsed 0.016 ms (1.613 ms / 100) 1.611 -> 1.609 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.68% +0.74%] index_select strided 8 : Elapsed 0.016 ms (1.611 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.19% +0.00% +0.12% / +0.12% +0.68% +0.74%] index_select strided 16 : Elapsed 0.016 ms (1.614 ms / 100) 1.610 -> 1.611 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.62% +0.56%] index_select random : Elapsed 0.016 ms (1.610 ms / 100) 1.606 -> 1.609 ( +0.19%) [ +0.12% +0.00% +0.06% / +0.19% +0.93% +0.81%] index_select random_sorted : Elapsed 0.016 ms (1.608 ms / 100) 1.612 -> 1.612 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.56% +0.62%] index_select perm : Elapsed 0.016 ms (1.612 ms / 100) 1.607 -> 1.608 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.62% +0.75%] index_select perm_sorted : Elapsed 0.016 ms (1.608 ms / 100) B = [20, 5, 16, 4] (stride (1, 20, 100, 1600)) dim = 1 fill_cnt = 40 2.484 -> 2.479 ( -0.20%) [ +0.04% +0.04% +0.00% / -0.20% +0.36% +0.28%] index_fill_ const : Elapsed 0.025 ms (2.485 ms / 100) 2.488 -> 2.483 ( -0.20%) [ +0.00% +0.04% +0.00% / -0.20% +0.28% +0.36%] index_fill_ linear : Elapsed 0.025 ms (2.488 ms / 100) 2.480 -> 2.476 ( -0.16%) [ +0.08% +0.24% +0.00% / -0.16% +0.44% +0.69%] index_fill_ reverse : Elapsed 0.025 ms (2.482 ms / 100) 2.481 -> 2.480 ( -0.04%) [ +0.20% +0.00% +0.24% / -0.04% +0.48% +0.44%] index_fill_ skip64 : Elapsed 0.025 ms (2.486 ms / 100) 2.482 -> 2.479 ( -0.12%) [ +0.16% +0.16% +0.00% / -0.12% +0.40% +0.24%] index_fill_ skip256 : Elapsed 0.025 ms (2.486 ms / 100) 2.480 -> 2.482 ( +0.08%) [ +0.08% +0.28% +0.00% / +0.08% +0.52% +0.44%] index_fill_ spread : Elapsed 0.025 ms (2.482 ms / 100) 2.487 -> 2.484 ( -0.12%) [ +0.00% +0.16% +0.00% / -0.12% +0.44% +0.40%] index_fill_ strided 3 : Elapsed 0.025 ms (2.487 ms / 100) 2.486 -> 2.480 ( -0.24%) [ +0.16% +0.20% +0.00% / -0.24% +0.32% +0.32%] index_fill_ random : Elapsed 0.025 ms (2.490 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.08% +0.20% +0.00% / +0.12% +0.36% +0.48%] index_fill_ random_sorted : Elapsed 0.025 ms (2.483 ms / 100) out_shape = [20, 40, 5, 4] in_shape = [20, 40, 16, 4] idx_dim = 2 B = [20, 40, 5, 4] (stride (800, 4, 160, 1)) A = [20, 40, 16, 4] (stride (2560, 4, 160, 1)) dim = 2 2.218 -> 2.222 ( +0.18%) [ +0.09% +0.36% +0.00% / +0.18% +0.63% +0.50%] index_select const : Elapsed 0.022 ms (2.220 ms / 100) 2.270 -> 2.270 ( +0.00%) [ +0.22% +0.09% +0.00% / +0.00% +0.75% +1.06%] index_select wrap : Elapsed 0.023 ms (2.275 ms / 100) 2.280 -> 2.285 ( +0.22%) [ +0.00% +0.26% +0.18% / +0.22% +1.05% +0.92%] index_select linear : Elapsed 0.023 ms (2.280 ms / 100) 2.272 -> 2.268 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +1.32% +1.14%] index_select reverse : Elapsed 0.023 ms (2.272 ms / 100) 2.217 -> 2.226 ( +0.41%) [ +0.23% +0.00% +0.18% / +0.41% +0.41% +0.50%] index_select skip64 : Elapsed 0.022 ms (2.222 ms / 100) 2.215 -> 2.219 ( +0.18%) [ +0.41% +0.00% +0.09% / +0.18% +0.81% +0.86%] index_select skip256 : Elapsed 0.022 ms (2.224 ms / 100) 2.269 -> 2.274 ( +0.22%) [ +0.00% +0.09% +0.09% / +0.22% +1.10% +1.19%] index_select spread : Elapsed 0.023 ms (2.269 ms / 100) 2.283 -> 2.283 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.48% +0.39%] index_select strided 3 : Elapsed 0.023 ms (2.286 ms / 100) 2.285 -> 2.290 ( +0.22%) [ +0.22% +0.00% +0.13% / +0.22% +0.79% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.290 ms / 100) 2.263 -> 2.264 ( +0.04%) [ +0.00% +0.04% +0.31% / +0.04% +1.94% +1.41%] index_select strided 7 : Elapsed 0.023 ms (2.263 ms / 100) 2.228 -> 2.230 ( +0.09%) [ +0.27% +0.27% +0.00% / +0.09% +0.54% +0.49%] index_select strided 8 : Elapsed 0.022 ms (2.234 ms / 100) 2.269 -> 2.271 ( +0.09%) [ +0.09% +0.22% +0.00% / +0.09% +0.88% +0.88%] index_select random : Elapsed 0.023 ms (2.271 ms / 100) 2.267 -> 2.265 ( -0.09%) [ +0.04% +0.00% +0.00% / -0.09% +1.32% +1.50%] index_select random_sorted : Elapsed 0.023 ms (2.268 ms / 100) 2.274 -> 2.274 ( +0.00%) [ +0.26% +0.00% +0.26% / +0.00% +0.75% +0.97%] index_select perm : Elapsed 0.023 ms (2.280 ms / 100) 2.262 -> 2.269 ( +0.31%) [ +0.18% +0.62% +0.00% / +0.31% +1.02% +1.11%] index_select perm_sorted : Elapsed 0.023 ms (2.266 ms / 100) B = [20, 40, 5, 4] (stride (800, 4, 160, 1)) A = [20, 40, 16, 4] (stride (40, 1, 3200, 800)) dim = 2 2.162 -> 2.151 ( -0.51%) [ +0.00% +0.00% +0.05% / +0.23% -0.37% -0.51%] index_select const : Elapsed 0.022 ms (2.162 ms / 100) 2.227 -> 2.223 ( -0.18%) [ +0.04% +0.04% +0.00% / -0.18% -0.09% +0.09%] index_select wrap : Elapsed 0.022 ms (2.228 ms / 100) 2.219 -> 2.224 ( +0.23%) [ +0.18% +0.00% +0.09% / +0.23% +0.32% +0.27%] index_select linear : Elapsed 0.022 ms (2.223 ms / 100) 2.220 -> 2.212 ( -0.36%) [ +0.32% +0.00% +0.27% / +0.36% -0.32% -0.36%] index_select reverse : Elapsed 0.022 ms (2.227 ms / 100) 2.152 -> 2.151 ( -0.05%) [ +0.23% +0.33% +0.00% / +0.14% -0.05% -0.05%] index_select skip64 : Elapsed 0.022 ms (2.157 ms / 100) 2.161 -> 2.155 ( -0.28%) [ +0.09% +0.00% +0.00% / +0.00% -0.28% -0.23%] index_select skip256 : Elapsed 0.022 ms (2.163 ms / 100) 2.220 -> 2.223 ( +0.14%) [ +0.23% +0.18% +0.00% / +0.36% +0.23% +0.14%] index_select spread : Elapsed 0.022 ms (2.225 ms / 100) 2.224 -> 2.229 ( +0.22%) [ +0.00% +0.18% +0.09% / +0.22% +0.27% +0.22%] index_select strided 3 : Elapsed 0.022 ms (2.224 ms / 100) 2.228 -> 2.230 ( +0.09%) [ +0.00% +0.13% +0.00% / +0.13% +0.09% +0.18%] index_select strided 5 : Elapsed 0.022 ms (2.228 ms / 100) 2.228 -> 2.225 ( -0.13%) [ +0.04% +0.13% +0.00% / +0.13% -0.13% -0.09%] index_select strided 7 : Elapsed 0.022 ms (2.229 ms / 100) 2.170 -> 2.165 ( -0.23%) [ +0.09% +0.05% +0.00% / -0.09% -0.05% -0.23%] index_select strided 8 : Elapsed 0.022 ms (2.172 ms / 100) 2.195 -> 2.195 ( +0.00%) [ +0.18% +0.00% +0.05% / +0.14% +0.00% +0.09%] index_select random : Elapsed 0.022 ms (2.199 ms / 100) 2.210 -> 2.183 ( -1.22%) [ +0.23% +0.00% +0.00% / +0.09% -1.18% -1.22%] index_select random_sorted : Elapsed 0.022 ms (2.215 ms / 100) 2.227 -> 2.215 ( -0.54%) [ +0.22% +0.13% +0.00% / +0.22% -0.54% -0.31%] index_select perm : Elapsed 0.022 ms (2.232 ms / 100) 2.234 -> 2.225 ( -0.40%) [ +0.13% +0.18% +0.00% / +0.13% -0.36% -0.40%] index_select perm_sorted : Elapsed 0.022 ms (2.237 ms / 100) B = [20, 40, 5, 4] (stride (800, 5, 1, 200)) A = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) dim = 2 2.270 -> 2.273 ( +0.13%) [ +0.13% +0.00% +0.31% / +0.13% +0.66% +0.53%] index_select const : Elapsed 0.023 ms (2.273 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.00% +0.21% +0.09% / +0.00% +0.43% +0.51%] index_select wrap : Elapsed 0.023 ms (2.332 ms / 100) 2.342 -> 2.340 ( -0.09%) [ +0.00% +0.17% +0.00% / -0.09% +0.38% +0.51%] index_select linear : Elapsed 0.023 ms (2.342 ms / 100) 2.337 -> 2.338 ( +0.04%) [ +0.34% +0.30% +0.00% / +0.04% +0.64% +0.43%] index_select reverse : Elapsed 0.023 ms (2.345 ms / 100) 2.267 -> 2.265 ( -0.09%) [ +0.00% +0.09% +0.18% / -0.09% +0.49% +0.44%] index_select skip64 : Elapsed 0.023 ms (2.267 ms / 100) 2.268 -> 2.271 ( +0.13%) [ +0.00% +0.31% +0.13% / +0.13% +0.53% +0.49%] index_select skip256 : Elapsed 0.023 ms (2.268 ms / 100) 2.337 -> 2.340 ( +0.13%) [ +0.17% +0.00% +0.09% / +0.13% +1.07% +0.68%] index_select spread : Elapsed 0.023 ms (2.341 ms / 100) 2.343 -> 2.343 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.68% +0.60%] index_select strided 3 : Elapsed 0.023 ms (2.343 ms / 100) 2.343 -> 2.345 ( +0.09%) [ +0.17% +0.00% +0.21% / +0.09% +0.34% +0.21%] index_select strided 5 : Elapsed 0.023 ms (2.347 ms / 100) 2.341 -> 2.350 ( +0.38%) [ +0.00% +0.17% +0.13% / +0.38% +0.77% +0.68%] index_select strided 7 : Elapsed 0.023 ms (2.341 ms / 100) 2.282 -> 2.280 ( -0.09%) [ +0.00% +0.13% +0.00% / -0.09% +0.35% +0.31%] index_select strided 8 : Elapsed 0.023 ms (2.282 ms / 100) 2.312 -> 2.316 ( +0.17%) [ +0.13% +0.00% +0.04% / +0.17% +0.82% +0.78%] index_select random : Elapsed 0.023 ms (2.315 ms / 100) 2.315 -> 2.317 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.48% +0.48%] index_select random_sorted : Elapsed 0.023 ms (2.317 ms / 100) 2.337 -> 2.339 ( +0.09%) [ +0.00% +0.13% +0.21% / +0.09% +0.68% +0.68%] index_select perm : Elapsed 0.023 ms (2.337 ms / 100) 2.331 -> 2.336 ( +0.21%) [ +0.17% +0.00% +0.17% / +0.21% +0.99% +0.82%] index_select perm_sorted : Elapsed 0.023 ms (2.335 ms / 100) B = [20, 40, 5, 4] (stride (800, 1, 40, 200)) A = [20, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 2 2.202 -> 2.193 ( -0.41%) [ +0.00% +0.05% +0.05% / +0.00% -0.41% -0.27%] index_select const : Elapsed 0.022 ms (2.202 ms / 100) 2.228 -> 2.225 ( -0.13%) [ +0.00% +0.04% +0.00% / +0.00% -0.13% +0.00%] index_select wrap : Elapsed 0.022 ms (2.228 ms / 100) 2.229 -> 2.221 ( -0.36%) [ +0.00% +0.09% +0.13% / +0.13% -0.36% -0.27%] index_select linear : Elapsed 0.022 ms (2.229 ms / 100) 2.229 -> 2.224 ( -0.22%) [ +0.04% +0.00% +0.00% / +0.00% -0.09% -0.22%] index_select reverse : Elapsed 0.022 ms (2.230 ms / 100) 2.194 -> 2.196 ( +0.09%) [ +0.23% +0.00% +0.14% / +0.23% +0.09% +0.32%] index_select skip64 : Elapsed 0.022 ms (2.199 ms / 100) 2.201 -> 2.191 ( -0.45%) [ +0.18% +0.00% +0.14% / +0.09% -0.45% +0.05%] index_select skip256 : Elapsed 0.022 ms (2.205 ms / 100) 2.271 -> 2.261 ( -0.44%) [ +0.00% +0.00% +0.09% / +0.09% -0.44% -0.31%] index_select spread : Elapsed 0.023 ms (2.271 ms / 100) 2.275 -> 2.262 ( -0.57%) [ +0.13% +0.00% +0.00% / +0.09% -0.31% -0.57%] index_select strided 3 : Elapsed 0.023 ms (2.278 ms / 100) 2.254 -> 2.243 ( -0.49%) [ +0.04% +0.00% +0.00% / +0.09% -0.49% -0.22%] index_select strided 5 : Elapsed 0.023 ms (2.255 ms / 100) 2.271 -> 2.261 ( -0.44%) [ +0.18% +0.13% +0.00% / +0.00% -0.44% -0.44%] index_select strided 7 : Elapsed 0.023 ms (2.275 ms / 100) 2.217 -> 2.208 ( -0.41%) [ +0.00% +0.27% +0.09% / +0.27% -0.41% -0.32%] index_select strided 8 : Elapsed 0.022 ms (2.217 ms / 100) 2.248 -> 2.245 ( -0.13%) [ +0.18% +0.04% +0.00% / +0.13% -0.13% +0.00%] index_select random : Elapsed 0.023 ms (2.252 ms / 100) 2.249 -> 2.239 ( -0.44%) [ +0.13% +0.04% +0.00% / +0.00% -0.22% -0.44%] index_select random_sorted : Elapsed 0.023 ms (2.252 ms / 100) 2.249 -> 2.242 ( -0.31%) [ +0.27% +0.09% +0.00% / +0.09% -0.22% -0.31%] index_select perm : Elapsed 0.023 ms (2.255 ms / 100) 2.250 -> 2.248 ( -0.09%) [ +0.18% +0.00% +0.13% / -0.04% +0.04% -0.09%] index_select perm_sorted : Elapsed 0.023 ms (2.254 ms / 100) B = [20, 40, 5, 4] (stride (1, 400, 80, 20)) A = [20, 40, 16, 4] (stride (40, 1, 3200, 800)) dim = 2 2.140 -> 2.142 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.09% +1.17% +1.31%] index_select const : Elapsed 0.021 ms (2.143 ms / 100) 2.230 -> 2.233 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.18% +0.13% +0.45%] index_select wrap : Elapsed 0.022 ms (2.232 ms / 100) 2.237 -> 2.240 ( +0.13%) [ +0.00% +0.04% +0.09% / +0.31% +0.13% +0.18%] index_select linear : Elapsed 0.022 ms (2.237 ms / 100) 2.232 -> 2.231 ( -0.04%) [ +0.22% +0.00% +0.04% / +0.45% -0.04% +0.09%] index_select reverse : Elapsed 0.022 ms (2.237 ms / 100) 2.138 -> 2.145 ( +0.33%) [ +0.28% +0.14% +0.00% / +0.33% +1.03% +0.94%] index_select skip64 : Elapsed 0.021 ms (2.144 ms / 100) 2.141 -> 2.142 ( +0.05%) [ +0.23% +0.19% +0.00% / +0.05% +1.26% +1.17%] index_select skip256 : Elapsed 0.021 ms (2.146 ms / 100) 2.235 -> 2.232 ( -0.13%) [ +0.04% +0.00% +0.04% / +0.04% -0.13% +0.04%] index_select spread : Elapsed 0.022 ms (2.236 ms / 100) 2.237 -> 2.238 ( +0.04%) [ +0.27% +0.13% +0.00% / +0.04% +0.18% +0.13%] index_select strided 3 : Elapsed 0.022 ms (2.243 ms / 100) 2.228 -> 2.222 ( -0.27%) [ +0.00% +0.00% +0.00% / +0.09% -0.27% +0.09%] index_select strided 5 : Elapsed 0.022 ms (2.228 ms / 100) 2.238 -> 2.239 ( +0.04%) [ +0.09% +0.18% +0.00% / +0.18% +0.04% +0.49%] index_select strided 7 : Elapsed 0.022 ms (2.240 ms / 100) 2.158 -> 2.158 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.00% +0.79% +1.16%] index_select strided 8 : Elapsed 0.022 ms (2.159 ms / 100) 2.193 -> 2.193 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.05% +0.00% +0.00%] index_select random : Elapsed 0.022 ms (2.195 ms / 100) 2.192 -> 2.188 ( -0.18%) [ +0.00% +0.14% +0.00% / -0.14% -0.18% +0.32%] index_select random_sorted : Elapsed 0.022 ms (2.192 ms / 100) 2.233 -> 2.230 ( -0.13%) [ +0.00% +0.31% +0.04% / -0.13% +0.67% +0.45%] index_select perm : Elapsed 0.022 ms (2.233 ms / 100) 2.224 -> 2.223 ( -0.04%) [ +0.31% +0.00% +0.04% / +0.18% +0.00% -0.04%] index_select perm_sorted : Elapsed 0.022 ms (2.231 ms / 100) B = [20, 40, 5, 4] (stride (1, 80, 3200, 20)) A = [20, 40, 16, 4] (stride (1, 320, 20, 12800)) dim = 2 2.456 -> 2.454 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.16% +0.12%] index_select const : Elapsed 0.025 ms (2.456 ms / 100) 2.462 -> 2.458 ( -0.16%) [ +0.08% +0.04% +0.00% / -0.16% +0.12% +0.32%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.00% +0.16% +0.12% / +0.16% +0.29% +0.37%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.12% +0.28% +0.36%] index_select reverse : Elapsed 0.025 ms (2.470 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.33% +0.45%] index_select skip64 : Elapsed 0.025 ms (2.460 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.53% +0.45%] index_select skip256 : Elapsed 0.025 ms (2.454 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.04% +0.00%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.08% +0.24% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.466 ( +0.24%) [ +0.24% +0.20% +0.00% / +0.24% +0.49% +0.41%] index_select strided 5 : Elapsed 0.025 ms (2.466 ms / 100) 2.457 -> 2.463 ( +0.24%) [ +0.00% +0.16% +0.12% / +0.24% +0.33% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.04% +0.00% +0.20% / +0.08% +0.20% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.454 ms / 100) 2.468 -> 2.473 ( +0.20%) [ +0.16% +0.08% +0.00% / +0.20% +0.24% +0.20%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.463 -> 2.469 ( +0.24%) [ +0.08% +0.37% +0.00% / +0.24% +0.41% +0.28%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.12% +0.12% +0.00% / -0.04% +0.28% +0.37%] index_select perm : Elapsed 0.025 ms (2.465 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.36% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) B = [20, 40, 5, 4] (stride (40, 1, 3200, 800)) A = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) dim = 2 2.128 -> 2.126 ( -0.09%) [ +0.00% +0.05% +0.28% / -0.09% +0.28% +0.33%] index_select const : Elapsed 0.021 ms (2.128 ms / 100) 2.208 -> 2.209 ( +0.05%) [ +0.09% +0.18% +0.00% / +0.05% +0.41% +0.32%] index_select wrap : Elapsed 0.022 ms (2.210 ms / 100) 2.210 -> 2.211 ( +0.05%) [ +0.32% +0.23% +0.00% / +0.05% +0.50% +0.50%] index_select linear : Elapsed 0.022 ms (2.217 ms / 100) 2.211 -> 2.214 ( +0.14%) [ +0.14% +0.00% +0.18% / +0.18% +0.14% +0.18%] index_select reverse : Elapsed 0.022 ms (2.214 ms / 100) 2.122 -> 2.126 ( +0.19%) [ +0.00% +0.09% +0.00% / +0.19% +0.33% +0.57%] index_select skip64 : Elapsed 0.021 ms (2.122 ms / 100) 2.127 -> 2.126 ( -0.05%) [ +0.09% +0.19% +0.00% / -0.05% +0.56% +0.42%] index_select skip256 : Elapsed 0.021 ms (2.129 ms / 100) 2.208 -> 2.211 ( +0.14%) [ +0.27% +0.23% +0.00% / +0.14% +0.82% +0.68%] index_select spread : Elapsed 0.022 ms (2.214 ms / 100) 2.214 -> 2.216 ( +0.09%) [ +0.27% +0.00% +0.05% / +0.09% +0.72% +0.77%] index_select strided 3 : Elapsed 0.022 ms (2.220 ms / 100) 2.213 -> 2.217 ( +0.18%) [ +0.00% +0.27% +0.27% / +0.18% +0.50% +0.50%] index_select strided 5 : Elapsed 0.022 ms (2.213 ms / 100) 2.205 -> 2.215 ( +0.45%) [ +0.32% +0.27% +0.00% / +0.45% +1.04% +1.00%] index_select strided 7 : Elapsed 0.022 ms (2.212 ms / 100) 2.143 -> 2.146 ( +0.14%) [ +0.23% +0.05% +0.00% / +0.14% +0.75% +0.61%] index_select strided 8 : Elapsed 0.021 ms (2.148 ms / 100) 2.186 -> 2.189 ( +0.14%) [ +0.41% +0.27% +0.00% / +0.14% +0.87% +0.69%] index_select random : Elapsed 0.022 ms (2.195 ms / 100) 2.189 -> 2.190 ( +0.05%) [ +0.00% +0.09% +0.14% / +0.05% +0.55% +0.55%] index_select random_sorted : Elapsed 0.022 ms (2.189 ms / 100) 2.215 -> 2.215 ( +0.00%) [ +0.14% +0.23% +0.00% / +0.14% +0.00% +0.68%] index_select perm : Elapsed 0.022 ms (2.218 ms / 100) 2.209 -> 2.211 ( +0.09%) [ +0.32% +0.23% +0.00% / +0.09% +0.32% +0.32%] index_select perm_sorted : Elapsed 0.022 ms (2.216 ms / 100) B = [20, 40, 5, 4] (stride (200, 1, 40, 4000)) A = [20, 40, 16, 4] (stride (2560, 64, 1, 16)) dim = 2 2.270 -> 2.261 ( -0.40%) [ +0.13% +0.26% +0.00% / +0.13% +0.04% -0.40%] index_select const : Elapsed 0.023 ms (2.273 ms / 100) 2.256 -> 2.255 ( -0.04%) [ +0.04% +0.00% +0.13% / +0.09% +0.00% -0.04%] index_select wrap : Elapsed 0.023 ms (2.257 ms / 100) 2.260 -> 2.259 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.18% +0.04%] index_select linear : Elapsed 0.023 ms (2.261 ms / 100) 2.255 -> 2.255 ( +0.00%) [ +0.00% +0.18% +0.18% / +0.22% +0.18% +0.00%] index_select reverse : Elapsed 0.023 ms (2.255 ms / 100) 2.255 -> 2.254 ( -0.04%) [ +0.00% +0.13% +0.04% / +0.04% -0.04% -0.04%] index_select skip64 : Elapsed 0.023 ms (2.255 ms / 100) 2.272 -> 2.259 ( -0.57%) [ +0.00% +0.04% +0.04% / +0.18% -0.57% -0.57%] index_select skip256 : Elapsed 0.023 ms (2.272 ms / 100) 2.279 -> 2.270 ( -0.39%) [ +0.04% +0.00% +0.00% / +0.04% -0.39% -0.31%] index_select spread : Elapsed 0.023 ms (2.280 ms / 100) 2.279 -> 2.274 ( -0.22%) [ +0.00% +0.04% +0.04% / +0.04% -0.22% -0.18%] index_select strided 3 : Elapsed 0.023 ms (2.279 ms / 100) 2.280 -> 2.275 ( -0.22%) [ +0.04% +0.00% +0.04% / +0.04% -0.22% -0.09%] index_select strided 5 : Elapsed 0.023 ms (2.281 ms / 100) 2.279 -> 2.275 ( -0.18%) [ +0.04% +0.00% +0.00% / +0.00% -0.18% -0.18%] index_select strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.290 -> 2.276 ( -0.61%) [ +0.04% +0.09% +0.00% / +0.13% -0.48% -0.61%] index_select strided 8 : Elapsed 0.023 ms (2.291 ms / 100) 2.277 -> 2.270 ( -0.31%) [ +0.18% +0.00% +0.09% / +0.00% -0.04% -0.31%] index_select random : Elapsed 0.023 ms (2.281 ms / 100) 2.277 -> 2.274 ( -0.13%) [ +0.00% +0.04% +0.09% / +0.13% -0.09% -0.13%] index_select random_sorted : Elapsed 0.023 ms (2.277 ms / 100) 2.282 -> 2.273 ( -0.39%) [ +0.00% +0.00% +0.09% / -0.04% -0.39% -0.31%] index_select perm : Elapsed 0.023 ms (2.282 ms / 100) 2.276 -> 2.269 ( -0.31%) [ +0.09% +0.00% +0.00% / +0.04% -0.04% -0.31%] index_select perm_sorted : Elapsed 0.023 ms (2.278 ms / 100) B = [20, 40, 5, 4] (stride (200, 1, 40, 4000)) A = [20, 40, 16, 4] (stride (64, 1280, 1, 16)) dim = 2 2.402 -> 2.411 ( +0.37%) [ +0.00% +0.46% +0.33% / +0.37% +0.50% +0.62%] index_select const : Elapsed 0.024 ms (2.402 ms / 100) 2.398 -> 2.407 ( +0.38%) [ +0.13% +0.13% +0.00% / +0.38% +0.46% +0.63%] index_select wrap : Elapsed 0.024 ms (2.401 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.37% +0.25% / +0.08% +0.70% +0.70%] index_select linear : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.404 ( -0.29%) [ +0.17% +0.33% +0.00% / -0.29% +0.50% +0.79%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.25% +0.50%] index_select skip64 : Elapsed 0.024 ms (2.402 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.42% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.427 -> 2.438 ( +0.45%) [ +0.16% +0.00% +0.16% / +0.62% +0.45% +0.70%] index_select spread : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.435 ( +0.16%) [ +0.33% +0.00% +0.25% / +0.33% +0.16% +0.16%] index_select strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.436 -> 2.429 ( -0.29%) [ +0.04% +0.45% +0.00% / -0.29% +0.45% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.436 -> 2.437 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.29% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.438 ms / 100) 2.430 -> 2.435 ( +0.21%) [ +0.00% +0.12% +0.29% / +0.21% +0.49% +0.53%] index_select strided 8 : Elapsed 0.024 ms (2.430 ms / 100) 2.398 -> 2.402 ( +0.17%) [ +0.25% +0.00% +0.00% / +0.17% +0.50% +0.63%] index_select random : Elapsed 0.024 ms (2.404 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.00% +0.29% +0.12% / +0.08% +0.71% +0.62%] index_select random_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.426 -> 2.434 ( +0.33%) [ +0.16% +0.87% +0.00% / +0.33% +0.45% +0.70%] index_select perm : Elapsed 0.024 ms (2.430 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.29% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.421 ms / 100) B = [20, 40, 5, 4] (stride (1, 100, 20, 4000)) A = [20, 40, 16, 4] (stride (16, 320, 1, 12800)) dim = 2 2.514 -> 2.513 ( -0.04%) [ +0.08% +0.24% +0.00% / +0.04% -0.04% +0.08%] index_select const : Elapsed 0.025 ms (2.516 ms / 100) 2.516 -> 2.517 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.24% +0.04%] index_select wrap : Elapsed 0.025 ms (2.518 ms / 100) 2.510 -> 2.511 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.36% +0.28%] index_select linear : Elapsed 0.025 ms (2.514 ms / 100) 2.514 -> 2.514 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.16% +0.20%] index_select reverse : Elapsed 0.025 ms (2.516 ms / 100) 2.513 -> 2.514 ( +0.04%) [ +0.24% +0.08% +0.00% / +0.04% +0.24% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.519 ms / 100) 2.513 -> 2.510 ( -0.12%) [ +0.28% +0.04% +0.00% / +0.04% -0.12% +0.00%] index_select skip256 : Elapsed 0.025 ms (2.520 ms / 100) 2.533 -> 2.535 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.20% +0.20%] index_select spread : Elapsed 0.025 ms (2.536 ms / 100) 2.535 -> 2.532 ( -0.12%) [ +0.20% +0.00% +0.04% / -0.12% +0.08% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.540 ms / 100) 2.539 -> 2.540 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.12% +0.12% +0.04%] index_select strided 5 : Elapsed 0.025 ms (2.540 ms / 100) 2.539 -> 2.539 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.04% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.539 ms / 100) 2.542 -> 2.536 ( -0.24%) [ +0.00% +0.12% +0.00% / -0.12% -0.24% -0.08%] index_select strided 8 : Elapsed 0.025 ms (2.542 ms / 100) 2.538 -> 2.543 ( +0.20%) [ +0.39% +0.32% +0.00% / +0.24% +0.28% +0.20%] index_select random : Elapsed 0.025 ms (2.548 ms / 100) 2.537 -> 2.538 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.28% +0.16% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.538 ms / 100) 2.541 -> 2.541 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.00% +0.08%] index_select perm : Elapsed 0.025 ms (2.541 ms / 100) 2.535 -> 2.536 ( +0.04%) [ +0.00% +0.16% +0.12% / +0.08% +0.20% +0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.535 ms / 100) B = [20, 40, 5, 4] (stride (1, 20, 800, 4000)) A = [20, 40, 16, 4] (stride (2560, 16, 1, 640)) dim = 2 2.386 -> 2.387 ( +0.04%) [ +0.00% +0.59% +0.00% / +0.04% +0.34% +0.29%] index_select const : Elapsed 0.024 ms (2.386 ms / 100) 2.375 -> 2.378 ( +0.13%) [ +0.08% +0.00% +0.17% / +0.13% +0.46% +0.51%] index_select wrap : Elapsed 0.024 ms (2.377 ms / 100) 2.407 -> 2.410 ( +0.12%) [ +0.00% +0.00% +0.21% / +0.12% +0.50% +0.29%] index_select linear : Elapsed 0.024 ms (2.407 ms / 100) 2.386 -> 2.389 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +0.71% +0.67%] index_select reverse : Elapsed 0.024 ms (2.387 ms / 100) 2.375 -> 2.378 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.67% +0.51%] index_select skip64 : Elapsed 0.024 ms (2.378 ms / 100) 2.384 -> 2.388 ( +0.17%) [ +0.13% +0.08% +0.00% / +0.17% +0.59% +0.50%] index_select skip256 : Elapsed 0.024 ms (2.387 ms / 100) 2.400 -> 2.401 ( +0.04%) [ +0.37% +0.00% +0.04% / +0.04% +1.00% +0.92%] index_select spread : Elapsed 0.024 ms (2.409 ms / 100) 2.420 -> 2.426 ( +0.25%) [ +0.29% +0.04% +0.00% / +0.25% +0.54% +0.62%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.21% +0.58%] index_select strided 5 : Elapsed 0.024 ms (2.424 ms / 100) 2.399 -> 2.399 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.92% +0.79%] index_select strided 7 : Elapsed 0.024 ms (2.401 ms / 100) 2.406 -> 2.406 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.37% +0.58%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.392 -> 2.398 ( +0.25%) [ +0.33% +0.25% +0.00% / +0.25% +0.88% +0.75%] index_select random : Elapsed 0.024 ms (2.400 ms / 100) 2.400 -> 2.398 ( -0.08%) [ +0.00% +0.00% +0.13% / -0.08% +0.63% +0.75%] index_select random_sorted : Elapsed 0.024 ms (2.400 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.49% +0.29%] index_select perm : Elapsed 0.024 ms (2.431 ms / 100) 2.394 -> 2.393 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.50% +0.63%] index_select perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) out_shape = [20, 40, 16, 5] in_shape = [20, 40, 16, 4] idx_dim = 3 B = [20, 40, 16, 5] (stride (3200, 80, 1, 16)) A = [20, 40, 16, 4] (stride (1, 80, 3200, 20)) dim = 3 5.644 -> 5.644 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.11% +0.02% +0.00%] index_add_ linear : Elapsed 0.056 ms (5.644 ms / 100) 5.580 -> 5.574 ( -0.11%) [ +0.00% +0.05% +0.07% / +0.09% -0.09% -0.11%] index_copy_ linear : Elapsed 0.056 ms (5.580 ms / 100) 5.642 -> 5.631 ( -0.19%) [ +0.04% +0.00% +0.05% / +0.12% -0.19% -0.18%] index_add_ reverse : Elapsed 0.056 ms (5.644 ms / 100) 5.581 -> 5.572 ( -0.16%) [ +0.00% +0.09% +0.00% / +0.11% -0.13% -0.16%] index_copy_ reverse : Elapsed 0.056 ms (5.581 ms / 100) 5.645 -> 5.640 ( -0.09%) [ +0.02% +0.00% +0.11% / -0.09% -0.05% -0.09%] index_add_ spread : Elapsed 0.056 ms (5.646 ms / 100) 5.575 -> 5.582 ( +0.13%) [ +0.16% +0.00% +0.25% / +0.22% +0.13% +0.16%] index_copy_ spread : Elapsed 0.056 ms (5.584 ms / 100) 5.651 -> 5.643 ( -0.14%) [ +0.00% +0.02% +0.02% / +0.07% +0.23% -0.14%] index_add_ strided 3 : Elapsed 0.057 ms (5.651 ms / 100) 5.589 -> 5.589 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.13% +0.47% +0.00%] index_copy_ strided 3 : Elapsed 0.056 ms (5.595 ms / 100) 5.642 -> 5.632 ( -0.18%) [ +0.00% +0.04% +0.07% / +0.18% -0.18% -0.16%] index_add_ perm : Elapsed 0.056 ms (5.642 ms / 100) 5.583 -> 5.574 ( -0.16%) [ +0.00% +0.09% +0.16% / +0.21% -0.13% -0.16%] index_copy_ perm : Elapsed 0.056 ms (5.583 ms / 100) 5.638 -> 5.632 ( -0.11%) [ +0.00% +0.04% +0.11% / +0.16% -0.11% +0.00%] index_add_ perm_sorted : Elapsed 0.056 ms (5.638 ms / 100) 5.570 -> 5.573 ( +0.05%) [ +0.25% +0.00% +0.23% / +0.18% +0.05% +0.05%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.584 ms / 100) 5.888 -> 5.881 ( -0.12%) [ +0.07% +0.02% +0.00% / +0.00% -0.03% -0.12%] index_select const : Elapsed 0.059 ms (5.892 ms / 100) 5.940 -> 5.926 ( -0.24%) [ +0.00% +0.13% +0.08% / +0.13% -0.20% -0.24%] index_select wrap : Elapsed 0.059 ms (5.940 ms / 100) 5.928 -> 5.922 ( -0.10%) [ +0.05% +0.00% +0.19% / +0.20% -0.07% -0.10%] index_select linear : Elapsed 0.059 ms (5.931 ms / 100) 5.927 -> 5.905 ( -0.37%) [ +0.02% +0.00% +0.15% / +0.13% -0.37% -0.34%] index_select reverse : Elapsed 0.059 ms (5.928 ms / 100) 5.881 -> 5.874 ( -0.12%) [ +0.00% +0.26% +0.14% / +0.20% -0.03% -0.12%] index_select skip64 : Elapsed 0.059 ms (5.881 ms / 100) 5.888 -> 5.873 ( -0.25%) [ +0.00% +0.00% +0.19% / +0.17% -0.25% -0.22%] index_select skip256 : Elapsed 0.059 ms (5.888 ms / 100) 5.924 -> 5.911 ( -0.22%) [ +0.19% +0.00% +0.22% / +0.17% -0.03% -0.22%] index_select spread : Elapsed 0.059 ms (5.935 ms / 100) 5.937 -> 5.930 ( -0.12%) [ +0.00% +0.07% +0.10% / +0.19% +0.12% -0.12%] index_select strided 3 : Elapsed 0.059 ms (5.937 ms / 100) 5.926 -> 5.909 ( -0.29%) [ +0.15% +0.00% +0.17% / +0.29% -0.03% -0.29%] index_select random : Elapsed 0.059 ms (5.935 ms / 100) 5.908 -> 5.902 ( -0.10%) [ +0.12% +0.00% +0.15% / +0.08% -0.03% -0.10%] index_select random_sorted : Elapsed 0.059 ms (5.915 ms / 100) B = [20, 40, 16, 5] (stride (3200, 80, 1, 16)) A = [20, 40, 16, 4] (stride (16, 320, 1, 12800)) dim = 3 5.579 -> 5.588 ( +0.16%) [ +0.00% +0.18% +0.11% / +0.23% +0.16% +0.23%] index_add_ linear : Elapsed 0.056 ms (5.579 ms / 100) 5.521 -> 5.519 ( -0.04%) [ +0.00% +0.00% +0.11% / +0.14% +0.02% -0.04%] index_copy_ linear : Elapsed 0.055 ms (5.521 ms / 100) 5.581 -> 5.587 ( +0.11%) [ +0.14% +0.00% +0.14% / +0.11% +0.20% +0.27%] index_add_ reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.523 -> 5.523 ( +0.00%) [ +0.00% +0.07% +0.09% / +0.02% +0.05% +0.00%] index_copy_ reverse : Elapsed 0.055 ms (5.523 ms / 100) 5.579 -> 5.583 ( +0.07%) [ +0.09% +0.05% +0.00% / +0.27% +0.29% +0.07%] index_add_ spread : Elapsed 0.056 ms (5.584 ms / 100) 5.519 -> 5.520 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.11% +0.11% +0.02%] index_copy_ spread : Elapsed 0.055 ms (5.519 ms / 100) 5.587 -> 5.595 ( +0.14%) [ +0.00% +0.04% +0.18% / +0.20% +0.14% +0.36%] index_add_ strided 3 : Elapsed 0.056 ms (5.587 ms / 100) 5.533 -> 5.534 ( +0.02%) [ +0.14% +0.09% +0.00% / +0.22% +0.02% +0.18%] index_copy_ strided 3 : Elapsed 0.055 ms (5.541 ms / 100) 5.585 -> 5.589 ( +0.07%) [ +0.09% +0.00% +0.04% / +0.07% +0.23% +0.16%] index_add_ perm : Elapsed 0.056 ms (5.590 ms / 100) 5.521 -> 5.526 ( +0.09%) [ +0.00% +0.09% +0.11% / +0.09% +0.18% +0.31%] index_copy_ perm : Elapsed 0.055 ms (5.521 ms / 100) 5.589 -> 5.586 ( -0.05%) [ +0.11% +0.00% +0.05% / +0.02% +0.09% -0.05%] index_add_ perm_sorted : Elapsed 0.056 ms (5.595 ms / 100) 5.523 -> 5.534 ( +0.20%) [ +0.13% +0.00% +0.14% / +0.20% +0.22% +0.27%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.530 ms / 100) 5.723 -> 5.738 ( +0.26%) [ +0.07% +0.00% +0.19% / +0.31% +0.28% +0.26%] index_select const : Elapsed 0.057 ms (5.727 ms / 100) 5.843 -> 5.847 ( +0.07%) [ +0.10% +0.00% +0.12% / +0.15% +0.14% +0.07%] index_select wrap : Elapsed 0.058 ms (5.849 ms / 100) 5.834 -> 5.835 ( +0.02%) [ +0.00% +0.03% +0.03% / +0.02% +0.24% +0.19%] index_select linear : Elapsed 0.058 ms (5.834 ms / 100) 5.826 -> 5.832 ( +0.10%) [ +0.00% +0.02% +0.17% / +0.10% +0.45% +0.36%] index_select reverse : Elapsed 0.058 ms (5.826 ms / 100) 5.735 -> 5.746 ( +0.19%) [ +0.00% +0.07% +0.14% / +0.28% +0.24% +0.19%] index_select skip64 : Elapsed 0.057 ms (5.735 ms / 100) 5.724 -> 5.728 ( +0.07%) [ +0.09% +0.00% +0.12% / +0.30% +0.31% +0.07%] index_select skip256 : Elapsed 0.057 ms (5.729 ms / 100) 5.840 -> 5.843 ( +0.05%) [ +0.12% +0.00% +0.26% / +0.05% +0.26% +0.27%] index_select spread : Elapsed 0.058 ms (5.847 ms / 100) 5.845 -> 5.853 ( +0.14%) [ +0.00% +0.02% +0.15% / +0.14% +0.29% +0.26%] index_select strided 3 : Elapsed 0.058 ms (5.845 ms / 100) 5.789 -> 5.801 ( +0.21%) [ +0.12% +0.00% +0.22% / +0.21% +0.62% +0.59%] index_select random : Elapsed 0.058 ms (5.796 ms / 100) 5.790 -> 5.801 ( +0.19%) [ +0.00% +0.00% +0.12% / +0.19% +0.45% +0.45%] index_select random_sorted : Elapsed 0.058 ms (5.790 ms / 100) B = [20, 40, 16, 5] (stride (3200, 5, 200, 1)) A = [20, 40, 16, 4] (stride (16, 1280, 1, 320)) dim = 3 6.056 -> 6.046 ( -0.17%) [ +0.00% +0.05% +0.03% / +0.21% +0.05% -0.17%] index_add_ linear : Elapsed 0.061 ms (6.056 ms / 100) 6.040 -> 6.027 ( -0.22%) [ +0.00% +0.03% +0.20% / +0.15% -0.22% -0.05%] index_copy_ linear : Elapsed 0.060 ms (6.040 ms / 100) 6.056 -> 6.056 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.03% +0.00% +0.00%] index_add_ reverse : Elapsed 0.061 ms (6.057 ms / 100) 6.044 -> 6.032 ( -0.20%) [ +0.13% +0.05% +0.00% / +0.17% -0.12% -0.20%] index_copy_ reverse : Elapsed 0.061 ms (6.052 ms / 100) 6.045 -> 6.042 ( -0.05%) [ +0.00% +0.08% +0.12% / +0.13% -0.05% +0.02%] index_add_ spread : Elapsed 0.060 ms (6.045 ms / 100) 6.027 -> 6.013 ( -0.23%) [ +0.00% +0.08% +0.15% / +0.07% -0.20% -0.23%] index_copy_ spread : Elapsed 0.060 ms (6.027 ms / 100) 6.039 -> 6.042 ( +0.05%) [ +0.00% +0.13% +0.13% / +0.18% +0.12% +0.05%] index_add_ strided 3 : Elapsed 0.060 ms (6.039 ms / 100) 6.023 -> 6.016 ( -0.12%) [ +0.00% +0.10% +0.07% / +0.20% -0.12% +0.08%] index_copy_ strided 3 : Elapsed 0.060 ms (6.023 ms / 100) 6.046 -> 6.037 ( -0.15%) [ +0.00% +0.03% +0.05% / +0.13% -0.15% +0.03%] index_add_ perm : Elapsed 0.060 ms (6.046 ms / 100) 6.025 -> 6.015 ( -0.17%) [ +0.03% +0.00% +0.12% / +0.08% -0.17% -0.10%] index_copy_ perm : Elapsed 0.060 ms (6.027 ms / 100) 6.046 -> 6.042 ( -0.07%) [ +0.05% +0.00% +0.03% / -0.07% -0.05% -0.03%] index_add_ perm_sorted : Elapsed 0.060 ms (6.049 ms / 100) 6.029 -> 6.019 ( -0.17%) [ +0.02% +0.12% +0.00% / +0.13% -0.17% -0.15%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.030 ms / 100) 6.232 -> 6.227 ( -0.08%) [ +0.05% +0.13% +0.00% / +0.24% -0.08% +0.00%] index_select const : Elapsed 0.062 ms (6.235 ms / 100) 6.327 -> 6.315 ( -0.19%) [ +0.00% +0.06% +0.03% / +0.00% -0.17% -0.19%] index_select wrap : Elapsed 0.063 ms (6.327 ms / 100) 6.293 -> 6.289 ( -0.06%) [ +0.06% +0.00% +0.08% / +0.24% -0.06% +0.00%] index_select linear : Elapsed 0.063 ms (6.297 ms / 100) 6.297 -> 6.311 ( +0.22%) [ +0.00% +0.06% +0.13% / +0.22% +0.25% +0.25%] index_select reverse : Elapsed 0.063 ms (6.297 ms / 100) 6.213 -> 6.212 ( -0.02%) [ +0.06% +0.00% +0.08% / +0.19% -0.02% -0.02%] index_select skip64 : Elapsed 0.062 ms (6.217 ms / 100) 6.235 -> 6.226 ( -0.14%) [ +0.00% +0.03% +0.13% / +0.11% -0.06% -0.14%] index_select skip256 : Elapsed 0.062 ms (6.235 ms / 100) 6.308 -> 6.305 ( -0.05%) [ +0.00% +0.10% +0.11% / +0.11% -0.05% -0.03%] index_select spread : Elapsed 0.063 ms (6.308 ms / 100) 6.297 -> 6.301 ( +0.06%) [ +0.02% +0.10% +0.00% / +0.10% +0.06% +0.14%] index_select strided 3 : Elapsed 0.063 ms (6.298 ms / 100) 6.261 -> 6.270 ( +0.14%) [ +0.13% +0.00% +0.11% / +0.14% +0.45% +0.45%] index_select random : Elapsed 0.063 ms (6.269 ms / 100) 6.263 -> 6.270 ( +0.11%) [ +0.05% +0.06% +0.00% / +0.11% +0.26% +0.30%] index_select random_sorted : Elapsed 0.063 ms (6.266 ms / 100) B = [20, 40, 16, 5] (stride (3200, 5, 200, 1)) A = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) dim = 3 6.016 -> 6.021 ( +0.08%) [ +0.00% +0.08% +0.25% / +0.17% +0.10% +0.08%] index_add_ linear : Elapsed 0.060 ms (6.016 ms / 100) 6.006 -> 5.988 ( -0.30%) [ +0.12% +0.00% +0.13% / +0.05% -0.30% -0.27%] index_copy_ linear : Elapsed 0.060 ms (6.013 ms / 100) 6.016 -> 6.015 ( -0.02%) [ +0.02% +0.12% +0.00% / +0.17% -0.02% -0.02%] index_add_ reverse : Elapsed 0.060 ms (6.017 ms / 100) 6.011 -> 5.985 ( -0.43%) [ +0.03% +0.00% +0.00% / +0.15% -0.33% -0.43%] index_copy_ reverse : Elapsed 0.060 ms (6.013 ms / 100) 6.023 -> 6.008 ( -0.25%) [ +0.02% +0.00% +0.08% / -0.03% -0.05% -0.25%] index_add_ spread : Elapsed 0.060 ms (6.024 ms / 100) 6.006 -> 5.987 ( -0.32%) [ +0.05% +0.03% +0.00% / +0.17% -0.32% -0.25%] index_copy_ spread : Elapsed 0.060 ms (6.009 ms / 100) 6.018 -> 6.011 ( -0.12%) [ +0.03% +0.00% +0.05% / +0.18% +0.02% -0.12%] index_add_ strided 3 : Elapsed 0.060 ms (6.020 ms / 100) 6.006 -> 5.988 ( -0.30%) [ +0.00% +0.10% +0.07% / +0.17% -0.28% -0.30%] index_copy_ strided 3 : Elapsed 0.060 ms (6.006 ms / 100) 6.021 -> 6.014 ( -0.12%) [ +0.00% +0.05% +0.10% / +0.15% -0.07% -0.12%] index_add_ perm : Elapsed 0.060 ms (6.021 ms / 100) 6.009 -> 5.985 ( -0.40%) [ +0.07% +0.00% +0.05% / +0.10% -0.30% -0.40%] index_copy_ perm : Elapsed 0.060 ms (6.013 ms / 100) 6.020 -> 6.012 ( -0.13%) [ +0.15% +0.10% +0.00% / +0.22% -0.13% -0.13%] index_add_ perm_sorted : Elapsed 0.060 ms (6.029 ms / 100) 6.010 -> 5.984 ( -0.43%) [ +0.02% +0.00% +0.03% / +0.02% -0.43% -0.40%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.011 ms / 100) 6.243 -> 6.232 ( -0.18%) [ +0.10% +0.00% +0.22% / +0.16% -0.18% -0.05%] index_select const : Elapsed 0.062 ms (6.249 ms / 100) 6.321 -> 6.299 ( -0.35%) [ +0.02% +0.03% +0.00% / +0.06% -0.30% -0.35%] index_select wrap : Elapsed 0.063 ms (6.322 ms / 100) 6.309 -> 6.287 ( -0.35%) [ +0.05% +0.05% +0.00% / +0.03% -0.32% -0.35%] index_select linear : Elapsed 0.063 ms (6.312 ms / 100) 6.307 -> 6.284 ( -0.36%) [ +0.02% +0.00% +0.05% / +0.03% -0.36% -0.25%] index_select reverse : Elapsed 0.063 ms (6.308 ms / 100) 6.249 -> 6.232 ( -0.27%) [ +0.00% +0.00% +0.02% / -0.02% -0.21% -0.27%] index_select skip64 : Elapsed 0.062 ms (6.249 ms / 100) 6.249 -> 6.234 ( -0.24%) [ +0.00% +0.00% +0.03% / +0.10% -0.21% -0.24%] index_select skip256 : Elapsed 0.062 ms (6.249 ms / 100) 6.307 -> 6.283 ( -0.38%) [ +0.10% +0.00% +0.10% / +0.13% -0.16% -0.38%] index_select spread : Elapsed 0.063 ms (6.313 ms / 100) 6.313 -> 6.304 ( -0.14%) [ +0.14% +0.00% +0.21% / +0.21% -0.14% -0.05%] index_select strided 3 : Elapsed 0.063 ms (6.322 ms / 100) 6.307 -> 6.295 ( -0.19%) [ +0.00% +0.05% +0.11% / +0.08% -0.17% -0.19%] index_select random : Elapsed 0.063 ms (6.307 ms / 100) 6.307 -> 6.285 ( -0.35%) [ +0.02% +0.06% +0.00% / +0.16% -0.33% -0.35%] index_select random_sorted : Elapsed 0.063 ms (6.308 ms / 100) B = [20, 40, 16, 5] (stride (3200, 16, 1, 640)) A = [20, 40, 16, 4] (stride (1, 1280, 80, 20)) dim = 3 3.368 -> 3.319 ( -1.45%) [ +0.06% +0.00% +0.00% / -0.06% -1.45% -1.34%] index_add_ linear : Elapsed 0.034 ms (3.370 ms / 100) 3.306 -> 3.277 ( -0.88%) [ +0.00% +0.15% +0.12% / +0.06% -0.88% -0.67%] index_copy_ linear : Elapsed 0.033 ms (3.306 ms / 100) 3.371 -> 3.345 ( -0.77%) [ +0.00% +0.18% +0.00% / +0.39% -0.77% -0.65%] index_add_ reverse : Elapsed 0.034 ms (3.371 ms / 100) 3.310 -> 3.294 ( -0.48%) [ +0.00% +0.12% +0.24% / +0.09% -0.45% -0.48%] index_copy_ reverse : Elapsed 0.033 ms (3.310 ms / 100) 3.366 -> 3.323 ( -1.28%) [ +0.27% +0.00% +0.18% / +0.00% -1.28% -0.95%] index_add_ spread : Elapsed 0.034 ms (3.375 ms / 100) 3.301 -> 3.287 ( -0.42%) [ +0.00% +0.36% +0.42% / +0.36% -0.39% -0.42%] index_copy_ spread : Elapsed 0.033 ms (3.301 ms / 100) 3.373 -> 3.327 ( -1.36%) [ +0.00% +0.03% +0.24% / +0.18% -1.36% -1.33%] index_add_ strided 3 : Elapsed 0.034 ms (3.373 ms / 100) 3.315 -> 3.285 ( -0.90%) [ +0.00% +0.15% +0.00% / +0.15% -0.90% -0.87%] index_copy_ strided 3 : Elapsed 0.033 ms (3.315 ms / 100) 3.385 -> 3.321 ( -1.89%) [ +0.09% +0.00% +0.09% / -0.03% -1.77% -1.89%] index_add_ perm : Elapsed 0.034 ms (3.388 ms / 100) 3.324 -> 3.278 ( -1.38%) [ +0.06% +0.00% +0.06% / -0.15% -1.38% -1.26%] index_copy_ perm : Elapsed 0.033 ms (3.326 ms / 100) 3.372 -> 3.324 ( -1.42%) [ +0.15% +0.09% +0.00% / -0.15% -1.42% -1.25%] index_add_ perm_sorted : Elapsed 0.034 ms (3.377 ms / 100) 3.308 -> 3.283 ( -0.76%) [ +0.24% +0.33% +0.00% / +0.12% -0.76% -0.48%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.316 ms / 100) 3.368 -> 3.360 ( -0.24%) [ +0.00% +0.30% +0.09% / +0.24% -0.09% -0.24%] index_select const : Elapsed 0.034 ms (3.368 ms / 100) 3.420 -> 3.389 ( -0.91%) [ +0.15% +0.15% +0.00% / +0.03% -0.91% -0.82%] index_select wrap : Elapsed 0.034 ms (3.425 ms / 100) 3.418 -> 3.384 ( -0.99%) [ +0.00% +0.20% +0.15% / +0.09% -0.99% -0.94%] index_select linear : Elapsed 0.034 ms (3.418 ms / 100) 3.407 -> 3.370 ( -1.09%) [ +0.23% +0.00% +0.03% / +0.26% -1.09% -0.82%] index_select reverse : Elapsed 0.034 ms (3.415 ms / 100) 3.381 -> 3.352 ( -0.86%) [ +0.06% +0.00% +0.18% / +0.03% -0.77% -0.86%] index_select skip64 : Elapsed 0.034 ms (3.383 ms / 100) 3.405 -> 3.353 ( -1.53%) [ +0.18% +0.00% +0.21% / +0.29% -1.35% -1.53%] index_select skip256 : Elapsed 0.034 ms (3.411 ms / 100) 3.407 -> 3.383 ( -0.70%) [ +0.00% +0.09% +0.03% / +0.00% -0.70% -0.70%] index_select spread : Elapsed 0.034 ms (3.407 ms / 100) 3.427 -> 3.394 ( -0.96%) [ +0.00% +0.03% +0.03% / +0.12% -0.90% -0.96%] index_select strided 3 : Elapsed 0.034 ms (3.427 ms / 100) 3.407 -> 3.374 ( -0.97%) [ +0.00% +0.06% +0.09% / +0.32% -0.88% -0.97%] index_select random : Elapsed 0.034 ms (3.407 ms / 100) 3.403 -> 3.384 ( -0.56%) [ +0.09% +0.00% +0.03% / +0.15% -0.50% -0.56%] index_select random_sorted : Elapsed 0.034 ms (3.406 ms / 100) B = [20, 40, 16, 5] (stride (80, 1600, 5, 1)) A = [20, 40, 16, 4] (stride (1, 80, 3200, 20)) dim = 3 6.105 -> 6.093 ( -0.20%) [ +0.00% +0.10% +0.08% / +0.05% -0.13% -0.20%] index_add_ linear : Elapsed 0.061 ms (6.105 ms / 100) 6.116 -> 6.097 ( -0.31%) [ +0.02% +0.00% +0.05% / +0.03% -0.31% -0.28%] index_copy_ linear : Elapsed 0.061 ms (6.117 ms / 100) 6.107 -> 6.097 ( -0.16%) [ +0.02% +0.00% +0.13% / +0.20% -0.13% -0.16%] index_add_ reverse : Elapsed 0.061 ms (6.108 ms / 100) 6.120 -> 6.097 ( -0.38%) [ +0.00% +0.00% +0.11% / +0.02% -0.38% -0.38%] index_copy_ reverse : Elapsed 0.061 ms (6.120 ms / 100) 6.109 -> 6.097 ( -0.20%) [ +0.00% +0.07% +0.08% / +0.07% -0.20% -0.16%] index_add_ spread : Elapsed 0.061 ms (6.109 ms / 100) 6.118 -> 6.100 ( -0.29%) [ +0.02% +0.21% +0.00% / +0.10% -0.29% -0.20%] index_copy_ spread : Elapsed 0.061 ms (6.119 ms / 100) 6.111 -> 6.100 ( -0.18%) [ +0.03% +0.00% +0.00% / +0.03% -0.16% -0.18%] index_add_ strided 3 : Elapsed 0.061 ms (6.113 ms / 100) 6.117 -> 6.100 ( -0.28%) [ +0.05% +0.00% +0.18% / +0.16% -0.26% -0.28%] index_copy_ strided 3 : Elapsed 0.061 ms (6.120 ms / 100) 6.104 -> 6.091 ( -0.21%) [ +0.00% +0.15% +0.18% / +0.11% -0.21% -0.16%] index_add_ perm : Elapsed 0.061 ms (6.104 ms / 100) 6.120 -> 6.093 ( -0.44%) [ +0.00% +0.05% +0.07% / +0.08% -0.44% -0.39%] index_copy_ perm : Elapsed 0.061 ms (6.120 ms / 100) 6.105 -> 6.092 ( -0.21%) [ +0.11% +0.00% +0.00% / +0.10% -0.21% -0.15%] index_add_ perm_sorted : Elapsed 0.061 ms (6.112 ms / 100) 6.115 -> 6.098 ( -0.28%) [ +0.11% +0.00% +0.10% / +0.13% -0.23% -0.28%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.122 ms / 100) 6.370 -> 6.351 ( -0.30%) [ +0.19% +0.00% +0.20% / +0.19% -0.30% -0.24%] index_select const : Elapsed 0.064 ms (6.382 ms / 100) 6.468 -> 6.456 ( -0.19%) [ +0.08% +0.00% +0.25% / +0.22% -0.15% -0.19%] index_select wrap : Elapsed 0.065 ms (6.473 ms / 100) 6.448 -> 6.432 ( -0.25%) [ +0.11% +0.00% +0.11% / +0.16% -0.11% -0.25%] index_select linear : Elapsed 0.065 ms (6.455 ms / 100) 6.444 -> 6.435 ( -0.14%) [ +0.05% +0.00% +0.16% / +0.06% -0.14% -0.09%] index_select reverse : Elapsed 0.064 ms (6.447 ms / 100) 6.375 -> 6.355 ( -0.31%) [ +0.03% +0.00% +0.16% / +0.16% -0.30% -0.31%] index_select skip64 : Elapsed 0.064 ms (6.377 ms / 100) 6.380 -> 6.352 ( -0.44%) [ +0.00% +0.09% +0.00% / +0.13% -0.42% -0.44%] index_select skip256 : Elapsed 0.064 ms (6.380 ms / 100) 6.439 -> 6.426 ( -0.20%) [ +0.00% +0.06% +0.25% / +0.20% -0.17% -0.20%] index_select spread : Elapsed 0.064 ms (6.439 ms / 100) 6.457 -> 6.450 ( -0.11%) [ +0.00% +0.14% +0.22% / +0.34% -0.11% +0.06%] index_select strided 3 : Elapsed 0.065 ms (6.457 ms / 100) 6.442 -> 6.426 ( -0.25%) [ +0.00% +0.14% +0.22% / +0.08% -0.25% -0.20%] index_select random : Elapsed 0.064 ms (6.442 ms / 100) 6.429 -> 6.412 ( -0.26%) [ +0.03% +0.00% +0.12% / +0.06% -0.20% -0.26%] index_select random_sorted : Elapsed 0.064 ms (6.431 ms / 100) B = [20, 40, 16, 5] (stride (80, 1600, 1, 16)) A = [20, 40, 16, 4] (stride (16, 1280, 1, 320)) dim = 3 5.902 -> 5.891 ( -0.19%) [ +0.00% +0.00% +0.14% / -0.03% -0.19% -0.17%] index_add_ linear : Elapsed 0.059 ms (5.902 ms / 100) 5.804 -> 5.801 ( -0.05%) [ +0.10% +0.00% +0.00% / +0.24% -0.05% -0.05%] index_copy_ linear : Elapsed 0.058 ms (5.810 ms / 100) 5.903 -> 5.890 ( -0.22%) [ +0.00% +0.00% +0.05% / -0.03% -0.19% -0.22%] index_add_ reverse : Elapsed 0.059 ms (5.903 ms / 100) 5.811 -> 5.805 ( -0.10%) [ +0.02% +0.00% +0.03% / +0.17% -0.07% -0.10%] index_copy_ reverse : Elapsed 0.058 ms (5.812 ms / 100) 5.886 -> 5.883 ( -0.05%) [ +0.00% +0.08% +0.22% / +0.19% -0.05% -0.05%] index_add_ spread : Elapsed 0.059 ms (5.886 ms / 100) 5.801 -> 5.796 ( -0.09%) [ +0.14% +0.00% +0.00% / +0.12% -0.09% -0.05%] index_copy_ spread : Elapsed 0.058 ms (5.809 ms / 100) 5.898 -> 5.880 ( -0.31%) [ +0.05% +0.00% +0.02% / +0.10% -0.20% -0.31%] index_add_ strided 3 : Elapsed 0.059 ms (5.901 ms / 100) 5.812 -> 5.805 ( -0.12%) [ +0.00% +0.15% +0.03% / +0.12% -0.03% -0.12%] index_copy_ strided 3 : Elapsed 0.058 ms (5.812 ms / 100) 5.894 -> 5.884 ( -0.17%) [ +0.00% +0.14% +0.00% / +0.12% -0.10% -0.17%] index_add_ perm : Elapsed 0.059 ms (5.894 ms / 100) 5.804 -> 5.798 ( -0.10%) [ +0.07% +0.00% +0.03% / +0.03% -0.10% +0.10%] index_copy_ perm : Elapsed 0.058 ms (5.808 ms / 100) 5.887 -> 5.880 ( -0.12%) [ +0.00% +0.07% +0.10% / +0.24% -0.12% +0.10%] index_add_ perm_sorted : Elapsed 0.059 ms (5.887 ms / 100) 5.805 -> 5.792 ( -0.22%) [ +0.12% +0.02% +0.00% / +0.09% -0.22% +0.07%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.812 ms / 100) 6.051 -> 6.058 ( +0.12%) [ +0.03% +0.13% +0.00% / +0.12% +0.21% +0.20%] index_select const : Elapsed 0.061 ms (6.053 ms / 100) 6.180 -> 6.181 ( +0.02%) [ +0.03% +0.00% +0.05% / +0.08% +0.08% +0.02%] index_select wrap : Elapsed 0.062 ms (6.182 ms / 100) 6.155 -> 6.155 ( +0.00%) [ +0.23% +0.00% +0.02% / +0.00% +0.08% +0.00%] index_select linear : Elapsed 0.062 ms (6.169 ms / 100) 6.159 -> 6.169 ( +0.16%) [ +0.05% +0.00% +0.06% / +0.16% +0.37% +0.29%] index_select reverse : Elapsed 0.062 ms (6.162 ms / 100) 6.041 -> 6.044 ( +0.05%) [ +0.08% +0.00% +0.13% / +0.05% +0.17% +0.10%] index_select skip64 : Elapsed 0.060 ms (6.046 ms / 100) 6.053 -> 6.055 ( +0.03%) [ +0.00% +0.10% +0.03% / +0.07% +0.03% +0.17%] index_select skip256 : Elapsed 0.061 ms (6.053 ms / 100) 6.168 -> 6.166 ( -0.03%) [ +0.10% +0.00% +0.13% / +0.08% -0.03% -0.03%] index_select spread : Elapsed 0.062 ms (6.174 ms / 100) 6.161 -> 6.174 ( +0.21%) [ +0.13% +0.00% +0.06% / +0.21% +0.29% +0.24%] index_select strided 3 : Elapsed 0.062 ms (6.169 ms / 100) 6.179 -> 6.164 ( -0.24%) [ +0.00% +0.05% +0.10% / +0.10% -0.24% -0.19%] index_select random : Elapsed 0.062 ms (6.179 ms / 100) 6.184 -> 6.169 ( -0.24%) [ +0.00% +0.13% +0.11% / +0.08% -0.13% -0.24%] index_select random_sorted : Elapsed 0.062 ms (6.184 ms / 100) B = [20, 40, 16, 5] (stride (5, 1600, 100, 1)) A = [20, 40, 16, 4] (stride (640, 16, 1, 12800)) dim = 3 5.206 -> 5.209 ( +0.06%) [ +0.00% +0.10% +0.06% / +0.06% +0.44% +0.33%] index_add_ linear : Elapsed 0.052 ms (5.206 ms / 100) 5.203 -> 5.206 ( +0.06%) [ +0.00% +0.10% +0.08% / +0.06% +0.19% +0.15%] index_copy_ linear : Elapsed 0.052 ms (5.203 ms / 100) 5.207 -> 5.210 ( +0.06%) [ +0.02% +0.00% +0.10% / +0.06% +0.29% +0.36%] index_add_ reverse : Elapsed 0.052 ms (5.208 ms / 100) 5.204 -> 5.206 ( +0.04%) [ +0.10% +0.00% +0.06% / +0.04% +0.17% +0.10%] index_copy_ reverse : Elapsed 0.052 ms (5.209 ms / 100) 5.207 -> 5.211 ( +0.08%) [ +0.00% +0.10% +0.10% / +0.08% +0.42% +0.42%] index_add_ spread : Elapsed 0.052 ms (5.207 ms / 100) 5.206 -> 5.210 ( +0.08%) [ +0.02% +0.00% +0.12% / +0.19% +0.13% +0.08%] index_copy_ spread : Elapsed 0.052 ms (5.207 ms / 100) 5.213 -> 5.215 ( +0.04%) [ +0.06% +0.00% +0.02% / +0.04% +0.23% +0.33%] index_add_ strided 3 : Elapsed 0.052 ms (5.216 ms / 100) 5.208 -> 5.207 ( -0.02%) [ +0.00% +0.00% +0.10% / +0.10% -0.02% +0.10%] index_copy_ strided 3 : Elapsed 0.052 ms (5.208 ms / 100) 5.210 -> 5.214 ( +0.08%) [ +0.00% +0.19% +0.17% / +0.08% +0.38% +0.27%] index_add_ perm : Elapsed 0.052 ms (5.210 ms / 100) 5.206 -> 5.215 ( +0.17%) [ +0.02% +0.00% +0.00% / +0.17% +0.25% +0.23%] index_copy_ perm : Elapsed 0.052 ms (5.207 ms / 100) 5.210 -> 5.208 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.33% +0.29%] index_add_ perm_sorted : Elapsed 0.052 ms (5.210 ms / 100) 5.208 -> 5.208 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.08% +0.06% +0.00%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.208 ms / 100) 5.260 -> 5.264 ( +0.08%) [ +0.00% +0.15% +0.21% / +0.08% +0.21% +0.30%] index_select const : Elapsed 0.053 ms (5.260 ms / 100) 5.326 -> 5.326 ( +0.00%) [ +0.08% +0.00% +0.09% / +0.00% +0.30% +0.30%] index_select wrap : Elapsed 0.053 ms (5.330 ms / 100) 5.330 -> 5.332 ( +0.04%) [ +0.11% +0.02% +0.00% / +0.04% +0.34% +0.41%] index_select linear : Elapsed 0.053 ms (5.336 ms / 100) 5.335 -> 5.338 ( +0.06%) [ +0.07% +0.00% +0.11% / +0.06% +0.17% +0.24%] index_select reverse : Elapsed 0.053 ms (5.339 ms / 100) 5.259 -> 5.269 ( +0.19%) [ +0.23% +0.00% +0.21% / +0.19% +0.38% +0.30%] index_select skip64 : Elapsed 0.053 ms (5.271 ms / 100) 5.263 -> 5.261 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.21% +0.15%] index_select skip256 : Elapsed 0.053 ms (5.267 ms / 100) 5.334 -> 5.348 ( +0.26%) [ +0.07% +0.00% +0.06% / +0.26% +0.32% +0.32%] index_select spread : Elapsed 0.053 ms (5.338 ms / 100) 5.331 -> 5.331 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.39% +0.26%] index_select strided 3 : Elapsed 0.053 ms (5.331 ms / 100) 5.313 -> 5.319 ( +0.11%) [ +0.02% +0.02% +0.00% / +0.11% +0.15% +0.17%] index_select random : Elapsed 0.053 ms (5.314 ms / 100) 5.313 -> 5.315 ( +0.04%) [ +0.06% +0.06% +0.00% / +0.04% +0.23% +0.09%] index_select random_sorted : Elapsed 0.053 ms (5.316 ms / 100) B = [20, 40, 16, 5] (stride (1, 1600, 100, 20)) A = [20, 40, 16, 4] (stride (640, 16, 1, 12800)) dim = 3 5.118 -> 5.121 ( +0.06%) [ +0.04% +0.06% +0.00% / +0.06% +0.12% +0.20%] index_add_ linear : Elapsed 0.051 ms (5.120 ms / 100) 5.086 -> 5.092 ( +0.12%) [ +0.00% +0.08% +0.14% / +0.18% +0.14% +0.12%] index_copy_ linear : Elapsed 0.051 ms (5.086 ms / 100) 5.122 -> 5.122 ( +0.00%) [ +0.10% +0.08% +0.00% / +0.02% +0.20% +0.00%] index_add_ reverse : Elapsed 0.051 ms (5.127 ms / 100) 5.091 -> 5.088 ( -0.06%) [ +0.18% +0.00% +0.04% / -0.06% +0.08% +0.12%] index_copy_ reverse : Elapsed 0.051 ms (5.100 ms / 100) 5.119 -> 5.120 ( +0.02%) [ +0.14% +0.00% +0.14% / +0.14% +0.02% +0.21%] index_add_ spread : Elapsed 0.051 ms (5.126 ms / 100) 5.091 -> 5.093 ( +0.04%) [ +0.00% +0.10% +0.08% / +0.04% +0.10% +0.16%] index_copy_ spread : Elapsed 0.051 ms (5.091 ms / 100) 5.121 -> 5.124 ( +0.06%) [ +0.10% +0.14% +0.00% / +0.12% +0.10% +0.06%] index_add_ strided 3 : Elapsed 0.051 ms (5.126 ms / 100) 5.091 -> 5.092 ( +0.02%) [ +0.16% +0.14% +0.00% / +0.02% +0.08% +0.08%] index_copy_ strided 3 : Elapsed 0.051 ms (5.099 ms / 100) 5.121 -> 5.117 ( -0.08%) [ +0.18% +0.02% +0.00% / -0.08% +0.12% +0.02%] index_add_ perm : Elapsed 0.051 ms (5.130 ms / 100) 5.086 -> 5.092 ( +0.12%) [ +0.20% +0.00% +0.10% / +0.12% +0.20% +0.20%] index_copy_ perm : Elapsed 0.051 ms (5.096 ms / 100) 5.121 -> 5.119 ( -0.04%) [ +0.12% +0.14% +0.00% / -0.04% +0.06% +0.18%] index_add_ perm_sorted : Elapsed 0.051 ms (5.127 ms / 100) 5.086 -> 5.094 ( +0.16%) [ +0.20% +0.00% +0.06% / +0.16% +0.35% +0.24%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.096 ms / 100) 5.230 -> 5.233 ( +0.06%) [ +0.10% +0.00% +0.08% / +0.06% +0.27% +0.23%] index_select const : Elapsed 0.052 ms (5.235 ms / 100) 5.314 -> 5.307 ( -0.13%) [ +0.06% +0.04% +0.00% / -0.13% +0.02% +0.11%] index_select wrap : Elapsed 0.053 ms (5.317 ms / 100) 5.310 -> 5.313 ( +0.06%) [ +0.00% +0.02% +0.08% / +0.09% +0.06% +0.09%] index_select linear : Elapsed 0.053 ms (5.310 ms / 100) 5.312 -> 5.314 ( +0.04%) [ +0.00% +0.11% +0.00% / +0.04% +0.28% +0.08%] index_select reverse : Elapsed 0.053 ms (5.312 ms / 100) 5.230 -> 5.233 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.29% +0.11%] index_select skip64 : Elapsed 0.052 ms (5.236 ms / 100) 5.232 -> 5.233 ( +0.02%) [ +0.00% +0.19% +0.04% / +0.02% +0.23% +0.08%] index_select skip256 : Elapsed 0.052 ms (5.232 ms / 100) 5.305 -> 5.310 ( +0.09%) [ +0.06% +0.13% +0.00% / +0.15% +0.19% +0.09%] index_select spread : Elapsed 0.053 ms (5.308 ms / 100) 5.307 -> 5.311 ( +0.08%) [ +0.17% +0.00% +0.06% / +0.08% +0.21% +0.17%] index_select strided 3 : Elapsed 0.053 ms (5.316 ms / 100) 5.287 -> 5.285 ( -0.04%) [ +0.08% +0.00% +0.00% / +0.02% +0.08% -0.04%] index_select random : Elapsed 0.053 ms (5.291 ms / 100) 5.291 -> 5.289 ( -0.04%) [ +0.09% +0.08% +0.00% / +0.00% +0.02% -0.04%] index_select random_sorted : Elapsed 0.053 ms (5.296 ms / 100) B = [20, 40, 16, 5] (stride (16, 320, 1, 12800)) A = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) dim = 3 5.767 -> 5.770 ( +0.05%) [ +0.03% +0.02% +0.00% / +0.17% +0.05% +0.07%] index_add_ linear : Elapsed 0.058 ms (5.769 ms / 100) 5.705 -> 5.710 ( +0.09%) [ +0.00% +0.04% +0.09% / +0.09% +0.21% +0.14%] index_copy_ linear : Elapsed 0.057 ms (5.705 ms / 100) 5.753 -> 5.762 ( +0.16%) [ +0.00% +0.05% +0.10% / +0.16% +0.37% +0.45%] index_add_ reverse : Elapsed 0.058 ms (5.753 ms / 100) 5.698 -> 5.704 ( +0.11%) [ +0.00% +0.09% +0.12% / +0.11% +0.35% +0.33%] index_copy_ reverse : Elapsed 0.057 ms (5.698 ms / 100) 5.767 -> 5.771 ( +0.07%) [ +0.03% +0.00% +0.09% / +0.07% +0.09% +0.07%] index_add_ spread : Elapsed 0.058 ms (5.769 ms / 100) 5.702 -> 5.712 ( +0.18%) [ +0.00% +0.18% +0.14% / +0.21% +0.19% +0.18%] index_copy_ spread : Elapsed 0.057 ms (5.702 ms / 100) 5.767 -> 5.773 ( +0.10%) [ +0.00% +0.09% +0.14% / +0.10% +0.40% +0.36%] index_add_ strided 3 : Elapsed 0.058 ms (5.767 ms / 100) 5.707 -> 5.718 ( +0.19%) [ +0.00% +0.00% +0.05% / +0.19% +0.37% +0.19%] index_copy_ strided 3 : Elapsed 0.057 ms (5.707 ms / 100) 5.768 -> 5.770 ( +0.03%) [ +0.02% +0.00% +0.02% / +0.03% +0.36% +0.19%] index_add_ perm : Elapsed 0.058 ms (5.769 ms / 100) 5.706 -> 5.713 ( +0.12%) [ +0.00% +0.02% +0.18% / +0.12% +0.39% +0.37%] index_copy_ perm : Elapsed 0.057 ms (5.706 ms / 100) 5.769 -> 5.764 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.07% +0.16%] index_add_ perm_sorted : Elapsed 0.058 ms (5.769 ms / 100) 5.705 -> 5.709 ( +0.07%) [ +0.05% +0.00% +0.28% / +0.11% +0.07% +0.18%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.708 ms / 100) 6.040 -> 6.024 ( -0.26%) [ +0.17% +0.00% +0.18% / +0.23% -0.25% -0.26%] index_select const : Elapsed 0.061 ms (6.050 ms / 100) 6.081 -> 6.076 ( -0.08%) [ +0.05% +0.00% +0.21% / +0.25% -0.08% +0.03%] index_select wrap : Elapsed 0.061 ms (6.084 ms / 100) 6.065 -> 6.068 ( +0.05%) [ +0.16% +0.00% +0.02% / +0.12% +0.25% +0.05%] index_select linear : Elapsed 0.061 ms (6.075 ms / 100) 6.071 -> 6.067 ( -0.07%) [ +0.10% +0.00% +0.18% / +0.21% +0.12% -0.07%] index_select reverse : Elapsed 0.061 ms (6.077 ms / 100) 6.044 -> 6.021 ( -0.38%) [ +0.00% +0.02% +0.02% / +0.07% -0.31% -0.38%] index_select skip64 : Elapsed 0.060 ms (6.044 ms / 100) 6.040 -> 6.022 ( -0.30%) [ +0.08% +0.00% +0.15% / +0.23% -0.30% -0.22%] index_select skip256 : Elapsed 0.060 ms (6.045 ms / 100) 6.075 -> 6.078 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.07% +0.12%] index_select spread : Elapsed 0.061 ms (6.075 ms / 100) 6.083 -> 6.078 ( -0.08%) [ +0.07% +0.00% +0.13% / +0.13% -0.08% +0.13%] index_select strided 3 : Elapsed 0.061 ms (6.087 ms / 100) 6.071 -> 6.043 ( -0.46%) [ +0.03% +0.00% +0.13% / +0.13% -0.46% -0.33%] index_select random : Elapsed 0.061 ms (6.073 ms / 100) 6.052 -> 6.028 ( -0.40%) [ +0.05% +0.00% +0.20% / +0.23% -0.40% -0.36%] index_select random_sorted : Elapsed 0.061 ms (6.055 ms / 100) B = [20, 40, 16, 5] (stride (1, 320, 20, 12800)) A = [20, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 3 5.140 -> 5.128 ( -0.23%) [ +0.00% +0.14% +0.02% / +0.04% -0.23% -0.23%] index_add_ linear : Elapsed 0.051 ms (5.140 ms / 100) 5.083 -> 5.068 ( -0.30%) [ +0.00% +0.00% +0.00% / +0.14% -0.30% -0.24%] index_copy_ linear : Elapsed 0.051 ms (5.083 ms / 100) 5.135 -> 5.123 ( -0.23%) [ +0.00% +0.25% +0.06% / +0.14% -0.23% -0.21%] index_add_ reverse : Elapsed 0.051 ms (5.135 ms / 100) 5.077 -> 5.069 ( -0.16%) [ +0.16% +0.16% +0.00% / +0.20% -0.04% -0.16%] index_copy_ reverse : Elapsed 0.051 ms (5.085 ms / 100) 5.141 -> 5.126 ( -0.29%) [ +0.00% +0.00% +0.02% / +0.06% -0.23% -0.29%] index_add_ spread : Elapsed 0.051 ms (5.141 ms / 100) 5.081 -> 5.064 ( -0.33%) [ +0.00% +0.12% +0.06% / -0.06% -0.20% -0.33%] index_copy_ spread : Elapsed 0.051 ms (5.081 ms / 100) 5.129 -> 5.123 ( -0.12%) [ +0.16% +0.14% +0.00% / +0.04% -0.08% -0.12%] index_add_ strided 3 : Elapsed 0.051 ms (5.137 ms / 100) 5.071 -> 5.064 ( -0.14%) [ +0.04% +0.02% +0.00% / +0.08% -0.14% -0.06%] index_copy_ strided 3 : Elapsed 0.051 ms (5.073 ms / 100) 5.133 -> 5.118 ( -0.29%) [ +0.10% +0.00% +0.00% / +0.08% -0.29% -0.29%] index_add_ perm : Elapsed 0.051 ms (5.138 ms / 100) 5.074 -> 5.062 ( -0.24%) [ +0.12% +0.00% +0.06% / +0.00% -0.18% -0.24%] index_copy_ perm : Elapsed 0.051 ms (5.080 ms / 100) 5.132 -> 5.119 ( -0.25%) [ +0.06% +0.04% +0.00% / +0.10% -0.25% -0.21%] index_add_ perm_sorted : Elapsed 0.051 ms (5.135 ms / 100) 5.078 -> 5.059 ( -0.37%) [ +0.00% +0.02% +0.02% / +0.00% -0.22% -0.37%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.078 ms / 100) 5.318 -> 5.298 ( -0.38%) [ +0.04% +0.15% +0.00% / +0.00% -0.26% -0.38%] index_select const : Elapsed 0.053 ms (5.320 ms / 100) 5.325 -> 5.304 ( -0.39%) [ +0.00% +0.21% +0.00% / +0.06% -0.39% -0.39%] index_select wrap : Elapsed 0.053 ms (5.325 ms / 100) 5.319 -> 5.295 ( -0.45%) [ +0.00% +0.06% +0.06% / -0.08% -0.45% -0.30%] index_select linear : Elapsed 0.053 ms (5.319 ms / 100) 5.316 -> 5.298 ( -0.34%) [ +0.00% +0.08% +0.13% / +0.24% -0.15% -0.34%] index_select reverse : Elapsed 0.053 ms (5.316 ms / 100) 5.316 -> 5.297 ( -0.36%) [ +0.04% +0.00% +0.04% / -0.02% -0.36% -0.19%] index_select skip64 : Elapsed 0.053 ms (5.318 ms / 100) 5.324 -> 5.299 ( -0.47%) [ +0.09% +0.08% +0.00% / +0.15% -0.36% -0.47%] index_select skip256 : Elapsed 0.053 ms (5.329 ms / 100) 5.315 -> 5.296 ( -0.36%) [ +0.04% +0.00% +0.15% / +0.06% -0.36% -0.36%] index_select spread : Elapsed 0.053 ms (5.317 ms / 100) 5.319 -> 5.303 ( -0.30%) [ +0.00% +0.13% +0.11% / +0.15% -0.30% -0.21%] index_select strided 3 : Elapsed 0.053 ms (5.319 ms / 100) 5.320 -> 5.297 ( -0.43%) [ +0.11% +0.08% +0.00% / -0.06% -0.39% -0.43%] index_select random : Elapsed 0.053 ms (5.326 ms / 100) 5.322 -> 5.299 ( -0.43%) [ +0.00% +0.04% +0.08% / +0.19% -0.43% -0.39%] index_select random_sorted : Elapsed 0.053 ms (5.322 ms / 100) B = [20, 40, 16, 5] (stride (40, 1, 800, 12800)) A = [20, 40, 16, 4] (stride (1, 1280, 80, 20)) dim = 3 3.477 -> 3.469 ( -0.23%) [ +0.06% +0.23% +0.00% / -0.23% +0.03% +0.23%] index_add_ linear : Elapsed 0.035 ms (3.479 ms / 100) 3.430 -> 3.436 ( +0.17%) [ +0.29% +0.12% +0.00% / +0.17% +0.47% +0.26%] index_copy_ linear : Elapsed 0.034 ms (3.440 ms / 100) 3.480 -> 3.488 ( +0.23%) [ +0.00% +0.06% +0.23% / +0.23% +0.49% +0.55%] index_add_ reverse : Elapsed 0.035 ms (3.480 ms / 100) 3.445 -> 3.443 ( -0.06%) [ +0.03% +0.00% +0.23% / -0.06% +0.15% +0.23%] index_copy_ reverse : Elapsed 0.034 ms (3.446 ms / 100) 3.480 -> 3.481 ( +0.03%) [ +0.00% +0.26% +0.00% / +0.06% +0.06% +0.03%] index_add_ spread : Elapsed 0.035 ms (3.480 ms / 100) 3.444 -> 3.442 ( -0.06%) [ +0.12% +0.06% +0.00% / -0.06% +0.67% +0.41%] index_copy_ spread : Elapsed 0.034 ms (3.448 ms / 100) 3.495 -> 3.501 ( +0.17%) [ +0.29% +0.00% +0.09% / +0.26% +0.20% +0.17%] index_add_ strided 3 : Elapsed 0.035 ms (3.505 ms / 100) 3.454 -> 3.451 ( -0.09%) [ +0.03% +0.06% +0.00% / -0.09% +0.06% -0.06%] index_copy_ strided 3 : Elapsed 0.035 ms (3.455 ms / 100) 3.486 -> 3.486 ( +0.00%) [ +0.23% +0.00% +0.09% / +0.09% +0.17% +0.00%] index_add_ perm : Elapsed 0.035 ms (3.494 ms / 100) 3.446 -> 3.446 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.17% +0.00% +0.12%] index_copy_ perm : Elapsed 0.034 ms (3.447 ms / 100) 3.475 -> 3.471 ( -0.12%) [ +0.20% +0.23% +0.00% / -0.12% +0.20% +0.14%] index_add_ perm_sorted : Elapsed 0.035 ms (3.482 ms / 100) 3.434 -> 3.436 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.41% +0.47%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.438 ms / 100) 3.594 -> 3.596 ( +0.06%) [ +0.03% +0.11% +0.00% / +0.06% +0.45% +0.61%] index_select const : Elapsed 0.036 ms (3.595 ms / 100) 3.611 -> 3.611 ( +0.00%) [ +0.14% +0.06% +0.00% / +0.00% +0.33% +0.28%] index_select wrap : Elapsed 0.036 ms (3.616 ms / 100) 3.609 -> 3.610 ( +0.03%) [ +0.00% +0.22% +0.19% / +0.03% +0.50% +0.44%] index_select linear : Elapsed 0.036 ms (3.609 ms / 100) 3.619 -> 3.604 ( -0.41%) [ +0.19% +0.03% +0.00% / +0.08% -0.33% -0.41%] index_select reverse : Elapsed 0.036 ms (3.626 ms / 100) 3.635 -> 3.609 ( -0.72%) [ +0.00% +0.03% +0.08% / -0.06% -0.72% -0.72%] index_select skip64 : Elapsed 0.036 ms (3.635 ms / 100) 3.611 -> 3.612 ( +0.03%) [ +0.25% +0.25% +0.00% / +0.19% +0.03% +0.03%] index_select skip256 : Elapsed 0.036 ms (3.620 ms / 100) 3.580 -> 3.582 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.78% +0.64%] index_select spread : Elapsed 0.036 ms (3.580 ms / 100) 3.598 -> 3.599 ( +0.03%) [ +0.00% +0.14% +0.00% / +0.03% +0.44% +0.44%] index_select strided 3 : Elapsed 0.036 ms (3.598 ms / 100) 3.628 -> 3.618 ( -0.28%) [ +0.00% +0.00% +0.06% / +0.06% -0.25% -0.28%] index_select random : Elapsed 0.036 ms (3.628 ms / 100) 3.622 -> 3.611 ( -0.30%) [ +0.03% +0.00% +0.06% / +0.00% -0.28% -0.30%] index_select random_sorted : Elapsed 0.036 ms (3.623 ms / 100) out_shape = [5, 4, 16, 20] in_shape = [40, 4, 16, 20] idx_dim = 0 B = [5, 4, 16, 20] (stride (20, 1600, 100, 1)) A = [40, 4, 16, 20] (stride (80, 1, 3200, 4)) dim = 0 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.14% +0.14%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.29% +0.22%] index_select wrap : Elapsed 0.014 ms (1.383 ms / 100) 1.385 -> 1.384 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.07% +0.07%] index_select linear : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.29% +0.29% +0.00% / +0.29% +0.51% +0.36%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.00% +0.14% / +0.14% +0.51% +0.43%] index_select skip64 : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.36% +0.29% +0.00% / +0.22% +1.23% +0.51%] index_select skip256 : Elapsed 0.014 ms (1.384 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.14% +0.22%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.29% +0.00% +0.29% / +0.29% +0.51% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.381 ( -0.14%) [ +0.00% +4.99% +0.07% / -0.14% +0.22% +0.22%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.36% +0.22% +0.00% / +0.14% +0.43% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.386 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.29% +0.00% +0.22% / +0.22% +0.43% +0.43%] index_select strided 8 : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.14% +0.14% +0.00% / +0.29% +0.58% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.29% +0.29%] index_select random : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.14% +0.00% +0.07% / +0.22% +0.72% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.29% +0.22% +0.00% / +0.07% +0.51% +0.51%] index_select perm : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.00% +0.58% +0.07% / +0.07% +0.43% +0.36%] index_select perm_sorted : Elapsed 0.014 ms (1.382 ms / 100) B = [5, 4, 16, 20] (stride (1, 1600, 100, 5)) A = [40, 4, 16, 20] (stride (1280, 1, 80, 4)) dim = 0 1.187 -> 1.186 ( -0.08%) [ +0.17% +0.00% +0.00% / -0.08% +0.17% +0.00%] index_select const : Elapsed 0.012 ms (1.189 ms / 100) 1.185 -> 1.188 ( +0.25%) [ +0.00% +0.08% +0.34% / +0.34% +0.25% +0.34%] index_select wrap : Elapsed 0.012 ms (1.185 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.59% +0.59% +0.00% / +0.42% +0.68% +0.68%] index_select linear : Elapsed 0.012 ms (1.189 ms / 100) 1.185 -> 1.182 ( -0.25%) [ +0.17% +0.00% +0.25% / -0.25% +0.25% +0.42%] index_select reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.182 -> 1.185 ( +0.25%) [ +0.25% +0.00% +0.25% / +0.25% +0.59% +0.42%] index_select skip64 : Elapsed 0.012 ms (1.185 ms / 100) 1.183 -> 1.186 ( +0.25%) [ +0.34% +0.00% +0.34% / +0.25% +0.42% +0.42%] index_select skip256 : Elapsed 0.012 ms (1.187 ms / 100) 1.183 -> 1.188 ( +0.42%) [ +0.42% +0.00% +0.08% / +0.42% +0.51% +0.42%] index_select spread : Elapsed 0.012 ms (1.188 ms / 100) 1.184 -> 1.186 ( +0.17%) [ +0.00% +0.42% +0.42% / +0.17% +0.51% +0.34%] index_select strided 3 : Elapsed 0.012 ms (1.184 ms / 100) 1.184 -> 1.182 ( -0.17%) [ +0.42% +0.17% +0.00% / -0.17% +0.51% +0.59%] index_select strided 5 : Elapsed 0.012 ms (1.189 ms / 100) 1.184 -> 1.188 ( +0.34%) [ +0.42% +0.00% +0.00% / +0.42% +0.34% +0.51%] index_select strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.185 -> 1.186 ( +0.08%) [ +0.00% +0.17% +0.34% / +0.08% +0.34% +0.34%] index_select strided 8 : Elapsed 0.012 ms (1.185 ms / 100) 1.185 -> 1.188 ( +0.25%) [ +0.17% +0.00% +0.08% / +0.25% +0.34% +0.34%] index_select strided 16 : Elapsed 0.012 ms (1.187 ms / 100) 1.186 -> 1.184 ( -0.17%) [ +0.42% +0.00% +0.25% / -0.17% +0.25% +0.17%] index_select random : Elapsed 0.012 ms (1.191 ms / 100) 1.186 -> 1.186 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.25% +0.34%] index_select random_sorted : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.184 ( +0.17%) [ +0.17% +0.00% +0.51% / +0.17% +0.59% +0.59%] index_select perm : Elapsed 0.012 ms (1.184 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.17% +0.25% +0.00% / +0.17% +0.17% +0.17%] index_select perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) B = [5, 4, 16, 20] (stride (1, 1600, 100, 5)) A = [40, 4, 16, 20] (stride (1, 40, 3200, 160)) dim = 0 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.55%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.78% +0.71%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.55% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +0.55% +0.70%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.63%] index_select spread : Elapsed 0.013 ms (1.273 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.39% +0.08% +0.00% / +0.08% +0.47% +0.31%] index_select strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.277 -> 1.280 ( +0.23%) [ +0.00% +0.08% +0.23% / +0.23% +0.47% +0.47%] index_select strided 5 : Elapsed 0.013 ms (1.277 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.31% +0.00% +0.23% / +0.00% +0.55% +0.31%] index_select strided 7 : Elapsed 0.013 ms (1.288 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.39% +0.55%] index_select strided 8 : Elapsed 0.013 ms (1.277 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.71% +1.02%] index_select strided 16 : Elapsed 0.013 ms (1.271 ms / 100) 1.274 -> 1.277 ( +0.24%) [ +0.31% +0.08% +0.00% / +0.24% +0.86% +0.94%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.267 -> 1.268 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.71%] index_select random_sorted : Elapsed 0.013 ms (1.267 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.47% +0.63%] index_select perm : Elapsed 0.013 ms (1.277 ms / 100) 1.272 -> 1.276 ( +0.31%) [ +0.00% +0.24% +0.00% / +0.31% +0.94% +0.94%] index_select perm_sorted : Elapsed 0.013 ms (1.272 ms / 100) B = [5, 4, 16, 20] (stride (16, 1600, 1, 80)) A = [40, 4, 16, 20] (stride (1, 800, 3200, 40)) dim = 0 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.62% +0.62%] index_select const : Elapsed 0.016 ms (1.612 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.56% +0.62%] index_select wrap : Elapsed 0.016 ms (1.609 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.68% +0.68%] index_select linear : Elapsed 0.016 ms (1.613 ms / 100) 1.610 -> 1.611 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.81% +0.81%] index_select reverse : Elapsed 0.016 ms (1.611 ms / 100) 1.608 -> 1.610 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.75% +0.68%] index_select skip64 : Elapsed 0.016 ms (1.608 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.56%] index_select skip256 : Elapsed 0.016 ms (1.611 ms / 100) 1.599 -> 1.600 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.75% +0.69%] index_select spread : Elapsed 0.016 ms (1.600 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select strided 3 : Elapsed 0.016 ms (1.609 ms / 100) 1.598 -> 1.601 ( +0.19%) [ +0.13% +0.19% +0.00% / +0.19% +0.75% +0.81%] index_select strided 5 : Elapsed 0.016 ms (1.600 ms / 100) 1.596 -> 1.598 ( +0.13%) [ +0.06% +0.06% +0.00% / +0.13% +0.81% +0.75%] index_select strided 7 : Elapsed 0.016 ms (1.597 ms / 100) 1.608 -> 1.609 ( +0.06%) [ +0.19% +0.00% +0.06% / +0.06% +1.31% +0.56%] index_select strided 8 : Elapsed 0.016 ms (1.611 ms / 100) 1.601 -> 1.601 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.69%] index_select strided 16 : Elapsed 0.016 ms (1.601 ms / 100) 1.607 -> 1.606 ( -0.06%) [ +0.12% +0.06% +0.00% / -0.06% +0.68% +0.62%] index_select random : Elapsed 0.016 ms (1.609 ms / 100) 1.604 -> 1.603 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.44% +0.50%] index_select random_sorted : Elapsed 0.016 ms (1.604 ms / 100) 1.600 -> 1.602 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.69% +0.69%] index_select perm : Elapsed 0.016 ms (1.600 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.31% +0.00% +0.06% / +0.06% +0.93% +0.87%] index_select perm_sorted : Elapsed 0.016 ms (1.611 ms / 100) B = [5, 4, 16, 20] (stride (80, 20, 400, 1)) A = [40, 4, 16, 20] (stride (1, 12800, 800, 40)) dim = 0 1.395 -> 1.395 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.29%] index_select const : Elapsed 0.014 ms (1.395 ms / 100) 1.392 -> 1.390 ( -0.14%) [ +0.07% +0.00% +0.14% / -0.14% +0.22% +0.29%] index_select wrap : Elapsed 0.014 ms (1.393 ms / 100) 1.390 -> 1.395 ( +0.36%) [ +0.00% +0.14% +0.29% / +0.36% +0.65% +0.72%] index_select linear : Elapsed 0.014 ms (1.390 ms / 100) 1.390 -> 1.394 ( +0.29%) [ +0.36% +0.00% +0.07% / +0.29% +0.65% +0.58%] index_select reverse : Elapsed 0.014 ms (1.395 ms / 100) 1.390 -> 1.387 ( -0.22%) [ +0.07% +0.14% +0.00% / -0.22% +0.58% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.391 ms / 100) 1.392 -> 1.394 ( +0.14%) [ +0.29% +0.07% +0.00% / +0.14% +0.72% +0.50%] index_select skip256 : Elapsed 0.014 ms (1.396 ms / 100) 1.385 -> 1.385 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.36% +0.22%] index_select spread : Elapsed 0.014 ms (1.385 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.44% +0.00% +0.07% / +0.29% +0.66% +0.95%] index_select strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.353 -> 1.354 ( +0.07%) [ +0.30% +0.15% +0.00% / +0.07% +0.52% +0.67%] index_select strided 5 : Elapsed 0.014 ms (1.357 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.43% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.384 ms / 100) 1.394 -> 1.395 ( +0.07%) [ +0.14% +0.22% +0.00% / +0.07% +0.65% +0.57%] index_select strided 8 : Elapsed 0.014 ms (1.396 ms / 100) 1.346 -> 1.345 ( -0.07%) [ +0.15% +0.00% +0.07% / -0.07% +0.82% +0.67%] index_select strided 16 : Elapsed 0.013 ms (1.348 ms / 100) 1.383 -> 1.386 ( +0.22%) [ +0.14% +0.22% +0.00% / +0.22% +1.01% +0.65%] index_select random : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.36% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.385 ms / 100) 1.387 -> 1.387 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.72% +0.72%] index_select perm : Elapsed 0.014 ms (1.388 ms / 100) 1.353 -> 1.353 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.52% +0.52%] index_select perm_sorted : Elapsed 0.014 ms (1.354 ms / 100) B = [5, 4, 16, 20] (stride (80, 20, 400, 1)) A = [40, 4, 16, 20] (stride (64, 16, 1, 2560)) dim = 0 1.467 -> 1.467 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select const : Elapsed 0.015 ms (1.467 ms / 100) 1.473 -> 1.472 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.68% +0.61%] index_select wrap : Elapsed 0.015 ms (1.473 ms / 100) 1.469 -> 1.469 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.68% +0.54%] index_select linear : Elapsed 0.015 ms (1.470 ms / 100) 1.458 -> 1.460 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.62% +0.75%] index_select reverse : Elapsed 0.015 ms (1.460 ms / 100) 1.465 -> 1.468 ( +0.20%) [ +0.34% +0.07% +0.00% / +0.20% +0.75% +0.61%] index_select skip64 : Elapsed 0.015 ms (1.470 ms / 100) 1.466 -> 1.466 ( +0.00%) [ +0.27% +0.07% +0.00% / +0.00% +0.82% +0.61%] index_select skip256 : Elapsed 0.015 ms (1.470 ms / 100) 1.466 -> 1.467 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_select spread : Elapsed 0.015 ms (1.468 ms / 100) 1.482 -> 1.483 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.88%] index_select strided 3 : Elapsed 0.015 ms (1.483 ms / 100) 1.455 -> 1.455 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.69% +0.69%] index_select strided 5 : Elapsed 0.015 ms (1.455 ms / 100) 1.447 -> 1.447 ( +0.00%) [ +0.00% +0.00% +0.21% / +0.00% +0.55% +0.55%] index_select strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.458 -> 1.460 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.75% +0.69%] index_select strided 8 : Elapsed 0.015 ms (1.458 ms / 100) 1.454 -> 1.455 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.69% +0.89%] index_select strided 16 : Elapsed 0.015 ms (1.455 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.69% +0.62%] index_select random : Elapsed 0.014 ms (1.442 ms / 100) 1.462 -> 1.463 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.75% +0.75%] index_select random_sorted : Elapsed 0.015 ms (1.463 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.82% +0.75%] index_select perm : Elapsed 0.015 ms (1.469 ms / 100) 1.464 -> 1.465 ( +0.07%) [ +0.27% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.468 ms / 100) B = [5, 4, 16, 20] (stride (80, 20, 400, 1)) A = [40, 4, 16, 20] (stride (1, 640, 40, 2560)) dim = 0 1.502 -> 1.504 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.47% +0.40%] index_select const : Elapsed 0.015 ms (1.503 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.00% +0.07% +0.07% / +0.20% +0.40% +0.40%] index_select wrap : Elapsed 0.015 ms (1.498 ms / 100) 1.499 -> 1.502 ( +0.20%) [ +0.20% +0.33% +0.00% / +0.20% +0.67% +0.67%] index_select linear : Elapsed 0.015 ms (1.502 ms / 100) 1.501 -> 1.500 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.53% +0.47%] index_select reverse : Elapsed 0.015 ms (1.501 ms / 100) 1.496 -> 1.498 ( +0.13%) [ +0.27% +0.00% +0.07% / +0.13% +0.60% +0.53%] index_select skip64 : Elapsed 0.015 ms (1.500 ms / 100) 1.503 -> 1.502 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.47% +0.40%] index_select skip256 : Elapsed 0.015 ms (1.503 ms / 100) 1.498 -> 1.499 ( +0.07%) [ +0.20% +0.07% +0.00% / +0.07% +0.60% +0.53%] index_select spread : Elapsed 0.015 ms (1.501 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.00% +0.00% +0.07% / +0.13% +0.60% +0.74%] index_select strided 3 : Elapsed 0.015 ms (1.493 ms / 100) 1.497 -> 1.497 ( +0.00%) [ +0.27% +0.00% +0.07% / +0.00% +0.40% +0.40%] index_select strided 5 : Elapsed 0.015 ms (1.501 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.53% +0.53%] index_select strided 7 : Elapsed 0.015 ms (1.498 ms / 100) 1.500 -> 1.505 ( +0.33%) [ +0.27% +0.27% +0.00% / +0.33% +0.80% +0.93%] index_select strided 8 : Elapsed 0.015 ms (1.504 ms / 100) 1.495 -> 1.497 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.74% +0.47%] index_select strided 16 : Elapsed 0.015 ms (1.497 ms / 100) 1.502 -> 1.503 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.60% +0.53%] index_select random : Elapsed 0.015 ms (1.503 ms / 100) 1.496 -> 1.497 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.60% +0.80%] index_select random_sorted : Elapsed 0.015 ms (1.497 ms / 100) 1.497 -> 1.499 ( +0.13%) [ +0.07% +0.00% +0.07% / +0.13% +0.73% +0.67%] index_select perm : Elapsed 0.015 ms (1.498 ms / 100) 1.498 -> 1.498 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.60%] index_select perm_sorted : Elapsed 0.015 ms (1.499 ms / 100) B = [5, 4, 16, 20] (stride (64, 16, 1, 320)) A = [40, 4, 16, 20] (stride (1280, 320, 1, 16)) dim = 0 1.384 -> 1.385 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.72% +0.65%] index_select const : Elapsed 0.014 ms (1.385 ms / 100) 1.385 -> 1.385 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.72% +0.72%] index_select wrap : Elapsed 0.014 ms (1.386 ms / 100) 1.385 -> 1.388 ( +0.22%) [ +0.00% +0.22% +0.14% / +0.22% +0.65% +0.72%] index_select linear : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.79% +0.87%] index_select reverse : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.80% +0.80%] index_select skip64 : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.385 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.79% +0.79%] index_select skip256 : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.29% +0.14% +0.00% / +0.00% +0.87% +0.79%] index_select spread : Elapsed 0.014 ms (1.388 ms / 100) 1.384 -> 1.385 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.79% +0.72%] index_select strided 3 : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.386 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.72% +0.79%] index_select strided 5 : Elapsed 0.014 ms (1.385 ms / 100) 1.385 -> 1.385 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.72% +0.79%] index_select strided 7 : Elapsed 0.014 ms (1.386 ms / 100) 1.385 -> 1.386 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.79% +0.79%] index_select strided 8 : Elapsed 0.014 ms (1.386 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.79% +0.79%] index_select strided 16 : Elapsed 0.014 ms (1.385 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.79% +0.79%] index_select random : Elapsed 0.014 ms (1.384 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.00% +0.79% +0.79%] index_select random_sorted : Elapsed 0.014 ms (1.387 ms / 100) 1.384 -> 1.385 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.79% +0.72%] index_select perm : Elapsed 0.014 ms (1.385 ms / 100) 1.385 -> 1.387 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.87% +0.79%] index_select perm_sorted : Elapsed 0.014 ms (1.386 ms / 100) B = [5, 4, 16, 20] (stride (1, 80, 5, 320)) A = [40, 4, 16, 20] (stride (1280, 320, 20, 1)) dim = 0 1.183 -> 1.179 ( -0.34%) [ +0.08% +0.00% +0.25% / -0.34% +0.42% +0.17%] index_select const : Elapsed 0.012 ms (1.184 ms / 100) 1.178 -> 1.188 ( +0.85%) [ +0.34% +0.17% +0.00% / +0.85% +0.85% +1.02%] index_select wrap : Elapsed 0.012 ms (1.182 ms / 100) 1.182 -> 1.183 ( +0.08%) [ +0.42% +0.00% +0.08% / +0.08% +0.59% +0.51%] index_select linear : Elapsed 0.012 ms (1.187 ms / 100) 1.178 -> 1.186 ( +0.68%) [ +0.51% +0.00% +0.42% / +0.68% +0.93% +0.85%] index_select reverse : Elapsed 0.012 ms (1.184 ms / 100) 1.184 -> 1.184 ( +0.00%) [ +0.17% +0.00% +0.34% / +0.00% +0.25% +0.25%] index_select skip64 : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.185 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.59% +0.34% +0.25%] index_select skip256 : Elapsed 0.012 ms (1.183 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.34% +0.59% +0.00% / +0.42% +0.42% +0.59%] index_select spread : Elapsed 0.012 ms (1.186 ms / 100) 1.182 -> 1.186 ( +0.34%) [ +0.34% +0.00% +0.17% / +0.34% +0.59% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.186 ms / 100) 1.179 -> 1.182 ( +0.25%) [ +0.68% +0.00% +0.25% / +0.25% +0.76% +0.76%] index_select strided 5 : Elapsed 0.012 ms (1.187 ms / 100) 1.180 -> 1.181 ( +0.08%) [ +0.68% +0.00% +0.34% / +0.08% +0.85% +0.76%] index_select strided 7 : Elapsed 0.012 ms (1.188 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.34% +0.00% / +0.76% +0.76% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.187 ms / 100) 1.179 -> 1.180 ( +0.08%) [ +0.42% +0.00% +0.85% / +0.08% +0.85% +0.76%] index_select strided 16 : Elapsed 0.012 ms (1.184 ms / 100) 1.180 -> 1.183 ( +0.25%) [ +0.34% +0.25% +0.00% / +0.25% +0.51% +0.76%] index_select random : Elapsed 0.012 ms (1.184 ms / 100) 1.179 -> 1.183 ( +0.34%) [ +0.76% +0.17% +0.00% / +0.34% +0.68% +0.76%] index_select random_sorted : Elapsed 0.012 ms (1.188 ms / 100) 1.179 -> 1.187 ( +0.68%) [ +0.34% +0.00% +0.00% / +0.68% +0.76% +0.85%] index_select perm : Elapsed 0.012 ms (1.183 ms / 100) 1.180 -> 1.183 ( +0.25%) [ +0.00% +0.00% +0.76% / +0.25% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.012 ms (1.180 ms / 100) B = [5, 4, 16, 20] (stride (4, 1, 20, 320)) dim = 0 fill_cnt = 40 2.484 -> 2.483 ( -0.04%) [ +0.20% +0.24% +0.00% / -0.04% +0.16% +0.00%] index_fill_ const : Elapsed 0.025 ms (2.489 ms / 100) 2.489 -> 2.486 ( -0.12%) [ +0.12% +0.00% +0.08% / -0.12% +0.00% +0.04%] index_fill_ linear : Elapsed 0.025 ms (2.492 ms / 100) 2.491 -> 2.484 ( -0.28%) [ +0.08% +0.00% +0.12% / -0.04% -0.04% -0.28%] index_fill_ reverse : Elapsed 0.025 ms (2.493 ms / 100) 2.490 -> 2.484 ( -0.24%) [ +0.04% +0.00% +0.08% / +0.00% -0.24% -0.20%] index_fill_ skip64 : Elapsed 0.025 ms (2.491 ms / 100) 2.490 -> 2.483 ( -0.28%) [ +0.04% +0.16% +0.00% / -0.28% -0.20% -0.12%] index_fill_ skip256 : Elapsed 0.025 ms (2.491 ms / 100) 2.488 -> 2.488 ( +0.00%) [ +0.32% +0.00% +0.16% / +0.00% +0.12% +0.16%] index_fill_ spread : Elapsed 0.025 ms (2.496 ms / 100) 2.491 -> 2.488 ( -0.12%) [ +0.16% +0.00% +0.12% / +0.00% -0.12% -0.04%] index_fill_ strided 3 : Elapsed 0.025 ms (2.495 ms / 100) 2.488 -> 2.485 ( -0.12%) [ +0.00% +0.00% +0.16% / -0.12% +0.12% +0.24%] index_fill_ random : Elapsed 0.025 ms (2.488 ms / 100) 2.488 -> 2.489 ( +0.04%) [ +0.16% +0.20% +0.00% / +0.04% +0.24% +0.12%] index_fill_ random_sorted : Elapsed 0.025 ms (2.492 ms / 100) B = [5, 4, 16, 20] (stride (1, 5, 20, 320)) A = [40, 4, 16, 20] (stride (64, 1, 4, 2560)) dim = 0 1.581 -> 1.581 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.57% +0.57%] index_select const : Elapsed 0.016 ms (1.581 ms / 100) 1.585 -> 1.587 ( +0.13%) [ +0.06% +0.19% +0.00% / +0.13% +0.69% +0.63%] index_select wrap : Elapsed 0.016 ms (1.586 ms / 100) 1.583 -> 1.583 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.57% +0.51%] index_select linear : Elapsed 0.016 ms (1.584 ms / 100) 1.578 -> 1.580 ( +0.13%) [ +0.13% +0.00% +0.06% / +0.13% +0.70% +0.76%] index_select reverse : Elapsed 0.016 ms (1.580 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.25% +0.06% +0.00% / +0.00% +0.57% +0.63%] index_select skip64 : Elapsed 0.016 ms (1.582 ms / 100) 1.580 -> 1.581 ( +0.06%) [ +0.00% +0.06% +0.13% / +0.06% +0.57% +0.70%] index_select skip256 : Elapsed 0.016 ms (1.580 ms / 100) 1.584 -> 1.585 ( +0.06%) [ +0.00% +0.13% +0.06% / +0.06% +0.76% +0.76%] index_select spread : Elapsed 0.016 ms (1.584 ms / 100) 1.585 -> 1.586 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.76% +0.69%] index_select strided 3 : Elapsed 0.016 ms (1.586 ms / 100) 1.574 -> 1.578 ( +0.25%) [ +0.13% +0.00% +0.06% / +0.25% +0.44% +0.38%] index_select strided 5 : Elapsed 0.016 ms (1.576 ms / 100) 1.574 -> 1.577 ( +0.19%) [ +0.32% +0.13% +0.00% / +0.19% +0.32% +0.32%] index_select strided 7 : Elapsed 0.016 ms (1.579 ms / 100) 1.573 -> 1.578 ( +0.32%) [ +0.00% +0.32% +0.19% / +0.32% +0.38% +0.32%] index_select strided 8 : Elapsed 0.016 ms (1.573 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.19% +0.19% +0.00% / +0.13% +0.70% +0.76%] index_select strided 16 : Elapsed 0.016 ms (1.579 ms / 100) 1.569 -> 1.577 ( +0.51%) [ +0.00% +0.25% +0.25% / +0.57% +0.57% +0.51%] index_select random : Elapsed 0.016 ms (1.569 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.32% +0.00% +0.32% / +0.06% +0.13% +0.19%] index_select random_sorted : Elapsed 0.016 ms (1.580 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.00% +0.38% +0.06% / +0.19% +0.38% +0.64%] index_select perm : Elapsed 0.016 ms (1.573 ms / 100) 1.574 -> 1.574 ( +0.00%) [ +0.38% +0.32% +0.00% / +0.00% +0.25% +0.19%] index_select perm_sorted : Elapsed 0.016 ms (1.580 ms / 100) out_shape = [40, 5, 16, 20] in_shape = [40, 4, 16, 20] idx_dim = 1 B = [40, 5, 16, 20] (stride (1600, 1, 5, 80)) A = [40, 4, 16, 20] (stride (1, 12800, 40, 640)) dim = 1 6.036 -> 6.015 ( -0.35%) [ +0.00% +0.05% +0.00% / -0.02% -0.25% -0.35%] index_add_ linear : Elapsed 0.060 ms (6.036 ms / 100) 6.035 -> 6.010 ( -0.41%) [ +0.13% +0.00% +0.03% / +0.02% -0.38% -0.41%] index_copy_ linear : Elapsed 0.060 ms (6.043 ms / 100) 6.036 -> 6.021 ( -0.25%) [ +0.03% +0.00% +0.03% / +0.08% -0.25% -0.25%] index_add_ reverse : Elapsed 0.060 ms (6.038 ms / 100) 6.036 -> 6.015 ( -0.35%) [ +0.00% +0.02% +0.05% / +0.10% -0.35% -0.35%] index_copy_ reverse : Elapsed 0.060 ms (6.036 ms / 100) 6.032 -> 6.017 ( -0.25%) [ +0.00% +0.12% +0.08% / +0.13% -0.25% -0.12%] index_add_ spread : Elapsed 0.060 ms (6.032 ms / 100) 6.027 -> 6.008 ( -0.32%) [ +0.28% +0.00% +0.15% / +0.23% -0.32% -0.13%] index_copy_ spread : Elapsed 0.060 ms (6.044 ms / 100) 6.037 -> 6.019 ( -0.30%) [ +0.05% +0.05% +0.00% / +0.03% -0.17% -0.30%] index_add_ strided 3 : Elapsed 0.060 ms (6.040 ms / 100) 6.034 -> 6.010 ( -0.40%) [ +0.00% +0.05% +0.08% / +0.08% -0.38% -0.40%] index_copy_ strided 3 : Elapsed 0.060 ms (6.034 ms / 100) 6.033 -> 6.017 ( -0.27%) [ +0.07% +0.00% +0.12% / +0.18% -0.27% -0.20%] index_add_ perm : Elapsed 0.060 ms (6.037 ms / 100) 6.042 -> 6.011 ( -0.51%) [ +0.00% +0.00% +0.00% / +0.03% -0.51% -0.48%] index_copy_ perm : Elapsed 0.060 ms (6.042 ms / 100) 6.035 -> 6.017 ( -0.30%) [ +0.05% +0.00% +0.13% / +0.17% -0.27% -0.30%] index_add_ perm_sorted : Elapsed 0.060 ms (6.038 ms / 100) 6.037 -> 6.013 ( -0.40%) [ +0.07% +0.02% +0.00% / +0.00% -0.40% -0.38%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.041 ms / 100) 6.248 -> 6.242 ( -0.10%) [ +0.11% +0.00% +0.00% / -0.02% -0.10% -0.06%] index_select const : Elapsed 0.063 ms (6.255 ms / 100) 6.336 -> 6.314 ( -0.35%) [ +0.00% +0.05% +0.09% / +0.13% -0.35% -0.28%] index_select wrap : Elapsed 0.063 ms (6.336 ms / 100) 6.320 -> 6.309 ( -0.17%) [ +0.16% +0.00% +0.22% / +0.17% -0.16% -0.17%] index_select linear : Elapsed 0.063 ms (6.330 ms / 100) 6.333 -> 6.315 ( -0.28%) [ +0.06% +0.00% +0.14% / +0.02% -0.27% -0.28%] index_select reverse : Elapsed 0.063 ms (6.337 ms / 100) 6.245 -> 6.241 ( -0.06%) [ +0.14% +0.00% +0.10% / +0.05% -0.03% -0.06%] index_select skip64 : Elapsed 0.063 ms (6.254 ms / 100) 6.243 -> 6.242 ( -0.02%) [ +0.08% +0.14% +0.00% / +0.27% -0.02% +0.06%] index_select skip256 : Elapsed 0.062 ms (6.248 ms / 100) 6.327 -> 6.304 ( -0.36%) [ +0.03% +0.06% +0.00% / +0.09% -0.36% -0.27%] index_select spread : Elapsed 0.063 ms (6.329 ms / 100) 6.331 -> 6.319 ( -0.19%) [ +0.11% +0.09% +0.00% / +0.08% -0.19% -0.14%] index_select strided 3 : Elapsed 0.063 ms (6.338 ms / 100) 6.302 -> 6.298 ( -0.06%) [ +0.02% +0.08% +0.00% / +0.06% -0.03% -0.06%] index_select random : Elapsed 0.063 ms (6.303 ms / 100) 6.293 -> 6.283 ( -0.16%) [ +0.00% +0.17% +0.10% / +0.13% -0.13% -0.16%] index_select random_sorted : Elapsed 0.063 ms (6.293 ms / 100) B = [40, 5, 16, 20] (stride (1, 40, 4000, 200)) A = [40, 4, 16, 20] (stride (320, 12800, 20, 1)) dim = 1 3.454 -> 3.452 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.78% +0.49%] index_add_ linear : Elapsed 0.035 ms (3.454 ms / 100) 3.366 -> 3.366 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.39% +0.62%] index_copy_ linear : Elapsed 0.034 ms (3.368 ms / 100) 3.460 -> 3.461 ( +0.03%) [ +0.14% +0.20% +0.00% / +0.03% +0.32% +0.40%] index_add_ reverse : Elapsed 0.035 ms (3.465 ms / 100) 3.369 -> 3.381 ( +0.36%) [ +0.00% +0.06% +0.36% / +0.36% +0.36% +0.56%] index_copy_ reverse : Elapsed 0.034 ms (3.369 ms / 100) 3.445 -> 3.444 ( -0.03%) [ +0.00% +0.38% +0.15% / -0.03% +0.64% +0.73%] index_add_ spread : Elapsed 0.034 ms (3.445 ms / 100) 3.367 -> 3.367 ( +0.00%) [ +0.30% +0.00% +0.03% / +0.00% +0.53% +0.27%] index_copy_ spread : Elapsed 0.034 ms (3.377 ms / 100) 3.449 -> 3.448 ( -0.03%) [ +0.03% +0.12% +0.00% / -0.03% +0.58% +0.46%] index_add_ strided 3 : Elapsed 0.035 ms (3.450 ms / 100) 3.374 -> 3.381 ( +0.21%) [ +0.03% +0.00% +0.06% / +0.21% +0.27% +0.50%] index_copy_ strided 3 : Elapsed 0.034 ms (3.375 ms / 100) 3.439 -> 3.442 ( +0.09%) [ +0.00% +0.12% +0.09% / +0.09% +0.84% +0.93%] index_add_ perm : Elapsed 0.034 ms (3.439 ms / 100) 3.374 -> 3.376 ( +0.06%) [ +0.18% +0.18% +0.00% / +0.06% +0.41% +0.21%] index_copy_ perm : Elapsed 0.034 ms (3.380 ms / 100) 3.439 -> 3.446 ( +0.20%) [ +0.15% +0.12% +0.00% / +0.20% +0.90% +0.79%] index_add_ perm_sorted : Elapsed 0.034 ms (3.444 ms / 100) 3.368 -> 3.373 ( +0.15%) [ +0.00% +0.21% +0.24% / +0.15% +0.33% +0.36%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.368 ms / 100) 3.431 -> 3.429 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.03% +0.06% -0.06%] index_select const : Elapsed 0.034 ms (3.431 ms / 100) 3.489 -> 3.484 ( -0.14%) [ +0.06% +0.11% +0.00% / -0.14% +0.20% +0.20%] index_select wrap : Elapsed 0.035 ms (3.491 ms / 100) 3.488 -> 3.490 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.14% +0.17%] index_select linear : Elapsed 0.035 ms (3.491 ms / 100) 3.482 -> 3.489 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.26% +0.55%] index_select reverse : Elapsed 0.035 ms (3.489 ms / 100) 3.435 -> 3.429 ( -0.17%) [ +0.00% +0.12% +0.03% / +0.29% -0.17% +0.06%] index_select skip64 : Elapsed 0.034 ms (3.435 ms / 100) 3.435 -> 3.432 ( -0.09%) [ +0.00% +0.06% +0.06% / +0.06% +0.03% -0.09%] index_select skip256 : Elapsed 0.034 ms (3.435 ms / 100) 3.482 -> 3.489 ( +0.20%) [ +0.00% +0.06% +0.20% / +0.20% +0.23% +0.43%] index_select spread : Elapsed 0.035 ms (3.482 ms / 100) 3.487 -> 3.486 ( -0.03%) [ +0.14% +0.00% +0.14% / -0.03% +0.34% +0.40%] index_select strided 3 : Elapsed 0.035 ms (3.492 ms / 100) 3.463 -> 3.462 ( -0.03%) [ +0.23% +0.00% +0.14% / -0.03% +0.32% +0.17%] index_select random : Elapsed 0.035 ms (3.471 ms / 100) 3.475 -> 3.470 ( -0.14%) [ +0.06% +0.03% +0.00% / -0.14% +0.14% +0.00%] index_select random_sorted : Elapsed 0.035 ms (3.477 ms / 100) B = [40, 5, 16, 20] (stride (1, 40, 4000, 200)) A = [40, 4, 16, 20] (stride (1, 40, 3200, 160)) dim = 1 3.514 -> 3.509 ( -0.14%) [ +0.17% +0.00% +0.00% / -0.11% -0.14% -0.09%] index_add_ linear : Elapsed 0.035 ms (3.520 ms / 100) 3.431 -> 3.429 ( -0.06%) [ +0.20% +0.03% +0.00% / +0.00% -0.06% +0.29%] index_copy_ linear : Elapsed 0.034 ms (3.438 ms / 100) 3.514 -> 3.511 ( -0.09%) [ +0.26% +0.00% +0.09% / +0.34% -0.09% -0.09%] index_add_ reverse : Elapsed 0.035 ms (3.523 ms / 100) 3.433 -> 3.432 ( -0.03%) [ +0.06% +0.00% +0.09% / +0.09% -0.03% +0.26%] index_copy_ reverse : Elapsed 0.034 ms (3.435 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.00% +0.06% +0.14% / +0.17% +0.06% +0.26%] index_add_ spread : Elapsed 0.035 ms (3.512 ms / 100) 3.424 -> 3.433 ( +0.26%) [ +0.00% +0.26% +0.15% / +0.26% +0.47% +0.58%] index_copy_ spread : Elapsed 0.034 ms (3.424 ms / 100) 3.510 -> 3.498 ( -0.34%) [ +0.00% +0.00% +0.03% / +0.17% -0.09% -0.34%] index_add_ strided 3 : Elapsed 0.035 ms (3.510 ms / 100) 3.438 -> 3.439 ( +0.03%) [ +0.20% +0.29% +0.00% / +0.20% +0.03% +0.03%] index_copy_ strided 3 : Elapsed 0.034 ms (3.445 ms / 100) 3.512 -> 3.499 ( -0.37%) [ +0.00% +0.00% +0.20% / +0.28% -0.37% -0.34%] index_add_ perm : Elapsed 0.035 ms (3.512 ms / 100) 3.434 -> 3.437 ( +0.09%) [ +0.00% +0.12% +0.17% / +0.12% +0.09% +0.23%] index_copy_ perm : Elapsed 0.034 ms (3.434 ms / 100) 3.523 -> 3.510 ( -0.37%) [ +0.20% +0.00% +0.28% / -0.09% -0.37% -0.37%] index_add_ perm_sorted : Elapsed 0.035 ms (3.530 ms / 100) 3.445 -> 3.425 ( -0.58%) [ +0.20% +0.00% +0.23% / +0.23% -0.58% -0.52%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.452 ms / 100) 3.625 -> 3.633 ( +0.22%) [ +0.00% +0.14% +0.00% / +0.22% +0.22% +0.28%] index_select const : Elapsed 0.036 ms (3.625 ms / 100) 3.592 -> 3.595 ( +0.08%) [ +0.17% +0.33% +0.00% / +0.08% +0.28% +0.42%] index_select wrap : Elapsed 0.036 ms (3.598 ms / 100) 3.594 -> 3.601 ( +0.19%) [ +0.22% +0.00% +0.11% / +0.19% +0.28% +0.42%] index_select linear : Elapsed 0.036 ms (3.602 ms / 100) 3.598 -> 3.597 ( -0.03%) [ +0.00% +0.08% +0.47% / +0.25% -0.03% +0.03%] index_select reverse : Elapsed 0.036 ms (3.598 ms / 100) 3.630 -> 3.626 ( -0.11%) [ +0.00% +0.03% +0.03% / -0.11% +0.25% +0.03%] index_select skip64 : Elapsed 0.036 ms (3.630 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.14% +0.00% +0.11% / +0.03% +0.06% +0.17%] index_select skip256 : Elapsed 0.036 ms (3.637 ms / 100) 3.608 -> 3.606 ( -0.06%) [ +0.00% +0.03% +0.00% / -0.06% +0.00% +0.11%] index_select spread : Elapsed 0.036 ms (3.608 ms / 100) 3.610 -> 3.610 ( +0.00%) [ +0.00% +0.22% +0.14% / +0.00% +0.11% +0.06%] index_select strided 3 : Elapsed 0.036 ms (3.610 ms / 100) 3.604 -> 3.596 ( -0.22%) [ +0.17% +0.00% +0.03% / +0.00% -0.19% -0.22%] index_select random : Elapsed 0.036 ms (3.610 ms / 100) 3.597 -> 3.593 ( -0.11%) [ +0.08% +0.03% +0.00% / +0.08% -0.06% -0.11%] index_select random_sorted : Elapsed 0.036 ms (3.600 ms / 100) B = [40, 5, 16, 20] (stride (80, 16, 1, 3200)) A = [40, 4, 16, 20] (stride (1, 640, 40, 2560)) dim = 1 5.904 -> 5.915 ( +0.19%) [ +0.19% +0.00% +0.12% / +0.19% +0.54% +0.63%] index_add_ linear : Elapsed 0.059 ms (5.915 ms / 100) 5.802 -> 5.808 ( +0.10%) [ +0.00% +0.05% +0.14% / +0.10% +0.60% +0.41%] index_copy_ linear : Elapsed 0.058 ms (5.802 ms / 100) 5.924 -> 5.937 ( +0.22%) [ +0.00% +0.00% +0.08% / +0.22% +0.24% +0.34%] index_add_ reverse : Elapsed 0.059 ms (5.924 ms / 100) 5.816 -> 5.818 ( +0.03%) [ +0.00% +0.12% +0.10% / +0.03% +0.14% +0.31%] index_copy_ reverse : Elapsed 0.058 ms (5.816 ms / 100) 5.908 -> 5.921 ( +0.22%) [ +0.00% +0.03% +0.05% / +0.22% +0.51% +0.56%] index_add_ spread : Elapsed 0.059 ms (5.908 ms / 100) 5.809 -> 5.809 ( +0.00%) [ +0.03% +0.00% +0.02% / +0.00% +0.45% +0.40%] index_copy_ spread : Elapsed 0.058 ms (5.811 ms / 100) 5.940 -> 5.942 ( +0.03%) [ +0.08% +0.00% +0.02% / +0.03% +0.19% +0.20%] index_add_ strided 3 : Elapsed 0.059 ms (5.945 ms / 100) 5.841 -> 5.844 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.05% +0.09%] index_copy_ strided 3 : Elapsed 0.058 ms (5.841 ms / 100) 5.924 -> 5.934 ( +0.17%) [ +0.15% +0.00% +0.25% / +0.17% +0.35% +0.34%] index_add_ perm : Elapsed 0.059 ms (5.933 ms / 100) 5.828 -> 5.829 ( +0.02%) [ +0.14% +0.00% +0.00% / +0.02% +0.31% +0.03%] index_copy_ perm : Elapsed 0.058 ms (5.836 ms / 100) 5.915 -> 5.919 ( +0.07%) [ +0.03% +0.00% +0.10% / +0.07% +0.54% +0.36%] index_add_ perm_sorted : Elapsed 0.059 ms (5.917 ms / 100) 5.811 -> 5.814 ( +0.05%) [ +0.00% +0.09% +0.14% / +0.05% +0.43% +0.26%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.811 ms / 100) 6.151 -> 6.149 ( -0.03%) [ +0.00% +0.07% +0.07% / +0.05% -0.03% +0.00%] index_select const : Elapsed 0.062 ms (6.151 ms / 100) 6.201 -> 6.213 ( +0.19%) [ +0.15% +0.00% +0.19% / +0.19% +0.34% +0.34%] index_select wrap : Elapsed 0.062 ms (6.210 ms / 100) 6.197 -> 6.200 ( +0.05%) [ +0.03% +0.00% +0.08% / +0.05% +0.24% +0.24%] index_select linear : Elapsed 0.062 ms (6.199 ms / 100) 6.207 -> 6.206 ( -0.02%) [ +0.00% +0.06% +0.18% / +0.16% -0.02% +0.00%] index_select reverse : Elapsed 0.062 ms (6.207 ms / 100) 6.153 -> 6.141 ( -0.20%) [ +0.00% +0.00% +0.18% / +0.20% -0.20% -0.08%] index_select skip64 : Elapsed 0.062 ms (6.153 ms / 100) 6.151 -> 6.147 ( -0.07%) [ +0.07% +0.00% +0.10% / +0.11% -0.07% -0.02%] index_select skip256 : Elapsed 0.062 ms (6.155 ms / 100) 6.202 -> 6.205 ( +0.05%) [ +0.00% +0.03% +0.08% / +0.05% +0.31% +0.45%] index_select spread : Elapsed 0.062 ms (6.202 ms / 100) 6.222 -> 6.213 ( -0.14%) [ +0.05% +0.00% +0.18% / +0.16% -0.14% -0.13%] index_select strided 3 : Elapsed 0.062 ms (6.225 ms / 100) 6.194 -> 6.204 ( +0.16%) [ +0.06% +0.00% +0.18% / +0.16% +0.44% +0.34%] index_select random : Elapsed 0.062 ms (6.198 ms / 100) 6.160 -> 6.178 ( +0.29%) [ +0.00% +0.21% +0.37% / +0.29% +0.75% +0.78%] index_select random_sorted : Elapsed 0.062 ms (6.160 ms / 100) B = [40, 5, 16, 20] (stride (16, 640, 1, 3200)) A = [40, 4, 16, 20] (stride (1280, 20, 80, 1)) dim = 1 5.922 -> 5.920 ( -0.03%) [ +0.08% +0.00% +0.29% / +0.03% +0.00% -0.03%] index_add_ linear : Elapsed 0.059 ms (5.927 ms / 100) 5.836 -> 5.828 ( -0.14%) [ +0.21% +0.12% +0.00% / +0.07% -0.14% -0.05%] index_copy_ linear : Elapsed 0.058 ms (5.848 ms / 100) 5.937 -> 5.904 ( -0.56%) [ +0.15% +0.00% +0.03% / +0.13% -0.56% -0.51%] index_add_ reverse : Elapsed 0.059 ms (5.946 ms / 100) 5.846 -> 5.827 ( -0.33%) [ +0.00% +0.02% +0.07% / +0.05% -0.33% -0.24%] index_copy_ reverse : Elapsed 0.058 ms (5.846 ms / 100) 5.916 -> 5.902 ( -0.24%) [ +0.10% +0.00% +0.07% / +0.10% -0.24% +0.00%] index_add_ spread : Elapsed 0.059 ms (5.922 ms / 100) 5.831 -> 5.815 ( -0.27%) [ +0.00% +0.05% +0.10% / +0.05% -0.14% -0.27%] index_copy_ spread : Elapsed 0.058 ms (5.831 ms / 100) 5.917 -> 5.918 ( +0.02%) [ +0.00% +0.03% +0.07% / +0.03% +0.17% +0.02%] index_add_ strided 3 : Elapsed 0.059 ms (5.917 ms / 100) 5.823 -> 5.832 ( +0.15%) [ +0.10% +0.00% +0.22% / +0.15% +0.21% +0.15%] index_copy_ strided 3 : Elapsed 0.058 ms (5.829 ms / 100) 5.919 -> 5.917 ( -0.03%) [ +0.00% +0.08% +0.08% / +0.05% -0.02% -0.03%] index_add_ perm : Elapsed 0.059 ms (5.919 ms / 100) 5.827 -> 5.827 ( +0.00%) [ +0.19% +0.00% +0.19% / +0.29% +0.00% +0.02%] index_copy_ perm : Elapsed 0.058 ms (5.838 ms / 100) 5.910 -> 5.921 ( +0.19%) [ +0.20% +0.00% +0.03% / +0.20% +0.19% +0.32%] index_add_ perm_sorted : Elapsed 0.059 ms (5.922 ms / 100) 5.823 -> 5.833 ( +0.17%) [ +0.00% +0.12% +0.26% / +0.17% +0.29% +0.33%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.823 ms / 100) 6.162 -> 6.152 ( -0.16%) [ +0.00% +0.02% +0.03% / -0.03% -0.10% -0.16%] index_select const : Elapsed 0.062 ms (6.162 ms / 100) 6.221 -> 6.212 ( -0.14%) [ +0.16% +0.00% +0.02% / +0.08% -0.14% -0.03%] index_select wrap : Elapsed 0.062 ms (6.231 ms / 100) 6.205 -> 6.190 ( -0.24%) [ +0.00% +0.02% +0.03% / +0.02% -0.23% -0.24%] index_select linear : Elapsed 0.062 ms (6.205 ms / 100) 6.208 -> 6.202 ( -0.10%) [ +0.00% +0.13% +0.05% / +0.10% -0.06% -0.10%] index_select reverse : Elapsed 0.062 ms (6.208 ms / 100) 6.148 -> 6.145 ( -0.05%) [ +0.08% +0.00% +0.21% / +0.15% -0.05% -0.03%] index_select skip64 : Elapsed 0.062 ms (6.153 ms / 100) 6.154 -> 6.152 ( -0.03%) [ +0.00% +0.02% +0.11% / +0.18% -0.03% -0.02%] index_select skip256 : Elapsed 0.062 ms (6.154 ms / 100) 6.220 -> 6.217 ( -0.05%) [ +0.10% +0.05% +0.00% / +0.08% +0.02% -0.05%] index_select spread : Elapsed 0.062 ms (6.226 ms / 100) 6.210 -> 6.200 ( -0.16%) [ +0.05% +0.02% +0.00% / +0.08% -0.16% -0.11%] index_select strided 3 : Elapsed 0.062 ms (6.213 ms / 100) 6.193 -> 6.189 ( -0.06%) [ +0.00% +0.10% +0.02% / +0.08% -0.06% -0.03%] index_select random : Elapsed 0.062 ms (6.193 ms / 100) 6.213 -> 6.202 ( -0.18%) [ +0.08% +0.00% +0.00% / +0.10% -0.18% -0.10%] index_select random_sorted : Elapsed 0.062 ms (6.218 ms / 100) out_shape = [40, 4, 5, 20] in_shape = [40, 4, 16, 20] idx_dim = 2 B = [40, 4, 5, 20] (stride (400, 1, 4, 20)) A = [40, 4, 16, 20] (stride (20, 800, 3200, 1)) dim = 2 2.243 -> 2.253 ( +0.45%) [ +0.00% +0.13% +0.18% / +0.45% +0.67% +0.53%] index_select const : Elapsed 0.022 ms (2.243 ms / 100) 2.316 -> 2.315 ( -0.04%) [ +0.09% +0.00% +0.00% / +0.04% -0.04% +0.04%] index_select wrap : Elapsed 0.023 ms (2.318 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.22% +0.30% +0.04%] index_select linear : Elapsed 0.023 ms (2.325 ms / 100) 2.326 -> 2.328 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.17% +0.17%] index_select reverse : Elapsed 0.023 ms (2.327 ms / 100) 2.241 -> 2.246 ( +0.22%) [ +0.36% +0.22% +0.00% / +0.22% +0.76% +0.89%] index_select skip64 : Elapsed 0.022 ms (2.249 ms / 100) 2.244 -> 2.244 ( +0.00%) [ +0.00% +0.04% +0.18% / +0.00% +0.71% +0.71%] index_select skip256 : Elapsed 0.022 ms (2.244 ms / 100) 2.325 -> 2.321 ( -0.17%) [ +0.00% +0.04% +0.04% / +0.04% -0.17% -0.17%] index_select spread : Elapsed 0.023 ms (2.325 ms / 100) 2.326 -> 2.326 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.09% +0.13% +0.00%] index_select strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.314 -> 2.310 ( -0.17%) [ +0.00% +0.09% +0.13% / -0.17% +0.09% +0.30%] index_select strided 5 : Elapsed 0.023 ms (2.314 ms / 100) 2.317 -> 2.325 ( +0.35%) [ +0.30% +0.39% +0.00% / +0.35% +0.56% +0.56%] index_select strided 7 : Elapsed 0.023 ms (2.324 ms / 100) 2.256 -> 2.258 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +1.02% +0.93%] index_select strided 8 : Elapsed 0.023 ms (2.257 ms / 100) 2.297 -> 2.303 ( +0.26%) [ +0.13% +0.22% +0.00% / +0.26% +1.18% +1.09%] index_select random : Elapsed 0.023 ms (2.300 ms / 100) 2.306 -> 2.312 ( +0.26%) [ +0.39% +0.00% +0.43% / +0.26% +0.65% +0.91%] index_select random_sorted : Elapsed 0.023 ms (2.315 ms / 100) 2.331 -> 2.329 ( -0.09%) [ +0.17% +0.00% +0.04% / -0.09% +0.17% +0.04%] index_select perm : Elapsed 0.023 ms (2.335 ms / 100) 2.319 -> 2.320 ( +0.04%) [ +0.39% +0.00% +0.43% / +0.47% +0.04% +0.13%] index_select perm_sorted : Elapsed 0.023 ms (2.328 ms / 100) B = [40, 4, 5, 20] (stride (20, 800, 3200, 1)) A = [40, 4, 16, 20] (stride (1, 640, 40, 2560)) dim = 2 2.434 -> 2.436 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.45% +0.45%] index_select const : Elapsed 0.024 ms (2.437 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.25% +0.20%] index_select wrap : Elapsed 0.024 ms (2.445 ms / 100) 2.440 -> 2.437 ( -0.12%) [ +0.12% +0.12% +0.00% / -0.12% +0.29% +0.29%] index_select linear : Elapsed 0.024 ms (2.443 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.37% +0.41%] index_select reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.435 -> 2.436 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.74% +0.45%] index_select skip64 : Elapsed 0.024 ms (2.439 ms / 100) 2.434 -> 2.438 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.49% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.436 ms / 100) 2.449 -> 2.446 ( -0.12%) [ +0.12% +0.04% +0.00% / -0.12% +0.24% +0.16%] index_select spread : Elapsed 0.025 ms (2.452 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.45% +0.37%] index_select strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.12% +0.00% +0.04% / +0.24% +0.41% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.00% +0.33% +0.00% / +0.08% +0.41% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.436 -> 2.441 ( +0.21%) [ +0.21% +0.21% +0.00% / +0.21% +0.41% +0.41%] index_select strided 8 : Elapsed 0.024 ms (2.441 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.24% +0.29%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.24% +0.20% +0.00% / +0.12% +0.37% +0.41%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.08% +0.29% +0.00% / +0.04% +0.57% +0.33%] index_select perm : Elapsed 0.024 ms (2.446 ms / 100) 2.437 -> 2.440 ( +0.12%) [ +0.00% +0.37% +0.08% / +0.12% +0.41% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.437 ms / 100) B = [40, 4, 5, 20] (stride (20, 1, 4, 800)) A = [40, 4, 16, 20] (stride (16, 12800, 1, 640)) dim = 2 2.522 -> 2.527 ( +0.20%) [ +0.00% +0.16% +0.08% / +0.20% +0.52% +0.67%] index_select const : Elapsed 0.025 ms (2.522 ms / 100) 2.521 -> 2.528 ( +0.28%) [ +0.00% +0.20% +0.08% / +0.28% +0.87% +0.44%] index_select wrap : Elapsed 0.025 ms (2.521 ms / 100) 2.522 -> 2.529 ( +0.28%) [ +0.00% +0.00% +0.00% / +0.28% +0.83% +0.48%] index_select linear : Elapsed 0.025 ms (2.522 ms / 100) 2.523 -> 2.526 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.36% +0.63%] index_select reverse : Elapsed 0.025 ms (2.523 ms / 100) 2.521 -> 2.522 ( +0.04%) [ +0.16% +0.16% +0.00% / +0.04% +0.63% +0.63%] index_select skip64 : Elapsed 0.025 ms (2.525 ms / 100) 2.524 -> 2.521 ( -0.12%) [ +0.00% +0.16% +0.08% / -0.12% +0.48% +0.71%] index_select skip256 : Elapsed 0.025 ms (2.524 ms / 100) 2.538 -> 2.546 ( +0.32%) [ +0.51% +0.39% +0.00% / +0.32% +0.59% +0.71%] index_select spread : Elapsed 0.026 ms (2.551 ms / 100) 2.539 -> 2.544 ( +0.20%) [ +0.00% +0.20% +0.16% / +0.20% +0.63% +0.95%] index_select strided 3 : Elapsed 0.025 ms (2.539 ms / 100) 2.544 -> 2.542 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.55% +0.55%] index_select strided 5 : Elapsed 0.025 ms (2.544 ms / 100) 2.538 -> 2.542 ( +0.16%) [ +0.24% +0.00% +0.32% / +0.16% +0.55% +0.87%] index_select strided 7 : Elapsed 0.025 ms (2.544 ms / 100) 2.547 -> 2.544 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.55% +0.82%] index_select strided 8 : Elapsed 0.025 ms (2.547 ms / 100) 2.545 -> 2.550 ( +0.20%) [ +0.00% +0.16% +0.04% / +0.20% +0.90% +0.51%] index_select random : Elapsed 0.025 ms (2.545 ms / 100) 2.543 -> 2.540 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.12% +0.63% +0.79%] index_select random_sorted : Elapsed 0.025 ms (2.545 ms / 100) 2.542 -> 2.545 ( +0.12%) [ +0.04% +0.31% +0.00% / +0.12% +0.75% +0.59%] index_select perm : Elapsed 0.025 ms (2.543 ms / 100) 2.548 -> 2.545 ( -0.12%) [ +0.00% +0.04% +0.08% / -0.12% +0.59% +0.59%] index_select perm_sorted : Elapsed 0.025 ms (2.548 ms / 100) B = [40, 4, 5, 20] (stride (1, 200, 40, 800)) A = [40, 4, 16, 20] (stride (1, 40, 160, 2560)) dim = 2 2.479 -> 2.485 ( +0.24%) [ +0.20% +0.00% +0.04% / +0.24% +0.77% +0.52%] index_select const : Elapsed 0.025 ms (2.484 ms / 100) 2.484 -> 2.489 ( +0.20%) [ +0.20% +0.00% +0.08% / +0.20% +0.32% +0.56%] index_select wrap : Elapsed 0.025 ms (2.489 ms / 100) 2.479 -> 2.479 ( +0.00%) [ +0.16% +0.36% +0.00% / +0.00% +0.61% +0.48%] index_select linear : Elapsed 0.025 ms (2.483 ms / 100) 2.475 -> 2.478 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.73% +0.53%] index_select reverse : Elapsed 0.025 ms (2.479 ms / 100) 2.484 -> 2.482 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.28% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.486 ms / 100) 2.481 -> 2.483 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.52% +0.32%] index_select skip256 : Elapsed 0.025 ms (2.484 ms / 100) 2.485 -> 2.485 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.24% +0.16%] index_select spread : Elapsed 0.025 ms (2.485 ms / 100) 2.481 -> 2.479 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.56% +0.44%] index_select strided 3 : Elapsed 0.025 ms (2.484 ms / 100) 2.473 -> 2.477 ( +0.16%) [ +0.32% +0.24% +0.00% / +0.16% +0.81% +0.69%] index_select strided 5 : Elapsed 0.025 ms (2.481 ms / 100) 2.480 -> 2.472 ( -0.32%) [ +0.00% +0.08% +0.12% / -0.32% +0.48% +0.48%] index_select strided 7 : Elapsed 0.025 ms (2.480 ms / 100) 2.475 -> 2.476 ( +0.04%) [ +0.08% +0.24% +0.00% / +0.04% +0.36% +0.57%] index_select strided 8 : Elapsed 0.025 ms (2.477 ms / 100) 2.484 -> 2.485 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.28% +0.24%] index_select random : Elapsed 0.025 ms (2.484 ms / 100) 2.475 -> 2.477 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.53% +0.61%] index_select random_sorted : Elapsed 0.025 ms (2.475 ms / 100) 2.475 -> 2.474 ( -0.04%) [ +0.16% +0.00% +0.24% / -0.04% +0.48% +0.40%] index_select perm : Elapsed 0.025 ms (2.479 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.24% +0.16% +0.00% / +0.00% +0.73% +0.61%] index_select perm_sorted : Elapsed 0.025 ms (2.482 ms / 100) out_shape = [40, 4, 16, 5] in_shape = [40, 4, 16, 20] idx_dim = 3 B = [40, 4, 16, 5] (stride (320, 5, 20, 1)) A = [40, 4, 16, 20] (stride (1280, 320, 1, 16)) dim = 3 1.678 -> 1.679 ( +0.06%) [ +0.00% +0.00% +0.30% / +0.06% +0.42% +0.60%] index_select const : Elapsed 0.017 ms (1.678 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.06% +0.00% +0.12% / +0.06% +0.64% +0.52%] index_select wrap : Elapsed 0.017 ms (1.716 ms / 100) 1.717 -> 1.717 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.35%] index_select linear : Elapsed 0.017 ms (1.717 ms / 100) 1.715 -> 1.715 ( +0.00%) [ +0.12% +0.00% +0.17% / +0.00% +0.52% +0.41%] index_select reverse : Elapsed 0.017 ms (1.717 ms / 100) 1.679 -> 1.683 ( +0.24%) [ +0.00% +0.30% +0.18% / +0.24% +0.42% +0.30%] index_select skip64 : Elapsed 0.017 ms (1.679 ms / 100) 1.678 -> 1.680 ( +0.12%) [ +0.30% +0.36% +0.00% / +0.12% +0.42% +0.60%] index_select skip256 : Elapsed 0.017 ms (1.683 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.47% +0.70%] index_select spread : Elapsed 0.017 ms (1.715 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.17% +0.00% +0.23% / +0.06% +0.58% +0.35%] index_select strided 3 : Elapsed 0.017 ms (1.719 ms / 100) 1.703 -> 1.702 ( -0.06%) [ +0.00% +0.18% +0.12% / -0.06% +0.88% +0.76%] index_select strided 5 : Elapsed 0.017 ms (1.703 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.23% +0.47%] index_select strided 7 : Elapsed 0.017 ms (1.717 ms / 100) 1.714 -> 1.715 ( +0.06%) [ +0.00% +0.06% +0.12% / +0.06% +0.53% +0.41%] index_select strided 8 : Elapsed 0.017 ms (1.714 ms / 100) 1.715 -> 1.715 ( +0.00%) [ +0.00% +0.12% +0.29% / +0.00% +0.52% +0.70%] index_select strided 16 : Elapsed 0.017 ms (1.715 ms / 100) 1.693 -> 1.694 ( +0.06%) [ +0.24% +0.00% +0.24% / +0.06% +0.83% +0.89%] index_select random : Elapsed 0.017 ms (1.697 ms / 100) 1.695 -> 1.694 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.65% +0.53%] index_select random_sorted : Elapsed 0.017 ms (1.696 ms / 100) 1.715 -> 1.715 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.00% +0.41% +0.58%] index_select perm : Elapsed 0.017 ms (1.716 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.87% +0.58%] index_select perm_sorted : Elapsed 0.017 ms (1.718 ms / 100) B = [40, 4, 16, 5] (stride (320, 1, 20, 4)) A = [40, 4, 16, 20] (stride (1280, 320, 20, 1)) dim = 3 1.648 -> 1.647 ( -0.06%) [ +0.24% +0.12% +0.00% / -0.06% +0.06% +0.12%] index_select const : Elapsed 0.017 ms (1.652 ms / 100) 1.647 -> 1.655 ( +0.49%) [ +0.18% +0.55% +0.00% / +0.49% +0.97% +1.03%] index_select wrap : Elapsed 0.017 ms (1.650 ms / 100) 1.657 -> 1.655 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.66% +0.72%] index_select linear : Elapsed 0.017 ms (1.658 ms / 100) 1.651 -> 1.654 ( +0.18%) [ +0.00% +0.30% +0.06% / +0.18% +0.48% +0.73%] index_select reverse : Elapsed 0.017 ms (1.651 ms / 100) 1.647 -> 1.646 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +0.30% +0.36%] index_select skip64 : Elapsed 0.016 ms (1.648 ms / 100) 1.650 -> 1.651 ( +0.06%) [ +0.18% +0.18% +0.00% / +0.06% +0.42% +0.36%] index_select skip256 : Elapsed 0.017 ms (1.653 ms / 100) 1.673 -> 1.671 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.30% +0.42%] index_select spread : Elapsed 0.017 ms (1.673 ms / 100) 1.676 -> 1.676 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.66% +0.60%] index_select strided 3 : Elapsed 0.017 ms (1.678 ms / 100) 1.671 -> 1.675 ( +0.24%) [ +0.12% +0.12% +0.00% / +0.24% +0.72% +0.42%] index_select strided 5 : Elapsed 0.017 ms (1.673 ms / 100) 1.660 -> 1.661 ( +0.06%) [ +0.06% +0.18% +0.00% / +0.06% +0.42% +0.24%] index_select strided 7 : Elapsed 0.017 ms (1.661 ms / 100) 1.664 -> 1.664 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.24% +0.30%] index_select strided 8 : Elapsed 0.017 ms (1.665 ms / 100) 1.669 -> 1.674 ( +0.30%) [ +0.00% +0.06% +0.12% / +0.30% +0.54% +0.30%] index_select strided 16 : Elapsed 0.017 ms (1.669 ms / 100) 1.664 -> 1.664 ( +0.00%) [ +0.00% +0.12% +0.18% / +0.00% +0.54% +0.36%] index_select random : Elapsed 0.017 ms (1.664 ms / 100) 1.672 -> 1.673 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.54% +0.42%] index_select random_sorted : Elapsed 0.017 ms (1.674 ms / 100) 1.660 -> 1.662 ( +0.12%) [ +0.00% +0.06% +0.06% / +0.12% +0.54% +0.54%] index_select perm : Elapsed 0.017 ms (1.660 ms / 100) 1.673 -> 1.673 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.30% +0.24%] index_select perm_sorted : Elapsed 0.017 ms (1.673 ms / 100) B = [40, 4, 16, 5] (stride (320, 1, 4, 64)) A = [40, 4, 16, 20] (stride (64, 16, 1, 2560)) dim = 3 1.574 -> 1.572 ( -0.13%) [ +0.00% +0.19% +0.00% / -0.13% +0.06% +0.19%] index_select const : Elapsed 0.016 ms (1.574 ms / 100) 1.576 -> 1.574 ( -0.13%) [ +0.06% +0.32% +0.00% / -0.13% +0.32% +0.00%] index_select wrap : Elapsed 0.016 ms (1.577 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.00% +0.19% +0.19% / +0.06% +0.32% +0.38%] index_select linear : Elapsed 0.016 ms (1.575 ms / 100) 1.577 -> 1.572 ( -0.32%) [ +0.06% +0.00% +0.38% / -0.32% +0.25% -0.13%] index_select reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.573 -> 1.570 ( -0.19%) [ +0.00% +0.06% +0.06% / -0.19% +0.13% +0.45%] index_select skip64 : Elapsed 0.016 ms (1.573 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.19% +0.00% +0.00% / +0.13% +0.25% +0.38%] index_select skip256 : Elapsed 0.016 ms (1.577 ms / 100) 1.590 -> 1.592 ( +0.13%) [ +0.19% +0.31% +0.00% / +0.13% +0.44% +0.38%] index_select spread : Elapsed 0.016 ms (1.593 ms / 100) 1.591 -> 1.591 ( +0.00%) [ +0.13% +0.06% +0.00% / +0.25% +0.06% +0.00%] index_select strided 3 : Elapsed 0.016 ms (1.593 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.00% +0.51% +0.51% / +0.13% +0.25% +0.38%] index_select strided 5 : Elapsed 0.016 ms (1.572 ms / 100) 1.597 -> 1.587 ( -0.63%) [ +0.00% +0.31% +0.31% / +0.13% -0.63% -0.63%] index_select strided 7 : Elapsed 0.016 ms (1.597 ms / 100) 1.590 -> 1.589 ( -0.06%) [ +0.19% +0.00% +0.25% / -0.06% +0.63% +0.44%] index_select strided 8 : Elapsed 0.016 ms (1.593 ms / 100) 1.589 -> 1.591 ( +0.13%) [ +0.25% +0.00% +0.44% / +0.13% +0.44% +0.38%] index_select strided 16 : Elapsed 0.016 ms (1.593 ms / 100) 1.588 -> 1.586 ( -0.13%) [ +0.13% +0.00% +0.00% / -0.13% -0.13% +0.13%] index_select random : Elapsed 0.016 ms (1.590 ms / 100) 1.586 -> 1.585 ( -0.06%) [ +0.19% +0.00% +0.13% / +0.06% -0.06% -0.06%] index_select random_sorted : Elapsed 0.016 ms (1.589 ms / 100) 1.594 -> 1.584 ( -0.63%) [ +0.00% +0.13% +0.56% / +0.56% -0.50% -0.63%] index_select perm : Elapsed 0.016 ms (1.594 ms / 100) 1.593 -> 1.583 ( -0.63%) [ +0.00% +0.00% +0.63% / +0.88% -0.56% -0.63%] index_select perm_sorted : Elapsed 0.016 ms (1.593 ms / 100) B = [40, 4, 16, 5] (stride (5, 3200, 200, 1)) dim = 3 fill_cnt = 20 good 1.809 -> 1.685 ( -6.85%) [ +0.44% +0.00% +0.28% / -6.85% -6.30% -5.97%] index_fill_ const : Elapsed 0.018 ms (1.817 ms / 100) good 1.825 -> 1.682 ( -7.84%) [ +0.00% +0.00% +0.05% / -7.84% -6.90% -6.96%] index_fill_ linear : Elapsed 0.018 ms (1.825 ms / 100) good 1.826 -> 1.680 ( -8.00%) [ +0.00% +0.27% +0.11% / -8.00% -7.01% -6.68%] index_fill_ reverse : Elapsed 0.018 ms (1.826 ms / 100) good 1.812 -> 1.680 ( -7.28%) [ +0.00% +0.11% +0.11% / -5.02% -7.28% -7.28%] index_fill_ skip64 : Elapsed 0.018 ms (1.812 ms / 100) good 1.812 -> 1.673 ( -7.67%) [ +0.11% +0.11% +0.00% / -5.19% -7.67% -7.45%] index_fill_ skip256 : Elapsed 0.018 ms (1.814 ms / 100) good 1.820 -> 1.680 ( -7.69%) [ +0.16% +0.00% +0.22% / -6.26% -7.69% -7.47%] index_fill_ spread : Elapsed 0.018 ms (1.823 ms / 100) good 1.820 -> 1.683 ( -7.53%) [ +0.11% +0.00% +0.38% / -6.48% -7.36% -7.53%] index_fill_ strided 3 : Elapsed 0.018 ms (1.822 ms / 100) good 1.830 -> 1.664 ( -9.07%) [ +0.00% +0.05% +0.27% / -9.07% -6.23% -6.34%] index_fill_ random : Elapsed 0.018 ms (1.830 ms / 100) good 1.832 -> 1.657 ( -9.55%) [ +0.00% +0.11% +0.00% / -9.55% -6.71% -6.50%] index_fill_ random_sorted : Elapsed 0.018 ms (1.832 ms / 100) B = [40, 4, 16, 5] (stride (1, 200, 800, 40)) A = [40, 4, 16, 20] (stride (64, 16, 1, 2560)) dim = 3 1.576 -> 1.577 ( +0.06%) [ +0.00% +0.00% +0.13% / +0.13% +0.06% +0.06%] index_select const : Elapsed 0.016 ms (1.576 ms / 100) 1.572 -> 1.571 ( -0.06%) [ +0.00% +0.32% +0.38% / +0.57% -0.06% +0.13%] index_select wrap : Elapsed 0.016 ms (1.572 ms / 100) 1.568 -> 1.568 ( +0.00%) [ +0.19% +0.00% +0.13% / +0.00% +0.13% +0.83%] index_select linear : Elapsed 0.016 ms (1.571 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.06% +0.00% +0.38% / +0.06% +0.32% +0.19%] index_select reverse : Elapsed 0.016 ms (1.573 ms / 100) 1.569 -> 1.572 ( +0.19%) [ +0.00% +0.57% +0.13% / +0.19% +0.51% +0.64%] index_select skip64 : Elapsed 0.016 ms (1.569 ms / 100) 1.571 -> 1.575 ( +0.25%) [ +0.13% +0.19% +0.00% / +0.25% +0.57% +0.32%] index_select skip256 : Elapsed 0.016 ms (1.573 ms / 100) 1.585 -> 1.584 ( -0.06%) [ +0.13% +0.00% +0.06% / -0.06% +0.50% +0.19%] index_select spread : Elapsed 0.016 ms (1.587 ms / 100) 1.599 -> 1.579 ( -1.25%) [ +0.00% +0.00% +0.13% / -0.13% -1.25% -0.88%] index_select strided 3 : Elapsed 0.016 ms (1.599 ms / 100) 1.583 -> 1.584 ( +0.06%) [ +0.00% +0.19% +0.19% / +0.06% +0.06% +0.13%] index_select strided 5 : Elapsed 0.016 ms (1.583 ms / 100) 1.590 -> 1.586 ( -0.25%) [ +0.06% +0.00% +0.00% / +0.06% -0.25% +0.00%] index_select strided 7 : Elapsed 0.016 ms (1.591 ms / 100) 1.581 -> 1.583 ( +0.13%) [ +0.25% +0.32% +0.00% / +0.44% +0.13% +0.38%] index_select strided 8 : Elapsed 0.016 ms (1.585 ms / 100) 1.584 -> 1.584 ( +0.00%) [ +0.06% +0.25% +0.00% / +0.00% +0.25% +0.06%] index_select strided 16 : Elapsed 0.016 ms (1.585 ms / 100) 1.595 -> 1.597 ( +0.13%) [ +0.00% +0.13% +0.25% / +0.44% +0.13% +0.19%] index_select random : Elapsed 0.016 ms (1.595 ms / 100) 1.599 -> 1.595 ( -0.25%) [ +0.00% +0.00% +0.06% / +0.00% -0.25% -0.13%] index_select random_sorted : Elapsed 0.016 ms (1.599 ms / 100) 1.592 -> 1.584 ( -0.50%) [ +0.00% +0.13% +0.00% / +0.00% -0.50% -0.31%] index_select perm : Elapsed 0.016 ms (1.592 ms / 100) 1.595 -> 1.587 ( -0.50%) [ +0.00% +0.19% +0.13% / +0.13% -0.44% -0.50%] index_select perm_sorted : Elapsed 0.016 ms (1.595 ms / 100) B = [40, 4, 16, 5] (stride (64, 16, 1, 2560)) A = [40, 4, 16, 20] (stride (1280, 1, 4, 64)) dim = 3 1.577 -> 1.581 ( +0.25%) [ +0.38% +0.00% +0.13% / +0.25% +1.20% +1.33%] index_select const : Elapsed 0.016 ms (1.583 ms / 100) 1.600 -> 1.601 ( +0.06%) [ +0.00% +0.00% +0.19% / +0.06% +1.44% +1.31%] index_select wrap : Elapsed 0.016 ms (1.600 ms / 100) 1.601 -> 1.602 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +1.25% +1.25%] index_select linear : Elapsed 0.016 ms (1.603 ms / 100) 1.600 -> 1.601 ( +0.06%) [ +0.13% +0.13% +0.00% / +0.06% +1.31% +1.62%] index_select reverse : Elapsed 0.016 ms (1.602 ms / 100) 1.577 -> 1.577 ( +0.00%) [ +0.19% +0.13% +0.00% / +0.00% +1.40% +1.14%] index_select skip64 : Elapsed 0.016 ms (1.580 ms / 100) 1.576 -> 1.582 ( +0.38%) [ +0.25% +0.13% +0.00% / +0.38% +1.08% +1.02%] index_select skip256 : Elapsed 0.016 ms (1.580 ms / 100) 1.593 -> 1.592 ( -0.06%) [ +0.00% +0.31% +0.06% / -0.06% +1.44% +1.38%] index_select spread : Elapsed 0.016 ms (1.593 ms / 100) 1.593 -> 1.593 ( +0.00%) [ +0.00% +0.38% +0.31% / +0.00% +1.32% +1.32%] index_select strided 3 : Elapsed 0.016 ms (1.593 ms / 100) 1.580 -> 1.582 ( +0.13%) [ +0.06% +0.00% +0.25% / +0.13% +1.46% +1.58%] index_select strided 5 : Elapsed 0.016 ms (1.581 ms / 100) 1.595 -> 1.593 ( -0.13%) [ +0.06% +0.00% +0.00% / -0.13% +1.00% +1.19%] index_select strided 7 : Elapsed 0.016 ms (1.596 ms / 100) 1.591 -> 1.593 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +1.45% +1.45%] index_select strided 8 : Elapsed 0.016 ms (1.593 ms / 100) 1.590 -> 1.591 ( +0.06%) [ +0.00% +0.13% +0.13% / +0.06% +1.19% +1.32%] index_select strided 16 : Elapsed 0.016 ms (1.590 ms / 100) 1.593 -> 1.597 ( +0.25%) [ +0.19% +0.00% +0.25% / +0.25% +1.32% +1.13%] index_select random : Elapsed 0.016 ms (1.596 ms / 100) 1.592 -> 1.600 ( +0.50%) [ +0.00% +0.19% +0.06% / +0.50% +1.19% +1.26%] index_select random_sorted : Elapsed 0.016 ms (1.592 ms / 100) 1.596 -> 1.597 ( +0.06%) [ +0.00% +0.00% +0.13% / +0.06% +1.38% +1.13%] index_select perm : Elapsed 0.016 ms (1.596 ms / 100) 1.594 -> 1.599 ( +0.31%) [ +0.06% +0.06% +0.00% / +0.31% +1.32% +1.51%] index_select perm_sorted : Elapsed 0.016 ms (1.595 ms / 100) B = [40, 4, 16, 5] (stride (64, 16, 1, 2560)) A = [40, 4, 16, 20] (stride (1, 40, 3200, 160)) dim = 3 1.669 -> 1.673 ( +0.24%) [ +0.00% +0.24% +0.24% / +0.42% +0.36% +0.24%] index_select const : Elapsed 0.017 ms (1.669 ms / 100) 1.674 -> 1.678 ( +0.24%) [ +0.36% +0.18% +0.00% / +0.36% +0.24% +0.36%] index_select wrap : Elapsed 0.017 ms (1.680 ms / 100) 1.667 -> 1.672 ( +0.30%) [ +0.30% +0.18% +0.00% / +0.30% +0.78% +0.48%] index_select linear : Elapsed 0.017 ms (1.672 ms / 100) 1.669 -> 1.664 ( -0.30%) [ +0.06% +0.18% +0.00% / +0.00% -0.30% -0.24%] index_select reverse : Elapsed 0.017 ms (1.670 ms / 100) 1.661 -> 1.662 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.90% +1.44%] index_select skip64 : Elapsed 0.017 ms (1.661 ms / 100) 1.665 -> 1.665 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.72% +0.78%] index_select skip256 : Elapsed 0.017 ms (1.665 ms / 100) 1.680 -> 1.680 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.18% +0.30%] index_select spread : Elapsed 0.017 ms (1.680 ms / 100) 1.658 -> 1.663 ( +0.30%) [ +0.00% +0.18% +0.12% / +0.30% +0.42% +0.54%] index_select strided 3 : Elapsed 0.017 ms (1.658 ms / 100) 1.669 -> 1.665 ( -0.24%) [ +0.06% +0.00% +0.00% / +0.00% -0.24% +0.12%] index_select strided 5 : Elapsed 0.017 ms (1.670 ms / 100) 1.661 -> 1.667 ( +0.36%) [ +0.00% +0.12% +0.54% / +0.36% +0.78% +0.60%] index_select strided 7 : Elapsed 0.017 ms (1.661 ms / 100) 1.676 -> 1.677 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.54% +0.42%] index_select strided 8 : Elapsed 0.017 ms (1.677 ms / 100) 1.675 -> 1.678 ( +0.18%) [ +0.18% +0.42% +0.00% / +0.18% +1.13% +1.13%] index_select strided 16 : Elapsed 0.017 ms (1.678 ms / 100) 1.655 -> 1.652 ( -0.18%) [ +0.00% +0.00% +0.00% / -0.18% +0.18% +0.18%] index_select random : Elapsed 0.017 ms (1.655 ms / 100) 1.662 -> 1.665 ( +0.18%) [ +0.06% +0.12% +0.00% / +0.18% +0.48% +0.54%] index_select random_sorted : Elapsed 0.017 ms (1.663 ms / 100) 1.679 -> 1.677 ( -0.12%) [ +0.06% +0.18% +0.00% / -0.06% -0.12% -0.12%] index_select perm : Elapsed 0.017 ms (1.680 ms / 100) 1.686 -> 1.687 ( +0.06%) [ +0.30% +0.12% +0.00% / +0.06% +0.18% +0.36%] index_select perm_sorted : Elapsed 0.017 ms (1.691 ms / 100) B = [40, 4, 16, 5] (stride (16, 640, 1, 2560)) A = [40, 4, 16, 20] (stride (1280, 20, 80, 1)) dim = 3 1.871 -> 1.872 ( +0.05%) [ +0.16% +0.00% +0.21% / +0.05% +0.43% +0.32%] index_select const : Elapsed 0.019 ms (1.874 ms / 100) 1.881 -> 1.878 ( -0.16%) [ +0.00% +0.00% +0.11% / -0.16% +0.48% +0.37%] index_select wrap : Elapsed 0.019 ms (1.881 ms / 100) 1.879 -> 1.878 ( -0.05%) [ +0.16% +0.00% +0.05% / -0.05% +0.43% +0.53%] index_select linear : Elapsed 0.019 ms (1.882 ms / 100) 1.881 -> 1.882 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.37% +0.43%] index_select reverse : Elapsed 0.019 ms (1.882 ms / 100) 1.874 -> 1.875 ( +0.05%) [ +0.27% +0.00% +0.05% / +0.05% +0.37% +0.32%] index_select skip64 : Elapsed 0.019 ms (1.879 ms / 100) 1.866 -> 1.871 ( +0.27%) [ +0.16% +0.00% +0.59% / +0.27% +0.64% +0.70%] index_select skip256 : Elapsed 0.019 ms (1.869 ms / 100) 1.905 -> 1.905 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.16% +0.10%] index_select spread : Elapsed 0.019 ms (1.905 ms / 100) 1.901 -> 1.902 ( +0.05%) [ +0.00% +0.21% +0.00% / +0.05% +0.42% +0.26%] index_select strided 3 : Elapsed 0.019 ms (1.901 ms / 100) 1.904 -> 1.904 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.26% +0.32%] index_select strided 5 : Elapsed 0.019 ms (1.905 ms / 100) 1.890 -> 1.894 ( +0.21%) [ +0.21% +0.05% +0.00% / +0.21% +0.69% +0.74%] index_select strided 7 : Elapsed 0.019 ms (1.894 ms / 100) 1.891 -> 1.890 ( -0.05%) [ +0.11% +0.05% +0.00% / -0.05% +0.69% +0.58%] index_select strided 8 : Elapsed 0.019 ms (1.893 ms / 100) 1.902 -> 1.905 ( +0.16%) [ +0.00% +0.11% +0.16% / +0.16% +0.37% +0.32%] index_select strided 16 : Elapsed 0.019 ms (1.902 ms / 100) 1.882 -> 1.887 ( +0.27%) [ +0.00% +0.05% +0.16% / +0.27% +0.58% +0.48%] index_select random : Elapsed 0.019 ms (1.882 ms / 100) 1.887 -> 1.885 ( -0.11%) [ +0.16% +0.00% +0.21% / -0.11% +0.58% +0.32%] index_select random_sorted : Elapsed 0.019 ms (1.890 ms / 100) 1.894 -> 1.890 ( -0.21%) [ +0.05% +0.00% +0.00% / -0.21% +0.53% +0.48%] index_select perm : Elapsed 0.019 ms (1.895 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.21% +0.42%] index_select perm_sorted : Elapsed 0.019 ms (1.897 ms / 100) B = [40, 4, 16, 5] (stride (4, 1, 160, 2560)) A = [40, 4, 16, 20] (stride (1280, 320, 20, 1)) dim = 3 1.542 -> 1.543 ( +0.06%) [ +0.52% +0.19% +0.00% / +0.26% +0.13% +0.06%] index_select const : Elapsed 0.015 ms (1.550 ms / 100) 1.546 -> 1.544 ( -0.13%) [ +0.19% +0.19% +0.00% / -0.13% +1.03% +0.71%] index_select wrap : Elapsed 0.015 ms (1.549 ms / 100) 1.551 -> 1.551 ( +0.00%) [ +0.00% +0.06% +0.26% / +0.00% +0.45% +0.52%] index_select linear : Elapsed 0.016 ms (1.551 ms / 100) 1.544 -> 1.546 ( +0.13%) [ +0.19% +0.00% +0.00% / +0.13% +0.71% +0.65%] index_select reverse : Elapsed 0.015 ms (1.547 ms / 100) 1.541 -> 1.542 ( +0.06%) [ +0.26% +0.00% +0.06% / +0.06% +0.26% +0.26%] index_select skip64 : Elapsed 0.015 ms (1.545 ms / 100) 1.544 -> 1.547 ( +0.19%) [ +0.00% +0.19% +0.00% / +0.19% +0.45% +0.45%] index_select skip256 : Elapsed 0.015 ms (1.544 ms / 100) 1.571 -> 1.569 ( -0.13%) [ +0.32% +0.64% +0.00% / -0.13% +0.38% +0.38%] index_select spread : Elapsed 0.016 ms (1.576 ms / 100) 1.568 -> 1.573 ( +0.32%) [ +0.06% +0.00% +0.26% / +0.32% +1.08% +0.70%] index_select strided 3 : Elapsed 0.016 ms (1.569 ms / 100) 1.567 -> 1.569 ( +0.13%) [ +0.06% +0.06% +0.00% / +0.13% +1.15% +0.96%] index_select strided 5 : Elapsed 0.016 ms (1.568 ms / 100) 1.556 -> 1.559 ( +0.19%) [ +0.06% +0.19% +0.00% / +0.19% +0.26% +0.84%] index_select strided 7 : Elapsed 0.016 ms (1.557 ms / 100) 1.564 -> 1.563 ( -0.06%) [ +0.00% +0.06% +0.00% / +0.00% -0.06% +0.26%] index_select strided 8 : Elapsed 0.016 ms (1.564 ms / 100) 1.568 -> 1.570 ( +0.13%) [ +0.06% +0.00% +0.13% / +0.13% +0.51% +0.57%] index_select strided 16 : Elapsed 0.016 ms (1.569 ms / 100) 1.562 -> 1.559 ( -0.19%) [ +0.13% +0.38% +0.00% / -0.19% +0.38% +0.51%] index_select random : Elapsed 0.016 ms (1.564 ms / 100) 1.573 -> 1.570 ( -0.19%) [ +0.00% +0.38% +0.00% / -0.19% -0.19% +0.13%] index_select random_sorted : Elapsed 0.016 ms (1.573 ms / 100) 1.556 -> 1.554 ( -0.13%) [ +0.39% +0.00% +0.32% / -0.13% +0.45% +0.45%] index_select perm : Elapsed 0.016 ms (1.562 ms / 100) 1.560 -> 1.559 ( -0.06%) [ +0.32% +0.00% +0.00% / -0.06% +0.51% +0.19%] index_select perm_sorted : Elapsed 0.016 ms (1.565 ms / 100) B = [40, 4, 16, 5] (stride (1, 40, 160, 2560)) A = [40, 4, 16, 20] (stride (320, 12800, 1, 16)) dim = 3 1.774 -> 1.777 ( +0.17%) [ +0.45% +0.00% +0.06% / +0.17% +0.90% +0.79%] index_select const : Elapsed 0.018 ms (1.782 ms / 100) 1.811 -> 1.814 ( +0.17%) [ +0.00% +0.06% +0.28% / +0.17% +0.94% +0.72%] index_select wrap : Elapsed 0.018 ms (1.811 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.00% +0.06% +0.22% / +0.22% +0.55% +0.50%] index_select linear : Elapsed 0.018 ms (1.813 ms / 100) 1.813 -> 1.822 ( +0.50%) [ +0.00% +0.22% +0.17% / +0.50% +0.55% +0.50%] index_select reverse : Elapsed 0.018 ms (1.813 ms / 100) 1.775 -> 1.779 ( +0.23%) [ +0.11% +0.00% +0.06% / +0.23% +0.79% +0.56%] index_select skip64 : Elapsed 0.018 ms (1.777 ms / 100) 1.774 -> 1.774 ( +0.00%) [ +0.23% +0.11% +0.00% / +0.00% +0.73% +0.68%] index_select skip256 : Elapsed 0.018 ms (1.778 ms / 100) 1.810 -> 1.814 ( +0.22%) [ +0.00% +0.17% +0.11% / +0.22% +0.55% +0.66%] index_select spread : Elapsed 0.018 ms (1.810 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.17% +0.11% +0.00% / +0.22% +0.61% +0.50%] index_select strided 3 : Elapsed 0.018 ms (1.816 ms / 100) 1.803 -> 1.804 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.39% +0.44%] index_select strided 5 : Elapsed 0.018 ms (1.803 ms / 100) 1.811 -> 1.817 ( +0.33%) [ +0.00% +0.17% +0.44% / +0.33% +0.66% +0.77%] index_select strided 7 : Elapsed 0.018 ms (1.811 ms / 100) 1.811 -> 1.815 ( +0.22%) [ +0.00% +0.06% +0.22% / +0.22% +0.66% +0.72%] index_select strided 8 : Elapsed 0.018 ms (1.811 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.06% +0.06% +0.00% / +0.11% +0.61% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.813 ms / 100) 1.815 -> 1.815 ( +0.00%) [ +0.00% +0.11% +0.28% / +0.00% +0.28% +0.28%] index_select random : Elapsed 0.018 ms (1.815 ms / 100) 1.811 -> 1.817 ( +0.33%) [ +0.22% +0.00% +0.22% / +0.33% +0.61% +0.83%] index_select random_sorted : Elapsed 0.018 ms (1.815 ms / 100) 1.814 -> 1.816 ( +0.11%) [ +0.00% +0.06% +0.00% / +0.11% +0.61% +0.61%] index_select perm : Elapsed 0.018 ms (1.814 ms / 100) 1.814 -> 1.817 ( +0.17%) [ +0.00% +0.00% +0.11% / +0.17% +0.50% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.814 ms / 100) out_shape = [5, 4, 20, 16] in_shape = [40, 4, 20, 16] idx_dim = 0 B = [5, 4, 20, 16] (stride (1280, 1, 64, 4)) A = [40, 4, 20, 16] (stride (64, 16, 2560, 1)) dim = 0 1.381 -> 1.382 ( +0.07%) [ +0.00% +0.29% +0.65% / +0.07% +0.14% +0.22%] index_select const : Elapsed 0.014 ms (1.381 ms / 100) 1.383 -> 1.379 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.29% +0.22% +0.07%] index_select wrap : Elapsed 0.014 ms (1.383 ms / 100) 1.378 -> 1.384 ( +0.44%) [ +0.15% +0.22% +0.00% / +0.51% +0.44% +0.58%] index_select linear : Elapsed 0.014 ms (1.380 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.14% +0.00% +0.00% / -0.07% +0.14% +0.22%] index_select reverse : Elapsed 0.014 ms (1.385 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.00% +0.00% +0.07% / +0.22% +0.44% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.379 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.29% +0.44%] index_select skip256 : Elapsed 0.014 ms (1.379 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.36% +0.36%] index_select spread : Elapsed 0.014 ms (1.379 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.44% +0.44%] index_select strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.15% +0.22% +0.00% / +0.22% +0.44% +0.36%] index_select strided 5 : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.51% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.22% +0.15% / +0.00% +0.44% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.51% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.22% +0.00% +0.15% / +0.07% +0.51% +0.58%] index_select random : Elapsed 0.014 ms (1.380 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.58% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.378 ms / 100) 1.379 -> 1.377 ( -0.15%) [ +0.00% +0.00% +0.07% / -0.15% +0.44% +0.36%] index_select perm : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.378 ms / 100) B = [5, 4, 20, 16] (stride (320, 1600, 1, 20)) A = [40, 4, 20, 16] (stride (1280, 20, 1, 80)) dim = 0 1.436 -> 1.435 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.42% +0.42%] index_select const : Elapsed 0.014 ms (1.436 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.49% +0.49%] index_select wrap : Elapsed 0.014 ms (1.424 ms / 100) 1.437 -> 1.439 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.77% +0.90%] index_select linear : Elapsed 0.014 ms (1.439 ms / 100) 1.436 -> 1.436 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.63% +0.63%] index_select reverse : Elapsed 0.014 ms (1.437 ms / 100) 1.433 -> 1.433 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.63% +0.63%] index_select skip64 : Elapsed 0.014 ms (1.435 ms / 100) 1.432 -> 1.433 ( +0.07%) [ +0.21% +0.07% +0.00% / +0.07% +0.70% +0.77%] index_select skip256 : Elapsed 0.014 ms (1.435 ms / 100) 1.432 -> 1.434 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.84% +0.84%] index_select spread : Elapsed 0.014 ms (1.434 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.63% +0.63%] index_select strided 3 : Elapsed 0.014 ms (1.421 ms / 100) 1.438 -> 1.438 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.90% +0.90%] index_select strided 5 : Elapsed 0.014 ms (1.439 ms / 100) 1.433 -> 1.433 ( +0.00%) [ +0.28% +0.00% +0.14% / +0.00% +0.77% +0.77%] index_select strided 7 : Elapsed 0.014 ms (1.437 ms / 100) 1.437 -> 1.437 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.77% +0.70%] index_select strided 8 : Elapsed 0.014 ms (1.438 ms / 100) 1.438 -> 1.438 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.76%] index_select strided 16 : Elapsed 0.014 ms (1.438 ms / 100) 1.447 -> 1.449 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.55% +0.55%] index_select random : Elapsed 0.014 ms (1.449 ms / 100) 1.426 -> 1.425 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.91% +0.84%] index_select random_sorted : Elapsed 0.014 ms (1.426 ms / 100) 1.434 -> 1.434 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.56% +0.56%] index_select perm : Elapsed 0.014 ms (1.434 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.70% +0.70%] index_select perm_sorted : Elapsed 0.014 ms (1.424 ms / 100) B = [5, 4, 20, 16] (stride (320, 1600, 1, 20)) A = [40, 4, 20, 16] (stride (320, 12800, 16, 1)) dim = 0 1.377 -> 1.376 ( -0.07%) [ +0.15% +0.07% +0.00% / -0.07% +1.02% +0.51%] index_select const : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.15% +0.00% +0.22% / +0.15% +0.58% +0.58%] index_select wrap : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.36%] index_select linear : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.58%] index_select reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.51% +0.44%] index_select skip64 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.376 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.51% +0.51%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +1.16% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.58% +0.58%] index_select strided 5 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.15% +0.00% +0.22% / +0.15% +0.65% +0.65%] index_select strided 7 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.58% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.65%] index_select strided 16 : Elapsed 0.014 ms (1.376 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.65% +0.80%] index_select random : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.65% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.377 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.80% +0.73%] index_select perm : Elapsed 0.014 ms (1.375 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [5, 4, 20, 16] (stride (1, 1600, 5, 100)) A = [40, 4, 20, 16] (stride (20, 800, 1, 3200)) dim = 0 1.574 -> 1.574 ( +0.00%) [ +0.00% +0.38% +0.25% / +0.00% +0.13% +0.06%] index_select const : Elapsed 0.016 ms (1.574 ms / 100) 1.544 -> 1.545 ( +0.06%) [ +0.06% +0.00% +0.13% / +0.06% +0.71% +0.97%] index_select wrap : Elapsed 0.015 ms (1.545 ms / 100) 1.540 -> 1.543 ( +0.19%) [ +0.06% +0.00% +0.06% / +0.19% +0.78% +0.71%] index_select linear : Elapsed 0.015 ms (1.541 ms / 100) 1.535 -> 1.537 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +0.85% +0.98%] index_select reverse : Elapsed 0.015 ms (1.537 ms / 100) 1.558 -> 1.552 ( -0.39%) [ +0.00% +0.06% +0.06% / -0.39% +0.19% +1.09%] index_select skip64 : Elapsed 0.016 ms (1.558 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.00% +0.19% +0.00% / +0.25% +0.19% +0.06%] index_select skip256 : Elapsed 0.016 ms (1.575 ms / 100) 1.539 -> 1.538 ( -0.06%) [ +0.06% +0.00% +0.13% / -0.06% +1.23% +0.91%] index_select spread : Elapsed 0.015 ms (1.540 ms / 100) 1.541 -> 1.537 ( -0.26%) [ +0.00% +0.19% +0.13% / -0.26% +1.04% +0.84%] index_select strided 3 : Elapsed 0.015 ms (1.541 ms / 100) 1.537 -> 1.538 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.91% +0.78%] index_select strided 5 : Elapsed 0.015 ms (1.539 ms / 100) 1.550 -> 1.555 ( +0.32%) [ +0.00% +0.39% +0.00% / +0.32% +1.35% +1.74%] index_select strided 7 : Elapsed 0.015 ms (1.550 ms / 100) 1.531 -> 1.532 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.72% +0.72%] index_select strided 8 : Elapsed 0.015 ms (1.532 ms / 100) 1.554 -> 1.560 ( +0.39%) [ +0.32% +0.45% +0.00% / +0.39% +1.09% +1.61%] index_select strided 16 : Elapsed 0.016 ms (1.559 ms / 100) 1.539 -> 1.538 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.52% +0.58%] index_select random : Elapsed 0.015 ms (1.540 ms / 100) 1.536 -> 1.536 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.98% +1.04%] index_select random_sorted : Elapsed 0.015 ms (1.538 ms / 100) 1.554 -> 1.557 ( +0.19%) [ +0.19% +0.64% +0.00% / +0.19% +1.09% +1.35%] index_select perm : Elapsed 0.016 ms (1.557 ms / 100) 1.546 -> 1.547 ( +0.06%) [ +0.19% +0.13% +0.00% / +0.06% +1.03% +2.20%] index_select perm_sorted : Elapsed 0.015 ms (1.549 ms / 100) B = [5, 4, 20, 16] (stride (1, 100, 5, 400)) A = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) dim = 0 0.680 -> 0.680 ( +0.00%) [ +0.29% +0.15% +0.00% / +0.00% +0.59% +0.44%] index_select const : Elapsed 0.007 ms (0.682 ms / 100) 0.673 -> 0.675 ( +0.30%) [ +0.00% +0.30% +0.30% / +0.30% +0.59% +0.45%] index_select wrap : Elapsed 0.007 ms (0.673 ms / 100) 0.673 -> 0.672 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.30% +0.15%] index_select linear : Elapsed 0.007 ms (0.673 ms / 100) 0.672 -> 0.672 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.45%] index_select reverse : Elapsed 0.007 ms (0.672 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.00% +0.00% +0.45% / +0.00% +0.30% +0.30%] index_select skip64 : Elapsed 0.007 ms (0.674 ms / 100) 0.681 -> 0.683 ( +0.29%) [ +0.29% +0.44% +0.00% / +0.29% +0.29% +0.44%] index_select skip256 : Elapsed 0.007 ms (0.683 ms / 100) 0.670 -> 0.671 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.45% +0.45%] index_select spread : Elapsed 0.007 ms (0.670 ms / 100) 0.674 -> 0.675 ( +0.15%) [ +0.30% +0.00% +0.15% / +0.59% +0.15% +0.15%] index_select strided 3 : Elapsed 0.007 ms (0.676 ms / 100) 0.668 -> 0.670 ( +0.30%) [ +0.15% +0.30% +0.00% / +0.30% +0.60% +0.60%] index_select strided 5 : Elapsed 0.007 ms (0.669 ms / 100) 0.672 -> 0.673 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.30% +0.45%] index_select strided 7 : Elapsed 0.007 ms (0.672 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.30% +0.45%] index_select strided 8 : Elapsed 0.007 ms (0.667 ms / 100) 0.676 -> 0.678 ( +0.30%) [ +0.00% +0.15% +0.00% / +0.30% +0.89% +0.89%] index_select strided 16 : Elapsed 0.007 ms (0.676 ms / 100) 0.672 -> 0.672 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.60% +0.74%] index_select random : Elapsed 0.007 ms (0.673 ms / 100) 0.675 -> 0.676 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.59% +0.59%] index_select random_sorted : Elapsed 0.007 ms (0.675 ms / 100) 0.666 -> 0.666 ( +0.00%) [ +0.30% +0.15% +0.00% / +0.00% +0.75% +0.75%] index_select perm : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.05% +1.20%] index_select perm_sorted : Elapsed 0.007 ms (0.668 ms / 100) B = [5, 4, 20, 16] (stride (4, 1, 20, 400)) A = [40, 4, 20, 16] (stride (1, 12800, 640, 40)) dim = 0 1.397 -> 1.398 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.64% +0.86%] index_select const : Elapsed 0.014 ms (1.398 ms / 100) 1.382 -> 1.389 ( +0.51%) [ +0.43% +0.80% +0.00% / +0.72% +0.51% +1.16%] index_select wrap : Elapsed 0.014 ms (1.388 ms / 100) 1.395 -> 1.396 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.72% +0.57%] index_select linear : Elapsed 0.014 ms (1.395 ms / 100) 1.394 -> 1.393 ( -0.07%) [ +0.00% +0.00% +0.29% / -0.07% +0.86% +0.72%] index_select reverse : Elapsed 0.014 ms (1.394 ms / 100) 1.388 -> 1.391 ( +0.22%) [ +0.07% +0.00% +0.00% / +0.22% +0.65% +0.43%] index_select skip64 : Elapsed 0.014 ms (1.389 ms / 100) 1.397 -> 1.398 ( +0.07%) [ +0.14% +0.00% +0.21% / +0.07% +0.72% +0.79%] index_select skip256 : Elapsed 0.014 ms (1.399 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.00% +0.00% +0.15% / -0.07% +0.44% +0.44%] index_select spread : Elapsed 0.014 ms (1.379 ms / 100) 1.356 -> 1.353 ( -0.22%) [ +0.00% +0.07% +0.15% / -0.22% +0.74% +0.66%] index_select strided 3 : Elapsed 0.014 ms (1.356 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.00% +0.07% +0.80% / -0.07% +0.29% +0.07%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.363 -> 1.363 ( +0.00%) [ +0.07% +0.00% +0.59% / +0.00% +0.88% +0.51%] index_select strided 7 : Elapsed 0.014 ms (1.364 ms / 100) 1.383 -> 1.385 ( +0.14%) [ +0.14% +0.00% +0.22% / +0.14% +0.72% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.385 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.29% +0.29% +0.00% / +0.15% +1.10% +1.03%] index_select strided 16 : Elapsed 0.014 ms (1.369 ms / 100) 1.391 -> 1.395 ( +0.29%) [ +0.00% +0.29% +0.22% / +0.29% +1.08% +1.01%] index_select random : Elapsed 0.014 ms (1.391 ms / 100) 1.354 -> 1.357 ( +0.22%) [ +0.07% +0.00% +0.59% / +0.22% +0.74% +0.89%] index_select random_sorted : Elapsed 0.014 ms (1.355 ms / 100) 1.373 -> 1.371 ( -0.15%) [ +0.07% +0.00% +0.44% / -0.15% +0.80% +0.87%] index_select perm : Elapsed 0.014 ms (1.374 ms / 100) 1.383 -> 1.381 ( -0.14%) [ +0.07% +0.00% +0.14% / -0.14% +0.43% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.384 ms / 100) out_shape = [40, 5, 20, 16] in_shape = [40, 4, 20, 16] idx_dim = 1 B = [40, 5, 20, 16] (stride (1600, 16, 80, 1)) A = [40, 4, 20, 16] (stride (1, 800, 40, 3200)) dim = 1 5.814 -> 5.812 ( -0.03%) [ +0.00% +0.24% +0.43% / +0.26% +0.03% -0.03%] index_add_ linear : Elapsed 0.058 ms (5.814 ms / 100) 5.760 -> 5.759 ( -0.02%) [ +0.10% +0.00% +0.35% / +0.16% -0.02% +0.00%] index_copy_ linear : Elapsed 0.058 ms (5.766 ms / 100) 5.828 -> 5.817 ( -0.19%) [ +0.02% +0.00% +0.05% / +0.03% -0.15% -0.19%] index_add_ reverse : Elapsed 0.058 ms (5.829 ms / 100) 5.764 -> 5.763 ( -0.02%) [ +0.02% +0.00% +0.10% / +0.21% +0.21% -0.02%] index_copy_ reverse : Elapsed 0.058 ms (5.765 ms / 100) 5.818 -> 5.815 ( -0.05%) [ +0.17% +0.05% +0.00% / +0.10% +0.03% -0.05%] index_add_ spread : Elapsed 0.058 ms (5.828 ms / 100) 5.761 -> 5.759 ( -0.03%) [ +0.09% +0.00% +0.16% / -0.02% -0.03% +0.02%] index_copy_ spread : Elapsed 0.058 ms (5.766 ms / 100) 5.835 -> 5.821 ( -0.24%) [ +0.09% +0.02% +0.00% / +0.05% -0.24% -0.15%] index_add_ strided 3 : Elapsed 0.058 ms (5.840 ms / 100) 5.780 -> 5.765 ( -0.26%) [ +0.03% +0.07% +0.00% / -0.02% -0.21% -0.26%] index_copy_ strided 3 : Elapsed 0.058 ms (5.782 ms / 100) 5.825 -> 5.811 ( -0.24%) [ +0.14% +0.00% +0.15% / +0.27% -0.24% -0.19%] index_add_ perm : Elapsed 0.058 ms (5.833 ms / 100) 5.773 -> 5.761 ( -0.21%) [ +0.05% +0.00% +0.07% / +0.10% -0.21% -0.12%] index_copy_ perm : Elapsed 0.058 ms (5.776 ms / 100) 5.826 -> 5.815 ( -0.19%) [ +0.00% +0.03% +0.10% / +0.05% -0.19% -0.07%] index_add_ perm_sorted : Elapsed 0.058 ms (5.826 ms / 100) 5.768 -> 5.764 ( -0.07%) [ +0.00% +0.10% +0.09% / +0.21% +0.09% -0.07%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.768 ms / 100) 6.082 -> 6.072 ( -0.16%) [ +0.10% +0.00% +0.16% / +0.03% -0.16% -0.12%] index_select const : Elapsed 0.061 ms (6.088 ms / 100) 6.153 -> 6.139 ( -0.23%) [ +0.05% +0.00% +0.10% / +0.10% -0.07% -0.23%] index_select wrap : Elapsed 0.062 ms (6.156 ms / 100) 6.144 -> 6.143 ( -0.02%) [ +0.05% +0.03% +0.00% / +0.11% -0.02% -0.02%] index_select linear : Elapsed 0.061 ms (6.147 ms / 100) 6.149 -> 6.140 ( -0.15%) [ +0.00% +0.13% +0.00% / +0.07% -0.13% -0.15%] index_select reverse : Elapsed 0.061 ms (6.149 ms / 100) 6.079 -> 6.071 ( -0.13%) [ +0.02% +0.00% +0.20% / +0.16% -0.03% -0.13%] index_select skip64 : Elapsed 0.061 ms (6.080 ms / 100) 6.080 -> 6.064 ( -0.26%) [ +0.05% +0.00% +0.25% / +0.16% -0.08% -0.26%] index_select skip256 : Elapsed 0.061 ms (6.083 ms / 100) 6.142 -> 6.125 ( -0.28%) [ +0.00% +0.08% +0.03% / +0.18% -0.15% -0.28%] index_select spread : Elapsed 0.061 ms (6.142 ms / 100) 6.152 -> 6.137 ( -0.24%) [ +0.00% +0.23% +0.13% / +0.02% -0.08% -0.24%] index_select strided 3 : Elapsed 0.062 ms (6.152 ms / 100) 6.117 -> 6.121 ( +0.07%) [ +0.07% +0.00% +0.05% / +0.07% +0.23% +0.15%] index_select random : Elapsed 0.061 ms (6.121 ms / 100) 6.098 -> 6.105 ( +0.11%) [ +0.00% +0.03% +0.16% / +0.16% +0.11% +0.13%] index_select random_sorted : Elapsed 0.061 ms (6.098 ms / 100) B = [40, 5, 20, 16] (stride (320, 12800, 16, 1)) A = [40, 4, 20, 16] (stride (80, 1, 4, 3200)) dim = 1 5.683 -> 5.686 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.74% +0.49%] index_add_ linear : Elapsed 0.057 ms (5.683 ms / 100) 5.620 -> 5.639 ( +0.34%) [ +0.00% +0.11% +0.07% / +0.34% +0.91% +0.85%] index_copy_ linear : Elapsed 0.056 ms (5.620 ms / 100) 5.675 -> 5.685 ( +0.18%) [ +0.12% +0.14% +0.00% / +0.18% +0.81% +0.79%] index_add_ reverse : Elapsed 0.057 ms (5.682 ms / 100) 5.623 -> 5.634 ( +0.20%) [ +0.00% +0.23% +0.18% / +0.20% +0.85% +0.80%] index_copy_ reverse : Elapsed 0.056 ms (5.623 ms / 100) 5.683 -> 5.684 ( +0.02%) [ +0.04% +0.05% +0.00% / +0.02% +0.63% +0.70%] index_add_ spread : Elapsed 0.057 ms (5.685 ms / 100) 5.627 -> 5.635 ( +0.14%) [ +0.00% +0.02% +0.07% / +0.14% +0.66% +0.68%] index_copy_ spread : Elapsed 0.056 ms (5.627 ms / 100) 5.690 -> 5.692 ( +0.04%) [ +0.02% +0.02% +0.00% / +0.04% +0.72% +0.79%] index_add_ strided 3 : Elapsed 0.057 ms (5.691 ms / 100) 5.634 -> 5.641 ( +0.12%) [ +0.00% +0.04% +0.23% / +0.12% +0.83% +0.75%] index_copy_ strided 3 : Elapsed 0.056 ms (5.634 ms / 100) 5.684 -> 5.699 ( +0.26%) [ +0.14% +0.00% +0.26% / +0.26% +0.93% +0.86%] index_add_ perm : Elapsed 0.057 ms (5.692 ms / 100) 5.628 -> 5.643 ( +0.27%) [ +0.00% +0.11% +0.23% / +0.27% +0.94% +0.94%] index_copy_ perm : Elapsed 0.056 ms (5.628 ms / 100) 5.681 -> 5.682 ( +0.02%) [ +0.02% +0.00% +0.14% / +0.02% +0.76% +0.79%] index_add_ perm_sorted : Elapsed 0.057 ms (5.682 ms / 100) 5.631 -> 5.636 ( +0.09%) [ +0.00% +0.14% +0.04% / +0.09% +0.60% +0.60%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.631 ms / 100) 5.966 -> 5.979 ( +0.22%) [ +0.02% +0.00% +0.22% / +0.22% +1.07% +1.07%] index_select const : Elapsed 0.060 ms (5.967 ms / 100) 5.965 -> 5.977 ( +0.20%) [ +0.05% +0.08% +0.00% / +0.20% +1.09% +1.09%] index_select wrap : Elapsed 0.060 ms (5.968 ms / 100) 5.965 -> 5.974 ( +0.15%) [ +0.00% +0.00% +0.30% / +0.15% +1.07% +1.02%] index_select linear : Elapsed 0.060 ms (5.965 ms / 100) 5.964 -> 5.974 ( +0.17%) [ +0.00% +0.05% +0.20% / +0.17% +1.07% +0.96%] index_select reverse : Elapsed 0.060 ms (5.964 ms / 100) 5.965 -> 5.982 ( +0.28%) [ +0.15% +0.00% +0.22% / +0.28% +1.12% +1.19%] index_select skip64 : Elapsed 0.060 ms (5.974 ms / 100) 5.966 -> 5.978 ( +0.20%) [ +0.00% +0.15% +0.13% / +0.20% +0.97% +1.06%] index_select skip256 : Elapsed 0.060 ms (5.966 ms / 100) 5.974 -> 5.991 ( +0.28%) [ +0.07% +0.00% +0.03% / +0.28% +1.05% +0.95%] index_select spread : Elapsed 0.060 ms (5.978 ms / 100) 5.965 -> 5.983 ( +0.30%) [ +0.00% +0.05% +0.02% / +0.30% +0.96% +1.06%] index_select strided 3 : Elapsed 0.060 ms (5.965 ms / 100) 5.974 -> 5.994 ( +0.33%) [ +0.00% +0.08% +0.22% / +0.33% +0.95% +1.12%] index_select random : Elapsed 0.060 ms (5.974 ms / 100) 5.967 -> 5.975 ( +0.13%) [ +0.00% +0.12% +0.22% / +0.13% +0.97% +1.04%] index_select random_sorted : Elapsed 0.060 ms (5.967 ms / 100) B = [40, 5, 20, 16] (stride (320, 12800, 1, 20)) A = [40, 4, 20, 16] (stride (64, 16, 2560, 1)) dim = 1 5.883 -> 5.862 ( -0.36%) [ +0.12% +0.12% +0.00% / +0.05% -0.36% -0.32%] index_add_ linear : Elapsed 0.059 ms (5.890 ms / 100) 5.838 -> 5.814 ( -0.41%) [ +0.03% +0.03% +0.00% / +0.09% -0.29% -0.41%] index_copy_ linear : Elapsed 0.058 ms (5.840 ms / 100) 5.879 -> 5.857 ( -0.37%) [ +0.00% +0.00% +0.07% / -0.02% -0.37% -0.29%] index_add_ reverse : Elapsed 0.059 ms (5.879 ms / 100) 5.826 -> 5.811 ( -0.26%) [ +0.09% +0.00% +0.21% / +0.26% -0.26% -0.26%] index_copy_ reverse : Elapsed 0.058 ms (5.831 ms / 100) 5.873 -> 5.854 ( -0.32%) [ +0.12% +0.10% +0.00% / +0.20% -0.29% -0.32%] index_add_ spread : Elapsed 0.059 ms (5.880 ms / 100) 5.821 -> 5.802 ( -0.33%) [ +0.00% +0.09% +0.19% / +0.17% -0.33% -0.22%] index_copy_ spread : Elapsed 0.058 ms (5.821 ms / 100) 5.860 -> 5.852 ( -0.14%) [ +0.03% +0.00% +0.12% / +0.12% -0.07% -0.14%] index_add_ strided 3 : Elapsed 0.059 ms (5.862 ms / 100) 5.811 -> 5.800 ( -0.19%) [ +0.07% +0.00% +0.10% / +0.10% +0.00% -0.19%] index_copy_ strided 3 : Elapsed 0.058 ms (5.815 ms / 100) 5.855 -> 5.860 ( +0.09%) [ +0.00% +0.03% +0.22% / +0.10% +0.14% +0.09%] index_add_ perm : Elapsed 0.059 ms (5.855 ms / 100) 5.805 -> 5.803 ( -0.03%) [ +0.09% +0.02% +0.00% / +0.12% -0.03% +0.00%] index_copy_ perm : Elapsed 0.058 ms (5.810 ms / 100) 5.882 -> 5.856 ( -0.44%) [ +0.02% +0.00% +0.05% / -0.02% -0.43% -0.44%] index_add_ perm_sorted : Elapsed 0.059 ms (5.883 ms / 100) 5.830 -> 5.796 ( -0.58%) [ +0.00% +0.02% +0.10% / +0.09% -0.50% -0.58%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.830 ms / 100) 6.115 -> 6.091 ( -0.39%) [ +0.11% +0.00% +0.03% / +0.20% -0.39% -0.36%] index_select const : Elapsed 0.061 ms (6.122 ms / 100) 6.208 -> 6.175 ( -0.53%) [ +0.03% +0.00% +0.11% / +0.16% -0.53% -0.42%] index_select wrap : Elapsed 0.062 ms (6.210 ms / 100) 6.183 -> 6.156 ( -0.44%) [ +0.03% +0.00% +0.15% / +0.00% -0.44% -0.44%] index_select linear : Elapsed 0.062 ms (6.185 ms / 100) 6.189 -> 6.159 ( -0.48%) [ +0.00% +0.06% +0.18% / +0.21% -0.39% -0.48%] index_select reverse : Elapsed 0.062 ms (6.189 ms / 100) 6.096 -> 6.074 ( -0.36%) [ +0.10% +0.00% +0.16% / +0.11% -0.34% -0.36%] index_select skip64 : Elapsed 0.061 ms (6.102 ms / 100) 6.121 -> 6.089 ( -0.52%) [ +0.03% +0.11% +0.00% / +0.18% -0.52% -0.44%] index_select skip256 : Elapsed 0.061 ms (6.123 ms / 100) 6.189 -> 6.161 ( -0.45%) [ +0.00% +0.13% +0.16% / +0.39% -0.39% -0.45%] index_select spread : Elapsed 0.062 ms (6.189 ms / 100) 6.195 -> 6.165 ( -0.48%) [ +0.00% +0.06% +0.02% / +0.23% -0.48% -0.44%] index_select strided 3 : Elapsed 0.062 ms (6.195 ms / 100) 6.193 -> 6.161 ( -0.52%) [ +0.00% +0.02% +0.06% / -0.05% -0.44% -0.52%] index_select random : Elapsed 0.062 ms (6.193 ms / 100) 6.195 -> 6.164 ( -0.50%) [ +0.00% +0.11% +0.21% / +0.18% -0.50% -0.45%] index_select random_sorted : Elapsed 0.062 ms (6.195 ms / 100) B = [40, 5, 20, 16] (stride (100, 20, 1, 4000)) A = [40, 4, 20, 16] (stride (1, 40, 2560, 160)) dim = 1 5.708 -> 5.710 ( +0.04%) [ +0.00% +0.02% +0.05% / +0.04% +0.61% +0.40%] index_add_ linear : Elapsed 0.057 ms (5.708 ms / 100) 5.623 -> 5.639 ( +0.28%) [ +0.20% +0.00% +0.46% / +0.28% +0.48% +0.36%] index_copy_ linear : Elapsed 0.056 ms (5.634 ms / 100) 5.704 -> 5.711 ( +0.12%) [ +0.04% +0.00% +0.07% / +0.12% +0.54% +0.54%] index_add_ reverse : Elapsed 0.057 ms (5.706 ms / 100) 5.628 -> 5.642 ( +0.25%) [ +0.04% +0.12% +0.00% / +0.25% +0.43% +0.41%] index_copy_ reverse : Elapsed 0.056 ms (5.630 ms / 100) 5.708 -> 5.706 ( -0.04%) [ +0.00% +0.00% +0.02% / -0.04% +0.44% +0.72%] index_add_ spread : Elapsed 0.057 ms (5.708 ms / 100) 5.629 -> 5.636 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.28% +0.46%] index_copy_ spread : Elapsed 0.056 ms (5.631 ms / 100) 5.710 -> 5.723 ( +0.23%) [ +0.23% +0.00% +0.21% / +0.23% +0.60% +0.58%] index_add_ strided 3 : Elapsed 0.057 ms (5.723 ms / 100) 5.636 -> 5.647 ( +0.20%) [ +0.00% +0.09% +0.07% / +0.20% +0.60% +0.50%] index_copy_ strided 3 : Elapsed 0.056 ms (5.636 ms / 100) 5.710 -> 5.720 ( +0.18%) [ +0.04% +0.00% +0.09% / +0.18% +0.49% +0.42%] index_add_ perm : Elapsed 0.057 ms (5.712 ms / 100) 5.638 -> 5.636 ( -0.04%) [ +0.02% +0.02% +0.00% / -0.04% +0.37% +0.30%] index_copy_ perm : Elapsed 0.056 ms (5.639 ms / 100) 5.704 -> 5.714 ( +0.18%) [ +0.00% +0.02% +0.07% / +0.18% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.057 ms (5.704 ms / 100) 5.629 -> 5.631 ( +0.04%) [ +0.00% +0.00% +0.27% / +0.04% +0.30% +0.39%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.629 ms / 100) 5.887 -> 5.898 ( +0.19%) [ +0.05% +0.00% +0.12% / +0.19% +0.63% +0.75%] index_select const : Elapsed 0.059 ms (5.890 ms / 100) 5.961 -> 5.973 ( +0.20%) [ +0.12% +0.00% +0.15% / +0.20% +0.40% +0.59%] index_select wrap : Elapsed 0.060 ms (5.968 ms / 100) 5.957 -> 5.966 ( +0.15%) [ +0.03% +0.00% +0.10% / +0.15% +0.34% +0.30%] index_select linear : Elapsed 0.060 ms (5.959 ms / 100) 5.940 -> 5.943 ( +0.05%) [ +0.00% +0.02% +0.10% / +0.05% +0.62% +0.56%] index_select reverse : Elapsed 0.059 ms (5.940 ms / 100) 5.892 -> 5.899 ( +0.12%) [ +0.00% +0.02% +0.10% / +0.12% +0.68% +0.53%] index_select skip64 : Elapsed 0.059 ms (5.892 ms / 100) 5.893 -> 5.897 ( +0.07%) [ +0.02% +0.00% +0.07% / +0.07% +0.64% +0.61%] index_select skip256 : Elapsed 0.059 ms (5.894 ms / 100) 5.949 -> 5.952 ( +0.05%) [ +0.00% +0.08% +0.10% / +0.05% +0.57% +0.47%] index_select spread : Elapsed 0.059 ms (5.949 ms / 100) 5.955 -> 5.965 ( +0.17%) [ +0.05% +0.00% +0.10% / +0.17% +0.59% +0.72%] index_select strided 3 : Elapsed 0.060 ms (5.958 ms / 100) 5.954 -> 5.968 ( +0.24%) [ +0.07% +0.00% +0.24% / +0.24% +0.77% +0.79%] index_select random : Elapsed 0.060 ms (5.958 ms / 100) 5.936 -> 5.947 ( +0.19%) [ +0.00% +0.05% +0.20% / +0.19% +0.59% +0.56%] index_select random_sorted : Elapsed 0.059 ms (5.936 ms / 100) B = [40, 5, 20, 16] (stride (20, 800, 1, 4000)) A = [40, 4, 20, 16] (stride (1, 40, 2560, 160)) dim = 1 5.766 -> 5.756 ( -0.17%) [ +0.00% +0.02% +0.10% / -0.17% +0.16% +0.00%] index_add_ linear : Elapsed 0.058 ms (5.766 ms / 100) 5.625 -> 5.613 ( -0.21%) [ +0.00% +0.00% +0.04% / +0.05% -0.02% -0.21%] index_copy_ linear : Elapsed 0.056 ms (5.625 ms / 100) 5.748 -> 5.737 ( -0.19%) [ +0.07% +0.10% +0.00% / +0.03% -0.19% +0.03%] index_add_ reverse : Elapsed 0.058 ms (5.752 ms / 100) 5.632 -> 5.616 ( -0.28%) [ +0.00% +0.00% +0.02% / +0.09% -0.21% -0.28%] index_copy_ reverse : Elapsed 0.056 ms (5.632 ms / 100) 5.764 -> 5.756 ( -0.14%) [ +0.09% +0.02% +0.00% / +0.05% -0.02% -0.14%] index_add_ spread : Elapsed 0.058 ms (5.769 ms / 100) 5.623 -> 5.616 ( -0.12%) [ +0.20% +0.00% +0.20% / -0.04% -0.12% -0.07%] index_copy_ spread : Elapsed 0.056 ms (5.634 ms / 100) 5.735 -> 5.666 ( -1.20%) [ +0.14% +0.00% +0.07% / +0.19% -0.92% -1.20%] index_add_ strided 3 : Elapsed 0.057 ms (5.743 ms / 100) 5.607 -> 5.567 ( -0.71%) [ +0.00% +0.04% +0.09% / +0.05% -0.71% -0.66%] index_copy_ strided 3 : Elapsed 0.056 ms (5.607 ms / 100) 5.716 -> 5.693 ( -0.40%) [ +0.00% +0.05% +0.05% / +0.00% -0.40% -0.40%] index_add_ perm : Elapsed 0.057 ms (5.716 ms / 100) 5.619 -> 5.592 ( -0.48%) [ +0.00% +0.05% +0.18% / +0.11% -0.43% -0.48%] index_copy_ perm : Elapsed 0.056 ms (5.619 ms / 100) 5.744 -> 5.699 ( -0.78%) [ +0.07% +0.00% +0.00% / -0.07% -0.75% -0.78%] index_add_ perm_sorted : Elapsed 0.057 ms (5.748 ms / 100) 5.610 -> 5.581 ( -0.52%) [ +0.00% +0.04% +0.18% / +0.14% -0.52% -0.45%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.610 ms / 100) 5.889 -> 5.882 ( -0.12%) [ +0.00% +0.14% +0.07% / +0.17% -0.12% -0.12%] index_select const : Elapsed 0.059 ms (5.889 ms / 100) 5.970 -> 5.946 ( -0.40%) [ +0.00% +0.10% +0.07% / -0.03% -0.40% -0.25%] index_select wrap : Elapsed 0.060 ms (5.970 ms / 100) 5.956 -> 5.942 ( -0.24%) [ +0.20% +0.03% +0.00% / +0.08% -0.24% -0.24%] index_select linear : Elapsed 0.060 ms (5.968 ms / 100) 5.953 -> 5.919 ( -0.57%) [ +0.17% +0.08% +0.00% / +0.20% -0.57% -0.40%] index_select reverse : Elapsed 0.060 ms (5.963 ms / 100) 5.894 -> 5.879 ( -0.25%) [ +0.07% +0.00% +0.08% / +0.10% -0.22% -0.25%] index_select skip64 : Elapsed 0.059 ms (5.898 ms / 100) 5.899 -> 5.879 ( -0.34%) [ +0.07% +0.05% +0.00% / -0.02% -0.31% -0.34%] index_select skip256 : Elapsed 0.059 ms (5.903 ms / 100) 5.953 -> 5.919 ( -0.57%) [ +0.07% +0.00% +0.07% / +0.05% -0.57% -0.49%] index_select spread : Elapsed 0.060 ms (5.957 ms / 100) 5.974 -> 5.937 ( -0.62%) [ +0.07% +0.05% +0.00% / +0.07% -0.55% -0.62%] index_select strided 3 : Elapsed 0.060 ms (5.978 ms / 100) 5.937 -> 5.919 ( -0.30%) [ +0.00% +0.05% +0.05% / +0.08% -0.30% -0.30%] index_select random : Elapsed 0.059 ms (5.937 ms / 100) 5.919 -> 5.893 ( -0.44%) [ +0.02% +0.00% +0.03% / -0.03% -0.37% -0.44%] index_select random_sorted : Elapsed 0.059 ms (5.920 ms / 100) B = [40, 5, 20, 16] (stride (1, 800, 40, 4000)) A = [40, 4, 20, 16] (stride (1280, 16, 64, 1)) dim = 1 5.898 -> 5.903 ( +0.08%) [ +0.02% +0.00% +0.07% / +0.17% +0.22% +0.08%] index_add_ linear : Elapsed 0.059 ms (5.899 ms / 100) 5.810 -> 5.814 ( +0.07%) [ +0.00% +0.21% +0.15% / +0.15% +0.14% +0.07%] index_copy_ linear : Elapsed 0.058 ms (5.810 ms / 100) 5.856 -> 5.867 ( +0.19%) [ +0.10% +0.00% +0.14% / +0.19% +0.53% +0.56%] index_add_ reverse : Elapsed 0.059 ms (5.862 ms / 100) 5.789 -> 5.792 ( +0.05%) [ +0.00% +0.12% +0.14% / +0.05% +0.36% +0.26%] index_copy_ reverse : Elapsed 0.058 ms (5.789 ms / 100) 5.891 -> 5.901 ( +0.17%) [ +0.20% +0.00% +0.15% / +0.22% +0.17% +0.20%] index_add_ spread : Elapsed 0.059 ms (5.903 ms / 100) 5.808 -> 5.816 ( +0.14%) [ +0.00% +0.09% +0.17% / +0.22% +0.26% +0.14%] index_copy_ spread : Elapsed 0.058 ms (5.808 ms / 100) 5.874 -> 5.880 ( +0.10%) [ +0.22% +0.00% +0.12% / +0.14% +0.14% +0.10%] index_add_ strided 3 : Elapsed 0.059 ms (5.887 ms / 100) 5.800 -> 5.797 ( -0.05%) [ +0.00% +0.14% +0.00% / +0.00% -0.05% +0.10%] index_copy_ strided 3 : Elapsed 0.058 ms (5.800 ms / 100) 5.865 -> 5.871 ( +0.10%) [ +0.14% +0.00% +0.20% / +0.10% +0.49% +0.46%] index_add_ perm : Elapsed 0.059 ms (5.873 ms / 100) 5.813 -> 5.823 ( +0.17%) [ +0.00% +0.00% +0.07% / +0.17% +0.28% +0.17%] index_copy_ perm : Elapsed 0.058 ms (5.813 ms / 100) 5.860 -> 5.860 ( +0.00%) [ +0.03% +0.02% +0.00% / +0.00% +0.38% +0.38%] index_add_ perm_sorted : Elapsed 0.059 ms (5.862 ms / 100) 5.809 -> 5.810 ( +0.02%) [ +0.02% +0.03% +0.00% / +0.09% +0.09% +0.02%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.810 ms / 100) 6.115 -> 6.124 ( +0.15%) [ +0.00% +0.05% +0.13% / +0.15% +0.20% +0.15%] index_select const : Elapsed 0.061 ms (6.115 ms / 100) 6.186 -> 6.191 ( +0.08%) [ +0.08% +0.00% +0.11% / +0.18% +0.08% +0.10%] index_select wrap : Elapsed 0.062 ms (6.191 ms / 100) 6.181 -> 6.184 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.06% +0.05% +0.11%] index_select linear : Elapsed 0.062 ms (6.186 ms / 100) 6.199 -> 6.200 ( +0.02%) [ +0.03% +0.00% +0.15% / +0.02% +0.05% +0.03%] index_select reverse : Elapsed 0.062 ms (6.201 ms / 100) 6.143 -> 6.143 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.13% +0.11% +0.00%] index_select skip64 : Elapsed 0.061 ms (6.145 ms / 100) 6.118 -> 6.115 ( -0.05%) [ +0.05% +0.00% +0.11% / -0.05% +0.02% +0.08%] index_select skip256 : Elapsed 0.061 ms (6.121 ms / 100) 6.213 -> 6.210 ( -0.05%) [ +0.00% +0.02% +0.00% / +0.00% -0.02% -0.05%] index_select spread : Elapsed 0.062 ms (6.213 ms / 100) 6.197 -> 6.197 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.00% +0.02% +0.00%] index_select strided 3 : Elapsed 0.062 ms (6.199 ms / 100) 6.180 -> 6.187 ( +0.11%) [ +0.21% +0.19% +0.00% / +0.11% +0.11% +0.15%] index_select random : Elapsed 0.062 ms (6.193 ms / 100) 6.165 -> 6.162 ( -0.05%) [ +0.00% +0.10% +0.13% / +0.11% -0.05% +0.11%] index_select random_sorted : Elapsed 0.062 ms (6.165 ms / 100) B = [40, 5, 20, 16] (stride (5, 1, 200, 4000)) A = [40, 4, 20, 16] (stride (64, 16, 2560, 1)) dim = 1 6.082 -> 6.068 ( -0.23%) [ +0.03% +0.00% +0.02% / +0.08% -0.23% -0.15%] index_add_ linear : Elapsed 0.061 ms (6.084 ms / 100) 6.067 -> 6.048 ( -0.31%) [ +0.08% +0.03% +0.00% / +0.13% -0.18% -0.31%] index_copy_ linear : Elapsed 0.061 ms (6.072 ms / 100) 6.081 -> 6.073 ( -0.13%) [ +0.13% +0.08% +0.00% / +0.00% -0.07% -0.13%] index_add_ reverse : Elapsed 0.061 ms (6.089 ms / 100) 6.066 -> 6.051 ( -0.25%) [ +0.08% +0.00% +0.08% / +0.07% -0.03% -0.25%] index_copy_ reverse : Elapsed 0.061 ms (6.071 ms / 100) 6.067 -> 6.061 ( -0.10%) [ +0.13% +0.00% +0.10% / +0.08% -0.10% -0.10%] index_add_ spread : Elapsed 0.061 ms (6.075 ms / 100) 6.052 -> 6.037 ( -0.25%) [ +0.00% +0.12% +0.18% / +0.08% -0.25% -0.17%] index_copy_ spread : Elapsed 0.061 ms (6.052 ms / 100) 6.072 -> 6.052 ( -0.33%) [ +0.08% +0.02% +0.00% / +0.00% -0.33% -0.23%] index_add_ strided 3 : Elapsed 0.061 ms (6.077 ms / 100) 6.050 -> 6.037 ( -0.21%) [ +0.15% +0.00% +0.13% / +0.03% -0.21% -0.20%] index_copy_ strided 3 : Elapsed 0.061 ms (6.059 ms / 100) 6.065 -> 6.063 ( -0.03%) [ +0.10% +0.05% +0.00% / +0.08% -0.03% -0.03%] index_add_ perm : Elapsed 0.061 ms (6.071 ms / 100) 6.053 -> 6.038 ( -0.25%) [ +0.00% +0.13% +0.05% / +0.07% -0.20% -0.25%] index_copy_ perm : Elapsed 0.061 ms (6.053 ms / 100) 6.068 -> 6.055 ( -0.21%) [ +0.08% +0.10% +0.00% / +0.05% -0.16% -0.21%] index_add_ perm_sorted : Elapsed 0.061 ms (6.073 ms / 100) 6.056 -> 6.040 ( -0.26%) [ +0.00% +0.03% +0.03% / -0.05% -0.21% -0.26%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.056 ms / 100) 6.282 -> 6.260 ( -0.35%) [ +0.00% +0.14% +0.06% / +0.13% -0.22% -0.35%] index_select const : Elapsed 0.063 ms (6.282 ms / 100) 6.353 -> 6.339 ( -0.22%) [ +0.00% +0.00% +0.11% / +0.13% -0.22% -0.14%] index_select wrap : Elapsed 0.064 ms (6.353 ms / 100) 6.327 -> 6.310 ( -0.27%) [ +0.00% +0.00% +0.08% / +0.08% -0.27% -0.27%] index_select linear : Elapsed 0.063 ms (6.327 ms / 100) 6.337 -> 6.317 ( -0.32%) [ +0.06% +0.00% +0.09% / +0.00% -0.32% -0.14%] index_select reverse : Elapsed 0.063 ms (6.341 ms / 100) 6.256 -> 6.245 ( -0.18%) [ +0.16% +0.05% +0.00% / +0.11% -0.18% -0.13%] index_select skip64 : Elapsed 0.063 ms (6.266 ms / 100) 6.286 -> 6.267 ( -0.30%) [ +0.02% +0.00% +0.03% / +0.05% -0.27% -0.30%] index_select skip256 : Elapsed 0.063 ms (6.287 ms / 100) 6.329 -> 6.316 ( -0.21%) [ +0.00% +0.11% +0.03% / +0.25% -0.19% -0.21%] index_select spread : Elapsed 0.063 ms (6.329 ms / 100) 6.332 -> 6.310 ( -0.35%) [ +0.05% +0.02% +0.00% / +0.14% -0.17% -0.35%] index_select strided 3 : Elapsed 0.063 ms (6.335 ms / 100) 6.305 -> 6.287 ( -0.29%) [ +0.05% +0.00% +0.10% / +0.08% -0.17% -0.29%] index_select random : Elapsed 0.063 ms (6.308 ms / 100) 6.319 -> 6.297 ( -0.35%) [ +0.00% +0.14% +0.05% / +0.13% -0.27% -0.35%] index_select random_sorted : Elapsed 0.063 ms (6.319 ms / 100) B = [40, 5, 20, 16] (stride (5, 1, 200, 4000)) A = [40, 4, 20, 16] (stride (20, 800, 1, 3200)) dim = 1 6.063 -> 6.070 ( +0.12%) [ +0.08% +0.10% +0.00% / +0.16% +0.13% +0.12%] index_add_ linear : Elapsed 0.061 ms (6.068 ms / 100) 6.059 -> 6.053 ( -0.10%) [ +0.15% +0.00% +0.08% / +0.26% -0.07% -0.10%] index_copy_ linear : Elapsed 0.061 ms (6.068 ms / 100) 6.060 -> 6.064 ( +0.07%) [ +0.30% +0.00% +0.20% / +0.08% +0.07% +0.26%] index_add_ reverse : Elapsed 0.061 ms (6.078 ms / 100) 6.069 -> 6.053 ( -0.26%) [ +0.00% +0.00% +0.08% / +0.05% -0.21% -0.26%] index_copy_ reverse : Elapsed 0.061 ms (6.069 ms / 100) 6.068 -> 6.066 ( -0.03%) [ +0.18% +0.00% +0.08% / +0.08% -0.03% +0.10%] index_add_ spread : Elapsed 0.061 ms (6.079 ms / 100) 6.067 -> 6.058 ( -0.15%) [ +0.08% +0.03% +0.00% / +0.08% -0.10% -0.15%] index_copy_ spread : Elapsed 0.061 ms (6.072 ms / 100) 6.062 -> 6.065 ( +0.05%) [ +0.20% +0.00% +0.08% / +0.05% +0.20% +0.23%] index_add_ strided 3 : Elapsed 0.061 ms (6.074 ms / 100) 6.068 -> 6.054 ( -0.23%) [ +0.03% +0.00% +0.08% / +0.03% -0.21% -0.23%] index_copy_ strided 3 : Elapsed 0.061 ms (6.070 ms / 100) 6.070 -> 6.072 ( +0.03%) [ +0.07% +0.00% +0.16% / +0.12% +0.03% +0.05%] index_add_ perm : Elapsed 0.061 ms (6.074 ms / 100) 6.065 -> 6.058 ( -0.12%) [ +0.00% +0.00% +0.07% / +0.00% -0.05% -0.12%] index_copy_ perm : Elapsed 0.061 ms (6.065 ms / 100) 6.070 -> 6.067 ( -0.05%) [ +0.00% +0.02% +0.08% / +0.02% +0.00% -0.05%] index_add_ perm_sorted : Elapsed 0.061 ms (6.070 ms / 100) 6.065 -> 6.058 ( -0.12%) [ +0.00% +0.02% +0.02% / +0.03% -0.10% -0.12%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.065 ms / 100) 6.277 -> 6.256 ( -0.33%) [ +0.05% +0.00% +0.10% / +0.13% -0.33% -0.33%] index_select const : Elapsed 0.063 ms (6.280 ms / 100) 6.373 -> 6.360 ( -0.20%) [ +0.00% +0.00% +0.05% / +0.13% -0.20% -0.17%] index_select wrap : Elapsed 0.064 ms (6.373 ms / 100) 6.352 -> 6.346 ( -0.09%) [ +0.00% +0.20% +0.17% / +0.11% -0.05% -0.09%] index_select linear : Elapsed 0.064 ms (6.352 ms / 100) 6.369 -> 6.346 ( -0.36%) [ +0.00% +0.02% +0.06% / +0.06% -0.36% -0.28%] index_select reverse : Elapsed 0.064 ms (6.369 ms / 100) 6.276 -> 6.260 ( -0.25%) [ +0.00% +0.10% +0.11% / -0.02% -0.25% -0.21%] index_select skip64 : Elapsed 0.063 ms (6.276 ms / 100) 6.274 -> 6.259 ( -0.24%) [ +0.10% +0.00% +0.21% / +0.16% -0.24% -0.18%] index_select skip256 : Elapsed 0.063 ms (6.280 ms / 100) 6.358 -> 6.345 ( -0.20%) [ +0.02% +0.00% +0.09% / +0.13% -0.20% -0.13%] index_select spread : Elapsed 0.064 ms (6.359 ms / 100) 6.372 -> 6.354 ( -0.28%) [ +0.06% +0.00% +0.19% / +0.09% -0.19% -0.28%] index_select strided 3 : Elapsed 0.064 ms (6.376 ms / 100) 6.364 -> 6.353 ( -0.17%) [ +0.09% +0.00% +0.03% / +0.02% -0.11% -0.17%] index_select random : Elapsed 0.064 ms (6.370 ms / 100) 6.360 -> 6.347 ( -0.20%) [ +0.00% +0.13% +0.05% / +0.05% -0.17% -0.20%] index_select random_sorted : Elapsed 0.064 ms (6.360 ms / 100) out_shape = [40, 4, 5, 16] in_shape = [40, 4, 20, 16] idx_dim = 2 B = [40, 4, 5, 16] (stride (320, 80, 16, 1)) A = [40, 4, 20, 16] (stride (80, 20, 1, 3200)) dim = 2 0.679 -> 0.679 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +1.03%] index_select const : Elapsed 0.007 ms (0.679 ms / 100) 0.688 -> 0.679 ( -1.31%) [ +0.00% +0.00% +0.00% / +0.00% -1.31% -1.16%] index_select wrap : Elapsed 0.007 ms (0.688 ms / 100) 0.688 -> 0.682 ( -0.87%) [ +0.00% +0.00% +0.00% / +0.00% -0.87% -0.73%] index_select linear : Elapsed 0.007 ms (0.688 ms / 100) 0.682 -> 0.682 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.59% +0.44%] index_select reverse : Elapsed 0.007 ms (0.682 ms / 100) 0.681 -> 0.681 ( +0.00%) [ +0.15% +0.00% +0.29% / +0.00% +0.44% +0.44%] index_select skip64 : Elapsed 0.007 ms (0.682 ms / 100) 0.682 -> 0.684 ( +0.29%) [ +0.00% +0.00% +0.00% / +0.29% +1.17% +1.17%] index_select skip256 : Elapsed 0.007 ms (0.682 ms / 100) 0.680 -> 0.679 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +1.03% +1.32%] index_select spread : Elapsed 0.007 ms (0.680 ms / 100) 0.679 -> 0.679 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.74% +0.59%] index_select strided 3 : Elapsed 0.007 ms (0.679 ms / 100) 0.682 -> 0.682 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.59% +0.73%] index_select strided 5 : Elapsed 0.007 ms (0.682 ms / 100) 0.680 -> 0.681 ( +0.15%) [ +0.29% +0.00% +0.00% / +0.15% +0.59% +0.44%] index_select strided 7 : Elapsed 0.007 ms (0.682 ms / 100) 0.682 -> 0.682 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.44% +0.44%] index_select strided 8 : Elapsed 0.007 ms (0.682 ms / 100) 0.684 -> 0.678 ( -0.88%) [ +1.61% +0.29% +0.00% / +0.15% -0.88% -0.73%] index_select strided 16 : Elapsed 0.007 ms (0.695 ms / 100) 0.687 -> 0.679 ( -1.16%) [ +0.00% +0.29% +0.29% / +0.00% -1.02% -1.16%] index_select random : Elapsed 0.007 ms (0.687 ms / 100) 0.680 -> 0.679 ( -0.15%) [ +0.00% +0.00% +0.29% / -0.15% +0.15% +0.59%] index_select random_sorted : Elapsed 0.007 ms (0.680 ms / 100) 0.679 -> 0.679 ( +0.00%) [ +0.00% +0.00% +0.29% / +0.00% +0.74% +0.74%] index_select perm : Elapsed 0.007 ms (0.679 ms / 100) 0.678 -> 0.679 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +1.47% +1.33%] index_select perm_sorted : Elapsed 0.007 ms (0.679 ms / 100) B = [40, 4, 5, 16] (stride (5, 3200, 1, 200)) dim = 2 fill_cnt = 20 3.580 -> 3.555 ( -0.70%) [ +0.00% +0.03% +0.08% / -0.70% -0.53% -0.67%] index_fill_ const : Elapsed 0.036 ms (3.580 ms / 100) 3.563 -> 3.538 ( -0.70%) [ +0.06% +0.00% +0.08% / -0.62% -0.65% -0.70%] index_fill_ linear : Elapsed 0.036 ms (3.565 ms / 100) 3.563 -> 3.538 ( -0.70%) [ +0.06% +0.11% +0.00% / -0.70% -0.62% -0.48%] index_fill_ reverse : Elapsed 0.036 ms (3.565 ms / 100) 3.565 -> 3.550 ( -0.42%) [ +0.28% +0.00% +0.14% / -0.42% -0.42% -0.20%] index_fill_ skip64 : Elapsed 0.036 ms (3.575 ms / 100) 3.572 -> 3.550 ( -0.62%) [ +0.00% +0.06% +0.03% / -0.62% -0.50% -0.59%] index_fill_ skip256 : Elapsed 0.036 ms (3.572 ms / 100) 3.563 -> 3.542 ( -0.59%) [ +0.14% +0.00% +0.06% / -0.59% -0.59% -0.42%] index_fill_ spread : Elapsed 0.036 ms (3.568 ms / 100) 3.559 -> 3.538 ( -0.59%) [ +0.06% +0.00% +0.17% / -0.48% -0.37% -0.59%] index_fill_ strided 3 : Elapsed 0.036 ms (3.561 ms / 100) 3.551 -> 3.524 ( -0.76%) [ +0.00% +0.06% +0.11% / -0.76% -0.56% -0.62%] index_fill_ random : Elapsed 0.036 ms (3.551 ms / 100) 3.546 -> 3.531 ( -0.42%) [ +0.00% +0.14% +0.25% / -0.31% -0.42% -0.39%] index_fill_ random_sorted : Elapsed 0.035 ms (3.546 ms / 100) B = [40, 4, 5, 16] (stride (4, 1, 2560, 160)) A = [40, 4, 20, 16] (stride (1, 40, 160, 3200)) dim = 2 1.795 -> 1.790 ( -0.28%) [ +0.00% +0.06% +0.06% / +0.22% -0.11% -0.28%] index_select const : Elapsed 0.018 ms (1.795 ms / 100) 1.799 -> 1.798 ( -0.06%) [ +0.28% +0.11% +0.00% / +0.17% -0.06% -0.06%] index_select wrap : Elapsed 0.018 ms (1.804 ms / 100) 1.794 -> 1.793 ( -0.06%) [ +0.00% +0.22% +0.17% / -0.06% +0.33% +0.28%] index_select linear : Elapsed 0.018 ms (1.794 ms / 100) 1.799 -> 1.783 ( -0.89%) [ +0.17% +0.22% +0.00% / +0.06% -0.72% -0.89%] index_select reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.788 -> 1.787 ( -0.06%) [ +0.11% +0.00% +0.00% / -0.06% +0.50% +0.62%] index_select skip64 : Elapsed 0.018 ms (1.790 ms / 100) 1.788 -> 1.787 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.22% +0.39%] index_select skip256 : Elapsed 0.018 ms (1.788 ms / 100) 1.807 -> 1.803 ( -0.22%) [ +0.17% +0.00% +0.17% / -0.22% -0.06% -0.17%] index_select spread : Elapsed 0.018 ms (1.810 ms / 100) 1.803 -> 1.801 ( -0.11%) [ +0.22% +0.11% +0.00% / -0.06% -0.11% +0.00%] index_select strided 3 : Elapsed 0.018 ms (1.807 ms / 100) 1.802 -> 1.790 ( -0.67%) [ +0.00% +0.00% +0.00% / -0.17% -0.39% -0.67%] index_select strided 5 : Elapsed 0.018 ms (1.802 ms / 100) 1.794 -> 1.792 ( -0.11%) [ +0.28% +0.00% +0.11% / -0.11% +0.39% +0.50%] index_select strided 7 : Elapsed 0.018 ms (1.799 ms / 100) 1.805 -> 1.809 ( +0.22%) [ +0.11% +0.17% +0.00% / +0.22% +0.39% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.807 ms / 100) 1.804 -> 1.807 ( +0.17%) [ +0.00% +0.33% +0.17% / +0.17% +0.72% +0.55%] index_select strided 16 : Elapsed 0.018 ms (1.804 ms / 100) 1.796 -> 1.798 ( +0.11%) [ +0.00% +0.17% +0.33% / +0.11% +0.78% +0.67%] index_select random : Elapsed 0.018 ms (1.796 ms / 100) 1.791 -> 1.794 ( +0.17%) [ +0.17% +0.00% +0.34% / +0.17% +1.01% +0.84%] index_select random_sorted : Elapsed 0.018 ms (1.794 ms / 100) 1.794 -> 1.795 ( +0.06%) [ +0.22% +0.00% +0.06% / +0.06% +0.39% +0.33%] index_select perm : Elapsed 0.018 ms (1.798 ms / 100) 1.791 -> 1.792 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.34% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.791 ms / 100) B = [40, 4, 5, 16] (stride (5, 200, 1, 800)) A = [40, 4, 20, 16] (stride (1, 40, 160, 3200)) dim = 2 1.896 -> 1.898 ( +0.11%) [ +0.32% +0.16% +0.00% / +0.11% +0.47% +0.58%] index_select const : Elapsed 0.019 ms (1.902 ms / 100) 1.896 -> 1.895 ( -0.05%) [ +0.16% +0.00% +0.26% / -0.05% +0.69% +0.63%] index_select wrap : Elapsed 0.019 ms (1.899 ms / 100) 1.897 -> 1.899 ( +0.11%) [ +0.00% +0.37% +0.16% / +0.11% +0.69% +0.74%] index_select linear : Elapsed 0.019 ms (1.897 ms / 100) 1.885 -> 1.887 ( +0.11%) [ +0.27% +0.00% +0.05% / +0.11% +0.64% +0.69%] index_select reverse : Elapsed 0.019 ms (1.890 ms / 100) 1.895 -> 1.897 ( +0.11%) [ +0.05% +0.26% +0.00% / +0.11% +0.63% +0.47%] index_select skip64 : Elapsed 0.019 ms (1.896 ms / 100) 1.901 -> 1.900 ( -0.05%) [ +0.16% +0.00% +0.16% / -0.05% +0.37% +0.53%] index_select skip256 : Elapsed 0.019 ms (1.904 ms / 100) 1.909 -> 1.912 ( +0.16%) [ +0.10% +0.00% +0.10% / +0.42% +0.37% +0.16%] index_select spread : Elapsed 0.019 ms (1.911 ms / 100) 1.904 -> 1.907 ( +0.16%) [ +0.16% +0.00% +0.05% / +0.16% +0.32% +0.37%] index_select strided 3 : Elapsed 0.019 ms (1.907 ms / 100) 1.908 -> 1.911 ( +0.16%) [ +0.05% +0.00% +0.10% / +0.26% +0.47% +0.16%] index_select strided 5 : Elapsed 0.019 ms (1.909 ms / 100) 1.904 -> 1.907 ( +0.16%) [ +0.05% +0.00% +0.05% / +0.16% +0.26% +0.26%] index_select strided 7 : Elapsed 0.019 ms (1.905 ms / 100) 1.910 -> 1.917 ( +0.37%) [ +0.21% +0.00% +0.21% / +0.37% +1.05% +0.63%] index_select strided 8 : Elapsed 0.019 ms (1.914 ms / 100) 1.906 -> 1.907 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +1.00% +0.89%] index_select strided 16 : Elapsed 0.019 ms (1.906 ms / 100) 1.901 -> 1.903 ( +0.11%) [ +0.00% +0.05% +0.11% / +0.11% +0.21% +0.42%] index_select random : Elapsed 0.019 ms (1.901 ms / 100) 1.895 -> 1.896 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.47% +0.58%] index_select random_sorted : Elapsed 0.019 ms (1.895 ms / 100) 1.880 -> 1.884 ( +0.21%) [ +0.37% +0.00% +0.21% / +0.21% +0.96% +1.17%] index_select perm : Elapsed 0.019 ms (1.887 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.26% +0.11% +0.00% / +0.11% +0.53% +0.74%] index_select perm_sorted : Elapsed 0.019 ms (1.909 ms / 100) B = [40, 4, 5, 16] (stride (1, 200, 40, 800)) A = [40, 4, 20, 16] (stride (64, 1, 2560, 4)) dim = 2 1.781 -> 1.775 ( -0.34%) [ +0.17% +0.06% +0.00% / -0.06% -0.34% -0.11%] index_select const : Elapsed 0.018 ms (1.784 ms / 100) 1.775 -> 1.776 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.17% +0.23% +0.06%] index_select wrap : Elapsed 0.018 ms (1.775 ms / 100) 1.775 -> 1.775 ( +0.00%) [ +0.28% +0.00% +0.62% / +0.11% +0.00% +0.23%] index_select linear : Elapsed 0.018 ms (1.780 ms / 100) 1.776 -> 1.776 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.28% +0.00% +0.17%] index_select reverse : Elapsed 0.018 ms (1.776 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.23% +0.00% +0.06% / +0.17% +0.17% +0.23%] index_select skip64 : Elapsed 0.018 ms (1.777 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.17% +0.00% +0.06% / +0.17% +0.23% +0.51%] index_select skip256 : Elapsed 0.018 ms (1.776 ms / 100) 1.781 -> 1.778 ( -0.17%) [ +0.17% +0.00% +0.22% / -0.17% +0.28% +0.06%] index_select spread : Elapsed 0.018 ms (1.784 ms / 100) 1.788 -> 1.783 ( -0.28%) [ +0.22% +0.28% +0.00% / +0.11% -0.22% -0.28%] index_select strided 3 : Elapsed 0.018 ms (1.792 ms / 100) 1.783 -> 1.780 ( -0.17%) [ +0.22% +0.00% +0.06% / +0.11% -0.17% +0.06%] index_select strided 5 : Elapsed 0.018 ms (1.787 ms / 100) 1.786 -> 1.786 ( +0.00%) [ +0.06% +0.00% +0.11% / +0.06% +0.11% +0.00%] index_select strided 7 : Elapsed 0.018 ms (1.787 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.51% +0.00% +0.11% / +0.00% +0.17% +0.22%] index_select strided 8 : Elapsed 0.018 ms (1.790 ms / 100) 1.779 -> 1.782 ( +0.17%) [ +0.28% +0.00% +0.06% / +0.17% +0.39% +0.17%] index_select strided 16 : Elapsed 0.018 ms (1.784 ms / 100) 1.784 -> 1.783 ( -0.06%) [ +0.00% +0.00% +0.11% / -0.06% +0.17% -0.06%] index_select random : Elapsed 0.018 ms (1.784 ms / 100) 1.782 -> 1.783 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.06% +0.06%] index_select random_sorted : Elapsed 0.018 ms (1.784 ms / 100) 1.784 -> 1.783 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +0.34% +0.45%] index_select perm : Elapsed 0.018 ms (1.785 ms / 100) 1.781 -> 1.784 ( +0.17%) [ +0.28% +0.00% +0.28% / +0.17% +0.28% +0.22%] index_select perm_sorted : Elapsed 0.018 ms (1.786 ms / 100) B = [40, 4, 5, 16] (stride (4, 1, 160, 800)) A = [40, 4, 20, 16] (stride (1280, 1, 64, 4)) dim = 2 1.676 -> 1.672 ( -0.24%) [ +0.18% +0.00% +0.06% / -0.24% +0.48% +0.54%] index_select const : Elapsed 0.017 ms (1.679 ms / 100) 1.698 -> 1.703 ( +0.29%) [ +0.18% +0.00% +0.41% / +0.29% +1.06% +1.24%] index_select wrap : Elapsed 0.017 ms (1.701 ms / 100) 1.699 -> 1.701 ( +0.12%) [ +0.00% +0.24% +0.18% / +0.12% +1.18% +1.24%] index_select linear : Elapsed 0.017 ms (1.699 ms / 100) 1.703 -> 1.709 ( +0.35%) [ +0.18% +0.12% +0.00% / +0.35% +0.82% +0.82%] index_select reverse : Elapsed 0.017 ms (1.706 ms / 100) 1.676 -> 1.678 ( +0.12%) [ +0.00% +0.06% +0.12% / +0.12% +0.60% +0.72%] index_select skip64 : Elapsed 0.017 ms (1.676 ms / 100) 1.674 -> 1.677 ( +0.18%) [ +0.18% +0.00% +0.30% / +0.18% +0.84% +1.02%] index_select skip256 : Elapsed 0.017 ms (1.677 ms / 100) 1.693 -> 1.693 ( +0.00%) [ +0.06% +0.00% +0.35% / +0.00% +1.24% +1.48%] index_select spread : Elapsed 0.017 ms (1.694 ms / 100) 1.695 -> 1.698 ( +0.18%) [ +0.00% +0.18% +0.12% / +0.18% +1.36% +1.36%] index_select strided 3 : Elapsed 0.017 ms (1.695 ms / 100) 1.683 -> 1.678 ( -0.30%) [ +0.24% +0.06% +0.00% / -0.30% +1.43% +1.37%] index_select strided 5 : Elapsed 0.017 ms (1.687 ms / 100) 1.693 -> 1.695 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +1.24% +1.24%] index_select strided 7 : Elapsed 0.017 ms (1.695 ms / 100) 1.693 -> 1.689 ( -0.24%) [ +0.00% +0.00% +0.00% / -0.24% +1.18% +1.36%] index_select strided 8 : Elapsed 0.017 ms (1.693 ms / 100) 1.689 -> 1.692 ( +0.18%) [ +0.30% +0.00% +0.30% / +0.18% +1.54% +1.48%] index_select strided 16 : Elapsed 0.017 ms (1.694 ms / 100) 1.699 -> 1.702 ( +0.18%) [ +0.12% +0.00% +0.18% / +0.18% +1.12% +1.00%] index_select random : Elapsed 0.017 ms (1.701 ms / 100) 1.699 -> 1.702 ( +0.18%) [ +0.12% +0.00% +0.12% / +0.18% +1.18% +1.06%] index_select random_sorted : Elapsed 0.017 ms (1.701 ms / 100) 1.693 -> 1.692 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +1.36% +1.30%] index_select perm : Elapsed 0.017 ms (1.694 ms / 100) 1.693 -> 1.697 ( +0.24%) [ +0.18% +0.18% +0.00% / +0.24% +1.59% +1.42%] index_select perm_sorted : Elapsed 0.017 ms (1.696 ms / 100) B = [40, 4, 5, 16] (stride (1, 40, 160, 800)) A = [40, 4, 20, 16] (stride (1, 640, 2560, 40)) dim = 2 1.900 -> 1.899 ( -0.05%) [ +0.26% +0.21% +0.00% / +0.26% +0.16% -0.05%] index_select const : Elapsed 0.019 ms (1.905 ms / 100) 1.887 -> 1.889 ( +0.11%) [ +0.11% +0.26% +0.00% / +0.11% +0.37% +0.42%] index_select wrap : Elapsed 0.019 ms (1.889 ms / 100) 1.889 -> 1.893 ( +0.21%) [ +0.00% +0.16% +0.00% / +0.21% +0.42% +0.32%] index_select linear : Elapsed 0.019 ms (1.889 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.26% +0.16%] index_select reverse : Elapsed 0.019 ms (1.894 ms / 100) 1.901 -> 1.904 ( +0.16%) [ +0.16% +0.11% +0.00% / +0.16% +0.16% +0.21%] index_select skip64 : Elapsed 0.019 ms (1.904 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.21% +0.21%] index_select skip256 : Elapsed 0.019 ms (1.893 ms / 100) 1.893 -> 1.893 ( +0.00%) [ +0.00% +0.05% +0.16% / +0.00% +0.26% +0.37%] index_select spread : Elapsed 0.019 ms (1.893 ms / 100) 1.902 -> 1.901 ( -0.05%) [ +0.00% +0.16% +0.00% / -0.05% +0.11% +0.21%] index_select strided 3 : Elapsed 0.019 ms (1.902 ms / 100) 1.882 -> 1.882 ( +0.00%) [ +0.21% +0.16% +0.00% / +0.00% +0.64% +0.64%] index_select strided 5 : Elapsed 0.019 ms (1.886 ms / 100) 1.884 -> 1.885 ( +0.05%) [ +0.00% +0.11% +0.05% / +0.05% +0.90% +0.90%] index_select strided 7 : Elapsed 0.019 ms (1.884 ms / 100) 1.895 -> 1.896 ( +0.05%) [ +0.00% +0.11% +0.21% / +0.05% +0.69% +0.53%] index_select strided 8 : Elapsed 0.019 ms (1.895 ms / 100) 1.891 -> 1.897 ( +0.32%) [ +0.05% +0.00% +0.16% / +0.32% +0.58% +0.42%] index_select strided 16 : Elapsed 0.019 ms (1.892 ms / 100) 1.890 -> 1.891 ( +0.05%) [ +0.00% +0.21% +0.37% / +0.05% +0.63% +0.69%] index_select random : Elapsed 0.019 ms (1.890 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.32% +0.21%] index_select random_sorted : Elapsed 0.019 ms (1.889 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.69% +0.53%] index_select perm : Elapsed 0.019 ms (1.889 ms / 100) 1.901 -> 1.904 ( +0.16%) [ +0.00% +0.00% +0.11% / +0.16% +0.32% +0.26%] index_select perm_sorted : Elapsed 0.019 ms (1.901 ms / 100) out_shape = [40, 4, 20, 5] in_shape = [40, 4, 20, 16] idx_dim = 3 B = [40, 4, 20, 5] (stride (100, 4000, 5, 1)) A = [40, 4, 20, 16] (stride (4, 1, 160, 3200)) dim = 3 2.276 -> 2.275 ( -0.04%) [ +0.00% +0.04% +0.22% / -0.04% +0.31% +0.22%] index_select const : Elapsed 0.023 ms (2.276 ms / 100) 2.286 -> 2.293 ( +0.31%) [ +0.09% +0.00% +0.35% / +0.31% +0.48% +0.48%] index_select wrap : Elapsed 0.023 ms (2.288 ms / 100) 2.296 -> 2.297 ( +0.04%) [ +0.22% +0.17% +0.00% / +0.04% +0.39% +0.52%] index_select linear : Elapsed 0.023 ms (2.301 ms / 100) 2.287 -> 2.291 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.17% +0.17% +0.22%] index_select reverse : Elapsed 0.023 ms (2.292 ms / 100) 2.275 -> 2.279 ( +0.18%) [ +0.00% +0.13% +0.13% / +0.18% +0.66% +0.44%] index_select skip64 : Elapsed 0.023 ms (2.275 ms / 100) 2.274 -> 2.272 ( -0.09%) [ +0.00% +0.04% +0.09% / -0.09% +0.35% +0.26%] index_select skip256 : Elapsed 0.023 ms (2.274 ms / 100) 2.290 -> 2.296 ( +0.26%) [ +0.44% +0.00% +0.17% / +0.26% +0.70% +0.83%] index_select spread : Elapsed 0.023 ms (2.300 ms / 100) 2.298 -> 2.301 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.22% +0.39%] index_select strided 3 : Elapsed 0.023 ms (2.298 ms / 100) 2.290 -> 2.295 ( +0.22%) [ +0.00% +0.04% +0.26% / +0.22% +0.48% +0.61%] index_select strided 5 : Elapsed 0.023 ms (2.290 ms / 100) 2.286 -> 2.284 ( -0.09%) [ +0.04% +0.00% +0.00% / -0.09% +0.48% +0.48%] index_select strided 7 : Elapsed 0.023 ms (2.287 ms / 100) 2.268 -> 2.268 ( +0.00%) [ +0.04% +0.00% +0.09% / +0.00% +0.53% +0.49%] index_select strided 8 : Elapsed 0.023 ms (2.269 ms / 100) 2.295 -> 2.295 ( +0.00%) [ +0.00% +0.04% +0.26% / +0.00% +0.17% +0.26%] index_select random : Elapsed 0.023 ms (2.295 ms / 100) 2.280 -> 2.285 ( +0.22%) [ +0.00% +0.39% +0.18% / +0.22% +0.57% +0.48%] index_select random_sorted : Elapsed 0.023 ms (2.280 ms / 100) 2.283 -> 2.285 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.13% +0.44%] index_select perm : Elapsed 0.023 ms (2.287 ms / 100) 2.274 -> 2.272 ( -0.09%) [ +0.22% +0.00% +0.22% / -0.09% +0.04% +0.09%] index_select perm_sorted : Elapsed 0.023 ms (2.279 ms / 100) B = [40, 4, 20, 5] (stride (1, 4000, 200, 40)) A = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) dim = 3 0.888 -> 0.891 ( +0.34%) [ +0.11% +0.00% +0.11% / +0.34% +1.13% +1.35%] index_select const : Elapsed 0.009 ms (0.889 ms / 100) 0.880 -> 0.884 ( +0.45%) [ +0.23% +0.34% +0.00% / +0.45% +4.32% +4.55%] index_select wrap : Elapsed 0.009 ms (0.882 ms / 100) 0.876 -> 0.880 ( +0.46%) [ +0.34% +0.46% +0.00% / +0.46% +5.48% +5.25%] index_select linear : Elapsed 0.009 ms (0.879 ms / 100) 0.880 -> 0.884 ( +0.45%) [ +0.11% +0.11% +0.00% / +0.45% +0.80% +0.68%] index_select reverse : Elapsed 0.009 ms (0.881 ms / 100) 0.875 -> 0.881 ( +0.69%) [ +0.46% +0.00% +0.46% / +0.69% +2.97% +3.54%] index_select skip64 : Elapsed 0.009 ms (0.879 ms / 100) 0.879 -> 0.887 ( +0.91%) [ +0.00% +0.34% +0.11% / +0.91% +3.64% +3.53%] index_select skip256 : Elapsed 0.009 ms (0.879 ms / 100) 0.888 -> 0.890 ( +0.23%) [ +0.11% +0.45% +0.00% / +0.23% +5.74% +5.63%] index_select spread : Elapsed 0.009 ms (0.889 ms / 100) 0.885 -> 0.888 ( +0.34%) [ +0.45% +0.23% +0.00% / +0.34% +5.76% +6.21%] index_select strided 3 : Elapsed 0.009 ms (0.889 ms / 100) 0.869 -> 0.870 ( +0.12%) [ +0.00% +0.23% +0.12% / +0.12% +5.98% +6.44%] index_select strided 5 : Elapsed 0.009 ms (0.869 ms / 100) 0.900 -> 0.902 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +2.89% +2.78%] index_select strided 7 : Elapsed 0.009 ms (0.902 ms / 100) 0.892 -> 0.895 ( +0.34%) [ +0.34% +0.00% +0.56% / +0.34% +0.78% +0.56%] index_select strided 8 : Elapsed 0.009 ms (0.895 ms / 100) 0.901 -> 0.904 ( +0.33%) [ +0.11% +0.33% +0.00% / +0.33% +3.22% +3.33%] index_select random : Elapsed 0.009 ms (0.902 ms / 100) 0.878 -> 0.881 ( +0.34%) [ +0.00% +0.23% +0.46% / +0.34% +5.35% +5.47%] index_select random_sorted : Elapsed 0.009 ms (0.878 ms / 100) 0.897 -> 0.885 ( -1.34%) [ +0.11% +0.00% +0.00% / +0.22% -1.34% -1.34%] index_select perm : Elapsed 0.009 ms (0.898 ms / 100) 0.885 -> 0.882 ( -0.34%) [ +0.00% +0.11% +0.00% / +0.00% -0.11% -0.34%] index_select perm_sorted : Elapsed 0.009 ms (0.885 ms / 100) B = [40, 4, 20, 5] (stride (20, 4000, 1, 800)) A = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) dim = 3 2.283 -> 2.282 ( -0.04%) [ +0.18% +0.00% +0.13% / -0.04% +0.39% +0.53%] index_select const : Elapsed 0.023 ms (2.287 ms / 100) 2.310 -> 2.308 ( -0.09%) [ +0.04% +0.22% +0.00% / -0.09% +0.43% +0.39%] index_select wrap : Elapsed 0.023 ms (2.311 ms / 100) 2.306 -> 2.310 ( +0.17%) [ +0.17% +0.00% +0.09% / +0.17% +0.39% +0.65%] index_select linear : Elapsed 0.023 ms (2.310 ms / 100) 2.299 -> 2.298 ( -0.04%) [ +0.00% +0.04% +0.09% / -0.04% +0.09% +0.22%] index_select reverse : Elapsed 0.023 ms (2.299 ms / 100) 2.281 -> 2.284 ( +0.13%) [ +0.04% +0.00% +0.00% / +0.13% +0.70% +0.61%] index_select skip64 : Elapsed 0.023 ms (2.282 ms / 100) 2.283 -> 2.281 ( -0.09%) [ +0.00% +0.00% +0.04% / -0.09% +0.48% +0.44%] index_select skip256 : Elapsed 0.023 ms (2.283 ms / 100) 2.293 -> 2.298 ( +0.22%) [ +0.22% +0.00% +0.26% / +0.22% +0.39% +0.57%] index_select spread : Elapsed 0.023 ms (2.298 ms / 100) 2.297 -> 2.301 ( +0.17%) [ +0.04% +0.00% +0.09% / +0.17% +0.39% +0.30%] index_select strided 3 : Elapsed 0.023 ms (2.298 ms / 100) 2.283 -> 2.288 ( +0.22%) [ +0.00% +0.31% +0.04% / +0.22% +0.92% +0.79%] index_select strided 5 : Elapsed 0.023 ms (2.283 ms / 100) 2.293 -> 2.296 ( +0.13%) [ +0.17% +0.17% +0.00% / +0.13% +0.70% +0.78%] index_select strided 7 : Elapsed 0.023 ms (2.297 ms / 100) 2.281 -> 2.280 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.48% +0.61%] index_select strided 8 : Elapsed 0.023 ms (2.281 ms / 100) 2.254 -> 2.254 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.27% +0.35%] index_select random : Elapsed 0.023 ms (2.255 ms / 100) 2.280 -> 2.284 ( +0.18%) [ +0.09% +0.00% +0.53% / +0.18% +0.70% +0.70%] index_select random_sorted : Elapsed 0.023 ms (2.282 ms / 100) 2.294 -> 2.298 ( +0.17%) [ +0.09% +0.17% +0.00% / +0.17% +0.31% +0.48%] index_select perm : Elapsed 0.023 ms (2.296 ms / 100) 2.289 -> 2.294 ( +0.22%) [ +0.13% +0.17% +0.00% / +0.22% +0.44% +0.57%] index_select perm_sorted : Elapsed 0.023 ms (2.292 ms / 100) B = [40, 4, 20, 5] (stride (4, 1, 800, 160)) A = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) dim = 3 0.894 -> 0.894 ( +0.00%) [ +0.22% +0.22% +0.00% / +0.00% +1.23% +0.67%] index_select const : Elapsed 0.009 ms (0.896 ms / 100) 0.885 -> 0.889 ( +0.45%) [ +0.11% +0.11% +0.00% / +0.45% +3.50% +3.50%] index_select wrap : Elapsed 0.009 ms (0.886 ms / 100) 0.881 -> 0.885 ( +0.45%) [ +0.23% +0.23% +0.00% / +0.45% +4.43% +4.20%] index_select linear : Elapsed 0.009 ms (0.883 ms / 100) 0.882 -> 0.883 ( +0.11%) [ +0.23% +0.23% +0.00% / +0.11% +0.91% +0.91%] index_select reverse : Elapsed 0.009 ms (0.884 ms / 100) 0.879 -> 0.882 ( +0.34%) [ +0.34% +0.00% +0.57% / +0.34% +3.07% +3.07%] index_select skip64 : Elapsed 0.009 ms (0.882 ms / 100) 0.885 -> 0.888 ( +0.34%) [ +0.23% +0.11% +0.00% / +0.34% +2.94% +3.05%] index_select skip256 : Elapsed 0.009 ms (0.887 ms / 100) 0.889 -> 0.895 ( +0.67%) [ +0.00% +0.45% +0.67% / +0.67% +5.51% +5.51%] index_select spread : Elapsed 0.009 ms (0.889 ms / 100) 0.890 -> 0.892 ( +0.22%) [ +0.00% +0.45% +0.34% / +0.22% +5.28% +5.39%] index_select strided 3 : Elapsed 0.009 ms (0.890 ms / 100) 0.872 -> 0.875 ( +0.34%) [ +0.00% +0.11% +0.46% / +0.34% +6.19% +6.19%] index_select strided 5 : Elapsed 0.009 ms (0.872 ms / 100) 0.906 -> 0.907 ( +0.11%) [ +0.00% +0.22% +0.00% / +0.11% +2.54% +2.10%] index_select strided 7 : Elapsed 0.009 ms (0.906 ms / 100) 0.901 -> 0.897 ( -0.44%) [ +0.00% +0.00% +0.11% / +0.00% -0.44% +0.22%] index_select strided 8 : Elapsed 0.009 ms (0.901 ms / 100) 0.881 -> 0.879 ( -0.23%) [ +0.00% +0.11% +0.00% / -0.23% +1.02% +1.36%] index_select random : Elapsed 0.009 ms (0.881 ms / 100) 0.892 -> 0.890 ( -0.22%) [ +0.00% +0.11% +0.00% / -0.22% +1.91% +2.24%] index_select random_sorted : Elapsed 0.009 ms (0.892 ms / 100) 0.898 -> 0.898 ( +0.00%) [ +0.00% +0.33% +0.22% / +0.00% +2.34% +2.34%] index_select perm : Elapsed 0.009 ms (0.898 ms / 100) 0.910 -> 0.908 ( -0.22%) [ +0.11% +0.00% +0.33% / -0.22% +1.32% +1.43%] index_select perm_sorted : Elapsed 0.009 ms (0.911 ms / 100) B = [40, 4, 20, 5] (stride (1, 40, 160, 3200)) A = [40, 4, 20, 16] (stride (1280, 320, 16, 1)) dim = 3 2.155 -> 2.155 ( +0.00%) [ +0.05% +0.00% +0.14% / +0.00% +0.51% +0.51%] index_select const : Elapsed 0.022 ms (2.156 ms / 100) 2.152 -> 2.151 ( -0.05%) [ +0.23% +0.00% +0.00% / -0.05% +0.65% +0.46%] index_select wrap : Elapsed 0.022 ms (2.157 ms / 100) 2.160 -> 2.158 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.42% +0.42%] index_select linear : Elapsed 0.022 ms (2.160 ms / 100) 2.155 -> 2.160 ( +0.23%) [ +0.14% +0.00% +0.09% / +0.23% +0.56% +0.46%] index_select reverse : Elapsed 0.022 ms (2.158 ms / 100) 2.151 -> 2.152 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.60% +0.51%] index_select skip64 : Elapsed 0.022 ms (2.151 ms / 100) 2.152 -> 2.155 ( +0.14%) [ +0.19% +0.00% +0.28% / +0.14% +0.65% +0.56%] index_select skip256 : Elapsed 0.022 ms (2.156 ms / 100) 2.170 -> 2.173 ( +0.14%) [ +0.09% +0.00% +0.14% / +0.14% +0.65% +0.74%] index_select spread : Elapsed 0.022 ms (2.172 ms / 100) 2.173 -> 2.168 ( -0.23%) [ +0.05% +0.00% +0.09% / -0.23% +0.46% +0.32%] index_select strided 3 : Elapsed 0.022 ms (2.174 ms / 100) 2.167 -> 2.169 ( +0.09%) [ +0.00% +0.18% +0.09% / +0.09% +0.55% +0.60%] index_select strided 5 : Elapsed 0.022 ms (2.167 ms / 100) 2.166 -> 2.165 ( -0.05%) [ +0.00% +0.09% +0.05% / -0.05% +0.65% +0.92%] index_select strided 7 : Elapsed 0.022 ms (2.166 ms / 100) 2.167 -> 2.170 ( +0.14%) [ +0.18% +0.00% +0.18% / +0.14% +1.06% +1.02%] index_select strided 8 : Elapsed 0.022 ms (2.171 ms / 100) 2.166 -> 2.164 ( -0.09%) [ +0.18% +0.18% +0.00% / -0.09% +0.88% +0.55%] index_select random : Elapsed 0.022 ms (2.170 ms / 100) 2.165 -> 2.168 ( +0.14%) [ +0.14% +0.00% +0.09% / +0.14% +0.79% +0.65%] index_select random_sorted : Elapsed 0.022 ms (2.168 ms / 100) 2.166 -> 2.167 ( +0.05%) [ +0.28% +0.00% +0.00% / +0.05% +0.60% +0.55%] index_select perm : Elapsed 0.022 ms (2.172 ms / 100) 2.166 -> 2.166 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.51% +0.60%] index_select perm_sorted : Elapsed 0.022 ms (2.166 ms / 100) out_shape = [5, 16, 4, 20] in_shape = [40, 16, 4, 20] idx_dim = 0 B = [5, 16, 4, 20] (stride (1280, 20, 320, 1)) A = [40, 16, 4, 20] (stride (1, 3200, 800, 40)) dim = 0 1.394 -> 1.395 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.36% +0.43%] index_select const : Elapsed 0.014 ms (1.395 ms / 100) 1.390 -> 1.394 ( +0.29%) [ +0.14% +0.00% +0.00% / +0.29% +0.58% +0.29%] index_select wrap : Elapsed 0.014 ms (1.392 ms / 100) 1.389 -> 1.392 ( +0.22%) [ +0.22% +0.29% +0.00% / +0.22% +0.72% +0.58%] index_select linear : Elapsed 0.014 ms (1.392 ms / 100) 1.390 -> 1.392 ( +0.14%) [ +0.29% +0.00% +0.22% / +0.14% +0.50% +0.36%] index_select reverse : Elapsed 0.014 ms (1.394 ms / 100) 1.390 -> 1.385 ( -0.36%) [ +0.14% +0.00% +0.00% / -0.36% +0.36% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.392 ms / 100) 1.393 -> 1.395 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.50% +0.50%] index_select skip256 : Elapsed 0.014 ms (1.395 ms / 100) 1.384 -> 1.385 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.36% +0.36%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.22% +0.00% +0.22% / +0.15% +0.37% +0.73%] index_select strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.354 -> 1.353 ( -0.07%) [ +0.15% +0.00% +0.07% / -0.07% +0.37% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.356 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.36% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.394 -> 1.395 ( +0.07%) [ +0.22% +0.22% +0.00% / +0.07% +0.57% +0.43%] index_select strided 8 : Elapsed 0.014 ms (1.397 ms / 100) 1.346 -> 1.346 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.89% +0.67%] index_select strided 16 : Elapsed 0.013 ms (1.346 ms / 100) 1.357 -> 1.357 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.52% +0.59%] index_select random : Elapsed 0.014 ms (1.358 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.58% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.384 ms / 100) 1.395 -> 1.395 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.50% +0.43%] index_select perm : Elapsed 0.014 ms (1.395 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.15% +0.00% +0.07% / +0.22% +0.73% +0.80%] index_select perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [5, 16, 4, 20] (stride (1280, 1, 320, 16)) A = [40, 16, 4, 20] (stride (1, 160, 40, 2560)) dim = 0 0.670 -> 0.672 ( +0.30%) [ +0.00% +0.00% +0.00% / +0.30% +0.45% +0.60%] index_select const : Elapsed 0.007 ms (0.670 ms / 100) 0.669 -> 0.669 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.60% +0.90%] index_select wrap : Elapsed 0.007 ms (0.670 ms / 100) 0.668 -> 0.669 ( +0.15%) [ +1.50% +0.15% +0.00% / +0.15% +0.90% +0.75%] index_select linear : Elapsed 0.007 ms (0.678 ms / 100) 0.666 -> 0.667 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.75% +0.75%] index_select reverse : Elapsed 0.007 ms (0.668 ms / 100) 0.666 -> 0.668 ( +0.30%) [ +0.15% +0.30% +0.00% / +0.30% +1.05% +3.00%] index_select skip64 : Elapsed 0.007 ms (0.667 ms / 100) 0.668 -> 0.669 ( +0.15%) [ +0.30% +0.00% +0.00% / +0.15% +7.04% +0.75%] index_select skip256 : Elapsed 0.007 ms (0.670 ms / 100) 0.666 -> 0.668 ( +0.30%) [ +0.30% +0.00% +0.15% / +0.30% +0.90% +0.45%] index_select spread : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.15% +0.75% / +0.00% +0.60% +0.75%] index_select strided 3 : Elapsed 0.007 ms (0.667 ms / 100) 0.668 -> 0.667 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.60% +0.15%] index_select strided 5 : Elapsed 0.007 ms (0.668 ms / 100) 0.665 -> 0.667 ( +0.30%) [ +0.45% +0.00% +0.15% / +0.30% +0.90% +1.05%] index_select strided 7 : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.90% +0.60%] index_select strided 8 : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +0.45% +0.30%] index_select strided 16 : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.90% +0.75%] index_select random : Elapsed 0.007 ms (0.668 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.45% +0.00% / +0.15% +0.75% +1.20%] index_select random_sorted : Elapsed 0.007 ms (0.668 ms / 100) 0.669 -> 0.670 ( +0.15%) [ +0.00% +0.15% +0.75% / +0.15% +0.75% +1.05%] index_select perm : Elapsed 0.007 ms (0.669 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.90% +0.75%] index_select perm_sorted : Elapsed 0.007 ms (0.667 ms / 100) B = [5, 16, 4, 20] (stride (1280, 4, 1, 64)) A = [40, 16, 4, 20] (stride (80, 3200, 20, 1)) dim = 0 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.31% +0.46% / +0.15% +0.15% +0.00%] index_select const : Elapsed 0.006 ms (0.648 ms / 100) 0.647 -> 0.647 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.62% +0.31%] index_select wrap : Elapsed 0.006 ms (0.648 ms / 100) 0.647 -> 0.648 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.15% +0.15%] index_select linear : Elapsed 0.006 ms (0.648 ms / 100) 0.645 -> 0.646 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +3.26% +0.62%] index_select reverse : Elapsed 0.006 ms (0.646 ms / 100) 0.645 -> 0.646 ( +0.16%) [ +0.31% +0.00% +0.16% / +0.16% +0.31% +0.16%] index_select skip64 : Elapsed 0.006 ms (0.647 ms / 100) 0.655 -> 0.654 ( -0.15%) [ +0.46% +0.00% +0.15% / -0.15% +0.00% -0.15%] index_select skip256 : Elapsed 0.007 ms (0.658 ms / 100) 0.655 -> 0.656 ( +0.15%) [ +0.00% +0.15% +0.46% / +0.15% +0.31% +0.31%] index_select spread : Elapsed 0.007 ms (0.655 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.31% +0.00% / +0.00% +0.31% +0.15%] index_select strided 3 : Elapsed 0.006 ms (0.648 ms / 100) 0.648 -> 0.649 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.15% +0.46%] index_select strided 5 : Elapsed 0.006 ms (0.648 ms / 100) 0.647 -> 0.648 ( +0.15%) [ +0.31% +0.15% +0.00% / +0.15% +0.31% +0.31%] index_select strided 7 : Elapsed 0.006 ms (0.649 ms / 100) 0.646 -> 0.647 ( +0.15%) [ +0.00% +0.31% +0.00% / +0.15% +0.46% +0.31%] index_select strided 8 : Elapsed 0.006 ms (0.646 ms / 100) 0.656 -> 0.655 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.30% +0.46%] index_select strided 16 : Elapsed 0.007 ms (0.656 ms / 100) 0.655 -> 0.656 ( +0.15%) [ +0.00% +0.15% +1.83% / +0.15% +0.61% +0.92%] index_select random : Elapsed 0.007 ms (0.655 ms / 100) 0.646 -> 0.646 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.46% +0.93%] index_select random_sorted : Elapsed 0.006 ms (0.646 ms / 100) 0.645 -> 0.646 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.78% +1.24%] index_select perm : Elapsed 0.006 ms (0.645 ms / 100) 0.646 -> 0.647 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.93% +0.77%] index_select perm_sorted : Elapsed 0.006 ms (0.646 ms / 100) B = [5, 16, 4, 20] (stride (4, 400, 1, 20)) A = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) dim = 0 1.577 -> 1.579 ( +0.13%) [ +0.00% +0.00% +0.13% / +0.13% +0.89% +0.76%] index_select const : Elapsed 0.016 ms (1.577 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.19% +0.00% +0.25% / -0.06% +0.63% +0.63%] index_select wrap : Elapsed 0.016 ms (1.579 ms / 100) 1.585 -> 1.585 ( +0.00%) [ +0.06% +0.00% +0.13% / +0.00% +0.69% +0.69%] index_select linear : Elapsed 0.016 ms (1.586 ms / 100) 1.585 -> 1.586 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.63% +0.63%] index_select reverse : Elapsed 0.016 ms (1.586 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.38% +0.06% +0.00% / +0.06% +0.76% +0.83%] index_select skip64 : Elapsed 0.016 ms (1.581 ms / 100) 1.579 -> 1.578 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.70% +0.63%] index_select skip256 : Elapsed 0.016 ms (1.579 ms / 100) 1.575 -> 1.573 ( -0.13%) [ +0.32% +0.00% +0.19% / -0.13% +0.51% +0.38%] index_select spread : Elapsed 0.016 ms (1.580 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.19% +0.00% +0.25% / +0.13% +0.32% +0.32%] index_select strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.00% +0.13% +0.06% / +0.00% +0.70% +0.57%] index_select strided 5 : Elapsed 0.016 ms (1.578 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.00% +0.25% +0.13% / +0.13% +0.38% +0.57%] index_select strided 7 : Elapsed 0.016 ms (1.576 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.25% +0.00% +0.06% / +0.06% +0.19% +0.25%] index_select strided 8 : Elapsed 0.016 ms (1.580 ms / 100) 1.577 -> 1.580 ( +0.19%) [ +0.00% +0.13% +0.19% / +0.19% +0.57% +0.63%] index_select strided 16 : Elapsed 0.016 ms (1.577 ms / 100) 1.577 -> 1.574 ( -0.19%) [ +0.13% +0.13% +0.00% / -0.19% +0.44% +0.51%] index_select random : Elapsed 0.016 ms (1.579 ms / 100) 1.576 -> 1.580 ( +0.25%) [ +0.19% +0.00% +0.06% / +0.32% +0.25% +0.38%] index_select random_sorted : Elapsed 0.016 ms (1.579 ms / 100) 1.590 -> 1.592 ( +0.13%) [ +0.19% +0.00% +0.13% / +0.13% +1.01% +0.88%] index_select perm : Elapsed 0.016 ms (1.593 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.76% +0.82%] index_select perm_sorted : Elapsed 0.016 ms (1.578 ms / 100) B = [5, 16, 4, 20] (stride (320, 20, 1600, 1)) A = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) dim = 0 1.602 -> 1.602 ( +0.00%) [ +0.00% +0.06% +0.12% / +0.00% +0.44% +0.50%] index_select const : Elapsed 0.016 ms (1.602 ms / 100) 1.589 -> 1.595 ( +0.38%) [ +0.00% +0.57% +0.06% / +0.50% +0.44% +0.38%] index_select wrap : Elapsed 0.016 ms (1.589 ms / 100) 1.591 -> 1.594 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.25% +0.19% +0.57%] index_select linear : Elapsed 0.016 ms (1.594 ms / 100) 1.593 -> 1.587 ( -0.38%) [ +0.31% +0.00% +0.25% / -0.38% +0.56% +0.06%] index_select reverse : Elapsed 0.016 ms (1.598 ms / 100) 1.590 -> 1.592 ( +0.13%) [ +0.13% +0.06% +0.00% / +0.13% +0.69% +0.57%] index_select skip64 : Elapsed 0.016 ms (1.592 ms / 100) 1.598 -> 1.601 ( +0.19%) [ +0.44% +0.00% +0.44% / +0.19% +0.63% +0.88%] index_select skip256 : Elapsed 0.016 ms (1.605 ms / 100) 1.579 -> 1.577 ( -0.13%) [ +0.06% +0.00% +0.06% / -0.13% +0.13% +0.06%] index_select spread : Elapsed 0.016 ms (1.580 ms / 100) 1.586 -> 1.590 ( +0.25%) [ +0.00% +0.13% +0.25% / +0.25% +0.76% +0.44%] index_select strided 3 : Elapsed 0.016 ms (1.586 ms / 100) 1.575 -> 1.579 ( +0.25%) [ +0.13% +0.00% +0.25% / +0.25% +0.51% +0.32%] index_select strided 5 : Elapsed 0.016 ms (1.577 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.19% +0.00% +0.00% / -0.06% +0.32% +0.25%] index_select strided 7 : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.580 ( +0.19%) [ +0.00% +0.06% +0.32% / +0.19% +0.51% +0.51%] index_select strided 8 : Elapsed 0.016 ms (1.577 ms / 100) 1.579 -> 1.579 ( +0.00%) [ +0.13% +0.00% +0.06% / +0.00% +0.63% +0.57%] index_select strided 16 : Elapsed 0.016 ms (1.581 ms / 100) 1.576 -> 1.577 ( +0.06%) [ +0.00% +0.19% +0.25% / +0.06% +0.44% +0.51%] index_select random : Elapsed 0.016 ms (1.576 ms / 100) 1.570 -> 1.574 ( +0.25%) [ +0.25% +0.51% +0.00% / +0.25% +0.57% +0.64%] index_select random_sorted : Elapsed 0.016 ms (1.574 ms / 100) 1.576 -> 1.575 ( -0.06%) [ +0.32% +0.00% +0.00% / -0.06% +0.51% +0.63%] index_select perm : Elapsed 0.016 ms (1.581 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.38%] index_select perm_sorted : Elapsed 0.016 ms (1.578 ms / 100) B = [5, 16, 4, 20] (stride (64, 4, 1, 320)) A = [40, 16, 4, 20] (stride (1280, 20, 320, 1)) dim = 0 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.65%] index_select const : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.58% +0.51%] index_select wrap : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.58% +0.58%] index_select linear : Elapsed 0.014 ms (1.377 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.22% +0.22% +0.00% / +0.15% +0.73% +0.73%] index_select reverse : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.73% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.07% +0.15% +0.00% / -0.07% +0.73% +0.65%] index_select skip256 : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.00% +0.36% / +0.07% +0.58% +0.51%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.51% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.378 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.07% +0.00% +0.22% / +0.15% +1.38% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.375 ( -0.15%) [ +0.00% +0.00% +0.07% / -0.15% +0.58% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.65% +0.73%] index_select random : Elapsed 0.014 ms (1.377 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.73% +0.87%] index_select random_sorted : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.73% +0.73%] index_select perm : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.65%] index_select perm_sorted : Elapsed 0.014 ms (1.378 ms / 100) B = [5, 16, 4, 20] (stride (1, 20, 5, 320)) A = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) dim = 0 1.498 -> 1.501 ( +0.20%) [ +0.13% +0.27% +0.00% / +0.20% +0.33% +0.47%] index_select const : Elapsed 0.015 ms (1.500 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.41% +0.47%] index_select wrap : Elapsed 0.015 ms (1.478 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.47% +0.54%] index_select linear : Elapsed 0.015 ms (1.476 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.20% +0.00% +0.27% / +0.07% +0.54% +0.54%] index_select reverse : Elapsed 0.015 ms (1.489 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.54% +0.54%] index_select skip64 : Elapsed 0.015 ms (1.495 ms / 100) 1.499 -> 1.498 ( -0.07%) [ +0.13% +0.00% +0.00% / -0.07% +0.47% +0.33%] index_select skip256 : Elapsed 0.015 ms (1.501 ms / 100) 1.484 -> 1.491 ( +0.47%) [ +0.20% +0.00% +0.61% / +0.47% +0.74% +0.61%] index_select spread : Elapsed 0.015 ms (1.487 ms / 100) 1.484 -> 1.482 ( -0.13%) [ +0.13% +0.00% +0.07% / -0.13% +0.47% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.486 ms / 100) 1.481 -> 1.479 ( -0.14%) [ +0.07% +0.00% +0.00% / -0.14% +0.68% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.482 ms / 100) 1.488 -> 1.496 ( +0.54%) [ +0.67% +0.00% +0.07% / +0.54% +0.87% +1.08%] index_select strided 7 : Elapsed 0.015 ms (1.498 ms / 100) 1.500 -> 1.502 ( +0.13%) [ +0.20% +0.00% +0.13% / +0.13% +0.33% +0.27%] index_select strided 8 : Elapsed 0.015 ms (1.503 ms / 100) 1.479 -> 1.482 ( +0.20%) [ +0.14% +0.20% +0.00% / +0.20% +0.74% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.481 ms / 100) 1.472 -> 1.474 ( +0.14%) [ +0.20% +0.00% +0.14% / +0.14% +0.68% +0.75%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.482 -> 1.484 ( +0.13%) [ +0.20% +0.00% +0.00% / +0.13% +0.81% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.485 ms / 100) 1.494 -> 1.487 ( -0.47%) [ +0.47% +0.13% +0.00% / -0.47% +0.74% +0.07%] index_select perm : Elapsed 0.015 ms (1.501 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.00% +0.20% +0.20% / +0.13% +0.67% +0.67%] index_select perm_sorted : Elapsed 0.015 ms (1.493 ms / 100) out_shape = [40, 5, 4, 20] in_shape = [40, 16, 4, 20] idx_dim = 1 B = [40, 5, 4, 20] (stride (400, 80, 20, 1)) A = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) dim = 1 2.247 -> 2.249 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.93% +0.58%] index_select const : Elapsed 0.022 ms (2.247 ms / 100) 2.270 -> 2.274 ( +0.18%) [ +0.26% +0.09% +0.00% / +0.18% +0.84% +0.35%] index_select wrap : Elapsed 0.023 ms (2.276 ms / 100) 2.276 -> 2.278 ( +0.09%) [ +0.09% +0.18% +0.00% / +0.09% +0.62% +0.26%] index_select linear : Elapsed 0.023 ms (2.278 ms / 100) 2.261 -> 2.262 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.04% +0.62% +0.18%] index_select reverse : Elapsed 0.023 ms (2.261 ms / 100) 2.255 -> 2.254 ( -0.04%) [ +0.04% +0.27% +0.00% / -0.04% +0.58% +0.67%] index_select skip64 : Elapsed 0.023 ms (2.256 ms / 100) 2.249 -> 2.251 ( +0.09%) [ +0.00% +0.00% +0.04% / +0.09% +0.58% +0.62%] index_select skip256 : Elapsed 0.022 ms (2.249 ms / 100) 2.270 -> 2.272 ( +0.09%) [ +0.18% +0.00% +0.22% / +0.09% +0.40% +0.35%] index_select spread : Elapsed 0.023 ms (2.274 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.00% +0.09% +0.22% / +0.09% +0.18% +0.13%] index_select strided 3 : Elapsed 0.023 ms (2.279 ms / 100) 2.273 -> 2.272 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.31% +0.31%] index_select strided 5 : Elapsed 0.023 ms (2.275 ms / 100) 2.268 -> 2.271 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.44% +0.57%] index_select strided 7 : Elapsed 0.023 ms (2.268 ms / 100) 2.243 -> 2.246 ( +0.13%) [ +0.04% +0.13% +0.00% / +0.13% +0.67% +0.62%] index_select strided 8 : Elapsed 0.022 ms (2.244 ms / 100) 2.259 -> 2.260 ( +0.04%) [ +0.00% +0.18% +0.04% / +0.04% +0.62% +0.58%] index_select random : Elapsed 0.023 ms (2.259 ms / 100) 2.262 -> 2.261 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.31% +0.27%] index_select random_sorted : Elapsed 0.023 ms (2.263 ms / 100) 2.261 -> 2.265 ( +0.18%) [ +0.09% +0.00% +0.00% / +0.18% +0.44% +0.44%] index_select perm : Elapsed 0.023 ms (2.263 ms / 100) 2.266 -> 2.267 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.31% +0.53%] index_select perm_sorted : Elapsed 0.023 ms (2.267 ms / 100) B = [40, 5, 4, 20] (stride (400, 4, 1, 20)) A = [40, 16, 4, 20] (stride (1280, 1, 16, 64)) dim = 1 2.490 -> 2.488 ( -0.08%) [ +0.00% +0.40% +0.32% / +0.40% -0.04% -0.08%] index_select const : Elapsed 0.025 ms (2.490 ms / 100) 2.497 -> 2.483 ( -0.56%) [ +0.16% +0.12% +0.00% / -0.08% -0.32% -0.56%] index_select wrap : Elapsed 0.025 ms (2.501 ms / 100) 2.495 -> 2.485 ( -0.40%) [ +0.00% +0.00% +0.20% / -0.24% -0.20% -0.40%] index_select linear : Elapsed 0.025 ms (2.495 ms / 100) 2.493 -> 2.489 ( -0.16%) [ +0.20% +0.32% +0.00% / -0.08% -0.16% +0.00%] index_select reverse : Elapsed 0.025 ms (2.498 ms / 100) 2.496 -> 2.482 ( -0.56%) [ +0.44% +0.24% +0.00% / +0.36% -0.56% -0.40%] index_select skip64 : Elapsed 0.025 ms (2.507 ms / 100) 2.490 -> 2.487 ( -0.12%) [ +0.24% +0.24% +0.00% / +0.04% -0.12% -0.08%] index_select skip256 : Elapsed 0.025 ms (2.496 ms / 100) 2.513 -> 2.495 ( -0.72%) [ +0.36% +0.00% +0.16% / +0.20% -0.60% -0.72%] index_select spread : Elapsed 0.025 ms (2.522 ms / 100) 2.516 -> 2.507 ( -0.36%) [ +0.12% +0.08% +0.00% / +0.04% -0.36% -0.36%] index_select strided 3 : Elapsed 0.025 ms (2.519 ms / 100) 2.511 -> 2.505 ( -0.24%) [ +0.16% +0.00% +0.44% / +0.20% -0.24% -0.08%] index_select strided 5 : Elapsed 0.025 ms (2.515 ms / 100) 2.509 -> 2.502 ( -0.28%) [ +0.40% +0.00% +0.12% / +0.32% -0.28% -0.24%] index_select strided 7 : Elapsed 0.025 ms (2.519 ms / 100) 2.514 -> 2.504 ( -0.40%) [ +0.08% +0.00% +0.16% / +0.20% -0.40% -0.16%] index_select strided 8 : Elapsed 0.025 ms (2.516 ms / 100) 2.518 -> 2.500 ( -0.71%) [ +0.04% +0.00% +0.20% / -0.08% -0.68% -0.71%] index_select random : Elapsed 0.025 ms (2.519 ms / 100) 2.512 -> 2.507 ( -0.20%) [ +0.12% +0.00% +0.24% / +0.00% -0.20% -0.16%] index_select random_sorted : Elapsed 0.025 ms (2.515 ms / 100) 2.511 -> 2.510 ( -0.04%) [ +0.00% +0.24% +0.36% / +0.24% -0.04% -0.04%] index_select perm : Elapsed 0.025 ms (2.511 ms / 100) 2.514 -> 2.502 ( -0.48%) [ +0.00% +0.44% +0.00% / -0.12% -0.24% -0.48%] index_select perm_sorted : Elapsed 0.025 ms (2.514 ms / 100) B = [40, 5, 4, 20] (stride (400, 4, 1, 20)) A = [40, 16, 4, 20] (stride (80, 3200, 20, 1)) dim = 1 2.117 -> 2.118 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.43% +0.47%] index_select const : Elapsed 0.021 ms (2.120 ms / 100) 2.181 -> 2.175 ( -0.28%) [ +0.00% +0.09% +0.14% / -0.05% -0.28% -0.28%] index_select wrap : Elapsed 0.022 ms (2.181 ms / 100) 2.188 -> 2.183 ( -0.23%) [ +0.18% +0.00% +0.18% / +0.00% -0.09% -0.23%] index_select linear : Elapsed 0.022 ms (2.192 ms / 100) 2.180 -> 2.179 ( -0.05%) [ +0.18% +0.14% +0.00% / +0.28% -0.05% +0.00%] index_select reverse : Elapsed 0.022 ms (2.184 ms / 100) 2.110 -> 2.116 ( +0.28%) [ +0.00% +0.09% +0.09% / +0.28% +0.52% +0.52%] index_select skip64 : Elapsed 0.021 ms (2.110 ms / 100) 2.116 -> 2.122 ( +0.28%) [ +0.09% +0.00% +0.00% / +0.28% +0.43% +0.52%] index_select skip256 : Elapsed 0.021 ms (2.118 ms / 100) 2.177 -> 2.177 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.00% +0.14% +0.28%] index_select spread : Elapsed 0.022 ms (2.178 ms / 100) 2.181 -> 2.181 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.14% +0.00% +0.46%] index_select strided 3 : Elapsed 0.022 ms (2.183 ms / 100) 2.166 -> 2.169 ( +0.14%) [ +0.00% +0.18% +0.09% / +0.14% +0.51% +0.69%] index_select strided 5 : Elapsed 0.022 ms (2.166 ms / 100) 2.186 -> 2.185 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.32% +0.37%] index_select strided 7 : Elapsed 0.022 ms (2.188 ms / 100) 2.122 -> 2.127 ( +0.24%) [ +0.14% +0.05% +0.00% / +0.24% +0.85% +0.66%] index_select strided 8 : Elapsed 0.021 ms (2.125 ms / 100) 2.164 -> 2.166 ( +0.09%) [ +0.18% +0.28% +0.00% / +0.09% +1.34% +1.20%] index_select random : Elapsed 0.022 ms (2.168 ms / 100) 2.164 -> 2.167 ( +0.14%) [ +0.05% +0.00% +0.09% / +0.14% +1.11% +1.43%] index_select random_sorted : Elapsed 0.022 ms (2.165 ms / 100) 2.164 -> 2.158 ( -0.28%) [ +0.00% +0.42% +0.37% / +0.05% -0.28% -0.23%] index_select perm : Elapsed 0.022 ms (2.164 ms / 100) 2.161 -> 2.158 ( -0.14%) [ +0.00% +0.14% +0.05% / +0.09% -0.09% -0.14%] index_select perm_sorted : Elapsed 0.022 ms (2.161 ms / 100) B = [40, 5, 4, 20] (stride (80, 3200, 20, 1)) A = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) dim = 1 2.314 -> 2.321 ( +0.30%) [ +0.48% +0.17% +0.00% / +0.30% +0.73% +0.73%] index_select const : Elapsed 0.023 ms (2.325 ms / 100) 2.321 -> 2.314 ( -0.30%) [ +0.00% +0.30% +0.04% / -0.30% +0.22% +0.09%] index_select wrap : Elapsed 0.023 ms (2.321 ms / 100) 2.318 -> 2.319 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.52% +0.65%] index_select linear : Elapsed 0.023 ms (2.322 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.00% +0.17% +0.13% / +0.04% +0.65% +0.43%] index_select reverse : Elapsed 0.023 ms (2.320 ms / 100) 2.315 -> 2.322 ( +0.30%) [ +0.13% +0.26% +0.00% / +0.30% +0.35% +0.35%] index_select skip64 : Elapsed 0.023 ms (2.318 ms / 100) 2.320 -> 2.321 ( +0.04%) [ +0.00% +0.04% +0.13% / +0.04% +0.52% +0.09%] index_select skip256 : Elapsed 0.023 ms (2.320 ms / 100) 2.346 -> 2.345 ( -0.04%) [ +0.04% +0.26% +0.00% / +0.04% -0.04% +0.09%] index_select spread : Elapsed 0.023 ms (2.347 ms / 100) 2.347 -> 2.349 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.30% +0.47%] index_select strided 3 : Elapsed 0.024 ms (2.350 ms / 100) 2.352 -> 2.345 ( -0.30%) [ +0.00% +0.17% +0.00% / -0.30% +0.04% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.346 -> 2.343 ( -0.13%) [ +0.00% +0.30% +0.09% / -0.13% +0.13% +0.30%] index_select strided 7 : Elapsed 0.023 ms (2.346 ms / 100) 2.349 -> 2.351 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.17% +0.09% +0.30%] index_select strided 8 : Elapsed 0.023 ms (2.349 ms / 100) 2.343 -> 2.352 ( +0.38%) [ +0.00% +0.00% +0.13% / +0.60% +0.38% +0.47%] index_select random : Elapsed 0.023 ms (2.343 ms / 100) 2.343 -> 2.344 ( +0.04%) [ +0.26% +0.30% +0.00% / +0.04% +0.38% +0.17%] index_select random_sorted : Elapsed 0.023 ms (2.349 ms / 100) 2.347 -> 2.352 ( +0.21%) [ +0.00% +0.17% +0.04% / +0.38% +0.26% +0.21%] index_select perm : Elapsed 0.023 ms (2.347 ms / 100) 2.346 -> 2.346 ( +0.00%) [ +0.00% +0.30% +0.00% / +0.21% +0.13% +0.00%] index_select perm_sorted : Elapsed 0.023 ms (2.346 ms / 100) B = [40, 5, 4, 20] (stride (1, 160, 40, 800)) A = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) dim = 1 2.493 -> 2.488 ( -0.20%) [ +0.08% +0.00% +0.00% / -0.20% +0.32% +0.32%] index_select const : Elapsed 0.025 ms (2.495 ms / 100) 2.484 -> 2.488 ( +0.16%) [ +0.00% +0.28% +0.20% / +0.16% +0.68% +0.72%] index_select wrap : Elapsed 0.025 ms (2.484 ms / 100) 2.491 -> 2.490 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.32% +0.24%] index_select linear : Elapsed 0.025 ms (2.491 ms / 100) 2.482 -> 2.489 ( +0.28%) [ +0.00% +0.52% +0.44% / +0.28% +0.56% +0.81%] index_select reverse : Elapsed 0.025 ms (2.482 ms / 100) 2.487 -> 2.489 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.56% +0.72%] index_select skip64 : Elapsed 0.025 ms (2.493 ms / 100) 2.488 -> 2.490 ( +0.08%) [ +0.16% +0.12% +0.00% / +0.08% +0.56% +0.48%] index_select skip256 : Elapsed 0.025 ms (2.492 ms / 100) 2.511 -> 2.510 ( -0.04%) [ +0.00% +0.20% +0.36% / -0.04% +0.64% +0.44%] index_select spread : Elapsed 0.025 ms (2.511 ms / 100) 2.512 -> 2.517 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.48% +0.60%] index_select strided 3 : Elapsed 0.025 ms (2.517 ms / 100) 2.515 -> 2.516 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.48% +0.56%] index_select strided 5 : Elapsed 0.025 ms (2.517 ms / 100) 2.509 -> 2.512 ( +0.12%) [ +0.00% +0.20% +0.20% / +0.12% +0.68% +0.84%] index_select strided 7 : Elapsed 0.025 ms (2.509 ms / 100) 2.519 -> 2.517 ( -0.08%) [ +0.00% +0.16% +0.04% / -0.08% +0.36% +0.48%] index_select strided 8 : Elapsed 0.025 ms (2.519 ms / 100) 2.508 -> 2.511 ( +0.12%) [ +0.20% +0.16% +0.00% / +0.12% +0.56% +0.92%] index_select random : Elapsed 0.025 ms (2.513 ms / 100) 2.512 -> 2.511 ( -0.04%) [ +0.00% +0.36% +0.12% / -0.04% +0.60% +0.60%] index_select random_sorted : Elapsed 0.025 ms (2.512 ms / 100) 2.515 -> 2.523 ( +0.32%) [ +0.08% +0.00% +0.16% / +0.32% +0.64% +0.68%] index_select perm : Elapsed 0.025 ms (2.517 ms / 100) 2.512 -> 2.513 ( +0.04%) [ +0.04% +0.00% +0.28% / +0.04% +0.60% +0.56%] index_select perm_sorted : Elapsed 0.025 ms (2.513 ms / 100) out_shape = [40, 16, 5, 20] in_shape = [40, 16, 4, 20] idx_dim = 2 B = [40, 16, 5, 20] (stride (100, 4000, 1, 5)) A = [40, 16, 4, 20] (stride (20, 800, 12800, 1)) dim = 2 6.045 -> 6.030 ( -0.25%) [ +0.08% +0.03% +0.00% / +0.17% -0.23% -0.25%] index_add_ linear : Elapsed 0.060 ms (6.050 ms / 100) 6.045 -> 6.025 ( -0.33%) [ +0.03% +0.00% +0.08% / +0.15% -0.31% -0.33%] index_copy_ linear : Elapsed 0.060 ms (6.047 ms / 100) 6.049 -> 6.027 ( -0.36%) [ +0.07% +0.12% +0.00% / +0.15% -0.36% -0.18%] index_add_ reverse : Elapsed 0.061 ms (6.053 ms / 100) 6.047 -> 6.020 ( -0.45%) [ +0.00% +0.18% +0.00% / +0.07% -0.45% -0.28%] index_copy_ reverse : Elapsed 0.060 ms (6.047 ms / 100) 6.043 -> 6.021 ( -0.36%) [ +0.02% +0.00% +0.07% / -0.12% -0.36% -0.30%] index_add_ spread : Elapsed 0.060 ms (6.044 ms / 100) 6.038 -> 6.016 ( -0.36%) [ +0.00% +0.15% +0.12% / +0.07% -0.35% -0.36%] index_copy_ spread : Elapsed 0.060 ms (6.038 ms / 100) 6.039 -> 6.024 ( -0.25%) [ +0.07% +0.00% +0.05% / +0.07% -0.22% -0.25%] index_add_ strided 3 : Elapsed 0.060 ms (6.043 ms / 100) 6.039 -> 6.011 ( -0.46%) [ +0.13% +0.08% +0.00% / +0.00% -0.46% -0.23%] index_copy_ strided 3 : Elapsed 0.060 ms (6.047 ms / 100) 6.038 -> 6.016 ( -0.36%) [ +0.02% +0.00% +0.05% / -0.03% -0.36% -0.23%] index_add_ perm : Elapsed 0.060 ms (6.039 ms / 100) 6.035 -> 6.014 ( -0.35%) [ +0.10% +0.10% +0.00% / +0.05% -0.31% -0.35%] index_copy_ perm : Elapsed 0.060 ms (6.041 ms / 100) 6.040 -> 6.019 ( -0.35%) [ +0.12% +0.02% +0.00% / +0.02% -0.30% -0.35%] index_add_ perm_sorted : Elapsed 0.060 ms (6.047 ms / 100) 6.035 -> 6.012 ( -0.38%) [ +0.07% +0.00% +0.03% / +0.08% -0.38% -0.23%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.039 ms / 100) 6.225 -> 6.220 ( -0.08%) [ +0.02% +0.00% +0.03% / +0.16% +0.02% -0.08%] index_select const : Elapsed 0.062 ms (6.226 ms / 100) 6.336 -> 6.322 ( -0.22%) [ +0.00% +0.02% +0.03% / +0.13% -0.22% -0.17%] index_select wrap : Elapsed 0.063 ms (6.336 ms / 100) 6.310 -> 6.284 ( -0.41%) [ +0.00% +0.05% +0.10% / +0.16% -0.41% -0.21%] index_select linear : Elapsed 0.063 ms (6.310 ms / 100) 6.337 -> 6.315 ( -0.35%) [ +0.05% +0.00% +0.03% / +0.09% -0.24% -0.35%] index_select reverse : Elapsed 0.063 ms (6.340 ms / 100) 6.206 -> 6.209 ( +0.05%) [ +0.00% +0.18% +0.23% / +0.29% +0.05% +0.13%] index_select skip64 : Elapsed 0.062 ms (6.206 ms / 100) 6.227 -> 6.216 ( -0.18%) [ +0.14% +0.14% +0.00% / +0.00% -0.18% -0.18%] index_select skip256 : Elapsed 0.062 ms (6.236 ms / 100) 6.328 -> 6.310 ( -0.28%) [ +0.08% +0.00% +0.08% / +0.00% -0.17% -0.28%] index_select spread : Elapsed 0.063 ms (6.333 ms / 100) 6.321 -> 6.310 ( -0.17%) [ +0.00% +0.09% +0.13% / +0.14% -0.17% -0.08%] index_select strided 3 : Elapsed 0.063 ms (6.321 ms / 100) 6.275 -> 6.243 ( -0.51%) [ +0.08% +0.05% +0.00% / +0.13% -0.49% -0.51%] index_select random : Elapsed 0.063 ms (6.280 ms / 100) 6.285 -> 6.252 ( -0.53%) [ +0.00% +0.10% +0.02% / +0.06% -0.49% -0.53%] index_select random_sorted : Elapsed 0.063 ms (6.285 ms / 100) B = [40, 16, 5, 20] (stride (100, 4000, 1, 5)) A = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) dim = 2 6.110 -> 6.112 ( +0.03%) [ +0.11% +0.00% +0.33% / +0.20% +0.10% +0.03%] index_add_ linear : Elapsed 0.061 ms (6.117 ms / 100) 6.127 -> 6.115 ( -0.20%) [ +0.00% +0.20% +0.29% / +0.28% -0.20% -0.15%] index_copy_ linear : Elapsed 0.061 ms (6.127 ms / 100) 6.121 -> 6.102 ( -0.31%) [ +0.00% +0.10% +0.13% / +0.08% -0.25% -0.31%] index_add_ reverse : Elapsed 0.061 ms (6.121 ms / 100) 6.133 -> 6.119 ( -0.23%) [ +0.00% +0.02% +0.13% / +0.11% -0.23% -0.20%] index_copy_ reverse : Elapsed 0.061 ms (6.133 ms / 100) 6.115 -> 6.109 ( -0.10%) [ +0.00% +0.13% +0.16% / +0.13% -0.10% -0.10%] index_add_ spread : Elapsed 0.061 ms (6.115 ms / 100) 6.132 -> 6.115 ( -0.28%) [ +0.15% +0.00% +0.15% / +0.13% -0.28% -0.13%] index_copy_ spread : Elapsed 0.061 ms (6.141 ms / 100) 6.118 -> 6.107 ( -0.18%) [ +0.00% +0.10% +0.13% / +0.18% -0.03% -0.18%] index_add_ strided 3 : Elapsed 0.061 ms (6.118 ms / 100) 6.136 -> 6.114 ( -0.36%) [ +0.00% +0.23% +0.15% / +0.08% -0.36% -0.31%] index_copy_ strided 3 : Elapsed 0.061 ms (6.136 ms / 100) 6.113 -> 6.108 ( -0.08%) [ +0.21% +0.00% +0.26% / +0.11% -0.08% -0.05%] index_add_ perm : Elapsed 0.061 ms (6.126 ms / 100) 6.129 -> 6.112 ( -0.28%) [ +0.00% +0.20% +0.13% / +0.21% -0.26% -0.28%] index_copy_ perm : Elapsed 0.061 ms (6.129 ms / 100) 6.117 -> 6.103 ( -0.23%) [ +0.10% +0.00% +0.16% / +0.21% -0.23% -0.20%] index_add_ perm_sorted : Elapsed 0.061 ms (6.123 ms / 100) 6.133 -> 6.110 ( -0.38%) [ +0.00% +0.05% +0.00% / +0.11% -0.38% -0.28%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.133 ms / 100) 6.364 -> 6.348 ( -0.25%) [ +0.03% +0.00% +0.03% / +0.05% -0.25% -0.13%] index_select const : Elapsed 0.064 ms (6.366 ms / 100) 6.466 -> 6.454 ( -0.19%) [ +0.00% +0.15% +0.12% / +0.20% -0.19% -0.19%] index_select wrap : Elapsed 0.065 ms (6.466 ms / 100) 6.453 -> 6.442 ( -0.17%) [ +0.00% +0.02% +0.15% / +0.19% -0.17% -0.15%] index_select linear : Elapsed 0.065 ms (6.453 ms / 100) 6.442 -> 6.422 ( -0.31%) [ +0.05% +0.00% +0.00% / +0.25% -0.31% -0.19%] index_select reverse : Elapsed 0.064 ms (6.445 ms / 100) 6.363 -> 6.350 ( -0.20%) [ +0.00% +0.08% +0.14% / +0.09% -0.20% -0.16%] index_select skip64 : Elapsed 0.064 ms (6.363 ms / 100) 6.362 -> 6.348 ( -0.22%) [ +0.00% +0.14% +0.08% / +0.14% -0.22% -0.14%] index_select skip256 : Elapsed 0.064 ms (6.362 ms / 100) 6.442 -> 6.428 ( -0.22%) [ +0.00% +0.12% +0.17% / +0.28% -0.11% -0.22%] index_select spread : Elapsed 0.064 ms (6.442 ms / 100) 6.469 -> 6.448 ( -0.32%) [ +0.00% +0.00% +0.20% / +0.12% -0.23% -0.32%] index_select strided 3 : Elapsed 0.065 ms (6.469 ms / 100) 6.448 -> 6.432 ( -0.25%) [ +0.14% +0.00% +0.09% / +0.16% -0.23% -0.25%] index_select random : Elapsed 0.065 ms (6.457 ms / 100) 6.425 -> 6.410 ( -0.23%) [ +0.06% +0.00% +0.09% / +0.05% -0.20% -0.23%] index_select random_sorted : Elapsed 0.064 ms (6.429 ms / 100) B = [40, 16, 5, 20] (stride (320, 1, 12800, 16)) A = [40, 16, 4, 20] (stride (20, 800, 12800, 1)) dim = 2 5.852 -> 5.836 ( -0.27%) [ +0.03% +0.31% +0.00% / +0.10% -0.24% -0.27%] index_add_ linear : Elapsed 0.059 ms (5.854 ms / 100) 5.808 -> 5.783 ( -0.43%) [ +0.00% +0.28% +0.09% / +0.10% -0.43% -0.38%] index_copy_ linear : Elapsed 0.058 ms (5.808 ms / 100) 5.848 -> 5.828 ( -0.34%) [ +0.00% +0.07% +0.07% / +0.07% -0.34% -0.32%] index_add_ reverse : Elapsed 0.058 ms (5.848 ms / 100) 5.806 -> 5.784 ( -0.38%) [ +0.00% +0.16% +0.10% / -0.02% -0.36% -0.38%] index_copy_ reverse : Elapsed 0.058 ms (5.806 ms / 100) 5.850 -> 5.828 ( -0.38%) [ +0.17% +0.12% +0.00% / +0.10% -0.38% -0.31%] index_add_ spread : Elapsed 0.059 ms (5.860 ms / 100) 5.792 -> 5.775 ( -0.29%) [ +0.17% +0.17% +0.00% / +0.17% -0.29% -0.21%] index_copy_ spread : Elapsed 0.058 ms (5.802 ms / 100) 5.832 -> 5.829 ( -0.05%) [ +0.07% +0.00% +0.07% / +0.05% -0.02% -0.05%] index_add_ strided 3 : Elapsed 0.058 ms (5.836 ms / 100) 5.779 -> 5.766 ( -0.22%) [ +0.14% +0.00% +0.03% / +0.09% -0.10% -0.22%] index_copy_ strided 3 : Elapsed 0.058 ms (5.787 ms / 100) 5.828 -> 5.823 ( -0.09%) [ +0.00% +0.07% +0.12% / +0.00% -0.09% +0.00%] index_add_ perm : Elapsed 0.058 ms (5.828 ms / 100) 5.774 -> 5.769 ( -0.09%) [ +0.02% +0.28% +0.00% / +0.16% -0.03% -0.09%] index_copy_ perm : Elapsed 0.058 ms (5.775 ms / 100) 5.838 -> 5.818 ( -0.34%) [ +0.03% +0.00% +0.07% / +0.07% -0.27% -0.34%] index_add_ perm_sorted : Elapsed 0.058 ms (5.840 ms / 100) 5.786 -> 5.766 ( -0.35%) [ +0.03% +0.07% +0.00% / +0.16% -0.35% -0.28%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.788 ms / 100) 6.045 -> 6.047 ( +0.03%) [ +0.00% +0.07% +0.00% / +0.22% +0.07% +0.03%] index_select const : Elapsed 0.060 ms (6.045 ms / 100) 6.172 -> 6.142 ( -0.49%) [ +0.00% +0.02% +0.16% / +0.08% -0.44% -0.49%] index_select wrap : Elapsed 0.062 ms (6.172 ms / 100) 6.147 -> 6.120 ( -0.44%) [ +0.02% +0.00% +0.10% / +0.15% -0.28% -0.44%] index_select linear : Elapsed 0.061 ms (6.148 ms / 100) 6.165 -> 6.141 ( -0.39%) [ +0.08% +0.00% +0.10% / +0.16% -0.36% -0.39%] index_select reverse : Elapsed 0.062 ms (6.170 ms / 100) 6.027 -> 6.035 ( +0.13%) [ +0.13% +0.00% +0.28% / +0.17% +0.13% +0.22%] index_select skip64 : Elapsed 0.060 ms (6.035 ms / 100) 6.049 -> 6.043 ( -0.10%) [ +0.00% +0.03% +0.02% / +0.12% -0.10% +0.00%] index_select skip256 : Elapsed 0.060 ms (6.049 ms / 100) 6.166 -> 6.130 ( -0.58%) [ +0.08% +0.00% +0.00% / +0.13% -0.58% -0.45%] index_select spread : Elapsed 0.062 ms (6.171 ms / 100) 6.162 -> 6.134 ( -0.45%) [ +0.06% +0.00% +0.06% / +0.06% -0.45% -0.44%] index_select strided 3 : Elapsed 0.062 ms (6.166 ms / 100) 6.152 -> 6.128 ( -0.39%) [ +0.05% +0.00% +0.05% / +0.10% -0.39% -0.33%] index_select random : Elapsed 0.062 ms (6.155 ms / 100) 6.170 -> 6.151 ( -0.31%) [ +0.00% +0.21% +0.06% / +0.21% -0.29% -0.31%] index_select random_sorted : Elapsed 0.062 ms (6.170 ms / 100) B = [40, 16, 5, 20] (stride (20, 800, 12800, 1)) A = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) dim = 2 5.864 -> 5.868 ( +0.07%) [ +0.00% +0.07% +0.12% / +0.24% +0.22% +0.07%] index_add_ linear : Elapsed 0.059 ms (5.864 ms / 100) 5.806 -> 5.808 ( +0.03%) [ +0.00% +0.22% +0.19% / +0.12% +0.22% +0.03%] index_copy_ linear : Elapsed 0.058 ms (5.806 ms / 100) 5.861 -> 5.869 ( +0.14%) [ +0.00% +0.09% +0.10% / +0.14% +0.29% +0.19%] index_add_ reverse : Elapsed 0.059 ms (5.861 ms / 100) 5.809 -> 5.802 ( -0.12%) [ +0.14% +0.02% +0.00% / +0.03% +0.12% -0.12%] index_copy_ reverse : Elapsed 0.058 ms (5.817 ms / 100) 5.870 -> 5.868 ( -0.03%) [ +0.00% +0.03% +0.12% / +0.03% +0.02% -0.03%] index_add_ spread : Elapsed 0.059 ms (5.870 ms / 100) 5.812 -> 5.804 ( -0.14%) [ +0.00% +0.03% +0.07% / +0.03% -0.14% +0.07%] index_copy_ spread : Elapsed 0.058 ms (5.812 ms / 100) 5.865 -> 5.878 ( +0.22%) [ +0.05% +0.00% +0.14% / +0.22% +0.36% +0.43%] index_add_ strided 3 : Elapsed 0.059 ms (5.868 ms / 100) 5.800 -> 5.816 ( +0.28%) [ +0.16% +0.00% +0.26% / +0.28% +0.53% +0.41%] index_copy_ strided 3 : Elapsed 0.058 ms (5.809 ms / 100) 5.868 -> 5.883 ( +0.26%) [ +0.00% +0.00% +0.15% / +0.32% +0.36% +0.26%] index_add_ perm : Elapsed 0.059 ms (5.868 ms / 100) 5.826 -> 5.820 ( -0.10%) [ +0.03% +0.00% +0.05% / +0.10% -0.10% +0.10%] index_copy_ perm : Elapsed 0.058 ms (5.828 ms / 100) 5.872 -> 5.883 ( +0.19%) [ +0.00% +0.09% +0.03% / +0.26% +0.19% +0.24%] index_add_ perm_sorted : Elapsed 0.059 ms (5.872 ms / 100) 5.816 -> 5.824 ( +0.14%) [ +0.00% +0.05% +0.22% / +0.26% +0.14% +0.15%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.816 ms / 100) 6.113 -> 6.124 ( +0.18%) [ +0.21% +0.00% +0.28% / +0.36% +0.18% +0.18%] index_select const : Elapsed 0.061 ms (6.126 ms / 100) 6.193 -> 6.204 ( +0.18%) [ +0.11% +0.00% +0.11% / +0.18% +0.24% +0.34%] index_select wrap : Elapsed 0.062 ms (6.200 ms / 100) 6.176 -> 6.187 ( +0.18%) [ +0.24% +0.00% +0.21% / +0.44% +0.18% +0.36%] index_select linear : Elapsed 0.062 ms (6.191 ms / 100) 6.185 -> 6.185 ( +0.00%) [ +0.00% +0.03% +0.23% / +0.00% +0.11% +0.08%] index_select reverse : Elapsed 0.062 ms (6.185 ms / 100) 6.118 -> 6.122 ( +0.07%) [ +0.08% +0.00% +0.20% / +0.11% +0.20% +0.07%] index_select skip64 : Elapsed 0.061 ms (6.123 ms / 100) 6.117 -> 6.126 ( +0.15%) [ +0.00% +0.08% +0.16% / +0.20% +0.15% +0.23%] index_select skip256 : Elapsed 0.061 ms (6.117 ms / 100) 6.174 -> 6.190 ( +0.26%) [ +0.02% +0.00% +0.28% / +0.26% +0.32% +0.34%] index_select spread : Elapsed 0.062 ms (6.175 ms / 100) 6.191 -> 6.201 ( +0.16%) [ +0.00% +0.11% +0.31% / +0.37% +0.19% +0.16%] index_select strided 3 : Elapsed 0.062 ms (6.191 ms / 100) 6.165 -> 6.180 ( +0.24%) [ +0.02% +0.00% +0.21% / +0.26% +0.28% +0.24%] index_select random : Elapsed 0.062 ms (6.166 ms / 100) 6.161 -> 6.163 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.21% +0.24%] index_select random_sorted : Elapsed 0.062 ms (6.161 ms / 100) B = [40, 16, 5, 20] (stride (16, 1, 12800, 640)) A = [40, 16, 4, 20] (stride (4, 160, 1, 2560)) dim = 2 5.751 -> 5.708 ( -0.75%) [ +0.05% +0.00% +0.12% / +0.09% -0.66% -0.75%] index_add_ linear : Elapsed 0.058 ms (5.754 ms / 100) 5.691 -> 5.656 ( -0.62%) [ +0.00% +0.11% +0.04% / +0.16% -0.62% -0.47%] index_copy_ linear : Elapsed 0.057 ms (5.691 ms / 100) 5.752 -> 5.715 ( -0.64%) [ +0.05% +0.02% +0.00% / +0.03% -0.59% -0.64%] index_add_ reverse : Elapsed 0.058 ms (5.755 ms / 100) 5.690 -> 5.656 ( -0.60%) [ +0.07% +0.00% +0.00% / +0.07% -0.60% -0.54%] index_copy_ reverse : Elapsed 0.057 ms (5.694 ms / 100) 5.757 -> 5.713 ( -0.76%) [ +0.10% +0.00% +0.07% / +0.10% -0.73% -0.76%] index_add_ spread : Elapsed 0.058 ms (5.763 ms / 100) 5.690 -> 5.650 ( -0.70%) [ +0.14% +0.00% +0.04% / +0.11% -0.70% -0.49%] index_copy_ spread : Elapsed 0.057 ms (5.698 ms / 100) 5.729 -> 5.713 ( -0.28%) [ +0.07% +0.00% +0.09% / +0.05% -0.07% -0.28%] index_add_ strided 3 : Elapsed 0.057 ms (5.733 ms / 100) 5.668 -> 5.661 ( -0.12%) [ +0.00% +0.16% +0.26% / +0.16% -0.12% -0.12%] index_copy_ strided 3 : Elapsed 0.057 ms (5.668 ms / 100) 5.754 -> 5.708 ( -0.80%) [ +0.00% +0.03% +0.00% / -0.05% -0.73% -0.80%] index_add_ perm : Elapsed 0.058 ms (5.754 ms / 100) 5.685 -> 5.651 ( -0.60%) [ +0.00% +0.02% +0.14% / +0.12% -0.40% -0.60%] index_copy_ perm : Elapsed 0.057 ms (5.685 ms / 100) 5.758 -> 5.709 ( -0.85%) [ +0.00% +0.12% +0.03% / +0.12% -0.85% -0.82%] index_add_ perm_sorted : Elapsed 0.058 ms (5.758 ms / 100) 5.694 -> 5.656 ( -0.67%) [ +0.00% +0.05% +0.05% / +0.07% -0.67% -0.63%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.694 ms / 100) 6.048 -> 6.007 ( -0.68%) [ +0.00% +0.02% +0.07% / +0.12% -0.68% -0.60%] index_select const : Elapsed 0.060 ms (6.048 ms / 100) 6.048 -> 6.009 ( -0.64%) [ +0.05% +0.00% +0.05% / +0.05% -0.64% -0.45%] index_select wrap : Elapsed 0.061 ms (6.051 ms / 100) 6.048 -> 6.011 ( -0.61%) [ +0.10% +0.13% +0.00% / +0.07% -0.61% -0.61%] index_select linear : Elapsed 0.061 ms (6.054 ms / 100) 6.041 -> 6.001 ( -0.66%) [ +0.10% +0.00% +0.25% / +0.18% -0.36% -0.66%] index_select reverse : Elapsed 0.060 ms (6.047 ms / 100) 6.045 -> 6.001 ( -0.73%) [ +0.05% +0.00% +0.13% / +0.17% -0.73% -0.55%] index_select skip64 : Elapsed 0.060 ms (6.048 ms / 100) 6.050 -> 6.007 ( -0.71%) [ +0.00% +0.10% +0.17% / +0.07% -0.71% -0.68%] index_select skip256 : Elapsed 0.061 ms (6.050 ms / 100) 6.047 -> 6.011 ( -0.60%) [ +0.05% +0.00% +0.18% / +0.07% -0.23% -0.60%] index_select spread : Elapsed 0.061 ms (6.050 ms / 100) 6.049 -> 6.007 ( -0.69%) [ +0.10% +0.02% +0.00% / +0.02% -0.66% -0.69%] index_select strided 3 : Elapsed 0.061 ms (6.055 ms / 100) 6.047 -> 6.011 ( -0.60%) [ +0.15% +0.00% +0.08% / +0.10% -0.60% -0.56%] index_select random : Elapsed 0.061 ms (6.056 ms / 100) 6.045 -> 6.005 ( -0.66%) [ +0.08% +0.05% +0.00% / +0.13% -0.43% -0.66%] index_select random_sorted : Elapsed 0.060 ms (6.050 ms / 100) B = [40, 16, 5, 20] (stride (16, 1, 12800, 640)) A = [40, 16, 4, 20] (stride (1, 40, 640, 2560)) dim = 2 5.599 -> 5.605 ( +0.11%) [ +0.05% +0.00% +0.27% / +0.23% +0.11% +0.23%] index_add_ linear : Elapsed 0.056 ms (5.602 ms / 100) 5.547 -> 5.551 ( +0.07%) [ +0.00% +0.07% +0.22% / +0.07% +0.16% +0.20%] index_copy_ linear : Elapsed 0.055 ms (5.547 ms / 100) 5.602 -> 5.610 ( +0.14%) [ +0.00% +0.00% +0.16% / +0.14% +0.34% +0.32%] index_add_ reverse : Elapsed 0.056 ms (5.602 ms / 100) 5.547 -> 5.555 ( +0.14%) [ +0.02% +0.00% +0.22% / +0.14% +0.38% +0.29%] index_copy_ reverse : Elapsed 0.055 ms (5.548 ms / 100) 5.602 -> 5.611 ( +0.16%) [ +0.05% +0.00% +0.37% / +0.18% +0.23% +0.16%] index_add_ spread : Elapsed 0.056 ms (5.605 ms / 100) 5.541 -> 5.554 ( +0.23%) [ +0.00% +0.02% +0.22% / +0.29% +0.40% +0.23%] index_copy_ spread : Elapsed 0.055 ms (5.541 ms / 100) 5.613 -> 5.613 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.37% +0.43%] index_add_ strided 3 : Elapsed 0.056 ms (5.613 ms / 100) 5.551 -> 5.551 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.49% +0.58%] index_copy_ strided 3 : Elapsed 0.056 ms (5.551 ms / 100) 5.596 -> 5.614 ( +0.32%) [ +0.00% +0.20% +0.39% / +0.32% +0.70% +0.54%] index_add_ perm : Elapsed 0.056 ms (5.596 ms / 100) 5.547 -> 5.553 ( +0.11%) [ +0.00% +0.00% +0.27% / +0.11% +0.59% +0.49%] index_copy_ perm : Elapsed 0.055 ms (5.547 ms / 100) 5.610 -> 5.620 ( +0.18%) [ +0.00% +0.11% +0.27% / +0.23% +0.53% +0.18%] index_add_ perm_sorted : Elapsed 0.056 ms (5.610 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.36% +0.22%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.559 ms / 100) 5.851 -> 5.835 ( -0.27%) [ +0.05% +0.00% +0.15% / +0.27% -0.27% -0.26%] index_select const : Elapsed 0.059 ms (5.854 ms / 100) 5.886 -> 5.892 ( +0.10%) [ +0.00% +0.07% +0.12% / +0.10% +0.25% +0.17%] index_select wrap : Elapsed 0.059 ms (5.886 ms / 100) 5.871 -> 5.890 ( +0.32%) [ +0.00% +0.10% +0.31% / +0.39% +0.32% +0.55%] index_select linear : Elapsed 0.059 ms (5.871 ms / 100) 5.883 -> 5.884 ( +0.02%) [ +0.10% +0.05% +0.00% / +0.02% +0.42% +0.34%] index_select reverse : Elapsed 0.059 ms (5.889 ms / 100) 5.855 -> 5.835 ( -0.34%) [ +0.03% +0.00% +0.29% / +0.19% -0.34% -0.27%] index_select skip64 : Elapsed 0.059 ms (5.857 ms / 100) 5.858 -> 5.833 ( -0.43%) [ +0.00% +0.05% +0.29% / +0.10% -0.43% -0.38%] index_select skip256 : Elapsed 0.059 ms (5.858 ms / 100) 5.879 -> 5.892 ( +0.22%) [ +0.07% +0.00% +0.22% / +0.22% +0.39% +0.32%] index_select spread : Elapsed 0.059 ms (5.883 ms / 100) 5.893 -> 5.891 ( -0.03%) [ +0.07% +0.00% +0.22% / +0.19% +0.25% -0.03%] index_select strided 3 : Elapsed 0.059 ms (5.897 ms / 100) 5.863 -> 5.863 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.97% +0.78%] index_select random : Elapsed 0.059 ms (5.863 ms / 100) 5.863 -> 5.877 ( +0.24%) [ +0.05% +0.00% +0.17% / +0.24% +0.82% +0.77%] index_select random_sorted : Elapsed 0.059 ms (5.866 ms / 100) B = [40, 16, 5, 20] (stride (5, 200, 1, 3200)) A = [40, 16, 4, 20] (stride (1280, 80, 20, 1)) dim = 2 5.857 -> 5.848 ( -0.15%) [ +0.00% +0.02% +0.05% / +0.15% -0.12% -0.15%] index_add_ linear : Elapsed 0.059 ms (5.857 ms / 100) 5.818 -> 5.802 ( -0.28%) [ +0.00% +0.00% +0.09% / +0.12% -0.21% -0.28%] index_copy_ linear : Elapsed 0.058 ms (5.818 ms / 100) 5.855 -> 5.852 ( -0.05%) [ +0.02% +0.00% +0.03% / +0.05% -0.05% -0.03%] index_add_ reverse : Elapsed 0.059 ms (5.856 ms / 100) 5.816 -> 5.806 ( -0.17%) [ +0.05% +0.00% +0.14% / +0.10% -0.17% -0.14%] index_copy_ reverse : Elapsed 0.058 ms (5.819 ms / 100) 5.849 -> 5.833 ( -0.27%) [ +0.00% +0.03% +0.10% / +0.14% -0.27% -0.15%] index_add_ spread : Elapsed 0.058 ms (5.849 ms / 100) 5.806 -> 5.797 ( -0.16%) [ +0.00% +0.10% +0.16% / +0.16% -0.14% -0.16%] index_copy_ spread : Elapsed 0.058 ms (5.806 ms / 100) 5.846 -> 5.839 ( -0.12%) [ +0.00% +0.09% +0.10% / +0.07% -0.05% -0.12%] index_add_ strided 3 : Elapsed 0.058 ms (5.846 ms / 100) 5.810 -> 5.797 ( -0.22%) [ +0.00% +0.17% +0.07% / +0.14% -0.15% -0.22%] index_copy_ strided 3 : Elapsed 0.058 ms (5.810 ms / 100) 5.850 -> 5.838 ( -0.21%) [ +0.02% +0.00% +0.17% / +0.07% -0.19% -0.21%] index_add_ perm : Elapsed 0.059 ms (5.851 ms / 100) 5.812 -> 5.796 ( -0.28%) [ +0.00% +0.03% +0.07% / +0.05% -0.28% -0.26%] index_copy_ perm : Elapsed 0.058 ms (5.812 ms / 100) 5.848 -> 5.840 ( -0.14%) [ +0.07% +0.00% +0.05% / +0.14% -0.14% -0.09%] index_add_ perm_sorted : Elapsed 0.059 ms (5.852 ms / 100) 5.800 -> 5.792 ( -0.14%) [ +0.29% +0.00% +0.10% / +0.21% +0.09% -0.14%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.817 ms / 100) 6.003 -> 5.976 ( -0.45%) [ +0.00% +0.03% +0.02% / -0.08% -0.45% -0.20%] index_select const : Elapsed 0.060 ms (6.003 ms / 100) 6.054 -> 6.038 ( -0.26%) [ +0.23% +0.08% +0.00% / +0.07% -0.23% -0.26%] index_select wrap : Elapsed 0.061 ms (6.068 ms / 100) 6.035 -> 6.015 ( -0.33%) [ +0.12% +0.00% +0.13% / +0.13% -0.33% -0.31%] index_select linear : Elapsed 0.060 ms (6.042 ms / 100) 6.046 -> 6.019 ( -0.45%) [ +0.03% +0.03% +0.00% / +0.12% -0.25% -0.45%] index_select reverse : Elapsed 0.060 ms (6.048 ms / 100) 5.984 -> 5.965 ( -0.32%) [ +0.15% +0.00% +0.07% / +0.13% -0.32% -0.27%] index_select skip64 : Elapsed 0.060 ms (5.993 ms / 100) 6.001 -> 5.985 ( -0.27%) [ +0.00% +0.12% +0.07% / -0.02% -0.27% -0.23%] index_select skip256 : Elapsed 0.060 ms (6.001 ms / 100) 6.050 -> 6.039 ( -0.18%) [ +0.03% +0.00% +0.03% / +0.13% -0.17% -0.18%] index_select spread : Elapsed 0.061 ms (6.052 ms / 100) 6.040 -> 6.025 ( -0.25%) [ +0.08% +0.00% +0.07% / +0.13% -0.25% -0.15%] index_select strided 3 : Elapsed 0.060 ms (6.045 ms / 100) 6.016 -> 6.000 ( -0.27%) [ +0.08% +0.00% +0.08% / +0.15% -0.22% -0.27%] index_select random : Elapsed 0.060 ms (6.021 ms / 100) 6.045 -> 6.031 ( -0.23%) [ +0.00% +0.07% +0.20% / +0.18% -0.23% +0.40%] index_select random_sorted : Elapsed 0.060 ms (6.045 ms / 100) out_shape = [40, 16, 4, 5] in_shape = [40, 16, 4, 20] idx_dim = 3 B = [40, 16, 4, 5] (stride (320, 1, 80, 16)) A = [40, 16, 4, 20] (stride (1, 40, 640, 2560)) dim = 3 1.896 -> 1.898 ( +0.11%) [ +0.21% +0.00% +0.11% / +0.11% +0.37% +0.32%] index_select const : Elapsed 0.019 ms (1.900 ms / 100) 1.884 -> 1.885 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.16% +0.05% +0.16%] index_select wrap : Elapsed 0.019 ms (1.884 ms / 100) 1.877 -> 1.877 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.32% +0.59%] index_select linear : Elapsed 0.019 ms (1.879 ms / 100) 1.884 -> 1.886 ( +0.11%) [ +0.05% +0.00% +0.16% / +0.11% +0.32% +0.27%] index_select reverse : Elapsed 0.019 ms (1.885 ms / 100) 1.892 -> 1.894 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.21% +0.21% +0.11%] index_select skip64 : Elapsed 0.019 ms (1.894 ms / 100) 1.894 -> 1.895 ( +0.05%) [ +0.00% +0.11% +0.16% / +0.11% +0.05% +0.16%] index_select skip256 : Elapsed 0.019 ms (1.894 ms / 100) 1.893 -> 1.896 ( +0.16%) [ +0.05% +0.00% +0.05% / +0.16% +0.21% +0.16%] index_select spread : Elapsed 0.019 ms (1.894 ms / 100) 1.882 -> 1.886 ( +0.21%) [ +0.00% +0.00% +0.11% / +0.21% +0.53% +0.48%] index_select strided 3 : Elapsed 0.019 ms (1.882 ms / 100) 1.883 -> 1.883 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.32% +0.11%] index_select strided 5 : Elapsed 0.019 ms (1.886 ms / 100) 1.882 -> 1.881 ( -0.05%) [ +0.11% +0.00% +0.11% / -0.05% +0.00% +0.16%] index_select strided 7 : Elapsed 0.019 ms (1.884 ms / 100) 1.892 -> 1.892 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.00% +0.11%] index_select strided 8 : Elapsed 0.019 ms (1.892 ms / 100) 1.906 -> 1.902 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.05% -0.21% -0.21%] index_select strided 16 : Elapsed 0.019 ms (1.906 ms / 100) 1.897 -> 1.897 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.00% +0.32% +0.16%] index_select random : Elapsed 0.019 ms (1.898 ms / 100) 1.878 -> 1.880 ( +0.11%) [ +0.37% +0.16% +0.00% / +0.11% +0.64% +0.75%] index_select random_sorted : Elapsed 0.019 ms (1.885 ms / 100) 1.876 -> 1.876 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.59% +0.53%] index_select perm : Elapsed 0.019 ms (1.876 ms / 100) 1.892 -> 1.890 ( -0.11%) [ +0.11% +0.11% +0.00% / -0.11% +0.11% +0.05%] index_select perm_sorted : Elapsed 0.019 ms (1.894 ms / 100) B = [40, 16, 4, 5] (stride (320, 4, 1, 64)) A = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) dim = 3 1.708 -> 1.703 ( -0.29%) [ +0.00% +0.35% +0.00% / +0.06% -0.29% -0.12%] index_select const : Elapsed 0.017 ms (1.708 ms / 100) 1.716 -> 1.698 ( -1.05%) [ +0.12% +0.00% +0.00% / +0.29% -1.05% -1.05%] index_select wrap : Elapsed 0.017 ms (1.718 ms / 100) 1.716 -> 1.696 ( -1.17%) [ +0.00% +0.00% +0.00% / -0.06% -1.17% -1.11%] index_select linear : Elapsed 0.017 ms (1.716 ms / 100) 1.717 -> 1.700 ( -0.99%) [ +0.12% +0.00% +0.00% / +0.00% -0.87% -0.99%] index_select reverse : Elapsed 0.017 ms (1.719 ms / 100) 1.704 -> 1.705 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.29% +0.12%] index_select skip64 : Elapsed 0.017 ms (1.705 ms / 100) 1.704 -> 1.705 ( +0.06%) [ +0.18% +0.29% +0.00% / +0.12% +0.18% +0.06%] index_select skip256 : Elapsed 0.017 ms (1.707 ms / 100) 1.728 -> 1.728 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.29% +0.00% +0.06%] index_select spread : Elapsed 0.017 ms (1.730 ms / 100) 1.717 -> 1.714 ( -0.17%) [ +0.06% +0.06% +0.00% / -0.06% +0.12% -0.17%] index_select strided 3 : Elapsed 0.017 ms (1.718 ms / 100) 1.704 -> 1.693 ( -0.65%) [ +0.12% +0.00% +0.35% / +0.23% -0.47% -0.65%] index_select strided 5 : Elapsed 0.017 ms (1.706 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.06% +0.12%] index_select strided 7 : Elapsed 0.017 ms (1.714 ms / 100) 1.727 -> 1.726 ( -0.06%) [ +0.23% +0.12% +0.00% / +0.23% +0.12% -0.06%] index_select strided 8 : Elapsed 0.017 ms (1.731 ms / 100) 1.728 -> 1.730 ( +0.12%) [ +0.00% +0.12% +0.23% / +0.12% +0.75% +0.69%] index_select strided 16 : Elapsed 0.017 ms (1.728 ms / 100) 1.716 -> 1.718 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.12% +0.23% +0.52%] index_select random : Elapsed 0.017 ms (1.718 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.64% +0.70%] index_select random_sorted : Elapsed 0.017 ms (1.717 ms / 100) 1.716 -> 1.714 ( -0.12%) [ +0.00% +0.12% +0.00% / +0.06% -0.06% -0.12%] index_select perm : Elapsed 0.017 ms (1.716 ms / 100) 1.715 -> 1.715 ( +0.00%) [ +0.06% +0.17% +0.00% / +0.12% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.017 ms (1.716 ms / 100) B = [40, 16, 4, 5] (stride (20, 800, 1, 4)) A = [40, 16, 4, 20] (stride (320, 20, 12800, 1)) dim = 3 1.761 -> 1.762 ( +0.06%) [ +0.06% +0.17% +0.00% / +0.06% +0.17% +0.28%] index_select const : Elapsed 0.018 ms (1.762 ms / 100) 1.770 -> 1.768 ( -0.11%) [ +0.11% +0.34% +0.00% / +0.11% +0.23% -0.11%] index_select wrap : Elapsed 0.018 ms (1.772 ms / 100) 1.767 -> 1.767 ( +0.00%) [ +0.17% +0.11% +0.00% / +0.00% +0.40% +5.04%] index_select linear : Elapsed 0.018 ms (1.770 ms / 100) 1.769 -> 1.774 ( +0.28%) [ +0.34% +0.23% +0.00% / +0.28% +0.34% +0.34%] index_select reverse : Elapsed 0.018 ms (1.775 ms / 100) 1.758 -> 1.759 ( +0.06%) [ +0.00% +0.11% +0.00% / +0.06% +0.34% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.758 ms / 100) 1.756 -> 1.755 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.51% +0.23%] index_select skip256 : Elapsed 0.018 ms (1.756 ms / 100) 1.793 -> 1.795 ( +0.11%) [ +0.28% +0.00% +0.00% / +0.11% +0.17% +0.33%] index_select spread : Elapsed 0.018 ms (1.798 ms / 100) 1.789 -> 1.790 ( +0.06%) [ +0.22% +0.28% +0.00% / +0.06% +0.56% +0.78%] index_select strided 3 : Elapsed 0.018 ms (1.793 ms / 100) 1.795 -> 1.795 ( +0.00%) [ +0.00% +0.11% +0.17% / +0.00% +0.33% +0.56%] index_select strided 5 : Elapsed 0.018 ms (1.795 ms / 100) 1.780 -> 1.780 ( +0.00%) [ +0.28% +0.22% +0.00% / +0.00% +0.62% +4.21%] index_select strided 7 : Elapsed 0.018 ms (1.785 ms / 100) 1.783 -> 1.783 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.50% +0.28%] index_select strided 8 : Elapsed 0.018 ms (1.784 ms / 100) 1.794 -> 1.795 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.45% +0.22%] index_select strided 16 : Elapsed 0.018 ms (1.794 ms / 100) 1.785 -> 1.787 ( +0.11%) [ +0.06% +0.06% +0.00% / +0.11% +0.34% +0.17%] index_select random : Elapsed 0.018 ms (1.786 ms / 100) 1.793 -> 1.793 ( +0.00%) [ +0.56% +0.06% +0.00% / +0.00% +0.39% +0.39%] index_select random_sorted : Elapsed 0.018 ms (1.803 ms / 100) 1.780 -> 1.784 ( +0.22%) [ +0.06% +0.00% +0.06% / +0.22% +0.56% +0.51%] index_select perm : Elapsed 0.018 ms (1.781 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.22% +0.28%] index_select perm_sorted : Elapsed 0.018 ms (1.786 ms / 100) B = [40, 16, 4, 5] (stride (64, 1, 16, 2560)) A = [40, 16, 4, 20] (stride (1, 40, 640, 2560)) dim = 3 1.891 -> 1.896 ( +0.26%) [ +0.11% +0.37% +0.00% / +0.37% +0.26% +0.32%] index_select const : Elapsed 0.019 ms (1.893 ms / 100) 1.891 -> 1.893 ( +0.11%) [ +0.21% +0.16% +0.00% / +0.11% +0.21% +0.32%] index_select wrap : Elapsed 0.019 ms (1.895 ms / 100) 1.888 -> 1.887 ( -0.05%) [ +0.00% +0.16% +0.16% / -0.05% +0.42% +0.32%] index_select linear : Elapsed 0.019 ms (1.888 ms / 100) 1.891 -> 1.893 ( +0.11%) [ +0.26% +0.16% +0.00% / +0.16% +0.32% +0.11%] index_select reverse : Elapsed 0.019 ms (1.896 ms / 100) 1.891 -> 1.893 ( +0.11%) [ +0.16% +0.11% +0.00% / +0.11% +0.32% +0.42%] index_select skip64 : Elapsed 0.019 ms (1.894 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.26% +0.26%] index_select skip256 : Elapsed 0.019 ms (1.890 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.00% +0.26% +0.00% / +0.00% +0.42% +0.21%] index_select spread : Elapsed 0.019 ms (1.889 ms / 100) 1.890 -> 1.894 ( +0.21%) [ +0.00% +0.05% +0.05% / +0.21% +0.42% +0.48%] index_select strided 3 : Elapsed 0.019 ms (1.890 ms / 100) 1.876 -> 1.875 ( -0.05%) [ +0.00% +0.00% +0.11% / -0.05% +0.69% +0.69%] index_select strided 5 : Elapsed 0.019 ms (1.876 ms / 100) 1.874 -> 1.873 ( -0.05%) [ +0.00% +0.00% +0.11% / -0.05% +0.59% +0.80%] index_select strided 7 : Elapsed 0.019 ms (1.874 ms / 100) 1.886 -> 1.888 ( +0.11%) [ +0.05% +0.05% +0.00% / +0.11% +0.69% +0.58%] index_select strided 8 : Elapsed 0.019 ms (1.887 ms / 100) 1.889 -> 1.890 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.37% +0.16%] index_select strided 16 : Elapsed 0.019 ms (1.889 ms / 100) 1.882 -> 1.888 ( +0.32%) [ +0.16% +0.00% +0.11% / +0.32% +0.69% +0.80%] index_select random : Elapsed 0.019 ms (1.885 ms / 100) 1.883 -> 1.886 ( +0.16%) [ +0.21% +0.16% +0.00% / +0.16% +0.58% +0.42%] index_select random_sorted : Elapsed 0.019 ms (1.887 ms / 100) 1.887 -> 1.887 ( +0.00%) [ +0.26% +0.00% +0.11% / +0.00% +0.42% +0.48%] index_select perm : Elapsed 0.019 ms (1.892 ms / 100) 1.888 -> 1.888 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.05% +0.16% +0.00%] index_select perm_sorted : Elapsed 0.019 ms (1.889 ms / 100) B = [40, 16, 4, 5] (stride (16, 1, 640, 2560)) A = [40, 16, 4, 20] (stride (1, 800, 12800, 40)) dim = 3 1.778 -> 1.779 ( +0.06%) [ +0.39% +0.00% +0.06% / +0.06% +0.39% +0.45%] index_select const : Elapsed 0.018 ms (1.785 ms / 100) 1.789 -> 1.791 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.45% +0.50%] index_select wrap : Elapsed 0.018 ms (1.791 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.00% +0.22% +0.11% / +0.06% +0.61% +0.67%] index_select linear : Elapsed 0.018 ms (1.790 ms / 100) 1.788 -> 1.795 ( +0.39%) [ +0.17% +0.00% +0.11% / +0.39% +0.73% +0.73%] index_select reverse : Elapsed 0.018 ms (1.791 ms / 100) 1.778 -> 1.782 ( +0.22%) [ +0.17% +0.28% +0.00% / +0.22% +0.56% +0.56%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.780 -> 1.782 ( +0.11%) [ +0.22% +0.00% +0.17% / +0.11% +0.17% +0.17%] index_select skip256 : Elapsed 0.018 ms (1.784 ms / 100) 1.794 -> 1.799 ( +0.28%) [ +0.22% +0.00% +0.17% / +0.28% +0.28% +0.45%] index_select spread : Elapsed 0.018 ms (1.798 ms / 100) 1.785 -> 1.787 ( +0.11%) [ +0.17% +0.00% +0.34% / +0.11% +0.34% +0.39%] index_select strided 3 : Elapsed 0.018 ms (1.788 ms / 100) 1.774 -> 1.776 ( +0.11%) [ +0.00% +0.11% +0.06% / +0.11% +0.39% +0.39%] index_select strided 5 : Elapsed 0.018 ms (1.774 ms / 100) 1.787 -> 1.784 ( -0.17%) [ +0.22% +0.17% +0.00% / -0.17% +0.34% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.791 ms / 100) 1.795 -> 1.796 ( +0.06%) [ +0.00% +0.17% +0.17% / +0.06% +0.28% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.795 ms / 100) 1.784 -> 1.787 ( +0.17%) [ +0.00% +0.00% +0.06% / +0.17% +0.39% +0.78%] index_select strided 16 : Elapsed 0.018 ms (1.784 ms / 100) 1.784 -> 1.787 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +0.28% +0.34%] index_select random : Elapsed 0.018 ms (1.784 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.06% +0.11% +0.00% / +0.00% +0.56% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.782 ms / 100) 1.799 -> 1.801 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.44% +0.22%] index_select perm : Elapsed 0.018 ms (1.799 ms / 100) 1.795 -> 1.797 ( +0.11%) [ +0.00% +0.17% +0.06% / +0.11% +0.56% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.795 ms / 100) B = [40, 16, 4, 5] (stride (1, 40, 640, 2560)) A = [40, 16, 4, 20] (stride (1280, 1, 320, 16)) dim = 3 1.785 -> 1.786 ( +0.06%) [ +0.06% +0.34% +0.00% / +0.06% +0.50% +0.34%] index_select const : Elapsed 0.018 ms (1.786 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.11% +0.00% +0.11% / +0.06% +0.39% +0.28%] index_select wrap : Elapsed 0.018 ms (1.819 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.00% +0.22% +0.05% / +0.05% +0.22% +0.22%] index_select linear : Elapsed 0.018 ms (1.819 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.27% +0.33%] index_select reverse : Elapsed 0.018 ms (1.819 ms / 100) 1.783 -> 1.786 ( +0.17%) [ +0.00% +0.06% +0.00% / +0.17% +0.50% +0.62%] index_select skip64 : Elapsed 0.018 ms (1.783 ms / 100) 1.780 -> 1.780 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.51% +0.67%] index_select skip256 : Elapsed 0.018 ms (1.781 ms / 100) 1.822 -> 1.820 ( -0.11%) [ +0.05% +0.00% +0.00% / -0.11% +0.00% +0.11%] index_select spread : Elapsed 0.018 ms (1.823 ms / 100) 1.820 -> 1.819 ( -0.05%) [ +0.05% +0.00% +0.11% / -0.05% +0.11% +0.22%] index_select strided 3 : Elapsed 0.018 ms (1.821 ms / 100) 1.807 -> 1.806 ( -0.06%) [ +0.28% +0.00% +0.06% / +0.11% +0.17% -0.06%] index_select strided 5 : Elapsed 0.018 ms (1.812 ms / 100) 1.817 -> 1.817 ( +0.00%) [ +0.06% +0.22% +0.00% / +0.00% +0.28% +0.33%] index_select strided 7 : Elapsed 0.018 ms (1.818 ms / 100) 1.819 -> 1.818 ( -0.05%) [ +0.05% +0.11% +0.00% / -0.05% +0.27% +0.05%] index_select strided 8 : Elapsed 0.018 ms (1.820 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.11% +0.33% +0.05%] index_select strided 16 : Elapsed 0.018 ms (1.819 ms / 100) 1.808 -> 1.809 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.06% +0.17% +0.28%] index_select random : Elapsed 0.018 ms (1.808 ms / 100) 1.806 -> 1.810 ( +0.22%) [ +0.28% +0.06% +0.00% / +0.22% +0.50% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.811 ms / 100) 1.820 -> 1.819 ( -0.05%) [ +0.05% +0.11% +0.00% / -0.05% +0.33% +0.27%] index_select perm : Elapsed 0.018 ms (1.821 ms / 100) 1.818 -> 1.819 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.39% +0.50%] index_select perm_sorted : Elapsed 0.018 ms (1.818 ms / 100) out_shape = [5, 16, 20, 4] in_shape = [40, 16, 20, 4] idx_dim = 0 B = [5, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [40, 16, 20, 4] (stride (1280, 80, 1, 20)) dim = 0 1.279 -> 1.278 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.55% +0.55%] index_select const : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.63% +0.63%] index_select wrap : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.63% +0.63%] index_select linear : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.55%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.277 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.63% +0.63%] index_select skip256 : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.63% +0.63%] index_select spread : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_select strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.70% +0.63%] index_select strided 5 : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_select strided 7 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.70% +0.70%] index_select strided 8 : Elapsed 0.013 ms (1.280 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.70% +0.70%] index_select strided 16 : Elapsed 0.013 ms (1.279 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.78% +0.63%] index_select random : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_select random_sorted : Elapsed 0.013 ms (1.279 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.55% +0.55%] index_select perm : Elapsed 0.013 ms (1.280 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.70% +0.63%] index_select perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) B = [5, 16, 20, 4] (stride (80, 400, 4, 1)) A = [40, 16, 20, 4] (stride (1280, 1, 64, 16)) dim = 0 0.633 -> 0.633 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.16% +0.00%] index_select const : Elapsed 0.006 ms (0.633 ms / 100) 0.629 -> 0.631 ( +0.32%) [ +0.16% +0.16% +0.00% / +0.32% +1.91% +1.27%] index_select wrap : Elapsed 0.006 ms (0.630 ms / 100) 0.630 -> 0.630 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.95% +1.11%] index_select linear : Elapsed 0.006 ms (0.630 ms / 100) 0.630 -> 0.630 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.63% +0.95%] index_select reverse : Elapsed 0.006 ms (0.630 ms / 100) 0.630 -> 0.633 ( +0.48%) [ +0.00% +0.16% +0.00% / +2.06% +0.63% +0.48%] index_select skip64 : Elapsed 0.006 ms (0.630 ms / 100) 0.631 -> 0.632 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.48% +0.32%] index_select skip256 : Elapsed 0.006 ms (0.632 ms / 100) 0.629 -> 0.631 ( +0.32%) [ +0.00% +0.00% +0.00% / +0.32% +0.95% +1.11%] index_select spread : Elapsed 0.006 ms (0.629 ms / 100) 0.631 -> 0.632 ( +0.16%) [ +0.16% +0.32% +0.00% / +0.16% +0.48% +0.32%] index_select strided 3 : Elapsed 0.006 ms (0.632 ms / 100) 0.631 -> 0.632 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +1.27% +1.11%] index_select strided 5 : Elapsed 0.006 ms (0.631 ms / 100) 0.628 -> 0.628 ( +0.00%) [ +0.32% +0.16% +0.00% / +0.00% +1.27% +0.96%] index_select strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.629 -> 0.630 ( +0.16%) [ +0.32% +0.32% +0.00% / +0.16% +1.59% +1.75%] index_select strided 8 : Elapsed 0.006 ms (0.631 ms / 100) 0.628 -> 0.628 ( +0.00%) [ +1.11% +0.48% +0.00% / +0.00% +1.27% +1.11%] index_select strided 16 : Elapsed 0.006 ms (0.635 ms / 100) 0.629 -> 0.630 ( +0.16%) [ +0.48% +0.32% +0.00% / +0.16% +1.59% +1.59%] index_select random : Elapsed 0.006 ms (0.632 ms / 100) 0.631 -> 0.641 ( +1.58%) [ +0.16% +0.00% +0.00% / +1.58% +1.58% +1.58%] index_select random_sorted : Elapsed 0.006 ms (0.632 ms / 100) 0.630 -> 0.630 ( +0.00%) [ +2.06% +0.00% +0.00% / +0.00% +0.95% +1.11%] index_select perm : Elapsed 0.006 ms (0.643 ms / 100) 0.630 -> 0.630 ( +0.00%) [ +4.60% +0.00% +0.00% / +0.00% +1.27% +1.59%] index_select perm_sorted : Elapsed 0.007 ms (0.659 ms / 100) B = [5, 16, 20, 4] (stride (80, 400, 4, 1)) A = [40, 16, 20, 4] (stride (1, 3200, 160, 40)) dim = 0 1.278 -> 1.277 ( -0.08%) [ +0.23% +0.00% +0.08% / -0.08% +0.47% +0.47%] index_select const : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.55% +0.39%] index_select wrap : Elapsed 0.013 ms (1.278 ms / 100) 1.278 -> 1.281 ( +0.23%) [ +0.16% +0.23% +0.00% / +0.23% +0.70% +1.02%] index_select linear : Elapsed 0.013 ms (1.280 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.39% +0.00% +0.08% / +0.00% +0.63% +0.47%] index_select reverse : Elapsed 0.013 ms (1.284 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.63% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.277 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.47% +0.55%] index_select skip256 : Elapsed 0.013 ms (1.278 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.39%] index_select spread : Elapsed 0.013 ms (1.269 ms / 100) 1.261 -> 1.258 ( -0.24%) [ +0.00% +0.16% +0.08% / -0.24% +0.56% +0.56%] index_select strided 3 : Elapsed 0.013 ms (1.261 ms / 100) 1.247 -> 1.248 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.64% +0.72%] index_select strided 5 : Elapsed 0.012 ms (1.248 ms / 100) 1.268 -> 1.270 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.55% +0.55%] index_select strided 7 : Elapsed 0.013 ms (1.269 ms / 100) 1.281 -> 1.278 ( -0.23%) [ +0.00% +0.08% +0.23% / -0.23% +0.31% +0.47%] index_select strided 8 : Elapsed 0.013 ms (1.281 ms / 100) 1.238 -> 1.240 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.81% +0.65%] index_select strided 16 : Elapsed 0.012 ms (1.239 ms / 100) 1.273 -> 1.277 ( +0.31%) [ +0.00% +0.31% +0.31% / +0.31% +0.63% +0.94%] index_select random : Elapsed 0.013 ms (1.273 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.63% +0.47%] index_select random_sorted : Elapsed 0.013 ms (1.280 ms / 100) 1.258 -> 1.264 ( +0.48%) [ +0.24% +0.00% +1.11% / +0.48% +1.19% +1.59%] index_select perm : Elapsed 0.013 ms (1.261 ms / 100) 1.255 -> 1.255 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +1.04% +0.72%] index_select perm_sorted : Elapsed 0.013 ms (1.256 ms / 100) B = [5, 16, 20, 4] (stride (80, 400, 4, 1)) A = [40, 16, 20, 4] (stride (20, 3200, 1, 800)) dim = 0 1.379 -> 1.381 ( +0.15%) [ +0.36% +0.22% +0.00% / +0.15% +0.65% +0.51%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.29% +0.00% +0.07% / +0.00% +0.29% +0.22%] index_select wrap : Elapsed 0.014 ms (1.387 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.29% +0.22% +0.00% / +0.22% +0.51% +0.43%] index_select linear : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.65% +0.72%] index_select reverse : Elapsed 0.014 ms (1.382 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.14% +0.00% / +0.14% +0.51% +0.51%] index_select skip64 : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.385 ( +0.29%) [ +0.00% +0.14% +0.00% / +0.29% +0.51% +0.43%] index_select skip256 : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.51% +0.43%] index_select spread : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.65% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.58% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.43% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.14% +0.00% +0.29% / +0.07% +0.58% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.29% +0.07% +0.00% / +0.00% +0.65% +0.65%] index_select strided 16 : Elapsed 0.014 ms (1.383 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.36% +0.36%] index_select random : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.51% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.00% +0.65% +0.72%] index_select perm : Elapsed 0.014 ms (1.380 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.383 ms / 100) B = [5, 16, 20, 4] (stride (20, 400, 1, 100)) A = [40, 16, 20, 4] (stride (1280, 4, 64, 1)) dim = 0 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.34% +0.34%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.47% +0.41%] index_select wrap : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.41% +0.41%] index_select linear : Elapsed 0.015 ms (1.479 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.47% +0.47%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.41% +0.41%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.47% +0.41%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.68% +0.54%] index_select spread : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.47%] index_select strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.54% +0.41%] index_select strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.20% +0.00% +0.07% / -0.07% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.68% +0.54%] index_select random : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.61% +0.54%] index_select random_sorted : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.68% +0.54%] index_select perm : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) B = [5, 16, 20, 4] (stride (1, 400, 5, 100)) A = [40, 16, 20, 4] (stride (1, 40, 640, 12800)) dim = 0 1.633 -> 1.631 ( -0.12%) [ +0.06% +0.06% +0.00% / +0.00% -0.12% +0.49%] index_select const : Elapsed 0.016 ms (1.634 ms / 100) 1.630 -> 1.622 ( -0.49%) [ +0.18% +0.00% +0.00% / -0.49% +0.55% +0.74%] index_select wrap : Elapsed 0.016 ms (1.633 ms / 100) 1.629 -> 1.631 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.25% +0.12% +0.61%] index_select linear : Elapsed 0.016 ms (1.631 ms / 100) 1.630 -> 1.635 ( +0.31%) [ +0.12% +0.00% +0.12% / +0.31% +0.74% +0.49%] index_select reverse : Elapsed 0.016 ms (1.632 ms / 100) 1.622 -> 1.631 ( +0.55%) [ +0.62% +0.62% +0.00% / +0.55% +0.99% +1.23%] index_select skip64 : Elapsed 0.016 ms (1.632 ms / 100) 1.629 -> 1.622 ( -0.43%) [ +0.31% +0.00% +0.12% / -0.43% +0.74% +0.68%] index_select skip256 : Elapsed 0.016 ms (1.634 ms / 100) 1.624 -> 1.626 ( +0.12%) [ +0.00% +0.18% +0.18% / +0.12% +0.92% +0.80%] index_select spread : Elapsed 0.016 ms (1.624 ms / 100) 1.621 -> 1.621 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.62% +0.68%] index_select strided 3 : Elapsed 0.016 ms (1.621 ms / 100) 1.617 -> 1.621 ( +0.25%) [ +0.31% +0.25% +0.00% / +0.25% +0.93% +0.93%] index_select strided 5 : Elapsed 0.016 ms (1.622 ms / 100) 1.622 -> 1.624 ( +0.12%) [ +0.18% +0.00% +0.00% / +0.12% +0.62% +0.80%] index_select strided 7 : Elapsed 0.016 ms (1.625 ms / 100) 1.617 -> 1.617 ( +0.00%) [ +0.19% +0.12% +0.00% / +0.00% +0.80% +0.87%] index_select strided 8 : Elapsed 0.016 ms (1.620 ms / 100) 1.618 -> 1.620 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.80% +0.74%] index_select strided 16 : Elapsed 0.016 ms (1.620 ms / 100) 1.617 -> 1.621 ( +0.25%) [ +0.19% +0.19% +0.00% / +0.25% +0.74% +0.87%] index_select random : Elapsed 0.016 ms (1.620 ms / 100) 1.619 -> 1.620 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.80% +0.74%] index_select random_sorted : Elapsed 0.016 ms (1.621 ms / 100) 1.619 -> 1.620 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.68% +0.68%] index_select perm : Elapsed 0.016 ms (1.620 ms / 100) 1.618 -> 1.622 ( +0.25%) [ +0.12% +0.00% +0.12% / +0.25% +0.80% +0.93%] index_select perm_sorted : Elapsed 0.016 ms (1.620 ms / 100) B = [5, 16, 20, 4] (stride (16, 1, 320, 80)) A = [40, 16, 20, 4] (stride (1280, 80, 4, 1)) dim = 0 1.184 -> 1.185 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.17% +0.42%] index_select const : Elapsed 0.012 ms (1.184 ms / 100) 1.182 -> 1.186 ( +0.34%) [ +0.42% +0.00% +0.34% / +0.34% +0.51% +0.42%] index_select wrap : Elapsed 0.012 ms (1.187 ms / 100) 1.184 -> 1.181 ( -0.25%) [ +0.17% +0.08% +0.00% / -0.25% +0.17% +0.34%] index_select linear : Elapsed 0.012 ms (1.186 ms / 100) 1.181 -> 1.184 ( +0.25%) [ +0.59% +0.00% +0.76% / +0.25% +0.51% +0.59%] index_select reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.180 -> 1.185 ( +0.42%) [ +0.17% +0.25% +0.00% / +0.42% +0.42% +0.51%] index_select skip64 : Elapsed 0.012 ms (1.182 ms / 100) 1.180 -> 1.181 ( +0.08%) [ +0.51% +0.00% +0.42% / +0.08% +0.51% +0.85%] index_select skip256 : Elapsed 0.012 ms (1.186 ms / 100) 1.181 -> 1.186 ( +0.42%) [ +0.85% +0.00% +0.42% / +0.68% +0.76% +0.42%] index_select spread : Elapsed 0.012 ms (1.191 ms / 100) 1.179 -> 1.183 ( +0.34%) [ +0.85% +0.25% +0.00% / +0.34% +0.85% +0.68%] index_select strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.180 -> 1.183 ( +0.25%) [ +0.17% +0.00% +0.42% / +0.25% +0.51% +0.76%] index_select strided 5 : Elapsed 0.012 ms (1.182 ms / 100) 1.184 -> 1.180 ( -0.34%) [ +0.08% +0.17% +0.00% / -0.34% +0.25% +0.17%] index_select strided 7 : Elapsed 0.012 ms (1.185 ms / 100) 1.179 -> 1.183 ( +0.34%) [ +0.34% +0.00% +0.08% / +0.34% +0.68% +0.76%] index_select strided 8 : Elapsed 0.012 ms (1.183 ms / 100) 1.178 -> 1.185 ( +0.59%) [ +0.51% +0.59% +0.00% / +0.59% +0.59% +0.93%] index_select strided 16 : Elapsed 0.012 ms (1.184 ms / 100) 1.181 -> 1.181 ( +0.00%) [ +0.00% +0.17% +0.76% / +0.00% +0.68% +0.68%] index_select random : Elapsed 0.012 ms (1.181 ms / 100) 1.184 -> 1.180 ( -0.34%) [ +0.25% +0.34% +0.00% / -0.34% +0.17% +0.34%] index_select random_sorted : Elapsed 0.012 ms (1.187 ms / 100) 1.179 -> 1.185 ( +0.51%) [ +0.00% +0.08% +0.51% / +0.51% +0.68% +0.76%] index_select perm : Elapsed 0.012 ms (1.179 ms / 100) 1.180 -> 1.186 ( +0.51%) [ +0.51% +0.51% +0.00% / +0.51% +0.51% +0.59%] index_select perm_sorted : Elapsed 0.012 ms (1.186 ms / 100) B = [5, 16, 20, 4] (stride (16, 1, 320, 80)) A = [40, 16, 20, 4] (stride (4, 3200, 160, 1)) dim = 0 0.643 -> 0.644 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.78% +0.62%] index_select const : Elapsed 0.006 ms (0.644 ms / 100) 0.642 -> 0.643 ( +0.16%) [ +0.31% +0.31% +0.00% / +0.16% +1.09% +1.25%] index_select wrap : Elapsed 0.006 ms (0.644 ms / 100) 0.647 -> 0.647 ( +0.00%) [ +0.15% +0.62% +0.00% / +0.00% +0.62% +1.08%] index_select linear : Elapsed 0.006 ms (0.648 ms / 100) 0.645 -> 0.648 ( +0.47%) [ +0.00% +0.31% +0.31% / +0.47% +1.09% +1.09%] index_select reverse : Elapsed 0.006 ms (0.645 ms / 100) 0.642 -> 0.643 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.93% +1.09%] index_select skip64 : Elapsed 0.006 ms (0.643 ms / 100) 0.642 -> 0.642 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.93% +0.78%] index_select skip256 : Elapsed 0.006 ms (0.643 ms / 100) 0.644 -> 0.643 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.78% +0.62%] index_select spread : Elapsed 0.006 ms (0.644 ms / 100) 0.643 -> 0.645 ( +0.31%) [ +0.00% +0.31% +0.16% / +0.31% +0.93% +1.09%] index_select strided 3 : Elapsed 0.006 ms (0.643 ms / 100) 0.644 -> 0.644 ( +0.00%) [ +0.16% +0.31% +0.00% / +0.00% +0.78% +0.62%] index_select strided 5 : Elapsed 0.006 ms (0.645 ms / 100) 0.642 -> 0.642 ( +0.00%) [ +0.31% +0.16% +0.00% / +0.00% +1.09% +1.09%] index_select strided 7 : Elapsed 0.006 ms (0.644 ms / 100) 0.642 -> 0.649 ( +1.09%) [ +0.31% +0.16% +0.00% / +1.40% +1.09% +1.25%] index_select strided 8 : Elapsed 0.006 ms (0.644 ms / 100) 0.644 -> 0.644 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.93%] index_select strided 16 : Elapsed 0.006 ms (0.644 ms / 100) 0.643 -> 0.644 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +1.09% +1.40%] index_select random : Elapsed 0.006 ms (0.643 ms / 100) 0.643 -> 0.644 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.93% +0.93%] index_select random_sorted : Elapsed 0.006 ms (0.643 ms / 100) 0.642 -> 0.647 ( +0.78%) [ +0.31% +0.47% +0.00% / +0.78% +1.09% +1.09%] index_select perm : Elapsed 0.006 ms (0.644 ms / 100) 0.642 -> 0.643 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +1.25% +1.25%] index_select perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) B = [5, 16, 20, 4] (stride (320, 20, 1, 1600)) dim = 0 fill_cnt = 40 0.971 -> 0.958 ( -1.34%) [ +0.00% +0.31% +0.31% / -0.93% -0.93% -1.34%] index_fill_ const : Elapsed 0.010 ms (0.971 ms / 100) 0.966 -> 0.951 ( -1.55%) [ +0.62% +0.21% +0.00% / -0.83% -1.55% -1.04%] index_fill_ linear : Elapsed 0.010 ms (0.972 ms / 100) 0.959 -> 0.947 ( -1.25%) [ +0.00% +0.42% +0.00% / -0.94% -0.94% -1.25%] index_fill_ reverse : Elapsed 0.010 ms (0.959 ms / 100) 0.960 -> 0.952 ( -0.83%) [ +0.00% +0.73% +0.52% / -0.73% -0.63% -0.83%] index_fill_ skip64 : Elapsed 0.010 ms (0.960 ms / 100) 0.964 -> 0.950 ( -1.45%) [ +0.31% +0.00% +0.00% / -0.73% -1.45% -1.24%] index_fill_ skip256 : Elapsed 0.010 ms (0.967 ms / 100) 0.955 -> 0.936 ( -1.99%) [ +0.00% +0.00% +0.00% / -1.57% -1.68% -1.99%] index_fill_ spread : Elapsed 0.010 ms (0.955 ms / 100) 0.952 -> 0.935 ( -1.79%) [ +0.00% +0.32% +0.11% / -1.79% -1.58% -1.47%] index_fill_ strided 3 : Elapsed 0.010 ms (0.952 ms / 100) 0.954 -> 0.935 ( -1.99%) [ +0.31% +0.00% +0.63% / -1.68% -1.26% -1.99%] index_fill_ random : Elapsed 0.010 ms (0.957 ms / 100) 0.955 -> 0.938 ( -1.78%) [ +0.21% +0.00% +0.31% / -1.47% -1.57% -1.78%] index_fill_ random_sorted : Elapsed 0.010 ms (0.957 ms / 100) B = [5, 16, 20, 4] (stride (320, 20, 1, 1600)) A = [40, 16, 20, 4] (stride (1280, 80, 1, 20)) dim = 0 1.381 -> 1.382 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.22% +0.14%] index_select const : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.22% +0.00% +0.14% / +0.14% +0.29% +0.22%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.29% +0.00% / +0.07% +0.29% +0.29%] index_select linear : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.00% +0.29% +0.07% / +0.22% +0.14% +0.14%] index_select reverse : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.58% +0.00% +0.07% / +1.74% +0.44% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.387 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.36% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.381 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.22% +0.29%] index_select spread : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.15% +0.07% +0.00% / +0.29% +0.44% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.00% +0.15% +0.07% / +0.22% +0.44% +0.36%] index_select strided 5 : Elapsed 0.014 ms (1.379 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.07% +0.00% +0.22% / -0.07% +0.29% +0.14%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.51% +0.44%] index_select strided 8 : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.15% +0.29% +0.00% / +0.15% +0.58% +0.51%] index_select strided 16 : Elapsed 0.014 ms (1.380 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.00% +0.07% +0.07% / +0.29% +0.29% +0.43%] index_select random : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.15% +0.29% +0.00% / +0.00% +0.44% +0.44%] index_select random_sorted : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.22% +0.36%] index_select perm : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.15% +0.07% +0.00% / +0.36% +0.51% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) B = [5, 16, 20, 4] (stride (1, 5, 80, 1600)) A = [40, 16, 20, 4] (stride (1280, 1, 16, 320)) dim = 0 1.535 -> 1.533 ( -0.13%) [ +0.00% +0.20% +0.20% / -0.13% +0.52% +0.52%] index_select const : Elapsed 0.015 ms (1.535 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.71% +0.84%] index_select wrap : Elapsed 0.015 ms (1.539 ms / 100) 1.538 -> 1.542 ( +0.26%) [ +0.13% +0.33% +0.00% / +0.26% +0.65% +1.04%] index_select linear : Elapsed 0.015 ms (1.540 ms / 100) 1.539 -> 1.541 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +1.88% +1.10%] index_select reverse : Elapsed 0.015 ms (1.541 ms / 100) 1.536 -> 1.539 ( +0.20%) [ +0.00% +0.20% +0.20% / +0.20% +0.98% +0.85%] index_select skip64 : Elapsed 0.015 ms (1.536 ms / 100) 1.531 -> 1.533 ( +0.13%) [ +0.00% +0.13% +0.07% / +0.13% +0.85% +0.85%] index_select skip256 : Elapsed 0.015 ms (1.531 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.65% +0.59%] index_select spread : Elapsed 0.015 ms (1.529 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.78% +0.72%] index_select strided 3 : Elapsed 0.015 ms (1.534 ms / 100) 1.531 -> 1.534 ( +0.20%) [ +0.00% +0.13% +0.13% / +0.20% +0.72% +0.78%] index_select strided 5 : Elapsed 0.015 ms (1.531 ms / 100) 1.534 -> 1.537 ( +0.20%) [ +0.13% +0.20% +0.00% / +0.20% +1.11% +0.72%] index_select strided 7 : Elapsed 0.015 ms (1.536 ms / 100) 1.526 -> 1.528 ( +0.13%) [ +0.07% +0.00% +0.07% / +0.13% +0.85% +0.72%] index_select strided 8 : Elapsed 0.015 ms (1.527 ms / 100) 1.527 -> 1.529 ( +0.13%) [ +0.07% +0.00% +0.07% / +0.13% +0.79% +0.79%] index_select strided 16 : Elapsed 0.015 ms (1.528 ms / 100) 1.533 -> 1.534 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.72% +0.98%] index_select random : Elapsed 0.015 ms (1.534 ms / 100) 1.537 -> 1.542 ( +0.33%) [ +0.00% +0.20% +0.07% / +0.33% +1.17% +0.91%] index_select random_sorted : Elapsed 0.015 ms (1.537 ms / 100) 1.540 -> 1.538 ( -0.13%) [ +0.00% +0.06% +0.13% / -0.13% +0.52% +0.71%] index_select perm : Elapsed 0.015 ms (1.540 ms / 100) 1.534 -> 1.533 ( -0.07%) [ +0.26% +0.00% +0.07% / -0.07% +0.46% +0.59%] index_select perm_sorted : Elapsed 0.015 ms (1.538 ms / 100) out_shape = [40, 5, 20, 4] in_shape = [40, 16, 20, 4] idx_dim = 1 B = [40, 5, 20, 4] (stride (400, 1, 20, 5)) A = [40, 16, 20, 4] (stride (4, 160, 2560, 1)) dim = 1 2.184 -> 2.180 ( -0.18%) [ +0.18% +0.09% +0.00% / -0.18% +0.46% +0.23%] index_select const : Elapsed 0.022 ms (2.188 ms / 100) 2.221 -> 2.218 ( -0.14%) [ +0.18% +0.05% +0.00% / -0.14% +0.50% +0.41%] index_select wrap : Elapsed 0.022 ms (2.225 ms / 100) 2.227 -> 2.235 ( +0.36%) [ +0.22% +0.00% +0.18% / +0.36% +0.58% +0.54%] index_select linear : Elapsed 0.022 ms (2.232 ms / 100) 2.229 -> 2.235 ( +0.27%) [ +0.00% +0.31% +0.27% / +0.27% +0.54% +0.45%] index_select reverse : Elapsed 0.022 ms (2.229 ms / 100) 2.184 -> 2.185 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.46% +0.37%] index_select skip64 : Elapsed 0.022 ms (2.184 ms / 100) 2.182 -> 2.182 ( +0.00%) [ +0.18% +0.00% +0.14% / +0.00% +0.69% +0.55%] index_select skip256 : Elapsed 0.022 ms (2.186 ms / 100) 2.218 -> 2.222 ( +0.18%) [ +0.00% +0.18% +0.23% / +0.18% +0.32% +0.45%] index_select spread : Elapsed 0.022 ms (2.218 ms / 100) 2.224 -> 2.228 ( +0.18%) [ +0.04% +0.00% +0.04% / +0.18% +0.36% +0.40%] index_select strided 3 : Elapsed 0.022 ms (2.225 ms / 100) 2.225 -> 2.226 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.31% +0.54%] index_select strided 5 : Elapsed 0.022 ms (2.225 ms / 100) 2.221 -> 2.224 ( +0.14%) [ +0.23% +0.18% +0.00% / +0.14% +0.59% +0.63%] index_select strided 7 : Elapsed 0.022 ms (2.226 ms / 100) 2.198 -> 2.196 ( -0.09%) [ +0.05% +0.00% +0.05% / -0.09% +0.55% +0.41%] index_select strided 8 : Elapsed 0.022 ms (2.199 ms / 100) 2.223 -> 2.224 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +0.40% +0.18%] index_select random : Elapsed 0.022 ms (2.224 ms / 100) 2.218 -> 2.225 ( +0.32%) [ +0.05% +0.09% +0.00% / +0.32% +0.50% +0.45%] index_select random_sorted : Elapsed 0.022 ms (2.219 ms / 100) 2.219 -> 2.219 ( +0.00%) [ +0.14% +0.18% +0.00% / +0.00% +0.41% +0.41%] index_select perm : Elapsed 0.022 ms (2.222 ms / 100) 2.221 -> 2.227 ( +0.27%) [ +0.00% +0.14% +0.09% / +0.27% +0.59% +0.63%] index_select perm_sorted : Elapsed 0.022 ms (2.221 ms / 100) B = [40, 5, 20, 4] (stride (80, 3200, 4, 1)) A = [40, 16, 20, 4] (stride (4, 160, 2560, 1)) dim = 1 2.186 -> 2.184 ( -0.09%) [ +0.00% +0.05% +0.00% / -0.09% +0.32% +0.37%] index_select const : Elapsed 0.022 ms (2.186 ms / 100) 2.231 -> 2.228 ( -0.13%) [ +0.09% +0.00% +0.00% / -0.13% +0.31% +0.27%] index_select wrap : Elapsed 0.022 ms (2.233 ms / 100) 2.225 -> 2.228 ( +0.13%) [ +0.22% +0.00% +0.40% / +0.13% +0.40% +0.67%] index_select linear : Elapsed 0.022 ms (2.230 ms / 100) 2.222 -> 2.222 ( +0.00%) [ +0.18% +0.14% +0.00% / +0.00% +0.41% +0.41%] index_select reverse : Elapsed 0.022 ms (2.226 ms / 100) 2.181 -> 2.185 ( +0.18%) [ +0.28% +0.00% +0.09% / +0.18% +0.46% +0.55%] index_select skip64 : Elapsed 0.022 ms (2.187 ms / 100) 2.182 -> 2.185 ( +0.14%) [ +0.32% +0.14% +0.00% / +0.14% +0.32% +0.46%] index_select skip256 : Elapsed 0.022 ms (2.189 ms / 100) 2.212 -> 2.218 ( +0.27%) [ +0.36% +0.45% +0.00% / +0.27% +0.68% +0.59%] index_select spread : Elapsed 0.022 ms (2.220 ms / 100) 2.228 -> 2.228 ( +0.00%) [ +0.04% +0.22% +0.00% / +0.00% +0.40% +0.63%] index_select strided 3 : Elapsed 0.022 ms (2.229 ms / 100) 2.230 -> 2.231 ( +0.04%) [ +0.18% +0.00% +0.00% / +0.04% +0.58% +0.54%] index_select strided 5 : Elapsed 0.022 ms (2.234 ms / 100) 2.218 -> 2.224 ( +0.27%) [ +0.50% +0.00% +0.36% / +0.27% +0.63% +0.68%] index_select strided 7 : Elapsed 0.022 ms (2.229 ms / 100) 2.198 -> 2.198 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.23% +0.36%] index_select strided 8 : Elapsed 0.022 ms (2.202 ms / 100) 2.221 -> 2.225 ( +0.18%) [ +0.09% +0.32% +0.00% / +0.18% +0.41% +0.54%] index_select random : Elapsed 0.022 ms (2.223 ms / 100) 2.214 -> 2.220 ( +0.27%) [ +0.14% +0.00% +0.36% / +0.27% +0.54% +0.50%] index_select random_sorted : Elapsed 0.022 ms (2.217 ms / 100) 2.222 -> 2.225 ( +0.14%) [ +0.05% +0.00% +0.18% / +0.14% +0.41% +0.45%] index_select perm : Elapsed 0.022 ms (2.223 ms / 100) 2.217 -> 2.214 ( -0.14%) [ +0.05% +0.00% +0.27% / -0.14% +0.45% +0.41%] index_select perm_sorted : Elapsed 0.022 ms (2.218 ms / 100) B = [40, 5, 20, 4] (stride (1, 3200, 40, 800)) A = [40, 16, 20, 4] (stride (20, 3200, 1, 800)) dim = 1 2.267 -> 2.264 ( -0.13%) [ +0.00% +0.09% +0.22% / -0.13% +0.88% +0.75%] index_select const : Elapsed 0.023 ms (2.267 ms / 100) 2.342 -> 2.345 ( +0.13%) [ +0.17% +0.00% +0.21% / +0.13% +0.38% +0.34%] index_select wrap : Elapsed 0.023 ms (2.346 ms / 100) 2.348 -> 2.347 ( -0.04%) [ +0.26% +0.00% +0.13% / -0.04% +0.21% +0.47%] index_select linear : Elapsed 0.024 ms (2.354 ms / 100) 2.349 -> 2.352 ( +0.13%) [ +0.38% +0.34% +0.00% / +0.26% +0.17% +0.13%] index_select reverse : Elapsed 0.024 ms (2.358 ms / 100) 2.264 -> 2.264 ( +0.00%) [ +0.00% +0.04% +0.13% / +0.00% +0.57% +0.49%] index_select skip64 : Elapsed 0.023 ms (2.264 ms / 100) 2.266 -> 2.269 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.79% +0.79%] index_select skip256 : Elapsed 0.023 ms (2.266 ms / 100) 2.347 -> 2.345 ( -0.09%) [ +0.00% +0.13% +0.00% / -0.09% +0.13% +0.26%] index_select spread : Elapsed 0.023 ms (2.347 ms / 100) 2.354 -> 2.358 ( +0.17%) [ +0.21% +0.00% +0.17% / +0.17% +0.17% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.359 ms / 100) 2.342 -> 2.341 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.00% +0.21%] index_select strided 5 : Elapsed 0.023 ms (2.343 ms / 100) 2.349 -> 2.348 ( -0.04%) [ +0.00% +0.09% +0.17% / -0.04% +0.30% +0.47%] index_select strided 7 : Elapsed 0.023 ms (2.349 ms / 100) 2.285 -> 2.284 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.57% +0.61%] index_select strided 8 : Elapsed 0.023 ms (2.285 ms / 100) 2.329 -> 2.337 ( +0.34%) [ +0.26% +0.21% +0.00% / +0.34% +0.73% +0.77%] index_select random : Elapsed 0.023 ms (2.335 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.56% +0.51%] index_select random_sorted : Elapsed 0.023 ms (2.335 ms / 100) 2.334 -> 2.336 ( +0.09%) [ +0.26% +0.26% +0.00% / +0.09% +0.86% +0.81%] index_select perm : Elapsed 0.023 ms (2.340 ms / 100) 2.335 -> 2.337 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.73% +1.07%] index_select perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) B = [40, 5, 20, 4] (stride (100, 20, 1, 4000)) A = [40, 16, 20, 4] (stride (1280, 20, 1, 320)) dim = 1 2.293 -> 2.286 ( -0.31%) [ +0.22% +0.00% +0.09% / -0.31% -0.26% -0.22%] index_select const : Elapsed 0.023 ms (2.298 ms / 100) 2.347 -> 2.342 ( -0.21%) [ +0.17% +0.00% +0.26% / +0.30% -0.21% -0.17%] index_select wrap : Elapsed 0.024 ms (2.351 ms / 100) 2.351 -> 2.335 ( -0.68%) [ +0.09% +0.09% +0.00% / +0.09% -0.68% -0.30%] index_select linear : Elapsed 0.024 ms (2.353 ms / 100) 2.353 -> 2.340 ( -0.55%) [ +0.13% +0.04% +0.00% / +0.21% -0.47% -0.55%] index_select reverse : Elapsed 0.024 ms (2.356 ms / 100) 2.279 -> 2.286 ( +0.31%) [ +0.44% +0.09% +0.00% / +0.31% +0.35% +0.53%] index_select skip64 : Elapsed 0.023 ms (2.289 ms / 100) 2.287 -> 2.283 ( -0.17%) [ +0.00% +0.39% +0.13% / +0.31% -0.17% +0.31%] index_select skip256 : Elapsed 0.023 ms (2.287 ms / 100) 2.360 -> 2.349 ( -0.47%) [ +0.17% +0.00% +0.00% / +0.21% -0.25% -0.47%] index_select spread : Elapsed 0.024 ms (2.364 ms / 100) 2.356 -> 2.351 ( -0.21%) [ +0.30% +0.04% +0.00% / -0.13% -0.04% -0.21%] index_select strided 3 : Elapsed 0.024 ms (2.363 ms / 100) 2.339 -> 2.331 ( -0.34%) [ +0.00% +0.38% +0.09% / +0.26% -0.09% -0.34%] index_select strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.341 -> 2.335 ( -0.26%) [ +0.68% +0.00% +0.17% / +0.43% -0.26% -0.09%] index_select strided 7 : Elapsed 0.024 ms (2.357 ms / 100) 2.301 -> 2.295 ( -0.26%) [ +0.22% +0.00% +0.09% / +0.22% -0.09% -0.26%] index_select strided 8 : Elapsed 0.023 ms (2.306 ms / 100) 2.356 -> 2.350 ( -0.25%) [ +0.04% +0.04% +0.00% / +0.13% -0.21% -0.25%] index_select random : Elapsed 0.024 ms (2.357 ms / 100) 2.352 -> 2.348 ( -0.17%) [ +0.21% +0.13% +0.00% / +0.26% +0.00% -0.17%] index_select random_sorted : Elapsed 0.024 ms (2.357 ms / 100) 2.353 -> 2.340 ( -0.55%) [ +0.00% +0.21% +0.04% / +0.08% -0.42% -0.55%] index_select perm : Elapsed 0.024 ms (2.353 ms / 100) 2.344 -> 2.340 ( -0.17%) [ +0.34% +0.47% +0.00% / +0.17% -0.17% +0.34%] index_select perm_sorted : Elapsed 0.024 ms (2.352 ms / 100) B = [40, 5, 20, 4] (stride (100, 20, 1, 4000)) A = [40, 16, 20, 4] (stride (1, 3200, 160, 40)) dim = 1 2.326 -> 2.330 ( +0.17%) [ +0.09% +0.00% +0.04% / +0.17% +0.39% +0.43%] index_select const : Elapsed 0.023 ms (2.328 ms / 100) 2.343 -> 2.342 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.60% +0.13%] index_select wrap : Elapsed 0.023 ms (2.344 ms / 100) 2.343 -> 2.341 ( -0.09%) [ +0.09% +0.38% +0.00% / -0.09% +0.38% +0.47%] index_select linear : Elapsed 0.023 ms (2.345 ms / 100) 2.345 -> 2.345 ( +0.00%) [ +0.17% +0.00% +0.09% / +0.00% +0.30% +0.30%] index_select reverse : Elapsed 0.023 ms (2.349 ms / 100) 2.330 -> 2.334 ( +0.17%) [ +0.00% +0.04% +0.04% / +0.17% +0.56% +0.52%] index_select skip64 : Elapsed 0.023 ms (2.330 ms / 100) 2.325 -> 2.328 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.13% +0.43% +0.47%] index_select skip256 : Elapsed 0.023 ms (2.326 ms / 100) 2.339 -> 2.346 ( +0.30%) [ +0.04% +0.17% +0.00% / +0.30% +0.90% +0.43%] index_select spread : Elapsed 0.023 ms (2.340 ms / 100) 2.341 -> 2.344 ( +0.13%) [ +0.13% +0.04% +0.00% / +0.13% +0.30% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.344 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.09% +0.34% +0.00% / +0.04% +0.73% +0.51%] index_select strided 5 : Elapsed 0.023 ms (2.334 ms / 100) 2.345 -> 2.344 ( -0.04%) [ +0.26% +0.00% +0.04% / -0.04% +0.38% +0.43%] index_select strided 7 : Elapsed 0.024 ms (2.351 ms / 100) 2.323 -> 2.322 ( -0.04%) [ +0.17% +0.17% +0.00% / -0.04% +0.69% +0.73%] index_select strided 8 : Elapsed 0.023 ms (2.327 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.13% +0.00% +0.26% / +0.26% +0.98% +0.64%] index_select random : Elapsed 0.023 ms (2.340 ms / 100) 2.341 -> 2.341 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.60% +0.43%] index_select random_sorted : Elapsed 0.023 ms (2.343 ms / 100) 2.342 -> 2.338 ( -0.17%) [ +0.00% +0.04% +0.04% / -0.17% +0.43% +0.21%] index_select perm : Elapsed 0.023 ms (2.342 ms / 100) 2.338 -> 2.341 ( +0.13%) [ +0.13% +0.04% +0.00% / +0.13% +0.38% +0.17%] index_select perm_sorted : Elapsed 0.023 ms (2.341 ms / 100) B = [40, 5, 20, 4] (stride (100, 1, 5, 4000)) A = [40, 16, 20, 4] (stride (80, 3200, 4, 1)) dim = 1 2.134 -> 2.128 ( -0.28%) [ +0.33% +0.00% +0.23% / +0.37% -0.28% -0.19%] index_select const : Elapsed 0.021 ms (2.141 ms / 100) 2.168 -> 2.165 ( -0.14%) [ +0.05% +0.05% +0.00% / -0.05% -0.14% -0.09%] index_select wrap : Elapsed 0.022 ms (2.169 ms / 100) 2.162 -> 2.165 ( +0.14%) [ +0.14% +0.23% +0.00% / +0.14% +0.23% +0.19%] index_select linear : Elapsed 0.022 ms (2.165 ms / 100) 2.162 -> 2.153 ( -0.42%) [ +0.19% +0.19% +0.00% / +0.14% -0.28% -0.42%] index_select reverse : Elapsed 0.022 ms (2.166 ms / 100) 2.131 -> 2.127 ( -0.19%) [ +0.00% +0.19% +0.19% / +0.09% -0.19% -0.14%] index_select skip64 : Elapsed 0.021 ms (2.131 ms / 100) 2.139 -> 2.119 ( -0.94%) [ +0.00% +0.09% +0.05% / -0.14% -0.94% -0.84%] index_select skip256 : Elapsed 0.021 ms (2.139 ms / 100) 2.193 -> 2.167 ( -1.19%) [ +0.00% +0.18% +0.23% / +0.18% -1.14% -1.19%] index_select spread : Elapsed 0.022 ms (2.193 ms / 100) 2.193 -> 2.165 ( -1.28%) [ +0.00% +0.27% +0.05% / -0.05% -1.28% -1.23%] index_select strided 3 : Elapsed 0.022 ms (2.193 ms / 100) 2.162 -> 2.168 ( +0.28%) [ +0.19% +0.42% +0.00% / +0.28% +0.56% +0.56%] index_select strided 5 : Elapsed 0.022 ms (2.166 ms / 100) 2.195 -> 2.161 ( -1.55%) [ +0.36% +0.00% +0.09% / +0.27% -1.55% -1.41%] index_select strided 7 : Elapsed 0.022 ms (2.203 ms / 100) 2.150 -> 2.141 ( -0.42%) [ +0.28% +0.23% +0.00% / +0.19% -0.42% -0.42%] index_select strided 8 : Elapsed 0.022 ms (2.156 ms / 100) 2.154 -> 2.156 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.09% +0.97% +0.88%] index_select random : Elapsed 0.022 ms (2.156 ms / 100) 2.149 -> 2.151 ( +0.09%) [ +0.19% +0.19% +0.00% / +0.09% +0.88% +0.98%] index_select random_sorted : Elapsed 0.022 ms (2.153 ms / 100) 2.150 -> 2.152 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.74% +0.51%] index_select perm : Elapsed 0.022 ms (2.152 ms / 100) 2.146 -> 2.152 ( +0.28%) [ +0.00% +0.33% +0.28% / +0.28% +1.30% +1.35%] index_select perm_sorted : Elapsed 0.021 ms (2.146 ms / 100) B = [40, 5, 20, 4] (stride (20, 800, 1, 4000)) A = [40, 16, 20, 4] (stride (1280, 20, 1, 320)) dim = 1 2.274 -> 2.271 ( -0.13%) [ +0.00% +0.13% +0.04% / -0.13% +0.44% +0.48%] index_select const : Elapsed 0.023 ms (2.274 ms / 100) 2.324 -> 2.331 ( +0.30%) [ +0.30% +0.30% +0.00% / +0.30% +1.12% +0.99%] index_select wrap : Elapsed 0.023 ms (2.331 ms / 100) 2.331 -> 2.337 ( +0.26%) [ +0.26% +0.30% +0.00% / +0.26% +1.07% +1.16%] index_select linear : Elapsed 0.023 ms (2.337 ms / 100) 2.326 -> 2.331 ( +0.21%) [ +0.00% +0.21% +0.43% / +0.21% +1.16% +1.20%] index_select reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.280 -> 2.288 ( +0.35%) [ +0.00% +0.18% +0.13% / +0.35% +1.01% +1.01%] index_select skip64 : Elapsed 0.023 ms (2.280 ms / 100) 2.270 -> 2.276 ( +0.26%) [ +0.00% +0.18% +0.26% / +0.26% +0.75% +0.66%] index_select skip256 : Elapsed 0.023 ms (2.270 ms / 100) 2.339 -> 2.355 ( +0.68%) [ +0.56% +0.34% +0.00% / +0.73% +1.07% +0.68%] index_select spread : Elapsed 0.024 ms (2.352 ms / 100) 2.346 -> 2.344 ( -0.09%) [ +0.00% +0.30% +0.26% / -0.09% +0.47% +0.51%] index_select strided 3 : Elapsed 0.023 ms (2.346 ms / 100) 2.329 -> 2.335 ( +0.26%) [ +0.09% +0.04% +0.00% / +0.26% +1.20% +0.99%] index_select strided 5 : Elapsed 0.023 ms (2.331 ms / 100) 2.331 -> 2.335 ( +0.17%) [ +0.56% +0.43% +0.00% / +0.17% +0.77% +1.12%] index_select strided 7 : Elapsed 0.023 ms (2.344 ms / 100) 2.275 -> 2.280 ( +0.22%) [ +0.09% +0.09% +0.00% / +0.22% +0.62% +0.92%] index_select strided 8 : Elapsed 0.023 ms (2.277 ms / 100) 2.345 -> 2.347 ( +0.09%) [ +0.00% +0.13% +0.26% / +0.09% +0.77% +0.85%] index_select random : Elapsed 0.023 ms (2.345 ms / 100) 2.349 -> 2.350 ( +0.04%) [ +0.38% +0.00% +0.38% / +0.04% +0.51% +0.85%] index_select random_sorted : Elapsed 0.024 ms (2.358 ms / 100) 2.334 -> 2.332 ( -0.09%) [ +0.47% +0.00% +0.00% / -0.09% +1.20% +0.60%] index_select perm : Elapsed 0.023 ms (2.345 ms / 100) 2.330 -> 2.333 ( +0.13%) [ +0.00% +0.47% +0.17% / +0.13% +0.77% +0.73%] index_select perm_sorted : Elapsed 0.023 ms (2.330 ms / 100) out_shape = [40, 16, 5, 4] in_shape = [40, 16, 20, 4] idx_dim = 2 B = [40, 16, 5, 4] (stride (320, 4, 64, 1)) A = [40, 16, 20, 4] (stride (20, 800, 1, 12800)) dim = 2 1.917 -> 1.916 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.21% +0.37%] index_select const : Elapsed 0.019 ms (1.917 ms / 100) 1.924 -> 1.928 ( +0.21%) [ +0.21% +0.00% +0.00% / +0.21% +0.57% +0.52%] index_select wrap : Elapsed 0.019 ms (1.928 ms / 100) 1.921 -> 1.925 ( +0.21%) [ +0.16% +0.16% +0.00% / +0.21% +0.73% +0.52%] index_select linear : Elapsed 0.019 ms (1.924 ms / 100) 1.928 -> 1.930 ( +0.10%) [ +0.36% +0.21% +0.00% / +0.10% +0.67% +0.62%] index_select reverse : Elapsed 0.019 ms (1.935 ms / 100) 1.911 -> 1.917 ( +0.31%) [ +0.26% +0.00% +0.47% / +0.31% +0.68% +0.78%] index_select skip64 : Elapsed 0.019 ms (1.916 ms / 100) 1.914 -> 1.915 ( +0.05%) [ +0.00% +0.00% +0.16% / +0.05% +0.47% +0.47%] index_select skip256 : Elapsed 0.019 ms (1.914 ms / 100) 1.945 -> 1.950 ( +0.26%) [ +0.00% +0.05% +0.00% / +0.26% +0.57% +0.31%] index_select spread : Elapsed 0.019 ms (1.945 ms / 100) 1.940 -> 1.942 ( +0.10%) [ +0.31% +0.41% +0.00% / +0.10% +0.77% +0.67%] index_select strided 3 : Elapsed 0.019 ms (1.946 ms / 100) 1.948 -> 1.949 ( +0.05%) [ +0.21% +0.00% +0.15% / +0.05% +0.56% +0.72%] index_select strided 5 : Elapsed 0.020 ms (1.952 ms / 100) 1.943 -> 1.944 ( +0.05%) [ +0.05% +0.00% +0.21% / +0.05% +0.41% +0.46%] index_select strided 7 : Elapsed 0.019 ms (1.944 ms / 100) 1.945 -> 1.947 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.46% +0.62%] index_select strided 8 : Elapsed 0.019 ms (1.946 ms / 100) 1.944 -> 1.944 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.51% +0.46%] index_select strided 16 : Elapsed 0.019 ms (1.946 ms / 100) 1.945 -> 1.948 ( +0.15%) [ +0.00% +0.26% +0.00% / +0.15% +0.72% +0.72%] index_select random : Elapsed 0.019 ms (1.945 ms / 100) 1.945 -> 1.949 ( +0.21%) [ +0.00% +0.05% +0.10% / +0.21% +0.77% +0.57%] index_select random_sorted : Elapsed 0.019 ms (1.945 ms / 100) 1.936 -> 1.935 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.46% +0.62%] index_select perm : Elapsed 0.019 ms (1.936 ms / 100) 1.936 -> 1.937 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.57% +0.57%] index_select perm_sorted : Elapsed 0.019 ms (1.936 ms / 100) B = [40, 16, 5, 4] (stride (320, 1, 64, 16)) A = [40, 16, 20, 4] (stride (64, 4, 2560, 1)) dim = 2 1.673 -> 1.675 ( +0.12%) [ +0.12% +0.00% +0.30% / +0.24% +0.24% +0.12%] index_select const : Elapsed 0.017 ms (1.675 ms / 100) 1.671 -> 1.674 ( +0.18%) [ +0.12% +0.24% +0.00% / +0.18% +0.42% +0.48%] index_select wrap : Elapsed 0.017 ms (1.673 ms / 100) 1.672 -> 1.671 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.30% +0.48%] index_select linear : Elapsed 0.017 ms (1.673 ms / 100) 1.668 -> 1.674 ( +0.36%) [ +0.48% +0.00% +0.42% / +0.36% +0.72% +1.02%] index_select reverse : Elapsed 0.017 ms (1.676 ms / 100) 1.672 -> 1.674 ( +0.12%) [ +0.18% +0.30% +0.00% / +0.12% +0.18% +0.36%] index_select skip64 : Elapsed 0.017 ms (1.675 ms / 100) 1.673 -> 1.675 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.36% +0.12%] index_select skip256 : Elapsed 0.017 ms (1.674 ms / 100) 1.682 -> 1.686 ( +0.24%) [ +0.24% +0.00% +0.36% / +0.24% +0.42% +0.36%] index_select spread : Elapsed 0.017 ms (1.686 ms / 100) 1.694 -> 1.682 ( -0.71%) [ +0.47% +0.41% +0.00% / +0.30% -0.35% -0.71%] index_select strided 3 : Elapsed 0.017 ms (1.702 ms / 100) 1.684 -> 1.683 ( -0.06%) [ +0.06% +0.18% +0.00% / +0.12% +0.06% -0.06%] index_select strided 5 : Elapsed 0.017 ms (1.685 ms / 100) 1.688 -> 1.689 ( +0.06%) [ +0.24% +0.24% +0.00% / +0.06% +0.30% +0.18%] index_select strided 7 : Elapsed 0.017 ms (1.692 ms / 100) 1.684 -> 1.685 ( +0.06%) [ +0.00% +0.18% +0.00% / +0.06% +0.18% +0.24%] index_select strided 8 : Elapsed 0.017 ms (1.684 ms / 100) 1.681 -> 1.684 ( +0.18%) [ +0.24% +0.00% +0.06% / +0.18% +0.36% +5.35%] index_select strided 16 : Elapsed 0.017 ms (1.685 ms / 100) 1.683 -> 1.677 ( -0.36%) [ +0.53% +0.00% +0.18% / +0.06% -0.18% -0.36%] index_select random : Elapsed 0.017 ms (1.692 ms / 100) 1.677 -> 1.677 ( +0.00%) [ +0.24% +0.36% +0.00% / +0.42% +0.00% +0.06%] index_select random_sorted : Elapsed 0.017 ms (1.681 ms / 100) 1.704 -> 1.682 ( -1.29%) [ +0.18% +0.00% +0.00% / -0.18% -1.29% -1.00%] index_select perm : Elapsed 0.017 ms (1.707 ms / 100) 1.709 -> 1.690 ( -1.11%) [ +0.00% +0.18% +0.23% / -0.12% -1.11% -1.11%] index_select perm_sorted : Elapsed 0.017 ms (1.709 ms / 100) B = [40, 16, 5, 4] (stride (20, 800, 1, 5)) A = [40, 16, 20, 4] (stride (16, 1, 2560, 640)) dim = 2 1.689 -> 1.679 ( -0.59%) [ +0.24% +0.06% +0.00% / +0.06% -0.36% -0.59%] index_select const : Elapsed 0.017 ms (1.693 ms / 100) 1.687 -> 1.679 ( -0.47%) [ +0.30% +0.06% +0.00% / +0.00% -0.18% -0.47%] index_select wrap : Elapsed 0.017 ms (1.692 ms / 100) 1.689 -> 1.677 ( -0.71%) [ +0.00% +0.24% +0.06% / -0.18% -0.71% -0.65%] index_select linear : Elapsed 0.017 ms (1.689 ms / 100) 1.687 -> 1.679 ( -0.47%) [ +0.06% +0.36% +0.00% / +0.06% -0.30% -0.47%] index_select reverse : Elapsed 0.017 ms (1.688 ms / 100) 1.686 -> 1.677 ( -0.53%) [ +0.18% +0.36% +0.00% / +0.30% -0.42% -0.53%] index_select skip64 : Elapsed 0.017 ms (1.689 ms / 100) 1.691 -> 1.678 ( -0.77%) [ +0.00% +0.06% +0.06% / -0.24% -0.65% -0.77%] index_select skip256 : Elapsed 0.017 ms (1.691 ms / 100) 1.722 -> 1.719 ( -0.17%) [ +0.06% +0.00% +0.17% / -0.17% -0.17% +0.06%] index_select spread : Elapsed 0.017 ms (1.723 ms / 100) 1.715 -> 1.699 ( -0.93%) [ +0.17% +0.00% +0.00% / +0.00% -0.76% -0.93%] index_select strided 3 : Elapsed 0.017 ms (1.718 ms / 100) 1.687 -> 1.685 ( -0.12%) [ +0.00% +0.12% +0.59% / +0.65% -0.12% +0.00%] index_select strided 5 : Elapsed 0.017 ms (1.687 ms / 100) 1.716 -> 1.711 ( -0.29%) [ +0.00% +0.00% +0.06% / -0.12% -0.29% -0.17%] index_select strided 7 : Elapsed 0.017 ms (1.716 ms / 100) 1.719 -> 1.718 ( -0.06%) [ +0.23% +0.17% +0.00% / +0.17% +0.00% -0.06%] index_select strided 8 : Elapsed 0.017 ms (1.723 ms / 100) 1.720 -> 1.714 ( -0.35%) [ +0.00% +0.06% +0.00% / -0.17% -0.35% -0.12%] index_select strided 16 : Elapsed 0.017 ms (1.720 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.00% +0.18% +0.12% / +0.00% +0.53% +0.58%] index_select random : Elapsed 0.017 ms (1.714 ms / 100) 1.714 -> 1.716 ( +0.12%) [ +0.12% +0.00% +0.35% / +0.12% +0.53% +0.70%] index_select random_sorted : Elapsed 0.017 ms (1.716 ms / 100) 1.726 -> 1.721 ( -0.29%) [ +0.12% +0.17% +0.00% / +0.00% -0.29% -0.06%] index_select perm : Elapsed 0.017 ms (1.728 ms / 100) 1.728 -> 1.722 ( -0.35%) [ +0.06% +0.00% +0.12% / +0.17% -0.29% -0.35%] index_select perm_sorted : Elapsed 0.017 ms (1.729 ms / 100) B = [40, 16, 5, 4] (stride (20, 800, 1, 5)) A = [40, 16, 20, 4] (stride (1, 40, 640, 12800)) dim = 2 1.905 -> 1.897 ( -0.42%) [ +0.21% +0.00% +0.00% / +0.00% -0.37% -0.42%] index_select const : Elapsed 0.019 ms (1.909 ms / 100) 1.897 -> 1.889 ( -0.42%) [ +0.16% +0.11% +0.00% / -0.11% -0.32% -0.42%] index_select wrap : Elapsed 0.019 ms (1.900 ms / 100) 1.899 -> 1.895 ( -0.21%) [ +0.05% +0.00% +0.00% / +0.11% -0.21% -0.21%] index_select linear : Elapsed 0.019 ms (1.900 ms / 100) 1.892 -> 1.889 ( -0.16%) [ +0.26% +0.16% +0.00% / +0.16% +0.21% -0.16%] index_select reverse : Elapsed 0.019 ms (1.897 ms / 100) 1.895 -> 1.891 ( -0.21%) [ +0.05% +0.00% +0.00% / -0.11% -0.21% -0.16%] index_select skip64 : Elapsed 0.019 ms (1.896 ms / 100) 1.892 -> 1.894 ( +0.11%) [ +0.16% +0.05% +0.00% / +0.11% +0.32% +0.32%] index_select skip256 : Elapsed 0.019 ms (1.895 ms / 100) 1.904 -> 1.894 ( -0.53%) [ +0.00% +0.05% +0.00% / +0.11% -0.37% -0.53%] index_select spread : Elapsed 0.019 ms (1.904 ms / 100) 1.904 -> 1.899 ( -0.26%) [ +0.00% +0.37% +0.16% / +0.16% -0.05% -0.26%] index_select strided 3 : Elapsed 0.019 ms (1.904 ms / 100) 1.891 -> 1.891 ( +0.00%) [ +0.11% +0.00% +0.16% / +0.16% +0.11% +0.00%] index_select strided 5 : Elapsed 0.019 ms (1.893 ms / 100) 1.889 -> 1.889 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.79% +0.58%] index_select strided 7 : Elapsed 0.019 ms (1.889 ms / 100) 1.902 -> 1.893 ( -0.47%) [ +0.21% +0.16% +0.00% / -0.05% -0.47% -0.32%] index_select strided 8 : Elapsed 0.019 ms (1.906 ms / 100) 1.904 -> 1.894 ( -0.53%) [ +0.21% +0.16% +0.00% / +0.00% -0.53% -0.42%] index_select strided 16 : Elapsed 0.019 ms (1.908 ms / 100) 1.894 -> 1.890 ( -0.21%) [ +0.05% +0.11% +0.00% / -0.21% +0.63% +0.58%] index_select random : Elapsed 0.019 ms (1.895 ms / 100) 1.888 -> 1.890 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.90% +0.95%] index_select random_sorted : Elapsed 0.019 ms (1.888 ms / 100) 1.901 -> 1.894 ( -0.37%) [ +0.26% +0.11% +0.00% / +0.00% -0.21% -0.37%] index_select perm : Elapsed 0.019 ms (1.906 ms / 100) 1.895 -> 1.896 ( +0.05%) [ +0.42% +0.16% +0.00% / +0.16% +0.21% +0.05%] index_select perm_sorted : Elapsed 0.019 ms (1.903 ms / 100) B = [40, 16, 5, 4] (stride (4, 160, 2560, 1)) A = [40, 16, 20, 4] (stride (16, 1, 2560, 640)) dim = 2 1.677 -> 1.682 ( +0.30%) [ +0.36% +0.00% +0.18% / +0.30% +0.42% +0.36%] index_select const : Elapsed 0.017 ms (1.683 ms / 100) 1.680 -> 1.679 ( -0.06%) [ +0.00% +0.18% +0.30% / -0.06% +0.18% +0.18%] index_select wrap : Elapsed 0.017 ms (1.680 ms / 100) 1.677 -> 1.678 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.30% +0.30%] index_select linear : Elapsed 0.017 ms (1.678 ms / 100) 1.685 -> 1.682 ( -0.18%) [ +0.00% +0.00% +0.06% / +0.59% -0.06% -0.18%] index_select reverse : Elapsed 0.017 ms (1.685 ms / 100) 1.682 -> 1.678 ( -0.24%) [ +0.00% +0.12% +0.06% / -0.24% +0.00% +0.00%] index_select skip64 : Elapsed 0.017 ms (1.682 ms / 100) 1.675 -> 1.680 ( +0.30%) [ +0.18% +0.36% +0.00% / +0.30% +0.54% +0.48%] index_select skip256 : Elapsed 0.017 ms (1.678 ms / 100) 1.714 -> 1.719 ( +0.29%) [ +0.00% +0.23% +0.06% / +0.29% +0.93% +0.76%] index_select spread : Elapsed 0.017 ms (1.714 ms / 100) 1.712 -> 1.707 ( -0.29%) [ +0.06% +0.00% +0.06% / -0.29% -0.23% -0.12%] index_select strided 3 : Elapsed 0.017 ms (1.713 ms / 100) 1.678 -> 1.691 ( +0.77%) [ +0.12% +0.00% +0.77% / +0.89% +0.77% +0.77%] index_select strided 5 : Elapsed 0.017 ms (1.680 ms / 100) 1.708 -> 1.713 ( +0.29%) [ +0.18% +0.00% +0.18% / +0.29% +0.35% +0.29%] index_select strided 7 : Elapsed 0.017 ms (1.711 ms / 100) 1.715 -> 1.713 ( -0.12%) [ +0.17% +0.00% +0.12% / -0.12% +0.76% +0.35%] index_select strided 8 : Elapsed 0.017 ms (1.718 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.23% +0.12% +0.00% / +0.06% +0.64% +0.76%] index_select strided 16 : Elapsed 0.017 ms (1.717 ms / 100) 1.702 -> 1.700 ( -0.12%) [ +0.12% +0.24% +0.00% / +0.00% -0.06% -0.12%] index_select random : Elapsed 0.017 ms (1.704 ms / 100) 1.699 -> 1.702 ( +0.18%) [ +0.24% +0.00% +0.35% / +0.35% +0.18% +0.24%] index_select random_sorted : Elapsed 0.017 ms (1.703 ms / 100) 1.702 -> 1.707 ( +0.29%) [ +0.29% +0.00% +0.35% / +0.29% +1.00% +1.00%] index_select perm : Elapsed 0.017 ms (1.707 ms / 100) 1.707 -> 1.710 ( +0.18%) [ +0.00% +0.35% +0.12% / +0.18% +0.88% +0.70%] index_select perm_sorted : Elapsed 0.017 ms (1.707 ms / 100) B = [40, 16, 5, 4] (stride (1, 160, 2560, 40)) A = [40, 16, 20, 4] (stride (1, 40, 2560, 640)) dim = 2 1.897 -> 1.900 ( +0.16%) [ +0.32% +0.21% +0.00% / +0.32% +0.21% +0.16%] index_select const : Elapsed 0.019 ms (1.903 ms / 100) 1.887 -> 1.890 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.32% +0.37%] index_select wrap : Elapsed 0.019 ms (1.887 ms / 100) 1.889 -> 1.892 ( +0.16%) [ +0.00% +0.16% +0.21% / +0.16% +0.42% +0.48%] index_select linear : Elapsed 0.019 ms (1.889 ms / 100) 1.892 -> 1.889 ( -0.16%) [ +0.11% +0.32% +0.00% / -0.16% +0.16% +0.32%] index_select reverse : Elapsed 0.019 ms (1.894 ms / 100) 1.899 -> 1.901 ( +0.11%) [ +0.26% +0.16% +0.00% / +0.11% +0.47% +1.11%] index_select skip64 : Elapsed 0.019 ms (1.904 ms / 100) 1.891 -> 1.890 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.48% +0.48%] index_select skip256 : Elapsed 0.019 ms (1.892 ms / 100) 1.894 -> 1.900 ( +0.32%) [ +0.16% +0.00% +0.16% / +0.32% +0.32% +0.42%] index_select spread : Elapsed 0.019 ms (1.897 ms / 100) 1.902 -> 1.903 ( +0.05%) [ +0.00% +0.16% +0.00% / +0.05% +0.21% +0.21%] index_select strided 3 : Elapsed 0.019 ms (1.902 ms / 100) 1.882 -> 1.880 ( -0.11%) [ +0.05% +0.00% +0.05% / -0.11% +0.53% +0.53%] index_select strided 5 : Elapsed 0.019 ms (1.883 ms / 100) 1.884 -> 1.886 ( +0.11%) [ +0.32% +0.16% +0.00% / +0.11% +0.58% +0.53%] index_select strided 7 : Elapsed 0.019 ms (1.890 ms / 100) 1.894 -> 1.891 ( -0.16%) [ +0.00% +0.05% +0.00% / -0.16% +0.69% +0.58%] index_select strided 8 : Elapsed 0.019 ms (1.894 ms / 100) 1.890 -> 1.892 ( +0.11%) [ +0.26% +0.00% +0.05% / +0.11% +0.42% +0.48%] index_select strided 16 : Elapsed 0.019 ms (1.895 ms / 100) 1.900 -> 1.905 ( +0.26%) [ +0.11% +0.00% +0.05% / +0.26% +0.68% +0.63%] index_select random : Elapsed 0.019 ms (1.902 ms / 100) 1.901 -> 1.902 ( +0.05%) [ +0.00% +0.16% +0.00% / +0.05% +0.37% +0.37%] index_select random_sorted : Elapsed 0.019 ms (1.901 ms / 100) 1.878 -> 1.879 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.69% +0.64%] index_select perm : Elapsed 0.019 ms (1.880 ms / 100) 1.899 -> 1.901 ( +0.11%) [ +0.00% +0.05% +0.21% / +0.11% +0.42% +0.37%] index_select perm_sorted : Elapsed 0.019 ms (1.899 ms / 100) B = [40, 16, 5, 4] (stride (16, 1, 2560, 640)) A = [40, 16, 20, 4] (stride (64, 4, 2560, 1)) dim = 2 0.653 -> 0.654 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.92% +0.92%] index_select const : Elapsed 0.007 ms (0.653 ms / 100) 0.659 -> 0.655 ( -0.61%) [ +0.00% +0.46% +0.00% / +0.15% -0.46% -0.61%] index_select wrap : Elapsed 0.007 ms (0.659 ms / 100) 0.659 -> 0.655 ( -0.61%) [ +0.00% +0.15% +0.00% / +0.15% -0.46% -0.61%] index_select linear : Elapsed 0.007 ms (0.659 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.15% +0.31% +0.00% / +0.15% +0.61% +0.46%] index_select reverse : Elapsed 0.007 ms (0.654 ms / 100) 0.653 -> 0.653 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.61% +0.46%] index_select skip64 : Elapsed 0.007 ms (0.655 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +1.53% +1.07%] index_select skip256 : Elapsed 0.007 ms (0.653 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +1.38% +1.23%] index_select spread : Elapsed 0.007 ms (0.654 ms / 100) 0.655 -> 0.657 ( +0.31%) [ +0.15% +0.15% +0.00% / +0.46% +0.31% +0.46%] index_select strided 3 : Elapsed 0.007 ms (0.656 ms / 100) 0.654 -> 0.655 ( +0.15%) [ +0.15% +0.31% +0.00% / +0.15% +0.46% +0.46%] index_select strided 5 : Elapsed 0.007 ms (0.655 ms / 100) 0.654 -> 0.654 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.76% +0.61%] index_select strided 7 : Elapsed 0.007 ms (0.654 ms / 100) 0.653 -> 0.654 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.46% +16.85%] index_select strided 8 : Elapsed 0.007 ms (0.654 ms / 100) 0.659 -> 0.655 ( -0.61%) [ +0.30% +0.15% +0.00% / +0.15% -0.61% -0.46%] index_select strided 16 : Elapsed 0.007 ms (0.661 ms / 100) 0.659 -> 0.655 ( -0.61%) [ +0.00% +0.15% +0.00% / +0.00% -0.61% -0.61%] index_select random : Elapsed 0.007 ms (0.659 ms / 100) 0.654 -> 0.654 ( +0.00%) [ +1.07% +0.15% +0.00% / +0.00% +0.61% +0.46%] index_select random_sorted : Elapsed 0.007 ms (0.661 ms / 100) 0.654 -> 0.656 ( +0.31%) [ +0.15% +0.15% +0.00% / +0.31% +0.46% +0.61%] index_select perm : Elapsed 0.007 ms (0.655 ms / 100) 0.653 -> 0.655 ( +0.31%) [ +0.15% +0.15% +0.00% / +0.31% +1.38% +1.38%] index_select perm_sorted : Elapsed 0.007 ms (0.654 ms / 100) out_shape = [40, 16, 20, 5] in_shape = [40, 16, 20, 4] idx_dim = 3 B = [40, 16, 20, 5] (stride (1600, 100, 5, 1)) A = [40, 16, 20, 4] (stride (1280, 1, 16, 320)) dim = 3 5.491 -> 5.478 ( -0.24%) [ +0.00% +0.07% +0.09% / +0.07% -0.16% -0.24%] index_add_ linear : Elapsed 0.055 ms (5.491 ms / 100) 5.464 -> 5.434 ( -0.55%) [ +0.00% +0.02% +0.15% / +0.13% -0.55% -0.51%] index_copy_ linear : Elapsed 0.055 ms (5.464 ms / 100) 5.486 -> 5.482 ( -0.07%) [ +0.15% +0.00% +0.20% / +0.29% +0.00% -0.07%] index_add_ reverse : Elapsed 0.055 ms (5.494 ms / 100) 5.464 -> 5.437 ( -0.49%) [ +0.00% +0.00% +0.07% / +0.11% -0.49% -0.33%] index_copy_ reverse : Elapsed 0.055 ms (5.464 ms / 100) 5.486 -> 5.485 ( -0.02%) [ +0.09% +0.00% +0.26% / +0.16% -0.02% +0.00%] index_add_ spread : Elapsed 0.055 ms (5.491 ms / 100) 5.462 -> 5.434 ( -0.51%) [ +0.02% +0.00% +0.04% / +0.13% -0.51% -0.35%] index_copy_ spread : Elapsed 0.055 ms (5.463 ms / 100) 5.491 -> 5.477 ( -0.25%) [ +0.00% +0.02% +0.07% / +0.16% -0.25% -0.18%] index_add_ strided 3 : Elapsed 0.055 ms (5.491 ms / 100) 5.467 -> 5.435 ( -0.59%) [ +0.09% +0.00% +0.05% / +0.15% -0.55% -0.59%] index_copy_ strided 3 : Elapsed 0.055 ms (5.472 ms / 100) 5.495 -> 5.475 ( -0.36%) [ +0.00% +0.00% +0.07% / +0.04% -0.36% -0.18%] index_add_ perm : Elapsed 0.055 ms (5.495 ms / 100) 5.464 -> 5.438 ( -0.48%) [ +0.09% +0.00% +0.13% / +0.05% -0.38% -0.48%] index_copy_ perm : Elapsed 0.055 ms (5.469 ms / 100) 5.490 -> 5.478 ( -0.22%) [ +0.11% +0.13% +0.00% / +0.15% -0.22% +0.02%] index_add_ perm_sorted : Elapsed 0.055 ms (5.496 ms / 100) 5.458 -> 5.437 ( -0.38%) [ +0.24% +0.07% +0.00% / +0.15% -0.38% -0.33%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.471 ms / 100) 5.620 -> 5.629 ( +0.16%) [ +0.00% +0.02% +0.12% / +0.21% +0.16% +0.25%] index_select const : Elapsed 0.056 ms (5.620 ms / 100) 5.672 -> 5.657 ( -0.26%) [ +0.02% +0.00% +0.07% / +0.14% -0.18% -0.26%] index_select wrap : Elapsed 0.057 ms (5.673 ms / 100) 5.669 -> 5.653 ( -0.28%) [ +0.00% +0.05% +0.28% / +0.12% -0.28% -0.28%] index_select linear : Elapsed 0.057 ms (5.669 ms / 100) 5.658 -> 5.655 ( -0.05%) [ +0.09% +0.00% +0.25% / +0.21% +0.11% -0.05%] index_select reverse : Elapsed 0.057 ms (5.663 ms / 100) 5.619 -> 5.626 ( +0.12%) [ +0.00% +0.00% +0.21% / +0.12% +0.23% +0.20%] index_select skip64 : Elapsed 0.056 ms (5.619 ms / 100) 5.610 -> 5.623 ( +0.23%) [ +0.00% +0.18% +0.23% / +0.30% +0.39% +0.23%] index_select skip256 : Elapsed 0.056 ms (5.610 ms / 100) 5.654 -> 5.647 ( -0.12%) [ +0.00% +0.11% +0.28% / +0.27% -0.12% -0.11%] index_select spread : Elapsed 0.057 ms (5.654 ms / 100) 5.674 -> 5.653 ( -0.37%) [ +0.05% +0.07% +0.00% / +0.07% -0.30% -0.37%] index_select strided 3 : Elapsed 0.057 ms (5.677 ms / 100) 5.615 -> 5.623 ( +0.14%) [ +0.00% +0.02% +0.27% / +0.14% +0.66% +0.71%] index_select random : Elapsed 0.056 ms (5.615 ms / 100) 5.626 -> 5.633 ( +0.12%) [ +0.09% +0.00% +0.07% / +0.12% +0.48% +0.52%] index_select random_sorted : Elapsed 0.056 ms (5.631 ms / 100) B = [40, 16, 20, 5] (stride (1600, 100, 5, 1)) A = [40, 16, 20, 4] (stride (64, 1, 2560, 16)) dim = 3 5.583 -> 5.593 ( +0.18%) [ +0.00% +0.04% +0.07% / +0.18% +0.39% +0.34%] index_add_ linear : Elapsed 0.056 ms (5.583 ms / 100) 5.561 -> 5.571 ( +0.18%) [ +0.00% +0.14% +0.25% / +0.18% +0.41% +0.18%] index_copy_ linear : Elapsed 0.056 ms (5.561 ms / 100) 5.582 -> 5.588 ( +0.11%) [ +0.00% +0.04% +0.16% / +0.11% +0.54% +0.41%] index_add_ reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.00% +0.16% / +0.02% +0.20% +0.31%] index_copy_ reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.584 -> 5.597 ( +0.23%) [ +0.00% +0.11% +0.13% / +0.23% +0.41% +0.41%] index_add_ spread : Elapsed 0.056 ms (5.584 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.23% +0.25%] index_copy_ spread : Elapsed 0.056 ms (5.570 ms / 100) 5.576 -> 5.592 ( +0.29%) [ +0.05% +0.00% +0.25% / +0.29% +0.66% +0.48%] index_add_ strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.564 -> 5.576 ( +0.22%) [ +0.00% +0.05% +0.27% / +0.32% +0.23% +0.22%] index_copy_ strided 3 : Elapsed 0.056 ms (5.564 ms / 100) 5.585 -> 5.593 ( +0.14%) [ +0.27% +0.00% +0.00% / +0.14% +0.41% +0.36%] index_add_ perm : Elapsed 0.056 ms (5.600 ms / 100) 5.560 -> 5.570 ( +0.18%) [ +0.14% +0.00% +0.20% / +0.18% +0.36% +0.38%] index_copy_ perm : Elapsed 0.056 ms (5.568 ms / 100) 5.586 -> 5.591 ( +0.09%) [ +0.00% +0.05% +0.14% / +0.09% +0.25% +0.48%] index_add_ perm_sorted : Elapsed 0.056 ms (5.586 ms / 100) 5.561 -> 5.574 ( +0.23%) [ +0.14% +0.00% +0.23% / +0.23% +0.32% +0.36%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.569 ms / 100) 5.700 -> 5.715 ( +0.26%) [ +0.00% +0.09% +0.16% / +0.26% +0.74% +0.84%] index_select const : Elapsed 0.057 ms (5.700 ms / 100) 5.785 -> 5.796 ( +0.19%) [ +0.09% +0.00% +0.16% / +0.19% +0.83% +0.64%] index_select wrap : Elapsed 0.058 ms (5.790 ms / 100) 5.778 -> 5.790 ( +0.21%) [ +0.03% +0.00% +0.22% / +0.21% +0.62% +0.48%] index_select linear : Elapsed 0.058 ms (5.780 ms / 100) 5.756 -> 5.765 ( +0.16%) [ +0.14% +0.00% +0.10% / +0.16% +0.76% +0.75%] index_select reverse : Elapsed 0.058 ms (5.764 ms / 100) 5.704 -> 5.712 ( +0.14%) [ +0.11% +0.00% +0.11% / +0.14% +0.58% +0.60%] index_select skip64 : Elapsed 0.057 ms (5.710 ms / 100) 5.708 -> 5.713 ( +0.09%) [ +0.00% +0.04% +0.19% / +0.09% +0.51% +0.60%] index_select skip256 : Elapsed 0.057 ms (5.708 ms / 100) 5.754 -> 5.772 ( +0.31%) [ +0.12% +0.00% +0.17% / +0.31% +0.80% +0.82%] index_select spread : Elapsed 0.058 ms (5.761 ms / 100) 5.787 -> 5.793 ( +0.10%) [ +0.02% +0.00% +0.12% / +0.10% +0.50% +0.59%] index_select strided 3 : Elapsed 0.058 ms (5.788 ms / 100) 5.746 -> 5.758 ( +0.21%) [ +0.05% +0.00% +0.26% / +0.21% +0.49% +0.68%] index_select random : Elapsed 0.057 ms (5.749 ms / 100) 5.761 -> 5.771 ( +0.17%) [ +0.00% +0.14% +0.21% / +0.17% +0.62% +0.59%] index_select random_sorted : Elapsed 0.058 ms (5.761 ms / 100) B = [40, 16, 20, 5] (stride (5, 4000, 200, 1)) A = [40, 16, 20, 4] (stride (20, 800, 1, 12800)) dim = 3 5.767 -> 5.755 ( -0.21%) [ +0.05% +0.03% +0.00% / +0.02% -0.09% -0.21%] index_add_ linear : Elapsed 0.058 ms (5.770 ms / 100) 5.751 -> 5.733 ( -0.31%) [ +0.00% +0.00% +0.12% / +0.09% -0.24% -0.31%] index_copy_ linear : Elapsed 0.058 ms (5.751 ms / 100) 5.764 -> 5.758 ( -0.10%) [ +0.12% +0.17% +0.00% / +0.10% -0.10% -0.10%] index_add_ reverse : Elapsed 0.058 ms (5.771 ms / 100) 5.748 -> 5.737 ( -0.19%) [ +0.10% +0.02% +0.00% / +0.24% -0.17% -0.19%] index_copy_ reverse : Elapsed 0.058 ms (5.754 ms / 100) 5.765 -> 5.753 ( -0.21%) [ +0.07% +0.09% +0.00% / +0.16% -0.21% -0.14%] index_add_ spread : Elapsed 0.058 ms (5.769 ms / 100) 5.746 -> 5.728 ( -0.31%) [ +0.09% +0.00% +0.00% / +0.03% -0.31% -0.24%] index_copy_ spread : Elapsed 0.058 ms (5.751 ms / 100) 5.757 -> 5.750 ( -0.12%) [ +0.10% +0.00% +0.28% / +0.19% -0.10% -0.12%] index_add_ strided 3 : Elapsed 0.058 ms (5.763 ms / 100) 5.742 -> 5.727 ( -0.26%) [ +0.00% +0.05% +0.03% / +0.03% -0.14% -0.26%] index_copy_ strided 3 : Elapsed 0.057 ms (5.742 ms / 100) 5.756 -> 5.753 ( -0.05%) [ +0.00% +0.05% +0.07% / +0.23% -0.05% -0.05%] index_add_ perm : Elapsed 0.058 ms (5.756 ms / 100) 5.738 -> 5.724 ( -0.24%) [ +0.14% +0.00% +0.12% / +0.09% -0.17% -0.24%] index_copy_ perm : Elapsed 0.057 ms (5.746 ms / 100) 5.769 -> 5.759 ( -0.17%) [ +0.02% +0.03% +0.00% / +0.14% -0.12% -0.17%] index_add_ perm_sorted : Elapsed 0.058 ms (5.770 ms / 100) 5.746 -> 5.731 ( -0.26%) [ +0.09% +0.00% +0.03% / +0.02% -0.14% -0.26%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.751 ms / 100) 5.868 -> 5.872 ( +0.07%) [ +0.00% +0.09% +0.03% / +0.07% +0.29% +0.31%] index_select const : Elapsed 0.059 ms (5.868 ms / 100) 5.965 -> 5.961 ( -0.07%) [ +0.12% +0.00% +0.15% / +0.27% -0.07% -0.02%] index_select wrap : Elapsed 0.060 ms (5.972 ms / 100) 5.958 -> 5.955 ( -0.05%) [ +0.00% +0.08% +0.12% / +0.05% -0.03% -0.05%] index_select linear : Elapsed 0.060 ms (5.958 ms / 100) 5.962 -> 5.955 ( -0.12%) [ +0.02% +0.00% +0.15% / +0.13% -0.03% -0.12%] index_select reverse : Elapsed 0.060 ms (5.963 ms / 100) 5.864 -> 5.864 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.17% +0.27%] index_select skip64 : Elapsed 0.059 ms (5.864 ms / 100) 5.869 -> 5.876 ( +0.12%) [ +0.02% +0.00% +0.03% / +0.12% +0.27% +0.20%] index_select skip256 : Elapsed 0.059 ms (5.870 ms / 100) 5.960 -> 5.951 ( -0.15%) [ +0.02% +0.00% +0.02% / +0.05% -0.15% -0.05%] index_select spread : Elapsed 0.060 ms (5.961 ms / 100) 5.961 -> 5.948 ( -0.22%) [ +0.00% +0.00% +0.07% / +0.00% -0.13% -0.22%] index_select strided 3 : Elapsed 0.060 ms (5.961 ms / 100) 5.913 -> 5.904 ( -0.15%) [ +0.07% +0.00% +0.07% / +0.08% -0.02% -0.15%] index_select random : Elapsed 0.059 ms (5.917 ms / 100) 5.924 -> 5.908 ( -0.27%) [ +0.05% +0.00% +0.02% / +0.10% -0.27% -0.15%] index_select random_sorted : Elapsed 0.059 ms (5.927 ms / 100) B = [40, 16, 20, 5] (stride (5, 200, 3200, 1)) A = [40, 16, 20, 4] (stride (320, 1, 16, 12800)) dim = 3 6.012 -> 6.009 ( -0.05%) [ +0.00% +0.00% +0.03% / +0.07% +0.07% -0.05%] index_add_ linear : Elapsed 0.060 ms (6.012 ms / 100) 5.982 -> 5.960 ( -0.37%) [ +0.05% +0.00% +0.08% / +0.07% -0.35% -0.37%] index_copy_ linear : Elapsed 0.060 ms (5.985 ms / 100) 6.012 -> 6.006 ( -0.10%) [ +0.00% +0.15% +0.03% / +0.13% -0.03% -0.10%] index_add_ reverse : Elapsed 0.060 ms (6.012 ms / 100) 5.983 -> 5.961 ( -0.37%) [ +0.05% +0.03% +0.00% / +0.02% -0.30% -0.37%] index_copy_ reverse : Elapsed 0.060 ms (5.986 ms / 100) 6.011 -> 6.011 ( +0.00%) [ +0.03% +0.00% +0.13% / +0.15% +0.00% +0.05%] index_add_ spread : Elapsed 0.060 ms (6.013 ms / 100) 5.985 -> 5.962 ( -0.38%) [ +0.00% +0.00% +0.02% / +0.10% -0.38% -0.37%] index_copy_ spread : Elapsed 0.060 ms (5.985 ms / 100) 6.011 -> 6.006 ( -0.08%) [ +0.00% +0.05% +0.17% / +0.15% +0.00% -0.08%] index_add_ strided 3 : Elapsed 0.060 ms (6.011 ms / 100) 5.975 -> 5.960 ( -0.25%) [ +0.00% +0.12% +0.30% / +0.22% -0.25% -0.17%] index_copy_ strided 3 : Elapsed 0.060 ms (5.975 ms / 100) 6.013 -> 6.010 ( -0.05%) [ +0.00% +0.03% +0.08% / +0.12% -0.03% -0.05%] index_add_ perm : Elapsed 0.060 ms (6.013 ms / 100) 5.977 -> 5.958 ( -0.32%) [ +0.00% +0.08% +0.07% / +0.20% -0.32% -0.30%] index_copy_ perm : Elapsed 0.060 ms (5.977 ms / 100) 6.014 -> 6.010 ( -0.07%) [ +0.15% +0.00% +0.12% / +0.12% -0.05% -0.07%] index_add_ perm_sorted : Elapsed 0.060 ms (6.023 ms / 100) 5.978 -> 5.963 ( -0.25%) [ +0.00% +0.12% +0.03% / +0.07% -0.25% -0.17%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.978 ms / 100) 6.233 -> 6.221 ( -0.19%) [ +0.03% +0.00% +0.10% / +0.02% -0.19% -0.18%] index_select const : Elapsed 0.062 ms (6.235 ms / 100) 6.282 -> 6.262 ( -0.32%) [ +0.00% +0.02% +0.00% / +0.05% -0.32% -0.32%] index_select wrap : Elapsed 0.063 ms (6.282 ms / 100) 6.272 -> 6.252 ( -0.32%) [ +0.05% +0.00% +0.00% / +0.00% -0.32% -0.29%] index_select linear : Elapsed 0.063 ms (6.275 ms / 100) 6.268 -> 6.262 ( -0.10%) [ +0.00% +0.02% +0.05% / +0.13% -0.10% -0.08%] index_select reverse : Elapsed 0.063 ms (6.268 ms / 100) 6.232 -> 6.222 ( -0.16%) [ +0.03% +0.11% +0.00% / +0.13% -0.16% -0.10%] index_select skip64 : Elapsed 0.062 ms (6.234 ms / 100) 6.231 -> 6.217 ( -0.22%) [ +0.00% +0.13% +0.05% / +0.16% -0.13% -0.22%] index_select skip256 : Elapsed 0.062 ms (6.231 ms / 100) 6.273 -> 6.254 ( -0.30%) [ +0.00% +0.02% +0.08% / +0.11% -0.29% -0.30%] index_select spread : Elapsed 0.063 ms (6.273 ms / 100) 6.272 -> 6.258 ( -0.22%) [ +0.00% +0.03% +0.05% / +0.24% -0.21% -0.22%] index_select strided 3 : Elapsed 0.063 ms (6.272 ms / 100) 6.270 -> 6.254 ( -0.26%) [ +0.00% +0.02% +0.10% / +0.03% -0.26% -0.22%] index_select random : Elapsed 0.063 ms (6.270 ms / 100) 6.253 -> 6.239 ( -0.22%) [ +0.14% +0.00% +0.08% / +0.19% -0.22% -0.06%] index_select random_sorted : Elapsed 0.063 ms (6.262 ms / 100) B = [40, 16, 20, 5] (stride (1, 40, 3200, 640)) A = [40, 16, 20, 4] (stride (1, 3200, 160, 40)) dim = 3 5.634 -> 5.629 ( -0.09%) [ +0.00% +0.16% +0.20% / +0.04% -0.09% -0.07%] index_add_ linear : Elapsed 0.056 ms (5.634 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.00% +0.13% +0.09% / +0.14% -0.11% -0.07%] index_copy_ linear : Elapsed 0.056 ms (5.566 ms / 100) 5.643 -> 5.611 ( -0.57%) [ +0.04% +0.00% +0.04% / +0.05% -0.46% -0.57%] index_add_ reverse : Elapsed 0.056 ms (5.645 ms / 100) 5.575 -> 5.546 ( -0.52%) [ +0.00% +0.02% +0.16% / +0.13% -0.32% -0.52%] index_copy_ reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.629 -> 5.625 ( -0.07%) [ +0.09% +0.18% +0.00% / +0.18% +0.09% -0.07%] index_add_ spread : Elapsed 0.056 ms (5.634 ms / 100) 5.563 -> 5.561 ( -0.04%) [ +0.00% +0.18% +0.16% / +0.20% +0.00% -0.04%] index_copy_ spread : Elapsed 0.056 ms (5.563 ms / 100) 5.631 -> 5.620 ( -0.20%) [ +0.07% +0.00% +0.16% / +0.00% -0.09% -0.20%] index_add_ strided 3 : Elapsed 0.056 ms (5.635 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.13% +0.00% +0.11% / +0.02% +0.07% -0.02%] index_copy_ strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.641 -> 5.630 ( -0.20%) [ +0.05% +0.00% +0.07% / +0.27% -0.20% -0.20%] index_add_ perm : Elapsed 0.056 ms (5.644 ms / 100) 5.576 -> 5.561 ( -0.27%) [ +0.16% +0.00% +0.16% / +0.14% -0.22% -0.27%] index_copy_ perm : Elapsed 0.056 ms (5.585 ms / 100) 5.626 -> 5.621 ( -0.09%) [ +0.16% +0.12% +0.00% / +0.16% +0.02% -0.09%] index_add_ perm_sorted : Elapsed 0.056 ms (5.635 ms / 100) 5.576 -> 5.559 ( -0.30%) [ +0.02% +0.05% +0.00% / +0.09% -0.14% -0.30%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.577 ms / 100) 5.850 -> 5.856 ( +0.10%) [ +0.10% +0.00% +0.24% / +0.24% +0.22% +0.10%] index_select const : Elapsed 0.059 ms (5.856 ms / 100) 5.911 -> 5.895 ( -0.27%) [ +0.10% +0.00% +0.14% / +0.07% -0.20% -0.27%] index_select wrap : Elapsed 0.059 ms (5.917 ms / 100) 5.900 -> 5.891 ( -0.15%) [ +0.00% +0.14% +0.10% / +0.07% -0.10% -0.15%] index_select linear : Elapsed 0.059 ms (5.900 ms / 100) 5.901 -> 5.888 ( -0.22%) [ +0.00% +0.02% +0.07% / +0.12% -0.17% -0.22%] index_select reverse : Elapsed 0.059 ms (5.901 ms / 100) 5.854 -> 5.852 ( -0.03%) [ +0.07% +0.00% +0.12% / -0.03% -0.02% +0.02%] index_select skip64 : Elapsed 0.059 ms (5.858 ms / 100) 5.854 -> 5.856 ( +0.03%) [ +0.00% +0.09% +0.14% / +0.10% +0.07% +0.03%] index_select skip256 : Elapsed 0.059 ms (5.854 ms / 100) 5.900 -> 5.887 ( -0.22%) [ +0.05% +0.02% +0.00% / +0.02% -0.17% -0.22%] index_select spread : Elapsed 0.059 ms (5.903 ms / 100) 5.914 -> 5.893 ( -0.36%) [ +0.03% +0.12% +0.00% / -0.07% -0.30% -0.36%] index_select strided 3 : Elapsed 0.059 ms (5.916 ms / 100) 5.904 -> 5.882 ( -0.37%) [ +0.05% +0.00% +0.03% / +0.08% -0.37% -0.30%] index_select random : Elapsed 0.059 ms (5.907 ms / 100) 5.885 -> 5.870 ( -0.25%) [ +0.17% +0.19% +0.00% / +0.05% -0.12% -0.25%] index_select random_sorted : Elapsed 0.059 ms (5.895 ms / 100) out_shape = [5, 20, 4, 16] in_shape = [40, 20, 4, 16] idx_dim = 0 B = [5, 20, 4, 16] (stride (1280, 64, 1, 4)) A = [40, 20, 4, 16] (stride (320, 16, 12800, 1)) dim = 0 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.34% +0.27%] index_select const : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.47%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.41% +0.47%] index_select linear : Elapsed 0.015 ms (1.478 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.61% +0.41%] index_select reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.41% +0.47%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.54% +0.47%] index_select skip256 : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.47%] index_select spread : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.47%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.20% +0.07% +0.00% / +0.14% +0.61% +0.54%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.61%] index_select strided 8 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.20% +0.07% / +0.00% +0.61% +0.61%] index_select strided 16 : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.54%] index_select random : Elapsed 0.015 ms (1.476 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.27% +0.00% / +0.00% +0.54% +0.47%] index_select random_sorted : Elapsed 0.015 ms (1.477 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.54%] index_select perm : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) B = [5, 20, 4, 16] (stride (1280, 16, 320, 1)) A = [40, 20, 4, 16] (stride (80, 1, 20, 3200)) dim = 0 1.608 -> 1.608 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_select const : Elapsed 0.016 ms (1.609 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.25% +0.06% +0.00% / +0.06% +0.38% +0.45%] index_select wrap : Elapsed 0.016 ms (1.577 ms / 100) 1.575 -> 1.579 ( +0.25%) [ +0.06% +0.19% +0.00% / +0.25% +0.32% +0.32%] index_select linear : Elapsed 0.016 ms (1.576 ms / 100) 1.575 -> 1.578 ( +0.19%) [ +0.19% +0.00% +0.32% / +0.25% +0.25% +0.19%] index_select reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.606 -> 1.607 ( +0.06%) [ +0.12% +0.00% +0.00% / +0.06% +0.62% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.608 ms / 100) 1.606 -> 1.609 ( +0.19%) [ +0.12% +0.12% +0.00% / +0.19% +0.68% +0.68%] index_select skip256 : Elapsed 0.016 ms (1.608 ms / 100) 1.597 -> 1.598 ( +0.06%) [ +0.13% +0.00% +0.00% / +0.06% +0.69% +0.69%] index_select spread : Elapsed 0.016 ms (1.599 ms / 100) 1.582 -> 1.582 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.70% +0.88%] index_select strided 3 : Elapsed 0.016 ms (1.583 ms / 100) 1.579 -> 1.581 ( +0.13%) [ +0.06% +0.00% +0.00% / +0.13% +0.70% +0.76%] index_select strided 5 : Elapsed 0.016 ms (1.580 ms / 100) 1.576 -> 1.578 ( +0.13%) [ +0.25% +0.19% +0.00% / +0.13% +0.19% +0.25%] index_select strided 7 : Elapsed 0.016 ms (1.580 ms / 100) 1.601 -> 1.602 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.75% +0.81%] index_select strided 8 : Elapsed 0.016 ms (1.603 ms / 100) 1.607 -> 1.607 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_select strided 16 : Elapsed 0.016 ms (1.608 ms / 100) 1.595 -> 1.594 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.56% +0.56%] index_select random : Elapsed 0.016 ms (1.595 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.25% +0.45% +0.00% / +0.13% +0.25% +0.38%] index_select random_sorted : Elapsed 0.016 ms (1.577 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.00% +0.25% +0.13% / +0.19% +0.45% +0.38%] index_select perm : Elapsed 0.016 ms (1.573 ms / 100) 1.580 -> 1.580 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.76% +0.63%] index_select perm_sorted : Elapsed 0.016 ms (1.581 ms / 100) B = [5, 20, 4, 16] (stride (1280, 1, 320, 20)) A = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) dim = 0 1.380 -> 1.383 ( +0.22%) [ +0.14% +0.14% +0.00% / +0.29% +0.29% +0.22%] index_select const : Elapsed 0.014 ms (1.382 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.14% +0.00% +0.00% / -0.07% +0.14% +0.36%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.14% +0.22%] index_select linear : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.36% +0.15% +0.00% / +0.07% +0.44% +0.36%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.22% +0.00% +0.07% / +0.07% +0.29% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.07% +0.36% +0.00% / +0.29% +0.29% +0.22%] index_select skip256 : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.44% +0.44%] index_select spread : Elapsed 0.014 ms (1.380 ms / 100) 1.377 -> 1.383 ( +0.44%) [ +0.15% +0.44% +0.00% / +0.44% +0.58% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.44% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.29% +0.36%] index_select strided 8 : Elapsed 0.014 ms (1.380 ms / 100) 1.377 -> 1.382 ( +0.36%) [ +0.36% +0.07% +0.00% / +0.36% +0.58% +0.58%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.73% +0.58%] index_select random : Elapsed 0.014 ms (1.378 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.58% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.378 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.44% +0.44%] index_select perm : Elapsed 0.014 ms (1.379 ms / 100) 1.377 -> 1.377 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.51% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) B = [5, 20, 4, 16] (stride (1280, 4, 1, 80)) A = [40, 20, 4, 16] (stride (80, 4, 1, 3200)) dim = 0 0.661 -> 0.661 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.91% +0.91%] index_select const : Elapsed 0.007 ms (0.661 ms / 100) 0.664 -> 0.664 ( +0.00%) [ +0.15% +0.60% +0.00% / +0.00% +0.45% +0.60%] index_select wrap : Elapsed 0.007 ms (0.665 ms / 100) 0.670 -> 0.668 ( -0.30%) [ +0.00% +0.00% +0.00% / -0.30% +0.90% +0.60%] index_select linear : Elapsed 0.007 ms (0.670 ms / 100) 0.661 -> 0.667 ( +0.91%) [ +0.00% +0.15% +0.00% / +1.06% +1.06% +0.91%] index_select reverse : Elapsed 0.007 ms (0.661 ms / 100) 0.662 -> 0.663 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.60% +0.76%] index_select skip64 : Elapsed 0.007 ms (0.662 ms / 100) 0.661 -> 0.662 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.76% +0.91%] index_select skip256 : Elapsed 0.007 ms (0.662 ms / 100) 0.663 -> 0.663 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.60% +0.75%] index_select spread : Elapsed 0.007 ms (0.663 ms / 100) 0.666 -> 0.667 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.75% +0.45%] index_select strided 3 : Elapsed 0.007 ms (0.667 ms / 100) 0.666 -> 0.667 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.45% +0.60%] index_select strided 5 : Elapsed 0.007 ms (0.666 ms / 100) 0.661 -> 0.661 ( +0.00%) [ +0.00% +0.30% +0.15% / +0.00% +1.06% +1.21%] index_select strided 7 : Elapsed 0.007 ms (0.661 ms / 100) 0.666 -> 0.668 ( +0.30%) [ +0.15% +0.00% +0.00% / +0.30% +0.90% +1.05%] index_select strided 8 : Elapsed 0.007 ms (0.667 ms / 100) 0.661 -> 0.664 ( +0.45%) [ +0.30% +0.30% +0.00% / +0.45% +0.91% +0.91%] index_select strided 16 : Elapsed 0.007 ms (0.663 ms / 100) 0.671 -> 0.671 ( +0.00%) [ +0.30% +0.00% +0.15% / +0.00% +1.04% +0.89%] index_select random : Elapsed 0.007 ms (0.673 ms / 100) 0.660 -> 0.659 ( -0.15%) [ +0.15% +0.15% +0.00% / -0.15% +0.91% +1.06%] index_select random_sorted : Elapsed 0.007 ms (0.661 ms / 100) 0.662 -> 0.663 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +0.60% +0.60%] index_select perm : Elapsed 0.007 ms (0.663 ms / 100) 0.660 -> 0.660 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +1.21% +1.06%] index_select perm_sorted : Elapsed 0.007 ms (0.660 ms / 100) B = [5, 20, 4, 16] (stride (16, 320, 80, 1)) A = [40, 20, 4, 16] (stride (1280, 1, 20, 80)) dim = 0 1.463 -> 1.462 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.34% +0.34%] index_select const : Elapsed 0.015 ms (1.464 ms / 100) 1.482 -> 1.481 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.47% +0.40%] index_select wrap : Elapsed 0.015 ms (1.483 ms / 100) 1.466 -> 1.467 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.41% +0.41%] index_select linear : Elapsed 0.015 ms (1.467 ms / 100) 1.465 -> 1.467 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.48% +0.48%] index_select reverse : Elapsed 0.015 ms (1.467 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.48% +0.48%] index_select skip64 : Elapsed 0.015 ms (1.463 ms / 100) 1.463 -> 1.463 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.55% +0.48%] index_select skip256 : Elapsed 0.015 ms (1.466 ms / 100) 1.460 -> 1.460 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.55% +0.62%] index_select spread : Elapsed 0.015 ms (1.460 ms / 100) 1.459 -> 1.459 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.55% +0.48%] index_select strided 3 : Elapsed 0.015 ms (1.459 ms / 100) 1.463 -> 1.463 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.41% +0.34%] index_select strided 5 : Elapsed 0.015 ms (1.463 ms / 100) 1.461 -> 1.462 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.48% +0.55%] index_select strided 7 : Elapsed 0.015 ms (1.461 ms / 100) 1.468 -> 1.468 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.61% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.469 ms / 100) 1.459 -> 1.460 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.69% +0.62%] index_select strided 16 : Elapsed 0.015 ms (1.460 ms / 100) 1.464 -> 1.463 ( -0.07%) [ +0.00% +0.14% +0.00% / -0.07% +0.55% +0.55%] index_select random : Elapsed 0.015 ms (1.464 ms / 100) 1.464 -> 1.463 ( -0.07%) [ +0.07% +0.34% +0.00% / -0.07% +0.55% +0.48%] index_select random_sorted : Elapsed 0.015 ms (1.465 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.07% +0.21% +0.00% / +0.07% +0.62% +0.62%] index_select perm : Elapsed 0.015 ms (1.464 ms / 100) 1.462 -> 1.463 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.015 ms (1.463 ms / 100) B = [5, 20, 4, 16] (stride (16, 320, 80, 1)) A = [40, 20, 4, 16] (stride (1, 2560, 640, 40)) dim = 0 1.279 -> 1.279 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.86% +0.78%] index_select const : Elapsed 0.013 ms (1.282 ms / 100) 1.268 -> 1.276 ( +0.63%) [ +0.00% +0.00% +0.24% / +0.63% +0.79% +1.34%] index_select wrap : Elapsed 0.013 ms (1.268 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.08% +0.23% +0.00% / +0.00% +0.55% +0.62%] index_select linear : Elapsed 0.013 ms (1.282 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.70% +0.78%] index_select reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.273 -> 1.277 ( +0.31%) [ +0.08% +0.31% +0.00% / +0.31% +0.31% +0.39%] index_select skip64 : Elapsed 0.013 ms (1.274 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.86% +0.86%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.261 -> 1.263 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.71% +0.63%] index_select spread : Elapsed 0.013 ms (1.263 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.00% +0.24% +0.08% / +0.00% +0.80% +0.48%] index_select strided 3 : Elapsed 0.012 ms (1.245 ms / 100) 1.267 -> 1.269 ( +0.16%) [ +0.00% +0.32% +0.16% / +0.16% +0.55% +0.79%] index_select strided 5 : Elapsed 0.013 ms (1.267 ms / 100) 1.251 -> 1.252 ( +0.08%) [ +0.00% +0.32% +0.08% / +0.08% +0.64% +0.64%] index_select strided 7 : Elapsed 0.013 ms (1.251 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.63% +0.71%] index_select strided 8 : Elapsed 0.013 ms (1.271 ms / 100) 1.251 -> 1.252 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.80% +0.80%] index_select strided 16 : Elapsed 0.013 ms (1.254 ms / 100) 1.271 -> 1.264 ( -0.55%) [ +0.16% +0.00% +0.00% / -0.55% +0.55% +0.55%] index_select random : Elapsed 0.013 ms (1.273 ms / 100) 1.243 -> 1.243 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.80% +0.80%] index_select random_sorted : Elapsed 0.012 ms (1.243 ms / 100) 1.244 -> 1.245 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.72% +0.88%] index_select perm : Elapsed 0.012 ms (1.245 ms / 100) 1.257 -> 1.257 ( +0.00%) [ +0.24% +0.00% +0.24% / +0.00% +0.80% +0.88%] index_select perm_sorted : Elapsed 0.013 ms (1.260 ms / 100) B = [5, 20, 4, 16] (stride (4, 320, 1, 20)) A = [40, 20, 4, 16] (stride (1, 2560, 40, 160)) dim = 0 1.613 -> 1.612 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.43% +0.37%] index_select const : Elapsed 0.016 ms (1.613 ms / 100) 1.611 -> 1.611 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.37% +0.43%] index_select wrap : Elapsed 0.016 ms (1.612 ms / 100) 1.612 -> 1.615 ( +0.19%) [ +0.00% +0.00% +0.06% / +0.19% +0.43% +0.31%] index_select linear : Elapsed 0.016 ms (1.612 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.00% +0.12% +0.12% / +0.06% +0.50% +0.50%] index_select reverse : Elapsed 0.016 ms (1.611 ms / 100) 1.609 -> 1.608 ( -0.06%) [ +0.19% +0.12% +0.00% / -0.06% +0.44% +0.56%] index_select skip64 : Elapsed 0.016 ms (1.612 ms / 100) 1.611 -> 1.613 ( +0.12%) [ +0.12% +0.19% +0.00% / +0.12% +0.56% +0.56%] index_select skip256 : Elapsed 0.016 ms (1.613 ms / 100) 1.600 -> 1.602 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.50% +0.56%] index_select spread : Elapsed 0.016 ms (1.600 ms / 100) 1.604 -> 1.606 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.44% +0.44%] index_select strided 3 : Elapsed 0.016 ms (1.606 ms / 100) 1.599 -> 1.603 ( +0.25%) [ +0.25% +0.13% +0.00% / +0.25% +0.63% +0.69%] index_select strided 5 : Elapsed 0.016 ms (1.603 ms / 100) 1.601 -> 1.601 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.44% +0.50%] index_select strided 7 : Elapsed 0.016 ms (1.602 ms / 100) 1.613 -> 1.614 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.50% +0.43%] index_select strided 8 : Elapsed 0.016 ms (1.614 ms / 100) 1.594 -> 1.600 ( +0.38%) [ +0.00% +0.31% +0.25% / +0.38% +0.75% +1.00%] index_select strided 16 : Elapsed 0.016 ms (1.594 ms / 100) 1.601 -> 1.602 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.69% +0.69%] index_select random : Elapsed 0.016 ms (1.603 ms / 100) 1.603 -> 1.607 ( +0.25%) [ +0.12% +0.06% +0.00% / +0.25% +0.69% +0.62%] index_select random_sorted : Elapsed 0.016 ms (1.605 ms / 100) 1.609 -> 1.610 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.56% +0.62%] index_select perm : Elapsed 0.016 ms (1.610 ms / 100) 1.597 -> 1.600 ( +0.19%) [ +0.25% +0.13% +0.00% / +0.19% +0.81% +0.75%] index_select perm_sorted : Elapsed 0.016 ms (1.601 ms / 100) B = [5, 20, 4, 16] (stride (16, 80, 1600, 1)) A = [40, 20, 4, 16] (stride (80, 4, 1, 3200)) dim = 0 1.464 -> 1.464 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.55%] index_select const : Elapsed 0.015 ms (1.465 ms / 100) 1.433 -> 1.434 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.63% +0.56%] index_select wrap : Elapsed 0.014 ms (1.434 ms / 100) 1.436 -> 1.436 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_select linear : Elapsed 0.014 ms (1.436 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.63% +0.56%] index_select reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.21% +0.00% / +0.00% +0.77% +0.70%] index_select skip64 : Elapsed 0.014 ms (1.423 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.82% +0.75%] index_select skip256 : Elapsed 0.015 ms (1.464 ms / 100) 1.426 -> 1.427 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.84% +0.91%] index_select spread : Elapsed 0.014 ms (1.427 ms / 100) 1.437 -> 1.438 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.84% +0.84%] index_select strided 3 : Elapsed 0.014 ms (1.438 ms / 100) 1.443 -> 1.444 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.83% +0.76%] index_select strided 5 : Elapsed 0.014 ms (1.444 ms / 100) 1.437 -> 1.439 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.77% +0.84%] index_select strided 7 : Elapsed 0.014 ms (1.438 ms / 100) 1.463 -> 1.463 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_select strided 8 : Elapsed 0.015 ms (1.463 ms / 100) 1.444 -> 1.446 ( +0.14%) [ +0.21% +0.00% +0.00% / +0.14% +0.62% +0.76%] index_select strided 16 : Elapsed 0.014 ms (1.447 ms / 100) 1.437 -> 1.438 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.90% +0.90%] index_select random : Elapsed 0.014 ms (1.437 ms / 100) 1.434 -> 1.436 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.70% +0.63%] index_select random_sorted : Elapsed 0.014 ms (1.436 ms / 100) 1.416 -> 1.417 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.71% +0.64%] index_select perm : Elapsed 0.014 ms (1.416 ms / 100) 1.436 -> 1.436 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.91% +0.77%] index_select perm_sorted : Elapsed 0.014 ms (1.438 ms / 100) B = [5, 20, 4, 16] (stride (20, 1, 1600, 100)) A = [40, 20, 4, 16] (stride (20, 1, 12800, 800)) dim = 0 0.674 -> 0.677 ( +0.45%) [ +0.15% +0.30% +0.00% / +0.45% +0.45% +0.59%] index_select const : Elapsed 0.007 ms (0.675 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.15% +0.30%] index_select wrap : Elapsed 0.007 ms (0.667 ms / 100) 0.671 -> 0.671 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.45%] index_select linear : Elapsed 0.007 ms (0.671 ms / 100) 0.669 -> 0.671 ( +0.30%) [ +0.15% +0.00% +0.00% / +0.30% +0.30% +0.75%] index_select reverse : Elapsed 0.007 ms (0.670 ms / 100) 0.674 -> 0.676 ( +0.30%) [ +0.15% +0.30% +0.00% / +0.30% +0.45% +0.59%] index_select skip64 : Elapsed 0.007 ms (0.675 ms / 100) 0.677 -> 0.677 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.30%] index_select skip256 : Elapsed 0.007 ms (0.677 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.00% +0.00% / +0.15% +0.15% +0.30%] index_select spread : Elapsed 0.007 ms (0.679 ms / 100) 0.666 -> 0.667 ( +0.15%) [ +0.30% +0.45% +0.00% / +0.75% +0.30% +0.15%] index_select strided 3 : Elapsed 0.007 ms (0.668 ms / 100) 0.668 -> 0.669 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.30% +0.15%] index_select strided 5 : Elapsed 0.007 ms (0.668 ms / 100) 0.666 -> 0.666 ( +0.00%) [ +0.30% +0.30% +0.00% / +0.00% +0.30% +760.21%] index_select strided 7 : Elapsed 0.007 ms (0.668 ms / 100) 0.679 -> 0.678 ( -0.15%) [ +0.15% +0.00% +0.00% / -0.15% +0.15% +45.51%] index_select strided 8 : Elapsed 0.007 ms (0.680 ms / 100) 0.675 -> 0.677 ( +0.30%) [ +0.00% +0.30% +0.15% / +0.30% +1.19% +0.89%] index_select strided 16 : Elapsed 0.007 ms (0.675 ms / 100) 0.666 -> 0.668 ( +0.30%) [ +0.00% +0.15% +0.00% / +0.30% +0.30% +0.30%] index_select random : Elapsed 0.007 ms (0.666 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.89% +0.89%] index_select random_sorted : Elapsed 0.007 ms (0.675 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.90% +0.75%] index_select perm : Elapsed 0.007 ms (0.668 ms / 100) 0.665 -> 0.667 ( +0.30%) [ +0.00% +0.15% +0.15% / +0.30% +0.90% +0.90%] index_select perm_sorted : Elapsed 0.007 ms (0.665 ms / 100) B = [5, 20, 4, 16] (stride (80, 1, 20, 400)) A = [40, 20, 4, 16] (stride (4, 160, 1, 3200)) dim = 0 1.552 -> 1.552 ( +0.00%) [ +0.45% +0.00% +0.13% / +0.00% +1.10% +1.68%] index_select const : Elapsed 0.016 ms (1.559 ms / 100) 1.549 -> 1.548 ( -0.06%) [ +0.13% +0.00% +0.00% / -0.06% +0.65% +1.36%] index_select wrap : Elapsed 0.016 ms (1.551 ms / 100) 1.559 -> 1.570 ( +0.71%) [ +0.71% +0.38% +0.00% / +0.90% +0.71% +1.28%] index_select linear : Elapsed 0.016 ms (1.570 ms / 100) 1.554 -> 1.561 ( +0.45%) [ +0.19% +0.90% +0.00% / +0.45% +1.22% +1.48%] index_select reverse : Elapsed 0.016 ms (1.557 ms / 100) 1.544 -> 1.548 ( +0.26%) [ +0.32% +0.00% +0.26% / +0.26% +0.78% +1.04%] index_select skip64 : Elapsed 0.015 ms (1.549 ms / 100) 1.556 -> 1.554 ( -0.13%) [ +0.19% +0.00% +0.39% / -0.13% +0.90% +0.84%] index_select skip256 : Elapsed 0.016 ms (1.559 ms / 100) 1.534 -> 1.535 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.85% +0.91%] index_select spread : Elapsed 0.015 ms (1.536 ms / 100) 1.541 -> 1.544 ( +0.19%) [ +0.19% +0.13% +0.00% / +0.19% +2.34% +0.84%] index_select strided 3 : Elapsed 0.015 ms (1.544 ms / 100) 1.542 -> 1.541 ( -0.06%) [ +0.19% +0.00% +0.19% / -0.06% +2.53% +1.23%] index_select strided 5 : Elapsed 0.015 ms (1.545 ms / 100) 1.544 -> 1.544 ( +0.00%) [ +0.06% +0.13% +0.00% / +0.00% +1.42% +1.30%] index_select strided 7 : Elapsed 0.015 ms (1.545 ms / 100) 1.532 -> 1.536 ( +0.26%) [ +0.33% +0.20% +0.00% / +0.26% +0.91% +0.72%] index_select strided 8 : Elapsed 0.015 ms (1.537 ms / 100) 1.543 -> 1.545 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +1.94% +0.97%] index_select strided 16 : Elapsed 0.015 ms (1.543 ms / 100) 1.543 -> 1.545 ( +0.13%) [ +0.26% +0.26% +0.00% / +0.13% +0.78% +0.84%] index_select random : Elapsed 0.015 ms (1.547 ms / 100) 1.544 -> 1.547 ( +0.19%) [ +0.26% +0.00% +0.13% / +0.19% +2.07% +1.94%] index_select random_sorted : Elapsed 0.015 ms (1.548 ms / 100) 1.537 -> 1.538 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.72% +0.98%] index_select perm : Elapsed 0.015 ms (1.538 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.00% +0.07% +0.00% / +0.13% +0.98% +0.98%] index_select perm_sorted : Elapsed 0.015 ms (1.529 ms / 100) B = [5, 20, 4, 16] (stride (1, 20, 5, 400)) A = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) dim = 0 1.383 -> 1.382 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.07% +0.07%] index_select const : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.379 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.29% +0.43%] index_select wrap : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.381 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.22% +0.14%] index_select linear : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.07% +0.00% +0.15% / +0.36% +0.36% +0.44%] index_select reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.14% +0.00% +0.22% / +0.00% +0.36% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.15% +0.00% +0.15% / +0.36% +0.29% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.36% +0.36%] index_select spread : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.29% +0.15% +0.00% / +0.07% +0.44% +0.36%] index_select strided 3 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.44%] index_select strided 5 : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.44%] index_select strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.15% +0.22% +0.00% / +0.36% +0.44% +0.36%] index_select strided 8 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.44% +0.44%] index_select strided 16 : Elapsed 0.014 ms (1.379 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.51% +0.44%] index_select random : Elapsed 0.014 ms (1.378 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.00% +0.22% +0.00% / -0.07% +0.36% +1.16%] index_select random_sorted : Elapsed 0.014 ms (1.379 ms / 100) 1.376 -> 1.379 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.65% +0.65%] index_select perm : Elapsed 0.014 ms (1.377 ms / 100) 1.377 -> 1.376 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) out_shape = [40, 5, 4, 16] in_shape = [40, 20, 4, 16] idx_dim = 1 B = [40, 5, 4, 16] (stride (320, 64, 1, 4)) A = [40, 20, 4, 16] (stride (1, 40, 12800, 800)) dim = 1 1.777 -> 1.778 ( +0.06%) [ +0.23% +0.00% +0.00% / +0.06% +0.45% +0.28%] index_select const : Elapsed 0.018 ms (1.781 ms / 100) 1.782 -> 1.782 ( +0.00%) [ +0.11% +0.00% +0.17% / +0.00% +0.34% +0.28%] index_select wrap : Elapsed 0.018 ms (1.784 ms / 100) 1.777 -> 1.780 ( +0.17%) [ +0.28% +0.00% +0.17% / +0.17% +0.34% +0.45%] index_select linear : Elapsed 0.018 ms (1.782 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.61% +0.67%] index_select reverse : Elapsed 0.018 ms (1.792 ms / 100) 1.783 -> 1.782 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.34% +0.28%] index_select skip64 : Elapsed 0.018 ms (1.784 ms / 100) 1.776 -> 1.777 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.39% +0.45%] index_select skip256 : Elapsed 0.018 ms (1.776 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.00% +0.28% +0.22% / +0.11% +0.51% +0.67%] index_select spread : Elapsed 0.018 ms (1.779 ms / 100) 1.776 -> 1.777 ( +0.06%) [ +0.28% +0.00% +0.00% / +0.06% +0.51% +0.45%] index_select strided 3 : Elapsed 0.018 ms (1.781 ms / 100) 1.775 -> 1.776 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +0.39% +0.39%] index_select strided 5 : Elapsed 0.018 ms (1.778 ms / 100) 1.776 -> 1.776 ( +0.00%) [ +0.00% +0.17% +0.06% / +0.00% +0.28% +0.28%] index_select strided 7 : Elapsed 0.018 ms (1.776 ms / 100) 1.790 -> 1.789 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +0.28% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.791 ms / 100) 1.776 -> 1.778 ( +0.11%) [ +0.00% +0.11% +0.06% / +0.11% +0.34% +0.39%] index_select strided 16 : Elapsed 0.018 ms (1.776 ms / 100) 1.782 -> 1.781 ( -0.06%) [ +0.00% +0.11% +0.45% / -0.06% +0.51% +0.51%] index_select random : Elapsed 0.018 ms (1.782 ms / 100) 1.778 -> 1.778 ( +0.00%) [ +0.06% +0.00% +0.28% / +0.00% +0.34% +0.45%] index_select random_sorted : Elapsed 0.018 ms (1.779 ms / 100) 1.782 -> 1.786 ( +0.22%) [ +0.17% +0.00% +0.11% / +0.22% +0.45% +0.67%] index_select perm : Elapsed 0.018 ms (1.785 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.28% +0.06% +0.00% / +0.06% +0.39% +0.78%] index_select perm_sorted : Elapsed 0.018 ms (1.802 ms / 100) B = [40, 5, 4, 16] (stride (320, 1, 80, 5)) A = [40, 20, 4, 16] (stride (1, 2560, 640, 40)) dim = 1 0.687 -> 0.690 ( +0.44%) [ +0.29% +0.44% +0.00% / +0.44% +0.73% +0.58%] index_select const : Elapsed 0.007 ms (0.689 ms / 100) 0.685 -> 0.686 ( +0.15%) [ +0.00% +0.00% +0.29% / +0.15% +1.17% +1.31%] index_select wrap : Elapsed 0.007 ms (0.685 ms / 100) 0.686 -> 0.688 ( +0.29%) [ +0.00% +0.15% +0.15% / +0.29% +1.31% +1.31%] index_select linear : Elapsed 0.007 ms (0.686 ms / 100) 0.679 -> 0.680 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +1.33% +1.18%] index_select reverse : Elapsed 0.007 ms (0.679 ms / 100) 0.690 -> 0.691 ( +0.14%) [ +0.29% +0.14% +0.00% / +0.14% +0.72% +1.16%] index_select skip64 : Elapsed 0.007 ms (0.692 ms / 100) 0.695 -> 0.691 ( -0.58%) [ +0.43% +0.14% +0.00% / +0.14% -0.58% -0.43%] index_select skip256 : Elapsed 0.007 ms (0.698 ms / 100) 0.694 -> 0.688 ( -0.86%) [ +0.00% +0.14% +0.00% / +0.14% -0.72% -0.86%] index_select spread : Elapsed 0.007 ms (0.694 ms / 100) 0.692 -> 0.692 ( +0.00%) [ +0.29% +0.29% +0.00% / +0.43% +0.00% +0.00%] index_select strided 3 : Elapsed 0.007 ms (0.694 ms / 100) 0.682 -> 0.681 ( -0.15%) [ +0.29% +0.29% +0.00% / +0.00% -0.15% -0.15%] index_select strided 5 : Elapsed 0.007 ms (0.684 ms / 100) 0.684 -> 0.683 ( -0.15%) [ +0.29% +0.15% +0.00% / +0.00% -0.15% +0.00%] index_select strided 7 : Elapsed 0.007 ms (0.686 ms / 100) 0.689 -> 0.688 ( -0.15%) [ +0.00% +0.00% +0.00% / +0.15% -0.15% +0.15%] index_select strided 8 : Elapsed 0.007 ms (0.689 ms / 100) 0.686 -> 0.686 ( +0.00%) [ +0.44% +0.15% +0.00% / +0.15% +0.00% +0.29%] index_select strided 16 : Elapsed 0.007 ms (0.689 ms / 100) 0.687 -> 0.684 ( -0.44%) [ +0.15% +0.00% +0.00% / +0.00% -0.44% +0.29%] index_select random : Elapsed 0.007 ms (0.688 ms / 100) 0.684 -> 0.688 ( +0.58%) [ +0.15% +0.00% +0.15% / +0.58% +1.02% +1.17%] index_select random_sorted : Elapsed 0.007 ms (0.685 ms / 100) 0.685 -> 0.686 ( +0.15%) [ +0.29% +0.29% +0.00% / +0.15% +1.02% +1.02%] index_select perm : Elapsed 0.007 ms (0.687 ms / 100) 0.690 -> 0.691 ( +0.14%) [ +0.58% +0.00% +0.14% / +0.14% +1.74% +1.59%] index_select perm_sorted : Elapsed 0.007 ms (0.694 ms / 100) B = [40, 5, 4, 16] (stride (16, 2560, 640, 1)) dim = 1 fill_cnt = 20 3.508 -> 3.487 ( -0.60%) [ +0.20% +0.11% +0.00% / -0.51% -0.60% -0.60%] index_fill_ const : Elapsed 0.035 ms (3.515 ms / 100) 3.523 -> 3.503 ( -0.57%) [ +0.17% +0.06% +0.00% / -0.51% -0.40% -0.57%] index_fill_ linear : Elapsed 0.035 ms (3.529 ms / 100) 3.512 -> 3.494 ( -0.51%) [ +0.09% +0.03% +0.00% / -0.48% -0.40% -0.51%] index_fill_ reverse : Elapsed 0.035 ms (3.515 ms / 100) 3.513 -> 3.493 ( -0.57%) [ +0.17% +0.03% +0.00% / -0.57% -0.11% -0.26%] index_fill_ skip64 : Elapsed 0.035 ms (3.519 ms / 100) 3.515 -> 3.496 ( -0.54%) [ +0.00% +0.09% +0.09% / -0.54% -0.51% -0.43%] index_fill_ skip256 : Elapsed 0.035 ms (3.515 ms / 100) 3.514 -> 3.494 ( -0.57%) [ +0.06% +0.09% +0.00% / -0.48% -0.57% -0.37%] index_fill_ spread : Elapsed 0.035 ms (3.516 ms / 100) 3.521 -> 3.505 ( -0.45%) [ +0.00% +0.06% +0.20% / -0.45% -0.45% -0.37%] index_fill_ strided 3 : Elapsed 0.035 ms (3.521 ms / 100) 3.526 -> 3.508 ( -0.51%) [ +0.00% +0.14% +0.23% / -0.51% -0.23% -0.34%] index_fill_ random : Elapsed 0.035 ms (3.526 ms / 100) 3.522 -> 3.504 ( -0.51%) [ +0.09% +0.00% +0.06% / -0.40% -0.51% -0.06%] index_fill_ random_sorted : Elapsed 0.035 ms (3.525 ms / 100) B = [40, 5, 4, 16] (stride (1, 2560, 640, 40)) A = [40, 20, 4, 16] (stride (20, 1, 12800, 800)) dim = 1 0.716 -> 0.717 ( +0.14%) [ +0.00% +0.28% +0.14% / +0.14% +0.84% +1.12%] index_select const : Elapsed 0.007 ms (0.716 ms / 100) 0.724 -> 0.720 ( -0.55%) [ +0.00% +0.14% +0.14% / +0.00% -0.55% -0.55%] index_select wrap : Elapsed 0.007 ms (0.724 ms / 100) 0.722 -> 0.718 ( -0.55%) [ +0.55% +0.28% +0.00% / +0.28% -0.55% -0.42%] index_select linear : Elapsed 0.007 ms (0.726 ms / 100) 0.717 -> 0.718 ( +0.14%) [ +0.00% +0.42% +0.00% / +0.14% +0.42% +0.42%] index_select reverse : Elapsed 0.007 ms (0.717 ms / 100) 0.717 -> 0.718 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.42% +0.28%] index_select skip64 : Elapsed 0.007 ms (0.718 ms / 100) 0.717 -> 0.718 ( +0.14%) [ +0.00% +0.28% +0.28% / +0.14% +0.98% +1.12%] index_select skip256 : Elapsed 0.007 ms (0.717 ms / 100) 0.715 -> 0.716 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.98% +0.98%] index_select spread : Elapsed 0.007 ms (0.716 ms / 100) 0.717 -> 0.719 ( +0.28%) [ +0.00% +0.28% +0.14% / +0.28% +0.70% +0.84%] index_select strided 3 : Elapsed 0.007 ms (0.717 ms / 100) 0.717 -> 0.718 ( +0.14%) [ +0.00% +0.28% +0.00% / +0.14% +0.70% +0.70%] index_select strided 5 : Elapsed 0.007 ms (0.717 ms / 100) 0.716 -> 0.716 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.28%] index_select strided 7 : Elapsed 0.007 ms (0.716 ms / 100) 0.713 -> 0.715 ( +0.28%) [ +0.42% +0.42% +0.00% / +0.28% +2.10% +0.56%] index_select strided 8 : Elapsed 0.007 ms (0.716 ms / 100) 0.721 -> 0.715 ( -0.83%) [ +0.00% +0.14% +0.00% / +0.00% -0.55% -0.83%] index_select strided 16 : Elapsed 0.007 ms (0.721 ms / 100) 0.722 -> 0.716 ( -0.83%) [ +0.14% +0.00% +0.00% / +0.55% -0.69% -0.83%] index_select random : Elapsed 0.007 ms (0.723 ms / 100) 0.716 -> 0.716 ( +0.00%) [ +0.00% +0.28% +0.00% / +0.00% +0.70% +0.70%] index_select random_sorted : Elapsed 0.007 ms (0.716 ms / 100) 0.715 -> 0.715 ( +0.00%) [ +0.00% +0.14% +0.28% / +0.00% +0.70% +0.70%] index_select perm : Elapsed 0.007 ms (0.715 ms / 100) 0.717 -> 0.715 ( -0.28%) [ +0.00% +0.00% +0.00% / -0.28% +0.84% +0.98%] index_select perm_sorted : Elapsed 0.007 ms (0.717 ms / 100) B = [40, 5, 4, 16] (stride (80, 1, 3200, 5)) A = [40, 20, 4, 16] (stride (1, 2560, 640, 40)) dim = 1 1.783 -> 1.785 ( +0.11%) [ +0.17% +0.28% +0.00% / +0.11% +0.34% +0.28%] index_select const : Elapsed 0.018 ms (1.786 ms / 100) 1.777 -> 1.778 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.23% +0.23%] index_select wrap : Elapsed 0.018 ms (1.779 ms / 100) 1.780 -> 1.780 ( +0.00%) [ +0.11% +0.28% +0.00% / +0.00% +0.22% +0.34%] index_select linear : Elapsed 0.018 ms (1.782 ms / 100) 1.769 -> 1.773 ( +0.23%) [ +0.51% +0.51% +0.00% / +0.23% +0.40% +0.68%] index_select reverse : Elapsed 0.018 ms (1.778 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.28% +0.17%] index_select skip64 : Elapsed 0.018 ms (1.786 ms / 100) 1.780 -> 1.782 ( +0.11%) [ +0.17% +0.06% +0.00% / +0.11% +0.45% +0.28%] index_select skip256 : Elapsed 0.018 ms (1.783 ms / 100) 1.781 -> 1.780 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.17% +0.17%] index_select spread : Elapsed 0.018 ms (1.784 ms / 100) 1.778 -> 1.779 ( +0.06%) [ +0.17% +0.06% +0.00% / +0.06% +0.34% +0.34%] index_select strided 3 : Elapsed 0.018 ms (1.781 ms / 100) 1.757 -> 1.761 ( +0.23%) [ +0.17% +0.00% +0.00% / +0.23% +0.57% +0.63%] index_select strided 5 : Elapsed 0.018 ms (1.760 ms / 100) 1.767 -> 1.770 ( +0.17%) [ +0.11% +0.11% +0.00% / +0.17% +0.79% +0.79%] index_select strided 7 : Elapsed 0.018 ms (1.769 ms / 100) 1.775 -> 1.779 ( +0.23%) [ +0.17% +0.06% +0.00% / +0.23% +0.56% +0.85%] index_select strided 8 : Elapsed 0.018 ms (1.778 ms / 100) 1.766 -> 1.764 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.68% +0.57%] index_select strided 16 : Elapsed 0.018 ms (1.767 ms / 100) 1.786 -> 1.790 ( +0.22%) [ +0.00% +0.34% +0.06% / +0.34% +0.45% +0.22%] index_select random : Elapsed 0.018 ms (1.786 ms / 100) 1.776 -> 1.782 ( +0.34%) [ +0.23% +0.23% +0.00% / +0.34% +0.96% +0.68%] index_select random_sorted : Elapsed 0.018 ms (1.780 ms / 100) 1.769 -> 1.768 ( -0.06%) [ +0.23% +0.06% +0.00% / -0.06% +0.51% +0.34%] index_select perm : Elapsed 0.018 ms (1.773 ms / 100) 1.764 -> 1.765 ( +0.06%) [ +0.06% +0.23% +0.00% / +0.06% +0.57% +0.62%] index_select perm_sorted : Elapsed 0.018 ms (1.765 ms / 100) B = [40, 5, 4, 16] (stride (80, 1, 3200, 5)) A = [40, 20, 4, 16] (stride (20, 1, 12800, 800)) dim = 1 1.817 -> 1.819 ( +0.11%) [ +0.00% +0.06% +0.00% / +0.11% +0.28% +0.22%] index_select const : Elapsed 0.018 ms (1.817 ms / 100) 1.826 -> 1.828 ( +0.11%) [ +0.05% +0.22% +0.00% / +0.11% +0.27% +0.33%] index_select wrap : Elapsed 0.018 ms (1.827 ms / 100) 1.822 -> 1.821 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.16% +0.38%] index_select linear : Elapsed 0.018 ms (1.823 ms / 100) 1.821 -> 1.825 ( +0.22%) [ +0.33% +0.00% +0.11% / +0.22% +0.60% +0.71%] index_select reverse : Elapsed 0.018 ms (1.827 ms / 100) 1.815 -> 1.820 ( +0.28%) [ +0.06% +0.06% +0.00% / +0.28% +0.33% +0.50%] index_select skip64 : Elapsed 0.018 ms (1.816 ms / 100) 1.812 -> 1.811 ( -0.06%) [ +0.11% +0.17% +0.00% / -0.06% +0.55% +0.44%] index_select skip256 : Elapsed 0.018 ms (1.814 ms / 100) 1.835 -> 1.838 ( +0.16%) [ +0.00% +0.11% +0.05% / +0.16% +0.65% +0.44%] index_select spread : Elapsed 0.018 ms (1.835 ms / 100) 1.835 -> 1.839 ( +0.22%) [ +0.05% +0.00% +0.05% / +0.22% +0.60% +0.49%] index_select strided 3 : Elapsed 0.018 ms (1.836 ms / 100) 1.839 -> 1.839 ( +0.00%) [ +0.00% +0.05% +0.11% / +0.00% +0.49% +0.49%] index_select strided 5 : Elapsed 0.018 ms (1.839 ms / 100) 1.832 -> 1.833 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.49% +0.44%] index_select strided 7 : Elapsed 0.018 ms (1.832 ms / 100) 1.833 -> 1.839 ( +0.33%) [ +0.22% +0.11% +0.00% / +0.33% +0.49% +0.44%] index_select strided 8 : Elapsed 0.018 ms (1.837 ms / 100) 1.831 -> 1.833 ( +0.11%) [ +0.22% +0.33% +0.00% / +0.11% +0.38% +0.49%] index_select strided 16 : Elapsed 0.018 ms (1.835 ms / 100) 1.832 -> 1.830 ( -0.11%) [ +0.27% +0.16% +0.00% / -0.11% +0.44% +0.49%] index_select random : Elapsed 0.018 ms (1.837 ms / 100) 1.838 -> 1.841 ( +0.16%) [ +0.00% +0.27% +0.11% / +0.16% +0.22% +0.33%] index_select random_sorted : Elapsed 0.018 ms (1.838 ms / 100) 1.830 -> 1.832 ( +0.11%) [ +0.00% +0.22% +0.05% / +0.11% +0.49% +0.66%] index_select perm : Elapsed 0.018 ms (1.830 ms / 100) 1.832 -> 1.836 ( +0.22%) [ +0.33% +0.16% +0.00% / +0.22% +0.27% +0.33%] index_select perm_sorted : Elapsed 0.018 ms (1.838 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 3200, 200)) A = [40, 20, 4, 16] (stride (80, 1, 20, 3200)) dim = 1 1.809 -> 1.806 ( -0.17%) [ +0.00% +0.06% +0.06% / +0.00% -0.17% -0.06%] index_select const : Elapsed 0.018 ms (1.809 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.88% +0.77%] index_select wrap : Elapsed 0.018 ms (1.815 ms / 100) 1.813 -> 1.814 ( +0.06%) [ +0.11% +0.33% +0.00% / +0.06% +0.88% +0.99%] index_select linear : Elapsed 0.018 ms (1.815 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.72% +0.66%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.801 -> 1.801 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.44%] index_select skip64 : Elapsed 0.018 ms (1.801 ms / 100) 1.799 -> 1.805 ( +0.33%) [ +0.17% +0.28% +0.00% / +0.33% +0.78% +0.83%] index_select skip256 : Elapsed 0.018 ms (1.802 ms / 100) 1.830 -> 1.832 ( +0.11%) [ +0.49% +0.33% +0.00% / +0.11% +0.60% +0.55%] index_select spread : Elapsed 0.018 ms (1.839 ms / 100) 1.827 -> 1.830 ( +0.16%) [ +0.11% +0.05% +0.00% / +0.16% +0.55% +0.66%] index_select strided 3 : Elapsed 0.018 ms (1.829 ms / 100) 1.832 -> 1.837 ( +0.27%) [ +0.22% +0.11% +0.00% / +0.27% +0.49% +0.44%] index_select strided 5 : Elapsed 0.018 ms (1.836 ms / 100) 1.823 -> 1.827 ( +0.22%) [ +0.22% +0.00% +0.16% / +0.22% +0.49% +0.44%] index_select strided 7 : Elapsed 0.018 ms (1.827 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.00% +0.17% +0.17% / +0.11% +0.22% +0.22%] index_select strided 8 : Elapsed 0.018 ms (1.818 ms / 100) 1.828 -> 1.824 ( -0.22%) [ +0.00% +0.05% +0.22% / -0.22% +0.22% +0.16%] index_select strided 16 : Elapsed 0.018 ms (1.828 ms / 100) 1.815 -> 1.815 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.33% +0.17%] index_select random : Elapsed 0.018 ms (1.818 ms / 100) 1.823 -> 1.826 ( +0.16%) [ +0.33% +0.00% +0.22% / +0.16% +0.60% +0.33%] index_select random_sorted : Elapsed 0.018 ms (1.829 ms / 100) 1.815 -> 1.822 ( +0.39%) [ +0.39% +0.50% +0.00% / +0.39% +0.50% +0.61%] index_select perm : Elapsed 0.018 ms (1.822 ms / 100) 1.833 -> 1.836 ( +0.16%) [ +0.05% +0.22% +0.00% / +0.16% +0.27% +0.38%] index_select perm_sorted : Elapsed 0.018 ms (1.834 ms / 100) B = [40, 5, 4, 16] (stride (1, 40, 3200, 200)) A = [40, 20, 4, 16] (stride (1, 2560, 640, 40)) dim = 1 0.709 -> 0.709 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +0.85% +0.71%] index_select const : Elapsed 0.007 ms (0.711 ms / 100) 0.711 -> 0.705 ( -0.84%) [ +0.00% +0.14% +0.42% / +0.28% -0.84% -0.70%] index_select wrap : Elapsed 0.007 ms (0.711 ms / 100) 0.709 -> 0.704 ( -0.71%) [ +0.14% +0.14% +0.00% / +0.14% -0.71% -0.56%] index_select linear : Elapsed 0.007 ms (0.710 ms / 100) 0.702 -> 0.703 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +0.43% +0.57%] index_select reverse : Elapsed 0.007 ms (0.704 ms / 100) 0.707 -> 0.708 ( +0.14%) [ +0.28% +0.42% +0.00% / +0.14% +0.28% +0.28%] index_select skip64 : Elapsed 0.007 ms (0.709 ms / 100) 0.705 -> 0.705 ( +0.00%) [ +0.00% +0.43% +0.00% / +0.00% +0.99% +0.99%] index_select skip256 : Elapsed 0.007 ms (0.705 ms / 100) 0.702 -> 0.704 ( +0.28%) [ +0.28% +0.43% +0.00% / +0.28% +1.28% +1.28%] index_select spread : Elapsed 0.007 ms (0.704 ms / 100) 0.704 -> 0.705 ( +0.14%) [ +0.00% +0.28% +0.14% / +0.14% +0.85% +0.71%] index_select strided 3 : Elapsed 0.007 ms (0.704 ms / 100) 0.704 -> 0.704 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.43% +0.43%] index_select strided 5 : Elapsed 0.007 ms (0.704 ms / 100) 0.702 -> 0.703 ( +0.14%) [ +0.28% +0.28% +0.00% / +0.14% +0.57% +0.43%] index_select strided 7 : Elapsed 0.007 ms (0.704 ms / 100) 0.703 -> 0.704 ( +0.14%) [ +0.28% +0.28% +0.00% / +0.14% +0.71% +0.57%] index_select strided 8 : Elapsed 0.007 ms (0.705 ms / 100) 0.710 -> 0.706 ( -0.56%) [ +0.00% +0.42% +0.14% / +0.00% -0.56% -0.42%] index_select strided 16 : Elapsed 0.007 ms (0.710 ms / 100) 0.710 -> 0.703 ( -0.99%) [ +0.00% +0.28% +0.00% / +0.14% -0.70% -0.99%] index_select random : Elapsed 0.007 ms (0.710 ms / 100) 0.706 -> 0.706 ( +0.00%) [ +0.14% +0.28% +0.00% / +0.00% +0.71% +0.85%] index_select random_sorted : Elapsed 0.007 ms (0.707 ms / 100) 0.711 -> 0.712 ( +0.14%) [ +0.00% +0.28% +0.00% / +0.14% +0.42% +0.28%] index_select perm : Elapsed 0.007 ms (0.711 ms / 100) 0.702 -> 0.702 ( +0.00%) [ +0.43% +0.43% +0.00% / +0.00% +1.28% +1.28%] index_select perm_sorted : Elapsed 0.007 ms (0.705 ms / 100) B = [40, 5, 4, 16] (stride (4, 160, 1, 800)) A = [40, 20, 4, 16] (stride (1280, 4, 1, 80)) dim = 1 1.850 -> 1.842 ( -0.43%) [ +0.32% +0.00% +0.22% / +0.05% -0.43% -0.38%] index_select const : Elapsed 0.019 ms (1.856 ms / 100) 1.854 -> 1.854 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.54% +0.76%] index_select wrap : Elapsed 0.019 ms (1.855 ms / 100) 1.848 -> 1.848 ( +0.00%) [ +0.22% +0.00% +0.11% / +0.00% +0.70% +0.27%] index_select linear : Elapsed 0.019 ms (1.852 ms / 100) 1.850 -> 1.850 ( +0.00%) [ +0.11% +0.00% +0.22% / +0.00% +0.38% +0.27%] index_select reverse : Elapsed 0.019 ms (1.852 ms / 100) 1.835 -> 1.837 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.44% +0.65%] index_select skip64 : Elapsed 0.018 ms (1.837 ms / 100) 1.837 -> 1.839 ( +0.11%) [ +0.33% +0.33% +0.00% / +0.11% +1.14% +0.98%] index_select skip256 : Elapsed 0.018 ms (1.843 ms / 100) 1.861 -> 1.860 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.70% +0.38%] index_select spread : Elapsed 0.019 ms (1.862 ms / 100) 1.865 -> 1.863 ( -0.11%) [ +0.05% +0.16% +0.00% / -0.11% +1.13% +0.91%] index_select strided 3 : Elapsed 0.019 ms (1.866 ms / 100) 1.857 -> 1.859 ( +0.11%) [ +0.27% +0.11% +0.00% / +0.11% +0.65% +0.48%] index_select strided 5 : Elapsed 0.019 ms (1.862 ms / 100) 1.856 -> 1.856 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.38% +0.32%] index_select strided 7 : Elapsed 0.019 ms (1.856 ms / 100) 1.854 -> 1.858 ( +0.22%) [ +0.11% +0.00% +0.05% / +0.22% +0.70% +0.49%] index_select strided 8 : Elapsed 0.019 ms (1.856 ms / 100) 1.859 -> 1.861 ( +0.11%) [ +0.00% +0.27% +0.05% / +0.11% +0.32% +0.27%] index_select strided 16 : Elapsed 0.019 ms (1.859 ms / 100) 1.867 -> 1.869 ( +0.11%) [ +0.16% +0.00% +0.11% / +0.11% +0.37% +0.59%] index_select random : Elapsed 0.019 ms (1.870 ms / 100) 1.850 -> 1.850 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.43% +0.38%] index_select random_sorted : Elapsed 0.019 ms (1.851 ms / 100) 1.861 -> 1.861 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.21% +0.38%] index_select perm : Elapsed 0.019 ms (1.864 ms / 100) 1.863 -> 1.865 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.43% +0.43%] index_select perm_sorted : Elapsed 0.019 ms (1.863 ms / 100) B = [40, 5, 4, 16] (stride (1, 160, 40, 800)) A = [40, 20, 4, 16] (stride (1280, 64, 16, 1)) dim = 1 1.670 -> 1.673 ( +0.18%) [ +0.00% +0.42% +0.24% / +0.18% +0.60% +0.72%] index_select const : Elapsed 0.017 ms (1.670 ms / 100) 1.704 -> 1.705 ( +0.06%) [ +0.12% +0.00% +0.00% / +0.06% +0.88% +0.88%] index_select wrap : Elapsed 0.017 ms (1.706 ms / 100) 1.704 -> 1.702 ( -0.12%) [ +0.18% +0.00% +0.06% / -0.12% +0.76% +0.70%] index_select linear : Elapsed 0.017 ms (1.707 ms / 100) 1.703 -> 1.704 ( +0.06%) [ +0.12% +0.18% +0.00% / +0.06% +0.76% +0.82%] index_select reverse : Elapsed 0.017 ms (1.705 ms / 100) 1.670 -> 1.678 ( +0.48%) [ +0.42% +0.24% +0.00% / +0.48% +0.66% +0.54%] index_select skip64 : Elapsed 0.017 ms (1.677 ms / 100) 1.673 -> 1.676 ( +0.18%) [ +0.12% +0.12% +0.00% / +0.18% +0.30% +1.08%] index_select skip256 : Elapsed 0.017 ms (1.675 ms / 100) 1.691 -> 1.691 ( +0.00%) [ +0.18% +0.06% +0.00% / +0.00% +1.36% +1.18%] index_select spread : Elapsed 0.017 ms (1.694 ms / 100) 1.696 -> 1.698 ( +0.12%) [ +0.06% +0.18% +0.00% / +0.12% +1.12% +1.12%] index_select strided 3 : Elapsed 0.017 ms (1.697 ms / 100) 1.683 -> 1.681 ( -0.12%) [ +0.00% +0.00% +0.06% / -0.12% +0.89% +1.19%] index_select strided 5 : Elapsed 0.017 ms (1.683 ms / 100) 1.695 -> 1.695 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +0.94% +1.00%] index_select strided 7 : Elapsed 0.017 ms (1.697 ms / 100) 1.692 -> 1.693 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +1.36% +1.30%] index_select strided 8 : Elapsed 0.017 ms (1.693 ms / 100) 1.693 -> 1.694 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +1.18% +1.30%] index_select strided 16 : Elapsed 0.017 ms (1.693 ms / 100) 1.685 -> 1.688 ( +0.18%) [ +0.06% +0.12% +0.00% / +0.18% +1.07% +1.19%] index_select random : Elapsed 0.017 ms (1.686 ms / 100) 1.685 -> 1.683 ( -0.12%) [ +0.18% +0.06% +0.00% / -0.12% +1.07% +1.19%] index_select random_sorted : Elapsed 0.017 ms (1.688 ms / 100) 1.700 -> 1.702 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +1.06% +1.00%] index_select perm : Elapsed 0.017 ms (1.701 ms / 100) 1.703 -> 1.704 ( +0.06%) [ +0.06% +0.00% +0.12% / +0.06% +0.76% +0.70%] index_select perm_sorted : Elapsed 0.017 ms (1.704 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 200, 800)) A = [40, 20, 4, 16] (stride (64, 2560, 1, 4)) dim = 1 1.784 -> 1.784 ( +0.00%) [ +0.22% +0.00% +0.06% / +0.00% +0.06% +0.06%] index_select const : Elapsed 0.018 ms (1.788 ms / 100) 1.783 -> 1.787 ( +0.22%) [ +0.06% +0.00% +0.06% / +0.39% +0.22% +0.50%] index_select wrap : Elapsed 0.018 ms (1.784 ms / 100) 1.782 -> 1.783 ( +0.06%) [ +0.00% +0.22% +0.22% / +0.06% +0.45% +0.28%] index_select linear : Elapsed 0.018 ms (1.782 ms / 100) 1.785 -> 1.783 ( -0.11%) [ +0.11% +0.00% +0.06% / -0.11% +0.34% +0.17%] index_select reverse : Elapsed 0.018 ms (1.787 ms / 100) 1.782 -> 1.783 ( +0.06%) [ +0.39% +0.22% +0.00% / +0.06% +0.51% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.789 ms / 100) 1.784 -> 1.785 ( +0.06%) [ +0.00% +0.00% +0.28% / +0.17% +0.06% +0.45%] index_select skip256 : Elapsed 0.018 ms (1.784 ms / 100) 1.786 -> 1.788 ( +0.11%) [ +0.28% +0.22% +0.00% / +0.28% +0.11% +0.39%] index_select spread : Elapsed 0.018 ms (1.791 ms / 100) 1.792 -> 1.786 ( -0.33%) [ +0.39% +0.45% +0.00% / +0.22% -0.17% -0.33%] index_select strided 3 : Elapsed 0.018 ms (1.799 ms / 100) 1.786 -> 1.786 ( +0.00%) [ +0.39% +0.06% +0.00% / +0.06% +0.34% +0.00%] index_select strided 5 : Elapsed 0.018 ms (1.793 ms / 100) 1.786 -> 1.790 ( +0.22%) [ +0.45% +0.45% +0.00% / +0.28% +0.50% +0.22%] index_select strided 7 : Elapsed 0.018 ms (1.794 ms / 100) 1.788 -> 1.789 ( +0.06%) [ +0.00% +0.17% +0.00% / +0.06% +0.17% +0.45%] index_select strided 8 : Elapsed 0.018 ms (1.788 ms / 100) 1.786 -> 1.789 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.34% +0.34%] index_select strided 16 : Elapsed 0.018 ms (1.786 ms / 100) 1.810 -> 1.811 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.50% +0.17%] index_select random : Elapsed 0.018 ms (1.812 ms / 100) 1.800 -> 1.805 ( +0.28%) [ +0.39% +0.28% +0.00% / +0.28% +0.39% +0.61%] index_select random_sorted : Elapsed 0.018 ms (1.807 ms / 100) 1.792 -> 1.795 ( +0.17%) [ +0.28% +0.06% +0.00% / +0.17% +0.61% +0.39%] index_select perm : Elapsed 0.018 ms (1.797 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.17% +0.06% +0.00% / +0.11% +0.06% +0.56%] index_select perm_sorted : Elapsed 0.018 ms (1.789 ms / 100) out_shape = [40, 20, 5, 16] in_shape = [40, 20, 4, 16] idx_dim = 2 B = [40, 20, 5, 16] (stride (1600, 1, 320, 20)) A = [40, 20, 4, 16] (stride (20, 1, 800, 3200)) dim = 2 5.613 -> 5.610 ( -0.05%) [ +0.00% +0.02% +0.07% / -0.02% +0.02% -0.05%] index_add_ linear : Elapsed 0.056 ms (5.613 ms / 100) 5.565 -> 5.555 ( -0.18%) [ +0.07% +0.00% +0.05% / -0.04% -0.18% -0.18%] index_copy_ linear : Elapsed 0.056 ms (5.569 ms / 100) 5.619 -> 5.604 ( -0.27%) [ +0.00% +0.02% +0.07% / +0.05% -0.16% -0.27%] index_add_ reverse : Elapsed 0.056 ms (5.619 ms / 100) 5.559 -> 5.559 ( +0.00%) [ +0.25% +0.16% +0.00% / +0.13% +0.00% +0.02%] index_copy_ reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.614 -> 5.604 ( -0.18%) [ +0.00% +0.11% +0.00% / +0.02% +0.00% -0.18%] index_add_ spread : Elapsed 0.056 ms (5.614 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.04% +0.00% +0.07% / +0.07% -0.02% +0.02%] index_copy_ spread : Elapsed 0.056 ms (5.563 ms / 100) 5.614 -> 5.596 ( -0.32%) [ +0.00% +0.00% +0.12% / +0.12% -0.32% -0.18%] index_add_ strided 3 : Elapsed 0.056 ms (5.614 ms / 100) 5.565 -> 5.555 ( -0.18%) [ +0.07% +0.00% +0.22% / +0.16% -0.16% -0.18%] index_copy_ strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.630 -> 5.594 ( -0.64%) [ +0.00% +0.02% +0.05% / +0.07% -0.64% -0.62%] index_add_ perm : Elapsed 0.056 ms (5.630 ms / 100) 5.578 -> 5.541 ( -0.66%) [ +0.18% +0.00% +0.16% / +0.11% -0.66% -0.63%] index_copy_ perm : Elapsed 0.056 ms (5.588 ms / 100) 5.614 -> 5.607 ( -0.12%) [ +0.00% +0.05% +0.04% / +0.05% -0.12% +0.09%] index_add_ perm_sorted : Elapsed 0.056 ms (5.614 ms / 100) 5.560 -> 5.558 ( -0.04%) [ +0.13% +0.00% +0.07% / -0.04% -0.04% +0.05%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.567 ms / 100) 5.811 -> 5.806 ( -0.09%) [ +0.17% +0.03% +0.00% / -0.09% +0.02% +0.00%] index_select const : Elapsed 0.058 ms (5.821 ms / 100) 5.888 -> 5.892 ( +0.07%) [ +0.27% +0.00% +0.10% / +0.20% +0.07% +0.19%] index_select wrap : Elapsed 0.059 ms (5.904 ms / 100) 5.880 -> 5.885 ( +0.09%) [ +0.00% +0.09% +0.20% / +0.22% +0.14% +0.09%] index_select linear : Elapsed 0.059 ms (5.880 ms / 100) 5.876 -> 5.884 ( +0.14%) [ +0.00% +0.10% +0.10% / +0.14% +0.19% +0.22%] index_select reverse : Elapsed 0.059 ms (5.876 ms / 100) 5.809 -> 5.810 ( +0.02%) [ +0.10% +0.00% +0.02% / +0.10% +0.03% +0.02%] index_select skip64 : Elapsed 0.058 ms (5.815 ms / 100) 5.812 -> 5.811 ( -0.02%) [ +0.02% +0.00% +0.02% / +0.14% -0.02% +0.07%] index_select skip256 : Elapsed 0.058 ms (5.813 ms / 100) 5.881 -> 5.865 ( -0.27%) [ +0.03% +0.00% +0.00% / -0.05% -0.12% -0.27%] index_select spread : Elapsed 0.059 ms (5.883 ms / 100) 5.901 -> 5.894 ( -0.12%) [ +0.00% +0.10% +0.00% / +0.07% -0.12% -0.07%] index_select strided 3 : Elapsed 0.059 ms (5.901 ms / 100) 5.868 -> 5.846 ( -0.37%) [ +0.03% +0.00% +0.12% / +0.17% -0.22% -0.37%] index_select random : Elapsed 0.059 ms (5.870 ms / 100) 5.863 -> 5.858 ( -0.09%) [ +0.14% +0.00% +0.20% / +0.17% -0.05% -0.09%] index_select random_sorted : Elapsed 0.059 ms (5.871 ms / 100) B = [40, 20, 5, 16] (stride (1600, 1, 20, 100)) A = [40, 20, 4, 16] (stride (1280, 16, 320, 1)) dim = 2 5.847 -> 5.846 ( -0.02%) [ +0.09% +0.00% +0.09% / -0.02% +0.46% +0.43%] index_add_ linear : Elapsed 0.059 ms (5.852 ms / 100) 5.780 -> 5.788 ( +0.14%) [ +0.07% +0.02% +0.00% / +0.14% +0.26% +0.21%] index_copy_ linear : Elapsed 0.058 ms (5.784 ms / 100) 5.849 -> 5.857 ( +0.14%) [ +0.03% +0.07% +0.00% / +0.14% +0.55% +0.36%] index_add_ reverse : Elapsed 0.059 ms (5.851 ms / 100) 5.785 -> 5.782 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.22% +0.36%] index_copy_ reverse : Elapsed 0.058 ms (5.787 ms / 100) 5.850 -> 5.858 ( +0.14%) [ +0.00% +0.05% +0.21% / +0.14% +0.50% +0.51%] index_add_ spread : Elapsed 0.058 ms (5.850 ms / 100) 5.776 -> 5.787 ( +0.19%) [ +0.00% +0.12% +0.09% / +0.19% +0.38% +0.29%] index_copy_ spread : Elapsed 0.058 ms (5.776 ms / 100) 5.862 -> 5.868 ( +0.10%) [ +0.02% +0.00% +0.12% / +0.10% +0.39% +0.55%] index_add_ strided 3 : Elapsed 0.059 ms (5.863 ms / 100) 5.790 -> 5.793 ( +0.05%) [ +0.17% +0.00% +0.10% / +0.05% +0.48% +0.40%] index_copy_ strided 3 : Elapsed 0.058 ms (5.800 ms / 100) 5.877 -> 5.878 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.29% +0.34%] index_add_ perm : Elapsed 0.059 ms (5.881 ms / 100) 5.797 -> 5.806 ( +0.16%) [ +0.09% +0.17% +0.00% / +0.16% +0.31% +0.36%] index_copy_ perm : Elapsed 0.058 ms (5.802 ms / 100) 5.849 -> 5.851 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.41% +0.60%] index_add_ perm_sorted : Elapsed 0.059 ms (5.852 ms / 100) 5.770 -> 5.779 ( +0.16%) [ +0.00% +0.29% +0.24% / +0.16% +0.55% +0.52%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.770 ms / 100) 6.046 -> 6.054 ( +0.13%) [ +0.03% +0.00% +0.03% / +0.13% +0.45% +0.53%] index_select const : Elapsed 0.060 ms (6.048 ms / 100) 6.125 -> 6.136 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +0.47% +0.33%] index_select wrap : Elapsed 0.061 ms (6.125 ms / 100) 6.119 -> 6.133 ( +0.23%) [ +0.00% +0.05% +0.02% / +0.23% +0.44% +0.56%] index_select linear : Elapsed 0.061 ms (6.119 ms / 100) 6.139 -> 6.140 ( +0.02%) [ +0.00% +0.10% +0.11% / +0.02% +0.21% +0.20%] index_select reverse : Elapsed 0.061 ms (6.139 ms / 100) 6.083 -> 6.091 ( +0.13%) [ +0.23% +0.00% +0.02% / +0.13% +0.41% +0.35%] index_select skip64 : Elapsed 0.061 ms (6.097 ms / 100) 6.051 -> 6.053 ( +0.03%) [ +0.15% +0.00% +0.12% / +0.03% +0.45% +0.40%] index_select skip256 : Elapsed 0.061 ms (6.060 ms / 100) 6.150 -> 6.158 ( +0.13%) [ +0.13% +0.00% +0.18% / +0.13% +0.34% +0.46%] index_select spread : Elapsed 0.062 ms (6.158 ms / 100) 6.138 -> 6.142 ( +0.07%) [ +0.05% +0.00% +0.05% / +0.07% +0.37% +0.31%] index_select strided 3 : Elapsed 0.061 ms (6.141 ms / 100) 6.168 -> 6.177 ( +0.15%) [ +0.00% +0.10% +0.16% / +0.15% +0.24% +0.15%] index_select random : Elapsed 0.062 ms (6.168 ms / 100) 6.124 -> 6.140 ( +0.26%) [ +0.10% +0.26% +0.00% / +0.26% +0.56% +0.62%] index_select random_sorted : Elapsed 0.061 ms (6.130 ms / 100) B = [40, 20, 5, 16] (stride (1, 3200, 640, 40)) A = [40, 20, 4, 16] (stride (1, 160, 40, 3200)) dim = 2 5.839 -> 5.836 ( -0.05%) [ +0.07% +0.00% +0.10% / +0.24% +0.00% -0.05%] index_add_ linear : Elapsed 0.058 ms (5.843 ms / 100) 5.808 -> 5.803 ( -0.09%) [ +0.09% +0.00% +0.05% / +0.12% -0.09% -0.07%] index_copy_ linear : Elapsed 0.058 ms (5.813 ms / 100) 5.864 -> 5.829 ( -0.60%) [ +0.07% +0.03% +0.00% / +0.00% -0.51% -0.60%] index_add_ reverse : Elapsed 0.059 ms (5.868 ms / 100) 5.822 -> 5.803 ( -0.33%) [ +0.02% +0.00% +0.10% / +0.12% -0.33% -0.33%] index_copy_ reverse : Elapsed 0.058 ms (5.823 ms / 100) 5.836 -> 5.838 ( +0.03%) [ +0.15% +0.00% +0.27% / +0.12% +0.09% +0.03%] index_add_ spread : Elapsed 0.058 ms (5.845 ms / 100) 5.811 -> 5.806 ( -0.09%) [ +0.05% +0.07% +0.00% / +0.05% -0.09% -0.03%] index_copy_ spread : Elapsed 0.058 ms (5.814 ms / 100) 5.847 -> 5.842 ( -0.09%) [ +0.00% +0.12% +0.15% / +0.22% -0.09% -0.02%] index_add_ strided 3 : Elapsed 0.058 ms (5.847 ms / 100) 5.816 -> 5.811 ( -0.09%) [ +0.00% +0.07% +0.05% / +0.26% -0.09% +0.03%] index_copy_ strided 3 : Elapsed 0.058 ms (5.816 ms / 100) 5.863 -> 5.829 ( -0.58%) [ +0.15% +0.00% +0.00% / +0.14% -0.51% -0.58%] index_add_ perm : Elapsed 0.059 ms (5.872 ms / 100) 5.821 -> 5.801 ( -0.34%) [ +0.05% +0.10% +0.00% / +0.10% -0.31% -0.34%] index_copy_ perm : Elapsed 0.058 ms (5.824 ms / 100) 5.842 -> 5.839 ( -0.05%) [ +0.03% +0.00% +0.10% / +0.12% -0.05% +0.07%] index_add_ perm_sorted : Elapsed 0.058 ms (5.844 ms / 100) 5.816 -> 5.800 ( -0.28%) [ +0.00% +0.00% +0.00% / -0.02% -0.26% -0.28%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.816 ms / 100) 6.139 -> 6.133 ( -0.10%) [ +0.08% +0.07% +0.00% / -0.10% -0.07% -0.02%] index_select const : Elapsed 0.061 ms (6.144 ms / 100) 6.202 -> 6.200 ( -0.03%) [ +0.18% +0.00% +0.24% / +0.18% +0.03% -0.03%] index_select wrap : Elapsed 0.062 ms (6.213 ms / 100) 6.190 -> 6.189 ( -0.02%) [ +0.06% +0.00% +0.18% / +0.13% -0.02% +0.00%] index_select linear : Elapsed 0.062 ms (6.194 ms / 100) 6.196 -> 6.185 ( -0.18%) [ +0.00% +0.00% +0.02% / +0.05% -0.06% -0.18%] index_select reverse : Elapsed 0.062 ms (6.196 ms / 100) 6.135 -> 6.130 ( -0.08%) [ +0.00% +0.03% +0.08% / -0.08% +0.02% -0.07%] index_select skip64 : Elapsed 0.061 ms (6.135 ms / 100) 6.133 -> 6.131 ( -0.03%) [ +0.13% +0.00% +0.20% / +0.13% -0.03% -0.03%] index_select skip256 : Elapsed 0.061 ms (6.141 ms / 100) 6.191 -> 6.187 ( -0.06%) [ +0.00% +0.02% +0.06% / +0.18% -0.06% -0.03%] index_select spread : Elapsed 0.062 ms (6.191 ms / 100) 6.200 -> 6.201 ( +0.02%) [ +0.00% +0.11% +0.23% / +0.19% +0.02% +0.05%] index_select strided 3 : Elapsed 0.062 ms (6.200 ms / 100) 6.206 -> 6.200 ( -0.10%) [ +0.00% +0.21% +0.13% / +0.15% -0.10% +0.02%] index_select random : Elapsed 0.062 ms (6.206 ms / 100) 6.201 -> 6.192 ( -0.15%) [ +0.00% +0.03% +0.03% / -0.02% -0.15% +0.02%] index_select random_sorted : Elapsed 0.062 ms (6.201 ms / 100) B = [40, 20, 5, 16] (stride (1, 640, 12800, 40)) A = [40, 20, 4, 16] (stride (1280, 1, 320, 20)) dim = 2 5.786 -> 5.799 ( +0.22%) [ +0.02% +0.00% +0.19% / +0.22% +0.24% +0.28%] index_add_ linear : Elapsed 0.058 ms (5.787 ms / 100) 5.752 -> 5.759 ( +0.12%) [ +0.00% +0.21% +0.10% / +0.12% +0.17% +0.17%] index_copy_ linear : Elapsed 0.058 ms (5.752 ms / 100) 5.780 -> 5.788 ( +0.14%) [ +0.00% +0.09% +0.19% / +0.14% +0.36% +0.48%] index_add_ reverse : Elapsed 0.058 ms (5.780 ms / 100) 5.747 -> 5.749 ( +0.03%) [ +0.00% +0.07% +0.02% / +0.03% +0.31% +0.33%] index_copy_ reverse : Elapsed 0.057 ms (5.747 ms / 100) 5.790 -> 5.794 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.17% +0.29%] index_add_ spread : Elapsed 0.058 ms (5.794 ms / 100) 5.749 -> 5.756 ( +0.12%) [ +0.14% +0.00% +0.07% / +0.23% +0.14% +0.12%] index_copy_ spread : Elapsed 0.058 ms (5.757 ms / 100) 5.783 -> 5.784 ( +0.02%) [ +0.00% +0.19% +0.03% / +0.02% +0.59% +0.57%] index_add_ strided 3 : Elapsed 0.058 ms (5.783 ms / 100) 5.751 -> 5.746 ( -0.09%) [ +0.03% +0.07% +0.00% / -0.09% +0.43% +0.47%] index_copy_ strided 3 : Elapsed 0.058 ms (5.753 ms / 100) 5.766 -> 5.771 ( +0.09%) [ +0.00% +0.12% +0.02% / +0.09% +0.66% +0.54%] index_add_ perm : Elapsed 0.058 ms (5.766 ms / 100) 5.735 -> 5.740 ( +0.09%) [ +0.05% +0.00% +0.02% / +0.09% +0.61% +0.58%] index_copy_ perm : Elapsed 0.057 ms (5.738 ms / 100) 5.780 -> 5.794 ( +0.24%) [ +0.00% +0.09% +0.09% / +0.24% +0.54% +0.57%] index_add_ perm_sorted : Elapsed 0.058 ms (5.780 ms / 100) 5.743 -> 5.746 ( +0.05%) [ +0.00% +0.02% +0.21% / +0.05% +0.42% +0.59%] index_copy_ perm_sorted : Elapsed 0.057 ms (5.743 ms / 100) 6.073 -> 6.097 ( +0.40%) [ +0.23% +0.00% +0.23% / +0.41% +0.40% +0.44%] index_select const : Elapsed 0.061 ms (6.087 ms / 100) 6.139 -> 6.145 ( +0.10%) [ +0.00% +0.02% +0.08% / +0.10% +0.10% +0.10%] index_select wrap : Elapsed 0.061 ms (6.139 ms / 100) 6.131 -> 6.132 ( +0.02%) [ +0.00% +0.03% +0.00% / +0.02% +0.16% +0.16%] index_select linear : Elapsed 0.061 ms (6.131 ms / 100) 6.136 -> 6.140 ( +0.07%) [ +0.16% +0.00% +0.16% / +0.13% +0.11% +0.07%] index_select reverse : Elapsed 0.061 ms (6.146 ms / 100) 6.078 -> 6.093 ( +0.25%) [ +0.10% +0.00% +0.20% / +0.25% +0.31% +0.28%] index_select skip64 : Elapsed 0.061 ms (6.084 ms / 100) 6.083 -> 6.091 ( +0.13%) [ +0.07% +0.00% +0.13% / +0.13% +0.26% +0.28%] index_select skip256 : Elapsed 0.061 ms (6.087 ms / 100) 6.130 -> 6.142 ( +0.20%) [ +0.00% +0.13% +0.10% / +0.20% +0.24% +0.21%] index_select spread : Elapsed 0.061 ms (6.130 ms / 100) 6.139 -> 6.149 ( +0.16%) [ +0.00% +0.13% +0.02% / +0.16% +0.20% +0.28%] index_select strided 3 : Elapsed 0.061 ms (6.139 ms / 100) 6.141 -> 6.123 ( -0.29%) [ +0.00% +0.05% +0.15% / +0.03% -0.11% -0.29%] index_select random : Elapsed 0.061 ms (6.141 ms / 100) 6.126 -> 6.111 ( -0.24%) [ +0.00% +0.08% +0.08% / +0.16% -0.24% -0.11%] index_select random_sorted : Elapsed 0.061 ms (6.126 ms / 100) out_shape = [40, 20, 4, 5] in_shape = [40, 20, 4, 16] idx_dim = 3 B = [40, 20, 4, 5] (stride (400, 20, 1, 4)) A = [40, 20, 4, 16] (stride (320, 1, 12800, 20)) dim = 3 2.144 -> 2.151 ( +0.33%) [ +0.28% +0.19% +0.00% / +0.33% +0.56% +0.51%] index_select const : Elapsed 0.022 ms (2.150 ms / 100) 2.204 -> 2.211 ( +0.32%) [ +0.36% +0.09% +0.00% / +0.32% +0.54% +0.82%] index_select wrap : Elapsed 0.022 ms (2.212 ms / 100) 2.209 -> 2.204 ( -0.23%) [ +0.05% +0.23% +0.00% / -0.23% +0.50% +0.86%] index_select linear : Elapsed 0.022 ms (2.210 ms / 100) 2.210 -> 2.212 ( +0.09%) [ +0.23% +0.27% +0.00% / +0.09% +1.22% +1.00%] index_select reverse : Elapsed 0.022 ms (2.215 ms / 100) 2.149 -> 2.146 ( -0.14%) [ +0.00% +0.05% +0.05% / -0.14% +0.47% +0.37%] index_select skip64 : Elapsed 0.021 ms (2.149 ms / 100) 2.145 -> 2.149 ( +0.19%) [ +0.23% +0.00% +0.37% / +0.19% +0.75% +0.65%] index_select skip256 : Elapsed 0.022 ms (2.150 ms / 100) 2.219 -> 2.226 ( +0.32%) [ +0.18% +0.00% +0.18% / +0.32% +0.77% +0.41%] index_select spread : Elapsed 0.022 ms (2.223 ms / 100) 2.215 -> 2.229 ( +0.63%) [ +0.00% +0.41% +0.23% / +0.63% +0.99% +1.22%] index_select strided 3 : Elapsed 0.022 ms (2.215 ms / 100) 2.206 -> 2.209 ( +0.14%) [ +0.23% +0.00% +0.14% / +0.14% +1.00% +0.95%] index_select strided 5 : Elapsed 0.022 ms (2.211 ms / 100) 2.212 -> 2.214 ( +0.09%) [ +0.45% +0.27% +0.00% / +0.09% +0.54% +0.50%] index_select strided 7 : Elapsed 0.022 ms (2.222 ms / 100) 2.163 -> 2.162 ( -0.05%) [ +0.00% +0.09% +0.18% / -0.05% +0.60% +0.79%] index_select strided 8 : Elapsed 0.022 ms (2.163 ms / 100) 2.201 -> 2.201 ( +0.00%) [ +0.00% +0.27% +0.27% / +0.00% +0.59% +0.55%] index_select random : Elapsed 0.022 ms (2.201 ms / 100) 2.204 -> 2.203 ( -0.05%) [ +0.32% +0.00% +0.32% / -0.05% +0.68% +0.64%] index_select random_sorted : Elapsed 0.022 ms (2.211 ms / 100) 2.215 -> 2.212 ( -0.14%) [ +0.00% +0.09% +0.09% / -0.14% +0.54% +0.54%] index_select perm : Elapsed 0.022 ms (2.215 ms / 100) 2.208 -> 2.202 ( -0.27%) [ +0.05% +0.00% +0.09% / -0.27% +0.72% +0.54%] index_select perm_sorted : Elapsed 0.022 ms (2.209 ms / 100) B = [40, 20, 4, 5] (stride (400, 1, 100, 20)) A = [40, 20, 4, 16] (stride (80, 4, 1, 3200)) dim = 3 2.000 -> 1.996 ( -0.20%) [ +0.10% +0.00% +0.60% / +0.40% -0.20% -0.15%] index_select const : Elapsed 0.020 ms (2.002 ms / 100) 2.036 -> 2.041 ( +0.25%) [ +0.29% +0.00% +0.49% / +0.54% +0.49% +0.25%] index_select wrap : Elapsed 0.020 ms (2.042 ms / 100) 2.038 -> 2.039 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.25% +0.34%] index_select linear : Elapsed 0.020 ms (2.039 ms / 100) 2.044 -> 2.031 ( -0.64%) [ +0.15% +0.10% +0.00% / -0.10% -0.64% -0.49%] index_select reverse : Elapsed 0.020 ms (2.047 ms / 100) 2.007 -> 2.003 ( -0.20%) [ +0.00% +0.15% +0.05% / +0.10% -0.20% -0.20%] index_select skip64 : Elapsed 0.020 ms (2.007 ms / 100) 2.004 -> 2.001 ( -0.15%) [ +0.30% +0.00% +0.35% / -0.05% -0.15% -0.15%] index_select skip256 : Elapsed 0.020 ms (2.010 ms / 100) 2.065 -> 2.047 ( -0.87%) [ +0.10% +0.15% +0.00% / +0.24% -0.87% -0.82%] index_select spread : Elapsed 0.021 ms (2.067 ms / 100) 2.064 -> 2.048 ( -0.78%) [ +0.24% +0.29% +0.00% / +0.24% -0.78% -0.68%] index_select strided 3 : Elapsed 0.021 ms (2.069 ms / 100) 2.037 -> 2.038 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.79% +1.67%] index_select strided 5 : Elapsed 0.020 ms (2.037 ms / 100) 2.068 -> 2.039 ( -1.40%) [ +0.15% +0.10% +0.00% / +0.10% -1.40% -1.35%] index_select strided 7 : Elapsed 0.021 ms (2.071 ms / 100) 2.020 -> 2.012 ( -0.40%) [ +0.05% +0.00% +0.25% / +0.15% -0.20% -0.40%] index_select strided 8 : Elapsed 0.020 ms (2.021 ms / 100) 2.068 -> 2.063 ( -0.24%) [ +0.15% +0.29% +0.00% / +0.34% -0.24% -0.24%] index_select random : Elapsed 0.021 ms (2.071 ms / 100) 2.069 -> 2.058 ( -0.53%) [ +0.05% +0.00% +0.10% / +0.24% -0.53% -0.39%] index_select random_sorted : Elapsed 0.021 ms (2.070 ms / 100) 2.046 -> 2.034 ( -0.59%) [ +0.15% +0.15% +0.00% / +0.00% -0.49% -0.59%] index_select perm : Elapsed 0.020 ms (2.049 ms / 100) 2.035 -> 2.036 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.15% +0.39%] index_select perm_sorted : Elapsed 0.020 ms (2.037 ms / 100) B = [40, 20, 4, 5] (stride (20, 800, 5, 1)) A = [40, 20, 4, 16] (stride (1280, 64, 1, 4)) dim = 3 2.201 -> 2.202 ( +0.05%) [ +0.27% +0.18% +0.00% / +0.05% +0.41% +0.41%] index_select const : Elapsed 0.022 ms (2.207 ms / 100) 2.222 -> 2.226 ( +0.18%) [ +0.32% +0.14% +0.00% / +0.18% +0.54% +0.72%] index_select wrap : Elapsed 0.022 ms (2.229 ms / 100) 2.228 -> 2.232 ( +0.18%) [ +0.00% +0.13% +0.13% / +0.18% +0.27% +0.40%] index_select linear : Elapsed 0.022 ms (2.228 ms / 100) 2.227 -> 2.228 ( +0.04%) [ +0.09% +0.00% +0.13% / +0.04% +0.54% +0.54%] index_select reverse : Elapsed 0.022 ms (2.229 ms / 100) 2.200 -> 2.203 ( +0.14%) [ +0.18% +0.00% +0.05% / +0.14% +0.41% +0.36%] index_select skip64 : Elapsed 0.022 ms (2.204 ms / 100) 2.203 -> 2.209 ( +0.27%) [ +0.09% +0.00% +0.05% / +0.27% +0.45% +0.27%] index_select skip256 : Elapsed 0.022 ms (2.205 ms / 100) 2.264 -> 2.266 ( +0.09%) [ +0.22% +0.00% +0.22% / +0.09% +0.53% +0.53%] index_select spread : Elapsed 0.023 ms (2.269 ms / 100) 2.264 -> 2.271 ( +0.31%) [ +0.22% +0.00% +0.18% / +0.31% +0.66% +0.80%] index_select strided 3 : Elapsed 0.023 ms (2.269 ms / 100) 2.248 -> 2.250 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +0.76% +0.62%] index_select strided 5 : Elapsed 0.023 ms (2.250 ms / 100) 2.266 -> 2.262 ( -0.18%) [ +0.00% +0.22% +0.18% / -0.18% +0.53% +0.49%] index_select strided 7 : Elapsed 0.023 ms (2.266 ms / 100) 2.220 -> 2.222 ( +0.09%) [ +0.14% +0.00% +0.00% / +0.09% +0.50% +0.45%] index_select strided 8 : Elapsed 0.022 ms (2.223 ms / 100) 2.225 -> 2.227 ( +0.09%) [ +0.09% +0.00% +0.13% / +0.09% +0.58% +0.81%] index_select random : Elapsed 0.022 ms (2.227 ms / 100) 2.229 -> 2.228 ( -0.04%) [ +0.27% +0.00% +0.18% / -0.04% +0.49% +0.40%] index_select random_sorted : Elapsed 0.022 ms (2.235 ms / 100) 2.251 -> 2.254 ( +0.13%) [ +0.00% +0.00% +0.18% / +0.13% +0.62% +0.40%] index_select perm : Elapsed 0.023 ms (2.251 ms / 100) 2.251 -> 2.249 ( -0.09%) [ +0.00% +0.13% +0.04% / -0.09% +0.40% +0.22%] index_select perm_sorted : Elapsed 0.023 ms (2.251 ms / 100) B = [40, 20, 4, 5] (stride (1, 800, 40, 160)) A = [40, 20, 4, 16] (stride (64, 2560, 16, 1)) dim = 3 2.413 -> 2.413 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.12% +0.00% +0.04%] index_select const : Elapsed 0.024 ms (2.413 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.42% +0.00% +0.33% / +0.42% +1.16% +0.75%] index_select wrap : Elapsed 0.024 ms (2.415 ms / 100) 2.407 -> 2.411 ( +0.17%) [ +0.25% +0.17% +0.00% / +0.17% +0.33% +0.42%] index_select linear : Elapsed 0.024 ms (2.413 ms / 100) 2.399 -> 2.405 ( +0.25%) [ +0.25% +0.46% +0.00% / +0.25% +0.58% +0.54%] index_select reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.412 -> 2.415 ( +0.12%) [ +0.21% +0.17% +0.00% / +0.25% +0.12% +0.37%] index_select skip64 : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% -0.04%] index_select skip256 : Elapsed 0.024 ms (2.411 ms / 100) 2.422 -> 2.423 ( +0.04%) [ +0.00% +0.17% +0.17% / +0.04% +0.33% +0.12%] index_select spread : Elapsed 0.024 ms (2.422 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.04% +0.21% +0.00% / +0.00% +0.12% +0.37%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.431 -> 2.424 ( -0.29%) [ +0.25% +0.21% +0.00% / -0.29% +0.04% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.420 -> 2.427 ( +0.29%) [ +0.33% +0.21% +0.00% / +0.41% +0.29% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.434 -> 2.434 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.00% +0.00%] index_select strided 8 : Elapsed 0.024 ms (2.439 ms / 100) 2.426 -> 2.428 ( +0.08%) [ +0.33% +0.33% +0.00% / +0.08% +0.49% +0.41%] index_select random : Elapsed 0.024 ms (2.434 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.12% +0.37% +0.00% / +0.17% +0.04% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.423 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.25% +0.12% +0.29%] index_select perm : Elapsed 0.024 ms (2.408 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.58% +0.00% +0.46% / +0.37% +0.25% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [40, 20, 4, 5] (stride (1, 800, 40, 160)) A = [40, 20, 4, 16] (stride (1, 640, 12800, 40)) dim = 3 2.478 -> 2.485 ( +0.28%) [ +0.24% +0.12% +0.00% / +0.28% +0.48% +0.40%] index_select const : Elapsed 0.025 ms (2.484 ms / 100) 2.487 -> 2.487 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.24% +0.36%] index_select wrap : Elapsed 0.025 ms (2.489 ms / 100) 2.487 -> 2.494 ( +0.28%) [ +0.16% +0.20% +0.00% / +0.28% +0.32% +0.48%] index_select linear : Elapsed 0.025 ms (2.491 ms / 100) 2.482 -> 2.483 ( +0.04%) [ +0.20% +0.00% +0.04% / +0.04% +0.24% +0.20%] index_select reverse : Elapsed 0.025 ms (2.487 ms / 100) 2.473 -> 2.474 ( +0.04%) [ +0.24% +0.00% +0.36% / +0.04% +0.28% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.479 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.16% +0.20% +0.00% / +0.20% +0.44% +0.48%] index_select skip256 : Elapsed 0.025 ms (2.480 ms / 100) 2.475 -> 2.470 ( -0.20%) [ +0.04% +0.00% +0.04% / -0.20% +0.16% +0.28%] index_select spread : Elapsed 0.025 ms (2.476 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.00% +0.32% +0.04% / +0.12% +0.36% +0.57%] index_select strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.476 -> 2.483 ( +0.28%) [ +0.12% +0.00% +0.24% / +0.28% +0.57% +0.53%] index_select strided 5 : Elapsed 0.025 ms (2.479 ms / 100) 2.479 -> 2.480 ( +0.04%) [ +0.00% +0.12% +0.16% / +0.04% +0.56% +0.40%] index_select strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.468 -> 2.466 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.28% +0.36%] index_select strided 8 : Elapsed 0.025 ms (2.469 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.20% +0.28%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.24% +0.24%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.485 -> 2.485 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.40% +0.40%] index_select perm : Elapsed 0.025 ms (2.489 ms / 100) 2.484 -> 2.487 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.32% +0.40%] index_select perm_sorted : Elapsed 0.025 ms (2.489 ms / 100) B = [40, 20, 4, 5] (stride (1, 40, 4000, 800)) A = [40, 20, 4, 16] (stride (1, 40, 800, 3200)) dim = 3 2.469 -> 2.474 ( +0.20%) [ +0.24% +0.24% +0.00% / +0.20% +0.65% +0.45%] index_select const : Elapsed 0.025 ms (2.475 ms / 100) 2.487 -> 2.491 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.20% +0.36%] index_select wrap : Elapsed 0.025 ms (2.490 ms / 100) 2.483 -> 2.488 ( +0.20%) [ +0.40% +0.00% +0.24% / +0.20% +0.40% +0.56%] index_select linear : Elapsed 0.025 ms (2.493 ms / 100) 2.486 -> 2.487 ( +0.04%) [ +0.28% +0.12% +0.00% / +0.04% +0.72% +0.60%] index_select reverse : Elapsed 0.025 ms (2.493 ms / 100) 2.471 -> 2.476 ( +0.20%) [ +0.12% +0.00% +0.12% / +0.20% +0.53% +0.28%] index_select skip64 : Elapsed 0.025 ms (2.474 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.00% +0.20% +0.20% / +0.04% +0.32% +0.28%] index_select skip256 : Elapsed 0.025 ms (2.471 ms / 100) 2.490 -> 2.490 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.24% +0.00% +0.08%] index_select spread : Elapsed 0.025 ms (2.493 ms / 100) 2.492 -> 2.491 ( -0.04%) [ +0.08% +0.16% +0.00% / +0.28% -0.04% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.494 ms / 100) 2.491 -> 2.493 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.12% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.491 ms / 100) 2.491 -> 2.493 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +0.08% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.495 ms / 100) 2.475 -> 2.478 ( +0.12%) [ +0.00% +0.32% +0.12% / +0.28% +0.16% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.475 ms / 100) 2.488 -> 2.492 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.72% +0.56%] index_select random : Elapsed 0.025 ms (2.492 ms / 100) 2.488 -> 2.490 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.60% +0.64%] index_select random_sorted : Elapsed 0.025 ms (2.489 ms / 100) 2.491 -> 2.493 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.48% +0.48%] index_select perm : Elapsed 0.025 ms (2.495 ms / 100) 2.493 -> 2.494 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.28% +0.48%] index_select perm_sorted : Elapsed 0.025 ms (2.494 ms / 100) B = [40, 20, 4, 5] (stride (80, 4, 1, 3200)) A = [40, 20, 4, 16] (stride (1280, 64, 1, 4)) dim = 3 1.921 -> 1.923 ( +0.10%) [ +0.21% +0.00% +0.26% / +0.10% +0.73% +0.73%] index_select const : Elapsed 0.019 ms (1.925 ms / 100) 1.953 -> 1.949 ( -0.20%) [ +0.15% +0.15% +0.00% / -0.20% +0.77% +0.67%] index_select wrap : Elapsed 0.020 ms (1.956 ms / 100) 1.948 -> 1.948 ( +0.00%) [ +0.10% +0.31% +0.00% / +0.00% +1.03% +0.87%] index_select linear : Elapsed 0.020 ms (1.950 ms / 100) 1.956 -> 1.954 ( -0.10%) [ +0.10% +0.00% +0.00% / -0.10% +0.87% +1.02%] index_select reverse : Elapsed 0.020 ms (1.958 ms / 100) 1.920 -> 1.925 ( +0.26%) [ +0.26% +0.21% +0.00% / +0.26% +0.52% +0.68%] index_select skip64 : Elapsed 0.019 ms (1.925 ms / 100) 1.923 -> 1.924 ( +0.05%) [ +0.00% +0.21% +0.10% / +0.05% +0.57% +0.62%] index_select skip256 : Elapsed 0.019 ms (1.923 ms / 100) 1.997 -> 1.994 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.85% +0.85%] index_select spread : Elapsed 0.020 ms (1.997 ms / 100) 1.998 -> 1.997 ( -0.05%) [ +0.20% +0.00% +0.05% / -0.05% +0.70% +0.75%] index_select strided 3 : Elapsed 0.020 ms (2.002 ms / 100) 1.979 -> 1.976 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.81% +0.51%] index_select strided 5 : Elapsed 0.020 ms (1.979 ms / 100) 1.993 -> 1.993 ( +0.00%) [ +0.20% +0.30% +0.00% / +0.00% +1.00% +1.15%] index_select strided 7 : Elapsed 0.020 ms (1.997 ms / 100) 1.942 -> 1.943 ( +0.05%) [ +0.00% +0.10% +0.26% / +0.05% +0.67% +0.77%] index_select strided 8 : Elapsed 0.019 ms (1.942 ms / 100) 1.975 -> 1.977 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +1.06% +1.27%] index_select random : Elapsed 0.020 ms (1.975 ms / 100) 1.973 -> 1.973 ( +0.00%) [ +0.10% +0.20% +0.00% / +0.00% +0.86% +0.91%] index_select random_sorted : Elapsed 0.020 ms (1.975 ms / 100) 1.973 -> 1.978 ( +0.25%) [ +0.35% +0.35% +0.00% / +0.25% +1.22% +1.22%] index_select perm : Elapsed 0.020 ms (1.980 ms / 100) 1.972 -> 1.973 ( +0.05%) [ +0.25% +0.46% +0.00% / +0.05% +0.86% +0.71%] index_select perm_sorted : Elapsed 0.020 ms (1.977 ms / 100) B = [40, 20, 4, 5] (stride (20, 1, 800, 3200)) A = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) dim = 3 2.337 -> 2.337 ( +0.00%) [ +0.09% +0.21% +0.00% / +0.00% +0.51% +0.51%] index_select const : Elapsed 0.023 ms (2.339 ms / 100) 2.341 -> 2.345 ( +0.17%) [ +0.26% +0.04% +0.00% / +0.17% +0.43% +0.21%] index_select wrap : Elapsed 0.023 ms (2.347 ms / 100) 2.341 -> 2.342 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.26% +0.30%] index_select linear : Elapsed 0.023 ms (2.343 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.30% +0.00% +0.21% / +0.26% +0.56% +0.34%] index_select reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.334 -> 2.341 ( +0.30%) [ +0.17% +0.00% +0.13% / +0.30% +0.39% +0.34%] index_select skip64 : Elapsed 0.023 ms (2.338 ms / 100) 2.336 -> 2.337 ( +0.04%) [ +0.34% +0.00% +0.21% / +0.04% +0.26% +0.39%] index_select skip256 : Elapsed 0.023 ms (2.344 ms / 100) 2.366 -> 2.369 ( +0.13%) [ +0.00% +0.21% +0.08% / +0.13% +0.55% +0.25%] index_select spread : Elapsed 0.024 ms (2.366 ms / 100) 2.371 -> 2.370 ( -0.04%) [ +0.08% +0.13% +0.00% / -0.04% +0.30% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.373 ms / 100) 2.368 -> 2.366 ( -0.08%) [ +0.30% +0.25% +0.00% / -0.08% +0.42% +0.13%] index_select strided 5 : Elapsed 0.024 ms (2.375 ms / 100) 2.367 -> 2.365 ( -0.08%) [ +0.21% +0.04% +0.00% / -0.08% +0.25% +0.30%] index_select strided 7 : Elapsed 0.024 ms (2.372 ms / 100) 2.373 -> 2.369 ( -0.17%) [ +0.00% +0.04% +0.00% / -0.17% -0.08% +0.13%] index_select strided 8 : Elapsed 0.024 ms (2.373 ms / 100) 2.366 -> 2.366 ( +0.00%) [ +0.17% +0.21% +0.00% / +0.04% +0.21% +0.00%] index_select random : Elapsed 0.024 ms (2.370 ms / 100) 2.367 -> 2.376 ( +0.38%) [ +0.13% +0.00% +0.34% / +0.38% +0.38% +0.42%] index_select random_sorted : Elapsed 0.024 ms (2.370 ms / 100) 2.371 -> 2.374 ( +0.13%) [ +0.21% +0.00% +0.04% / +0.13% +0.30% +0.21%] index_select perm : Elapsed 0.024 ms (2.376 ms / 100) 2.362 -> 2.370 ( +0.34%) [ +0.42% +0.42% +0.00% / +0.34% +0.68% +0.59%] index_select perm_sorted : Elapsed 0.024 ms (2.372 ms / 100) B = [40, 20, 4, 5] (stride (1, 40, 800, 3200)) A = [40, 20, 4, 16] (stride (20, 1, 800, 3200)) dim = 3 2.150 -> 2.151 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +1.12% +1.02%] index_select const : Elapsed 0.022 ms (2.151 ms / 100) 2.235 -> 2.239 ( +0.18%) [ +0.09% +0.09% +0.00% / +0.18% +0.54% +0.67%] index_select wrap : Elapsed 0.022 ms (2.237 ms / 100) 2.240 -> 2.244 ( +0.18%) [ +0.04% +0.00% +0.13% / +0.18% +0.49% +0.67%] index_select linear : Elapsed 0.022 ms (2.241 ms / 100) 2.234 -> 2.240 ( +0.27%) [ +0.22% +0.40% +0.00% / +0.27% +0.27% +0.76%] index_select reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.149 -> 2.148 ( -0.05%) [ +0.09% +0.00% +0.05% / -0.05% +0.84% +0.74%] index_select skip64 : Elapsed 0.022 ms (2.151 ms / 100) 2.148 -> 2.150 ( +0.09%) [ +0.19% +0.19% +0.00% / +0.09% +1.07% +1.02%] index_select skip256 : Elapsed 0.022 ms (2.152 ms / 100) 2.233 -> 2.231 ( -0.09%) [ +0.00% +0.45% +0.04% / -0.09% +0.36% +0.40%] index_select spread : Elapsed 0.022 ms (2.233 ms / 100) 2.244 -> 2.246 ( +0.09%) [ +0.18% +0.13% +0.00% / +0.09% +0.18% +0.40%] index_select strided 3 : Elapsed 0.022 ms (2.248 ms / 100) 2.236 -> 2.235 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.00% -0.04% +0.00%] index_select strided 5 : Elapsed 0.022 ms (2.236 ms / 100) 2.239 -> 2.241 ( +0.09%) [ +0.18% +0.04% +0.00% / +0.13% +0.22% +0.09%] index_select strided 7 : Elapsed 0.022 ms (2.243 ms / 100) 2.163 -> 2.165 ( +0.09%) [ +0.14% +0.18% +0.00% / +0.09% +1.02% +1.06%] index_select strided 8 : Elapsed 0.022 ms (2.166 ms / 100) 2.219 -> 2.219 ( +0.00%) [ +0.00% +0.18% +0.14% / +0.00% +0.41% +0.54%] index_select random : Elapsed 0.022 ms (2.219 ms / 100) 2.216 -> 2.223 ( +0.32%) [ +0.18% +0.14% +0.00% / +0.32% +0.45% +0.59%] index_select random_sorted : Elapsed 0.022 ms (2.220 ms / 100) 2.243 -> 2.247 ( +0.18%) [ +0.00% +0.13% +0.00% / +0.18% +0.40% +0.22%] index_select perm : Elapsed 0.022 ms (2.243 ms / 100) 2.228 -> 2.228 ( +0.00%) [ +0.13% +0.22% +0.00% / +0.00% +0.18% +0.40%] index_select perm_sorted : Elapsed 0.022 ms (2.231 ms / 100) out_shape = [5, 20, 16, 4] in_shape = [40, 20, 16, 4] idx_dim = 0 B = [5, 20, 16, 4] (stride (4, 320, 20, 1)) A = [40, 20, 16, 4] (stride (16, 2560, 1, 640)) dim = 0 1.391 -> 1.394 ( +0.22%) [ +0.50% +0.43% +0.00% / +0.22% +1.08% +0.65%] index_select const : Elapsed 0.014 ms (1.398 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.14% +0.14%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.14% +0.07%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.381 ( -0.14%) [ +0.14% +0.00% +0.00% / -0.14% +0.22% +0.22%] index_select reverse : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.07% +0.14% +0.00% / +0.22% +0.29% +0.29%] index_select skip64 : Elapsed 0.014 ms (1.382 ms / 100) 1.394 -> 1.398 ( +0.29%) [ +0.00% +0.36% +0.36% / +0.29% +0.93% +0.79%] index_select skip256 : Elapsed 0.014 ms (1.394 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.22% +0.00% +0.00% / +0.14% +0.29% +0.29%] index_select spread : Elapsed 0.014 ms (1.385 ms / 100) 1.385 -> 1.384 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.07% +0.07%] index_select strided 3 : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.386 ( +0.22%) [ +0.07% +0.07% +0.00% / +0.22% +0.22% +0.29%] index_select strided 5 : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.36% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.382 -> 1.386 ( +0.29%) [ +0.22% +0.00% +0.07% / +0.29% +0.29% +0.36%] index_select strided 8 : Elapsed 0.014 ms (1.385 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.29% +0.00% +0.14% / +0.07% +0.51% +0.65%] index_select strided 16 : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.07% +0.07% +0.00% / +0.29% +0.51% +0.51%] index_select random : Elapsed 0.014 ms (1.381 ms / 100) 1.382 -> 1.379 ( -0.22%) [ +0.22% +0.00% +0.14% / -0.22% +0.36% +0.36%] index_select random_sorted : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.380 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.43% +0.36%] index_select perm : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.29% +0.00% +0.14% / +0.22% +0.51% +0.58%] index_select perm_sorted : Elapsed 0.014 ms (1.384 ms / 100) B = [5, 20, 16, 4] (stride (80, 4, 400, 1)) A = [40, 20, 16, 4] (stride (1280, 64, 4, 1)) dim = 0 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.86%] index_select const : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.55% +0.47%] index_select linear : Elapsed 0.013 ms (1.277 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.71% +0.78%] index_select reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.86% +0.86%] index_select skip64 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.86% +0.78%] index_select skip256 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +1.02% +0.86%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.86%] index_select strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.78%] index_select strided 5 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.78%] index_select strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.86% +0.86%] index_select strided 8 : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.86%] index_select strided 16 : Elapsed 0.013 ms (1.276 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +1.02% +1.02%] index_select random : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.78%] index_select random_sorted : Elapsed 0.013 ms (1.276 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.31% +0.08% +0.00% / +0.08% +0.71% +0.78%] index_select perm : Elapsed 0.013 ms (1.279 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.78% +0.71%] index_select perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) B = [5, 20, 16, 4] (stride (80, 4, 400, 1)) A = [40, 20, 16, 4] (stride (320, 1, 20, 12800)) dim = 0 1.574 -> 1.574 ( +0.00%) [ +0.00% +0.00% +0.19% / +0.00% +0.00% +0.00%] index_select const : Elapsed 0.016 ms (1.574 ms / 100) 1.578 -> 1.578 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.38% +0.51%] index_select wrap : Elapsed 0.016 ms (1.579 ms / 100) 1.576 -> 1.580 ( +0.25%) [ +0.19% +0.00% +0.00% / +0.25% +0.51% +0.57%] index_select linear : Elapsed 0.016 ms (1.579 ms / 100) 1.579 -> 1.579 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.19% +0.25%] index_select reverse : Elapsed 0.016 ms (1.580 ms / 100) 1.569 -> 1.574 ( +0.32%) [ +0.45% +0.00% +0.45% / +0.64% +0.32% +0.38%] index_select skip64 : Elapsed 0.016 ms (1.576 ms / 100) 1.563 -> 1.574 ( +0.70%) [ +1.09% +0.00% +0.13% / +0.83% +1.02% +0.70%] index_select skip256 : Elapsed 0.016 ms (1.580 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.45% +0.45% +0.00% / +0.45% +0.06% +0.38%] index_select spread : Elapsed 0.016 ms (1.579 ms / 100) 1.570 -> 1.566 ( -0.25%) [ +0.51% +0.76% +0.00% / -0.25% +0.32% +0.32%] index_select strided 3 : Elapsed 0.016 ms (1.578 ms / 100) 1.574 -> 1.580 ( +0.38%) [ +0.00% +0.13% +0.13% / +0.38% +0.64% +0.57%] index_select strided 5 : Elapsed 0.016 ms (1.574 ms / 100) 1.570 -> 1.575 ( +0.32%) [ +0.38% +0.00% +0.25% / +0.32% +0.57% +0.45%] index_select strided 7 : Elapsed 0.016 ms (1.576 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.25% +0.00% +0.06% / +0.19% +0.45% +0.51%] index_select strided 8 : Elapsed 0.016 ms (1.577 ms / 100) 1.566 -> 1.575 ( +0.57%) [ +0.89% +0.00% +0.38% / +0.70% +0.57% +0.57%] index_select strided 16 : Elapsed 0.016 ms (1.580 ms / 100) 1.575 -> 1.577 ( +0.13%) [ +0.25% +0.13% +0.00% / +0.13% +0.13% +0.19%] index_select random : Elapsed 0.016 ms (1.579 ms / 100) 1.571 -> 1.579 ( +0.51%) [ +0.25% +0.13% +0.00% / +0.57% +0.57% +0.51%] index_select random_sorted : Elapsed 0.016 ms (1.575 ms / 100) 1.566 -> 1.569 ( +0.19%) [ +0.83% +0.00% +0.45% / +0.19% +0.70% +0.51%] index_select perm : Elapsed 0.016 ms (1.579 ms / 100) 1.577 -> 1.576 ( -0.06%) [ +0.00% +0.00% +0.19% / -0.06% +0.51% +0.51%] index_select perm_sorted : Elapsed 0.016 ms (1.577 ms / 100) B = [5, 20, 16, 4] (stride (80, 1, 400, 20)) A = [40, 20, 16, 4] (stride (1280, 1, 80, 20)) dim = 0 1.466 -> 1.466 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.75% +0.75%] index_select const : Elapsed 0.015 ms (1.466 ms / 100) 1.448 -> 1.449 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.69% +0.62%] index_select wrap : Elapsed 0.014 ms (1.449 ms / 100) 1.459 -> 1.460 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.62% +0.75%] index_select linear : Elapsed 0.015 ms (1.460 ms / 100) 1.460 -> 1.463 ( +0.21%) [ +0.00% +0.21% +0.00% / +0.21% +0.68% +0.55%] index_select reverse : Elapsed 0.015 ms (1.460 ms / 100) 1.447 -> 1.448 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.62% +0.69%] index_select skip64 : Elapsed 0.014 ms (1.449 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +1.02% +0.89%] index_select skip256 : Elapsed 0.015 ms (1.465 ms / 100) 1.454 -> 1.455 ( +0.07%) [ +0.34% +0.07% +0.00% / +0.07% +0.96% +0.89%] index_select spread : Elapsed 0.015 ms (1.459 ms / 100) 1.448 -> 1.449 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.55% +0.62%] index_select strided 3 : Elapsed 0.014 ms (1.450 ms / 100) 1.458 -> 1.459 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.69% +0.75%] index_select strided 5 : Elapsed 0.015 ms (1.458 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.82% +0.89%] index_select strided 7 : Elapsed 0.015 ms (1.456 ms / 100) 1.464 -> 1.464 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.89% +0.89%] index_select strided 8 : Elapsed 0.015 ms (1.466 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.96% +0.89%] index_select strided 16 : Elapsed 0.015 ms (1.456 ms / 100) 1.465 -> 1.468 ( +0.20%) [ +0.14% +0.07% +0.00% / +0.20% +1.02% +0.96%] index_select random : Elapsed 0.015 ms (1.467 ms / 100) 1.448 -> 1.449 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.62% +0.62%] index_select random_sorted : Elapsed 0.014 ms (1.449 ms / 100) 1.459 -> 1.462 ( +0.21%) [ +0.07% +0.07% +0.00% / +0.21% +0.55% +0.62%] index_select perm : Elapsed 0.015 ms (1.460 ms / 100) 1.466 -> 1.466 ( +0.00%) [ +0.20% +0.07% +0.00% / +0.00% +1.02% +0.89%] index_select perm_sorted : Elapsed 0.015 ms (1.469 ms / 100) B = [5, 20, 16, 4] (stride (20, 1, 400, 100)) A = [40, 20, 16, 4] (stride (1, 2560, 40, 640)) dim = 0 1.503 -> 1.504 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.47% +0.53%] index_select const : Elapsed 0.015 ms (1.504 ms / 100) 1.502 -> 1.504 ( +0.13%) [ +0.00% +0.00% +0.07% / +0.13% +0.47% +0.40%] index_select wrap : Elapsed 0.015 ms (1.502 ms / 100) 1.502 -> 1.504 ( +0.13%) [ +0.20% +0.20% +0.00% / +0.13% +0.53% +0.47%] index_select linear : Elapsed 0.015 ms (1.505 ms / 100) 1.503 -> 1.503 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.47% +0.40%] index_select reverse : Elapsed 0.015 ms (1.505 ms / 100) 1.501 -> 1.502 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.53% +0.40%] index_select skip64 : Elapsed 0.015 ms (1.502 ms / 100) 1.503 -> 1.505 ( +0.13%) [ +0.00% +0.00% +0.07% / +0.13% +0.53% +0.40%] index_select skip256 : Elapsed 0.015 ms (1.503 ms / 100) 1.490 -> 1.492 ( +0.13%) [ +0.00% +0.07% +0.00% / +0.13% +0.54% +1.01%] index_select spread : Elapsed 0.015 ms (1.490 ms / 100) 1.492 -> 1.493 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.40% +0.40%] index_select strided 3 : Elapsed 0.015 ms (1.492 ms / 100) 1.491 -> 1.491 ( +0.00%) [ +0.13% +0.20% +0.00% / +0.00% +0.80% +0.20%] index_select strided 5 : Elapsed 0.015 ms (1.493 ms / 100) 1.491 -> 1.493 ( +0.13%) [ +0.00% +0.27% +0.00% / +0.13% +0.47% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.491 ms / 100) 1.502 -> 1.503 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.53% +0.47%] index_select strided 8 : Elapsed 0.015 ms (1.503 ms / 100) 1.483 -> 1.490 ( +0.47%) [ +0.00% +0.27% +0.47% / +0.47% +0.74% +0.94%] index_select strided 16 : Elapsed 0.015 ms (1.483 ms / 100) 1.493 -> 1.491 ( -0.13%) [ +0.07% +0.00% +0.07% / -0.13% +0.67% +0.33%] index_select random : Elapsed 0.015 ms (1.494 ms / 100) 1.487 -> 1.488 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.74% +0.61%] index_select random_sorted : Elapsed 0.015 ms (1.489 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.07% +0.13% +0.00% / +0.20% +0.60% +0.53%] index_select perm : Elapsed 0.015 ms (1.499 ms / 100) 1.495 -> 1.500 ( +0.33%) [ +0.20% +0.00% +0.13% / +0.33% +0.80% +0.87%] index_select perm_sorted : Elapsed 0.015 ms (1.498 ms / 100) B = [5, 20, 16, 4] (stride (320, 16, 1, 1600)) A = [40, 20, 16, 4] (stride (64, 2560, 1, 16)) dim = 0 1.381 -> 1.381 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.51% +0.51%] index_select const : Elapsed 0.014 ms (1.381 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.07% +0.14% / +0.14% +0.51% +0.43%] index_select wrap : Elapsed 0.014 ms (1.380 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.14% +0.07% +0.00% / +0.29% +0.51% +0.51%] index_select linear : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.15% +0.36% +0.00% / -0.07% +0.80% +0.65%] index_select reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.07% +0.07% +0.00% / +0.29% +0.73% +0.58%] index_select skip64 : Elapsed 0.014 ms (1.380 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.22% / +0.07% +0.58% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.381 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.36% +0.00% +0.22% / +0.00% +0.65% +0.73%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.36% +0.00% +0.14% / +0.07% +0.36% +0.51%] index_select strided 3 : Elapsed 0.014 ms (1.387 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.07% +0.15% +0.00% / +0.29% +0.58% +0.58%] index_select strided 5 : Elapsed 0.014 ms (1.380 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.43% +0.36%] index_select strided 7 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.00% +0.22% / +0.14% +0.51% +0.51%] index_select strided 8 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.51% +0.43%] index_select strided 16 : Elapsed 0.014 ms (1.381 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.14% +0.22% / +0.00% +0.51% +0.65%] index_select random : Elapsed 0.014 ms (1.380 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.51% +0.51%] index_select random_sorted : Elapsed 0.014 ms (1.381 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.22% +0.29% +0.00% / +0.15% +0.80% +0.80%] index_select perm : Elapsed 0.014 ms (1.381 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.29% +0.14% +0.00% / +0.29% +0.58% +0.65%] index_select perm_sorted : Elapsed 0.014 ms (1.384 ms / 100) B = [5, 20, 16, 4] (stride (16, 80, 1, 1600)) A = [40, 20, 16, 4] (stride (320, 16, 1, 12800)) dim = 0 1.384 -> 1.383 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.14% +0.07%] index_select const : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.29% +0.22%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.22% +0.22%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.22% +0.14%] index_select reverse : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.22% +0.22%] index_select skip64 : Elapsed 0.014 ms (1.384 ms / 100) 1.381 -> 1.384 ( +0.22%) [ +0.07% +0.22% +0.00% / +0.22% +0.29% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.36% +0.29% +0.00% / +0.29% +0.44% +0.44%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.384 -> 1.380 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.29% +0.14% +0.14%] index_select strided 3 : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.22% +0.22% +0.00% / +0.07% +0.43% +0.43%] index_select strided 5 : Elapsed 0.014 ms (1.383 ms / 100) 1.381 -> 1.380 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.29% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.385 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.22% +0.29%] index_select strided 8 : Elapsed 0.014 ms (1.385 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.29% +0.29%] index_select strided 16 : Elapsed 0.014 ms (1.383 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.58% +0.51%] index_select random : Elapsed 0.014 ms (1.382 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.29% +0.00% +0.22% / +0.29% +0.51% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.384 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.22% +0.15% +0.00% / +0.36% +0.58% +0.58%] index_select perm : Elapsed 0.014 ms (1.382 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.00% +0.36% +0.29% / +0.29% +0.58% +0.51%] index_select perm_sorted : Elapsed 0.014 ms (1.379 ms / 100) out_shape = [40, 5, 16, 4] in_shape = [40, 20, 16, 4] idx_dim = 1 B = [40, 5, 16, 4] (stride (320, 64, 1, 16)) A = [40, 20, 16, 4] (stride (1, 40, 3200, 800)) dim = 1 1.776 -> 1.777 ( +0.06%) [ +0.06% +0.34% +0.00% / +0.06% +0.39% +0.34%] index_select const : Elapsed 0.018 ms (1.777 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.00% +0.11% +0.11% / +0.17% +0.34% +0.62%] index_select wrap : Elapsed 0.018 ms (1.782 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.00% +0.51% +0.06% / +0.11% +1.01% +0.34%] index_select linear : Elapsed 0.018 ms (1.777 ms / 100) 1.791 -> 1.793 ( +0.11%) [ +0.17% +0.00% +0.00% / +0.11% +0.61% +0.67%] index_select reverse : Elapsed 0.018 ms (1.794 ms / 100) 1.781 -> 1.783 ( +0.11%) [ +0.06% +0.00% +0.11% / +0.11% +0.45% +0.51%] index_select skip64 : Elapsed 0.018 ms (1.782 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.28% +0.17% +0.00% / +0.17% +0.68% +0.56%] index_select skip256 : Elapsed 0.018 ms (1.778 ms / 100) 1.778 -> 1.781 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.17% +0.56% +0.67%] index_select spread : Elapsed 0.018 ms (1.782 ms / 100) 1.774 -> 1.777 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.56% +0.62%] index_select strided 3 : Elapsed 0.018 ms (1.777 ms / 100) 1.776 -> 1.777 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.51% +0.73%] index_select strided 5 : Elapsed 0.018 ms (1.777 ms / 100) 1.775 -> 1.774 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +1.01% +0.56%] index_select strided 7 : Elapsed 0.018 ms (1.776 ms / 100) 1.788 -> 1.790 ( +0.11%) [ +0.28% +0.17% +0.00% / +0.11% +0.95% +0.39%] index_select strided 8 : Elapsed 0.018 ms (1.793 ms / 100) 1.778 -> 1.778 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +1.12% +0.22%] index_select strided 16 : Elapsed 0.018 ms (1.778 ms / 100) 1.792 -> 1.791 ( -0.06%) [ +0.06% +0.11% +0.00% / +0.17% +0.06% -0.06%] index_select random : Elapsed 0.018 ms (1.793 ms / 100) 1.792 -> 1.789 ( -0.17%) [ +0.22% +0.00% +0.00% / -0.17% +0.50% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.796 ms / 100) 1.786 -> 1.787 ( +0.06%) [ +0.17% +0.00% +0.06% / +0.06% +0.45% +0.34%] index_select perm : Elapsed 0.018 ms (1.789 ms / 100) 1.775 -> 1.777 ( +0.11%) [ +0.17% +0.00% +0.17% / +0.11% +0.34% +0.96%] index_select perm_sorted : Elapsed 0.018 ms (1.778 ms / 100) B = [40, 5, 16, 4] (stride (320, 16, 1, 80)) A = [40, 20, 16, 4] (stride (1280, 64, 1, 16)) dim = 1 1.778 -> 1.780 ( +0.11%) [ +0.28% +0.06% +0.00% / +0.11% +0.17% +0.11%] index_select const : Elapsed 0.018 ms (1.783 ms / 100) 1.812 -> 1.809 ( -0.17%) [ +0.17% +0.00% +0.17% / -0.17% +0.28% +0.44%] index_select wrap : Elapsed 0.018 ms (1.815 ms / 100) 1.808 -> 1.809 ( +0.06%) [ +0.17% +0.00% +0.28% / +0.06% +0.66% +0.61%] index_select linear : Elapsed 0.018 ms (1.811 ms / 100) 1.808 -> 1.807 ( -0.06%) [ +0.06% +0.11% +0.00% / -0.06% +0.66% +0.77%] index_select reverse : Elapsed 0.018 ms (1.809 ms / 100) 1.774 -> 1.772 ( -0.11%) [ +0.06% +0.23% +0.00% / -0.11% +0.17% +0.39%] index_select skip64 : Elapsed 0.018 ms (1.775 ms / 100) 1.773 -> 1.771 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.39% +0.45%] index_select skip256 : Elapsed 0.018 ms (1.774 ms / 100) 1.805 -> 1.807 ( +0.11%) [ +0.22% +0.00% +0.11% / +0.11% +0.55% +0.28%] index_select spread : Elapsed 0.018 ms (1.809 ms / 100) 1.805 -> 1.810 ( +0.28%) [ +0.22% +0.44% +0.00% / +0.28% +0.39% +0.50%] index_select strided 3 : Elapsed 0.018 ms (1.809 ms / 100) 1.792 -> 1.796 ( +0.22%) [ +0.28% +0.22% +0.00% / +0.22% +0.56% +0.50%] index_select strided 5 : Elapsed 0.018 ms (1.797 ms / 100) 1.808 -> 1.809 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.22% +0.22%] index_select strided 7 : Elapsed 0.018 ms (1.809 ms / 100) 1.804 -> 1.807 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +0.33% +0.72%] index_select strided 8 : Elapsed 0.018 ms (1.808 ms / 100) 1.805 -> 1.809 ( +0.22%) [ +0.17% +0.00% +0.06% / +0.22% +0.39% +0.39%] index_select strided 16 : Elapsed 0.018 ms (1.808 ms / 100) 1.795 -> 1.801 ( +0.33%) [ +0.17% +0.00% +0.11% / +0.50% +0.33% +0.50%] index_select random : Elapsed 0.018 ms (1.798 ms / 100) 1.798 -> 1.800 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.44% +0.22%] index_select random_sorted : Elapsed 0.018 ms (1.799 ms / 100) 1.806 -> 1.806 ( +0.00%) [ +0.06% +0.00% +0.17% / +0.00% +0.39% +0.22%] index_select perm : Elapsed 0.018 ms (1.807 ms / 100) 1.806 -> 1.807 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.50% +0.61%] index_select perm_sorted : Elapsed 0.018 ms (1.806 ms / 100) B = [40, 5, 16, 4] (stride (320, 1, 5, 80)) A = [40, 20, 16, 4] (stride (4, 160, 3200, 1)) dim = 1 1.777 -> 1.777 ( +0.00%) [ +0.17% +0.23% +0.00% / +0.00% +0.56% +0.68%] index_select const : Elapsed 0.018 ms (1.780 ms / 100) 1.812 -> 1.812 ( +0.00%) [ +0.22% +0.00% +0.11% / +0.00% +1.27% +1.10%] index_select wrap : Elapsed 0.018 ms (1.816 ms / 100) 1.813 -> 1.814 ( +0.06%) [ +0.00% +0.17% +0.06% / +0.06% +1.21% +1.10%] index_select linear : Elapsed 0.018 ms (1.813 ms / 100) 1.810 -> 1.811 ( +0.06%) [ +0.39% +0.00% +0.22% / +0.06% +1.38% +1.44%] index_select reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.776 -> 1.782 ( +0.34%) [ +0.28% +0.28% +0.00% / +0.34% +0.51% +0.51%] index_select skip64 : Elapsed 0.018 ms (1.781 ms / 100) 1.777 -> 1.778 ( +0.06%) [ +0.23% +0.00% +0.23% / +0.06% +0.56% +0.62%] index_select skip256 : Elapsed 0.018 ms (1.781 ms / 100) 1.830 -> 1.835 ( +0.27%) [ +0.33% +0.11% +0.00% / +0.27% +0.77% +0.98%] index_select spread : Elapsed 0.018 ms (1.836 ms / 100) 1.822 -> 1.823 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.05% +0.44% +0.33%] index_select strided 3 : Elapsed 0.018 ms (1.825 ms / 100) 1.805 -> 1.808 ( +0.17%) [ +0.22% +0.00% +0.22% / +0.17% +0.89% +0.83%] index_select strided 5 : Elapsed 0.018 ms (1.809 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.17% +0.00% +0.17% / -0.06% +0.17% +0.06%] index_select strided 7 : Elapsed 0.018 ms (1.816 ms / 100) 1.830 -> 1.836 ( +0.33%) [ +0.38% +0.00% +0.00% / +0.33% +0.71% +0.82%] index_select strided 8 : Elapsed 0.018 ms (1.837 ms / 100) 1.825 -> 1.825 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.66% +0.71%] index_select strided 16 : Elapsed 0.018 ms (1.828 ms / 100) 1.822 -> 1.825 ( +0.16%) [ +0.22% +0.00% +0.22% / +0.16% +0.22% +0.38%] index_select random : Elapsed 0.018 ms (1.826 ms / 100) 1.810 -> 1.812 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.61% +0.66%] index_select random_sorted : Elapsed 0.018 ms (1.810 ms / 100) 1.815 -> 1.816 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.94% +1.05%] index_select perm : Elapsed 0.018 ms (1.817 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.00% +0.11% +0.17% / +0.22% +1.38% +1.05%] index_select perm_sorted : Elapsed 0.018 ms (1.813 ms / 100) B = [40, 5, 16, 4] (stride (5, 1, 800, 200)) A = [40, 20, 16, 4] (stride (16, 2560, 1, 640)) dim = 1 1.690 -> 1.690 ( +0.00%) [ +0.12% +0.18% +0.00% / +0.00% +0.18% +0.24%] index_select const : Elapsed 0.017 ms (1.692 ms / 100) 1.682 -> 1.683 ( +0.06%) [ +0.24% +0.24% +0.00% / +0.06% +0.24% +0.30%] index_select wrap : Elapsed 0.017 ms (1.686 ms / 100) 1.682 -> 1.684 ( +0.12%) [ +0.24% +0.24% +0.00% / +0.24% +0.48% +0.12%] index_select linear : Elapsed 0.017 ms (1.686 ms / 100) 1.681 -> 1.685 ( +0.24%) [ +0.24% +0.30% +0.00% / +0.24% +0.54% +0.42%] index_select reverse : Elapsed 0.017 ms (1.685 ms / 100) 1.689 -> 1.689 ( +0.00%) [ +0.06% +0.36% +0.00% / +0.00% +0.41% +0.41%] index_select skip64 : Elapsed 0.017 ms (1.690 ms / 100) 1.685 -> 1.691 ( +0.36%) [ +0.00% +0.12% +0.12% / +0.36% +0.53% +0.36%] index_select skip256 : Elapsed 0.017 ms (1.685 ms / 100) 1.716 -> 1.714 ( -0.12%) [ +0.23% +0.29% +0.00% / -0.12% +0.35% +0.06%] index_select spread : Elapsed 0.017 ms (1.720 ms / 100) 1.726 -> 1.703 ( -1.33%) [ +0.29% +0.00% +0.52% / +0.17% -1.33% -1.27%] index_select strided 3 : Elapsed 0.017 ms (1.731 ms / 100) 1.701 -> 1.700 ( -0.06%) [ +0.24% +0.29% +0.00% / +0.18% +0.24% -0.06%] index_select strided 5 : Elapsed 0.017 ms (1.705 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.35% +0.00% +0.41% / +0.12% +0.70% +0.53%] index_select strided 7 : Elapsed 0.017 ms (1.714 ms / 100) 1.714 -> 1.711 ( -0.18%) [ +0.29% +0.29% +0.00% / -0.18% +0.29% +0.18%] index_select strided 8 : Elapsed 0.017 ms (1.719 ms / 100) 1.712 -> 1.712 ( +0.00%) [ +0.12% +0.35% +0.00% / +0.00% +0.47% +0.41%] index_select strided 16 : Elapsed 0.017 ms (1.714 ms / 100) 1.709 -> 1.711 ( +0.12%) [ +0.00% +0.18% +0.29% / +0.12% +0.88% +0.47%] index_select random : Elapsed 0.017 ms (1.709 ms / 100) 1.714 -> 1.712 ( -0.12%) [ +0.00% +0.18% +0.00% / +0.12% -0.12% +0.18%] index_select random_sorted : Elapsed 0.017 ms (1.714 ms / 100) 1.699 -> 1.699 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +1.29% +1.12%] index_select perm : Elapsed 0.017 ms (1.701 ms / 100) 1.700 -> 1.701 ( +0.06%) [ +0.18% +0.00% +0.12% / +0.06% +1.12% +1.00%] index_select perm_sorted : Elapsed 0.017 ms (1.703 ms / 100) out_shape = [40, 20, 5, 4] in_shape = [40, 20, 16, 4] idx_dim = 2 B = [40, 20, 5, 4] (stride (400, 20, 4, 1)) A = [40, 20, 16, 4] (stride (1280, 4, 80, 1)) dim = 2 2.092 -> 2.091 ( -0.05%) [ +0.29% +0.24% +0.00% / -0.05% +1.29% +0.67%] index_select const : Elapsed 0.021 ms (2.098 ms / 100) 2.153 -> 2.164 ( +0.51%) [ +0.05% +0.00% +0.05% / +0.51% +0.98% +1.30%] index_select wrap : Elapsed 0.022 ms (2.154 ms / 100) 2.164 -> 2.167 ( +0.14%) [ +0.00% +0.18% +0.23% / +0.14% +0.97% +1.02%] index_select linear : Elapsed 0.022 ms (2.164 ms / 100) 2.152 -> 2.159 ( +0.33%) [ +0.51% +0.00% +0.46% / +0.33% +1.49% +1.72%] index_select reverse : Elapsed 0.022 ms (2.163 ms / 100) 2.096 -> 2.093 ( -0.14%) [ +0.14% +0.00% +0.05% / -0.14% +0.72% +0.43%] index_select skip64 : Elapsed 0.021 ms (2.099 ms / 100) 2.091 -> 2.094 ( +0.14%) [ +0.19% +0.00% +0.43% / +0.14% +1.24% +0.81%] index_select skip256 : Elapsed 0.021 ms (2.095 ms / 100) 2.161 -> 2.160 ( -0.05%) [ +0.00% +0.14% +0.05% / -0.05% +1.16% +1.39%] index_select spread : Elapsed 0.022 ms (2.161 ms / 100) 2.178 -> 2.174 ( -0.18%) [ +0.18% +0.09% +0.00% / -0.18% +0.78% +0.51%] index_select strided 3 : Elapsed 0.022 ms (2.182 ms / 100) 2.172 -> 2.170 ( -0.09%) [ +0.09% +0.14% +0.00% / -0.09% +0.55% +0.41%] index_select strided 5 : Elapsed 0.022 ms (2.174 ms / 100) 2.149 -> 2.151 ( +0.09%) [ +0.33% +0.19% +0.00% / +0.09% +1.72% +1.44%] index_select strided 7 : Elapsed 0.022 ms (2.156 ms / 100) 2.106 -> 2.118 ( +0.57%) [ +0.85% +0.19% +0.00% / +0.57% +1.57% +0.76%] index_select strided 8 : Elapsed 0.021 ms (2.124 ms / 100) 2.160 -> 2.162 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.79% +0.88%] index_select random : Elapsed 0.022 ms (2.160 ms / 100) 2.153 -> 2.157 ( +0.19%) [ +0.28% +0.00% +0.23% / +0.19% +1.21% +1.30%] index_select random_sorted : Elapsed 0.022 ms (2.159 ms / 100) 2.162 -> 2.163 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +1.20% +0.93%] index_select perm : Elapsed 0.022 ms (2.163 ms / 100) 2.153 -> 2.156 ( +0.14%) [ +0.33% +0.23% +0.00% / +0.14% +0.79% +0.84%] index_select perm_sorted : Elapsed 0.022 ms (2.160 ms / 100) B = [40, 20, 5, 4] (stride (400, 20, 4, 1)) A = [40, 20, 16, 4] (stride (4, 2560, 160, 1)) dim = 2 2.177 -> 2.177 ( +0.00%) [ +0.00% +0.23% +0.14% / +0.00% +0.41% +0.28%] index_select const : Elapsed 0.022 ms (2.177 ms / 100) 2.224 -> 2.228 ( +0.18%) [ +0.18% +0.40% +0.00% / +0.18% +0.22% +0.31%] index_select wrap : Elapsed 0.022 ms (2.228 ms / 100) 2.223 -> 2.228 ( +0.22%) [ +0.00% +0.18% +0.09% / +0.22% +0.36% +0.63%] index_select linear : Elapsed 0.022 ms (2.223 ms / 100) 2.216 -> 2.223 ( +0.32%) [ +0.27% +0.00% +0.27% / +0.32% +0.41% +0.36%] index_select reverse : Elapsed 0.022 ms (2.222 ms / 100) 2.179 -> 2.181 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.37% +0.41%] index_select skip64 : Elapsed 0.022 ms (2.179 ms / 100) 2.178 -> 2.180 ( +0.09%) [ +0.00% +0.05% +0.00% / +0.09% +0.37% +0.55%] index_select skip256 : Elapsed 0.022 ms (2.178 ms / 100) 2.213 -> 2.214 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.36% +0.27%] index_select spread : Elapsed 0.022 ms (2.216 ms / 100) 2.220 -> 2.221 ( +0.05%) [ +0.45% +0.05% +0.00% / +0.05% +0.41% +0.18%] index_select strided 3 : Elapsed 0.022 ms (2.230 ms / 100) 2.228 -> 2.231 ( +0.13%) [ +0.22% +0.00% +0.22% / +0.13% +0.40% +0.18%] index_select strided 5 : Elapsed 0.022 ms (2.233 ms / 100) 2.222 -> 2.221 ( -0.05%) [ +0.09% +0.00% +0.09% / -0.05% +0.27% +0.09%] index_select strided 7 : Elapsed 0.022 ms (2.224 ms / 100) 2.194 -> 2.196 ( +0.09%) [ +0.27% +0.00% +0.18% / +0.09% +0.36% +0.23%] index_select strided 8 : Elapsed 0.022 ms (2.200 ms / 100) 2.220 -> 2.221 ( +0.05%) [ +0.00% +0.14% +0.09% / +0.14% +0.36% +0.05%] index_select random : Elapsed 0.022 ms (2.220 ms / 100) 2.207 -> 2.212 ( +0.23%) [ +0.00% +0.27% +0.05% / +0.27% +0.23% +0.36%] index_select random_sorted : Elapsed 0.022 ms (2.207 ms / 100) 2.213 -> 2.217 ( +0.18%) [ +0.05% +0.00% +0.05% / +0.32% +0.32% +0.18%] index_select perm : Elapsed 0.022 ms (2.214 ms / 100) 2.216 -> 2.220 ( +0.18%) [ +0.27% +0.09% +0.00% / +0.18% +0.18% +0.36%] index_select perm_sorted : Elapsed 0.022 ms (2.222 ms / 100) B = [40, 20, 5, 4] (stride (400, 1, 80, 20)) A = [40, 20, 16, 4] (stride (1, 160, 3200, 40)) dim = 2 2.455 -> 2.457 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.41% +0.41%] index_select const : Elapsed 0.025 ms (2.458 ms / 100) 2.469 -> 2.475 ( +0.24%) [ +0.12% +0.28% +0.00% / +0.24% +0.53% +0.45%] index_select wrap : Elapsed 0.025 ms (2.472 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.04% +0.00% +0.12% / -0.04% +0.12% +0.28%] index_select linear : Elapsed 0.025 ms (2.473 ms / 100) 2.472 -> 2.475 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.40% +0.40%] index_select reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.459 -> 2.462 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.41% +0.53%] index_select skip64 : Elapsed 0.025 ms (2.462 ms / 100) 2.454 -> 2.458 ( +0.16%) [ +0.00% +0.20% +0.12% / +0.16% +0.57% +0.33%] index_select skip256 : Elapsed 0.025 ms (2.454 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.12% +0.08% +0.00% / -0.04% +0.28% +0.45%] index_select spread : Elapsed 0.025 ms (2.474 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.16% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.12% +0.32%] index_select strided 5 : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.471 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.65% +0.53%] index_select strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.61% +0.61%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.469 -> 2.472 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.24% +0.36%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.36% +0.28%] index_select random_sorted : Elapsed 0.025 ms (2.470 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.28% +0.41%] index_select perm : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.36% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) B = [40, 20, 5, 4] (stride (400, 5, 1, 100)) A = [40, 20, 16, 4] (stride (320, 16, 1, 12800)) dim = 2 2.285 -> 2.284 ( -0.04%) [ +0.00% +0.04% +0.18% / +0.09% -0.04% +0.00%] index_select const : Elapsed 0.023 ms (2.285 ms / 100) 2.274 -> 2.275 ( +0.04%) [ +0.00% +0.18% +0.00% / +0.04% +0.04% +0.13%] index_select wrap : Elapsed 0.023 ms (2.274 ms / 100) 2.275 -> 2.275 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.26% +0.18%] index_select linear : Elapsed 0.023 ms (2.277 ms / 100) 2.273 -> 2.274 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.09% +0.18%] index_select reverse : Elapsed 0.023 ms (2.274 ms / 100) 2.273 -> 2.272 ( -0.04%) [ +0.09% +0.00% +0.04% / +0.09% +0.26% -0.04%] index_select skip64 : Elapsed 0.023 ms (2.275 ms / 100) 2.281 -> 2.281 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.13% +0.09% +0.00%] index_select skip256 : Elapsed 0.023 ms (2.283 ms / 100) 2.288 -> 2.286 ( -0.09%) [ +0.00% +0.13% +0.17% / +0.17% -0.09% +0.17%] index_select spread : Elapsed 0.023 ms (2.288 ms / 100) 2.292 -> 2.285 ( -0.31%) [ +0.13% +0.09% +0.00% / -0.13% -0.31% +0.04%] index_select strided 3 : Elapsed 0.023 ms (2.295 ms / 100) 2.289 -> 2.289 ( +0.00%) [ +0.26% +0.09% +0.00% / +0.13% +0.22% +0.00%] index_select strided 5 : Elapsed 0.023 ms (2.295 ms / 100) 2.289 -> 2.286 ( -0.13%) [ +0.04% +0.26% +0.00% / +0.13% -0.09% -0.13%] index_select strided 7 : Elapsed 0.023 ms (2.290 ms / 100) 2.302 -> 2.299 ( -0.13%) [ +0.22% +0.04% +0.00% / +0.17% -0.04% -0.13%] index_select strided 8 : Elapsed 0.023 ms (2.307 ms / 100) 2.286 -> 2.285 ( -0.04%) [ +0.00% +0.17% +0.22% / +0.04% +0.00% -0.04%] index_select random : Elapsed 0.023 ms (2.286 ms / 100) 2.289 -> 2.290 ( +0.04%) [ +0.00% +0.09% +0.31% / +0.04% +0.09% +0.44%] index_select random_sorted : Elapsed 0.023 ms (2.289 ms / 100) 2.287 -> 2.290 ( +0.13%) [ +0.00% +0.04% +0.22% / +0.39% +0.13% +0.31%] index_select perm : Elapsed 0.023 ms (2.287 ms / 100) 2.285 -> 2.285 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.09% +0.13% +0.00%] index_select perm_sorted : Elapsed 0.023 ms (2.288 ms / 100) B = [40, 20, 5, 4] (stride (4, 800, 160, 1)) A = [40, 20, 16, 4] (stride (4, 2560, 160, 1)) dim = 2 2.302 -> 2.306 ( +0.17%) [ +0.00% +0.09% +0.09% / +0.17% +0.65% +0.56%] index_select const : Elapsed 0.023 ms (2.302 ms / 100) 2.346 -> 2.349 ( +0.13%) [ +0.21% +0.17% +0.00% / +0.13% +0.17% +0.47%] index_select wrap : Elapsed 0.024 ms (2.351 ms / 100) 2.364 -> 2.365 ( +0.04%) [ +0.13% +0.00% +0.34% / +0.04% +0.38% +0.17%] index_select linear : Elapsed 0.024 ms (2.367 ms / 100) 2.356 -> 2.362 ( +0.25%) [ +0.21% +0.13% +0.00% / +0.30% +0.34% +0.25%] index_select reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.299 -> 2.306 ( +0.30%) [ +0.35% +0.30% +0.00% / +0.30% +0.78% +0.61%] index_select skip64 : Elapsed 0.023 ms (2.307 ms / 100) 2.303 -> 2.303 ( +0.00%) [ +0.00% +0.13% +0.09% / +0.00% +0.39% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.303 ms / 100) 2.346 -> 2.345 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.04% +0.26%] index_select spread : Elapsed 0.023 ms (2.348 ms / 100) 2.354 -> 2.352 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.17% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.354 ms / 100) 2.356 -> 2.361 ( +0.21%) [ +0.13% +0.04% +0.00% / +0.21% +0.59% +0.93%] index_select strided 5 : Elapsed 0.024 ms (2.359 ms / 100) 2.347 -> 2.352 ( +0.21%) [ +0.30% +0.51% +0.00% / +0.21% +0.77% +0.94%] index_select strided 7 : Elapsed 0.024 ms (2.354 ms / 100) 2.319 -> 2.320 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +0.60% +0.52%] index_select strided 8 : Elapsed 0.023 ms (2.322 ms / 100) 2.347 -> 2.355 ( +0.34%) [ +0.21% +0.00% +0.09% / +0.34% +0.60% +0.68%] index_select random : Elapsed 0.024 ms (2.352 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.09% +0.00% +0.17% / +0.17% +0.47% +0.34%] index_select random_sorted : Elapsed 0.024 ms (2.353 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.13% +0.00% +0.21% / +0.13% +0.55% +0.72%] index_select perm : Elapsed 0.024 ms (2.353 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.21% +0.00% +0.04% / +0.08% +0.25% +0.51%] index_select perm_sorted : Elapsed 0.024 ms (2.361 ms / 100) B = [40, 20, 5, 4] (stride (1, 800, 160, 40)) A = [40, 20, 16, 4] (stride (16, 2560, 1, 640)) dim = 2 2.507 -> 2.511 ( +0.16%) [ +0.00% +0.00% +0.04% / +0.16% +0.28% +0.20%] index_select const : Elapsed 0.025 ms (2.507 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.00% +0.20% +0.36%] index_select wrap : Elapsed 0.025 ms (2.509 ms / 100) 2.501 -> 2.506 ( +0.20%) [ +0.12% +0.00% +0.16% / +0.20% +0.36% +0.48%] index_select linear : Elapsed 0.025 ms (2.504 ms / 100) 2.502 -> 2.498 ( -0.16%) [ +0.08% +0.20% +0.00% / -0.16% +0.44% +0.48%] index_select reverse : Elapsed 0.025 ms (2.504 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.12% +0.56% +0.32%] index_select skip64 : Elapsed 0.025 ms (2.502 ms / 100) 2.499 -> 2.502 ( +0.12%) [ +0.28% +0.08% +0.00% / +0.12% +0.60% +0.68%] index_select skip256 : Elapsed 0.025 ms (2.506 ms / 100) 2.527 -> 2.524 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.16% +0.20% -0.12%] index_select spread : Elapsed 0.025 ms (2.528 ms / 100) 2.522 -> 2.530 ( +0.32%) [ +0.24% +0.04% +0.00% / +0.32% +0.40% +0.40%] index_select strided 3 : Elapsed 0.025 ms (2.528 ms / 100) 2.530 -> 2.527 ( -0.12%) [ +0.16% +0.04% +0.00% / -0.12% +0.08% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.534 ms / 100) 2.525 -> 2.531 ( +0.24%) [ +0.20% +0.16% +0.00% / +0.28% +0.24% +0.55%] index_select strided 7 : Elapsed 0.025 ms (2.530 ms / 100) 2.530 -> 2.533 ( +0.12%) [ +0.12% +0.00% +0.04% / +0.12% +0.32% +0.40%] index_select strided 8 : Elapsed 0.025 ms (2.533 ms / 100) 2.502 -> 2.507 ( +0.20%) [ +0.20% +0.00% +0.16% / +0.20% +0.52% +0.36%] index_select random : Elapsed 0.025 ms (2.507 ms / 100) 2.500 -> 2.509 ( +0.36%) [ +0.08% +0.24% +0.00% / +0.36% +0.44% +0.52%] index_select random_sorted : Elapsed 0.025 ms (2.502 ms / 100) 2.525 -> 2.528 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.36% +0.16%] index_select perm : Elapsed 0.025 ms (2.525 ms / 100) 2.526 -> 2.528 ( +0.08%) [ +0.00% +0.04% +0.20% / +0.08% +0.16% +0.48%] index_select perm_sorted : Elapsed 0.025 ms (2.526 ms / 100) B = [40, 20, 5, 4] (stride (1, 40, 3200, 800)) A = [40, 20, 16, 4] (stride (64, 2560, 1, 16)) dim = 2 2.406 -> 2.410 ( +0.17%) [ +0.25% +0.08% +0.00% / +0.17% +0.25% +0.17%] index_select const : Elapsed 0.024 ms (2.412 ms / 100) 2.396 -> 2.401 ( +0.21%) [ +0.25% +0.38% +0.00% / +0.21% +0.38% +0.38%] index_select wrap : Elapsed 0.024 ms (2.402 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.04% +0.00% +0.50% / -0.08% +0.75% +0.12%] index_select linear : Elapsed 0.024 ms (2.406 ms / 100) 2.414 -> 2.409 ( -0.21%) [ +0.12% +0.29% +0.00% / -0.21% +0.12% +0.00%] index_select reverse : Elapsed 0.024 ms (2.417 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.38% +0.00% +0.25% / +0.08% +0.46% +0.54%] index_select skip64 : Elapsed 0.024 ms (2.407 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.25% +0.04% +0.00% / +0.08% +0.25% +0.50%] index_select skip256 : Elapsed 0.024 ms (2.413 ms / 100) 2.428 -> 2.429 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.04% +0.54% +0.25%] index_select spread : Elapsed 0.024 ms (2.431 ms / 100) 2.419 -> 2.424 ( +0.21%) [ +0.00% +0.08% +0.04% / +0.50% +0.21% +0.45%] index_select strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.421 -> 2.433 ( +0.50%) [ +0.00% +0.00% +0.17% / +0.74% +0.70% +0.50%] index_select strided 5 : Elapsed 0.024 ms (2.421 ms / 100) 2.427 -> 2.434 ( +0.29%) [ +0.00% +0.29% +0.25% / +0.29% +0.41% +0.54%] index_select strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.425 -> 2.427 ( +0.08%) [ +0.37% +0.00% +0.12% / +0.08% +0.37% +0.62%] index_select strided 8 : Elapsed 0.024 ms (2.434 ms / 100) 2.422 -> 2.418 ( -0.17%) [ +0.12% +0.00% +0.25% / -0.17% +0.58% +0.58%] index_select random : Elapsed 0.024 ms (2.425 ms / 100) 2.428 -> 2.430 ( +0.08%) [ +0.29% +0.00% +0.25% / +0.08% +0.45% +0.33%] index_select random_sorted : Elapsed 0.024 ms (2.435 ms / 100) 2.418 -> 2.428 ( +0.41%) [ +0.37% +0.12% +0.00% / +0.87% +0.50% +0.41%] index_select perm : Elapsed 0.024 ms (2.427 ms / 100) 2.424 -> 2.422 ( -0.08%) [ +0.00% +0.12% +0.04% / +0.00% +0.17% -0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [40, 20, 5, 4] (stride (100, 5, 1, 4000)) A = [40, 20, 16, 4] (stride (16, 640, 1, 12800)) dim = 2 2.366 -> 2.360 ( -0.25%) [ +0.17% +0.00% +0.04% / +0.08% -0.08% -0.25%] index_select const : Elapsed 0.024 ms (2.370 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.00% +0.34% +0.47% / +0.38% +0.08% +0.30%] index_select wrap : Elapsed 0.024 ms (2.359 ms / 100) 2.360 -> 2.366 ( +0.25%) [ +0.00% +0.17% +0.34% / +0.30% +0.25% +0.30%] index_select linear : Elapsed 0.024 ms (2.360 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.59% +0.42%] index_select reverse : Elapsed 0.024 ms (2.366 ms / 100) 2.361 -> 2.357 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.17% +0.04% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.361 ms / 100) 2.359 -> 2.364 ( +0.21%) [ +0.17% +0.21% +0.00% / +0.21% +0.34% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.363 ms / 100) 2.388 -> 2.388 ( +0.00%) [ +0.04% +0.29% +0.00% / +0.13% +0.00% +0.17%] index_select spread : Elapsed 0.024 ms (2.389 ms / 100) 2.389 -> 2.386 ( -0.13%) [ +0.21% +0.21% +0.00% / +0.17% +0.25% -0.13%] index_select strided 3 : Elapsed 0.024 ms (2.394 ms / 100) 2.389 -> 2.389 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.04% +0.29% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.389 ms / 100) 2.381 -> 2.390 ( +0.38%) [ +0.00% +0.38% +0.08% / +0.38% +0.50% +0.59%] index_select strided 7 : Elapsed 0.024 ms (2.381 ms / 100) 2.389 -> 2.388 ( -0.04%) [ +0.21% +0.00% +0.13% / +0.13% +0.04% -0.04%] index_select strided 8 : Elapsed 0.024 ms (2.394 ms / 100) 2.389 -> 2.382 ( -0.29%) [ +0.00% +0.00% +0.00% / +0.04% -0.29% +0.00%] index_select random : Elapsed 0.024 ms (2.389 ms / 100) 2.384 -> 2.386 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.34% +0.38% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.385 ms / 100) 2.386 -> 2.394 ( +0.34%) [ +0.29% +0.21% +0.00% / +0.54% +0.38% +0.34%] index_select perm : Elapsed 0.024 ms (2.393 ms / 100) 2.387 -> 2.383 ( -0.17%) [ +0.25% +0.04% +0.00% / +0.04% +0.04% -0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.393 ms / 100) B = [40, 20, 5, 4] (stride (1, 200, 40, 4000)) A = [40, 20, 16, 4] (stride (320, 1, 20, 12800)) dim = 2 2.291 -> 2.294 ( +0.13%) [ +0.26% +0.31% +0.00% / +0.13% +0.31% +0.44%] index_select const : Elapsed 0.023 ms (2.297 ms / 100) 2.342 -> 2.342 ( +0.00%) [ +0.00% +0.09% +0.13% / +0.00% +0.47% +0.38%] index_select wrap : Elapsed 0.023 ms (2.342 ms / 100) 2.352 -> 2.349 ( -0.13%) [ +0.04% +0.17% +0.00% / -0.13% +0.30% +0.68%] index_select linear : Elapsed 0.024 ms (2.353 ms / 100) 2.346 -> 2.347 ( +0.04%) [ +0.00% +0.00% +0.13% / +0.04% +0.72% +0.77%] index_select reverse : Elapsed 0.023 ms (2.346 ms / 100) 2.290 -> 2.294 ( +0.17%) [ +0.04% +0.35% +0.00% / +0.17% +0.48% +0.39%] index_select skip64 : Elapsed 0.023 ms (2.291 ms / 100) 2.295 -> 2.287 ( -0.35%) [ +0.09% +0.09% +0.00% / -0.35% +0.35% +0.39%] index_select skip256 : Elapsed 0.023 ms (2.297 ms / 100) 2.352 -> 2.359 ( +0.30%) [ +0.43% +0.64% +0.00% / +0.30% +0.55% +0.89%] index_select spread : Elapsed 0.024 ms (2.362 ms / 100) 2.358 -> 2.353 ( -0.21%) [ +0.00% +0.00% +0.13% / -0.21% +0.59% +0.51%] index_select strided 3 : Elapsed 0.024 ms (2.358 ms / 100) 2.354 -> 2.349 ( -0.21%) [ +0.17% +0.04% +0.00% / -0.21% +0.38% +0.38%] index_select strided 5 : Elapsed 0.024 ms (2.358 ms / 100) 2.354 -> 2.356 ( +0.08%) [ +0.25% +0.00% +0.04% / +0.13% +0.34% +0.08%] index_select strided 7 : Elapsed 0.024 ms (2.360 ms / 100) 2.306 -> 2.309 ( +0.13%) [ +0.13% +0.30% +0.00% / +0.13% +0.56% +0.56%] index_select strided 8 : Elapsed 0.023 ms (2.309 ms / 100) 2.338 -> 2.334 ( -0.17%) [ +0.00% +0.00% +0.04% / -0.17% +0.90% +0.51%] index_select random : Elapsed 0.023 ms (2.338 ms / 100) 2.333 -> 2.334 ( +0.04%) [ +0.60% +0.00% +0.39% / +0.04% +0.81% +0.77%] index_select random_sorted : Elapsed 0.023 ms (2.347 ms / 100) 2.367 -> 2.367 ( +0.00%) [ +0.21% +0.00% +0.13% / +0.00% +0.34% +0.30%] index_select perm : Elapsed 0.024 ms (2.372 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.04% +0.30% +0.00% / +0.17% +0.26% +0.60%] index_select perm_sorted : Elapsed 0.024 ms (2.352 ms / 100) out_shape = [40, 20, 16, 5] in_shape = [40, 20, 16, 4] idx_dim = 3 B = [40, 20, 16, 5] (stride (1600, 80, 5, 1)) A = [40, 20, 16, 4] (stride (16, 2560, 1, 640)) dim = 3 5.447 -> 5.448 ( +0.02%) [ +0.00% +0.17% +0.24% / +0.02% +0.18% +0.20%] index_add_ linear : Elapsed 0.054 ms (5.447 ms / 100) 5.429 -> 5.425 ( -0.07%) [ +0.00% +0.11% +0.02% / +0.09% +0.15% -0.07%] index_copy_ linear : Elapsed 0.054 ms (5.429 ms / 100) 5.449 -> 5.453 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.13% +0.15% +0.07%] index_add_ reverse : Elapsed 0.054 ms (5.449 ms / 100) 5.427 -> 5.429 ( +0.04%) [ +0.04% +0.00% +0.17% / +0.06% +0.15% +0.04%] index_copy_ reverse : Elapsed 0.054 ms (5.429 ms / 100) 5.454 -> 5.453 ( -0.02%) [ +0.00% +0.00% +0.04% / +0.09% +0.09% -0.02%] index_add_ spread : Elapsed 0.055 ms (5.454 ms / 100) 5.430 -> 5.426 ( -0.07%) [ +0.06% +0.00% +0.13% / +0.02% -0.07% -0.07%] index_copy_ spread : Elapsed 0.054 ms (5.433 ms / 100) 5.455 -> 5.452 ( -0.05%) [ +0.05% +0.04% +0.00% / +0.00% -0.05% -0.04%] index_add_ strided 3 : Elapsed 0.055 ms (5.458 ms / 100) 5.430 -> 5.421 ( -0.17%) [ +0.02% +0.00% +0.09% / +0.09% -0.17% -0.15%] index_copy_ strided 3 : Elapsed 0.054 ms (5.431 ms / 100) 5.442 -> 5.449 ( +0.13%) [ +0.24% +0.00% +0.09% / +0.15% +0.15% +0.13%] index_add_ perm : Elapsed 0.055 ms (5.455 ms / 100) 5.422 -> 5.415 ( -0.13%) [ +0.06% +0.00% +0.02% / +0.17% +0.02% -0.13%] index_copy_ perm : Elapsed 0.054 ms (5.425 ms / 100) 5.453 -> 5.449 ( -0.07%) [ +0.04% +0.00% +0.07% / -0.04% +0.11% -0.07%] index_add_ perm_sorted : Elapsed 0.055 ms (5.455 ms / 100) 5.430 -> 5.426 ( -0.07%) [ +0.00% +0.00% +0.04% / +0.17% -0.06% -0.07%] index_copy_ perm_sorted : Elapsed 0.054 ms (5.430 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.00% +0.13% +0.29% / +0.16% -0.11% -0.05%] index_select const : Elapsed 0.055 ms (5.490 ms / 100) 5.601 -> 5.597 ( -0.07%) [ +0.07% +0.00% +0.20% / +0.18% -0.07% -0.02%] index_select wrap : Elapsed 0.056 ms (5.605 ms / 100) 5.591 -> 5.591 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.11% +0.05% +0.00%] index_select linear : Elapsed 0.056 ms (5.591 ms / 100) 5.589 -> 5.583 ( -0.11%) [ +0.05% +0.00% +0.07% / +0.16% -0.11% +0.00%] index_select reverse : Elapsed 0.056 ms (5.592 ms / 100) 5.474 -> 5.466 ( -0.15%) [ +0.09% +0.00% +0.20% / +0.09% -0.15% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.479 ms / 100) 5.491 -> 5.482 ( -0.16%) [ +0.20% +0.00% +0.07% / +0.15% -0.16% -0.13%] index_select skip256 : Elapsed 0.055 ms (5.502 ms / 100) 5.573 -> 5.574 ( +0.02%) [ +0.20% +0.16% +0.00% / +0.14% +0.14% +0.02%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.601 -> 5.594 ( -0.12%) [ +0.00% +0.04% +0.14% / +0.18% -0.12% -0.11%] index_select strided 3 : Elapsed 0.056 ms (5.601 ms / 100) 5.557 -> 5.560 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.32% +0.22%] index_select random : Elapsed 0.056 ms (5.557 ms / 100) 5.550 -> 5.556 ( +0.11%) [ +0.00% +0.29% +0.14% / +0.11% +0.16% +0.23%] index_select random_sorted : Elapsed 0.056 ms (5.550 ms / 100) B = [40, 20, 16, 5] (stride (1600, 1, 20, 320)) A = [40, 20, 16, 4] (stride (1280, 16, 1, 320)) dim = 3 5.528 -> 5.528 ( +0.00%) [ +0.00% +0.07% +0.09% / +0.00% +0.90% +0.85%] index_add_ linear : Elapsed 0.055 ms (5.528 ms / 100) 5.469 -> 5.470 ( +0.02%) [ +0.13% +0.00% +0.33% / +0.02% +0.84% +0.77%] index_copy_ linear : Elapsed 0.055 ms (5.476 ms / 100) 5.536 -> 5.533 ( -0.05%) [ +0.02% +0.00% +0.02% / -0.05% +0.61% +0.65%] index_add_ reverse : Elapsed 0.055 ms (5.537 ms / 100) 5.471 -> 5.476 ( +0.09%) [ +0.24% +0.00% +0.07% / +0.09% +0.86% +0.62%] index_copy_ reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.529 -> 5.536 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.80% +0.74%] index_add_ spread : Elapsed 0.055 ms (5.533 ms / 100) 5.477 -> 5.482 ( +0.09%) [ +0.07% +0.00% +0.04% / +0.09% +0.71% +0.73%] index_copy_ spread : Elapsed 0.055 ms (5.481 ms / 100) 5.532 -> 5.539 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.52% +0.58%] index_add_ strided 3 : Elapsed 0.055 ms (5.532 ms / 100) 5.472 -> 5.481 ( +0.16%) [ +0.13% +0.00% +0.20% / +0.16% +0.57% +0.46%] index_copy_ strided 3 : Elapsed 0.055 ms (5.479 ms / 100) 5.548 -> 5.546 ( -0.04%) [ +0.02% +0.00% +0.00% / -0.04% +0.27% +0.40%] index_add_ perm : Elapsed 0.055 ms (5.549 ms / 100) 5.484 -> 5.487 ( +0.05%) [ +0.00% +0.05% +0.07% / +0.05% +0.42% +0.38%] index_copy_ perm : Elapsed 0.055 ms (5.484 ms / 100) 5.526 -> 5.534 ( +0.14%) [ +0.00% +0.22% +0.00% / +0.14% +1.07% +1.05%] index_add_ perm_sorted : Elapsed 0.055 ms (5.526 ms / 100) 5.472 -> 5.477 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.80% +0.93%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.477 ms / 100) 5.671 -> 5.673 ( +0.04%) [ +0.00% +0.02% +0.05% / +0.04% +1.01% +0.85%] index_select const : Elapsed 0.057 ms (5.671 ms / 100) 5.772 -> 5.773 ( +0.02%) [ +0.00% +0.03% +0.03% / +0.02% +1.06% +0.78%] index_select wrap : Elapsed 0.058 ms (5.772 ms / 100) 5.765 -> 5.769 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +1.04% +0.95%] index_select linear : Elapsed 0.058 ms (5.770 ms / 100) 5.774 -> 5.774 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.64% +0.54%] index_select reverse : Elapsed 0.058 ms (5.774 ms / 100) 5.678 -> 5.686 ( +0.14%) [ +0.09% +0.00% +0.04% / +0.14% +0.93% +1.06%] index_select skip64 : Elapsed 0.057 ms (5.683 ms / 100) 5.670 -> 5.666 ( -0.07%) [ +0.00% +0.05% +0.05% / -0.07% +0.92% +0.99%] index_select skip256 : Elapsed 0.057 ms (5.670 ms / 100) 5.773 -> 5.775 ( +0.03%) [ +0.03% +0.00% +0.12% / +0.03% +1.06% +0.88%] index_select spread : Elapsed 0.058 ms (5.775 ms / 100) 5.775 -> 5.771 ( -0.07%) [ +0.05% +0.00% +0.07% / -0.07% +0.88% +0.80%] index_select strided 3 : Elapsed 0.058 ms (5.778 ms / 100) 5.748 -> 5.758 ( +0.17%) [ +0.03% +0.07% +0.00% / +0.17% +0.59% +0.71%] index_select random : Elapsed 0.058 ms (5.750 ms / 100) 5.737 -> 5.738 ( +0.02%) [ +0.09% +0.00% +0.02% / +0.02% +0.82% +0.85%] index_select random_sorted : Elapsed 0.057 ms (5.742 ms / 100) B = [40, 20, 16, 5] (stride (1, 40, 4000, 800)) A = [40, 20, 16, 4] (stride (1280, 1, 80, 20)) dim = 3 5.937 -> 5.941 ( +0.07%) [ +0.00% +0.07% +0.12% / +0.22% +0.07% +0.22%] index_add_ linear : Elapsed 0.059 ms (5.937 ms / 100) 5.847 -> 5.850 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.09% +0.19% +0.05%] index_copy_ linear : Elapsed 0.059 ms (5.851 ms / 100) 5.906 -> 5.908 ( +0.03%) [ +0.00% +0.03% +0.07% / +0.03% +0.24% +0.17%] index_add_ reverse : Elapsed 0.059 ms (5.906 ms / 100) 5.832 -> 5.832 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.00% +0.14%] index_copy_ reverse : Elapsed 0.058 ms (5.832 ms / 100) 5.940 -> 5.935 ( -0.08%) [ +0.12% +0.00% +0.03% / +0.12% -0.08% +0.02%] index_add_ spread : Elapsed 0.059 ms (5.947 ms / 100) 5.852 -> 5.850 ( -0.03%) [ +0.00% +0.05% +0.02% / +0.03% -0.03% +0.12%] index_copy_ spread : Elapsed 0.059 ms (5.852 ms / 100) 5.914 -> 5.882 ( -0.54%) [ +0.00% +0.12% +0.00% / +0.19% -0.54% -0.49%] index_add_ strided 3 : Elapsed 0.059 ms (5.914 ms / 100) 5.829 -> 5.817 ( -0.21%) [ +0.00% +0.00% +0.12% / +0.12% -0.21% -0.10%] index_copy_ strided 3 : Elapsed 0.058 ms (5.829 ms / 100) 5.905 -> 5.908 ( +0.05%) [ +0.17% +0.00% +0.24% / +0.30% +0.08% +0.05%] index_add_ perm : Elapsed 0.059 ms (5.915 ms / 100) 5.837 -> 5.836 ( -0.02%) [ +0.03% +0.03% +0.00% / +0.19% -0.02% +0.00%] index_copy_ perm : Elapsed 0.058 ms (5.839 ms / 100) 5.917 -> 5.910 ( -0.12%) [ +0.05% +0.00% +0.12% / -0.03% -0.07% -0.12%] index_add_ perm_sorted : Elapsed 0.059 ms (5.920 ms / 100) 5.838 -> 5.831 ( -0.12%) [ +0.00% +0.03% +0.00% / -0.10% -0.12% +0.00%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.838 ms / 100) 6.207 -> 6.199 ( -0.13%) [ +0.08% +0.00% +0.06% / +0.05% -0.13% -0.06%] index_select const : Elapsed 0.062 ms (6.212 ms / 100) 6.250 -> 6.245 ( -0.08%) [ +0.00% +0.10% +0.13% / +0.14% -0.08% -0.03%] index_select wrap : Elapsed 0.063 ms (6.250 ms / 100) 6.245 -> 6.230 ( -0.24%) [ +0.08% +0.00% +0.14% / +0.14% -0.10% -0.24%] index_select linear : Elapsed 0.063 ms (6.250 ms / 100) 6.236 -> 6.230 ( -0.10%) [ +0.00% +0.00% +0.13% / +0.05% -0.05% -0.10%] index_select reverse : Elapsed 0.062 ms (6.236 ms / 100) 6.207 -> 6.195 ( -0.19%) [ +0.02% +0.06% +0.00% / +0.08% -0.18% -0.19%] index_select skip64 : Elapsed 0.062 ms (6.208 ms / 100) 6.207 -> 6.198 ( -0.14%) [ +0.00% +0.00% +0.14% / +0.14% -0.11% -0.14%] index_select skip256 : Elapsed 0.062 ms (6.207 ms / 100) 6.235 -> 6.232 ( -0.05%) [ +0.00% +0.08% +0.21% / +0.14% -0.03% -0.05%] index_select spread : Elapsed 0.062 ms (6.235 ms / 100) 6.248 -> 6.253 ( +0.08%) [ +0.03% +0.00% +0.13% / +0.08% +0.14% +0.10%] index_select strided 3 : Elapsed 0.062 ms (6.250 ms / 100) 6.245 -> 6.248 ( +0.05%) [ +0.06% +0.05% +0.00% / +0.10% +0.05% +0.11%] index_select random : Elapsed 0.062 ms (6.249 ms / 100) 6.234 -> 6.234 ( +0.00%) [ +0.21% +0.00% +0.21% / +0.16% +0.02% +0.00%] index_select random_sorted : Elapsed 0.062 ms (6.247 ms / 100) B = [40, 20, 16, 5] (stride (320, 1, 20, 12800)) A = [40, 20, 16, 4] (stride (4, 2560, 160, 1)) dim = 3 5.686 -> 5.686 ( +0.00%) [ +0.00% +0.02% +0.12% / +0.00% +0.51% +0.49%] index_add_ linear : Elapsed 0.057 ms (5.686 ms / 100) 5.633 -> 5.633 ( +0.00%) [ +0.00% +0.04% +0.07% / +0.00% +0.50% +0.41%] index_copy_ linear : Elapsed 0.056 ms (5.633 ms / 100) 5.674 -> 5.688 ( +0.25%) [ +0.09% +0.00% +0.02% / +0.25% +0.72% +0.70%] index_add_ reverse : Elapsed 0.057 ms (5.679 ms / 100) 5.628 -> 5.630 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.53% +0.68%] index_copy_ reverse : Elapsed 0.056 ms (5.630 ms / 100) 5.689 -> 5.693 ( +0.07%) [ +0.00% +0.00% +0.02% / +0.07% +0.46% +0.33%] index_add_ spread : Elapsed 0.057 ms (5.689 ms / 100) 5.628 -> 5.636 ( +0.14%) [ +0.00% +0.12% +0.00% / +0.14% +0.53% +0.55%] index_copy_ spread : Elapsed 0.056 ms (5.628 ms / 100) 5.694 -> 5.703 ( +0.16%) [ +0.00% +0.04% +0.16% / +0.16% +0.83% +0.74%] index_add_ strided 3 : Elapsed 0.057 ms (5.694 ms / 100) 5.633 -> 5.648 ( +0.27%) [ +0.00% +0.07% +0.16% / +0.27% +0.96% +0.89%] index_copy_ strided 3 : Elapsed 0.056 ms (5.633 ms / 100) 5.702 -> 5.711 ( +0.16%) [ +0.00% +0.02% +0.00% / +0.16% +0.42% +0.63%] index_add_ perm : Elapsed 0.057 ms (5.702 ms / 100) 5.644 -> 5.652 ( +0.14%) [ +0.00% +0.09% +0.09% / +0.14% +0.55% +0.58%] index_copy_ perm : Elapsed 0.056 ms (5.644 ms / 100) 5.705 -> 5.712 ( +0.12%) [ +0.00% +0.02% +0.05% / +0.12% +0.47% +0.46%] index_add_ perm_sorted : Elapsed 0.057 ms (5.705 ms / 100) 5.641 -> 5.648 ( +0.12%) [ +0.11% +0.00% +0.21% / +0.12% +0.62% +0.59%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.647 ms / 100) 5.977 -> 5.987 ( +0.17%) [ +0.03% +0.00% +0.07% / +0.17% +0.80% +0.85%] index_select const : Elapsed 0.060 ms (5.979 ms / 100) 5.980 -> 5.981 ( +0.02%) [ +0.00% +0.05% +0.03% / +0.02% +0.59% +0.75%] index_select wrap : Elapsed 0.060 ms (5.980 ms / 100) 5.977 -> 5.995 ( +0.30%) [ +0.00% +0.03% +0.13% / +0.30% +0.87% +0.82%] index_select linear : Elapsed 0.060 ms (5.977 ms / 100) 5.978 -> 5.990 ( +0.20%) [ +0.00% +0.10% +0.10% / +0.20% +0.82% +0.74%] index_select reverse : Elapsed 0.060 ms (5.978 ms / 100) 5.973 -> 5.987 ( +0.23%) [ +0.05% +0.00% +0.15% / +0.23% +0.82% +0.77%] index_select skip64 : Elapsed 0.060 ms (5.976 ms / 100) 5.977 -> 5.986 ( +0.15%) [ +0.00% +0.17% +0.18% / +0.15% +0.75% +0.62%] index_select skip256 : Elapsed 0.060 ms (5.977 ms / 100) 5.977 -> 5.980 ( +0.05%) [ +0.03% +0.00% +0.13% / +0.05% +0.87% +0.77%] index_select spread : Elapsed 0.060 ms (5.979 ms / 100) 5.979 -> 5.986 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.67% +0.79%] index_select strided 3 : Elapsed 0.060 ms (5.986 ms / 100) 5.977 -> 5.986 ( +0.15%) [ +0.08% +0.00% +0.23% / +0.15% +0.74% +0.79%] index_select random : Elapsed 0.060 ms (5.982 ms / 100) 5.975 -> 5.981 ( +0.10%) [ +0.25% +0.00% +0.10% / +0.10% +0.60% +0.67%] index_select random_sorted : Elapsed 0.060 ms (5.990 ms / 100) B = [40, 20, 16, 5] (stride (1, 640, 40, 12800)) A = [40, 20, 16, 4] (stride (1, 40, 800, 12800)) dim = 3 5.593 -> 5.552 ( -0.73%) [ +0.00% +0.00% +0.05% / -0.18% -0.73% -0.59%] index_add_ linear : Elapsed 0.056 ms (5.593 ms / 100) 5.543 -> 5.518 ( -0.45%) [ +0.00% +0.02% +0.05% / +0.07% -0.45% -0.41%] index_copy_ linear : Elapsed 0.055 ms (5.543 ms / 100) 5.571 -> 5.544 ( -0.48%) [ +0.00% +0.05% +0.05% / +0.14% -0.48% -0.45%] index_add_ reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.536 -> 5.510 ( -0.47%) [ +0.23% +0.00% +0.18% / +0.22% -0.42% -0.47%] index_copy_ reverse : Elapsed 0.055 ms (5.549 ms / 100) 5.586 -> 5.554 ( -0.57%) [ +0.05% +0.05% +0.00% / +0.09% -0.54% -0.57%] index_add_ spread : Elapsed 0.056 ms (5.589 ms / 100) 5.546 -> 5.509 ( -0.67%) [ +0.04% +0.04% +0.00% / +0.11% -0.67% -0.60%] index_copy_ spread : Elapsed 0.055 ms (5.548 ms / 100) 5.560 -> 5.532 ( -0.50%) [ +0.04% +0.00% +0.04% / +0.31% -0.50% -0.38%] index_add_ strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.515 -> 5.500 ( -0.27%) [ +0.00% +0.09% +0.09% / +0.11% -0.27% -0.15%] index_copy_ strided 3 : Elapsed 0.055 ms (5.515 ms / 100) 5.537 -> 5.533 ( -0.07%) [ +0.00% +0.07% +0.20% / +0.18% -0.07% -0.07%] index_add_ perm : Elapsed 0.055 ms (5.537 ms / 100) 5.518 -> 5.499 ( -0.34%) [ +0.18% +0.00% +0.07% / +0.11% -0.29% -0.34%] index_copy_ perm : Elapsed 0.055 ms (5.528 ms / 100) 5.578 -> 5.535 ( -0.77%) [ +0.00% +0.04% +0.11% / +0.14% -0.66% -0.77%] index_add_ perm_sorted : Elapsed 0.056 ms (5.578 ms / 100) 5.528 -> 5.495 ( -0.60%) [ +0.00% +0.07% +0.04% / +0.09% -0.56% -0.60%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.528 ms / 100) 5.806 -> 5.804 ( -0.03%) [ +0.00% +0.09% +0.09% / +0.02% -0.03% -0.02%] index_select const : Elapsed 0.058 ms (5.806 ms / 100) 5.875 -> 5.845 ( -0.51%) [ +0.00% +0.10% +0.14% / +0.14% -0.51% -0.36%] index_select wrap : Elapsed 0.059 ms (5.875 ms / 100) 5.870 -> 5.847 ( -0.39%) [ +0.05% +0.00% +0.14% / +0.03% -0.32% -0.39%] index_select linear : Elapsed 0.059 ms (5.873 ms / 100) 5.875 -> 5.834 ( -0.70%) [ +0.00% +0.27% +0.09% / +0.14% -0.61% -0.70%] index_select reverse : Elapsed 0.059 ms (5.875 ms / 100) 5.807 -> 5.804 ( -0.05%) [ +0.07% +0.00% +0.14% / +0.15% -0.02% -0.05%] index_select skip64 : Elapsed 0.058 ms (5.811 ms / 100) 5.813 -> 5.799 ( -0.24%) [ +0.00% +0.12% +0.03% / +0.00% -0.24% -0.17%] index_select skip256 : Elapsed 0.058 ms (5.813 ms / 100) 5.877 -> 5.830 ( -0.80%) [ +0.05% +0.03% +0.00% / +0.10% -0.80% -0.77%] index_select spread : Elapsed 0.059 ms (5.880 ms / 100) 5.873 -> 5.831 ( -0.72%) [ +0.26% +0.00% +0.14% / +0.14% -0.72% -0.54%] index_select strided 3 : Elapsed 0.059 ms (5.888 ms / 100) 5.860 -> 5.839 ( -0.36%) [ +0.07% +0.00% +0.39% / +0.29% -0.36% -0.26%] index_select random : Elapsed 0.059 ms (5.864 ms / 100) 5.875 -> 5.848 ( -0.46%) [ +0.05% +0.02% +0.00% / +0.05% -0.46% -0.44%] index_select random_sorted : Elapsed 0.059 ms (5.878 ms / 100) out_shape = [16, 5, 20, 40] in_shape = [4, 5, 20, 40] idx_dim = 0 B = [16, 5, 20, 40] (stride (4000, 20, 1, 100)) A = [4, 5, 20, 40] (stride (1, 160, 800, 4)) dim = 0 2.327 -> 2.326 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.64% +0.43%] index_add_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.290 -> 2.296 ( +0.26%) [ +0.22% +0.00% +0.04% / +0.26% +0.31% +0.39%] index_copy_ linear : Elapsed 0.023 ms (2.295 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.00% +0.26% +0.00% / +0.04% +0.39% +0.43%] index_add_ reverse : Elapsed 0.023 ms (2.331 ms / 100) 2.291 -> 2.291 ( +0.00%) [ +0.00% +0.22% +0.09% / +0.00% +0.48% +0.35%] index_copy_ reverse : Elapsed 0.023 ms (2.291 ms / 100) 2.328 -> 2.330 ( +0.09%) [ +0.00% +0.26% +0.00% / +0.09% +0.30% +0.13%] index_add_ spread : Elapsed 0.023 ms (2.328 ms / 100) 2.294 -> 2.297 ( +0.13%) [ +0.26% +0.00% +0.04% / +0.13% +0.13% +0.13%] index_copy_ spread : Elapsed 0.023 ms (2.300 ms / 100) 2.329 -> 2.337 ( +0.34%) [ +0.30% +0.52% +0.00% / +0.34% +0.82% +0.52%] index_add_ strided 3 : Elapsed 0.023 ms (2.336 ms / 100) 2.297 -> 2.301 ( +0.17%) [ +0.26% +0.26% +0.00% / +0.17% +0.70% +0.26%] index_copy_ strided 3 : Elapsed 0.023 ms (2.303 ms / 100) 2.336 -> 2.330 ( -0.26%) [ +0.17% +0.00% +0.00% / -0.26% +0.13% +0.51%] index_add_ strided 5 : Elapsed 0.023 ms (2.340 ms / 100) 2.299 -> 2.302 ( +0.13%) [ +0.04% +0.00% +0.22% / +0.17% +0.13% +0.22%] index_copy_ strided 5 : Elapsed 0.023 ms (2.300 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.13% +0.69% +0.56%] index_add_ strided 7 : Elapsed 0.023 ms (2.326 ms / 100) 2.294 -> 2.295 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.31% +0.52%] index_copy_ strided 7 : Elapsed 0.023 ms (2.295 ms / 100) 2.334 -> 2.335 ( +0.04%) [ +0.04% +0.00% +0.17% / +0.04% +0.21% +0.17%] index_add_ perm : Elapsed 0.023 ms (2.335 ms / 100) 2.293 -> 2.295 ( +0.09%) [ +0.00% +0.17% +0.04% / +0.09% +0.39% +0.17%] index_copy_ perm : Elapsed 0.023 ms (2.293 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.13% +0.21% +0.00% / +0.13% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.023 ms (2.329 ms / 100) 2.297 -> 2.292 ( -0.22%) [ +0.04% +0.00% +0.13% / -0.17% -0.22% -0.13%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.298 ms / 100) 4.932 -> 4.934 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.47% +0.36%] index_select const : Elapsed 0.049 ms (4.932 ms / 100) 4.933 -> 4.938 ( +0.10%) [ +0.00% +0.06% +0.04% / +0.10% +0.41% +0.41%] index_select wrap : Elapsed 0.049 ms (4.933 ms / 100) 4.931 -> 4.927 ( -0.08%) [ +0.10% +0.20% +0.00% / -0.08% +0.47% +0.39%] index_select linear : Elapsed 0.049 ms (4.936 ms / 100) 4.949 -> 4.942 ( -0.14%) [ +0.00% +0.02% +0.08% / -0.14% +0.34% +0.32%] index_select reverse : Elapsed 0.049 ms (4.949 ms / 100) 4.943 -> 4.950 ( +0.14%) [ +0.24% +0.00% +0.08% / +0.14% +0.38% +0.20%] index_select skip64 : Elapsed 0.050 ms (4.955 ms / 100) 4.935 -> 4.937 ( +0.04%) [ +0.06% +0.00% +0.04% / +0.04% +0.18% +0.18%] index_select skip256 : Elapsed 0.049 ms (4.938 ms / 100) 4.936 -> 4.941 ( +0.10%) [ +0.04% +0.10% +0.00% / +0.10% +0.34% +0.30%] index_select spread : Elapsed 0.049 ms (4.938 ms / 100) 4.937 -> 4.934 ( -0.06%) [ +0.00% +0.06% +0.04% / -0.06% +0.22% +0.28%] index_select strided 3 : Elapsed 0.049 ms (4.937 ms / 100) 4.943 -> 4.954 ( +0.22%) [ +0.00% +0.20% +0.20% / +0.22% +0.26% +0.34%] index_select random : Elapsed 0.049 ms (4.943 ms / 100) 4.935 -> 4.933 ( -0.04%) [ +0.00% +0.04% +0.02% / -0.04% +0.30% +0.30%] index_select random_sorted : Elapsed 0.049 ms (4.935 ms / 100) B = [16, 5, 20, 40] (stride (20, 12800, 1, 320)) A = [4, 5, 20, 40] (stride (100, 20, 1, 400)) dim = 0 2.493 -> 2.497 ( +0.16%) [ +0.00% +0.20% +0.32% / +0.16% +0.44% +0.48%] index_add_ linear : Elapsed 0.025 ms (2.493 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.00% +0.29% +0.20%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.501 -> 2.501 ( +0.00%) [ +0.20% +0.20% +0.00% / +0.00% +0.28% +0.12%] index_add_ reverse : Elapsed 0.025 ms (2.506 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.16% +0.00% +0.04% / -0.08% -0.04% +0.08%] index_copy_ reverse : Elapsed 0.025 ms (2.459 ms / 100) 2.492 -> 2.490 ( -0.08%) [ +0.00% +0.12% +0.12% / -0.04% -0.08% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.492 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.04% +0.08%] index_copy_ spread : Elapsed 0.025 ms (2.457 ms / 100) 2.481 -> 2.481 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.24% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.483 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.12% +0.41%] index_copy_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.503 -> 2.505 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.44% +0.40%] index_add_ strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.00% +0.16% +0.08% / +0.04% +0.08% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.468 ms / 100) 2.495 -> 2.496 ( +0.04%) [ +0.00% +0.00% +0.20% / +0.04% +0.32% +0.24%] index_add_ strided 7 : Elapsed 0.025 ms (2.495 ms / 100) 2.462 -> 2.465 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.32% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.488 -> 2.490 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.36% +0.08%] index_add_ perm : Elapsed 0.025 ms (2.490 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.08% +0.00% +0.16% / -0.04% +0.29% +0.33%] index_copy_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.490 -> 2.489 ( -0.04%) [ +0.24% +0.16% +0.00% / -0.04% +0.16% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.496 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.20% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.462 ms / 100) 5.576 -> 5.577 ( +0.02%) [ +0.00% +0.09% +0.04% / +0.02% +0.48% +0.43%] index_select const : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.578 ( +0.11%) [ +0.00% +0.13% +0.18% / +0.11% +0.31% +0.36%] index_select wrap : Elapsed 0.056 ms (5.572 ms / 100) 5.583 -> 5.584 ( +0.02%) [ +0.00% +0.05% +0.04% / +0.02% +0.20% +0.29%] index_select linear : Elapsed 0.056 ms (5.583 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.31% +0.32%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.579 -> 5.575 ( -0.07%) [ +0.07% +0.00% +0.05% / -0.07% +0.27% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.583 ms / 100) 5.589 -> 5.592 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.23% +0.39%] index_select skip256 : Elapsed 0.056 ms (5.592 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.05% +0.00% +0.07% / +0.04% +0.22% +0.36%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.09% +0.14% +0.00% / +0.07% +0.32% +0.31%] index_select strided 3 : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.25% +0.34%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.00% +0.04% +0.07% / +0.04% +0.32% +0.31%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [16, 5, 20, 40] (stride (1, 12800, 16, 320)) A = [4, 5, 20, 40] (stride (4000, 800, 1, 20)) dim = 0 2.548 -> 2.548 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.55% +0.63%] index_add_ linear : Elapsed 0.026 ms (2.551 ms / 100) 2.527 -> 2.531 ( +0.16%) [ +0.00% +0.08% +0.28% / +0.16% +0.59% +0.55%] index_copy_ linear : Elapsed 0.025 ms (2.527 ms / 100) 2.556 -> 2.552 ( -0.16%) [ +0.00% +0.20% +0.00% / -0.16% +0.27% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.556 ms / 100) 2.525 -> 2.528 ( +0.12%) [ +0.00% +0.20% +0.28% / +0.12% +0.59% +0.51%] index_copy_ reverse : Elapsed 0.025 ms (2.525 ms / 100) 2.585 -> 2.589 ( +0.15%) [ +0.12% +0.31% +0.00% / +0.15% +1.01% +0.66%] index_add_ spread : Elapsed 0.026 ms (2.588 ms / 100) 2.632 -> 2.632 ( +0.00%) [ +0.23% +0.11% +0.00% / +0.00% +0.53% +0.49%] index_copy_ spread : Elapsed 0.026 ms (2.638 ms / 100) 2.589 -> 2.598 ( +0.35%) [ +0.00% +0.31% +0.31% / +0.35% +0.73% +0.85%] index_add_ strided 3 : Elapsed 0.026 ms (2.589 ms / 100) 2.631 -> 2.635 ( +0.15%) [ +0.23% +0.30% +0.00% / +0.15% +0.65% +0.80%] index_copy_ strided 3 : Elapsed 0.026 ms (2.637 ms / 100) 2.594 -> 2.591 ( -0.12%) [ +0.08% +0.12% +0.00% / -0.12% +0.50% +0.19%] index_add_ strided 5 : Elapsed 0.026 ms (2.596 ms / 100) 2.636 -> 2.637 ( +0.04%) [ +0.23% +0.00% +0.04% / +0.04% +0.19% +0.38%] index_copy_ strided 5 : Elapsed 0.026 ms (2.642 ms / 100) 2.590 -> 2.596 ( +0.23%) [ +0.27% +0.00% +0.15% / +0.23% +0.23% +0.42%] index_add_ strided 7 : Elapsed 0.026 ms (2.597 ms / 100) 2.634 -> 2.632 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.30% +0.57%] index_copy_ strided 7 : Elapsed 0.026 ms (2.636 ms / 100) 2.557 -> 2.553 ( -0.16%) [ +0.00% +0.12% +0.08% / -0.16% +0.12% +0.16%] index_add_ perm : Elapsed 0.026 ms (2.557 ms / 100) 2.527 -> 2.529 ( +0.08%) [ +0.32% +0.00% +0.24% / +0.08% +0.75% +0.63%] index_copy_ perm : Elapsed 0.025 ms (2.535 ms / 100) 2.549 -> 2.555 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.27% +0.27%] index_add_ perm_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.527 -> 2.528 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.32% +0.47%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.527 ms / 100) 5.718 -> 5.725 ( +0.12%) [ +0.12% +0.10% +0.00% / +0.12% +0.54% +0.51%] index_select const : Elapsed 0.057 ms (5.725 ms / 100) 5.750 -> 5.756 ( +0.10%) [ +0.03% +0.12% +0.00% / +0.10% +0.38% +0.42%] index_select wrap : Elapsed 0.058 ms (5.752 ms / 100) 5.758 -> 5.760 ( +0.03%) [ +0.00% +0.09% +0.02% / +0.03% +0.45% +0.50%] index_select linear : Elapsed 0.058 ms (5.758 ms / 100) 5.787 -> 5.783 ( -0.07%) [ +0.28% +0.00% +0.14% / -0.07% +0.45% +0.38%] index_select reverse : Elapsed 0.058 ms (5.803 ms / 100) 5.791 -> 5.794 ( +0.05%) [ +0.02% +0.03% +0.00% / +0.05% +0.55% +0.50%] index_select skip64 : Elapsed 0.058 ms (5.792 ms / 100) 5.766 -> 5.759 ( -0.12%) [ +0.10% +0.03% +0.00% / -0.12% +0.54% +0.50%] index_select skip256 : Elapsed 0.058 ms (5.772 ms / 100) 5.732 -> 5.739 ( +0.12%) [ +0.09% +0.21% +0.00% / +0.12% +0.28% +0.38%] index_select spread : Elapsed 0.057 ms (5.737 ms / 100) 5.746 -> 5.748 ( +0.03%) [ +0.00% +0.14% +0.02% / +0.03% +0.45% +0.42%] index_select strided 3 : Elapsed 0.057 ms (5.746 ms / 100) 5.797 -> 5.795 ( -0.03%) [ +0.05% +0.00% +0.12% / -0.03% +0.36% +0.43%] index_select random : Elapsed 0.058 ms (5.800 ms / 100) 5.723 -> 5.726 ( +0.05%) [ +0.14% +0.00% +0.09% / +0.05% +0.51% +0.56%] index_select random_sorted : Elapsed 0.057 ms (5.731 ms / 100) B = [16, 5, 20, 40] (stride (5, 1, 80, 1600)) A = [4, 5, 20, 40] (stride (5, 1, 20, 400)) dim = 0 2.632 -> 2.632 ( +0.00%) [ +0.11% +0.08% +0.00% / +0.00% +0.34% +0.49%] index_add_ linear : Elapsed 0.026 ms (2.635 ms / 100) 2.595 -> 2.597 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.23% +0.12%] index_copy_ linear : Elapsed 0.026 ms (2.597 ms / 100) 2.651 -> 2.650 ( -0.04%) [ +0.15% +0.00% +0.15% / -0.04% +0.11% +0.11%] index_add_ reverse : Elapsed 0.027 ms (2.655 ms / 100) 2.604 -> 2.604 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.15% +0.19% +0.00%] index_copy_ reverse : Elapsed 0.026 ms (2.604 ms / 100) 2.657 -> 2.656 ( -0.04%) [ +0.23% +0.00% +0.04% / +0.04% -0.04% -0.04%] index_add_ spread : Elapsed 0.027 ms (2.663 ms / 100) 2.639 -> 2.638 ( -0.04%) [ +0.19% +0.04% +0.00% / +0.00% +0.00% -0.04%] index_copy_ spread : Elapsed 0.026 ms (2.644 ms / 100) 2.651 -> 2.653 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.26% +0.19%] index_add_ strided 3 : Elapsed 0.027 ms (2.652 ms / 100) 2.655 -> 2.655 ( +0.00%) [ +0.23% +0.00% +0.04% / +0.11% +0.15% +0.00%] index_copy_ strided 3 : Elapsed 0.027 ms (2.661 ms / 100) 2.618 -> 2.622 ( +0.15%) [ +0.15% +0.38% +0.00% / +0.15% +0.50% +0.42%] index_add_ strided 5 : Elapsed 0.026 ms (2.622 ms / 100) 2.600 -> 2.599 ( -0.04%) [ +0.00% +0.15% +0.08% / +0.04% +0.04% -0.04%] index_copy_ strided 5 : Elapsed 0.026 ms (2.600 ms / 100) 2.631 -> 2.631 ( +0.00%) [ +0.27% +0.08% +0.00% / +0.00% +0.49% +0.57%] index_add_ strided 7 : Elapsed 0.026 ms (2.638 ms / 100) 2.618 -> 2.621 ( +0.11%) [ +0.19% +0.00% +0.04% / +0.11% +0.27% +0.27%] index_copy_ strided 7 : Elapsed 0.026 ms (2.623 ms / 100) 2.654 -> 2.660 ( +0.23%) [ +0.00% +0.00% +0.23% / +0.23% +0.53% +0.79%] index_add_ perm : Elapsed 0.027 ms (2.654 ms / 100) 2.637 -> 2.638 ( +0.04%) [ +0.00% +0.30% +0.11% / +0.04% +0.19% +0.15%] index_copy_ perm : Elapsed 0.026 ms (2.637 ms / 100) 2.659 -> 2.661 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.19% +0.08% +0.19%] index_add_ perm_sorted : Elapsed 0.027 ms (2.659 ms / 100) 2.642 -> 2.643 ( +0.04%) [ +0.00% +0.11% +0.00% / +0.11% +0.04% +0.15%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.642 ms / 100) 6.041 -> 6.042 ( +0.02%) [ +0.02% +0.12% +0.00% / +0.02% +0.36% +0.43%] index_select const : Elapsed 0.060 ms (6.042 ms / 100) 6.044 -> 6.039 ( -0.08%) [ +0.05% +0.08% +0.00% / -0.08% +0.28% +0.26%] index_select wrap : Elapsed 0.060 ms (6.047 ms / 100) 6.049 -> 6.051 ( +0.03%) [ +0.12% +0.05% +0.00% / +0.03% +0.30% +0.26%] index_select linear : Elapsed 0.061 ms (6.056 ms / 100) 6.061 -> 6.063 ( +0.03%) [ +0.15% +0.12% +0.00% / +0.03% +0.33% +0.26%] index_select reverse : Elapsed 0.061 ms (6.070 ms / 100) 6.051 -> 6.049 ( -0.03%) [ +0.03% +0.07% +0.00% / -0.03% +0.18% +0.20%] index_select skip64 : Elapsed 0.061 ms (6.053 ms / 100) 6.051 -> 6.053 ( +0.03%) [ +0.00% +0.03% +0.13% / +0.03% +0.38% +0.33%] index_select skip256 : Elapsed 0.061 ms (6.051 ms / 100) 6.043 -> 6.047 ( +0.07%) [ +0.02% +0.05% +0.00% / +0.07% +0.31% +0.26%] index_select spread : Elapsed 0.060 ms (6.044 ms / 100) 6.046 -> 6.049 ( +0.05%) [ +0.00% +0.08% +0.08% / +0.05% +0.43% +0.40%] index_select strided 3 : Elapsed 0.060 ms (6.046 ms / 100) 6.047 -> 6.050 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.31% +0.17%] index_select random : Elapsed 0.060 ms (6.047 ms / 100) 6.040 -> 6.048 ( +0.13%) [ +0.07% +0.05% +0.00% / +0.13% +0.40% +0.38%] index_select random_sorted : Elapsed 0.060 ms (6.044 ms / 100) out_shape = [4, 16, 20, 40] in_shape = [4, 5, 20, 40] idx_dim = 1 B = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) dim = 1 fill_cnt = 5 1.111 -> 1.112 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.27% +0.36%] index_fill_ const : Elapsed 0.011 ms (1.111 ms / 100) 1.130 -> 1.132 ( +0.18%) [ +0.35% +0.18% +0.00% / +0.35% +0.18% +0.18%] index_fill_ linear : Elapsed 0.011 ms (1.134 ms / 100) 1.133 -> 1.132 ( -0.09%) [ +0.00% +0.26% +0.00% / -0.09% +0.09% -0.09%] index_fill_ reverse : Elapsed 0.011 ms (1.133 ms / 100) 1.109 -> 1.112 ( +0.27%) [ +0.45% +0.00% +0.00% / +0.27% +0.54% +0.45%] index_fill_ skip64 : Elapsed 0.011 ms (1.114 ms / 100) 1.110 -> 1.109 ( -0.09%) [ +0.27% +0.18% +0.00% / -0.09% +0.63% +0.36%] index_fill_ skip256 : Elapsed 0.011 ms (1.113 ms / 100) 1.168 -> 1.169 ( +0.09%) [ +0.00% +0.51% +0.00% / +0.34% +0.68% +0.09%] index_fill_ spread : Elapsed 0.012 ms (1.168 ms / 100) 1.169 -> 1.168 ( -0.09%) [ +0.34% +0.00% +0.00% / -0.09% +0.26% +0.60%] index_fill_ strided 3 : Elapsed 0.012 ms (1.173 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.00% +0.17% +0.26% / +0.09% +0.09% +0.26%] index_fill_ strided 5 : Elapsed 0.011 ms (1.145 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.35% +0.52%] index_fill_ strided 7 : Elapsed 0.012 ms (1.151 ms / 100) 1.114 -> 1.115 ( +0.09%) [ +0.36% +0.09% +0.00% / +0.09% +0.90% +0.81%] index_fill_ strided 8 : Elapsed 0.011 ms (1.118 ms / 100) 1.140 -> 1.143 ( +0.26%) [ +0.00% +0.18% +0.26% / +0.26% +0.70% +0.53%] index_fill_ random : Elapsed 0.011 ms (1.140 ms / 100) 1.140 -> 1.143 ( +0.26%) [ +0.35% +0.44% +0.00% / +0.26% +0.61% +0.70%] index_fill_ random_sorted : Elapsed 0.011 ms (1.144 ms / 100) 1.147 -> 1.150 ( +0.26%) [ +0.00% +0.00% +0.09% / +0.26% +0.35% +0.52%] index_fill_ perm : Elapsed 0.011 ms (1.147 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.70% +0.70%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.148 ms / 100) B = [4, 16, 20, 40] (stride (12800, 1, 16, 320)) A = [4, 5, 20, 40] (stride (4000, 1, 5, 100)) dim = 1 2.585 -> 2.588 ( +0.12%) [ +0.00% +0.15% +0.08% / +0.12% +0.97% +0.58%] index_add_ linear : Elapsed 0.026 ms (2.585 ms / 100) 2.540 -> 2.541 ( +0.04%) [ +0.24% +0.20% +0.00% / +0.04% +0.71% +0.47%] index_copy_ linear : Elapsed 0.025 ms (2.546 ms / 100) 2.595 -> 2.595 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.89% +0.66%] index_add_ reverse : Elapsed 0.026 ms (2.596 ms / 100) 2.544 -> 2.540 ( -0.16%) [ +0.00% +0.16% +0.04% / -0.16% +0.55% +0.47%] index_copy_ reverse : Elapsed 0.025 ms (2.544 ms / 100) 2.621 -> 2.616 ( -0.19%) [ +0.00% +0.08% +0.00% / -0.19% +0.42% +0.38%] index_add_ spread : Elapsed 0.026 ms (2.621 ms / 100) 2.609 -> 2.600 ( -0.34%) [ +0.04% +0.11% +0.00% / -0.34% +0.19% +0.38%] index_copy_ spread : Elapsed 0.026 ms (2.610 ms / 100) 2.618 -> 2.619 ( +0.04%) [ +0.15% +0.04% +0.00% / +0.04% +0.73% +0.95%] index_add_ strided 3 : Elapsed 0.026 ms (2.622 ms / 100) 2.606 -> 2.611 ( +0.19%) [ +0.04% +0.00% +0.15% / +0.19% +0.46% +0.58%] index_copy_ strided 3 : Elapsed 0.026 ms (2.607 ms / 100) 2.621 -> 2.620 ( -0.04%) [ +0.00% +0.27% +0.04% / -0.04% +0.61% +0.84%] index_add_ strided 5 : Elapsed 0.026 ms (2.621 ms / 100) 2.609 -> 2.604 ( -0.19%) [ +0.00% +0.00% +0.08% / -0.19% +0.54% +0.65%] index_copy_ strided 5 : Elapsed 0.026 ms (2.609 ms / 100) 2.610 -> 2.618 ( +0.31%) [ +0.31% +0.38% +0.00% / +0.31% +0.77% +0.73%] index_add_ strided 7 : Elapsed 0.026 ms (2.618 ms / 100) 2.601 -> 2.606 ( +0.19%) [ +0.15% +0.31% +0.00% / +0.19% +0.54% +0.50%] index_copy_ strided 7 : Elapsed 0.026 ms (2.605 ms / 100) 2.618 -> 2.619 ( +0.04%) [ +0.08% +0.15% +0.00% / +0.04% +0.65% +0.38%] index_add_ perm : Elapsed 0.026 ms (2.620 ms / 100) 2.608 -> 2.608 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.50% +0.61%] index_copy_ perm : Elapsed 0.026 ms (2.609 ms / 100) 2.613 -> 2.618 ( +0.19%) [ +0.27% +0.23% +0.00% / +0.19% +0.77% +0.69%] index_add_ perm_sorted : Elapsed 0.026 ms (2.620 ms / 100) 2.601 -> 2.607 ( +0.23%) [ +0.35% +0.19% +0.00% / +0.23% +0.54% +0.85%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.610 ms / 100) 5.694 -> 5.693 ( -0.02%) [ +0.00% +0.11% +0.07% / -0.02% +0.70% +0.39%] index_select const : Elapsed 0.057 ms (5.694 ms / 100) 5.698 -> 5.705 ( +0.12%) [ +0.00% +0.16% +0.11% / +0.12% +0.68% +0.86%] index_select wrap : Elapsed 0.057 ms (5.698 ms / 100) 5.680 -> 5.687 ( +0.12%) [ +0.21% +0.02% +0.00% / +0.12% +0.60% +0.49%] index_select linear : Elapsed 0.057 ms (5.692 ms / 100) 5.694 -> 5.691 ( -0.05%) [ +0.00% +0.04% +0.05% / -0.05% +0.58% +0.56%] index_select reverse : Elapsed 0.057 ms (5.694 ms / 100) 5.702 -> 5.705 ( +0.05%) [ +0.00% +0.07% +0.18% / +0.05% +0.79% +0.74%] index_select skip64 : Elapsed 0.057 ms (5.702 ms / 100) 5.687 -> 5.688 ( +0.02%) [ +0.00% +0.14% +0.04% / +0.02% +0.88% +0.60%] index_select skip256 : Elapsed 0.057 ms (5.687 ms / 100) 5.690 -> 5.701 ( +0.19%) [ +0.00% +0.07% +0.02% / +0.19% +0.72% +0.81%] index_select spread : Elapsed 0.057 ms (5.690 ms / 100) 5.686 -> 5.691 ( +0.09%) [ +0.11% +0.07% +0.00% / +0.09% +0.79% +0.65%] index_select strided 3 : Elapsed 0.057 ms (5.692 ms / 100) 5.684 -> 5.687 ( +0.05%) [ +0.05% +0.19% +0.00% / +0.05% +0.88% +0.86%] index_select random : Elapsed 0.057 ms (5.687 ms / 100) 5.684 -> 5.684 ( +0.00%) [ +0.00% +0.25% +0.05% / +0.00% +0.90% +0.90%] index_select random_sorted : Elapsed 0.057 ms (5.684 ms / 100) B = [4, 16, 20, 40] (stride (800, 3200, 40, 1)) A = [4, 5, 20, 40] (stride (1, 4, 20, 400)) dim = 1 2.391 -> 2.394 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.13% +0.75% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.310 -> 2.311 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.69% +0.65%] index_copy_ linear : Elapsed 0.023 ms (2.314 ms / 100) 2.395 -> 2.394 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.33% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.309 -> 2.311 ( +0.09%) [ +0.26% +0.13% +0.00% / +0.09% +0.52% +0.61%] index_copy_ reverse : Elapsed 0.023 ms (2.315 ms / 100) 2.393 -> 2.398 ( +0.21%) [ +0.21% +0.50% +0.00% / +0.21% +0.63% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.314 -> 2.318 ( +0.17%) [ +0.13% +0.09% +0.00% / +0.17% +0.52% +0.30%] index_copy_ spread : Elapsed 0.023 ms (2.317 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.04% +0.08% +0.00% / +0.13% +0.67% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.394 ms / 100) 2.310 -> 2.315 ( +0.22%) [ +0.09% +0.09% +0.00% / +0.22% +0.82% +0.39%] index_copy_ strided 3 : Elapsed 0.023 ms (2.312 ms / 100) 2.394 -> 2.395 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.38% +0.46%] index_add_ strided 5 : Elapsed 0.024 ms (2.396 ms / 100) 2.311 -> 2.312 ( +0.04%) [ +0.39% +0.00% +0.17% / +0.04% +0.69% +0.61%] index_copy_ strided 5 : Elapsed 0.023 ms (2.320 ms / 100) 2.386 -> 2.390 ( +0.17%) [ +0.08% +0.25% +0.00% / +0.17% +0.50% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.388 ms / 100) 2.306 -> 2.311 ( +0.22%) [ +0.00% +0.26% +0.09% / +0.22% +0.52% +0.56%] index_copy_ strided 7 : Elapsed 0.023 ms (2.306 ms / 100) 2.382 -> 2.389 ( +0.29%) [ +0.00% +0.13% +0.04% / +0.29% +1.01% +1.13%] index_add_ perm : Elapsed 0.024 ms (2.382 ms / 100) 2.302 -> 2.310 ( +0.35%) [ +0.26% +0.00% +0.09% / +0.35% +1.26% +1.09%] index_copy_ perm : Elapsed 0.023 ms (2.308 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.71% +0.67%] index_add_ perm_sorted : Elapsed 0.024 ms (2.392 ms / 100) 2.300 -> 2.308 ( +0.35%) [ +0.35% +0.26% +0.00% / +0.35% +1.22% +1.22%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.308 ms / 100) 5.136 -> 5.144 ( +0.16%) [ +0.23% +0.21% +0.00% / +0.16% +0.93% +0.99%] index_select const : Elapsed 0.051 ms (5.148 ms / 100) 5.140 -> 5.148 ( +0.16%) [ +0.10% +0.06% +0.00% / +0.16% +0.70% +0.68%] index_select wrap : Elapsed 0.051 ms (5.145 ms / 100) 5.149 -> 5.149 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.64% +0.62%] index_select linear : Elapsed 0.052 ms (5.152 ms / 100) 5.150 -> 5.145 ( -0.10%) [ +0.10% +0.02% +0.00% / -0.10% +0.41% +0.56%] index_select reverse : Elapsed 0.052 ms (5.155 ms / 100) 5.127 -> 5.134 ( +0.14%) [ +0.16% +0.00% +0.08% / +0.14% +0.66% +0.60%] index_select skip64 : Elapsed 0.051 ms (5.135 ms / 100) 5.128 -> 5.122 ( -0.12%) [ +0.04% +0.06% +0.00% / -0.12% +0.66% +0.70%] index_select skip256 : Elapsed 0.051 ms (5.130 ms / 100) 5.136 -> 5.133 ( -0.06%) [ +0.08% +0.16% +0.00% / -0.06% +0.58% +0.56%] index_select spread : Elapsed 0.051 ms (5.140 ms / 100) 5.136 -> 5.139 ( +0.06%) [ +0.08% +0.02% +0.00% / +0.06% +0.62% +0.78%] index_select strided 3 : Elapsed 0.051 ms (5.140 ms / 100) 5.138 -> 5.144 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.12% +0.58% +0.66%] index_select random : Elapsed 0.051 ms (5.138 ms / 100) 5.137 -> 5.141 ( +0.08%) [ +0.16% +0.02% +0.00% / +0.08% +0.76% +0.90%] index_select random_sorted : Elapsed 0.051 ms (5.145 ms / 100) B = [4, 16, 20, 40] (stride (40, 3200, 160, 1)) A = [4, 5, 20, 40] (stride (200, 1, 800, 5)) dim = 1 2.348 -> 2.349 ( +0.04%) [ +0.43% +0.34% +0.00% / +0.04% +0.09% +0.47%] index_add_ linear : Elapsed 0.024 ms (2.358 ms / 100) 2.283 -> 2.290 ( +0.31%) [ +0.31% +0.26% +0.00% / +0.31% +0.48% +0.61%] index_copy_ linear : Elapsed 0.023 ms (2.290 ms / 100) 2.339 -> 2.345 ( +0.26%) [ +0.17% +0.09% +0.00% / +0.26% +0.94% +0.81%] index_add_ reverse : Elapsed 0.023 ms (2.343 ms / 100) 2.282 -> 2.287 ( +0.22%) [ +0.18% +0.00% +0.09% / +0.22% +0.48% +0.61%] index_copy_ reverse : Elapsed 0.023 ms (2.286 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.43% +0.17% +0.00% / +0.13% +0.55% +0.60%] index_add_ spread : Elapsed 0.024 ms (2.360 ms / 100) 2.290 -> 2.288 ( -0.09%) [ +0.00% +0.17% +0.04% / -0.09% +0.57% +0.48%] index_copy_ spread : Elapsed 0.023 ms (2.290 ms / 100) 2.349 -> 2.350 ( +0.04%) [ +0.00% +0.26% +0.00% / +0.04% +0.47% +0.30%] index_add_ strided 3 : Elapsed 0.023 ms (2.349 ms / 100) 2.285 -> 2.291 ( +0.26%) [ +0.00% +0.35% +0.13% / +0.26% +0.61% +0.83%] index_copy_ strided 3 : Elapsed 0.023 ms (2.285 ms / 100) 2.337 -> 2.345 ( +0.34%) [ +0.17% +0.00% +0.26% / +0.34% +0.64% +5.26%] index_add_ strided 5 : Elapsed 0.023 ms (2.341 ms / 100) 2.280 -> 2.284 ( +0.18%) [ +0.04% +0.00% +0.09% / +0.18% +0.48% +0.66%] index_copy_ strided 5 : Elapsed 0.023 ms (2.281 ms / 100) 2.345 -> 2.345 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.55% +0.60%] index_add_ strided 7 : Elapsed 0.023 ms (2.345 ms / 100) 2.281 -> 2.283 ( +0.09%) [ +0.18% +0.22% +0.00% / +0.09% +0.75% +0.48%] index_copy_ strided 7 : Elapsed 0.023 ms (2.285 ms / 100) 2.350 -> 2.349 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.72% +0.81%] index_add_ perm : Elapsed 0.024 ms (2.352 ms / 100) 2.285 -> 2.284 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.83% +1.05%] index_copy_ perm : Elapsed 0.023 ms (2.285 ms / 100) 2.348 -> 2.354 ( +0.26%) [ +0.13% +0.04% +0.00% / +0.26% +0.81% +0.85%] index_add_ perm_sorted : Elapsed 0.024 ms (2.351 ms / 100) 2.285 -> 2.294 ( +0.39%) [ +0.09% +0.04% +0.00% / +0.39% +1.05% +0.96%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.287 ms / 100) 4.969 -> 4.974 ( +0.10%) [ +0.00% +0.22% +0.28% / +0.10% +0.85% +0.72%] index_select const : Elapsed 0.050 ms (4.969 ms / 100) 4.974 -> 4.983 ( +0.18%) [ +0.12% +0.00% +0.14% / +0.18% +0.46% +0.60%] index_select wrap : Elapsed 0.050 ms (4.980 ms / 100) 5.005 -> 5.026 ( +0.42%) [ +0.04% +0.24% +0.00% / +0.42% +0.70% +0.56%] index_select linear : Elapsed 0.050 ms (5.007 ms / 100) 5.014 -> 4.994 ( -0.40%) [ +0.00% +0.18% +0.10% / -0.40% +0.68% +0.06%] index_select reverse : Elapsed 0.050 ms (5.014 ms / 100) 4.966 -> 4.968 ( +0.04%) [ +0.08% +0.10% +0.00% / +0.04% +0.64% +0.81%] index_select skip64 : Elapsed 0.050 ms (4.970 ms / 100) 4.972 -> 4.981 ( +0.18%) [ +0.00% +0.02% +0.00% / +0.18% +0.76% +0.80%] index_select skip256 : Elapsed 0.050 ms (4.972 ms / 100) 5.008 -> 4.978 ( -0.60%) [ +0.00% +0.02% +0.08% / -0.60% +0.66% +0.14%] index_select spread : Elapsed 0.050 ms (5.008 ms / 100) 4.996 -> 5.003 ( +0.14%) [ +0.00% +0.40% +0.42% / +0.14% +0.84% +1.00%] index_select strided 3 : Elapsed 0.050 ms (4.996 ms / 100) 4.994 -> 5.008 ( +0.28%) [ +0.28% +0.00% +0.44% / +0.28% +1.00% +1.02%] index_select random : Elapsed 0.050 ms (5.008 ms / 100) 4.975 -> 4.971 ( -0.08%) [ +0.08% +0.00% +0.02% / -0.08% +0.78% +0.94%] index_select random_sorted : Elapsed 0.050 ms (4.979 ms / 100) B = [4, 16, 20, 40] (stride (40, 3200, 160, 1)) A = [4, 5, 20, 40] (stride (5, 1, 20, 400)) dim = 1 2.556 -> 2.564 ( +0.31%) [ +0.00% +0.20% +0.23% / +0.31% +0.47% +0.35%] index_add_ linear : Elapsed 0.026 ms (2.556 ms / 100) 2.475 -> 2.481 ( +0.24%) [ +0.04% +0.16% +0.00% / +0.24% +0.48% +0.44%] index_copy_ linear : Elapsed 0.025 ms (2.476 ms / 100) 2.558 -> 2.555 ( -0.12%) [ +0.20% +0.00% +0.12% / -0.12% +0.27% +0.51%] index_add_ reverse : Elapsed 0.026 ms (2.563 ms / 100) 2.472 -> 2.475 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.28% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.474 ms / 100) 2.554 -> 2.562 ( +0.31%) [ +0.20% +0.12% +0.00% / +0.31% +0.67% +0.70%] index_add_ spread : Elapsed 0.026 ms (2.559 ms / 100) 2.471 -> 2.477 ( +0.24%) [ +0.16% +0.00% +0.08% / +0.24% +0.69% +0.77%] index_copy_ spread : Elapsed 0.025 ms (2.475 ms / 100) 2.549 -> 2.557 ( +0.31%) [ +0.31% +0.43% +0.00% / +0.31% +0.78% +0.94%] index_add_ strided 3 : Elapsed 0.026 ms (2.557 ms / 100) 2.469 -> 2.479 ( +0.41%) [ +0.24% +0.41% +0.00% / +0.41% +0.85% +0.85%] index_copy_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.558 -> 2.555 ( -0.12%) [ +0.12% +0.00% +0.04% / -0.12% +0.12% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.561 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.32% +0.40%] index_copy_ strided 5 : Elapsed 0.025 ms (2.477 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.16% +0.12% +0.47%] index_add_ strided 7 : Elapsed 0.026 ms (2.554 ms / 100) 2.468 -> 2.476 ( +0.32%) [ +0.12% +0.24% +0.00% / +0.32% +0.32% +0.69%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.553 -> 2.560 ( +0.27%) [ +0.43% +0.00% +0.12% / +0.27% +0.43% +0.47%] index_add_ perm : Elapsed 0.026 ms (2.564 ms / 100) 2.471 -> 2.477 ( +0.24%) [ +0.16% +0.00% +0.28% / +0.24% +0.61% +0.65%] index_copy_ perm : Elapsed 0.025 ms (2.475 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.23% +0.00% +0.16% / +0.12% +0.74% +0.70%] index_add_ perm_sorted : Elapsed 0.026 ms (2.560 ms / 100) 2.471 -> 2.477 ( +0.24%) [ +0.16% +0.08% +0.00% / +0.24% +0.69% +0.77%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.475 ms / 100) 5.590 -> 5.603 ( +0.23%) [ +0.00% +0.20% +0.25% / +0.23% +0.86% +0.82%] index_select const : Elapsed 0.056 ms (5.590 ms / 100) 5.600 -> 5.608 ( +0.14%) [ +0.00% +0.07% +0.20% / +0.14% +0.59% +0.41%] index_select wrap : Elapsed 0.056 ms (5.600 ms / 100) 5.602 -> 5.601 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.64% +0.48%] index_select linear : Elapsed 0.056 ms (5.606 ms / 100) 5.588 -> 5.589 ( +0.02%) [ +0.21% +0.32% +0.00% / +0.02% +0.91% +0.59%] index_select reverse : Elapsed 0.056 ms (5.600 ms / 100) 5.583 -> 5.604 ( +0.38%) [ +0.00% +0.09% +0.29% / +0.38% +0.82% +0.57%] index_select skip64 : Elapsed 0.056 ms (5.583 ms / 100) 5.579 -> 5.599 ( +0.36%) [ +0.00% +0.13% +0.30% / +0.36% +0.72% +0.82%] index_select skip256 : Elapsed 0.056 ms (5.579 ms / 100) 5.587 -> 5.608 ( +0.38%) [ +0.45% +0.05% +0.00% / +0.38% +0.70% +0.66%] index_select spread : Elapsed 0.056 ms (5.612 ms / 100) 5.603 -> 5.589 ( -0.25%) [ +0.00% +0.16% +0.04% / -0.25% +0.52% +0.66%] index_select strided 3 : Elapsed 0.056 ms (5.603 ms / 100) 5.604 -> 5.602 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.36% +0.64%] index_select random : Elapsed 0.056 ms (5.606 ms / 100) 5.580 -> 5.594 ( +0.25%) [ +0.32% +0.07% +0.00% / +0.25% +0.97% +0.99%] index_select random_sorted : Elapsed 0.056 ms (5.598 ms / 100) B = [4, 16, 20, 40] (stride (1, 3200, 160, 4)) A = [4, 5, 20, 40] (stride (5, 1, 800, 20)) dim = 1 0.849 -> 0.848 ( -0.12%) [ +0.00% +0.24% +0.12% / -0.12% +0.00% -0.12%] index_add_ linear : Elapsed 0.008 ms (0.849 ms / 100) 0.828 -> 0.830 ( +0.24%) [ +0.24% +0.36% +0.00% / +0.24% +0.24% +0.24%] index_copy_ linear : Elapsed 0.008 ms (0.830 ms / 100) 0.848 -> 0.847 ( -0.12%) [ +0.12% +0.24% +0.00% / +0.47% -0.12% +0.00%] index_add_ reverse : Elapsed 0.008 ms (0.849 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +0.36% +5.55%] index_copy_ reverse : Elapsed 0.008 ms (0.830 ms / 100) 0.854 -> 0.843 ( -1.29%) [ +0.23% +0.00% +0.00% / +0.59% -0.59% -1.29%] index_add_ spread : Elapsed 0.009 ms (0.856 ms / 100) 0.835 -> 0.825 ( -1.20%) [ +0.00% +0.12% +0.00% / +0.48% -1.20% -1.08%] index_copy_ spread : Elapsed 0.008 ms (0.835 ms / 100) 0.855 -> 0.846 ( -1.05%) [ +0.00% +0.35% +0.00% / +0.00% -1.05% -0.82%] index_add_ strided 3 : Elapsed 0.009 ms (0.855 ms / 100) 0.833 -> 0.823 ( -1.20%) [ +0.00% +0.84% +0.24% / +0.48% -0.84% -1.20%] index_copy_ strided 3 : Elapsed 0.008 ms (0.833 ms / 100) 0.851 -> 0.846 ( -0.59%) [ +0.35% +0.12% +0.00% / +0.12% -0.24% -0.59%] index_add_ strided 5 : Elapsed 0.009 ms (0.854 ms / 100) 0.834 -> 0.827 ( -0.84%) [ +0.00% +0.12% +0.00% / +0.12% -0.84% -0.72%] index_copy_ strided 5 : Elapsed 0.008 ms (0.834 ms / 100) 0.851 -> 0.850 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.94% +0.71%] index_add_ strided 7 : Elapsed 0.009 ms (0.851 ms / 100) 0.831 -> 0.832 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.96% +0.96%] index_copy_ strided 7 : Elapsed 0.008 ms (0.831 ms / 100) 0.853 -> 0.850 ( -0.35%) [ +0.12% +0.23% +0.00% / +0.12% -0.12% -0.35%] index_add_ perm : Elapsed 0.009 ms (0.854 ms / 100) 0.833 -> 0.834 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.48% +0.12% +0.12%] index_copy_ perm : Elapsed 0.008 ms (0.833 ms / 100) 0.852 -> 0.849 ( -0.35%) [ +0.12% +0.00% +0.12% / +0.00% -0.35% +0.12%] index_add_ perm_sorted : Elapsed 0.009 ms (0.853 ms / 100) 0.830 -> 0.830 ( +0.00%) [ +0.12% +0.36% +0.00% / +0.00% +0.48% +0.36%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.831 ms / 100) 1.683 -> 1.683 ( +0.00%) [ +0.24% +0.00% +0.18% / +0.18% +0.00% +0.12%] index_select const : Elapsed 0.017 ms (1.687 ms / 100) 1.679 -> 1.678 ( -0.06%) [ +0.24% +0.12% +0.00% / +0.12% -0.06% -0.06%] index_select wrap : Elapsed 0.017 ms (1.683 ms / 100) 1.686 -> 1.685 ( -0.06%) [ +0.18% +0.06% +0.00% / +0.24% -0.06% +0.24%] index_select linear : Elapsed 0.017 ms (1.689 ms / 100) 1.687 -> 1.687 ( +0.00%) [ +0.30% +0.06% +0.00% / +0.36% +0.00% +0.00%] index_select reverse : Elapsed 0.017 ms (1.692 ms / 100) 1.676 -> 1.673 ( -0.18%) [ +0.30% +0.00% +0.18% / -0.12% -0.18% +0.42%] index_select skip64 : Elapsed 0.017 ms (1.681 ms / 100) 1.679 -> 1.680 ( +0.06%) [ +0.48% +0.36% +0.00% / +0.06% +0.06% +0.24%] index_select skip256 : Elapsed 0.017 ms (1.687 ms / 100) 1.684 -> 1.688 ( +0.24%) [ +0.30% +0.18% +0.00% / +0.24% +0.24% +0.24%] index_select spread : Elapsed 0.017 ms (1.689 ms / 100) 1.690 -> 1.689 ( -0.06%) [ +0.12% +0.12% +0.00% / +0.30% +0.12% -0.06%] index_select strided 3 : Elapsed 0.017 ms (1.692 ms / 100) 1.689 -> 1.692 ( +0.18%) [ +0.18% +0.24% +0.00% / +0.41% +0.18% +5.74%] index_select random : Elapsed 0.017 ms (1.692 ms / 100) 1.681 -> 1.680 ( -0.06%) [ +0.00% +0.12% +0.06% / -0.06% +0.00% -0.06%] index_select random_sorted : Elapsed 0.017 ms (1.681 ms / 100) B = [4, 16, 20, 40] (stride (1, 160, 2560, 4)) A = [4, 5, 20, 40] (stride (1, 4, 800, 20)) dim = 1 2.332 -> 2.336 ( +0.17%) [ +0.26% +0.00% +0.04% / +0.17% +0.69% +0.34%] index_add_ linear : Elapsed 0.023 ms (2.338 ms / 100) 2.256 -> 2.264 ( +0.35%) [ +0.00% +0.09% +0.13% / +0.35% +0.53% +0.40%] index_copy_ linear : Elapsed 0.023 ms (2.256 ms / 100) 2.307 -> 2.307 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.35% +0.78%] index_add_ reverse : Elapsed 0.023 ms (2.310 ms / 100) 2.239 -> 2.242 ( +0.13%) [ +0.00% +0.13% +0.04% / +0.13% +0.40% +0.58%] index_copy_ reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.00% +0.13% +0.04% / +0.17% +0.47% +1.68%] index_add_ spread : Elapsed 0.023 ms (2.327 ms / 100) 2.251 -> 2.256 ( +0.22%) [ +0.00% +0.36% +0.00% / +0.22% +0.53% +0.49%] index_copy_ spread : Elapsed 0.023 ms (2.251 ms / 100) 2.334 -> 2.337 ( +0.13%) [ +0.21% +0.00% +0.00% / +0.13% +0.56% +0.51%] index_add_ strided 3 : Elapsed 0.023 ms (2.339 ms / 100) 2.262 -> 2.266 ( +0.18%) [ +0.04% +0.09% +0.00% / +0.18% +0.35% +0.35%] index_copy_ strided 3 : Elapsed 0.023 ms (2.263 ms / 100) 2.346 -> 2.346 ( +0.00%) [ +0.26% +0.21% +0.00% / +0.00% +0.64% +0.51%] index_add_ strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.274 -> 2.270 ( -0.18%) [ +0.00% +0.00% +0.09% / -0.18% +0.22% +0.31%] index_copy_ strided 5 : Elapsed 0.023 ms (2.274 ms / 100) 2.347 -> 2.349 ( +0.09%) [ +0.00% +0.21% +0.17% / +0.09% +0.43% +0.43%] index_add_ strided 7 : Elapsed 0.023 ms (2.347 ms / 100) 2.271 -> 2.276 ( +0.22%) [ +0.13% +0.13% +0.00% / +0.22% +0.53% +0.35%] index_copy_ strided 7 : Elapsed 0.023 ms (2.274 ms / 100) 2.335 -> 2.336 ( +0.04%) [ +0.30% +0.00% +0.13% / +0.04% +0.34% +0.47%] index_add_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.259 -> 2.264 ( +0.22%) [ +0.00% +0.13% +0.04% / +0.22% +0.44% +0.80%] index_copy_ perm : Elapsed 0.023 ms (2.259 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.82% +0.65%] index_add_ perm_sorted : Elapsed 0.023 ms (2.326 ms / 100) 2.250 -> 2.253 ( +0.13%) [ +0.22% +0.00% +0.13% / +0.13% +0.58% +0.49%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.255 ms / 100) 4.948 -> 4.952 ( +0.08%) [ +0.00% +0.02% +0.04% / +0.08% +0.75% +0.71%] index_select const : Elapsed 0.049 ms (4.948 ms / 100) 4.888 -> 4.894 ( +0.12%) [ +0.29% +0.00% +0.16% / +0.12% +0.76% +0.86%] index_select wrap : Elapsed 0.049 ms (4.902 ms / 100) 4.936 -> 4.942 ( +0.12%) [ +0.06% +0.00% +0.04% / +0.12% +0.61% +0.61%] index_select linear : Elapsed 0.049 ms (4.939 ms / 100) 4.951 -> 4.952 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.67% +0.48%] index_select reverse : Elapsed 0.050 ms (4.951 ms / 100) 4.969 -> 4.976 ( +0.14%) [ +0.18% +0.08% +0.00% / +0.14% +0.68% +0.70%] index_select skip64 : Elapsed 0.050 ms (4.978 ms / 100) 4.971 -> 4.974 ( +0.06%) [ +0.04% +0.06% +0.00% / +0.06% +0.72% +0.60%] index_select skip256 : Elapsed 0.050 ms (4.973 ms / 100) 4.951 -> 4.942 ( -0.18%) [ +0.00% +0.02% +0.10% / -0.18% +0.53% +0.50%] index_select spread : Elapsed 0.050 ms (4.951 ms / 100) 4.892 -> 4.900 ( +0.16%) [ +0.02% +0.00% +0.08% / +0.16% +0.72% +0.80%] index_select strided 3 : Elapsed 0.049 ms (4.893 ms / 100) 4.947 -> 4.944 ( -0.06%) [ +0.12% +0.00% +0.18% / -0.06% +0.65% +0.83%] index_select random : Elapsed 0.050 ms (4.953 ms / 100) 4.969 -> 4.970 ( +0.02%) [ +0.06% +0.00% +0.00% / +0.02% +0.74% +0.78%] index_select random_sorted : Elapsed 0.050 ms (4.972 ms / 100) B = [4, 16, 20, 40] (stride (20, 80, 1, 1280)) A = [4, 5, 20, 40] (stride (4000, 1, 200, 5)) dim = 1 2.199 -> 2.200 ( +0.05%) [ +0.27% +0.36% +0.00% / +0.05% +0.82% +1.09%] index_add_ linear : Elapsed 0.022 ms (2.205 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.37% +0.09% +0.00% / +0.05% +0.74% +0.97%] index_copy_ linear : Elapsed 0.022 ms (2.167 ms / 100) 2.195 -> 2.198 ( +0.14%) [ +0.41% +0.36% +0.00% / +0.14% +1.05% +1.23%] index_add_ reverse : Elapsed 0.022 ms (2.204 ms / 100) 2.155 -> 2.156 ( +0.05%) [ +0.23% +0.37% +0.00% / +0.05% +0.93% +1.35%] index_copy_ reverse : Elapsed 0.022 ms (2.160 ms / 100) 2.201 -> 2.198 ( -0.14%) [ +0.14% +0.05% +0.00% / -0.14% +0.86% +0.77%] index_add_ spread : Elapsed 0.022 ms (2.204 ms / 100) 2.156 -> 2.155 ( -0.05%) [ +0.14% +0.09% +0.00% / -0.05% +0.79% +0.74%] index_copy_ spread : Elapsed 0.022 ms (2.159 ms / 100) 2.194 -> 2.194 ( +0.00%) [ +0.32% +0.09% +0.00% / +0.00% +0.91% +1.09%] index_add_ strided 3 : Elapsed 0.022 ms (2.201 ms / 100) 2.153 -> 2.153 ( +0.00%) [ +0.14% +0.70% +0.00% / +0.00% +0.65% +0.93%] index_copy_ strided 3 : Elapsed 0.022 ms (2.156 ms / 100) 2.192 -> 2.192 ( +0.00%) [ +0.41% +0.41% +0.00% / +0.00% +1.41% +1.28%] index_add_ strided 5 : Elapsed 0.022 ms (2.201 ms / 100) 2.154 -> 2.152 ( -0.09%) [ +0.19% +0.23% +0.00% / -0.09% +1.16% +1.16%] index_copy_ strided 5 : Elapsed 0.022 ms (2.158 ms / 100) 2.191 -> 2.191 ( +0.00%) [ +0.23% +0.41% +0.00% / +0.00% +1.55% +1.46%] index_add_ strided 7 : Elapsed 0.022 ms (2.196 ms / 100) 2.151 -> 2.155 ( +0.19%) [ +0.28% +0.28% +0.00% / +0.19% +1.58% +1.21%] index_copy_ strided 7 : Elapsed 0.022 ms (2.157 ms / 100) 2.192 -> 2.199 ( +0.32%) [ +0.18% +0.32% +0.00% / +0.32% +1.28% +1.05%] index_add_ perm : Elapsed 0.022 ms (2.196 ms / 100) 2.153 -> 2.156 ( +0.14%) [ +0.28% +0.00% +0.00% / +0.14% +1.21% +0.98%] index_copy_ perm : Elapsed 0.022 ms (2.159 ms / 100) 2.197 -> 2.202 ( +0.23%) [ +0.27% +0.05% +0.00% / +0.23% +0.91% +0.77%] index_add_ perm_sorted : Elapsed 0.022 ms (2.203 ms / 100) 2.151 -> 2.158 ( +0.33%) [ +0.74% +0.23% +0.00% / +0.33% +1.07% +1.02%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.167 ms / 100) 4.611 -> 4.619 ( +0.17%) [ +0.15% +0.00% +0.02% / +0.17% +0.93% +0.98%] index_select const : Elapsed 0.046 ms (4.618 ms / 100) 4.616 -> 4.614 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.91% +0.82%] index_select wrap : Elapsed 0.046 ms (4.618 ms / 100) 4.619 -> 4.627 ( +0.17%) [ +0.24% +0.00% +0.24% / +0.17% +1.08% +2.32%] index_select linear : Elapsed 0.046 ms (4.630 ms / 100) 4.621 -> 4.622 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.74% +0.63%] index_select reverse : Elapsed 0.046 ms (4.623 ms / 100) 4.610 -> 4.617 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.15% +1.04%] index_select skip64 : Elapsed 0.046 ms (4.617 ms / 100) 4.612 -> 4.615 ( +0.07%) [ +0.00% +0.11% +0.09% / +0.07% +0.93% +0.93%] index_select skip256 : Elapsed 0.046 ms (4.612 ms / 100) 4.612 -> 4.624 ( +0.26%) [ +0.11% +0.09% +0.00% / +0.26% +1.06% +0.95%] index_select spread : Elapsed 0.046 ms (4.617 ms / 100) 4.608 -> 4.617 ( +0.20%) [ +0.00% +0.09% +0.15% / +0.20% +1.11% +1.26%] index_select strided 3 : Elapsed 0.046 ms (4.608 ms / 100) 4.616 -> 4.619 ( +0.06%) [ +0.09% +0.02% +0.00% / +0.06% +0.91% +0.80%] index_select random : Elapsed 0.046 ms (4.620 ms / 100) 4.611 -> 4.615 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.09% +0.95% +1.02%] index_select random_sorted : Elapsed 0.046 ms (4.615 ms / 100) B = [4, 16, 20, 40] (stride (20, 80, 1, 1280)) A = [4, 5, 20, 40] (stride (4000, 1, 5, 100)) dim = 1 2.562 -> 2.561 ( -0.04%) [ +0.12% +0.08% +0.00% / +0.00% +0.12% -0.04%] index_add_ linear : Elapsed 0.026 ms (2.565 ms / 100) 2.494 -> 2.500 ( +0.24%) [ +0.28% +0.24% +0.00% / +0.24% +0.36% +0.24%] index_copy_ linear : Elapsed 0.025 ms (2.501 ms / 100) 2.562 -> 2.561 ( -0.04%) [ +0.12% +0.00% +0.12% / -0.04% +0.16% +0.00%] index_add_ reverse : Elapsed 0.026 ms (2.565 ms / 100) 2.498 -> 2.496 ( -0.08%) [ +0.04% +0.00% +0.12% / +0.16% -0.08% +0.04%] index_copy_ reverse : Elapsed 0.025 ms (2.499 ms / 100) 2.563 -> 2.561 ( -0.08%) [ +0.00% +0.12% +0.08% / -0.08% +0.04% +0.16%] index_add_ spread : Elapsed 0.026 ms (2.563 ms / 100) 2.492 -> 2.494 ( +0.08%) [ +0.16% +0.20% +0.00% / +0.24% +0.20% +0.08%] index_copy_ spread : Elapsed 0.025 ms (2.496 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.39% +0.08% +0.00% / +0.00% +0.39% +0.39%] index_add_ strided 3 : Elapsed 0.026 ms (2.568 ms / 100) 2.492 -> 2.497 ( +0.20%) [ +0.12% +0.00% +0.24% / +0.24% +0.28% +0.20%] index_copy_ strided 3 : Elapsed 0.025 ms (2.495 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.12% +0.08%] index_add_ strided 5 : Elapsed 0.026 ms (2.562 ms / 100) 2.495 -> 2.497 ( +0.08%) [ +0.00% +0.28% +0.04% / +0.08% +0.20% +0.20%] index_copy_ strided 5 : Elapsed 0.025 ms (2.495 ms / 100) 2.559 -> 2.564 ( +0.20%) [ +0.23% +0.27% +0.00% / +0.20% +0.27% +0.20%] index_add_ strided 7 : Elapsed 0.026 ms (2.565 ms / 100) 2.493 -> 2.498 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.20% +0.32% +0.32%] index_copy_ strided 7 : Elapsed 0.025 ms (2.497 ms / 100) 2.566 -> 2.562 ( -0.16%) [ +0.00% +0.04% +0.00% / +0.39% -0.16% -0.12%] index_add_ perm : Elapsed 0.026 ms (2.566 ms / 100) 2.498 -> 2.498 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.08% +0.04%] index_copy_ perm : Elapsed 0.025 ms (2.500 ms / 100) 2.561 -> 2.562 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.27% +0.27%] index_add_ perm_sorted : Elapsed 0.026 ms (2.561 ms / 100) 2.496 -> 2.497 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.24% +0.36%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.498 ms / 100) 5.668 -> 5.670 ( +0.04%) [ +0.11% +0.14% +0.00% / +0.04% +0.42% +0.49%] index_select const : Elapsed 0.057 ms (5.674 ms / 100) 5.670 -> 5.667 ( -0.05%) [ +0.11% +0.04% +0.00% / -0.05% +0.41% +0.41%] index_select wrap : Elapsed 0.057 ms (5.676 ms / 100) 5.667 -> 5.670 ( +0.05%) [ +0.16% +0.23% +0.00% / +0.05% +0.53% +0.53%] index_select linear : Elapsed 0.057 ms (5.676 ms / 100) 5.670 -> 5.669 ( -0.02%) [ +0.12% +0.11% +0.00% / -0.02% +0.60% +0.48%] index_select reverse : Elapsed 0.057 ms (5.677 ms / 100) 5.673 -> 5.681 ( +0.14%) [ +0.19% +0.16% +0.00% / +0.14% +0.51% +0.56%] index_select skip64 : Elapsed 0.057 ms (5.684 ms / 100) 5.677 -> 5.680 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.49% +1.18%] index_select skip256 : Elapsed 0.057 ms (5.677 ms / 100) 5.669 -> 5.670 ( +0.02%) [ +0.00% +0.07% +0.07% / +0.02% +0.35% +0.48%] index_select spread : Elapsed 0.057 ms (5.669 ms / 100) 5.663 -> 5.672 ( +0.16%) [ +0.05% +0.11% +0.00% / +0.16% +0.55% +0.58%] index_select strided 3 : Elapsed 0.057 ms (5.666 ms / 100) 5.667 -> 5.667 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.56% +0.60%] index_select random : Elapsed 0.057 ms (5.667 ms / 100) 5.670 -> 5.678 ( +0.14%) [ +0.09% +0.12% +0.00% / +0.14% +0.60% +0.72%] index_select random_sorted : Elapsed 0.057 ms (5.675 ms / 100) B = [4, 16, 20, 40] (stride (1, 80, 4, 1280)) A = [4, 5, 20, 40] (stride (100, 1, 5, 400)) dim = 1 2.556 -> 2.557 ( +0.04%) [ +0.00% +0.00% +0.20% / +0.04% +0.43% +0.47%] index_add_ linear : Elapsed 0.026 ms (2.556 ms / 100) 2.490 -> 2.493 ( +0.12%) [ +0.20% +0.24% +0.00% / +0.12% +0.64% +0.52%] index_copy_ linear : Elapsed 0.025 ms (2.495 ms / 100) 2.553 -> 2.558 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.71% +0.59%] index_add_ reverse : Elapsed 0.026 ms (2.558 ms / 100) 2.490 -> 2.496 ( +0.24%) [ +0.04% +0.00% +0.08% / +0.24% +0.64% +0.56%] index_copy_ reverse : Elapsed 0.025 ms (2.491 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.20% +0.27% +0.00% / +0.04% +0.55% +0.59%] index_add_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.486 -> 2.484 ( -0.08%) [ +0.00% +0.20% +0.00% / -0.08% +0.52% +0.52%] index_copy_ spread : Elapsed 0.025 ms (2.486 ms / 100) 2.553 -> 2.557 ( +0.16%) [ +0.12% +0.24% +0.00% / +0.16% +0.39% +0.47%] index_add_ strided 3 : Elapsed 0.026 ms (2.556 ms / 100) 2.486 -> 2.493 ( +0.28%) [ +0.20% +0.24% +0.00% / +0.28% +0.68% +0.64%] index_copy_ strided 3 : Elapsed 0.025 ms (2.491 ms / 100) 2.556 -> 2.553 ( -0.12%) [ +0.08% +0.20% +0.00% / -0.12% +0.55% +0.59%] index_add_ strided 5 : Elapsed 0.026 ms (2.558 ms / 100) 2.492 -> 2.496 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.56% +0.60%] index_copy_ strided 5 : Elapsed 0.025 ms (2.494 ms / 100) 2.548 -> 2.550 ( +0.08%) [ +0.27% +0.12% +0.00% / +0.08% +0.94% +0.82%] index_add_ strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.485 -> 2.486 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.85% +1.01%] index_copy_ strided 7 : Elapsed 0.025 ms (2.485 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.08% +0.08% +0.00% / +0.23% +0.20% +0.35%] index_add_ perm : Elapsed 0.026 ms (2.560 ms / 100) 2.491 -> 2.493 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.48% +0.48%] index_copy_ perm : Elapsed 0.025 ms (2.494 ms / 100) 2.548 -> 2.552 ( +0.16%) [ +0.16% +0.00% +0.04% / +0.16% +0.67% +0.55%] index_add_ perm_sorted : Elapsed 0.026 ms (2.552 ms / 100) 2.482 -> 2.489 ( +0.28%) [ +0.36% +0.16% +0.00% / +0.28% +0.68% +0.64%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.491 ms / 100) 5.654 -> 5.662 ( +0.14%) [ +0.16% +0.00% +0.12% / +0.14% +0.46% +0.64%] index_select const : Elapsed 0.057 ms (5.663 ms / 100) 5.656 -> 5.656 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.62% +0.69%] index_select wrap : Elapsed 0.057 ms (5.664 ms / 100) 5.633 -> 5.639 ( +0.11%) [ +0.00% +0.25% +0.21% / +0.11% +0.87% +0.60%] index_select linear : Elapsed 0.056 ms (5.633 ms / 100) 5.657 -> 5.661 ( +0.07%) [ +0.18% +0.18% +0.00% / +0.07% +0.53% +0.69%] index_select reverse : Elapsed 0.057 ms (5.667 ms / 100) 5.656 -> 5.665 ( +0.16%) [ +0.05% +0.09% +0.00% / +0.16% +0.81% +0.80%] index_select skip64 : Elapsed 0.057 ms (5.659 ms / 100) 5.648 -> 5.662 ( +0.25%) [ +0.19% +0.00% +0.18% / +0.25% +0.76% +0.78%] index_select skip256 : Elapsed 0.057 ms (5.659 ms / 100) 5.654 -> 5.661 ( +0.12%) [ +0.16% +0.05% +0.00% / +0.12% +0.67% +0.65%] index_select spread : Elapsed 0.057 ms (5.663 ms / 100) 5.638 -> 5.638 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.50% +0.57%] index_select strided 3 : Elapsed 0.056 ms (5.643 ms / 100) 5.651 -> 5.658 ( +0.12%) [ +0.14% +0.16% +0.00% / +0.12% +0.76% +0.60%] index_select random : Elapsed 0.057 ms (5.659 ms / 100) 5.645 -> 5.651 ( +0.11%) [ +0.18% +0.14% +0.00% / +0.11% +0.96% +0.83%] index_select random_sorted : Elapsed 0.057 ms (5.655 ms / 100) B = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) A = [4, 5, 20, 40] (stride (20, 80, 1, 400)) dim = 1 2.577 -> 2.583 ( +0.23%) [ +0.16% +0.00% +0.00% / +0.35% +0.23% +0.39%] index_add_ linear : Elapsed 0.026 ms (2.581 ms / 100) 2.529 -> 2.533 ( +0.16%) [ +0.00% +0.04% +0.20% / +0.16% +0.36% +0.43%] index_copy_ linear : Elapsed 0.025 ms (2.529 ms / 100) 2.585 -> 2.589 ( +0.15%) [ +0.27% +0.19% +0.00% / +0.27% +0.19% +0.15%] index_add_ reverse : Elapsed 0.026 ms (2.592 ms / 100) 2.535 -> 2.541 ( +0.24%) [ +0.04% +0.04% +0.00% / +0.24% +0.32% +0.43%] index_copy_ reverse : Elapsed 0.025 ms (2.536 ms / 100) 2.608 -> 2.612 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.23% +0.15% +0.19%] index_add_ spread : Elapsed 0.026 ms (2.610 ms / 100) 2.593 -> 2.596 ( +0.12%) [ +0.27% +0.00% +0.35% / +0.12% +0.23% +0.23%] index_copy_ spread : Elapsed 0.026 ms (2.600 ms / 100) 2.606 -> 2.615 ( +0.35%) [ +0.38% +0.23% +0.00% / +0.38% +0.38% +0.35%] index_add_ strided 3 : Elapsed 0.026 ms (2.616 ms / 100) 2.594 -> 2.597 ( +0.12%) [ +0.12% +0.19% +0.00% / +0.12% +0.27% +0.39%] index_copy_ strided 3 : Elapsed 0.026 ms (2.597 ms / 100) 2.614 -> 2.616 ( +0.08%) [ +0.19% +0.00% +0.08% / +0.08% +0.27% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.619 ms / 100) 2.599 -> 2.604 ( +0.19%) [ +0.27% +0.04% +0.00% / +0.38% +0.27% +0.19%] index_copy_ strided 5 : Elapsed 0.026 ms (2.606 ms / 100) 2.623 -> 2.615 ( -0.30%) [ +0.00% +0.04% +0.00% / -0.04% -0.27% -0.30%] index_add_ strided 7 : Elapsed 0.026 ms (2.623 ms / 100) 2.599 -> 2.597 ( -0.08%) [ +0.00% +0.23% +0.00% / +0.19% -0.08% +0.08%] index_copy_ strided 7 : Elapsed 0.026 ms (2.599 ms / 100) 2.608 -> 2.608 ( +0.00%) [ +0.38% +0.23% +0.00% / +0.31% +0.00% +0.12%] index_add_ perm : Elapsed 0.026 ms (2.618 ms / 100) 2.595 -> 2.596 ( +0.04%) [ +0.23% +0.15% +0.00% / +0.04% +0.23% +0.12%] index_copy_ perm : Elapsed 0.026 ms (2.601 ms / 100) 2.611 -> 2.609 ( -0.08%) [ +0.00% +0.08% +0.11% / -0.08% +0.04% +0.00%] index_add_ perm_sorted : Elapsed 0.026 ms (2.611 ms / 100) 2.593 -> 2.595 ( +0.08%) [ +0.00% +0.23% +0.12% / +0.08% +0.31% +0.15%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.593 ms / 100) 5.675 -> 5.677 ( +0.04%) [ +0.00% +0.05% +0.07% / +0.04% +0.63% +0.42%] index_select const : Elapsed 0.057 ms (5.675 ms / 100) 5.685 -> 5.680 ( -0.09%) [ +0.02% +0.04% +0.00% / -0.09% +0.46% +0.42%] index_select wrap : Elapsed 0.057 ms (5.686 ms / 100) 5.670 -> 5.675 ( +0.09%) [ +0.11% +0.09% +0.00% / +0.09% +0.34% +0.48%] index_select linear : Elapsed 0.057 ms (5.676 ms / 100) 5.638 -> 5.640 ( +0.04%) [ +0.00% +0.07% +0.04% / +0.04% +0.55% +0.62%] index_select reverse : Elapsed 0.056 ms (5.638 ms / 100) 5.689 -> 5.689 ( +0.00%) [ +0.25% +0.09% +0.00% / +0.00% +0.49% +0.56%] index_select skip64 : Elapsed 0.057 ms (5.703 ms / 100) 5.690 -> 5.692 ( +0.04%) [ +0.05% +0.12% +0.00% / +0.04% +0.49% +0.56%] index_select skip256 : Elapsed 0.057 ms (5.693 ms / 100) 5.669 -> 5.668 ( -0.02%) [ +0.12% +0.12% +0.00% / -0.02% +0.39% +0.49%] index_select spread : Elapsed 0.057 ms (5.676 ms / 100) 5.676 -> 5.685 ( +0.16%) [ +0.05% +0.05% +0.00% / +0.16% +0.58% +0.62%] index_select strided 3 : Elapsed 0.057 ms (5.679 ms / 100) 5.662 -> 5.664 ( +0.04%) [ +0.05% +0.00% +0.00% / +0.04% +0.71% +0.60%] index_select random : Elapsed 0.057 ms (5.665 ms / 100) 5.679 -> 5.684 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.63% +0.55%] index_select random_sorted : Elapsed 0.057 ms (5.684 ms / 100) out_shape = [4, 5, 16, 40] in_shape = [4, 5, 20, 40] idx_dim = 2 B = [4, 5, 16, 40] (stride (640, 2560, 40, 1)) A = [4, 5, 20, 40] (stride (4000, 800, 1, 20)) dim = 2 3.785 -> 3.785 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.77% +2.19%] index_select const : Elapsed 0.038 ms (3.786 ms / 100) 3.797 -> 3.798 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.71% +0.68%] index_select wrap : Elapsed 0.038 ms (3.797 ms / 100) 3.777 -> 3.776 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.66% +1.24%] index_select linear : Elapsed 0.038 ms (3.777 ms / 100) 3.770 -> 3.771 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.66% +0.66%] index_select reverse : Elapsed 0.038 ms (3.771 ms / 100) 3.777 -> 3.777 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.50% +0.48%] index_select skip64 : Elapsed 0.038 ms (3.779 ms / 100) 3.782 -> 3.783 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.61% +0.58%] index_select skip256 : Elapsed 0.038 ms (3.783 ms / 100) 3.798 -> 3.798 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.63% +0.66%] index_select spread : Elapsed 0.038 ms (3.799 ms / 100) 3.783 -> 3.783 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.783 ms / 100) 3.775 -> 3.776 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.74% +0.72%] index_select strided 5 : Elapsed 0.038 ms (3.777 ms / 100) 3.773 -> 3.773 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.66% +0.66%] index_select strided 7 : Elapsed 0.038 ms (3.773 ms / 100) 3.783 -> 3.783 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.69% +0.71%] index_select strided 8 : Elapsed 0.038 ms (3.783 ms / 100) 3.797 -> 3.797 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_select strided 16 : Elapsed 0.038 ms (3.798 ms / 100) 3.774 -> 3.774 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_select random : Elapsed 0.038 ms (3.776 ms / 100) 3.782 -> 3.782 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.42% +0.40%] index_select random_sorted : Elapsed 0.038 ms (3.783 ms / 100) 3.802 -> 3.801 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.47% +0.45%] index_select perm : Elapsed 0.038 ms (3.805 ms / 100) 3.787 -> 3.788 ( +0.03%) [ +0.05% +0.00% +0.16% / +0.03% +0.55% +0.53%] index_select perm_sorted : Elapsed 0.038 ms (3.789 ms / 100) B = [4, 5, 16, 40] (stride (40, 2560, 160, 1)) A = [4, 5, 20, 40] (stride (100, 20, 1, 400)) dim = 2 3.925 -> 3.933 ( +0.20%) [ +0.00% +0.15% +0.15% / +0.20% +0.54% +0.59%] index_select const : Elapsed 0.039 ms (3.925 ms / 100) 3.937 -> 3.941 ( +0.10%) [ +0.00% +0.13% +0.15% / +0.10% +0.61% +0.61%] index_select wrap : Elapsed 0.039 ms (3.937 ms / 100) 3.942 -> 3.944 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.56% +0.48%] index_select linear : Elapsed 0.039 ms (3.942 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.54% +0.46%] index_select reverse : Elapsed 0.039 ms (3.924 ms / 100) 3.925 -> 3.922 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.08% +0.48% +0.46%] index_select skip64 : Elapsed 0.039 ms (3.926 ms / 100) 3.927 -> 3.922 ( -0.13%) [ +0.00% +0.31% +0.00% / -0.13% +0.74% +0.71%] index_select skip256 : Elapsed 0.039 ms (3.927 ms / 100) 3.945 -> 3.944 ( -0.03%) [ +0.00% +0.00% +0.13% / -0.03% +0.58% +0.51%] index_select spread : Elapsed 0.039 ms (3.945 ms / 100) 3.935 -> 3.939 ( +0.10%) [ +0.00% +0.08% +0.03% / +0.10% +0.64% +0.61%] index_select strided 3 : Elapsed 0.039 ms (3.935 ms / 100) 3.924 -> 3.923 ( -0.03%) [ +0.00% +0.08% +0.08% / -0.03% +0.38% +0.51%] index_select strided 5 : Elapsed 0.039 ms (3.924 ms / 100) 3.925 -> 3.929 ( +0.10%) [ +0.05% +0.00% +0.18% / +0.10% +0.56% +0.74%] index_select strided 7 : Elapsed 0.039 ms (3.927 ms / 100) 3.922 -> 3.926 ( +0.10%) [ +0.03% +0.00% +0.05% / +0.10% +0.66% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.923 ms / 100) 3.934 -> 3.936 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.56% +0.58%] index_select strided 16 : Elapsed 0.039 ms (3.934 ms / 100) 3.928 -> 3.933 ( +0.13%) [ +0.15% +0.00% +0.15% / +0.13% +0.76% +0.69%] index_select random : Elapsed 0.039 ms (3.934 ms / 100) 3.929 -> 3.929 ( +0.00%) [ +0.20% +0.23% +0.00% / +0.00% +0.76% +0.84%] index_select random_sorted : Elapsed 0.039 ms (3.937 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.71% +0.53%] index_select perm : Elapsed 0.039 ms (3.935 ms / 100) 3.932 -> 3.943 ( +0.28%) [ +0.15% +0.20% +0.00% / +0.28% +0.86% +0.71%] index_select perm_sorted : Elapsed 0.039 ms (3.938 ms / 100) B = [4, 5, 16, 40] (stride (1, 2560, 160, 4)) A = [4, 5, 20, 40] (stride (4000, 1, 200, 5)) dim = 2 3.905 -> 3.919 ( +0.36%) [ +0.08% +0.00% +0.15% / +0.36% +0.64% +0.59%] index_select const : Elapsed 0.039 ms (3.908 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.00% +0.05% +0.05% / -0.03% +0.59% +0.61%] index_select wrap : Elapsed 0.039 ms (3.921 ms / 100) 3.908 -> 3.913 ( +0.13%) [ +0.00% +0.23% +0.08% / +0.13% +0.64% +0.72%] index_select linear : Elapsed 0.039 ms (3.908 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.79% +0.74%] index_select reverse : Elapsed 0.039 ms (3.927 ms / 100) 3.906 -> 3.904 ( -0.05%) [ +0.05% +0.00% +0.28% / -0.05% +0.49% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.908 ms / 100) 3.895 -> 3.895 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.62% +0.69%] index_select skip256 : Elapsed 0.039 ms (3.895 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.18% +0.00% +0.08% / +0.08% +0.59% +0.61%] index_select spread : Elapsed 0.039 ms (3.931 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.05% +0.00% +0.08% / +0.08% +0.46% +0.41%] index_select strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 3.905 -> 3.913 ( +0.20%) [ +0.00% +0.03% +0.46% / +0.20% +0.64% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.905 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.00% +0.03% +0.08% / +0.05% +0.84% +0.84%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.925 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.48% +0.56%] index_select strided 8 : Elapsed 0.039 ms (3.919 ms / 100) 3.924 -> 3.921 ( -0.08%) [ +0.05% +0.03% +0.00% / -0.08% +0.54% +0.56%] index_select strided 16 : Elapsed 0.039 ms (3.926 ms / 100) 3.925 -> 3.930 ( +0.13%) [ +0.00% +0.25% +0.00% / +0.13% +0.71% +0.59%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.922 ( +0.08%) [ +0.08% +0.00% +0.20% / +0.08% +0.46% +0.43%] index_select random_sorted : Elapsed 0.039 ms (3.922 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.51% +0.51%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.08% +0.00% +0.08% / +0.05% +0.38% +0.46%] index_select perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) B = [4, 5, 16, 40] (stride (40, 160, 800, 1)) A = [4, 5, 20, 40] (stride (1, 4, 800, 20)) dim = 2 4.135 -> 4.135 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.00% +0.34% +0.19%] index_select const : Elapsed 0.041 ms (4.136 ms / 100) 4.116 -> 4.122 ( +0.15%) [ +0.12% +0.07% +0.00% / +0.15% +0.53% +0.56%] index_select wrap : Elapsed 0.041 ms (4.121 ms / 100) 4.120 -> 4.124 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.58% +0.56%] index_select linear : Elapsed 0.041 ms (4.122 ms / 100) 4.112 -> 4.111 ( -0.02%) [ +0.10% +0.02% +0.00% / -0.02% +0.58% +0.54%] index_select reverse : Elapsed 0.041 ms (4.116 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.46% +0.46%] index_select skip64 : Elapsed 0.041 ms (4.139 ms / 100) 4.140 -> 4.141 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.43% +0.41%] index_select skip256 : Elapsed 0.041 ms (4.143 ms / 100) 4.112 -> 4.111 ( -0.02%) [ +0.05% +0.17% +0.00% / -0.02% +0.49% +0.54%] index_select spread : Elapsed 0.041 ms (4.114 ms / 100) 4.110 -> 4.109 ( -0.02%) [ +0.02% +0.00% +0.07% / -0.02% +0.90% +0.54%] index_select strided 3 : Elapsed 0.041 ms (4.111 ms / 100) 4.126 -> 4.119 ( -0.17%) [ +0.02% +0.00% +0.00% / -0.17% +0.44% +0.46%] index_select strided 5 : Elapsed 0.041 ms (4.127 ms / 100) 4.130 -> 4.136 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.44% +0.65%] index_select strided 7 : Elapsed 0.041 ms (4.134 ms / 100) 4.125 -> 4.124 ( -0.02%) [ +0.00% +0.05% +0.02% / -0.02% +0.56% +0.56%] index_select strided 8 : Elapsed 0.041 ms (4.125 ms / 100) 4.101 -> 4.107 ( +0.15%) [ +0.07% +0.00% +0.15% / +0.15% +0.59% +0.63%] index_select strided 16 : Elapsed 0.041 ms (4.104 ms / 100) 4.112 -> 4.115 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.56% +0.56%] index_select random : Elapsed 0.041 ms (4.112 ms / 100) 4.121 -> 4.120 ( -0.02%) [ +0.12% +0.10% +0.00% / -0.02% +0.75% +0.78%] index_select random_sorted : Elapsed 0.041 ms (4.126 ms / 100) 4.125 -> 4.123 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.73% +0.73%] index_select perm : Elapsed 0.041 ms (4.126 ms / 100) 4.103 -> 4.100 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.56% +0.63%] index_select perm_sorted : Elapsed 0.041 ms (4.103 ms / 100) out_shape = [4, 5, 20, 16] in_shape = [4, 5, 20, 40] idx_dim = 3 B = [4, 5, 20, 16] (stride (1600, 20, 1, 100)) A = [4, 5, 20, 40] (stride (1, 80, 4, 400)) dim = 3 1.284 -> 1.286 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.62% +0.70%] index_select const : Elapsed 0.013 ms (1.285 ms / 100) 1.286 -> 1.289 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.31% +0.23% +0.23%] index_select wrap : Elapsed 0.013 ms (1.286 ms / 100) 1.285 -> 1.283 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% +0.16% +0.16%] index_select linear : Elapsed 0.013 ms (1.286 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.63%] index_select reverse : Elapsed 0.013 ms (1.281 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.39% +0.31%] index_select skip64 : Elapsed 0.013 ms (1.286 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.55% +7.55%] index_select skip256 : Elapsed 0.013 ms (1.285 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.16% +0.00%] index_select spread : Elapsed 0.013 ms (1.281 ms / 100) 1.289 -> 1.286 ( -0.23%) [ +0.00% +0.00% +0.00% / -0.23% -0.08% -0.16%] index_select strided 3 : Elapsed 0.013 ms (1.289 ms / 100) 1.286 -> 1.283 ( -0.23%) [ +0.00% +0.00% +0.08% / +0.00% -0.08% -0.23%] index_select strided 5 : Elapsed 0.013 ms (1.286 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.16% +0.16%] index_select strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.291 -> 1.294 ( +0.23%) [ +0.23% +0.00% +0.08% / +0.23% +0.23% +0.23%] index_select strided 8 : Elapsed 0.013 ms (1.294 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.23% +0.08%] index_select strided 16 : Elapsed 0.013 ms (1.286 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_select random : Elapsed 0.013 ms (1.281 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.16% +0.08%] index_select random_sorted : Elapsed 0.013 ms (1.286 ms / 100) 1.282 -> 1.280 ( -0.16%) [ +0.08% +0.08% +0.00% / +0.08% -0.16% -0.16%] index_select perm : Elapsed 0.013 ms (1.283 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.08% +0.00% +0.00%] index_select perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) B = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) A = [4, 5, 20, 40] (stride (5, 1, 20, 400)) dim = 3 3.703 -> 3.703 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.68% +0.62%] index_select const : Elapsed 0.037 ms (3.703 ms / 100) 3.702 -> 3.703 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.73% +0.76%] index_select wrap : Elapsed 0.037 ms (3.703 ms / 100) 3.703 -> 3.703 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.73% +0.73%] index_select linear : Elapsed 0.037 ms (3.703 ms / 100) 3.707 -> 3.707 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.59% +0.67%] index_select reverse : Elapsed 0.037 ms (3.707 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.51% +0.51%] index_select skip64 : Elapsed 0.037 ms (3.701 ms / 100) 3.699 -> 3.702 ( +0.08%) [ +0.05% +0.00% +0.03% / +0.08% +0.76% +0.84%] index_select skip256 : Elapsed 0.037 ms (3.701 ms / 100) 3.668 -> 3.668 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.90% +0.85%] index_select spread : Elapsed 0.037 ms (3.671 ms / 100) 3.710 -> 3.709 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.62% +0.59%] index_select strided 3 : Elapsed 0.037 ms (3.711 ms / 100) 3.701 -> 3.702 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.78% +0.76%] index_select strided 5 : Elapsed 0.037 ms (3.703 ms / 100) 3.694 -> 3.693 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.76% +0.73%] index_select strided 7 : Elapsed 0.037 ms (3.694 ms / 100) 3.711 -> 3.713 ( +0.05%) [ +0.03% +0.00% +0.08% / +0.05% +0.67% +0.70%] index_select strided 8 : Elapsed 0.037 ms (3.712 ms / 100) 3.706 -> 3.705 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.76% +0.73%] index_select strided 16 : Elapsed 0.037 ms (3.706 ms / 100) 3.721 -> 3.720 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.67% +0.73%] index_select random : Elapsed 0.037 ms (3.723 ms / 100) 3.694 -> 3.694 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.62% +0.65%] index_select random_sorted : Elapsed 0.037 ms (3.695 ms / 100) 3.690 -> 3.691 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.89% +0.87%] index_select perm : Elapsed 0.037 ms (3.691 ms / 100) 3.704 -> 3.701 ( -0.08%) [ +0.00% +0.00% +0.03% / -0.08% +0.70% +0.73%] index_select perm_sorted : Elapsed 0.037 ms (3.704 ms / 100) B = [4, 5, 20, 16] (stride (320, 1280, 1, 20)) A = [4, 5, 20, 40] (stride (4000, 1, 200, 5)) dim = 3 4.175 -> 4.185 ( +0.24%) [ +0.22% +0.00% +0.26% / +0.24% +0.74% +0.74%] index_select const : Elapsed 0.042 ms (4.184 ms / 100) 4.200 -> 4.202 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.67% +0.64%] index_select wrap : Elapsed 0.042 ms (4.200 ms / 100) 4.184 -> 4.187 ( +0.07%) [ +0.12% +0.00% +0.10% / +0.07% +0.55% +0.50%] index_select linear : Elapsed 0.042 ms (4.189 ms / 100) 4.174 -> 4.173 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.48% +0.43%] index_select reverse : Elapsed 0.042 ms (4.174 ms / 100) 4.162 -> 4.170 ( +0.19%) [ +0.00% +0.19% +0.05% / +0.19% +0.86% +0.43%] index_select skip64 : Elapsed 0.042 ms (4.162 ms / 100) 4.175 -> 4.172 ( -0.07%) [ +0.05% +0.00% +0.07% / -0.07% +0.62% +0.48%] index_select skip256 : Elapsed 0.042 ms (4.177 ms / 100) 4.205 -> 4.205 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.43% +0.40%] index_select spread : Elapsed 0.042 ms (4.205 ms / 100) 4.187 -> 4.191 ( +0.10%) [ +0.00% +0.05% +0.26% / +0.10% +0.48% +0.50%] index_select strided 3 : Elapsed 0.042 ms (4.187 ms / 100) 4.173 -> 4.180 ( +0.17%) [ +0.07% +0.00% +0.02% / +0.17% +0.34% +0.31%] index_select strided 5 : Elapsed 0.042 ms (4.176 ms / 100) 4.177 -> 4.186 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +0.36% +0.38%] index_select strided 7 : Elapsed 0.042 ms (4.186 ms / 100) 4.158 -> 4.160 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.55% +0.51%] index_select strided 8 : Elapsed 0.042 ms (4.160 ms / 100) 4.207 -> 4.209 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.48% +0.50%] index_select strided 16 : Elapsed 0.042 ms (4.209 ms / 100) 4.168 -> 4.165 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.50% +0.65%] index_select random : Elapsed 0.042 ms (4.168 ms / 100) 4.178 -> 4.178 ( +0.00%) [ +0.00% +0.10% +0.07% / +0.00% +0.65% +0.60%] index_select random_sorted : Elapsed 0.042 ms (4.178 ms / 100) 4.164 -> 4.164 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.41% +0.38%] index_select perm : Elapsed 0.042 ms (4.164 ms / 100) 4.192 -> 4.194 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.36% +0.41%] index_select perm_sorted : Elapsed 0.042 ms (4.192 ms / 100) B = [4, 5, 20, 16] (stride (16, 1280, 64, 1)) A = [4, 5, 20, 40] (stride (1, 160, 800, 4)) dim = 3 3.917 -> 3.917 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.56% +0.64%] index_select const : Elapsed 0.039 ms (3.922 ms / 100) 3.935 -> 3.930 ( -0.13%) [ +0.00% +0.05% +0.05% / -0.13% +0.64% +0.66%] index_select wrap : Elapsed 0.039 ms (3.935 ms / 100) 3.932 -> 3.931 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.58% +0.58%] index_select linear : Elapsed 0.039 ms (3.932 ms / 100) 3.930 -> 3.933 ( +0.08%) [ +0.15% +0.10% +0.00% / +0.08% +0.89% +0.89%] index_select reverse : Elapsed 0.039 ms (3.936 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.03% +0.10% +0.00% / +0.10% +0.64% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.921 ms / 100) 3.922 -> 3.918 ( -0.10%) [ +0.00% +0.05% +0.00% / -0.10% +0.51% +0.56%] index_select skip256 : Elapsed 0.039 ms (3.922 ms / 100) 3.938 -> 3.940 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.76% +0.81%] index_select spread : Elapsed 0.039 ms (3.938 ms / 100) 3.932 -> 3.938 ( +0.15%) [ +0.05% +0.00% +0.08% / +0.15% +0.89% +0.81%] index_select strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 3.926 -> 3.929 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.51% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.929 ms / 100) 3.924 -> 3.926 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.69% +0.74%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.937 -> 3.939 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.48% +0.48%] index_select strided 8 : Elapsed 0.039 ms (3.940 ms / 100) 3.956 -> 3.959 ( +0.08%) [ +0.00% +0.18% +0.18% / +0.08% +0.63% +0.86%] index_select strided 16 : Elapsed 0.040 ms (3.956 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.69% +0.71%] index_select random : Elapsed 0.039 ms (3.921 ms / 100) 3.929 -> 3.932 ( +0.08%) [ +0.05% +0.10% +0.00% / +0.08% +0.71% +0.81%] index_select random_sorted : Elapsed 0.039 ms (3.931 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.81% +0.89%] index_select perm : Elapsed 0.039 ms (3.938 ms / 100) 3.933 -> 3.931 ( -0.05%) [ +0.00% +0.05% +0.03% / -0.05% +0.69% +0.74%] index_select perm_sorted : Elapsed 0.039 ms (3.933 ms / 100) B = [4, 5, 20, 16] (stride (100, 20, 1, 400)) A = [4, 5, 20, 40] (stride (100, 20, 1, 400)) dim = 3 1.100 -> 1.099 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.82% +0.64%] index_select const : Elapsed 0.011 ms (1.101 ms / 100) 1.111 -> 1.111 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.18% +0.09%] index_select wrap : Elapsed 0.011 ms (1.111 ms / 100) 1.111 -> 1.110 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.00% +0.09%] index_select linear : Elapsed 0.011 ms (1.112 ms / 100) 1.109 -> 1.106 ( -0.27%) [ +0.00% +0.27% +0.00% / -0.27% +0.27% -0.09%] index_select reverse : Elapsed 0.011 ms (1.109 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.09% +0.00% +0.27% / +0.00% +0.00% +0.00%] index_select skip64 : Elapsed 0.011 ms (1.108 ms / 100) 1.099 -> 1.103 ( +0.36%) [ +0.09% +0.00% +0.36% / +0.36% +0.55% +0.36%] index_select skip256 : Elapsed 0.011 ms (1.100 ms / 100) 1.098 -> 1.098 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.09% +0.00% +0.09%] index_select spread : Elapsed 0.011 ms (1.098 ms / 100) 1.105 -> 1.107 ( +0.18%) [ +0.27% +0.36% +0.00% / +0.54% +0.18% +0.45%] index_select strided 3 : Elapsed 0.011 ms (1.108 ms / 100) 1.104 -> 1.100 ( -0.36%) [ +0.00% +0.00% +0.00% / +0.09% -0.36% -0.27%] index_select strided 5 : Elapsed 0.011 ms (1.104 ms / 100) 1.112 -> 1.108 ( -0.36%) [ +0.09% +0.00% +0.18% / -0.09% -0.36% -0.36%] index_select strided 7 : Elapsed 0.011 ms (1.113 ms / 100) 1.107 -> 1.106 ( -0.09%) [ +0.09% +0.00% +0.18% / +0.18% -0.09% +0.09%] index_select strided 8 : Elapsed 0.011 ms (1.108 ms / 100) 1.099 -> 1.097 ( -0.18%) [ +0.09% +0.09% +0.00% / -0.18% +0.18% +0.00%] index_select strided 16 : Elapsed 0.011 ms (1.100 ms / 100) 1.105 -> 1.106 ( +0.09%) [ +0.54% +0.36% +0.00% / +0.09% +0.18% +0.36%] index_select random : Elapsed 0.011 ms (1.111 ms / 100) 1.100 -> 1.099 ( -0.09%) [ +0.18% +0.00% +0.09% / -0.09% +0.09% +0.09%] index_select random_sorted : Elapsed 0.011 ms (1.102 ms / 100) 1.111 -> 1.110 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.00% +0.09%] index_select perm : Elapsed 0.011 ms (1.112 ms / 100) 1.100 -> 1.100 ( +0.00%) [ +0.00% +0.09% +0.55% / +0.00% +0.09% +0.09%] index_select perm_sorted : Elapsed 0.011 ms (1.100 ms / 100) B = [4, 5, 20, 16] (stride (20, 80, 1, 400)) A = [4, 5, 20, 40] (stride (1, 80, 4, 400)) dim = 3 3.554 -> 3.552 ( -0.06%) [ +0.00% +0.03% +0.08% / -0.06% +0.84% +0.82%] index_select const : Elapsed 0.036 ms (3.554 ms / 100) 3.549 -> 3.550 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.73% +0.70%] index_select wrap : Elapsed 0.036 ms (3.550 ms / 100) 3.540 -> 3.544 ( +0.11%) [ +0.28% +0.20% +0.00% / +0.11% +0.99% +0.88%] index_select linear : Elapsed 0.035 ms (3.550 ms / 100) 3.550 -> 3.554 ( +0.11%) [ +0.00% +0.11% +0.14% / +0.11% +1.24% +0.65%] index_select reverse : Elapsed 0.036 ms (3.550 ms / 100) 3.535 -> 3.533 ( -0.06%) [ +0.14% +0.00% +0.00% / -0.06% +0.93% +0.96%] index_select skip64 : Elapsed 0.035 ms (3.540 ms / 100) 3.554 -> 3.554 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.73% +0.84%] index_select skip256 : Elapsed 0.036 ms (3.554 ms / 100) 3.551 -> 3.553 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.96% +0.87%] index_select spread : Elapsed 0.036 ms (3.552 ms / 100) 3.541 -> 3.539 ( -0.06%) [ +0.00% +0.00% +0.08% / -0.06% +0.82% +0.76%] index_select strided 3 : Elapsed 0.035 ms (3.541 ms / 100) 3.546 -> 3.547 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.87% +0.90%] index_select strided 5 : Elapsed 0.035 ms (3.547 ms / 100) 3.537 -> 3.545 ( +0.23%) [ +0.06% +0.00% +0.23% / +0.23% +0.88% +0.90%] index_select strided 7 : Elapsed 0.035 ms (3.539 ms / 100) 3.549 -> 3.550 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.03% +0.79% +0.79%] index_select strided 8 : Elapsed 0.035 ms (3.549 ms / 100) 3.550 -> 3.551 ( +0.03%) [ +0.00% +0.00% +0.14% / +0.03% +0.79% +0.79%] index_select strided 16 : Elapsed 0.035 ms (3.550 ms / 100) 3.540 -> 3.539 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.71% +0.71%] index_select random : Elapsed 0.035 ms (3.541 ms / 100) 3.538 -> 3.541 ( +0.08%) [ +0.03% +0.00% +0.11% / +0.08% +0.90% +0.85%] index_select random_sorted : Elapsed 0.035 ms (3.539 ms / 100) 3.543 -> 3.545 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.90% +0.82%] index_select perm : Elapsed 0.035 ms (3.546 ms / 100) 3.550 -> 3.552 ( +0.06%) [ +0.08% +0.00% +0.00% / +0.06% +0.68% +0.62%] index_select perm_sorted : Elapsed 0.036 ms (3.553 ms / 100) out_shape = [16, 5, 40, 20] in_shape = [4, 5, 40, 20] idx_dim = 0 B = [16, 5, 40, 20] (stride (4000, 40, 1, 200)) A = [4, 5, 40, 20] (stride (200, 40, 1, 800)) dim = 0 1.146 -> 1.150 ( +0.35%) [ +0.09% +0.35% +0.00% / +0.35% +0.87% +0.44%] index_add_ linear : Elapsed 0.011 ms (1.147 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.09% +0.00% +0.17% / +0.09% +1.13% +1.05%] index_copy_ linear : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.17% +0.09% / +0.00% +0.96% +0.78%] index_add_ reverse : Elapsed 0.011 ms (1.148 ms / 100) 1.145 -> 1.149 ( +0.35%) [ +0.70% +0.35% +0.00% / +0.35% +1.48% +1.75%] index_copy_ reverse : Elapsed 0.012 ms (1.153 ms / 100) 1.148 -> 1.152 ( +0.35%) [ +0.35% +0.09% +0.00% / +0.35% +0.87% +0.78%] index_add_ spread : Elapsed 0.012 ms (1.152 ms / 100) 1.147 -> 1.152 ( +0.44%) [ +0.00% +0.26% +0.17% / +0.44% +1.05% +0.78%] index_copy_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.154 -> 1.154 ( +0.00%) [ +0.61% +0.00% +0.09% / +0.26% +0.17% +0.00%] index_add_ strided 3 : Elapsed 0.012 ms (1.161 ms / 100) 1.154 -> 1.152 ( -0.17%) [ +0.17% +0.26% +0.00% / +0.17% -0.09% -0.17%] index_copy_ strided 3 : Elapsed 0.012 ms (1.156 ms / 100) 1.147 -> 1.150 ( +0.26%) [ +0.17% +0.00% +0.26% / +0.26% +0.35% +0.35%] index_add_ strided 5 : Elapsed 0.011 ms (1.149 ms / 100) 1.147 -> 1.145 ( -0.17%) [ +0.09% +0.35% +0.00% / -0.17% -0.09% +0.00%] index_copy_ strided 5 : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.09% +0.00% +0.26% / +0.17% +0.44% +0.26%] index_add_ strided 7 : Elapsed 0.011 ms (1.149 ms / 100) 1.147 -> 1.145 ( -0.17%) [ +0.00% +0.26% +0.09% / +0.00% -0.17% +0.00%] index_copy_ strided 7 : Elapsed 0.011 ms (1.147 ms / 100) 1.149 -> 1.150 ( +0.09%) [ +0.17% +0.00% +0.17% / +0.17% +0.44% +0.09%] index_add_ perm : Elapsed 0.012 ms (1.151 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.00% +0.09% +0.09% / +0.17% +0.61% +0.96%] index_copy_ perm : Elapsed 0.011 ms (1.148 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.00% +0.17% +0.26% / +0.09% +0.17% +0.35%] index_add_ perm_sorted : Elapsed 0.011 ms (1.147 ms / 100) 1.143 -> 1.154 ( +0.96%) [ +0.00% +0.26% +0.44% / +0.96% +1.14% +1.14%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.143 ms / 100) 2.082 -> 2.085 ( +0.14%) [ +0.00% +0.05% +0.24% / +0.14% +0.53% +0.58%] index_select const : Elapsed 0.021 ms (2.082 ms / 100) 2.151 -> 2.156 ( +0.23%) [ +0.00% +0.19% +0.05% / +0.23% +0.88% +0.93%] index_select wrap : Elapsed 0.022 ms (2.151 ms / 100) 2.169 -> 2.169 ( +0.00%) [ +0.05% +0.23% +0.00% / +0.00% +0.69% +0.83%] index_select linear : Elapsed 0.022 ms (2.170 ms / 100) 2.087 -> 2.090 ( +0.14%) [ +0.00% +0.24% +0.29% / +0.14% +1.01% +0.96%] index_select reverse : Elapsed 0.021 ms (2.087 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.14% +0.19%] index_select skip64 : Elapsed 0.021 ms (2.089 ms / 100) 2.077 -> 2.077 ( +0.00%) [ +0.00% +0.29% +0.10% / +0.00% +0.39% +0.39%] index_select skip256 : Elapsed 0.021 ms (2.077 ms / 100) 2.138 -> 2.140 ( +0.09%) [ +0.19% +0.23% +0.00% / +0.09% +0.65% +0.70%] index_select spread : Elapsed 0.021 ms (2.142 ms / 100) 2.142 -> 2.145 ( +0.14%) [ +0.05% +0.00% +0.09% / +0.14% +0.89% +0.98%] index_select strided 3 : Elapsed 0.021 ms (2.143 ms / 100) 2.124 -> 2.128 ( +0.19%) [ +0.19% +0.00% +0.28% / +0.19% +1.32% +1.13%] index_select random : Elapsed 0.021 ms (2.128 ms / 100) 2.138 -> 2.138 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.94% +0.84%] index_select random_sorted : Elapsed 0.021 ms (2.141 ms / 100) B = [16, 5, 40, 20] (stride (800, 12800, 20, 1)) A = [4, 5, 40, 20] (stride (4000, 40, 1, 200)) dim = 0 1.137 -> 1.140 ( +0.26%) [ +0.00% +0.09% +0.00% / +0.26% +0.79% +0.53%] index_add_ linear : Elapsed 0.011 ms (1.137 ms / 100) 1.115 -> 1.117 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +1.52% +1.17%] index_copy_ linear : Elapsed 0.011 ms (1.117 ms / 100) 1.128 -> 1.130 ( +0.18%) [ +0.09% +0.27% +0.00% / +0.18% +1.68% +1.42%] index_add_ reverse : Elapsed 0.011 ms (1.129 ms / 100) 1.114 -> 1.113 ( -0.09%) [ +0.36% +0.18% +0.00% / -0.09% +1.08% +1.17%] index_copy_ reverse : Elapsed 0.011 ms (1.118 ms / 100) 1.135 -> 1.137 ( +0.18%) [ +0.26% +0.35% +0.00% / +0.18% +0.97% +1.06%] index_add_ spread : Elapsed 0.011 ms (1.138 ms / 100) 1.119 -> 1.123 ( +0.36%) [ +0.36% +0.00% +0.27% / +0.36% +1.34% +1.07%] index_copy_ spread : Elapsed 0.011 ms (1.123 ms / 100) 1.125 -> 1.127 ( +0.18%) [ +0.00% +0.44% +0.36% / +0.18% +2.13% +1.78%] index_add_ strided 3 : Elapsed 0.011 ms (1.125 ms / 100) 1.110 -> 1.111 ( +0.09%) [ +0.09% +0.18% +0.00% / +0.09% +2.43% +1.80%] index_copy_ strided 3 : Elapsed 0.011 ms (1.111 ms / 100) 1.158 -> 1.151 ( -0.60%) [ +0.35% +0.00% +0.35% / +0.09% -0.52% -0.60%] index_add_ strided 5 : Elapsed 0.012 ms (1.162 ms / 100) 1.145 -> 1.139 ( -0.52%) [ +0.26% +0.44% +0.00% / +0.35% -0.52% -0.26%] index_copy_ strided 5 : Elapsed 0.011 ms (1.148 ms / 100) 1.147 -> 1.149 ( +0.17%) [ +0.26% +0.00% +0.00% / +0.17% +0.17% +0.17%] index_add_ strided 7 : Elapsed 0.011 ms (1.150 ms / 100) 1.129 -> 1.130 ( +0.09%) [ +0.44% +0.62% +0.00% / +0.62% +0.09% +0.53%] index_copy_ strided 7 : Elapsed 0.011 ms (1.134 ms / 100) 1.141 -> 1.146 ( +0.44%) [ +0.00% +0.35% +0.53% / +0.44% +0.61% +0.79%] index_add_ perm : Elapsed 0.011 ms (1.141 ms / 100) 1.133 -> 1.130 ( -0.26%) [ +0.00% +0.18% +0.09% / -0.09% -0.26% +0.18%] index_copy_ perm : Elapsed 0.011 ms (1.133 ms / 100) 1.143 -> 1.144 ( +0.09%) [ +0.00% +0.26% +0.35% / +0.09% +0.96% +0.52%] index_add_ perm_sorted : Elapsed 0.011 ms (1.143 ms / 100) 1.132 -> 1.134 ( +0.18%) [ +0.09% +0.00% +0.18% / +0.18% +1.06% +0.44%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.133 ms / 100) 1.954 -> 1.958 ( +0.20%) [ +0.00% +0.51% +0.61% / +0.20% +0.61% +0.56%] index_select const : Elapsed 0.020 ms (1.954 ms / 100) 2.026 -> 2.024 ( -0.10%) [ +0.00% +0.35% +0.05% / -0.10% +0.39% +0.44%] index_select wrap : Elapsed 0.020 ms (2.026 ms / 100) 2.051 -> 2.048 ( -0.15%) [ +0.00% +0.00% +0.05% / +0.00% -0.15% -0.10%] index_select linear : Elapsed 0.021 ms (2.051 ms / 100) 1.998 -> 1.995 ( -0.15%) [ +0.00% +0.40% +0.30% / +0.25% -0.15% -0.15%] index_select reverse : Elapsed 0.020 ms (1.998 ms / 100) 1.962 -> 1.963 ( +0.05%) [ +0.15% +0.00% +0.20% / +0.20% +0.05% +0.20%] index_select skip64 : Elapsed 0.020 ms (1.965 ms / 100) 1.958 -> 1.959 ( +0.05%) [ +0.05% +0.15% +0.00% / +0.05% +0.36% +0.46%] index_select skip256 : Elapsed 0.020 ms (1.959 ms / 100) 2.021 -> 2.013 ( -0.40%) [ +0.00% +0.10% +0.05% / +0.00% -0.40% -0.10%] index_select spread : Elapsed 0.020 ms (2.021 ms / 100) 2.018 -> 2.022 ( +0.20%) [ +0.10% +0.00% +0.15% / +0.20% +0.64% +0.59%] index_select strided 3 : Elapsed 0.020 ms (2.020 ms / 100) 2.006 -> 2.007 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.10% +0.05% +0.10%] index_select random : Elapsed 0.020 ms (2.009 ms / 100) 2.023 -> 2.021 ( -0.10%) [ +0.40% +0.15% +0.00% / +0.20% -0.10% -0.10%] index_select random_sorted : Elapsed 0.020 ms (2.031 ms / 100) B = [16, 5, 40, 20] (stride (5, 1, 80, 3200)) A = [4, 5, 40, 20] (stride (800, 3200, 1, 40)) dim = 0 2.548 -> 2.551 ( +0.12%) [ +0.20% +0.20% +0.00% / +0.12% +0.47% +0.43%] index_add_ linear : Elapsed 0.026 ms (2.553 ms / 100) 2.518 -> 2.517 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.48% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.521 ms / 100) 2.546 -> 2.554 ( +0.31%) [ +0.20% +0.00% +0.20% / +0.31% +0.51% +0.47%] index_add_ reverse : Elapsed 0.026 ms (2.551 ms / 100) 2.516 -> 2.517 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.48% +0.40%] index_copy_ reverse : Elapsed 0.025 ms (2.518 ms / 100) 2.563 -> 2.565 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.66% +0.78%] index_add_ spread : Elapsed 0.026 ms (2.565 ms / 100) 2.561 -> 2.560 ( -0.04%) [ +0.00% +0.04% +0.23% / -0.04% +0.62% +0.59%] index_copy_ spread : Elapsed 0.026 ms (2.561 ms / 100) 2.559 -> 2.559 ( +0.00%) [ +0.00% +0.23% +0.16% / +0.00% +0.78% +0.98%] index_add_ strided 3 : Elapsed 0.026 ms (2.559 ms / 100) 2.577 -> 2.577 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.78% +0.62%] index_copy_ strided 3 : Elapsed 0.026 ms (2.577 ms / 100) 2.531 -> 2.529 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.36% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.531 ms / 100) 2.521 -> 2.523 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.40% +0.63%] index_copy_ strided 5 : Elapsed 0.025 ms (2.524 ms / 100) 2.539 -> 2.540 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.32% +0.35%] index_add_ strided 7 : Elapsed 0.025 ms (2.540 ms / 100) 2.541 -> 2.544 ( +0.12%) [ +0.28% +0.00% +0.04% / +0.12% +0.55% +0.47%] index_copy_ strided 7 : Elapsed 0.025 ms (2.548 ms / 100) 2.560 -> 2.559 ( -0.04%) [ +0.08% +0.12% +0.00% / +0.00% +0.16% -0.04%] index_add_ perm : Elapsed 0.026 ms (2.562 ms / 100) 2.558 -> 2.557 ( -0.04%) [ +0.00% +0.08% +0.20% / +0.08% -0.04% +0.23%] index_copy_ perm : Elapsed 0.026 ms (2.558 ms / 100) 2.557 -> 2.558 ( +0.04%) [ +0.04% +0.00% +0.20% / +0.23% +0.12% +0.04%] index_add_ perm_sorted : Elapsed 0.026 ms (2.558 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.00% +0.20% +0.47% / +0.23% +0.31% +0.12%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.554 ms / 100) 5.758 -> 5.767 ( +0.16%) [ +0.00% +0.21% +0.24% / +0.16% +0.78% +0.56%] index_select const : Elapsed 0.058 ms (5.758 ms / 100) 5.740 -> 5.741 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.45% +0.51%] index_select wrap : Elapsed 0.057 ms (5.740 ms / 100) 5.764 -> 5.760 ( -0.07%) [ +0.03% +0.00% +0.00% / -0.07% +0.50% +0.45%] index_select linear : Elapsed 0.058 ms (5.766 ms / 100) 5.772 -> 5.766 ( -0.10%) [ +0.00% +0.07% +0.16% / -0.10% +0.38% +0.47%] index_select reverse : Elapsed 0.058 ms (5.772 ms / 100) 5.802 -> 5.796 ( -0.10%) [ +0.00% +0.07% +0.02% / -0.10% +0.36% +0.31%] index_select skip64 : Elapsed 0.058 ms (5.802 ms / 100) 5.778 -> 5.773 ( -0.09%) [ +0.14% +0.00% +0.14% / -0.09% +0.28% +0.33%] index_select skip256 : Elapsed 0.058 ms (5.786 ms / 100) 5.735 -> 5.735 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.33% +0.23%] index_select spread : Elapsed 0.057 ms (5.737 ms / 100) 5.754 -> 5.757 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.40% +0.43%] index_select strided 3 : Elapsed 0.058 ms (5.757 ms / 100) 5.781 -> 5.787 ( +0.10%) [ +0.21% +0.00% +0.17% / +0.10% +0.45% +0.36%] index_select random : Elapsed 0.058 ms (5.793 ms / 100) 5.741 -> 5.749 ( +0.14%) [ +0.00% +0.19% +0.10% / +0.14% +0.42% +0.45%] index_select random_sorted : Elapsed 0.057 ms (5.741 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 80, 3200)) A = [4, 5, 40, 20] (stride (5, 1, 20, 800)) dim = 0 2.639 -> 2.640 ( +0.04%) [ +0.00% +0.11% +0.27% / +0.04% +0.19% +0.38%] index_add_ linear : Elapsed 0.026 ms (2.639 ms / 100) 2.606 -> 2.607 ( +0.04%) [ +0.00% +0.12% +0.35% / +0.04% +0.15% +0.19%] index_copy_ linear : Elapsed 0.026 ms (2.606 ms / 100) 2.639 -> 2.642 ( +0.11%) [ +0.00% +0.00% +0.04% / +0.11% +0.11% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.639 ms / 100) 2.608 -> 2.605 ( -0.12%) [ +0.00% +0.00% +0.00% / +0.04% -0.12% +0.08%] index_copy_ reverse : Elapsed 0.026 ms (2.608 ms / 100) 2.669 -> 2.673 ( +0.15%) [ +0.19% +0.00% +0.15% / +0.15% +0.45% +0.34%] index_add_ spread : Elapsed 0.027 ms (2.674 ms / 100) 2.706 -> 2.707 ( +0.04%) [ +0.07% +0.00% +0.18% / +0.04% +0.11% +0.07%] index_copy_ spread : Elapsed 0.027 ms (2.708 ms / 100) 2.666 -> 2.679 ( +0.49%) [ +0.38% +0.00% +0.15% / +0.53% +0.49% +0.53%] index_add_ strided 3 : Elapsed 0.027 ms (2.676 ms / 100) 2.700 -> 2.702 ( +0.07%) [ +0.04% +0.07% +0.00% / +0.19% +0.19% +0.07%] index_copy_ strided 3 : Elapsed 0.027 ms (2.701 ms / 100) 2.676 -> 2.675 ( -0.04%) [ +0.15% +0.00% +0.04% / -0.04% +0.30% +0.30%] index_add_ strided 5 : Elapsed 0.027 ms (2.680 ms / 100) 2.703 -> 2.707 ( +0.15%) [ +0.00% +0.15% +0.04% / +0.15% +0.26% +0.26%] index_copy_ strided 5 : Elapsed 0.027 ms (2.703 ms / 100) 2.669 -> 2.674 ( +0.19%) [ +0.00% +0.00% +0.30% / +0.19% +0.34% +0.45%] index_add_ strided 7 : Elapsed 0.027 ms (2.669 ms / 100) 2.702 -> 2.704 ( +0.07%) [ +0.19% +0.00% +0.74% / +0.22% +0.19% +0.07%] index_copy_ strided 7 : Elapsed 0.027 ms (2.707 ms / 100) 2.672 -> 2.673 ( +0.04%) [ +0.11% +0.04% +0.00% / +0.04% +0.49% +0.64%] index_add_ perm : Elapsed 0.027 ms (2.675 ms / 100) 2.704 -> 2.706 ( +0.07%) [ +0.00% +0.15% +0.22% / +0.07% +0.33% +0.30%] index_copy_ perm : Elapsed 0.027 ms (2.704 ms / 100) 2.676 -> 2.680 ( +0.15%) [ +0.19% +0.00% +0.15% / +0.15% +0.49% +0.45%] index_add_ perm_sorted : Elapsed 0.027 ms (2.681 ms / 100) 2.704 -> 2.709 ( +0.18%) [ +0.22% +0.00% +0.15% / +0.18% +0.26% +0.22%] index_copy_ perm_sorted : Elapsed 0.027 ms (2.710 ms / 100) 5.998 -> 6.006 ( +0.13%) [ +0.48% +0.00% +0.47% / +0.47% +0.70% +0.13%] index_select const : Elapsed 0.060 ms (6.027 ms / 100) 5.937 -> 5.948 ( +0.19%) [ +0.17% +0.27% +0.00% / +0.19% +0.39% +0.32%] index_select wrap : Elapsed 0.059 ms (5.947 ms / 100) 5.997 -> 5.996 ( -0.02%) [ +0.02% +0.02% +0.00% / -0.02% +0.08% +0.15%] index_select linear : Elapsed 0.060 ms (5.998 ms / 100) 6.021 -> 6.014 ( -0.12%) [ +0.05% +0.27% +0.00% / -0.05% +0.40% -0.12%] index_select reverse : Elapsed 0.060 ms (6.024 ms / 100) 6.001 -> 6.004 ( +0.05%) [ +0.00% +0.02% +0.08% / +0.05% +0.13% +0.37%] index_select skip64 : Elapsed 0.060 ms (6.001 ms / 100) 6.001 -> 5.999 ( -0.03%) [ +0.00% +0.15% +0.17% / -0.03% +0.30% +0.33%] index_select skip256 : Elapsed 0.060 ms (6.001 ms / 100) 5.955 -> 5.951 ( -0.07%) [ +0.10% +0.00% +0.12% / -0.07% +0.22% +9.57%] index_select spread : Elapsed 0.060 ms (5.961 ms / 100) 5.934 -> 5.939 ( +0.08%) [ +0.00% +0.13% +0.05% / +0.08% +0.52% +0.17%] index_select strided 3 : Elapsed 0.059 ms (5.934 ms / 100) 5.951 -> 5.960 ( +0.15%) [ +0.00% +0.22% +0.18% / +0.15% +0.39% +0.47%] index_select random : Elapsed 0.060 ms (5.951 ms / 100) 5.941 -> 5.938 ( -0.05%) [ +0.00% +0.08% +0.05% / -0.05% +0.27% +0.37%] index_select random_sorted : Elapsed 0.059 ms (5.941 ms / 100) out_shape = [4, 16, 40, 20] in_shape = [4, 5, 40, 20] idx_dim = 1 B = [4, 16, 40, 20] (stride (12800, 800, 1, 40)) A = [4, 5, 40, 20] (stride (1, 3200, 80, 4)) dim = 1 2.181 -> 2.181 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.60% +0.69%] index_add_ linear : Elapsed 0.022 ms (2.181 ms / 100) 2.137 -> 2.140 ( +0.14%) [ +0.23% +0.28% +0.00% / +0.14% +0.80% +0.70%] index_copy_ linear : Elapsed 0.021 ms (2.142 ms / 100) 2.178 -> 2.179 ( +0.05%) [ +0.05% +0.14% +0.00% / +0.05% +0.73% +0.78%] index_add_ reverse : Elapsed 0.022 ms (2.179 ms / 100) 2.133 -> 2.135 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.61% +0.70%] index_copy_ reverse : Elapsed 0.021 ms (2.135 ms / 100) 2.183 -> 2.184 ( +0.05%) [ +0.18% +0.00% +0.00% / +0.05% +0.55% +0.78%] index_add_ spread : Elapsed 0.022 ms (2.187 ms / 100) 2.139 -> 2.136 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.75% +0.51%] index_copy_ spread : Elapsed 0.021 ms (2.139 ms / 100) 2.181 -> 2.183 ( +0.09%) [ +0.23% +0.18% +0.00% / +0.09% +0.78% +0.50%] index_add_ strided 3 : Elapsed 0.022 ms (2.186 ms / 100) 2.134 -> 2.138 ( +0.19%) [ +0.00% +0.09% +0.09% / +0.19% +0.56% +0.56%] index_copy_ strided 3 : Elapsed 0.021 ms (2.134 ms / 100) 2.179 -> 2.182 ( +0.14%) [ +0.05% +0.05% +0.00% / +0.14% +0.41% +0.41%] index_add_ strided 5 : Elapsed 0.022 ms (2.180 ms / 100) 2.136 -> 2.136 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.37% +0.47%] index_copy_ strided 5 : Elapsed 0.021 ms (2.136 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.32% +0.27%] index_add_ strided 7 : Elapsed 0.022 ms (2.189 ms / 100) 2.144 -> 2.141 ( -0.14%) [ +0.09% +0.09% +0.00% / -0.14% +0.23% +0.33%] index_copy_ strided 7 : Elapsed 0.021 ms (2.146 ms / 100) 2.183 -> 2.185 ( +0.09%) [ +0.23% +0.09% +0.00% / +0.09% +0.73% +0.73%] index_add_ perm : Elapsed 0.022 ms (2.188 ms / 100) 2.136 -> 2.136 ( +0.00%) [ +0.19% +0.05% +0.00% / +0.00% +0.61% +0.66%] index_copy_ perm : Elapsed 0.021 ms (2.140 ms / 100) 2.186 -> 2.185 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.64% +0.69%] index_add_ perm_sorted : Elapsed 0.022 ms (2.187 ms / 100) 2.139 -> 2.141 ( +0.09%) [ +0.23% +0.00% +0.19% / +0.09% +0.70% +0.56%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.144 ms / 100) 4.565 -> 4.570 ( +0.11%) [ +0.09% +0.04% +0.00% / +0.11% +0.68% +0.70%] index_select const : Elapsed 0.046 ms (4.569 ms / 100) 4.556 -> 4.559 ( +0.07%) [ +0.07% +0.04% +0.00% / +0.07% +0.68% +0.61%] index_select wrap : Elapsed 0.046 ms (4.559 ms / 100) 4.571 -> 4.570 ( -0.02%) [ +0.00% +0.09% +0.02% / -0.02% +0.55% +0.55%] index_select linear : Elapsed 0.046 ms (4.571 ms / 100) 4.559 -> 4.559 ( +0.00%) [ +0.09% +0.02% +0.00% / +0.00% +0.50% +0.64%] index_select reverse : Elapsed 0.046 ms (4.563 ms / 100) 4.554 -> 4.560 ( +0.13%) [ +0.09% +0.00% +0.02% / +0.13% +0.75% +0.86%] index_select skip64 : Elapsed 0.046 ms (4.558 ms / 100) 4.561 -> 4.569 ( +0.18%) [ +0.04% +0.07% +0.00% / +0.18% +0.77% +0.77%] index_select skip256 : Elapsed 0.046 ms (4.563 ms / 100) 4.555 -> 4.557 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.81% +0.90%] index_select spread : Elapsed 0.046 ms (4.555 ms / 100) 4.561 -> 4.562 ( +0.02%) [ +0.11% +0.00% +0.09% / +0.02% +0.83% +0.86%] index_select strided 3 : Elapsed 0.046 ms (4.566 ms / 100) 4.555 -> 4.556 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.86% +0.86%] index_select random : Elapsed 0.046 ms (4.555 ms / 100) 4.562 -> 4.567 ( +0.11%) [ +0.11% +0.00% +0.07% / +0.11% +0.81% +0.81%] index_select random_sorted : Elapsed 0.046 ms (4.567 ms / 100) B = [4, 16, 40, 20] (stride (12800, 20, 320, 1)) dim = 1 fill_cnt = 5 1.085 -> 1.088 ( +0.28%) [ +0.28% +0.18% +0.00% / +0.28% +0.46% +0.37%] index_fill_ const : Elapsed 0.011 ms (1.088 ms / 100) 1.110 -> 1.109 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.09% +0.45%] index_fill_ linear : Elapsed 0.011 ms (1.111 ms / 100) 1.109 -> 1.111 ( +0.18%) [ +0.18% +0.27% +0.00% / +0.18% +0.45% +0.45%] index_fill_ reverse : Elapsed 0.011 ms (1.111 ms / 100) 1.086 -> 1.086 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.00% +0.37% +0.37%] index_fill_ skip64 : Elapsed 0.011 ms (1.087 ms / 100) 1.085 -> 1.087 ( +0.18%) [ +0.28% +0.28% +0.00% / +0.18% +0.46% +0.28%] index_fill_ skip256 : Elapsed 0.011 ms (1.088 ms / 100) 1.152 -> 1.151 ( -0.09%) [ +0.17% +0.17% +0.00% / -0.09% +0.26% +0.17%] index_fill_ spread : Elapsed 0.012 ms (1.154 ms / 100) 1.149 -> 1.150 ( +0.09%) [ +0.17% +0.70% +0.00% / +0.09% +0.70% +0.52%] index_fill_ strided 3 : Elapsed 0.012 ms (1.151 ms / 100) 1.125 -> 1.128 ( +0.27%) [ +0.00% +0.53% +0.53% / +0.53% +0.53% +0.27%] index_fill_ strided 5 : Elapsed 0.011 ms (1.125 ms / 100) 1.132 -> 1.138 ( +0.53%) [ +0.35% +0.00% +0.09% / +0.53% +0.88% +1.06%] index_fill_ strided 7 : Elapsed 0.011 ms (1.136 ms / 100) 1.093 -> 1.094 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.37% +0.18%] index_fill_ strided 8 : Elapsed 0.011 ms (1.094 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.17% +0.26% +0.00% / +0.09% +0.44% +0.09%] index_fill_ random : Elapsed 0.011 ms (1.147 ms / 100) 1.145 -> 1.149 ( +0.35%) [ +0.17% +0.09% +0.00% / +0.79% +0.35% +0.35%] index_fill_ random_sorted : Elapsed 0.011 ms (1.147 ms / 100) 1.135 -> 1.131 ( -0.35%) [ +0.00% +0.09% +0.44% / -0.26% -0.35% +0.18%] index_fill_ perm : Elapsed 0.011 ms (1.135 ms / 100) 1.136 -> 1.133 ( -0.26%) [ +0.18% +0.00% +0.00% / -0.26% -0.18% +0.09%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.138 ms / 100) B = [4, 16, 40, 20] (stride (800, 3200, 20, 1)) A = [4, 5, 40, 20] (stride (100, 1, 400, 5)) dim = 1 2.233 -> 2.234 ( +0.04%) [ +0.00% +0.22% +0.00% / +0.04% +0.31% +0.22%] index_add_ linear : Elapsed 0.022 ms (2.233 ms / 100) 2.169 -> 2.173 ( +0.18%) [ +0.14% +0.09% +0.00% / +0.18% +0.65% +0.74%] index_copy_ linear : Elapsed 0.022 ms (2.172 ms / 100) 2.223 -> 2.221 ( -0.09%) [ +0.00% +0.22% +0.18% / -0.09% +0.81% +0.54%] index_add_ reverse : Elapsed 0.022 ms (2.223 ms / 100) 2.164 -> 2.166 ( +0.09%) [ +0.00% +0.23% +0.23% / +0.09% +0.69% +0.60%] index_copy_ reverse : Elapsed 0.022 ms (2.164 ms / 100) 2.224 -> 2.224 ( +0.00%) [ +0.45% +0.00% +0.27% / +0.00% +0.67% +0.54%] index_add_ spread : Elapsed 0.022 ms (2.234 ms / 100) 2.164 -> 2.162 ( -0.09%) [ +0.28% +0.00% +0.14% / -0.09% +0.79% +0.46%] index_copy_ spread : Elapsed 0.022 ms (2.170 ms / 100) 2.227 -> 2.228 ( +0.04%) [ +0.31% +0.40% +0.00% / +0.04% +0.58% +0.72%] index_add_ strided 3 : Elapsed 0.022 ms (2.234 ms / 100) 2.165 -> 2.165 ( +0.00%) [ +0.18% +0.32% +0.00% / +0.00% +0.51% +0.65%] index_copy_ strided 3 : Elapsed 0.022 ms (2.169 ms / 100) 2.232 -> 2.237 ( +0.22%) [ +0.00% +0.09% +0.63% / +0.40% +0.63% +0.22%] index_add_ strided 5 : Elapsed 0.022 ms (2.232 ms / 100) 2.169 -> 2.181 ( +0.55%) [ +0.18% +0.00% +0.51% / +0.55% +1.01% +0.74%] index_copy_ strided 5 : Elapsed 0.022 ms (2.173 ms / 100) 2.231 -> 2.231 ( +0.00%) [ +0.22% +0.13% +0.00% / +0.40% +0.00% +0.54%] index_add_ strided 7 : Elapsed 0.022 ms (2.236 ms / 100) 2.170 -> 2.172 ( +0.09%) [ +0.00% +0.05% +0.28% / +0.18% +0.09% +0.46%] index_copy_ strided 7 : Elapsed 0.022 ms (2.170 ms / 100) 2.232 -> 2.231 ( -0.04%) [ +0.09% +0.00% +0.13% / -0.04% +0.22% +0.18%] index_add_ perm : Elapsed 0.022 ms (2.234 ms / 100) 2.166 -> 2.169 ( +0.14%) [ +0.18% +0.00% +0.09% / +0.14% +0.46% +0.28%] index_copy_ perm : Elapsed 0.022 ms (2.170 ms / 100) 2.223 -> 2.231 ( +0.36%) [ +0.00% +0.09% +0.40% / +0.36% +0.54% +0.63%] index_add_ perm_sorted : Elapsed 0.022 ms (2.223 ms / 100) 2.163 -> 2.167 ( +0.18%) [ +0.00% +0.05% +0.46% / +0.18% +0.60% +0.79%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.163 ms / 100) 4.626 -> 4.640 ( +0.30%) [ +0.04% +0.00% +0.06% / +0.30% +0.93% +0.99%] index_select const : Elapsed 0.046 ms (4.628 ms / 100) 4.634 -> 4.633 ( -0.02%) [ +0.45% +0.00% +0.56% / -0.02% +0.41% +0.52%] index_select wrap : Elapsed 0.047 ms (4.655 ms / 100) 4.635 -> 4.642 ( +0.15%) [ +0.15% +0.00% +0.06% / +0.15% +0.71% +0.73%] index_select linear : Elapsed 0.046 ms (4.642 ms / 100) 4.633 -> 4.641 ( +0.17%) [ +0.04% +0.02% +0.00% / +0.17% +0.80% +0.56%] index_select reverse : Elapsed 0.046 ms (4.635 ms / 100) 4.641 -> 4.644 ( +0.06%) [ +0.15% +0.00% +0.11% / +0.06% +0.82% +0.69%] index_select skip64 : Elapsed 0.046 ms (4.648 ms / 100) 4.634 -> 4.634 ( +0.00%) [ +0.26% +0.13% +0.00% / +0.00% +0.97% +0.80%] index_select skip256 : Elapsed 0.046 ms (4.646 ms / 100) 4.639 -> 4.646 ( +0.15%) [ +0.22% +0.02% +0.00% / +0.15% +0.93% +0.95%] index_select spread : Elapsed 0.046 ms (4.649 ms / 100) 4.644 -> 4.626 ( -0.39%) [ +0.09% +0.00% +0.19% / -0.39% +0.93% +0.41%] index_select strided 3 : Elapsed 0.046 ms (4.648 ms / 100) 4.632 -> 4.637 ( +0.11%) [ +0.22% +0.00% +0.02% / +0.11% +1.01% +0.99%] index_select random : Elapsed 0.046 ms (4.642 ms / 100) 4.651 -> 4.654 ( +0.06%) [ +0.00% +0.06% +0.24% / +0.06% +1.05% +0.99%] index_select random_sorted : Elapsed 0.047 ms (4.651 ms / 100) B = [4, 16, 40, 20] (stride (640, 1, 16, 2560)) A = [4, 5, 40, 20] (stride (4000, 1, 100, 5)) dim = 1 2.239 -> 2.243 ( +0.18%) [ +0.09% +0.00% +0.27% / +0.18% +1.65% +1.43%] index_add_ linear : Elapsed 0.022 ms (2.241 ms / 100) 2.207 -> 2.211 ( +0.18%) [ +0.05% +0.18% +0.00% / +0.18% +1.31% +1.40%] index_copy_ linear : Elapsed 0.022 ms (2.208 ms / 100) 2.234 -> 2.241 ( +0.31%) [ +0.18% +0.36% +0.00% / +0.31% +1.66% +1.70%] index_add_ reverse : Elapsed 0.022 ms (2.238 ms / 100) 2.208 -> 2.213 ( +0.23%) [ +0.23% +0.00% +0.00% / +0.23% +1.13% +1.27%] index_copy_ reverse : Elapsed 0.022 ms (2.213 ms / 100) 2.264 -> 2.266 ( +0.09%) [ +0.27% +0.04% +0.00% / +0.09% +1.19% +1.06%] index_add_ spread : Elapsed 0.023 ms (2.270 ms / 100) 2.271 -> 2.271 ( +0.00%) [ +0.22% +0.00% +0.09% / +0.00% +0.97% +0.97%] index_copy_ spread : Elapsed 0.023 ms (2.276 ms / 100) 2.261 -> 2.268 ( +0.31%) [ +0.27% +0.00% +0.09% / +0.31% +1.19% +1.50%] index_add_ strided 3 : Elapsed 0.023 ms (2.267 ms / 100) 2.271 -> 2.271 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.92% +1.14%] index_copy_ strided 3 : Elapsed 0.023 ms (2.272 ms / 100) 2.266 -> 2.267 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +1.19% +1.41%] index_add_ strided 5 : Elapsed 0.023 ms (2.267 ms / 100) 2.268 -> 2.275 ( +0.31%) [ +0.00% +0.40% +0.35% / +0.31% +1.63% +1.76%] index_copy_ strided 5 : Elapsed 0.023 ms (2.268 ms / 100) 2.264 -> 2.268 ( +0.18%) [ +0.00% +0.13% +0.09% / +0.18% +1.37% +1.55%] index_add_ strided 7 : Elapsed 0.023 ms (2.264 ms / 100) 2.271 -> 2.272 ( +0.04%) [ +0.18% +0.22% +0.00% / +0.04% +1.10% +1.23%] index_copy_ strided 7 : Elapsed 0.023 ms (2.275 ms / 100) 2.264 -> 2.268 ( +0.18%) [ +0.49% +0.00% +0.18% / +0.18% +1.46% +1.41%] index_add_ perm : Elapsed 0.023 ms (2.275 ms / 100) 2.266 -> 2.272 ( +0.26%) [ +0.49% +0.00% +0.13% / +0.26% +1.32% +1.50%] index_copy_ perm : Elapsed 0.023 ms (2.277 ms / 100) 2.266 -> 2.265 ( -0.04%) [ +0.04% +0.09% +0.00% / -0.04% +1.32% +1.15%] index_add_ perm_sorted : Elapsed 0.023 ms (2.267 ms / 100) 2.267 -> 2.267 ( +0.00%) [ +0.00% +0.31% +0.18% / +0.00% +1.15% +1.41%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.267 ms / 100) 4.647 -> 4.652 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +1.12% +1.14%] index_select const : Elapsed 0.046 ms (4.647 ms / 100) 4.646 -> 4.662 ( +0.34%) [ +0.17% +0.13% +0.00% / +0.34% +1.12% +1.12%] index_select wrap : Elapsed 0.047 ms (4.654 ms / 100) 4.658 -> 4.654 ( -0.09%) [ +0.00% +0.02% +0.21% / -0.09% +0.94% +1.09%] index_select linear : Elapsed 0.047 ms (4.658 ms / 100) 4.660 -> 4.655 ( -0.11%) [ +0.21% +0.11% +0.00% / -0.11% +0.86% +0.86%] index_select reverse : Elapsed 0.047 ms (4.670 ms / 100) 4.650 -> 4.649 ( -0.02%) [ +0.02% +0.13% +0.00% / -0.02% +1.03% +1.08%] index_select skip64 : Elapsed 0.047 ms (4.651 ms / 100) 4.645 -> 4.645 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +1.23% +1.21%] index_select skip256 : Elapsed 0.046 ms (4.649 ms / 100) 4.656 -> 4.654 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +1.01% +1.07%] index_select spread : Elapsed 0.047 ms (4.658 ms / 100) 4.654 -> 4.657 ( +0.06%) [ +0.24% +0.02% +0.00% / +0.06% +1.12% +1.14%] index_select strided 3 : Elapsed 0.047 ms (4.665 ms / 100) 4.650 -> 4.651 ( +0.02%) [ +0.00% +0.06% +0.13% / +0.02% +1.23% +1.18%] index_select random : Elapsed 0.047 ms (4.650 ms / 100) 4.644 -> 4.646 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +1.25% +1.23%] index_select random_sorted : Elapsed 0.046 ms (4.646 ms / 100) B = [4, 16, 40, 20] (stride (640, 1, 16, 2560)) A = [4, 5, 40, 20] (stride (800, 3200, 1, 40)) dim = 1 2.474 -> 2.475 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.77% +0.97%] index_add_ linear : Elapsed 0.025 ms (2.474 ms / 100) 2.434 -> 2.436 ( +0.08%) [ +0.00% +0.21% +0.00% / +0.08% +0.62% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.434 ms / 100) 2.474 -> 2.480 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.73% +0.49%] index_add_ reverse : Elapsed 0.025 ms (2.476 ms / 100) 2.437 -> 2.440 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.25% +0.21% +0.12%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.512 -> 2.512 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.32% +0.44%] index_add_ spread : Elapsed 0.025 ms (2.514 ms / 100) 2.508 -> 2.503 ( -0.20%) [ +0.00% +0.00% +0.08% / -0.20% -0.08% -0.04%] index_copy_ spread : Elapsed 0.025 ms (2.508 ms / 100) 2.506 -> 2.504 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.68% +0.84%] index_add_ strided 3 : Elapsed 0.025 ms (2.508 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.04% +0.16% +0.00% / +0.00% +0.20% +0.00%] index_copy_ strided 3 : Elapsed 0.025 ms (2.504 ms / 100) 2.513 -> 2.510 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% +0.48% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.514 ms / 100) 2.509 -> 2.513 ( +0.16%) [ +0.12% +0.00% +0.08% / +0.16% +0.16% +0.16%] index_copy_ strided 5 : Elapsed 0.025 ms (2.512 ms / 100) 2.513 -> 2.514 ( +0.04%) [ +0.00% +0.36% +0.00% / +0.04% +0.32% +0.24%] index_add_ strided 7 : Elapsed 0.025 ms (2.513 ms / 100) 2.506 -> 2.507 ( +0.04%) [ +0.20% +0.36% +0.00% / +0.24% +0.04% +0.08%] index_copy_ strided 7 : Elapsed 0.025 ms (2.511 ms / 100) 2.506 -> 2.506 ( +0.00%) [ +0.24% +0.04% +0.00% / +0.00% +0.60% +0.68%] index_add_ perm : Elapsed 0.025 ms (2.512 ms / 100) 2.502 -> 2.501 ( -0.04%) [ +0.24% +0.00% +0.16% / -0.04% +0.20% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.508 ms / 100) 2.508 -> 2.511 ( +0.12%) [ +0.00% +0.24% +0.04% / +0.12% +0.36% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.508 ms / 100) 2.508 -> 2.504 ( -0.16%) [ +0.00% +0.00% +0.08% / +0.00% -0.12% -0.16%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.508 ms / 100) 5.359 -> 5.370 ( +0.21%) [ +0.00% +0.26% +0.04% / +0.21% +0.93% +0.88%] index_select const : Elapsed 0.054 ms (5.359 ms / 100) 5.351 -> 5.352 ( +0.02%) [ +0.24% +0.13% +0.00% / +0.02% +0.32% +0.37%] index_select wrap : Elapsed 0.054 ms (5.364 ms / 100) 5.391 -> 5.394 ( +0.06%) [ +0.07% +0.09% +0.00% / +0.06% +0.39% +0.45%] index_select linear : Elapsed 0.054 ms (5.395 ms / 100) 5.366 -> 5.372 ( +0.11%) [ +0.17% +0.00% +0.00% / +0.11% +0.75% +0.86%] index_select reverse : Elapsed 0.054 ms (5.375 ms / 100) 5.386 -> 5.384 ( -0.04%) [ +0.11% +0.04% +0.00% / -0.04% +0.48% +0.41%] index_select skip64 : Elapsed 0.054 ms (5.392 ms / 100) 5.385 -> 5.389 ( +0.07%) [ +0.11% +0.00% +0.02% / +0.07% +0.50% +0.46%] index_select skip256 : Elapsed 0.054 ms (5.391 ms / 100) 5.363 -> 5.362 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.54% +0.54%] index_select spread : Elapsed 0.054 ms (5.363 ms / 100) 5.351 -> 5.350 ( -0.02%) [ +0.06% +0.06% +0.00% / -0.02% +0.37% +0.39%] index_select strided 3 : Elapsed 0.054 ms (5.354 ms / 100) 5.358 -> 5.363 ( +0.09%) [ +0.13% +0.00% +0.11% / +0.09% +0.78% +0.82%] index_select random : Elapsed 0.054 ms (5.365 ms / 100) 5.369 -> 5.378 ( +0.17%) [ +0.20% +0.22% +0.00% / +0.17% +0.80% +0.58%] index_select random_sorted : Elapsed 0.054 ms (5.380 ms / 100) out_shape = [4, 5, 16, 20] in_shape = [4, 5, 40, 20] idx_dim = 2 B = [4, 5, 16, 20] (stride (320, 1280, 20, 1)) A = [4, 5, 40, 20] (stride (100, 1, 400, 5)) dim = 2 3.850 -> 3.853 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.70% +0.83%] index_select const : Elapsed 0.039 ms (3.852 ms / 100) 3.851 -> 3.851 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.57% +0.91%] index_select wrap : Elapsed 0.039 ms (3.856 ms / 100) 3.856 -> 3.849 ( -0.18%) [ +0.05% +0.00% +0.05% / -0.18% +0.44% +0.52%] index_select linear : Elapsed 0.039 ms (3.858 ms / 100) 3.850 -> 3.852 ( +0.05%) [ +0.08% +0.00% +0.03% / +0.05% +0.52% +0.52%] index_select reverse : Elapsed 0.039 ms (3.853 ms / 100) 3.871 -> 3.875 ( +0.10%) [ +0.08% +0.08% +0.00% / +0.10% +0.46% +0.36%] index_select skip64 : Elapsed 0.039 ms (3.874 ms / 100) 3.856 -> 3.858 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.49% +0.49%] index_select skip256 : Elapsed 0.039 ms (3.859 ms / 100) 3.861 -> 3.860 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.18% +0.16%] index_select spread : Elapsed 0.039 ms (3.861 ms / 100) 3.854 -> 3.860 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.49% +0.54%] index_select strided 3 : Elapsed 0.039 ms (3.857 ms / 100) 3.841 -> 3.847 ( +0.16%) [ +0.16% +0.10% +0.00% / +0.16% +0.36% +0.36%] index_select strided 5 : Elapsed 0.038 ms (3.847 ms / 100) 3.866 -> 3.865 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.34% +0.34%] index_select strided 7 : Elapsed 0.039 ms (3.868 ms / 100) 3.870 -> 3.870 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.52% +0.49%] index_select strided 8 : Elapsed 0.039 ms (3.871 ms / 100) 3.851 -> 3.853 ( +0.05%) [ +0.16% +0.13% +0.00% / +0.05% +0.57% +0.60%] index_select strided 16 : Elapsed 0.039 ms (3.857 ms / 100) 3.857 -> 3.858 ( +0.03%) [ +0.05% +0.21% +0.00% / +0.03% +0.44% +0.44%] index_select random : Elapsed 0.039 ms (3.859 ms / 100) 3.855 -> 3.854 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.42% +0.52%] index_select random_sorted : Elapsed 0.039 ms (3.855 ms / 100) 3.853 -> 3.850 ( -0.08%) [ +0.00% +0.03% +0.03% / -0.08% +0.31% +0.31%] index_select perm : Elapsed 0.039 ms (3.853 ms / 100) 3.848 -> 3.849 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.49% +0.44%] index_select perm_sorted : Elapsed 0.038 ms (3.850 ms / 100) B = [4, 5, 16, 20] (stride (320, 1280, 1, 16)) A = [4, 5, 40, 20] (stride (4000, 1, 5, 200)) dim = 2 4.149 -> 4.159 ( +0.24%) [ +0.31% +0.24% +0.00% / +0.24% +0.94% +0.72%] index_select const : Elapsed 0.042 ms (4.162 ms / 100) 4.180 -> 4.183 ( +0.07%) [ +0.10% +0.00% +0.00% / +0.07% +0.79% +2.20%] index_select wrap : Elapsed 0.042 ms (4.184 ms / 100) 4.162 -> 4.169 ( +0.17%) [ +0.17% +0.12% +0.00% / +0.17% +0.65% +0.70%] index_select linear : Elapsed 0.042 ms (4.169 ms / 100) 4.180 -> 4.180 ( +0.00%) [ +0.22% +0.10% +0.00% / +0.00% +0.72% +0.98%] index_select reverse : Elapsed 0.042 ms (4.189 ms / 100) 4.172 -> 4.179 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +0.77% +0.91%] index_select skip64 : Elapsed 0.042 ms (4.181 ms / 100) 4.147 -> 4.149 ( +0.05%) [ +0.29% +0.29% +0.00% / +0.05% +1.09% +1.11%] index_select skip256 : Elapsed 0.042 ms (4.159 ms / 100) 4.159 -> 4.161 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.75% +0.65%] index_select spread : Elapsed 0.042 ms (4.163 ms / 100) 4.154 -> 4.154 ( +0.00%) [ +0.00% +0.26% +0.17% / +0.00% +0.84% +0.77%] index_select strided 3 : Elapsed 0.042 ms (4.154 ms / 100) 4.167 -> 4.160 ( -0.17%) [ +0.05% +0.00% +0.00% / -0.17% +0.72% +0.53%] index_select strided 5 : Elapsed 0.042 ms (4.169 ms / 100) 4.164 -> 4.165 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.72% +0.77%] index_select strided 7 : Elapsed 0.042 ms (4.164 ms / 100) 4.165 -> 4.166 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.70% +0.72%] index_select strided 8 : Elapsed 0.042 ms (4.166 ms / 100) 4.152 -> 4.153 ( +0.02%) [ +0.00% +0.29% +0.17% / +0.02% +0.77% +0.70%] index_select strided 16 : Elapsed 0.042 ms (4.152 ms / 100) 4.175 -> 4.176 ( +0.02%) [ +0.00% +0.14% +0.22% / +0.02% +0.93% +0.74%] index_select random : Elapsed 0.042 ms (4.175 ms / 100) 4.171 -> 4.176 ( +0.12%) [ +0.07% +0.00% +0.10% / +0.12% +0.94% +0.94%] index_select random_sorted : Elapsed 0.042 ms (4.174 ms / 100) 4.185 -> 4.182 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.84% +0.65%] index_select perm : Elapsed 0.042 ms (4.186 ms / 100) 4.156 -> 4.158 ( +0.05%) [ +0.12% +0.00% +0.17% / +0.05% +0.89% +0.91%] index_select perm_sorted : Elapsed 0.042 ms (4.161 ms / 100) B = [4, 5, 16, 20] (stride (320, 1280, 1, 16)) A = [4, 5, 40, 20] (stride (200, 40, 1, 800)) dim = 2 3.925 -> 3.926 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.38% +0.38%] index_select const : Elapsed 0.039 ms (3.927 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.56% +0.41%] index_select wrap : Elapsed 0.039 ms (3.936 ms / 100) 3.945 -> 3.950 ( +0.13%) [ +0.10% +0.05% +0.00% / +0.13% +0.53% +0.53%] index_select linear : Elapsed 0.039 ms (3.949 ms / 100) 3.942 -> 3.943 ( +0.03%) [ +0.10% +0.10% +0.00% / +0.03% +0.51% +0.56%] index_select reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.935 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.56% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.925 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.46% +0.00% +0.13% / +0.13% +0.79% +0.54%] index_select spread : Elapsed 0.039 ms (3.938 ms / 100) 3.955 -> 3.961 ( +0.15%) [ +0.05% +0.00% +0.10% / +0.15% +0.46% +0.40%] index_select strided 3 : Elapsed 0.040 ms (3.957 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.23% +0.25% +0.00% / +0.00% +0.36% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.940 ms / 100) 3.933 -> 3.935 ( +0.05%) [ +0.08% +0.15% +0.00% / +0.05% +0.53% +0.58%] index_select strided 7 : Elapsed 0.039 ms (3.936 ms / 100) 3.957 -> 3.955 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.35% +0.35%] index_select strided 8 : Elapsed 0.040 ms (3.958 ms / 100) 3.923 -> 3.929 ( +0.15%) [ +0.23% +0.13% +0.00% / +0.15% +0.54% +0.54%] index_select strided 16 : Elapsed 0.039 ms (3.932 ms / 100) 3.947 -> 3.949 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.33% +0.46%] index_select random : Elapsed 0.039 ms (3.949 ms / 100) 3.928 -> 3.933 ( +0.13%) [ +0.00% +0.08% +0.08% / +0.13% +0.36% +0.36%] index_select random_sorted : Elapsed 0.039 ms (3.928 ms / 100) 3.926 -> 3.930 ( +0.10%) [ +0.08% +0.00% +0.10% / +0.10% +0.41% +0.53%] index_select perm : Elapsed 0.039 ms (3.929 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.56% +0.46%] index_select perm_sorted : Elapsed 0.039 ms (3.939 ms / 100) B = [4, 5, 16, 20] (stride (1, 1280, 80, 4)) A = [4, 5, 40, 20] (stride (40, 160, 1, 800)) dim = 2 4.273 -> 4.278 ( +0.12%) [ +0.05% +0.07% +0.00% / +0.12% +0.68% +0.94%] index_select const : Elapsed 0.043 ms (4.275 ms / 100) 4.270 -> 4.277 ( +0.16%) [ +0.00% +0.07% +0.35% / +0.16% +0.91% +0.94%] index_select wrap : Elapsed 0.043 ms (4.270 ms / 100) 4.273 -> 4.276 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.66% +0.61%] index_select linear : Elapsed 0.043 ms (4.274 ms / 100) 4.272 -> 4.271 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.68% +0.59%] index_select reverse : Elapsed 0.043 ms (4.272 ms / 100) 4.268 -> 4.276 ( +0.19%) [ +0.00% +0.09% +0.23% / +0.19% +0.66% +0.68%] index_select skip64 : Elapsed 0.043 ms (4.268 ms / 100) 4.272 -> 4.273 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.82% +0.84%] index_select skip256 : Elapsed 0.043 ms (4.272 ms / 100) 4.275 -> 4.276 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +1.05% +0.75%] index_select spread : Elapsed 0.043 ms (4.277 ms / 100) 4.266 -> 4.272 ( +0.14%) [ +0.00% +0.14% +0.02% / +0.14% +0.82% +0.68%] index_select strided 3 : Elapsed 0.043 ms (4.266 ms / 100) 4.286 -> 4.286 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.72% +0.75%] index_select strided 5 : Elapsed 0.043 ms (4.286 ms / 100) 4.279 -> 4.284 ( +0.12%) [ +0.09% +0.09% +0.00% / +0.12% +0.84% +0.75%] index_select strided 7 : Elapsed 0.043 ms (4.283 ms / 100) 4.287 -> 4.288 ( +0.02%) [ +0.07% +0.00% +0.14% / +0.02% +0.75% +0.77%] index_select strided 8 : Elapsed 0.043 ms (4.290 ms / 100) 4.283 -> 4.283 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.79% +0.72%] index_select strided 16 : Elapsed 0.043 ms (4.283 ms / 100) 4.277 -> 4.287 ( +0.23%) [ +0.35% +0.00% +0.30% / +0.23% +0.79% +0.84%] index_select random : Elapsed 0.043 ms (4.292 ms / 100) 4.265 -> 4.267 ( +0.05%) [ +0.00% +0.14% +0.26% / +0.05% +0.91% +0.82%] index_select random_sorted : Elapsed 0.043 ms (4.265 ms / 100) 4.276 -> 4.282 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.70% +0.65%] index_select perm : Elapsed 0.043 ms (4.282 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.12% +0.00% +0.00% / +0.05% +0.75% +0.79%] index_select perm_sorted : Elapsed 0.043 ms (4.286 ms / 100) B = [4, 5, 16, 20] (stride (100, 1, 400, 5)) A = [4, 5, 40, 20] (stride (1, 4, 400, 20)) dim = 2 4.035 -> 4.035 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.64% +0.59%] index_select const : Elapsed 0.040 ms (4.036 ms / 100) 4.011 -> 4.012 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.55% +0.57%] index_select wrap : Elapsed 0.040 ms (4.012 ms / 100) 4.021 -> 4.020 ( -0.02%) [ +0.02% +0.05% +0.00% / -0.02% +0.45% +0.47%] index_select linear : Elapsed 0.040 ms (4.022 ms / 100) 3.997 -> 3.998 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.50% +0.53%] index_select reverse : Elapsed 0.040 ms (3.998 ms / 100) 4.021 -> 4.023 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.52% +0.47%] index_select skip64 : Elapsed 0.040 ms (4.022 ms / 100) 4.035 -> 4.036 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.50% +0.50%] index_select skip256 : Elapsed 0.040 ms (4.036 ms / 100) 4.012 -> 4.012 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.45%] index_select spread : Elapsed 0.040 ms (4.012 ms / 100) 4.030 -> 4.030 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.45% +0.47%] index_select strided 3 : Elapsed 0.040 ms (4.031 ms / 100) 3.998 -> 3.999 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.40% +0.40%] index_select strided 5 : Elapsed 0.040 ms (3.998 ms / 100) 4.026 -> 4.027 ( +0.02%) [ +0.10% +0.02% +0.00% / +0.02% +0.37% +0.37%] index_select strided 7 : Elapsed 0.040 ms (4.030 ms / 100) 4.033 -> 4.033 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.40% +0.42%] index_select strided 8 : Elapsed 0.040 ms (4.034 ms / 100) 4.034 -> 4.035 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.55% +0.50%] index_select strided 16 : Elapsed 0.040 ms (4.037 ms / 100) 4.013 -> 4.014 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.57% +0.57%] index_select random : Elapsed 0.040 ms (4.013 ms / 100) 4.020 -> 4.020 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.50% +0.52%] index_select random_sorted : Elapsed 0.040 ms (4.022 ms / 100) 4.022 -> 4.024 ( +0.05%) [ +0.07% +0.00% +0.12% / +0.05% +0.50% +0.52%] index_select perm : Elapsed 0.040 ms (4.025 ms / 100) 4.018 -> 4.018 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.35% +0.37%] index_select perm_sorted : Elapsed 0.040 ms (4.018 ms / 100) B = [4, 5, 16, 20] (stride (80, 16, 1, 320)) A = [4, 5, 40, 20] (stride (4000, 40, 1, 200)) dim = 2 3.921 -> 3.925 ( +0.10%) [ +0.03% +0.08% +0.00% / +0.10% +0.71% +0.56%] index_select const : Elapsed 0.039 ms (3.922 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.59% +0.59%] index_select wrap : Elapsed 0.039 ms (3.925 ms / 100) 3.924 -> 3.930 ( +0.15%) [ +0.05% +0.13% +0.00% / +0.15% +0.69% +0.69%] index_select linear : Elapsed 0.039 ms (3.926 ms / 100) 3.926 -> 3.926 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.69% +0.79%] index_select reverse : Elapsed 0.039 ms (3.928 ms / 100) 3.918 -> 3.918 ( +0.00%) [ +0.05% +0.00% +0.13% / +0.00% +0.66% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.920 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.61% +0.66%] index_select skip256 : Elapsed 0.039 ms (3.923 ms / 100) 3.917 -> 3.920 ( +0.08%) [ +0.18% +0.18% +0.00% / +0.08% +0.77% +1.05%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.927 -> 3.932 ( +0.13%) [ +0.13% +0.00% +0.08% / +0.13% +0.81% +0.84%] index_select strided 3 : Elapsed 0.039 ms (3.932 ms / 100) 3.928 -> 3.936 ( +0.20%) [ +0.23% +0.05% +0.00% / +0.20% +0.81% +0.92%] index_select strided 5 : Elapsed 0.039 ms (3.937 ms / 100) 3.926 -> 3.924 ( -0.05%) [ +0.00% +0.08% +0.05% / -0.05% +0.76% +0.79%] index_select strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.10% +0.00% +0.23% / +0.03% +0.76% +0.91%] index_select strided 8 : Elapsed 0.039 ms (3.940 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.10% +0.10% +0.00% / -0.03% +0.82% +0.66%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.13% +0.20% +0.00% / +0.03% +0.61% +0.64%] index_select random : Elapsed 0.039 ms (3.923 ms / 100) 3.935 -> 3.934 ( -0.03%) [ +0.03% +0.00% +0.05% / -0.03% +0.71% +0.74%] index_select random_sorted : Elapsed 0.039 ms (3.936 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.05% +0.08% +0.00% / +0.13% +0.61% +0.56%] index_select perm : Elapsed 0.039 ms (3.922 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.94% +0.94%] index_select perm_sorted : Elapsed 0.039 ms (3.941 ms / 100) B = [4, 5, 16, 20] (stride (5, 1, 20, 320)) dim = 2 fill_cnt = 40 0.766 -> 0.757 ( -1.17%) [ +0.65% +0.65% +0.00% / -1.17% -1.17% +13.71%] index_fill_ const : Elapsed 0.008 ms (0.771 ms / 100) 0.748 -> 0.737 ( -1.47%) [ +0.67% +0.53% +0.00% / -0.94% -1.47% -1.34%] index_fill_ linear : Elapsed 0.008 ms (0.753 ms / 100) 0.748 -> 0.736 ( -1.60%) [ +0.00% +0.40% +0.27% / -1.60% -1.34% -1.60%] index_fill_ reverse : Elapsed 0.007 ms (0.748 ms / 100) 0.757 -> 0.743 ( -1.85%) [ +0.00% +0.00% +0.26% / -1.32% -1.85% -1.45%] index_fill_ skip64 : Elapsed 0.008 ms (0.757 ms / 100) 0.757 -> 0.743 ( -1.85%) [ +0.26% +0.53% +0.00% / -1.72% -1.85% -1.85%] index_fill_ skip256 : Elapsed 0.008 ms (0.759 ms / 100) 0.724 -> 0.707 ( -2.35%) [ +0.00% +0.00% +0.00% / -2.35% -2.21% -2.35%] index_fill_ spread : Elapsed 0.007 ms (0.724 ms / 100) 0.724 -> 0.705 ( -2.62%) [ +0.28% +0.00% +0.28% / -1.93% -2.21% -2.62%] index_fill_ strided 3 : Elapsed 0.007 ms (0.726 ms / 100) 0.725 -> 0.706 ( -2.62%) [ +0.14% +0.14% +0.00% / -2.07% -2.62% -2.62%] index_fill_ strided 5 : Elapsed 0.007 ms (0.726 ms / 100) 0.721 -> 0.706 ( -2.08%) [ +0.42% +0.42% +0.00% / -2.08% -1.66% -1.53%] index_fill_ strided 7 : Elapsed 0.007 ms (0.724 ms / 100) 0.723 -> 0.710 ( -1.80%) [ +0.00% +0.14% +0.28% / -1.80% -1.66% -1.80%] index_fill_ strided 8 : Elapsed 0.007 ms (0.723 ms / 100) 0.721 -> 0.707 ( -1.94%) [ +0.42% +0.00% +0.42% / -1.94% -1.39% -1.94%] index_fill_ random : Elapsed 0.007 ms (0.724 ms / 100) 0.720 -> 0.706 ( -1.94%) [ +0.69% +0.00% +0.14% / -1.94% -1.67% -1.81%] index_fill_ random_sorted : Elapsed 0.007 ms (0.725 ms / 100) out_shape = [4, 5, 40, 16] in_shape = [4, 5, 40, 20] idx_dim = 3 B = [4, 5, 40, 16] (stride (3200, 16, 80, 1)) A = [4, 5, 40, 20] (stride (40, 160, 1, 800)) dim = 3 3.813 -> 3.815 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.89% +0.81%] index_select const : Elapsed 0.038 ms (3.814 ms / 100) 3.816 -> 3.816 ( +0.00%) [ +0.03% +0.10% +0.00% / +0.00% +0.81% +0.81%] index_select wrap : Elapsed 0.038 ms (3.817 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.68% +0.71%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.812 -> 3.815 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.79% +0.76%] index_select reverse : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.816 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.81% +0.79%] index_select skip64 : Elapsed 0.038 ms (3.815 ms / 100) 3.820 -> 3.822 ( +0.05%) [ +0.10% +0.08% +0.00% / +0.05% +0.86% +0.79%] index_select skip256 : Elapsed 0.038 ms (3.824 ms / 100) 3.820 -> 3.821 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.71% +0.63%] index_select spread : Elapsed 0.038 ms (3.821 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.68% +0.68%] index_select strided 3 : Elapsed 0.038 ms (3.816 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.68% +0.68%] index_select strided 5 : Elapsed 0.038 ms (3.814 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.73% +0.71%] index_select strided 7 : Elapsed 0.038 ms (3.815 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.60%] index_select strided 8 : Elapsed 0.038 ms (3.814 ms / 100) 3.817 -> 3.818 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.68% +0.73%] index_select strided 16 : Elapsed 0.038 ms (3.818 ms / 100) 3.815 -> 3.814 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.58% +0.58%] index_select random : Elapsed 0.038 ms (3.815 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.58% +0.58%] index_select random_sorted : Elapsed 0.038 ms (3.816 ms / 100) 3.823 -> 3.825 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.58% +0.52%] index_select perm : Elapsed 0.038 ms (3.827 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.60% +0.58%] index_select perm_sorted : Elapsed 0.038 ms (3.817 ms / 100) B = [4, 5, 40, 16] (stride (640, 2560, 16, 1)) A = [4, 5, 40, 20] (stride (20, 3200, 80, 1)) dim = 3 3.930 -> 3.931 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.43% +0.56%] index_select const : Elapsed 0.039 ms (3.930 ms / 100) 3.948 -> 3.949 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.28% +0.41%] index_select wrap : Elapsed 0.039 ms (3.949 ms / 100) 3.932 -> 3.937 ( +0.13%) [ +0.10% +0.15% +0.00% / +0.13% +0.64% +0.61%] index_select linear : Elapsed 0.039 ms (3.936 ms / 100) 3.936 -> 3.943 ( +0.18%) [ +0.23% +0.20% +0.00% / +0.18% +0.46% +0.48%] index_select reverse : Elapsed 0.039 ms (3.945 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.58% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.934 ms / 100) 3.933 -> 3.938 ( +0.13%) [ +0.13% +0.08% +0.00% / +0.13% +0.46% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.938 ms / 100) 3.936 -> 3.946 ( +0.25%) [ +0.20% +0.25% +0.00% / +0.25% +0.69% +0.48%] index_select spread : Elapsed 0.039 ms (3.944 ms / 100) 3.932 -> 3.936 ( +0.10%) [ +0.13% +0.00% +0.03% / +0.10% +0.58% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.937 ms / 100) 3.938 -> 3.941 ( +0.08%) [ +0.10% +0.20% +0.00% / +0.08% +0.56% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.942 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.58% +0.58%] index_select strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 3.946 -> 3.944 ( -0.05%) [ +0.05% +0.00% +0.08% / -0.05% +0.48% +0.48%] index_select strided 8 : Elapsed 0.039 ms (3.948 ms / 100) 3.931 -> 3.934 ( +0.08%) [ +0.08% +0.00% +0.15% / +0.08% +0.64% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.934 ms / 100) 3.925 -> 3.925 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.61% +0.66%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.927 -> 3.926 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.59% +0.56%] index_select random_sorted : Elapsed 0.039 ms (3.927 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.10% +0.15% +0.00% / +0.03% +0.76% +0.76%] index_select perm : Elapsed 0.039 ms (3.945 ms / 100) 3.928 -> 3.927 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.56% +0.66%] index_select perm_sorted : Elapsed 0.039 ms (3.928 ms / 100) B = [4, 5, 40, 16] (stride (640, 2560, 1, 40)) A = [4, 5, 40, 20] (stride (200, 40, 1, 800)) dim = 3 3.199 -> 3.197 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.88% +0.84%] index_select const : Elapsed 0.032 ms (3.199 ms / 100) 3.204 -> 3.206 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.81% +0.84%] index_select wrap : Elapsed 0.032 ms (3.204 ms / 100) 3.186 -> 3.185 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.82% +0.78%] index_select linear : Elapsed 0.032 ms (3.186 ms / 100) 3.196 -> 3.199 ( +0.09%) [ +0.16% +0.13% +0.00% / +0.09% +0.81% +0.88%] index_select reverse : Elapsed 0.032 ms (3.201 ms / 100) 3.197 -> 3.197 ( +0.00%) [ +0.09% +0.00% +0.03% / +0.00% +0.66% +0.63%] index_select skip64 : Elapsed 0.032 ms (3.200 ms / 100) 3.190 -> 3.191 ( +0.03%) [ +0.09% +0.00% +0.19% / +0.03% +0.75% +0.75%] index_select skip256 : Elapsed 0.032 ms (3.193 ms / 100) 3.210 -> 3.212 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.69% +0.65%] index_select spread : Elapsed 0.032 ms (3.212 ms / 100) 3.201 -> 3.204 ( +0.09%) [ +0.12% +0.00% +0.00% / +0.09% +0.69% +0.69%] index_select strided 3 : Elapsed 0.032 ms (3.205 ms / 100) 3.192 -> 3.191 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.63% +0.60%] index_select strided 5 : Elapsed 0.032 ms (3.192 ms / 100) 3.197 -> 3.197 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.66% +0.75%] index_select strided 7 : Elapsed 0.032 ms (3.200 ms / 100) 3.195 -> 3.198 ( +0.09%) [ +0.13% +0.09% +0.00% / +0.09% +0.85% +0.78%] index_select strided 8 : Elapsed 0.032 ms (3.199 ms / 100) 3.203 -> 3.204 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.81% +0.78%] index_select strided 16 : Elapsed 0.032 ms (3.203 ms / 100) 3.200 -> 3.202 ( +0.06%) [ +0.06% +0.09% +0.00% / +0.06% +0.41% +0.53%] index_select random : Elapsed 0.032 ms (3.202 ms / 100) 3.194 -> 3.192 ( -0.06%) [ +0.00% +0.00% +0.03% / -0.06% +0.47% +0.50%] index_select random_sorted : Elapsed 0.032 ms (3.194 ms / 100) 3.209 -> 3.209 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.72% +0.65%] index_select perm : Elapsed 0.032 ms (3.209 ms / 100) 3.200 -> 3.208 ( +0.25%) [ +0.22% +0.16% +0.00% / +0.25% +0.69% +0.75%] index_select perm_sorted : Elapsed 0.032 ms (3.207 ms / 100) B = [4, 5, 40, 16] (stride (1, 2560, 64, 4)) A = [4, 5, 40, 20] (stride (4000, 800, 20, 1)) dim = 3 3.170 -> 3.172 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.16% +2.81%] index_select const : Elapsed 0.032 ms (3.172 ms / 100) 3.160 -> 3.160 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.47% +0.47%] index_select wrap : Elapsed 0.032 ms (3.161 ms / 100) 3.172 -> 3.172 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_select linear : Elapsed 0.032 ms (3.173 ms / 100) 3.086 -> 3.086 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.42% +0.36%] index_select reverse : Elapsed 0.031 ms (3.087 ms / 100) 3.091 -> 3.091 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.65% +0.65%] index_select skip64 : Elapsed 0.031 ms (3.092 ms / 100) 3.176 -> 3.175 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.35% +0.31%] index_select skip256 : Elapsed 0.032 ms (3.177 ms / 100) 3.159 -> 3.160 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.47% +0.47%] index_select spread : Elapsed 0.032 ms (3.162 ms / 100) 3.093 -> 3.094 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.58% +0.48%] index_select strided 3 : Elapsed 0.031 ms (3.096 ms / 100) 3.086 -> 3.086 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.49% +0.42%] index_select strided 5 : Elapsed 0.031 ms (3.087 ms / 100) 3.158 -> 3.158 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.76% +0.76%] index_select strided 7 : Elapsed 0.032 ms (3.159 ms / 100) 3.171 -> 3.171 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.60%] index_select strided 8 : Elapsed 0.032 ms (3.171 ms / 100) 3.083 -> 3.083 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.62% +0.62%] index_select strided 16 : Elapsed 0.031 ms (3.083 ms / 100) 3.090 -> 3.090 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_select random : Elapsed 0.031 ms (3.090 ms / 100) 3.168 -> 3.169 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.79% +0.73%] index_select random_sorted : Elapsed 0.032 ms (3.169 ms / 100) 3.157 -> 3.158 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.73% +0.70%] index_select perm : Elapsed 0.032 ms (3.158 ms / 100) 3.091 -> 3.091 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.74%] index_select perm_sorted : Elapsed 0.031 ms (3.092 ms / 100) out_shape = [16, 20, 5, 40] in_shape = [4, 20, 5, 40] idx_dim = 0 B = [16, 20, 5, 40] (stride (200, 3200, 40, 1)) A = [4, 20, 5, 40] (stride (1, 4, 3200, 80)) dim = 0 1.126 -> 1.128 ( +0.18%) [ +0.00% +0.53% +0.62% / +0.53% +0.18% +0.36%] index_add_ linear : Elapsed 0.011 ms (1.126 ms / 100) 1.110 -> 1.111 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.27% +0.09% +0.09%] index_copy_ linear : Elapsed 0.011 ms (1.110 ms / 100) 1.122 -> 1.126 ( +0.36%) [ +0.53% +0.36% +0.00% / +0.36% +1.07% +0.71%] index_add_ reverse : Elapsed 0.011 ms (1.128 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.00% +0.18% +0.27% / +0.00% +0.72% +0.45%] index_copy_ reverse : Elapsed 0.011 ms (1.107 ms / 100) 1.132 -> 1.132 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.71% +1.33%] index_add_ spread : Elapsed 0.011 ms (1.132 ms / 100) 1.113 -> 1.116 ( +0.27%) [ +0.09% +0.00% +0.36% / +0.27% +0.90% +0.99%] index_copy_ spread : Elapsed 0.011 ms (1.114 ms / 100) 1.132 -> 1.131 ( -0.09%) [ +0.27% +0.00% +0.09% / -0.09% +0.62% +0.62%] index_add_ strided 3 : Elapsed 0.011 ms (1.135 ms / 100) 1.111 -> 1.119 ( +0.72%) [ +0.45% +0.00% +0.18% / +0.72% +1.08% +1.08%] index_copy_ strided 3 : Elapsed 0.011 ms (1.116 ms / 100) 1.137 -> 1.129 ( -0.70%) [ +0.35% +0.62% +0.00% / -0.18% -0.62% -0.70%] index_add_ strided 5 : Elapsed 0.011 ms (1.141 ms / 100) 1.122 -> 1.110 ( -1.07%) [ +0.00% +0.27% +0.09% / +0.36% -1.07% -0.98%] index_copy_ strided 5 : Elapsed 0.011 ms (1.122 ms / 100) 1.134 -> 1.130 ( -0.35%) [ +0.18% +0.18% +0.00% / -0.26% -0.18% -0.35%] index_add_ strided 7 : Elapsed 0.011 ms (1.136 ms / 100) 1.116 -> 1.111 ( -0.45%) [ +0.00% +0.18% +0.27% / +0.09% -0.45% -0.36%] index_copy_ strided 7 : Elapsed 0.011 ms (1.116 ms / 100) 1.133 -> 1.127 ( -0.53%) [ +0.53% +0.00% +0.26% / +0.44% -0.18% -0.53%] index_add_ perm : Elapsed 0.011 ms (1.139 ms / 100) 1.118 -> 1.099 ( -1.70%) [ +0.00% +0.54% +0.36% / +0.81% -1.16% -1.70%] index_copy_ perm : Elapsed 0.011 ms (1.118 ms / 100) 1.134 -> 1.121 ( -1.15%) [ +0.53% +0.00% +0.62% / +0.53% -0.97% -1.15%] index_add_ perm_sorted : Elapsed 0.011 ms (1.140 ms / 100) 1.118 -> 1.099 ( -1.70%) [ +0.27% +0.00% +0.18% / +0.54% -1.70% -1.34%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.121 ms / 100) 2.007 -> 2.010 ( +0.15%) [ +0.05% +0.00% +0.25% / +0.15% +0.90% +0.95%] index_select const : Elapsed 0.020 ms (2.008 ms / 100) 2.007 -> 2.012 ( +0.25%) [ +0.00% +0.05% +0.20% / +0.25% +1.05% +0.80%] index_select wrap : Elapsed 0.020 ms (2.007 ms / 100) 2.004 -> 2.008 ( +0.20%) [ +0.10% +0.20% +0.00% / +0.20% +1.25% +1.30%] index_select linear : Elapsed 0.020 ms (2.006 ms / 100) 2.002 -> 2.007 ( +0.25%) [ +0.30% +0.35% +0.00% / +0.25% +1.35% +1.40%] index_select reverse : Elapsed 0.020 ms (2.008 ms / 100) 2.010 -> 2.013 ( +0.15%) [ +0.15% +0.20% +0.00% / +0.15% +0.60% +0.60%] index_select skip64 : Elapsed 0.020 ms (2.013 ms / 100) 2.012 -> 2.012 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +0.50% +0.40%] index_select skip256 : Elapsed 0.020 ms (2.013 ms / 100) 2.007 -> 2.010 ( +0.15%) [ +0.00% +0.20% +0.30% / +0.15% +0.80% +0.80%] index_select spread : Elapsed 0.020 ms (2.007 ms / 100) 2.004 -> 2.008 ( +0.20%) [ +0.00% +0.15% +0.05% / +0.20% +1.00% +1.05%] index_select strided 3 : Elapsed 0.020 ms (2.004 ms / 100) 2.006 -> 2.008 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +1.05% +0.90%] index_select random : Elapsed 0.020 ms (2.008 ms / 100) 2.007 -> 2.012 ( +0.25%) [ +0.25% +0.00% +0.00% / +0.25% +0.90% +0.80%] index_select random_sorted : Elapsed 0.020 ms (2.012 ms / 100) B = [16, 20, 5, 40] (stride (5, 80, 1, 1600)) A = [4, 20, 5, 40] (stride (4000, 5, 1, 100)) dim = 0 2.508 -> 2.512 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.32% +0.16%] index_add_ linear : Elapsed 0.025 ms (2.510 ms / 100) 2.468 -> 2.471 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.32% +3.97%] index_copy_ linear : Elapsed 0.025 ms (2.472 ms / 100) 2.506 -> 2.508 ( +0.08%) [ +0.00% +0.20% +0.04% / +0.16% +0.36% +0.08%] index_add_ reverse : Elapsed 0.025 ms (2.506 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% -0.04% +0.04%] index_copy_ reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.535 -> 2.533 ( -0.08%) [ +0.12% +0.08% +0.00% / +0.16% +0.20% -0.08%] index_add_ spread : Elapsed 0.025 ms (2.538 ms / 100) 2.517 -> 2.518 ( +0.04%) [ +0.16% +0.00% +0.08% / +0.04% +0.04% +0.04%] index_copy_ spread : Elapsed 0.025 ms (2.521 ms / 100) 2.521 -> 2.522 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.36% +0.44%] index_add_ strided 3 : Elapsed 0.025 ms (2.524 ms / 100) 2.535 -> 2.537 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.24% +0.08% +0.16%] index_copy_ strided 3 : Elapsed 0.025 ms (2.537 ms / 100) 2.490 -> 2.492 ( +0.08%) [ +0.36% +0.00% +0.20% / +0.08% +0.36% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.499 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.12% +0.08% +0.00%] index_copy_ strided 5 : Elapsed 0.025 ms (2.479 ms / 100) 2.498 -> 2.502 ( +0.16%) [ +0.20% +0.00% +0.04% / +0.16% +0.36% +0.20%] index_add_ strided 7 : Elapsed 0.025 ms (2.503 ms / 100) 2.494 -> 2.497 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.12% +0.20% +0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.498 ms / 100) 2.520 -> 2.518 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.08% -0.04%] index_add_ perm : Elapsed 0.025 ms (2.521 ms / 100) 2.506 -> 2.509 ( +0.12%) [ +0.16% +0.24% +0.00% / +0.20% +0.12% +0.44%] index_copy_ perm : Elapsed 0.025 ms (2.510 ms / 100) 2.519 -> 2.521 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.522 ms / 100) 2.508 -> 2.508 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.16% +0.12% +0.00%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.511 ms / 100) 5.583 -> 5.582 ( -0.02%) [ +0.14% +0.07% +0.00% / -0.02% +0.45% +0.38%] index_select const : Elapsed 0.056 ms (5.591 ms / 100) 5.615 -> 5.615 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.00% +0.23% +0.16%] index_select wrap : Elapsed 0.056 ms (5.617 ms / 100) 5.615 -> 5.615 ( +0.00%) [ +0.12% +0.09% +0.00% / +0.00% +0.36% +0.28%] index_select linear : Elapsed 0.056 ms (5.622 ms / 100) 5.570 -> 5.569 ( -0.02%) [ +0.38% +0.20% +0.00% / -0.02% +0.56% +0.31%] index_select reverse : Elapsed 0.056 ms (5.591 ms / 100) 5.603 -> 5.600 ( -0.05%) [ +0.00% +0.07% +0.04% / -0.05% +0.14% +0.30%] index_select skip64 : Elapsed 0.056 ms (5.603 ms / 100) 5.595 -> 5.604 ( +0.16%) [ +0.04% +0.00% +0.04% / +0.16% +0.20% +0.32%] index_select skip256 : Elapsed 0.056 ms (5.597 ms / 100) 5.596 -> 5.599 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.32% +0.29%] index_select spread : Elapsed 0.056 ms (5.599 ms / 100) 5.599 -> 5.603 ( +0.07%) [ +0.11% +0.00% +0.00% / +0.07% +0.25% +0.32%] index_select strided 3 : Elapsed 0.056 ms (5.605 ms / 100) 5.594 -> 5.587 ( -0.13%) [ +0.00% +0.02% +0.05% / -0.13% +0.34% +0.32%] index_select random : Elapsed 0.056 ms (5.594 ms / 100) 5.583 -> 5.589 ( +0.11%) [ +0.02% +0.00% +0.07% / +0.11% +0.16% +0.23%] index_select random_sorted : Elapsed 0.056 ms (5.584 ms / 100) out_shape = [4, 16, 5, 40] in_shape = [4, 20, 5, 40] idx_dim = 1 B = [4, 16, 5, 40] (stride (200, 800, 40, 1)) A = [4, 20, 5, 40] (stride (100, 5, 1, 400)) dim = 1 3.916 -> 3.920 ( +0.10%) [ +0.08% +0.20% +0.00% / +0.10% +0.87% +0.79%] index_select const : Elapsed 0.039 ms (3.919 ms / 100) 3.939 -> 3.938 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.89% +3.10%] index_select wrap : Elapsed 0.039 ms (3.940 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.00% +0.15% +0.13% / +0.05% +0.84% +0.64%] index_select linear : Elapsed 0.039 ms (3.921 ms / 100) 3.928 -> 3.928 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.74% +0.79%] index_select reverse : Elapsed 0.039 ms (3.932 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.82% +0.71%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.18% +0.00% +0.08% / +0.15% +0.66% +0.66%] index_select skip256 : Elapsed 0.039 ms (3.924 ms / 100) 3.946 -> 3.946 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.66% +0.66%] index_select spread : Elapsed 0.039 ms (3.947 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.48% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 3.928 -> 3.922 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.56% +0.53%] index_select strided 5 : Elapsed 0.039 ms (3.928 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.15% +0.00% +0.10% / +0.05% +0.43% +0.46%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.918 -> 3.925 ( +0.18%) [ +0.18% +0.08% +0.00% / +0.18% +0.74% +0.74%] index_select strided 8 : Elapsed 0.039 ms (3.925 ms / 100) 3.937 -> 3.936 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.61% +0.56%] index_select strided 16 : Elapsed 0.039 ms (3.938 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.58% +0.58%] index_select random : Elapsed 0.039 ms (3.946 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.53% +0.61%] index_select random_sorted : Elapsed 0.039 ms (3.929 ms / 100) 3.941 -> 3.940 ( -0.03%) [ +0.05% +0.00% +0.08% / -0.03% +0.56% +0.58%] index_select perm : Elapsed 0.039 ms (3.943 ms / 100) 3.933 -> 3.933 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.56% +0.56%] index_select perm_sorted : Elapsed 0.039 ms (3.936 ms / 100) B = [4, 16, 5, 40] (stride (200, 800, 1, 5)) A = [4, 20, 5, 40] (stride (40, 800, 160, 1)) dim = 1 3.817 -> 3.830 ( +0.34%) [ +0.00% +0.08% +0.03% / +0.42% +0.37% +0.34%] index_select const : Elapsed 0.038 ms (3.817 ms / 100) 3.820 -> 3.820 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.52% +0.52%] index_select wrap : Elapsed 0.038 ms (3.822 ms / 100) 3.815 -> 3.823 ( +0.21%) [ +0.10% +0.10% +0.00% / +0.21% +0.66% +0.60%] index_select linear : Elapsed 0.038 ms (3.819 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.55% +0.58%] index_select reverse : Elapsed 0.038 ms (3.816 ms / 100) 3.812 -> 3.815 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.52% +0.52%] index_select skip64 : Elapsed 0.038 ms (3.813 ms / 100) 3.826 -> 3.824 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.39% +0.42%] index_select skip256 : Elapsed 0.038 ms (3.826 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.55% +0.60%] index_select spread : Elapsed 0.038 ms (3.818 ms / 100) 3.822 -> 3.823 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.60% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.824 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.60% +0.58%] index_select strided 5 : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.73%] index_select strided 7 : Elapsed 0.038 ms (3.815 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.79% +0.73%] index_select strided 8 : Elapsed 0.038 ms (3.820 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.60% +0.66%] index_select strided 16 : Elapsed 0.038 ms (3.814 ms / 100) 3.815 -> 3.814 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.66% +0.66%] index_select random : Elapsed 0.038 ms (3.815 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.00% +0.10% +0.00% / +0.03% +0.76% +0.87%] index_select random_sorted : Elapsed 0.038 ms (3.815 ms / 100) 3.814 -> 3.816 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.66% +0.68%] index_select perm : Elapsed 0.038 ms (3.814 ms / 100) 3.816 -> 3.821 ( +0.13%) [ +0.03% +0.13% +0.00% / +0.13% +0.76% +0.76%] index_select perm_sorted : Elapsed 0.038 ms (3.817 ms / 100) B = [4, 16, 5, 40] (stride (1, 800, 160, 4)) A = [4, 20, 5, 40] (stride (200, 800, 1, 5)) dim = 1 3.583 -> 3.582 ( -0.03%) [ +0.08% +0.06% +0.00% / -0.03% +0.81% +0.92%] index_select const : Elapsed 0.036 ms (3.586 ms / 100) 3.604 -> 3.604 ( +0.00%) [ +0.00% +0.19% +0.08% / +0.00% +0.80% +0.64%] index_select wrap : Elapsed 0.036 ms (3.604 ms / 100) 3.595 -> 3.592 ( -0.08%) [ +0.08% +0.00% +0.03% / -0.08% +0.67% +0.72%] index_select linear : Elapsed 0.036 ms (3.598 ms / 100) 3.615 -> 3.618 ( +0.08%) [ +0.25% +0.11% +0.00% / +0.08% +0.94% +0.86%] index_select reverse : Elapsed 0.036 ms (3.624 ms / 100) 3.598 -> 3.598 ( +0.00%) [ +0.14% +0.11% +0.00% / +0.00% +0.69% +0.67%] index_select skip64 : Elapsed 0.036 ms (3.603 ms / 100) 3.588 -> 3.589 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.75% +0.81%] index_select skip256 : Elapsed 0.036 ms (3.591 ms / 100) 3.596 -> 3.597 ( +0.03%) [ +0.06% +0.11% +0.00% / +0.03% +0.61% +0.64%] index_select spread : Elapsed 0.036 ms (3.598 ms / 100) 3.594 -> 3.593 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.67% +0.67%] index_select strided 3 : Elapsed 0.036 ms (3.595 ms / 100) 3.584 -> 3.588 ( +0.11%) [ +0.17% +0.17% +0.00% / +0.11% +0.81% +0.70%] index_select strided 5 : Elapsed 0.036 ms (3.590 ms / 100) 3.606 -> 3.607 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.78% +0.67%] index_select strided 7 : Elapsed 0.036 ms (3.607 ms / 100) 3.589 -> 3.589 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.70% +0.72%] index_select strided 8 : Elapsed 0.036 ms (3.591 ms / 100) 3.591 -> 3.592 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.70% +0.67%] index_select strided 16 : Elapsed 0.036 ms (3.592 ms / 100) 3.599 -> 3.604 ( +0.14%) [ +0.14% +0.06% +0.00% / +0.14% +0.64% +3.75%] index_select random : Elapsed 0.036 ms (3.604 ms / 100) 3.602 -> 3.601 ( -0.03%) [ +0.14% +0.03% +0.00% / -0.03% +0.50% +0.44%] index_select random_sorted : Elapsed 0.036 ms (3.607 ms / 100) 3.600 -> 3.599 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.56% +0.53%] index_select perm : Elapsed 0.036 ms (3.601 ms / 100) 3.596 -> 3.595 ( -0.03%) [ +0.00% +0.11% +0.14% / -0.03% +0.64% +0.67%] index_select perm_sorted : Elapsed 0.036 ms (3.596 ms / 100) B = [4, 16, 5, 40] (stride (640, 40, 2560, 1)) A = [4, 20, 5, 40] (stride (4000, 1, 20, 100)) dim = 1 4.265 -> 4.272 ( +0.16%) [ +0.26% +0.00% +0.09% / +0.16% +0.54% +0.61%] index_select const : Elapsed 0.043 ms (4.276 ms / 100) 4.265 -> 4.278 ( +0.30%) [ +0.19% +0.05% +0.00% / +0.30% +0.82% +0.52%] index_select wrap : Elapsed 0.043 ms (4.273 ms / 100) 4.272 -> 4.264 ( -0.19%) [ +0.07% +0.00% +0.02% / -0.19% +0.44% +0.40%] index_select linear : Elapsed 0.043 ms (4.275 ms / 100) 4.268 -> 4.268 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.49% +0.59%] index_select reverse : Elapsed 0.043 ms (4.271 ms / 100) 4.272 -> 4.277 ( +0.12%) [ +0.16% +0.26% +0.00% / +0.12% +0.54% +0.44%] index_select skip64 : Elapsed 0.043 ms (4.279 ms / 100) 4.263 -> 4.264 ( +0.02%) [ +0.07% +0.09% +0.00% / +0.02% +0.47% +0.54%] index_select skip256 : Elapsed 0.043 ms (4.266 ms / 100) 4.265 -> 4.271 ( +0.14%) [ +0.21% +0.19% +0.00% / +0.14% +0.52% +0.75%] index_select spread : Elapsed 0.043 ms (4.274 ms / 100) 4.269 -> 4.277 ( +0.19%) [ +0.00% +0.19% +0.12% / +0.19% +0.70% +2.01%] index_select strided 3 : Elapsed 0.043 ms (4.269 ms / 100) 4.267 -> 4.269 ( +0.05%) [ +0.19% +0.12% +0.00% / +0.05% +0.63% +0.61%] index_select strided 5 : Elapsed 0.043 ms (4.275 ms / 100) 4.265 -> 4.264 ( -0.02%) [ +0.00% +0.09% +0.05% / -0.02% +0.45% +0.63%] index_select strided 7 : Elapsed 0.043 ms (4.265 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.09% +0.02% +0.00% / +0.05% +0.70% +0.63%] index_select strided 8 : Elapsed 0.043 ms (4.276 ms / 100) 4.268 -> 4.270 ( +0.05%) [ +0.12% +0.16% +0.00% / +0.05% +0.80% +0.84%] index_select strided 16 : Elapsed 0.043 ms (4.273 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.61% +0.65%] index_select random : Elapsed 0.043 ms (4.279 ms / 100) 4.268 -> 4.272 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.70% +0.73%] index_select random_sorted : Elapsed 0.043 ms (4.268 ms / 100) 4.264 -> 4.267 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.07% +0.70% +0.82%] index_select perm : Elapsed 0.043 ms (4.266 ms / 100) 4.268 -> 4.274 ( +0.14%) [ +0.12% +0.09% +0.00% / +0.14% +0.56% +0.63%] index_select perm_sorted : Elapsed 0.043 ms (4.273 ms / 100) B = [4, 16, 5, 40] (stride (16, 1, 2560, 64)) A = [4, 20, 5, 40] (stride (1, 160, 3200, 4)) dim = 1 3.552 -> 3.550 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.90% +0.87%] index_select const : Elapsed 0.036 ms (3.554 ms / 100) 3.533 -> 3.536 ( +0.08%) [ +0.08% +0.06% +0.00% / +0.08% +0.76% +0.65%] index_select wrap : Elapsed 0.035 ms (3.536 ms / 100) 3.533 -> 3.534 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.54% +0.57%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.540 -> 3.539 ( -0.03%) [ +0.11% +0.00% +0.06% / -0.03% +0.88% +0.76%] index_select reverse : Elapsed 0.035 ms (3.544 ms / 100) 3.564 -> 3.569 ( +0.14%) [ +0.06% +0.17% +0.00% / +0.14% +0.90% +0.70%] index_select skip64 : Elapsed 0.036 ms (3.566 ms / 100) 3.542 -> 3.544 ( +0.06%) [ +0.08% +0.06% +0.00% / +0.06% +0.73% +0.68%] index_select skip256 : Elapsed 0.035 ms (3.545 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.68% +0.65%] index_select spread : Elapsed 0.035 ms (3.535 ms / 100) 3.537 -> 3.532 ( -0.14%) [ +0.06% +0.00% +0.06% / -0.14% +0.76% +0.71%] index_select strided 3 : Elapsed 0.035 ms (3.539 ms / 100) 3.538 -> 3.541 ( +0.08%) [ +0.14% +0.14% +0.00% / +0.08% +0.88% +0.76%] index_select strided 5 : Elapsed 0.035 ms (3.543 ms / 100) 3.536 -> 3.535 ( -0.03%) [ +0.00% +0.11% +0.20% / -0.03% +0.82% +0.76%] index_select strided 7 : Elapsed 0.035 ms (3.536 ms / 100) 3.551 -> 3.551 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select strided 8 : Elapsed 0.036 ms (3.552 ms / 100) 3.536 -> 3.536 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.74%] index_select strided 16 : Elapsed 0.035 ms (3.537 ms / 100) 3.546 -> 3.546 ( +0.00%) [ +0.14% +0.20% +0.00% / +0.00% +0.54% +0.59%] index_select random : Elapsed 0.036 ms (3.551 ms / 100) 3.539 -> 3.534 ( -0.14%) [ +0.03% +0.00% +0.06% / -0.14% +0.57% +0.42%] index_select random_sorted : Elapsed 0.035 ms (3.540 ms / 100) 3.532 -> 3.535 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.48% +0.51%] index_select perm : Elapsed 0.035 ms (3.533 ms / 100) 3.551 -> 3.553 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.39% +0.28%] index_select perm_sorted : Elapsed 0.036 ms (3.551 ms / 100) B = [4, 16, 5, 40] (stride (16, 1, 2560, 64)) A = [4, 20, 5, 40] (stride (20, 1, 80, 400)) dim = 1 3.920 -> 3.921 ( +0.03%) [ +0.08% +0.15% +0.00% / +0.03% +0.36% +0.38%] index_select const : Elapsed 0.039 ms (3.923 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.48% +0.53%] index_select wrap : Elapsed 0.039 ms (3.932 ms / 100) 3.926 -> 3.924 ( -0.05%) [ +0.00% +0.03% +0.03% / -0.05% +0.53% +0.56%] index_select linear : Elapsed 0.039 ms (3.926 ms / 100) 3.941 -> 3.947 ( +0.15%) [ +0.05% +0.08% +0.00% / +0.15% +0.61% +0.61%] index_select reverse : Elapsed 0.039 ms (3.943 ms / 100) 3.932 -> 3.935 ( +0.08%) [ +0.10% +0.10% +0.00% / +0.08% +0.66% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.936 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.59% +0.54%] index_select skip256 : Elapsed 0.039 ms (3.925 ms / 100) 3.938 -> 3.940 ( +0.05%) [ +0.03% +0.00% +0.10% / +0.05% +0.58% +0.66%] index_select spread : Elapsed 0.039 ms (3.939 ms / 100) 3.943 -> 3.945 ( +0.05%) [ +0.20% +0.18% +0.00% / +0.05% +0.58% +0.61%] index_select strided 3 : Elapsed 0.040 ms (3.951 ms / 100) 3.924 -> 3.928 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.66% +0.76%] index_select strided 5 : Elapsed 0.039 ms (3.928 ms / 100) 3.929 -> 3.931 ( +0.05%) [ +0.10% +0.00% +0.08% / +0.05% +0.69% +0.64%] index_select strided 7 : Elapsed 0.039 ms (3.933 ms / 100) 3.941 -> 3.948 ( +0.18%) [ +0.03% +0.05% +0.00% / +0.18% +0.81% +0.84%] index_select strided 8 : Elapsed 0.039 ms (3.942 ms / 100) 3.938 -> 3.938 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.76% +0.71%] index_select strided 16 : Elapsed 0.039 ms (3.941 ms / 100) 3.935 -> 3.933 ( -0.05%) [ +0.00% +0.03% +0.03% / -0.05% +0.66% +0.61%] index_select random : Elapsed 0.039 ms (3.935 ms / 100) 3.942 -> 3.943 ( +0.03%) [ +0.03% +0.00% +0.13% / +0.03% +0.46% +0.28%] index_select random_sorted : Elapsed 0.039 ms (3.943 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.53% +0.58%] index_select perm : Elapsed 0.039 ms (3.934 ms / 100) 3.940 -> 3.940 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.56% +0.63%] index_select perm_sorted : Elapsed 0.039 ms (3.942 ms / 100) B = [4, 16, 5, 40] (stride (1, 4, 2560, 64)) A = [4, 20, 5, 40] (stride (4000, 40, 800, 1)) dim = 1 3.509 -> 3.509 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.85% +0.83%] index_select const : Elapsed 0.035 ms (3.511 ms / 100) 3.509 -> 3.512 ( +0.09%) [ +0.03% +0.00% +0.03% / +0.09% +0.80% +0.74%] index_select wrap : Elapsed 0.035 ms (3.510 ms / 100) 3.505 -> 3.507 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.83% +0.86%] index_select linear : Elapsed 0.035 ms (3.505 ms / 100) 3.520 -> 3.520 ( +0.00%) [ +0.09% +0.06% +0.00% / +0.00% +0.74% +0.71%] index_select reverse : Elapsed 0.035 ms (3.523 ms / 100) 3.517 -> 3.522 ( +0.14%) [ +0.00% +0.14% +0.09% / +0.14% +0.63% +0.71%] index_select skip64 : Elapsed 0.035 ms (3.517 ms / 100) 3.513 -> 3.515 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.48%] index_select skip256 : Elapsed 0.035 ms (3.515 ms / 100) 3.514 -> 3.516 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.63% +0.65%] index_select spread : Elapsed 0.035 ms (3.515 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.09% +0.03% +0.00% / +0.06% +0.60% +0.65%] index_select strided 3 : Elapsed 0.035 ms (3.515 ms / 100) 3.504 -> 3.506 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.71% +0.86%] index_select strided 5 : Elapsed 0.035 ms (3.506 ms / 100) 3.523 -> 3.528 ( +0.14%) [ +0.06% +0.14% +0.00% / +0.14% +0.85% +0.74%] index_select strided 7 : Elapsed 0.035 ms (3.525 ms / 100) 3.513 -> 3.517 ( +0.11%) [ +0.09% +0.00% +0.09% / +0.11% +0.68% +0.71%] index_select strided 8 : Elapsed 0.035 ms (3.516 ms / 100) 3.510 -> 3.517 ( +0.20%) [ +0.09% +0.14% +0.00% / +0.20% +0.74% +0.74%] index_select strided 16 : Elapsed 0.035 ms (3.513 ms / 100) 3.520 -> 3.519 ( -0.03%) [ +0.00% +0.03% +0.06% / -0.03% +0.45% +0.40%] index_select random : Elapsed 0.035 ms (3.520 ms / 100) 3.507 -> 3.506 ( -0.03%) [ +0.00% +0.43% +0.00% / -0.03% +0.60% +0.63%] index_select random_sorted : Elapsed 0.035 ms (3.507 ms / 100) 3.516 -> 3.519 ( +0.09%) [ +0.00% +0.03% +0.00% / +0.09% +0.57% +0.57%] index_select perm : Elapsed 0.035 ms (3.516 ms / 100) 3.517 -> 3.516 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.48% +0.54%] index_select perm_sorted : Elapsed 0.035 ms (3.518 ms / 100) B = [4, 16, 5, 40] (stride (80, 5, 1, 320)) A = [4, 20, 5, 40] (stride (800, 1, 3200, 20)) dim = 1 4.135 -> 4.139 ( +0.10%) [ +0.07% +0.10% +0.00% / +0.10% +0.56% +0.56%] index_select const : Elapsed 0.041 ms (4.138 ms / 100) 4.131 -> 4.137 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +0.70% +0.58%] index_select wrap : Elapsed 0.041 ms (4.131 ms / 100) 4.137 -> 4.142 ( +0.12%) [ +0.05% +0.02% +0.00% / +0.12% +0.48% +0.48%] index_select linear : Elapsed 0.041 ms (4.139 ms / 100) 4.129 -> 4.136 ( +0.17%) [ +0.00% +0.12% +0.10% / +0.17% +0.65% +0.46%] index_select reverse : Elapsed 0.041 ms (4.129 ms / 100) 4.132 -> 4.127 ( -0.12%) [ +0.02% +0.02% +0.00% / -0.12% +0.56% +0.56%] index_select skip64 : Elapsed 0.041 ms (4.133 ms / 100) 4.138 -> 4.140 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.41% +0.36%] index_select skip256 : Elapsed 0.041 ms (4.140 ms / 100) 4.137 -> 4.137 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.48% +0.48%] index_select spread : Elapsed 0.041 ms (4.139 ms / 100) 4.130 -> 4.137 ( +0.17%) [ +0.17% +0.02% +0.00% / +0.17% +0.48% +0.46%] index_select strided 3 : Elapsed 0.041 ms (4.137 ms / 100) 4.130 -> 4.131 ( +0.02%) [ +0.12% +0.00% +0.17% / +0.02% +0.56% +0.46%] index_select strided 5 : Elapsed 0.041 ms (4.135 ms / 100) 4.131 -> 4.132 ( +0.02%) [ +0.07% +0.00% +0.10% / +0.02% +0.68% +0.61%] index_select strided 7 : Elapsed 0.041 ms (4.134 ms / 100) 4.138 -> 4.141 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.48% +0.48%] index_select strided 8 : Elapsed 0.041 ms (4.141 ms / 100) 4.128 -> 4.137 ( +0.22%) [ +0.05% +0.12% +0.00% / +0.22% +0.56% +0.56%] index_select strided 16 : Elapsed 0.041 ms (4.130 ms / 100) 4.128 -> 4.126 ( -0.05%) [ +0.02% +0.00% +0.10% / -0.05% +0.68% +0.65%] index_select random : Elapsed 0.041 ms (4.129 ms / 100) 4.134 -> 4.136 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.65% +0.65%] index_select random_sorted : Elapsed 0.041 ms (4.134 ms / 100) 4.130 -> 4.132 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.82% +0.75%] index_select perm : Elapsed 0.041 ms (4.133 ms / 100) 4.129 -> 4.133 ( +0.10%) [ +0.07% +0.00% +0.02% / +0.10% +0.73% +0.51%] index_select perm_sorted : Elapsed 0.041 ms (4.132 ms / 100) B = [4, 16, 5, 40] (stride (80, 1, 16, 320)) A = [4, 20, 5, 40] (stride (4000, 200, 1, 5)) dim = 1 3.907 -> 3.918 ( +0.28%) [ +0.44% +0.15% +0.00% / +0.28% +0.56% +2.46%] index_select const : Elapsed 0.039 ms (3.924 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.71%] index_select wrap : Elapsed 0.039 ms (3.923 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.13% +0.08% +0.00% / +0.10% +0.69% +0.69%] index_select linear : Elapsed 0.039 ms (3.925 ms / 100) 3.936 -> 3.934 ( -0.05%) [ +0.03% +0.00% +0.08% / -0.05% +0.86% +0.76%] index_select reverse : Elapsed 0.039 ms (3.937 ms / 100) 3.911 -> 3.919 ( +0.20%) [ +0.33% +0.31% +0.00% / +0.20% +0.66% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.924 ms / 100) 3.900 -> 3.902 ( +0.05%) [ +0.13% +0.00% +0.08% / +0.05% +0.56% +0.64%] index_select skip256 : Elapsed 0.039 ms (3.905 ms / 100) 3.928 -> 3.931 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.08% +0.69% +0.64%] index_select spread : Elapsed 0.039 ms (3.932 ms / 100) 3.922 -> 3.929 ( +0.18%) [ +0.03% +0.00% +0.05% / +0.18% +0.69% +0.76%] index_select strided 3 : Elapsed 0.039 ms (3.923 ms / 100) 3.915 -> 3.920 ( +0.13%) [ +0.15% +0.00% +0.26% / +0.13% +0.59% +0.69%] index_select strided 5 : Elapsed 0.039 ms (3.921 ms / 100) 3.927 -> 3.930 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.79% +0.89%] index_select strided 7 : Elapsed 0.039 ms (3.936 ms / 100) 3.922 -> 3.926 ( +0.10%) [ +0.13% +0.00% +0.10% / +0.10% +0.66% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.927 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.69% +0.66%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.927 -> 3.932 ( +0.13%) [ +0.00% +0.10% +0.08% / +0.13% +0.51% +0.61%] index_select random : Elapsed 0.039 ms (3.927 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.13% +0.00% +0.13% / +0.03% +0.64% +0.64%] index_select random_sorted : Elapsed 0.039 ms (3.925 ms / 100) 3.933 -> 3.936 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.33% +0.31%] index_select perm : Elapsed 0.039 ms (3.933 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.00% +0.00% +0.08% / +0.05% +0.46% +0.48%] index_select perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) B = [4, 16, 5, 40] (stride (1, 20, 4, 320)) A = [4, 20, 5, 40] (stride (4000, 40, 800, 1)) dim = 1 3.836 -> 3.842 ( +0.16%) [ +0.00% +0.03% +0.03% / +0.16% +0.47% +0.50%] index_select const : Elapsed 0.038 ms (3.836 ms / 100) 3.822 -> 3.825 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select wrap : Elapsed 0.038 ms (3.826 ms / 100) 3.823 -> 3.823 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.63% +0.52%] index_select linear : Elapsed 0.038 ms (3.823 ms / 100) 3.815 -> 3.818 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.68% +0.68%] index_select reverse : Elapsed 0.038 ms (3.815 ms / 100) 3.846 -> 3.852 ( +0.16%) [ +0.00% +0.18% +0.10% / +0.16% +0.73% +0.73%] index_select skip64 : Elapsed 0.038 ms (3.846 ms / 100) 3.829 -> 3.830 ( +0.03%) [ +0.03% +0.13% +0.00% / +0.03% +0.60% +0.47%] index_select skip256 : Elapsed 0.038 ms (3.830 ms / 100) 3.824 -> 3.823 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.60% +0.55%] index_select spread : Elapsed 0.038 ms (3.825 ms / 100) 3.828 -> 3.829 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.57% +0.55%] index_select strided 3 : Elapsed 0.038 ms (3.829 ms / 100) 3.819 -> 3.821 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.52% +0.50%] index_select strided 5 : Elapsed 0.038 ms (3.822 ms / 100) 3.820 -> 3.819 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.79% +0.76%] index_select strided 7 : Elapsed 0.038 ms (3.822 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.68% +0.66%] index_select strided 8 : Elapsed 0.038 ms (3.815 ms / 100) 3.808 -> 3.813 ( +0.13%) [ +0.21% +0.05% +0.00% / +0.13% +0.74% +0.71%] index_select strided 16 : Elapsed 0.038 ms (3.816 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.71% +0.63%] index_select random : Elapsed 0.038 ms (3.817 ms / 100) 3.818 -> 3.817 ( -0.03%) [ +0.05% +0.16% +0.00% / -0.03% +0.76% +0.79%] index_select random_sorted : Elapsed 0.038 ms (3.820 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.76% +0.76%] index_select perm : Elapsed 0.038 ms (3.818 ms / 100) 3.823 -> 3.828 ( +0.13%) [ +0.00% +0.05% +0.13% / +0.13% +0.68% +0.73%] index_select perm_sorted : Elapsed 0.038 ms (3.823 ms / 100) B = [4, 16, 5, 40] (stride (1, 20, 4, 320)) A = [4, 20, 5, 40] (stride (1, 4, 80, 400)) dim = 1 4.274 -> 4.273 ( -0.02%) [ +0.05% +0.00% +0.05% / -0.02% +0.68% +0.75%] index_select const : Elapsed 0.043 ms (4.276 ms / 100) 4.284 -> 4.282 ( -0.05%) [ +0.00% +0.05% +0.09% / -0.05% +0.70% +0.70%] index_select wrap : Elapsed 0.043 ms (4.284 ms / 100) 4.288 -> 4.287 ( -0.02%) [ +0.09% +0.02% +0.00% / -0.02% +0.70% +0.72%] index_select linear : Elapsed 0.043 ms (4.292 ms / 100) 4.286 -> 4.290 ( +0.09%) [ +0.14% +0.05% +0.00% / +0.09% +0.65% +0.77%] index_select reverse : Elapsed 0.043 ms (4.292 ms / 100) 4.287 -> 4.294 ( +0.16%) [ +0.00% +0.02% +0.05% / +0.16% +0.61% +0.58%] index_select skip64 : Elapsed 0.043 ms (4.287 ms / 100) 4.285 -> 4.295 ( +0.23%) [ +0.12% +0.00% +0.09% / +0.23% +0.61% +0.54%] index_select skip256 : Elapsed 0.043 ms (4.290 ms / 100) 4.280 -> 4.294 ( +0.33%) [ +0.33% +0.00% +0.16% / +0.33% +0.86% +0.82%] index_select spread : Elapsed 0.043 ms (4.294 ms / 100) 4.274 -> 4.285 ( +0.26%) [ +0.12% +0.09% +0.00% / +0.26% +0.96% +0.77%] index_select strided 3 : Elapsed 0.043 ms (4.279 ms / 100) 4.295 -> 4.296 ( +0.02%) [ +0.02% +0.00% +0.19% / +0.02% +0.86% +0.47%] index_select strided 5 : Elapsed 0.043 ms (4.296 ms / 100) 4.282 -> 4.285 ( +0.07%) [ +0.16% +0.02% +0.00% / +0.07% +0.65% +0.72%] index_select strided 7 : Elapsed 0.043 ms (4.289 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.68% +0.56%] index_select strided 8 : Elapsed 0.043 ms (4.282 ms / 100) 4.297 -> 4.305 ( +0.19%) [ +0.05% +0.00% +0.02% / +0.19% +0.72% +0.70%] index_select strided 16 : Elapsed 0.043 ms (4.299 ms / 100) 4.275 -> 4.286 ( +0.26%) [ +0.00% +0.07% +0.12% / +0.26% +0.44% +0.44%] index_select random : Elapsed 0.043 ms (4.275 ms / 100) 4.280 -> 4.290 ( +0.23%) [ +0.19% +0.16% +0.00% / +0.23% +0.77% +0.82%] index_select random_sorted : Elapsed 0.043 ms (4.288 ms / 100) 4.287 -> 4.291 ( +0.09%) [ +0.05% +0.02% +0.00% / +0.09% +0.61% +0.51%] index_select perm : Elapsed 0.043 ms (4.289 ms / 100) 4.281 -> 4.288 ( +0.16%) [ +0.07% +0.05% +0.00% / +0.16% +0.56% +0.72%] index_select perm_sorted : Elapsed 0.043 ms (4.284 ms / 100) out_shape = [4, 20, 16, 40] in_shape = [4, 20, 5, 40] idx_dim = 2 B = [4, 20, 16, 40] (stride (12800, 640, 1, 16)) A = [4, 20, 5, 40] (stride (800, 40, 3200, 1)) dim = 2 0.931 -> 0.904 ( -2.90%) [ +0.43% +0.00% +0.43% / +0.75% -2.90% -2.69%] index_add_ linear : Elapsed 0.009 ms (0.935 ms / 100) 0.925 -> 0.884 ( -4.43%) [ +0.00% +0.32% +0.11% / +0.32% -4.00% -4.43%] index_copy_ linear : Elapsed 0.009 ms (0.925 ms / 100) 0.935 -> 0.905 ( -3.21%) [ +0.11% +0.00% +0.11% / +0.11% -2.78% -3.21%] index_add_ reverse : Elapsed 0.009 ms (0.936 ms / 100) 0.923 -> 0.885 ( -4.12%) [ +0.00% +0.11% +0.11% / +0.22% -3.68% -4.12%] index_copy_ reverse : Elapsed 0.009 ms (0.923 ms / 100) 0.958 -> 0.929 ( -3.03%) [ +0.10% +0.00% +0.21% / +0.31% -3.03% -2.82%] index_add_ spread : Elapsed 0.010 ms (0.959 ms / 100) 0.946 -> 0.913 ( -3.49%) [ +0.21% +0.00% +0.32% / +0.42% -3.49% -3.49%] index_copy_ spread : Elapsed 0.009 ms (0.948 ms / 100) 0.959 -> 0.930 ( -3.02%) [ +0.21% +0.21% +0.00% / +0.52% -3.02% -3.02%] index_add_ strided 3 : Elapsed 0.010 ms (0.961 ms / 100) 0.948 -> 0.911 ( -3.90%) [ +0.42% +0.53% +0.00% / +0.21% -3.90% -3.80%] index_copy_ strided 3 : Elapsed 0.010 ms (0.952 ms / 100) 0.958 -> 0.929 ( -3.03%) [ +0.21% +0.00% +0.10% / -0.21% -2.82% -3.03%] index_add_ strided 5 : Elapsed 0.010 ms (0.960 ms / 100) 0.946 -> 0.912 ( -3.59%) [ +0.32% +0.00% +0.11% / +0.11% -3.38% -3.59%] index_copy_ strided 5 : Elapsed 0.009 ms (0.949 ms / 100) 0.956 -> 0.927 ( -3.03%) [ +0.21% +0.00% +0.00% / +0.42% -3.03% -2.41%] index_add_ strided 7 : Elapsed 0.010 ms (0.958 ms / 100) 0.947 -> 0.912 ( -3.70%) [ +0.11% +0.00% +0.00% / -0.11% -3.48% -3.70%] index_copy_ strided 7 : Elapsed 0.009 ms (0.948 ms / 100) 0.956 -> 0.928 ( -2.93%) [ +0.31% +0.10% +0.00% / +0.10% -2.93% -2.72%] index_add_ perm : Elapsed 0.010 ms (0.959 ms / 100) 0.946 -> 0.908 ( -4.02%) [ +0.63% +0.00% +0.11% / +0.11% -4.02% -3.38%] index_copy_ perm : Elapsed 0.010 ms (0.952 ms / 100) 0.954 -> 0.929 ( -2.62%) [ +0.31% +0.00% +0.21% / +0.42% -2.31% -2.62%] index_add_ perm_sorted : Elapsed 0.010 ms (0.957 ms / 100) 0.945 -> 0.912 ( -3.49%) [ +0.53% +0.00% +0.21% / +0.42% -3.49% -3.07%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.950 ms / 100) 1.649 -> 1.655 ( +0.36%) [ +0.06% +0.12% +0.00% / +0.36% +0.67% +0.85%] index_select const : Elapsed 0.017 ms (1.650 ms / 100) 1.691 -> 1.665 ( -1.54%) [ +0.12% +0.24% +0.00% / +0.59% -1.48% -1.54%] index_select wrap : Elapsed 0.017 ms (1.693 ms / 100) 1.697 -> 1.666 ( -1.83%) [ +0.35% +0.24% +0.00% / +0.00% -1.71% -1.83%] index_select linear : Elapsed 0.017 ms (1.703 ms / 100) 1.672 -> 1.657 ( -0.90%) [ +0.06% +0.00% +0.06% / +0.18% -0.54% -0.90%] index_select reverse : Elapsed 0.017 ms (1.673 ms / 100) 1.650 -> 1.650 ( +0.00%) [ +0.12% +0.30% +0.00% / +0.00% +1.03% +0.85%] index_select skip64 : Elapsed 0.017 ms (1.652 ms / 100) 1.653 -> 1.657 ( +0.24%) [ +0.06% +0.06% +0.00% / +0.24% +0.54% +0.54%] index_select skip256 : Elapsed 0.017 ms (1.654 ms / 100) 1.671 -> 1.653 ( -1.08%) [ +0.00% +0.48% +0.12% / +0.24% -1.08% -0.72%] index_select spread : Elapsed 0.017 ms (1.671 ms / 100) 1.683 -> 1.666 ( -1.01%) [ +0.18% +0.00% +0.00% / +0.24% -0.95% -1.01%] index_select strided 3 : Elapsed 0.017 ms (1.686 ms / 100) 1.693 -> 1.660 ( -1.95%) [ +0.00% +0.06% +0.06% / +0.00% -1.59% -1.95%] index_select random : Elapsed 0.017 ms (1.693 ms / 100) 1.675 -> 1.663 ( -0.72%) [ +0.12% +0.12% +0.00% / +0.00% -0.66% -0.72%] index_select random_sorted : Elapsed 0.017 ms (1.677 ms / 100) B = [4, 20, 16, 40] (stride (12800, 1, 20, 320)) A = [4, 20, 5, 40] (stride (1, 160, 3200, 4)) dim = 2 2.296 -> 2.296 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.39% +0.30%] index_add_ linear : Elapsed 0.023 ms (2.297 ms / 100) 2.249 -> 2.252 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.36% +0.44%] index_copy_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.296 -> 2.293 ( -0.13%) [ +0.00% +0.13% +0.09% / -0.13% +0.61% +0.52%] index_add_ reverse : Elapsed 0.023 ms (2.296 ms / 100) 2.251 -> 2.246 ( -0.22%) [ +0.00% +0.00% +0.04% / -0.22% +0.40% +0.53%] index_copy_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.273 -> 2.271 ( -0.09%) [ +0.09% +0.18% +0.00% / -0.09% +0.31% +0.26%] index_add_ spread : Elapsed 0.023 ms (2.275 ms / 100) 2.234 -> 2.236 ( +0.09%) [ +0.13% +0.22% +0.00% / +0.09% +0.40% +0.40%] index_copy_ spread : Elapsed 0.022 ms (2.237 ms / 100) 2.275 -> 2.277 ( +0.09%) [ +0.22% +0.18% +0.00% / +0.09% +0.40% +0.22%] index_add_ strided 3 : Elapsed 0.023 ms (2.280 ms / 100) 2.236 -> 2.243 ( +0.31%) [ +0.09% +0.18% +0.00% / +0.31% +0.40% +0.49%] index_copy_ strided 3 : Elapsed 0.022 ms (2.238 ms / 100) 2.291 -> 2.297 ( +0.26%) [ +0.26% +0.26% +0.00% / +0.26% +0.52% +0.52%] index_add_ strided 5 : Elapsed 0.023 ms (2.297 ms / 100) 2.255 -> 2.258 ( +0.13%) [ +0.18% +0.00% +0.18% / +0.13% +0.35% +0.49%] index_copy_ strided 5 : Elapsed 0.023 ms (2.259 ms / 100) 2.290 -> 2.291 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.39% +0.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.292 ms / 100) 2.258 -> 2.262 ( +0.18%) [ +0.00% +0.09% +0.00% / +2.44% +0.31% +0.18%] index_copy_ strided 7 : Elapsed 0.023 ms (2.258 ms / 100) 2.294 -> 2.293 ( -0.04%) [ +0.22% +0.00% +0.00% / -0.04% +0.35% +0.39%] index_add_ perm : Elapsed 0.023 ms (2.299 ms / 100) 2.252 -> 2.258 ( +0.27%) [ +0.00% +0.09% +0.13% / +0.27% +0.40% +0.40%] index_copy_ perm : Elapsed 0.023 ms (2.252 ms / 100) 2.279 -> 2.282 ( +0.13%) [ +0.04% +0.00% +0.00% / +0.13% +0.66% +0.70%] index_add_ perm_sorted : Elapsed 0.023 ms (2.280 ms / 100) 2.241 -> 2.243 ( +0.09%) [ +0.22% +0.13% +0.00% / +0.09% +0.76% +0.67%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.246 ms / 100) 4.931 -> 4.931 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.73% +0.65%] index_select const : Elapsed 0.049 ms (4.931 ms / 100) 4.927 -> 4.930 ( +0.06%) [ +0.04% +0.06% +0.00% / +0.06% +0.39% +0.49%] index_select wrap : Elapsed 0.049 ms (4.929 ms / 100) 4.926 -> 4.926 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.61% +0.51%] index_select linear : Elapsed 0.049 ms (4.929 ms / 100) 4.933 -> 4.932 ( -0.02%) [ +0.02% +0.00% +0.06% / -0.02% +0.65% +0.65%] index_select reverse : Elapsed 0.049 ms (4.934 ms / 100) 4.930 -> 4.931 ( +0.02%) [ +0.02% +0.06% +0.00% / +0.02% +0.63% +0.63%] index_select skip64 : Elapsed 0.049 ms (4.931 ms / 100) 4.933 -> 4.935 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.55% +0.59%] index_select skip256 : Elapsed 0.049 ms (4.933 ms / 100) 4.933 -> 4.935 ( +0.04%) [ +0.02% +0.08% +0.00% / +0.04% +0.59% +0.43%] index_select spread : Elapsed 0.049 ms (4.934 ms / 100) 4.922 -> 4.923 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.69% +0.61%] index_select strided 3 : Elapsed 0.049 ms (4.923 ms / 100) 4.924 -> 4.928 ( +0.08%) [ +0.02% +0.08% +0.00% / +0.08% +0.83% +0.79%] index_select random : Elapsed 0.049 ms (4.925 ms / 100) 4.928 -> 4.928 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.81% +0.73%] index_select random_sorted : Elapsed 0.049 ms (4.928 ms / 100) B = [4, 20, 16, 40] (stride (1, 2560, 4, 64)) A = [4, 20, 5, 40] (stride (5, 800, 1, 20)) dim = 2 2.487 -> 2.487 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +1.21% +1.09%] index_add_ linear : Elapsed 0.025 ms (2.489 ms / 100) 2.432 -> 2.435 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +1.07% +1.15%] index_copy_ linear : Elapsed 0.024 ms (2.434 ms / 100) 2.482 -> 2.487 ( +0.20%) [ +0.24% +0.12% +0.00% / +0.20% +1.21% +1.45%] index_add_ reverse : Elapsed 0.025 ms (2.488 ms / 100) 2.429 -> 2.433 ( +0.16%) [ +0.00% +0.21% +0.12% / +0.16% +1.28% +1.28%] index_copy_ reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.500 -> 2.506 ( +0.24%) [ +0.04% +0.16% +0.00% / +0.24% +0.88% +1.00%] index_add_ spread : Elapsed 0.025 ms (2.501 ms / 100) 2.463 -> 2.464 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.85% +0.61%] index_copy_ spread : Elapsed 0.025 ms (2.466 ms / 100) 2.493 -> 2.491 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +1.04% +1.08%] index_add_ strided 3 : Elapsed 0.025 ms (2.493 ms / 100) 2.451 -> 2.457 ( +0.24%) [ +0.29% +0.24% +0.00% / +0.24% +1.18% +1.02%] index_copy_ strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.473 -> 2.476 ( +0.12%) [ +0.16% +0.00% +0.00% / +0.12% +1.62% +1.33%] index_add_ strided 5 : Elapsed 0.025 ms (2.477 ms / 100) 2.423 -> 2.430 ( +0.29%) [ +0.12% +0.12% +0.00% / +0.29% +1.57% +1.44%] index_copy_ strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.474 -> 2.481 ( +0.28%) [ +0.12% +0.08% +0.00% / +0.28% +1.41% +1.46%] index_add_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.98% +0.98%] index_copy_ strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.483 -> 2.488 ( +0.20%) [ +0.12% +0.00% +0.16% / +0.20% +1.33% +1.37%] index_add_ perm : Elapsed 0.025 ms (2.486 ms / 100) 2.448 -> 2.454 ( +0.25%) [ +0.12% +0.25% +0.00% / +0.25% +1.14% +1.23%] index_copy_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.494 -> 2.495 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.84% +0.92%] index_add_ perm_sorted : Elapsed 0.025 ms (2.495 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.00% +0.04% +0.16% / +0.16% +0.93% +0.98%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 5.386 -> 5.394 ( +0.15%) [ +0.04% +0.11% +0.00% / +0.15% +0.74% +0.72%] index_select const : Elapsed 0.054 ms (5.388 ms / 100) 5.349 -> 5.356 ( +0.13%) [ +0.00% +0.07% +0.02% / +0.13% +1.14% +1.01%] index_select wrap : Elapsed 0.053 ms (5.349 ms / 100) 5.414 -> 5.412 ( -0.04%) [ +0.09% +0.00% +0.15% / -0.04% +0.87% +0.83%] index_select linear : Elapsed 0.054 ms (5.419 ms / 100) 5.398 -> 5.405 ( +0.13%) [ +0.04% +0.00% +0.07% / +0.13% +0.74% +0.72%] index_select reverse : Elapsed 0.054 ms (5.400 ms / 100) 5.349 -> 5.349 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.18% +1.07%] index_select skip64 : Elapsed 0.053 ms (5.349 ms / 100) 5.387 -> 5.392 ( +0.09%) [ +0.02% +0.04% +0.00% / +0.09% +0.76% +0.69%] index_select skip256 : Elapsed 0.054 ms (5.388 ms / 100) 5.402 -> 5.405 ( +0.06%) [ +0.00% +0.04% +0.02% / +0.06% +0.81% +0.76%] index_select spread : Elapsed 0.054 ms (5.402 ms / 100) 5.415 -> 5.417 ( +0.04%) [ +0.04% +0.00% +0.06% / +0.04% +0.78% +0.70%] index_select strided 3 : Elapsed 0.054 ms (5.417 ms / 100) 5.402 -> 5.400 ( -0.04%) [ +0.15% +0.00% +0.00% / -0.04% +0.85% +0.72%] index_select random : Elapsed 0.054 ms (5.410 ms / 100) 5.390 -> 5.387 ( -0.06%) [ +0.07% +0.00% +0.02% / -0.06% +0.71% +0.72%] index_select random_sorted : Elapsed 0.054 ms (5.394 ms / 100) B = [4, 20, 16, 40] (stride (1, 2560, 4, 64)) A = [4, 20, 5, 40] (stride (100, 1, 20, 400)) dim = 2 2.560 -> 2.561 ( +0.04%) [ +0.00% +0.20% +0.08% / +0.04% +0.43% +0.55%] index_add_ linear : Elapsed 0.026 ms (2.560 ms / 100) 2.509 -> 2.510 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.76% +0.40%] index_copy_ linear : Elapsed 0.025 ms (2.509 ms / 100) 2.562 -> 2.562 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.47% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.563 ms / 100) 2.507 -> 2.513 ( +0.24%) [ +0.12% +0.00% +0.04% / +0.24% +0.56% +0.44%] index_copy_ reverse : Elapsed 0.025 ms (2.510 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.35% +0.31%] index_add_ spread : Elapsed 0.026 ms (2.561 ms / 100) 2.526 -> 2.527 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.36% +0.24%] index_copy_ spread : Elapsed 0.025 ms (2.528 ms / 100) 2.563 -> 2.566 ( +0.12%) [ +0.23% +0.31% +0.00% / +0.12% +0.39% +0.20%] index_add_ strided 3 : Elapsed 0.026 ms (2.569 ms / 100) 2.528 -> 2.535 ( +0.28%) [ +0.16% +0.12% +0.00% / +0.63% +0.28% +0.47%] index_copy_ strided 3 : Elapsed 0.025 ms (2.532 ms / 100) 2.550 -> 2.558 ( +0.31%) [ +0.04% +0.24% +0.00% / +0.31% +0.47% +0.39%] index_add_ strided 5 : Elapsed 0.026 ms (2.551 ms / 100) 2.502 -> 2.511 ( +0.36%) [ +0.16% +0.16% +0.00% / +0.36% +0.52% +0.44%] index_copy_ strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.554 -> 2.559 ( +0.20%) [ +0.00% +0.20% +0.12% / +0.20% +0.43% +0.27%] index_add_ strided 7 : Elapsed 0.026 ms (2.554 ms / 100) 2.522 -> 2.525 ( +0.12%) [ +0.04% +0.20% +0.00% / +0.12% +0.40% +0.20%] index_copy_ strided 7 : Elapsed 0.025 ms (2.523 ms / 100) 2.548 -> 2.552 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.20% +0.39%] index_add_ perm : Elapsed 0.025 ms (2.548 ms / 100) 2.507 -> 2.511 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.24% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.509 ms / 100) 2.545 -> 2.550 ( +0.20%) [ +0.16% +0.28% +0.00% / +0.20% +0.31% +0.35%] index_add_ perm_sorted : Elapsed 0.025 ms (2.549 ms / 100) 2.506 -> 2.514 ( +0.32%) [ +0.16% +0.00% +0.04% / +0.52% +0.32% +0.44%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.510 ms / 100) 5.666 -> 5.671 ( +0.09%) [ +0.05% +0.12% +0.00% / +0.09% +0.60% +0.60%] index_select const : Elapsed 0.057 ms (5.669 ms / 100) 5.644 -> 5.655 ( +0.19%) [ +0.04% +0.09% +0.00% / +0.19% +0.57% +0.67%] index_select wrap : Elapsed 0.056 ms (5.646 ms / 100) 5.656 -> 5.660 ( +0.07%) [ +0.00% +0.05% +0.05% / +0.07% +0.55% +0.55%] index_select linear : Elapsed 0.057 ms (5.656 ms / 100) 5.635 -> 5.638 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.53% +0.57%] index_select reverse : Elapsed 0.056 ms (5.635 ms / 100) 5.665 -> 5.673 ( +0.14%) [ +0.11% +0.16% +0.00% / +0.14% +0.53% +0.53%] index_select skip64 : Elapsed 0.057 ms (5.671 ms / 100) 5.664 -> 5.667 ( +0.05%) [ +0.11% +0.16% +0.00% / +0.05% +0.49% +0.48%] index_select skip256 : Elapsed 0.057 ms (5.670 ms / 100) 5.640 -> 5.639 ( -0.02%) [ +0.14% +0.04% +0.00% / -0.02% +0.53% +0.60%] index_select spread : Elapsed 0.056 ms (5.648 ms / 100) 5.618 -> 5.628 ( +0.18%) [ +0.21% +0.23% +0.00% / +0.18% +0.69% +0.73%] index_select strided 3 : Elapsed 0.056 ms (5.630 ms / 100) 5.639 -> 5.641 ( +0.04%) [ +0.09% +0.12% +0.00% / +0.04% +0.71% +0.60%] index_select random : Elapsed 0.056 ms (5.644 ms / 100) 5.630 -> 5.629 ( -0.02%) [ +0.07% +0.07% +0.00% / -0.02% +0.57% +0.64%] index_select random_sorted : Elapsed 0.056 ms (5.634 ms / 100) B = [4, 20, 16, 40] (stride (1, 160, 3200, 4)) A = [4, 20, 5, 40] (stride (800, 40, 3200, 1)) dim = 2 2.226 -> 2.225 ( -0.04%) [ +0.00% +0.13% +0.31% / +0.36% -0.04% +0.18%] index_add_ linear : Elapsed 0.022 ms (2.226 ms / 100) 2.173 -> 2.174 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.37% +0.51% +0.05%] index_copy_ linear : Elapsed 0.022 ms (2.175 ms / 100) 2.221 -> 2.221 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.32% +0.00% +0.09%] index_add_ reverse : Elapsed 0.022 ms (2.224 ms / 100) 2.166 -> 2.170 ( +0.18%) [ +0.00% +0.14% +0.14% / +0.18% +0.18% +0.32%] index_copy_ reverse : Elapsed 0.022 ms (2.166 ms / 100) 2.224 -> 2.230 ( +0.27%) [ +0.36% +0.00% +0.27% / +0.27% +0.54% +0.54%] index_add_ spread : Elapsed 0.022 ms (2.232 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.14% +0.23% +0.00% / -0.05% +0.69% +0.60%] index_copy_ spread : Elapsed 0.022 ms (2.177 ms / 100) 2.228 -> 2.231 ( +0.13%) [ +0.13% +0.09% +0.00% / +0.13% +0.36% +0.31%] index_add_ strided 3 : Elapsed 0.022 ms (2.231 ms / 100) 2.172 -> 2.175 ( +0.14%) [ +0.00% +0.18% +0.09% / +0.14% +0.55% +0.55%] index_copy_ strided 3 : Elapsed 0.022 ms (2.172 ms / 100) 2.226 -> 2.225 ( -0.04%) [ +0.09% +0.00% +0.13% / -0.04% +0.40% +0.31%] index_add_ strided 5 : Elapsed 0.022 ms (2.228 ms / 100) 2.168 -> 2.171 ( +0.14%) [ +0.18% +0.23% +0.00% / +0.14% +0.78% +0.65%] index_copy_ strided 5 : Elapsed 0.022 ms (2.172 ms / 100) 2.223 -> 2.225 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.09% +0.18% +0.67%] index_add_ strided 7 : Elapsed 0.022 ms (2.223 ms / 100) 2.168 -> 2.172 ( +0.18%) [ +0.00% +0.09% +0.18% / +0.18% +0.69% +0.74%] index_copy_ strided 7 : Elapsed 0.022 ms (2.168 ms / 100) 2.218 -> 2.222 ( +0.18%) [ +0.00% +0.00% +0.09% / +0.18% +0.72% +0.86%] index_add_ perm : Elapsed 0.022 ms (2.218 ms / 100) 2.165 -> 2.166 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +1.06% +0.83%] index_copy_ perm : Elapsed 0.022 ms (2.168 ms / 100) 2.217 -> 2.226 ( +0.41%) [ +0.00% +0.41% +0.14% / +0.41% +1.17% +1.04%] index_add_ perm_sorted : Elapsed 0.022 ms (2.217 ms / 100) 2.161 -> 2.163 ( +0.09%) [ +0.28% +0.46% +0.00% / +0.09% +1.39% +1.06%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.167 ms / 100) 4.374 -> 4.374 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.00% +0.71% +0.62%] index_select const : Elapsed 0.044 ms (4.378 ms / 100) 4.409 -> 4.416 ( +0.16%) [ +0.11% +0.00% +0.14% / +0.16% +0.73% +0.75%] index_select wrap : Elapsed 0.044 ms (4.414 ms / 100) 4.440 -> 4.444 ( +0.09%) [ +0.00% +0.16% +0.05% / +0.09% +0.81% +0.63%] index_select linear : Elapsed 0.044 ms (4.440 ms / 100) 4.421 -> 4.421 ( +0.00%) [ +0.07% +0.00% +0.18% / +0.00% +0.88% +0.95%] index_select reverse : Elapsed 0.044 ms (4.424 ms / 100) 4.361 -> 4.363 ( +0.05%) [ +0.07% +0.14% +0.00% / +0.05% +0.55% +0.60%] index_select skip64 : Elapsed 0.044 ms (4.364 ms / 100) 4.373 -> 4.381 ( +0.18%) [ +0.02% +0.16% +0.00% / +0.18% +0.75% +0.55%] index_select skip256 : Elapsed 0.044 ms (4.374 ms / 100) 4.410 -> 4.416 ( +0.14%) [ +0.20% +0.00% +0.11% / +0.14% +1.04% +0.98%] index_select spread : Elapsed 0.044 ms (4.419 ms / 100) 4.441 -> 4.440 ( -0.02%) [ +0.02% +0.11% +0.00% / -0.02% +0.90% +0.95%] index_select strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.420 -> 4.430 ( +0.23%) [ +0.20% +0.00% +0.07% / +0.23% +1.31% +1.38%] index_select random : Elapsed 0.044 ms (4.429 ms / 100) 4.412 -> 4.430 ( +0.41%) [ +0.09% +0.00% +0.25% / +0.41% +0.97% +1.00%] index_select random_sorted : Elapsed 0.044 ms (4.416 ms / 100) B = [4, 20, 16, 40] (stride (320, 16, 1, 1280)) A = [4, 20, 5, 40] (stride (100, 1, 20, 400)) dim = 2 2.411 -> 2.417 ( +0.25%) [ +0.12% +0.00% +0.33% / +0.25% +0.54% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.414 ms / 100) 2.366 -> 2.369 ( +0.13%) [ +0.00% +0.21% +0.08% / +0.13% +0.51% +0.17%] index_copy_ linear : Elapsed 0.024 ms (2.366 ms / 100) 2.408 -> 2.414 ( +0.25%) [ +0.42% +0.37% +0.00% / +0.25% +0.91% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.418 ms / 100) 2.367 -> 2.368 ( +0.04%) [ +0.00% +0.08% +0.17% / +0.04% +0.30% +0.34%] index_copy_ reverse : Elapsed 0.024 ms (2.367 ms / 100) 2.444 -> 2.446 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.45% +0.74%] index_add_ spread : Elapsed 0.024 ms (2.446 ms / 100) 2.431 -> 2.439 ( +0.33%) [ +0.04% +0.21% +0.00% / +0.41% +0.33% +0.41%] index_copy_ spread : Elapsed 0.024 ms (2.432 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.29% +0.25% +0.00% / +0.12% +0.57% +0.57%] index_add_ strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.436 -> 2.436 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.29% +0.12%] index_copy_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.440 -> 2.447 ( +0.29%) [ +0.20% +0.29% +0.00% / +0.29% +0.74% +0.74%] index_add_ strided 5 : Elapsed 0.024 ms (2.445 ms / 100) 2.434 -> 2.440 ( +0.25%) [ +0.12% +0.04% +0.00% / +0.29% +0.45% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.441 -> 2.451 ( +0.41%) [ +0.12% +0.25% +0.00% / +0.49% +0.41% +0.61%] index_add_ strided 7 : Elapsed 0.024 ms (2.444 ms / 100) 2.433 -> 2.436 ( +0.12%) [ +0.21% +0.00% +0.04% / +0.16% +0.12% +0.37%] index_copy_ strided 7 : Elapsed 0.024 ms (2.438 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.33% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.450 ms / 100) 2.434 -> 2.435 ( +0.04%) [ +0.16% +0.00% +0.16% / +0.04% +0.21% +0.66%] index_copy_ perm : Elapsed 0.024 ms (2.438 ms / 100) 2.443 -> 2.440 ( -0.12%) [ +0.12% +0.25% +0.00% / -0.12% +0.57% +0.45%] index_add_ perm_sorted : Elapsed 0.024 ms (2.446 ms / 100) 2.431 -> 2.437 ( +0.25%) [ +0.00% +0.12% +0.04% / +0.25% +0.53% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.431 ms / 100) 5.236 -> 5.240 ( +0.08%) [ +0.10% +0.00% +0.06% / +0.08% +0.63% +0.63%] index_select const : Elapsed 0.052 ms (5.241 ms / 100) 5.200 -> 5.205 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.69% +0.48%] index_select wrap : Elapsed 0.052 ms (5.205 ms / 100) 5.223 -> 5.233 ( +0.19%) [ +0.13% +0.00% +0.00% / +0.19% +0.54% +0.46%] index_select linear : Elapsed 0.052 ms (5.230 ms / 100) 5.206 -> 5.210 ( +0.08%) [ +0.15% +0.00% +0.04% / +0.08% +0.50% +0.61%] index_select reverse : Elapsed 0.052 ms (5.214 ms / 100) 5.218 -> 5.218 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.56% +0.50%] index_select skip64 : Elapsed 0.052 ms (5.222 ms / 100) 5.216 -> 5.221 ( +0.10%) [ +0.06% +0.04% +0.00% / +0.10% +0.48% +0.56%] index_select skip256 : Elapsed 0.052 ms (5.219 ms / 100) 5.212 -> 5.211 ( -0.02%) [ +0.02% +0.04% +0.00% / -0.02% +0.40% +0.42%] index_select spread : Elapsed 0.052 ms (5.213 ms / 100) 5.167 -> 5.180 ( +0.25%) [ +0.00% +0.21% +0.19% / +0.25% +0.70% +0.85%] index_select strided 3 : Elapsed 0.052 ms (5.167 ms / 100) 5.195 -> 5.211 ( +0.31%) [ +0.15% +0.25% +0.00% / +0.31% +0.83% +0.85%] index_select random : Elapsed 0.052 ms (5.203 ms / 100) 5.196 -> 5.197 ( +0.02%) [ +0.19% +0.06% +0.00% / +0.02% +0.64% +0.69%] index_select random_sorted : Elapsed 0.052 ms (5.206 ms / 100) B = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) A = [4, 20, 5, 40] (stride (4000, 200, 40, 1)) dim = 2 2.207 -> 2.209 ( +0.09%) [ +0.32% +0.00% +0.09% / +0.09% +2.58% +2.67%] index_add_ linear : Elapsed 0.022 ms (2.214 ms / 100) 2.167 -> 2.178 ( +0.51%) [ +0.23% +0.00% +0.00% / +0.51% +2.31% +2.40%] index_copy_ linear : Elapsed 0.022 ms (2.172 ms / 100) 2.212 -> 2.215 ( +0.14%) [ +0.27% +0.23% +0.00% / +0.14% +2.53% +2.71%] index_add_ reverse : Elapsed 0.022 ms (2.218 ms / 100) 2.168 -> 2.181 ( +0.60%) [ +0.00% +0.18% +0.09% / +0.60% +2.58% +2.44%] index_copy_ reverse : Elapsed 0.022 ms (2.168 ms / 100) 2.205 -> 2.210 ( +0.23%) [ +0.23% +0.00% +0.09% / +0.23% +2.86% +2.77%] index_add_ spread : Elapsed 0.022 ms (2.210 ms / 100) 2.167 -> 2.165 ( -0.09%) [ +0.09% +0.32% +0.00% / -0.09% +2.72% +2.63%] index_copy_ spread : Elapsed 0.022 ms (2.169 ms / 100) 2.201 -> 2.211 ( +0.45%) [ +0.00% +0.18% +0.45% / +0.45% +2.64% +2.82%] index_add_ strided 3 : Elapsed 0.022 ms (2.201 ms / 100) 2.163 -> 2.175 ( +0.55%) [ +0.09% +0.00% +0.09% / +0.55% +2.77% +2.59%] index_copy_ strided 3 : Elapsed 0.022 ms (2.165 ms / 100) 2.204 -> 2.209 ( +0.23%) [ +0.00% +0.18% +0.36% / +0.23% +2.90% +2.99%] index_add_ strided 5 : Elapsed 0.022 ms (2.204 ms / 100) 2.167 -> 2.171 ( +0.18%) [ +0.00% +0.37% +0.09% / +0.18% +2.58% +2.86%] index_copy_ strided 5 : Elapsed 0.022 ms (2.167 ms / 100) 2.200 -> 2.206 ( +0.27%) [ +0.36% +0.36% +0.00% / +0.27% +2.77% +3.05%] index_add_ strided 7 : Elapsed 0.022 ms (2.208 ms / 100) 2.172 -> 2.170 ( -0.09%) [ +0.09% +0.18% +0.00% / -0.09% +2.85% +2.81%] index_copy_ strided 7 : Elapsed 0.022 ms (2.174 ms / 100) 2.223 -> 2.221 ( -0.09%) [ +0.22% +0.00% +0.00% / -0.09% +2.74% +2.74%] index_add_ perm : Elapsed 0.022 ms (2.228 ms / 100) 2.181 -> 2.182 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +2.66% +2.57%] index_copy_ perm : Elapsed 0.022 ms (2.182 ms / 100) 2.219 -> 2.222 ( +0.14%) [ +0.05% +0.00% +0.05% / +0.14% +2.52% +2.25%] index_add_ perm_sorted : Elapsed 0.022 ms (2.220 ms / 100) 2.174 -> 2.174 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +2.71% +2.67%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.176 ms / 100) 4.463 -> 4.470 ( +0.16%) [ +0.16% +0.00% +0.04% / +0.16% +0.76% +0.87%] index_select const : Elapsed 0.045 ms (4.470 ms / 100) 4.512 -> 4.516 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.09% +1.64% +1.46%] index_select wrap : Elapsed 0.045 ms (4.514 ms / 100) 4.517 -> 4.517 ( +0.00%) [ +0.18% +0.00% +0.09% / +0.00% +1.39% +1.37%] index_select linear : Elapsed 0.045 ms (4.525 ms / 100) 4.516 -> 4.523 ( +0.16%) [ +0.20% +0.00% +0.13% / +0.16% +1.31% +1.28%] index_select reverse : Elapsed 0.045 ms (4.525 ms / 100) 4.462 -> 4.468 ( +0.13%) [ +0.02% +0.02% +0.00% / +0.13% +0.83% +0.90%] index_select skip64 : Elapsed 0.045 ms (4.463 ms / 100) 4.463 -> 4.471 ( +0.18%) [ +0.00% +0.18% +0.13% / +0.18% +0.94% +0.90%] index_select skip256 : Elapsed 0.045 ms (4.463 ms / 100) 4.513 -> 4.528 ( +0.33%) [ +0.00% +0.47% +0.55% / +0.33% +1.75% +1.35%] index_select spread : Elapsed 0.045 ms (4.513 ms / 100) 4.512 -> 4.513 ( +0.02%) [ +0.11% +0.04% +0.00% / +0.02% +1.82% +1.53%] index_select strided 3 : Elapsed 0.045 ms (4.517 ms / 100) 4.520 -> 4.533 ( +0.29%) [ +0.29% +0.27% +0.00% / +0.29% +1.46% +1.22%] index_select random : Elapsed 0.045 ms (4.533 ms / 100) 4.509 -> 4.524 ( +0.33%) [ +0.00% +0.16% +0.00% / +0.33% +1.66% +1.69%] index_select random_sorted : Elapsed 0.045 ms (4.509 ms / 100) B = [4, 20, 16, 40] (stride (1, 64, 4, 1280)) A = [4, 20, 5, 40] (stride (100, 5, 1, 400)) dim = 2 2.446 -> 2.446 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.53% +0.49%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.391 -> 2.397 ( +0.25%) [ +0.33% +0.42% +0.00% / +0.25% +0.67% +0.59%] index_copy_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.08% +0.37%] index_add_ reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.395 -> 2.402 ( +0.29%) [ +0.17% +0.08% +0.00% / +0.33% +0.46% +0.29%] index_copy_ reverse : Elapsed 0.024 ms (2.399 ms / 100) 2.451 -> 2.456 ( +0.20%) [ +0.08% +0.20% +0.00% / +0.29% +0.20% +0.37%] index_add_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.33% +0.17%] index_copy_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.41% +0.37%] index_add_ strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.33% +0.46%] index_copy_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.438 -> 2.440 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.41% +0.29%] index_add_ strided 5 : Elapsed 0.024 ms (2.438 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.21% +0.29% +0.00% / +0.08% +0.46% +0.42%] index_copy_ strided 5 : Elapsed 0.024 ms (2.391 ms / 100) 2.440 -> 2.446 ( +0.25%) [ +0.20% +0.37% +0.00% / +0.25% +0.53% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.46% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.460 -> 2.463 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.28% +0.12%] index_add_ perm : Elapsed 0.025 ms (2.463 ms / 100) 2.413 -> 2.420 ( +0.29%) [ +0.00% +0.17% +0.29% / +0.46% +0.29% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.413 ms / 100) 2.452 -> 2.445 ( -0.29%) [ +0.12% +0.33% +0.00% / -0.29% +0.41% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.00% +0.17% +0.04% / +0.12% +0.33% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 5.245 -> 5.249 ( +0.08%) [ +0.00% +0.08% +0.06% / +0.08% +0.44% +0.78%] index_select const : Elapsed 0.052 ms (5.245 ms / 100) 5.245 -> 5.255 ( +0.19%) [ +0.15% +0.27% +0.00% / +0.19% +0.88% +0.48%] index_select wrap : Elapsed 0.053 ms (5.253 ms / 100) 5.254 -> 5.260 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.59% +0.49%] index_select linear : Elapsed 0.053 ms (5.254 ms / 100) 5.242 -> 5.258 ( +0.31%) [ +0.31% +0.04% +0.00% / +0.31% +0.57% +0.46%] index_select reverse : Elapsed 0.053 ms (5.258 ms / 100) 5.253 -> 5.239 ( -0.27%) [ +0.06% +0.02% +0.00% / -0.27% +0.55% +0.19%] index_select skip64 : Elapsed 0.053 ms (5.256 ms / 100) 5.241 -> 5.253 ( +0.23%) [ +0.31% +0.00% +0.17% / +0.23% +0.73% +0.73%] index_select skip256 : Elapsed 0.053 ms (5.257 ms / 100) 5.244 -> 5.252 ( +0.15%) [ +0.04% +0.25% +0.00% / +0.15% +0.46% +0.80%] index_select spread : Elapsed 0.052 ms (5.246 ms / 100) 5.250 -> 5.247 ( -0.06%) [ +0.13% +0.10% +0.00% / -0.06% +0.61% +0.63%] index_select strided 3 : Elapsed 0.053 ms (5.257 ms / 100) 5.256 -> 5.242 ( -0.27%) [ +0.06% +0.00% +0.02% / -0.27% +0.27% +0.30%] index_select random : Elapsed 0.053 ms (5.259 ms / 100) 5.237 -> 5.250 ( +0.25%) [ +0.17% +0.27% +0.00% / +0.25% +0.78% +0.55%] index_select random_sorted : Elapsed 0.052 ms (5.246 ms / 100) B = [4, 20, 16, 40] (stride (1, 4, 80, 1280)) A = [4, 20, 5, 40] (stride (1, 20, 4, 400)) dim = 2 2.541 -> 2.543 ( +0.08%) [ +0.00% +0.35% +0.24% / +0.08% +0.35% +0.51%] index_add_ linear : Elapsed 0.025 ms (2.541 ms / 100) 2.477 -> 2.482 ( +0.20%) [ +0.12% +0.36% +0.00% / +0.20% +0.44% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.480 ms / 100) 2.545 -> 2.550 ( +0.20%) [ +0.39% +0.00% +0.12% / +0.20% +0.35% +0.31%] index_add_ reverse : Elapsed 0.026 ms (2.555 ms / 100) 2.480 -> 2.483 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.24% +0.12% +0.24%] index_copy_ reverse : Elapsed 0.025 ms (2.484 ms / 100) 2.536 -> 2.542 ( +0.24%) [ +0.24% +0.20% +0.00% / +0.24% +0.75% +0.75%] index_add_ spread : Elapsed 0.025 ms (2.542 ms / 100) 2.473 -> 2.481 ( +0.32%) [ +0.12% +0.08% +0.00% / +0.32% +0.61% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.540 -> 2.542 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.31% +0.55%] index_add_ strided 3 : Elapsed 0.025 ms (2.540 ms / 100) 2.474 -> 2.482 ( +0.32%) [ +0.16% +0.00% +0.20% / +0.32% +0.49% +0.61%] index_copy_ strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.540 -> 2.550 ( +0.39%) [ +0.20% +0.39% +0.00% / +0.39% +0.79% +0.71%] index_add_ strided 5 : Elapsed 0.025 ms (2.545 ms / 100) 2.474 -> 2.482 ( +0.32%) [ +0.28% +0.12% +0.00% / +0.32% +0.77% +0.65%] index_copy_ strided 5 : Elapsed 0.025 ms (2.481 ms / 100) 2.537 -> 2.541 ( +0.16%) [ +0.20% +0.00% +0.16% / +0.16% +0.59% +0.79%] index_add_ strided 7 : Elapsed 0.025 ms (2.542 ms / 100) 2.470 -> 2.474 ( +0.16%) [ +0.28% +0.00% +0.08% / +0.16% +0.73% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.539 -> 2.540 ( +0.04%) [ +0.00% +0.12% +0.20% / +0.04% +0.67% +0.55%] index_add_ perm : Elapsed 0.025 ms (2.539 ms / 100) 2.474 -> 2.481 ( +0.28%) [ +0.00% +0.16% +0.12% / +0.28% +0.65% +0.85%] index_copy_ perm : Elapsed 0.025 ms (2.474 ms / 100) 2.539 -> 2.542 ( +0.12%) [ +0.28% +0.51% +0.00% / +0.12% +0.87% +0.59%] index_add_ perm_sorted : Elapsed 0.025 ms (2.546 ms / 100) 2.475 -> 2.479 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.85% +0.48%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.477 ms / 100) 5.639 -> 5.642 ( +0.05%) [ +0.32% +0.35% +0.00% / +0.05% +0.55% +0.87%] index_select const : Elapsed 0.057 ms (5.657 ms / 100) 5.652 -> 5.664 ( +0.21%) [ +0.05% +0.07% +0.00% / +0.21% +0.58% +0.64%] index_select wrap : Elapsed 0.057 ms (5.655 ms / 100) 5.652 -> 5.653 ( +0.02%) [ +0.16% +0.05% +0.00% / +0.02% +0.65% +0.46%] index_select linear : Elapsed 0.057 ms (5.661 ms / 100) 5.649 -> 5.658 ( +0.16%) [ +0.21% +0.28% +0.00% / +0.16% +0.51% +0.55%] index_select reverse : Elapsed 0.057 ms (5.661 ms / 100) 5.649 -> 5.665 ( +0.28%) [ +0.30% +0.00% +0.30% / +0.28% +0.69% +0.78%] index_select skip64 : Elapsed 0.057 ms (5.666 ms / 100) 5.641 -> 5.644 ( +0.05%) [ +0.27% +0.00% +0.30% / +0.05% +0.62% +0.64%] index_select skip256 : Elapsed 0.057 ms (5.656 ms / 100) 5.647 -> 5.656 ( +0.16%) [ +0.02% +0.04% +0.00% / +0.16% +0.74% +0.69%] index_select spread : Elapsed 0.056 ms (5.648 ms / 100) 5.645 -> 5.645 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.66% +0.58%] index_select strided 3 : Elapsed 0.056 ms (5.646 ms / 100) 5.640 -> 5.650 ( +0.18%) [ +0.11% +0.00% +0.12% / +0.18% +0.76% +0.64%] index_select random : Elapsed 0.056 ms (5.646 ms / 100) 5.644 -> 5.646 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +0.80% +0.82%] index_select random_sorted : Elapsed 0.056 ms (5.646 ms / 100) out_shape = [4, 20, 5, 16] in_shape = [4, 20, 5, 40] idx_dim = 3 B = [4, 20, 5, 16] (stride (1600, 1, 320, 20)) A = [4, 20, 5, 40] (stride (200, 800, 1, 5)) dim = 3 3.923 -> 3.929 ( +0.15%) [ +0.18% +0.15% +0.00% / +0.15% +0.51% +0.38%] index_select const : Elapsed 0.039 ms (3.930 ms / 100) 3.949 -> 3.953 ( +0.10%) [ +0.03% +0.00% +0.00% / +0.10% +0.56% +0.48%] index_select wrap : Elapsed 0.039 ms (3.950 ms / 100) 3.935 -> 3.939 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.58% +0.64%] index_select linear : Elapsed 0.039 ms (3.937 ms / 100) 3.924 -> 3.921 ( -0.08%) [ +0.13% +0.00% +0.03% / -0.08% +0.59% +0.38%] index_select reverse : Elapsed 0.039 ms (3.929 ms / 100) 3.928 -> 3.925 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.64% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.928 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.23% +0.00% +0.23% / +0.05% +0.61% +0.92%] index_select skip256 : Elapsed 0.039 ms (3.929 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.03% +0.43% +0.43%] index_select spread : Elapsed 0.039 ms (3.929 ms / 100) 3.943 -> 3.948 ( +0.13%) [ +0.10% +0.05% +0.00% / +0.13% +0.25% +0.41%] index_select strided 3 : Elapsed 0.039 ms (3.947 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.10% +0.00% +0.13% / +0.00% +0.51% +0.48%] index_select strided 5 : Elapsed 0.039 ms (3.931 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.36% +0.26%] index_select strided 7 : Elapsed 0.039 ms (3.921 ms / 100) 3.921 -> 3.924 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.43% +0.33%] index_select strided 8 : Elapsed 0.039 ms (3.923 ms / 100) 3.917 -> 3.924 ( +0.18%) [ +0.08% +0.03% +0.00% / +0.18% +0.31% +0.23%] index_select strided 16 : Elapsed 0.039 ms (3.920 ms / 100) 3.929 -> 3.937 ( +0.20%) [ +0.00% +0.18% +0.13% / +0.20% +0.64% +0.56%] index_select random : Elapsed 0.039 ms (3.929 ms / 100) 3.923 -> 3.929 ( +0.15%) [ +0.10% +0.00% +0.08% / +0.15% +0.61% +0.69%] index_select random_sorted : Elapsed 0.039 ms (3.927 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.03% +0.03% +0.00% / +0.13% +0.56% +0.43%] index_select perm : Elapsed 0.039 ms (3.923 ms / 100) 3.931 -> 3.934 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.38% +0.23%] index_select perm_sorted : Elapsed 0.039 ms (3.933 ms / 100) B = [4, 20, 5, 16] (stride (1600, 1, 20, 100)) A = [4, 20, 5, 40] (stride (5, 20, 1, 400)) dim = 3 3.866 -> 3.873 ( +0.18%) [ +0.21% +0.21% +0.00% / +0.18% +0.80% +1.01%] index_select const : Elapsed 0.039 ms (3.874 ms / 100) 3.875 -> 3.879 ( +0.10%) [ +0.05% +0.03% +0.00% / +0.10% +1.03% +0.80%] index_select wrap : Elapsed 0.039 ms (3.877 ms / 100) 3.878 -> 3.879 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.83% +1.03%] index_select linear : Elapsed 0.039 ms (3.881 ms / 100) 3.866 -> 3.870 ( +0.10%) [ +0.03% +0.00% +0.00% / +0.10% +0.72% +0.78%] index_select reverse : Elapsed 0.039 ms (3.867 ms / 100) 3.867 -> 3.865 ( -0.05%) [ +0.08% +0.00% +0.03% / -0.05% +1.40% +0.88%] index_select skip64 : Elapsed 0.039 ms (3.870 ms / 100) 3.869 -> 3.872 ( +0.08%) [ +0.00% +0.08% +0.03% / +0.08% +0.72% +0.72%] index_select skip256 : Elapsed 0.039 ms (3.869 ms / 100) 3.870 -> 3.873 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.08% +0.85% +0.78%] index_select spread : Elapsed 0.039 ms (3.875 ms / 100) 3.869 -> 3.872 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.96% +0.83%] index_select strided 3 : Elapsed 0.039 ms (3.872 ms / 100) 3.866 -> 3.872 ( +0.16%) [ +0.08% +0.00% +0.05% / +0.16% +0.80% +0.85%] index_select strided 5 : Elapsed 0.039 ms (3.869 ms / 100) 3.872 -> 3.880 ( +0.21%) [ +0.00% +0.08% +0.05% / +0.21% +0.77% +1.03%] index_select strided 7 : Elapsed 0.039 ms (3.872 ms / 100) 3.872 -> 3.877 ( +0.13%) [ +0.00% +0.08% +0.05% / +0.13% +1.19% +1.34%] index_select strided 8 : Elapsed 0.039 ms (3.872 ms / 100) 3.873 -> 3.874 ( +0.03%) [ +0.05% +0.00% +0.13% / +0.03% +0.85% +1.16%] index_select strided 16 : Elapsed 0.039 ms (3.875 ms / 100) 3.870 -> 3.877 ( +0.18%) [ +0.03% +0.10% +0.00% / +0.18% +1.29% +1.03%] index_select random : Elapsed 0.039 ms (3.871 ms / 100) 3.860 -> 3.860 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +1.06% +0.85%] index_select random_sorted : Elapsed 0.039 ms (3.865 ms / 100) 3.867 -> 3.868 ( +0.03%) [ +0.10% +0.00% +0.08% / +0.03% +0.88% +0.96%] index_select perm : Elapsed 0.039 ms (3.871 ms / 100) 3.875 -> 3.882 ( +0.18%) [ +0.13% +0.00% +0.10% / +0.18% +0.95% +1.06%] index_select perm_sorted : Elapsed 0.039 ms (3.880 ms / 100) B = [4, 20, 5, 16] (stride (1, 320, 64, 4)) A = [4, 20, 5, 40] (stride (1, 800, 160, 4)) dim = 3 1.441 -> 1.443 ( +0.14%) [ +0.00% +0.28% +0.35% / +0.14% +0.42% +0.21%] index_select const : Elapsed 0.014 ms (1.441 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.20% +0.27% / -0.07% -0.07% +0.20%] index_select wrap : Elapsed 0.015 ms (1.476 ms / 100) 1.450 -> 1.451 ( +0.07%) [ +0.28% +0.28% +0.00% / +0.21% +0.07% +0.07%] index_select linear : Elapsed 0.015 ms (1.454 ms / 100) 1.448 -> 1.451 ( +0.21%) [ +0.00% +0.07% +0.00% / +0.21% +0.28% +0.21%] index_select reverse : Elapsed 0.014 ms (1.448 ms / 100) 1.470 -> 1.474 ( +0.27%) [ +0.54% +0.00% +0.20% / +0.48% +0.54% +0.27%] index_select skip64 : Elapsed 0.015 ms (1.478 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.21% +0.28%] index_select skip256 : Elapsed 0.014 ms (1.444 ms / 100) 1.461 -> 1.459 ( -0.14%) [ +0.07% +0.00% +0.00% / -0.07% +0.07% -0.14%] index_select spread : Elapsed 0.015 ms (1.462 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.07% +0.14%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.454 -> 1.455 ( +0.07%) [ +0.41% +0.28% +0.00% / +0.55% +0.07% +0.07%] index_select strided 5 : Elapsed 0.015 ms (1.460 ms / 100) 1.464 -> 1.464 ( +0.00%) [ +0.00% +0.14% +0.34% / +0.41% +0.00% +0.00%] index_select strided 7 : Elapsed 0.015 ms (1.464 ms / 100) 1.462 -> 1.461 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.00% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.462 ms / 100) 1.471 -> 1.471 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.07% +0.00% +0.20%] index_select strided 16 : Elapsed 0.015 ms (1.471 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.27% +0.07%] index_select random : Elapsed 0.015 ms (1.467 ms / 100) 1.460 -> 1.458 ( -0.14%) [ +0.14% +0.00% +0.14% / -0.14% +0.21% +0.21%] index_select random_sorted : Elapsed 0.015 ms (1.462 ms / 100) 1.468 -> 1.467 ( -0.07%) [ +0.20% +0.14% +0.00% / +0.07% -0.07% +0.07%] index_select perm : Elapsed 0.015 ms (1.471 ms / 100) 1.456 -> 1.457 ( +0.07%) [ +0.00% +0.62% +0.34% / +0.07% +0.55% +0.21%] index_select perm_sorted : Elapsed 0.015 ms (1.456 ms / 100) B = [4, 20, 5, 16] (stride (1, 320, 4, 20)) A = [4, 20, 5, 40] (stride (4000, 200, 40, 1)) dim = 3 3.600 -> 3.600 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_select const : Elapsed 0.036 ms (3.600 ms / 100) 3.572 -> 3.573 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.56% +0.56%] index_select wrap : Elapsed 0.036 ms (3.573 ms / 100) 3.526 -> 3.532 ( +0.17%) [ +0.14% +0.17% +0.00% / +0.17% +0.74% +1.02%] index_select linear : Elapsed 0.035 ms (3.531 ms / 100) 3.574 -> 3.576 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.87% +0.90%] index_select reverse : Elapsed 0.036 ms (3.577 ms / 100) 3.556 -> 3.556 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_select skip64 : Elapsed 0.036 ms (3.556 ms / 100) 3.597 -> 3.597 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.75% +0.72%] index_select skip256 : Elapsed 0.036 ms (3.597 ms / 100) 3.516 -> 3.516 ( +0.00%) [ +0.46% +0.06% +0.00% / +0.00% +0.71% +0.77%] index_select spread : Elapsed 0.035 ms (3.532 ms / 100) 3.550 -> 3.551 ( +0.03%) [ +0.00% +0.11% +0.08% / +0.03% +0.73% +0.65%] index_select strided 3 : Elapsed 0.035 ms (3.550 ms / 100) 3.533 -> 3.537 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +0.99% +0.88%] index_select strided 5 : Elapsed 0.035 ms (3.533 ms / 100) 3.532 -> 3.531 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.65% +0.65%] index_select strided 7 : Elapsed 0.035 ms (3.532 ms / 100) 3.582 -> 3.573 ( -0.25%) [ +0.17% +0.00% +0.14% / -0.25% +0.42% +0.84%] index_select strided 8 : Elapsed 0.036 ms (3.588 ms / 100) 3.552 -> 3.553 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.73% +0.73%] index_select strided 16 : Elapsed 0.036 ms (3.552 ms / 100) 3.586 -> 3.587 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.61% +0.75%] index_select random : Elapsed 0.036 ms (3.586 ms / 100) 3.538 -> 3.536 ( -0.06%) [ +0.03% +0.00% +0.03% / -0.06% +0.96% +0.79%] index_select random_sorted : Elapsed 0.035 ms (3.539 ms / 100) 3.553 -> 3.554 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.84% +0.93%] index_select perm : Elapsed 0.036 ms (3.555 ms / 100) 3.557 -> 3.560 ( +0.08%) [ +0.00% +0.08% +0.11% / +0.08% +0.84% +0.93%] index_select perm_sorted : Elapsed 0.036 ms (3.557 ms / 100) B = [4, 20, 5, 16] (stride (1, 320, 4, 20)) A = [4, 20, 5, 40] (stride (100, 1, 20, 400)) dim = 3 3.858 -> 3.859 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.70% +0.70%] index_select const : Elapsed 0.039 ms (3.860 ms / 100) 3.858 -> 3.863 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.60% +0.57%] index_select wrap : Elapsed 0.039 ms (3.863 ms / 100) 3.858 -> 3.858 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.49% +0.34%] index_select linear : Elapsed 0.039 ms (3.861 ms / 100) 3.858 -> 3.859 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.52% +0.49%] index_select reverse : Elapsed 0.039 ms (3.858 ms / 100) 3.865 -> 3.866 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.54% +0.52%] index_select skip64 : Elapsed 0.039 ms (3.866 ms / 100) 3.863 -> 3.865 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.47% +0.41%] index_select skip256 : Elapsed 0.039 ms (3.864 ms / 100) 3.859 -> 3.860 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.41% +0.39%] index_select spread : Elapsed 0.039 ms (3.861 ms / 100) 3.867 -> 3.874 ( +0.18%) [ +0.03% +0.18% +0.00% / +0.18% +0.41% +0.41%] index_select strided 3 : Elapsed 0.039 ms (3.868 ms / 100) 3.855 -> 3.855 ( +0.00%) [ +0.18% +0.13% +0.00% / +0.00% +0.39% +0.26%] index_select strided 5 : Elapsed 0.039 ms (3.862 ms / 100) 3.852 -> 3.852 ( +0.00%) [ +0.08% +0.00% +0.05% / +0.00% +0.36% +0.39%] index_select strided 7 : Elapsed 0.039 ms (3.855 ms / 100) 3.865 -> 3.871 ( +0.16%) [ +0.00% +0.05% +0.03% / +0.16% +0.34% +0.41%] index_select strided 8 : Elapsed 0.039 ms (3.865 ms / 100) 3.863 -> 3.862 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.41% +0.47%] index_select strided 16 : Elapsed 0.039 ms (3.865 ms / 100) 3.867 -> 3.867 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.41% +0.41%] index_select random : Elapsed 0.039 ms (3.867 ms / 100) 3.860 -> 3.865 ( +0.13%) [ +0.00% +0.00% +0.03% / +0.13% +0.47% +0.52%] index_select random_sorted : Elapsed 0.039 ms (3.860 ms / 100) 3.852 -> 3.857 ( +0.13%) [ +0.00% +0.03% +0.05% / +0.13% +0.44% +0.36%] index_select perm : Elapsed 0.039 ms (3.852 ms / 100) 3.860 -> 3.867 ( +0.18%) [ +0.00% +0.03% +0.16% / +0.18% +0.47% +0.54%] index_select perm_sorted : Elapsed 0.039 ms (3.860 ms / 100) B = [4, 20, 5, 16] (stride (5, 20, 1, 400)) A = [4, 20, 5, 40] (stride (1, 20, 4, 400)) dim = 3 3.553 -> 3.550 ( -0.08%) [ +0.00% +0.00% +0.06% / -0.08% +0.82% +0.82%] index_select const : Elapsed 0.036 ms (3.553 ms / 100) 3.549 -> 3.560 ( +0.31%) [ +0.06% +0.06% +0.00% / +0.31% +0.79% +0.76%] index_select wrap : Elapsed 0.036 ms (3.551 ms / 100) 3.547 -> 3.551 ( +0.11%) [ +0.08% +0.08% +0.00% / +0.11% +0.68% +0.59%] index_select linear : Elapsed 0.036 ms (3.550 ms / 100) 3.550 -> 3.552 ( +0.06%) [ +0.31% +0.06% +0.00% / +0.06% +0.68% +0.76%] index_select reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.534 -> 3.534 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.99% +1.02%] index_select skip64 : Elapsed 0.035 ms (3.534 ms / 100) 3.553 -> 3.556 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.73% +0.87%] index_select skip256 : Elapsed 0.036 ms (3.553 ms / 100) 3.550 -> 3.552 ( +0.06%) [ +0.23% +0.00% +0.00% / +0.06% +0.76% +0.93%] index_select spread : Elapsed 0.036 ms (3.558 ms / 100) 3.538 -> 3.540 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.90% +0.82%] index_select strided 3 : Elapsed 0.035 ms (3.540 ms / 100) 3.544 -> 3.545 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.87% +0.99%] index_select strided 5 : Elapsed 0.035 ms (3.545 ms / 100) 3.536 -> 3.545 ( +0.25%) [ +0.11% +0.14% +0.00% / +0.25% +0.82% +0.76%] index_select strided 7 : Elapsed 0.035 ms (3.540 ms / 100) 3.548 -> 3.555 ( +0.20%) [ +0.00% +0.06% +0.06% / +0.20% +0.79% +0.73%] index_select strided 8 : Elapsed 0.035 ms (3.548 ms / 100) 3.548 -> 3.550 ( +0.06%) [ +0.08% +0.00% +0.06% / +0.06% +0.82% +0.82%] index_select strided 16 : Elapsed 0.036 ms (3.551 ms / 100) 3.551 -> 3.552 ( +0.03%) [ +0.11% +0.00% +0.20% / +0.03% +0.62% +0.65%] index_select random : Elapsed 0.036 ms (3.555 ms / 100) 3.543 -> 3.547 ( +0.11%) [ +0.03% +0.00% +0.03% / +0.11% +0.79% +0.79%] index_select random_sorted : Elapsed 0.035 ms (3.544 ms / 100) 3.554 -> 3.558 ( +0.11%) [ +0.00% +0.00% +0.06% / +0.11% +0.79% +0.79%] index_select perm : Elapsed 0.036 ms (3.554 ms / 100) 3.546 -> 3.553 ( +0.20%) [ +0.00% +0.25% +0.23% / +0.20% +0.90% +0.99%] index_select perm_sorted : Elapsed 0.035 ms (3.546 ms / 100) B = [4, 20, 5, 16] (stride (1, 20, 4, 400)) A = [4, 20, 5, 40] (stride (200, 800, 1, 5)) dim = 3 3.594 -> 3.596 ( +0.06%) [ +0.11% +0.03% +0.00% / +0.06% +0.50% +0.53%] index_select const : Elapsed 0.036 ms (3.598 ms / 100) 3.620 -> 3.626 ( +0.17%) [ +0.00% +0.14% +0.00% / +0.17% +0.55% +0.55%] index_select wrap : Elapsed 0.036 ms (3.620 ms / 100) 3.623 -> 3.635 ( +0.33%) [ +0.00% +0.00% +0.11% / +0.33% +0.61% +0.61%] index_select linear : Elapsed 0.036 ms (3.623 ms / 100) 3.595 -> 3.595 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.47% +0.50%] index_select reverse : Elapsed 0.036 ms (3.595 ms / 100) 3.604 -> 3.606 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.64% +0.64%] index_select skip64 : Elapsed 0.036 ms (3.605 ms / 100) 3.594 -> 3.595 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.61% +0.56%] index_select skip256 : Elapsed 0.036 ms (3.594 ms / 100) 3.613 -> 3.620 ( +0.19%) [ +0.00% +0.03% +0.00% / +0.19% +0.55% +0.55%] index_select spread : Elapsed 0.036 ms (3.613 ms / 100) 3.616 -> 3.616 ( +0.00%) [ +0.11% +0.00% +0.03% / +0.00% +0.36% +0.36%] index_select strided 3 : Elapsed 0.036 ms (3.620 ms / 100) 3.615 -> 3.613 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.44% +0.44%] index_select strided 5 : Elapsed 0.036 ms (3.617 ms / 100) 3.612 -> 3.615 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.36% +0.39%] index_select strided 7 : Elapsed 0.036 ms (3.612 ms / 100) 3.611 -> 3.619 ( +0.22%) [ +0.03% +0.00% +0.03% / +0.22% +0.44% +0.33%] index_select strided 8 : Elapsed 0.036 ms (3.612 ms / 100) 3.593 -> 3.598 ( +0.14%) [ +0.00% +0.17% +0.14% / +0.14% +0.50% +0.56%] index_select strided 16 : Elapsed 0.036 ms (3.593 ms / 100) 3.613 -> 3.616 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.55% +0.44%] index_select random : Elapsed 0.036 ms (3.613 ms / 100) 3.610 -> 3.616 ( +0.17%) [ +0.08% +0.11% +0.00% / +0.17% +0.47% +0.64%] index_select random_sorted : Elapsed 0.036 ms (3.613 ms / 100) 3.612 -> 3.619 ( +0.19%) [ +0.00% +0.00% +0.00% / +0.19% +0.36% +0.39%] index_select perm : Elapsed 0.036 ms (3.612 ms / 100) 3.617 -> 3.619 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.47% +0.44%] index_select perm_sorted : Elapsed 0.036 ms (3.617 ms / 100) out_shape = [16, 20, 40, 5] in_shape = [4, 20, 40, 5] idx_dim = 0 B = [16, 20, 40, 5] (stride (4000, 200, 1, 40)) A = [4, 20, 40, 5] (stride (4000, 40, 1, 800)) dim = 0 2.388 -> 2.394 ( +0.25%) [ +0.34% +0.00% +0.04% / +0.25% +0.38% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.347 -> 2.352 ( +0.21%) [ +0.04% +0.00% +0.04% / +0.21% +0.38% +0.38%] index_copy_ linear : Elapsed 0.023 ms (2.348 ms / 100) 2.399 -> 2.400 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.25% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.353 -> 2.350 ( -0.13%) [ +0.13% +0.00% +0.08% / -0.13% +0.21% +0.30%] index_copy_ reverse : Elapsed 0.024 ms (2.356 ms / 100) 2.386 -> 2.387 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.29% +0.29%] index_add_ spread : Elapsed 0.024 ms (2.387 ms / 100) 2.347 -> 2.349 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.26% +0.26%] index_copy_ spread : Elapsed 0.023 ms (2.347 ms / 100) 2.391 -> 2.392 ( +0.04%) [ +0.29% +0.29% +0.00% / +0.04% +0.50% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.351 -> 2.359 ( +0.34%) [ +0.26% +0.04% +0.00% / +0.34% +0.38% +0.38%] index_copy_ strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.395 -> 2.394 ( -0.04%) [ +0.21% +0.04% +0.00% / +0.21% +0.13% -0.04%] index_add_ strided 5 : Elapsed 0.024 ms (2.400 ms / 100) 2.351 -> 2.356 ( +0.21%) [ +0.09% +0.00% +0.17% / +0.30% +0.38% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.353 ms / 100) 2.388 -> 2.387 ( -0.04%) [ +0.13% +0.13% +0.00% / +0.04% -0.04% +0.04%] index_add_ strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.348 -> 2.349 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.09% +0.04% +0.09%] index_copy_ strided 7 : Elapsed 0.024 ms (2.350 ms / 100) 2.390 -> 2.393 ( +0.13%) [ +0.25% +0.29% +0.00% / +0.13% +0.25% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.00% +0.09% +0.30% / +0.13% +0.38% +0.34%] index_copy_ perm : Elapsed 0.024 ms (2.350 ms / 100) 2.389 -> 2.388 ( -0.04%) [ +0.04% +0.00% +0.04% / +0.17% -0.04% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.390 ms / 100) 2.345 -> 2.352 ( +0.30%) [ +0.09% +0.00% +0.30% / +0.34% +0.30% +0.34%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.347 ms / 100) 4.942 -> 4.944 ( +0.04%) [ +0.00% +0.00% +0.02% / +0.04% +0.49% +0.47%] index_select const : Elapsed 0.049 ms (4.942 ms / 100) 5.006 -> 5.009 ( +0.06%) [ +0.04% +0.06% +0.00% / +0.06% +0.42% +0.42%] index_select wrap : Elapsed 0.050 ms (5.008 ms / 100) 4.991 -> 4.998 ( +0.14%) [ +0.00% +0.06% +0.04% / +0.14% +0.46% +0.52%] index_select linear : Elapsed 0.050 ms (4.991 ms / 100) 4.995 -> 5.003 ( +0.16%) [ +0.10% +0.00% +0.04% / +0.16% +0.40% +0.34%] index_select reverse : Elapsed 0.050 ms (5.000 ms / 100) 4.962 -> 4.970 ( +0.16%) [ +0.06% +0.04% +0.00% / +0.16% +0.44% +0.34%] index_select skip64 : Elapsed 0.050 ms (4.965 ms / 100) 4.943 -> 4.956 ( +0.26%) [ +0.32% +0.30% +0.00% / +0.26% +0.55% +0.32%] index_select skip256 : Elapsed 0.050 ms (4.959 ms / 100) 4.996 -> 4.995 ( -0.02%) [ +0.02% +0.04% +0.00% / -0.02% +0.22% +0.22%] index_select spread : Elapsed 0.050 ms (4.997 ms / 100) 5.011 -> 5.012 ( +0.02%) [ +0.08% +0.08% +0.00% / +0.02% +0.42% +0.38%] index_select strided 3 : Elapsed 0.050 ms (5.015 ms / 100) 5.022 -> 5.026 ( +0.08%) [ +0.00% +0.12% +0.02% / +0.08% +0.34% +0.38%] index_select random : Elapsed 0.050 ms (5.022 ms / 100) 4.983 -> 4.989 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.26% +0.30%] index_select random_sorted : Elapsed 0.050 ms (4.986 ms / 100) B = [16, 20, 40, 5] (stride (4000, 40, 1, 800)) A = [4, 20, 40, 5] (stride (4000, 200, 5, 1)) dim = 0 2.183 -> 2.182 ( -0.05%) [ +0.09% +0.27% +0.00% / -0.05% +0.18% +0.09%] index_add_ linear : Elapsed 0.022 ms (2.185 ms / 100) 2.141 -> 2.139 ( -0.09%) [ +0.00% +0.09% +0.14% / +0.23% -0.05% -0.09%] index_copy_ linear : Elapsed 0.021 ms (2.141 ms / 100) 2.185 -> 2.185 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.18% +0.00% +0.27%] index_add_ reverse : Elapsed 0.022 ms (2.185 ms / 100) 2.138 -> 2.137 ( -0.05%) [ +0.00% +0.05% +0.14% / +0.19% -0.05% +0.05%] index_copy_ reverse : Elapsed 0.021 ms (2.138 ms / 100) 2.187 -> 2.185 ( -0.09%) [ +0.00% +0.09% +0.05% / +0.14% -0.09% +0.05%] index_add_ spread : Elapsed 0.022 ms (2.187 ms / 100) 2.135 -> 2.137 ( +0.09%) [ +0.05% +0.05% +0.00% / +0.28% +0.09% +0.09%] index_copy_ spread : Elapsed 0.021 ms (2.136 ms / 100) 2.186 -> 2.184 ( -0.09%) [ +0.05% +0.05% +0.00% / +0.14% +0.00% -0.09%] index_add_ strided 3 : Elapsed 0.022 ms (2.187 ms / 100) 2.140 -> 2.139 ( -0.05%) [ +0.00% +0.05% +0.00% / +0.00% +0.05% -0.05%] index_copy_ strided 3 : Elapsed 0.021 ms (2.140 ms / 100) 2.183 -> 2.185 ( +0.09%) [ +0.14% +0.05% +0.00% / +0.09% +0.18% +0.41%] index_add_ strided 5 : Elapsed 0.022 ms (2.186 ms / 100) 2.138 -> 2.139 ( +0.05%) [ +0.14% +0.00% +0.09% / +0.05% +0.09% +0.33%] index_copy_ strided 5 : Elapsed 0.021 ms (2.141 ms / 100) 2.184 -> 2.184 ( +0.00%) [ +0.18% +0.05% +0.00% / +0.00% +0.00% +0.14%] index_add_ strided 7 : Elapsed 0.022 ms (2.188 ms / 100) 2.139 -> 2.140 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.14% +0.05%] index_copy_ strided 7 : Elapsed 0.021 ms (2.141 ms / 100) 2.185 -> 2.185 ( +0.00%) [ +0.14% +0.00% +0.09% / +0.00% +0.50% +0.32%] index_add_ perm : Elapsed 0.022 ms (2.188 ms / 100) 2.138 -> 2.140 ( +0.09%) [ +0.23% +0.00% +0.19% / +0.09% +0.33% +0.19%] index_copy_ perm : Elapsed 0.021 ms (2.143 ms / 100) 2.183 -> 2.183 ( +0.00%) [ +0.18% +0.00% +0.18% / +0.00% +0.23% +0.18%] index_add_ perm_sorted : Elapsed 0.022 ms (2.187 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.00% +0.09% +0.23% / +0.05% +0.23% +0.28%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.137 ms / 100) 4.063 -> 4.068 ( +0.12%) [ +0.17% +0.05% +0.00% / +0.12% +0.47% +0.39%] index_select const : Elapsed 0.041 ms (4.070 ms / 100) 4.120 -> 4.128 ( +0.19%) [ +0.19% +0.10% +0.00% / +0.19% +0.32% +0.27%] index_select wrap : Elapsed 0.041 ms (4.128 ms / 100) 4.115 -> 4.118 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.10% +0.12%] index_select linear : Elapsed 0.041 ms (4.116 ms / 100) 4.113 -> 4.117 ( +0.10%) [ +0.15% +0.00% +0.00% / +0.10% +0.27% +0.29%] index_select reverse : Elapsed 0.041 ms (4.119 ms / 100) 4.052 -> 4.059 ( +0.17%) [ +0.30% +0.00% +0.25% / +0.17% +0.30% +0.27%] index_select skip64 : Elapsed 0.041 ms (4.064 ms / 100) 4.057 -> 4.057 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.12% +0.12%] index_select skip256 : Elapsed 0.041 ms (4.057 ms / 100) 4.097 -> 4.106 ( +0.22%) [ +0.22% +0.17% +0.00% / +0.22% +0.46% +0.37%] index_select spread : Elapsed 0.041 ms (4.106 ms / 100) 4.124 -> 4.124 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.05% +0.10% +0.00%] index_select strided 3 : Elapsed 0.041 ms (4.127 ms / 100) 4.105 -> 4.107 ( +0.05%) [ +0.07% +0.27% +0.00% / +0.12% +0.15% +0.05%] index_select random : Elapsed 0.041 ms (4.108 ms / 100) 4.097 -> 4.101 ( +0.10%) [ +0.07% +0.05% +0.00% / +0.10% +0.24% +0.24%] index_select random_sorted : Elapsed 0.041 ms (4.100 ms / 100) B = [16, 20, 40, 5] (stride (4000, 40, 1, 800)) A = [4, 20, 40, 5] (stride (5, 20, 400, 1)) dim = 0 2.415 -> 2.419 ( +0.17%) [ +0.04% +0.00% +0.41% / +0.17% +0.29% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.416 ms / 100) 2.362 -> 2.361 ( -0.04%) [ +0.00% +0.13% +0.08% / +0.04% +0.13% -0.04%] index_copy_ linear : Elapsed 0.024 ms (2.362 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.41% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.51% +0.21%] index_copy_ reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.21% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.359 -> 2.362 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.13% +0.47% +0.13%] index_copy_ spread : Elapsed 0.024 ms (2.360 ms / 100) 2.411 -> 2.416 ( +0.21%) [ +0.37% +0.00% +0.17% / +0.21% +0.50% +0.62%] index_add_ strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.363 -> 2.361 ( -0.08%) [ +0.00% +0.17% +0.04% / -0.08% +0.21% +0.13%] index_copy_ strided 3 : Elapsed 0.024 ms (2.363 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.41% +0.37%] index_add_ strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.361 -> 2.365 ( +0.17%) [ +0.13% +0.04% +0.00% / +0.17% +0.38% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.364 ms / 100) 2.407 -> 2.413 ( +0.25%) [ +0.46% +0.08% +0.00% / +0.25% +0.37% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.358 -> 2.364 ( +0.25%) [ +0.08% +0.04% +0.00% / +0.30% +0.25% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.360 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.25% +0.21% +0.00% / +0.12% -0.04% +0.12%] index_add_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.361 -> 2.363 ( +0.08%) [ +0.00% +0.13% +0.13% / +0.17% +0.08% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.361 ms / 100) 2.411 -> 2.416 ( +0.21%) [ +0.12% +0.21% +0.00% / +0.21% +0.46% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.357 -> 2.363 ( +0.25%) [ +0.21% +0.21% +0.00% / +0.25% +0.38% +0.30%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.362 ms / 100) 5.059 -> 5.063 ( +0.08%) [ +0.00% +0.06% +0.30% / +0.08% +0.45% +0.28%] index_select const : Elapsed 0.051 ms (5.059 ms / 100) 5.084 -> 5.089 ( +0.10%) [ +0.24% +0.00% +0.12% / +0.10% +0.43% +0.18%] index_select wrap : Elapsed 0.051 ms (5.096 ms / 100) 5.096 -> 5.102 ( +0.12%) [ +0.06% +0.00% +0.10% / +0.12% +0.29% +0.29%] index_select linear : Elapsed 0.051 ms (5.099 ms / 100) 5.084 -> 5.097 ( +0.26%) [ +0.20% +0.06% +0.00% / +0.26% +0.26% +0.33%] index_select reverse : Elapsed 0.051 ms (5.094 ms / 100) 5.058 -> 5.065 ( +0.14%) [ +0.00% +0.12% +0.16% / +0.14% +0.36% +0.14%] index_select skip64 : Elapsed 0.051 ms (5.058 ms / 100) 5.042 -> 5.052 ( +0.20%) [ +0.00% +0.10% +0.00% / +0.20% +0.28% +0.34%] index_select skip256 : Elapsed 0.050 ms (5.042 ms / 100) 5.083 -> 5.092 ( +0.18%) [ +0.18% +0.14% +0.00% / +0.28% +0.24% +0.18%] index_select spread : Elapsed 0.051 ms (5.092 ms / 100) 5.071 -> 5.080 ( +0.18%) [ +0.16% +0.00% +0.04% / +0.22% +0.18% +0.26%] index_select strided 3 : Elapsed 0.051 ms (5.079 ms / 100) 5.084 -> 5.082 ( -0.04%) [ +0.43% +0.24% +0.00% / -0.04% +0.57% +0.37%] index_select random : Elapsed 0.051 ms (5.106 ms / 100) 5.085 -> 5.088 ( +0.06%) [ +0.00% +0.02% +0.04% / +0.06% +0.31% +0.20%] index_select random_sorted : Elapsed 0.051 ms (5.085 ms / 100) B = [16, 20, 40, 5] (stride (5, 3200, 80, 1)) A = [4, 20, 40, 5] (stride (1, 800, 4, 160)) dim = 0 2.380 -> 2.382 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.25% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.385 ms / 100) 2.340 -> 2.343 ( +0.13%) [ +0.30% +0.21% +0.00% / +0.13% +0.34% +0.30%] index_copy_ linear : Elapsed 0.023 ms (2.347 ms / 100) 2.384 -> 2.387 ( +0.13%) [ +0.08% +0.00% +0.21% / +0.13% +0.13% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.347 -> 2.347 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.30% +0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.347 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.21% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.388 ms / 100) 2.373 -> 2.381 ( +0.34%) [ +0.21% +0.04% +0.00% / +0.34% +0.51% +0.34%] index_copy_ spread : Elapsed 0.024 ms (2.378 ms / 100) 2.387 -> 2.391 ( +0.17%) [ +0.04% +0.21% +0.00% / +0.17% +0.42% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.388 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.25% +0.33%] index_copy_ strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.352 -> 2.357 ( +0.21%) [ +0.00% +0.21% +0.00% / +0.21% +0.26% +0.26%] index_add_ strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.332 -> 2.340 ( +0.34%) [ +0.09% +0.34% +0.00% / +0.60% +0.34% +0.64%] index_copy_ strided 5 : Elapsed 0.023 ms (2.334 ms / 100) 2.359 -> 2.370 ( +0.47%) [ +0.00% +0.04% +0.17% / +0.51% +0.59% +0.47%] index_add_ strided 7 : Elapsed 0.024 ms (2.359 ms / 100) 2.357 -> 2.361 ( +0.17%) [ +0.13% +0.00% +0.00% / +0.17% +0.30% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.360 ms / 100) 2.385 -> 2.386 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.34% +0.34%] index_add_ perm : Elapsed 0.024 ms (2.385 ms / 100) 2.371 -> 2.379 ( +0.34%) [ +0.21% +0.17% +0.00% / +0.34% +0.51% +0.34%] index_copy_ perm : Elapsed 0.024 ms (2.376 ms / 100) 2.387 -> 2.390 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.25% +0.13% +0.25%] index_add_ perm_sorted : Elapsed 0.024 ms (2.387 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.21% +0.00% +0.13% / +0.04% +0.25% +0.34%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.379 ms / 100) 5.084 -> 5.083 ( -0.02%) [ +0.12% +0.00% +0.06% / -0.02% +0.45% +0.41%] index_select const : Elapsed 0.051 ms (5.090 ms / 100) 5.079 -> 5.086 ( +0.14%) [ +0.16% +0.02% +0.00% / +0.14% +0.39% +0.63%] index_select wrap : Elapsed 0.051 ms (5.087 ms / 100) 5.087 -> 5.076 ( -0.22%) [ +0.20% +0.00% +0.26% / -0.22% +0.51% +0.16%] index_select linear : Elapsed 0.051 ms (5.097 ms / 100) 5.063 -> 5.071 ( +0.16%) [ +0.20% +0.00% +0.24% / +0.16% +0.47% +0.49%] index_select reverse : Elapsed 0.051 ms (5.073 ms / 100) 5.074 -> 5.075 ( +0.02%) [ +0.00% +0.10% +0.04% / +0.02% +0.73% +0.49%] index_select skip64 : Elapsed 0.051 ms (5.074 ms / 100) 5.088 -> 5.096 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.65% +0.39%] index_select skip256 : Elapsed 0.051 ms (5.090 ms / 100) 5.083 -> 5.083 ( +0.00%) [ +0.18% +0.10% +0.00% / +0.00% +0.26% +0.26%] index_select spread : Elapsed 0.051 ms (5.092 ms / 100) 5.068 -> 5.075 ( +0.14%) [ +0.16% +0.00% +0.14% / +0.14% +0.45% +0.47%] index_select strided 3 : Elapsed 0.051 ms (5.076 ms / 100) 5.067 -> 5.080 ( +0.26%) [ +0.00% +0.22% +0.10% / +0.26% +0.47% +0.49%] index_select random : Elapsed 0.051 ms (5.067 ms / 100) 5.071 -> 5.070 ( -0.02%) [ +0.04% +0.24% +0.00% / -0.02% +0.32% +0.37%] index_select random_sorted : Elapsed 0.051 ms (5.073 ms / 100) B = [16, 20, 40, 5] (stride (1, 3200, 16, 640)) A = [4, 20, 40, 5] (stride (1, 800, 4, 160)) dim = 0 2.516 -> 2.520 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.16% +0.56% +0.52%] index_add_ linear : Elapsed 0.025 ms (2.520 ms / 100) 2.504 -> 2.506 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.52% +0.48%] index_copy_ linear : Elapsed 0.025 ms (2.504 ms / 100) 2.513 -> 2.517 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.72% +0.88%] index_add_ reverse : Elapsed 0.025 ms (2.515 ms / 100) 2.500 -> 2.501 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.56% +0.64%] index_copy_ reverse : Elapsed 0.025 ms (2.501 ms / 100) 2.555 -> 2.553 ( -0.08%) [ +0.00% +0.12% +0.04% / -0.08% +0.63% +0.35%] index_add_ spread : Elapsed 0.026 ms (2.555 ms / 100) 2.607 -> 2.606 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.54% +0.50%] index_copy_ spread : Elapsed 0.026 ms (2.608 ms / 100) 2.550 -> 2.563 ( +0.51%) [ +0.00% +0.47% +0.16% / +0.51% +1.02% +0.78%] index_add_ strided 3 : Elapsed 0.026 ms (2.550 ms / 100) 2.606 -> 2.612 ( +0.23%) [ +0.04% +0.00% +0.15% / +0.23% +0.58% +0.65%] index_copy_ strided 3 : Elapsed 0.026 ms (2.607 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.16% +0.27% +0.00% / +0.08% +0.63% +0.51%] index_add_ strided 5 : Elapsed 0.026 ms (2.556 ms / 100) 2.604 -> 2.610 ( +0.23%) [ +0.00% +0.23% +0.31% / +0.23% +0.54% +0.42%] index_copy_ strided 5 : Elapsed 0.026 ms (2.604 ms / 100) 2.550 -> 2.558 ( +0.31%) [ +0.43% +0.35% +0.00% / +0.31% +0.35% +0.47%] index_add_ strided 7 : Elapsed 0.026 ms (2.561 ms / 100) 2.602 -> 2.604 ( +0.08%) [ +0.08% +0.31% +0.00% / +0.08% +0.50% +0.69%] index_copy_ strided 7 : Elapsed 0.026 ms (2.604 ms / 100) 2.520 -> 2.517 ( -0.12%) [ +0.20% +0.04% +0.00% / -0.12% +0.28% +0.12%] index_add_ perm : Elapsed 0.025 ms (2.525 ms / 100) 2.502 -> 2.501 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.48% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.502 ms / 100) 2.518 -> 2.517 ( -0.04%) [ +0.00% +0.20% +0.04% / -0.04% +0.08% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.518 ms / 100) 2.500 -> 2.496 ( -0.16%) [ +0.04% +0.04% +0.00% / -0.16% +0.40% +0.72%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.501 ms / 100) 5.517 -> 5.527 ( +0.18%) [ +0.22% +0.13% +0.00% / +0.18% +0.67% +0.54%] index_select const : Elapsed 0.055 ms (5.529 ms / 100) 5.530 -> 5.521 ( -0.16%) [ +0.11% +0.00% +0.04% / -0.16% +0.36% +0.56%] index_select wrap : Elapsed 0.055 ms (5.536 ms / 100) 5.528 -> 5.533 ( +0.09%) [ +0.13% +0.02% +0.00% / +0.09% +0.54% +0.65%] index_select linear : Elapsed 0.055 ms (5.535 ms / 100) 5.537 -> 5.533 ( -0.07%) [ +0.00% +0.05% +0.02% / -0.07% +0.58% +0.31%] index_select reverse : Elapsed 0.055 ms (5.537 ms / 100) 5.535 -> 5.544 ( +0.16%) [ +0.18% +0.20% +0.00% / +0.16% +0.61% +0.52%] index_select skip64 : Elapsed 0.055 ms (5.545 ms / 100) 5.530 -> 5.534 ( +0.07%) [ +0.00% +0.11% +0.02% / +0.07% +0.38% +0.49%] index_select skip256 : Elapsed 0.055 ms (5.530 ms / 100) 5.520 -> 5.525 ( +0.09%) [ +0.42% +0.36% +0.00% / +0.09% +0.65% +0.58%] index_select spread : Elapsed 0.055 ms (5.543 ms / 100) 5.530 -> 5.533 ( +0.05%) [ +0.02% +0.09% +0.00% / +0.05% +0.47% +0.47%] index_select strided 3 : Elapsed 0.055 ms (5.531 ms / 100) 5.537 -> 5.534 ( -0.05%) [ +0.00% +0.09% +0.16% / -0.05% +0.34% +0.45%] index_select random : Elapsed 0.055 ms (5.537 ms / 100) 5.516 -> 5.532 ( +0.29%) [ +0.34% +0.04% +0.00% / +0.29% +0.74% +0.42%] index_select random_sorted : Elapsed 0.055 ms (5.535 ms / 100) B = [16, 20, 40, 5] (stride (5, 80, 1600, 1)) A = [4, 20, 40, 5] (stride (4000, 1, 20, 800)) dim = 0 2.544 -> 2.552 ( +0.31%) [ +0.20% +0.12% +0.00% / +0.75% +0.31% +0.35%] index_add_ linear : Elapsed 0.025 ms (2.549 ms / 100) 2.500 -> 2.507 ( +0.28%) [ +0.00% +0.08% +0.16% / +1.60% +0.28% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.500 ms / 100) 2.544 -> 2.552 ( +0.31%) [ +0.08% +0.31% +0.00% / +1.02% +0.31% +0.35%] index_add_ reverse : Elapsed 0.025 ms (2.546 ms / 100) 2.502 -> 2.502 ( +0.00%) [ +0.00% +0.32% +0.04% / +0.44% +0.00% +0.20%] index_copy_ reverse : Elapsed 0.025 ms (2.502 ms / 100) 2.558 -> 2.557 ( -0.04%) [ +0.08% +0.16% +0.00% / -0.04% +0.12% +0.12%] index_add_ spread : Elapsed 0.026 ms (2.560 ms / 100) 2.538 -> 2.541 ( +0.12%) [ +0.24% +0.12% +0.00% / +0.12% +0.39% +0.16%] index_copy_ spread : Elapsed 0.025 ms (2.544 ms / 100) 2.557 -> 2.558 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.04% +0.31% +0.39%] index_add_ strided 3 : Elapsed 0.026 ms (2.559 ms / 100) 2.561 -> 2.569 ( +0.31%) [ +0.00% +0.23% +0.12% / +0.35% +0.39% +0.31%] index_copy_ strided 3 : Elapsed 0.026 ms (2.561 ms / 100) 2.522 -> 2.522 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.71% +0.00% +0.04%] index_add_ strided 5 : Elapsed 0.025 ms (2.525 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.16% +0.00% +0.00% / +0.84% +0.20% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.505 ms / 100) 2.533 -> 2.535 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.36% +0.28% +0.08%] index_add_ strided 7 : Elapsed 0.025 ms (2.533 ms / 100) 2.521 -> 2.523 ( +0.08%) [ +0.16% +0.00% +0.24% / +1.27% +0.12% +0.08%] index_copy_ strided 7 : Elapsed 0.025 ms (2.525 ms / 100) 2.537 -> 2.548 ( +0.43%) [ +0.08% +0.20% +0.00% / +0.83% +0.43% +0.43%] index_add_ perm : Elapsed 0.025 ms (2.539 ms / 100) 2.511 -> 2.518 ( +0.28%) [ +0.00% +0.04% +0.12% / +0.56% +0.32% +0.28%] index_copy_ perm : Elapsed 0.025 ms (2.511 ms / 100) 2.538 -> 2.545 ( +0.28%) [ +0.16% +0.12% +0.00% / +0.28% +0.32% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.542 ms / 100) 2.517 -> 2.518 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.04% +0.32%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.520 ms / 100) 5.748 -> 5.751 ( +0.05%) [ +0.10% +0.00% +0.03% / +0.05% +0.31% +1.11%] index_select const : Elapsed 0.058 ms (5.754 ms / 100) 5.732 -> 5.746 ( +0.24%) [ +0.00% +0.00% +0.19% / +0.24% +0.42% +0.51%] index_select wrap : Elapsed 0.057 ms (5.732 ms / 100) 5.740 -> 5.741 ( +0.02%) [ +0.07% +0.12% +0.00% / +0.02% +0.28% +0.35%] index_select linear : Elapsed 0.057 ms (5.744 ms / 100) 5.750 -> 5.756 ( +0.10%) [ +0.00% +0.14% +0.03% / +0.10% +0.31% +0.37%] index_select reverse : Elapsed 0.057 ms (5.750 ms / 100) 5.748 -> 5.756 ( +0.14%) [ +0.17% +0.00% +0.17% / +0.14% +0.45% +0.40%] index_select skip64 : Elapsed 0.058 ms (5.758 ms / 100) 5.742 -> 5.739 ( -0.05%) [ +0.05% +0.00% +0.03% / -0.05% +0.64% +0.57%] index_select skip256 : Elapsed 0.057 ms (5.745 ms / 100) 5.736 -> 5.737 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.44% +0.17%] index_select spread : Elapsed 0.057 ms (5.736 ms / 100) 5.750 -> 5.751 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.38% +0.33%] index_select strided 3 : Elapsed 0.057 ms (5.750 ms / 100) 5.743 -> 5.746 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.31% +0.38%] index_select random : Elapsed 0.057 ms (5.748 ms / 100) 5.747 -> 5.745 ( -0.03%) [ +0.00% +0.09% +0.03% / -0.03% +0.30% +0.24%] index_select random_sorted : Elapsed 0.057 ms (5.747 ms / 100) B = [16, 20, 40, 5] (stride (40, 640, 1, 12800)) A = [4, 20, 40, 5] (stride (4000, 1, 20, 800)) dim = 0 2.542 -> 2.544 ( +0.08%) [ +0.08% +0.24% +0.00% / +0.08% +0.43% +0.31%] index_add_ linear : Elapsed 0.025 ms (2.544 ms / 100) 2.491 -> 2.489 ( -0.08%) [ +0.00% +0.04% +0.12% / -0.08% +0.52% +0.44%] index_copy_ linear : Elapsed 0.025 ms (2.491 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.04% +0.12% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.546 ms / 100) 2.492 -> 2.490 ( -0.08%) [ +0.00% +0.08% +0.12% / -0.08% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.025 ms (2.492 ms / 100) 2.539 -> 2.538 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.35% +0.08%] index_add_ spread : Elapsed 0.025 ms (2.539 ms / 100) 2.483 -> 2.485 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.56% +0.44%] index_copy_ spread : Elapsed 0.025 ms (2.483 ms / 100) 2.535 -> 2.537 ( +0.08%) [ +0.04% +0.16% +0.00% / +0.08% +0.28% +0.51%] index_add_ strided 3 : Elapsed 0.025 ms (2.536 ms / 100) 2.481 -> 2.486 ( +0.20%) [ +0.08% +0.00% +0.16% / +0.20% +0.48% +0.44%] index_copy_ strided 3 : Elapsed 0.025 ms (2.483 ms / 100) 2.536 -> 2.538 ( +0.08%) [ +0.04% +0.24% +0.00% / +0.08% +0.24% +0.39%] index_add_ strided 5 : Elapsed 0.025 ms (2.537 ms / 100) 2.485 -> 2.484 ( -0.04%) [ +0.00% +0.12% +0.04% / -0.04% +0.28% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.485 ms / 100) 2.540 -> 2.539 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.08% +0.08%] index_add_ strided 7 : Elapsed 0.025 ms (2.540 ms / 100) 2.488 -> 2.486 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.20% +0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.489 ms / 100) 2.540 -> 2.539 ( -0.04%) [ +0.08% +0.16% +0.00% / -0.04% -0.04% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.542 ms / 100) 2.484 -> 2.484 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.08% +0.04%] index_copy_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.541 -> 2.540 ( -0.04%) [ +0.00% +0.12% +0.00% / +0.16% +0.12% -0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.541 ms / 100) 2.485 -> 2.487 ( +0.08%) [ +0.04% +0.24% +0.00% / +0.08% +0.12% +0.12%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 5.739 -> 5.747 ( +0.14%) [ +0.00% +0.35% +0.35% / +0.14% +0.28% +0.54%] index_select const : Elapsed 0.057 ms (5.739 ms / 100) 5.732 -> 5.734 ( +0.03%) [ +0.00% +0.17% +0.02% / +0.03% +0.56% +0.65%] index_select wrap : Elapsed 0.057 ms (5.732 ms / 100) 5.757 -> 5.765 ( +0.14%) [ +0.12% +0.00% +0.07% / +0.14% +0.59% +0.54%] index_select linear : Elapsed 0.058 ms (5.764 ms / 100) 5.745 -> 5.747 ( +0.03%) [ +0.07% +0.03% +0.00% / +0.03% +0.59% +0.45%] index_select reverse : Elapsed 0.057 ms (5.749 ms / 100) 5.751 -> 5.763 ( +0.21%) [ +0.00% +0.00% +0.21% / +0.21% +0.26% +0.63%] index_select skip64 : Elapsed 0.058 ms (5.751 ms / 100) 5.761 -> 5.767 ( +0.10%) [ +0.00% +0.03% +0.02% / +0.10% +0.50% +0.31%] index_select skip256 : Elapsed 0.058 ms (5.761 ms / 100) 5.733 -> 5.737 ( +0.07%) [ +0.05% +0.00% +0.03% / +0.07% +0.26% +0.38%] index_select spread : Elapsed 0.057 ms (5.736 ms / 100) 5.750 -> 5.754 ( +0.07%) [ +0.09% +0.00% +0.24% / +0.07% +0.35% +0.35%] index_select strided 3 : Elapsed 0.058 ms (5.755 ms / 100) 5.749 -> 5.746 ( -0.05%) [ +0.05% +0.03% +0.00% / -0.05% +0.43% +0.42%] index_select random : Elapsed 0.058 ms (5.752 ms / 100) 5.736 -> 5.744 ( +0.14%) [ +0.03% +0.00% +0.09% / +0.14% +0.47% +0.40%] index_select random_sorted : Elapsed 0.057 ms (5.738 ms / 100) out_shape = [4, 16, 40, 5] in_shape = [4, 20, 40, 5] idx_dim = 1 B = [4, 16, 40, 5] (stride (3200, 1, 80, 16)) A = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) dim = 1 3.567 -> 3.568 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.73% +0.67%] index_select const : Elapsed 0.036 ms (3.569 ms / 100) 3.568 -> 3.570 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.70% +0.67%] index_select wrap : Elapsed 0.036 ms (3.568 ms / 100) 3.572 -> 3.571 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.73% +0.70%] index_select linear : Elapsed 0.036 ms (3.572 ms / 100) 3.577 -> 3.578 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.78% +0.78%] index_select reverse : Elapsed 0.036 ms (3.577 ms / 100) 3.566 -> 3.569 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.84% +0.84%] index_select skip64 : Elapsed 0.036 ms (3.567 ms / 100) 3.573 -> 3.573 ( +0.00%) [ +0.22% +0.00% +0.03% / +0.00% +0.73% +0.76%] index_select skip256 : Elapsed 0.036 ms (3.581 ms / 100) 3.571 -> 3.571 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.64% +0.64%] index_select spread : Elapsed 0.036 ms (3.572 ms / 100) 3.569 -> 3.570 ( +0.03%) [ +0.06% +0.00% +0.03% / +0.03% +0.78% +0.78%] index_select strided 3 : Elapsed 0.036 ms (3.571 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.78% +0.81%] index_select strided 5 : Elapsed 0.036 ms (3.570 ms / 100) 3.578 -> 3.579 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.75% +0.75%] index_select strided 7 : Elapsed 0.036 ms (3.579 ms / 100) 3.584 -> 3.584 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.59% +0.56%] index_select strided 8 : Elapsed 0.036 ms (3.585 ms / 100) 3.570 -> 3.568 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.64% +0.67%] index_select strided 16 : Elapsed 0.036 ms (3.570 ms / 100) 3.578 -> 3.579 ( +0.03%) [ +0.06% +0.00% +0.03% / +0.03% +0.45% +0.42%] index_select random : Elapsed 0.036 ms (3.580 ms / 100) 3.572 -> 3.573 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.53% +0.53%] index_select random_sorted : Elapsed 0.036 ms (3.574 ms / 100) 3.587 -> 3.589 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.61% +0.59%] index_select perm : Elapsed 0.036 ms (3.587 ms / 100) 3.574 -> 3.575 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.50% +0.48%] index_select perm_sorted : Elapsed 0.036 ms (3.575 ms / 100) B = [4, 16, 40, 5] (stride (3200, 40, 1, 640)) A = [4, 20, 40, 5] (stride (800, 1, 20, 3200)) dim = 1 4.135 -> 4.136 ( +0.02%) [ +0.07% +0.12% +0.00% / +0.02% +0.58% +0.56%] index_select const : Elapsed 0.041 ms (4.138 ms / 100) 4.128 -> 4.129 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.51% +0.48%] index_select wrap : Elapsed 0.041 ms (4.130 ms / 100) 4.149 -> 4.152 ( +0.07%) [ +0.00% +0.12% +0.10% / +0.07% +0.53% +0.58%] index_select linear : Elapsed 0.041 ms (4.149 ms / 100) 4.144 -> 4.148 ( +0.10%) [ +0.14% +0.05% +0.00% / +0.10% +0.63% +0.60%] index_select reverse : Elapsed 0.041 ms (4.150 ms / 100) 4.136 -> 4.141 ( +0.12%) [ +0.00% +0.07% +0.05% / +0.12% +0.58% +0.53%] index_select skip64 : Elapsed 0.041 ms (4.136 ms / 100) 4.132 -> 4.133 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.51% +0.51%] index_select skip256 : Elapsed 0.041 ms (4.133 ms / 100) 4.137 -> 4.138 ( +0.02%) [ +0.02% +0.10% +0.00% / +0.02% +0.63% +0.60%] index_select spread : Elapsed 0.041 ms (4.138 ms / 100) 4.152 -> 4.168 ( +0.39%) [ +0.00% +0.41% +0.17% / +0.39% +0.65% +0.65%] index_select strided 3 : Elapsed 0.042 ms (4.152 ms / 100) 4.123 -> 4.127 ( +0.10%) [ +0.15% +0.12% +0.00% / +0.10% +0.58% +0.58%] index_select strided 5 : Elapsed 0.041 ms (4.129 ms / 100) 4.133 -> 4.133 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.73% +1.48%] index_select strided 7 : Elapsed 0.041 ms (4.134 ms / 100) 4.131 -> 4.133 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.65% +0.65%] index_select strided 8 : Elapsed 0.041 ms (4.133 ms / 100) 4.131 -> 4.131 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.65% +0.65%] index_select strided 16 : Elapsed 0.041 ms (4.133 ms / 100) 4.131 -> 4.136 ( +0.12%) [ +0.07% +0.07% +0.00% / +0.12% +0.68% +0.77%] index_select random : Elapsed 0.041 ms (4.134 ms / 100) 4.143 -> 4.148 ( +0.12%) [ +0.14% +0.12% +0.00% / +0.12% +0.60% +0.68%] index_select random_sorted : Elapsed 0.041 ms (4.149 ms / 100) 4.148 -> 4.138 ( -0.24%) [ +0.02% +0.00% +0.00% / -0.24% +0.63% +0.75%] index_select perm : Elapsed 0.041 ms (4.149 ms / 100) 4.153 -> 4.156 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.63% +0.60%] index_select perm_sorted : Elapsed 0.042 ms (4.153 ms / 100) B = [4, 16, 40, 5] (stride (16, 1, 320, 64)) A = [4, 20, 40, 5] (stride (1, 160, 4, 3200)) dim = 1 3.643 -> 3.649 ( +0.16%) [ +0.33% +0.08% +0.00% / +0.16% +0.63% +0.63%] index_select const : Elapsed 0.037 ms (3.655 ms / 100) 3.645 -> 3.647 ( +0.05%) [ +0.00% +0.14% +0.08% / +0.05% +0.82% +0.63%] index_select wrap : Elapsed 0.036 ms (3.645 ms / 100) 3.633 -> 3.635 ( +0.06%) [ +0.00% +0.00% +0.25% / +0.06% +0.85% +0.83%] index_select linear : Elapsed 0.036 ms (3.633 ms / 100) 3.653 -> 3.658 ( +0.14%) [ +0.19% +0.03% +0.00% / +0.14% +0.74% +0.68%] index_select reverse : Elapsed 0.037 ms (3.660 ms / 100) 3.655 -> 3.661 ( +0.16%) [ +0.00% +0.22% +0.19% / +0.16% +0.74% +0.68%] index_select skip64 : Elapsed 0.037 ms (3.655 ms / 100) 3.653 -> 3.656 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.74% +0.79%] index_select skip256 : Elapsed 0.037 ms (3.659 ms / 100) 3.659 -> 3.661 ( +0.05%) [ +0.11% +0.00% +0.14% / +0.05% +0.60% +0.66%] index_select spread : Elapsed 0.037 ms (3.663 ms / 100) 3.640 -> 3.646 ( +0.16%) [ +0.19% +0.25% +0.00% / +0.16% +0.80% +0.85%] index_select strided 3 : Elapsed 0.036 ms (3.647 ms / 100) 3.636 -> 3.637 ( +0.03%) [ +0.08% +0.06% +0.00% / +0.03% +0.77% +0.83%] index_select strided 5 : Elapsed 0.036 ms (3.639 ms / 100) 3.648 -> 3.650 ( +0.05%) [ +0.11% +0.03% +0.00% / +0.05% +0.71% +0.74%] index_select strided 7 : Elapsed 0.037 ms (3.652 ms / 100) 3.648 -> 3.649 ( +0.03%) [ +0.11% +0.11% +0.00% / +0.03% +0.79% +0.63%] index_select strided 8 : Elapsed 0.037 ms (3.652 ms / 100) 3.649 -> 3.663 ( +0.38%) [ +0.38% +0.00% +0.19% / +0.38% +0.93% +0.96%] index_select strided 16 : Elapsed 0.037 ms (3.663 ms / 100) 3.641 -> 3.641 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.58% +0.60%] index_select random : Elapsed 0.036 ms (3.644 ms / 100) 3.651 -> 3.659 ( +0.22%) [ +0.08% +0.14% +0.00% / +0.22% +0.66% +0.63%] index_select random_sorted : Elapsed 0.037 ms (3.654 ms / 100) 3.649 -> 3.653 ( +0.11%) [ +0.00% +0.22% +0.19% / +0.11% +0.60% +0.66%] index_select perm : Elapsed 0.036 ms (3.649 ms / 100) 3.647 -> 3.646 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.52% +0.60%] index_select perm_sorted : Elapsed 0.036 ms (3.648 ms / 100) B = [4, 16, 40, 5] (stride (640, 40, 1, 2560)) A = [4, 20, 40, 5] (stride (800, 40, 1, 3200)) dim = 1 3.940 -> 3.942 ( +0.05%) [ +0.15% +0.00% +0.00% / +0.05% +0.58% +0.58%] index_select const : Elapsed 0.039 ms (3.946 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.18% +0.00% +0.18% / +0.05% +0.56% +0.66%] index_select wrap : Elapsed 0.039 ms (3.927 ms / 100) 3.927 -> 3.930 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.51% +0.41%] index_select linear : Elapsed 0.039 ms (3.927 ms / 100) 3.936 -> 3.934 ( -0.05%) [ +0.03% +0.00% +0.00% / -0.05% +0.36% +0.41%] index_select reverse : Elapsed 0.039 ms (3.937 ms / 100) 3.948 -> 3.959 ( +0.28%) [ +0.03% +0.13% +0.00% / +0.28% +0.63% +0.81%] index_select skip64 : Elapsed 0.039 ms (3.949 ms / 100) 3.936 -> 3.931 ( -0.13%) [ +0.03% +0.00% +0.03% / -0.13% +0.51% +0.51%] index_select skip256 : Elapsed 0.039 ms (3.937 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.43% +0.43%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.927 -> 3.931 ( +0.10%) [ +0.08% +0.13% +0.00% / +0.10% +0.64% +0.59%] index_select strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.66% +0.61%] index_select strided 5 : Elapsed 0.039 ms (3.928 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.10% +0.00% +0.15% / +0.13% +0.79% +0.77%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.930 -> 3.935 ( +0.13%) [ +0.15% +0.00% +0.08% / +0.13% +0.92% +0.87%] index_select strided 8 : Elapsed 0.039 ms (3.936 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.00% +0.05% +0.10% / +0.03% +0.74% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.929 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.71% +0.66%] index_select random : Elapsed 0.039 ms (3.931 ms / 100) 3.932 -> 3.930 ( -0.05%) [ +0.13% +0.00% +0.10% / -0.05% +0.61% +0.56%] index_select random_sorted : Elapsed 0.039 ms (3.937 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.08% +0.71% +0.59%] index_select perm : Elapsed 0.039 ms (3.927 ms / 100) 3.926 -> 3.928 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.71% +0.59%] index_select perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) B = [4, 16, 40, 5] (stride (1, 4, 64, 2560)) A = [4, 20, 40, 5] (stride (800, 40, 1, 3200)) dim = 1 3.937 -> 3.940 ( +0.08%) [ +0.00% +0.10% +0.05% / +0.08% +0.71% +0.89%] index_select const : Elapsed 0.039 ms (3.937 ms / 100) 3.919 -> 3.926 ( +0.18%) [ +0.03% +0.00% +0.13% / +0.18% +0.79% +0.77%] index_select wrap : Elapsed 0.039 ms (3.920 ms / 100) 3.924 -> 3.926 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.66% +0.71%] index_select linear : Elapsed 0.039 ms (3.925 ms / 100) 3.930 -> 3.933 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.89% +0.84%] index_select reverse : Elapsed 0.039 ms (3.930 ms / 100) 3.944 -> 3.960 ( +0.41%) [ +0.00% +0.35% +0.18% / +0.41% +1.04% +0.81%] index_select skip64 : Elapsed 0.039 ms (3.944 ms / 100) 3.954 -> 3.953 ( -0.03%) [ +0.03% +0.00% +0.20% / -0.03% +0.81% +0.40%] index_select skip256 : Elapsed 0.040 ms (3.955 ms / 100) 3.928 -> 3.932 ( +0.10%) [ +0.00% +0.08% +0.05% / +0.10% +0.81% +0.76%] index_select spread : Elapsed 0.039 ms (3.928 ms / 100) 3.927 -> 3.929 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.69% +0.66%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.10% +0.13% +0.00% / +0.08% +0.89% +0.84%] index_select strided 5 : Elapsed 0.039 ms (3.926 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.05% +0.00% +0.08% / +0.13% +0.82% +0.92%] index_select strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 3.948 -> 3.951 ( +0.08%) [ +0.00% +0.08% +0.05% / +0.08% +0.61% +0.56%] index_select strided 8 : Elapsed 0.039 ms (3.948 ms / 100) 3.943 -> 3.939 ( -0.10%) [ +0.03% +0.03% +0.00% / -0.10% +0.74% +0.74%] index_select strided 16 : Elapsed 0.039 ms (3.944 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.56% +0.59%] index_select random : Elapsed 0.039 ms (3.932 ms / 100) 3.923 -> 3.921 ( -0.05%) [ +0.08% +0.00% +0.08% / -0.05% +0.56% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.930 -> 3.928 ( -0.05%) [ +0.13% +0.05% +0.00% / -0.05% +0.51% +0.56%] index_select perm : Elapsed 0.039 ms (3.935 ms / 100) 3.925 -> 3.922 ( -0.08%) [ +0.00% +0.00% +0.03% / -0.08% +0.56% +0.64%] index_select perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) out_shape = [4, 20, 16, 5] in_shape = [4, 20, 40, 5] idx_dim = 2 B = [4, 20, 16, 5] (stride (1600, 80, 5, 1)) A = [4, 20, 40, 5] (stride (1, 800, 4, 160)) dim = 2 3.930 -> 3.935 ( +0.13%) [ +0.05% +0.05% +0.00% / +0.13% +0.66% +0.53%] index_select const : Elapsed 0.039 ms (3.932 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.51% +0.61%] index_select wrap : Elapsed 0.039 ms (3.939 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.53% +0.51%] index_select linear : Elapsed 0.039 ms (3.933 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.53% +0.51%] index_select reverse : Elapsed 0.039 ms (3.930 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.13% +0.05% +0.00% / +0.03% +0.43% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.932 ms / 100) 3.934 -> 3.938 ( +0.10%) [ +0.00% +0.10% +0.13% / +0.10% +0.41% +0.43%] index_select skip256 : Elapsed 0.039 ms (3.934 ms / 100) 3.926 -> 3.928 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.41% +0.41%] index_select spread : Elapsed 0.039 ms (3.926 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_select strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.51% +0.51%] index_select strided 5 : Elapsed 0.039 ms (3.937 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.05% +0.00% +0.08% / +0.00% +0.54% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.943 -> 3.952 ( +0.23%) [ +0.25% +0.00% +0.20% / +0.23% +0.43% +0.68%] index_select strided 8 : Elapsed 0.040 ms (3.953 ms / 100) 3.923 -> 3.922 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.46% +0.41%] index_select strided 16 : Elapsed 0.039 ms (3.923 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.08% +0.00% +0.08% / +0.03% +0.41% +0.41%] index_select random : Elapsed 0.039 ms (3.939 ms / 100) 3.918 -> 3.923 ( +0.13%) [ +0.08% +0.05% +0.00% / +0.15% +0.13% +0.15%] index_select random_sorted : Elapsed 0.039 ms (3.921 ms / 100) 3.920 -> 3.929 ( +0.23%) [ +0.20% +0.20% +0.00% / +0.23% +0.66% +0.66%] index_select perm : Elapsed 0.039 ms (3.928 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.13% +0.00% +0.10% / +0.13% +0.51% +0.36%] index_select perm_sorted : Elapsed 0.039 ms (3.927 ms / 100) B = [4, 20, 16, 5] (stride (1600, 16, 1, 320)) A = [4, 20, 40, 5] (stride (1, 160, 4, 3200)) dim = 2 4.277 -> 4.277 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.70% +0.68%] index_select const : Elapsed 0.043 ms (4.278 ms / 100) 4.272 -> 4.276 ( +0.09%) [ +0.02% +0.00% +0.00% / +0.09% +0.73% +0.98%] index_select wrap : Elapsed 0.043 ms (4.273 ms / 100) 4.277 -> 4.278 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.77% +0.65%] index_select linear : Elapsed 0.043 ms (4.277 ms / 100) 4.269 -> 4.270 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.75% +0.73%] index_select reverse : Elapsed 0.043 ms (4.272 ms / 100) 4.269 -> 4.270 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.80% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.271 ms / 100) 4.274 -> 4.275 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.91% +0.91%] index_select skip256 : Elapsed 0.043 ms (4.275 ms / 100) 4.275 -> 4.279 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.75% +0.80%] index_select spread : Elapsed 0.043 ms (4.275 ms / 100) 4.284 -> 4.283 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.79% +0.75%] index_select strided 3 : Elapsed 0.043 ms (4.284 ms / 100) 4.273 -> 4.276 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.07% +0.75% +0.75%] index_select strided 5 : Elapsed 0.043 ms (4.277 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.70% +0.73%] index_select strided 7 : Elapsed 0.043 ms (4.274 ms / 100) 4.279 -> 4.277 ( -0.05%) [ +0.07% +0.02% +0.00% / -0.05% +0.84% +0.79%] index_select strided 8 : Elapsed 0.043 ms (4.282 ms / 100) 4.305 -> 4.304 ( -0.02%) [ +0.00% +0.02% +0.09% / -0.02% +0.67% +0.60%] index_select strided 16 : Elapsed 0.043 ms (4.305 ms / 100) 4.286 -> 4.287 ( +0.02%) [ +0.12% +0.02% +0.00% / +0.02% +0.82% +0.75%] index_select random : Elapsed 0.043 ms (4.291 ms / 100) 4.282 -> 4.284 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.77% +0.75%] index_select random_sorted : Elapsed 0.043 ms (4.284 ms / 100) 4.265 -> 4.272 ( +0.16%) [ +0.09% +0.09% +0.00% / +0.16% +0.87% +0.80%] index_select perm : Elapsed 0.043 ms (4.269 ms / 100) 4.292 -> 4.291 ( -0.02%) [ +0.00% +0.09% +0.07% / -0.02% +0.70% +0.68%] index_select perm_sorted : Elapsed 0.043 ms (4.292 ms / 100) B = [4, 20, 16, 5] (stride (80, 320, 5, 1)) A = [4, 20, 40, 5] (stride (4000, 1, 100, 20)) dim = 2 3.871 -> 3.870 ( -0.03%) [ +0.00% +0.10% +0.10% / -0.03% +0.62% +0.80%] index_select const : Elapsed 0.039 ms (3.871 ms / 100) 3.876 -> 3.882 ( +0.15%) [ +0.05% +0.00% +0.00% / +0.15% +1.11% +0.72%] index_select wrap : Elapsed 0.039 ms (3.878 ms / 100) 3.872 -> 3.874 ( +0.05%) [ +0.00% +0.03% +0.08% / +0.05% +0.39% +0.54%] index_select linear : Elapsed 0.039 ms (3.872 ms / 100) 3.877 -> 3.882 ( +0.13%) [ +0.00% +0.10% +0.00% / +0.13% +0.49% +0.54%] index_select reverse : Elapsed 0.039 ms (3.877 ms / 100) 3.880 -> 3.882 ( +0.05%) [ +0.10% +0.00% +0.21% / +0.05% +0.54% +0.67%] index_select skip64 : Elapsed 0.039 ms (3.884 ms / 100) 3.874 -> 3.873 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.46% +0.41%] index_select skip256 : Elapsed 0.039 ms (3.876 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.41% +0.52%] index_select spread : Elapsed 0.039 ms (3.880 ms / 100) 3.889 -> 3.891 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.67% +0.95%] index_select strided 3 : Elapsed 0.039 ms (3.893 ms / 100) 3.856 -> 3.860 ( +0.10%) [ +0.21% +0.10% +0.00% / +0.10% +0.39% +0.52%] index_select strided 5 : Elapsed 0.039 ms (3.864 ms / 100) 3.879 -> 3.883 ( +0.10%) [ +0.00% +0.03% +0.05% / +0.10% +0.57% +0.44%] index_select strided 7 : Elapsed 0.039 ms (3.879 ms / 100) 3.876 -> 3.876 ( +0.00%) [ +0.26% +0.26% +0.00% / +0.00% +0.46% +0.49%] index_select strided 8 : Elapsed 0.039 ms (3.886 ms / 100) 3.869 -> 3.869 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.49% +0.65%] index_select strided 16 : Elapsed 0.039 ms (3.874 ms / 100) 3.863 -> 3.867 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.60% +0.49%] index_select random : Elapsed 0.039 ms (3.863 ms / 100) 3.872 -> 3.875 ( +0.08%) [ +0.10% +0.10% +0.00% / +0.08% +0.57% +0.49%] index_select random_sorted : Elapsed 0.039 ms (3.876 ms / 100) 3.891 -> 3.894 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.33% +0.39%] index_select perm : Elapsed 0.039 ms (3.891 ms / 100) 3.881 -> 3.886 ( +0.13%) [ +0.00% +0.15% +0.15% / +0.13% +0.39% +0.41%] index_select perm_sorted : Elapsed 0.039 ms (3.881 ms / 100) B = [4, 20, 16, 5] (stride (80, 320, 1, 16)) A = [4, 20, 40, 5] (stride (40, 800, 1, 160)) dim = 2 4.280 -> 4.285 ( +0.12%) [ +0.12% +0.00% +0.19% / +0.12% +0.77% +0.72%] index_select const : Elapsed 0.043 ms (4.285 ms / 100) 4.271 -> 4.273 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.61% +0.63%] index_select wrap : Elapsed 0.043 ms (4.272 ms / 100) 4.287 -> 4.280 ( -0.16%) [ +0.05% +0.02% +0.00% / -0.16% +0.63% +0.65%] index_select linear : Elapsed 0.043 ms (4.289 ms / 100) 4.267 -> 4.281 ( +0.33%) [ +0.23% +0.00% +0.02% / +0.33% +1.08% +1.12%] index_select reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.261 -> 4.263 ( +0.05%) [ +0.26% +0.00% +0.23% / +0.05% +1.06% +0.70%] index_select skip64 : Elapsed 0.043 ms (4.272 ms / 100) 4.279 -> 4.285 ( +0.14%) [ +0.00% +0.02% +0.19% / +0.14% +0.77% +0.75%] index_select skip256 : Elapsed 0.043 ms (4.279 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.61% +0.56%] index_select spread : Elapsed 0.043 ms (4.279 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.09% +0.00% +0.07% / +0.00% +0.73% +0.68%] index_select strided 3 : Elapsed 0.043 ms (4.278 ms / 100) 4.280 -> 4.279 ( -0.02%) [ +0.00% +0.09% +0.00% / -0.02% +0.84% +0.58%] index_select strided 5 : Elapsed 0.043 ms (4.280 ms / 100) 4.272 -> 4.276 ( +0.09%) [ +0.09% +0.00% +0.12% / +0.09% +0.63% +0.77%] index_select strided 7 : Elapsed 0.043 ms (4.276 ms / 100) 4.286 -> 4.286 ( +0.00%) [ +0.05% +0.00% +0.07% / +0.00% +0.72% +0.75%] index_select strided 8 : Elapsed 0.043 ms (4.288 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.00% +0.21% +0.12% / +0.05% +0.79% +0.82%] index_select strided 16 : Elapsed 0.043 ms (4.281 ms / 100) 4.288 -> 4.290 ( +0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.70% +0.68%] index_select random : Elapsed 0.043 ms (4.289 ms / 100) 4.266 -> 4.268 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.80% +0.75%] index_select random_sorted : Elapsed 0.043 ms (4.268 ms / 100) 4.265 -> 4.258 ( -0.16%) [ +0.16% +0.05% +0.00% / -0.16% +0.77% +0.82%] index_select perm : Elapsed 0.043 ms (4.272 ms / 100) 4.284 -> 4.287 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.07% +0.70% +0.72%] index_select perm_sorted : Elapsed 0.043 ms (4.285 ms / 100) B = [4, 20, 16, 5] (stride (1, 20, 400, 4)) A = [4, 20, 40, 5] (stride (40, 800, 1, 160)) dim = 2 4.276 -> 4.276 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.35% +0.61%] index_select const : Elapsed 0.043 ms (4.277 ms / 100) 4.275 -> 4.278 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.49% +0.40%] index_select wrap : Elapsed 0.043 ms (4.275 ms / 100) 4.273 -> 4.274 ( +0.02%) [ +0.16% +0.12% +0.00% / +0.02% +0.66% +0.61%] index_select linear : Elapsed 0.043 ms (4.280 ms / 100) 4.279 -> 4.279 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.61% +0.56%] index_select reverse : Elapsed 0.043 ms (4.280 ms / 100) 4.270 -> 4.271 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.56% +0.54%] index_select skip64 : Elapsed 0.043 ms (4.272 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.00% +0.12% +0.07% / +0.05% +0.56% +0.63%] index_select skip256 : Elapsed 0.043 ms (4.272 ms / 100) 4.269 -> 4.272 ( +0.07%) [ +0.00% +0.07% +0.09% / +0.07% +0.42% +0.35%] index_select spread : Elapsed 0.043 ms (4.269 ms / 100) 4.278 -> 4.285 ( +0.16%) [ +0.00% +0.21% +0.07% / +0.16% +0.42% +0.65%] index_select strided 3 : Elapsed 0.043 ms (4.278 ms / 100) 4.272 -> 4.271 ( -0.02%) [ +0.05% +0.00% +0.05% / -0.02% +0.44% +0.44%] index_select strided 5 : Elapsed 0.043 ms (4.274 ms / 100) 4.280 -> 4.285 ( +0.12%) [ +0.00% +0.09% +0.07% / +0.12% +0.63% +0.56%] index_select strided 7 : Elapsed 0.043 ms (4.280 ms / 100) 4.294 -> 4.297 ( +0.07%) [ +0.05% +0.00% +0.07% / +0.07% +0.51% +0.47%] index_select strided 8 : Elapsed 0.043 ms (4.296 ms / 100) 4.278 -> 4.282 ( +0.09%) [ +0.02% +0.00% +0.02% / +0.09% +0.37% +0.40%] index_select strided 16 : Elapsed 0.043 ms (4.279 ms / 100) 4.301 -> 4.303 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.33% +0.30%] index_select random : Elapsed 0.043 ms (4.303 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.02% +0.12% +0.00% / +0.12% +0.52% +0.49%] index_select random_sorted : Elapsed 0.043 ms (4.268 ms / 100) 4.289 -> 4.289 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_select perm : Elapsed 0.043 ms (4.290 ms / 100) 4.265 -> 4.267 ( +0.05%) [ +0.00% +0.07% +0.12% / +0.05% +0.52% +0.45%] index_select perm_sorted : Elapsed 0.043 ms (4.265 ms / 100) B = [4, 20, 16, 5] (stride (1, 4, 80, 1280)) A = [4, 20, 40, 5] (stride (800, 40, 1, 3200)) dim = 2 3.931 -> 3.931 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.79% +0.69%] index_select const : Elapsed 0.039 ms (3.934 ms / 100) 3.930 -> 3.936 ( +0.15%) [ +0.13% +0.05% +0.00% / +0.15% +0.71% +0.69%] index_select wrap : Elapsed 0.039 ms (3.935 ms / 100) 3.946 -> 3.949 ( +0.08%) [ +0.28% +0.00% +0.05% / +0.08% +0.76% +0.43%] index_select linear : Elapsed 0.040 ms (3.957 ms / 100) 3.934 -> 3.937 ( +0.08%) [ +0.00% +0.18% +0.18% / +0.08% +0.81% +0.86%] index_select reverse : Elapsed 0.039 ms (3.934 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.13% +0.00% +0.23% / +0.15% +0.94% +0.94%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.79% +0.66%] index_select skip256 : Elapsed 0.039 ms (3.932 ms / 100) 3.920 -> 3.923 ( +0.08%) [ +0.15% +0.10% +0.00% / +0.08% +0.74% +0.71%] index_select spread : Elapsed 0.039 ms (3.926 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.10% +0.00% +0.03% / +0.13% +0.87% +0.84%] index_select strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 3.926 -> 3.931 ( +0.13%) [ +0.00% +0.10% +0.13% / +0.13% +0.61% +0.66%] index_select strided 5 : Elapsed 0.039 ms (3.926 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.15% +0.13% +0.00% / +0.05% +0.64% +0.46%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.61% +0.64%] index_select strided 8 : Elapsed 0.039 ms (3.934 ms / 100) 3.917 -> 3.924 ( +0.18%) [ +0.13% +0.00% +0.03% / +0.18% +0.69% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.922 ms / 100) 3.941 -> 3.943 ( +0.05%) [ +0.05% +0.13% +0.00% / +0.05% +0.81% +0.74%] index_select random : Elapsed 0.039 ms (3.943 ms / 100) 3.928 -> 3.932 ( +0.10%) [ +0.05% +0.00% +0.03% / +0.10% +0.51% +0.64%] index_select random_sorted : Elapsed 0.039 ms (3.930 ms / 100) 3.931 -> 3.939 ( +0.20%) [ +0.03% +0.00% +0.03% / +0.20% +0.69% +0.81%] index_select perm : Elapsed 0.039 ms (3.932 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.69% +0.64%] index_select perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) out_shape = [4, 20, 40, 16] in_shape = [4, 20, 40, 5] idx_dim = 3 B = [4, 20, 40, 16] (stride (12800, 640, 16, 1)) A = [4, 20, 40, 5] (stride (20, 1, 80, 3200)) dim = 3 2.110 -> 2.112 ( +0.09%) [ +0.05% +0.14% +0.00% / +0.09% +0.85% +0.85%] index_add_ linear : Elapsed 0.021 ms (2.111 ms / 100) 2.059 -> 2.064 ( +0.24%) [ +0.00% +0.10% +0.24% / +0.24% +0.73% +0.73%] index_copy_ linear : Elapsed 0.021 ms (2.059 ms / 100) 2.115 -> 2.116 ( +0.05%) [ +0.00% +0.14% +0.00% / +0.05% +0.66% +0.57%] index_add_ reverse : Elapsed 0.021 ms (2.115 ms / 100) 2.064 -> 2.067 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.68% +1.07%] index_copy_ reverse : Elapsed 0.021 ms (2.067 ms / 100) 2.145 -> 2.147 ( +0.09%) [ +0.14% +0.00% +0.19% / +0.09% +0.65% +0.47%] index_add_ spread : Elapsed 0.021 ms (2.148 ms / 100) 2.125 -> 2.130 ( +0.24%) [ +0.09% +0.05% +0.00% / +0.24% +0.38% +0.66%] index_copy_ spread : Elapsed 0.021 ms (2.127 ms / 100) 2.143 -> 2.149 ( +0.28%) [ +0.23% +0.00% +0.05% / +0.28% +0.79% +0.84%] index_add_ strided 3 : Elapsed 0.021 ms (2.148 ms / 100) 2.124 -> 2.129 ( +0.24%) [ +0.05% +0.24% +0.00% / +0.24% +0.71% +0.56%] index_copy_ strided 3 : Elapsed 0.021 ms (2.125 ms / 100) 2.146 -> 2.144 ( -0.09%) [ +0.05% +0.09% +0.00% / -0.09% +0.65% +0.65%] index_add_ strided 5 : Elapsed 0.021 ms (2.147 ms / 100) 2.127 -> 2.125 ( -0.09%) [ +0.05% +0.00% +0.19% / -0.09% +0.56% +0.71%] index_copy_ strided 5 : Elapsed 0.021 ms (2.128 ms / 100) 2.139 -> 2.141 ( +0.09%) [ +0.14% +0.19% +0.00% / +0.09% +0.94% +0.79%] index_add_ strided 7 : Elapsed 0.021 ms (2.142 ms / 100) 2.120 -> 2.124 ( +0.19%) [ +0.09% +0.24% +0.00% / +0.19% +0.66% +0.75%] index_copy_ strided 7 : Elapsed 0.021 ms (2.122 ms / 100) 2.145 -> 2.147 ( +0.09%) [ +0.00% +0.14% +0.09% / +0.09% +0.65% +0.51%] index_add_ perm : Elapsed 0.021 ms (2.145 ms / 100) 2.126 -> 2.129 ( +0.14%) [ +0.00% +0.00% +0.05% / +0.14% +0.66% +0.56%] index_copy_ perm : Elapsed 0.021 ms (2.126 ms / 100) 2.141 -> 2.144 ( +0.14%) [ +0.28% +0.37% +0.00% / +0.14% +0.93% +0.79%] index_add_ perm_sorted : Elapsed 0.021 ms (2.147 ms / 100) 2.125 -> 2.126 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.56% +0.66%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.125 ms / 100) 4.287 -> 4.291 ( +0.09%) [ +0.00% +0.14% +0.05% / +0.09% +0.61% +0.56%] index_select const : Elapsed 0.043 ms (4.287 ms / 100) 4.289 -> 4.295 ( +0.14%) [ +0.02% +0.00% +0.05% / +0.14% +0.70% +0.70%] index_select wrap : Elapsed 0.043 ms (4.290 ms / 100) 4.335 -> 4.343 ( +0.18%) [ +0.05% +0.00% +0.21% / +0.18% +0.55% +0.65%] index_select linear : Elapsed 0.043 ms (4.337 ms / 100) 4.314 -> 4.316 ( +0.05%) [ +0.00% +0.09% +0.07% / +0.05% +0.88% +0.76%] index_select reverse : Elapsed 0.043 ms (4.314 ms / 100) 4.285 -> 4.288 ( +0.07%) [ +0.00% +0.05% +0.09% / +0.07% +0.63% +0.70%] index_select skip64 : Elapsed 0.043 ms (4.285 ms / 100) 4.285 -> 4.287 ( +0.05%) [ +0.12% +0.05% +0.00% / +0.05% +0.68% +0.63%] index_select skip256 : Elapsed 0.043 ms (4.290 ms / 100) 4.297 -> 4.306 ( +0.21%) [ +0.16% +0.12% +0.00% / +0.21% +0.72% +0.77%] index_select spread : Elapsed 0.043 ms (4.304 ms / 100) 4.299 -> 4.298 ( -0.02%) [ +0.14% +0.00% +0.16% / -0.02% +0.74% +0.67%] index_select strided 3 : Elapsed 0.043 ms (4.305 ms / 100) 4.287 -> 4.286 ( -0.02%) [ +0.12% +0.00% +0.05% / -0.02% +0.77% +0.82%] index_select random : Elapsed 0.043 ms (4.292 ms / 100) 4.282 -> 4.288 ( +0.14%) [ +0.19% +0.19% +0.00% / +0.14% +0.86% +0.79%] index_select random_sorted : Elapsed 0.043 ms (4.290 ms / 100) B = [4, 20, 40, 16] (stride (12800, 16, 320, 1)) A = [4, 20, 40, 5] (stride (200, 800, 5, 1)) dim = 3 2.388 -> 2.390 ( +0.08%) [ +0.38% +0.17% +0.00% / +0.50% +0.25% +0.08%] index_add_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.353 -> 2.354 ( +0.04%) [ +0.30% +0.30% +0.00% / +0.76% +0.04% +0.21%] index_copy_ linear : Elapsed 0.024 ms (2.360 ms / 100) 2.382 -> 2.390 ( +0.34%) [ +0.25% +0.21% +0.00% / +0.50% +0.34% +0.63%] index_add_ reverse : Elapsed 0.024 ms (2.388 ms / 100) 2.346 -> 2.352 ( +0.26%) [ +0.26% +0.47% +0.00% / +0.26% +0.47% +0.51%] index_copy_ reverse : Elapsed 0.024 ms (2.352 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.21% +0.08% +0.00% / +0.12% +0.33% +0.08%] index_add_ spread : Elapsed 0.024 ms (2.418 ms / 100) 2.418 -> 2.417 ( -0.04%) [ +0.00% +0.12% +0.00% / -0.04% -0.04% +0.08%] index_copy_ spread : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.419 ( +0.12%) [ +0.29% +0.37% +0.00% / +0.17% +0.29% +0.12%] index_add_ strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.37% +0.21% +0.00% / +0.74% +0.04% +0.00%] index_copy_ strided 3 : Elapsed 0.024 ms (2.429 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.00% +0.00% +0.04% / +0.12% +0.29% +0.29%] index_add_ strided 5 : Elapsed 0.024 ms (2.421 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.37% +0.29%] index_copy_ strided 5 : Elapsed 0.024 ms (2.422 ms / 100) 2.422 -> 2.418 ( -0.17%) [ +0.25% +0.12% +0.00% / -0.08% +0.04% -0.17%] index_add_ strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.29% +0.37% +0.00% / +0.04% +0.33% -0.08%] index_copy_ strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.416 -> 2.411 ( -0.21%) [ +0.00% +0.25% +0.08% / +0.29% +0.25% -0.21%] index_add_ perm : Elapsed 0.024 ms (2.416 ms / 100) 2.420 -> 2.416 ( -0.17%) [ +0.00% +0.04% +0.12% / +0.21% -0.04% -0.17%] index_copy_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.41% +0.00% +0.12% / +0.08% -0.04% +0.00%] index_add_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.414 -> 2.408 ( -0.25%) [ +0.29% +0.00% +0.04% / +0.08% +0.08% -0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.421 ms / 100) 5.058 -> 5.069 ( +0.22%) [ +0.00% +0.12% +0.24% / +0.22% +0.61% +0.69%] index_select const : Elapsed 0.051 ms (5.058 ms / 100) 5.059 -> 5.068 ( +0.18%) [ +0.32% +0.18% +0.00% / +0.18% +0.32% +0.57%] index_select wrap : Elapsed 0.051 ms (5.075 ms / 100) 5.064 -> 5.071 ( +0.14%) [ +0.00% +0.22% +0.02% / +0.14% +0.53% +0.49%] index_select linear : Elapsed 0.051 ms (5.064 ms / 100) 5.062 -> 5.073 ( +0.22%) [ +0.28% +0.18% +0.00% / +0.22% +0.38% +0.53%] index_select reverse : Elapsed 0.051 ms (5.076 ms / 100) 5.084 -> 5.083 ( -0.02%) [ +0.10% +0.14% +0.00% / -0.02% +0.22% +0.55%] index_select skip64 : Elapsed 0.051 ms (5.089 ms / 100) 5.090 -> 5.086 ( -0.08%) [ +0.10% +0.10% +0.00% / -0.08% +0.28% +0.31%] index_select skip256 : Elapsed 0.051 ms (5.095 ms / 100) 5.062 -> 5.063 ( +0.02%) [ +0.12% +0.10% +0.00% / +0.02% +0.51% +0.55%] index_select spread : Elapsed 0.051 ms (5.068 ms / 100) 5.058 -> 5.068 ( +0.20%) [ +0.00% +0.28% +0.00% / +0.20% +0.45% +0.59%] index_select strided 3 : Elapsed 0.051 ms (5.058 ms / 100) 5.056 -> 5.062 ( +0.12%) [ +0.20% +0.20% +0.00% / +0.12% +0.69% +0.59%] index_select random : Elapsed 0.051 ms (5.066 ms / 100) 5.082 -> 5.078 ( -0.08%) [ +0.02% +0.00% +0.24% / -0.08% +0.83% +0.61%] index_select random_sorted : Elapsed 0.051 ms (5.083 ms / 100) B = [4, 20, 40, 16] (stride (320, 1, 1280, 20)) dim = 3 fill_cnt = 5 1.110 -> 1.111 ( +0.09%) [ +0.45% +0.27% +0.00% / +0.09% +0.72% +0.63%] index_fill_ const : Elapsed 0.011 ms (1.115 ms / 100) 1.132 -> 1.131 ( -0.09%) [ +0.00% +0.53% +0.18% / -0.09% +0.27% +0.44%] index_fill_ linear : Elapsed 0.011 ms (1.132 ms / 100) 1.137 -> 1.134 ( -0.26%) [ +0.18% +0.26% +0.00% / -0.26% +0.09% +0.18%] index_fill_ reverse : Elapsed 0.011 ms (1.139 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.27% +0.00% +0.09% / +0.09% +0.45% +0.63%] index_fill_ skip64 : Elapsed 0.011 ms (1.114 ms / 100) 1.112 -> 1.111 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.45% +0.45%] index_fill_ skip256 : Elapsed 0.011 ms (1.112 ms / 100) 1.171 -> 1.175 ( +0.34%) [ +0.17% +0.00% +0.00% / +0.34% +0.34% +0.34%] index_fill_ spread : Elapsed 0.012 ms (1.173 ms / 100) 1.171 -> 1.172 ( +0.09%) [ +0.43% +0.34% +0.00% / +0.17% +0.43% +0.09%] index_fill_ strided 3 : Elapsed 0.012 ms (1.176 ms / 100) 1.147 -> 1.146 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.09% +0.61%] index_fill_ strided 5 : Elapsed 0.011 ms (1.147 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.00% +0.52% +0.17% / +0.09% +0.61% +0.44%] index_fill_ strided 7 : Elapsed 0.011 ms (1.148 ms / 100) 1.117 -> 1.117 ( +0.00%) [ +0.00% +0.00% +0.27% / +0.00% +0.63% +0.81%] index_fill_ strided 8 : Elapsed 0.011 ms (1.117 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.87% +1.39%] index_fill_ random : Elapsed 0.011 ms (1.147 ms / 100) 1.147 -> 1.147 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.87% +0.78%] index_fill_ random_sorted : Elapsed 0.011 ms (1.147 ms / 100) 1.152 -> 1.151 ( -0.09%) [ +0.26% +0.00% +0.09% / -0.09% +0.26% +0.52%] index_fill_ perm : Elapsed 0.012 ms (1.155 ms / 100) 1.151 -> 1.148 ( -0.26%) [ +0.17% +0.00% +0.00% / -0.26% +0.26% +0.43%] index_fill_ perm_sorted : Elapsed 0.012 ms (1.153 ms / 100) B = [4, 20, 40, 16] (stride (800, 1, 20, 3200)) A = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) dim = 3 2.339 -> 2.344 ( +0.21%) [ +0.00% +0.26% +0.13% / +0.21% +0.60% +0.60%] index_add_ linear : Elapsed 0.023 ms (2.339 ms / 100) 2.279 -> 2.286 ( +0.31%) [ +0.26% +0.00% +0.00% / +0.31% +1.10% +1.36%] index_copy_ linear : Elapsed 0.023 ms (2.285 ms / 100) 2.334 -> 2.331 ( -0.13%) [ +0.13% +0.04% +0.00% / -0.13% +1.20% +1.07%] index_add_ reverse : Elapsed 0.023 ms (2.337 ms / 100) 2.280 -> 2.278 ( -0.09%) [ +0.09% +0.00% +0.26% / -0.09% +1.40% +1.67%] index_copy_ reverse : Elapsed 0.023 ms (2.282 ms / 100) 2.340 -> 2.353 ( +0.56%) [ +0.51% +0.00% +0.47% / +0.56% +0.94% +1.07%] index_add_ spread : Elapsed 0.024 ms (2.352 ms / 100) 2.293 -> 2.296 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +1.05% +1.13%] index_copy_ spread : Elapsed 0.023 ms (2.293 ms / 100) 2.347 -> 2.346 ( -0.04%) [ +0.00% +0.04% +0.17% / -0.04% +0.94% +0.89%] index_add_ strided 3 : Elapsed 0.023 ms (2.347 ms / 100) 2.291 -> 2.296 ( +0.22%) [ +0.22% +0.00% +0.26% / +0.22% +1.48% +1.35%] index_copy_ strided 3 : Elapsed 0.023 ms (2.296 ms / 100) 2.339 -> 2.346 ( +0.30%) [ +0.00% +0.04% +0.04% / +0.30% +1.03% +1.37%] index_add_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.285 -> 2.284 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +1.05% +1.18%] index_copy_ strided 5 : Elapsed 0.023 ms (2.285 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.43% +0.00% +0.17% / +0.04% +0.82% +1.16%] index_add_ strided 7 : Elapsed 0.023 ms (2.341 ms / 100) 2.277 -> 2.280 ( +0.13%) [ +0.00% +0.00% +0.09% / +0.13% +1.45% +1.19%] index_copy_ strided 7 : Elapsed 0.023 ms (2.277 ms / 100) 2.342 -> 2.346 ( +0.17%) [ +0.00% +0.34% +0.30% / +0.17% +0.60% +0.51%] index_add_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.292 -> 2.296 ( +0.17%) [ +0.13% +0.00% +0.22% / +0.17% +1.13% +1.09%] index_copy_ perm : Elapsed 0.023 ms (2.295 ms / 100) 2.347 -> 2.350 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.38% +0.51%] index_add_ perm_sorted : Elapsed 0.024 ms (2.350 ms / 100) 2.289 -> 2.295 ( +0.26%) [ +0.13% +0.00% +0.26% / +0.26% +0.66% +1.31%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.292 ms / 100) 4.791 -> 4.800 ( +0.19%) [ +0.52% +0.00% +0.56% / +0.19% +0.73% +0.90%] index_select const : Elapsed 0.048 ms (4.816 ms / 100) 4.839 -> 4.848 ( +0.19%) [ +0.21% +0.00% +0.27% / +0.19% +0.89% +1.07%] index_select wrap : Elapsed 0.048 ms (4.849 ms / 100) 4.880 -> 4.882 ( +0.04%) [ +0.08% +0.14% +0.00% / +0.04% +1.27% +0.86%] index_select linear : Elapsed 0.049 ms (4.884 ms / 100) 4.884 -> 4.911 ( +0.55%) [ +0.72% +0.00% +0.61% / +0.55% +0.92% +0.80%] index_select reverse : Elapsed 0.049 ms (4.919 ms / 100) 4.796 -> 4.817 ( +0.44%) [ +0.00% +0.35% +0.33% / +0.44% +1.06% +0.71%] index_select skip64 : Elapsed 0.048 ms (4.796 ms / 100) 4.800 -> 4.811 ( +0.23%) [ +0.00% +0.19% +0.35% / +0.23% +0.60% +1.19%] index_select skip256 : Elapsed 0.048 ms (4.800 ms / 100) 4.864 -> 4.888 ( +0.49%) [ +0.00% +0.14% +0.29% / +0.49% +1.09% +1.44%] index_select spread : Elapsed 0.049 ms (4.864 ms / 100) 4.866 -> 4.869 ( +0.06%) [ +0.00% +0.23% +0.29% / +0.06% +0.92% +1.25%] index_select strided 3 : Elapsed 0.049 ms (4.866 ms / 100) 4.872 -> 4.872 ( +0.00%) [ +0.00% +0.12% +0.25% / +0.00% +1.52% +1.42%] index_select random : Elapsed 0.049 ms (4.872 ms / 100) 4.855 -> 4.870 ( +0.31%) [ +0.00% +0.16% +0.08% / +0.31% +1.40% +1.01%] index_select random_sorted : Elapsed 0.049 ms (4.855 ms / 100) out_shape = [16, 40, 5, 20] in_shape = [4, 40, 5, 20] idx_dim = 0 B = [16, 40, 5, 20] (stride (4000, 20, 800, 1)) A = [4, 40, 5, 20] (stride (100, 400, 20, 1)) dim = 0 2.386 -> 2.388 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.25% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.387 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.00% +0.00% +0.09% / +0.04% +0.47% +0.26%] index_copy_ linear : Elapsed 0.023 ms (2.331 ms / 100) 2.388 -> 2.385 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.42% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.388 ms / 100) 2.332 -> 2.331 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.34% +0.26%] index_copy_ reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.21% +0.00% +0.17% / +0.00% +0.63% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.383 ms / 100) 2.329 -> 2.332 ( +0.13%) [ +0.13% +0.04% +0.00% / +0.13% +1.07% +0.47%] index_copy_ spread : Elapsed 0.023 ms (2.332 ms / 100) 2.380 -> 2.379 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.50% +0.38%] index_add_ strided 3 : Elapsed 0.024 ms (2.382 ms / 100) 2.332 -> 2.335 ( +0.13%) [ +0.17% +0.00% +0.13% / +0.13% +0.47% +0.26%] index_copy_ strided 3 : Elapsed 0.023 ms (2.336 ms / 100) 2.381 -> 2.382 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +0.29% +0.29%] index_add_ strided 5 : Elapsed 0.024 ms (2.384 ms / 100) 2.334 -> 2.335 ( +0.04%) [ +0.21% +0.00% +0.30% / +0.04% +0.21% +0.21%] index_copy_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.384 -> 2.385 ( +0.04%) [ +0.13% +0.08% +0.00% / +0.04% +0.34% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.387 ms / 100) 2.334 -> 2.336 ( +0.09%) [ +0.00% +0.04% +0.00% / +0.09% +0.09% +0.09%] index_copy_ strided 7 : Elapsed 0.023 ms (2.334 ms / 100) 2.381 -> 2.384 ( +0.13%) [ +0.00% +0.25% +0.21% / +0.29% +0.13% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.381 ms / 100) 2.330 -> 2.329 ( -0.04%) [ +0.13% +0.00% +0.21% / -0.04% +0.21% +0.17%] index_copy_ perm : Elapsed 0.023 ms (2.333 ms / 100) 2.382 -> 2.375 ( -0.29%) [ +0.00% +0.08% +0.08% / -0.29% +0.17% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.382 ms / 100) 2.328 -> 2.332 ( +0.17%) [ +0.13% +0.00% +0.43% / +0.17% +0.39% +0.47%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.331 ms / 100) 4.786 -> 4.798 ( +0.25%) [ +0.23% +0.10% +0.00% / +0.25% +0.40% +0.29%] index_select const : Elapsed 0.048 ms (4.797 ms / 100) 4.856 -> 4.852 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.39% +0.29%] index_select wrap : Elapsed 0.049 ms (4.856 ms / 100) 4.844 -> 4.855 ( +0.23%) [ +0.21% +0.00% +0.08% / +0.23% +0.50% +0.68%] index_select linear : Elapsed 0.049 ms (4.854 ms / 100) 4.853 -> 4.854 ( +0.02%) [ +0.16% +0.23% +0.00% / +0.02% +0.52% +0.43%] index_select reverse : Elapsed 0.049 ms (4.861 ms / 100) 4.791 -> 4.798 ( +0.15%) [ +0.38% +0.35% +0.00% / +0.15% +0.50% +0.56%] index_select skip64 : Elapsed 0.048 ms (4.809 ms / 100) 4.794 -> 4.798 ( +0.08%) [ +0.02% +0.00% +0.10% / +0.08% +0.10% +0.17%] index_select skip256 : Elapsed 0.048 ms (4.795 ms / 100) 4.854 -> 4.851 ( -0.06%) [ +0.02% +0.00% +0.08% / -0.06% +0.23% +0.25%] index_select spread : Elapsed 0.049 ms (4.855 ms / 100) 4.851 -> 4.854 ( +0.06%) [ +0.08% +0.00% +0.02% / +0.06% +0.39% +0.25%] index_select strided 3 : Elapsed 0.049 ms (4.855 ms / 100) 4.853 -> 4.856 ( +0.06%) [ +0.02% +0.00% +0.06% / +0.06% +0.19% +0.21%] index_select random : Elapsed 0.049 ms (4.854 ms / 100) 4.833 -> 4.835 ( +0.04%) [ +0.10% +0.00% +0.02% / +0.04% +0.29% +0.19%] index_select random_sorted : Elapsed 0.048 ms (4.838 ms / 100) B = [16, 40, 5, 20] (stride (1, 1600, 320, 16)) A = [4, 40, 5, 20] (stride (4000, 5, 1, 200)) dim = 0 2.200 -> 2.200 ( +0.00%) [ +0.14% +0.23% +0.00% / +0.00% +0.27% +0.09%] index_add_ linear : Elapsed 0.022 ms (2.203 ms / 100) 2.183 -> 2.185 ( +0.09%) [ +0.00% +0.09% +0.05% / +0.09% +0.23% +0.23%] index_copy_ linear : Elapsed 0.022 ms (2.183 ms / 100) 2.203 -> 2.198 ( -0.23%) [ +0.00% +0.05% +0.45% / +0.09% -0.09% -0.23%] index_add_ reverse : Elapsed 0.022 ms (2.203 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.14% +0.09%] index_copy_ reverse : Elapsed 0.022 ms (2.187 ms / 100) 2.253 -> 2.251 ( -0.09%) [ +0.00% +0.04% +0.18% / -0.04% -0.09% +0.09%] index_add_ spread : Elapsed 0.023 ms (2.253 ms / 100) 2.283 -> 2.286 ( +0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.35% +0.44%] index_copy_ spread : Elapsed 0.023 ms (2.283 ms / 100) 2.246 -> 2.249 ( +0.13%) [ +0.00% +0.00% +0.09% / +0.13% +0.27% +0.27%] index_add_ strided 3 : Elapsed 0.022 ms (2.246 ms / 100) 2.285 -> 2.284 ( -0.04%) [ +0.13% +0.00% +0.13% / -0.04% +0.09% +0.13%] index_copy_ strided 3 : Elapsed 0.023 ms (2.288 ms / 100) 2.247 -> 2.247 ( +0.00%) [ +0.00% +0.04% +0.13% / +0.00% +0.13% +0.27%] index_add_ strided 5 : Elapsed 0.022 ms (2.247 ms / 100) 2.285 -> 2.282 ( -0.13%) [ +0.00% +0.04% +0.44% / -0.13% +0.26% +0.22%] index_copy_ strided 5 : Elapsed 0.023 ms (2.285 ms / 100) 2.248 -> 2.253 ( +0.22%) [ +0.00% +0.13% +0.49% / +0.22% +0.49% +0.40%] index_add_ strided 7 : Elapsed 0.022 ms (2.248 ms / 100) 2.284 -> 2.285 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.26% +0.44%] index_copy_ strided 7 : Elapsed 0.023 ms (2.284 ms / 100) 2.245 -> 2.248 ( +0.13%) [ +0.18% +0.04% +0.00% / +0.13% +0.22% +0.40%] index_add_ perm : Elapsed 0.022 ms (2.249 ms / 100) 2.280 -> 2.282 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.09% +0.31% +0.44%] index_copy_ perm : Elapsed 0.023 ms (2.281 ms / 100) 2.245 -> 2.247 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.22% +0.09% +0.31%] index_add_ perm_sorted : Elapsed 0.022 ms (2.248 ms / 100) 2.289 -> 2.288 ( -0.04%) [ +0.00% +0.00% +0.26% / -0.04% +0.04% +0.09%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.289 ms / 100) 4.464 -> 4.467 ( +0.07%) [ +0.00% +0.11% +0.00% / +0.07% +0.54% +0.34%] index_select const : Elapsed 0.045 ms (4.464 ms / 100) 4.521 -> 4.521 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.35% +0.07%] index_select wrap : Elapsed 0.045 ms (4.525 ms / 100) 4.527 -> 4.524 ( -0.07%) [ +0.00% +0.04% +0.04% / -0.07% +0.20% +0.15%] index_select linear : Elapsed 0.045 ms (4.527 ms / 100) 4.509 -> 4.514 ( +0.11%) [ +0.09% +0.09% +0.00% / +0.11% +0.64% +0.44%] index_select reverse : Elapsed 0.045 ms (4.513 ms / 100) 4.495 -> 4.503 ( +0.18%) [ +0.22% +0.16% +0.00% / +0.18% +0.22% +0.22%] index_select skip64 : Elapsed 0.045 ms (4.505 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.20% +0.00% +0.09% / +0.07% +0.40% +0.47%] index_select skip256 : Elapsed 0.045 ms (4.502 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.04% +0.00% +0.04% / +0.07% +0.31% +0.33%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.533 -> 4.533 ( +0.00%) [ +0.04% +0.00% +0.09% / +0.00% +0.24% +0.31%] index_select strided 3 : Elapsed 0.045 ms (4.535 ms / 100) 4.477 -> 4.480 ( +0.07%) [ +0.13% +0.00% +0.02% / +0.07% +0.34% +0.38%] index_select random : Elapsed 0.045 ms (4.483 ms / 100) 4.509 -> 4.508 ( -0.02%) [ +0.11% +0.11% +0.00% / -0.02% +0.35% +0.33%] index_select random_sorted : Elapsed 0.045 ms (4.514 ms / 100) B = [16, 40, 5, 20] (stride (800, 20, 12800, 1)) A = [4, 40, 5, 20] (stride (5, 400, 1, 20)) dim = 0 2.527 -> 2.527 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.55% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.527 ms / 100) 2.476 -> 2.475 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.61% +0.69%] index_copy_ linear : Elapsed 0.025 ms (2.477 ms / 100) 2.528 -> 2.530 ( +0.08%) [ +0.24% +0.20% +0.00% / +0.08% +0.63% +0.55%] index_add_ reverse : Elapsed 0.025 ms (2.534 ms / 100) 2.475 -> 2.479 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.57% +0.44%] index_copy_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.528 -> 2.535 ( +0.28%) [ +0.00% +0.16% +0.28% / +0.28% +0.63% +0.51%] index_add_ spread : Elapsed 0.025 ms (2.528 ms / 100) 2.480 -> 2.477 ( -0.12%) [ +0.00% +0.08% +0.00% / -0.12% +0.44% +0.28%] index_copy_ spread : Elapsed 0.025 ms (2.480 ms / 100) 2.527 -> 2.532 ( +0.20%) [ +0.24% +0.12% +0.00% / +0.20% +0.63% +0.59%] index_add_ strided 3 : Elapsed 0.025 ms (2.533 ms / 100) 2.478 -> 2.478 ( +0.00%) [ +0.52% +0.00% +0.00% / +0.00% +0.36% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.491 ms / 100) 2.527 -> 2.531 ( +0.16%) [ +0.32% +0.00% +0.08% / +0.16% +0.20% +0.40%] index_add_ strided 5 : Elapsed 0.025 ms (2.535 ms / 100) 2.475 -> 2.481 ( +0.24%) [ +0.20% +0.00% +0.32% / +0.24% +0.53% +0.44%] index_copy_ strided 5 : Elapsed 0.025 ms (2.480 ms / 100) 2.529 -> 2.532 ( +0.12%) [ +0.36% +0.00% +0.04% / +0.12% +0.40% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.538 ms / 100) 2.476 -> 2.479 ( +0.12%) [ +0.20% +0.00% +0.12% / +0.12% +0.48% +2.75%] index_copy_ strided 7 : Elapsed 0.025 ms (2.481 ms / 100) 2.534 -> 2.530 ( -0.16%) [ +0.12% +0.12% +0.00% / -0.16% +0.16% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.537 ms / 100) 2.478 -> 2.478 ( +0.00%) [ +0.16% +0.24% +0.00% / +0.00% +0.40% +0.12%] index_copy_ perm : Elapsed 0.025 ms (2.482 ms / 100) 2.530 -> 2.530 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.20% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.533 ms / 100) 2.477 -> 2.481 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.24% +0.28%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.479 ms / 100) 5.560 -> 5.553 ( -0.13%) [ +0.07% +0.05% +0.00% / -0.13% +0.74% +0.54%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.592 -> 5.610 ( +0.32%) [ +0.00% +0.13% +0.07% / +0.32% +0.63% +0.34%] index_select wrap : Elapsed 0.056 ms (5.592 ms / 100) 5.605 -> 5.603 ( -0.04%) [ +0.29% +0.00% +0.07% / -0.04% +0.64% +0.50%] index_select linear : Elapsed 0.056 ms (5.621 ms / 100) 5.589 -> 5.587 ( -0.04%) [ +0.00% +0.13% +0.14% / -0.04% +0.34% +0.61%] index_select reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.00% +0.14% +0.02% / -0.09% +0.43% +0.47%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.594 -> 5.593 ( -0.02%) [ +0.16% +0.04% +0.00% / -0.02% +0.20% +0.30%] index_select skip256 : Elapsed 0.056 ms (5.603 ms / 100) 5.595 -> 5.595 ( +0.00%) [ +0.07% +0.29% +0.00% / +0.00% +0.48% +0.36%] index_select spread : Elapsed 0.056 ms (5.599 ms / 100) 5.589 -> 5.595 ( +0.11%) [ +0.30% +0.18% +0.00% / +0.11% +0.43% +0.48%] index_select strided 3 : Elapsed 0.056 ms (5.606 ms / 100) 5.592 -> 5.591 ( -0.02%) [ +0.25% +0.00% +0.16% / -0.02% +0.55% +0.46%] index_select random : Elapsed 0.056 ms (5.606 ms / 100) 5.579 -> 5.587 ( +0.14%) [ +0.09% +0.00% +0.00% / +0.14% +0.34% +0.34%] index_select random_sorted : Elapsed 0.056 ms (5.584 ms / 100) B = [16, 40, 5, 20] (stride (800, 20, 12800, 1)) A = [4, 40, 5, 20] (stride (1, 80, 3200, 4)) dim = 0 2.439 -> 2.438 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.33% +0.45%] index_add_ linear : Elapsed 0.024 ms (2.439 ms / 100) 2.392 -> 2.392 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.21% +0.50%] index_copy_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.438 -> 2.441 ( +0.12%) [ +0.21% +0.08% +0.00% / +0.16% +0.12% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.390 -> 2.392 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.08% +0.13% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.445 -> 2.442 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.12% +0.08% +0.16%] index_add_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.393 -> 2.395 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.13% +0.17%] index_copy_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.25% +0.20%] index_add_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.397 -> 2.400 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +0.13% +0.25%] index_copy_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.441 -> 2.440 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.08% +0.16%] index_add_ strided 5 : Elapsed 0.024 ms (2.441 ms / 100) 2.397 -> 2.398 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.17% +0.17%] index_copy_ strided 5 : Elapsed 0.024 ms (2.397 ms / 100) 2.442 -> 2.445 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.37% +0.20%] index_add_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.395 -> 2.398 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.21% +0.13%] index_copy_ strided 7 : Elapsed 0.024 ms (2.398 ms / 100) 2.441 -> 2.444 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.33% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.441 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.17% +0.17%] index_copy_ perm : Elapsed 0.024 ms (2.394 ms / 100) 2.441 -> 2.440 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.25% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.441 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.17% -0.04%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.401 ms / 100) 5.300 -> 5.301 ( +0.02%) [ +0.06% +0.02% +0.00% / +0.02% +0.51% +0.34%] index_select const : Elapsed 0.053 ms (5.303 ms / 100) 5.304 -> 5.303 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.25% +0.21%] index_select wrap : Elapsed 0.053 ms (5.304 ms / 100) 5.305 -> 5.309 ( +0.08%) [ +0.09% +0.00% +0.13% / +0.15% +0.08% +0.47%] index_select linear : Elapsed 0.053 ms (5.310 ms / 100) 5.321 -> 5.332 ( +0.21%) [ +0.00% +0.19% +0.23% / +0.21% +0.68% +0.49%] index_select reverse : Elapsed 0.053 ms (5.321 ms / 100) 5.313 -> 5.315 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.09% +0.32% +0.04%] index_select skip64 : Elapsed 0.053 ms (5.315 ms / 100) 5.300 -> 5.312 ( +0.23%) [ +0.08% +0.17% +0.00% / +0.23% +0.42% +0.62%] index_select skip256 : Elapsed 0.053 ms (5.304 ms / 100) 5.303 -> 5.307 ( +0.08%) [ +0.04% +0.06% +0.00% / +0.08% +0.30% +0.36%] index_select spread : Elapsed 0.053 ms (5.305 ms / 100) 5.322 -> 5.318 ( -0.08%) [ +0.15% +0.04% +0.00% / -0.08% +0.51% +0.28%] index_select strided 3 : Elapsed 0.053 ms (5.330 ms / 100) 5.307 -> 5.305 ( -0.04%) [ +0.00% +0.00% +0.24% / -0.04% +0.32% +0.30%] index_select random : Elapsed 0.053 ms (5.307 ms / 100) 5.321 -> 5.328 ( +0.13%) [ +0.00% +0.00% +0.08% / +0.13% +0.58% +0.51%] index_select random_sorted : Elapsed 0.053 ms (5.321 ms / 100) B = [16, 40, 5, 20] (stride (40, 1, 12800, 640)) A = [4, 40, 5, 20] (stride (1, 4, 160, 800)) dim = 0 2.501 -> 2.505 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.36% +0.24%] index_add_ linear : Elapsed 0.025 ms (2.505 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.00% +0.12% +0.16% / +0.12% +0.53% +0.49%] index_copy_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.499 -> 2.505 ( +0.24%) [ +0.00% +0.00% +0.04% / +0.24% +0.24% +0.44%] index_add_ reverse : Elapsed 0.025 ms (2.499 ms / 100) 2.451 -> 2.452 ( +0.04%) [ +0.00% +0.08% +0.24% / +0.04% +0.37% +0.41%] index_copy_ reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.495 -> 2.496 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.56% +0.28%] index_add_ spread : Elapsed 0.025 ms (2.496 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.57% +0.53%] index_copy_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.495 -> 2.496 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.04% +0.40% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.495 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.41% +0.33%] index_copy_ strided 3 : Elapsed 0.024 ms (2.448 ms / 100) 2.495 -> 2.499 ( +0.16%) [ +0.00% +0.12% +0.12% / +0.16% +0.48% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.495 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.33% +0.29%] index_copy_ strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.498 -> 2.497 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.36% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.500 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.25% +0.00% +0.04% / +0.08% +0.49% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.495 -> 2.492 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.12% +0.32% +0.28%] index_add_ perm : Elapsed 0.025 ms (2.497 ms / 100) 2.445 -> 2.445 ( +0.00%) [ +0.00% +0.25% +0.00% / +0.00% +0.33% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.496 -> 2.500 ( +0.16%) [ +0.12% +0.08% +0.00% / +0.16% +0.44% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.499 ms / 100) 2.448 -> 2.453 ( +0.20%) [ +0.08% +0.20% +0.00% / +0.20% +0.49% +0.41%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.450 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.00% +0.16% +0.34% / -0.05% +0.45% +0.61%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.560 -> 5.567 ( +0.13%) [ +0.07% +0.13% +0.00% / +0.13% +0.45% +0.49%] index_select wrap : Elapsed 0.056 ms (5.564 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.14% +0.00% +0.11% / +0.04% +0.40% +0.50%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.571 ( +0.09%) [ +0.13% +0.00% +0.05% / +0.09% +0.50% +0.52%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.568 -> 5.571 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.43% +0.38%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.567 -> 5.569 ( +0.04%) [ +0.00% +0.13% +0.05% / +0.04% +0.31% +0.41%] index_select skip256 : Elapsed 0.056 ms (5.567 ms / 100) 5.558 -> 5.569 ( +0.20%) [ +0.00% +0.20% +0.11% / +0.20% +0.49% +0.40%] index_select spread : Elapsed 0.056 ms (5.558 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.16% +0.02% +0.00% / +0.00% +0.47% +0.52%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.02% +0.14% +0.00% / +0.00% +0.45% +0.59%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.576 ( +0.11%) [ +0.16% +0.00% +0.02% / +0.11% +0.29% +0.65%] index_select random_sorted : Elapsed 0.056 ms (5.579 ms / 100) B = [16, 40, 5, 20] (stride (200, 5, 1, 3200)) A = [4, 40, 5, 20] (stride (1, 4, 3200, 160)) dim = 0 1.148 -> 1.146 ( -0.17%) [ +0.35% +0.09% +0.00% / -0.17% +0.52% +0.26%] index_add_ linear : Elapsed 0.012 ms (1.152 ms / 100) 1.139 -> 1.142 ( +0.26%) [ +0.18% +0.00% +0.70% / +0.26% +1.32% +0.97%] index_copy_ linear : Elapsed 0.011 ms (1.141 ms / 100) 1.143 -> 1.145 ( +0.17%) [ +0.26% +0.09% +0.00% / +0.17% +2.01% +1.66%] index_add_ reverse : Elapsed 0.011 ms (1.146 ms / 100) 1.137 -> 1.143 ( +0.53%) [ +0.00% +0.26% +0.44% / +0.53% +1.85% +1.41%] index_copy_ reverse : Elapsed 0.011 ms (1.137 ms / 100) 1.147 -> 1.149 ( +0.17%) [ +0.00% +0.17% +0.52% / +0.17% +0.35% +0.44%] index_add_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.143 -> 1.147 ( +0.35%) [ +0.35% +0.00% +0.26% / +0.35% +0.96% +1.14%] index_copy_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.149 -> 1.152 ( +0.26%) [ +0.35% +0.00% +0.17% / +0.26% +0.52% +0.87%] index_add_ strided 3 : Elapsed 0.012 ms (1.153 ms / 100) 1.143 -> 1.145 ( +0.17%) [ +0.00% +0.17% +0.26% / +0.17% +0.52% +0.61%] index_copy_ strided 3 : Elapsed 0.011 ms (1.143 ms / 100) 1.168 -> 1.153 ( -1.28%) [ +0.34% +0.34% +0.00% / +0.09% -1.28% -1.20%] index_add_ strided 5 : Elapsed 0.012 ms (1.172 ms / 100) 1.155 -> 1.146 ( -0.78%) [ +0.00% +0.52% +0.26% / +0.17% -0.69% -0.78%] index_copy_ strided 5 : Elapsed 0.012 ms (1.155 ms / 100) 1.161 -> 1.160 ( -0.09%) [ +0.00% +0.26% +0.17% / -0.09% +0.26% +0.26%] index_add_ strided 7 : Elapsed 0.012 ms (1.161 ms / 100) 1.160 -> 1.149 ( -0.95%) [ +0.00% +0.17% +0.17% / +0.00% -0.95% -0.52%] index_copy_ strided 7 : Elapsed 0.012 ms (1.160 ms / 100) 1.156 -> 1.153 ( -0.26%) [ +0.09% +0.35% +0.00% / +0.17% +0.00% -0.26%] index_add_ perm : Elapsed 0.012 ms (1.157 ms / 100) 1.151 -> 1.147 ( -0.35%) [ +0.00% +0.52% +0.52% / -0.35% -0.26% +0.09%] index_copy_ perm : Elapsed 0.012 ms (1.151 ms / 100) 1.163 -> 1.155 ( -0.69%) [ +0.00% +0.09% +0.09% / +0.17% -0.52% -0.69%] index_add_ perm_sorted : Elapsed 0.012 ms (1.163 ms / 100) 1.155 -> 1.150 ( -0.43%) [ +0.09% +0.26% +0.00% / +0.17% -0.43% -0.26%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.156 ms / 100) 2.156 -> 2.156 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.00% +0.97% +1.02%] index_select const : Elapsed 0.022 ms (2.159 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.23% +0.00% +0.00% / +0.05% +0.74% +0.65%] index_select wrap : Elapsed 0.022 ms (2.164 ms / 100) 2.158 -> 2.161 ( +0.14%) [ +0.00% +0.00% +0.19% / +0.14% +0.74% +0.74%] index_select linear : Elapsed 0.022 ms (2.158 ms / 100) 2.162 -> 2.162 ( +0.00%) [ +0.14% +0.00% +0.09% / +0.00% +0.32% +0.51%] index_select reverse : Elapsed 0.022 ms (2.165 ms / 100) 2.161 -> 2.161 ( +0.00%) [ +0.19% +0.00% +0.14% / +0.00% +0.42% +0.56%] index_select skip64 : Elapsed 0.022 ms (2.165 ms / 100) 2.157 -> 2.165 ( +0.37%) [ +0.05% +0.00% +0.09% / +0.37% +1.02% +0.88%] index_select skip256 : Elapsed 0.022 ms (2.158 ms / 100) 2.160 -> 2.162 ( +0.09%) [ +0.19% +0.00% +0.00% / +0.09% +0.74% +0.74%] index_select spread : Elapsed 0.022 ms (2.164 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.23% +0.00% +0.05% / +0.05% +0.93% +0.97%] index_select strided 3 : Elapsed 0.022 ms (2.161 ms / 100) 2.158 -> 2.165 ( +0.32%) [ +0.19% +0.19% +0.00% / +0.32% +0.65% +0.70%] index_select random : Elapsed 0.022 ms (2.162 ms / 100) 2.158 -> 2.162 ( +0.19%) [ +0.42% +0.00% +0.19% / +0.19% +0.88% +0.60%] index_select random_sorted : Elapsed 0.022 ms (2.167 ms / 100) B = [16, 40, 5, 20] (stride (200, 1, 40, 3200)) A = [4, 40, 5, 20] (stride (800, 20, 3200, 1)) dim = 0 2.489 -> 2.490 ( +0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.48% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.493 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.16% +0.12% +0.00% / -0.08% +0.37% +0.41%] index_copy_ linear : Elapsed 0.025 ms (2.451 ms / 100) 2.491 -> 2.495 ( +0.16%) [ +0.20% +0.24% +0.00% / +0.16% +0.48% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.496 ms / 100) 2.448 -> 2.453 ( +0.20%) [ +0.00% +0.12% +0.08% / +0.20% +0.45% +0.49%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.484 -> 2.489 ( +0.20%) [ +0.20% +0.00% +0.08% / +0.20% +0.56% +0.52%] index_add_ spread : Elapsed 0.025 ms (2.489 ms / 100) 2.441 -> 2.449 ( +0.33%) [ +0.37% +0.00% +0.20% / +0.33% +0.57% +0.45%] index_copy_ spread : Elapsed 0.024 ms (2.450 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.56% +0.56%] index_add_ strided 3 : Elapsed 0.025 ms (2.497 ms / 100) 2.450 -> 2.457 ( +0.29%) [ +0.00% +0.20% +0.08% / +0.29% +0.49% +0.65%] index_copy_ strided 3 : Elapsed 0.024 ms (2.450 ms / 100) 2.502 -> 2.498 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.16% +0.16% -0.16%] index_add_ strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.33% +0.00%] index_copy_ strided 5 : Elapsed 0.025 ms (2.458 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.12% +0.12%] index_add_ strided 7 : Elapsed 0.025 ms (2.493 ms / 100) 2.447 -> 2.452 ( +0.20%) [ +0.12% +0.00% +0.20% / +0.25% +0.69% +0.20%] index_copy_ strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.501 -> 2.501 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.12% +0.00% +0.04%] index_add_ perm : Elapsed 0.025 ms (2.503 ms / 100) 2.454 -> 2.458 ( +0.16%) [ +0.08% +0.00% +0.12% / +0.45% +0.16% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.456 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.16% +0.20% +0.12%] index_add_ perm_sorted : Elapsed 0.025 ms (2.495 ms / 100) 2.448 -> 2.452 ( +0.16%) [ +0.12% +0.00% +0.04% / +0.33% +0.29% +0.16%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.451 ms / 100) 5.272 -> 5.250 ( -0.42%) [ +0.00% +0.00% +0.00% / -0.42% -0.02% +0.44%] index_select const : Elapsed 0.053 ms (5.272 ms / 100) 5.316 -> 5.334 ( +0.34%) [ +0.11% +0.08% +0.00% / +0.41% +0.45% +0.34%] index_select wrap : Elapsed 0.053 ms (5.322 ms / 100) 5.311 -> 5.323 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.43% +0.58%] index_select linear : Elapsed 0.053 ms (5.311 ms / 100) 5.314 -> 5.323 ( +0.17%) [ +0.13% +0.00% +0.04% / +0.17% +0.38% +0.41%] index_select reverse : Elapsed 0.053 ms (5.321 ms / 100) 5.258 -> 5.259 ( +0.02%) [ +0.29% +0.08% +0.00% / +0.02% +0.32% +0.25%] index_select skip64 : Elapsed 0.053 ms (5.273 ms / 100) 5.246 -> 5.246 ( +0.00%) [ +0.15% +0.17% +0.00% / +0.00% +0.34% +0.34%] index_select skip256 : Elapsed 0.053 ms (5.254 ms / 100) 5.316 -> 5.314 ( -0.04%) [ +0.00% +0.02% +0.00% / -0.04% +0.28% +0.28%] index_select spread : Elapsed 0.053 ms (5.316 ms / 100) 5.308 -> 5.319 ( +0.21%) [ +0.11% +0.00% +0.04% / +0.23% +0.21% +0.26%] index_select strided 3 : Elapsed 0.053 ms (5.314 ms / 100) 5.315 -> 5.325 ( +0.19%) [ +0.09% +0.21% +0.00% / +0.19% +0.21% +0.41%] index_select random : Elapsed 0.053 ms (5.320 ms / 100) 5.302 -> 5.308 ( +0.11%) [ +0.02% +0.15% +0.00% / +0.11% +0.23% +0.28%] index_select random_sorted : Elapsed 0.053 ms (5.303 ms / 100) out_shape = [4, 16, 5, 20] in_shape = [4, 40, 5, 20] idx_dim = 1 B = [4, 16, 5, 20] (stride (100, 400, 1, 5)) A = [4, 40, 5, 20] (stride (40, 1, 160, 800)) dim = 1 4.256 -> 4.260 ( +0.09%) [ +0.00% +0.12% +0.07% / +0.09% +0.61% +0.45%] index_select const : Elapsed 0.043 ms (4.256 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.44% +0.42%] index_select wrap : Elapsed 0.043 ms (4.273 ms / 100) 4.278 -> 4.279 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.54% +0.58%] index_select linear : Elapsed 0.043 ms (4.278 ms / 100) 4.267 -> 4.268 ( +0.02%) [ +0.23% +0.00% +0.21% / +0.02% +0.49% +0.70%] index_select reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.272 -> 4.272 ( +0.00%) [ +0.02% +0.09% +0.00% / +0.00% +0.44% +0.44%] index_select skip64 : Elapsed 0.043 ms (4.273 ms / 100) 4.258 -> 4.258 ( +0.00%) [ +0.16% +0.00% +0.14% / +0.00% +0.47% +0.42%] index_select skip256 : Elapsed 0.043 ms (4.265 ms / 100) 4.259 -> 4.264 ( +0.12%) [ +0.05% +0.00% +0.05% / +0.12% +0.42% +0.45%] index_select spread : Elapsed 0.043 ms (4.261 ms / 100) 4.281 -> 4.281 ( +0.00%) [ +0.00% +0.02% +0.07% / +0.00% +0.42% +0.51%] index_select strided 3 : Elapsed 0.043 ms (4.281 ms / 100) 4.268 -> 4.275 ( +0.16%) [ +0.00% +0.14% +0.07% / +0.16% +0.54% +0.47%] index_select strided 5 : Elapsed 0.043 ms (4.268 ms / 100) 4.290 -> 4.295 ( +0.12%) [ +0.00% +0.00% +0.02% / +0.12% +0.54% +0.63%] index_select strided 7 : Elapsed 0.043 ms (4.290 ms / 100) 4.289 -> 4.292 ( +0.07%) [ +0.02% +0.00% +0.02% / +0.07% +0.35% +0.37%] index_select strided 8 : Elapsed 0.043 ms (4.290 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.19% +0.07% +0.00% / +0.00% +0.49% +0.40%] index_select strided 16 : Elapsed 0.043 ms (4.281 ms / 100) 4.291 -> 4.292 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.28% +0.35%] index_select random : Elapsed 0.043 ms (4.292 ms / 100) 4.276 -> 4.281 ( +0.12%) [ +0.09% +0.00% +0.12% / +0.12% +0.40% +0.26%] index_select random_sorted : Elapsed 0.043 ms (4.280 ms / 100) 4.291 -> 4.291 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.54% +0.68%] index_select perm : Elapsed 0.043 ms (4.291 ms / 100) 4.270 -> 4.273 ( +0.07%) [ +0.05% +0.16% +0.00% / +0.07% +0.47% +0.54%] index_select perm_sorted : Elapsed 0.043 ms (4.272 ms / 100) B = [4, 16, 5, 20] (stride (1, 400, 4, 20)) A = [4, 40, 5, 20] (stride (200, 1, 40, 800)) dim = 1 4.288 -> 4.291 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.61% +0.68%] index_select const : Elapsed 0.043 ms (4.289 ms / 100) 4.293 -> 4.293 ( +0.00%) [ +0.00% +0.02% +0.14% / +0.00% +0.56% +0.65%] index_select wrap : Elapsed 0.043 ms (4.293 ms / 100) 4.290 -> 4.291 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.63% +0.58%] index_select linear : Elapsed 0.043 ms (4.291 ms / 100) 4.293 -> 4.293 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.58% +0.56%] index_select reverse : Elapsed 0.043 ms (4.294 ms / 100) 4.281 -> 4.286 ( +0.12%) [ +0.02% +0.02% +0.00% / +0.12% +0.77% +0.82%] index_select skip64 : Elapsed 0.043 ms (4.282 ms / 100) 4.285 -> 4.285 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.65% +0.65%] index_select skip256 : Elapsed 0.043 ms (4.286 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.14% +0.00% +0.12% / +0.12% +0.84% +0.84%] index_select spread : Elapsed 0.043 ms (4.273 ms / 100) 4.260 -> 4.268 ( +0.19%) [ +0.00% +0.19% +0.26% / +0.19% +0.87% +0.70%] index_select strided 3 : Elapsed 0.043 ms (4.260 ms / 100) 4.277 -> 4.278 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.72% +0.77%] index_select strided 5 : Elapsed 0.043 ms (4.277 ms / 100) 4.275 -> 4.292 ( +0.40%) [ +0.42% +0.00% +0.30% / +0.40% +0.96% +1.08%] index_select strided 7 : Elapsed 0.043 ms (4.293 ms / 100) 4.297 -> 4.299 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.72% +0.72%] index_select strided 8 : Elapsed 0.043 ms (4.297 ms / 100) 4.294 -> 4.310 ( +0.37%) [ +0.16% +0.00% +0.37% / +0.37% +0.58% +0.54%] index_select strided 16 : Elapsed 0.043 ms (4.301 ms / 100) 4.302 -> 4.309 ( +0.16%) [ +0.05% +0.00% +0.00% / +0.16% +0.67% +0.63%] index_select random : Elapsed 0.043 ms (4.304 ms / 100) 4.260 -> 4.260 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.77% +0.66%] index_select random_sorted : Elapsed 0.043 ms (4.261 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.09% +0.07% +0.00% / +0.00% +0.61% +0.70%] index_select perm : Elapsed 0.043 ms (4.277 ms / 100) 4.279 -> 4.278 ( -0.02%) [ +0.07% +0.02% +0.00% / -0.02% +0.86% +0.79%] index_select perm_sorted : Elapsed 0.043 ms (4.282 ms / 100) B = [4, 16, 5, 20] (stride (320, 20, 1280, 1)) A = [4, 40, 5, 20] (stride (200, 5, 1, 800)) dim = 1 4.168 -> 4.168 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.00% +0.84% +0.72%] index_select const : Elapsed 0.042 ms (4.168 ms / 100) 4.164 -> 4.161 ( -0.07%) [ +0.00% +0.22% +0.10% / -0.07% +0.72% +0.82%] index_select wrap : Elapsed 0.042 ms (4.164 ms / 100) 4.163 -> 4.165 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.41% +0.53%] index_select linear : Elapsed 0.042 ms (4.167 ms / 100) 4.174 -> 4.185 ( +0.26%) [ +0.00% +0.00% +0.10% / +0.26% +0.55% +0.53%] index_select reverse : Elapsed 0.042 ms (4.174 ms / 100) 4.180 -> 4.194 ( +0.33%) [ +0.31% +0.31% +0.00% / +0.60% +0.81% +0.33%] index_select skip64 : Elapsed 0.042 ms (4.193 ms / 100) 4.165 -> 4.185 ( +0.48%) [ +0.10% +0.00% +0.07% / +0.48% +0.60% +0.77%] index_select skip256 : Elapsed 0.042 ms (4.169 ms / 100) 4.157 -> 4.164 ( +0.17%) [ +0.00% +0.10% +0.07% / +0.17% +0.53% +0.41%] index_select spread : Elapsed 0.042 ms (4.157 ms / 100) 4.179 -> 4.171 ( -0.19%) [ +0.00% +0.05% +0.00% / -0.19% +0.14% +0.17%] index_select strided 3 : Elapsed 0.042 ms (4.179 ms / 100) 4.175 -> 4.182 ( +0.17%) [ +0.17% +0.00% +0.07% / +0.17% +0.31% +0.34%] index_select strided 5 : Elapsed 0.042 ms (4.182 ms / 100) 4.185 -> 4.192 ( +0.17%) [ +0.07% +0.00% +0.05% / +0.17% +0.33% +0.36%] index_select strided 7 : Elapsed 0.042 ms (4.188 ms / 100) 4.183 -> 4.187 ( +0.10%) [ +0.02% +0.19% +0.00% / +0.10% +0.57% +0.62%] index_select strided 8 : Elapsed 0.042 ms (4.184 ms / 100) 4.168 -> 4.170 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.53% +0.43%] index_select strided 16 : Elapsed 0.042 ms (4.168 ms / 100) 4.167 -> 4.167 ( +0.00%) [ +0.07% +0.00% +0.22% / +0.00% +0.79% +0.79%] index_select random : Elapsed 0.042 ms (4.170 ms / 100) 4.169 -> 4.169 ( +0.00%) [ +0.05% +0.00% +0.12% / +0.00% +0.50% +0.46%] index_select random_sorted : Elapsed 0.042 ms (4.171 ms / 100) 4.188 -> 4.186 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.36% +0.36%] index_select perm : Elapsed 0.042 ms (4.188 ms / 100) 4.163 -> 4.170 ( +0.17%) [ +0.02% +0.05% +0.00% / +0.17% +0.34% +0.58%] index_select perm_sorted : Elapsed 0.042 ms (4.164 ms / 100) B = [4, 16, 5, 20] (stride (16, 1, 1280, 64)) A = [4, 40, 5, 20] (stride (100, 400, 1, 5)) dim = 1 3.540 -> 3.545 ( +0.14%) [ +0.00% +0.08% +0.11% / +0.14% +0.59% +0.65%] index_select const : Elapsed 0.035 ms (3.540 ms / 100) 3.533 -> 3.532 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.74% +0.71%] index_select wrap : Elapsed 0.035 ms (3.535 ms / 100) 3.537 -> 3.551 ( +0.40%) [ +0.08% +0.11% +0.00% / +0.40% +1.02% +0.88%] index_select linear : Elapsed 0.035 ms (3.540 ms / 100) 3.527 -> 3.543 ( +0.45%) [ +0.40% +0.00% +0.17% / +0.45% +0.99% +0.99%] index_select reverse : Elapsed 0.035 ms (3.541 ms / 100) 3.530 -> 3.537 ( +0.20%) [ +0.00% +0.00% +0.06% / +0.20% +0.74% +0.71%] index_select skip64 : Elapsed 0.035 ms (3.530 ms / 100) 3.537 -> 3.543 ( +0.17%) [ +0.00% +0.14% +0.06% / +0.17% +0.85% +0.90%] index_select skip256 : Elapsed 0.035 ms (3.537 ms / 100) 3.535 -> 3.543 ( +0.23%) [ +0.11% +0.00% +0.03% / +0.23% +0.85% +0.76%] index_select spread : Elapsed 0.035 ms (3.539 ms / 100) 3.541 -> 3.550 ( +0.25%) [ +0.14% +0.17% +0.00% / +0.25% +0.73% +0.85%] index_select strided 3 : Elapsed 0.035 ms (3.546 ms / 100) 3.544 -> 3.546 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.71% +0.71%] index_select strided 5 : Elapsed 0.035 ms (3.544 ms / 100) 3.541 -> 3.551 ( +0.28%) [ +0.06% +0.03% +0.00% / +0.28% +0.88% +0.88%] index_select strided 7 : Elapsed 0.035 ms (3.543 ms / 100) 3.543 -> 3.547 ( +0.11%) [ +0.06% +0.17% +0.00% / +0.11% +0.76% +0.76%] index_select strided 8 : Elapsed 0.035 ms (3.545 ms / 100) 3.543 -> 3.549 ( +0.17%) [ +0.08% +0.00% +0.00% / +0.17% +0.87% +0.85%] index_select strided 16 : Elapsed 0.035 ms (3.546 ms / 100) 3.532 -> 3.539 ( +0.20%) [ +0.00% +0.03% +0.08% / +0.20% +0.79% +0.79%] index_select random : Elapsed 0.035 ms (3.532 ms / 100) 3.526 -> 3.540 ( +0.40%) [ +0.14% +0.09% +0.00% / +0.40% +0.99% +0.99%] index_select random_sorted : Elapsed 0.035 ms (3.531 ms / 100) 3.544 -> 3.546 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.85% +0.87%] index_select perm : Elapsed 0.035 ms (3.545 ms / 100) 3.534 -> 3.537 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.76% +0.74%] index_select perm_sorted : Elapsed 0.035 ms (3.535 ms / 100) B = [4, 16, 5, 20] (stride (80, 1, 16, 320)) A = [4, 40, 5, 20] (stride (200, 5, 1, 800)) dim = 1 4.178 -> 4.177 ( -0.02%) [ +0.12% +0.00% +0.02% / -0.02% +0.65% +0.65%] index_select const : Elapsed 0.042 ms (4.183 ms / 100) 4.169 -> 4.177 ( +0.19%) [ +0.26% +0.12% +0.00% / +0.19% +0.84% +0.62%] index_select wrap : Elapsed 0.042 ms (4.180 ms / 100) 4.169 -> 4.170 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.48% +0.41%] index_select linear : Elapsed 0.042 ms (4.169 ms / 100) 4.178 -> 4.179 ( +0.02%) [ +0.17% +0.00% +0.10% / +0.02% +0.55% +0.62%] index_select reverse : Elapsed 0.042 ms (4.185 ms / 100) 4.185 -> 4.191 ( +0.14%) [ +0.26% +0.00% +0.24% / +0.14% +0.76% +0.29%] index_select skip64 : Elapsed 0.042 ms (4.196 ms / 100) 4.173 -> 4.183 ( +0.24%) [ +0.00% +0.24% +0.10% / +0.24% +0.72% +0.31%] index_select skip256 : Elapsed 0.042 ms (4.173 ms / 100) 4.162 -> 4.169 ( +0.17%) [ +0.10% +0.10% +0.00% / +0.17% +0.60% +0.55%] index_select spread : Elapsed 0.042 ms (4.166 ms / 100) 4.179 -> 4.185 ( +0.14%) [ +0.00% +0.05% +0.17% / +0.14% +0.50% +0.45%] index_select strided 3 : Elapsed 0.042 ms (4.179 ms / 100) 4.181 -> 4.190 ( +0.22%) [ +0.12% +0.10% +0.00% / +0.22% +0.36% +0.33%] index_select strided 5 : Elapsed 0.042 ms (4.186 ms / 100) 4.188 -> 4.193 ( +0.12%) [ +0.10% +0.00% +0.07% / +0.12% +0.36% +0.38%] index_select strided 7 : Elapsed 0.042 ms (4.192 ms / 100) 4.187 -> 4.190 ( +0.07%) [ +0.26% +0.00% +0.05% / +0.07% +0.57% +0.64%] index_select strided 8 : Elapsed 0.042 ms (4.198 ms / 100) 4.176 -> 4.179 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.48% +0.48%] index_select strided 16 : Elapsed 0.042 ms (4.176 ms / 100) 4.170 -> 4.188 ( +0.43%) [ +0.00% +0.26% +0.05% / +0.43% +0.65% +0.65%] index_select random : Elapsed 0.042 ms (4.170 ms / 100) 4.176 -> 4.188 ( +0.29%) [ +0.22% +0.00% +0.26% / +0.29% +0.53% +0.69%] index_select random_sorted : Elapsed 0.042 ms (4.185 ms / 100) 4.178 -> 4.182 ( +0.10%) [ +0.17% +0.17% +0.00% / +0.10% +0.34% +0.41%] index_select perm : Elapsed 0.042 ms (4.185 ms / 100) 4.182 -> 4.188 ( +0.14%) [ +0.05% +0.10% +0.00% / +0.14% +0.41% +0.31%] index_select perm_sorted : Elapsed 0.042 ms (4.184 ms / 100) out_shape = [4, 40, 16, 20] in_shape = [4, 40, 5, 20] idx_dim = 2 B = [4, 40, 16, 20] (stride (12800, 16, 1, 640)) A = [4, 40, 5, 20] (stride (20, 80, 3200, 1)) dim = 2 2.351 -> 2.356 ( +0.21%) [ +0.21% +0.34% +0.00% / +0.21% +1.49% +1.66%] index_add_ linear : Elapsed 0.024 ms (2.356 ms / 100) 2.320 -> 2.323 ( +0.13%) [ +0.00% +0.17% +0.13% / +0.13% +1.29% +1.38%] index_copy_ linear : Elapsed 0.023 ms (2.320 ms / 100) 2.364 -> 2.386 ( +0.93%) [ +0.17% +0.13% +0.00% / +0.93% +1.10% +1.23%] index_add_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.330 -> 2.346 ( +0.69%) [ +0.04% +0.00% +0.09% / +0.69% +1.29% +1.37%] index_copy_ reverse : Elapsed 0.023 ms (2.331 ms / 100) 2.395 -> 2.409 ( +0.58%) [ +0.38% +0.42% +0.00% / +0.58% +1.34% +1.38%] index_add_ spread : Elapsed 0.024 ms (2.404 ms / 100) 2.401 -> 2.413 ( +0.50%) [ +0.00% +0.25% +0.00% / +0.50% +1.17% +1.46%] index_copy_ spread : Elapsed 0.024 ms (2.401 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.00% +0.29% +0.17% / +0.12% +0.83% +1.21%] index_add_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.96% +1.04%] index_copy_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.395 -> 2.403 ( +0.33%) [ +0.00% +0.04% +0.17% / +0.33% +1.04% +1.17%] index_add_ strided 5 : Elapsed 0.024 ms (2.395 ms / 100) 2.395 -> 2.397 ( +0.08%) [ +0.00% +0.08% +0.13% / +0.08% +0.96% +1.13%] index_copy_ strided 5 : Elapsed 0.024 ms (2.395 ms / 100) 2.380 -> 2.387 ( +0.29%) [ +0.00% +0.08% +0.04% / +0.29% +1.60% +1.51%] index_add_ strided 7 : Elapsed 0.024 ms (2.380 ms / 100) 2.382 -> 2.388 ( +0.25%) [ +0.00% +0.00% +0.13% / +0.25% +1.51% +1.64%] index_copy_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.87% +0.96%] index_add_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.400 -> 2.410 ( +0.42%) [ +0.08% +0.13% +0.00% / +0.42% +1.13% +0.87%] index_copy_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.04% +0.00% +0.21% / +0.08% +0.96% +1.16%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.12% +0.21% +0.00% / +0.08% +0.96% +1.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 4.900 -> 4.911 ( +0.22%) [ +0.00% +0.12% +0.02% / +0.22% +0.63% +0.61%] index_select const : Elapsed 0.049 ms (4.900 ms / 100) 4.940 -> 4.940 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.69% +0.75%] index_select wrap : Elapsed 0.049 ms (4.943 ms / 100) 4.961 -> 4.966 ( +0.10%) [ +0.00% +0.20% +0.42% / +0.10% +0.77% +0.85%] index_select linear : Elapsed 0.050 ms (4.961 ms / 100) 4.966 -> 4.979 ( +0.26%) [ +0.22% +0.14% +0.00% / +0.26% +0.85% +0.70%] index_select reverse : Elapsed 0.050 ms (4.977 ms / 100) 4.886 -> 4.893 ( +0.14%) [ +0.23% +0.00% +0.23% / +0.14% +0.76% +0.57%] index_select skip64 : Elapsed 0.049 ms (4.897 ms / 100) 4.896 -> 4.904 ( +0.16%) [ +0.02% +0.00% +0.20% / +0.16% +0.88% +0.63%] index_select skip256 : Elapsed 0.049 ms (4.897 ms / 100) 4.960 -> 4.964 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.08% +0.67% +0.93%] index_select spread : Elapsed 0.050 ms (4.964 ms / 100) 4.956 -> 4.959 ( +0.06%) [ +0.00% +0.28% +0.34% / +0.06% +1.03% +1.17%] index_select strided 3 : Elapsed 0.050 ms (4.956 ms / 100) 4.952 -> 4.959 ( +0.14%) [ +0.24% +0.22% +0.00% / +0.14% +0.95% +1.17%] index_select random : Elapsed 0.050 ms (4.964 ms / 100) 4.934 -> 4.938 ( +0.08%) [ +0.02% +0.10% +0.00% / +0.08% +0.93% +1.20%] index_select random_sorted : Elapsed 0.049 ms (4.935 ms / 100) B = [4, 40, 16, 20] (stride (40, 1, 3200, 160)) A = [4, 40, 5, 20] (stride (1, 20, 4, 800)) dim = 2 2.367 -> 2.369 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.08% +0.46% +0.46%] index_add_ linear : Elapsed 0.024 ms (2.371 ms / 100) 2.296 -> 2.298 ( +0.09%) [ +0.00% +0.22% +0.09% / +0.09% +0.65% +0.35%] index_copy_ linear : Elapsed 0.023 ms (2.296 ms / 100) 2.361 -> 2.372 ( +0.47%) [ +0.04% +0.25% +0.00% / +0.51% +0.47% +0.89%] index_add_ reverse : Elapsed 0.024 ms (2.362 ms / 100) 2.291 -> 2.303 ( +0.52%) [ +0.13% +0.17% +0.00% / +0.65% +0.52% +1.27%] index_copy_ reverse : Elapsed 0.023 ms (2.294 ms / 100) 2.365 -> 2.370 ( +0.21%) [ +0.00% +0.04% +0.13% / +0.21% +0.42% +0.38%] index_add_ spread : Elapsed 0.024 ms (2.365 ms / 100) 2.289 -> 2.296 ( +0.31%) [ +0.00% +0.26% +0.44% / +0.31% +0.66% +0.61%] index_copy_ spread : Elapsed 0.023 ms (2.289 ms / 100) 2.365 -> 2.368 ( +0.13%) [ +0.04% +0.00% +0.08% / +0.13% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.024 ms (2.366 ms / 100) 2.296 -> 2.301 ( +0.22%) [ +0.00% +0.13% +0.13% / +0.22% +0.57% +0.48%] index_copy_ strided 3 : Elapsed 0.023 ms (2.296 ms / 100) 2.366 -> 2.373 ( +0.30%) [ +0.00% +0.25% +0.13% / +0.30% +0.38% +0.46%] index_add_ strided 5 : Elapsed 0.024 ms (2.366 ms / 100) 2.300 -> 2.304 ( +0.17%) [ +0.00% +0.09% +0.13% / +0.26% +0.17% +0.35%] index_copy_ strided 5 : Elapsed 0.023 ms (2.300 ms / 100) 2.365 -> 2.370 ( +0.21%) [ +0.00% +0.17% +0.08% / +0.21% +0.42% +0.55%] index_add_ strided 7 : Elapsed 0.024 ms (2.365 ms / 100) 2.293 -> 2.304 ( +0.48%) [ +0.09% +0.00% +0.22% / +0.83% +0.48% +0.57%] index_copy_ strided 7 : Elapsed 0.023 ms (2.295 ms / 100) 2.372 -> 2.373 ( +0.04%) [ +0.25% +0.00% +0.00% / +0.04% +0.25% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.378 ms / 100) 2.297 -> 2.305 ( +0.35%) [ +0.04% +0.00% +0.30% / +0.39% +0.35% +0.65%] index_copy_ perm : Elapsed 0.023 ms (2.298 ms / 100) 2.369 -> 2.371 ( +0.08%) [ +0.30% +0.08% +0.00% / +0.68% +0.08% +0.25%] index_add_ perm_sorted : Elapsed 0.024 ms (2.376 ms / 100) 2.298 -> 2.305 ( +0.30%) [ +0.13% +0.17% +0.00% / +1.17% +0.30% +0.39%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.301 ms / 100) 5.111 -> 5.126 ( +0.29%) [ +0.55% +0.00% +0.59% / +0.29% +0.65% +0.74%] index_select const : Elapsed 0.051 ms (5.139 ms / 100) 5.065 -> 5.079 ( +0.28%) [ +0.32% +0.22% +0.00% / +0.28% +0.93% +0.49%] index_select wrap : Elapsed 0.051 ms (5.081 ms / 100) 5.114 -> 5.135 ( +0.41%) [ +0.22% +0.00% +0.02% / +0.41% +0.57% +0.45%] index_select linear : Elapsed 0.051 ms (5.125 ms / 100) 5.112 -> 5.118 ( +0.12%) [ +0.25% +0.00% +0.10% / +0.12% +0.55% +0.74%] index_select reverse : Elapsed 0.051 ms (5.125 ms / 100) 5.116 -> 5.119 ( +0.06%) [ +0.04% +0.29% +0.00% / +0.06% +0.63% +0.82%] index_select skip64 : Elapsed 0.051 ms (5.118 ms / 100) 5.114 -> 5.134 ( +0.39%) [ +0.25% +0.14% +0.00% / +0.39% +0.72% +1.06%] index_select skip256 : Elapsed 0.051 ms (5.127 ms / 100) 5.065 -> 5.078 ( +0.26%) [ +0.24% +0.00% +0.26% / +0.26% +0.95% +0.77%] index_select spread : Elapsed 0.051 ms (5.077 ms / 100) 5.051 -> 5.077 ( +0.51%) [ +0.42% +0.00% +0.36% / +0.51% +0.71% +0.63%] index_select strided 3 : Elapsed 0.051 ms (5.072 ms / 100) 5.065 -> 5.075 ( +0.20%) [ +0.12% +0.22% +0.00% / +0.20% +0.89% +0.83%] index_select random : Elapsed 0.051 ms (5.071 ms / 100) 5.075 -> 5.075 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.87% +0.75%] index_select random_sorted : Elapsed 0.051 ms (5.086 ms / 100) B = [4, 40, 16, 20] (stride (1, 4, 3200, 160)) A = [4, 40, 5, 20] (stride (20, 400, 80, 1)) dim = 2 2.361 -> 2.369 ( +0.34%) [ +0.13% +0.00% +0.21% / +0.38% +0.34% +0.34%] index_add_ linear : Elapsed 0.024 ms (2.364 ms / 100) 2.315 -> 2.320 ( +0.22%) [ +0.30% +0.04% +0.00% / +0.22% +0.39% +0.43%] index_copy_ linear : Elapsed 0.023 ms (2.322 ms / 100) 2.357 -> 2.359 ( +0.08%) [ +0.30% +0.30% +0.00% / +0.25% +0.08% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.364 ms / 100) 2.309 -> 2.320 ( +0.48%) [ +0.22% +0.30% +0.00% / +0.48% +0.65% +0.91%] index_copy_ reverse : Elapsed 0.023 ms (2.314 ms / 100) 2.365 -> 2.374 ( +0.38%) [ +0.59% +0.38% +0.00% / +0.38% +0.42% +0.51%] index_add_ spread : Elapsed 0.024 ms (2.379 ms / 100) 2.323 -> 2.330 ( +0.30%) [ +0.04% +0.22% +0.00% / +0.34% +0.39% +0.30%] index_copy_ spread : Elapsed 0.023 ms (2.324 ms / 100) 2.364 -> 2.364 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.59% +0.63%] index_add_ strided 3 : Elapsed 0.024 ms (2.366 ms / 100) 2.316 -> 2.325 ( +0.39%) [ +0.13% +0.09% +0.00% / +0.39% +0.91% +0.65%] index_copy_ strided 3 : Elapsed 0.023 ms (2.319 ms / 100) 2.360 -> 2.362 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.76%] index_add_ strided 5 : Elapsed 0.024 ms (2.362 ms / 100) 2.312 -> 2.314 ( +0.09%) [ +0.00% +0.22% +0.13% / +0.09% +0.87% +0.91%] index_copy_ strided 5 : Elapsed 0.023 ms (2.312 ms / 100) 2.355 -> 2.359 ( +0.17%) [ +0.64% +0.21% +0.00% / +0.17% +0.64% +0.55%] index_add_ strided 7 : Elapsed 0.024 ms (2.370 ms / 100) 2.307 -> 2.312 ( +0.22%) [ +0.35% +0.26% +0.00% / +0.22% +0.95% +1.04%] index_copy_ strided 7 : Elapsed 0.023 ms (2.315 ms / 100) 2.354 -> 2.357 ( +0.13%) [ +0.00% +0.08% +0.25% / +0.13% +0.34% +0.47%] index_add_ perm : Elapsed 0.024 ms (2.354 ms / 100) 2.307 -> 2.307 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.82% +0.69%] index_copy_ perm : Elapsed 0.023 ms (2.307 ms / 100) 2.358 -> 2.357 ( -0.04%) [ +0.08% +0.00% +0.25% / +0.38% +0.30% -0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.360 ms / 100) 2.309 -> 2.317 ( +0.35%) [ +0.13% +0.00% +0.26% / +0.35% +0.69% +0.69%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.312 ms / 100) 4.892 -> 4.887 ( -0.10%) [ +0.16% +0.00% +0.02% / -0.10% +0.70% +0.67%] index_select const : Elapsed 0.049 ms (4.900 ms / 100) 4.915 -> 4.917 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.85% +0.63%] index_select wrap : Elapsed 0.049 ms (4.917 ms / 100) 4.933 -> 4.955 ( +0.45%) [ +0.00% +0.36% +0.00% / +0.45% +1.11% +0.69%] index_select linear : Elapsed 0.049 ms (4.933 ms / 100) 4.937 -> 4.949 ( +0.24%) [ +0.06% +0.10% +0.00% / +0.24% +1.01% +0.99%] index_select reverse : Elapsed 0.049 ms (4.940 ms / 100) 4.879 -> 4.906 ( +0.55%) [ +0.33% +0.35% +0.00% / +0.55% +0.88% +0.84%] index_select skip64 : Elapsed 0.049 ms (4.895 ms / 100) 4.897 -> 4.895 ( -0.04%) [ +0.06% +0.00% +0.04% / -0.04% +0.61% +0.47%] index_select skip256 : Elapsed 0.049 ms (4.900 ms / 100) 4.934 -> 4.936 ( +0.04%) [ +0.12% +0.24% +0.00% / +0.04% +0.93% +1.03%] index_select spread : Elapsed 0.049 ms (4.940 ms / 100) 4.936 -> 4.946 ( +0.20%) [ +0.10% +0.26% +0.00% / +0.20% +0.99% +0.93%] index_select strided 3 : Elapsed 0.049 ms (4.941 ms / 100) 4.934 -> 4.946 ( +0.24%) [ +0.10% +0.04% +0.00% / +0.24% +1.03% +1.13%] index_select random : Elapsed 0.049 ms (4.939 ms / 100) 4.925 -> 4.945 ( +0.41%) [ +0.00% +0.39% +0.10% / +0.41% +0.85% +1.04%] index_select random_sorted : Elapsed 0.049 ms (4.925 ms / 100) B = [4, 40, 16, 20] (stride (1, 4, 3200, 160)) A = [4, 40, 5, 20] (stride (1, 4, 160, 800)) dim = 2 2.467 -> 2.472 ( +0.20%) [ +0.08% +0.08% +0.00% / +0.24% +0.20% +4.86%] index_add_ linear : Elapsed 0.025 ms (2.469 ms / 100) 2.400 -> 2.405 ( +0.21%) [ +0.21% +0.00% +0.08% / +0.21% +0.46% +0.42%] index_copy_ linear : Elapsed 0.024 ms (2.405 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.04% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.468 ms / 100) 2.401 -> 2.405 ( +0.17%) [ +0.12% +0.04% +0.00% / +0.17% +0.25% +0.17%] index_copy_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.16% +0.32% +0.12%] index_add_ spread : Elapsed 0.025 ms (2.467 ms / 100) 2.394 -> 2.399 ( +0.21%) [ +0.38% +0.33% +0.00% / +0.21% +0.50% +0.58%] index_copy_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.469 -> 2.469 ( +0.00%) [ +0.24% +0.12% +0.00% / +0.04% +0.00% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.403 -> 2.407 ( +0.17%) [ +0.25% +0.00% +0.17% / +0.17% +0.33% +0.42%] index_copy_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.16% +0.40% +0.00% / +0.28% +0.53% +0.45%] index_add_ strided 5 : Elapsed 0.025 ms (2.474 ms / 100) 2.409 -> 2.414 ( +0.21%) [ +0.00% +0.37% +0.12% / +0.21% +0.33% +0.42%] index_copy_ strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.16% +0.36% +0.45%] index_add_ strided 7 : Elapsed 0.025 ms (2.468 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.21% +0.00% +0.04% / +0.17% +0.29% +0.42%] index_copy_ strided 7 : Elapsed 0.024 ms (2.410 ms / 100) 2.458 -> 2.467 ( +0.37%) [ +0.24% +0.28% +0.00% / +0.37% +0.85% +0.85%] index_add_ perm : Elapsed 0.025 ms (2.464 ms / 100) 2.399 -> 2.403 ( +0.17%) [ +0.25% +0.00% +0.04% / +0.17% +0.71% +0.63%] index_copy_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.462 -> 2.462 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.24% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.463 ms / 100) 2.390 -> 2.397 ( +0.29%) [ +0.17% +0.54% +0.00% / +0.29% +0.63% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 5.408 -> 5.419 ( +0.20%) [ +0.11% +0.00% +0.00% / +0.20% +0.59% +0.57%] index_select const : Elapsed 0.054 ms (5.414 ms / 100) 5.363 -> 5.372 ( +0.17%) [ +0.00% +0.09% +0.02% / +0.17% +0.39% +0.37%] index_select wrap : Elapsed 0.054 ms (5.363 ms / 100) 5.384 -> 5.388 ( +0.07%) [ +0.11% +0.15% +0.00% / +0.07% +0.61% +0.54%] index_select linear : Elapsed 0.054 ms (5.390 ms / 100) 5.376 -> 5.380 ( +0.07%) [ +0.02% +0.09% +0.00% / +0.07% +0.48% +0.63%] index_select reverse : Elapsed 0.054 ms (5.377 ms / 100) 5.387 -> 5.387 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.76% +0.63%] index_select skip64 : Elapsed 0.054 ms (5.394 ms / 100) 5.403 -> 5.388 ( -0.28%) [ +0.07% +0.00% +0.00% / -0.28% +0.48% +0.24%] index_select skip256 : Elapsed 0.054 ms (5.407 ms / 100) 5.378 -> 5.384 ( +0.11%) [ +0.15% +0.09% +0.00% / +0.11% +0.60% +0.61%] index_select spread : Elapsed 0.054 ms (5.386 ms / 100) 5.358 -> 5.363 ( +0.09%) [ +0.13% +0.06% +0.00% / +0.09% +0.62% +0.71%] index_select strided 3 : Elapsed 0.054 ms (5.365 ms / 100) 5.361 -> 5.360 ( -0.02%) [ +0.04% +0.04% +0.00% / -0.02% +0.71% +0.67%] index_select random : Elapsed 0.054 ms (5.363 ms / 100) 5.393 -> 5.414 ( +0.39%) [ +0.45% +0.00% +0.04% / +0.39% +0.87% +0.85%] index_select random_sorted : Elapsed 0.054 ms (5.417 ms / 100) B = [4, 40, 16, 20] (stride (640, 1, 40, 2560)) A = [4, 40, 5, 20] (stride (4000, 20, 800, 1)) dim = 2 2.299 -> 2.300 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.04% +2.35% +2.35%] index_add_ linear : Elapsed 0.023 ms (2.302 ms / 100) 2.248 -> 2.249 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +2.45% +2.54%] index_copy_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.295 -> 2.297 ( +0.09%) [ +0.17% +0.22% +0.00% / +0.09% +2.27% +2.40%] index_add_ reverse : Elapsed 0.023 ms (2.299 ms / 100) 2.243 -> 2.248 ( +0.22%) [ +0.04% +0.18% +0.00% / +0.22% +2.63% +2.72%] index_copy_ reverse : Elapsed 0.022 ms (2.244 ms / 100) 2.302 -> 2.303 ( +0.04%) [ +0.17% +0.00% +0.30% / +0.04% +2.35% +2.39%] index_add_ spread : Elapsed 0.023 ms (2.306 ms / 100) 2.250 -> 2.254 ( +0.18%) [ +0.09% +0.04% +0.00% / +0.18% +2.71% +2.53%] index_copy_ spread : Elapsed 0.023 ms (2.252 ms / 100) 2.296 -> 2.299 ( +0.13%) [ +0.00% +0.22% +0.04% / +0.13% +2.61% +2.61%] index_add_ strided 3 : Elapsed 0.023 ms (2.296 ms / 100) 2.244 -> 2.240 ( -0.18%) [ +0.00% +0.04% +0.13% / -0.18% +2.76% +2.76%] index_copy_ strided 3 : Elapsed 0.022 ms (2.244 ms / 100) 2.289 -> 2.288 ( -0.04%) [ +0.13% +0.13% +0.00% / -0.04% +2.71% +2.40%] index_add_ strided 5 : Elapsed 0.023 ms (2.292 ms / 100) 2.234 -> 2.235 ( +0.04%) [ +0.22% +0.18% +0.00% / +0.04% +3.18% +3.18%] index_copy_ strided 5 : Elapsed 0.022 ms (2.239 ms / 100) 2.291 -> 2.295 ( +0.17%) [ +0.00% +0.17% +0.13% / +0.17% +2.58% +2.62%] index_add_ strided 7 : Elapsed 0.023 ms (2.291 ms / 100) 2.237 -> 2.242 ( +0.22%) [ +0.09% +0.13% +0.00% / +0.22% +2.95% +3.04%] index_copy_ strided 7 : Elapsed 0.022 ms (2.239 ms / 100) 2.295 -> 2.297 ( +0.09%) [ +0.31% +0.26% +0.00% / +0.09% +2.48% +2.48%] index_add_ perm : Elapsed 0.023 ms (2.302 ms / 100) 2.244 -> 2.249 ( +0.22%) [ +0.18% +0.04% +0.00% / +0.22% +2.76% +2.67%] index_copy_ perm : Elapsed 0.022 ms (2.248 ms / 100) 2.305 -> 2.307 ( +0.09%) [ +0.00% +0.04% +0.09% / +0.09% +2.26% +2.43%] index_add_ perm_sorted : Elapsed 0.023 ms (2.305 ms / 100) 2.244 -> 2.249 ( +0.22%) [ +0.00% +0.36% +0.22% / +0.22% +3.12% +2.94%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.244 ms / 100) 4.755 -> 4.753 ( -0.04%) [ +0.00% +0.00% +0.02% / -0.04% +0.78% +0.80%] index_select const : Elapsed 0.048 ms (4.755 ms / 100) 4.755 -> 4.757 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.04% +1.51% +1.54%] index_select wrap : Elapsed 0.048 ms (4.762 ms / 100) 4.791 -> 4.799 ( +0.17%) [ +0.04% +0.02% +0.00% / +0.17% +1.19% +1.29%] index_select linear : Elapsed 0.048 ms (4.793 ms / 100) 4.794 -> 4.793 ( -0.02%) [ +0.00% +0.15% +0.10% / -0.02% +0.94% +1.17%] index_select reverse : Elapsed 0.048 ms (4.794 ms / 100) 4.746 -> 4.754 ( +0.17%) [ +0.00% +0.00% +0.06% / +0.17% +0.70% +0.70%] index_select skip64 : Elapsed 0.047 ms (4.746 ms / 100) 4.753 -> 4.756 ( +0.06%) [ +0.00% +0.04% +0.02% / +0.06% +0.74% +0.78%] index_select skip256 : Elapsed 0.048 ms (4.753 ms / 100) 4.759 -> 4.758 ( -0.02%) [ +0.04% +0.02% +0.00% / -0.02% +1.62% +1.60%] index_select spread : Elapsed 0.048 ms (4.761 ms / 100) 4.770 -> 4.772 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +1.78% +1.72%] index_select strided 3 : Elapsed 0.048 ms (4.772 ms / 100) 4.765 -> 4.774 ( +0.19%) [ +0.00% +0.04% +0.02% / +0.19% +1.43% +1.34%] index_select random : Elapsed 0.048 ms (4.765 ms / 100) 4.752 -> 4.759 ( +0.15%) [ +0.08% +0.00% +0.04% / +0.15% +1.68% +1.58%] index_select random_sorted : Elapsed 0.048 ms (4.756 ms / 100) B = [4, 40, 16, 20] (stride (1, 64, 4, 2560)) A = [4, 40, 5, 20] (stride (100, 400, 1, 5)) dim = 2 2.385 -> 2.383 ( -0.08%) [ +0.17% +0.00% +0.04% / -0.08% +0.46% +0.38%] index_add_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.345 -> 2.353 ( +0.34%) [ +0.34% +0.26% +0.00% / +0.34% +0.47% +0.34%] index_copy_ linear : Elapsed 0.024 ms (2.353 ms / 100) 2.382 -> 2.383 ( +0.04%) [ +0.38% +0.00% +0.25% / +0.04% +0.34% +0.08%] index_add_ reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.344 -> 2.348 ( +0.17%) [ +0.04% +0.00% +0.26% / +0.21% +0.17% +0.38%] index_copy_ reverse : Elapsed 0.023 ms (2.345 ms / 100) 2.386 -> 2.380 ( -0.25%) [ +0.21% +0.00% +0.25% / -0.25% +0.46% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.391 ms / 100) 2.361 -> 2.361 ( +0.00%) [ +0.42% +0.00% +0.25% / +0.04% +0.25% +0.00%] index_copy_ spread : Elapsed 0.024 ms (2.371 ms / 100) 2.386 -> 2.388 ( +0.08%) [ +0.21% +0.25% +0.00% / +0.08% +0.46% +0.38%] index_add_ strided 3 : Elapsed 0.024 ms (2.391 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.08% +0.21% +0.00% / +0.08% +0.42% +0.42%] index_copy_ strided 3 : Elapsed 0.024 ms (2.361 ms / 100) 2.379 -> 2.382 ( +0.13%) [ +0.25% +0.00% +0.29% / +0.13% +0.50% +0.38%] index_add_ strided 5 : Elapsed 0.024 ms (2.385 ms / 100) 2.345 -> 2.350 ( +0.21%) [ +0.47% +0.00% +0.47% / +0.21% +0.51% +0.38%] index_copy_ strided 5 : Elapsed 0.024 ms (2.356 ms / 100) 2.389 -> 2.385 ( -0.17%) [ +0.17% +0.17% +0.00% / -0.04% -0.17% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.369 -> 2.362 ( -0.30%) [ +0.08% +0.00% +0.00% / -0.08% -0.30% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.371 ms / 100) 2.363 -> 2.362 ( -0.04%) [ +0.17% +0.25% +0.00% / +0.08% +0.42% -0.04%] index_add_ perm : Elapsed 0.024 ms (2.367 ms / 100) 2.324 -> 2.324 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.22% +0.09% +0.00%] index_copy_ perm : Elapsed 0.023 ms (2.325 ms / 100) 2.357 -> 2.365 ( +0.34%) [ +0.13% +0.51% +0.00% / +0.38% +0.34% +0.59%] index_add_ perm_sorted : Elapsed 0.024 ms (2.360 ms / 100) 2.317 -> 2.321 ( +0.17%) [ +0.17% +0.39% +0.00% / +0.43% +0.17% +0.60%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.321 ms / 100) 5.077 -> 5.068 ( -0.18%) [ +0.10% +0.00% +0.08% / -0.18% +0.55% +0.22%] index_select const : Elapsed 0.051 ms (5.082 ms / 100) 5.073 -> 5.099 ( +0.51%) [ +0.39% +0.61% +0.00% / +0.51% +0.93% +0.81%] index_select wrap : Elapsed 0.051 ms (5.093 ms / 100) 5.076 -> 5.089 ( +0.26%) [ +0.06% +0.47% +0.00% / +0.26% +0.45% +0.47%] index_select linear : Elapsed 0.051 ms (5.079 ms / 100) 5.075 -> 5.075 ( +0.00%) [ +0.00% +0.12% +0.10% / +0.00% +0.69% +0.49%] index_select reverse : Elapsed 0.051 ms (5.075 ms / 100) 5.077 -> 5.083 ( +0.12%) [ +0.10% +0.00% +0.10% / +0.12% +0.59% +0.67%] index_select skip64 : Elapsed 0.051 ms (5.082 ms / 100) 5.075 -> 5.081 ( +0.12%) [ +0.00% +0.10% +0.10% / +0.12% +0.43% +0.43%] index_select skip256 : Elapsed 0.051 ms (5.075 ms / 100) 5.082 -> 5.092 ( +0.20%) [ +0.08% +0.16% +0.00% / +0.20% +0.57% +0.45%] index_select spread : Elapsed 0.051 ms (5.086 ms / 100) 5.072 -> 5.099 ( +0.53%) [ +0.00% +0.34% +0.34% / +0.53% +0.53% +0.97%] index_select strided 3 : Elapsed 0.051 ms (5.072 ms / 100) 5.081 -> 5.080 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.57% +0.63%] index_select random : Elapsed 0.051 ms (5.081 ms / 100) 5.083 -> 5.088 ( +0.10%) [ +0.12% +0.06% +0.00% / +0.10% +0.69% +0.85%] index_select random_sorted : Elapsed 0.051 ms (5.089 ms / 100) out_shape = [4, 40, 5, 16] in_shape = [4, 40, 5, 20] idx_dim = 3 B = [4, 40, 5, 16] (stride (3200, 16, 640, 1)) A = [4, 40, 5, 20] (stride (40, 1, 160, 800)) dim = 3 3.595 -> 3.601 ( +0.17%) [ +0.06% +0.03% +0.00% / +0.17% +0.86% +0.86%] index_select const : Elapsed 0.036 ms (3.597 ms / 100) 3.575 -> 3.579 ( +0.11%) [ +0.00% +0.03% +0.03% / +0.11% +0.73% +0.67%] index_select wrap : Elapsed 0.036 ms (3.575 ms / 100) 3.579 -> 3.582 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.87% +0.98%] index_select linear : Elapsed 0.036 ms (3.580 ms / 100) 3.571 -> 3.574 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.73% +0.70%] index_select reverse : Elapsed 0.036 ms (3.571 ms / 100) 3.580 -> 3.586 ( +0.17%) [ +0.00% +0.03% +0.00% / +0.17% +0.75% +0.75%] index_select skip64 : Elapsed 0.036 ms (3.580 ms / 100) 3.582 -> 3.583 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.84% +0.87%] index_select skip256 : Elapsed 0.036 ms (3.583 ms / 100) 3.574 -> 3.573 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.64% +0.62%] index_select spread : Elapsed 0.036 ms (3.574 ms / 100) 3.573 -> 3.575 ( +0.06%) [ +0.06% +0.08% +0.00% / +0.06% +0.70% +0.70%] index_select strided 3 : Elapsed 0.036 ms (3.575 ms / 100) 3.572 -> 3.573 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.64% +0.70%] index_select strided 5 : Elapsed 0.036 ms (3.575 ms / 100) 3.579 -> 3.579 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.75% +0.81%] index_select strided 7 : Elapsed 0.036 ms (3.580 ms / 100) 3.590 -> 3.588 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.64% +0.64%] index_select strided 8 : Elapsed 0.036 ms (3.592 ms / 100) 3.594 -> 3.595 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.75% +0.78%] index_select strided 16 : Elapsed 0.036 ms (3.595 ms / 100) 3.578 -> 3.580 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.53% +0.53%] index_select random : Elapsed 0.036 ms (3.580 ms / 100) 3.593 -> 3.593 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.56% +0.47%] index_select random_sorted : Elapsed 0.036 ms (3.594 ms / 100) 3.590 -> 3.589 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.47% +0.42%] index_select perm : Elapsed 0.036 ms (3.591 ms / 100) 3.585 -> 3.593 ( +0.22%) [ +0.11% +0.06% +0.00% / +0.22% +0.42% +0.39%] index_select perm_sorted : Elapsed 0.036 ms (3.589 ms / 100) B = [4, 40, 5, 16] (stride (3200, 5, 1, 200)) A = [4, 40, 5, 20] (stride (1, 400, 80, 4)) dim = 3 1.394 -> 1.395 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.22% +0.07%] index_select const : Elapsed 0.014 ms (1.395 ms / 100) 1.408 -> 1.412 ( +0.28%) [ +0.07% +0.00% +0.07% / +0.28% +0.43% +0.36%] index_select wrap : Elapsed 0.014 ms (1.409 ms / 100) 1.405 -> 1.408 ( +0.21%) [ +0.00% +0.36% +0.14% / +0.21% +0.36% +0.57%] index_select linear : Elapsed 0.014 ms (1.405 ms / 100) 1.405 -> 1.406 ( +0.07%) [ +0.00% +0.21% +0.07% / +0.07% +0.43% +0.50%] index_select reverse : Elapsed 0.014 ms (1.405 ms / 100) 1.390 -> 1.390 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.58%] index_select skip64 : Elapsed 0.014 ms (1.390 ms / 100) 1.390 -> 1.392 ( +0.14%) [ +0.29% +0.07% +0.00% / +0.14% +0.43% +0.36%] index_select skip256 : Elapsed 0.014 ms (1.394 ms / 100) 1.408 -> 1.409 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.28% +0.28%] index_select spread : Elapsed 0.014 ms (1.408 ms / 100) 1.404 -> 1.405 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.36% +0.21%] index_select strided 3 : Elapsed 0.014 ms (1.404 ms / 100) 1.406 -> 1.409 ( +0.21%) [ +0.14% +0.00% +0.07% / +0.21% +0.36% +0.21%] index_select strided 5 : Elapsed 0.014 ms (1.408 ms / 100) 1.389 -> 1.390 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.36% +0.43%] index_select strided 7 : Elapsed 0.014 ms (1.390 ms / 100) 1.373 -> 1.377 ( +0.29%) [ +0.00% +0.22% +0.00% / +0.29% +0.73% +0.66%] index_select strided 8 : Elapsed 0.014 ms (1.373 ms / 100) 1.400 -> 1.402 ( +0.14%) [ +0.14% +0.21% +0.00% / +0.14% +0.86% +1.36%] index_select strided 16 : Elapsed 0.014 ms (1.402 ms / 100) 1.399 -> 1.400 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.71% +0.93%] index_select random : Elapsed 0.014 ms (1.401 ms / 100) 1.403 -> 1.405 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.64% +0.64%] index_select random_sorted : Elapsed 0.014 ms (1.403 ms / 100) 1.404 -> 1.409 ( +0.36%) [ +0.00% +0.28% +0.00% / +0.36% +0.71% +0.50%] index_select perm : Elapsed 0.014 ms (1.404 ms / 100) 1.403 -> 1.409 ( +0.43%) [ +0.00% +0.29% +0.00% / +0.43% +0.86% +0.57%] index_select perm_sorted : Elapsed 0.014 ms (1.403 ms / 100) B = [4, 40, 5, 16] (stride (640, 1, 2560, 40)) A = [4, 40, 5, 20] (stride (20, 400, 80, 1)) dim = 3 3.938 -> 3.944 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.79% +0.63%] index_select const : Elapsed 0.039 ms (3.941 ms / 100) 3.939 -> 3.943 ( +0.10%) [ +0.10% +0.08% +0.00% / +0.10% +0.81% +0.74%] index_select wrap : Elapsed 0.039 ms (3.943 ms / 100) 3.934 -> 3.937 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.74% +0.74%] index_select linear : Elapsed 0.039 ms (3.935 ms / 100) 3.949 -> 3.952 ( +0.08%) [ +0.10% +0.10% +0.00% / +0.08% +0.56% +0.81%] index_select reverse : Elapsed 0.040 ms (3.953 ms / 100) 3.942 -> 3.949 ( +0.18%) [ +0.00% +0.13% +0.00% / +0.18% +0.79% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.942 ms / 100) 3.944 -> 3.943 ( -0.03%) [ +0.00% +0.08% +0.05% / -0.03% +0.61% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.944 ms / 100) 3.941 -> 3.945 ( +0.10%) [ +0.00% +0.05% +0.03% / +0.10% +0.56% +0.63%] index_select spread : Elapsed 0.039 ms (3.941 ms / 100) 3.946 -> 3.953 ( +0.18%) [ +0.18% +0.08% +0.00% / +0.18% +0.71% +0.81%] index_select strided 3 : Elapsed 0.040 ms (3.953 ms / 100) 3.955 -> 3.957 ( +0.05%) [ +0.00% +0.08% +0.00% / +0.05% +0.71% +0.53%] index_select strided 5 : Elapsed 0.040 ms (3.955 ms / 100) 3.945 -> 3.947 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.63% +0.56%] index_select strided 7 : Elapsed 0.039 ms (3.946 ms / 100) 3.952 -> 3.958 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +0.68% +0.35%] index_select strided 8 : Elapsed 0.040 ms (3.952 ms / 100) 3.953 -> 3.956 ( +0.08%) [ +0.13% +0.08% +0.00% / +0.08% +0.78% +0.68%] index_select strided 16 : Elapsed 0.040 ms (3.958 ms / 100) 3.958 -> 3.949 ( -0.23%) [ +0.05% +0.05% +0.00% / -0.23% +0.51% +0.43%] index_select random : Elapsed 0.040 ms (3.960 ms / 100) 3.944 -> 3.945 ( +0.03%) [ +0.13% +0.00% +0.08% / +0.03% +0.43% +0.38%] index_select random_sorted : Elapsed 0.039 ms (3.949 ms / 100) 3.951 -> 3.955 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.53% +0.56%] index_select perm : Elapsed 0.040 ms (3.952 ms / 100) 3.949 -> 3.947 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.63% +0.63%] index_select perm_sorted : Elapsed 0.040 ms (3.950 ms / 100) B = [4, 40, 5, 16] (stride (640, 1, 2560, 40)) A = [4, 40, 5, 20] (stride (200, 5, 1, 800)) dim = 3 3.207 -> 3.208 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.47% +0.41%] index_select const : Elapsed 0.032 ms (3.208 ms / 100) 3.202 -> 3.205 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.56% +0.53%] index_select wrap : Elapsed 0.032 ms (3.204 ms / 100) 3.214 -> 3.217 ( +0.09%) [ +0.06% +0.03% +0.00% / +0.09% +0.50% +0.31%] index_select linear : Elapsed 0.032 ms (3.216 ms / 100) 3.193 -> 3.196 ( +0.09%) [ +0.00% +0.03% +0.03% / +0.09% +0.56% +0.56%] index_select reverse : Elapsed 0.032 ms (3.193 ms / 100) 3.200 -> 3.201 ( +0.03%) [ +0.13% +0.13% +0.00% / +0.03% +0.66% +0.63%] index_select skip64 : Elapsed 0.032 ms (3.204 ms / 100) 3.210 -> 3.209 ( -0.03%) [ +0.12% +0.00% +0.12% / -0.03% +0.37% +0.53%] index_select skip256 : Elapsed 0.032 ms (3.214 ms / 100) 3.207 -> 3.209 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.50% +0.44%] index_select spread : Elapsed 0.032 ms (3.209 ms / 100) 3.200 -> 3.201 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.53% +0.56%] index_select strided 3 : Elapsed 0.032 ms (3.201 ms / 100) 3.191 -> 3.192 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.44% +0.50%] index_select strided 5 : Elapsed 0.032 ms (3.191 ms / 100) 3.206 -> 3.209 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.59% +0.59%] index_select strided 7 : Elapsed 0.032 ms (3.207 ms / 100) 3.198 -> 3.200 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.78% +0.75%] index_select strided 8 : Elapsed 0.032 ms (3.198 ms / 100) 3.189 -> 3.193 ( +0.13%) [ +0.19% +0.13% +0.00% / +0.13% +0.69% +0.82%] index_select strided 16 : Elapsed 0.032 ms (3.195 ms / 100) 3.202 -> 3.201 ( -0.03%) [ +0.00% +0.06% +0.00% / -0.03% +0.72% +0.66%] index_select random : Elapsed 0.032 ms (3.202 ms / 100) 3.204 -> 3.210 ( +0.19%) [ +0.16% +0.06% +0.00% / +0.19% +0.72% +0.87%] index_select random_sorted : Elapsed 0.032 ms (3.209 ms / 100) 3.202 -> 3.206 ( +0.12%) [ +0.09% +0.06% +0.00% / +0.12% +0.69% +0.75%] index_select perm : Elapsed 0.032 ms (3.205 ms / 100) 3.201 -> 3.203 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.66% +0.66%] index_select perm_sorted : Elapsed 0.032 ms (3.202 ms / 100) B = [4, 40, 5, 16] (stride (16, 64, 2560, 1)) A = [4, 40, 5, 20] (stride (800, 20, 3200, 1)) dim = 3 3.807 -> 3.807 ( +0.00%) [ +0.00% +0.03% +0.11% / +0.00% +0.95% +0.84%] index_select const : Elapsed 0.038 ms (3.807 ms / 100) 3.825 -> 3.827 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.89% +0.86%] index_select wrap : Elapsed 0.038 ms (3.825 ms / 100) 3.810 -> 3.813 ( +0.08%) [ +0.03% +0.00% +0.05% / +0.08% +0.79% +0.79%] index_select linear : Elapsed 0.038 ms (3.811 ms / 100) 3.810 -> 3.808 ( -0.05%) [ +0.03% +0.10% +0.00% / -0.05% +0.68% +0.76%] index_select reverse : Elapsed 0.038 ms (3.811 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.13% +0.00% +0.03% / +0.00% +0.76% +0.60%] index_select skip64 : Elapsed 0.038 ms (3.819 ms / 100) 3.809 -> 3.816 ( +0.18%) [ +0.00% +0.18% +0.13% / +0.18% +0.84% +0.79%] index_select skip256 : Elapsed 0.038 ms (3.809 ms / 100) 3.821 -> 3.823 ( +0.05%) [ +0.03% +0.00% +0.10% / +0.05% +0.81% +0.71%] index_select spread : Elapsed 0.038 ms (3.822 ms / 100) 3.810 -> 3.809 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.66% +0.52%] index_select strided 3 : Elapsed 0.038 ms (3.810 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.11% +0.03% +0.00% / +0.03% +0.60% +0.74%] index_select strided 5 : Elapsed 0.038 ms (3.806 ms / 100) 3.798 -> 3.803 ( +0.13%) [ +0.00% +0.03% +0.05% / +0.13% +0.79% +0.68%] index_select strided 7 : Elapsed 0.038 ms (3.798 ms / 100) 3.803 -> 3.808 ( +0.13%) [ +0.05% +0.00% +0.00% / +0.13% +0.68% +0.66%] index_select strided 8 : Elapsed 0.038 ms (3.805 ms / 100) 3.812 -> 3.814 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.79% +0.73%] index_select strided 16 : Elapsed 0.038 ms (3.812 ms / 100) 3.818 -> 3.815 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.34% +0.55%] index_select random : Elapsed 0.038 ms (3.824 ms / 100) 3.819 -> 3.824 ( +0.13%) [ +0.00% +0.10% +0.16% / +0.13% +0.63% +0.65%] index_select random_sorted : Elapsed 0.038 ms (3.819 ms / 100) 3.807 -> 3.812 ( +0.13%) [ +0.11% +0.03% +0.00% / +0.13% +0.63% +0.68%] index_select perm : Elapsed 0.038 ms (3.811 ms / 100) 3.814 -> 3.811 ( -0.08%) [ +0.10% +0.00% +0.00% / -0.08% +0.55% +0.55%] index_select perm_sorted : Elapsed 0.038 ms (3.818 ms / 100) B = [4, 40, 5, 16] (stride (1, 4, 2560, 160)) A = [4, 40, 5, 20] (stride (4000, 100, 20, 1)) dim = 3 3.489 -> 3.489 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.29% +0.29%] index_select const : Elapsed 0.035 ms (3.489 ms / 100) 3.482 -> 3.485 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.49% +0.60%] index_select wrap : Elapsed 0.035 ms (3.483 ms / 100) 3.494 -> 3.498 ( +0.11%) [ +0.06% +0.03% +0.00% / +0.11% +0.46% +0.43%] index_select linear : Elapsed 0.035 ms (3.496 ms / 100) 3.399 -> 3.404 ( +0.15%) [ +0.06% +0.00% +0.06% / +0.15% +0.47% +0.47%] index_select reverse : Elapsed 0.034 ms (3.401 ms / 100) 3.399 -> 3.399 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.26% +0.21%] index_select skip64 : Elapsed 0.034 ms (3.399 ms / 100) 3.495 -> 3.502 ( +0.20%) [ +0.03% +0.00% +0.00% / +0.20% +0.46% +0.43%] index_select skip256 : Elapsed 0.035 ms (3.496 ms / 100) 3.482 -> 3.483 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.49% +0.52%] index_select spread : Elapsed 0.035 ms (3.483 ms / 100) 3.389 -> 3.392 ( +0.09%) [ +0.06% +0.00% +0.06% / +0.09% +0.53% +0.53%] index_select strided 3 : Elapsed 0.034 ms (3.391 ms / 100) 3.402 -> 3.406 ( +0.12%) [ +0.03% +0.00% +0.00% / +0.12% +0.32% +0.38%] index_select strided 5 : Elapsed 0.034 ms (3.403 ms / 100) 3.479 -> 3.482 ( +0.09%) [ +0.03% +0.00% +0.00% / +0.09% +0.75% +0.69%] index_select strided 7 : Elapsed 0.035 ms (3.480 ms / 100) 3.491 -> 3.501 ( +0.29%) [ +0.06% +0.03% +0.00% / +0.29% +0.72% +0.72%] index_select strided 8 : Elapsed 0.035 ms (3.493 ms / 100) 3.399 -> 3.402 ( +0.09%) [ +0.15% +0.09% +0.00% / +0.09% +0.50% +0.53%] index_select strided 16 : Elapsed 0.034 ms (3.404 ms / 100) 3.389 -> 3.391 ( +0.06%) [ +0.09% +0.00% +0.03% / +0.06% +0.71% +0.71%] index_select random : Elapsed 0.034 ms (3.392 ms / 100) 3.493 -> 3.493 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.66% +0.60%] index_select random_sorted : Elapsed 0.035 ms (3.493 ms / 100) 3.478 -> 3.483 ( +0.14%) [ +0.03% +0.06% +0.00% / +0.14% +0.72% +0.75%] index_select perm : Elapsed 0.035 ms (3.479 ms / 100) 3.388 -> 3.390 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.65% +0.68%] index_select perm_sorted : Elapsed 0.034 ms (3.388 ms / 100) B = [4, 40, 5, 16] (stride (200, 1, 40, 800)) A = [4, 40, 5, 20] (stride (100, 400, 20, 1)) dim = 3 4.167 -> 4.170 ( +0.07%) [ +0.05% +0.00% +0.17% / +0.07% +0.79% +0.58%] index_select const : Elapsed 0.042 ms (4.169 ms / 100) 4.169 -> 4.178 ( +0.22%) [ +0.26% +0.19% +0.00% / +0.22% +0.89% +0.89%] index_select wrap : Elapsed 0.042 ms (4.180 ms / 100) 4.170 -> 4.168 ( -0.05%) [ +0.02% +0.00% +0.07% / -0.05% +0.86% +0.65%] index_select linear : Elapsed 0.042 ms (4.171 ms / 100) 4.170 -> 4.172 ( +0.05%) [ +0.12% +0.10% +0.00% / +0.05% +0.91% +0.58%] index_select reverse : Elapsed 0.042 ms (4.175 ms / 100) 4.159 -> 4.169 ( +0.24%) [ +0.48% +0.46% +0.00% / +0.24% +1.01% +1.23%] index_select skip64 : Elapsed 0.042 ms (4.179 ms / 100) 4.158 -> 4.164 ( +0.14%) [ +0.19% +0.00% +0.14% / +0.14% +0.82% +0.67%] index_select skip256 : Elapsed 0.042 ms (4.166 ms / 100) 4.189 -> 4.188 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.67% +0.67%] index_select spread : Elapsed 0.042 ms (4.189 ms / 100) 4.186 -> 4.189 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.07% +0.62% +0.55%] index_select strided 3 : Elapsed 0.042 ms (4.188 ms / 100) 4.158 -> 4.163 ( +0.12%) [ +0.17% +0.10% +0.00% / +0.12% +0.65% +0.67%] index_select strided 5 : Elapsed 0.042 ms (4.165 ms / 100) 4.176 -> 4.181 ( +0.12%) [ +0.05% +0.05% +0.00% / +0.12% +0.67% +0.57%] index_select strided 7 : Elapsed 0.042 ms (4.178 ms / 100) 4.160 -> 4.160 ( +0.00%) [ +0.14% +0.10% +0.00% / +0.00% +0.60% +0.70%] index_select strided 8 : Elapsed 0.042 ms (4.166 ms / 100) 4.169 -> 4.173 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.65% +0.50%] index_select strided 16 : Elapsed 0.042 ms (4.169 ms / 100) 4.175 -> 4.179 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.55% +0.53%] index_select random : Elapsed 0.042 ms (4.179 ms / 100) 4.177 -> 4.178 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.53% +0.48%] index_select random_sorted : Elapsed 0.042 ms (4.177 ms / 100) 4.193 -> 4.193 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.36% +0.41%] index_select perm : Elapsed 0.042 ms (4.197 ms / 100) 4.180 -> 4.182 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.65% +0.65%] index_select perm_sorted : Elapsed 0.042 ms (4.180 ms / 100) B = [4, 40, 5, 16] (stride (1, 4, 160, 800)) A = [4, 40, 5, 20] (stride (20, 400, 80, 1)) dim = 3 3.933 -> 3.936 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.48% +0.48%] index_select const : Elapsed 0.039 ms (3.934 ms / 100) 3.943 -> 3.948 ( +0.13%) [ +0.23% +0.10% +0.00% / +0.13% +0.66% +0.56%] index_select wrap : Elapsed 0.040 ms (3.952 ms / 100) 3.935 -> 3.938 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.58% +0.61%] index_select linear : Elapsed 0.039 ms (3.938 ms / 100) 3.939 -> 3.935 ( -0.10%) [ +0.18% +0.00% +0.05% / -0.10% +0.63% +0.51%] index_select reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.933 -> 3.938 ( +0.13%) [ +0.13% +0.00% +0.03% / +0.13% +0.61% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.938 ms / 100) 3.930 -> 3.939 ( +0.23%) [ +0.20% +0.00% +0.10% / +0.23% +0.71% +0.69%] index_select skip256 : Elapsed 0.039 ms (3.938 ms / 100) 3.939 -> 3.943 ( +0.10%) [ +0.00% +0.20% +0.03% / +0.10% +0.56% +0.53%] index_select spread : Elapsed 0.039 ms (3.939 ms / 100) 3.934 -> 3.936 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.53% +0.48%] index_select strided 3 : Elapsed 0.039 ms (3.936 ms / 100) 3.937 -> 3.942 ( +0.13%) [ +0.10% +0.00% +0.03% / +0.13% +0.41% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.941 ms / 100) 3.939 -> 3.943 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.63% +0.63%] index_select strided 7 : Elapsed 0.039 ms (3.943 ms / 100) 3.947 -> 3.945 ( -0.05%) [ +0.03% +0.03% +0.00% / -0.05% +0.41% +0.48%] index_select strided 8 : Elapsed 0.039 ms (3.948 ms / 100) 3.927 -> 3.931 ( +0.10%) [ +0.00% +0.18% +0.20% / +0.10% +0.89% +0.94%] index_select strided 16 : Elapsed 0.039 ms (3.927 ms / 100) 3.919 -> 3.919 ( +0.00%) [ +0.05% +0.00% +0.13% / +0.00% +0.64% +0.61%] index_select random : Elapsed 0.039 ms (3.921 ms / 100) 3.928 -> 3.932 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.76% +0.74%] index_select random_sorted : Elapsed 0.039 ms (3.932 ms / 100) 3.950 -> 3.952 ( +0.05%) [ +0.15% +0.00% +0.23% / +0.05% +0.66% +0.48%] index_select perm : Elapsed 0.040 ms (3.956 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.61% +0.66%] index_select perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) out_shape = [16, 40, 20, 5] in_shape = [4, 40, 20, 5] idx_dim = 0 B = [16, 40, 20, 5] (stride (4000, 100, 1, 20)) A = [4, 40, 20, 5] (stride (4000, 1, 40, 800)) dim = 0 2.616 -> 2.619 ( +0.11%) [ +0.04% +0.04% +0.00% / +0.11% +0.23% +0.42%] index_add_ linear : Elapsed 0.026 ms (2.617 ms / 100) 2.557 -> 2.559 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.31% +0.12%] index_copy_ linear : Elapsed 0.026 ms (2.557 ms / 100) 2.616 -> 2.617 ( +0.04%) [ +0.15% +0.04% +0.00% / +0.04% +0.31% +0.23%] index_add_ reverse : Elapsed 0.026 ms (2.620 ms / 100) 2.550 -> 2.555 ( +0.20%) [ +0.24% +0.00% +0.12% / +0.20% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.026 ms (2.556 ms / 100) 2.611 -> 2.616 ( +0.19%) [ +0.19% +0.00% +0.15% / +0.19% +0.50% +0.31%] index_add_ spread : Elapsed 0.026 ms (2.616 ms / 100) 2.553 -> 2.560 ( +0.27%) [ +0.00% +0.16% +0.08% / +0.27% +0.35% +0.43%] index_copy_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.608 -> 2.613 ( +0.19%) [ +0.19% +0.35% +0.00% / +0.19% +0.42% +0.31%] index_add_ strided 3 : Elapsed 0.026 ms (2.613 ms / 100) 2.548 -> 2.552 ( +0.16%) [ +0.24% +0.39% +0.00% / +0.16% +0.39% +0.43%] index_copy_ strided 3 : Elapsed 0.026 ms (2.554 ms / 100) 2.613 -> 2.613 ( +0.00%) [ +0.00% +0.15% +0.08% / +0.00% +0.27% +0.08%] index_add_ strided 5 : Elapsed 0.026 ms (2.613 ms / 100) 2.550 -> 2.553 ( +0.12%) [ +0.00% +0.12% +0.24% / +0.12% +0.31% +0.24%] index_copy_ strided 5 : Elapsed 0.026 ms (2.550 ms / 100) 2.615 -> 2.619 ( +0.15%) [ +0.04% +0.11% +0.00% / +0.15% +0.23% +0.27%] index_add_ strided 7 : Elapsed 0.026 ms (2.616 ms / 100) 2.550 -> 2.557 ( +0.27%) [ +0.20% +0.16% +0.00% / +0.63% +0.27% +0.27%] index_copy_ strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.617 -> 2.616 ( -0.04%) [ +0.00% +0.04% +0.11% / +0.00% -0.04% -0.04%] index_add_ perm : Elapsed 0.026 ms (2.617 ms / 100) 2.553 -> 2.556 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.35% +0.12% +0.24%] index_copy_ perm : Elapsed 0.026 ms (2.556 ms / 100) 2.617 -> 2.619 ( +0.08%) [ +0.00% +0.23% +0.00% / +0.08% +0.08% +0.08%] index_add_ perm_sorted : Elapsed 0.026 ms (2.617 ms / 100) 2.552 -> 2.556 ( +0.16%) [ +0.08% +0.24% +0.00% / +0.16% +0.27% +0.35%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.554 ms / 100) 5.961 -> 5.964 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.49% +0.55%] index_select const : Elapsed 0.060 ms (5.964 ms / 100) 5.965 -> 5.965 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.42% +0.44%] index_select wrap : Elapsed 0.060 ms (5.970 ms / 100) 5.967 -> 5.970 ( +0.05%) [ +0.13% +0.13% +0.00% / +0.05% +0.49% +0.49%] index_select linear : Elapsed 0.060 ms (5.975 ms / 100) 5.946 -> 5.955 ( +0.15%) [ +0.00% +0.03% +0.05% / +0.15% +0.55% +0.40%] index_select reverse : Elapsed 0.059 ms (5.946 ms / 100) 5.958 -> 5.961 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.17% +0.22%] index_select skip64 : Elapsed 0.060 ms (5.962 ms / 100) 5.959 -> 5.963 ( +0.07%) [ +0.08% +0.05% +0.00% / +0.07% +0.22% +0.22%] index_select skip256 : Elapsed 0.060 ms (5.964 ms / 100) 5.958 -> 5.958 ( +0.00%) [ +0.02% +0.03% +0.00% / +0.00% +0.15% +0.17%] index_select spread : Elapsed 0.060 ms (5.959 ms / 100) 5.969 -> 5.978 ( +0.15%) [ +0.13% +0.00% +0.05% / +0.15% +0.37% +0.37%] index_select strided 3 : Elapsed 0.060 ms (5.977 ms / 100) 5.952 -> 5.955 ( +0.05%) [ +0.05% +0.17% +0.00% / +0.05% +0.37% +0.45%] index_select random : Elapsed 0.060 ms (5.955 ms / 100) 5.957 -> 5.961 ( +0.07%) [ +0.17% +0.18% +0.00% / +0.07% +0.37% +0.69%] index_select random_sorted : Elapsed 0.060 ms (5.967 ms / 100) B = [16, 40, 20, 5] (stride (200, 5, 3200, 1)) A = [4, 40, 20, 5] (stride (40, 1, 160, 3200)) dim = 0 2.611 -> 2.615 ( +0.15%) [ +0.19% +0.19% +0.00% / +0.15% +0.38% +0.46%] index_add_ linear : Elapsed 0.026 ms (2.616 ms / 100) 2.554 -> 2.557 ( +0.12%) [ +0.16% +0.16% +0.00% / +0.16% +0.12% +0.16%] index_copy_ linear : Elapsed 0.026 ms (2.558 ms / 100) 2.613 -> 2.614 ( +0.04%) [ +0.00% +0.15% +0.15% / +0.04% +0.38% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.613 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.00% +0.20% +0.24% / +0.08% +0.16% +0.31%] index_copy_ reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.612 -> 2.610 ( -0.08%) [ +0.00% +0.11% +0.15% / -0.08% +0.08% +3.91%] index_add_ spread : Elapsed 0.026 ms (2.612 ms / 100) 2.553 -> 2.552 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% -0.04% -0.04%] index_copy_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.612 -> 2.611 ( -0.04%) [ +0.11% +0.00% +0.11% / -0.04% +0.11% +0.00%] index_add_ strided 3 : Elapsed 0.026 ms (2.615 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.12% +0.00% +0.20% / +0.20% +0.00% +0.04%] index_copy_ strided 3 : Elapsed 0.026 ms (2.551 ms / 100) 2.614 -> 2.614 ( +0.00%) [ +0.15% +0.23% +0.00% / +0.00% +0.19% +0.00%] index_add_ strided 5 : Elapsed 0.026 ms (2.618 ms / 100) 2.555 -> 2.555 ( +0.00%) [ +0.35% +0.08% +0.00% / +0.08% +0.04% +0.00%] index_copy_ strided 5 : Elapsed 0.026 ms (2.564 ms / 100) 2.611 -> 2.613 ( +0.08%) [ +0.00% +0.11% +0.04% / +0.08% +0.31% +0.27%] index_add_ strided 7 : Elapsed 0.026 ms (2.611 ms / 100) 2.548 -> 2.553 ( +0.20%) [ +0.00% +0.27% +0.20% / +0.24% +0.20% +0.35%] index_copy_ strided 7 : Elapsed 0.025 ms (2.548 ms / 100) 2.614 -> 2.616 ( +0.08%) [ +0.15% +0.04% +0.00% / +0.08% +0.23% +0.38%] index_add_ perm : Elapsed 0.026 ms (2.618 ms / 100) 2.550 -> 2.553 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.31% +0.39%] index_copy_ perm : Elapsed 0.026 ms (2.553 ms / 100) 2.618 -> 2.617 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.08% +0.11%] index_add_ perm_sorted : Elapsed 0.026 ms (2.618 ms / 100) 2.555 -> 2.558 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.12% +0.20% +0.16%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.555 ms / 100) 5.931 -> 5.932 ( +0.02%) [ +0.08% +0.08% +0.00% / +0.02% +0.42% +0.27%] index_select const : Elapsed 0.059 ms (5.936 ms / 100) 5.940 -> 5.945 ( +0.08%) [ +0.19% +0.02% +0.00% / +0.08% +0.29% +0.17%] index_select wrap : Elapsed 0.060 ms (5.951 ms / 100) 5.962 -> 5.967 ( +0.08%) [ +0.10% +0.00% +0.08% / +0.08% +0.27% +0.25%] index_select linear : Elapsed 0.060 ms (5.968 ms / 100) 5.962 -> 5.963 ( +0.02%) [ +0.10% +0.05% +0.00% / +0.02% +0.30% +0.29%] index_select reverse : Elapsed 0.060 ms (5.968 ms / 100) 5.930 -> 5.941 ( +0.19%) [ +0.00% +0.10% +0.20% / +0.19% +0.49% +0.42%] index_select skip64 : Elapsed 0.059 ms (5.930 ms / 100) 5.936 -> 5.943 ( +0.12%) [ +0.00% +0.02% +0.07% / +0.12% +0.40% +0.40%] index_select skip256 : Elapsed 0.059 ms (5.936 ms / 100) 5.943 -> 5.948 ( +0.08%) [ +0.07% +0.03% +0.00% / +0.08% +0.29% +0.25%] index_select spread : Elapsed 0.059 ms (5.947 ms / 100) 5.936 -> 5.939 ( +0.05%) [ +0.17% +0.00% +0.08% / +0.05% +0.30% +0.27%] index_select strided 3 : Elapsed 0.059 ms (5.946 ms / 100) 5.928 -> 5.935 ( +0.12%) [ +0.10% +0.13% +0.00% / +0.12% +0.42% +0.35%] index_select random : Elapsed 0.059 ms (5.934 ms / 100) 5.935 -> 5.937 ( +0.03%) [ +0.07% +0.03% +0.00% / +0.03% +0.20% +0.24%] index_select random_sorted : Elapsed 0.059 ms (5.939 ms / 100) B = [16, 40, 20, 5] (stride (200, 1, 3200, 40)) A = [4, 40, 20, 5] (stride (40, 1, 800, 160)) dim = 0 2.482 -> 2.480 ( -0.08%) [ +0.08% +0.00% +0.24% / -0.08% +0.60% +0.56%] index_add_ linear : Elapsed 0.025 ms (2.484 ms / 100) 2.439 -> 2.444 ( +0.21%) [ +0.21% +0.04% +0.00% / +0.21% +0.57% +0.37%] index_copy_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.481 -> 2.483 ( +0.08%) [ +0.20% +0.04% +0.00% / +0.08% +0.48% +0.64%] index_add_ reverse : Elapsed 0.025 ms (2.486 ms / 100) 2.436 -> 2.438 ( +0.08%) [ +0.25% +0.16% +0.00% / +0.08% +0.53% +0.70%] index_copy_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.479 -> 2.485 ( +0.24%) [ +0.00% +0.12% +0.12% / +0.24% +0.69% +0.73%] index_add_ spread : Elapsed 0.025 ms (2.479 ms / 100) 2.436 -> 2.445 ( +0.37%) [ +0.04% +0.00% +0.12% / +0.37% +0.57% +0.66%] index_copy_ spread : Elapsed 0.024 ms (2.437 ms / 100) 2.477 -> 2.480 ( +0.12%) [ +0.00% +0.12% +0.16% / +0.12% +0.69% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.477 ms / 100) 2.435 -> 2.438 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.53% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.485 -> 2.485 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.12% +0.08% +0.00%] index_add_ strided 5 : Elapsed 0.025 ms (2.486 ms / 100) 2.440 -> 2.442 ( +0.08%) [ +0.25% +0.20% +0.00% / +0.25% +0.08% +3.36%] index_copy_ strided 5 : Elapsed 0.024 ms (2.446 ms / 100) 2.485 -> 2.485 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.00% +0.28% +0.28%] index_add_ strided 7 : Elapsed 0.025 ms (2.488 ms / 100) 2.444 -> 2.443 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.25% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.484 -> 2.486 ( +0.08%) [ +0.20% +0.00% +0.00% / +0.16% +0.08% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.489 ms / 100) 2.443 -> 2.442 ( -0.04%) [ +0.08% +0.12% +0.00% / +0.16% +0.20% -0.04%] index_copy_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.486 -> 2.486 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.08% +0.28% +0.00%] index_add_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 2.440 -> 2.446 ( +0.25%) [ +0.33% +0.16% +0.00% / +0.33% +0.33% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.448 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.11% +0.05% +0.00% / -0.11% +0.36% +0.27%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.533 -> 5.531 ( -0.04%) [ +0.02% +0.00% +0.16% / -0.04% +0.54% +0.42%] index_select wrap : Elapsed 0.055 ms (5.534 ms / 100) 5.557 -> 5.552 ( -0.09%) [ +0.05% +0.00% +0.00% / -0.09% +0.40% +0.50%] index_select linear : Elapsed 0.056 ms (5.560 ms / 100) 5.520 -> 5.521 ( +0.02%) [ +0.16% +0.02% +0.00% / +0.02% +0.40% +0.36%] index_select reverse : Elapsed 0.055 ms (5.529 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.04% +0.00% +0.05% / +0.05% +0.36% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.00% +0.04% +0.13% / +0.04% +0.31% +0.31%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.541 -> 5.552 ( +0.20%) [ +0.11% +0.00% +0.27% / +0.20% +0.42% +0.34%] index_select spread : Elapsed 0.055 ms (5.547 ms / 100) 5.518 -> 5.521 ( +0.05%) [ +0.14% +0.05% +0.00% / +0.05% +0.54% +0.43%] index_select strided 3 : Elapsed 0.055 ms (5.526 ms / 100) 5.517 -> 5.523 ( +0.11%) [ +0.00% +0.02% +0.05% / +0.11% +0.36% +0.38%] index_select random : Elapsed 0.055 ms (5.517 ms / 100) 5.531 -> 5.541 ( +0.18%) [ +0.09% +0.00% +0.02% / +0.18% +0.42% +0.31%] index_select random_sorted : Elapsed 0.055 ms (5.536 ms / 100) B = [16, 40, 20, 5] (stride (40, 1, 3200, 640)) A = [4, 40, 20, 5] (stride (1, 80, 4, 3200)) dim = 0 1.143 -> 1.143 ( +0.00%) [ +0.26% +0.09% +0.00% / +0.00% +0.44% +0.87%] index_add_ linear : Elapsed 0.011 ms (1.146 ms / 100) 1.134 -> 1.143 ( +0.79%) [ +0.35% +0.53% +0.00% / +0.79% +1.06% +1.15%] index_copy_ linear : Elapsed 0.011 ms (1.138 ms / 100) 1.139 -> 1.136 ( -0.26%) [ +0.35% +0.00% +0.26% / -0.26% +1.23% +1.14%] index_add_ reverse : Elapsed 0.011 ms (1.143 ms / 100) 1.134 -> 1.137 ( +0.26%) [ +0.44% +0.00% +0.26% / +0.26% +1.23% +1.06%] index_copy_ reverse : Elapsed 0.011 ms (1.139 ms / 100) 1.145 -> 1.151 ( +0.52%) [ +0.00% +0.09% +0.09% / +0.61% +0.70% +0.52%] index_add_ spread : Elapsed 0.011 ms (1.145 ms / 100) 1.146 -> 1.144 ( -0.17%) [ +0.09% +0.00% +0.00% / -0.17% +0.09% +0.35%] index_copy_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.139 -> 1.141 ( +0.18%) [ +0.18% +0.09% +0.00% / +0.18% +0.79% +0.97%] index_add_ strided 3 : Elapsed 0.011 ms (1.141 ms / 100) 1.135 -> 1.140 ( +0.44%) [ +0.00% +0.00% +0.26% / +0.44% +1.15% +0.79%] index_copy_ strided 3 : Elapsed 0.011 ms (1.135 ms / 100) 1.131 -> 1.135 ( +0.35%) [ +0.18% +0.00% +0.18% / +0.35% +2.56% +2.48%] index_add_ strided 5 : Elapsed 0.011 ms (1.133 ms / 100) 1.131 -> 1.131 ( +0.00%) [ +0.27% +0.09% +0.00% / +0.00% +2.03% +1.77%] index_copy_ strided 5 : Elapsed 0.011 ms (1.134 ms / 100) 1.143 -> 1.152 ( +0.79%) [ +0.35% +0.00% +0.35% / +0.79% +1.05% +1.05%] index_add_ strided 7 : Elapsed 0.011 ms (1.147 ms / 100) 1.139 -> 1.146 ( +0.61%) [ +0.26% +0.00% +0.35% / +0.79% +0.61% +0.79%] index_copy_ strided 7 : Elapsed 0.011 ms (1.142 ms / 100) 1.153 -> 1.156 ( +0.26%) [ +0.09% +0.00% +0.52% / +0.35% +0.43% +0.26%] index_add_ perm : Elapsed 0.012 ms (1.154 ms / 100) 1.142 -> 1.147 ( +0.44%) [ +0.18% +0.44% +0.00% / +0.44% +0.70% +0.88%] index_copy_ perm : Elapsed 0.011 ms (1.144 ms / 100) 1.155 -> 1.155 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.00% +0.09%] index_add_ perm_sorted : Elapsed 0.012 ms (1.155 ms / 100) 1.143 -> 1.145 ( +0.17%) [ +0.26% +0.00% +0.35% / +0.17% +0.61% +0.35%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.146 ms / 100) 2.086 -> 2.079 ( -0.34%) [ +0.10% +0.05% +0.00% / -0.34% +0.72% +0.72%] index_select const : Elapsed 0.021 ms (2.088 ms / 100) 2.084 -> 2.089 ( +0.24%) [ +0.43% +0.00% +0.24% / +0.24% +0.77% +0.53%] index_select wrap : Elapsed 0.021 ms (2.093 ms / 100) 2.083 -> 2.084 ( +0.05%) [ +0.10% +0.00% +0.29% / +0.05% +0.53% +0.67%] index_select linear : Elapsed 0.021 ms (2.085 ms / 100) 2.085 -> 2.092 ( +0.34%) [ +0.00% +0.14% +0.00% / +0.34% +0.34% +0.67%] index_select reverse : Elapsed 0.021 ms (2.085 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.29% +0.00% +0.19% / +0.14% +0.62% +0.67%] index_select skip64 : Elapsed 0.021 ms (2.090 ms / 100) 2.079 -> 2.085 ( +0.29%) [ +0.14% +0.00% +0.05% / +0.29% +0.87% +1.01%] index_select skip256 : Elapsed 0.021 ms (2.082 ms / 100) 2.084 -> 2.086 ( +0.10%) [ +0.00% +0.19% +0.24% / +0.10% +0.72% +0.77%] index_select spread : Elapsed 0.021 ms (2.084 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.48% +0.43%] index_select strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.085 -> 2.082 ( -0.14%) [ +0.14% +0.19% +0.00% / -0.14% +0.48% +0.43%] index_select random : Elapsed 0.021 ms (2.088 ms / 100) 2.081 -> 2.085 ( +0.19%) [ +0.14% +0.24% +0.00% / +0.19% +0.86% +1.06%] index_select random_sorted : Elapsed 0.021 ms (2.084 ms / 100) B = [16, 40, 20, 5] (stride (1, 16, 3200, 640)) A = [4, 40, 20, 5] (stride (100, 400, 5, 1)) dim = 0 1.253 -> 1.257 ( +0.32%) [ +0.00% +0.32% +0.24% / +0.32% +1.52% +1.92%] index_add_ linear : Elapsed 0.013 ms (1.253 ms / 100) 1.205 -> 1.206 ( +0.08%) [ +0.00% +0.50% +0.25% / +0.08% +2.24% +2.16%] index_copy_ linear : Elapsed 0.012 ms (1.205 ms / 100) 1.255 -> 1.254 ( -0.08%) [ +0.08% +0.24% +0.00% / -0.08% +1.59% +1.20%] index_add_ reverse : Elapsed 0.013 ms (1.256 ms / 100) 1.212 -> 1.215 ( +0.25%) [ +0.00% +0.17% +0.08% / +0.25% +1.82% +1.73%] index_copy_ reverse : Elapsed 0.012 ms (1.212 ms / 100) 1.300 -> 1.301 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +1.38% +1.38%] index_add_ spread : Elapsed 0.013 ms (1.303 ms / 100) 1.269 -> 1.282 ( +1.02%) [ +0.32% +0.00% +0.16% / +1.02% +2.36% +2.44%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.291 -> 1.299 ( +0.62%) [ +0.00% +0.00% +1.01% / +0.62% +2.09% +2.25%] index_add_ strided 3 : Elapsed 0.013 ms (1.291 ms / 100) 1.269 -> 1.278 ( +0.71%) [ +0.00% +0.16% +0.63% / +0.71% +2.44% +2.21%] index_copy_ strided 3 : Elapsed 0.013 ms (1.269 ms / 100) 1.300 -> 1.311 ( +0.85%) [ +0.08% +0.23% +0.00% / +0.85% +1.69% +1.46%] index_add_ strided 5 : Elapsed 0.013 ms (1.301 ms / 100) 1.271 -> 1.277 ( +0.47%) [ +0.00% +0.00% +0.16% / +0.47% +2.12% +2.20%] index_copy_ strided 5 : Elapsed 0.013 ms (1.271 ms / 100) 1.302 -> 1.307 ( +0.38%) [ +0.00% +0.08% +0.08% / +0.38% +1.54% +1.23%] index_add_ strided 7 : Elapsed 0.013 ms (1.302 ms / 100) 1.271 -> 1.274 ( +0.24%) [ +0.24% +0.00% +0.55% / +0.24% +2.12% +2.44%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.309 -> 1.317 ( +0.61%) [ +0.23% +0.00% +0.61% / +0.61% +1.38% +1.15%] index_add_ perm : Elapsed 0.013 ms (1.312 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +1.96% +1.57%] index_copy_ perm : Elapsed 0.013 ms (1.275 ms / 100) 1.309 -> 1.312 ( +0.23%) [ +0.53% +0.00% +0.53% / +0.23% +1.53% +1.30%] index_add_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 1.269 -> 1.280 ( +0.87%) [ +0.63% +0.00% +0.32% / +0.87% +2.05% +2.29%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) 2.031 -> 2.038 ( +0.34%) [ +0.15% +0.34% +0.00% / +0.34% +1.72% +1.48%] index_select const : Elapsed 0.020 ms (2.034 ms / 100) 2.079 -> 2.084 ( +0.24%) [ +0.00% +0.19% +0.05% / +0.24% +1.35% +1.20%] index_select wrap : Elapsed 0.021 ms (2.079 ms / 100) 2.073 -> 2.080 ( +0.34%) [ +0.39% +0.00% +0.43% / +0.34% +1.50% +1.64%] index_select linear : Elapsed 0.021 ms (2.081 ms / 100) 2.053 -> 2.053 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +1.75% +1.70%] index_select reverse : Elapsed 0.021 ms (2.054 ms / 100) 2.029 -> 2.033 ( +0.20%) [ +0.49% +0.44% +0.00% / +0.20% +1.53% +1.92%] index_select skip64 : Elapsed 0.020 ms (2.039 ms / 100) 2.031 -> 2.031 ( +0.00%) [ +0.20% +0.00% +0.15% / +0.00% +1.38% +1.43%] index_select skip256 : Elapsed 0.020 ms (2.035 ms / 100) 2.057 -> 2.061 ( +0.19%) [ +0.00% +0.34% +0.15% / +0.19% +1.60% +1.75%] index_select spread : Elapsed 0.021 ms (2.057 ms / 100) 2.077 -> 2.085 ( +0.39%) [ +0.34% +0.19% +0.00% / +0.39% +0.96% +1.35%] index_select strided 3 : Elapsed 0.021 ms (2.084 ms / 100) 2.059 -> 2.064 ( +0.24%) [ +0.24% +0.34% +0.00% / +0.24% +1.21% +1.46%] index_select random : Elapsed 0.021 ms (2.064 ms / 100) 2.059 -> 2.062 ( +0.15%) [ +0.00% +0.19% +0.10% / +0.15% +1.60% +1.89%] index_select random_sorted : Elapsed 0.021 ms (2.059 ms / 100) out_shape = [4, 16, 20, 5] in_shape = [4, 40, 20, 5] idx_dim = 1 B = [4, 16, 20, 5] (stride (100, 400, 1, 20)) A = [4, 40, 20, 5] (stride (4000, 100, 1, 20)) dim = 1 3.874 -> 3.869 ( -0.13%) [ +0.03% +0.00% +0.00% / -0.13% +0.70% +0.62%] index_select const : Elapsed 0.039 ms (3.875 ms / 100) 3.881 -> 3.901 ( +0.52%) [ +0.08% +0.00% +0.05% / +0.52% +0.62% +0.75%] index_select wrap : Elapsed 0.039 ms (3.884 ms / 100) 3.877 -> 3.884 ( +0.18%) [ +0.15% +0.10% +0.00% / +0.18% +0.77% +0.72%] index_select linear : Elapsed 0.039 ms (3.883 ms / 100) 3.877 -> 3.878 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.52% +0.49%] index_select reverse : Elapsed 0.039 ms (3.877 ms / 100) 3.874 -> 3.878 ( +0.10%) [ +0.05% +0.00% +0.18% / +0.10% +0.49% +0.46%] index_select skip64 : Elapsed 0.039 ms (3.876 ms / 100) 3.871 -> 3.883 ( +0.31%) [ +0.13% +0.18% +0.00% / +0.31% +0.46% +0.57%] index_select skip256 : Elapsed 0.039 ms (3.876 ms / 100) 3.879 -> 3.880 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.49% +0.44%] index_select spread : Elapsed 0.039 ms (3.879 ms / 100) 3.892 -> 3.895 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.57% +0.85%] index_select strided 3 : Elapsed 0.039 ms (3.892 ms / 100) 3.858 -> 3.863 ( +0.13%) [ +0.05% +0.05% +0.00% / +0.13% +0.47% +0.41%] index_select strided 5 : Elapsed 0.039 ms (3.860 ms / 100) 3.880 -> 3.891 ( +0.28%) [ +0.00% +0.13% +0.13% / +0.28% +0.39% +0.39%] index_select strided 7 : Elapsed 0.039 ms (3.880 ms / 100) 3.890 -> 3.895 ( +0.13%) [ +0.46% +0.00% +0.03% / +0.13% +0.85% +0.46%] index_select strided 8 : Elapsed 0.039 ms (3.908 ms / 100) 3.877 -> 3.878 ( +0.03%) [ +0.00% +0.15% +0.03% / +0.03% +0.59% +0.80%] index_select strided 16 : Elapsed 0.039 ms (3.877 ms / 100) 3.881 -> 3.887 ( +0.15%) [ +0.00% +0.13% +0.13% / +0.15% +0.62% +0.88%] index_select random : Elapsed 0.039 ms (3.881 ms / 100) 3.886 -> 3.889 ( +0.08%) [ +0.00% +0.08% +0.05% / +0.08% +0.51% +0.98%] index_select random_sorted : Elapsed 0.039 ms (3.886 ms / 100) 3.886 -> 3.888 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.57% +0.31%] index_select perm : Elapsed 0.039 ms (3.887 ms / 100) 3.886 -> 3.887 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.26% +0.18%] index_select perm_sorted : Elapsed 0.039 ms (3.886 ms / 100) B = [4, 16, 20, 5] (stride (100, 400, 1, 20)) A = [4, 40, 20, 5] (stride (800, 1, 40, 3200)) dim = 1 4.270 -> 4.272 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.84% +0.59%] index_select const : Elapsed 0.043 ms (4.272 ms / 100) 4.270 -> 4.268 ( -0.05%) [ +0.02% +0.00% +0.02% / -0.05% +0.68% +0.68%] index_select wrap : Elapsed 0.043 ms (4.271 ms / 100) 4.285 -> 4.293 ( +0.19%) [ +0.00% +0.14% +0.12% / +0.19% +0.70% +0.68%] index_select linear : Elapsed 0.043 ms (4.285 ms / 100) 4.271 -> 4.272 ( +0.02%) [ +0.00% +0.21% +0.16% / +0.02% +1.05% +0.80%] index_select reverse : Elapsed 0.043 ms (4.271 ms / 100) 4.255 -> 4.267 ( +0.28%) [ +0.05% +0.07% +0.00% / +0.28% +0.87% +0.94%] index_select skip64 : Elapsed 0.043 ms (4.257 ms / 100) 4.267 -> 4.265 ( -0.05%) [ +0.16% +0.09% +0.00% / -0.05% +0.61% +0.77%] index_select skip256 : Elapsed 0.043 ms (4.274 ms / 100) 4.261 -> 4.260 ( -0.02%) [ +0.00% +0.12% +0.09% / -0.02% +0.56% +0.59%] index_select spread : Elapsed 0.043 ms (4.261 ms / 100) 4.261 -> 4.265 ( +0.09%) [ +0.14% +0.02% +0.00% / +0.09% +0.66% +0.59%] index_select strided 3 : Elapsed 0.043 ms (4.267 ms / 100) 4.270 -> 4.272 ( +0.05%) [ +0.00% +0.19% +0.05% / +0.05% +0.75% +0.56%] index_select strided 5 : Elapsed 0.043 ms (4.270 ms / 100) 4.257 -> 4.261 ( +0.09%) [ +0.07% +0.07% +0.00% / +0.09% +0.78% +0.70%] index_select strided 7 : Elapsed 0.043 ms (4.260 ms / 100) 4.277 -> 4.281 ( +0.09%) [ +0.00% +0.09% +0.07% / +0.09% +0.75% +0.84%] index_select strided 8 : Elapsed 0.043 ms (4.277 ms / 100) 4.260 -> 4.268 ( +0.19%) [ +0.00% +0.09% +0.12% / +0.19% +0.75% +0.70%] index_select strided 16 : Elapsed 0.043 ms (4.260 ms / 100) 4.287 -> 4.287 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.61% +0.65%] index_select random : Elapsed 0.043 ms (4.287 ms / 100) 4.262 -> 4.262 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.73% +0.68%] index_select random_sorted : Elapsed 0.043 ms (4.265 ms / 100) 4.256 -> 4.257 ( +0.02%) [ +0.00% +0.38% +0.35% / +0.02% +1.01% +0.99%] index_select perm : Elapsed 0.043 ms (4.256 ms / 100) 4.269 -> 4.278 ( +0.21%) [ +0.19% +0.47% +0.00% / +0.21% +1.03% +0.96%] index_select perm_sorted : Elapsed 0.043 ms (4.277 ms / 100) B = [4, 16, 20, 5] (stride (1, 400, 20, 4)) A = [4, 40, 20, 5] (stride (4000, 1, 200, 40)) dim = 1 1.353 -> 1.354 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.67% +0.81%] index_select const : Elapsed 0.014 ms (1.353 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.72% +0.65%] index_select wrap : Elapsed 0.014 ms (1.382 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.44% +0.65% +0.00% / +0.07% +0.58% +0.51%] index_select linear : Elapsed 0.014 ms (1.381 ms / 100) 1.385 -> 1.387 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.51% +0.43%] index_select reverse : Elapsed 0.014 ms (1.387 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.00% +0.07% +0.00% / +0.22% -0.07% -0.07%] index_select skip64 : Elapsed 0.014 ms (1.383 ms / 100) 1.357 -> 1.357 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.07% +0.00%] index_select skip256 : Elapsed 0.014 ms (1.360 ms / 100) 1.371 -> 1.374 ( +0.22%) [ +0.44% +0.00% +0.44% / +0.22% +0.22% +0.36%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.391 -> 1.393 ( +0.14%) [ +0.65% +0.79% +0.00% / +0.14% +0.72% +0.65%] index_select strided 3 : Elapsed 0.014 ms (1.400 ms / 100) 1.369 -> 1.369 ( +0.00%) [ +0.07% +0.00% +0.37% / +0.00% +0.15% +0.15%] index_select strided 5 : Elapsed 0.014 ms (1.370 ms / 100) 1.370 -> 1.368 ( -0.15%) [ +0.88% +0.80% +0.00% / +0.88% +0.36% -0.15%] index_select strided 7 : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.29% +0.00% +0.00%] index_select strided 8 : Elapsed 0.014 ms (1.383 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.07% +0.07%] index_select strided 16 : Elapsed 0.013 ms (1.340 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.30% +0.37%] index_select random : Elapsed 0.014 ms (1.357 ms / 100) 1.402 -> 1.399 ( -0.21%) [ +0.07% +0.21% +0.00% / +0.21% -0.14% -0.21%] index_select random_sorted : Elapsed 0.014 ms (1.403 ms / 100) 1.365 -> 1.367 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.44% +0.22%] index_select perm : Elapsed 0.014 ms (1.366 ms / 100) 1.386 -> 1.390 ( +0.29%) [ +0.43% +0.00% +0.14% / +0.43% +0.43% +0.29%] index_select perm_sorted : Elapsed 0.014 ms (1.392 ms / 100) B = [4, 16, 20, 5] (stride (80, 1, 320, 16)) A = [4, 40, 20, 5] (stride (4000, 20, 1, 800)) dim = 1 3.927 -> 3.929 ( +0.05%) [ +0.25% +0.23% +0.00% / +0.05% +0.94% +0.99%] index_select const : Elapsed 0.039 ms (3.937 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.03% +0.10% +0.00% / +0.03% +0.46% +0.46%] index_select wrap : Elapsed 0.039 ms (3.922 ms / 100) 3.916 -> 3.925 ( +0.23%) [ +0.08% +0.00% +0.13% / +0.23% +0.56% +0.56%] index_select linear : Elapsed 0.039 ms (3.919 ms / 100) 3.921 -> 3.927 ( +0.15%) [ +0.03% +0.08% +0.00% / +0.15% +0.82% +0.82%] index_select reverse : Elapsed 0.039 ms (3.922 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.89% +0.84%] index_select skip64 : Elapsed 0.039 ms (3.926 ms / 100) 3.927 -> 3.937 ( +0.25%) [ +0.25% +0.00% +0.00% / +0.25% +0.81% +0.92%] index_select skip256 : Elapsed 0.039 ms (3.937 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.00% +0.10% +0.03% / +0.13% +0.74% +0.71%] index_select spread : Elapsed 0.039 ms (3.920 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.00% +0.10% +0.03% / -0.03% +0.59% +0.71%] index_select strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 3.926 -> 3.926 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.74% +0.74%] index_select strided 5 : Elapsed 0.039 ms (3.926 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.05% +0.00% +0.00% / +0.13% +0.74% +0.71%] index_select strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 3.911 -> 3.911 ( +0.00%) [ +0.00% +0.05% +0.18% / +0.00% +0.66% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.911 ms / 100) 3.900 -> 3.911 ( +0.28%) [ +0.56% +0.49% +0.00% / +0.28% +0.92% +0.79%] index_select strided 16 : Elapsed 0.039 ms (3.922 ms / 100) 3.925 -> 3.928 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.08% +0.64% +0.66%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.15% +0.00% +0.03% / +0.03% +0.77% +0.84%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.920 -> 3.923 ( +0.08%) [ +0.05% +0.00% +0.03% / +0.08% +0.69% +0.74%] index_select perm : Elapsed 0.039 ms (3.922 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.69% +0.74%] index_select perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) B = [4, 16, 20, 5] (stride (5, 20, 320, 1)) A = [4, 40, 20, 5] (stride (200, 5, 800, 1)) dim = 1 3.937 -> 3.935 ( -0.05%) [ +0.00% +0.03% +0.10% / -0.05% +0.58% +0.25%] index_select const : Elapsed 0.039 ms (3.937 ms / 100) 3.945 -> 3.942 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.46% +0.48%] index_select wrap : Elapsed 0.039 ms (3.945 ms / 100) 3.944 -> 3.943 ( -0.03%) [ +0.00% +0.00% +0.10% / -0.03% +0.66% +0.66%] index_select linear : Elapsed 0.039 ms (3.944 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.08% +0.00% +0.13% / +0.08% +0.56% +0.56%] index_select reverse : Elapsed 0.039 ms (3.925 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.13% +0.00% +0.03% / +0.00% +0.69% +0.66%] index_select skip64 : Elapsed 0.039 ms (3.942 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.38% +0.46%] index_select skip256 : Elapsed 0.039 ms (3.936 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.33% +0.36%] index_select spread : Elapsed 0.039 ms (3.933 ms / 100) 3.927 -> 3.929 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.46% +0.43%] index_select strided 3 : Elapsed 0.039 ms (3.929 ms / 100) 3.931 -> 3.934 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.08% +0.46% +0.38%] index_select strided 5 : Elapsed 0.039 ms (3.931 ms / 100) 3.924 -> 3.931 ( +0.18%) [ +0.00% +0.13% +0.08% / +0.18% +0.66% +0.59%] index_select strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 3.918 -> 3.922 ( +0.10%) [ +0.15% +0.00% +0.13% / +0.10% +0.61% +0.64%] index_select strided 8 : Elapsed 0.039 ms (3.924 ms / 100) 3.917 -> 3.920 ( +0.08%) [ +0.00% +0.13% +0.10% / +0.08% +0.31% +0.31%] index_select strided 16 : Elapsed 0.039 ms (3.917 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.41% +0.41%] index_select random : Elapsed 0.039 ms (3.927 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.00% +0.00% +0.13% / +0.10% +0.23% +0.26%] index_select random_sorted : Elapsed 0.039 ms (3.920 ms / 100) 3.936 -> 3.936 ( +0.00%) [ +0.10% +0.13% +0.00% / +0.00% +0.36% +0.43%] index_select perm : Elapsed 0.039 ms (3.940 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.00% +0.05% +0.03% / +0.13% +0.41% +0.38%] index_select perm_sorted : Elapsed 0.039 ms (3.921 ms / 100) out_shape = [4, 40, 16, 5] in_shape = [4, 40, 20, 5] idx_dim = 2 B = [4, 40, 16, 5] (stride (5, 320, 20, 1)) A = [4, 40, 20, 5] (stride (4000, 100, 5, 1)) dim = 2 3.632 -> 3.630 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.88% +0.88%] index_select const : Elapsed 0.036 ms (3.632 ms / 100) 3.632 -> 3.629 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.80% +0.85%] index_select wrap : Elapsed 0.036 ms (3.632 ms / 100) 3.630 -> 3.632 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.94% +0.83%] index_select linear : Elapsed 0.036 ms (3.630 ms / 100) 3.628 -> 3.628 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.85% +0.74%] index_select reverse : Elapsed 0.036 ms (3.629 ms / 100) 3.642 -> 3.635 ( -0.19%) [ +0.05% +0.00% +0.11% / -0.19% +0.60% +0.41%] index_select skip64 : Elapsed 0.036 ms (3.644 ms / 100) 3.640 -> 3.641 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.58% +0.69%] index_select skip256 : Elapsed 0.036 ms (3.641 ms / 100) 3.628 -> 3.627 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.74% +0.58%] index_select spread : Elapsed 0.036 ms (3.629 ms / 100) 3.619 -> 3.618 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.69% +0.72%] index_select strided 3 : Elapsed 0.036 ms (3.621 ms / 100) 3.621 -> 3.620 ( -0.03%) [ +0.06% +0.00% +0.03% / -0.03% +0.64% +0.66%] index_select strided 5 : Elapsed 0.036 ms (3.623 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.08% +0.00% +0.14% / +0.03% +0.83% +0.85%] index_select strided 7 : Elapsed 0.036 ms (3.635 ms / 100) 3.619 -> 3.621 ( +0.06%) [ +0.19% +0.14% +0.00% / +0.06% +0.75% +0.72%] index_select strided 8 : Elapsed 0.036 ms (3.626 ms / 100) 3.638 -> 3.638 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.55% +0.52%] index_select strided 16 : Elapsed 0.036 ms (3.639 ms / 100) 3.638 -> 3.641 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.52% +0.60%] index_select random : Elapsed 0.036 ms (3.638 ms / 100) 3.638 -> 3.639 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.19% +0.16%] index_select random_sorted : Elapsed 0.036 ms (3.639 ms / 100) 3.613 -> 3.615 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.55% +0.50%] index_select perm : Elapsed 0.036 ms (3.614 ms / 100) 3.627 -> 3.626 ( -0.03%) [ +0.00% +0.19% +0.00% / -0.03% +0.36% +0.41%] index_select perm_sorted : Elapsed 0.036 ms (3.627 ms / 100) B = [4, 40, 16, 5] (stride (1, 320, 20, 4)) A = [4, 40, 20, 5] (stride (1, 400, 4, 80)) dim = 2 4.283 -> 4.284 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.37% +0.33%] index_select const : Elapsed 0.043 ms (4.283 ms / 100) 4.297 -> 4.297 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.00% +0.54% +0.51%] index_select wrap : Elapsed 0.043 ms (4.303 ms / 100) 4.284 -> 4.287 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.54% +0.61%] index_select linear : Elapsed 0.043 ms (4.284 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.61% +0.63%] index_select reverse : Elapsed 0.043 ms (4.285 ms / 100) 4.283 -> 4.282 ( -0.02%) [ +0.07% +0.00% +0.07% / -0.02% +0.70% +0.58%] index_select skip64 : Elapsed 0.043 ms (4.286 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.49% +0.54%] index_select skip256 : Elapsed 0.043 ms (4.274 ms / 100) 4.284 -> 4.286 ( +0.05%) [ +0.00% +0.00% +0.07% / +0.05% +0.68% +0.58%] index_select spread : Elapsed 0.043 ms (4.284 ms / 100) 4.292 -> 4.297 ( +0.12%) [ +0.09% +0.16% +0.00% / +0.12% +0.54% +0.58%] index_select strided 3 : Elapsed 0.043 ms (4.296 ms / 100) 4.280 -> 4.285 ( +0.12%) [ +0.02% +0.00% +0.00% / +0.12% +0.44% +0.89%] index_select strided 5 : Elapsed 0.043 ms (4.281 ms / 100) 4.270 -> 4.273 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.68%] index_select strided 7 : Elapsed 0.043 ms (4.273 ms / 100) 4.308 -> 4.306 ( -0.05%) [ +0.09% +0.05% +0.00% / -0.05% +0.46% +0.49%] index_select strided 8 : Elapsed 0.043 ms (4.312 ms / 100) 4.287 -> 4.287 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.63% +0.68%] index_select strided 16 : Elapsed 0.043 ms (4.288 ms / 100) 4.267 -> 4.271 ( +0.09%) [ +0.19% +0.00% +0.00% / +0.09% +0.70% +0.70%] index_select random : Elapsed 0.043 ms (4.275 ms / 100) 4.289 -> 4.296 ( +0.16%) [ +0.00% +0.07% +0.07% / +0.16% +0.75% +0.72%] index_select random_sorted : Elapsed 0.043 ms (4.289 ms / 100) 4.284 -> 4.281 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.65% +0.75%] index_select perm : Elapsed 0.043 ms (4.287 ms / 100) 4.276 -> 4.278 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.70% +3.20%] index_select perm_sorted : Elapsed 0.043 ms (4.280 ms / 100) B = [4, 40, 16, 5] (stride (16, 320, 1, 64)) A = [4, 40, 20, 5] (stride (800, 20, 1, 3200)) dim = 2 3.809 -> 3.808 ( -0.03%) [ +0.08% +0.11% +0.00% / -0.03% +0.81% +0.87%] index_select const : Elapsed 0.038 ms (3.812 ms / 100) 3.825 -> 3.827 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.78% +0.76%] index_select wrap : Elapsed 0.038 ms (3.825 ms / 100) 3.806 -> 3.811 ( +0.13%) [ +0.11% +0.00% +0.05% / +0.13% +0.89% +0.87%] index_select linear : Elapsed 0.038 ms (3.810 ms / 100) 3.806 -> 3.812 ( +0.16%) [ +0.16% +0.05% +0.00% / +0.16% +0.84% +0.81%] index_select reverse : Elapsed 0.038 ms (3.812 ms / 100) 3.821 -> 3.828 ( +0.18%) [ +0.03% +0.08% +0.00% / +0.18% +0.65% +0.60%] index_select skip64 : Elapsed 0.038 ms (3.822 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.08% +0.13% +0.00% / +0.00% +0.79% +0.76%] index_select skip256 : Elapsed 0.038 ms (3.808 ms / 100) 3.826 -> 3.828 ( +0.05%) [ +0.03% +0.00% +0.13% / +0.05% +0.71% +0.71%] index_select spread : Elapsed 0.038 ms (3.827 ms / 100) 3.810 -> 3.814 ( +0.10%) [ +0.03% +0.08% +0.00% / +0.10% +0.63% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.797 -> 3.803 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.76% +0.66%] index_select strided 5 : Elapsed 0.038 ms (3.797 ms / 100) 3.799 -> 3.812 ( +0.34%) [ +0.00% +0.03% +0.03% / +0.34% +0.71% +0.79%] index_select strided 7 : Elapsed 0.038 ms (3.799 ms / 100) 3.802 -> 3.802 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.63% +0.58%] index_select strided 8 : Elapsed 0.038 ms (3.802 ms / 100) 3.811 -> 3.817 ( +0.16%) [ +0.03% +0.05% +0.00% / +0.16% +0.76% +0.76%] index_select strided 16 : Elapsed 0.038 ms (3.812 ms / 100) 3.805 -> 3.806 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.47% +0.58%] index_select random : Elapsed 0.038 ms (3.806 ms / 100) 3.808 -> 3.813 ( +0.13%) [ +0.13% +0.00% +0.16% / +0.13% +0.63% +0.74%] index_select random_sorted : Elapsed 0.038 ms (3.813 ms / 100) 3.816 -> 3.819 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.58% +0.60%] index_select perm : Elapsed 0.038 ms (3.819 ms / 100) 3.808 -> 3.817 ( +0.24%) [ +0.16% +0.00% +0.18% / +0.24% +0.53% +0.63%] index_select perm_sorted : Elapsed 0.038 ms (3.814 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 800, 160)) A = [4, 40, 20, 5] (stride (4000, 100, 1, 20)) dim = 2 1.397 -> 1.396 ( -0.07%) [ +0.07% +0.29% +0.00% / +0.14% -0.07% +0.07%] index_select const : Elapsed 0.014 ms (1.398 ms / 100) 1.394 -> 1.397 ( +0.22%) [ +0.14% +0.00% +0.14% / +0.22% +0.43% +0.29%] index_select wrap : Elapsed 0.014 ms (1.396 ms / 100) 1.393 -> 1.392 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.14% +0.00%] index_select linear : Elapsed 0.014 ms (1.394 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.29% -0.07%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.51% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.374 ms / 100) 1.392 -> 1.392 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.22% +0.00% +0.07%] index_select skip256 : Elapsed 0.014 ms (1.394 ms / 100) 1.394 -> 1.397 ( +0.22%) [ +0.14% +0.14% +0.00% / +0.22% +0.36% +0.36%] index_select spread : Elapsed 0.014 ms (1.396 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.66% +0.58%] index_select strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.00% +0.22% +0.22% / +0.14% +0.43% +0.51%] index_select strided 5 : Elapsed 0.014 ms (1.381 ms / 100) 1.392 -> 1.393 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.72% +0.79%] index_select strided 7 : Elapsed 0.014 ms (1.392 ms / 100) 1.387 -> 1.388 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.79% +0.58%] index_select strided 8 : Elapsed 0.014 ms (1.389 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.22% +0.07% +0.00% / +0.22% +0.80% +0.87%] index_select strided 16 : Elapsed 0.014 ms (1.382 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.95% +0.88%] index_select random : Elapsed 0.014 ms (1.370 ms / 100) 1.390 -> 1.389 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.43% +0.58%] index_select random_sorted : Elapsed 0.014 ms (1.390 ms / 100) 1.391 -> 1.391 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.72% +0.65%] index_select perm : Elapsed 0.014 ms (1.391 ms / 100) 1.368 -> 1.370 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.88% +1.02%] index_select perm_sorted : Elapsed 0.014 ms (1.370 ms / 100) B = [4, 40, 16, 5] (stride (1, 4, 800, 160)) A = [4, 40, 20, 5] (stride (4000, 1, 40, 800)) dim = 2 3.941 -> 3.941 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.66% +0.74%] index_select const : Elapsed 0.039 ms (3.942 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.13% +0.05% +0.00% / +0.03% +0.71% +0.61%] index_select wrap : Elapsed 0.039 ms (3.927 ms / 100) 3.905 -> 3.925 ( +0.51%) [ +0.56% +0.00% +0.20% / +0.51% +0.72% +0.82%] index_select linear : Elapsed 0.039 ms (3.927 ms / 100) 3.921 -> 3.919 ( -0.05%) [ +0.15% +0.00% +0.00% / -0.05% +0.82% +0.77%] index_select reverse : Elapsed 0.039 ms (3.927 ms / 100) 3.939 -> 3.941 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.56% +0.56%] index_select skip64 : Elapsed 0.039 ms (3.939 ms / 100) 3.937 -> 3.938 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.58% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.939 ms / 100) 3.928 -> 3.931 ( +0.08%) [ +0.10% +0.00% +0.10% / +0.08% +0.71% +0.64%] index_select spread : Elapsed 0.039 ms (3.932 ms / 100) 3.922 -> 3.928 ( +0.15%) [ +0.03% +0.00% +0.08% / +0.15% +0.61% +0.61%] index_select strided 3 : Elapsed 0.039 ms (3.923 ms / 100) 3.937 -> 3.944 ( +0.18%) [ +0.00% +0.13% +0.13% / +0.18% +1.07% +0.94%] index_select strided 5 : Elapsed 0.039 ms (3.937 ms / 100) 3.923 -> 3.927 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.74% +0.71%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.941 -> 3.945 ( +0.10%) [ +0.30% +0.36% +0.00% / +0.10% +0.91% +0.91%] index_select strided 8 : Elapsed 0.040 ms (3.953 ms / 100) 3.938 -> 3.943 ( +0.13%) [ +0.00% +0.05% +0.08% / +0.13% +0.69% +0.71%] index_select strided 16 : Elapsed 0.039 ms (3.938 ms / 100) 3.924 -> 3.926 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.48% +0.48%] index_select random : Elapsed 0.039 ms (3.924 ms / 100) 3.928 -> 3.933 ( +0.13%) [ +0.15% +0.20% +0.00% / +0.13% +0.66% +0.69%] index_select random_sorted : Elapsed 0.039 ms (3.934 ms / 100) 3.941 -> 3.940 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.69% +0.79%] index_select perm : Elapsed 0.039 ms (3.941 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.00% +0.00% +0.10% / +0.03% +0.54% +0.51%] index_select perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) B = [4, 40, 16, 5] (stride (640, 16, 1, 2560)) A = [4, 40, 20, 5] (stride (4000, 5, 200, 1)) dim = 2 3.513 -> 3.518 ( +0.14%) [ +0.03% +0.14% +0.00% / +0.14% +0.60% +0.60%] index_select const : Elapsed 0.035 ms (3.514 ms / 100) 3.510 -> 3.512 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.68% +0.63%] index_select wrap : Elapsed 0.035 ms (3.512 ms / 100) 3.514 -> 3.515 ( +0.03%) [ +0.11% +0.00% +0.00% / +0.03% +0.63% +0.63%] index_select linear : Elapsed 0.035 ms (3.518 ms / 100) 3.507 -> 3.507 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.74% +0.71%] index_select reverse : Elapsed 0.035 ms (3.509 ms / 100) 3.522 -> 3.529 ( +0.20%) [ +0.09% +0.00% +0.09% / +0.20% +0.74% +0.71%] index_select skip64 : Elapsed 0.035 ms (3.525 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.60% +0.60%] index_select skip256 : Elapsed 0.035 ms (3.513 ms / 100) 3.510 -> 3.511 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.74% +0.66%] index_select spread : Elapsed 0.035 ms (3.510 ms / 100) 3.518 -> 3.522 ( +0.11%) [ +0.14% +0.17% +0.00% / +0.11% +0.51% +0.51%] index_select strided 3 : Elapsed 0.035 ms (3.523 ms / 100) 3.508 -> 3.508 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.74% +0.68%] index_select strided 5 : Elapsed 0.035 ms (3.508 ms / 100) 3.505 -> 3.506 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.83% +0.80%] index_select strided 7 : Elapsed 0.035 ms (3.506 ms / 100) 3.510 -> 3.510 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.71% +0.68%] index_select strided 8 : Elapsed 0.035 ms (3.511 ms / 100) 3.504 -> 3.503 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.91% +0.86%] index_select strided 16 : Elapsed 0.035 ms (3.505 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.00% +0.06% +0.03% / +0.00% +0.63% +0.63%] index_select random : Elapsed 0.035 ms (3.513 ms / 100) 3.506 -> 3.506 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.77% +0.77%] index_select random_sorted : Elapsed 0.035 ms (3.509 ms / 100) 3.508 -> 3.507 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.80% +0.74%] index_select perm : Elapsed 0.035 ms (3.509 ms / 100) 3.520 -> 3.517 ( -0.09%) [ +0.00% +0.00% +0.03% / -0.09% +0.62% +0.60%] index_select perm_sorted : Elapsed 0.035 ms (3.520 ms / 100) B = [4, 40, 16, 5] (stride (640, 1, 40, 2560)) A = [4, 40, 20, 5] (stride (1, 4, 800, 160)) dim = 2 3.977 -> 3.982 ( +0.13%) [ +0.18% +0.00% +0.25% / +0.13% +0.85% +0.78%] index_select const : Elapsed 0.040 ms (3.984 ms / 100) 3.961 -> 3.964 ( +0.08%) [ +0.13% +0.10% +0.00% / +0.08% +0.66% +0.68%] index_select wrap : Elapsed 0.040 ms (3.966 ms / 100) 3.945 -> 3.946 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.76% +0.68%] index_select linear : Elapsed 0.039 ms (3.945 ms / 100) 3.966 -> 3.975 ( +0.23%) [ +0.15% +0.15% +0.00% / +0.23% +0.96% +0.63%] index_select reverse : Elapsed 0.040 ms (3.972 ms / 100) 3.987 -> 3.991 ( +0.10%) [ +0.25% +0.10% +0.00% / +0.10% +0.63% +0.53%] index_select skip64 : Elapsed 0.040 ms (3.997 ms / 100) 3.977 -> 3.977 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.60% +0.65%] index_select skip256 : Elapsed 0.040 ms (3.981 ms / 100) 3.969 -> 3.974 ( +0.13%) [ +0.00% +0.25% +0.25% / +0.13% +0.66% +0.96%] index_select spread : Elapsed 0.040 ms (3.969 ms / 100) 3.958 -> 3.961 ( +0.08%) [ +0.00% +0.05% +0.13% / +0.08% +0.63% +0.66%] index_select strided 3 : Elapsed 0.040 ms (3.958 ms / 100) 3.956 -> 3.963 ( +0.18%) [ +0.00% +0.15% +0.05% / +0.18% +0.68% +0.66%] index_select strided 5 : Elapsed 0.040 ms (3.956 ms / 100) 3.963 -> 3.963 ( +0.00%) [ +0.20% +0.00% +0.10% / +0.00% +0.83% +0.68%] index_select strided 7 : Elapsed 0.040 ms (3.971 ms / 100) 3.974 -> 3.988 ( +0.35%) [ +0.00% +0.08% +0.18% / +0.35% +0.96% +1.01%] index_select strided 8 : Elapsed 0.040 ms (3.974 ms / 100) 3.963 -> 3.968 ( +0.13%) [ +0.00% +0.03% +0.13% / +0.13% +0.86% +0.61%] index_select strided 16 : Elapsed 0.040 ms (3.963 ms / 100) 3.972 -> 3.975 ( +0.08%) [ +0.13% +0.00% +0.20% / +0.08% +0.45% +0.50%] index_select random : Elapsed 0.040 ms (3.977 ms / 100) 3.958 -> 3.958 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.63% +0.61%] index_select random_sorted : Elapsed 0.040 ms (3.964 ms / 100) 3.970 -> 3.967 ( -0.08%) [ +0.15% +0.00% +0.00% / -0.08% +0.63% +0.60%] index_select perm : Elapsed 0.040 ms (3.976 ms / 100) 3.963 -> 3.961 ( -0.05%) [ +0.03% +0.00% +0.00% / -0.05% +0.56% +0.40%] index_select perm_sorted : Elapsed 0.040 ms (3.964 ms / 100) B = [4, 40, 16, 5] (stride (1, 64, 4, 2560)) A = [4, 40, 20, 5] (stride (4000, 1, 40, 800)) dim = 2 3.942 -> 3.945 ( +0.08%) [ +0.00% +0.03% +0.00% / +0.08% +0.51% +0.41%] index_select const : Elapsed 0.039 ms (3.942 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.08% +0.00% +0.10% / +0.03% +0.46% +0.48%] index_select wrap : Elapsed 0.039 ms (3.934 ms / 100) 3.932 -> 3.934 ( +0.05%) [ +0.10% +0.20% +0.00% / +0.05% +0.64% +3.66%] index_select linear : Elapsed 0.039 ms (3.936 ms / 100) 3.925 -> 3.923 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.31% +0.38%] index_select reverse : Elapsed 0.039 ms (3.926 ms / 100) 3.941 -> 3.943 ( +0.05%) [ +0.15% +0.08% +0.00% / +0.05% +0.56% +0.41%] index_select skip64 : Elapsed 0.039 ms (3.947 ms / 100) 3.942 -> 3.943 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.46% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.944 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.08% +0.00% +0.05% / +0.08% +0.48% +0.48%] index_select spread : Elapsed 0.039 ms (3.927 ms / 100) 3.926 -> 3.926 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.59% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.926 ms / 100) 3.920 -> 3.923 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.64% +0.51%] index_select strided 5 : Elapsed 0.039 ms (3.921 ms / 100) 3.920 -> 3.927 ( +0.18%) [ +0.13% +0.05% +0.00% / +0.18% +0.71% +0.92%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.946 -> 3.947 ( +0.03%) [ +0.00% +0.13% +0.00% / +0.03% +0.71% +0.71%] index_select strided 8 : Elapsed 0.039 ms (3.946 ms / 100) 3.932 -> 3.937 ( +0.13%) [ +0.05% +0.00% +0.20% / +0.13% +0.81% +0.81%] index_select strided 16 : Elapsed 0.039 ms (3.934 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.00% +0.05% +0.03% / +0.08% +0.82% +0.59%] index_select random : Elapsed 0.039 ms (3.923 ms / 100) 3.924 -> 3.943 ( +0.48%) [ +0.18% +0.00% +0.08% / +0.48% +0.84% +0.82%] index_select random_sorted : Elapsed 0.039 ms (3.931 ms / 100) 3.923 -> 3.927 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.84% +0.84%] index_select perm : Elapsed 0.039 ms (3.927 ms / 100) 3.917 -> 3.917 ( +0.00%) [ +0.00% +0.13% +0.20% / +0.00% +0.51% +0.59%] index_select perm_sorted : Elapsed 0.039 ms (3.917 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) A = [4, 40, 20, 5] (stride (40, 1, 800, 160)) dim = 2 1.338 -> 1.339 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +1.20% +1.20%] index_select const : Elapsed 0.013 ms (1.340 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +1.05% +1.12%] index_select wrap : Elapsed 0.013 ms (1.335 ms / 100) 1.339 -> 1.341 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +1.12% +0.97%] index_select linear : Elapsed 0.013 ms (1.341 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.98% +0.98%] index_select reverse : Elapsed 0.013 ms (1.333 ms / 100) 1.336 -> 1.340 ( +0.30%) [ +0.07% +0.00% +0.00% / +0.30% +0.90% +0.90%] index_select skip64 : Elapsed 0.013 ms (1.337 ms / 100) 1.333 -> 1.332 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.98% +0.98%] index_select skip256 : Elapsed 0.013 ms (1.334 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.60% +0.67%] index_select spread : Elapsed 0.013 ms (1.338 ms / 100) 1.339 -> 1.340 ( +0.07%) [ +0.07% +0.30% +0.00% / +0.07% +0.75% +0.90%] index_select strided 3 : Elapsed 0.013 ms (1.340 ms / 100) 1.330 -> 1.332 ( +0.15%) [ +0.23% +0.15% +0.00% / +0.15% +0.90% +0.83%] index_select strided 5 : Elapsed 0.013 ms (1.333 ms / 100) 1.341 -> 1.341 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.67% +0.60%] index_select strided 7 : Elapsed 0.013 ms (1.341 ms / 100) 1.342 -> 1.343 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.97% +0.75%] index_select strided 8 : Elapsed 0.013 ms (1.342 ms / 100) 1.338 -> 1.340 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.82% +0.97%] index_select strided 16 : Elapsed 0.013 ms (1.338 ms / 100) 1.337 -> 1.337 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.37% +0.30%] index_select random : Elapsed 0.013 ms (1.339 ms / 100) 1.342 -> 1.343 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.22% +0.22%] index_select random_sorted : Elapsed 0.013 ms (1.345 ms / 100) 1.343 -> 1.344 ( +0.07%) [ +0.00% +0.00% +0.15% / +0.07% +0.37% +0.30%] index_select perm : Elapsed 0.013 ms (1.343 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.30% +0.15%] index_select perm_sorted : Elapsed 0.013 ms (1.337 ms / 100) out_shape = [4, 40, 20, 16] in_shape = [4, 40, 20, 5] idx_dim = 3 B = [4, 40, 20, 16] (stride (320, 1280, 16, 1)) A = [4, 40, 20, 5] (stride (5, 20, 800, 1)) dim = 3 2.563 -> 2.569 ( +0.23%) [ +0.20% +0.00% +0.16% / +0.23% +0.82% +0.78%] index_add_ linear : Elapsed 0.026 ms (2.568 ms / 100) 2.505 -> 2.507 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.60% +0.92%] index_copy_ linear : Elapsed 0.025 ms (2.507 ms / 100) 2.565 -> 2.572 ( +0.27%) [ +0.16% +0.31% +0.00% / +0.27% +0.74% +0.62%] index_add_ reverse : Elapsed 0.026 ms (2.569 ms / 100) 2.509 -> 2.511 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.64% +0.64%] index_copy_ reverse : Elapsed 0.025 ms (2.512 ms / 100) 2.595 -> 2.594 ( -0.04%) [ +0.15% +0.00% +0.04% / -0.04% +0.58% +0.42%] index_add_ spread : Elapsed 0.026 ms (2.599 ms / 100) 2.573 -> 2.565 ( -0.31%) [ +0.04% +0.12% +0.00% / -0.31% +0.31% +0.47%] index_copy_ spread : Elapsed 0.026 ms (2.574 ms / 100) 2.596 -> 2.598 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.50% +0.46%] index_add_ strided 3 : Elapsed 0.026 ms (2.599 ms / 100) 2.570 -> 2.566 ( -0.16%) [ +0.04% +0.00% +0.08% / -0.16% +0.39% +0.54%] index_copy_ strided 3 : Elapsed 0.026 ms (2.571 ms / 100) 2.594 -> 2.595 ( +0.04%) [ +0.00% +0.19% +0.23% / +0.04% +0.73% +0.85%] index_add_ strided 5 : Elapsed 0.026 ms (2.594 ms / 100) 2.572 -> 2.572 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.58% +0.86%] index_copy_ strided 5 : Elapsed 0.026 ms (2.572 ms / 100) 2.594 -> 2.602 ( +0.31%) [ +0.00% +0.04% +0.00% / +0.31% +0.62% +1.00%] index_add_ strided 7 : Elapsed 0.026 ms (2.594 ms / 100) 2.568 -> 2.569 ( +0.04%) [ +0.16% +0.04% +0.00% / +0.04% +0.70% +1.09%] index_copy_ strided 7 : Elapsed 0.026 ms (2.572 ms / 100) 2.593 -> 2.599 ( +0.23%) [ +0.00% +0.23% +0.08% / +0.23% +0.58% +0.58%] index_add_ perm : Elapsed 0.026 ms (2.593 ms / 100) 2.571 -> 2.571 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.54% +0.58%] index_copy_ perm : Elapsed 0.026 ms (2.571 ms / 100) 2.593 -> 2.589 ( -0.15%) [ +0.08% +0.12% +0.00% / -0.15% +0.39% +0.54%] index_add_ perm_sorted : Elapsed 0.026 ms (2.595 ms / 100) 2.570 -> 2.572 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.58% +0.74%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.571 ms / 100) 5.599 -> 5.601 ( +0.04%) [ +0.02% +0.64% +0.00% / +0.04% +0.27% +1.18%] index_select const : Elapsed 0.056 ms (5.600 ms / 100) 5.618 -> 5.624 ( +0.11%) [ +0.00% +0.09% +0.00% / +0.11% +0.48% +0.87%] index_select wrap : Elapsed 0.056 ms (5.618 ms / 100) 5.594 -> 5.600 ( +0.11%) [ +0.00% +0.00% +0.07% / +0.11% +1.27% +1.22%] index_select linear : Elapsed 0.056 ms (5.594 ms / 100) 5.590 -> 5.624 ( +0.61%) [ +0.77% +0.00% +0.63% / +0.61% +1.13% +1.23%] index_select reverse : Elapsed 0.056 ms (5.633 ms / 100) 5.610 -> 5.612 ( +0.04%) [ +0.20% +0.05% +0.00% / +0.04% +0.59% +0.43%] index_select skip64 : Elapsed 0.056 ms (5.621 ms / 100) 5.615 -> 5.602 ( -0.23%) [ +0.12% +0.00% +0.21% / -0.23% +0.77% +0.73%] index_select skip256 : Elapsed 0.056 ms (5.622 ms / 100) 5.589 -> 5.598 ( +0.16%) [ +0.57% +0.48% +0.00% / +0.16% +0.59% +1.11%] index_select spread : Elapsed 0.056 ms (5.621 ms / 100) 5.619 -> 5.592 ( -0.48%) [ +0.14% +0.00% +0.16% / -0.48% +0.37% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.627 ms / 100) 5.612 -> 5.583 ( -0.52%) [ +0.05% +0.05% +0.00% / -0.52% +0.89% +0.16%] index_select random : Elapsed 0.056 ms (5.615 ms / 100) 5.607 -> 5.601 ( -0.11%) [ +0.04% +0.09% +0.00% / -0.11% +0.73% +1.09%] index_select random_sorted : Elapsed 0.056 ms (5.609 ms / 100) B = [4, 40, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 40, 20, 5] (stride (4000, 100, 5, 1)) dim = 3 2.087 -> 2.090 ( +0.14%) [ +0.00% +0.24% +0.29% / +0.14% +0.72% +0.48%] index_add_ linear : Elapsed 0.021 ms (2.087 ms / 100) 2.035 -> 2.034 ( -0.05%) [ +0.20% +0.00% +0.00% / -0.05% +0.39% +0.34%] index_copy_ linear : Elapsed 0.020 ms (2.039 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.10% +0.00% +0.19% / +0.19% +0.24% +0.10%] index_add_ reverse : Elapsed 0.021 ms (2.092 ms / 100) 2.029 -> 2.036 ( +0.34%) [ +0.64% +0.00% +0.39% / +0.34% +0.39% +0.49%] index_copy_ reverse : Elapsed 0.020 ms (2.042 ms / 100) 2.086 -> 2.091 ( +0.24%) [ +0.19% +0.00% +0.05% / +0.24% +0.38% +0.58%] index_add_ spread : Elapsed 0.021 ms (2.090 ms / 100) 2.032 -> 2.035 ( +0.15%) [ +0.00% +0.05% +0.25% / +0.15% +0.15% +0.20%] index_copy_ spread : Elapsed 0.020 ms (2.032 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.34% +0.29%] index_add_ strided 3 : Elapsed 0.021 ms (2.088 ms / 100) 2.030 -> 2.034 ( +0.20%) [ +0.00% +0.39% +0.10% / +0.20% +0.59% +0.30%] index_copy_ strided 3 : Elapsed 0.020 ms (2.030 ms / 100) 2.091 -> 2.092 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.62% +0.43%] index_add_ strided 5 : Elapsed 0.021 ms (2.093 ms / 100) 2.037 -> 2.035 ( -0.10%) [ +0.00% +0.05% +0.15% / -0.10% +0.29% +0.49%] index_copy_ strided 5 : Elapsed 0.020 ms (2.037 ms / 100) 2.090 -> 2.095 ( +0.24%) [ +0.05% +0.05% +0.00% / +0.24% +0.29% +0.38%] index_add_ strided 7 : Elapsed 0.021 ms (2.091 ms / 100) 2.033 -> 2.039 ( +0.30%) [ +0.10% +0.25% +0.00% / +0.30% +0.44% +0.69%] index_copy_ strided 7 : Elapsed 0.020 ms (2.035 ms / 100) 2.090 -> 2.087 ( -0.14%) [ +0.10% +0.19% +0.00% / -0.14% +0.24% +0.33%] index_add_ perm : Elapsed 0.021 ms (2.092 ms / 100) 2.031 -> 2.032 ( +0.05%) [ +0.25% +0.30% +0.00% / +0.05% +0.54% +0.44%] index_copy_ perm : Elapsed 0.020 ms (2.036 ms / 100) 2.092 -> 2.092 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.24% +0.14%] index_add_ perm_sorted : Elapsed 0.021 ms (2.093 ms / 100) 2.034 -> 2.036 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.34% +0.49%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.036 ms / 100) 4.208 -> 4.237 ( +0.69%) [ +0.00% +0.05% +0.74% / +0.78% +1.45% +0.69%] index_select const : Elapsed 0.042 ms (4.208 ms / 100) 4.204 -> 4.215 ( +0.26%) [ +0.00% +0.38% +0.40% / +0.26% +0.83% +0.76%] index_select wrap : Elapsed 0.042 ms (4.204 ms / 100) 4.227 -> 4.225 ( -0.05%) [ +0.31% +0.31% +0.00% / -0.05% +0.78% +0.69%] index_select linear : Elapsed 0.042 ms (4.240 ms / 100) 4.218 -> 4.239 ( +0.50%) [ +0.00% +0.07% +0.00% / +0.62% +0.50% +0.52%] index_select reverse : Elapsed 0.042 ms (4.218 ms / 100) 4.238 -> 4.243 ( +0.12%) [ +0.14% +0.45% +0.00% / +0.12% +0.76% +0.64%] index_select skip64 : Elapsed 0.042 ms (4.244 ms / 100) 4.242 -> 4.241 ( -0.02%) [ +0.07% +0.00% +0.19% / -0.02% +0.59% +0.57%] index_select skip256 : Elapsed 0.042 ms (4.245 ms / 100) 4.216 -> 4.221 ( +0.12%) [ +0.52% +0.00% +0.55% / +0.12% +0.50% +0.50%] index_select spread : Elapsed 0.042 ms (4.238 ms / 100) 4.211 -> 4.207 ( -0.09%) [ +0.00% +0.09% +0.09% / -0.09% +0.57% +0.50%] index_select strided 3 : Elapsed 0.042 ms (4.211 ms / 100) 4.209 -> 4.215 ( +0.14%) [ +0.00% +0.57% +0.64% / +0.14% +0.71% +0.67%] index_select random : Elapsed 0.042 ms (4.209 ms / 100) 4.238 -> 4.253 ( +0.35%) [ +0.14% +0.00% +0.35% / +0.35% +0.85% +0.68%] index_select random_sorted : Elapsed 0.042 ms (4.244 ms / 100) B = [4, 40, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 40, 20, 5] (stride (100, 400, 1, 20)) dim = 3 2.353 -> 2.355 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.34% +0.51%] index_add_ linear : Elapsed 0.024 ms (2.353 ms / 100) 2.300 -> 2.300 ( +0.00%) [ +0.00% +0.17% +0.04% / +0.00% +0.61% +0.70%] index_copy_ linear : Elapsed 0.023 ms (2.300 ms / 100) 2.349 -> 2.348 ( -0.04%) [ +0.17% +0.30% +0.00% / -0.04% +0.30% +0.64%] index_add_ reverse : Elapsed 0.024 ms (2.353 ms / 100) 2.291 -> 2.303 ( +0.52%) [ +0.00% +0.39% +0.31% / +0.52% +0.65% +0.52%] index_copy_ reverse : Elapsed 0.023 ms (2.291 ms / 100) 2.355 -> 2.357 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.72% +0.51%] index_add_ spread : Elapsed 0.024 ms (2.359 ms / 100) 2.299 -> 2.305 ( +0.26%) [ +0.43% +0.35% +0.00% / +0.26% +0.74% +0.83%] index_copy_ spread : Elapsed 0.023 ms (2.309 ms / 100) 2.348 -> 2.353 ( +0.21%) [ +0.00% +0.04% +0.04% / +0.21% +0.55% +0.64%] index_add_ strided 3 : Elapsed 0.023 ms (2.348 ms / 100) 2.290 -> 2.291 ( +0.04%) [ +0.00% +0.44% +0.31% / +0.04% +0.96% +1.05%] index_copy_ strided 3 : Elapsed 0.023 ms (2.290 ms / 100) 2.347 -> 2.348 ( +0.04%) [ +0.00% +0.17% +0.13% / +0.04% +0.43% +0.43%] index_add_ strided 5 : Elapsed 0.023 ms (2.347 ms / 100) 2.293 -> 2.294 ( +0.04%) [ +0.13% +0.00% +0.09% / +0.04% +0.61% +0.78%] index_copy_ strided 5 : Elapsed 0.023 ms (2.296 ms / 100) 2.347 -> 2.350 ( +0.13%) [ +0.00% +0.04% +0.17% / +0.13% +0.51% +0.68%] index_add_ strided 7 : Elapsed 0.023 ms (2.347 ms / 100) 2.295 -> 2.295 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.00% +0.65% +1.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.298 ms / 100) 2.344 -> 2.349 ( +0.21%) [ +0.09% +0.00% +0.17% / +0.21% +1.02% +0.73%] index_add_ perm : Elapsed 0.023 ms (2.346 ms / 100) 2.288 -> 2.298 ( +0.44%) [ +0.00% +0.31% +0.13% / +0.44% +1.22% +1.14%] index_copy_ perm : Elapsed 0.023 ms (2.288 ms / 100) 2.355 -> 2.356 ( +0.04%) [ +0.00% +0.00% +0.08% / +0.04% +0.55% +0.55%] index_add_ perm_sorted : Elapsed 0.024 ms (2.355 ms / 100) 2.299 -> 2.303 ( +0.17%) [ +0.09% +0.17% +0.00% / +0.17% +0.83% +0.74%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.301 ms / 100) 4.851 -> 4.864 ( +0.27%) [ +0.08% +0.00% +0.12% / +0.27% +0.70% +0.72%] index_select const : Elapsed 0.049 ms (4.855 ms / 100) 4.872 -> 4.888 ( +0.33%) [ +0.08% +0.04% +0.00% / +0.33% +0.82% +0.84%] index_select wrap : Elapsed 0.049 ms (4.876 ms / 100) 4.906 -> 4.914 ( +0.16%) [ +0.00% +0.14% +0.14% / +0.16% +0.94% +1.02%] index_select linear : Elapsed 0.049 ms (4.906 ms / 100) 4.905 -> 4.911 ( +0.12%) [ +0.00% +0.20% +0.12% / +0.12% +0.75% +0.67%] index_select reverse : Elapsed 0.049 ms (4.905 ms / 100) 4.842 -> 4.846 ( +0.08%) [ +0.08% +0.02% +0.00% / +0.08% +0.54% +0.76%] index_select skip64 : Elapsed 0.048 ms (4.846 ms / 100) 4.849 -> 4.863 ( +0.29%) [ +0.02% +0.31% +0.00% / +0.29% +1.03% +0.72%] index_select skip256 : Elapsed 0.049 ms (4.850 ms / 100) 4.902 -> 4.900 ( -0.04%) [ +0.06% +0.00% +0.00% / -0.04% +0.75% +0.65%] index_select spread : Elapsed 0.049 ms (4.905 ms / 100) 4.906 -> 4.917 ( +0.22%) [ +0.04% +0.00% +0.29% / +0.22% +0.82% +0.59%] index_select strided 3 : Elapsed 0.049 ms (4.908 ms / 100) 4.888 -> 4.907 ( +0.39%) [ +0.41% +0.55% +0.00% / +0.39% +1.15% +1.13%] index_select random : Elapsed 0.049 ms (4.908 ms / 100) 4.890 -> 4.895 ( +0.10%) [ +0.08% +0.00% +0.04% / +0.10% +0.96% +0.67%] index_select random_sorted : Elapsed 0.049 ms (4.894 ms / 100) B = [4, 40, 20, 16] (stride (1, 4, 2560, 160)) A = [4, 40, 20, 5] (stride (40, 1, 160, 3200)) dim = 3 2.350 -> 2.356 ( +0.26%) [ +0.00% +0.26% +0.00% / +0.26% +0.55% +0.34%] index_add_ linear : Elapsed 0.023 ms (2.350 ms / 100) 2.293 -> 2.296 ( +0.13%) [ +0.00% +0.04% +0.04% / +0.13% +0.48% +0.31%] index_copy_ linear : Elapsed 0.023 ms (2.293 ms / 100) 2.345 -> 2.350 ( +0.21%) [ +0.13% +0.04% +0.00% / +0.21% +0.47% +0.34%] index_add_ reverse : Elapsed 0.023 ms (2.348 ms / 100) 2.290 -> 2.291 ( +0.04%) [ +0.00% +0.22% +0.26% / +0.04% +0.26% +0.35%] index_copy_ reverse : Elapsed 0.023 ms (2.290 ms / 100) 2.337 -> 2.342 ( +0.21%) [ +0.47% +0.30% +0.00% / +0.21% +0.26% +0.47%] index_add_ spread : Elapsed 0.023 ms (2.348 ms / 100) 2.281 -> 2.286 ( +0.22%) [ +0.22% +0.00% +0.57% / +0.26% +0.31% +0.22%] index_copy_ spread : Elapsed 0.023 ms (2.286 ms / 100) 2.353 -> 2.356 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +0.51% +0.34%] index_add_ strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.295 -> 2.297 ( +0.09%) [ +0.09% +0.00% +0.22% / +0.17% +0.39% +0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.297 ms / 100) 2.339 -> 2.340 ( +0.04%) [ +0.00% +0.17% +0.13% / +0.04% +0.51% +0.34%] index_add_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.284 -> 2.287 ( +0.13%) [ +0.13% +0.26% +0.00% / +0.13% +0.44% +0.18%] index_copy_ strided 5 : Elapsed 0.023 ms (2.287 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.30% +0.21% +0.00% / +0.26% +0.51% +0.39%] index_add_ strided 7 : Elapsed 0.023 ms (2.344 ms / 100) 2.282 -> 2.286 ( +0.18%) [ +0.13% +0.04% +0.00% / +0.18% +0.39% +0.26%] index_copy_ strided 7 : Elapsed 0.023 ms (2.285 ms / 100) 2.353 -> 2.352 ( -0.04%) [ +0.08% +0.21% +0.00% / -0.04% +0.38% +0.34%] index_add_ perm : Elapsed 0.024 ms (2.355 ms / 100) 2.294 -> 2.299 ( +0.22%) [ +0.22% +0.00% +0.13% / +0.26% +0.22% +0.35%] index_copy_ perm : Elapsed 0.023 ms (2.299 ms / 100) 2.336 -> 2.337 ( +0.04%) [ +0.34% +0.26% +0.00% / +0.04% +0.26% +0.51%] index_add_ perm_sorted : Elapsed 0.023 ms (2.344 ms / 100) 2.280 -> 2.283 ( +0.13%) [ +0.04% +0.00% +0.18% / +0.13% +0.31% +0.44%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.281 ms / 100) 4.994 -> 4.993 ( -0.02%) [ +0.00% +0.04% +0.16% / -0.02% +0.48% +0.38%] index_select const : Elapsed 0.050 ms (4.994 ms / 100) 5.010 -> 5.014 ( +0.08%) [ +0.06% +0.00% +0.06% / +0.08% +0.32% +0.26%] index_select wrap : Elapsed 0.050 ms (5.013 ms / 100) 5.033 -> 5.033 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.38% +0.38%] index_select linear : Elapsed 0.050 ms (5.036 ms / 100) 5.001 -> 5.002 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.58% +0.54%] index_select reverse : Elapsed 0.050 ms (5.001 ms / 100) 4.951 -> 4.957 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.55% +0.42%] index_select skip64 : Elapsed 0.050 ms (4.954 ms / 100) 4.949 -> 4.950 ( +0.02%) [ +0.02% +0.00% +0.04% / +0.02% +0.46% +0.55%] index_select skip256 : Elapsed 0.049 ms (4.950 ms / 100) 5.015 -> 5.020 ( +0.10%) [ +0.00% +0.06% +0.12% / +0.10% +0.42% +0.44%] index_select spread : Elapsed 0.050 ms (5.015 ms / 100) 4.993 -> 4.995 ( +0.04%) [ +0.00% +0.02% +0.00% / +0.04% +0.44% +0.32%] index_select strided 3 : Elapsed 0.050 ms (4.993 ms / 100) 5.018 -> 5.018 ( +0.00%) [ +0.02% +0.06% +0.00% / +0.00% +0.68% +0.78%] index_select random : Elapsed 0.050 ms (5.019 ms / 100) 4.952 -> 4.954 ( +0.04%) [ +0.00% +0.02% +0.00% / +0.04% +0.57% +0.59%] index_select random_sorted : Elapsed 0.050 ms (4.952 ms / 100) out_shape = [16, 4, 20, 40] in_shape = [5, 4, 20, 40] idx_dim = 0 B = [16, 4, 20, 40] (stride (3200, 20, 1, 80)) A = [5, 4, 20, 40] (stride (800, 4000, 1, 20)) dim = 0 2.313 -> 2.319 ( +0.26%) [ +0.00% +0.17% +0.22% / +0.26% +0.82% +0.78%] index_add_ linear : Elapsed 0.023 ms (2.313 ms / 100) 2.258 -> 2.258 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.66% +0.84%] index_copy_ linear : Elapsed 0.023 ms (2.261 ms / 100) 2.306 -> 2.303 ( -0.13%) [ +0.13% +0.13% +0.00% / -0.13% +0.87% +1.00%] index_add_ reverse : Elapsed 0.023 ms (2.309 ms / 100) 2.247 -> 2.246 ( -0.04%) [ +0.13% +0.00% +0.13% / -0.04% +1.20% +1.25%] index_copy_ reverse : Elapsed 0.023 ms (2.250 ms / 100) 2.323 -> 2.321 ( -0.09%) [ +0.00% +0.04% +0.00% / -0.09% +0.52% +0.47%] index_add_ spread : Elapsed 0.023 ms (2.323 ms / 100) 2.257 -> 2.259 ( +0.09%) [ +0.00% +0.04% +0.00% / +0.09% +0.93% +1.06%] index_copy_ spread : Elapsed 0.023 ms (2.257 ms / 100) 2.323 -> 2.326 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.77% +0.90%] index_add_ strided 3 : Elapsed 0.023 ms (2.326 ms / 100) 2.257 -> 2.261 ( +0.18%) [ +0.00% +0.31% +0.13% / +0.18% +0.93% +0.97%] index_copy_ strided 3 : Elapsed 0.023 ms (2.257 ms / 100) 2.306 -> 2.305 ( -0.04%) [ +0.04% +0.22% +0.00% / -0.04% +1.00% +0.95%] index_add_ strided 5 : Elapsed 0.023 ms (2.307 ms / 100) 2.249 -> 2.250 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +1.29% +1.02%] index_copy_ strided 5 : Elapsed 0.022 ms (2.249 ms / 100) 2.313 -> 2.317 ( +0.17%) [ +0.09% +0.00% +0.04% / +0.17% +0.69% +0.65%] index_add_ strided 7 : Elapsed 0.023 ms (2.315 ms / 100) 2.252 -> 2.257 ( +0.22%) [ +0.00% +0.09% +0.09% / +0.22% +0.84% +1.07%] index_copy_ strided 7 : Elapsed 0.023 ms (2.252 ms / 100) 2.319 -> 2.322 ( +0.13%) [ +0.17% +0.00% +0.00% / +0.13% +1.08% +0.69%] index_add_ perm : Elapsed 0.023 ms (2.323 ms / 100) 2.252 -> 2.257 ( +0.22%) [ +0.00% +0.13% +0.00% / +0.22% +1.15% +1.07%] index_copy_ perm : Elapsed 0.023 ms (2.252 ms / 100) 2.318 -> 2.316 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.95% +0.91%] index_add_ perm_sorted : Elapsed 0.023 ms (2.320 ms / 100) 2.255 -> 2.258 ( +0.13%) [ +0.18% +0.04% +0.00% / +0.13% +0.98% +0.89%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.259 ms / 100) 4.943 -> 4.952 ( +0.18%) [ +0.18% +0.08% +0.00% / +0.18% +0.59% +0.67%] index_select const : Elapsed 0.050 ms (4.952 ms / 100) 4.903 -> 4.906 ( +0.06%) [ +0.04% +0.02% +0.00% / +0.06% +0.88% +0.61%] index_select wrap : Elapsed 0.049 ms (4.905 ms / 100) 4.955 -> 4.956 ( +0.02%) [ +0.00% +0.10% +0.20% / +0.02% +1.07% +0.99%] index_select linear : Elapsed 0.050 ms (4.955 ms / 100) 4.952 -> 4.947 ( -0.10%) [ +0.08% +0.04% +0.00% / -0.10% +0.40% +0.50%] index_select reverse : Elapsed 0.050 ms (4.956 ms / 100) 4.897 -> 4.885 ( -0.25%) [ +0.10% +0.00% +0.02% / -0.25% +0.67% +0.65%] index_select skip64 : Elapsed 0.049 ms (4.902 ms / 100) 4.943 -> 4.950 ( +0.14%) [ +0.10% +0.04% +0.00% / +0.14% +0.67% +1.82%] index_select skip256 : Elapsed 0.049 ms (4.948 ms / 100) 4.936 -> 4.934 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.75% +0.69%] index_select spread : Elapsed 0.049 ms (4.936 ms / 100) 4.965 -> 4.966 ( +0.02%) [ +0.02% +0.00% +0.04% / +0.02% +1.01% +0.95%] index_select strided 3 : Elapsed 0.050 ms (4.966 ms / 100) 4.936 -> 4.937 ( +0.02%) [ +0.06% +0.06% +0.00% / +0.02% +0.77% +0.67%] index_select random : Elapsed 0.049 ms (4.939 ms / 100) 4.933 -> 4.931 ( -0.04%) [ +0.24% +0.06% +0.00% / -0.04% +0.55% +0.63%] index_select random_sorted : Elapsed 0.049 ms (4.945 ms / 100) B = [16, 4, 20, 40] (stride (1, 12800, 640, 16)) A = [5, 4, 20, 40] (stride (3200, 1, 160, 4)) dim = 0 1.936 -> 1.939 ( +0.15%) [ +0.21% +0.00% +0.00% / +0.15% +1.14% +1.03%] index_add_ linear : Elapsed 0.019 ms (1.940 ms / 100) 1.892 -> 1.894 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.69% +0.79%] index_copy_ linear : Elapsed 0.019 ms (1.894 ms / 100) 1.943 -> 1.944 ( +0.05%) [ +0.05% +0.26% +0.00% / +0.05% +0.41% +0.36%] index_add_ reverse : Elapsed 0.019 ms (1.944 ms / 100) 1.898 -> 1.900 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.21% +0.32%] index_copy_ reverse : Elapsed 0.019 ms (1.899 ms / 100) 1.974 -> 1.984 ( +0.51%) [ +0.25% +0.35% +0.00% / +0.56% +0.51% +5.72%] index_add_ spread : Elapsed 0.020 ms (1.979 ms / 100) 1.972 -> 1.972 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.35% +0.41%] index_copy_ spread : Elapsed 0.020 ms (1.974 ms / 100) 1.967 -> 1.968 ( +0.05%) [ +0.15% +0.00% +0.15% / +0.05% +0.81% +0.97%] index_add_ strided 3 : Elapsed 0.020 ms (1.970 ms / 100) 1.962 -> 1.963 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.41% +0.56%] index_copy_ strided 3 : Elapsed 0.020 ms (1.962 ms / 100) 1.978 -> 1.979 ( +0.05%) [ +0.35% +0.15% +0.00% / +0.05% +0.71% +0.61%] index_add_ strided 5 : Elapsed 0.020 ms (1.985 ms / 100) 1.969 -> 1.971 ( +0.10%) [ +0.00% +0.46% +0.20% / +0.10% +0.61% +0.36%] index_copy_ strided 5 : Elapsed 0.020 ms (1.969 ms / 100) 1.980 -> 1.983 ( +0.15%) [ +0.25% +0.00% +0.00% / +0.15% +0.66% +0.51%] index_add_ strided 7 : Elapsed 0.020 ms (1.985 ms / 100) 1.971 -> 1.973 ( +0.10%) [ +0.20% +0.36% +0.00% / +0.10% +0.25% +0.51%] index_copy_ strided 7 : Elapsed 0.020 ms (1.975 ms / 100) 1.969 -> 1.970 ( +0.05%) [ +0.00% +0.15% +0.05% / +0.05% +0.81% +0.91%] index_add_ perm : Elapsed 0.020 ms (1.969 ms / 100) 1.956 -> 1.962 ( +0.31%) [ +0.36% +0.00% +0.10% / +0.31% +0.72% +0.66%] index_copy_ perm : Elapsed 0.020 ms (1.963 ms / 100) 1.975 -> 1.974 ( -0.05%) [ +0.00% +0.30% +0.05% / -0.05% +0.30% +0.46%] index_add_ perm_sorted : Elapsed 0.020 ms (1.975 ms / 100) 1.968 -> 1.968 ( +0.00%) [ +0.20% +0.00% +0.20% / +0.00% +0.61% +0.36%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.972 ms / 100) 3.794 -> 3.798 ( +0.11%) [ +0.00% +0.18% +0.13% / +0.11% +0.79% +0.87%] index_select const : Elapsed 0.038 ms (3.794 ms / 100) 3.817 -> 3.825 ( +0.21%) [ +0.18% +0.05% +0.00% / +0.21% +0.60% +0.73%] index_select wrap : Elapsed 0.038 ms (3.824 ms / 100) 3.825 -> 3.832 ( +0.18%) [ +0.18% +0.00% +0.10% / +0.18% +0.42% +0.42%] index_select linear : Elapsed 0.038 ms (3.832 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.03% +0.00% +0.11% / +0.03% +0.63% +0.58%] index_select reverse : Elapsed 0.038 ms (3.803 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.11% +0.03% +0.00% / +0.05% +0.68% +0.74%] index_select skip64 : Elapsed 0.038 ms (3.808 ms / 100) 3.803 -> 3.804 ( +0.03%) [ +0.11% +0.00% +0.00% / +0.03% +0.63% +0.71%] index_select skip256 : Elapsed 0.038 ms (3.807 ms / 100) 3.797 -> 3.807 ( +0.26%) [ +0.11% +0.21% +0.00% / +0.26% +0.74% +0.68%] index_select spread : Elapsed 0.038 ms (3.801 ms / 100) 3.816 -> 3.815 ( -0.03%) [ +0.08% +0.00% +0.05% / -0.03% +0.71% +0.79%] index_select strided 3 : Elapsed 0.038 ms (3.819 ms / 100) 3.798 -> 3.806 ( +0.21%) [ +0.05% +0.08% +0.00% / +0.21% +0.97% +0.95%] index_select random : Elapsed 0.038 ms (3.800 ms / 100) 3.800 -> 3.804 ( +0.11%) [ +0.05% +0.00% +0.05% / +0.11% +0.87% +0.89%] index_select random_sorted : Elapsed 0.038 ms (3.802 ms / 100) B = [16, 4, 20, 40] (stride (80, 20, 1, 1280)) A = [5, 4, 20, 40] (stride (800, 4000, 40, 1)) dim = 0 0.833 -> 0.830 ( -0.36%) [ +0.00% +0.00% +0.12% / +1.08% -0.36% +0.12%] index_add_ linear : Elapsed 0.008 ms (0.833 ms / 100) 0.861 -> 0.850 ( -1.28%) [ +0.12% +0.12% +0.00% / -0.23% -1.28% -1.16%] index_copy_ linear : Elapsed 0.009 ms (0.862 ms / 100) 0.842 -> 0.837 ( -0.59%) [ +0.24% +0.00% +0.24% / +0.71% -0.59% -0.59%] index_add_ reverse : Elapsed 0.008 ms (0.844 ms / 100) 0.860 -> 0.856 ( -0.47%) [ +0.00% +0.12% +0.00% / +0.35% -0.47% -0.47%] index_copy_ reverse : Elapsed 0.009 ms (0.860 ms / 100) 0.836 -> 0.835 ( -0.12%) [ +0.36% +0.00% +0.24% / -0.12% +0.24% +0.36%] index_add_ spread : Elapsed 0.008 ms (0.839 ms / 100) 0.855 -> 0.855 ( +0.00%) [ +0.00% +0.58% +0.12% / +0.70% +0.12% +0.00%] index_copy_ spread : Elapsed 0.009 ms (0.855 ms / 100) 0.837 -> 0.836 ( -0.12%) [ +0.00% +0.48% +0.36% / +0.24% +0.36% -0.12%] index_add_ strided 3 : Elapsed 0.008 ms (0.837 ms / 100) 0.854 -> 0.854 ( +0.00%) [ +0.00% +0.35% +0.47% / +0.00% +0.35% +0.23%] index_copy_ strided 3 : Elapsed 0.009 ms (0.854 ms / 100) 0.831 -> 0.832 ( +0.12%) [ +0.00% +0.36% +0.36% / +0.36% +0.24% +0.12%] index_add_ strided 5 : Elapsed 0.008 ms (0.831 ms / 100) 0.854 -> 0.847 ( -0.82%) [ +0.12% +0.23% +0.00% / -0.12% -0.59% -0.82%] index_copy_ strided 5 : Elapsed 0.009 ms (0.855 ms / 100) 0.829 -> 0.830 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.36% +0.12% +0.36%] index_add_ strided 7 : Elapsed 0.008 ms (0.830 ms / 100) 0.851 -> 0.850 ( -0.12%) [ +0.12% +0.00% +0.35% / +0.35% -0.12% +0.00%] index_copy_ strided 7 : Elapsed 0.009 ms (0.852 ms / 100) 0.836 -> 0.838 ( +0.24%) [ +0.00% +0.36% +0.24% / +0.36% +0.72% +0.24%] index_add_ perm : Elapsed 0.008 ms (0.836 ms / 100) 0.862 -> 0.857 ( -0.58%) [ +0.12% +0.12% +0.00% / +0.35% -0.46% -0.58%] index_copy_ perm : Elapsed 0.009 ms (0.863 ms / 100) 0.838 -> 0.837 ( -0.12%) [ +0.12% +0.00% +0.36% / +0.36% +0.24% -0.12%] index_add_ perm_sorted : Elapsed 0.008 ms (0.839 ms / 100) 0.862 -> 0.856 ( -0.70%) [ +0.70% +0.12% +0.00% / +0.35% -0.35% -0.70%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.868 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.35% +0.12% +0.00% / +0.47% +0.06% +0.23%] index_select const : Elapsed 0.017 ms (1.719 ms / 100) 1.716 -> 1.713 ( -0.17%) [ +0.29% +0.00% +0.06% / +0.06% -0.17% +0.17%] index_select wrap : Elapsed 0.017 ms (1.721 ms / 100) 1.717 -> 1.714 ( -0.17%) [ +0.29% +0.12% +0.00% / +0.00% -0.12% -0.17%] index_select linear : Elapsed 0.017 ms (1.722 ms / 100) 1.713 -> 1.712 ( -0.06%) [ +0.35% +0.00% +0.06% / +0.18% -0.06% +0.06%] index_select reverse : Elapsed 0.017 ms (1.719 ms / 100) 1.712 -> 1.712 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.35% +0.00% +0.41%] index_select skip64 : Elapsed 0.017 ms (1.712 ms / 100) 1.712 -> 1.714 ( +0.12%) [ +0.41% +0.00% +0.23% / +0.41% +0.12% +0.23%] index_select skip256 : Elapsed 0.017 ms (1.719 ms / 100) 1.715 -> 1.713 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.17% +0.17%] index_select spread : Elapsed 0.017 ms (1.715 ms / 100) 1.719 -> 1.721 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.23% +0.17% +0.12%] index_select strided 3 : Elapsed 0.017 ms (1.719 ms / 100) 1.714 -> 1.716 ( +0.12%) [ +0.35% +0.00% +0.18% / +0.12% +0.12% +0.23%] index_select random : Elapsed 0.017 ms (1.720 ms / 100) 1.711 -> 1.719 ( +0.47%) [ +0.00% +0.41% +0.35% / +0.53% +0.58% +0.47%] index_select random_sorted : Elapsed 0.017 ms (1.711 ms / 100) B = [16, 4, 20, 40] (stride (20, 320, 1, 1280)) A = [5, 4, 20, 40] (stride (4, 1, 800, 20)) dim = 0 2.340 -> 2.345 ( +0.21%) [ +0.00% +0.00% +0.09% / +0.21% +0.51% +0.73%] index_add_ linear : Elapsed 0.023 ms (2.340 ms / 100) 2.272 -> 2.271 ( -0.04%) [ +0.00% +0.04% +0.18% / -0.04% +0.48% +0.44%] index_copy_ linear : Elapsed 0.023 ms (2.272 ms / 100) 2.318 -> 2.320 ( +0.09%) [ +0.00% +0.04% +0.13% / +0.09% +0.47% +0.60%] index_add_ reverse : Elapsed 0.023 ms (2.318 ms / 100) 2.261 -> 2.262 ( +0.04%) [ +0.00% +0.18% +0.18% / +0.04% +0.53% +0.57%] index_copy_ reverse : Elapsed 0.023 ms (2.261 ms / 100) 2.306 -> 2.310 ( +0.17%) [ +0.13% +0.17% +0.00% / +0.17% +0.65% +0.48%] index_add_ spread : Elapsed 0.023 ms (2.309 ms / 100) 2.251 -> 2.253 ( +0.09%) [ +0.18% +0.00% +0.04% / +0.09% +0.62% +0.58%] index_copy_ spread : Elapsed 0.023 ms (2.255 ms / 100) 2.317 -> 2.322 ( +0.22%) [ +0.04% +0.13% +0.00% / +0.22% +0.86% +0.52%] index_add_ strided 3 : Elapsed 0.023 ms (2.318 ms / 100) 2.257 -> 2.261 ( +0.18%) [ +0.40% +0.31% +0.00% / +0.18% +0.71% +0.66%] index_copy_ strided 3 : Elapsed 0.023 ms (2.266 ms / 100) 2.351 -> 2.352 ( +0.04%) [ +0.34% +0.30% +0.00% / +0.04% +0.34% +0.34%] index_add_ strided 5 : Elapsed 0.024 ms (2.359 ms / 100) 2.287 -> 2.285 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +0.26% +0.17%] index_copy_ strided 5 : Elapsed 0.023 ms (2.288 ms / 100) 2.347 -> 2.351 ( +0.17%) [ +0.00% +0.09% +0.17% / +0.17% +0.34% +0.38%] index_add_ strided 7 : Elapsed 0.023 ms (2.347 ms / 100) 2.287 -> 2.291 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.35% +0.31%] index_copy_ strided 7 : Elapsed 0.023 ms (2.287 ms / 100) 2.343 -> 2.352 ( +0.38%) [ +0.26% +0.00% +0.09% / +0.51% +0.38% +0.64%] index_add_ perm : Elapsed 0.023 ms (2.349 ms / 100) 2.276 -> 2.284 ( +0.35%) [ +0.31% +0.35% +0.00% / +0.35% +0.53% +0.40%] index_copy_ perm : Elapsed 0.023 ms (2.283 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.04% +0.00% +0.13% / +0.17% +0.60% +0.43%] index_add_ perm_sorted : Elapsed 0.023 ms (2.328 ms / 100) 2.271 -> 2.269 ( -0.09%) [ +0.04% +0.00% +0.00% / -0.09% +0.40% +0.31%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.272 ms / 100) 4.982 -> 4.975 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.62% +0.54%] index_select const : Elapsed 0.050 ms (4.982 ms / 100) 4.913 -> 4.915 ( +0.04%) [ +0.10% +0.00% +0.08% / +0.04% +0.59% +0.59%] index_select wrap : Elapsed 0.049 ms (4.918 ms / 100) 4.958 -> 4.963 ( +0.10%) [ +0.04% +0.00% +0.02% / +0.10% +0.61% +0.54%] index_select linear : Elapsed 0.050 ms (4.960 ms / 100) 4.981 -> 4.988 ( +0.14%) [ +0.00% +0.02% +0.04% / +0.14% +0.56% +0.84%] index_select reverse : Elapsed 0.050 ms (4.981 ms / 100) 5.012 -> 5.020 ( +0.16%) [ +0.00% +0.06% +0.02% / +0.16% +0.48% +0.52%] index_select skip64 : Elapsed 0.050 ms (5.012 ms / 100) 5.006 -> 5.011 ( +0.10%) [ +0.14% +0.18% +0.00% / +0.10% +0.54% +0.68%] index_select skip256 : Elapsed 0.050 ms (5.013 ms / 100) 4.982 -> 4.978 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.36% +0.34%] index_select spread : Elapsed 0.050 ms (4.986 ms / 100) 4.914 -> 4.913 ( -0.02%) [ +0.20% +0.00% +0.02% / -0.02% +0.59% +0.55%] index_select strided 3 : Elapsed 0.049 ms (4.924 ms / 100) 4.968 -> 4.975 ( +0.14%) [ +0.26% +0.06% +0.00% / +0.14% +0.97% +1.03%] index_select random : Elapsed 0.050 ms (4.981 ms / 100) 5.000 -> 5.008 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.82% +0.82%] index_select random_sorted : Elapsed 0.050 ms (5.000 ms / 100) out_shape = [5, 16, 20, 40] in_shape = [5, 4, 20, 40] idx_dim = 1 B = [5, 16, 20, 40] (stride (12800, 800, 40, 1)) A = [5, 4, 20, 40] (stride (80, 1, 4, 400)) dim = 1 2.493 -> 2.493 ( +0.00%) [ +0.16% +0.00% +0.12% / +0.00% +0.60% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.497 ms / 100) 2.435 -> 2.435 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.41% +0.41%] index_copy_ linear : Elapsed 0.024 ms (2.436 ms / 100) 2.492 -> 2.497 ( +0.20%) [ +0.04% +0.04% +0.00% / +0.20% +0.60% +0.52%] index_add_ reverse : Elapsed 0.025 ms (2.493 ms / 100) 2.438 -> 2.439 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.37% +0.41%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.499 -> 2.501 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.40% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.502 ms / 100) 2.438 -> 2.439 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.49% +0.29%] index_copy_ spread : Elapsed 0.024 ms (2.439 ms / 100) 2.494 -> 2.491 ( -0.12%) [ +0.12% +0.00% +0.24% / -0.12% +0.36% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.497 ms / 100) 2.433 -> 2.437 ( +0.16%) [ +0.00% +0.12% +0.41% / +0.16% +0.53% +5.59%] index_copy_ strided 3 : Elapsed 0.024 ms (2.433 ms / 100) 2.494 -> 2.499 ( +0.20%) [ +0.08% +0.00% +0.20% / +0.20% +0.56% +0.44%] index_add_ strided 5 : Elapsed 0.025 ms (2.496 ms / 100) 2.437 -> 2.443 ( +0.25%) [ +0.00% +0.16% +0.00% / +0.25% +0.29% +0.37%] index_copy_ strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.500 -> 2.494 ( -0.24%) [ +0.00% +0.00% +0.04% / -0.24% +0.28% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.500 ms / 100) 2.439 -> 2.441 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.25% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.492 -> 2.495 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.24% +0.28%] index_add_ perm : Elapsed 0.025 ms (2.494 ms / 100) 2.437 -> 2.441 ( +0.16%) [ +0.12% +0.00% +0.00% / +0.16% +0.29% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.440 ms / 100) 2.490 -> 2.495 ( +0.20%) [ +0.20% +0.00% +0.24% / +0.20% +0.60% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.495 ms / 100) 2.436 -> 2.438 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.21% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 5.505 -> 5.511 ( +0.11%) [ +0.04% +0.00% +0.02% / +0.11% +0.42% +0.49%] index_select const : Elapsed 0.055 ms (5.507 ms / 100) 5.505 -> 5.510 ( +0.09%) [ +0.07% +0.00% +0.00% / +0.09% +0.44% +0.42%] index_select wrap : Elapsed 0.055 ms (5.509 ms / 100) 5.518 -> 5.518 ( +0.00%) [ +0.04% +0.00% +0.07% / +0.00% +0.45% +0.45%] index_select linear : Elapsed 0.055 ms (5.520 ms / 100) 5.498 -> 5.509 ( +0.20%) [ +0.00% +0.11% +0.09% / +0.20% +0.64% +0.55%] index_select reverse : Elapsed 0.055 ms (5.498 ms / 100) 5.503 -> 5.508 ( +0.09%) [ +0.00% +0.02% +0.07% / +0.09% +0.42% +0.36%] index_select skip64 : Elapsed 0.055 ms (5.503 ms / 100) 5.524 -> 5.522 ( -0.04%) [ +0.00% +0.02% +0.07% / -0.04% +0.42% +0.40%] index_select skip256 : Elapsed 0.055 ms (5.524 ms / 100) 5.511 -> 5.515 ( +0.07%) [ +0.04% +0.11% +0.00% / +0.07% +0.27% +0.31%] index_select spread : Elapsed 0.055 ms (5.513 ms / 100) 5.522 -> 5.524 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.04% +0.33% +0.27%] index_select strided 3 : Elapsed 0.055 ms (5.523 ms / 100) 5.502 -> 5.505 ( +0.05%) [ +0.07% +0.04% +0.00% / +0.05% +0.38% +0.42%] index_select random : Elapsed 0.055 ms (5.506 ms / 100) 5.503 -> 5.512 ( +0.16%) [ +0.13% +0.00% +0.11% / +0.16% +0.42% +0.36%] index_select random_sorted : Elapsed 0.055 ms (5.510 ms / 100) B = [5, 16, 20, 40] (stride (12800, 1, 640, 16)) A = [5, 4, 20, 40] (stride (40, 4000, 200, 1)) dim = 1 2.378 -> 2.381 ( +0.13%) [ +0.21% +0.00% +0.08% / +0.13% +0.55% +0.34%] index_add_ linear : Elapsed 0.024 ms (2.383 ms / 100) 2.367 -> 2.368 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.25% +0.30%] index_copy_ linear : Elapsed 0.024 ms (2.368 ms / 100) 2.379 -> 2.382 ( +0.13%) [ +0.17% +0.00% +0.21% / +0.13% +0.34% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.383 ms / 100) 2.365 -> 2.362 ( -0.13%) [ +0.17% +0.08% +0.00% / -0.13% +0.38% +0.51%] index_copy_ reverse : Elapsed 0.024 ms (2.369 ms / 100) 2.422 -> 2.428 ( +0.25%) [ +0.25% +0.00% +0.21% / +0.25% +0.41% +0.37%] index_add_ spread : Elapsed 0.024 ms (2.428 ms / 100) 2.466 -> 2.472 ( +0.24%) [ +0.00% +0.16% +0.24% / +0.57% +0.24% +0.45%] index_copy_ spread : Elapsed 0.025 ms (2.466 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.08% +0.41% +0.00% / +0.04% +0.21% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.32% +0.12%] index_copy_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.29% +0.12%] index_add_ strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.467 -> 2.467 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.16% +0.20%] index_copy_ strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.424 -> 2.428 ( +0.17%) [ +0.08% +0.00% +0.12% / +0.17% +0.21% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.469 -> 2.469 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.24% +0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.00% +0.08% +0.21% / +0.04% +0.37% +0.49%] index_add_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.467 -> 2.465 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.53% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.21% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) 2.468 -> 2.466 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.12% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) 4.810 -> 4.813 ( +0.06%) [ +0.10% +0.00% +0.04% / +0.06% +0.33% +0.35%] index_select const : Elapsed 0.048 ms (4.815 ms / 100) 4.881 -> 4.879 ( -0.04%) [ +0.00% +0.20% +0.20% / -0.04% +0.25% +0.45%] index_select wrap : Elapsed 0.049 ms (4.881 ms / 100) 4.876 -> 4.888 ( +0.25%) [ +0.04% +0.00% +0.14% / +0.25% +0.39% +0.25%] index_select linear : Elapsed 0.049 ms (4.878 ms / 100) 4.873 -> 4.888 ( +0.31%) [ +0.00% +0.33% +0.23% / +0.31% +0.74% +0.62%] index_select reverse : Elapsed 0.049 ms (4.873 ms / 100) 4.818 -> 4.820 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.19% +0.23%] index_select skip64 : Elapsed 0.048 ms (4.818 ms / 100) 4.812 -> 4.815 ( +0.06%) [ +0.08% +0.00% +0.08% / +0.06% +0.27% +0.33%] index_select skip256 : Elapsed 0.048 ms (4.816 ms / 100) 4.856 -> 4.862 ( +0.12%) [ +0.12% +0.00% +0.31% / +0.12% +0.54% +0.54%] index_select spread : Elapsed 0.049 ms (4.862 ms / 100) 4.873 -> 4.871 ( -0.04%) [ +0.21% +0.00% +0.04% / -0.04% +0.43% +0.37%] index_select strided 3 : Elapsed 0.049 ms (4.883 ms / 100) 4.853 -> 4.858 ( +0.10%) [ +0.02% +0.00% +0.16% / +0.10% +0.41% +0.43%] index_select random : Elapsed 0.049 ms (4.854 ms / 100) 4.868 -> 4.871 ( +0.06%) [ +0.00% +0.04% +0.10% / +0.06% +0.31% +0.37%] index_select random_sorted : Elapsed 0.049 ms (4.868 ms / 100) B = [5, 16, 20, 40] (stride (12800, 20, 1, 320)) A = [5, 4, 20, 40] (stride (3200, 40, 160, 1)) dim = 1 2.477 -> 2.481 ( +0.16%) [ +0.00% +0.24% +0.16% / +0.16% +0.44% +0.48%] index_add_ linear : Elapsed 0.025 ms (2.477 ms / 100) 2.428 -> 2.432 ( +0.16%) [ +0.00% +0.29% +0.00% / +0.16% +0.45% +0.49%] index_copy_ linear : Elapsed 0.024 ms (2.428 ms / 100) 2.489 -> 2.491 ( +0.08%) [ +0.12% +0.00% +0.28% / +0.08% +0.40% +0.32%] index_add_ reverse : Elapsed 0.025 ms (2.492 ms / 100) 2.443 -> 2.443 ( +0.00%) [ +0.00% +0.12% +0.16% / +0.00% +0.29% +0.29%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.479 -> 2.481 ( +0.08%) [ +0.20% +0.04% +0.00% / +0.08% +0.52% +0.52%] index_add_ spread : Elapsed 0.025 ms (2.484 ms / 100) 2.439 -> 2.443 ( +0.16%) [ +0.08% +0.21% +0.00% / +0.16% +0.70% +0.66%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.61% +0.20%] index_add_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.440 -> 2.439 ( -0.04%) [ +0.00% +0.16% +0.00% / -0.04% +0.53% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.440 ms / 100) 2.488 -> 2.489 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.04% +0.40% +0.52%] index_add_ strided 5 : Elapsed 0.025 ms (2.488 ms / 100) 2.450 -> 2.446 ( -0.16%) [ +0.00% +0.04% +0.00% / -0.16% +0.29% +0.24%] index_copy_ strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.483 -> 2.487 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.36% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.485 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.20% +0.00% +0.04% / +0.08% +0.29% +0.37%] index_copy_ strided 7 : Elapsed 0.025 ms (2.450 ms / 100) 2.491 -> 2.488 ( -0.12%) [ +0.08% +0.00% +0.04% / +0.00% +0.00% -0.12%] index_add_ perm : Elapsed 0.025 ms (2.493 ms / 100) 2.441 -> 2.445 ( +0.16%) [ +0.16% +0.00% +0.20% / +0.16% +0.29% +0.41%] index_copy_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.486 -> 2.490 ( +0.16%) [ +0.00% +0.24% +0.16% / +0.16% +0.44% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.29% +0.49%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 5.199 -> 5.199 ( +0.00%) [ +0.06% +0.15% +0.00% / +0.00% +0.42% +0.50%] index_select const : Elapsed 0.052 ms (5.202 ms / 100) 5.255 -> 5.258 ( +0.06%) [ +0.08% +0.00% +0.04% / +0.06% +0.34% +0.46%] index_select wrap : Elapsed 0.053 ms (5.259 ms / 100) 5.254 -> 5.263 ( +0.17%) [ +0.15% +0.00% +0.08% / +0.17% +0.53% +0.49%] index_select linear : Elapsed 0.053 ms (5.262 ms / 100) 5.259 -> 5.261 ( +0.04%) [ +0.10% +0.00% +0.02% / +0.04% +0.46% +0.55%] index_select reverse : Elapsed 0.053 ms (5.264 ms / 100) 5.216 -> 5.218 ( +0.04%) [ +0.13% +0.02% +0.00% / +0.04% +0.27% +0.23%] index_select skip64 : Elapsed 0.052 ms (5.223 ms / 100) 5.204 -> 5.205 ( +0.02%) [ +0.08% +0.00% +0.38% / +0.02% +0.35% +0.63%] index_select skip256 : Elapsed 0.052 ms (5.208 ms / 100) 5.248 -> 5.254 ( +0.11%) [ +0.13% +0.00% +0.06% / +0.11% +0.42% +0.29%] index_select spread : Elapsed 0.053 ms (5.255 ms / 100) 5.240 -> 5.254 ( +0.27%) [ +0.15% +0.00% +0.23% / +0.27% +0.38% +0.34%] index_select strided 3 : Elapsed 0.052 ms (5.248 ms / 100) 5.257 -> 5.273 ( +0.30%) [ +0.00% +0.25% +0.06% / +0.30% +0.53% +0.46%] index_select random : Elapsed 0.053 ms (5.257 ms / 100) 5.232 -> 5.232 ( +0.00%) [ +0.00% +0.19% +0.21% / +0.00% +0.40% +0.61%] index_select random_sorted : Elapsed 0.052 ms (5.232 ms / 100) B = [5, 16, 20, 40] (stride (12800, 20, 1, 320)) A = [5, 4, 20, 40] (stride (800, 4000, 1, 20)) dim = 1 2.540 -> 2.547 ( +0.28%) [ +0.24% +0.00% +0.20% / +0.28% +0.35% +0.39%] index_add_ linear : Elapsed 0.025 ms (2.546 ms / 100) 2.495 -> 2.498 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.24% +0.16%] index_copy_ linear : Elapsed 0.025 ms (2.496 ms / 100) 2.547 -> 2.550 ( +0.12%) [ +0.00% +0.08% +0.20% / +0.12% +0.27% +0.12%] index_add_ reverse : Elapsed 0.025 ms (2.547 ms / 100) 2.499 -> 2.501 ( +0.08%) [ +0.08% +0.24% +0.00% / +0.08% +0.16% +0.24%] index_copy_ reverse : Elapsed 0.025 ms (2.501 ms / 100) 2.534 -> 2.537 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.20% +0.16% +0.12%] index_add_ spread : Elapsed 0.025 ms (2.534 ms / 100) 2.502 -> 2.502 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.04% +0.00%] index_copy_ spread : Elapsed 0.025 ms (2.503 ms / 100) 2.526 -> 2.526 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.00% +0.40% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.530 ms / 100) 2.494 -> 2.500 ( +0.24%) [ +0.16% +0.08% +0.00% / +0.24% +0.24% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.498 ms / 100) 2.544 -> 2.546 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.35% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.544 ms / 100) 2.510 -> 2.515 ( +0.20%) [ +0.00% +0.20% +0.12% / +0.20% +0.32% +0.20%] index_copy_ strided 5 : Elapsed 0.025 ms (2.510 ms / 100) 2.539 -> 2.541 ( +0.08%) [ +0.20% +0.00% +0.00% / +0.08% +0.32% +0.51%] index_add_ strided 7 : Elapsed 0.025 ms (2.544 ms / 100) 2.506 -> 2.511 ( +0.20%) [ +0.16% +0.04% +0.00% / +0.20% +0.44% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.510 ms / 100) 2.540 -> 2.537 ( -0.12%) [ +0.00% +0.08% +0.08% / -0.12% +0.35% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.540 ms / 100) 2.506 -> 2.508 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.44% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.506 ms / 100) 2.529 -> 2.534 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.20% +0.24% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.533 ms / 100) 2.501 -> 2.502 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.04% +0.32% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.502 ms / 100) 5.724 -> 5.724 ( +0.00%) [ +0.12% +0.05% +0.00% / +0.00% +0.51% +0.51%] index_select const : Elapsed 0.057 ms (5.731 ms / 100) 5.744 -> 5.744 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.31% +0.21%] index_select wrap : Elapsed 0.057 ms (5.744 ms / 100) 5.731 -> 5.739 ( +0.14%) [ +0.26% +0.07% +0.00% / +0.14% +0.45% +0.30%] index_select linear : Elapsed 0.057 ms (5.746 ms / 100) 5.737 -> 5.739 ( +0.03%) [ +0.09% +0.09% +0.00% / +0.03% +0.42% +0.42%] index_select reverse : Elapsed 0.057 ms (5.742 ms / 100) 5.724 -> 5.736 ( +0.21%) [ +0.19% +0.14% +0.00% / +0.21% +0.37% +0.28%] index_select skip64 : Elapsed 0.057 ms (5.735 ms / 100) 5.710 -> 5.711 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.47% +0.40%] index_select skip256 : Elapsed 0.057 ms (5.713 ms / 100) 5.717 -> 5.719 ( +0.03%) [ +0.10% +0.00% +0.19% / +0.03% +0.47% +0.44%] index_select spread : Elapsed 0.057 ms (5.723 ms / 100) 5.748 -> 5.754 ( +0.10%) [ +0.12% +0.00% +0.14% / +0.10% +0.30% +0.49%] index_select strided 3 : Elapsed 0.058 ms (5.755 ms / 100) 5.744 -> 5.745 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.26% +0.12%] index_select random : Elapsed 0.057 ms (5.745 ms / 100) 5.740 -> 5.742 ( +0.03%) [ +0.00% +0.16% +0.16% / +0.03% +0.33% +0.33%] index_select random_sorted : Elapsed 0.057 ms (5.740 ms / 100) B = [5, 16, 20, 40] (stride (20, 4000, 1, 100)) A = [5, 4, 20, 40] (stride (3200, 800, 1, 20)) dim = 1 2.534 -> 2.537 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.39% +0.43%] index_add_ linear : Elapsed 0.025 ms (2.536 ms / 100) 2.489 -> 2.491 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.60% +0.56%] index_copy_ linear : Elapsed 0.025 ms (2.493 ms / 100) 2.537 -> 2.537 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.35%] index_add_ reverse : Elapsed 0.025 ms (2.537 ms / 100) 2.490 -> 2.496 ( +0.24%) [ +0.04% +0.00% +0.00% / +0.24% +0.48% +0.44%] index_copy_ reverse : Elapsed 0.025 ms (2.491 ms / 100) 2.533 -> 2.536 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.12% +0.55% +0.43%] index_add_ spread : Elapsed 0.025 ms (2.536 ms / 100) 2.491 -> 2.494 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.40% +0.48%] index_copy_ spread : Elapsed 0.025 ms (2.491 ms / 100) 2.535 -> 2.537 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.67% +0.79%] index_add_ strided 3 : Elapsed 0.025 ms (2.535 ms / 100) 2.491 -> 2.491 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.68% +0.64%] index_copy_ strided 3 : Elapsed 0.025 ms (2.493 ms / 100) 2.535 -> 2.543 ( +0.32%) [ +0.20% +0.04% +0.00% / +0.32% +0.59% +0.51%] index_add_ strided 5 : Elapsed 0.025 ms (2.540 ms / 100) 2.491 -> 2.494 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.60% +0.52%] index_copy_ strided 5 : Elapsed 0.025 ms (2.493 ms / 100) 2.534 -> 2.539 ( +0.20%) [ +0.24% +0.24% +0.00% / +0.20% +0.43% +0.47%] index_add_ strided 7 : Elapsed 0.025 ms (2.540 ms / 100) 2.490 -> 2.493 ( +0.12%) [ +0.20% +0.12% +0.00% / +0.12% +0.36% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.495 ms / 100) 2.536 -> 2.538 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.12% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.536 ms / 100) 2.489 -> 2.491 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.16% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.491 ms / 100) 2.532 -> 2.535 ( +0.12%) [ +0.20% +0.36% +0.00% / +0.12% +0.16% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.537 ms / 100) 2.490 -> 2.491 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.32% +0.16%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.493 ms / 100) 5.710 -> 5.709 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.39% +0.39%] index_select const : Elapsed 0.057 ms (5.710 ms / 100) 5.730 -> 5.736 ( +0.10%) [ +0.14% +0.07% +0.00% / +0.10% +0.45% +0.44%] index_select wrap : Elapsed 0.057 ms (5.738 ms / 100) 5.742 -> 5.744 ( +0.03%) [ +0.10% +0.12% +0.00% / +0.03% +0.40% +0.52%] index_select linear : Elapsed 0.057 ms (5.748 ms / 100) 5.744 -> 5.743 ( -0.02%) [ +0.07% +0.00% +0.02% / -0.02% +0.40% +0.37%] index_select reverse : Elapsed 0.057 ms (5.748 ms / 100) 5.752 -> 5.748 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.24% +0.35%] index_select skip64 : Elapsed 0.058 ms (5.753 ms / 100) 5.740 -> 5.742 ( +0.03%) [ +0.16% +0.00% +0.03% / +0.03% +0.31% +0.45%] index_select skip256 : Elapsed 0.057 ms (5.749 ms / 100) 5.723 -> 5.717 ( -0.10%) [ +0.00% +0.00% +0.05% / -0.10% +0.21% +0.12%] index_select spread : Elapsed 0.057 ms (5.723 ms / 100) 5.739 -> 5.741 ( +0.03%) [ +0.00% +0.03% +0.02% / +0.03% +0.28% +0.30%] index_select strided 3 : Elapsed 0.057 ms (5.739 ms / 100) 5.755 -> 5.748 ( -0.12%) [ +0.03% +0.02% +0.00% / -0.12% +0.24% +0.23%] index_select random : Elapsed 0.058 ms (5.757 ms / 100) 5.699 -> 5.704 ( +0.09%) [ +0.14% +0.00% +0.09% / +0.09% +0.39% +0.30%] index_select random_sorted : Elapsed 0.057 ms (5.707 ms / 100) B = [5, 16, 20, 40] (stride (320, 1, 16, 1600)) A = [5, 4, 20, 40] (stride (20, 100, 1, 400)) dim = 1 2.643 -> 2.644 ( +0.04%) [ +0.08% +0.19% +0.00% / +0.04% +0.30% +0.61%] index_add_ linear : Elapsed 0.026 ms (2.645 ms / 100) 2.602 -> 2.604 ( +0.08%) [ +0.00% +0.38% +0.00% / +0.08% +0.15% +0.31%] index_copy_ linear : Elapsed 0.026 ms (2.602 ms / 100) 2.644 -> 2.647 ( +0.11%) [ +0.00% +0.19% +0.26% / +0.26% +0.11% +0.23%] index_add_ reverse : Elapsed 0.026 ms (2.644 ms / 100) 2.604 -> 2.605 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.15% +0.27% +0.04%] index_copy_ reverse : Elapsed 0.026 ms (2.607 ms / 100) 2.683 -> 2.686 ( +0.11%) [ +0.00% +0.15% +0.15% / +0.11% +0.30% +0.30%] index_add_ spread : Elapsed 0.027 ms (2.683 ms / 100) 2.705 -> 2.702 ( -0.11%) [ +0.04% +0.00% +0.00% / -0.11% +0.00% +0.22%] index_copy_ spread : Elapsed 0.027 ms (2.706 ms / 100) 2.684 -> 2.680 ( -0.15%) [ +0.00% +0.19% +0.11% / -0.15% +0.19% +0.19%] index_add_ strided 3 : Elapsed 0.027 ms (2.684 ms / 100) 2.700 -> 2.699 ( -0.04%) [ +0.11% +0.07% +0.00% / -0.04% +0.26% +0.15%] index_copy_ strided 3 : Elapsed 0.027 ms (2.703 ms / 100) 2.681 -> 2.686 ( +0.19%) [ +0.07% +0.48% +0.00% / +0.19% +0.37% +0.45%] index_add_ strided 5 : Elapsed 0.027 ms (2.683 ms / 100) 2.697 -> 2.707 ( +0.37%) [ +0.15% +0.41% +0.00% / +0.37% +0.48% +0.48%] index_copy_ strided 5 : Elapsed 0.027 ms (2.701 ms / 100) 2.680 -> 2.686 ( +0.22%) [ +0.11% +0.00% +0.15% / +0.22% +0.52% +0.41%] index_add_ strided 7 : Elapsed 0.027 ms (2.683 ms / 100) 2.702 -> 2.703 ( +0.04%) [ +0.15% +0.00% +0.15% / +0.04% +0.30% +0.19%] index_copy_ strided 7 : Elapsed 0.027 ms (2.706 ms / 100) 2.687 -> 2.692 ( +0.19%) [ +0.00% +0.00% +0.15% / +0.19% +0.41% +0.30%] index_add_ perm : Elapsed 0.027 ms (2.687 ms / 100) 2.700 -> 2.706 ( +0.22%) [ +0.00% +0.30% +0.04% / +0.22% +0.48% +0.33%] index_copy_ perm : Elapsed 0.027 ms (2.700 ms / 100) 2.687 -> 2.686 ( -0.04%) [ +0.07% +0.00% +0.26% / -0.04% +0.33% +0.74%] index_add_ perm_sorted : Elapsed 0.027 ms (2.689 ms / 100) 2.701 -> 2.705 ( +0.15%) [ +0.15% +0.00% +0.19% / +0.15% +0.15% +0.26%] index_copy_ perm_sorted : Elapsed 0.027 ms (2.705 ms / 100) 6.024 -> 6.027 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.38% +0.40%] index_select const : Elapsed 0.060 ms (6.027 ms / 100) 6.024 -> 6.031 ( +0.12%) [ +0.00% +0.00% +0.03% / +0.12% +0.25% +0.28%] index_select wrap : Elapsed 0.060 ms (6.024 ms / 100) 6.021 -> 6.028 ( +0.12%) [ +0.18% +0.18% +0.00% / +0.12% +0.32% +0.38%] index_select linear : Elapsed 0.060 ms (6.032 ms / 100) 6.022 -> 6.032 ( +0.17%) [ +0.15% +0.00% +0.08% / +0.17% +0.60% +0.45%] index_select reverse : Elapsed 0.060 ms (6.031 ms / 100) 6.026 -> 6.028 ( +0.03%) [ +0.12% +0.00% +0.07% / +0.03% +0.32% +0.51%] index_select skip64 : Elapsed 0.060 ms (6.033 ms / 100) 6.044 -> 6.046 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.31% +0.33%] index_select skip256 : Elapsed 0.060 ms (6.047 ms / 100) 6.022 -> 6.025 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.40% +0.38%] index_select spread : Elapsed 0.060 ms (6.026 ms / 100) 6.016 -> 6.018 ( +0.03%) [ +0.10% +0.00% +0.05% / +0.03% +0.23% +0.50%] index_select strided 3 : Elapsed 0.060 ms (6.022 ms / 100) 6.031 -> 6.028 ( -0.05%) [ +0.03% +0.00% +0.02% / -0.05% +0.25% +0.36%] index_select random : Elapsed 0.060 ms (6.033 ms / 100) 6.012 -> 6.015 ( +0.05%) [ +0.08% +0.07% +0.00% / +0.05% +0.47% +0.58%] index_select random_sorted : Elapsed 0.060 ms (6.017 ms / 100) out_shape = [5, 4, 16, 40] in_shape = [5, 4, 20, 40] idx_dim = 2 B = [5, 4, 16, 40] (stride (2560, 40, 160, 1)) A = [5, 4, 20, 40] (stride (1, 200, 800, 5)) dim = 2 3.874 -> 3.874 ( +0.00%) [ +0.08% +0.00% +0.03% / +0.00% +0.85% +0.83%] index_select const : Elapsed 0.039 ms (3.877 ms / 100) 3.878 -> 3.883 ( +0.13%) [ +0.05% +0.15% +0.00% / +0.13% +1.08% +1.08%] index_select wrap : Elapsed 0.039 ms (3.880 ms / 100) 3.873 -> 3.875 ( +0.05%) [ +0.00% +0.08% +0.00% / +0.05% +0.93% +0.90%] index_select linear : Elapsed 0.039 ms (3.873 ms / 100) 3.877 -> 3.885 ( +0.21%) [ +0.13% +0.10% +0.00% / +0.21% +0.95% +1.39%] index_select reverse : Elapsed 0.039 ms (3.882 ms / 100) 3.887 -> 3.889 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.64% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.887 ms / 100) 3.865 -> 3.866 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.67% +0.80%] index_select skip256 : Elapsed 0.039 ms (3.866 ms / 100) 3.880 -> 3.883 ( +0.08%) [ +0.13% +0.00% +0.05% / +0.08% +0.70% +0.62%] index_select spread : Elapsed 0.039 ms (3.885 ms / 100) 3.877 -> 3.875 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.72% +0.75%] index_select strided 3 : Elapsed 0.039 ms (3.878 ms / 100) 3.873 -> 3.874 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.85% +0.67%] index_select strided 5 : Elapsed 0.039 ms (3.873 ms / 100) 3.877 -> 3.877 ( +0.00%) [ +0.00% +0.15% +0.18% / +0.00% +0.88% +0.75%] index_select strided 7 : Elapsed 0.039 ms (3.877 ms / 100) 3.873 -> 3.873 ( +0.00%) [ +0.10% +0.21% +0.00% / +0.00% +1.32% +0.72%] index_select strided 8 : Elapsed 0.039 ms (3.877 ms / 100) 3.878 -> 3.877 ( -0.03%) [ +0.13% +0.10% +0.00% / -0.03% +0.85% +0.77%] index_select strided 16 : Elapsed 0.039 ms (3.883 ms / 100) 3.885 -> 3.890 ( +0.13%) [ +0.15% +0.18% +0.00% / +0.13% +0.72% +1.00%] index_select random : Elapsed 0.039 ms (3.891 ms / 100) 3.877 -> 3.871 ( -0.15%) [ +0.03% +0.00% +0.00% / -0.15% +0.46% +0.57%] index_select random_sorted : Elapsed 0.039 ms (3.878 ms / 100) 3.885 -> 3.885 ( +0.00%) [ +0.18% +0.00% +0.13% / +0.00% +0.54% +0.64%] index_select perm : Elapsed 0.039 ms (3.892 ms / 100) 3.885 -> 3.887 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.05% +0.93% +0.77%] index_select perm_sorted : Elapsed 0.039 ms (3.887 ms / 100) B = [5, 4, 16, 40] (stride (2560, 1, 4, 64)) A = [5, 4, 20, 40] (stride (160, 40, 800, 1)) dim = 2 3.503 -> 3.505 ( +0.06%) [ +0.11% +0.09% +0.00% / +0.06% +0.51% +0.54%] index_select const : Elapsed 0.035 ms (3.507 ms / 100) 3.511 -> 3.511 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.63% +0.71%] index_select wrap : Elapsed 0.035 ms (3.512 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.48% +0.54%] index_select linear : Elapsed 0.035 ms (3.518 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.54% +0.54%] index_select reverse : Elapsed 0.035 ms (3.503 ms / 100) 3.511 -> 3.511 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.60%] index_select skip64 : Elapsed 0.035 ms (3.512 ms / 100) 3.508 -> 3.507 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.94% +0.66%] index_select skip256 : Elapsed 0.035 ms (3.510 ms / 100) 3.508 -> 3.510 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.66% +0.71%] index_select spread : Elapsed 0.035 ms (3.510 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.54% +0.54%] index_select strided 3 : Elapsed 0.035 ms (3.518 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.54% +0.49%] index_select strided 5 : Elapsed 0.035 ms (3.502 ms / 100) 3.508 -> 3.508 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.77% +0.71%] index_select strided 7 : Elapsed 0.035 ms (3.509 ms / 100) 3.516 -> 3.518 ( +0.06%) [ +0.09% +0.03% +0.00% / +0.06% +0.57% +0.57%] index_select strided 8 : Elapsed 0.035 ms (3.519 ms / 100) 3.499 -> 3.499 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.66% +0.71%] index_select strided 16 : Elapsed 0.035 ms (3.499 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.11% +0.09% +0.00% / +0.03% +0.74% +0.68%] index_select random : Elapsed 0.035 ms (3.513 ms / 100) 3.512 -> 3.512 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.68% +0.68%] index_select random_sorted : Elapsed 0.035 ms (3.514 ms / 100) 3.505 -> 3.506 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.91% +0.83%] index_select perm : Elapsed 0.035 ms (3.506 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.77% +0.74%] index_select perm_sorted : Elapsed 0.035 ms (3.511 ms / 100) B = [5, 4, 16, 40] (stride (640, 3200, 40, 1)) A = [5, 4, 20, 40] (stride (1, 4000, 200, 5)) dim = 2 3.913 -> 3.912 ( -0.03%) [ +0.10% +0.00% +0.13% / -0.03% +0.54% +0.54%] index_select const : Elapsed 0.039 ms (3.917 ms / 100) 3.926 -> 3.926 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.64% +0.76%] index_select wrap : Elapsed 0.039 ms (3.926 ms / 100) 3.904 -> 3.913 ( +0.23%) [ +0.20% +0.18% +0.00% / +0.23% +0.72% +0.77%] index_select linear : Elapsed 0.039 ms (3.912 ms / 100) 3.919 -> 3.925 ( +0.15%) [ +0.05% +0.03% +0.00% / +0.15% +0.59% +0.56%] index_select reverse : Elapsed 0.039 ms (3.921 ms / 100) 3.915 -> 3.918 ( +0.08%) [ +0.18% +0.00% +0.20% / +0.08% +0.69% +0.61%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.899 -> 3.903 ( +0.10%) [ +0.08% +0.05% +0.00% / +0.10% +0.59% +0.67%] index_select skip256 : Elapsed 0.039 ms (3.902 ms / 100) 3.909 -> 3.916 ( +0.18%) [ +0.26% +0.38% +0.00% / +0.18% +0.79% +0.79%] index_select spread : Elapsed 0.039 ms (3.919 ms / 100) 3.926 -> 3.920 ( -0.15%) [ +0.03% +0.00% +0.03% / -0.15% +0.53% +0.41%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.903 -> 3.904 ( +0.03%) [ +0.28% +0.15% +0.00% / +0.03% +0.69% +0.74%] index_select strided 5 : Elapsed 0.039 ms (3.914 ms / 100) 3.920 -> 3.919 ( -0.03%) [ +0.00% +0.13% +0.10% / -0.03% +0.64% +0.84%] index_select strided 7 : Elapsed 0.039 ms (3.920 ms / 100) 3.917 -> 3.918 ( +0.03%) [ +0.20% +0.00% +0.05% / +0.03% +0.46% +0.38%] index_select strided 8 : Elapsed 0.039 ms (3.925 ms / 100) 3.918 -> 3.923 ( +0.13%) [ +0.00% +0.03% +0.10% / +0.13% +0.51% +0.61%] index_select strided 16 : Elapsed 0.039 ms (3.918 ms / 100) 3.918 -> 3.920 ( +0.05%) [ +0.08% +0.00% +0.03% / +0.05% +0.48% +0.48%] index_select random : Elapsed 0.039 ms (3.921 ms / 100) 3.909 -> 3.920 ( +0.28%) [ +0.03% +0.00% +0.20% / +0.28% +0.49% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.910 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.13% +0.00% +0.05% / +0.13% +0.46% +0.41%] index_select perm : Elapsed 0.039 ms (3.924 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.41% +0.36%] index_select perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) B = [5, 4, 16, 40] (stride (1, 3200, 200, 5)) dim = 2 fill_cnt = 20 3.139 -> 3.122 ( -0.54%) [ +0.25% +0.32% +0.00% / -0.54% -0.54% -0.29%] index_fill_ const : Elapsed 0.031 ms (3.147 ms / 100) 3.151 -> 3.133 ( -0.57%) [ +0.22% +0.19% +0.00% / -0.38% -0.57% -0.54%] index_fill_ linear : Elapsed 0.032 ms (3.158 ms / 100) 3.157 -> 3.131 ( -0.82%) [ +0.00% +0.00% +0.00% / -0.82% -0.63% -0.38%] index_fill_ reverse : Elapsed 0.032 ms (3.157 ms / 100) 3.140 -> 3.124 ( -0.51%) [ +0.19% +0.00% +0.19% / -0.38% -0.51% -0.29%] index_fill_ skip64 : Elapsed 0.031 ms (3.146 ms / 100) 3.142 -> 3.124 ( -0.57%) [ +0.13% +0.00% +0.19% / -0.57% -0.54% -0.54%] index_fill_ skip256 : Elapsed 0.031 ms (3.146 ms / 100) 3.151 -> 3.129 ( -0.70%) [ +0.03% +0.16% +0.00% / -0.51% -0.51% -0.70%] index_fill_ spread : Elapsed 0.032 ms (3.152 ms / 100) 3.151 -> 3.132 ( -0.60%) [ +0.16% +0.00% +0.13% / -0.57% -0.41% -0.60%] index_fill_ strided 3 : Elapsed 0.032 ms (3.156 ms / 100) 3.151 -> 3.132 ( -0.60%) [ +0.13% +0.22% +0.00% / -0.60% -0.48% -0.54%] index_fill_ strided 5 : Elapsed 0.032 ms (3.155 ms / 100) 3.157 -> 3.133 ( -0.76%) [ +0.00% +0.00% +0.00% / -0.76% -0.57% -0.22%] index_fill_ strided 7 : Elapsed 0.032 ms (3.157 ms / 100) 3.136 -> 3.119 ( -0.54%) [ +0.13% +0.00% +0.10% / -0.54% -0.51% -0.35%] index_fill_ strided 8 : Elapsed 0.031 ms (3.140 ms / 100) 3.150 -> 3.130 ( -0.63%) [ +0.03% +0.00% +0.10% / -0.63% -0.51% -0.51%] index_fill_ random : Elapsed 0.032 ms (3.151 ms / 100) 3.144 -> 3.125 ( -0.60%) [ +0.06% +0.03% +0.00% / -0.60% -0.38% -0.29%] index_fill_ random_sorted : Elapsed 0.031 ms (3.146 ms / 100) B = [5, 4, 16, 40] (stride (1, 5, 800, 20)) A = [5, 4, 20, 40] (stride (1, 4000, 5, 100)) dim = 2 4.280 -> 4.281 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.26% +0.47%] index_select const : Elapsed 0.043 ms (4.283 ms / 100) 4.278 -> 4.275 ( -0.07%) [ +0.07% +0.02% +0.00% / -0.07% +0.49% +0.44%] index_select wrap : Elapsed 0.043 ms (4.281 ms / 100) 4.283 -> 4.285 ( +0.05%) [ +0.14% +0.07% +0.00% / +0.05% +0.70% +0.51%] index_select linear : Elapsed 0.043 ms (4.289 ms / 100) 4.280 -> 4.276 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.09% +0.40% +0.56%] index_select reverse : Elapsed 0.043 ms (4.281 ms / 100) 4.296 -> 4.296 ( +0.00%) [ +0.00% +0.07% +0.12% / +0.00% +0.42% +0.42%] index_select skip64 : Elapsed 0.043 ms (4.296 ms / 100) 4.273 -> 4.275 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.75% +0.68%] index_select skip256 : Elapsed 0.043 ms (4.273 ms / 100) 4.286 -> 4.291 ( +0.12%) [ +0.00% +0.07% +0.09% / +0.12% +0.68% +0.70%] index_select spread : Elapsed 0.043 ms (4.286 ms / 100) 4.285 -> 4.288 ( +0.07%) [ +0.12% +0.07% +0.00% / +0.07% +0.72% +0.70%] index_select strided 3 : Elapsed 0.043 ms (4.290 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.42% +0.37%] index_select strided 5 : Elapsed 0.043 ms (4.285 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.00% +0.07% +0.07% / +0.05% +0.63% +0.75%] index_select strided 7 : Elapsed 0.043 ms (4.278 ms / 100) 4.272 -> 4.275 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.80% +0.77%] index_select strided 8 : Elapsed 0.043 ms (4.275 ms / 100) 4.290 -> 4.297 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.63% +0.63%] index_select strided 16 : Elapsed 0.043 ms (4.295 ms / 100) 4.273 -> 4.275 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.59% +0.68%] index_select random : Elapsed 0.043 ms (4.273 ms / 100) 4.285 -> 4.294 ( +0.21%) [ +0.09% +0.00% +0.02% / +0.21% +0.82% +0.77%] index_select random_sorted : Elapsed 0.043 ms (4.289 ms / 100) 4.266 -> 4.272 ( +0.14%) [ +0.00% +0.00% +0.14% / +0.14% +0.77% +0.80%] index_select perm : Elapsed 0.043 ms (4.266 ms / 100) 4.282 -> 4.290 ( +0.19%) [ +0.05% +0.12% +0.00% / +0.19% +0.72% +0.75%] index_select perm_sorted : Elapsed 0.043 ms (4.284 ms / 100) out_shape = [5, 4, 20, 16] in_shape = [5, 4, 20, 40] idx_dim = 3 B = [5, 4, 20, 16] (stride (1280, 16, 64, 1)) A = [5, 4, 20, 40] (stride (1, 5, 20, 400)) dim = 3 4.042 -> 4.042 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.57% +0.57%] index_select const : Elapsed 0.040 ms (4.043 ms / 100) 4.016 -> 4.020 ( +0.10%) [ +0.05% +0.00% +0.02% / +0.10% +0.52% +0.60%] index_select wrap : Elapsed 0.040 ms (4.018 ms / 100) 4.031 -> 4.038 ( +0.17%) [ +0.07% +0.00% +0.02% / +0.17% +0.60% +0.52%] index_select linear : Elapsed 0.040 ms (4.034 ms / 100) 4.004 -> 4.006 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.50% +0.50%] index_select reverse : Elapsed 0.040 ms (4.005 ms / 100) 4.028 -> 4.029 ( +0.02%) [ +0.00% +0.40% +0.00% / +0.02% +0.52% +0.60%] index_select skip64 : Elapsed 0.040 ms (4.028 ms / 100) 4.043 -> 4.043 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_select skip256 : Elapsed 0.040 ms (4.043 ms / 100) 4.030 -> 4.030 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.40% +0.42%] index_select spread : Elapsed 0.040 ms (4.030 ms / 100) 4.040 -> 4.042 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.40% +0.40%] index_select strided 3 : Elapsed 0.040 ms (4.041 ms / 100) 4.008 -> 4.010 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.32% +0.37%] index_select strided 5 : Elapsed 0.040 ms (4.009 ms / 100) 4.030 -> 4.034 ( +0.10%) [ +0.00% +0.12% +0.00% / +0.10% +0.50% +0.50%] index_select strided 7 : Elapsed 0.040 ms (4.030 ms / 100) 4.036 -> 4.045 ( +0.22%) [ +0.22% +0.02% +0.00% / +0.22% +0.50% +0.47%] index_select strided 8 : Elapsed 0.040 ms (4.045 ms / 100) 4.035 -> 4.039 ( +0.10%) [ +0.02% +0.05% +0.00% / +0.10% +0.52% +0.50%] index_select strided 16 : Elapsed 0.040 ms (4.036 ms / 100) 4.040 -> 4.040 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.42% +0.45%] index_select random : Elapsed 0.040 ms (4.041 ms / 100) 4.024 -> 4.024 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.47%] index_select random_sorted : Elapsed 0.040 ms (4.024 ms / 100) 4.016 -> 4.020 ( +0.10%) [ +0.05% +0.00% +0.02% / +0.10% +0.40% +0.40%] index_select perm : Elapsed 0.040 ms (4.018 ms / 100) 4.008 -> 4.009 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.35% +0.37%] index_select perm_sorted : Elapsed 0.040 ms (4.011 ms / 100) B = [5, 4, 20, 16] (stride (1280, 1, 4, 80)) A = [5, 4, 20, 40] (stride (3200, 20, 1, 80)) dim = 3 3.506 -> 3.508 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.74% +0.77%] index_select const : Elapsed 0.035 ms (3.507 ms / 100) 3.496 -> 3.500 ( +0.11%) [ +0.00% +0.03% +0.00% / +0.11% +0.74% +0.80%] index_select wrap : Elapsed 0.035 ms (3.496 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.91% +0.91%] index_select linear : Elapsed 0.035 ms (3.503 ms / 100) 3.499 -> 3.498 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +1.00% +0.94%] index_select reverse : Elapsed 0.035 ms (3.499 ms / 100) 3.498 -> 3.497 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.74% +0.86%] index_select skip64 : Elapsed 0.035 ms (3.498 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.86% +0.83%] index_select skip256 : Elapsed 0.035 ms (3.506 ms / 100) 3.496 -> 3.497 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +1.09% +1.00%] index_select spread : Elapsed 0.035 ms (3.498 ms / 100) 3.499 -> 3.499 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.97% +0.77%] index_select strided 3 : Elapsed 0.035 ms (3.500 ms / 100) 3.500 -> 3.502 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.97% +0.89%] index_select strided 5 : Elapsed 0.035 ms (3.502 ms / 100) 3.493 -> 3.495 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.86% +0.80%] index_select strided 7 : Elapsed 0.035 ms (3.495 ms / 100) 3.508 -> 3.511 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.68% +0.71%] index_select strided 8 : Elapsed 0.035 ms (3.508 ms / 100) 3.498 -> 3.498 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.97% +1.00%] index_select strided 16 : Elapsed 0.035 ms (3.499 ms / 100) 3.508 -> 3.508 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.74% +0.77%] index_select random : Elapsed 0.035 ms (3.509 ms / 100) 3.495 -> 3.494 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +1.09% +0.94%] index_select random_sorted : Elapsed 0.035 ms (3.495 ms / 100) 3.500 -> 3.500 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.91% +1.09%] index_select perm : Elapsed 0.035 ms (3.501 ms / 100) 3.512 -> 3.511 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.83% +0.77%] index_select perm_sorted : Elapsed 0.035 ms (3.512 ms / 100) B = [5, 4, 20, 16] (stride (16, 1600, 80, 1)) A = [5, 4, 20, 40] (stride (1, 100, 5, 400)) dim = 3 1.386 -> 1.388 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +1.23% +1.01%] index_select const : Elapsed 0.014 ms (1.386 ms / 100) 1.395 -> 1.396 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.65% +1.08%] index_select wrap : Elapsed 0.014 ms (1.397 ms / 100) 1.399 -> 1.398 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.43% +0.21%] index_select linear : Elapsed 0.014 ms (1.400 ms / 100) 1.401 -> 1.400 ( -0.07%) [ +0.21% +0.00% +0.07% / -0.07% +0.36% +0.21%] index_select reverse : Elapsed 0.014 ms (1.404 ms / 100) 1.392 -> 1.395 ( +0.22%) [ +0.00% +0.22% +0.36% / +0.22% +0.50% +0.50%] index_select skip64 : Elapsed 0.014 ms (1.392 ms / 100) 1.389 -> 1.390 ( +0.07%) [ +0.29% +0.00% +0.00% / +0.07% +0.58% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.393 ms / 100) 1.399 -> 1.400 ( +0.07%) [ +0.21% +0.00% +0.14% / +0.07% +0.29% +0.07%] index_select spread : Elapsed 0.014 ms (1.402 ms / 100) 1.400 -> 1.402 ( +0.14%) [ +0.64% +0.07% +0.00% / +0.14% +0.14% +0.29%] index_select strided 3 : Elapsed 0.014 ms (1.409 ms / 100) 1.403 -> 1.403 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.07% +0.14% +0.00%] index_select strided 5 : Elapsed 0.014 ms (1.405 ms / 100) 1.399 -> 1.397 ( -0.14%) [ +0.00% +0.21% +0.07% / +0.00% -0.14% +0.00%] index_select strided 7 : Elapsed 0.014 ms (1.399 ms / 100) 1.397 -> 1.396 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.21% +0.21%] index_select strided 8 : Elapsed 0.014 ms (1.397 ms / 100) 1.391 -> 1.391 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.36% +0.43%] index_select strided 16 : Elapsed 0.014 ms (1.392 ms / 100) 1.398 -> 1.398 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.07% +0.21%] index_select random : Elapsed 0.014 ms (1.400 ms / 100) 1.405 -> 1.406 ( +0.07%) [ +0.07% +0.00% +0.21% / +0.07% +0.21% +0.21%] index_select random_sorted : Elapsed 0.014 ms (1.406 ms / 100) 1.399 -> 1.398 ( -0.07%) [ +0.07% +0.14% +0.00% / +0.00% +0.00% -0.07%] index_select perm : Elapsed 0.014 ms (1.400 ms / 100) 1.402 -> 1.403 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.29% +0.21% +0.07%] index_select perm_sorted : Elapsed 0.014 ms (1.404 ms / 100) B = [5, 4, 20, 16] (stride (1, 1600, 80, 5)) A = [5, 4, 20, 40] (stride (1, 4000, 5, 100)) dim = 3 3.596 -> 3.596 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.72% +0.81%] index_select const : Elapsed 0.036 ms (3.598 ms / 100) 3.595 -> 3.600 ( +0.14%) [ +0.06% +0.03% +0.00% / +0.14% +0.86% +0.83%] index_select wrap : Elapsed 0.036 ms (3.597 ms / 100) 3.608 -> 3.608 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.64% +0.64%] index_select linear : Elapsed 0.036 ms (3.608 ms / 100) 3.611 -> 3.613 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.83% +0.91%] index_select reverse : Elapsed 0.036 ms (3.612 ms / 100) 3.598 -> 3.598 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.86% +0.89%] index_select skip64 : Elapsed 0.036 ms (3.599 ms / 100) 3.595 -> 3.593 ( -0.06%) [ +0.00% +0.08% +0.08% / -0.06% +0.86% +0.86%] index_select skip256 : Elapsed 0.036 ms (3.595 ms / 100) 3.608 -> 3.611 ( +0.08%) [ +0.08% +0.00% +0.11% / +0.08% +0.80% +0.69%] index_select spread : Elapsed 0.036 ms (3.611 ms / 100) 3.600 -> 3.600 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.75% +0.64%] index_select strided 3 : Elapsed 0.036 ms (3.600 ms / 100) 3.607 -> 3.605 ( -0.06%) [ +0.00% +0.17% +0.08% / -0.06% +0.89% +0.72%] index_select strided 5 : Elapsed 0.036 ms (3.607 ms / 100) 3.609 -> 3.612 ( +0.08%) [ +0.00% +0.11% +0.00% / +0.08% +0.83% +0.78%] index_select strided 7 : Elapsed 0.036 ms (3.609 ms / 100) 3.609 -> 3.611 ( +0.06%) [ +0.03% +0.08% +0.00% / +0.06% +0.64% +0.67%] index_select strided 8 : Elapsed 0.036 ms (3.610 ms / 100) 3.595 -> 3.596 ( +0.03%) [ +0.06% +0.11% +0.00% / +0.03% +0.78% +0.81%] index_select strided 16 : Elapsed 0.036 ms (3.597 ms / 100) 3.603 -> 3.603 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.83% +0.80%] index_select random : Elapsed 0.036 ms (3.603 ms / 100) 3.588 -> 3.594 ( +0.17%) [ +0.00% +0.06% +0.11% / +0.17% +0.89% +0.81%] index_select random_sorted : Elapsed 0.036 ms (3.588 ms / 100) 3.596 -> 3.600 ( +0.11%) [ +0.14% +0.17% +0.00% / +0.11% +0.83% +0.75%] index_select perm : Elapsed 0.036 ms (3.601 ms / 100) 3.600 -> 3.602 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.78% +0.78%] index_select perm_sorted : Elapsed 0.036 ms (3.601 ms / 100) B = [5, 4, 20, 16] (stride (1, 80, 320, 5)) A = [5, 4, 20, 40] (stride (3200, 800, 40, 1)) dim = 3 3.577 -> 3.582 ( +0.14%) [ +0.06% +0.14% +0.00% / +0.14% +0.78% +0.73%] index_select const : Elapsed 0.036 ms (3.579 ms / 100) 3.587 -> 3.577 ( -0.28%) [ +0.00% +0.08% +0.00% / -0.28% +0.47% +0.33%] index_select wrap : Elapsed 0.036 ms (3.587 ms / 100) 3.586 -> 3.587 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.47% +0.47%] index_select linear : Elapsed 0.036 ms (3.589 ms / 100) 3.581 -> 3.572 ( -0.25%) [ +0.06% +0.03% +0.00% / -0.25% +0.47% +0.45%] index_select reverse : Elapsed 0.036 ms (3.583 ms / 100) 3.607 -> 3.606 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.39% +0.36%] index_select skip64 : Elapsed 0.036 ms (3.608 ms / 100) 3.582 -> 3.589 ( +0.20%) [ +0.06% +0.00% +0.03% / +0.20% +0.39% +0.45%] index_select skip256 : Elapsed 0.036 ms (3.584 ms / 100) 3.549 -> 3.550 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.03% +0.37% +0.48%] index_select spread : Elapsed 0.035 ms (3.549 ms / 100) 3.573 -> 3.576 ( +0.08%) [ +0.08% +0.06% +0.00% / +0.08% +0.45% +0.48%] index_select strided 3 : Elapsed 0.036 ms (3.576 ms / 100) 3.552 -> 3.560 ( +0.23%) [ +0.39% +0.48% +0.00% / +0.23% +0.45% +0.37%] index_select strided 5 : Elapsed 0.036 ms (3.566 ms / 100) 3.556 -> 3.556 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.37% +0.37%] index_select strided 7 : Elapsed 0.036 ms (3.556 ms / 100) 3.592 -> 3.592 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.31% +0.53%] index_select strided 8 : Elapsed 0.036 ms (3.593 ms / 100) 3.550 -> 3.550 ( +0.00%) [ +0.20% +0.20% +0.00% / +0.00% +0.34% +0.34%] index_select strided 16 : Elapsed 0.036 ms (3.557 ms / 100) 3.557 -> 3.557 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.39% +0.37%] index_select random : Elapsed 0.036 ms (3.557 ms / 100) 3.550 -> 3.551 ( +0.03%) [ +0.00% +0.08% +0.06% / +0.03% +0.48% +0.54%] index_select random_sorted : Elapsed 0.036 ms (3.550 ms / 100) 3.545 -> 3.550 ( +0.14%) [ +0.08% +0.08% +0.00% / +0.14% +0.42% +0.54%] index_select perm : Elapsed 0.035 ms (3.548 ms / 100) 3.543 -> 3.545 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.06% +0.48% +0.59%] index_select perm_sorted : Elapsed 0.035 ms (3.545 ms / 100) B = [5, 4, 20, 16] (stride (1, 80, 320, 5)) A = [5, 4, 20, 40] (stride (20, 100, 1, 400)) dim = 3 3.819 -> 3.823 ( +0.10%) [ +0.03% +0.00% +0.05% / +0.10% +0.71% +0.73%] index_select const : Elapsed 0.038 ms (3.820 ms / 100) 3.806 -> 3.806 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.63%] index_select wrap : Elapsed 0.038 ms (3.806 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.71% +0.68%] index_select linear : Elapsed 0.038 ms (3.809 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.81% +0.79%] index_select reverse : Elapsed 0.038 ms (3.817 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.18% +0.11% +0.00% / +0.03% +0.92% +0.95%] index_select skip64 : Elapsed 0.038 ms (3.814 ms / 100) 3.818 -> 3.822 ( +0.10%) [ +0.00% +0.05% +0.00% / +0.10% +0.84% +0.84%] index_select skip256 : Elapsed 0.038 ms (3.818 ms / 100) 3.806 -> 3.809 ( +0.08%) [ +0.05% +0.00% +0.03% / +0.08% +0.81% +0.87%] index_select spread : Elapsed 0.038 ms (3.808 ms / 100) 3.810 -> 3.811 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.63% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.813 ms / 100) 3.819 -> 3.822 ( +0.08%) [ +0.05% +0.00% +0.00% / +0.08% +0.68% +0.65%] index_select strided 5 : Elapsed 0.038 ms (3.821 ms / 100) 3.827 -> 3.830 ( +0.08%) [ +0.03% +0.13% +0.00% / +0.08% +0.73% +0.81%] index_select strided 7 : Elapsed 0.038 ms (3.828 ms / 100) 3.818 -> 3.818 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.76% +0.73%] index_select strided 8 : Elapsed 0.038 ms (3.819 ms / 100) 3.806 -> 3.808 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.84% +0.84%] index_select strided 16 : Elapsed 0.038 ms (3.806 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.73% +0.76%] index_select random : Elapsed 0.038 ms (3.815 ms / 100) 3.804 -> 3.805 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.79% +0.79%] index_select random_sorted : Elapsed 0.038 ms (3.805 ms / 100) 3.805 -> 3.806 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.79% +0.84%] index_select perm : Elapsed 0.038 ms (3.806 ms / 100) 3.814 -> 3.816 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.79% +0.81%] index_select perm_sorted : Elapsed 0.038 ms (3.816 ms / 100) B = [5, 4, 20, 16] (stride (80, 20, 1, 400)) A = [5, 4, 20, 40] (stride (20, 4000, 1, 100)) dim = 3 3.217 -> 3.217 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select const : Elapsed 0.032 ms (3.217 ms / 100) 3.205 -> 3.206 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.44% +0.44%] index_select wrap : Elapsed 0.032 ms (3.205 ms / 100) 3.205 -> 3.205 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.47% +0.62%] index_select linear : Elapsed 0.032 ms (3.205 ms / 100) 3.201 -> 3.203 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.47% +0.50%] index_select reverse : Elapsed 0.032 ms (3.201 ms / 100) 3.220 -> 3.218 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.37% +0.47%] index_select skip64 : Elapsed 0.032 ms (3.220 ms / 100) 3.216 -> 3.217 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.56% +0.53%] index_select skip256 : Elapsed 0.032 ms (3.216 ms / 100) 3.205 -> 3.205 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select spread : Elapsed 0.032 ms (3.209 ms / 100) 3.204 -> 3.206 ( +0.06%) [ +0.03% +0.00% +0.09% / +0.06% +0.44% +0.41%] index_select strided 3 : Elapsed 0.032 ms (3.205 ms / 100) 3.200 -> 3.200 ( +0.00%) [ +0.13% +0.03% +0.00% / +0.00% +0.38% +0.34%] index_select strided 5 : Elapsed 0.032 ms (3.204 ms / 100) 3.215 -> 3.217 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.28% +0.28%] index_select strided 7 : Elapsed 0.032 ms (3.217 ms / 100) 3.210 -> 3.210 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.37% +0.44%] index_select strided 8 : Elapsed 0.032 ms (3.211 ms / 100) 3.221 -> 3.225 ( +0.12%) [ +0.25% +0.28% +0.00% / +0.12% +0.34% +0.40%] index_select strided 16 : Elapsed 0.032 ms (3.229 ms / 100) 3.203 -> 3.205 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.56% +0.50%] index_select random : Elapsed 0.032 ms (3.203 ms / 100) 3.199 -> 3.201 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.53% +0.69%] index_select random_sorted : Elapsed 0.032 ms (3.201 ms / 100) 3.212 -> 3.211 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.31% +0.37%] index_select perm : Elapsed 0.032 ms (3.213 ms / 100) 3.204 -> 3.206 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.31% +0.34%] index_select perm_sorted : Elapsed 0.032 ms (3.205 ms / 100) B = [5, 4, 20, 16] (stride (20, 100, 1, 400)) A = [5, 4, 20, 40] (stride (4, 1, 20, 400)) dim = 3 3.700 -> 3.701 ( +0.03%) [ +0.14% +0.03% +0.00% / +0.03% +0.78% +0.70%] index_select const : Elapsed 0.037 ms (3.705 ms / 100) 3.701 -> 3.702 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.76% +0.73%] index_select wrap : Elapsed 0.037 ms (3.702 ms / 100) 3.701 -> 3.701 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.70% +0.65%] index_select linear : Elapsed 0.037 ms (3.702 ms / 100) 3.701 -> 3.702 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.78% +0.84%] index_select reverse : Elapsed 0.037 ms (3.702 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.59% +0.51%] index_select skip64 : Elapsed 0.037 ms (3.701 ms / 100) 3.698 -> 3.701 ( +0.08%) [ +0.05% +0.08% +0.00% / +0.08% +0.81% +0.81%] index_select skip256 : Elapsed 0.037 ms (3.700 ms / 100) 3.666 -> 3.674 ( +0.22%) [ +0.08% +0.08% +0.00% / +0.22% +0.90% +0.85%] index_select spread : Elapsed 0.037 ms (3.669 ms / 100) 3.704 -> 3.705 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.67% +0.67%] index_select strided 3 : Elapsed 0.037 ms (3.705 ms / 100) 3.700 -> 3.699 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.76% +0.73%] index_select strided 5 : Elapsed 0.037 ms (3.700 ms / 100) 3.690 -> 3.692 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.76% +0.79%] index_select strided 7 : Elapsed 0.037 ms (3.691 ms / 100) 3.707 -> 3.711 ( +0.11%) [ +0.03% +0.03% +0.00% / +0.11% +0.73% +0.70%] index_select strided 8 : Elapsed 0.037 ms (3.708 ms / 100) 3.704 -> 3.704 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.78%] index_select strided 16 : Elapsed 0.037 ms (3.704 ms / 100) 3.704 -> 3.705 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.73% +0.73%] index_select random : Elapsed 0.037 ms (3.704 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.73% +0.68%] index_select random_sorted : Elapsed 0.037 ms (3.695 ms / 100) 3.701 -> 3.704 ( +0.08%) [ +0.11% +0.00% +0.03% / +0.08% +0.81% +0.78%] index_select perm : Elapsed 0.037 ms (3.705 ms / 100) 3.703 -> 3.704 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.73% +0.78%] index_select perm_sorted : Elapsed 0.037 ms (3.704 ms / 100) out_shape = [16, 4, 40, 20] in_shape = [5, 4, 40, 20] idx_dim = 0 B = [16, 4, 40, 20] (stride (3200, 1, 4, 160)) A = [5, 4, 40, 20] (stride (800, 4000, 1, 40)) dim = 0 2.459 -> 2.461 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.08% +0.65% +0.69%] index_add_ linear : Elapsed 0.025 ms (2.459 ms / 100) 2.398 -> 2.399 ( +0.04%) [ +0.13% +0.00% +0.13% / +0.04% +0.79% +0.92%] index_copy_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.455 -> 2.462 ( +0.29%) [ +0.00% +0.12% +0.16% / +0.29% +0.73% +0.98%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.394 -> 2.415 ( +0.88%) [ +0.00% +0.33% +0.08% / +0.88% +1.04% +1.04%] index_copy_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.464 -> 2.476 ( +0.49%) [ +0.00% +0.08% +0.20% / +0.49% +0.69% +0.49%] index_add_ spread : Elapsed 0.025 ms (2.464 ms / 100) 2.401 -> 2.411 ( +0.42%) [ +0.00% +0.00% +0.17% / +0.42% +0.96% +0.96%] index_copy_ spread : Elapsed 0.024 ms (2.401 ms / 100) 2.457 -> 2.463 ( +0.24%) [ +0.00% +0.00% +0.08% / +0.24% +0.77% +0.77%] index_add_ strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.396 -> 2.408 ( +0.50%) [ +0.04% +0.08% +0.00% / +0.50% +0.92% +0.88%] index_copy_ strided 3 : Elapsed 0.024 ms (2.397 ms / 100) 2.454 -> 2.460 ( +0.24%) [ +0.16% +0.16% +0.00% / +0.24% +0.98% +0.98%] index_add_ strided 5 : Elapsed 0.025 ms (2.458 ms / 100) 2.398 -> 2.401 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.13% +1.04% +1.08%] index_copy_ strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.86% +0.77%] index_add_ strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.391 -> 2.393 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +1.05% +1.00%] index_copy_ strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.457 -> 2.456 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.90% +0.81%] index_add_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.396 -> 2.400 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +0.96% +1.04%] index_copy_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.464 -> 2.465 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.53% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.399 -> 2.404 ( +0.21%) [ +0.17% +0.00% +0.17% / +0.21% +0.92% +0.92%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 5.351 -> 5.357 ( +0.11%) [ +0.04% +0.00% +0.02% / +0.11% +0.71% +0.69%] index_select const : Elapsed 0.054 ms (5.353 ms / 100) 5.342 -> 5.347 ( +0.09%) [ +0.13% +0.15% +0.00% / +0.09% +0.97% +0.97%] index_select wrap : Elapsed 0.053 ms (5.349 ms / 100) 5.379 -> 5.379 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.89% +0.82%] index_select linear : Elapsed 0.054 ms (5.386 ms / 100) 5.361 -> 5.368 ( +0.13%) [ +0.00% +0.13% +0.09% / +0.13% +0.60% +0.73%] index_select reverse : Elapsed 0.054 ms (5.361 ms / 100) 5.346 -> 5.353 ( +0.13%) [ +0.00% +0.06% +0.02% / +0.13% +0.62% +0.73%] index_select skip64 : Elapsed 0.053 ms (5.346 ms / 100) 5.351 -> 5.353 ( +0.04%) [ +0.00% +0.06% +0.07% / +0.04% +0.67% +0.67%] index_select skip256 : Elapsed 0.054 ms (5.351 ms / 100) 5.357 -> 5.353 ( -0.07%) [ +0.04% +0.02% +0.00% / -0.07% +0.56% +0.67%] index_select spread : Elapsed 0.054 ms (5.359 ms / 100) 5.378 -> 5.378 ( +0.00%) [ +0.20% +0.00% +0.11% / +0.00% +0.97% +1.04%] index_select strided 3 : Elapsed 0.054 ms (5.389 ms / 100) 5.364 -> 5.364 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.00% +0.67% +0.67%] index_select random : Elapsed 0.054 ms (5.365 ms / 100) 5.331 -> 5.337 ( +0.11%) [ +0.02% +0.00% +0.11% / +0.11% +0.75% +0.77%] index_select random_sorted : Elapsed 0.053 ms (5.332 ms / 100) B = [16, 4, 40, 20] (stride (80, 20, 1280, 1)) A = [5, 4, 40, 20] (stride (1, 4000, 5, 200)) dim = 0 2.507 -> 2.503 ( -0.16%) [ +0.24% +0.00% +0.04% / +0.08% +0.16% -0.16%] index_add_ linear : Elapsed 0.025 ms (2.513 ms / 100) 2.429 -> 2.425 ( -0.16%) [ +0.12% +0.16% +0.00% / +0.00% +0.04% -0.16%] index_copy_ linear : Elapsed 0.024 ms (2.432 ms / 100) 2.507 -> 2.508 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.08% +0.04% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.508 ms / 100) 2.428 -> 2.430 ( +0.08%) [ +0.00% +0.08% +0.21% / +0.21% +0.08% +0.21%] index_copy_ reverse : Elapsed 0.024 ms (2.428 ms / 100) 2.503 -> 2.507 ( +0.16%) [ +0.16% +0.04% +0.00% / +0.16% +0.28% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.507 ms / 100) 2.424 -> 2.430 ( +0.25%) [ +0.08% +0.00% +0.08% / +0.33% +0.25% +0.25%] index_copy_ spread : Elapsed 0.024 ms (2.426 ms / 100) 2.503 -> 2.506 ( +0.12%) [ +0.20% +0.00% +0.20% / +0.20% +0.16% +0.12%] index_add_ strided 3 : Elapsed 0.025 ms (2.508 ms / 100) 2.428 -> 2.429 ( +0.04%) [ +0.16% +0.00% +0.25% / +0.21% +0.21% +0.04%] index_copy_ strided 3 : Elapsed 0.024 ms (2.432 ms / 100) 2.505 -> 2.503 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.20% +0.04%] index_add_ strided 5 : Elapsed 0.025 ms (2.505 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.16% +0.00% +0.12% / +0.16% +0.37% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.429 ms / 100) 2.502 -> 2.502 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.16% +0.04%] index_add_ strided 7 : Elapsed 0.025 ms (2.504 ms / 100) 2.423 -> 2.429 ( +0.25%) [ +0.00% +0.25% +0.25% / +0.25% +0.29% +0.45%] index_copy_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.505 -> 2.504 ( -0.04%) [ +0.00% +0.28% +0.04% / -0.04% +0.48% +0.36%] index_add_ perm : Elapsed 0.025 ms (2.505 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.41% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.427 ms / 100) 2.504 -> 2.501 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% +0.40% +0.48%] index_add_ perm_sorted : Elapsed 0.025 ms (2.505 ms / 100) 2.426 -> 2.430 ( +0.16%) [ +0.16% +0.04% +0.00% / +0.16% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) 5.425 -> 5.435 ( +0.18%) [ +0.33% +0.61% +0.00% / +0.18% +0.74% +0.55%] index_select const : Elapsed 0.054 ms (5.443 ms / 100) 5.437 -> 5.447 ( +0.18%) [ +0.06% +0.06% +0.00% / +0.18% +0.35% +0.37%] index_select wrap : Elapsed 0.054 ms (5.440 ms / 100) 5.433 -> 5.432 ( -0.02%) [ +0.00% +0.06% +0.11% / -0.02% +0.61% +0.17%] index_select linear : Elapsed 0.054 ms (5.433 ms / 100) 5.461 -> 5.471 ( +0.18%) [ +0.05% +0.04% +0.00% / +0.18% +0.24% +0.44%] index_select reverse : Elapsed 0.055 ms (5.464 ms / 100) 5.432 -> 5.437 ( +0.09%) [ +0.31% +0.22% +0.00% / +0.09% +0.57% +0.70%] index_select skip64 : Elapsed 0.054 ms (5.449 ms / 100) 5.427 -> 5.441 ( +0.26%) [ +0.09% +0.00% +0.24% / +0.26% +0.59% +0.42%] index_select skip256 : Elapsed 0.054 ms (5.432 ms / 100) 5.440 -> 5.447 ( +0.13%) [ +0.00% +0.00% +0.24% / +0.31% +0.13% +0.17%] index_select spread : Elapsed 0.054 ms (5.440 ms / 100) 5.428 -> 5.426 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.53% +0.72%] index_select strided 3 : Elapsed 0.054 ms (5.435 ms / 100) 5.443 -> 5.455 ( +0.22%) [ +0.00% +0.20% +0.15% / +0.22% +0.53% +0.64%] index_select random : Elapsed 0.054 ms (5.443 ms / 100) 5.425 -> 5.430 ( +0.09%) [ +0.42% +0.00% +0.09% / +0.09% +0.81% +0.46%] index_select random_sorted : Elapsed 0.054 ms (5.448 ms / 100) B = [16, 4, 40, 20] (stride (160, 1, 4, 2560)) A = [5, 4, 40, 20] (stride (20, 4000, 100, 1)) dim = 0 2.216 -> 2.217 ( +0.05%) [ +0.27% +0.27% +0.00% / +0.05% +2.21% +2.08%] index_add_ linear : Elapsed 0.022 ms (2.222 ms / 100) 2.177 -> 2.179 ( +0.09%) [ +0.32% +0.00% +0.00% / +0.09% +1.93% +1.88%] index_copy_ linear : Elapsed 0.022 ms (2.184 ms / 100) 2.213 -> 2.225 ( +0.54%) [ +0.50% +0.23% +0.00% / +0.54% +1.81% +2.21%] index_add_ reverse : Elapsed 0.022 ms (2.224 ms / 100) 2.172 -> 2.187 ( +0.69%) [ +0.41% +0.14% +0.00% / +0.69% +1.70% +1.66%] index_copy_ reverse : Elapsed 0.022 ms (2.181 ms / 100) 2.228 -> 2.232 ( +0.18%) [ +0.13% +0.09% +0.00% / +0.18% +1.66% +1.97%] index_add_ spread : Elapsed 0.022 ms (2.231 ms / 100) 2.181 -> 2.186 ( +0.23%) [ +0.00% +0.14% +0.18% / +0.23% +1.97% +1.93%] index_copy_ spread : Elapsed 0.022 ms (2.181 ms / 100) 2.220 -> 2.228 ( +0.36%) [ +0.27% +0.00% +0.23% / +0.36% +2.25% +2.03%] index_add_ strided 3 : Elapsed 0.022 ms (2.226 ms / 100) 2.177 -> 2.192 ( +0.69%) [ +0.41% +0.00% +0.23% / +0.69% +1.98% +1.75%] index_copy_ strided 3 : Elapsed 0.022 ms (2.186 ms / 100) 2.215 -> 2.216 ( +0.05%) [ +0.09% +0.00% +0.27% / +0.05% +2.30% +2.17%] index_add_ strided 5 : Elapsed 0.022 ms (2.217 ms / 100) 2.175 -> 2.171 ( -0.18%) [ +0.09% +0.00% +0.00% / -0.18% +1.98% +1.98%] index_copy_ strided 5 : Elapsed 0.022 ms (2.177 ms / 100) 2.218 -> 2.226 ( +0.36%) [ +0.45% +0.00% +0.00% / +0.36% +2.03% +2.07%] index_add_ strided 7 : Elapsed 0.022 ms (2.228 ms / 100) 2.172 -> 2.177 ( +0.23%) [ +0.60% +0.00% +0.37% / +0.23% +2.39% +2.53%] index_copy_ strided 7 : Elapsed 0.022 ms (2.185 ms / 100) 2.224 -> 2.229 ( +0.22%) [ +0.00% +0.18% +0.04% / +0.22% +1.93% +2.25%] index_add_ perm : Elapsed 0.022 ms (2.224 ms / 100) 2.179 -> 2.186 ( +0.32%) [ +0.09% +0.00% +0.09% / +0.32% +1.79% +1.97%] index_copy_ perm : Elapsed 0.022 ms (2.181 ms / 100) 2.225 -> 2.230 ( +0.22%) [ +0.00% +0.40% +0.13% / +0.22% +1.66% +2.02%] index_add_ perm_sorted : Elapsed 0.022 ms (2.225 ms / 100) 2.180 -> 2.187 ( +0.32%) [ +0.05% +0.00% +0.05% / +0.32% +1.97% +1.97%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.181 ms / 100) 4.522 -> 4.526 ( +0.09%) [ +0.18% +0.00% +0.07% / +0.09% +0.73% +0.80%] index_select const : Elapsed 0.045 ms (4.530 ms / 100) 4.548 -> 4.559 ( +0.24%) [ +0.13% +0.07% +0.00% / +0.24% +1.41% +1.45%] index_select wrap : Elapsed 0.046 ms (4.554 ms / 100) 4.549 -> 4.582 ( +0.73%) [ +0.35% +0.00% +0.04% / +0.73% +1.43% +1.69%] index_select linear : Elapsed 0.046 ms (4.565 ms / 100) 4.561 -> 4.559 ( -0.04%) [ +0.09% +0.15% +0.00% / -0.04% +1.14% +1.12%] index_select reverse : Elapsed 0.046 ms (4.565 ms / 100) 4.509 -> 4.521 ( +0.27%) [ +0.00% +0.18% +0.04% / +0.27% +0.91% +0.86%] index_select skip64 : Elapsed 0.045 ms (4.509 ms / 100) 4.526 -> 4.530 ( +0.09%) [ +0.00% +0.07% +0.07% / +0.09% +0.75% +0.75%] index_select skip256 : Elapsed 0.045 ms (4.526 ms / 100) 4.561 -> 4.565 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.09% +1.29% +1.38%] index_select spread : Elapsed 0.046 ms (4.567 ms / 100) 4.551 -> 4.560 ( +0.20%) [ +0.00% +0.04% +0.15% / +0.20% +1.58% +1.56%] index_select strided 3 : Elapsed 0.046 ms (4.551 ms / 100) 4.553 -> 4.558 ( +0.11%) [ +0.09% +0.00% +0.00% / +0.11% +1.05% +1.38%] index_select random : Elapsed 0.046 ms (4.557 ms / 100) 4.543 -> 4.550 ( +0.15%) [ +0.11% +0.09% +0.00% / +0.15% +1.54% +1.28%] index_select random_sorted : Elapsed 0.045 ms (4.548 ms / 100) out_shape = [5, 16, 40, 20] in_shape = [5, 4, 40, 20] idx_dim = 1 B = [5, 16, 40, 20] (stride (12800, 20, 320, 1)) A = [5, 4, 40, 20] (stride (40, 200, 1, 800)) dim = 1 2.550 -> 2.553 ( +0.12%) [ +0.08% +0.20% +0.00% / +0.12% +0.51% +0.35%] index_add_ linear : Elapsed 0.026 ms (2.552 ms / 100) 2.491 -> 2.492 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.48% +0.48%] index_copy_ linear : Elapsed 0.025 ms (2.491 ms / 100) 2.560 -> 2.566 ( +0.23%) [ +0.08% +0.12% +0.00% / +0.59% +0.47% +0.23%] index_add_ reverse : Elapsed 0.026 ms (2.562 ms / 100) 2.498 -> 2.508 ( +0.40%) [ +0.16% +0.00% +0.16% / +0.68% +0.48% +0.40%] index_copy_ reverse : Elapsed 0.025 ms (2.502 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.00% +0.12% +0.12% / +0.08% +0.43% +0.63%] index_add_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.502 -> 2.506 ( +0.16%) [ +0.24% +0.28% +0.00% / +0.16% +0.40% +0.68%] index_copy_ spread : Elapsed 0.025 ms (2.508 ms / 100) 2.545 -> 2.546 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.39% +0.67%] index_add_ strided 3 : Elapsed 0.025 ms (2.546 ms / 100) 2.496 -> 2.508 ( +0.48%) [ +0.08% +0.00% +0.04% / +0.48% +0.52% +0.60%] index_copy_ strided 3 : Elapsed 0.025 ms (2.498 ms / 100) 2.561 -> 2.568 ( +0.27%) [ +0.00% +0.23% +0.23% / +0.27% +0.47% +0.47%] index_add_ strided 5 : Elapsed 0.026 ms (2.561 ms / 100) 2.513 -> 2.521 ( +0.32%) [ +0.00% +0.16% +0.32% / +0.36% +0.32% +0.32%] index_copy_ strided 5 : Elapsed 0.025 ms (2.513 ms / 100) 2.555 -> 2.562 ( +0.27%) [ +0.08% +0.00% +0.16% / +0.27% +0.39% +0.67%] index_add_ strided 7 : Elapsed 0.026 ms (2.557 ms / 100) 2.508 -> 2.510 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.36% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.508 ms / 100) 2.552 -> 2.556 ( +0.16%) [ +0.00% +0.04% +0.12% / +0.16% +0.24% +0.16%] index_add_ perm : Elapsed 0.026 ms (2.552 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.28% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.501 ms / 100) 2.542 -> 2.546 ( +0.16%) [ +0.00% +0.00% +0.28% / +0.16% +0.24% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.542 ms / 100) 2.494 -> 2.498 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.32% +0.40%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.497 ms / 100) 5.756 -> 5.762 ( +0.10%) [ +0.03% +0.00% +0.07% / +0.10% +0.49% +0.49%] index_select const : Elapsed 0.058 ms (5.758 ms / 100) 5.714 -> 5.723 ( +0.16%) [ +0.11% +0.00% +0.11% / +0.16% +0.82% +0.53%] index_select wrap : Elapsed 0.057 ms (5.720 ms / 100) 5.767 -> 5.780 ( +0.23%) [ +0.23% +0.10% +0.00% / +0.23% +0.68% +0.68%] index_select linear : Elapsed 0.058 ms (5.780 ms / 100) 5.802 -> 5.805 ( +0.05%) [ +0.07% +0.00% +0.00% / +0.05% +0.55% +0.52%] index_select reverse : Elapsed 0.058 ms (5.806 ms / 100) 5.746 -> 5.748 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.38% +0.38%] index_select skip64 : Elapsed 0.057 ms (5.746 ms / 100) 5.741 -> 5.745 ( +0.07%) [ +0.03% +0.09% +0.00% / +0.07% +0.31% +0.33%] index_select skip256 : Elapsed 0.057 ms (5.743 ms / 100) 5.717 -> 5.718 ( +0.02%) [ +0.03% +0.05% +0.00% / +0.02% +0.24% +0.37%] index_select spread : Elapsed 0.057 ms (5.719 ms / 100) 5.703 -> 5.706 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +0.33% +0.40%] index_select strided 3 : Elapsed 0.057 ms (5.708 ms / 100) 5.723 -> 5.735 ( +0.21%) [ +0.14% +0.12% +0.00% / +0.21% +0.45% +0.49%] index_select random : Elapsed 0.057 ms (5.731 ms / 100) 5.708 -> 5.714 ( +0.11%) [ +0.11% +0.00% +0.02% / +0.11% +0.46% +0.47%] index_select random_sorted : Elapsed 0.057 ms (5.714 ms / 100) B = [5, 16, 40, 20] (stride (12800, 1, 320, 16)) A = [5, 4, 40, 20] (stride (80, 1, 400, 4)) dim = 1 2.348 -> 2.348 ( +0.00%) [ +0.17% +0.00% +0.09% / +0.00% +0.30% +0.51%] index_add_ linear : Elapsed 0.024 ms (2.352 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.30% +0.17% +0.00% / +0.26% +0.64% +0.73%] index_copy_ linear : Elapsed 0.023 ms (2.333 ms / 100) 2.349 -> 2.352 ( +0.13%) [ +0.09% +0.00% +0.13% / +0.13% +0.34% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.351 ms / 100) 2.333 -> 2.332 ( -0.04%) [ +0.21% +0.00% +0.00% / +0.04% +0.17% -0.04%] index_copy_ reverse : Elapsed 0.023 ms (2.338 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.17% +0.21%] index_add_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.433 -> 2.439 ( +0.25%) [ +0.04% +0.04% +0.00% / +0.25% +0.41% +0.49%] index_copy_ spread : Elapsed 0.024 ms (2.434 ms / 100) 2.394 -> 2.397 ( +0.13%) [ +0.08% +0.00% +0.17% / +0.13% +0.38% +0.13%] index_add_ strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.433 -> 2.436 ( +0.12%) [ +0.12% +0.00% +0.16% / +0.12% +0.33% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.392 -> 2.390 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.13% +0.08%] index_add_ strided 5 : Elapsed 0.024 ms (2.392 ms / 100) 2.432 -> 2.437 ( +0.21%) [ +0.08% +0.12% +0.00% / +0.21% +0.37% +0.33%] index_copy_ strided 5 : Elapsed 0.024 ms (2.434 ms / 100) 2.393 -> 2.392 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.25% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.436 -> 2.435 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.53% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.438 ms / 100) 2.351 -> 2.350 ( -0.04%) [ +0.17% +0.09% +0.00% / -0.04% +0.38% +0.26%] index_add_ perm : Elapsed 0.024 ms (2.355 ms / 100) 2.329 -> 2.329 ( +0.00%) [ +0.26% +0.09% +0.00% / +0.00% +0.43% +0.21%] index_copy_ perm : Elapsed 0.023 ms (2.335 ms / 100) 2.353 -> 2.354 ( +0.04%) [ +0.00% +0.21% +0.17% / +0.04% +0.21% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.353 ms / 100) 2.332 -> 2.334 ( +0.09%) [ +0.00% +0.04% +0.17% / +0.09% +0.51% +0.21%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.332 ms / 100) 4.985 -> 4.985 ( +0.00%) [ +0.00% +0.16% +0.10% / +0.00% +0.54% +0.62%] index_select const : Elapsed 0.050 ms (4.985 ms / 100) 4.988 -> 4.989 ( +0.02%) [ +0.16% +0.00% +0.04% / +0.02% +0.28% +0.34%] index_select wrap : Elapsed 0.050 ms (4.996 ms / 100) 4.977 -> 4.982 ( +0.10%) [ +0.18% +0.00% +0.06% / +0.10% +0.32% +0.24%] index_select linear : Elapsed 0.050 ms (4.986 ms / 100) 4.994 -> 4.998 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.44% +0.58%] index_select reverse : Elapsed 0.050 ms (4.994 ms / 100) 4.982 -> 4.990 ( +0.16%) [ +0.00% +0.06% +0.10% / +0.16% +0.36% +0.38%] index_select skip64 : Elapsed 0.050 ms (4.982 ms / 100) 4.975 -> 4.977 ( +0.04%) [ +0.00% +0.04% +0.10% / +0.04% +0.44% +0.44%] index_select skip256 : Elapsed 0.050 ms (4.975 ms / 100) 4.984 -> 4.991 ( +0.14%) [ +0.10% +0.14% +0.00% / +0.14% +0.46% +0.44%] index_select spread : Elapsed 0.050 ms (4.989 ms / 100) 4.993 -> 4.994 ( +0.02%) [ +0.12% +0.12% +0.00% / +0.02% +0.42% +0.42%] index_select strided 3 : Elapsed 0.050 ms (4.999 ms / 100) 4.980 -> 4.989 ( +0.18%) [ +0.00% +0.12% +0.06% / +0.24% +0.34% +0.18%] index_select random : Elapsed 0.050 ms (4.980 ms / 100) 4.997 -> 5.007 ( +0.20%) [ +0.12% +0.00% +0.10% / +0.20% +0.40% +0.46%] index_select random_sorted : Elapsed 0.050 ms (5.003 ms / 100) B = [5, 16, 40, 20] (stride (1, 5, 1600, 80)) A = [5, 4, 40, 20] (stride (1, 5, 20, 800)) dim = 1 2.469 -> 2.472 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.20% +0.12%] index_add_ linear : Elapsed 0.025 ms (2.470 ms / 100) 2.429 -> 2.433 ( +0.16%) [ +0.00% +0.00% +0.12% / +0.16% +0.45% +0.37%] index_copy_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.482 -> 2.484 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.36% +0.44%] index_add_ reverse : Elapsed 0.025 ms (2.482 ms / 100) 2.442 -> 2.445 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.45% +0.57%] index_copy_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.487 -> 2.486 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.52% +0.56%] index_add_ spread : Elapsed 0.025 ms (2.488 ms / 100) 2.473 -> 2.483 ( +0.40%) [ +0.12% +0.04% +0.00% / +0.40% +0.65% +0.53%] index_copy_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.482 -> 2.480 ( -0.08%) [ +0.16% +0.28% +0.00% / -0.08% +0.48% +0.52%] index_add_ strided 3 : Elapsed 0.025 ms (2.486 ms / 100) 2.494 -> 2.490 ( -0.16%) [ +0.12% +0.00% +0.16% / -0.16% +0.72% +0.68%] index_copy_ strided 3 : Elapsed 0.025 ms (2.497 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.12% +0.33% +0.37%] index_add_ strided 5 : Elapsed 0.025 ms (2.457 ms / 100) 2.430 -> 2.439 ( +0.37%) [ +0.00% +0.21% +0.25% / +0.45% +0.62% +0.37%] index_copy_ strided 5 : Elapsed 0.024 ms (2.430 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.20% +0.16% +0.28%] index_add_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.458 -> 2.469 ( +0.45%) [ +0.00% +0.08% +0.33% / +0.65% +0.45% +0.53%] index_copy_ strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.470 -> 2.474 ( +0.16%) [ +0.20% +0.08% +0.00% / +0.16% +0.28% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.475 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.24% +0.45%] index_copy_ perm : Elapsed 0.025 ms (2.454 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.36% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.455 -> 2.459 ( +0.16%) [ +0.00% +0.04% +0.04% / +0.16% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) 5.479 -> 5.491 ( +0.22%) [ +0.35% +0.07% +0.00% / +0.22% +0.75% +0.86%] index_select const : Elapsed 0.055 ms (5.498 ms / 100) 5.462 -> 5.459 ( -0.05%) [ +0.16% +0.00% +0.09% / -0.05% +0.35% +0.44%] index_select wrap : Elapsed 0.055 ms (5.471 ms / 100) 5.485 -> 5.490 ( +0.09%) [ +0.00% +0.07% +0.47% / +0.09% +0.84% +0.78%] index_select linear : Elapsed 0.055 ms (5.485 ms / 100) 5.495 -> 5.507 ( +0.22%) [ +0.00% +0.00% +0.42% / +0.22% +0.44% +0.76%] index_select reverse : Elapsed 0.055 ms (5.495 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.05% +0.00% +0.20% / +0.18% +0.02% +0.42%] index_select skip64 : Elapsed 0.055 ms (5.493 ms / 100) 5.486 -> 5.499 ( +0.24%) [ +0.36% +0.69% +0.00% / +0.51% +0.24% +0.93%] index_select skip256 : Elapsed 0.055 ms (5.506 ms / 100) 5.469 -> 5.474 ( +0.09%) [ +0.04% +0.00% +0.16% / +0.09% +0.37% +0.27%] index_select spread : Elapsed 0.055 ms (5.471 ms / 100) 5.469 -> 5.477 ( +0.15%) [ +0.05% +0.00% +0.11% / +0.15% +0.35% +0.51%] index_select strided 3 : Elapsed 0.055 ms (5.472 ms / 100) 5.453 -> 5.469 ( +0.29%) [ +0.17% +0.00% +0.02% / +0.29% +0.57% +0.31%] index_select random : Elapsed 0.055 ms (5.462 ms / 100) 5.455 -> 5.447 ( -0.15%) [ +0.04% +0.13% +0.00% / -0.15% +0.31% +0.37%] index_select random_sorted : Elapsed 0.055 ms (5.457 ms / 100) B = [5, 16, 40, 20] (stride (640, 1, 16, 3200)) A = [5, 4, 40, 20] (stride (1, 100, 400, 5)) dim = 1 2.476 -> 2.479 ( +0.12%) [ +0.32% +0.00% +0.00% / +0.12% +0.36% +0.32%] index_add_ linear : Elapsed 0.025 ms (2.484 ms / 100) 2.455 -> 2.452 ( -0.12%) [ +0.08% +0.00% +0.12% / -0.12% +0.24% +0.29%] index_copy_ linear : Elapsed 0.025 ms (2.457 ms / 100) 2.475 -> 2.479 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.44% +0.16% +0.61%] index_add_ reverse : Elapsed 0.025 ms (2.476 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.33% +0.41%] index_copy_ reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.514 -> 2.512 ( -0.08%) [ +0.16% +0.00% +0.04% / -0.08% +0.08% -0.04%] index_add_ spread : Elapsed 0.025 ms (2.518 ms / 100) 2.554 -> 2.556 ( +0.08%) [ +0.20% +0.08% +0.00% / +0.08% +0.16% +0.23%] index_copy_ spread : Elapsed 0.026 ms (2.559 ms / 100) 2.508 -> 2.516 ( +0.32%) [ +0.60% +0.00% +0.28% / +0.32% +0.44% +0.44%] index_add_ strided 3 : Elapsed 0.025 ms (2.523 ms / 100) 2.549 -> 2.553 ( +0.16%) [ +0.08% +0.24% +0.00% / +0.16% +0.31% +0.20%] index_copy_ strided 3 : Elapsed 0.026 ms (2.551 ms / 100) 2.510 -> 2.517 ( +0.28%) [ +0.04% +0.00% +0.20% / +0.28% +0.56% +0.56%] index_add_ strided 5 : Elapsed 0.025 ms (2.511 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.00% +0.24% +0.04% / +0.08% +0.43% +0.47%] index_copy_ strided 5 : Elapsed 0.026 ms (2.552 ms / 100) 2.507 -> 2.511 ( +0.16%) [ +0.00% +0.16% +0.32% / +0.16% +0.80% +0.64%] index_add_ strided 7 : Elapsed 0.025 ms (2.507 ms / 100) 2.549 -> 2.552 ( +0.12%) [ +0.24% +0.20% +0.00% / +0.12% +0.51% +0.63%] index_copy_ strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.516 -> 2.526 ( +0.40%) [ +0.20% +0.00% +0.04% / +0.56% +0.44% +0.40%] index_add_ perm : Elapsed 0.025 ms (2.521 ms / 100) 2.554 -> 2.559 ( +0.20%) [ +0.16% +0.00% +0.08% / +0.20% +0.35% +0.23%] index_copy_ perm : Elapsed 0.026 ms (2.558 ms / 100) 2.512 -> 2.520 ( +0.32%) [ +0.04% +0.20% +0.00% / +0.40% +0.36% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.513 ms / 100) 2.551 -> 2.557 ( +0.24%) [ +0.00% +0.08% +0.20% / +0.24% +0.51% +0.43%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.551 ms / 100) 5.439 -> 5.447 ( +0.15%) [ +0.22% +0.00% +0.17% / +0.15% +0.44% +0.35%] index_select const : Elapsed 0.055 ms (5.451 ms / 100) 5.437 -> 5.442 ( +0.09%) [ +0.18% +0.07% +0.00% / +0.09% +0.33% +0.35%] index_select wrap : Elapsed 0.054 ms (5.447 ms / 100) 5.433 -> 5.446 ( +0.24%) [ +0.17% +0.06% +0.00% / +0.24% +0.31% +0.42%] index_select linear : Elapsed 0.054 ms (5.442 ms / 100) 5.435 -> 5.439 ( +0.07%) [ +0.15% +0.00% +0.02% / +0.07% +0.44% +0.42%] index_select reverse : Elapsed 0.054 ms (5.443 ms / 100) 5.442 -> 5.452 ( +0.18%) [ +0.00% +0.04% +0.00% / +0.18% +0.26% +0.44%] index_select skip64 : Elapsed 0.054 ms (5.442 ms / 100) 5.444 -> 5.441 ( -0.06%) [ +0.02% +0.02% +0.00% / -0.06% +0.31% +0.20%] index_select skip256 : Elapsed 0.054 ms (5.445 ms / 100) 5.453 -> 5.461 ( +0.15%) [ +0.04% +0.04% +0.00% / +0.15% +0.50% +0.31%] index_select spread : Elapsed 0.055 ms (5.455 ms / 100) 5.446 -> 5.450 ( +0.07%) [ +0.20% +0.00% +0.13% / +0.07% +0.39% +0.35%] index_select strided 3 : Elapsed 0.055 ms (5.457 ms / 100) 5.448 -> 5.460 ( +0.22%) [ +0.09% +0.02% +0.00% / +0.26% +0.22% +0.35%] index_select random : Elapsed 0.055 ms (5.453 ms / 100) 5.459 -> 5.473 ( +0.26%) [ +0.22% +0.02% +0.00% / +0.26% +0.35% +0.59%] index_select random_sorted : Elapsed 0.055 ms (5.471 ms / 100) B = [5, 16, 40, 20] (stride (40, 200, 1, 3200)) A = [5, 4, 40, 20] (stride (1, 5, 400, 20)) dim = 1 2.424 -> 2.427 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.33% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.424 ms / 100) 2.369 -> 2.376 ( +0.30%) [ +0.25% +0.34% +0.00% / +0.30% +0.51% +0.76%] index_copy_ linear : Elapsed 0.024 ms (2.375 ms / 100) 2.423 -> 2.421 ( -0.08%) [ +0.00% +0.04% +0.29% / -0.08% +0.45% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.369 -> 2.373 ( +0.17%) [ +0.00% +0.00% +0.08% / +0.17% +0.38% +0.59%] index_copy_ reverse : Elapsed 0.024 ms (2.369 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.08% +0.17% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.425 ms / 100) 2.367 -> 2.374 ( +0.30%) [ +0.21% +0.00% +0.42% / +0.30% +0.59% +0.55%] index_copy_ spread : Elapsed 0.024 ms (2.372 ms / 100) 2.439 -> 2.441 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.41% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.385 -> 2.384 ( -0.04%) [ +0.00% +0.25% +0.13% / -0.04% +0.08% +0.38%] index_copy_ strided 3 : Elapsed 0.024 ms (2.385 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.00% +0.04% +0.00% / +0.04% -0.16% -0.12%] index_add_ strided 5 : Elapsed 0.024 ms (2.449 ms / 100) 2.388 -> 2.386 ( -0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.04% -0.08%] index_copy_ strided 5 : Elapsed 0.024 ms (2.389 ms / 100) 2.428 -> 2.428 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.04% +0.00% +0.00%] index_add_ strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.374 -> 2.370 ( -0.17%) [ +0.00% +0.08% +0.00% / -0.17% +0.25% +0.17%] index_copy_ strided 7 : Elapsed 0.024 ms (2.374 ms / 100) 2.424 -> 2.421 ( -0.12%) [ +0.00% +0.00% +0.33% / -0.12% +0.08% +0.04%] index_add_ perm : Elapsed 0.024 ms (2.424 ms / 100) 2.369 -> 2.370 ( +0.04%) [ +0.00% +0.17% +1.35% / +0.04% +0.30% +0.34%] index_copy_ perm : Elapsed 0.024 ms (2.369 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.00% +0.00% +0.45% / +0.08% +0.25% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.423 ms / 100) 2.371 -> 2.377 ( +0.25%) [ +0.08% +0.00% +0.25% / +0.34% +0.25% +0.38%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.373 ms / 100) 5.276 -> 5.279 ( +0.06%) [ +0.00% +0.15% +0.11% / +0.06% +0.44% +0.42%] index_select const : Elapsed 0.053 ms (5.276 ms / 100) 5.311 -> 5.315 ( +0.08%) [ +0.09% +0.00% +0.41% / +0.08% +0.30% +0.47%] index_select wrap : Elapsed 0.053 ms (5.316 ms / 100) 5.300 -> 5.300 ( +0.00%) [ +0.02% +0.00% +0.30% / +0.00% +0.25% +0.32%] index_select linear : Elapsed 0.053 ms (5.301 ms / 100) 5.360 -> 5.361 ( +0.02%) [ +0.00% +0.04% +0.24% / +0.02% +0.45% +0.30%] index_select reverse : Elapsed 0.054 ms (5.360 ms / 100) 5.350 -> 5.359 ( +0.17%) [ +0.00% +0.09% +2.06% / +0.17% +0.30% +0.50%] index_select skip64 : Elapsed 0.053 ms (5.350 ms / 100) 5.299 -> 5.299 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.30% +0.30%] index_select skip256 : Elapsed 0.053 ms (5.299 ms / 100) 5.320 -> 5.321 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.06% +0.02% +0.11%] index_select spread : Elapsed 0.053 ms (5.320 ms / 100) 5.302 -> 5.308 ( +0.11%) [ +0.00% +0.06% +0.02% / +0.11% +0.28% +0.32%] index_select strided 3 : Elapsed 0.053 ms (5.302 ms / 100) 5.355 -> 5.359 ( +0.07%) [ +0.00% +0.00% +0.06% / +0.07% +0.24% +0.28%] index_select random : Elapsed 0.054 ms (5.355 ms / 100) 5.279 -> 5.279 ( +0.00%) [ +0.11% +0.00% +0.02% / +0.00% +0.30% +0.32%] index_select random_sorted : Elapsed 0.053 ms (5.285 ms / 100) B = [5, 16, 40, 20] (stride (1, 5, 80, 3200)) A = [5, 4, 40, 20] (stride (1, 200, 5, 800)) dim = 1 2.567 -> 2.570 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.31% +0.43%] index_add_ linear : Elapsed 0.026 ms (2.568 ms / 100) 2.532 -> 2.531 ( -0.04%) [ +0.00% +0.00% +0.08% / -0.04% -0.04% +0.16%] index_copy_ linear : Elapsed 0.025 ms (2.532 ms / 100) 2.577 -> 2.575 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.04% +0.23%] index_add_ reverse : Elapsed 0.026 ms (2.577 ms / 100) 2.532 -> 2.536 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.24% +0.16% +0.28%] index_copy_ reverse : Elapsed 0.025 ms (2.536 ms / 100) 2.573 -> 2.574 ( +0.04%) [ +0.00% +0.08% +0.19% / +0.04% +0.31% +0.31%] index_add_ spread : Elapsed 0.026 ms (2.573 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.51% +0.20% +0.08%] index_copy_ spread : Elapsed 0.026 ms (2.561 ms / 100) 2.576 -> 2.573 ( -0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.04% -0.12%] index_add_ strided 3 : Elapsed 0.026 ms (2.576 ms / 100) 2.586 -> 2.585 ( -0.04%) [ +0.19% +0.00% +0.00% / -0.04% +0.15% +0.00%] index_copy_ strided 3 : Elapsed 0.026 ms (2.591 ms / 100) 2.539 -> 2.545 ( +0.24%) [ +0.12% +0.00% +0.28% / +0.32% +0.59% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.542 ms / 100) 2.524 -> 2.524 ( +0.00%) [ +0.08% +0.00% +0.24% / +0.24% +0.28% +0.00%] index_copy_ strided 5 : Elapsed 0.025 ms (2.526 ms / 100) 2.550 -> 2.554 ( +0.16%) [ +0.08% +0.12% +0.00% / +0.43% +0.20% +0.16%] index_add_ strided 7 : Elapsed 0.026 ms (2.552 ms / 100) 2.540 -> 2.545 ( +0.20%) [ +0.04% +0.00% +0.20% / +0.28% +0.20% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.541 ms / 100) 2.555 -> 2.559 ( +0.16%) [ +0.00% +0.20% +0.04% / +0.27% +0.43% +0.16%] index_add_ perm : Elapsed 0.026 ms (2.555 ms / 100) 2.540 -> 2.548 ( +0.31%) [ +0.20% +0.31% +0.00% / +0.47% +0.31% +0.47%] index_copy_ perm : Elapsed 0.025 ms (2.545 ms / 100) 2.559 -> 2.566 ( +0.27%) [ +0.23% +0.04% +0.00% / +0.39% +0.27% +0.31%] index_add_ perm_sorted : Elapsed 0.026 ms (2.565 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.16% +0.00% +0.08% / +0.04% +0.12% +0.31%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.552 ms / 100) 5.819 -> 5.828 ( +0.15%) [ +0.07% +0.09% +0.00% / +0.15% +0.52% +0.40%] index_select const : Elapsed 0.058 ms (5.823 ms / 100) 5.791 -> 5.795 ( +0.07%) [ +0.10% +0.07% +0.00% / +0.07% +0.28% +0.24%] index_select wrap : Elapsed 0.058 ms (5.797 ms / 100) 5.808 -> 5.820 ( +0.21%) [ +0.10% +0.09% +0.00% / +0.21% +0.45% +0.34%] index_select linear : Elapsed 0.058 ms (5.814 ms / 100) 5.812 -> 5.816 ( +0.07%) [ +0.00% +0.24% +0.00% / +0.07% +0.29% +0.24%] index_select reverse : Elapsed 0.058 ms (5.812 ms / 100) 5.817 -> 5.826 ( +0.15%) [ +0.07% +0.10% +0.00% / +0.15% +0.28% +0.24%] index_select skip64 : Elapsed 0.058 ms (5.821 ms / 100) 5.810 -> 5.806 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.50% +0.36%] index_select skip256 : Elapsed 0.058 ms (5.814 ms / 100) 5.796 -> 5.804 ( +0.14%) [ +0.05% +0.02% +0.00% / +0.14% +0.48% +0.33%] index_select spread : Elapsed 0.058 ms (5.799 ms / 100) 5.801 -> 5.805 ( +0.07%) [ +0.09% +0.10% +0.00% / +0.07% +0.33% +0.41%] index_select strided 3 : Elapsed 0.058 ms (5.806 ms / 100) 5.806 -> 5.815 ( +0.16%) [ +0.00% +0.19% +0.26% / +0.16% +0.43% +0.33%] index_select random : Elapsed 0.058 ms (5.806 ms / 100) 5.805 -> 5.810 ( +0.09%) [ +0.00% +0.21% +0.10% / +0.09% +0.38% +0.45%] index_select random_sorted : Elapsed 0.058 ms (5.805 ms / 100) out_shape = [5, 4, 16, 20] in_shape = [5, 4, 40, 20] idx_dim = 2 B = [5, 4, 16, 20] (stride (4, 1, 400, 20)) A = [5, 4, 40, 20] (stride (80, 1, 400, 4)) dim = 2 3.535 -> 3.539 ( +0.11%) [ +0.03% +0.00% +0.06% / +0.11% +0.62% +0.57%] index_select const : Elapsed 0.035 ms (3.536 ms / 100) 3.520 -> 3.522 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.45% +0.43%] index_select wrap : Elapsed 0.035 ms (3.520 ms / 100) 3.533 -> 3.534 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.48% +0.51%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.515 -> 3.520 ( +0.14%) [ +0.00% +0.00% +0.06% / +0.14% +0.54% +0.54%] index_select reverse : Elapsed 0.035 ms (3.515 ms / 100) 3.542 -> 3.543 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.42% +0.48%] index_select skip64 : Elapsed 0.035 ms (3.542 ms / 100) 3.536 -> 3.536 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.68% +0.59%] index_select skip256 : Elapsed 0.035 ms (3.536 ms / 100) 3.533 -> 3.531 ( -0.06%) [ +0.17% +0.00% +0.08% / -0.06% +0.25% +0.28%] index_select spread : Elapsed 0.035 ms (3.539 ms / 100) 3.525 -> 3.531 ( +0.17%) [ +0.03% +0.09% +0.00% / +0.17% +0.26% +0.34%] index_select strided 3 : Elapsed 0.035 ms (3.526 ms / 100) 3.512 -> 3.513 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.54% +0.57%] index_select strided 5 : Elapsed 0.035 ms (3.513 ms / 100) 3.536 -> 3.537 ( +0.03%) [ +0.28% +0.00% +0.00% / +0.03% +0.65% +0.40%] index_select strided 7 : Elapsed 0.035 ms (3.546 ms / 100) 3.536 -> 3.540 ( +0.11%) [ +0.00% +0.14% +0.23% / +0.11% +0.42% +0.42%] index_select strided 8 : Elapsed 0.035 ms (3.536 ms / 100) 3.547 -> 3.548 ( +0.03%) [ +0.03% +0.00% +0.28% / +0.03% +0.45% +0.45%] index_select strided 16 : Elapsed 0.035 ms (3.548 ms / 100) 3.539 -> 3.545 ( +0.17%) [ +0.06% +0.11% +0.00% / +0.17% +0.45% +0.54%] index_select random : Elapsed 0.035 ms (3.541 ms / 100) 3.535 -> 3.536 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.28% +0.31%] index_select random_sorted : Elapsed 0.035 ms (3.535 ms / 100) 3.531 -> 3.535 ( +0.11%) [ +0.00% +0.03% +0.11% / +0.11% +0.48% +0.51%] index_select perm : Elapsed 0.035 ms (3.531 ms / 100) 3.517 -> 3.520 ( +0.09%) [ +0.00% +0.03% +0.09% / +0.09% +0.48% +0.45%] index_select perm_sorted : Elapsed 0.035 ms (3.517 ms / 100) B = [5, 4, 16, 20] (stride (64, 16, 1, 320)) A = [5, 4, 40, 20] (stride (3200, 40, 1, 160)) dim = 2 3.929 -> 3.932 ( +0.08%) [ +0.05% +0.00% +0.05% / +0.08% +0.48% +0.59%] index_select const : Elapsed 0.039 ms (3.931 ms / 100) 3.910 -> 3.938 ( +0.72%) [ +0.36% +0.23% +0.00% / +0.72% +0.90% +0.82%] index_select wrap : Elapsed 0.039 ms (3.924 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.38% +0.43%] index_select linear : Elapsed 0.039 ms (3.928 ms / 100) 3.919 -> 3.926 ( +0.18%) [ +0.08% +0.00% +0.28% / +0.18% +0.56% +0.54%] index_select reverse : Elapsed 0.039 ms (3.922 ms / 100) 3.919 -> 3.916 ( -0.08%) [ +0.10% +0.00% +0.08% / -0.08% +0.46% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.923 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.00% +0.10% +0.05% / +0.03% +0.69% +0.69%] index_select skip256 : Elapsed 0.039 ms (3.924 ms / 100) 3.922 -> 3.917 ( -0.13%) [ +0.08% +0.00% +0.05% / -0.13% +0.74% +0.74%] index_select spread : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.916 ( -0.08%) [ +0.00% +0.03% +0.03% / -0.08% +0.54% +0.51%] index_select strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 3.920 -> 3.929 ( +0.23%) [ +0.03% +0.00% +0.05% / +0.23% +0.46% +0.43%] index_select strided 5 : Elapsed 0.039 ms (3.921 ms / 100) 3.924 -> 3.934 ( +0.25%) [ +0.28% +0.20% +0.00% / +0.25% +0.89% +0.92%] index_select strided 7 : Elapsed 0.039 ms (3.935 ms / 100) 3.942 -> 3.945 ( +0.08%) [ +0.13% +0.18% +0.00% / +0.08% +0.68% +0.71%] index_select strided 8 : Elapsed 0.039 ms (3.947 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.66% +0.71%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.69% +0.66%] index_select random : Elapsed 0.039 ms (3.932 ms / 100) 3.906 -> 3.920 ( +0.36%) [ +0.31% +0.00% +0.33% / +0.36% +0.95% +0.84%] index_select random_sorted : Elapsed 0.039 ms (3.918 ms / 100) 3.929 -> 3.929 ( +0.00%) [ +0.00% +0.05% +0.15% / +0.00% +0.81% +0.81%] index_select perm : Elapsed 0.039 ms (3.929 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.10% +0.08% +0.00% / +0.13% +0.74% +0.89%] index_select perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) B = [5, 4, 16, 20] (stride (16, 80, 1, 320)) A = [5, 4, 40, 20] (stride (160, 1, 4, 800)) dim = 2 4.190 -> 4.208 ( +0.43%) [ +0.10% +0.00% +0.19% / +0.43% +0.48% +0.74%] index_select const : Elapsed 0.042 ms (4.194 ms / 100) 4.199 -> 4.216 ( +0.40%) [ +0.19% +0.00% +0.21% / +0.40% +0.62% +0.62%] index_select wrap : Elapsed 0.042 ms (4.207 ms / 100) 4.196 -> 4.199 ( +0.07%) [ +0.02% +0.00% +0.10% / +0.07% +0.52% +0.50%] index_select linear : Elapsed 0.042 ms (4.197 ms / 100) 4.187 -> 4.200 ( +0.31%) [ +0.24% +0.21% +0.00% / +0.31% +0.67% +0.69%] index_select reverse : Elapsed 0.042 ms (4.197 ms / 100) 4.206 -> 4.215 ( +0.21%) [ +0.00% +0.29% +0.05% / +0.21% +0.76% +0.69%] index_select skip64 : Elapsed 0.042 ms (4.206 ms / 100) 4.193 -> 4.191 ( -0.05%) [ +0.00% +0.07% +0.10% / -0.05% +0.74% +0.41%] index_select skip256 : Elapsed 0.042 ms (4.193 ms / 100) 4.194 -> 4.204 ( +0.24%) [ +0.17% +0.00% +0.05% / +0.24% +0.50% +0.43%] index_select spread : Elapsed 0.042 ms (4.201 ms / 100) 4.230 -> 4.232 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.33% +0.45%] index_select strided 3 : Elapsed 0.042 ms (4.230 ms / 100) 4.209 -> 4.209 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.48% +0.40%] index_select strided 5 : Elapsed 0.042 ms (4.209 ms / 100) 4.223 -> 4.220 ( -0.07%) [ +0.07% +0.00% +0.09% / -0.07% +0.45% +0.38%] index_select strided 7 : Elapsed 0.042 ms (4.226 ms / 100) 4.222 -> 4.220 ( -0.05%) [ +0.02% +0.00% +0.07% / -0.05% +0.54% +0.36%] index_select strided 8 : Elapsed 0.042 ms (4.223 ms / 100) 4.197 -> 4.196 ( -0.02%) [ +0.00% +0.05% +0.05% / -0.02% +0.50% +0.21%] index_select strided 16 : Elapsed 0.042 ms (4.197 ms / 100) 4.211 -> 4.213 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.05% +0.38% +0.31%] index_select random : Elapsed 0.042 ms (4.212 ms / 100) 4.212 -> 4.213 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.33% +0.40%] index_select random_sorted : Elapsed 0.042 ms (4.212 ms / 100) 4.203 -> 4.213 ( +0.24%) [ +0.24% +0.00% +0.26% / +0.24% +0.52% +0.48%] index_select perm : Elapsed 0.042 ms (4.213 ms / 100) 4.205 -> 4.208 ( +0.07%) [ +0.00% +0.00% +0.02% / +0.07% +0.36% +0.48%] index_select perm_sorted : Elapsed 0.042 ms (4.205 ms / 100) B = [5, 4, 16, 20] (stride (1, 80, 5, 320)) A = [5, 4, 40, 20] (stride (80, 1, 400, 4)) dim = 2 3.864 -> 3.881 ( +0.44%) [ +0.13% +0.00% +0.10% / +0.44% +0.52% +0.70%] index_select const : Elapsed 0.039 ms (3.869 ms / 100) 3.821 -> 3.827 ( +0.16%) [ +0.03% +0.00% +0.05% / +0.16% +0.71% +0.68%] index_select wrap : Elapsed 0.038 ms (3.822 ms / 100) 3.820 -> 3.822 ( +0.05%) [ +0.03% +0.00% +0.13% / +0.05% +0.68% +0.68%] index_select linear : Elapsed 0.038 ms (3.821 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.79% +0.81%] index_select reverse : Elapsed 0.038 ms (3.817 ms / 100) 3.845 -> 3.845 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.60% +0.57%] index_select skip64 : Elapsed 0.038 ms (3.845 ms / 100) 3.860 -> 3.868 ( +0.21%) [ +0.03% +0.00% +0.13% / +0.21% +0.67% +0.67%] index_select skip256 : Elapsed 0.039 ms (3.861 ms / 100) 3.819 -> 3.834 ( +0.39%) [ +0.00% +0.00% +0.10% / +0.39% +0.76% +0.76%] index_select spread : Elapsed 0.038 ms (3.819 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.63% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.817 ms / 100) 3.827 -> 3.829 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.65% +0.65%] index_select strided 5 : Elapsed 0.038 ms (3.828 ms / 100) 3.810 -> 3.820 ( +0.26%) [ +0.00% +0.03% +0.03% / +0.26% +0.76% +0.79%] index_select strided 7 : Elapsed 0.038 ms (3.810 ms / 100) 3.833 -> 3.847 ( +0.37%) [ +0.05% +0.00% +0.42% / +0.37% +1.04% +0.99%] index_select strided 8 : Elapsed 0.038 ms (3.835 ms / 100) 3.830 -> 3.843 ( +0.34%) [ +0.23% +0.00% +0.23% / +0.34% +0.89% +0.91%] index_select strided 16 : Elapsed 0.038 ms (3.839 ms / 100) 3.823 -> 3.826 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.08% +0.78% +0.86%] index_select random : Elapsed 0.038 ms (3.827 ms / 100) 3.818 -> 3.830 ( +0.31%) [ +0.08% +0.00% +0.00% / +0.31% +0.84% +0.86%] index_select random_sorted : Elapsed 0.038 ms (3.821 ms / 100) 3.827 -> 3.839 ( +0.31%) [ +0.03% +0.00% +0.34% / +0.31% +0.78% +0.78%] index_select perm : Elapsed 0.038 ms (3.828 ms / 100) 3.827 -> 3.830 ( +0.08%) [ +0.00% +0.00% +0.03% / +0.08% +0.76% +0.73%] index_select perm_sorted : Elapsed 0.038 ms (3.827 ms / 100) out_shape = [5, 4, 40, 16] in_shape = [5, 4, 40, 20] idx_dim = 3 B = [5, 4, 40, 16] (stride (2560, 16, 64, 1)) A = [5, 4, 40, 20] (stride (1, 200, 5, 800)) dim = 3 3.564 -> 3.568 ( +0.11%) [ +0.00% +0.17% +0.08% / +0.11% +0.65% +0.79%] index_select const : Elapsed 0.036 ms (3.564 ms / 100) 3.560 -> 3.562 ( +0.06%) [ +0.14% +0.08% +0.00% / +0.06% +0.81% +0.79%] index_select wrap : Elapsed 0.036 ms (3.565 ms / 100) 3.558 -> 3.561 ( +0.08%) [ +0.00% +0.03% +0.22% / +0.08% +0.93% +0.90%] index_select linear : Elapsed 0.036 ms (3.558 ms / 100) 3.564 -> 3.567 ( +0.08%) [ +0.06% +0.00% +0.11% / +0.08% +0.79% +0.81%] index_select reverse : Elapsed 0.036 ms (3.566 ms / 100) 3.553 -> 3.574 ( +0.59%) [ +0.20% +0.11% +0.00% / +0.59% +0.84% +0.79%] index_select skip64 : Elapsed 0.036 ms (3.560 ms / 100) 3.559 -> 3.575 ( +0.45%) [ +0.06% +0.00% +0.03% / +0.45% +0.70% +0.65%] index_select skip256 : Elapsed 0.036 ms (3.561 ms / 100) 3.563 -> 3.562 ( -0.03%) [ +0.08% +0.00% +0.14% / -0.03% +0.62% +0.62%] index_select spread : Elapsed 0.036 ms (3.566 ms / 100) 3.565 -> 3.565 ( +0.00%) [ +0.14% +0.00% +0.03% / +0.00% +0.76% +0.70%] index_select strided 3 : Elapsed 0.036 ms (3.570 ms / 100) 3.569 -> 3.573 ( +0.11%) [ +0.03% +0.00% +0.00% / +0.11% +0.64% +0.64%] index_select strided 5 : Elapsed 0.036 ms (3.570 ms / 100) 3.562 -> 3.571 ( +0.25%) [ +0.03% +0.00% +0.11% / +0.25% +0.65% +0.70%] index_select strided 7 : Elapsed 0.036 ms (3.563 ms / 100) 3.558 -> 3.564 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.76% +0.82%] index_select strided 8 : Elapsed 0.036 ms (3.564 ms / 100) 3.568 -> 3.576 ( +0.22%) [ +0.06% +0.03% +0.00% / +0.22% +0.67% +0.76%] index_select strided 16 : Elapsed 0.036 ms (3.570 ms / 100) 3.570 -> 3.572 ( +0.06%) [ +0.00% +0.08% +0.00% / +0.06% +0.48% +0.48%] index_select random : Elapsed 0.036 ms (3.570 ms / 100) 3.558 -> 3.568 ( +0.28%) [ +0.00% +0.14% +0.08% / +0.28% +0.62% +1.69%] index_select random_sorted : Elapsed 0.036 ms (3.558 ms / 100) 3.571 -> 3.572 ( +0.03%) [ +0.03% +0.00% +0.17% / +0.03% +0.53% +0.56%] index_select perm : Elapsed 0.036 ms (3.572 ms / 100) 3.575 -> 3.579 ( +0.11%) [ +0.03% +0.00% +0.28% / +0.11% +0.48% +0.45%] index_select perm_sorted : Elapsed 0.036 ms (3.576 ms / 100) B = [5, 4, 40, 16] (stride (2560, 40, 1, 160)) A = [5, 4, 40, 20] (stride (4, 1, 400, 20)) dim = 3 1.400 -> 1.398 ( -0.14%) [ +0.07% +0.07% +0.00% / +0.07% -0.14% -0.14%] index_select const : Elapsed 0.014 ms (1.401 ms / 100) 1.412 -> 1.414 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.50% +0.28%] index_select wrap : Elapsed 0.014 ms (1.412 ms / 100) 1.399 -> 1.401 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.43% +0.21%] index_select linear : Elapsed 0.014 ms (1.399 ms / 100) 1.409 -> 1.410 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.14% +0.07% +0.07%] index_select reverse : Elapsed 0.014 ms (1.410 ms / 100) 1.394 -> 1.395 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.36% +0.36%] index_select skip64 : Elapsed 0.014 ms (1.395 ms / 100) 1.397 -> 1.399 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.14% +0.14%] index_select skip256 : Elapsed 0.014 ms (1.398 ms / 100) 1.416 -> 1.418 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +0.49% +0.42%] index_select spread : Elapsed 0.014 ms (1.416 ms / 100) 1.406 -> 1.409 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.43% +0.64%] index_select strided 3 : Elapsed 0.014 ms (1.406 ms / 100) 1.405 -> 1.411 ( +0.43%) [ +0.64% +0.36% +0.00% / +0.43% +0.64% +0.64%] index_select strided 5 : Elapsed 0.014 ms (1.414 ms / 100) 1.404 -> 1.405 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +1.21% +1.07%] index_select strided 7 : Elapsed 0.014 ms (1.406 ms / 100) 1.414 -> 1.416 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.28% +0.71%] index_select strided 8 : Elapsed 0.014 ms (1.415 ms / 100) 1.404 -> 1.406 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.57% +0.57%] index_select strided 16 : Elapsed 0.014 ms (1.406 ms / 100) 1.406 -> 1.407 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.78% +0.71%] index_select random : Elapsed 0.014 ms (1.407 ms / 100) 1.404 -> 1.407 ( +0.21%) [ +0.14% +0.00% +0.00% / +0.21% +0.57% +0.57%] index_select random_sorted : Elapsed 0.014 ms (1.406 ms / 100) 1.422 -> 1.425 ( +0.21%) [ +0.21% +0.00% +0.14% / +0.21% +0.70% +0.70%] index_select perm : Elapsed 0.014 ms (1.425 ms / 100) 1.420 -> 1.423 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.92% +0.70%] index_select perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) B = [5, 4, 40, 16] (stride (640, 3200, 1, 40)) A = [5, 4, 40, 20] (stride (3200, 20, 80, 1)) dim = 3 4.273 -> 4.278 ( +0.12%) [ +0.00% +0.12% +0.09% / +0.12% +0.84% +0.84%] index_select const : Elapsed 0.043 ms (4.273 ms / 100) 4.280 -> 4.282 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.77% +0.75%] index_select wrap : Elapsed 0.043 ms (4.282 ms / 100) 4.280 -> 4.278 ( -0.05%) [ +0.09% +0.00% +0.14% / -0.05% +0.72% +0.82%] index_select linear : Elapsed 0.043 ms (4.284 ms / 100) 4.283 -> 4.294 ( +0.26%) [ +0.00% +0.16% +0.12% / +0.26% +0.79% +0.82%] index_select reverse : Elapsed 0.043 ms (4.283 ms / 100) 4.291 -> 4.297 ( +0.14%) [ +0.02% +0.00% +0.00% / +0.14% +0.58% +0.58%] index_select skip64 : Elapsed 0.043 ms (4.292 ms / 100) 4.284 -> 4.291 ( +0.16%) [ +0.05% +0.00% +0.07% / +0.16% +0.70% +0.77%] index_select skip256 : Elapsed 0.043 ms (4.286 ms / 100) 4.276 -> 4.276 ( +0.00%) [ +0.00% +0.02% +0.26% / +0.00% +0.72% +0.72%] index_select spread : Elapsed 0.043 ms (4.276 ms / 100) 4.285 -> 4.292 ( +0.16%) [ +0.00% +0.12% +0.07% / +0.16% +0.61% +0.79%] index_select strided 3 : Elapsed 0.043 ms (4.285 ms / 100) 4.282 -> 4.285 ( +0.07%) [ +0.28% +0.07% +0.00% / +0.07% +0.86% +1.00%] index_select strided 5 : Elapsed 0.043 ms (4.294 ms / 100) 4.273 -> 4.275 ( +0.05%) [ +0.00% +0.05% +0.23% / +0.05% +0.84% +0.75%] index_select strided 7 : Elapsed 0.043 ms (4.273 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.00% +0.05% +0.21% / +0.07% +0.63% +0.58%] index_select strided 8 : Elapsed 0.043 ms (4.278 ms / 100) 4.290 -> 4.296 ( +0.14%) [ +0.12% +0.00% +0.30% / +0.14% +0.89% +0.79%] index_select strided 16 : Elapsed 0.043 ms (4.295 ms / 100) 4.301 -> 4.312 ( +0.26%) [ +0.00% +0.28% +0.14% / +0.26% +0.44% +0.79%] index_select random : Elapsed 0.043 ms (4.301 ms / 100) 4.282 -> 4.287 ( +0.12%) [ +0.09% +0.00% +0.14% / +0.12% +0.75% +0.79%] index_select random_sorted : Elapsed 0.043 ms (4.286 ms / 100) 4.282 -> 4.287 ( +0.12%) [ +0.09% +0.00% +0.07% / +0.12% +0.63% +0.75%] index_select perm : Elapsed 0.043 ms (4.286 ms / 100) 4.281 -> 4.282 ( +0.02%) [ +0.00% +0.05% +0.14% / +0.02% +0.49% +0.75%] index_select perm_sorted : Elapsed 0.043 ms (4.281 ms / 100) B = [5, 4, 40, 16] (stride (16, 80, 320, 1)) A = [5, 4, 40, 20] (stride (20, 100, 400, 1)) dim = 3 4.293 -> 4.292 ( -0.02%) [ +0.12% +0.02% +0.00% / -0.02% +0.65% +0.54%] index_select const : Elapsed 0.043 ms (4.298 ms / 100) 4.297 -> 4.306 ( +0.21%) [ +0.05% +0.14% +0.00% / +0.21% +0.56% +0.51%] index_select wrap : Elapsed 0.043 ms (4.299 ms / 100) 4.306 -> 4.308 ( +0.05%) [ +0.21% +0.00% +0.19% / +0.05% +0.74% +0.74%] index_select linear : Elapsed 0.043 ms (4.315 ms / 100) 4.294 -> 4.298 ( +0.09%) [ +0.05% +0.09% +0.00% / +0.09% +0.58% +0.65%] index_select reverse : Elapsed 0.043 ms (4.296 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.00% +0.07% +0.28% / +0.07% +0.86% +0.58%] index_select skip64 : Elapsed 0.043 ms (4.278 ms / 100) 4.290 -> 4.291 ( +0.02%) [ +0.00% +0.19% +0.14% / +0.02% +0.58% +0.65%] index_select skip256 : Elapsed 0.043 ms (4.290 ms / 100) 4.298 -> 4.301 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.49% +0.65%] index_select spread : Elapsed 0.043 ms (4.301 ms / 100) 4.296 -> 4.299 ( +0.07%) [ +0.09% +0.00% +0.02% / +0.07% +0.51% +0.58%] index_select strided 3 : Elapsed 0.043 ms (4.300 ms / 100) 4.303 -> 4.300 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.37% +0.49%] index_select strided 5 : Elapsed 0.043 ms (4.303 ms / 100) 4.287 -> 4.290 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.68% +0.75%] index_select strided 7 : Elapsed 0.043 ms (4.289 ms / 100) 4.293 -> 4.296 ( +0.07%) [ +0.05% +0.00% +0.05% / +0.07% +0.51% +0.58%] index_select strided 8 : Elapsed 0.043 ms (4.295 ms / 100) 4.294 -> 4.297 ( +0.07%) [ +0.21% +0.19% +0.00% / +0.07% +0.72% +0.70%] index_select strided 16 : Elapsed 0.043 ms (4.303 ms / 100) 4.294 -> 4.295 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.68% +0.58%] index_select random : Elapsed 0.043 ms (4.294 ms / 100) 4.303 -> 4.312 ( +0.21%) [ +0.23% +0.00% +0.21% / +0.21% +0.67% +0.63%] index_select random_sorted : Elapsed 0.043 ms (4.313 ms / 100) 4.299 -> 4.295 ( -0.09%) [ +0.07% +0.00% +0.07% / -0.09% +0.60% +0.70%] index_select perm : Elapsed 0.043 ms (4.302 ms / 100) 4.293 -> 4.298 ( +0.12%) [ +0.00% +0.14% +0.05% / +0.12% +0.56% +0.70%] index_select perm_sorted : Elapsed 0.043 ms (4.293 ms / 100) out_shape = [16, 20, 4, 40] in_shape = [5, 20, 4, 40] idx_dim = 0 B = [16, 20, 4, 40] (stride (3200, 160, 1, 4)) A = [5, 20, 4, 40] (stride (160, 800, 1, 4)) dim = 0 2.302 -> 2.303 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +1.13% +1.13%] index_add_ linear : Elapsed 0.023 ms (2.302 ms / 100) 2.243 -> 2.244 ( +0.04%) [ +0.09% +0.13% +0.00% / +0.04% +1.20% +1.47%] index_copy_ linear : Elapsed 0.022 ms (2.245 ms / 100) 2.301 -> 2.306 ( +0.22%) [ +0.09% +0.04% +0.00% / +0.22% +1.35% +1.48%] index_add_ reverse : Elapsed 0.023 ms (2.303 ms / 100) 2.239 -> 2.245 ( +0.27%) [ +0.22% +0.27% +0.00% / +0.27% +1.70% +1.61%] index_copy_ reverse : Elapsed 0.022 ms (2.244 ms / 100) 2.302 -> 2.309 ( +0.30%) [ +0.00% +0.04% +0.17% / +0.30% +1.43% +1.43%] index_add_ spread : Elapsed 0.023 ms (2.302 ms / 100) 2.241 -> 2.249 ( +0.36%) [ +0.04% +0.00% +0.22% / +0.36% +1.65% +1.56%] index_copy_ spread : Elapsed 0.022 ms (2.242 ms / 100) 2.298 -> 2.298 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +1.22% +1.31%] index_add_ strided 3 : Elapsed 0.023 ms (2.301 ms / 100) 2.243 -> 2.246 ( +0.13%) [ +0.09% +0.00% +0.04% / +0.13% +1.52% +1.47%] index_copy_ strided 3 : Elapsed 0.022 ms (2.245 ms / 100) 2.301 -> 2.301 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +1.39% +1.52%] index_add_ strided 5 : Elapsed 0.023 ms (2.301 ms / 100) 2.244 -> 2.244 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +1.43% +1.65%] index_copy_ strided 5 : Elapsed 0.022 ms (2.247 ms / 100) 2.295 -> 2.298 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +1.61% +1.87%] index_add_ strided 7 : Elapsed 0.023 ms (2.296 ms / 100) 2.237 -> 2.246 ( +0.40%) [ +0.22% +0.00% +0.18% / +0.40% +1.70% +1.70%] index_copy_ strided 7 : Elapsed 0.022 ms (2.242 ms / 100) 2.294 -> 2.298 ( +0.17%) [ +0.00% +0.13% +0.04% / +0.17% +1.48% +1.18%] index_add_ perm : Elapsed 0.023 ms (2.294 ms / 100) 2.239 -> 2.241 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.09% +1.38% +1.47%] index_copy_ perm : Elapsed 0.022 ms (2.239 ms / 100) 2.298 -> 2.301 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +1.31% +1.31%] index_add_ perm_sorted : Elapsed 0.023 ms (2.302 ms / 100) 2.239 -> 2.246 ( +0.31%) [ +0.04% +0.00% +0.22% / +0.31% +1.47% +1.65%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.240 ms / 100) 4.864 -> 4.865 ( +0.02%) [ +0.00% +0.21% +0.10% / +0.02% +0.70% +0.86%] index_select const : Elapsed 0.049 ms (4.864 ms / 100) 4.879 -> 4.882 ( +0.06%) [ +0.00% +0.10% +0.04% / +0.06% +1.29% +1.21%] index_select wrap : Elapsed 0.049 ms (4.879 ms / 100) 4.893 -> 4.893 ( +0.00%) [ +0.08% +0.00% +0.02% / +0.00% +1.12% +1.02%] index_select linear : Elapsed 0.049 ms (4.897 ms / 100) 4.882 -> 4.891 ( +0.18%) [ +0.00% +0.08% +0.20% / +0.18% +0.80% +0.78%] index_select reverse : Elapsed 0.049 ms (4.882 ms / 100) 4.865 -> 4.862 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.64% +0.68%] index_select skip64 : Elapsed 0.049 ms (4.865 ms / 100) 4.862 -> 4.873 ( +0.23%) [ +0.08% +0.00% +0.25% / +0.23% +0.76% +0.74%] index_select skip256 : Elapsed 0.049 ms (4.866 ms / 100) 4.882 -> 4.884 ( +0.04%) [ +0.12% +0.00% +0.06% / +0.04% +0.90% +0.96%] index_select spread : Elapsed 0.049 ms (4.888 ms / 100) 4.880 -> 4.884 ( +0.08%) [ +0.18% +0.00% +0.16% / +0.08% +1.15% +1.35%] index_select strided 3 : Elapsed 0.049 ms (4.889 ms / 100) 4.880 -> 4.888 ( +0.16%) [ +0.14% +0.00% +0.16% / +0.16% +0.92% +0.94%] index_select random : Elapsed 0.049 ms (4.887 ms / 100) 4.865 -> 4.873 ( +0.16%) [ +0.18% +0.08% +0.00% / +0.16% +1.25% +1.21%] index_select random_sorted : Elapsed 0.049 ms (4.874 ms / 100) B = [16, 20, 4, 40] (stride (3200, 4, 1, 80)) A = [5, 20, 4, 40] (stride (1, 800, 200, 5)) dim = 0 1.970 -> 1.971 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.25% +0.41%] index_add_ linear : Elapsed 0.020 ms (1.972 ms / 100) 1.934 -> 1.935 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.21% +0.10%] index_copy_ linear : Elapsed 0.019 ms (1.934 ms / 100) 1.968 -> 1.969 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.41% +0.56%] index_add_ reverse : Elapsed 0.020 ms (1.969 ms / 100) 1.930 -> 1.932 ( +0.10%) [ +0.00% +0.05% +0.16% / +0.26% +0.10% +0.31%] index_copy_ reverse : Elapsed 0.019 ms (1.930 ms / 100) 1.968 -> 1.975 ( +0.36%) [ +0.10% +0.05% +0.00% / +0.36% +0.61% +0.56%] index_add_ spread : Elapsed 0.020 ms (1.970 ms / 100) 1.933 -> 1.936 ( +0.16%) [ +0.26% +0.00% +0.31% / +0.52% +0.21% +0.16%] index_copy_ spread : Elapsed 0.019 ms (1.938 ms / 100) 1.966 -> 1.969 ( +0.15%) [ +0.31% +0.41% +0.00% / +0.15% +0.71% +0.46%] index_add_ strided 3 : Elapsed 0.020 ms (1.972 ms / 100) 1.931 -> 1.934 ( +0.16%) [ +0.10% +0.26% +0.00% / +0.16% +0.36% +0.26%] index_copy_ strided 3 : Elapsed 0.019 ms (1.933 ms / 100) 1.977 -> 1.976 ( -0.05%) [ +0.00% +0.10% +0.15% / -0.05% +0.20% -0.05%] index_add_ strided 5 : Elapsed 0.020 ms (1.977 ms / 100) 1.935 -> 1.931 ( -0.21%) [ +0.10% +0.00% +0.00% / +0.16% +0.16% -0.21%] index_copy_ strided 5 : Elapsed 0.019 ms (1.937 ms / 100) 1.971 -> 1.971 ( +0.00%) [ +0.00% +0.05% +0.10% / +0.00% +0.10% +0.25%] index_add_ strided 7 : Elapsed 0.020 ms (1.971 ms / 100) 1.931 -> 1.934 ( +0.16%) [ +0.00% +0.16% +0.10% / +0.21% +0.21% +0.16%] index_copy_ strided 7 : Elapsed 0.019 ms (1.931 ms / 100) 1.968 -> 1.973 ( +0.25%) [ +0.00% +0.25% +0.15% / +0.25% +0.41% +0.51%] index_add_ perm : Elapsed 0.020 ms (1.968 ms / 100) 1.930 -> 1.930 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.10% +0.21% +0.00%] index_copy_ perm : Elapsed 0.019 ms (1.932 ms / 100) 1.965 -> 1.965 ( +0.00%) [ +0.25% +0.15% +0.00% / +0.00% +0.66% +0.71%] index_add_ perm_sorted : Elapsed 0.020 ms (1.970 ms / 100) 1.930 -> 1.929 ( -0.05%) [ +0.00% +0.16% +0.00% / -0.05% +0.47% +0.16%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.930 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.05% +0.69% +0.56%] index_select const : Elapsed 0.039 ms (3.925 ms / 100) 3.912 -> 3.920 ( +0.20%) [ +0.00% +0.15% +0.20% / +0.20% +0.51% +0.64%] index_select wrap : Elapsed 0.039 ms (3.912 ms / 100) 3.922 -> 3.938 ( +0.41%) [ +0.28% +0.36% +0.00% / +0.41% +0.79% +0.51%] index_select linear : Elapsed 0.039 ms (3.933 ms / 100) 3.919 -> 3.927 ( +0.20%) [ +0.10% +0.10% +0.00% / +0.20% +0.66% +0.71%] index_select reverse : Elapsed 0.039 ms (3.923 ms / 100) 3.923 -> 3.937 ( +0.36%) [ +0.00% +0.10% +0.13% / +0.36% +0.48% +0.59%] index_select skip64 : Elapsed 0.039 ms (3.923 ms / 100) 3.921 -> 3.937 ( +0.41%) [ +0.31% +0.23% +0.00% / +0.48% +0.41% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.933 ms / 100) 3.922 -> 3.937 ( +0.38%) [ +0.05% +0.00% +0.05% / +0.38% +0.46% +0.56%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.918 -> 3.927 ( +0.23%) [ +0.03% +0.10% +0.00% / +0.23% +0.64% +0.64%] index_select strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 3.917 -> 3.920 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.92% +0.97%] index_select random : Elapsed 0.039 ms (3.920 ms / 100) 3.920 -> 3.931 ( +0.28%) [ +0.05% +0.00% +0.20% / +0.28% +0.74% +0.77%] index_select random_sorted : Elapsed 0.039 ms (3.922 ms / 100) B = [16, 20, 4, 40] (stride (3200, 1, 20, 80)) A = [5, 20, 4, 40] (stride (800, 40, 4000, 1)) dim = 0 2.352 -> 2.358 ( +0.26%) [ +0.55% +0.09% +0.00% / +0.26% +1.36% +1.06%] index_add_ linear : Elapsed 0.024 ms (2.365 ms / 100) 2.310 -> 2.313 ( +0.13%) [ +0.26% +0.00% +0.04% / +0.13% +1.30% +0.69%] index_copy_ linear : Elapsed 0.023 ms (2.316 ms / 100) 2.330 -> 2.345 ( +0.64%) [ +0.56% +0.64% +0.00% / +0.64% +1.42% +1.55%] index_add_ reverse : Elapsed 0.023 ms (2.343 ms / 100) 2.297 -> 2.303 ( +0.26%) [ +0.17% +0.22% +0.00% / +0.26% +0.65% +0.87%] index_copy_ reverse : Elapsed 0.023 ms (2.301 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.00% +0.04% +0.13% / +0.08% +1.02% +1.06%] index_add_ spread : Elapsed 0.024 ms (2.358 ms / 100) 2.314 -> 2.323 ( +0.39%) [ +0.09% +0.00% +0.22% / +0.39% +1.04% +1.94%] index_copy_ spread : Elapsed 0.023 ms (2.316 ms / 100) 2.348 -> 2.360 ( +0.51%) [ +0.47% +0.09% +0.00% / +0.51% +1.15% +1.19%] index_add_ strided 3 : Elapsed 0.024 ms (2.359 ms / 100) 2.305 -> 2.308 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +1.26% +0.91%] index_copy_ strided 3 : Elapsed 0.023 ms (2.308 ms / 100) 2.344 -> 2.338 ( -0.26%) [ +0.00% +0.17% +0.13% / -0.26% +1.02% +1.15%] index_add_ strided 5 : Elapsed 0.023 ms (2.344 ms / 100) 2.300 -> 2.299 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +1.09% +0.91%] index_copy_ strided 5 : Elapsed 0.023 ms (2.300 ms / 100) 2.348 -> 2.355 ( +0.30%) [ +0.34% +0.00% +0.21% / +0.30% +1.02% +1.28%] index_add_ strided 7 : Elapsed 0.024 ms (2.356 ms / 100) 2.308 -> 2.316 ( +0.35%) [ +0.09% +0.00% +0.17% / +0.35% +0.78% +1.60%] index_copy_ strided 7 : Elapsed 0.023 ms (2.310 ms / 100) 2.346 -> 2.351 ( +0.21%) [ +0.34% +0.00% +0.30% / +0.21% +1.19% +1.58%] index_add_ perm : Elapsed 0.024 ms (2.354 ms / 100) 2.304 -> 2.308 ( +0.17%) [ +0.09% +0.09% +0.00% / +0.17% +1.09% +0.95%] index_copy_ perm : Elapsed 0.023 ms (2.306 ms / 100) 2.365 -> 2.367 ( +0.08%) [ +0.38% +0.08% +0.00% / +0.08% +0.55% +0.85%] index_add_ perm_sorted : Elapsed 0.024 ms (2.374 ms / 100) 2.320 -> 2.322 ( +0.09%) [ +0.13% +0.17% +0.00% / +0.09% +0.60% +0.86%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.323 ms / 100) 4.831 -> 4.837 ( +0.12%) [ +0.23% +0.00% +0.08% / +0.12% +0.79% +0.77%] index_select const : Elapsed 0.048 ms (4.842 ms / 100) 4.875 -> 4.882 ( +0.14%) [ +0.00% +0.10% +0.12% / +0.14% +0.80% +0.78%] index_select wrap : Elapsed 0.049 ms (4.875 ms / 100) 4.891 -> 4.915 ( +0.49%) [ +0.10% +0.12% +0.00% / +0.49% +0.92% +0.86%] index_select linear : Elapsed 0.049 ms (4.896 ms / 100) 4.893 -> 4.905 ( +0.25%) [ +0.29% +0.00% +0.04% / +0.25% +0.76% +0.78%] index_select reverse : Elapsed 0.049 ms (4.907 ms / 100) 4.822 -> 4.829 ( +0.15%) [ +0.10% +0.06% +0.00% / +0.15% +0.81% +0.81%] index_select skip64 : Elapsed 0.048 ms (4.827 ms / 100) 4.833 -> 4.844 ( +0.23%) [ +0.10% +0.14% +0.00% / +0.23% +0.56% +0.70%] index_select skip256 : Elapsed 0.048 ms (4.838 ms / 100) 4.871 -> 4.887 ( +0.33%) [ +0.25% +0.00% +0.25% / +0.33% +1.25% +1.07%] index_select spread : Elapsed 0.049 ms (4.883 ms / 100) 4.878 -> 4.880 ( +0.04%) [ +0.27% +0.00% +0.18% / +0.04% +0.92% +1.03%] index_select strided 3 : Elapsed 0.049 ms (4.891 ms / 100) 4.879 -> 4.889 ( +0.20%) [ +0.04% +0.02% +0.00% / +0.20% +1.02% +1.11%] index_select random : Elapsed 0.049 ms (4.881 ms / 100) 4.887 -> 4.889 ( +0.04%) [ +0.02% +0.12% +0.00% / +0.04% +0.72% +0.80%] index_select random_sorted : Elapsed 0.049 ms (4.888 ms / 100) B = [16, 20, 4, 40] (stride (40, 2560, 640, 1)) A = [5, 20, 4, 40] (stride (3200, 1, 800, 20)) dim = 0 0.850 -> 0.851 ( +0.12%) [ +0.24% +0.12% +0.00% / +0.12% +0.35% +0.47%] index_add_ linear : Elapsed 0.009 ms (0.852 ms / 100) 0.828 -> 0.829 ( +0.12%) [ +0.12% +0.00% +0.36% / +0.12% +0.48% +0.60%] index_copy_ linear : Elapsed 0.008 ms (0.829 ms / 100) 0.851 -> 0.850 ( -0.12%) [ +0.00% +0.35% +0.24% / -0.12% +0.24% +0.35%] index_add_ reverse : Elapsed 0.009 ms (0.851 ms / 100) 0.828 -> 0.830 ( +0.24%) [ +0.24% +0.00% +0.00% / +0.24% +0.60% +0.36%] index_copy_ reverse : Elapsed 0.008 ms (0.830 ms / 100) 0.858 -> 0.861 ( +0.35%) [ +0.00% +0.47% +0.23% / +0.35% +0.82% +0.58%] index_add_ spread : Elapsed 0.009 ms (0.858 ms / 100) 0.837 -> 0.840 ( +0.36%) [ +0.00% +0.00% +0.00% / +0.36% +0.48% +0.36%] index_copy_ spread : Elapsed 0.008 ms (0.837 ms / 100) 0.857 -> 0.860 ( +0.35%) [ +0.00% +0.12% +0.00% / +0.35% +0.70% +0.58%] index_add_ strided 3 : Elapsed 0.009 ms (0.857 ms / 100) 0.836 -> 0.840 ( +0.48%) [ +0.48% +0.12% +0.00% / +0.48% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.008 ms (0.840 ms / 100) 0.853 -> 0.855 ( +0.23%) [ +0.00% +0.12% +0.12% / +0.47% +0.23% +0.23%] index_add_ strided 5 : Elapsed 0.009 ms (0.853 ms / 100) 0.833 -> 0.834 ( +0.12%) [ +0.12% +0.00% +0.96% / +0.48% +0.12% +0.36%] index_copy_ strided 5 : Elapsed 0.008 ms (0.834 ms / 100) 0.855 -> 0.853 ( -0.23%) [ +0.23% +0.00% +0.23% / -0.23% +1.17% +0.94%] index_add_ strided 7 : Elapsed 0.009 ms (0.857 ms / 100) 0.834 -> 0.835 ( +0.12%) [ +0.00% +0.00% +0.36% / +0.12% +1.08% +0.60%] index_copy_ strided 7 : Elapsed 0.008 ms (0.834 ms / 100) 0.853 -> 0.855 ( +0.23%) [ +0.00% +0.12% +0.59% / +0.23% +1.17% +1.17%] index_add_ perm : Elapsed 0.009 ms (0.853 ms / 100) 0.832 -> 0.834 ( +0.24%) [ +0.00% +0.36% +0.48% / +0.24% +0.84% +0.72%] index_copy_ perm : Elapsed 0.008 ms (0.832 ms / 100) 0.852 -> 0.855 ( +0.35%) [ +0.35% +0.00% +0.82% / +0.35% +1.64% +1.53%] index_add_ perm_sorted : Elapsed 0.009 ms (0.855 ms / 100) 0.833 -> 0.832 ( -0.12%) [ +0.00% +0.24% +0.48% / -0.12% +1.68% +1.20%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.833 ms / 100) 1.650 -> 1.655 ( +0.30%) [ +0.00% +0.18% +0.12% / +0.55% +0.30% +0.30%] index_select const : Elapsed 0.017 ms (1.650 ms / 100) 1.644 -> 1.648 ( +0.24%) [ +0.00% +0.24% +0.18% / +0.36% +0.24% +0.36%] index_select wrap : Elapsed 0.016 ms (1.644 ms / 100) 1.651 -> 1.648 ( -0.18%) [ +0.00% +0.30% +0.06% / +0.24% +0.12% -0.18%] index_select linear : Elapsed 0.017 ms (1.651 ms / 100) 1.651 -> 1.654 ( +0.18%) [ +0.12% +0.00% +0.00% / +0.18% +0.55% +0.79%] index_select reverse : Elapsed 0.017 ms (1.653 ms / 100) 1.647 -> 1.650 ( +0.18%) [ +0.55% +0.12% +0.00% / +0.36% +0.18% +0.55%] index_select skip64 : Elapsed 0.017 ms (1.656 ms / 100) 1.649 -> 1.650 ( +0.06%) [ +0.30% +0.18% +0.00% / +0.06% +0.61% +0.73%] index_select skip256 : Elapsed 0.017 ms (1.654 ms / 100) 1.647 -> 1.650 ( +0.18%) [ +0.12% +0.00% +0.12% / +0.18% +0.61% +0.73%] index_select spread : Elapsed 0.016 ms (1.649 ms / 100) 1.644 -> 1.642 ( -0.12%) [ +0.43% +0.00% +0.00% / -0.12% +1.03% +1.16%] index_select strided 3 : Elapsed 0.017 ms (1.651 ms / 100) 1.647 -> 1.653 ( +0.36%) [ +0.36% +0.36% +0.00% / +0.36% +1.03% +0.85%] index_select random : Elapsed 0.017 ms (1.653 ms / 100) 1.654 -> 1.657 ( +0.18%) [ +0.12% +0.00% +0.12% / +0.30% +0.18% +0.24%] index_select random_sorted : Elapsed 0.017 ms (1.656 ms / 100) B = [16, 20, 4, 40] (stride (1, 2560, 16, 64)) A = [5, 20, 4, 40] (stride (1, 800, 200, 5)) dim = 0 2.107 -> 2.107 ( +0.00%) [ +0.05% +0.24% +0.00% / +0.00% +1.38% +1.23%] index_add_ linear : Elapsed 0.021 ms (2.108 ms / 100) 2.089 -> 2.093 ( +0.19%) [ +0.14% +0.00% +0.05% / +0.19% +1.05% +0.86%] index_copy_ linear : Elapsed 0.021 ms (2.092 ms / 100) 2.106 -> 2.101 ( -0.24%) [ +0.24% +0.33% +0.00% / -0.24% +1.19% +1.33%] index_add_ reverse : Elapsed 0.021 ms (2.111 ms / 100) 2.082 -> 2.086 ( +0.19%) [ +0.58% +0.34% +0.00% / +0.19% +1.54% +1.30%] index_copy_ reverse : Elapsed 0.021 ms (2.094 ms / 100) 2.135 -> 2.136 ( +0.05%) [ +0.00% +0.52% +0.37% / +0.05% +1.55% +1.64%] index_add_ spread : Elapsed 0.021 ms (2.135 ms / 100) 2.148 -> 2.156 ( +0.37%) [ +0.00% +0.56% +0.42% / +0.37% +1.63% +1.63%] index_copy_ spread : Elapsed 0.021 ms (2.148 ms / 100) 2.134 -> 2.138 ( +0.19%) [ +0.05% +0.19% +0.00% / +0.19% +1.45% +1.22%] index_add_ strided 3 : Elapsed 0.021 ms (2.135 ms / 100) 2.151 -> 2.151 ( +0.00%) [ +0.00% +0.37% +0.09% / +0.00% +1.16% +1.16%] index_copy_ strided 3 : Elapsed 0.022 ms (2.151 ms / 100) 2.135 -> 2.133 ( -0.09%) [ +0.05% +0.14% +0.00% / -0.09% +1.17% +1.50%] index_add_ strided 5 : Elapsed 0.021 ms (2.136 ms / 100) 2.151 -> 2.153 ( +0.09%) [ +0.00% +0.09% +0.14% / +0.09% +0.93% +1.72%] index_copy_ strided 5 : Elapsed 0.022 ms (2.151 ms / 100) 2.133 -> 2.137 ( +0.19%) [ +0.00% +0.09% +0.23% / +0.19% +1.31% +1.45%] index_add_ strided 7 : Elapsed 0.021 ms (2.133 ms / 100) 2.149 -> 2.151 ( +0.09%) [ +0.19% +0.14% +0.00% / +0.09% +1.30% +1.40%] index_copy_ strided 7 : Elapsed 0.022 ms (2.153 ms / 100) 2.136 -> 2.144 ( +0.37%) [ +0.28% +0.00% +0.00% / +0.37% +1.22% +1.12%] index_add_ perm : Elapsed 0.021 ms (2.142 ms / 100) 2.152 -> 2.157 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.98% +1.35%] index_copy_ perm : Elapsed 0.022 ms (2.152 ms / 100) 2.133 -> 2.133 ( +0.00%) [ +0.09% +0.23% +0.00% / +0.00% +1.41% +1.13%] index_add_ perm_sorted : Elapsed 0.021 ms (2.135 ms / 100) 2.148 -> 2.151 ( +0.14%) [ +0.00% +0.19% +0.09% / +0.14% +1.35% +1.21%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.148 ms / 100) 4.308 -> 4.309 ( +0.02%) [ +0.16% +0.00% +0.09% / +0.02% +0.95% +1.00%] index_select const : Elapsed 0.043 ms (4.315 ms / 100) 4.284 -> 4.289 ( +0.12%) [ +0.00% +0.30% +0.12% / +0.12% +1.17% +1.28%] index_select wrap : Elapsed 0.043 ms (4.284 ms / 100) 4.281 -> 4.291 ( +0.23%) [ +0.00% +0.28% +0.23% / +0.23% +1.68% +1.21%] index_select linear : Elapsed 0.043 ms (4.281 ms / 100) 4.316 -> 4.313 ( -0.07%) [ +0.09% +0.00% +0.09% / -0.07% +0.70% +0.53%] index_select reverse : Elapsed 0.043 ms (4.320 ms / 100) 4.298 -> 4.297 ( -0.02%) [ +0.00% +0.05% +0.05% / -0.02% +0.95% +0.74%] index_select skip64 : Elapsed 0.043 ms (4.298 ms / 100) 4.304 -> 4.304 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.88% +1.09%] index_select skip256 : Elapsed 0.043 ms (4.306 ms / 100) 4.305 -> 4.311 ( +0.14%) [ +0.12% +0.12% +0.00% / +0.14% +1.21% +1.11%] index_select spread : Elapsed 0.043 ms (4.310 ms / 100) 4.277 -> 4.272 ( -0.12%) [ +0.35% +0.05% +0.00% / -0.12% +1.38% +1.50%] index_select strided 3 : Elapsed 0.043 ms (4.292 ms / 100) 4.303 -> 4.311 ( +0.19%) [ +0.00% +0.00% +0.05% / +0.19% +1.02% +1.14%] index_select random : Elapsed 0.043 ms (4.303 ms / 100) 4.303 -> 4.298 ( -0.12%) [ +0.02% +0.00% +0.09% / -0.12% +1.16% +1.02%] index_select random_sorted : Elapsed 0.043 ms (4.304 ms / 100) B = [16, 20, 4, 40] (stride (800, 1, 12800, 20)) A = [5, 20, 4, 40] (stride (1, 5, 100, 400)) dim = 0 2.549 -> 2.554 ( +0.20%) [ +0.35% +0.35% +0.00% / +0.20% +0.82% +0.94%] index_add_ linear : Elapsed 0.026 ms (2.558 ms / 100) 2.476 -> 2.482 ( +0.24%) [ +0.24% +0.20% +0.00% / +0.24% +0.97% +0.81%] index_copy_ linear : Elapsed 0.025 ms (2.482 ms / 100) 2.556 -> 2.559 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.51% +0.47%] index_add_ reverse : Elapsed 0.026 ms (2.556 ms / 100) 2.480 -> 2.483 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.44% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.553 -> 2.553 ( +0.00%) [ +0.27% +0.20% +0.00% / +0.00% +0.67% +0.59%] index_add_ spread : Elapsed 0.026 ms (2.560 ms / 100) 2.481 -> 2.481 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.60% +0.48%] index_copy_ spread : Elapsed 0.025 ms (2.482 ms / 100) 2.552 -> 2.556 ( +0.16%) [ +0.24% +0.20% +0.00% / +0.16% +0.74% +0.63%] index_add_ strided 3 : Elapsed 0.026 ms (2.558 ms / 100) 2.481 -> 2.486 ( +0.20%) [ +0.16% +0.04% +0.00% / +0.20% +0.60% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.485 ms / 100) 2.554 -> 2.555 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.39% +0.43%] index_add_ strided 5 : Elapsed 0.026 ms (2.556 ms / 100) 2.479 -> 2.482 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.40% +0.32%] index_copy_ strided 5 : Elapsed 0.025 ms (2.480 ms / 100) 2.558 -> 2.560 ( +0.08%) [ +0.16% +0.00% +0.12% / +0.08% +0.35% +0.51%] index_add_ strided 7 : Elapsed 0.026 ms (2.562 ms / 100) 2.483 -> 2.482 ( -0.04%) [ +0.00% +0.12% +0.08% / -0.04% +0.36% +0.52%] index_copy_ strided 7 : Elapsed 0.025 ms (2.483 ms / 100) 2.555 -> 2.564 ( +0.35%) [ +0.00% +0.00% +0.20% / +0.63% +0.35% +0.35%] index_add_ perm : Elapsed 0.026 ms (2.555 ms / 100) 2.482 -> 2.491 ( +0.36%) [ +0.16% +0.04% +0.00% / +0.36% +0.36% +0.36%] index_copy_ perm : Elapsed 0.025 ms (2.486 ms / 100) 2.557 -> 2.559 ( +0.08%) [ +0.35% +0.00% +0.08% / +0.08% +0.39% +0.23%] index_add_ perm_sorted : Elapsed 0.026 ms (2.566 ms / 100) 2.480 -> 2.480 ( +0.00%) [ +0.24% +0.04% +0.00% / +0.00% +0.48% +0.48%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 5.626 -> 5.627 ( +0.02%) [ +0.28% +0.27% +0.00% / +0.02% +0.71% +0.60%] index_select const : Elapsed 0.056 ms (5.642 ms / 100) 5.637 -> 5.636 ( -0.02%) [ +0.04% +0.09% +0.00% / -0.02% +0.57% +0.55%] index_select wrap : Elapsed 0.056 ms (5.639 ms / 100) 5.637 -> 5.646 ( +0.16%) [ +0.00% +0.02% +0.02% / +0.16% +0.64% +0.43%] index_select linear : Elapsed 0.056 ms (5.637 ms / 100) 5.628 -> 5.639 ( +0.20%) [ +0.36% +0.37% +0.00% / +0.20% +0.89% +1.00%] index_select reverse : Elapsed 0.056 ms (5.648 ms / 100) 5.628 -> 5.636 ( +0.14%) [ +0.04% +0.00% +0.07% / +0.14% +0.66% +0.48%] index_select skip64 : Elapsed 0.056 ms (5.630 ms / 100) 5.622 -> 5.633 ( +0.20%) [ +0.21% +0.09% +0.00% / +0.20% +0.64% +0.62%] index_select skip256 : Elapsed 0.056 ms (5.634 ms / 100) 5.630 -> 5.643 ( +0.23%) [ +0.04% +0.00% +0.07% / +0.23% +0.60% +0.55%] index_select spread : Elapsed 0.056 ms (5.632 ms / 100) 5.629 -> 5.633 ( +0.07%) [ +0.04% +0.00% +0.07% / +0.07% +0.64% +0.64%] index_select strided 3 : Elapsed 0.056 ms (5.631 ms / 100) 5.618 -> 5.624 ( +0.11%) [ +0.16% +0.11% +0.00% / +0.11% +0.89% +1.19%] index_select random : Elapsed 0.056 ms (5.627 ms / 100) 5.626 -> 5.630 ( +0.07%) [ +0.00% +0.02% +0.11% / +0.07% +0.78% +0.76%] index_select random_sorted : Elapsed 0.056 ms (5.626 ms / 100) B = [16, 20, 4, 40] (stride (80, 4, 1, 1280)) A = [5, 20, 4, 40] (stride (3200, 1, 20, 80)) dim = 0 2.395 -> 2.395 ( +0.00%) [ +0.29% +0.00% +0.00% / +0.00% +0.33% +0.33%] index_add_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.330 -> 2.326 ( -0.17%) [ +0.04% +0.00% +0.26% / -0.17% +0.34% +0.30%] index_copy_ linear : Elapsed 0.023 ms (2.331 ms / 100) 2.396 -> 2.397 ( +0.04%) [ +0.08% +0.21% +0.00% / +0.04% +0.67% +0.42%] index_add_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.332 -> 2.329 ( -0.13%) [ +0.13% +0.00% +0.00% / -0.13% +0.39% +0.34%] index_copy_ reverse : Elapsed 0.023 ms (2.335 ms / 100) 2.391 -> 2.398 ( +0.29%) [ +0.17% +0.13% +0.00% / +0.29% +0.46% +0.54%] index_add_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.325 -> 2.331 ( +0.26%) [ +0.00% +0.04% +0.00% / +0.26% +0.56% +0.69%] index_copy_ spread : Elapsed 0.023 ms (2.325 ms / 100) 2.391 -> 2.399 ( +0.33%) [ +0.21% +0.08% +0.00% / +0.33% +0.63% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.00% +0.00% +0.09% / +0.17% +0.39% +0.34%] index_copy_ strided 3 : Elapsed 0.023 ms (2.327 ms / 100) 2.396 -> 2.398 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.42%] index_add_ strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.328 -> 2.333 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.34% +0.47%] index_copy_ strided 5 : Elapsed 0.023 ms (2.328 ms / 100) 2.391 -> 2.389 ( -0.08%) [ +0.08% +0.25% +0.00% / -0.08% +0.54% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.320 -> 2.322 ( +0.09%) [ +0.13% +0.22% +0.00% / +0.09% +0.86% +1.29%] index_copy_ strided 7 : Elapsed 0.023 ms (2.323 ms / 100) 2.397 -> 2.400 ( +0.13%) [ +0.13% +0.04% +0.00% / +0.13% +0.50% +0.38%] index_add_ perm : Elapsed 0.024 ms (2.400 ms / 100) 2.328 -> 2.331 ( +0.13%) [ +0.21% +0.00% +0.04% / +0.13% +0.47% +0.34%] index_copy_ perm : Elapsed 0.023 ms (2.333 ms / 100) 2.392 -> 2.397 ( +0.21%) [ +0.13% +0.08% +0.00% / +0.21% +0.67% +0.50%] index_add_ perm_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.324 -> 2.329 ( +0.22%) [ +0.13% +0.00% +0.04% / +0.22% +0.52% +0.56%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.327 ms / 100) 5.183 -> 5.203 ( +0.39%) [ +0.31% +0.60% +0.00% / +0.39% +1.04% +1.22%] index_select const : Elapsed 0.052 ms (5.199 ms / 100) 5.166 -> 5.172 ( +0.12%) [ +0.17% +0.12% +0.00% / +0.12% +0.74% +0.75%] index_select wrap : Elapsed 0.052 ms (5.175 ms / 100) 5.190 -> 5.189 ( -0.02%) [ +0.25% +0.39% +0.00% / -0.02% +0.60% +0.81%] index_select linear : Elapsed 0.052 ms (5.203 ms / 100) 5.174 -> 5.172 ( -0.04%) [ +0.14% +0.00% +0.04% / -0.04% +0.64% +0.52%] index_select reverse : Elapsed 0.052 ms (5.181 ms / 100) 5.191 -> 5.190 ( -0.02%) [ +0.19% +0.00% +0.08% / -0.02% +0.39% +0.52%] index_select skip64 : Elapsed 0.052 ms (5.201 ms / 100) 5.198 -> 5.209 ( +0.21%) [ +0.00% +0.29% +0.19% / +0.21% +0.83% +0.67%] index_select skip256 : Elapsed 0.052 ms (5.198 ms / 100) 5.185 -> 5.192 ( +0.14%) [ +0.04% +0.00% +0.04% / +0.14% +0.68% +0.77%] index_select spread : Elapsed 0.052 ms (5.187 ms / 100) 5.174 -> 5.173 ( -0.02%) [ +0.08% +0.10% +0.00% / -0.02% +0.75% +0.79%] index_select strided 3 : Elapsed 0.052 ms (5.178 ms / 100) 5.175 -> 5.189 ( +0.27%) [ +0.27% +0.15% +0.00% / +0.27% +0.81% +0.85%] index_select random : Elapsed 0.052 ms (5.189 ms / 100) 5.175 -> 5.178 ( +0.06%) [ +0.00% +0.00% +0.15% / +0.06% +0.77% +0.77%] index_select random_sorted : Elapsed 0.052 ms (5.175 ms / 100) B = [16, 20, 4, 40] (stride (80, 1, 20, 1280)) A = [5, 20, 4, 40] (stride (3200, 1, 20, 80)) dim = 0 2.542 -> 2.545 ( +0.12%) [ +0.28% +0.12% +0.00% / +0.12% +0.39% +0.28%] index_add_ linear : Elapsed 0.025 ms (2.549 ms / 100) 2.474 -> 2.480 ( +0.24%) [ +0.12% +0.20% +0.00% / +0.24% +0.61% +0.40%] index_copy_ linear : Elapsed 0.025 ms (2.477 ms / 100) 2.538 -> 2.540 ( +0.08%) [ +0.16% +0.35% +0.00% / +0.08% +0.32% +0.51%] index_add_ reverse : Elapsed 0.025 ms (2.542 ms / 100) 2.471 -> 2.479 ( +0.32%) [ +0.24% +0.16% +0.00% / +0.32% +0.32% +0.57%] index_copy_ reverse : Elapsed 0.025 ms (2.477 ms / 100) 2.540 -> 2.543 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.55% +0.35%] index_add_ spread : Elapsed 0.025 ms (2.544 ms / 100) 2.471 -> 2.476 ( +0.20%) [ +0.00% +0.12% +0.00% / +0.20% +0.45% +0.53%] index_copy_ spread : Elapsed 0.025 ms (2.471 ms / 100) 2.542 -> 2.544 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.39% +0.51%] index_add_ strided 3 : Elapsed 0.025 ms (2.543 ms / 100) 2.471 -> 2.477 ( +0.24%) [ +0.16% +0.20% +0.00% / +0.24% +0.53% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.540 -> 2.542 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.39% +0.08% +0.28%] index_add_ strided 5 : Elapsed 0.025 ms (2.540 ms / 100) 2.469 -> 2.477 ( +0.32%) [ +0.00% +0.20% +0.08% / +0.41% +0.32% +0.49%] index_copy_ strided 5 : Elapsed 0.025 ms (2.469 ms / 100) 2.536 -> 2.541 ( +0.20%) [ +0.12% +0.00% +0.04% / +0.43% +0.28% +0.20%] index_add_ strided 7 : Elapsed 0.025 ms (2.539 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.24% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.545 -> 2.546 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.16% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.545 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.08% +0.00% +0.16% / +0.28% +0.20% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.478 ms / 100) 2.542 -> 2.551 ( +0.35%) [ +0.00% +0.16% +0.00% / +0.43% +0.39% +0.35%] index_add_ perm_sorted : Elapsed 0.025 ms (2.542 ms / 100) 2.471 -> 2.476 ( +0.20%) [ +0.16% +0.00% +0.04% / +0.20% +0.40% +0.61%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.475 ms / 100) 5.626 -> 5.643 ( +0.30%) [ +0.00% +0.62% +0.46% / +0.30% +0.37% +1.03%] index_select const : Elapsed 0.056 ms (5.626 ms / 100) 5.638 -> 5.658 ( +0.35%) [ +0.09% +0.00% +0.00% / +0.43% +0.35% +0.60%] index_select wrap : Elapsed 0.056 ms (5.643 ms / 100) 5.664 -> 5.677 ( +0.23%) [ +0.14% +0.12% +0.00% / +0.23% +0.49% +0.53%] index_select linear : Elapsed 0.057 ms (5.672 ms / 100) 5.643 -> 5.644 ( +0.02%) [ +0.18% +0.00% +0.00% / +0.02% +0.50% +0.55%] index_select reverse : Elapsed 0.057 ms (5.653 ms / 100) 5.642 -> 5.654 ( +0.21%) [ +0.04% +0.00% +0.05% / +0.21% +0.50% +0.34%] index_select skip64 : Elapsed 0.056 ms (5.644 ms / 100) 5.630 -> 5.628 ( -0.04%) [ +0.18% +0.16% +0.00% / -0.04% +0.76% +0.59%] index_select skip256 : Elapsed 0.056 ms (5.640 ms / 100) 5.638 -> 5.641 ( +0.05%) [ +0.00% +0.14% +0.07% / +0.05% +0.51% +0.66%] index_select spread : Elapsed 0.056 ms (5.638 ms / 100) 5.636 -> 5.636 ( +0.00%) [ +0.02% +0.12% +0.00% / +0.00% +0.73% +0.53%] index_select strided 3 : Elapsed 0.056 ms (5.637 ms / 100) 5.631 -> 5.641 ( +0.18%) [ +0.18% +0.00% +0.09% / +0.18% +0.80% +0.71%] index_select random : Elapsed 0.056 ms (5.641 ms / 100) 5.620 -> 5.624 ( +0.07%) [ +0.04% +0.25% +0.00% / +0.07% +0.68% +0.80%] index_select random_sorted : Elapsed 0.056 ms (5.622 ms / 100) B = [16, 20, 4, 40] (stride (80, 1, 20, 1280)) A = [5, 20, 4, 40] (stride (80, 4, 1, 400)) dim = 0 2.421 -> 2.424 ( +0.12%) [ +0.04% +0.17% +0.00% / +0.12% +0.50% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.422 ms / 100) 2.361 -> 2.366 ( +0.21%) [ +0.00% +0.04% +0.00% / +0.21% +0.34% +0.34%] index_copy_ linear : Elapsed 0.024 ms (2.361 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.30% +0.42%] index_copy_ reverse : Elapsed 0.024 ms (2.356 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.54% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.420 ms / 100) 2.356 -> 2.361 ( +0.21%) [ +0.04% +0.04% +0.00% / +0.21% +0.47% +0.55%] index_copy_ spread : Elapsed 0.024 ms (2.357 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.00% +0.17% +0.04% / +0.41% +0.62% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.354 -> 2.354 ( +0.00%) [ +0.00% +0.17% +0.08% / +0.00% +0.55% +0.47%] index_copy_ strided 3 : Elapsed 0.024 ms (2.354 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.12% +0.00% +0.04% / +0.12% +0.50% +0.54%] index_add_ strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.354 -> 2.361 ( +0.30%) [ +0.17% +0.21% +0.00% / +0.30% +0.59% +0.72%] index_copy_ strided 5 : Elapsed 0.024 ms (2.358 ms / 100) 2.415 -> 2.421 ( +0.25%) [ +0.37% +0.21% +0.00% / +0.25% +0.70% +0.79%] index_add_ strided 7 : Elapsed 0.024 ms (2.424 ms / 100) 2.355 -> 2.363 ( +0.34%) [ +0.00% +0.00% +0.04% / +0.34% +0.72% +0.68%] index_copy_ strided 7 : Elapsed 0.024 ms (2.355 ms / 100) 2.415 -> 2.413 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.58% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.415 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.00% +0.17% +0.17% / +0.13% +0.55% +0.43%] index_copy_ perm : Elapsed 0.024 ms (2.352 ms / 100) 2.414 -> 2.425 ( +0.46%) [ +0.33% +0.33% +0.00% / +0.46% +0.75% +0.66%] index_add_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 2.356 -> 2.364 ( +0.34%) [ +0.00% +0.08% +0.00% / +0.34% +0.55% +0.59%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.356 ms / 100) 5.207 -> 5.215 ( +0.15%) [ +0.00% +0.10% +0.12% / +0.15% +0.60% +0.42%] index_select const : Elapsed 0.052 ms (5.207 ms / 100) 5.217 -> 5.220 ( +0.06%) [ +0.08% +0.02% +0.00% / +0.06% +0.69% +0.69%] index_select wrap : Elapsed 0.052 ms (5.221 ms / 100) 5.256 -> 5.258 ( +0.04%) [ +0.15% +0.17% +0.00% / +0.04% +0.57% +0.72%] index_select linear : Elapsed 0.053 ms (5.264 ms / 100) 5.235 -> 5.231 ( -0.08%) [ +0.06% +0.00% +0.00% / -0.08% +0.52% +0.53%] index_select reverse : Elapsed 0.052 ms (5.238 ms / 100) 5.202 -> 5.213 ( +0.21%) [ +0.04% +0.12% +0.00% / +0.21% +0.56% +0.60%] index_select skip64 : Elapsed 0.052 ms (5.204 ms / 100) 5.204 -> 5.210 ( +0.12%) [ +0.06% +0.04% +0.00% / +0.12% +0.56% +0.42%] index_select skip256 : Elapsed 0.052 ms (5.207 ms / 100) 5.194 -> 5.199 ( +0.10%) [ +0.00% +0.06% +0.04% / +0.10% +0.67% +0.71%] index_select spread : Elapsed 0.052 ms (5.194 ms / 100) 5.193 -> 5.200 ( +0.13%) [ +0.10% +0.15% +0.00% / +0.13% +0.89% +0.81%] index_select strided 3 : Elapsed 0.052 ms (5.198 ms / 100) 5.203 -> 5.208 ( +0.10%) [ +0.04% +0.08% +0.00% / +0.10% +0.60% +0.63%] index_select random : Elapsed 0.052 ms (5.205 ms / 100) 5.206 -> 5.205 ( -0.02%) [ +0.10% +0.00% +0.02% / -0.02% +0.71% +0.75%] index_select random_sorted : Elapsed 0.052 ms (5.211 ms / 100) out_shape = [5, 16, 4, 40] in_shape = [5, 20, 4, 40] idx_dim = 1 B = [5, 16, 4, 40] (stride (1, 800, 200, 5)) A = [5, 20, 4, 40] (stride (1, 5, 100, 400)) dim = 1 3.924 -> 3.930 ( +0.15%) [ +0.05% +0.05% +0.00% / +0.15% +0.59% +0.76%] index_select const : Elapsed 0.039 ms (3.926 ms / 100) 3.929 -> 3.929 ( +0.00%) [ +0.00% +0.10% +0.08% / +0.00% +0.71% +0.81%] index_select wrap : Elapsed 0.039 ms (3.929 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.64% +0.69%] index_select linear : Elapsed 0.039 ms (3.923 ms / 100) 3.924 -> 3.926 ( +0.05%) [ +0.13% +0.20% +0.00% / +0.05% +0.84% +0.92%] index_select reverse : Elapsed 0.039 ms (3.929 ms / 100) 3.926 -> 3.931 ( +0.13%) [ +0.05% +0.05% +0.00% / +0.13% +0.74% +0.76%] index_select skip64 : Elapsed 0.039 ms (3.928 ms / 100) 3.931 -> 3.939 ( +0.20%) [ +0.08% +0.00% +0.03% / +0.20% +0.61% +0.64%] index_select skip256 : Elapsed 0.039 ms (3.934 ms / 100) 3.930 -> 3.934 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.64% +0.66%] index_select spread : Elapsed 0.039 ms (3.930 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.61% +0.61%] index_select strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 3.925 -> 3.925 ( +0.00%) [ +0.18% +0.03% +0.00% / +0.00% +0.82% +0.82%] index_select strided 5 : Elapsed 0.039 ms (3.932 ms / 100) 3.913 -> 3.930 ( +0.43%) [ +0.26% +0.10% +0.00% / +0.43% +0.74% +0.77%] index_select strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.00% +0.05% +0.13% / +0.05% +0.43% +0.46%] index_select strided 8 : Elapsed 0.039 ms (3.920 ms / 100) 3.933 -> 3.940 ( +0.18%) [ +0.18% +0.00% +0.03% / +0.18% +0.71% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.940 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.46%] index_select random : Elapsed 0.039 ms (3.932 ms / 100) 3.924 -> 3.929 ( +0.13%) [ +0.13% +0.00% +0.15% / +0.13% +0.59% +0.71%] index_select random_sorted : Elapsed 0.039 ms (3.929 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.46% +0.53%] index_select perm : Elapsed 0.039 ms (3.936 ms / 100) 3.936 -> 3.932 ( -0.10%) [ +0.00% +0.05% +0.03% / -0.10% +0.33% +0.53%] index_select perm_sorted : Elapsed 0.039 ms (3.936 ms / 100) B = [5, 16, 4, 40] (stride (1, 200, 3200, 5)) A = [5, 20, 4, 40] (stride (1, 200, 4000, 5)) dim = 1 3.909 -> 3.924 ( +0.38%) [ +0.00% +0.13% +0.10% / +0.38% +0.54% +0.61%] index_select const : Elapsed 0.039 ms (3.909 ms / 100) 3.912 -> 3.915 ( +0.08%) [ +0.36% +0.18% +0.00% / +0.08% +0.51% +0.64%] index_select wrap : Elapsed 0.039 ms (3.926 ms / 100) 3.924 -> 3.929 ( +0.13%) [ +0.10% +0.00% +0.08% / +0.13% +0.69% +0.46%] index_select linear : Elapsed 0.039 ms (3.928 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.03% +0.28% +0.31%] index_select reverse : Elapsed 0.039 ms (3.927 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.61% +0.58%] index_select skip64 : Elapsed 0.039 ms (3.934 ms / 100) 3.917 -> 3.909 ( -0.20%) [ +0.20% +0.00% +0.00% / -0.20% +0.20% +0.31%] index_select skip256 : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.00% +0.08% +0.05% / +0.13% +0.23% +0.41%] index_select spread : Elapsed 0.039 ms (3.919 ms / 100) 3.915 -> 3.926 ( +0.28%) [ +0.26% +0.18% +0.00% / +0.28% +0.59% +0.66%] index_select strided 3 : Elapsed 0.039 ms (3.925 ms / 100) 3.907 -> 3.914 ( +0.18%) [ +0.41% +0.00% +0.41% / +0.18% +0.46% +0.49%] index_select strided 5 : Elapsed 0.039 ms (3.923 ms / 100) 3.921 -> 3.924 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.64% +0.48%] index_select strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.927 ( +0.15%) [ +0.08% +0.00% +0.03% / +0.15% +0.61% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.924 ms / 100) 3.910 -> 3.921 ( +0.28%) [ +0.00% +0.18% +0.03% / +0.28% +0.59% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.910 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.74% +0.79%] index_select random : Elapsed 0.039 ms (3.932 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.13% +0.08% +0.00% / +0.03% +0.71% +0.59%] index_select random_sorted : Elapsed 0.039 ms (3.927 ms / 100) 3.909 -> 3.920 ( +0.28%) [ +0.33% +0.00% +0.33% / +0.28% +0.67% +0.69%] index_select perm : Elapsed 0.039 ms (3.922 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.64% +0.59%] index_select perm_sorted : Elapsed 0.039 ms (3.927 ms / 100) B = [5, 16, 4, 40] (stride (16, 1, 3200, 80)) A = [5, 20, 4, 40] (stride (3200, 160, 40, 1)) dim = 1 1.379 -> 1.379 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +1.09% +1.23%] index_select const : Elapsed 0.014 ms (1.380 ms / 100) 1.386 -> 1.390 ( +0.29%) [ +0.00% +0.07% +0.22% / +0.29% +1.37% +1.15%] index_select wrap : Elapsed 0.014 ms (1.386 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +1.16% +1.08%] index_select linear : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.385 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +1.16% +1.23%] index_select reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +1.09% +1.09%] index_select skip64 : Elapsed 0.014 ms (1.379 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.87% +1.01%] index_select skip256 : Elapsed 0.014 ms (1.383 ms / 100) 1.388 -> 1.388 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.86% +0.79%] index_select spread : Elapsed 0.014 ms (1.388 ms / 100) 1.384 -> 1.386 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.87% +0.79%] index_select strided 3 : Elapsed 0.014 ms (1.385 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.94% +0.94%] index_select strided 5 : Elapsed 0.014 ms (1.382 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.80% +0.87%] index_select strided 7 : Elapsed 0.014 ms (1.384 ms / 100) 1.382 -> 1.384 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.65% +0.65%] index_select strided 8 : Elapsed 0.014 ms (1.383 ms / 100) 1.384 -> 1.386 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.72% +0.79%] index_select strided 16 : Elapsed 0.014 ms (1.384 ms / 100) 1.387 -> 1.389 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.36% +0.14%] index_select random : Elapsed 0.014 ms (1.389 ms / 100) 1.390 -> 1.389 ( -0.07%) [ +0.00% +0.14% +0.00% / -0.07% +0.43% +0.29%] index_select random_sorted : Elapsed 0.014 ms (1.390 ms / 100) 1.390 -> 1.392 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.36% +1.65%] index_select perm : Elapsed 0.014 ms (1.390 ms / 100) 1.389 -> 1.390 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.29% +0.36%] index_select perm_sorted : Elapsed 0.014 ms (1.390 ms / 100) B = [5, 16, 4, 40] (stride (16, 1, 3200, 80)) A = [5, 20, 4, 40] (stride (40, 200, 4000, 1)) dim = 1 3.532 -> 3.535 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.45% +0.45%] index_select const : Elapsed 0.035 ms (3.533 ms / 100) 3.516 -> 3.521 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.54% +0.51%] index_select wrap : Elapsed 0.035 ms (3.519 ms / 100) 3.520 -> 3.525 ( +0.14%) [ +0.77% +0.06% +0.00% / +0.14% +0.60% +0.45%] index_select linear : Elapsed 0.035 ms (3.547 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.66% +0.68%] index_select reverse : Elapsed 0.035 ms (3.509 ms / 100) 3.544 -> 3.547 ( +0.08%) [ +0.11% +0.31% +0.00% / +0.08% +0.59% +0.56%] index_select skip64 : Elapsed 0.035 ms (3.548 ms / 100) 3.542 -> 3.544 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.42% +0.45%] index_select skip256 : Elapsed 0.035 ms (3.542 ms / 100) 3.514 -> 3.516 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.60% +0.63%] index_select spread : Elapsed 0.035 ms (3.515 ms / 100) 3.528 -> 3.530 ( +0.06%) [ +0.00% +0.03% +0.09% / +0.06% +0.31% +0.43%] index_select strided 3 : Elapsed 0.035 ms (3.528 ms / 100) 3.519 -> 3.522 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.60% +0.51%] index_select strided 5 : Elapsed 0.035 ms (3.522 ms / 100) 3.526 -> 3.528 ( +0.06%) [ +0.09% +0.00% +0.03% / +0.06% +0.79% +0.54%] index_select strided 7 : Elapsed 0.035 ms (3.529 ms / 100) 3.519 -> 3.521 ( +0.06%) [ +0.14% +0.00% +0.23% / +0.06% +0.82% +0.65%] index_select strided 8 : Elapsed 0.035 ms (3.524 ms / 100) 3.517 -> 3.519 ( +0.06%) [ +0.00% +0.37% +0.11% / +0.06% +0.97% +0.74%] index_select strided 16 : Elapsed 0.035 ms (3.517 ms / 100) 3.522 -> 3.535 ( +0.37%) [ +0.09% +0.14% +0.00% / +0.37% +0.99% +0.82%] index_select random : Elapsed 0.035 ms (3.525 ms / 100) 3.511 -> 3.514 ( +0.09%) [ +0.06% +0.00% +0.06% / +0.09% +0.83% +0.77%] index_select random_sorted : Elapsed 0.035 ms (3.513 ms / 100) 3.518 -> 3.522 ( +0.11%) [ +0.03% +0.06% +0.00% / +0.11% +0.60% +0.60%] index_select perm : Elapsed 0.035 ms (3.519 ms / 100) 3.532 -> 3.538 ( +0.17%) [ +0.20% +0.00% +0.06% / +0.17% +0.79% +0.62%] index_select perm_sorted : Elapsed 0.035 ms (3.539 ms / 100) out_shape = [5, 20, 16, 40] in_shape = [5, 20, 4, 40] idx_dim = 2 B = [5, 20, 16, 40] (stride (12800, 640, 40, 1)) A = [5, 20, 4, 40] (stride (1, 20, 5, 400)) dim = 2 2.491 -> 2.503 ( +0.48%) [ +0.00% +0.08% +0.04% / +0.52% +0.52% +0.48%] index_add_ linear : Elapsed 0.025 ms (2.491 ms / 100) 2.427 -> 2.431 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.16% +0.37% +2.39%] index_copy_ linear : Elapsed 0.024 ms (2.428 ms / 100) 2.492 -> 2.493 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.52% +0.32%] index_add_ reverse : Elapsed 0.025 ms (2.492 ms / 100) 2.428 -> 2.436 ( +0.33%) [ +0.04% +0.00% +0.00% / +0.33% +0.82% +0.49%] index_copy_ reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.488 -> 2.487 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.64% +0.32%] index_add_ spread : Elapsed 0.025 ms (2.488 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.66% +0.62%] index_copy_ spread : Elapsed 0.024 ms (2.425 ms / 100) 2.485 -> 2.486 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.44% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.486 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.70% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.480 -> 2.483 ( +0.12%) [ +0.20% +0.36% +0.00% / +0.12% +0.69% +0.60%] index_add_ strided 5 : Elapsed 0.025 ms (2.485 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.12% +0.70% +0.54%] index_copy_ strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.486 -> 2.489 ( +0.12%) [ +0.16% +0.00% +0.00% / +0.12% +0.80% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.490 ms / 100) 2.421 -> 2.426 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.95% +0.62%] index_copy_ strided 7 : Elapsed 0.024 ms (2.421 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.00% +0.20% +0.12% / +0.12% +0.52% +0.52%] index_add_ perm : Elapsed 0.025 ms (2.481 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.41% +0.50%] index_copy_ perm : Elapsed 0.024 ms (2.425 ms / 100) 2.482 -> 2.486 ( +0.16%) [ +0.00% +0.20% +0.08% / +0.16% +0.56% +0.52%] index_add_ perm_sorted : Elapsed 0.025 ms (2.482 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.17% +0.00% +0.17% / +0.04% +0.58% +0.66%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 5.502 -> 5.507 ( +0.09%) [ +0.00% +0.04% +0.02% / +0.09% +0.44% +0.51%] index_select const : Elapsed 0.055 ms (5.502 ms / 100) 5.491 -> 5.506 ( +0.27%) [ +0.20% +0.00% +0.11% / +0.27% +0.33% +0.40%] index_select wrap : Elapsed 0.055 ms (5.502 ms / 100) 5.508 -> 5.512 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.07% +0.44% +0.53%] index_select linear : Elapsed 0.055 ms (5.508 ms / 100) 5.504 -> 5.506 ( +0.04%) [ +0.09% +0.00% +0.15% / +0.04% +0.58% +0.67%] index_select reverse : Elapsed 0.055 ms (5.509 ms / 100) 5.498 -> 5.499 ( +0.02%) [ +0.04% +0.07% +0.00% / +0.02% +0.44% +0.38%] index_select skip64 : Elapsed 0.055 ms (5.500 ms / 100) 5.512 -> 5.513 ( +0.02%) [ +0.04% +0.04% +0.00% / +0.02% +0.42% +0.54%] index_select skip256 : Elapsed 0.055 ms (5.514 ms / 100) 5.498 -> 5.502 ( +0.07%) [ +0.00% +0.05% +0.04% / +0.07% +0.38% +0.47%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.510 -> 5.520 ( +0.18%) [ +0.00% +0.24% +0.22% / +0.18% +0.51% +0.44%] index_select strided 3 : Elapsed 0.055 ms (5.510 ms / 100) 5.486 -> 5.487 ( +0.02%) [ +0.02% +0.00% +0.09% / +0.02% +0.42% +0.33%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.497 -> 5.504 ( +0.13%) [ +0.11% +0.09% +0.00% / +0.13% +0.65% +0.47%] index_select random_sorted : Elapsed 0.055 ms (5.503 ms / 100) B = [5, 20, 16, 40] (stride (640, 3200, 1, 16)) A = [5, 20, 4, 40] (stride (80, 4, 1, 400)) dim = 2 2.531 -> 2.530 ( -0.04%) [ +0.00% +0.12% +0.00% / +0.08% +0.32% -0.04%] index_add_ linear : Elapsed 0.025 ms (2.531 ms / 100) 2.503 -> 2.505 ( +0.08%) [ +0.00% +0.24% +0.20% / +0.08% +0.28% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.503 ms / 100) 2.528 -> 2.529 ( +0.04%) [ +0.08% +0.24% +0.00% / +0.28% +0.04% +0.08%] index_add_ reverse : Elapsed 0.025 ms (2.530 ms / 100) 2.503 -> 2.500 ( -0.12%) [ +0.24% +0.24% +0.00% / +0.16% +0.04% -0.12%] index_copy_ reverse : Elapsed 0.025 ms (2.509 ms / 100) 2.569 -> 2.571 ( +0.08%) [ +0.35% +0.08% +0.00% / +0.12% +0.16% +0.08%] index_add_ spread : Elapsed 0.026 ms (2.578 ms / 100) 2.602 -> 2.607 ( +0.19%) [ +0.38% +0.04% +0.00% / +0.27% +0.19% +0.19%] index_copy_ spread : Elapsed 0.026 ms (2.612 ms / 100) 2.573 -> 2.571 ( -0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.04% -0.08%] index_add_ strided 3 : Elapsed 0.026 ms (2.574 ms / 100) 2.604 -> 2.602 ( -0.08%) [ +0.08% +0.31% +0.00% / -0.08% +0.15% +0.04%] index_copy_ strided 3 : Elapsed 0.026 ms (2.606 ms / 100) 2.567 -> 2.575 ( +0.31%) [ +0.43% +0.23% +0.00% / +0.35% +0.43% +0.31%] index_add_ strided 5 : Elapsed 0.026 ms (2.578 ms / 100) 2.602 -> 2.608 ( +0.23%) [ +0.12% +0.19% +0.00% / +0.38% +0.31% +0.23%] index_copy_ strided 5 : Elapsed 0.026 ms (2.605 ms / 100) 2.575 -> 2.572 ( -0.12%) [ +0.00% +0.12% +0.04% / -0.04% -0.12% -0.04%] index_add_ strided 7 : Elapsed 0.026 ms (2.575 ms / 100) 2.606 -> 2.603 ( -0.12%) [ +0.08% +0.12% +0.00% / -0.12% +0.27% +0.08%] index_copy_ strided 7 : Elapsed 0.026 ms (2.608 ms / 100) 2.577 -> 2.574 ( -0.12%) [ +0.00% +0.12% +0.04% / +0.04% -0.12% -0.08%] index_add_ perm : Elapsed 0.026 ms (2.577 ms / 100) 2.604 -> 2.605 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.19% +0.04% +0.08%] index_copy_ perm : Elapsed 0.026 ms (2.605 ms / 100) 2.572 -> 2.568 ( -0.16%) [ +0.00% +0.08% +0.16% / +0.04% +0.08% -0.16%] index_add_ perm_sorted : Elapsed 0.026 ms (2.572 ms / 100) 2.603 -> 2.601 ( -0.08%) [ +0.12% +0.00% +0.00% / +0.31% +0.27% -0.08%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.606 ms / 100) 5.602 -> 5.605 ( +0.05%) [ +0.14% +0.11% +0.00% / +0.05% +0.37% +0.34%] index_select const : Elapsed 0.056 ms (5.610 ms / 100) 5.605 -> 5.610 ( +0.09%) [ +0.05% +0.00% +0.02% / +0.09% +0.32% +0.21%] index_select wrap : Elapsed 0.056 ms (5.608 ms / 100) 5.613 -> 5.615 ( +0.04%) [ +0.12% +0.07% +0.00% / +0.04% +0.23% +0.23%] index_select linear : Elapsed 0.056 ms (5.620 ms / 100) 5.603 -> 5.610 ( +0.12%) [ +0.00% +0.07% +0.09% / +0.12% +0.34% +0.61%] index_select reverse : Elapsed 0.056 ms (5.603 ms / 100) 5.605 -> 5.599 ( -0.11%) [ +0.23% +0.09% +0.00% / -0.11% +0.32% +0.32%] index_select skip64 : Elapsed 0.056 ms (5.618 ms / 100) 5.613 -> 5.613 ( +0.00%) [ +0.12% +0.09% +0.00% / +0.00% +0.32% +0.36%] index_select skip256 : Elapsed 0.056 ms (5.620 ms / 100) 5.596 -> 5.610 ( +0.25%) [ +0.23% +0.14% +0.00% / +0.25% +0.48% +0.43%] index_select spread : Elapsed 0.056 ms (5.609 ms / 100) 5.604 -> 5.612 ( +0.14%) [ +0.16% +0.00% +0.09% / +0.14% +0.45% +0.30%] index_select strided 3 : Elapsed 0.056 ms (5.613 ms / 100) 5.612 -> 5.604 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.18% +0.11%] index_select random : Elapsed 0.056 ms (5.612 ms / 100) 5.604 -> 5.606 ( +0.04%) [ +0.02% +0.00% +0.05% / +0.04% +0.29% +0.20%] index_select random_sorted : Elapsed 0.056 ms (5.605 ms / 100) B = [5, 20, 16, 40] (stride (16, 3200, 1, 80)) A = [5, 20, 4, 40] (stride (160, 800, 40, 1)) dim = 2 2.500 -> 2.505 ( +0.20%) [ +0.16% +0.12% +0.00% / +0.20% +0.44% +0.68%] index_add_ linear : Elapsed 0.025 ms (2.504 ms / 100) 2.490 -> 2.491 ( +0.04%) [ +0.20% +0.00% +0.16% / +0.04% +0.64% +0.52%] index_copy_ linear : Elapsed 0.025 ms (2.495 ms / 100) 2.503 -> 2.508 ( +0.20%) [ +0.24% +0.20% +0.00% / +0.20% +0.52% +0.68%] index_add_ reverse : Elapsed 0.025 ms (2.509 ms / 100) 2.494 -> 2.498 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.20% +0.16% +0.48%] index_copy_ reverse : Elapsed 0.025 ms (2.494 ms / 100) 2.541 -> 2.545 ( +0.16%) [ +0.00% +0.16% +0.04% / +0.16% +0.63% +0.51%] index_add_ spread : Elapsed 0.025 ms (2.541 ms / 100) 2.592 -> 2.589 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +0.42% +0.31%] index_copy_ spread : Elapsed 0.026 ms (2.592 ms / 100) 2.543 -> 2.544 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.71% +0.75%] index_add_ strided 3 : Elapsed 0.025 ms (2.543 ms / 100) 2.589 -> 2.597 ( +0.31%) [ +0.08% +0.19% +0.00% / +0.31% +0.77% +0.70%] index_copy_ strided 3 : Elapsed 0.026 ms (2.591 ms / 100) 2.541 -> 2.547 ( +0.24%) [ +0.04% +0.24% +0.00% / +0.24% +0.75% +0.47%] index_add_ strided 5 : Elapsed 0.025 ms (2.542 ms / 100) 2.592 -> 2.592 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.93% +0.23%] index_copy_ strided 5 : Elapsed 0.026 ms (2.594 ms / 100) 2.547 -> 2.549 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.63% +0.24%] index_add_ strided 7 : Elapsed 0.025 ms (2.547 ms / 100) 2.590 -> 2.595 ( +0.19%) [ +0.15% +0.00% +0.04% / +0.19% +1.16% +0.35%] index_copy_ strided 7 : Elapsed 0.026 ms (2.594 ms / 100) 2.502 -> 2.511 ( +0.36%) [ +0.08% +0.36% +0.00% / +0.48% +1.00% +0.36%] index_add_ perm : Elapsed 0.025 ms (2.504 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.00% +0.08% +0.12% / +0.12% +0.60% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.493 ms / 100) 2.504 -> 2.505 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.40% +0.28% +0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.505 ms / 100) 2.488 -> 2.491 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.36% +0.32%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.489 ms / 100) 5.243 -> 5.255 ( +0.23%) [ +0.10% +0.00% +0.13% / +0.23% +0.53% +0.46%] index_select const : Elapsed 0.052 ms (5.248 ms / 100) 5.283 -> 5.290 ( +0.13%) [ +0.00% +0.02% +0.15% / +0.13% +0.72% +0.42%] index_select wrap : Elapsed 0.053 ms (5.283 ms / 100) 5.291 -> 5.297 ( +0.11%) [ +0.32% +0.30% +0.00% / +0.11% +0.60% +0.68%] index_select linear : Elapsed 0.053 ms (5.308 ms / 100) 5.305 -> 5.301 ( -0.08%) [ +0.15% +0.13% +0.00% / -0.08% +0.43% +0.60%] index_select reverse : Elapsed 0.053 ms (5.313 ms / 100) 5.256 -> 5.269 ( +0.25%) [ +0.13% +0.17% +0.00% / +0.25% +0.46% +0.48%] index_select skip64 : Elapsed 0.053 ms (5.263 ms / 100) 5.250 -> 5.248 ( -0.04%) [ +0.10% +0.17% +0.00% / -0.04% +0.38% +0.29%] index_select skip256 : Elapsed 0.053 ms (5.255 ms / 100) 5.287 -> 5.289 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.04% +0.23% +0.23%] index_select spread : Elapsed 0.053 ms (5.294 ms / 100) 5.282 -> 5.287 ( +0.09%) [ +0.13% +0.00% +0.06% / +0.09% +0.28% +0.32%] index_select strided 3 : Elapsed 0.053 ms (5.289 ms / 100) 5.298 -> 5.292 ( -0.11%) [ +0.02% +0.15% +0.00% / -0.11% +0.19% +0.42%] index_select random : Elapsed 0.053 ms (5.299 ms / 100) 5.289 -> 5.287 ( -0.04%) [ +0.00% +0.04% +0.02% / -0.04% +0.38% +0.38%] index_select random_sorted : Elapsed 0.053 ms (5.289 ms / 100) B = [5, 20, 16, 40] (stride (800, 1, 4000, 20)) A = [5, 20, 4, 40] (stride (3200, 40, 800, 1)) dim = 2 2.467 -> 2.467 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.28% +0.12%] index_add_ linear : Elapsed 0.025 ms (2.469 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.04% +0.00% +0.21% / +0.29% +0.04% +0.08%] index_copy_ linear : Elapsed 0.024 ms (2.420 ms / 100) 2.466 -> 2.470 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.16% +0.20% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.08% +0.00% +0.17%] index_copy_ reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.20% +0.20% +0.16%] index_add_ spread : Elapsed 0.025 ms (2.466 ms / 100) 2.417 -> 2.414 ( -0.12%) [ +0.00% +0.21% +0.04% / -0.12% +0.58% +0.08%] index_copy_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.465 -> 2.468 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.16% +0.20%] index_add_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.21% +0.00% +0.12% / +0.00% +0.17% +0.08%] index_copy_ strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.464 -> 2.469 ( +0.20%) [ +0.20% +0.24% +0.00% / +0.20% +0.32% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.469 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.04% +0.00% +0.21% / +0.12% +0.29% +0.29%] index_copy_ strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.36% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.00% +0.08% +0.08% / +0.29% +0.21% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.12% +0.16% +0.00% / -0.04% +0.45% +0.37%] index_add_ perm : Elapsed 0.025 ms (2.466 ms / 100) 2.419 -> 2.415 ( -0.17%) [ +0.04% +0.00% +0.00% / -0.17% +0.45% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.20% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.00% +0.08% +0.12% / +0.25% +0.54% +0.17%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 5.118 -> 5.111 ( -0.14%) [ +0.00% +0.16% +0.02% / -0.14% +0.53% +0.16%] index_select const : Elapsed 0.051 ms (5.118 ms / 100) 5.142 -> 5.148 ( +0.12%) [ +0.00% +0.14% +0.06% / +0.12% +0.64% +0.35%] index_select wrap : Elapsed 0.051 ms (5.142 ms / 100) 5.136 -> 5.137 ( +0.02%) [ +0.04% +0.06% +0.00% / +0.02% +0.31% +0.29%] index_select linear : Elapsed 0.051 ms (5.138 ms / 100) 5.151 -> 5.162 ( +0.21%) [ +0.14% +0.17% +0.00% / +0.21% +0.49% +0.45%] index_select reverse : Elapsed 0.052 ms (5.158 ms / 100) 5.107 -> 5.101 ( -0.12%) [ +0.00% +0.04% +0.02% / -0.12% +0.25% +0.16%] index_select skip64 : Elapsed 0.051 ms (5.107 ms / 100) 5.093 -> 5.104 ( +0.22%) [ +0.06% +0.00% +0.00% / +0.22% +0.41% +0.39%] index_select skip256 : Elapsed 0.051 ms (5.096 ms / 100) 5.136 -> 5.139 ( +0.06%) [ +0.00% +0.04% +0.04% / +0.06% +0.37% +0.41%] index_select spread : Elapsed 0.051 ms (5.136 ms / 100) 5.145 -> 5.146 ( +0.02%) [ +0.04% +0.00% +0.06% / +0.02% +0.25% +0.19%] index_select strided 3 : Elapsed 0.051 ms (5.147 ms / 100) 5.136 -> 5.137 ( +0.02%) [ +0.12% +0.02% +0.00% / +0.02% +0.27% +0.31%] index_select random : Elapsed 0.051 ms (5.142 ms / 100) 5.143 -> 5.148 ( +0.10%) [ +0.00% +0.12% +0.02% / +0.10% +0.41% +0.27%] index_select random_sorted : Elapsed 0.051 ms (5.143 ms / 100) B = [5, 20, 16, 40] (stride (1, 200, 4000, 5)) A = [5, 20, 4, 40] (stride (160, 800, 40, 1)) dim = 2 2.472 -> 2.472 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.28% +0.20%] index_add_ linear : Elapsed 0.025 ms (2.474 ms / 100) 2.428 -> 2.426 ( -0.08%) [ +0.00% +0.00% +0.08% / +0.00% +0.12% -0.08%] index_copy_ linear : Elapsed 0.024 ms (2.428 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.00% +0.16% +0.04% / -0.04% +0.36% +0.32%] index_add_ reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.421 -> 2.428 ( +0.29%) [ +0.12% +0.00% +0.17% / +0.29% +0.54% +0.54%] index_copy_ reverse : Elapsed 0.024 ms (2.424 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.04% +0.00% +0.12% / -0.04% +0.45% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.472 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.08% +0.66% +0.29%] index_copy_ spread : Elapsed 0.024 ms (2.427 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.00% +0.28% +0.04% / +0.12% +1.14% +0.69%] index_add_ strided 3 : Elapsed 0.025 ms (2.466 ms / 100) 2.420 -> 2.426 ( +0.25%) [ +0.17% +0.29% +0.00% / +0.25% +0.79% +0.74%] index_copy_ strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.40% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.472 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.41% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.423 ms / 100) 2.471 -> 2.475 ( +0.16%) [ +0.20% +0.20% +0.00% / +0.16% +0.20% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.12% +0.54% +0.08%] index_copy_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.474 -> 2.474 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.00% +0.08%] index_add_ perm : Elapsed 0.025 ms (2.474 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.21% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.426 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.12% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.21% +0.17% +0.04%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.427 ms / 100) 5.152 -> 5.161 ( +0.17%) [ +0.08% +0.00% +0.17% / +0.17% +0.37% +0.31%] index_select const : Elapsed 0.052 ms (5.156 ms / 100) 5.206 -> 5.209 ( +0.06%) [ +0.00% +0.10% +0.17% / +0.06% +0.36% +0.29%] index_select wrap : Elapsed 0.052 ms (5.206 ms / 100) 5.218 -> 5.225 ( +0.13%) [ +0.11% +0.00% +0.08% / +0.13% +0.44% +0.48%] index_select linear : Elapsed 0.052 ms (5.224 ms / 100) 5.218 -> 5.223 ( +0.10%) [ +0.15% +0.00% +0.02% / +0.10% +0.44% +0.56%] index_select reverse : Elapsed 0.052 ms (5.226 ms / 100) 5.174 -> 5.170 ( -0.08%) [ +0.02% +0.00% +0.00% / -0.08% +0.23% +0.29%] index_select skip64 : Elapsed 0.052 ms (5.175 ms / 100) 5.151 -> 5.165 ( +0.27%) [ +0.00% +0.37% +0.17% / +0.27% +0.47% +0.52%] index_select skip256 : Elapsed 0.052 ms (5.151 ms / 100) 5.201 -> 5.211 ( +0.19%) [ +0.15% +0.00% +0.00% / +0.19% +0.27% +0.35%] index_select spread : Elapsed 0.052 ms (5.209 ms / 100) 5.204 -> 5.204 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.42% +0.17%] index_select strided 3 : Elapsed 0.052 ms (5.204 ms / 100) 5.211 -> 5.205 ( -0.12%) [ +0.00% +0.12% +0.10% / -0.12% +0.25% +0.08%] index_select random : Elapsed 0.052 ms (5.211 ms / 100) 5.190 -> 5.195 ( +0.10%) [ +0.17% +0.00% +0.12% / +0.10% +0.50% +0.60%] index_select random_sorted : Elapsed 0.052 ms (5.199 ms / 100) B = [5, 20, 16, 40] (stride (1, 5, 4000, 100)) A = [5, 20, 4, 40] (stride (3200, 4, 1, 80)) dim = 2 2.619 -> 2.618 ( -0.04%) [ +0.08% +0.00% +0.19% / -0.04% +0.38% +0.15%] index_add_ linear : Elapsed 0.026 ms (2.621 ms / 100) 2.574 -> 2.576 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.19% +0.31%] index_copy_ linear : Elapsed 0.026 ms (2.576 ms / 100) 2.618 -> 2.617 ( -0.04%) [ +0.15% +0.11% +0.00% / -0.04% +0.04% +0.04%] index_add_ reverse : Elapsed 0.026 ms (2.622 ms / 100) 2.572 -> 2.575 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.16% +0.31% +0.12%] index_copy_ reverse : Elapsed 0.026 ms (2.576 ms / 100) 2.625 -> 2.624 ( -0.04%) [ +0.15% +0.08% +0.00% / +0.11% -0.04% +0.00%] index_add_ spread : Elapsed 0.026 ms (2.629 ms / 100) 2.576 -> 2.575 ( -0.04%) [ +0.16% +0.08% +0.00% / +0.31% +0.00% -0.04%] index_copy_ spread : Elapsed 0.026 ms (2.580 ms / 100) 2.616 -> 2.620 ( +0.15%) [ +0.23% +0.11% +0.00% / +0.15% +0.34% +0.34%] index_add_ strided 3 : Elapsed 0.026 ms (2.622 ms / 100) 2.569 -> 2.570 ( +0.04%) [ +0.31% +0.08% +0.00% / +0.04% +0.47% +0.47%] index_copy_ strided 3 : Elapsed 0.026 ms (2.577 ms / 100) 2.621 -> 2.622 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.15% +0.34%] index_add_ strided 5 : Elapsed 0.026 ms (2.623 ms / 100) 2.577 -> 2.576 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% -0.04% +0.04%] index_copy_ strided 5 : Elapsed 0.026 ms (2.577 ms / 100) 2.621 -> 2.623 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.38% +0.50%] index_add_ strided 7 : Elapsed 0.026 ms (2.621 ms / 100) 2.574 -> 2.578 ( +0.16%) [ +0.04% +0.19% +0.00% / +0.19% +0.19% +0.16%] index_copy_ strided 7 : Elapsed 0.026 ms (2.575 ms / 100) 2.617 -> 2.617 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.34% +0.38%] index_add_ perm : Elapsed 0.026 ms (2.619 ms / 100) 2.575 -> 2.574 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.08% +0.08%] index_copy_ perm : Elapsed 0.026 ms (2.575 ms / 100) 2.619 -> 2.617 ( -0.08%) [ +0.00% +0.00% +0.11% / -0.08% +0.11% +0.19%] index_add_ perm_sorted : Elapsed 0.026 ms (2.619 ms / 100) 2.574 -> 2.573 ( -0.04%) [ +0.19% +0.00% +0.00% / -0.04% -0.04% +0.12%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.579 ms / 100) 6.020 -> 6.012 ( -0.13%) [ +0.00% +0.05% +0.00% / -0.13% +0.28% +0.33%] index_select const : Elapsed 0.060 ms (6.020 ms / 100) 6.017 -> 6.028 ( +0.18%) [ +0.08% +0.07% +0.00% / +0.18% +0.27% +0.28%] index_select wrap : Elapsed 0.060 ms (6.022 ms / 100) 6.030 -> 6.031 ( +0.02%) [ +0.00% +0.08% +0.07% / +0.02% +0.17% +0.32%] index_select linear : Elapsed 0.060 ms (6.030 ms / 100) 6.023 -> 6.018 ( -0.08%) [ +0.00% +0.17% +0.02% / -0.08% +0.25% +0.22%] index_select reverse : Elapsed 0.060 ms (6.023 ms / 100) 6.022 -> 6.028 ( +0.10%) [ +0.12% +0.00% +0.00% / +0.10% +0.32% +0.43%] index_select skip64 : Elapsed 0.060 ms (6.029 ms / 100) 6.029 -> 6.028 ( -0.02%) [ +0.00% +0.03% +0.00% / -0.02% +0.32% +0.36%] index_select skip256 : Elapsed 0.060 ms (6.029 ms / 100) 6.015 -> 6.017 ( +0.03%) [ +0.07% +0.00% +0.12% / +0.03% +0.37% +0.38%] index_select spread : Elapsed 0.060 ms (6.019 ms / 100) 6.016 -> 6.017 ( +0.02%) [ +0.08% +0.02% +0.00% / +0.02% +0.43% +0.42%] index_select strided 3 : Elapsed 0.060 ms (6.021 ms / 100) 6.025 -> 6.030 ( +0.08%) [ +0.03% +0.10% +0.00% / +0.08% +0.32% +0.18%] index_select random : Elapsed 0.060 ms (6.027 ms / 100) 6.022 -> 6.022 ( +0.00%) [ +0.03% +0.10% +0.00% / +0.00% +0.28% +0.25%] index_select random_sorted : Elapsed 0.060 ms (6.024 ms / 100) B = [5, 20, 16, 40] (stride (1, 5, 4000, 100)) A = [5, 20, 4, 40] (stride (1, 5, 4000, 100)) dim = 2 2.601 -> 2.600 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.38% +0.31%] index_add_ linear : Elapsed 0.026 ms (2.602 ms / 100) 2.550 -> 2.550 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.31% +0.59%] index_copy_ linear : Elapsed 0.026 ms (2.550 ms / 100) 2.598 -> 2.603 ( +0.19%) [ +0.12% +0.00% +0.04% / +0.19% +0.31% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.601 ms / 100) 2.546 -> 2.553 ( +0.27%) [ +0.00% +0.16% +0.12% / +0.27% +0.43% +0.47%] index_copy_ reverse : Elapsed 0.025 ms (2.546 ms / 100) 2.599 -> 2.602 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.54% +0.42%] index_add_ spread : Elapsed 0.026 ms (2.599 ms / 100) 2.549 -> 2.550 ( +0.04%) [ +0.00% +0.16% +0.08% / +0.04% +0.27% +0.27%] index_copy_ spread : Elapsed 0.025 ms (2.549 ms / 100) 2.596 -> 2.598 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.50% +0.42%] index_add_ strided 3 : Elapsed 0.026 ms (2.600 ms / 100) 2.546 -> 2.550 ( +0.16%) [ +0.00% +0.00% +0.20% / +0.16% +0.55% +0.35%] index_copy_ strided 3 : Elapsed 0.025 ms (2.546 ms / 100) 2.595 -> 2.599 ( +0.15%) [ +0.15% +0.27% +0.00% / +0.15% +0.46% +0.42%] index_add_ strided 5 : Elapsed 0.026 ms (2.599 ms / 100) 2.548 -> 2.552 ( +0.16%) [ +0.00% +0.20% +0.12% / +0.16% +0.24% +0.35%] index_copy_ strided 5 : Elapsed 0.025 ms (2.548 ms / 100) 2.600 -> 2.604 ( +0.15%) [ +0.00% +0.08% +0.04% / +0.15% +0.35% +0.54%] index_add_ strided 7 : Elapsed 0.026 ms (2.600 ms / 100) 2.551 -> 2.554 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.16% +0.16% +0.12%] index_copy_ strided 7 : Elapsed 0.026 ms (2.552 ms / 100) 2.598 -> 2.600 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.08% +0.19%] index_add_ perm : Elapsed 0.026 ms (2.602 ms / 100) 2.545 -> 2.552 ( +0.28%) [ +0.04% +0.04% +0.00% / +0.28% +0.47% +0.31%] index_copy_ perm : Elapsed 0.025 ms (2.546 ms / 100) 2.598 -> 2.600 ( +0.08%) [ +0.12% +0.27% +0.00% / +0.19% +0.38% +0.08%] index_add_ perm_sorted : Elapsed 0.026 ms (2.601 ms / 100) 2.550 -> 2.549 ( -0.04%) [ +0.00% +0.12% +0.00% / +0.27% +0.43% -0.04%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.550 ms / 100) 6.005 -> 6.010 ( +0.08%) [ +0.12% +0.07% +0.00% / +0.08% +0.45% +0.45%] index_select const : Elapsed 0.060 ms (6.012 ms / 100) 5.965 -> 5.968 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.40% +0.47%] index_select wrap : Elapsed 0.060 ms (5.970 ms / 100) 5.992 -> 5.992 ( +0.00%) [ +0.15% +0.08% +0.00% / +0.00% +0.48% +0.37%] index_select linear : Elapsed 0.060 ms (6.001 ms / 100) 5.994 -> 6.007 ( +0.22%) [ +0.12% +0.03% +0.00% / +0.22% +0.45% +0.48%] index_select reverse : Elapsed 0.060 ms (6.001 ms / 100) 6.012 -> 6.018 ( +0.10%) [ +0.02% +0.00% +0.00% / +0.10% +0.32% +0.32%] index_select skip64 : Elapsed 0.060 ms (6.013 ms / 100) 6.005 -> 6.009 ( +0.07%) [ +0.00% +0.02% +0.10% / +0.07% +0.28% +0.38%] index_select skip256 : Elapsed 0.060 ms (6.005 ms / 100) 5.975 -> 5.979 ( +0.07%) [ +0.08% +0.00% +0.17% / +0.07% +0.44% +0.37%] index_select spread : Elapsed 0.060 ms (5.980 ms / 100) 5.975 -> 5.982 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.30% +0.25%] index_select strided 3 : Elapsed 0.060 ms (5.975 ms / 100) 5.973 -> 5.979 ( +0.10%) [ +0.00% +0.03% +0.03% / +0.10% +0.30% +0.33%] index_select random : Elapsed 0.060 ms (5.973 ms / 100) 5.984 -> 5.987 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.28% +0.27%] index_select random_sorted : Elapsed 0.060 ms (5.986 ms / 100) B = [5, 20, 16, 40] (stride (16, 80, 1, 1600)) A = [5, 20, 4, 40] (stride (80, 1, 20, 400)) dim = 2 2.654 -> 2.658 ( +0.15%) [ +0.45% +0.00% +0.26% / +0.15% +0.68% +0.72%] index_add_ linear : Elapsed 0.027 ms (2.666 ms / 100) 2.619 -> 2.621 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.19% +0.27%] index_copy_ linear : Elapsed 0.026 ms (2.621 ms / 100) 2.658 -> 2.658 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.41% +0.41%] index_add_ reverse : Elapsed 0.027 ms (2.661 ms / 100) 2.619 -> 2.616 ( -0.11%) [ +0.04% +0.00% +0.00% / -0.11% +0.04% +0.27%] index_copy_ reverse : Elapsed 0.026 ms (2.620 ms / 100) 2.692 -> 2.702 ( +0.37%) [ +0.00% +0.15% +0.30% / +0.41% +0.41% +0.37%] index_add_ spread : Elapsed 0.027 ms (2.692 ms / 100) 2.715 -> 2.718 ( +0.11%) [ +0.18% +0.00% +0.15% / +0.15% +0.41% +0.11%] index_copy_ spread : Elapsed 0.027 ms (2.720 ms / 100) 2.696 -> 2.701 ( +0.19%) [ +0.00% +0.07% +0.26% / +0.19% +0.30% +0.59%] index_add_ strided 3 : Elapsed 0.027 ms (2.696 ms / 100) 2.716 -> 2.718 ( +0.07%) [ +0.18% +0.00% +0.11% / +0.07% +0.18% +0.33%] index_copy_ strided 3 : Elapsed 0.027 ms (2.721 ms / 100) 2.697 -> 2.701 ( +0.15%) [ +0.00% +0.30% +0.04% / +0.15% +0.26% +0.30%] index_add_ strided 5 : Elapsed 0.027 ms (2.697 ms / 100) 2.721 -> 2.723 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.07% +0.11%] index_copy_ strided 5 : Elapsed 0.027 ms (2.725 ms / 100) 2.695 -> 2.694 ( -0.04%) [ +0.00% +0.07% +0.33% / -0.04% +0.37% +0.59%] index_add_ strided 7 : Elapsed 0.027 ms (2.695 ms / 100) 2.715 -> 2.723 ( +0.29%) [ +0.22% +0.07% +0.00% / +0.29% +0.52% +0.33%] index_copy_ strided 7 : Elapsed 0.027 ms (2.721 ms / 100) 2.700 -> 2.705 ( +0.19%) [ +0.15% +0.11% +0.00% / +0.19% +0.70% +0.70%] index_add_ perm : Elapsed 0.027 ms (2.704 ms / 100) 2.714 -> 2.725 ( +0.41%) [ +0.15% +0.26% +0.00% / +0.41% +0.52% +0.52%] index_copy_ perm : Elapsed 0.027 ms (2.718 ms / 100) 2.697 -> 2.701 ( +0.15%) [ +0.00% +0.22% +0.15% / +0.15% +0.63% +0.52%] index_add_ perm_sorted : Elapsed 0.027 ms (2.697 ms / 100) 2.718 -> 2.719 ( +0.04%) [ +0.11% +0.00% +0.22% / +0.04% +0.26% +0.18%] index_copy_ perm_sorted : Elapsed 0.027 ms (2.721 ms / 100) 6.052 -> 6.056 ( +0.07%) [ +0.02% +0.08% +0.00% / +0.07% +0.36% +0.31%] index_select const : Elapsed 0.061 ms (6.053 ms / 100) 6.048 -> 6.054 ( +0.10%) [ +0.13% +0.08% +0.00% / +0.10% +0.25% +0.30%] index_select wrap : Elapsed 0.061 ms (6.056 ms / 100) 6.056 -> 6.058 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.28% +0.28%] index_select linear : Elapsed 0.061 ms (6.058 ms / 100) 6.062 -> 6.070 ( +0.13%) [ +0.07% +0.00% +0.13% / +0.13% +0.36% +0.45%] index_select reverse : Elapsed 0.061 ms (6.066 ms / 100) 6.052 -> 6.057 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.28% +0.30%] index_select skip64 : Elapsed 0.061 ms (6.057 ms / 100) 6.055 -> 6.053 ( -0.03%) [ +0.00% +0.08% +0.00% / -0.03% +0.26% +0.30%] index_select skip256 : Elapsed 0.061 ms (6.055 ms / 100) 6.044 -> 6.054 ( +0.17%) [ +0.05% +0.00% +0.07% / +0.17% +0.35% +0.31%] index_select spread : Elapsed 0.060 ms (6.047 ms / 100) 6.047 -> 6.047 ( +0.00%) [ +0.00% +0.05% +0.10% / +0.00% +0.26% +0.26%] index_select strided 3 : Elapsed 0.060 ms (6.047 ms / 100) 6.047 -> 6.058 ( +0.18%) [ +0.15% +0.20% +0.00% / +0.18% +0.21% +0.45%] index_select random : Elapsed 0.061 ms (6.056 ms / 100) 6.047 -> 6.056 ( +0.15%) [ +0.00% +0.02% +0.03% / +0.15% +0.28% +0.26%] index_select random_sorted : Elapsed 0.060 ms (6.047 ms / 100) B = [5, 20, 16, 40] (stride (20, 1, 100, 1600)) A = [5, 20, 4, 40] (stride (20, 1, 4000, 100)) dim = 2 1.146 -> 1.145 ( -0.09%) [ +0.17% +0.26% +0.00% / -0.09% +0.09% +0.17%] index_add_ linear : Elapsed 0.011 ms (1.148 ms / 100) 1.145 -> 1.148 ( +0.26%) [ +0.00% +0.35% +0.17% / +0.52% +0.26% +0.26%] index_copy_ linear : Elapsed 0.011 ms (1.145 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.09% +0.00% +0.17% / -0.09% +0.00% -0.09%] index_add_ reverse : Elapsed 0.011 ms (1.150 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +0.44% +0.35%] index_copy_ reverse : Elapsed 0.011 ms (1.148 ms / 100) 1.153 -> 1.152 ( -0.09%) [ +0.00% +0.35% +0.00% / -0.09% -0.09% +0.09%] index_add_ spread : Elapsed 0.012 ms (1.153 ms / 100) 1.152 -> 1.150 ( -0.17%) [ +0.09% +0.09% +0.00% / +0.17% -0.17% +0.17%] index_copy_ spread : Elapsed 0.012 ms (1.153 ms / 100) 1.151 -> 1.151 ( +0.00%) [ +0.17% +0.00% +0.35% / +0.09% +0.00% +0.00%] index_add_ strided 3 : Elapsed 0.012 ms (1.153 ms / 100) 1.150 -> 1.152 ( +0.17%) [ +0.61% +0.35% +0.00% / +0.17% +0.26% +0.17%] index_copy_ strided 3 : Elapsed 0.012 ms (1.157 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.09% +0.17% +0.00% / +0.09% +0.26% +0.09%] index_add_ strided 5 : Elapsed 0.011 ms (1.148 ms / 100) 1.148 -> 1.145 ( -0.26%) [ +0.00% +0.44% +0.09% / +0.09% -0.09% -0.26%] index_copy_ strided 5 : Elapsed 0.011 ms (1.148 ms / 100) 1.150 -> 1.150 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +1.22% +1.13%] index_add_ strided 7 : Elapsed 0.012 ms (1.151 ms / 100) 1.144 -> 1.154 ( +0.87%) [ +0.35% +0.00% +0.44% / +0.87% +1.14% +1.05%] index_copy_ strided 7 : Elapsed 0.011 ms (1.148 ms / 100) 1.147 -> 1.147 ( +0.00%) [ +0.17% +0.00% +0.09% / +0.00% +0.35% +0.26%] index_add_ perm : Elapsed 0.011 ms (1.149 ms / 100) 1.145 -> 1.147 ( +0.17%) [ +0.09% +0.00% +0.09% / +0.17% +0.17% +0.26%] index_copy_ perm : Elapsed 0.011 ms (1.146 ms / 100) 1.146 -> 1.149 ( +0.26%) [ +0.00% +0.00% +0.00% / +0.44% +0.26% +0.26%] index_add_ perm_sorted : Elapsed 0.011 ms (1.146 ms / 100) 1.141 -> 1.146 ( +0.44%) [ +0.26% +0.00% +0.18% / +0.53% +0.44% +0.44%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.144 ms / 100) 2.121 -> 2.120 ( -0.05%) [ +0.19% +0.09% +0.00% / -0.05% +0.24% +0.05%] index_select const : Elapsed 0.021 ms (2.125 ms / 100) 2.166 -> 2.170 ( +0.18%) [ +0.23% +0.18% +0.00% / +0.18% +0.97% +1.15%] index_select wrap : Elapsed 0.022 ms (2.171 ms / 100) 2.182 -> 2.175 ( -0.32%) [ +0.05% +0.23% +0.00% / -0.32% +0.78% +0.73%] index_select linear : Elapsed 0.022 ms (2.183 ms / 100) 2.123 -> 2.123 ( +0.00%) [ +0.05% +0.28% +0.00% / +0.00% +0.71% +0.71%] index_select reverse : Elapsed 0.021 ms (2.124 ms / 100) 2.120 -> 2.119 ( -0.05%) [ +0.00% +0.33% +0.28% / +0.00% -0.05% +0.05%] index_select skip64 : Elapsed 0.021 ms (2.120 ms / 100) 2.122 -> 2.114 ( -0.38%) [ +0.00% +0.05% +0.05% / +0.05% -0.24% -0.38%] index_select skip256 : Elapsed 0.021 ms (2.122 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.19% +0.05% +0.09%] index_select spread : Elapsed 0.022 ms (2.161 ms / 100) 2.181 -> 2.180 ( -0.05%) [ +0.46% +0.32% +0.00% / +0.28% -0.05% +0.05%] index_select strided 3 : Elapsed 0.022 ms (2.191 ms / 100) 2.165 -> 2.166 ( +0.05%) [ +0.00% +0.14% +0.00% / +0.05% +0.83% +1.06%] index_select random : Elapsed 0.022 ms (2.165 ms / 100) 2.156 -> 2.160 ( +0.19%) [ +0.00% +0.23% +0.14% / +0.28% +0.19% +0.23%] index_select random_sorted : Elapsed 0.022 ms (2.156 ms / 100) B = [5, 20, 16, 40] (stride (1, 5, 100, 1600)) A = [5, 20, 4, 40] (stride (1, 20, 5, 400)) dim = 2 2.626 -> 2.626 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.08% +0.15%] index_add_ linear : Elapsed 0.026 ms (2.626 ms / 100) 2.572 -> 2.570 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% -0.04% -0.04%] index_copy_ linear : Elapsed 0.026 ms (2.572 ms / 100) 2.623 -> 2.625 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.11% +0.15% +0.08%] index_add_ reverse : Elapsed 0.026 ms (2.629 ms / 100) 2.567 -> 2.572 ( +0.19%) [ +0.00% +0.23% +0.23% / +0.19% +0.19% +0.19%] index_copy_ reverse : Elapsed 0.026 ms (2.567 ms / 100) 2.629 -> 2.626 ( -0.11%) [ +0.15% +0.08% +0.00% / -0.04% -0.11% +0.04%] index_add_ spread : Elapsed 0.026 ms (2.633 ms / 100) 2.574 -> 2.572 ( -0.08%) [ +0.12% +0.16% +0.00% / +0.19% -0.08% +0.19%] index_copy_ spread : Elapsed 0.026 ms (2.577 ms / 100) 2.625 -> 2.626 ( +0.04%) [ +0.19% +0.11% +0.00% / +0.04% +0.11% +0.19%] index_add_ strided 3 : Elapsed 0.026 ms (2.630 ms / 100) 2.574 -> 2.574 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.00% +0.00% +0.08%] index_copy_ strided 3 : Elapsed 0.026 ms (2.577 ms / 100) 2.627 -> 2.626 ( -0.04%) [ +0.11% +0.00% +0.08% / -0.04% +0.38% +0.11%] index_add_ strided 5 : Elapsed 0.026 ms (2.630 ms / 100) 2.573 -> 2.577 ( +0.16%) [ +0.00% +0.12% +0.19% / +0.19% +0.23% +0.16%] index_copy_ strided 5 : Elapsed 0.026 ms (2.573 ms / 100) 2.624 -> 2.626 ( +0.08%) [ +0.27% +0.00% +0.15% / +0.08% +0.34% +0.50%] index_add_ strided 7 : Elapsed 0.026 ms (2.631 ms / 100) 2.575 -> 2.577 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.12% +0.08% +0.31%] index_copy_ strided 7 : Elapsed 0.026 ms (2.576 ms / 100) 2.626 -> 2.630 ( +0.15%) [ +0.11% +0.00% +0.19% / +0.15% +0.15% +0.15%] index_add_ perm : Elapsed 0.026 ms (2.629 ms / 100) 2.571 -> 2.572 ( +0.04%) [ +0.12% +0.00% +0.19% / +0.08% +0.08% +0.04%] index_copy_ perm : Elapsed 0.026 ms (2.574 ms / 100) 2.626 -> 2.626 ( +0.00%) [ +0.00% +0.08% +0.11% / +0.00% +0.30% +0.30%] index_add_ perm_sorted : Elapsed 0.026 ms (2.626 ms / 100) 2.574 -> 2.569 ( -0.19%) [ +0.08% +0.16% +0.00% / +0.00% +0.00% -0.19%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.576 ms / 100) 6.015 -> 6.009 ( -0.10%) [ +0.00% +0.00% +0.02% / -0.10% +0.45% +0.45%] index_select const : Elapsed 0.060 ms (6.015 ms / 100) 6.013 -> 6.014 ( +0.02%) [ +0.08% +0.17% +0.00% / +0.02% +0.33% +0.28%] index_select wrap : Elapsed 0.060 ms (6.018 ms / 100) 6.029 -> 6.024 ( -0.08%) [ +0.00% +0.05% +0.05% / -0.08% +0.25% +0.23%] index_select linear : Elapsed 0.060 ms (6.029 ms / 100) 6.035 -> 6.032 ( -0.05%) [ +0.00% +0.02% +0.03% / -0.05% +0.40% +0.33%] index_select reverse : Elapsed 0.060 ms (6.035 ms / 100) 6.021 -> 6.021 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.35% +0.32%] index_select skip64 : Elapsed 0.060 ms (6.026 ms / 100) 6.017 -> 6.028 ( +0.18%) [ +0.05% +0.00% +0.17% / +0.18% +0.43% +0.38%] index_select skip256 : Elapsed 0.060 ms (6.020 ms / 100) 6.009 -> 6.014 ( +0.08%) [ +0.13% +0.10% +0.00% / +0.08% +0.42% +0.40%] index_select spread : Elapsed 0.060 ms (6.017 ms / 100) 6.017 -> 6.028 ( +0.18%) [ +0.10% +0.12% +0.00% / +0.18% +0.47% +0.35%] index_select strided 3 : Elapsed 0.060 ms (6.023 ms / 100) 6.022 -> 6.022 ( +0.00%) [ +0.05% +0.12% +0.00% / +0.00% +0.37% +0.43%] index_select random : Elapsed 0.060 ms (6.025 ms / 100) 6.017 -> 6.019 ( +0.03%) [ +0.05% +0.00% +0.07% / +0.03% +0.25% +0.32%] index_select random_sorted : Elapsed 0.060 ms (6.020 ms / 100) out_shape = [5, 20, 4, 16] in_shape = [5, 20, 4, 40] idx_dim = 3 B = [5, 20, 4, 16] (stride (1280, 16, 320, 1)) A = [5, 20, 4, 40] (stride (1, 800, 200, 5)) dim = 3 3.934 -> 3.940 ( +0.15%) [ +0.13% +0.00% +0.13% / +0.15% +0.66% +0.64%] index_select const : Elapsed 0.039 ms (3.939 ms / 100) 3.940 -> 3.942 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.58% +0.66%] index_select wrap : Elapsed 0.039 ms (3.944 ms / 100) 3.937 -> 3.940 ( +0.08%) [ +0.10% +0.05% +0.00% / +0.08% +0.53% +0.53%] index_select linear : Elapsed 0.039 ms (3.941 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.51% +0.41%] index_select reverse : Elapsed 0.039 ms (3.937 ms / 100) 3.932 -> 3.942 ( +0.25%) [ +0.00% +0.08% +0.18% / +0.25% +0.69% +0.53%] index_select skip64 : Elapsed 0.039 ms (3.932 ms / 100) 3.940 -> 3.938 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.51% +0.46%] index_select skip256 : Elapsed 0.039 ms (3.944 ms / 100) 3.936 -> 3.935 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.33% +0.36%] index_select spread : Elapsed 0.039 ms (3.938 ms / 100) 3.932 -> 3.936 ( +0.10%) [ +0.20% +0.00% +0.18% / +0.10% +0.61% +0.43%] index_select strided 3 : Elapsed 0.039 ms (3.940 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.10% +0.00% +0.03% / +0.00% +0.41% +0.46%] index_select strided 5 : Elapsed 0.039 ms (3.931 ms / 100) 3.932 -> 3.938 ( +0.15%) [ +0.00% +0.05% +0.10% / +0.15% +0.36% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.932 ms / 100) 3.934 -> 3.936 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.51% +0.53%] index_select strided 8 : Elapsed 0.039 ms (3.936 ms / 100) 3.918 -> 3.931 ( +0.33%) [ +0.18% +0.15% +0.00% / +0.33% +0.59% +0.38%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.925 -> 3.924 ( -0.03%) [ +0.05% +0.10% +0.00% / -0.03% +0.41% +0.46%] index_select random : Elapsed 0.039 ms (3.927 ms / 100) 3.935 -> 3.936 ( +0.03%) [ +0.28% +0.28% +0.00% / +0.03% +0.46% +0.53%] index_select random_sorted : Elapsed 0.039 ms (3.946 ms / 100) 3.932 -> 3.930 ( -0.05%) [ +0.00% +0.03% +0.00% / -0.05% +0.51% +0.46%] index_select perm : Elapsed 0.039 ms (3.932 ms / 100) 3.926 -> 3.926 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.23% +0.38%] index_select perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) B = [5, 20, 4, 16] (stride (1280, 1, 320, 20)) A = [5, 20, 4, 40] (stride (3200, 4, 1, 80)) dim = 3 3.505 -> 3.510 ( +0.14%) [ +0.09% +0.06% +0.00% / +0.14% +0.83% +0.74%] index_select const : Elapsed 0.035 ms (3.508 ms / 100) 3.496 -> 3.496 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.74% +0.80%] index_select wrap : Elapsed 0.035 ms (3.496 ms / 100) 3.501 -> 3.501 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.89% +0.86%] index_select linear : Elapsed 0.035 ms (3.502 ms / 100) 3.497 -> 3.499 ( +0.06%) [ +0.06% +0.00% +0.09% / +0.06% +1.03% +1.03%] index_select reverse : Elapsed 0.035 ms (3.499 ms / 100) 3.497 -> 3.497 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +1.03% +0.92%] index_select skip64 : Elapsed 0.035 ms (3.499 ms / 100) 3.503 -> 3.508 ( +0.14%) [ +0.00% +0.17% +0.11% / +0.14% +0.83% +0.83%] index_select skip256 : Elapsed 0.035 ms (3.503 ms / 100) 3.496 -> 3.498 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +1.00% +1.00%] index_select spread : Elapsed 0.035 ms (3.497 ms / 100) 3.499 -> 3.502 ( +0.09%) [ +0.00% +0.03% +0.00% / +0.09% +0.71% +0.83%] index_select strided 3 : Elapsed 0.035 ms (3.499 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.71% +0.91%] index_select strided 5 : Elapsed 0.035 ms (3.502 ms / 100) 3.494 -> 3.495 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.74% +0.74%] index_select strided 7 : Elapsed 0.035 ms (3.495 ms / 100) 3.506 -> 3.510 ( +0.11%) [ +0.00% +0.14% +0.14% / +0.11% +0.77% +0.74%] index_select strided 8 : Elapsed 0.035 ms (3.506 ms / 100) 3.498 -> 3.499 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.83% +0.94%] index_select strided 16 : Elapsed 0.035 ms (3.499 ms / 100) 3.512 -> 3.516 ( +0.11%) [ +0.03% +0.00% +0.00% / +0.11% +0.80% +0.77%] index_select random : Elapsed 0.035 ms (3.513 ms / 100) 3.495 -> 3.498 ( +0.09%) [ +0.06% +0.00% +0.06% / +0.09% +0.94% +0.83%] index_select random_sorted : Elapsed 0.035 ms (3.497 ms / 100) 3.499 -> 3.506 ( +0.20%) [ +0.03% +0.03% +0.00% / +0.20% +0.94% +1.03%] index_select perm : Elapsed 0.035 ms (3.500 ms / 100) 3.509 -> 3.513 ( +0.11%) [ +0.09% +0.06% +0.00% / +0.11% +0.80% +0.80%] index_select perm_sorted : Elapsed 0.035 ms (3.512 ms / 100) B = [5, 20, 4, 16] (stride (1, 320, 80, 5)) A = [5, 20, 4, 40] (stride (3200, 40, 800, 1)) dim = 3 3.948 -> 3.953 ( +0.13%) [ +0.10% +0.00% +0.15% / +0.13% +0.61% +0.48%] index_select const : Elapsed 0.040 ms (3.952 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.00% +0.00% +0.05% / +0.03% +0.51% +0.51%] index_select wrap : Elapsed 0.039 ms (3.938 ms / 100) 3.937 -> 3.936 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.41% +0.46%] index_select linear : Elapsed 0.039 ms (3.939 ms / 100) 3.919 -> 3.926 ( +0.18%) [ +0.05% +0.05% +0.00% / +0.18% +0.54% +0.51%] index_select reverse : Elapsed 0.039 ms (3.921 ms / 100) 3.933 -> 3.937 ( +0.10%) [ +0.00% +0.00% +0.03% / +0.10% +0.69% +0.69%] index_select skip64 : Elapsed 0.039 ms (3.933 ms / 100) 3.948 -> 3.953 ( +0.13%) [ +0.00% +0.15% +0.05% / +0.13% +0.48% +0.53%] index_select skip256 : Elapsed 0.039 ms (3.948 ms / 100) 3.933 -> 3.935 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.46% +0.43%] index_select spread : Elapsed 0.039 ms (3.937 ms / 100) 3.952 -> 3.953 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.38% +0.33%] index_select strided 3 : Elapsed 0.040 ms (3.954 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.38% +0.38%] index_select strided 5 : Elapsed 0.039 ms (3.925 ms / 100) 3.958 -> 3.958 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.38% +0.38%] index_select strided 7 : Elapsed 0.040 ms (3.958 ms / 100) 3.941 -> 3.944 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.41% +0.38%] index_select strided 8 : Elapsed 0.039 ms (3.942 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.00% +0.15% / +0.05% +0.48% +0.56%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.932 -> 3.937 ( +0.13%) [ +0.00% +0.08% +0.03% / +0.13% +0.71% +0.71%] index_select random : Elapsed 0.039 ms (3.932 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.36% +0.61%] index_select random_sorted : Elapsed 0.039 ms (3.921 ms / 100) 3.926 -> 3.936 ( +0.25%) [ +0.00% +0.08% +0.23% / +0.25% +0.43% +0.25%] index_select perm : Elapsed 0.039 ms (3.926 ms / 100) 3.931 -> 3.930 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.46% +0.43%] index_select perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) B = [5, 20, 4, 16] (stride (320, 1, 1600, 20)) A = [5, 20, 4, 40] (stride (3200, 1, 800, 20)) dim = 3 3.885 -> 3.889 ( +0.10%) [ +0.13% +0.15% +0.00% / +0.10% +1.08% +0.67%] index_select const : Elapsed 0.039 ms (3.890 ms / 100) 3.905 -> 3.898 ( -0.18%) [ +0.08% +0.05% +0.00% / -0.18% +0.54% +0.54%] index_select wrap : Elapsed 0.039 ms (3.908 ms / 100) 3.886 -> 3.888 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.64% +0.72%] index_select linear : Elapsed 0.039 ms (3.888 ms / 100) 3.905 -> 3.922 ( +0.44%) [ +0.44% +0.00% +0.18% / +0.44% +0.69% +0.90%] index_select reverse : Elapsed 0.039 ms (3.922 ms / 100) 3.887 -> 3.889 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.05% +0.82% +0.87%] index_select skip64 : Elapsed 0.039 ms (3.889 ms / 100) 3.881 -> 3.881 ( +0.00%) [ +0.00% +0.10% +0.15% / +0.00% +0.95% +1.08%] index_select skip256 : Elapsed 0.039 ms (3.881 ms / 100) 3.885 -> 3.894 ( +0.23%) [ +0.44% +0.00% +0.03% / +0.23% +0.95% +0.72%] index_select spread : Elapsed 0.039 ms (3.902 ms / 100) 3.896 -> 3.902 ( +0.15%) [ +0.08% +0.05% +0.00% / +0.15% +0.74% +0.87%] index_select strided 3 : Elapsed 0.039 ms (3.899 ms / 100) 3.882 -> 3.887 ( +0.13%) [ +0.15% +0.00% +0.08% / +0.13% +1.06% +0.93%] index_select strided 5 : Elapsed 0.039 ms (3.888 ms / 100) 3.899 -> 3.899 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.77% +0.69%] index_select strided 7 : Elapsed 0.039 ms (3.903 ms / 100) 3.885 -> 3.894 ( +0.23%) [ +0.00% +0.21% +0.18% / +0.23% +0.67% +0.67%] index_select strided 8 : Elapsed 0.039 ms (3.885 ms / 100) 3.879 -> 3.884 ( +0.13%) [ +0.03% +0.00% +0.10% / +0.13% +1.01% +1.11%] index_select strided 16 : Elapsed 0.039 ms (3.880 ms / 100) 3.921 -> 3.921 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.54% +0.54%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.887 -> 3.890 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.87% +0.98%] index_select random_sorted : Elapsed 0.039 ms (3.887 ms / 100) 3.891 -> 3.894 ( +0.08%) [ +0.18% +0.05% +0.00% / +0.08% +0.75% +0.95%] index_select perm : Elapsed 0.039 ms (3.898 ms / 100) 3.906 -> 3.909 ( +0.08%) [ +0.38% +0.00% +0.33% / +0.08% +0.74% +0.82%] index_select perm_sorted : Elapsed 0.039 ms (3.921 ms / 100) out_shape = [16, 20, 40, 4] in_shape = [5, 20, 40, 4] idx_dim = 0 B = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 0 fill_cnt = 5 0.598 -> 0.599 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.17% +0.33%] index_fill_ const : Elapsed 0.006 ms (0.599 ms / 100) 0.595 -> 0.595 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.67% +0.50%] index_fill_ linear : Elapsed 0.006 ms (0.595 ms / 100) 0.591 -> 0.595 ( +0.68%) [ +1.02% +0.68% +0.00% / +0.68% +1.18% +1.18%] index_fill_ reverse : Elapsed 0.006 ms (0.597 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.17% +0.17%] index_fill_ skip64 : Elapsed 0.006 ms (0.599 ms / 100) 0.597 -> 0.598 ( +0.17%) [ +0.34% +0.17% +0.00% / +0.34% +0.34% +0.17%] index_fill_ skip256 : Elapsed 0.006 ms (0.599 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.17% +0.50%] index_fill_ spread : Elapsed 0.006 ms (0.599 ms / 100) 0.597 -> 0.598 ( +0.17%) [ +0.00% +0.17% +0.50% / +0.17% +0.34% +0.50%] index_fill_ strided 3 : Elapsed 0.006 ms (0.597 ms / 100) 0.594 -> 0.598 ( +0.67%) [ +0.17% +0.51% +0.00% / +0.67% +0.84% +0.84%] index_fill_ strided 5 : Elapsed 0.006 ms (0.595 ms / 100) 0.594 -> 0.597 ( +0.51%) [ +0.00% +0.17% +0.17% / +0.84% +0.51% +0.84%] index_fill_ strided 7 : Elapsed 0.006 ms (0.594 ms / 100) 0.594 -> 0.595 ( +0.17%) [ +0.00% +0.51% +0.00% / +0.17% +1.01% +0.84%] index_fill_ strided 8 : Elapsed 0.006 ms (0.594 ms / 100) 0.596 -> 0.597 ( +0.17%) [ +0.00% +0.00% +0.34% / +0.17% +0.34% +0.34%] index_fill_ random : Elapsed 0.006 ms (0.596 ms / 100) 0.594 -> 0.598 ( +0.67%) [ +0.34% +0.00% +0.84% / +0.67% +0.67% +0.84%] index_fill_ random_sorted : Elapsed 0.006 ms (0.596 ms / 100) 0.594 -> 0.595 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.51% +1.18%] index_fill_ perm : Elapsed 0.006 ms (0.594 ms / 100) 0.594 -> 0.592 ( -0.34%) [ +0.00% +0.34% +0.34% / -0.34% +0.84% +0.51%] index_fill_ perm_sorted : Elapsed 0.006 ms (0.594 ms / 100) B = [16, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [5, 20, 40, 4] (stride (20, 1, 400, 100)) dim = 0 2.418 -> 2.418 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.50% +0.33%] index_add_ linear : Elapsed 0.024 ms (2.420 ms / 100) 2.357 -> 2.360 ( +0.13%) [ +0.13% +0.00% +0.04% / +0.13% +0.47% +0.47%] index_copy_ linear : Elapsed 0.024 ms (2.360 ms / 100) 2.413 -> 2.419 ( +0.25%) [ +0.00% +0.21% +0.12% / +0.25% +0.58% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.353 -> 2.358 ( +0.21%) [ +0.21% +0.00% +0.08% / +0.21% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.024 ms (2.358 ms / 100) 2.416 -> 2.419 ( +0.12%) [ +0.17% +0.00% +0.04% / +0.12% +0.75% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.420 ms / 100) 2.352 -> 2.356 ( +0.17%) [ +0.26% +0.00% +0.21% / +0.17% +0.51% +0.55%] index_copy_ spread : Elapsed 0.024 ms (2.358 ms / 100) 2.420 -> 2.416 ( -0.17%) [ +0.04% +0.00% +0.00% / -0.17% +0.29% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.355 -> 2.357 ( +0.08%) [ +0.04% +0.00% +0.17% / +0.08% +0.34% +0.47%] index_copy_ strided 3 : Elapsed 0.024 ms (2.356 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.21% +0.00% +0.17% / +0.12% +0.70% +0.66%] index_add_ strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.351 -> 2.357 ( +0.26%) [ +0.26% +0.09% +0.00% / +0.26% +0.64% +0.55%] index_copy_ strided 5 : Elapsed 0.024 ms (2.357 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.25% +0.25% +0.00% / -0.04% +0.50% +0.62%] index_add_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.353 -> 2.352 ( -0.04%) [ +0.00% +0.17% +0.00% / -0.04% +0.47% +0.38%] index_copy_ strided 7 : Elapsed 0.024 ms (2.353 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.70% +0.83%] index_add_ perm : Elapsed 0.024 ms (2.413 ms / 100) 2.353 -> 2.361 ( +0.34%) [ +0.13% +0.00% +0.00% / +0.34% +0.64% +0.59%] index_copy_ perm : Elapsed 0.024 ms (2.356 ms / 100) 2.417 -> 2.413 ( -0.17%) [ +0.00% +0.00% +0.04% / -0.17% +0.66% +0.87%] index_add_ perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.351 -> 2.352 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.81% +0.94%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.353 ms / 100) 5.219 -> 5.229 ( +0.19%) [ +0.42% +0.00% +0.21% / +0.19% +0.92% +0.92%] index_select const : Elapsed 0.052 ms (5.241 ms / 100) 5.226 -> 5.229 ( +0.06%) [ +0.08% +0.00% +0.06% / +0.06% +0.65% +0.61%] index_select wrap : Elapsed 0.052 ms (5.230 ms / 100) 5.235 -> 5.237 ( +0.04%) [ +0.19% +0.17% +0.00% / +0.04% +0.55% +0.63%] index_select linear : Elapsed 0.052 ms (5.245 ms / 100) 5.221 -> 5.216 ( -0.10%) [ +0.19% +0.00% +0.11% / -0.10% +0.48% +0.54%] index_select reverse : Elapsed 0.052 ms (5.231 ms / 100) 5.224 -> 5.225 ( +0.02%) [ +0.21% +0.00% +0.23% / +0.02% +0.80% +0.57%] index_select skip64 : Elapsed 0.052 ms (5.235 ms / 100) 5.219 -> 5.239 ( +0.38%) [ +0.44% +0.00% +0.33% / +0.38% +0.88% +0.77%] index_select skip256 : Elapsed 0.052 ms (5.242 ms / 100) 5.224 -> 5.227 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.77% +0.65%] index_select spread : Elapsed 0.052 ms (5.224 ms / 100) 5.213 -> 5.214 ( +0.02%) [ +0.04% +0.10% +0.00% / +0.02% +0.63% +0.73%] index_select strided 3 : Elapsed 0.052 ms (5.215 ms / 100) 5.223 -> 5.226 ( +0.06%) [ +0.00% +0.13% +0.04% / +0.06% +0.77% +0.90%] index_select random : Elapsed 0.052 ms (5.223 ms / 100) 5.219 -> 5.220 ( +0.02%) [ +0.00% +0.04% +0.02% / +0.02% +0.84% +0.96%] index_select random_sorted : Elapsed 0.052 ms (5.219 ms / 100) B = [16, 20, 40, 4] (stride (40, 2560, 1, 640)) A = [5, 20, 40, 4] (stride (4, 20, 400, 1)) dim = 0 2.422 -> 2.426 ( +0.17%) [ +0.29% +0.00% +0.25% / +0.17% +0.33% +0.33%] index_add_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.347 -> 2.356 ( +0.38%) [ +0.00% +0.21% +0.34% / +0.38% +0.43% +0.68%] index_copy_ linear : Elapsed 0.023 ms (2.347 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.29% +0.37% +0.00% / -0.04% +0.29% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.431 ms / 100) 2.349 -> 2.354 ( +0.21%) [ +0.21% +0.38% +0.00% / +0.21% +0.68% +0.68%] index_copy_ reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.25% +0.12% +0.00% / +0.04% +0.45% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.432 ms / 100) 2.352 -> 2.349 ( -0.13%) [ +0.00% +0.26% +0.21% / -0.13% +0.38% +0.43%] index_copy_ spread : Elapsed 0.024 ms (2.352 ms / 100) 2.421 -> 2.422 ( +0.04%) [ +0.45% +0.00% +0.37% / +0.04% +0.25% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.432 ms / 100) 2.345 -> 2.348 ( +0.13%) [ +0.38% +0.00% +0.43% / +0.13% +0.38% +0.68%] index_copy_ strided 3 : Elapsed 0.024 ms (2.354 ms / 100) 2.434 -> 2.440 ( +0.25%) [ +0.00% +0.00% +0.25% / +0.29% +0.25% +0.37%] index_add_ strided 5 : Elapsed 0.024 ms (2.434 ms / 100) 2.355 -> 2.361 ( +0.25%) [ +0.17% +0.00% +0.13% / +0.25% +0.51% +0.64%] index_copy_ strided 5 : Elapsed 0.024 ms (2.359 ms / 100) 2.429 -> 2.427 ( -0.08%) [ +0.16% +0.00% +0.04% / -0.08% +0.33% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.433 ms / 100) 2.352 -> 2.354 ( +0.09%) [ +0.17% +0.00% +0.04% / +0.09% +0.43% +0.34%] index_copy_ strided 7 : Elapsed 0.024 ms (2.356 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.21% +0.21% +0.00% / +0.17% +0.00% +0.41%] index_add_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.00% +0.13% +0.09% / +0.17% +0.26% +0.34%] index_copy_ perm : Elapsed 0.024 ms (2.351 ms / 100) 2.431 -> 2.426 ( -0.21%) [ +0.16% +0.25% +0.00% / +0.00% +0.04% -0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) 2.351 -> 2.359 ( +0.34%) [ +0.34% +0.43% +0.00% / +0.34% +0.60% +0.55%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.359 ms / 100) 5.052 -> 5.066 ( +0.28%) [ +0.34% +0.00% +0.28% / +0.28% +0.81% +0.95%] index_select const : Elapsed 0.051 ms (5.069 ms / 100) 5.108 -> 5.100 ( -0.16%) [ +0.02% +0.00% +0.00% / -0.16% +0.57% +0.57%] index_select wrap : Elapsed 0.051 ms (5.109 ms / 100) 5.099 -> 5.106 ( +0.14%) [ +0.14% +0.00% +0.04% / +0.14% +0.53% +0.55%] index_select linear : Elapsed 0.051 ms (5.106 ms / 100) 5.074 -> 5.096 ( +0.43%) [ +0.00% +0.00% +0.35% / +0.43% +0.79% +0.63%] index_select reverse : Elapsed 0.051 ms (5.074 ms / 100) 5.060 -> 5.064 ( +0.08%) [ +0.14% +0.18% +0.00% / +0.08% +0.63% +0.63%] index_select skip64 : Elapsed 0.051 ms (5.067 ms / 100) 5.062 -> 5.065 ( +0.06%) [ +0.00% +0.16% +0.02% / +0.06% +0.51% +0.59%] index_select skip256 : Elapsed 0.051 ms (5.062 ms / 100) 5.092 -> 5.099 ( +0.14%) [ +0.04% +0.06% +0.00% / +0.14% +0.49% +0.24%] index_select spread : Elapsed 0.051 ms (5.094 ms / 100) 5.096 -> 5.097 ( +0.02%) [ +0.16% +0.00% +0.12% / +0.02% +0.71% +0.47%] index_select strided 3 : Elapsed 0.051 ms (5.104 ms / 100) 5.096 -> 5.109 ( +0.26%) [ +0.33% +0.27% +0.00% / +0.26% +0.55% +0.55%] index_select random : Elapsed 0.051 ms (5.113 ms / 100) 5.090 -> 5.091 ( +0.02%) [ +0.00% +0.02% +0.10% / +0.02% +0.61% +0.65%] index_select random_sorted : Elapsed 0.051 ms (5.090 ms / 100) B = [16, 20, 40, 4] (stride (1, 2560, 16, 640)) A = [5, 20, 40, 4] (stride (3200, 1, 20, 800)) dim = 0 2.458 -> 2.467 ( +0.37%) [ +0.41% +0.00% +0.08% / +0.37% +0.57% +1.10%] index_add_ linear : Elapsed 0.025 ms (2.468 ms / 100) 2.416 -> 2.421 ( +0.21%) [ +0.17% +0.00% +0.12% / +0.21% +0.66% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.420 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.16% +0.00% / +0.12% +0.49% +0.24%] index_add_ reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.50% +0.62%] index_copy_ reverse : Elapsed 0.024 ms (2.418 ms / 100) 2.495 -> 2.504 ( +0.36%) [ +0.12% +0.28% +0.00% / +0.36% +0.96% +0.88%] index_add_ spread : Elapsed 0.025 ms (2.498 ms / 100) 2.491 -> 2.496 ( +0.20%) [ +0.00% +0.12% +0.16% / +0.20% +0.72% +0.88%] index_copy_ spread : Elapsed 0.025 ms (2.491 ms / 100) 2.497 -> 2.493 ( -0.16%) [ +0.12% +0.08% +0.00% / -0.16% +0.60% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.500 ms / 100) 2.489 -> 2.487 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.48% +0.36%] index_copy_ strided 3 : Elapsed 0.025 ms (2.489 ms / 100) 2.485 -> 2.486 ( +0.04%) [ +0.16% +0.16% +0.00% / +0.04% +0.60% +0.80%] index_add_ strided 5 : Elapsed 0.025 ms (2.489 ms / 100) 2.483 -> 2.481 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.44% +0.68%] index_copy_ strided 5 : Elapsed 0.025 ms (2.485 ms / 100) 2.490 -> 2.495 ( +0.20%) [ +0.00% +0.04% +0.12% / +0.20% +0.56% +0.48%] index_add_ strided 7 : Elapsed 0.025 ms (2.490 ms / 100) 2.486 -> 2.482 ( -0.16%) [ +0.00% +0.12% +0.00% / -0.16% +0.52% +0.52%] index_copy_ strided 7 : Elapsed 0.025 ms (2.486 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.00% +0.04% +0.24% / +0.04% +0.53% +0.49%] index_add_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.00% +0.25% +0.00% / +0.04% +0.62% +0.66%] index_copy_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.470 -> 2.475 ( +0.20%) [ +0.08% +0.12% +0.00% / +0.20% +0.73% +0.69%] index_add_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 2.430 -> 2.427 ( -0.12%) [ +0.00% +0.04% +0.04% / -0.12% +0.41% +0.78%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) 5.382 -> 5.383 ( +0.02%) [ +0.13% +0.24% +0.00% / +0.02% +0.87% +0.84%] index_select const : Elapsed 0.054 ms (5.389 ms / 100) 5.353 -> 5.348 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.56% +0.50%] index_select wrap : Elapsed 0.054 ms (5.353 ms / 100) 5.412 -> 5.418 ( +0.11%) [ +0.07% +0.00% +0.13% / +0.11% +0.79% +0.68%] index_select linear : Elapsed 0.054 ms (5.416 ms / 100) 5.360 -> 5.373 ( +0.24%) [ +0.00% +0.07% +0.26% / +0.24% +0.80% +0.62%] index_select reverse : Elapsed 0.054 ms (5.360 ms / 100) 5.369 -> 5.368 ( -0.02%) [ +0.22% +0.00% +0.09% / -0.02% +1.10% +1.23%] index_select skip64 : Elapsed 0.054 ms (5.381 ms / 100) 5.383 -> 5.399 ( +0.30%) [ +0.00% +0.02% +0.22% / +0.30% +0.78% +0.72%] index_select skip256 : Elapsed 0.054 ms (5.383 ms / 100) 5.374 -> 5.376 ( +0.04%) [ +0.07% +0.00% +0.02% / +0.04% +0.60% +0.65%] index_select spread : Elapsed 0.054 ms (5.378 ms / 100) 5.403 -> 5.408 ( +0.09%) [ +0.07% +0.24% +0.00% / +0.09% +0.74% +0.83%] index_select strided 3 : Elapsed 0.054 ms (5.407 ms / 100) 5.370 -> 5.383 ( +0.24%) [ +0.09% +0.00% +0.02% / +0.24% +0.71% +0.80%] index_select random : Elapsed 0.054 ms (5.375 ms / 100) 5.351 -> 5.360 ( +0.17%) [ +0.02% +0.00% +0.00% / +0.17% +0.71% +0.65%] index_select random_sorted : Elapsed 0.054 ms (5.352 ms / 100) B = [16, 20, 40, 4] (stride (1, 64, 1280, 16)) A = [5, 20, 40, 4] (stride (20, 1, 100, 4000)) dim = 0 2.560 -> 2.563 ( +0.12%) [ +0.20% +0.04% +0.00% / +0.12% +0.35% +0.39%] index_add_ linear : Elapsed 0.026 ms (2.565 ms / 100) 2.500 -> 2.507 ( +0.28%) [ +0.08% +0.12% +0.00% / +0.48% +0.28% +0.44%] index_copy_ linear : Elapsed 0.025 ms (2.502 ms / 100) 2.553 -> 2.563 ( +0.39%) [ +0.31% +0.00% +0.16% / +0.63% +0.39% +0.47%] index_add_ reverse : Elapsed 0.026 ms (2.561 ms / 100) 2.499 -> 2.505 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.28% +0.24% +0.36%] index_copy_ reverse : Elapsed 0.025 ms (2.499 ms / 100) 2.589 -> 2.593 ( +0.15%) [ +0.12% +0.00% +0.04% / +0.42% +0.15% +0.31%] index_add_ spread : Elapsed 0.026 ms (2.592 ms / 100) 2.567 -> 2.570 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.35% +0.12% +0.16%] index_copy_ spread : Elapsed 0.026 ms (2.569 ms / 100) 2.589 -> 2.591 ( +0.08%) [ +0.31% +0.12% +0.00% / +0.08% +0.31% +0.50%] index_add_ strided 3 : Elapsed 0.026 ms (2.597 ms / 100) 2.572 -> 2.571 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.16% +0.12%] index_copy_ strided 3 : Elapsed 0.026 ms (2.572 ms / 100) 2.585 -> 2.589 ( +0.15%) [ +0.04% +0.23% +0.00% / +0.46% +0.39% +0.15%] index_add_ strided 5 : Elapsed 0.026 ms (2.586 ms / 100) 2.565 -> 2.567 ( +0.08%) [ +0.04% +0.16% +0.00% / +0.47% +0.23% +0.08%] index_copy_ strided 5 : Elapsed 0.026 ms (2.566 ms / 100) 2.583 -> 2.588 ( +0.19%) [ +0.19% +0.15% +0.00% / +0.23% +0.19% +0.46%] index_add_ strided 7 : Elapsed 0.026 ms (2.588 ms / 100) 2.566 -> 2.566 ( +0.00%) [ +0.19% +0.08% +0.00% / +0.00% +0.04% +0.16%] index_copy_ strided 7 : Elapsed 0.026 ms (2.571 ms / 100) 2.593 -> 2.596 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.35% +0.15%] index_add_ perm : Elapsed 0.026 ms (2.594 ms / 100) 2.571 -> 2.573 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.31% +0.08% +0.23%] index_copy_ perm : Elapsed 0.026 ms (2.572 ms / 100) 2.588 -> 2.585 ( -0.12%) [ +0.12% +0.12% +0.00% / -0.12% +0.43% +0.35%] index_add_ perm_sorted : Elapsed 0.026 ms (2.591 ms / 100) 2.568 -> 2.567 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.19% +0.31%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.570 ms / 100) 5.648 -> 5.647 ( -0.02%) [ +0.04% +0.04% +0.00% / -0.02% +0.37% +0.37%] index_select const : Elapsed 0.056 ms (5.650 ms / 100) 5.620 -> 5.627 ( +0.12%) [ +0.00% +0.23% +0.09% / +0.12% +0.43% +0.64%] index_select wrap : Elapsed 0.056 ms (5.620 ms / 100) 5.651 -> 5.649 ( -0.04%) [ +0.00% +0.07% +0.04% / -0.04% +0.48% +0.53%] index_select linear : Elapsed 0.057 ms (5.651 ms / 100) 5.633 -> 5.654 ( +0.37%) [ +0.21% +0.32% +0.00% / +0.37% +0.67% +0.76%] index_select reverse : Elapsed 0.056 ms (5.645 ms / 100) 5.625 -> 5.610 ( -0.27%) [ +0.21% +0.00% +0.20% / -0.27% +0.37% +0.66%] index_select skip64 : Elapsed 0.056 ms (5.637 ms / 100) 5.606 -> 5.621 ( +0.27%) [ +0.00% +0.41% +0.00% / +0.27% +0.98% +1.07%] index_select skip256 : Elapsed 0.056 ms (5.606 ms / 100) 5.627 -> 5.635 ( +0.14%) [ +0.11% +0.14% +0.00% / +0.14% +0.62% +0.59%] index_select spread : Elapsed 0.056 ms (5.633 ms / 100) 5.620 -> 5.612 ( -0.14%) [ +0.09% +0.11% +0.00% / -0.14% +0.50% +1.53%] index_select strided 3 : Elapsed 0.056 ms (5.625 ms / 100) 5.639 -> 5.642 ( +0.05%) [ +0.00% +0.00% +0.07% / +0.05% +0.64% +0.48%] index_select random : Elapsed 0.056 ms (5.639 ms / 100) 5.623 -> 5.626 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.60% +0.71%] index_select random_sorted : Elapsed 0.056 ms (5.624 ms / 100) B = [16, 20, 40, 4] (stride (40, 640, 1, 12800)) A = [5, 20, 40, 4] (stride (40, 200, 1, 4000)) dim = 0 2.393 -> 2.392 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.50% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.335 -> 2.337 ( +0.09%) [ +0.00% +0.00% +0.04% / +0.09% +0.30% +0.34%] index_copy_ linear : Elapsed 0.023 ms (2.335 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.67% +0.54%] index_add_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.332 -> 2.332 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.69% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.333 ms / 100) 2.390 -> 2.397 ( +0.29%) [ +0.29% +0.13% +0.00% / +0.29% +0.63% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.329 -> 2.328 ( -0.04%) [ +0.00% +0.09% +0.21% / -0.04% +0.56% +0.60%] index_copy_ spread : Elapsed 0.023 ms (2.329 ms / 100) 2.388 -> 2.391 ( +0.13%) [ +0.00% +0.17% +0.00% / +0.13% +0.42% +0.63%] index_add_ strided 3 : Elapsed 0.024 ms (2.388 ms / 100) 2.323 -> 2.321 ( -0.09%) [ +0.22% +0.00% +0.00% / -0.09% +0.39% +0.82%] index_copy_ strided 3 : Elapsed 0.023 ms (2.328 ms / 100) 2.385 -> 2.393 ( +0.34%) [ +0.00% +0.25% +0.29% / +0.34% +0.84% +0.92%] index_add_ strided 5 : Elapsed 0.024 ms (2.385 ms / 100) 2.326 -> 2.328 ( +0.09%) [ +0.00% +0.04% +0.13% / +0.09% +1.03% +0.86%] index_copy_ strided 5 : Elapsed 0.023 ms (2.326 ms / 100) 2.382 -> 2.386 ( +0.17%) [ +0.00% +0.17% +0.42% / +0.17% +0.97% +0.92%] index_add_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.325 -> 2.331 ( +0.26%) [ +0.00% +0.17% +0.09% / +0.26% +0.60% +0.90%] index_copy_ strided 7 : Elapsed 0.023 ms (2.325 ms / 100) 2.385 -> 2.391 ( +0.25%) [ +0.00% +0.25% +0.08% / +0.25% +0.63% +0.67%] index_add_ perm : Elapsed 0.024 ms (2.385 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.34% +0.64%] index_copy_ perm : Elapsed 0.023 ms (2.330 ms / 100) 2.388 -> 2.390 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.92% +0.75%] index_add_ perm_sorted : Elapsed 0.024 ms (2.388 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.13% +0.09% +0.00% / +0.13% +0.82% +0.86%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.329 ms / 100) 4.894 -> 4.904 ( +0.20%) [ +0.00% +0.22% +0.02% / +0.20% +0.74% +0.61%] index_select const : Elapsed 0.049 ms (4.894 ms / 100) 4.954 -> 4.955 ( +0.02%) [ +0.00% +0.06% +0.00% / +0.02% +0.77% +0.67%] index_select wrap : Elapsed 0.050 ms (4.954 ms / 100) 4.970 -> 4.972 ( +0.04%) [ +0.14% +0.14% +0.00% / +0.04% +0.93% +0.82%] index_select linear : Elapsed 0.050 ms (4.977 ms / 100) 4.991 -> 5.005 ( +0.28%) [ +0.30% +0.00% +0.10% / +0.28% +0.76% +1.04%] index_select reverse : Elapsed 0.050 ms (5.006 ms / 100) 4.887 -> 4.891 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.08% +0.72% +0.65%] index_select skip64 : Elapsed 0.049 ms (4.887 ms / 100) 4.894 -> 4.904 ( +0.20%) [ +0.00% +0.10% +0.16% / +0.20% +0.67% +0.80%] index_select skip256 : Elapsed 0.049 ms (4.894 ms / 100) 4.955 -> 4.964 ( +0.18%) [ +0.00% +0.16% +0.08% / +0.18% +0.61% +0.79%] index_select spread : Elapsed 0.050 ms (4.955 ms / 100) 4.965 -> 4.962 ( -0.06%) [ +0.16% +0.00% +0.26% / -0.06% +0.68% +0.83%] index_select strided 3 : Elapsed 0.050 ms (4.973 ms / 100) 4.952 -> 4.960 ( +0.16%) [ +0.02% +0.02% +0.00% / +0.16% +0.67% +0.81%] index_select random : Elapsed 0.050 ms (4.953 ms / 100) 4.952 -> 4.954 ( +0.04%) [ +0.04% +0.00% +0.06% / +0.04% +0.73% +0.81%] index_select random_sorted : Elapsed 0.050 ms (4.954 ms / 100) out_shape = [5, 16, 40, 4] in_shape = [5, 20, 40, 4] idx_dim = 1 B = [5, 16, 40, 4] (stride (2560, 160, 4, 1)) A = [5, 20, 40, 4] (stride (3200, 1, 80, 20)) dim = 1 1.347 -> 1.348 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +1.04% +1.48%] index_select const : Elapsed 0.013 ms (1.349 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +1.12% +1.19%] index_select wrap : Elapsed 0.013 ms (1.342 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.89% +0.82%] index_select linear : Elapsed 0.013 ms (1.344 ms / 100) 1.342 -> 1.342 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.82% +0.89%] index_select reverse : Elapsed 0.013 ms (1.343 ms / 100) 1.347 -> 1.347 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.52% +0.59%] index_select skip64 : Elapsed 0.013 ms (1.348 ms / 100) 1.349 -> 1.350 ( +0.07%) [ +0.15% +0.22% +0.00% / +0.07% +0.74% +0.74%] index_select skip256 : Elapsed 0.014 ms (1.351 ms / 100) 1.341 -> 1.341 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.82% +0.97%] index_select spread : Elapsed 0.013 ms (1.343 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.74% +0.89%] index_select strided 3 : Elapsed 0.013 ms (1.347 ms / 100) 1.345 -> 1.346 ( +0.07%) [ +0.00% +0.22% +0.00% / +0.07% +0.74% +0.82%] index_select strided 5 : Elapsed 0.013 ms (1.345 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_select strided 7 : Elapsed 0.013 ms (1.346 ms / 100) 1.346 -> 1.347 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.67% +0.74%] index_select strided 8 : Elapsed 0.013 ms (1.348 ms / 100) 1.343 -> 1.343 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.67% +0.52%] index_select strided 16 : Elapsed 0.013 ms (1.343 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.30%] index_select random : Elapsed 0.013 ms (1.344 ms / 100) 1.348 -> 1.348 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.52% +0.67%] index_select random_sorted : Elapsed 0.013 ms (1.349 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.22% +0.30% +0.00% / +0.15% +0.45% +0.60%] index_select perm : Elapsed 0.013 ms (1.347 ms / 100) 1.351 -> 1.352 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.22% +0.22%] index_select perm_sorted : Elapsed 0.014 ms (1.352 ms / 100) B = [5, 16, 40, 4] (stride (2560, 1, 64, 16)) A = [5, 20, 40, 4] (stride (3200, 40, 1, 800)) dim = 1 3.547 -> 3.549 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.62% +0.73%] index_select const : Elapsed 0.035 ms (3.548 ms / 100) 3.551 -> 3.552 ( +0.03%) [ +0.06% +0.00% +0.03% / +0.03% +0.45% +0.59%] index_select wrap : Elapsed 0.036 ms (3.553 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.51% +0.54%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.523 -> 3.530 ( +0.20%) [ +0.00% +0.20% +0.14% / +0.20% +0.62% +0.82%] index_select reverse : Elapsed 0.035 ms (3.523 ms / 100) 3.561 -> 3.565 ( +0.11%) [ +0.14% +0.00% +0.20% / +0.11% +0.65% +0.53%] index_select skip64 : Elapsed 0.036 ms (3.566 ms / 100) 3.562 -> 3.564 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.62% +0.76%] index_select skip256 : Elapsed 0.036 ms (3.562 ms / 100) 3.546 -> 3.544 ( -0.06%) [ +0.00% +0.08% +0.06% / -0.06% +0.56% +0.56%] index_select spread : Elapsed 0.035 ms (3.546 ms / 100) 3.554 -> 3.558 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +0.51% +1.10%] index_select strided 3 : Elapsed 0.036 ms (3.556 ms / 100) 3.543 -> 3.544 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.48% +0.51%] index_select strided 5 : Elapsed 0.035 ms (3.544 ms / 100) 3.543 -> 3.543 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.65% +0.79%] index_select strided 7 : Elapsed 0.035 ms (3.544 ms / 100) 3.549 -> 3.549 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.85% +0.82%] index_select strided 8 : Elapsed 0.035 ms (3.550 ms / 100) 3.541 -> 3.544 ( +0.08%) [ +0.00% +0.00% +0.11% / +0.08% +0.76% +0.68%] index_select strided 16 : Elapsed 0.035 ms (3.541 ms / 100) 3.535 -> 3.535 ( +0.00%) [ +0.06% +0.00% +0.08% / +0.00% +0.74% +0.74%] index_select random : Elapsed 0.035 ms (3.537 ms / 100) 3.544 -> 3.538 ( -0.17%) [ +0.00% +0.06% +0.03% / -0.17% +0.76% +0.79%] index_select random_sorted : Elapsed 0.035 ms (3.544 ms / 100) 3.549 -> 3.561 ( +0.34%) [ +0.00% +0.03% +0.06% / +0.34% +0.68% +0.65%] index_select perm : Elapsed 0.035 ms (3.549 ms / 100) 3.535 -> 3.533 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.59% +0.65%] index_select perm_sorted : Elapsed 0.035 ms (3.535 ms / 100) B = [5, 16, 40, 4] (stride (160, 800, 4, 1)) A = [5, 20, 40, 4] (stride (3200, 40, 1, 800)) dim = 1 3.534 -> 3.535 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.79% +0.79%] index_select const : Elapsed 0.035 ms (3.535 ms / 100) 3.533 -> 3.532 ( -0.03%) [ +0.00% +0.03% +0.06% / -0.03% +0.59% +0.85%] index_select wrap : Elapsed 0.035 ms (3.533 ms / 100) 3.527 -> 3.527 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.00% +0.85% +0.85%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.534 -> 3.536 ( +0.06%) [ +0.14% +0.00% +0.17% / +0.06% +1.02% +0.85%] index_select reverse : Elapsed 0.035 ms (3.539 ms / 100) 3.548 -> 3.554 ( +0.17%) [ +0.20% +0.00% +0.14% / +0.17% +0.85% +0.85%] index_select skip64 : Elapsed 0.036 ms (3.555 ms / 100) 3.536 -> 3.537 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.59% +0.76%] index_select skip256 : Elapsed 0.035 ms (3.537 ms / 100) 3.530 -> 3.536 ( +0.17%) [ +0.14% +0.17% +0.00% / +0.17% +0.79% +0.76%] index_select spread : Elapsed 0.035 ms (3.535 ms / 100) 3.537 -> 3.543 ( +0.17%) [ +0.00% +0.25% +0.14% / +0.17% +0.90% +0.85%] index_select strided 3 : Elapsed 0.035 ms (3.537 ms / 100) 3.553 -> 3.548 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.68% +0.73%] index_select strided 5 : Elapsed 0.036 ms (3.553 ms / 100) 3.533 -> 3.534 ( +0.03%) [ +0.06% +0.00% +0.03% / +0.03% +0.51% +0.74%] index_select strided 7 : Elapsed 0.035 ms (3.535 ms / 100) 3.546 -> 3.550 ( +0.11%) [ +0.06% +0.00% +0.03% / +0.11% +0.65% +0.71%] index_select strided 8 : Elapsed 0.035 ms (3.548 ms / 100) 3.544 -> 3.546 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.71% +0.79%] index_select strided 16 : Elapsed 0.035 ms (3.545 ms / 100) 3.538 -> 3.548 ( +0.28%) [ +0.34% +0.20% +0.00% / +0.28% +0.57% +0.68%] index_select random : Elapsed 0.035 ms (3.550 ms / 100) 3.537 -> 3.537 ( +0.00%) [ +0.11% +0.06% +0.00% / +0.00% +0.59% +0.74%] index_select random_sorted : Elapsed 0.035 ms (3.541 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.91% +0.96%] index_select perm : Elapsed 0.035 ms (3.533 ms / 100) 3.535 -> 3.536 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.51% +0.37%] index_select perm_sorted : Elapsed 0.035 ms (3.536 ms / 100) B = [5, 16, 40, 4] (stride (160, 800, 4, 1)) A = [5, 20, 40, 4] (stride (800, 1, 20, 4000)) dim = 1 3.798 -> 3.798 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.55% +0.55%] index_select const : Elapsed 0.038 ms (3.798 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.53% +0.50%] index_select wrap : Elapsed 0.038 ms (3.798 ms / 100) 3.797 -> 3.799 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.55% +0.74%] index_select linear : Elapsed 0.038 ms (3.798 ms / 100) 3.791 -> 3.795 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.66% +0.53%] index_select reverse : Elapsed 0.038 ms (3.795 ms / 100) 3.805 -> 3.808 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.60% +0.63%] index_select skip64 : Elapsed 0.038 ms (3.805 ms / 100) 3.797 -> 3.798 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.58% +0.61%] index_select skip256 : Elapsed 0.038 ms (3.797 ms / 100) 3.797 -> 3.800 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.50%] index_select spread : Elapsed 0.038 ms (3.800 ms / 100) 3.808 -> 3.811 ( +0.08%) [ +0.11% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_select strided 3 : Elapsed 0.038 ms (3.812 ms / 100) 3.793 -> 3.795 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.55% +0.58%] index_select strided 5 : Elapsed 0.038 ms (3.796 ms / 100) 3.797 -> 3.796 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.63% +0.63%] index_select strided 7 : Elapsed 0.038 ms (3.797 ms / 100) 3.794 -> 3.796 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.74% +0.76%] index_select strided 8 : Elapsed 0.038 ms (3.796 ms / 100) 3.794 -> 3.793 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.50% +0.69%] index_select strided 16 : Elapsed 0.038 ms (3.795 ms / 100) 3.806 -> 3.805 ( -0.03%) [ +0.00% +0.05% +0.03% / -0.03% +0.60% +0.66%] index_select random : Elapsed 0.038 ms (3.806 ms / 100) 3.796 -> 3.795 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.68% +0.68%] index_select random_sorted : Elapsed 0.038 ms (3.797 ms / 100) 3.794 -> 3.795 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.74% +0.82%] index_select perm : Elapsed 0.038 ms (3.794 ms / 100) 3.805 -> 3.804 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.66% +0.60%] index_select perm_sorted : Elapsed 0.038 ms (3.805 ms / 100) B = [5, 16, 40, 4] (stride (40, 800, 1, 200)) A = [5, 20, 40, 4] (stride (1, 800, 20, 5)) dim = 1 3.564 -> 3.569 ( +0.14%) [ +0.20% +0.20% +0.00% / +0.14% +0.98% +0.98%] index_select const : Elapsed 0.036 ms (3.571 ms / 100) 3.560 -> 3.560 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.90%] index_select wrap : Elapsed 0.036 ms (3.561 ms / 100) 3.556 -> 3.557 ( +0.03%) [ +0.08% +0.06% +0.00% / +0.03% +0.84% +0.84%] index_select linear : Elapsed 0.036 ms (3.559 ms / 100) 3.563 -> 3.568 ( +0.14%) [ +0.17% +0.14% +0.00% / +0.14% +0.79% +0.84%] index_select reverse : Elapsed 0.036 ms (3.569 ms / 100) 3.569 -> 3.568 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.59% +0.76%] index_select skip64 : Elapsed 0.036 ms (3.570 ms / 100) 3.568 -> 3.568 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.53% +0.56%] index_select skip256 : Elapsed 0.036 ms (3.569 ms / 100) 3.558 -> 3.566 ( +0.22%) [ +0.08% +0.14% +0.00% / +0.22% +0.90% +0.79%] index_select spread : Elapsed 0.036 ms (3.561 ms / 100) 3.559 -> 3.565 ( +0.17%) [ +0.17% +0.20% +0.00% / +0.17% +0.87% +0.96%] index_select strided 3 : Elapsed 0.036 ms (3.565 ms / 100) 3.570 -> 3.570 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.73% +0.70%] index_select strided 5 : Elapsed 0.036 ms (3.570 ms / 100) 3.571 -> 3.577 ( +0.17%) [ +0.31% +0.00% +0.06% / +0.17% +0.78% +0.84%] index_select strided 7 : Elapsed 0.036 ms (3.582 ms / 100) 3.567 -> 3.569 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.70% +0.70%] index_select strided 8 : Elapsed 0.036 ms (3.567 ms / 100) 3.567 -> 3.570 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.84% +0.73%] index_select strided 16 : Elapsed 0.036 ms (3.570 ms / 100) 3.566 -> 3.570 ( +0.11%) [ +0.03% +0.03% +0.00% / +0.11% +0.59% +0.59%] index_select random : Elapsed 0.036 ms (3.567 ms / 100) 3.564 -> 3.564 ( +0.00%) [ +0.11% +0.00% +0.08% / +0.00% +0.53% +0.51%] index_select random_sorted : Elapsed 0.036 ms (3.568 ms / 100) 3.573 -> 3.575 ( +0.06%) [ +0.03% +0.00% +0.06% / +0.06% +0.53% +0.53%] index_select perm : Elapsed 0.036 ms (3.574 ms / 100) 3.567 -> 3.569 ( +0.06%) [ +0.14% +0.11% +0.00% / +0.06% +0.56% +0.56%] index_select perm_sorted : Elapsed 0.036 ms (3.572 ms / 100) B = [5, 16, 40, 4] (stride (64, 4, 320, 1)) A = [5, 20, 40, 4] (stride (20, 1, 400, 100)) dim = 1 3.947 -> 3.948 ( +0.03%) [ +0.08% +0.00% +0.08% / +0.03% +0.48% +0.46%] index_select const : Elapsed 0.039 ms (3.950 ms / 100) 3.931 -> 3.930 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.46% +0.66%] index_select wrap : Elapsed 0.039 ms (3.931 ms / 100) 3.928 -> 3.927 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.46% +0.46%] index_select linear : Elapsed 0.039 ms (3.931 ms / 100) 3.932 -> 3.953 ( +0.53%) [ +0.46% +0.00% +0.28% / +0.53% +0.92% +1.02%] index_select reverse : Elapsed 0.040 ms (3.950 ms / 100) 3.938 -> 3.940 ( +0.05%) [ +0.15% +0.03% +0.00% / +0.05% +0.58% +0.56%] index_select skip64 : Elapsed 0.039 ms (3.944 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.13% +0.00% +0.05% / +0.05% +0.71% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.935 ms / 100) 3.926 -> 3.930 ( +0.10%) [ +0.15% +0.00% +0.08% / +0.10% +0.64% +0.61%] index_select spread : Elapsed 0.039 ms (3.932 ms / 100) 3.941 -> 3.946 ( +0.13%) [ +0.20% +0.00% +0.05% / +0.13% +0.51% +0.51%] index_select strided 3 : Elapsed 0.039 ms (3.949 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.15% +0.00% +0.00% / +0.03% +0.48% +0.56%] index_select strided 5 : Elapsed 0.039 ms (3.933 ms / 100) 3.925 -> 3.928 ( +0.08%) [ +0.05% +0.00% +0.05% / +0.08% +0.69% +0.69%] index_select strided 7 : Elapsed 0.039 ms (3.927 ms / 100) 3.934 -> 3.930 ( -0.10%) [ +0.08% +0.00% +0.13% / -0.10% +0.36% +0.58%] index_select strided 8 : Elapsed 0.039 ms (3.937 ms / 100) 3.945 -> 3.948 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.71%] index_select strided 16 : Elapsed 0.039 ms (3.945 ms / 100) 3.939 -> 3.933 ( -0.15%) [ +0.20% +0.00% +0.00% / -0.15% +0.58% +0.63%] index_select random : Elapsed 0.039 ms (3.947 ms / 100) 3.932 -> 3.935 ( +0.08%) [ +0.00% +0.08% +0.10% / +0.08% +0.64% +0.79%] index_select random_sorted : Elapsed 0.039 ms (3.932 ms / 100) 3.931 -> 3.936 ( +0.13%) [ +0.00% +0.00% +0.03% / +0.13% +0.86% +0.66%] index_select perm : Elapsed 0.039 ms (3.931 ms / 100) 3.936 -> 3.936 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.97% +0.76%] index_select perm_sorted : Elapsed 0.039 ms (3.936 ms / 100) B = [5, 16, 40, 4] (stride (64, 1, 320, 16)) A = [5, 20, 40, 4] (stride (800, 40, 1, 4000)) dim = 1 3.856 -> 3.862 ( +0.16%) [ +0.21% +0.18% +0.00% / +0.16% +0.91% +0.93%] index_select const : Elapsed 0.039 ms (3.864 ms / 100) 3.839 -> 3.845 ( +0.16%) [ +0.05% +0.03% +0.00% / +0.16% +0.76% +0.76%] index_select wrap : Elapsed 0.038 ms (3.841 ms / 100) 3.828 -> 3.828 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.73%] index_select linear : Elapsed 0.038 ms (3.829 ms / 100) 3.834 -> 3.837 ( +0.08%) [ +0.08% +0.13% +0.00% / +0.08% +0.91% +0.99%] index_select reverse : Elapsed 0.038 ms (3.837 ms / 100) 3.875 -> 3.874 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.62% +0.62%] index_select skip64 : Elapsed 0.039 ms (3.875 ms / 100) 3.874 -> 3.876 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.70% +0.72%] index_select skip256 : Elapsed 0.039 ms (3.874 ms / 100) 3.846 -> 3.845 ( -0.03%) [ +0.08% +0.00% +0.05% / -0.03% +0.73% +0.73%] index_select spread : Elapsed 0.038 ms (3.849 ms / 100) 3.855 -> 3.861 ( +0.16%) [ +0.03% +0.10% +0.00% / +0.16% +0.83% +0.86%] index_select strided 3 : Elapsed 0.039 ms (3.856 ms / 100) 3.856 -> 3.860 ( +0.10%) [ +0.00% +0.08% +0.13% / +0.10% +0.91% +0.99%] index_select strided 5 : Elapsed 0.039 ms (3.856 ms / 100) 3.851 -> 3.852 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.75% +0.91%] index_select strided 7 : Elapsed 0.039 ms (3.854 ms / 100) 3.857 -> 3.856 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.65% +0.73%] index_select strided 8 : Elapsed 0.039 ms (3.858 ms / 100) 3.839 -> 3.839 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +1.46% +1.54%] index_select strided 16 : Elapsed 0.038 ms (3.840 ms / 100) 3.865 -> 3.865 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.54% +0.60%] index_select random : Elapsed 0.039 ms (3.866 ms / 100) 3.849 -> 3.851 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.60% +0.57%] index_select random_sorted : Elapsed 0.039 ms (3.852 ms / 100) 3.847 -> 3.847 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.55% +0.62%] index_select perm : Elapsed 0.038 ms (3.848 ms / 100) 3.832 -> 3.835 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.57%] index_select perm_sorted : Elapsed 0.038 ms (3.835 ms / 100) B = [5, 16, 40, 4] (stride (1, 20, 320, 5)) A = [5, 20, 40, 4] (stride (800, 1, 20, 4000)) dim = 1 4.141 -> 4.142 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.39% +0.46%] index_select const : Elapsed 0.041 ms (4.141 ms / 100) 4.139 -> 4.140 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.51% +0.48%] index_select wrap : Elapsed 0.041 ms (4.142 ms / 100) 4.135 -> 4.134 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.48% +0.51%] index_select linear : Elapsed 0.041 ms (4.135 ms / 100) 4.136 -> 4.137 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.44% +0.51%] index_select reverse : Elapsed 0.041 ms (4.136 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.60% +0.63%] index_select skip64 : Elapsed 0.041 ms (4.138 ms / 100) 4.133 -> 4.133 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.51% +0.60%] index_select skip256 : Elapsed 0.041 ms (4.134 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.48% +0.51%] index_select spread : Elapsed 0.041 ms (4.141 ms / 100) 4.143 -> 4.144 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.41% +0.48%] index_select strided 3 : Elapsed 0.041 ms (4.145 ms / 100) 4.135 -> 4.136 ( +0.02%) [ +0.00% +0.10% +0.05% / +0.02% +0.53% +0.48%] index_select strided 5 : Elapsed 0.041 ms (4.135 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.65% +0.68%] index_select strided 7 : Elapsed 0.041 ms (4.140 ms / 100) 4.130 -> 4.130 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.73% +0.80%] index_select strided 8 : Elapsed 0.041 ms (4.131 ms / 100) 4.135 -> 4.133 ( -0.05%) [ +0.00% +0.02% +0.00% / -0.05% +0.53% +0.58%] index_select strided 16 : Elapsed 0.041 ms (4.135 ms / 100) 4.137 -> 4.145 ( +0.19%) [ +0.12% +0.12% +0.00% / +0.19% +0.68% +0.56%] index_select random : Elapsed 0.041 ms (4.142 ms / 100) 4.132 -> 4.132 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.63% +0.63%] index_select random_sorted : Elapsed 0.041 ms (4.133 ms / 100) 4.138 -> 4.135 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.63% +0.75%] index_select perm : Elapsed 0.041 ms (4.139 ms / 100) 4.135 -> 4.140 ( +0.12%) [ +0.15% +0.15% +0.00% / +0.12% +0.75% +0.77%] index_select perm_sorted : Elapsed 0.041 ms (4.141 ms / 100) B = [5, 16, 40, 4] (stride (1, 200, 5, 3200)) A = [5, 20, 40, 4] (stride (40, 800, 1, 200)) dim = 1 3.885 -> 3.883 ( -0.05%) [ +0.00% +0.03% +0.00% / -0.05% +0.64% +1.08%] index_select const : Elapsed 0.039 ms (3.885 ms / 100) 3.845 -> 3.846 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.65% +0.68%] index_select wrap : Elapsed 0.038 ms (3.845 ms / 100) 3.835 -> 3.835 ( +0.00%) [ +0.05% +0.08% +0.00% / +0.00% +0.63% +0.76%] index_select linear : Elapsed 0.038 ms (3.837 ms / 100) 3.838 -> 3.839 ( +0.03%) [ +0.00% +0.08% +0.03% / +0.03% +1.15% +1.04%] index_select reverse : Elapsed 0.038 ms (3.838 ms / 100) 3.866 -> 3.867 ( +0.03%) [ +0.00% +0.03% +0.13% / +0.03% +1.42% +0.65%] index_select skip64 : Elapsed 0.039 ms (3.866 ms / 100) 3.869 -> 3.874 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.13% +0.62% +0.75%] index_select skip256 : Elapsed 0.039 ms (3.869 ms / 100) 3.843 -> 3.844 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.68% +0.68%] index_select spread : Elapsed 0.038 ms (3.844 ms / 100) 3.838 -> 3.836 ( -0.05%) [ +0.00% +0.03% +0.05% / -0.05% +0.78% +0.83%] index_select strided 3 : Elapsed 0.038 ms (3.838 ms / 100) 3.854 -> 3.854 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.65% +0.60%] index_select strided 5 : Elapsed 0.039 ms (3.855 ms / 100) 3.844 -> 3.847 ( +0.08%) [ +0.00% +0.00% +0.05% / +0.08% +0.83% +0.86%] index_select strided 7 : Elapsed 0.038 ms (3.844 ms / 100) 3.860 -> 3.861 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.67% +0.70%] index_select strided 8 : Elapsed 0.039 ms (3.861 ms / 100) 3.847 -> 3.849 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.68% +0.70%] index_select strided 16 : Elapsed 0.038 ms (3.847 ms / 100) 3.867 -> 3.867 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.62% +0.62%] index_select random : Elapsed 0.039 ms (3.871 ms / 100) 3.836 -> 3.838 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.55% +0.52%] index_select random_sorted : Elapsed 0.038 ms (3.837 ms / 100) 3.845 -> 3.847 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.47% +0.60%] index_select perm : Elapsed 0.038 ms (3.846 ms / 100) 3.862 -> 3.865 ( +0.08%) [ +0.00% +0.13% +0.10% / +0.08% +0.52% +0.52%] index_select perm_sorted : Elapsed 0.039 ms (3.862 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) A = [5, 20, 40, 4] (stride (1, 5, 100, 4000)) dim = 1 4.290 -> 4.289 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.54% +0.42%] index_select const : Elapsed 0.043 ms (4.292 ms / 100) 4.282 -> 4.281 ( -0.02%) [ +0.00% +0.05% +0.14% / -0.02% +0.65% +0.54%] index_select wrap : Elapsed 0.043 ms (4.282 ms / 100) 4.291 -> 4.292 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.02% +0.63% +0.58%] index_select linear : Elapsed 0.043 ms (4.295 ms / 100) 4.290 -> 4.294 ( +0.09%) [ +0.00% +0.05% +0.02% / +0.09% +0.54% +0.56%] index_select reverse : Elapsed 0.043 ms (4.290 ms / 100) 4.286 -> 4.288 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.63% +0.63%] index_select skip64 : Elapsed 0.043 ms (4.287 ms / 100) 4.290 -> 4.294 ( +0.09%) [ +0.07% +0.00% +0.09% / +0.09% +0.40% +0.56%] index_select skip256 : Elapsed 0.043 ms (4.293 ms / 100) 4.285 -> 4.288 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.61% +0.72%] index_select spread : Elapsed 0.043 ms (4.291 ms / 100) 4.276 -> 4.277 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.56% +0.56%] index_select strided 3 : Elapsed 0.043 ms (4.276 ms / 100) 4.285 -> 4.285 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.63% +0.47%] index_select strided 5 : Elapsed 0.043 ms (4.285 ms / 100) 4.274 -> 4.276 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.80% +0.73%] index_select strided 7 : Elapsed 0.043 ms (4.274 ms / 100) 4.273 -> 4.270 ( -0.07%) [ +0.09% +0.00% +0.09% / -0.07% +0.54% +0.66%] index_select strided 8 : Elapsed 0.043 ms (4.277 ms / 100) 4.280 -> 4.279 ( -0.02%) [ +0.07% +0.05% +0.00% / -0.02% +0.54% +0.54%] index_select strided 16 : Elapsed 0.043 ms (4.283 ms / 100) 4.268 -> 4.267 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.66% +0.59%] index_select random : Elapsed 0.043 ms (4.268 ms / 100) 4.280 -> 4.281 ( +0.02%) [ +0.16% +0.00% +0.14% / +0.02% +0.63% +0.58%] index_select random_sorted : Elapsed 0.043 ms (4.287 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.09% +0.02% +0.00% / +0.07% +0.65% +0.58%] index_select perm : Elapsed 0.043 ms (4.282 ms / 100) 4.277 -> 4.278 ( +0.02%) [ +0.02% +0.12% +0.00% / +0.02% +0.72% +0.94%] index_select perm_sorted : Elapsed 0.043 ms (4.278 ms / 100) out_shape = [5, 20, 16, 4] in_shape = [5, 20, 40, 4] idx_dim = 2 B = [5, 20, 16, 4] (stride (1280, 1, 80, 20)) A = [5, 20, 40, 4] (stride (40, 200, 1, 4000)) dim = 2 4.254 -> 4.253 ( -0.02%) [ +0.09% +0.12% +0.00% / -0.02% +0.66% +0.54%] index_select const : Elapsed 0.043 ms (4.258 ms / 100) 4.277 -> 4.281 ( +0.09%) [ +0.00% +0.14% +0.14% / +0.09% +0.65% +0.47%] index_select wrap : Elapsed 0.043 ms (4.277 ms / 100) 4.260 -> 4.257 ( -0.07%) [ +0.16% +0.00% +0.05% / -0.07% +0.49% +0.56%] index_select linear : Elapsed 0.043 ms (4.267 ms / 100) 4.275 -> 4.278 ( +0.07%) [ +0.05% +0.00% +0.05% / +0.07% +0.63% +0.44%] index_select reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.276 -> 4.281 ( +0.12%) [ +0.00% +0.02% +0.09% / +0.12% +0.37% +0.61%] index_select skip64 : Elapsed 0.043 ms (4.276 ms / 100) 4.250 -> 4.250 ( +0.00%) [ +0.19% +0.16% +0.00% / +0.00% +0.64% +0.61%] index_select skip256 : Elapsed 0.043 ms (4.258 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.07% +0.12% +0.00% / +0.12% +0.45% +0.45%] index_select spread : Elapsed 0.043 ms (4.270 ms / 100) 4.289 -> 4.290 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.44% +0.56%] index_select strided 3 : Elapsed 0.043 ms (4.292 ms / 100) 4.261 -> 4.269 ( +0.19%) [ +0.05% +0.19% +0.00% / +0.19% +0.45% +1.01%] index_select strided 5 : Elapsed 0.043 ms (4.263 ms / 100) 4.274 -> 4.275 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.33% +0.40%] index_select strided 7 : Elapsed 0.043 ms (4.275 ms / 100) 4.296 -> 4.297 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.42% +0.47%] index_select strided 8 : Elapsed 0.043 ms (4.296 ms / 100) 4.265 -> 4.271 ( +0.14%) [ +0.00% +0.09% +0.07% / +0.14% +0.45% +0.40%] index_select strided 16 : Elapsed 0.043 ms (4.265 ms / 100) 4.271 -> 4.275 ( +0.09%) [ +0.07% +0.16% +0.00% / +0.09% +0.47% +0.42%] index_select random : Elapsed 0.043 ms (4.274 ms / 100) 4.282 -> 4.276 ( -0.14%) [ +0.09% +0.00% +0.00% / -0.14% +0.54% +0.68%] index_select random_sorted : Elapsed 0.043 ms (4.286 ms / 100) 4.270 -> 4.272 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.42% +0.44%] index_select perm : Elapsed 0.043 ms (4.270 ms / 100) 4.273 -> 4.272 ( -0.02%) [ +0.05% +0.00% +0.07% / -0.02% +0.44% +0.44%] index_select perm_sorted : Elapsed 0.043 ms (4.275 ms / 100) B = [5, 20, 16, 4] (stride (1280, 1, 20, 320)) A = [5, 20, 40, 4] (stride (3200, 160, 4, 1)) dim = 2 3.633 -> 3.636 ( +0.08%) [ +0.00% +0.06% +0.14% / +0.08% +0.88% +0.88%] index_select const : Elapsed 0.036 ms (3.633 ms / 100) 3.611 -> 3.615 ( +0.11%) [ +0.00% +0.03% +0.03% / +0.11% +0.91% +0.75%] index_select wrap : Elapsed 0.036 ms (3.611 ms / 100) 3.626 -> 3.633 ( +0.19%) [ +0.00% +0.08% +0.17% / +0.19% +0.72% +0.77%] index_select linear : Elapsed 0.036 ms (3.626 ms / 100) 3.612 -> 3.613 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.97% +0.78%] index_select reverse : Elapsed 0.036 ms (3.612 ms / 100) 3.610 -> 3.612 ( +0.06%) [ +0.00% +0.08% +0.19% / +0.06% +0.91% +0.80%] index_select skip64 : Elapsed 0.036 ms (3.610 ms / 100) 3.630 -> 3.637 ( +0.19%) [ +0.06% +0.00% +0.00% / +0.19% +0.91% +0.88%] index_select skip256 : Elapsed 0.036 ms (3.632 ms / 100) 3.608 -> 3.613 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.80% +0.89%] index_select spread : Elapsed 0.036 ms (3.613 ms / 100) 3.607 -> 3.607 ( +0.00%) [ +0.11% +0.00% +0.08% / +0.00% +0.64% +0.89%] index_select strided 3 : Elapsed 0.036 ms (3.611 ms / 100) 3.606 -> 3.608 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.69% +0.53%] index_select strided 5 : Elapsed 0.036 ms (3.606 ms / 100) 3.596 -> 3.600 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.81% +0.78%] index_select strided 7 : Elapsed 0.036 ms (3.596 ms / 100) 3.614 -> 3.612 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.86% +0.77%] index_select strided 8 : Elapsed 0.036 ms (3.614 ms / 100) 3.594 -> 3.599 ( +0.14%) [ +0.11% +0.08% +0.00% / +0.14% +0.75% +0.78%] index_select strided 16 : Elapsed 0.036 ms (3.598 ms / 100) 3.605 -> 3.606 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.78% +0.80%] index_select random : Elapsed 0.036 ms (3.605 ms / 100) 3.604 -> 3.613 ( +0.25%) [ +0.06% +0.00% +0.25% / +0.25% +0.83% +0.86%] index_select random_sorted : Elapsed 0.036 ms (3.606 ms / 100) 3.613 -> 3.614 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.72% +0.75%] index_select perm : Elapsed 0.036 ms (3.614 ms / 100) 3.621 -> 3.623 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.80% +0.69%] index_select perm_sorted : Elapsed 0.036 ms (3.621 ms / 100) B = [5, 20, 16, 4] (stride (1280, 1, 20, 320)) A = [5, 20, 40, 4] (stride (1, 800, 5, 200)) dim = 2 4.268 -> 4.275 ( +0.16%) [ +0.00% +0.16% +0.19% / +0.16% +0.91% +0.80%] index_select const : Elapsed 0.043 ms (4.268 ms / 100) 4.276 -> 4.278 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.51% +0.49%] index_select wrap : Elapsed 0.043 ms (4.279 ms / 100) 4.271 -> 4.271 ( +0.00%) [ +0.02% +0.00% +0.14% / +0.00% +0.52% +0.47%] index_select linear : Elapsed 0.043 ms (4.272 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.30% +0.12% +0.00% / +0.00% +0.80% +0.61%] index_select reverse : Elapsed 0.043 ms (4.287 ms / 100) 4.268 -> 4.267 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.54% +0.59%] index_select skip64 : Elapsed 0.043 ms (4.269 ms / 100) 4.274 -> 4.275 ( +0.02%) [ +0.00% +0.07% +0.09% / +0.02% +0.42% +0.66%] index_select skip256 : Elapsed 0.043 ms (4.274 ms / 100) 4.273 -> 4.282 ( +0.21%) [ +0.00% +0.09% +0.12% / +0.21% +0.44% +0.61%] index_select spread : Elapsed 0.043 ms (4.273 ms / 100) 4.268 -> 4.271 ( +0.07%) [ +0.05% +0.00% +0.09% / +0.07% +0.45% +0.40%] index_select strided 3 : Elapsed 0.043 ms (4.270 ms / 100) 4.267 -> 4.267 ( +0.00%) [ +0.14% +0.23% +0.00% / +0.00% +0.52% +0.54%] index_select strided 5 : Elapsed 0.043 ms (4.273 ms / 100) 4.267 -> 4.269 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.33% +0.40%] index_select strided 7 : Elapsed 0.043 ms (4.267 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.47% +0.45%] index_select strided 8 : Elapsed 0.043 ms (4.272 ms / 100) 4.280 -> 4.280 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.35% +0.40%] index_select strided 16 : Elapsed 0.043 ms (4.282 ms / 100) 4.276 -> 4.279 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.37% +0.49%] index_select random : Elapsed 0.043 ms (4.279 ms / 100) 4.270 -> 4.275 ( +0.12%) [ +0.09% +0.09% +0.00% / +0.12% +0.42% +0.49%] index_select random_sorted : Elapsed 0.043 ms (4.274 ms / 100) 4.260 -> 4.261 ( +0.02%) [ +0.02% +0.09% +0.00% / +0.02% +0.38% +0.40%] index_select perm : Elapsed 0.043 ms (4.261 ms / 100) 4.268 -> 4.275 ( +0.16%) [ +0.23% +0.00% +0.09% / +0.16% +0.40% +0.40%] index_select perm_sorted : Elapsed 0.043 ms (4.278 ms / 100) B = [5, 20, 16, 4] (stride (16, 320, 1, 80)) A = [5, 20, 40, 4] (stride (80, 1, 400, 20)) dim = 2 3.863 -> 3.861 ( -0.05%) [ +0.03% +0.00% +0.00% / -0.05% +0.54% +0.41%] index_select const : Elapsed 0.039 ms (3.864 ms / 100) 3.816 -> 3.819 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.73% +0.71%] index_select wrap : Elapsed 0.038 ms (3.818 ms / 100) 3.814 -> 3.816 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.71% +0.71%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.79% +0.89%] index_select reverse : Elapsed 0.038 ms (3.814 ms / 100) 3.840 -> 3.843 ( +0.08%) [ +0.03% +0.00% +0.05% / +0.08% +0.70% +0.70%] index_select skip64 : Elapsed 0.038 ms (3.841 ms / 100) 3.858 -> 3.861 ( +0.08%) [ +0.10% +0.00% +0.05% / +0.08% +0.67% +0.80%] index_select skip256 : Elapsed 0.039 ms (3.862 ms / 100) 3.812 -> 3.812 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.71% +0.66%] index_select spread : Elapsed 0.038 ms (3.812 ms / 100) 3.812 -> 3.812 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.68% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.814 ms / 100) 3.821 -> 3.823 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.68% +0.68%] index_select strided 5 : Elapsed 0.038 ms (3.821 ms / 100) 3.806 -> 3.806 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.68%] index_select strided 7 : Elapsed 0.038 ms (3.807 ms / 100) 3.827 -> 3.825 ( -0.05%) [ +0.00% +0.00% +0.29% / -0.05% +1.10% +0.81%] index_select strided 8 : Elapsed 0.038 ms (3.827 ms / 100) 3.833 -> 3.832 ( -0.03%) [ +0.18% +0.00% +0.00% / -0.03% +0.73% +0.78%] index_select strided 16 : Elapsed 0.038 ms (3.840 ms / 100) 3.818 -> 3.819 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.79% +0.79%] index_select random : Elapsed 0.038 ms (3.818 ms / 100) 3.805 -> 3.807 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.79% +0.79%] index_select random_sorted : Elapsed 0.038 ms (3.808 ms / 100) 3.819 -> 3.826 ( +0.18%) [ +0.16% +0.00% +0.13% / +0.18% +0.86% +1.00%] index_select perm : Elapsed 0.038 ms (3.825 ms / 100) 3.820 -> 3.821 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.76% +0.79%] index_select perm_sorted : Elapsed 0.038 ms (3.823 ms / 100) B = [5, 20, 16, 4] (stride (80, 1, 400, 20)) A = [5, 20, 40, 4] (stride (80, 4, 400, 1)) dim = 2 3.498 -> 3.498 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.66%] index_select const : Elapsed 0.035 ms (3.498 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.54% +0.48%] index_select wrap : Elapsed 0.035 ms (3.517 ms / 100) 3.507 -> 3.509 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.54% +0.68%] index_select linear : Elapsed 0.035 ms (3.508 ms / 100) 3.500 -> 3.501 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.57% +0.60%] index_select reverse : Elapsed 0.035 ms (3.501 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.80% +0.80%] index_select skip64 : Elapsed 0.035 ms (3.503 ms / 100) 3.497 -> 3.499 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.51% +0.51%] index_select skip256 : Elapsed 0.035 ms (3.497 ms / 100) 3.504 -> 3.505 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.49% +0.37%] index_select spread : Elapsed 0.035 ms (3.505 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.48% +0.51%] index_select strided 3 : Elapsed 0.035 ms (3.518 ms / 100) 3.501 -> 3.503 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.51% +0.37%] index_select strided 5 : Elapsed 0.035 ms (3.501 ms / 100) 3.510 -> 3.511 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.51% +0.57%] index_select strided 7 : Elapsed 0.035 ms (3.510 ms / 100) 3.509 -> 3.514 ( +0.14%) [ +0.11% +0.00% +0.11% / +0.14% +0.63% +0.68%] index_select strided 8 : Elapsed 0.035 ms (3.513 ms / 100) 3.500 -> 3.500 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.43%] index_select strided 16 : Elapsed 0.035 ms (3.500 ms / 100) 3.504 -> 3.505 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.83% +0.80%] index_select random : Elapsed 0.035 ms (3.504 ms / 100) 3.500 -> 3.503 ( +0.09%) [ +0.03% +0.00% +0.00% / +0.09% +0.43% +0.46%] index_select random_sorted : Elapsed 0.035 ms (3.501 ms / 100) 3.512 -> 3.515 ( +0.09%) [ +0.00% +0.03% +0.00% / +0.09% +0.40% +0.48%] index_select perm : Elapsed 0.035 ms (3.512 ms / 100) 3.504 -> 3.506 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.34% +0.29%] index_select perm_sorted : Elapsed 0.035 ms (3.505 ms / 100) B = [5, 20, 16, 4] (stride (1, 20, 400, 5)) A = [5, 20, 40, 4] (stride (20, 1, 400, 100)) dim = 2 3.571 -> 3.573 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.78% +0.81%] index_select const : Elapsed 0.036 ms (3.571 ms / 100) 3.540 -> 3.541 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.62% +0.68%] index_select wrap : Elapsed 0.035 ms (3.540 ms / 100) 3.554 -> 3.555 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.65% +0.68%] index_select linear : Elapsed 0.036 ms (3.554 ms / 100) 3.557 -> 3.556 ( -0.03%) [ +0.03% +0.08% +0.00% / -0.03% +0.70% +0.79%] index_select reverse : Elapsed 0.036 ms (3.558 ms / 100) 3.538 -> 3.541 ( +0.08%) [ +0.06% +0.08% +0.00% / +0.08% +0.65% +0.59%] index_select skip64 : Elapsed 0.035 ms (3.540 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.90% +0.84%] index_select skip256 : Elapsed 0.036 ms (3.569 ms / 100) 3.541 -> 3.546 ( +0.14%) [ +0.14% +0.00% +0.11% / +0.14% +0.85% +0.96%] index_select spread : Elapsed 0.035 ms (3.546 ms / 100) 3.534 -> 3.535 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.82% +0.76%] index_select strided 3 : Elapsed 0.035 ms (3.535 ms / 100) 3.555 -> 3.567 ( +0.34%) [ +0.03% +0.03% +0.00% / +0.34% +0.68% +0.76%] index_select strided 5 : Elapsed 0.036 ms (3.556 ms / 100) 3.547 -> 3.542 ( -0.14%) [ +0.00% +0.03% +0.06% / -0.14% +0.82% +0.76%] index_select strided 7 : Elapsed 0.035 ms (3.547 ms / 100) 3.569 -> 3.573 ( +0.11%) [ +0.03% +0.00% +0.03% / +0.11% +0.81% +0.81%] index_select strided 8 : Elapsed 0.036 ms (3.570 ms / 100) 3.560 -> 3.561 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.67% +0.73%] index_select strided 16 : Elapsed 0.036 ms (3.560 ms / 100) 3.557 -> 3.557 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.76% +0.70%] index_select random : Elapsed 0.036 ms (3.557 ms / 100) 3.542 -> 3.543 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.82% +0.90%] index_select random_sorted : Elapsed 0.035 ms (3.544 ms / 100) 3.532 -> 3.532 ( +0.00%) [ +0.03% +0.08% +0.00% / +0.00% +0.76% +0.71%] index_select perm : Elapsed 0.035 ms (3.533 ms / 100) 3.532 -> 3.529 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.65% +0.65%] index_select perm_sorted : Elapsed 0.035 ms (3.532 ms / 100) B = [5, 20, 16, 4] (stride (320, 16, 1, 1600)) A = [5, 20, 40, 4] (stride (800, 40, 1, 4000)) dim = 2 1.435 -> 1.434 ( -0.07%) [ +0.14% +0.00% +0.00% / -0.07% +0.21% +0.14%] index_select const : Elapsed 0.014 ms (1.437 ms / 100) 1.443 -> 1.444 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.42% +0.35%] index_select wrap : Elapsed 0.014 ms (1.443 ms / 100) 1.444 -> 1.445 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.28% +0.07%] index_select linear : Elapsed 0.014 ms (1.445 ms / 100) 1.459 -> 1.460 ( +0.07%) [ +0.27% +0.00% +0.07% / +0.21% +0.14% +0.07%] index_select reverse : Elapsed 0.015 ms (1.463 ms / 100) 1.436 -> 1.432 ( -0.28%) [ +0.07% +0.00% +0.00% / -0.28% +0.00% -0.07%] index_select skip64 : Elapsed 0.014 ms (1.437 ms / 100) 1.436 -> 1.435 ( -0.07%) [ +0.07% +0.00% +0.00% / +0.07% -0.07% +0.07%] index_select skip256 : Elapsed 0.014 ms (1.437 ms / 100) 1.446 -> 1.447 ( +0.07%) [ +0.00% +0.21% +0.14% / +0.07% +0.07% +0.07%] index_select spread : Elapsed 0.014 ms (1.446 ms / 100) 1.465 -> 1.463 ( -0.14%) [ +0.34% +0.00% +0.07% / -0.14% +0.00% +0.00%] index_select strided 3 : Elapsed 0.015 ms (1.470 ms / 100) 1.443 -> 1.445 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.21% +0.28% +0.14%] index_select strided 5 : Elapsed 0.014 ms (1.444 ms / 100) 1.440 -> 1.441 ( +0.07%) [ +0.21% +0.00% +0.07% / +0.07% +0.07% +0.07%] index_select strided 7 : Elapsed 0.014 ms (1.443 ms / 100) 1.444 -> 1.443 ( -0.07%) [ +0.00% +0.35% +0.00% / +0.00% -0.07% +0.14%] index_select strided 8 : Elapsed 0.014 ms (1.444 ms / 100) 1.437 -> 1.439 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.28% +0.28%] index_select strided 16 : Elapsed 0.014 ms (1.437 ms / 100) 1.443 -> 1.446 ( +0.21%) [ +0.14% +0.00% +0.28% / +0.21% +0.35% +0.21%] index_select random : Elapsed 0.014 ms (1.445 ms / 100) 1.447 -> 1.446 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.00% +0.14%] index_select random_sorted : Elapsed 0.014 ms (1.447 ms / 100) 1.446 -> 1.445 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.21% +0.07%] index_select perm : Elapsed 0.014 ms (1.447 ms / 100) 1.451 -> 1.450 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.34% +0.34%] index_select perm_sorted : Elapsed 0.015 ms (1.452 ms / 100) out_shape = [5, 20, 40, 16] in_shape = [5, 20, 40, 4] idx_dim = 3 B = [5, 20, 40, 16] (stride (12800, 16, 320, 1)) A = [5, 20, 40, 4] (stride (4, 800, 20, 1)) dim = 3 2.452 -> 2.456 ( +0.16%) [ +0.00% +0.41% +0.12% / +0.16% +0.73% +0.86%] index_add_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.424 -> 2.427 ( +0.12%) [ +0.33% +0.00% +0.08% / +0.12% +0.58% +0.70%] index_copy_ linear : Elapsed 0.024 ms (2.432 ms / 100) 2.459 -> 2.456 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.12% +0.08% +0.45%] index_add_ reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.12% +0.16% +0.00% / +0.16% +0.66% +0.62%] index_copy_ reverse : Elapsed 0.024 ms (2.428 ms / 100) 2.493 -> 2.489 ( -0.16%) [ +0.24% +0.00% +0.04% / -0.16% +0.68% +0.64%] index_add_ spread : Elapsed 0.025 ms (2.499 ms / 100) 2.526 -> 2.522 ( -0.16%) [ +0.00% +0.08% +0.00% / -0.16% +0.63% +0.59%] index_copy_ spread : Elapsed 0.025 ms (2.526 ms / 100) 2.514 -> 2.520 ( +0.24%) [ +0.32% +0.12% +0.00% / +0.24% +0.56% +0.56%] index_add_ strided 3 : Elapsed 0.025 ms (2.522 ms / 100) 2.542 -> 2.543 ( +0.04%) [ +0.35% +0.24% +0.00% / +0.04% +0.67% +0.51%] index_copy_ strided 3 : Elapsed 0.026 ms (2.551 ms / 100) 2.512 -> 2.518 ( +0.24%) [ +0.00% +0.08% +0.20% / +0.24% +0.60% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.512 ms / 100) 2.540 -> 2.547 ( +0.28%) [ +0.12% +0.04% +0.00% / +0.28% +0.67% +0.71%] index_copy_ strided 5 : Elapsed 0.025 ms (2.543 ms / 100) 2.487 -> 2.492 ( +0.20%) [ +0.00% +0.32% +0.24% / +0.20% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.025 ms (2.487 ms / 100) 2.524 -> 2.527 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.025 ms (2.525 ms / 100) 2.489 -> 2.492 ( +0.12%) [ +0.00% +0.40% +0.36% / +0.12% +0.28% +0.52%] index_add_ perm : Elapsed 0.025 ms (2.489 ms / 100) 2.520 -> 2.524 ( +0.16%) [ +0.48% +0.00% +0.28% / +0.16% +0.67% +0.71%] index_copy_ perm : Elapsed 0.025 ms (2.532 ms / 100) 2.501 -> 2.502 ( +0.04%) [ +0.32% +0.00% +0.20% / +0.04% +0.60% +0.56%] index_add_ perm_sorted : Elapsed 0.025 ms (2.509 ms / 100) 2.524 -> 2.525 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.40% +0.44%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.527 ms / 100) 5.333 -> 5.340 ( +0.13%) [ +0.09% +0.06% +0.00% / +0.13% +0.56% +0.60%] index_select const : Elapsed 0.053 ms (5.338 ms / 100) 5.355 -> 5.361 ( +0.11%) [ +0.02% +0.06% +0.00% / +0.11% +0.45% +0.52%] index_select wrap : Elapsed 0.054 ms (5.356 ms / 100) 5.342 -> 5.344 ( +0.04%) [ +0.07% +0.00% +0.07% / +0.04% +0.49% +0.79%] index_select linear : Elapsed 0.053 ms (5.346 ms / 100) 5.415 -> 5.416 ( +0.02%) [ +0.18% +0.00% +0.20% / +0.02% +0.33% +0.52%] index_select reverse : Elapsed 0.054 ms (5.425 ms / 100) 5.412 -> 5.422 ( +0.18%) [ +0.24% +0.13% +0.00% / +0.18% +0.52% +0.57%] index_select skip64 : Elapsed 0.054 ms (5.425 ms / 100) 5.343 -> 5.344 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.49% +0.36%] index_select skip256 : Elapsed 0.053 ms (5.344 ms / 100) 5.355 -> 5.352 ( -0.06%) [ +0.17% +0.13% +0.00% / -0.06% +0.50% +0.45%] index_select spread : Elapsed 0.054 ms (5.364 ms / 100) 5.344 -> 5.349 ( +0.09%) [ +0.00% +0.15% +0.06% / +0.09% +0.43% +0.47%] index_select strided 3 : Elapsed 0.053 ms (5.344 ms / 100) 5.414 -> 5.419 ( +0.09%) [ +0.22% +0.20% +0.00% / +0.09% +0.39% +0.57%] index_select random : Elapsed 0.054 ms (5.426 ms / 100) 5.335 -> 5.342 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.52% +0.54%] index_select random_sorted : Elapsed 0.053 ms (5.339 ms / 100) B = [5, 20, 40, 16] (stride (12800, 1, 320, 20)) A = [5, 20, 40, 4] (stride (1, 800, 5, 200)) dim = 3 2.441 -> 2.444 ( +0.12%) [ +0.25% +0.00% +0.00% / +0.12% +0.33% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.400 -> 2.402 ( +0.08%) [ +0.25% +0.00% +0.13% / +0.08% +0.37% +0.46%] index_copy_ linear : Elapsed 0.024 ms (2.406 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.25% +0.29% +0.00% / +0.08% +0.25% +0.25%] index_add_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.00% +0.12% +0.33% / +0.08% +0.21% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.429 -> 2.424 ( -0.21%) [ +0.25% +0.00% +0.00% / +0.00% -0.21% +0.12%] index_add_ spread : Elapsed 0.024 ms (2.435 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.33% +0.00% +0.08% / +0.13% +0.08% +0.17%] index_copy_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.08% +0.00% +0.12% / +0.17% +0.25% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.387 -> 2.387 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.21% +0.34%] index_copy_ strided 3 : Elapsed 0.024 ms (2.389 ms / 100) 2.445 -> 2.443 ( -0.08%) [ +0.08% +0.12% +0.00% / -0.08% +0.33% +0.08%] index_add_ strided 5 : Elapsed 0.024 ms (2.447 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.17% +0.00% +0.25% / +0.08% +0.29% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.420 ms / 100) 2.433 -> 2.435 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.33% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.433 ms / 100) 2.404 -> 2.412 ( +0.33%) [ +0.00% +0.33% +0.12% / +0.37% +0.33% +0.42%] index_copy_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.08% +0.00% +0.21% / +0.00% +0.33% +0.12%] index_add_ perm : Elapsed 0.024 ms (2.428 ms / 100) 2.395 -> 2.398 ( +0.13%) [ +0.21% +0.08% +0.00% / +0.13% +0.46% +0.33%] index_copy_ perm : Elapsed 0.024 ms (2.400 ms / 100) 2.434 -> 2.435 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.04% +0.25% +0.41%] index_add_ perm_sorted : Elapsed 0.024 ms (2.434 ms / 100) 2.405 -> 2.405 ( +0.00%) [ +0.00% +0.29% +0.12% / +0.00% +0.37% +0.17%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.405 ms / 100) 5.380 -> 5.383 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.45% +0.45%] index_select const : Elapsed 0.054 ms (5.386 ms / 100) 5.385 -> 5.389 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.11% +0.30%] index_select wrap : Elapsed 0.054 ms (5.385 ms / 100) 5.387 -> 5.396 ( +0.17%) [ +0.07% +0.11% +0.00% / +0.17% +0.37% +0.35%] index_select linear : Elapsed 0.054 ms (5.391 ms / 100) 5.399 -> 5.404 ( +0.09%) [ +0.13% +0.00% +0.07% / +0.09% +0.57% +0.43%] index_select reverse : Elapsed 0.054 ms (5.406 ms / 100) 5.382 -> 5.393 ( +0.20%) [ +0.09% +0.19% +0.00% / +0.20% +0.32% +0.32%] index_select skip64 : Elapsed 0.054 ms (5.387 ms / 100) 5.383 -> 5.387 ( +0.07%) [ +0.02% +0.11% +0.00% / +0.07% +0.30% +0.33%] index_select skip256 : Elapsed 0.054 ms (5.384 ms / 100) 5.388 -> 5.391 ( +0.06%) [ +0.04% +0.00% +0.13% / +0.06% +0.39% +0.35%] index_select spread : Elapsed 0.054 ms (5.390 ms / 100) 5.403 -> 5.397 ( -0.11%) [ +0.06% +0.04% +0.00% / -0.11% +0.22% +0.24%] index_select strided 3 : Elapsed 0.054 ms (5.406 ms / 100) 5.393 -> 5.388 ( -0.09%) [ +0.09% +0.04% +0.00% / -0.09% +0.20% +0.15%] index_select random : Elapsed 0.054 ms (5.398 ms / 100) 5.396 -> 5.408 ( +0.22%) [ +0.11% +0.13% +0.00% / +0.22% +0.46% +0.44%] index_select random_sorted : Elapsed 0.054 ms (5.402 ms / 100) B = [5, 20, 40, 16] (stride (12800, 40, 1, 800)) A = [5, 20, 40, 4] (stride (3200, 4, 80, 1)) dim = 3 2.486 -> 2.488 ( +0.08%) [ +0.00% +0.16% +0.20% / +0.08% +0.60% +0.44%] index_add_ linear : Elapsed 0.025 ms (2.486 ms / 100) 2.424 -> 2.429 ( +0.21%) [ +0.12% +0.00% +0.04% / +0.21% +0.58% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.427 ms / 100) 2.483 -> 2.487 ( +0.16%) [ +0.24% +0.00% +0.12% / +0.16% +0.52% +0.52%] index_add_ reverse : Elapsed 0.025 ms (2.489 ms / 100) 2.426 -> 2.430 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.41% +0.41%] index_copy_ reverse : Elapsed 0.024 ms (2.426 ms / 100) 2.486 -> 2.495 ( +0.36%) [ +0.04% +0.00% +0.20% / +0.36% +0.40% +0.40%] index_add_ spread : Elapsed 0.025 ms (2.487 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.58% +0.41%] index_copy_ spread : Elapsed 0.024 ms (2.429 ms / 100) 2.486 -> 2.489 ( +0.12%) [ +0.00% +0.28% +0.04% / +0.12% +0.52% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.486 ms / 100) 2.429 -> 2.428 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.49% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.491 -> 2.490 ( -0.04%) [ +0.04% +0.04% +0.00% / +0.00% -0.04% +0.08%] index_add_ strided 5 : Elapsed 0.025 ms (2.492 ms / 100) 2.430 -> 2.429 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.16% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.430 ms / 100) 2.489 -> 2.491 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.12% +0.16%] index_add_ strided 7 : Elapsed 0.025 ms (2.489 ms / 100) 2.430 -> 2.431 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.12% +0.37%] index_copy_ strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.485 -> 2.488 ( +0.12%) [ +0.24% +0.04% +0.00% / +0.24% +0.36% +0.12%] index_add_ perm : Elapsed 0.025 ms (2.491 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.16% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.490 -> 2.490 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.28% +0.56%] index_add_ perm_sorted : Elapsed 0.025 ms (2.491 ms / 100) 2.422 -> 2.428 ( +0.25%) [ +0.12% +0.08% +0.00% / +0.25% +0.37% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.05% +0.00% +0.02% / -0.05% +0.40% +0.47%] index_select const : Elapsed 0.055 ms (5.492 ms / 100) 5.489 -> 5.494 ( +0.09%) [ +0.00% +0.04% +0.09% / +0.09% +0.53% +0.47%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.490 -> 5.500 ( +0.18%) [ +0.00% +0.00% +0.05% / +0.18% +0.36% +0.55%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.493 ( +0.13%) [ +0.07% +0.00% +0.04% / +0.13% +0.38% +0.40%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.00% +0.00% +0.04% / +0.02% +0.29% +0.29%] index_select skip64 : Elapsed 0.055 ms (5.490 ms / 100) 5.498 -> 5.495 ( -0.05%) [ +0.00% +0.09% +0.13% / -0.05% +0.31% +0.44%] index_select skip256 : Elapsed 0.055 ms (5.498 ms / 100) 5.489 -> 5.492 ( +0.05%) [ +0.00% +0.11% +0.09% / +0.05% +0.27% +0.33%] index_select spread : Elapsed 0.055 ms (5.489 ms / 100) 5.496 -> 5.496 ( +0.00%) [ +0.20% +0.00% +0.00% / +0.00% +0.33% +0.35%] index_select strided 3 : Elapsed 0.055 ms (5.507 ms / 100) 5.491 -> 5.493 ( +0.04%) [ +0.00% +0.02% +0.00% / +0.04% +0.31% +0.24%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.488 -> 5.488 ( +0.00%) [ +0.07% +0.00% +0.02% / +0.00% +0.33% +0.47%] index_select random_sorted : Elapsed 0.055 ms (5.492 ms / 100) B = [5, 20, 40, 16] (stride (12800, 1, 20, 800)) A = [5, 20, 40, 4] (stride (20, 1, 400, 100)) dim = 3 2.492 -> 2.493 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.28% +0.56%] index_add_ linear : Elapsed 0.025 ms (2.494 ms / 100) 2.442 -> 2.441 ( -0.04%) [ +0.16% +0.00% +0.08% / -0.04% +0.20% +1.15%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.494 -> 2.495 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.24% +0.16% +0.04%] index_add_ reverse : Elapsed 0.025 ms (2.496 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.04% +0.04%] index_copy_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.12% +0.24% +0.00% / +0.12% +0.12% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.496 ms / 100) 2.445 -> 2.443 ( -0.08%) [ +0.00% +0.00% +0.04% / +0.08% -0.08% +0.25%] index_copy_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.492 -> 2.498 ( +0.24%) [ +0.16% +0.00% +0.16% / +0.24% +0.32% +0.48%] index_add_ strided 3 : Elapsed 0.025 ms (2.496 ms / 100) 2.444 -> 2.443 ( -0.04%) [ +0.04% +0.00% +0.04% / +0.04% -0.04% +0.37%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.496 -> 2.497 ( +0.04%) [ +0.20% +0.00% +0.12% / +0.08% +0.04% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.501 ms / 100) 2.447 -> 2.450 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.20% +0.12% +0.20%] index_copy_ strided 5 : Elapsed 0.024 ms (2.447 ms / 100) 2.490 -> 2.495 ( +0.20%) [ +0.08% +0.00% +0.12% / +0.20% +0.32% +0.48%] index_add_ strided 7 : Elapsed 0.025 ms (2.492 ms / 100) 2.442 -> 2.443 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.12% +0.04% +0.53%] index_copy_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.491 -> 2.491 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.12% +0.12%] index_add_ perm : Elapsed 0.025 ms (2.494 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.08% +0.08%] index_copy_ perm : Elapsed 0.024 ms (2.447 ms / 100) 2.497 -> 2.498 ( +0.04%) [ +0.00% +0.08% +0.16% / +0.04% +0.16% +0.12%] index_add_ perm_sorted : Elapsed 0.025 ms (2.497 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.20% +0.16% +0.00% / +0.25% +0.16% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.02% +0.11% +0.00% / +0.04% +0.43% +0.56%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.556 -> 5.557 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.22% +0.32%] index_select wrap : Elapsed 0.056 ms (5.559 ms / 100) 5.562 -> 5.560 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.20% +0.43%] index_select linear : Elapsed 0.056 ms (5.564 ms / 100) 5.541 -> 5.537 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.22% +0.23%] index_select reverse : Elapsed 0.055 ms (5.542 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.29% +0.38%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.05% +0.09% / +0.02% +0.25% +0.50%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.548 -> 5.550 ( +0.04%) [ +0.07% +0.04% +0.00% / +0.04% +0.29% +0.43%] index_select spread : Elapsed 0.056 ms (5.552 ms / 100) 5.538 -> 5.538 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.20% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.538 ms / 100) 5.525 -> 5.531 ( +0.11%) [ +0.00% +0.09% +0.00% / +0.11% +0.27% +0.33%] index_select random : Elapsed 0.055 ms (5.525 ms / 100) 5.529 -> 5.532 ( +0.05%) [ +0.05% +0.04% +0.00% / +0.05% +0.22% +0.34%] index_select random_sorted : Elapsed 0.055 ms (5.532 ms / 100) B = [5, 20, 40, 16] (stride (12800, 1, 20, 800)) A = [5, 20, 40, 4] (stride (20, 1, 100, 4000)) dim = 3 2.499 -> 2.500 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.28% +0.48%] index_add_ linear : Elapsed 0.025 ms (2.499 ms / 100) 2.447 -> 2.444 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +0.33% +0.29%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.00% +0.08% +0.20% / +0.12% +0.44% +0.44%] index_add_ reverse : Elapsed 0.025 ms (2.501 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.29% +0.08% / +0.00% +0.29% +0.49%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.499 -> 2.498 ( -0.04%) [ +0.12% +0.16% +0.00% / -0.04% +0.36% +0.40%] index_add_ spread : Elapsed 0.025 ms (2.502 ms / 100) 2.444 -> 2.450 ( +0.25%) [ +0.00% +0.25% +0.29% / +0.25% +0.70% +0.57%] index_copy_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.16% +0.00% +0.16% / +0.12% +0.44% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.500 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.49% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.500 -> 2.502 ( +0.08%) [ +0.24% +0.20% +0.00% / +0.12% +0.08% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.12% +0.20% +0.00% / -0.04% +0.00% +0.33%] index_copy_ strided 5 : Elapsed 0.025 ms (2.454 ms / 100) 2.507 -> 2.505 ( -0.08%) [ +0.00% +0.00% +0.00% / +0.00% +0.24% -0.08%] index_add_ strided 7 : Elapsed 0.025 ms (2.507 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.00% +0.04% +0.12% / +0.12% +0.78% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.450 ms / 100) 2.503 -> 2.501 ( -0.08%) [ +0.12% +0.12% +0.00% / +0.12% -0.08% +0.08%] index_add_ perm : Elapsed 0.025 ms (2.506 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.12% +0.16% +0.00% / +0.16% +0.12% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.454 ms / 100) 2.505 -> 2.501 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.04% -0.16% -0.12%] index_add_ perm_sorted : Elapsed 0.025 ms (2.509 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.12% +0.04% +0.04%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.452 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.50% +0.49%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.580 -> 5.587 ( +0.13%) [ +0.11% +0.07% +0.00% / +0.13% +0.38% +0.39%] index_select wrap : Elapsed 0.056 ms (5.586 ms / 100) 5.582 -> 5.587 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.50% +0.48%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.550 -> 5.553 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.41% +0.43%] index_select reverse : Elapsed 0.056 ms (5.556 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.02% +0.04% +0.00% / +0.00% +0.36% +0.34%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.563 -> 5.576 ( +0.23%) [ +0.16% +0.20% +0.00% / +0.23% +0.45% +0.45%] index_select skip256 : Elapsed 0.056 ms (5.572 ms / 100) 5.563 -> 5.573 ( +0.18%) [ +0.07% +0.04% +0.00% / +0.18% +0.27% +0.20%] index_select spread : Elapsed 0.056 ms (5.567 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.07% +0.11% +0.00% / +0.05% +0.27% +0.22%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.560 -> 5.570 ( +0.18%) [ +0.05% +0.00% +0.05% / +0.18% +0.23% +0.18%] index_select random : Elapsed 0.056 ms (5.563 ms / 100) 5.555 -> 5.561 ( +0.11%) [ +0.04% +0.04% +0.00% / +0.11% +0.36% +0.29%] index_select random_sorted : Elapsed 0.056 ms (5.557 ms / 100) B = [5, 20, 40, 16] (stride (16, 3200, 80, 1)) A = [5, 20, 40, 4] (stride (1, 5, 100, 4000)) dim = 3 2.511 -> 2.512 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.16% +0.24% +0.04%] index_add_ linear : Elapsed 0.025 ms (2.512 ms / 100) 2.479 -> 2.484 ( +0.20%) [ +0.00% +0.08% +0.04% / +0.28% +0.32% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.479 ms / 100) 2.510 -> 2.510 ( +0.00%) [ +0.00% +0.40% +0.04% / +0.24% +0.00% +0.24%] index_add_ reverse : Elapsed 0.025 ms (2.510 ms / 100) 2.483 -> 2.478 ( -0.20%) [ +0.04% +0.00% +0.36% / +0.20% -0.04% -0.20%] index_copy_ reverse : Elapsed 0.025 ms (2.484 ms / 100) 2.548 -> 2.549 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.24% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.548 ms / 100) 2.579 -> 2.579 ( +0.00%) [ +0.04% +0.43% +0.00% / +0.19% +0.04% +0.00%] index_copy_ spread : Elapsed 0.026 ms (2.580 ms / 100) 2.544 -> 2.553 ( +0.35%) [ +0.24% +0.12% +0.00% / +0.39% +0.35% +0.39%] index_add_ strided 3 : Elapsed 0.025 ms (2.550 ms / 100) 2.579 -> 2.583 ( +0.16%) [ +0.00% +0.19% +0.04% / +0.16% +0.31% +0.19%] index_copy_ strided 3 : Elapsed 0.026 ms (2.579 ms / 100) 2.548 -> 2.550 ( +0.08%) [ +0.00% +0.12% +0.20% / +0.08% +0.12% +0.08%] index_add_ strided 5 : Elapsed 0.025 ms (2.548 ms / 100) 2.579 -> 2.578 ( -0.04%) [ +0.00% +0.19% +0.12% / +0.19% -0.04% +0.16%] index_copy_ strided 5 : Elapsed 0.026 ms (2.579 ms / 100) 2.541 -> 2.547 ( +0.24%) [ +0.20% +0.28% +0.00% / +0.24% +0.43% +0.47%] index_add_ strided 7 : Elapsed 0.025 ms (2.546 ms / 100) 2.575 -> 2.581 ( +0.23%) [ +0.16% +0.16% +0.00% / +0.23% +0.39% +0.47%] index_copy_ strided 7 : Elapsed 0.026 ms (2.579 ms / 100) 2.549 -> 2.550 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.08% +0.24%] index_add_ perm : Elapsed 0.026 ms (2.553 ms / 100) 2.576 -> 2.586 ( +0.39%) [ +0.35% +0.27% +0.00% / +0.62% +0.39% +0.39%] index_copy_ perm : Elapsed 0.026 ms (2.585 ms / 100) 2.544 -> 2.549 ( +0.20%) [ +0.20% +0.31% +0.00% / +0.28% +0.20% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.549 ms / 100) 2.580 -> 2.578 ( -0.08%) [ +0.08% +0.35% +0.00% / +0.23% +0.16% -0.08%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.582 ms / 100) 5.562 -> 5.570 ( +0.14%) [ +0.27% +0.00% +0.05% / +0.14% +0.52% +0.40%] index_select const : Elapsed 0.056 ms (5.577 ms / 100) 5.549 -> 5.554 ( +0.09%) [ +0.00% +0.20% +0.23% / +0.09% +0.38% +0.18%] index_select wrap : Elapsed 0.055 ms (5.549 ms / 100) 5.583 -> 5.578 ( -0.09%) [ +0.02% +0.07% +0.00% / -0.09% +0.25% +0.25%] index_select linear : Elapsed 0.056 ms (5.584 ms / 100) 5.579 -> 5.583 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.45% +0.41%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.551 -> 5.555 ( +0.07%) [ +0.20% +0.00% +0.05% / +0.07% +0.67% +0.38%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.566 -> 5.556 ( -0.18%) [ +0.09% +0.02% +0.00% / -0.18% +0.40% +0.49%] index_select skip256 : Elapsed 0.056 ms (5.571 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.22% +0.22%] index_select spread : Elapsed 0.056 ms (5.565 ms / 100) 5.551 -> 5.553 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.04% +0.20% +0.16%] index_select strided 3 : Elapsed 0.056 ms (5.551 ms / 100) 5.573 -> 5.578 ( +0.09%) [ +0.00% +0.04% +0.02% / +0.09% +0.29% +0.23%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.574 -> 5.578 ( +0.07%) [ +0.14% +0.11% +0.00% / +0.07% +0.38% +0.20%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) B = [5, 20, 40, 16] (stride (1, 3200, 80, 5)) A = [5, 20, 40, 4] (stride (40, 800, 1, 200)) dim = 3 2.387 -> 2.386 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.38% +0.38%] index_add_ linear : Elapsed 0.024 ms (2.390 ms / 100) 2.354 -> 2.353 ( -0.04%) [ +0.00% +0.17% +0.13% / -0.04% +0.30% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.354 ms / 100) 2.385 -> 2.394 ( +0.38%) [ +0.38% +0.21% +0.00% / +0.38% +0.55% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.356 -> 2.361 ( +0.21%) [ +0.00% +0.00% +0.08% / +0.21% +0.51% +0.42%] index_copy_ reverse : Elapsed 0.024 ms (2.356 ms / 100) 2.412 -> 2.417 ( +0.21%) [ +0.12% +0.00% +0.08% / +0.21% +0.75% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.415 ms / 100) 2.401 -> 2.405 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.17% +0.62% +0.50%] index_copy_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.50% +0.66%] index_add_ strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.428 -> 2.432 ( +0.16%) [ +0.00% +0.16% +0.12% / +0.16% +0.66% +0.54%] index_copy_ strided 3 : Elapsed 0.024 ms (2.428 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.25% +0.00% +0.13% / +0.00% +0.29% +0.59%] index_add_ strided 5 : Elapsed 0.024 ms (2.382 ms / 100) 2.365 -> 2.371 ( +0.25%) [ +0.00% +0.04% +0.13% / +0.34% +0.25% +0.30%] index_copy_ strided 5 : Elapsed 0.024 ms (2.365 ms / 100) 2.389 -> 2.391 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.38% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.389 ms / 100) 2.383 -> 2.385 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.38% +0.46%] index_copy_ strided 7 : Elapsed 0.024 ms (2.384 ms / 100) 2.408 -> 2.403 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.21% -0.21% -0.08%] index_add_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.385 -> 2.390 ( +0.21%) [ +0.08% +0.00% +0.29% / +0.21% +0.50% +0.42%] index_copy_ perm : Elapsed 0.024 ms (2.387 ms / 100) 2.405 -> 2.406 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.04% +0.08% +0.37%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.388 -> 2.391 ( +0.13%) [ +0.00% +0.17% +0.00% / +0.13% +0.46% +0.46%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.388 ms / 100) 4.868 -> 4.870 ( +0.04%) [ +0.06% +0.21% +0.00% / +0.04% +0.49% +0.43%] index_select const : Elapsed 0.049 ms (4.871 ms / 100) 4.922 -> 4.926 ( +0.08%) [ +0.00% +0.14% +0.14% / +0.08% +0.47% +0.45%] index_select wrap : Elapsed 0.049 ms (4.922 ms / 100) 4.928 -> 4.936 ( +0.16%) [ +0.00% +0.16% +0.02% / +0.16% +0.45% +0.34%] index_select linear : Elapsed 0.049 ms (4.928 ms / 100) 4.924 -> 4.930 ( +0.12%) [ +0.28% +0.22% +0.00% / +0.12% +0.45% +0.59%] index_select reverse : Elapsed 0.049 ms (4.938 ms / 100) 4.879 -> 4.885 ( +0.12%) [ +0.18% +0.04% +0.00% / +0.12% +0.43% +0.49%] index_select skip64 : Elapsed 0.049 ms (4.888 ms / 100) 4.879 -> 4.883 ( +0.08%) [ +0.06% +0.08% +0.00% / +0.08% +0.25% +0.20%] index_select skip256 : Elapsed 0.049 ms (4.882 ms / 100) 4.921 -> 4.919 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.08% +0.28%] index_select spread : Elapsed 0.049 ms (4.925 ms / 100) 4.922 -> 4.931 ( +0.18%) [ +0.12% +0.00% +0.04% / +0.18% +0.28% +0.37%] index_select strided 3 : Elapsed 0.049 ms (4.928 ms / 100) 4.919 -> 4.930 ( +0.22%) [ +0.28% +0.00% +0.12% / +0.22% +0.43% +0.24%] index_select random : Elapsed 0.049 ms (4.933 ms / 100) 4.916 -> 4.924 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.24% +0.33%] index_select random_sorted : Elapsed 0.049 ms (4.918 ms / 100) B = [5, 20, 40, 16] (stride (320, 16, 1600, 1)) A = [5, 20, 40, 4] (stride (4, 800, 20, 1)) dim = 3 1.219 -> 1.225 ( +0.49%) [ +0.25% +0.00% +0.49% / +0.49% +1.89% +2.30%] index_add_ linear : Elapsed 0.012 ms (1.222 ms / 100) 1.186 -> 1.189 ( +0.25%) [ +0.08% +0.34% +0.00% / +0.25% +1.26% +1.85%] index_copy_ linear : Elapsed 0.012 ms (1.187 ms / 100) 1.217 -> 1.217 ( +0.00%) [ +0.58% +0.25% +0.00% / +0.00% +1.81% +1.40%] index_add_ reverse : Elapsed 0.012 ms (1.224 ms / 100) 1.179 -> 1.188 ( +0.76%) [ +0.51% +0.51% +0.00% / +0.76% +2.29% +1.78%] index_copy_ reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.265 -> 1.271 ( +0.47%) [ +0.00% +0.08% +0.08% / +0.47% +1.66% +1.50%] index_add_ spread : Elapsed 0.013 ms (1.265 ms / 100) 1.257 -> 1.262 ( +0.40%) [ +0.16% +0.40% +0.00% / +0.40% +1.35% +0.88%] index_copy_ spread : Elapsed 0.013 ms (1.259 ms / 100) 1.253 -> 1.258 ( +0.40%) [ +0.00% +0.24% +0.08% / +0.40% +2.31% +2.23%] index_add_ strided 3 : Elapsed 0.013 ms (1.253 ms / 100) 1.255 -> 1.257 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +1.20% +1.35%] index_copy_ strided 3 : Elapsed 0.013 ms (1.255 ms / 100) 1.263 -> 1.266 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.24% +2.14% +2.22%] index_add_ strided 5 : Elapsed 0.013 ms (1.263 ms / 100) 1.254 -> 1.261 ( +0.56%) [ +0.08% +0.08% +0.00% / +0.56% +1.44% +1.36%] index_copy_ strided 5 : Elapsed 0.013 ms (1.255 ms / 100) 1.262 -> 1.265 ( +0.24%) [ +0.00% +0.08% +0.48% / +0.24% +2.38% +2.38%] index_add_ strided 7 : Elapsed 0.013 ms (1.262 ms / 100) 1.256 -> 1.269 ( +1.04%) [ +0.16% +0.00% +0.24% / +1.04% +1.75% +1.35%] index_copy_ strided 7 : Elapsed 0.013 ms (1.258 ms / 100) 1.282 -> 1.285 ( +0.23%) [ +0.00% +0.23% +0.08% / +0.23% +2.34% +2.57%] index_add_ perm : Elapsed 0.013 ms (1.282 ms / 100) 1.259 -> 1.261 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +1.19% +1.51%] index_copy_ perm : Elapsed 0.013 ms (1.260 ms / 100) 1.263 -> 1.266 ( +0.24%) [ +0.00% +0.16% +0.16% / +0.24% +1.82% +1.90%] index_add_ perm_sorted : Elapsed 0.013 ms (1.263 ms / 100) 1.256 -> 1.257 ( +0.08%) [ +0.00% +0.00% +0.48% / +0.08% +1.51% +1.04%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.256 ms / 100) 2.120 -> 2.119 ( -0.05%) [ +0.09% +0.05% +0.00% / -0.05% +1.13% +0.90%] index_select const : Elapsed 0.021 ms (2.122 ms / 100) 2.119 -> 2.118 ( -0.05%) [ +0.05% +0.14% +0.00% / -0.05% +0.80% +0.85%] index_select wrap : Elapsed 0.021 ms (2.120 ms / 100) 2.118 -> 2.119 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.99% +0.94%] index_select linear : Elapsed 0.021 ms (2.118 ms / 100) 2.118 -> 2.125 ( +0.33%) [ +0.14% +0.09% +0.00% / +0.33% +1.23% +1.46%] index_select reverse : Elapsed 0.021 ms (2.121 ms / 100) 2.118 -> 2.123 ( +0.24%) [ +0.00% +0.28% +0.05% / +0.24% +1.27% +1.51%] index_select skip64 : Elapsed 0.021 ms (2.118 ms / 100) 2.114 -> 2.119 ( +0.24%) [ +0.19% +0.09% +0.00% / +0.24% +1.14% +1.47%] index_select skip256 : Elapsed 0.021 ms (2.118 ms / 100) 2.113 -> 2.120 ( +0.33%) [ +0.38% +0.33% +0.00% / +0.33% +1.09% +1.37%] index_select spread : Elapsed 0.021 ms (2.121 ms / 100) 2.120 -> 2.123 ( +0.14%) [ +0.24% +0.33% +0.00% / +0.14% +0.90% +1.51%] index_select strided 3 : Elapsed 0.021 ms (2.125 ms / 100) 2.116 -> 2.121 ( +0.24%) [ +0.24% +0.09% +0.00% / +0.24% +0.99% +1.13%] index_select random : Elapsed 0.021 ms (2.121 ms / 100) 2.121 -> 2.125 ( +0.19%) [ +0.19% +0.00% +0.14% / +0.19% +0.80% +1.08%] index_select random_sorted : Elapsed 0.021 ms (2.125 ms / 100) out_shape = [16, 40, 4, 20] in_shape = [5, 40, 4, 20] idx_dim = 0 B = [16, 40, 4, 20] (stride (3200, 80, 20, 1)) A = [5, 40, 4, 20] (stride (1, 400, 100, 5)) dim = 0 0.769 -> 0.769 ( +0.00%) [ +0.39% +0.00% +0.39% / +0.00% +0.39% +0.52%] index_add_ linear : Elapsed 0.008 ms (0.772 ms / 100) 0.756 -> 0.757 ( +0.13%) [ +0.26% +0.13% +0.00% / +0.13% +1.19% +1.32%] index_copy_ linear : Elapsed 0.008 ms (0.758 ms / 100) 0.770 -> 0.771 ( +0.13%) [ +0.52% +0.00% +0.39% / +0.13% +1.04% +0.52%] index_add_ reverse : Elapsed 0.008 ms (0.774 ms / 100) 0.751 -> 0.761 ( +1.33%) [ +0.80% +0.00% +0.13% / +1.33% +2.26% +1.86%] index_copy_ reverse : Elapsed 0.008 ms (0.757 ms / 100) 0.772 -> 0.775 ( +0.39%) [ +0.26% +0.00% +0.78% / +0.78% +0.65% +0.39%] index_add_ spread : Elapsed 0.008 ms (0.774 ms / 100) 0.756 -> 0.763 ( +0.93%) [ +0.13% +1.19% +0.00% / +0.93% +1.72% +1.46%] index_copy_ spread : Elapsed 0.008 ms (0.757 ms / 100) 0.771 -> 0.772 ( +0.13%) [ +0.00% +0.26% +0.13% / +0.13% +0.52% +1.04%] index_add_ strided 3 : Elapsed 0.008 ms (0.771 ms / 100) 0.753 -> 0.764 ( +1.46%) [ +0.00% +1.46% +0.53% / +1.46% +1.46% +2.12%] index_copy_ strided 3 : Elapsed 0.008 ms (0.753 ms / 100) 0.770 -> 0.770 ( +0.00%) [ +0.26% +0.00% +0.39% / +0.52% +0.00% +0.26%] index_add_ strided 5 : Elapsed 0.008 ms (0.772 ms / 100) 0.763 -> 0.756 ( -0.92%) [ +0.00% +0.52% +0.26% / +0.13% -0.92% -0.26%] index_copy_ strided 5 : Elapsed 0.008 ms (0.763 ms / 100) 0.768 -> 0.774 ( +0.78%) [ +0.65% +0.52% +0.00% / +0.78% +3.78% +2.34%] index_add_ strided 7 : Elapsed 0.008 ms (0.773 ms / 100) 0.747 -> 0.754 ( +0.94%) [ +0.00% +0.54% +0.40% / +0.94% +3.08% +4.55%] index_copy_ strided 7 : Elapsed 0.007 ms (0.747 ms / 100) 0.770 -> 0.771 ( +0.13%) [ +0.26% +0.00% +0.00% / +0.13% +0.39% +0.52%] index_add_ perm : Elapsed 0.008 ms (0.772 ms / 100) 0.757 -> 0.766 ( +1.19%) [ +0.26% +0.00% +0.26% / +1.19% +1.72% +1.59%] index_copy_ perm : Elapsed 0.008 ms (0.759 ms / 100) 0.770 -> 0.776 ( +0.78%) [ +0.00% +0.26% +0.91% / +0.78% +1.30% +0.91%] index_add_ perm_sorted : Elapsed 0.008 ms (0.770 ms / 100) 0.756 -> 0.763 ( +0.93%) [ +0.40% +0.00% +0.53% / +1.19% +1.98% +0.93%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.759 ms / 100) 1.514 -> 1.516 ( +0.13%) [ +0.20% +0.00% +0.26% / +0.20% +0.46% +0.13%] index_select const : Elapsed 0.015 ms (1.517 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.07% +0.00% +0.20% / +0.13% -0.07% +0.13%] index_select wrap : Elapsed 0.015 ms (1.520 ms / 100) 1.521 -> 1.519 ( -0.13%) [ +0.07% +0.07% +0.00% / -0.07% -0.07% -0.13%] index_select linear : Elapsed 0.015 ms (1.522 ms / 100) 1.517 -> 1.521 ( +0.26%) [ +0.00% +0.07% +0.20% / +0.26% +0.33% +0.26%] index_select reverse : Elapsed 0.015 ms (1.517 ms / 100) 1.516 -> 1.516 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.26% +0.53%] index_select skip64 : Elapsed 0.015 ms (1.517 ms / 100) 1.515 -> 1.515 ( +0.00%) [ +0.26% +0.33% +0.00% / +0.13% +0.40% +0.00%] index_select skip256 : Elapsed 0.015 ms (1.519 ms / 100) 1.515 -> 1.516 ( +0.07%) [ +0.00% +0.33% +0.20% / +0.07% +0.53% +0.26%] index_select spread : Elapsed 0.015 ms (1.515 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.39% +0.00% +0.07% / +0.07% +0.39% +0.13%] index_select strided 3 : Elapsed 0.015 ms (1.525 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.20% +0.20%] index_select random : Elapsed 0.015 ms (1.518 ms / 100) 1.512 -> 1.517 ( +0.33%) [ +0.00% +0.13% +0.33% / +0.33% +0.66% +0.60%] index_select random_sorted : Elapsed 0.015 ms (1.512 ms / 100) B = [16, 40, 4, 20] (stride (3200, 80, 1, 4)) A = [5, 40, 4, 20] (stride (40, 1, 200, 800)) dim = 0 2.571 -> 2.571 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.12% +0.12%] index_add_ linear : Elapsed 0.026 ms (2.575 ms / 100) 2.482 -> 2.489 ( +0.28%) [ +0.12% +0.16% +0.00% / +0.28% +0.28% +0.48%] index_copy_ linear : Elapsed 0.025 ms (2.485 ms / 100) 2.562 -> 2.565 ( +0.12%) [ +0.04% +0.20% +0.00% / +0.12% +0.39% +0.39%] index_add_ reverse : Elapsed 0.026 ms (2.563 ms / 100) 2.476 -> 2.479 ( +0.12%) [ +0.16% +0.00% +0.00% / +0.12% +0.36% +0.32%] index_copy_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.563 -> 2.569 ( +0.23%) [ +0.12% +0.12% +0.00% / +0.39% +0.27% +0.23%] index_add_ spread : Elapsed 0.026 ms (2.566 ms / 100) 2.478 -> 2.478 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.44% +0.40%] index_copy_ spread : Elapsed 0.025 ms (2.481 ms / 100) 2.565 -> 2.569 ( +0.16%) [ +0.00% +0.23% +0.04% / +0.16% +0.35% +0.51%] index_add_ strided 3 : Elapsed 0.026 ms (2.565 ms / 100) 2.480 -> 2.482 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.48% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.482 ms / 100) 2.561 -> 2.567 ( +0.23%) [ +0.12% +0.20% +0.00% / +0.23% +0.39% +0.31%] index_add_ strided 5 : Elapsed 0.026 ms (2.564 ms / 100) 2.478 -> 2.481 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.36% +0.36% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.479 ms / 100) 2.559 -> 2.562 ( +0.12%) [ +0.12% +0.31% +0.00% / +0.12% +0.51% +0.55%] index_add_ strided 7 : Elapsed 0.026 ms (2.562 ms / 100) 2.472 -> 2.472 ( +0.00%) [ +0.12% +0.32% +0.00% / +0.00% +0.61% +0.77%] index_copy_ strided 7 : Elapsed 0.025 ms (2.475 ms / 100) 2.561 -> 2.562 ( +0.04%) [ +0.31% +0.00% +0.20% / +0.04% +0.47% +0.59%] index_add_ perm : Elapsed 0.026 ms (2.569 ms / 100) 2.479 -> 2.481 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.40% +0.44%] index_copy_ perm : Elapsed 0.025 ms (2.479 ms / 100) 2.553 -> 2.560 ( +0.27%) [ +0.27% +0.20% +0.00% / +0.27% +0.67% +0.71%] index_add_ perm_sorted : Elapsed 0.026 ms (2.560 ms / 100) 2.468 -> 2.482 ( +0.57%) [ +0.16% +0.16% +0.00% / +0.61% +0.57% +0.81%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 5.589 -> 5.599 ( +0.18%) [ +0.27% +0.00% +0.14% / +0.18% +0.66% +0.68%] index_select const : Elapsed 0.056 ms (5.604 ms / 100) 5.606 -> 5.616 ( +0.18%) [ +0.14% +0.00% +0.02% / +0.18% +0.46% +0.52%] index_select wrap : Elapsed 0.056 ms (5.614 ms / 100) 5.618 -> 5.622 ( +0.07%) [ +0.05% +0.16% +0.00% / +0.07% +0.48% +0.68%] index_select linear : Elapsed 0.056 ms (5.621 ms / 100) 5.599 -> 5.596 ( -0.05%) [ +0.07% +0.00% +0.00% / -0.05% +0.61% +0.64%] index_select reverse : Elapsed 0.056 ms (5.603 ms / 100) 5.576 -> 5.593 ( +0.30%) [ +0.30% +0.23% +0.00% / +0.30% +0.38% +0.79%] index_select skip64 : Elapsed 0.056 ms (5.593 ms / 100) 5.567 -> 5.579 ( +0.22%) [ +0.56% +0.49% +0.00% / +0.22% +1.02% +0.95%] index_select skip256 : Elapsed 0.056 ms (5.598 ms / 100) 5.595 -> 5.598 ( +0.05%) [ +0.04% +0.07% +0.00% / +0.05% +0.61% +0.61%] index_select spread : Elapsed 0.056 ms (5.597 ms / 100) 5.604 -> 5.607 ( +0.05%) [ +0.14% +0.02% +0.00% / +0.05% +0.61% +0.62%] index_select strided 3 : Elapsed 0.056 ms (5.612 ms / 100) 5.600 -> 5.602 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.04% +0.66% +0.70%] index_select random : Elapsed 0.056 ms (5.607 ms / 100) 5.584 -> 5.591 ( +0.13%) [ +0.00% +0.20% +0.07% / +0.13% +0.72% +0.63%] index_select random_sorted : Elapsed 0.056 ms (5.584 ms / 100) B = [16, 40, 4, 20] (stride (3200, 20, 800, 1)) A = [5, 40, 4, 20] (stride (80, 400, 1, 4)) dim = 0 2.296 -> 2.300 ( +0.17%) [ +0.35% +0.13% +0.00% / +0.17% +1.74% +1.44%] index_add_ linear : Elapsed 0.023 ms (2.304 ms / 100) 2.246 -> 2.258 ( +0.53%) [ +0.22% +0.18% +0.00% / +0.53% +1.47% +1.20%] index_copy_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.296 -> 2.306 ( +0.44%) [ +0.00% +0.17% +0.17% / +0.44% +1.70% +1.52%] index_add_ reverse : Elapsed 0.023 ms (2.296 ms / 100) 2.245 -> 2.263 ( +0.80%) [ +0.09% +0.40% +0.00% / +0.80% +1.47% +1.34%] index_copy_ reverse : Elapsed 0.022 ms (2.247 ms / 100) 2.303 -> 2.305 ( +0.09%) [ +0.00% +0.04% +0.13% / +0.09% +1.52% +1.65%] index_add_ spread : Elapsed 0.023 ms (2.303 ms / 100) 2.250 -> 2.267 ( +0.76%) [ +0.00% +0.13% +0.00% / +0.76% +1.38% +1.33%] index_copy_ spread : Elapsed 0.022 ms (2.250 ms / 100) 2.303 -> 2.307 ( +0.17%) [ +0.22% +0.17% +0.00% / +0.17% +1.26% +1.04%] index_add_ strided 3 : Elapsed 0.023 ms (2.308 ms / 100) 2.249 -> 2.253 ( +0.18%) [ +0.36% +0.44% +0.00% / +0.18% +1.02% +1.11%] index_copy_ strided 3 : Elapsed 0.023 ms (2.257 ms / 100) 2.296 -> 2.302 ( +0.26%) [ +0.22% +0.30% +0.00% / +0.26% +1.70% +1.83%] index_add_ strided 5 : Elapsed 0.023 ms (2.301 ms / 100) 2.249 -> 2.246 ( -0.13%) [ +0.09% +0.04% +0.00% / -0.13% +1.47% +1.42%] index_copy_ strided 5 : Elapsed 0.023 ms (2.251 ms / 100) 2.299 -> 2.297 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +1.35% +1.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.300 ms / 100) 2.243 -> 2.249 ( +0.27%) [ +0.13% +0.22% +0.00% / +0.27% +1.20% +1.52%] index_copy_ strided 7 : Elapsed 0.022 ms (2.246 ms / 100) 2.305 -> 2.296 ( -0.39%) [ +0.00% +0.00% +0.30% / -0.39% +1.56% +1.65%] index_add_ perm : Elapsed 0.023 ms (2.305 ms / 100) 2.253 -> 2.249 ( -0.18%) [ +0.00% +0.36% +0.31% / -0.18% +1.24% +1.33%] index_copy_ perm : Elapsed 0.023 ms (2.253 ms / 100) 2.301 -> 2.300 ( -0.04%) [ +0.26% +0.04% +0.00% / -0.04% +1.69% +1.65%] index_add_ perm_sorted : Elapsed 0.023 ms (2.307 ms / 100) 2.248 -> 2.245 ( -0.13%) [ +0.00% +0.00% +0.04% / -0.13% +1.78% +1.65%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.248 ms / 100) 4.864 -> 4.872 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.16% +0.93% +0.66%] index_select const : Elapsed 0.049 ms (4.866 ms / 100) 4.863 -> 4.883 ( +0.41%) [ +0.14% +0.02% +0.00% / +0.41% +1.19% +1.27%] index_select wrap : Elapsed 0.049 ms (4.870 ms / 100) 4.886 -> 4.890 ( +0.08%) [ +0.00% +0.08% +0.02% / +0.08% +1.02% +0.90%] index_select linear : Elapsed 0.049 ms (4.886 ms / 100) 4.879 -> 4.884 ( +0.10%) [ +0.18% +0.00% +0.20% / +0.10% +0.78% +0.76%] index_select reverse : Elapsed 0.049 ms (4.888 ms / 100) 4.865 -> 4.866 ( +0.02%) [ +0.43% +0.00% +0.21% / +0.02% +0.66% +1.27%] index_select skip64 : Elapsed 0.049 ms (4.886 ms / 100) 4.864 -> 4.876 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.82% +0.74%] index_select skip256 : Elapsed 0.049 ms (4.868 ms / 100) 4.863 -> 4.872 ( +0.19%) [ +0.27% +0.33% +0.00% / +0.19% +1.15% +1.34%] index_select spread : Elapsed 0.049 ms (4.876 ms / 100) 4.855 -> 4.871 ( +0.33%) [ +0.00% +0.08% +0.47% / +0.33% +1.79% +1.34%] index_select strided 3 : Elapsed 0.049 ms (4.855 ms / 100) 4.876 -> 4.890 ( +0.29%) [ +0.00% +0.04% +0.10% / +0.29% +0.96% +1.23%] index_select random : Elapsed 0.049 ms (4.876 ms / 100) 4.850 -> 4.865 ( +0.31%) [ +0.06% +0.21% +0.00% / +0.31% +1.40% +1.42%] index_select random_sorted : Elapsed 0.049 ms (4.853 ms / 100) B = [16, 40, 4, 20] (stride (3200, 4, 1, 160)) A = [5, 40, 4, 20] (stride (20, 400, 100, 1)) dim = 0 0.821 -> 0.826 ( +0.61%) [ +0.12% +0.00% +0.12% / +0.61% +7.80% +7.67%] index_add_ linear : Elapsed 0.008 ms (0.822 ms / 100) 0.840 -> 0.844 ( +0.48%) [ +0.00% +0.24% +0.12% / +0.48% +5.24% +4.64%] index_copy_ linear : Elapsed 0.008 ms (0.840 ms / 100) 0.818 -> 0.820 ( +0.24%) [ +0.00% +0.49% +0.37% / +0.24% +7.58% +7.95%] index_add_ reverse : Elapsed 0.008 ms (0.818 ms / 100) 0.838 -> 0.840 ( +0.24%) [ +0.12% +0.60% +0.00% / +0.24% +5.25% +4.77%] index_copy_ reverse : Elapsed 0.008 ms (0.839 ms / 100) 0.820 -> 0.826 ( +0.73%) [ +0.00% +0.85% +0.61% / +0.73% +7.56% +7.68%] index_add_ spread : Elapsed 0.008 ms (0.820 ms / 100) 0.846 -> 0.850 ( +0.47%) [ +0.24% +0.00% +0.12% / +0.47% +3.66% +3.55%] index_copy_ spread : Elapsed 0.008 ms (0.848 ms / 100) 0.824 -> 0.828 ( +0.49%) [ +0.12% +0.24% +0.00% / +0.49% +6.31% +6.43%] index_add_ strided 3 : Elapsed 0.008 ms (0.825 ms / 100) 0.844 -> 0.852 ( +0.95%) [ +0.12% +0.00% +0.00% / +0.95% +4.38% +4.38%] index_copy_ strided 3 : Elapsed 0.008 ms (0.845 ms / 100) 0.867 -> 0.871 ( +0.46%) [ +0.00% +0.23% +0.12% / +0.46% +0.46% +0.58%] index_add_ strided 5 : Elapsed 0.009 ms (0.867 ms / 100) 0.862 -> 0.869 ( +0.81%) [ +0.46% +0.46% +0.00% / +0.81% +1.28% +0.81%] index_copy_ strided 5 : Elapsed 0.009 ms (0.866 ms / 100) 0.821 -> 0.822 ( +0.12%) [ +0.00% +0.24% +0.73% / +0.12% +7.80% +7.67%] index_add_ strided 7 : Elapsed 0.008 ms (0.821 ms / 100) 0.843 -> 0.847 ( +0.47%) [ +0.47% +0.00% +1.30% / +0.47% +5.46% +5.46%] index_copy_ strided 7 : Elapsed 0.008 ms (0.847 ms / 100) 0.820 -> 0.820 ( +0.00%) [ +0.12% +0.00% +0.61% / +0.00% +7.20% +6.83%] index_add_ perm : Elapsed 0.008 ms (0.821 ms / 100) 0.840 -> 0.844 ( +0.48%) [ +0.00% +0.60% +1.43% / +0.48% +4.64% +4.17%] index_copy_ perm : Elapsed 0.008 ms (0.840 ms / 100) 0.820 -> 0.820 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +6.95% +7.44%] index_add_ perm_sorted : Elapsed 0.008 ms (0.821 ms / 100) 0.838 -> 0.838 ( +0.00%) [ +0.00% +0.00% +0.24% / +0.00% +5.61% +4.53%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.838 ms / 100) 1.718 -> 1.719 ( +0.06%) [ +0.00% +0.00% +0.17% / +0.17% +0.06% +0.35%] index_select const : Elapsed 0.017 ms (1.718 ms / 100) 1.734 -> 1.734 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.29% +0.00% +0.40%] index_select wrap : Elapsed 0.017 ms (1.735 ms / 100) 1.736 -> 1.746 ( +0.58%) [ +0.17% +0.00% +0.00% / +0.58% +1.04% +0.98%] index_select linear : Elapsed 0.017 ms (1.739 ms / 100) 1.727 -> 1.730 ( +0.17%) [ +0.29% +0.00% +0.12% / +0.17% +0.75% +0.69%] index_select reverse : Elapsed 0.017 ms (1.732 ms / 100) 1.720 -> 1.723 ( +0.17%) [ +0.00% +0.23% +0.58% / +0.35% +0.17% +0.52%] index_select skip64 : Elapsed 0.017 ms (1.720 ms / 100) 1.726 -> 1.725 ( -0.06%) [ +0.12% +0.00% +0.06% / -0.06% +0.17% +0.12%] index_select skip256 : Elapsed 0.017 ms (1.728 ms / 100) 1.731 -> 1.741 ( +0.58%) [ +0.29% +0.35% +0.00% / +0.58% +0.92% +1.04%] index_select spread : Elapsed 0.017 ms (1.736 ms / 100) 1.726 -> 1.738 ( +0.70%) [ +0.00% +0.29% +0.06% / +0.70% +1.68% +1.74%] index_select strided 3 : Elapsed 0.017 ms (1.726 ms / 100) 1.724 -> 1.736 ( +0.70%) [ +0.17% +0.41% +0.00% / +0.70% +0.93% +0.81%] index_select random : Elapsed 0.017 ms (1.727 ms / 100) 1.734 -> 1.741 ( +0.40%) [ +0.23% +0.00% +0.29% / +0.40% +0.92% +0.98%] index_select random_sorted : Elapsed 0.017 ms (1.738 ms / 100) B = [16, 40, 4, 20] (stride (80, 1280, 20, 1)) A = [5, 40, 4, 20] (stride (3200, 4, 1, 160)) dim = 0 0.859 -> 0.853 ( -0.70%) [ +0.00% +0.12% +0.12% / +0.35% -0.58% -0.70%] index_add_ linear : Elapsed 0.009 ms (0.859 ms / 100) 0.835 -> 0.831 ( -0.48%) [ +0.24% +0.00% +0.36% / +1.08% -0.48% -0.12%] index_copy_ linear : Elapsed 0.008 ms (0.837 ms / 100) 0.857 -> 0.855 ( -0.23%) [ +0.47% +0.00% +0.23% / +0.12% -0.12% -0.23%] index_add_ reverse : Elapsed 0.009 ms (0.861 ms / 100) 0.836 -> 0.835 ( -0.12%) [ +0.48% +0.00% +0.36% / +0.48% -0.12% +0.00%] index_copy_ reverse : Elapsed 0.008 ms (0.840 ms / 100) 0.868 -> 0.863 ( -0.58%) [ +0.00% +0.00% +0.23% / +0.00% -0.58% -0.23%] index_add_ spread : Elapsed 0.009 ms (0.868 ms / 100) 0.846 -> 0.841 ( -0.59%) [ +0.00% +0.12% +0.24% / +0.12% -0.12% -0.59%] index_copy_ spread : Elapsed 0.008 ms (0.846 ms / 100) 0.865 -> 0.855 ( -1.16%) [ +0.69% +0.00% +0.81% / +0.00% -1.16% -1.04%] index_add_ strided 3 : Elapsed 0.009 ms (0.871 ms / 100) 0.844 -> 0.836 ( -0.95%) [ +0.59% +0.00% +0.47% / +0.47% -0.95% -0.83%] index_copy_ strided 3 : Elapsed 0.008 ms (0.849 ms / 100) 0.868 -> 0.850 ( -2.07%) [ +0.00% +0.12% +0.12% / +0.35% -2.07% -1.96%] index_add_ strided 5 : Elapsed 0.009 ms (0.868 ms / 100) 0.847 -> 0.829 ( -2.13%) [ +0.12% +0.00% +0.00% / +0.59% -2.13% -1.53%] index_copy_ strided 5 : Elapsed 0.008 ms (0.848 ms / 100) 0.858 -> 0.850 ( -0.93%) [ +0.00% +0.23% +0.23% / +0.00% -0.93% -0.93%] index_add_ strided 7 : Elapsed 0.009 ms (0.858 ms / 100) 0.833 -> 0.829 ( -0.48%) [ +0.00% +0.36% +0.36% / +0.60% -0.24% -0.48%] index_copy_ strided 7 : Elapsed 0.008 ms (0.833 ms / 100) 0.864 -> 0.857 ( -0.81%) [ +0.00% +0.35% +0.69% / +0.23% -0.81% -0.46%] index_add_ perm : Elapsed 0.009 ms (0.864 ms / 100) 0.841 -> 0.840 ( -0.12%) [ +0.00% +0.12% +0.48% / +0.95% -0.12% +0.48%] index_copy_ perm : Elapsed 0.008 ms (0.841 ms / 100) 0.862 -> 0.864 ( +0.23%) [ +0.00% +0.12% +0.23% / +0.46% +0.23% +0.46%] index_add_ perm_sorted : Elapsed 0.009 ms (0.862 ms / 100) 0.839 -> 0.844 ( +0.60%) [ +0.12% +0.12% +0.00% / +0.60% +0.83% +1.55%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.840 ms / 100) 1.642 -> 1.650 ( +0.49%) [ +0.30% +0.24% +0.00% / +0.55% +0.49% +0.73%] index_select const : Elapsed 0.016 ms (1.647 ms / 100) 1.680 -> 1.680 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.18% +0.00% +0.00%] index_select wrap : Elapsed 0.017 ms (1.683 ms / 100) 1.708 -> 1.688 ( -1.17%) [ +0.23% +0.23% +0.00% / +0.18% -0.82% -1.17%] index_select linear : Elapsed 0.017 ms (1.712 ms / 100) 1.671 -> 1.672 ( +0.06%) [ +0.24% +0.12% +0.00% / +0.42% +0.12% +0.06%] index_select reverse : Elapsed 0.017 ms (1.675 ms / 100) 1.665 -> 1.674 ( +0.54%) [ +0.30% +0.00% +0.54% / +0.54% +0.72% +0.84%] index_select skip64 : Elapsed 0.017 ms (1.670 ms / 100) 1.644 -> 1.646 ( +0.12%) [ +0.00% +0.30% +0.12% / +0.12% +0.55% +0.36%] index_select skip256 : Elapsed 0.016 ms (1.644 ms / 100) 1.686 -> 1.683 ( -0.18%) [ +0.12% +0.00% +0.12% / +0.12% +0.12% -0.18%] index_select spread : Elapsed 0.017 ms (1.688 ms / 100) 1.686 -> 1.682 ( -0.24%) [ +0.12% +0.00% +0.06% / +0.18% -0.24% -0.18%] index_select strided 3 : Elapsed 0.017 ms (1.688 ms / 100) 1.684 -> 1.675 ( -0.53%) [ +0.06% +0.00% +0.06% / -0.18% -0.42% -0.53%] index_select random : Elapsed 0.017 ms (1.685 ms / 100) 1.671 -> 1.661 ( -0.60%) [ +0.12% +0.06% +0.00% / +0.18% -0.60% -0.12%] index_select random_sorted : Elapsed 0.017 ms (1.673 ms / 100) B = [16, 40, 4, 20] (stride (80, 1280, 1, 4)) A = [5, 40, 4, 20] (stride (1, 400, 100, 5)) dim = 0 2.090 -> 2.090 ( +0.00%) [ +0.24% +0.00% +0.14% / +0.00% +0.10% +0.33%] index_add_ linear : Elapsed 0.021 ms (2.095 ms / 100) 2.034 -> 2.039 ( +0.25%) [ +0.05% +0.00% +0.74% / +0.25% +0.54% +0.29%] index_copy_ linear : Elapsed 0.020 ms (2.035 ms / 100) 2.092 -> 2.089 ( -0.14%) [ +0.19% +0.05% +0.00% / -0.14% +0.10% +0.57%] index_add_ reverse : Elapsed 0.021 ms (2.096 ms / 100) 2.037 -> 2.030 ( -0.34%) [ +0.10% +0.15% +0.00% / -0.34% +0.10% +0.64%] index_copy_ reverse : Elapsed 0.020 ms (2.039 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.10% +0.24% +0.00% / +0.10% +0.62% +0.62%] index_add_ spread : Elapsed 0.021 ms (2.087 ms / 100) 2.031 -> 2.031 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.34% +0.59%] index_copy_ spread : Elapsed 0.020 ms (2.031 ms / 100) 2.085 -> 2.088 ( +0.14%) [ +0.24% +0.00% +0.05% / +0.14% +0.43% +0.53%] index_add_ strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.030 -> 2.030 ( +0.00%) [ +0.00% +0.10% +0.25% / +0.00% +0.54% +0.39%] index_copy_ strided 3 : Elapsed 0.020 ms (2.030 ms / 100) 2.087 -> 2.091 ( +0.19%) [ +0.00% +0.14% +0.19% / +0.19% +0.38% +0.34%] index_add_ strided 5 : Elapsed 0.021 ms (2.087 ms / 100) 2.029 -> 2.029 ( +0.00%) [ +0.15% +0.00% +0.25% / +0.00% +0.34% +0.44%] index_copy_ strided 5 : Elapsed 0.020 ms (2.032 ms / 100) 2.087 -> 2.090 ( +0.14%) [ +0.10% +0.14% +0.00% / +0.14% +0.38% +0.43%] index_add_ strided 7 : Elapsed 0.021 ms (2.089 ms / 100) 2.029 -> 2.030 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.64% +0.39%] index_copy_ strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.05% +0.34% / +0.05% +0.34% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.089 ms / 100) 2.030 -> 2.034 ( +0.20%) [ +0.15% +0.00% +0.15% / +0.20% +0.54% +0.59%] index_copy_ perm : Elapsed 0.020 ms (2.033 ms / 100) 2.092 -> 2.090 ( -0.10%) [ +0.10% +0.00% +0.05% / -0.10% +0.19% +0.43%] index_add_ perm_sorted : Elapsed 0.021 ms (2.094 ms / 100) 2.030 -> 2.033 ( +0.15%) [ +0.00% +0.05% +0.15% / +0.15% +0.64% +0.64%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.030 ms / 100) 4.205 -> 4.208 ( +0.07%) [ +0.74% +0.78% +0.00% / +0.07% +1.40% +0.64%] index_select const : Elapsed 0.042 ms (4.236 ms / 100) 4.212 -> 4.210 ( -0.05%) [ +0.00% +0.12% +0.12% / -0.05% +0.74% +0.59%] index_select wrap : Elapsed 0.042 ms (4.212 ms / 100) 4.223 -> 4.229 ( +0.14%) [ +0.00% +0.45% +0.31% / +0.14% +1.07% +0.90%] index_select linear : Elapsed 0.042 ms (4.223 ms / 100) 4.215 -> 4.216 ( +0.02%) [ +0.00% +0.62% +0.47% / +0.02% +0.45% +1.16%] index_select reverse : Elapsed 0.042 ms (4.215 ms / 100) 4.242 -> 4.241 ( -0.02%) [ +0.00% +0.12% +0.09% / -0.02% +0.54% +0.61%] index_select skip64 : Elapsed 0.042 ms (4.242 ms / 100) 4.235 -> 4.251 ( +0.38%) [ +0.05% +0.00% +0.09% / +0.38% +0.52% +0.68%] index_select skip256 : Elapsed 0.042 ms (4.237 ms / 100) 4.213 -> 4.235 ( +0.52%) [ +0.02% +0.57% +0.00% / +0.52% +0.55% +0.59%] index_select spread : Elapsed 0.042 ms (4.214 ms / 100) 4.213 -> 4.224 ( +0.26%) [ +0.00% +0.00% +0.00% / +0.26% +0.74% +0.62%] index_select strided 3 : Elapsed 0.042 ms (4.213 ms / 100) 4.205 -> 4.223 ( +0.43%) [ +0.05% +0.69% +0.00% / +0.43% +1.57% +1.52%] index_select random : Elapsed 0.042 ms (4.207 ms / 100) 4.236 -> 4.240 ( +0.09%) [ +0.33% +0.09% +0.00% / +0.09% +0.78% +0.76%] index_select random_sorted : Elapsed 0.042 ms (4.250 ms / 100) B = [16, 40, 4, 20] (stride (20, 1280, 320, 1)) A = [5, 40, 4, 20] (stride (1, 5, 200, 800)) dim = 0 2.409 -> 2.415 ( +0.25%) [ +0.37% +0.17% +0.00% / +0.25% +1.04% +1.00%] index_add_ linear : Elapsed 0.024 ms (2.418 ms / 100) 2.328 -> 2.333 ( +0.21%) [ +0.09% +0.00% +0.21% / +0.21% +0.90% +1.12%] index_copy_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.62% +0.66%] index_add_ reverse : Elapsed 0.024 ms (2.414 ms / 100) 2.325 -> 2.338 ( +0.56%) [ +0.30% +0.00% +0.43% / +0.56% +0.95% +0.77%] index_copy_ reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.388 -> 2.400 ( +0.50%) [ +0.21% +0.29% +0.00% / +0.50% +0.75% +0.84%] index_add_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.316 -> 2.337 ( +0.91%) [ +0.09% +0.00% +0.30% / +0.91% +0.95% +0.91%] index_copy_ spread : Elapsed 0.023 ms (2.318 ms / 100) 2.390 -> 2.389 ( -0.04%) [ +0.00% +0.00% +0.13% / -0.04% +0.75% +0.79%] index_add_ strided 3 : Elapsed 0.024 ms (2.390 ms / 100) 2.318 -> 2.324 ( +0.26%) [ +0.00% +0.04% +0.13% / +0.26% +0.82% +0.73%] index_copy_ strided 3 : Elapsed 0.023 ms (2.318 ms / 100) 2.409 -> 2.408 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.79% +0.75%] index_add_ strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.331 -> 2.342 ( +0.47%) [ +0.00% +0.04% +0.21% / +0.47% +1.24% +0.73%] index_copy_ strided 5 : Elapsed 0.023 ms (2.331 ms / 100) 2.405 -> 2.414 ( +0.37%) [ +0.12% +0.00% +0.08% / +0.37% +0.58% +0.58%] index_add_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.334 -> 2.337 ( +0.13%) [ +0.04% +0.00% +0.09% / +0.13% +0.90% +0.69%] index_copy_ strided 7 : Elapsed 0.023 ms (2.335 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.54% +0.70%] index_add_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.336 -> 2.339 ( +0.13%) [ +0.04% +0.00% +0.09% / +0.13% +0.86% +0.64%] index_copy_ perm : Elapsed 0.023 ms (2.337 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.46% +0.50%] index_add_ perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.327 -> 2.336 ( +0.39%) [ +0.26% +0.30% +0.00% / +0.39% +0.99% +0.95%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) 5.175 -> 5.174 ( -0.02%) [ +0.00% +0.04% +0.06% / -0.02% +0.56% +0.56%] index_select const : Elapsed 0.052 ms (5.175 ms / 100) 5.167 -> 5.165 ( -0.04%) [ +0.06% +0.00% +0.08% / -0.04% +0.75% +0.81%] index_select wrap : Elapsed 0.052 ms (5.170 ms / 100) 5.167 -> 5.173 ( +0.12%) [ +0.06% +0.00% +0.08% / +0.12% +0.64% +0.68%] index_select linear : Elapsed 0.052 ms (5.170 ms / 100) 5.167 -> 5.170 ( +0.06%) [ +0.08% +0.10% +0.00% / +0.06% +0.60% +0.58%] index_select reverse : Elapsed 0.052 ms (5.171 ms / 100) 5.174 -> 5.179 ( +0.10%) [ +0.00% +0.14% +0.02% / +0.10% +0.75% +0.83%] index_select skip64 : Elapsed 0.052 ms (5.174 ms / 100) 5.172 -> 5.183 ( +0.21%) [ +0.02% +0.00% +0.12% / +0.21% +0.68% +0.64%] index_select skip256 : Elapsed 0.052 ms (5.173 ms / 100) 5.163 -> 5.164 ( +0.02%) [ +0.17% +0.00% +0.08% / +0.02% +0.87% +0.72%] index_select spread : Elapsed 0.052 ms (5.172 ms / 100) 5.165 -> 5.168 ( +0.06%) [ +0.02% +0.04% +0.00% / +0.06% +0.64% +0.64%] index_select strided 3 : Elapsed 0.052 ms (5.166 ms / 100) 5.164 -> 5.171 ( +0.14%) [ +0.10% +0.14% +0.00% / +0.14% +0.79% +0.79%] index_select random : Elapsed 0.052 ms (5.169 ms / 100) 5.164 -> 5.168 ( +0.08%) [ +0.08% +0.19% +0.00% / +0.08% +0.83% +0.77%] index_select random_sorted : Elapsed 0.052 ms (5.168 ms / 100) B = [16, 40, 4, 20] (stride (800, 1, 12800, 40)) A = [5, 40, 4, 20] (stride (1, 20, 5, 800)) dim = 0 2.391 -> 2.400 ( +0.38%) [ +0.17% +0.08% +0.00% / +0.38% +0.84% +0.84%] index_add_ linear : Elapsed 0.024 ms (2.395 ms / 100) 2.336 -> 2.344 ( +0.34%) [ +0.09% +0.09% +0.00% / +0.34% +0.60% +0.60%] index_copy_ linear : Elapsed 0.023 ms (2.338 ms / 100) 2.393 -> 2.392 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.71% +0.63%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.335 -> 2.338 ( +0.13%) [ +0.26% +0.00% +0.17% / +0.13% +0.69% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.341 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.17% +0.00% +0.04% / +0.17% +0.67% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.339 -> 2.340 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.47% +0.34%] index_copy_ spread : Elapsed 0.023 ms (2.339 ms / 100) 2.394 -> 2.401 ( +0.29%) [ +0.04% +0.29% +0.00% / +0.29% +0.46% +0.63%] index_add_ strided 3 : Elapsed 0.024 ms (2.395 ms / 100) 2.336 -> 2.350 ( +0.60%) [ +0.26% +0.21% +0.00% / +0.60% +0.60% +0.86%] index_copy_ strided 3 : Elapsed 0.023 ms (2.342 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.42% +0.38%] index_add_ strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.339 -> 2.339 ( +0.00%) [ +0.00% +0.21% +0.04% / +0.00% +0.56% +0.64%] index_copy_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.402 -> 2.402 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.25% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.343 -> 2.347 ( +0.17%) [ +0.04% +0.00% +0.17% / +0.17% +0.17% +0.34%] index_copy_ strided 7 : Elapsed 0.023 ms (2.344 ms / 100) 2.390 -> 2.396 ( +0.25%) [ +0.00% +0.13% +0.08% / +0.25% +0.42% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.390 ms / 100) 2.331 -> 2.340 ( +0.39%) [ +0.00% +0.04% +0.30% / +0.39% +0.64% +0.69%] index_copy_ perm : Elapsed 0.023 ms (2.331 ms / 100) 2.390 -> 2.391 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.88% +0.88%] index_add_ perm_sorted : Elapsed 0.024 ms (2.391 ms / 100) 2.331 -> 2.336 ( +0.21%) [ +0.04% +0.00% +0.00% / +0.21% +0.69% +0.90%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.332 ms / 100) 5.134 -> 5.149 ( +0.29%) [ +0.35% +0.16% +0.00% / +0.29% +0.78% +0.58%] index_select const : Elapsed 0.052 ms (5.152 ms / 100) 5.125 -> 5.142 ( +0.33%) [ +0.18% +0.16% +0.00% / +0.33% +0.62% +0.53%] index_select wrap : Elapsed 0.051 ms (5.134 ms / 100) 5.118 -> 5.119 ( +0.02%) [ +0.12% +0.00% +0.31% / +0.02% +0.76% +0.76%] index_select linear : Elapsed 0.051 ms (5.124 ms / 100) 5.119 -> 5.130 ( +0.21%) [ +0.18% +0.00% +0.14% / +0.21% +0.59% +0.92%] index_select reverse : Elapsed 0.051 ms (5.128 ms / 100) 5.121 -> 5.126 ( +0.10%) [ +0.00% +0.06% +0.14% / +0.10% +0.70% +0.53%] index_select skip64 : Elapsed 0.051 ms (5.121 ms / 100) 5.120 -> 5.120 ( +0.00%) [ +0.00% +0.10% +0.12% / +0.00% +0.55% +0.57%] index_select skip256 : Elapsed 0.051 ms (5.120 ms / 100) 5.129 -> 5.144 ( +0.29%) [ +0.27% +0.00% +0.23% / +0.29% +0.80% +0.68%] index_select spread : Elapsed 0.051 ms (5.143 ms / 100) 5.127 -> 5.135 ( +0.16%) [ +0.02% +0.02% +0.00% / +0.16% +0.60% +0.57%] index_select strided 3 : Elapsed 0.051 ms (5.128 ms / 100) 5.119 -> 5.122 ( +0.06%) [ +0.25% +0.00% +0.27% / +0.06% +1.04% +1.04%] index_select random : Elapsed 0.051 ms (5.132 ms / 100) 5.109 -> 5.117 ( +0.16%) [ +0.14% +0.00% +0.04% / +0.16% +0.92% +0.74%] index_select random_sorted : Elapsed 0.051 ms (5.116 ms / 100) out_shape = [5, 16, 4, 20] in_shape = [5, 40, 4, 20] idx_dim = 1 B = [5, 16, 4, 20] (stride (1280, 20, 320, 1)) A = [5, 40, 4, 20] (stride (1, 5, 4000, 200)) dim = 1 3.923 -> 3.933 ( +0.25%) [ +0.15% +0.28% +0.00% / +0.25% +0.82% +0.89%] index_select const : Elapsed 0.039 ms (3.929 ms / 100) 3.924 -> 3.929 ( +0.13%) [ +0.13% +0.10% +0.00% / +0.13% +0.74% +0.69%] index_select wrap : Elapsed 0.039 ms (3.929 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.54% +0.51%] index_select linear : Elapsed 0.039 ms (3.925 ms / 100) 3.934 -> 3.938 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.51% +0.33%] index_select reverse : Elapsed 0.039 ms (3.934 ms / 100) 3.925 -> 3.932 ( +0.18%) [ +0.13% +0.00% +0.18% / +0.18% +0.61% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.930 ms / 100) 3.933 -> 3.946 ( +0.33%) [ +0.23% +0.00% +0.25% / +0.33% +0.46% +0.46%] index_select skip256 : Elapsed 0.039 ms (3.942 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.33% +0.36%] index_select spread : Elapsed 0.039 ms (3.923 ms / 100) 3.925 -> 3.925 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.28% +0.31%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.914 -> 3.915 ( +0.03%) [ +0.00% +0.15% +0.18% / +0.03% +0.20% +0.26%] index_select strided 5 : Elapsed 0.039 ms (3.914 ms / 100) 3.919 -> 3.920 ( +0.03%) [ +0.15% +0.10% +0.00% / +0.03% +0.18% +0.20%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.05% +0.25% +0.00% / +0.05% +0.31% +0.33%] index_select strided 8 : Elapsed 0.039 ms (3.927 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.25% +0.13%] index_select strided 16 : Elapsed 0.039 ms (3.925 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.51% +0.53%] index_select random : Elapsed 0.039 ms (3.933 ms / 100) 3.930 -> 3.925 ( -0.13%) [ +0.10% +0.10% +0.00% / -0.13% +0.56% +0.46%] index_select random_sorted : Elapsed 0.039 ms (3.934 ms / 100) 3.926 -> 3.924 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.33% +0.18%] index_select perm : Elapsed 0.039 ms (3.926 ms / 100) 3.919 -> 3.928 ( +0.23%) [ +0.00% +0.05% +0.03% / +0.23% +0.23% +0.33%] index_select perm_sorted : Elapsed 0.039 ms (3.919 ms / 100) B = [5, 16, 4, 20] (stride (1280, 4, 1, 64)) A = [5, 40, 4, 20] (stride (40, 1, 200, 800)) dim = 1 4.268 -> 4.285 ( +0.40%) [ +0.19% +0.00% +0.02% / +0.40% +0.63% +0.84%] index_select const : Elapsed 0.043 ms (4.276 ms / 100) 4.266 -> 4.265 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.70% +0.56%] index_select wrap : Elapsed 0.043 ms (4.266 ms / 100) 4.285 -> 4.282 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.75% +0.72%] index_select linear : Elapsed 0.043 ms (4.286 ms / 100) 4.292 -> 4.292 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.65% +0.65%] index_select reverse : Elapsed 0.043 ms (4.292 ms / 100) 4.262 -> 4.262 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.70%] index_select skip64 : Elapsed 0.043 ms (4.262 ms / 100) 4.263 -> 4.274 ( +0.26%) [ +0.00% +0.09% +0.21% / +0.26% +0.91% +0.89%] index_select skip256 : Elapsed 0.043 ms (4.263 ms / 100) 4.276 -> 4.278 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.89% +0.89%] index_select spread : Elapsed 0.043 ms (4.278 ms / 100) 4.275 -> 4.275 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.98% +0.89%] index_select strided 3 : Elapsed 0.043 ms (4.277 ms / 100) 4.275 -> 4.277 ( +0.05%) [ +0.09% +0.00% +0.07% / +0.05% +0.77% +0.84%] index_select strided 5 : Elapsed 0.043 ms (4.279 ms / 100) 4.277 -> 4.284 ( +0.16%) [ +0.00% +0.26% +0.00% / +0.16% +0.96% +0.91%] index_select strided 7 : Elapsed 0.043 ms (4.277 ms / 100) 4.294 -> 4.288 ( -0.14%) [ +0.02% +0.00% +0.07% / -0.14% +0.70% +0.68%] index_select strided 8 : Elapsed 0.043 ms (4.295 ms / 100) 4.281 -> 4.279 ( -0.05%) [ +0.12% +0.05% +0.00% / -0.05% +0.82% +0.75%] index_select strided 16 : Elapsed 0.043 ms (4.286 ms / 100) 4.272 -> 4.283 ( +0.26%) [ +0.02% +0.05% +0.00% / +0.26% +0.75% +0.77%] index_select random : Elapsed 0.043 ms (4.273 ms / 100) 4.269 -> 4.275 ( +0.14%) [ +0.09% +0.00% +0.07% / +0.14% +0.98% +0.89%] index_select random_sorted : Elapsed 0.043 ms (4.273 ms / 100) 4.284 -> 4.295 ( +0.26%) [ +0.00% +0.23% +0.14% / +0.26% +0.82% +0.96%] index_select perm : Elapsed 0.043 ms (4.284 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.12% +0.02% +0.00% / +0.02% +0.77% +0.72%] index_select perm_sorted : Elapsed 0.043 ms (4.290 ms / 100) B = [5, 16, 4, 20] (stride (80, 400, 1, 4)) A = [5, 40, 4, 20] (stride (20, 100, 4000, 1)) dim = 1 3.828 -> 3.830 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.57% +0.52%] index_select const : Elapsed 0.038 ms (3.830 ms / 100) 3.824 -> 3.824 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.39%] index_select wrap : Elapsed 0.038 ms (3.824 ms / 100) 3.828 -> 3.827 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.39% +0.34%] index_select linear : Elapsed 0.038 ms (3.829 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.42% +0.45%] index_select reverse : Elapsed 0.038 ms (3.814 ms / 100) 3.835 -> 3.837 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.57% +0.55%] index_select skip64 : Elapsed 0.038 ms (3.837 ms / 100) 3.827 -> 3.826 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.57% +0.57%] index_select skip256 : Elapsed 0.038 ms (3.827 ms / 100) 3.827 -> 3.826 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.44% +0.29%] index_select spread : Elapsed 0.038 ms (3.829 ms / 100) 3.819 -> 3.821 ( +0.05%) [ +0.18% +0.16% +0.00% / +0.05% +0.52% +0.55%] index_select strided 3 : Elapsed 0.038 ms (3.826 ms / 100) 3.812 -> 3.815 ( +0.08%) [ +0.03% +0.00% +0.05% / +0.08% +0.45% +0.39%] index_select strided 5 : Elapsed 0.038 ms (3.813 ms / 100) 3.825 -> 3.828 ( +0.08%) [ +0.13% +0.00% +0.05% / +0.08% +0.42% +0.34%] index_select strided 7 : Elapsed 0.038 ms (3.830 ms / 100) 3.826 -> 3.827 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.47% +0.55%] index_select strided 8 : Elapsed 0.038 ms (3.827 ms / 100) 3.833 -> 3.839 ( +0.16%) [ +0.05% +0.03% +0.00% / +0.16% +0.50% +0.39%] index_select strided 16 : Elapsed 0.038 ms (3.835 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.66% +0.63%] index_select random : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.52% +0.52%] index_select random_sorted : Elapsed 0.038 ms (3.814 ms / 100) 3.831 -> 3.836 ( +0.13%) [ +0.05% +0.10% +0.00% / +0.13% +0.37% +0.29%] index_select perm : Elapsed 0.038 ms (3.833 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.34% +0.37%] index_select perm_sorted : Elapsed 0.038 ms (3.818 ms / 100) B = [5, 16, 4, 20] (stride (80, 400, 1, 4)) A = [5, 40, 4, 20] (stride (1, 100, 4000, 5)) dim = 1 3.917 -> 3.921 ( +0.10%) [ +0.08% +0.00% +0.13% / +0.10% +0.71% +0.74%] index_select const : Elapsed 0.039 ms (3.920 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.69% +0.74%] index_select wrap : Elapsed 0.039 ms (3.927 ms / 100) 3.914 -> 3.922 ( +0.20%) [ +0.15% +0.00% +0.31% / +0.20% +0.69% +0.84%] index_select linear : Elapsed 0.039 ms (3.920 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.89% +0.94%] index_select reverse : Elapsed 0.039 ms (3.924 ms / 100) 3.907 -> 3.903 ( -0.10%) [ +0.00% +0.00% +0.28% / -0.10% +0.77% +0.74%] index_select skip64 : Elapsed 0.039 ms (3.907 ms / 100) 3.918 -> 3.921 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.74% +0.74%] index_select skip256 : Elapsed 0.039 ms (3.918 ms / 100) 3.919 -> 3.916 ( -0.08%) [ +0.10% +0.13% +0.00% / -0.08% +0.51% +0.54%] index_select spread : Elapsed 0.039 ms (3.923 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.13% +0.05% +0.00% / +0.03% +0.54% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.923 ms / 100) 3.908 -> 3.913 ( +0.13%) [ +0.13% +0.00% +0.05% / +0.13% +0.61% +0.54%] index_select strided 5 : Elapsed 0.039 ms (3.913 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.15% +0.05% +0.00% / +0.00% +0.66% +0.66%] index_select strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.08% +0.10% +0.00% / +0.13% +0.61% +0.56%] index_select strided 8 : Elapsed 0.039 ms (3.923 ms / 100) 3.913 -> 3.931 ( +0.46%) [ +0.03% +0.00% +0.15% / +0.46% +0.77% +0.79%] index_select strided 16 : Elapsed 0.039 ms (3.914 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.82% +0.84%] index_select random : Elapsed 0.039 ms (3.920 ms / 100) 3.906 -> 3.914 ( +0.20%) [ +0.13% +0.23% +0.00% / +0.20% +1.00% +0.92%] index_select random_sorted : Elapsed 0.039 ms (3.911 ms / 100) 3.915 -> 3.924 ( +0.23%) [ +0.26% +0.08% +0.00% / +0.23% +0.79% +0.79%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.13% +0.00% +0.15% / +0.10% +0.99% +1.02%] index_select perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) B = [5, 16, 4, 20] (stride (20, 400, 100, 1)) A = [5, 40, 4, 20] (stride (3200, 4, 1, 160)) dim = 1 4.203 -> 4.218 ( +0.36%) [ +0.00% +0.00% +0.00% / +0.59% +0.36% +0.36%] index_select const : Elapsed 0.042 ms (4.203 ms / 100) 4.215 -> 4.227 ( +0.28%) [ +0.07% +0.17% +0.00% / +0.28% +0.69% +0.47%] index_select wrap : Elapsed 0.042 ms (4.218 ms / 100) 4.180 -> 4.185 ( +0.12%) [ +0.00% +0.00% +0.10% / +0.12% +0.50% +0.43%] index_select linear : Elapsed 0.042 ms (4.180 ms / 100) 4.170 -> 4.176 ( +0.14%) [ +0.22% +0.22% +0.00% / +0.14% +0.55% +0.65%] index_select reverse : Elapsed 0.042 ms (4.179 ms / 100) 4.228 -> 4.237 ( +0.21%) [ +0.00% +0.21% +0.02% / +0.21% +0.50% +0.73%] index_select skip64 : Elapsed 0.042 ms (4.228 ms / 100) 4.204 -> 4.205 ( +0.02%) [ +0.00% +0.52% +0.55% / +0.02% +1.07% +0.69%] index_select skip256 : Elapsed 0.042 ms (4.204 ms / 100) 4.192 -> 4.192 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.57% +0.57%] index_select spread : Elapsed 0.042 ms (4.194 ms / 100) 4.201 -> 4.201 ( +0.00%) [ +0.00% +0.10% +0.07% / +0.00% +0.67% +0.64%] index_select strided 3 : Elapsed 0.042 ms (4.201 ms / 100) 4.187 -> 4.189 ( +0.05%) [ +0.02% +0.00% +0.12% / +0.05% +0.60% +0.55%] index_select strided 5 : Elapsed 0.042 ms (4.188 ms / 100) 4.207 -> 4.207 ( +0.00%) [ +0.07% +0.00% +0.02% / +0.00% +0.29% +0.38%] index_select strided 7 : Elapsed 0.042 ms (4.210 ms / 100) 4.218 -> 4.219 ( +0.02%) [ +0.00% +0.71% +0.47% / +0.02% +0.43% +0.43%] index_select strided 8 : Elapsed 0.042 ms (4.218 ms / 100) 4.185 -> 4.184 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.48% +0.48%] index_select strided 16 : Elapsed 0.042 ms (4.185 ms / 100) 4.185 -> 4.185 ( +0.00%) [ +0.19% +0.00% +0.07% / +0.00% +0.60% +0.48%] index_select random : Elapsed 0.042 ms (4.193 ms / 100) 4.184 -> 4.188 ( +0.10%) [ +0.07% +0.26% +0.00% / +0.10% +0.36% +0.50%] index_select random_sorted : Elapsed 0.042 ms (4.187 ms / 100) 4.191 -> 4.192 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.64% +0.38%] index_select perm : Elapsed 0.042 ms (4.193 ms / 100) 4.202 -> 4.209 ( +0.17%) [ +0.00% +0.21% +0.00% / +0.17% +0.57% +0.48%] index_select perm_sorted : Elapsed 0.042 ms (4.202 ms / 100) B = [5, 16, 4, 20] (stride (20, 400, 100, 1)) A = [5, 40, 4, 20] (stride (40, 1, 4000, 200)) dim = 1 3.922 -> 3.923 ( +0.03%) [ +0.13% +0.00% +0.10% / +0.03% +0.74% +0.79%] index_select const : Elapsed 0.039 ms (3.927 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.74% +0.66%] index_select wrap : Elapsed 0.039 ms (3.926 ms / 100) 3.919 -> 3.930 ( +0.28%) [ +0.10% +0.28% +0.00% / +0.28% +0.74% +0.56%] index_select linear : Elapsed 0.039 ms (3.923 ms / 100) 3.940 -> 3.942 ( +0.05%) [ +0.15% +0.03% +0.00% / +0.05% +0.74% +0.71%] index_select reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.910 -> 3.923 ( +0.33%) [ +0.23% +0.23% +0.00% / +0.33% +0.69% +0.74%] index_select skip64 : Elapsed 0.039 ms (3.919 ms / 100) 3.922 -> 3.921 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.82% +0.74%] index_select skip256 : Elapsed 0.039 ms (3.924 ms / 100) 3.916 -> 3.923 ( +0.18%) [ +0.20% +0.18% +0.00% / +0.18% +0.72% +0.77%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.930 -> 3.939 ( +0.23%) [ +0.00% +0.00% +0.20% / +0.23% +0.97% +0.76%] index_select strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.13% +0.00% +0.13% / +0.05% +0.54% +0.64%] index_select strided 5 : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.925 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.56% +0.56%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.946 -> 3.942 ( -0.10%) [ +0.00% +0.00% +0.05% / -0.10% +0.76% +0.73%] index_select strided 8 : Elapsed 0.039 ms (3.946 ms / 100) 3.908 -> 3.909 ( +0.03%) [ +0.20% +0.38% +0.00% / +0.03% +0.95% +0.54%] index_select strided 16 : Elapsed 0.039 ms (3.916 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.13% +0.08% +0.00% / +0.08% +0.74% +0.84%] index_select random : Elapsed 0.039 ms (3.929 ms / 100) 3.925 -> 3.933 ( +0.20%) [ +0.00% +0.00% +0.15% / +0.20% +0.69% +0.84%] index_select random_sorted : Elapsed 0.039 ms (3.925 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.00% +0.00% +0.05% / +0.03% +0.69% +0.71%] index_select perm : Elapsed 0.039 ms (3.923 ms / 100) 3.929 -> 3.935 ( +0.15%) [ +0.05% +0.00% +0.23% / +0.15% +0.84% +0.79%] index_select perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) B = [5, 16, 4, 20] (stride (1, 400, 100, 5)) A = [5, 40, 4, 20] (stride (80, 400, 1, 4)) dim = 1 3.530 -> 3.534 ( +0.11%) [ +0.08% +0.00% +0.11% / +0.11% +0.74% +0.59%] index_select const : Elapsed 0.035 ms (3.533 ms / 100) 3.515 -> 3.523 ( +0.23%) [ +0.26% +0.00% +0.00% / +0.23% +0.63% +0.65%] index_select wrap : Elapsed 0.035 ms (3.524 ms / 100) 3.524 -> 3.526 ( +0.06%) [ +0.14% +0.00% +0.26% / +0.06% +0.57% +0.60%] index_select linear : Elapsed 0.035 ms (3.529 ms / 100) 3.515 -> 3.516 ( +0.03%) [ +0.14% +0.06% +0.00% / +0.03% +0.63% +0.51%] index_select reverse : Elapsed 0.035 ms (3.520 ms / 100) 3.536 -> 3.539 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.54% +0.48%] index_select skip64 : Elapsed 0.035 ms (3.537 ms / 100) 3.531 -> 3.532 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.79% +0.68%] index_select skip256 : Elapsed 0.035 ms (3.533 ms / 100) 3.531 -> 3.533 ( +0.06%) [ +0.08% +0.00% +0.08% / +0.06% +0.48% +0.42%] index_select spread : Elapsed 0.035 ms (3.534 ms / 100) 3.522 -> 3.521 ( -0.03%) [ +0.20% +0.06% +0.00% / -0.03% +0.40% +0.40%] index_select strided 3 : Elapsed 0.035 ms (3.529 ms / 100) 3.511 -> 3.513 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.68% +0.63%] index_select strided 5 : Elapsed 0.035 ms (3.514 ms / 100) 3.539 -> 3.543 ( +0.11%) [ +0.00% +0.03% +0.03% / +0.11% +0.51% +0.54%] index_select strided 7 : Elapsed 0.035 ms (3.539 ms / 100) 3.533 -> 3.536 ( +0.08%) [ +0.00% +0.03% +0.08% / +0.08% +0.45% +0.48%] index_select strided 8 : Elapsed 0.035 ms (3.533 ms / 100) 3.551 -> 3.557 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.42% +0.56%] index_select strided 16 : Elapsed 0.036 ms (3.557 ms / 100) 3.519 -> 3.521 ( +0.06%) [ +0.17% +0.06% +0.00% / +0.06% +0.54% +0.51%] index_select random : Elapsed 0.035 ms (3.525 ms / 100) 3.515 -> 3.518 ( +0.09%) [ +0.03% +0.00% +0.00% / +0.09% +0.54% +0.48%] index_select random_sorted : Elapsed 0.035 ms (3.516 ms / 100) 3.534 -> 3.532 ( -0.06%) [ +0.06% +0.00% +0.03% / -0.06% +0.37% +0.06%] index_select perm : Elapsed 0.035 ms (3.536 ms / 100) 3.519 -> 3.523 ( +0.11%) [ +0.06% +0.00% +0.09% / +0.11% +0.37% +0.45%] index_select perm_sorted : Elapsed 0.035 ms (3.521 ms / 100) B = [5, 16, 4, 20] (stride (1, 400, 5, 20)) A = [5, 40, 4, 20] (stride (3200, 80, 1, 4)) dim = 1 3.838 -> 3.844 ( +0.16%) [ +0.26% +0.00% +0.18% / +0.16% +0.76% +0.70%] index_select const : Elapsed 0.038 ms (3.848 ms / 100) 3.814 -> 3.817 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.71% +0.71%] index_select wrap : Elapsed 0.038 ms (3.815 ms / 100) 3.820 -> 3.822 ( +0.05%) [ +0.03% +0.00% +0.10% / +0.05% +0.79% +0.76%] index_select linear : Elapsed 0.038 ms (3.821 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.89% +0.87%] index_select reverse : Elapsed 0.038 ms (3.813 ms / 100) 3.837 -> 3.838 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.63% +0.63%] index_select skip64 : Elapsed 0.038 ms (3.838 ms / 100) 3.839 -> 3.838 ( -0.03%) [ +0.08% +0.05% +0.00% / -0.03% +0.83% +0.76%] index_select skip256 : Elapsed 0.038 ms (3.842 ms / 100) 3.837 -> 3.839 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.83% +1.15%] index_select spread : Elapsed 0.038 ms (3.837 ms / 100) 3.831 -> 3.842 ( +0.29%) [ +0.29% +0.23% +0.00% / +0.29% +0.86% +0.86%] index_select strided 3 : Elapsed 0.038 ms (3.842 ms / 100) 3.841 -> 3.843 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.68% +0.73%] index_select strided 5 : Elapsed 0.038 ms (3.844 ms / 100) 3.831 -> 3.832 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.78% +0.81%] index_select strided 7 : Elapsed 0.038 ms (3.832 ms / 100) 3.852 -> 3.851 ( -0.03%) [ +0.00% +0.05% +0.03% / -0.03% +0.73% +0.78%] index_select strided 8 : Elapsed 0.039 ms (3.852 ms / 100) 3.838 -> 3.838 ( +0.00%) [ +0.13% +0.00% +0.08% / +0.00% +0.91% +0.83%] index_select strided 16 : Elapsed 0.038 ms (3.843 ms / 100) 3.842 -> 3.845 ( +0.08%) [ +0.05% +0.00% +0.05% / +0.08% +0.86% +0.83%] index_select random : Elapsed 0.038 ms (3.844 ms / 100) 3.836 -> 3.844 ( +0.21%) [ +0.08% +0.21% +0.00% / +0.21% +0.76% +0.73%] index_select random_sorted : Elapsed 0.038 ms (3.839 ms / 100) 3.828 -> 3.835 ( +0.18%) [ +0.29% +0.00% +0.10% / +0.18% +0.84% +0.91%] index_select perm : Elapsed 0.038 ms (3.839 ms / 100) 3.820 -> 3.821 ( +0.03%) [ +0.03% +0.16% +0.00% / +0.03% +0.79% +0.79%] index_select perm_sorted : Elapsed 0.038 ms (3.821 ms / 100) B = [5, 16, 4, 20] (stride (64, 4, 1, 320)) A = [5, 40, 4, 20] (stride (1, 5, 200, 800)) dim = 1 4.295 -> 4.294 ( -0.02%) [ +0.07% +0.02% +0.00% / -0.02% +0.61% +0.61%] index_select const : Elapsed 0.043 ms (4.298 ms / 100) 4.280 -> 4.287 ( +0.16%) [ +0.16% +0.00% +0.12% / +0.16% +0.65% +0.58%] index_select wrap : Elapsed 0.043 ms (4.287 ms / 100) 4.286 -> 4.291 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.49% +0.68%] index_select linear : Elapsed 0.043 ms (4.291 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.42% +0.49%] index_select reverse : Elapsed 0.043 ms (4.283 ms / 100) 4.279 -> 4.290 ( +0.26%) [ +0.00% +0.00% +0.14% / +0.26% +0.44% +0.79%] index_select skip64 : Elapsed 0.043 ms (4.279 ms / 100) 4.284 -> 4.292 ( +0.19%) [ +0.26% +0.00% +0.35% / +0.19% +0.84% +0.72%] index_select skip256 : Elapsed 0.043 ms (4.295 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.00% +0.56% +0.56%] index_select spread : Elapsed 0.043 ms (4.276 ms / 100) 4.294 -> 4.293 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.56% +0.54%] index_select strided 3 : Elapsed 0.043 ms (4.294 ms / 100) 4.264 -> 4.269 ( +0.12%) [ +0.07% +0.00% +0.02% / +0.12% +0.54% +0.56%] index_select strided 5 : Elapsed 0.043 ms (4.267 ms / 100) 4.271 -> 4.272 ( +0.02%) [ +0.02% +0.00% +0.09% / +0.02% +0.47% +0.52%] index_select strided 7 : Elapsed 0.043 ms (4.272 ms / 100) 4.296 -> 4.302 ( +0.14%) [ +0.37% +0.19% +0.00% / +0.14% +0.30% +0.49%] index_select strided 8 : Elapsed 0.043 ms (4.312 ms / 100) 4.287 -> 4.294 ( +0.16%) [ +0.07% +0.12% +0.00% / +0.16% +0.42% +0.79%] index_select strided 16 : Elapsed 0.043 ms (4.290 ms / 100) 4.280 -> 4.285 ( +0.12%) [ +0.21% +0.00% +0.12% / +0.12% +0.68% +0.51%] index_select random : Elapsed 0.043 ms (4.289 ms / 100) 4.280 -> 4.284 ( +0.09%) [ +0.07% +0.00% +0.05% / +0.09% +0.44% +0.63%] index_select random_sorted : Elapsed 0.043 ms (4.283 ms / 100) 4.287 -> 4.291 ( +0.09%) [ +0.00% +0.07% +0.12% / +0.09% +0.61% +0.61%] index_select perm : Elapsed 0.043 ms (4.287 ms / 100) 4.278 -> 4.278 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.42% +0.49%] index_select perm_sorted : Elapsed 0.043 ms (4.279 ms / 100) out_shape = [5, 40, 16, 20] in_shape = [5, 40, 4, 20] idx_dim = 2 B = [5, 40, 16, 20] (stride (12800, 320, 20, 1)) A = [5, 40, 4, 20] (stride (20, 400, 100, 1)) dim = 2 2.387 -> 2.390 ( +0.13%) [ +0.29% +0.00% +0.13% / +0.13% +0.29% +0.63%] index_add_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.329 -> 2.340 ( +0.47%) [ +0.26% +0.00% +0.30% / +0.47% +0.47% +0.56%] index_copy_ linear : Elapsed 0.023 ms (2.335 ms / 100) 2.391 -> 2.388 ( -0.13%) [ +0.04% +0.00% +0.04% / -0.13% +0.33% +0.46%] index_add_ reverse : Elapsed 0.024 ms (2.392 ms / 100) 2.332 -> 2.336 ( +0.17%) [ +0.00% +0.26% +0.17% / +0.17% +0.51% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.332 ms / 100) 2.388 -> 2.394 ( +0.25%) [ +0.17% +0.29% +0.00% / +0.25% +0.50% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.392 ms / 100) 2.349 -> 2.350 ( +0.04%) [ +0.00% +0.21% +0.17% / +0.04% +0.47% +1.23%] index_copy_ spread : Elapsed 0.023 ms (2.349 ms / 100) 2.389 -> 2.389 ( +0.00%) [ +0.17% +0.13% +0.00% / +0.00% +0.38% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.393 ms / 100) 2.346 -> 2.345 ( -0.04%) [ +0.00% +0.04% +0.26% / -0.04% +0.34% +0.43%] index_copy_ strided 3 : Elapsed 0.023 ms (2.346 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.00% +0.25% +0.08% / +0.00% +0.46% +0.38%] index_add_ strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.352 -> 2.353 ( +0.04%) [ +0.00% +0.34% +0.26% / +0.04% +0.51% +0.64%] index_copy_ strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.391 -> 2.391 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.25% +0.42%] index_add_ strided 7 : Elapsed 0.024 ms (2.392 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.04% +0.26% +0.00% / +0.13% +0.38% +0.21%] index_copy_ strided 7 : Elapsed 0.024 ms (2.351 ms / 100) 2.397 -> 2.393 ( -0.17%) [ +0.17% +0.00% +0.04% / -0.17% +0.04% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.401 ms / 100) 2.346 -> 2.346 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.26% +0.34%] index_copy_ perm : Elapsed 0.023 ms (2.346 ms / 100) 2.398 -> 2.394 ( -0.17%) [ +0.00% +0.04% +0.13% / -0.17% +0.29% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.398 ms / 100) 2.343 -> 2.343 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.30% +0.17%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.343 ms / 100) 4.824 -> 4.835 ( +0.23%) [ +0.31% +0.00% +0.19% / +0.23% +0.56% +0.60%] index_select const : Elapsed 0.048 ms (4.839 ms / 100) 4.906 -> 4.903 ( -0.06%) [ +0.04% +0.00% +0.10% / -0.06% +0.39% +0.35%] index_select wrap : Elapsed 0.049 ms (4.908 ms / 100) 4.901 -> 4.912 ( +0.22%) [ +0.29% +0.12% +0.00% / +0.22% +0.65% +0.39%] index_select linear : Elapsed 0.049 ms (4.915 ms / 100) 4.894 -> 4.899 ( +0.10%) [ +0.10% +0.00% +0.20% / +0.10% +0.53% +0.43%] index_select reverse : Elapsed 0.049 ms (4.899 ms / 100) 4.861 -> 4.856 ( -0.10%) [ +0.12% +0.19% +0.00% / -0.10% +0.41% +0.49%] index_select skip64 : Elapsed 0.049 ms (4.867 ms / 100) 4.844 -> 4.844 ( +0.00%) [ +0.08% +0.00% +0.10% / +0.00% +0.43% +0.37%] index_select skip256 : Elapsed 0.048 ms (4.848 ms / 100) 4.894 -> 4.896 ( +0.04%) [ +0.10% +0.00% +0.12% / +0.04% +0.43% +0.43%] index_select spread : Elapsed 0.049 ms (4.899 ms / 100) 4.906 -> 4.907 ( +0.02%) [ +0.00% +0.06% +0.04% / +0.02% +0.29% +0.33%] index_select strided 3 : Elapsed 0.049 ms (4.906 ms / 100) 4.877 -> 4.884 ( +0.14%) [ +0.12% +0.00% +0.23% / +0.14% +0.43% +0.27%] index_select random : Elapsed 0.049 ms (4.883 ms / 100) 4.850 -> 4.856 ( +0.12%) [ +0.04% +0.00% +0.27% / +0.12% +0.47% +0.52%] index_select random_sorted : Elapsed 0.049 ms (4.852 ms / 100) B = [5, 40, 16, 20] (stride (12800, 1, 40, 640)) A = [5, 40, 4, 20] (stride (1, 5, 200, 800)) dim = 2 2.558 -> 2.558 ( +0.00%) [ +0.00% +0.04% +0.47% / +0.00% +0.20% +0.20%] index_add_ linear : Elapsed 0.026 ms (2.558 ms / 100) 2.510 -> 2.511 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.12% +0.04% +0.32%] index_copy_ linear : Elapsed 0.025 ms (2.510 ms / 100) 2.560 -> 2.559 ( -0.04%) [ +0.00% +0.08% +0.08% / +0.00% -0.04% -0.04%] index_add_ reverse : Elapsed 0.026 ms (2.560 ms / 100) 2.508 -> 2.510 ( +0.08%) [ +0.00% +0.12% +0.16% / +0.24% +0.28% +0.08%] index_copy_ reverse : Elapsed 0.025 ms (2.508 ms / 100) 2.547 -> 2.544 ( -0.12%) [ +0.00% +0.12% +0.20% / -0.12% -0.08% +0.08%] index_add_ spread : Elapsed 0.025 ms (2.547 ms / 100) 2.495 -> 2.492 ( -0.12%) [ +0.00% +0.08% +0.44% / +0.04% -0.12% +0.00%] index_copy_ spread : Elapsed 0.025 ms (2.495 ms / 100) 2.553 -> 2.551 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.24% +0.20%] index_add_ strided 3 : Elapsed 0.026 ms (2.556 ms / 100) 2.499 -> 2.501 ( +0.08%) [ +0.00% +0.24% +0.48% / +0.08% +0.24% +0.16%] index_copy_ strided 3 : Elapsed 0.025 ms (2.499 ms / 100) 2.545 -> 2.547 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.31% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.545 ms / 100) 2.499 -> 2.494 ( -0.20%) [ +0.00% +0.08% +0.00% / -0.20% +0.12% +0.00%] index_copy_ strided 5 : Elapsed 0.025 ms (2.499 ms / 100) 2.542 -> 2.540 ( -0.08%) [ +0.00% +0.08% +0.47% / -0.08% +0.35% +0.39%] index_add_ strided 7 : Elapsed 0.025 ms (2.542 ms / 100) 2.493 -> 2.490 ( -0.12%) [ +0.04% +0.00% +0.36% / -0.12% +0.20% +0.32%] index_copy_ strided 7 : Elapsed 0.025 ms (2.494 ms / 100) 2.551 -> 2.551 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.08% +0.00% +0.12%] index_add_ perm : Elapsed 0.026 ms (2.552 ms / 100) 2.496 -> 2.500 ( +0.16%) [ +0.24% +0.00% +0.24% / +0.16% +0.36% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.502 ms / 100) 2.547 -> 2.546 ( -0.04%) [ +0.16% +0.04% +0.00% / +0.12% -0.04% +0.20%] index_add_ perm_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.496 -> 2.497 ( +0.04%) [ +0.08% +0.00% +0.44% / +0.04% +0.20% +0.36%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.498 ms / 100) 5.803 -> 5.798 ( -0.09%) [ +0.00% +0.09% +0.14% / -0.09% +0.34% +0.29%] index_select const : Elapsed 0.058 ms (5.803 ms / 100) 5.768 -> 5.766 ( -0.03%) [ +0.14% +0.00% +0.10% / -0.03% +0.31% +0.28%] index_select wrap : Elapsed 0.058 ms (5.776 ms / 100) 5.787 -> 5.788 ( +0.02%) [ +0.09% +0.00% +0.29% / +0.02% +0.21% +0.26%] index_select linear : Elapsed 0.058 ms (5.792 ms / 100) 5.803 -> 5.796 ( -0.12%) [ +0.03% +0.03% +0.00% / -0.12% +0.07% +0.38%] index_select reverse : Elapsed 0.058 ms (5.805 ms / 100) 5.793 -> 5.798 ( +0.09%) [ +0.02% +0.00% +0.05% / +0.09% +0.22% +0.24%] index_select skip64 : Elapsed 0.058 ms (5.794 ms / 100) 5.783 -> 5.791 ( +0.14%) [ +0.21% +0.00% +0.26% / +0.14% +0.42% +0.47%] index_select skip256 : Elapsed 0.058 ms (5.795 ms / 100) 5.773 -> 5.777 ( +0.07%) [ +0.05% +0.00% +0.16% / +0.07% +0.36% +0.29%] index_select spread : Elapsed 0.058 ms (5.776 ms / 100) 5.777 -> 5.777 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.31% +0.36%] index_select strided 3 : Elapsed 0.058 ms (5.777 ms / 100) 5.770 -> 5.779 ( +0.16%) [ +0.12% +0.00% +0.07% / +0.16% +0.42% +0.33%] index_select random : Elapsed 0.058 ms (5.777 ms / 100) 5.774 -> 5.769 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.35% +0.23%] index_select random_sorted : Elapsed 0.058 ms (5.774 ms / 100) B = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) dim = 2 fill_cnt = 4 1.368 -> 1.370 ( +0.15%) [ +0.00% +0.44% +0.29% / +0.15% +0.51% +0.58%] index_fill_ const : Elapsed 0.014 ms (1.368 ms / 100) 1.388 -> 1.390 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.22% +0.22%] index_fill_ linear : Elapsed 0.014 ms (1.389 ms / 100) 1.402 -> 1.405 ( +0.21%) [ +0.00% +0.21% +0.29% / +0.29% +0.43% +0.21%] index_fill_ reverse : Elapsed 0.014 ms (1.402 ms / 100) 1.369 -> 1.371 ( +0.15%) [ +0.00% +0.07% +0.37% / +0.15% +0.22% +0.51%] index_fill_ skip64 : Elapsed 0.014 ms (1.369 ms / 100) 1.368 -> 1.374 ( +0.44%) [ +0.07% +0.00% +0.15% / +0.44% +0.51% +0.66%] index_fill_ skip256 : Elapsed 0.014 ms (1.369 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.34% +0.21% +0.00% / +0.14% +0.89% +0.96%] index_fill_ spread : Elapsed 0.015 ms (1.468 ms / 100) 1.481 -> 1.487 ( +0.41%) [ +0.20% +0.20% +0.00% / +0.41% +0.68% +1.01%] index_fill_ strided 3 : Elapsed 0.015 ms (1.484 ms / 100) 1.430 -> 1.431 ( +0.07%) [ +0.00% +0.07% +0.77% / +0.07% +0.42% +0.42%] index_fill_ strided 5 : Elapsed 0.014 ms (1.430 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.48% +0.48%] index_fill_ strided 7 : Elapsed 0.015 ms (1.456 ms / 100) 1.394 -> 1.397 ( +0.22%) [ +0.50% +0.43% +0.00% / +0.29% +0.50% +0.22%] index_fill_ strided 8 : Elapsed 0.014 ms (1.401 ms / 100) 1.446 -> 1.447 ( +0.07%) [ +0.35% +0.00% +0.28% / +0.07% +0.69% +0.62%] index_fill_ random : Elapsed 0.015 ms (1.451 ms / 100) 1.456 -> 1.460 ( +0.27%) [ +0.34% +0.00% +0.48% / +0.27% +1.03% +0.96%] index_fill_ random_sorted : Elapsed 0.015 ms (1.461 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.00% +0.07% +0.21% / +0.07% +1.10% +0.48%] index_fill_ perm : Elapsed 0.015 ms (1.451 ms / 100) 1.433 -> 1.433 ( +0.00%) [ +0.00% +0.21% +0.14% / +0.00% +0.84% +0.84%] index_fill_ perm_sorted : Elapsed 0.014 ms (1.433 ms / 100) B = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) A = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 2 2.395 -> 2.391 ( -0.17%) [ +0.00% +0.58% +0.25% / -0.17% +0.29% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.395 ms / 100) 2.361 -> 2.359 ( -0.08%) [ +0.00% +0.08% +0.55% / -0.08% +0.47% +0.38%] index_copy_ linear : Elapsed 0.024 ms (2.361 ms / 100) 2.398 -> 2.397 ( -0.04%) [ +0.00% +0.04% +0.25% / -0.04% +0.29% +0.63%] index_add_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.367 -> 2.373 ( +0.25%) [ +0.04% +0.00% +0.38% / +0.25% +0.42% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.17% +0.00% +0.08% / -0.08% +0.29% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.425 ms / 100) 2.406 -> 2.415 ( +0.37%) [ +0.04% +0.21% +0.00% / +0.37% +0.67% +0.62%] index_copy_ spread : Elapsed 0.024 ms (2.407 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.00% +0.12% +0.37% / +0.12% +0.54% +0.78%] index_add_ strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.438 -> 2.442 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.62% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.393 -> 2.391 ( -0.08%) [ +0.13% +0.00% +0.04% / -0.08% +0.25% +0.08%] index_add_ strided 5 : Elapsed 0.024 ms (2.396 ms / 100) 2.379 -> 2.379 ( +0.00%) [ +0.13% +0.25% +0.00% / +0.00% +0.38% +0.55%] index_copy_ strided 5 : Elapsed 0.024 ms (2.382 ms / 100) 2.397 -> 2.393 ( -0.17%) [ +0.00% +0.00% +0.21% / -0.17% +0.04% +0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.397 ms / 100) 2.392 -> 2.391 ( -0.04%) [ +0.00% +0.08% +0.38% / -0.04% +0.46% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.392 ms / 100) 2.425 -> 2.425 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.16% +0.00%] index_add_ perm : Elapsed 0.024 ms (2.426 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.00% +0.17% +0.17% / +0.12% +0.41% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.419 ms / 100) 2.422 -> 2.418 ( -0.17%) [ +0.00% +0.08% +0.04% / -0.17% -0.12% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 2.415 -> 2.417 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.25% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 4.914 -> 4.910 ( -0.08%) [ +0.02% +0.00% +0.14% / -0.08% +0.39% +0.35%] index_select const : Elapsed 0.049 ms (4.915 ms / 100) 4.968 -> 4.970 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.56% +0.36%] index_select wrap : Elapsed 0.050 ms (4.972 ms / 100) 4.962 -> 4.959 ( -0.06%) [ +0.10% +0.00% +0.08% / -0.06% +0.50% +0.50%] index_select linear : Elapsed 0.050 ms (4.967 ms / 100) 4.972 -> 4.981 ( +0.18%) [ +0.06% +0.00% +0.16% / +0.18% +0.38% +0.48%] index_select reverse : Elapsed 0.050 ms (4.975 ms / 100) 4.931 -> 4.929 ( -0.04%) [ +0.10% +0.10% +0.00% / -0.04% +0.24% +0.20%] index_select skip64 : Elapsed 0.049 ms (4.936 ms / 100) 4.924 -> 4.916 ( -0.16%) [ +0.00% +0.02% +0.02% / -0.16% +0.10% +0.18%] index_select skip256 : Elapsed 0.049 ms (4.924 ms / 100) 4.967 -> 4.964 ( -0.06%) [ +0.00% +0.10% +0.02% / -0.06% +0.36% +0.28%] index_select spread : Elapsed 0.050 ms (4.967 ms / 100) 4.969 -> 4.965 ( -0.08%) [ +0.02% +0.00% +0.16% / -0.08% +0.26% +0.22%] index_select strided 3 : Elapsed 0.050 ms (4.970 ms / 100) 4.975 -> 4.972 ( -0.06%) [ +0.20% +0.26% +0.00% / -0.06% +0.32% +0.34%] index_select random : Elapsed 0.050 ms (4.985 ms / 100) 4.947 -> 4.947 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.00% +0.44% +0.42%] index_select random_sorted : Elapsed 0.049 ms (4.950 ms / 100) B = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) A = [5, 40, 4, 20] (stride (1, 100, 4000, 5)) dim = 2 2.436 -> 2.438 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.41% +0.33%] index_add_ linear : Elapsed 0.024 ms (2.437 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.58% +0.41%] index_copy_ linear : Elapsed 0.024 ms (2.414 ms / 100) 2.455 -> 2.445 ( -0.41%) [ +0.00% +0.00% +0.08% / +0.04% +0.00% -0.41%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.419 -> 2.415 ( -0.17%) [ +0.00% +0.12% +0.21% / +0.08% +0.41% -0.17%] index_copy_ reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.450 -> 2.443 ( -0.29%) [ +0.12% +0.00% +0.33% / -0.04% -0.08% -0.29%] index_add_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.447 ( -0.12%) [ +0.04% +0.00% +0.33% / -0.08% -0.12% -0.08%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.447 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.04% +0.08%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.472 -> 2.477 ( +0.20%) [ +0.12% +0.00% +0.12% / +0.20% +0.49% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.12% +0.29%] index_add_ strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.415 -> 2.412 ( -0.12%) [ +0.08% +0.00% +0.12% / +0.00% +0.04% -0.12%] index_copy_ strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.427 -> 2.427 ( +0.00%) [ +0.16% +0.00% +0.25% / +0.00% +0.00% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.431 ms / 100) 2.425 -> 2.432 ( +0.29%) [ +0.25% +0.00% +0.25% / +0.29% +0.45% +0.66%] index_copy_ strided 7 : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.433 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.21% +0.41%] index_add_ perm : Elapsed 0.024 ms (2.431 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.08% +0.00% +0.41% / -0.04% +0.37% +0.08%] index_copy_ perm : Elapsed 0.024 ms (2.428 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.00% +0.21% +0.21% / +0.04% +0.12% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.29% +0.00% +0.21% / +0.16% +0.33% +0.08%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.437 ms / 100) 5.366 -> 5.361 ( -0.09%) [ +0.00% +0.02% +0.00% / -0.09% +0.26% +0.20%] index_select const : Elapsed 0.054 ms (5.366 ms / 100) 5.351 -> 5.360 ( +0.17%) [ +0.00% +0.11% +0.13% / +0.17% +0.19% +0.37%] index_select wrap : Elapsed 0.054 ms (5.351 ms / 100) 5.352 -> 5.352 ( +0.00%) [ +0.09% +0.06% +0.00% / +0.00% +0.09% +0.26%] index_select linear : Elapsed 0.054 ms (5.357 ms / 100) 5.375 -> 5.382 ( +0.13%) [ +0.06% +0.02% +0.00% / +0.13% +0.33% +0.24%] index_select reverse : Elapsed 0.054 ms (5.378 ms / 100) 5.356 -> 5.362 ( +0.11%) [ +0.19% +0.19% +0.00% / +0.11% +0.41% +0.26%] index_select skip64 : Elapsed 0.054 ms (5.366 ms / 100) 5.343 -> 5.348 ( +0.09%) [ +0.06% +0.13% +0.00% / +0.09% +0.43% +0.37%] index_select skip256 : Elapsed 0.053 ms (5.346 ms / 100) 5.350 -> 5.358 ( +0.15%) [ +0.02% +0.07% +0.00% / +0.15% +0.37% +0.43%] index_select spread : Elapsed 0.054 ms (5.351 ms / 100) 5.375 -> 5.364 ( -0.20%) [ +0.04% +0.00% +0.04% / -0.20% +0.13% +0.13%] index_select strided 3 : Elapsed 0.054 ms (5.377 ms / 100) 5.360 -> 5.365 ( +0.09%) [ +0.21% +0.00% +0.06% / +0.09% +0.30% +0.28%] index_select random : Elapsed 0.054 ms (5.371 ms / 100) 5.367 -> 5.378 ( +0.20%) [ +0.06% +0.06% +0.00% / +0.20% +0.35% +0.37%] index_select random_sorted : Elapsed 0.054 ms (5.370 ms / 100) B = [5, 40, 16, 20] (stride (640, 16, 1, 3200)) A = [5, 40, 4, 20] (stride (800, 1, 4000, 40)) dim = 2 2.429 -> 2.433 ( +0.16%) [ +0.00% +0.21% +0.08% / +0.16% +0.54% +0.45%] index_add_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.08% +0.00% +0.21% / -0.04% +0.29% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.433 -> 2.431 ( -0.08%) [ +0.04% +0.16% +0.00% / -0.08% +0.45% +0.53%] index_add_ reverse : Elapsed 0.024 ms (2.434 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.04% +0.41% +0.37%] index_copy_ reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.472 -> 2.476 ( +0.16%) [ +0.24% +0.16% +0.00% / +0.16% +1.05% +0.85%] index_add_ spread : Elapsed 0.025 ms (2.478 ms / 100) 2.516 -> 2.518 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +1.11% +0.28%] index_copy_ spread : Elapsed 0.025 ms (2.519 ms / 100) 2.479 -> 2.480 ( +0.04%) [ +0.00% +0.16% +0.04% / +0.04% +0.65% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.479 ms / 100) 2.513 -> 2.514 ( +0.04%) [ +0.04% +0.00% +0.28% / +0.04% +0.44% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.514 ms / 100) 2.480 -> 2.479 ( -0.04%) [ +0.12% +0.00% +0.36% / -0.04% +0.32% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.483 ms / 100) 2.511 -> 2.515 ( +0.16%) [ +0.00% +0.24% +0.20% / +0.16% +0.52% +0.44%] index_copy_ strided 5 : Elapsed 0.025 ms (2.511 ms / 100) 2.476 -> 2.479 ( +0.12%) [ +0.12% +0.00% +0.20% / +0.12% +0.48% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.514 -> 2.518 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.68% +0.48%] index_copy_ strided 7 : Elapsed 0.025 ms (2.516 ms / 100) 2.474 -> 2.481 ( +0.28%) [ +0.44% +0.00% +0.28% / +0.36% +0.53% +0.28%] index_add_ perm : Elapsed 0.025 ms (2.485 ms / 100) 2.512 -> 2.509 ( -0.12%) [ +0.20% +0.00% +0.04% / -0.12% +0.28% +0.08%] index_copy_ perm : Elapsed 0.025 ms (2.517 ms / 100) 2.465 -> 2.470 ( +0.20%) [ +0.32% +0.12% +0.00% / +0.20% +0.32% +0.53%] index_add_ perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.506 -> 2.508 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.48% +0.40%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.507 ms / 100) 5.329 -> 5.337 ( +0.15%) [ +0.04% +0.13% +0.00% / +0.15% +0.28% +0.26%] index_select const : Elapsed 0.053 ms (5.331 ms / 100) 5.336 -> 5.331 ( -0.09%) [ +0.11% +0.00% +0.13% / -0.09% +0.37% +0.26%] index_select wrap : Elapsed 0.053 ms (5.342 ms / 100) 5.320 -> 5.324 ( +0.08%) [ +0.00% +0.13% +0.04% / +0.08% +0.51% +0.81%] index_select linear : Elapsed 0.053 ms (5.320 ms / 100) 5.339 -> 5.344 ( +0.09%) [ +0.00% +0.02% +0.02% / +0.09% +0.58% +0.66%] index_select reverse : Elapsed 0.053 ms (5.339 ms / 100) 5.347 -> 5.350 ( +0.06%) [ +0.04% +0.00% +0.11% / +0.06% +0.49% +0.39%] index_select skip64 : Elapsed 0.053 ms (5.349 ms / 100) 5.338 -> 5.341 ( +0.06%) [ +0.19% +0.22% +0.00% / +0.06% +0.52% +0.60%] index_select skip256 : Elapsed 0.053 ms (5.348 ms / 100) 5.329 -> 5.327 ( -0.04%) [ +0.23% +0.00% +0.04% / -0.04% +0.47% +0.43%] index_select spread : Elapsed 0.053 ms (5.341 ms / 100) 5.332 -> 5.335 ( +0.06%) [ +0.13% +0.04% +0.00% / +0.06% +0.39% +0.32%] index_select strided 3 : Elapsed 0.053 ms (5.339 ms / 100) 5.345 -> 5.347 ( +0.04%) [ +0.07% +0.09% +0.00% / +0.04% +0.24% +0.28%] index_select random : Elapsed 0.053 ms (5.349 ms / 100) 5.306 -> 5.309 ( +0.06%) [ +0.06% +0.00% +0.02% / +0.06% +0.26% +0.49%] index_select random_sorted : Elapsed 0.053 ms (5.309 ms / 100) out_shape = [5, 40, 4, 16] in_shape = [5, 40, 4, 20] idx_dim = 3 B = [5, 40, 4, 16] (stride (2560, 64, 16, 1)) A = [5, 40, 4, 20] (stride (1, 5, 200, 800)) dim = 3 3.305 -> 3.307 ( +0.06%) [ +0.00% +0.03% +0.09% / +0.06% +0.94% +0.88%] index_select const : Elapsed 0.033 ms (3.305 ms / 100) 3.309 -> 3.310 ( +0.03%) [ +0.06% +0.00% +0.09% / +0.03% +0.82% +0.79%] index_select wrap : Elapsed 0.033 ms (3.311 ms / 100) 3.309 -> 3.314 ( +0.15%) [ +0.09% +0.00% +0.00% / +0.15% +0.73% +0.91%] index_select linear : Elapsed 0.033 ms (3.312 ms / 100) 3.301 -> 3.303 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.82% +0.76%] index_select reverse : Elapsed 0.033 ms (3.302 ms / 100) 3.306 -> 3.313 ( +0.21%) [ +0.21% +0.00% +0.24% / +0.21% +1.00% +0.94%] index_select skip64 : Elapsed 0.033 ms (3.313 ms / 100) 3.301 -> 3.305 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.79% +0.76%] index_select skip256 : Elapsed 0.033 ms (3.305 ms / 100) 3.306 -> 3.309 ( +0.09%) [ +0.03% +0.06% +0.00% / +0.09% +0.79% +0.73%] index_select spread : Elapsed 0.033 ms (3.307 ms / 100) 3.312 -> 3.310 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.63% +0.69%] index_select strided 3 : Elapsed 0.033 ms (3.312 ms / 100) 3.299 -> 3.299 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.64% +0.67%] index_select strided 5 : Elapsed 0.033 ms (3.300 ms / 100) 3.301 -> 3.303 ( +0.06%) [ +0.09% +0.00% +0.09% / +0.06% +0.73% +0.76%] index_select strided 7 : Elapsed 0.033 ms (3.304 ms / 100) 3.310 -> 3.310 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.73% +0.73%] index_select strided 8 : Elapsed 0.033 ms (3.310 ms / 100) 3.300 -> 3.308 ( +0.24%) [ +0.06% +0.00% +0.09% / +0.24% +0.82% +0.88%] index_select strided 16 : Elapsed 0.033 ms (3.302 ms / 100) 3.314 -> 3.314 ( +0.00%) [ +0.12% +0.09% +0.00% / +0.00% +0.45% +0.48%] index_select random : Elapsed 0.033 ms (3.318 ms / 100) 3.305 -> 3.308 ( +0.09%) [ +0.00% +0.06% +0.12% / +0.09% +0.64% +0.54%] index_select random_sorted : Elapsed 0.033 ms (3.305 ms / 100) 3.302 -> 3.305 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.51% +0.61%] index_select perm : Elapsed 0.033 ms (3.303 ms / 100) 3.311 -> 3.313 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.69% +0.66%] index_select perm_sorted : Elapsed 0.033 ms (3.314 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 80, 5)) A = [5, 40, 4, 20] (stride (3200, 20, 800, 1)) dim = 3 3.803 -> 3.803 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.37% +0.50%] index_select const : Elapsed 0.038 ms (3.805 ms / 100) 3.805 -> 3.808 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.58% +0.60%] index_select wrap : Elapsed 0.038 ms (3.805 ms / 100) 3.813 -> 3.813 ( +0.00%) [ +0.00% +0.08% +0.03% / +0.00% +0.45% +0.39%] index_select linear : Elapsed 0.038 ms (3.813 ms / 100) 3.812 -> 3.810 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.55% +0.63%] index_select reverse : Elapsed 0.038 ms (3.812 ms / 100) 3.810 -> 3.809 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.45% +0.47%] index_select skip64 : Elapsed 0.038 ms (3.810 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.50% +0.45%] index_select skip256 : Elapsed 0.038 ms (3.815 ms / 100) 3.806 -> 3.806 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.47% +0.47%] index_select spread : Elapsed 0.038 ms (3.806 ms / 100) 3.808 -> 3.812 ( +0.11%) [ +0.16% +0.00% +0.11% / +0.11% +0.55% +0.71%] index_select strided 3 : Elapsed 0.038 ms (3.814 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.13% +0.00% +0.00% / +0.03% +0.47% +0.47%] index_select strided 5 : Elapsed 0.038 ms (3.818 ms / 100) 3.798 -> 3.805 ( +0.18%) [ +0.13% +0.16% +0.00% / +0.18% +0.79% +0.71%] index_select strided 7 : Elapsed 0.038 ms (3.803 ms / 100) 3.806 -> 3.812 ( +0.16%) [ +0.21% +0.00% +0.11% / +0.16% +0.66% +0.66%] index_select strided 8 : Elapsed 0.038 ms (3.814 ms / 100) 3.811 -> 3.816 ( +0.13%) [ +0.10% +0.00% +0.05% / +0.13% +0.55% +0.66%] index_select strided 16 : Elapsed 0.038 ms (3.815 ms / 100) 3.805 -> 3.803 ( -0.05%) [ +0.00% +0.03% +0.08% / -0.05% +0.60% +0.68%] index_select random : Elapsed 0.038 ms (3.805 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.52% +0.55%] index_select random_sorted : Elapsed 0.038 ms (3.814 ms / 100) 3.802 -> 3.805 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.63% +0.68%] index_select perm : Elapsed 0.038 ms (3.805 ms / 100) 3.807 -> 3.805 ( -0.05%) [ +0.00% +0.13% +0.00% / -0.05% +0.71% +0.63%] index_select perm_sorted : Elapsed 0.038 ms (3.807 ms / 100) B = [5, 40, 4, 16] (stride (4, 320, 1, 20)) A = [5, 40, 4, 20] (stride (80, 400, 20, 1)) dim = 3 4.136 -> 4.138 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.58% +0.65%] index_select const : Elapsed 0.041 ms (4.138 ms / 100) 4.169 -> 4.172 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.84% +0.84%] index_select wrap : Elapsed 0.042 ms (4.172 ms / 100) 4.166 -> 4.170 ( +0.10%) [ +0.14% +0.00% +0.17% / +0.10% +0.84% +0.77%] index_select linear : Elapsed 0.042 ms (4.172 ms / 100) 4.146 -> 4.148 ( +0.05%) [ +0.14% +0.12% +0.00% / +0.05% +0.58% +0.68%] index_select reverse : Elapsed 0.042 ms (4.152 ms / 100) 4.143 -> 4.146 ( +0.07%) [ +0.00% +0.19% +0.00% / +0.07% +0.58% +0.53%] index_select skip64 : Elapsed 0.041 ms (4.143 ms / 100) 4.133 -> 4.137 ( +0.10%) [ +0.05% +0.00% +0.12% / +0.10% +0.85% +0.80%] index_select skip256 : Elapsed 0.041 ms (4.135 ms / 100) 4.171 -> 4.168 ( -0.07%) [ +0.05% +0.00% +0.02% / -0.07% +0.74% +0.70%] index_select spread : Elapsed 0.042 ms (4.173 ms / 100) 4.156 -> 4.160 ( +0.10%) [ +0.14% +0.07% +0.00% / +0.10% +0.72% +0.72%] index_select strided 3 : Elapsed 0.042 ms (4.162 ms / 100) 4.149 -> 4.146 ( -0.07%) [ +0.10% +0.00% +0.00% / -0.07% +0.70% +0.75%] index_select strided 5 : Elapsed 0.042 ms (4.153 ms / 100) 4.149 -> 4.154 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.70% +0.89%] index_select strided 7 : Elapsed 0.042 ms (4.154 ms / 100) 4.156 -> 4.162 ( +0.14%) [ +0.00% +0.02% +0.12% / +0.14% +0.75% +0.63%] index_select strided 8 : Elapsed 0.042 ms (4.156 ms / 100) 4.153 -> 4.154 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.77% +0.79%] index_select strided 16 : Elapsed 0.042 ms (4.153 ms / 100) 4.161 -> 4.164 ( +0.07%) [ +0.14% +0.00% +0.12% / +0.07% +0.60% +0.60%] index_select random : Elapsed 0.042 ms (4.167 ms / 100) 4.181 -> 4.182 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.53% +0.57%] index_select random_sorted : Elapsed 0.042 ms (4.181 ms / 100) 4.149 -> 4.148 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.65% +0.60%] index_select perm : Elapsed 0.042 ms (4.151 ms / 100) 4.152 -> 4.151 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.53% +0.58%] index_select perm_sorted : Elapsed 0.042 ms (4.152 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 5, 20)) A = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) dim = 3 3.487 -> 3.489 ( +0.06%) [ +0.11% +0.03% +0.00% / +0.06% +0.34% +0.34%] index_select const : Elapsed 0.035 ms (3.491 ms / 100) 3.480 -> 3.483 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.55% +0.55%] index_select wrap : Elapsed 0.035 ms (3.482 ms / 100) 3.493 -> 3.495 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.46% +0.46%] index_select linear : Elapsed 0.035 ms (3.495 ms / 100) 3.398 -> 3.399 ( +0.03%) [ +0.00% +0.00% +0.18% / +0.03% +0.47% +0.47%] index_select reverse : Elapsed 0.034 ms (3.398 ms / 100) 3.389 -> 3.390 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.41% +0.47%] index_select skip64 : Elapsed 0.034 ms (3.390 ms / 100) 3.495 -> 3.497 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.43% +0.43%] index_select skip256 : Elapsed 0.035 ms (3.496 ms / 100) 3.481 -> 3.482 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.49% +0.52%] index_select spread : Elapsed 0.035 ms (3.482 ms / 100) 3.382 -> 3.384 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.62% +0.62%] index_select strided 3 : Elapsed 0.034 ms (3.384 ms / 100) 3.402 -> 3.403 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.38% +0.38%] index_select strided 5 : Elapsed 0.034 ms (3.403 ms / 100) 3.478 -> 3.479 ( +0.03%) [ +0.00% +0.03% +0.09% / +0.03% +0.75% +0.72%] index_select strided 7 : Elapsed 0.035 ms (3.478 ms / 100) 3.491 -> 3.492 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.72% +0.66%] index_select strided 8 : Elapsed 0.035 ms (3.492 ms / 100) 3.398 -> 3.402 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.53% +0.53%] index_select strided 16 : Elapsed 0.034 ms (3.402 ms / 100) 3.380 -> 3.381 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.71% +0.80%] index_select random : Elapsed 0.034 ms (3.382 ms / 100) 3.491 -> 3.491 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.63%] index_select random_sorted : Elapsed 0.035 ms (3.492 ms / 100) 3.479 -> 3.479 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.66% +0.72%] index_select perm : Elapsed 0.035 ms (3.479 ms / 100) 3.382 -> 3.381 ( -0.03%) [ +0.00% +0.00% +0.09% / -0.03% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.034 ms (3.382 ms / 100) B = [5, 40, 4, 16] (stride (640, 1, 3200, 40)) A = [5, 40, 4, 20] (stride (1, 100, 4000, 5)) dim = 3 4.272 -> 4.272 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.68% +0.66%] index_select const : Elapsed 0.043 ms (4.273 ms / 100) 4.283 -> 4.282 ( -0.02%) [ +0.14% +0.00% +0.02% / -0.02% +0.77% +0.72%] index_select wrap : Elapsed 0.043 ms (4.289 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.09% +0.12% +0.00% / +0.07% +0.75% +0.63%] index_select linear : Elapsed 0.043 ms (4.282 ms / 100) 4.267 -> 4.267 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.87% +0.73%] index_select reverse : Elapsed 0.043 ms (4.270 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.14% +0.00% +0.14% / +0.12% +0.94% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.273 ms / 100) 4.291 -> 4.292 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.63% +0.63%] index_select skip256 : Elapsed 0.043 ms (4.293 ms / 100) 4.288 -> 4.290 ( +0.05%) [ +0.19% +0.00% +0.21% / +0.05% +0.65% +0.79%] index_select spread : Elapsed 0.043 ms (4.296 ms / 100) 4.271 -> 4.271 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.77% +0.66%] index_select strided 3 : Elapsed 0.043 ms (4.271 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.65% +0.89%] index_select strided 5 : Elapsed 0.043 ms (4.278 ms / 100) 4.260 -> 4.258 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.59% +0.54%] index_select strided 7 : Elapsed 0.043 ms (4.260 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.86% +0.84%] index_select strided 8 : Elapsed 0.043 ms (4.280 ms / 100) 4.297 -> 4.293 ( -0.09%) [ +0.00% +0.12% +0.05% / -0.09% +0.47% +0.49%] index_select strided 16 : Elapsed 0.043 ms (4.297 ms / 100) 4.261 -> 4.264 ( +0.07%) [ +0.16% +0.00% +0.14% / +0.07% +0.59% +0.56%] index_select random : Elapsed 0.043 ms (4.268 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.56% +0.51%] index_select random_sorted : Elapsed 0.043 ms (4.281 ms / 100) 4.280 -> 4.282 ( +0.05%) [ +0.12% +0.00% +0.07% / +0.05% +0.56% +0.44%] index_select perm : Elapsed 0.043 ms (4.285 ms / 100) 4.281 -> 4.286 ( +0.12%) [ +0.12% +0.00% +0.02% / +0.12% +0.79% +0.58%] index_select perm_sorted : Elapsed 0.043 ms (4.286 ms / 100) B = [5, 40, 4, 16] (stride (1, 5, 3200, 200)) A = [5, 40, 4, 20] (stride (1, 100, 4000, 5)) dim = 3 4.285 -> 4.290 ( +0.12%) [ +0.09% +0.00% +0.12% / +0.12% +0.44% +0.42%] index_select const : Elapsed 0.043 ms (4.289 ms / 100) 4.289 -> 4.288 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.58% +0.68%] index_select wrap : Elapsed 0.043 ms (4.289 ms / 100) 4.287 -> 4.284 ( -0.07%) [ +0.00% +0.05% +0.09% / -0.07% +0.63% +0.68%] index_select linear : Elapsed 0.043 ms (4.287 ms / 100) 4.288 -> 4.289 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.51% +0.75%] index_select reverse : Elapsed 0.043 ms (4.291 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.02% +0.58% +0.65%] index_select skip64 : Elapsed 0.043 ms (4.286 ms / 100) 4.281 -> 4.284 ( +0.07%) [ +0.00% +0.02% +0.16% / +0.07% +0.63% +0.63%] index_select skip256 : Elapsed 0.043 ms (4.281 ms / 100) 4.286 -> 4.288 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.61% +0.58%] index_select spread : Elapsed 0.043 ms (4.286 ms / 100) 4.274 -> 4.276 ( +0.05%) [ +0.00% +0.05% +0.07% / +0.05% +0.42% +0.47%] index_select strided 3 : Elapsed 0.043 ms (4.274 ms / 100) 4.281 -> 4.274 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.42% +0.61%] index_select strided 5 : Elapsed 0.043 ms (4.281 ms / 100) 4.274 -> 4.277 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.73% +0.73%] index_select strided 7 : Elapsed 0.043 ms (4.280 ms / 100) 4.269 -> 4.277 ( +0.19%) [ +0.00% +0.05% +0.09% / +0.19% +0.56% +0.56%] index_select strided 8 : Elapsed 0.043 ms (4.269 ms / 100) 4.282 -> 4.284 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.75% +0.75%] index_select strided 16 : Elapsed 0.043 ms (4.285 ms / 100) 4.272 -> 4.273 ( +0.02%) [ +0.12% +0.05% +0.00% / +0.02% +0.68% +0.73%] index_select random : Elapsed 0.043 ms (4.277 ms / 100) 4.286 -> 4.286 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.70% +0.68%] index_select random_sorted : Elapsed 0.043 ms (4.287 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.12% +0.00% +0.09% / +0.12% +0.70% +0.68%] index_select perm : Elapsed 0.043 ms (4.272 ms / 100) 4.288 -> 4.296 ( +0.19%) [ +0.12% +0.00% +0.12% / +0.19% +0.91% +0.79%] index_select perm_sorted : Elapsed 0.043 ms (4.293 ms / 100) out_shape = [16, 40, 20, 4] in_shape = [5, 40, 20, 4] idx_dim = 0 B = [16, 40, 20, 4] (stride (3200, 20, 1, 800)) A = [5, 40, 20, 4] (stride (20, 100, 1, 4000)) dim = 0 2.279 -> 2.276 ( -0.13%) [ +0.13% +0.00% +0.00% / -0.13% +0.13% +0.48%] index_add_ linear : Elapsed 0.023 ms (2.282 ms / 100) 2.217 -> 2.212 ( -0.23%) [ +0.00% +0.05% +0.09% / -0.23% +0.41% +0.32%] index_copy_ linear : Elapsed 0.022 ms (2.217 ms / 100) 2.273 -> 2.278 ( +0.22%) [ +0.13% +0.00% +0.26% / +0.22% +0.44% +0.40%] index_add_ reverse : Elapsed 0.023 ms (2.276 ms / 100) 2.211 -> 2.216 ( +0.23%) [ +0.23% +0.00% +0.23% / +0.23% +0.36% +0.32%] index_copy_ reverse : Elapsed 0.022 ms (2.216 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.26% +0.04% +0.00% / -0.04% +0.22% +0.04%] index_add_ spread : Elapsed 0.023 ms (2.281 ms / 100) 2.208 -> 2.211 ( +0.14%) [ +0.00% +0.00% +0.18% / +0.14% +0.45% +0.18%] index_copy_ spread : Elapsed 0.022 ms (2.208 ms / 100) 2.273 -> 2.275 ( +0.09%) [ +0.00% +0.31% +0.04% / +0.09% +0.13% +0.31%] index_add_ strided 3 : Elapsed 0.023 ms (2.273 ms / 100) 2.210 -> 2.207 ( -0.14%) [ +0.00% +0.00% +0.18% / -0.14% +0.18% +0.18%] index_copy_ strided 3 : Elapsed 0.022 ms (2.210 ms / 100) 2.269 -> 2.270 ( +0.04%) [ +0.40% +0.00% +0.09% / +0.04% +0.75% +0.75%] index_add_ strided 5 : Elapsed 0.023 ms (2.278 ms / 100) 2.207 -> 2.203 ( -0.18%) [ +0.05% +0.00% +0.14% / -0.18% +0.91% +0.77%] index_copy_ strided 5 : Elapsed 0.022 ms (2.208 ms / 100) 2.273 -> 2.273 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.53% +0.31%] index_add_ strided 7 : Elapsed 0.023 ms (2.274 ms / 100) 2.209 -> 2.208 ( -0.05%) [ +0.00% +0.27% +0.23% / -0.05% +0.63% +0.68%] index_copy_ strided 7 : Elapsed 0.022 ms (2.209 ms / 100) 2.272 -> 2.269 ( -0.13%) [ +0.09% +0.13% +0.00% / -0.13% +0.22% +0.13%] index_add_ perm : Elapsed 0.023 ms (2.274 ms / 100) 2.211 -> 2.207 ( -0.18%) [ +0.00% +0.09% +0.09% / -0.09% +0.09% -0.18%] index_copy_ perm : Elapsed 0.022 ms (2.211 ms / 100) 2.271 -> 2.274 ( +0.13%) [ +0.04% +0.26% +0.00% / +0.22% +0.13% +0.48%] index_add_ perm_sorted : Elapsed 0.023 ms (2.272 ms / 100) 2.207 -> 2.212 ( +0.23%) [ +0.27% +0.23% +0.00% / +0.23% +0.27% +0.54%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.213 ms / 100) 4.576 -> 4.590 ( +0.31%) [ +0.00% +0.31% +0.26% / +0.31% +0.46% +0.59%] index_select const : Elapsed 0.046 ms (4.576 ms / 100) 4.640 -> 4.645 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.62% +0.41%] index_select wrap : Elapsed 0.046 ms (4.640 ms / 100) 4.651 -> 4.663 ( +0.26%) [ +0.04% +0.00% +0.34% / +0.26% +0.58% +0.62%] index_select linear : Elapsed 0.047 ms (4.653 ms / 100) 4.634 -> 4.627 ( -0.15%) [ +0.17% +0.02% +0.00% / -0.15% +0.69% +0.52%] index_select reverse : Elapsed 0.046 ms (4.642 ms / 100) 4.557 -> 4.558 ( +0.02%) [ +0.20% +0.00% +0.20% / +0.02% +0.57% +0.75%] index_select skip64 : Elapsed 0.046 ms (4.566 ms / 100) 4.570 -> 4.585 ( +0.33%) [ +0.09% +0.00% +0.39% / +0.33% +1.07% +1.25%] index_select skip256 : Elapsed 0.046 ms (4.574 ms / 100) 4.639 -> 4.643 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.84% +0.84%] index_select spread : Elapsed 0.046 ms (4.645 ms / 100) 4.660 -> 4.653 ( -0.15%) [ +0.04% +0.00% +0.04% / -0.15% +0.52% +0.49%] index_select strided 3 : Elapsed 0.047 ms (4.662 ms / 100) 4.636 -> 4.643 ( +0.15%) [ +0.00% +0.04% +0.17% / +0.15% +1.01% +0.67%] index_select random : Elapsed 0.046 ms (4.636 ms / 100) 4.645 -> 4.637 ( -0.17%) [ +0.00% +0.15% +0.00% / -0.17% +0.67% +0.62%] index_select random_sorted : Elapsed 0.046 ms (4.645 ms / 100) B = [16, 40, 20, 4] (stride (4, 1280, 64, 1)) dim = 0 fill_cnt = 5 0.534 -> 0.534 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.56%] index_fill_ const : Elapsed 0.005 ms (0.534 ms / 100) 0.534 -> 0.534 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +0.37% +0.56%] index_fill_ linear : Elapsed 0.005 ms (0.534 ms / 100) 0.534 -> 0.534 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.56% +0.75%] index_fill_ reverse : Elapsed 0.005 ms (0.534 ms / 100) 0.533 -> 0.533 ( +0.00%) [ +0.00% +0.00% +0.19% / +0.00% +0.75% +1.88%] index_fill_ skip64 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.534 ( +0.19%) [ +0.00% +0.19% +0.00% / +0.19% +0.94% +0.94%] index_fill_ skip256 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.533 ( +0.00%) [ +0.00% +0.19% +0.00% / +0.00% +0.75% +0.75%] index_fill_ spread : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.533 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +0.75% +0.75%] index_fill_ strided 3 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.535 ( +0.38%) [ +0.00% +0.00% +0.19% / +0.38% +0.75% +0.75%] index_fill_ strided 5 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.533 ( +0.00%) [ +0.00% +0.00% +0.38% / +0.00% +0.75% +1.13%] index_fill_ strided 7 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.533 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.94%] index_fill_ strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.533 -> 0.534 ( +0.19%) [ +0.00% +0.00% +0.38% / +0.19% +1.13% +1.13%] index_fill_ random : Elapsed 0.005 ms (0.533 ms / 100) 0.534 -> 0.533 ( -0.19%) [ +2.43% +0.00% +0.37% / -0.19% +0.94% +0.94%] index_fill_ random_sorted : Elapsed 0.005 ms (0.547 ms / 100) 0.534 -> 0.533 ( -0.19%) [ +0.19% +0.19% +0.00% / -0.19% +0.56% +0.56%] index_fill_ perm : Elapsed 0.005 ms (0.535 ms / 100) 0.533 -> 0.534 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.75% +0.94%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.534 ms / 100) B = [16, 40, 20, 4] (stride (20, 1280, 1, 320)) A = [5, 40, 20, 4] (stride (800, 1, 40, 4000)) dim = 0 2.558 -> 2.557 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.27% +0.23%] index_add_ linear : Elapsed 0.026 ms (2.559 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.28% +0.53%] index_copy_ linear : Elapsed 0.025 ms (2.473 ms / 100) 2.559 -> 2.556 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.39% +0.39%] index_add_ reverse : Elapsed 0.026 ms (2.560 ms / 100) 2.471 -> 2.474 ( +0.12%) [ +0.20% +0.04% +0.00% / +0.12% +0.40% +0.45%] index_copy_ reverse : Elapsed 0.025 ms (2.476 ms / 100) 2.545 -> 2.545 ( +0.00%) [ +0.24% +0.31% +0.00% / +0.00% +0.31% +0.12%] index_add_ spread : Elapsed 0.026 ms (2.551 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.00% +0.36% +0.04% / +0.16% +0.45% +0.41%] index_copy_ spread : Elapsed 0.025 ms (2.468 ms / 100) 2.544 -> 2.546 ( +0.08%) [ +0.16% +0.20% +0.00% / +0.08% +0.24% +0.35%] index_add_ strided 3 : Elapsed 0.025 ms (2.548 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.00% +0.16% +0.08% / +0.04% +0.32% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.555 -> 2.558 ( +0.12%) [ +0.12% +0.00% +0.04% / +0.12% +0.39% +0.55%] index_add_ strided 5 : Elapsed 0.026 ms (2.558 ms / 100) 2.477 -> 2.479 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.36% +0.57%] index_copy_ strided 5 : Elapsed 0.025 ms (2.478 ms / 100) 2.556 -> 2.556 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.23%] index_add_ strided 7 : Elapsed 0.026 ms (2.557 ms / 100) 2.480 -> 2.483 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.12% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.482 ms / 100) 2.555 -> 2.554 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.31% +0.47%] index_add_ perm : Elapsed 0.026 ms (2.557 ms / 100) 2.482 -> 2.485 ( +0.12%) [ +0.00% +0.28% +0.08% / +0.12% +0.44% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.482 ms / 100) 2.554 -> 2.555 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.35% +0.39%] index_add_ perm_sorted : Elapsed 0.026 ms (2.555 ms / 100) 2.477 -> 2.478 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.52% +0.69%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.477 ms / 100) 5.595 -> 5.593 ( -0.04%) [ +0.07% +0.04% +0.00% / -0.04% +0.61% +0.54%] index_select const : Elapsed 0.056 ms (5.599 ms / 100) 5.610 -> 5.614 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.55% +0.50%] index_select wrap : Elapsed 0.056 ms (5.610 ms / 100) 5.621 -> 5.625 ( +0.07%) [ +0.11% +0.00% +0.04% / +0.07% +0.48% +0.46%] index_select linear : Elapsed 0.056 ms (5.627 ms / 100) 5.605 -> 5.604 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.54% +0.54%] index_select reverse : Elapsed 0.056 ms (5.608 ms / 100) 5.591 -> 5.594 ( +0.05%) [ +0.09% +0.04% +0.00% / +0.05% +0.66% +0.64%] index_select skip64 : Elapsed 0.056 ms (5.596 ms / 100) 5.593 -> 5.591 ( -0.04%) [ +0.00% +0.05% +0.02% / -0.04% +0.52% +0.61%] index_select skip256 : Elapsed 0.056 ms (5.593 ms / 100) 5.597 -> 5.601 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.50% +0.54%] index_select spread : Elapsed 0.056 ms (5.600 ms / 100) 5.613 -> 5.616 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.59% +0.64%] index_select strided 3 : Elapsed 0.056 ms (5.616 ms / 100) 5.599 -> 5.602 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.64% +0.71%] index_select random : Elapsed 0.056 ms (5.604 ms / 100) 5.590 -> 5.594 ( +0.07%) [ +0.05% +0.14% +0.00% / +0.07% +0.75% +0.68%] index_select random_sorted : Elapsed 0.056 ms (5.593 ms / 100) B = [16, 40, 20, 4] (stride (160, 4, 2560, 1)) A = [5, 40, 20, 4] (stride (1, 20, 800, 5)) dim = 0 2.423 -> 2.426 ( +0.12%) [ +0.04% +0.00% +0.08% / +0.12% +0.50% +0.58%] index_add_ linear : Elapsed 0.024 ms (2.424 ms / 100) 2.351 -> 2.357 ( +0.26%) [ +0.21% +0.09% +0.00% / +0.26% +0.64% +0.51%] index_copy_ linear : Elapsed 0.024 ms (2.356 ms / 100) 2.421 -> 2.420 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.50% +0.66%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.349 -> 2.358 ( +0.38%) [ +0.21% +0.21% +0.00% / +0.38% +0.47% +0.38%] index_copy_ reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.17% +0.00% +0.08% / +0.12% +0.74% +0.62%] index_add_ spread : Elapsed 0.024 ms (2.423 ms / 100) 2.347 -> 2.358 ( +0.47%) [ +0.51% +0.30% +0.00% / +0.47% +0.72% +0.85%] index_copy_ spread : Elapsed 0.024 ms (2.359 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.00% +0.17% +0.04% / -0.04% +0.45% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.352 -> 2.354 ( +0.09%) [ +0.26% +0.00% +0.04% / +0.09% +0.47% +0.34%] index_copy_ strided 3 : Elapsed 0.024 ms (2.358 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.70%] index_add_ strided 5 : Elapsed 0.024 ms (2.422 ms / 100) 2.352 -> 2.354 ( +0.09%) [ +0.17% +0.04% +0.00% / +0.09% +0.30% +0.64%] index_copy_ strided 5 : Elapsed 0.024 ms (2.356 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.87% +0.74%] index_add_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.352 -> 2.363 ( +0.47%) [ +0.00% +0.09% +0.04% / +0.47% +0.68% +0.68%] index_copy_ strided 7 : Elapsed 0.024 ms (2.352 ms / 100) 2.421 -> 2.426 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.21% +0.66% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.421 ms / 100) 2.350 -> 2.354 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.60% +0.51%] index_copy_ perm : Elapsed 0.024 ms (2.350 ms / 100) 2.416 -> 2.420 ( +0.17%) [ +0.25% +0.00% +0.04% / +0.17% +0.66% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 2.350 -> 2.358 ( +0.34%) [ +0.34% +0.04% +0.00% / +0.34% +0.38% +0.47%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.358 ms / 100) 5.142 -> 5.157 ( +0.29%) [ +0.43% +0.12% +0.00% / +0.29% +0.74% +0.99%] index_select const : Elapsed 0.052 ms (5.164 ms / 100) 5.160 -> 5.168 ( +0.16%) [ +0.08% +0.00% +0.06% / +0.16% +0.60% +0.58%] index_select wrap : Elapsed 0.052 ms (5.164 ms / 100) 5.166 -> 5.165 ( -0.02%) [ +0.02% +0.00% +0.29% / -0.02% +0.66% +0.45%] index_select linear : Elapsed 0.052 ms (5.167 ms / 100) 5.148 -> 5.143 ( -0.10%) [ +0.00% +0.17% +0.02% / -0.10% +0.60% +0.51%] index_select reverse : Elapsed 0.051 ms (5.148 ms / 100) 5.134 -> 5.152 ( +0.35%) [ +0.12% +0.29% +0.00% / +0.35% +0.74% +0.60%] index_select skip64 : Elapsed 0.051 ms (5.140 ms / 100) 5.145 -> 5.151 ( +0.12%) [ +0.12% +0.00% +0.29% / +0.12% +0.62% +0.70%] index_select skip256 : Elapsed 0.052 ms (5.151 ms / 100) 5.160 -> 5.173 ( +0.25%) [ +0.21% +0.35% +0.00% / +0.25% +1.09% +0.89%] index_select spread : Elapsed 0.052 ms (5.171 ms / 100) 5.166 -> 5.176 ( +0.19%) [ +0.19% +0.00% +0.19% / +0.19% +0.74% +0.60%] index_select strided 3 : Elapsed 0.052 ms (5.176 ms / 100) 5.156 -> 5.157 ( +0.02%) [ +0.06% +0.02% +0.00% / +0.02% +0.70% +0.66%] index_select random : Elapsed 0.052 ms (5.159 ms / 100) 5.196 -> 5.198 ( +0.04%) [ +0.00% +0.21% +0.08% / +0.04% +0.81% +0.54%] index_select random_sorted : Elapsed 0.052 ms (5.196 ms / 100) B = [16, 40, 20, 4] (stride (1, 64, 2560, 16)) A = [5, 40, 20, 4] (stride (3200, 4, 160, 1)) dim = 0 2.419 -> 2.425 ( +0.25%) [ +0.29% +0.00% +0.25% / +0.25% +0.25% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.426 ms / 100) 2.380 -> 2.380 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.08% +0.13%] index_copy_ linear : Elapsed 0.024 ms (2.380 ms / 100) 2.422 -> 2.426 ( +0.17%) [ +0.33% +0.21% +0.00% / +0.33% +0.17% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.430 ms / 100) 2.380 -> 2.379 ( -0.04%) [ +0.25% +0.17% +0.00% / +0.13% +0.00% -0.04%] index_copy_ reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.33% +0.49%] index_add_ spread : Elapsed 0.025 ms (2.458 ms / 100) 2.442 -> 2.443 ( +0.04%) [ +0.25% +0.12% +0.00% / +0.12% +0.04% +0.20%] index_copy_ spread : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.457 ( +0.24%) [ +0.08% +0.20% +0.00% / +0.37% +0.37% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.444 ( -0.12%) [ +0.00% +0.12% +0.00% / +0.29% +0.04% -0.12%] index_copy_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.449 -> 2.455 ( +0.24%) [ +0.24% +0.12% +0.00% / +0.29% +0.24% +0.29%] index_add_ strided 5 : Elapsed 0.025 ms (2.455 ms / 100) 2.440 -> 2.441 ( +0.04%) [ +0.25% +0.25% +0.00% / +0.25% +0.04% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.446 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.24% +0.37%] index_add_ strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.00% +0.20% +0.16% / +0.00% +0.00% -0.04%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.455 -> 2.459 ( +0.16%) [ +0.41% +0.12% +0.00% / +0.16% +0.29% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.445 -> 2.439 ( -0.25%) [ +0.16% +0.29% +0.00% / +0.16% -0.25% -0.16%] index_copy_ perm : Elapsed 0.024 ms (2.449 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.33% +0.41% +0.00% / +0.12% +0.37% +0.37%] index_add_ perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) 2.442 -> 2.441 ( -0.04%) [ +0.04% +0.16% +0.00% / +0.08% -0.04% +0.12%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.443 ms / 100) 5.071 -> 5.080 ( +0.18%) [ +0.00% +0.00% +0.04% / +0.18% +0.43% +0.55%] index_select const : Elapsed 0.051 ms (5.071 ms / 100) 5.119 -> 5.125 ( +0.12%) [ +0.06% +0.00% +0.10% / +0.12% +0.41% +0.57%] index_select wrap : Elapsed 0.051 ms (5.122 ms / 100) 5.121 -> 5.122 ( +0.02%) [ +0.00% +0.23% +0.02% / +0.02% +0.57% +0.37%] index_select linear : Elapsed 0.051 ms (5.121 ms / 100) 5.081 -> 5.093 ( +0.24%) [ +0.30% +0.10% +0.00% / +0.24% +0.83% +0.61%] index_select reverse : Elapsed 0.051 ms (5.096 ms / 100) 5.063 -> 5.060 ( -0.06%) [ +0.18% +0.20% +0.00% / +0.41% +0.73% -0.06%] index_select skip64 : Elapsed 0.051 ms (5.072 ms / 100) 5.054 -> 5.073 ( +0.38%) [ +0.51% +0.30% +0.00% / +0.38% +0.55% +0.75%] index_select skip256 : Elapsed 0.051 ms (5.080 ms / 100) 5.096 -> 5.097 ( +0.02%) [ +0.10% +0.00% +0.29% / +0.02% +0.43% +0.61%] index_select spread : Elapsed 0.051 ms (5.101 ms / 100) 5.108 -> 5.113 ( +0.10%) [ +0.00% +0.20% +0.06% / +0.10% +0.53% +0.51%] index_select strided 3 : Elapsed 0.051 ms (5.108 ms / 100) 5.111 -> 5.109 ( -0.04%) [ +0.12% +0.00% +0.02% / -0.04% +0.68% +0.80%] index_select random : Elapsed 0.051 ms (5.117 ms / 100) 5.108 -> 5.111 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.61% +0.45%] index_select random_sorted : Elapsed 0.051 ms (5.108 ms / 100) B = [16, 40, 20, 4] (stride (800, 20, 1, 12800)) A = [5, 40, 20, 4] (stride (3200, 80, 1, 20)) dim = 0 2.228 -> 2.235 ( +0.31%) [ +0.13% +0.27% +0.00% / +0.31% +1.35% +1.21%] index_add_ linear : Elapsed 0.022 ms (2.231 ms / 100) 2.170 -> 2.182 ( +0.55%) [ +0.28% +0.23% +0.00% / +0.55% +1.11% +1.06%] index_copy_ linear : Elapsed 0.022 ms (2.176 ms / 100) 2.227 -> 2.228 ( +0.04%) [ +0.31% +0.09% +0.00% / +0.04% +1.21% +1.12%] index_add_ reverse : Elapsed 0.022 ms (2.234 ms / 100) 2.168 -> 2.175 ( +0.32%) [ +0.00% +0.32% +0.46% / +0.32% +1.43% +1.20%] index_copy_ reverse : Elapsed 0.022 ms (2.168 ms / 100) 2.237 -> 2.234 ( -0.13%) [ +0.22% +0.00% +0.18% / -0.13% +0.76% +1.21%] index_add_ spread : Elapsed 0.022 ms (2.242 ms / 100) 2.177 -> 2.181 ( +0.18%) [ +0.00% +0.00% +0.32% / +0.18% +0.64% +0.83%] index_copy_ spread : Elapsed 0.022 ms (2.177 ms / 100) 2.234 -> 2.240 ( +0.27%) [ +0.09% +0.00% +0.27% / +0.27% +0.63% +0.94%] index_add_ strided 3 : Elapsed 0.022 ms (2.236 ms / 100) 2.179 -> 2.187 ( +0.37%) [ +0.00% +0.09% +0.28% / +0.37% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.022 ms (2.179 ms / 100) 2.238 -> 2.236 ( -0.09%) [ +0.00% +0.00% +0.04% / -0.09% -0.04% +0.04%] index_add_ strided 5 : Elapsed 0.022 ms (2.238 ms / 100) 2.179 -> 2.179 ( +0.00%) [ +0.00% +0.18% +0.23% / +0.00% +0.23% +0.50%] index_copy_ strided 5 : Elapsed 0.022 ms (2.179 ms / 100) 2.236 -> 2.239 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +0.45% +0.45%] index_add_ strided 7 : Elapsed 0.022 ms (2.237 ms / 100) 2.180 -> 2.184 ( +0.18%) [ +0.05% +0.09% +0.00% / +0.23% +0.18% +0.32%] index_copy_ strided 7 : Elapsed 0.022 ms (2.181 ms / 100) 2.230 -> 2.234 ( +0.18%) [ +0.18% +0.00% +0.09% / +0.18% +0.72% +0.54%] index_add_ perm : Elapsed 0.022 ms (2.234 ms / 100) 2.176 -> 2.177 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.64% +0.64%] index_copy_ perm : Elapsed 0.022 ms (2.179 ms / 100) 2.236 -> 2.238 ( +0.09%) [ +0.04% +0.00% +0.09% / +0.09% +0.49% +0.36%] index_add_ perm_sorted : Elapsed 0.022 ms (2.237 ms / 100) 2.174 -> 2.180 ( +0.28%) [ +0.00% +0.09% +0.00% / +0.28% +0.60% +0.69%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.174 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.11% +0.00% +0.07% / +0.07% +0.45% +0.69%] index_select const : Elapsed 0.045 ms (4.497 ms / 100) 4.541 -> 4.546 ( +0.11%) [ +0.02% +0.07% +0.00% / +0.11% +0.84% +0.77%] index_select wrap : Elapsed 0.045 ms (4.542 ms / 100) 4.550 -> 4.560 ( +0.22%) [ +0.18% +0.00% +0.09% / +0.22% +0.84% +0.53%] index_select linear : Elapsed 0.046 ms (4.558 ms / 100) 4.536 -> 4.535 ( -0.02%) [ +0.00% +0.24% +0.00% / -0.02% +0.90% +0.88%] index_select reverse : Elapsed 0.045 ms (4.536 ms / 100) 4.480 -> 4.486 ( +0.13%) [ +0.00% +0.04% +0.07% / +0.13% +0.80% +0.65%] index_select skip64 : Elapsed 0.045 ms (4.480 ms / 100) 4.490 -> 4.491 ( +0.02%) [ +0.07% +0.18% +0.00% / +0.02% +0.67% +0.73%] index_select skip256 : Elapsed 0.045 ms (4.493 ms / 100) 4.539 -> 4.541 ( +0.04%) [ +0.00% +0.20% +0.18% / +0.04% +0.77% +0.99%] index_select spread : Elapsed 0.045 ms (4.539 ms / 100) 4.557 -> 4.557 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.64% +0.53%] index_select strided 3 : Elapsed 0.046 ms (4.558 ms / 100) 4.535 -> 4.541 ( +0.13%) [ +0.18% +0.00% +0.20% / +0.13% +1.04% +1.08%] index_select random : Elapsed 0.045 ms (4.543 ms / 100) 4.533 -> 4.534 ( +0.02%) [ +0.00% +0.13% +0.18% / +0.02% +0.82% +0.77%] index_select random_sorted : Elapsed 0.045 ms (4.533 ms / 100) B = [16, 40, 20, 4] (stride (1, 16, 640, 12800)) A = [5, 40, 20, 4] (stride (80, 400, 1, 20)) dim = 0 2.396 -> 2.391 ( -0.21%) [ +0.13% +0.42% +0.00% / +0.29% -0.21% -0.17%] index_add_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.364 -> 2.348 ( -0.68%) [ +0.00% +0.13% +0.13% / +0.17% -0.51% -0.68%] index_copy_ linear : Elapsed 0.024 ms (2.364 ms / 100) 2.399 -> 2.389 ( -0.42%) [ +0.25% +0.17% +0.00% / +0.17% -0.42% -0.38%] index_add_ reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.365 -> 2.352 ( -0.55%) [ +0.08% +0.21% +0.00% / -0.08% -0.55% -0.51%] index_copy_ reverse : Elapsed 0.024 ms (2.367 ms / 100) 2.432 -> 2.412 ( -0.82%) [ +0.29% +0.00% +0.25% / +0.16% -0.82% -0.33%] index_add_ spread : Elapsed 0.024 ms (2.439 ms / 100) 2.433 -> 2.409 ( -0.99%) [ +0.00% +0.00% +0.04% / +0.04% -0.99% -0.78%] index_copy_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.434 -> 2.416 ( -0.74%) [ +0.16% +0.04% +0.00% / +0.04% -0.74% -0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.431 -> 2.414 ( -0.70%) [ +0.00% +0.04% +0.04% / -0.08% -0.70% -0.53%] index_copy_ strided 3 : Elapsed 0.024 ms (2.431 ms / 100) 2.439 -> 2.422 ( -0.70%) [ +0.08% +0.00% +0.04% / +0.21% -0.70% -0.66%] index_add_ strided 5 : Elapsed 0.024 ms (2.441 ms / 100) 2.431 -> 2.416 ( -0.62%) [ +0.00% +0.04% +0.12% / +0.41% -0.62% -0.41%] index_copy_ strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.440 -> 2.420 ( -0.82%) [ +0.12% +0.08% +0.00% / -0.04% -0.82% -0.45%] index_add_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.431 -> 2.412 ( -0.78%) [ +0.12% +0.12% +0.00% / +0.12% -0.78% -0.53%] index_copy_ strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.435 -> 2.414 ( -0.86%) [ +0.33% +0.16% +0.00% / +0.16% -0.86% -0.78%] index_add_ perm : Elapsed 0.024 ms (2.443 ms / 100) 2.429 -> 2.404 ( -1.03%) [ +0.00% +0.04% +0.25% / +0.21% -0.99% -1.03%] index_copy_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.434 -> 2.414 ( -0.82%) [ +0.00% +0.37% +0.04% / -0.08% -0.37% -0.82%] index_add_ perm_sorted : Elapsed 0.024 ms (2.434 ms / 100) 2.429 -> 2.403 ( -1.07%) [ +0.00% +0.08% +0.08% / +0.00% -0.70% -1.07%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 4.953 -> 4.962 ( +0.18%) [ +0.00% +0.14% +0.04% / +0.18% +0.44% +0.48%] index_select const : Elapsed 0.050 ms (4.953 ms / 100) 5.002 -> 4.991 ( -0.22%) [ +0.12% +0.00% +0.10% / +0.04% -0.20% -0.22%] index_select wrap : Elapsed 0.050 ms (5.008 ms / 100) 5.002 -> 5.009 ( +0.14%) [ +0.24% +0.00% +0.18% / +0.20% +0.14% +0.16%] index_select linear : Elapsed 0.050 ms (5.014 ms / 100) 5.011 -> 5.006 ( -0.10%) [ +0.08% +0.00% +0.10% / +0.18% -0.10% -0.10%] index_select reverse : Elapsed 0.050 ms (5.015 ms / 100) 4.963 -> 4.970 ( +0.14%) [ +0.22% +0.12% +0.00% / +0.14% +0.20% +0.34%] index_select skip64 : Elapsed 0.050 ms (4.974 ms / 100) 4.968 -> 4.972 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.20% +0.10%] index_select skip256 : Elapsed 0.050 ms (4.968 ms / 100) 4.996 -> 4.997 ( +0.02%) [ +0.00% +0.08% +0.02% / +0.12% +0.02% +0.10%] index_select spread : Elapsed 0.050 ms (4.996 ms / 100) 4.998 -> 4.997 ( -0.02%) [ +0.14% +0.00% +0.00% / +0.30% -0.02% -0.02%] index_select strided 3 : Elapsed 0.050 ms (5.005 ms / 100) 4.979 -> 4.981 ( +0.04%) [ +0.10% +0.00% +0.16% / +0.04% +0.34% +0.28%] index_select random : Elapsed 0.050 ms (4.984 ms / 100) 4.997 -> 4.993 ( -0.08%) [ +0.12% +0.04% +0.00% / +0.20% -0.08% +0.00%] index_select random_sorted : Elapsed 0.050 ms (5.003 ms / 100) out_shape = [5, 16, 20, 4] in_shape = [5, 40, 20, 4] idx_dim = 1 B = [5, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [5, 40, 20, 4] (stride (80, 400, 1, 20)) dim = 1 3.530 -> 3.532 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.45% +0.51%] index_select const : Elapsed 0.035 ms (3.531 ms / 100) 3.511 -> 3.511 ( +0.00%) [ +0.00% +0.23% +0.06% / +0.00% +0.74% +0.57%] index_select wrap : Elapsed 0.035 ms (3.511 ms / 100) 3.521 -> 3.525 ( +0.11%) [ +0.03% +0.06% +0.00% / +0.11% +0.54% +0.54%] index_select linear : Elapsed 0.035 ms (3.522 ms / 100) 3.510 -> 3.513 ( +0.09%) [ +0.03% +0.09% +0.00% / +0.09% +0.74% +0.60%] index_select reverse : Elapsed 0.035 ms (3.511 ms / 100) 3.531 -> 3.538 ( +0.20%) [ +0.00% +0.17% +0.06% / +0.20% +0.62% +0.57%] index_select skip64 : Elapsed 0.035 ms (3.531 ms / 100) 3.530 -> 3.532 ( +0.06%) [ +0.03% +0.00% +0.08% / +0.06% +0.45% +0.48%] index_select skip256 : Elapsed 0.035 ms (3.531 ms / 100) 3.532 -> 3.529 ( -0.08%) [ +0.03% +0.00% +0.08% / -0.08% +0.25% +0.17%] index_select spread : Elapsed 0.035 ms (3.533 ms / 100) 3.516 -> 3.518 ( +0.06%) [ +0.11% +0.00% +0.14% / +0.06% +0.54% +0.54%] index_select strided 3 : Elapsed 0.035 ms (3.520 ms / 100) 3.507 -> 3.508 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.43% +0.51%] index_select strided 5 : Elapsed 0.035 ms (3.508 ms / 100) 3.538 -> 3.542 ( +0.11%) [ +0.14% +0.00% +0.03% / +0.11% +0.54% +0.71%] index_select strided 7 : Elapsed 0.035 ms (3.543 ms / 100) 3.531 -> 3.532 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.37% +0.45%] index_select strided 8 : Elapsed 0.035 ms (3.534 ms / 100) 3.549 -> 3.547 ( -0.06%) [ +0.03% +0.11% +0.00% / -0.06% +0.34% +0.39%] index_select strided 16 : Elapsed 0.036 ms (3.550 ms / 100) 3.521 -> 3.534 ( +0.37%) [ +0.06% +0.00% +0.06% / +0.37% +0.65% +0.57%] index_select random : Elapsed 0.035 ms (3.523 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.63% +0.63%] index_select random_sorted : Elapsed 0.035 ms (3.513 ms / 100) 3.533 -> 3.535 ( +0.06%) [ +0.00% +0.00% +0.23% / +0.06% +0.42% +0.65%] index_select perm : Elapsed 0.035 ms (3.533 ms / 100) 3.517 -> 3.515 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.43% +0.40%] index_select perm_sorted : Elapsed 0.035 ms (3.517 ms / 100) B = [5, 16, 20, 4] (stride (1280, 1, 16, 320)) A = [5, 40, 20, 4] (stride (3200, 1, 40, 800)) dim = 1 4.291 -> 4.292 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.49% +0.51%] index_select const : Elapsed 0.043 ms (4.291 ms / 100) 4.289 -> 4.293 ( +0.09%) [ +0.05% +0.02% +0.00% / +0.09% +0.65% +0.68%] index_select wrap : Elapsed 0.043 ms (4.291 ms / 100) 4.295 -> 4.309 ( +0.33%) [ +0.00% +0.02% +0.00% / +0.33% +0.72% +0.70%] index_select linear : Elapsed 0.043 ms (4.295 ms / 100) 4.284 -> 4.285 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.68% +0.75%] index_select reverse : Elapsed 0.043 ms (4.284 ms / 100) 4.272 -> 4.273 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.70% +0.77%] index_select skip64 : Elapsed 0.043 ms (4.274 ms / 100) 4.288 -> 4.289 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.61% +0.61%] index_select skip256 : Elapsed 0.043 ms (4.290 ms / 100) 4.273 -> 4.274 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.68% +0.94%] index_select spread : Elapsed 0.043 ms (4.273 ms / 100) 4.273 -> 4.282 ( +0.21%) [ +0.09% +0.00% +0.00% / +0.21% +0.73% +0.73%] index_select strided 3 : Elapsed 0.043 ms (4.277 ms / 100) 4.287 -> 4.289 ( +0.05%) [ +0.07% +0.00% +0.05% / +0.05% +0.68% +0.65%] index_select strided 5 : Elapsed 0.043 ms (4.290 ms / 100) 4.274 -> 4.276 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.70% +0.75%] index_select strided 7 : Elapsed 0.043 ms (4.276 ms / 100) 4.275 -> 4.276 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.84% +0.87%] index_select strided 8 : Elapsed 0.043 ms (4.275 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.14% +0.05% +0.00% / +0.02% +0.72% +0.77%] index_select strided 16 : Elapsed 0.043 ms (4.288 ms / 100) 4.296 -> 4.301 ( +0.12%) [ +0.07% +0.05% +0.00% / +0.12% +0.81% +0.79%] index_select random : Elapsed 0.043 ms (4.299 ms / 100) 4.259 -> 4.265 ( +0.14%) [ +0.12% +0.00% +0.14% / +0.14% +0.89% +0.87%] index_select random_sorted : Elapsed 0.043 ms (4.264 ms / 100) 4.272 -> 4.273 ( +0.02%) [ +0.23% +0.00% +0.37% / +0.02% +1.03% +1.03%] index_select perm : Elapsed 0.043 ms (4.282 ms / 100) 4.284 -> 4.291 ( +0.16%) [ +0.07% +0.00% +0.02% / +0.16% +0.82% +0.84%] index_select perm_sorted : Elapsed 0.043 ms (4.287 ms / 100) B = [5, 16, 20, 4] (stride (80, 400, 1, 20)) A = [5, 40, 20, 4] (stride (800, 20, 1, 4000)) dim = 1 3.895 -> 3.899 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.59% +0.67%] index_select const : Elapsed 0.039 ms (3.897 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.46% +0.38%] index_select wrap : Elapsed 0.039 ms (3.922 ms / 100) 3.897 -> 3.898 ( +0.03%) [ +0.13% +0.21% +0.00% / +0.03% +0.74% +0.64%] index_select linear : Elapsed 0.039 ms (3.902 ms / 100) 3.909 -> 3.916 ( +0.18%) [ +0.08% +0.00% +0.15% / +0.18% +0.43% +0.41%] index_select reverse : Elapsed 0.039 ms (3.912 ms / 100) 3.910 -> 3.918 ( +0.20%) [ +0.31% +0.18% +0.00% / +0.20% +0.59% +0.38%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.896 -> 3.898 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.67% +0.54%] index_select skip256 : Elapsed 0.039 ms (3.896 ms / 100) 3.898 -> 3.907 ( +0.23%) [ +0.03% +0.00% +0.03% / +0.23% +0.44% +0.67%] index_select spread : Elapsed 0.039 ms (3.899 ms / 100) 3.908 -> 3.914 ( +0.15%) [ +0.00% +0.08% +0.20% / +0.15% +0.31% +0.38%] index_select strided 3 : Elapsed 0.039 ms (3.908 ms / 100) 3.891 -> 3.892 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.39% +0.39%] index_select strided 5 : Elapsed 0.039 ms (3.891 ms / 100) 3.896 -> 3.917 ( +0.54%) [ +0.05% +0.31% +0.00% / +0.54% +0.54% +0.62%] index_select strided 7 : Elapsed 0.039 ms (3.898 ms / 100) 3.899 -> 3.914 ( +0.38%) [ +0.03% +0.21% +0.00% / +0.38% +0.72% +0.41%] index_select strided 8 : Elapsed 0.039 ms (3.900 ms / 100) 3.897 -> 3.901 ( +0.10%) [ +0.00% +0.13% +0.00% / +0.10% +0.51% +0.64%] index_select strided 16 : Elapsed 0.039 ms (3.897 ms / 100) 3.895 -> 3.914 ( +0.49%) [ +0.10% +0.00% +0.56% / +0.49% +0.59% +0.80%] index_select random : Elapsed 0.039 ms (3.899 ms / 100) 3.900 -> 3.919 ( +0.49%) [ +0.36% +0.23% +0.00% / +0.49% +0.54% +0.62%] index_select random_sorted : Elapsed 0.039 ms (3.914 ms / 100) 3.899 -> 3.902 ( +0.08%) [ +0.08% +0.10% +0.00% / +0.08% +0.64% +0.36%] index_select perm : Elapsed 0.039 ms (3.902 ms / 100) 3.887 -> 3.897 ( +0.26%) [ +0.21% +0.23% +0.00% / +0.26% +0.93% +0.82%] index_select perm_sorted : Elapsed 0.039 ms (3.895 ms / 100) B = [5, 16, 20, 4] (stride (1, 400, 20, 5)) dim = 1 fill_cnt = 40 0.772 -> 0.760 ( -1.55%) [ +0.65% +0.39% +0.00% / -0.78% -1.17% -1.55%] index_fill_ const : Elapsed 0.008 ms (0.777 ms / 100) 0.721 -> 0.711 ( -1.39%) [ +0.97% +0.28% +0.00% / -0.69% -1.11% -1.39%] index_fill_ linear : Elapsed 0.007 ms (0.728 ms / 100) 0.726 -> 0.717 ( -1.24%) [ +0.00% +0.14% +0.00% / -1.24% -0.69% -1.10%] index_fill_ reverse : Elapsed 0.007 ms (0.726 ms / 100) 0.764 -> 0.747 ( -2.23%) [ +0.00% +0.65% +0.13% / -0.65% -1.70% -2.23%] index_fill_ skip64 : Elapsed 0.008 ms (0.764 ms / 100) 0.769 -> 0.761 ( -1.04%) [ +0.00% +0.13% +0.52% / -0.65% -0.39% -1.04%] index_fill_ skip256 : Elapsed 0.008 ms (0.769 ms / 100) 0.695 -> 0.679 ( -2.30%) [ +0.43% +0.00% +0.14% / -2.16% -1.87% -2.30%] index_fill_ spread : Elapsed 0.007 ms (0.698 ms / 100) 0.691 -> 0.676 ( -2.17%) [ +0.00% +0.58% +0.43% / -1.74% -2.17% -1.88%] index_fill_ strided 3 : Elapsed 0.007 ms (0.691 ms / 100) 0.687 -> 0.672 ( -2.18%) [ +0.44% +0.58% +0.00% / -2.04% -1.75% -2.18%] index_fill_ strided 5 : Elapsed 0.007 ms (0.690 ms / 100) 0.700 -> 0.681 ( -2.71%) [ +0.00% +0.29% +0.00% / -2.14% -2.57% -2.71%] index_fill_ strided 7 : Elapsed 0.007 ms (0.700 ms / 100) 0.710 -> 0.700 ( -1.41%) [ +0.00% +0.99% +0.00% / -0.85% -1.41% -1.41%] index_fill_ strided 8 : Elapsed 0.007 ms (0.710 ms / 100) 0.691 -> 0.674 ( -2.46%) [ +0.43% +0.00% +0.58% / -1.16% -2.46% -2.32%] index_fill_ random : Elapsed 0.007 ms (0.694 ms / 100) 0.691 -> 0.677 ( -2.03%) [ +0.43% +0.58% +0.00% / -2.03% -2.03% -2.03%] index_fill_ random_sorted : Elapsed 0.007 ms (0.694 ms / 100) B = [5, 16, 20, 4] (stride (16, 1, 320, 80)) A = [5, 40, 20, 4] (stride (1, 20, 800, 5)) dim = 1 3.625 -> 3.629 ( +0.11%) [ +0.08% +0.03% +0.00% / +0.11% +0.88% +0.80%] index_select const : Elapsed 0.036 ms (3.628 ms / 100) 3.671 -> 3.674 ( +0.08%) [ +0.05% +0.00% +0.11% / +0.08% +0.71% +0.74%] index_select wrap : Elapsed 0.037 ms (3.673 ms / 100) 3.694 -> 3.693 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.68% +0.65%] index_select linear : Elapsed 0.037 ms (3.697 ms / 100) 3.680 -> 3.679 ( -0.03%) [ +0.03% +0.00% +0.14% / -0.03% +0.84% +0.84%] index_select reverse : Elapsed 0.037 ms (3.681 ms / 100) 3.614 -> 3.618 ( +0.11%) [ +0.14% +0.00% +0.03% / +0.11% +0.83% +0.83%] index_select skip64 : Elapsed 0.036 ms (3.619 ms / 100) 3.623 -> 3.627 ( +0.11%) [ +0.11% +0.08% +0.00% / +0.11% +0.97% +0.94%] index_select skip256 : Elapsed 0.036 ms (3.627 ms / 100) 3.685 -> 3.684 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.76% +0.76%] index_select spread : Elapsed 0.037 ms (3.686 ms / 100) 3.677 -> 3.677 ( +0.00%) [ +0.00% +0.11% +0.03% / +0.00% +0.76% +0.84%] index_select strided 3 : Elapsed 0.037 ms (3.677 ms / 100) 3.674 -> 3.676 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.68% +0.60%] index_select strided 5 : Elapsed 0.037 ms (3.677 ms / 100) 3.662 -> 3.667 ( +0.14%) [ +0.08% +0.00% +0.19% / +0.14% +0.82% +0.85%] index_select strided 7 : Elapsed 0.037 ms (3.665 ms / 100) 3.623 -> 3.620 ( -0.08%) [ +0.00% +0.00% +0.17% / -0.08% +0.69% +0.69%] index_select strided 8 : Elapsed 0.036 ms (3.623 ms / 100) 3.598 -> 3.600 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.81% +0.89%] index_select strided 16 : Elapsed 0.036 ms (3.598 ms / 100) 3.702 -> 3.702 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.76% +0.84%] index_select random : Elapsed 0.037 ms (3.705 ms / 100) 3.675 -> 3.682 ( +0.19%) [ +0.00% +0.03% +0.05% / +0.19% +0.87% +1.01%] index_select random_sorted : Elapsed 0.037 ms (3.675 ms / 100) 3.701 -> 3.704 ( +0.08%) [ +0.00% +0.14% +0.08% / +0.08% +0.89% +0.97%] index_select perm : Elapsed 0.037 ms (3.701 ms / 100) 3.695 -> 3.702 ( +0.19%) [ +0.16% +0.19% +0.00% / +0.19% +0.78% +0.70%] index_select perm_sorted : Elapsed 0.037 ms (3.701 ms / 100) B = [5, 16, 20, 4] (stride (20, 100, 1, 1600)) A = [5, 40, 20, 4] (stride (3200, 80, 4, 1)) dim = 1 3.504 -> 3.511 ( +0.20%) [ +0.03% +0.00% +0.20% / +0.20% +0.57% +0.60%] index_select const : Elapsed 0.035 ms (3.505 ms / 100) 3.521 -> 3.523 ( +0.06%) [ +0.00% +0.06% +0.14% / +0.06% +0.40% +0.37%] index_select wrap : Elapsed 0.035 ms (3.521 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.54% +0.74%] index_select linear : Elapsed 0.035 ms (3.510 ms / 100) 3.500 -> 3.500 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.57%] index_select reverse : Elapsed 0.035 ms (3.500 ms / 100) 3.511 -> 3.510 ( -0.03%) [ +0.00% +0.14% +0.06% / -0.03% +0.57% +0.54%] index_select skip64 : Elapsed 0.035 ms (3.511 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.17% +0.00% +0.03% / +0.03% +0.37% +0.40%] index_select skip256 : Elapsed 0.035 ms (3.517 ms / 100) 3.499 -> 3.500 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.54% +0.60%] index_select spread : Elapsed 0.035 ms (3.500 ms / 100) 3.522 -> 3.525 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.40% +0.37%] index_select strided 3 : Elapsed 0.035 ms (3.523 ms / 100) 3.501 -> 3.503 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.43% +0.46%] index_select strided 5 : Elapsed 0.035 ms (3.502 ms / 100) 3.507 -> 3.507 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.57% +0.60%] index_select strided 7 : Elapsed 0.035 ms (3.508 ms / 100) 3.514 -> 3.517 ( +0.09%) [ +0.14% +0.14% +0.00% / +0.09% +0.60% +0.57%] index_select strided 8 : Elapsed 0.035 ms (3.519 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.34% +0.34%] index_select strided 16 : Elapsed 0.035 ms (3.504 ms / 100) 3.513 -> 3.515 ( +0.06%) [ +0.11% +0.03% +0.00% / +0.06% +0.54% +0.60%] index_select random : Elapsed 0.035 ms (3.517 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.37% +0.37%] index_select random_sorted : Elapsed 0.035 ms (3.503 ms / 100) 3.520 -> 3.518 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.26% +0.37%] index_select perm : Elapsed 0.035 ms (3.520 ms / 100) 3.504 -> 3.503 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.34% +0.31%] index_select perm_sorted : Elapsed 0.035 ms (3.505 ms / 100) B = [5, 16, 20, 4] (stride (1, 100, 5, 1600)) dim = 1 fill_cnt = 40 1.802 -> 1.803 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.17% +0.17%] index_fill_ const : Elapsed 0.018 ms (1.802 ms / 100) 1.857 -> 1.850 ( -0.38%) [ +0.00% +0.05% +0.11% / -0.11% -0.16% -0.38%] index_fill_ linear : Elapsed 0.019 ms (1.857 ms / 100) 1.852 -> 1.846 ( -0.32%) [ +0.00% +0.22% +0.27% / -0.32% -0.27% -0.27%] index_fill_ reverse : Elapsed 0.019 ms (1.852 ms / 100) 1.808 -> 1.804 ( -0.22%) [ +0.00% +0.17% +0.11% / -0.11% -0.17% -0.22%] index_fill_ skip64 : Elapsed 0.018 ms (1.808 ms / 100) 1.803 -> 1.800 ( -0.17%) [ +0.00% +0.17% +0.17% / -0.17% +0.00% +0.00%] index_fill_ skip256 : Elapsed 0.018 ms (1.803 ms / 100) 1.804 -> 1.801 ( -0.17%) [ +0.00% +0.22% +0.06% / -0.17% -0.11% +0.17%] index_fill_ spread : Elapsed 0.018 ms (1.804 ms / 100) 1.805 -> 1.806 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.17% +0.06%] index_fill_ strided 3 : Elapsed 0.018 ms (1.805 ms / 100) 1.807 -> 1.804 ( -0.17%) [ +0.11% +0.00% +0.11% / -0.17% -0.11% -0.06%] index_fill_ strided 5 : Elapsed 0.018 ms (1.809 ms / 100) 1.805 -> 1.804 ( -0.06%) [ +0.00% +0.22% +0.50% / -0.06% +0.06% +0.00%] index_fill_ strided 7 : Elapsed 0.018 ms (1.805 ms / 100) 1.802 -> 1.801 ( -0.06%) [ +0.06% +0.00% +0.28% / -0.06% +0.11% +0.06%] index_fill_ strided 8 : Elapsed 0.018 ms (1.803 ms / 100) 1.806 -> 1.803 ( -0.17%) [ +0.00% +0.06% +0.06% / -0.17% +0.00% +0.00%] index_fill_ random : Elapsed 0.018 ms (1.806 ms / 100) 1.805 -> 1.802 ( -0.17%) [ +0.06% +0.00% +0.00% / -0.17% +0.06% +0.00%] index_fill_ random_sorted : Elapsed 0.018 ms (1.806 ms / 100) B = [5, 16, 20, 4] (stride (16, 1, 80, 1600)) A = [5, 40, 20, 4] (stride (3200, 1, 40, 800)) dim = 1 4.292 -> 4.292 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.54% +0.51%] index_select const : Elapsed 0.043 ms (4.292 ms / 100) 4.291 -> 4.290 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.63% +0.61%] index_select wrap : Elapsed 0.043 ms (4.291 ms / 100) 4.298 -> 4.305 ( +0.16%) [ +0.07% +0.05% +0.00% / +0.16% +0.67% +0.67%] index_select linear : Elapsed 0.043 ms (4.301 ms / 100) 4.284 -> 4.286 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.72% +0.84%] index_select reverse : Elapsed 0.043 ms (4.286 ms / 100) 4.271 -> 4.275 ( +0.09%) [ +0.00% +0.02% +0.05% / +0.09% +0.73% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.271 ms / 100) 4.288 -> 4.290 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.63% +0.63%] index_select skip256 : Elapsed 0.043 ms (4.288 ms / 100) 4.273 -> 4.274 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.82% +0.80%] index_select spread : Elapsed 0.043 ms (4.276 ms / 100) 4.269 -> 4.277 ( +0.19%) [ +0.05% +0.00% +0.21% / +0.19% +0.94% +0.89%] index_select strided 3 : Elapsed 0.043 ms (4.271 ms / 100) 4.277 -> 4.277 ( +0.00%) [ +0.05% +0.19% +0.00% / +0.00% +0.87% +0.72%] index_select strided 5 : Elapsed 0.043 ms (4.279 ms / 100) 4.275 -> 4.274 ( -0.02%) [ +0.00% +0.07% +0.05% / -0.02% +0.70% +0.77%] index_select strided 7 : Elapsed 0.043 ms (4.275 ms / 100) 4.274 -> 4.277 ( +0.07%) [ +0.00% +0.05% +0.05% / +0.07% +0.94% +0.84%] index_select strided 8 : Elapsed 0.043 ms (4.274 ms / 100) 4.276 -> 4.279 ( +0.07%) [ +0.16% +0.21% +0.00% / +0.07% +0.94% +0.91%] index_select strided 16 : Elapsed 0.043 ms (4.283 ms / 100) 4.287 -> 4.288 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.72% +0.72%] index_select random : Elapsed 0.043 ms (4.287 ms / 100) 4.260 -> 4.259 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.66% +0.73%] index_select random_sorted : Elapsed 0.043 ms (4.260 ms / 100) 4.258 -> 4.248 ( -0.23%) [ +0.00% +0.02% +0.09% / -0.23% +0.66% +0.73%] index_select perm : Elapsed 0.043 ms (4.258 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.72% +0.72%] index_select perm_sorted : Elapsed 0.043 ms (4.285 ms / 100) B = [5, 16, 20, 4] (stride (1, 5, 80, 1600)) A = [5, 40, 20, 4] (stride (1, 100, 5, 4000)) dim = 1 3.962 -> 3.962 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.71%] index_select const : Elapsed 0.040 ms (3.962 ms / 100) 3.960 -> 3.959 ( -0.03%) [ +0.00% +0.10% +0.00% / -0.03% +0.61% +0.53%] index_select wrap : Elapsed 0.040 ms (3.960 ms / 100) 3.964 -> 3.955 ( -0.23%) [ +0.13% +0.03% +0.00% / -0.23% +0.58% +0.45%] index_select linear : Elapsed 0.040 ms (3.969 ms / 100) 3.963 -> 3.965 ( +0.05%) [ +0.00% +0.08% +0.15% / +0.05% +0.56% +0.61%] index_select reverse : Elapsed 0.040 ms (3.963 ms / 100) 3.971 -> 3.978 ( +0.18%) [ +0.00% +0.08% +0.05% / +0.18% +0.71% +0.55%] index_select skip64 : Elapsed 0.040 ms (3.971 ms / 100) 3.966 -> 3.966 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.48% +0.48%] index_select skip256 : Elapsed 0.040 ms (3.968 ms / 100) 3.960 -> 3.959 ( -0.03%) [ +0.05% +0.13% +0.00% / -0.03% +0.51% +0.56%] index_select spread : Elapsed 0.040 ms (3.962 ms / 100) 3.977 -> 3.984 ( +0.18%) [ +0.23% +0.20% +0.00% / +0.18% +0.50% +0.50%] index_select strided 3 : Elapsed 0.040 ms (3.986 ms / 100) 3.977 -> 3.976 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.35% +0.35%] index_select strided 5 : Elapsed 0.040 ms (3.977 ms / 100) 3.971 -> 3.969 ( -0.05%) [ +0.03% +0.00% +0.03% / -0.05% +0.38% +0.55%] index_select strided 7 : Elapsed 0.040 ms (3.972 ms / 100) 3.999 -> 4.000 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.48% +0.40%] index_select strided 8 : Elapsed 0.040 ms (3.999 ms / 100) 3.974 -> 3.973 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.53% +0.55%] index_select strided 16 : Elapsed 0.040 ms (3.976 ms / 100) 3.984 -> 3.979 ( -0.13%) [ +0.10% +0.13% +0.00% / -0.13% +0.33% +0.33%] index_select random : Elapsed 0.040 ms (3.988 ms / 100) 3.964 -> 3.964 ( +0.00%) [ +0.05% +0.08% +0.00% / +0.00% +0.48% +0.50%] index_select random_sorted : Elapsed 0.040 ms (3.966 ms / 100) 3.973 -> 3.982 ( +0.23%) [ +0.00% +0.20% +0.10% / +0.23% +0.58% +0.38%] index_select perm : Elapsed 0.040 ms (3.973 ms / 100) 3.969 -> 3.970 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.43% +0.35%] index_select perm_sorted : Elapsed 0.040 ms (3.970 ms / 100) out_shape = [5, 40, 16, 4] in_shape = [5, 40, 20, 4] idx_dim = 2 B = [5, 40, 16, 4] (stride (2560, 4, 160, 1)) dim = 2 fill_cnt = 20 3.137 -> 3.114 ( -0.73%) [ +0.03% +0.00% +0.10% / -0.61% -0.54% -0.73%] index_fill_ const : Elapsed 0.031 ms (3.138 ms / 100) 3.152 -> 3.135 ( -0.54%) [ +0.25% +0.22% +0.00% / -0.44% -0.51% -0.54%] index_fill_ linear : Elapsed 0.032 ms (3.160 ms / 100) 3.151 -> 3.136 ( -0.48%) [ +0.22% +0.00% +0.13% / -0.41% -0.32% -0.48%] index_fill_ reverse : Elapsed 0.032 ms (3.158 ms / 100) 3.137 -> 3.114 ( -0.73%) [ +0.13% +0.10% +0.00% / -0.73% -0.48% -0.51%] index_fill_ skip64 : Elapsed 0.031 ms (3.141 ms / 100) 3.135 -> 3.118 ( -0.54%) [ +0.10% +0.10% +0.00% / -0.48% -0.54% -0.54%] index_fill_ skip256 : Elapsed 0.031 ms (3.138 ms / 100) 3.152 -> 3.134 ( -0.57%) [ +0.00% +0.19% +0.06% / -0.48% -0.57% -0.57%] index_fill_ spread : Elapsed 0.032 ms (3.152 ms / 100) 3.150 -> 3.129 ( -0.67%) [ +0.00% +0.16% +0.16% / -0.51% -0.38% -0.67%] index_fill_ strided 3 : Elapsed 0.032 ms (3.150 ms / 100) 3.158 -> 3.131 ( -0.85%) [ +0.00% +0.06% +0.03% / -0.66% -0.85% -0.73%] index_fill_ strided 5 : Elapsed 0.032 ms (3.158 ms / 100) 3.146 -> 3.135 ( -0.35%) [ +0.19% +0.00% +0.16% / -0.35% -0.16% -0.19%] index_fill_ strided 7 : Elapsed 0.032 ms (3.152 ms / 100) 3.134 -> 3.116 ( -0.57%) [ +0.00% +0.06% +0.32% / -0.57% -0.57% -0.41%] index_fill_ strided 8 : Elapsed 0.031 ms (3.134 ms / 100) 3.149 -> 3.126 ( -0.73%) [ +0.10% +0.03% +0.00% / -0.73% -0.60% -0.67%] index_fill_ random : Elapsed 0.032 ms (3.152 ms / 100) 3.145 -> 3.124 ( -0.67%) [ +0.06% +0.16% +0.00% / -0.67% -0.51% -0.60%] index_fill_ random_sorted : Elapsed 0.031 ms (3.147 ms / 100) B = [5, 40, 16, 4] (stride (64, 320, 1, 16)) A = [5, 40, 20, 4] (stride (20, 100, 1, 4000)) dim = 2 4.282 -> 4.286 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.75% +0.77%] index_select const : Elapsed 0.043 ms (4.286 ms / 100) 4.281 -> 4.284 ( +0.07%) [ +0.12% +0.00% +0.12% / +0.07% +0.72% +0.70%] index_select wrap : Elapsed 0.043 ms (4.286 ms / 100) 4.271 -> 4.275 ( +0.09%) [ +0.00% +0.16% +0.16% / +0.09% +0.77% +0.77%] index_select linear : Elapsed 0.043 ms (4.271 ms / 100) 4.274 -> 4.278 ( +0.09%) [ +0.00% +0.00% +0.07% / +0.09% +0.98% +0.94%] index_select reverse : Elapsed 0.043 ms (4.274 ms / 100) 4.270 -> 4.274 ( +0.09%) [ +0.05% +0.00% +0.12% / +0.09% +0.63% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.272 ms / 100) 4.286 -> 4.288 ( +0.05%) [ +0.07% +0.00% +0.05% / +0.05% +0.61% +0.56%] index_select skip256 : Elapsed 0.043 ms (4.289 ms / 100) 4.290 -> 4.290 ( +0.00%) [ +0.00% +0.14% +0.09% / +0.00% +0.68% +0.65%] index_select spread : Elapsed 0.043 ms (4.290 ms / 100) 4.289 -> 4.293 ( +0.09%) [ +0.07% +0.07% +0.00% / +0.09% +0.72% +0.70%] index_select strided 3 : Elapsed 0.043 ms (4.292 ms / 100) 4.275 -> 4.280 ( +0.12%) [ +0.07% +0.09% +0.00% / +0.12% +0.82% +0.75%] index_select strided 5 : Elapsed 0.043 ms (4.278 ms / 100) 4.274 -> 4.272 ( -0.05%) [ +0.00% +0.02% +0.14% / -0.05% +0.96% +0.96%] index_select strided 7 : Elapsed 0.043 ms (4.274 ms / 100) 4.275 -> 4.283 ( +0.19%) [ +0.07% +0.12% +0.00% / +0.19% +0.75% +0.96%] index_select strided 8 : Elapsed 0.043 ms (4.278 ms / 100) 4.283 -> 4.291 ( +0.19%) [ +0.05% +0.12% +0.00% / +0.19% +0.72% +0.72%] index_select strided 16 : Elapsed 0.043 ms (4.285 ms / 100) 4.287 -> 4.289 ( +0.05%) [ +0.42% +0.00% +0.19% / +0.05% +0.51% +0.63%] index_select random : Elapsed 0.043 ms (4.305 ms / 100) 4.276 -> 4.283 ( +0.16%) [ +0.09% +0.02% +0.00% / +0.16% +0.49% +0.51%] index_select random_sorted : Elapsed 0.043 ms (4.280 ms / 100) 4.296 -> 4.303 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.58% +0.70%] index_select perm : Elapsed 0.043 ms (4.303 ms / 100) 4.292 -> 4.299 ( +0.16%) [ +0.00% +0.09% +0.05% / +0.16% +0.51% +0.58%] index_select perm_sorted : Elapsed 0.043 ms (4.292 ms / 100) B = [5, 40, 16, 4] (stride (1, 320, 20, 5)) A = [5, 40, 20, 4] (stride (800, 1, 40, 4000)) dim = 2 3.899 -> 3.904 ( +0.13%) [ +0.10% +0.05% +0.00% / +0.13% +0.28% +0.46%] index_select const : Elapsed 0.039 ms (3.903 ms / 100) 3.844 -> 3.844 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.47% +0.62%] index_select wrap : Elapsed 0.038 ms (3.844 ms / 100) 3.842 -> 3.839 ( -0.08%) [ +0.03% +0.00% +0.00% / -0.08% +0.52% +0.49%] index_select linear : Elapsed 0.038 ms (3.843 ms / 100) 3.825 -> 3.826 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.55% +0.55%] index_select reverse : Elapsed 0.038 ms (3.827 ms / 100) 3.865 -> 3.867 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.80% +0.83%] index_select skip64 : Elapsed 0.039 ms (3.867 ms / 100) 3.858 -> 3.860 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.36% +0.39%] index_select skip256 : Elapsed 0.039 ms (3.859 ms / 100) 3.845 -> 3.850 ( +0.13%) [ +0.13% +0.08% +0.00% / +0.13% +0.65% +0.65%] index_select spread : Elapsed 0.038 ms (3.850 ms / 100) 3.845 -> 3.844 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.62% +0.60%] index_select strided 3 : Elapsed 0.038 ms (3.847 ms / 100) 3.860 -> 3.863 ( +0.08%) [ +0.08% +0.00% +0.05% / +0.08% +0.57% +0.57%] index_select strided 5 : Elapsed 0.039 ms (3.863 ms / 100) 3.841 -> 3.848 ( +0.18%) [ +0.00% +0.13% +0.05% / +0.18% +0.89% +0.99%] index_select strided 7 : Elapsed 0.038 ms (3.841 ms / 100) 3.854 -> 3.854 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.73% +0.70%] index_select strided 8 : Elapsed 0.039 ms (3.855 ms / 100) 3.846 -> 3.848 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.65% +0.78%] index_select strided 16 : Elapsed 0.038 ms (3.847 ms / 100) 3.855 -> 3.856 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.57% +0.65%] index_select random : Elapsed 0.039 ms (3.856 ms / 100) 3.842 -> 3.847 ( +0.13%) [ +0.29% +0.00% +0.21% / +0.13% +0.88% +1.02%] index_select random_sorted : Elapsed 0.039 ms (3.853 ms / 100) 3.842 -> 3.838 ( -0.10%) [ +0.03% +0.00% +0.05% / -0.10% +0.78% +0.65%] index_select perm : Elapsed 0.038 ms (3.843 ms / 100) 3.847 -> 3.844 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.60% +0.60%] index_select perm_sorted : Elapsed 0.038 ms (3.847 ms / 100) B = [5, 40, 16, 4] (stride (1, 5, 800, 200)) A = [5, 40, 20, 4] (stride (3200, 20, 1, 800)) dim = 2 4.130 -> 4.130 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.63% +0.65%] index_select const : Elapsed 0.041 ms (4.133 ms / 100) 4.136 -> 4.139 ( +0.07%) [ +0.00% +0.07% +0.05% / +0.07% +0.80% +0.85%] index_select wrap : Elapsed 0.041 ms (4.136 ms / 100) 4.136 -> 4.139 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.75% +0.75%] index_select linear : Elapsed 0.041 ms (4.136 ms / 100) 4.135 -> 4.137 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.77% +0.87%] index_select reverse : Elapsed 0.041 ms (4.138 ms / 100) 4.134 -> 4.140 ( +0.15%) [ +0.02% +0.15% +0.00% / +0.15% +0.77% +0.82%] index_select skip64 : Elapsed 0.041 ms (4.135 ms / 100) 4.139 -> 4.135 ( -0.10%) [ +0.10% +0.05% +0.00% / -0.10% +0.53% +0.56%] index_select skip256 : Elapsed 0.041 ms (4.143 ms / 100) 4.139 -> 4.142 ( +0.07%) [ +0.10% +0.02% +0.00% / +0.07% +0.68% +0.70%] index_select spread : Elapsed 0.041 ms (4.143 ms / 100) 4.132 -> 4.130 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.61% +0.61%] index_select strided 3 : Elapsed 0.041 ms (4.134 ms / 100) 4.136 -> 4.141 ( +0.12%) [ +0.05% +0.00% +0.12% / +0.12% +0.68% +0.82%] index_select strided 5 : Elapsed 0.041 ms (4.138 ms / 100) 4.136 -> 4.135 ( -0.02%) [ +0.12% +0.00% +0.02% / -0.02% +0.60% +0.63%] index_select strided 7 : Elapsed 0.041 ms (4.141 ms / 100) 4.129 -> 4.129 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.00% +0.68% +0.68%] index_select strided 8 : Elapsed 0.041 ms (4.132 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.00% +0.10% +0.00% / +0.02% +0.70% +0.63%] index_select strided 16 : Elapsed 0.041 ms (4.138 ms / 100) 4.137 -> 4.143 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.60% +0.60%] index_select random : Elapsed 0.041 ms (4.140 ms / 100) 4.135 -> 4.141 ( +0.15%) [ +0.12% +0.10% +0.00% / +0.15% +0.60% +0.70%] index_select random_sorted : Elapsed 0.041 ms (4.140 ms / 100) 4.140 -> 4.146 ( +0.14%) [ +0.14% +0.12% +0.00% / +0.14% +0.65% +0.63%] index_select perm : Elapsed 0.041 ms (4.146 ms / 100) 4.131 -> 4.133 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.61% +0.53%] index_select perm_sorted : Elapsed 0.041 ms (4.131 ms / 100) B = [5, 40, 16, 4] (stride (16, 80, 1, 3200)) A = [5, 40, 20, 4] (stride (40, 1, 800, 200)) dim = 2 3.559 -> 3.561 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.28% +0.39%] index_select const : Elapsed 0.036 ms (3.560 ms / 100) 3.545 -> 3.547 ( +0.06%) [ +0.14% +0.00% +0.03% / +0.06% +0.51% +0.54%] index_select wrap : Elapsed 0.035 ms (3.550 ms / 100) 3.540 -> 3.545 ( +0.14%) [ +0.00% +0.03% +0.00% / +0.14% +0.51% +0.45%] index_select linear : Elapsed 0.035 ms (3.540 ms / 100) 3.538 -> 3.541 ( +0.08%) [ +0.06% +0.06% +0.00% / +0.08% +0.62% +0.51%] index_select reverse : Elapsed 0.035 ms (3.540 ms / 100) 3.563 -> 3.567 ( +0.11%) [ +0.06% +0.06% +0.00% / +0.11% +0.42% +0.53%] index_select skip64 : Elapsed 0.036 ms (3.565 ms / 100) 3.569 -> 3.577 ( +0.22%) [ +0.20% +0.00% +0.14% / +0.22% +0.56% +0.45%] index_select skip256 : Elapsed 0.036 ms (3.576 ms / 100) 3.543 -> 3.546 ( +0.08%) [ +0.08% +0.00% +0.03% / +0.08% +0.59% +0.56%] index_select spread : Elapsed 0.035 ms (3.546 ms / 100) 3.569 -> 3.568 ( -0.03%) [ +0.00% +0.00% +0.08% / -0.03% +0.31% +0.25%] index_select strided 3 : Elapsed 0.036 ms (3.569 ms / 100) 3.541 -> 3.541 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.40% +0.42%] index_select strided 5 : Elapsed 0.035 ms (3.542 ms / 100) 3.535 -> 3.537 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.54% +0.54%] index_select strided 7 : Elapsed 0.035 ms (3.535 ms / 100) 3.550 -> 3.551 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.03% +0.56% +0.51%] index_select strided 8 : Elapsed 0.035 ms (3.550 ms / 100) 3.541 -> 3.542 ( +0.03%) [ +0.06% +0.00% +0.08% / +0.03% +0.79% +0.79%] index_select strided 16 : Elapsed 0.035 ms (3.543 ms / 100) 3.541 -> 3.542 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.79% +0.73%] index_select random : Elapsed 0.035 ms (3.542 ms / 100) 3.549 -> 3.551 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.68% +0.76%] index_select random_sorted : Elapsed 0.036 ms (3.553 ms / 100) 3.541 -> 3.545 ( +0.11%) [ +0.08% +0.00% +0.06% / +0.11% +0.56% +0.54%] index_select perm : Elapsed 0.035 ms (3.544 ms / 100) 3.557 -> 3.559 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.70% +0.73%] index_select perm_sorted : Elapsed 0.036 ms (3.559 ms / 100) out_shape = [5, 40, 20, 16] in_shape = [5, 40, 20, 4] idx_dim = 3 B = [5, 40, 20, 16] (stride (16, 1600, 80, 1)) A = [5, 40, 20, 4] (stride (20, 100, 1, 4000)) dim = 3 2.402 -> 2.407 ( +0.21%) [ +0.37% +0.17% +0.00% / +0.67% +0.21% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.411 ms / 100) 2.388 -> 2.394 ( +0.25%) [ +0.25% +0.13% +0.00% / +0.71% +0.25% +0.38%] index_copy_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.402 -> 2.411 ( +0.37%) [ +0.17% +0.08% +0.00% / +0.46% +0.58% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.380 -> 2.393 ( +0.55%) [ +0.34% +0.50% +0.00% / +0.67% +0.55% +0.76%] index_copy_ reverse : Elapsed 0.024 ms (2.388 ms / 100) 2.441 -> 2.446 ( +0.20%) [ +0.12% +0.08% +0.00% / +0.20% +0.41% +0.37%] index_add_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.479 -> 2.484 ( +0.20%) [ +0.12% +0.16% +0.00% / +0.20% +0.48% +0.52%] index_copy_ spread : Elapsed 0.025 ms (2.482 ms / 100) 2.443 -> 2.446 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.45% +0.49%] index_add_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.481 -> 2.491 ( +0.40%) [ +0.00% +0.24% +0.16% / +0.40% +0.52% +0.48%] index_copy_ strided 3 : Elapsed 0.025 ms (2.481 ms / 100) 2.435 -> 2.443 ( +0.33%) [ +0.00% +0.37% +0.21% / +0.49% +0.57% +0.33%] index_add_ strided 5 : Elapsed 0.024 ms (2.435 ms / 100) 2.483 -> 2.486 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.40% +0.32%] index_copy_ strided 5 : Elapsed 0.025 ms (2.483 ms / 100) 2.439 -> 2.444 ( +0.21%) [ +0.16% +0.25% +0.00% / +0.33% +0.45% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.479 -> 2.491 ( +0.48%) [ +0.04% +0.08% +0.00% / +0.56% +0.48% +0.48%] index_copy_ strided 7 : Elapsed 0.025 ms (2.480 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.41% +0.16% / +0.24% +0.12% +0.08%] index_add_ perm : Elapsed 0.024 ms (2.449 ms / 100) 2.483 -> 2.492 ( +0.36%) [ +0.00% +0.24% +0.00% / +0.36% +0.40% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.483 ms / 100) 2.440 -> 2.450 ( +0.41%) [ +0.00% +0.33% +0.33% / +0.49% +0.41% +0.82%] index_add_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 2.480 -> 2.488 ( +0.32%) [ +0.16% +0.00% +0.32% / +0.32% +0.52% +0.77%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.484 ms / 100) 4.918 -> 4.928 ( +0.20%) [ +0.00% +0.31% +0.24% / +0.20% +0.57% +0.33%] index_select const : Elapsed 0.049 ms (4.918 ms / 100) 4.982 -> 4.979 ( -0.06%) [ +0.12% +0.14% +0.00% / -0.06% +0.38% +0.40%] index_select wrap : Elapsed 0.050 ms (4.988 ms / 100) 4.967 -> 4.974 ( +0.14%) [ +0.00% +0.08% +0.26% / +0.14% +0.40% +0.50%] index_select linear : Elapsed 0.050 ms (4.967 ms / 100) 4.995 -> 5.006 ( +0.22%) [ +0.04% +0.04% +0.00% / +0.22% +0.58% +0.44%] index_select reverse : Elapsed 0.050 ms (4.997 ms / 100) 4.941 -> 4.949 ( +0.16%) [ +0.00% +0.06% +0.02% / +0.16% +0.32% +0.28%] index_select skip64 : Elapsed 0.049 ms (4.941 ms / 100) 4.938 -> 4.931 ( -0.14%) [ +0.00% +0.02% +0.06% / -0.14% +0.43% +0.02%] index_select skip256 : Elapsed 0.049 ms (4.938 ms / 100) 4.979 -> 4.976 ( -0.06%) [ +0.02% +0.00% +0.06% / -0.06% +0.18% +0.28%] index_select spread : Elapsed 0.050 ms (4.980 ms / 100) 4.968 -> 4.969 ( +0.02%) [ +0.00% +0.00% +0.04% / +0.02% +0.28% +0.36%] index_select strided 3 : Elapsed 0.050 ms (4.968 ms / 100) 4.974 -> 4.980 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.16% +0.16%] index_select random : Elapsed 0.050 ms (4.974 ms / 100) 4.962 -> 4.971 ( +0.18%) [ +0.06% +0.14% +0.00% / +0.18% +0.32% +0.48%] index_select random_sorted : Elapsed 0.050 ms (4.965 ms / 100) B = [5, 40, 20, 16] (stride (640, 16, 3200, 1)) A = [5, 40, 20, 4] (stride (4, 400, 20, 1)) dim = 3 1.216 -> 1.218 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +1.97% +1.32%] index_add_ linear : Elapsed 0.012 ms (1.216 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +1.26% +1.18%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.210 -> 1.212 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.99% +1.07%] index_add_ reverse : Elapsed 0.012 ms (1.211 ms / 100) 1.185 -> 1.189 ( +0.34%) [ +0.00% +0.08% +0.08% / +0.34% +1.10% +1.01%] index_copy_ reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.258 -> 1.264 ( +0.48%) [ +0.16% +0.00% +0.16% / +0.48% +1.51% +1.67%] index_add_ spread : Elapsed 0.013 ms (1.260 ms / 100) 1.257 -> 1.265 ( +0.64%) [ +0.16% +0.00% +0.16% / +0.64% +0.72% +0.72%] index_copy_ spread : Elapsed 0.013 ms (1.259 ms / 100) 1.253 -> 1.256 ( +0.24%) [ +0.00% +0.00% +0.08% / +0.24% +0.80% +1.44%] index_add_ strided 3 : Elapsed 0.013 ms (1.253 ms / 100) 1.256 -> 1.266 ( +0.80%) [ +0.08% +0.56% +0.00% / +0.96% +0.80% +1.35%] index_copy_ strided 3 : Elapsed 0.013 ms (1.257 ms / 100) 1.255 -> 1.263 ( +0.64%) [ +0.00% +0.24% +0.24% / +0.64% +1.59% +1.91%] index_add_ strided 5 : Elapsed 0.013 ms (1.255 ms / 100) 1.258 -> 1.263 ( +0.40%) [ +0.00% +0.32% +0.16% / +0.40% +1.03% +0.72%] index_copy_ strided 5 : Elapsed 0.013 ms (1.258 ms / 100) 1.258 -> 1.263 ( +0.40%) [ +0.00% +0.24% +0.16% / +0.40% +1.43% +1.19%] index_add_ strided 7 : Elapsed 0.013 ms (1.258 ms / 100) 1.258 -> 1.267 ( +0.72%) [ +0.32% +0.24% +0.00% / +0.72% +1.35% +1.03%] index_copy_ strided 7 : Elapsed 0.013 ms (1.262 ms / 100) 1.271 -> 1.278 ( +0.55%) [ +0.39% +0.39% +0.00% / +0.55% +2.36% +2.36%] index_add_ perm : Elapsed 0.013 ms (1.276 ms / 100) 1.258 -> 1.258 ( +0.00%) [ +0.40% +0.32% +0.00% / +0.00% +1.35% +0.72%] index_copy_ perm : Elapsed 0.013 ms (1.263 ms / 100) 1.255 -> 1.256 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +1.91% +1.91%] index_add_ perm_sorted : Elapsed 0.013 ms (1.257 ms / 100) 1.258 -> 1.261 ( +0.24%) [ +0.00% +0.08% +0.16% / +0.24% +1.27% +0.95%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.258 ms / 100) 2.126 -> 2.131 ( +0.24%) [ +0.00% +0.24% +0.09% / +0.24% +0.52% +0.61%] index_select const : Elapsed 0.021 ms (2.126 ms / 100) 2.124 -> 2.130 ( +0.28%) [ +0.00% +0.14% +0.19% / +0.42% +0.28% +0.47%] index_select wrap : Elapsed 0.021 ms (2.124 ms / 100) 2.119 -> 2.126 ( +0.33%) [ +0.19% +0.19% +0.00% / +0.33% +0.57% +0.47%] index_select linear : Elapsed 0.021 ms (2.123 ms / 100) 2.121 -> 2.125 ( +0.19%) [ +0.47% +0.00% +0.24% / +0.19% +0.61% +0.57%] index_select reverse : Elapsed 0.021 ms (2.131 ms / 100) 2.123 -> 2.126 ( +0.14%) [ +0.00% +0.19% +0.33% / +0.14% +0.33% +0.57%] index_select skip64 : Elapsed 0.021 ms (2.123 ms / 100) 2.122 -> 2.128 ( +0.28%) [ +0.05% +0.24% +0.00% / +0.28% +0.71% +0.66%] index_select skip256 : Elapsed 0.021 ms (2.123 ms / 100) 2.125 -> 2.129 ( +0.19%) [ +0.09% +0.19% +0.00% / +0.19% +0.56% +0.38%] index_select spread : Elapsed 0.021 ms (2.127 ms / 100) 2.127 -> 2.136 ( +0.42%) [ +0.14% +0.05% +0.00% / +0.42% +0.42% +0.56%] index_select strided 3 : Elapsed 0.021 ms (2.130 ms / 100) 2.121 -> 2.128 ( +0.33%) [ +0.24% +0.19% +0.00% / +0.33% +0.75% +0.71%] index_select random : Elapsed 0.021 ms (2.126 ms / 100) 2.125 -> 2.131 ( +0.28%) [ +0.09% +0.24% +0.00% / +0.28% +0.61% +0.47%] index_select random_sorted : Elapsed 0.021 ms (2.127 ms / 100) B = [5, 40, 20, 16] (stride (1, 5, 3200, 200)) A = [5, 40, 20, 4] (stride (1, 20, 800, 5)) dim = 3 2.597 -> 2.596 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.46% +0.46%] index_add_ linear : Elapsed 0.026 ms (2.599 ms / 100) 2.543 -> 2.546 ( +0.12%) [ +0.00% +0.16% +0.04% / +0.12% +0.35% +0.39%] index_copy_ linear : Elapsed 0.025 ms (2.543 ms / 100) 2.598 -> 2.599 ( +0.04%) [ +0.08% +0.15% +0.00% / +0.04% +0.31% +0.46%] index_add_ reverse : Elapsed 0.026 ms (2.600 ms / 100) 2.545 -> 2.547 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.43% +0.47%] index_copy_ reverse : Elapsed 0.025 ms (2.547 ms / 100) 2.600 -> 2.600 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.31% +0.35%] index_add_ spread : Elapsed 0.026 ms (2.600 ms / 100) 2.546 -> 2.548 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.27% +0.82%] index_copy_ spread : Elapsed 0.025 ms (2.548 ms / 100) 2.593 -> 2.595 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.46% +0.54%] index_add_ strided 3 : Elapsed 0.026 ms (2.599 ms / 100) 2.540 -> 2.549 ( +0.35%) [ +0.12% +0.12% +0.00% / +0.35% +0.51% +0.47%] index_copy_ strided 3 : Elapsed 0.025 ms (2.543 ms / 100) 2.596 -> 2.599 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.27% +0.12% +0.31%] index_add_ strided 5 : Elapsed 0.026 ms (2.599 ms / 100) 2.545 -> 2.546 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.28% +0.28%] index_copy_ strided 5 : Elapsed 0.025 ms (2.547 ms / 100) 2.602 -> 2.601 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.38% +0.27%] index_add_ strided 7 : Elapsed 0.026 ms (2.605 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.12% +0.20% +0.00% / +0.04% +0.47% +0.43%] index_copy_ strided 7 : Elapsed 0.025 ms (2.549 ms / 100) 2.598 -> 2.601 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.42% +0.58%] index_add_ perm : Elapsed 0.026 ms (2.599 ms / 100) 2.547 -> 2.547 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.27% +0.63%] index_copy_ perm : Elapsed 0.025 ms (2.549 ms / 100) 2.597 -> 2.601 ( +0.15%) [ +0.00% +0.12% +0.23% / +0.15% +0.46% +0.46%] index_add_ perm_sorted : Elapsed 0.026 ms (2.597 ms / 100) 2.546 -> 2.549 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.39% +0.43%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.546 ms / 100) 5.922 -> 5.941 ( +0.32%) [ +0.83% +0.00% +0.32% / +0.32% +1.18% +1.00%] index_select const : Elapsed 0.060 ms (5.971 ms / 100) 5.899 -> 5.908 ( +0.15%) [ +0.25% +0.14% +0.00% / +0.15% +0.51% +0.44%] index_select wrap : Elapsed 0.059 ms (5.914 ms / 100) 5.941 -> 5.944 ( +0.05%) [ +0.13% +0.19% +0.00% / +0.05% +0.91% +0.61%] index_select linear : Elapsed 0.059 ms (5.949 ms / 100) 5.979 -> 5.945 ( -0.57%) [ +0.03% +0.00% +0.02% / -0.57% +0.13% -0.13%] index_select reverse : Elapsed 0.060 ms (5.981 ms / 100) 5.944 -> 5.954 ( +0.17%) [ +0.37% +0.00% +0.19% / +0.17% +0.39% +0.27%] index_select skip64 : Elapsed 0.060 ms (5.966 ms / 100) 5.967 -> 5.957 ( -0.17%) [ +0.12% +0.25% +0.00% / +0.12% -0.17% +0.45%] index_select skip256 : Elapsed 0.060 ms (5.974 ms / 100) 5.909 -> 5.916 ( +0.12%) [ +0.00% +0.19% +0.10% / +0.12% +0.36% +0.39%] index_select spread : Elapsed 0.059 ms (5.909 ms / 100) 5.915 -> 5.921 ( +0.10%) [ +0.00% +0.03% +0.10% / +0.10% +0.32% +0.24%] index_select strided 3 : Elapsed 0.059 ms (5.915 ms / 100) 5.898 -> 5.913 ( +0.25%) [ +0.10% +0.32% +0.00% / +0.32% +0.47% +0.25%] index_select random : Elapsed 0.059 ms (5.904 ms / 100) 5.905 -> 5.897 ( -0.14%) [ +0.00% +0.02% +0.00% / -0.14% +0.32% +0.37%] index_select random_sorted : Elapsed 0.059 ms (5.905 ms / 100) B = [5, 40, 20, 16] (stride (1, 100, 5, 4000)) A = [5, 40, 20, 4] (stride (1, 5, 200, 4000)) dim = 3 2.432 -> 2.434 ( +0.08%) [ +0.29% +0.21% +0.00% / +0.12% +0.08% +0.16%] index_add_ linear : Elapsed 0.024 ms (2.439 ms / 100) 2.378 -> 2.382 ( +0.17%) [ +0.00% +0.08% +0.04% / +0.34% +0.17% +0.21%] index_copy_ linear : Elapsed 0.024 ms (2.378 ms / 100) 2.436 -> 2.431 ( -0.21%) [ +0.41% +0.04% +0.00% / -0.08% -0.21% -0.08%] index_add_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.378 -> 2.375 ( -0.13%) [ +0.04% +0.00% +0.25% / +0.13% -0.13% +0.00%] index_copy_ reverse : Elapsed 0.024 ms (2.379 ms / 100) 2.431 -> 2.424 ( -0.29%) [ +0.12% +0.04% +0.00% / -0.12% -0.25% -0.29%] index_add_ spread : Elapsed 0.024 ms (2.434 ms / 100) 2.375 -> 2.370 ( -0.21%) [ +0.25% +0.08% +0.00% / +0.08% -0.21% -0.17%] index_copy_ spread : Elapsed 0.024 ms (2.381 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.25% +0.08% +0.08%] index_add_ strided 3 : Elapsed 0.024 ms (2.431 ms / 100) 2.375 -> 2.378 ( +0.13%) [ +0.34% +0.00% +0.00% / +0.34% +0.13% +0.17%] index_copy_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.436 -> 2.432 ( -0.16%) [ +0.29% +0.08% +0.00% / +0.00% -0.16% -0.04%] index_add_ strided 5 : Elapsed 0.024 ms (2.443 ms / 100) 2.377 -> 2.383 ( +0.25%) [ +0.38% +0.25% +0.00% / +0.25% +0.25% +0.29%] index_copy_ strided 5 : Elapsed 0.024 ms (2.386 ms / 100) 2.428 -> 2.428 ( +0.00%) [ +0.29% +0.12% +0.00% / +0.00% +0.12% +0.04%] index_add_ strided 7 : Elapsed 0.024 ms (2.435 ms / 100) 2.376 -> 2.372 ( -0.17%) [ +0.08% +0.00% +0.00% / -0.17% +0.29% +0.00%] index_copy_ strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.433 -> 2.431 ( -0.08%) [ +0.08% +0.00% +0.04% / +0.04% -0.08% +0.04%] index_add_ perm : Elapsed 0.024 ms (2.435 ms / 100) 2.376 -> 2.375 ( -0.04%) [ +0.17% +0.08% +0.00% / +0.04% +0.21% -0.04%] index_copy_ perm : Elapsed 0.024 ms (2.380 ms / 100) 2.431 -> 2.430 ( -0.04%) [ +0.25% +0.00% +0.29% / +0.33% -0.04% +0.00%] index_add_ perm_sorted : Elapsed 0.024 ms (2.437 ms / 100) 2.382 -> 2.377 ( -0.21%) [ +0.04% +0.00% +0.17% / +0.04% -0.21% -0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.383 ms / 100) 5.318 -> 5.320 ( +0.04%) [ +0.21% +0.00% +0.13% / +0.04% +0.38% +0.56%] index_select const : Elapsed 0.053 ms (5.329 ms / 100) 5.301 -> 5.308 ( +0.13%) [ +0.00% +0.08% +0.06% / +0.13% +0.25% +0.21%] index_select wrap : Elapsed 0.053 ms (5.301 ms / 100) 5.319 -> 5.325 ( +0.11%) [ +0.00% +0.09% +0.02% / +0.11% +0.23% +0.34%] index_select linear : Elapsed 0.053 ms (5.319 ms / 100) 5.322 -> 5.332 ( +0.19%) [ +0.26% +0.09% +0.00% / +0.19% +0.36% +0.39%] index_select reverse : Elapsed 0.053 ms (5.336 ms / 100) 5.344 -> 5.327 ( -0.32%) [ +0.00% +0.04% +0.00% / -0.32% +0.17% +0.30%] index_select skip64 : Elapsed 0.053 ms (5.344 ms / 100) 5.319 -> 5.326 ( +0.13%) [ +0.13% +0.15% +0.00% / +0.13% +0.55% +0.32%] index_select skip256 : Elapsed 0.053 ms (5.326 ms / 100) 5.322 -> 5.325 ( +0.06%) [ +0.00% +0.04% +0.00% / +0.06% +0.23% +0.26%] index_select spread : Elapsed 0.053 ms (5.322 ms / 100) 5.291 -> 5.299 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.30% +0.43%] index_select strided 3 : Elapsed 0.053 ms (5.295 ms / 100) 5.324 -> 5.327 ( +0.06%) [ +0.15% +0.00% +0.04% / +0.06% +0.08% +0.15%] index_select random : Elapsed 0.053 ms (5.332 ms / 100) 5.315 -> 5.316 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.02% +0.32% +0.19%] index_select random_sorted : Elapsed 0.053 ms (5.320 ms / 100) out_shape = [16, 4, 5, 40] in_shape = [20, 4, 5, 40] idx_dim = 0 B = [16, 4, 5, 40] (stride (200, 3200, 1, 5)) A = [20, 4, 5, 40] (stride (4, 1, 80, 400)) dim = 0 4.269 -> 4.274 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.68% +0.68%] index_select const : Elapsed 0.043 ms (4.274 ms / 100) 4.277 -> 4.301 ( +0.56%) [ +0.09% +0.14% +0.00% / +0.56% +0.77% +0.84%] index_select wrap : Elapsed 0.043 ms (4.281 ms / 100) 4.275 -> 4.287 ( +0.28%) [ +0.09% +0.14% +0.00% / +0.28% +0.96% +0.94%] index_select linear : Elapsed 0.043 ms (4.279 ms / 100) 4.279 -> 4.280 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.72% +0.86%] index_select reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.273 -> 4.280 ( +0.16%) [ +0.21% +0.00% +0.09% / +0.16% +0.77% +0.66%] index_select skip64 : Elapsed 0.043 ms (4.282 ms / 100) 4.272 -> 4.277 ( +0.12%) [ +0.16% +0.14% +0.00% / +0.12% +0.70% +0.68%] index_select skip256 : Elapsed 0.043 ms (4.279 ms / 100) 4.278 -> 4.282 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.61% +0.65%] index_select spread : Elapsed 0.043 ms (4.282 ms / 100) 4.272 -> 4.277 ( +0.12%) [ +0.02% +0.00% +0.09% / +0.12% +0.77% +0.75%] index_select strided 3 : Elapsed 0.043 ms (4.273 ms / 100) 4.289 -> 4.292 ( +0.07%) [ +0.14% +0.26% +0.00% / +0.07% +0.58% +0.72%] index_select strided 5 : Elapsed 0.043 ms (4.295 ms / 100) 4.271 -> 4.271 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.54% +0.73%] index_select strided 7 : Elapsed 0.043 ms (4.275 ms / 100) 4.270 -> 4.272 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.73% +0.84%] index_select strided 8 : Elapsed 0.043 ms (4.273 ms / 100) 4.298 -> 4.289 ( -0.21%) [ +0.07% +0.05% +0.00% / -0.21% +0.60% +0.42%] index_select strided 16 : Elapsed 0.043 ms (4.301 ms / 100) 4.281 -> 4.281 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.42% +0.56%] index_select random : Elapsed 0.043 ms (4.283 ms / 100) 4.279 -> 4.284 ( +0.12%) [ +0.05% +0.00% +0.05% / +0.12% +0.51% +0.61%] index_select random_sorted : Elapsed 0.043 ms (4.281 ms / 100) 4.285 -> 4.288 ( +0.07%) [ +0.26% +0.12% +0.00% / +0.07% +0.70% +0.68%] index_select perm : Elapsed 0.043 ms (4.296 ms / 100) 4.275 -> 4.277 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.51% +0.68%] index_select perm_sorted : Elapsed 0.043 ms (4.277 ms / 100) B = [16, 4, 5, 40] (stride (160, 40, 2560, 1)) A = [20, 4, 5, 40] (stride (1, 20, 80, 400)) dim = 0 4.271 -> 4.272 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.47% +0.42%] index_select const : Elapsed 0.043 ms (4.273 ms / 100) 4.269 -> 4.267 ( -0.05%) [ +0.07% +0.00% +0.02% / -0.05% +0.52% +0.40%] index_select wrap : Elapsed 0.043 ms (4.272 ms / 100) 4.259 -> 4.267 ( +0.19%) [ +0.23% +0.00% +0.21% / +0.19% +0.52% +0.75%] index_select linear : Elapsed 0.043 ms (4.269 ms / 100) 4.286 -> 4.294 ( +0.19%) [ +0.09% +0.00% +0.16% / +0.19% +0.63% +0.63%] index_select reverse : Elapsed 0.043 ms (4.290 ms / 100) 4.284 -> 4.281 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.30% +0.33%] index_select skip64 : Elapsed 0.043 ms (4.285 ms / 100) 4.268 -> 4.267 ( -0.02%) [ +0.00% +0.12% +0.00% / -0.02% +0.49% +0.61%] index_select skip256 : Elapsed 0.043 ms (4.268 ms / 100) 4.263 -> 4.264 ( +0.02%) [ +0.00% +0.26% +0.05% / +0.02% +0.75% +0.77%] index_select spread : Elapsed 0.043 ms (4.263 ms / 100) 4.288 -> 4.288 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.63% +0.49%] index_select strided 3 : Elapsed 0.043 ms (4.289 ms / 100) 4.267 -> 4.267 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.47% +0.56%] index_select strided 5 : Elapsed 0.043 ms (4.267 ms / 100) 4.268 -> 4.271 ( +0.07%) [ +0.00% +0.14% +0.12% / +0.07% +0.63% +0.82%] index_select strided 7 : Elapsed 0.043 ms (4.268 ms / 100) 4.291 -> 4.291 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.54% +0.56%] index_select strided 8 : Elapsed 0.043 ms (4.293 ms / 100) 4.273 -> 4.277 ( +0.09%) [ +0.00% +0.12% +0.07% / +0.09% +0.63% +0.70%] index_select strided 16 : Elapsed 0.043 ms (4.273 ms / 100) 4.275 -> 4.281 ( +0.14%) [ +0.12% +0.07% +0.00% / +0.14% +0.63% +0.65%] index_select random : Elapsed 0.043 ms (4.280 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +0.58% +0.70%] index_select random_sorted : Elapsed 0.043 ms (4.274 ms / 100) 4.293 -> 4.299 ( +0.14%) [ +0.12% +0.14% +0.00% / +0.14% +0.75% +0.65%] index_select perm : Elapsed 0.043 ms (4.298 ms / 100) 4.273 -> 4.277 ( +0.09%) [ +0.21% +0.05% +0.00% / +0.09% +0.98% +0.49%] index_select perm_sorted : Elapsed 0.043 ms (4.282 ms / 100) B = [16, 4, 5, 40] (stride (160, 1, 2560, 4)) A = [20, 4, 5, 40] (stride (800, 40, 160, 1)) dim = 0 3.805 -> 3.807 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +1.05% +0.92%] index_select const : Elapsed 0.038 ms (3.808 ms / 100) 3.807 -> 3.809 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.87% +0.87%] index_select wrap : Elapsed 0.038 ms (3.809 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.76% +0.73%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.66% +0.73%] index_select reverse : Elapsed 0.038 ms (3.815 ms / 100) 3.815 -> 3.815 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.92% +0.92%] index_select skip64 : Elapsed 0.038 ms (3.816 ms / 100) 3.826 -> 3.825 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.58% +0.58%] index_select skip256 : Elapsed 0.038 ms (3.827 ms / 100) 3.815 -> 3.815 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.58% +0.63%] index_select spread : Elapsed 0.038 ms (3.815 ms / 100) 3.807 -> 3.807 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.66% +0.68%] index_select strided 3 : Elapsed 0.038 ms (3.809 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.79% +0.71%] index_select strided 5 : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.818 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.71% +0.68%] index_select strided 7 : Elapsed 0.038 ms (3.816 ms / 100) 3.805 -> 3.812 ( +0.18%) [ +0.03% +0.00% +0.00% / +0.18% +0.68% +0.66%] index_select strided 8 : Elapsed 0.038 ms (3.806 ms / 100) 3.811 -> 3.818 ( +0.18%) [ +0.05% +0.00% +0.00% / +0.18% +0.68% +0.60%] index_select strided 16 : Elapsed 0.038 ms (3.813 ms / 100) 3.819 -> 3.829 ( +0.26%) [ +0.03% +0.00% +0.03% / +0.26% +0.55% +0.47%] index_select random : Elapsed 0.038 ms (3.820 ms / 100) 3.815 -> 3.835 ( +0.52%) [ +0.00% +0.00% +0.00% / +0.55% +0.52% +0.55%] index_select random_sorted : Elapsed 0.038 ms (3.815 ms / 100) 3.814 -> 3.837 ( +0.60%) [ +0.00% +0.05% +0.03% / +0.60% +0.71% +0.68%] index_select perm : Elapsed 0.038 ms (3.814 ms / 100) 3.812 -> 3.830 ( +0.47%) [ +0.03% +0.05% +0.00% / +0.60% +0.50% +0.47%] index_select perm_sorted : Elapsed 0.038 ms (3.813 ms / 100) B = [16, 4, 5, 40] (stride (4, 1, 2560, 64)) A = [20, 4, 5, 40] (stride (1, 20, 80, 400)) dim = 0 3.923 -> 3.939 ( +0.41%) [ +0.08% +0.03% +0.00% / +0.46% +0.41% +0.41%] index_select const : Elapsed 0.039 ms (3.926 ms / 100) 3.925 -> 3.940 ( +0.38%) [ +0.33% +0.20% +0.00% / +0.38% +0.69% +0.59%] index_select wrap : Elapsed 0.039 ms (3.938 ms / 100) 3.924 -> 3.938 ( +0.36%) [ +0.08% +0.00% +0.03% / +0.36% +0.51% +0.59%] index_select linear : Elapsed 0.039 ms (3.927 ms / 100) 3.943 -> 3.958 ( +0.38%) [ +0.05% +0.00% +0.10% / +0.38% +0.51% +0.58%] index_select reverse : Elapsed 0.039 ms (3.945 ms / 100) 3.937 -> 3.950 ( +0.33%) [ +0.08% +0.08% +0.00% / +0.43% +0.33% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.940 ms / 100) 3.925 -> 3.943 ( +0.46%) [ +0.00% +0.03% +0.03% / +0.76% +0.56% +0.46%] index_select skip256 : Elapsed 0.039 ms (3.925 ms / 100) 3.937 -> 3.949 ( +0.30%) [ +0.08% +0.08% +0.00% / +0.30% +0.36% +0.46%] index_select spread : Elapsed 0.039 ms (3.940 ms / 100) 3.951 -> 3.969 ( +0.46%) [ +0.25% +0.00% +0.03% / +0.51% +0.51% +0.46%] index_select strided 3 : Elapsed 0.040 ms (3.961 ms / 100) 3.924 -> 3.944 ( +0.51%) [ +0.00% +0.08% +0.05% / +0.51% +0.56% +0.64%] index_select strided 5 : Elapsed 0.039 ms (3.924 ms / 100) 3.930 -> 3.934 ( +0.10%) [ +0.08% +0.23% +0.00% / +0.10% +0.76% +0.76%] index_select strided 7 : Elapsed 0.039 ms (3.933 ms / 100) 3.943 -> 3.942 ( -0.03%) [ +0.08% +0.08% +0.00% / -0.03% +0.71% +0.56%] index_select strided 8 : Elapsed 0.039 ms (3.946 ms / 100) 3.938 -> 3.951 ( +0.33%) [ +0.00% +0.13% +0.23% / +0.33% +0.79% +0.76%] index_select strided 16 : Elapsed 0.039 ms (3.938 ms / 100) 3.937 -> 3.958 ( +0.53%) [ +0.08% +0.03% +0.00% / +0.53% +0.66% +0.69%] index_select random : Elapsed 0.039 ms (3.940 ms / 100) 3.938 -> 3.942 ( +0.10%) [ +0.08% +0.10% +0.00% / +0.10% +0.66% +0.53%] index_select random_sorted : Elapsed 0.039 ms (3.941 ms / 100) 3.943 -> 3.949 ( +0.15%) [ +0.10% +0.00% +0.05% / +0.15% +0.43% +0.56%] index_select perm : Elapsed 0.039 ms (3.947 ms / 100) 3.952 -> 3.963 ( +0.28%) [ +0.00% +0.15% +0.10% / +0.28% +0.76% +0.66%] index_select perm_sorted : Elapsed 0.040 ms (3.952 ms / 100) B = [16, 4, 5, 40] (stride (5, 80, 1, 320)) A = [20, 4, 5, 40] (stride (1, 20, 3200, 80)) dim = 0 3.944 -> 3.950 ( +0.15%) [ +0.13% +0.00% +0.08% / +0.15% +0.71% +0.81%] index_select const : Elapsed 0.039 ms (3.949 ms / 100) 3.948 -> 3.950 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.63% +0.66%] index_select wrap : Elapsed 0.039 ms (3.948 ms / 100) 3.941 -> 3.944 ( +0.08%) [ +0.10% +0.00% +0.05% / +0.08% +0.81% +0.69%] index_select linear : Elapsed 0.039 ms (3.945 ms / 100) 3.956 -> 3.968 ( +0.30%) [ +0.03% +0.18% +0.00% / +0.30% +0.81% +0.83%] index_select reverse : Elapsed 0.040 ms (3.957 ms / 100) 3.950 -> 3.950 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.00% +0.61% +0.68%] index_select skip64 : Elapsed 0.040 ms (3.953 ms / 100) 3.953 -> 3.960 ( +0.18%) [ +0.00% +0.00% +0.05% / +0.18% +0.53% +0.68%] index_select skip256 : Elapsed 0.040 ms (3.953 ms / 100) 3.947 -> 3.959 ( +0.30%) [ +0.00% +0.10% +0.10% / +0.30% +0.66% +0.71%] index_select spread : Elapsed 0.039 ms (3.947 ms / 100) 3.956 -> 3.963 ( +0.18%) [ +0.00% +0.08% +0.03% / +0.18% +0.68% +0.63%] index_select strided 3 : Elapsed 0.040 ms (3.956 ms / 100) 3.959 -> 3.962 ( +0.08%) [ +0.10% +0.00% +0.03% / +0.08% +0.81% +0.71%] index_select strided 5 : Elapsed 0.040 ms (3.963 ms / 100) 3.945 -> 3.966 ( +0.53%) [ +0.08% +0.13% +0.00% / +0.53% +0.74% +0.94%] index_select strided 7 : Elapsed 0.039 ms (3.948 ms / 100) 3.949 -> 3.955 ( +0.15%) [ +0.00% +0.08% +0.03% / +0.15% +0.79% +0.79%] index_select strided 8 : Elapsed 0.039 ms (3.949 ms / 100) 3.962 -> 3.964 ( +0.05%) [ +0.10% +0.18% +0.00% / +0.05% +0.76% +0.81%] index_select strided 16 : Elapsed 0.040 ms (3.966 ms / 100) 3.964 -> 3.971 ( +0.18%) [ +0.33% +0.00% +0.08% / +0.18% +0.50% +0.50%] index_select random : Elapsed 0.040 ms (3.977 ms / 100) 3.952 -> 3.954 ( +0.05%) [ +0.10% +0.00% +0.13% / +0.05% +0.63% +0.56%] index_select random_sorted : Elapsed 0.040 ms (3.956 ms / 100) 3.957 -> 3.972 ( +0.38%) [ +0.10% +0.05% +0.00% / +0.38% +0.56% +0.58%] index_select perm : Elapsed 0.040 ms (3.961 ms / 100) 3.955 -> 3.960 ( +0.13%) [ +0.08% +0.08% +0.00% / +0.13% +0.61% +0.61%] index_select perm_sorted : Elapsed 0.040 ms (3.958 ms / 100) B = [16, 4, 5, 40] (stride (1, 80, 16, 320)) A = [20, 4, 5, 40] (stride (800, 200, 1, 5)) dim = 0 3.593 -> 3.604 ( +0.31%) [ +0.03% +0.06% +0.00% / +0.31% +0.47% +0.42%] index_select const : Elapsed 0.036 ms (3.594 ms / 100) 3.603 -> 3.611 ( +0.22%) [ +0.14% +0.17% +0.00% / +0.22% +0.61% +0.50%] index_select wrap : Elapsed 0.036 ms (3.608 ms / 100) 3.605 -> 3.607 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.47% +0.42%] index_select linear : Elapsed 0.036 ms (3.605 ms / 100) 3.603 -> 3.604 ( +0.03%) [ +0.06% +0.11% +0.00% / +0.03% +0.56% +0.53%] index_select reverse : Elapsed 0.036 ms (3.605 ms / 100) 3.586 -> 3.588 ( +0.06%) [ +0.11% +0.00% +0.11% / +0.06% +0.64% +0.61%] index_select skip64 : Elapsed 0.036 ms (3.590 ms / 100) 3.598 -> 3.604 ( +0.17%) [ +0.06% +0.06% +0.00% / +0.17% +0.67% +0.53%] index_select skip256 : Elapsed 0.036 ms (3.600 ms / 100) 3.595 -> 3.597 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.50% +0.56%] index_select spread : Elapsed 0.036 ms (3.596 ms / 100) 3.612 -> 3.619 ( +0.19%) [ +0.00% +0.03% +0.00% / +0.19% +0.58% +0.58%] index_select strided 3 : Elapsed 0.036 ms (3.612 ms / 100) 3.601 -> 3.601 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.50% +0.47%] index_select strided 5 : Elapsed 0.036 ms (3.602 ms / 100) 3.602 -> 3.606 ( +0.11%) [ +0.03% +0.14% +0.00% / +0.11% +0.69% +0.67%] index_select strided 7 : Elapsed 0.036 ms (3.603 ms / 100) 3.598 -> 3.602 ( +0.11%) [ +0.14% +0.03% +0.00% / +0.11% +0.72% +0.67%] index_select strided 8 : Elapsed 0.036 ms (3.603 ms / 100) 3.602 -> 3.604 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.56% +0.58%] index_select strided 16 : Elapsed 0.036 ms (3.604 ms / 100) 3.607 -> 3.609 ( +0.06%) [ +0.06% +0.22% +0.00% / +0.06% +0.67% +0.61%] index_select random : Elapsed 0.036 ms (3.609 ms / 100) 3.602 -> 3.607 ( +0.14%) [ +0.17% +0.00% +0.08% / +0.14% +0.69% +0.86%] index_select random_sorted : Elapsed 0.036 ms (3.608 ms / 100) 3.594 -> 3.598 ( +0.11%) [ +0.08% +0.08% +0.00% / +0.11% +0.86% +0.81%] index_select perm : Elapsed 0.036 ms (3.597 ms / 100) 3.610 -> 3.615 ( +0.14%) [ +0.00% +0.06% +0.00% / +0.14% +0.64% +0.64%] index_select perm_sorted : Elapsed 0.036 ms (3.610 ms / 100) B = [16, 4, 5, 40] (stride (1, 80, 16, 320)) A = [20, 4, 5, 40] (stride (1, 20, 3200, 80)) dim = 0 1.476 -> 1.475 ( -0.07%) [ +0.14% +0.00% +0.47% / -0.07% +1.08% +0.95%] index_select const : Elapsed 0.015 ms (1.478 ms / 100) 1.463 -> 1.470 ( +0.48%) [ +0.00% +0.34% +0.41% / +0.48% +2.05% +1.85%] index_select wrap : Elapsed 0.015 ms (1.463 ms / 100) 1.455 -> 1.458 ( +0.21%) [ +0.07% +0.00% +0.00% / +0.21% +0.96% +0.89%] index_select linear : Elapsed 0.015 ms (1.456 ms / 100) 1.461 -> 1.464 ( +0.21%) [ +0.21% +0.00% +0.55% / +0.21% +1.51% +1.44%] index_select reverse : Elapsed 0.015 ms (1.464 ms / 100) 1.477 -> 1.475 ( -0.14%) [ +0.20% +0.20% +0.00% / -0.14% +1.22% +0.61%] index_select skip64 : Elapsed 0.015 ms (1.480 ms / 100) 1.458 -> 1.457 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +1.03% +1.23%] index_select skip256 : Elapsed 0.015 ms (1.458 ms / 100) 1.476 -> 1.480 ( +0.27%) [ +0.00% +0.07% +0.00% / +0.27% +0.54% +0.68%] index_select spread : Elapsed 0.015 ms (1.476 ms / 100) 1.467 -> 1.469 ( +0.14%) [ +0.48% +0.14% +0.00% / +0.14% +0.75% +1.23%] index_select strided 3 : Elapsed 0.015 ms (1.474 ms / 100) 1.458 -> 1.463 ( +0.34%) [ +0.00% +0.07% +0.07% / +0.34% +0.82% +0.62%] index_select strided 5 : Elapsed 0.015 ms (1.458 ms / 100) 1.471 -> 1.475 ( +0.27%) [ +0.00% +0.20% +0.34% / +0.27% +0.95% +0.88%] index_select strided 7 : Elapsed 0.015 ms (1.471 ms / 100) 1.472 -> 1.466 ( -0.41%) [ +0.20% +0.00% +0.00% / -0.41% +0.41% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.475 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.82% +0.75%] index_select strided 16 : Elapsed 0.015 ms (1.463 ms / 100) 1.470 -> 1.471 ( +0.07%) [ +0.07% +0.41% +0.00% / +0.07% +0.20% +0.41%] index_select random : Elapsed 0.015 ms (1.471 ms / 100) 1.457 -> 1.461 ( +0.27%) [ +0.34% +0.21% +0.00% / +0.34% +0.27% +0.48%] index_select random_sorted : Elapsed 0.015 ms (1.462 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.00% +0.00% +0.14% / +0.14% +0.27% +0.20%] index_select perm : Elapsed 0.015 ms (1.477 ms / 100) 1.470 -> 1.471 ( +0.07%) [ +0.00% +0.34% +0.20% / +0.07% +0.68% +0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.470 ms / 100) B = [16, 4, 5, 40] (stride (4, 1, 64, 320)) A = [20, 4, 5, 40] (stride (20, 1, 4, 400)) dim = 0 4.284 -> 4.286 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.35% +0.44%] index_select const : Elapsed 0.043 ms (4.284 ms / 100) 4.284 -> 4.286 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.56% +0.58%] index_select wrap : Elapsed 0.043 ms (4.284 ms / 100) 4.280 -> 4.287 ( +0.16%) [ +0.05% +0.00% +0.00% / +0.16% +0.51% +0.47%] index_select linear : Elapsed 0.043 ms (4.282 ms / 100) 4.279 -> 4.287 ( +0.19%) [ +0.00% +0.02% +0.05% / +0.19% +0.51% +0.40%] index_select reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.260 -> 4.271 ( +0.26%) [ +0.05% +0.00% +0.00% / +0.26% +0.54% +0.54%] index_select skip64 : Elapsed 0.043 ms (4.262 ms / 100) 4.268 -> 4.283 ( +0.35%) [ +0.09% +0.05% +0.00% / +0.35% +0.37% +0.49%] index_select skip256 : Elapsed 0.043 ms (4.272 ms / 100) 4.297 -> 4.303 ( +0.14%) [ +0.00% +0.00% +0.05% / +0.14% +0.37% +0.47%] index_select spread : Elapsed 0.043 ms (4.297 ms / 100) 4.278 -> 4.283 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.49% +0.63%] index_select strided 3 : Elapsed 0.043 ms (4.283 ms / 100) 4.290 -> 4.293 ( +0.07%) [ +0.12% +0.07% +0.00% / +0.07% +0.44% +0.49%] index_select strided 5 : Elapsed 0.043 ms (4.295 ms / 100) 4.277 -> 4.278 ( +0.02%) [ +0.00% +0.02% +0.09% / +0.02% +0.65% +0.51%] index_select strided 7 : Elapsed 0.043 ms (4.277 ms / 100) 4.293 -> 4.299 ( +0.14%) [ +0.09% +0.00% +0.05% / +0.14% +0.54% +0.49%] index_select strided 8 : Elapsed 0.043 ms (4.297 ms / 100) 4.297 -> 4.304 ( +0.16%) [ +0.02% +0.12% +0.00% / +0.16% +0.58% +0.63%] index_select strided 16 : Elapsed 0.043 ms (4.298 ms / 100) 4.289 -> 4.297 ( +0.19%) [ +0.14% +0.00% +0.00% / +0.19% +0.70% +0.68%] index_select random : Elapsed 0.043 ms (4.295 ms / 100) 4.285 -> 4.295 ( +0.23%) [ +0.12% +0.12% +0.00% / +0.23% +0.72% +0.68%] index_select random_sorted : Elapsed 0.043 ms (4.290 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.12% +0.16% +0.00% / +0.05% +0.68% +0.68%] index_select perm : Elapsed 0.043 ms (4.284 ms / 100) 4.294 -> 4.296 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.56% +0.54%] index_select perm_sorted : Elapsed 0.043 ms (4.295 ms / 100) out_shape = [20, 16, 5, 40] in_shape = [20, 4, 5, 40] idx_dim = 1 B = [20, 16, 5, 40] (stride (3200, 200, 40, 1)) A = [20, 4, 5, 40] (stride (1, 800, 3200, 20)) dim = 1 2.396 -> 2.401 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.54% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.341 -> 2.349 ( +0.34%) [ +0.47% +0.09% +0.00% / +0.34% +0.68% +0.43%] index_copy_ linear : Elapsed 0.024 ms (2.352 ms / 100) 2.401 -> 2.398 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.46% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.348 -> 2.348 ( +0.00%) [ +0.00% +0.00% +0.21% / +0.00% +0.43% +0.38%] index_copy_ reverse : Elapsed 0.023 ms (2.348 ms / 100) 2.400 -> 2.408 ( +0.33%) [ +0.13% +0.13% +0.00% / +0.33% +0.42% +0.42%] index_add_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.346 -> 2.352 ( +0.26%) [ +0.00% +0.04% +0.00% / +0.26% +0.47% +0.34%] index_copy_ spread : Elapsed 0.023 ms (2.346 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.33% +0.25% +0.00% / +0.17% +0.75% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.348 -> 2.352 ( +0.17%) [ +0.00% +0.09% +0.00% / +0.17% +0.51% +0.47%] index_copy_ strided 3 : Elapsed 0.023 ms (2.348 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.00% +0.12% +0.08% / -0.04% +0.41% +0.33%] index_add_ strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.351 -> 2.354 ( +0.13%) [ +0.00% +0.00% +0.34% / +0.13% +0.17% +0.26%] index_copy_ strided 5 : Elapsed 0.024 ms (2.351 ms / 100) 2.406 -> 2.410 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.29% +0.17% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.349 -> 2.351 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.34% +0.26%] index_copy_ strided 7 : Elapsed 0.024 ms (2.350 ms / 100) 2.403 -> 2.405 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.50% +0.08% +0.08%] index_add_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.350 -> 2.348 ( -0.09%) [ +0.09% +0.00% +0.09% / +0.60% -0.09% -0.04%] index_copy_ perm : Elapsed 0.024 ms (2.352 ms / 100) 2.398 -> 2.402 ( +0.17%) [ +0.25% +0.00% +0.04% / +0.21% +0.25% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 2.342 -> 2.346 ( +0.17%) [ +0.00% +0.04% +0.21% / +0.21% +0.68% +0.17%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.342 ms / 100) 5.236 -> 5.241 ( +0.10%) [ +0.13% +0.00% +0.11% / +0.10% +0.71% +0.63%] index_select const : Elapsed 0.052 ms (5.243 ms / 100) 5.286 -> 5.295 ( +0.17%) [ +0.00% +0.08% +0.06% / +0.17% +0.42% +0.40%] index_select wrap : Elapsed 0.053 ms (5.286 ms / 100) 5.266 -> 5.270 ( +0.08%) [ +0.06% +0.02% +0.00% / +0.08% +0.38% +0.44%] index_select linear : Elapsed 0.053 ms (5.269 ms / 100) 5.274 -> 5.276 ( +0.04%) [ +0.00% +0.08% +0.11% / +0.04% +0.40% +0.34%] index_select reverse : Elapsed 0.053 ms (5.274 ms / 100) 5.282 -> 5.291 ( +0.17%) [ +0.08% +0.00% +0.08% / +0.17% +0.42% +0.30%] index_select skip64 : Elapsed 0.053 ms (5.286 ms / 100) 5.267 -> 5.260 ( -0.13%) [ +0.04% +0.00% +0.00% / -0.13% +0.09% +0.17%] index_select skip256 : Elapsed 0.053 ms (5.269 ms / 100) 5.282 -> 5.287 ( +0.09%) [ +0.06% +0.04% +0.00% / +0.09% +0.27% +0.38%] index_select spread : Elapsed 0.053 ms (5.285 ms / 100) 5.256 -> 5.262 ( +0.11%) [ +0.17% +0.00% +0.13% / +0.11% +0.51% +0.40%] index_select strided 3 : Elapsed 0.053 ms (5.265 ms / 100) 5.286 -> 5.289 ( +0.06%) [ +0.00% +0.09% +0.04% / +0.06% +0.36% +0.26%] index_select random : Elapsed 0.053 ms (5.286 ms / 100) 5.238 -> 5.253 ( +0.29%) [ +0.04% +0.06% +0.00% / +0.29% +0.32% +0.42%] index_select random_sorted : Elapsed 0.052 ms (5.240 ms / 100) B = [20, 16, 5, 40] (stride (200, 4000, 1, 5)) A = [20, 4, 5, 40] (stride (800, 200, 40, 1)) dim = 1 2.374 -> 2.377 ( +0.13%) [ +0.08% +0.13% +0.00% / +0.13% +0.17% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.376 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.17% +0.26% +0.13%] index_copy_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.382 -> 2.380 ( -0.08%) [ +0.17% +0.08% +0.00% / +0.25% +0.04% -0.08%] index_add_ reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.326 -> 2.335 ( +0.39%) [ +0.13% +0.26% +0.00% / +0.47% +0.43% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.329 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.13% +0.08% +0.00% / +0.21% +0.42% +0.08%] index_add_ spread : Elapsed 0.024 ms (2.379 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.04% +0.30% +0.00% / +0.39% +0.56% +0.26%] index_copy_ spread : Elapsed 0.023 ms (2.327 ms / 100) 2.378 -> 2.380 ( +0.08%) [ +0.13% +0.17% +0.00% / +0.08% +0.08% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.381 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.00% +0.09% +0.09% / +0.04% +0.26% +0.13%] index_copy_ strided 3 : Elapsed 0.023 ms (2.330 ms / 100) 2.380 -> 2.381 ( +0.04%) [ +0.25% +0.00% +0.00% / +0.08% +0.13% +0.04%] index_add_ strided 5 : Elapsed 0.024 ms (2.386 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.13% +0.26% +0.00% / +0.26% +0.30% +0.47%] index_copy_ strided 5 : Elapsed 0.023 ms (2.329 ms / 100) 2.378 -> 2.383 ( +0.21%) [ +0.00% +0.08% +0.17% / +0.21% +0.25% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.326 -> 2.335 ( +0.39%) [ +0.13% +0.00% +0.17% / +0.39% +0.43% +0.39%] index_copy_ strided 7 : Elapsed 0.023 ms (2.329 ms / 100) 2.375 -> 2.378 ( +0.13%) [ +0.13% +0.00% +0.04% / +0.13% +0.13% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.378 ms / 100) 2.326 -> 2.330 ( +0.17%) [ +0.09% +0.00% +0.04% / +0.17% +0.30% +0.21%] index_copy_ perm : Elapsed 0.023 ms (2.328 ms / 100) 2.378 -> 2.375 ( -0.13%) [ +0.00% +0.00% +0.25% / -0.13% +0.04% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.378 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.00% +0.30% +0.17% / +0.26% +0.34% +0.34%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.326 ms / 100) 4.797 -> 4.799 ( +0.04%) [ +0.10% +0.00% +0.06% / +0.04% +0.29% +0.13%] index_select const : Elapsed 0.048 ms (4.802 ms / 100) 4.839 -> 4.841 ( +0.04%) [ +0.00% +0.02% +0.14% / +0.04% +0.21% +0.14%] index_select wrap : Elapsed 0.048 ms (4.839 ms / 100) 4.835 -> 4.834 ( -0.02%) [ +0.06% +0.02% +0.00% / -0.02% +0.00% +0.25%] index_select linear : Elapsed 0.048 ms (4.838 ms / 100) 4.837 -> 4.841 ( +0.08%) [ +0.27% +0.43% +0.00% / +0.08% +0.35% +0.58%] index_select reverse : Elapsed 0.049 ms (4.850 ms / 100) 4.787 -> 4.791 ( +0.08%) [ +0.00% +0.02% +0.04% / +0.08% +0.19% +0.17%] index_select skip64 : Elapsed 0.048 ms (4.787 ms / 100) 4.771 -> 4.783 ( +0.25%) [ +0.19% +0.08% +0.00% / +0.25% +0.36% +0.40%] index_select skip256 : Elapsed 0.048 ms (4.780 ms / 100) 4.829 -> 4.837 ( +0.17%) [ +0.04% +0.00% +0.10% / +0.17% +0.23% +0.31%] index_select spread : Elapsed 0.048 ms (4.831 ms / 100) 4.848 -> 4.852 ( +0.08%) [ +0.00% +0.10% +0.02% / +0.08% +0.25% +0.35%] index_select strided 3 : Elapsed 0.048 ms (4.848 ms / 100) 4.822 -> 4.831 ( +0.19%) [ +0.02% +0.04% +0.00% / +0.19% +0.31% +0.44%] index_select random : Elapsed 0.048 ms (4.823 ms / 100) 4.837 -> 4.847 ( +0.21%) [ +0.04% +0.14% +0.00% / +0.21% +0.27% +0.48%] index_select random_sorted : Elapsed 0.048 ms (4.839 ms / 100) B = [20, 16, 5, 40] (stride (640, 1, 12800, 16)) A = [20, 4, 5, 40] (stride (800, 1, 160, 4)) dim = 1 2.343 -> 2.341 ( -0.09%) [ +0.17% +0.00% +0.13% / -0.09% +0.38% +0.38%] index_add_ linear : Elapsed 0.023 ms (2.347 ms / 100) 2.321 -> 2.330 ( +0.39%) [ +0.17% +0.00% +0.26% / +0.39% +0.43% +0.56%] index_copy_ linear : Elapsed 0.023 ms (2.325 ms / 100) 2.338 -> 2.344 ( +0.26%) [ +0.26% +0.30% +0.00% / +0.26% +0.60% +0.64%] index_add_ reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.324 -> 2.327 ( +0.13%) [ +0.17% +0.09% +0.00% / +0.13% +0.13% +0.34%] index_copy_ reverse : Elapsed 0.023 ms (2.328 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.29% +0.08% +0.00% / +0.08% +0.55% +0.59%] index_add_ spread : Elapsed 0.024 ms (2.383 ms / 100) 2.422 -> 2.427 ( +0.21%) [ +0.45% +0.04% +0.00% / +0.21% +0.50% +0.54%] index_copy_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.394 -> 2.397 ( +0.13%) [ +0.04% +0.17% +0.00% / +0.13% +0.38% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.395 ms / 100) 2.435 -> 2.437 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.62% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.435 ms / 100) 2.393 -> 2.399 ( +0.25%) [ +0.29% +0.17% +0.00% / +0.33% +0.25% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.400 ms / 100) 2.434 -> 2.441 ( +0.29%) [ +0.12% +0.00% +0.08% / +0.29% +0.53% +0.58%] index_copy_ strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.381 -> 2.378 ( -0.13%) [ +0.08% +0.00% +0.00% / -0.13% +0.04% +0.34%] index_add_ strided 7 : Elapsed 0.024 ms (2.383 ms / 100) 2.424 -> 2.427 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.37% +0.45%] index_copy_ strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.385 -> 2.388 ( +0.13%) [ +0.08% +0.08% +0.00% / +0.13% +0.21% +0.21%] index_add_ perm : Elapsed 0.024 ms (2.387 ms / 100) 2.422 -> 2.425 ( +0.12%) [ +0.00% +0.17% +0.04% / +0.12% +0.50% +0.58%] index_copy_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.385 -> 2.388 ( +0.13%) [ +0.00% +0.21% +0.17% / +0.13% +0.17% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.385 ms / 100) 2.425 -> 2.425 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.54% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 4.930 -> 4.930 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.39% +0.43%] index_select const : Elapsed 0.049 ms (4.932 ms / 100) 4.931 -> 4.938 ( +0.14%) [ +0.00% +0.06% +0.02% / +0.14% +0.45% +0.45%] index_select wrap : Elapsed 0.049 ms (4.931 ms / 100) 4.926 -> 4.929 ( +0.06%) [ +0.00% +0.00% +0.02% / +0.06% +0.43% +0.49%] index_select linear : Elapsed 0.049 ms (4.926 ms / 100) 4.941 -> 4.943 ( +0.04%) [ +0.00% +0.12% +0.02% / +0.04% +0.61% +0.59%] index_select reverse : Elapsed 0.049 ms (4.941 ms / 100) 4.944 -> 4.946 ( +0.04%) [ +0.06% +0.00% +0.02% / +0.04% +0.32% +0.44%] index_select skip64 : Elapsed 0.049 ms (4.947 ms / 100) 4.926 -> 4.928 ( +0.04%) [ +0.06% +0.02% +0.00% / +0.04% +0.35% +0.30%] index_select skip256 : Elapsed 0.049 ms (4.929 ms / 100) 4.930 -> 4.933 ( +0.06%) [ +0.10% +0.00% +0.08% / +0.06% +0.34% +0.41%] index_select spread : Elapsed 0.049 ms (4.935 ms / 100) 4.923 -> 4.928 ( +0.10%) [ +0.18% +0.14% +0.00% / +0.10% +0.43% +0.39%] index_select strided 3 : Elapsed 0.049 ms (4.932 ms / 100) 4.940 -> 4.950 ( +0.20%) [ +0.10% +0.08% +0.00% / +0.20% +0.38% +0.43%] index_select random : Elapsed 0.049 ms (4.945 ms / 100) 4.928 -> 4.931 ( +0.06%) [ +0.00% +0.12% +0.04% / +0.06% +0.41% +0.43%] index_select random_sorted : Elapsed 0.049 ms (4.928 ms / 100) B = [20, 16, 5, 40] (stride (40, 800, 12800, 1)) A = [20, 4, 5, 40] (stride (5, 100, 1, 400)) dim = 1 2.603 -> 2.609 ( +0.23%) [ +0.00% +0.27% +0.38% / +0.27% +0.23% +0.38%] index_add_ linear : Elapsed 0.026 ms (2.603 ms / 100) 2.544 -> 2.551 ( +0.28%) [ +0.28% +0.00% +0.04% / +0.31% +0.35% +0.28%] index_copy_ linear : Elapsed 0.026 ms (2.551 ms / 100) 2.605 -> 2.608 ( +0.12%) [ +0.15% +0.04% +0.00% / +0.19% +0.12% +0.31%] index_add_ reverse : Elapsed 0.026 ms (2.609 ms / 100) 2.544 -> 2.546 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.08% +0.16%] index_copy_ reverse : Elapsed 0.025 ms (2.547 ms / 100) 2.608 -> 2.606 ( -0.08%) [ +0.00% +0.04% +0.04% / +0.04% -0.08% +0.15%] index_add_ spread : Elapsed 0.026 ms (2.608 ms / 100) 2.541 -> 2.543 ( +0.08%) [ +0.16% +0.00% +0.04% / +0.24% +0.08% +0.08%] index_copy_ spread : Elapsed 0.025 ms (2.545 ms / 100) 2.604 -> 2.608 ( +0.15%) [ +0.23% +0.08% +0.00% / +0.19% +0.15% +0.23%] index_add_ strided 3 : Elapsed 0.026 ms (2.610 ms / 100) 2.544 -> 2.542 ( -0.08%) [ +0.00% +0.00% +0.04% / +0.16% +0.04% -0.08%] index_copy_ strided 3 : Elapsed 0.025 ms (2.544 ms / 100) 2.609 -> 2.607 ( -0.08%) [ +0.08% +0.00% +0.08% / +0.19% -0.08% +0.11%] index_add_ strided 5 : Elapsed 0.026 ms (2.611 ms / 100) 2.544 -> 2.544 ( +0.00%) [ +0.00% +0.08% +0.28% / +0.28% +0.00% +0.08%] index_copy_ strided 5 : Elapsed 0.025 ms (2.544 ms / 100) 2.606 -> 2.608 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.27% +0.08% +0.19%] index_add_ strided 7 : Elapsed 0.026 ms (2.610 ms / 100) 2.541 -> 2.546 ( +0.20%) [ +0.24% +0.12% +0.00% / +0.35% +0.24% +0.20%] index_copy_ strided 7 : Elapsed 0.025 ms (2.547 ms / 100) 2.606 -> 2.611 ( +0.19%) [ +0.15% +0.15% +0.00% / +0.19% +0.31% +0.35%] index_add_ perm : Elapsed 0.026 ms (2.610 ms / 100) 2.545 -> 2.549 ( +0.16%) [ +0.12% +0.00% +0.04% / +0.16% +0.28% +0.16%] index_copy_ perm : Elapsed 0.025 ms (2.548 ms / 100) 2.604 -> 2.606 ( +0.08%) [ +0.08% +0.00% +0.15% / +0.08% +0.27% +0.27%] index_add_ perm_sorted : Elapsed 0.026 ms (2.606 ms / 100) 2.544 -> 2.545 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.08% +0.04% +0.08%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.545 ms / 100) 5.929 -> 5.934 ( +0.08%) [ +0.03% +0.19% +0.00% / +0.08% +0.49% +0.32%] index_select const : Elapsed 0.059 ms (5.931 ms / 100) 5.935 -> 5.941 ( +0.10%) [ +0.03% +0.15% +0.00% / +0.10% +0.42% +0.20%] index_select wrap : Elapsed 0.059 ms (5.937 ms / 100) 5.933 -> 5.935 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.25% +0.24%] index_select linear : Elapsed 0.059 ms (5.936 ms / 100) 5.933 -> 5.936 ( +0.05%) [ +0.03% +0.02% +0.00% / +0.05% +0.22% +0.29%] index_select reverse : Elapsed 0.059 ms (5.935 ms / 100) 5.935 -> 5.941 ( +0.10%) [ +0.00% +0.15% +0.13% / +0.10% +0.30% +0.24%] index_select skip64 : Elapsed 0.059 ms (5.935 ms / 100) 5.946 -> 5.949 ( +0.05%) [ +0.13% +0.00% +0.05% / +0.05% +0.34% +0.40%] index_select skip256 : Elapsed 0.060 ms (5.954 ms / 100) 5.931 -> 5.930 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.29% +0.40%] index_select spread : Elapsed 0.059 ms (5.931 ms / 100) 5.926 -> 5.936 ( +0.17%) [ +0.02% +0.07% +0.00% / +0.19% +0.17% +0.24%] index_select strided 3 : Elapsed 0.059 ms (5.927 ms / 100) 5.930 -> 5.932 ( +0.03%) [ +0.07% +0.15% +0.00% / +0.03% +0.29% +0.29%] index_select random : Elapsed 0.059 ms (5.934 ms / 100) 5.929 -> 5.929 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.20% +0.37%] index_select random_sorted : Elapsed 0.059 ms (5.929 ms / 100) B = [20, 16, 5, 40] (stride (1, 800, 12800, 20)) A = [20, 4, 5, 40] (stride (20, 1, 4, 400)) dim = 1 2.627 -> 2.632 ( +0.19%) [ +0.08% +0.19% +0.00% / +0.19% +0.72% +0.65%] index_add_ linear : Elapsed 0.026 ms (2.629 ms / 100) 2.573 -> 2.578 ( +0.19%) [ +0.04% +0.08% +0.00% / +0.19% +0.62% +0.47%] index_copy_ linear : Elapsed 0.026 ms (2.574 ms / 100) 2.624 -> 2.623 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.76% +0.76%] index_add_ reverse : Elapsed 0.026 ms (2.625 ms / 100) 2.567 -> 2.572 ( +0.19%) [ +0.19% +0.00% +0.16% / +0.19% +0.93% +0.66%] index_copy_ reverse : Elapsed 0.026 ms (2.572 ms / 100) 2.623 -> 2.624 ( +0.04%) [ +0.15% +0.00% +0.08% / +0.04% +0.61% +0.69%] index_add_ spread : Elapsed 0.026 ms (2.627 ms / 100) 2.570 -> 2.577 ( +0.27%) [ +0.12% +0.12% +0.00% / +0.27% +0.51% +0.66%] index_copy_ spread : Elapsed 0.026 ms (2.573 ms / 100) 2.625 -> 2.626 ( +0.04%) [ +0.00% +0.00% +0.11% / +0.04% +0.57% +0.46%] index_add_ strided 3 : Elapsed 0.026 ms (2.625 ms / 100) 2.571 -> 2.572 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.62% +0.43%] index_copy_ strided 3 : Elapsed 0.026 ms (2.573 ms / 100) 2.628 -> 2.634 ( +0.23%) [ +0.11% +0.15% +0.00% / +0.23% +0.46% +0.30%] index_add_ strided 5 : Elapsed 0.026 ms (2.631 ms / 100) 2.575 -> 2.579 ( +0.16%) [ +0.04% +0.00% +0.08% / +0.16% +0.19% +0.35%] index_copy_ strided 5 : Elapsed 0.026 ms (2.576 ms / 100) 2.625 -> 2.626 ( +0.04%) [ +0.08% +0.15% +0.00% / +0.04% +0.53% +0.65%] index_add_ strided 7 : Elapsed 0.026 ms (2.627 ms / 100) 2.571 -> 2.575 ( +0.16%) [ +0.12% +0.00% +0.08% / +0.16% +0.54% +0.51%] index_copy_ strided 7 : Elapsed 0.026 ms (2.574 ms / 100) 2.625 -> 2.626 ( +0.04%) [ +0.00% +0.11% +0.11% / +0.04% +0.65% +0.46%] index_add_ perm : Elapsed 0.026 ms (2.625 ms / 100) 2.570 -> 2.573 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.58% +0.39%] index_copy_ perm : Elapsed 0.026 ms (2.573 ms / 100) 2.630 -> 2.629 ( -0.04%) [ +0.04% +0.15% +0.00% / -0.04% +0.42% +0.15%] index_add_ perm_sorted : Elapsed 0.026 ms (2.631 ms / 100) 2.572 -> 2.578 ( +0.23%) [ +0.04% +0.19% +0.00% / +0.23% +0.43% +0.35%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.573 ms / 100) 6.018 -> 6.020 ( +0.03%) [ +0.02% +0.00% +0.02% / +0.03% +0.50% +0.43%] index_select const : Elapsed 0.060 ms (6.019 ms / 100) 6.009 -> 6.008 ( -0.02%) [ +0.00% +0.07% +0.05% / -0.02% +0.48% +0.37%] index_select wrap : Elapsed 0.060 ms (6.009 ms / 100) 6.008 -> 6.015 ( +0.12%) [ +0.00% +0.00% +0.02% / +0.12% +0.47% +0.28%] index_select linear : Elapsed 0.060 ms (6.008 ms / 100) 6.002 -> 6.003 ( +0.02%) [ +0.03% +0.00% +0.00% / +0.02% +0.47% +0.48%] index_select reverse : Elapsed 0.060 ms (6.004 ms / 100) 6.003 -> 6.002 ( -0.02%) [ +0.10% +0.00% +0.02% / -0.02% +0.38% +0.33%] index_select skip64 : Elapsed 0.060 ms (6.009 ms / 100) 6.008 -> 6.012 ( +0.07%) [ +0.00% +0.03% +0.03% / +0.07% +0.37% +0.43%] index_select skip256 : Elapsed 0.060 ms (6.008 ms / 100) 6.011 -> 6.012 ( +0.02%) [ +0.10% +0.02% +0.00% / +0.02% +0.28% +0.25%] index_select spread : Elapsed 0.060 ms (6.017 ms / 100) 6.009 -> 6.016 ( +0.12%) [ +0.10% +0.03% +0.00% / +0.12% +0.35% +0.28%] index_select strided 3 : Elapsed 0.060 ms (6.015 ms / 100) 6.004 -> 5.998 ( -0.10%) [ +0.07% +0.02% +0.00% / -0.10% +0.32% +0.33%] index_select random : Elapsed 0.060 ms (6.008 ms / 100) 6.020 -> 6.023 ( +0.05%) [ +0.03% +0.07% +0.00% / +0.05% +0.30% +0.33%] index_select random_sorted : Elapsed 0.060 ms (6.022 ms / 100) B = [20, 16, 5, 40] (stride (1, 100, 20, 1600)) dim = 1 fill_cnt = 4 1.336 -> 1.335 ( -0.07%) [ +0.00% +0.00% +0.00% / +0.15% -0.07% +0.00%] index_fill_ const : Elapsed 0.013 ms (1.336 ms / 100) 1.357 -> 1.355 ( -0.15%) [ +0.00% +0.29% +0.15% / -0.15% +0.88% +0.52%] index_fill_ linear : Elapsed 0.014 ms (1.357 ms / 100) 1.358 -> 1.361 ( +0.22%) [ +0.00% +0.22% +0.07% / +0.22% +0.59% +0.52%] index_fill_ reverse : Elapsed 0.014 ms (1.358 ms / 100) 1.336 -> 1.334 ( -0.15%) [ +0.15% +0.07% +0.00% / -0.15% +0.00% +0.07%] index_fill_ skip64 : Elapsed 0.013 ms (1.338 ms / 100) 1.334 -> 1.338 ( +0.30%) [ +0.37% +0.00% +0.22% / +0.37% +0.30% +0.30%] index_fill_ skip256 : Elapsed 0.013 ms (1.339 ms / 100) 1.368 -> 1.364 ( -0.29%) [ +0.00% +0.15% +0.07% / +0.15% -0.15% -0.29%] index_fill_ spread : Elapsed 0.014 ms (1.368 ms / 100) 1.361 -> 1.368 ( +0.51%) [ +0.15% +0.00% +0.29% / +1.10% +0.51% +0.66%] index_fill_ strided 3 : Elapsed 0.014 ms (1.363 ms / 100) 1.362 -> 1.360 ( -0.15%) [ +0.00% +0.22% +0.29% / +0.07% -0.15% +0.00%] index_fill_ strided 5 : Elapsed 0.014 ms (1.362 ms / 100) 1.358 -> 1.357 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +1.25% +1.10%] index_fill_ strided 7 : Elapsed 0.014 ms (1.358 ms / 100) 1.345 -> 1.348 ( +0.22%) [ +0.00% +0.07% +0.22% / +0.22% +0.37% +0.52%] index_fill_ strided 8 : Elapsed 0.013 ms (1.345 ms / 100) 1.367 -> 1.365 ( -0.15%) [ +0.29% +0.00% +0.22% / +0.44% +0.00% -0.15%] index_fill_ random : Elapsed 0.014 ms (1.371 ms / 100) 1.366 -> 1.361 ( -0.37%) [ +0.15% +0.07% +0.00% / +0.00% -0.37% -0.22%] index_fill_ random_sorted : Elapsed 0.014 ms (1.368 ms / 100) 1.361 -> 1.363 ( +0.15%) [ +0.00% +0.00% +0.29% / +0.15% +0.44% +0.22%] index_fill_ perm : Elapsed 0.014 ms (1.361 ms / 100) 1.358 -> 1.360 ( +0.15%) [ +0.37% +0.37% +0.00% / +0.15% +0.59% +0.22%] index_fill_ perm_sorted : Elapsed 0.014 ms (1.363 ms / 100) B = [20, 16, 5, 40] (stride (1, 20, 320, 1600)) A = [20, 4, 5, 40] (stride (1, 4000, 800, 20)) dim = 1 2.413 -> 2.418 ( +0.21%) [ +0.08% +0.00% +0.08% / +0.21% +0.41% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.415 ms / 100) 2.363 -> 2.371 ( +0.34%) [ +0.00% +0.13% +0.13% / +0.34% +0.38% +0.42%] index_copy_ linear : Elapsed 0.024 ms (2.363 ms / 100) 2.416 -> 2.419 ( +0.12%) [ +0.00% +0.25% +0.21% / +0.12% +0.29% +0.41%] index_add_ reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.367 -> 2.371 ( +0.17%) [ +0.17% +0.04% +0.00% / +0.17% +0.25% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.371 ms / 100) 2.405 -> 2.401 ( -0.17%) [ +0.12% +0.00% +0.12% / +0.17% -0.17% -0.08%] index_add_ spread : Elapsed 0.024 ms (2.408 ms / 100) 2.367 -> 2.361 ( -0.25%) [ +0.00% +0.13% +0.13% / +0.00% -0.25% -0.08%] index_copy_ spread : Elapsed 0.024 ms (2.367 ms / 100) 2.408 -> 2.413 ( +0.21%) [ +0.00% +0.12% +0.12% / +0.21% +0.42% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.370 -> 2.372 ( +0.08%) [ +0.00% +0.04% +0.17% / +0.34% +0.30% +0.08%] index_copy_ strided 3 : Elapsed 0.024 ms (2.370 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.33% +0.33%] index_add_ strided 5 : Elapsed 0.024 ms (2.418 ms / 100) 2.380 -> 2.385 ( +0.21%) [ +0.25% +0.00% +0.21% / +0.21% +0.38% +0.38%] index_copy_ strided 5 : Elapsed 0.024 ms (2.386 ms / 100) 2.409 -> 2.407 ( -0.08%) [ +0.29% +0.04% +0.00% / -0.08% +0.29% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.372 -> 2.374 ( +0.08%) [ +0.25% +0.00% +0.13% / +0.08% +0.25% +0.17%] index_copy_ strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.409 -> 2.410 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.42% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.409 ms / 100) 2.364 -> 2.375 ( +0.47%) [ +0.08% +0.34% +0.00% / +0.47% +0.76% +0.68%] index_copy_ perm : Elapsed 0.024 ms (2.366 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.00% +0.00% +0.08% / +0.04% +0.33% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.373 -> 2.379 ( +0.25%) [ +0.21% +0.21% +0.00% / +0.25% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.378 ms / 100) 5.286 -> 5.289 ( +0.06%) [ +0.00% +0.08% +0.06% / +0.06% +0.19% +0.28%] index_select const : Elapsed 0.053 ms (5.286 ms / 100) 5.315 -> 5.322 ( +0.13%) [ +0.00% +0.02% +0.06% / +0.13% +0.21% +0.24%] index_select wrap : Elapsed 0.053 ms (5.315 ms / 100) 5.290 -> 5.301 ( +0.21%) [ +0.02% +0.11% +0.00% / +0.21% +0.30% +0.34%] index_select linear : Elapsed 0.053 ms (5.291 ms / 100) 5.353 -> 5.354 ( +0.02%) [ +0.09% +0.00% +0.11% / +0.02% +0.35% +0.32%] index_select reverse : Elapsed 0.054 ms (5.358 ms / 100) 5.286 -> 5.285 ( -0.02%) [ +0.00% +0.00% +0.08% / -0.02% +0.30% +0.26%] index_select skip64 : Elapsed 0.053 ms (5.286 ms / 100) 5.277 -> 5.273 ( -0.08%) [ +0.04% +0.06% +0.00% / -0.08% +0.19% +0.25%] index_select skip256 : Elapsed 0.053 ms (5.279 ms / 100) 5.298 -> 5.304 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.47% +0.40%] index_select spread : Elapsed 0.053 ms (5.304 ms / 100) 5.346 -> 5.344 ( -0.04%) [ +0.00% +0.13% +0.04% / -0.04% +0.26% +0.22%] index_select strided 3 : Elapsed 0.053 ms (5.346 ms / 100) 5.292 -> 5.298 ( +0.11%) [ +0.09% +0.00% +0.08% / +0.11% +0.36% +0.43%] index_select random : Elapsed 0.053 ms (5.297 ms / 100) 5.346 -> 5.349 ( +0.06%) [ +0.00% +0.15% +0.00% / +0.06% +0.24% +0.30%] index_select random_sorted : Elapsed 0.053 ms (5.346 ms / 100) out_shape = [20, 4, 16, 40] in_shape = [20, 4, 5, 40] idx_dim = 2 B = [20, 4, 16, 40] (stride (2560, 1, 160, 4)) A = [20, 4, 5, 40] (stride (800, 5, 1, 20)) dim = 2 2.467 -> 2.464 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.41% +0.57%] index_add_ linear : Elapsed 0.025 ms (2.467 ms / 100) 2.389 -> 2.388 ( -0.04%) [ +0.08% +0.00% +0.17% / -0.04% +0.75% +0.63%] index_copy_ linear : Elapsed 0.024 ms (2.391 ms / 100) 2.461 -> 2.459 ( -0.08%) [ +0.12% +0.00% +0.20% / -0.08% +0.81% +0.77%] index_add_ reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.382 -> 2.393 ( +0.46%) [ +0.08% +0.00% +0.34% / +0.46% +1.01% +1.13%] index_copy_ reverse : Elapsed 0.024 ms (2.384 ms / 100) 2.473 -> 2.482 ( +0.36%) [ +0.12% +0.24% +0.00% / +0.36% +0.93% +0.97%] index_add_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.79% +0.79%] index_copy_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.00% +0.16% +0.32% / +0.00% +0.85% +0.85%] index_add_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.62% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.20% +0.00% +0.20% / +0.08% +1.06% +0.98%] index_add_ strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.383 -> 2.395 ( +0.50%) [ +0.17% +0.25% +0.00% / +0.50% +1.05% +1.09%] index_copy_ strided 5 : Elapsed 0.024 ms (2.387 ms / 100) 2.465 -> 2.462 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.93% +0.69%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.391 -> 2.387 ( -0.17%) [ +0.13% +0.00% +0.00% / -0.17% +0.84% +0.92%] index_copy_ strided 7 : Elapsed 0.024 ms (2.394 ms / 100) 2.481 -> 2.477 ( -0.16%) [ +0.00% +0.04% +0.00% / -0.16% +0.36% +0.52%] index_add_ perm : Elapsed 0.025 ms (2.481 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.25% +0.04% +0.00% / +0.04% +0.67% +0.62%] index_copy_ perm : Elapsed 0.024 ms (2.410 ms / 100) 2.479 -> 2.481 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.73% +0.85%] index_add_ perm_sorted : Elapsed 0.025 ms (2.481 ms / 100) 2.403 -> 2.417 ( +0.58%) [ +0.00% +0.21% +0.04% / +0.58% +0.79% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 5.338 -> 5.339 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.86% +0.84%] index_select const : Elapsed 0.053 ms (5.338 ms / 100) 5.315 -> 5.313 ( -0.04%) [ +0.00% +0.15% +0.06% / -0.04% +1.22% +0.87%] index_select wrap : Elapsed 0.053 ms (5.315 ms / 100) 5.370 -> 5.379 ( +0.17%) [ +0.11% +0.00% +0.09% / +0.17% +0.76% +0.73%] index_select linear : Elapsed 0.054 ms (5.376 ms / 100) 5.356 -> 5.362 ( +0.11%) [ +0.07% +0.00% +0.28% / +0.11% +0.97% +0.88%] index_select reverse : Elapsed 0.054 ms (5.360 ms / 100) 5.315 -> 5.316 ( +0.02%) [ +0.00% +0.02% +0.09% / +0.02% +1.17% +0.96%] index_select skip64 : Elapsed 0.053 ms (5.315 ms / 100) 5.337 -> 5.344 ( +0.13%) [ +0.00% +0.00% +0.09% / +0.13% +0.96% +0.88%] index_select skip256 : Elapsed 0.053 ms (5.337 ms / 100) 5.353 -> 5.361 ( +0.15%) [ +0.00% +0.13% +0.26% / +0.15% +1.05% +0.92%] index_select spread : Elapsed 0.054 ms (5.353 ms / 100) 5.371 -> 5.373 ( +0.04%) [ +0.09% +0.06% +0.00% / +0.04% +0.78% +0.67%] index_select strided 3 : Elapsed 0.054 ms (5.376 ms / 100) 5.357 -> 5.358 ( +0.02%) [ +0.00% +0.15% +0.21% / +0.02% +0.97% +0.88%] index_select random : Elapsed 0.054 ms (5.357 ms / 100) 5.338 -> 5.342 ( +0.07%) [ +0.00% +0.04% +0.07% / +0.07% +0.82% +0.94%] index_select random_sorted : Elapsed 0.053 ms (5.338 ms / 100) B = [20, 4, 16, 40] (stride (640, 12800, 1, 16)) A = [20, 4, 5, 40] (stride (4, 1, 3200, 80)) dim = 2 2.420 -> 2.419 ( -0.04%) [ +0.00% +0.17% +0.00% / -0.04% +0.08% +0.04%] index_add_ linear : Elapsed 0.024 ms (2.420 ms / 100) 2.361 -> 2.360 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.04% +0.04%] index_copy_ linear : Elapsed 0.024 ms (2.361 ms / 100) 2.422 -> 2.422 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.33% +0.08% +0.00%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.361 -> 2.365 ( +0.17%) [ +0.00% +0.21% +0.04% / +0.21% +0.17% +0.17%] index_copy_ reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.25% +0.20% +0.00% / +0.20% +0.08% +0.08%] index_add_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.21% +0.00% +0.12% / +0.16% +0.12% +0.00%] index_copy_ spread : Elapsed 0.024 ms (2.431 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.12% +0.20% +0.00% / +0.12% +0.04% +0.08%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.428 -> 2.423 ( -0.21%) [ +0.00% +0.29% +0.04% / +0.21% -0.04% -0.21%] index_copy_ strided 3 : Elapsed 0.024 ms (2.428 ms / 100) 2.451 -> 2.446 ( -0.20%) [ +0.04% +0.00% +0.00% / +0.00% +0.16% -0.20%] index_add_ strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.427 -> 2.423 ( -0.16%) [ +0.00% +0.08% +0.16% / +0.12% +0.04% -0.16%] index_copy_ strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.452 -> 2.449 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.08% -0.12% -0.04%] index_add_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.08% +0.21% +0.00% / +0.29% +0.04% -0.04%] index_copy_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.452 -> 2.450 ( -0.08%) [ +0.24% +0.00% +0.04% / +0.12% +0.04% -0.08%] index_add_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.426 -> 2.422 ( -0.16%) [ +0.08% +0.00% +0.12% / +0.33% -0.12% -0.16%] index_copy_ perm : Elapsed 0.024 ms (2.428 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.08% +0.00% +0.20%] index_add_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 2.425 -> 2.423 ( -0.08%) [ +0.12% +0.00% +0.04% / +0.21% -0.08% +0.00%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.428 ms / 100) 5.177 -> 5.175 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.44% +0.48%] index_select const : Elapsed 0.052 ms (5.181 ms / 100) 5.187 -> 5.190 ( +0.06%) [ +0.08% +0.00% +0.08% / +0.06% +0.29% +0.19%] index_select wrap : Elapsed 0.052 ms (5.191 ms / 100) 5.207 -> 5.198 ( -0.17%) [ +0.06% +0.00% +0.00% / -0.17% +0.19% +0.33%] index_select linear : Elapsed 0.052 ms (5.210 ms / 100) 5.180 -> 5.187 ( +0.14%) [ +0.06% +0.00% +0.04% / +0.14% +0.69% +0.52%] index_select reverse : Elapsed 0.052 ms (5.183 ms / 100) 5.166 -> 5.176 ( +0.19%) [ +0.08% +0.00% +0.23% / +0.19% +0.60% +0.31%] index_select skip64 : Elapsed 0.052 ms (5.170 ms / 100) 5.164 -> 5.168 ( +0.08%) [ +0.06% +0.27% +0.00% / +0.08% +0.60% +0.64%] index_select skip256 : Elapsed 0.052 ms (5.167 ms / 100) 5.181 -> 5.187 ( +0.12%) [ +0.14% +0.00% +0.08% / +0.12% +0.42% +0.42%] index_select spread : Elapsed 0.052 ms (5.188 ms / 100) 5.183 -> 5.185 ( +0.04%) [ +0.06% +0.10% +0.00% / +0.04% +0.41% +0.35%] index_select strided 3 : Elapsed 0.052 ms (5.186 ms / 100) 5.167 -> 5.168 ( +0.02%) [ +0.02% +0.06% +0.00% / +0.02% +0.56% +0.58%] index_select random : Elapsed 0.052 ms (5.168 ms / 100) 5.177 -> 5.184 ( +0.14%) [ +0.08% +0.14% +0.00% / +0.14% +0.70% +0.75%] index_select random_sorted : Elapsed 0.052 ms (5.181 ms / 100) B = [20, 4, 16, 40] (stride (16, 12800, 1, 320)) A = [20, 4, 5, 40] (stride (4, 1, 80, 400)) dim = 2 2.435 -> 2.436 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.37% +0.53%] index_add_ linear : Elapsed 0.024 ms (2.437 ms / 100) 2.390 -> 2.388 ( -0.08%) [ +0.13% +0.00% +0.04% / -0.08% +0.38% +0.50%] index_copy_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.431 -> 2.430 ( -0.04%) [ +0.33% +0.16% +0.00% / -0.04% +0.58% +0.62%] index_add_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.386 -> 2.387 ( +0.04%) [ +0.00% +0.13% +0.04% / +0.04% +0.59% +0.59%] index_copy_ reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.463 -> 2.468 ( +0.20%) [ +0.12% +0.24% +0.00% / +0.20% +0.41% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.466 ms / 100) 2.451 -> 2.456 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.41% +0.53%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.12% +0.45%] index_add_ strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.41% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.12% +0.45% +0.57%] index_add_ strided 5 : Elapsed 0.025 ms (2.461 ms / 100) 2.450 -> 2.463 ( +0.53%) [ +0.00% +0.00% +0.00% / +0.53% +0.65% +0.78%] index_copy_ strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.465 -> 2.468 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.45% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.453 -> 2.459 ( +0.24%) [ +0.04% +0.00% +0.00% / +0.24% +0.49% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.462 -> 2.466 ( +0.16%) [ +0.00% +0.28% +0.12% / +0.16% +0.45% +0.41%] index_add_ perm : Elapsed 0.025 ms (2.462 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.20% +0.25% +0.00% / +0.12% +0.45% +0.57%] index_copy_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.463 -> 2.460 ( -0.12%) [ +0.16% +0.00% +0.04% / -0.12% +0.37% +0.45%] index_add_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.24% +0.00% +0.00% / +0.12% +0.41% +0.61%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) 5.233 -> 5.239 ( +0.11%) [ +0.21% +0.00% +0.06% / +0.11% +0.46% +0.54%] index_select const : Elapsed 0.052 ms (5.244 ms / 100) 5.223 -> 5.238 ( +0.29%) [ +0.19% +0.13% +0.00% / +0.29% +0.77% +0.80%] index_select wrap : Elapsed 0.052 ms (5.233 ms / 100) 5.270 -> 5.273 ( +0.06%) [ +0.19% +0.00% +0.04% / +0.06% +0.55% +0.61%] index_select linear : Elapsed 0.053 ms (5.280 ms / 100) 5.261 -> 5.251 ( -0.19%) [ +0.08% +0.00% +0.13% / -0.19% +0.51% +0.40%] index_select reverse : Elapsed 0.053 ms (5.265 ms / 100) 5.230 -> 5.231 ( +0.02%) [ +0.00% +0.02% +0.10% / +0.02% +0.65% +0.50%] index_select skip64 : Elapsed 0.052 ms (5.230 ms / 100) 5.228 -> 5.232 ( +0.08%) [ +0.00% +0.04% +0.13% / +0.08% +0.54% +0.59%] index_select skip256 : Elapsed 0.052 ms (5.228 ms / 100) 5.212 -> 5.217 ( +0.10%) [ +0.12% +0.08% +0.00% / +0.10% +0.67% +0.75%] index_select spread : Elapsed 0.052 ms (5.218 ms / 100) 5.208 -> 5.210 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.04% +0.65% +0.63%] index_select strided 3 : Elapsed 0.052 ms (5.215 ms / 100) 5.210 -> 5.215 ( +0.10%) [ +0.13% +0.06% +0.00% / +0.10% +0.75% +0.79%] index_select random : Elapsed 0.052 ms (5.217 ms / 100) 5.219 -> 5.222 ( +0.06%) [ +0.06% +0.13% +0.00% / +0.06% +0.86% +0.98%] index_select random_sorted : Elapsed 0.052 ms (5.222 ms / 100) B = [20, 4, 16, 40] (stride (4, 1, 3200, 80)) A = [20, 4, 5, 40] (stride (800, 1, 4, 20)) dim = 2 2.338 -> 2.341 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.17% +0.13% +0.47%] index_add_ linear : Elapsed 0.023 ms (2.339 ms / 100) 2.279 -> 2.279 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.26% +0.00% +0.04%] index_copy_ linear : Elapsed 0.023 ms (2.280 ms / 100) 2.326 -> 2.321 ( -0.21%) [ +0.00% +0.21% +0.17% / +0.17% -0.04% -0.21%] index_add_ reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.277 -> 2.276 ( -0.04%) [ +0.09% +0.00% +0.13% / +0.35% -0.04% +0.00%] index_copy_ reverse : Elapsed 0.023 ms (2.279 ms / 100) 2.335 -> 2.333 ( -0.09%) [ +0.09% +0.00% +0.13% / +0.04% -0.04% -0.09%] index_add_ spread : Elapsed 0.023 ms (2.337 ms / 100) 2.280 -> 2.276 ( -0.18%) [ +0.00% +0.04% +0.04% / -0.04% -0.18% -0.04%] index_copy_ spread : Elapsed 0.023 ms (2.280 ms / 100) 2.341 -> 2.337 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.17% +0.21% +0.00%] index_add_ strided 3 : Elapsed 0.023 ms (2.341 ms / 100) 2.278 -> 2.280 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.40% +0.22% +0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.280 ms / 100) 2.350 -> 2.342 ( -0.34%) [ +0.17% +0.00% +0.09% / +0.09% -0.30% -0.34%] index_add_ strided 5 : Elapsed 0.024 ms (2.354 ms / 100) 2.280 -> 2.282 ( +0.09%) [ +0.00% +0.18% +0.09% / +0.48% +0.09% +0.09%] index_copy_ strided 5 : Elapsed 0.023 ms (2.280 ms / 100) 2.341 -> 2.341 ( +0.00%) [ +0.26% +0.13% +0.00% / +0.00% +0.04% +0.04%] index_add_ strided 7 : Elapsed 0.023 ms (2.347 ms / 100) 2.278 -> 2.279 ( +0.04%) [ +0.22% +0.00% +0.09% / +0.13% +0.04% +0.09%] index_copy_ strided 7 : Elapsed 0.023 ms (2.283 ms / 100) 2.335 -> 2.339 ( +0.17%) [ +0.13% +0.00% +0.00% / +0.17% +0.30% +0.39%] index_add_ perm : Elapsed 0.023 ms (2.338 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.00% +0.00% +0.04% / +0.26% +0.22% +0.09%] index_copy_ perm : Elapsed 0.023 ms (2.279 ms / 100) 2.333 -> 2.338 ( +0.21%) [ +0.09% +0.00% +0.09% / +0.21% +0.43% +0.39%] index_add_ perm_sorted : Elapsed 0.023 ms (2.335 ms / 100) 2.279 -> 2.280 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.26% +0.13% +0.04%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.280 ms / 100) 4.942 -> 4.950 ( +0.16%) [ +0.18% +0.24% +0.00% / +0.16% +0.65% +0.81%] index_select const : Elapsed 0.050 ms (4.951 ms / 100) 4.920 -> 4.930 ( +0.20%) [ +0.00% +0.06% +0.06% / +0.20% +0.28% +0.24%] index_select wrap : Elapsed 0.049 ms (4.920 ms / 100) 4.973 -> 4.980 ( +0.14%) [ +0.10% +0.12% +0.00% / +0.14% +0.38% +0.38%] index_select linear : Elapsed 0.050 ms (4.978 ms / 100) 4.950 -> 4.956 ( +0.12%) [ +0.10% +0.12% +0.00% / +0.12% +0.61% +0.44%] index_select reverse : Elapsed 0.050 ms (4.955 ms / 100) 4.970 -> 4.976 ( +0.12%) [ +0.06% +0.26% +0.00% / +0.12% +0.54% +0.52%] index_select skip64 : Elapsed 0.050 ms (4.973 ms / 100) 4.970 -> 4.973 ( +0.06%) [ +0.06% +0.18% +0.00% / +0.06% +0.66% +0.56%] index_select skip256 : Elapsed 0.050 ms (4.973 ms / 100) 4.946 -> 4.943 ( -0.06%) [ +0.06% +0.14% +0.00% / -0.06% +0.57% +0.51%] index_select spread : Elapsed 0.049 ms (4.949 ms / 100) 4.912 -> 4.915 ( +0.06%) [ +0.00% +0.04% +0.00% / +0.06% +0.47% +0.29%] index_select strided 3 : Elapsed 0.049 ms (4.912 ms / 100) 4.948 -> 4.948 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.89% +0.73%] index_select random : Elapsed 0.049 ms (4.948 ms / 100) 4.966 -> 4.970 ( +0.08%) [ +0.00% +0.20% +0.14% / +0.08% +0.66% +0.66%] index_select random_sorted : Elapsed 0.050 ms (4.966 ms / 100) B = [20, 4, 16, 40] (stride (64, 1, 4, 1280)) A = [20, 4, 5, 40] (stride (40, 800, 3200, 1)) dim = 2 2.367 -> 2.370 ( +0.13%) [ +0.04% +0.21% +0.00% / +0.13% +0.97% +0.97%] index_add_ linear : Elapsed 0.024 ms (2.368 ms / 100) 2.319 -> 2.325 ( +0.26%) [ +0.09% +0.04% +0.00% / +0.26% +1.21% +1.47%] index_copy_ linear : Elapsed 0.023 ms (2.321 ms / 100) 2.351 -> 2.357 ( +0.26%) [ +0.17% +0.17% +0.00% / +0.26% +0.81% +1.11%] index_add_ reverse : Elapsed 0.024 ms (2.355 ms / 100) 2.309 -> 2.315 ( +0.26%) [ +0.00% +0.09% +0.30% / +0.26% +1.17% +1.39%] index_copy_ reverse : Elapsed 0.023 ms (2.309 ms / 100) 2.389 -> 2.385 ( -0.17%) [ +0.33% +0.17% +0.00% / -0.17% +1.05% +0.96%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.354 -> 2.354 ( +0.00%) [ +0.30% +0.25% +0.00% / +0.00% +1.40% +1.53%] index_copy_ spread : Elapsed 0.024 ms (2.361 ms / 100) 2.382 -> 2.390 ( +0.34%) [ +0.13% +0.29% +0.00% / +0.34% +1.34% +1.30%] index_add_ strided 3 : Elapsed 0.024 ms (2.385 ms / 100) 2.351 -> 2.358 ( +0.30%) [ +0.13% +0.13% +0.00% / +0.30% +1.49% +1.40%] index_copy_ strided 3 : Elapsed 0.024 ms (2.354 ms / 100) 2.360 -> 2.358 ( -0.08%) [ +0.08% +0.30% +0.00% / -0.08% +0.93% +0.85%] index_add_ strided 5 : Elapsed 0.024 ms (2.362 ms / 100) 2.317 -> 2.313 ( -0.17%) [ +0.09% +0.17% +0.00% / -0.17% +1.51% +1.68%] index_copy_ strided 5 : Elapsed 0.023 ms (2.319 ms / 100) 2.375 -> 2.384 ( +0.38%) [ +0.08% +0.34% +0.00% / +0.38% +1.14% +0.72%] index_add_ strided 7 : Elapsed 0.024 ms (2.377 ms / 100) 2.349 -> 2.358 ( +0.38%) [ +0.17% +0.00% +0.09% / +0.38% +1.15% +1.19%] index_copy_ strided 7 : Elapsed 0.024 ms (2.353 ms / 100) 2.376 -> 2.379 ( +0.13%) [ +0.34% +0.08% +0.00% / +0.13% +1.35% +1.26%] index_add_ perm : Elapsed 0.024 ms (2.384 ms / 100) 2.346 -> 2.351 ( +0.21%) [ +0.00% +0.26% +0.30% / +0.21% +1.66% +1.45%] index_copy_ perm : Elapsed 0.023 ms (2.346 ms / 100) 2.381 -> 2.384 ( +0.13%) [ +0.34% +0.25% +0.00% / +0.13% +1.34% +1.13%] index_add_ perm_sorted : Elapsed 0.024 ms (2.389 ms / 100) 2.353 -> 2.356 ( +0.13%) [ +0.17% +0.34% +0.00% / +0.13% +1.49% +1.44%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.357 ms / 100) 4.838 -> 4.840 ( +0.04%) [ +0.14% +0.00% +0.08% / +0.04% +0.68% +1.01%] index_select const : Elapsed 0.048 ms (4.845 ms / 100) 4.880 -> 4.888 ( +0.16%) [ +0.04% +0.14% +0.00% / +0.16% +0.86% +0.90%] index_select wrap : Elapsed 0.049 ms (4.882 ms / 100) 4.903 -> 4.909 ( +0.12%) [ +0.02% +0.00% +0.10% / +0.12% +0.92% +0.92%] index_select linear : Elapsed 0.049 ms (4.904 ms / 100) 4.900 -> 4.906 ( +0.12%) [ +0.00% +0.14% +0.10% / +0.12% +0.98% +0.82%] index_select reverse : Elapsed 0.049 ms (4.900 ms / 100) 4.838 -> 4.841 ( +0.06%) [ +0.10% +0.00% +0.04% / +0.06% +0.66% +0.72%] index_select skip64 : Elapsed 0.048 ms (4.843 ms / 100) 4.839 -> 4.844 ( +0.10%) [ +0.08% +0.00% +0.19% / +0.10% +0.93% +0.89%] index_select skip256 : Elapsed 0.048 ms (4.843 ms / 100) 4.887 -> 4.900 ( +0.27%) [ +0.20% +0.29% +0.00% / +0.27% +1.17% +1.21%] index_select spread : Elapsed 0.049 ms (4.897 ms / 100) 4.889 -> 4.891 ( +0.04%) [ +0.00% +0.27% +0.04% / +0.04% +1.06% +1.27%] index_select strided 3 : Elapsed 0.049 ms (4.889 ms / 100) 4.900 -> 4.894 ( -0.12%) [ +0.08% +0.00% +0.08% / -0.12% +0.94% +1.04%] index_select random : Elapsed 0.049 ms (4.904 ms / 100) 4.886 -> 4.888 ( +0.04%) [ +0.23% +0.18% +0.00% / +0.04% +1.33% +1.47%] index_select random_sorted : Elapsed 0.049 ms (4.897 ms / 100) B = [20, 4, 16, 40] (stride (64, 1, 4, 1280)) A = [20, 4, 5, 40] (stride (4, 1, 80, 400)) dim = 2 2.438 -> 2.437 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.37% +0.45%] index_add_ linear : Elapsed 0.024 ms (2.438 ms / 100) 2.382 -> 2.381 ( -0.04%) [ +0.00% +0.04% +0.21% / -0.04% +0.50% +0.34%] index_copy_ linear : Elapsed 0.024 ms (2.382 ms / 100) 2.439 -> 2.438 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.41% +0.49%] index_add_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.381 -> 2.384 ( +0.13%) [ +0.00% +0.17% +0.08% / +0.13% +0.50% +0.46%] index_copy_ reverse : Elapsed 0.024 ms (2.381 ms / 100) 2.441 -> 2.443 ( +0.08%) [ +0.33% +0.16% +0.00% / +0.08% +0.25% +0.66%] index_add_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.17% +0.33%] index_copy_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.25% +0.16% +0.00% / -0.04% +0.25% +0.25%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.406 -> 2.410 ( +0.17%) [ +0.17% +0.37% +0.00% / +0.17% +0.42% +0.33%] index_copy_ strided 3 : Elapsed 0.024 ms (2.410 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.00% +0.45% +0.45%] index_add_ strided 5 : Elapsed 0.024 ms (2.437 ms / 100) 2.383 -> 2.381 ( -0.08%) [ +0.04% +0.21% +0.00% / -0.08% +0.42% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.384 ms / 100) 2.436 -> 2.440 ( +0.16%) [ +0.25% +0.16% +0.00% / +0.16% +0.21% +0.41%] index_add_ strided 7 : Elapsed 0.024 ms (2.442 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.25% +0.25%] index_copy_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.441 -> 2.445 ( +0.16%) [ +0.16% +0.00% +0.04% / +0.16% +0.20% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.401 -> 2.398 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.12% +0.21% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.401 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.00% +0.04% +0.16% / +0.08% +0.08% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.443 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.00% +0.21% +0.25% / -0.04% +0.25% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.400 ms / 100) 5.239 -> 5.250 ( +0.21%) [ +0.08% +0.00% +0.04% / +0.21% +0.61% +0.61%] index_select const : Elapsed 0.052 ms (5.243 ms / 100) 5.240 -> 5.242 ( +0.04%) [ +0.00% +0.04% +0.10% / +0.04% +0.27% +0.38%] index_select wrap : Elapsed 0.052 ms (5.240 ms / 100) 5.259 -> 5.259 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.00% +0.48% +0.42%] index_select linear : Elapsed 0.053 ms (5.259 ms / 100) 5.206 -> 5.211 ( +0.10%) [ +0.17% +0.08% +0.00% / +0.10% +0.58% +0.63%] index_select reverse : Elapsed 0.052 ms (5.215 ms / 100) 5.256 -> 5.256 ( +0.00%) [ +0.00% +0.02% +0.17% / +0.00% +0.59% +0.40%] index_select skip64 : Elapsed 0.053 ms (5.256 ms / 100) 5.253 -> 5.258 ( +0.10%) [ +0.08% +0.00% +0.06% / +0.10% +0.59% +0.55%] index_select skip256 : Elapsed 0.053 ms (5.257 ms / 100) 5.229 -> 5.229 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.48% +0.52%] index_select spread : Elapsed 0.052 ms (5.229 ms / 100) 5.233 -> 5.231 ( -0.04%) [ +0.04% +0.00% +0.11% / -0.04% +0.52% +0.67%] index_select strided 3 : Elapsed 0.052 ms (5.235 ms / 100) 5.227 -> 5.234 ( +0.13%) [ +0.00% +0.10% +0.02% / +0.13% +0.77% +0.78%] index_select random : Elapsed 0.052 ms (5.227 ms / 100) 5.232 -> 5.228 ( -0.08%) [ +0.00% +0.02% +0.02% / -0.08% +0.75% +0.61%] index_select random_sorted : Elapsed 0.052 ms (5.232 ms / 100) B = [20, 4, 16, 40] (stride (16, 320, 1, 1280)) A = [20, 4, 5, 40] (stride (800, 200, 1, 5)) dim = 2 2.232 -> 2.238 ( +0.27%) [ +0.67% +0.31% +0.00% / +0.27% +1.66% +1.79%] index_add_ linear : Elapsed 0.022 ms (2.247 ms / 100) 2.208 -> 2.207 ( -0.05%) [ +0.18% +0.05% +0.00% / -0.05% +1.27% +1.18%] index_copy_ linear : Elapsed 0.022 ms (2.212 ms / 100) 2.237 -> 2.240 ( +0.13%) [ +0.00% +0.49% +0.00% / +0.13% +1.65% +1.48%] index_add_ reverse : Elapsed 0.022 ms (2.237 ms / 100) 2.202 -> 2.206 ( +0.18%) [ +0.05% +0.64% +0.00% / +0.18% +1.27% +1.63%] index_copy_ reverse : Elapsed 0.022 ms (2.203 ms / 100) 2.268 -> 2.270 ( +0.09%) [ +0.13% +0.18% +0.00% / +0.09% +1.15% +1.28%] index_add_ spread : Elapsed 0.023 ms (2.271 ms / 100) 2.274 -> 2.275 ( +0.04%) [ +0.26% +0.26% +0.00% / +0.04% +1.01% +1.19%] index_copy_ spread : Elapsed 0.023 ms (2.280 ms / 100) 2.264 -> 2.271 ( +0.31%) [ +0.09% +0.00% +0.13% / +0.31% +1.28% +1.06%] index_add_ strided 3 : Elapsed 0.023 ms (2.266 ms / 100) 2.271 -> 2.272 ( +0.04%) [ +0.00% +0.09% +0.04% / +0.04% +0.84% +1.10%] index_copy_ strided 3 : Elapsed 0.023 ms (2.271 ms / 100) 2.268 -> 2.265 ( -0.13%) [ +0.04% +0.00% +0.09% / -0.13% +1.01% +1.15%] index_add_ strided 5 : Elapsed 0.023 ms (2.269 ms / 100) 2.269 -> 2.273 ( +0.18%) [ +0.35% +0.00% +0.31% / +0.18% +1.32% +1.41%] index_copy_ strided 5 : Elapsed 0.023 ms (2.277 ms / 100) 2.264 -> 2.265 ( +0.04%) [ +0.13% +0.00% +0.35% / +0.04% +1.55% +1.68%] index_add_ strided 7 : Elapsed 0.023 ms (2.267 ms / 100) 2.270 -> 2.270 ( +0.00%) [ +0.00% +0.13% +0.09% / +0.00% +1.41% +1.41%] index_copy_ strided 7 : Elapsed 0.023 ms (2.270 ms / 100) 2.264 -> 2.265 ( +0.04%) [ +0.27% +0.13% +0.00% / +0.04% +1.10% +0.97%] index_add_ perm : Elapsed 0.023 ms (2.270 ms / 100) 2.270 -> 2.271 ( +0.04%) [ +0.09% +0.13% +0.00% / +0.04% +1.28% +0.97%] index_copy_ perm : Elapsed 0.023 ms (2.272 ms / 100) 2.268 -> 2.270 ( +0.09%) [ +0.00% +0.09% +0.04% / +0.09% +0.97% +0.88%] index_add_ perm_sorted : Elapsed 0.023 ms (2.268 ms / 100) 2.270 -> 2.279 ( +0.40%) [ +0.00% +0.22% +0.04% / +0.40% +0.88% +1.41%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.270 ms / 100) 4.650 -> 4.653 ( +0.06%) [ +0.00% +0.13% +0.06% / +0.06% +0.97% +0.90%] index_select const : Elapsed 0.047 ms (4.650 ms / 100) 4.657 -> 4.654 ( -0.06%) [ +0.02% +0.02% +0.00% / -0.06% +0.84% +0.90%] index_select wrap : Elapsed 0.047 ms (4.658 ms / 100) 4.662 -> 4.663 ( +0.02%) [ +0.09% +0.00% +0.06% / +0.02% +0.51% +0.94%] index_select linear : Elapsed 0.047 ms (4.666 ms / 100) 4.660 -> 4.664 ( +0.09%) [ +0.15% +0.00% +0.13% / +0.09% +0.90% +0.99%] index_select reverse : Elapsed 0.047 ms (4.667 ms / 100) 4.650 -> 4.650 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.95% +0.99%] index_select skip64 : Elapsed 0.047 ms (4.656 ms / 100) 4.647 -> 4.649 ( +0.04%) [ +0.00% +0.11% +0.02% / +0.04% +1.08% +1.03%] index_select skip256 : Elapsed 0.046 ms (4.647 ms / 100) 4.652 -> 4.653 ( +0.02%) [ +0.21% +0.17% +0.00% / +0.02% +1.16% +1.33%] index_select spread : Elapsed 0.047 ms (4.662 ms / 100) 4.652 -> 4.663 ( +0.24%) [ +0.21% +0.00% +0.13% / +0.24% +1.14% +1.27%] index_select strided 3 : Elapsed 0.047 ms (4.662 ms / 100) 4.655 -> 4.661 ( +0.13%) [ +0.04% +0.00% +0.11% / +0.13% +1.12% +1.01%] index_select random : Elapsed 0.047 ms (4.657 ms / 100) 4.642 -> 4.652 ( +0.22%) [ +0.17% +0.00% +0.00% / +0.22% +1.31% +1.18%] index_select random_sorted : Elapsed 0.047 ms (4.650 ms / 100) out_shape = [20, 4, 5, 16] in_shape = [20, 4, 5, 40] idx_dim = 3 B = [20, 4, 5, 16] (stride (320, 16, 64, 1)) A = [20, 4, 5, 40] (stride (20, 1, 4, 400)) dim = 3 3.814 -> 3.813 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.60% +0.60%] index_select const : Elapsed 0.038 ms (3.814 ms / 100) 3.823 -> 3.823 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.42% +0.44%] index_select wrap : Elapsed 0.038 ms (3.824 ms / 100) 3.815 -> 3.817 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.55% +0.55%] index_select linear : Elapsed 0.038 ms (3.816 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.45% +0.45%] index_select reverse : Elapsed 0.038 ms (3.814 ms / 100) 3.826 -> 3.830 ( +0.10%) [ +0.13% +0.05% +0.00% / +0.10% +0.52% +0.52%] index_select skip64 : Elapsed 0.038 ms (3.831 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.50% +0.50%] index_select skip256 : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.39%] index_select spread : Elapsed 0.038 ms (3.814 ms / 100) 3.825 -> 3.827 ( +0.05%) [ +0.10% +0.08% +0.00% / +0.05% +0.39% +0.37%] index_select strided 3 : Elapsed 0.038 ms (3.829 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.34% +0.34%] index_select strided 5 : Elapsed 0.038 ms (3.816 ms / 100) 3.820 -> 3.823 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.37% +0.37%] index_select strided 7 : Elapsed 0.038 ms (3.820 ms / 100) 3.822 -> 3.833 ( +0.29%) [ +0.08% +0.05% +0.00% / +0.29% +0.44% +0.50%] index_select strided 8 : Elapsed 0.038 ms (3.825 ms / 100) 3.817 -> 3.820 ( +0.08%) [ +0.03% +0.00% +0.08% / +0.08% +0.39% +0.39%] index_select strided 16 : Elapsed 0.038 ms (3.818 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.52% +0.52%] index_select random : Elapsed 0.038 ms (3.814 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.55% +0.52%] index_select random_sorted : Elapsed 0.038 ms (3.814 ms / 100) 3.815 -> 3.817 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.50% +0.42%] index_select perm : Elapsed 0.038 ms (3.816 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.39% +0.39%] index_select perm_sorted : Elapsed 0.038 ms (3.815 ms / 100) B = [20, 4, 5, 16] (stride (1, 320, 1280, 20)) A = [20, 4, 5, 40] (stride (800, 200, 40, 1)) dim = 3 3.606 -> 3.607 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.55% +0.53%] index_select const : Elapsed 0.036 ms (3.608 ms / 100) 3.572 -> 3.579 ( +0.20%) [ +0.00% +0.06% +0.14% / +0.20% +0.64% +0.81%] index_select wrap : Elapsed 0.036 ms (3.572 ms / 100) 3.532 -> 3.531 ( -0.03%) [ +0.14% +0.00% +0.08% / -0.03% +0.65% +0.68%] index_select linear : Elapsed 0.035 ms (3.537 ms / 100) 3.580 -> 3.581 ( +0.03%) [ +0.17% +0.03% +0.00% / +0.03% +0.92% +0.81%] index_select reverse : Elapsed 0.036 ms (3.586 ms / 100) 3.561 -> 3.561 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.79% +0.79%] index_select skip64 : Elapsed 0.036 ms (3.562 ms / 100) 3.602 -> 3.603 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.75% +0.81%] index_select skip256 : Elapsed 0.036 ms (3.603 ms / 100) 3.524 -> 3.525 ( +0.03%) [ +0.26% +0.23% +0.00% / +0.03% +0.65% +0.68%] index_select spread : Elapsed 0.035 ms (3.533 ms / 100) 3.560 -> 3.560 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.67% +0.65%] index_select strided 3 : Elapsed 0.036 ms (3.561 ms / 100) 3.539 -> 3.541 ( +0.06%) [ +0.00% +0.08% +0.06% / +0.06% +0.88% +0.88%] index_select strided 5 : Elapsed 0.035 ms (3.539 ms / 100) 3.535 -> 3.536 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.62% +0.65%] index_select strided 7 : Elapsed 0.035 ms (3.537 ms / 100) 3.577 -> 3.587 ( +0.28%) [ +0.62% +0.00% +0.25% / +0.28% +0.98% +1.06%] index_select strided 8 : Elapsed 0.036 ms (3.599 ms / 100) 3.559 -> 3.559 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.73% +0.73%] index_select strided 16 : Elapsed 0.036 ms (3.559 ms / 100) 3.592 -> 3.596 ( +0.11%) [ +0.00% +0.14% +0.11% / +0.11% +0.70% +0.70%] index_select random : Elapsed 0.036 ms (3.592 ms / 100) 3.558 -> 3.561 ( +0.08%) [ +0.06% +0.06% +0.00% / +0.08% +0.82% +0.82%] index_select random_sorted : Elapsed 0.036 ms (3.560 ms / 100) 3.558 -> 3.562 ( +0.11%) [ +0.11% +0.00% +0.14% / +0.11% +0.73% +0.84%] index_select perm : Elapsed 0.036 ms (3.562 ms / 100) 3.588 -> 3.588 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.75% +0.75%] index_select perm_sorted : Elapsed 0.036 ms (3.591 ms / 100) B = [20, 4, 5, 16] (stride (1, 100, 20, 400)) A = [20, 4, 5, 40] (stride (20, 1, 4, 400)) dim = 3 3.510 -> 3.510 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.68% +0.57%] index_select const : Elapsed 0.035 ms (3.510 ms / 100) 3.513 -> 3.515 ( +0.06%) [ +0.09% +0.03% +0.00% / +0.06% +0.63% +0.60%] index_select wrap : Elapsed 0.035 ms (3.516 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.60% +0.60%] index_select linear : Elapsed 0.035 ms (3.513 ms / 100) 3.504 -> 3.504 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.74% +0.54%] index_select reverse : Elapsed 0.035 ms (3.505 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.20% +0.00% +0.43% / +0.00% +0.57% +0.57%] index_select skip64 : Elapsed 0.035 ms (3.524 ms / 100) 3.510 -> 3.511 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.66% +0.60%] index_select skip256 : Elapsed 0.035 ms (3.511 ms / 100) 3.508 -> 3.510 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.54% +0.48%] index_select spread : Elapsed 0.035 ms (3.510 ms / 100) 3.515 -> 3.519 ( +0.11%) [ +0.00% +0.03% +0.00% / +0.11% +0.63% +0.57%] index_select strided 3 : Elapsed 0.035 ms (3.515 ms / 100) 3.508 -> 3.509 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.43% +0.34%] index_select strided 5 : Elapsed 0.035 ms (3.508 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.65% +0.63%] index_select strided 7 : Elapsed 0.035 ms (3.515 ms / 100) 3.515 -> 3.515 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.48%] index_select strided 8 : Elapsed 0.035 ms (3.515 ms / 100) 3.507 -> 3.510 ( +0.09%) [ +0.03% +0.09% +0.00% / +0.09% +0.51% +0.68%] index_select strided 16 : Elapsed 0.035 ms (3.508 ms / 100) 3.513 -> 3.514 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.60% +0.77%] index_select random : Elapsed 0.035 ms (3.514 ms / 100) 3.504 -> 3.505 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.80% +0.77%] index_select random_sorted : Elapsed 0.035 ms (3.505 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.57% +0.63%] index_select perm : Elapsed 0.035 ms (3.515 ms / 100) 3.509 -> 3.509 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.40% +0.34%] index_select perm_sorted : Elapsed 0.035 ms (3.509 ms / 100) B = [20, 4, 5, 16] (stride (1, 20, 80, 400)) A = [20, 4, 5, 40] (stride (1, 4000, 800, 20)) dim = 3 3.822 -> 3.823 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.68% +0.68%] index_select const : Elapsed 0.038 ms (3.824 ms / 100) 3.803 -> 3.807 ( +0.11%) [ +0.03% +0.00% +0.08% / +0.11% +0.68% +0.63%] index_select wrap : Elapsed 0.038 ms (3.804 ms / 100) 3.809 -> 3.814 ( +0.13%) [ +0.13% +0.00% +0.03% / +0.13% +0.79% +0.71%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.809 -> 3.816 ( +0.18%) [ +0.08% +0.00% +0.03% / +0.18% +0.92% +0.92%] index_select reverse : Elapsed 0.038 ms (3.812 ms / 100) 3.808 -> 3.811 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.79% +0.76%] index_select skip64 : Elapsed 0.038 ms (3.809 ms / 100) 3.820 -> 3.820 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.76% +0.73%] index_select skip256 : Elapsed 0.038 ms (3.820 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.76% +0.82%] index_select spread : Elapsed 0.038 ms (3.800 ms / 100) 3.808 -> 3.808 ( +0.00%) [ +0.13% +0.11% +0.00% / +0.00% +0.71% +0.79%] index_select strided 3 : Elapsed 0.038 ms (3.813 ms / 100) 3.801 -> 3.800 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.63% +0.68%] index_select strided 5 : Elapsed 0.038 ms (3.802 ms / 100) 3.806 -> 3.806 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_select strided 7 : Elapsed 0.038 ms (3.808 ms / 100) 3.805 -> 3.807 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.81% +0.76%] index_select strided 8 : Elapsed 0.038 ms (3.806 ms / 100) 3.759 -> 3.760 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.74% +0.74%] index_select strided 16 : Elapsed 0.038 ms (3.760 ms / 100) 3.827 -> 3.827 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.60% +0.78%] index_select random : Elapsed 0.038 ms (3.828 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.84% +0.87%] index_select random_sorted : Elapsed 0.038 ms (3.815 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.10% +0.08% +0.00% / +0.05% +0.79% +0.86%] index_select perm : Elapsed 0.038 ms (3.821 ms / 100) 3.821 -> 3.824 ( +0.08%) [ +0.00% +0.10% +0.10% / +0.08% +0.81% +0.68%] index_select perm_sorted : Elapsed 0.038 ms (3.821 ms / 100) out_shape = [16, 4, 40, 5] in_shape = [20, 4, 40, 5] idx_dim = 0 B = [16, 4, 40, 5] (stride (800, 200, 5, 1)) A = [20, 4, 40, 5] (stride (200, 4000, 1, 40)) dim = 0 3.277 -> 3.273 ( -0.12%) [ +0.03% +0.00% +0.00% / -0.12% +0.64% +0.58%] index_select const : Elapsed 0.033 ms (3.278 ms / 100) 3.290 -> 3.291 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.61% +0.64%] index_select wrap : Elapsed 0.033 ms (3.290 ms / 100) 3.283 -> 3.284 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.64% +0.67%] index_select linear : Elapsed 0.033 ms (3.283 ms / 100) 3.283 -> 3.287 ( +0.12%) [ +0.03% +0.06% +0.00% / +0.12% +0.70% +0.64%] index_select reverse : Elapsed 0.033 ms (3.284 ms / 100) 3.279 -> 3.279 ( +0.00%) [ +0.00% +0.06% +0.09% / +0.00% +0.91% +0.82%] index_select skip64 : Elapsed 0.033 ms (3.279 ms / 100) 3.272 -> 3.273 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.46% +0.40%] index_select skip256 : Elapsed 0.033 ms (3.273 ms / 100) 3.287 -> 3.294 ( +0.21%) [ +0.09% +0.00% +0.21% / +0.21% +0.79% +0.64%] index_select spread : Elapsed 0.033 ms (3.290 ms / 100) 3.279 -> 3.281 ( +0.06%) [ +0.00% +0.09% +0.00% / +0.06% +0.67% +0.67%] index_select strided 3 : Elapsed 0.033 ms (3.279 ms / 100) 3.273 -> 3.273 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.73% +0.76%] index_select strided 5 : Elapsed 0.033 ms (3.274 ms / 100) 3.292 -> 3.296 ( +0.12%) [ +0.03% +0.09% +0.00% / +0.12% +0.55% +0.61%] index_select strided 7 : Elapsed 0.033 ms (3.293 ms / 100) 3.291 -> 3.294 ( +0.09%) [ +0.06% +0.09% +0.00% / +0.09% +0.58% +0.70%] index_select strided 8 : Elapsed 0.033 ms (3.293 ms / 100) 3.278 -> 3.278 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.67% +0.67%] index_select strided 16 : Elapsed 0.033 ms (3.278 ms / 100) 3.287 -> 3.286 ( -0.03%) [ +0.18% +0.12% +0.00% / -0.03% +0.40% +0.46%] index_select random : Elapsed 0.033 ms (3.293 ms / 100) 3.281 -> 3.282 ( +0.03%) [ +0.03% +0.00% +0.15% / +0.03% +0.61% +0.70%] index_select random_sorted : Elapsed 0.033 ms (3.282 ms / 100) 3.290 -> 3.288 ( -0.06%) [ +0.06% +0.00% +0.15% / -0.06% +0.67% +0.52%] index_select perm : Elapsed 0.033 ms (3.292 ms / 100) 3.279 -> 3.280 ( +0.03%) [ +0.06% +0.00% +0.06% / +0.03% +0.64% +0.58%] index_select perm_sorted : Elapsed 0.033 ms (3.281 ms / 100) B = [16, 4, 40, 5] (stride (800, 5, 20, 1)) A = [20, 4, 40, 5] (stride (20, 5, 400, 1)) dim = 0 3.959 -> 3.962 ( +0.08%) [ +0.13% +0.00% +0.20% / +0.08% +0.53% +0.38%] index_select const : Elapsed 0.040 ms (3.964 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.18% +0.00% +0.05% / +0.05% +0.51% +0.66%] index_select wrap : Elapsed 0.039 ms (3.927 ms / 100) 3.931 -> 3.935 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.61% +0.48%] index_select linear : Elapsed 0.039 ms (3.935 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.15% +0.00% +0.03% / +0.13% +0.54% +0.54%] index_select reverse : Elapsed 0.039 ms (3.927 ms / 100) 3.973 -> 3.975 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.43% +0.43%] index_select skip64 : Elapsed 0.040 ms (3.974 ms / 100) 3.956 -> 3.956 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.53% +0.58%] index_select skip256 : Elapsed 0.040 ms (3.956 ms / 100) 3.927 -> 3.926 ( -0.03%) [ +0.08% +0.00% +0.03% / -0.03% +0.51% +0.56%] index_select spread : Elapsed 0.039 ms (3.930 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.00% +0.03% +0.08% / +0.13% +0.59% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.56% +0.54%] index_select strided 5 : Elapsed 0.039 ms (3.925 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.00% +0.15% +0.15% / +0.03% +0.54% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.921 ms / 100) 3.935 -> 3.941 ( +0.15%) [ +0.13% +0.05% +0.00% / +0.15% +0.81% +0.81%] index_select strided 8 : Elapsed 0.039 ms (3.940 ms / 100) 3.942 -> 3.949 ( +0.18%) [ +0.23% +0.00% +0.05% / +0.18% +0.89% +0.81%] index_select strided 16 : Elapsed 0.040 ms (3.951 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.00% +0.08% +0.10% / +0.03% +0.71% +0.82%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.926 -> 3.928 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.61% +0.61%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.922 -> 3.921 ( -0.03%) [ +0.00% +0.05% +0.10% / -0.03% +0.54% +0.59%] index_select perm : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.00% +0.08% +0.10% / +0.05% +0.59% +0.71%] index_select perm_sorted : Elapsed 0.039 ms (3.921 ms / 100) B = [16, 4, 40, 5] (stride (1, 3200, 80, 16)) A = [20, 4, 40, 5] (stride (1, 20, 400, 80)) dim = 0 3.246 -> 3.250 ( +0.12%) [ +0.22% +0.18% +0.00% / +0.12% +0.83% +0.80%] index_select const : Elapsed 0.033 ms (3.253 ms / 100) 3.229 -> 3.234 ( +0.15%) [ +0.00% +0.12% +0.15% / +0.15% +0.93% +0.77%] index_select wrap : Elapsed 0.032 ms (3.229 ms / 100) 3.235 -> 3.237 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.65% +0.56%] index_select linear : Elapsed 0.032 ms (3.237 ms / 100) 3.252 -> 3.253 ( +0.03%) [ +0.12% +0.09% +0.00% / +0.03% +0.80% +0.95%] index_select reverse : Elapsed 0.033 ms (3.256 ms / 100) 3.253 -> 3.255 ( +0.06%) [ +0.00% +0.15% +0.06% / +0.06% +0.49% +0.52%] index_select skip64 : Elapsed 0.033 ms (3.253 ms / 100) 3.241 -> 3.251 ( +0.31%) [ +0.25% +0.00% +0.43% / +0.31% +1.23% +0.68%] index_select skip256 : Elapsed 0.032 ms (3.249 ms / 100) 3.236 -> 3.232 ( -0.12%) [ +0.06% +0.03% +0.00% / -0.12% +0.74% +0.53%] index_select spread : Elapsed 0.032 ms (3.238 ms / 100) 3.258 -> 3.259 ( +0.03%) [ +0.15% +0.09% +0.00% / +0.03% +0.64% +0.49%] index_select strided 3 : Elapsed 0.033 ms (3.263 ms / 100) 3.238 -> 3.241 ( +0.09%) [ +0.06% +0.03% +0.00% / +0.09% +0.49% +0.68%] index_select strided 5 : Elapsed 0.032 ms (3.240 ms / 100) 3.238 -> 3.243 ( +0.15%) [ +0.03% +0.06% +0.00% / +0.15% +0.65% +0.53%] index_select strided 7 : Elapsed 0.032 ms (3.239 ms / 100) 3.242 -> 3.239 ( -0.09%) [ +0.06% +0.06% +0.00% / -0.09% +0.80% +0.68%] index_select strided 8 : Elapsed 0.032 ms (3.244 ms / 100) 3.243 -> 3.246 ( +0.09%) [ +0.25% +0.25% +0.00% / +0.09% +0.86% +0.86%] index_select strided 16 : Elapsed 0.033 ms (3.251 ms / 100) 3.239 -> 3.241 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.40% +0.49%] index_select random : Elapsed 0.032 ms (3.239 ms / 100) 3.234 -> 3.231 ( -0.09%) [ +0.06% +0.03% +0.00% / -0.09% +0.46% +0.25%] index_select random_sorted : Elapsed 0.032 ms (3.236 ms / 100) 3.250 -> 3.245 ( -0.15%) [ +0.15% +0.06% +0.00% / -0.15% +0.55% +0.52%] index_select perm : Elapsed 0.033 ms (3.255 ms / 100) 3.247 -> 3.255 ( +0.25%) [ +0.06% +0.00% +0.03% / +0.25% +0.80% +0.71%] index_select perm_sorted : Elapsed 0.032 ms (3.249 ms / 100) B = [16, 4, 40, 5] (stride (1, 3200, 16, 640)) A = [20, 4, 40, 5] (stride (1, 100, 400, 20)) dim = 0 4.154 -> 4.154 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.55% +0.63%] index_select const : Elapsed 0.042 ms (4.156 ms / 100) 4.156 -> 4.160 ( +0.10%) [ +0.07% +0.07% +0.00% / +0.10% +0.70% +0.55%] index_select wrap : Elapsed 0.042 ms (4.159 ms / 100) 4.164 -> 4.171 ( +0.17%) [ +0.14% +0.19% +0.00% / +0.17% +0.60% +0.70%] index_select linear : Elapsed 0.042 ms (4.170 ms / 100) 4.171 -> 4.174 ( +0.07%) [ +0.17% +0.00% +0.05% / +0.07% +0.60% +0.65%] index_select reverse : Elapsed 0.042 ms (4.178 ms / 100) 4.166 -> 4.173 ( +0.17%) [ +0.12% +0.00% +0.10% / +0.17% +0.65% +0.53%] index_select skip64 : Elapsed 0.042 ms (4.171 ms / 100) 4.156 -> 4.158 ( +0.05%) [ +0.00% +0.07% +0.12% / +0.05% +0.65% +0.70%] index_select skip256 : Elapsed 0.042 ms (4.156 ms / 100) 4.158 -> 4.159 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.60% +0.51%] index_select spread : Elapsed 0.042 ms (4.159 ms / 100) 4.170 -> 4.175 ( +0.12%) [ +0.07% +0.00% +0.10% / +0.12% +0.60% +0.58%] index_select strided 3 : Elapsed 0.042 ms (4.173 ms / 100) 4.169 -> 4.174 ( +0.12%) [ +0.14% +0.00% +0.19% / +0.12% +0.55% +0.41%] index_select strided 5 : Elapsed 0.042 ms (4.175 ms / 100) 4.160 -> 4.161 ( +0.02%) [ +0.00% +0.02% +0.10% / +0.02% +0.60% +0.60%] index_select strided 7 : Elapsed 0.042 ms (4.160 ms / 100) 4.165 -> 4.160 ( -0.12%) [ +0.00% +0.07% +0.02% / -0.12% +0.50% +0.62%] index_select strided 8 : Elapsed 0.042 ms (4.165 ms / 100) 4.163 -> 4.160 ( -0.07%) [ +0.00% +0.02% +0.00% / -0.07% +0.70% +0.62%] index_select strided 16 : Elapsed 0.042 ms (4.163 ms / 100) 4.166 -> 4.163 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.62% +0.55%] index_select random : Elapsed 0.042 ms (4.167 ms / 100) 4.170 -> 4.169 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.72% +0.65%] index_select random_sorted : Elapsed 0.042 ms (4.170 ms / 100) 4.166 -> 4.163 ( -0.07%) [ +0.02% +0.07% +0.00% / -0.07% +0.60% +0.67%] index_select perm : Elapsed 0.042 ms (4.167 ms / 100) 4.165 -> 4.167 ( +0.05%) [ +0.12% +0.00% +0.07% / +0.05% +0.58% +0.62%] index_select perm_sorted : Elapsed 0.042 ms (4.170 ms / 100) B = [16, 4, 40, 5] (stride (20, 1, 320, 4)) A = [20, 4, 40, 5] (stride (800, 40, 1, 160)) dim = 0 3.589 -> 3.590 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.84% +0.84%] index_select const : Elapsed 0.036 ms (3.589 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.78% +0.78%] index_select wrap : Elapsed 0.036 ms (3.569 ms / 100) 3.578 -> 3.578 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.78% +0.81%] index_select linear : Elapsed 0.036 ms (3.579 ms / 100) 3.566 -> 3.566 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_select reverse : Elapsed 0.036 ms (3.566 ms / 100) 3.593 -> 3.593 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.75% +0.78%] index_select skip64 : Elapsed 0.036 ms (3.595 ms / 100) 3.571 -> 3.570 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.73% +0.76%] index_select skip256 : Elapsed 0.036 ms (3.571 ms / 100) 3.572 -> 3.573 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.70% +0.67%] index_select spread : Elapsed 0.036 ms (3.574 ms / 100) 3.569 -> 3.571 ( +0.06%) [ +0.06% +0.08% +0.00% / +0.06% +0.67% +0.64%] index_select strided 3 : Elapsed 0.036 ms (3.571 ms / 100) 3.570 -> 3.572 ( +0.06%) [ +0.08% +0.00% +0.00% / +0.06% +0.78% +0.76%] index_select strided 5 : Elapsed 0.036 ms (3.573 ms / 100) 3.574 -> 3.574 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.67% +0.73%] index_select strided 7 : Elapsed 0.036 ms (3.574 ms / 100) 3.579 -> 3.580 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.64% +0.61%] index_select strided 8 : Elapsed 0.036 ms (3.581 ms / 100) 3.580 -> 3.582 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.84% +0.81%] index_select strided 16 : Elapsed 0.036 ms (3.580 ms / 100) 3.584 -> 3.584 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.59% +0.59%] index_select random : Elapsed 0.036 ms (3.586 ms / 100) 3.584 -> 3.584 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.47% +0.47%] index_select random_sorted : Elapsed 0.036 ms (3.584 ms / 100) 3.574 -> 3.575 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.36% +0.39%] index_select perm : Elapsed 0.036 ms (3.575 ms / 100) 3.579 -> 3.580 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.36% +0.39%] index_select perm_sorted : Elapsed 0.036 ms (3.580 ms / 100) B = [16, 4, 40, 5] (stride (160, 1, 4, 2560)) A = [20, 4, 40, 5] (stride (40, 800, 1, 3200)) dim = 0 3.934 -> 3.938 ( +0.10%) [ +0.03% +0.00% +0.00% / +0.10% +0.56% +0.53%] index_select const : Elapsed 0.039 ms (3.935 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.00% +0.13% +0.00% / +0.05% +0.48% +0.56%] index_select wrap : Elapsed 0.039 ms (3.920 ms / 100) 3.928 -> 3.926 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.53% +0.59%] index_select linear : Elapsed 0.039 ms (3.930 ms / 100) 3.928 -> 3.934 ( +0.15%) [ +0.00% +0.15% +0.13% / +0.15% +0.61% +0.69%] index_select reverse : Elapsed 0.039 ms (3.928 ms / 100) 3.958 -> 3.960 ( +0.05%) [ +0.03% +0.00% +0.08% / +0.05% +0.45% +0.40%] index_select skip64 : Elapsed 0.040 ms (3.959 ms / 100) 3.936 -> 3.941 ( +0.13%) [ +0.00% +0.03% +0.03% / +0.13% +0.43% +0.46%] index_select skip256 : Elapsed 0.039 ms (3.936 ms / 100) 3.925 -> 3.920 ( -0.13%) [ +0.00% +0.03% +0.03% / -0.13% +0.43% +0.48%] index_select spread : Elapsed 0.039 ms (3.925 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.54% +0.54%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.43% +0.41%] index_select strided 5 : Elapsed 0.039 ms (3.924 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.48% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.933 -> 3.937 ( +0.10%) [ +0.10% +0.00% +0.13% / +0.10% +0.86% +0.71%] index_select strided 8 : Elapsed 0.039 ms (3.937 ms / 100) 3.928 -> 3.926 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.69% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.928 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.59% +0.59%] index_select random : Elapsed 0.039 ms (3.923 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.00% +0.15% +0.15% / +0.03% +0.97% +0.97%] index_select random_sorted : Elapsed 0.039 ms (3.924 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.74% +0.66%] index_select perm : Elapsed 0.039 ms (3.927 ms / 100) 3.921 -> 3.927 ( +0.15%) [ +0.13% +0.05% +0.00% / +0.15% +0.64% +0.61%] index_select perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) B = [16, 4, 40, 5] (stride (40, 640, 1, 2560)) A = [20, 4, 40, 5] (stride (800, 200, 1, 40)) dim = 0 3.902 -> 3.903 ( +0.03%) [ +0.51% +0.41% +0.00% / +0.03% +0.92% +0.59%] index_select const : Elapsed 0.039 ms (3.922 ms / 100) 3.911 -> 3.910 ( -0.03%) [ +0.28% +0.00% +0.15% / -0.03% +0.64% +0.64%] index_select wrap : Elapsed 0.039 ms (3.922 ms / 100) 3.908 -> 3.917 ( +0.23%) [ +0.00% +0.41% +0.49% / +0.23% +0.87% +0.90%] index_select linear : Elapsed 0.039 ms (3.908 ms / 100) 3.903 -> 3.912 ( +0.23%) [ +0.05% +0.26% +0.00% / +0.23% +0.82% +0.90%] index_select reverse : Elapsed 0.039 ms (3.905 ms / 100) 3.919 -> 3.923 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.61% +0.64%] index_select skip64 : Elapsed 0.039 ms (3.923 ms / 100) 3.908 -> 3.905 ( -0.08%) [ +0.18% +0.00% +0.23% / -0.08% +0.56% +0.51%] index_select skip256 : Elapsed 0.039 ms (3.915 ms / 100) 3.901 -> 3.904 ( +0.08%) [ +0.21% +0.00% +0.03% / +0.08% +0.64% +0.72%] index_select spread : Elapsed 0.039 ms (3.909 ms / 100) 3.912 -> 3.909 ( -0.08%) [ +0.18% +0.00% +0.26% / -0.08% +0.51% +0.51%] index_select strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 3.909 -> 3.922 ( +0.33%) [ +0.31% +0.00% +0.23% / +0.33% +0.67% +0.67%] index_select strided 5 : Elapsed 0.039 ms (3.921 ms / 100) 3.913 -> 3.917 ( +0.10%) [ +0.00% +0.08% +0.05% / +0.10% +0.56% +0.61%] index_select strided 7 : Elapsed 0.039 ms (3.913 ms / 100) 3.896 -> 3.904 ( +0.21%) [ +0.08% +0.00% +0.49% / +0.21% +0.77% +0.67%] index_select strided 8 : Elapsed 0.039 ms (3.899 ms / 100) 3.915 -> 3.917 ( +0.05%) [ +0.00% +0.08% +0.18% / +0.05% +0.74% +0.56%] index_select strided 16 : Elapsed 0.039 ms (3.915 ms / 100) 3.904 -> 3.903 ( -0.03%) [ +0.00% +0.03% +0.08% / -0.03% +0.64% +0.64%] index_select random : Elapsed 0.039 ms (3.904 ms / 100) 3.915 -> 3.923 ( +0.20%) [ +0.15% +0.10% +0.00% / +0.20% +0.56% +0.56%] index_select random_sorted : Elapsed 0.039 ms (3.921 ms / 100) 3.922 -> 3.921 ( -0.03%) [ +0.08% +0.05% +0.00% / -0.03% +0.31% +0.36%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.31% +0.48%] index_select perm_sorted : Elapsed 0.039 ms (3.924 ms / 100) out_shape = [20, 16, 40, 5] in_shape = [20, 4, 40, 5] idx_dim = 1 B = [20, 16, 40, 5] (stride (3200, 200, 1, 40)) A = [20, 4, 40, 5] (stride (5, 100, 400, 1)) dim = 1 2.492 -> 2.500 ( +0.32%) [ +0.20% +0.28% +0.00% / +0.32% +0.72% +0.60%] index_add_ linear : Elapsed 0.025 ms (2.497 ms / 100) 2.447 -> 2.452 ( +0.20%) [ +0.00% +0.12% +0.04% / +0.20% +0.45% +0.49%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.12% +0.44% +0.44%] index_add_ reverse : Elapsed 0.025 ms (2.505 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.61% +0.49%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.494 -> 2.497 ( +0.12%) [ +0.12% +0.08% +0.00% / +0.12% +0.44% +0.64%] index_add_ spread : Elapsed 0.025 ms (2.497 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.49% +0.61%] index_copy_ spread : Elapsed 0.024 ms (2.446 ms / 100) 2.493 -> 2.498 ( +0.20%) [ +0.16% +0.00% +0.04% / +0.20% +0.76% +0.64%] index_add_ strided 3 : Elapsed 0.025 ms (2.497 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.57% +0.57%] index_copy_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.498 -> 2.501 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.12% +0.32% +0.44%] index_add_ strided 5 : Elapsed 0.025 ms (2.498 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.00% +0.16% +0.16% / +0.12% +0.33% +0.33%] index_copy_ strided 5 : Elapsed 0.024 ms (2.449 ms / 100) 2.498 -> 2.502 ( +0.16%) [ +0.00% +0.12% +0.16% / +0.16% +0.56% +0.48%] index_add_ strided 7 : Elapsed 0.025 ms (2.498 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.25% +0.29% +0.00% / +0.16% +0.57% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.505 -> 2.505 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.04% +0.00% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.507 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.00% +0.24% +0.12% / +0.24% +0.12% +0.33%] index_copy_ perm : Elapsed 0.025 ms (2.455 ms / 100) 2.499 -> 2.499 ( +0.00%) [ +0.24% +0.00% +0.12% / +0.00% +0.20% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.505 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.16% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 5.401 -> 5.418 ( +0.31%) [ +0.33% +0.31% +0.00% / +0.31% +0.70% +0.59%] index_select const : Elapsed 0.054 ms (5.419 ms / 100) 5.446 -> 5.454 ( +0.15%) [ +0.28% +0.11% +0.00% / +0.15% +0.48% +0.62%] index_select wrap : Elapsed 0.055 ms (5.461 ms / 100) 5.485 -> 5.498 ( +0.24%) [ +0.00% +0.18% +0.18% / +0.24% +0.31% +0.64%] index_select linear : Elapsed 0.055 ms (5.485 ms / 100) 5.451 -> 5.446 ( -0.09%) [ +0.02% +0.09% +0.00% / -0.09% +0.40% +0.46%] index_select reverse : Elapsed 0.055 ms (5.452 ms / 100) 5.410 -> 5.425 ( +0.28%) [ +0.06% +0.15% +0.00% / +0.43% +0.28% +0.55%] index_select skip64 : Elapsed 0.054 ms (5.413 ms / 100) 5.416 -> 5.435 ( +0.35%) [ +0.15% +0.00% +0.33% / +0.37% +0.48% +0.35%] index_select skip256 : Elapsed 0.054 ms (5.424 ms / 100) 5.433 -> 5.446 ( +0.24%) [ +0.18% +0.00% +0.17% / +0.24% +0.39% +0.59%] index_select spread : Elapsed 0.054 ms (5.443 ms / 100) 5.443 -> 5.454 ( +0.20%) [ +0.00% +0.07% +0.26% / +0.20% +0.48% +0.44%] index_select strided 3 : Elapsed 0.054 ms (5.443 ms / 100) 5.455 -> 5.456 ( +0.02%) [ +0.00% +0.05% +0.11% / +0.02% +0.38% +0.35%] index_select random : Elapsed 0.055 ms (5.455 ms / 100) 5.421 -> 5.427 ( +0.11%) [ +0.00% +0.09% +0.06% / +0.11% +0.39% +0.39%] index_select random_sorted : Elapsed 0.054 ms (5.421 ms / 100) B = [20, 16, 40, 5] (stride (3200, 5, 80, 1)) A = [20, 4, 40, 5] (stride (5, 4000, 100, 1)) dim = 1 2.500 -> 2.499 ( -0.04%) [ +0.00% +0.00% +0.24% / -0.04% +0.36% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.500 ms / 100) 2.459 -> 2.465 ( +0.24%) [ +0.12% +0.00% +0.20% / +0.24% +0.33% +0.53%] index_copy_ linear : Elapsed 0.025 ms (2.462 ms / 100) 2.497 -> 2.497 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.44% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.499 ms / 100) 2.462 -> 2.462 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.08% +0.28%] index_copy_ reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.526 -> 2.528 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.08% +0.28%] index_add_ spread : Elapsed 0.025 ms (2.526 ms / 100) 2.506 -> 2.512 ( +0.24%) [ +0.36% +0.16% +0.00% / +0.24% +0.48% +0.28%] index_copy_ spread : Elapsed 0.025 ms (2.515 ms / 100) 2.528 -> 2.528 ( +0.00%) [ +0.00% +0.20% +0.20% / +0.00% +0.28% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.528 ms / 100) 2.535 -> 2.535 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.20% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.536 ms / 100) 2.486 -> 2.488 ( +0.08%) [ +0.16% +0.00% +0.04% / +0.08% +0.20% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.490 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.00% +0.12% +0.20% / +0.16% +0.36% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.496 -> 2.495 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.24% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.500 ms / 100) 2.488 -> 2.486 ( -0.08%) [ +0.20% +0.00% +0.04% / -0.08% +0.16% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.493 ms / 100) 2.520 -> 2.529 ( +0.36%) [ +0.00% +0.44% +0.28% / +0.36% +0.56% +0.60%] index_add_ perm : Elapsed 0.025 ms (2.520 ms / 100) 2.528 -> 2.534 ( +0.24%) [ +0.04% +0.00% +0.24% / +0.24% +0.51% +0.59%] index_copy_ perm : Elapsed 0.025 ms (2.529 ms / 100) 2.526 -> 2.527 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.08% +0.04% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.527 ms / 100) 2.531 -> 2.535 ( +0.16%) [ +0.08% +0.04% +0.00% / +0.16% +0.40% +0.51%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.533 ms / 100) 5.422 -> 5.417 ( -0.09%) [ +0.00% +0.02% +0.07% / -0.09% +0.41% +0.39%] index_select const : Elapsed 0.054 ms (5.422 ms / 100) 5.452 -> 5.459 ( +0.13%) [ +0.09% +0.00% +0.04% / +0.13% +0.20% +0.39%] index_select wrap : Elapsed 0.055 ms (5.457 ms / 100) 5.471 -> 5.473 ( +0.04%) [ +0.00% +0.05% +0.20% / +0.04% +0.20% +0.15%] index_select linear : Elapsed 0.055 ms (5.471 ms / 100) 5.433 -> 5.443 ( +0.18%) [ +0.00% +0.11% +0.07% / +0.18% +0.55% +0.31%] index_select reverse : Elapsed 0.054 ms (5.433 ms / 100) 5.425 -> 5.424 ( -0.02%) [ +0.39% +0.28% +0.00% / -0.02% +0.63% +0.48%] index_select skip64 : Elapsed 0.054 ms (5.446 ms / 100) 5.418 -> 5.416 ( -0.04%) [ +0.00% +0.04% +0.13% / -0.04% +0.41% +0.44%] index_select skip256 : Elapsed 0.054 ms (5.418 ms / 100) 5.440 -> 5.443 ( +0.06%) [ +0.18% +0.00% +0.18% / +0.06% +0.39% +0.48%] index_select spread : Elapsed 0.055 ms (5.450 ms / 100) 5.470 -> 5.476 ( +0.11%) [ +0.00% +0.11% +0.07% / +0.11% +0.35% +0.33%] index_select strided 3 : Elapsed 0.055 ms (5.470 ms / 100) 5.461 -> 5.465 ( +0.07%) [ +0.00% +0.27% +0.27% / +0.07% +0.49% +0.49%] index_select random : Elapsed 0.055 ms (5.461 ms / 100) 5.443 -> 5.454 ( +0.20%) [ +0.06% +0.06% +0.00% / +0.20% +0.39% +0.70%] index_select random_sorted : Elapsed 0.054 ms (5.446 ms / 100) B = [20, 16, 40, 5] (stride (200, 4000, 5, 1)) A = [20, 4, 40, 5] (stride (160, 1, 4, 3200)) dim = 1 2.369 -> 2.372 ( +0.13%) [ +0.30% +0.00% +0.04% / +0.13% +0.30% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.376 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.43% +0.30%] index_copy_ linear : Elapsed 0.023 ms (2.324 ms / 100) 2.373 -> 2.375 ( +0.08%) [ +0.00% +0.04% +0.17% / +0.08% +0.25% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.373 ms / 100) 2.327 -> 2.327 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.17% +0.26%] index_copy_ reverse : Elapsed 0.023 ms (2.329 ms / 100) 2.375 -> 2.380 ( +0.21%) [ +0.21% +0.00% +0.25% / +0.21% +0.46% +0.42%] index_add_ spread : Elapsed 0.024 ms (2.380 ms / 100) 2.324 -> 2.328 ( +0.17%) [ +0.00% +0.09% +0.04% / +0.17% +0.39% +0.39%] index_copy_ spread : Elapsed 0.023 ms (2.324 ms / 100) 2.375 -> 2.371 ( -0.17%) [ +0.08% +0.00% +0.25% / -0.17% +0.34% +0.51%] index_add_ strided 3 : Elapsed 0.024 ms (2.377 ms / 100) 2.325 -> 2.330 ( +0.22%) [ +0.04% +0.00% +0.17% / +0.22% +0.30% +0.47%] index_copy_ strided 3 : Elapsed 0.023 ms (2.326 ms / 100) 2.377 -> 2.377 ( +0.00%) [ +0.46% +0.21% +0.00% / +0.00% +0.38% +0.42%] index_add_ strided 5 : Elapsed 0.024 ms (2.388 ms / 100) 2.328 -> 2.329 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.04% +0.21% +0.21%] index_copy_ strided 5 : Elapsed 0.023 ms (2.332 ms / 100) 2.382 -> 2.377 ( -0.21%) [ +0.00% +0.00% +0.13% / -0.21% +0.08% +0.04%] index_add_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.326 -> 2.326 ( +0.00%) [ +0.30% +0.09% +0.00% / +0.00% +0.13% +0.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.333 ms / 100) 2.374 -> 2.377 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +0.55% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.377 ms / 100) 2.323 -> 2.327 ( +0.17%) [ +0.00% +0.13% +0.00% / +0.17% +0.52% +0.43%] index_copy_ perm : Elapsed 0.023 ms (2.323 ms / 100) 2.372 -> 2.373 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +0.34% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.375 ms / 100) 2.325 -> 2.325 ( +0.00%) [ +0.00% +0.13% +0.17% / +0.04% +0.17% +0.00%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.325 ms / 100) 5.038 -> 5.054 ( +0.32%) [ +0.00% +0.22% +0.26% / +0.32% +0.48% +0.64%] index_select const : Elapsed 0.050 ms (5.038 ms / 100) 5.057 -> 5.059 ( +0.04%) [ +0.00% +0.10% +0.14% / +0.04% +0.55% +0.34%] index_select wrap : Elapsed 0.051 ms (5.057 ms / 100) 5.067 -> 5.064 ( -0.06%) [ +0.08% +0.00% +0.02% / -0.06% +0.14% +0.04%] index_select linear : Elapsed 0.051 ms (5.071 ms / 100) 5.064 -> 5.065 ( +0.02%) [ +0.06% +0.00% +0.08% / +0.02% +0.22% +0.43%] index_select reverse : Elapsed 0.051 ms (5.067 ms / 100) 5.069 -> 5.076 ( +0.14%) [ +0.00% +0.20% +0.24% / +0.14% +0.24% +0.22%] index_select skip64 : Elapsed 0.051 ms (5.069 ms / 100) 5.055 -> 5.069 ( +0.28%) [ +0.00% +0.20% +0.04% / +0.28% +0.32% +0.45%] index_select skip256 : Elapsed 0.051 ms (5.055 ms / 100) 5.061 -> 5.063 ( +0.04%) [ +0.00% +0.20% +0.14% / +0.24% +0.53% +0.04%] index_select spread : Elapsed 0.051 ms (5.061 ms / 100) 5.050 -> 5.068 ( +0.36%) [ +0.00% +0.36% +0.12% / +0.46% +0.51% +0.36%] index_select strided 3 : Elapsed 0.051 ms (5.050 ms / 100) 5.066 -> 5.072 ( +0.12%) [ +0.06% +0.00% +0.10% / +0.12% +0.26% +0.12%] index_select random : Elapsed 0.051 ms (5.069 ms / 100) 5.044 -> 5.051 ( +0.14%) [ +0.28% +0.22% +0.00% / +0.14% +0.48% +0.32%] index_select random_sorted : Elapsed 0.051 ms (5.058 ms / 100) B = [20, 16, 40, 5] (stride (1, 4000, 100, 20)) A = [20, 4, 40, 5] (stride (40, 800, 1, 3200)) dim = 1 2.383 -> 2.386 ( +0.13%) [ +0.17% +0.13% +0.00% / +0.13% +0.17% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.387 ms / 100) 2.346 -> 2.350 ( +0.17%) [ +0.34% +0.00% +0.00% / +0.26% +0.17% +0.17%] index_copy_ linear : Elapsed 0.024 ms (2.354 ms / 100) 2.382 -> 2.384 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.13% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.382 ms / 100) 2.343 -> 2.342 ( -0.04%) [ +0.21% +0.00% +0.00% / -0.04% +0.13% +0.13%] index_copy_ reverse : Elapsed 0.023 ms (2.348 ms / 100) 2.380 -> 2.381 ( +0.04%) [ +0.17% +0.29% +0.00% / +0.29% +0.08% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.384 ms / 100) 2.340 -> 2.342 ( +0.09%) [ +0.00% +0.13% +0.17% / +0.09% +0.26% +0.51%] index_copy_ spread : Elapsed 0.023 ms (2.340 ms / 100) 2.389 -> 2.391 ( +0.08%) [ +0.04% +0.17% +0.00% / +0.29% +0.08% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.390 ms / 100) 2.345 -> 2.347 ( +0.09%) [ +0.00% +0.43% +0.21% / +0.30% +0.09% +0.34%] index_copy_ strided 3 : Elapsed 0.023 ms (2.345 ms / 100) 2.384 -> 2.389 ( +0.21%) [ +0.21% +0.00% +0.08% / +0.21% +0.21% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.389 ms / 100) 2.342 -> 2.344 ( +0.09%) [ +0.00% +0.30% +0.34% / +0.51% +0.09% +0.17%] index_copy_ strided 5 : Elapsed 0.023 ms (2.342 ms / 100) 2.383 -> 2.382 ( -0.04%) [ +0.13% +0.00% +0.00% / +0.13% +0.04% -0.04%] index_add_ strided 7 : Elapsed 0.024 ms (2.386 ms / 100) 2.344 -> 2.344 ( +0.00%) [ +0.09% +0.00% +0.21% / +0.00% +0.17% +0.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.346 ms / 100) 2.385 -> 2.384 ( -0.04%) [ +0.13% +0.08% +0.00% / +0.13% -0.04% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.388 ms / 100) 2.343 -> 2.342 ( -0.04%) [ +0.04% +0.17% +0.00% / +0.34% -0.04% -0.04%] index_copy_ perm : Elapsed 0.023 ms (2.344 ms / 100) 2.381 -> 2.382 ( +0.04%) [ +0.21% +0.13% +0.00% / +0.04% +0.17% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.386 ms / 100) 2.346 -> 2.348 ( +0.09%) [ +0.00% +0.17% +0.13% / +0.09% +0.09% +0.13%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.346 ms / 100) 4.948 -> 4.946 ( -0.04%) [ +0.00% +0.02% +0.02% / -0.04% +0.40% +0.42%] index_select const : Elapsed 0.049 ms (4.948 ms / 100) 5.009 -> 5.010 ( +0.02%) [ +0.00% +0.04% +0.02% / +0.02% +0.12% +0.24%] index_select wrap : Elapsed 0.050 ms (5.009 ms / 100) 5.014 -> 5.011 ( -0.06%) [ +0.00% +0.02% +0.00% / -0.06% +0.10% +0.14%] index_select linear : Elapsed 0.050 ms (5.014 ms / 100) 4.986 -> 4.988 ( +0.04%) [ +0.14% +0.00% +0.08% / +0.04% +0.40% +0.46%] index_select reverse : Elapsed 0.050 ms (4.993 ms / 100) 4.946 -> 4.950 ( +0.08%) [ +0.10% +0.00% +0.14% / +0.08% +0.22% +0.26%] index_select skip64 : Elapsed 0.050 ms (4.951 ms / 100) 4.947 -> 4.943 ( -0.08%) [ +0.00% +0.02% +0.00% / -0.08% +0.22% +0.18%] index_select skip256 : Elapsed 0.049 ms (4.947 ms / 100) 4.997 -> 5.002 ( +0.10%) [ +0.10% +0.12% +0.00% / +0.10% +0.32% +0.32%] index_select spread : Elapsed 0.050 ms (5.002 ms / 100) 5.050 -> 5.049 ( -0.02%) [ +0.06% +0.00% +0.00% / +0.02% +0.06% -0.02%] index_select strided 3 : Elapsed 0.051 ms (5.053 ms / 100) 5.007 -> 5.010 ( +0.06%) [ +0.06% +0.04% +0.00% / +0.06% +0.26% +0.26%] index_select random : Elapsed 0.050 ms (5.010 ms / 100) 5.012 -> 5.012 ( +0.00%) [ +0.20% +0.06% +0.00% / +0.00% +0.30% +0.24%] index_select random_sorted : Elapsed 0.050 ms (5.022 ms / 100) B = [20, 16, 40, 5] (stride (40, 4000, 1, 800)) A = [20, 4, 40, 5] (stride (800, 200, 1, 40)) dim = 1 2.489 -> 2.487 ( -0.08%) [ +0.00% +0.32% +0.04% / -0.08% +0.40% +0.44%] index_add_ linear : Elapsed 0.025 ms (2.489 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.00% +0.08% +0.16% / +0.21% +0.33% +0.45%] index_copy_ linear : Elapsed 0.024 ms (2.434 ms / 100) 2.488 -> 2.488 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +0.32% +0.32%] index_add_ reverse : Elapsed 0.025 ms (2.490 ms / 100) 2.438 -> 2.437 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.62% +0.37%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.486 -> 2.488 ( +0.08%) [ +0.12% +0.00% +0.32% / +0.08% +0.72% +0.56%] index_add_ spread : Elapsed 0.025 ms (2.489 ms / 100) 2.441 -> 2.439 ( -0.08%) [ +0.08% +0.00% +0.20% / -0.08% +0.29% +0.25%] index_copy_ spread : Elapsed 0.024 ms (2.443 ms / 100) 2.488 -> 2.487 ( -0.04%) [ +0.00% +0.04% +0.12% / -0.04% +0.32% +0.52%] index_add_ strided 3 : Elapsed 0.025 ms (2.488 ms / 100) 2.436 -> 2.437 ( +0.04%) [ +0.12% +0.21% +0.00% / +0.04% +0.37% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.484 -> 2.487 ( +0.12%) [ +0.20% +0.00% +0.08% / +0.12% +0.44% +0.64%] index_add_ strided 5 : Elapsed 0.025 ms (2.489 ms / 100) 2.437 -> 2.438 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.29% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.438 ms / 100) 2.489 -> 2.493 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.36% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.493 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.12% +0.00% +0.00% / +0.08% +0.33% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.487 -> 2.487 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.16% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.489 ms / 100) 2.435 -> 2.434 ( -0.04%) [ +0.00% +0.21% +0.00% / -0.04% +0.21% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.435 ms / 100) 2.481 -> 2.485 ( +0.16%) [ +0.00% +0.04% +0.24% / +0.16% +0.36% +0.48%] index_add_ perm_sorted : Elapsed 0.025 ms (2.481 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.08% +0.00% +0.12% / +0.00% +0.53% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) 5.336 -> 5.348 ( +0.22%) [ +0.00% +0.11% +0.22% / +0.22% +0.56% +0.56%] index_select const : Elapsed 0.053 ms (5.336 ms / 100) 5.388 -> 5.388 ( +0.00%) [ +0.22% +0.20% +0.00% / +0.00% +0.46% +0.50%] index_select wrap : Elapsed 0.054 ms (5.400 ms / 100) 5.419 -> 5.425 ( +0.11%) [ +0.00% +0.09% +0.13% / +0.11% +0.41% +0.52%] index_select linear : Elapsed 0.054 ms (5.419 ms / 100) 5.390 -> 5.392 ( +0.04%) [ +0.06% +0.06% +0.00% / +0.04% +0.30% +0.39%] index_select reverse : Elapsed 0.054 ms (5.393 ms / 100) 5.378 -> 5.388 ( +0.19%) [ +0.04% +0.00% +0.06% / +0.19% +0.22% +0.20%] index_select skip64 : Elapsed 0.054 ms (5.380 ms / 100) 5.361 -> 5.366 ( +0.09%) [ +0.09% +0.07% +0.00% / +0.09% +0.34% +0.28%] index_select skip256 : Elapsed 0.054 ms (5.366 ms / 100) 5.392 -> 5.386 ( -0.11%) [ +0.15% +0.00% +0.22% / -0.11% +0.43% +0.43%] index_select spread : Elapsed 0.054 ms (5.400 ms / 100) 5.408 -> 5.418 ( +0.18%) [ +0.20% +0.00% +0.15% / +0.18% +0.28% +0.20%] index_select strided 3 : Elapsed 0.054 ms (5.419 ms / 100) 5.419 -> 5.417 ( -0.04%) [ +0.06% +0.00% +0.02% / -0.04% +0.13% +0.20%] index_select random : Elapsed 0.054 ms (5.422 ms / 100) 5.383 -> 5.391 ( +0.15%) [ +0.06% +0.06% +0.00% / +0.15% +0.39% +0.33%] index_select random_sorted : Elapsed 0.054 ms (5.386 ms / 100) B = [20, 16, 40, 5] (stride (5, 100, 1600, 1)) A = [20, 4, 40, 5] (stride (20, 1, 400, 4)) dim = 1 2.506 -> 2.509 ( +0.12%) [ +0.04% +0.00% +0.08% / +0.12% +0.40% +0.24%] index_add_ linear : Elapsed 0.025 ms (2.507 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.00% +0.16% +0.29% / +0.33% +0.20% +0.12%] index_copy_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.508 -> 2.505 ( -0.12%) [ +0.12% +0.08% +0.00% / -0.12% +0.12% +0.16%] index_add_ reverse : Elapsed 0.025 ms (2.511 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.12% +0.00% +0.20% / +0.20% +0.20% +0.29%] index_copy_ reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.507 -> 2.510 ( +0.12%) [ +0.00% +0.20% +0.36% / +0.12% +0.16% +0.32%] index_add_ spread : Elapsed 0.025 ms (2.507 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.08% +0.04% +0.08%] index_copy_ spread : Elapsed 0.025 ms (2.461 ms / 100) 2.511 -> 2.509 ( -0.08%) [ +0.40% +0.00% +0.20% / -0.08% +0.16% +0.00%] index_add_ strided 3 : Elapsed 0.025 ms (2.521 ms / 100) 2.462 -> 2.457 ( -0.20%) [ +0.00% +0.00% +0.00% / -0.20% -0.04% -0.12%] index_copy_ strided 3 : Elapsed 0.025 ms (2.462 ms / 100) 2.508 -> 2.510 ( +0.08%) [ +0.00% +0.28% +0.08% / +0.08% +0.36% +0.40%] index_add_ strided 5 : Elapsed 0.025 ms (2.508 ms / 100) 2.460 -> 2.458 ( -0.08%) [ +0.12% +0.00% +0.12% / -0.08% +0.12% +0.20%] index_copy_ strided 5 : Elapsed 0.025 ms (2.463 ms / 100) 2.509 -> 2.507 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.16% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.510 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.29% +0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.506 -> 2.507 ( +0.04%) [ +0.40% +0.00% +0.04% / +0.20% +0.24% +0.04%] index_add_ perm : Elapsed 0.025 ms (2.516 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.41% +0.20% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.508 -> 2.511 ( +0.12%) [ +0.00% +0.16% +0.24% / +0.12% +0.36% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.508 ms / 100) 2.457 -> 2.461 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.16% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.00% +0.13% +0.24% / +0.00% +0.38% +0.31%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.496 -> 5.502 ( +0.11%) [ +0.11% +0.20% +0.00% / +0.11% +0.33% +0.25%] index_select wrap : Elapsed 0.055 ms (5.502 ms / 100) 5.499 -> 5.502 ( +0.05%) [ +0.05% +0.04% +0.00% / +0.05% +0.31% +0.25%] index_select linear : Elapsed 0.055 ms (5.502 ms / 100) 5.494 -> 5.497 ( +0.05%) [ +0.24% +0.09% +0.00% / +0.05% +0.31% +0.47%] index_select reverse : Elapsed 0.055 ms (5.507 ms / 100) 5.499 -> 5.497 ( -0.04%) [ +0.13% +0.00% +0.09% / -0.04% +0.36% +0.40%] index_select skip64 : Elapsed 0.055 ms (5.506 ms / 100) 5.502 -> 5.503 ( +0.02%) [ +0.00% +0.04% +0.07% / +0.02% +0.27% +0.24%] index_select skip256 : Elapsed 0.055 ms (5.502 ms / 100) 5.497 -> 5.494 ( -0.05%) [ +0.07% +0.02% +0.00% / -0.05% +0.25% +0.45%] index_select spread : Elapsed 0.055 ms (5.501 ms / 100) 5.496 -> 5.498 ( +0.04%) [ +0.16% +0.16% +0.00% / +0.04% +0.38% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.505 ms / 100) 5.501 -> 5.503 ( +0.04%) [ +0.00% +0.13% +0.15% / +0.04% +0.31% +0.16%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.497 -> 5.501 ( +0.07%) [ +0.15% +0.00% +0.18% / +0.07% +0.33% +0.36%] index_select random_sorted : Elapsed 0.055 ms (5.505 ms / 100) B = [20, 16, 40, 5] (stride (1, 20, 1600, 320)) A = [20, 4, 40, 5] (stride (4, 1, 80, 3200)) dim = 1 2.497 -> 2.497 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.60% +0.52%] index_add_ linear : Elapsed 0.025 ms (2.499 ms / 100) 2.445 -> 2.451 ( +0.25%) [ +0.41% +0.00% +0.20% / +0.25% +0.65% +0.65%] index_copy_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.503 -> 2.507 ( +0.16%) [ +0.20% +0.00% +0.20% / +0.16% +0.92% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.508 ms / 100) 2.458 -> 2.461 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.41% +0.45%] index_copy_ reverse : Elapsed 0.025 ms (2.459 ms / 100) 2.484 -> 2.485 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.68% +0.60%] index_add_ spread : Elapsed 0.025 ms (2.486 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.20% +0.00% +0.00% / +0.04% +0.41% +0.49%] index_copy_ spread : Elapsed 0.025 ms (2.459 ms / 100) 2.472 -> 2.478 ( +0.24%) [ +0.20% +0.16% +0.00% / +0.24% +0.93% +0.81%] index_add_ strided 3 : Elapsed 0.025 ms (2.477 ms / 100) 2.442 -> 2.444 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.66% +0.53%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.505 -> 2.506 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.40% +0.32%] index_add_ strided 5 : Elapsed 0.025 ms (2.506 ms / 100) 2.462 -> 2.469 ( +0.28%) [ +0.32% +0.00% +0.37% / +0.28% +0.32% +0.41%] index_copy_ strided 5 : Elapsed 0.025 ms (2.470 ms / 100) 2.496 -> 2.496 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.44% +0.52%] index_add_ strided 7 : Elapsed 0.025 ms (2.496 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.20% +0.12% / +0.00% +0.41% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.487 -> 2.489 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.36% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.490 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.00% +0.16% +0.04% / +0.29% +0.49% +0.41%] index_copy_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.486 -> 2.492 ( +0.24%) [ +0.36% +0.24% +0.00% / +0.24% +0.52% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.495 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.41% +0.53%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) 5.590 -> 5.589 ( -0.02%) [ +0.21% +0.00% +0.21% / -0.02% +0.47% +0.54%] index_select const : Elapsed 0.056 ms (5.602 ms / 100) 5.578 -> 5.584 ( +0.11%) [ +0.41% +0.43% +0.00% / +0.11% +0.70% +0.50%] index_select wrap : Elapsed 0.056 ms (5.601 ms / 100) 5.585 -> 5.588 ( +0.05%) [ +0.00% +0.27% +0.30% / +0.05% +0.61% +0.52%] index_select linear : Elapsed 0.056 ms (5.585 ms / 100) 5.583 -> 5.584 ( +0.02%) [ +0.29% +0.00% +0.05% / +0.02% +0.47% +0.50%] index_select reverse : Elapsed 0.056 ms (5.599 ms / 100) 5.588 -> 5.606 ( +0.32%) [ +0.00% +0.07% +0.21% / +0.32% +0.64% +0.47%] index_select skip64 : Elapsed 0.056 ms (5.588 ms / 100) 5.591 -> 5.597 ( +0.11%) [ +0.00% +0.00% +0.04% / +0.11% +0.47% +0.21%] index_select skip256 : Elapsed 0.056 ms (5.591 ms / 100) 5.588 -> 5.588 ( +0.00%) [ +0.00% +0.09% +0.16% / +0.00% +0.38% +0.41%] index_select spread : Elapsed 0.056 ms (5.588 ms / 100) 5.593 -> 5.601 ( +0.14%) [ +0.00% +0.02% +0.02% / +0.14% +0.23% +0.30%] index_select strided 3 : Elapsed 0.056 ms (5.593 ms / 100) 5.584 -> 5.574 ( -0.18%) [ +0.16% +0.05% +0.00% / -0.18% +0.29% +0.39%] index_select random : Elapsed 0.056 ms (5.593 ms / 100) 5.582 -> 5.586 ( +0.07%) [ +0.29% +0.00% +0.41% / +0.07% +0.68% +0.54%] index_select random_sorted : Elapsed 0.056 ms (5.598 ms / 100) B = [20, 16, 40, 5] (stride (40, 800, 1, 12800)) A = [20, 4, 40, 5] (stride (800, 200, 1, 40)) dim = 1 2.493 -> 2.495 ( +0.08%) [ +0.12% +0.00% +0.16% / +0.08% +0.36% +0.32%] index_add_ linear : Elapsed 0.025 ms (2.496 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.16% +0.12% +0.08%] index_copy_ linear : Elapsed 0.024 ms (2.445 ms / 100) 2.492 -> 2.492 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.00% +0.24% +0.20%] index_add_ reverse : Elapsed 0.025 ms (2.496 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.12% +0.08% +0.12%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.492 -> 2.488 ( -0.16%) [ +0.04% +0.00% +0.16% / +0.00% -0.08% -0.16%] index_add_ spread : Elapsed 0.025 ms (2.493 ms / 100) 2.440 -> 2.439 ( -0.04%) [ +0.00% +0.12% +0.57% / -0.04% -0.04% -0.04%] index_copy_ spread : Elapsed 0.024 ms (2.440 ms / 100) 2.493 -> 2.493 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.04% +0.24% +0.00%] index_add_ strided 3 : Elapsed 0.025 ms (2.496 ms / 100) 2.443 -> 2.440 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.04% +0.08% -0.12%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.496 -> 2.498 ( +0.08%) [ +0.00% +0.00% +0.12% / +0.08% +0.16% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.496 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.29% +0.12% +0.00% / +0.33% +0.25% +0.16%] index_copy_ strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.487 -> 2.490 ( +0.12%) [ +0.00% +0.08% +0.28% / +0.12% +0.20% +0.12%] index_add_ strided 7 : Elapsed 0.025 ms (2.487 ms / 100) 2.435 -> 2.438 ( +0.12%) [ +0.25% +0.00% +0.33% / +0.12% +0.29% +0.16%] index_copy_ strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.497 -> 2.493 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.12% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.497 ms / 100) 2.444 -> 2.441 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.12% +0.08% +0.00%] index_copy_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.490 -> 2.492 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.12% +0.08% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.491 ms / 100) 2.439 -> 2.444 ( +0.21%) [ +0.00% +0.21% +0.04% / +0.37% +0.45% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 5.354 -> 5.357 ( +0.06%) [ +0.24% +0.09% +0.00% / +0.06% +0.39% +0.39%] index_select const : Elapsed 0.054 ms (5.367 ms / 100) 5.401 -> 5.403 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.04% +0.11% +0.15%] index_select wrap : Elapsed 0.054 ms (5.403 ms / 100) 5.410 -> 5.411 ( +0.02%) [ +0.00% +0.00% +0.06% / +0.02% +0.07% +0.07%] index_select linear : Elapsed 0.054 ms (5.410 ms / 100) 5.391 -> 5.388 ( -0.06%) [ +0.17% +0.02% +0.00% / -0.06% +0.46% +0.35%] index_select reverse : Elapsed 0.054 ms (5.400 ms / 100) 5.353 -> 5.353 ( +0.00%) [ +0.19% +0.00% +0.13% / +0.00% +0.19% +0.54%] index_select skip64 : Elapsed 0.054 ms (5.363 ms / 100) 5.355 -> 5.367 ( +0.22%) [ +0.21% +0.00% +0.06% / +0.24% +0.41% +0.22%] index_select skip256 : Elapsed 0.054 ms (5.366 ms / 100) 5.385 -> 5.396 ( +0.20%) [ +0.20% +0.00% +0.06% / +0.20% +0.39% +0.35%] index_select spread : Elapsed 0.054 ms (5.396 ms / 100) 5.405 -> 5.408 ( +0.06%) [ +0.00% +0.09% +0.11% / +0.06% +0.19% +0.26%] index_select strided 3 : Elapsed 0.054 ms (5.405 ms / 100) 5.400 -> 5.400 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.28% +0.19%] index_select random : Elapsed 0.054 ms (5.400 ms / 100) 5.421 -> 5.418 ( -0.06%) [ +0.00% +0.02% +0.09% / -0.06% +0.30% +0.35%] index_select random_sorted : Elapsed 0.054 ms (5.421 ms / 100) out_shape = [20, 4, 16, 5] in_shape = [20, 4, 40, 5] idx_dim = 2 B = [20, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 2 fill_cnt = 40 1.796 -> 1.793 ( -0.17%) [ +0.00% +0.11% +0.06% / -0.11% +0.17% -0.17%] index_fill_ const : Elapsed 0.018 ms (1.796 ms / 100) 1.799 -> 1.801 ( +0.11%) [ +0.22% +0.22% +0.00% / +0.11% +0.33% +0.17%] index_fill_ linear : Elapsed 0.018 ms (1.803 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.22% +0.00% +0.22% / +0.06% +0.28% +0.39%] index_fill_ reverse : Elapsed 0.018 ms (1.794 ms / 100) 1.796 -> 1.794 ( -0.11%) [ +0.22% +0.00% +0.11% / -0.11% +0.17% -0.06%] index_fill_ skip64 : Elapsed 0.018 ms (1.800 ms / 100) 1.794 -> 1.796 ( +0.11%) [ +0.22% +0.00% +0.00% / +0.22% +0.11% +0.28%] index_fill_ skip256 : Elapsed 0.018 ms (1.798 ms / 100) 1.795 -> 1.796 ( +0.06%) [ +0.11% +0.22% +0.00% / +0.06% +0.39% +0.06%] index_fill_ spread : Elapsed 0.018 ms (1.797 ms / 100) 1.799 -> 1.799 ( +0.00%) [ +0.11% +0.22% +0.00% / +0.00% +0.17% +0.28%] index_fill_ strided 3 : Elapsed 0.018 ms (1.801 ms / 100) 1.801 -> 1.802 ( +0.06%) [ +0.22% +0.33% +0.00% / +0.06% +0.33% +0.17%] index_fill_ strided 5 : Elapsed 0.018 ms (1.805 ms / 100) 1.802 -> 1.802 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +0.00% +0.11%] index_fill_ strided 7 : Elapsed 0.018 ms (1.802 ms / 100) 1.797 -> 1.794 ( -0.17%) [ +0.06% +0.11% +0.00% / -0.17% +0.06% -0.06%] index_fill_ strided 8 : Elapsed 0.018 ms (1.798 ms / 100) 1.802 -> 1.802 ( +0.00%) [ +0.11% +0.06% +0.00% / +0.00% +0.22% +0.22%] index_fill_ random : Elapsed 0.018 ms (1.804 ms / 100) 1.798 -> 1.800 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.33% +0.11% +0.22%] index_fill_ random_sorted : Elapsed 0.018 ms (1.800 ms / 100) B = [20, 4, 16, 5] (stride (320, 5, 20, 1)) A = [20, 4, 40, 5] (stride (4, 1, 80, 3200)) dim = 2 3.610 -> 3.610 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.42% +0.39%] index_select const : Elapsed 0.036 ms (3.611 ms / 100) 3.609 -> 3.614 ( +0.14%) [ +0.08% +0.03% +0.00% / +0.14% +0.42% +0.53%] index_select wrap : Elapsed 0.036 ms (3.612 ms / 100) 3.593 -> 3.596 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.58% +0.64%] index_select linear : Elapsed 0.036 ms (3.596 ms / 100) 3.591 -> 3.592 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.39% +0.39%] index_select reverse : Elapsed 0.036 ms (3.591 ms / 100) 3.609 -> 3.609 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.64% +0.64%] index_select skip64 : Elapsed 0.036 ms (3.611 ms / 100) 3.608 -> 3.611 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.64% +0.64%] index_select skip256 : Elapsed 0.036 ms (3.609 ms / 100) 3.597 -> 3.597 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.64% +0.42%] index_select spread : Elapsed 0.036 ms (3.602 ms / 100) 3.620 -> 3.627 ( +0.19%) [ +0.03% +0.19% +0.00% / +0.19% +0.33% +0.47%] index_select strided 3 : Elapsed 0.036 ms (3.621 ms / 100) 3.584 -> 3.585 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.50% +0.50%] index_select strided 5 : Elapsed 0.036 ms (3.586 ms / 100) 3.591 -> 3.593 ( +0.06%) [ +0.00% +0.03% +0.06% / +0.06% +0.28% +0.56%] index_select strided 7 : Elapsed 0.036 ms (3.591 ms / 100) 3.613 -> 3.613 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.39% +0.42%] index_select strided 8 : Elapsed 0.036 ms (3.614 ms / 100) 3.615 -> 3.615 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.33% +0.41%] index_select strided 16 : Elapsed 0.036 ms (3.616 ms / 100) 3.591 -> 3.591 ( +0.00%) [ +0.00% +0.11% +0.06% / +0.00% +0.45% +0.47%] index_select random : Elapsed 0.036 ms (3.591 ms / 100) 3.584 -> 3.585 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.53% +0.53%] index_select random_sorted : Elapsed 0.036 ms (3.585 ms / 100) 3.590 -> 3.591 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.33% +0.36%] index_select perm : Elapsed 0.036 ms (3.590 ms / 100) 3.597 -> 3.597 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.53% +0.53%] index_select perm_sorted : Elapsed 0.036 ms (3.598 ms / 100) B = [20, 4, 16, 5] (stride (1, 1600, 100, 20)) A = [20, 4, 40, 5] (stride (20, 1, 400, 4)) dim = 2 3.829 -> 3.832 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.65% +0.65%] index_select const : Elapsed 0.038 ms (3.829 ms / 100) 3.810 -> 3.810 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.66% +0.68%] index_select wrap : Elapsed 0.038 ms (3.810 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.68% +0.68%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.84% +0.81%] index_select reverse : Elapsed 0.038 ms (3.807 ms / 100) 3.821 -> 3.829 ( +0.21%) [ +0.16% +0.13% +0.00% / +0.21% +0.84% +0.76%] index_select skip64 : Elapsed 0.038 ms (3.827 ms / 100) 3.827 -> 3.831 ( +0.10%) [ +0.13% +0.03% +0.00% / +0.10% +0.86% +0.84%] index_select skip256 : Elapsed 0.038 ms (3.832 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.76% +0.76%] index_select spread : Elapsed 0.038 ms (3.802 ms / 100) 3.811 -> 3.812 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.63% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.813 -> 3.813 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select strided 5 : Elapsed 0.038 ms (3.814 ms / 100) 3.803 -> 3.803 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.71% +0.76%] index_select strided 7 : Elapsed 0.038 ms (3.805 ms / 100) 3.818 -> 3.818 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.97% +0.84%] index_select strided 8 : Elapsed 0.038 ms (3.818 ms / 100) 3.801 -> 3.803 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.71% +0.74%] index_select strided 16 : Elapsed 0.038 ms (3.802 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.79% +0.81%] index_select random : Elapsed 0.038 ms (3.818 ms / 100) 3.807 -> 3.806 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.79% +0.79%] index_select random_sorted : Elapsed 0.038 ms (3.808 ms / 100) 3.809 -> 3.809 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.79% +0.81%] index_select perm : Elapsed 0.038 ms (3.812 ms / 100) 3.816 -> 3.815 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.79% +0.86%] index_select perm_sorted : Elapsed 0.038 ms (3.818 ms / 100) B = [20, 4, 16, 5] (stride (1, 100, 400, 20)) A = [20, 4, 40, 5] (stride (200, 4000, 5, 1)) dim = 2 3.917 -> 3.923 ( +0.15%) [ +0.13% +0.00% +0.18% / +0.15% +0.38% +0.41%] index_select const : Elapsed 0.039 ms (3.922 ms / 100) 3.943 -> 3.948 ( +0.13%) [ +0.10% +0.03% +0.00% / +0.13% +0.58% +0.48%] index_select wrap : Elapsed 0.039 ms (3.947 ms / 100) 3.928 -> 3.936 ( +0.20%) [ +0.00% +0.00% +0.33% / +0.20% +0.51% +0.51%] index_select linear : Elapsed 0.039 ms (3.928 ms / 100) 3.937 -> 3.938 ( +0.03%) [ +0.05% +0.20% +0.00% / +0.03% +0.53% +0.61%] index_select reverse : Elapsed 0.039 ms (3.939 ms / 100) 3.920 -> 3.919 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.41% +0.48%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.916 -> 3.921 ( +0.13%) [ +0.15% +0.10% +0.00% / +0.13% +0.54% +0.54%] index_select skip256 : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.00% +0.00% +0.15% / +0.03% +0.41% +0.38%] index_select spread : Elapsed 0.039 ms (3.921 ms / 100) 3.922 -> 3.919 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.36% +0.38%] index_select strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 3.918 -> 3.923 ( +0.13%) [ +0.05% +0.00% +0.13% / +0.13% +0.28% +0.36%] index_select strided 5 : Elapsed 0.039 ms (3.920 ms / 100) 3.922 -> 3.932 ( +0.25%) [ +0.05% +0.15% +0.00% / +0.25% +0.41% +0.48%] index_select strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 3.927 -> 3.926 ( -0.03%) [ +0.36% +0.00% +0.33% / -0.03% +0.64% +0.41%] index_select strided 8 : Elapsed 0.039 ms (3.941 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.00% +0.13% +0.08% / +0.05% +0.46% +0.41%] index_select strided 16 : Elapsed 0.039 ms (3.919 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.03% +0.13% +0.00% / +0.00% +0.46% +0.33%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.923 -> 3.927 ( +0.10%) [ +0.08% +0.05% +0.00% / +0.10% +0.38% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.926 -> 3.934 ( +0.20%) [ +0.00% +0.03% +0.20% / +0.20% +0.43% +0.36%] index_select perm : Elapsed 0.039 ms (3.926 ms / 100) 3.938 -> 3.943 ( +0.13%) [ +0.00% +0.13% +0.05% / +0.13% +0.53% +0.51%] index_select perm_sorted : Elapsed 0.039 ms (3.938 ms / 100) B = [20, 4, 16, 5] (stride (4, 1, 400, 80)) A = [20, 4, 40, 5] (stride (1, 4000, 20, 800)) dim = 2 3.812 -> 3.812 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_select const : Elapsed 0.038 ms (3.813 ms / 100) 3.798 -> 3.802 ( +0.11%) [ +0.05% +0.08% +0.00% / +0.11% +0.74% +0.76%] index_select wrap : Elapsed 0.038 ms (3.800 ms / 100) 3.789 -> 3.788 ( -0.03%) [ +0.00% +0.13% +0.11% / -0.03% +0.74% +0.58%] index_select linear : Elapsed 0.038 ms (3.789 ms / 100) 3.807 -> 3.810 ( +0.08%) [ +0.18% +0.00% +0.16% / +0.08% +0.76% +0.92%] index_select reverse : Elapsed 0.038 ms (3.814 ms / 100) 3.795 -> 3.796 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.84% +0.82%] index_select skip64 : Elapsed 0.038 ms (3.797 ms / 100) 3.811 -> 3.811 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.71%] index_select skip256 : Elapsed 0.038 ms (3.812 ms / 100) 3.811 -> 3.814 ( +0.08%) [ +0.16% +0.13% +0.00% / +0.08% +0.87% +0.87%] index_select spread : Elapsed 0.038 ms (3.817 ms / 100) 3.803 -> 3.800 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.55% +0.66%] index_select strided 3 : Elapsed 0.038 ms (3.803 ms / 100) 3.781 -> 3.781 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.61% +0.61%] index_select strided 5 : Elapsed 0.038 ms (3.781 ms / 100) 3.791 -> 3.795 ( +0.11%) [ +0.16% +0.11% +0.00% / +0.11% +0.87% +0.74%] index_select strided 7 : Elapsed 0.038 ms (3.797 ms / 100) 3.778 -> 3.778 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.69%] index_select strided 8 : Elapsed 0.038 ms (3.779 ms / 100) 3.740 -> 3.742 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.78% +0.75%] index_select strided 16 : Elapsed 0.037 ms (3.741 ms / 100) 3.794 -> 3.794 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.74% +0.74%] index_select random : Elapsed 0.038 ms (3.795 ms / 100) 3.814 -> 3.817 ( +0.08%) [ +0.13% +0.00% +0.10% / +0.08% +0.89% +0.84%] index_select random_sorted : Elapsed 0.038 ms (3.819 ms / 100) 3.795 -> 3.795 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.79% +0.87%] index_select perm : Elapsed 0.038 ms (3.797 ms / 100) 3.840 -> 3.839 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.83% +0.78%] index_select perm_sorted : Elapsed 0.038 ms (3.842 ms / 100) B = [20, 4, 16, 5] (stride (4, 1, 400, 80)) A = [20, 4, 40, 5] (stride (160, 1, 4, 3200)) dim = 2 3.688 -> 3.685 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.08% +0.79% +0.62%] index_select const : Elapsed 0.037 ms (3.689 ms / 100) 3.690 -> 3.690 ( +0.00%) [ +0.14% +0.00% +0.11% / +0.00% +0.33% +0.54%] index_select wrap : Elapsed 0.037 ms (3.695 ms / 100) 3.700 -> 3.702 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.59% +0.59%] index_select linear : Elapsed 0.037 ms (3.701 ms / 100) 3.691 -> 3.698 ( +0.19%) [ +0.00% +0.16% +0.19% / +0.19% +0.51% +0.43%] index_select reverse : Elapsed 0.037 ms (3.691 ms / 100) 3.683 -> 3.684 ( +0.03%) [ +0.00% +0.16% +0.19% / +0.03% +0.73% +0.60%] index_select skip64 : Elapsed 0.037 ms (3.683 ms / 100) 3.689 -> 3.699 ( +0.27%) [ +0.30% +0.16% +0.00% / +0.27% +0.41% +0.54%] index_select skip256 : Elapsed 0.037 ms (3.700 ms / 100) 3.703 -> 3.702 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.43% +0.43%] index_select spread : Elapsed 0.037 ms (3.703 ms / 100) 3.698 -> 3.702 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.41% +0.51%] index_select strided 3 : Elapsed 0.037 ms (3.702 ms / 100) 3.695 -> 3.694 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.51% +0.41%] index_select strided 5 : Elapsed 0.037 ms (3.697 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.03% +0.11% +0.00% / +0.03% +0.38% +0.43%] index_select strided 7 : Elapsed 0.037 ms (3.695 ms / 100) 3.696 -> 3.699 ( +0.08%) [ +0.00% +0.05% +0.11% / +0.08% +0.41% +0.41%] index_select strided 8 : Elapsed 0.037 ms (3.696 ms / 100) 3.696 -> 3.700 ( +0.11%) [ +0.00% +0.08% +0.03% / +0.11% +0.38% +0.41%] index_select strided 16 : Elapsed 0.037 ms (3.696 ms / 100) 3.695 -> 3.702 ( +0.19%) [ +0.03% +0.00% +0.08% / +0.19% +0.22% +0.60%] index_select random : Elapsed 0.037 ms (3.696 ms / 100) 3.694 -> 3.702 ( +0.22%) [ +0.08% +0.00% +0.00% / +0.24% +0.22% +0.49%] index_select random_sorted : Elapsed 0.037 ms (3.697 ms / 100) 3.695 -> 3.697 ( +0.05%) [ +0.00% +0.00% +0.08% / +0.05% +0.49% +0.35%] index_select perm : Elapsed 0.037 ms (3.695 ms / 100) 3.693 -> 3.699 ( +0.16%) [ +0.05% +0.00% +0.22% / +0.16% +0.46% +0.46%] index_select perm_sorted : Elapsed 0.037 ms (3.695 ms / 100) B = [20, 4, 16, 5] (stride (1, 20, 400, 80)) A = [20, 4, 40, 5] (stride (1, 4000, 100, 20)) dim = 2 4.023 -> 4.024 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.77% +0.77%] index_select const : Elapsed 0.040 ms (4.026 ms / 100) 4.030 -> 4.035 ( +0.12%) [ +0.00% +0.05% +0.15% / +0.12% +0.72% +0.65%] index_select wrap : Elapsed 0.040 ms (4.030 ms / 100) 4.015 -> 4.015 ( +0.00%) [ +0.00% +0.05% +0.02% / +0.00% +0.57% +0.62%] index_select linear : Elapsed 0.040 ms (4.015 ms / 100) 4.011 -> 4.008 ( -0.07%) [ +0.00% +0.02% +0.05% / -0.07% +0.72% +0.72%] index_select reverse : Elapsed 0.040 ms (4.011 ms / 100) 4.061 -> 4.063 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.89% +0.86%] index_select skip64 : Elapsed 0.041 ms (4.063 ms / 100) 4.024 -> 4.022 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.75% +0.77%] index_select skip256 : Elapsed 0.040 ms (4.025 ms / 100) 4.020 -> 4.022 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.67% +0.72%] index_select spread : Elapsed 0.040 ms (4.024 ms / 100) 4.043 -> 4.044 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.59% +0.59%] index_select strided 3 : Elapsed 0.040 ms (4.043 ms / 100) 4.016 -> 4.017 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.67% +0.72%] index_select strided 5 : Elapsed 0.040 ms (4.018 ms / 100) 4.042 -> 4.041 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.64% +0.67%] index_select strided 7 : Elapsed 0.040 ms (4.043 ms / 100) 4.040 -> 4.041 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.72% +0.74%] index_select strided 8 : Elapsed 0.040 ms (4.040 ms / 100) 4.036 -> 4.037 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.79% +0.82%] index_select strided 16 : Elapsed 0.040 ms (4.039 ms / 100) 4.029 -> 4.037 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.72% +0.74%] index_select random : Elapsed 0.040 ms (4.029 ms / 100) 4.018 -> 4.019 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.67% +0.70%] index_select random_sorted : Elapsed 0.040 ms (4.019 ms / 100) 4.033 -> 4.035 ( +0.05%) [ +0.02% +0.00% +0.10% / +0.05% +0.82% +0.92%] index_select perm : Elapsed 0.040 ms (4.034 ms / 100) 4.055 -> 4.054 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.72% +0.69%] index_select perm_sorted : Elapsed 0.041 ms (4.055 ms / 100) B = [20, 4, 16, 5] (stride (4, 1, 80, 1280)) A = [20, 4, 40, 5] (stride (1, 800, 20, 3200)) dim = 2 3.793 -> 3.793 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.69% +0.63%] index_select const : Elapsed 0.038 ms (3.794 ms / 100) 3.817 -> 3.817 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.52% +0.55%] index_select wrap : Elapsed 0.038 ms (3.817 ms / 100) 3.810 -> 3.814 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.58% +0.47%] index_select linear : Elapsed 0.038 ms (3.814 ms / 100) 3.804 -> 3.805 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.47% +0.45%] index_select reverse : Elapsed 0.038 ms (3.806 ms / 100) 3.797 -> 3.798 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.45% +0.45%] index_select skip64 : Elapsed 0.038 ms (3.798 ms / 100) 3.793 -> 3.793 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.50% +0.53%] index_select skip256 : Elapsed 0.038 ms (3.795 ms / 100) 3.801 -> 3.800 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.34% +0.37%] index_select spread : Elapsed 0.038 ms (3.802 ms / 100) 3.807 -> 3.813 ( +0.16%) [ +0.11% +0.13% +0.00% / +0.16% +0.55% +0.47%] index_select strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.790 -> 3.801 ( +0.29%) [ +0.00% +0.08% +0.03% / +0.29% +0.42% +0.29%] index_select strided 5 : Elapsed 0.038 ms (3.790 ms / 100) 3.821 -> 3.819 ( -0.05%) [ +0.03% +0.08% +0.00% / -0.05% +0.47% +0.47%] index_select strided 7 : Elapsed 0.038 ms (3.822 ms / 100) 3.811 -> 3.811 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.37% +0.37%] index_select strided 8 : Elapsed 0.038 ms (3.812 ms / 100) 3.824 -> 3.825 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.37% +0.37%] index_select strided 16 : Elapsed 0.038 ms (3.825 ms / 100) 3.813 -> 3.813 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.24%] index_select random : Elapsed 0.038 ms (3.813 ms / 100) 3.794 -> 3.794 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.53% +0.47%] index_select random_sorted : Elapsed 0.038 ms (3.796 ms / 100) 3.797 -> 3.797 ( +0.00%) [ +0.11% +0.00% +0.08% / +0.00% +0.45% +0.45%] index_select perm : Elapsed 0.038 ms (3.801 ms / 100) 3.791 -> 3.797 ( +0.16%) [ +0.11% +0.11% +0.00% / +0.16% +0.47% +0.40%] index_select perm_sorted : Elapsed 0.038 ms (3.795 ms / 100) out_shape = [20, 4, 40, 16] in_shape = [20, 4, 40, 5] idx_dim = 3 B = [20, 4, 40, 16] (stride (2560, 640, 1, 40)) A = [20, 4, 40, 5] (stride (160, 40, 1, 3200)) dim = 3 1.998 -> 1.998 ( +0.00%) [ +0.25% +0.00% +0.45% / +0.15% +0.00% +0.05%] index_add_ linear : Elapsed 0.020 ms (2.003 ms / 100) 1.954 -> 1.956 ( +0.10%) [ +0.26% +0.05% +0.00% / +0.10% +0.20% +0.10%] index_copy_ linear : Elapsed 0.020 ms (1.959 ms / 100) 2.005 -> 1.996 ( -0.45%) [ +0.00% +0.10% +0.05% / -0.25% -0.35% -0.45%] index_add_ reverse : Elapsed 0.020 ms (2.005 ms / 100) 1.955 -> 1.954 ( -0.05%) [ +0.26% +0.10% +0.00% / -0.05% +0.31% +0.05%] index_copy_ reverse : Elapsed 0.020 ms (1.960 ms / 100) 1.999 -> 1.999 ( +0.00%) [ +0.15% +0.10% +0.00% / +0.00% +0.25% +0.15%] index_add_ spread : Elapsed 0.020 ms (2.002 ms / 100) 1.953 -> 1.958 ( +0.26%) [ +0.05% +0.10% +0.00% / +0.26% +0.51% +0.67%] index_copy_ spread : Elapsed 0.020 ms (1.954 ms / 100) 1.999 -> 2.001 ( +0.10%) [ +0.25% +0.00% +0.50% / +0.10% +0.10% +0.15%] index_add_ strided 3 : Elapsed 0.020 ms (2.004 ms / 100) 1.955 -> 1.959 ( +0.20%) [ +0.05% +0.00% +0.26% / +0.20% +0.56% +0.31%] index_copy_ strided 3 : Elapsed 0.020 ms (1.956 ms / 100) 2.001 -> 2.001 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.20% +0.00% +0.10%] index_add_ strided 5 : Elapsed 0.020 ms (2.004 ms / 100) 1.955 -> 1.956 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.46% +0.51%] index_copy_ strided 5 : Elapsed 0.020 ms (1.958 ms / 100) 1.998 -> 1.997 ( -0.05%) [ +0.25% +0.00% +0.00% / -0.05% +0.25% +0.00%] index_add_ strided 7 : Elapsed 0.020 ms (2.003 ms / 100) 1.957 -> 1.958 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.56% +0.46%] index_copy_ strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.998 -> 1.999 ( +0.05%) [ +0.25% +0.50% +0.00% / +0.25% +0.10% +0.05%] index_add_ perm : Elapsed 0.020 ms (2.003 ms / 100) 1.957 -> 1.963 ( +0.31%) [ +0.00% +0.05% +0.05% / +0.31% +0.61% +0.46%] index_copy_ perm : Elapsed 0.020 ms (1.957 ms / 100) 1.998 -> 1.999 ( +0.05%) [ +0.15% +0.00% +0.10% / +0.20% +0.05% +0.15%] index_add_ perm_sorted : Elapsed 0.020 ms (2.001 ms / 100) 1.959 -> 1.956 ( -0.15%) [ +0.05% +0.10% +0.00% / -0.15% +0.10% +0.46%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.960 ms / 100) 3.717 -> 3.714 ( -0.08%) [ +0.16% +0.03% +0.00% / -0.08% +0.73% +0.75%] index_select const : Elapsed 0.037 ms (3.723 ms / 100) 3.755 -> 3.757 ( +0.05%) [ +0.00% +0.05% +0.16% / +0.05% +0.91% +0.77%] index_select wrap : Elapsed 0.038 ms (3.755 ms / 100) 3.782 -> 3.779 ( -0.08%) [ +0.00% +0.08% +0.03% / -0.08% +0.50% +0.42%] index_select linear : Elapsed 0.038 ms (3.782 ms / 100) 3.767 -> 3.772 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.69% +0.80%] index_select reverse : Elapsed 0.038 ms (3.772 ms / 100) 3.693 -> 3.695 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.05% +0.87% +0.84%] index_select skip64 : Elapsed 0.037 ms (3.699 ms / 100) 3.715 -> 3.721 ( +0.16%) [ +0.00% +0.11% +0.05% / +0.16% +0.73% +0.73%] index_select skip256 : Elapsed 0.037 ms (3.715 ms / 100) 3.744 -> 3.758 ( +0.37%) [ +0.00% +0.13% +0.08% / +0.37% +1.04% +1.10%] index_select spread : Elapsed 0.037 ms (3.744 ms / 100) 3.776 -> 3.777 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.72% +0.72%] index_select strided 3 : Elapsed 0.038 ms (3.776 ms / 100) 3.769 -> 3.766 ( -0.08%) [ +0.00% +0.11% +0.00% / -0.08% +0.53% +0.58%] index_select random : Elapsed 0.038 ms (3.769 ms / 100) 3.762 -> 3.760 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.66% +0.72%] index_select random_sorted : Elapsed 0.038 ms (3.762 ms / 100) B = [20, 4, 40, 16] (stride (2560, 16, 64, 1)) dim = 3 fill_cnt = 5 1.189 -> 1.186 ( -0.25%) [ +0.00% +0.08% +0.00% / -0.25% +0.00% +0.25%] index_fill_ const : Elapsed 0.012 ms (1.189 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.17% +0.17%] index_fill_ linear : Elapsed 0.012 ms (1.187 ms / 100) 1.187 -> 1.186 ( -0.08%) [ +0.00% +0.00% +0.25% / -0.08% +0.34% +0.25%] index_fill_ reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.186 -> 1.187 ( +0.08%) [ +0.42% +0.00% +0.08% / +0.17% +0.08% +0.51%] index_fill_ skip64 : Elapsed 0.012 ms (1.191 ms / 100) 1.186 -> 1.188 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +0.34% +0.34%] index_fill_ skip256 : Elapsed 0.012 ms (1.186 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.00% +0.16% +0.32% / +0.16% +0.40% +0.48%] index_fill_ spread : Elapsed 0.012 ms (1.241 ms / 100) 1.243 -> 1.241 ( -0.16%) [ +0.16% +0.00% +0.16% / -0.16% +0.56% +0.48%] index_fill_ strided 3 : Elapsed 0.012 ms (1.245 ms / 100) 1.243 -> 1.244 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.40% +0.24%] index_fill_ strided 5 : Elapsed 0.012 ms (1.244 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.32% +0.89%] index_fill_ strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.244 -> 1.242 ( -0.16%) [ +0.00% +0.08% +0.24% / -0.16% +0.16% +0.64%] index_fill_ strided 8 : Elapsed 0.012 ms (1.244 ms / 100) 1.186 -> 1.185 ( -0.08%) [ +0.25% +0.08% +0.00% / -0.08% +0.25% +0.42%] index_fill_ random : Elapsed 0.012 ms (1.189 ms / 100) 1.186 -> 1.188 ( +0.17%) [ +0.25% +0.00% +0.00% / +0.17% +0.51% +0.51%] index_fill_ random_sorted : Elapsed 0.012 ms (1.189 ms / 100) 1.240 -> 1.241 ( +0.08%) [ +0.56% +0.32% +0.00% / +0.08% +0.40% +0.89%] index_fill_ perm : Elapsed 0.012 ms (1.247 ms / 100) 1.239 -> 1.241 ( +0.16%) [ +0.40% +0.48% +0.00% / +0.16% +0.40% +0.81%] index_fill_ perm_sorted : Elapsed 0.012 ms (1.244 ms / 100) B = [20, 4, 40, 16] (stride (640, 12800, 1, 40)) A = [20, 4, 40, 5] (stride (1, 20, 400, 80)) dim = 3 2.533 -> 2.534 ( +0.04%) [ +0.04% +0.51% +0.00% / +0.04% +0.43% +0.75%] index_add_ linear : Elapsed 0.025 ms (2.534 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.00% +0.20% +0.04% / +0.04% +0.49% +0.49%] index_copy_ linear : Elapsed 0.025 ms (2.458 ms / 100) 2.536 -> 2.540 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.55% +0.47%] index_add_ reverse : Elapsed 0.025 ms (2.540 ms / 100) 2.458 -> 2.461 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.12% +0.49% +0.45%] index_copy_ reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.530 -> 2.531 ( +0.04%) [ +0.24% +0.08% +0.00% / +0.04% +0.59% +0.55%] index_add_ spread : Elapsed 0.025 ms (2.536 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.20% +0.61% +0.61%] index_copy_ spread : Elapsed 0.025 ms (2.457 ms / 100) 2.524 -> 2.530 ( +0.24%) [ +0.28% +0.36% +0.00% / +0.24% +0.71% +0.87%] index_add_ strided 3 : Elapsed 0.025 ms (2.531 ms / 100) 2.450 -> 2.457 ( +0.29%) [ +0.16% +0.29% +0.00% / +0.29% +0.61% +0.78%] index_copy_ strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.529 -> 2.529 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +0.63% +0.51%] index_add_ strided 5 : Elapsed 0.025 ms (2.532 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.65% +0.49%] index_copy_ strided 5 : Elapsed 0.025 ms (2.453 ms / 100) 2.528 -> 2.529 ( +0.04%) [ +0.20% +0.00% +0.12% / +0.04% +0.59% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.533 ms / 100) 2.451 -> 2.451 ( +0.00%) [ +0.16% +0.00% +0.12% / +0.00% +0.53% +0.33%] index_copy_ strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.532 -> 2.532 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.39% +0.43%] index_add_ perm : Elapsed 0.025 ms (2.534 ms / 100) 2.455 -> 2.452 ( -0.12%) [ +0.20% +0.20% +0.00% / -0.12% +0.57% +0.37%] index_copy_ perm : Elapsed 0.025 ms (2.460 ms / 100) 2.532 -> 2.535 ( +0.12%) [ +0.16% +0.00% +0.16% / +0.12% +0.79% +0.67%] index_add_ perm_sorted : Elapsed 0.025 ms (2.536 ms / 100) 2.455 -> 2.463 ( +0.33%) [ +0.08% +0.00% +0.16% / +0.33% +0.61% +0.65%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) 5.572 -> 5.568 ( -0.07%) [ +0.00% +0.14% +0.09% / -0.07% +0.77% +0.81%] index_select const : Elapsed 0.056 ms (5.572 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.56% +0.63%] index_select wrap : Elapsed 0.056 ms (5.564 ms / 100) 5.581 -> 5.583 ( +0.04%) [ +0.11% +0.22% +0.00% / +0.04% +0.66% +0.57%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.556 -> 5.577 ( +0.38%) [ +0.40% +0.00% +0.05% / +0.38% +0.88% +0.59%] index_select reverse : Elapsed 0.056 ms (5.578 ms / 100) 5.554 -> 5.564 ( +0.18%) [ +0.27% +0.00% +0.29% / +0.18% +0.94% +1.04%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.551 -> 5.574 ( +0.41%) [ +0.00% +0.34% +0.47% / +0.41% +0.67% +0.83%] index_select skip256 : Elapsed 0.056 ms (5.551 ms / 100) 5.555 -> 5.565 ( +0.18%) [ +0.11% +0.04% +0.00% / +0.18% +0.77% +0.56%] index_select spread : Elapsed 0.056 ms (5.561 ms / 100) 5.555 -> 5.563 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.67% +0.65%] index_select strided 3 : Elapsed 0.056 ms (5.559 ms / 100) 5.553 -> 5.555 ( +0.04%) [ +0.11% +0.07% +0.00% / +0.04% +0.77% +0.72%] index_select random : Elapsed 0.056 ms (5.559 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.02% +0.00% +0.05% / +0.02% +0.61% +0.68%] index_select random_sorted : Elapsed 0.056 ms (5.558 ms / 100) B = [20, 4, 40, 16] (stride (1, 12800, 20, 800)) A = [20, 4, 40, 5] (stride (4, 1, 400, 80)) dim = 3 2.413 -> 2.416 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.12% +0.83% +0.87%] index_add_ linear : Elapsed 0.024 ms (2.416 ms / 100) 2.347 -> 2.358 ( +0.47%) [ +0.30% +0.13% +0.00% / +0.47% +0.77% +0.72%] index_copy_ linear : Elapsed 0.024 ms (2.354 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.66% +0.91%] index_add_ reverse : Elapsed 0.024 ms (2.414 ms / 100) 2.345 -> 2.347 ( +0.09%) [ +0.26% +0.13% +0.00% / +0.09% +0.64% +0.64%] index_copy_ reverse : Elapsed 0.024 ms (2.351 ms / 100) 2.415 -> 2.420 ( +0.21%) [ +0.21% +0.04% +0.00% / +0.21% +0.70% +0.75%] index_add_ spread : Elapsed 0.024 ms (2.420 ms / 100) 2.349 -> 2.348 ( -0.04%) [ +0.13% +0.00% +0.30% / -0.04% +0.60% +0.64%] index_copy_ spread : Elapsed 0.024 ms (2.352 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.21% +0.12% +0.00% / +0.08% +0.62% +0.70%] index_add_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.342 -> 2.346 ( +0.17%) [ +0.47% +0.38% +0.00% / +0.17% +0.77% +0.90%] index_copy_ strided 3 : Elapsed 0.024 ms (2.353 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.17% +0.04% +0.00% / +0.12% +0.91% +0.50%] index_add_ strided 5 : Elapsed 0.024 ms (2.419 ms / 100) 2.350 -> 2.350 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.38% +0.43%] index_copy_ strided 5 : Elapsed 0.024 ms (2.351 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.33% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.353 -> 2.355 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.42% +0.34%] index_copy_ strided 7 : Elapsed 0.024 ms (2.354 ms / 100) 2.415 -> 2.415 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.83% +0.83%] index_add_ perm : Elapsed 0.024 ms (2.417 ms / 100) 2.346 -> 2.343 ( -0.13%) [ +0.00% +0.00% +0.04% / -0.13% +0.68% +0.55%] index_copy_ perm : Elapsed 0.023 ms (2.346 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.21% +0.00% +0.00% / -0.08% +0.66% +0.70%] index_add_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.347 -> 2.348 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.60% +0.64%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.347 ms / 100) 5.180 -> 5.184 ( +0.08%) [ +0.10% +0.10% +0.00% / +0.08% +0.69% +0.71%] index_select const : Elapsed 0.052 ms (5.185 ms / 100) 5.191 -> 5.189 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.64% +0.67%] index_select wrap : Elapsed 0.052 ms (5.191 ms / 100) 5.227 -> 5.230 ( +0.06%) [ +0.00% +0.11% +0.00% / +0.06% +0.73% +0.57%] index_select linear : Elapsed 0.052 ms (5.227 ms / 100) 5.209 -> 5.215 ( +0.12%) [ +0.13% +0.00% +0.12% / +0.12% +0.40% +0.56%] index_select reverse : Elapsed 0.052 ms (5.216 ms / 100) 5.180 -> 5.182 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.69% +0.64%] index_select skip64 : Elapsed 0.052 ms (5.186 ms / 100) 5.179 -> 5.180 ( +0.02%) [ +0.08% +0.00% +0.10% / +0.02% +0.68% +0.75%] index_select skip256 : Elapsed 0.052 ms (5.183 ms / 100) 5.169 -> 5.168 ( -0.02%) [ +0.00% +0.08% +0.00% / -0.02% +0.64% +0.62%] index_select spread : Elapsed 0.052 ms (5.169 ms / 100) 5.168 -> 5.173 ( +0.10%) [ +0.14% +0.08% +0.00% / +0.10% +0.79% +0.79%] index_select strided 3 : Elapsed 0.052 ms (5.175 ms / 100) 5.177 -> 5.173 ( -0.08%) [ +0.00% +0.17% +0.04% / -0.08% +0.70% +0.70%] index_select random : Elapsed 0.052 ms (5.177 ms / 100) 5.181 -> 5.181 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.83% +0.77%] index_select random_sorted : Elapsed 0.052 ms (5.181 ms / 100) B = [20, 4, 40, 16] (stride (40, 800, 1, 3200)) A = [20, 4, 40, 5] (stride (200, 4000, 5, 1)) dim = 3 2.344 -> 2.345 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.38% +0.47%] index_add_ linear : Elapsed 0.023 ms (2.344 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.48% +0.53%] index_copy_ linear : Elapsed 0.023 ms (2.281 ms / 100) 2.344 -> 2.349 ( +0.21%) [ +0.00% +0.30% +0.26% / +0.21% +0.26% +0.60%] index_add_ reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.281 -> 2.281 ( +0.00%) [ +0.13% +0.00% +0.26% / +0.00% +0.18% +0.66%] index_copy_ reverse : Elapsed 0.023 ms (2.284 ms / 100) 2.342 -> 2.342 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.34% +0.43%] index_add_ spread : Elapsed 0.023 ms (2.344 ms / 100) 2.283 -> 2.282 ( -0.04%) [ +0.13% +0.09% +0.00% / -0.04% +0.35% +0.39%] index_copy_ spread : Elapsed 0.023 ms (2.286 ms / 100) 2.337 -> 2.343 ( +0.26%) [ +0.30% +0.00% +0.04% / +0.26% +0.73% +0.60%] index_add_ strided 3 : Elapsed 0.023 ms (2.344 ms / 100) 2.278 -> 2.282 ( +0.18%) [ +0.13% +0.09% +0.00% / +0.18% +0.61% +0.48%] index_copy_ strided 3 : Elapsed 0.023 ms (2.281 ms / 100) 2.347 -> 2.356 ( +0.38%) [ +0.26% +0.21% +0.00% / +0.43% +0.38% +0.43%] index_add_ strided 5 : Elapsed 0.024 ms (2.353 ms / 100) 2.284 -> 2.291 ( +0.31%) [ +0.26% +0.35% +0.00% / +0.31% +0.70% +0.66%] index_copy_ strided 5 : Elapsed 0.023 ms (2.290 ms / 100) 2.342 -> 2.347 ( +0.21%) [ +0.34% +0.47% +0.00% / +0.21% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.024 ms (2.350 ms / 100) 2.280 -> 2.282 ( +0.09%) [ +0.18% +0.26% +0.00% / +0.09% +0.66% +0.35%] index_copy_ strided 7 : Elapsed 0.023 ms (2.284 ms / 100) 2.340 -> 2.346 ( +0.26%) [ +0.00% +0.09% +0.00% / +0.30% +0.47% +0.26%] index_add_ perm : Elapsed 0.023 ms (2.340 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.18% +0.04% +0.00% / +0.09% +0.31% +0.31%] index_copy_ perm : Elapsed 0.023 ms (2.283 ms / 100) 2.339 -> 2.343 ( +0.17%) [ +0.00% +0.13% +0.17% / +0.30% +0.17% +0.38%] index_add_ perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) 2.282 -> 2.280 ( -0.09%) [ +0.00% +0.04% +0.04% / +0.04% -0.09% +0.18%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.282 ms / 100) 5.000 -> 5.003 ( +0.06%) [ +0.04% +0.00% +0.10% / +0.06% +0.74% +0.76%] index_select const : Elapsed 0.050 ms (5.002 ms / 100) 4.997 -> 5.007 ( +0.20%) [ +0.12% +0.10% +0.00% / +0.20% +0.74% +0.76%] index_select wrap : Elapsed 0.050 ms (5.003 ms / 100) 5.000 -> 4.996 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.70% +0.64%] index_select linear : Elapsed 0.050 ms (5.000 ms / 100) 4.996 -> 5.002 ( +0.12%) [ +0.00% +0.14% +0.00% / +0.12% +0.76% +0.70%] index_select reverse : Elapsed 0.050 ms (4.996 ms / 100) 5.002 -> 4.999 ( -0.06%) [ +0.00% +0.18% +0.04% / -0.06% +0.46% +0.54%] index_select skip64 : Elapsed 0.050 ms (5.002 ms / 100) 4.997 -> 5.006 ( +0.18%) [ +0.00% +0.34% +0.12% / +0.18% +0.92% +0.90%] index_select skip256 : Elapsed 0.050 ms (4.997 ms / 100) 4.993 -> 5.000 ( +0.14%) [ +0.26% +0.12% +0.00% / +0.14% +0.94% +0.94%] index_select spread : Elapsed 0.050 ms (5.006 ms / 100) 4.990 -> 4.996 ( +0.12%) [ +0.00% +0.32% +0.28% / +0.12% +0.84% +0.80%] index_select strided 3 : Elapsed 0.050 ms (4.990 ms / 100) 4.991 -> 4.992 ( +0.02%) [ +0.18% +0.00% +0.08% / +0.02% +1.06% +1.06%] index_select random : Elapsed 0.050 ms (5.000 ms / 100) 4.996 -> 4.996 ( +0.00%) [ +0.16% +0.14% +0.00% / +0.00% +0.96% +0.74%] index_select random_sorted : Elapsed 0.050 ms (5.004 ms / 100) B = [20, 4, 40, 16] (stride (1, 800, 20, 3200)) A = [20, 4, 40, 5] (stride (1, 4000, 20, 800)) dim = 3 2.318 -> 2.323 ( +0.22%) [ +0.09% +0.00% +0.13% / +0.22% +0.52% +0.52%] index_add_ linear : Elapsed 0.023 ms (2.320 ms / 100) 2.251 -> 2.259 ( +0.36%) [ +0.00% +0.22% +0.13% / +0.40% +0.36% +0.40%] index_copy_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.311 -> 2.307 ( -0.17%) [ +0.22% +0.09% +0.00% / -0.17% +0.39% +0.65%] index_add_ reverse : Elapsed 0.023 ms (2.316 ms / 100) 2.242 -> 2.245 ( +0.13%) [ +0.04% +0.13% +0.00% / +0.13% +0.54% +0.40%] index_copy_ reverse : Elapsed 0.022 ms (2.243 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.04% +0.13% +0.34%] index_add_ spread : Elapsed 0.023 ms (2.334 ms / 100) 2.255 -> 2.257 ( +0.09%) [ +0.00% +0.13% +0.27% / +0.09% +0.49% +0.53%] index_copy_ spread : Elapsed 0.023 ms (2.255 ms / 100) 2.328 -> 2.327 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.60% +0.39%] index_add_ strided 3 : Elapsed 0.023 ms (2.330 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.27% +0.00% +0.04% / +0.04% +0.66% +0.66%] index_copy_ strided 3 : Elapsed 0.023 ms (2.262 ms / 100) 2.311 -> 2.315 ( +0.17%) [ +0.17% +0.00% +0.04% / +0.17% +0.52% +0.43%] index_add_ strided 5 : Elapsed 0.023 ms (2.315 ms / 100) 2.246 -> 2.243 ( -0.13%) [ +0.00% +0.04% +0.00% / -0.13% +0.53% +0.53%] index_copy_ strided 5 : Elapsed 0.022 ms (2.246 ms / 100) 2.316 -> 2.316 ( +0.00%) [ +0.17% +0.09% +0.00% / +0.00% +0.52% +0.60%] index_add_ strided 7 : Elapsed 0.023 ms (2.320 ms / 100) 2.247 -> 2.252 ( +0.22%) [ +0.13% +0.04% +0.00% / +0.22% +0.45% +0.67%] index_copy_ strided 7 : Elapsed 0.023 ms (2.250 ms / 100) 2.328 -> 2.333 ( +0.21%) [ +0.09% +0.04% +0.00% / +0.21% +0.64% +0.90%] index_add_ perm : Elapsed 0.023 ms (2.330 ms / 100) 2.259 -> 2.259 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.58% +0.66%] index_copy_ perm : Elapsed 0.023 ms (2.259 ms / 100) 2.329 -> 2.330 ( +0.04%) [ +0.43% +0.13% +0.00% / +0.04% +0.43% +0.56%] index_add_ perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) 2.253 -> 2.260 ( +0.31%) [ +0.18% +0.00% +0.04% / +0.31% +0.67% +0.67%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) 4.933 -> 4.943 ( +0.20%) [ +0.08% +0.00% +0.00% / +0.20% +0.71% +0.59%] index_select const : Elapsed 0.049 ms (4.937 ms / 100) 4.882 -> 4.894 ( +0.25%) [ +0.14% +0.16% +0.00% / +0.25% +0.76% +0.82%] index_select wrap : Elapsed 0.049 ms (4.889 ms / 100) 4.959 -> 4.952 ( -0.14%) [ +0.10% +0.00% +0.02% / -0.14% +0.69% +0.58%] index_select linear : Elapsed 0.050 ms (4.964 ms / 100) 4.903 -> 4.907 ( +0.08%) [ +0.61% +0.37% +0.00% / +0.08% +0.78% +1.18%] index_select reverse : Elapsed 0.049 ms (4.933 ms / 100) 4.885 -> 4.902 ( +0.35%) [ +0.10% +0.23% +0.00% / +0.35% +0.63% +0.72%] index_select skip64 : Elapsed 0.049 ms (4.890 ms / 100) 4.935 -> 4.938 ( +0.06%) [ +0.10% +0.18% +0.00% / +0.06% +0.63% +0.71%] index_select skip256 : Elapsed 0.049 ms (4.940 ms / 100) 4.924 -> 4.927 ( +0.06%) [ +0.04% +0.00% +0.02% / +0.06% +0.71% +0.79%] index_select spread : Elapsed 0.049 ms (4.926 ms / 100) 4.956 -> 4.963 ( +0.14%) [ +0.00% +0.02% +0.08% / +0.14% +0.56% +0.69%] index_select strided 3 : Elapsed 0.050 ms (4.956 ms / 100) 4.922 -> 4.924 ( +0.04%) [ +0.14% +0.06% +0.00% / +0.04% +0.59% +0.73%] index_select random : Elapsed 0.049 ms (4.929 ms / 100) 4.933 -> 4.935 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.59% +0.55%] index_select random_sorted : Elapsed 0.049 ms (4.933 ms / 100) B = [20, 4, 40, 16] (stride (1, 800, 20, 3200)) A = [20, 4, 40, 5] (stride (1, 20, 80, 3200)) dim = 3 2.393 -> 2.400 ( +0.29%) [ +0.29% +0.33% +0.00% / +0.29% +0.38% +0.29%] index_add_ linear : Elapsed 0.024 ms (2.400 ms / 100) 2.319 -> 2.326 ( +0.30%) [ +0.04% +0.00% +0.00% / +0.39% +0.30% +0.39%] index_copy_ linear : Elapsed 0.023 ms (2.320 ms / 100) 2.388 -> 2.390 ( +0.08%) [ +0.17% +0.25% +0.00% / +0.08% +0.34% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.392 ms / 100) 2.312 -> 2.312 ( +0.00%) [ +0.26% +0.35% +0.00% / +0.00% +0.30% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.318 ms / 100) 2.395 -> 2.391 ( -0.17%) [ +0.13% +0.04% +0.00% / -0.17% +0.21% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.318 -> 2.317 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.22% +0.04%] index_copy_ spread : Elapsed 0.023 ms (2.321 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.21% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.319 -> 2.318 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.13% +0.39%] index_copy_ strided 3 : Elapsed 0.023 ms (2.321 ms / 100) 2.392 -> 2.398 ( +0.25%) [ +0.29% +0.21% +0.00% / +0.25% +0.29% +0.71%] index_add_ strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.316 -> 2.323 ( +0.30%) [ +0.35% +0.30% +0.00% / +0.30% +0.65% +1.30%] index_copy_ strided 5 : Elapsed 0.023 ms (2.324 ms / 100) 2.390 -> 2.391 ( +0.04%) [ +0.17% +0.00% +0.08% / +0.04% +0.33% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.394 ms / 100) 2.316 -> 2.320 ( +0.17%) [ +0.04% +0.17% +0.00% / +0.17% +0.35% +0.26%] index_copy_ strided 7 : Elapsed 0.023 ms (2.317 ms / 100) 2.392 -> 2.391 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.46% +0.67%] index_add_ perm : Elapsed 0.024 ms (2.392 ms / 100) 2.316 -> 2.318 ( +0.09%) [ +0.00% +0.09% +0.26% / +0.09% +0.35% +0.52%] index_copy_ perm : Elapsed 0.023 ms (2.316 ms / 100) 2.387 -> 2.388 ( +0.04%) [ +0.08% +0.21% +0.00% / +0.04% +0.42% +0.75%] index_add_ perm_sorted : Elapsed 0.024 ms (2.389 ms / 100) 2.311 -> 2.310 ( -0.04%) [ +0.17% +0.00% +0.09% / -0.04% +0.82% +0.69%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.315 ms / 100) 5.161 -> 5.166 ( +0.10%) [ +0.39% +0.12% +0.00% / +0.10% +0.56% +0.64%] index_select const : Elapsed 0.052 ms (5.181 ms / 100) 5.136 -> 5.138 ( +0.04%) [ +0.31% +0.27% +0.00% / +0.04% +0.80% +1.07%] index_select wrap : Elapsed 0.052 ms (5.152 ms / 100) 5.178 -> 5.192 ( +0.27%) [ +0.31% +0.19% +0.00% / +0.42% +0.44% +0.27%] index_select linear : Elapsed 0.052 ms (5.194 ms / 100) 5.165 -> 5.164 ( -0.02%) [ +0.41% +0.48% +0.00% / -0.02% +1.01% +0.97%] index_select reverse : Elapsed 0.052 ms (5.186 ms / 100) 5.171 -> 5.156 ( -0.29%) [ +0.15% +0.02% +0.00% / -0.29% +0.75% +0.89%] index_select skip64 : Elapsed 0.052 ms (5.179 ms / 100) 5.182 -> 5.175 ( -0.14%) [ +0.17% +0.27% +0.00% / -0.14% +0.19% -0.14%] index_select skip256 : Elapsed 0.052 ms (5.191 ms / 100) 5.158 -> 5.174 ( +0.31%) [ +0.08% +0.00% +0.39% / +0.31% +0.87% +0.76%] index_select spread : Elapsed 0.052 ms (5.162 ms / 100) 5.137 -> 5.146 ( +0.18%) [ +0.45% +0.00% +0.49% / +0.18% +0.78% +0.58%] index_select strided 3 : Elapsed 0.052 ms (5.160 ms / 100) 5.154 -> 5.156 ( +0.04%) [ +0.00% +0.29% +0.23% / +0.04% +0.68% +0.91%] index_select random : Elapsed 0.052 ms (5.154 ms / 100) 5.155 -> 5.152 ( -0.06%) [ +0.14% +0.00% +0.16% / -0.06% +0.47% +0.80%] index_select random_sorted : Elapsed 0.052 ms (5.162 ms / 100) out_shape = [16, 5, 4, 40] in_shape = [20, 5, 4, 40] idx_dim = 0 B = [16, 5, 4, 40] (stride (800, 40, 200, 1)) A = [20, 5, 4, 40] (stride (4, 80, 1, 400)) dim = 0 4.252 -> 4.259 ( +0.16%) [ +0.00% +0.12% +0.19% / +0.16% +0.78% +0.80%] index_select const : Elapsed 0.043 ms (4.252 ms / 100) 4.267 -> 4.267 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.00% +0.82% +0.80%] index_select wrap : Elapsed 0.043 ms (4.270 ms / 100) 4.269 -> 4.270 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.63% +0.66%] index_select linear : Elapsed 0.043 ms (4.271 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.00% +0.05% +0.07% / +0.05% +0.86% +0.82%] index_select reverse : Elapsed 0.043 ms (4.278 ms / 100) 4.267 -> 4.266 ( -0.02%) [ +0.00% +0.00% +0.09% / -0.02% +0.73% +0.56%] index_select skip64 : Elapsed 0.043 ms (4.267 ms / 100) 4.267 -> 4.270 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.75% +0.56%] index_select skip256 : Elapsed 0.043 ms (4.270 ms / 100) 4.269 -> 4.275 ( +0.14%) [ +0.00% +0.12% +0.07% / +0.14% +0.73% +0.73%] index_select spread : Elapsed 0.043 ms (4.269 ms / 100) 4.277 -> 4.279 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.82% +0.68%] index_select strided 3 : Elapsed 0.043 ms (4.280 ms / 100) 4.277 -> 4.273 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.51% +0.63%] index_select strided 5 : Elapsed 0.043 ms (4.277 ms / 100) 4.268 -> 4.271 ( +0.07%) [ +0.14% +0.00% +0.12% / +0.07% +0.59% +0.56%] index_select strided 7 : Elapsed 0.043 ms (4.274 ms / 100) 4.275 -> 4.272 ( -0.07%) [ +0.00% +0.00% +0.02% / -0.07% +0.56% +0.56%] index_select strided 8 : Elapsed 0.043 ms (4.275 ms / 100) 4.292 -> 4.291 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.58% +0.61%] index_select strided 16 : Elapsed 0.043 ms (4.292 ms / 100) 4.275 -> 4.281 ( +0.14%) [ +0.14% +0.00% +0.16% / +0.14% +0.56% +0.80%] index_select random : Elapsed 0.043 ms (4.281 ms / 100) 4.280 -> 4.281 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.49% +0.49%] index_select random_sorted : Elapsed 0.043 ms (4.280 ms / 100) 4.290 -> 4.293 ( +0.07%) [ +0.16% +0.00% +0.07% / +0.07% +0.42% +0.56%] index_select perm : Elapsed 0.043 ms (4.297 ms / 100) 4.270 -> 4.271 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.61% +0.54%] index_select perm_sorted : Elapsed 0.043 ms (4.273 ms / 100) B = [16, 5, 4, 40] (stride (800, 1, 200, 5)) A = [20, 5, 4, 40] (stride (800, 160, 40, 1)) dim = 0 2.888 -> 2.890 ( +0.07%) [ +0.00% +0.00% +0.03% / +0.07% +0.38% +0.45%] index_select const : Elapsed 0.029 ms (2.888 ms / 100) 2.883 -> 2.884 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.52% +0.52%] index_select wrap : Elapsed 0.029 ms (2.884 ms / 100) 2.897 -> 2.898 ( +0.03%) [ +0.03% +0.10% +0.00% / +0.03% +0.48% +0.66%] index_select linear : Elapsed 0.029 ms (2.898 ms / 100) 2.888 -> 2.891 ( +0.10%) [ +0.07% +0.00% +0.07% / +0.10% +0.59% +0.59%] index_select reverse : Elapsed 0.029 ms (2.890 ms / 100) 2.908 -> 2.909 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.41% +0.38%] index_select skip64 : Elapsed 0.029 ms (2.909 ms / 100) 2.896 -> 2.898 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.48% +0.41%] index_select skip256 : Elapsed 0.029 ms (2.896 ms / 100) 2.896 -> 2.897 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.48% +0.52%] index_select spread : Elapsed 0.029 ms (2.897 ms / 100) 2.901 -> 2.903 ( +0.07%) [ +0.10% +0.07% +0.00% / +0.07% +0.55% +0.59%] index_select strided 3 : Elapsed 0.029 ms (2.904 ms / 100) 2.893 -> 2.894 ( +0.03%) [ +0.07% +0.03% +0.00% / +0.03% +0.35% +0.35%] index_select strided 5 : Elapsed 0.029 ms (2.895 ms / 100) 2.893 -> 2.893 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.62% +0.59%] index_select strided 7 : Elapsed 0.029 ms (2.895 ms / 100) 2.886 -> 2.883 ( -0.10%) [ +0.00% +0.14% +0.03% / -0.10% +0.80% +0.83%] index_select strided 8 : Elapsed 0.029 ms (2.886 ms / 100) 2.889 -> 2.890 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.66% +0.66%] index_select strided 16 : Elapsed 0.029 ms (2.890 ms / 100) 2.897 -> 2.898 ( +0.03%) [ +0.07% +0.07% +0.00% / +0.03% +0.76% +0.79%] index_select random : Elapsed 0.029 ms (2.899 ms / 100) 2.896 -> 2.896 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.86% +0.90%] index_select random_sorted : Elapsed 0.029 ms (2.897 ms / 100) 2.893 -> 2.893 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.80% +0.76%] index_select perm : Elapsed 0.029 ms (2.894 ms / 100) 2.898 -> 2.898 ( +0.00%) [ +0.00% +0.07% +0.03% / +0.00% +0.69% +0.66%] index_select perm_sorted : Elapsed 0.029 ms (2.898 ms / 100) B = [16, 5, 4, 40] (stride (160, 2560, 40, 1)) A = [20, 5, 4, 40] (stride (20, 4, 1, 400)) dim = 0 1.385 -> 1.386 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.94% +0.87%] index_select const : Elapsed 0.014 ms (1.387 ms / 100) 1.406 -> 1.409 ( +0.21%) [ +0.00% +0.21% +0.00% / +0.21% +1.14% +1.14%] index_select wrap : Elapsed 0.014 ms (1.406 ms / 100) 1.409 -> 1.407 ( -0.14%) [ +0.21% +0.00% +0.07% / -0.14% +0.78% +0.99%] index_select linear : Elapsed 0.014 ms (1.412 ms / 100) 1.397 -> 1.397 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.72% +0.86%] index_select reverse : Elapsed 0.014 ms (1.398 ms / 100) 1.388 -> 1.390 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +0.50% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.388 ms / 100) 1.391 -> 1.392 ( +0.07%) [ +0.07% +0.29% +0.00% / +0.07% +0.65% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.392 ms / 100) 1.395 -> 1.399 ( +0.29%) [ +0.36% +0.29% +0.00% / +0.29% +0.79% +1.00%] index_select spread : Elapsed 0.014 ms (1.400 ms / 100) 1.388 -> 1.392 ( +0.29%) [ +0.14% +0.14% +0.00% / +0.29% +0.79% +0.86%] index_select strided 3 : Elapsed 0.014 ms (1.390 ms / 100) 1.393 -> 1.391 ( -0.14%) [ +0.07% +0.07% +0.00% / -0.14% +0.72% +0.86%] index_select strided 5 : Elapsed 0.014 ms (1.394 ms / 100) 1.389 -> 1.390 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.58% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.389 ms / 100) 1.399 -> 1.401 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.79% +0.79%] index_select strided 8 : Elapsed 0.014 ms (1.401 ms / 100) 1.410 -> 1.410 ( +0.00%) [ +0.28% +0.00% +0.35% / +0.00% +0.78% +0.78%] index_select strided 16 : Elapsed 0.014 ms (1.414 ms / 100) 1.400 -> 1.405 ( +0.36%) [ +0.36% +0.36% +0.00% / +0.43% +0.36% +0.50%] index_select random : Elapsed 0.014 ms (1.405 ms / 100) 1.409 -> 1.410 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.14% +0.07% +0.43%] index_select random_sorted : Elapsed 0.014 ms (1.410 ms / 100) 1.399 -> 1.391 ( -0.57%) [ +0.07% +0.07% +0.00% / -0.57% -0.14% +0.14%] index_select perm : Elapsed 0.014 ms (1.400 ms / 100) 1.398 -> 1.399 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.21% +0.07%] index_select perm_sorted : Elapsed 0.014 ms (1.399 ms / 100) B = [16, 5, 4, 40] (stride (40, 2560, 640, 1)) A = [20, 5, 4, 40] (stride (5, 1, 100, 400)) dim = 0 3.924 -> 3.924 ( +0.00%) [ +0.05% +0.13% +0.00% / +0.00% +0.59% +0.59%] index_select const : Elapsed 0.039 ms (3.926 ms / 100) 3.932 -> 3.937 ( +0.13%) [ +0.00% +0.00% +0.13% / +0.13% +0.61% +0.53%] index_select wrap : Elapsed 0.039 ms (3.932 ms / 100) 3.932 -> 3.931 ( -0.03%) [ +0.13% +0.00% +0.03% / -0.03% +0.46% +0.58%] index_select linear : Elapsed 0.039 ms (3.937 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.13% +0.00% / +0.00% +0.71% +0.58%] index_select reverse : Elapsed 0.039 ms (3.933 ms / 100) 3.926 -> 3.929 ( +0.08%) [ +0.10% +0.00% +0.10% / +0.08% +0.53% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.930 ms / 100) 3.928 -> 3.931 ( +0.08%) [ +0.18% +0.18% +0.00% / +0.08% +0.66% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.935 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.10% +0.10% +0.00% / +0.03% +0.51% +0.61%] index_select spread : Elapsed 0.039 ms (3.926 ms / 100) 3.924 -> 3.929 ( +0.13%) [ +0.20% +0.00% +0.13% / +0.13% +0.36% +0.46%] index_select strided 3 : Elapsed 0.039 ms (3.932 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.00% +0.00% +0.13% / +0.03% +0.43% +0.46%] index_select strided 5 : Elapsed 0.039 ms (3.925 ms / 100) 3.915 -> 3.918 ( +0.08%) [ +0.10% +0.00% +0.23% / +0.08% +0.36% +0.49%] index_select strided 7 : Elapsed 0.039 ms (3.919 ms / 100) 3.928 -> 3.922 ( -0.15%) [ +0.00% +0.05% +0.05% / -0.15% +0.69% +0.43%] index_select strided 8 : Elapsed 0.039 ms (3.928 ms / 100) 3.928 -> 3.927 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.76% +0.64%] index_select strided 16 : Elapsed 0.039 ms (3.929 ms / 100) 3.923 -> 3.918 ( -0.13%) [ +0.00% +0.03% +0.05% / -0.13% +0.61% +0.51%] index_select random : Elapsed 0.039 ms (3.923 ms / 100) 3.921 -> 3.921 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.69% +0.66%] index_select random_sorted : Elapsed 0.039 ms (3.921 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.71% +0.69%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.920 ( +0.03%) [ +0.10% +0.00% +0.10% / +0.03% +0.61% +0.66%] index_select perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) B = [16, 5, 4, 40] (stride (1, 640, 3200, 16)) A = [20, 5, 4, 40] (stride (1, 800, 4000, 20)) dim = 0 4.124 -> 4.126 ( +0.05%) [ +0.10% +0.00% +0.07% / +0.05% +0.75% +0.68%] index_select const : Elapsed 0.041 ms (4.128 ms / 100) 4.142 -> 4.144 ( +0.05%) [ +0.12% +0.00% +0.02% / +0.05% +0.80% +0.75%] index_select wrap : Elapsed 0.041 ms (4.147 ms / 100) 4.123 -> 4.126 ( +0.07%) [ +0.00% +0.02% +0.10% / +0.07% +1.04% +0.87%] index_select linear : Elapsed 0.041 ms (4.123 ms / 100) 4.119 -> 4.122 ( +0.07%) [ +0.12% +0.00% +0.07% / +0.07% +1.00% +0.95%] index_select reverse : Elapsed 0.041 ms (4.124 ms / 100) 4.125 -> 4.124 ( -0.02%) [ +0.05% +0.00% +0.07% / -0.02% +0.87% +0.90%] index_select skip64 : Elapsed 0.041 ms (4.127 ms / 100) 4.126 -> 4.125 ( -0.02%) [ +0.10% +0.00% +0.00% / -0.02% +0.75% +0.87%] index_select skip256 : Elapsed 0.041 ms (4.130 ms / 100) 4.145 -> 4.146 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.68% +0.63%] index_select spread : Elapsed 0.041 ms (4.147 ms / 100) 4.126 -> 4.125 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.58% +0.51%] index_select strided 3 : Elapsed 0.041 ms (4.126 ms / 100) 4.124 -> 4.128 ( +0.10%) [ +0.07% +0.12% +0.00% / +0.10% +0.78% +0.61%] index_select strided 5 : Elapsed 0.041 ms (4.127 ms / 100) 4.128 -> 4.125 ( -0.07%) [ +0.02% +0.05% +0.00% / -0.07% +0.61% +0.63%] index_select strided 7 : Elapsed 0.041 ms (4.129 ms / 100) 4.125 -> 4.126 ( +0.02%) [ +0.00% +0.12% +0.00% / +0.02% +0.58% +0.58%] index_select strided 8 : Elapsed 0.041 ms (4.125 ms / 100) 4.145 -> 4.146 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.58% +0.55%] index_select strided 16 : Elapsed 0.041 ms (4.147 ms / 100) 4.127 -> 4.124 ( -0.07%) [ +0.00% +0.10% +0.07% / -0.07% +0.58% +0.61%] index_select random : Elapsed 0.041 ms (4.127 ms / 100) 4.131 -> 4.130 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.61% +0.58%] index_select random_sorted : Elapsed 0.041 ms (4.133 ms / 100) 4.149 -> 4.151 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.51% +0.48%] index_select perm : Elapsed 0.041 ms (4.149 ms / 100) 4.128 -> 4.132 ( +0.10%) [ +0.00% +0.02% +0.02% / +0.10% +0.53% +0.46%] index_select perm_sorted : Elapsed 0.041 ms (4.128 ms / 100) out_shape = [20, 16, 4, 40] in_shape = [20, 5, 4, 40] idx_dim = 1 B = [20, 16, 4, 40] (stride (2560, 160, 40, 1)) A = [20, 5, 4, 40] (stride (1, 800, 4000, 20)) dim = 1 2.314 -> 2.318 ( +0.17%) [ +0.00% +0.04% +0.13% / +0.17% +0.48% +0.61%] index_add_ linear : Elapsed 0.023 ms (2.314 ms / 100) 2.244 -> 2.247 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.49% +0.36%] index_copy_ linear : Elapsed 0.022 ms (2.247 ms / 100) 2.303 -> 2.308 ( +0.22%) [ +0.04% +0.13% +0.00% / +0.22% +0.61% +0.56%] index_add_ reverse : Elapsed 0.023 ms (2.304 ms / 100) 2.236 -> 2.240 ( +0.18%) [ +0.04% +0.18% +0.00% / +0.18% +0.31% +0.54%] index_copy_ reverse : Elapsed 0.022 ms (2.237 ms / 100) 2.327 -> 2.323 ( -0.17%) [ +0.04% +0.04% +0.00% / -0.17% +0.30% +0.26%] index_add_ spread : Elapsed 0.023 ms (2.328 ms / 100) 2.252 -> 2.250 ( -0.09%) [ +0.00% +0.09% +0.04% / -0.09% +0.49% +0.49%] index_copy_ spread : Elapsed 0.023 ms (2.252 ms / 100) 2.321 -> 2.319 ( -0.09%) [ +0.00% +0.22% +0.04% / -0.09% +0.69% +0.65%] index_add_ strided 3 : Elapsed 0.023 ms (2.321 ms / 100) 2.249 -> 2.251 ( +0.09%) [ +0.13% +0.00% +0.00% / +0.09% +0.93% +0.76%] index_copy_ strided 3 : Elapsed 0.023 ms (2.252 ms / 100) 2.308 -> 2.310 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.43% +0.48%] index_add_ strided 5 : Elapsed 0.023 ms (2.312 ms / 100) 2.238 -> 2.240 ( +0.09%) [ +0.00% +0.04% +0.09% / +0.09% +0.49% +0.45%] index_copy_ strided 5 : Elapsed 0.022 ms (2.238 ms / 100) 2.316 -> 2.313 ( -0.13%) [ +0.13% +0.09% +0.00% / -0.13% +0.73% +0.69%] index_add_ strided 7 : Elapsed 0.023 ms (2.319 ms / 100) 2.240 -> 2.245 ( +0.22%) [ +0.00% +0.18% +0.27% / +0.22% +0.85% +0.94%] index_copy_ strided 7 : Elapsed 0.022 ms (2.240 ms / 100) 2.321 -> 2.322 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.04% +0.69% +0.73%] index_add_ perm : Elapsed 0.023 ms (2.323 ms / 100) 2.248 -> 2.253 ( +0.22%) [ +0.00% +0.04% +0.00% / +0.22% +0.58% +0.62%] index_copy_ perm : Elapsed 0.022 ms (2.248 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.00% +0.00% +0.09% / +0.13% +0.30% +0.13%] index_add_ perm_sorted : Elapsed 0.023 ms (2.326 ms / 100) 2.249 -> 2.255 ( +0.27%) [ +0.36% +0.13% +0.00% / +0.27% +0.67% +0.62%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) 4.929 -> 4.936 ( +0.14%) [ +0.02% +0.00% +0.16% / +0.14% +0.69% +0.71%] index_select const : Elapsed 0.049 ms (4.930 ms / 100) 4.866 -> 4.879 ( +0.27%) [ +0.33% +0.43% +0.00% / +0.27% +0.92% +0.99%] index_select wrap : Elapsed 0.049 ms (4.882 ms / 100) 4.940 -> 4.938 ( -0.04%) [ +0.02% +0.00% +0.02% / -0.04% +0.63% +0.77%] index_select linear : Elapsed 0.049 ms (4.941 ms / 100) 4.929 -> 4.928 ( -0.02%) [ +0.10% +0.00% +0.02% / -0.02% +0.20% +0.57%] index_select reverse : Elapsed 0.049 ms (4.934 ms / 100) 4.878 -> 4.899 ( +0.43%) [ +0.33% +0.00% +0.45% / +0.43% +0.84% +1.09%] index_select skip64 : Elapsed 0.049 ms (4.894 ms / 100) 4.929 -> 4.929 ( +0.00%) [ +0.12% +0.02% +0.00% / +0.00% +0.65% +0.59%] index_select skip256 : Elapsed 0.049 ms (4.935 ms / 100) 4.921 -> 4.926 ( +0.10%) [ +0.00% +0.20% +0.10% / +0.10% +0.63% +0.71%] index_select spread : Elapsed 0.049 ms (4.921 ms / 100) 4.933 -> 4.934 ( +0.02%) [ +0.00% +0.28% +0.12% / +0.02% +0.81% +0.83%] index_select strided 3 : Elapsed 0.049 ms (4.933 ms / 100) 4.914 -> 4.922 ( +0.16%) [ +0.02% +0.20% +0.00% / +0.16% +0.79% +0.63%] index_select random : Elapsed 0.049 ms (4.915 ms / 100) 4.922 -> 4.922 ( +0.00%) [ +0.06% +0.00% +0.08% / +0.00% +0.43% +0.55%] index_select random_sorted : Elapsed 0.049 ms (4.925 ms / 100) B = [20, 16, 4, 40] (stride (2560, 1, 16, 64)) A = [20, 5, 4, 40] (stride (1, 20, 100, 400)) dim = 1 2.554 -> 2.551 ( -0.12%) [ +0.08% +0.00% +0.04% / -0.12% +0.67% +0.67%] index_add_ linear : Elapsed 0.026 ms (2.556 ms / 100) 2.509 -> 2.509 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.52% +0.52%] index_copy_ linear : Elapsed 0.025 ms (2.512 ms / 100) 2.556 -> 2.558 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.35% +0.63%] index_add_ reverse : Elapsed 0.026 ms (2.556 ms / 100) 2.511 -> 2.511 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.32% +0.36%] index_copy_ reverse : Elapsed 0.025 ms (2.511 ms / 100) 2.587 -> 2.592 ( +0.19%) [ +0.39% +0.27% +0.00% / +0.19% +0.66% +0.46%] index_add_ spread : Elapsed 0.026 ms (2.597 ms / 100) 2.575 -> 2.580 ( +0.19%) [ +0.50% +0.35% +0.00% / +0.19% +0.50% +0.43%] index_copy_ spread : Elapsed 0.026 ms (2.588 ms / 100) 2.584 -> 2.587 ( +0.12%) [ +0.31% +0.27% +0.00% / +0.12% +0.62% +0.62%] index_add_ strided 3 : Elapsed 0.026 ms (2.592 ms / 100) 2.579 -> 2.580 ( +0.04%) [ +0.19% +0.04% +0.00% / +0.04% +0.43% +0.35%] index_copy_ strided 3 : Elapsed 0.026 ms (2.584 ms / 100) 2.585 -> 2.583 ( -0.08%) [ +0.39% +0.08% +0.00% / -0.08% +0.50% +0.35%] index_add_ strided 5 : Elapsed 0.026 ms (2.595 ms / 100) 2.575 -> 2.577 ( +0.08%) [ +0.31% +0.16% +0.00% / +0.08% +0.43% +0.27%] index_copy_ strided 5 : Elapsed 0.026 ms (2.583 ms / 100) 2.584 -> 2.587 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.46% +0.39%] index_add_ strided 7 : Elapsed 0.026 ms (2.584 ms / 100) 2.574 -> 2.579 ( +0.19%) [ +0.00% +0.23% +0.27% / +0.19% +0.43% +0.35%] index_copy_ strided 7 : Elapsed 0.026 ms (2.574 ms / 100) 2.586 -> 2.589 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.27% +0.43%] index_add_ perm : Elapsed 0.026 ms (2.587 ms / 100) 2.579 -> 2.583 ( +0.16%) [ +0.19% +0.00% +0.08% / +0.23% +0.16% +0.23%] index_copy_ perm : Elapsed 0.026 ms (2.584 ms / 100) 2.585 -> 2.593 ( +0.31%) [ +0.23% +0.08% +0.00% / +0.31% +0.35% +0.66%] index_add_ perm_sorted : Elapsed 0.026 ms (2.591 ms / 100) 2.579 -> 2.583 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.19% +0.27%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.580 ms / 100) 5.672 -> 5.674 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.26% +0.53%] index_select const : Elapsed 0.057 ms (5.672 ms / 100) 5.627 -> 5.640 ( +0.23%) [ +0.00% +0.16% +0.23% / +0.23% +0.41% +0.52%] index_select wrap : Elapsed 0.056 ms (5.627 ms / 100) 5.663 -> 5.666 ( +0.05%) [ +0.30% +0.00% +0.19% / +0.05% +0.51% +0.64%] index_select linear : Elapsed 0.057 ms (5.680 ms / 100) 5.665 -> 5.636 ( -0.51%) [ +0.00% +0.07% +0.16% / -0.51% +0.51% +0.28%] index_select reverse : Elapsed 0.057 ms (5.665 ms / 100) 5.637 -> 5.654 ( +0.30%) [ +0.34% +0.28% +0.00% / +0.30% +0.48% +0.64%] index_select skip64 : Elapsed 0.057 ms (5.656 ms / 100) 5.653 -> 5.625 ( -0.50%) [ +0.27% +0.00% +0.25% / -0.50% +0.74% +0.78%] index_select skip256 : Elapsed 0.057 ms (5.668 ms / 100) 5.643 -> 5.650 ( +0.12%) [ +0.19% +0.19% +0.00% / +0.12% +0.71% +0.60%] index_select spread : Elapsed 0.057 ms (5.654 ms / 100) 5.621 -> 5.622 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.60% +0.59%] index_select strided 3 : Elapsed 0.056 ms (5.621 ms / 100) 5.632 -> 5.629 ( -0.05%) [ +0.00% +0.11% +0.00% / -0.05% +0.80% +0.73%] index_select random : Elapsed 0.056 ms (5.632 ms / 100) 5.635 -> 5.639 ( +0.07%) [ +0.27% +0.20% +0.00% / +0.07% +0.78% +0.80%] index_select random_sorted : Elapsed 0.057 ms (5.650 ms / 100) B = [20, 16, 4, 40] (stride (1, 800, 12800, 20)) A = [20, 5, 4, 40] (stride (800, 160, 1, 4)) dim = 1 2.300 -> 2.300 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +1.74% +1.78%] index_add_ linear : Elapsed 0.023 ms (2.305 ms / 100) 2.256 -> 2.254 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +1.46% +1.24%] index_copy_ linear : Elapsed 0.023 ms (2.256 ms / 100) 2.297 -> 2.296 ( -0.04%) [ +0.00% +0.13% +0.22% / -0.04% +1.74% +1.87%] index_add_ reverse : Elapsed 0.023 ms (2.297 ms / 100) 2.244 -> 2.252 ( +0.36%) [ +0.31% +0.00% +0.22% / +0.36% +1.56% +1.56%] index_copy_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.309 -> 2.306 ( -0.13%) [ +0.22% +0.00% +0.04% / -0.13% +1.43% +1.39%] index_add_ spread : Elapsed 0.023 ms (2.314 ms / 100) 2.255 -> 2.258 ( +0.13%) [ +0.00% +0.22% +0.31% / +0.13% +1.29% +1.55%] index_copy_ spread : Elapsed 0.023 ms (2.255 ms / 100) 2.305 -> 2.305 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +1.48% +1.34%] index_add_ strided 3 : Elapsed 0.023 ms (2.305 ms / 100) 2.254 -> 2.256 ( +0.09%) [ +0.27% +0.09% +0.00% / +0.09% +1.60% +1.38%] index_copy_ strided 3 : Elapsed 0.023 ms (2.260 ms / 100) 2.300 -> 2.300 ( +0.00%) [ +0.22% +0.04% +0.00% / +0.00% +1.61% +1.61%] index_add_ strided 5 : Elapsed 0.023 ms (2.305 ms / 100) 2.252 -> 2.255 ( +0.13%) [ +0.00% +0.04% +0.00% / +0.13% +1.42% +1.42%] index_copy_ strided 5 : Elapsed 0.023 ms (2.252 ms / 100) 2.303 -> 2.307 ( +0.17%) [ +0.13% +0.04% +0.00% / +0.17% +1.69% +1.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.306 ms / 100) 2.256 -> 2.258 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +1.11% +1.29%] index_copy_ strided 7 : Elapsed 0.023 ms (2.257 ms / 100) 2.302 -> 2.304 ( +0.09%) [ +0.22% +0.26% +0.00% / +0.09% +1.30% +1.17%] index_add_ perm : Elapsed 0.023 ms (2.307 ms / 100) 2.253 -> 2.258 ( +0.22%) [ +0.00% +0.22% +0.09% / +0.22% +1.24% +1.38%] index_copy_ perm : Elapsed 0.023 ms (2.253 ms / 100) 2.310 -> 2.313 ( +0.13%) [ +0.04% +0.00% +0.04% / +0.13% +1.39% +1.43%] index_add_ perm_sorted : Elapsed 0.023 ms (2.311 ms / 100) 2.258 -> 2.257 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +1.28% +1.28%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.259 ms / 100) 4.891 -> 4.903 ( +0.25%) [ +0.00% +0.18% +0.04% / +0.25% +0.76% +0.76%] index_select const : Elapsed 0.049 ms (4.891 ms / 100) 4.905 -> 4.916 ( +0.22%) [ +0.00% +0.18% +0.27% / +0.22% +1.39% +1.37%] index_select wrap : Elapsed 0.049 ms (4.905 ms / 100) 4.920 -> 4.924 ( +0.08%) [ +0.10% +0.00% +0.02% / +0.08% +1.18% +1.22%] index_select linear : Elapsed 0.049 ms (4.925 ms / 100) 4.912 -> 4.918 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.67% +0.81%] index_select reverse : Elapsed 0.049 ms (4.916 ms / 100) 4.876 -> 4.891 ( +0.31%) [ +0.33% +0.21% +0.00% / +0.31% +0.94% +0.94%] index_select skip64 : Elapsed 0.049 ms (4.892 ms / 100) 4.888 -> 4.888 ( +0.00%) [ +0.00% +0.20% +0.06% / +0.00% +0.80% +0.90%] index_select skip256 : Elapsed 0.049 ms (4.888 ms / 100) 4.907 -> 4.913 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +1.10% +1.10%] index_select spread : Elapsed 0.049 ms (4.910 ms / 100) 4.910 -> 4.916 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +1.41% +1.43%] index_select strided 3 : Elapsed 0.049 ms (4.916 ms / 100) 4.916 -> 4.925 ( +0.18%) [ +0.16% +0.00% +0.12% / +0.18% +1.06% +1.10%] index_select random : Elapsed 0.049 ms (4.924 ms / 100) 4.900 -> 4.905 ( +0.10%) [ +0.12% +0.00% +0.04% / +0.10% +1.24% +1.12%] index_select random_sorted : Elapsed 0.049 ms (4.906 ms / 100) B = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) A = [20, 5, 4, 40] (stride (1, 80, 20, 400)) dim = 1 2.546 -> 2.550 ( +0.16%) [ +0.27% +0.00% +0.20% / +0.16% +0.24% +0.16%] index_add_ linear : Elapsed 0.026 ms (2.553 ms / 100) 2.473 -> 2.478 ( +0.20%) [ +0.44% +0.24% +0.00% / +0.20% +0.65% +0.53%] index_copy_ linear : Elapsed 0.025 ms (2.484 ms / 100) 2.547 -> 2.543 ( -0.16%) [ +0.20% +0.04% +0.00% / -0.16% +0.24% +0.16%] index_add_ reverse : Elapsed 0.026 ms (2.552 ms / 100) 2.475 -> 2.480 ( +0.20%) [ +0.00% +0.24% +0.12% / +0.20% +0.53% +0.36%] index_copy_ reverse : Elapsed 0.025 ms (2.475 ms / 100) 2.552 -> 2.551 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.24% +0.08%] index_add_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.478 -> 2.485 ( +0.28%) [ +0.00% +0.00% +0.04% / +0.28% +0.36% +0.56%] index_copy_ spread : Elapsed 0.025 ms (2.478 ms / 100) 2.548 -> 2.553 ( +0.20%) [ +0.00% +0.39% +0.00% / +0.24% +0.27% +0.20%] index_add_ strided 3 : Elapsed 0.025 ms (2.548 ms / 100) 2.476 -> 2.473 ( -0.12%) [ +0.00% +0.24% +0.20% / -0.12% +0.44% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.476 ms / 100) 2.544 -> 2.543 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.24% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.544 ms / 100) 2.473 -> 2.469 ( -0.16%) [ +0.08% +0.00% +0.08% / -0.16% +0.32% +0.40%] index_copy_ strided 5 : Elapsed 0.025 ms (2.475 ms / 100) 2.542 -> 2.546 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.35% +0.35%] index_add_ strided 7 : Elapsed 0.025 ms (2.543 ms / 100) 2.472 -> 2.477 ( +0.20%) [ +0.00% +0.04% +0.12% / +0.20% +0.28% +0.32%] index_copy_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.551 -> 2.550 ( -0.04%) [ +0.24% +0.20% +0.00% / -0.04% +0.47% +0.20%] index_add_ perm : Elapsed 0.026 ms (2.557 ms / 100) 2.477 -> 2.486 ( +0.36%) [ +0.20% +0.20% +0.00% / +0.36% +0.48% +0.36%] index_copy_ perm : Elapsed 0.025 ms (2.482 ms / 100) 2.547 -> 2.548 ( +0.04%) [ +0.16% +0.16% +0.00% / +0.04% +0.27% +0.47%] index_add_ perm_sorted : Elapsed 0.026 ms (2.551 ms / 100) 2.473 -> 2.476 ( +0.12%) [ +0.32% +0.32% +0.00% / +0.12% +0.69% +0.49%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.481 ms / 100) 5.599 -> 5.631 ( +0.57%) [ +0.54% +0.02% +0.00% / +0.57% +1.38% +1.18%] index_select const : Elapsed 0.056 ms (5.629 ms / 100) 5.628 -> 5.632 ( +0.07%) [ +0.11% +0.12% +0.00% / +0.07% +0.57% +0.48%] index_select wrap : Elapsed 0.056 ms (5.634 ms / 100) 5.671 -> 5.662 ( -0.16%) [ +0.16% +0.11% +0.00% / -0.16% +0.23% +0.55%] index_select linear : Elapsed 0.057 ms (5.680 ms / 100) 5.624 -> 5.646 ( +0.39%) [ +0.12% +0.09% +0.00% / +0.39% +0.52% +0.52%] index_select reverse : Elapsed 0.056 ms (5.631 ms / 100) 5.603 -> 5.606 ( +0.05%) [ +0.00% +0.59% +0.52% / +0.05% +1.00% +1.02%] index_select skip64 : Elapsed 0.056 ms (5.603 ms / 100) 5.605 -> 5.628 ( +0.41%) [ +0.45% +0.41% +0.00% / +0.41% +1.28% +0.98%] index_select skip256 : Elapsed 0.056 ms (5.630 ms / 100) 5.624 -> 5.625 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.53% +0.57%] index_select spread : Elapsed 0.056 ms (5.628 ms / 100) 5.626 -> 5.627 ( +0.02%) [ +0.09% +0.02% +0.00% / +0.02% +0.71% +0.84%] index_select strided 3 : Elapsed 0.056 ms (5.631 ms / 100) 5.630 -> 5.623 ( -0.12%) [ +0.00% +0.05% +0.12% / -0.12% +0.66% +0.53%] index_select random : Elapsed 0.056 ms (5.630 ms / 100) 5.619 -> 5.622 ( +0.05%) [ +0.02% +0.20% +0.00% / +0.05% +0.80% +0.85%] index_select random_sorted : Elapsed 0.056 ms (5.620 ms / 100) out_shape = [20, 5, 16, 40] in_shape = [20, 5, 4, 40] idx_dim = 2 B = [20, 5, 16, 40] (stride (3200, 640, 40, 1)) A = [20, 5, 4, 40] (stride (1, 800, 4000, 20)) dim = 2 2.397 -> 2.398 ( +0.04%) [ +0.00% +0.21% +0.17% / +0.04% +0.63% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.340 -> 2.339 ( -0.04%) [ +0.38% +0.17% +0.00% / -0.04% +0.47% +0.47%] index_copy_ linear : Elapsed 0.023 ms (2.349 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.42% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.342 -> 2.346 ( +0.17%) [ +0.26% +0.00% +0.51% / +0.17% +0.77% +0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.348 ms / 100) 2.396 -> 2.397 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.46% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.337 -> 2.339 ( +0.09%) [ +0.00% +0.21% +0.17% / +0.09% +0.47% +0.47%] index_copy_ spread : Elapsed 0.023 ms (2.337 ms / 100) 2.401 -> 2.405 ( +0.17%) [ +0.00% +0.04% +0.08% / +0.17% +0.58% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.401 ms / 100) 2.345 -> 2.347 ( +0.09%) [ +0.04% +0.21% +0.00% / +0.09% +0.60% +0.43%] index_copy_ strided 3 : Elapsed 0.023 ms (2.346 ms / 100) 2.405 -> 2.401 ( -0.17%) [ +0.00% +0.00% +0.00% / -0.17% +0.17% +0.12%] index_add_ strided 5 : Elapsed 0.024 ms (2.405 ms / 100) 2.346 -> 2.351 ( +0.21%) [ +0.17% +0.13% +0.00% / +0.21% +0.47% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.350 ms / 100) 2.394 -> 2.394 ( +0.00%) [ +0.04% +0.00% +0.17% / +0.00% +0.33% +0.38%] index_add_ strided 7 : Elapsed 0.024 ms (2.395 ms / 100) 2.338 -> 2.343 ( +0.21%) [ +0.00% +0.09% +0.34% / +0.21% +0.38% +0.56%] index_copy_ strided 7 : Elapsed 0.023 ms (2.338 ms / 100) 2.396 -> 2.393 ( -0.13%) [ +0.00% +0.08% +0.00% / -0.13% +0.21% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.338 -> 2.344 ( +0.26%) [ +0.00% +0.26% +0.04% / +0.56% +0.51% +0.26%] index_copy_ perm : Elapsed 0.023 ms (2.338 ms / 100) 2.391 -> 2.395 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.33% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.391 ms / 100) 2.332 -> 2.341 ( +0.39%) [ +0.30% +0.30% +0.00% / +0.39% +0.64% +0.47%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) 5.229 -> 5.232 ( +0.06%) [ +0.10% +0.00% +0.11% / +0.06% +0.55% +0.61%] index_select const : Elapsed 0.052 ms (5.234 ms / 100) 5.273 -> 5.280 ( +0.13%) [ +0.23% +0.04% +0.00% / +0.13% +0.36% +0.42%] index_select wrap : Elapsed 0.053 ms (5.285 ms / 100) 5.261 -> 5.260 ( -0.02%) [ +0.00% +0.06% +0.04% / -0.02% +0.51% +0.40%] index_select linear : Elapsed 0.053 ms (5.261 ms / 100) 5.275 -> 5.277 ( +0.04%) [ +0.08% +0.00% +0.06% / +0.04% +0.36% +0.44%] index_select reverse : Elapsed 0.053 ms (5.279 ms / 100) 5.274 -> 5.283 ( +0.17%) [ +0.00% +0.09% +0.06% / +0.17% +0.36% +0.36%] index_select skip64 : Elapsed 0.053 ms (5.274 ms / 100) 5.253 -> 5.255 ( +0.04%) [ +0.04% +0.00% +0.10% / +0.04% +0.51% +0.53%] index_select skip256 : Elapsed 0.053 ms (5.255 ms / 100) 5.253 -> 5.271 ( +0.34%) [ +0.00% +0.30% +0.13% / +0.34% +0.57% +0.70%] index_select spread : Elapsed 0.053 ms (5.253 ms / 100) 5.259 -> 5.258 ( -0.02%) [ +0.06% +0.00% +0.04% / -0.02% +0.34% +0.40%] index_select strided 3 : Elapsed 0.053 ms (5.262 ms / 100) 5.277 -> 5.274 ( -0.06%) [ +0.04% +0.00% +0.04% / -0.06% +0.36% +0.34%] index_select random : Elapsed 0.053 ms (5.279 ms / 100) 5.235 -> 5.232 ( -0.06%) [ +0.02% +0.06% +0.00% / -0.06% +0.34% +0.27%] index_select random_sorted : Elapsed 0.052 ms (5.236 ms / 100) B = [20, 5, 16, 40] (stride (3200, 640, 1, 16)) A = [20, 5, 4, 40] (stride (20, 4, 1, 400)) dim = 2 1.211 -> 1.216 ( +0.41%) [ +0.00% +0.33% +0.33% / +0.41% +2.31% +2.39%] index_add_ linear : Elapsed 0.012 ms (1.211 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.00% +0.34% +0.25% / +0.17% +2.69% +2.27%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.204 -> 1.210 ( +0.50%) [ +0.00% +0.83% +0.42% / +0.50% +2.24% +2.49%] index_add_ reverse : Elapsed 0.012 ms (1.204 ms / 100) 1.186 -> 1.191 ( +0.42%) [ +0.17% +0.00% +0.42% / +0.42% +2.61% +2.36%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.253 -> 1.257 ( +0.32%) [ +0.16% +0.00% +0.24% / +0.32% +1.68% +2.39%] index_add_ spread : Elapsed 0.013 ms (1.255 ms / 100) 1.261 -> 1.266 ( +0.40%) [ +0.24% +0.00% +0.08% / +0.40% +2.22% +2.14%] index_copy_ spread : Elapsed 0.013 ms (1.264 ms / 100) 1.246 -> 1.253 ( +0.56%) [ +0.00% +0.48% +0.24% / +0.56% +2.41% +2.41%] index_add_ strided 3 : Elapsed 0.012 ms (1.246 ms / 100) 1.262 -> 1.266 ( +0.32%) [ +0.08% +0.00% +0.16% / +0.32% +2.30% +2.06%] index_copy_ strided 3 : Elapsed 0.013 ms (1.263 ms / 100) 1.251 -> 1.254 ( +0.24%) [ +0.00% +0.24% +0.32% / +0.24% +2.40% +2.08%] index_add_ strided 5 : Elapsed 0.013 ms (1.251 ms / 100) 1.258 -> 1.264 ( +0.48%) [ +0.00% +0.24% +0.48% / +0.48% +2.54% +2.62%] index_copy_ strided 5 : Elapsed 0.013 ms (1.258 ms / 100) 1.251 -> 1.260 ( +0.72%) [ +0.64% +0.00% +0.88% / +0.72% +3.04% +2.80%] index_add_ strided 7 : Elapsed 0.013 ms (1.259 ms / 100) 1.262 -> 1.266 ( +0.32%) [ +0.00% +0.08% +0.00% / +0.32% +2.77% +2.30%] index_copy_ strided 7 : Elapsed 0.013 ms (1.262 ms / 100) 1.262 -> 1.264 ( +0.16%) [ +0.08% +0.00% +0.32% / +0.16% +1.98% +2.30%] index_add_ perm : Elapsed 0.013 ms (1.263 ms / 100) 1.260 -> 1.263 ( +0.24%) [ +0.08% +0.00% +0.40% / +0.24% +2.62% +2.14%] index_copy_ perm : Elapsed 0.013 ms (1.261 ms / 100) 1.260 -> 1.261 ( +0.08%) [ +0.56% +0.00% +0.56% / +0.08% +1.90% +1.35%] index_add_ perm_sorted : Elapsed 0.013 ms (1.267 ms / 100) 1.259 -> 1.259 ( +0.00%) [ +0.08% +0.00% +0.16% / +0.00% +1.91% +1.99%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.260 ms / 100) 2.126 -> 2.130 ( +0.19%) [ +0.00% +0.09% +0.24% / +0.19% +1.18% +1.32%] index_select const : Elapsed 0.021 ms (2.126 ms / 100) 2.122 -> 2.122 ( +0.00%) [ +0.28% +0.05% +0.00% / +0.00% +1.32% +1.23%] index_select wrap : Elapsed 0.021 ms (2.128 ms / 100) 2.120 -> 2.125 ( +0.24%) [ +0.05% +0.00% +0.05% / +0.24% +1.46% +1.46%] index_select linear : Elapsed 0.021 ms (2.121 ms / 100) 2.120 -> 2.124 ( +0.19%) [ +0.14% +0.00% +0.00% / +0.19% +1.46% +1.60%] index_select reverse : Elapsed 0.021 ms (2.123 ms / 100) 2.119 -> 2.119 ( +0.00%) [ +0.00% +0.14% +0.09% / +0.00% +1.65% +1.56%] index_select skip64 : Elapsed 0.021 ms (2.119 ms / 100) 2.123 -> 2.127 ( +0.19%) [ +0.24% +0.05% +0.00% / +0.19% +1.51% +1.27%] index_select skip256 : Elapsed 0.021 ms (2.128 ms / 100) 2.123 -> 2.126 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +1.74% +1.60%] index_select spread : Elapsed 0.021 ms (2.126 ms / 100) 2.125 -> 2.127 ( +0.09%) [ +0.00% +0.00% +0.14% / +0.09% +1.18% +1.18%] index_select strided 3 : Elapsed 0.021 ms (2.125 ms / 100) 2.119 -> 2.123 ( +0.19%) [ +0.28% +0.09% +0.00% / +0.19% +1.46% +1.65%] index_select random : Elapsed 0.021 ms (2.125 ms / 100) 2.124 -> 2.120 ( -0.19%) [ +0.00% +0.00% +0.00% / -0.19% +1.37% +1.55%] index_select random_sorted : Elapsed 0.021 ms (2.124 ms / 100) B = [20, 5, 16, 40] (stride (3200, 1, 200, 5)) A = [20, 5, 4, 40] (stride (200, 40, 4000, 1)) dim = 2 2.370 -> 2.370 ( +0.00%) [ +0.13% +0.00% +0.17% / +0.00% +0.51% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.373 ms / 100) 2.322 -> 2.323 ( +0.04%) [ +0.13% +0.26% +0.00% / +0.04% +0.26% +0.34%] index_copy_ linear : Elapsed 0.023 ms (2.325 ms / 100) 2.370 -> 2.374 ( +0.17%) [ +0.00% +0.08% +0.34% / +0.17% +0.46% +0.55%] index_add_ reverse : Elapsed 0.024 ms (2.370 ms / 100) 2.325 -> 2.329 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.52% +0.26%] index_copy_ reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.368 -> 2.369 ( +0.04%) [ +0.13% +0.21% +0.00% / +0.04% +0.51% +0.51%] index_add_ spread : Elapsed 0.024 ms (2.371 ms / 100) 2.321 -> 2.322 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.26% +0.34%] index_copy_ spread : Elapsed 0.023 ms (2.321 ms / 100) 2.368 -> 2.372 ( +0.17%) [ +0.21% +0.21% +0.00% / +0.17% +0.63% +0.59%] index_add_ strided 3 : Elapsed 0.024 ms (2.373 ms / 100) 2.324 -> 2.330 ( +0.26%) [ +0.04% +0.00% +0.00% / +0.30% +0.26% +0.30%] index_copy_ strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.368 -> 2.375 ( +0.30%) [ +0.00% +0.13% +0.13% / +0.30% +0.59% +0.38%] index_add_ strided 5 : Elapsed 0.024 ms (2.368 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.13% +0.17%] index_copy_ strided 5 : Elapsed 0.023 ms (2.324 ms / 100) 2.371 -> 2.371 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.38% +0.30%] index_add_ strided 7 : Elapsed 0.024 ms (2.373 ms / 100) 2.324 -> 2.321 ( -0.13%) [ +0.04% +0.04% +0.00% / -0.13% +0.34% +0.39%] index_copy_ strided 7 : Elapsed 0.023 ms (2.325 ms / 100) 2.374 -> 2.377 ( +0.13%) [ +0.29% +0.42% +0.00% / +0.13% +0.25% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.381 ms / 100) 2.327 -> 2.327 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.56% +0.17%] index_copy_ perm : Elapsed 0.023 ms (2.329 ms / 100) 2.374 -> 2.373 ( -0.04%) [ +0.08% +0.17% +0.00% / -0.04% +0.04% +0.08%] index_add_ perm_sorted : Elapsed 0.024 ms (2.376 ms / 100) 2.326 -> 2.324 ( -0.09%) [ +0.00% +0.04% +0.13% / -0.09% +0.09% +0.21%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.326 ms / 100) 4.724 -> 4.727 ( +0.06%) [ +0.00% +0.08% +0.00% / +0.06% +0.44% +0.36%] index_select const : Elapsed 0.047 ms (4.724 ms / 100) 4.789 -> 4.787 ( -0.04%) [ +0.02% +0.00% +0.02% / -0.04% +0.48% +0.35%] index_select wrap : Elapsed 0.048 ms (4.790 ms / 100) 4.786 -> 4.793 ( +0.15%) [ +0.04% +0.04% +0.00% / +0.15% +0.42% +0.38%] index_select linear : Elapsed 0.048 ms (4.788 ms / 100) 4.788 -> 4.793 ( +0.10%) [ +0.15% +0.00% +0.13% / +0.10% +0.38% +0.52%] index_select reverse : Elapsed 0.048 ms (4.795 ms / 100) 4.740 -> 4.745 ( +0.11%) [ +0.08% +0.11% +0.00% / +0.11% +0.36% +0.36%] index_select skip64 : Elapsed 0.047 ms (4.744 ms / 100) 4.733 -> 4.736 ( +0.06%) [ +0.04% +0.02% +0.00% / +0.06% +0.15% +0.17%] index_select skip256 : Elapsed 0.047 ms (4.735 ms / 100) 4.783 -> 4.786 ( +0.06%) [ +0.00% +0.17% +0.00% / +0.06% +0.17% +0.31%] index_select spread : Elapsed 0.048 ms (4.783 ms / 100) 4.786 -> 4.786 ( +0.00%) [ +0.02% +0.10% +0.00% / +0.00% +0.25% +0.38%] index_select strided 3 : Elapsed 0.048 ms (4.787 ms / 100) 4.793 -> 4.798 ( +0.10%) [ +0.08% +0.10% +0.00% / +0.10% +0.21% +0.25%] index_select random : Elapsed 0.048 ms (4.797 ms / 100) 4.763 -> 4.767 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.31% +0.38%] index_select random_sorted : Elapsed 0.048 ms (4.767 ms / 100) B = [20, 5, 16, 40] (stride (16, 12800, 1, 320)) A = [20, 5, 4, 40] (stride (5, 1, 100, 400)) dim = 2 2.522 -> 2.520 ( -0.08%) [ +0.16% +0.00% +0.20% / -0.08% +0.40% +0.52%] index_add_ linear : Elapsed 0.025 ms (2.526 ms / 100) 2.486 -> 2.490 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.28% +0.32%] index_copy_ linear : Elapsed 0.025 ms (2.487 ms / 100) 2.520 -> 2.517 ( -0.12%) [ +0.16% +0.00% +0.12% / -0.12% +0.28% +0.40%] index_add_ reverse : Elapsed 0.025 ms (2.524 ms / 100) 2.487 -> 2.489 ( +0.08%) [ +0.04% +0.00% +0.24% / +0.12% +0.20% +0.08%] index_copy_ reverse : Elapsed 0.025 ms (2.488 ms / 100) 2.558 -> 2.559 ( +0.04%) [ +0.23% +0.31% +0.00% / +0.04% +0.27% +0.31%] index_add_ spread : Elapsed 0.026 ms (2.564 ms / 100) 2.590 -> 2.586 ( -0.15%) [ +0.27% +0.00% +0.04% / -0.04% -0.15% -0.08%] index_copy_ spread : Elapsed 0.026 ms (2.597 ms / 100) 2.561 -> 2.561 ( +0.00%) [ +0.20% +0.00% +0.04% / +0.00% +0.59% +0.31%] index_add_ strided 3 : Elapsed 0.026 ms (2.566 ms / 100) 2.588 -> 2.587 ( -0.04%) [ +0.08% +0.12% +0.00% / +0.08% +0.04% -0.04%] index_copy_ strided 3 : Elapsed 0.026 ms (2.590 ms / 100) 2.565 -> 2.562 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.12% +0.31% +0.23%] index_add_ strided 5 : Elapsed 0.026 ms (2.565 ms / 100) 2.589 -> 2.592 ( +0.12%) [ +0.00% +0.19% +0.08% / +0.19% +0.12% +0.12%] index_copy_ strided 5 : Elapsed 0.026 ms (2.589 ms / 100) 2.560 -> 2.562 ( +0.08%) [ +0.00% +0.23% +0.04% / +0.08% +0.51% +0.35%] index_add_ strided 7 : Elapsed 0.026 ms (2.560 ms / 100) 2.587 -> 2.589 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.15% +0.08% +0.19%] index_copy_ strided 7 : Elapsed 0.026 ms (2.590 ms / 100) 2.565 -> 2.561 ( -0.16%) [ +0.12% +0.00% +0.04% / -0.16% +0.39% +0.27%] index_add_ perm : Elapsed 0.026 ms (2.568 ms / 100) 2.587 -> 2.588 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.12% +0.23%] index_copy_ perm : Elapsed 0.026 ms (2.589 ms / 100) 2.569 -> 2.572 ( +0.12%) [ +0.04% +0.00% +0.08% / +0.12% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.026 ms (2.570 ms / 100) 2.590 -> 2.592 ( +0.08%) [ +0.00% +0.15% +0.19% / +0.12% +0.08% +0.12%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.590 ms / 100) 5.598 -> 5.601 ( +0.05%) [ +0.13% +0.00% +0.09% / +0.05% +0.41% +0.55%] index_select const : Elapsed 0.056 ms (5.605 ms / 100) 5.589 -> 5.592 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.05% +0.27% +0.23%] index_select wrap : Elapsed 0.056 ms (5.592 ms / 100) 5.594 -> 5.598 ( +0.07%) [ +0.11% +0.11% +0.00% / +0.07% +0.25% +0.38%] index_select linear : Elapsed 0.056 ms (5.600 ms / 100) 5.588 -> 5.592 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.25% +0.34%] index_select reverse : Elapsed 0.056 ms (5.591 ms / 100) 5.601 -> 5.614 ( +0.23%) [ +0.00% +0.23% +0.00% / +0.23% +0.41% +0.43%] index_select skip64 : Elapsed 0.056 ms (5.601 ms / 100) 5.614 -> 5.619 ( +0.09%) [ +0.04% +0.18% +0.00% / +0.09% +0.29% +0.25%] index_select skip256 : Elapsed 0.056 ms (5.616 ms / 100) 5.589 -> 5.591 ( +0.04%) [ +0.09% +0.00% +0.02% / +0.04% +0.30% +0.36%] index_select spread : Elapsed 0.056 ms (5.594 ms / 100) 5.570 -> 5.576 ( +0.11%) [ +0.11% +0.00% +0.07% / +0.11% +0.25% +0.25%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.576 ( +0.07%) [ +0.02% +0.00% +0.04% / +0.07% +0.25% +0.22%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.574 -> 5.578 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.23% +0.29%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [20, 5, 16, 40] (stride (1, 20, 4000, 100)) A = [20, 5, 4, 40] (stride (800, 4, 1, 20)) dim = 2 2.548 -> 2.546 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.35% +0.47%] index_add_ linear : Elapsed 0.026 ms (2.550 ms / 100) 2.500 -> 2.507 ( +0.28%) [ +0.28% +0.04% +0.00% / +0.28% +0.40% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.507 ms / 100) 2.546 -> 2.546 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.43% +0.31%] index_add_ reverse : Elapsed 0.025 ms (2.546 ms / 100) 2.500 -> 2.501 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.44% +0.24%] index_copy_ reverse : Elapsed 0.025 ms (2.501 ms / 100) 2.551 -> 2.555 ( +0.16%) [ +0.04% +0.24% +0.00% / +0.16% +0.51% +0.47%] index_add_ spread : Elapsed 0.026 ms (2.552 ms / 100) 2.506 -> 2.506 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.32% +0.20%] index_copy_ spread : Elapsed 0.025 ms (2.506 ms / 100) 2.559 -> 2.556 ( -0.12%) [ +0.16% +0.00% +0.00% / -0.12% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.026 ms (2.563 ms / 100) 2.511 -> 2.513 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.48% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.511 ms / 100) 2.557 -> 2.561 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.16% +0.51% +0.66%] index_add_ strided 5 : Elapsed 0.026 ms (2.558 ms / 100) 2.508 -> 2.510 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.56% +0.36%] index_copy_ strided 5 : Elapsed 0.025 ms (2.512 ms / 100) 2.552 -> 2.550 ( -0.08%) [ +0.12% +0.16% +0.00% / -0.08% +0.43% +0.39%] index_add_ strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.507 -> 2.508 ( +0.04%) [ +0.20% +0.16% +0.00% / +0.04% +0.32% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.512 ms / 100) 2.545 -> 2.547 ( +0.08%) [ +0.20% +0.00% +0.00% / +0.08% +0.39% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.550 ms / 100) 2.498 -> 2.501 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.16% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.499 ms / 100) 2.546 -> 2.551 ( +0.20%) [ +0.04% +0.00% +0.20% / +0.20% +0.27% +0.39%] index_add_ perm_sorted : Elapsed 0.025 ms (2.547 ms / 100) 2.503 -> 2.501 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.04% +0.04%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.504 ms / 100) 5.714 -> 5.721 ( +0.12%) [ +0.25% +0.25% +0.00% / +0.12% +0.65% +0.61%] index_select const : Elapsed 0.057 ms (5.728 ms / 100) 5.751 -> 5.750 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.35% +0.30%] index_select wrap : Elapsed 0.058 ms (5.754 ms / 100) 5.744 -> 5.750 ( +0.10%) [ +0.14% +0.00% +0.07% / +0.10% +0.44% +0.38%] index_select linear : Elapsed 0.058 ms (5.752 ms / 100) 5.764 -> 5.765 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.35% +0.33%] index_select reverse : Elapsed 0.058 ms (5.764 ms / 100) 5.767 -> 5.763 ( -0.07%) [ +0.00% +0.07% +0.12% / -0.07% +0.29% +0.40%] index_select skip64 : Elapsed 0.058 ms (5.767 ms / 100) 5.754 -> 5.750 ( -0.07%) [ +0.00% +0.12% +0.00% / -0.07% +0.14% +0.14%] index_select skip256 : Elapsed 0.058 ms (5.754 ms / 100) 5.751 -> 5.747 ( -0.07%) [ +0.00% +0.09% +0.00% / -0.07% +0.30% +0.37%] index_select spread : Elapsed 0.058 ms (5.751 ms / 100) 5.746 -> 5.744 ( -0.03%) [ +0.00% +0.12% +0.12% / -0.03% +0.33% +0.23%] index_select strided 3 : Elapsed 0.057 ms (5.746 ms / 100) 5.764 -> 5.766 ( +0.03%) [ +0.05% +0.07% +0.00% / +0.03% +0.31% +0.29%] index_select random : Elapsed 0.058 ms (5.767 ms / 100) 5.717 -> 5.723 ( +0.10%) [ +0.05% +0.19% +0.00% / +0.10% +0.54% +0.49%] index_select random_sorted : Elapsed 0.057 ms (5.720 ms / 100) B = [20, 5, 16, 40] (stride (80, 16, 1, 1600)) A = [20, 5, 4, 40] (stride (800, 1, 200, 5)) dim = 2 2.364 -> 2.364 ( +0.00%) [ +0.00% +0.08% +0.25% / +0.00% +0.34% +0.47%] index_add_ linear : Elapsed 0.024 ms (2.364 ms / 100) 2.343 -> 2.343 ( +0.00%) [ +0.34% +0.00% +0.13% / +0.00% +0.38% +0.38%] index_copy_ linear : Elapsed 0.024 ms (2.351 ms / 100) 2.363 -> 2.367 ( +0.17%) [ +0.04% +0.30% +0.00% / +0.17% +0.17% +0.42%] index_add_ reverse : Elapsed 0.024 ms (2.364 ms / 100) 2.341 -> 2.345 ( +0.17%) [ +0.09% +0.26% +0.00% / +0.17% +0.34% +0.30%] index_copy_ reverse : Elapsed 0.023 ms (2.343 ms / 100) 2.400 -> 2.403 ( +0.13%) [ +0.13% +0.17% +0.00% / +0.13% +0.33% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.442 -> 2.442 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.33% +0.41%] index_copy_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.37% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.37% +0.25%] index_copy_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.402 -> 2.402 ( +0.00%) [ +0.17% +0.00% +0.29% / +0.00% +0.67% +0.79%] index_add_ strided 5 : Elapsed 0.024 ms (2.406 ms / 100) 2.442 -> 2.447 ( +0.20%) [ +0.00% +0.04% +0.04% / +0.20% +0.37% +0.61%] index_copy_ strided 5 : Elapsed 0.024 ms (2.442 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.21% +0.25% +0.00% / +0.12% +0.25% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.25% +0.00% +0.08% / +0.12% +0.29% +0.49%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.403 -> 2.408 ( +0.21%) [ +0.37% +0.04% +0.00% / +0.21% +0.58% +0.62%] index_add_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.441 -> 2.445 ( +0.16%) [ +0.16% +0.25% +0.00% / +0.16% +0.57% +0.57%] index_copy_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.00% +0.21% +0.04% / +0.12% +0.42% +0.46%] index_add_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 2.443 -> 2.444 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.04% +0.45% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.443 ms / 100) 5.045 -> 5.054 ( +0.18%) [ +0.04% +0.00% +0.04% / +0.18% +0.50% +0.48%] index_select const : Elapsed 0.050 ms (5.047 ms / 100) 5.047 -> 5.049 ( +0.04%) [ +0.00% +0.02% +0.02% / +0.04% +0.24% +0.34%] index_select wrap : Elapsed 0.050 ms (5.047 ms / 100) 5.064 -> 5.057 ( -0.14%) [ +0.00% +0.06% +0.06% / -0.14% +0.22% +0.22%] index_select linear : Elapsed 0.051 ms (5.064 ms / 100) 5.052 -> 5.055 ( +0.06%) [ +0.00% +0.04% +0.04% / +0.06% +0.22% +0.34%] index_select reverse : Elapsed 0.051 ms (5.052 ms / 100) 5.036 -> 5.039 ( +0.06%) [ +0.06% +0.10% +0.00% / +0.06% +0.32% +0.36%] index_select skip64 : Elapsed 0.050 ms (5.039 ms / 100) 5.033 -> 5.038 ( +0.10%) [ +0.16% +0.00% +0.10% / +0.10% +0.32% +0.36%] index_select skip256 : Elapsed 0.050 ms (5.041 ms / 100) 5.046 -> 5.049 ( +0.06%) [ +0.08% +0.02% +0.00% / +0.06% +0.40% +0.30%] index_select spread : Elapsed 0.051 ms (5.050 ms / 100) 5.058 -> 5.069 ( +0.22%) [ +0.10% +0.02% +0.00% / +0.22% +0.47% +0.34%] index_select strided 3 : Elapsed 0.051 ms (5.063 ms / 100) 5.053 -> 5.049 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.08% +0.32% +0.28%] index_select random : Elapsed 0.051 ms (5.055 ms / 100) 5.048 -> 5.059 ( +0.22%) [ +0.12% +0.04% +0.00% / +0.22% +0.52% +0.36%] index_select random_sorted : Elapsed 0.051 ms (5.054 ms / 100) out_shape = [20, 5, 4, 16] in_shape = [20, 5, 4, 40] idx_dim = 3 B = [20, 5, 4, 16] (stride (320, 64, 1, 4)) A = [20, 5, 4, 40] (stride (5, 1, 100, 400)) dim = 3 1.281 -> 1.284 ( +0.23%) [ +0.23% +0.08% +0.00% / +0.23% +0.70% +0.86%] index_select const : Elapsed 0.013 ms (1.284 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.31% +0.31%] index_select wrap : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.24% +0.24%] index_select linear : Elapsed 0.013 ms (1.276 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.39% +0.39%] index_select reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.39% +0.54%] index_select skip64 : Elapsed 0.013 ms (1.287 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.47% +0.62%] index_select skip256 : Elapsed 0.013 ms (1.281 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.08% +0.16%] index_select spread : Elapsed 0.013 ms (1.277 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.39% +0.00% +0.00%] index_select strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.278 -> 1.275 ( -0.23%) [ +0.00% +0.08% +0.00% / +0.00% -0.23% -0.23%] index_select strided 5 : Elapsed 0.013 ms (1.278 ms / 100) 1.277 -> 1.273 ( -0.31%) [ +0.00% +0.00% +0.00% / +0.00% -0.31% -0.23%] index_select strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.00% +0.00% +0.23% / +0.08% +0.00% +0.08%] index_select strided 8 : Elapsed 0.013 ms (1.285 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.16% +0.31%] index_select strided 16 : Elapsed 0.013 ms (1.281 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.24% +0.16% +0.00% / +0.08% +0.39% +0.39%] index_select random : Elapsed 0.013 ms (1.278 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.47% +0.00% +0.00% / +0.08% +0.16% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.290 ms / 100) 1.287 -> 1.283 ( -0.31%) [ +0.00% +0.00% +0.00% / +0.08% -0.31% -0.31%] index_select perm : Elapsed 0.013 ms (1.287 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.16% +0.16% +0.08%] index_select perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) B = [20, 5, 4, 16] (stride (64, 1280, 1, 4)) dim = 3 fill_cnt = 40 1.792 -> 1.793 ( +0.06%) [ +0.28% +0.22% +0.00% / +0.06% +0.22% +0.33%] index_fill_ const : Elapsed 0.018 ms (1.797 ms / 100) 1.800 -> 1.801 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.28% +0.22%] index_fill_ linear : Elapsed 0.018 ms (1.801 ms / 100) 1.795 -> 1.792 ( -0.17%) [ +0.17% +0.22% +0.00% / -0.17% +0.00% +0.06%] index_fill_ reverse : Elapsed 0.018 ms (1.798 ms / 100) 1.795 -> 1.793 ( -0.11%) [ +0.06% +0.06% +0.00% / +0.06% -0.11% -0.11%] index_fill_ skip64 : Elapsed 0.018 ms (1.796 ms / 100) 1.794 -> 1.793 ( -0.06%) [ +0.06% +0.28% +0.00% / -0.06% -0.06% +0.06%] index_fill_ skip256 : Elapsed 0.018 ms (1.795 ms / 100) 1.797 -> 1.801 ( +0.22%) [ +0.11% +0.33% +0.00% / +0.22% +0.28% +0.33%] index_fill_ spread : Elapsed 0.018 ms (1.799 ms / 100) 1.801 -> 1.798 ( -0.17%) [ +0.06% +0.00% +0.06% / -0.17% +0.28% -0.11%] index_fill_ strided 3 : Elapsed 0.018 ms (1.802 ms / 100) 1.802 -> 1.800 ( -0.11%) [ +0.17% +0.00% +0.06% / -0.06% +0.00% -0.11%] index_fill_ strided 5 : Elapsed 0.018 ms (1.805 ms / 100) 1.796 -> 1.798 ( +0.11%) [ +0.39% +0.00% +0.28% / +0.11% +0.22% +0.72%] index_fill_ strided 7 : Elapsed 0.018 ms (1.803 ms / 100) 1.796 -> 1.791 ( -0.28%) [ +0.17% +0.06% +0.00% / -0.28% -0.06% +0.00%] index_fill_ strided 8 : Elapsed 0.018 ms (1.799 ms / 100) 1.802 -> 1.802 ( +0.00%) [ +0.22% +0.00% +0.17% / +0.11% +0.06% +0.00%] index_fill_ random : Elapsed 0.018 ms (1.806 ms / 100) 1.802 -> 1.802 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.00% +0.17%] index_fill_ random_sorted : Elapsed 0.018 ms (1.803 ms / 100) B = [20, 5, 4, 16] (stride (80, 16, 1600, 1)) A = [20, 5, 4, 40] (stride (160, 3200, 1, 4)) dim = 3 3.611 -> 3.615 ( +0.11%) [ +0.08% +0.00% +0.17% / +0.11% +0.69% +0.75%] index_select const : Elapsed 0.036 ms (3.614 ms / 100) 3.618 -> 3.616 ( -0.06%) [ +0.03% +0.03% +0.00% / -0.06% +0.58% +0.66%] index_select wrap : Elapsed 0.036 ms (3.619 ms / 100) 3.620 -> 3.620 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.66% +0.69%] index_select linear : Elapsed 0.036 ms (3.623 ms / 100) 3.617 -> 3.616 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.75% +0.66%] index_select reverse : Elapsed 0.036 ms (3.617 ms / 100) 3.603 -> 3.604 ( +0.03%) [ +0.19% +0.06% +0.00% / +0.03% +0.83% +0.83%] index_select skip64 : Elapsed 0.036 ms (3.610 ms / 100) 3.611 -> 3.612 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.86% +0.91%] index_select skip256 : Elapsed 0.036 ms (3.611 ms / 100) 3.614 -> 3.616 ( +0.06%) [ +0.14% +0.06% +0.00% / +0.06% +0.77% +0.97%] index_select spread : Elapsed 0.036 ms (3.619 ms / 100) 3.608 -> 3.613 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.75% +0.78%] index_select strided 3 : Elapsed 0.036 ms (3.613 ms / 100) 3.613 -> 3.614 ( +0.03%) [ +0.00% +0.11% +0.00% / +0.03% +0.58% +0.72%] index_select strided 5 : Elapsed 0.036 ms (3.613 ms / 100) 3.616 -> 3.617 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.03% +0.77% +0.77%] index_select strided 7 : Elapsed 0.036 ms (3.616 ms / 100) 3.594 -> 3.598 ( +0.11%) [ +0.11% +0.00% +0.08% / +0.11% +0.72% +0.78%] index_select strided 8 : Elapsed 0.036 ms (3.598 ms / 100) 3.603 -> 3.607 ( +0.11%) [ +0.08% +0.00% +0.17% / +0.11% +0.64% +0.75%] index_select strided 16 : Elapsed 0.036 ms (3.606 ms / 100) 3.613 -> 3.619 ( +0.17%) [ +0.00% +0.17% +0.14% / +0.17% +0.86% +0.86%] index_select random : Elapsed 0.036 ms (3.613 ms / 100) 3.599 -> 3.599 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.81% +0.78%] index_select random_sorted : Elapsed 0.036 ms (3.601 ms / 100) 3.612 -> 3.612 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.69% +0.72%] index_select perm : Elapsed 0.036 ms (3.613 ms / 100) 3.626 -> 3.619 ( -0.19%) [ +0.00% +0.03% +0.00% / -0.19% +0.58% +0.61%] index_select perm_sorted : Elapsed 0.036 ms (3.626 ms / 100) B = [20, 5, 4, 16] (stride (1, 20, 1600, 100)) A = [20, 5, 4, 40] (stride (200, 40, 4000, 1)) dim = 3 3.933 -> 3.938 ( +0.13%) [ +0.43% +0.00% +0.10% / +0.13% +0.43% +0.53%] index_select const : Elapsed 0.040 ms (3.950 ms / 100) 3.935 -> 3.937 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.58% +0.61%] index_select wrap : Elapsed 0.039 ms (3.935 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.00% +0.08% +0.28% / +0.00% +0.58% +0.43%] index_select linear : Elapsed 0.039 ms (3.939 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.20% +0.05% / +0.00% +0.69% +0.66%] index_select reverse : Elapsed 0.039 ms (3.934 ms / 100) 3.946 -> 3.947 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.51% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.948 ms / 100) 3.941 -> 3.943 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.38% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.941 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.00% +0.28% +0.28% / +0.13% +0.61% +0.66%] index_select spread : Elapsed 0.039 ms (3.920 ms / 100) 3.945 -> 3.943 ( -0.05%) [ +0.00% +0.03% +0.03% / -0.05% +0.56% +0.43%] index_select strided 3 : Elapsed 0.039 ms (3.945 ms / 100) 3.934 -> 3.933 ( -0.03%) [ +0.05% +0.00% +0.08% / -0.03% +0.48% +0.48%] index_select strided 5 : Elapsed 0.039 ms (3.936 ms / 100) 3.901 -> 3.920 ( +0.49%) [ +0.21% +0.00% +0.31% / +0.49% +0.59% +0.59%] index_select strided 7 : Elapsed 0.039 ms (3.909 ms / 100) 3.947 -> 3.952 ( +0.13%) [ +0.00% +0.15% +0.00% / +0.13% +0.38% +0.41%] index_select strided 8 : Elapsed 0.039 ms (3.947 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.00% +0.13% +0.08% / +0.00% +0.64% +0.56%] index_select strided 16 : Elapsed 0.039 ms (3.920 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.13% +0.00% +0.10% / +0.13% +0.41% +0.31%] index_select random : Elapsed 0.039 ms (3.924 ms / 100) 3.919 -> 3.923 ( +0.10%) [ +0.15% +0.20% +0.00% / +0.10% +0.77% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.925 ms / 100) 3.915 -> 3.922 ( +0.18%) [ +0.20% +0.15% +0.00% / +0.18% +0.33% +0.46%] index_select perm : Elapsed 0.039 ms (3.923 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.03% +0.00% +0.05% / -0.03% +0.13% +0.23%] index_select perm_sorted : Elapsed 0.039 ms (3.927 ms / 100) out_shape = [16, 5, 40, 4] in_shape = [20, 5, 40, 4] idx_dim = 0 B = [16, 5, 40, 4] (stride (160, 2560, 4, 1)) A = [20, 5, 40, 4] (stride (800, 1, 20, 5)) dim = 0 1.285 -> 1.287 ( +0.16%) [ +0.23% +0.16% +0.00% / +0.16% +1.09% +1.32%] index_select const : Elapsed 0.013 ms (1.288 ms / 100) 1.287 -> 1.284 ( -0.23%) [ +0.00% +0.00% +0.00% / -0.23% +0.93% +1.01%] index_select wrap : Elapsed 0.013 ms (1.287 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.01% +1.01%] index_select linear : Elapsed 0.013 ms (1.287 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +1.09% +1.09%] index_select reverse : Elapsed 0.013 ms (1.287 ms / 100) 1.293 -> 1.294 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.85% +0.85%] index_select skip64 : Elapsed 0.013 ms (1.293 ms / 100) 1.288 -> 1.290 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.93% +0.93%] index_select skip256 : Elapsed 0.013 ms (1.290 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.78% +0.78%] index_select spread : Elapsed 0.013 ms (1.287 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.78%] index_select strided 3 : Elapsed 0.013 ms (1.286 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.00% +0.00% +0.23% / +0.16% +0.78% +0.86%] index_select strided 5 : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +1.17% +0.70%] index_select strided 7 : Elapsed 0.013 ms (1.287 ms / 100) 1.288 -> 1.286 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.54% +0.62%] index_select strided 8 : Elapsed 0.013 ms (1.289 ms / 100) 1.292 -> 1.293 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.70% +0.70%] index_select strided 16 : Elapsed 0.013 ms (1.293 ms / 100) 1.292 -> 1.292 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.23% +0.23%] index_select random : Elapsed 0.013 ms (1.293 ms / 100) 1.288 -> 1.287 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.31% +0.23%] index_select random_sorted : Elapsed 0.013 ms (1.288 ms / 100) 1.293 -> 1.293 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.23% +0.23%] index_select perm : Elapsed 0.013 ms (1.293 ms / 100) 1.288 -> 1.288 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.39% +0.31%] index_select perm_sorted : Elapsed 0.013 ms (1.289 ms / 100) B = [16, 5, 40, 4] (stride (1, 64, 320, 16)) A = [20, 5, 40, 4] (stride (800, 40, 1, 200)) dim = 0 3.550 -> 3.554 ( +0.11%) [ +0.08% +0.00% +0.06% / +0.11% +0.56% +0.62%] index_select const : Elapsed 0.036 ms (3.553 ms / 100) 3.533 -> 3.535 ( +0.06%) [ +0.08% +0.00% +0.08% / +0.06% +0.48% +0.59%] index_select wrap : Elapsed 0.035 ms (3.536 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.51%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.542 -> 3.544 ( +0.06%) [ +0.08% +0.00% +0.03% / +0.06% +0.56% +0.56%] index_select reverse : Elapsed 0.035 ms (3.545 ms / 100) 3.558 -> 3.563 ( +0.14%) [ +0.08% +0.11% +0.00% / +0.14% +0.67% +0.53%] index_select skip64 : Elapsed 0.036 ms (3.561 ms / 100) 3.560 -> 3.561 ( +0.03%) [ +0.00% +0.00% +0.11% / +0.03% +0.53% +0.48%] index_select skip256 : Elapsed 0.036 ms (3.560 ms / 100) 3.533 -> 3.532 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.37% +0.42%] index_select spread : Elapsed 0.035 ms (3.533 ms / 100) 3.541 -> 3.541 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.54% +0.40%] index_select strided 3 : Elapsed 0.035 ms (3.542 ms / 100) 3.543 -> 3.542 ( -0.03%) [ +0.00% +0.06% +0.03% / -0.03% +0.54% +0.51%] index_select strided 5 : Elapsed 0.035 ms (3.543 ms / 100) 3.533 -> 3.535 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.59% +0.54%] index_select strided 7 : Elapsed 0.035 ms (3.536 ms / 100) 3.536 -> 3.536 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.65% +0.59%] index_select strided 8 : Elapsed 0.035 ms (3.537 ms / 100) 3.538 -> 3.536 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.68% +0.68%] index_select strided 16 : Elapsed 0.035 ms (3.540 ms / 100) 3.537 -> 3.537 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.71% +0.74%] index_select random : Elapsed 0.035 ms (3.539 ms / 100) 3.532 -> 3.534 ( +0.06%) [ +0.00% +0.00% +0.08% / +0.06% +0.48% +0.57%] index_select random_sorted : Elapsed 0.035 ms (3.532 ms / 100) 3.524 -> 3.529 ( +0.14%) [ +0.00% +0.03% +0.23% / +0.14% +0.60% +0.62%] index_select perm : Elapsed 0.035 ms (3.524 ms / 100) 3.541 -> 3.540 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.62% +0.62%] index_select perm_sorted : Elapsed 0.035 ms (3.542 ms / 100) B = [16, 5, 40, 4] (stride (200, 40, 1, 3200)) A = [20, 5, 40, 4] (stride (1, 20, 400, 100)) dim = 0 1.386 -> 1.389 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.94% +0.94%] index_select const : Elapsed 0.014 ms (1.389 ms / 100) 1.395 -> 1.395 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +1.08%] index_select wrap : Elapsed 0.014 ms (1.395 ms / 100) 1.397 -> 1.399 ( +0.14%) [ +0.21% +0.07% +0.00% / +0.14% +0.72% +0.93%] index_select linear : Elapsed 0.014 ms (1.400 ms / 100) 1.403 -> 1.404 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.78% +0.93%] index_select reverse : Elapsed 0.014 ms (1.403 ms / 100) 1.395 -> 1.396 ( +0.07%) [ +0.22% +0.14% +0.00% / +0.07% +0.57% +0.65%] index_select skip64 : Elapsed 0.014 ms (1.398 ms / 100) 1.393 -> 1.395 ( +0.14%) [ +0.00% +0.29% +0.14% / +0.14% +0.72% +0.86%] index_select skip256 : Elapsed 0.014 ms (1.393 ms / 100) 1.395 -> 1.395 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.72% +0.79%] index_select spread : Elapsed 0.014 ms (1.395 ms / 100) 1.403 -> 1.405 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.64% +0.64%] index_select strided 3 : Elapsed 0.014 ms (1.405 ms / 100) 1.399 -> 1.400 ( +0.07%) [ +0.36% +0.07% +0.00% / +0.07% +0.50% +0.86%] index_select strided 5 : Elapsed 0.014 ms (1.404 ms / 100) 1.404 -> 1.404 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.71% +0.78%] index_select strided 7 : Elapsed 0.014 ms (1.405 ms / 100) 1.405 -> 1.406 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.36% +0.64%] index_select strided 8 : Elapsed 0.014 ms (1.405 ms / 100) 1.401 -> 1.401 ( +0.00%) [ +0.14% +0.00% +0.21% / +0.00% +0.43% +0.36%] index_select strided 16 : Elapsed 0.014 ms (1.403 ms / 100) 1.405 -> 1.405 ( +0.00%) [ +0.21% +0.28% +0.00% / +0.00% +0.71% +0.36%] index_select random : Elapsed 0.014 ms (1.408 ms / 100) 1.403 -> 1.403 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.14% +0.21% +0.00%] index_select random_sorted : Elapsed 0.014 ms (1.404 ms / 100) 1.395 -> 1.395 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.50% +0.57%] index_select perm : Elapsed 0.014 ms (1.397 ms / 100) 1.412 -> 1.413 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.28% +0.21%] index_select perm_sorted : Elapsed 0.014 ms (1.414 ms / 100) B = [16, 5, 40, 4] (stride (200, 1, 5, 3200)) A = [20, 5, 40, 4] (stride (800, 160, 1, 40)) dim = 0 3.854 -> 3.854 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.39% +0.42%] index_select const : Elapsed 0.039 ms (3.855 ms / 100) 3.823 -> 3.828 ( +0.13%) [ +0.16% +0.13% +0.00% / +0.13% +0.65% +0.63%] index_select wrap : Elapsed 0.038 ms (3.829 ms / 100) 3.829 -> 3.829 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.55% +0.60%] index_select linear : Elapsed 0.038 ms (3.829 ms / 100) 3.823 -> 3.827 ( +0.10%) [ +0.16% +0.16% +0.00% / +0.10% +0.65% +0.65%] index_select reverse : Elapsed 0.038 ms (3.829 ms / 100) 3.865 -> 3.867 ( +0.05%) [ +0.13% +0.10% +0.00% / +0.05% +0.52% +0.60%] index_select skip64 : Elapsed 0.039 ms (3.870 ms / 100) 3.867 -> 3.866 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.44% +0.41%] index_select skip256 : Elapsed 0.039 ms (3.868 ms / 100) 3.824 -> 3.829 ( +0.13%) [ +0.16% +0.13% +0.00% / +0.13% +0.60% +0.81%] index_select spread : Elapsed 0.038 ms (3.830 ms / 100) 3.825 -> 3.829 ( +0.10%) [ +0.16% +0.00% +0.10% / +0.10% +0.55% +0.52%] index_select strided 3 : Elapsed 0.038 ms (3.831 ms / 100) 3.842 -> 3.844 ( +0.05%) [ +0.18% +0.03% +0.00% / +0.05% +0.60% +0.60%] index_select strided 5 : Elapsed 0.038 ms (3.849 ms / 100) 3.823 -> 3.824 ( +0.03%) [ +0.00% +0.00% +0.13% / +0.03% +0.71% +0.55%] index_select strided 7 : Elapsed 0.038 ms (3.823 ms / 100) 3.841 -> 3.839 ( -0.05%) [ +0.00% +0.00% +0.03% / -0.05% +0.57% +0.62%] index_select strided 8 : Elapsed 0.038 ms (3.841 ms / 100) 3.848 -> 3.851 ( +0.08%) [ +0.21% +0.13% +0.00% / +0.08% +0.86% +0.65%] index_select strided 16 : Elapsed 0.039 ms (3.856 ms / 100) 3.841 -> 3.837 ( -0.10%) [ +0.00% +0.00% +0.00% / -0.10% +0.73% +0.62%] index_select random : Elapsed 0.038 ms (3.841 ms / 100) 3.838 -> 3.839 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.70% +0.76%] index_select random_sorted : Elapsed 0.038 ms (3.840 ms / 100) 3.816 -> 3.822 ( +0.16%) [ +0.00% +0.18% +0.13% / +0.16% +0.79% +0.79%] index_select perm : Elapsed 0.038 ms (3.816 ms / 100) 3.822 -> 3.829 ( +0.18%) [ +0.10% +0.05% +0.00% / +0.18% +0.78% +0.73%] index_select perm_sorted : Elapsed 0.038 ms (3.826 ms / 100) B = [16, 5, 40, 4] (stride (200, 1, 5, 3200)) A = [20, 5, 40, 4] (stride (1, 80, 400, 20)) dim = 0 4.137 -> 4.139 ( +0.05%) [ +0.02% +0.10% +0.00% / +0.05% +0.65% +0.68%] index_select const : Elapsed 0.041 ms (4.138 ms / 100) 4.166 -> 4.172 ( +0.14%) [ +0.05% +0.00% +0.05% / +0.14% +0.82% +0.79%] index_select wrap : Elapsed 0.042 ms (4.168 ms / 100) 4.176 -> 4.181 ( +0.12%) [ +0.02% +0.05% +0.00% / +0.12% +0.60% +0.72%] index_select linear : Elapsed 0.042 ms (4.177 ms / 100) 4.148 -> 4.147 ( -0.02%) [ +0.00% +0.05% +0.05% / -0.02% +0.63% +0.70%] index_select reverse : Elapsed 0.041 ms (4.148 ms / 100) 4.142 -> 4.138 ( -0.10%) [ +0.07% +0.02% +0.00% / -0.10% +0.53% +0.60%] index_select skip64 : Elapsed 0.041 ms (4.145 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.00% +0.12% +0.12% / +0.02% +0.72% +0.77%] index_select skip256 : Elapsed 0.041 ms (4.138 ms / 100) 4.164 -> 4.171 ( +0.17%) [ +0.02% +0.19% +0.00% / +0.17% +0.77% +0.74%] index_select spread : Elapsed 0.042 ms (4.165 ms / 100) 4.151 -> 4.157 ( +0.14%) [ +0.12% +0.00% +0.12% / +0.14% +0.77% +0.70%] index_select strided 3 : Elapsed 0.042 ms (4.156 ms / 100) 4.153 -> 4.155 ( +0.05%) [ +0.12% +0.00% +0.10% / +0.05% +0.55% +0.70%] index_select strided 5 : Elapsed 0.042 ms (4.158 ms / 100) 4.149 -> 4.162 ( +0.31%) [ +0.10% +0.17% +0.00% / +0.31% +0.87% +0.75%] index_select strided 7 : Elapsed 0.042 ms (4.153 ms / 100) 4.157 -> 4.156 ( -0.02%) [ +0.12% +0.05% +0.00% / -0.02% +0.70% +0.72%] index_select strided 8 : Elapsed 0.042 ms (4.162 ms / 100) 4.157 -> 4.158 ( +0.02%) [ +0.10% +0.00% +0.10% / +0.02% +0.70% +0.72%] index_select strided 16 : Elapsed 0.042 ms (4.161 ms / 100) 4.176 -> 4.179 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.67% +0.43%] index_select random : Elapsed 0.042 ms (4.178 ms / 100) 4.186 -> 4.194 ( +0.19%) [ +0.00% +0.31% +0.31% / +0.19% +0.74% +0.48%] index_select random_sorted : Elapsed 0.042 ms (4.186 ms / 100) 4.187 -> 4.189 ( +0.05%) [ +0.10% +0.00% +0.07% / +0.05% +0.57% +0.62%] index_select perm : Elapsed 0.042 ms (4.191 ms / 100) 4.163 -> 4.165 ( +0.05%) [ +0.07% +0.00% +0.02% / +0.05% +0.58% +0.53%] index_select perm_sorted : Elapsed 0.042 ms (4.166 ms / 100) B = [16, 5, 40, 4] (stride (200, 1, 5, 3200)) A = [20, 5, 40, 4] (stride (40, 800, 1, 4000)) dim = 0 3.882 -> 3.883 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.18% +0.18%] index_select const : Elapsed 0.039 ms (3.882 ms / 100) 3.842 -> 3.846 ( +0.10%) [ +0.00% +0.13% +0.13% / +0.10% +0.52% +0.55%] index_select wrap : Elapsed 0.038 ms (3.842 ms / 100) 3.840 -> 3.839 ( -0.03%) [ +0.00% +0.08% +0.05% / -0.03% +0.63% +0.63%] index_select linear : Elapsed 0.038 ms (3.840 ms / 100) 3.823 -> 3.826 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.63% +0.58%] index_select reverse : Elapsed 0.038 ms (3.824 ms / 100) 3.866 -> 3.870 ( +0.10%) [ +0.00% +0.10% +0.08% / +0.10% +0.65% +0.67%] index_select skip64 : Elapsed 0.039 ms (3.866 ms / 100) 3.864 -> 3.864 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.34% +0.39%] index_select skip256 : Elapsed 0.039 ms (3.865 ms / 100) 3.846 -> 3.850 ( +0.10%) [ +0.00% +0.13% +0.10% / +0.10% +0.73% +0.68%] index_select spread : Elapsed 0.038 ms (3.846 ms / 100) 3.840 -> 3.842 ( +0.05%) [ +0.00% +0.18% +0.03% / +0.05% +0.60% +0.65%] index_select strided 3 : Elapsed 0.038 ms (3.840 ms / 100) 3.872 -> 3.874 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.57% +0.49%] index_select strided 5 : Elapsed 0.039 ms (3.872 ms / 100) 3.838 -> 3.843 ( +0.13%) [ +0.10% +0.03% +0.00% / +0.13% +0.81% +0.70%] index_select strided 7 : Elapsed 0.038 ms (3.842 ms / 100) 3.844 -> 3.844 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.68% +0.62%] index_select strided 8 : Elapsed 0.038 ms (3.845 ms / 100) 3.852 -> 3.853 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.62% +0.65%] index_select strided 16 : Elapsed 0.039 ms (3.853 ms / 100) 3.856 -> 3.855 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.78% +0.70%] index_select random : Elapsed 0.039 ms (3.858 ms / 100) 3.848 -> 3.848 ( +0.00%) [ +0.00% +0.05% +0.08% / +0.00% +0.78% +0.73%] index_select random_sorted : Elapsed 0.038 ms (3.848 ms / 100) 3.832 -> 3.832 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_select perm : Elapsed 0.038 ms (3.832 ms / 100) 3.840 -> 3.838 ( -0.05%) [ +0.00% +0.00% +0.13% / -0.05% +0.83% +0.73%] index_select perm_sorted : Elapsed 0.038 ms (3.840 ms / 100) out_shape = [20, 16, 40, 4] in_shape = [20, 5, 40, 4] idx_dim = 1 B = [20, 16, 40, 4] (stride (2560, 1, 64, 16)) A = [20, 5, 40, 4] (stride (160, 3200, 4, 1)) dim = 1 0.931 -> 0.906 ( -2.69%) [ +0.00% +0.11% +0.00% / +0.11% -2.69% -2.58%] index_add_ linear : Elapsed 0.009 ms (0.931 ms / 100) 0.920 -> 0.886 ( -3.70%) [ +0.11% +0.00% +0.11% / +0.33% -3.37% -3.70%] index_copy_ linear : Elapsed 0.009 ms (0.921 ms / 100) 0.932 -> 0.908 ( -2.58%) [ +0.11% +0.21% +0.00% / +0.11% -2.58% -2.58%] index_add_ reverse : Elapsed 0.009 ms (0.933 ms / 100) 0.921 -> 0.886 ( -3.80%) [ +0.00% +0.11% +0.11% / -0.11% -3.80% -3.47%] index_copy_ reverse : Elapsed 0.009 ms (0.921 ms / 100) 0.955 -> 0.934 ( -2.20%) [ +0.10% +0.63% +0.00% / +0.52% -2.20% -1.88%] index_add_ spread : Elapsed 0.010 ms (0.956 ms / 100) 0.947 -> 0.914 ( -3.48%) [ +0.00% +0.32% +0.32% / +0.42% -3.48% -3.17%] index_copy_ spread : Elapsed 0.009 ms (0.947 ms / 100) 0.957 -> 0.929 ( -2.93%) [ +0.10% +0.63% +0.00% / +0.21% -2.93% -2.72%] index_add_ strided 3 : Elapsed 0.010 ms (0.958 ms / 100) 0.948 -> 0.910 ( -4.01%) [ +0.32% +0.00% +0.00% / +0.42% -4.01% -3.69%] index_copy_ strided 3 : Elapsed 0.010 ms (0.951 ms / 100) 0.957 -> 0.933 ( -2.51%) [ +0.00% +0.21% +0.10% / +0.21% -2.40% -2.51%] index_add_ strided 5 : Elapsed 0.010 ms (0.957 ms / 100) 0.947 -> 0.908 ( -4.12%) [ +0.00% +0.42% +0.11% / +0.32% -4.12% -3.59%] index_copy_ strided 5 : Elapsed 0.009 ms (0.947 ms / 100) 0.958 -> 0.930 ( -2.92%) [ +0.00% +0.00% +0.10% / -0.10% -2.92% -2.92%] index_add_ strided 7 : Elapsed 0.010 ms (0.958 ms / 100) 0.949 -> 0.913 ( -3.79%) [ +0.00% +0.11% +0.53% / +0.00% -3.58% -3.79%] index_copy_ strided 7 : Elapsed 0.009 ms (0.949 ms / 100) 0.957 -> 0.926 ( -3.24%) [ +0.00% +0.00% +0.31% / +0.00% -3.03% -3.24%] index_add_ perm : Elapsed 0.010 ms (0.957 ms / 100) 0.947 -> 0.909 ( -4.01%) [ +0.00% +0.11% +0.00% / +0.11% -4.01% -4.01%] index_copy_ perm : Elapsed 0.009 ms (0.947 ms / 100) 0.956 -> 0.927 ( -3.03%) [ +0.31% +0.00% +0.42% / +0.42% -3.03% -3.03%] index_add_ perm_sorted : Elapsed 0.010 ms (0.959 ms / 100) 0.945 -> 0.905 ( -4.23%) [ +0.42% +0.21% +0.00% / -0.11% -4.23% -3.81%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.949 ms / 100) 1.648 -> 1.654 ( +0.36%) [ +0.42% +0.00% +0.18% / +0.36% +0.79% +0.85%] index_select const : Elapsed 0.017 ms (1.655 ms / 100) 1.692 -> 1.662 ( -1.77%) [ +0.06% +0.00% +0.00% / +0.18% -1.77% -1.77%] index_select wrap : Elapsed 0.017 ms (1.693 ms / 100) 1.697 -> 1.665 ( -1.89%) [ +0.12% +0.12% +0.00% / +0.12% -1.89% -1.77%] index_select linear : Elapsed 0.017 ms (1.699 ms / 100) 1.667 -> 1.660 ( -0.42%) [ +0.00% +0.24% +0.24% / +0.24% -0.42% -0.42%] index_select reverse : Elapsed 0.017 ms (1.667 ms / 100) 1.648 -> 1.653 ( +0.30%) [ +0.00% +0.36% +0.06% / +0.30% +0.85% +0.73%] index_select skip64 : Elapsed 0.016 ms (1.648 ms / 100) 1.649 -> 1.651 ( +0.12%) [ +0.24% +0.30% +0.00% / +0.12% +1.15% +0.91%] index_select skip256 : Elapsed 0.017 ms (1.653 ms / 100) 1.670 -> 1.662 ( -0.48%) [ +0.00% +0.18% +0.06% / +0.18% -0.48% -0.48%] index_select spread : Elapsed 0.017 ms (1.670 ms / 100) 1.683 -> 1.665 ( -1.07%) [ +0.12% +0.12% +0.00% / +0.18% -0.89% -1.07%] index_select strided 3 : Elapsed 0.017 ms (1.685 ms / 100) 1.692 -> 1.664 ( -1.65%) [ +0.00% +0.06% +0.24% / +0.12% -1.65% -1.65%] index_select random : Elapsed 0.017 ms (1.692 ms / 100) 1.674 -> 1.663 ( -0.66%) [ +0.00% +0.18% +0.00% / +0.00% -0.66% -0.66%] index_select random_sorted : Elapsed 0.017 ms (1.674 ms / 100) B = [20, 16, 40, 4] (stride (4, 3200, 80, 1)) dim = 1 fill_cnt = 5 1.082 -> 1.081 ( -0.09%) [ +0.65% +0.37% +0.00% / -0.09% +0.55% +0.46%] index_fill_ const : Elapsed 0.011 ms (1.089 ms / 100) 1.098 -> 1.098 ( +0.00%) [ +0.27% +0.09% +0.00% / +0.00% +0.36% +0.27%] index_fill_ linear : Elapsed 0.011 ms (1.101 ms / 100) 1.099 -> 1.098 ( -0.09%) [ +0.27% +0.00% +0.09% / -0.09% +0.27% +0.36%] index_fill_ reverse : Elapsed 0.011 ms (1.102 ms / 100) 1.081 -> 1.084 ( +0.28%) [ +0.19% +0.37% +0.00% / +0.28% +0.46% +0.46%] index_fill_ skip64 : Elapsed 0.011 ms (1.083 ms / 100) 1.081 -> 1.082 ( +0.09%) [ +0.28% +0.28% +0.00% / +0.09% +0.46% +0.37%] index_fill_ skip256 : Elapsed 0.011 ms (1.084 ms / 100) 1.102 -> 1.101 ( -0.09%) [ +0.18% +0.00% +0.09% / -0.09% +0.36% +0.45%] index_fill_ spread : Elapsed 0.011 ms (1.104 ms / 100) 1.101 -> 1.106 ( +0.45%) [ +0.00% +0.18% +0.18% / +0.45% +0.64% +0.54%] index_fill_ strided 3 : Elapsed 0.011 ms (1.101 ms / 100) 1.101 -> 1.101 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.27% +0.18%] index_fill_ strided 5 : Elapsed 0.011 ms (1.101 ms / 100) 1.096 -> 1.096 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +1.64% +2.01%] index_fill_ strided 7 : Elapsed 0.011 ms (1.096 ms / 100) 1.082 -> 1.085 ( +0.28%) [ +0.28% +0.18% +0.00% / +0.28% +0.65% +0.37%] index_fill_ strided 8 : Elapsed 0.011 ms (1.085 ms / 100) 1.097 -> 1.099 ( +0.18%) [ +0.09% +0.18% +0.00% / +0.18% +1.37% +1.37%] index_fill_ random : Elapsed 0.011 ms (1.098 ms / 100) 1.096 -> 1.098 ( +0.18%) [ +0.09% +0.00% +0.00% / +0.18% +1.46% +1.46%] index_fill_ random_sorted : Elapsed 0.011 ms (1.097 ms / 100) 1.095 -> 1.099 ( +0.37%) [ +0.09% +0.46% +0.00% / +0.37% +1.28% +0.91%] index_fill_ perm : Elapsed 0.011 ms (1.096 ms / 100) 1.097 -> 1.097 ( +0.00%) [ +0.00% +0.18% +0.18% / +0.00% +0.46% +0.73%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.097 ms / 100) B = [20, 16, 40, 4] (stride (16, 1, 1280, 320)) A = [20, 5, 40, 4] (stride (40, 800, 1, 4000)) dim = 1 2.302 -> 2.302 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.00% +0.09%] index_add_ linear : Elapsed 0.023 ms (2.305 ms / 100) 2.271 -> 2.270 ( -0.04%) [ +0.31% +0.00% +0.09% / -0.04% +0.13% +0.22%] index_copy_ linear : Elapsed 0.023 ms (2.278 ms / 100) 2.304 -> 2.304 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.00% +0.13%] index_add_ reverse : Elapsed 0.023 ms (2.307 ms / 100) 2.276 -> 2.273 ( -0.13%) [ +0.00% +0.18% +0.04% / +0.04% -0.13% +0.09%] index_copy_ reverse : Elapsed 0.023 ms (2.276 ms / 100) 2.330 -> 2.328 ( -0.09%) [ +0.21% +0.00% +0.13% / -0.09% +0.39% +0.09%] index_add_ spread : Elapsed 0.023 ms (2.335 ms / 100) 2.333 -> 2.337 ( +0.17%) [ +0.00% +0.00% +0.34% / +0.17% +0.34% +0.51%] index_copy_ spread : Elapsed 0.023 ms (2.333 ms / 100) 2.340 -> 2.333 ( -0.30%) [ +0.00% +0.09% +0.21% / -0.30% +0.21% +0.21%] index_add_ strided 3 : Elapsed 0.023 ms (2.340 ms / 100) 2.339 -> 2.336 ( -0.13%) [ +0.00% +0.04% +0.00% / -0.13% -0.09% +0.13%] index_copy_ strided 3 : Elapsed 0.023 ms (2.339 ms / 100) 2.344 -> 2.344 ( +0.00%) [ +0.21% +0.00% +0.34% / +0.17% +0.00% +0.13%] index_add_ strided 5 : Elapsed 0.023 ms (2.349 ms / 100) 2.342 -> 2.341 ( -0.04%) [ +0.17% +0.04% +0.00% / -0.04% -0.04% +0.38%] index_copy_ strided 5 : Elapsed 0.023 ms (2.346 ms / 100) 2.344 -> 2.343 ( -0.04%) [ +0.09% +0.30% +0.00% / +0.17% -0.04% +0.00%] index_add_ strided 7 : Elapsed 0.023 ms (2.346 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.21% +0.30% +0.00% / +0.17% +0.09% +0.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.342 ms / 100) 2.300 -> 2.299 ( -0.04%) [ +0.61% +0.00% +0.26% / +0.09% +0.13% -0.04%] index_add_ perm : Elapsed 0.023 ms (2.314 ms / 100) 2.269 -> 2.274 ( +0.22%) [ +0.40% +0.00% +0.26% / +0.22% +0.26% +0.26%] index_copy_ perm : Elapsed 0.023 ms (2.278 ms / 100) 2.294 -> 2.293 ( -0.04%) [ +0.26% +0.31% +0.00% / +0.31% +0.52% -0.04%] index_add_ perm_sorted : Elapsed 0.023 ms (2.300 ms / 100) 2.269 -> 2.273 ( +0.18%) [ +0.09% +0.00% +0.09% / +0.18% +0.26% +0.31%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.271 ms / 100) 4.628 -> 4.625 ( -0.06%) [ +0.02% +0.11% +0.00% / -0.06% +0.41% +0.52%] index_select const : Elapsed 0.046 ms (4.629 ms / 100) 4.670 -> 4.666 ( -0.09%) [ +0.02% +0.06% +0.00% / -0.09% +0.51% +0.49%] index_select wrap : Elapsed 0.047 ms (4.671 ms / 100) 4.699 -> 4.702 ( +0.06%) [ +0.00% +0.13% +0.04% / +0.06% +0.53% +0.55%] index_select linear : Elapsed 0.047 ms (4.699 ms / 100) 4.695 -> 4.698 ( +0.06%) [ +0.09% +0.02% +0.00% / +0.06% +0.70% +0.66%] index_select reverse : Elapsed 0.047 ms (4.699 ms / 100) 4.624 -> 4.629 ( +0.11%) [ +0.19% +0.26% +0.00% / +0.11% +0.61% +0.63%] index_select skip64 : Elapsed 0.046 ms (4.633 ms / 100) 4.627 -> 4.634 ( +0.15%) [ +0.19% +0.13% +0.00% / +0.15% +0.43% +0.48%] index_select skip256 : Elapsed 0.046 ms (4.636 ms / 100) 4.670 -> 4.681 ( +0.24%) [ +0.06% +0.00% +0.00% / +0.24% +0.60% +0.41%] index_select spread : Elapsed 0.047 ms (4.673 ms / 100) 4.667 -> 4.667 ( +0.00%) [ +0.00% +0.02% +0.11% / +0.00% +0.60% +0.60%] index_select strided 3 : Elapsed 0.047 ms (4.667 ms / 100) 4.678 -> 4.676 ( -0.04%) [ +0.02% +0.00% +0.00% / -0.04% +0.53% +0.34%] index_select random : Elapsed 0.047 ms (4.679 ms / 100) 4.669 -> 4.680 ( +0.24%) [ +0.17% +0.00% +0.09% / +0.24% +0.77% +0.77%] index_select random_sorted : Elapsed 0.047 ms (4.677 ms / 100) B = [20, 16, 40, 4] (stride (1, 800, 20, 12800)) A = [20, 5, 40, 4] (stride (800, 1, 5, 200)) dim = 1 2.373 -> 2.372 ( -0.04%) [ +0.00% +0.00% +0.21% / -0.04% +1.05% +1.22%] index_add_ linear : Elapsed 0.024 ms (2.373 ms / 100) 2.325 -> 2.324 ( -0.04%) [ +0.04% +0.00% +0.43% / -0.04% +0.73% +1.03%] index_copy_ linear : Elapsed 0.023 ms (2.326 ms / 100) 2.367 -> 2.365 ( -0.08%) [ +0.38% +0.00% +0.00% / -0.08% +1.35% +1.31%] index_add_ reverse : Elapsed 0.024 ms (2.376 ms / 100) 2.313 -> 2.315 ( +0.09%) [ +0.48% +0.22% +0.00% / +0.09% +1.51% +1.60%] index_copy_ reverse : Elapsed 0.023 ms (2.324 ms / 100) 2.373 -> 2.377 ( +0.17%) [ +0.21% +0.00% +0.25% / +0.17% +1.01% +1.22%] index_add_ spread : Elapsed 0.024 ms (2.378 ms / 100) 2.324 -> 2.328 ( +0.17%) [ +0.04% +0.00% +0.22% / +0.17% +0.82% +1.20%] index_copy_ spread : Elapsed 0.023 ms (2.325 ms / 100) 2.380 -> 2.387 ( +0.29%) [ +0.00% +0.38% +0.25% / +0.29% +1.05% +0.97%] index_add_ strided 3 : Elapsed 0.024 ms (2.380 ms / 100) 2.331 -> 2.338 ( +0.30%) [ +0.00% +0.21% +0.39% / +0.30% +1.03% +1.20%] index_copy_ strided 3 : Elapsed 0.023 ms (2.331 ms / 100) 2.368 -> 2.370 ( +0.08%) [ +0.55% +0.00% +0.59% / +0.08% +1.10% +1.44%] index_add_ strided 5 : Elapsed 0.024 ms (2.381 ms / 100) 2.316 -> 2.325 ( +0.39%) [ +0.60% +0.00% +0.56% / +0.39% +0.99% +0.95%] index_copy_ strided 5 : Elapsed 0.023 ms (2.330 ms / 100) 2.377 -> 2.379 ( +0.08%) [ +0.21% +0.42% +0.00% / +0.08% +1.01% +0.84%] index_add_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.333 -> 2.327 ( -0.26%) [ +0.13% +0.00% +0.00% / -0.26% +0.64% +0.60%] index_copy_ strided 7 : Elapsed 0.023 ms (2.336 ms / 100) 2.384 -> 2.385 ( +0.04%) [ +0.25% +0.00% +0.08% / +0.04% +1.30% +1.01%] index_add_ perm : Elapsed 0.024 ms (2.390 ms / 100) 2.333 -> 2.340 ( +0.30%) [ +0.51% +0.13% +0.00% / +0.30% +1.07% +0.86%] index_copy_ perm : Elapsed 0.023 ms (2.345 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.13% +0.38% +0.00% / +0.04% +0.80% +0.84%] index_add_ perm_sorted : Elapsed 0.024 ms (2.377 ms / 100) 2.322 -> 2.326 ( +0.17%) [ +0.00% +0.22% +0.00% / +0.17% +0.73% +0.86%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.322 ms / 100) 5.083 -> 5.111 ( +0.55%) [ +0.00% +0.24% +0.45% / +0.55% +0.98% +0.94%] index_select const : Elapsed 0.051 ms (5.083 ms / 100) 5.096 -> 5.106 ( +0.20%) [ +0.00% +0.22% +0.18% / +0.20% +0.71% +0.67%] index_select wrap : Elapsed 0.051 ms (5.096 ms / 100) 5.089 -> 5.107 ( +0.35%) [ +0.35% +0.31% +0.00% / +0.35% +0.81% +0.79%] index_select linear : Elapsed 0.051 ms (5.107 ms / 100) 5.111 -> 5.114 ( +0.06%) [ +0.06% +0.10% +0.00% / +0.06% +0.63% +0.51%] index_select reverse : Elapsed 0.051 ms (5.114 ms / 100) 5.091 -> 5.105 ( +0.27%) [ +0.00% +0.02% +0.26% / +0.27% +0.81% +0.75%] index_select skip64 : Elapsed 0.051 ms (5.091 ms / 100) 5.080 -> 5.116 ( +0.71%) [ +0.73% +0.63% +0.00% / +0.71% +1.26% +1.04%] index_select skip256 : Elapsed 0.051 ms (5.117 ms / 100) 5.100 -> 5.105 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.92% +0.84%] index_select spread : Elapsed 0.051 ms (5.100 ms / 100) 5.095 -> 5.098 ( +0.06%) [ +0.06% +0.20% +0.00% / +0.06% +0.82% +0.94%] index_select strided 3 : Elapsed 0.051 ms (5.098 ms / 100) 5.103 -> 5.104 ( +0.02%) [ +0.06% +0.00% +0.00% / +0.02% +0.74% +0.71%] index_select random : Elapsed 0.051 ms (5.106 ms / 100) 5.079 -> 5.075 ( -0.08%) [ +0.63% +0.00% +0.45% / -0.08% +1.34% +1.36%] index_select random_sorted : Elapsed 0.051 ms (5.111 ms / 100) out_shape = [20, 5, 16, 4] in_shape = [20, 5, 40, 4] idx_dim = 2 B = [20, 5, 16, 4] (stride (320, 64, 1, 16)) A = [20, 5, 40, 4] (stride (4, 3200, 80, 1)) dim = 2 3.565 -> 3.565 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.62% +0.53%] index_select const : Elapsed 0.036 ms (3.565 ms / 100) 3.580 -> 3.578 ( -0.06%) [ +0.03% +0.03% +0.00% / -0.06% +0.47% +0.59%] index_select wrap : Elapsed 0.036 ms (3.581 ms / 100) 3.578 -> 3.578 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.56% +0.59%] index_select linear : Elapsed 0.036 ms (3.580 ms / 100) 3.574 -> 3.574 ( +0.00%) [ +0.20% +0.03% +0.00% / +0.00% +0.50% +0.50%] index_select reverse : Elapsed 0.036 ms (3.581 ms / 100) 3.590 -> 3.590 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.64% +0.56%] index_select skip64 : Elapsed 0.036 ms (3.590 ms / 100) 3.567 -> 3.569 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.45% +0.45%] index_select skip256 : Elapsed 0.036 ms (3.568 ms / 100) 3.566 -> 3.570 ( +0.11%) [ +0.03% +0.00% +0.06% / +0.11% +0.39% +0.39%] index_select spread : Elapsed 0.036 ms (3.567 ms / 100) 3.589 -> 3.598 ( +0.25%) [ +0.00% +0.28% +0.22% / +0.25% +0.67% +0.45%] index_select strided 3 : Elapsed 0.036 ms (3.589 ms / 100) 3.570 -> 3.569 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.25% +0.36%] index_select strided 5 : Elapsed 0.036 ms (3.571 ms / 100) 3.575 -> 3.576 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.42% +0.42%] index_select strided 7 : Elapsed 0.036 ms (3.576 ms / 100) 3.583 -> 3.584 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.45% +0.45%] index_select strided 8 : Elapsed 0.036 ms (3.583 ms / 100) 3.603 -> 3.602 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.36% +0.39%] index_select strided 16 : Elapsed 0.036 ms (3.603 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.50% +0.31%] index_select random : Elapsed 0.036 ms (3.571 ms / 100) 3.565 -> 3.568 ( +0.08%) [ +0.06% +0.08% +0.00% / +0.08% +0.36% +0.42%] index_select random_sorted : Elapsed 0.036 ms (3.567 ms / 100) 3.582 -> 3.584 ( +0.06%) [ +0.08% +0.00% +0.06% / +0.06% +0.39% +0.28%] index_select perm : Elapsed 0.036 ms (3.585 ms / 100) 3.570 -> 3.567 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.08% +0.42% +0.39%] index_select perm_sorted : Elapsed 0.036 ms (3.571 ms / 100) B = [20, 5, 16, 4] (stride (4, 1280, 80, 1)) A = [20, 5, 40, 4] (stride (40, 3200, 1, 800)) dim = 2 4.282 -> 4.283 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.72% +0.61%] index_select const : Elapsed 0.043 ms (4.284 ms / 100) 4.293 -> 4.297 ( +0.09%) [ +0.00% +0.00% +0.02% / +0.09% +0.56% +0.56%] index_select wrap : Elapsed 0.043 ms (4.293 ms / 100) 4.286 -> 4.286 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.61% +0.61%] index_select linear : Elapsed 0.043 ms (4.287 ms / 100) 4.292 -> 4.293 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.56% +0.58%] index_select reverse : Elapsed 0.043 ms (4.293 ms / 100) 4.285 -> 4.288 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.72% +0.56%] index_select skip64 : Elapsed 0.043 ms (4.287 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.77% +0.89%] index_select skip256 : Elapsed 0.043 ms (4.280 ms / 100) 4.262 -> 4.260 ( -0.05%) [ +0.00% +0.02% +0.23% / -0.05% +0.75% +0.73%] index_select spread : Elapsed 0.043 ms (4.262 ms / 100) 4.259 -> 4.257 ( -0.05%) [ +0.02% +0.00% +0.02% / -0.05% +0.54% +0.63%] index_select strided 3 : Elapsed 0.043 ms (4.260 ms / 100) 4.273 -> 4.272 ( -0.02%) [ +0.00% +0.09% +0.09% / -0.02% +1.10% +1.01%] index_select strided 5 : Elapsed 0.043 ms (4.273 ms / 100) 4.287 -> 4.293 ( +0.14%) [ +0.16% +0.19% +0.00% / +0.14% +0.86% +0.72%] index_select strided 7 : Elapsed 0.043 ms (4.294 ms / 100) 4.295 -> 4.295 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.75% +0.84%] index_select strided 8 : Elapsed 0.043 ms (4.296 ms / 100) 4.299 -> 4.297 ( -0.05%) [ +0.00% +0.12% +0.09% / -0.05% +0.40% +0.67%] index_select strided 16 : Elapsed 0.043 ms (4.299 ms / 100) 4.294 -> 4.295 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.65% +0.68%] index_select random : Elapsed 0.043 ms (4.297 ms / 100) 4.266 -> 4.266 ( +0.00%) [ +0.07% +0.00% +0.02% / +0.00% +0.70% +0.73%] index_select random_sorted : Elapsed 0.043 ms (4.269 ms / 100) 4.278 -> 4.277 ( -0.02%) [ +0.00% +0.23% +0.00% / -0.02% +0.63% +0.63%] index_select perm : Elapsed 0.043 ms (4.278 ms / 100) 4.267 -> 4.272 ( +0.12%) [ +0.00% +0.09% +0.09% / +0.12% +0.77% +0.68%] index_select perm_sorted : Elapsed 0.043 ms (4.267 ms / 100) B = [20, 5, 16, 4] (stride (1, 320, 20, 1600)) A = [20, 5, 40, 4] (stride (1, 20, 100, 4000)) dim = 2 4.021 -> 4.023 ( +0.05%) [ +0.07% +0.10% +0.00% / +0.05% +0.85% +0.82%] index_select const : Elapsed 0.040 ms (4.024 ms / 100) 4.020 -> 4.022 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.65% +0.67%] index_select wrap : Elapsed 0.040 ms (4.022 ms / 100) 4.016 -> 4.017 ( +0.02%) [ +0.10% +0.20% +0.00% / +0.02% +0.55% +0.57%] index_select linear : Elapsed 0.040 ms (4.020 ms / 100) 4.041 -> 4.049 ( +0.20%) [ +0.12% +0.02% +0.00% / +0.20% +0.64% +0.52%] index_select reverse : Elapsed 0.040 ms (4.046 ms / 100) 4.039 -> 4.039 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.54% +0.54%] index_select skip64 : Elapsed 0.040 ms (4.040 ms / 100) 4.027 -> 4.026 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.57% +0.55%] index_select skip256 : Elapsed 0.040 ms (4.027 ms / 100) 4.023 -> 4.032 ( +0.22%) [ +0.17% +0.00% +0.22% / +0.22% +0.57% +0.42%] index_select spread : Elapsed 0.040 ms (4.030 ms / 100) 4.053 -> 4.054 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.37% +0.44%] index_select strided 3 : Elapsed 0.041 ms (4.053 ms / 100) 4.043 -> 4.043 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.37% +0.40%] index_select strided 5 : Elapsed 0.040 ms (4.043 ms / 100) 4.040 -> 4.043 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.52% +0.54%] index_select strided 7 : Elapsed 0.040 ms (4.041 ms / 100) 4.036 -> 4.037 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.50% +0.47%] index_select strided 8 : Elapsed 0.040 ms (4.037 ms / 100) 4.035 -> 4.036 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.50% +0.50%] index_select strided 16 : Elapsed 0.040 ms (4.036 ms / 100) 4.040 -> 4.046 ( +0.15%) [ +0.15% +0.00% +0.12% / +0.15% +0.37% +0.32%] index_select random : Elapsed 0.040 ms (4.046 ms / 100) 4.030 -> 4.032 ( +0.05%) [ +0.00% +0.05% +0.07% / +0.05% +0.50% +0.50%] index_select random_sorted : Elapsed 0.040 ms (4.030 ms / 100) 4.061 -> 4.063 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.34% +0.34%] index_select perm : Elapsed 0.041 ms (4.064 ms / 100) 4.044 -> 4.043 ( -0.02%) [ +0.15% +0.15% +0.00% / -0.02% +0.52% +0.54%] index_select perm_sorted : Elapsed 0.041 ms (4.050 ms / 100) out_shape = [20, 5, 40, 16] in_shape = [20, 5, 40, 4] idx_dim = 3 B = [20, 5, 40, 16] (stride (3200, 40, 1, 200)) A = [20, 5, 40, 4] (stride (800, 1, 20, 5)) dim = 3 2.421 -> 2.424 ( +0.12%) [ +0.04% +0.00% +0.08% / +0.12% +0.29% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.422 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.30% +0.00% +0.00% / -0.04% +0.51% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.371 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.25% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.362 -> 2.368 ( +0.25%) [ +0.00% +0.30% +0.08% / +0.25% +0.38% +0.42%] index_copy_ reverse : Elapsed 0.024 ms (2.362 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.00% +0.45% +0.45% / +0.17% +0.54% +0.70%] index_add_ spread : Elapsed 0.024 ms (2.421 ms / 100) 2.366 -> 2.371 ( +0.21%) [ +0.00% +0.51% +0.55% / +0.21% +0.25% +0.34%] index_copy_ spread : Elapsed 0.024 ms (2.366 ms / 100) 2.429 -> 2.434 ( +0.21%) [ +0.16% +0.21% +0.00% / +0.21% +0.54% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.433 ms / 100) 2.371 -> 2.375 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.63% +0.38%] index_copy_ strided 3 : Elapsed 0.024 ms (2.371 ms / 100) 2.435 -> 2.436 ( +0.04%) [ +0.00% +0.12% +0.08% / +0.04% +0.25% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.435 ms / 100) 2.373 -> 2.375 ( +0.08%) [ +0.00% +0.13% +0.21% / +0.08% +0.21% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.373 ms / 100) 2.428 -> 2.436 ( +0.33%) [ +0.45% +0.00% +0.08% / +0.37% +0.45% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.370 -> 2.374 ( +0.17%) [ +0.30% +0.00% +0.17% / +0.17% +0.51% +0.34%] index_copy_ strided 7 : Elapsed 0.024 ms (2.377 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.00% +0.29% +0.12% / +0.37% +0.12% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.419 ms / 100) 2.366 -> 2.370 ( +0.17%) [ +0.00% +0.04% +0.04% / +0.17% +0.17% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.366 ms / 100) 2.422 -> 2.426 ( +0.17%) [ +0.12% +0.17% +0.00% / +0.33% +0.17% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.369 -> 2.369 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.08% +0.17% +0.00%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.370 ms / 100) 5.256 -> 5.260 ( +0.08%) [ +0.13% +0.00% +0.10% / +0.08% +0.55% +0.59%] index_select const : Elapsed 0.053 ms (5.263 ms / 100) 5.278 -> 5.283 ( +0.09%) [ +0.08% +0.00% +0.15% / +0.09% +0.45% +0.42%] index_select wrap : Elapsed 0.053 ms (5.282 ms / 100) 5.294 -> 5.305 ( +0.21%) [ +0.15% +0.00% +0.13% / +0.21% +0.34% +0.51%] index_select linear : Elapsed 0.053 ms (5.302 ms / 100) 5.304 -> 5.304 ( +0.00%) [ +0.04% +0.06% +0.00% / +0.00% +0.57% +0.51%] index_select reverse : Elapsed 0.053 ms (5.306 ms / 100) 5.301 -> 5.306 ( +0.09%) [ +0.17% +0.11% +0.00% / +0.09% +0.34% +0.28%] index_select skip64 : Elapsed 0.053 ms (5.310 ms / 100) 5.287 -> 5.290 ( +0.06%) [ +0.02% +0.00% +0.04% / +0.06% +0.26% +0.21%] index_select skip256 : Elapsed 0.053 ms (5.288 ms / 100) 5.276 -> 5.275 ( -0.02%) [ +0.06% +0.00% +0.00% / -0.02% +0.27% +0.25%] index_select spread : Elapsed 0.053 ms (5.279 ms / 100) 5.290 -> 5.293 ( +0.06%) [ +0.09% +0.11% +0.00% / +0.06% +0.28% +0.42%] index_select strided 3 : Elapsed 0.053 ms (5.295 ms / 100) 5.300 -> 5.303 ( +0.06%) [ +0.04% +0.09% +0.00% / +0.06% +0.49% +0.36%] index_select random : Elapsed 0.053 ms (5.302 ms / 100) 5.260 -> 5.264 ( +0.08%) [ +0.06% +0.04% +0.00% / +0.08% +0.34% +0.53%] index_select random_sorted : Elapsed 0.053 ms (5.263 ms / 100) B = [20, 5, 40, 16] (stride (640, 12800, 16, 1)) A = [20, 5, 40, 4] (stride (800, 160, 1, 40)) dim = 3 2.391 -> 2.392 ( +0.04%) [ +0.00% +0.00% +0.00% / +1.05% +0.04% +0.21%] index_add_ linear : Elapsed 0.024 ms (2.391 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.34% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.380 ms / 100) 2.392 -> 2.392 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.13% +0.00% +0.00%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.377 -> 2.376 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.21% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.378 ms / 100) 2.437 -> 2.437 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.04% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.437 ms / 100) 2.473 -> 2.474 ( +0.04%) [ +0.00% +0.08% +0.12% / +0.04% +0.44% +0.28%] index_copy_ spread : Elapsed 0.025 ms (2.473 ms / 100) 2.432 -> 2.435 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.12% +0.29% +0.16%] index_add_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.472 -> 2.478 ( +0.24%) [ +0.00% +0.28% +0.20% / +0.24% +0.53% +0.44%] index_copy_ strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.434 -> 2.438 ( +0.16%) [ +0.08% +0.00% +0.21% / +0.21% +0.16% +0.33%] index_add_ strided 5 : Elapsed 0.024 ms (2.436 ms / 100) 2.471 -> 2.476 ( +0.20%) [ +0.28% +0.04% +0.00% / +0.20% +0.24% +0.40%] index_copy_ strided 5 : Elapsed 0.025 ms (2.478 ms / 100) 2.437 -> 2.435 ( -0.08%) [ +0.00% +0.12% +0.00% / -0.08% +0.29% +0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.437 ms / 100) 2.471 -> 2.475 ( +0.16%) [ +0.00% +0.24% +0.24% / +0.16% +0.36% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.440 -> 2.438 ( -0.08%) [ +0.29% +0.16% +0.00% / -0.08% +0.12% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.447 ms / 100) 2.476 -> 2.473 ( -0.12%) [ +0.00% +0.08% +0.08% / -0.12% +0.20% +0.00%] index_copy_ perm : Elapsed 0.025 ms (2.476 ms / 100) 2.439 -> 2.441 ( +0.08%) [ +0.25% +0.00% +0.12% / +0.08% +0.33% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.472 -> 2.474 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.36% +0.12%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) 4.829 -> 4.840 ( +0.23%) [ +0.08% +0.14% +0.00% / +0.23% +0.66% +0.89%] index_select const : Elapsed 0.048 ms (4.833 ms / 100) 4.905 -> 4.907 ( +0.04%) [ +0.00% +0.10% +0.10% / +0.04% +0.18% +0.27%] index_select wrap : Elapsed 0.049 ms (4.905 ms / 100) 4.905 -> 4.905 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.41% +0.33%] index_select linear : Elapsed 0.049 ms (4.905 ms / 100) 4.897 -> 4.900 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.31% +0.22%] index_select reverse : Elapsed 0.049 ms (4.897 ms / 100) 4.827 -> 4.848 ( +0.44%) [ +0.00% +0.37% +0.33% / +0.44% +0.66% +0.48%] index_select skip64 : Elapsed 0.048 ms (4.827 ms / 100) 4.842 -> 4.848 ( +0.12%) [ +0.00% +0.14% +0.19% / +0.12% +0.58% +0.68%] index_select skip256 : Elapsed 0.048 ms (4.842 ms / 100) 4.888 -> 4.896 ( +0.16%) [ +0.06% +0.14% +0.00% / +0.16% +0.49% +0.43%] index_select spread : Elapsed 0.049 ms (4.891 ms / 100) 4.904 -> 4.905 ( +0.02%) [ +0.04% +0.04% +0.00% / +0.02% +0.43% +0.41%] index_select strided 3 : Elapsed 0.049 ms (4.906 ms / 100) 4.900 -> 4.899 ( -0.02%) [ +0.10% +0.10% +0.00% / -0.02% +0.33% +0.18%] index_select random : Elapsed 0.049 ms (4.905 ms / 100) 4.897 -> 4.899 ( +0.04%) [ +0.04% +0.10% +0.00% / +0.04% +0.57% +0.37%] index_select random_sorted : Elapsed 0.049 ms (4.899 ms / 100) B = [20, 5, 40, 16] (stride (1, 12800, 320, 20)) A = [20, 5, 40, 4] (stride (40, 800, 1, 4000)) dim = 3 2.386 -> 2.390 ( +0.17%) [ +0.00% +0.04% +0.08% / +0.17% +0.29% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.386 ms / 100) 2.330 -> 2.336 ( +0.26%) [ +0.00% +0.26% +0.09% / +0.26% +0.30% +0.39%] index_copy_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.394 -> 2.393 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.00% +0.08%] index_add_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.339 -> 2.338 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.56% +0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.341 ms / 100) 2.389 -> 2.393 ( +0.17%) [ +0.04% +0.13% +0.00% / +0.17% +0.38% +0.38%] index_add_ spread : Elapsed 0.024 ms (2.390 ms / 100) 2.351 -> 2.352 ( +0.04%) [ +0.00% +0.21% +0.34% / +0.04% +0.51% +0.38%] index_copy_ spread : Elapsed 0.024 ms (2.351 ms / 100) 2.383 -> 2.386 ( +0.13%) [ +0.00% +0.13% +0.04% / +0.13% +0.38% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.346 -> 2.350 ( +0.17%) [ +0.09% +0.09% +0.00% / +0.17% +0.26% +0.26%] index_copy_ strided 3 : Elapsed 0.023 ms (2.348 ms / 100) 2.400 -> 2.395 ( -0.21%) [ +0.00% +0.08% +0.00% / -0.21% +0.13% -0.04%] index_add_ strided 5 : Elapsed 0.024 ms (2.400 ms / 100) 2.351 -> 2.357 ( +0.26%) [ +0.00% +0.13% +0.21% / +0.26% +0.51% +0.43%] index_copy_ strided 5 : Elapsed 0.024 ms (2.351 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.08% +0.04% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.351 -> 2.355 ( +0.17%) [ +0.09% +0.00% +0.30% / +0.21% +0.30% +0.17%] index_copy_ strided 7 : Elapsed 0.024 ms (2.353 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.00% +0.17% +0.13% / +0.08% +0.04% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.392 ms / 100) 2.348 -> 2.345 ( -0.13%) [ +0.13% +0.00% +0.30% / -0.13% +0.38% +0.26%] index_copy_ perm : Elapsed 0.024 ms (2.351 ms / 100) 2.393 -> 2.390 ( -0.13%) [ +0.08% +0.17% +0.00% / -0.13% +0.25% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.350 -> 2.354 ( +0.17%) [ +0.00% +0.09% +0.04% / +0.17% +0.30% +0.17%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.350 ms / 100) 4.827 -> 4.835 ( +0.17%) [ +0.21% +0.00% +0.04% / +0.17% +0.52% +0.48%] index_select const : Elapsed 0.048 ms (4.837 ms / 100) 4.898 -> 4.903 ( +0.10%) [ +0.16% +0.00% +0.14% / +0.10% +0.27% +0.33%] index_select wrap : Elapsed 0.049 ms (4.906 ms / 100) 4.897 -> 4.908 ( +0.22%) [ +0.37% +0.25% +0.00% / +0.22% +0.78% +0.47%] index_select linear : Elapsed 0.049 ms (4.915 ms / 100) 4.897 -> 4.911 ( +0.29%) [ +0.25% +0.22% +0.00% / +0.29% +0.53% +0.35%] index_select reverse : Elapsed 0.049 ms (4.909 ms / 100) 4.842 -> 4.842 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.48% +0.29%] index_select skip64 : Elapsed 0.048 ms (4.846 ms / 100) 4.830 -> 4.843 ( +0.27%) [ +0.54% +0.00% +0.33% / +0.27% +0.75% +0.72%] index_select skip256 : Elapsed 0.049 ms (4.856 ms / 100) 4.894 -> 4.902 ( +0.16%) [ +0.43% +0.00% +0.27% / +0.16% +0.61% +0.27%] index_select spread : Elapsed 0.049 ms (4.915 ms / 100) 4.906 -> 4.912 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.20% +0.31%] index_select strided 3 : Elapsed 0.049 ms (4.914 ms / 100) 4.896 -> 4.898 ( +0.04%) [ +0.10% +0.06% +0.00% / +0.04% +0.37% +0.29%] index_select random : Elapsed 0.049 ms (4.901 ms / 100) 4.888 -> 4.886 ( -0.04%) [ +0.14% +0.00% +0.08% / -0.04% +0.35% +0.47%] index_select random_sorted : Elapsed 0.049 ms (4.895 ms / 100) B = [20, 5, 40, 16] (stride (80, 16, 1600, 1)) A = [20, 5, 40, 4] (stride (20, 4, 400, 1)) dim = 3 1.246 -> 1.246 ( +0.00%) [ +0.00% +0.16% +0.80% / +0.00% +1.36% +1.69%] index_add_ linear : Elapsed 0.012 ms (1.246 ms / 100) 1.221 -> 1.220 ( -0.08%) [ +0.00% +0.41% +0.66% / -0.08% +1.56% +1.23%] index_copy_ linear : Elapsed 0.012 ms (1.221 ms / 100) 1.238 -> 1.241 ( +0.24%) [ +0.40% +0.65% +0.00% / +0.24% +1.53% +1.45%] index_add_ reverse : Elapsed 0.012 ms (1.243 ms / 100) 1.214 -> 1.217 ( +0.25%) [ +0.00% +0.25% +0.16% / +0.25% +2.06% +1.81%] index_copy_ reverse : Elapsed 0.012 ms (1.214 ms / 100) 1.285 -> 1.289 ( +0.31%) [ +0.00% +0.31% +0.31% / +0.31% +1.79% +1.79%] index_add_ spread : Elapsed 0.013 ms (1.285 ms / 100) 1.293 -> 1.291 ( -0.15%) [ +0.08% +0.00% +0.08% / -0.15% +0.85% +1.39%] index_copy_ spread : Elapsed 0.013 ms (1.294 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.00% +0.23% +0.00% / +0.08% +2.42% +1.95%] index_add_ strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.289 -> 1.295 ( +0.47%) [ +0.16% +0.16% +0.00% / +0.47% +1.32% +1.24%] index_copy_ strided 3 : Elapsed 0.013 ms (1.291 ms / 100) 1.288 -> 1.289 ( +0.08%) [ +0.31% +0.31% +0.00% / +0.08% +1.86% +1.32%] index_add_ strided 5 : Elapsed 0.013 ms (1.292 ms / 100) 1.288 -> 1.289 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +1.79% +1.24%] index_copy_ strided 5 : Elapsed 0.013 ms (1.291 ms / 100) 1.290 -> 1.289 ( -0.08%) [ +0.39% +0.00% +0.08% / -0.08% +1.71% +1.32%] index_add_ strided 7 : Elapsed 0.013 ms (1.295 ms / 100) 1.284 -> 1.288 ( +0.31%) [ +0.62% +0.00% +0.47% / +0.31% +1.95% +1.79%] index_copy_ strided 7 : Elapsed 0.013 ms (1.292 ms / 100) 1.287 -> 1.290 ( +0.23%) [ +0.00% +0.39% +0.39% / +0.23% +1.94% +2.25%] index_add_ perm : Elapsed 0.013 ms (1.287 ms / 100) 1.284 -> 1.288 ( +0.31%) [ +0.47% +0.00% +0.23% / +0.31% +1.95% +2.02%] index_copy_ perm : Elapsed 0.013 ms (1.290 ms / 100) 1.284 -> 1.291 ( +0.55%) [ +0.00% +0.55% +0.55% / +0.55% +1.64% +1.95%] index_add_ perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) 1.287 -> 1.290 ( +0.23%) [ +0.08% +0.00% +0.08% / +0.23% +1.71% +1.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.288 ms / 100) 2.198 -> 2.203 ( +0.23%) [ +0.23% +0.23% +0.00% / +0.23% +1.18% +1.18%] index_select const : Elapsed 0.022 ms (2.203 ms / 100) 2.190 -> 2.197 ( +0.32%) [ +0.14% +0.18% +0.00% / +0.32% +1.05% +1.19%] index_select wrap : Elapsed 0.022 ms (2.193 ms / 100) 2.192 -> 2.195 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +1.19% +1.00%] index_select linear : Elapsed 0.022 ms (2.195 ms / 100) 2.194 -> 2.196 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +1.19% +1.05%] index_select reverse : Elapsed 0.022 ms (2.194 ms / 100) 2.191 -> 2.189 ( -0.09%) [ +0.00% +0.41% +0.32% / -0.09% +1.19% +1.14%] index_select skip64 : Elapsed 0.022 ms (2.191 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.05% +0.91% +1.09%] index_select skip256 : Elapsed 0.022 ms (2.195 ms / 100) 2.194 -> 2.197 ( +0.14%) [ +0.27% +0.00% +0.00% / +0.14% +1.00% +1.05%] index_select spread : Elapsed 0.022 ms (2.200 ms / 100) 2.192 -> 2.199 ( +0.32%) [ +0.09% +0.32% +0.00% / +0.32% +1.00% +1.05%] index_select strided 3 : Elapsed 0.022 ms (2.194 ms / 100) 2.191 -> 2.194 ( +0.14%) [ +0.05% +0.00% +0.00% / +0.14% +1.19% +1.32%] index_select random : Elapsed 0.022 ms (2.192 ms / 100) 2.192 -> 2.193 ( +0.05%) [ +0.14% +0.23% +0.00% / +0.05% +1.32% +1.28%] index_select random_sorted : Elapsed 0.022 ms (2.195 ms / 100) B = [20, 5, 40, 16] (stride (16, 320, 1600, 1)) A = [20, 5, 40, 4] (stride (200, 1, 5, 4000)) dim = 3 2.478 -> 2.481 ( +0.12%) [ +0.32% +0.00% +0.24% / +0.12% +0.69% +0.61%] index_add_ linear : Elapsed 0.025 ms (2.486 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.29% +0.00% +0.29% / +0.12% +0.37% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.462 ms / 100) 2.481 -> 2.483 ( +0.08%) [ +0.20% +0.00% +0.20% / +0.08% +0.60% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.486 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.33% +0.16%] index_copy_ reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.522 -> 2.522 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.67% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.524 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.00% +0.12% +0.27% / +0.20% +0.90% +0.39%] index_copy_ spread : Elapsed 0.026 ms (2.558 ms / 100) 2.522 -> 2.527 ( +0.20%) [ +0.08% +0.00% +0.28% / +0.20% +0.63% +0.67%] index_add_ strided 3 : Elapsed 0.025 ms (2.524 ms / 100) 2.557 -> 2.559 ( +0.08%) [ +0.00% +0.20% +0.20% / +0.08% +0.35% +0.43%] index_copy_ strided 3 : Elapsed 0.026 ms (2.557 ms / 100) 2.525 -> 2.527 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.40% +0.48%] index_add_ strided 5 : Elapsed 0.025 ms (2.531 ms / 100) 2.558 -> 2.562 ( +0.16%) [ +0.12% +0.04% +0.00% / +0.16% +0.20% +0.31%] index_copy_ strided 5 : Elapsed 0.026 ms (2.561 ms / 100) 2.527 -> 2.529 ( +0.08%) [ +0.00% +0.20% +0.00% / +0.08% +0.55% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.527 ms / 100) 2.563 -> 2.562 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.23% +0.16%] index_copy_ strided 7 : Elapsed 0.026 ms (2.563 ms / 100) 2.523 -> 2.527 ( +0.16%) [ +0.36% +0.04% +0.00% / +0.16% +0.32% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.532 ms / 100) 2.561 -> 2.559 ( -0.08%) [ +0.20% +0.00% +0.04% / -0.08% +0.12% +0.00%] index_copy_ perm : Elapsed 0.026 ms (2.566 ms / 100) 2.523 -> 2.523 ( +0.00%) [ +0.16% +0.32% +0.00% / +0.00% +0.32% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.527 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.04% +0.00% +0.16%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.558 ms / 100) 5.421 -> 5.420 ( -0.02%) [ +0.09% +0.00% +0.02% / -0.02% +0.41% +0.30%] index_select const : Elapsed 0.054 ms (5.426 ms / 100) 5.426 -> 5.429 ( +0.06%) [ +0.02% +0.04% +0.00% / +0.06% +0.46% +0.53%] index_select wrap : Elapsed 0.054 ms (5.427 ms / 100) 5.455 -> 5.453 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.55% +0.40%] index_select linear : Elapsed 0.055 ms (5.457 ms / 100) 5.440 -> 5.446 ( +0.11%) [ +0.00% +0.04% +0.02% / +0.11% +0.40% +0.26%] index_select reverse : Elapsed 0.054 ms (5.440 ms / 100) 5.433 -> 5.434 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.07% +0.26%] index_select skip64 : Elapsed 0.054 ms (5.433 ms / 100) 5.421 -> 5.420 ( -0.02%) [ +0.07% +0.11% +0.00% / -0.02% +0.44% +0.24%] index_select skip256 : Elapsed 0.054 ms (5.425 ms / 100) 5.426 -> 5.430 ( +0.07%) [ +0.06% +0.04% +0.00% / +0.07% +0.33% +0.28%] index_select spread : Elapsed 0.054 ms (5.429 ms / 100) 5.444 -> 5.447 ( +0.06%) [ +0.06% +0.07% +0.00% / +0.06% +0.37% +0.35%] index_select strided 3 : Elapsed 0.054 ms (5.447 ms / 100) 5.451 -> 5.455 ( +0.07%) [ +0.00% +0.07% +0.02% / +0.07% +0.09% +0.26%] index_select random : Elapsed 0.055 ms (5.451 ms / 100) 5.426 -> 5.425 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.37% +0.33%] index_select random_sorted : Elapsed 0.054 ms (5.426 ms / 100) B = [20, 5, 40, 16] (stride (5, 1, 1600, 100)) A = [20, 5, 40, 4] (stride (1, 3200, 20, 800)) dim = 3 2.410 -> 2.410 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.17% +0.41%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.361 -> 2.363 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.64% +0.30%] index_copy_ linear : Elapsed 0.024 ms (2.362 ms / 100) 2.410 -> 2.408 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.17% +0.12%] index_add_ reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.08% +0.00% +0.34% / +0.21% +0.21% +0.08%] index_copy_ reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.12% +0.17% +0.00% / +0.04% +0.29% +0.21%] index_add_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.361 -> 2.363 ( +0.08%) [ +0.00% +0.25% +0.00% / +0.08% +0.38% +0.13%] index_copy_ spread : Elapsed 0.024 ms (2.361 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.04% +0.12% +0.00% / -0.04% +0.29% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.367 -> 2.373 ( +0.25%) [ +0.00% +0.25% +0.34% / +0.30% +0.51% +0.25%] index_copy_ strided 3 : Elapsed 0.024 ms (2.367 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.21% +0.00% +0.12% / +0.04% +0.12% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.419 ms / 100) 2.368 -> 2.369 ( +0.04%) [ +0.00% +0.00% +0.13% / +0.04% +0.08% +0.13%] index_copy_ strided 5 : Elapsed 0.024 ms (2.368 ms / 100) 2.412 -> 2.409 ( -0.12%) [ +0.00% +0.08% +0.08% / -0.12% +0.12% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.13% +0.30%] index_copy_ strided 7 : Elapsed 0.024 ms (2.364 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.00% +0.04% +0.12% / +0.04% +0.25% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.411 ms / 100) 2.360 -> 2.363 ( +0.13%) [ +0.00% +0.00% +0.17% / +0.13% +0.25% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.360 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.04% +0.00% +0.21% / -0.04% +0.17% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.365 -> 2.364 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.21% +0.13%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.367 ms / 100) 5.295 -> 5.311 ( +0.30%) [ +0.30% +0.21% +0.00% / +0.30% +0.57% +0.34%] index_select const : Elapsed 0.053 ms (5.311 ms / 100) 5.310 -> 5.311 ( +0.02%) [ +0.00% +0.11% +0.09% / +0.06% +0.02% +0.11%] index_select wrap : Elapsed 0.053 ms (5.310 ms / 100) 5.280 -> 5.282 ( +0.04%) [ +0.00% +0.00% +0.15% / +0.04% +0.42% +0.21%] index_select linear : Elapsed 0.053 ms (5.280 ms / 100) 5.337 -> 5.337 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.00% +0.28% +0.24%] index_select reverse : Elapsed 0.053 ms (5.342 ms / 100) 5.290 -> 5.290 ( +0.00%) [ +0.00% +0.15% +0.11% / +0.00% +0.43% +0.30%] index_select skip64 : Elapsed 0.053 ms (5.290 ms / 100) 5.274 -> 5.274 ( +0.00%) [ +0.11% +0.00% +0.02% / +0.00% +0.19% +0.32%] index_select skip256 : Elapsed 0.053 ms (5.280 ms / 100) 5.282 -> 5.295 ( +0.25%) [ +0.34% +0.00% +0.08% / +0.25% +0.53% +0.61%] index_select spread : Elapsed 0.053 ms (5.300 ms / 100) 5.325 -> 5.321 ( -0.08%) [ +0.02% +0.11% +0.00% / -0.08% +0.26% +0.19%] index_select strided 3 : Elapsed 0.053 ms (5.326 ms / 100) 5.280 -> 5.286 ( +0.11%) [ +0.13% +0.00% +0.09% / +0.11% +0.27% +0.25%] index_select random : Elapsed 0.053 ms (5.287 ms / 100) 5.317 -> 5.318 ( +0.02%) [ +0.08% +0.00% +0.04% / +0.02% +0.41% +0.47%] index_select random_sorted : Elapsed 0.053 ms (5.321 ms / 100) out_shape = [16, 40, 4, 5] in_shape = [20, 40, 4, 5] idx_dim = 0 B = [16, 40, 4, 5] (stride (800, 20, 1, 4)) A = [20, 40, 4, 5] (stride (800, 1, 40, 160)) dim = 0 4.125 -> 4.127 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.80% +0.75%] index_select const : Elapsed 0.041 ms (4.126 ms / 100) 4.162 -> 4.162 ( +0.00%) [ +0.00% +0.05% +0.02% / +0.00% +0.60% +0.65%] index_select wrap : Elapsed 0.042 ms (4.162 ms / 100) 4.114 -> 4.117 ( +0.07%) [ +0.07% +0.00% +0.02% / +0.07% +0.63% +0.63%] index_select linear : Elapsed 0.041 ms (4.117 ms / 100) 4.097 -> 4.097 ( +0.00%) [ +0.17% +0.00% +0.12% / +0.00% +1.05% +0.88%] index_select reverse : Elapsed 0.041 ms (4.104 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.65% +0.68%] index_select skip64 : Elapsed 0.041 ms (4.139 ms / 100) 4.139 -> 4.141 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.65% +0.63%] index_select skip256 : Elapsed 0.041 ms (4.140 ms / 100) 4.141 -> 4.141 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.60%] index_select spread : Elapsed 0.041 ms (4.141 ms / 100) 4.143 -> 4.143 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.65% +0.68%] index_select strided 3 : Elapsed 0.041 ms (4.143 ms / 100) 4.098 -> 4.125 ( +0.66%) [ +0.02% +0.00% +0.02% / +1.07% +0.66% +0.66%] index_select strided 5 : Elapsed 0.041 ms (4.099 ms / 100) 4.123 -> 4.125 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.63% +0.65%] index_select strided 7 : Elapsed 0.041 ms (4.126 ms / 100) 4.118 -> 4.118 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.51% +0.53%] index_select strided 8 : Elapsed 0.041 ms (4.118 ms / 100) 4.147 -> 4.147 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.63% +0.63%] index_select strided 16 : Elapsed 0.041 ms (4.148 ms / 100) 4.112 -> 4.114 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.49% +0.51%] index_select random : Elapsed 0.041 ms (4.114 ms / 100) 4.133 -> 4.134 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.53% +0.51%] index_select random_sorted : Elapsed 0.041 ms (4.134 ms / 100) 4.129 -> 4.128 ( -0.02%) [ +0.02% +0.00% +0.07% / -0.02% +0.36% +0.39%] index_select perm : Elapsed 0.041 ms (4.130 ms / 100) 4.136 -> 4.137 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.51% +0.56%] index_select perm_sorted : Elapsed 0.041 ms (4.137 ms / 100) B = [16, 40, 4, 5] (stride (5, 320, 80, 1)) A = [20, 40, 4, 5] (stride (5, 400, 100, 1)) dim = 0 1.315 -> 1.318 ( +0.23%) [ +0.15% +0.08% +0.00% / +0.23% +0.23% +0.23%] index_select const : Elapsed 0.013 ms (1.317 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.23% +0.00% / +0.00% +0.08% +0.15%] index_select wrap : Elapsed 0.013 ms (1.319 ms / 100) 1.319 -> 1.318 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.00% +0.08%] index_select linear : Elapsed 0.013 ms (1.319 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.08% +0.15%] index_select reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.15% +0.08% +0.00% / +0.00% +0.15% +0.38%] index_select skip64 : Elapsed 0.013 ms (1.318 ms / 100) 1.314 -> 1.315 ( +0.08%) [ +0.23% +0.00% +0.08% / +0.15% +0.08% +0.23%] index_select skip256 : Elapsed 0.013 ms (1.317 ms / 100) 1.314 -> 1.318 ( +0.30%) [ +0.00% +0.15% +0.00% / +0.38% +0.46% +0.30%] index_select spread : Elapsed 0.013 ms (1.314 ms / 100) 1.316 -> 1.320 ( +0.30%) [ +0.00% +0.23% +0.15% / +0.38% +0.30% +0.38%] index_select strided 3 : Elapsed 0.013 ms (1.316 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.23% +0.15%] index_select strided 5 : Elapsed 0.013 ms (1.318 ms / 100) 1.314 -> 1.314 ( +0.00%) [ +0.08% +0.23% +0.00% / +0.00% +0.76% +0.68%] index_select strided 7 : Elapsed 0.013 ms (1.315 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.61%] index_select strided 8 : Elapsed 0.013 ms (1.317 ms / 100) 1.312 -> 1.311 ( -0.08%) [ +0.38% +0.00% +0.00% / -0.08% +0.53% +0.46%] index_select strided 16 : Elapsed 0.013 ms (1.317 ms / 100) 1.316 -> 1.319 ( +0.23%) [ +0.15% +0.00% +0.15% / +0.23% +0.61% +0.76%] index_select random : Elapsed 0.013 ms (1.318 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.23% +0.00% +0.08% / +0.15% +0.53% +0.46%] index_select random_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.310 -> 1.311 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.92% +0.84%] index_select perm : Elapsed 0.013 ms (1.311 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.38% +0.46%] index_select perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) B = [16, 40, 4, 5] (stride (160, 1, 40, 2560)) A = [20, 40, 4, 5] (stride (40, 1, 800, 3200)) dim = 0 4.135 -> 4.138 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.58% +0.58%] index_select const : Elapsed 0.041 ms (4.137 ms / 100) 4.145 -> 4.147 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.92% +0.94%] index_select wrap : Elapsed 0.041 ms (4.147 ms / 100) 4.145 -> 4.145 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.70% +0.70%] index_select linear : Elapsed 0.041 ms (4.145 ms / 100) 4.156 -> 4.158 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.77% +0.72%] index_select reverse : Elapsed 0.042 ms (4.156 ms / 100) 4.118 -> 4.119 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.56% +0.53%] index_select skip64 : Elapsed 0.041 ms (4.120 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.51% +0.53%] index_select skip256 : Elapsed 0.041 ms (4.139 ms / 100) 4.149 -> 4.149 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_select spread : Elapsed 0.042 ms (4.154 ms / 100) 4.131 -> 4.135 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.65% +0.63%] index_select strided 3 : Elapsed 0.041 ms (4.133 ms / 100) 4.102 -> 4.102 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.61% +0.61%] index_select strided 5 : Elapsed 0.041 ms (4.102 ms / 100) 4.136 -> 4.137 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.65% +0.77%] index_select strided 7 : Elapsed 0.041 ms (4.136 ms / 100) 4.155 -> 4.157 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.05% +0.70% +0.70%] index_select strided 8 : Elapsed 0.042 ms (4.156 ms / 100) 4.149 -> 4.149 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.58% +0.53%] index_select strided 16 : Elapsed 0.041 ms (4.149 ms / 100) 4.133 -> 4.134 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.60% +0.58%] index_select random : Elapsed 0.041 ms (4.135 ms / 100) 4.158 -> 4.159 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.55% +0.58%] index_select random_sorted : Elapsed 0.042 ms (4.158 ms / 100) 4.148 -> 4.152 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.41% +0.41%] index_select perm : Elapsed 0.041 ms (4.150 ms / 100) 4.123 -> 4.124 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.49% +0.56%] index_select perm_sorted : Elapsed 0.041 ms (4.125 ms / 100) out_shape = [20, 16, 4, 5] in_shape = [20, 40, 4, 5] idx_dim = 1 B = [20, 16, 4, 5] (stride (320, 20, 5, 1)) A = [20, 40, 4, 5] (stride (800, 1, 40, 160)) dim = 1 3.935 -> 3.942 ( +0.18%) [ +0.13% +0.10% +0.00% / +0.18% +0.76% +0.71%] index_select const : Elapsed 0.039 ms (3.940 ms / 100) 3.923 -> 3.946 ( +0.59%) [ +0.00% +0.00% +0.00% / +0.69% +0.59% +0.59%] index_select wrap : Elapsed 0.039 ms (3.923 ms / 100) 3.909 -> 3.919 ( +0.26%) [ +0.05% +0.00% +0.31% / +0.26% +0.61% +0.49%] index_select linear : Elapsed 0.039 ms (3.911 ms / 100) 3.914 -> 3.917 ( +0.08%) [ +0.18% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_select reverse : Elapsed 0.039 ms (3.921 ms / 100) 3.919 -> 3.919 ( +0.00%) [ +0.00% +0.13% +0.10% / +0.00% +0.61% +0.66%] index_select skip64 : Elapsed 0.039 ms (3.919 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.13% +0.00% +0.00% / +0.03% +0.61% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.944 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.08% +0.05% +0.00% / -0.03% +0.15% +0.23%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.926 -> 3.931 ( +0.13%) [ +0.10% +0.00% +0.03% / +0.13% +0.33% +0.51%] index_select strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 3.917 -> 3.921 ( +0.10%) [ +0.15% +0.18% +0.00% / +0.10% +0.36% +0.36%] index_select strided 5 : Elapsed 0.039 ms (3.923 ms / 100) 3.908 -> 3.923 ( +0.38%) [ +0.26% +0.00% +0.36% / +0.38% +0.46% +0.46%] index_select strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 3.927 -> 3.931 ( +0.10%) [ +0.08% +0.00% +0.20% / +0.10% +0.53% +0.48%] index_select strided 8 : Elapsed 0.039 ms (3.930 ms / 100) 3.913 -> 3.924 ( +0.28%) [ +0.00% +0.20% +0.13% / +0.28% +0.51% +0.49%] index_select strided 16 : Elapsed 0.039 ms (3.913 ms / 100) 3.915 -> 3.923 ( +0.20%) [ +0.15% +0.15% +0.00% / +0.20% +0.66% +0.66%] index_select random : Elapsed 0.039 ms (3.921 ms / 100) 3.919 -> 3.922 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.08% +0.36% +0.36%] index_select random_sorted : Elapsed 0.039 ms (3.923 ms / 100) 3.921 -> 3.925 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.20% +0.28%] index_select perm : Elapsed 0.039 ms (3.921 ms / 100) 3.906 -> 3.908 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.41% +0.38%] index_select perm_sorted : Elapsed 0.039 ms (3.907 ms / 100) B = [20, 16, 4, 5] (stride (20, 400, 1, 4)) A = [20, 40, 4, 5] (stride (200, 5, 4000, 1)) dim = 1 3.921 -> 3.922 ( +0.03%) [ +0.00% +0.08% +0.05% / +0.03% +0.71% +0.74%] index_select const : Elapsed 0.039 ms (3.921 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.66% +0.77%] index_select wrap : Elapsed 0.039 ms (3.922 ms / 100) 3.927 -> 3.926 ( -0.03%) [ +0.05% +0.10% +0.00% / -0.03% +0.64% +0.71%] index_select linear : Elapsed 0.039 ms (3.929 ms / 100) 3.923 -> 3.928 ( +0.13%) [ +0.18% +0.00% +0.15% / +0.13% +0.99% +0.87%] index_select reverse : Elapsed 0.039 ms (3.930 ms / 100) 3.921 -> 3.924 ( +0.08%) [ +0.03% +0.00% +0.10% / +0.08% +0.94% +0.99%] index_select skip64 : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.77% +0.79%] index_select skip256 : Elapsed 0.039 ms (3.924 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.00% +0.03% +0.10% / -0.03% +0.61% +0.64%] index_select spread : Elapsed 0.039 ms (3.921 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.13% +0.03% +0.00% / +0.00% +0.69% +0.79%] index_select strided 3 : Elapsed 0.039 ms (3.925 ms / 100) 3.917 -> 3.922 ( +0.13%) [ +0.00% +0.13% +0.13% / +0.13% +0.64% +0.59%] index_select strided 5 : Elapsed 0.039 ms (3.917 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.00% +0.00% +0.05% / +0.03% +0.61% +0.59%] index_select strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.61% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.923 ms / 100) 3.913 -> 3.921 ( +0.20%) [ +0.18% +0.00% +0.15% / +0.20% +0.82% +0.84%] index_select strided 16 : Elapsed 0.039 ms (3.920 ms / 100) 3.935 -> 3.936 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.71% +0.71%] index_select random : Elapsed 0.039 ms (3.938 ms / 100) 3.917 -> 3.912 ( -0.13%) [ +0.05% +0.08% +0.00% / -0.13% +0.71% +0.74%] index_select random_sorted : Elapsed 0.039 ms (3.919 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.00% +0.00% +0.08% / +0.05% +0.92% +1.07%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.79% +0.77%] index_select perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) B = [20, 16, 4, 5] (stride (1, 400, 100, 20)) A = [20, 40, 4, 5] (stride (1, 20, 800, 3200)) dim = 1 3.809 -> 3.810 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.74% +0.71%] index_select const : Elapsed 0.038 ms (3.809 ms / 100) 3.806 -> 3.805 ( -0.03%) [ +0.03% +0.08% +0.00% / -0.03% +0.55% +0.55%] index_select wrap : Elapsed 0.038 ms (3.807 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.03% +0.00% +0.11% / +0.05% +0.63% +0.50%] index_select linear : Elapsed 0.038 ms (3.805 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.47% +0.47%] index_select reverse : Elapsed 0.038 ms (3.800 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.53% +0.47%] index_select skip64 : Elapsed 0.038 ms (3.804 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.47% +0.47%] index_select skip256 : Elapsed 0.038 ms (3.813 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.39% +0.37%] index_select spread : Elapsed 0.038 ms (3.807 ms / 100) 3.808 -> 3.809 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.45% +0.42%] index_select strided 3 : Elapsed 0.038 ms (3.808 ms / 100) 3.788 -> 3.788 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.29% +0.32%] index_select strided 5 : Elapsed 0.038 ms (3.788 ms / 100) 3.820 -> 3.820 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.45% +0.37%] index_select strided 7 : Elapsed 0.038 ms (3.820 ms / 100) 3.799 -> 3.799 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.47% +0.45%] index_select strided 8 : Elapsed 0.038 ms (3.800 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.47% +0.47%] index_select strided 16 : Elapsed 0.038 ms (3.817 ms / 100) 3.787 -> 3.788 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.45% +0.48%] index_select random : Elapsed 0.038 ms (3.789 ms / 100) 3.813 -> 3.812 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.34% +0.39%] index_select random_sorted : Elapsed 0.038 ms (3.814 ms / 100) 3.807 -> 3.807 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.34% +0.29%] index_select perm : Elapsed 0.038 ms (3.808 ms / 100) 3.793 -> 3.793 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.32% +0.40%] index_select perm_sorted : Elapsed 0.038 ms (3.796 ms / 100) B = [20, 16, 4, 5] (stride (1, 20, 1600, 320)) A = [20, 40, 4, 5] (stride (160, 4, 1, 3200)) dim = 1 3.698 -> 3.701 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.78% +0.81%] index_select const : Elapsed 0.037 ms (3.698 ms / 100) 3.697 -> 3.701 ( +0.11%) [ +0.14% +0.14% +0.00% / +0.11% +0.89% +0.87%] index_select wrap : Elapsed 0.037 ms (3.702 ms / 100) 3.699 -> 3.704 ( +0.14%) [ +0.19% +0.00% +0.03% / +0.14% +0.68% +0.84%] index_select linear : Elapsed 0.037 ms (3.706 ms / 100) 3.689 -> 3.697 ( +0.22%) [ +0.22% +0.19% +0.00% / +0.22% +1.08% +1.08%] index_select reverse : Elapsed 0.037 ms (3.697 ms / 100) 3.687 -> 3.691 ( +0.11%) [ +0.00% +0.14% +0.22% / +0.11% +1.08% +0.95%] index_select skip64 : Elapsed 0.037 ms (3.687 ms / 100) 3.697 -> 3.699 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.78% +0.60%] index_select skip256 : Elapsed 0.037 ms (3.697 ms / 100) 3.682 -> 3.684 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.76% +0.73%] index_select spread : Elapsed 0.037 ms (3.682 ms / 100) 3.695 -> 3.695 ( +0.00%) [ +0.00% +0.11% +0.03% / +0.00% +0.73% +0.76%] index_select strided 3 : Elapsed 0.037 ms (3.695 ms / 100) 3.683 -> 3.682 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.60% +0.68%] index_select strided 5 : Elapsed 0.037 ms (3.685 ms / 100) 3.700 -> 3.702 ( +0.05%) [ +0.11% +0.00% +0.03% / +0.05% +0.84% +0.68%] index_select strided 7 : Elapsed 0.037 ms (3.704 ms / 100) 3.696 -> 3.697 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.84% +0.84%] index_select strided 8 : Elapsed 0.037 ms (3.699 ms / 100) 3.692 -> 3.696 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.70% +0.70%] index_select strided 16 : Elapsed 0.037 ms (3.692 ms / 100) 3.690 -> 3.690 ( +0.00%) [ +0.08% +0.22% +0.00% / +0.00% +1.11% +0.70%] index_select random : Elapsed 0.037 ms (3.693 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.81% +0.78%] index_select random_sorted : Elapsed 0.037 ms (3.702 ms / 100) 3.689 -> 3.689 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.81% +0.79%] index_select perm : Elapsed 0.037 ms (3.689 ms / 100) 3.700 -> 3.707 ( +0.19%) [ +0.14% +0.00% +0.08% / +0.19% +0.84% +0.81%] index_select perm_sorted : Elapsed 0.037 ms (3.705 ms / 100) B = [20, 16, 4, 5] (stride (64, 4, 1, 1280)) A = [20, 40, 4, 5] (stride (800, 1, 200, 40)) dim = 1 3.925 -> 3.928 ( +0.08%) [ +0.00% +0.00% +0.20% / +0.08% +0.59% +0.46%] index_select const : Elapsed 0.039 ms (3.925 ms / 100) 3.948 -> 3.947 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.41% +0.43%] index_select wrap : Elapsed 0.039 ms (3.948 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.00% +0.05% +0.08% / +0.00% +0.41% +0.54%] index_select linear : Elapsed 0.039 ms (3.922 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.48% +0.59%] index_select reverse : Elapsed 0.039 ms (3.926 ms / 100) 3.942 -> 3.945 ( +0.08%) [ +0.08% +0.00% +0.03% / +0.08% +0.48% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.945 ms / 100) 3.928 -> 3.932 ( +0.10%) [ +0.25% +0.03% +0.00% / +0.10% +0.61% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.938 ms / 100) 3.898 -> 3.898 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.54% +0.28%] index_select spread : Elapsed 0.039 ms (3.905 ms / 100) 3.932 -> 3.936 ( +0.10%) [ +0.15% +0.00% +0.08% / +0.10% +0.61% +0.53%] index_select strided 3 : Elapsed 0.039 ms (3.938 ms / 100) 3.911 -> 3.925 ( +0.36%) [ +0.23% +0.00% +0.18% / +0.41% +0.36% +0.41%] index_select strided 5 : Elapsed 0.039 ms (3.920 ms / 100) 3.891 -> 3.896 ( +0.13%) [ +0.05% +0.00% +0.00% / +0.13% +0.41% +0.75%] index_select strided 7 : Elapsed 0.039 ms (3.893 ms / 100) 3.942 -> 3.942 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.23% +0.36%] index_select strided 8 : Elapsed 0.039 ms (3.943 ms / 100) 3.903 -> 3.902 ( -0.03%) [ +0.15% +0.03% +0.00% / -0.03% +0.44% +0.08%] index_select strided 16 : Elapsed 0.039 ms (3.909 ms / 100) 3.895 -> 3.886 ( -0.23%) [ +0.08% +0.00% +0.15% / -0.23% +0.15% +0.10%] index_select random : Elapsed 0.039 ms (3.898 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.03% +0.00% +0.13% / +0.05% +0.38% +0.31%] index_select random_sorted : Elapsed 0.039 ms (3.918 ms / 100) 3.902 -> 3.923 ( +0.54%) [ +0.05% +0.00% +0.38% / +0.56% +0.67% +0.54%] index_select perm : Elapsed 0.039 ms (3.904 ms / 100) 3.921 -> 3.924 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.23% +0.26%] index_select perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) B = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) A = [20, 40, 4, 5] (stride (1, 400, 100, 20)) dim = 1 3.702 -> 3.703 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.73% +0.70%] index_select const : Elapsed 0.037 ms (3.703 ms / 100) 3.712 -> 3.712 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.67% +0.65%] index_select wrap : Elapsed 0.037 ms (3.714 ms / 100) 3.704 -> 3.702 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.73% +0.70%] index_select linear : Elapsed 0.037 ms (3.704 ms / 100) 3.708 -> 3.709 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.73% +0.70%] index_select reverse : Elapsed 0.037 ms (3.708 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.57% +0.57%] index_select skip64 : Elapsed 0.037 ms (3.701 ms / 100) 3.702 -> 3.701 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.78% +0.73%] index_select skip256 : Elapsed 0.037 ms (3.702 ms / 100) 3.671 -> 3.671 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.87% +0.79%] index_select spread : Elapsed 0.037 ms (3.671 ms / 100) 3.695 -> 3.696 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.73% +0.76%] index_select strided 3 : Elapsed 0.037 ms (3.696 ms / 100) 3.710 -> 3.708 ( -0.05%) [ +0.03% +0.03% +0.00% / -0.05% +0.57% +0.59%] index_select strided 5 : Elapsed 0.037 ms (3.711 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.79% +0.76%] index_select strided 7 : Elapsed 0.037 ms (3.696 ms / 100) 3.707 -> 3.707 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.62% +0.65%] index_select strided 8 : Elapsed 0.037 ms (3.708 ms / 100) 3.706 -> 3.707 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.78% +0.78%] index_select strided 16 : Elapsed 0.037 ms (3.708 ms / 100) 3.702 -> 3.702 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.76% +0.76%] index_select random : Elapsed 0.037 ms (3.703 ms / 100) 3.687 -> 3.690 ( +0.08%) [ +0.11% +0.08% +0.00% / +0.08% +0.95% +0.90%] index_select random_sorted : Elapsed 0.037 ms (3.691 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.62% +0.65%] index_select perm : Elapsed 0.037 ms (3.695 ms / 100) 3.702 -> 3.701 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.78% +0.78%] index_select perm_sorted : Elapsed 0.037 ms (3.702 ms / 100) out_shape = [20, 40, 16, 5] in_shape = [20, 40, 4, 5] idx_dim = 2 B = [20, 40, 16, 5] (stride (80, 1600, 1, 16)) A = [20, 40, 4, 5] (stride (800, 20, 1, 4)) dim = 2 2.350 -> 2.361 ( +0.47%) [ +0.00% +0.09% +0.34% / +0.47% +0.51% +0.51%] index_add_ linear : Elapsed 0.024 ms (2.350 ms / 100) 2.341 -> 2.342 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.43% +0.56%] index_copy_ linear : Elapsed 0.023 ms (2.343 ms / 100) 2.353 -> 2.355 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.08% +0.30% +0.38%] index_add_ reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.338 -> 2.343 ( +0.21%) [ +0.26% +0.00% +0.17% / +0.21% +0.34% +0.38%] index_copy_ reverse : Elapsed 0.023 ms (2.344 ms / 100) 2.389 -> 2.393 ( +0.17%) [ +0.25% +0.04% +0.00% / +0.17% +0.88% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.441 -> 2.438 ( -0.12%) [ +0.00% +0.00% +0.16% / -0.12% +0.45% +0.57%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.29% +0.25% +0.00% / +0.04% +0.46% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.41% +0.45%] index_copy_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.405 -> 2.405 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.37% +0.29%] index_add_ strided 5 : Elapsed 0.024 ms (2.405 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.12% +0.33% +0.33%] index_copy_ strided 5 : Elapsed 0.025 ms (2.453 ms / 100) 2.392 -> 2.394 ( +0.08%) [ +0.17% +0.13% +0.00% / +0.08% +0.25% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.396 ms / 100) 2.440 -> 2.447 ( +0.29%) [ +0.12% +0.04% +0.00% / +0.29% +0.70% +0.45%] index_copy_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.00% +0.08% +0.04% / +0.13% +0.13% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.393 ms / 100) 2.440 -> 2.438 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.41% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.442 ms / 100) 2.394 -> 2.392 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.13% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 2.440 -> 2.444 ( +0.16%) [ +0.00% +0.04% +0.12% / +0.16% +0.20% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 4.946 -> 4.947 ( +0.02%) [ +0.10% +0.00% +0.06% / +0.02% +0.53% +0.57%] index_select const : Elapsed 0.050 ms (4.951 ms / 100) 4.954 -> 4.953 ( -0.02%) [ +0.06% +0.00% +0.02% / -0.02% +0.34% +0.42%] index_select wrap : Elapsed 0.050 ms (4.957 ms / 100) 4.951 -> 4.951 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.00% +0.26% +0.30%] index_select linear : Elapsed 0.050 ms (4.951 ms / 100) 4.962 -> 4.966 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.40% +0.50%] index_select reverse : Elapsed 0.050 ms (4.964 ms / 100) 4.961 -> 4.961 ( +0.00%) [ +0.00% +0.14% +0.08% / +0.00% +0.38% +0.32%] index_select skip64 : Elapsed 0.050 ms (4.961 ms / 100) 4.942 -> 4.946 ( +0.08%) [ +0.22% +0.06% +0.00% / +0.08% +0.51% +0.42%] index_select skip256 : Elapsed 0.050 ms (4.953 ms / 100) 4.957 -> 4.962 ( +0.10%) [ +0.02% +0.04% +0.00% / +0.10% +0.30% +0.30%] index_select spread : Elapsed 0.050 ms (4.958 ms / 100) 4.950 -> 4.951 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +0.24% +0.22%] index_select strided 3 : Elapsed 0.050 ms (4.950 ms / 100) 4.965 -> 4.971 ( +0.12%) [ +0.12% +0.00% +0.02% / +0.12% +0.30% +0.24%] index_select random : Elapsed 0.050 ms (4.971 ms / 100) 4.951 -> 4.952 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +0.32% +0.34%] index_select random_sorted : Elapsed 0.050 ms (4.951 ms / 100) B = [20, 40, 16, 5] (stride (5, 1600, 100, 1)) A = [20, 40, 4, 5] (stride (200, 5, 4000, 1)) dim = 2 2.384 -> 2.381 ( -0.13%) [ +0.08% +0.00% +0.04% / -0.13% +0.25% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.386 ms / 100) 2.327 -> 2.330 ( +0.13%) [ +0.30% +0.00% +0.17% / +0.13% +0.17% +0.21%] index_copy_ linear : Elapsed 0.023 ms (2.334 ms / 100) 2.381 -> 2.381 ( +0.00%) [ +0.13% +0.08% +0.00% / +0.00% +0.04% +0.08%] index_add_ reverse : Elapsed 0.024 ms (2.384 ms / 100) 2.327 -> 2.322 ( -0.21%) [ +0.00% +0.00% +0.17% / -0.21% +0.09% +0.17%] index_copy_ reverse : Elapsed 0.023 ms (2.327 ms / 100) 2.382 -> 2.385 ( +0.13%) [ +0.08% +0.17% +0.00% / +0.13% +0.17% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.384 ms / 100) 2.328 -> 2.330 ( +0.09%) [ +0.00% +0.17% +0.13% / +0.13% +0.13% +0.09%] index_copy_ spread : Elapsed 0.023 ms (2.328 ms / 100) 2.382 -> 2.382 ( +0.00%) [ +0.04% +0.00% +0.29% / +0.00% +0.38% +0.38%] index_add_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.09% +0.04% +0.21%] index_copy_ strided 3 : Elapsed 0.023 ms (2.332 ms / 100) 2.382 -> 2.382 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.34% +0.42%] index_add_ strided 5 : Elapsed 0.024 ms (2.386 ms / 100) 2.329 -> 2.330 ( +0.04%) [ +0.17% +0.00% +0.00% / +0.04% +0.26% +0.30%] index_copy_ strided 5 : Elapsed 0.023 ms (2.333 ms / 100) 2.380 -> 2.382 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.63% +0.38%] index_add_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.21% +0.00% +0.21% / +0.13% +0.56% +0.52%] index_copy_ strided 7 : Elapsed 0.023 ms (2.331 ms / 100) 2.385 -> 2.385 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.29% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.387 ms / 100) 2.331 -> 2.328 ( -0.13%) [ +0.09% +0.00% +0.09% / -0.13% +0.26% +0.21%] index_copy_ perm : Elapsed 0.023 ms (2.333 ms / 100) 2.382 -> 2.386 ( +0.17%) [ +0.21% +0.08% +0.00% / +0.42% +0.17% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.387 ms / 100) 2.331 -> 2.333 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.13% +0.30% +0.09%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.331 ms / 100) 4.737 -> 4.735 ( -0.04%) [ +0.00% +0.06% +0.02% / -0.04% +0.34% +0.34%] index_select const : Elapsed 0.047 ms (4.737 ms / 100) 4.785 -> 4.786 ( +0.02%) [ +0.08% +0.00% +0.00% / +0.02% +0.29% +0.25%] index_select wrap : Elapsed 0.048 ms (4.789 ms / 100) 4.787 -> 4.783 ( -0.08%) [ +0.02% +0.00% +0.19% / -0.08% +0.40% +0.25%] index_select linear : Elapsed 0.048 ms (4.788 ms / 100) 4.808 -> 4.806 ( -0.04%) [ +0.06% +0.00% +0.25% / -0.04% +0.31% +0.19%] index_select reverse : Elapsed 0.048 ms (4.811 ms / 100) 4.732 -> 4.730 ( -0.04%) [ +0.11% +0.04% +0.00% / -0.04% +0.11% +0.08%] index_select skip64 : Elapsed 0.047 ms (4.737 ms / 100) 4.727 -> 4.729 ( +0.04%) [ +0.21% +0.04% +0.00% / +0.04% +0.23% +0.34%] index_select skip256 : Elapsed 0.047 ms (4.737 ms / 100) 4.774 -> 4.781 ( +0.15%) [ +0.06% +0.36% +0.00% / +0.15% +0.54% +0.52%] index_select spread : Elapsed 0.048 ms (4.777 ms / 100) 4.796 -> 4.800 ( +0.08%) [ +0.04% +0.19% +0.00% / +0.08% +0.25% +0.29%] index_select strided 3 : Elapsed 0.048 ms (4.798 ms / 100) 4.764 -> 4.763 ( -0.02%) [ +0.13% +0.00% +0.02% / -0.02% +0.23% +0.36%] index_select random : Elapsed 0.048 ms (4.770 ms / 100) 4.780 -> 4.773 ( -0.15%) [ +0.10% +0.00% +0.10% / -0.15% +0.36% +0.27%] index_select random_sorted : Elapsed 0.048 ms (4.785 ms / 100) B = [20, 40, 16, 5] (stride (5, 1600, 100, 1)) A = [20, 40, 4, 5] (stride (160, 1, 40, 3200)) dim = 2 2.507 -> 2.511 ( +0.16%) [ +0.12% +0.00% +0.24% / +0.16% +0.56% +0.32%] index_add_ linear : Elapsed 0.025 ms (2.510 ms / 100) 2.452 -> 2.458 ( +0.24%) [ +0.12% +0.00% +0.16% / +0.24% +0.53% +0.45%] index_copy_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.505 -> 2.507 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.28% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.507 ms / 100) 2.451 -> 2.452 ( +0.04%) [ +0.00% +0.41% +0.16% / +0.04% +0.37% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.513 -> 2.515 ( +0.08%) [ +0.20% +0.12% +0.00% / +0.08% +0.60% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.518 ms / 100) 2.465 -> 2.469 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.28% +0.41%] index_copy_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.506 -> 2.512 ( +0.24%) [ +0.00% +0.24% +0.32% / +0.24% +0.56% +0.68%] index_add_ strided 3 : Elapsed 0.025 ms (2.506 ms / 100) 2.457 -> 2.461 ( +0.16%) [ +0.00% +0.04% +0.24% / +0.16% +0.45% +0.53%] index_copy_ strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.511 -> 2.511 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.36% +0.40%] index_add_ strided 5 : Elapsed 0.025 ms (2.513 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.37% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.461 ms / 100) 2.512 -> 2.520 ( +0.32%) [ +0.12% +0.16% +0.00% / +0.32% +0.36% +0.60%] index_add_ strided 7 : Elapsed 0.025 ms (2.515 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.00% +0.16% +0.20% / +0.16% +0.32% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.464 ms / 100) 2.513 -> 2.514 ( +0.04%) [ +0.00% +0.20% +0.00% / +0.04% +0.16% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.513 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.00% +0.16% +0.12% / -0.04% +0.28% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.462 ms / 100) 2.510 -> 2.508 ( -0.08%) [ +0.12% +0.16% +0.00% / -0.08% +0.32% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.513 ms / 100) 2.458 -> 2.464 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.24% +0.53% +0.28%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) 5.371 -> 5.373 ( +0.04%) [ +0.13% +0.00% +0.20% / +0.04% +0.39% +0.43%] index_select const : Elapsed 0.054 ms (5.378 ms / 100) 5.411 -> 5.411 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.00% +0.46% +0.41%] index_select wrap : Elapsed 0.054 ms (5.411 ms / 100) 5.433 -> 5.433 ( +0.00%) [ +0.24% +0.00% +0.31% / +0.00% +0.52% +0.33%] index_select linear : Elapsed 0.054 ms (5.446 ms / 100) 5.403 -> 5.404 ( +0.02%) [ +0.00% +0.09% +0.11% / +0.02% +0.30% +0.46%] index_select reverse : Elapsed 0.054 ms (5.403 ms / 100) 5.387 -> 5.389 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.26% +0.26%] index_select skip64 : Elapsed 0.054 ms (5.387 ms / 100) 5.382 -> 5.392 ( +0.19%) [ +0.00% +0.00% +0.00% / +0.19% +0.26% +0.32%] index_select skip256 : Elapsed 0.054 ms (5.382 ms / 100) 5.400 -> 5.402 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.04% +0.22% +0.30%] index_select spread : Elapsed 0.054 ms (5.402 ms / 100) 5.410 -> 5.419 ( +0.17%) [ +0.00% +0.09% +0.00% / +0.17% +0.28% +0.18%] index_select strided 3 : Elapsed 0.054 ms (5.410 ms / 100) 5.426 -> 5.430 ( +0.07%) [ +0.18% +0.02% +0.00% / +0.07% +0.17% +0.31%] index_select random : Elapsed 0.054 ms (5.436 ms / 100) 5.398 -> 5.406 ( +0.15%) [ +0.04% +0.00% +0.06% / +0.15% +0.31% +0.41%] index_select random_sorted : Elapsed 0.054 ms (5.400 ms / 100) B = [20, 40, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 40, 4, 5] (stride (1, 20, 4000, 800)) dim = 2 2.583 -> 2.586 ( +0.12%) [ +0.04% +0.00% +0.23% / +0.12% +0.62% +0.39%] index_add_ linear : Elapsed 0.026 ms (2.584 ms / 100) 2.549 -> 2.551 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.12% +0.39% +0.08%] index_copy_ linear : Elapsed 0.025 ms (2.549 ms / 100) 2.584 -> 2.584 ( +0.00%) [ +0.00% +0.08% +0.31% / +0.15% +0.31% +0.00%] index_add_ reverse : Elapsed 0.026 ms (2.584 ms / 100) 2.551 -> 2.548 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.08% -0.12% -0.08%] index_copy_ reverse : Elapsed 0.026 ms (2.551 ms / 100) 2.624 -> 2.628 ( +0.15%) [ +0.00% +0.04% +0.23% / +0.19% +0.15% +0.23%] index_add_ spread : Elapsed 0.026 ms (2.624 ms / 100) 2.655 -> 2.652 ( -0.11%) [ +0.04% +0.26% +0.00% / +0.19% -0.11% +0.11%] index_copy_ spread : Elapsed 0.027 ms (2.656 ms / 100) 2.624 -> 2.627 ( +0.11%) [ +0.00% +0.23% +0.04% / +0.11% +0.46% +0.34%] index_add_ strided 3 : Elapsed 0.026 ms (2.624 ms / 100) 2.654 -> 2.655 ( +0.04%) [ +0.15% +0.04% +0.00% / +0.08% +0.11% +0.04%] index_copy_ strided 3 : Elapsed 0.027 ms (2.658 ms / 100) 2.619 -> 2.626 ( +0.27%) [ +0.31% +0.00% +0.04% / +0.27% +0.42% +0.42%] index_add_ strided 5 : Elapsed 0.026 ms (2.627 ms / 100) 2.654 -> 2.655 ( +0.04%) [ +0.00% +0.00% +0.15% / +0.04% +0.15% +0.04%] index_copy_ strided 5 : Elapsed 0.027 ms (2.654 ms / 100) 2.618 -> 2.627 ( +0.34%) [ +0.31% +0.23% +0.00% / +0.34% +0.46% +0.50%] index_add_ strided 7 : Elapsed 0.026 ms (2.626 ms / 100) 2.655 -> 2.655 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.04% +0.11%] index_copy_ strided 7 : Elapsed 0.027 ms (2.655 ms / 100) 2.621 -> 2.625 ( +0.15%) [ +0.00% +0.31% +0.08% / +0.15% +0.69% +0.80%] index_add_ perm : Elapsed 0.026 ms (2.621 ms / 100) 2.652 -> 2.650 ( -0.08%) [ +0.15% +0.04% +0.00% / -0.08% +0.08% +0.11%] index_copy_ perm : Elapsed 0.027 ms (2.656 ms / 100) 2.621 -> 2.625 ( +0.15%) [ +0.15% +0.00% +0.42% / +0.15% +0.53% +0.42%] index_add_ perm_sorted : Elapsed 0.026 ms (2.625 ms / 100) 2.655 -> 2.655 ( +0.00%) [ +0.08% +0.00% +0.23% / +0.08% +0.00% +0.08%] index_copy_ perm_sorted : Elapsed 0.027 ms (2.657 ms / 100) 5.790 -> 5.794 ( +0.07%) [ +0.00% +0.17% +0.17% / +0.07% +0.54% +0.64%] index_select const : Elapsed 0.058 ms (5.790 ms / 100) 5.798 -> 5.796 ( -0.03%) [ +0.19% +0.00% +0.03% / -0.03% +0.33% +0.28%] index_select wrap : Elapsed 0.058 ms (5.809 ms / 100) 5.794 -> 5.802 ( +0.14%) [ +0.00% +0.28% +0.07% / +0.14% +0.38% +0.28%] index_select linear : Elapsed 0.058 ms (5.794 ms / 100) 5.798 -> 5.798 ( +0.00%) [ +0.10% +0.02% +0.00% / +0.00% +0.24% +0.36%] index_select reverse : Elapsed 0.058 ms (5.804 ms / 100) 5.799 -> 5.820 ( +0.36%) [ +0.10% +0.10% +0.00% / +0.36% +0.48% +0.40%] index_select skip64 : Elapsed 0.058 ms (5.805 ms / 100) 5.791 -> 5.797 ( +0.10%) [ +0.26% +0.10% +0.00% / +0.10% +0.45% +0.36%] index_select skip256 : Elapsed 0.058 ms (5.806 ms / 100) 5.792 -> 5.796 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.35% +0.41%] index_select spread : Elapsed 0.058 ms (5.796 ms / 100) 5.798 -> 5.800 ( +0.03%) [ +0.02% +0.12% +0.00% / +0.03% +0.28% +0.36%] index_select strided 3 : Elapsed 0.058 ms (5.799 ms / 100) 5.793 -> 5.794 ( +0.02%) [ +0.16% +0.00% +0.05% / +0.02% +0.41% +0.22%] index_select random : Elapsed 0.058 ms (5.802 ms / 100) 5.791 -> 5.797 ( +0.10%) [ +0.00% +0.05% +0.09% / +0.10% +0.26% +0.24%] index_select random_sorted : Elapsed 0.058 ms (5.791 ms / 100) B = [20, 40, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 40, 4, 5] (stride (40, 1, 800, 3200)) dim = 2 2.409 -> 2.416 ( +0.29%) [ +0.00% +0.21% +0.17% / +0.29% +0.91% +0.91%] index_add_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.394 -> 2.392 ( -0.08%) [ +0.00% +0.17% +0.00% / -0.08% +0.58% +0.63%] index_copy_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.413 -> 2.419 ( +0.25%) [ +0.50% +0.33% +0.00% / +0.25% +0.83% +0.99%] index_add_ reverse : Elapsed 0.024 ms (2.425 ms / 100) 2.399 -> 2.404 ( +0.21%) [ +0.08% +0.13% +0.00% / +0.21% +0.33% +0.58%] index_copy_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.77% +0.82%] index_add_ spread : Elapsed 0.025 ms (2.455 ms / 100) 2.492 -> 2.496 ( +0.16%) [ +0.04% +0.00% +0.20% / +0.16% +0.56% +0.52%] index_copy_ spread : Elapsed 0.025 ms (2.493 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.28% +0.20% +0.00% / +0.04% +0.69% +1.02%] index_add_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.496 -> 2.498 ( +0.08%) [ +0.12% +0.24% +0.00% / +0.08% +0.60% +0.48%] index_copy_ strided 3 : Elapsed 0.025 ms (2.499 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.24% +0.00% +0.20% / +0.04% +0.49% +0.73%] index_add_ strided 5 : Elapsed 0.025 ms (2.472 ms / 100) 2.494 -> 2.506 ( +0.48%) [ +0.28% +0.28% +0.00% / +1.68% +0.60% +0.48%] index_copy_ strided 5 : Elapsed 0.025 ms (2.501 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.37% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.494 -> 2.496 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.48% +0.56%] index_copy_ strided 7 : Elapsed 0.025 ms (2.494 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.12% +0.20% +0.00% / +0.04% +0.53% +0.45%] index_add_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.496 -> 2.493 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.12% +0.36% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.496 ms / 100) 2.458 -> 2.464 ( +0.24%) [ +0.00% +0.24% +0.00% / +0.24% +0.65% +0.61%] index_add_ perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.488 -> 2.492 ( +0.16%) [ +0.20% +0.00% +0.20% / +0.16% +0.60% +0.48%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.493 ms / 100) 5.008 -> 5.012 ( +0.08%) [ +0.08% +0.28% +0.00% / +0.08% +0.60% +0.58%] index_select const : Elapsed 0.050 ms (5.012 ms / 100) 5.056 -> 5.062 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.44% +0.47%] index_select wrap : Elapsed 0.051 ms (5.062 ms / 100) 5.062 -> 5.068 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.47% +0.49%] index_select linear : Elapsed 0.051 ms (5.068 ms / 100) 5.082 -> 5.076 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.28% +0.28%] index_select reverse : Elapsed 0.051 ms (5.082 ms / 100) 5.033 -> 5.035 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.30% +0.30%] index_select skip64 : Elapsed 0.050 ms (5.035 ms / 100) 5.020 -> 5.025 ( +0.10%) [ +0.24% +0.04% +0.00% / +0.10% +0.20% +0.42%] index_select skip256 : Elapsed 0.050 ms (5.032 ms / 100) 5.057 -> 5.056 ( -0.02%) [ +0.10% +0.20% +0.00% / -0.02% +0.28% +0.20%] index_select spread : Elapsed 0.051 ms (5.062 ms / 100) 5.061 -> 5.063 ( +0.04%) [ +0.10% +0.08% +0.00% / +0.04% +0.41% +0.38%] index_select strided 3 : Elapsed 0.051 ms (5.066 ms / 100) 5.087 -> 5.089 ( +0.04%) [ +0.10% +0.08% +0.00% / +0.04% +0.08% +0.18%] index_select random : Elapsed 0.051 ms (5.092 ms / 100) 5.059 -> 5.064 ( +0.10%) [ +0.00% +0.14% +0.02% / +0.10% +0.43% +0.32%] index_select random_sorted : Elapsed 0.051 ms (5.059 ms / 100) B = [20, 40, 16, 5] (stride (1, 100, 4000, 20)) A = [20, 40, 4, 5] (stride (1, 20, 4000, 800)) dim = 2 2.547 -> 2.549 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.55% +0.27%] index_add_ linear : Elapsed 0.026 ms (2.551 ms / 100) 2.500 -> 2.500 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.12% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.502 ms / 100) 2.549 -> 2.548 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.04% +0.12%] index_add_ reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.499 -> 2.502 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.16% +0.12% +0.20%] index_copy_ reverse : Elapsed 0.025 ms (2.503 ms / 100) 2.553 -> 2.545 ( -0.31%) [ +0.00% +0.04% +0.12% / +0.00% -0.16% -0.31%] index_add_ spread : Elapsed 0.026 ms (2.553 ms / 100) 2.504 -> 2.497 ( -0.28%) [ +0.00% +0.12% +0.16% / -0.04% -0.28% -0.24%] index_copy_ spread : Elapsed 0.025 ms (2.504 ms / 100) 2.550 -> 2.548 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.00% +0.12%] index_add_ strided 3 : Elapsed 0.025 ms (2.550 ms / 100) 2.500 -> 2.502 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.20% +0.12%] index_copy_ strided 3 : Elapsed 0.025 ms (2.501 ms / 100) 2.553 -> 2.552 ( -0.04%) [ +0.00% +0.00% +0.08% / +0.08% -0.04% +0.04%] index_add_ strided 5 : Elapsed 0.026 ms (2.553 ms / 100) 2.503 -> 2.502 ( -0.04%) [ +0.00% +0.32% +0.04% / +0.04% +0.20% -0.04%] index_copy_ strided 5 : Elapsed 0.025 ms (2.503 ms / 100) 2.554 -> 2.553 ( -0.04%) [ +0.08% +0.00% +0.04% / +0.00% -0.04% +0.12%] index_add_ strided 7 : Elapsed 0.026 ms (2.556 ms / 100) 2.505 -> 2.504 ( -0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.00% -0.04%] index_copy_ strided 7 : Elapsed 0.025 ms (2.506 ms / 100) 2.549 -> 2.551 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.16% +0.08% +0.27%] index_add_ perm : Elapsed 0.026 ms (2.550 ms / 100) 2.501 -> 2.499 ( -0.08%) [ +0.00% +0.08% +0.04% / +0.16% -0.08% -0.08%] index_copy_ perm : Elapsed 0.025 ms (2.501 ms / 100) 2.549 -> 2.551 ( +0.08%) [ +0.20% +0.00% +0.12% / +0.08% +0.16% +0.16%] index_add_ perm_sorted : Elapsed 0.026 ms (2.554 ms / 100) 2.500 -> 2.500 ( +0.00%) [ +0.20% +0.00% +0.04% / +0.00% +0.20% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.505 ms / 100) 5.746 -> 5.752 ( +0.10%) [ +0.00% +0.12% +0.14% / +0.10% +0.59% +0.35%] index_select const : Elapsed 0.057 ms (5.746 ms / 100) 5.752 -> 5.759 ( +0.12%) [ +0.19% +0.23% +0.00% / +0.12% +0.21% +0.23%] index_select wrap : Elapsed 0.058 ms (5.763 ms / 100) 5.752 -> 5.752 ( +0.00%) [ +0.03% +0.09% +0.00% / +0.00% +0.17% +0.28%] index_select linear : Elapsed 0.058 ms (5.754 ms / 100) 5.753 -> 5.750 ( -0.05%) [ +0.05% +0.03% +0.00% / -0.05% +0.31% +0.37%] index_select reverse : Elapsed 0.058 ms (5.756 ms / 100) 5.753 -> 5.755 ( +0.03%) [ +0.16% +0.24% +0.00% / +0.03% +0.50% +0.45%] index_select skip64 : Elapsed 0.058 ms (5.762 ms / 100) 5.749 -> 5.745 ( -0.07%) [ +0.19% +0.14% +0.00% / -0.07% +0.54% +0.40%] index_select skip256 : Elapsed 0.058 ms (5.760 ms / 100) 5.746 -> 5.745 ( -0.02%) [ +0.05% +0.00% +0.03% / -0.02% +0.40% +0.24%] index_select spread : Elapsed 0.057 ms (5.749 ms / 100) 5.747 -> 5.756 ( +0.16%) [ +0.05% +0.12% +0.00% / +0.16% +0.52% +0.31%] index_select strided 3 : Elapsed 0.058 ms (5.750 ms / 100) 5.754 -> 5.761 ( +0.12%) [ +0.16% +0.17% +0.00% / +0.12% +0.40% +0.40%] index_select random : Elapsed 0.058 ms (5.763 ms / 100) 5.743 -> 5.747 ( +0.07%) [ +0.14% +0.00% +0.16% / +0.07% +0.37% +0.45%] index_select random_sorted : Elapsed 0.058 ms (5.751 ms / 100) out_shape = [20, 40, 4, 16] in_shape = [20, 40, 4, 5] idx_dim = 3 B = [20, 40, 4, 16] (stride (2560, 64, 1, 4)) A = [20, 40, 4, 5] (stride (20, 400, 1, 4)) dim = 3 2.282 -> 2.281 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +1.01% +0.79%] index_add_ linear : Elapsed 0.023 ms (2.282 ms / 100) 2.231 -> 2.234 ( +0.13%) [ +0.00% +0.00% +0.27% / +0.13% +0.94% +0.99%] index_copy_ linear : Elapsed 0.022 ms (2.231 ms / 100) 2.280 -> 2.290 ( +0.44%) [ +0.18% +0.22% +0.00% / +0.44% +0.79% +0.92%] index_add_ reverse : Elapsed 0.023 ms (2.284 ms / 100) 2.222 -> 2.229 ( +0.32%) [ +0.36% +0.00% +0.18% / +0.32% +0.90% +1.31%] index_copy_ reverse : Elapsed 0.022 ms (2.230 ms / 100) 2.301 -> 2.300 ( -0.04%) [ +0.09% +0.00% +0.13% / -0.04% +1.04% +1.17%] index_add_ spread : Elapsed 0.023 ms (2.303 ms / 100) 2.269 -> 2.263 ( -0.26%) [ +0.22% +0.13% +0.00% / -0.26% +0.53% +0.93%] index_copy_ spread : Elapsed 0.023 ms (2.274 ms / 100) 2.294 -> 2.301 ( +0.31%) [ +0.04% +0.09% +0.00% / +0.31% +1.39% +1.57%] index_add_ strided 3 : Elapsed 0.023 ms (2.295 ms / 100) 2.252 -> 2.264 ( +0.53%) [ +0.27% +0.18% +0.00% / +0.53% +1.02% +1.47%] index_copy_ strided 3 : Elapsed 0.023 ms (2.258 ms / 100) 2.287 -> 2.281 ( -0.26%) [ +0.61% +0.09% +0.00% / -0.26% +0.83% +1.22%] index_add_ strided 5 : Elapsed 0.023 ms (2.301 ms / 100) 2.233 -> 2.235 ( +0.09%) [ +0.27% +0.00% +0.18% / +0.09% +1.03% +1.16%] index_copy_ strided 5 : Elapsed 0.022 ms (2.239 ms / 100) 2.281 -> 2.287 ( +0.26%) [ +0.44% +0.31% +0.00% / +0.26% +1.27% +1.27%] index_add_ strided 7 : Elapsed 0.023 ms (2.291 ms / 100) 2.257 -> 2.253 ( -0.18%) [ +0.04% +0.00% +0.04% / -0.18% +0.75% +0.97%] index_copy_ strided 7 : Elapsed 0.023 ms (2.258 ms / 100) 2.290 -> 2.293 ( +0.13%) [ +0.17% +0.22% +0.00% / +0.13% +1.27% +1.44%] index_add_ perm : Elapsed 0.023 ms (2.294 ms / 100) 2.248 -> 2.251 ( +0.13%) [ +0.44% +0.27% +0.00% / +0.13% +0.89% +1.20%] index_copy_ perm : Elapsed 0.023 ms (2.258 ms / 100) 2.280 -> 2.288 ( +0.35%) [ +0.35% +0.26% +0.00% / +0.35% +1.01% +0.96%] index_add_ perm_sorted : Elapsed 0.023 ms (2.288 ms / 100) 2.242 -> 2.248 ( +0.27%) [ +0.09% +0.31% +0.00% / +0.27% +1.16% +0.98%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.244 ms / 100) 4.673 -> 4.688 ( +0.32%) [ +0.11% +0.00% +0.30% / +0.32% +1.03% +0.86%] index_select const : Elapsed 0.047 ms (4.678 ms / 100) 4.706 -> 4.715 ( +0.19%) [ +0.15% +0.00% +0.32% / +0.19% +1.04% +0.68%] index_select wrap : Elapsed 0.047 ms (4.713 ms / 100) 4.728 -> 4.726 ( -0.04%) [ +0.00% +0.21% +0.30% / -0.04% +1.12% +0.82%] index_select linear : Elapsed 0.047 ms (4.728 ms / 100) 4.707 -> 4.709 ( +0.04%) [ +0.00% +0.38% +0.23% / +0.04% +0.98% +0.85%] index_select reverse : Elapsed 0.047 ms (4.707 ms / 100) 4.663 -> 4.669 ( +0.13%) [ +0.00% +0.17% +0.24% / +0.13% +0.88% +0.88%] index_select skip64 : Elapsed 0.047 ms (4.663 ms / 100) 4.685 -> 4.675 ( -0.21%) [ +0.00% +0.02% +0.02% / -0.21% +0.70% +0.51%] index_select skip256 : Elapsed 0.047 ms (4.685 ms / 100) 4.704 -> 4.701 ( -0.06%) [ +0.00% +0.17% +0.09% / -0.06% +0.74% +0.72%] index_select spread : Elapsed 0.047 ms (4.704 ms / 100) 4.721 -> 4.730 ( +0.19%) [ +0.11% +0.17% +0.00% / +0.19% +0.57% +0.80%] index_select strided 3 : Elapsed 0.047 ms (4.726 ms / 100) 4.716 -> 4.725 ( +0.19%) [ +0.42% +0.00% +0.08% / +0.19% +0.83% +0.83%] index_select random : Elapsed 0.047 ms (4.736 ms / 100) 4.715 -> 4.718 ( +0.06%) [ +0.00% +0.08% +0.04% / +0.06% +0.98% +0.81%] index_select random_sorted : Elapsed 0.047 ms (4.715 ms / 100) B = [20, 40, 4, 16] (stride (2560, 16, 640, 1)) A = [20, 40, 4, 5] (stride (20, 400, 5, 1)) dim = 3 2.459 -> 2.459 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.33% +0.20%] index_add_ linear : Elapsed 0.025 ms (2.462 ms / 100) 2.411 -> 2.420 ( +0.37%) [ +0.37% +0.37% +0.00% / +0.37% +0.54% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.420 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.04% +0.29% +0.00% / +0.16% +0.49% +0.45%] index_add_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.21% +0.33% +0.00% / +0.04% +0.29% +0.54%] index_copy_ reverse : Elapsed 0.024 ms (2.408 ms / 100) 2.476 -> 2.473 ( -0.12%) [ +0.00% +0.04% +0.08% / -0.12% +0.65% +0.40%] index_add_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.477 -> 2.474 ( -0.12%) [ +0.00% +0.08% +0.12% / -0.12% +0.24% +0.20%] index_copy_ spread : Elapsed 0.025 ms (2.477 ms / 100) 2.490 -> 2.489 ( -0.04%) [ +0.00% +0.12% +0.00% / -0.04% +0.36% +0.20%] index_add_ strided 3 : Elapsed 0.025 ms (2.490 ms / 100) 2.483 -> 2.488 ( +0.20%) [ +0.04% +0.24% +0.00% / +0.24% +0.32% +0.20%] index_copy_ strided 3 : Elapsed 0.025 ms (2.484 ms / 100) 2.476 -> 2.478 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.53% +0.20%] index_add_ strided 5 : Elapsed 0.025 ms (2.476 ms / 100) 2.474 -> 2.478 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.20% +0.28% +0.16%] index_copy_ strided 5 : Elapsed 0.025 ms (2.476 ms / 100) 2.473 -> 2.474 ( +0.04%) [ +0.00% +0.20% +0.08% / +0.04% +0.40% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.473 ms / 100) 2.475 -> 2.472 ( -0.12%) [ +0.04% +0.16% +0.00% / -0.04% +0.04% -0.12%] index_copy_ strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.486 -> 2.488 ( +0.08%) [ +0.12% +0.00% +0.16% / +0.08% +0.48% +0.60%] index_add_ perm : Elapsed 0.025 ms (2.489 ms / 100) 2.488 -> 2.484 ( -0.16%) [ +0.04% +0.00% +0.00% / -0.16% +0.12% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.489 ms / 100) 2.472 -> 2.481 ( +0.36%) [ +0.24% +0.00% +0.16% / +0.36% +0.85% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.478 ms / 100) 2.470 -> 2.476 ( +0.24%) [ +0.04% +0.00% +0.16% / +0.24% +0.36% +0.32%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) 5.265 -> 5.279 ( +0.27%) [ +0.09% +0.00% +0.19% / +0.27% +0.76% +0.82%] index_select const : Elapsed 0.053 ms (5.270 ms / 100) 5.219 -> 5.230 ( +0.21%) [ +0.10% +0.11% +0.00% / +0.21% +0.65% +0.59%] index_select wrap : Elapsed 0.052 ms (5.224 ms / 100) 5.224 -> 5.247 ( +0.44%) [ +0.00% +0.50% +0.46% / +0.44% +0.80% +0.77%] index_select linear : Elapsed 0.052 ms (5.224 ms / 100) 5.216 -> 5.221 ( +0.10%) [ +0.17% +0.00% +0.04% / +0.10% +0.61% +0.56%] index_select reverse : Elapsed 0.052 ms (5.225 ms / 100) 5.212 -> 5.215 ( +0.06%) [ +0.21% +0.04% +0.00% / +0.06% +0.63% +0.65%] index_select skip64 : Elapsed 0.052 ms (5.223 ms / 100) 5.208 -> 5.226 ( +0.35%) [ +0.23% +0.10% +0.00% / +0.35% +0.67% +0.58%] index_select skip256 : Elapsed 0.052 ms (5.220 ms / 100) 5.265 -> 5.253 ( -0.23%) [ +0.19% +0.09% +0.00% / -0.23% +0.57% +0.47%] index_select spread : Elapsed 0.053 ms (5.275 ms / 100) 5.221 -> 5.224 ( +0.06%) [ +0.15% +0.15% +0.00% / +0.06% +0.46% +0.44%] index_select strided 3 : Elapsed 0.052 ms (5.229 ms / 100) 5.222 -> 5.223 ( +0.02%) [ +0.08% +0.00% +0.21% / +0.02% +0.75% +0.80%] index_select random : Elapsed 0.052 ms (5.226 ms / 100) 5.260 -> 5.252 ( -0.15%) [ +0.02% +0.08% +0.00% / -0.15% +0.57% +0.42%] index_select random_sorted : Elapsed 0.053 ms (5.261 ms / 100) B = [20, 40, 4, 16] (stride (2560, 1, 640, 40)) dim = 3 fill_cnt = 5 1.084 -> 1.085 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.65% +0.83%] index_fill_ const : Elapsed 0.011 ms (1.086 ms / 100) 1.107 -> 1.110 ( +0.27%) [ +0.00% +0.45% +0.45% / +0.27% +0.45% +0.63%] index_fill_ linear : Elapsed 0.011 ms (1.107 ms / 100) 1.110 -> 1.111 ( +0.09%) [ +0.00% +0.27% +0.09% / +0.09% +0.45% +0.09%] index_fill_ reverse : Elapsed 0.011 ms (1.110 ms / 100) 1.084 -> 1.083 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.46% +0.74%] index_fill_ skip64 : Elapsed 0.011 ms (1.085 ms / 100) 1.082 -> 1.083 ( +0.09%) [ +0.28% +0.18% +0.00% / +0.09% +0.92% +0.74%] index_fill_ skip256 : Elapsed 0.011 ms (1.085 ms / 100) 1.118 -> 1.119 ( +0.09%) [ +0.09% +0.00% +0.36% / +0.09% +0.81% +0.89%] index_fill_ spread : Elapsed 0.011 ms (1.119 ms / 100) 1.118 -> 1.118 ( +0.00%) [ +0.27% +0.27% +0.00% / +0.00% +0.81% +0.81%] index_fill_ strided 3 : Elapsed 0.011 ms (1.121 ms / 100) 1.109 -> 1.111 ( +0.18%) [ +0.00% +0.09% +0.27% / +0.18% +1.08% +0.72%] index_fill_ strided 5 : Elapsed 0.011 ms (1.109 ms / 100) 1.114 -> 1.116 ( +0.18%) [ +0.27% +0.00% +0.00% / +0.18% +0.72% +0.27%] index_fill_ strided 7 : Elapsed 0.011 ms (1.117 ms / 100) 1.091 -> 1.089 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +0.64% +0.37%] index_fill_ strided 8 : Elapsed 0.011 ms (1.091 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.27% +0.00% +0.09% / +0.00% +0.72% +0.72%] index_fill_ random : Elapsed 0.011 ms (1.110 ms / 100) 1.107 -> 1.108 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.72% +0.54%] index_fill_ random_sorted : Elapsed 0.011 ms (1.108 ms / 100) 1.107 -> 1.109 ( +0.18%) [ +0.00% +0.00% +0.00% / +0.18% +0.27% +0.27%] index_fill_ perm : Elapsed 0.011 ms (1.107 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.54% +0.00% +0.09% / +0.09% +0.27% +0.00%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.113 ms / 100) B = [20, 40, 4, 16] (stride (2560, 4, 1, 160)) A = [20, 40, 4, 5] (stride (1, 20, 4000, 800)) dim = 3 2.308 -> 2.310 ( +0.09%) [ +0.00% +0.04% +0.17% / +0.09% +0.35% +0.26%] index_add_ linear : Elapsed 0.023 ms (2.308 ms / 100) 2.242 -> 2.242 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.40% +0.54%] index_copy_ linear : Elapsed 0.022 ms (2.243 ms / 100) 2.302 -> 2.311 ( +0.39%) [ +0.13% +0.17% +0.00% / +0.39% +0.65% +0.39%] index_add_ reverse : Elapsed 0.023 ms (2.305 ms / 100) 2.236 -> 2.239 ( +0.13%) [ +0.09% +0.13% +0.00% / +0.13% +0.49% +0.45%] index_copy_ reverse : Elapsed 0.022 ms (2.238 ms / 100) 2.321 -> 2.321 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.65% +0.30%] index_add_ spread : Elapsed 0.023 ms (2.325 ms / 100) 2.246 -> 2.247 ( +0.04%) [ +0.27% +0.00% +0.09% / +0.04% +0.76% +0.76%] index_copy_ spread : Elapsed 0.023 ms (2.252 ms / 100) 2.319 -> 2.322 ( +0.13%) [ +0.13% +0.00% +0.17% / +0.13% +0.82% +1.03%] index_add_ strided 3 : Elapsed 0.023 ms (2.322 ms / 100) 2.249 -> 2.249 ( +0.00%) [ +0.00% +0.13% +0.09% / +0.00% +0.76% +0.98%] index_copy_ strided 3 : Elapsed 0.022 ms (2.249 ms / 100) 2.303 -> 2.306 ( +0.13%) [ +0.30% +0.00% +0.13% / +0.13% +0.65% +0.69%] index_add_ strided 5 : Elapsed 0.023 ms (2.310 ms / 100) 2.237 -> 2.241 ( +0.18%) [ +0.13% +0.13% +0.00% / +0.18% +0.40% +0.54%] index_copy_ strided 5 : Elapsed 0.022 ms (2.240 ms / 100) 2.305 -> 2.311 ( +0.26%) [ +0.00% +0.30% +0.09% / +0.26% +0.87% +0.74%] index_add_ strided 7 : Elapsed 0.023 ms (2.305 ms / 100) 2.242 -> 2.240 ( -0.09%) [ +0.00% +0.18% +0.04% / -0.09% +0.89% +0.85%] index_copy_ strided 7 : Elapsed 0.022 ms (2.242 ms / 100) 2.319 -> 2.321 ( +0.09%) [ +0.26% +0.04% +0.00% / +0.09% +0.78% +0.82%] index_add_ perm : Elapsed 0.023 ms (2.325 ms / 100) 2.250 -> 2.251 ( +0.04%) [ +0.13% +0.18% +0.00% / +0.04% +0.76% +0.62%] index_copy_ perm : Elapsed 0.023 ms (2.253 ms / 100) 2.321 -> 2.323 ( +0.09%) [ +0.00% +0.17% +0.00% / +0.09% +0.47% +0.26%] index_add_ perm_sorted : Elapsed 0.023 ms (2.321 ms / 100) 2.245 -> 2.252 ( +0.31%) [ +0.00% +0.22% +0.04% / +0.31% +0.40% +0.40%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.245 ms / 100) 4.920 -> 4.920 ( +0.00%) [ +0.04% +0.06% +0.00% / +0.00% +0.41% +0.49%] index_select const : Elapsed 0.049 ms (4.922 ms / 100) 4.883 -> 4.869 ( -0.29%) [ +0.12% +0.00% +0.06% / -0.29% +0.61% +0.68%] index_select wrap : Elapsed 0.049 ms (4.889 ms / 100) 4.933 -> 4.932 ( -0.02%) [ +0.00% +0.04% +0.04% / -0.02% +0.53% +0.63%] index_select linear : Elapsed 0.049 ms (4.933 ms / 100) 4.920 -> 4.923 ( +0.06%) [ +0.00% +0.00% +0.04% / +0.06% +0.63% +0.28%] index_select reverse : Elapsed 0.049 ms (4.920 ms / 100) 4.889 -> 4.890 ( +0.02%) [ +0.10% +0.00% +0.20% / +0.02% +0.67% +0.65%] index_select skip64 : Elapsed 0.049 ms (4.894 ms / 100) 4.911 -> 4.905 ( -0.12%) [ +0.29% +0.00% +0.20% / -0.12% +0.31% +0.69%] index_select skip256 : Elapsed 0.049 ms (4.925 ms / 100) 4.925 -> 4.927 ( +0.04%) [ +0.00% +0.02% +0.06% / +0.04% +0.79% +0.75%] index_select spread : Elapsed 0.049 ms (4.925 ms / 100) 4.930 -> 4.929 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.55% +0.65%] index_select strided 3 : Elapsed 0.049 ms (4.930 ms / 100) 4.916 -> 4.915 ( -0.02%) [ +0.24% +0.00% +0.20% / -0.02% +0.96% +1.04%] index_select random : Elapsed 0.049 ms (4.928 ms / 100) 4.903 -> 4.902 ( -0.02%) [ +0.20% +0.12% +0.00% / -0.02% +0.59% +0.61%] index_select random_sorted : Elapsed 0.049 ms (4.913 ms / 100) B = [20, 40, 4, 16] (stride (64, 1280, 16, 1)) A = [20, 40, 4, 5] (stride (1, 400, 100, 20)) dim = 3 2.428 -> 2.428 ( +0.00%) [ +0.04% +0.25% +0.00% / +0.00% +0.45% +0.58%] index_add_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.367 -> 2.371 ( +0.17%) [ +0.25% +0.21% +0.00% / +0.17% +0.51% +0.42%] index_copy_ linear : Elapsed 0.024 ms (2.373 ms / 100) 2.426 -> 2.428 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.37% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.368 -> 2.370 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.21% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.369 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.20% +0.28% +0.00% / +0.00% +0.28% +0.33%] index_add_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.441 -> 2.439 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.04% -0.08% +0.04%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.33% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.437 -> 2.440 ( +0.12%) [ +0.16% +0.16% +0.00% / +0.12% +0.25% +0.25%] index_copy_ strided 3 : Elapsed 0.024 ms (2.441 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.37% +0.33%] index_add_ strided 5 : Elapsed 0.025 ms (2.459 ms / 100) 2.436 -> 2.437 ( +0.04%) [ +0.00% +0.16% +0.33% / +0.04% +0.29% +0.16%] index_copy_ strided 5 : Elapsed 0.024 ms (2.436 ms / 100) 2.455 -> 2.461 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.49% +0.37%] index_add_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.433 -> 2.436 ( +0.12%) [ +0.00% +0.16% +0.08% / +0.12% +0.25% +0.12%] index_copy_ strided 7 : Elapsed 0.024 ms (2.433 ms / 100) 2.458 -> 2.461 ( +0.12%) [ +0.20% +0.00% +0.12% / +0.12% +0.33% +0.12%] index_add_ perm : Elapsed 0.025 ms (2.463 ms / 100) 2.440 -> 2.437 ( -0.12%) [ +0.04% +0.00% +0.04% / +0.00% +0.12% -0.12%] index_copy_ perm : Elapsed 0.024 ms (2.441 ms / 100) 2.460 -> 2.465 ( +0.20%) [ +0.00% +0.20% +0.04% / +0.20% +0.28% +0.41%] index_add_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.435 -> 2.441 ( +0.25%) [ +0.00% +0.12% +0.04% / +0.25% +0.33% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) 5.219 -> 5.212 ( -0.13%) [ +0.10% +0.06% +0.00% / -0.13% +0.63% +0.59%] index_select const : Elapsed 0.052 ms (5.224 ms / 100) 5.200 -> 5.205 ( +0.10%) [ +0.10% +0.00% +0.15% / +0.10% +0.40% +0.58%] index_select wrap : Elapsed 0.052 ms (5.205 ms / 100) 5.229 -> 5.240 ( +0.21%) [ +0.13% +0.08% +0.00% / +0.21% +0.63% +0.59%] index_select linear : Elapsed 0.052 ms (5.236 ms / 100) 5.216 -> 5.221 ( +0.10%) [ +0.17% +0.21% +0.00% / +0.10% +0.44% +0.59%] index_select reverse : Elapsed 0.052 ms (5.225 ms / 100) 5.207 -> 5.208 ( +0.02%) [ +0.12% +0.06% +0.00% / +0.02% +0.61% +0.61%] index_select skip64 : Elapsed 0.052 ms (5.213 ms / 100) 5.208 -> 5.208 ( +0.00%) [ +0.00% +0.08% +0.02% / +0.00% +0.54% +0.63%] index_select skip256 : Elapsed 0.052 ms (5.208 ms / 100) 5.211 -> 5.219 ( +0.15%) [ +0.00% +0.10% +0.12% / +0.15% +0.33% +0.54%] index_select spread : Elapsed 0.052 ms (5.211 ms / 100) 5.204 -> 5.194 ( -0.19%) [ +0.02% +0.04% +0.00% / -0.19% +0.40% +0.31%] index_select strided 3 : Elapsed 0.052 ms (5.205 ms / 100) 5.203 -> 5.211 ( +0.15%) [ +0.02% +0.12% +0.00% / +0.15% +0.63% +0.75%] index_select random : Elapsed 0.052 ms (5.204 ms / 100) 5.193 -> 5.192 ( -0.02%) [ +0.00% +0.08% +0.08% / -0.02% +0.71% +0.64%] index_select random_sorted : Elapsed 0.052 ms (5.193 ms / 100) B = [20, 40, 4, 16] (stride (16, 1280, 320, 1)) A = [20, 40, 4, 5] (stride (800, 20, 1, 4)) dim = 3 0.918 -> 0.911 ( -0.76%) [ +0.00% +0.11% +0.33% / +0.22% -0.76% -0.11%] index_add_ linear : Elapsed 0.009 ms (0.918 ms / 100) 0.925 -> 0.924 ( -0.11%) [ +0.32% +0.32% +0.00% / -0.11% +0.97% +1.62%] index_copy_ linear : Elapsed 0.009 ms (0.928 ms / 100) 0.916 -> 0.912 ( -0.44%) [ +0.00% +0.44% +0.33% / +0.33% +0.00% -0.44%] index_add_ reverse : Elapsed 0.009 ms (0.916 ms / 100) 0.928 -> 0.926 ( -0.22%) [ +0.43% +0.00% +0.00% / -0.22% +1.19% +0.54%] index_copy_ reverse : Elapsed 0.009 ms (0.932 ms / 100) 0.943 -> 0.939 ( -0.42%) [ +0.11% +0.74% +0.00% / +0.11% -0.32% -0.42%] index_add_ spread : Elapsed 0.009 ms (0.944 ms / 100) 0.955 -> 0.954 ( -0.10%) [ +0.00% +0.21% +0.00% / -0.10% +0.42% +0.63%] index_copy_ spread : Elapsed 0.010 ms (0.955 ms / 100) 0.941 -> 0.935 ( -0.64%) [ +0.32% +0.11% +0.00% / +0.00% -0.64% -0.11%] index_add_ strided 3 : Elapsed 0.009 ms (0.944 ms / 100) 0.951 -> 0.956 ( +0.53%) [ +0.32% +0.00% +0.63% / +0.53% +0.84% +0.95%] index_copy_ strided 3 : Elapsed 0.010 ms (0.954 ms / 100) 0.945 -> 0.939 ( -0.63%) [ +0.00% +0.11% +0.00% / -0.11% -0.53% -0.63%] index_add_ strided 5 : Elapsed 0.009 ms (0.945 ms / 100) 0.955 -> 0.957 ( +0.21%) [ +0.00% +0.00% +0.21% / +0.21% +0.73% +0.94%] index_copy_ strided 5 : Elapsed 0.010 ms (0.955 ms / 100) 0.940 -> 0.938 ( -0.21%) [ +0.00% +0.53% +0.43% / +0.43% -0.21% +0.11%] index_add_ strided 7 : Elapsed 0.009 ms (0.940 ms / 100) 0.950 -> 0.955 ( +0.53%) [ +0.00% +0.63% +0.63% / +0.53% +1.16% +1.26%] index_copy_ strided 7 : Elapsed 0.010 ms (0.950 ms / 100) 0.945 -> 0.934 ( -1.16%) [ +0.00% +0.21% +0.53% / +0.63% -1.16% -0.85%] index_add_ perm : Elapsed 0.009 ms (0.945 ms / 100) 0.951 -> 0.950 ( -0.11%) [ +0.32% +0.42% +0.00% / -0.11% +0.63% +0.63%] index_copy_ perm : Elapsed 0.010 ms (0.954 ms / 100) 0.943 -> 0.937 ( -0.64%) [ +0.00% +0.32% +0.32% / +0.42% -0.11% -0.64%] index_add_ perm_sorted : Elapsed 0.009 ms (0.943 ms / 100) 0.951 -> 0.952 ( +0.11%) [ +0.00% +0.32% +0.53% / +0.11% +0.84% +1.05%] index_copy_ perm_sorted : Elapsed 0.010 ms (0.951 ms / 100) 1.765 -> 1.772 ( +0.40%) [ +0.28% +0.06% +0.00% / +0.40% +0.74% +0.85%] index_select const : Elapsed 0.018 ms (1.770 ms / 100) 1.769 -> 1.765 ( -0.23%) [ +0.17% +0.06% +0.00% / -0.23% +0.51% +0.62%] index_select wrap : Elapsed 0.018 ms (1.772 ms / 100) 1.764 -> 1.766 ( +0.11%) [ +0.00% +0.23% +0.34% / +0.11% +0.79% +0.79%] index_select linear : Elapsed 0.018 ms (1.764 ms / 100) 1.764 -> 1.768 ( +0.23%) [ +0.23% +0.00% +0.17% / +0.23% +0.79% +0.57%] index_select reverse : Elapsed 0.018 ms (1.768 ms / 100) 1.767 -> 1.770 ( +0.17%) [ +0.00% +0.23% +0.06% / +0.17% +0.57% +0.62%] index_select skip64 : Elapsed 0.018 ms (1.767 ms / 100) 1.767 -> 1.768 ( +0.06%) [ +0.11% +0.00% +0.06% / +0.06% +0.45% +0.79%] index_select skip256 : Elapsed 0.018 ms (1.769 ms / 100) 1.765 -> 1.765 ( +0.00%) [ +0.00% +0.11% +0.06% / +0.00% +0.51% +0.74%] index_select spread : Elapsed 0.018 ms (1.765 ms / 100) 1.768 -> 1.768 ( +0.00%) [ +0.00% +0.17% +0.11% / +0.00% +0.51% +0.90%] index_select strided 3 : Elapsed 0.018 ms (1.768 ms / 100) 1.764 -> 1.766 ( +0.11%) [ +0.34% +0.00% +0.11% / +0.11% +0.91% +0.85%] index_select random : Elapsed 0.018 ms (1.770 ms / 100) 1.754 -> 1.761 ( +0.40%) [ +0.57% +0.40% +0.00% / +0.40% +2.05% +1.43%] index_select random_sorted : Elapsed 0.018 ms (1.764 ms / 100) B = [20, 40, 4, 16] (stride (1, 1280, 320, 20)) A = [20, 40, 4, 5] (stride (200, 1, 4000, 40)) dim = 3 2.263 -> 2.266 ( +0.13%) [ +0.00% +0.09% +0.09% / +0.13% +0.44% +0.71%] index_add_ linear : Elapsed 0.023 ms (2.263 ms / 100) 2.216 -> 2.218 ( +0.09%) [ +0.05% +0.00% +0.09% / +0.09% +0.54% +0.86%] index_copy_ linear : Elapsed 0.022 ms (2.217 ms / 100) 2.274 -> 2.274 ( +0.00%) [ +0.00% +0.18% +0.09% / +0.00% +0.57% +0.57%] index_add_ reverse : Elapsed 0.023 ms (2.274 ms / 100) 2.224 -> 2.226 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.022 ms (2.224 ms / 100) 2.266 -> 2.260 ( -0.26%) [ +0.26% +0.22% +0.00% / -0.26% +0.35% +0.35%] index_add_ spread : Elapsed 0.023 ms (2.272 ms / 100) 2.228 -> 2.226 ( -0.09%) [ +0.13% +0.31% +0.00% / -0.09% +0.13% +0.45%] index_copy_ spread : Elapsed 0.022 ms (2.231 ms / 100) 2.260 -> 2.263 ( +0.13%) [ +0.35% +0.00% +0.04% / +0.13% +0.31% +0.49%] index_add_ strided 3 : Elapsed 0.023 ms (2.268 ms / 100) 2.223 -> 2.226 ( +0.13%) [ +0.00% +0.09% +0.09% / +0.13% +0.45% +0.36%] index_copy_ strided 3 : Elapsed 0.022 ms (2.223 ms / 100) 2.276 -> 2.276 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +0.31% +0.31%] index_add_ strided 5 : Elapsed 0.023 ms (2.280 ms / 100) 2.228 -> 2.238 ( +0.45%) [ +0.27% +0.00% +0.40% / +0.45% +0.58% +0.99%] index_copy_ strided 5 : Elapsed 0.022 ms (2.234 ms / 100) 2.272 -> 2.271 ( -0.04%) [ +0.00% +0.04% +0.44% / -0.04% +0.09% +0.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.272 ms / 100) 2.235 -> 2.238 ( +0.13%) [ +0.22% +0.04% +0.00% / +0.18% +0.13% +0.31%] index_copy_ strided 7 : Elapsed 0.022 ms (2.240 ms / 100) 2.260 -> 2.270 ( +0.44%) [ +0.27% +0.49% +0.00% / +0.44% +0.44% +0.71%] index_add_ perm : Elapsed 0.023 ms (2.266 ms / 100) 2.230 -> 2.227 ( -0.13%) [ +0.09% +0.00% +0.04% / +0.00% -0.13% +0.18%] index_copy_ perm : Elapsed 0.022 ms (2.232 ms / 100) 2.265 -> 2.270 ( +0.22%) [ +0.18% +0.53% +0.00% / +0.22% +0.44% +0.57%] index_add_ perm_sorted : Elapsed 0.023 ms (2.269 ms / 100) 2.227 -> 2.232 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.67% +0.54%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.232 ms / 100) 4.570 -> 4.568 ( -0.04%) [ +0.35% +0.00% +0.07% / -0.04% +0.79% +0.70%] index_select const : Elapsed 0.046 ms (4.586 ms / 100) 4.646 -> 4.646 ( +0.00%) [ +0.06% +0.00% +0.02% / +0.00% +0.45% +0.52%] index_select wrap : Elapsed 0.046 ms (4.649 ms / 100) 4.639 -> 4.648 ( +0.19%) [ +0.19% +0.28% +0.00% / +0.19% +0.86% +0.80%] index_select linear : Elapsed 0.046 ms (4.648 ms / 100) 4.661 -> 4.666 ( +0.11%) [ +0.00% +0.19% +0.19% / +0.11% +0.64% +0.62%] index_select reverse : Elapsed 0.047 ms (4.661 ms / 100) 4.574 -> 4.572 ( -0.04%) [ +0.11% +0.04% +0.00% / -0.04% +0.61% +0.63%] index_select skip64 : Elapsed 0.046 ms (4.579 ms / 100) 4.571 -> 4.570 ( -0.02%) [ +0.20% +0.09% +0.00% / -0.02% +0.77% +0.68%] index_select skip256 : Elapsed 0.046 ms (4.580 ms / 100) 4.649 -> 4.656 ( +0.15%) [ +0.09% +0.09% +0.00% / +0.15% +0.92% +0.84%] index_select spread : Elapsed 0.047 ms (4.653 ms / 100) 4.640 -> 4.645 ( +0.11%) [ +0.00% +0.15% +0.09% / +0.11% +0.60% +0.73%] index_select strided 3 : Elapsed 0.046 ms (4.640 ms / 100) 4.648 -> 4.651 ( +0.06%) [ +0.11% +0.04% +0.00% / +0.06% +0.80% +0.69%] index_select random : Elapsed 0.047 ms (4.653 ms / 100) 4.649 -> 4.652 ( +0.06%) [ +0.00% +0.04% +0.02% / +0.06% +0.82% +0.86%] index_select random_sorted : Elapsed 0.046 ms (4.649 ms / 100) B = [20, 40, 4, 16] (stride (1, 320, 12800, 20)) A = [20, 40, 4, 5] (stride (800, 1, 200, 40)) dim = 3 2.348 -> 2.353 ( +0.21%) [ +0.00% +0.09% +0.09% / +0.21% +1.87% +2.04%] index_add_ linear : Elapsed 0.023 ms (2.348 ms / 100) 2.295 -> 2.300 ( +0.22%) [ +0.00% +0.22% +0.48% / +0.22% +2.18% +2.18%] index_copy_ linear : Elapsed 0.023 ms (2.295 ms / 100) 2.354 -> 2.405 ( +2.17%) [ +0.00% +0.00% +0.21% / +4.63% +2.17% +2.25%] index_add_ reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.306 -> 2.311 ( +0.22%) [ +0.17% +0.00% +0.04% / +0.22% +2.08% +1.99%] index_copy_ reverse : Elapsed 0.023 ms (2.310 ms / 100) 2.329 -> 2.335 ( +0.26%) [ +0.00% +0.52% +0.39% / +0.26% +2.66% +2.75%] index_add_ spread : Elapsed 0.023 ms (2.329 ms / 100) 2.296 -> 2.298 ( +0.09%) [ +0.04% +0.00% +0.09% / +0.09% +2.31% +2.44%] index_copy_ spread : Elapsed 0.023 ms (2.297 ms / 100) 2.336 -> 2.332 ( -0.17%) [ +0.47% +0.00% +0.17% / -0.17% +2.14% +1.97%] index_add_ strided 3 : Elapsed 0.023 ms (2.347 ms / 100) 2.295 -> 2.299 ( +0.17%) [ +0.31% +0.22% +0.00% / +0.17% +2.00% +2.14%] index_copy_ strided 3 : Elapsed 0.023 ms (2.302 ms / 100) 2.344 -> 2.344 ( +0.00%) [ +0.30% +0.47% +0.00% / +0.00% +2.13% +2.35%] index_add_ strided 5 : Elapsed 0.024 ms (2.351 ms / 100) 2.303 -> 2.302 ( -0.04%) [ +0.13% +0.04% +0.00% / -0.04% +2.04% +2.34%] index_copy_ strided 5 : Elapsed 0.023 ms (2.306 ms / 100) 2.338 -> 2.338 ( +0.00%) [ +0.00% +0.51% +0.04% / +0.00% +2.44% +2.31%] index_add_ strided 7 : Elapsed 0.023 ms (2.338 ms / 100) 2.297 -> 2.299 ( +0.09%) [ +0.13% +0.26% +0.00% / +0.09% +2.22% +2.44%] index_copy_ strided 7 : Elapsed 0.023 ms (2.300 ms / 100) 2.352 -> 2.350 ( -0.09%) [ +0.21% +0.00% +0.00% / -0.09% +2.00% +1.96%] index_add_ perm : Elapsed 0.024 ms (2.357 ms / 100) 2.309 -> 2.314 ( +0.22%) [ +0.35% +0.26% +0.00% / +0.22% +2.25% +2.30%] index_copy_ perm : Elapsed 0.023 ms (2.317 ms / 100) 2.341 -> 2.373 ( +1.37%) [ +0.04% +0.00% +0.09% / +1.37% +2.05% +2.22%] index_add_ perm_sorted : Elapsed 0.023 ms (2.342 ms / 100) 2.304 -> 2.307 ( +0.13%) [ +0.00% +0.09% +0.00% / +0.13% +2.26% +2.08%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.304 ms / 100) 4.917 -> 4.916 ( -0.02%) [ +0.02% +0.04% +0.00% / -0.02% +0.73% +0.81%] index_select const : Elapsed 0.049 ms (4.918 ms / 100) 4.961 -> 4.966 ( +0.10%) [ +0.10% +0.00% +0.12% / +0.10% +1.39% +1.27%] index_select wrap : Elapsed 0.050 ms (4.966 ms / 100) 4.982 -> 4.984 ( +0.04%) [ +0.04% +0.00% +0.10% / +0.04% +1.28% +1.26%] index_select linear : Elapsed 0.050 ms (4.984 ms / 100) 5.006 -> 5.010 ( +0.08%) [ +0.08% +0.26% +0.00% / +0.08% +1.24% +1.12%] index_select reverse : Elapsed 0.050 ms (5.010 ms / 100) 4.908 -> 4.905 ( -0.06%) [ +0.00% +0.29% +0.26% / -0.06% +1.04% +1.06%] index_select skip64 : Elapsed 0.049 ms (4.908 ms / 100) 4.911 -> 4.909 ( -0.04%) [ +0.18% +0.08% +0.00% / -0.04% +0.75% +0.92%] index_select skip256 : Elapsed 0.049 ms (4.920 ms / 100) 4.973 -> 4.969 ( -0.08%) [ +0.20% +0.00% +0.08% / -0.08% +1.13% +1.49%] index_select spread : Elapsed 0.050 ms (4.983 ms / 100) 4.957 -> 4.963 ( +0.12%) [ +0.00% +0.04% +0.06% / +0.12% +1.65% +1.63%] index_select strided 3 : Elapsed 0.050 ms (4.957 ms / 100) 4.953 -> 4.957 ( +0.08%) [ +0.06% +0.00% +0.00% / +0.08% +1.07% +1.19%] index_select random : Elapsed 0.050 ms (4.956 ms / 100) 4.951 -> 4.951 ( +0.00%) [ +0.08% +0.06% +0.00% / +0.00% +1.33% +1.45%] index_select random_sorted : Elapsed 0.050 ms (4.955 ms / 100) B = [20, 40, 4, 16] (stride (1, 320, 12800, 20)) A = [20, 40, 4, 5] (stride (160, 1, 40, 3200)) dim = 3 2.358 -> 2.368 ( +0.42%) [ +0.51% +0.08% +0.00% / +0.42% +0.51% +0.76%] index_add_ linear : Elapsed 0.024 ms (2.370 ms / 100) 2.308 -> 2.311 ( +0.13%) [ +0.04% +0.13% +0.00% / +0.13% +0.30% +0.52%] index_copy_ linear : Elapsed 0.023 ms (2.309 ms / 100) 2.370 -> 2.372 ( +0.08%) [ +0.17% +0.00% +0.04% / +0.13% +0.38% +0.08%] index_add_ reverse : Elapsed 0.024 ms (2.374 ms / 100) 2.321 -> 2.311 ( -0.43%) [ +0.04% +0.00% +0.00% / +0.00% -0.34% -0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.322 ms / 100) 2.366 -> 2.363 ( -0.13%) [ +0.08% +0.00% +0.04% / +0.08% -0.13% -0.13%] index_add_ spread : Elapsed 0.024 ms (2.368 ms / 100) 2.321 -> 2.305 ( -0.69%) [ +0.34% +0.13% +0.00% / +0.17% -0.52% -0.69%] index_copy_ spread : Elapsed 0.023 ms (2.329 ms / 100) 2.370 -> 2.361 ( -0.38%) [ +0.08% +0.00% +0.04% / -0.04% -0.21% -0.38%] index_add_ strided 3 : Elapsed 0.024 ms (2.372 ms / 100) 2.325 -> 2.315 ( -0.43%) [ +0.04% +0.00% +0.09% / -0.09% -0.43% -0.39%] index_copy_ strided 3 : Elapsed 0.023 ms (2.326 ms / 100) 2.372 -> 2.374 ( +0.08%) [ +0.00% +0.04% +0.21% / +0.08% +0.21% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.372 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.00% +0.39% +0.09% / +0.09% +0.30% +0.13%] index_copy_ strided 5 : Elapsed 0.023 ms (2.323 ms / 100) 2.375 -> 2.370 ( -0.21%) [ +0.21% +0.13% +0.00% / -0.21% -0.04% +0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.380 ms / 100) 2.330 -> 2.325 ( -0.21%) [ +0.09% +0.00% +0.09% / +0.13% -0.21% -0.09%] index_copy_ strided 7 : Elapsed 0.023 ms (2.332 ms / 100) 2.382 -> 2.377 ( -0.21%) [ +0.25% +0.04% +0.00% / +0.17% -0.21% -0.21%] index_add_ perm : Elapsed 0.024 ms (2.388 ms / 100) 2.337 -> 2.321 ( -0.68%) [ +0.26% +0.13% +0.00% / +0.17% -0.68% -0.26%] index_copy_ perm : Elapsed 0.023 ms (2.343 ms / 100) 2.372 -> 2.377 ( +0.21%) [ +0.42% +0.00% +0.17% / +0.42% +0.25% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.382 ms / 100) 2.328 -> 2.322 ( -0.26%) [ +0.34% +0.04% +0.00% / -0.04% -0.09% -0.26%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.336 ms / 100) 4.946 -> 4.941 ( -0.10%) [ +0.04% +0.16% +0.00% / -0.10% +0.36% +0.42%] index_select const : Elapsed 0.049 ms (4.948 ms / 100) 4.975 -> 4.979 ( +0.08%) [ +0.02% +0.00% +0.14% / +0.08% +0.28% +0.34%] index_select wrap : Elapsed 0.050 ms (4.976 ms / 100) 4.982 -> 4.993 ( +0.22%) [ +0.16% +0.02% +0.00% / +0.22% +0.32% +0.30%] index_select linear : Elapsed 0.050 ms (4.990 ms / 100) 4.982 -> 4.983 ( +0.02%) [ +0.54% +0.18% +0.00% / +0.08% +0.02% +0.28%] index_select reverse : Elapsed 0.050 ms (5.009 ms / 100) 4.933 -> 4.936 ( +0.06%) [ +0.04% +0.04% +0.00% / +0.06% +0.41% +0.39%] index_select skip64 : Elapsed 0.049 ms (4.935 ms / 100) 4.933 -> 4.930 ( -0.06%) [ +0.00% +0.04% +0.06% / -0.06% +0.41% +0.36%] index_select skip256 : Elapsed 0.049 ms (4.933 ms / 100) 4.990 -> 4.990 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.48% +0.40%] index_select spread : Elapsed 0.050 ms (4.991 ms / 100) 4.976 -> 4.981 ( +0.10%) [ +0.02% +0.00% +0.02% / +0.10% +0.44% +0.36%] index_select strided 3 : Elapsed 0.050 ms (4.977 ms / 100) 4.964 -> 4.967 ( +0.06%) [ +0.00% +0.12% +0.10% / +0.06% +0.58% +0.50%] index_select random : Elapsed 0.050 ms (4.964 ms / 100) 4.975 -> 4.984 ( +0.18%) [ +0.00% +0.18% +0.22% / +0.18% +0.50% +0.92%] index_select random_sorted : Elapsed 0.050 ms (4.975 ms / 100) B = [20, 40, 4, 16] (stride (160, 4, 1, 3200)) A = [20, 40, 4, 5] (stride (20, 400, 5, 1)) dim = 3 2.150 -> 2.154 ( +0.19%) [ +0.28% +0.33% +0.00% / +0.19% +0.56% +0.56%] index_add_ linear : Elapsed 0.022 ms (2.156 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.10% +0.10% +0.00% / -0.05% +0.24% +0.24%] index_copy_ linear : Elapsed 0.021 ms (2.092 ms / 100) 2.145 -> 2.147 ( +0.09%) [ +0.00% +0.23% +0.19% / +0.09% +0.89% +0.89%] index_add_ reverse : Elapsed 0.021 ms (2.145 ms / 100) 2.087 -> 2.091 ( +0.19%) [ +0.10% +0.05% +0.00% / +0.19% +0.24% +0.19%] index_copy_ reverse : Elapsed 0.021 ms (2.089 ms / 100) 2.152 -> 2.155 ( +0.14%) [ +0.28% +0.00% +0.14% / +0.14% +0.37% +0.60%] index_add_ spread : Elapsed 0.022 ms (2.158 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.00% +0.05% +0.00% / +0.10% +0.29% +0.24%] index_copy_ spread : Elapsed 0.021 ms (2.090 ms / 100) 2.156 -> 2.155 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.37% +0.23%] index_add_ strided 3 : Elapsed 0.022 ms (2.157 ms / 100) 2.090 -> 2.087 ( -0.14%) [ +0.10% +0.00% +0.10% / -0.14% +0.29% +0.48%] index_copy_ strided 3 : Elapsed 0.021 ms (2.092 ms / 100) 2.149 -> 2.156 ( +0.33%) [ +0.00% +0.00% +0.28% / +0.33% +0.70% +0.84%] index_add_ strided 5 : Elapsed 0.021 ms (2.149 ms / 100) 2.090 -> 2.092 ( +0.10%) [ +0.14% +0.05% +0.00% / +0.10% +0.43% +0.14%] index_copy_ strided 5 : Elapsed 0.021 ms (2.093 ms / 100) 2.151 -> 2.151 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +0.56% +0.74%] index_add_ strided 7 : Elapsed 0.022 ms (2.157 ms / 100) 2.087 -> 2.088 ( +0.05%) [ +0.19% +0.29% +0.00% / +0.05% +0.38% +0.48%] index_copy_ strided 7 : Elapsed 0.021 ms (2.091 ms / 100) 2.147 -> 2.146 ( -0.05%) [ +0.19% +0.05% +0.00% / -0.05% +0.70% +0.65%] index_add_ perm : Elapsed 0.022 ms (2.151 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.29% +0.38%] index_copy_ perm : Elapsed 0.021 ms (2.089 ms / 100) 2.146 -> 2.160 ( +0.65%) [ +0.23% +0.14% +0.00% / +3.26% +0.65% +0.89%] index_add_ perm_sorted : Elapsed 0.022 ms (2.151 ms / 100) 2.087 -> 2.091 ( +0.19%) [ +0.10% +0.14% +0.00% / +0.19% +0.34% +0.43%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.089 ms / 100) 4.380 -> 4.383 ( +0.07%) [ +0.09% +0.05% +0.00% / +0.07% +0.75% +0.64%] index_select const : Elapsed 0.044 ms (4.384 ms / 100) 4.375 -> 4.382 ( +0.16%) [ +0.00% +0.18% +0.09% / +0.16% +0.78% +0.62%] index_select wrap : Elapsed 0.044 ms (4.375 ms / 100) 4.382 -> 4.392 ( +0.23%) [ +0.00% +0.41% +0.09% / +0.23% +0.89% +0.87%] index_select linear : Elapsed 0.044 ms (4.382 ms / 100) 4.388 -> 4.388 ( +0.00%) [ +0.02% +0.00% +0.09% / +0.00% +0.87% +0.77%] index_select reverse : Elapsed 0.044 ms (4.389 ms / 100) 4.365 -> 4.368 ( +0.07%) [ +0.23% +0.09% +0.00% / +0.07% +0.96% +1.17%] index_select skip64 : Elapsed 0.044 ms (4.375 ms / 100) 4.377 -> 4.380 ( +0.07%) [ +0.00% +0.16% +0.11% / +0.07% +0.73% +0.98%] index_select skip256 : Elapsed 0.044 ms (4.377 ms / 100) 4.377 -> 4.409 ( +0.73%) [ +0.69% +0.91% +0.00% / +0.73% +1.35% +1.12%] index_select spread : Elapsed 0.044 ms (4.407 ms / 100) 4.380 -> 4.390 ( +0.23%) [ +0.32% +0.09% +0.00% / +0.23% +0.91% +0.91%] index_select strided 3 : Elapsed 0.044 ms (4.394 ms / 100) 4.380 -> 4.376 ( -0.09%) [ +0.18% +0.18% +0.00% / -0.09% +0.71% +0.71%] index_select random : Elapsed 0.044 ms (4.388 ms / 100) 4.384 -> 4.387 ( +0.07%) [ +0.30% +0.00% +0.11% / +0.07% +0.89% +0.71%] index_select random_sorted : Elapsed 0.044 ms (4.397 ms / 100) B = [20, 40, 4, 16] (stride (4, 80, 1, 3200)) A = [20, 40, 4, 5] (stride (800, 20, 5, 1)) dim = 3 2.090 -> 2.096 ( +0.29%) [ +0.14% +0.33% +0.00% / +0.29% +0.29% +0.53%] index_add_ linear : Elapsed 0.021 ms (2.093 ms / 100) 2.035 -> 2.039 ( +0.20%) [ +0.20% +0.25% +0.00% / +0.20% +0.29% +0.54%] index_copy_ linear : Elapsed 0.020 ms (2.039 ms / 100) 2.087 -> 2.087 ( +0.00%) [ +0.14% +0.00% +0.24% / +0.00% +0.24% +0.48%] index_add_ reverse : Elapsed 0.021 ms (2.090 ms / 100) 2.031 -> 2.029 ( -0.10%) [ +0.10% +0.00% +0.20% / -0.10% +0.34% +0.30%] index_copy_ reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.089 -> 2.091 ( +0.10%) [ +0.05% +0.14% +0.00% / +0.10% +0.29% +0.38%] index_add_ spread : Elapsed 0.021 ms (2.090 ms / 100) 2.033 -> 2.033 ( +0.00%) [ +0.00% +0.30% +0.20% / +0.00% +0.15% +0.30%] index_copy_ spread : Elapsed 0.020 ms (2.033 ms / 100) 2.091 -> 2.092 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.38% +0.48%] index_add_ strided 3 : Elapsed 0.021 ms (2.092 ms / 100) 2.033 -> 2.034 ( +0.05%) [ +0.25% +0.00% +0.25% / +0.05% +0.54% +0.69%] index_copy_ strided 3 : Elapsed 0.020 ms (2.038 ms / 100) 2.096 -> 2.093 ( -0.14%) [ +0.14% +0.05% +0.00% / +0.10% +0.29% -0.14%] index_add_ strided 5 : Elapsed 0.021 ms (2.099 ms / 100) 2.041 -> 2.041 ( +0.00%) [ +0.15% +0.20% +0.00% / +0.00% +0.05% +0.00%] index_copy_ strided 5 : Elapsed 0.020 ms (2.044 ms / 100) 2.089 -> 2.091 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.34% +0.48%] index_add_ strided 7 : Elapsed 0.021 ms (2.090 ms / 100) 2.033 -> 2.034 ( +0.05%) [ +0.05% +0.20% +0.00% / +0.05% +0.34% +0.59%] index_copy_ strided 7 : Elapsed 0.020 ms (2.034 ms / 100) 2.092 -> 2.094 ( +0.10%) [ +0.00% +0.14% +0.14% / +0.10% +0.14% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.092 ms / 100) 2.031 -> 2.033 ( +0.10%) [ +0.20% +0.20% +0.00% / +0.10% +0.44% +0.25%] index_copy_ perm : Elapsed 0.020 ms (2.035 ms / 100) 2.086 -> 2.093 ( +0.34%) [ +0.29% +0.19% +0.00% / +0.34% +0.48% +0.38%] index_add_ perm_sorted : Elapsed 0.021 ms (2.092 ms / 100) 2.030 -> 2.032 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.44% +0.34%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.030 ms / 100) 4.236 -> 4.230 ( -0.14%) [ +0.00% +0.00% +0.02% / -0.02% +0.83% -0.14%] index_select const : Elapsed 0.042 ms (4.236 ms / 100) 4.212 -> 4.217 ( +0.12%) [ +0.17% +0.00% +0.17% / +0.12% +0.66% +0.57%] index_select wrap : Elapsed 0.042 ms (4.219 ms / 100) 4.220 -> 4.238 ( +0.43%) [ +0.21% +0.43% +0.00% / +0.43% +1.18% +1.11%] index_select linear : Elapsed 0.042 ms (4.229 ms / 100) 4.209 -> 4.213 ( +0.10%) [ +0.00% +0.02% +0.88% / +0.10% +0.69% +1.40%] index_select reverse : Elapsed 0.042 ms (4.209 ms / 100) 4.240 -> 4.255 ( +0.35%) [ +0.02% +0.00% +0.24% / +0.35% +0.73% +0.68%] index_select skip64 : Elapsed 0.042 ms (4.241 ms / 100) 4.238 -> 4.252 ( +0.33%) [ +0.00% +0.31% +0.17% / +0.33% +0.73% +0.47%] index_select skip256 : Elapsed 0.042 ms (4.238 ms / 100) 4.209 -> 4.212 ( +0.07%) [ +0.74% +0.76% +0.00% / +0.07% +1.43% +1.52%] index_select spread : Elapsed 0.042 ms (4.240 ms / 100) 4.203 -> 4.211 ( +0.19%) [ +0.29% +0.00% +0.33% / +0.19% +0.88% +0.98%] index_select strided 3 : Elapsed 0.042 ms (4.215 ms / 100) 4.219 -> 4.207 ( -0.28%) [ +0.00% +0.38% +0.38% / -0.28% +0.52% +1.40%] index_select random : Elapsed 0.042 ms (4.219 ms / 100) 4.241 -> 4.238 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.66% +0.80%] index_select random_sorted : Elapsed 0.042 ms (4.244 ms / 100) B = [20, 40, 4, 16] (stride (1, 80, 20, 3200)) A = [20, 40, 4, 5] (stride (200, 5, 4000, 1)) dim = 3 0.857 -> 0.859 ( +0.23%) [ +0.00% +0.23% +0.00% / +0.23% +0.23% +0.70%] index_add_ linear : Elapsed 0.009 ms (0.857 ms / 100) 0.847 -> 0.849 ( +0.24%) [ +0.35% +0.47% +0.00% / +0.24% +0.59% +0.35%] index_copy_ linear : Elapsed 0.009 ms (0.850 ms / 100) 0.859 -> 0.859 ( +0.00%) [ +0.00% +0.23% +0.12% / +0.00% +0.23% +0.23%] index_add_ reverse : Elapsed 0.009 ms (0.859 ms / 100) 0.846 -> 0.850 ( +0.47%) [ +0.24% +0.35% +0.00% / +0.47% +1.06% +1.06%] index_copy_ reverse : Elapsed 0.008 ms (0.848 ms / 100) 0.864 -> 0.859 ( -0.58%) [ +0.00% +0.00% +0.12% / +0.00% -0.58% -0.23%] index_add_ spread : Elapsed 0.009 ms (0.864 ms / 100) 0.853 -> 0.850 ( -0.35%) [ +0.00% +0.12% +0.23% / +0.35% +0.12% -0.35%] index_copy_ spread : Elapsed 0.009 ms (0.853 ms / 100) 0.860 -> 0.861 ( +0.12%) [ +0.00% +0.35% +0.35% / +0.81% +0.23% +0.12%] index_add_ strided 3 : Elapsed 0.009 ms (0.860 ms / 100) 0.852 -> 0.850 ( -0.23%) [ +0.12% +0.12% +0.00% / +0.23% -0.23% +0.23%] index_copy_ strided 3 : Elapsed 0.009 ms (0.853 ms / 100) 0.862 -> 0.863 ( +0.12%) [ +0.00% +0.23% +0.12% / +0.12% +0.35% +0.12%] index_add_ strided 5 : Elapsed 0.009 ms (0.862 ms / 100) 0.853 -> 0.850 ( -0.35%) [ +0.00% +0.23% +0.00% / +0.47% -0.12% -0.35%] index_copy_ strided 5 : Elapsed 0.009 ms (0.853 ms / 100) 0.856 -> 0.854 ( -0.23%) [ +0.00% +0.00% +0.12% / -0.23% +1.64% +1.64%] index_add_ strided 7 : Elapsed 0.009 ms (0.856 ms / 100) 0.844 -> 0.842 ( -0.24%) [ +0.24% +0.36% +0.00% / -0.24% +1.90% +1.90%] index_copy_ strided 7 : Elapsed 0.008 ms (0.846 ms / 100) 0.857 -> 0.855 ( -0.23%) [ +0.00% +0.00% +0.35% / -0.23% +0.93% +0.70%] index_add_ perm : Elapsed 0.009 ms (0.857 ms / 100) 0.848 -> 0.849 ( +0.12%) [ +0.35% +0.00% +0.24% / +0.12% +0.94% +1.06%] index_copy_ perm : Elapsed 0.009 ms (0.851 ms / 100) 0.859 -> 0.864 ( +0.58%) [ +0.12% +0.58% +0.00% / +0.58% +0.58% +0.70%] index_add_ perm_sorted : Elapsed 0.009 ms (0.860 ms / 100) 0.853 -> 0.851 ( -0.23%) [ +0.12% +0.00% +0.12% / -0.23% +0.47% +0.23%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.854 ms / 100) 1.725 -> 1.727 ( +0.12%) [ +0.35% +0.00% +0.23% / +0.12% +0.12% +0.29%] index_select const : Elapsed 0.017 ms (1.731 ms / 100) 1.724 -> 1.722 ( -0.12%) [ +0.00% +0.41% +0.23% / +0.58% +0.12% -0.12%] index_select wrap : Elapsed 0.017 ms (1.724 ms / 100) 1.729 -> 1.726 ( -0.17%) [ +0.17% +0.00% +0.17% / +0.52% -0.17% +0.12%] index_select linear : Elapsed 0.017 ms (1.732 ms / 100) 1.732 -> 1.725 ( -0.40%) [ +0.00% +0.00% +0.00% / -0.40% -0.12% -0.17%] index_select reverse : Elapsed 0.017 ms (1.732 ms / 100) 1.726 -> 1.723 ( -0.17%) [ +0.46% +0.00% +0.17% / -0.06% -0.17% -0.06%] index_select skip64 : Elapsed 0.017 ms (1.734 ms / 100) 1.727 -> 1.727 ( +0.00%) [ +0.29% +0.23% +0.00% / +0.23% +0.00% +0.00%] index_select skip256 : Elapsed 0.017 ms (1.732 ms / 100) 1.724 -> 1.724 ( +0.00%) [ +0.29% +0.06% +0.00% / +0.46% +0.00% +0.29%] index_select spread : Elapsed 0.017 ms (1.729 ms / 100) 1.734 -> 1.735 ( +0.06%) [ +0.00% +0.17% +0.35% / +4.84% +0.40% +0.06%] index_select strided 3 : Elapsed 0.017 ms (1.734 ms / 100) 1.728 -> 1.726 ( -0.12%) [ +0.00% +0.52% +0.35% / +0.17% -0.12% +0.06%] index_select random : Elapsed 0.017 ms (1.728 ms / 100) 1.724 -> 1.729 ( +0.29%) [ +0.29% +0.35% +0.00% / +0.29% +0.29% +0.41%] index_select random_sorted : Elapsed 0.017 ms (1.729 ms / 100) out_shape = [16, 40, 5, 4] in_shape = [20, 40, 5, 4] idx_dim = 0 B = [16, 40, 5, 4] (stride (800, 1, 40, 200)) A = [20, 40, 5, 4] (stride (200, 1, 40, 4000)) dim = 0 4.149 -> 4.148 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.65% +0.77%] index_select const : Elapsed 0.041 ms (4.149 ms / 100) 4.137 -> 4.139 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.68% +0.73%] index_select wrap : Elapsed 0.041 ms (4.139 ms / 100) 4.131 -> 4.132 ( +0.02%) [ +0.10% +0.00% +0.05% / +0.02% +0.63% +0.80%] index_select linear : Elapsed 0.041 ms (4.135 ms / 100) 4.137 -> 4.137 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.70% +0.68%] index_select reverse : Elapsed 0.041 ms (4.138 ms / 100) 4.142 -> 4.142 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.77%] index_select skip64 : Elapsed 0.041 ms (4.142 ms / 100) 4.149 -> 4.149 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.48% +0.48%] index_select skip256 : Elapsed 0.042 ms (4.151 ms / 100) 4.141 -> 4.142 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.65% +0.56%] index_select spread : Elapsed 0.041 ms (4.141 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.60% +0.60%] index_select strided 3 : Elapsed 0.041 ms (4.141 ms / 100) 4.146 -> 4.147 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.65% +0.68%] index_select strided 5 : Elapsed 0.041 ms (4.149 ms / 100) 4.146 -> 4.147 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.60% +0.58%] index_select strided 7 : Elapsed 0.041 ms (4.149 ms / 100) 4.126 -> 4.127 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.70% +0.65%] index_select strided 8 : Elapsed 0.041 ms (4.128 ms / 100) 4.167 -> 4.168 ( +0.02%) [ +0.05% +0.00% +0.10% / +0.02% +0.65% +0.65%] index_select strided 16 : Elapsed 0.042 ms (4.169 ms / 100) 4.154 -> 4.155 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.51% +0.51%] index_select random : Elapsed 0.042 ms (4.154 ms / 100) 4.142 -> 4.145 ( +0.07%) [ +0.05% +0.10% +0.00% / +0.07% +0.53% +0.43%] index_select random_sorted : Elapsed 0.041 ms (4.144 ms / 100) 4.133 -> 4.133 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.70% +0.68%] index_select perm : Elapsed 0.041 ms (4.133 ms / 100) 4.132 -> 4.133 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.65% +0.65%] index_select perm_sorted : Elapsed 0.041 ms (4.134 ms / 100) B = [16, 40, 5, 4] (stride (20, 320, 4, 1)) A = [20, 40, 5, 4] (stride (20, 400, 1, 5)) dim = 0 3.533 -> 3.535 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.57% +0.62%] index_select const : Elapsed 0.035 ms (3.535 ms / 100) 3.524 -> 3.530 ( +0.17%) [ +0.06% +0.00% +0.06% / +0.17% +0.43% +0.45%] index_select wrap : Elapsed 0.035 ms (3.526 ms / 100) 3.526 -> 3.529 ( +0.09%) [ +0.20% +0.06% +0.00% / +0.09% +0.40% +0.37%] index_select linear : Elapsed 0.035 ms (3.533 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.51% +0.37%] index_select reverse : Elapsed 0.035 ms (3.534 ms / 100) 3.532 -> 3.532 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.65% +0.65%] index_select skip64 : Elapsed 0.035 ms (3.535 ms / 100) 3.541 -> 3.540 ( -0.03%) [ +0.00% +0.08% +0.00% / -0.03% +0.54% +0.54%] index_select skip256 : Elapsed 0.035 ms (3.541 ms / 100) 3.513 -> 3.514 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.60% +0.65%] index_select spread : Elapsed 0.035 ms (3.515 ms / 100) 3.517 -> 3.517 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.48%] index_select strided 3 : Elapsed 0.035 ms (3.517 ms / 100) 3.536 -> 3.535 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.62% +0.62%] index_select strided 5 : Elapsed 0.035 ms (3.537 ms / 100) 3.514 -> 3.535 ( +0.60%) [ +0.00% +0.00% +0.00% / +3.36% +0.60% +0.63%] index_select strided 7 : Elapsed 0.035 ms (3.514 ms / 100) 3.526 -> 3.530 ( +0.11%) [ +0.26% +0.00% +0.20% / +0.11% +0.57% +0.57%] index_select strided 8 : Elapsed 0.035 ms (3.535 ms / 100) 3.518 -> 3.517 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.65% +0.57%] index_select strided 16 : Elapsed 0.035 ms (3.520 ms / 100) 3.521 -> 3.522 ( +0.03%) [ +0.00% +0.09% +0.11% / +0.03% +0.82% +0.74%] index_select random : Elapsed 0.035 ms (3.521 ms / 100) 3.519 -> 3.522 ( +0.09%) [ +0.00% +0.09% +0.03% / +0.09% +0.68% +0.68%] index_select random_sorted : Elapsed 0.035 ms (3.519 ms / 100) 3.513 -> 3.514 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.60% +0.68%] index_select perm : Elapsed 0.035 ms (3.515 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.63% +0.65%] index_select perm_sorted : Elapsed 0.035 ms (3.514 ms / 100) B = [16, 40, 5, 4] (stride (5, 80, 1, 3200)) A = [20, 40, 5, 4] (stride (1, 400, 20, 100)) dim = 0 4.137 -> 4.137 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.77% +0.70%] index_select const : Elapsed 0.041 ms (4.140 ms / 100) 4.159 -> 4.157 ( -0.05%) [ +0.07% +0.00% +0.05% / -0.05% +0.77% +0.63%] index_select wrap : Elapsed 0.042 ms (4.162 ms / 100) 4.153 -> 4.150 ( -0.07%) [ +0.10% +0.05% +0.00% / -0.07% +0.75% +0.79%] index_select linear : Elapsed 0.042 ms (4.157 ms / 100) 4.153 -> 4.154 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.96% +0.84%] index_select reverse : Elapsed 0.042 ms (4.155 ms / 100) 4.145 -> 4.152 ( +0.17%) [ +0.00% +0.24% +0.19% / +0.17% +0.80% +0.68%] index_select skip64 : Elapsed 0.041 ms (4.145 ms / 100) 4.148 -> 4.147 ( -0.02%) [ +0.00% +0.07% +0.05% / -0.02% +0.51% +0.68%] index_select skip256 : Elapsed 0.041 ms (4.148 ms / 100) 4.151 -> 4.159 ( +0.19%) [ +0.24% +0.19% +0.00% / +0.19% +0.82% +0.82%] index_select spread : Elapsed 0.042 ms (4.161 ms / 100) 4.167 -> 4.168 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.60% +0.67%] index_select strided 3 : Elapsed 0.042 ms (4.170 ms / 100) 4.145 -> 4.148 ( +0.07%) [ +0.10% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_select strided 5 : Elapsed 0.041 ms (4.149 ms / 100) 4.152 -> 4.157 ( +0.12%) [ +0.00% +0.02% +0.07% / +0.12% +0.82% +0.75%] index_select strided 7 : Elapsed 0.042 ms (4.152 ms / 100) 4.148 -> 4.145 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.53% +0.65%] index_select strided 8 : Elapsed 0.041 ms (4.149 ms / 100) 4.146 -> 4.148 ( +0.05%) [ +0.00% +0.05% +0.12% / +0.05% +0.72% +0.72%] index_select strided 16 : Elapsed 0.041 ms (4.146 ms / 100) 4.154 -> 4.161 ( +0.17%) [ +0.00% +0.12% +0.17% / +0.17% +0.63% +0.58%] index_select random : Elapsed 0.042 ms (4.154 ms / 100) 4.162 -> 4.161 ( -0.02%) [ +0.00% +0.05% +0.02% / -0.02% +0.58% +0.53%] index_select random_sorted : Elapsed 0.042 ms (4.162 ms / 100) 4.148 -> 4.156 ( +0.19%) [ +0.10% +0.07% +0.00% / +0.19% +0.70% +0.60%] index_select perm : Elapsed 0.042 ms (4.152 ms / 100) 4.165 -> 4.169 ( +0.10%) [ +0.17% +0.00% +0.00% / +0.10% +0.60% +0.62%] index_select perm_sorted : Elapsed 0.042 ms (4.172 ms / 100) B = [16, 40, 5, 4] (stride (1, 80, 16, 3200)) A = [20, 40, 5, 4] (stride (5, 400, 1, 100)) dim = 0 3.679 -> 3.686 ( +0.19%) [ +0.19% +0.00% +0.03% / +0.19% +0.49% +0.57%] index_select const : Elapsed 0.037 ms (3.686 ms / 100) 3.672 -> 3.674 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.57% +0.60%] index_select wrap : Elapsed 0.037 ms (3.673 ms / 100) 3.676 -> 3.687 ( +0.30%) [ +0.19% +0.27% +0.00% / +0.30% +0.54% +0.71%] index_select linear : Elapsed 0.037 ms (3.683 ms / 100) 3.682 -> 3.681 ( -0.03%) [ +0.08% +0.00% +0.05% / -0.03% +0.49% +0.49%] index_select reverse : Elapsed 0.037 ms (3.685 ms / 100) 3.670 -> 3.671 ( +0.03%) [ +0.19% +0.11% +0.00% / +0.03% +0.60% +0.60%] index_select skip64 : Elapsed 0.037 ms (3.677 ms / 100) 3.670 -> 3.669 ( -0.03%) [ +0.19% +0.22% +0.00% / -0.03% +0.74% +0.76%] index_select skip256 : Elapsed 0.037 ms (3.677 ms / 100) 3.684 -> 3.683 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.60% +0.60%] index_select spread : Elapsed 0.037 ms (3.686 ms / 100) 3.670 -> 3.676 ( +0.16%) [ +0.14% +0.00% +0.05% / +0.16% +0.74% +0.57%] index_select strided 3 : Elapsed 0.037 ms (3.675 ms / 100) 3.667 -> 3.667 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.57% +0.60%] index_select strided 5 : Elapsed 0.037 ms (3.670 ms / 100) 3.669 -> 3.667 ( -0.05%) [ +0.00% +0.11% +0.08% / -0.05% +0.74% +0.68%] index_select strided 7 : Elapsed 0.037 ms (3.669 ms / 100) 3.671 -> 3.674 ( +0.08%) [ +0.05% +0.14% +0.00% / +0.08% +0.57% +0.57%] index_select strided 8 : Elapsed 0.037 ms (3.673 ms / 100) 3.667 -> 3.678 ( +0.30%) [ +0.00% +0.11% +0.11% / +0.30% +0.82% +0.93%] index_select strided 16 : Elapsed 0.037 ms (3.667 ms / 100) 3.664 -> 3.662 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.71% +0.68%] index_select random : Elapsed 0.037 ms (3.664 ms / 100) 3.664 -> 3.666 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.57% +0.74%] index_select random_sorted : Elapsed 0.037 ms (3.664 ms / 100) 3.663 -> 3.665 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.74% +0.71%] index_select perm : Elapsed 0.037 ms (3.663 ms / 100) 3.658 -> 3.657 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.60% +0.71%] index_select perm_sorted : Elapsed 0.037 ms (3.658 ms / 100) out_shape = [20, 16, 5, 4] in_shape = [20, 40, 5, 4] idx_dim = 1 B = [20, 16, 5, 4] (stride (320, 20, 4, 1)) A = [20, 40, 5, 4] (stride (160, 1, 3200, 40)) dim = 1 3.918 -> 3.922 ( +0.10%) [ +0.00% +0.08% +0.05% / +0.10% +0.41% +0.41%] index_select const : Elapsed 0.039 ms (3.918 ms / 100) 3.908 -> 3.918 ( +0.26%) [ +0.00% +0.05% +0.20% / +0.26% +0.87% +0.59%] index_select wrap : Elapsed 0.039 ms (3.908 ms / 100) 3.897 -> 3.922 ( +0.64%) [ +0.00% +0.10% +0.00% / +2.62% +0.64% +0.64%] index_select linear : Elapsed 0.039 ms (3.897 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.51% +0.38%] index_select reverse : Elapsed 0.039 ms (3.924 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.13% +0.00% +0.10% / +0.03% +0.54% +0.51%] index_select skip64 : Elapsed 0.039 ms (3.925 ms / 100) 3.916 -> 3.920 ( +0.10%) [ +0.15% +0.00% +0.20% / +0.10% +0.33% +0.33%] index_select skip256 : Elapsed 0.039 ms (3.922 ms / 100) 3.921 -> 3.925 ( +0.10%) [ +0.08% +0.10% +0.00% / +0.10% +0.66% +0.69%] index_select spread : Elapsed 0.039 ms (3.924 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.13% +0.00% +0.15% / +0.00% +0.56% +0.28%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.00% +0.15% +0.08% / +0.03% +0.43% +0.54%] index_select strided 5 : Elapsed 0.039 ms (3.918 ms / 100) 3.924 -> 3.917 ( -0.18%) [ +0.05% +0.03% +0.00% / -0.18% +0.20% +0.18%] index_select strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.917 -> 3.913 ( -0.10%) [ +0.08% +0.13% +0.00% / -0.10% +0.43% +0.43%] index_select strided 8 : Elapsed 0.039 ms (3.920 ms / 100) 3.928 -> 3.933 ( +0.13%) [ +0.18% +0.13% +0.00% / +0.13% +0.59% +0.48%] index_select strided 16 : Elapsed 0.039 ms (3.935 ms / 100) 3.922 -> 3.921 ( -0.03%) [ +0.08% +0.00% +0.00% / -0.03% +0.38% +0.36%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.33% +0.48%] index_select random_sorted : Elapsed 0.039 ms (3.924 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.15% +0.08% +0.00% / +0.05% +0.54% +0.51%] index_select perm : Elapsed 0.039 ms (3.925 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.41% +0.28%] index_select perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) B = [20, 16, 5, 4] (stride (320, 20, 1, 5)) dim = 1 fill_cnt = 40 1.791 -> 1.795 ( +0.22%) [ +0.22% +0.17% +0.00% / +0.22% +0.39% +0.39%] index_fill_ const : Elapsed 0.018 ms (1.795 ms / 100) 1.797 -> 1.799 ( +0.11%) [ +0.17% +0.00% +0.17% / +0.11% +0.39% +0.28%] index_fill_ linear : Elapsed 0.018 ms (1.800 ms / 100) 1.794 -> 1.792 ( -0.11%) [ +0.00% +0.00% +0.11% / -0.11% +0.17% -0.06%] index_fill_ reverse : Elapsed 0.018 ms (1.794 ms / 100) 1.793 -> 1.791 ( -0.11%) [ +0.28% +0.00% +0.28% / +0.22% +0.33% -0.11%] index_fill_ skip64 : Elapsed 0.018 ms (1.798 ms / 100) 1.795 -> 1.796 ( +0.06%) [ +0.06% +0.39% +0.00% / +2.90% +0.06% +0.22%] index_fill_ skip256 : Elapsed 0.018 ms (1.796 ms / 100) 1.797 -> 1.797 ( +0.00%) [ +0.00% +0.17% +0.11% / +0.17% +0.00% +0.06%] index_fill_ spread : Elapsed 0.018 ms (1.797 ms / 100) 1.801 -> 1.798 ( -0.17%) [ +0.00% +0.00% +0.22% / -0.17% -0.06% +0.00%] index_fill_ strided 3 : Elapsed 0.018 ms (1.801 ms / 100) 1.801 -> 1.800 ( -0.06%) [ +0.00% +0.17% +0.00% / +0.06% -0.06% +0.11%] index_fill_ strided 5 : Elapsed 0.018 ms (1.801 ms / 100) 1.802 -> 1.800 ( -0.11%) [ +0.00% +0.06% +0.00% / -0.11% -0.06% -0.06%] index_fill_ strided 7 : Elapsed 0.018 ms (1.802 ms / 100) 1.796 -> 1.795 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.22% -0.06%] index_fill_ strided 8 : Elapsed 0.018 ms (1.799 ms / 100) 1.801 -> 1.796 ( -0.28%) [ +0.17% +0.00% +0.22% / -0.28% +0.00% +0.11%] index_fill_ random : Elapsed 0.018 ms (1.804 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.00% +0.11% +0.17% / +0.11% +0.17% +0.06%] index_fill_ random_sorted : Elapsed 0.018 ms (1.797 ms / 100) B = [20, 16, 5, 4] (stride (320, 4, 64, 1)) A = [20, 40, 5, 4] (stride (1, 100, 20, 4000)) dim = 1 4.027 -> 4.028 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.79% +0.74%] index_select const : Elapsed 0.040 ms (4.027 ms / 100) 4.030 -> 4.029 ( -0.02%) [ +0.00% +0.15% +0.17% / -0.02% +0.62% +0.65%] index_select wrap : Elapsed 0.040 ms (4.030 ms / 100) 4.018 -> 4.018 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.82% +0.62%] index_select linear : Elapsed 0.040 ms (4.018 ms / 100) 4.006 -> 4.038 ( +0.80%) [ +0.07% +0.00% +0.02% / +1.65% +0.82% +0.80%] index_select reverse : Elapsed 0.040 ms (4.009 ms / 100) 4.058 -> 4.058 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.84% +0.84%] index_select skip64 : Elapsed 0.041 ms (4.059 ms / 100) 4.026 -> 4.027 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.82% +0.82%] index_select skip256 : Elapsed 0.040 ms (4.027 ms / 100) 4.013 -> 4.013 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.87% +0.80%] index_select spread : Elapsed 0.040 ms (4.013 ms / 100) 4.042 -> 4.045 ( +0.07%) [ +0.10% +0.00% +0.02% / +0.07% +0.64% +0.64%] index_select strided 3 : Elapsed 0.040 ms (4.046 ms / 100) 4.019 -> 4.022 ( +0.07%) [ +0.07% +0.12% +0.00% / +0.07% +0.85% +0.87%] index_select strided 5 : Elapsed 0.040 ms (4.022 ms / 100) 4.032 -> 4.032 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.67% +0.67%] index_select strided 7 : Elapsed 0.040 ms (4.032 ms / 100) 4.042 -> 4.043 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.72% +0.74%] index_select strided 8 : Elapsed 0.040 ms (4.044 ms / 100) 4.051 -> 4.051 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.77% +0.74%] index_select strided 16 : Elapsed 0.041 ms (4.053 ms / 100) 4.022 -> 4.022 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.85% +0.77%] index_select random : Elapsed 0.040 ms (4.024 ms / 100) 4.016 -> 4.018 ( +0.05%) [ +0.17% +0.00% +0.15% / +0.05% +0.80% +1.00%] index_select random_sorted : Elapsed 0.040 ms (4.023 ms / 100) 4.036 -> 4.036 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.74% +0.84%] index_select perm : Elapsed 0.040 ms (4.037 ms / 100) 4.053 -> 4.055 ( +0.05%) [ +0.07% +0.12% +0.00% / +0.05% +0.81% +0.69%] index_select perm_sorted : Elapsed 0.041 ms (4.056 ms / 100) B = [20, 16, 5, 4] (stride (4, 80, 1280, 1)) A = [20, 40, 5, 4] (stride (160, 1, 3200, 40)) dim = 1 4.253 -> 4.258 ( +0.12%) [ +0.02% +0.12% +0.00% / +0.12% +0.54% +0.40%] index_select const : Elapsed 0.043 ms (4.254 ms / 100) 4.251 -> 4.250 ( -0.02%) [ +0.02% +0.12% +0.00% / -0.02% +0.52% +0.52%] index_select wrap : Elapsed 0.043 ms (4.252 ms / 100) 4.247 -> 4.247 ( +0.00%) [ +0.00% +0.09% +0.16% / +0.00% +0.59% +0.57%] index_select linear : Elapsed 0.042 ms (4.247 ms / 100) 4.277 -> 4.283 ( +0.14%) [ +0.19% +0.00% +0.16% / +0.14% +0.40% +0.33%] index_select reverse : Elapsed 0.043 ms (4.285 ms / 100) 4.276 -> 4.274 ( -0.05%) [ +0.16% +0.00% +0.00% / -0.05% +0.42% +0.35%] index_select skip64 : Elapsed 0.043 ms (4.283 ms / 100) 4.254 -> 4.249 ( -0.12%) [ +0.12% +0.12% +0.00% / -0.12% +0.49% +0.59%] index_select skip256 : Elapsed 0.043 ms (4.259 ms / 100) 4.268 -> 4.287 ( +0.45%) [ +0.00% +0.14% +0.00% / +2.76% +0.61% +0.45%] index_select spread : Elapsed 0.043 ms (4.268 ms / 100) 4.268 -> 4.268 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.52% +0.56%] index_select strided 3 : Elapsed 0.043 ms (4.269 ms / 100) 4.272 -> 4.276 ( +0.09%) [ +0.12% +0.02% +0.00% / +0.09% +0.44% +0.51%] index_select strided 5 : Elapsed 0.043 ms (4.277 ms / 100) 4.262 -> 4.262 ( +0.00%) [ +0.14% +0.00% +0.02% / +0.00% +0.54% +0.52%] index_select strided 7 : Elapsed 0.043 ms (4.268 ms / 100) 4.266 -> 4.262 ( -0.09%) [ +0.00% +0.16% +0.14% / -0.09% +0.56% +0.56%] index_select strided 8 : Elapsed 0.043 ms (4.266 ms / 100) 4.281 -> 4.286 ( +0.12%) [ +0.16% +0.30% +0.00% / +0.12% +0.47% +0.54%] index_select strided 16 : Elapsed 0.043 ms (4.288 ms / 100) 4.285 -> 4.287 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.28% +0.33%] index_select random : Elapsed 0.043 ms (4.286 ms / 100) 4.279 -> 4.279 ( +0.00%) [ +0.09% +0.12% +0.00% / +0.00% +0.47% +0.44%] index_select random_sorted : Elapsed 0.043 ms (4.283 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.54% +0.51%] index_select perm : Elapsed 0.043 ms (4.285 ms / 100) 4.274 -> 4.281 ( +0.16%) [ +0.12% +0.14% +0.00% / +0.16% +0.75% +0.61%] index_select perm_sorted : Elapsed 0.043 ms (4.279 ms / 100) B = [20, 16, 5, 4] (stride (1, 100, 20, 1600)) A = [20, 40, 5, 4] (stride (4, 400, 80, 1)) dim = 1 3.872 -> 3.872 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.62%] index_select const : Elapsed 0.039 ms (3.872 ms / 100) 3.878 -> 3.873 ( -0.13%) [ +0.00% +0.03% +0.03% / -0.13% +0.67% +0.64%] index_select wrap : Elapsed 0.039 ms (3.878 ms / 100) 3.870 -> 3.874 ( +0.10%) [ +0.08% +0.00% +0.03% / +0.10% +1.06% +0.75%] index_select linear : Elapsed 0.039 ms (3.873 ms / 100) 3.873 -> 3.875 ( +0.05%) [ +0.15% +0.00% +0.03% / +0.05% +1.34% +0.93%] index_select reverse : Elapsed 0.039 ms (3.879 ms / 100) 3.872 -> 3.871 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.65% +0.62%] index_select skip64 : Elapsed 0.039 ms (3.872 ms / 100) 3.868 -> 3.873 ( +0.13%) [ +0.13% +0.18% +0.00% / +0.13% +0.72% +0.93%] index_select skip256 : Elapsed 0.039 ms (3.873 ms / 100) 3.883 -> 3.886 ( +0.08%) [ +0.05% +0.00% +0.00% / +0.08% +1.11% +1.00%] index_select spread : Elapsed 0.039 ms (3.885 ms / 100) 3.872 -> 3.872 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.93% +0.90%] index_select strided 3 : Elapsed 0.039 ms (3.874 ms / 100) 3.883 -> 3.882 ( -0.03%) [ +0.08% +0.00% +0.10% / -0.03% +0.98% +1.03%] index_select strided 5 : Elapsed 0.039 ms (3.886 ms / 100) 3.872 -> 3.873 ( +0.03%) [ +0.10% +0.15% +0.00% / +0.03% +0.77% +1.11%] index_select strided 7 : Elapsed 0.039 ms (3.876 ms / 100) 3.891 -> 3.886 ( -0.13%) [ +0.05% +0.05% +0.00% / -0.13% +0.75% +0.80%] index_select strided 8 : Elapsed 0.039 ms (3.893 ms / 100) 3.878 -> 3.881 ( +0.08%) [ +0.13% +0.18% +0.00% / +0.08% +1.11% +1.13%] index_select strided 16 : Elapsed 0.039 ms (3.883 ms / 100) 3.877 -> 3.885 ( +0.21%) [ +0.21% +0.28% +0.00% / +0.21% +1.11% +1.08%] index_select random : Elapsed 0.039 ms (3.885 ms / 100) 3.870 -> 3.870 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.83% +0.90%] index_select random_sorted : Elapsed 0.039 ms (3.871 ms / 100) 3.879 -> 3.878 ( -0.03%) [ +0.05% +0.00% +0.18% / -0.03% +1.03% +1.03%] index_select perm : Elapsed 0.039 ms (3.881 ms / 100) 3.879 -> 3.879 ( +0.00%) [ +0.00% +0.10% +0.31% / +0.00% +1.11% +0.90%] index_select perm_sorted : Elapsed 0.039 ms (3.879 ms / 100) out_shape = [20, 40, 16, 4] in_shape = [20, 40, 5, 4] idx_dim = 2 B = [20, 40, 16, 4] (stride (2560, 64, 1, 16)) A = [20, 40, 5, 4] (stride (800, 4, 160, 1)) dim = 2 2.182 -> 2.187 ( +0.23%) [ +0.09% +0.18% +0.00% / +0.23% +2.57% +2.75%] index_add_ linear : Elapsed 0.022 ms (2.184 ms / 100) 2.140 -> 2.141 ( +0.05%) [ +0.19% +0.09% +0.00% / +0.05% +3.18% +3.18%] index_copy_ linear : Elapsed 0.021 ms (2.144 ms / 100) 2.184 -> 2.187 ( +0.14%) [ +0.05% +0.05% +0.00% / +0.14% +2.11% +2.24%] index_add_ reverse : Elapsed 0.022 ms (2.185 ms / 100) 2.141 -> 2.142 ( +0.05%) [ +0.23% +0.23% +0.00% / +0.05% +3.04% +3.22%] index_copy_ reverse : Elapsed 0.021 ms (2.146 ms / 100) 2.218 -> 2.219 ( +0.05%) [ +0.00% +0.32% +0.09% / +0.05% +2.57% +2.57%] index_add_ spread : Elapsed 0.022 ms (2.218 ms / 100) 2.213 -> 2.230 ( +0.77%) [ +0.05% +0.05% +0.00% / +0.77% +3.12% +2.94%] index_copy_ spread : Elapsed 0.022 ms (2.214 ms / 100) 2.218 -> 2.216 ( -0.09%) [ +0.09% +0.09% +0.00% / -0.09% +2.34% +2.25%] index_add_ strided 3 : Elapsed 0.022 ms (2.220 ms / 100) 2.206 -> 2.205 ( -0.05%) [ +0.09% +0.00% +0.27% / -0.05% +2.95% +3.08%] index_copy_ strided 3 : Elapsed 0.022 ms (2.208 ms / 100) 2.210 -> 2.214 ( +0.18%) [ +0.27% +0.18% +0.00% / +0.18% +2.62% +2.62%] index_add_ strided 5 : Elapsed 0.022 ms (2.216 ms / 100) 2.207 -> 2.209 ( +0.09%) [ +0.05% +0.09% +0.00% / +0.09% +2.99% +3.04%] index_copy_ strided 5 : Elapsed 0.022 ms (2.208 ms / 100) 2.212 -> 2.221 ( +0.41%) [ +0.27% +0.09% +0.00% / +0.41% +2.35% +2.67%] index_add_ strided 7 : Elapsed 0.022 ms (2.218 ms / 100) 2.204 -> 2.208 ( +0.18%) [ +0.00% +0.27% +0.18% / +0.18% +3.09% +2.99%] index_copy_ strided 7 : Elapsed 0.022 ms (2.204 ms / 100) 2.213 -> 2.213 ( +0.00%) [ +0.18% +0.00% +0.23% / +0.00% +2.39% +2.58%] index_add_ perm : Elapsed 0.022 ms (2.217 ms / 100) 2.207 -> 2.207 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +2.90% +3.08%] index_copy_ perm : Elapsed 0.022 ms (2.207 ms / 100) 2.217 -> 2.220 ( +0.14%) [ +0.27% +0.00% +0.18% / +0.14% +2.57% +2.71%] index_add_ perm_sorted : Elapsed 0.022 ms (2.223 ms / 100) 2.207 -> 2.214 ( +0.32%) [ +0.18% +0.23% +0.00% / +0.32% +2.90% +3.26%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.211 ms / 100) 4.376 -> 4.386 ( +0.23%) [ +0.00% +0.27% +0.11% / +0.23% +1.17% +1.05%] index_select const : Elapsed 0.044 ms (4.376 ms / 100) 4.374 -> 4.377 ( +0.07%) [ +0.00% +0.07% +0.09% / +0.07% +2.10% +2.01%] index_select wrap : Elapsed 0.044 ms (4.374 ms / 100) 4.405 -> 4.414 ( +0.20%) [ +0.36% +0.32% +0.00% / +0.20% +2.18% +2.38%] index_select linear : Elapsed 0.044 ms (4.421 ms / 100) 4.415 -> 4.426 ( +0.25%) [ +0.00% +0.18% +0.18% / +0.25% +1.90% +1.81%] index_select reverse : Elapsed 0.044 ms (4.415 ms / 100) 4.373 -> 4.369 ( -0.09%) [ +0.00% +0.02% +0.07% / -0.09% +0.87% +0.94%] index_select skip64 : Elapsed 0.044 ms (4.373 ms / 100) 4.377 -> 4.385 ( +0.18%) [ +0.16% +0.39% +0.00% / +0.18% +1.01% +1.05%] index_select skip256 : Elapsed 0.044 ms (4.384 ms / 100) 4.400 -> 4.399 ( -0.02%) [ +0.00% +0.00% +0.05% / -0.02% +1.55% +1.48%] index_select spread : Elapsed 0.044 ms (4.400 ms / 100) 4.403 -> 4.408 ( +0.11%) [ +0.07% +0.00% +0.02% / +0.11% +2.45% +2.38%] index_select strided 3 : Elapsed 0.044 ms (4.406 ms / 100) 4.407 -> 4.413 ( +0.14%) [ +0.14% +0.00% +0.18% / +0.14% +1.70% +1.63%] index_select random : Elapsed 0.044 ms (4.413 ms / 100) 4.376 -> 4.377 ( +0.02%) [ +0.00% +0.05% +0.09% / +0.02% +2.03% +1.92%] index_select random_sorted : Elapsed 0.044 ms (4.376 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 160, 40)) A = [20, 40, 5, 4] (stride (800, 20, 4, 1)) dim = 2 2.245 -> 2.239 ( -0.27%) [ +0.27% +0.09% +0.00% / +0.00% -0.13% -0.27%] index_add_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.187 -> 2.185 ( -0.09%) [ +0.18% +0.05% +0.00% / +0.32% +0.05% -0.09%] index_copy_ linear : Elapsed 0.022 ms (2.191 ms / 100) 2.244 -> 2.245 ( +0.04%) [ +0.18% +0.31% +0.00% / +0.04% +0.22% +0.13%] index_add_ reverse : Elapsed 0.022 ms (2.248 ms / 100) 2.184 -> 2.180 ( -0.18%) [ +0.37% +0.09% +0.00% / +0.14% -0.18% +0.00%] index_copy_ reverse : Elapsed 0.022 ms (2.192 ms / 100) 2.246 -> 2.243 ( -0.13%) [ +0.31% +0.09% +0.00% / -0.04% -0.13% -0.04%] index_add_ spread : Elapsed 0.023 ms (2.253 ms / 100) 2.188 -> 2.184 ( -0.18%) [ +0.14% +0.18% +0.00% / -0.05% -0.14% -0.18%] index_copy_ spread : Elapsed 0.022 ms (2.191 ms / 100) 2.247 -> 2.243 ( -0.18%) [ +0.18% +0.13% +0.00% / +0.13% -0.04% -0.18%] index_add_ strided 3 : Elapsed 0.023 ms (2.251 ms / 100) 2.186 -> 2.186 ( +0.00%) [ +0.37% +0.18% +0.00% / +0.23% +0.14% +0.00%] index_copy_ strided 3 : Elapsed 0.022 ms (2.194 ms / 100) 2.241 -> 2.241 ( +0.00%) [ +0.54% +0.00% +0.13% / +0.45% +0.18% +0.00%] index_add_ strided 5 : Elapsed 0.023 ms (2.253 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.50% +0.09% +0.00% / +0.32% +0.27% +0.23%] index_copy_ strided 5 : Elapsed 0.022 ms (2.197 ms / 100) 2.244 -> 2.244 ( +0.00%) [ +0.40% +0.22% +0.00% / +0.04% +0.00% +0.22%] index_add_ strided 7 : Elapsed 0.023 ms (2.253 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.27% +0.37% +0.00% / +0.41% +0.23% +0.27%] index_copy_ strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.246 -> 2.236 ( -0.45%) [ +0.00% +0.13% +0.04% / +0.00% -0.13% -0.45%] index_add_ perm : Elapsed 0.022 ms (2.246 ms / 100) 2.186 -> 2.182 ( -0.18%) [ +0.00% +0.14% +0.14% / +0.32% -0.09% -0.18%] index_copy_ perm : Elapsed 0.022 ms (2.186 ms / 100) 2.250 -> 2.247 ( -0.13%) [ +0.13% +0.04% +0.00% / -0.13% -0.04% -0.13%] index_add_ perm_sorted : Elapsed 0.023 ms (2.253 ms / 100) 2.187 -> 2.190 ( +0.14%) [ +0.23% +0.00% +0.23% / +0.37% +0.14% +0.18%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.192 ms / 100) 4.558 -> 4.579 ( +0.46%) [ +0.64% +0.00% +0.09% / +0.46% +0.97% +0.77%] index_select const : Elapsed 0.046 ms (4.587 ms / 100) 4.607 -> 4.610 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.35% +0.41%] index_select wrap : Elapsed 0.046 ms (4.607 ms / 100) 4.616 -> 4.614 ( -0.04%) [ +0.11% +0.00% +0.15% / -0.04% +0.58% +0.30%] index_select linear : Elapsed 0.046 ms (4.621 ms / 100) 4.605 -> 4.622 ( +0.37%) [ +0.00% +0.13% +0.07% / +0.37% +0.46% +0.46%] index_select reverse : Elapsed 0.046 ms (4.605 ms / 100) 4.586 -> 4.588 ( +0.04%) [ +0.13% +0.00% +0.15% / +0.04% +0.33% +0.39%] index_select skip64 : Elapsed 0.046 ms (4.592 ms / 100) 4.571 -> 4.584 ( +0.28%) [ +0.42% +0.00% +0.42% / +0.28% +0.98% +0.98%] index_select skip256 : Elapsed 0.046 ms (4.590 ms / 100) 4.604 -> 4.610 ( +0.13%) [ +0.00% +0.11% +0.04% / +0.13% +0.67% +0.74%] index_select spread : Elapsed 0.046 ms (4.604 ms / 100) 4.584 -> 4.580 ( -0.09%) [ +0.20% +0.13% +0.00% / -0.09% +0.65% +0.57%] index_select strided 3 : Elapsed 0.046 ms (4.593 ms / 100) 4.591 -> 4.581 ( -0.22%) [ +0.00% +0.33% +0.26% / -0.22% +0.96% +0.54%] index_select random : Elapsed 0.046 ms (4.591 ms / 100) 4.605 -> 4.605 ( +0.00%) [ +0.00% +0.30% +0.07% / +0.00% +0.50% +0.56%] index_select random_sorted : Elapsed 0.046 ms (4.605 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) A = [20, 40, 5, 4] (stride (1, 20, 800, 4000)) dim = 2 2.456 -> 2.459 ( +0.12%) [ +0.04% +0.20% +0.00% / +0.12% +0.69% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.457 ms / 100) 2.383 -> 2.384 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.97% +0.63%] index_copy_ linear : Elapsed 0.024 ms (2.387 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.45% +0.49%] index_add_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.382 -> 2.382 ( +0.00%) [ +0.04% +0.00% +0.50% / +0.00% +0.59% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.383 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.00% +0.20% +0.08% / +0.16% +0.45% +0.37%] index_add_ spread : Elapsed 0.025 ms (2.464 ms / 100) 2.392 -> 2.390 ( -0.08%) [ +0.00% +0.13% +0.04% / -0.08% +0.38% +0.50%] index_copy_ spread : Elapsed 0.024 ms (2.392 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.86% +0.65%] index_add_ strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.384 -> 2.388 ( +0.17%) [ +0.00% +0.00% +0.04% / +0.17% +0.96% +0.76%] index_copy_ strided 3 : Elapsed 0.024 ms (2.384 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.65% +0.65%] index_add_ strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.377 -> 2.374 ( -0.13%) [ +0.08% +0.00% +0.00% / -0.13% +0.46% +0.55%] index_copy_ strided 5 : Elapsed 0.024 ms (2.379 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.77% +0.45%] index_add_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.378 -> 2.379 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.71% +0.50%] index_copy_ strided 7 : Elapsed 0.024 ms (2.380 ms / 100) 2.464 -> 2.469 ( +0.20%) [ +0.00% +0.04% +0.04% / +0.20% +0.49% +0.61%] index_add_ perm : Elapsed 0.025 ms (2.464 ms / 100) 2.391 -> 2.393 ( +0.08%) [ +0.00% +0.13% +0.17% / +0.08% +0.75% +0.79%] index_copy_ perm : Elapsed 0.024 ms (2.391 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.53% +0.61%] index_add_ perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) 2.392 -> 2.395 ( +0.13%) [ +0.17% +0.00% +0.00% / +0.13% +0.59% +0.67%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 5.337 -> 5.339 ( +0.04%) [ +0.07% +0.00% +0.00% / +0.04% +0.60% +0.69%] index_select const : Elapsed 0.053 ms (5.341 ms / 100) 5.302 -> 5.310 ( +0.15%) [ +0.00% +0.13% +0.32% / +0.15% +0.85% +0.89%] index_select wrap : Elapsed 0.053 ms (5.302 ms / 100) 5.359 -> 5.361 ( +0.04%) [ +0.04% +0.00% +0.02% / +0.04% +0.50% +0.62%] index_select linear : Elapsed 0.054 ms (5.361 ms / 100) 5.320 -> 5.332 ( +0.23%) [ +0.17% +0.28% +0.00% / +0.23% +0.90% +1.02%] index_select reverse : Elapsed 0.053 ms (5.329 ms / 100) 5.321 -> 5.335 ( +0.26%) [ +0.21% +0.04% +0.00% / +0.26% +0.94% +0.58%] index_select skip64 : Elapsed 0.053 ms (5.332 ms / 100) 5.323 -> 5.327 ( +0.08%) [ +0.23% +0.26% +0.00% / +0.08% +0.90% +0.92%] index_select skip256 : Elapsed 0.053 ms (5.335 ms / 100) 5.341 -> 5.346 ( +0.09%) [ +0.09% +0.02% +0.00% / +0.09% +0.81% +0.79%] index_select spread : Elapsed 0.053 ms (5.346 ms / 100) 5.353 -> 5.358 ( +0.09%) [ +0.15% +0.00% +0.11% / +0.09% +0.65% +0.73%] index_select strided 3 : Elapsed 0.054 ms (5.361 ms / 100) 5.320 -> 5.330 ( +0.19%) [ +0.11% +0.17% +0.00% / +0.19% +0.77% +0.85%] index_select random : Elapsed 0.053 ms (5.326 ms / 100) 5.319 -> 5.322 ( +0.06%) [ +0.02% +0.11% +0.00% / +0.06% +0.73% +0.71%] index_select random_sorted : Elapsed 0.053 ms (5.320 ms / 100) B = [20, 40, 16, 4] (stride (64, 1280, 4, 1)) A = [20, 40, 5, 4] (stride (200, 1, 40, 4000)) dim = 2 2.391 -> 2.385 ( -0.25%) [ +0.33% +0.42% +0.00% / -0.25% +0.46% +0.92%] index_add_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.345 -> 2.349 ( +0.17%) [ +0.13% +0.38% +0.00% / +0.17% +0.51% +0.77%] index_copy_ linear : Elapsed 0.023 ms (2.348 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.21% +0.00% +0.00% / +0.08% +0.33% +0.17%] index_add_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.344 -> 2.352 ( +0.34%) [ +0.09% +0.00% +0.21% / +0.34% +0.51% +0.38%] index_copy_ reverse : Elapsed 0.023 ms (2.346 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.25% +0.21% +0.00% / +0.17% +0.45% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.427 ms / 100) 2.376 -> 2.383 ( +0.29%) [ +0.34% +0.42% +0.00% / +0.29% +0.55% +0.34%] index_copy_ spread : Elapsed 0.024 ms (2.384 ms / 100) 2.420 -> 2.423 ( +0.12%) [ +0.29% +0.00% +0.04% / +0.12% +0.58% +0.45%] index_add_ strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.382 -> 2.388 ( +0.25%) [ +0.04% +0.00% +0.08% / +0.25% +0.38% +0.34%] index_copy_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.415 -> 2.422 ( +0.29%) [ +0.00% +0.33% +0.08% / +0.29% +0.58% +0.54%] index_add_ strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.369 -> 2.366 ( -0.13%) [ +0.00% +0.00% +0.04% / -0.13% +0.46% +0.00%] index_copy_ strided 5 : Elapsed 0.024 ms (2.369 ms / 100) 2.421 -> 2.423 ( +0.08%) [ +0.08% +0.00% +0.17% / +2.56% +0.21% +0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.385 -> 2.388 ( +0.13%) [ +0.08% +0.00% +0.00% / +0.13% +0.21% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.387 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.25% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.363 -> 2.362 ( -0.04%) [ +0.21% +0.00% +0.00% / -0.04% +0.21% +0.30%] index_copy_ perm : Elapsed 0.024 ms (2.368 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.37% +0.58%] index_add_ perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.371 -> 2.371 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.59% +0.55%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.371 ms / 100) 4.907 -> 4.909 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.61% +0.71%] index_select const : Elapsed 0.049 ms (4.909 ms / 100) 4.966 -> 4.967 ( +0.02%) [ +0.10% +0.00% +0.04% / +0.02% +0.42% +0.46%] index_select wrap : Elapsed 0.050 ms (4.971 ms / 100) 4.971 -> 4.979 ( +0.16%) [ +0.12% +0.00% +0.00% / +0.16% +0.62% +0.52%] index_select linear : Elapsed 0.050 ms (4.977 ms / 100) 4.996 -> 4.995 ( -0.02%) [ +0.02% +0.06% +0.00% / -0.02% +0.62% +0.64%] index_select reverse : Elapsed 0.050 ms (4.997 ms / 100) 4.896 -> 4.907 ( +0.22%) [ +0.00% +0.31% +0.25% / +0.22% +0.84% +0.74%] index_select skip64 : Elapsed 0.049 ms (4.896 ms / 100) 4.897 -> 4.907 ( +0.20%) [ +0.16% +0.29% +0.00% / +0.20% +0.88% +0.63%] index_select skip256 : Elapsed 0.049 ms (4.905 ms / 100) 4.984 -> 4.984 ( +0.00%) [ +0.14% +0.00% +0.12% / +0.00% +0.52% +0.60%] index_select spread : Elapsed 0.050 ms (4.991 ms / 100) 4.963 -> 4.962 ( -0.02%) [ +0.30% +0.10% +0.00% / -0.02% +0.38% +0.69%] index_select strided 3 : Elapsed 0.050 ms (4.978 ms / 100) 4.975 -> 4.975 ( +0.00%) [ +0.14% +0.06% +0.00% / +0.00% +0.80% +0.92%] index_select random : Elapsed 0.050 ms (4.982 ms / 100) 4.988 -> 4.988 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.68% +0.76%] index_select random_sorted : Elapsed 0.050 ms (4.989 ms / 100) B = [20, 40, 16, 4] (stride (160, 1, 3200, 40)) A = [20, 40, 5, 4] (stride (4, 400, 80, 1)) dim = 2 2.406 -> 2.408 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.83% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.406 ms / 100) 2.343 -> 2.350 ( +0.30%) [ +0.00% +0.00% +0.13% / +0.30% +0.51% +0.55%] index_copy_ linear : Elapsed 0.023 ms (2.343 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.08% +0.21% +0.00% / +0.12% +0.75% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.343 -> 2.350 ( +0.30%) [ +0.00% +0.21% +0.00% / +0.30% +0.60% +0.64%] index_copy_ reverse : Elapsed 0.023 ms (2.343 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.12% +0.00% +0.50% / -0.04% +0.66% +0.87%] index_add_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.347 -> 2.345 ( -0.09%) [ +0.00% +0.13% +0.34% / -0.09% +0.51% +0.72%] index_copy_ spread : Elapsed 0.023 ms (2.347 ms / 100) 2.398 -> 2.404 ( +0.25%) [ +0.38% +0.25% +0.00% / +0.25% +0.83% +1.00%] index_add_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.336 -> 2.341 ( +0.21%) [ +0.39% +0.21% +0.00% / +0.21% +0.77% +0.94%] index_copy_ strided 3 : Elapsed 0.023 ms (2.345 ms / 100) 2.403 -> 2.402 ( -0.04%) [ +0.00% +0.25% +0.04% / -0.04% +1.00% +0.96%] index_add_ strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.339 -> 2.347 ( +0.34%) [ +0.34% +0.09% +0.00% / +0.34% +0.77% +0.77%] index_copy_ strided 5 : Elapsed 0.023 ms (2.347 ms / 100) 2.399 -> 2.407 ( +0.33%) [ +0.29% +0.38% +0.00% / +0.33% +1.08% +0.96%] index_add_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.338 -> 2.344 ( +0.26%) [ +0.09% +0.26% +0.00% / +0.26% +0.77% +0.90%] index_copy_ strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.399 -> 2.403 ( +0.17%) [ +0.25% +0.00% +0.21% / +0.17% +0.67% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.338 -> 2.338 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.60% +0.56%] index_copy_ perm : Elapsed 0.023 ms (2.338 ms / 100) 2.408 -> 2.412 ( +0.17%) [ +0.42% +0.00% +0.04% / +0.17% +0.42% +0.58%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.345 -> 2.350 ( +0.21%) [ +0.30% +0.00% +0.00% / +0.21% +0.43% +0.30%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.352 ms / 100) 5.028 -> 5.032 ( +0.08%) [ +0.00% +0.14% +0.22% / +0.08% +0.76% +0.80%] index_select const : Elapsed 0.050 ms (5.028 ms / 100) 5.079 -> 5.081 ( +0.04%) [ +0.08% +0.22% +0.00% / +0.04% +0.69% +0.79%] index_select wrap : Elapsed 0.051 ms (5.083 ms / 100) 5.098 -> 5.109 ( +0.22%) [ +0.00% +0.00% +0.02% / +0.22% +0.71% +0.59%] index_select linear : Elapsed 0.051 ms (5.098 ms / 100) 5.049 -> 5.055 ( +0.12%) [ +0.24% +0.18% +0.00% / +0.12% +0.75% +0.73%] index_select reverse : Elapsed 0.051 ms (5.061 ms / 100) 5.029 -> 5.031 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.56% +0.70%] index_select skip64 : Elapsed 0.050 ms (5.033 ms / 100) 5.030 -> 5.031 ( +0.02%) [ +0.20% +0.00% +0.08% / +0.02% +0.64% +0.72%] index_select skip256 : Elapsed 0.050 ms (5.040 ms / 100) 5.073 -> 5.076 ( +0.06%) [ +0.00% +0.02% +0.06% / +0.06% +0.61% +0.75%] index_select spread : Elapsed 0.051 ms (5.073 ms / 100) 5.095 -> 5.089 ( -0.12%) [ +0.00% +0.06% +0.02% / -0.12% +0.67% +0.49%] index_select strided 3 : Elapsed 0.051 ms (5.095 ms / 100) 5.079 -> 5.082 ( +0.06%) [ +0.16% +0.00% +0.06% / +0.06% +0.83% +0.71%] index_select random : Elapsed 0.051 ms (5.087 ms / 100) 5.067 -> 5.073 ( +0.12%) [ +0.10% +0.00% +0.06% / +0.12% +0.95% +0.91%] index_select random_sorted : Elapsed 0.051 ms (5.072 ms / 100) B = [20, 40, 16, 4] (stride (4, 80, 3200, 1)) A = [20, 40, 5, 4] (stride (800, 5, 1, 200)) dim = 2 2.371 -> 2.373 ( +0.08%) [ +0.55% +0.59% +0.00% / +0.46% +0.46% +0.08%] index_add_ linear : Elapsed 0.024 ms (2.384 ms / 100) 2.309 -> 2.307 ( -0.09%) [ +0.43% +0.52% +0.00% / +0.17% +0.48% -0.09%] index_copy_ linear : Elapsed 0.023 ms (2.319 ms / 100) 2.375 -> 2.377 ( +0.08%) [ +0.34% +0.04% +0.00% / +0.17% +0.08% +0.34%] index_add_ reverse : Elapsed 0.024 ms (2.383 ms / 100) 2.309 -> 2.311 ( +0.09%) [ +0.17% +0.00% +0.00% / +0.26% +0.09% +0.35%] index_copy_ reverse : Elapsed 0.023 ms (2.313 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.17% +0.00% +0.34% / +0.00% +0.17% +0.04%] index_add_ spread : Elapsed 0.024 ms (2.380 ms / 100) 2.309 -> 2.308 ( -0.04%) [ +0.30% +0.00% +0.43% / -0.04% +0.39% +0.39%] index_copy_ spread : Elapsed 0.023 ms (2.316 ms / 100) 2.381 -> 2.372 ( -0.38%) [ +0.08% +0.08% +0.00% / -0.38% +0.00% -0.17%] index_add_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.311 -> 2.304 ( -0.30%) [ +0.00% +0.17% +0.00% / -0.30% +0.22% -0.13%] index_copy_ strided 3 : Elapsed 0.023 ms (2.311 ms / 100) 2.388 -> 2.389 ( +0.04%) [ +0.00% +0.21% +0.17% / +0.04% +0.29% +0.08%] index_add_ strided 5 : Elapsed 0.024 ms (2.388 ms / 100) 2.323 -> 2.320 ( -0.13%) [ +0.00% +0.04% +0.04% / +0.09% +0.04% -0.13%] index_copy_ strided 5 : Elapsed 0.023 ms (2.323 ms / 100) 2.383 -> 2.381 ( -0.08%) [ +0.25% +0.25% +0.00% / +0.25% +0.25% -0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.389 ms / 100) 2.314 -> 2.314 ( +0.00%) [ +0.30% +0.48% +0.00% / +0.13% +0.30% +0.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.321 ms / 100) 2.363 -> 2.375 ( +0.51%) [ +0.85% +0.00% +0.51% / +0.76% +0.76% +0.51%] index_add_ perm : Elapsed 0.024 ms (2.383 ms / 100) 2.301 -> 2.306 ( +0.22%) [ +0.56% +0.00% +0.26% / +0.61% +0.56% +0.22%] index_copy_ perm : Elapsed 0.023 ms (2.314 ms / 100) 2.374 -> 2.378 ( +0.17%) [ +0.13% +0.51% +0.00% / +0.34% +0.17% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.377 ms / 100) 2.308 -> 2.313 ( +0.22%) [ +0.22% +0.26% +0.00% / +0.22% +0.35% +0.52%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.313 ms / 100) 5.061 -> 5.061 ( +0.00%) [ +0.00% +0.30% +0.38% / +0.00% +0.61% +0.41%] index_select const : Elapsed 0.051 ms (5.061 ms / 100) 5.059 -> 5.084 ( +0.49%) [ +0.00% +0.32% +0.38% / +0.49% +0.85% +0.75%] index_select wrap : Elapsed 0.051 ms (5.059 ms / 100) 5.062 -> 5.065 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.71% +0.49%] index_select linear : Elapsed 0.051 ms (5.065 ms / 100) 5.069 -> 5.072 ( +0.06%) [ +0.24% +0.00% +0.04% / +0.06% +0.65% +0.06%] index_select reverse : Elapsed 0.051 ms (5.081 ms / 100) 5.075 -> 5.072 ( -0.06%) [ +0.12% +0.00% +0.10% / -0.06% +0.61% +0.47%] index_select skip64 : Elapsed 0.051 ms (5.081 ms / 100) 5.075 -> 5.078 ( +0.06%) [ +0.08% +0.00% +0.10% / +0.06% +0.45% +0.53%] index_select skip256 : Elapsed 0.051 ms (5.079 ms / 100) 5.063 -> 5.070 ( +0.14%) [ +0.34% +0.00% +0.18% / +0.14% +0.28% +0.28%] index_select spread : Elapsed 0.051 ms (5.080 ms / 100) 5.068 -> 5.055 ( -0.26%) [ +0.10% +0.00% +0.14% / -0.26% +0.45% +0.49%] index_select strided 3 : Elapsed 0.051 ms (5.073 ms / 100) 5.061 -> 5.039 ( -0.43%) [ +0.00% +0.24% +0.20% / -0.43% +0.85% +0.83%] index_select random : Elapsed 0.051 ms (5.061 ms / 100) 5.069 -> 5.076 ( +0.14%) [ +0.00% +0.20% +0.08% / +0.14% +0.75% +0.71%] index_select random_sorted : Elapsed 0.051 ms (5.069 ms / 100) B = [20, 40, 16, 4] (stride (1, 80, 3200, 20)) dim = 2 fill_cnt = 5 1.087 -> 1.092 ( +0.46%) [ +0.28% +0.18% +0.00% / +0.46% +1.01% +0.74%] index_fill_ const : Elapsed 0.011 ms (1.090 ms / 100) 1.106 -> 1.107 ( +0.09%) [ +0.27% +0.09% +0.00% / +0.09% +0.45% +0.63%] index_fill_ linear : Elapsed 0.011 ms (1.109 ms / 100) 1.105 -> 1.106 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.81% +0.72%] index_fill_ reverse : Elapsed 0.011 ms (1.107 ms / 100) 1.090 -> 1.091 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.46% +0.37%] index_fill_ skip64 : Elapsed 0.011 ms (1.092 ms / 100) 1.088 -> 1.090 ( +0.18%) [ +0.37% +0.28% +0.00% / +0.18% +0.55% +0.74%] index_fill_ skip256 : Elapsed 0.011 ms (1.092 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.00% +0.27% +0.09% / +0.09% +0.54% +0.54%] index_fill_ spread : Elapsed 0.011 ms (1.111 ms / 100) 1.111 -> 1.113 ( +0.18%) [ +0.00% +0.27% +0.00% / +0.18% +0.54% +0.18%] index_fill_ strided 3 : Elapsed 0.011 ms (1.111 ms / 100) 1.109 -> 1.109 ( +0.00%) [ +0.00% +0.18% +0.36% / +0.27% +0.54% +0.00%] index_fill_ strided 5 : Elapsed 0.011 ms (1.109 ms / 100) 1.106 -> 1.107 ( +0.09%) [ +0.00% +0.09% +0.18% / +0.09% +1.36% +1.36%] index_fill_ strided 7 : Elapsed 0.011 ms (1.106 ms / 100) 1.092 -> 1.090 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +0.09% +0.37%] index_fill_ strided 8 : Elapsed 0.011 ms (1.092 ms / 100) 1.110 -> 1.106 ( -0.36%) [ +0.09% +0.00% +0.09% / +0.00% -0.36% -0.27%] index_fill_ random : Elapsed 0.011 ms (1.111 ms / 100) 1.109 -> 1.103 ( -0.54%) [ +0.36% +0.18% +0.00% / +0.36% -0.54% -0.36%] index_fill_ random_sorted : Elapsed 0.011 ms (1.113 ms / 100) 1.106 -> 1.106 ( +0.00%) [ +0.27% +0.00% +0.18% / +0.00% +0.36% +0.54%] index_fill_ perm : Elapsed 0.011 ms (1.109 ms / 100) 1.105 -> 1.106 ( +0.09%) [ +0.09% +0.27% +0.00% / +0.09% +0.45% +0.63%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.106 ms / 100) B = [20, 40, 16, 4] (stride (640, 16, 1, 12800)) A = [20, 40, 5, 4] (stride (1, 80, 3200, 20)) dim = 2 2.314 -> 2.319 ( +0.22%) [ +0.00% +0.17% +0.17% / +0.22% +0.48% +0.56%] index_add_ linear : Elapsed 0.023 ms (2.314 ms / 100) 2.274 -> 2.274 ( +0.00%) [ +0.00% +0.09% +0.04% / +0.00% +0.48% +0.44%] index_copy_ linear : Elapsed 0.023 ms (2.274 ms / 100) 2.307 -> 2.312 ( +0.22%) [ +0.22% +0.00% +0.17% / +0.26% +0.26% +0.22%] index_add_ reverse : Elapsed 0.023 ms (2.312 ms / 100) 2.270 -> 2.272 ( +0.09%) [ +0.00% +0.22% +0.04% / +0.09% +0.35% +0.35%] index_copy_ reverse : Elapsed 0.023 ms (2.270 ms / 100) 2.366 -> 2.367 ( +0.04%) [ +0.25% +0.13% +0.00% / +0.04% +0.13% +0.08%] index_add_ spread : Elapsed 0.024 ms (2.372 ms / 100) 2.352 -> 2.351 ( -0.04%) [ +0.00% +0.17% +0.13% / -0.04% +0.21% +0.21%] index_copy_ spread : Elapsed 0.024 ms (2.352 ms / 100) 2.357 -> 2.358 ( +0.04%) [ +0.00% +0.13% +0.21% / +0.04% +0.30% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.345 -> 2.347 ( +0.09%) [ +0.17% +0.00% +0.21% / +0.09% +0.38% +0.34%] index_copy_ strided 3 : Elapsed 0.023 ms (2.349 ms / 100) 2.338 -> 2.338 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.00% +0.38% +0.43%] index_add_ strided 5 : Elapsed 0.023 ms (2.338 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +0.43% +0.39%] index_copy_ strided 5 : Elapsed 0.023 ms (2.331 ms / 100) 2.347 -> 2.343 ( -0.17%) [ +0.00% +0.00% +0.13% / -0.17% +0.34% +0.21%] index_add_ strided 7 : Elapsed 0.023 ms (2.347 ms / 100) 2.340 -> 2.341 ( +0.04%) [ +0.00% +0.09% +0.04% / +0.04% +0.43% +0.30%] index_copy_ strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.357 -> 2.360 ( +0.13%) [ +0.21% +0.00% +0.04% / +0.13% +0.59% +0.51%] index_add_ perm : Elapsed 0.024 ms (2.362 ms / 100) 2.342 -> 2.348 ( +0.26%) [ +0.00% +0.17% +0.00% / +0.26% +0.51% +0.51%] index_copy_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.363 -> 2.366 ( +0.13%) [ +0.13% +0.30% +0.00% / +0.13% +0.17% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.366 ms / 100) 2.347 -> 2.353 ( +0.26%) [ +0.00% +0.34% +0.26% / +0.26% +0.60% +0.47%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.347 ms / 100) 4.965 -> 4.966 ( +0.02%) [ +0.10% +0.00% +0.16% / +0.02% +0.85% +0.83%] index_select const : Elapsed 0.050 ms (4.970 ms / 100) 4.899 -> 4.900 ( +0.02%) [ +0.20% +0.00% +0.00% / +0.02% +0.59% +0.59%] index_select wrap : Elapsed 0.049 ms (4.909 ms / 100) 4.976 -> 4.986 ( +0.20%) [ +0.18% +0.16% +0.00% / +0.20% +0.64% +0.64%] index_select linear : Elapsed 0.050 ms (4.985 ms / 100) 4.954 -> 4.964 ( +0.20%) [ +0.22% +0.12% +0.00% / +0.20% +0.59% +0.75%] index_select reverse : Elapsed 0.050 ms (4.965 ms / 100) 4.883 -> 4.880 ( -0.06%) [ +0.10% +0.00% +0.04% / -0.06% +0.68% +0.66%] index_select skip64 : Elapsed 0.049 ms (4.888 ms / 100) 4.971 -> 4.976 ( +0.10%) [ +0.02% +0.02% +0.00% / +0.10% +0.72% +0.58%] index_select skip256 : Elapsed 0.050 ms (4.972 ms / 100) 4.947 -> 4.951 ( +0.08%) [ +0.18% +0.00% +0.14% / +0.08% +0.79% +0.89%] index_select spread : Elapsed 0.050 ms (4.956 ms / 100) 4.979 -> 4.984 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.68% +0.62%] index_select strided 3 : Elapsed 0.050 ms (4.979 ms / 100) 4.954 -> 4.962 ( +0.16%) [ +0.02% +0.06% +0.00% / +0.16% +0.67% +0.71%] index_select random : Elapsed 0.050 ms (4.955 ms / 100) 4.967 -> 4.977 ( +0.20%) [ +0.16% +0.00% +0.14% / +0.20% +0.74% +0.58%] index_select random_sorted : Elapsed 0.050 ms (4.975 ms / 100) B = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) A = [20, 40, 5, 4] (stride (800, 5, 1, 200)) dim = 2 2.372 -> 2.378 ( +0.25%) [ +0.63% +0.00% +0.00% / +0.25% +0.59% +0.51%] index_add_ linear : Elapsed 0.024 ms (2.387 ms / 100) 2.305 -> 2.303 ( -0.09%) [ +0.17% +0.13% +0.00% / -0.09% +1.17% +0.39%] index_copy_ linear : Elapsed 0.023 ms (2.309 ms / 100) 2.383 -> 2.382 ( -0.04%) [ +0.00% +0.21% +0.13% / +0.17% -0.04% +0.00%] index_add_ reverse : Elapsed 0.024 ms (2.383 ms / 100) 2.313 -> 2.314 ( +0.04%) [ +0.00% +0.17% +0.30% / +0.22% +0.04% +0.35%] index_copy_ reverse : Elapsed 0.023 ms (2.313 ms / 100) 2.371 -> 2.370 ( -0.04%) [ +0.00% +0.25% +0.21% / -0.04% +0.38% +0.55%] index_add_ spread : Elapsed 0.024 ms (2.371 ms / 100) 2.308 -> 2.307 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.26% +0.17%] index_copy_ spread : Elapsed 0.023 ms (2.309 ms / 100) 2.375 -> 2.373 ( -0.08%) [ +0.55% +0.00% +0.08% / +0.13% -0.04% -0.08%] index_add_ strided 3 : Elapsed 0.024 ms (2.388 ms / 100) 2.303 -> 2.304 ( +0.04%) [ +0.56% +0.00% +0.35% / +0.04% +0.13% +0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.316 ms / 100) 2.383 -> 2.381 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% +0.21% +0.50%] index_add_ strided 5 : Elapsed 0.024 ms (2.383 ms / 100) 2.316 -> 2.320 ( +0.17%) [ +0.00% +0.09% +0.26% / +0.17% +0.30% +0.39%] index_copy_ strided 5 : Elapsed 0.023 ms (2.316 ms / 100) 2.381 -> 2.383 ( +0.08%) [ +0.00% +0.25% +0.04% / +0.17% +0.34% +0.08%] index_add_ strided 7 : Elapsed 0.024 ms (2.381 ms / 100) 2.319 -> 2.316 ( -0.13%) [ +0.04% +0.04% +0.00% / +0.09% +0.26% -0.13%] index_copy_ strided 7 : Elapsed 0.023 ms (2.320 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.00% +0.42% +0.13% / +0.04% +0.08% +0.21%] index_add_ perm : Elapsed 0.024 ms (2.374 ms / 100) 2.305 -> 2.306 ( +0.04%) [ +0.00% +0.30% +0.30% / +0.13% +0.04% +0.35%] index_copy_ perm : Elapsed 0.023 ms (2.305 ms / 100) 2.379 -> 2.378 ( -0.04%) [ +0.00% +0.04% +0.08% / +0.04% -0.04% -0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.379 ms / 100) 2.308 -> 2.310 ( +0.09%) [ +0.26% +0.00% +0.17% / +0.09% +0.17% +0.17%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.314 ms / 100) 5.063 -> 5.039 ( -0.47%) [ +0.12% +0.00% +0.20% / -0.47% +0.51% +0.00%] index_select const : Elapsed 0.051 ms (5.069 ms / 100) 5.074 -> 5.067 ( -0.14%) [ +0.12% +0.00% +0.00% / -0.14% +0.41% +0.55%] index_select wrap : Elapsed 0.051 ms (5.080 ms / 100) 5.064 -> 5.072 ( +0.16%) [ +0.00% +0.16% +0.04% / +0.16% +0.51% +0.83%] index_select linear : Elapsed 0.051 ms (5.064 ms / 100) 5.068 -> 5.053 ( -0.30%) [ +0.12% +0.00% +0.04% / -0.30% +0.55% +0.61%] index_select reverse : Elapsed 0.051 ms (5.074 ms / 100) 5.081 -> 5.082 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.39% +0.35%] index_select skip64 : Elapsed 0.051 ms (5.083 ms / 100) 5.076 -> 5.077 ( +0.02%) [ +0.00% +0.12% +0.08% / +0.02% +0.39% +0.63%] index_select skip256 : Elapsed 0.051 ms (5.076 ms / 100) 5.058 -> 5.068 ( +0.20%) [ +0.55% +0.08% +0.00% / +0.20% +0.79% +0.73%] index_select spread : Elapsed 0.051 ms (5.086 ms / 100) 5.064 -> 5.078 ( +0.28%) [ +0.26% +0.02% +0.00% / +0.28% +0.67% +0.89%] index_select strided 3 : Elapsed 0.051 ms (5.077 ms / 100) 5.044 -> 5.083 ( +0.77%) [ +0.00% +0.63% +0.56% / +0.77% +1.21% +1.25%] index_select random : Elapsed 0.050 ms (5.044 ms / 100) 5.075 -> 5.085 ( +0.20%) [ +0.16% +0.00% +0.00% / +0.20% +0.67% +0.59%] index_select random_sorted : Elapsed 0.051 ms (5.083 ms / 100) B = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) A = [20, 40, 5, 4] (stride (4, 400, 80, 1)) dim = 2 2.404 -> 2.409 ( +0.21%) [ +0.17% +0.17% +0.00% / +0.21% +0.92% +0.96%] index_add_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.342 -> 2.343 ( +0.04%) [ +0.13% +0.21% +0.00% / +0.04% +0.64% +0.77%] index_copy_ linear : Elapsed 0.023 ms (2.345 ms / 100) 2.405 -> 2.410 ( +0.21%) [ +0.21% +0.25% +0.00% / +0.21% +1.00% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.340 -> 2.342 ( +0.09%) [ +0.04% +0.00% +0.09% / +0.09% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.023 ms (2.341 ms / 100) 2.411 -> 2.420 ( +0.37%) [ +0.04% +0.12% +0.00% / +0.37% +0.66% +0.75%] index_add_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.343 -> 2.357 ( +0.60%) [ +0.21% +0.00% +0.09% / +0.60% +0.85% +0.81%] index_copy_ spread : Elapsed 0.023 ms (2.348 ms / 100) 2.403 -> 2.399 ( -0.17%) [ +0.00% +0.17% +0.17% / -0.17% +0.79% +0.79%] index_add_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.342 -> 2.338 ( -0.17%) [ +0.00% +0.04% +0.00% / -0.17% +0.64% +0.56%] index_copy_ strided 3 : Elapsed 0.023 ms (2.342 ms / 100) 2.401 -> 2.409 ( +0.33%) [ +0.29% +0.54% +0.00% / +0.33% +1.17% +1.04%] index_add_ strided 5 : Elapsed 0.024 ms (2.408 ms / 100) 2.335 -> 2.337 ( +0.09%) [ +0.21% +0.60% +0.00% / +0.09% +0.99% +0.90%] index_copy_ strided 5 : Elapsed 0.023 ms (2.340 ms / 100) 2.403 -> 2.408 ( +0.21%) [ +0.08% +0.00% +0.00% / +0.21% +1.00% +0.92%] index_add_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.339 -> 2.343 ( +0.17%) [ +0.13% +0.00% +0.04% / +0.17% +0.77% +0.47%] index_copy_ strided 7 : Elapsed 0.023 ms (2.342 ms / 100) 2.395 -> 2.395 ( +0.00%) [ +0.17% +0.29% +0.00% / +0.00% +1.04% +1.17%] index_add_ perm : Elapsed 0.024 ms (2.399 ms / 100) 2.334 -> 2.333 ( -0.04%) [ +0.04% +0.26% +0.00% / -0.04% +0.81% +0.81%] index_copy_ perm : Elapsed 0.023 ms (2.335 ms / 100) 2.407 -> 2.405 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +1.12% +0.96%] index_add_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.337 -> 2.346 ( +0.39%) [ +0.34% +0.13% +0.00% / +0.39% +0.98% +1.03%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.345 ms / 100) 5.029 -> 5.029 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.78% +0.82%] index_select const : Elapsed 0.050 ms (5.029 ms / 100) 5.085 -> 5.088 ( +0.06%) [ +0.00% +0.06% +0.02% / +0.06% +0.75% +0.63%] index_select wrap : Elapsed 0.051 ms (5.085 ms / 100) 5.100 -> 5.105 ( +0.10%) [ +0.14% +0.00% +0.00% / +0.10% +0.67% +0.71%] index_select linear : Elapsed 0.051 ms (5.107 ms / 100) 5.054 -> 5.048 ( -0.12%) [ +0.20% +0.02% +0.00% / -0.12% +0.71% +0.61%] index_select reverse : Elapsed 0.051 ms (5.064 ms / 100) 5.033 -> 5.024 ( -0.18%) [ +0.00% +0.02% +0.14% / -0.18% +0.58% +0.66%] index_select skip64 : Elapsed 0.050 ms (5.033 ms / 100) 5.029 -> 5.035 ( +0.12%) [ +0.06% +0.00% +0.10% / +0.12% +0.78% +0.84%] index_select skip256 : Elapsed 0.050 ms (5.032 ms / 100) 5.072 -> 5.070 ( -0.04%) [ +0.00% +0.00% +0.06% / -0.04% +0.81% +0.73%] index_select spread : Elapsed 0.051 ms (5.072 ms / 100) 5.088 -> 5.099 ( +0.22%) [ +0.00% +0.20% +0.37% / +0.22% +0.92% +0.71%] index_select strided 3 : Elapsed 0.051 ms (5.088 ms / 100) 5.084 -> 5.083 ( -0.02%) [ +0.20% +0.18% +0.00% / -0.02% +0.79% +0.77%] index_select random : Elapsed 0.051 ms (5.094 ms / 100) 5.069 -> 5.072 ( +0.06%) [ +0.08% +0.00% +0.02% / +0.06% +0.85% +0.83%] index_select random_sorted : Elapsed 0.051 ms (5.073 ms / 100) B = [20, 40, 16, 4] (stride (1, 320, 20, 12800)) A = [20, 40, 5, 4] (stride (160, 1, 3200, 40)) dim = 2 2.365 -> 2.364 ( -0.04%) [ +0.08% +0.21% +0.00% / -0.04% +0.55% +0.51%] index_add_ linear : Elapsed 0.024 ms (2.367 ms / 100) 2.310 -> 2.309 ( -0.04%) [ +0.00% +0.13% +0.00% / -0.04% +0.56% +0.39%] index_copy_ linear : Elapsed 0.023 ms (2.310 ms / 100) 2.368 -> 2.371 ( +0.13%) [ +0.21% +0.00% +0.08% / +0.13% +0.25% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.373 ms / 100) 2.319 -> 2.309 ( -0.43%) [ +0.09% +0.09% +0.00% / +0.04% -0.34% -0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.321 ms / 100) 2.367 -> 2.358 ( -0.38%) [ +0.17% +0.13% +0.00% / +0.04% -0.38% +0.08%] index_add_ spread : Elapsed 0.024 ms (2.371 ms / 100) 2.319 -> 2.307 ( -0.52%) [ +0.22% +0.00% +0.17% / +0.34% -0.52% -0.30%] index_copy_ spread : Elapsed 0.023 ms (2.324 ms / 100) 2.367 -> 2.366 ( -0.04%) [ +0.30% +0.25% +0.00% / +0.13% -0.04% +0.00%] index_add_ strided 3 : Elapsed 0.024 ms (2.374 ms / 100) 2.325 -> 2.314 ( -0.47%) [ +0.00% +0.17% +0.04% / -0.04% -0.43% -0.47%] index_copy_ strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.377 -> 2.373 ( -0.17%) [ +0.04% +0.00% +0.04% / -0.17% +0.08% +0.13%] index_add_ strided 5 : Elapsed 0.024 ms (2.378 ms / 100) 2.329 -> 2.325 ( -0.17%) [ +0.00% +0.17% +0.21% / -0.17% +0.09% +0.04%] index_copy_ strided 5 : Elapsed 0.023 ms (2.329 ms / 100) 2.377 -> 2.374 ( -0.13%) [ +0.08% +0.17% +0.00% / -0.04% -0.13% -0.04%] index_add_ strided 7 : Elapsed 0.024 ms (2.379 ms / 100) 2.331 -> 2.322 ( -0.39%) [ +0.04% +0.00% +0.00% / +0.09% -0.13% -0.39%] index_copy_ strided 7 : Elapsed 0.023 ms (2.332 ms / 100) 2.385 -> 2.373 ( -0.50%) [ +0.25% +0.29% +0.00% / +0.13% -0.50% -0.38%] index_add_ perm : Elapsed 0.024 ms (2.391 ms / 100) 2.340 -> 2.328 ( -0.51%) [ +0.09% +0.17% +0.00% / +0.13% -0.34% -0.51%] index_copy_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.374 -> 2.373 ( -0.04%) [ +0.13% +0.00% +0.08% / +0.34% +0.13% -0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.377 ms / 100) 2.328 -> 2.322 ( -0.26%) [ +0.17% +0.13% +0.00% / +0.17% +0.13% -0.26%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.332 ms / 100) 4.938 -> 4.931 ( -0.14%) [ +0.06% +0.00% +0.12% / -0.14% +0.43% +0.45%] index_select const : Elapsed 0.049 ms (4.941 ms / 100) 4.964 -> 4.969 ( +0.10%) [ +0.04% +0.00% +0.10% / +0.10% +0.54% +0.24%] index_select wrap : Elapsed 0.050 ms (4.966 ms / 100) 4.977 -> 4.983 ( +0.12%) [ +0.14% +0.08% +0.00% / +0.12% +0.26% +0.28%] index_select linear : Elapsed 0.050 ms (4.984 ms / 100) 4.992 -> 4.990 ( -0.04%) [ +0.60% +0.50% +0.00% / +0.00% +0.46% -0.04%] index_select reverse : Elapsed 0.050 ms (5.022 ms / 100) 4.927 -> 4.929 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.39% +0.30%] index_select skip64 : Elapsed 0.049 ms (4.927 ms / 100) 4.927 -> 4.926 ( -0.02%) [ +0.00% +0.14% +0.04% / -0.02% +0.37% +0.41%] index_select skip256 : Elapsed 0.049 ms (4.927 ms / 100) 4.982 -> 4.987 ( +0.10%) [ +0.00% +0.18% +0.12% / +0.10% +0.40% +0.52%] index_select spread : Elapsed 0.050 ms (4.982 ms / 100) 4.966 -> 4.972 ( +0.12%) [ +0.22% +0.12% +0.00% / +0.12% +0.46% +0.52%] index_select strided 3 : Elapsed 0.050 ms (4.977 ms / 100) 4.984 -> 4.987 ( +0.06%) [ +0.10% +0.06% +0.00% / +0.06% +0.58% +0.46%] index_select random : Elapsed 0.050 ms (4.989 ms / 100) 4.982 -> 4.985 ( +0.06%) [ +0.12% +0.00% +0.04% / +0.06% +0.54% +0.56%] index_select random_sorted : Elapsed 0.050 ms (4.988 ms / 100) B = [20, 40, 16, 4] (stride (40, 1, 800, 12800)) A = [20, 40, 5, 4] (stride (40, 1, 800, 4000)) dim = 2 0.842 -> 0.839 ( -0.36%) [ +0.48% +0.24% +0.00% / +0.12% -0.24% -0.36%] index_add_ linear : Elapsed 0.008 ms (0.846 ms / 100) 0.826 -> 0.819 ( -0.85%) [ +0.24% +0.24% +0.00% / +0.24% -0.48% -0.85%] index_copy_ linear : Elapsed 0.008 ms (0.828 ms / 100) 0.833 -> 0.836 ( +0.36%) [ +0.24% +0.00% +0.12% / +0.36% +0.72% +0.48%] index_add_ reverse : Elapsed 0.008 ms (0.835 ms / 100) 0.814 -> 0.816 ( +0.25%) [ +0.00% +0.37% +0.25% / +0.25% +0.61% +0.61%] index_copy_ reverse : Elapsed 0.008 ms (0.814 ms / 100) 0.852 -> 0.837 ( -1.76%) [ +0.12% +0.00% +0.00% / +0.12% -1.76% -1.64%] index_add_ spread : Elapsed 0.009 ms (0.853 ms / 100) 0.834 -> 0.818 ( -1.92%) [ +0.36% +0.36% +0.00% / +0.36% -1.92% -1.44%] index_copy_ spread : Elapsed 0.008 ms (0.837 ms / 100) 0.852 -> 0.836 ( -1.88%) [ +0.12% +0.23% +0.00% / -0.47% -1.88% -1.76%] index_add_ strided 3 : Elapsed 0.009 ms (0.853 ms / 100) 0.832 -> 0.815 ( -2.04%) [ +0.24% +0.12% +0.00% / +0.12% -2.04% -1.80%] index_copy_ strided 3 : Elapsed 0.008 ms (0.834 ms / 100) 0.828 -> 0.827 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +0.60% +0.12%] index_add_ strided 5 : Elapsed 0.008 ms (0.828 ms / 100) 0.806 -> 0.808 ( +0.25%) [ +0.37% +0.50% +0.00% / +0.25% +0.74% +0.87%] index_copy_ strided 5 : Elapsed 0.008 ms (0.809 ms / 100) 0.835 -> 0.831 ( -0.48%) [ +0.24% +0.24% +0.00% / +0.12% -0.24% -0.48%] index_add_ strided 7 : Elapsed 0.008 ms (0.837 ms / 100) 0.814 -> 0.812 ( -0.25%) [ +0.74% +0.00% +0.61% / +0.25% -0.25% +0.12%] index_copy_ strided 7 : Elapsed 0.008 ms (0.820 ms / 100) 0.831 -> 0.835 ( +0.48%) [ +0.36% +0.36% +0.00% / +0.84% +0.48% +0.60%] index_add_ perm : Elapsed 0.008 ms (0.834 ms / 100) 0.811 -> 0.813 ( +0.25%) [ +0.25% +0.00% +0.25% / +0.25% +0.49% +0.86%] index_copy_ perm : Elapsed 0.008 ms (0.813 ms / 100) 0.835 -> 0.837 ( +0.24%) [ +0.24% +0.36% +0.00% / +0.24% +0.60% +0.24%] index_add_ perm_sorted : Elapsed 0.008 ms (0.837 ms / 100) 0.812 -> 0.818 ( +0.74%) [ +0.49% +0.49% +0.00% / +0.74% +1.23% +0.74%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.816 ms / 100) 1.625 -> 1.628 ( +0.18%) [ +0.00% +0.06% +0.00% / +0.18% +0.68% +0.62%] index_select const : Elapsed 0.016 ms (1.625 ms / 100) 1.635 -> 1.635 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +0.55% +0.55%] index_select wrap : Elapsed 0.016 ms (1.637 ms / 100) 1.659 -> 1.637 ( -1.33%) [ +0.00% +0.18% +0.00% / +0.24% -1.33% -1.33%] index_select linear : Elapsed 0.017 ms (1.659 ms / 100) 1.661 -> 1.638 ( -1.38%) [ +0.06% +0.24% +0.00% / -0.12% -1.38% -1.14%] index_select reverse : Elapsed 0.017 ms (1.662 ms / 100) 1.629 -> 1.627 ( -0.12%) [ +0.12% +0.00% +0.06% / -0.12% +0.43% +0.31%] index_select skip64 : Elapsed 0.016 ms (1.631 ms / 100) 1.625 -> 1.629 ( +0.25%) [ +0.00% +0.06% +0.06% / +0.25% +0.68% +0.55%] index_select skip256 : Elapsed 0.016 ms (1.625 ms / 100) 1.653 -> 1.636 ( -1.03%) [ +0.24% +0.24% +0.00% / +0.12% -0.85% -1.03%] index_select spread : Elapsed 0.017 ms (1.657 ms / 100) 1.645 -> 1.640 ( -0.30%) [ +0.24% +0.06% +0.00% / -0.12% -0.30% -0.24%] index_select strided 3 : Elapsed 0.016 ms (1.649 ms / 100) 1.660 -> 1.638 ( -1.33%) [ +0.36% +0.30% +0.00% / +0.06% -1.33% -1.20%] index_select random : Elapsed 0.017 ms (1.666 ms / 100) 1.653 -> 1.641 ( -0.73%) [ +0.18% +0.00% +0.00% / +0.12% -0.73% -0.54%] index_select random_sorted : Elapsed 0.017 ms (1.656 ms / 100) out_shape = [20, 40, 5, 16] in_shape = [20, 40, 5, 4] idx_dim = 3 B = [20, 40, 5, 16] (stride (3200, 80, 16, 1)) A = [20, 40, 5, 4] (stride (1, 80, 3200, 20)) dim = 3 2.351 -> 2.355 ( +0.17%) [ +0.04% +0.30% +0.00% / +0.17% +0.68% +0.38%] index_add_ linear : Elapsed 0.024 ms (2.352 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.30% +0.26% +0.00% / +0.09% +0.73% +0.56%] index_copy_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.354 -> 2.352 ( -0.08%) [ +0.25% +0.13% +0.00% / -0.08% +0.38% +0.51%] index_add_ reverse : Elapsed 0.024 ms (2.360 ms / 100) 2.319 -> 2.323 ( +0.17%) [ +0.34% +0.00% +0.26% / +0.17% +0.91% +1.12%] index_copy_ reverse : Elapsed 0.023 ms (2.327 ms / 100) 2.394 -> 2.394 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.71% +0.58%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.429 -> 2.424 ( -0.21%) [ +0.04% +0.00% +0.00% / -0.21% +0.66% +0.78%] index_copy_ spread : Elapsed 0.024 ms (2.430 ms / 100) 2.392 -> 2.392 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.63% +0.67%] index_add_ strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.00% +0.16% +0.00% / -0.04% +0.78% +0.70%] index_copy_ strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.21% +0.00% +0.13% / +0.04% +0.33% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.427 -> 2.430 ( +0.12%) [ +0.16% +0.00% +0.08% / +0.12% +0.49% +0.66%] index_copy_ strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.396 -> 2.393 ( -0.13%) [ +0.04% +0.00% +0.08% / -0.13% +0.46% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.397 ms / 100) 2.429 -> 2.442 ( +0.54%) [ +0.00% +0.04% +0.12% / +1.61% +0.74% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.429 ms / 100) 2.400 -> 2.398 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.21% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.400 ms / 100) 2.424 -> 2.431 ( +0.29%) [ +0.00% +0.37% +0.21% / +0.29% +0.62% +0.83%] index_copy_ perm : Elapsed 0.024 ms (2.424 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.17% +0.33% +0.00% / +0.04% +0.33% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.58% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 5.083 -> 5.089 ( +0.12%) [ +0.10% +0.00% +0.00% / +0.12% +0.43% +0.47%] index_select const : Elapsed 0.051 ms (5.088 ms / 100) 5.064 -> 5.061 ( -0.06%) [ +0.00% +0.02% +0.00% / -0.06% +0.38% +0.38%] index_select wrap : Elapsed 0.051 ms (5.064 ms / 100) 5.071 -> 5.066 ( -0.10%) [ +0.20% +0.08% +0.00% / -0.10% +0.47% +0.57%] index_select linear : Elapsed 0.051 ms (5.081 ms / 100) 5.072 -> 5.073 ( +0.02%) [ +0.08% +0.06% +0.00% / +0.02% +0.63% +0.63%] index_select reverse : Elapsed 0.051 ms (5.076 ms / 100) 5.088 -> 5.090 ( +0.04%) [ +0.00% +0.04% +0.02% / +0.04% +0.18% +0.24%] index_select skip64 : Elapsed 0.051 ms (5.088 ms / 100) 5.089 -> 5.090 ( +0.02%) [ +0.10% +0.10% +0.00% / +0.02% +0.53% +0.29%] index_select skip256 : Elapsed 0.051 ms (5.094 ms / 100) 5.070 -> 5.076 ( +0.12%) [ +0.18% +0.06% +0.00% / +0.12% +0.36% +0.30%] index_select spread : Elapsed 0.051 ms (5.079 ms / 100) 5.073 -> 5.074 ( +0.02%) [ +0.08% +0.04% +0.00% / +0.02% +0.39% +0.37%] index_select strided 3 : Elapsed 0.051 ms (5.077 ms / 100) 5.062 -> 5.060 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.61% +0.36%] index_select random : Elapsed 0.051 ms (5.064 ms / 100) 5.069 -> 5.067 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.55% +0.43%] index_select random_sorted : Elapsed 0.051 ms (5.075 ms / 100) B = [20, 40, 5, 16] (stride (80, 1600, 1, 5)) A = [20, 40, 5, 4] (stride (4, 400, 80, 1)) dim = 3 2.501 -> 2.502 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.20% +0.52%] index_add_ linear : Elapsed 0.025 ms (2.501 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.69% +0.00% +0.08% / +0.12% +0.29% +0.45%] index_copy_ linear : Elapsed 0.025 ms (2.469 ms / 100) 2.504 -> 2.502 ( -0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.24% -0.08%] index_add_ reverse : Elapsed 0.025 ms (2.507 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.16% +0.12% +0.12%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.512 -> 2.511 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.16% +0.12%] index_add_ spread : Elapsed 0.025 ms (2.515 ms / 100) 2.485 -> 2.486 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.04% +0.16%] index_copy_ spread : Elapsed 0.025 ms (2.487 ms / 100) 2.509 -> 2.512 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.28% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.512 ms / 100) 2.509 -> 2.509 ( +0.00%) [ +0.00% +0.08% +0.12% / +0.00% +0.36% +0.36%] index_copy_ strided 3 : Elapsed 0.025 ms (2.509 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.36% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.476 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.16% +0.20% +0.00% / -0.04% +0.12% +0.33%] index_copy_ strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.483 -> 2.485 ( +0.08%) [ +0.00% +0.12% +0.16% / +0.08% +0.24% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.483 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.00% +0.28% +0.73%] index_copy_ strided 7 : Elapsed 0.025 ms (2.466 ms / 100) 2.502 -> 2.501 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.48% +0.28%] index_add_ perm : Elapsed 0.025 ms (2.502 ms / 100) 2.484 -> 2.479 ( -0.20%) [ +0.00% +0.00% +0.00% / -0.20% +0.16% -0.04%] index_copy_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.507 -> 2.507 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.20% +0.12%] index_add_ perm_sorted : Elapsed 0.025 ms (2.510 ms / 100) 2.484 -> 2.487 ( +0.12%) [ +0.16% +0.08% +0.00% / +1.25% +0.20% +0.12%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.488 ms / 100) 5.545 -> 5.545 ( +0.00%) [ +0.00% +0.04% +0.20% / +0.00% +0.45% +0.67%] index_select const : Elapsed 0.055 ms (5.545 ms / 100) 5.551 -> 5.550 ( -0.02%) [ +0.04% +0.11% +0.00% / -0.02% +0.25% +0.43%] index_select wrap : Elapsed 0.056 ms (5.553 ms / 100) 5.553 -> 5.561 ( +0.14%) [ +0.16% +0.11% +0.00% / +0.14% +0.45% +0.32%] index_select linear : Elapsed 0.056 ms (5.562 ms / 100) 5.548 -> 5.549 ( +0.02%) [ +0.13% +0.00% +0.04% / +0.02% +0.43% +0.49%] index_select reverse : Elapsed 0.056 ms (5.555 ms / 100) 5.553 -> 5.556 ( +0.05%) [ +0.00% +0.09% +0.04% / +0.05% +0.47% +0.45%] index_select skip64 : Elapsed 0.056 ms (5.553 ms / 100) 5.555 -> 5.559 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.41% +0.25%] index_select skip256 : Elapsed 0.056 ms (5.555 ms / 100) 5.550 -> 5.542 ( -0.14%) [ +0.14% +0.00% +0.00% / -0.14% +0.31% +0.34%] index_select spread : Elapsed 0.056 ms (5.558 ms / 100) 5.552 -> 5.555 ( +0.05%) [ +0.07% +0.00% +0.05% / +0.05% +0.38% +0.22%] index_select strided 3 : Elapsed 0.056 ms (5.556 ms / 100) 5.558 -> 5.552 ( -0.11%) [ +0.00% +0.09% +0.00% / -0.11% +0.20% +0.32%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.548 -> 5.557 ( +0.16%) [ +0.16% +0.09% +0.00% / +0.16% +0.47% +0.40%] index_select random_sorted : Elapsed 0.056 ms (5.557 ms / 100) B = [20, 40, 5, 16] (stride (5, 1600, 1, 100)) A = [20, 40, 5, 4] (stride (1, 100, 20, 4000)) dim = 3 2.406 -> 2.406 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.00% +0.46% +0.54%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.25% +0.04% +0.00% / +0.08% +0.42% +0.21%] index_copy_ linear : Elapsed 0.024 ms (2.362 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.54% +0.29%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.357 -> 2.355 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.34% +0.13%] index_copy_ reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.409 -> 2.410 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.46% +0.62%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.358 -> 2.358 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.21% +0.30%] index_copy_ spread : Elapsed 0.024 ms (2.360 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.25% +0.00% +0.08% / +0.12% +0.45% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.432 ms / 100) 2.368 -> 2.372 ( +0.17%) [ +0.00% +0.21% +0.13% / +0.17% +0.51% +0.42%] index_copy_ strided 3 : Elapsed 0.024 ms (2.368 ms / 100) 2.431 -> 2.431 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.45% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.373 -> 2.375 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.42%] index_copy_ strided 5 : Elapsed 0.024 ms (2.373 ms / 100) 2.408 -> 2.413 ( +0.21%) [ +0.12% +0.25% +0.00% / +0.21% +0.62% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.42% +0.08%] index_copy_ strided 7 : Elapsed 0.024 ms (2.361 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.12% +0.25% +0.00% / +3.32% +0.21% +0.12%] index_add_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.30% +0.08%] index_copy_ perm : Elapsed 0.024 ms (2.360 ms / 100) 2.408 -> 2.414 ( +0.25%) [ +0.12% +0.21% +0.00% / +0.25% +0.29% +0.25%] index_add_ perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) 2.359 -> 2.359 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.34% +0.08%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.360 ms / 100) 5.251 -> 5.255 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.53% +0.42%] index_select const : Elapsed 0.053 ms (5.255 ms / 100) 5.270 -> 5.272 ( +0.04%) [ +0.11% +0.00% +0.04% / +0.04% +0.42% +0.59%] index_select wrap : Elapsed 0.053 ms (5.276 ms / 100) 5.279 -> 5.278 ( -0.02%) [ +0.00% +0.15% +0.04% / -0.02% +0.42% +0.32%] index_select linear : Elapsed 0.053 ms (5.279 ms / 100) 5.317 -> 5.322 ( +0.09%) [ +0.08% +0.00% +0.04% / +0.09% +0.38% +0.43%] index_select reverse : Elapsed 0.053 ms (5.321 ms / 100) 5.319 -> 5.330 ( +0.21%) [ +0.06% +0.00% +0.08% / +0.21% +0.51% +0.55%] index_select skip64 : Elapsed 0.053 ms (5.322 ms / 100) 5.270 -> 5.268 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.30% +0.40%] index_select skip256 : Elapsed 0.053 ms (5.270 ms / 100) 5.265 -> 5.276 ( +0.21%) [ +0.23% +0.00% +0.00% / +0.21% +0.23% +0.30%] index_select spread : Elapsed 0.053 ms (5.277 ms / 100) 5.277 -> 5.280 ( +0.06%) [ +0.06% +0.15% +0.00% / +0.06% +0.34% +0.28%] index_select strided 3 : Elapsed 0.053 ms (5.280 ms / 100) 5.319 -> 5.321 ( +0.04%) [ +0.21% +0.06% +0.00% / +0.04% +0.23% +0.36%] index_select random : Elapsed 0.053 ms (5.330 ms / 100) 5.250 -> 5.260 ( +0.19%) [ +0.06% +0.04% +0.00% / +0.19% +0.27% +0.36%] index_select random_sorted : Elapsed 0.053 ms (5.253 ms / 100) B = [20, 40, 5, 16] (stride (640, 1, 12800, 40)) A = [20, 40, 5, 4] (stride (20, 400, 1, 5)) dim = 3 2.529 -> 2.527 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.20% +0.40%] index_add_ linear : Elapsed 0.025 ms (2.529 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.00% +0.12% +0.12% / -0.04% +0.28% +0.40%] index_copy_ linear : Elapsed 0.025 ms (2.472 ms / 100) 2.531 -> 2.532 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.12% +0.08% +0.04%] index_add_ reverse : Elapsed 0.025 ms (2.532 ms / 100) 2.476 -> 2.471 ( -0.20%) [ +0.16% +0.04% +0.00% / +0.16% +0.16% -0.20%] index_copy_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.524 -> 2.528 ( +0.16%) [ +0.12% +0.00% +0.32% / +0.16% +0.28% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.527 ms / 100) 2.469 -> 2.474 ( +0.20%) [ +0.20% +0.08% +0.00% / +0.32% +0.28% +0.20%] index_copy_ spread : Elapsed 0.025 ms (2.474 ms / 100) 2.528 -> 2.525 ( -0.12%) [ +0.28% +0.00% +0.04% / -0.12% +0.08% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.535 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.28% +0.28%] index_copy_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.530 -> 2.535 ( +0.20%) [ +0.12% +0.00% +0.00% / +1.30% +0.28% +0.20%] index_add_ strided 5 : Elapsed 0.025 ms (2.533 ms / 100) 2.474 -> 2.477 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.16% +0.12% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.478 ms / 100) 2.525 -> 2.525 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.36% +0.24%] index_add_ strided 7 : Elapsed 0.025 ms (2.527 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.61% +0.16% +0.00% / +0.08% +0.36% +0.41%] index_copy_ strided 7 : Elapsed 0.025 ms (2.482 ms / 100) 2.526 -> 2.527 ( +0.04%) [ +0.00% +0.12% +0.20% / +0.04% +0.55% +0.40%] index_add_ perm : Elapsed 0.025 ms (2.526 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.36% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.472 ms / 100) 2.531 -> 2.530 ( -0.04%) [ +0.04% +0.00% +0.20% / -0.04% +0.28% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.532 ms / 100) 2.474 -> 2.479 ( +0.20%) [ +0.00% +0.16% +0.08% / +0.20% +0.44% +0.32%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) 5.434 -> 5.434 ( +0.00%) [ +0.07% +0.04% +0.00% / +0.00% +0.39% +0.53%] index_select const : Elapsed 0.054 ms (5.438 ms / 100) 5.471 -> 5.475 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.07% +0.48% +0.16%] index_select wrap : Elapsed 0.055 ms (5.476 ms / 100) 5.481 -> 5.502 ( +0.38%) [ +0.24% +0.13% +0.00% / +0.38% +0.55% +0.62%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.473 -> 5.466 ( -0.13%) [ +0.00% +0.24% +0.15% / -0.13% +0.16% +0.15%] index_select reverse : Elapsed 0.055 ms (5.473 ms / 100) 5.451 -> 5.472 ( +0.39%) [ +0.00% +0.17% +0.55% / +0.42% +0.39% +0.39%] index_select skip64 : Elapsed 0.055 ms (5.451 ms / 100) 5.448 -> 5.436 ( -0.22%) [ +0.09% +0.00% +0.20% / -0.22% +0.39% +0.46%] index_select skip256 : Elapsed 0.055 ms (5.453 ms / 100) 5.472 -> 5.476 ( +0.07%) [ +0.00% +0.02% +0.09% / +0.07% +0.27% +0.46%] index_select spread : Elapsed 0.055 ms (5.472 ms / 100) 5.462 -> 5.467 ( +0.09%) [ +0.00% +0.27% +0.11% / +0.09% +0.48% +0.48%] index_select strided 3 : Elapsed 0.055 ms (5.462 ms / 100) 5.483 -> 5.480 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.29% +0.27%] index_select random : Elapsed 0.055 ms (5.483 ms / 100) 5.477 -> 5.479 ( +0.04%) [ +0.13% +0.02% +0.00% / +0.04% +0.35% +0.38%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [20, 40, 5, 16] (stride (40, 1, 12800, 800)) A = [20, 40, 5, 4] (stride (800, 1, 40, 200)) dim = 3 2.378 -> 2.380 ( +0.08%) [ +0.13% +0.00% +0.00% / +0.08% +0.55% +0.59%] index_add_ linear : Elapsed 0.024 ms (2.381 ms / 100) 2.330 -> 2.331 ( +0.04%) [ +0.09% +0.13% +0.00% / +0.04% +0.47% +0.30%] index_copy_ linear : Elapsed 0.023 ms (2.332 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.51% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.378 ms / 100) 2.326 -> 2.331 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.82% +0.56%] index_copy_ reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.382 -> 2.383 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.29% +0.29%] index_add_ spread : Elapsed 0.024 ms (2.382 ms / 100) 2.334 -> 2.338 ( +0.17%) [ +0.04% +0.13% +0.00% / +0.17% +0.30% +0.51%] index_copy_ spread : Elapsed 0.023 ms (2.335 ms / 100) 2.384 -> 2.383 ( -0.04%) [ +0.13% +0.17% +0.00% / -0.04% +0.42% +0.34%] index_add_ strided 3 : Elapsed 0.024 ms (2.387 ms / 100) 2.332 -> 2.334 ( +0.09%) [ +0.00% +0.09% +0.21% / +0.09% +0.21% +0.43%] index_copy_ strided 3 : Elapsed 0.023 ms (2.332 ms / 100) 2.383 -> 2.388 ( +0.21%) [ +0.13% +0.13% +0.00% / +0.21% +0.25% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.386 ms / 100) 2.334 -> 2.335 ( +0.04%) [ +0.30% +0.04% +0.00% / +0.13% +0.09% +0.04%] index_copy_ strided 5 : Elapsed 0.023 ms (2.341 ms / 100) 2.380 -> 2.384 ( +0.17%) [ +0.29% +0.17% +0.00% / +0.17% +0.42% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.387 ms / 100) 2.334 -> 2.334 ( +0.00%) [ +0.26% +0.13% +0.00% / +0.00% +0.30% +0.21%] index_copy_ strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.382 -> 2.382 ( +0.00%) [ +0.25% +0.00% +0.08% / +0.00% +0.29% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.388 ms / 100) 2.336 -> 2.334 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.00% +0.13%] index_copy_ perm : Elapsed 0.023 ms (2.336 ms / 100) 2.380 -> 2.379 ( -0.04%) [ +0.25% +0.00% +0.04% / -0.04% +0.34% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.386 ms / 100) 2.327 -> 2.333 ( +0.26%) [ +0.17% +0.21% +0.00% / +0.26% +0.34% +0.30%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.331 ms / 100) 4.978 -> 4.977 ( -0.02%) [ +0.00% +0.06% +0.02% / -0.02% +0.40% +0.20%] index_select const : Elapsed 0.050 ms (4.978 ms / 100) 5.023 -> 5.026 ( +0.06%) [ +0.16% +0.20% +0.00% / +0.06% +0.44% +0.46%] index_select wrap : Elapsed 0.050 ms (5.031 ms / 100) 5.044 -> 5.050 ( +0.12%) [ +0.00% +0.02% +0.12% / +0.12% +0.46% +0.36%] index_select linear : Elapsed 0.050 ms (5.044 ms / 100) 5.003 -> 5.015 ( +0.24%) [ +0.00% +0.34% +0.16% / +0.24% +0.40% +0.60%] index_select reverse : Elapsed 0.050 ms (5.003 ms / 100) 5.006 -> 4.993 ( -0.26%) [ +0.02% +0.12% +0.00% / -0.26% +0.08% +0.24%] index_select skip64 : Elapsed 0.050 ms (5.007 ms / 100) 4.982 -> 4.980 ( -0.04%) [ +0.22% +0.20% +0.00% / -0.04% +0.30% +0.24%] index_select skip256 : Elapsed 0.050 ms (4.993 ms / 100) 5.014 -> 5.026 ( +0.24%) [ +0.14% +0.00% +0.30% / +0.24% +0.42% +0.50%] index_select spread : Elapsed 0.050 ms (5.021 ms / 100) 5.040 -> 5.042 ( +0.04%) [ +0.00% +0.02% +0.08% / +0.04% +0.14% +0.06%] index_select strided 3 : Elapsed 0.050 ms (5.040 ms / 100) 5.031 -> 5.030 ( -0.02%) [ +0.00% +0.00% +0.18% / -0.02% +0.12% +0.34%] index_select random : Elapsed 0.050 ms (5.031 ms / 100) 5.010 -> 5.013 ( +0.06%) [ +0.14% +0.08% +0.00% / +0.06% +0.38% +0.36%] index_select random_sorted : Elapsed 0.050 ms (5.017 ms / 100) B = [20, 40, 5, 16] (stride (1, 20, 800, 4000)) A = [20, 40, 5, 4] (stride (800, 1, 160, 40)) dim = 3 2.502 -> 2.503 ( +0.04%) [ +0.00% +0.24% +0.00% / +0.04% +0.16% +0.08%] index_add_ linear : Elapsed 0.025 ms (2.502 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.24% +0.12%] index_copy_ linear : Elapsed 0.025 ms (2.467 ms / 100) 2.499 -> 2.496 ( -0.12%) [ +0.16% +0.00% +0.08% / +0.04% -0.12% +0.08%] index_add_ reverse : Elapsed 0.025 ms (2.503 ms / 100) 2.467 -> 2.464 ( -0.12%) [ +0.00% +0.00% +0.00% / +0.00% -0.12% -0.04%] index_copy_ reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.499 -> 2.496 ( -0.12%) [ +0.08% +0.12% +0.00% / -0.12% +0.08% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.501 ms / 100) 2.465 -> 2.460 ( -0.20%) [ +0.00% +0.00% +0.08% / -0.16% -0.20% -0.16%] index_copy_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.00% +0.28% +0.04% / +0.12% +0.32% +0.36%] index_add_ strided 3 : Elapsed 0.025 ms (2.496 ms / 100) 2.458 -> 2.462 ( +0.16%) [ +0.00% +0.28% +0.28% / +0.16% +0.45% +0.24%] index_copy_ strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.495 -> 2.502 ( +0.28%) [ +0.00% +0.04% +0.24% / +0.28% +0.56% +0.56%] index_add_ strided 5 : Elapsed 0.025 ms (2.495 ms / 100) 2.463 -> 2.469 ( +0.24%) [ +0.00% +0.20% +0.20% / +0.32% +0.28% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.463 ms / 100) 2.500 -> 2.505 ( +0.20%) [ +0.04% +0.00% +0.00% / +0.88% +0.24% +0.20%] index_add_ strided 7 : Elapsed 0.025 ms (2.501 ms / 100) 2.459 -> 2.466 ( +0.28%) [ +0.16% +0.28% +0.00% / +0.28% +0.33% +0.33%] index_copy_ strided 7 : Elapsed 0.025 ms (2.463 ms / 100) 2.498 -> 2.503 ( +0.20%) [ +0.00% +0.04% +0.16% / +0.20% +0.24% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.498 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.16% +0.00% +0.16% / +0.00% -0.04% +0.12%] index_copy_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.499 -> 2.498 ( -0.04%) [ +0.04% +0.16% +0.00% / -0.04% +0.16% -0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.500 ms / 100) 2.460 -> 2.467 ( +0.28%) [ +0.28% +0.12% +0.00% / +0.28% +0.28% +0.37%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 5.412 -> 5.417 ( +0.09%) [ +0.22% +0.09% +0.00% / +0.09% +0.39% +0.50%] index_select const : Elapsed 0.054 ms (5.424 ms / 100) 5.443 -> 5.449 ( +0.11%) [ +0.15% +0.00% +0.17% / +0.11% +0.26% +0.20%] index_select wrap : Elapsed 0.055 ms (5.451 ms / 100) 5.465 -> 5.469 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.07% +0.22% +0.26%] index_select linear : Elapsed 0.055 ms (5.465 ms / 100) 5.439 -> 5.448 ( +0.17%) [ +0.00% +0.48% +0.28% / +0.29% +0.51% +0.17%] index_select reverse : Elapsed 0.054 ms (5.439 ms / 100) 5.410 -> 5.419 ( +0.17%) [ +0.28% +0.04% +0.00% / +0.17% +0.33% +0.37%] index_select skip64 : Elapsed 0.054 ms (5.425 ms / 100) 5.414 -> 5.408 ( -0.11%) [ +0.09% +0.00% +0.00% / -0.11% +0.30% +0.31%] index_select skip256 : Elapsed 0.054 ms (5.419 ms / 100) 5.442 -> 5.456 ( +0.26%) [ +0.07% +0.00% +0.09% / +0.31% +0.26% +0.44%] index_select spread : Elapsed 0.054 ms (5.446 ms / 100) 5.447 -> 5.450 ( +0.06%) [ +0.07% +0.06% +0.00% / +0.06% +0.17% +0.24%] index_select strided 3 : Elapsed 0.055 ms (5.451 ms / 100) 5.457 -> 5.458 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.31% +0.16%] index_select random : Elapsed 0.055 ms (5.460 ms / 100) 5.459 -> 5.458 ( -0.02%) [ +0.16% +0.05% +0.00% / -0.02% +0.20% +0.22%] index_select random_sorted : Elapsed 0.055 ms (5.468 ms / 100) out_shape = [16, 4, 5, 20] in_shape = [40, 4, 5, 20] idx_dim = 0 B = [16, 4, 5, 20] (stride (5, 1600, 1, 80)) A = [40, 4, 5, 20] (stride (400, 100, 20, 1)) dim = 0 3.193 -> 3.194 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.66% +0.66%] index_select const : Elapsed 0.032 ms (3.194 ms / 100) 3.205 -> 3.206 ( +0.03%) [ +0.06% +0.00% +0.03% / +0.03% +0.56% +0.56%] index_select wrap : Elapsed 0.032 ms (3.207 ms / 100) 3.214 -> 3.214 ( +0.00%) [ +0.03% +0.09% +0.00% / +0.00% +0.28% +0.28%] index_select linear : Elapsed 0.032 ms (3.215 ms / 100) 3.193 -> 3.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.50%] index_select reverse : Elapsed 0.032 ms (3.193 ms / 100) 3.202 -> 3.202 ( +0.00%) [ +0.00% +0.03% +0.09% / +0.00% +0.56% +0.56%] index_select skip64 : Elapsed 0.032 ms (3.202 ms / 100) 3.195 -> 3.195 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.50% +0.47%] index_select skip256 : Elapsed 0.032 ms (3.196 ms / 100) 3.196 -> 3.197 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.44% +0.44%] index_select spread : Elapsed 0.032 ms (3.198 ms / 100) 3.200 -> 3.205 ( +0.16%) [ +0.28% +0.00% +0.00% / +0.16% +0.50% +0.63%] index_select strided 3 : Elapsed 0.032 ms (3.209 ms / 100) 3.193 -> 3.196 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.41% +0.34%] index_select strided 5 : Elapsed 0.032 ms (3.196 ms / 100) 3.206 -> 3.208 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.50% +0.50%] index_select strided 7 : Elapsed 0.032 ms (3.208 ms / 100) 3.197 -> 3.206 ( +0.28%) [ +0.06% +0.00% +0.00% / +0.28% +0.34% +0.34%] index_select strided 8 : Elapsed 0.032 ms (3.199 ms / 100) 3.198 -> 3.197 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.41% +0.41%] index_select strided 16 : Elapsed 0.032 ms (3.199 ms / 100) 3.208 -> 3.210 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.44% +0.44%] index_select random : Elapsed 0.032 ms (3.208 ms / 100) 3.193 -> 3.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select random_sorted : Elapsed 0.032 ms (3.193 ms / 100) 3.216 -> 3.216 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.34% +0.34%] index_select perm : Elapsed 0.032 ms (3.217 ms / 100) 3.198 -> 3.198 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.28% +0.28%] index_select perm_sorted : Elapsed 0.032 ms (3.199 ms / 100) B = [16, 4, 5, 20] (stride (5, 1600, 1, 80)) A = [40, 4, 5, 20] (stride (5, 200, 1, 800)) dim = 0 4.206 -> 4.213 ( +0.17%) [ +0.00% +0.17% +0.19% / +0.17% +0.76% +0.71%] index_select const : Elapsed 0.042 ms (4.206 ms / 100) 4.185 -> 4.188 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.72% +0.74%] index_select wrap : Elapsed 0.042 ms (4.187 ms / 100) 4.204 -> 4.213 ( +0.21%) [ +0.00% +0.07% +0.07% / +0.21% +0.90% +1.07%] index_select linear : Elapsed 0.042 ms (4.204 ms / 100) 4.173 -> 4.186 ( +0.31%) [ +0.34% +0.00% +0.31% / +0.31% +1.17% +0.81%] index_select reverse : Elapsed 0.042 ms (4.187 ms / 100) 4.201 -> 4.216 ( +0.36%) [ +0.38% +0.38% +0.00% / +0.36% +1.12% +0.71%] index_select skip64 : Elapsed 0.042 ms (4.217 ms / 100) 4.192 -> 4.194 ( +0.05%) [ +0.00% +0.31% +0.19% / +0.05% +0.95% +0.95%] index_select skip256 : Elapsed 0.042 ms (4.192 ms / 100) 4.187 -> 4.185 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.76% +0.79%] index_select spread : Elapsed 0.042 ms (4.187 ms / 100) 4.205 -> 4.205 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_select strided 3 : Elapsed 0.042 ms (4.206 ms / 100) 4.175 -> 4.179 ( +0.10%) [ +0.00% +0.07% +0.00% / +0.10% +0.67% +0.60%] index_select strided 5 : Elapsed 0.042 ms (4.175 ms / 100) 4.183 -> 4.186 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.74% +0.84%] index_select strided 7 : Elapsed 0.042 ms (4.186 ms / 100) 4.202 -> 4.202 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.64% +0.71%] index_select strided 8 : Elapsed 0.042 ms (4.204 ms / 100) 4.178 -> 4.177 ( -0.02%) [ +0.00% +0.02% +0.07% / -0.02% +0.74% +0.77%] index_select strided 16 : Elapsed 0.042 ms (4.178 ms / 100) 4.186 -> 4.186 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.76% +0.76%] index_select random : Elapsed 0.042 ms (4.188 ms / 100) 4.194 -> 4.195 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.74% +0.83%] index_select random_sorted : Elapsed 0.042 ms (4.195 ms / 100) 4.182 -> 4.188 ( +0.14%) [ +0.00% +0.02% +0.14% / +0.14% +0.88% +0.88%] index_select perm : Elapsed 0.042 ms (4.182 ms / 100) 4.174 -> 4.184 ( +0.24%) [ +0.05% +0.24% +0.00% / +0.24% +0.67% +0.89%] index_select perm_sorted : Elapsed 0.042 ms (4.176 ms / 100) B = [16, 4, 5, 20] (stride (1, 1600, 16, 80)) A = [40, 4, 5, 20] (stride (4, 1, 3200, 160)) dim = 0 3.942 -> 3.944 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.53% +0.48%] index_select const : Elapsed 0.039 ms (3.942 ms / 100) 3.958 -> 3.959 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.40% +0.48%] index_select wrap : Elapsed 0.040 ms (3.959 ms / 100) 3.945 -> 3.944 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.53% +0.53%] index_select linear : Elapsed 0.039 ms (3.947 ms / 100) 3.944 -> 3.944 ( +0.00%) [ +0.18% +0.00% +0.03% / +0.00% +0.71% +0.53%] index_select reverse : Elapsed 0.040 ms (3.951 ms / 100) 3.948 -> 3.948 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.41% +0.33%] index_select skip64 : Elapsed 0.039 ms (3.948 ms / 100) 3.947 -> 3.946 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.43% +0.33%] index_select skip256 : Elapsed 0.039 ms (3.947 ms / 100) 3.929 -> 3.932 ( +0.08%) [ +0.05% +0.10% +0.00% / +0.08% +0.46% +0.46%] index_select spread : Elapsed 0.039 ms (3.931 ms / 100) 3.952 -> 3.958 ( +0.15%) [ +0.18% +0.05% +0.00% / +0.15% +0.46% +0.51%] index_select strided 3 : Elapsed 0.040 ms (3.959 ms / 100) 3.952 -> 3.958 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.15% +0.53% +0.51%] index_select strided 5 : Elapsed 0.040 ms (3.952 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.53% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.931 ms / 100) 3.946 -> 3.948 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.30% +0.35%] index_select strided 8 : Elapsed 0.039 ms (3.948 ms / 100) 3.927 -> 3.924 ( -0.08%) [ +0.03% +0.20% +0.00% / -0.08% +0.51% +0.41%] index_select strided 16 : Elapsed 0.039 ms (3.928 ms / 100) 3.927 -> 3.930 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.43% +0.43%] index_select random : Elapsed 0.039 ms (3.929 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.36% +0.33%] index_select random_sorted : Elapsed 0.039 ms (3.922 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.10% +0.03% +0.00% / +0.13% +0.61% +0.54%] index_select perm : Elapsed 0.039 ms (3.926 ms / 100) 3.942 -> 3.943 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.48% +0.43%] index_select perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) B = [16, 4, 5, 20] (stride (80, 1, 1280, 4)) A = [40, 4, 5, 20] (stride (4, 1, 3200, 160)) dim = 0 3.921 -> 3.922 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.51% +0.51%] index_select const : Elapsed 0.039 ms (3.921 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.20% +0.00% +0.25% / +0.00% +0.87% +0.81%] index_select wrap : Elapsed 0.039 ms (3.935 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.00% +0.05% +0.03% / -0.03% +0.71% +0.59%] index_select linear : Elapsed 0.039 ms (3.926 ms / 100) 3.918 -> 3.924 ( +0.15%) [ +0.10% +0.00% +0.13% / +0.15% +0.61% +0.74%] index_select reverse : Elapsed 0.039 ms (3.922 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.00% +0.18% +0.13% / +0.05% +0.66% +0.64%] index_select skip64 : Elapsed 0.039 ms (3.917 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.03% +0.00% +0.18% / +0.15% +0.66% +0.64%] index_select skip256 : Elapsed 0.039 ms (3.918 ms / 100) 3.920 -> 3.927 ( +0.18%) [ +0.18% +0.00% +0.20% / +0.18% +0.92% +0.79%] index_select spread : Elapsed 0.039 ms (3.927 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.00% +0.08% +0.05% / +0.03% +0.61% +0.64%] index_select strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.59% +0.61%] index_select strided 5 : Elapsed 0.039 ms (3.923 ms / 100) 3.912 -> 3.925 ( +0.33%) [ +0.00% +0.20% +0.31% / +0.33% +0.95% +0.95%] index_select strided 7 : Elapsed 0.039 ms (3.912 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.13% +0.03% +0.00% / +0.03% +0.64% +0.94%] index_select strided 8 : Elapsed 0.039 ms (3.928 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.69% +0.76%] index_select strided 16 : Elapsed 0.039 ms (3.927 ms / 100) 3.915 -> 3.917 ( +0.05%) [ +0.23% +0.00% +0.18% / +0.05% +0.72% +0.72%] index_select random : Elapsed 0.039 ms (3.924 ms / 100) 3.924 -> 3.922 ( -0.05%) [ +0.00% +0.03% +0.08% / -0.05% +0.79% +0.71%] index_select random_sorted : Elapsed 0.039 ms (3.924 ms / 100) 3.930 -> 3.929 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.69% +0.64%] index_select perm : Elapsed 0.039 ms (3.930 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.10% +0.08% +0.00% / +0.00% +0.66% +0.69%] index_select perm_sorted : Elapsed 0.039 ms (3.934 ms / 100) B = [16, 4, 5, 20] (stride (20, 1, 4, 320)) A = [40, 4, 5, 20] (stride (400, 1, 80, 4)) dim = 0 3.547 -> 3.547 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.73% +0.70%] index_select const : Elapsed 0.035 ms (3.549 ms / 100) 3.553 -> 3.553 ( +0.00%) [ +0.08% +0.00% +0.06% / +0.00% +0.51% +0.51%] index_select wrap : Elapsed 0.036 ms (3.556 ms / 100) 3.562 -> 3.563 ( +0.03%) [ +0.14% +0.17% +0.00% / +0.03% +0.67% +0.62%] index_select linear : Elapsed 0.036 ms (3.567 ms / 100) 3.549 -> 3.550 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.54% +0.51%] index_select reverse : Elapsed 0.035 ms (3.549 ms / 100) 3.557 -> 3.560 ( +0.08%) [ +0.11% +0.06% +0.00% / +0.08% +0.59% +0.53%] index_select skip64 : Elapsed 0.036 ms (3.561 ms / 100) 3.551 -> 3.550 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.51% +0.48%] index_select skip256 : Elapsed 0.036 ms (3.551 ms / 100) 3.539 -> 3.541 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.37% +0.42%] index_select spread : Elapsed 0.035 ms (3.541 ms / 100) 3.547 -> 3.550 ( +0.08%) [ +0.00% +0.08% +0.06% / +0.08% +0.51% +0.39%] index_select strided 3 : Elapsed 0.035 ms (3.547 ms / 100) 3.543 -> 3.546 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.31% +0.45%] index_select strided 5 : Elapsed 0.035 ms (3.543 ms / 100) 3.555 -> 3.558 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.70% +0.51%] index_select strided 7 : Elapsed 0.036 ms (3.558 ms / 100) 3.559 -> 3.560 ( +0.03%) [ +0.14% +0.00% +0.00% / +0.03% +0.51% +0.51%] index_select strided 8 : Elapsed 0.036 ms (3.564 ms / 100) 3.561 -> 3.564 ( +0.08%) [ +0.00% +0.17% +0.00% / +0.08% +0.59% +0.56%] index_select strided 16 : Elapsed 0.036 ms (3.561 ms / 100) 3.562 -> 3.565 ( +0.08%) [ +0.03% +0.00% +0.06% / +0.08% +0.42% +0.31%] index_select random : Elapsed 0.036 ms (3.563 ms / 100) 3.557 -> 3.557 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.28% +0.34%] index_select random_sorted : Elapsed 0.036 ms (3.559 ms / 100) 3.564 -> 3.563 ( -0.03%) [ +0.17% +0.00% +0.00% / -0.03% +0.53% +0.56%] index_select perm : Elapsed 0.036 ms (3.570 ms / 100) 3.539 -> 3.544 ( +0.14%) [ +0.06% +0.00% +0.00% / +0.14% +0.37% +0.40%] index_select perm_sorted : Elapsed 0.035 ms (3.541 ms / 100) B = [16, 4, 5, 20] (stride (1, 16, 64, 320)) A = [40, 4, 5, 20] (stride (4, 1, 3200, 160)) dim = 0 3.931 -> 3.930 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.59% +0.59%] index_select const : Elapsed 0.039 ms (3.932 ms / 100) 3.937 -> 3.946 ( +0.23%) [ +0.00% +0.03% +0.25% / +0.23% +0.64% +0.86%] index_select wrap : Elapsed 0.039 ms (3.937 ms / 100) 3.935 -> 3.937 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.64% +0.58%] index_select linear : Elapsed 0.039 ms (3.939 ms / 100) 3.928 -> 3.928 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.74% +0.74%] index_select reverse : Elapsed 0.039 ms (3.929 ms / 100) 3.926 -> 3.927 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.71% +0.66%] index_select skip64 : Elapsed 0.039 ms (3.927 ms / 100) 3.929 -> 3.927 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.74% +0.64%] index_select skip256 : Elapsed 0.039 ms (3.929 ms / 100) 3.932 -> 3.939 ( +0.18%) [ +0.00% +0.05% +0.00% / +0.18% +0.86% +0.86%] index_select spread : Elapsed 0.039 ms (3.932 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.05% +0.00% +0.15% / +0.00% +0.79% +0.64%] index_select strided 3 : Elapsed 0.039 ms (3.939 ms / 100) 3.932 -> 3.935 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.64% +0.61%] index_select strided 5 : Elapsed 0.039 ms (3.933 ms / 100) 3.918 -> 3.928 ( +0.26%) [ +0.00% +0.33% +0.26% / +0.26% +0.94% +1.05%] index_select strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 3.938 -> 3.933 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.56% +0.66%] index_select strided 8 : Elapsed 0.039 ms (3.938 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.00% +0.08% +0.05% / +0.03% +0.63% +0.81%] index_select strided 16 : Elapsed 0.039 ms (3.938 ms / 100) 3.929 -> 3.933 ( +0.10%) [ +0.03% +0.00% +0.08% / +0.10% +0.81% +0.84%] index_select random : Elapsed 0.039 ms (3.930 ms / 100) 3.936 -> 3.938 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.81% +0.79%] index_select random_sorted : Elapsed 0.039 ms (3.936 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.82% +0.79%] index_select perm : Elapsed 0.039 ms (3.924 ms / 100) 3.946 -> 3.950 ( +0.10%) [ +0.03% +0.00% +0.10% / +0.10% +0.68% +0.81%] index_select perm_sorted : Elapsed 0.039 ms (3.947 ms / 100) out_shape = [40, 16, 5, 20] in_shape = [40, 4, 5, 20] idx_dim = 1 B = [40, 16, 5, 20] (stride (100, 4000, 20, 1)) A = [40, 4, 5, 20] (stride (1, 4000, 40, 200)) dim = 1 2.478 -> 2.482 ( +0.16%) [ +0.12% +0.04% +0.00% / +0.16% +0.48% +0.28%] index_add_ linear : Elapsed 0.025 ms (2.481 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.50% +0.17% +0.00% / +0.17% +0.66% +0.54%] index_copy_ linear : Elapsed 0.024 ms (2.426 ms / 100) 2.478 -> 2.484 ( +0.24%) [ +0.12% +0.28% +0.00% / +0.24% +0.40% +0.36%] index_add_ reverse : Elapsed 0.025 ms (2.481 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.12% +0.08% +0.00% / +0.17% +0.46% +0.37%] index_copy_ reverse : Elapsed 0.024 ms (2.418 ms / 100) 2.481 -> 2.482 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.40% +0.16%] index_add_ spread : Elapsed 0.025 ms (2.481 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.46% +0.37%] index_copy_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.476 -> 2.479 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.32% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.33% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.36% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.474 ms / 100) 2.417 -> 2.418 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.37% +0.04%] index_copy_ strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.480 -> 2.483 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.36% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.482 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.00% +0.00% +0.12% / +0.21% +0.25% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.00% +0.08% +0.20% / +0.00% +0.28% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.476 ms / 100) 2.416 -> 2.419 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.21% +0.12%] index_copy_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.477 -> 2.477 ( +0.00%) [ +0.04% +0.20% +0.00% / +0.00% +0.20% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.478 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.21% +0.17%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 5.497 -> 5.497 ( +0.00%) [ +0.05% +0.00% +0.16% / +0.00% +0.38% +0.38%] index_select const : Elapsed 0.055 ms (5.500 ms / 100) 5.480 -> 5.485 ( +0.09%) [ +0.00% +0.11% +0.02% / +0.09% +0.36% +0.36%] index_select wrap : Elapsed 0.055 ms (5.480 ms / 100) 5.498 -> 5.503 ( +0.09%) [ +0.05% +0.05% +0.00% / +0.09% +0.40% +0.51%] index_select linear : Elapsed 0.055 ms (5.501 ms / 100) 5.473 -> 5.478 ( +0.09%) [ +0.20% +0.00% +0.05% / +0.09% +0.29% +0.55%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.489 -> 5.494 ( +0.09%) [ +0.20% +0.15% +0.00% / +0.09% +0.29% +0.35%] index_select skip64 : Elapsed 0.055 ms (5.500 ms / 100) 5.504 -> 5.501 ( -0.05%) [ +0.05% +0.02% +0.00% / -0.05% +0.15% +0.25%] index_select skip256 : Elapsed 0.055 ms (5.507 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.00% +0.07% +0.00% / +0.04% +0.24% +0.18%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.02% +0.00% +0.04% / -0.02% +0.25% +0.16%] index_select strided 3 : Elapsed 0.055 ms (5.493 ms / 100) 5.476 -> 5.480 ( +0.07%) [ +0.05% +0.04% +0.00% / +0.07% +0.22% +0.29%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.09% +0.05% +0.00% / +0.02% +0.40% +0.46%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [40, 16, 5, 20] (stride (320, 20, 12800, 1)) A = [40, 4, 5, 20] (stride (5, 4000, 1, 200)) dim = 1 2.553 -> 2.555 ( +0.08%) [ +0.00% +0.20% +0.20% / +4.74% +0.08% +0.39%] index_add_ linear : Elapsed 0.026 ms (2.553 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.24% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.499 ms / 100) 2.553 -> 2.557 ( +0.16%) [ +0.20% +0.20% +0.00% / +0.16% +0.24% +0.27%] index_add_ reverse : Elapsed 0.026 ms (2.558 ms / 100) 2.497 -> 2.500 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.16% +0.16% +0.12%] index_copy_ reverse : Elapsed 0.025 ms (2.500 ms / 100) 2.558 -> 2.558 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.08% +0.00% +0.00%] index_add_ spread : Elapsed 0.026 ms (2.561 ms / 100) 2.509 -> 2.506 ( -0.12%) [ +0.20% +0.04% +0.00% / +0.16% -0.12% +0.00%] index_copy_ spread : Elapsed 0.025 ms (2.514 ms / 100) 2.555 -> 2.553 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.00% +0.04%] index_add_ strided 3 : Elapsed 0.026 ms (2.556 ms / 100) 2.506 -> 2.501 ( -0.20%) [ +0.08% +0.00% +0.00% / -0.04% -0.08% -0.20%] index_copy_ strided 3 : Elapsed 0.025 ms (2.508 ms / 100) 2.564 -> 2.568 ( +0.16%) [ +0.00% +0.16% +0.20% / +0.23% +0.23% +0.16%] index_add_ strided 5 : Elapsed 0.026 ms (2.564 ms / 100) 2.514 -> 2.516 ( +0.08%) [ +0.00% +0.12% +0.24% / +0.12% +0.20% +0.08%] index_copy_ strided 5 : Elapsed 0.025 ms (2.514 ms / 100) 2.559 -> 2.560 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.31% +0.12%] index_add_ strided 7 : Elapsed 0.026 ms (2.562 ms / 100) 2.515 -> 2.515 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.08% +0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.518 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.04% +0.20% +0.00% / +0.20% +0.23% +0.31%] index_add_ perm : Elapsed 0.026 ms (2.559 ms / 100) 2.506 -> 2.505 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.16% +0.08%] index_copy_ perm : Elapsed 0.025 ms (2.506 ms / 100) 2.559 -> 2.563 ( +0.16%) [ +0.12% +0.00% +0.31% / +0.16% +0.31% +0.20%] index_add_ perm_sorted : Elapsed 0.026 ms (2.562 ms / 100) 2.509 -> 2.506 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.12% +0.24% +0.12%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.511 ms / 100) 5.740 -> 5.747 ( +0.12%) [ +0.03% +0.09% +0.00% / +0.12% +0.49% +0.52%] index_select const : Elapsed 0.057 ms (5.742 ms / 100) 5.718 -> 5.730 ( +0.21%) [ +0.00% +0.17% +0.19% / +0.26% +0.21% +0.38%] index_select wrap : Elapsed 0.057 ms (5.718 ms / 100) 5.741 -> 5.749 ( +0.14%) [ +0.05% +0.09% +0.00% / +0.14% +0.28% +0.17%] index_select linear : Elapsed 0.057 ms (5.744 ms / 100) 5.698 -> 5.702 ( +0.07%) [ +0.12% +0.07% +0.00% / +0.07% +0.40% +0.35%] index_select reverse : Elapsed 0.057 ms (5.705 ms / 100) 5.719 -> 5.732 ( +0.23%) [ +0.17% +0.14% +0.00% / +0.23% +0.38% +0.52%] index_select skip64 : Elapsed 0.057 ms (5.729 ms / 100) 5.735 -> 5.735 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.37% +0.28%] index_select skip256 : Elapsed 0.057 ms (5.735 ms / 100) 5.707 -> 5.709 ( +0.04%) [ +0.05% +0.09% +0.00% / +0.04% +0.30% +0.32%] index_select spread : Elapsed 0.057 ms (5.710 ms / 100) 5.706 -> 5.711 ( +0.09%) [ +0.05% +0.07% +0.00% / +0.09% +0.28% +0.35%] index_select strided 3 : Elapsed 0.057 ms (5.709 ms / 100) 5.724 -> 5.727 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.26% +0.24%] index_select random : Elapsed 0.057 ms (5.726 ms / 100) 5.714 -> 5.713 ( -0.02%) [ +0.04% +0.05% +0.00% / -0.02% +0.26% +0.28%] index_select random_sorted : Elapsed 0.057 ms (5.716 ms / 100) B = [40, 16, 5, 20] (stride (1, 40, 12800, 640)) A = [40, 4, 5, 20] (stride (5, 200, 1, 800)) dim = 1 2.443 -> 2.449 ( +0.25%) [ +0.08% +0.00% +0.08% / +0.25% +0.33% +0.49%] index_add_ linear : Elapsed 0.024 ms (2.445 ms / 100) 2.394 -> 2.400 ( +0.25%) [ +0.29% +0.21% +0.00% / +0.25% +0.63% +0.58%] index_copy_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.00% +0.08% +0.24% / +0.08% +0.45% +0.57%] index_add_ reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.399 -> 2.403 ( +0.17%) [ +0.00% +0.13% +0.00% / +0.17% +0.38% +0.33%] index_copy_ reverse : Elapsed 0.024 ms (2.399 ms / 100) 2.448 -> 2.446 ( -0.08%) [ +0.00% +0.04% +0.16% / -0.08% +0.29% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.448 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.17% +0.00% +0.04% / +0.08% +0.46% +0.33%] index_copy_ spread : Elapsed 0.024 ms (2.401 ms / 100) 2.444 -> 2.450 ( +0.25%) [ +0.29% +0.08% +0.00% / +0.25% +0.70% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.398 -> 2.403 ( +0.21%) [ +0.04% +0.00% +0.00% / +0.33% +0.21% +0.21%] index_copy_ strided 3 : Elapsed 0.024 ms (2.399 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.04% +0.20% +0.00%] index_add_ strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.396 -> 2.398 ( +0.08%) [ +0.00% +0.21% +0.04% / +0.08% +0.29% +0.21%] index_copy_ strided 5 : Elapsed 0.024 ms (2.396 ms / 100) 2.444 -> 2.448 ( +0.16%) [ +0.00% +0.00% +0.08% / +0.16% +0.37% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.444 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.21% +0.00% +0.17% / +0.17% +0.50% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.399 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.16% +0.24% +0.00% / +0.08% +0.04% +0.37%] index_add_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.08% +0.21% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.20% +0.20% +0.00% / +0.08% +0.33% +0.33%] index_add_ perm_sorted : Elapsed 0.025 ms (2.451 ms / 100) 2.400 -> 2.401 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.04% +0.13% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 5.361 -> 5.357 ( -0.07%) [ +0.06% +0.09% +0.00% / -0.07% +0.41% +0.41%] index_select const : Elapsed 0.054 ms (5.364 ms / 100) 5.347 -> 5.350 ( +0.06%) [ +0.11% +0.07% +0.00% / +0.06% +0.39% +0.43%] index_select wrap : Elapsed 0.054 ms (5.353 ms / 100) 5.366 -> 5.363 ( -0.06%) [ +0.11% +0.04% +0.00% / -0.06% +0.41% +0.45%] index_select linear : Elapsed 0.054 ms (5.372 ms / 100) 5.409 -> 5.410 ( +0.02%) [ +0.00% +0.02% +0.07% / +0.02% +0.50% +0.55%] index_select reverse : Elapsed 0.054 ms (5.409 ms / 100) 5.352 -> 5.349 ( -0.06%) [ +0.00% +0.02% +0.06% / -0.06% +0.34% +0.26%] index_select skip64 : Elapsed 0.054 ms (5.352 ms / 100) 5.355 -> 5.360 ( +0.09%) [ +0.07% +0.00% +0.09% / +0.09% +0.37% +0.30%] index_select skip256 : Elapsed 0.054 ms (5.359 ms / 100) 5.333 -> 5.335 ( +0.04%) [ +0.00% +0.08% +0.02% / +0.04% +0.19% +0.21%] index_select spread : Elapsed 0.053 ms (5.333 ms / 100) 5.339 -> 5.339 ( +0.00%) [ +0.00% +0.06% +0.02% / +0.00% +0.41% +0.37%] index_select strided 3 : Elapsed 0.053 ms (5.339 ms / 100) 5.345 -> 5.345 ( +0.00%) [ +0.09% +0.07% +0.00% / +0.00% +0.34% +0.37%] index_select random : Elapsed 0.053 ms (5.350 ms / 100) 5.310 -> 5.311 ( +0.02%) [ +0.11% +0.11% +0.00% / +0.02% +0.45% +0.41%] index_select random_sorted : Elapsed 0.053 ms (5.316 ms / 100) B = [40, 16, 5, 20] (stride (80, 5, 1, 3200)) A = [40, 4, 5, 20] (stride (400, 1, 80, 4)) dim = 1 2.345 -> 2.341 ( -0.17%) [ +0.17% +0.00% +0.09% / -0.17% +0.09% +0.38%] index_add_ linear : Elapsed 0.023 ms (2.349 ms / 100) 2.314 -> 2.314 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.30% +0.22%] index_copy_ linear : Elapsed 0.023 ms (2.317 ms / 100) 2.349 -> 2.355 ( +0.26%) [ +0.17% +0.17% +0.00% / +0.26% +0.30% +0.30%] index_add_ reverse : Elapsed 0.024 ms (2.353 ms / 100) 2.322 -> 2.321 ( -0.04%) [ +0.17% +0.04% +0.00% / +0.04% -0.04% +0.26%] index_copy_ reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.351 -> 2.353 ( +0.09%) [ +0.26% +0.00% +0.13% / +0.09% +0.17% +0.26%] index_add_ spread : Elapsed 0.024 ms (2.357 ms / 100) 2.352 -> 2.355 ( +0.13%) [ +0.00% +0.04% +0.21% / +0.13% +0.30% +0.13%] index_copy_ spread : Elapsed 0.024 ms (2.352 ms / 100) 2.360 -> 2.360 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.25% +0.55%] index_add_ strided 3 : Elapsed 0.024 ms (2.364 ms / 100) 2.382 -> 2.384 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.17% +0.29% +0.08%] index_copy_ strided 3 : Elapsed 0.024 ms (2.383 ms / 100) 2.314 -> 2.313 ( -0.04%) [ +0.17% +0.30% +0.00% / -0.04% +0.30% +0.35%] index_add_ strided 5 : Elapsed 0.023 ms (2.318 ms / 100) 2.309 -> 2.311 ( +0.09%) [ +0.04% +0.17% +0.00% / +0.09% +0.13% +0.30%] index_copy_ strided 5 : Elapsed 0.023 ms (2.310 ms / 100) 2.327 -> 2.331 ( +0.17%) [ +0.09% +0.13% +0.00% / +0.17% +0.47% +0.47%] index_add_ strided 7 : Elapsed 0.023 ms (2.329 ms / 100) 2.330 -> 2.334 ( +0.17%) [ +0.17% +0.00% +0.21% / +0.17% +0.69% +0.39%] index_copy_ strided 7 : Elapsed 0.023 ms (2.334 ms / 100) 2.348 -> 2.349 ( +0.04%) [ +0.00% +0.09% +0.04% / +0.04% +0.09% +0.17%] index_add_ perm : Elapsed 0.023 ms (2.348 ms / 100) 2.344 -> 2.348 ( +0.17%) [ +0.17% +0.09% +0.00% / +0.17% +0.38% +0.47%] index_copy_ perm : Elapsed 0.023 ms (2.348 ms / 100) 2.343 -> 2.342 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.13% +0.04%] index_add_ perm_sorted : Elapsed 0.023 ms (2.346 ms / 100) 2.345 -> 2.349 ( +0.17%) [ +0.38% +0.21% +0.00% / +0.17% +0.26% +0.30%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.354 ms / 100) 4.954 -> 4.964 ( +0.20%) [ +0.16% +0.10% +0.00% / +0.20% +0.36% +0.40%] index_select const : Elapsed 0.050 ms (4.962 ms / 100) 4.960 -> 4.959 ( -0.02%) [ +0.18% +0.02% +0.00% / -0.02% +0.16% +0.22%] index_select wrap : Elapsed 0.050 ms (4.969 ms / 100) 4.959 -> 4.952 ( -0.14%) [ +0.02% +0.02% +0.00% / -0.14% +0.24% +0.20%] index_select linear : Elapsed 0.050 ms (4.960 ms / 100) 4.970 -> 4.972 ( +0.04%) [ +0.02% +0.02% +0.00% / +0.04% +0.28% +0.32%] index_select reverse : Elapsed 0.050 ms (4.971 ms / 100) 4.949 -> 4.949 ( +0.00%) [ +0.02% +0.00% +0.06% / +0.00% +0.30% +0.30%] index_select skip64 : Elapsed 0.050 ms (4.950 ms / 100) 4.958 -> 4.963 ( +0.10%) [ +0.04% +0.00% +0.00% / +0.10% +0.26% +0.28%] index_select skip256 : Elapsed 0.050 ms (4.960 ms / 100) 4.958 -> 4.958 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.36% +0.26%] index_select spread : Elapsed 0.050 ms (4.963 ms / 100) 4.968 -> 4.971 ( +0.06%) [ +0.02% +0.08% +0.00% / +0.06% +0.28% +0.24%] index_select strided 3 : Elapsed 0.050 ms (4.969 ms / 100) 4.952 -> 4.951 ( -0.02%) [ +0.00% +0.02% +0.10% / -0.02% +0.30% +0.22%] index_select random : Elapsed 0.050 ms (4.952 ms / 100) 4.965 -> 4.969 ( +0.08%) [ +0.24% +0.08% +0.00% / +0.08% +0.44% +0.36%] index_select random_sorted : Elapsed 0.050 ms (4.977 ms / 100) B = [40, 16, 5, 20] (stride (80, 5, 1, 3200)) A = [40, 4, 5, 20] (stride (1, 4000, 800, 40)) dim = 1 2.487 -> 2.491 ( +0.16%) [ +0.24% +0.00% +0.20% / +0.16% +0.56% +0.48%] index_add_ linear : Elapsed 0.025 ms (2.493 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.24% +0.00% +0.16% / +0.04% +0.49% +0.37%] index_copy_ linear : Elapsed 0.025 ms (2.456 ms / 100) 2.490 -> 2.498 ( +0.32%) [ +0.00% +0.16% +0.12% / +0.32% +0.56% +0.68%] index_add_ reverse : Elapsed 0.025 ms (2.490 ms / 100) 2.452 -> 2.456 ( +0.16%) [ +0.00% +0.16% +0.04% / +0.16% +0.53% +0.45%] index_copy_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.508 -> 2.508 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.48% +0.56%] index_add_ spread : Elapsed 0.025 ms (2.511 ms / 100) 2.489 -> 2.498 ( +0.36%) [ +0.16% +0.00% +0.04% / +0.36% +0.40% +0.64%] index_copy_ spread : Elapsed 0.025 ms (2.493 ms / 100) 2.500 -> 2.509 ( +0.36%) [ +0.00% +0.04% +0.16% / +0.36% +0.44% +0.60%] index_add_ strided 3 : Elapsed 0.025 ms (2.500 ms / 100) 2.508 -> 2.512 ( +0.16%) [ +0.12% +0.04% +0.00% / +0.16% +0.36% +0.44%] index_copy_ strided 3 : Elapsed 0.025 ms (2.511 ms / 100) 2.473 -> 2.470 ( -0.12%) [ +0.00% +0.08% +0.04% / -0.12% +0.08% +0.00%] index_add_ strided 5 : Elapsed 0.025 ms (2.473 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.24% +0.29% +0.00% / +0.12% +0.45% +0.45%] index_copy_ strided 5 : Elapsed 0.025 ms (2.455 ms / 100) 2.477 -> 2.482 ( +0.20%) [ +0.32% +0.16% +0.00% / +0.20% +0.48% +0.44%] index_add_ strided 7 : Elapsed 0.025 ms (2.485 ms / 100) 2.474 -> 2.473 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.36% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.478 ms / 100) 2.489 -> 2.491 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.24% +0.08% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.491 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.16% +0.12% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.488 -> 2.486 ( -0.08%) [ +0.24% +0.00% +0.04% / -0.08% +0.16% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.494 ms / 100) 2.462 -> 2.469 ( +0.28%) [ +0.28% +0.24% +0.00% / +0.28% +0.37% +0.49%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 5.587 -> 5.589 ( +0.04%) [ +0.07% +0.00% +0.09% / +0.04% +0.45% +0.36%] index_select const : Elapsed 0.056 ms (5.591 ms / 100) 5.579 -> 5.586 ( +0.13%) [ +0.00% +0.07% +0.09% / +0.13% +0.30% +0.39%] index_select wrap : Elapsed 0.056 ms (5.579 ms / 100) 5.594 -> 5.595 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.43% +0.43%] index_select linear : Elapsed 0.056 ms (5.597 ms / 100) 5.568 -> 5.578 ( +0.18%) [ +0.16% +0.00% +0.11% / +0.18% +0.45% +0.45%] index_select reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.584 -> 5.583 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +0.13% +0.38%] index_select skip64 : Elapsed 0.056 ms (5.588 ms / 100) 5.591 -> 5.597 ( +0.11%) [ +0.14% +0.07% +0.00% / +0.11% +0.25% +0.36%] index_select skip256 : Elapsed 0.056 ms (5.599 ms / 100) 5.581 -> 5.577 ( -0.07%) [ +0.00% +0.02% +0.04% / -0.07% +0.13% +0.13%] index_select spread : Elapsed 0.056 ms (5.581 ms / 100) 5.588 -> 5.587 ( -0.02%) [ +0.00% +0.14% +0.05% / -0.02% +0.38% +0.25%] index_select strided 3 : Elapsed 0.056 ms (5.588 ms / 100) 5.578 -> 5.580 ( +0.04%) [ +0.00% +0.07% +0.16% / +0.04% +0.32% +0.47%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.586 -> 5.584 ( -0.04%) [ +0.00% +0.09% +0.13% / -0.04% +0.16% +0.50%] index_select random_sorted : Elapsed 0.056 ms (5.586 ms / 100) out_shape = [40, 4, 16, 20] in_shape = [40, 4, 5, 20] idx_dim = 2 B = [40, 4, 16, 20] (stride (1280, 320, 20, 1)) A = [40, 4, 5, 20] (stride (400, 100, 20, 1)) dim = 2 0.825 -> 0.823 ( -0.24%) [ +0.00% +0.00% +0.24% / -0.24% +0.24% -0.24%] index_add_ linear : Elapsed 0.008 ms (0.825 ms / 100) 0.801 -> 0.803 ( +0.25%) [ +0.87% +0.25% +0.00% / +0.25% +0.37% +0.25%] index_copy_ linear : Elapsed 0.008 ms (0.808 ms / 100) 0.824 -> 0.823 ( -0.12%) [ +0.24% +0.12% +0.00% / -0.12% -0.12% +0.36%] index_add_ reverse : Elapsed 0.008 ms (0.826 ms / 100) 0.803 -> 0.802 ( -0.12%) [ +0.00% +0.12% +0.00% / +0.37% -0.12% +0.00%] index_copy_ reverse : Elapsed 0.008 ms (0.803 ms / 100) 0.877 -> 0.861 ( -1.82%) [ +0.23% +0.00% +0.00% / +0.68% -1.82% -1.71%] index_add_ spread : Elapsed 0.009 ms (0.879 ms / 100) 0.855 -> 0.844 ( -1.29%) [ +0.23% +0.00% +0.82% / +0.47% -1.29% -1.29%] index_copy_ spread : Elapsed 0.009 ms (0.857 ms / 100) 0.876 -> 0.858 ( -2.05%) [ +0.46% +0.00% +0.23% / -0.11% -1.71% -2.05%] index_add_ strided 3 : Elapsed 0.009 ms (0.880 ms / 100) 0.856 -> 0.842 ( -1.64%) [ +0.12% +0.35% +0.00% / -0.12% -1.52% -1.64%] index_copy_ strided 3 : Elapsed 0.009 ms (0.857 ms / 100) 0.834 -> 0.832 ( -0.24%) [ +0.48% +0.12% +0.00% / +0.24% -0.24% -0.24%] index_add_ strided 5 : Elapsed 0.008 ms (0.838 ms / 100) 0.817 -> 0.814 ( -0.37%) [ +0.24% +0.24% +0.00% / +0.12% -0.37% -0.37%] index_copy_ strided 5 : Elapsed 0.008 ms (0.819 ms / 100) 0.841 -> 0.835 ( -0.71%) [ +0.83% +0.00% +0.59% / +0.71% -0.48% -0.71%] index_add_ strided 7 : Elapsed 0.008 ms (0.848 ms / 100) 0.825 -> 0.822 ( -0.36%) [ +0.48% +0.00% +0.12% / +1.09% -0.12% -0.36%] index_copy_ strided 7 : Elapsed 0.008 ms (0.829 ms / 100) 0.830 -> 0.830 ( +0.00%) [ +0.24% +0.00% +0.24% / +0.60% +0.00% +0.00%] index_add_ perm : Elapsed 0.008 ms (0.832 ms / 100) 0.813 -> 0.809 ( -0.49%) [ +0.12% +0.37% +0.00% / -0.12% -0.12% -0.49%] index_copy_ perm : Elapsed 0.008 ms (0.814 ms / 100) 0.837 -> 0.836 ( -0.12%) [ +0.12% +0.12% +0.00% / +0.00% -0.12% -0.12%] index_add_ perm_sorted : Elapsed 0.008 ms (0.838 ms / 100) 0.816 -> 0.817 ( +0.12%) [ +0.00% +0.25% +0.37% / +0.25% +0.49% +0.12%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.816 ms / 100) 1.583 -> 1.583 ( +0.00%) [ +0.00% +0.32% +0.13% / +0.00% +0.25% +0.25%] index_select const : Elapsed 0.016 ms (1.583 ms / 100) 1.588 -> 1.586 ( -0.13%) [ +0.00% +0.31% +0.25% / +0.06% -0.13% +0.25%] index_select wrap : Elapsed 0.016 ms (1.588 ms / 100) 1.594 -> 1.592 ( -0.13%) [ +0.00% +0.19% +0.31% / -0.13% +0.31% +0.19%] index_select linear : Elapsed 0.016 ms (1.594 ms / 100) 1.590 -> 1.586 ( -0.25%) [ +0.13% +0.06% +0.00% / -0.25% +0.13% +0.25%] index_select reverse : Elapsed 0.016 ms (1.592 ms / 100) 1.579 -> 1.582 ( +0.19%) [ +0.13% +0.13% +0.00% / +0.19% +0.38% +0.82%] index_select skip64 : Elapsed 0.016 ms (1.581 ms / 100) 1.579 -> 1.580 ( +0.06%) [ +0.06% +0.19% +0.00% / +0.06% +0.32% +0.76%] index_select skip256 : Elapsed 0.016 ms (1.580 ms / 100) 1.588 -> 1.588 ( +0.00%) [ +0.00% +0.25% +0.00% / +0.00% +0.76% +0.44%] index_select spread : Elapsed 0.016 ms (1.588 ms / 100) 1.592 -> 1.591 ( -0.06%) [ +0.00% +0.13% +0.06% / -0.06% +0.38% +0.19%] index_select strided 3 : Elapsed 0.016 ms (1.592 ms / 100) 1.585 -> 1.589 ( +0.25%) [ +0.44% +0.00% +0.25% / +0.25% +0.63% +0.63%] index_select random : Elapsed 0.016 ms (1.592 ms / 100) 1.584 -> 1.587 ( +0.19%) [ +0.06% +0.25% +0.00% / +0.19% +0.44% +0.69%] index_select random_sorted : Elapsed 0.016 ms (1.585 ms / 100) B = [40, 4, 16, 20] (stride (1280, 320, 20, 1)) A = [40, 4, 5, 20] (stride (20, 4000, 800, 1)) dim = 2 2.247 -> 2.243 ( -0.18%) [ +0.00% +0.13% +0.22% / -0.18% +0.58% +0.49%] index_add_ linear : Elapsed 0.022 ms (2.247 ms / 100) 2.192 -> 2.186 ( -0.27%) [ +0.09% +0.00% +0.18% / -0.27% +0.64% +0.41%] index_copy_ linear : Elapsed 0.022 ms (2.194 ms / 100) 2.240 -> 2.249 ( +0.40%) [ +0.00% +0.36% +0.00% / +0.54% +0.40% +0.49%] index_add_ reverse : Elapsed 0.022 ms (2.240 ms / 100) 2.179 -> 2.188 ( +0.41%) [ +0.00% +0.50% +0.41% / +0.41% +0.96% +0.87%] index_copy_ reverse : Elapsed 0.022 ms (2.179 ms / 100) 2.243 -> 2.244 ( +0.04%) [ +0.22% +0.13% +0.00% / +0.36% +0.13% +0.04%] index_add_ spread : Elapsed 0.022 ms (2.248 ms / 100) 2.193 -> 2.198 ( +0.23%) [ +0.18% +0.32% +0.00% / +0.23% +0.64% +0.68%] index_copy_ spread : Elapsed 0.022 ms (2.197 ms / 100) 2.252 -> 2.248 ( -0.18%) [ +0.18% +0.13% +0.00% / -0.09% -0.18% +0.00%] index_add_ strided 3 : Elapsed 0.023 ms (2.256 ms / 100) 2.205 -> 2.201 ( -0.18%) [ +0.00% +0.00% +0.00% / -0.18% +0.14% +0.00%] index_copy_ strided 3 : Elapsed 0.022 ms (2.205 ms / 100) 2.253 -> 2.263 ( +0.44%) [ +0.36% +0.49% +0.00% / +0.49% +0.44% +0.62%] index_add_ strided 5 : Elapsed 0.023 ms (2.261 ms / 100) 2.211 -> 2.212 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.32% +0.32%] index_copy_ strided 5 : Elapsed 0.022 ms (2.211 ms / 100) 2.256 -> 2.256 ( +0.00%) [ +0.18% +0.35% +0.00% / +0.00% +0.18% +0.13%] index_add_ strided 7 : Elapsed 0.023 ms (2.260 ms / 100) 2.215 -> 2.213 ( -0.09%) [ +0.00% +0.23% +0.00% / -0.09% +0.23% -0.05%] index_copy_ strided 7 : Elapsed 0.022 ms (2.215 ms / 100) 2.248 -> 2.246 ( -0.09%) [ +0.13% +0.00% +0.00% / -0.09% +0.13% -0.04%] index_add_ perm : Elapsed 0.023 ms (2.251 ms / 100) 2.197 -> 2.205 ( +0.36%) [ +0.09% +0.00% +0.46% / +0.50% +0.36% +0.50%] index_copy_ perm : Elapsed 0.022 ms (2.199 ms / 100) 2.240 -> 2.247 ( +0.31%) [ +0.31% +0.00% +0.40% / +0.31% +0.49% +0.54%] index_add_ perm_sorted : Elapsed 0.022 ms (2.247 ms / 100) 2.196 -> 2.199 ( +0.14%) [ +0.00% +0.09% +0.00% / +0.14% +0.23% +0.55%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.196 ms / 100) 4.486 -> 4.486 ( +0.00%) [ +0.00% +0.45% +0.22% / +0.00% +0.82% +1.07%] index_select const : Elapsed 0.045 ms (4.486 ms / 100) 4.533 -> 4.534 ( +0.02%) [ +0.24% +0.18% +0.00% / +0.02% +0.95% +0.82%] index_select wrap : Elapsed 0.045 ms (4.544 ms / 100) 4.588 -> 4.589 ( +0.02%) [ +0.00% +0.09% +0.00% / +0.02% +0.61% +0.59%] index_select linear : Elapsed 0.046 ms (4.588 ms / 100) 4.564 -> 4.562 ( -0.04%) [ +0.18% +0.09% +0.00% / -0.04% +0.70% +0.50%] index_select reverse : Elapsed 0.046 ms (4.572 ms / 100) 4.498 -> 4.507 ( +0.20%) [ +0.07% +0.04% +0.00% / +0.20% +0.80% +0.78%] index_select skip64 : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.496 ( -0.09%) [ +0.18% +0.09% +0.00% / -0.09% +0.93% +0.76%] index_select skip256 : Elapsed 0.045 ms (4.508 ms / 100) 4.544 -> 4.549 ( +0.11%) [ +0.00% +0.07% +0.07% / +0.11% +0.73% +0.70%] index_select spread : Elapsed 0.045 ms (4.544 ms / 100) 4.531 -> 4.553 ( +0.49%) [ +0.00% +0.15% +0.53% / +0.49% +1.04% +1.32%] index_select strided 3 : Elapsed 0.045 ms (4.531 ms / 100) 4.545 -> 4.558 ( +0.29%) [ +0.13% +0.00% +0.18% / +0.29% +0.79% +0.77%] index_select random : Elapsed 0.046 ms (4.551 ms / 100) 4.546 -> 4.556 ( +0.22%) [ +0.02% +0.33% +0.00% / +0.22% +0.75% +0.62%] index_select random_sorted : Elapsed 0.045 ms (4.547 ms / 100) B = [40, 4, 16, 20] (stride (1280, 1, 80, 4)) A = [40, 4, 5, 20] (stride (20, 5, 1, 800)) dim = 2 2.376 -> 2.380 ( +0.17%) [ +0.29% +0.21% +0.00% / +0.17% +0.55% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.383 ms / 100) 2.306 -> 2.306 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.13% +0.43%] index_copy_ linear : Elapsed 0.023 ms (2.309 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.51% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.376 ms / 100) 2.298 -> 2.304 ( +0.26%) [ +0.30% +0.00% +0.17% / +0.26% +0.57% +0.61%] index_copy_ reverse : Elapsed 0.023 ms (2.305 ms / 100) 2.381 -> 2.378 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.67% +0.71%] index_add_ spread : Elapsed 0.024 ms (2.381 ms / 100) 2.305 -> 2.304 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.69% +0.61%] index_copy_ spread : Elapsed 0.023 ms (2.307 ms / 100) 2.378 -> 2.379 ( +0.04%) [ +0.17% +0.00% +0.00% / +0.04% +0.76% +0.63%] index_add_ strided 3 : Elapsed 0.024 ms (2.382 ms / 100) 2.296 -> 2.306 ( +0.44%) [ +0.35% +0.61% +0.00% / +0.44% +1.18% +1.00%] index_copy_ strided 3 : Elapsed 0.023 ms (2.304 ms / 100) 2.371 -> 2.369 ( -0.08%) [ +0.00% +0.04% +0.13% / -0.08% +0.93% +0.89%] index_add_ strided 5 : Elapsed 0.024 ms (2.371 ms / 100) 2.295 -> 2.289 ( -0.26%) [ +0.00% +0.09% +0.31% / -0.26% +0.92% +0.87%] index_copy_ strided 5 : Elapsed 0.023 ms (2.295 ms / 100) 2.371 -> 2.375 ( +0.17%) [ +0.21% +0.00% +0.21% / +0.17% +0.93% +1.10%] index_add_ strided 7 : Elapsed 0.024 ms (2.376 ms / 100) 2.298 -> 2.299 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.87% +1.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.298 ms / 100) 2.374 -> 2.380 ( +0.25%) [ +0.29% +0.08% +0.00% / +0.25% +0.88% +1.01%] index_add_ perm : Elapsed 0.024 ms (2.381 ms / 100) 2.298 -> 2.305 ( +0.30%) [ +0.26% +0.00% +0.26% / +0.30% +1.17% +1.04%] index_copy_ perm : Elapsed 0.023 ms (2.304 ms / 100) 2.381 -> 2.379 ( -0.08%) [ +0.08% +0.17% +0.00% / -0.08% +0.76% +0.59%] index_add_ perm_sorted : Elapsed 0.024 ms (2.383 ms / 100) 2.304 -> 2.306 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +0.78% +0.69%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.305 ms / 100) 5.018 -> 5.045 ( +0.54%) [ +0.00% +0.22% +0.22% / +0.54% +0.82% +0.74%] index_select const : Elapsed 0.050 ms (5.018 ms / 100) 5.002 -> 5.011 ( +0.18%) [ +0.02% +0.14% +0.00% / +0.18% +0.86% +1.00%] index_select wrap : Elapsed 0.050 ms (5.003 ms / 100) 5.082 -> 5.090 ( +0.16%) [ +0.00% +0.04% +0.06% / +0.16% +0.57% +0.49%] index_select linear : Elapsed 0.051 ms (5.082 ms / 100) 4.997 -> 5.004 ( +0.14%) [ +0.86% +1.34% +0.00% / +0.14% +1.20% +1.18%] index_select reverse : Elapsed 0.050 ms (5.040 ms / 100) 5.020 -> 5.028 ( +0.16%) [ +0.36% +0.58% +0.00% / +0.16% +1.04% +1.04%] index_select skip64 : Elapsed 0.050 ms (5.038 ms / 100) 5.023 -> 5.059 ( +0.72%) [ +0.24% +0.00% +0.38% / +0.72% +1.15% +1.11%] index_select skip256 : Elapsed 0.050 ms (5.035 ms / 100) 5.016 -> 5.015 ( -0.02%) [ +0.00% +0.14% +0.04% / -0.02% +0.96% +0.96%] index_select spread : Elapsed 0.050 ms (5.016 ms / 100) 5.041 -> 5.050 ( +0.18%) [ +0.08% +0.08% +0.00% / +0.18% +0.81% +0.97%] index_select strided 3 : Elapsed 0.050 ms (5.045 ms / 100) 4.999 -> 5.003 ( +0.08%) [ +0.30% +0.12% +0.00% / +0.08% +0.84% +0.74%] index_select random : Elapsed 0.050 ms (5.014 ms / 100) 5.010 -> 5.010 ( +0.00%) [ +0.00% +0.30% +0.02% / +0.00% +1.00% +1.04%] index_select random_sorted : Elapsed 0.050 ms (5.010 ms / 100) B = [40, 4, 16, 20] (stride (1280, 16, 1, 64)) dim = 2 fill_cnt = 5 1.187 -> 1.189 ( +0.17%) [ +0.08% +0.17% +0.00% / +0.17% +0.34% +0.17%] index_fill_ const : Elapsed 0.012 ms (1.188 ms / 100) 1.188 -> 1.192 ( +0.34%) [ +0.17% +0.17% +0.00% / +0.34% +0.42% +0.34%] index_fill_ linear : Elapsed 0.012 ms (1.190 ms / 100) 1.189 -> 1.191 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.25% +0.17% +0.17%] index_fill_ reverse : Elapsed 0.012 ms (1.189 ms / 100) 1.189 -> 1.187 ( -0.17%) [ +0.08% +0.00% +0.00% / -0.17% +0.00% -0.08%] index_fill_ skip64 : Elapsed 0.012 ms (1.190 ms / 100) 1.188 -> 1.187 ( -0.08%) [ +0.17% +0.08% +0.00% / -0.08% +0.25% +0.17%] index_fill_ skip256 : Elapsed 0.012 ms (1.190 ms / 100) 1.244 -> 1.241 ( -0.24%) [ +0.00% +0.08% +0.00% / -0.24% +0.08% +0.40%] index_fill_ spread : Elapsed 0.012 ms (1.244 ms / 100) 1.241 -> 1.245 ( +0.32%) [ +0.00% +0.32% +0.40% / +0.32% +0.64% +0.48%] index_fill_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.241 -> 1.244 ( +0.24%) [ +0.40% +0.16% +0.00% / +0.24% +0.40% +0.40%] index_fill_ strided 5 : Elapsed 0.012 ms (1.246 ms / 100) 1.244 -> 1.242 ( -0.16%) [ +0.00% +0.00% +0.24% / -0.16% +0.40% +0.24%] index_fill_ strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.241 -> 1.245 ( +0.32%) [ +0.56% +0.56% +0.00% / +0.32% +0.64% +0.64%] index_fill_ strided 8 : Elapsed 0.012 ms (1.248 ms / 100) 1.240 -> 1.239 ( -0.08%) [ +0.32% +0.00% +0.16% / -0.08% +0.56% +0.73%] index_fill_ random : Elapsed 0.012 ms (1.244 ms / 100) 1.243 -> 1.242 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.72% +0.64%] index_fill_ random_sorted : Elapsed 0.012 ms (1.243 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.40% +0.00% +0.32% / +0.08% +0.72% +0.48%] index_fill_ perm : Elapsed 0.012 ms (1.247 ms / 100) 1.242 -> 1.246 ( +0.32%) [ +0.24% +0.00% +0.16% / +0.32% +0.64% +0.56%] index_fill_ perm_sorted : Elapsed 0.012 ms (1.245 ms / 100) B = [40, 4, 16, 20] (stride (1280, 16, 1, 64)) A = [40, 4, 5, 20] (stride (1, 200, 40, 800)) dim = 2 2.574 -> 2.575 ( +0.04%) [ +0.16% +0.23% +0.00% / +0.04% +0.58% +0.35%] index_add_ linear : Elapsed 0.026 ms (2.578 ms / 100) 2.523 -> 2.528 ( +0.20%) [ +0.16% +0.00% +0.00% / +0.20% +0.63% +0.52%] index_copy_ linear : Elapsed 0.025 ms (2.527 ms / 100) 2.569 -> 2.573 ( +0.16%) [ +0.19% +0.00% +0.00% / +0.16% +0.58% +0.66%] index_add_ reverse : Elapsed 0.026 ms (2.574 ms / 100) 2.518 -> 2.523 ( +0.20%) [ +0.16% +0.16% +0.00% / +0.20% +0.44% +0.52%] index_copy_ reverse : Elapsed 0.025 ms (2.522 ms / 100) 2.599 -> 2.602 ( +0.12%) [ +0.23% +0.00% +0.00% / +0.12% +0.50% +0.54%] index_add_ spread : Elapsed 0.026 ms (2.605 ms / 100) 2.589 -> 2.592 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.31% +0.31%] index_copy_ spread : Elapsed 0.026 ms (2.591 ms / 100) 2.604 -> 2.605 ( +0.04%) [ +0.19% +0.08% +0.00% / +0.04% +0.54% +0.61%] index_add_ strided 3 : Elapsed 0.026 ms (2.609 ms / 100) 2.593 -> 2.593 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.31% +0.31%] index_copy_ strided 3 : Elapsed 0.026 ms (2.596 ms / 100) 2.598 -> 2.598 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.38% +0.38%] index_add_ strided 5 : Elapsed 0.026 ms (2.599 ms / 100) 2.588 -> 2.588 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.19% +0.31%] index_copy_ strided 5 : Elapsed 0.026 ms (2.588 ms / 100) 2.593 -> 2.598 ( +0.19%) [ +0.23% +0.00% +0.12% / +0.19% +0.62% +0.39%] index_add_ strided 7 : Elapsed 0.026 ms (2.599 ms / 100) 2.585 -> 2.585 ( +0.00%) [ +0.08% +0.00% +0.39% / +0.00% +0.35% +0.23%] index_copy_ strided 7 : Elapsed 0.026 ms (2.587 ms / 100) 2.602 -> 2.603 ( +0.04%) [ +0.35% +0.35% +0.00% / +0.04% +0.61% +0.61%] index_add_ perm : Elapsed 0.026 ms (2.611 ms / 100) 2.591 -> 2.594 ( +0.12%) [ +0.15% +0.23% +0.00% / +0.12% +0.35% +0.31%] index_copy_ perm : Elapsed 0.026 ms (2.595 ms / 100) 2.598 -> 2.599 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.58% +0.50%] index_add_ perm_sorted : Elapsed 0.026 ms (2.598 ms / 100) 2.588 -> 2.586 ( -0.08%) [ +0.00% +0.15% +0.15% / -0.08% +0.50% +0.46%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.588 ms / 100) 5.658 -> 5.679 ( +0.37%) [ +0.28% +0.28% +0.00% / +0.37% +0.83% +0.87%] index_select const : Elapsed 0.057 ms (5.674 ms / 100) 5.684 -> 5.687 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.48% +0.49%] index_select wrap : Elapsed 0.057 ms (5.688 ms / 100) 5.700 -> 5.701 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.53% +0.56%] index_select linear : Elapsed 0.057 ms (5.704 ms / 100) 5.682 -> 5.684 ( +0.04%) [ +0.11% +0.07% +0.00% / +0.04% +0.53% +0.42%] index_select reverse : Elapsed 0.057 ms (5.688 ms / 100) 5.654 -> 5.657 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.65% +0.74%] index_select skip64 : Elapsed 0.057 ms (5.658 ms / 100) 5.648 -> 5.645 ( -0.05%) [ +0.00% +0.19% +0.11% / -0.05% +0.51% +0.76%] index_select skip256 : Elapsed 0.056 ms (5.648 ms / 100) 5.681 -> 5.683 ( +0.04%) [ +0.12% +0.00% +0.07% / +0.04% +0.53% +0.46%] index_select spread : Elapsed 0.057 ms (5.688 ms / 100) 5.684 -> 5.686 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.04% +0.56% +0.56%] index_select strided 3 : Elapsed 0.057 ms (5.685 ms / 100) 5.684 -> 5.690 ( +0.11%) [ +0.00% +0.11% +0.04% / +0.11% +0.63% +0.79%] index_select random : Elapsed 0.057 ms (5.684 ms / 100) 5.659 -> 5.661 ( +0.04%) [ +0.12% +0.00% +0.14% / +0.04% +0.71% +0.67%] index_select random_sorted : Elapsed 0.057 ms (5.666 ms / 100) B = [40, 4, 16, 20] (stride (320, 12800, 20, 1)) A = [40, 4, 5, 20] (stride (400, 1, 80, 4)) dim = 2 2.294 -> 2.297 ( +0.13%) [ +0.39% +0.26% +0.00% / +0.13% +1.53% +1.92%] index_add_ linear : Elapsed 0.023 ms (2.303 ms / 100) 2.245 -> 2.247 ( +0.09%) [ +0.00% +0.18% +0.04% / +0.09% +1.43% +1.56%] index_copy_ linear : Elapsed 0.022 ms (2.245 ms / 100) 2.299 -> 2.300 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +1.74% +1.83%] index_add_ reverse : Elapsed 0.023 ms (2.300 ms / 100) 2.249 -> 2.254 ( +0.22%) [ +0.00% +0.13% +0.00% / +0.22% +1.56% +1.69%] index_copy_ reverse : Elapsed 0.022 ms (2.249 ms / 100) 2.293 -> 2.291 ( -0.09%) [ +0.00% +0.17% +0.00% / -0.09% +1.88% +1.70%] index_add_ spread : Elapsed 0.023 ms (2.293 ms / 100) 2.251 -> 2.250 ( -0.04%) [ +0.00% +0.00% +0.09% / -0.04% +1.82% +1.42%] index_copy_ spread : Elapsed 0.023 ms (2.251 ms / 100) 2.292 -> 2.290 ( -0.09%) [ +0.17% +0.13% +0.00% / -0.09% +1.44% +1.53%] index_add_ strided 3 : Elapsed 0.023 ms (2.296 ms / 100) 2.254 -> 2.257 ( +0.13%) [ +0.00% +0.31% +0.09% / +0.13% +1.24% +1.42%] index_copy_ strided 3 : Elapsed 0.023 ms (2.254 ms / 100) 2.293 -> 2.300 ( +0.31%) [ +0.00% +0.13% +0.35% / +0.31% +2.01% +2.05%] index_add_ strided 5 : Elapsed 0.023 ms (2.293 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.13% +0.22% +0.00% / +0.04% +1.37% +1.68%] index_copy_ strided 5 : Elapsed 0.023 ms (2.259 ms / 100) 2.289 -> 2.290 ( +0.04%) [ +0.17% +0.09% +0.00% / +0.04% +1.83% +1.97%] index_add_ strided 7 : Elapsed 0.023 ms (2.293 ms / 100) 2.255 -> 2.257 ( +0.09%) [ +0.00% +0.04% +0.00% / +0.09% +1.51% +1.69%] index_copy_ strided 7 : Elapsed 0.023 ms (2.255 ms / 100) 2.293 -> 2.302 ( +0.39%) [ +0.52% +0.00% +0.31% / +0.39% +1.53% +1.53%] index_add_ perm : Elapsed 0.023 ms (2.305 ms / 100) 2.254 -> 2.262 ( +0.35%) [ +0.44% +0.00% +0.40% / +0.35% +1.15% +1.11%] index_copy_ perm : Elapsed 0.023 ms (2.264 ms / 100) 2.289 -> 2.300 ( +0.48%) [ +0.09% +0.09% +0.00% / +0.48% +2.01% +1.75%] index_add_ perm_sorted : Elapsed 0.023 ms (2.291 ms / 100) 2.249 -> 2.253 ( +0.18%) [ +0.00% +0.27% +0.13% / +0.18% +1.56% +1.29%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.249 ms / 100) 4.858 -> 4.863 ( +0.10%) [ +0.31% +0.33% +0.00% / +0.10% +0.86% +0.86%] index_select const : Elapsed 0.049 ms (4.873 ms / 100) 4.870 -> 4.862 ( -0.16%) [ +0.16% +0.16% +0.00% / -0.16% +0.97% +0.94%] index_select wrap : Elapsed 0.049 ms (4.878 ms / 100) 4.892 -> 4.893 ( +0.02%) [ +0.10% +0.04% +0.00% / +0.02% +0.94% +1.00%] index_select linear : Elapsed 0.049 ms (4.897 ms / 100) 4.875 -> 4.890 ( +0.31%) [ +0.31% +0.00% +0.27% / +0.31% +1.27% +0.86%] index_select reverse : Elapsed 0.049 ms (4.890 ms / 100) 4.873 -> 4.869 ( -0.08%) [ +0.21% +0.02% +0.00% / -0.08% +0.86% +0.80%] index_select skip64 : Elapsed 0.049 ms (4.883 ms / 100) 4.855 -> 4.861 ( +0.12%) [ +0.06% +0.00% +0.14% / +0.12% +0.80% +0.87%] index_select skip256 : Elapsed 0.049 ms (4.858 ms / 100) 4.868 -> 4.877 ( +0.18%) [ +0.00% +0.16% +0.14% / +0.18% +1.15% +1.19%] index_select spread : Elapsed 0.049 ms (4.868 ms / 100) 4.858 -> 4.889 ( +0.64%) [ +0.68% +0.68% +0.00% / +0.64% +1.44% +1.79%] index_select strided 3 : Elapsed 0.049 ms (4.891 ms / 100) 4.872 -> 4.878 ( +0.12%) [ +0.14% +0.18% +0.00% / +0.12% +1.09% +1.21%] index_select random : Elapsed 0.049 ms (4.879 ms / 100) 4.835 -> 4.830 ( -0.10%) [ +0.08% +0.10% +0.00% / -0.10% +1.22% +1.34%] index_select random_sorted : Elapsed 0.048 ms (4.839 ms / 100) B = [40, 4, 16, 20] (stride (20, 12800, 800, 1)) A = [40, 4, 5, 20] (stride (1, 4000, 800, 40)) dim = 2 2.551 -> 2.553 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.47%] index_add_ linear : Elapsed 0.026 ms (2.553 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.08% +0.00%] index_copy_ linear : Elapsed 0.025 ms (2.470 ms / 100) 2.544 -> 2.548 ( +0.16%) [ +0.28% +0.24% +0.00% / +0.16% +0.51% +0.59%] index_add_ reverse : Elapsed 0.026 ms (2.551 ms / 100) 2.467 -> 2.468 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.08% +0.16%] index_copy_ reverse : Elapsed 0.025 ms (2.469 ms / 100) 2.550 -> 2.551 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.51% +0.47%] index_add_ spread : Elapsed 0.026 ms (2.551 ms / 100) 2.468 -> 2.471 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.32% +0.16%] index_copy_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.552 -> 2.554 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.35% +0.43%] index_add_ strided 3 : Elapsed 0.026 ms (2.552 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.08% +0.24%] index_copy_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.546 -> 2.548 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.35% +0.31%] index_add_ strided 5 : Elapsed 0.025 ms (2.550 ms / 100) 2.467 -> 2.468 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.12% +0.08% +0.04%] index_copy_ strided 5 : Elapsed 0.025 ms (2.468 ms / 100) 2.550 -> 2.552 ( +0.08%) [ +0.00% +0.20% +0.08% / +0.08% +0.20% +0.08%] index_add_ strided 7 : Elapsed 0.025 ms (2.550 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.04% +0.00% -0.08%] index_copy_ strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.552 -> 2.553 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.27% +0.31%] index_add_ perm : Elapsed 0.026 ms (2.555 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.16% +0.00% +0.12% / -0.08% +0.00% +0.36%] index_copy_ perm : Elapsed 0.025 ms (2.474 ms / 100) 2.553 -> 2.557 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.24% +0.47%] index_add_ perm_sorted : Elapsed 0.026 ms (2.557 ms / 100) 2.468 -> 2.476 ( +0.32%) [ +0.32% +0.08% +0.00% / +0.36% +0.32% +0.41%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.57% +0.56%] index_select const : Elapsed 0.056 ms (5.579 ms / 100) 5.599 -> 5.605 ( +0.11%) [ +0.05% +0.02% +0.00% / +0.11% +0.50% +0.41%] index_select wrap : Elapsed 0.056 ms (5.602 ms / 100) 5.608 -> 5.607 ( -0.02%) [ +0.05% +0.09% +0.00% / -0.02% +0.50% +0.45%] index_select linear : Elapsed 0.056 ms (5.611 ms / 100) 5.596 -> 5.606 ( +0.18%) [ +0.05% +0.09% +0.00% / +0.18% +0.52% +0.57%] index_select reverse : Elapsed 0.056 ms (5.599 ms / 100) 5.581 -> 5.583 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.50% +0.50%] index_select skip64 : Elapsed 0.056 ms (5.583 ms / 100) 5.579 -> 5.585 ( +0.11%) [ +0.00% +0.00% +0.04% / +0.11% +0.56% +0.54%] index_select skip256 : Elapsed 0.056 ms (5.579 ms / 100) 5.582 -> 5.590 ( +0.14%) [ +0.11% +0.11% +0.00% / +0.14% +0.56% +0.59%] index_select spread : Elapsed 0.056 ms (5.588 ms / 100) 5.596 -> 5.603 ( +0.13%) [ +0.11% +0.09% +0.00% / +0.13% +0.52% +0.59%] index_select strided 3 : Elapsed 0.056 ms (5.602 ms / 100) 5.592 -> 5.612 ( +0.36%) [ +0.04% +0.00% +0.04% / +0.36% +0.52% +0.64%] index_select random : Elapsed 0.056 ms (5.594 ms / 100) 5.578 -> 5.579 ( +0.02%) [ +0.07% +0.13% +0.00% / +0.02% +0.65% +0.68%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) B = [40, 4, 16, 20] (stride (1, 12800, 800, 40)) A = [40, 4, 5, 20] (stride (80, 20, 3200, 1)) dim = 2 2.224 -> 2.238 ( +0.63%) [ +0.31% +0.27% +0.00% / +0.63% +0.99% +1.12%] index_add_ linear : Elapsed 0.022 ms (2.231 ms / 100) 2.178 -> 2.189 ( +0.51%) [ +0.00% +0.18% +0.18% / +0.51% +1.06% +0.96%] index_copy_ linear : Elapsed 0.022 ms (2.178 ms / 100) 2.215 -> 2.225 ( +0.45%) [ +0.50% +0.00% +0.36% / +0.45% +1.08% +0.99%] index_add_ reverse : Elapsed 0.022 ms (2.226 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +1.10% +0.74%] index_copy_ reverse : Elapsed 0.022 ms (2.174 ms / 100) 2.229 -> 2.233 ( +0.18%) [ +0.36% +0.22% +0.00% / +0.18% +0.81% +0.67%] index_add_ spread : Elapsed 0.022 ms (2.237 ms / 100) 2.185 -> 2.186 ( +0.05%) [ +0.05% +0.14% +0.00% / +0.05% +0.73% +0.69%] index_copy_ spread : Elapsed 0.022 ms (2.186 ms / 100) 2.227 -> 2.244 ( +0.76%) [ +0.22% +0.18% +0.00% / +5.66% +0.90% +0.76%] index_add_ strided 3 : Elapsed 0.022 ms (2.232 ms / 100) 2.179 -> 2.186 ( +0.32%) [ +0.14% +0.14% +0.00% / +0.32% +0.92% +0.96%] index_copy_ strided 3 : Elapsed 0.022 ms (2.182 ms / 100) 2.223 -> 2.228 ( +0.22%) [ +0.00% +0.27% +0.22% / +0.22% +0.67% +0.63%] index_add_ strided 5 : Elapsed 0.022 ms (2.223 ms / 100) 2.178 -> 2.178 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.64% +0.60%] index_copy_ strided 5 : Elapsed 0.022 ms (2.178 ms / 100) 2.234 -> 2.231 ( -0.13%) [ +0.27% +0.22% +0.00% / +0.13% -0.13% +0.09%] index_add_ strided 7 : Elapsed 0.022 ms (2.240 ms / 100) 2.182 -> 2.183 ( +0.05%) [ +0.32% +0.23% +0.00% / +0.05% +0.18% +0.23%] index_copy_ strided 7 : Elapsed 0.022 ms (2.189 ms / 100) 2.222 -> 2.230 ( +0.36%) [ +0.00% +0.36% +0.14% / +0.36% +0.99% +0.95%] index_add_ perm : Elapsed 0.022 ms (2.222 ms / 100) 2.178 -> 2.180 ( +0.09%) [ +0.00% +0.18% +0.18% / +0.09% +1.24% +0.92%] index_copy_ perm : Elapsed 0.022 ms (2.178 ms / 100) 2.229 -> 2.234 ( +0.22%) [ +0.00% +0.04% +0.00% / +0.27% +0.22% +0.27%] index_add_ perm_sorted : Elapsed 0.022 ms (2.229 ms / 100) 2.179 -> 2.186 ( +0.32%) [ +0.00% +0.00% +0.18% / +0.32% +0.60% +0.73%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.179 ms / 100) 4.421 -> 4.423 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.05% +0.81% +0.77%] index_select const : Elapsed 0.044 ms (4.421 ms / 100) 4.464 -> 4.464 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.00% +0.90% +0.81%] index_select wrap : Elapsed 0.045 ms (4.465 ms / 100) 4.481 -> 4.486 ( +0.11%) [ +0.09% +0.13% +0.00% / +0.11% +0.74% +0.96%] index_select linear : Elapsed 0.045 ms (4.485 ms / 100) 4.468 -> 4.475 ( +0.16%) [ +0.20% +0.00% +0.02% / +0.16% +0.90% +0.92%] index_select reverse : Elapsed 0.045 ms (4.477 ms / 100) 4.411 -> 4.417 ( +0.14%) [ +0.18% +0.00% +0.00% / +0.14% +0.95% +0.66%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.420 -> 4.424 ( +0.09%) [ +0.11% +0.20% +0.00% / +0.09% +0.81% +0.86%] index_select skip256 : Elapsed 0.044 ms (4.425 ms / 100) 4.447 -> 4.452 ( +0.11%) [ +0.16% +0.29% +0.00% / +0.11% +1.12% +1.15%] index_select spread : Elapsed 0.045 ms (4.454 ms / 100) 4.480 -> 4.481 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.80% +0.80%] index_select strided 3 : Elapsed 0.045 ms (4.483 ms / 100) 4.456 -> 4.455 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +1.26% +1.14%] index_select random : Elapsed 0.045 ms (4.459 ms / 100) 4.473 -> 4.473 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.85% +0.83%] index_select random_sorted : Elapsed 0.045 ms (4.473 ms / 100) B = [40, 4, 16, 20] (stride (64, 1, 4, 2560)) A = [40, 4, 5, 20] (stride (80, 20, 3200, 1)) dim = 2 2.234 -> 2.235 ( +0.04%) [ +0.00% +0.18% +0.45% / +0.04% +0.90% +1.16%] index_add_ linear : Elapsed 0.022 ms (2.234 ms / 100) 2.197 -> 2.199 ( +0.09%) [ +0.00% +0.46% +0.05% / +0.09% +0.41% +0.41%] index_copy_ linear : Elapsed 0.022 ms (2.197 ms / 100) 2.228 -> 2.232 ( +0.18%) [ +0.27% +0.40% +0.00% / +0.18% +1.12% +1.03%] index_add_ reverse : Elapsed 0.022 ms (2.234 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.55% +0.14%] index_copy_ reverse : Elapsed 0.022 ms (2.195 ms / 100) 2.267 -> 2.265 ( -0.09%) [ +0.26% +0.13% +0.00% / -0.09% +0.00% +0.04%] index_add_ spread : Elapsed 0.023 ms (2.273 ms / 100) 2.239 -> 2.226 ( -0.58%) [ +0.00% +0.00% +0.18% / +0.18% -0.49% -0.58%] index_copy_ spread : Elapsed 0.022 ms (2.239 ms / 100) 2.267 -> 2.273 ( +0.26%) [ +0.22% +0.18% +0.00% / +0.26% +0.44% +0.40%] index_add_ strided 3 : Elapsed 0.023 ms (2.272 ms / 100) 2.243 -> 2.231 ( -0.53%) [ +0.09% +0.00% +0.31% / -0.04% -0.53% -0.45%] index_copy_ strided 3 : Elapsed 0.022 ms (2.245 ms / 100) 2.255 -> 2.256 ( +0.04%) [ +0.27% +0.22% +0.00% / +0.35% +0.13% +0.04%] index_add_ strided 5 : Elapsed 0.023 ms (2.261 ms / 100) 2.222 -> 2.201 ( -0.95%) [ +0.05% +0.00% +0.05% / +0.09% -0.72% -0.95%] index_copy_ strided 5 : Elapsed 0.022 ms (2.223 ms / 100) 2.263 -> 2.256 ( -0.31%) [ +0.00% +0.22% +0.13% / -0.09% -0.22% -0.31%] index_add_ strided 7 : Elapsed 0.023 ms (2.263 ms / 100) 2.242 -> 2.220 ( -0.98%) [ +0.09% +0.00% +0.04% / +0.09% -0.98% -0.89%] index_copy_ strided 7 : Elapsed 0.022 ms (2.244 ms / 100) 2.264 -> 2.259 ( -0.22%) [ +0.27% +0.18% +0.00% / +0.49% -0.22% -0.22%] index_add_ perm : Elapsed 0.023 ms (2.270 ms / 100) 2.246 -> 2.221 ( -1.11%) [ +0.00% +0.09% +0.09% / +0.58% -0.58% -1.11%] index_copy_ perm : Elapsed 0.022 ms (2.246 ms / 100) 2.256 -> 2.261 ( +0.22%) [ +0.04% +0.00% +0.22% / +0.66% +0.22% +0.22%] index_add_ perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) 2.238 -> 2.223 ( -0.67%) [ +0.00% +0.04% +0.04% / +0.49% -0.40% -0.67%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.238 ms / 100) 4.455 -> 4.458 ( +0.07%) [ +0.00% +0.16% +0.07% / +0.07% +0.45% +0.61%] index_select const : Elapsed 0.045 ms (4.455 ms / 100) 4.485 -> 4.493 ( +0.18%) [ +0.09% +0.22% +0.00% / +0.18% +0.29% +0.33%] index_select wrap : Elapsed 0.045 ms (4.489 ms / 100) 4.496 -> 4.507 ( +0.24%) [ +0.00% +0.09% +0.11% / +0.24% +0.42% +0.31%] index_select linear : Elapsed 0.045 ms (4.496 ms / 100) 4.499 -> 4.503 ( +0.09%) [ +0.18% +0.00% +0.20% / +0.09% +0.29% +0.36%] index_select reverse : Elapsed 0.045 ms (4.507 ms / 100) 4.464 -> 4.470 ( +0.13%) [ +0.09% +0.27% +0.00% / +0.43% +0.20% +0.13%] index_select skip64 : Elapsed 0.045 ms (4.468 ms / 100) 4.473 -> 4.463 ( -0.22%) [ +0.02% +0.07% +0.00% / -0.22% -0.04% +0.04%] index_select skip256 : Elapsed 0.045 ms (4.474 ms / 100) 4.496 -> 4.502 ( +0.13%) [ +0.00% +0.11% +0.13% / +0.13% +0.22% +0.24%] index_select spread : Elapsed 0.045 ms (4.496 ms / 100) 4.484 -> 4.489 ( +0.11%) [ +0.20% +0.00% +0.07% / +0.11% +0.27% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.493 ms / 100) 4.471 -> 4.475 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +0.49% +0.45%] index_select random : Elapsed 0.045 ms (4.473 ms / 100) 4.486 -> 4.495 ( +0.20%) [ +0.02% +0.09% +0.00% / +0.20% +0.38% +0.38%] index_select random_sorted : Elapsed 0.045 ms (4.487 ms / 100) B = [40, 4, 16, 20] (stride (1, 640, 40, 2560)) A = [40, 4, 5, 20] (stride (1, 4000, 800, 40)) dim = 2 2.550 -> 2.557 ( +0.27%) [ +0.20% +0.24% +0.00% / +0.27% +0.86% +0.90%] index_add_ linear : Elapsed 0.026 ms (2.555 ms / 100) 2.486 -> 2.487 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.44% +0.64%] index_copy_ linear : Elapsed 0.025 ms (2.488 ms / 100) 2.554 -> 2.559 ( +0.20%) [ +0.04% +0.16% +0.00% / +0.20% +0.86% +0.78%] index_add_ reverse : Elapsed 0.026 ms (2.555 ms / 100) 2.486 -> 2.488 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.80% +0.48%] index_copy_ reverse : Elapsed 0.025 ms (2.487 ms / 100) 2.550 -> 2.554 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.75% +0.75%] index_add_ spread : Elapsed 0.026 ms (2.551 ms / 100) 2.479 -> 2.476 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.81% +0.69%] index_copy_ spread : Elapsed 0.025 ms (2.480 ms / 100) 2.548 -> 2.551 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.94% +0.86%] index_add_ strided 3 : Elapsed 0.026 ms (2.550 ms / 100) 2.480 -> 2.479 ( -0.04%) [ +0.12% +0.08% +0.00% / -0.04% +0.73% +0.56%] index_copy_ strided 3 : Elapsed 0.025 ms (2.483 ms / 100) 2.549 -> 2.554 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.82% +1.02%] index_add_ strided 5 : Elapsed 0.026 ms (2.554 ms / 100) 2.479 -> 2.491 ( +0.48%) [ +0.08% +0.20% +0.00% / +0.48% +0.85% +0.81%] index_copy_ strided 5 : Elapsed 0.025 ms (2.481 ms / 100) 2.548 -> 2.560 ( +0.47%) [ +0.24% +0.00% +0.27% / +0.47% +0.86% +0.86%] index_add_ strided 7 : Elapsed 0.026 ms (2.554 ms / 100) 2.482 -> 2.488 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.60% +0.60%] index_copy_ strided 7 : Elapsed 0.025 ms (2.484 ms / 100) 2.554 -> 2.558 ( +0.16%) [ +0.08% +0.00% +0.04% / +0.16% +0.43% +0.67%] index_add_ perm : Elapsed 0.026 ms (2.556 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.24% +0.00% +0.24% / +0.12% +0.60% +0.64%] index_copy_ perm : Elapsed 0.025 ms (2.487 ms / 100) 2.551 -> 2.555 ( +0.16%) [ +0.04% +0.00% +0.16% / +0.16% +0.74% +0.78%] index_add_ perm_sorted : Elapsed 0.026 ms (2.552 ms / 100) 2.479 -> 2.478 ( -0.04%) [ +0.00% +0.20% +0.04% / -0.04% +0.97% +0.93%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.479 ms / 100) 5.654 -> 5.657 ( +0.05%) [ +0.00% +0.11% +0.02% / +0.05% +0.67% +0.73%] index_select const : Elapsed 0.057 ms (5.654 ms / 100) 5.676 -> 5.679 ( +0.05%) [ +0.11% +0.16% +0.00% / +0.05% +0.70% +0.67%] index_select wrap : Elapsed 0.057 ms (5.682 ms / 100) 5.673 -> 5.680 ( +0.12%) [ +0.11% +0.14% +0.00% / +0.12% +0.74% +0.76%] index_select linear : Elapsed 0.057 ms (5.679 ms / 100) 5.662 -> 5.671 ( +0.16%) [ +0.19% +0.23% +0.00% / +0.16% +0.62% +0.69%] index_select reverse : Elapsed 0.057 ms (5.673 ms / 100) 5.659 -> 5.663 ( +0.07%) [ +0.16% +0.00% +0.18% / +0.07% +0.62% +0.67%] index_select skip64 : Elapsed 0.057 ms (5.668 ms / 100) 5.651 -> 5.651 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.73% +0.64%] index_select skip256 : Elapsed 0.057 ms (5.660 ms / 100) 5.659 -> 5.660 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.53% +0.58%] index_select spread : Elapsed 0.057 ms (5.660 ms / 100) 5.668 -> 5.673 ( +0.09%) [ +0.11% +0.18% +0.00% / +0.09% +0.88% +0.60%] index_select strided 3 : Elapsed 0.057 ms (5.674 ms / 100) 5.654 -> 5.655 ( +0.02%) [ +0.11% +0.05% +0.00% / +0.02% +0.69% +0.71%] index_select random : Elapsed 0.057 ms (5.660 ms / 100) 5.655 -> 5.656 ( +0.02%) [ +0.09% +0.07% +0.00% / +0.02% +0.85% +0.85%] index_select random_sorted : Elapsed 0.057 ms (5.660 ms / 100) out_shape = [40, 4, 5, 16] in_shape = [40, 4, 5, 20] idx_dim = 3 B = [40, 4, 5, 16] (stride (320, 80, 16, 1)) A = [40, 4, 5, 20] (stride (100, 4000, 1, 5)) dim = 3 3.303 -> 3.302 ( -0.03%) [ +0.00% +0.12% +0.00% / -0.03% +0.82% +0.67%] index_select const : Elapsed 0.033 ms (3.303 ms / 100) 3.307 -> 3.310 ( +0.09%) [ +0.00% +0.00% +0.03% / +0.09% +1.00% +1.00%] index_select wrap : Elapsed 0.033 ms (3.307 ms / 100) 3.299 -> 3.300 ( +0.03%) [ +0.00% +0.06% +0.09% / +0.03% +0.70% +0.82%] index_select linear : Elapsed 0.033 ms (3.299 ms / 100) 3.299 -> 3.298 ( -0.03%) [ +0.03% +0.06% +0.00% / -0.03% +0.88% +0.91%] index_select reverse : Elapsed 0.033 ms (3.300 ms / 100) 3.320 -> 3.327 ( +0.21%) [ +0.21% +0.15% +0.00% / +0.21% +0.87% +0.84%] index_select skip64 : Elapsed 0.033 ms (3.327 ms / 100) 3.313 -> 3.316 ( +0.09%) [ +0.00% +0.00% +0.03% / +0.09% +0.85% +0.72%] index_select skip256 : Elapsed 0.033 ms (3.313 ms / 100) 3.314 -> 3.312 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.75% +0.72%] index_select spread : Elapsed 0.033 ms (3.314 ms / 100) 3.307 -> 3.307 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.67% +0.76%] index_select strided 3 : Elapsed 0.033 ms (3.308 ms / 100) 3.300 -> 3.301 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.73% +0.79%] index_select strided 5 : Elapsed 0.033 ms (3.301 ms / 100) 3.298 -> 3.301 ( +0.09%) [ +0.12% +0.03% +0.00% / +0.09% +0.76% +0.85%] index_select strided 7 : Elapsed 0.033 ms (3.302 ms / 100) 3.308 -> 3.309 ( +0.03%) [ +0.09% +0.00% +0.03% / +0.03% +0.79% +0.70%] index_select strided 8 : Elapsed 0.033 ms (3.311 ms / 100) 3.306 -> 3.310 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.67% +0.79%] index_select strided 16 : Elapsed 0.033 ms (3.310 ms / 100) 3.320 -> 3.319 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.45% +0.45%] index_select random : Elapsed 0.033 ms (3.320 ms / 100) 3.304 -> 3.311 ( +0.21%) [ +0.30% +0.36% +0.00% / +0.21% +0.82% +0.58%] index_select random_sorted : Elapsed 0.033 ms (3.314 ms / 100) 3.307 -> 3.308 ( +0.03%) [ +0.12% +0.00% +0.06% / +0.03% +0.60% +0.67%] index_select perm : Elapsed 0.033 ms (3.311 ms / 100) 3.312 -> 3.312 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.60%] index_select perm_sorted : Elapsed 0.033 ms (3.312 ms / 100) B = [40, 4, 5, 16] (stride (5, 3200, 1, 200)) A = [40, 4, 5, 20] (stride (1, 800, 3200, 40)) dim = 3 4.129 -> 4.130 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.53% +0.53%] index_select const : Elapsed 0.041 ms (4.130 ms / 100) 4.135 -> 4.135 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.48%] index_select wrap : Elapsed 0.041 ms (4.135 ms / 100) 4.149 -> 4.149 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.46% +0.48%] index_select linear : Elapsed 0.041 ms (4.149 ms / 100) 4.125 -> 4.127 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.61% +0.61%] index_select reverse : Elapsed 0.041 ms (4.127 ms / 100) 4.130 -> 4.135 ( +0.12%) [ +0.05% +0.00% +0.00% / +0.12% +0.63% +0.61%] index_select skip64 : Elapsed 0.041 ms (4.132 ms / 100) 4.133 -> 4.133 ( +0.00%) [ +0.10% +0.00% +0.02% / +0.00% +0.48% +0.48%] index_select skip256 : Elapsed 0.041 ms (4.137 ms / 100) 4.124 -> 4.129 ( +0.12%) [ +0.00% +0.15% +0.07% / +0.12% +0.48% +0.73%] index_select spread : Elapsed 0.041 ms (4.124 ms / 100) 4.143 -> 4.141 ( -0.05%) [ +0.17% +0.00% +0.00% / -0.05% +0.68% +0.51%] index_select strided 3 : Elapsed 0.041 ms (4.150 ms / 100) 4.120 -> 4.121 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.44% +0.46%] index_select strided 5 : Elapsed 0.041 ms (4.122 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.51% +0.51%] index_select strided 7 : Elapsed 0.041 ms (4.138 ms / 100) 4.131 -> 4.132 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.58% +0.58%] index_select strided 8 : Elapsed 0.041 ms (4.133 ms / 100) 4.117 -> 4.121 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.63% +0.58%] index_select strided 16 : Elapsed 0.041 ms (4.119 ms / 100) 4.134 -> 4.135 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.63% +0.65%] index_select random : Elapsed 0.041 ms (4.135 ms / 100) 4.142 -> 4.142 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.65% +0.70%] index_select random_sorted : Elapsed 0.041 ms (4.143 ms / 100) 4.130 -> 4.130 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.68% +0.70%] index_select perm : Elapsed 0.041 ms (4.131 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.70% +0.68%] index_select perm_sorted : Elapsed 0.041 ms (4.140 ms / 100) B = [40, 4, 5, 16] (stride (1, 640, 2560, 40)) A = [40, 4, 5, 20] (stride (400, 20, 80, 1)) dim = 3 4.149 -> 4.150 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.84% +0.80%] index_select const : Elapsed 0.041 ms (4.149 ms / 100) 4.163 -> 4.161 ( -0.05%) [ +0.10% +0.00% +0.10% / -0.05% +0.60% +0.65%] index_select wrap : Elapsed 0.042 ms (4.167 ms / 100) 4.170 -> 4.171 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.72% +0.74%] index_select linear : Elapsed 0.042 ms (4.170 ms / 100) 4.150 -> 4.160 ( +0.24%) [ +0.19% +0.00% +0.14% / +0.24% +0.82% +0.87%] index_select reverse : Elapsed 0.042 ms (4.158 ms / 100) 4.144 -> 4.145 ( +0.02%) [ +0.10% +0.00% +0.10% / +0.02% +0.77% +0.58%] index_select skip64 : Elapsed 0.041 ms (4.148 ms / 100) 4.151 -> 4.148 ( -0.07%) [ +0.00% +0.05% +0.02% / -0.07% +0.72% +0.72%] index_select skip256 : Elapsed 0.042 ms (4.151 ms / 100) 4.160 -> 4.162 ( +0.05%) [ +0.07% +0.10% +0.00% / +0.05% +0.65% +0.60%] index_select spread : Elapsed 0.042 ms (4.163 ms / 100) 4.159 -> 4.158 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.63% +0.65%] index_select strided 3 : Elapsed 0.042 ms (4.160 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.68% +0.60%] index_select strided 5 : Elapsed 0.041 ms (4.138 ms / 100) 4.143 -> 4.141 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.75% +0.68%] index_select strided 7 : Elapsed 0.041 ms (4.143 ms / 100) 4.148 -> 4.156 ( +0.19%) [ +0.12% +0.00% +0.17% / +0.19% +0.63% +0.80%] index_select strided 8 : Elapsed 0.042 ms (4.153 ms / 100) 4.143 -> 4.148 ( +0.12%) [ +0.14% +0.00% +0.10% / +0.12% +0.68% +0.63%] index_select strided 16 : Elapsed 0.041 ms (4.149 ms / 100) 4.168 -> 4.174 ( +0.14%) [ +0.05% +0.10% +0.00% / +0.14% +0.60% +0.62%] index_select random : Elapsed 0.042 ms (4.170 ms / 100) 4.182 -> 4.185 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.38% +0.41%] index_select random_sorted : Elapsed 0.042 ms (4.184 ms / 100) 4.149 -> 4.149 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.48% +0.51%] index_select perm : Elapsed 0.041 ms (4.149 ms / 100) 4.167 -> 4.174 ( +0.17%) [ +0.17% +0.00% +0.12% / +0.17% +0.74% +0.62%] index_select perm_sorted : Elapsed 0.042 ms (4.174 ms / 100) B = [40, 4, 5, 16] (stride (1, 40, 2560, 160)) A = [40, 4, 5, 20] (stride (400, 20, 80, 1)) dim = 3 4.144 -> 4.147 ( +0.07%) [ +0.02% +0.00% +0.10% / +0.07% +0.39% +0.48%] index_select const : Elapsed 0.041 ms (4.145 ms / 100) 4.172 -> 4.171 ( -0.02%) [ +0.12% +0.05% +0.00% / -0.02% +0.46% +0.46%] index_select wrap : Elapsed 0.042 ms (4.177 ms / 100) 4.162 -> 4.164 ( +0.05%) [ +0.07% +0.00% +0.10% / +0.05% +0.58% +0.46%] index_select linear : Elapsed 0.042 ms (4.165 ms / 100) 4.160 -> 4.164 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.62% +0.55%] index_select reverse : Elapsed 0.042 ms (4.160 ms / 100) 4.148 -> 4.148 ( +0.00%) [ +0.00% +0.14% +0.14% / +0.00% +0.87% +0.51%] index_select skip64 : Elapsed 0.041 ms (4.148 ms / 100) 4.146 -> 4.145 ( -0.02%) [ +0.10% +0.12% +0.00% / -0.02% +0.58% +0.55%] index_select skip256 : Elapsed 0.041 ms (4.150 ms / 100) 4.162 -> 4.167 ( +0.12%) [ +0.12% +0.00% +0.02% / +0.12% +0.53% +0.46%] index_select spread : Elapsed 0.042 ms (4.167 ms / 100) 4.181 -> 4.186 ( +0.12%) [ +0.10% +0.07% +0.00% / +0.12% +0.43% +0.36%] index_select strided 3 : Elapsed 0.042 ms (4.185 ms / 100) 4.148 -> 4.156 ( +0.19%) [ +0.05% +0.00% +0.14% / +0.19% +0.53% +0.51%] index_select strided 5 : Elapsed 0.042 ms (4.150 ms / 100) 4.157 -> 4.164 ( +0.17%) [ +0.00% +0.02% +0.05% / +0.17% +0.67% +0.72%] index_select strided 7 : Elapsed 0.042 ms (4.157 ms / 100) 4.145 -> 4.144 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.43% +0.46%] index_select strided 8 : Elapsed 0.041 ms (4.145 ms / 100) 4.149 -> 4.153 ( +0.10%) [ +0.05% +0.14% +0.00% / +0.10% +0.72% +0.75%] index_select strided 16 : Elapsed 0.042 ms (4.151 ms / 100) 4.156 -> 4.156 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.87% +0.67%] index_select random : Elapsed 0.042 ms (4.157 ms / 100) 4.162 -> 4.163 ( +0.02%) [ +0.10% +0.17% +0.00% / +0.02% +0.79% +0.67%] index_select random_sorted : Elapsed 0.042 ms (4.166 ms / 100) 4.144 -> 4.148 ( +0.10%) [ +0.17% +0.22% +0.00% / +0.10% +0.87% +0.97%] index_select perm : Elapsed 0.042 ms (4.151 ms / 100) 4.176 -> 4.179 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.60% +0.62%] index_select perm_sorted : Elapsed 0.042 ms (4.178 ms / 100) B = [40, 4, 5, 16] (stride (1, 40, 160, 800)) A = [40, 4, 5, 20] (stride (20, 800, 3200, 1)) dim = 3 4.245 -> 4.244 ( -0.02%) [ +0.05% +0.16% +0.00% / -0.02% +1.04% +0.94%] index_select const : Elapsed 0.042 ms (4.247 ms / 100) 4.230 -> 4.232 ( +0.05%) [ +0.12% +0.00% +0.02% / +0.05% +0.90% +0.64%] index_select wrap : Elapsed 0.042 ms (4.235 ms / 100) 4.229 -> 4.237 ( +0.19%) [ +0.24% +0.00% +0.05% / +0.19% +0.71% +0.87%] index_select linear : Elapsed 0.042 ms (4.239 ms / 100) 4.205 -> 4.210 ( +0.12%) [ +0.14% +0.24% +0.00% / +0.12% +0.88% +0.83%] index_select reverse : Elapsed 0.042 ms (4.211 ms / 100) 4.251 -> 4.254 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.251 ms / 100) 4.245 -> 4.243 ( -0.05%) [ +0.00% +0.24% +0.24% / -0.05% +0.92% +0.75%] index_select skip256 : Elapsed 0.042 ms (4.245 ms / 100) 4.242 -> 4.244 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.61% +0.61%] index_select spread : Elapsed 0.042 ms (4.242 ms / 100) 4.224 -> 4.231 ( +0.17%) [ +0.19% +0.00% +0.09% / +0.17% +0.73% +0.78%] index_select strided 3 : Elapsed 0.042 ms (4.232 ms / 100) 4.223 -> 4.229 ( +0.14%) [ +0.12% +0.00% +0.12% / +0.14% +0.66% +0.71%] index_select strided 5 : Elapsed 0.042 ms (4.228 ms / 100) 4.229 -> 4.240 ( +0.26%) [ +0.00% +0.17% +0.00% / +0.26% +0.80% +0.76%] index_select strided 7 : Elapsed 0.042 ms (4.229 ms / 100) 4.221 -> 4.220 ( -0.02%) [ +0.02% +0.05% +0.00% / -0.02% +0.62% +0.73%] index_select strided 8 : Elapsed 0.042 ms (4.222 ms / 100) 4.248 -> 4.251 ( +0.07%) [ +0.02% +0.00% +0.12% / +0.07% +0.75% +0.64%] index_select strided 16 : Elapsed 0.042 ms (4.249 ms / 100) 4.230 -> 4.234 ( +0.09%) [ +0.00% +0.00% +0.12% / +0.09% +0.69% +0.71%] index_select random : Elapsed 0.042 ms (4.230 ms / 100) 4.225 -> 4.229 ( +0.09%) [ +0.00% +0.17% +0.12% / +0.09% +0.59% +0.54%] index_select random_sorted : Elapsed 0.042 ms (4.225 ms / 100) 4.252 -> 4.252 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.61% +0.59%] index_select perm : Elapsed 0.043 ms (4.255 ms / 100) 4.222 -> 4.227 ( +0.12%) [ +0.17% +0.14% +0.00% / +0.12% +0.50% +0.62%] index_select perm_sorted : Elapsed 0.042 ms (4.229 ms / 100) out_shape = [16, 4, 20, 5] in_shape = [40, 4, 20, 5] idx_dim = 0 B = [16, 4, 20, 5] (stride (400, 100, 1, 20)) A = [40, 4, 20, 5] (stride (400, 1, 4, 80)) dim = 0 3.956 -> 3.956 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.66% +0.63%] index_select const : Elapsed 0.040 ms (3.957 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.08% +0.00% +0.05% / +0.00% +0.59% +0.51%] index_select wrap : Elapsed 0.039 ms (3.933 ms / 100) 3.922 -> 3.927 ( +0.13%) [ +0.00% +0.18% +0.20% / +0.13% +0.43% +0.51%] index_select linear : Elapsed 0.039 ms (3.922 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.51% +0.46%] index_select reverse : Elapsed 0.039 ms (3.925 ms / 100) 3.959 -> 3.981 ( +0.56%) [ +0.00% +0.35% +0.25% / +0.56% +0.88% +0.76%] index_select skip64 : Elapsed 0.040 ms (3.959 ms / 100) 3.956 -> 3.977 ( +0.53%) [ +0.03% +0.00% +0.10% / +0.73% +0.53% +0.56%] index_select skip256 : Elapsed 0.040 ms (3.957 ms / 100) 3.948 -> 3.960 ( +0.30%) [ +0.00% +0.00% +0.00% / +0.30% +0.43% +0.33%] index_select spread : Elapsed 0.039 ms (3.948 ms / 100) 3.945 -> 3.949 ( +0.10%) [ +0.00% +0.05% +0.03% / +0.10% +0.48% +0.56%] index_select strided 3 : Elapsed 0.039 ms (3.945 ms / 100) 3.934 -> 3.940 ( +0.15%) [ +0.10% +0.00% +0.10% / +0.15% +0.53% +0.58%] index_select strided 5 : Elapsed 0.039 ms (3.938 ms / 100) 3.944 -> 3.952 ( +0.20%) [ +0.00% +0.10% +0.10% / +0.20% +0.53% +0.51%] index_select strided 7 : Elapsed 0.039 ms (3.944 ms / 100) 3.956 -> 3.958 ( +0.05%) [ +0.13% +0.00% +0.10% / +0.05% +0.61% +0.58%] index_select strided 8 : Elapsed 0.040 ms (3.961 ms / 100) 3.949 -> 3.955 ( +0.15%) [ +0.00% +0.03% +0.00% / +0.15% +0.51% +0.33%] index_select strided 16 : Elapsed 0.039 ms (3.949 ms / 100) 3.932 -> 3.955 ( +0.58%) [ +0.03% +0.00% +0.08% / +0.61% +0.61% +0.58%] index_select random : Elapsed 0.039 ms (3.933 ms / 100) 3.920 -> 3.933 ( +0.33%) [ +0.00% +0.03% +0.00% / +0.33% +0.48% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.920 ms / 100) 3.949 -> 3.960 ( +0.28%) [ +0.00% +0.08% +0.08% / +0.28% +0.38% +0.48%] index_select perm : Elapsed 0.039 ms (3.949 ms / 100) 3.934 -> 3.937 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.43% +0.46%] index_select perm_sorted : Elapsed 0.039 ms (3.937 ms / 100) B = [16, 4, 20, 5] (stride (400, 100, 1, 20)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 0 3.963 -> 3.970 ( +0.18%) [ +0.15% +0.15% +0.00% / +0.18% +0.88% +0.93%] index_select const : Elapsed 0.040 ms (3.969 ms / 100) 3.957 -> 3.958 ( +0.03%) [ +0.15% +0.10% +0.00% / +0.03% +0.73% +0.73%] index_select wrap : Elapsed 0.040 ms (3.963 ms / 100) 3.962 -> 3.966 ( +0.10%) [ +0.08% +0.05% +0.00% / +0.10% +0.71% +0.66%] index_select linear : Elapsed 0.040 ms (3.965 ms / 100) 3.958 -> 3.963 ( +0.13%) [ +0.08% +0.03% +0.00% / +0.13% +0.83% +0.83%] index_select reverse : Elapsed 0.040 ms (3.961 ms / 100) 3.963 -> 3.964 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.96% +0.93%] index_select skip64 : Elapsed 0.040 ms (3.965 ms / 100) 3.965 -> 3.961 ( -0.10%) [ +0.05% +0.00% +0.05% / -0.10% +0.86% +0.86%] index_select skip256 : Elapsed 0.040 ms (3.967 ms / 100) 3.950 -> 3.954 ( +0.10%) [ +0.10% +0.00% +0.08% / +0.10% +0.76% +0.84%] index_select spread : Elapsed 0.040 ms (3.954 ms / 100) 3.947 -> 3.946 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.73% +0.63%] index_select strided 3 : Elapsed 0.039 ms (3.949 ms / 100) 3.947 -> 3.949 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.86% +0.89%] index_select strided 5 : Elapsed 0.040 ms (3.951 ms / 100) 3.951 -> 3.954 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.76% +0.76%] index_select strided 7 : Elapsed 0.040 ms (3.954 ms / 100) 3.969 -> 3.978 ( +0.23%) [ +0.23% +0.23% +0.00% / +0.23% +0.98% +0.73%] index_select strided 8 : Elapsed 0.040 ms (3.978 ms / 100) 3.968 -> 3.968 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.83%] index_select strided 16 : Elapsed 0.040 ms (3.968 ms / 100) 3.960 -> 3.968 ( +0.20%) [ +0.15% +0.00% +0.23% / +0.20% +0.83% +0.71%] index_select random : Elapsed 0.040 ms (3.966 ms / 100) 3.964 -> 3.961 ( -0.08%) [ +0.05% +0.00% +0.05% / -0.08% +0.78% +0.68%] index_select random_sorted : Elapsed 0.040 ms (3.966 ms / 100) 3.947 -> 3.951 ( +0.10%) [ +0.18% +0.25% +0.00% / +0.10% +1.09% +0.96%] index_select perm : Elapsed 0.040 ms (3.954 ms / 100) 3.950 -> 3.955 ( +0.13%) [ +0.18% +0.13% +0.00% / +0.13% +0.94% +0.73%] index_select perm_sorted : Elapsed 0.040 ms (3.957 ms / 100) B = [16, 4, 20, 5] (stride (400, 5, 20, 1)) A = [40, 4, 20, 5] (stride (100, 4000, 1, 20)) dim = 0 3.869 -> 3.868 ( -0.03%) [ +0.18% +0.00% +0.10% / -0.03% +0.62% +0.52%] index_select const : Elapsed 0.039 ms (3.876 ms / 100) 3.883 -> 3.883 ( +0.00%) [ +0.10% +0.03% +0.00% / +0.00% +0.70% +0.49%] index_select wrap : Elapsed 0.039 ms (3.887 ms / 100) 3.870 -> 3.867 ( -0.08%) [ +0.05% +0.00% +0.03% / -0.08% +0.54% +0.44%] index_select linear : Elapsed 0.039 ms (3.872 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.49% +0.52%] index_select reverse : Elapsed 0.039 ms (3.878 ms / 100) 3.876 -> 3.882 ( +0.15%) [ +0.10% +0.00% +0.26% / +0.15% +0.54% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.880 ms / 100) 3.874 -> 3.873 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.31% +0.39%] index_select skip256 : Elapsed 0.039 ms (3.877 ms / 100) 3.878 -> 3.878 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.57% +0.54%] index_select spread : Elapsed 0.039 ms (3.878 ms / 100) 3.891 -> 3.888 ( -0.08%) [ +0.00% +0.00% +0.03% / -0.08% +0.41% +0.33%] index_select strided 3 : Elapsed 0.039 ms (3.891 ms / 100) 3.860 -> 3.862 ( +0.05%) [ +0.00% +0.00% +0.08% / +0.05% +0.39% +0.47%] index_select strided 5 : Elapsed 0.039 ms (3.860 ms / 100) 3.884 -> 3.876 ( -0.21%) [ +0.00% +0.05% +0.00% / -0.21% +0.31% +0.31%] index_select strided 7 : Elapsed 0.039 ms (3.884 ms / 100) 3.887 -> 3.889 ( +0.05%) [ +0.03% +0.10% +0.00% / +0.05% +0.51% +0.54%] index_select strided 8 : Elapsed 0.039 ms (3.888 ms / 100) 3.865 -> 3.865 ( +0.00%) [ +0.08% +0.00% +0.03% / +0.00% +0.54% +0.57%] index_select strided 16 : Elapsed 0.039 ms (3.868 ms / 100) 3.870 -> 3.872 ( +0.05%) [ +0.18% +0.00% +0.36% / +0.05% +0.70% +0.75%] index_select random : Elapsed 0.039 ms (3.877 ms / 100) 3.881 -> 3.892 ( +0.28%) [ +0.00% +0.64% +0.28% / +0.28% +0.72% +0.77%] index_select random_sorted : Elapsed 0.039 ms (3.881 ms / 100) 3.875 -> 3.870 ( -0.13%) [ +0.03% +0.00% +0.00% / -0.13% +0.36% +0.28%] index_select perm : Elapsed 0.039 ms (3.876 ms / 100) 3.870 -> 3.878 ( +0.21%) [ +0.21% +0.26% +0.00% / +0.21% +0.59% +0.65%] index_select perm_sorted : Elapsed 0.039 ms (3.878 ms / 100) B = [16, 4, 20, 5] (stride (400, 1, 20, 4)) A = [40, 4, 20, 5] (stride (20, 800, 1, 3200)) dim = 0 3.582 -> 3.582 ( +0.00%) [ +0.06% +0.00% +0.14% / +0.00% +0.78% +0.78%] index_select const : Elapsed 0.036 ms (3.584 ms / 100) 3.607 -> 3.608 ( +0.03%) [ +0.06% +0.00% +0.06% / +0.03% +0.67% +0.69%] index_select wrap : Elapsed 0.036 ms (3.609 ms / 100) 3.600 -> 3.603 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.78% +0.69%] index_select linear : Elapsed 0.036 ms (3.603 ms / 100) 3.611 -> 3.608 ( -0.08%) [ +0.06% +0.06% +0.00% / -0.08% +0.69% +0.83%] index_select reverse : Elapsed 0.036 ms (3.613 ms / 100) 3.596 -> 3.598 ( +0.06%) [ +0.03% +0.11% +0.00% / +0.06% +0.86% +0.78%] index_select skip64 : Elapsed 0.036 ms (3.597 ms / 100) 3.582 -> 3.583 ( +0.03%) [ +0.17% +0.00% +0.00% / +0.03% +0.81% +0.84%] index_select skip256 : Elapsed 0.036 ms (3.588 ms / 100) 3.598 -> 3.602 ( +0.11%) [ +0.03% +0.08% +0.00% / +0.11% +0.94% +0.86%] index_select spread : Elapsed 0.036 ms (3.599 ms / 100) 3.594 -> 3.593 ( -0.03%) [ +0.17% +0.03% +0.00% / -0.03% +0.75% +0.67%] index_select strided 3 : Elapsed 0.036 ms (3.600 ms / 100) 3.591 -> 3.598 ( +0.19%) [ +0.08% +0.22% +0.00% / +0.19% +0.81% +0.78%] index_select strided 5 : Elapsed 0.036 ms (3.594 ms / 100) 3.600 -> 3.607 ( +0.19%) [ +0.00% +0.08% +0.03% / +0.19% +0.78% +0.72%] index_select strided 7 : Elapsed 0.036 ms (3.600 ms / 100) 3.588 -> 3.590 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.70% +0.67%] index_select strided 8 : Elapsed 0.036 ms (3.589 ms / 100) 3.583 -> 3.583 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.70%] index_select strided 16 : Elapsed 0.036 ms (3.583 ms / 100) 3.607 -> 3.609 ( +0.06%) [ +0.25% +0.00% +0.03% / +0.06% +0.75% +0.78%] index_select random : Elapsed 0.036 ms (3.616 ms / 100) 3.601 -> 3.605 ( +0.11%) [ +0.11% +0.06% +0.00% / +0.11% +0.83% +0.89%] index_select random_sorted : Elapsed 0.036 ms (3.605 ms / 100) 3.606 -> 3.609 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.72% +1.19%] index_select perm : Elapsed 0.036 ms (3.607 ms / 100) 3.605 -> 3.605 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.64% +1.14%] index_select perm_sorted : Elapsed 0.036 ms (3.606 ms / 100) B = [16, 4, 20, 5] (stride (5, 1600, 80, 1)) A = [40, 4, 20, 5] (stride (1, 200, 800, 40)) dim = 0 3.917 -> 3.918 ( +0.03%) [ +0.03% +0.00% +0.10% / +0.03% +0.56% +0.54%] index_select const : Elapsed 0.039 ms (3.918 ms / 100) 3.932 -> 3.931 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.36% +0.38%] index_select wrap : Elapsed 0.039 ms (3.932 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.28% +0.41%] index_select linear : Elapsed 0.039 ms (3.934 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.10% +0.00% +0.08% / +0.03% +0.43% +0.48%] index_select reverse : Elapsed 0.039 ms (3.926 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.54% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.923 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.33% +0.38%] index_select skip256 : Elapsed 0.039 ms (3.922 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.26% +0.00% +0.00% / +0.03% +0.56% +0.36%] index_select spread : Elapsed 0.039 ms (3.930 ms / 100) 3.930 -> 3.933 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.46% +0.48%] index_select strided 3 : Elapsed 0.039 ms (3.932 ms / 100) 3.927 -> 3.923 ( -0.10%) [ +0.15% +0.08% +0.00% / -0.10% +0.23% +0.38%] index_select strided 5 : Elapsed 0.039 ms (3.933 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.13% +0.00% +0.05% / +0.05% +0.20% +0.28%] index_select strided 7 : Elapsed 0.039 ms (3.928 ms / 100) 3.927 -> 3.929 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.41% +0.41%] index_select strided 8 : Elapsed 0.039 ms (3.929 ms / 100) 3.912 -> 3.912 ( +0.00%) [ +0.18% +0.00% +0.10% / +0.00% +0.26% +0.33%] index_select strided 16 : Elapsed 0.039 ms (3.919 ms / 100) 3.920 -> 3.923 ( +0.08%) [ +0.00% +0.10% +0.05% / +0.08% +0.46% +0.48%] index_select random : Elapsed 0.039 ms (3.920 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.59% +0.38%] index_select random_sorted : Elapsed 0.039 ms (3.931 ms / 100) 3.919 -> 3.923 ( +0.10%) [ +0.00% +0.13% +0.05% / +0.10% +0.31% +0.28%] index_select perm : Elapsed 0.039 ms (3.919 ms / 100) 3.912 -> 3.913 ( +0.03%) [ +0.28% +0.31% +0.00% / +0.03% +0.51% +0.36%] index_select perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) B = [16, 4, 20, 5] (stride (20, 5, 320, 1)) A = [40, 4, 20, 5] (stride (20, 4000, 1, 800)) dim = 0 3.933 -> 3.934 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.76% +0.76%] index_select const : Elapsed 0.039 ms (3.936 ms / 100) 3.923 -> 3.920 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.08% +0.54% +0.54%] index_select wrap : Elapsed 0.039 ms (3.924 ms / 100) 3.915 -> 3.924 ( +0.23%) [ +0.20% +0.00% +0.18% / +0.23% +0.56% +0.61%] index_select linear : Elapsed 0.039 ms (3.923 ms / 100) 3.918 -> 3.923 ( +0.13%) [ +0.00% +0.03% +0.23% / +0.13% +0.84% +0.82%] index_select reverse : Elapsed 0.039 ms (3.918 ms / 100) 3.919 -> 3.920 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.79% +0.74%] index_select skip64 : Elapsed 0.039 ms (3.920 ms / 100) 3.927 -> 3.935 ( +0.20%) [ +0.18% +0.00% +0.18% / +0.20% +1.12% +0.76%] index_select skip256 : Elapsed 0.039 ms (3.934 ms / 100) 3.917 -> 3.926 ( +0.23%) [ +0.03% +0.00% +0.23% / +0.23% +0.71% +0.66%] index_select spread : Elapsed 0.039 ms (3.918 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.00% +0.05% +0.18% / +0.13% +0.54% +0.51%] index_select strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 3.921 -> 3.922 ( +0.03%) [ +0.03% +0.10% +0.00% / +0.03% +0.66% +0.74%] index_select strided 5 : Elapsed 0.039 ms (3.922 ms / 100) 3.922 -> 3.919 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.71% +0.66%] index_select strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.899 -> 3.900 ( +0.03%) [ +0.00% +0.03% +0.15% / +0.03% +0.72% +0.69%] index_select strided 8 : Elapsed 0.039 ms (3.899 ms / 100) 3.904 -> 3.921 ( +0.44%) [ +0.41% +0.38% +0.00% / +0.44% +0.77% +0.77%] index_select strided 16 : Elapsed 0.039 ms (3.920 ms / 100) 3.918 -> 3.917 ( -0.03%) [ +0.08% +0.00% +0.00% / -0.03% +0.71% +0.74%] index_select random : Elapsed 0.039 ms (3.921 ms / 100) 3.913 -> 3.904 ( -0.23%) [ +0.00% +0.20% +0.03% / -0.23% +0.74% +0.74%] index_select random_sorted : Elapsed 0.039 ms (3.913 ms / 100) 3.916 -> 3.919 ( +0.08%) [ +0.18% +0.00% +0.08% / +0.08% +0.69% +0.72%] index_select perm : Elapsed 0.039 ms (3.923 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.20% +0.00% +0.05% / +0.05% +0.82% +0.82%] index_select perm_sorted : Elapsed 0.039 ms (3.927 ms / 100) B = [16, 4, 20, 5] (stride (20, 5, 320, 1)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 0 3.967 -> 3.968 ( +0.03%) [ +0.08% +0.23% +0.00% / +0.03% +0.60% +0.76%] index_select const : Elapsed 0.040 ms (3.970 ms / 100) 3.941 -> 3.945 ( +0.10%) [ +0.03% +0.10% +0.00% / +0.10% +0.69% +0.51%] index_select wrap : Elapsed 0.039 ms (3.942 ms / 100) 3.940 -> 3.939 ( -0.03%) [ +0.15% +0.08% +0.00% / -0.03% +0.63% +0.53%] index_select linear : Elapsed 0.039 ms (3.946 ms / 100) 3.947 -> 3.950 ( +0.08%) [ +0.00% +0.00% +0.03% / +0.08% +0.46% +0.38%] index_select reverse : Elapsed 0.039 ms (3.947 ms / 100) 3.992 -> 3.987 ( -0.13%) [ +0.05% +0.03% +0.00% / -0.13% +0.23% +0.33%] index_select skip64 : Elapsed 0.040 ms (3.994 ms / 100) 3.971 -> 3.979 ( +0.20%) [ +0.23% +0.15% +0.00% / +0.20% +0.60% +0.43%] index_select skip256 : Elapsed 0.040 ms (3.980 ms / 100) 3.960 -> 3.959 ( -0.03%) [ +0.00% +0.03% +0.08% / -0.03% +0.33% +0.33%] index_select spread : Elapsed 0.040 ms (3.960 ms / 100) 3.948 -> 3.954 ( +0.15%) [ +0.03% +0.00% +0.03% / +0.15% +0.38% +0.28%] index_select strided 3 : Elapsed 0.039 ms (3.949 ms / 100) 3.966 -> 3.967 ( +0.03%) [ +0.13% +0.00% +0.03% / +0.03% +0.53% +0.40%] index_select strided 5 : Elapsed 0.040 ms (3.971 ms / 100) 3.946 -> 3.955 ( +0.23%) [ +0.20% +0.00% +0.20% / +0.23% +0.53% +0.56%] index_select strided 7 : Elapsed 0.040 ms (3.954 ms / 100) 3.967 -> 3.969 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +0.45% +0.45%] index_select strided 8 : Elapsed 0.040 ms (3.973 ms / 100) 3.970 -> 3.969 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.38% +0.48%] index_select strided 16 : Elapsed 0.040 ms (3.972 ms / 100) 3.958 -> 3.961 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.61% +0.53%] index_select random : Elapsed 0.040 ms (3.960 ms / 100) 3.960 -> 3.963 ( +0.08%) [ +0.10% +0.00% +0.05% / +0.08% +0.51% +0.61%] index_select random_sorted : Elapsed 0.040 ms (3.964 ms / 100) 3.950 -> 3.953 ( +0.08%) [ +0.05% +0.00% +0.18% / +0.08% +0.51% +0.58%] index_select perm : Elapsed 0.040 ms (3.952 ms / 100) 3.962 -> 3.967 ( +0.13%) [ +0.00% +0.08% +0.10% / +0.13% +0.38% +0.38%] index_select perm_sorted : Elapsed 0.040 ms (3.962 ms / 100) B = [16, 4, 20, 5] (stride (1, 80, 320, 16)) A = [40, 4, 20, 5] (stride (1, 40, 800, 160)) dim = 0 3.938 -> 3.940 ( +0.05%) [ +0.18% +0.00% +0.05% / +0.05% +0.79% +0.79%] index_select const : Elapsed 0.039 ms (3.945 ms / 100) 3.935 -> 3.932 ( -0.08%) [ +0.13% +0.10% +0.00% / -0.08% +0.58% +0.58%] index_select wrap : Elapsed 0.039 ms (3.940 ms / 100) 3.942 -> 3.941 ( -0.03%) [ +0.00% +0.15% +0.10% / -0.03% +0.89% +0.84%] index_select linear : Elapsed 0.039 ms (3.942 ms / 100) 3.938 -> 3.945 ( +0.18%) [ +0.20% +0.15% +0.00% / +0.18% +1.02% +1.02%] index_select reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.923 -> 3.924 ( +0.03%) [ +0.00% +0.13% +0.00% / +0.03% +0.84% +1.17%] index_select skip64 : Elapsed 0.039 ms (3.923 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.74% +0.81%] index_select skip256 : Elapsed 0.039 ms (3.939 ms / 100) 3.945 -> 3.950 ( +0.13%) [ +0.08% +0.05% +0.00% / +0.13% +0.81% +0.76%] index_select spread : Elapsed 0.039 ms (3.948 ms / 100) 3.942 -> 3.942 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.48% +0.61%] index_select strided 3 : Elapsed 0.039 ms (3.942 ms / 100) 3.942 -> 3.939 ( -0.08%) [ +0.00% +0.08% +0.13% / -0.08% +0.74% +0.76%] index_select strided 5 : Elapsed 0.039 ms (3.942 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.13% +0.00% +0.05% / +0.00% +0.79% +0.71%] index_select strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 3.949 -> 3.951 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.71% +0.68%] index_select strided 8 : Elapsed 0.040 ms (3.951 ms / 100) 3.938 -> 3.947 ( +0.23%) [ +0.23% +0.00% +0.25% / +0.23% +0.81% +0.76%] index_select strided 16 : Elapsed 0.039 ms (3.947 ms / 100) 3.944 -> 3.948 ( +0.10%) [ +0.08% +0.00% +0.00% / +0.10% +0.81% +0.63%] index_select random : Elapsed 0.039 ms (3.947 ms / 100) 3.936 -> 3.939 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.74% +0.84%] index_select random_sorted : Elapsed 0.039 ms (3.937 ms / 100) 3.945 -> 3.951 ( +0.15%) [ +0.03% +0.00% +0.20% / +0.15% +1.04% +0.86%] index_select perm : Elapsed 0.039 ms (3.946 ms / 100) 3.949 -> 3.943 ( -0.15%) [ +0.08% +0.05% +0.00% / -0.15% +0.79% +0.76%] index_select perm_sorted : Elapsed 0.040 ms (3.952 ms / 100) B = [16, 4, 20, 5] (stride (1, 80, 320, 16)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 0 3.978 -> 3.969 ( -0.23%) [ +0.03% +0.00% +0.00% / -0.23% +0.53% +0.45%] index_select const : Elapsed 0.040 ms (3.979 ms / 100) 3.938 -> 3.938 ( +0.00%) [ +0.13% +0.08% +0.00% / +0.00% +0.61% +0.76%] index_select wrap : Elapsed 0.039 ms (3.943 ms / 100) 3.944 -> 3.946 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.58% +0.56%] index_select linear : Elapsed 0.039 ms (3.948 ms / 100) 3.945 -> 3.953 ( +0.20%) [ +0.00% +0.08% +0.05% / +0.20% +0.63% +0.51%] index_select reverse : Elapsed 0.039 ms (3.945 ms / 100) 3.982 -> 3.987 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.65% +0.55%] index_select skip64 : Elapsed 0.040 ms (3.987 ms / 100) 3.970 -> 3.973 ( +0.08%) [ +0.00% +0.28% +0.28% / +0.08% +0.73% +0.48%] index_select skip256 : Elapsed 0.040 ms (3.970 ms / 100) 3.964 -> 3.963 ( -0.03%) [ +0.00% +0.08% +0.10% / -0.03% +0.50% +0.33%] index_select spread : Elapsed 0.040 ms (3.964 ms / 100) 3.944 -> 3.948 ( +0.10%) [ +0.10% +0.00% +0.23% / +0.10% +0.56% +0.63%] index_select strided 3 : Elapsed 0.039 ms (3.948 ms / 100) 3.970 -> 3.969 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.33% +0.35%] index_select strided 5 : Elapsed 0.040 ms (3.972 ms / 100) 3.948 -> 3.956 ( +0.20%) [ +0.00% +0.10% +0.20% / +0.20% +0.48% +0.53%] index_select strided 7 : Elapsed 0.039 ms (3.948 ms / 100) 3.971 -> 3.976 ( +0.13%) [ +0.00% +0.00% +0.10% / +0.13% +0.45% +0.40%] index_select strided 8 : Elapsed 0.040 ms (3.971 ms / 100) 3.970 -> 3.976 ( +0.15%) [ +0.10% +0.18% +0.00% / +0.15% +0.55% +0.53%] index_select strided 16 : Elapsed 0.040 ms (3.974 ms / 100) 3.955 -> 3.958 ( +0.08%) [ +0.03% +0.00% +0.08% / +0.08% +0.63% +0.58%] index_select random : Elapsed 0.040 ms (3.956 ms / 100) 3.948 -> 3.952 ( +0.10%) [ +0.00% +0.10% +0.13% / +0.10% +0.53% +0.53%] index_select random_sorted : Elapsed 0.039 ms (3.948 ms / 100) 3.963 -> 3.965 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.45% +0.38%] index_select perm : Elapsed 0.040 ms (3.967 ms / 100) 3.955 -> 3.955 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.40% +0.38%] index_select perm_sorted : Elapsed 0.040 ms (3.956 ms / 100) B = [16, 4, 20, 5] (stride (1, 320, 16, 1280)) A = [40, 4, 20, 5] (stride (4, 1, 800, 160)) dim = 0 1.458 -> 1.462 ( +0.27%) [ +0.00% +0.14% +0.21% / +0.27% +0.75% +0.82%] index_select const : Elapsed 0.015 ms (1.458 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.54% +0.61%] index_select wrap : Elapsed 0.015 ms (1.479 ms / 100) 1.454 -> 1.454 ( +0.00%) [ +0.21% +0.00% +0.28% / +0.00% +0.96% +0.89%] index_select linear : Elapsed 0.015 ms (1.457 ms / 100) 1.454 -> 1.454 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.96% +0.96%] index_select reverse : Elapsed 0.015 ms (1.456 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.00% +0.20% +0.00% / +0.14% +1.29% +1.36%] index_select skip64 : Elapsed 0.015 ms (1.473 ms / 100) 1.456 -> 1.459 ( +0.21%) [ +0.14% +0.00% +0.07% / +0.21% +1.03% +1.24%] index_select skip256 : Elapsed 0.015 ms (1.458 ms / 100) 1.469 -> 1.468 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.88% +0.88%] index_select spread : Elapsed 0.015 ms (1.470 ms / 100) 1.465 -> 1.463 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +0.96% +0.89%] index_select strided 3 : Elapsed 0.015 ms (1.465 ms / 100) 1.456 -> 1.456 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.48%] index_select strided 5 : Elapsed 0.015 ms (1.456 ms / 100) 1.459 -> 1.462 ( +0.21%) [ +0.27% +0.00% +0.21% / +0.21% +0.75% +0.69%] index_select strided 7 : Elapsed 0.015 ms (1.463 ms / 100) 1.446 -> 1.448 ( +0.14%) [ +0.21% +0.00% +0.07% / +0.14% +0.83% +1.04%] index_select strided 8 : Elapsed 0.014 ms (1.449 ms / 100) 1.456 -> 1.458 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.76% +0.96%] index_select strided 16 : Elapsed 0.015 ms (1.457 ms / 100) 1.474 -> 1.477 ( +0.20%) [ +0.00% +0.14% +0.27% / +0.20% +1.02% +1.09%] index_select random : Elapsed 0.015 ms (1.474 ms / 100) 1.454 -> 1.455 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +1.03% +0.89%] index_select random_sorted : Elapsed 0.015 ms (1.454 ms / 100) 1.458 -> 1.466 ( +0.55%) [ +0.34% +0.00% +0.62% / +0.55% +1.10% +0.96%] index_select perm : Elapsed 0.015 ms (1.463 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.14% +0.21% +0.00% / +0.14% +0.96% +1.03%] index_select perm_sorted : Elapsed 0.015 ms (1.465 ms / 100) B = [16, 4, 20, 5] (stride (4, 1, 64, 1280)) A = [40, 4, 20, 5] (stride (20, 4000, 1, 800)) dim = 0 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.64%] index_select const : Elapsed 0.039 ms (3.933 ms / 100) 3.935 -> 3.939 ( +0.10%) [ +0.13% +0.08% +0.00% / +0.10% +0.48% +0.56%] index_select wrap : Elapsed 0.039 ms (3.940 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.00% +0.15% +0.00% / +0.03% +0.71% +0.48%] index_select linear : Elapsed 0.039 ms (3.932 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.08% +0.00% +0.08% / +0.03% +0.46% +0.53%] index_select reverse : Elapsed 0.039 ms (3.932 ms / 100) 3.919 -> 3.925 ( +0.15%) [ +0.13% +0.00% +0.10% / +0.15% +0.41% +0.43%] index_select skip64 : Elapsed 0.039 ms (3.924 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.18% +0.05% +0.00% / +0.03% +0.48% +0.48%] index_select skip256 : Elapsed 0.039 ms (3.943 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.00% +0.03% +0.08% / +0.08% +0.48% +0.43%] index_select spread : Elapsed 0.039 ms (3.923 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.33% +0.36%] index_select strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 3.926 -> 3.927 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.41% +0.48%] index_select strided 5 : Elapsed 0.039 ms (3.926 ms / 100) 3.936 -> 3.936 ( +0.00%) [ +0.13% +0.00% +0.03% / +0.00% +0.43% +0.41%] index_select strided 7 : Elapsed 0.039 ms (3.941 ms / 100) 3.941 -> 3.941 ( +0.00%) [ +0.00% +0.18% +0.38% / +0.00% +0.66% +0.76%] index_select strided 8 : Elapsed 0.039 ms (3.941 ms / 100) 3.927 -> 3.929 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.43% +0.46%] index_select strided 16 : Elapsed 0.039 ms (3.929 ms / 100) 3.925 -> 3.922 ( -0.08%) [ +0.00% +0.05% +0.05% / -0.08% +0.43% +0.31%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.926 -> 3.927 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.51% +0.46%] index_select random_sorted : Elapsed 0.039 ms (3.928 ms / 100) 3.930 -> 3.934 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.51% +0.46%] index_select perm : Elapsed 0.039 ms (3.930 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.15% +0.00% +0.10% / +0.08% +0.46% +0.56%] index_select perm_sorted : Elapsed 0.039 ms (3.929 ms / 100) B = [16, 4, 20, 5] (stride (1, 16, 64, 1280)) A = [40, 4, 20, 5] (stride (400, 5, 20, 1)) dim = 0 3.873 -> 3.878 ( +0.13%) [ +0.10% +0.00% +0.08% / +0.13% +0.90% +0.72%] index_select const : Elapsed 0.039 ms (3.877 ms / 100) 3.875 -> 3.877 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.54% +0.54%] index_select wrap : Elapsed 0.039 ms (3.877 ms / 100) 3.874 -> 3.875 ( +0.03%) [ +0.00% +0.18% +0.05% / +0.03% +0.67% +0.67%] index_select linear : Elapsed 0.039 ms (3.874 ms / 100) 3.865 -> 3.864 ( -0.03%) [ +0.08% +0.08% +0.00% / -0.03% +0.88% +0.78%] index_select reverse : Elapsed 0.039 ms (3.868 ms / 100) 3.861 -> 3.868 ( +0.18%) [ +0.21% +0.10% +0.00% / +0.18% +0.88% +0.98%] index_select skip64 : Elapsed 0.039 ms (3.869 ms / 100) 3.866 -> 3.867 ( +0.03%) [ +0.03% +0.21% +0.00% / +0.03% +0.98% +1.06%] index_select skip256 : Elapsed 0.039 ms (3.867 ms / 100) 3.870 -> 3.872 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +1.21% +0.85%] index_select spread : Elapsed 0.039 ms (3.871 ms / 100) 3.870 -> 3.871 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +1.11% +0.98%] index_select strided 3 : Elapsed 0.039 ms (3.870 ms / 100) 3.865 -> 3.870 ( +0.13%) [ +0.00% +0.05% +0.13% / +0.13% +0.78% +0.70%] index_select strided 5 : Elapsed 0.039 ms (3.865 ms / 100) 3.869 -> 3.871 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.05% +0.72% +0.93%] index_select strided 7 : Elapsed 0.039 ms (3.872 ms / 100) 3.870 -> 3.875 ( +0.13%) [ +0.16% +0.18% +0.00% / +0.13% +0.98% +0.85%] index_select strided 8 : Elapsed 0.039 ms (3.876 ms / 100) 3.869 -> 3.871 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +1.34% +1.01%] index_select strided 16 : Elapsed 0.039 ms (3.873 ms / 100) 3.880 -> 3.878 ( -0.05%) [ +0.05% +0.00% +0.03% / -0.05% +1.21% +0.95%] index_select random : Elapsed 0.039 ms (3.882 ms / 100) 3.867 -> 3.868 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.85% +0.65%] index_select random_sorted : Elapsed 0.039 ms (3.870 ms / 100) 3.860 -> 3.865 ( +0.13%) [ +0.18% +0.23% +0.00% / +0.13% +1.48% +1.01%] index_select perm : Elapsed 0.039 ms (3.867 ms / 100) 3.883 -> 3.886 ( +0.08%) [ +0.00% +0.05% +0.15% / +0.08% +0.77% +1.11%] index_select perm_sorted : Elapsed 0.039 ms (3.883 ms / 100) out_shape = [40, 16, 20, 5] in_shape = [40, 4, 20, 5] idx_dim = 1 B = [40, 16, 20, 5] (stride (1600, 100, 5, 1)) A = [40, 4, 20, 5] (stride (100, 4000, 1, 20)) dim = 1 2.358 -> 2.359 ( +0.04%) [ +0.17% +0.00% +0.00% / +0.04% +0.51% +0.47%] index_add_ linear : Elapsed 0.024 ms (2.362 ms / 100) 2.306 -> 2.309 ( +0.13%) [ +0.26% +0.30% +0.00% / +0.13% +0.82% +0.65%] index_copy_ linear : Elapsed 0.023 ms (2.312 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.17% +0.08% +0.34%] index_add_ reverse : Elapsed 0.024 ms (2.358 ms / 100) 2.312 -> 2.312 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.26% +0.43%] index_copy_ reverse : Elapsed 0.023 ms (2.312 ms / 100) 2.359 -> 2.356 ( -0.13%) [ +0.08% +0.08% +0.00% / -0.13% +0.34% +0.42%] index_add_ spread : Elapsed 0.024 ms (2.361 ms / 100) 2.309 -> 2.310 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.65% +0.35%] index_copy_ spread : Elapsed 0.023 ms (2.313 ms / 100) 2.364 -> 2.364 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.38% +0.51%] index_add_ strided 3 : Elapsed 0.024 ms (2.365 ms / 100) 2.319 -> 2.321 ( +0.09%) [ +0.00% +0.13% +0.22% / +0.09% +0.43% +0.43%] index_copy_ strided 3 : Elapsed 0.023 ms (2.319 ms / 100) 2.366 -> 2.367 ( +0.04%) [ +0.00% +0.21% +0.13% / +0.04% +0.34% +0.34%] index_add_ strided 5 : Elapsed 0.024 ms (2.366 ms / 100) 2.319 -> 2.321 ( +0.09%) [ +0.09% +0.13% +0.00% / +0.09% +0.47% +0.47%] index_copy_ strided 5 : Elapsed 0.023 ms (2.321 ms / 100) 2.357 -> 2.359 ( +0.08%) [ +0.17% +0.13% +0.00% / +0.08% +0.30% +0.38%] index_add_ strided 7 : Elapsed 0.024 ms (2.361 ms / 100) 2.312 -> 2.316 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.30% +0.39%] index_copy_ strided 7 : Elapsed 0.023 ms (2.316 ms / 100) 2.362 -> 2.361 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.47% +0.30%] index_add_ perm : Elapsed 0.024 ms (2.364 ms / 100) 2.314 -> 2.319 ( +0.22%) [ +0.22% +0.00% +0.22% / +0.22% +0.39% +0.43%] index_copy_ perm : Elapsed 0.023 ms (2.319 ms / 100) 2.358 -> 2.362 ( +0.17%) [ +0.08% +0.13% +0.00% / +0.17% +0.30% +0.30%] index_add_ perm_sorted : Elapsed 0.024 ms (2.360 ms / 100) 2.310 -> 2.314 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.52% +0.61%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.310 ms / 100) 4.880 -> 4.885 ( +0.10%) [ +0.00% +0.06% +0.00% / +0.10% +0.45% +0.51%] index_select const : Elapsed 0.049 ms (4.880 ms / 100) 4.928 -> 4.932 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.57% +0.59%] index_select wrap : Elapsed 0.049 ms (4.932 ms / 100) 4.944 -> 4.941 ( -0.06%) [ +0.00% +0.06% +0.12% / -0.06% +0.38% +0.38%] index_select linear : Elapsed 0.049 ms (4.944 ms / 100) 4.929 -> 4.925 ( -0.08%) [ +0.00% +0.24% +0.24% / -0.08% +0.18% +0.39%] index_select reverse : Elapsed 0.049 ms (4.929 ms / 100) 4.901 -> 4.901 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.22% +0.37%] index_select skip64 : Elapsed 0.049 ms (4.901 ms / 100) 4.886 -> 4.890 ( +0.08%) [ +0.06% +0.14% +0.00% / +0.08% +0.47% +0.35%] index_select skip256 : Elapsed 0.049 ms (4.889 ms / 100) 4.921 -> 4.938 ( +0.35%) [ +0.30% +0.22% +0.00% / +0.37% +0.37% +0.35%] index_select spread : Elapsed 0.049 ms (4.936 ms / 100) 4.930 -> 4.930 ( +0.00%) [ +0.22% +0.14% +0.00% / +0.00% +0.43% +0.34%] index_select strided 3 : Elapsed 0.049 ms (4.941 ms / 100) 4.929 -> 4.926 ( -0.06%) [ +0.12% +0.18% +0.00% / -0.06% +0.14% +0.39%] index_select random : Elapsed 0.049 ms (4.935 ms / 100) 4.920 -> 4.927 ( +0.14%) [ +0.00% +0.08% +0.00% / +0.14% +0.30% +0.28%] index_select random_sorted : Elapsed 0.049 ms (4.920 ms / 100) B = [40, 16, 20, 5] (stride (1600, 100, 1, 20)) A = [40, 4, 20, 5] (stride (5, 4000, 200, 1)) dim = 1 2.505 -> 2.505 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.24% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.508 ms / 100) 2.457 -> 2.456 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.16% +0.37%] index_copy_ linear : Elapsed 0.025 ms (2.459 ms / 100) 2.507 -> 2.506 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.28% +0.12%] index_add_ reverse : Elapsed 0.025 ms (2.509 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.08% +0.00%] index_copy_ reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.506 -> 2.505 ( -0.04%) [ +0.12% +0.00% +0.12% / -0.04% +0.24% +0.20%] index_add_ spread : Elapsed 0.025 ms (2.509 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.00% +0.00% +0.12% / +0.53% +0.20% -0.04%] index_copy_ spread : Elapsed 0.025 ms (2.460 ms / 100) 2.505 -> 2.508 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.24% +0.24%] index_add_ strided 3 : Elapsed 0.025 ms (2.507 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.12% +0.16% +0.08%] index_copy_ strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.508 -> 2.508 ( +0.00%) [ +0.28% +0.00% +0.00% / +0.00% +0.24% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.515 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.12% +0.00% +0.04% / +0.20% +0.16% +0.37%] index_copy_ strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.507 -> 2.503 ( -0.16%) [ +0.08% +0.12% +0.00% / -0.16% +0.20% +0.20%] index_add_ strided 7 : Elapsed 0.025 ms (2.509 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.20% +0.57% +0.00% / -0.04% +0.20% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.463 ms / 100) 2.505 -> 2.507 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.40% +0.40%] index_add_ perm : Elapsed 0.025 ms (2.507 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.37% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.464 ms / 100) 2.508 -> 2.507 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.40% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.508 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.16% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.463 ms / 100) 5.404 -> 5.416 ( +0.22%) [ +0.35% +0.33% +0.00% / +0.22% +0.67% +0.39%] index_select const : Elapsed 0.054 ms (5.423 ms / 100) 5.451 -> 5.458 ( +0.13%) [ +0.02% +0.18% +0.00% / +0.13% +0.33% +0.33%] index_select wrap : Elapsed 0.055 ms (5.452 ms / 100) 5.478 -> 5.481 ( +0.05%) [ +0.04% +0.00% +0.02% / +0.05% +0.15% +0.22%] index_select linear : Elapsed 0.055 ms (5.480 ms / 100) 5.429 -> 5.432 ( +0.06%) [ +0.00% +0.18% +0.02% / +0.06% +0.29% +0.53%] index_select reverse : Elapsed 0.054 ms (5.429 ms / 100) 5.408 -> 5.407 ( -0.02%) [ +0.24% +0.00% +0.02% / -0.02% +0.30% +0.39%] index_select skip64 : Elapsed 0.054 ms (5.421 ms / 100) 5.396 -> 5.396 ( +0.00%) [ +0.02% +0.00% +0.15% / +0.00% +0.33% +0.37%] index_select skip256 : Elapsed 0.054 ms (5.397 ms / 100) 5.444 -> 5.444 ( +0.00%) [ +0.04% +0.17% +0.00% / +0.00% +0.40% +0.35%] index_select spread : Elapsed 0.054 ms (5.446 ms / 100) 5.457 -> 5.458 ( +0.02%) [ +0.16% +0.00% +0.07% / +0.02% +0.53% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.466 ms / 100) 5.468 -> 5.473 ( +0.09%) [ +0.04% +0.15% +0.00% / +0.09% +0.33% +0.33%] index_select random : Elapsed 0.055 ms (5.470 ms / 100) 5.457 -> 5.463 ( +0.11%) [ +0.18% +0.13% +0.00% / +0.11% +0.38% +0.35%] index_select random_sorted : Elapsed 0.055 ms (5.467 ms / 100) B = [40, 16, 20, 5] (stride (1600, 5, 80, 1)) A = [40, 4, 20, 5] (stride (400, 5, 20, 1)) dim = 1 2.475 -> 2.474 ( -0.04%) [ +0.00% +0.04% +0.16% / -0.04% +0.53% +0.44%] index_add_ linear : Elapsed 0.025 ms (2.475 ms / 100) 2.438 -> 2.438 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.57% +0.29%] index_copy_ linear : Elapsed 0.024 ms (2.438 ms / 100) 2.487 -> 2.486 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.36% +0.20%] index_add_ reverse : Elapsed 0.025 ms (2.487 ms / 100) 2.445 -> 2.453 ( +0.33%) [ +0.08% +0.20% +0.00% / +0.33% +0.53% +0.61%] index_copy_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.505 -> 2.507 ( +0.08%) [ +0.00% +0.24% +0.16% / +0.08% +0.56% +0.56%] index_add_ spread : Elapsed 0.025 ms (2.505 ms / 100) 2.491 -> 2.490 ( -0.04%) [ +0.08% +0.00% +0.16% / -0.04% +0.60% +0.48%] index_copy_ spread : Elapsed 0.025 ms (2.493 ms / 100) 2.511 -> 2.514 ( +0.12%) [ +0.24% +0.24% +0.00% / +0.12% +0.60% +0.60%] index_add_ strided 3 : Elapsed 0.025 ms (2.517 ms / 100) 2.517 -> 2.517 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.44% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.517 ms / 100) 2.482 -> 2.481 ( -0.04%) [ +0.16% +0.08% +0.00% / -0.04% +0.28% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.486 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.37% +0.08%] index_copy_ strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.483 -> 2.488 ( +0.20%) [ +0.12% +0.28% +0.00% / +0.20% +0.44% +0.48%] index_add_ strided 7 : Elapsed 0.025 ms (2.486 ms / 100) 2.476 -> 2.479 ( +0.12%) [ +0.20% +0.00% +0.12% / +0.12% +0.32% +0.40%] index_copy_ strided 7 : Elapsed 0.025 ms (2.481 ms / 100) 2.481 -> 2.483 ( +0.08%) [ +0.24% +0.00% +0.20% / +0.08% +0.16% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.487 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.04% +0.24% +0.00% / +0.08% +0.20% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.473 -> 2.478 ( +0.20%) [ +0.40% +0.00% +0.28% / +0.20% +0.40% +0.61%] index_add_ perm_sorted : Elapsed 0.025 ms (2.483 ms / 100) 2.460 -> 2.465 ( +0.20%) [ +0.04% +0.16% +0.00% / +0.20% +0.28% +0.28%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 5.301 -> 5.301 ( +0.00%) [ +0.08% +0.11% +0.00% / +0.00% +0.45% +0.38%] index_select const : Elapsed 0.053 ms (5.305 ms / 100) 5.331 -> 5.343 ( +0.23%) [ +0.00% +0.21% +0.11% / +0.23% +0.47% +0.49%] index_select wrap : Elapsed 0.053 ms (5.331 ms / 100) 5.349 -> 5.351 ( +0.04%) [ +0.00% +0.22% +0.06% / +0.04% +0.52% +0.45%] index_select linear : Elapsed 0.053 ms (5.349 ms / 100) 5.346 -> 5.350 ( +0.07%) [ +0.54% +0.13% +0.00% / +0.07% +1.03% +0.39%] index_select reverse : Elapsed 0.054 ms (5.375 ms / 100) 5.311 -> 5.318 ( +0.13%) [ +0.19% +0.04% +0.00% / +0.13% +0.38% +0.55%] index_select skip64 : Elapsed 0.053 ms (5.321 ms / 100) 5.311 -> 5.314 ( +0.06%) [ +0.11% +0.00% +0.04% / +0.06% +0.21% +0.49%] index_select skip256 : Elapsed 0.053 ms (5.317 ms / 100) 5.352 -> 5.348 ( -0.07%) [ +0.15% +0.00% +0.02% / -0.07% +0.22% +0.26%] index_select spread : Elapsed 0.054 ms (5.360 ms / 100) 5.330 -> 5.336 ( +0.11%) [ +0.09% +0.09% +0.00% / +0.11% +0.17% +0.36%] index_select strided 3 : Elapsed 0.053 ms (5.335 ms / 100) 5.344 -> 5.350 ( +0.11%) [ +0.13% +0.04% +0.00% / +0.11% +0.47% +0.45%] index_select random : Elapsed 0.054 ms (5.351 ms / 100) 5.335 -> 5.337 ( +0.04%) [ +0.00% +0.06% +0.06% / +0.04% +0.32% +0.28%] index_select random_sorted : Elapsed 0.053 ms (5.335 ms / 100) B = [40, 16, 20, 5] (stride (1600, 1, 80, 16)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 1 2.390 -> 2.389 ( -0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.21% -0.04%] index_add_ linear : Elapsed 0.024 ms (2.390 ms / 100) 2.366 -> 2.367 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.04% +0.30% +0.30%] index_copy_ linear : Elapsed 0.024 ms (2.370 ms / 100) 2.393 -> 2.390 ( -0.13%) [ +0.00% +0.13% +0.17% / +0.13% -0.13% -0.04%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.366 -> 2.368 ( +0.08%) [ +0.08% +0.00% +0.21% / +0.08% +0.25% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.431 -> 2.428 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.12% +0.16% +0.29%] index_add_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.20% +0.12%] index_copy_ spread : Elapsed 0.025 ms (2.468 ms / 100) 2.433 -> 2.436 ( +0.12%) [ +0.21% +0.16% +0.00% / +0.12% +0.16% +0.12%] index_add_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.00% +0.04% +0.00% / +0.16% +0.32% +0.28%] index_copy_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.431 -> 2.433 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.12% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.431 ms / 100) 2.469 -> 2.467 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.32% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.472 ms / 100) 2.433 -> 2.431 ( -0.08%) [ +0.04% +0.16% +0.00% / -0.08% +0.08% +0.00%] index_add_ strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.24% +0.28%] index_copy_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.429 -> 2.428 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.29% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.465 -> 2.467 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.41% +0.41%] index_copy_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.430 -> 2.433 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.21% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.432 ms / 100) 2.468 -> 2.474 ( +0.24%) [ +0.00% +0.36% +0.16% / +0.28% +0.36% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) 5.100 -> 5.111 ( +0.22%) [ +0.57% +0.00% +0.41% / +0.22% +0.39% +0.73%] index_select const : Elapsed 0.051 ms (5.129 ms / 100) 5.116 -> 5.114 ( -0.04%) [ +0.02% +0.00% +0.12% / -0.04% +0.00% +0.22%] index_select wrap : Elapsed 0.051 ms (5.117 ms / 100) 5.108 -> 5.112 ( +0.08%) [ +0.06% +0.16% +0.00% / +0.10% +0.08% +0.22%] index_select linear : Elapsed 0.051 ms (5.111 ms / 100) 5.110 -> 5.108 ( -0.04%) [ +0.00% +0.06% +0.18% / -0.04% +0.37% +0.14%] index_select reverse : Elapsed 0.051 ms (5.110 ms / 100) 5.123 -> 5.123 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.66% +0.31%] index_select skip64 : Elapsed 0.051 ms (5.124 ms / 100) 5.112 -> 5.116 ( +0.08%) [ +0.00% +0.20% +0.16% / +0.08% +0.10% +0.20%] index_select skip256 : Elapsed 0.051 ms (5.112 ms / 100) 5.105 -> 5.106 ( +0.02%) [ +0.00% +0.02% +0.37% / +0.02% +0.18% +0.55%] index_select spread : Elapsed 0.051 ms (5.105 ms / 100) 5.107 -> 5.104 ( -0.06%) [ +0.16% +0.02% +0.00% / -0.06% +0.22% +0.47%] index_select strided 3 : Elapsed 0.051 ms (5.115 ms / 100) 5.125 -> 5.128 ( +0.06%) [ +0.02% +0.00% +0.02% / +0.08% +0.33% +0.06%] index_select random : Elapsed 0.051 ms (5.126 ms / 100) 5.106 -> 5.104 ( -0.04%) [ +0.00% +0.16% +0.12% / -0.04% +0.18% +0.55%] index_select random_sorted : Elapsed 0.051 ms (5.106 ms / 100) B = [40, 16, 20, 5] (stride (1600, 1, 80, 16)) A = [40, 4, 20, 5] (stride (1, 800, 40, 3200)) dim = 1 2.482 -> 2.486 ( +0.16%) [ +0.12% +0.00% +0.12% / +0.16% +0.40% +0.60%] index_add_ linear : Elapsed 0.025 ms (2.485 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.45% +0.57%] index_copy_ linear : Elapsed 0.025 ms (2.459 ms / 100) 2.481 -> 2.482 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.48% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.484 ms / 100) 2.456 -> 2.456 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.37% +0.33%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.519 -> 2.524 ( +0.20%) [ +0.24% +0.32% +0.00% / +0.20% +0.71% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.525 ms / 100) 2.554 -> 2.556 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.08% +0.63% +0.55%] index_copy_ spread : Elapsed 0.026 ms (2.554 ms / 100) 2.522 -> 2.522 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.59% +0.56%] index_add_ strided 3 : Elapsed 0.025 ms (2.522 ms / 100) 2.554 -> 2.562 ( +0.31%) [ +0.04% +0.00% +0.00% / +0.31% +0.55% +0.55%] index_copy_ strided 3 : Elapsed 0.026 ms (2.555 ms / 100) 2.524 -> 2.522 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.28% +0.16%] index_add_ strided 5 : Elapsed 0.025 ms (2.524 ms / 100) 2.553 -> 2.552 ( -0.04%) [ +0.00% +0.16% +0.16% / -0.04% +0.47% +0.59%] index_copy_ strided 5 : Elapsed 0.026 ms (2.553 ms / 100) 2.523 -> 2.527 ( +0.16%) [ +0.00% +0.12% +0.16% / +0.16% +0.44% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.523 ms / 100) 2.556 -> 2.561 ( +0.20%) [ +0.16% +0.20% +0.00% / +0.20% +0.47% +0.51%] index_copy_ strided 7 : Elapsed 0.026 ms (2.560 ms / 100) 2.483 -> 2.486 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.16% +0.60%] index_add_ perm : Elapsed 0.025 ms (2.483 ms / 100) 2.459 -> 2.455 ( -0.16%) [ +0.04% +0.00% +0.12% / -0.16% +0.28% +1.67%] index_copy_ perm : Elapsed 0.025 ms (2.460 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.20% +0.28% +0.00% / +0.12% +0.52% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.16% +0.00% +0.08% / +0.12% +0.45% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) 5.547 -> 5.544 ( -0.05%) [ +0.13% +0.00% +0.04% / -0.05% +0.47% +0.40%] index_select const : Elapsed 0.056 ms (5.554 ms / 100) 5.522 -> 5.525 ( +0.05%) [ +0.13% +0.07% +0.00% / +0.05% +0.49% +0.47%] index_select wrap : Elapsed 0.055 ms (5.529 ms / 100) 5.556 -> 5.553 ( -0.05%) [ +0.11% +0.09% +0.00% / -0.05% +0.61% +0.58%] index_select linear : Elapsed 0.056 ms (5.562 ms / 100) 5.536 -> 5.546 ( +0.18%) [ +0.04% +0.09% +0.00% / +0.18% +0.52% +0.54%] index_select reverse : Elapsed 0.055 ms (5.538 ms / 100) 5.541 -> 5.540 ( -0.02%) [ +0.16% +0.00% +0.07% / -0.02% +0.40% +0.51%] index_select skip64 : Elapsed 0.055 ms (5.550 ms / 100) 5.544 -> 5.548 ( +0.07%) [ +0.00% +0.05% +0.05% / +0.07% +0.51% +0.47%] index_select skip256 : Elapsed 0.055 ms (5.544 ms / 100) 5.521 -> 5.523 ( +0.04%) [ +0.05% +0.13% +0.00% / +0.04% +0.40% +0.45%] index_select spread : Elapsed 0.055 ms (5.524 ms / 100) 5.547 -> 5.548 ( +0.02%) [ +0.09% +0.09% +0.00% / +0.02% +0.36% +0.40%] index_select strided 3 : Elapsed 0.056 ms (5.552 ms / 100) 5.527 -> 5.522 ( -0.09%) [ +0.00% +0.11% +0.04% / -0.09% +0.20% +0.25%] index_select random : Elapsed 0.055 ms (5.527 ms / 100) 5.535 -> 5.532 ( -0.05%) [ +0.07% +0.09% +0.00% / -0.05% +0.56% +0.42%] index_select random_sorted : Elapsed 0.055 ms (5.539 ms / 100) B = [40, 16, 20, 5] (stride (1600, 1, 80, 16)) A = [40, 4, 20, 5] (stride (4, 1, 160, 3200)) dim = 1 2.512 -> 2.514 ( +0.08%) [ +0.20% +0.00% +0.08% / +0.12% +0.08% +0.24%] index_add_ linear : Elapsed 0.025 ms (2.517 ms / 100) 2.478 -> 2.482 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.32% +0.65%] index_copy_ linear : Elapsed 0.025 ms (2.480 ms / 100) 2.513 -> 2.510 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% -0.08% +0.12%] index_add_ reverse : Elapsed 0.025 ms (2.514 ms / 100) 2.477 -> 2.481 ( +0.16%) [ +0.32% +0.00% +0.28% / +0.16% +0.16% +0.24%] index_copy_ reverse : Elapsed 0.025 ms (2.485 ms / 100) 2.552 -> 2.553 ( +0.04%) [ +0.20% +0.16% +0.00% / +0.08% +0.04% +0.12%] index_add_ spread : Elapsed 0.026 ms (2.557 ms / 100) 2.583 -> 2.586 ( +0.12%) [ +0.04% +0.15% +0.00% / +0.27% +0.12% +0.19%] index_copy_ spread : Elapsed 0.026 ms (2.584 ms / 100) 2.555 -> 2.551 ( -0.16%) [ +0.00% +0.08% +0.08% / +0.04% -0.16% +0.12%] index_add_ strided 3 : Elapsed 0.026 ms (2.555 ms / 100) 2.581 -> 2.583 ( +0.08%) [ +0.00% +0.15% +0.15% / +0.08% +0.15% +0.12%] index_copy_ strided 3 : Elapsed 0.026 ms (2.581 ms / 100) 2.552 -> 2.552 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.24% +0.08% +0.00%] index_add_ strided 5 : Elapsed 0.026 ms (2.553 ms / 100) 2.582 -> 2.584 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.15% +0.35% +0.08%] index_copy_ strided 5 : Elapsed 0.026 ms (2.584 ms / 100) 2.550 -> 2.552 ( +0.08%) [ +0.20% +0.00% +0.31% / +0.08% +0.31% +0.16%] index_add_ strided 7 : Elapsed 0.026 ms (2.555 ms / 100) 2.582 -> 2.586 ( +0.15%) [ +0.00% +0.08% +0.19% / +0.19% +0.27% +0.15%] index_copy_ strided 7 : Elapsed 0.026 ms (2.582 ms / 100) 2.550 -> 2.555 ( +0.20%) [ +0.24% +0.00% +0.00% / +0.20% +0.31% +0.31%] index_add_ perm : Elapsed 0.026 ms (2.556 ms / 100) 2.580 -> 2.582 ( +0.08%) [ +0.00% +0.16% +0.16% / +0.08% +0.31% +0.12%] index_copy_ perm : Elapsed 0.026 ms (2.580 ms / 100) 2.553 -> 2.553 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.04% +0.24% +0.00%] index_add_ perm_sorted : Elapsed 0.026 ms (2.555 ms / 100) 2.584 -> 2.582 ( -0.08%) [ +0.00% +0.00% +0.00% / +0.00% +0.15% -0.08%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.584 ms / 100) 5.560 -> 5.556 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.45% +0.38%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.559 -> 5.568 ( +0.16%) [ +0.05% +0.09% +0.00% / +0.23% +0.22% +0.16%] index_select wrap : Elapsed 0.056 ms (5.562 ms / 100) 5.573 -> 5.577 ( +0.07%) [ +0.00% +0.23% +0.02% / +0.07% +0.27% +0.20%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.553 -> 5.582 ( +0.52%) [ +0.45% +0.00% +0.43% / +0.52% +0.81% +0.72%] index_select reverse : Elapsed 0.056 ms (5.578 ms / 100) 5.575 -> 5.578 ( +0.05%) [ +0.20% +0.00% +0.05% / +0.05% +0.43% +0.47%] index_select skip64 : Elapsed 0.056 ms (5.586 ms / 100) 5.569 -> 5.568 ( -0.02%) [ +0.00% +0.11% +0.09% / -0.02% +0.34% +0.36%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.561 -> 5.558 ( -0.05%) [ +0.04% +0.00% +0.07% / -0.05% +0.47% +0.32%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.556 -> 5.555 ( -0.02%) [ +0.00% +0.02% +0.38% / -0.02% +0.34% +0.27%] index_select strided 3 : Elapsed 0.056 ms (5.556 ms / 100) 5.574 -> 5.584 ( +0.18%) [ +0.00% +0.00% +0.02% / +0.18% +0.45% +0.43%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.559 -> 5.568 ( +0.16%) [ +0.00% +0.32% +0.36% / +0.34% +0.27% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.559 ms / 100) B = [40, 16, 20, 5] (stride (1, 40, 3200, 640)) A = [40, 4, 20, 5] (stride (1, 4000, 200, 40)) dim = 1 1.136 -> 1.135 ( -0.09%) [ +0.26% +0.26% +0.00% / +0.53% +0.00% -0.09%] index_add_ linear : Elapsed 0.011 ms (1.139 ms / 100) 1.139 -> 1.131 ( -0.70%) [ +0.35% +0.35% +0.00% / +0.09% -0.70% -0.70%] index_copy_ linear : Elapsed 0.011 ms (1.143 ms / 100) 1.138 -> 1.129 ( -0.79%) [ +0.26% +0.79% +0.00% / +0.44% -0.79% -0.44%] index_add_ reverse : Elapsed 0.011 ms (1.141 ms / 100) 1.138 -> 1.130 ( -0.70%) [ +0.35% +0.53% +0.00% / +0.44% -0.70% -0.44%] index_copy_ reverse : Elapsed 0.011 ms (1.142 ms / 100) 1.143 -> 1.141 ( -0.17%) [ +0.09% +0.35% +0.00% / -0.17% +0.26% -0.09%] index_add_ spread : Elapsed 0.011 ms (1.144 ms / 100) 1.140 -> 1.135 ( -0.44%) [ +0.26% +0.00% +0.00% / +0.18% -0.35% -0.44%] index_copy_ spread : Elapsed 0.011 ms (1.143 ms / 100) 1.144 -> 1.142 ( -0.17%) [ +0.52% +0.35% +0.00% / +0.09% -0.17% +0.00%] index_add_ strided 3 : Elapsed 0.011 ms (1.150 ms / 100) 1.138 -> 1.133 ( -0.44%) [ +0.00% +0.26% +0.18% / +0.26% -0.44% -0.35%] index_copy_ strided 3 : Elapsed 0.011 ms (1.138 ms / 100) 1.143 -> 1.143 ( +0.00%) [ +0.26% +0.00% +0.00% / +0.00% +0.26% +0.17%] index_add_ strided 5 : Elapsed 0.011 ms (1.146 ms / 100) 1.136 -> 1.137 ( +0.09%) [ +0.18% +0.00% +0.53% / +0.09% +0.26% +0.26%] index_copy_ strided 5 : Elapsed 0.011 ms (1.138 ms / 100) 1.137 -> 1.138 ( +0.09%) [ +0.70% +0.00% +0.53% / +0.70% +0.09% +0.44%] index_add_ strided 7 : Elapsed 0.011 ms (1.145 ms / 100) 1.136 -> 1.131 ( -0.44%) [ +0.09% +0.18% +0.00% / +0.35% -0.44% -0.44%] index_copy_ strided 7 : Elapsed 0.011 ms (1.137 ms / 100) 1.146 -> 1.138 ( -0.70%) [ +0.35% +0.17% +0.00% / +0.35% -0.70% -0.61%] index_add_ perm : Elapsed 0.012 ms (1.150 ms / 100) 1.140 -> 1.129 ( -0.96%) [ +0.18% +0.00% +0.26% / -0.44% -0.96% -0.96%] index_copy_ perm : Elapsed 0.011 ms (1.142 ms / 100) 1.144 -> 1.135 ( -0.79%) [ +0.00% +0.17% +0.09% / +0.17% -0.79% -0.70%] index_add_ perm_sorted : Elapsed 0.011 ms (1.144 ms / 100) 1.139 -> 1.125 ( -1.23%) [ +0.26% +0.00% +0.44% / +0.35% -0.70% -1.23%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.142 ms / 100) 2.118 -> 2.118 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.00% +0.52% +0.42%] index_select const : Elapsed 0.021 ms (2.119 ms / 100) 2.128 -> 2.135 ( +0.33%) [ +0.09% +0.38% +0.00% / +0.33% +1.27% +0.85%] index_select wrap : Elapsed 0.021 ms (2.130 ms / 100) 2.138 -> 2.135 ( -0.14%) [ +0.05% +0.05% +0.00% / -0.14% +0.75% +0.89%] index_select linear : Elapsed 0.021 ms (2.139 ms / 100) 2.108 -> 2.114 ( +0.28%) [ +0.28% +0.09% +0.00% / +0.28% +1.00% +1.19%] index_select reverse : Elapsed 0.021 ms (2.114 ms / 100) 2.120 -> 2.123 ( +0.14%) [ +0.24% +0.00% +0.14% / +0.24% +0.14% +0.24%] index_select skip64 : Elapsed 0.021 ms (2.125 ms / 100) 2.120 -> 2.121 ( +0.05%) [ +0.28% +0.00% +0.19% / +0.24% +0.05% +0.38%] index_select skip256 : Elapsed 0.021 ms (2.126 ms / 100) 2.126 -> 2.128 ( +0.09%) [ +0.24% +0.09% +0.00% / +0.14% +0.09% +0.09%] index_select spread : Elapsed 0.021 ms (2.131 ms / 100) 2.140 -> 2.138 ( -0.09%) [ +0.37% +0.37% +0.00% / -0.09% +0.23% +0.28%] index_select strided 3 : Elapsed 0.021 ms (2.148 ms / 100) 2.117 -> 2.119 ( +0.09%) [ +0.14% +0.19% +0.00% / +0.09% +0.99% +0.85%] index_select random : Elapsed 0.021 ms (2.120 ms / 100) 2.123 -> 2.126 ( +0.14%) [ +0.42% +0.00% +0.24% / +0.14% +0.57% +0.47%] index_select random_sorted : Elapsed 0.021 ms (2.132 ms / 100) out_shape = [40, 4, 16, 5] in_shape = [40, 4, 20, 5] idx_dim = 2 B = [40, 4, 16, 5] (stride (320, 80, 5, 1)) A = [40, 4, 20, 5] (stride (5, 4000, 200, 1)) dim = 2 3.580 -> 3.578 ( -0.06%) [ +0.00% +0.08% +0.00% / -0.06% +0.73% +0.81%] index_select const : Elapsed 0.036 ms (3.580 ms / 100) 3.573 -> 3.571 ( -0.06%) [ +0.06% +0.03% +0.00% / -0.06% +0.67% +0.67%] index_select wrap : Elapsed 0.036 ms (3.575 ms / 100) 3.561 -> 3.558 ( -0.08%) [ +0.03% +0.00% +0.03% / -0.08% +0.65% +0.70%] index_select linear : Elapsed 0.036 ms (3.562 ms / 100) 3.561 -> 3.563 ( +0.06%) [ +0.17% +0.00% +0.03% / +0.06% +0.76% +0.81%] index_select reverse : Elapsed 0.036 ms (3.567 ms / 100) 3.614 -> 3.616 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.97% +0.97%] index_select skip64 : Elapsed 0.036 ms (3.614 ms / 100) 3.573 -> 3.582 ( +0.25%) [ +0.25% +0.25% +0.00% / +0.25% +0.76% +0.76%] index_select skip256 : Elapsed 0.036 ms (3.582 ms / 100) 3.560 -> 3.563 ( +0.08%) [ +0.17% +0.06% +0.00% / +0.08% +0.76% +0.81%] index_select spread : Elapsed 0.036 ms (3.566 ms / 100) 3.566 -> 3.570 ( +0.11%) [ +0.00% +0.03% +0.06% / +0.11% +0.73% +0.70%] index_select strided 3 : Elapsed 0.036 ms (3.566 ms / 100) 3.556 -> 3.557 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.76% +0.76%] index_select strided 5 : Elapsed 0.036 ms (3.557 ms / 100) 3.556 -> 3.556 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.56% +0.67%] index_select strided 7 : Elapsed 0.036 ms (3.557 ms / 100) 3.577 -> 3.583 ( +0.17%) [ +0.14% +0.00% +0.14% / +0.17% +0.92% +0.81%] index_select strided 8 : Elapsed 0.036 ms (3.582 ms / 100) 3.583 -> 3.586 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.81% +0.61%] index_select strided 16 : Elapsed 0.036 ms (3.583 ms / 100) 3.566 -> 3.571 ( +0.14%) [ +0.00% +0.17% +0.06% / +0.14% +0.50% +0.70%] index_select random : Elapsed 0.036 ms (3.566 ms / 100) 3.560 -> 3.560 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select random_sorted : Elapsed 0.036 ms (3.564 ms / 100) 3.567 -> 3.568 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.53% +0.50%] index_select perm : Elapsed 0.036 ms (3.569 ms / 100) 3.562 -> 3.563 ( +0.03%) [ +0.00% +0.14% +0.11% / +0.03% +0.59% +0.65%] index_select perm_sorted : Elapsed 0.036 ms (3.562 ms / 100) B = [40, 4, 16, 5] (stride (320, 1, 20, 4)) A = [40, 4, 20, 5] (stride (400, 100, 5, 1)) dim = 2 3.648 -> 3.648 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.00% +0.41% +0.38%] index_select const : Elapsed 0.036 ms (3.650 ms / 100) 3.622 -> 3.625 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.58% +0.50%] index_select wrap : Elapsed 0.036 ms (3.623 ms / 100) 3.625 -> 3.629 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.58% +0.63%] index_select linear : Elapsed 0.036 ms (3.633 ms / 100) 3.625 -> 3.623 ( -0.06%) [ +0.11% +0.08% +0.00% / -0.06% +0.83% +0.61%] index_select reverse : Elapsed 0.036 ms (3.629 ms / 100) 3.645 -> 3.646 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.52% +0.33%] index_select skip64 : Elapsed 0.036 ms (3.645 ms / 100) 3.646 -> 3.649 ( +0.08%) [ +0.11% +0.14% +0.00% / +0.08% +0.49% +0.47%] index_select skip256 : Elapsed 0.037 ms (3.650 ms / 100) 3.621 -> 3.624 ( +0.08%) [ +0.00% +0.06% +0.11% / +0.08% +0.47% +0.47%] index_select spread : Elapsed 0.036 ms (3.621 ms / 100) 3.630 -> 3.627 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.47% +0.47%] index_select strided 3 : Elapsed 0.036 ms (3.633 ms / 100) 3.619 -> 3.619 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.36% +0.39%] index_select strided 5 : Elapsed 0.036 ms (3.621 ms / 100) 3.621 -> 3.619 ( -0.06%) [ +0.03% +0.08% +0.00% / -0.06% +0.64% +0.58%] index_select strided 7 : Elapsed 0.036 ms (3.622 ms / 100) 3.623 -> 3.632 ( +0.25%) [ +0.00% +0.00% +0.06% / +0.25% +0.63% +0.63%] index_select strided 8 : Elapsed 0.036 ms (3.623 ms / 100) 3.621 -> 3.622 ( +0.03%) [ +0.11% +0.03% +0.00% / +0.03% +0.72% +0.72%] index_select strided 16 : Elapsed 0.036 ms (3.625 ms / 100) 3.626 -> 3.623 ( -0.08%) [ +0.00% +0.03% +0.03% / -0.08% +0.66% +0.72%] index_select random : Elapsed 0.036 ms (3.626 ms / 100) 3.624 -> 3.625 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.61% +0.72%] index_select random_sorted : Elapsed 0.036 ms (3.625 ms / 100) 3.609 -> 3.609 ( +0.00%) [ +0.00% +0.08% +0.06% / +0.00% +0.64% +0.58%] index_select perm : Elapsed 0.036 ms (3.609 ms / 100) 3.614 -> 3.616 ( +0.06%) [ +0.11% +0.00% +0.14% / +0.06% +0.80% +0.80%] index_select perm_sorted : Elapsed 0.036 ms (3.618 ms / 100) B = [40, 4, 16, 5] (stride (320, 16, 1, 64)) A = [40, 4, 20, 5] (stride (5, 4000, 200, 1)) dim = 2 3.889 -> 3.891 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.77% +0.95%] index_select const : Elapsed 0.039 ms (3.890 ms / 100) 3.888 -> 3.892 ( +0.10%) [ +0.31% +0.10% +0.00% / +0.10% +1.00% +1.00%] index_select wrap : Elapsed 0.039 ms (3.900 ms / 100) 3.875 -> 3.877 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.77% +0.85%] index_select linear : Elapsed 0.039 ms (3.879 ms / 100) 3.873 -> 3.874 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +1.37% +1.34%] index_select reverse : Elapsed 0.039 ms (3.874 ms / 100) 3.912 -> 3.908 ( -0.10%) [ +0.08% +0.00% +0.10% / -0.10% +0.61% +0.54%] index_select skip64 : Elapsed 0.039 ms (3.915 ms / 100) 3.891 -> 3.893 ( +0.05%) [ +0.00% +0.13% +0.00% / +0.05% +0.67% +0.69%] index_select skip256 : Elapsed 0.039 ms (3.891 ms / 100) 3.876 -> 3.881 ( +0.13%) [ +0.15% +0.00% +0.13% / +0.13% +0.90% +0.62%] index_select spread : Elapsed 0.039 ms (3.882 ms / 100) 3.878 -> 3.880 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.05% +0.59% +0.77%] index_select strided 3 : Elapsed 0.039 ms (3.881 ms / 100) 3.868 -> 3.870 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +1.03% +0.90%] index_select strided 5 : Elapsed 0.039 ms (3.870 ms / 100) 3.871 -> 3.875 ( +0.10%) [ +0.10% +0.00% +0.08% / +0.10% +0.70% +0.77%] index_select strided 7 : Elapsed 0.039 ms (3.875 ms / 100) 3.897 -> 3.907 ( +0.26%) [ +0.21% +0.13% +0.00% / +0.26% +0.69% +0.69%] index_select strided 8 : Elapsed 0.039 ms (3.905 ms / 100) 3.891 -> 3.897 ( +0.15%) [ +0.00% +0.10% +0.00% / +0.15% +0.85% +0.98%] index_select strided 16 : Elapsed 0.039 ms (3.891 ms / 100) 3.878 -> 3.881 ( +0.08%) [ +0.15% +0.13% +0.00% / +0.08% +0.57% +0.64%] index_select random : Elapsed 0.039 ms (3.884 ms / 100) 3.870 -> 3.871 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.67% +0.44%] index_select random_sorted : Elapsed 0.039 ms (3.870 ms / 100) 3.886 -> 3.888 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.98% +0.75%] index_select perm : Elapsed 0.039 ms (3.888 ms / 100) 3.884 -> 3.884 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.59% +0.46%] index_select perm_sorted : Elapsed 0.039 ms (3.886 ms / 100) B = [40, 4, 16, 5] (stride (80, 3200, 1, 16)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 2 4.023 -> 4.028 ( +0.12%) [ +0.10% +0.00% +0.05% / +0.12% +0.37% +0.42%] index_select const : Elapsed 0.040 ms (4.027 ms / 100) 4.019 -> 4.017 ( -0.05%) [ +0.00% +0.07% +0.12% / -0.05% +0.47% +0.97%] index_select wrap : Elapsed 0.040 ms (4.019 ms / 100) 4.020 -> 4.023 ( +0.07%) [ +0.07% +0.00% +0.10% / +0.07% +0.55% +0.62%] index_select linear : Elapsed 0.040 ms (4.023 ms / 100) 4.012 -> 4.013 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.60% +0.65%] index_select reverse : Elapsed 0.040 ms (4.013 ms / 100) 4.023 -> 4.026 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.60%] index_select skip64 : Elapsed 0.040 ms (4.026 ms / 100) 4.023 -> 4.022 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.57% +0.57%] index_select skip256 : Elapsed 0.040 ms (4.024 ms / 100) 4.020 -> 4.027 ( +0.17%) [ +0.17% +0.12% +0.00% / +0.17% +0.45% +0.65%] index_select spread : Elapsed 0.040 ms (4.027 ms / 100) 4.013 -> 4.013 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.57% +0.57%] index_select strided 3 : Elapsed 0.040 ms (4.013 ms / 100) 4.007 -> 4.011 ( +0.10%) [ +0.12% +0.00% +0.05% / +0.10% +0.82% +0.75%] index_select strided 5 : Elapsed 0.040 ms (4.012 ms / 100) 4.025 -> 4.025 ( +0.00%) [ +0.12% +0.10% +0.00% / +0.00% +0.70% +0.65%] index_select strided 7 : Elapsed 0.040 ms (4.030 ms / 100) 4.023 -> 4.025 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.62% +0.65%] index_select strided 8 : Elapsed 0.040 ms (4.025 ms / 100) 4.023 -> 4.028 ( +0.12%) [ +0.10% +0.05% +0.00% / +0.12% +0.65% +0.60%] index_select strided 16 : Elapsed 0.040 ms (4.027 ms / 100) 4.012 -> 4.013 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.60% +0.65%] index_select random : Elapsed 0.040 ms (4.015 ms / 100) 4.035 -> 4.034 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.47% +0.47%] index_select random_sorted : Elapsed 0.040 ms (4.037 ms / 100) 4.023 -> 4.035 ( +0.30%) [ +0.30% +0.27% +0.00% / +0.30% +0.89% +0.75%] index_select perm : Elapsed 0.040 ms (4.035 ms / 100) 4.014 -> 4.015 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.60% +0.70%] index_select perm_sorted : Elapsed 0.040 ms (4.014 ms / 100) B = [40, 4, 16, 5] (stride (5, 3200, 200, 1)) A = [40, 4, 20, 5] (stride (80, 20, 1, 3200)) dim = 2 3.801 -> 3.801 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.89% +0.82%] index_select const : Elapsed 0.038 ms (3.803 ms / 100) 3.815 -> 3.820 ( +0.13%) [ +0.10% +0.00% +0.10% / +0.13% +0.92% +0.92%] index_select wrap : Elapsed 0.038 ms (3.819 ms / 100) 3.803 -> 3.805 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.84% +0.87%] index_select linear : Elapsed 0.038 ms (3.805 ms / 100) 3.803 -> 3.808 ( +0.13%) [ +0.00% +0.16% +0.32% / +0.13% +0.89% +0.95%] index_select reverse : Elapsed 0.038 ms (3.803 ms / 100) 3.813 -> 3.809 ( -0.10%) [ +0.03% +0.00% +0.00% / -0.10% +0.55% +0.63%] index_select skip64 : Elapsed 0.038 ms (3.814 ms / 100) 3.800 -> 3.802 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.89% +0.87%] index_select skip256 : Elapsed 0.038 ms (3.800 ms / 100) 3.817 -> 3.821 ( +0.10%) [ +0.13% +0.08% +0.00% / +0.10% +0.71% +0.60%] index_select spread : Elapsed 0.038 ms (3.822 ms / 100) 3.799 -> 3.799 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.63% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.799 ms / 100) 3.793 -> 3.792 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.66% +0.63%] index_select strided 5 : Elapsed 0.038 ms (3.795 ms / 100) 3.794 -> 3.800 ( +0.16%) [ +0.16% +0.03% +0.00% / +0.16% +0.66% +0.63%] index_select strided 7 : Elapsed 0.038 ms (3.800 ms / 100) 3.793 -> 3.794 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.58% +0.61%] index_select strided 8 : Elapsed 0.038 ms (3.793 ms / 100) 3.806 -> 3.809 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.74% +0.74%] index_select strided 16 : Elapsed 0.038 ms (3.809 ms / 100) 3.803 -> 3.808 ( +0.13%) [ +0.34% +0.37% +0.00% / +0.13% +0.84% +0.66%] index_select random : Elapsed 0.038 ms (3.816 ms / 100) 3.798 -> 3.802 ( +0.11%) [ +0.00% +0.03% +0.16% / +0.11% +0.63% +0.66%] index_select random_sorted : Elapsed 0.038 ms (3.798 ms / 100) 3.803 -> 3.804 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.71% +0.63%] index_select perm : Elapsed 0.038 ms (3.803 ms / 100) 3.806 -> 3.807 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.038 ms (3.809 ms / 100) B = [40, 4, 16, 5] (stride (1, 200, 800, 40)) A = [40, 4, 20, 5] (stride (1, 40, 800, 160)) dim = 2 4.122 -> 4.122 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.46% +0.46%] index_select const : Elapsed 0.041 ms (4.123 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.46% +0.46%] index_select wrap : Elapsed 0.041 ms (4.138 ms / 100) 4.130 -> 4.129 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.51% +0.48%] index_select linear : Elapsed 0.041 ms (4.131 ms / 100) 4.121 -> 4.121 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.66% +0.58%] index_select reverse : Elapsed 0.041 ms (4.121 ms / 100) 4.126 -> 4.126 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.58% +0.56%] index_select skip64 : Elapsed 0.041 ms (4.128 ms / 100) 4.153 -> 4.153 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.36% +0.39%] index_select skip256 : Elapsed 0.042 ms (4.154 ms / 100) 4.122 -> 4.122 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.58%] index_select spread : Elapsed 0.041 ms (4.122 ms / 100) 4.137 -> 4.143 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +0.73% +0.44%] index_select strided 3 : Elapsed 0.041 ms (4.137 ms / 100) 4.125 -> 4.125 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.36%] index_select strided 5 : Elapsed 0.041 ms (4.125 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.63% +0.68%] index_select strided 7 : Elapsed 0.041 ms (4.139 ms / 100) 4.136 -> 4.137 ( +0.02%) [ +0.12% +0.00% +0.00% / +0.02% +0.65% +0.63%] index_select strided 8 : Elapsed 0.041 ms (4.141 ms / 100) 4.118 -> 4.120 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.53% +0.53%] index_select strided 16 : Elapsed 0.041 ms (4.118 ms / 100) 4.133 -> 4.136 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.07% +0.65% +0.65%] index_select random : Elapsed 0.041 ms (4.135 ms / 100) 4.134 -> 4.134 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.73% +0.73%] index_select random_sorted : Elapsed 0.041 ms (4.135 ms / 100) 4.127 -> 4.129 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.73% +0.70%] index_select perm : Elapsed 0.041 ms (4.129 ms / 100) 4.100 -> 4.099 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.68% +0.66%] index_select perm_sorted : Elapsed 0.041 ms (4.100 ms / 100) B = [40, 4, 16, 5] (stride (1, 640, 40, 2560)) A = [40, 4, 20, 5] (stride (100, 4000, 5, 1)) dim = 2 3.946 -> 3.956 ( +0.25%) [ +0.20% +0.23% +0.00% / +0.25% +0.71% +0.99%] index_select const : Elapsed 0.040 ms (3.954 ms / 100) 3.933 -> 3.941 ( +0.20%) [ +0.00% +0.05% +0.05% / +0.20% +0.74% +0.79%] index_select wrap : Elapsed 0.039 ms (3.933 ms / 100) 3.927 -> 3.931 ( +0.10%) [ +0.13% +0.05% +0.00% / +0.10% +0.84% +0.79%] index_select linear : Elapsed 0.039 ms (3.932 ms / 100) 3.929 -> 3.934 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.94% +0.84%] index_select reverse : Elapsed 0.039 ms (3.934 ms / 100) 3.956 -> 3.954 ( -0.05%) [ +0.03% +0.15% +0.00% / -0.05% +0.83% +0.83%] index_select skip64 : Elapsed 0.040 ms (3.957 ms / 100) 3.958 -> 3.962 ( +0.10%) [ +0.23% +0.00% +0.03% / +0.10% +0.71% +0.68%] index_select skip256 : Elapsed 0.040 ms (3.967 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.64% +0.66%] index_select spread : Elapsed 0.039 ms (3.933 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.64% +0.66%] index_select strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +1.15% +0.79%] index_select strided 5 : Elapsed 0.039 ms (3.923 ms / 100) 3.943 -> 3.944 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.03% +1.07% +0.68%] index_select strided 7 : Elapsed 0.039 ms (3.945 ms / 100) 3.940 -> 3.938 ( -0.05%) [ +0.33% +0.00% +0.00% / -0.05% +1.09% +0.86%] index_select strided 8 : Elapsed 0.040 ms (3.953 ms / 100) 3.947 -> 3.954 ( +0.18%) [ +0.15% +0.10% +0.00% / +0.18% +0.94% +0.73%] index_select strided 16 : Elapsed 0.040 ms (3.953 ms / 100) 3.945 -> 3.943 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.71% +0.63%] index_select random : Elapsed 0.039 ms (3.947 ms / 100) 3.925 -> 3.929 ( +0.10%) [ +0.03% +0.10% +0.00% / +0.10% +0.56% +0.56%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.934 -> 3.933 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.61% +0.56%] index_select perm : Elapsed 0.039 ms (3.936 ms / 100) 3.929 -> 3.929 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.64% +0.61%] index_select perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) out_shape = [40, 4, 20, 16] in_shape = [40, 4, 20, 5] idx_dim = 3 B = [40, 4, 20, 16] (stride (1280, 16, 64, 1)) A = [40, 4, 20, 5] (stride (400, 100, 1, 20)) dim = 3 2.238 -> 2.243 ( +0.22%) [ +0.00% +0.27% +0.00% / +0.22% +2.19% +2.01%] index_add_ linear : Elapsed 0.022 ms (2.238 ms / 100) 2.212 -> 2.215 ( +0.14%) [ +0.00% +0.27% +0.23% / +0.14% +2.08% +2.17%] index_copy_ linear : Elapsed 0.022 ms (2.212 ms / 100) 2.233 -> 2.232 ( -0.04%) [ +0.00% +0.09% +0.13% / -0.04% +2.46% +2.19%] index_add_ reverse : Elapsed 0.022 ms (2.233 ms / 100) 2.213 -> 2.206 ( -0.32%) [ +0.00% +0.00% +0.05% / -0.32% +2.03% +1.99%] index_copy_ reverse : Elapsed 0.022 ms (2.213 ms / 100) 2.276 -> 2.277 ( +0.04%) [ +0.13% +0.00% +0.13% / +0.04% +2.15% +2.11%] index_add_ spread : Elapsed 0.023 ms (2.279 ms / 100) 2.280 -> 2.286 ( +0.26%) [ +0.13% +0.13% +0.00% / +0.26% +2.11% +2.19%] index_copy_ spread : Elapsed 0.023 ms (2.283 ms / 100) 2.271 -> 2.271 ( +0.00%) [ +0.00% +0.04% +0.09% / +0.00% +2.03% +1.98%] index_add_ strided 3 : Elapsed 0.023 ms (2.271 ms / 100) 2.279 -> 2.278 ( -0.04%) [ +0.00% +0.18% +0.13% / -0.04% +2.11% +2.06%] index_copy_ strided 3 : Elapsed 0.023 ms (2.279 ms / 100) 2.259 -> 2.268 ( +0.40%) [ +0.00% +0.40% +0.00% / +0.40% +2.17% +1.99%] index_add_ strided 5 : Elapsed 0.023 ms (2.259 ms / 100) 2.275 -> 2.277 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +1.85% +1.80%] index_copy_ strided 5 : Elapsed 0.023 ms (2.279 ms / 100) 2.264 -> 2.271 ( +0.31%) [ +0.44% +0.35% +0.00% / +0.31% +2.30% +2.08%] index_add_ strided 7 : Elapsed 0.023 ms (2.274 ms / 100) 2.274 -> 2.285 ( +0.48%) [ +0.13% +0.26% +0.00% / +0.48% +2.07% +2.07%] index_copy_ strided 7 : Elapsed 0.023 ms (2.277 ms / 100) 2.270 -> 2.272 ( +0.09%) [ +0.00% +0.09% +0.31% / +0.09% +2.07% +1.81%] index_add_ perm : Elapsed 0.023 ms (2.270 ms / 100) 2.275 -> 2.278 ( +0.13%) [ +0.22% +0.00% +0.44% / +0.13% +2.29% +2.11%] index_copy_ perm : Elapsed 0.023 ms (2.280 ms / 100) 2.274 -> 2.278 ( +0.18%) [ +0.31% +0.13% +0.00% / +0.18% +2.46% +2.20%] index_add_ perm_sorted : Elapsed 0.023 ms (2.281 ms / 100) 2.278 -> 2.288 ( +0.44%) [ +0.18% +0.18% +0.00% / +0.44% +2.11% +2.37%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.282 ms / 100) 4.531 -> 4.539 ( +0.18%) [ +0.00% +0.13% +0.13% / +0.18% +0.90% +0.93%] index_select const : Elapsed 0.045 ms (4.531 ms / 100) 4.568 -> 4.569 ( +0.02%) [ +0.00% +0.18% +0.04% / +0.02% +1.27% +1.36%] index_select wrap : Elapsed 0.046 ms (4.568 ms / 100) 4.575 -> 4.585 ( +0.22%) [ +0.00% +0.04% +0.17% / +0.22% +1.38% +1.57%] index_select linear : Elapsed 0.046 ms (4.575 ms / 100) 4.569 -> 4.587 ( +0.39%) [ +0.13% +0.00% +0.07% / +0.39% +1.44% +1.60%] index_select reverse : Elapsed 0.046 ms (4.575 ms / 100) 4.527 -> 4.529 ( +0.04%) [ +0.00% +0.07% +0.29% / +0.04% +1.17% +0.91%] index_select skip64 : Elapsed 0.045 ms (4.527 ms / 100) 4.527 -> 4.536 ( +0.20%) [ +0.00% +0.11% +0.13% / +0.20% +1.26% +0.99%] index_select skip256 : Elapsed 0.045 ms (4.527 ms / 100) 4.580 -> 4.583 ( +0.07%) [ +0.07% +0.02% +0.00% / +0.07% +1.31% +1.20%] index_select spread : Elapsed 0.046 ms (4.583 ms / 100) 4.575 -> 4.590 ( +0.33%) [ +0.00% +0.39% +0.09% / +0.33% +1.70% +1.68%] index_select strided 3 : Elapsed 0.046 ms (4.575 ms / 100) 4.571 -> 4.573 ( +0.04%) [ +0.00% +0.28% +0.28% / +0.04% +1.36% +1.55%] index_select random : Elapsed 0.046 ms (4.571 ms / 100) 4.571 -> 4.573 ( +0.04%) [ +0.09% +0.00% +0.02% / +0.04% +1.66% +1.51%] index_select random_sorted : Elapsed 0.046 ms (4.575 ms / 100) B = [40, 4, 20, 16] (stride (1280, 16, 64, 1)) A = [40, 4, 20, 5] (stride (4, 1, 800, 160)) dim = 3 2.358 -> 2.360 ( +0.08%) [ +0.21% +0.00% +0.13% / +0.08% +0.85% +0.68%] index_add_ linear : Elapsed 0.024 ms (2.363 ms / 100) 2.330 -> 2.333 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +0.52% +0.69%] index_copy_ linear : Elapsed 0.023 ms (2.334 ms / 100) 2.367 -> 2.367 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.42%] index_add_ reverse : Elapsed 0.024 ms (2.367 ms / 100) 2.338 -> 2.341 ( +0.13%) [ +0.09% +0.17% +0.00% / +0.13% +0.47% +0.51%] index_copy_ reverse : Elapsed 0.023 ms (2.340 ms / 100) 2.398 -> 2.402 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.67% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.400 -> 2.404 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.33% +0.46%] index_copy_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.391 -> 2.395 ( +0.17%) [ +0.17% +0.04% +0.00% / +0.17% +0.67% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.395 ms / 100) 2.397 -> 2.397 ( +0.00%) [ +0.13% +0.08% +0.00% / +0.00% +0.29% +0.25%] index_copy_ strided 3 : Elapsed 0.024 ms (2.400 ms / 100) 2.392 -> 2.394 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.46% +0.67%] index_add_ strided 5 : Elapsed 0.024 ms (2.392 ms / 100) 2.399 -> 2.400 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.42% +0.46%] index_copy_ strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.394 -> 2.392 ( -0.08%) [ +0.13% +0.04% +0.00% / -0.08% +0.25% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.397 ms / 100) 2.397 -> 2.397 ( +0.00%) [ +0.04% +0.00% +0.17% / +0.00% +0.25% +0.21%] index_copy_ strided 7 : Elapsed 0.024 ms (2.398 ms / 100) 2.360 -> 2.360 ( +0.00%) [ +0.21% +0.08% +0.00% / +0.00% +0.68% +0.55%] index_add_ perm : Elapsed 0.024 ms (2.365 ms / 100) 2.330 -> 2.334 ( +0.17%) [ +0.21% +0.00% +0.04% / +0.17% +0.47% +0.39%] index_copy_ perm : Elapsed 0.023 ms (2.335 ms / 100) 2.364 -> 2.367 ( +0.13%) [ +0.00% +0.13% +0.30% / +0.13% +0.63% +0.47%] index_add_ perm_sorted : Elapsed 0.024 ms (2.364 ms / 100) 2.333 -> 2.337 ( +0.17%) [ +0.00% +0.04% +0.09% / +0.17% +0.51% +0.60%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) 5.068 -> 5.076 ( +0.16%) [ +0.02% +0.00% +0.04% / +0.16% +0.63% +0.79%] index_select const : Elapsed 0.051 ms (5.069 ms / 100) 5.029 -> 5.034 ( +0.10%) [ +0.00% +0.08% +0.14% / +0.10% +0.52% +0.48%] index_select wrap : Elapsed 0.050 ms (5.029 ms / 100) 5.053 -> 5.059 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.12% +0.47% +0.49%] index_select linear : Elapsed 0.051 ms (5.053 ms / 100) 5.017 -> 5.016 ( -0.02%) [ +0.00% +0.08% +0.00% / -0.02% +0.58% +0.50%] index_select reverse : Elapsed 0.050 ms (5.017 ms / 100) 5.072 -> 5.074 ( +0.04%) [ +0.10% +0.02% +0.00% / +0.04% +0.63% +0.61%] index_select skip64 : Elapsed 0.051 ms (5.077 ms / 100) 5.074 -> 5.069 ( -0.10%) [ +0.04% +0.02% +0.00% / -0.10% +0.51% +0.49%] index_select skip256 : Elapsed 0.051 ms (5.076 ms / 100) 5.015 -> 5.018 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.52% +0.62%] index_select spread : Elapsed 0.050 ms (5.018 ms / 100) 5.043 -> 5.042 ( -0.02%) [ +0.04% +0.04% +0.00% / -0.02% +0.56% +0.65%] index_select strided 3 : Elapsed 0.050 ms (5.045 ms / 100) 5.021 -> 5.020 ( -0.02%) [ +0.08% +0.00% +0.08% / -0.02% +0.78% +0.84%] index_select random : Elapsed 0.050 ms (5.025 ms / 100) 4.997 -> 5.001 ( +0.08%) [ +0.10% +0.12% +0.00% / +0.08% +0.80% +0.68%] index_select random_sorted : Elapsed 0.050 ms (5.002 ms / 100) B = [40, 4, 20, 16] (stride (64, 16, 2560, 1)) A = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) dim = 3 2.214 -> 2.214 ( +0.00%) [ +0.18% +0.14% +0.00% / +0.00% +0.36% +0.45%] index_add_ linear : Elapsed 0.022 ms (2.218 ms / 100) 2.192 -> 2.192 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.00% +0.59% +0.55%] index_copy_ linear : Elapsed 0.022 ms (2.193 ms / 100) 2.214 -> 2.219 ( +0.23%) [ +0.00% +0.32% +0.27% / +0.23% +0.41% +0.86%] index_add_ reverse : Elapsed 0.022 ms (2.214 ms / 100) 2.193 -> 2.192 ( -0.05%) [ +0.00% +0.14% +0.09% / -0.05% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.022 ms (2.193 ms / 100) 2.244 -> 2.248 ( +0.18%) [ +0.31% +0.18% +0.00% / +0.18% +0.94% +0.62%] index_add_ spread : Elapsed 0.023 ms (2.251 ms / 100) 2.257 -> 2.260 ( +0.13%) [ +0.22% +0.00% +0.27% / +0.13% +1.02% +0.93%] index_copy_ spread : Elapsed 0.023 ms (2.262 ms / 100) 2.253 -> 2.250 ( -0.13%) [ +0.00% +0.27% +0.04% / -0.13% +0.53% +0.40%] index_add_ strided 3 : Elapsed 0.023 ms (2.253 ms / 100) 2.261 -> 2.263 ( +0.09%) [ +0.00% +0.27% +0.09% / +0.09% +0.62% +0.75%] index_copy_ strided 3 : Elapsed 0.023 ms (2.261 ms / 100) 2.246 -> 2.251 ( +0.22%) [ +0.27% +0.09% +0.00% / +0.22% +0.71% +0.62%] index_add_ strided 5 : Elapsed 0.023 ms (2.252 ms / 100) 2.257 -> 2.259 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.58% +0.75%] index_copy_ strided 5 : Elapsed 0.023 ms (2.257 ms / 100) 2.243 -> 2.252 ( +0.40%) [ +0.31% +0.13% +0.00% / +0.40% +0.80% +0.62%] index_add_ strided 7 : Elapsed 0.022 ms (2.250 ms / 100) 2.252 -> 2.256 ( +0.18%) [ +0.18% +0.27% +0.00% / +0.18% +0.98% +0.80%] index_copy_ strided 7 : Elapsed 0.023 ms (2.256 ms / 100) 2.249 -> 2.252 ( +0.13%) [ +0.18% +0.00% +0.22% / +0.13% +0.44% +0.62%] index_add_ perm : Elapsed 0.023 ms (2.253 ms / 100) 2.259 -> 2.260 ( +0.04%) [ +0.18% +0.00% +0.09% / +0.04% +0.53% +0.66%] index_copy_ perm : Elapsed 0.023 ms (2.263 ms / 100) 2.247 -> 2.253 ( +0.27%) [ +0.36% +0.22% +0.00% / +0.27% +0.58% +0.89%] index_add_ perm_sorted : Elapsed 0.023 ms (2.255 ms / 100) 2.250 -> 2.259 ( +0.40%) [ +0.67% +0.36% +0.00% / +0.40% +1.02% +0.98%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.265 ms / 100) 4.567 -> 4.568 ( +0.02%) [ +0.00% +0.04% +0.15% / +0.02% +0.61% +0.44%] index_select const : Elapsed 0.046 ms (4.567 ms / 100) 4.587 -> 4.589 ( +0.04%) [ +0.09% +0.15% +0.00% / +0.04% +1.00% +0.70%] index_select wrap : Elapsed 0.046 ms (4.591 ms / 100) 4.609 -> 4.615 ( +0.13%) [ +0.00% +0.13% +0.02% / +0.13% +0.74% +0.93%] index_select linear : Elapsed 0.046 ms (4.609 ms / 100) 4.581 -> 4.586 ( +0.11%) [ +0.02% +0.28% +0.00% / +0.11% +0.87% +0.98%] index_select reverse : Elapsed 0.046 ms (4.582 ms / 100) 4.559 -> 4.573 ( +0.31%) [ +0.00% +0.04% +0.04% / +0.31% +0.53% +0.77%] index_select skip64 : Elapsed 0.046 ms (4.559 ms / 100) 4.569 -> 4.570 ( +0.02%) [ +0.00% +0.04% +0.11% / +0.02% +0.59% +0.55%] index_select skip256 : Elapsed 0.046 ms (4.569 ms / 100) 4.596 -> 4.594 ( -0.04%) [ +0.11% +0.04% +0.00% / -0.04% +0.67% +0.67%] index_select spread : Elapsed 0.046 ms (4.601 ms / 100) 4.596 -> 4.599 ( +0.07%) [ +0.44% +0.00% +0.39% / +0.07% +1.20% +0.89%] index_select strided 3 : Elapsed 0.046 ms (4.616 ms / 100) 4.591 -> 4.591 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +1.09% +1.20%] index_select random : Elapsed 0.046 ms (4.591 ms / 100) 4.580 -> 4.584 ( +0.09%) [ +0.09% +0.00% +0.11% / +0.09% +1.11% +1.00%] index_select random_sorted : Elapsed 0.046 ms (4.584 ms / 100) B = [40, 4, 20, 16] (stride (64, 1, 2560, 4)) A = [40, 4, 20, 5] (stride (400, 100, 1, 20)) dim = 3 2.268 -> 2.268 ( +0.00%) [ +0.40% +0.18% +0.00% / +0.00% +0.31% +0.22%] index_add_ linear : Elapsed 0.023 ms (2.277 ms / 100) 2.229 -> 2.233 ( +0.18%) [ +0.09% +0.18% +0.00% / +0.18% +0.22% +0.27%] index_copy_ linear : Elapsed 0.022 ms (2.231 ms / 100) 2.270 -> 2.273 ( +0.13%) [ +0.13% +0.26% +0.00% / +0.18% +0.13% +0.18%] index_add_ reverse : Elapsed 0.023 ms (2.273 ms / 100) 2.230 -> 2.228 ( -0.09%) [ +0.00% +0.22% +0.45% / +0.18% -0.09% +0.09%] index_copy_ reverse : Elapsed 0.022 ms (2.230 ms / 100) 2.295 -> 2.287 ( -0.35%) [ +0.09% +0.13% +0.00% / +0.04% -0.22% -0.35%] index_add_ spread : Elapsed 0.023 ms (2.297 ms / 100) 2.268 -> 2.254 ( -0.62%) [ +0.13% +0.18% +0.00% / +0.40% -0.62% -0.62%] index_copy_ spread : Elapsed 0.023 ms (2.271 ms / 100) 2.290 -> 2.289 ( -0.04%) [ +0.39% +0.17% +0.00% / +0.13% +0.31% -0.04%] index_add_ strided 3 : Elapsed 0.023 ms (2.299 ms / 100) 2.259 -> 2.260 ( +0.04%) [ +0.31% +0.18% +0.00% / +0.40% +0.09% +0.04%] index_copy_ strided 3 : Elapsed 0.023 ms (2.266 ms / 100) 2.292 -> 2.283 ( -0.39%) [ +0.00% +0.00% +0.17% / +0.04% -0.39% -0.22%] index_add_ strided 5 : Elapsed 0.023 ms (2.292 ms / 100) 2.254 -> 2.240 ( -0.62%) [ +0.00% +0.09% +0.04% / +0.09% -0.62% -0.58%] index_copy_ strided 5 : Elapsed 0.023 ms (2.254 ms / 100) 2.296 -> 2.279 ( -0.74%) [ +0.35% +0.00% +0.52% / +0.00% -0.74% -0.35%] index_add_ strided 7 : Elapsed 0.023 ms (2.304 ms / 100) 2.272 -> 2.255 ( -0.75%) [ +0.00% +0.18% +0.26% / +0.00% -0.62% -0.75%] index_copy_ strided 7 : Elapsed 0.023 ms (2.272 ms / 100) 2.289 -> 2.289 ( +0.00%) [ +0.35% +0.00% +0.26% / +0.22% +0.13% +0.00%] index_add_ perm : Elapsed 0.023 ms (2.297 ms / 100) 2.262 -> 2.257 ( -0.22%) [ +0.18% +0.22% +0.00% / +0.13% -0.04% -0.22%] index_copy_ perm : Elapsed 0.023 ms (2.266 ms / 100) 2.287 -> 2.285 ( -0.09%) [ +0.04% +0.00% +0.04% / +0.04% +0.00% -0.09%] index_add_ perm_sorted : Elapsed 0.023 ms (2.288 ms / 100) 2.265 -> 2.251 ( -0.62%) [ +0.00% +0.00% +0.04% / +0.00% -0.49% -0.62%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.265 ms / 100) 4.538 -> 4.531 ( -0.15%) [ +0.02% +0.07% +0.00% / -0.15% +0.42% +0.48%] index_select const : Elapsed 0.045 ms (4.539 ms / 100) 4.597 -> 4.594 ( -0.07%) [ +0.07% +0.00% +0.17% / -0.07% +0.28% -0.02%] index_select wrap : Elapsed 0.046 ms (4.600 ms / 100) 4.598 -> 4.601 ( +0.07%) [ +0.00% +0.30% +0.15% / +0.07% +0.28% +0.30%] index_select linear : Elapsed 0.046 ms (4.598 ms / 100) 4.596 -> 4.605 ( +0.20%) [ +0.13% +0.22% +0.00% / +0.20% +0.26% +0.37%] index_select reverse : Elapsed 0.046 ms (4.602 ms / 100) 4.541 -> 4.548 ( +0.15%) [ +0.11% +0.22% +0.00% / +0.15% +0.62% +0.53%] index_select skip64 : Elapsed 0.045 ms (4.546 ms / 100) 4.544 -> 4.544 ( +0.00%) [ +0.11% +0.00% +0.26% / +0.00% +0.73% +0.48%] index_select skip256 : Elapsed 0.045 ms (4.549 ms / 100) 4.600 -> 4.610 ( +0.22%) [ +0.09% +0.00% +0.11% / +0.22% +0.35% +0.30%] index_select spread : Elapsed 0.046 ms (4.604 ms / 100) 4.592 -> 4.598 ( +0.13%) [ +0.09% +0.00% +0.04% / +0.13% +0.33% +0.26%] index_select strided 3 : Elapsed 0.046 ms (4.596 ms / 100) 4.584 -> 4.580 ( -0.09%) [ +0.00% +0.02% +0.15% / -0.09% +0.72% +0.70%] index_select random : Elapsed 0.046 ms (4.584 ms / 100) 4.601 -> 4.598 ( -0.07%) [ +0.00% +0.11% +0.02% / -0.07% +0.26% +0.28%] index_select random_sorted : Elapsed 0.046 ms (4.601 ms / 100) B = [40, 4, 20, 16] (stride (16, 640, 2560, 1)) A = [40, 4, 20, 5] (stride (100, 4000, 1, 20)) dim = 3 2.393 -> 2.402 ( +0.38%) [ +0.13% +0.08% +0.00% / +0.42% +0.42% +0.38%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.358 -> 2.368 ( +0.42%) [ +0.08% +0.17% +0.00% / +0.42% +0.68% +0.55%] index_copy_ linear : Elapsed 0.024 ms (2.360 ms / 100) 2.391 -> 2.397 ( +0.25%) [ +0.29% +0.29% +0.00% / +0.25% +0.79% +0.63%] index_add_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.359 -> 2.362 ( +0.13%) [ +0.13% +0.00% +0.04% / +0.13% +0.38% +0.59%] index_copy_ reverse : Elapsed 0.024 ms (2.362 ms / 100) 2.439 -> 2.443 ( +0.16%) [ +0.29% +0.00% +0.21% / +0.16% +0.25% +0.16%] index_add_ spread : Elapsed 0.024 ms (2.446 ms / 100) 2.436 -> 2.432 ( -0.16%) [ +0.16% +0.12% +0.00% / -0.16% +0.41% +0.21%] index_copy_ spread : Elapsed 0.024 ms (2.440 ms / 100) 2.439 -> 2.442 ( +0.12%) [ +0.16% +0.00% +0.00% / +0.12% +0.12% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.443 ms / 100) 2.434 -> 2.436 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.21% +0.41%] index_copy_ strided 3 : Elapsed 0.024 ms (2.434 ms / 100) 2.429 -> 2.433 ( +0.16%) [ +0.29% +0.00% +0.08% / +0.16% +0.37% +0.74%] index_add_ strided 5 : Elapsed 0.024 ms (2.436 ms / 100) 2.427 -> 2.422 ( -0.21%) [ +0.00% +0.21% +0.08% / -0.21% +0.45% +0.66%] index_copy_ strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.25% +0.12% +0.00% / +0.08% +0.29% +0.45%] index_add_ strided 7 : Elapsed 0.024 ms (2.436 ms / 100) 2.432 -> 2.425 ( -0.29%) [ +0.04% +0.08% +0.00% / -0.29% +0.16% +0.21%] index_copy_ strided 7 : Elapsed 0.024 ms (2.433 ms / 100) 2.438 -> 2.443 ( +0.21%) [ +0.25% +0.21% +0.00% / +0.21% +0.41% +0.45%] index_add_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.435 -> 2.435 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.04% +0.29% +0.00%] index_copy_ perm : Elapsed 0.024 ms (2.437 ms / 100) 2.439 -> 2.433 ( -0.25%) [ +0.21% +0.21% +0.00% / -0.25% +0.21% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.444 ms / 100) 2.435 -> 2.434 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.25% +0.33%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) 4.938 -> 4.933 ( -0.10%) [ +0.00% +0.20% +0.30% / -0.10% +0.75% +0.73%] index_select const : Elapsed 0.049 ms (4.938 ms / 100) 4.971 -> 4.975 ( +0.08%) [ +0.08% +0.02% +0.00% / +0.08% +0.64% +0.30%] index_select wrap : Elapsed 0.050 ms (4.975 ms / 100) 5.025 -> 5.031 ( +0.12%) [ +0.10% +0.22% +0.00% / +0.12% +0.34% +0.40%] index_select linear : Elapsed 0.050 ms (5.030 ms / 100) 5.009 -> 5.013 ( +0.08%) [ +0.00% +0.06% +0.02% / +0.08% +0.52% +0.38%] index_select reverse : Elapsed 0.050 ms (5.009 ms / 100) 4.945 -> 4.942 ( -0.06%) [ +0.24% +0.04% +0.00% / -0.06% +0.75% +0.73%] index_select skip64 : Elapsed 0.050 ms (4.957 ms / 100) 4.933 -> 4.936 ( +0.06%) [ +0.00% +0.28% +0.06% / +0.06% +0.67% +0.61%] index_select skip256 : Elapsed 0.049 ms (4.933 ms / 100) 4.987 -> 4.995 ( +0.16%) [ +0.18% +0.00% +0.18% / +0.16% +0.56% +0.64%] index_select spread : Elapsed 0.050 ms (4.996 ms / 100) 4.986 -> 4.990 ( +0.08%) [ +0.22% +0.00% +0.32% / +0.08% +0.72% +0.46%] index_select strided 3 : Elapsed 0.050 ms (4.997 ms / 100) 4.977 -> 4.976 ( -0.02%) [ +0.24% +0.00% +0.00% / -0.02% +0.48% +0.46%] index_select random : Elapsed 0.050 ms (4.989 ms / 100) 4.967 -> 4.967 ( +0.00%) [ +0.06% +0.16% +0.00% / +0.00% +0.74% +0.46%] index_select random_sorted : Elapsed 0.050 ms (4.970 ms / 100) out_shape = [16, 5, 4, 20] in_shape = [40, 5, 4, 20] idx_dim = 0 B = [16, 5, 4, 20] (stride (400, 80, 1, 4)) A = [40, 5, 4, 20] (stride (1, 40, 4000, 200)) dim = 0 3.919 -> 3.920 ( +0.03%) [ +0.13% +0.00% +0.03% / +0.03% +0.54% +0.56%] index_select const : Elapsed 0.039 ms (3.924 ms / 100) 3.943 -> 3.944 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.46% +0.61%] index_select wrap : Elapsed 0.039 ms (3.945 ms / 100) 3.924 -> 3.922 ( -0.05%) [ +0.00% +0.03% +0.00% / -0.05% +0.61% +0.51%] index_select linear : Elapsed 0.039 ms (3.924 ms / 100) 3.934 -> 3.933 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.46% +0.48%] index_select reverse : Elapsed 0.039 ms (3.934 ms / 100) 3.918 -> 3.930 ( +0.31%) [ +0.00% +0.00% +0.23% / +0.31% +0.77% +0.64%] index_select skip64 : Elapsed 0.039 ms (3.918 ms / 100) 3.916 -> 3.919 ( +0.08%) [ +0.10% +0.00% +0.18% / +0.08% +0.66% +0.61%] index_select skip256 : Elapsed 0.039 ms (3.920 ms / 100) 3.919 -> 3.925 ( +0.15%) [ +0.08% +0.05% +0.00% / +0.15% +0.26% +0.41%] index_select spread : Elapsed 0.039 ms (3.922 ms / 100) 3.953 -> 3.961 ( +0.20%) [ +0.00% +0.05% +0.20% / +0.20% +0.35% +0.28%] index_select strided 3 : Elapsed 0.040 ms (3.953 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.00% +0.08% +0.03% / -0.03% +0.43% +0.43%] index_select strided 5 : Elapsed 0.039 ms (3.926 ms / 100) 3.948 -> 3.951 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.30% +0.33%] index_select strided 7 : Elapsed 0.039 ms (3.950 ms / 100) 3.934 -> 3.939 ( +0.13%) [ +0.18% +0.15% +0.00% / +0.13% +0.53% +0.64%] index_select strided 8 : Elapsed 0.039 ms (3.941 ms / 100) 3.918 -> 3.920 ( +0.05%) [ +0.08% +0.00% +0.18% / +0.05% +0.46% +0.28%] index_select strided 16 : Elapsed 0.039 ms (3.921 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.00% +0.15% +0.08% / +0.05% +0.51% +0.33%] index_select random : Elapsed 0.039 ms (3.925 ms / 100) 3.935 -> 3.936 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.46% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.939 ms / 100) 3.924 -> 3.928 ( +0.10%) [ +0.15% +0.00% +0.08% / +0.10% +0.43% +0.28%] index_select perm : Elapsed 0.039 ms (3.930 ms / 100) 3.947 -> 3.949 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.08% +0.33% +0.05%] index_select perm_sorted : Elapsed 0.040 ms (3.951 ms / 100) B = [16, 5, 4, 20] (stride (400, 4, 1, 20)) A = [40, 5, 4, 20] (stride (20, 3200, 800, 1)) dim = 0 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.74% +0.67%] index_select const : Elapsed 0.013 ms (1.348 ms / 100) 1.350 -> 1.352 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +0.81% +0.81%] index_select wrap : Elapsed 0.014 ms (1.350 ms / 100) 1.348 -> 1.348 ( +0.00%) [ +0.22% +0.15% +0.00% / +0.00% +0.82% +0.89%] index_select linear : Elapsed 0.014 ms (1.351 ms / 100) 1.339 -> 1.338 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +1.42% +1.27%] index_select reverse : Elapsed 0.013 ms (1.339 ms / 100) 1.339 -> 1.341 ( +0.15%) [ +0.00% +0.00% +0.30% / +0.15% +1.12% +1.19%] index_select skip64 : Elapsed 0.013 ms (1.339 ms / 100) 1.346 -> 1.348 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.74% +0.74%] index_select skip256 : Elapsed 0.013 ms (1.348 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.00% +0.07% +0.15% / +0.15% +1.19% +1.26%] index_select spread : Elapsed 0.013 ms (1.344 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.00% +0.22% / +0.00% +0.96% +0.74%] index_select strided 3 : Elapsed 0.014 ms (1.351 ms / 100) 1.347 -> 1.346 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.67% +0.82%] index_select strided 5 : Elapsed 0.013 ms (1.347 ms / 100) 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.89% +0.82%] index_select strided 7 : Elapsed 0.013 ms (1.348 ms / 100) 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.89% +0.89%] index_select strided 8 : Elapsed 0.013 ms (1.348 ms / 100) 1.348 -> 1.350 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.89% +1.04%] index_select strided 16 : Elapsed 0.013 ms (1.348 ms / 100) 1.347 -> 1.348 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +1.19% +1.26%] index_select random : Elapsed 0.013 ms (1.348 ms / 100) 1.341 -> 1.343 ( +0.15%) [ +0.00% +0.15% +0.07% / +0.15% +1.34% +1.34%] index_select random_sorted : Elapsed 0.013 ms (1.341 ms / 100) 1.347 -> 1.347 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +1.63% +1.56%] index_select perm : Elapsed 0.013 ms (1.348 ms / 100) 1.345 -> 1.348 ( +0.22%) [ +0.00% +0.15% +0.00% / +0.22% +0.82% +0.97%] index_select perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 5, 20)) A = [40, 5, 4, 20] (stride (80, 3200, 20, 1)) dim = 0 3.497 -> 3.497 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.66% +0.71%] index_select const : Elapsed 0.035 ms (3.498 ms / 100) 3.511 -> 3.513 ( +0.06%) [ +0.03% +0.00% +0.06% / +0.06% +0.63% +0.60%] index_select wrap : Elapsed 0.035 ms (3.512 ms / 100) 3.508 -> 3.510 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.77% +0.74%] index_select linear : Elapsed 0.035 ms (3.511 ms / 100) 3.499 -> 3.500 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.54% +0.49%] index_select reverse : Elapsed 0.035 ms (3.500 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.00% +0.09% +0.09% / +0.03% +0.66% +0.48%] index_select skip64 : Elapsed 0.035 ms (3.509 ms / 100) 3.503 -> 3.502 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.29% +0.34%] index_select skip256 : Elapsed 0.035 ms (3.503 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.43% +0.51%] index_select spread : Elapsed 0.035 ms (3.502 ms / 100) 3.513 -> 3.514 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.54% +0.54%] index_select strided 3 : Elapsed 0.035 ms (3.513 ms / 100) 3.502 -> 3.501 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.43% +0.40%] index_select strided 5 : Elapsed 0.035 ms (3.502 ms / 100) 3.512 -> 3.511 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.65% +0.63%] index_select strided 7 : Elapsed 0.035 ms (3.512 ms / 100) 3.511 -> 3.511 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.40% +0.57%] index_select strided 8 : Elapsed 0.035 ms (3.514 ms / 100) 3.497 -> 3.496 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.37% +0.43%] index_select strided 16 : Elapsed 0.035 ms (3.499 ms / 100) 3.517 -> 3.516 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.48% +0.48%] index_select random : Elapsed 0.035 ms (3.518 ms / 100) 3.501 -> 3.501 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.40% +0.43%] index_select random_sorted : Elapsed 0.035 ms (3.502 ms / 100) 3.508 -> 3.508 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.57% +0.54%] index_select perm : Elapsed 0.035 ms (3.509 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.37% +0.40%] index_select perm_sorted : Elapsed 0.035 ms (3.504 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 1, 4)) A = [40, 5, 4, 20] (stride (400, 80, 20, 1)) dim = 0 3.196 -> 3.198 ( +0.06%) [ +0.09% +0.00% +0.00% / +0.06% +0.78% +0.75%] index_select const : Elapsed 0.032 ms (3.199 ms / 100) 3.192 -> 3.192 ( +0.00%) [ +0.13% +0.00% +0.03% / +0.00% +0.69% +0.69%] index_select wrap : Elapsed 0.032 ms (3.196 ms / 100) 3.204 -> 3.204 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.66% +0.84%] index_select linear : Elapsed 0.032 ms (3.207 ms / 100) 3.205 -> 3.204 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.84% +0.78%] index_select reverse : Elapsed 0.032 ms (3.206 ms / 100) 3.186 -> 3.189 ( +0.09%) [ +0.09% +0.06% +0.00% / +0.09% +0.88% +0.82%] index_select skip64 : Elapsed 0.032 ms (3.189 ms / 100) 3.194 -> 3.197 ( +0.09%) [ +0.09% +0.13% +0.00% / +0.09% +0.81% +0.85%] index_select skip256 : Elapsed 0.032 ms (3.197 ms / 100) 3.190 -> 3.191 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.82% +0.82%] index_select spread : Elapsed 0.032 ms (3.190 ms / 100) 3.192 -> 3.192 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.63% +0.60%] index_select strided 3 : Elapsed 0.032 ms (3.192 ms / 100) 3.206 -> 3.206 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.72% +0.69%] index_select strided 5 : Elapsed 0.032 ms (3.207 ms / 100) 3.193 -> 3.192 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.72% +0.75%] index_select strided 7 : Elapsed 0.032 ms (3.193 ms / 100) 3.189 -> 3.191 ( +0.06%) [ +0.19% +0.09% +0.00% / +0.06% +0.72% +0.85%] index_select strided 8 : Elapsed 0.032 ms (3.195 ms / 100) 3.191 -> 3.191 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.72% +0.72%] index_select strided 16 : Elapsed 0.032 ms (3.192 ms / 100) 3.196 -> 3.195 ( -0.03%) [ +0.03% +0.06% +0.00% / -0.03% +0.81% +0.84%] index_select random : Elapsed 0.032 ms (3.197 ms / 100) 3.187 -> 3.189 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.91% +0.82%] index_select random_sorted : Elapsed 0.032 ms (3.187 ms / 100) 3.201 -> 3.204 ( +0.09%) [ +0.06% +0.00% +0.00% / +0.09% +0.78% +0.81%] index_select perm : Elapsed 0.032 ms (3.203 ms / 100) 3.201 -> 3.201 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.78% +0.78%] index_select perm_sorted : Elapsed 0.032 ms (3.201 ms / 100) B = [16, 5, 4, 20] (stride (1, 1280, 320, 16)) A = [40, 5, 4, 20] (stride (1, 40, 200, 800)) dim = 0 3.581 -> 3.587 ( +0.17%) [ +0.17% +0.14% +0.00% / +0.17% +0.56% +0.39%] index_select const : Elapsed 0.036 ms (3.587 ms / 100) 3.581 -> 3.580 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.36% +0.36%] index_select wrap : Elapsed 0.036 ms (3.581 ms / 100) 3.579 -> 3.585 ( +0.17%) [ +0.14% +0.20% +0.00% / +0.17% +0.64% +0.59%] index_select linear : Elapsed 0.036 ms (3.584 ms / 100) 3.578 -> 3.578 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.34% +0.39%] index_select reverse : Elapsed 0.036 ms (3.578 ms / 100) 3.587 -> 3.588 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.36% +0.50%] index_select skip64 : Elapsed 0.036 ms (3.587 ms / 100) 3.585 -> 3.581 ( -0.11%) [ +0.06% +0.00% +0.06% / -0.11% +0.36% +0.47%] index_select skip256 : Elapsed 0.036 ms (3.587 ms / 100) 3.583 -> 3.580 ( -0.08%) [ +0.03% +0.00% +0.00% / -0.08% +0.22% +0.25%] index_select spread : Elapsed 0.036 ms (3.584 ms / 100) 3.582 -> 3.585 ( +0.08%) [ +0.14% +0.03% +0.00% / +0.08% +0.42% +0.31%] index_select strided 3 : Elapsed 0.036 ms (3.587 ms / 100) 3.577 -> 3.577 ( +0.00%) [ +0.06% +0.00% +0.11% / +0.00% +0.36% +0.34%] index_select strided 5 : Elapsed 0.036 ms (3.579 ms / 100) 3.585 -> 3.586 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.03% +0.25% +0.33%] index_select strided 7 : Elapsed 0.036 ms (3.585 ms / 100) 3.576 -> 3.578 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.42% +0.42%] index_select strided 8 : Elapsed 0.036 ms (3.578 ms / 100) 3.594 -> 3.586 ( -0.22%) [ +0.06% +0.00% +0.00% / -0.22% +0.08% +0.17%] index_select strided 16 : Elapsed 0.036 ms (3.596 ms / 100) 3.587 -> 3.591 ( +0.11%) [ +0.06% +0.00% +0.06% / +0.11% +0.28% +0.39%] index_select random : Elapsed 0.036 ms (3.589 ms / 100) 3.562 -> 3.572 ( +0.28%) [ +0.31% +0.00% +0.14% / +0.28% +0.67% +0.65%] index_select random_sorted : Elapsed 0.036 ms (3.573 ms / 100) 3.585 -> 3.580 ( -0.14%) [ +0.00% +0.00% +0.11% / -0.14% +0.17% +0.39%] index_select perm : Elapsed 0.036 ms (3.585 ms / 100) 3.581 -> 3.583 ( +0.06%) [ +0.03% +0.17% +0.00% / +0.06% +0.39% +0.34%] index_select perm_sorted : Elapsed 0.036 ms (3.582 ms / 100) B = [16, 5, 4, 20] (stride (4, 1280, 1, 64)) dim = 0 fill_cnt = 40 1.803 -> 1.802 ( -0.06%) [ +0.00% +0.11% +0.11% / -0.06% +0.06% +0.06%] index_fill_ const : Elapsed 0.018 ms (1.803 ms / 100) 1.805 -> 1.805 ( +0.00%) [ +0.00% +0.22% +0.22% / +0.00% +0.39% +0.11%] index_fill_ linear : Elapsed 0.018 ms (1.805 ms / 100) 1.806 -> 1.803 ( -0.17%) [ +0.00% +0.11% +0.00% / -0.17% +0.00% +0.11%] index_fill_ reverse : Elapsed 0.018 ms (1.806 ms / 100) 1.808 -> 1.802 ( -0.33%) [ +0.00% +0.00% +0.00% / -0.22% -0.33% -0.28%] index_fill_ skip64 : Elapsed 0.018 ms (1.808 ms / 100) 1.804 -> 1.804 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.06% +0.00% +0.17%] index_fill_ skip256 : Elapsed 0.018 ms (1.806 ms / 100) 1.810 -> 1.808 ( -0.11%) [ +0.06% +0.00% +0.06% / -0.11% -0.06% -0.11%] index_fill_ spread : Elapsed 0.018 ms (1.811 ms / 100) 1.807 -> 1.807 ( +0.00%) [ +0.17% +0.22% +0.00% / +0.00% +0.22% +0.06%] index_fill_ strided 3 : Elapsed 0.018 ms (1.810 ms / 100) 1.806 -> 1.807 ( +0.06%) [ +0.00% +0.44% +0.28% / +0.06% +0.22% +0.17%] index_fill_ strided 5 : Elapsed 0.018 ms (1.806 ms / 100) 1.808 -> 1.805 ( -0.17%) [ +0.11% +0.00% +0.06% / -0.17% +0.17% +0.22%] index_fill_ strided 7 : Elapsed 0.018 ms (1.810 ms / 100) 1.804 -> 1.799 ( -0.28%) [ +0.06% +0.06% +0.00% / -0.28% -0.06% +0.11%] index_fill_ strided 8 : Elapsed 0.018 ms (1.805 ms / 100) 1.811 -> 1.808 ( -0.17%) [ +0.06% +0.00% +0.17% / -0.17% +0.06% +0.06%] index_fill_ random : Elapsed 0.018 ms (1.812 ms / 100) 1.811 -> 1.807 ( -0.22%) [ +0.00% +0.33% +0.06% / -0.22% +0.11% -0.06%] index_fill_ random_sorted : Elapsed 0.018 ms (1.811 ms / 100) B = [16, 5, 4, 20] (stride (1, 320, 1600, 16)) A = [40, 5, 4, 20] (stride (20, 1, 5, 800)) dim = 0 4.141 -> 4.141 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.75% +0.75%] index_select const : Elapsed 0.041 ms (4.142 ms / 100) 4.151 -> 4.155 ( +0.10%) [ +0.07% +0.19% +0.00% / +0.10% +0.82% +0.79%] index_select wrap : Elapsed 0.042 ms (4.154 ms / 100) 4.159 -> 4.159 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.63% +0.60%] index_select linear : Elapsed 0.042 ms (4.160 ms / 100) 4.153 -> 4.155 ( +0.05%) [ +0.00% +0.14% +0.14% / +0.05% +0.92% +0.79%] index_select reverse : Elapsed 0.042 ms (4.153 ms / 100) 4.154 -> 4.155 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.72% +0.72%] index_select skip64 : Elapsed 0.042 ms (4.154 ms / 100) 4.140 -> 4.141 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.82% +0.80%] index_select skip256 : Elapsed 0.041 ms (4.142 ms / 100) 4.175 -> 4.184 ( +0.22%) [ +0.00% +0.17% +0.19% / +0.22% +0.96% +0.91%] index_select spread : Elapsed 0.042 ms (4.175 ms / 100) 4.193 -> 4.193 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.79% +0.76%] index_select strided 3 : Elapsed 0.042 ms (4.194 ms / 100) 4.152 -> 4.159 ( +0.17%) [ +0.02% +0.00% +0.17% / +0.17% +0.53% +0.63%] index_select strided 5 : Elapsed 0.042 ms (4.153 ms / 100) 4.143 -> 4.145 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.70% +0.77%] index_select strided 7 : Elapsed 0.041 ms (4.146 ms / 100) 4.123 -> 4.123 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.78% +0.80%] index_select strided 8 : Elapsed 0.041 ms (4.124 ms / 100) 4.083 -> 4.083 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.86% +0.86%] index_select strided 16 : Elapsed 0.041 ms (4.084 ms / 100) 4.175 -> 4.184 ( +0.22%) [ +0.00% +0.22% +0.17% / +0.22% +0.81% +0.89%] index_select random : Elapsed 0.042 ms (4.175 ms / 100) 4.198 -> 4.196 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.50% +0.50%] index_select random_sorted : Elapsed 0.042 ms (4.200 ms / 100) 4.169 -> 4.169 ( +0.00%) [ +0.00% +0.14% +0.10% / +0.00% +0.84% +0.86%] index_select perm : Elapsed 0.042 ms (4.169 ms / 100) 4.152 -> 4.151 ( -0.02%) [ +0.10% +0.14% +0.00% / -0.02% +0.89% +0.92%] index_select perm_sorted : Elapsed 0.042 ms (4.156 ms / 100) B = [16, 5, 4, 20] (stride (1, 16, 1600, 80)) A = [40, 5, 4, 20] (stride (1, 800, 4000, 40)) dim = 0 3.922 -> 3.921 ( -0.03%) [ +0.05% +0.00% +0.08% / -0.03% +0.61% +0.41%] index_select const : Elapsed 0.039 ms (3.924 ms / 100) 3.928 -> 3.931 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.56% +0.48%] index_select wrap : Elapsed 0.039 ms (3.930 ms / 100) 3.931 -> 3.934 ( +0.08%) [ +0.00% +0.10% +0.15% / +0.08% +0.41% +0.46%] index_select linear : Elapsed 0.039 ms (3.931 ms / 100) 3.933 -> 3.930 ( -0.08%) [ +0.08% +0.00% +0.03% / -0.08% +0.56% +0.38%] index_select reverse : Elapsed 0.039 ms (3.936 ms / 100) 3.936 -> 3.935 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.43% +0.43%] index_select skip64 : Elapsed 0.039 ms (3.936 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.03% +0.00% +0.08% / +0.13% +0.59% +0.51%] index_select skip256 : Elapsed 0.039 ms (3.922 ms / 100) 3.908 -> 3.916 ( +0.20%) [ +0.44% +0.00% +0.36% / +0.20% +0.38% +0.46%] index_select spread : Elapsed 0.039 ms (3.925 ms / 100) 3.936 -> 3.940 ( +0.10%) [ +0.13% +0.13% +0.00% / +0.10% +0.51% +0.43%] index_select strided 3 : Elapsed 0.039 ms (3.941 ms / 100) 3.915 -> 3.917 ( +0.05%) [ +0.15% +0.00% +0.23% / +0.05% +0.36% +0.31%] index_select strided 5 : Elapsed 0.039 ms (3.921 ms / 100) 3.921 -> 3.927 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.38% +0.56%] index_select strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 3.945 -> 3.947 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.41% +0.35%] index_select strided 8 : Elapsed 0.039 ms (3.947 ms / 100) 3.917 -> 3.925 ( +0.20%) [ +0.08% +0.00% +0.08% / +0.20% +0.41% +0.41%] index_select strided 16 : Elapsed 0.039 ms (3.920 ms / 100) 3.923 -> 3.927 ( +0.10%) [ +0.08% +0.00% +0.10% / +0.10% +0.61% +0.61%] index_select random : Elapsed 0.039 ms (3.926 ms / 100) 3.923 -> 3.921 ( -0.05%) [ +0.08% +0.08% +0.00% / -0.05% +0.43% +0.66%] index_select random_sorted : Elapsed 0.039 ms (3.926 ms / 100) 3.922 -> 3.931 ( +0.23%) [ +0.25% +0.23% +0.00% / +0.23% +0.51% +0.61%] index_select perm : Elapsed 0.039 ms (3.932 ms / 100) 3.925 -> 3.935 ( +0.25%) [ +0.00% +0.13% +0.03% / +0.25% +0.33% +0.43%] index_select perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) B = [16, 5, 4, 20] (stride (1, 64, 16, 320)) A = [40, 5, 4, 20] (stride (80, 3200, 20, 1)) dim = 0 1.376 -> 1.377 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.44% +0.44%] index_select const : Elapsed 0.014 ms (1.378 ms / 100) 1.381 -> 1.382 ( +0.07%) [ +0.22% +0.14% +0.00% / +0.07% +0.80% +1.01%] index_select wrap : Elapsed 0.014 ms (1.384 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.80% +0.80%] index_select linear : Elapsed 0.014 ms (1.384 ms / 100) 1.380 -> 1.379 ( -0.07%) [ +0.00% +0.14% +0.07% / -0.07% +1.09% +1.09%] index_select reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.95% +1.17%] index_select skip64 : Elapsed 0.014 ms (1.368 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.58% +0.58%] index_select skip256 : Elapsed 0.014 ms (1.374 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.22% +0.00% +0.00% / +0.15% +1.16% +1.09%] index_select spread : Elapsed 0.014 ms (1.377 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +1.01% +1.01%] index_select strided 3 : Elapsed 0.014 ms (1.383 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.00% +0.29% +0.00% / -0.07% +0.94% +1.02%] index_select strided 5 : Elapsed 0.014 ms (1.376 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +1.16% +1.02%] index_select strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.29% +0.07% +0.00% / +0.00% +0.80% +0.66%] index_select strided 8 : Elapsed 0.014 ms (1.378 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.07% +0.00% +0.22% / +0.07% +1.24% +1.24%] index_select strided 16 : Elapsed 0.014 ms (1.373 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +1.01% +1.09%] index_select random : Elapsed 0.014 ms (1.382 ms / 100) 1.382 -> 1.383 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.94% +1.01%] index_select random_sorted : Elapsed 0.014 ms (1.383 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +2.03% +1.16%] index_select perm : Elapsed 0.014 ms (1.378 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.14% +0.22% / +0.14% +1.16% +1.09%] index_select perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) out_shape = [40, 16, 4, 20] in_shape = [40, 5, 4, 20] idx_dim = 1 B = [40, 16, 4, 20] (stride (1280, 80, 1, 4)) A = [40, 5, 4, 20] (stride (100, 20, 4000, 1)) dim = 1 2.354 -> 2.356 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.68% +0.47%] index_add_ linear : Elapsed 0.024 ms (2.358 ms / 100) 2.306 -> 2.312 ( +0.26%) [ +0.22% +0.17% +0.00% / +0.26% +0.39% +0.43%] index_copy_ linear : Elapsed 0.023 ms (2.311 ms / 100) 2.357 -> 2.363 ( +0.25%) [ +0.00% +0.17% +0.13% / +0.25% +0.30% +0.42%] index_add_ reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.305 -> 2.308 ( +0.13%) [ +0.00% +0.26% +0.09% / +0.13% +0.35% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.305 ms / 100) 2.365 -> 2.366 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.34% +0.30%] index_add_ spread : Elapsed 0.024 ms (2.366 ms / 100) 2.310 -> 2.316 ( +0.26%) [ +0.00% +0.04% +0.35% / +0.26% +0.43% +0.43%] index_copy_ spread : Elapsed 0.023 ms (2.310 ms / 100) 2.356 -> 2.357 ( +0.04%) [ +0.00% +0.13% +0.08% / +0.04% +0.47% +0.76%] index_add_ strided 3 : Elapsed 0.024 ms (2.356 ms / 100) 2.304 -> 2.307 ( +0.13%) [ +0.22% +0.00% +0.13% / +0.13% +0.52% +0.74%] index_copy_ strided 3 : Elapsed 0.023 ms (2.309 ms / 100) 2.356 -> 2.361 ( +0.21%) [ +0.00% +0.17% +0.21% / +0.21% +0.47% +0.59%] index_add_ strided 5 : Elapsed 0.024 ms (2.356 ms / 100) 2.300 -> 2.304 ( +0.17%) [ +0.09% +0.00% +0.13% / +0.17% +0.74% +0.57%] index_copy_ strided 5 : Elapsed 0.023 ms (2.302 ms / 100) 2.345 -> 2.357 ( +0.51%) [ +0.60% +0.43% +0.00% / +0.51% +0.98% +0.85%] index_add_ strided 7 : Elapsed 0.024 ms (2.359 ms / 100) 2.295 -> 2.303 ( +0.35%) [ +0.31% +0.39% +0.00% / +0.35% +1.13% +1.00%] index_copy_ strided 7 : Elapsed 0.023 ms (2.302 ms / 100) 2.358 -> 2.355 ( -0.13%) [ +0.04% +0.25% +0.00% / -0.13% +0.59% +0.47%] index_add_ perm : Elapsed 0.024 ms (2.359 ms / 100) 2.311 -> 2.308 ( -0.13%) [ +0.00% +0.04% +0.09% / -0.13% +0.43% +0.39%] index_copy_ perm : Elapsed 0.023 ms (2.311 ms / 100) 2.361 -> 2.363 ( +0.08%) [ +0.17% +0.00% +0.13% / +0.08% +0.59% +0.38%] index_add_ perm_sorted : Elapsed 0.024 ms (2.365 ms / 100) 2.310 -> 2.314 ( +0.17%) [ +0.13% +0.00% +0.04% / +0.17% +0.69% +0.43%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.313 ms / 100) 4.849 -> 4.869 ( +0.41%) [ +0.08% +0.00% +0.25% / +0.41% +0.93% +0.76%] index_select const : Elapsed 0.049 ms (4.853 ms / 100) 4.879 -> 4.880 ( +0.02%) [ +0.31% +0.23% +0.00% / +0.02% +1.00% +0.98%] index_select wrap : Elapsed 0.049 ms (4.894 ms / 100) 4.939 -> 4.944 ( +0.10%) [ +0.00% +0.04% +0.08% / +0.10% +0.61% +0.65%] index_select linear : Elapsed 0.049 ms (4.939 ms / 100) 4.930 -> 4.945 ( +0.30%) [ +0.28% +0.24% +0.00% / +0.30% +0.77% +0.75%] index_select reverse : Elapsed 0.049 ms (4.944 ms / 100) 4.857 -> 4.858 ( +0.02%) [ +0.00% +0.35% +0.43% / +0.02% +0.76% +0.68%] index_select skip64 : Elapsed 0.049 ms (4.857 ms / 100) 4.861 -> 4.846 ( -0.31%) [ +0.02% +0.00% +0.00% / -0.31% +0.72% +0.70%] index_select skip256 : Elapsed 0.049 ms (4.862 ms / 100) 4.901 -> 4.907 ( +0.12%) [ +0.24% +0.18% +0.00% / +0.12% +0.55% +0.90%] index_select spread : Elapsed 0.049 ms (4.913 ms / 100) 4.907 -> 4.900 ( -0.14%) [ +0.06% +0.00% +0.10% / -0.14% +0.71% +0.69%] index_select strided 3 : Elapsed 0.049 ms (4.910 ms / 100) 4.902 -> 4.912 ( +0.20%) [ +0.00% +0.22% +0.06% / +0.20% +0.73% +0.90%] index_select random : Elapsed 0.049 ms (4.902 ms / 100) 4.887 -> 4.892 ( +0.10%) [ +0.12% +0.06% +0.00% / +0.10% +0.76% +0.88%] index_select random_sorted : Elapsed 0.049 ms (4.893 ms / 100) B = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) A = [40, 5, 4, 20] (stride (1, 800, 4000, 40)) dim = 1 2.558 -> 2.563 ( +0.20%) [ +0.27% +0.16% +0.00% / +0.20% +0.31% +0.23%] index_add_ linear : Elapsed 0.026 ms (2.565 ms / 100) 2.488 -> 2.485 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.28% +0.32%] index_copy_ linear : Elapsed 0.025 ms (2.488 ms / 100) 2.557 -> 2.555 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.12% +0.20%] index_add_ reverse : Elapsed 0.026 ms (2.559 ms / 100) 2.487 -> 2.486 ( -0.04%) [ +0.24% +0.00% +0.00% / -0.04% +0.16% +0.04%] index_copy_ reverse : Elapsed 0.025 ms (2.493 ms / 100) 2.554 -> 2.558 ( +0.16%) [ +0.23% +0.00% +0.16% / +0.16% +0.39% +0.27%] index_add_ spread : Elapsed 0.026 ms (2.560 ms / 100) 2.488 -> 2.487 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.20% +0.52%] index_copy_ spread : Elapsed 0.025 ms (2.490 ms / 100) 2.557 -> 2.559 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.20% +0.39%] index_add_ strided 3 : Elapsed 0.026 ms (2.560 ms / 100) 2.484 -> 2.490 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.24% +0.40% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.488 ms / 100) 2.553 -> 2.552 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.51% +0.16%] index_add_ strided 5 : Elapsed 0.026 ms (2.553 ms / 100) 2.483 -> 2.485 ( +0.08%) [ +0.00% +0.12% +0.12% / +0.08% +0.28% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.483 ms / 100) 2.547 -> 2.553 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +0.39% +0.51%] index_add_ strided 7 : Elapsed 0.026 ms (2.550 ms / 100) 2.477 -> 2.482 ( +0.20%) [ +0.16% +0.12% +0.00% / +0.20% +0.28% +0.36%] index_copy_ strided 7 : Elapsed 0.025 ms (2.481 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.47% +0.43%] index_add_ perm : Elapsed 0.026 ms (2.553 ms / 100) 2.479 -> 2.480 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.04% +0.81% +0.69%] index_copy_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.553 -> 2.555 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.63% +0.51%] index_add_ perm_sorted : Elapsed 0.026 ms (2.553 ms / 100) 2.479 -> 2.485 ( +0.24%) [ +0.24% +0.00% +0.08% / +0.24% +0.81% +0.85%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.485 ms / 100) 5.652 -> 5.656 ( +0.07%) [ +0.00% +0.04% +0.02% / +0.07% +0.51% +0.53%] index_select const : Elapsed 0.057 ms (5.652 ms / 100) 5.675 -> 5.675 ( +0.00%) [ +0.11% +0.00% +0.02% / +0.00% +0.46% +0.49%] index_select wrap : Elapsed 0.057 ms (5.681 ms / 100) 5.680 -> 5.689 ( +0.16%) [ +0.11% +0.00% +0.04% / +0.16% +0.55% +0.53%] index_select linear : Elapsed 0.057 ms (5.686 ms / 100) 5.669 -> 5.670 ( +0.02%) [ +0.11% +0.16% +0.00% / +0.02% +0.48% +0.62%] index_select reverse : Elapsed 0.057 ms (5.675 ms / 100) 5.651 -> 5.656 ( +0.09%) [ +0.09% +0.00% +0.02% / +0.09% +0.53% +0.51%] index_select skip64 : Elapsed 0.057 ms (5.656 ms / 100) 5.651 -> 5.646 ( -0.09%) [ +0.00% +0.05% +0.00% / -0.09% +0.55% +0.71%] index_select skip256 : Elapsed 0.057 ms (5.651 ms / 100) 5.658 -> 5.655 ( -0.05%) [ +0.07% +0.02% +0.00% / -0.05% +0.41% +0.44%] index_select spread : Elapsed 0.057 ms (5.662 ms / 100) 5.670 -> 5.675 ( +0.09%) [ +0.04% +0.11% +0.00% / +0.09% +0.56% +0.46%] index_select strided 3 : Elapsed 0.057 ms (5.672 ms / 100) 5.666 -> 5.667 ( +0.02%) [ +0.00% +0.12% +0.02% / +0.02% +0.58% +0.60%] index_select random : Elapsed 0.057 ms (5.666 ms / 100) 5.646 -> 5.643 ( -0.05%) [ +0.02% +0.07% +0.00% / -0.05% +0.67% +0.69%] index_select random_sorted : Elapsed 0.056 ms (5.647 ms / 100) B = [40, 16, 4, 20] (stride (1, 800, 12800, 40)) A = [40, 5, 4, 20] (stride (80, 3200, 1, 4)) dim = 1 2.323 -> 2.322 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.52% +0.39%] index_add_ linear : Elapsed 0.023 ms (2.323 ms / 100) 2.283 -> 2.280 ( -0.13%) [ +0.04% +0.00% +0.04% / -0.13% +0.35% +0.48%] index_copy_ linear : Elapsed 0.023 ms (2.284 ms / 100) 2.318 -> 2.321 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +0.73% +0.60%] index_add_ reverse : Elapsed 0.023 ms (2.320 ms / 100) 2.274 -> 2.277 ( +0.13%) [ +0.00% +0.31% +0.22% / +0.13% +0.84% +0.88%] index_copy_ reverse : Elapsed 0.023 ms (2.274 ms / 100) 2.323 -> 2.323 ( +0.00%) [ +0.00% +0.26% +0.22% / +0.00% +0.99% +0.82%] index_add_ spread : Elapsed 0.023 ms (2.323 ms / 100) 2.284 -> 2.284 ( +0.00%) [ +0.04% +0.00% +0.09% / +0.00% +0.66% +0.74%] index_copy_ spread : Elapsed 0.023 ms (2.285 ms / 100) 2.322 -> 2.330 ( +0.34%) [ +0.39% +0.30% +0.00% / +0.34% +0.56% +0.65%] index_add_ strided 3 : Elapsed 0.023 ms (2.331 ms / 100) 2.283 -> 2.283 ( +0.00%) [ +0.09% +0.22% +0.00% / +0.00% +0.61% +0.74%] index_copy_ strided 3 : Elapsed 0.023 ms (2.285 ms / 100) 2.322 -> 2.324 ( +0.09%) [ +0.04% +0.00% +0.22% / +0.09% +0.34% +0.30%] index_add_ strided 5 : Elapsed 0.023 ms (2.323 ms / 100) 2.280 -> 2.285 ( +0.22%) [ +0.04% +0.00% +0.13% / +0.22% +0.22% +0.26%] index_copy_ strided 5 : Elapsed 0.023 ms (2.281 ms / 100) 2.324 -> 2.330 ( +0.26%) [ +0.26% +0.30% +0.00% / +0.26% +0.47% +0.34%] index_add_ strided 7 : Elapsed 0.023 ms (2.330 ms / 100) 2.277 -> 2.284 ( +0.31%) [ +0.48% +0.40% +0.00% / +0.31% +0.79% +0.48%] index_copy_ strided 7 : Elapsed 0.023 ms (2.288 ms / 100) 2.320 -> 2.318 ( -0.09%) [ +0.13% +0.00% +0.09% / -0.09% +0.91% +0.78%] index_add_ perm : Elapsed 0.023 ms (2.323 ms / 100) 2.281 -> 2.284 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.13% +0.57% +0.53%] index_copy_ perm : Elapsed 0.023 ms (2.281 ms / 100) 2.327 -> 2.329 ( +0.09%) [ +0.26% +0.13% +0.00% / +0.09% +0.56% +0.60%] index_add_ perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) 2.281 -> 2.285 ( +0.18%) [ +0.26% +0.26% +0.00% / +0.18% +0.61% +0.75%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.287 ms / 100) 4.891 -> 4.900 ( +0.18%) [ +0.06% +0.08% +0.00% / +0.18% +0.86% +0.82%] index_select const : Elapsed 0.049 ms (4.894 ms / 100) 4.924 -> 4.931 ( +0.14%) [ +0.00% +0.12% +0.06% / +0.14% +0.95% +0.75%] index_select wrap : Elapsed 0.049 ms (4.924 ms / 100) 4.937 -> 4.930 ( -0.14%) [ +0.22% +0.00% +0.22% / -0.14% +1.32% +1.05%] index_select linear : Elapsed 0.049 ms (4.948 ms / 100) 4.921 -> 4.919 ( -0.04%) [ +0.00% +0.06% +0.08% / -0.04% +0.73% +0.77%] index_select reverse : Elapsed 0.049 ms (4.921 ms / 100) 4.886 -> 4.893 ( +0.14%) [ +0.16% +0.27% +0.00% / +0.14% +0.78% +0.92%] index_select skip64 : Elapsed 0.049 ms (4.894 ms / 100) 4.891 -> 4.905 ( +0.29%) [ +0.12% +0.06% +0.00% / +0.29% +0.86% +0.88%] index_select skip256 : Elapsed 0.049 ms (4.897 ms / 100) 4.931 -> 4.939 ( +0.16%) [ +0.10% +0.00% +0.02% / +0.16% +0.83% +0.87%] index_select spread : Elapsed 0.049 ms (4.936 ms / 100) 4.928 -> 4.951 ( +0.47%) [ +0.00% +0.43% +0.37% / +0.47% +1.12% +0.93%] index_select strided 3 : Elapsed 0.049 ms (4.928 ms / 100) 4.930 -> 4.929 ( -0.02%) [ +0.00% +0.37% +0.22% / -0.02% +0.99% +0.93%] index_select random : Elapsed 0.049 ms (4.930 ms / 100) 4.911 -> 4.917 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.98% +1.16%] index_select random_sorted : Elapsed 0.049 ms (4.921 ms / 100) B = [40, 16, 4, 20] (stride (16, 1, 12800, 640)) A = [40, 5, 4, 20] (stride (20, 1, 5, 800)) dim = 1 2.397 -> 2.397 ( +0.00%) [ +0.13% +0.00% +0.25% / +0.00% +0.92% +1.00%] index_add_ linear : Elapsed 0.024 ms (2.400 ms / 100) 2.346 -> 2.349 ( +0.13%) [ +0.00% +0.00% +0.26% / +0.13% +0.77% +0.77%] index_copy_ linear : Elapsed 0.023 ms (2.346 ms / 100) 2.402 -> 2.411 ( +0.37%) [ +0.29% +0.25% +0.00% / +0.37% +0.58% +0.75%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.351 -> 2.353 ( +0.09%) [ +0.00% +0.21% +0.00% / +0.09% +0.55% +0.43%] index_copy_ reverse : Elapsed 0.024 ms (2.351 ms / 100) 2.429 -> 2.432 ( +0.12%) [ +0.45% +0.29% +0.00% / +0.12% +0.86% +0.95%] index_add_ spread : Elapsed 0.024 ms (2.440 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.12% +0.00% +0.12% / +0.17% +0.50% +0.41%] index_copy_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.428 -> 2.432 ( +0.16%) [ +0.21% +0.29% +0.00% / +0.16% +0.91% +0.74%] index_add_ strided 3 : Elapsed 0.024 ms (2.433 ms / 100) 2.420 -> 2.423 ( +0.12%) [ +0.29% +0.00% +0.12% / +0.12% +0.21% +0.12%] index_copy_ strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.434 -> 2.440 ( +0.25%) [ +0.16% +0.33% +0.00% / +0.25% +0.62% +0.58%] index_add_ strided 5 : Elapsed 0.024 ms (2.438 ms / 100) 2.425 -> 2.427 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.25% +0.37%] index_copy_ strided 5 : Elapsed 0.024 ms (2.425 ms / 100) 2.438 -> 2.436 ( -0.08%) [ +0.04% +0.16% +0.00% / -0.08% +0.16% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.424 -> 2.425 ( +0.04%) [ +0.04% +0.21% +0.00% / +0.04% +0.08% +0.21%] index_copy_ strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.433 -> 2.434 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.41% +0.74%] index_add_ perm : Elapsed 0.024 ms (2.433 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.21% +0.62%] index_copy_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.441 -> 2.438 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.45% +0.16%] index_add_ perm_sorted : Elapsed 0.024 ms (2.441 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.00% +0.33% +0.08% / +0.17% +0.58% +0.50%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) 5.101 -> 5.123 ( +0.43%) [ +0.02% +0.00% +0.37% / +0.43% +1.08% +0.57%] index_select const : Elapsed 0.051 ms (5.102 ms / 100) 5.107 -> 5.110 ( +0.06%) [ +0.02% +0.04% +0.00% / +0.06% +0.37% +0.47%] index_select wrap : Elapsed 0.051 ms (5.108 ms / 100) 5.106 -> 5.109 ( +0.06%) [ +0.24% +0.00% +0.14% / +0.06% +0.49% +0.51%] index_select linear : Elapsed 0.051 ms (5.118 ms / 100) 5.078 -> 5.082 ( +0.08%) [ +0.14% +0.00% +0.04% / +0.08% +0.53% +0.59%] index_select reverse : Elapsed 0.051 ms (5.085 ms / 100) 5.103 -> 5.095 ( -0.16%) [ +0.00% +0.24% +0.37% / -0.16% +0.84% +0.78%] index_select skip64 : Elapsed 0.051 ms (5.103 ms / 100) 5.093 -> 5.103 ( +0.20%) [ +0.00% +0.63% +0.18% / +0.20% +0.81% +1.06%] index_select skip256 : Elapsed 0.051 ms (5.093 ms / 100) 5.074 -> 5.079 ( +0.10%) [ +0.12% +0.00% +0.06% / +0.10% +0.61% +0.69%] index_select spread : Elapsed 0.051 ms (5.080 ms / 100) 5.101 -> 5.108 ( +0.14%) [ +0.14% +0.00% +0.10% / +0.14% +0.49% +0.55%] index_select strided 3 : Elapsed 0.051 ms (5.108 ms / 100) 5.078 -> 5.076 ( -0.04%) [ +0.00% +0.16% +0.08% / -0.04% +0.71% +0.63%] index_select random : Elapsed 0.051 ms (5.078 ms / 100) 5.083 -> 5.084 ( +0.02%) [ +0.00% +0.14% +0.02% / +0.02% +1.00% +0.79%] index_select random_sorted : Elapsed 0.051 ms (5.083 ms / 100) B = [40, 16, 4, 20] (stride (64, 4, 1, 2560)) A = [40, 5, 4, 20] (stride (1, 160, 40, 800)) dim = 1 2.578 -> 2.584 ( +0.23%) [ +0.23% +0.27% +0.00% / +0.23% +0.78% +0.81%] index_add_ linear : Elapsed 0.026 ms (2.584 ms / 100) 2.512 -> 2.517 ( +0.20%) [ +0.28% +0.24% +0.00% / +0.20% +0.84% +0.92%] index_copy_ linear : Elapsed 0.025 ms (2.519 ms / 100) 2.574 -> 2.578 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.16% +0.78% +0.74%] index_add_ reverse : Elapsed 0.026 ms (2.575 ms / 100) 2.515 -> 2.519 ( +0.16%) [ +0.04% +0.00% +0.08% / +0.16% +0.68% +0.60%] index_copy_ reverse : Elapsed 0.025 ms (2.516 ms / 100) 2.582 -> 2.583 ( +0.04%) [ +0.23% +0.15% +0.00% / +0.04% +1.01% +0.97%] index_add_ spread : Elapsed 0.026 ms (2.588 ms / 100) 2.539 -> 2.534 ( -0.20%) [ +0.12% +0.00% +0.00% / -0.20% +0.51% +0.83%] index_copy_ spread : Elapsed 0.025 ms (2.542 ms / 100) 2.583 -> 2.580 ( -0.12%) [ +0.08% +0.04% +0.00% / -0.12% +0.77% +0.62%] index_add_ strided 3 : Elapsed 0.026 ms (2.585 ms / 100) 2.537 -> 2.537 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.71% +0.67%] index_copy_ strided 3 : Elapsed 0.025 ms (2.540 ms / 100) 2.571 -> 2.569 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% +1.13% +0.74%] index_add_ strided 5 : Elapsed 0.026 ms (2.574 ms / 100) 2.511 -> 2.515 ( +0.16%) [ +0.08% +0.00% +0.04% / +0.16% +0.92% +1.00%] index_copy_ strided 5 : Elapsed 0.025 ms (2.513 ms / 100) 2.578 -> 2.581 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.85% +0.74%] index_add_ strided 7 : Elapsed 0.026 ms (2.581 ms / 100) 2.536 -> 2.539 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.025 ms (2.538 ms / 100) 2.561 -> 2.560 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.62% +0.74%] index_add_ perm : Elapsed 0.026 ms (2.564 ms / 100) 2.504 -> 2.507 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.88% +0.72%] index_copy_ perm : Elapsed 0.025 ms (2.506 ms / 100) 2.564 -> 2.562 ( -0.08%) [ +0.20% +0.00% +0.00% / -0.08% +0.86% +0.94%] index_add_ perm_sorted : Elapsed 0.026 ms (2.569 ms / 100) 2.512 -> 2.514 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.80% +0.96%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.516 ms / 100) 5.675 -> 5.681 ( +0.11%) [ +0.16% +0.00% +0.02% / +0.11% +0.58% +0.63%] index_select const : Elapsed 0.057 ms (5.684 ms / 100) 5.689 -> 5.689 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.60% +0.62%] index_select wrap : Elapsed 0.057 ms (5.690 ms / 100) 5.666 -> 5.668 ( +0.04%) [ +0.12% +0.19% +0.00% / +0.04% +0.42% +0.78%] index_select linear : Elapsed 0.057 ms (5.673 ms / 100) 5.642 -> 5.643 ( +0.02%) [ +0.21% +0.00% +0.07% / +0.02% +0.55% +0.46%] index_select reverse : Elapsed 0.057 ms (5.654 ms / 100) 5.679 -> 5.681 ( +0.04%) [ +0.00% +0.02% +0.00% / +0.04% +0.67% +0.69%] index_select skip64 : Elapsed 0.057 ms (5.679 ms / 100) 5.675 -> 5.682 ( +0.12%) [ +0.05% +0.00% +0.05% / +0.12% +0.56% +0.63%] index_select skip256 : Elapsed 0.057 ms (5.678 ms / 100) 5.666 -> 5.664 ( -0.04%) [ +0.07% +0.02% +0.00% / -0.04% +0.67% +0.60%] index_select spread : Elapsed 0.057 ms (5.670 ms / 100) 5.668 -> 5.677 ( +0.16%) [ +0.23% +0.21% +0.00% / +0.16% +0.69% +0.79%] index_select strided 3 : Elapsed 0.057 ms (5.681 ms / 100) 5.680 -> 5.682 ( +0.04%) [ +0.00% +0.02% +0.02% / +0.04% +0.60% +0.53%] index_select random : Elapsed 0.057 ms (5.680 ms / 100) 5.681 -> 5.681 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.72% +0.72%] index_select random_sorted : Elapsed 0.057 ms (5.681 ms / 100) B = [40, 16, 4, 20] (stride (1, 40, 640, 2560)) A = [40, 5, 4, 20] (stride (1, 40, 4000, 200)) dim = 1 2.419 -> 2.425 ( +0.25%) [ +0.08% +0.33% +0.00% / +0.25% +0.41% +0.41%] index_add_ linear : Elapsed 0.024 ms (2.421 ms / 100) 2.355 -> 2.354 ( -0.04%) [ +0.00% +0.13% +0.13% / -0.04% +0.30% +0.51%] index_copy_ linear : Elapsed 0.024 ms (2.355 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.29% +0.00% +0.25% / +0.17% +0.46% +0.66%] index_add_ reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.353 -> 2.354 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.25% +0.34%] index_copy_ reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.417 -> 2.418 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.50% +0.41%] index_add_ spread : Elapsed 0.024 ms (2.418 ms / 100) 2.353 -> 2.357 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.47% +0.30%] index_copy_ spread : Elapsed 0.024 ms (2.357 ms / 100) 2.415 -> 2.420 ( +0.21%) [ +0.08% +0.12% +0.00% / +0.21% +0.29% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.350 -> 2.353 ( +0.13%) [ +0.00% +0.13% +0.09% / +0.13% +0.38% +0.30%] index_copy_ strided 3 : Elapsed 0.023 ms (2.350 ms / 100) 2.414 -> 2.419 ( +0.21%) [ +0.00% +0.08% +0.17% / +0.21% +0.50% +0.37%] index_add_ strided 5 : Elapsed 0.024 ms (2.414 ms / 100) 2.351 -> 2.347 ( -0.17%) [ +0.04% +0.00% +0.00% / -0.17% +0.21% +0.17%] index_copy_ strided 5 : Elapsed 0.024 ms (2.352 ms / 100) 2.413 -> 2.417 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.17% +0.46% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.347 -> 2.350 ( +0.13%) [ +0.43% +0.00% +0.09% / +0.13% +0.38% +0.47%] index_copy_ strided 7 : Elapsed 0.024 ms (2.357 ms / 100) 2.417 -> 2.419 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.46% +0.50%] index_add_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.355 -> 2.358 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.25% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.355 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.33% +0.33% +0.00% / +0.29% +0.75% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.350 -> 2.351 ( +0.04%) [ +0.26% +0.34% +0.00% / +0.04% +0.47% +0.51%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.356 ms / 100) 5.194 -> 5.221 ( +0.52%) [ +0.50% +0.25% +0.00% / +0.52% +0.87% +0.75%] index_select const : Elapsed 0.052 ms (5.220 ms / 100) 5.210 -> 5.207 ( -0.06%) [ +0.13% +0.00% +0.31% / -0.06% +0.71% +0.56%] index_select wrap : Elapsed 0.052 ms (5.217 ms / 100) 5.248 -> 5.240 ( -0.15%) [ +0.11% +0.08% +0.00% / -0.15% +0.57% +0.59%] index_select linear : Elapsed 0.053 ms (5.254 ms / 100) 5.212 -> 5.209 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.48% +0.56%] index_select reverse : Elapsed 0.052 ms (5.212 ms / 100) 5.188 -> 5.184 ( -0.08%) [ +0.17% +0.00% +0.19% / -0.08% +0.48% +0.71%] index_select skip64 : Elapsed 0.052 ms (5.197 ms / 100) 5.194 -> 5.183 ( -0.21%) [ +0.04% +0.10% +0.00% / -0.21% +0.69% +0.79%] index_select skip256 : Elapsed 0.052 ms (5.196 ms / 100) 5.205 -> 5.199 ( -0.12%) [ +0.00% +0.02% +0.08% / -0.12% +0.54% +0.67%] index_select spread : Elapsed 0.052 ms (5.205 ms / 100) 5.208 -> 5.202 ( -0.12%) [ +0.02% +0.08% +0.00% / -0.12% +0.56% +0.56%] index_select strided 3 : Elapsed 0.052 ms (5.209 ms / 100) 5.213 -> 5.212 ( -0.02%) [ +0.00% +0.17% +0.00% / -0.02% +0.90% +0.58%] index_select random : Elapsed 0.052 ms (5.213 ms / 100) 5.196 -> 5.198 ( +0.04%) [ +0.06% +0.10% +0.00% / +0.04% +0.87% +0.73%] index_select random_sorted : Elapsed 0.052 ms (5.199 ms / 100) out_shape = [40, 5, 16, 20] in_shape = [40, 5, 4, 20] idx_dim = 2 B = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) A = [40, 5, 4, 20] (stride (1, 3200, 800, 40)) dim = 2 2.603 -> 2.608 ( +0.19%) [ +0.00% +0.00% +0.04% / +0.19% +0.27% +0.38%] index_add_ linear : Elapsed 0.026 ms (2.603 ms / 100) 2.541 -> 2.545 ( +0.16%) [ +0.04% +0.08% +0.00% / +0.16% +0.47% +0.39%] index_copy_ linear : Elapsed 0.025 ms (2.542 ms / 100) 2.603 -> 2.606 ( +0.12%) [ +0.00% +0.15% +0.00% / +0.12% +0.50% +0.19%] index_add_ reverse : Elapsed 0.026 ms (2.603 ms / 100) 2.540 -> 2.544 ( +0.16%) [ +0.12% +0.08% +0.00% / +0.16% +0.47% +0.35%] index_copy_ reverse : Elapsed 0.025 ms (2.543 ms / 100) 2.605 -> 2.605 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.38% +0.54%] index_add_ spread : Elapsed 0.026 ms (2.606 ms / 100) 2.542 -> 2.544 ( +0.08%) [ +0.16% +0.00% +0.04% / +0.08% +0.28% +0.31%] index_copy_ spread : Elapsed 0.025 ms (2.546 ms / 100) 2.596 -> 2.599 ( +0.12%) [ +0.31% +0.31% +0.00% / +0.12% +0.54% +0.50%] index_add_ strided 3 : Elapsed 0.026 ms (2.604 ms / 100) 2.538 -> 2.537 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.47% +0.39%] index_copy_ strided 3 : Elapsed 0.025 ms (2.539 ms / 100) 2.599 -> 2.599 ( +0.00%) [ +0.00% +0.19% +0.12% / +0.00% +0.42% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.599 ms / 100) 2.538 -> 2.539 ( +0.04%) [ +0.00% +0.20% +0.04% / +0.04% +0.39% +0.35%] index_copy_ strided 5 : Elapsed 0.025 ms (2.538 ms / 100) 2.607 -> 2.608 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.31% +0.31%] index_add_ strided 7 : Elapsed 0.026 ms (2.609 ms / 100) 2.544 -> 2.543 ( -0.04%) [ +0.24% +0.00% +0.00% / -0.04% +0.20% +0.28%] index_copy_ strided 7 : Elapsed 0.026 ms (2.550 ms / 100) 2.607 -> 2.604 ( -0.12%) [ +0.12% +0.04% +0.00% / -0.12% +0.08% -0.08%] index_add_ perm : Elapsed 0.026 ms (2.610 ms / 100) 2.541 -> 2.545 ( +0.16%) [ +0.35% +0.00% +0.12% / +0.31% +0.35% +0.16%] index_copy_ perm : Elapsed 0.026 ms (2.550 ms / 100) 2.605 -> 2.606 ( +0.04%) [ +0.00% +0.23% +0.12% / +0.12% +0.12% +0.04%] index_add_ perm_sorted : Elapsed 0.026 ms (2.605 ms / 100) 2.544 -> 2.547 ( +0.12%) [ +0.00% +0.16% +0.12% / +0.12% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.544 ms / 100) 5.950 -> 5.956 ( +0.10%) [ +0.15% +0.12% +0.00% / +0.10% +0.44% +0.55%] index_select const : Elapsed 0.060 ms (5.959 ms / 100) 5.935 -> 5.945 ( +0.17%) [ +0.00% +0.17% +0.15% / +0.17% +0.57% +0.52%] index_select wrap : Elapsed 0.059 ms (5.935 ms / 100) 5.956 -> 5.959 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.45% +0.45%] index_select linear : Elapsed 0.060 ms (5.956 ms / 100) 5.944 -> 5.950 ( +0.10%) [ +0.03% +0.13% +0.00% / +0.10% +0.37% +0.37%] index_select reverse : Elapsed 0.059 ms (5.946 ms / 100) 5.949 -> 5.953 ( +0.07%) [ +0.07% +0.10% +0.00% / +0.07% +0.34% +0.34%] index_select skip64 : Elapsed 0.060 ms (5.953 ms / 100) 5.952 -> 5.954 ( +0.03%) [ +0.02% +0.05% +0.00% / +0.03% +0.32% +0.42%] index_select skip256 : Elapsed 0.060 ms (5.953 ms / 100) 5.939 -> 5.942 ( +0.05%) [ +0.08% +0.12% +0.00% / +0.05% +0.29% +0.40%] index_select spread : Elapsed 0.059 ms (5.944 ms / 100) 5.959 -> 5.959 ( +0.00%) [ +0.00% +0.08% +0.05% / +0.00% +0.29% +0.25%] index_select strided 3 : Elapsed 0.060 ms (5.959 ms / 100) 5.939 -> 5.941 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.32% +0.40%] index_select random : Elapsed 0.059 ms (5.941 ms / 100) 5.953 -> 5.952 ( -0.02%) [ +0.05% +0.08% +0.00% / -0.02% +0.39% +0.42%] index_select random_sorted : Elapsed 0.060 ms (5.956 ms / 100) B = [40, 5, 16, 20] (stride (20, 800, 4000, 1)) A = [40, 5, 4, 20] (stride (1, 160, 40, 800)) dim = 2 2.610 -> 2.611 ( +0.04%) [ +0.00% +0.00% +0.15% / +0.04% +0.19% +0.08%] index_add_ linear : Elapsed 0.026 ms (2.610 ms / 100) 2.548 -> 2.550 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.16% +0.20% +0.08%] index_copy_ linear : Elapsed 0.026 ms (2.551 ms / 100) 2.609 -> 2.610 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.04% +0.46% +0.08%] index_add_ reverse : Elapsed 0.026 ms (2.613 ms / 100) 2.548 -> 2.548 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.08% +0.04%] index_copy_ reverse : Elapsed 0.025 ms (2.548 ms / 100) 2.611 -> 2.606 ( -0.19%) [ +0.00% +0.00% +0.11% / +0.04% -0.08% -0.19%] index_add_ spread : Elapsed 0.026 ms (2.611 ms / 100) 2.547 -> 2.542 ( -0.20%) [ +0.00% +0.04% +0.24% / +0.08% -0.20% -0.12%] index_copy_ spread : Elapsed 0.025 ms (2.547 ms / 100) 2.606 -> 2.607 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.15% +0.15%] index_add_ strided 3 : Elapsed 0.026 ms (2.608 ms / 100) 2.543 -> 2.547 ( +0.16%) [ +0.16% +0.20% +0.00% / +0.16% +0.24% +0.16%] index_copy_ strided 3 : Elapsed 0.025 ms (2.547 ms / 100) 2.609 -> 2.611 ( +0.08%) [ +0.15% +0.19% +0.00% / +0.15% +0.08% +0.31%] index_add_ strided 5 : Elapsed 0.026 ms (2.613 ms / 100) 2.549 -> 2.546 ( -0.12%) [ +0.00% +0.04% +0.12% / +0.08% -0.12% +0.16%] index_copy_ strided 5 : Elapsed 0.025 ms (2.549 ms / 100) 2.608 -> 2.609 ( +0.04%) [ +0.23% +0.00% +0.12% / +0.15% +0.27% +0.04%] index_add_ strided 7 : Elapsed 0.026 ms (2.614 ms / 100) 2.548 -> 2.544 ( -0.16%) [ +0.00% +0.08% +0.08% / -0.12% +0.08% -0.16%] index_copy_ strided 7 : Elapsed 0.025 ms (2.548 ms / 100) 2.608 -> 2.610 ( +0.08%) [ +0.15% +0.12% +0.00% / +0.15% +0.15% +0.08%] index_add_ perm : Elapsed 0.026 ms (2.612 ms / 100) 2.546 -> 2.547 ( +0.04%) [ +0.00% +0.12% +0.16% / +0.20% +0.12% +0.04%] index_copy_ perm : Elapsed 0.025 ms (2.546 ms / 100) 2.610 -> 2.609 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.19% +0.19%] index_add_ perm_sorted : Elapsed 0.026 ms (2.612 ms / 100) 2.543 -> 2.544 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.04% +0.35% +0.31%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.548 ms / 100) 5.921 -> 5.919 ( -0.03%) [ +0.10% +0.00% +0.03% / -0.03% +0.27% +0.39%] index_select const : Elapsed 0.059 ms (5.927 ms / 100) 5.926 -> 5.927 ( +0.02%) [ +0.03% +0.13% +0.00% / +0.02% +0.29% +0.34%] index_select wrap : Elapsed 0.059 ms (5.928 ms / 100) 5.949 -> 5.951 ( +0.03%) [ +0.02% +0.12% +0.00% / +0.03% +0.20% +0.24%] index_select linear : Elapsed 0.060 ms (5.950 ms / 100) 5.954 -> 5.956 ( +0.03%) [ +0.05% +0.10% +0.00% / +0.03% +0.30% +0.32%] index_select reverse : Elapsed 0.060 ms (5.957 ms / 100) 5.931 -> 5.928 ( -0.05%) [ +0.00% +0.03% +0.02% / -0.05% +0.37% +0.20%] index_select skip64 : Elapsed 0.059 ms (5.931 ms / 100) 5.925 -> 5.929 ( +0.07%) [ +0.10% +0.05% +0.00% / +0.07% +0.49% +0.37%] index_select skip256 : Elapsed 0.059 ms (5.931 ms / 100) 5.937 -> 5.935 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.25% +0.29%] index_select spread : Elapsed 0.059 ms (5.937 ms / 100) 5.922 -> 5.925 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.20% +0.24%] index_select strided 3 : Elapsed 0.059 ms (5.922 ms / 100) 5.927 -> 5.928 ( +0.02%) [ +0.03% +0.05% +0.00% / +0.02% +0.20% +0.29%] index_select random : Elapsed 0.059 ms (5.929 ms / 100) 5.922 -> 5.927 ( +0.08%) [ +0.10% +0.05% +0.00% / +0.08% +0.30% +0.24%] index_select random_sorted : Elapsed 0.059 ms (5.928 ms / 100) B = [40, 5, 16, 20] (stride (80, 16, 1, 3200)) A = [40, 5, 4, 20] (stride (400, 1, 100, 5)) dim = 2 2.356 -> 2.354 ( -0.08%) [ +0.08% +0.13% +0.00% / -0.08% +0.72% +0.42%] index_add_ linear : Elapsed 0.024 ms (2.358 ms / 100) 2.345 -> 2.349 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.51% +0.64%] index_copy_ linear : Elapsed 0.023 ms (2.345 ms / 100) 2.350 -> 2.347 ( -0.13%) [ +0.00% +0.04% +0.13% / -0.13% +0.51% +0.64%] index_add_ reverse : Elapsed 0.024 ms (2.350 ms / 100) 2.339 -> 2.341 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.09% +0.56% +0.68%] index_copy_ reverse : Elapsed 0.023 ms (2.342 ms / 100) 2.395 -> 2.400 ( +0.21%) [ +0.04% +0.13% +0.00% / +0.21% +0.79% +0.54%] index_add_ spread : Elapsed 0.024 ms (2.396 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.49% +0.57%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.399 -> 2.403 ( +0.17%) [ +0.13% +0.13% +0.00% / +0.17% +0.71% +0.71%] index_add_ strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.65% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.405 -> 2.404 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.46% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.405 ms / 100) 2.450 -> 2.446 ( -0.16%) [ +0.00% +0.00% +0.08% / -0.16% +0.33% +0.45%] index_copy_ strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.394 -> 2.403 ( +0.38%) [ +0.13% +0.00% +0.08% / +0.38% +0.63% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.397 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.49% +0.41%] index_copy_ strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.397 -> 2.403 ( +0.25%) [ +0.04% +0.00% +0.13% / +0.25% +0.54% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.398 ms / 100) 2.444 -> 2.441 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.49% +0.49%] index_copy_ perm : Elapsed 0.024 ms (2.445 ms / 100) 2.396 -> 2.400 ( +0.17%) [ +0.00% +0.17% +0.21% / +0.17% +0.42% +0.46%] index_add_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 2.448 -> 2.446 ( -0.08%) [ +0.04% +0.29% +0.00% / -0.08% +0.41% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.449 ms / 100) 4.981 -> 4.983 ( +0.04%) [ +0.16% +0.02% +0.00% / +0.04% +0.34% +0.32%] index_select const : Elapsed 0.050 ms (4.989 ms / 100) 4.997 -> 5.002 ( +0.10%) [ +0.26% +0.00% +0.08% / +0.10% +0.60% +0.60%] index_select wrap : Elapsed 0.050 ms (5.010 ms / 100) 5.004 -> 5.002 ( -0.04%) [ +0.18% +0.14% +0.00% / -0.04% +0.36% +0.30%] index_select linear : Elapsed 0.050 ms (5.013 ms / 100) 4.992 -> 4.994 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.34% +0.46%] index_select reverse : Elapsed 0.050 ms (4.994 ms / 100) 4.991 -> 4.992 ( +0.02%) [ +0.12% +0.14% +0.00% / +0.02% +0.22% +0.14%] index_select skip64 : Elapsed 0.050 ms (4.997 ms / 100) 4.988 -> 4.985 ( -0.06%) [ +0.10% +0.02% +0.00% / -0.06% +0.32% +0.34%] index_select skip256 : Elapsed 0.050 ms (4.993 ms / 100) 5.004 -> 5.007 ( +0.06%) [ +0.16% +0.00% +0.12% / +0.06% +0.30% +0.24%] index_select spread : Elapsed 0.050 ms (5.012 ms / 100) 5.009 -> 5.017 ( +0.16%) [ +0.04% +0.00% +0.02% / +0.16% +0.18% +0.30%] index_select strided 3 : Elapsed 0.050 ms (5.011 ms / 100) 5.000 -> 4.994 ( -0.12%) [ +0.06% +0.08% +0.00% / -0.12% +0.38% +0.36%] index_select random : Elapsed 0.050 ms (5.003 ms / 100) 4.988 -> 4.991 ( +0.06%) [ +0.06% +0.08% +0.00% / +0.06% +0.40% +0.44%] index_select random_sorted : Elapsed 0.050 ms (4.991 ms / 100) B = [40, 5, 16, 20] (stride (80, 16, 1, 3200)) A = [40, 5, 4, 20] (stride (100, 20, 4000, 1)) dim = 2 1.272 -> 1.268 ( -0.31%) [ +0.00% +0.16% +0.00% / +0.24% -0.31% +0.00%] index_add_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.237 -> 1.237 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.16% +0.24%] index_copy_ linear : Elapsed 0.012 ms (1.239 ms / 100) 1.262 -> 1.261 ( -0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.16% -0.08%] index_add_ reverse : Elapsed 0.013 ms (1.263 ms / 100) 1.233 -> 1.236 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.24% +0.89% +0.57%] index_copy_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.309 -> 1.309 ( +0.00%) [ +0.00% +0.00% +0.46% / +0.00% +0.31% +0.31%] index_add_ spread : Elapsed 0.013 ms (1.309 ms / 100) 1.301 -> 1.303 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.15% +0.85% +0.46%] index_copy_ spread : Elapsed 0.013 ms (1.302 ms / 100) 1.308 -> 1.305 ( -0.23%) [ +0.38% +0.23% +0.00% / +0.15% +0.69% -0.23%] index_add_ strided 3 : Elapsed 0.013 ms (1.313 ms / 100) 1.297 -> 1.302 ( +0.39%) [ +0.00% +0.23% +0.69% / +0.39% +1.46% +1.23%] index_copy_ strided 3 : Elapsed 0.013 ms (1.297 ms / 100) 1.307 -> 1.311 ( +0.31%) [ +0.00% +0.31% +0.38% / +0.31% +0.69% +0.54%] index_add_ strided 5 : Elapsed 0.013 ms (1.307 ms / 100) 1.299 -> 1.304 ( +0.38%) [ +0.31% +0.00% +0.31% / +0.38% +1.23% +1.46%] index_copy_ strided 5 : Elapsed 0.013 ms (1.303 ms / 100) 1.308 -> 1.308 ( +0.00%) [ +0.00% +0.08% +0.23% / +0.00% +0.46% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.308 ms / 100) 1.296 -> 1.310 ( +1.08%) [ +0.00% +0.31% +0.46% / +1.08% +1.47% +1.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.296 ms / 100) 1.306 -> 1.307 ( +0.08%) [ +0.15% +0.00% +0.31% / +0.08% +0.08% +0.31%] index_add_ perm : Elapsed 0.013 ms (1.308 ms / 100) 1.299 -> 1.305 ( +0.46%) [ +0.00% +0.23% +0.23% / +0.46% +0.92% +1.08%] index_copy_ perm : Elapsed 0.013 ms (1.299 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.08% +0.00% +0.08% / +0.23% +0.69% +0.69%] index_add_ perm_sorted : Elapsed 0.013 ms (1.314 ms / 100) 1.297 -> 1.303 ( +0.46%) [ +0.00% +0.23% +0.31% / +0.46% +1.00% +1.23%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.297 ms / 100) 2.033 -> 2.039 ( +0.30%) [ +0.00% +0.30% +0.05% / +0.30% +0.44% +0.59%] index_select const : Elapsed 0.020 ms (2.033 ms / 100) 2.091 -> 2.094 ( +0.14%) [ +0.48% +0.00% +0.33% / +0.14% +0.43% +0.38%] index_select wrap : Elapsed 0.021 ms (2.101 ms / 100) 2.094 -> 2.095 ( +0.05%) [ +0.00% +0.29% +0.29% / +0.24% +0.19% +0.05%] index_select linear : Elapsed 0.021 ms (2.094 ms / 100) 2.075 -> 2.074 ( -0.05%) [ +0.14% +0.00% +0.19% / -0.05% +0.43% +0.43%] index_select reverse : Elapsed 0.021 ms (2.078 ms / 100) 2.033 -> 2.035 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.69% +0.74%] index_select skip64 : Elapsed 0.020 ms (2.033 ms / 100) 2.033 -> 2.033 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.49% +0.69%] index_select skip256 : Elapsed 0.020 ms (2.033 ms / 100) 2.074 -> 2.077 ( +0.14%) [ +0.00% +0.19% +0.34% / +0.14% +0.39% +0.39%] index_select spread : Elapsed 0.021 ms (2.074 ms / 100) 2.092 -> 2.097 ( +0.24%) [ +0.00% +0.10% +0.29% / +0.24% +0.33% +0.33%] index_select strided 3 : Elapsed 0.021 ms (2.092 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.14% +0.05% / +0.05% +0.14% +0.29%] index_select random : Elapsed 0.021 ms (2.089 ms / 100) 2.096 -> 2.101 ( +0.24%) [ +0.19% +0.00% +0.24% / +0.24% +0.33% +0.43%] index_select random_sorted : Elapsed 0.021 ms (2.100 ms / 100) B = [40, 5, 16, 20] (stride (5, 1, 200, 3200)) A = [40, 5, 4, 20] (stride (20, 4, 1, 800)) dim = 2 1.140 -> 1.134 ( -0.53%) [ +0.79% +0.09% +0.00% / +0.44% -0.53% -0.44%] index_add_ linear : Elapsed 0.011 ms (1.149 ms / 100) 1.136 -> 1.138 ( +0.18%) [ +0.26% +0.00% +0.09% / +0.18% +0.26% +0.62%] index_copy_ linear : Elapsed 0.011 ms (1.139 ms / 100) 1.135 -> 1.139 ( +0.35%) [ +0.26% +0.35% +0.00% / +0.35% +0.97% +0.44%] index_add_ reverse : Elapsed 0.011 ms (1.138 ms / 100) 1.132 -> 1.131 ( -0.09%) [ +0.71% +0.35% +0.00% / -0.09% +1.06% +0.97%] index_copy_ reverse : Elapsed 0.011 ms (1.140 ms / 100) 1.143 -> 1.145 ( +0.17%) [ +0.35% +0.00% +0.00% / +0.17% +0.35% +0.35%] index_add_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.140 -> 1.139 ( -0.09%) [ +0.00% +0.09% +0.35% / -0.09% +0.70% +0.70%] index_copy_ spread : Elapsed 0.011 ms (1.140 ms / 100) 1.139 -> 1.137 ( -0.18%) [ +0.00% +0.09% +0.53% / -0.18% +0.61% +0.70%] index_add_ strided 3 : Elapsed 0.011 ms (1.139 ms / 100) 1.136 -> 1.137 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +1.06% +0.79%] index_copy_ strided 3 : Elapsed 0.011 ms (1.137 ms / 100) 1.153 -> 1.146 ( -0.61%) [ +0.00% +0.52% +0.26% / +0.43% -0.61% -0.61%] index_add_ strided 5 : Elapsed 0.012 ms (1.153 ms / 100) 1.148 -> 1.141 ( -0.61%) [ +0.09% +0.00% +0.52% / +0.44% -0.26% -0.61%] index_copy_ strided 5 : Elapsed 0.011 ms (1.149 ms / 100) 1.142 -> 1.144 ( +0.18%) [ +0.09% +0.44% +0.00% / +0.18% +0.35% +0.53%] index_add_ strided 7 : Elapsed 0.011 ms (1.143 ms / 100) 1.144 -> 1.145 ( +0.09%) [ +0.52% +0.00% +0.09% / +0.09% +0.35% +0.09%] index_copy_ strided 7 : Elapsed 0.012 ms (1.150 ms / 100) 1.144 -> 1.146 ( +0.17%) [ +0.26% +0.00% +0.35% / +0.35% +0.17% +0.26%] index_add_ perm : Elapsed 0.011 ms (1.147 ms / 100) 1.140 -> 1.142 ( +0.18%) [ +0.00% +0.35% +0.35% / +0.18% +0.26% +0.44%] index_copy_ perm : Elapsed 0.011 ms (1.140 ms / 100) 1.145 -> 1.142 ( -0.26%) [ +0.00% +0.09% +0.44% / -0.26% +0.17% +0.17%] index_add_ perm_sorted : Elapsed 0.011 ms (1.145 ms / 100) 1.141 -> 1.138 ( -0.26%) [ +0.26% +0.35% +0.00% / -0.09% -0.26% -0.18%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.144 ms / 100) 2.096 -> 2.102 ( +0.29%) [ +0.38% +0.00% +0.05% / +0.29% +1.15% +1.19%] index_select const : Elapsed 0.021 ms (2.104 ms / 100) 2.096 -> 2.100 ( +0.19%) [ +0.43% +0.00% +0.43% / +0.19% +1.34% +1.43%] index_select wrap : Elapsed 0.021 ms (2.105 ms / 100) 2.096 -> 2.102 ( +0.29%) [ +0.29% +0.00% +0.33% / +0.29% +1.48% +1.57%] index_select linear : Elapsed 0.021 ms (2.102 ms / 100) 2.102 -> 2.106 ( +0.19%) [ +0.05% +0.14% +0.00% / +0.19% +1.71% +1.47%] index_select reverse : Elapsed 0.021 ms (2.103 ms / 100) 2.102 -> 2.101 ( -0.05%) [ +0.19% +0.14% +0.00% / -0.05% +0.71% +0.90%] index_select skip64 : Elapsed 0.021 ms (2.106 ms / 100) 2.098 -> 2.101 ( +0.14%) [ +0.00% +0.14% +0.38% / +0.14% +0.81% +0.86%] index_select skip256 : Elapsed 0.021 ms (2.098 ms / 100) 2.101 -> 2.104 ( +0.14%) [ +0.05% +0.19% +0.00% / +0.14% +1.09% +1.05%] index_select spread : Elapsed 0.021 ms (2.102 ms / 100) 2.099 -> 2.101 ( +0.10%) [ +0.19% +0.14% +0.00% / +0.10% +1.00% +1.05%] index_select strided 3 : Elapsed 0.021 ms (2.103 ms / 100) 2.101 -> 2.105 ( +0.19%) [ +0.38% +0.19% +0.00% / +0.19% +1.19% +1.05%] index_select random : Elapsed 0.021 ms (2.109 ms / 100) 2.094 -> 2.101 ( +0.33%) [ +0.00% +0.38% +0.33% / +0.33% +1.67% +1.05%] index_select random_sorted : Elapsed 0.021 ms (2.094 ms / 100) out_shape = [40, 5, 4, 16] in_shape = [40, 5, 4, 20] idx_dim = 3 B = [40, 5, 4, 16] (stride (320, 64, 16, 1)) A = [40, 5, 4, 20] (stride (20, 1, 5, 800)) dim = 3 3.206 -> 3.206 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.75% +0.75%] index_select const : Elapsed 0.032 ms (3.207 ms / 100) 3.204 -> 3.205 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.84% +0.81%] index_select wrap : Elapsed 0.032 ms (3.205 ms / 100) 3.203 -> 3.201 ( -0.06%) [ +0.03% +0.00% +0.00% / -0.06% +0.72% +0.75%] index_select linear : Elapsed 0.032 ms (3.204 ms / 100) 3.203 -> 3.205 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.94% +0.91%] index_select reverse : Elapsed 0.032 ms (3.204 ms / 100) 3.212 -> 3.212 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.81% +0.62%] index_select skip64 : Elapsed 0.032 ms (3.214 ms / 100) 3.209 -> 3.208 ( -0.03%) [ +0.00% +0.03% +0.06% / -0.03% +0.62% +0.87%] index_select skip256 : Elapsed 0.032 ms (3.209 ms / 100) 3.204 -> 3.205 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.75% +0.66%] index_select spread : Elapsed 0.032 ms (3.205 ms / 100) 3.205 -> 3.208 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.69% +0.72%] index_select strided 3 : Elapsed 0.032 ms (3.207 ms / 100) 3.204 -> 3.203 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.56% +0.56%] index_select strided 5 : Elapsed 0.032 ms (3.204 ms / 100) 3.208 -> 3.209 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.72% +0.75%] index_select strided 7 : Elapsed 0.032 ms (3.209 ms / 100) 3.205 -> 3.204 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.69% +0.66%] index_select strided 8 : Elapsed 0.032 ms (3.205 ms / 100) 3.203 -> 3.204 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.75% +0.69%] index_select strided 16 : Elapsed 0.032 ms (3.204 ms / 100) 3.213 -> 3.214 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.59% +0.59%] index_select random : Elapsed 0.032 ms (3.213 ms / 100) 3.205 -> 3.205 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.53% +0.56%] index_select random_sorted : Elapsed 0.032 ms (3.205 ms / 100) 3.208 -> 3.208 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.44% +0.47%] index_select perm : Elapsed 0.032 ms (3.209 ms / 100) 3.210 -> 3.210 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.59% +0.53%] index_select perm_sorted : Elapsed 0.032 ms (3.211 ms / 100) B = [40, 5, 4, 16] (stride (320, 64, 1, 4)) A = [40, 5, 4, 20] (stride (80, 3200, 1, 4)) dim = 3 3.631 -> 3.631 ( +0.00%) [ +0.11% +0.03% +0.00% / +0.00% +0.52% +0.50%] index_select const : Elapsed 0.036 ms (3.635 ms / 100) 3.619 -> 3.623 ( +0.11%) [ +0.14% +0.00% +0.11% / +0.11% +0.50% +0.47%] index_select wrap : Elapsed 0.036 ms (3.624 ms / 100) 3.614 -> 3.612 ( -0.06%) [ +0.03% +0.00% +0.00% / -0.06% +0.72% +0.53%] index_select linear : Elapsed 0.036 ms (3.615 ms / 100) 3.610 -> 3.614 ( +0.11%) [ +0.22% +0.03% +0.00% / +0.11% +0.66% +0.44%] index_select reverse : Elapsed 0.036 ms (3.618 ms / 100) 3.613 -> 3.613 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.50% +0.61%] index_select skip64 : Elapsed 0.036 ms (3.615 ms / 100) 3.631 -> 3.630 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.55% +0.63%] index_select skip256 : Elapsed 0.036 ms (3.631 ms / 100) 3.612 -> 3.617 ( +0.14%) [ +0.17% +0.11% +0.00% / +0.14% +0.61% +0.64%] index_select spread : Elapsed 0.036 ms (3.618 ms / 100) 3.610 -> 3.613 ( +0.08%) [ +0.06% +0.06% +0.00% / +0.08% +0.50% +0.50%] index_select strided 3 : Elapsed 0.036 ms (3.612 ms / 100) 3.612 -> 3.619 ( +0.19%) [ +0.17% +0.00% +0.22% / +0.19% +0.72% +0.72%] index_select strided 5 : Elapsed 0.036 ms (3.618 ms / 100) 3.614 -> 3.617 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +0.75% +0.69%] index_select strided 7 : Elapsed 0.036 ms (3.615 ms / 100) 3.613 -> 3.619 ( +0.17%) [ +0.22% +0.00% +0.08% / +0.17% +0.83% +0.72%] index_select strided 8 : Elapsed 0.036 ms (3.621 ms / 100) 3.613 -> 3.620 ( +0.19%) [ +0.03% +0.00% +0.00% / +0.19% +0.80% +0.72%] index_select strided 16 : Elapsed 0.036 ms (3.614 ms / 100) 3.607 -> 3.610 ( +0.08%) [ +0.03% +0.00% +0.17% / +0.08% +0.78% +0.72%] index_select random : Elapsed 0.036 ms (3.608 ms / 100) 3.617 -> 3.627 ( +0.28%) [ +0.00% +0.11% +0.22% / +0.28% +0.69% +0.86%] index_select random_sorted : Elapsed 0.036 ms (3.617 ms / 100) 3.613 -> 3.616 ( +0.08%) [ +0.00% +0.08% +0.11% / +0.08% +0.53% +0.58%] index_select perm : Elapsed 0.036 ms (3.613 ms / 100) 3.605 -> 3.608 ( +0.08%) [ +0.03% +0.06% +0.00% / +0.08% +0.61% +0.58%] index_select perm_sorted : Elapsed 0.036 ms (3.606 ms / 100) B = [40, 5, 4, 16] (stride (320, 16, 80, 1)) A = [40, 5, 4, 20] (stride (1, 3200, 40, 160)) dim = 3 4.126 -> 4.127 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.85% +0.87%] index_select const : Elapsed 0.041 ms (4.128 ms / 100) 4.101 -> 4.102 ( +0.02%) [ +0.02% +0.10% +0.00% / +0.02% +0.68% +0.66%] index_select wrap : Elapsed 0.041 ms (4.102 ms / 100) 4.117 -> 4.120 ( +0.07%) [ +0.12% +0.10% +0.00% / +0.07% +0.78% +0.85%] index_select linear : Elapsed 0.041 ms (4.122 ms / 100) 4.110 -> 4.111 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.75% +0.71%] index_select reverse : Elapsed 0.041 ms (4.112 ms / 100) 4.131 -> 4.131 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.70%] index_select skip64 : Elapsed 0.041 ms (4.131 ms / 100) 4.145 -> 4.145 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.63%] index_select skip256 : Elapsed 0.041 ms (4.145 ms / 100) 4.117 -> 4.120 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.63%] index_select spread : Elapsed 0.041 ms (4.120 ms / 100) 4.170 -> 4.171 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.60% +0.60%] index_select strided 3 : Elapsed 0.042 ms (4.170 ms / 100) 4.103 -> 4.102 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.73% +0.73%] index_select strided 5 : Elapsed 0.041 ms (4.103 ms / 100) 4.120 -> 4.121 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.75% +0.70%] index_select strided 7 : Elapsed 0.041 ms (4.121 ms / 100) 4.144 -> 4.146 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.60% +0.56%] index_select strided 8 : Elapsed 0.041 ms (4.145 ms / 100) 4.134 -> 4.133 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.53% +0.56%] index_select strided 16 : Elapsed 0.041 ms (4.134 ms / 100) 4.122 -> 4.122 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.56% +0.63%] index_select random : Elapsed 0.041 ms (4.124 ms / 100) 4.128 -> 4.128 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.48% +0.41%] index_select random_sorted : Elapsed 0.041 ms (4.129 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.36% +0.36%] index_select perm : Elapsed 0.041 ms (4.139 ms / 100) 4.121 -> 4.118 ( -0.07%) [ +0.00% +0.02% +0.00% / -0.07% +0.49% +0.53%] index_select perm_sorted : Elapsed 0.041 ms (4.121 ms / 100) B = [40, 5, 4, 16] (stride (64, 2560, 16, 1)) A = [40, 5, 4, 20] (stride (400, 80, 1, 4)) dim = 3 3.630 -> 3.628 ( -0.06%) [ +0.00% +0.03% +0.03% / -0.06% +0.33% +0.36%] index_select const : Elapsed 0.036 ms (3.630 ms / 100) 3.609 -> 3.613 ( +0.11%) [ +0.11% +0.03% +0.00% / +0.11% +0.39% +0.42%] index_select wrap : Elapsed 0.036 ms (3.613 ms / 100) 3.612 -> 3.609 ( -0.08%) [ +0.06% +0.00% +0.03% / -0.08% +0.58% +0.58%] index_select linear : Elapsed 0.036 ms (3.614 ms / 100) 3.612 -> 3.611 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.44% +0.44%] index_select reverse : Elapsed 0.036 ms (3.613 ms / 100) 3.621 -> 3.623 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.47% +0.44%] index_select skip64 : Elapsed 0.036 ms (3.621 ms / 100) 3.643 -> 3.644 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.55% +0.55%] index_select skip256 : Elapsed 0.036 ms (3.645 ms / 100) 3.610 -> 3.613 ( +0.08%) [ +0.08% +0.00% +0.03% / +0.08% +0.53% +0.64%] index_select spread : Elapsed 0.036 ms (3.613 ms / 100) 3.613 -> 3.618 ( +0.14%) [ +0.03% +0.11% +0.00% / +0.14% +0.55% +0.64%] index_select strided 3 : Elapsed 0.036 ms (3.614 ms / 100) 3.608 -> 3.607 ( -0.03%) [ +0.14% +0.00% +0.06% / -0.03% +0.47% +0.47%] index_select strided 5 : Elapsed 0.036 ms (3.613 ms / 100) 3.600 -> 3.601 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.72% +0.72%] index_select strided 7 : Elapsed 0.036 ms (3.601 ms / 100) 3.612 -> 3.612 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.58% +0.72%] index_select strided 8 : Elapsed 0.036 ms (3.612 ms / 100) 3.602 -> 3.604 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.64% +0.69%] index_select strided 16 : Elapsed 0.036 ms (3.603 ms / 100) 3.613 -> 3.613 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.58%] index_select random : Elapsed 0.036 ms (3.613 ms / 100) 3.608 -> 3.609 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.64% +0.55%] index_select random_sorted : Elapsed 0.036 ms (3.608 ms / 100) 3.608 -> 3.607 ( -0.03%) [ +0.08% +0.00% +0.03% / -0.03% +0.69% +0.69%] index_select perm : Elapsed 0.036 ms (3.611 ms / 100) 3.606 -> 3.606 ( +0.00%) [ +0.00% +0.11% +0.03% / +0.00% +0.61% +0.72%] index_select perm_sorted : Elapsed 0.036 ms (3.606 ms / 100) B = [40, 5, 4, 16] (stride (64, 2560, 1, 4)) A = [40, 5, 4, 20] (stride (4, 3200, 1, 160)) dim = 3 3.861 -> 3.860 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.62% +0.65%] index_select const : Elapsed 0.039 ms (3.861 ms / 100) 3.874 -> 3.874 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +1.24% +1.34%] index_select wrap : Elapsed 0.039 ms (3.874 ms / 100) 3.872 -> 3.874 ( +0.05%) [ +0.08% +0.00% +0.08% / +0.05% +0.83% +0.75%] index_select linear : Elapsed 0.039 ms (3.875 ms / 100) 3.877 -> 3.877 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.11% +0.90%] index_select reverse : Elapsed 0.039 ms (3.877 ms / 100) 3.881 -> 3.877 ( -0.10%) [ +0.00% +0.05% +0.03% / -0.10% +0.70% +0.72%] index_select skip64 : Elapsed 0.039 ms (3.881 ms / 100) 3.873 -> 3.873 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +1.06% +1.37%] index_select skip256 : Elapsed 0.039 ms (3.874 ms / 100) 3.875 -> 3.875 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.14% +0.67%] index_select spread : Elapsed 0.039 ms (3.875 ms / 100) 3.867 -> 3.867 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.72% +0.70%] index_select strided 3 : Elapsed 0.039 ms (3.867 ms / 100) 3.873 -> 3.874 ( +0.03%) [ +0.08% +0.00% +0.08% / +0.03% +0.72% +0.65%] index_select strided 5 : Elapsed 0.039 ms (3.876 ms / 100) 3.872 -> 3.873 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +1.34% +1.16%] index_select strided 7 : Elapsed 0.039 ms (3.873 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.80% +0.98%] index_select strided 8 : Elapsed 0.039 ms (3.877 ms / 100) 3.870 -> 3.873 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.75% +0.88%] index_select strided 16 : Elapsed 0.039 ms (3.870 ms / 100) 3.886 -> 3.890 ( +0.10%) [ +0.05% +0.00% +0.08% / +0.10% +0.98% +0.93%] index_select random : Elapsed 0.039 ms (3.888 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.54% +0.77%] index_select random_sorted : Elapsed 0.039 ms (3.876 ms / 100) 3.882 -> 3.883 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +1.06% +0.72%] index_select perm : Elapsed 0.039 ms (3.883 ms / 100) 3.866 -> 3.866 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.52% +0.54%] index_select perm_sorted : Elapsed 0.039 ms (3.867 ms / 100) B = [40, 5, 4, 16] (stride (1, 2560, 40, 160)) A = [40, 5, 4, 20] (stride (4, 160, 1, 800)) dim = 3 3.892 -> 3.896 ( +0.10%) [ +0.05% +0.13% +0.00% / +0.10% +0.36% +0.62%] index_select const : Elapsed 0.039 ms (3.894 ms / 100) 3.910 -> 3.916 ( +0.15%) [ +0.05% +0.00% +0.26% / +0.15% +0.43% +0.38%] index_select wrap : Elapsed 0.039 ms (3.912 ms / 100) 3.899 -> 3.898 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.69% +0.56%] index_select linear : Elapsed 0.039 ms (3.899 ms / 100) 3.885 -> 3.885 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.98% +0.51%] index_select reverse : Elapsed 0.039 ms (3.886 ms / 100) 3.897 -> 3.898 ( +0.03%) [ +0.10% +0.00% +0.00% / +0.03% +0.56% +0.72%] index_select skip64 : Elapsed 0.039 ms (3.901 ms / 100) 3.899 -> 3.922 ( +0.59%) [ +0.00% +0.26% +0.03% / +0.59% +0.67% +0.59%] index_select skip256 : Elapsed 0.039 ms (3.899 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.13% +0.08% +0.00% / +0.05% +0.64% +0.62%] index_select spread : Elapsed 0.039 ms (3.881 ms / 100) 3.883 -> 3.884 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.95% +0.52%] index_select strided 3 : Elapsed 0.039 ms (3.884 ms / 100) 3.898 -> 3.909 ( +0.28%) [ +0.08% +0.00% +0.03% / +0.28% +0.64% +0.62%] index_select strided 5 : Elapsed 0.039 ms (3.901 ms / 100) 3.875 -> 3.875 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.72% +0.67%] index_select strided 7 : Elapsed 0.039 ms (3.876 ms / 100) 3.869 -> 3.870 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.70% +0.70%] index_select strided 8 : Elapsed 0.039 ms (3.871 ms / 100) 3.873 -> 3.873 ( +0.00%) [ +0.03% +0.00% +0.10% / +0.00% +0.83% +0.77%] index_select strided 16 : Elapsed 0.039 ms (3.874 ms / 100) 3.879 -> 3.881 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +1.06% +1.13%] index_select random : Elapsed 0.039 ms (3.883 ms / 100) 3.886 -> 3.888 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +1.00% +0.54%] index_select random_sorted : Elapsed 0.039 ms (3.887 ms / 100) 3.905 -> 3.916 ( +0.28%) [ +0.15% +0.00% +0.41% / +0.28% +0.64% +0.64%] index_select perm : Elapsed 0.039 ms (3.911 ms / 100) 3.880 -> 3.881 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +1.16% +1.19%] index_select perm_sorted : Elapsed 0.039 ms (3.881 ms / 100) B = [40, 5, 4, 16] (stride (80, 1, 3200, 5)) A = [40, 5, 4, 20] (stride (400, 80, 1, 4)) dim = 3 3.611 -> 3.612 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.72% +0.72%] index_select const : Elapsed 0.036 ms (3.612 ms / 100) 3.617 -> 3.620 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.94% +0.94%] index_select wrap : Elapsed 0.036 ms (3.618 ms / 100) 3.606 -> 3.604 ( -0.06%) [ +0.03% +0.08% +0.00% / -0.06% +0.75% +0.72%] index_select linear : Elapsed 0.036 ms (3.607 ms / 100) 3.615 -> 3.616 ( +0.03%) [ +0.11% +0.17% +0.00% / +0.03% +0.91% +0.80%] index_select reverse : Elapsed 0.036 ms (3.619 ms / 100) 3.613 -> 3.617 ( +0.11%) [ +0.14% +0.03% +0.00% / +0.11% +0.61% +0.75%] index_select skip64 : Elapsed 0.036 ms (3.618 ms / 100) 3.619 -> 3.622 ( +0.08%) [ +0.08% +0.03% +0.00% / +0.08% +0.64% +0.64%] index_select skip256 : Elapsed 0.036 ms (3.622 ms / 100) 3.616 -> 3.620 ( +0.11%) [ +0.03% +0.08% +0.00% / +0.11% +0.72% +0.61%] index_select spread : Elapsed 0.036 ms (3.617 ms / 100) 3.604 -> 3.602 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.75% +0.75%] index_select strided 3 : Elapsed 0.036 ms (3.606 ms / 100) 3.600 -> 3.599 ( -0.03%) [ +0.14% +0.08% +0.00% / -0.03% +0.69% +0.78%] index_select strided 5 : Elapsed 0.036 ms (3.605 ms / 100) 3.608 -> 3.610 ( +0.06%) [ +0.00% +0.11% +0.11% / +0.06% +0.67% +0.67%] index_select strided 7 : Elapsed 0.036 ms (3.608 ms / 100) 3.590 -> 3.591 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.67% +0.70%] index_select strided 8 : Elapsed 0.036 ms (3.590 ms / 100) 3.606 -> 3.605 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.61% +0.61%] index_select strided 16 : Elapsed 0.036 ms (3.607 ms / 100) 3.603 -> 3.606 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.08% +0.67% +0.61%] index_select random : Elapsed 0.036 ms (3.609 ms / 100) 3.605 -> 3.603 ( -0.06%) [ +0.19% +0.08% +0.00% / -0.06% +0.58% +0.61%] index_select random_sorted : Elapsed 0.036 ms (3.612 ms / 100) 3.612 -> 3.613 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.58% +0.58%] index_select perm : Elapsed 0.036 ms (3.613 ms / 100) 3.613 -> 3.612 ( -0.03%) [ +0.08% +0.00% +0.11% / -0.03% +0.53% +0.55%] index_select perm_sorted : Elapsed 0.036 ms (3.616 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 3200, 200)) dim = 3 fill_cnt = 20 1.529 -> 1.517 ( -0.78%) [ +0.33% +0.00% +0.13% / -0.65% -0.78% -0.52%] index_fill_ const : Elapsed 0.015 ms (1.534 ms / 100) 1.520 -> 1.497 ( -1.51%) [ +0.00% +0.07% +0.07% / -1.51% -0.72% -1.05%] index_fill_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.521 -> 1.504 ( -1.12%) [ +0.13% +0.07% +0.00% / -1.12% -0.92% -0.92%] index_fill_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.535 -> 1.509 ( -1.69%) [ +0.13% +0.00% +0.52% / -0.91% -1.50% -1.69%] index_fill_ skip64 : Elapsed 0.015 ms (1.537 ms / 100) 1.538 -> 1.515 ( -1.50%) [ +0.07% +0.07% +0.00% / -1.04% -1.30% -1.50%] index_fill_ skip256 : Elapsed 0.015 ms (1.539 ms / 100) 1.520 -> 1.500 ( -1.32%) [ +0.00% +0.20% +0.13% / -1.32% -1.12% -1.32%] index_fill_ spread : Elapsed 0.015 ms (1.520 ms / 100) 1.513 -> 1.498 ( -0.99%) [ +0.40% +0.00% +0.33% / -0.73% -0.59% -0.99%] index_fill_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.518 -> 1.500 ( -1.19%) [ +0.07% +0.13% +0.00% / -1.19% -0.99% -1.05%] index_fill_ strided 5 : Elapsed 0.015 ms (1.519 ms / 100) 1.514 -> 1.506 ( -0.53%) [ +0.00% +0.20% +0.40% / -0.53% -0.53% -0.26%] index_fill_ strided 7 : Elapsed 0.015 ms (1.514 ms / 100) 1.518 -> 1.497 ( -1.38%) [ +0.00% +0.20% +0.07% / -0.99% -1.25% -1.38%] index_fill_ strided 8 : Elapsed 0.015 ms (1.518 ms / 100) 1.516 -> 1.501 ( -0.99%) [ +0.00% +0.26% +0.00% / -0.79% -0.86% -0.99%] index_fill_ random : Elapsed 0.015 ms (1.516 ms / 100) 1.515 -> 1.498 ( -1.12%) [ +0.00% +0.20% +0.00% / -1.06% -1.12% -0.79%] index_fill_ random_sorted : Elapsed 0.015 ms (1.515 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 3200, 200)) A = [40, 5, 4, 20] (stride (4, 160, 1, 800)) dim = 3 3.569 -> 3.570 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.45% +0.42%] index_select const : Elapsed 0.036 ms (3.569 ms / 100) 3.579 -> 3.579 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_select wrap : Elapsed 0.036 ms (3.580 ms / 100) 3.582 -> 3.586 ( +0.11%) [ +0.03% +0.11% +0.00% / +0.11% +0.59% +0.56%] index_select linear : Elapsed 0.036 ms (3.583 ms / 100) 3.559 -> 3.561 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.62% +0.62%] index_select reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.589 -> 3.590 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.53% +0.64%] index_select skip64 : Elapsed 0.036 ms (3.592 ms / 100) 3.585 -> 3.584 ( -0.03%) [ +0.03% +0.06% +0.00% / -0.03% +0.39% +0.45%] index_select skip256 : Elapsed 0.036 ms (3.586 ms / 100) 3.565 -> 3.565 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.48% +0.42%] index_select spread : Elapsed 0.036 ms (3.565 ms / 100) 3.559 -> 3.560 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.56% +0.62%] index_select strided 3 : Elapsed 0.036 ms (3.560 ms / 100) 3.570 -> 3.572 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.53% +0.59%] index_select strided 5 : Elapsed 0.036 ms (3.572 ms / 100) 3.555 -> 3.555 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.70% +0.70%] index_select strided 7 : Elapsed 0.036 ms (3.556 ms / 100) 3.548 -> 3.548 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.00% +0.59% +0.59%] index_select strided 8 : Elapsed 0.035 ms (3.549 ms / 100) 3.562 -> 3.563 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.81% +0.81%] index_select strided 16 : Elapsed 0.036 ms (3.565 ms / 100) 3.558 -> 3.558 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.67% +0.65%] index_select random : Elapsed 0.036 ms (3.559 ms / 100) 3.571 -> 3.571 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.81% +0.73%] index_select random_sorted : Elapsed 0.036 ms (3.572 ms / 100) 3.581 -> 3.581 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.95% +0.87%] index_select perm : Elapsed 0.036 ms (3.581 ms / 100) 3.558 -> 3.559 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.62% +0.62%] index_select perm_sorted : Elapsed 0.036 ms (3.559 ms / 100) B = [40, 5, 4, 16] (stride (1, 160, 40, 800)) A = [40, 5, 4, 20] (stride (1, 3200, 800, 40)) dim = 3 1.428 -> 1.429 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +1.26% +1.26%] index_select const : Elapsed 0.014 ms (1.429 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.96% +1.24%] index_select wrap : Elapsed 0.015 ms (1.452 ms / 100) 1.440 -> 1.442 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +1.04% +0.97%] index_select linear : Elapsed 0.014 ms (1.441 ms / 100) 1.448 -> 1.447 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.69% +0.90%] index_select reverse : Elapsed 0.014 ms (1.448 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.83% +0.83%] index_select skip64 : Elapsed 0.015 ms (1.452 ms / 100) 1.430 -> 1.430 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.91% +1.05%] index_select skip256 : Elapsed 0.014 ms (1.431 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.76% +0.76%] index_select spread : Elapsed 0.015 ms (1.451 ms / 100) 1.447 -> 1.449 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.97% +0.83%] index_select strided 3 : Elapsed 0.014 ms (1.449 ms / 100) 1.447 -> 1.444 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.21% +0.41% +0.35%] index_select strided 5 : Elapsed 0.014 ms (1.447 ms / 100) 1.453 -> 1.456 ( +0.21%) [ +0.07% +0.00% +0.00% / +0.21% +0.69% +0.62%] index_select strided 7 : Elapsed 0.015 ms (1.454 ms / 100) 1.446 -> 1.448 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.55% +0.55%] index_select strided 8 : Elapsed 0.014 ms (1.447 ms / 100) 1.432 -> 1.433 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.63% +0.70%] index_select strided 16 : Elapsed 0.014 ms (1.434 ms / 100) 1.442 -> 1.443 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.42% +0.62%] index_select random : Elapsed 0.014 ms (1.443 ms / 100) 1.435 -> 1.434 ( -0.07%) [ +0.07% +0.14% +0.00% / -0.07% +0.28% +0.28%] index_select random_sorted : Elapsed 0.014 ms (1.436 ms / 100) 1.449 -> 1.449 ( +0.00%) [ +0.14% +0.21% +0.00% / +0.00% +0.35% +0.62%] index_select perm : Elapsed 0.015 ms (1.451 ms / 100) 1.446 -> 1.449 ( +0.21%) [ +0.00% +0.14% +0.00% / +0.21% +0.69% +0.48%] index_select perm_sorted : Elapsed 0.014 ms (1.446 ms / 100) B = [40, 5, 4, 16] (stride (1, 160, 40, 800)) A = [40, 5, 4, 20] (stride (1, 160, 40, 800)) dim = 3 1.423 -> 1.424 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.07% +0.21%] index_select const : Elapsed 0.014 ms (1.424 ms / 100) 1.456 -> 1.459 ( +0.21%) [ +0.21% +0.14% +0.00% / +0.21% +0.55% +0.62%] index_select wrap : Elapsed 0.015 ms (1.459 ms / 100) 1.448 -> 1.448 ( +0.00%) [ +0.21% +0.14% +0.00% / +0.00% +0.35% +0.35%] index_select linear : Elapsed 0.015 ms (1.451 ms / 100) 1.448 -> 1.447 ( -0.07%) [ +0.14% +0.14% +0.00% / +0.21% +0.00% -0.07%] index_select reverse : Elapsed 0.014 ms (1.450 ms / 100) 1.434 -> 1.430 ( -0.28%) [ +0.00% +0.07% +0.07% / -0.28% +0.35% +0.21%] index_select skip64 : Elapsed 0.014 ms (1.434 ms / 100) 1.416 -> 1.414 ( -0.14%) [ +0.35% +0.00% +0.14% / -0.14% +0.56% +0.49%] index_select skip256 : Elapsed 0.014 ms (1.421 ms / 100) 1.454 -> 1.455 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.28% +0.41%] index_select spread : Elapsed 0.015 ms (1.456 ms / 100) 1.444 -> 1.445 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.21% +0.28%] index_select strided 3 : Elapsed 0.014 ms (1.446 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.28% +0.07%] index_select strided 5 : Elapsed 0.014 ms (1.422 ms / 100) 1.440 -> 1.441 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.69% +0.63%] index_select strided 7 : Elapsed 0.014 ms (1.441 ms / 100) 1.442 -> 1.443 ( +0.07%) [ +0.35% +0.28% +0.00% / +0.07% +0.69% +0.76%] index_select strided 8 : Elapsed 0.014 ms (1.447 ms / 100) 1.429 -> 1.430 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.98% +0.91%] index_select strided 16 : Elapsed 0.014 ms (1.429 ms / 100) 1.432 -> 1.432 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_select random : Elapsed 0.014 ms (1.433 ms / 100) 1.434 -> 1.435 ( +0.07%) [ +0.00% +0.21% +0.14% / +0.07% +0.42% +0.56%] index_select random_sorted : Elapsed 0.014 ms (1.434 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.69% +0.56%] index_select perm : Elapsed 0.014 ms (1.442 ms / 100) 1.445 -> 1.447 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.69% +0.76%] index_select perm_sorted : Elapsed 0.014 ms (1.445 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 200, 800)) A = [40, 5, 4, 20] (stride (1, 3200, 800, 40)) dim = 3 1.366 -> 1.368 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.32% +1.32%] index_select const : Elapsed 0.014 ms (1.368 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.07% +0.22% +0.00% / +0.00% +0.94% +1.16%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +1.02% +1.02%] index_select linear : Elapsed 0.014 ms (1.370 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +0.87% +0.72%] index_select reverse : Elapsed 0.014 ms (1.382 ms / 100) 1.366 -> 1.367 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.95% +1.02%] index_select skip64 : Elapsed 0.014 ms (1.366 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.87% +0.73%] index_select skip256 : Elapsed 0.014 ms (1.373 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.80% +0.65%] index_select spread : Elapsed 0.014 ms (1.378 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.80% +0.73%] index_select strided 3 : Elapsed 0.014 ms (1.371 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +1.25% +1.25%] index_select strided 5 : Elapsed 0.014 ms (1.366 ms / 100) 1.380 -> 1.378 ( -0.14%) [ +0.00% +0.00% +0.07% / -0.14% +0.65% +0.58%] index_select strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.00% +0.29% +0.22% / -0.07% +0.36% +0.29%] index_select strided 8 : Elapsed 0.014 ms (1.379 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.80% +0.87%] index_select strided 16 : Elapsed 0.014 ms (1.373 ms / 100) 1.384 -> 1.385 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.43% +0.51%] index_select random : Elapsed 0.014 ms (1.385 ms / 100) 1.383 -> 1.383 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.29% +0.36%] index_select random_sorted : Elapsed 0.014 ms (1.384 ms / 100) 1.389 -> 1.391 ( +0.14%) [ +0.43% +0.00% +0.00% / +0.43% +0.22% +0.14%] index_select perm : Elapsed 0.014 ms (1.395 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.22% +0.07% +0.00% / +0.15% +0.36% +0.44%] index_select perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) B = [40, 5, 4, 16] (stride (1, 40, 200, 800)) A = [40, 5, 4, 20] (stride (400, 80, 1, 4)) dim = 3 3.631 -> 3.633 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.50% +0.50%] index_select const : Elapsed 0.036 ms (3.631 ms / 100) 3.612 -> 3.619 ( +0.19%) [ +0.08% +0.03% +0.00% / +0.19% +0.50% +0.44%] index_select wrap : Elapsed 0.036 ms (3.615 ms / 100) 3.612 -> 3.615 ( +0.08%) [ +0.06% +0.00% +0.03% / +0.08% +0.58% +0.61%] index_select linear : Elapsed 0.036 ms (3.614 ms / 100) 3.613 -> 3.612 ( -0.03%) [ +0.08% +0.00% +0.08% / -0.03% +0.42% +0.53%] index_select reverse : Elapsed 0.036 ms (3.616 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.44%] index_select skip64 : Elapsed 0.036 ms (3.624 ms / 100) 3.643 -> 3.645 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.60% +0.58%] index_select skip256 : Elapsed 0.036 ms (3.645 ms / 100) 3.613 -> 3.616 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_select spread : Elapsed 0.036 ms (3.613 ms / 100) 3.621 -> 3.622 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.58% +0.61%] index_select strided 3 : Elapsed 0.036 ms (3.623 ms / 100) 3.611 -> 3.613 ( +0.06%) [ +0.06% +0.00% +0.03% / +0.06% +0.42% +0.42%] index_select strided 5 : Elapsed 0.036 ms (3.613 ms / 100) 3.604 -> 3.605 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.78% +0.72%] index_select strided 7 : Elapsed 0.036 ms (3.605 ms / 100) 3.613 -> 3.613 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.55% +0.55%] index_select strided 8 : Elapsed 0.036 ms (3.614 ms / 100) 3.605 -> 3.605 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.69% +0.75%] index_select strided 16 : Elapsed 0.036 ms (3.605 ms / 100) 3.614 -> 3.614 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.83% +0.89%] index_select random : Elapsed 0.036 ms (3.614 ms / 100) 3.605 -> 3.607 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.78% +0.75%] index_select random_sorted : Elapsed 0.036 ms (3.605 ms / 100) 3.599 -> 3.602 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.75% +0.72%] index_select perm : Elapsed 0.036 ms (3.599 ms / 100) 3.608 -> 3.613 ( +0.14%) [ +0.14% +0.11% +0.00% / +0.14% +0.67% +0.72%] index_select perm_sorted : Elapsed 0.036 ms (3.613 ms / 100) out_shape = [16, 5, 20, 4] in_shape = [40, 5, 20, 4] idx_dim = 0 B = [16, 5, 20, 4] (stride (400, 20, 1, 100)) A = [40, 5, 20, 4] (stride (20, 800, 1, 4000)) dim = 0 3.563 -> 3.565 ( +0.06%) [ +0.08% +0.00% +0.03% / +0.06% +0.67% +0.65%] index_select const : Elapsed 0.036 ms (3.566 ms / 100) 3.609 -> 3.608 ( -0.03%) [ +0.03% +0.11% +0.00% / -0.03% +0.42% +0.39%] index_select wrap : Elapsed 0.036 ms (3.610 ms / 100) 3.586 -> 3.588 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.06% +0.47% +0.67%] index_select linear : Elapsed 0.036 ms (3.586 ms / 100) 3.573 -> 3.576 ( +0.08%) [ +0.14% +0.14% +0.00% / +0.08% +0.53% +0.64%] index_select reverse : Elapsed 0.036 ms (3.578 ms / 100) 3.596 -> 3.595 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.39% +0.67%] index_select skip64 : Elapsed 0.036 ms (3.596 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.36% +0.34%] index_select skip256 : Elapsed 0.036 ms (3.570 ms / 100) 3.575 -> 3.571 ( -0.11%) [ +0.00% +0.06% +0.17% / -0.11% +0.53% +0.42%] index_select spread : Elapsed 0.036 ms (3.575 ms / 100) 3.588 -> 3.595 ( +0.20%) [ +0.22% +0.00% +0.11% / +0.20% +0.28% +0.28%] index_select strided 3 : Elapsed 0.036 ms (3.596 ms / 100) 3.575 -> 3.573 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.34% +0.25%] index_select strided 5 : Elapsed 0.036 ms (3.575 ms / 100) 3.573 -> 3.572 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.53% +0.50%] index_select strided 7 : Elapsed 0.036 ms (3.573 ms / 100) 3.582 -> 3.588 ( +0.17%) [ +0.00% +0.11% +0.00% / +0.17% +0.61% +0.39%] index_select strided 8 : Elapsed 0.036 ms (3.582 ms / 100) 3.567 -> 3.566 ( -0.03%) [ +0.06% +0.03% +0.00% / -0.03% +0.42% +0.45%] index_select strided 16 : Elapsed 0.036 ms (3.569 ms / 100) 3.584 -> 3.585 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.53% +0.45%] index_select random : Elapsed 0.036 ms (3.587 ms / 100) 3.561 -> 3.563 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.51% +0.51%] index_select random_sorted : Elapsed 0.036 ms (3.561 ms / 100) 3.583 -> 3.589 ( +0.17%) [ +0.03% +0.20% +0.00% / +0.17% +0.47% +0.50%] index_select perm : Elapsed 0.036 ms (3.584 ms / 100) 3.586 -> 3.591 ( +0.14%) [ +0.00% +0.11% +0.06% / +0.14% +0.42% +0.50%] index_select perm_sorted : Elapsed 0.036 ms (3.586 ms / 100) B = [16, 5, 20, 4] (stride (80, 1280, 1, 20)) A = [40, 5, 20, 4] (stride (100, 20, 1, 4000)) dim = 0 3.543 -> 3.542 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.62% +0.59%] index_select const : Elapsed 0.035 ms (3.543 ms / 100) 3.534 -> 3.540 ( +0.17%) [ +0.00% +0.11% +0.17% / +0.17% +0.74% +0.85%] index_select wrap : Elapsed 0.035 ms (3.534 ms / 100) 3.555 -> 3.560 ( +0.14%) [ +0.06% +0.06% +0.00% / +0.14% +0.79% +0.76%] index_select linear : Elapsed 0.036 ms (3.557 ms / 100) 3.543 -> 3.553 ( +0.28%) [ +0.34% +0.00% +0.06% / +0.28% +1.30% +0.99%] index_select reverse : Elapsed 0.036 ms (3.555 ms / 100) 3.531 -> 3.532 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.82% +0.85%] index_select skip64 : Elapsed 0.035 ms (3.532 ms / 100) 3.539 -> 3.539 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.82% +0.85%] index_select skip256 : Elapsed 0.035 ms (3.540 ms / 100) 3.532 -> 3.533 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.82% +0.76%] index_select spread : Elapsed 0.035 ms (3.534 ms / 100) 3.549 -> 3.557 ( +0.23%) [ +0.17% +0.00% +0.00% / +0.23% +0.90% +0.70%] index_select strided 3 : Elapsed 0.036 ms (3.555 ms / 100) 3.552 -> 3.551 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.70% +0.59%] index_select strided 5 : Elapsed 0.036 ms (3.552 ms / 100) 3.537 -> 3.541 ( +0.11%) [ +0.08% +0.03% +0.00% / +0.11% +0.82% +0.82%] index_select strided 7 : Elapsed 0.035 ms (3.540 ms / 100) 3.560 -> 3.560 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.79% +0.76%] index_select strided 8 : Elapsed 0.036 ms (3.560 ms / 100) 3.539 -> 3.538 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.85% +0.88%] index_select strided 16 : Elapsed 0.035 ms (3.539 ms / 100) 3.551 -> 3.554 ( +0.08%) [ +0.03% +0.00% +0.14% / +0.08% +0.79% +0.76%] index_select random : Elapsed 0.036 ms (3.552 ms / 100) 3.536 -> 3.540 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.85% +0.85%] index_select random_sorted : Elapsed 0.035 ms (3.536 ms / 100) 3.564 -> 3.565 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.79% +0.76%] index_select perm : Elapsed 0.036 ms (3.564 ms / 100) 3.543 -> 3.546 ( +0.08%) [ +0.06% +0.00% +0.03% / +0.08% +0.79% +0.73%] index_select perm_sorted : Elapsed 0.035 ms (3.545 ms / 100) B = [16, 5, 20, 4] (stride (20, 1280, 1, 320)) A = [40, 5, 20, 4] (stride (20, 1, 800, 5)) dim = 0 3.944 -> 3.948 ( +0.10%) [ +0.13% +0.00% +0.08% / +0.10% +0.66% +0.76%] index_select const : Elapsed 0.039 ms (3.949 ms / 100) 4.011 -> 4.011 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.47% +0.42%] index_select wrap : Elapsed 0.040 ms (4.011 ms / 100) 4.004 -> 4.004 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.50% +0.42%] index_select linear : Elapsed 0.040 ms (4.004 ms / 100) 3.996 -> 4.000 ( +0.10%) [ +0.00% +0.03% +0.10% / +0.10% +0.53% +0.48%] index_select reverse : Elapsed 0.040 ms (3.996 ms / 100) 3.965 -> 3.966 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.58% +0.61%] index_select skip64 : Elapsed 0.040 ms (3.968 ms / 100) 3.953 -> 3.952 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.46% +0.38%] index_select skip256 : Elapsed 0.040 ms (3.954 ms / 100) 4.000 -> 3.999 ( -0.02%) [ +0.00% +0.17% +0.10% / -0.02% +0.50% +0.45%] index_select spread : Elapsed 0.040 ms (4.000 ms / 100) 4.020 -> 4.022 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.45% +0.50%] index_select strided 3 : Elapsed 0.040 ms (4.022 ms / 100) 3.986 -> 3.989 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.08% +0.33% +0.35%] index_select strided 5 : Elapsed 0.040 ms (3.986 ms / 100) 4.000 -> 4.006 ( +0.15%) [ +0.10% +0.00% +0.08% / +0.15% +0.43% +0.32%] index_select strided 7 : Elapsed 0.040 ms (4.004 ms / 100) 3.964 -> 3.964 ( +0.00%) [ +0.03% +0.00% +0.10% / +0.00% +0.53% +0.50%] index_select strided 8 : Elapsed 0.040 ms (3.965 ms / 100) 3.950 -> 3.948 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.41% +0.46%] index_select strided 16 : Elapsed 0.039 ms (3.950 ms / 100) 3.994 -> 4.004 ( +0.25%) [ +0.03% +0.25% +0.00% / +0.25% +0.53% +0.50%] index_select random : Elapsed 0.040 ms (3.995 ms / 100) 4.004 -> 4.003 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.70% +0.52%] index_select random_sorted : Elapsed 0.040 ms (4.004 ms / 100) 4.008 -> 4.012 ( +0.10%) [ +0.07% +0.02% +0.00% / +0.10% +0.55% +0.47%] index_select perm : Elapsed 0.040 ms (4.011 ms / 100) 4.003 -> 4.004 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.50% +0.50%] index_select perm_sorted : Elapsed 0.040 ms (4.004 ms / 100) B = [16, 5, 20, 4] (stride (1, 16, 320, 80)) A = [40, 5, 20, 4] (stride (4, 3200, 160, 1)) dim = 0 1.389 -> 1.392 ( +0.22%) [ +0.00% +0.14% +0.14% / +0.22% +0.72% +0.94%] index_select const : Elapsed 0.014 ms (1.389 ms / 100) 1.385 -> 1.387 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.72% +0.79%] index_select wrap : Elapsed 0.014 ms (1.385 ms / 100) 1.386 -> 1.386 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.79% +0.94%] index_select linear : Elapsed 0.014 ms (1.388 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.94% +0.87%] index_select reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.386 -> 1.387 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +1.08% +1.08%] index_select skip64 : Elapsed 0.014 ms (1.387 ms / 100) 1.388 -> 1.388 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +1.01% +0.86%] index_select skip256 : Elapsed 0.014 ms (1.389 ms / 100) 1.384 -> 1.384 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.87% +0.79%] index_select spread : Elapsed 0.014 ms (1.384 ms / 100) 1.393 -> 1.394 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.86% +1.01%] index_select strided 3 : Elapsed 0.014 ms (1.394 ms / 100) 1.391 -> 1.391 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.79% +0.65%] index_select strided 5 : Elapsed 0.014 ms (1.391 ms / 100) 1.387 -> 1.389 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +1.01% +0.94%] index_select strided 7 : Elapsed 0.014 ms (1.389 ms / 100) 1.397 -> 1.394 ( -0.21%) [ +0.00% +0.00% +0.29% / -0.21% +0.79% +0.93%] index_select strided 8 : Elapsed 0.014 ms (1.397 ms / 100) 1.387 -> 1.387 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +1.08% +1.08%] index_select strided 16 : Elapsed 0.014 ms (1.387 ms / 100) 1.391 -> 1.393 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +1.37% +1.01%] index_select random : Elapsed 0.014 ms (1.391 ms / 100) 1.392 -> 1.393 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.93% +0.93%] index_select random_sorted : Elapsed 0.014 ms (1.394 ms / 100) 1.391 -> 1.392 ( +0.07%) [ +0.00% +0.14% +0.14% / +0.07% +1.08% +1.01%] index_select perm : Elapsed 0.014 ms (1.391 ms / 100) 1.396 -> 1.396 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.72% +0.72%] index_select perm_sorted : Elapsed 0.014 ms (1.396 ms / 100) B = [16, 5, 20, 4] (stride (100, 20, 1, 1600)) A = [40, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 0 2.881 -> 2.881 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.49% +0.52%] index_select const : Elapsed 0.029 ms (2.882 ms / 100) 2.901 -> 2.900 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.52% +0.55%] index_select wrap : Elapsed 0.029 ms (2.902 ms / 100) 2.900 -> 2.900 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.55%] index_select linear : Elapsed 0.029 ms (2.900 ms / 100) 2.882 -> 2.884 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.56% +0.52%] index_select reverse : Elapsed 0.029 ms (2.884 ms / 100) 2.893 -> 2.896 ( +0.10%) [ +0.03% +0.07% +0.00% / +0.10% +0.48% +0.59%] index_select skip64 : Elapsed 0.029 ms (2.894 ms / 100) 2.881 -> 2.881 ( +0.00%) [ +0.00% +0.07% +0.03% / +0.00% +0.45% +0.42%] index_select skip256 : Elapsed 0.029 ms (2.881 ms / 100) 2.887 -> 2.887 ( +0.00%) [ +0.00% +0.07% +0.28% / +0.00% +0.42% +0.45%] index_select spread : Elapsed 0.029 ms (2.887 ms / 100) 2.898 -> 2.900 ( +0.07%) [ +0.14% +0.00% +0.10% / +0.07% +0.38% +0.24%] index_select strided 3 : Elapsed 0.029 ms (2.902 ms / 100) 2.886 -> 2.887 ( +0.03%) [ +0.07% +0.00% +0.00% / +0.03% +0.45% +0.24%] index_select strided 5 : Elapsed 0.029 ms (2.888 ms / 100) 2.904 -> 2.905 ( +0.03%) [ +0.07% +0.03% +0.00% / +0.03% +0.24% +0.24%] index_select strided 7 : Elapsed 0.029 ms (2.906 ms / 100) 2.890 -> 2.891 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.38% +0.42%] index_select strided 8 : Elapsed 0.029 ms (2.890 ms / 100) 2.880 -> 2.880 ( +0.00%) [ +0.00% +0.03% +0.07% / +0.00% +0.28% +0.28%] index_select strided 16 : Elapsed 0.029 ms (2.880 ms / 100) 2.909 -> 2.908 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.38% +0.21%] index_select random : Elapsed 0.029 ms (2.909 ms / 100) 2.884 -> 2.886 ( +0.07%) [ +0.03% +0.03% +0.00% / +0.07% +0.52% +0.59%] index_select random_sorted : Elapsed 0.029 ms (2.885 ms / 100) 2.904 -> 2.904 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.34% +0.34%] index_select perm : Elapsed 0.029 ms (2.905 ms / 100) 2.886 -> 2.885 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.28% +0.31%] index_select perm_sorted : Elapsed 0.029 ms (2.887 ms / 100) B = [16, 5, 20, 4] (stride (100, 1, 5, 1600)) A = [40, 5, 20, 4] (stride (1, 3200, 160, 40)) dim = 0 3.597 -> 3.599 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.53% +0.50%] index_select const : Elapsed 0.036 ms (3.597 ms / 100) 3.572 -> 3.576 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +0.73% +0.70%] index_select wrap : Elapsed 0.036 ms (3.576 ms / 100) 3.528 -> 3.525 ( -0.09%) [ +0.11% +0.11% +0.00% / -0.09% +0.65% +0.68%] index_select linear : Elapsed 0.035 ms (3.532 ms / 100) 3.559 -> 3.561 ( +0.06%) [ +0.06% +0.00% +0.11% / +0.06% +0.84% +0.93%] index_select reverse : Elapsed 0.036 ms (3.561 ms / 100) 3.557 -> 3.557 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.73% +0.76%] index_select skip64 : Elapsed 0.036 ms (3.558 ms / 100) 3.594 -> 3.594 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.70% +0.70%] index_select skip256 : Elapsed 0.036 ms (3.595 ms / 100) 3.529 -> 3.530 ( +0.03%) [ +0.09% +0.00% +0.00% / +0.03% +0.62% +0.60%] index_select spread : Elapsed 0.035 ms (3.532 ms / 100) 3.563 -> 3.564 ( +0.03%) [ +0.00% +0.08% +0.03% / +0.03% +0.53% +0.70%] index_select strided 3 : Elapsed 0.036 ms (3.563 ms / 100) 3.535 -> 3.536 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.03% +0.76% +0.62%] index_select strided 5 : Elapsed 0.035 ms (3.535 ms / 100) 3.533 -> 3.534 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.65% +0.65%] index_select strided 7 : Elapsed 0.035 ms (3.534 ms / 100) 3.571 -> 3.575 ( +0.11%) [ +0.11% +0.08% +0.00% / +0.11% +0.87% +0.90%] index_select strided 8 : Elapsed 0.036 ms (3.575 ms / 100) 3.547 -> 3.550 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.85% +0.82%] index_select strided 16 : Elapsed 0.035 ms (3.550 ms / 100) 3.591 -> 3.594 ( +0.08%) [ +0.03% +0.14% +0.00% / +0.08% +0.78% +0.72%] index_select random : Elapsed 0.036 ms (3.592 ms / 100) 3.567 -> 3.565 ( -0.06%) [ +0.00% +0.03% +0.06% / -0.06% +0.70% +0.81%] index_select random_sorted : Elapsed 0.036 ms (3.567 ms / 100) 3.524 -> 3.533 ( +0.26%) [ +0.03% +0.00% +0.31% / +0.26% +0.94% +0.82%] index_select perm : Elapsed 0.035 ms (3.525 ms / 100) 3.557 -> 3.566 ( +0.25%) [ +0.06% +0.14% +0.00% / +0.25% +0.98% +0.93%] index_select perm_sorted : Elapsed 0.036 ms (3.559 ms / 100) out_shape = [40, 16, 20, 4] in_shape = [40, 5, 20, 4] idx_dim = 1 B = [40, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [40, 5, 20, 4] (stride (80, 3200, 4, 1)) dim = 1 0.833 -> 0.817 ( -1.92%) [ +0.36% +0.12% +0.00% / +0.24% -1.68% -1.92%] index_add_ linear : Elapsed 0.008 ms (0.836 ms / 100) 0.814 -> 0.801 ( -1.60%) [ +0.86% +0.00% +0.00% / -0.12% -1.60% -1.47%] index_copy_ linear : Elapsed 0.008 ms (0.821 ms / 100) 0.835 -> 0.819 ( -1.92%) [ +0.00% +0.24% +0.24% / -0.24% -1.80% -1.92%] index_add_ reverse : Elapsed 0.008 ms (0.835 ms / 100) 0.815 -> 0.795 ( -2.45%) [ +0.12% +0.37% +0.00% / +0.49% -1.72% -2.45%] index_copy_ reverse : Elapsed 0.008 ms (0.816 ms / 100) 0.843 -> 0.823 ( -2.37%) [ +0.00% +0.12% +0.24% / +0.12% -2.37% -2.37%] index_add_ spread : Elapsed 0.008 ms (0.843 ms / 100) 0.824 -> 0.808 ( -1.94%) [ +0.00% +0.24% +0.00% / +0.24% -1.21% -1.94%] index_copy_ spread : Elapsed 0.008 ms (0.824 ms / 100) 0.844 -> 0.823 ( -2.49%) [ +0.24% +0.00% +0.47% / +0.59% -1.90% -2.49%] index_add_ strided 3 : Elapsed 0.008 ms (0.846 ms / 100) 0.825 -> 0.807 ( -2.18%) [ +0.00% +0.00% +0.36% / +0.24% -2.18% -2.06%] index_copy_ strided 3 : Elapsed 0.008 ms (0.825 ms / 100) 0.840 -> 0.815 ( -2.98%) [ +0.12% +0.12% +0.00% / -0.24% -2.74% -2.98%] index_add_ strided 5 : Elapsed 0.008 ms (0.841 ms / 100) 0.820 -> 0.799 ( -2.56%) [ +0.00% +0.49% +0.24% / -0.49% -2.32% -2.56%] index_copy_ strided 5 : Elapsed 0.008 ms (0.820 ms / 100) 0.835 -> 0.811 ( -2.87%) [ +0.36% +0.24% +0.00% / +0.24% -2.87% -2.51%] index_add_ strided 7 : Elapsed 0.008 ms (0.838 ms / 100) 0.814 -> 0.797 ( -2.09%) [ +0.37% +0.25% +0.00% / +0.00% -1.97% -2.09%] index_copy_ strided 7 : Elapsed 0.008 ms (0.817 ms / 100) 0.845 -> 0.817 ( -3.31%) [ +0.24% +0.83% +0.00% / +0.47% -3.31% -3.31%] index_add_ perm : Elapsed 0.008 ms (0.847 ms / 100) 0.821 -> 0.798 ( -2.80%) [ +0.00% +0.24% +0.24% / +0.24% -2.80% -2.44%] index_copy_ perm : Elapsed 0.008 ms (0.821 ms / 100) 0.840 -> 0.818 ( -2.62%) [ +0.12% +0.48% +0.00% / +0.12% -2.62% -2.50%] index_add_ perm_sorted : Elapsed 0.008 ms (0.841 ms / 100) 0.819 -> 0.799 ( -2.44%) [ +0.24% +0.00% +0.24% / +0.61% -2.44% -2.32%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.821 ms / 100) 1.567 -> 1.571 ( +0.26%) [ +0.32% +0.19% +0.00% / +0.26% +0.32% +0.57%] index_select const : Elapsed 0.016 ms (1.572 ms / 100) 1.588 -> 1.565 ( -1.45%) [ +0.19% +0.00% +0.19% / +0.13% -0.94% -1.45%] index_select wrap : Elapsed 0.016 ms (1.591 ms / 100) 1.603 -> 1.574 ( -1.81%) [ +0.25% +0.19% +0.00% / +0.12% -1.37% -1.81%] index_select linear : Elapsed 0.016 ms (1.607 ms / 100) 1.584 -> 1.572 ( -0.76%) [ +0.32% +0.00% +0.32% / +0.51% -0.51% -0.76%] index_select reverse : Elapsed 0.016 ms (1.589 ms / 100) 1.563 -> 1.575 ( +0.77%) [ +0.38% +0.58% +0.00% / +0.77% +0.77% +0.77%] index_select skip64 : Elapsed 0.016 ms (1.569 ms / 100) 1.568 -> 1.569 ( +0.06%) [ +0.06% +0.13% +0.00% / +0.06% +0.51% +0.64%] index_select skip256 : Elapsed 0.016 ms (1.569 ms / 100) 1.589 -> 1.576 ( -0.82%) [ +0.31% +0.00% +0.19% / -0.19% -0.82% -0.76%] index_select spread : Elapsed 0.016 ms (1.594 ms / 100) 1.599 -> 1.576 ( -1.44%) [ +0.25% +0.00% +0.06% / -0.06% -1.38% -1.44%] index_select strided 3 : Elapsed 0.016 ms (1.603 ms / 100) 1.589 -> 1.574 ( -0.94%) [ +0.25% +0.00% +0.00% / +0.06% -0.88% -0.94%] index_select random : Elapsed 0.016 ms (1.593 ms / 100) 1.591 -> 1.573 ( -1.13%) [ +0.00% +0.00% +0.19% / +0.19% -0.82% -1.13%] index_select random_sorted : Elapsed 0.016 ms (1.591 ms / 100) B = [40, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [40, 5, 20, 4] (stride (1, 3200, 160, 40)) dim = 1 0.867 -> 0.865 ( -0.23%) [ +0.12% +0.00% +0.23% / +0.00% -0.23% -0.23%] index_add_ linear : Elapsed 0.009 ms (0.868 ms / 100) 0.846 -> 0.840 ( -0.71%) [ +0.47% +0.00% +0.83% / +0.59% -0.71% -0.12%] index_copy_ linear : Elapsed 0.008 ms (0.850 ms / 100) 0.866 -> 0.863 ( -0.35%) [ +0.12% +0.12% +0.00% / +0.12% -0.35% +0.12%] index_add_ reverse : Elapsed 0.009 ms (0.867 ms / 100) 0.844 -> 0.845 ( +0.12%) [ +0.00% +0.47% +0.36% / +0.36% +0.12% +0.24%] index_copy_ reverse : Elapsed 0.008 ms (0.844 ms / 100) 0.865 -> 0.866 ( +0.12%) [ +0.00% +0.35% +0.00% / +0.58% +0.12% +0.46%] index_add_ spread : Elapsed 0.009 ms (0.865 ms / 100) 0.846 -> 0.846 ( +0.00%) [ +0.00% +0.24% +0.00% / +0.12% +0.00% +0.12%] index_copy_ spread : Elapsed 0.008 ms (0.846 ms / 100) 0.864 -> 0.865 ( +0.12%) [ +0.00% +0.46% +0.12% / +0.12% +0.46% +0.81%] index_add_ strided 3 : Elapsed 0.009 ms (0.864 ms / 100) 0.842 -> 0.846 ( +0.48%) [ +0.24% +0.71% +0.00% / +0.48% +0.83% +0.48%] index_copy_ strided 3 : Elapsed 0.008 ms (0.844 ms / 100) 0.863 -> 0.858 ( -0.58%) [ +0.35% +0.00% +0.12% / +0.35% -0.23% -0.58%] index_add_ strided 5 : Elapsed 0.009 ms (0.866 ms / 100) 0.841 -> 0.839 ( -0.24%) [ +0.71% +0.36% +0.00% / +0.48% +0.12% -0.24%] index_copy_ strided 5 : Elapsed 0.008 ms (0.847 ms / 100) 0.861 -> 0.866 ( +0.58%) [ +0.35% +0.00% +0.12% / +0.58% +0.70% +1.16%] index_add_ strided 7 : Elapsed 0.009 ms (0.864 ms / 100) 0.842 -> 0.842 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.008 ms (0.843 ms / 100) 0.858 -> 0.860 ( +0.23%) [ +0.23% +0.70% +0.00% / +0.23% +0.58% +0.58%] index_add_ perm : Elapsed 0.009 ms (0.860 ms / 100) 0.838 -> 0.842 ( +0.48%) [ +0.24% +0.12% +0.00% / +0.48% +0.95% +0.60%] index_copy_ perm : Elapsed 0.008 ms (0.840 ms / 100) 0.860 -> 0.863 ( +0.35%) [ +0.12% +0.00% +0.12% / +0.35% +1.05% +0.81%] index_add_ perm_sorted : Elapsed 0.009 ms (0.861 ms / 100) 0.838 -> 0.840 ( +0.24%) [ +0.60% +0.24% +0.00% / +0.24% +0.95% +0.72%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.843 ms / 100) 1.703 -> 1.705 ( +0.12%) [ +0.29% +0.00% +0.06% / +0.12% +0.59% +0.12%] index_select const : Elapsed 0.017 ms (1.708 ms / 100) 1.703 -> 1.703 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +0.29% +0.29%] index_select wrap : Elapsed 0.017 ms (1.705 ms / 100) 1.709 -> 1.706 ( -0.18%) [ +0.23% +0.00% +0.00% / -0.18% -0.18% +0.06%] index_select linear : Elapsed 0.017 ms (1.713 ms / 100) 1.704 -> 1.706 ( +0.12%) [ +0.00% +0.23% +0.06% / +0.12% +0.35% +0.41%] index_select reverse : Elapsed 0.017 ms (1.704 ms / 100) 1.705 -> 1.708 ( +0.18%) [ +0.18% +0.23% +0.00% / +0.18% +0.23% +0.18%] index_select skip64 : Elapsed 0.017 ms (1.708 ms / 100) 1.705 -> 1.704 ( -0.06%) [ +0.00% +0.00% +0.35% / -0.06% +0.12% +0.53%] index_select skip256 : Elapsed 0.017 ms (1.705 ms / 100) 1.704 -> 1.703 ( -0.06%) [ +0.06% +0.00% +0.12% / -0.06% +0.35% +0.53%] index_select spread : Elapsed 0.017 ms (1.705 ms / 100) 1.701 -> 1.703 ( +0.12%) [ +0.00% +0.29% +0.18% / +0.12% +0.88% +0.94%] index_select strided 3 : Elapsed 0.017 ms (1.701 ms / 100) 1.698 -> 1.698 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.47% +0.82%] index_select random : Elapsed 0.017 ms (1.700 ms / 100) 1.705 -> 1.709 ( +0.23%) [ +0.12% +0.29% +0.00% / +0.23% +0.35% +0.29%] index_select random_sorted : Elapsed 0.017 ms (1.707 ms / 100) B = [40, 16, 20, 4] (stride (1280, 20, 1, 320)) A = [40, 5, 20, 4] (stride (20, 3200, 1, 800)) dim = 1 2.392 -> 2.392 ( +0.00%) [ +0.00% +0.33% +0.00% / +0.00% +0.54% +0.63%] index_add_ linear : Elapsed 0.024 ms (2.392 ms / 100) 2.327 -> 2.326 ( -0.04%) [ +0.26% +0.17% +0.00% / -0.04% +0.95% +0.86%] index_copy_ linear : Elapsed 0.023 ms (2.333 ms / 100) 2.405 -> 2.404 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.46% +0.33%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.338 -> 2.335 ( -0.13%) [ +0.00% +0.13% +0.00% / -0.13% +0.68% +0.64%] index_copy_ reverse : Elapsed 0.023 ms (2.338 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.21% +0.00% +0.00% / -0.04% +0.54% +0.54%] index_add_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.342 -> 2.343 ( +0.04%) [ +0.13% +0.09% +0.00% / +0.04% +0.81% +0.73%] index_copy_ spread : Elapsed 0.023 ms (2.345 ms / 100) 2.392 -> 2.395 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.13% +0.54% +0.71%] index_add_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.26% +0.22% +0.00% / +0.04% +1.12% +1.25%] index_copy_ strided 3 : Elapsed 0.023 ms (2.330 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.25% +0.00% +0.04% / +0.04% +0.50% +0.37%] index_add_ strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.339 -> 2.341 ( +0.09%) [ +0.00% +0.34% +0.17% / +0.09% +0.77% +0.94%] index_copy_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.393 -> 2.397 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.71% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.397 ms / 100) 2.335 -> 2.338 ( +0.13%) [ +0.04% +0.17% +0.00% / +0.13% +0.99% +1.03%] index_copy_ strided 7 : Elapsed 0.023 ms (2.336 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.13% +0.21% +0.00% / -0.04% +0.83% +1.00%] index_add_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.333 -> 2.336 ( +0.13%) [ +0.13% +0.00% +0.17% / +0.13% +1.24% +1.24%] index_copy_ perm : Elapsed 0.023 ms (2.336 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.00% +0.17% +0.12% / +0.04% +0.75% +0.75%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.344 -> 2.352 ( +0.34%) [ +0.17% +0.43% +0.00% / +0.34% +0.98% +0.98%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.348 ms / 100) 4.892 -> 4.891 ( -0.02%) [ +0.00% +0.08% +0.06% / -0.02% +0.59% +0.72%] index_select const : Elapsed 0.049 ms (4.892 ms / 100) 4.961 -> 4.963 ( +0.04%) [ +0.00% +0.00% +0.02% / +0.04% +0.54% +0.62%] index_select wrap : Elapsed 0.050 ms (4.961 ms / 100) 5.003 -> 5.008 ( +0.10%) [ +0.00% +0.00% +0.08% / +0.10% +0.82% +0.82%] index_select linear : Elapsed 0.050 ms (5.003 ms / 100) 4.979 -> 4.988 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.94% +0.98%] index_select reverse : Elapsed 0.050 ms (4.988 ms / 100) 4.880 -> 4.882 ( +0.04%) [ +0.00% +0.04% +0.06% / +0.04% +0.78% +0.80%] index_select skip64 : Elapsed 0.049 ms (4.880 ms / 100) 4.892 -> 4.894 ( +0.04%) [ +0.04% +0.00% +0.02% / +0.04% +0.65% +0.70%] index_select skip256 : Elapsed 0.049 ms (4.894 ms / 100) 4.947 -> 4.949 ( +0.04%) [ +0.00% +0.10% +0.00% / +0.04% +0.89% +0.97%] index_select spread : Elapsed 0.049 ms (4.947 ms / 100) 4.986 -> 4.988 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.04% +0.78% +0.78%] index_select strided 3 : Elapsed 0.050 ms (4.990 ms / 100) 4.956 -> 4.960 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.99% +0.93%] index_select random : Elapsed 0.050 ms (4.962 ms / 100) 4.952 -> 4.957 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.10% +0.99% +0.99%] index_select random_sorted : Elapsed 0.050 ms (4.957 ms / 100) B = [40, 16, 20, 4] (stride (80, 3200, 1, 20)) dim = 1 fill_cnt = 5 1.081 -> 1.082 ( +0.09%) [ +0.28% +0.19% +0.00% / +0.09% +0.56% +0.28%] index_fill_ const : Elapsed 0.011 ms (1.084 ms / 100) 1.097 -> 1.098 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.73% +0.27%] index_fill_ linear : Elapsed 0.011 ms (1.098 ms / 100) 1.097 -> 1.099 ( +0.18%) [ +0.18% +0.00% +0.00% / +0.18% +0.64% +0.36%] index_fill_ reverse : Elapsed 0.011 ms (1.099 ms / 100) 1.083 -> 1.082 ( -0.09%) [ +0.00% +0.18% +0.00% / -0.09% +0.37% +0.37%] index_fill_ skip64 : Elapsed 0.011 ms (1.083 ms / 100) 1.081 -> 1.081 ( +0.00%) [ +0.00% +0.28% +0.28% / +0.00% +0.56% +0.65%] index_fill_ skip256 : Elapsed 0.011 ms (1.081 ms / 100) 1.103 -> 1.101 ( -0.18%) [ +0.00% +0.27% +0.09% / -0.18% +0.09% -0.09%] index_fill_ spread : Elapsed 0.011 ms (1.103 ms / 100) 1.102 -> 1.101 ( -0.09%) [ +0.00% +0.45% +0.00% / -0.09% +0.36% +0.64%] index_fill_ strided 3 : Elapsed 0.011 ms (1.102 ms / 100) 1.102 -> 1.101 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.00% +0.18%] index_fill_ strided 5 : Elapsed 0.011 ms (1.102 ms / 100) 1.096 -> 1.094 ( -0.18%) [ +0.00% +0.27% +0.00% / -0.18% +1.37% +1.46%] index_fill_ strided 7 : Elapsed 0.011 ms (1.096 ms / 100) 1.083 -> 1.085 ( +0.18%) [ +0.09% +0.28% +0.00% / +0.18% +0.46% +0.74%] index_fill_ strided 8 : Elapsed 0.011 ms (1.084 ms / 100) 1.092 -> 1.095 ( +0.27%) [ +0.09% +0.18% +0.00% / +0.27% +0.64% +0.82%] index_fill_ random : Elapsed 0.011 ms (1.093 ms / 100) 1.091 -> 1.094 ( +0.27%) [ +0.27% +0.27% +0.00% / +0.27% +0.82% +0.82%] index_fill_ random_sorted : Elapsed 0.011 ms (1.094 ms / 100) 1.091 -> 1.096 ( +0.46%) [ +0.55% +0.27% +0.00% / +0.46% +1.74% +1.56%] index_fill_ perm : Elapsed 0.011 ms (1.097 ms / 100) 1.092 -> 1.092 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +1.28% +1.47%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.092 ms / 100) B = [40, 16, 20, 4] (stride (1, 3200, 160, 40)) A = [40, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 1 0.818 -> 0.819 ( +0.12%) [ +0.49% +0.00% +0.12% / +0.12% +8.19% +7.82%] index_add_ linear : Elapsed 0.008 ms (0.822 ms / 100) 0.841 -> 0.843 ( +0.24%) [ +0.24% +0.12% +0.00% / +0.24% +5.59% +4.16%] index_copy_ linear : Elapsed 0.008 ms (0.843 ms / 100) 0.818 -> 0.823 ( +0.61%) [ +0.37% +0.12% +0.00% / +0.61% +6.97% +7.95%] index_add_ reverse : Elapsed 0.008 ms (0.821 ms / 100) 0.837 -> 0.843 ( +0.72%) [ +0.24% +0.00% +0.24% / +0.72% +4.90% +5.26%] index_copy_ reverse : Elapsed 0.008 ms (0.839 ms / 100) 0.823 -> 0.829 ( +0.73%) [ +0.49% +0.00% +0.00% / +0.73% +6.68% +6.44%] index_add_ spread : Elapsed 0.008 ms (0.827 ms / 100) 0.843 -> 0.845 ( +0.24%) [ +0.36% +0.59% +0.00% / +0.24% +4.03% +4.03%] index_copy_ spread : Elapsed 0.008 ms (0.846 ms / 100) 0.824 -> 0.820 ( -0.49%) [ +0.24% +0.00% +0.24% / -0.49% +6.80% +6.31%] index_add_ strided 3 : Elapsed 0.008 ms (0.826 ms / 100) 0.846 -> 0.845 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +4.26% +3.66%] index_copy_ strided 3 : Elapsed 0.008 ms (0.846 ms / 100) 0.841 -> 0.844 ( +0.36%) [ +0.00% +0.12% +0.59% / +0.36% +1.55% +1.78%] index_add_ strided 5 : Elapsed 0.008 ms (0.841 ms / 100) 0.848 -> 0.851 ( +0.35%) [ +0.12% +0.00% +0.24% / +0.35% +2.00% +1.42%] index_copy_ strided 5 : Elapsed 0.008 ms (0.849 ms / 100) 0.817 -> 0.817 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +6.00% +6.12%] index_add_ strided 7 : Elapsed 0.008 ms (0.818 ms / 100) 0.838 -> 0.837 ( -0.12%) [ +0.24% +0.00% +0.12% / -0.12% +4.18% +4.42%] index_copy_ strided 7 : Elapsed 0.008 ms (0.840 ms / 100) 0.821 -> 0.820 ( -0.12%) [ +0.37% +0.00% +0.24% / -0.12% +7.55% +8.04%] index_add_ perm : Elapsed 0.008 ms (0.824 ms / 100) 0.843 -> 0.841 ( -0.24%) [ +0.00% +0.00% +0.00% / -0.24% +4.51% +4.51%] index_copy_ perm : Elapsed 0.008 ms (0.843 ms / 100) 0.822 -> 0.820 ( -0.24%) [ +0.00% +0.00% +0.00% / -0.24% +7.79% +7.54%] index_add_ perm_sorted : Elapsed 0.008 ms (0.822 ms / 100) 0.842 -> 0.842 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +5.11% +4.75%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.842 ms / 100) 1.711 -> 1.715 ( +0.23%) [ +0.00% +0.18% +0.12% / +0.29% +0.23% +0.23%] index_select const : Elapsed 0.017 ms (1.711 ms / 100) 1.716 -> 1.714 ( -0.12%) [ +0.12% +0.23% +0.00% / -0.12% +0.52% +0.52%] index_select wrap : Elapsed 0.017 ms (1.718 ms / 100) 1.717 -> 1.718 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +1.57% +1.63%] index_select linear : Elapsed 0.017 ms (1.719 ms / 100) 1.717 -> 1.727 ( +0.58%) [ +0.06% +0.00% +0.12% / +0.58% +1.34% +1.34%] index_select reverse : Elapsed 0.017 ms (1.718 ms / 100) 1.713 -> 1.715 ( +0.12%) [ +0.35% +0.00% +0.06% / +0.23% +0.12% +0.35%] index_select skip64 : Elapsed 0.017 ms (1.719 ms / 100) 1.711 -> 1.712 ( +0.06%) [ +0.47% +0.23% +0.00% / +0.06% +0.76% +0.23%] index_select skip256 : Elapsed 0.017 ms (1.719 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.00% +1.52% +1.69%] index_select spread : Elapsed 0.017 ms (1.715 ms / 100) 1.716 -> 1.717 ( +0.06%) [ +0.00% +0.52% +0.17% / +0.06% +0.87% +0.76%] index_select strided 3 : Elapsed 0.017 ms (1.716 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +1.46% +1.11%] index_select random : Elapsed 0.017 ms (1.714 ms / 100) 1.715 -> 1.714 ( -0.06%) [ +0.12% +0.00% +0.12% / -0.06% +1.46% +0.93%] index_select random_sorted : Elapsed 0.017 ms (1.717 ms / 100) B = [40, 16, 20, 4] (stride (64, 4, 2560, 1)) A = [40, 5, 20, 4] (stride (20, 4, 800, 1)) dim = 1 2.426 -> 2.427 ( +0.04%) [ +0.00% +0.08% +0.12% / +0.04% +1.28% +1.15%] index_add_ linear : Elapsed 0.024 ms (2.426 ms / 100) 2.363 -> 2.368 ( +0.21%) [ +0.00% +0.25% +0.30% / +0.21% +1.23% +1.31%] index_copy_ linear : Elapsed 0.024 ms (2.363 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.37% +0.00% +0.25% / -0.04% +1.20% +0.99%] index_add_ reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.361 -> 2.362 ( +0.04%) [ +0.30% +0.00% +0.21% / +0.04% +1.36% +1.10%] index_copy_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.443 -> 2.447 ( +0.16%) [ +0.25% +0.65% +0.00% / +0.16% +1.47% +1.68%] index_add_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.393 -> 2.403 ( +0.42%) [ +0.00% +0.42% +0.00% / +0.42% +1.21% +1.42%] index_copy_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.450 -> 2.449 ( -0.04%) [ +0.53% +0.00% +0.00% / -0.04% +1.10% +1.02%] index_add_ strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.398 -> 2.394 ( -0.17%) [ +0.21% +0.04% +0.00% / -0.17% +0.92% +1.21%] index_copy_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.427 -> 2.434 ( +0.29%) [ +0.00% +0.16% +0.37% / +0.29% +1.40% +1.44%] index_add_ strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.366 -> 2.374 ( +0.34%) [ +0.00% +0.46% +0.00% / +0.34% +1.23% +1.39%] index_copy_ strided 5 : Elapsed 0.024 ms (2.366 ms / 100) 2.435 -> 2.440 ( +0.21%) [ +0.33% +0.00% +0.16% / +0.21% +1.36% +1.40%] index_add_ strided 7 : Elapsed 0.024 ms (2.443 ms / 100) 2.390 -> 2.396 ( +0.25%) [ +0.00% +0.17% +0.13% / +0.25% +1.21% +1.30%] index_copy_ strided 7 : Elapsed 0.024 ms (2.390 ms / 100) 2.429 -> 2.436 ( +0.29%) [ +0.04% +0.16% +0.00% / +0.29% +1.48% +1.19%] index_add_ perm : Elapsed 0.024 ms (2.430 ms / 100) 2.379 -> 2.381 ( +0.08%) [ +0.00% +0.13% +0.13% / +0.08% +1.13% +1.13%] index_copy_ perm : Elapsed 0.024 ms (2.379 ms / 100) 2.425 -> 2.431 ( +0.25%) [ +0.12% +0.08% +0.00% / +0.25% +1.65% +1.48%] index_add_ perm_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +1.52% +1.39%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.376 ms / 100) 5.046 -> 5.047 ( +0.02%) [ +0.40% +0.00% +0.14% / +0.02% +0.95% +0.99%] index_select const : Elapsed 0.051 ms (5.066 ms / 100) 5.093 -> 5.090 ( -0.06%) [ +0.04% +0.08% +0.00% / -0.06% +0.79% +0.94%] index_select wrap : Elapsed 0.051 ms (5.095 ms / 100) 5.115 -> 5.128 ( +0.25%) [ +0.00% +0.02% +0.04% / +0.25% +0.98% +0.88%] index_select linear : Elapsed 0.051 ms (5.115 ms / 100) 5.090 -> 5.089 ( -0.02%) [ +0.00% +0.04% +0.10% / -0.02% +0.59% +1.12%] index_select reverse : Elapsed 0.051 ms (5.090 ms / 100) 5.034 -> 5.038 ( +0.08%) [ +0.10% +0.08% +0.00% / +0.08% +0.56% +0.62%] index_select skip64 : Elapsed 0.050 ms (5.039 ms / 100) 5.045 -> 5.043 ( -0.04%) [ +0.18% +0.00% +0.22% / -0.04% +0.69% +1.03%] index_select skip256 : Elapsed 0.051 ms (5.054 ms / 100) 5.094 -> 5.092 ( -0.04%) [ +0.43% +0.26% +0.00% / -0.04% +0.63% +1.18%] index_select spread : Elapsed 0.051 ms (5.116 ms / 100) 5.110 -> 5.109 ( -0.02%) [ +0.06% +0.14% +0.00% / -0.02% +0.92% +0.74%] index_select strided 3 : Elapsed 0.051 ms (5.113 ms / 100) 5.108 -> 5.101 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +1.04% +1.00%] index_select random : Elapsed 0.051 ms (5.108 ms / 100) 5.079 -> 5.085 ( +0.12%) [ +0.28% +0.16% +0.00% / +0.12% +0.93% +0.95%] index_select random_sorted : Elapsed 0.051 ms (5.093 ms / 100) B = [40, 16, 20, 4] (stride (20, 800, 1, 12800)) A = [40, 5, 20, 4] (stride (400, 4, 20, 1)) dim = 1 2.364 -> 2.362 ( -0.08%) [ +0.17% +0.00% +0.13% / +0.08% -0.08% +0.00%] index_add_ linear : Elapsed 0.024 ms (2.368 ms / 100) 2.301 -> 2.303 ( +0.09%) [ +0.17% +0.17% +0.00% / +0.09% +0.09% +0.13%] index_copy_ linear : Elapsed 0.023 ms (2.305 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.04% +0.38% +0.00% / +0.21% +0.08% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.357 ms / 100) 2.295 -> 2.295 ( +0.00%) [ +0.09% +0.44% +0.00% / +0.44% +0.00% +0.04%] index_copy_ reverse : Elapsed 0.023 ms (2.297 ms / 100) 2.362 -> 2.362 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.25% +0.21%] index_add_ spread : Elapsed 0.024 ms (2.362 ms / 100) 2.295 -> 2.297 ( +0.09%) [ +0.22% +0.31% +0.00% / +0.09% +0.31% +0.26%] index_copy_ spread : Elapsed 0.023 ms (2.300 ms / 100) 2.358 -> 2.362 ( +0.17%) [ +0.42% +0.00% +0.25% / +0.25% +0.34% +0.17%] index_add_ strided 3 : Elapsed 0.024 ms (2.368 ms / 100) 2.302 -> 2.300 ( -0.09%) [ +0.00% +0.09% +0.09% / +0.13% +0.04% -0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.302 ms / 100) 2.361 -> 2.360 ( -0.04%) [ +0.34% +0.13% +0.00% / -0.04% +0.17% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.369 ms / 100) 2.300 -> 2.298 ( -0.09%) [ +0.48% +0.17% +0.00% / -0.09% +0.13% -0.04%] index_copy_ strided 5 : Elapsed 0.023 ms (2.311 ms / 100) 2.367 -> 2.372 ( +0.21%) [ +0.51% +0.46% +0.00% / +0.30% +0.25% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.379 ms / 100) 2.307 -> 2.302 ( -0.22%) [ +0.22% +0.39% +0.00% / +0.26% -0.22% -0.17%] index_copy_ strided 7 : Elapsed 0.023 ms (2.312 ms / 100) 2.367 -> 2.362 ( -0.21%) [ +0.13% +0.00% +0.13% / -0.13% -0.08% -0.21%] index_add_ perm : Elapsed 0.024 ms (2.370 ms / 100) 2.307 -> 2.304 ( -0.13%) [ +0.04% +0.00% +0.17% / +0.04% -0.04% -0.13%] index_copy_ perm : Elapsed 0.023 ms (2.308 ms / 100) 2.366 -> 2.358 ( -0.34%) [ +0.17% +0.00% +0.00% / +0.04% -0.34% -0.30%] index_add_ perm_sorted : Elapsed 0.024 ms (2.370 ms / 100) 2.302 -> 2.300 ( -0.09%) [ +0.00% +0.00% +0.04% / +0.17% -0.04% -0.09%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.302 ms / 100) 4.929 -> 4.951 ( +0.45%) [ +0.06% +0.47% +0.00% / +0.45% +0.59% +0.59%] index_select const : Elapsed 0.049 ms (4.932 ms / 100) 4.967 -> 4.972 ( +0.10%) [ +0.10% +0.08% +0.00% / +0.10% +0.46% +0.24%] index_select wrap : Elapsed 0.050 ms (4.972 ms / 100) 4.986 -> 4.992 ( +0.12%) [ +0.02% +0.04% +0.00% / +0.12% +0.18% +0.12%] index_select linear : Elapsed 0.050 ms (4.987 ms / 100) 4.977 -> 4.988 ( +0.22%) [ +0.18% +0.44% +0.00% / +0.22% +0.66% +0.58%] index_select reverse : Elapsed 0.050 ms (4.986 ms / 100) 4.940 -> 4.940 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.00% +0.53% +0.55%] index_select skip64 : Elapsed 0.049 ms (4.948 ms / 100) 4.939 -> 4.941 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.53% +0.47%] index_select skip256 : Elapsed 0.049 ms (4.939 ms / 100) 4.982 -> 4.984 ( +0.04%) [ +0.14% +0.16% +0.00% / +0.04% +0.50% +0.30%] index_select spread : Elapsed 0.050 ms (4.989 ms / 100) 4.950 -> 4.955 ( +0.10%) [ +0.20% +0.06% +0.00% / +0.10% +0.28% +0.42%] index_select strided 3 : Elapsed 0.050 ms (4.960 ms / 100) 4.983 -> 4.981 ( -0.04%) [ +0.08% +0.00% +0.46% / -0.04% +0.70% +0.28%] index_select random : Elapsed 0.050 ms (4.987 ms / 100) 4.980 -> 4.980 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.56% +0.40%] index_select random_sorted : Elapsed 0.050 ms (4.982 ms / 100) B = [40, 16, 20, 4] (stride (16, 1, 640, 12800)) A = [40, 5, 20, 4] (stride (400, 1, 20, 5)) dim = 1 2.249 -> 2.245 ( -0.18%) [ +0.00% +0.00% +0.00% / -0.18% +0.71% +0.98%] index_add_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.217 -> 2.224 ( +0.32%) [ +0.18% +0.00% +0.00% / +0.32% +0.90% +1.22%] index_copy_ linear : Elapsed 0.022 ms (2.221 ms / 100) 2.247 -> 2.249 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +1.20% +0.98%] index_add_ reverse : Elapsed 0.022 ms (2.250 ms / 100) 2.209 -> 2.218 ( +0.41%) [ +0.54% +0.45% +0.00% / +0.41% +0.95% +1.09%] index_copy_ reverse : Elapsed 0.022 ms (2.221 ms / 100) 2.270 -> 2.278 ( +0.35%) [ +0.31% +0.13% +0.00% / +0.35% +1.06% +0.84%] index_add_ spread : Elapsed 0.023 ms (2.277 ms / 100) 2.276 -> 2.285 ( +0.40%) [ +0.48% +0.18% +0.00% / +0.40% +0.75% +0.88%] index_copy_ spread : Elapsed 0.023 ms (2.287 ms / 100) 2.270 -> 2.271 ( +0.04%) [ +0.09% +0.22% +0.00% / +0.04% +1.19% +1.06%] index_add_ strided 3 : Elapsed 0.023 ms (2.272 ms / 100) 2.276 -> 2.274 ( -0.09%) [ +0.18% +0.31% +0.00% / -0.09% +0.83% +1.14%] index_copy_ strided 3 : Elapsed 0.023 ms (2.280 ms / 100) 2.272 -> 2.278 ( +0.26%) [ +0.09% +0.40% +0.00% / +0.26% +0.84% +1.14%] index_add_ strided 5 : Elapsed 0.023 ms (2.274 ms / 100) 2.276 -> 2.282 ( +0.26%) [ +0.00% +0.31% +0.09% / +0.26% +0.88% +0.92%] index_copy_ strided 5 : Elapsed 0.023 ms (2.276 ms / 100) 2.272 -> 2.272 ( +0.00%) [ +0.35% +0.09% +0.00% / +0.00% +1.19% +1.06%] index_add_ strided 7 : Elapsed 0.023 ms (2.280 ms / 100) 2.276 -> 2.283 ( +0.31%) [ +0.00% +0.22% +0.13% / +0.31% +1.27% +0.97%] index_copy_ strided 7 : Elapsed 0.023 ms (2.276 ms / 100) 2.275 -> 2.279 ( +0.18%) [ +0.00% +0.04% +0.04% / +0.18% +0.84% +1.05%] index_add_ perm : Elapsed 0.023 ms (2.275 ms / 100) 2.275 -> 2.279 ( +0.18%) [ +0.13% +0.00% +0.04% / +0.18% +0.88% +1.14%] index_copy_ perm : Elapsed 0.023 ms (2.278 ms / 100) 2.269 -> 2.274 ( +0.22%) [ +0.31% +0.35% +0.00% / +0.22% +1.15% +1.06%] index_add_ perm_sorted : Elapsed 0.023 ms (2.276 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.13% +0.04% +0.00% / -0.04% +0.88% +1.01%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.278 ms / 100) 4.651 -> 4.656 ( +0.11%) [ +0.00% +0.13% +0.09% / +0.11% +0.95% +0.90%] index_select const : Elapsed 0.047 ms (4.651 ms / 100) 4.654 -> 4.664 ( +0.21%) [ +0.11% +0.00% +0.13% / +0.21% +1.05% +0.97%] index_select wrap : Elapsed 0.047 ms (4.659 ms / 100) 4.668 -> 4.675 ( +0.15%) [ +0.00% +0.02% +0.04% / +0.15% +0.71% +0.58%] index_select linear : Elapsed 0.047 ms (4.668 ms / 100) 4.664 -> 4.667 ( +0.06%) [ +0.09% +0.04% +0.00% / +0.06% +0.66% +0.64%] index_select reverse : Elapsed 0.047 ms (4.668 ms / 100) 4.653 -> 4.657 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +1.14% +0.86%] index_select skip64 : Elapsed 0.047 ms (4.657 ms / 100) 4.650 -> 4.650 ( +0.00%) [ +0.00% +0.26% +0.15% / +0.00% +1.10% +1.05%] index_select skip256 : Elapsed 0.047 ms (4.650 ms / 100) 4.661 -> 4.656 ( -0.11%) [ +0.02% +0.00% +0.02% / -0.11% +0.75% +0.86%] index_select spread : Elapsed 0.047 ms (4.662 ms / 100) 4.662 -> 4.668 ( +0.13%) [ +0.06% +0.00% +0.13% / +0.13% +0.94% +0.86%] index_select strided 3 : Elapsed 0.047 ms (4.665 ms / 100) 4.656 -> 4.656 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +1.03% +1.07%] index_select random : Elapsed 0.047 ms (4.656 ms / 100) 4.646 -> 4.653 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.23% +1.05%] index_select random_sorted : Elapsed 0.047 ms (4.653 ms / 100) out_shape = [40, 5, 16, 4] in_shape = [40, 5, 20, 4] idx_dim = 2 B = [40, 5, 16, 4] (stride (320, 64, 4, 1)) A = [40, 5, 20, 4] (stride (80, 3200, 1, 20)) dim = 2 3.791 -> 3.791 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.82% +0.76%] index_select const : Elapsed 0.038 ms (3.794 ms / 100) 3.816 -> 3.816 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.94% +0.92%] index_select wrap : Elapsed 0.038 ms (3.817 ms / 100) 3.812 -> 3.823 ( +0.29%) [ +0.24% +0.31% +0.00% / +0.29% +1.13% +1.05%] index_select linear : Elapsed 0.038 ms (3.821 ms / 100) 3.790 -> 3.787 ( -0.08%) [ +0.03% +0.08% +0.00% / -0.08% +0.71% +0.69%] index_select reverse : Elapsed 0.038 ms (3.791 ms / 100) 3.793 -> 3.794 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.74% +0.55%] index_select skip64 : Elapsed 0.038 ms (3.793 ms / 100) 3.794 -> 3.794 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.00% +0.74% +0.58%] index_select skip256 : Elapsed 0.038 ms (3.798 ms / 100) 3.815 -> 3.819 ( +0.10%) [ +0.18% +0.10% +0.00% / +0.10% +0.60% +0.68%] index_select spread : Elapsed 0.038 ms (3.822 ms / 100) 3.808 -> 3.815 ( +0.18%) [ +0.24% +0.11% +0.00% / +0.18% +0.84% +0.79%] index_select strided 3 : Elapsed 0.038 ms (3.817 ms / 100) 3.796 -> 3.802 ( +0.16%) [ +0.05% +0.00% +0.18% / +0.16% +0.84% +0.74%] index_select strided 5 : Elapsed 0.038 ms (3.798 ms / 100) 3.814 -> 3.820 ( +0.16%) [ +0.16% +0.10% +0.00% / +0.16% +0.71% +0.87%] index_select strided 7 : Elapsed 0.038 ms (3.820 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.00% +0.05% +0.16% / +0.03% +0.63% +0.68%] index_select strided 8 : Elapsed 0.038 ms (3.807 ms / 100) 3.806 -> 3.813 ( +0.18%) [ +0.08% +0.18% +0.00% / +0.18% +0.74% +0.84%] index_select strided 16 : Elapsed 0.038 ms (3.809 ms / 100) 3.798 -> 3.800 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.61% +0.55%] index_select random : Elapsed 0.038 ms (3.798 ms / 100) 3.818 -> 3.819 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.52% +0.55%] index_select random_sorted : Elapsed 0.038 ms (3.819 ms / 100) 3.827 -> 3.825 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.29% +0.34%] index_select perm : Elapsed 0.038 ms (3.827 ms / 100) 3.820 -> 3.827 ( +0.18%) [ +0.00% +0.03% +0.08% / +0.18% +0.63% +0.68%] index_select perm_sorted : Elapsed 0.038 ms (3.820 ms / 100) B = [40, 5, 16, 4] (stride (64, 2560, 4, 1)) A = [40, 5, 20, 4] (stride (400, 4, 20, 1)) dim = 2 3.843 -> 3.844 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.57% +0.60%] index_select const : Elapsed 0.038 ms (3.845 ms / 100) 3.816 -> 3.815 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.42% +0.47%] index_select wrap : Elapsed 0.038 ms (3.818 ms / 100) 3.820 -> 3.820 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.58% +0.55%] index_select linear : Elapsed 0.038 ms (3.822 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.60% +0.55%] index_select reverse : Elapsed 0.038 ms (3.815 ms / 100) 3.849 -> 3.851 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.62% +0.62%] index_select skip64 : Elapsed 0.039 ms (3.852 ms / 100) 3.852 -> 3.854 ( +0.05%) [ +0.00% +0.08% +0.13% / +0.05% +0.57% +0.52%] index_select skip256 : Elapsed 0.039 ms (3.852 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.52% +0.55%] index_select spread : Elapsed 0.038 ms (3.820 ms / 100) 3.829 -> 3.834 ( +0.13%) [ +0.03% +0.16% +0.00% / +0.13% +0.65% +0.73%] index_select strided 3 : Elapsed 0.038 ms (3.830 ms / 100) 3.832 -> 3.837 ( +0.13%) [ +0.05% +0.05% +0.00% / +0.13% +0.60% +0.55%] index_select strided 5 : Elapsed 0.038 ms (3.834 ms / 100) 3.812 -> 3.812 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.71% +0.71%] index_select strided 7 : Elapsed 0.038 ms (3.812 ms / 100) 3.823 -> 3.827 ( +0.10%) [ +0.00% +0.13% +0.03% / +0.10% +0.71% +0.81%] index_select strided 8 : Elapsed 0.038 ms (3.823 ms / 100) 3.822 -> 3.821 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.73% +0.68%] index_select strided 16 : Elapsed 0.038 ms (3.822 ms / 100) 3.833 -> 3.835 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.73% +0.78%] index_select random : Elapsed 0.038 ms (3.835 ms / 100) 3.822 -> 3.822 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.84% +0.81%] index_select random_sorted : Elapsed 0.038 ms (3.822 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.92% +0.68%] index_select perm : Elapsed 0.038 ms (3.815 ms / 100) 3.829 -> 3.831 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.76% +0.73%] index_select perm_sorted : Elapsed 0.038 ms (3.832 ms / 100) B = [40, 5, 16, 4] (stride (20, 1, 800, 5)) A = [40, 5, 20, 4] (stride (5, 1, 200, 4000)) dim = 2 3.573 -> 3.572 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.92% +0.90%] index_select const : Elapsed 0.036 ms (3.574 ms / 100) 3.532 -> 3.533 ( +0.03%) [ +0.11% +0.11% +0.00% / +0.03% +0.71% +0.71%] index_select wrap : Elapsed 0.035 ms (3.536 ms / 100) 3.536 -> 3.538 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.88% +0.71%] index_select linear : Elapsed 0.035 ms (3.536 ms / 100) 3.552 -> 3.552 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.70% +0.76%] index_select reverse : Elapsed 0.036 ms (3.553 ms / 100) 3.597 -> 3.597 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.72%] index_select skip64 : Elapsed 0.036 ms (3.597 ms / 100) 3.582 -> 3.582 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.56% +0.56%] index_select skip256 : Elapsed 0.036 ms (3.583 ms / 100) 3.534 -> 3.531 ( -0.08%) [ +0.03% +0.00% +0.00% / -0.08% +0.65% +0.62%] index_select spread : Elapsed 0.035 ms (3.535 ms / 100) 3.532 -> 3.536 ( +0.11%) [ +0.11% +0.00% +0.14% / +0.11% +0.76% +0.62%] index_select strided 3 : Elapsed 0.035 ms (3.536 ms / 100) 3.553 -> 3.556 ( +0.08%) [ +0.06% +0.00% +0.17% / +0.08% +0.62% +0.68%] index_select strided 5 : Elapsed 0.036 ms (3.555 ms / 100) 3.550 -> 3.555 ( +0.14%) [ +0.17% +0.08% +0.00% / +0.14% +0.90% +0.85%] index_select strided 7 : Elapsed 0.036 ms (3.556 ms / 100) 3.541 -> 3.541 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.65% +0.65%] index_select strided 8 : Elapsed 0.035 ms (3.543 ms / 100) 3.547 -> 3.547 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.70% +0.68%] index_select strided 16 : Elapsed 0.035 ms (3.548 ms / 100) 3.552 -> 3.561 ( +0.25%) [ +0.17% +0.00% +0.14% / +0.25% +0.62% +0.65%] index_select random : Elapsed 0.036 ms (3.558 ms / 100) 3.541 -> 3.542 ( +0.03%) [ +0.11% +0.00% +0.00% / +0.03% +0.56% +0.54%] index_select random_sorted : Elapsed 0.035 ms (3.545 ms / 100) 3.534 -> 3.535 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.45% +0.45%] index_select perm : Elapsed 0.035 ms (3.535 ms / 100) 3.542 -> 3.542 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.68% +0.65%] index_select perm_sorted : Elapsed 0.035 ms (3.545 ms / 100) B = [40, 5, 16, 4] (stride (80, 16, 1, 3200)) A = [40, 5, 20, 4] (stride (20, 1, 800, 5)) dim = 2 3.530 -> 3.533 ( +0.08%) [ +0.00% +0.08% +0.03% / +0.08% +0.20% +0.28%] index_select const : Elapsed 0.035 ms (3.530 ms / 100) 3.518 -> 3.517 ( -0.03%) [ +0.09% +0.00% +0.06% / -0.03% +0.48% +0.51%] index_select wrap : Elapsed 0.035 ms (3.521 ms / 100) 3.518 -> 3.517 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.45% +0.48%] index_select linear : Elapsed 0.035 ms (3.519 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.71% +0.63%] index_select reverse : Elapsed 0.035 ms (3.514 ms / 100) 3.530 -> 3.532 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.71% +0.65%] index_select skip64 : Elapsed 0.035 ms (3.534 ms / 100) 3.522 -> 3.533 ( +0.31%) [ +0.06% +0.00% +0.26% / +0.31% +0.65% +0.65%] index_select skip256 : Elapsed 0.035 ms (3.524 ms / 100) 3.517 -> 3.520 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.51% +0.51%] index_select spread : Elapsed 0.035 ms (3.517 ms / 100) 3.530 -> 3.530 ( +0.00%) [ +0.14% +0.06% +0.00% / +0.00% +0.45% +0.42%] index_select strided 3 : Elapsed 0.035 ms (3.535 ms / 100) 3.523 -> 3.529 ( +0.17%) [ +0.31% +0.31% +0.00% / +0.17% +0.57% +0.51%] index_select strided 5 : Elapsed 0.035 ms (3.534 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.68% +0.68%] index_select strided 7 : Elapsed 0.035 ms (3.513 ms / 100) 3.510 -> 3.510 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.77% +0.74%] index_select strided 8 : Elapsed 0.035 ms (3.511 ms / 100) 3.507 -> 3.508 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.80% +0.74%] index_select strided 16 : Elapsed 0.035 ms (3.508 ms / 100) 3.526 -> 3.523 ( -0.09%) [ +0.03% +0.00% +0.00% / -0.09% +0.57% +0.54%] index_select random : Elapsed 0.035 ms (3.527 ms / 100) 3.515 -> 3.514 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.60% +0.60%] index_select random_sorted : Elapsed 0.035 ms (3.515 ms / 100) 3.519 -> 3.525 ( +0.17%) [ +0.28% +0.14% +0.00% / +0.17% +0.68% +0.65%] index_select perm : Elapsed 0.035 ms (3.529 ms / 100) 3.522 -> 3.523 ( +0.03%) [ +0.17% +0.03% +0.00% / +0.03% +0.68% +0.68%] index_select perm_sorted : Elapsed 0.035 ms (3.528 ms / 100) B = [40, 5, 16, 4] (stride (16, 640, 1, 3200)) A = [40, 5, 20, 4] (stride (20, 3200, 1, 800)) dim = 2 4.242 -> 4.262 ( +0.47%) [ +0.35% +0.40% +0.00% / +0.47% +1.08% +0.97%] index_select const : Elapsed 0.043 ms (4.257 ms / 100) 4.205 -> 4.211 ( +0.14%) [ +0.00% +0.12% +0.12% / +0.14% +0.78% +0.67%] index_select wrap : Elapsed 0.042 ms (4.205 ms / 100) 4.206 -> 4.212 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.83% +0.78%] index_select linear : Elapsed 0.042 ms (4.212 ms / 100) 4.207 -> 4.211 ( +0.10%) [ +0.00% +0.07% +0.05% / +0.10% +0.74% +0.86%] index_select reverse : Elapsed 0.042 ms (4.207 ms / 100) 4.240 -> 4.250 ( +0.24%) [ +0.26% +0.12% +0.00% / +0.24% +0.68% +0.73%] index_select skip64 : Elapsed 0.043 ms (4.251 ms / 100) 4.246 -> 4.257 ( +0.26%) [ +0.26% +0.00% +0.14% / +0.26% +0.87% +0.73%] index_select skip256 : Elapsed 0.043 ms (4.257 ms / 100) 4.215 -> 4.218 ( +0.07%) [ +0.09% +0.00% +0.09% / +0.07% +0.76% +0.71%] index_select spread : Elapsed 0.042 ms (4.219 ms / 100) 4.225 -> 4.227 ( +0.05%) [ +0.14% +0.00% +0.12% / +0.05% +0.73% +0.62%] index_select strided 3 : Elapsed 0.042 ms (4.231 ms / 100) 4.219 -> 4.227 ( +0.19%) [ +0.02% +0.21% +0.00% / +0.19% +0.73% +0.64%] index_select strided 5 : Elapsed 0.042 ms (4.220 ms / 100) 4.204 -> 4.210 ( +0.14%) [ +0.00% +0.21% +0.02% / +0.14% +0.76% +0.88%] index_select strided 7 : Elapsed 0.042 ms (4.204 ms / 100) 4.208 -> 4.211 ( +0.07%) [ +0.05% +0.10% +0.00% / +0.07% +0.71% +0.52%] index_select strided 8 : Elapsed 0.042 ms (4.210 ms / 100) 4.229 -> 4.236 ( +0.17%) [ +0.35% +0.00% +0.21% / +0.17% +0.64% +0.92%] index_select strided 16 : Elapsed 0.042 ms (4.244 ms / 100) 4.210 -> 4.213 ( +0.07%) [ +0.14% +0.02% +0.00% / +0.07% +0.59% +0.57%] index_select random : Elapsed 0.042 ms (4.216 ms / 100) 4.225 -> 4.228 ( +0.07%) [ +0.07% +0.17% +0.00% / +0.07% +0.54% +0.50%] index_select random_sorted : Elapsed 0.042 ms (4.228 ms / 100) 4.223 -> 4.222 ( -0.02%) [ +0.05% +0.00% +0.09% / -0.02% +0.59% +0.54%] index_select perm : Elapsed 0.042 ms (4.225 ms / 100) 4.226 -> 4.221 ( -0.12%) [ +0.00% +0.02% +0.05% / -0.12% +0.45% +0.54%] index_select perm_sorted : Elapsed 0.042 ms (4.226 ms / 100) out_shape = [40, 5, 20, 16] in_shape = [40, 5, 20, 4] idx_dim = 3 B = [40, 5, 20, 16] (stride (1600, 1, 80, 5)) A = [40, 5, 20, 4] (stride (20, 1, 800, 5)) dim = 3 2.579 -> 2.585 ( +0.23%) [ +0.27% +0.23% +0.00% / +0.23% +0.58% +0.54%] index_add_ linear : Elapsed 0.026 ms (2.586 ms / 100) 2.553 -> 2.556 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.47% +0.59%] index_copy_ linear : Elapsed 0.026 ms (2.554 ms / 100) 2.594 -> 2.591 ( -0.12%) [ +0.04% +0.00% +0.19% / -0.12% +0.39% +0.46%] index_add_ reverse : Elapsed 0.026 ms (2.595 ms / 100) 2.564 -> 2.565 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.39% +0.35%] index_copy_ reverse : Elapsed 0.026 ms (2.564 ms / 100) 2.608 -> 2.612 ( +0.15%) [ +0.12% +0.00% +0.08% / +0.15% +0.61% +0.46%] index_add_ spread : Elapsed 0.026 ms (2.611 ms / 100) 2.604 -> 2.606 ( +0.08%) [ +0.19% +0.00% +0.04% / +0.08% +0.54% +0.38%] index_copy_ spread : Elapsed 0.026 ms (2.609 ms / 100) 2.607 -> 2.608 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.35% +0.58%] index_add_ strided 3 : Elapsed 0.026 ms (2.609 ms / 100) 2.622 -> 2.621 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.31% +0.65%] index_copy_ strided 3 : Elapsed 0.026 ms (2.623 ms / 100) 2.576 -> 2.583 ( +0.27%) [ +0.16% +0.12% +0.00% / +0.27% +0.27% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.580 ms / 100) 2.562 -> 2.568 ( +0.23%) [ +0.20% +0.23% +0.00% / +0.23% +0.43% +0.35%] index_copy_ strided 5 : Elapsed 0.026 ms (2.567 ms / 100) 2.590 -> 2.588 ( -0.08%) [ +0.00% +0.12% +0.08% / -0.08% +0.27% +0.19%] index_add_ strided 7 : Elapsed 0.026 ms (2.590 ms / 100) 2.590 -> 2.590 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.15% +0.27%] index_copy_ strided 7 : Elapsed 0.026 ms (2.590 ms / 100) 2.581 -> 2.581 ( +0.00%) [ +0.31% +0.04% +0.00% / +0.00% +0.15% +0.19%] index_add_ perm : Elapsed 0.026 ms (2.589 ms / 100) 2.565 -> 2.562 ( -0.12%) [ +0.27% +0.00% +0.12% / -0.12% +0.39% +0.51%] index_copy_ perm : Elapsed 0.026 ms (2.572 ms / 100) 2.575 -> 2.578 ( +0.12%) [ +0.23% +0.00% +0.04% / +0.12% +0.31% +0.31%] index_add_ perm_sorted : Elapsed 0.026 ms (2.581 ms / 100) 2.558 -> 2.565 ( +0.27%) [ +0.27% +0.00% +0.35% / +0.27% +0.31% +0.47%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.565 ms / 100) 5.838 -> 5.839 ( +0.02%) [ +0.00% +0.10% +0.15% / +0.02% +0.46% +0.67%] index_select const : Elapsed 0.058 ms (5.838 ms / 100) 5.857 -> 5.858 ( +0.02%) [ +0.14% +0.15% +0.00% / +0.02% +0.58% +0.56%] index_select wrap : Elapsed 0.059 ms (5.865 ms / 100) 5.866 -> 5.871 ( +0.09%) [ +0.02% +0.00% +0.02% / +0.09% +0.39% +0.41%] index_select linear : Elapsed 0.059 ms (5.867 ms / 100) 5.864 -> 5.862 ( -0.03%) [ +0.02% +0.00% +0.12% / -0.03% +0.41% +0.55%] index_select reverse : Elapsed 0.059 ms (5.865 ms / 100) 5.850 -> 5.845 ( -0.09%) [ +0.43% +0.00% +0.39% / -0.09% +0.31% +0.70%] index_select skip64 : Elapsed 0.059 ms (5.875 ms / 100) 5.848 -> 5.856 ( +0.14%) [ +0.24% +0.38% +0.00% / +0.14% +0.63% +0.43%] index_select skip256 : Elapsed 0.059 ms (5.862 ms / 100) 5.867 -> 5.870 ( +0.05%) [ +0.29% +0.20% +0.00% / +0.05% +0.17% +0.26%] index_select spread : Elapsed 0.059 ms (5.884 ms / 100) 5.857 -> 5.867 ( +0.17%) [ +0.12% +0.09% +0.00% / +0.17% +0.43% +0.50%] index_select strided 3 : Elapsed 0.059 ms (5.864 ms / 100) 5.855 -> 5.856 ( +0.02%) [ +0.03% +0.00% +0.05% / +0.02% +0.39% +0.31%] index_select random : Elapsed 0.059 ms (5.857 ms / 100) 5.855 -> 5.865 ( +0.17%) [ +0.07% +0.12% +0.00% / +0.17% +0.51% +0.43%] index_select random_sorted : Elapsed 0.059 ms (5.859 ms / 100) B = [40, 5, 20, 16] (stride (1600, 20, 1, 100)) A = [40, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 3 2.116 -> 2.121 ( +0.24%) [ +0.14% +0.00% +0.33% / +0.24% +0.57% +0.33%] index_add_ linear : Elapsed 0.021 ms (2.119 ms / 100) 2.075 -> 2.077 ( +0.10%) [ +0.19% +0.05% +0.00% / +0.10% +0.39% +0.24%] index_copy_ linear : Elapsed 0.021 ms (2.079 ms / 100) 2.123 -> 2.121 ( -0.09%) [ +0.00% +0.05% +0.00% / -0.09% +0.05% +0.09%] index_add_ reverse : Elapsed 0.021 ms (2.123 ms / 100) 2.078 -> 2.077 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.10% +0.14%] index_copy_ reverse : Elapsed 0.021 ms (2.079 ms / 100) 2.119 -> 2.121 ( +0.09%) [ +0.00% +0.14% +0.05% / +0.09% +0.24% +0.09%] index_add_ spread : Elapsed 0.021 ms (2.119 ms / 100) 2.078 -> 2.078 ( +0.00%) [ +0.00% +0.05% +0.19% / +0.29% +0.00% +0.14%] index_copy_ spread : Elapsed 0.021 ms (2.078 ms / 100) 2.124 -> 2.125 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.42% +0.19%] index_add_ strided 3 : Elapsed 0.021 ms (2.126 ms / 100) 2.082 -> 2.084 ( +0.10%) [ +0.00% +0.24% +0.19% / +0.10% +0.24% +0.29%] index_copy_ strided 3 : Elapsed 0.021 ms (2.082 ms / 100) 2.112 -> 2.117 ( +0.24%) [ +0.24% +0.28% +0.00% / +0.24% +0.47% +0.57%] index_add_ strided 5 : Elapsed 0.021 ms (2.117 ms / 100) 2.076 -> 2.076 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.29% +0.43%] index_copy_ strided 5 : Elapsed 0.021 ms (2.077 ms / 100) 2.117 -> 2.117 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.28% +0.33%] index_add_ strided 7 : Elapsed 0.021 ms (2.117 ms / 100) 2.078 -> 2.079 ( +0.05%) [ +0.14% +0.10% +0.00% / +0.05% +0.19% +0.24%] index_copy_ strided 7 : Elapsed 0.021 ms (2.081 ms / 100) 2.120 -> 2.120 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.28% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.122 ms / 100) 2.082 -> 2.078 ( -0.19%) [ +0.00% +0.38% +0.05% / -0.19% +0.05% -0.05%] index_copy_ perm : Elapsed 0.021 ms (2.082 ms / 100) 2.119 -> 2.119 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.19% +0.09%] index_add_ perm_sorted : Elapsed 0.021 ms (2.119 ms / 100) 2.074 -> 2.073 ( -0.05%) [ +0.05% +0.19% +0.00% / -0.05% +0.14% +0.34%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.075 ms / 100) 4.184 -> 4.180 ( -0.10%) [ +0.00% +0.02% +0.02% / -0.10% +0.33% +0.26%] index_select const : Elapsed 0.042 ms (4.184 ms / 100) 4.184 -> 4.186 ( +0.05%) [ +0.00% +0.10% +0.07% / +0.05% +0.17% +0.17%] index_select wrap : Elapsed 0.042 ms (4.184 ms / 100) 4.164 -> 4.167 ( +0.07%) [ +0.02% +0.05% +0.00% / +0.07% +0.34% +0.29%] index_select linear : Elapsed 0.042 ms (4.165 ms / 100) 4.173 -> 4.175 ( +0.05%) [ +0.12% +0.14% +0.00% / +0.05% +0.65% +0.36%] index_select reverse : Elapsed 0.042 ms (4.178 ms / 100) 4.162 -> 4.165 ( +0.07%) [ +0.00% +0.10% +0.02% / +0.07% +0.24% +0.19%] index_select skip64 : Elapsed 0.042 ms (4.162 ms / 100) 4.162 -> 4.163 ( +0.02%) [ +0.12% +0.05% +0.00% / +0.02% +0.41% +0.48%] index_select skip256 : Elapsed 0.042 ms (4.167 ms / 100) 4.179 -> 4.184 ( +0.12%) [ +0.00% +0.26% +0.14% / +0.12% +0.31% +0.41%] index_select spread : Elapsed 0.042 ms (4.179 ms / 100) 4.176 -> 4.179 ( +0.07%) [ +0.07% +0.10% +0.00% / +0.07% +0.38% +0.22%] index_select strided 3 : Elapsed 0.042 ms (4.179 ms / 100) 4.161 -> 4.163 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.24% +0.12%] index_select random : Elapsed 0.042 ms (4.163 ms / 100) 4.173 -> 4.175 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.43% +0.48%] index_select random_sorted : Elapsed 0.042 ms (4.176 ms / 100) B = [40, 5, 20, 16] (stride (1600, 1, 5, 100)) A = [40, 5, 20, 4] (stride (400, 1, 20, 5)) dim = 3 2.526 -> 2.528 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.51% +0.87%] index_add_ linear : Elapsed 0.025 ms (2.526 ms / 100) 2.474 -> 2.471 ( -0.12%) [ +0.00% +0.00% +0.08% / -0.12% +0.61% +1.17%] index_copy_ linear : Elapsed 0.025 ms (2.474 ms / 100) 2.531 -> 2.525 ( -0.24%) [ +0.16% +0.04% +0.00% / -0.24% +0.20% +0.40%] index_add_ reverse : Elapsed 0.025 ms (2.535 ms / 100) 2.476 -> 2.470 ( -0.24%) [ +0.00% +0.24% +0.16% / -0.24% +0.28% +0.44%] index_copy_ reverse : Elapsed 0.025 ms (2.476 ms / 100) 2.531 -> 2.534 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.12% +0.51% +0.43%] index_add_ spread : Elapsed 0.025 ms (2.531 ms / 100) 2.479 -> 2.484 ( +0.20%) [ +0.00% +0.00% +0.08% / +0.20% +0.56% +0.48%] index_copy_ spread : Elapsed 0.025 ms (2.479 ms / 100) 2.518 -> 2.529 ( +0.44%) [ +0.40% +0.00% +0.16% / +0.44% +0.75% +0.79%] index_add_ strided 3 : Elapsed 0.025 ms (2.528 ms / 100) 2.475 -> 2.479 ( +0.16%) [ +0.40% +0.00% +0.04% / +0.16% +0.65% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.485 ms / 100) 2.529 -> 2.531 ( +0.08%) [ +0.00% +0.16% +0.36% / +0.08% +0.24% +0.28%] index_add_ strided 5 : Elapsed 0.025 ms (2.529 ms / 100) 2.475 -> 2.484 ( +0.36%) [ +0.00% +0.44% +0.48% / +0.40% +0.36% +0.48%] index_copy_ strided 5 : Elapsed 0.025 ms (2.475 ms / 100) 2.528 -> 2.532 ( +0.16%) [ +0.12% +0.20% +0.00% / +0.16% +0.63% +0.55%] index_add_ strided 7 : Elapsed 0.025 ms (2.531 ms / 100) 2.479 -> 2.481 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.52% +0.48%] index_copy_ strided 7 : Elapsed 0.025 ms (2.480 ms / 100) 2.528 -> 2.532 ( +0.16%) [ +0.28% +0.24% +0.00% / +0.16% +0.32% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.535 ms / 100) 2.481 -> 2.488 ( +0.28%) [ +0.08% +0.04% +0.00% / +0.32% +0.40% +0.28%] index_copy_ perm : Elapsed 0.025 ms (2.483 ms / 100) 2.530 -> 2.532 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.20% +0.12%] index_add_ perm_sorted : Elapsed 0.025 ms (2.530 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.36% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.486 ms / 100) 5.553 -> 5.565 ( +0.22%) [ +0.09% +0.00% +0.11% / +0.22% +0.85% +1.24%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.600 -> 5.602 ( +0.04%) [ +0.27% +0.18% +0.00% / +0.04% +0.34% +0.34%] index_select wrap : Elapsed 0.056 ms (5.615 ms / 100) 5.613 -> 5.616 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.68% +0.64%] index_select linear : Elapsed 0.056 ms (5.618 ms / 100) 5.599 -> 5.635 ( +0.64%) [ +0.00% +0.25% +0.20% / +0.64% +0.91% +0.89%] index_select reverse : Elapsed 0.056 ms (5.599 ms / 100) 5.586 -> 5.586 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.00% +0.36% +0.18%] index_select skip64 : Elapsed 0.056 ms (5.590 ms / 100) 5.591 -> 5.601 ( +0.18%) [ +0.00% +0.25% +0.14% / +0.18% +0.30% +0.54%] index_select skip256 : Elapsed 0.056 ms (5.591 ms / 100) 5.608 -> 5.614 ( +0.11%) [ +0.18% +0.14% +0.00% / +0.11% +0.20% +0.14%] index_select spread : Elapsed 0.056 ms (5.618 ms / 100) 5.594 -> 5.610 ( +0.29%) [ +0.18% +0.25% +0.00% / +0.29% +0.63% +0.77%] index_select strided 3 : Elapsed 0.056 ms (5.604 ms / 100) 5.609 -> 5.617 ( +0.14%) [ +0.00% +0.11% +0.27% / +0.14% +0.48% +0.68%] index_select random : Elapsed 0.056 ms (5.609 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.11% +0.00% +0.16% / +0.09% +0.50% +0.59%] index_select random_sorted : Elapsed 0.056 ms (5.585 ms / 100) B = [40, 5, 20, 16] (stride (16, 12800, 640, 1)) A = [40, 5, 20, 4] (stride (400, 20, 1, 100)) dim = 3 1.289 -> 1.290 ( +0.08%) [ +0.16% +0.31% +0.00% / +0.47% +0.31% +0.08%] index_add_ linear : Elapsed 0.013 ms (1.291 ms / 100) 1.242 -> 1.244 ( +0.16%) [ +0.32% +0.00% +0.08% / +0.16% +0.81% +0.89%] index_copy_ linear : Elapsed 0.012 ms (1.246 ms / 100) 1.287 -> 1.283 ( -0.31%) [ +0.31% +0.00% +0.16% / +0.00% +0.08% -0.31%] index_add_ reverse : Elapsed 0.013 ms (1.291 ms / 100) 1.240 -> 1.243 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.48% +0.48%] index_copy_ reverse : Elapsed 0.012 ms (1.241 ms / 100) 1.330 -> 1.334 ( +0.30%) [ +0.08% +0.00% +0.08% / +0.30% +0.60% +0.45%] index_add_ spread : Elapsed 0.013 ms (1.331 ms / 100) 1.308 -> 1.316 ( +0.61%) [ +0.00% +0.00% +0.46% / +0.61% +0.69% +0.76%] index_copy_ spread : Elapsed 0.013 ms (1.308 ms / 100) 1.327 -> 1.329 ( +0.15%) [ +0.00% +0.23% +0.00% / +0.45% +0.23% +0.15%] index_add_ strided 3 : Elapsed 0.013 ms (1.327 ms / 100) 1.307 -> 1.305 ( -0.15%) [ +0.00% +0.38% +0.00% / -0.15% +0.99% +1.15%] index_copy_ strided 3 : Elapsed 0.013 ms (1.307 ms / 100) 1.333 -> 1.329 ( -0.30%) [ +0.00% +0.00% +0.00% / -0.30% +0.30% +0.38%] index_add_ strided 5 : Elapsed 0.013 ms (1.333 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.00% +0.15% +0.46% / +0.08% +0.53% +0.61%] index_copy_ strided 5 : Elapsed 0.013 ms (1.309 ms / 100) 1.333 -> 1.332 ( -0.08%) [ +0.00% +0.23% +0.23% / +0.30% +0.08% -0.08%] index_add_ strided 7 : Elapsed 0.013 ms (1.333 ms / 100) 1.310 -> 1.312 ( +0.15%) [ +0.08% +0.00% +0.23% / +0.15% +0.84% +0.38%] index_copy_ strided 7 : Elapsed 0.013 ms (1.311 ms / 100) 1.330 -> 1.327 ( -0.23%) [ +0.45% +0.00% +0.60% / -0.23% +0.53% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.336 ms / 100) 1.309 -> 1.317 ( +0.61%) [ +0.00% +0.15% +0.15% / +0.61% +0.84% +0.69%] index_copy_ perm : Elapsed 0.013 ms (1.309 ms / 100) 1.333 -> 1.330 ( -0.23%) [ +0.00% +0.00% +0.00% / -0.23% +0.15% -0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.333 ms / 100) 1.306 -> 1.313 ( +0.54%) [ +0.38% +0.00% +0.69% / +0.54% +0.84% +0.61%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.311 ms / 100) 2.051 -> 2.054 ( +0.15%) [ +0.10% +0.00% +0.24% / +0.15% +1.02% +0.98%] index_select const : Elapsed 0.021 ms (2.053 ms / 100) 2.104 -> 2.113 ( +0.43%) [ +0.43% +0.38% +0.00% / +0.43% +0.95% +0.62%] index_select wrap : Elapsed 0.021 ms (2.113 ms / 100) 2.114 -> 2.111 ( -0.14%) [ +0.00% +0.05% +0.00% / -0.14% +0.19% +0.47%] index_select linear : Elapsed 0.021 ms (2.114 ms / 100) 2.087 -> 2.095 ( +0.38%) [ +0.14% +0.19% +0.00% / +0.38% +0.48% +0.72%] index_select reverse : Elapsed 0.021 ms (2.090 ms / 100) 2.050 -> 2.056 ( +0.29%) [ +0.00% +0.15% +0.10% / +0.29% +0.68% +0.59%] index_select skip64 : Elapsed 0.020 ms (2.050 ms / 100) 2.050 -> 2.046 ( -0.20%) [ +0.00% +0.15% +0.05% / -0.20% +0.54% +0.54%] index_select skip256 : Elapsed 0.021 ms (2.050 ms / 100) 2.091 -> 2.091 ( +0.00%) [ +0.00% +0.10% +0.19% / +0.00% +0.24% +0.57%] index_select spread : Elapsed 0.021 ms (2.091 ms / 100) 2.109 -> 2.110 ( +0.05%) [ +0.00% +0.43% +0.43% / +0.05% +0.81% +1.00%] index_select strided 3 : Elapsed 0.021 ms (2.109 ms / 100) 2.081 -> 2.086 ( +0.24%) [ +0.38% +0.48% +0.00% / +0.24% +1.15% +1.11%] index_select random : Elapsed 0.021 ms (2.089 ms / 100) 2.063 -> 2.071 ( +0.39%) [ +0.24% +0.24% +0.00% / +0.39% +1.26% +1.07%] index_select random_sorted : Elapsed 0.021 ms (2.068 ms / 100) B = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) A = [40, 5, 20, 4] (stride (1, 800, 40, 4000)) dim = 3 2.484 -> 2.487 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.36% +0.28%] index_add_ linear : Elapsed 0.025 ms (2.487 ms / 100) 2.421 -> 2.427 ( +0.25%) [ +0.00% +0.21% +0.17% / +0.25% +0.41% +0.37%] index_copy_ linear : Elapsed 0.024 ms (2.421 ms / 100) 2.481 -> 2.479 ( -0.08%) [ +0.00% +0.16% +0.08% / -0.08% +0.52% +0.40%] index_add_ reverse : Elapsed 0.025 ms (2.481 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.12% +0.17% +0.00% / +0.08% +0.45% +0.45%] index_copy_ reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.480 -> 2.478 ( -0.08%) [ +0.24% +0.20% +0.00% / -0.08% +0.60% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.486 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.45% +0.37%] index_copy_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.479 -> 2.486 ( +0.28%) [ +0.12% +0.00% +0.08% / +0.28% +0.56% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.482 ms / 100) 2.420 -> 2.425 ( +0.21%) [ +0.08% +0.00% +0.12% / +0.21% +0.33% +0.29%] index_copy_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.484 -> 2.483 ( -0.04%) [ +0.16% +0.12% +0.00% / -0.04% +0.20% +0.00%] index_add_ strided 5 : Elapsed 0.025 ms (2.488 ms / 100) 2.424 -> 2.427 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.17% +0.12%] index_copy_ strided 5 : Elapsed 0.024 ms (2.424 ms / 100) 2.486 -> 2.486 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.00% +0.24% +0.16%] index_add_ strided 7 : Elapsed 0.025 ms (2.488 ms / 100) 2.422 -> 2.425 ( +0.12%) [ +0.21% +0.00% +0.17% / +0.12% +0.33% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.484 -> 2.484 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.00% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.12% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.423 ms / 100) 2.486 -> 2.488 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.12% +0.40% +0.08%] index_add_ perm_sorted : Elapsed 0.025 ms (2.489 ms / 100) 2.421 -> 2.426 ( +0.21%) [ +0.21% +0.00% +0.21% / +0.21% +0.25% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) 5.511 -> 5.514 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.34% +0.40%] index_select const : Elapsed 0.055 ms (5.515 ms / 100) 5.495 -> 5.501 ( +0.11%) [ +0.22% +0.00% +0.15% / +0.11% +0.53% +0.55%] index_select wrap : Elapsed 0.055 ms (5.507 ms / 100) 5.511 -> 5.515 ( +0.07%) [ +0.09% +0.07% +0.00% / +0.07% +0.47% +0.51%] index_select linear : Elapsed 0.055 ms (5.516 ms / 100) 5.496 -> 5.492 ( -0.07%) [ +0.00% +0.09% +0.02% / -0.07% +0.20% +0.33%] index_select reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.502 -> 5.507 ( +0.09%) [ +0.00% +0.18% +0.05% / +0.09% +0.31% +0.38%] index_select skip64 : Elapsed 0.055 ms (5.502 ms / 100) 5.513 -> 5.516 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.34% +0.36%] index_select skip256 : Elapsed 0.055 ms (5.516 ms / 100) 5.499 -> 5.504 ( +0.09%) [ +0.00% +0.09% +0.02% / +0.09% +0.22% +0.27%] index_select spread : Elapsed 0.055 ms (5.499 ms / 100) 5.512 -> 5.516 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +0.31% +0.31%] index_select strided 3 : Elapsed 0.055 ms (5.516 ms / 100) 5.501 -> 5.498 ( -0.05%) [ +0.18% +0.15% +0.00% / -0.05% +0.24% +0.25%] index_select random : Elapsed 0.055 ms (5.511 ms / 100) 5.507 -> 5.509 ( +0.04%) [ +0.00% +0.00% +0.09% / +0.04% +0.33% +0.42%] index_select random_sorted : Elapsed 0.055 ms (5.507 ms / 100) B = [40, 5, 20, 16] (stride (1, 12800, 40, 800)) A = [40, 5, 20, 4] (stride (1, 40, 200, 4000)) dim = 3 2.610 -> 2.611 ( +0.04%) [ +0.15% +0.08% +0.00% / +0.04% +0.27% +0.27%] index_add_ linear : Elapsed 0.026 ms (2.614 ms / 100) 2.561 -> 2.563 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.51% +0.39%] index_copy_ linear : Elapsed 0.026 ms (2.562 ms / 100) 2.617 -> 2.614 ( -0.11%) [ +0.00% +0.04% +0.00% / +0.00% -0.11% -0.11%] index_add_ reverse : Elapsed 0.026 ms (2.617 ms / 100) 2.564 -> 2.565 ( +0.04%) [ +0.23% +0.00% +0.04% / +0.31% +0.04% +0.04%] index_copy_ reverse : Elapsed 0.026 ms (2.570 ms / 100) 2.613 -> 2.611 ( -0.08%) [ +0.04% +0.00% +0.15% / -0.08% +0.15% -0.04%] index_add_ spread : Elapsed 0.026 ms (2.614 ms / 100) 2.565 -> 2.566 ( +0.04%) [ +0.19% +0.00% +0.08% / +0.04% +0.08% +0.04%] index_copy_ spread : Elapsed 0.026 ms (2.570 ms / 100) 2.612 -> 2.612 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.04% +0.08% +0.00%] index_add_ strided 3 : Elapsed 0.026 ms (2.614 ms / 100) 2.563 -> 2.564 ( +0.04%) [ +0.00% +0.00% +0.20% / +0.04% +0.04% +0.16%] index_copy_ strided 3 : Elapsed 0.026 ms (2.563 ms / 100) 2.616 -> 2.612 ( -0.15%) [ +0.00% +0.04% +0.08% / +0.11% -0.11% -0.15%] index_add_ strided 5 : Elapsed 0.026 ms (2.616 ms / 100) 2.565 -> 2.564 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% -0.04% +0.08%] index_copy_ strided 5 : Elapsed 0.026 ms (2.566 ms / 100) 2.610 -> 2.612 ( +0.08%) [ +0.11% +0.00% +0.04% / +0.15% +0.23% +0.08%] index_add_ strided 7 : Elapsed 0.026 ms (2.613 ms / 100) 2.564 -> 2.563 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.23% +0.16%] index_copy_ strided 7 : Elapsed 0.026 ms (2.567 ms / 100) 2.610 -> 2.610 ( +0.00%) [ +0.11% +0.23% +0.00% / +0.00% +0.19% +0.19%] index_add_ perm : Elapsed 0.026 ms (2.613 ms / 100) 2.565 -> 2.567 ( +0.08%) [ +0.16% +0.00% +0.12% / +0.08% +0.12% +0.16%] index_copy_ perm : Elapsed 0.026 ms (2.569 ms / 100) 2.612 -> 2.613 ( +0.04%) [ +0.11% +0.08% +0.00% / +0.31% +0.08% +0.04%] index_add_ perm_sorted : Elapsed 0.026 ms (2.615 ms / 100) 2.566 -> 2.567 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.08% +0.08%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.568 ms / 100) 6.021 -> 6.024 ( +0.05%) [ +0.08% +0.07% +0.00% / +0.05% +0.43% +0.47%] index_select const : Elapsed 0.060 ms (6.026 ms / 100) 6.016 -> 6.019 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.20% +0.08%] index_select wrap : Elapsed 0.060 ms (6.016 ms / 100) 6.030 -> 6.026 ( -0.07%) [ +0.08% +0.02% +0.00% / -0.07% +0.20% +0.18%] index_select linear : Elapsed 0.060 ms (6.035 ms / 100) 6.012 -> 6.005 ( -0.12%) [ +0.05% +0.00% +0.08% / -0.12% +0.32% +0.37%] index_select reverse : Elapsed 0.060 ms (6.015 ms / 100) 6.021 -> 6.021 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.33% +0.27%] index_select skip64 : Elapsed 0.060 ms (6.021 ms / 100) 6.015 -> 6.021 ( +0.10%) [ +0.20% +0.13% +0.00% / +0.10% +0.33% +0.42%] index_select skip256 : Elapsed 0.060 ms (6.027 ms / 100) 6.006 -> 6.008 ( +0.03%) [ +0.00% +0.02% +0.13% / +0.03% +0.32% +0.23%] index_select spread : Elapsed 0.060 ms (6.006 ms / 100) 6.006 -> 6.013 ( +0.12%) [ +0.05% +0.25% +0.00% / +0.12% +0.43% +0.38%] index_select strided 3 : Elapsed 0.060 ms (6.009 ms / 100) 6.002 -> 6.003 ( +0.02%) [ +0.13% +0.10% +0.00% / +0.02% +0.42% +0.28%] index_select random : Elapsed 0.060 ms (6.010 ms / 100) 6.012 -> 6.015 ( +0.05%) [ +0.07% +0.00% +0.10% / +0.05% +0.27% +0.35%] index_select random_sorted : Elapsed 0.060 ms (6.016 ms / 100) B = [40, 5, 20, 16] (stride (5, 1, 3200, 200)) A = [40, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 3 2.127 -> 2.129 ( +0.09%) [ +0.00% +0.14% +0.14% / +0.09% +0.33% +0.42%] index_add_ linear : Elapsed 0.021 ms (2.127 ms / 100) 2.089 -> 2.093 ( +0.19%) [ +0.14% +0.05% +0.00% / +0.19% +0.38% +0.29%] index_copy_ linear : Elapsed 0.021 ms (2.092 ms / 100) 2.125 -> 2.126 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.61% +0.52%] index_add_ reverse : Elapsed 0.021 ms (2.127 ms / 100) 2.089 -> 2.087 ( -0.10%) [ +0.00% +0.14% +0.05% / -0.10% +0.38% +0.38%] index_copy_ reverse : Elapsed 0.021 ms (2.089 ms / 100) 2.122 -> 2.123 ( +0.05%) [ +0.00% +0.19% +0.09% / +0.05% +0.19% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.122 ms / 100) 2.082 -> 2.085 ( +0.14%) [ +0.00% +0.19% +0.05% / +0.14% +0.48% +0.38%] index_copy_ spread : Elapsed 0.021 ms (2.082 ms / 100) 2.132 -> 2.136 ( +0.19%) [ +0.19% +0.14% +0.00% / +0.23% +0.19% +0.42%] index_add_ strided 3 : Elapsed 0.021 ms (2.136 ms / 100) 2.091 -> 2.092 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.19% +0.33%] index_copy_ strided 3 : Elapsed 0.021 ms (2.093 ms / 100) 2.141 -> 2.132 ( -0.42%) [ +0.00% +0.14% +0.05% / +0.33% -0.42% -0.05%] index_add_ strided 5 : Elapsed 0.021 ms (2.141 ms / 100) 2.097 -> 2.096 ( -0.05%) [ +0.05% +0.00% +0.24% / +0.10% -0.05% +0.05%] index_copy_ strided 5 : Elapsed 0.021 ms (2.098 ms / 100) 2.126 -> 2.124 ( -0.09%) [ +0.00% +0.09% +0.24% / -0.09% +0.14% +0.05%] index_add_ strided 7 : Elapsed 0.021 ms (2.126 ms / 100) 2.086 -> 2.087 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.14% +0.19%] index_copy_ strided 7 : Elapsed 0.021 ms (2.086 ms / 100) 2.131 -> 2.128 ( -0.14%) [ +0.00% +0.23% +0.05% / -0.14% -0.05% +0.09%] index_add_ perm : Elapsed 0.021 ms (2.131 ms / 100) 2.092 -> 2.091 ( -0.05%) [ +0.10% +0.00% +0.14% / -0.05% +0.24% +0.19%] index_copy_ perm : Elapsed 0.021 ms (2.094 ms / 100) 2.125 -> 2.128 ( +0.14%) [ +0.09% +0.33% +0.00% / +0.14% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.021 ms (2.127 ms / 100) 2.089 -> 2.084 ( -0.24%) [ +0.00% +0.14% +0.05% / -0.24% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.089 ms / 100) 4.228 -> 4.225 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.24% +0.14%] index_select const : Elapsed 0.042 ms (4.229 ms / 100) 4.242 -> 4.239 ( -0.07%) [ +0.00% +0.05% +0.02% / -0.07% +0.19% +0.12%] index_select wrap : Elapsed 0.042 ms (4.242 ms / 100) 4.216 -> 4.223 ( +0.17%) [ +0.17% +0.05% +0.00% / +0.17% +0.43% +0.47%] index_select linear : Elapsed 0.042 ms (4.223 ms / 100) 4.236 -> 4.235 ( -0.02%) [ +0.07% +0.05% +0.00% / -0.02% +0.35% +0.38%] index_select reverse : Elapsed 0.042 ms (4.239 ms / 100) 4.238 -> 4.240 ( +0.05%) [ +0.00% +0.02% +0.09% / +0.05% +0.17% +0.24%] index_select skip64 : Elapsed 0.042 ms (4.238 ms / 100) 4.222 -> 4.225 ( +0.07%) [ +0.00% +0.14% +0.09% / +0.07% +0.26% +0.28%] index_select skip256 : Elapsed 0.042 ms (4.222 ms / 100) 4.244 -> 4.243 ( -0.02%) [ +0.02% +0.00% +0.00% / +0.05% +0.00% -0.02%] index_select spread : Elapsed 0.042 ms (4.245 ms / 100) 4.225 -> 4.225 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.21% +0.19%] index_select strided 3 : Elapsed 0.042 ms (4.226 ms / 100) 4.241 -> 4.239 ( -0.05%) [ +0.07% +0.00% +0.00% / -0.05% +0.09% +0.09%] index_select random : Elapsed 0.042 ms (4.244 ms / 100) 4.224 -> 4.225 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.02% +0.28% +0.24%] index_select random_sorted : Elapsed 0.042 ms (4.224 ms / 100) B = [40, 5, 20, 16] (stride (1, 40, 3200, 200)) dim = 3 fill_cnt = 4 1.341 -> 1.332 ( -0.67%) [ +0.00% +0.00% +0.00% / +0.15% -0.67% -0.37%] index_fill_ const : Elapsed 0.013 ms (1.341 ms / 100) 1.359 -> 1.346 ( -0.96%) [ +0.15% +0.00% +0.00% / +0.07% -0.96% -0.96%] index_fill_ linear : Elapsed 0.014 ms (1.361 ms / 100) 1.357 -> 1.359 ( +0.15%) [ +0.22% +0.37% +0.00% / +0.15% +0.44% +0.37%] index_fill_ reverse : Elapsed 0.014 ms (1.360 ms / 100) 1.337 -> 1.332 ( -0.37%) [ +0.00% +0.22% +0.15% / +0.00% -0.37% -0.22%] index_fill_ skip64 : Elapsed 0.013 ms (1.337 ms / 100) 1.338 -> 1.330 ( -0.60%) [ +0.15% +0.22% +0.00% / +0.07% -0.60% -0.37%] index_fill_ skip256 : Elapsed 0.013 ms (1.340 ms / 100) 1.360 -> 1.354 ( -0.44%) [ +0.22% +0.00% +0.15% / +0.07% -0.29% -0.44%] index_fill_ spread : Elapsed 0.014 ms (1.363 ms / 100) 1.366 -> 1.365 ( -0.07%) [ +0.07% +0.00% +0.07% / +0.15% -0.07% +0.00%] index_fill_ strided 3 : Elapsed 0.014 ms (1.367 ms / 100) 1.374 -> 1.344 ( -2.18%) [ +0.07% +0.22% +0.00% / +0.36% -2.11% -2.18%] index_fill_ strided 5 : Elapsed 0.014 ms (1.375 ms / 100) 1.363 -> 1.351 ( -0.88%) [ +0.07% +0.15% +0.00% / -0.07% -0.66% -0.88%] index_fill_ strided 7 : Elapsed 0.014 ms (1.364 ms / 100) 1.341 -> 1.339 ( -0.15%) [ +0.00% +0.07% +0.07% / -0.15% +0.75% +0.52%] index_fill_ strided 8 : Elapsed 0.013 ms (1.341 ms / 100) 1.352 -> 1.355 ( +0.22%) [ +0.37% +0.07% +0.00% / +0.22% +1.18% +1.04%] index_fill_ random : Elapsed 0.014 ms (1.357 ms / 100) 1.361 -> 1.355 ( -0.44%) [ +0.07% +0.00% +0.07% / +0.22% -0.22% -0.44%] index_fill_ random_sorted : Elapsed 0.014 ms (1.362 ms / 100) 1.365 -> 1.359 ( -0.44%) [ +0.00% +0.22% +0.15% / +0.29% -0.37% -0.44%] index_fill_ perm : Elapsed 0.014 ms (1.365 ms / 100) 1.376 -> 1.355 ( -1.53%) [ +0.00% +0.15% +0.15% / +0.22% -1.53% -1.53%] index_fill_ perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) B = [40, 5, 20, 16] (stride (1, 40, 3200, 200)) A = [40, 5, 20, 4] (stride (4, 160, 800, 1)) dim = 3 2.623 -> 2.627 ( +0.15%) [ +0.11% +0.15% +0.00% / +0.15% +0.30% +0.15%] index_add_ linear : Elapsed 0.026 ms (2.626 ms / 100) 2.575 -> 2.579 ( +0.16%) [ +0.00% +0.12% +0.00% / +0.16% +0.27% +0.19%] index_copy_ linear : Elapsed 0.026 ms (2.575 ms / 100) 2.625 -> 2.627 ( +0.08%) [ +0.15% +0.11% +0.00% / +0.08% +0.19% +0.11%] index_add_ reverse : Elapsed 0.026 ms (2.629 ms / 100) 2.577 -> 2.576 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.08% +0.16%] index_copy_ reverse : Elapsed 0.026 ms (2.577 ms / 100) 2.628 -> 2.620 ( -0.30%) [ +0.11% +0.00% +0.00% / -0.23% -0.11% -0.30%] index_add_ spread : Elapsed 0.026 ms (2.631 ms / 100) 2.578 -> 2.571 ( -0.27%) [ +0.08% +0.19% +0.00% / +0.00% -0.12% -0.27%] index_copy_ spread : Elapsed 0.026 ms (2.580 ms / 100) 2.629 -> 2.625 ( -0.15%) [ +0.11% +0.04% +0.00% / -0.11% -0.04% -0.15%] index_add_ strided 3 : Elapsed 0.026 ms (2.632 ms / 100) 2.574 -> 2.576 ( +0.08%) [ +0.27% +0.00% +0.04% / +0.08% +0.12% +0.19%] index_copy_ strided 3 : Elapsed 0.026 ms (2.581 ms / 100) 2.628 -> 2.628 ( +0.00%) [ +0.08% +0.30% +0.00% / +0.08% +0.04% +0.00%] index_add_ strided 5 : Elapsed 0.026 ms (2.630 ms / 100) 2.581 -> 2.577 ( -0.15%) [ +0.08% +0.12% +0.00% / +0.00% -0.04% -0.15%] index_copy_ strided 5 : Elapsed 0.026 ms (2.583 ms / 100) 2.627 -> 2.628 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.15% +0.04% +0.08%] index_add_ strided 7 : Elapsed 0.026 ms (2.629 ms / 100) 2.578 -> 2.580 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.08% +0.19%] index_copy_ strided 7 : Elapsed 0.026 ms (2.578 ms / 100) 2.622 -> 2.623 ( +0.04%) [ +0.23% +0.23% +0.00% / +0.04% +0.11% +0.23%] index_add_ perm : Elapsed 0.026 ms (2.628 ms / 100) 2.578 -> 2.575 ( -0.12%) [ +0.04% +0.00% +0.04% / -0.12% -0.04% +0.08%] index_copy_ perm : Elapsed 0.026 ms (2.579 ms / 100) 2.627 -> 2.627 ( +0.00%) [ +0.00% +0.19% +0.08% / +0.00% +0.23% +0.11%] index_add_ perm_sorted : Elapsed 0.026 ms (2.627 ms / 100) 2.578 -> 2.582 ( +0.16%) [ +0.12% +0.12% +0.00% / +0.16% +0.27% +0.31%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.581 ms / 100) 6.021 -> 6.016 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.22% +0.35%] index_select const : Elapsed 0.060 ms (6.021 ms / 100) 6.018 -> 6.018 ( +0.00%) [ +0.08% +0.02% +0.00% / +0.00% +0.30% +0.25%] index_select wrap : Elapsed 0.060 ms (6.023 ms / 100) 6.037 -> 6.030 ( -0.12%) [ +0.03% +0.08% +0.00% / -0.12% +0.18% +0.12%] index_select linear : Elapsed 0.060 ms (6.039 ms / 100) 6.018 -> 6.016 ( -0.03%) [ +0.12% +0.00% +0.13% / -0.03% +0.30% +0.25%] index_select reverse : Elapsed 0.060 ms (6.025 ms / 100) 6.037 -> 6.033 ( -0.07%) [ +0.07% +0.00% +0.02% / -0.07% +0.25% +0.18%] index_select skip64 : Elapsed 0.060 ms (6.041 ms / 100) 6.034 -> 6.038 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.30% +0.30%] index_select skip256 : Elapsed 0.060 ms (6.034 ms / 100) 6.020 -> 6.020 ( +0.00%) [ +0.05% +0.08% +0.00% / +0.00% +0.28% +0.30%] index_select spread : Elapsed 0.060 ms (6.023 ms / 100) 6.016 -> 6.014 ( -0.03%) [ +0.05% +0.17% +0.00% / -0.03% +0.35% +0.27%] index_select strided 3 : Elapsed 0.060 ms (6.019 ms / 100) 6.034 -> 6.044 ( +0.17%) [ +0.07% +0.07% +0.00% / +0.17% +0.20% +0.22%] index_select random : Elapsed 0.060 ms (6.038 ms / 100) 6.019 -> 6.016 ( -0.05%) [ +0.08% +0.08% +0.00% / -0.05% +0.23% +0.30%] index_select random_sorted : Elapsed 0.060 ms (6.024 ms / 100) B = [40, 5, 20, 16] (stride (20, 800, 1, 4000)) A = [40, 5, 20, 4] (stride (20, 800, 1, 4000)) dim = 3 2.492 -> 2.496 ( +0.16%) [ +0.00% +0.12% +0.08% / +0.16% +0.28% +0.28%] index_add_ linear : Elapsed 0.025 ms (2.492 ms / 100) 2.443 -> 2.444 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.04% +0.08% +0.12%] index_copy_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.496 -> 2.497 ( +0.04%) [ +0.12% +0.28% +0.00% / +0.04% +0.44% +0.28%] index_add_ reverse : Elapsed 0.025 ms (2.499 ms / 100) 2.439 -> 2.442 ( +0.12%) [ +0.00% +0.16% +0.08% / +0.12% +0.45% +0.33%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.493 -> 2.485 ( -0.32%) [ +0.04% +0.12% +0.00% / -0.32% +0.04% +0.16%] index_add_ spread : Elapsed 0.025 ms (2.494 ms / 100) 2.440 -> 2.443 ( +0.12%) [ +0.04% +0.00% +0.20% / +0.12% +0.53% +0.53%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.493 -> 2.498 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.48% +0.56%] index_add_ strided 3 : Elapsed 0.025 ms (2.498 ms / 100) 2.443 -> 2.446 ( +0.12%) [ +0.41% +0.29% +0.00% / +0.12% +0.29% +0.45%] index_copy_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.497 -> 2.500 ( +0.12%) [ +0.28% +0.32% +0.00% / +0.32% +0.12% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.504 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.04% +0.04%] index_copy_ strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.490 -> 2.491 ( +0.04%) [ +0.00% +0.16% +0.20% / +0.24% +0.16% +0.04%] index_add_ strided 7 : Elapsed 0.025 ms (2.490 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.00% +0.00% +0.00% / +0.04% -0.04% +0.04%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.488 -> 2.486 ( -0.08%) [ +0.16% +0.20% +0.00% / -0.08% +0.20% +0.04%] index_add_ perm : Elapsed 0.025 ms (2.492 ms / 100) 2.440 -> 2.442 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.12% +0.16% +0.08%] index_copy_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.488 -> 2.490 ( +0.08%) [ +0.00% +0.12% +0.20% / +0.08% +0.28% +0.36%] index_add_ perm_sorted : Elapsed 0.025 ms (2.488 ms / 100) 2.436 -> 2.444 ( +0.33%) [ +0.08% +0.12% +0.00% / +0.33% +0.37% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.438 ms / 100) 5.207 -> 5.205 ( -0.04%) [ +0.00% +0.35% +0.13% / -0.04% +0.58% +0.35%] index_select const : Elapsed 0.052 ms (5.207 ms / 100) 5.279 -> 5.278 ( -0.02%) [ +0.02% +0.23% +0.00% / -0.02% +0.49% +0.47%] index_select wrap : Elapsed 0.053 ms (5.280 ms / 100) 5.272 -> 5.272 ( +0.00%) [ +0.06% +0.04% +0.00% / +0.00% +0.44% +0.36%] index_select linear : Elapsed 0.053 ms (5.275 ms / 100) 5.266 -> 5.266 ( +0.00%) [ +0.13% +0.17% +0.00% / +0.00% +0.34% +0.38%] index_select reverse : Elapsed 0.053 ms (5.273 ms / 100) 5.217 -> 5.225 ( +0.15%) [ +0.21% +0.38% +0.00% / +0.15% +0.44% +0.15%] index_select skip64 : Elapsed 0.052 ms (5.228 ms / 100) 5.209 -> 5.201 ( -0.15%) [ +0.00% +0.02% +0.21% / -0.15% +0.19% +0.12%] index_select skip256 : Elapsed 0.052 ms (5.209 ms / 100) 5.272 -> 5.271 ( -0.02%) [ +0.13% +0.09% +0.00% / -0.02% +0.13% +0.27%] index_select spread : Elapsed 0.053 ms (5.279 ms / 100) 5.274 -> 5.282 ( +0.15%) [ +0.04% +0.00% +0.02% / +0.15% +0.40% +0.32%] index_select strided 3 : Elapsed 0.053 ms (5.276 ms / 100) 5.272 -> 5.266 ( -0.11%) [ +0.00% +0.00% +0.15% / -0.11% +0.09% +0.21%] index_select random : Elapsed 0.053 ms (5.272 ms / 100) 5.254 -> 5.257 ( +0.06%) [ +0.10% +0.00% +0.00% / +0.06% +0.36% +0.30%] index_select random_sorted : Elapsed 0.053 ms (5.259 ms / 100) out_shape = [16, 20, 4, 5] in_shape = [40, 20, 4, 5] idx_dim = 0 B = [16, 20, 4, 5] (stride (1, 320, 80, 16)) A = [40, 20, 4, 5] (stride (1, 160, 40, 3200)) dim = 0 3.234 -> 3.235 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.40% +0.43%] index_select const : Elapsed 0.032 ms (3.236 ms / 100) 3.219 -> 3.224 ( +0.16%) [ +0.06% +0.00% +0.06% / +0.16% +0.62% +0.65%] index_select wrap : Elapsed 0.032 ms (3.221 ms / 100) 3.217 -> 3.222 ( +0.16%) [ +0.12% +0.25% +0.00% / +0.16% +0.65% +0.47%] index_select linear : Elapsed 0.032 ms (3.221 ms / 100) 3.226 -> 3.234 ( +0.25%) [ +0.46% +0.00% +0.15% / +0.25% +0.84% +0.84%] index_select reverse : Elapsed 0.032 ms (3.241 ms / 100) 3.233 -> 3.233 ( +0.00%) [ +0.00% +0.19% +0.09% / +0.00% +0.37% +0.49%] index_select skip64 : Elapsed 0.032 ms (3.233 ms / 100) 3.230 -> 3.230 ( +0.00%) [ +0.00% +0.15% +0.12% / +0.00% +0.56% +0.53%] index_select skip256 : Elapsed 0.032 ms (3.230 ms / 100) 3.202 -> 3.205 ( +0.09%) [ +0.37% +0.00% +0.31% / +0.09% +0.47% +0.69%] index_select spread : Elapsed 0.032 ms (3.214 ms / 100) 3.230 -> 3.232 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.40% +0.22%] index_select strided 3 : Elapsed 0.032 ms (3.230 ms / 100) 3.219 -> 3.223 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.43% +0.28%] index_select strided 5 : Elapsed 0.032 ms (3.221 ms / 100) 3.214 -> 3.214 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.28% +0.37%] index_select strided 7 : Elapsed 0.032 ms (3.216 ms / 100) 3.224 -> 3.225 ( +0.03%) [ +0.09% +0.00% +0.06% / +0.03% +0.34% +0.34%] index_select strided 8 : Elapsed 0.032 ms (3.227 ms / 100) 3.198 -> 3.200 ( +0.06%) [ +0.31% +0.03% +0.00% / +0.06% +0.44% +0.69%] index_select strided 16 : Elapsed 0.032 ms (3.208 ms / 100) 3.200 -> 3.208 ( +0.25%) [ +0.00% +0.19% +0.06% / +0.25% +0.56% +0.66%] index_select random : Elapsed 0.032 ms (3.200 ms / 100) 3.216 -> 3.228 ( +0.37%) [ +0.37% +0.00% +0.16% / +0.37% +0.90% +0.75%] index_select random_sorted : Elapsed 0.032 ms (3.228 ms / 100) 3.219 -> 3.221 ( +0.06%) [ +0.00% +0.00% +0.16% / +0.06% +0.43% +0.43%] index_select perm : Elapsed 0.032 ms (3.219 ms / 100) 3.195 -> 3.196 ( +0.03%) [ +0.06% +0.13% +0.00% / +0.03% +0.44% +0.44%] index_select perm_sorted : Elapsed 0.032 ms (3.197 ms / 100) B = [16, 20, 4, 5] (stride (100, 1, 1600, 20)) A = [40, 20, 4, 5] (stride (20, 1, 800, 3200)) dim = 0 4.140 -> 4.142 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.70% +0.75%] index_select const : Elapsed 0.041 ms (4.141 ms / 100) 4.119 -> 4.120 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.68% +0.68%] index_select wrap : Elapsed 0.041 ms (4.121 ms / 100) 4.119 -> 4.117 ( -0.05%) [ +0.02% +0.00% +0.00% / -0.05% +0.58% +0.63%] index_select linear : Elapsed 0.041 ms (4.120 ms / 100) 4.145 -> 4.148 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.87% +0.82%] index_select reverse : Elapsed 0.041 ms (4.145 ms / 100) 4.151 -> 4.152 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.77% +0.79%] index_select skip64 : Elapsed 0.042 ms (4.153 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.77% +0.75%] index_select skip256 : Elapsed 0.041 ms (4.138 ms / 100) 4.138 -> 4.141 ( +0.07%) [ +0.17% +0.00% +0.02% / +0.07% +0.89% +0.77%] index_select spread : Elapsed 0.041 ms (4.145 ms / 100) 4.145 -> 4.149 ( +0.10%) [ +0.17% +0.12% +0.00% / +0.10% +0.70% +0.68%] index_select strided 3 : Elapsed 0.042 ms (4.152 ms / 100) 4.113 -> 4.116 ( +0.07%) [ +0.10% +0.12% +0.00% / +0.07% +0.63% +0.58%] index_select strided 5 : Elapsed 0.041 ms (4.117 ms / 100) 4.121 -> 4.122 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.70% +0.68%] index_select strided 7 : Elapsed 0.041 ms (4.121 ms / 100) 4.123 -> 4.124 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.80% +0.78%] index_select strided 8 : Elapsed 0.041 ms (4.125 ms / 100) 4.081 -> 4.081 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_select strided 16 : Elapsed 0.041 ms (4.082 ms / 100) 4.138 -> 4.137 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.72% +0.72%] index_select random : Elapsed 0.041 ms (4.139 ms / 100) 4.133 -> 4.137 ( +0.10%) [ +0.15% +0.00% +0.00% / +0.10% +0.85% +0.80%] index_select random_sorted : Elapsed 0.041 ms (4.139 ms / 100) 4.128 -> 4.129 ( +0.02%) [ +0.02% +0.00% +0.10% / +0.02% +0.73% +0.78%] index_select perm : Elapsed 0.041 ms (4.129 ms / 100) 4.146 -> 4.147 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.70% +0.68%] index_select perm_sorted : Elapsed 0.041 ms (4.148 ms / 100) out_shape = [40, 16, 4, 5] in_shape = [40, 20, 4, 5] idx_dim = 1 B = [40, 16, 4, 5] (stride (320, 1, 80, 16)) A = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) dim = 1 3.807 -> 3.812 ( +0.13%) [ +0.00% +0.05% +0.03% / +0.13% +0.81% +0.74%] index_select const : Elapsed 0.038 ms (3.807 ms / 100) 3.811 -> 3.808 ( -0.08%) [ +0.05% +0.05% +0.00% / -0.08% +0.84% +0.73%] index_select wrap : Elapsed 0.038 ms (3.813 ms / 100) 3.810 -> 3.811 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.76% +0.73%] index_select linear : Elapsed 0.038 ms (3.812 ms / 100) 3.820 -> 3.819 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.73% +0.73%] index_select reverse : Elapsed 0.038 ms (3.821 ms / 100) 3.801 -> 3.804 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.61% +0.68%] index_select skip64 : Elapsed 0.038 ms (3.803 ms / 100) 3.802 -> 3.811 ( +0.24%) [ +0.00% +0.26% +0.08% / +0.24% +0.66% +0.76%] index_select skip256 : Elapsed 0.038 ms (3.802 ms / 100) 3.813 -> 3.821 ( +0.21%) [ +0.05% +0.00% +0.03% / +0.21% +0.84% +0.89%] index_select spread : Elapsed 0.038 ms (3.815 ms / 100) 3.817 -> 3.822 ( +0.13%) [ +0.05% +0.13% +0.00% / +0.13% +0.73% +0.68%] index_select strided 3 : Elapsed 0.038 ms (3.819 ms / 100) 3.800 -> 3.808 ( +0.21%) [ +0.11% +0.00% +0.26% / +0.21% +0.84% +0.97%] index_select strided 5 : Elapsed 0.038 ms (3.804 ms / 100) 3.808 -> 3.803 ( -0.13%) [ +0.03% +0.00% +0.00% / -0.13% +0.68% +0.58%] index_select strided 7 : Elapsed 0.038 ms (3.809 ms / 100) 3.800 -> 3.801 ( +0.03%) [ +0.00% +0.08% +0.08% / +0.03% +0.63% +0.63%] index_select strided 8 : Elapsed 0.038 ms (3.800 ms / 100) 3.810 -> 3.814 ( +0.10%) [ +0.08% +0.05% +0.00% / +0.10% +0.76% +0.63%] index_select strided 16 : Elapsed 0.038 ms (3.813 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.45% +0.52%] index_select random : Elapsed 0.038 ms (3.814 ms / 100) 3.817 -> 3.819 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.52% +0.42%] index_select random_sorted : Elapsed 0.038 ms (3.818 ms / 100) 3.817 -> 3.824 ( +0.18%) [ +0.13% +0.00% +0.10% / +0.18% +0.55% +0.55%] index_select perm : Elapsed 0.038 ms (3.822 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.60% +0.55%] index_select perm_sorted : Elapsed 0.038 ms (3.815 ms / 100) B = [40, 16, 4, 5] (stride (5, 800, 200, 1)) A = [40, 20, 4, 5] (stride (400, 1, 20, 80)) dim = 1 4.137 -> 4.138 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.39% +0.39%] index_select const : Elapsed 0.041 ms (4.138 ms / 100) 4.159 -> 4.158 ( -0.02%) [ +0.00% +0.12% +0.00% / -0.02% +0.58% +0.46%] index_select wrap : Elapsed 0.042 ms (4.159 ms / 100) 4.154 -> 4.153 ( -0.02%) [ +0.07% +0.07% +0.00% / -0.02% +0.53% +0.51%] index_select linear : Elapsed 0.042 ms (4.157 ms / 100) 4.149 -> 4.152 ( +0.07%) [ +0.00% +0.07% +0.05% / +0.07% +0.70% +0.60%] index_select reverse : Elapsed 0.041 ms (4.149 ms / 100) 4.137 -> 4.142 ( +0.12%) [ +0.00% +0.07% +0.12% / +0.12% +0.60% +0.63%] index_select skip64 : Elapsed 0.041 ms (4.137 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.12% +0.02% +0.00% / +0.00% +0.43% +0.48%] index_select skip256 : Elapsed 0.041 ms (4.143 ms / 100) 4.150 -> 4.154 ( +0.10%) [ +0.00% +0.07% +0.02% / +0.10% +0.53% +0.39%] index_select spread : Elapsed 0.042 ms (4.150 ms / 100) 4.167 -> 4.175 ( +0.19%) [ +0.12% +0.19% +0.00% / +0.19% +0.58% +0.58%] index_select strided 3 : Elapsed 0.042 ms (4.172 ms / 100) 4.145 -> 4.147 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.43% +0.43%] index_select strided 5 : Elapsed 0.041 ms (4.147 ms / 100) 4.139 -> 4.144 ( +0.12%) [ +0.00% +0.02% +0.14% / +0.12% +0.70% +0.72%] index_select strided 7 : Elapsed 0.041 ms (4.139 ms / 100) 4.133 -> 4.135 ( +0.05%) [ +0.00% +0.07% +0.02% / +0.05% +0.60% +0.80%] index_select strided 8 : Elapsed 0.041 ms (4.133 ms / 100) 4.142 -> 4.144 ( +0.05%) [ +0.14% +0.14% +0.00% / +0.05% +0.65% +0.65%] index_select strided 16 : Elapsed 0.041 ms (4.148 ms / 100) 4.139 -> 4.143 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.70% +0.75%] index_select random : Elapsed 0.041 ms (4.139 ms / 100) 4.154 -> 4.155 ( +0.02%) [ +0.14% +0.00% +0.12% / +0.02% +0.79% +0.70%] index_select random_sorted : Elapsed 0.042 ms (4.160 ms / 100) 4.151 -> 4.151 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.58% +0.51%] index_select perm : Elapsed 0.042 ms (4.151 ms / 100) 4.156 -> 4.159 ( +0.07%) [ +0.00% +0.02% +0.10% / +0.07% +0.72% +0.72%] index_select perm_sorted : Elapsed 0.042 ms (4.156 ms / 100) B = [40, 16, 4, 5] (stride (4, 800, 1, 160)) A = [40, 20, 4, 5] (stride (1, 40, 4000, 800)) dim = 1 3.814 -> 3.815 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.71% +0.68%] index_select const : Elapsed 0.038 ms (3.815 ms / 100) 3.800 -> 3.801 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.82% +0.82%] index_select wrap : Elapsed 0.038 ms (3.801 ms / 100) 3.820 -> 3.821 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.73% +0.73%] index_select linear : Elapsed 0.038 ms (3.822 ms / 100) 3.802 -> 3.804 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.71% +0.68%] index_select reverse : Elapsed 0.038 ms (3.803 ms / 100) 3.782 -> 3.783 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.69% +0.66%] index_select skip64 : Elapsed 0.038 ms (3.783 ms / 100) 3.802 -> 3.802 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.66%] index_select skip256 : Elapsed 0.038 ms (3.802 ms / 100) 3.828 -> 3.828 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.76% +0.65%] index_select spread : Elapsed 0.038 ms (3.829 ms / 100) 3.810 -> 3.813 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.60% +0.58%] index_select strided 3 : Elapsed 0.038 ms (3.813 ms / 100) 3.793 -> 3.794 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.55% +0.55%] index_select strided 5 : Elapsed 0.038 ms (3.794 ms / 100) 3.820 -> 3.821 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.71% +0.73%] index_select strided 7 : Elapsed 0.038 ms (3.821 ms / 100) 3.826 -> 3.827 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.55% +0.55%] index_select strided 8 : Elapsed 0.038 ms (3.827 ms / 100) 3.817 -> 3.816 ( -0.03%) [ +0.00% +0.00% +0.08% / -0.03% +0.58% +0.55%] index_select strided 16 : Elapsed 0.038 ms (3.817 ms / 100) 3.798 -> 3.798 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.50% +0.50%] index_select random : Elapsed 0.038 ms (3.800 ms / 100) 3.827 -> 3.828 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.47% +0.47%] index_select random_sorted : Elapsed 0.038 ms (3.829 ms / 100) 3.823 -> 3.824 ( +0.03%) [ +0.00% +0.00% +0.29% / +0.03% +0.76% +0.44%] index_select perm : Elapsed 0.038 ms (3.823 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.58% +0.58%] index_select perm_sorted : Elapsed 0.038 ms (3.806 ms / 100) B = [40, 16, 4, 5] (stride (1, 800, 40, 160)) A = [40, 20, 4, 5] (stride (4, 160, 1, 3200)) dim = 1 3.870 -> 3.868 ( -0.05%) [ +0.00% +0.10% +0.03% / -0.05% +0.41% +0.49%] index_select const : Elapsed 0.039 ms (3.870 ms / 100) 3.891 -> 3.889 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.62% +0.82%] index_select wrap : Elapsed 0.039 ms (3.891 ms / 100) 3.903 -> 3.905 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.54% +0.51%] index_select linear : Elapsed 0.039 ms (3.906 ms / 100) 3.893 -> 3.894 ( +0.03%) [ +0.10% +0.10% +0.00% / +0.03% +0.69% +0.74%] index_select reverse : Elapsed 0.039 ms (3.897 ms / 100) 3.880 -> 3.882 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.05% +0.46% +0.46%] index_select skip64 : Elapsed 0.039 ms (3.882 ms / 100) 3.860 -> 3.866 ( +0.16%) [ +0.05% +0.13% +0.00% / +0.16% +0.60% +0.57%] index_select skip256 : Elapsed 0.039 ms (3.862 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.00% +0.18% +0.13% / +0.13% +0.28% +0.26%] index_select spread : Elapsed 0.039 ms (3.919 ms / 100) 3.881 -> 3.881 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +1.16% +0.67%] index_select strided 3 : Elapsed 0.039 ms (3.882 ms / 100) 3.886 -> 3.890 ( +0.10%) [ +0.00% +0.10% +0.13% / +0.10% +0.87% +0.54%] index_select strided 5 : Elapsed 0.039 ms (3.886 ms / 100) 3.901 -> 3.908 ( +0.18%) [ +0.36% +0.00% +0.05% / +0.18% +0.69% +0.62%] index_select strided 7 : Elapsed 0.039 ms (3.915 ms / 100) 3.886 -> 3.887 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.67% +0.67%] index_select strided 8 : Elapsed 0.039 ms (3.887 ms / 100) 3.896 -> 3.901 ( +0.13%) [ +0.23% +0.00% +0.44% / +0.13% +0.72% +0.69%] index_select strided 16 : Elapsed 0.039 ms (3.905 ms / 100) 3.897 -> 3.897 ( +0.00%) [ +0.21% +0.00% +0.15% / +0.00% +0.64% +0.80%] index_select random : Elapsed 0.039 ms (3.905 ms / 100) 3.877 -> 3.880 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +1.03% +1.03%] index_select random_sorted : Elapsed 0.039 ms (3.880 ms / 100) 3.876 -> 3.878 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.80% +1.26%] index_select perm : Elapsed 0.039 ms (3.878 ms / 100) 3.884 -> 3.885 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.93% +1.11%] index_select perm_sorted : Elapsed 0.039 ms (3.885 ms / 100) B = [40, 16, 4, 5] (stride (80, 5, 3200, 1)) A = [40, 20, 4, 5] (stride (400, 20, 5, 1)) dim = 1 3.536 -> 3.536 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.88% +0.82%] index_select const : Elapsed 0.035 ms (3.541 ms / 100) 3.517 -> 3.518 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.71% +0.71%] index_select wrap : Elapsed 0.035 ms (3.517 ms / 100) 3.500 -> 3.502 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.91% +1.00%] index_select linear : Elapsed 0.035 ms (3.501 ms / 100) 3.523 -> 3.531 ( +0.23%) [ +0.00% +0.14% +0.26% / +0.23% +0.88% +1.08%] index_select reverse : Elapsed 0.035 ms (3.523 ms / 100) 3.538 -> 3.536 ( -0.06%) [ +0.00% +0.06% +0.03% / -0.06% +0.65% +0.62%] index_select skip64 : Elapsed 0.035 ms (3.538 ms / 100) 3.532 -> 3.531 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.62% +0.65%] index_select skip256 : Elapsed 0.035 ms (3.532 ms / 100) 3.516 -> 3.519 ( +0.09%) [ +0.11% +0.03% +0.00% / +0.09% +0.60% +0.68%] index_select spread : Elapsed 0.035 ms (3.520 ms / 100) 3.518 -> 3.518 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.65%] index_select strided 3 : Elapsed 0.035 ms (3.519 ms / 100) 3.515 -> 3.514 ( -0.03%) [ +0.00% +0.06% +0.00% / -0.03% +0.54% +0.63%] index_select strided 5 : Elapsed 0.035 ms (3.515 ms / 100) 3.518 -> 3.518 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.51% +0.51%] index_select strided 7 : Elapsed 0.035 ms (3.520 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.11% +0.09% +0.00% / +0.06% +0.77% +0.74%] index_select strided 8 : Elapsed 0.035 ms (3.516 ms / 100) 3.515 -> 3.517 ( +0.06%) [ +0.06% +0.09% +0.00% / +0.06% +0.71% +0.71%] index_select strided 16 : Elapsed 0.035 ms (3.517 ms / 100) 3.520 -> 3.518 ( -0.06%) [ +0.03% +0.03% +0.00% / -0.06% +0.45% +0.43%] index_select random : Elapsed 0.035 ms (3.521 ms / 100) 3.505 -> 3.504 ( -0.03%) [ +0.26% +0.00% +0.00% / -0.03% +0.71% +0.57%] index_select random_sorted : Elapsed 0.035 ms (3.514 ms / 100) 3.522 -> 3.527 ( +0.14%) [ +0.06% +0.00% +0.20% / +0.14% +0.43% +0.43%] index_select perm : Elapsed 0.035 ms (3.524 ms / 100) 3.519 -> 3.520 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.51% +0.43%] index_select perm_sorted : Elapsed 0.035 ms (3.519 ms / 100) B = [40, 16, 4, 5] (stride (5, 200, 3200, 1)) A = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) dim = 1 4.139 -> 4.133 ( -0.14%) [ +0.17% +0.07% +0.00% / -0.14% +0.48% +0.46%] index_select const : Elapsed 0.041 ms (4.146 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.10% +0.07% +0.00% / +0.00% +0.56% +0.51%] index_select wrap : Elapsed 0.041 ms (4.143 ms / 100) 4.152 -> 4.157 ( +0.12%) [ +0.14% +0.02% +0.00% / +0.12% +0.48% +0.51%] index_select linear : Elapsed 0.042 ms (4.158 ms / 100) 4.151 -> 4.154 ( +0.07%) [ +0.00% +0.12% +0.07% / +0.07% +0.72% +0.75%] index_select reverse : Elapsed 0.042 ms (4.151 ms / 100) 4.143 -> 4.145 ( +0.05%) [ +0.00% +0.02% +0.12% / +0.05% +0.51% +0.51%] index_select skip64 : Elapsed 0.041 ms (4.143 ms / 100) 4.137 -> 4.138 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.53% +0.51%] index_select skip256 : Elapsed 0.041 ms (4.139 ms / 100) 4.138 -> 4.142 ( +0.10%) [ +0.02% +0.07% +0.00% / +0.10% +0.53% +0.56%] index_select spread : Elapsed 0.041 ms (4.139 ms / 100) 4.150 -> 4.157 ( +0.17%) [ +0.00% +0.14% +0.07% / +0.17% +0.82% +0.55%] index_select strided 3 : Elapsed 0.041 ms (4.150 ms / 100) 4.133 -> 4.136 ( +0.07%) [ +0.00% +0.05% +0.15% / +0.07% +0.51% +0.51%] index_select strided 5 : Elapsed 0.041 ms (4.133 ms / 100) 4.132 -> 4.136 ( +0.10%) [ +0.27% +0.15% +0.00% / +0.10% +0.68% +0.80%] index_select strided 7 : Elapsed 0.041 ms (4.143 ms / 100) 4.146 -> 4.148 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.68% +0.77%] index_select strided 8 : Elapsed 0.041 ms (4.146 ms / 100) 4.125 -> 4.128 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.75% +0.75%] index_select strided 16 : Elapsed 0.041 ms (4.127 ms / 100) 4.133 -> 4.141 ( +0.19%) [ +0.00% +0.24% +0.27% / +0.19% +0.80% +0.73%] index_select random : Elapsed 0.041 ms (4.133 ms / 100) 4.160 -> 4.159 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.60% +0.65%] index_select random_sorted : Elapsed 0.042 ms (4.160 ms / 100) 4.127 -> 4.135 ( +0.19%) [ +0.00% +0.12% +0.15% / +0.19% +0.85% +0.73%] index_select perm : Elapsed 0.041 ms (4.127 ms / 100) 4.144 -> 4.148 ( +0.10%) [ +0.00% +0.02% +0.05% / +0.10% +0.68% +0.70%] index_select perm_sorted : Elapsed 0.041 ms (4.144 ms / 100) B = [40, 16, 4, 5] (stride (64, 1, 16, 2560)) A = [40, 20, 4, 5] (stride (1, 800, 40, 160)) dim = 1 4.132 -> 4.131 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.73% +0.73%] index_select const : Elapsed 0.041 ms (4.132 ms / 100) 4.168 -> 4.169 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.60% +0.62%] index_select wrap : Elapsed 0.042 ms (4.170 ms / 100) 4.131 -> 4.130 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.73% +0.70%] index_select linear : Elapsed 0.041 ms (4.133 ms / 100) 4.097 -> 4.099 ( +0.05%) [ +0.00% +0.27% +0.00% / +0.05% +0.71% +0.93%] index_select reverse : Elapsed 0.041 ms (4.097 ms / 100) 4.148 -> 4.148 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.60%] index_select skip64 : Elapsed 0.041 ms (4.148 ms / 100) 4.148 -> 4.148 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.65% +0.75%] index_select skip256 : Elapsed 0.041 ms (4.148 ms / 100) 4.147 -> 4.149 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.58% +0.55%] index_select spread : Elapsed 0.041 ms (4.149 ms / 100) 4.155 -> 4.154 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.60% +0.72%] index_select strided 3 : Elapsed 0.042 ms (4.156 ms / 100) 4.106 -> 4.107 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.78% +0.75%] index_select strided 5 : Elapsed 0.041 ms (4.107 ms / 100) 4.125 -> 4.127 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.61% +0.61%] index_select strided 7 : Elapsed 0.041 ms (4.129 ms / 100) 4.126 -> 4.126 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.61%] index_select strided 8 : Elapsed 0.041 ms (4.126 ms / 100) 4.135 -> 4.138 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.70% +0.68%] index_select strided 16 : Elapsed 0.041 ms (4.137 ms / 100) 4.123 -> 4.123 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.63% +0.63%] index_select random : Elapsed 0.041 ms (4.125 ms / 100) 4.120 -> 4.122 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.53% +0.49%] index_select random_sorted : Elapsed 0.041 ms (4.122 ms / 100) 4.143 -> 4.143 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.72% +0.48%] index_select perm : Elapsed 0.041 ms (4.144 ms / 100) 4.135 -> 4.138 ( +0.07%) [ +0.10% +0.00% +0.02% / +0.07% +0.41% +0.44%] index_select perm_sorted : Elapsed 0.041 ms (4.139 ms / 100) B = [40, 16, 4, 5] (stride (16, 1, 640, 2560)) A = [40, 20, 4, 5] (stride (1, 800, 200, 40)) dim = 1 3.794 -> 3.793 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.40% +0.45%] index_select const : Elapsed 0.038 ms (3.795 ms / 100) 3.812 -> 3.807 ( -0.13%) [ +0.05% +0.00% +0.00% / -0.13% +0.52% +0.45%] index_select wrap : Elapsed 0.038 ms (3.814 ms / 100) 3.822 -> 3.821 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.37% +0.44%] index_select linear : Elapsed 0.038 ms (3.823 ms / 100) 3.792 -> 3.793 ( +0.03%) [ +0.13% +0.00% +0.05% / +0.03% +0.55% +0.55%] index_select reverse : Elapsed 0.038 ms (3.797 ms / 100) 3.796 -> 3.796 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.47%] index_select skip64 : Elapsed 0.038 ms (3.796 ms / 100) 3.806 -> 3.808 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.60% +0.58%] index_select skip256 : Elapsed 0.038 ms (3.808 ms / 100) 3.808 -> 3.818 ( +0.26%) [ +0.00% +0.00% +0.24% / +0.26% +0.76% +0.81%] index_select spread : Elapsed 0.038 ms (3.808 ms / 100) 3.824 -> 3.824 ( +0.00%) [ +0.26% +0.21% +0.00% / +0.00% +0.86% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.834 ms / 100) 3.797 -> 3.797 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.45% +0.42%] index_select strided 5 : Elapsed 0.038 ms (3.798 ms / 100) 3.797 -> 3.797 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.63% +0.58%] index_select strided 7 : Elapsed 0.038 ms (3.800 ms / 100) 3.812 -> 3.814 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.63% +0.63%] index_select strided 8 : Elapsed 0.038 ms (3.814 ms / 100) 3.812 -> 3.812 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.55% +0.58%] index_select strided 16 : Elapsed 0.038 ms (3.813 ms / 100) 3.816 -> 3.812 ( -0.10%) [ +0.03% +0.05% +0.00% / -0.10% +0.55% +0.66%] index_select random : Elapsed 0.038 ms (3.817 ms / 100) 3.826 -> 3.826 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.60% +0.58%] index_select random_sorted : Elapsed 0.038 ms (3.827 ms / 100) 3.812 -> 3.814 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.58% +0.60%] index_select perm : Elapsed 0.038 ms (3.816 ms / 100) 3.804 -> 3.809 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.66% +0.71%] index_select perm_sorted : Elapsed 0.038 ms (3.804 ms / 100) out_shape = [40, 20, 16, 5] in_shape = [40, 20, 4, 5] idx_dim = 2 B = [40, 20, 16, 5] (stride (1600, 80, 5, 1)) A = [40, 20, 4, 5] (stride (400, 5, 100, 1)) dim = 2 2.383 -> 2.380 ( -0.13%) [ +0.17% +0.08% +0.00% / -0.13% +0.38% +0.34%] index_add_ linear : Elapsed 0.024 ms (2.387 ms / 100) 2.338 -> 2.344 ( +0.26%) [ +0.17% +0.04% +0.00% / +0.26% +0.43% +0.30%] index_copy_ linear : Elapsed 0.023 ms (2.342 ms / 100) 2.371 -> 2.379 ( +0.34%) [ +0.21% +0.30% +0.00% / +0.34% +0.72% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.376 ms / 100) 2.338 -> 2.337 ( -0.04%) [ +0.34% +0.00% +0.13% / -0.04% +0.56% +0.51%] index_copy_ reverse : Elapsed 0.023 ms (2.346 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.00% +0.17% +0.04% / +0.12% +0.58% +0.54%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.389 -> 2.391 ( +0.08%) [ +0.00% +0.13% +0.00% / +0.08% +0.54% +0.59%] index_copy_ spread : Elapsed 0.024 ms (2.389 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.37% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.37% +0.08% +0.00% / +0.17% +0.66% +0.54%] index_copy_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.367 -> 2.373 ( +0.25%) [ +0.00% +0.25% +0.13% / +0.25% +0.30% +0.55%] index_add_ strided 5 : Elapsed 0.024 ms (2.367 ms / 100) 2.349 -> 2.352 ( +0.13%) [ +0.17% +0.00% +0.34% / +0.13% +0.34% +0.38%] index_copy_ strided 5 : Elapsed 0.024 ms (2.353 ms / 100) 2.380 -> 2.379 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.17% +0.17%] index_add_ strided 7 : Elapsed 0.024 ms (2.382 ms / 100) 2.369 -> 2.372 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.59% +0.55%] index_copy_ strided 7 : Elapsed 0.024 ms (2.372 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.12% +0.37% +0.00% / +0.04% +0.42% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.389 -> 2.390 ( +0.04%) [ +0.00% +0.13% +0.08% / +0.04% +0.42% +0.59%] index_copy_ perm : Elapsed 0.024 ms (2.389 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.42% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.389 -> 2.391 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.46% +0.42%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.392 ms / 100) 4.801 -> 4.801 ( +0.00%) [ +0.08% +0.29% +0.00% / +0.00% +0.60% +0.71%] index_select const : Elapsed 0.048 ms (4.805 ms / 100) 4.865 -> 4.864 ( -0.02%) [ +0.29% +0.29% +0.00% / -0.02% +0.49% +0.47%] index_select wrap : Elapsed 0.049 ms (4.879 ms / 100) 4.865 -> 4.865 ( +0.00%) [ +0.00% +0.10% +0.06% / +0.00% +0.58% +0.49%] index_select linear : Elapsed 0.049 ms (4.865 ms / 100) 4.868 -> 4.871 ( +0.06%) [ +0.00% +0.02% +0.12% / +0.06% +0.31% +0.62%] index_select reverse : Elapsed 0.049 ms (4.868 ms / 100) 4.807 -> 4.809 ( +0.04%) [ +0.00% +0.10% +0.06% / +0.04% +0.25% +0.48%] index_select skip64 : Elapsed 0.048 ms (4.807 ms / 100) 4.814 -> 4.814 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.19% +0.21%] index_select skip256 : Elapsed 0.048 ms (4.818 ms / 100) 4.859 -> 4.863 ( +0.08%) [ +0.02% +0.00% +0.00% / +0.08% +0.41% +0.41%] index_select spread : Elapsed 0.049 ms (4.860 ms / 100) 4.863 -> 4.861 ( -0.04%) [ +0.33% +0.27% +0.00% / -0.04% +0.66% +0.56%] index_select strided 3 : Elapsed 0.049 ms (4.879 ms / 100) 4.864 -> 4.868 ( +0.08%) [ +0.00% +0.02% +0.21% / +0.08% +0.33% +0.16%] index_select random : Elapsed 0.049 ms (4.864 ms / 100) 4.849 -> 4.853 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.47% +0.39%] index_select random_sorted : Elapsed 0.049 ms (4.851 ms / 100) B = [40, 20, 16, 5] (stride (1600, 5, 100, 1)) A = [40, 20, 4, 5] (stride (1, 800, 40, 160)) dim = 2 2.593 -> 2.595 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.23% +0.39%] index_add_ linear : Elapsed 0.026 ms (2.593 ms / 100) 2.534 -> 2.537 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.12% +0.28% +0.39%] index_copy_ linear : Elapsed 0.025 ms (2.535 ms / 100) 2.596 -> 2.597 ( +0.04%) [ +0.19% +0.04% +0.00% / +0.04% +0.19% +0.23%] index_add_ reverse : Elapsed 0.026 ms (2.601 ms / 100) 2.534 -> 2.536 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.08% +0.20%] index_copy_ reverse : Elapsed 0.025 ms (2.537 ms / 100) 2.594 -> 2.593 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.12% +0.12% -0.04%] index_add_ spread : Elapsed 0.026 ms (2.594 ms / 100) 2.534 -> 2.536 ( +0.08%) [ +0.00% +0.08% +0.20% / +0.08% +0.36% +0.16%] index_copy_ spread : Elapsed 0.025 ms (2.534 ms / 100) 2.593 -> 2.591 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.08% +0.15% +0.00%] index_add_ strided 3 : Elapsed 0.026 ms (2.594 ms / 100) 2.532 -> 2.532 ( +0.00%) [ +0.04% +0.20% +0.00% / +0.00% +0.20% +0.16%] index_copy_ strided 3 : Elapsed 0.025 ms (2.533 ms / 100) 2.595 -> 2.595 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.35% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.598 ms / 100) 2.538 -> 2.537 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.12% +0.24%] index_copy_ strided 5 : Elapsed 0.025 ms (2.541 ms / 100) 2.592 -> 2.594 ( +0.08%) [ +0.19% +0.08% +0.00% / +0.08% +0.35% +0.35%] index_add_ strided 7 : Elapsed 0.026 ms (2.597 ms / 100) 2.535 -> 2.534 ( -0.04%) [ +0.20% +0.00% +0.04% / -0.04% +0.20% +0.12%] index_copy_ strided 7 : Elapsed 0.025 ms (2.540 ms / 100) 2.597 -> 2.597 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.27% +0.15%] index_add_ perm : Elapsed 0.026 ms (2.600 ms / 100) 2.537 -> 2.537 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.20% +0.12%] index_copy_ perm : Elapsed 0.025 ms (2.539 ms / 100) 2.594 -> 2.596 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.19% +0.39%] index_add_ perm_sorted : Elapsed 0.026 ms (2.596 ms / 100) 2.535 -> 2.538 ( +0.12%) [ +0.20% +0.16% +0.00% / +0.12% +0.39% +0.36%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.540 ms / 100) 5.916 -> 5.917 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.47% +0.37%] index_select const : Elapsed 0.059 ms (5.919 ms / 100) 5.900 -> 5.903 ( +0.05%) [ +0.27% +0.00% +0.08% / +0.05% +0.42% +0.15%] index_select wrap : Elapsed 0.059 ms (5.916 ms / 100) 5.929 -> 5.918 ( -0.19%) [ +0.07% +0.08% +0.00% / -0.19% +0.24% +0.17%] index_select linear : Elapsed 0.059 ms (5.933 ms / 100) 5.922 -> 5.928 ( +0.10%) [ +0.00% +0.12% +0.08% / +0.10% +0.46% +0.42%] index_select reverse : Elapsed 0.059 ms (5.922 ms / 100) 5.929 -> 5.931 ( +0.03%) [ +0.08% +0.02% +0.00% / +0.03% +0.27% +0.30%] index_select skip64 : Elapsed 0.059 ms (5.934 ms / 100) 5.915 -> 5.927 ( +0.20%) [ +0.15% +0.22% +0.00% / +0.20% +0.46% +0.41%] index_select skip256 : Elapsed 0.059 ms (5.924 ms / 100) 5.912 -> 5.911 ( -0.02%) [ +0.02% +0.00% +0.05% / -0.02% +0.37% +0.36%] index_select spread : Elapsed 0.059 ms (5.913 ms / 100) 5.881 -> 5.878 ( -0.05%) [ +0.20% +0.51% +0.00% / -0.05% +0.43% +0.46%] index_select strided 3 : Elapsed 0.059 ms (5.893 ms / 100) 5.908 -> 5.914 ( +0.10%) [ +0.08% +0.03% +0.00% / +0.10% +0.17% +0.24%] index_select random : Elapsed 0.059 ms (5.913 ms / 100) 5.909 -> 5.912 ( +0.05%) [ +0.07% +0.10% +0.00% / +0.05% +0.34% +0.42%] index_select random_sorted : Elapsed 0.059 ms (5.913 ms / 100) B = [40, 20, 16, 5] (stride (1600, 1, 100, 20)) A = [40, 20, 4, 5] (stride (400, 5, 100, 1)) dim = 2 2.468 -> 2.471 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.12% +0.69% +0.61%] index_add_ linear : Elapsed 0.025 ms (2.469 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.50% +0.45%] index_copy_ linear : Elapsed 0.024 ms (2.418 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.45% +0.20%] index_add_ reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.417 -> 2.422 ( +0.21%) [ +0.08% +0.08% +0.00% / +0.21% +0.46% +0.41%] index_copy_ reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.469 -> 2.467 ( -0.08%) [ +0.04% +0.16% +0.00% / -0.08% +0.53% +0.49%] index_add_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.420 -> 2.418 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.50% +0.58%] index_copy_ spread : Elapsed 0.024 ms (2.420 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.08% +0.00% +0.20% / +0.12% +0.57% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.45% +0.29%] index_copy_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.32% +0.40%] index_add_ strided 5 : Elapsed 0.025 ms (2.472 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.21% +0.00% +0.04% / +0.04% +0.16% +0.16%] index_copy_ strided 5 : Elapsed 0.024 ms (2.430 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.28% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.33% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.472 -> 2.471 ( -0.04%) [ +0.20% +0.00% +0.16% / -0.04% +0.28% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.477 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.08% +0.00% +0.25% / -0.08% +0.21% +0.33%] index_copy_ perm : Elapsed 0.024 ms (2.423 ms / 100) 2.474 -> 2.474 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.32% +0.28%] index_add_ perm_sorted : Elapsed 0.025 ms (2.477 ms / 100) 2.422 -> 2.426 ( +0.17%) [ +0.04% +0.08% +0.00% / +0.17% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.423 ms / 100) 5.142 -> 5.169 ( +0.53%) [ +0.12% +0.41% +0.00% / +0.53% +1.01% +0.53%] index_select const : Elapsed 0.051 ms (5.148 ms / 100) 5.211 -> 5.217 ( +0.12%) [ +0.21% +0.00% +0.08% / +0.12% +0.40% +0.52%] index_select wrap : Elapsed 0.052 ms (5.222 ms / 100) 5.192 -> 5.198 ( +0.12%) [ +0.21% +0.13% +0.00% / +0.12% +0.44% +0.62%] index_select linear : Elapsed 0.052 ms (5.203 ms / 100) 5.213 -> 5.213 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.38% +0.42%] index_select reverse : Elapsed 0.052 ms (5.216 ms / 100) 5.142 -> 5.143 ( +0.02%) [ +0.16% +0.16% +0.00% / +0.02% +0.41% +0.41%] index_select skip64 : Elapsed 0.051 ms (5.150 ms / 100) 5.152 -> 5.156 ( +0.08%) [ +0.00% +0.02% +0.08% / +0.08% +0.29% +0.31%] index_select skip256 : Elapsed 0.052 ms (5.152 ms / 100) 5.216 -> 5.218 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.17% +0.27%] index_select spread : Elapsed 0.052 ms (5.218 ms / 100) 5.197 -> 5.203 ( +0.12%) [ +0.10% +0.08% +0.00% / +0.12% +0.40% +0.37%] index_select strided 3 : Elapsed 0.052 ms (5.202 ms / 100) 5.207 -> 5.211 ( +0.08%) [ +0.00% +0.02% +0.13% / +0.08% +0.48% +0.36%] index_select random : Elapsed 0.052 ms (5.207 ms / 100) 5.188 -> 5.191 ( +0.06%) [ +0.27% +0.19% +0.00% / +0.06% +0.50% +0.54%] index_select random_sorted : Elapsed 0.052 ms (5.202 ms / 100) B = [40, 20, 16, 5] (stride (80, 3200, 1, 16)) A = [40, 20, 4, 5] (stride (5, 200, 4000, 1)) dim = 2 2.512 -> 2.518 ( +0.24%) [ +0.28% +0.00% +0.00% / +0.36% +0.24% +0.36%] index_add_ linear : Elapsed 0.025 ms (2.519 ms / 100) 2.503 -> 2.506 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.24% +0.12% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.503 ms / 100) 2.514 -> 2.512 ( -0.08%) [ +0.04% +0.08% +0.00% / +0.16% +0.04% -0.08%] index_add_ reverse : Elapsed 0.025 ms (2.515 ms / 100) 2.503 -> 2.503 ( +0.00%) [ +0.16% +0.00% +0.12% / +0.00% +0.16% +0.08%] index_copy_ reverse : Elapsed 0.025 ms (2.507 ms / 100) 2.552 -> 2.556 ( +0.16%) [ +0.00% +0.24% +0.24% / +0.24% +0.24% +0.16%] index_add_ spread : Elapsed 0.026 ms (2.552 ms / 100) 2.602 -> 2.600 ( -0.08%) [ +0.08% +0.12% +0.00% / -0.08% +0.04% +0.23%] index_copy_ spread : Elapsed 0.026 ms (2.604 ms / 100) 2.556 -> 2.559 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.16% +0.31%] index_add_ strided 3 : Elapsed 0.026 ms (2.561 ms / 100) 2.602 -> 2.602 ( +0.00%) [ +0.00% +0.27% +0.00% / +0.00% +0.00% +0.15%] index_copy_ strided 3 : Elapsed 0.026 ms (2.602 ms / 100) 2.555 -> 2.562 ( +0.27%) [ +0.23% +0.23% +0.00% / +0.31% +0.31% +0.27%] index_add_ strided 5 : Elapsed 0.026 ms (2.561 ms / 100) 2.598 -> 2.604 ( +0.23%) [ +0.27% +0.42% +0.00% / +0.23% +0.42% +0.50%] index_copy_ strided 5 : Elapsed 0.026 ms (2.605 ms / 100) 2.556 -> 2.555 ( -0.04%) [ +0.20% +0.00% +0.00% / -0.04% +0.20% +0.31%] index_add_ strided 7 : Elapsed 0.026 ms (2.561 ms / 100) 2.599 -> 2.602 ( +0.12%) [ +0.19% +0.12% +0.00% / +0.12% +0.35% +0.42%] index_copy_ strided 7 : Elapsed 0.026 ms (2.604 ms / 100) 2.553 -> 2.559 ( +0.24%) [ +0.20% +0.31% +0.00% / +0.24% +0.39% +0.27%] index_add_ perm : Elapsed 0.026 ms (2.558 ms / 100) 2.600 -> 2.604 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.27% +0.31%] index_copy_ perm : Elapsed 0.026 ms (2.600 ms / 100) 2.558 -> 2.559 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.20% +0.16%] index_add_ perm_sorted : Elapsed 0.026 ms (2.562 ms / 100) 2.601 -> 2.601 ( +0.00%) [ +0.35% +0.00% +0.12% / +0.19% +0.15% +0.00%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.610 ms / 100) 5.461 -> 5.455 ( -0.11%) [ +0.04% +0.13% +0.00% / -0.11% +0.33% +0.60%] index_select const : Elapsed 0.055 ms (5.463 ms / 100) 5.506 -> 5.495 ( -0.20%) [ +0.00% +0.00% +0.04% / -0.20% +0.00% -0.02%] index_select wrap : Elapsed 0.055 ms (5.506 ms / 100) 5.523 -> 5.530 ( +0.13%) [ +0.09% +0.11% +0.00% / +0.13% +0.25% +0.24%] index_select linear : Elapsed 0.055 ms (5.528 ms / 100) 5.477 -> 5.468 ( -0.16%) [ +0.13% +0.00% +0.13% / -0.16% +0.46% +0.31%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.458 -> 5.457 ( -0.02%) [ +0.00% +0.22% +0.02% / -0.02% +0.24% +0.31%] index_select skip64 : Elapsed 0.055 ms (5.458 ms / 100) 5.449 -> 5.452 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.06% +0.44% +0.46%] index_select skip256 : Elapsed 0.055 ms (5.452 ms / 100) 5.483 -> 5.481 ( -0.04%) [ +0.11% +0.13% +0.00% / -0.04% +0.35% +0.36%] index_select spread : Elapsed 0.055 ms (5.489 ms / 100) 5.505 -> 5.504 ( -0.02%) [ +0.04% +0.02% +0.00% / -0.02% +0.22% +0.35%] index_select strided 3 : Elapsed 0.055 ms (5.507 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.11% +0.04% +0.00% / +0.00% +0.31% +0.27%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.496 -> 5.500 ( +0.07%) [ +0.16% +0.00% +0.20% / +0.07% +0.22% +0.29%] index_select random_sorted : Elapsed 0.055 ms (5.505 ms / 100) B = [40, 20, 16, 5] (stride (1, 3200, 200, 40)) A = [40, 20, 4, 5] (stride (400, 20, 5, 1)) dim = 2 2.369 -> 2.371 ( +0.08%) [ +0.13% +0.04% +0.00% / +0.08% +0.38% +0.46%] index_add_ linear : Elapsed 0.024 ms (2.372 ms / 100) 2.329 -> 2.330 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.52% +0.52%] index_copy_ linear : Elapsed 0.023 ms (2.330 ms / 100) 2.377 -> 2.379 ( +0.08%) [ +0.00% +0.17% +0.25% / +0.08% +0.50% +0.34%] index_add_ reverse : Elapsed 0.024 ms (2.377 ms / 100) 2.336 -> 2.339 ( +0.13%) [ +0.21% +0.00% +0.13% / +0.13% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.023 ms (2.341 ms / 100) 2.373 -> 2.375 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.42% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.374 ms / 100) 2.331 -> 2.335 ( +0.17%) [ +0.09% +0.00% +0.13% / +0.17% +0.39% +0.26%] index_copy_ spread : Elapsed 0.023 ms (2.333 ms / 100) 2.389 -> 2.391 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.38% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.389 ms / 100) 2.343 -> 2.345 ( +0.09%) [ +0.00% +0.26% +0.13% / +0.09% +0.17% +0.30%] index_copy_ strided 3 : Elapsed 0.023 ms (2.343 ms / 100) 2.387 -> 2.387 ( +0.00%) [ +0.00% +0.21% +0.08% / +0.00% +0.08% +0.25%] index_add_ strided 5 : Elapsed 0.024 ms (2.387 ms / 100) 2.343 -> 2.346 ( +0.13%) [ +0.00% +0.21% +0.17% / +0.13% +0.26% +0.17%] index_copy_ strided 5 : Elapsed 0.023 ms (2.343 ms / 100) 2.377 -> 2.377 ( +0.00%) [ +0.00% +0.17% +0.13% / +0.00% +0.08% +0.13%] index_add_ strided 7 : Elapsed 0.024 ms (2.377 ms / 100) 2.336 -> 2.337 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.17% +0.34% +0.04%] index_copy_ strided 7 : Elapsed 0.023 ms (2.340 ms / 100) 2.381 -> 2.383 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.08% +0.25% +0.21%] index_add_ perm : Elapsed 0.024 ms (2.385 ms / 100) 2.340 -> 2.340 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.13% +0.17%] index_copy_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.370 -> 2.373 ( +0.13%) [ +0.00% +0.13% +0.13% / +0.13% +0.46% +0.38%] index_add_ perm_sorted : Elapsed 0.024 ms (2.370 ms / 100) 2.329 -> 2.331 ( +0.09%) [ +0.17% +0.21% +0.00% / +0.09% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.333 ms / 100) 4.957 -> 4.957 ( +0.00%) [ +0.00% +0.10% +0.08% / +0.00% +0.38% +0.44%] index_select const : Elapsed 0.050 ms (4.957 ms / 100) 4.996 -> 5.002 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.50% +0.52%] index_select wrap : Elapsed 0.050 ms (5.000 ms / 100) 4.997 -> 5.011 ( +0.28%) [ +0.02% +0.36% +0.00% / +0.28% +0.62% +0.46%] index_select linear : Elapsed 0.050 ms (4.998 ms / 100) 4.997 -> 5.003 ( +0.12%) [ +0.12% +0.20% +0.00% / +0.12% +0.86% +0.80%] index_select reverse : Elapsed 0.050 ms (5.003 ms / 100) 4.969 -> 4.974 ( +0.10%) [ +0.10% +0.00% +0.08% / +0.10% +0.56% +0.26%] index_select skip64 : Elapsed 0.050 ms (4.974 ms / 100) 4.967 -> 4.978 ( +0.22%) [ +0.00% +0.24% +0.14% / +0.22% +0.30% +0.22%] index_select skip256 : Elapsed 0.050 ms (4.967 ms / 100) 4.999 -> 5.009 ( +0.20%) [ +0.08% +0.00% +0.10% / +0.20% +0.22% +0.30%] index_select spread : Elapsed 0.050 ms (5.003 ms / 100) 4.984 -> 4.981 ( -0.06%) [ +0.18% +0.06% +0.00% / -0.06% +0.24% +0.26%] index_select strided 3 : Elapsed 0.050 ms (4.993 ms / 100) 5.004 -> 5.015 ( +0.22%) [ +0.22% +0.18% +0.00% / +0.22% +0.22% +0.28%] index_select random : Elapsed 0.050 ms (5.015 ms / 100) 4.990 -> 4.998 ( +0.16%) [ +0.16% +0.06% +0.00% / +0.16% +0.32% +0.36%] index_select random_sorted : Elapsed 0.050 ms (4.998 ms / 100) B = [40, 20, 16, 5] (stride (1, 3200, 40, 640)) A = [40, 20, 4, 5] (stride (4, 800, 1, 160)) dim = 2 2.625 -> 2.625 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.15% +0.00% +0.23%] index_add_ linear : Elapsed 0.026 ms (2.628 ms / 100) 2.573 -> 2.577 ( +0.16%) [ +0.00% +0.04% +0.19% / +0.16% +0.16% +0.23%] index_copy_ linear : Elapsed 0.026 ms (2.573 ms / 100) 2.626 -> 2.624 ( -0.08%) [ +0.08% +0.00% +0.11% / -0.04% -0.08% +0.00%] index_add_ reverse : Elapsed 0.026 ms (2.628 ms / 100) 2.572 -> 2.571 ( -0.04%) [ +0.00% +0.19% +0.08% / +0.00% +0.12% -0.04%] index_copy_ reverse : Elapsed 0.026 ms (2.572 ms / 100) 2.623 -> 2.620 ( -0.11%) [ +0.00% +0.00% +0.11% / +0.08% -0.04% -0.11%] index_add_ spread : Elapsed 0.026 ms (2.623 ms / 100) 2.570 -> 2.566 ( -0.16%) [ +0.08% +0.16% +0.00% / +0.12% -0.04% -0.16%] index_copy_ spread : Elapsed 0.026 ms (2.572 ms / 100) 2.619 -> 2.624 ( +0.19%) [ +0.19% +0.00% +0.23% / +0.19% +0.23% +0.23%] index_add_ strided 3 : Elapsed 0.026 ms (2.624 ms / 100) 2.567 -> 2.573 ( +0.23%) [ +0.12% +0.19% +0.00% / +0.23% +0.31% +0.39%] index_copy_ strided 3 : Elapsed 0.026 ms (2.570 ms / 100) 2.622 -> 2.622 ( +0.00%) [ +0.23% +0.00% +0.11% / +0.19% +0.34% +0.00%] index_add_ strided 5 : Elapsed 0.026 ms (2.628 ms / 100) 2.570 -> 2.571 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.12% +0.23% +0.04%] index_copy_ strided 5 : Elapsed 0.026 ms (2.571 ms / 100) 2.617 -> 2.619 ( +0.08%) [ +0.00% +0.00% +0.15% / +0.08% +0.19% +0.31%] index_add_ strided 7 : Elapsed 0.026 ms (2.617 ms / 100) 2.566 -> 2.566 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.19% +0.27%] index_copy_ strided 7 : Elapsed 0.026 ms (2.570 ms / 100) 2.618 -> 2.620 ( +0.08%) [ +0.00% +0.15% +0.15% / +0.08% +0.19% +0.38%] index_add_ perm : Elapsed 0.026 ms (2.618 ms / 100) 2.566 -> 2.566 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.39% +0.31%] index_copy_ perm : Elapsed 0.026 ms (2.566 ms / 100) 2.620 -> 2.621 ( +0.04%) [ +0.15% +0.00% +0.19% / +0.04% +0.19% +0.23%] index_add_ perm_sorted : Elapsed 0.026 ms (2.624 ms / 100) 2.568 -> 2.570 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.12% +0.08%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.568 ms / 100) 6.008 -> 6.006 ( -0.03%) [ +0.20% +0.02% +0.00% / -0.03% +0.60% +0.42%] index_select const : Elapsed 0.060 ms (6.020 ms / 100) 6.005 -> 6.023 ( +0.30%) [ +0.17% +0.00% +0.07% / +0.30% +0.53% +0.42%] index_select wrap : Elapsed 0.060 ms (6.015 ms / 100) 6.015 -> 6.026 ( +0.18%) [ +0.18% +0.00% +0.23% / +0.23% +0.43% +0.18%] index_select linear : Elapsed 0.060 ms (6.026 ms / 100) 6.013 -> 6.015 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.22% +0.33%] index_select reverse : Elapsed 0.060 ms (6.015 ms / 100) 6.014 -> 6.014 ( +0.00%) [ +0.03% +0.00% +0.10% / +0.00% +0.42% +0.35%] index_select skip64 : Elapsed 0.060 ms (6.016 ms / 100) 6.018 -> 6.014 ( -0.07%) [ +0.12% +0.00% +0.10% / -0.07% +0.50% +0.48%] index_select skip256 : Elapsed 0.060 ms (6.025 ms / 100) 6.010 -> 6.009 ( -0.02%) [ +0.18% +0.15% +0.00% / -0.02% +0.42% +0.33%] index_select spread : Elapsed 0.060 ms (6.021 ms / 100) 6.010 -> 6.016 ( +0.10%) [ +0.00% +0.15% +0.05% / +0.10% +0.35% +0.42%] index_select strided 3 : Elapsed 0.060 ms (6.010 ms / 100) 6.020 -> 6.012 ( -0.13%) [ +0.02% +0.05% +0.00% / -0.13% +0.32% +0.37%] index_select random : Elapsed 0.060 ms (6.021 ms / 100) 6.011 -> 6.014 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.30% +0.40%] index_select random_sorted : Elapsed 0.060 ms (6.011 ms / 100) B = [40, 20, 16, 5] (stride (100, 1, 4000, 20)) A = [40, 20, 4, 5] (stride (400, 1, 20, 80)) dim = 2 2.502 -> 2.501 ( -0.04%) [ +0.00% +0.16% +0.12% / -0.04% +0.24% +0.24%] index_add_ linear : Elapsed 0.025 ms (2.502 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.29% +0.20%] index_copy_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.500 -> 2.507 ( +0.28%) [ +0.16% +0.20% +0.00% / +0.28% +0.56% +0.68%] index_add_ reverse : Elapsed 0.025 ms (2.504 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.00% +0.33% +0.61%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.499 -> 2.498 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.40% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.500 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.20% +0.33%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.496 -> 2.496 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.52% +0.76%] index_add_ strided 3 : Elapsed 0.025 ms (2.499 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.20% +0.16% +0.00% / +0.04% +0.70% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.450 ms / 100) 2.498 -> 2.500 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.28% +0.12%] index_add_ strided 5 : Elapsed 0.025 ms (2.500 ms / 100) 2.447 -> 2.453 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.41% +0.25%] index_copy_ strided 5 : Elapsed 0.024 ms (2.449 ms / 100) 2.501 -> 2.504 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.28% +0.40%] index_add_ strided 7 : Elapsed 0.025 ms (2.506 ms / 100) 2.455 -> 2.450 ( -0.20%) [ +0.04% +0.00% +0.00% / -0.20% +0.12% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.497 -> 2.502 ( +0.20%) [ +0.16% +0.32% +0.00% / +0.36% +0.20% +0.40%] index_add_ perm : Elapsed 0.025 ms (2.501 ms / 100) 2.450 -> 2.454 ( +0.16%) [ +0.00% +0.04% +0.00% / +0.24% +0.20% +0.16%] index_copy_ perm : Elapsed 0.024 ms (2.450 ms / 100) 2.497 -> 2.500 ( +0.12%) [ +0.20% +0.16% +0.00% / +0.12% +0.32% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.502 ms / 100) 2.449 -> 2.448 ( -0.04%) [ +0.04% +0.16% +0.00% / -0.04% +0.33% +0.45%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.450 ms / 100) 5.394 -> 5.394 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.43% +0.33%] index_select const : Elapsed 0.054 ms (5.400 ms / 100) 5.425 -> 5.416 ( -0.17%) [ +0.11% +0.00% +0.02% / -0.17% +0.48% +0.52%] index_select wrap : Elapsed 0.054 ms (5.431 ms / 100) 5.451 -> 5.466 ( +0.28%) [ +0.26% +0.00% +0.24% / +0.28% +0.70% +0.37%] index_select linear : Elapsed 0.055 ms (5.465 ms / 100) 5.428 -> 5.434 ( +0.11%) [ +0.02% +0.15% +0.00% / +0.11% +0.48% +0.39%] index_select reverse : Elapsed 0.054 ms (5.429 ms / 100) 5.410 -> 5.418 ( +0.15%) [ +0.11% +0.04% +0.00% / +0.18% +0.15% +0.33%] index_select skip64 : Elapsed 0.054 ms (5.416 ms / 100) 5.401 -> 5.391 ( -0.19%) [ +0.37% +0.00% +0.22% / -0.19% +0.57% +0.44%] index_select skip256 : Elapsed 0.054 ms (5.421 ms / 100) 5.425 -> 5.432 ( +0.13%) [ +0.17% +0.02% +0.00% / +0.13% +0.26% +0.26%] index_select spread : Elapsed 0.054 ms (5.434 ms / 100) 5.423 -> 5.425 ( +0.04%) [ +0.04% +0.13% +0.00% / +0.04% +0.48% +0.39%] index_select strided 3 : Elapsed 0.054 ms (5.425 ms / 100) 5.427 -> 5.448 ( +0.39%) [ +0.00% +0.44% +0.17% / +0.46% +0.39% +0.61%] index_select random : Elapsed 0.054 ms (5.427 ms / 100) 5.425 -> 5.426 ( +0.02%) [ +0.26% +0.13% +0.00% / +0.02% +0.29% +0.39%] index_select random_sorted : Elapsed 0.054 ms (5.439 ms / 100) B = [40, 20, 16, 5] (stride (20, 1, 4000, 800)) A = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) dim = 2 2.366 -> 2.368 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.08% +0.25%] index_add_ linear : Elapsed 0.024 ms (2.366 ms / 100) 2.317 -> 2.320 ( +0.13%) [ +0.52% +0.00% +0.09% / +0.13% +0.30% +0.17%] index_copy_ linear : Elapsed 0.023 ms (2.329 ms / 100) 2.364 -> 2.364 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.00% +0.30% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.318 -> 2.320 ( +0.09%) [ +0.13% +0.00% +0.00% / +0.09% +0.52% +0.13%] index_copy_ reverse : Elapsed 0.023 ms (2.321 ms / 100) 2.369 -> 2.367 ( -0.08%) [ +0.08% +0.13% +0.00% / +0.13% -0.08% +0.13%] index_add_ spread : Elapsed 0.024 ms (2.371 ms / 100) 2.318 -> 2.319 ( +0.04%) [ +0.00% +0.09% +0.13% / +0.04% +0.04% +0.09%] index_copy_ spread : Elapsed 0.023 ms (2.318 ms / 100) 2.375 -> 2.374 ( -0.04%) [ +0.08% +0.00% +0.17% / +0.04% -0.04% +0.04%] index_add_ strided 3 : Elapsed 0.024 ms (2.377 ms / 100) 2.326 -> 2.326 ( +0.00%) [ +0.13% +0.21% +0.00% / +0.00% +0.00% +0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.369 -> 2.365 ( -0.17%) [ +0.00% +0.04% +0.00% / -0.17% +0.00% +0.13%] index_add_ strided 5 : Elapsed 0.024 ms (2.369 ms / 100) 2.324 -> 2.323 ( -0.04%) [ +0.09% +0.09% +0.00% / +0.17% -0.04% +0.09%] index_copy_ strided 5 : Elapsed 0.023 ms (2.326 ms / 100) 2.363 -> 2.365 ( +0.08%) [ +0.13% +0.30% +0.00% / +0.08% +0.51% +0.13%] index_add_ strided 7 : Elapsed 0.024 ms (2.366 ms / 100) 2.322 -> 2.317 ( -0.22%) [ +0.00% +0.04% +0.13% / -0.22% +0.09% -0.13%] index_copy_ strided 7 : Elapsed 0.023 ms (2.322 ms / 100) 2.361 -> 2.362 ( +0.04%) [ +0.17% +0.13% +0.00% / +0.04% +0.21% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.365 ms / 100) 2.317 -> 2.315 ( -0.09%) [ +0.00% +0.04% +0.00% / -0.09% +0.22% -0.04%] index_copy_ perm : Elapsed 0.023 ms (2.317 ms / 100) 2.364 -> 2.365 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.42% +0.47%] index_add_ perm_sorted : Elapsed 0.024 ms (2.366 ms / 100) 2.318 -> 2.318 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.13% +0.13% +0.00%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.320 ms / 100) 4.917 -> 4.923 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.31% +0.24%] index_select const : Elapsed 0.049 ms (4.921 ms / 100) 4.947 -> 4.948 ( +0.02%) [ +0.28% +0.00% +0.02% / +0.02% +0.40% +0.26%] index_select wrap : Elapsed 0.050 ms (4.961 ms / 100) 4.946 -> 4.950 ( +0.08%) [ +0.22% +0.04% +0.00% / +0.08% +0.28% +0.38%] index_select linear : Elapsed 0.050 ms (4.957 ms / 100) 4.939 -> 4.950 ( +0.22%) [ +0.26% +0.00% +0.16% / +0.22% +0.53% +0.63%] index_select reverse : Elapsed 0.050 ms (4.952 ms / 100) 4.905 -> 4.895 ( -0.20%) [ +0.06% +0.06% +0.00% / -0.20% +0.16% +0.22%] index_select skip64 : Elapsed 0.049 ms (4.908 ms / 100) 4.895 -> 4.905 ( +0.20%) [ +0.12% +0.06% +0.00% / +0.20% +0.49% +0.61%] index_select skip256 : Elapsed 0.049 ms (4.901 ms / 100) 4.944 -> 4.944 ( +0.00%) [ +0.22% +0.12% +0.00% / +0.00% +0.30% +0.28%] index_select spread : Elapsed 0.050 ms (4.955 ms / 100) 4.960 -> 4.959 ( -0.02%) [ +0.12% +0.00% +0.10% / -0.02% +0.18% +0.22%] index_select strided 3 : Elapsed 0.050 ms (4.966 ms / 100) 4.943 -> 4.951 ( +0.16%) [ +0.08% +0.04% +0.00% / +0.16% +0.28% +0.20%] index_select random : Elapsed 0.049 ms (4.947 ms / 100) 4.946 -> 4.959 ( +0.26%) [ +0.28% +0.04% +0.00% / +0.30% +0.26% +0.49%] index_select random_sorted : Elapsed 0.050 ms (4.960 ms / 100) B = [40, 20, 16, 5] (stride (16, 640, 1, 12800)) A = [40, 20, 4, 5] (stride (1, 40, 800, 3200)) dim = 2 2.650 -> 2.660 ( +0.38%) [ +0.30% +0.19% +0.00% / +0.38% +0.53% +0.57%] index_add_ linear : Elapsed 0.027 ms (2.658 ms / 100) 2.603 -> 2.604 ( +0.04%) [ +0.00% +0.27% +0.12% / +0.04% +0.38% +0.50%] index_copy_ linear : Elapsed 0.026 ms (2.603 ms / 100) 2.652 -> 2.656 ( +0.15%) [ +0.11% +0.00% +0.15% / +0.15% +0.57% +0.90%] index_add_ reverse : Elapsed 0.027 ms (2.655 ms / 100) 2.602 -> 2.603 ( +0.04%) [ +0.15% +0.00% +0.00% / +0.04% +0.65% +0.38%] index_copy_ reverse : Elapsed 0.026 ms (2.606 ms / 100) 2.691 -> 2.689 ( -0.07%) [ +0.00% +0.15% +0.11% / -0.07% +0.63% +0.41%] index_add_ spread : Elapsed 0.027 ms (2.691 ms / 100) 2.699 -> 2.702 ( +0.11%) [ +0.00% +0.30% +0.15% / +0.11% +0.67% +0.70%] index_copy_ spread : Elapsed 0.027 ms (2.699 ms / 100) 2.687 -> 2.687 ( +0.00%) [ +0.15% +0.04% +0.00% / +0.00% +0.60% +0.74%] index_add_ strided 3 : Elapsed 0.027 ms (2.691 ms / 100) 2.701 -> 2.704 ( +0.11%) [ +0.00% +0.04% +0.00% / +0.11% +0.48% +0.44%] index_copy_ strided 3 : Elapsed 0.027 ms (2.701 ms / 100) 2.688 -> 2.692 ( +0.15%) [ +0.07% +0.00% +0.15% / +0.15% +0.45% +0.37%] index_add_ strided 5 : Elapsed 0.027 ms (2.690 ms / 100) 2.700 -> 2.707 ( +0.26%) [ +0.00% +0.15% +0.11% / +0.26% +0.41% +0.26%] index_copy_ strided 5 : Elapsed 0.027 ms (2.700 ms / 100) 2.687 -> 2.696 ( +0.33%) [ +0.00% +0.19% +0.22% / +0.33% +0.74% +0.67%] index_add_ strided 7 : Elapsed 0.027 ms (2.687 ms / 100) 2.702 -> 2.707 ( +0.19%) [ +0.11% +0.22% +0.00% / +0.19% +0.48% +0.56%] index_copy_ strided 7 : Elapsed 0.027 ms (2.705 ms / 100) 2.690 -> 2.697 ( +0.26%) [ +0.00% +0.19% +0.26% / +0.26% +0.37% +0.41%] index_add_ perm : Elapsed 0.027 ms (2.690 ms / 100) 2.704 -> 2.705 ( +0.04%) [ +0.15% +0.07% +0.00% / +0.04% +0.26% +0.37%] index_copy_ perm : Elapsed 0.027 ms (2.708 ms / 100) 2.691 -> 2.690 ( -0.04%) [ +0.15% +0.00% +0.04% / -0.04% +0.26% +0.22%] index_add_ perm_sorted : Elapsed 0.027 ms (2.695 ms / 100) 2.701 -> 2.708 ( +0.26%) [ +0.00% +0.19% +0.04% / +0.26% +0.41% +0.48%] index_copy_ perm_sorted : Elapsed 0.027 ms (2.701 ms / 100) 6.041 -> 6.045 ( +0.07%) [ +0.10% +0.00% +0.02% / +0.07% +0.36% +0.38%] index_select const : Elapsed 0.060 ms (6.047 ms / 100) 6.027 -> 6.027 ( +0.00%) [ +0.05% +0.12% +0.00% / +0.00% +0.55% +0.51%] index_select wrap : Elapsed 0.060 ms (6.030 ms / 100) 6.042 -> 6.041 ( -0.02%) [ +0.00% +0.12% +0.03% / -0.02% +0.60% +0.43%] index_select linear : Elapsed 0.060 ms (6.042 ms / 100) 6.036 -> 6.034 ( -0.03%) [ +0.12% +0.05% +0.00% / -0.03% +0.43% +0.41%] index_select reverse : Elapsed 0.060 ms (6.043 ms / 100) 6.039 -> 6.039 ( +0.00%) [ +0.07% +0.17% +0.00% / +0.00% +0.30% +0.22%] index_select skip64 : Elapsed 0.060 ms (6.043 ms / 100) 6.045 -> 6.046 ( +0.02%) [ +0.05% +0.08% +0.00% / +0.02% +0.28% +0.40%] index_select skip256 : Elapsed 0.060 ms (6.048 ms / 100) 6.029 -> 6.023 ( -0.10%) [ +0.00% +0.13% +0.00% / -0.10% +0.32% +0.30%] index_select spread : Elapsed 0.060 ms (6.029 ms / 100) 6.041 -> 6.039 ( -0.03%) [ +0.05% +0.00% +0.02% / -0.03% +0.35% +0.30%] index_select strided 3 : Elapsed 0.060 ms (6.044 ms / 100) 6.026 -> 6.028 ( +0.03%) [ +0.00% +0.07% +0.10% / +0.03% +0.43% +0.40%] index_select random : Elapsed 0.060 ms (6.026 ms / 100) 6.036 -> 6.038 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.38% +0.38%] index_select random_sorted : Elapsed 0.060 ms (6.036 ms / 100) B = [40, 20, 16, 5] (stride (1, 640, 40, 12800)) A = [40, 20, 4, 5] (stride (20, 1, 800, 3200)) dim = 2 2.402 -> 2.399 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.12% +0.08% +0.21%] index_add_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.362 -> 2.359 ( -0.13%) [ +0.04% +0.00% +0.00% / -0.13% -0.13% +0.04%] index_copy_ linear : Elapsed 0.024 ms (2.363 ms / 100) 2.398 -> 2.401 ( +0.13%) [ +0.25% +0.17% +0.00% / +0.38% +0.13% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.361 -> 2.362 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.04% +0.17%] index_copy_ reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.400 -> 2.403 ( +0.13%) [ +0.00% +0.29% +0.33% / +0.17% +0.21% +0.13%] index_add_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.362 -> 2.360 ( -0.08%) [ +0.08% +0.21% +0.00% / -0.08% +0.04% +0.34%] index_copy_ spread : Elapsed 0.024 ms (2.364 ms / 100) 2.409 -> 2.408 ( -0.04%) [ +0.00% +0.25% +0.12% / -0.04% +0.04% +0.21%] index_add_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.08% +0.00% +0.13% / -0.04% +0.13% +0.08%] index_copy_ strided 3 : Elapsed 0.024 ms (2.366 ms / 100) 2.393 -> 2.393 ( +0.00%) [ +0.13% +0.21% +0.00% / +0.25% +0.00% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.396 ms / 100) 2.354 -> 2.355 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.08% +0.04% +0.17%] index_copy_ strided 5 : Elapsed 0.024 ms (2.354 ms / 100) 2.399 -> 2.397 ( -0.08%) [ +0.13% +0.25% +0.00% / -0.08% +0.00% +0.38%] index_add_ strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.358 -> 2.359 ( +0.04%) [ +0.08% +0.17% +0.00% / +0.04% +0.34% +0.34%] index_copy_ strided 7 : Elapsed 0.024 ms (2.360 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.00% +0.21%] index_add_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.362 -> 2.363 ( +0.04%) [ +0.04% +0.00% +0.13% / +0.17% +0.04% +0.04%] index_copy_ perm : Elapsed 0.024 ms (2.363 ms / 100) 2.394 -> 2.400 ( +0.25%) [ +0.38% +0.21% +0.00% / +0.38% +0.25% +0.58%] index_add_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.13% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.361 ms / 100) 4.974 -> 4.976 ( +0.04%) [ +0.16% +0.02% +0.00% / +0.04% +0.38% +0.60%] index_select const : Elapsed 0.050 ms (4.982 ms / 100) 5.038 -> 5.039 ( +0.02%) [ +0.00% +0.10% +0.00% / +0.02% +0.32% +0.14%] index_select wrap : Elapsed 0.050 ms (5.038 ms / 100) 5.034 -> 5.042 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.36% +0.38%] index_select linear : Elapsed 0.050 ms (5.044 ms / 100) 5.029 -> 5.038 ( +0.18%) [ +0.12% +0.14% +0.00% / +0.18% +0.42% +0.38%] index_select reverse : Elapsed 0.050 ms (5.035 ms / 100) 4.973 -> 4.975 ( +0.04%) [ +0.02% +0.00% +0.04% / +0.04% +0.38% +0.44%] index_select skip64 : Elapsed 0.050 ms (4.974 ms / 100) 4.970 -> 4.968 ( -0.04%) [ +0.00% +0.02% +0.02% / -0.04% +0.44% +0.42%] index_select skip256 : Elapsed 0.050 ms (4.970 ms / 100) 5.029 -> 5.030 ( +0.02%) [ +0.08% +0.02% +0.00% / +0.02% +0.32% +0.46%] index_select spread : Elapsed 0.050 ms (5.033 ms / 100) 5.064 -> 5.066 ( +0.04%) [ +0.06% +0.00% +0.06% / +0.04% +0.18% +0.16%] index_select strided 3 : Elapsed 0.051 ms (5.067 ms / 100) 5.041 -> 5.044 ( +0.06%) [ +0.00% +0.10% +0.12% / +0.06% +0.32% +0.42%] index_select random : Elapsed 0.050 ms (5.041 ms / 100) 5.059 -> 5.055 ( -0.08%) [ +0.00% +0.02% +0.06% / -0.08% +0.22% +0.30%] index_select random_sorted : Elapsed 0.051 ms (5.059 ms / 100) out_shape = [40, 20, 4, 16] in_shape = [40, 20, 4, 5] idx_dim = 3 B = [40, 20, 4, 16] (stride (1280, 64, 1, 4)) A = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) dim = 3 2.269 -> 2.270 ( +0.04%) [ +0.09% +0.00% +0.13% / +0.04% +0.84% +0.53%] index_add_ linear : Elapsed 0.023 ms (2.271 ms / 100) 2.220 -> 2.229 ( +0.41%) [ +0.00% +0.36% +0.27% / +0.41% +0.90% +0.68%] index_copy_ linear : Elapsed 0.022 ms (2.220 ms / 100) 2.267 -> 2.273 ( +0.26%) [ +0.00% +0.62% +0.26% / +0.26% +0.57% +0.93%] index_add_ reverse : Elapsed 0.023 ms (2.267 ms / 100) 2.223 -> 2.225 ( +0.09%) [ +0.00% +0.22% +0.31% / +0.09% +0.54% +0.58%] index_copy_ reverse : Elapsed 0.022 ms (2.223 ms / 100) 2.285 -> 2.289 ( +0.18%) [ +0.18% +0.22% +0.00% / +0.18% +0.70% +0.53%] index_add_ spread : Elapsed 0.023 ms (2.289 ms / 100) 2.253 -> 2.255 ( +0.09%) [ +0.00% +0.36% +0.13% / +0.09% +0.98% +0.58%] index_copy_ spread : Elapsed 0.023 ms (2.253 ms / 100) 2.287 -> 2.290 ( +0.13%) [ +0.00% +0.52% +0.04% / +0.13% +0.31% +0.35%] index_add_ strided 3 : Elapsed 0.023 ms (2.287 ms / 100) 2.259 -> 2.255 ( -0.18%) [ +0.22% +0.09% +0.00% / -0.18% +0.13% +0.09%] index_copy_ strided 3 : Elapsed 0.023 ms (2.264 ms / 100) 2.274 -> 2.276 ( +0.09%) [ +0.22% +0.26% +0.00% / +0.09% +0.48% +0.79%] index_add_ strided 5 : Elapsed 0.023 ms (2.279 ms / 100) 2.233 -> 2.232 ( -0.04%) [ +0.36% +0.00% +0.04% / -0.04% +0.67% +0.67%] index_copy_ strided 5 : Elapsed 0.022 ms (2.241 ms / 100) 2.283 -> 2.280 ( -0.13%) [ +0.18% +0.31% +0.00% / -0.13% +0.31% +0.53%] index_add_ strided 7 : Elapsed 0.023 ms (2.287 ms / 100) 2.259 -> 2.256 ( -0.13%) [ +0.00% +0.04% +0.00% / -0.13% +0.49% +0.31%] index_copy_ strided 7 : Elapsed 0.023 ms (2.259 ms / 100) 2.286 -> 2.285 ( -0.04%) [ +0.00% +0.00% +0.26% / -0.04% +0.52% +0.31%] index_add_ perm : Elapsed 0.023 ms (2.286 ms / 100) 2.250 -> 2.252 ( +0.09%) [ +0.00% +0.49% +0.40% / +0.09% +0.80% +0.67%] index_copy_ perm : Elapsed 0.023 ms (2.250 ms / 100) 2.287 -> 2.278 ( -0.39%) [ +0.00% +0.22% +0.09% / -0.39% +0.66% +0.35%] index_add_ perm_sorted : Elapsed 0.023 ms (2.287 ms / 100) 2.254 -> 2.249 ( -0.22%) [ +0.13% +0.00% +0.00% / -0.22% +0.44% +0.53%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.257 ms / 100) 4.585 -> 4.600 ( +0.33%) [ +0.46% +0.35% +0.00% / +0.33% +0.96% +0.63%] index_select const : Elapsed 0.046 ms (4.606 ms / 100) 4.646 -> 4.651 ( +0.11%) [ +0.00% +0.17% +0.02% / +0.11% +0.58% +0.58%] index_select wrap : Elapsed 0.046 ms (4.646 ms / 100) 4.658 -> 4.671 ( +0.28%) [ +0.09% +0.00% +0.13% / +0.28% +0.94% +0.82%] index_select linear : Elapsed 0.047 ms (4.662 ms / 100) 4.637 -> 4.647 ( +0.22%) [ +0.00% +0.30% +0.00% / +0.22% +0.82% +1.01%] index_select reverse : Elapsed 0.046 ms (4.637 ms / 100) 4.570 -> 4.569 ( -0.02%) [ +0.00% +0.11% +0.00% / -0.02% +0.57% +0.68%] index_select skip64 : Elapsed 0.046 ms (4.570 ms / 100) 4.576 -> 4.603 ( +0.59%) [ +0.55% +0.52% +0.00% / +0.59% +0.81% +1.18%] index_select skip256 : Elapsed 0.046 ms (4.601 ms / 100) 4.650 -> 4.649 ( -0.02%) [ +0.28% +0.00% +0.22% / -0.02% +0.67% +0.86%] index_select spread : Elapsed 0.047 ms (4.663 ms / 100) 4.656 -> 4.670 ( +0.30%) [ +0.32% +0.21% +0.00% / +0.30% +0.90% +0.79%] index_select strided 3 : Elapsed 0.047 ms (4.671 ms / 100) 4.658 -> 4.661 ( +0.06%) [ +0.28% +0.00% +0.06% / +0.06% +0.82% +1.12%] index_select random : Elapsed 0.047 ms (4.671 ms / 100) 4.638 -> 4.638 ( +0.00%) [ +0.13% +0.15% +0.00% / +0.00% +1.10% +0.71%] index_select random_sorted : Elapsed 0.046 ms (4.644 ms / 100) B = [40, 20, 4, 16] (stride (1280, 1, 320, 20)) A = [40, 20, 4, 5] (stride (5, 200, 4000, 1)) dim = 3 2.555 -> 2.558 ( +0.12%) [ +0.23% +0.20% +0.00% / +0.12% +0.63% +0.63%] index_add_ linear : Elapsed 0.026 ms (2.561 ms / 100) 2.475 -> 2.476 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.65% +0.53%] index_copy_ linear : Elapsed 0.025 ms (2.477 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.23% +0.00% +0.12% / +0.20% +0.66% +0.59%] index_add_ reverse : Elapsed 0.026 ms (2.564 ms / 100) 2.475 -> 2.492 ( +0.69%) [ +0.00% +0.16% +0.36% / +0.69% +0.85% +0.69%] index_copy_ reverse : Elapsed 0.025 ms (2.475 ms / 100) 2.536 -> 2.541 ( +0.20%) [ +0.35% +0.00% +0.24% / +0.20% +0.43% +0.32%] index_add_ spread : Elapsed 0.025 ms (2.545 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +0.32% +0.32%] index_copy_ spread : Elapsed 0.025 ms (2.471 ms / 100) 2.538 -> 2.537 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.55% +0.35%] index_add_ strided 3 : Elapsed 0.025 ms (2.539 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.16% +0.00% +0.08% / +0.12% +0.24% +0.32%] index_copy_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.556 -> 2.555 ( -0.04%) [ +0.00% +0.23% +0.08% / -0.04% +0.51% +0.59%] index_add_ strided 5 : Elapsed 0.026 ms (2.556 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.16% +0.24% +0.00% / +0.20% +0.77% +0.81%] index_copy_ strided 5 : Elapsed 0.025 ms (2.480 ms / 100) 2.551 -> 2.553 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.71%] index_add_ strided 7 : Elapsed 0.026 ms (2.553 ms / 100) 2.484 -> 2.485 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.56% +0.40%] index_copy_ strided 7 : Elapsed 0.025 ms (2.485 ms / 100) 2.562 -> 2.563 ( +0.04%) [ +0.20% +0.12% +0.00% / +0.04% +0.35% +0.59%] index_add_ perm : Elapsed 0.026 ms (2.567 ms / 100) 2.481 -> 2.480 ( -0.04%) [ +0.00% +0.16% +0.20% / -0.04% +0.69% +0.44%] index_copy_ perm : Elapsed 0.025 ms (2.481 ms / 100) 2.557 -> 2.561 ( +0.16%) [ +0.27% +0.20% +0.00% / +0.16% +0.51% +0.59%] index_add_ perm_sorted : Elapsed 0.026 ms (2.564 ms / 100) 2.482 -> 2.488 ( +0.24%) [ +0.00% +0.04% +0.12% / +0.24% +0.48% +0.64%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.482 ms / 100) 5.610 -> 5.618 ( +0.14%) [ +0.21% +0.05% +0.00% / +0.14% +0.77% +0.68%] index_select const : Elapsed 0.056 ms (5.622 ms / 100) 5.608 -> 5.605 ( -0.05%) [ +0.30% +0.04% +0.00% / -0.05% +0.61% +0.84%] index_select wrap : Elapsed 0.056 ms (5.625 ms / 100) 5.619 -> 5.621 ( +0.04%) [ +0.05% +0.14% +0.00% / +0.04% +0.46% +0.55%] index_select linear : Elapsed 0.056 ms (5.622 ms / 100) 5.619 -> 5.622 ( +0.05%) [ +0.02% +0.04% +0.00% / +0.05% +0.55% +0.53%] index_select reverse : Elapsed 0.056 ms (5.620 ms / 100) 5.613 -> 5.625 ( +0.21%) [ +0.00% +0.14% +0.14% / +0.21% +0.62% +0.69%] index_select skip64 : Elapsed 0.056 ms (5.613 ms / 100) 5.605 -> 5.625 ( +0.36%) [ +0.14% +0.27% +0.00% / +0.36% +0.66% +0.82%] index_select skip256 : Elapsed 0.056 ms (5.613 ms / 100) 5.611 -> 5.624 ( +0.23%) [ +0.09% +0.00% +0.16% / +0.23% +0.78% +0.62%] index_select spread : Elapsed 0.056 ms (5.616 ms / 100) 5.604 -> 5.617 ( +0.23%) [ +0.27% +0.00% +0.00% / +0.23% +0.68% +0.71%] index_select strided 3 : Elapsed 0.056 ms (5.619 ms / 100) 5.616 -> 5.620 ( +0.07%) [ +0.00% +0.00% +0.04% / +0.07% +0.66% +0.71%] index_select random : Elapsed 0.056 ms (5.616 ms / 100) 5.605 -> 5.616 ( +0.20%) [ +0.29% +0.27% +0.00% / +0.20% +0.82% +0.84%] index_select random_sorted : Elapsed 0.056 ms (5.621 ms / 100) B = [40, 20, 4, 16] (stride (64, 2560, 16, 1)) A = [40, 20, 4, 5] (stride (400, 1, 100, 20)) dim = 3 2.370 -> 2.380 ( +0.42%) [ +0.17% +0.34% +0.00% / +0.42% +2.28% +2.24%] index_add_ linear : Elapsed 0.024 ms (2.374 ms / 100) 2.331 -> 2.336 ( +0.21%) [ +0.00% +0.21% +0.04% / +0.21% +2.15% +2.23%] index_copy_ linear : Elapsed 0.023 ms (2.331 ms / 100) 2.369 -> 2.375 ( +0.25%) [ +0.00% +0.63% +0.63% / +0.25% +2.57% +2.53%] index_add_ reverse : Elapsed 0.024 ms (2.369 ms / 100) 2.333 -> 2.341 ( +0.34%) [ +0.00% +0.73% +0.64% / +0.34% +2.10% +2.36%] index_copy_ reverse : Elapsed 0.023 ms (2.333 ms / 100) 2.398 -> 2.402 ( +0.17%) [ +0.29% +0.00% +0.38% / +0.17% +2.67% +2.71%] index_add_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.400 -> 2.403 ( +0.13%) [ +0.08% +0.00% +0.33% / +0.13% +2.29% +2.37%] index_copy_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.37% +0.17% +0.00% / -0.12% +2.32% +2.20%] index_add_ strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +1.91% +1.87%] index_copy_ strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.46% +0.00% +0.17% / -0.04% +2.58% +2.54%] index_add_ strided 5 : Elapsed 0.024 ms (2.412 ms / 100) 2.398 -> 2.397 ( -0.04%) [ +0.46% +0.00% +0.04% / -0.04% +2.25% +2.29%] index_copy_ strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.398 ( -0.33%) [ +0.00% +0.21% +0.04% / -0.33% +2.33% +2.24%] index_add_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.398 -> 2.394 ( -0.17%) [ +0.08% +0.33% +0.00% / -0.17% +2.25% +2.34%] index_copy_ strided 7 : Elapsed 0.024 ms (2.400 ms / 100) 2.405 -> 2.416 ( +0.46%) [ +0.37% +0.21% +0.00% / +0.46% +2.62% +2.41%] index_add_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.407 -> 2.413 ( +0.25%) [ +0.25% +0.25% +0.00% / +0.25% +1.99% +1.91%] index_copy_ perm : Elapsed 0.024 ms (2.413 ms / 100) 2.401 -> 2.389 ( -0.50%) [ +0.00% +0.04% +0.21% / -0.50% +2.08% +2.46%] index_add_ perm_sorted : Elapsed 0.024 ms (2.401 ms / 100) 2.400 -> 2.394 ( -0.25%) [ +0.13% +0.00% +0.21% / -0.25% +1.75% +1.92%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 4.959 -> 4.971 ( +0.24%) [ +0.12% +0.00% +0.71% / +0.24% +1.11% +1.25%] index_select const : Elapsed 0.050 ms (4.965 ms / 100) 5.013 -> 5.018 ( +0.10%) [ +0.22% +0.08% +0.00% / +0.10% +1.44% +1.60%] index_select wrap : Elapsed 0.050 ms (5.024 ms / 100) 5.044 -> 5.043 ( -0.02%) [ +0.14% +0.18% +0.00% / -0.02% +0.83% +1.19%] index_select linear : Elapsed 0.051 ms (5.051 ms / 100) 5.042 -> 5.047 ( +0.10%) [ +0.00% +0.16% +0.04% / +0.10% +1.03% +1.01%] index_select reverse : Elapsed 0.050 ms (5.042 ms / 100) 4.982 -> 4.982 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +1.02% +0.98%] index_select skip64 : Elapsed 0.050 ms (4.982 ms / 100) 4.986 -> 4.960 ( -0.52%) [ +0.12% +0.14% +0.00% / -0.52% +0.52% +0.60%] index_select skip256 : Elapsed 0.050 ms (4.992 ms / 100) 5.039 -> 5.031 ( -0.16%) [ +0.12% +0.00% +0.10% / -0.16% +1.43% +1.31%] index_select spread : Elapsed 0.050 ms (5.045 ms / 100) 5.036 -> 5.037 ( +0.02%) [ +0.48% +0.28% +0.00% / +0.02% +1.33% +1.65%] index_select strided 3 : Elapsed 0.051 ms (5.060 ms / 100) 5.038 -> 5.038 ( +0.00%) [ +0.28% +0.06% +0.00% / +0.00% +1.45% +1.59%] index_select random : Elapsed 0.051 ms (5.052 ms / 100) 5.003 -> 5.023 ( +0.40%) [ +0.34% +0.00% +0.18% / +0.40% +1.56% +1.94%] index_select random_sorted : Elapsed 0.050 ms (5.020 ms / 100) B = [40, 20, 4, 16] (stride (64, 2560, 16, 1)) A = [40, 20, 4, 5] (stride (1, 800, 40, 160)) dim = 3 2.553 -> 2.554 ( +0.04%) [ +0.00% +0.00% +0.12% / +0.04% +0.67% +0.59%] index_add_ linear : Elapsed 0.026 ms (2.553 ms / 100) 2.493 -> 2.496 ( +0.12%) [ +0.00% +0.04% +0.16% / +0.12% +0.64% +0.60%] index_copy_ linear : Elapsed 0.025 ms (2.493 ms / 100) 2.551 -> 2.558 ( +0.27%) [ +0.08% +0.08% +0.00% / +0.27% +0.74% +0.51%] index_add_ reverse : Elapsed 0.026 ms (2.553 ms / 100) 2.491 -> 2.496 ( +0.20%) [ +0.08% +0.12% +0.00% / +0.20% +0.40% +0.40%] index_copy_ reverse : Elapsed 0.025 ms (2.493 ms / 100) 2.583 -> 2.585 ( +0.08%) [ +0.19% +0.00% +0.04% / +0.08% +0.50% +0.46%] index_add_ spread : Elapsed 0.026 ms (2.588 ms / 100) 2.557 -> 2.562 ( +0.20%) [ +0.27% +0.08% +0.00% / +0.20% +0.59% +0.43%] index_copy_ spread : Elapsed 0.026 ms (2.564 ms / 100) 2.584 -> 2.586 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.54%] index_add_ strided 3 : Elapsed 0.026 ms (2.584 ms / 100) 2.561 -> 2.561 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.35% +0.27%] index_copy_ strided 3 : Elapsed 0.026 ms (2.561 ms / 100) 2.578 -> 2.580 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.43% +0.35%] index_add_ strided 5 : Elapsed 0.026 ms (2.579 ms / 100) 2.556 -> 2.556 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.39% +0.35%] index_copy_ strided 5 : Elapsed 0.026 ms (2.556 ms / 100) 2.575 -> 2.579 ( +0.16%) [ +0.31% +0.12% +0.00% / +0.16% +0.54% +0.54%] index_add_ strided 7 : Elapsed 0.026 ms (2.583 ms / 100) 2.554 -> 2.561 ( +0.27%) [ +0.12% +0.00% +0.08% / +0.27% +0.27% +0.39%] index_copy_ strided 7 : Elapsed 0.026 ms (2.557 ms / 100) 2.583 -> 2.584 ( +0.04%) [ +0.12% +0.19% +0.00% / +0.04% +0.43% +0.43%] index_add_ perm : Elapsed 0.026 ms (2.586 ms / 100) 2.558 -> 2.563 ( +0.20%) [ +0.16% +0.08% +0.00% / +0.20% +0.43% +0.47%] index_copy_ perm : Elapsed 0.026 ms (2.562 ms / 100) 2.578 -> 2.582 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.70% +0.62%] index_add_ perm_sorted : Elapsed 0.026 ms (2.582 ms / 100) 2.557 -> 2.560 ( +0.12%) [ +0.23% +0.27% +0.00% / +0.12% +0.55% +0.43%] index_copy_ perm_sorted : Elapsed 0.026 ms (2.563 ms / 100) 5.628 -> 5.636 ( +0.14%) [ +0.02% +0.07% +0.00% / +0.14% +0.73% +0.68%] index_select const : Elapsed 0.056 ms (5.629 ms / 100) 5.632 -> 5.637 ( +0.09%) [ +0.00% +0.05% +0.04% / +0.09% +0.25% +0.48%] index_select wrap : Elapsed 0.056 ms (5.632 ms / 100) 5.648 -> 5.650 ( +0.04%) [ +0.00% +0.05% +0.04% / +0.04% +0.53% +0.55%] index_select linear : Elapsed 0.056 ms (5.648 ms / 100) 5.632 -> 5.633 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.50% +0.39%] index_select reverse : Elapsed 0.056 ms (5.633 ms / 100) 5.621 -> 5.623 ( +0.04%) [ +0.00% +0.04% +0.12% / +0.04% +0.52% +0.66%] index_select skip64 : Elapsed 0.056 ms (5.621 ms / 100) 5.623 -> 5.627 ( +0.07%) [ +0.00% +0.12% +0.04% / +0.07% +0.59% +0.55%] index_select skip256 : Elapsed 0.056 ms (5.623 ms / 100) 5.623 -> 5.624 ( +0.02%) [ +0.14% +0.00% +0.07% / +0.02% +0.62% +0.53%] index_select spread : Elapsed 0.056 ms (5.631 ms / 100) 5.625 -> 5.623 ( -0.04%) [ +0.00% +0.07% +0.05% / -0.04% +0.53% +0.68%] index_select strided 3 : Elapsed 0.056 ms (5.625 ms / 100) 5.630 -> 5.635 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.78% +0.67%] index_select random : Elapsed 0.056 ms (5.632 ms / 100) 5.599 -> 5.604 ( +0.09%) [ +0.18% +0.11% +0.00% / +0.09% +0.70% +0.71%] index_select random_sorted : Elapsed 0.056 ms (5.609 ms / 100) B = [40, 20, 4, 16] (stride (16, 2560, 640, 1)) A = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) dim = 3 2.295 -> 2.301 ( +0.26%) [ +0.00% +0.17% +0.17% / +0.26% +0.48% +0.61%] index_add_ linear : Elapsed 0.023 ms (2.295 ms / 100) 2.266 -> 2.272 ( +0.26%) [ +0.00% +0.00% +0.22% / +0.26% +0.49% +0.57%] index_copy_ linear : Elapsed 0.023 ms (2.266 ms / 100) 2.295 -> 2.301 ( +0.26%) [ +0.00% +0.00% +0.04% / +0.26% +0.26% +0.35%] index_add_ reverse : Elapsed 0.023 ms (2.295 ms / 100) 2.264 -> 2.268 ( +0.18%) [ +0.04% +0.27% +0.00% / +0.18% +0.22% +0.49%] index_copy_ reverse : Elapsed 0.023 ms (2.265 ms / 100) 2.330 -> 2.333 ( +0.13%) [ +0.47% +0.00% +0.09% / +0.39% +0.13% +0.52%] index_add_ spread : Elapsed 0.023 ms (2.341 ms / 100) 2.333 -> 2.331 ( -0.09%) [ +0.13% +0.04% +0.00% / -0.09% +0.17% +0.21%] index_copy_ spread : Elapsed 0.023 ms (2.336 ms / 100) 2.336 -> 2.333 ( -0.13%) [ +0.00% +0.04% +0.21% / -0.13% +0.04% +0.13%] index_add_ strided 3 : Elapsed 0.023 ms (2.336 ms / 100) 2.328 -> 2.331 ( +0.13%) [ +0.00% +0.17% +0.69% / +0.34% +0.47% +0.13%] index_copy_ strided 3 : Elapsed 0.023 ms (2.328 ms / 100) 2.329 -> 2.328 ( -0.04%) [ +0.30% +0.00% +0.21% / -0.04% +0.17% +0.94%] index_add_ strided 5 : Elapsed 0.023 ms (2.336 ms / 100) 2.325 -> 2.331 ( +0.26%) [ +0.00% +0.04% +0.00% / +0.26% +0.26% +0.34%] index_copy_ strided 5 : Elapsed 0.023 ms (2.325 ms / 100) 2.328 -> 2.336 ( +0.34%) [ +0.13% +0.52% +0.00% / +0.39% +0.99% +0.34%] index_add_ strided 7 : Elapsed 0.023 ms (2.331 ms / 100) 2.332 -> 2.333 ( +0.04%) [ +0.13% +0.43% +0.00% / +0.04% +0.64% +0.30%] index_copy_ strided 7 : Elapsed 0.023 ms (2.335 ms / 100) 2.336 -> 2.329 ( -0.30%) [ +0.04% +0.04% +0.00% / -0.30% +0.21% -0.09%] index_add_ perm : Elapsed 0.023 ms (2.337 ms / 100) 2.331 -> 2.329 ( -0.09%) [ +0.00% +0.43% +0.26% / -0.09% +0.34% +0.21%] index_copy_ perm : Elapsed 0.023 ms (2.331 ms / 100) 2.333 -> 2.336 ( +0.13%) [ +0.39% +0.00% +0.17% / +0.13% +0.39% +0.39%] index_add_ perm_sorted : Elapsed 0.023 ms (2.342 ms / 100) 2.329 -> 2.330 ( +0.04%) [ +0.00% +0.00% +0.39% / +0.04% +0.30% +0.52%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.329 ms / 100) 4.661 -> 4.668 ( +0.15%) [ +0.04% +0.00% +0.17% / +0.15% +0.75% +0.51%] index_select const : Elapsed 0.047 ms (4.663 ms / 100) 4.715 -> 4.731 ( +0.34%) [ +0.00% +0.00% +0.25% / +0.38% +0.34% +0.42%] index_select wrap : Elapsed 0.047 ms (4.715 ms / 100) 4.736 -> 4.732 ( -0.08%) [ +0.17% +0.19% +0.00% / -0.08% +0.38% +0.53%] index_select linear : Elapsed 0.047 ms (4.744 ms / 100) 4.719 -> 4.718 ( -0.02%) [ +0.00% +0.15% +0.04% / -0.02% +0.45% +0.49%] index_select reverse : Elapsed 0.047 ms (4.719 ms / 100) 4.651 -> 4.658 ( +0.15%) [ +0.00% +0.02% +0.02% / +0.15% +0.73% +0.60%] index_select skip64 : Elapsed 0.047 ms (4.651 ms / 100) 4.663 -> 4.678 ( +0.32%) [ +0.00% +0.26% +0.19% / +0.32% +0.90% +0.90%] index_select skip256 : Elapsed 0.047 ms (4.663 ms / 100) 4.716 -> 4.721 ( +0.11%) [ +0.08% +0.13% +0.00% / +0.11% +0.72% +0.55%] index_select spread : Elapsed 0.047 ms (4.720 ms / 100) 4.736 -> 4.737 ( +0.02%) [ +0.00% +0.08% +0.02% / +0.02% +0.44% +0.36%] index_select strided 3 : Elapsed 0.047 ms (4.736 ms / 100) 4.719 -> 4.727 ( +0.17%) [ +0.23% +0.32% +0.00% / +0.17% +0.70% +0.83%] index_select random : Elapsed 0.047 ms (4.730 ms / 100) 4.706 -> 4.720 ( +0.30%) [ +0.34% +0.00% +0.32% / +0.30% +0.70% +0.79%] index_select random_sorted : Elapsed 0.047 ms (4.722 ms / 100) B = [40, 20, 4, 16] (stride (20, 1, 12800, 800)) A = [40, 20, 4, 5] (stride (400, 1, 20, 80)) dim = 3 2.231 -> 2.235 ( +0.18%) [ +0.09% +0.22% +0.00% / +0.18% +0.22% +0.40%] index_add_ linear : Elapsed 0.022 ms (2.233 ms / 100) 2.178 -> 2.179 ( +0.05%) [ +0.18% +0.37% +0.00% / +0.37% +0.05% +0.05%] index_copy_ linear : Elapsed 0.022 ms (2.182 ms / 100) 2.233 -> 2.228 ( -0.22%) [ +0.09% +0.00% +0.09% / -0.04% -0.22% -0.22%] index_add_ reverse : Elapsed 0.022 ms (2.235 ms / 100) 2.178 -> 2.165 ( -0.60%) [ +0.18% +0.00% +0.18% / +0.00% -0.60% -0.55%] index_copy_ reverse : Elapsed 0.022 ms (2.182 ms / 100) 2.234 -> 2.227 ( -0.31%) [ +0.00% +0.09% +0.00% / +0.00% -0.09% -0.31%] index_add_ spread : Elapsed 0.022 ms (2.234 ms / 100) 2.177 -> 2.168 ( -0.41%) [ +0.28% +0.28% +0.00% / +0.28% -0.41% -0.23%] index_copy_ spread : Elapsed 0.022 ms (2.183 ms / 100) 2.232 -> 2.233 ( +0.04%) [ +0.31% +0.00% +0.00% / +0.22% +0.09% +0.04%] index_add_ strided 3 : Elapsed 0.022 ms (2.239 ms / 100) 2.178 -> 2.176 ( -0.09%) [ +0.09% +0.00% +0.18% / +0.32% +0.28% -0.09%] index_copy_ strided 3 : Elapsed 0.022 ms (2.180 ms / 100) 2.239 -> 2.228 ( -0.49%) [ +0.31% +0.45% +0.00% / +0.18% -0.49% -0.22%] index_add_ strided 5 : Elapsed 0.022 ms (2.246 ms / 100) 2.185 -> 2.170 ( -0.69%) [ +0.00% +0.00% +0.09% / +0.05% -0.69% -0.32%] index_copy_ strided 5 : Elapsed 0.022 ms (2.185 ms / 100) 2.244 -> 2.234 ( -0.45%) [ +0.49% +0.31% +0.00% / +0.13% -0.45% -0.27%] index_add_ strided 7 : Elapsed 0.023 ms (2.255 ms / 100) 2.196 -> 2.176 ( -0.91%) [ +0.00% +0.05% +0.05% / +0.00% -0.77% -0.91%] index_copy_ strided 7 : Elapsed 0.022 ms (2.196 ms / 100) 2.241 -> 2.229 ( -0.54%) [ +0.04% +0.00% +0.13% / -0.09% -0.31% -0.54%] index_add_ perm : Elapsed 0.022 ms (2.242 ms / 100) 2.185 -> 2.171 ( -0.64%) [ +0.32% +0.09% +0.00% / -0.09% -0.23% -0.64%] index_copy_ perm : Elapsed 0.022 ms (2.192 ms / 100) 2.239 -> 2.229 ( -0.45%) [ +0.31% +0.00% +0.31% / +0.31% -0.45% -0.27%] index_add_ perm_sorted : Elapsed 0.022 ms (2.246 ms / 100) 2.186 -> 2.178 ( -0.37%) [ +0.32% +0.00% +0.23% / +0.27% -0.37% -0.37%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.193 ms / 100) 4.512 -> 4.528 ( +0.35%) [ +0.38% +0.33% +0.00% / +0.35% +1.04% +0.86%] index_select const : Elapsed 0.045 ms (4.529 ms / 100) 4.553 -> 4.556 ( +0.07%) [ +0.13% +0.00% +0.13% / +0.07% +0.20% +0.20%] index_select wrap : Elapsed 0.046 ms (4.559 ms / 100) 4.569 -> 4.573 ( +0.09%) [ +0.00% +0.00% +0.04% / +0.13% +0.09% +0.13%] index_select linear : Elapsed 0.046 ms (4.569 ms / 100) 4.565 -> 4.564 ( -0.02%) [ +0.00% +0.09% +0.15% / -0.02% +0.00% +0.37%] index_select reverse : Elapsed 0.046 ms (4.565 ms / 100) 4.531 -> 4.526 ( -0.11%) [ +0.09% +0.00% +0.07% / -0.11% +0.22% +0.38%] index_select skip64 : Elapsed 0.045 ms (4.535 ms / 100) 4.526 -> 4.536 ( +0.22%) [ +0.07% +0.00% +0.20% / +0.22% +0.38% +0.64%] index_select skip256 : Elapsed 0.045 ms (4.529 ms / 100) 4.547 -> 4.557 ( +0.22%) [ +0.24% +0.20% +0.00% / +0.22% +0.33% +0.51%] index_select spread : Elapsed 0.046 ms (4.558 ms / 100) 4.550 -> 4.547 ( -0.07%) [ +0.18% +0.00% +0.04% / -0.07% +0.42% +0.42%] index_select strided 3 : Elapsed 0.046 ms (4.558 ms / 100) 4.558 -> 4.552 ( -0.13%) [ +0.15% +0.00% +0.09% / -0.13% +0.46% +0.31%] index_select random : Elapsed 0.046 ms (4.565 ms / 100) 4.549 -> 4.548 ( -0.02%) [ +0.24% +0.00% +0.07% / -0.02% +0.62% +0.68%] index_select random_sorted : Elapsed 0.046 ms (4.560 ms / 100) B = [40, 20, 4, 16] (stride (80, 1, 20, 3200)) A = [40, 20, 4, 5] (stride (100, 5, 4000, 1)) dim = 3 2.261 -> 2.264 ( +0.13%) [ +0.00% +0.09% +0.22% / +0.27% +0.49% +0.13%] index_add_ linear : Elapsed 0.023 ms (2.261 ms / 100) 2.196 -> 2.201 ( +0.23%) [ +0.00% +0.05% +0.18% / +0.27% +0.55% +0.23%] index_copy_ linear : Elapsed 0.022 ms (2.196 ms / 100) 2.263 -> 2.262 ( -0.04%) [ +0.27% +0.00% +0.04% / -0.04% +0.22% +0.22%] index_add_ reverse : Elapsed 0.023 ms (2.269 ms / 100) 2.201 -> 2.206 ( +0.23%) [ +0.05% +0.05% +0.00% / +0.23% +0.45% +0.27%] index_copy_ reverse : Elapsed 0.022 ms (2.202 ms / 100) 2.260 -> 2.262 ( +0.09%) [ +0.27% +0.00% +0.04% / +0.09% +0.44% +0.40%] index_add_ spread : Elapsed 0.023 ms (2.266 ms / 100) 2.197 -> 2.203 ( +0.27%) [ +0.05% +0.00% +0.14% / +0.27% +0.36% +0.41%] index_copy_ spread : Elapsed 0.022 ms (2.198 ms / 100) 2.257 -> 2.258 ( +0.04%) [ +0.00% +0.35% +0.31% / +0.04% +0.35% +0.97%] index_add_ strided 3 : Elapsed 0.023 ms (2.257 ms / 100) 2.190 -> 2.195 ( +0.23%) [ +0.00% +0.59% +0.59% / +0.23% +0.59% +1.05%] index_copy_ strided 3 : Elapsed 0.022 ms (2.190 ms / 100) 2.265 -> 2.259 ( -0.26%) [ +0.00% +0.00% +0.18% / -0.26% +0.31% +0.44%] index_add_ strided 5 : Elapsed 0.023 ms (2.265 ms / 100) 2.200 -> 2.202 ( +0.09%) [ +0.05% +0.09% +0.00% / +0.09% +0.36% +0.45%] index_copy_ strided 5 : Elapsed 0.022 ms (2.201 ms / 100) 2.252 -> 2.260 ( +0.36%) [ +0.00% +0.36% +0.04% / +0.36% +0.44% +0.93%] index_add_ strided 7 : Elapsed 0.023 ms (2.252 ms / 100) 2.193 -> 2.193 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.36% +0.73%] index_copy_ strided 7 : Elapsed 0.022 ms (2.193 ms / 100) 2.266 -> 2.256 ( -0.44%) [ +0.00% +0.09% +0.13% / -0.44% -0.04% -0.13%] index_add_ perm : Elapsed 0.023 ms (2.266 ms / 100) 2.202 -> 2.192 ( -0.45%) [ +0.00% +0.05% +0.09% / -0.45% -0.05% +0.05%] index_copy_ perm : Elapsed 0.022 ms (2.202 ms / 100) 2.263 -> 2.261 ( -0.09%) [ +0.04% +0.13% +0.00% / -0.09% +0.44% +0.35%] index_add_ perm_sorted : Elapsed 0.023 ms (2.264 ms / 100) 2.199 -> 2.198 ( -0.05%) [ +0.23% +0.18% +0.00% / -0.05% +0.45% +0.27%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.204 ms / 100) 4.688 -> 4.713 ( +0.53%) [ +0.41% +0.49% +0.00% / +0.55% +0.53% +0.62%] index_select const : Elapsed 0.047 ms (4.707 ms / 100) 4.683 -> 4.693 ( +0.21%) [ +0.66% +0.51% +0.00% / +0.21% +0.56% +0.49%] index_select wrap : Elapsed 0.047 ms (4.714 ms / 100) 4.681 -> 4.685 ( +0.09%) [ +0.13% +0.13% +0.00% / +0.09% +0.47% +0.66%] index_select linear : Elapsed 0.047 ms (4.687 ms / 100) 4.683 -> 4.683 ( +0.00%) [ +0.23% +0.09% +0.00% / +0.00% +0.79% +0.66%] index_select reverse : Elapsed 0.047 ms (4.694 ms / 100) 4.696 -> 4.678 ( -0.38%) [ +0.34% +0.00% +0.38% / -0.38% +0.38% +0.38%] index_select skip64 : Elapsed 0.047 ms (4.712 ms / 100) 4.707 -> 4.726 ( +0.40%) [ +0.02% +0.45% +0.00% / +0.40% +0.70% +0.74%] index_select skip256 : Elapsed 0.047 ms (4.708 ms / 100) 4.678 -> 4.680 ( +0.04%) [ +0.00% +0.13% +0.02% / +0.04% +0.94% +0.66%] index_select spread : Elapsed 0.047 ms (4.678 ms / 100) 4.678 -> 4.684 ( +0.13%) [ +0.32% +0.00% +0.24% / +0.13% +0.66% +0.68%] index_select strided 3 : Elapsed 0.047 ms (4.693 ms / 100) 4.682 -> 4.685 ( +0.06%) [ +0.11% +0.00% +0.06% / +0.06% +0.66% +0.70%] index_select random : Elapsed 0.047 ms (4.687 ms / 100) 4.680 -> 4.712 ( +0.68%) [ +0.28% +0.00% +0.62% / +0.68% +1.67% +1.32%] index_select random_sorted : Elapsed 0.047 ms (4.693 ms / 100) B = [40, 20, 4, 16] (stride (4, 160, 1, 3200)) A = [40, 20, 4, 5] (stride (1, 800, 40, 160)) dim = 3 2.552 -> 2.550 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.12% +0.04%] index_add_ linear : Elapsed 0.026 ms (2.552 ms / 100) 2.465 -> 2.467 ( +0.08%) [ +0.16% +0.00% +0.12% / +0.08% +0.28% +0.37%] index_copy_ linear : Elapsed 0.025 ms (2.469 ms / 100) 2.541 -> 2.542 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.28% +0.43%] index_add_ reverse : Elapsed 0.025 ms (2.544 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.28% +0.33%] index_copy_ reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.543 -> 2.546 ( +0.12%) [ +0.12% +0.00% +0.20% / +0.12% +0.47% +0.43%] index_add_ spread : Elapsed 0.025 ms (2.546 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.32% +0.24%] index_copy_ spread : Elapsed 0.025 ms (2.467 ms / 100) 2.544 -> 2.547 ( +0.12%) [ +0.00% +0.35% +0.16% / +0.12% +0.31% +0.31%] index_add_ strided 3 : Elapsed 0.025 ms (2.544 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.16% +0.20%] index_copy_ strided 3 : Elapsed 0.025 ms (2.466 ms / 100) 2.542 -> 2.544 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.24% +0.24%] index_add_ strided 5 : Elapsed 0.025 ms (2.545 ms / 100) 2.463 -> 2.465 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.28% +0.12%] index_copy_ strided 5 : Elapsed 0.025 ms (2.463 ms / 100) 2.537 -> 2.536 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.16% +0.43%] index_add_ strided 7 : Elapsed 0.025 ms (2.537 ms / 100) 2.456 -> 2.456 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.41% +0.57%] index_copy_ strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.544 -> 2.546 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.39% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.547 ms / 100) 2.467 -> 2.467 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.08% +0.16%] index_copy_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.543 -> 2.545 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.025 ms (2.543 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.20% +0.28%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) 5.581 -> 5.584 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.61% +0.61%] index_select const : Elapsed 0.056 ms (5.585 ms / 100) 5.579 -> 5.577 ( -0.04%) [ +0.20% +0.14% +0.00% / -0.04% +0.66% +0.48%] index_select wrap : Elapsed 0.056 ms (5.590 ms / 100) 5.596 -> 5.601 ( +0.09%) [ +0.11% +0.13% +0.00% / +0.09% +0.61% +0.61%] index_select linear : Elapsed 0.056 ms (5.602 ms / 100) 5.578 -> 5.582 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.07% +0.61% +0.57%] index_select reverse : Elapsed 0.056 ms (5.581 ms / 100) 5.578 -> 5.583 ( +0.09%) [ +0.05% +0.04% +0.00% / +0.09% +0.65% +0.63%] index_select skip64 : Elapsed 0.056 ms (5.581 ms / 100) 5.576 -> 5.582 ( +0.11%) [ +0.00% +0.13% +0.07% / +0.11% +0.65% +0.59%] index_select skip256 : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.576 ( +0.07%) [ +0.09% +0.13% +0.00% / +0.07% +0.56% +0.57%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.580 -> 5.584 ( +0.07%) [ +0.04% +0.18% +0.00% / +0.07% +0.73% +0.63%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.581 -> 5.579 ( -0.04%) [ +0.04% +0.00% +0.02% / -0.04% +0.70% +0.65%] index_select random : Elapsed 0.056 ms (5.583 ms / 100) 5.568 -> 5.574 ( +0.11%) [ +0.04% +0.09% +0.00% / +0.11% +0.61% +0.70%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) B = [40, 20, 4, 16] (stride (4, 160, 1, 3200)) A = [40, 20, 4, 5] (stride (4, 160, 1, 3200)) dim = 3 2.396 -> 2.400 ( +0.17%) [ +0.25% +0.29% +0.00% / +0.17% +0.79% +0.88%] index_add_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.335 -> 2.341 ( +0.26%) [ +0.30% +0.21% +0.00% / +0.26% +0.77% +0.73%] index_copy_ linear : Elapsed 0.023 ms (2.342 ms / 100) 2.393 -> 2.398 ( +0.21%) [ +0.33% +0.00% +0.21% / +0.21% +0.75% +0.71%] index_add_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.334 -> 2.332 ( -0.09%) [ +0.09% +0.13% +0.00% / -0.09% +0.43% +0.47%] index_copy_ reverse : Elapsed 0.023 ms (2.336 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.08% +0.08% +0.00% / -0.04% +0.79% +0.79%] index_add_ spread : Elapsed 0.024 ms (2.414 ms / 100) 2.343 -> 2.344 ( +0.04%) [ +0.21% +0.21% +0.00% / +0.04% +0.94% +0.68%] index_copy_ spread : Elapsed 0.023 ms (2.348 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.83% +0.83%] index_add_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.329 -> 2.334 ( +0.21%) [ +0.26% +0.26% +0.00% / +0.21% +1.03% +0.94%] index_copy_ strided 3 : Elapsed 0.023 ms (2.335 ms / 100) 2.396 -> 2.397 ( +0.04%) [ +0.13% +0.08% +0.00% / +0.04% +0.71% +0.79%] index_add_ strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.331 -> 2.334 ( +0.13%) [ +0.34% +0.17% +0.00% / +0.13% +0.51% +0.82%] index_copy_ strided 5 : Elapsed 0.023 ms (2.339 ms / 100) 2.398 -> 2.395 ( -0.13%) [ +0.08% +0.00% +0.00% / -0.13% +0.79% +0.83%] index_add_ strided 7 : Elapsed 0.024 ms (2.400 ms / 100) 2.337 -> 2.337 ( +0.00%) [ +0.09% +0.00% +0.13% / +0.00% +0.90% +0.86%] index_copy_ strided 7 : Elapsed 0.023 ms (2.339 ms / 100) 2.397 -> 2.393 ( -0.17%) [ +0.04% +0.00% +0.13% / -0.17% +0.75% +0.75%] index_add_ perm : Elapsed 0.024 ms (2.398 ms / 100) 2.331 -> 2.332 ( +0.04%) [ +0.00% +0.21% +0.09% / +0.04% +0.64% +0.69%] index_copy_ perm : Elapsed 0.023 ms (2.331 ms / 100) 2.410 -> 2.413 ( +0.12%) [ +0.00% +0.25% +0.37% / +0.12% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.345 -> 2.353 ( +0.34%) [ +0.00% +0.34% +0.30% / +0.34% +0.64% +0.72%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.345 ms / 100) 5.044 -> 5.049 ( +0.10%) [ +0.16% +0.16% +0.00% / +0.10% +0.81% +0.85%] index_select const : Elapsed 0.051 ms (5.052 ms / 100) 5.072 -> 5.078 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +0.65% +0.79%] index_select wrap : Elapsed 0.051 ms (5.078 ms / 100) 5.092 -> 5.098 ( +0.12%) [ +0.04% +0.00% +0.04% / +0.12% +0.67% +1.00%] index_select linear : Elapsed 0.051 ms (5.094 ms / 100) 5.053 -> 5.058 ( +0.10%) [ +0.00% +0.04% +0.00% / +0.10% +0.77% +0.87%] index_select reverse : Elapsed 0.051 ms (5.053 ms / 100) 5.044 -> 5.061 ( +0.34%) [ +0.10% +0.06% +0.00% / +0.34% +0.73% +0.73%] index_select skip64 : Elapsed 0.050 ms (5.049 ms / 100) 5.050 -> 5.055 ( +0.10%) [ +0.00% +0.14% +0.10% / +0.10% +0.65% +0.67%] index_select skip256 : Elapsed 0.050 ms (5.050 ms / 100) 5.080 -> 5.080 ( +0.00%) [ +0.02% +0.06% +0.00% / +0.00% +0.71% +0.93%] index_select spread : Elapsed 0.051 ms (5.081 ms / 100) 5.078 -> 5.089 ( +0.22%) [ +0.00% +0.18% +0.16% / +0.22% +0.89% +0.81%] index_select strided 3 : Elapsed 0.051 ms (5.078 ms / 100) 5.080 -> 5.087 ( +0.14%) [ +0.16% +0.00% +0.00% / +0.14% +0.94% +1.02%] index_select random : Elapsed 0.051 ms (5.088 ms / 100) 5.059 -> 5.079 ( +0.40%) [ +0.42% +0.04% +0.00% / +0.40% +1.05% +1.11%] index_select random_sorted : Elapsed 0.051 ms (5.080 ms / 100) out_shape = [16, 20, 5, 4] in_shape = [40, 20, 5, 4] idx_dim = 0 B = [16, 20, 5, 4] (stride (400, 4, 80, 1)) A = [40, 20, 5, 4] (stride (4, 160, 3200, 1)) dim = 0 3.924 -> 3.922 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.66% +0.59%] index_select const : Elapsed 0.039 ms (3.928 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.00% +0.03% +0.08% / +0.00% +0.41% +0.36%] index_select wrap : Elapsed 0.039 ms (3.932 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.00% +0.08% +0.05% / +0.00% +0.48% +0.56%] index_select linear : Elapsed 0.039 ms (3.939 ms / 100) 3.935 -> 3.941 ( +0.15%) [ +0.18% +0.05% +0.00% / +0.15% +0.56% +0.66%] index_select reverse : Elapsed 0.039 ms (3.942 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.23% +0.38%] index_select skip64 : Elapsed 0.039 ms (3.940 ms / 100) 3.929 -> 3.927 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.38% +0.38%] index_select skip256 : Elapsed 0.039 ms (3.929 ms / 100) 3.912 -> 3.921 ( +0.23%) [ +0.10% +0.00% +0.26% / +0.23% +0.46% +0.43%] index_select spread : Elapsed 0.039 ms (3.916 ms / 100) 3.935 -> 3.941 ( +0.15%) [ +0.00% +0.05% +0.03% / +0.15% +0.36% +0.43%] index_select strided 3 : Elapsed 0.039 ms (3.935 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.41% +0.43%] index_select strided 5 : Elapsed 0.039 ms (3.924 ms / 100) 3.923 -> 3.921 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.48% +0.48%] index_select strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 3.943 -> 3.947 ( +0.10%) [ +0.00% +0.10% +0.05% / +0.10% +0.38% +0.38%] index_select strided 8 : Elapsed 0.039 ms (3.943 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.03% +0.10% +0.00% / +0.05% +0.48% +0.41%] index_select strided 16 : Elapsed 0.039 ms (3.923 ms / 100) 3.923 -> 3.922 ( -0.03%) [ +0.00% +0.08% +0.05% / -0.03% +0.38% +0.48%] index_select random : Elapsed 0.039 ms (3.923 ms / 100) 3.925 -> 3.925 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.48% +0.43%] index_select random_sorted : Elapsed 0.039 ms (3.925 ms / 100) 3.921 -> 3.926 ( +0.13%) [ +0.15% +0.00% +0.08% / +0.13% +0.48% +0.51%] index_select perm : Elapsed 0.039 ms (3.927 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.36% +0.36%] index_select perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) B = [16, 20, 5, 4] (stride (1, 320, 64, 16)) A = [40, 20, 5, 4] (stride (400, 4, 80, 1)) dim = 0 3.241 -> 3.242 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.03% +0.71% +0.68%] index_select const : Elapsed 0.032 ms (3.241 ms / 100) 3.252 -> 3.249 ( -0.09%) [ +0.03% +0.00% +0.03% / -0.09% +0.74% +0.58%] index_select wrap : Elapsed 0.033 ms (3.253 ms / 100) 3.245 -> 3.247 ( +0.06%) [ +0.18% +0.00% +0.03% / +0.06% +0.74% +0.77%] index_select linear : Elapsed 0.033 ms (3.251 ms / 100) 3.249 -> 3.255 ( +0.18%) [ +0.00% +0.22% +0.03% / +0.18% +1.05% +0.77%] index_select reverse : Elapsed 0.032 ms (3.249 ms / 100) 3.243 -> 3.243 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.74% +0.89%] index_select skip64 : Elapsed 0.032 ms (3.245 ms / 100) 3.239 -> 3.243 ( +0.12%) [ +0.06% +0.09% +0.00% / +0.12% +0.74% +0.68%] index_select skip256 : Elapsed 0.032 ms (3.241 ms / 100) 3.238 -> 3.240 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.74% +0.80%] index_select spread : Elapsed 0.032 ms (3.238 ms / 100) 3.248 -> 3.242 ( -0.18%) [ +0.09% +0.00% +0.06% / -0.18% +0.68% +0.65%] index_select strided 3 : Elapsed 0.033 ms (3.251 ms / 100) 3.247 -> 3.246 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.80% +0.80%] index_select strided 5 : Elapsed 0.032 ms (3.248 ms / 100) 3.257 -> 3.259 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.89% +0.68%] index_select strided 7 : Elapsed 0.033 ms (3.261 ms / 100) 3.251 -> 3.256 ( +0.15%) [ +0.00% +0.03% +0.12% / +0.15% +1.08% +0.68%] index_select strided 8 : Elapsed 0.033 ms (3.251 ms / 100) 3.245 -> 3.246 ( +0.03%) [ +0.09% +0.06% +0.00% / +0.03% +0.71% +0.86%] index_select strided 16 : Elapsed 0.032 ms (3.248 ms / 100) 3.247 -> 3.248 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.03% +0.83% +0.77%] index_select random : Elapsed 0.032 ms (3.247 ms / 100) 3.242 -> 3.244 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.89% +1.02%] index_select random_sorted : Elapsed 0.032 ms (3.242 ms / 100) 3.247 -> 3.248 ( +0.03%) [ +0.00% +0.06% +0.06% / +0.03% +0.86% +0.83%] index_select perm : Elapsed 0.032 ms (3.247 ms / 100) 3.238 -> 3.240 ( +0.06%) [ +0.00% +0.09% +0.06% / +0.06% +0.80% +0.83%] index_select perm_sorted : Elapsed 0.032 ms (3.238 ms / 100) B = [16, 20, 5, 4] (stride (1, 64, 1280, 16)) A = [40, 20, 5, 4] (stride (400, 20, 4, 1)) dim = 0 3.191 -> 3.192 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.72% +0.69%] index_select const : Elapsed 0.032 ms (3.193 ms / 100) 3.203 -> 3.204 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.59% +0.62%] index_select wrap : Elapsed 0.032 ms (3.203 ms / 100) 3.212 -> 3.213 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.34% +0.31%] index_select linear : Elapsed 0.032 ms (3.213 ms / 100) 3.191 -> 3.192 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.53% +0.50%] index_select reverse : Elapsed 0.032 ms (3.192 ms / 100) 3.198 -> 3.203 ( +0.16%) [ +0.13% +0.09% +0.00% / +0.16% +0.59% +0.44%] index_select skip64 : Elapsed 0.032 ms (3.202 ms / 100) 3.194 -> 3.194 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.47% +0.50%] index_select skip256 : Elapsed 0.032 ms (3.195 ms / 100) 3.196 -> 3.196 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.47% +0.44%] index_select spread : Elapsed 0.032 ms (3.196 ms / 100) 3.202 -> 3.204 ( +0.06%) [ +0.03% +0.00% +0.06% / +0.06% +0.47% +0.37%] index_select strided 3 : Elapsed 0.032 ms (3.203 ms / 100) 3.195 -> 3.194 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.28% +0.25%] index_select strided 5 : Elapsed 0.032 ms (3.196 ms / 100) 3.204 -> 3.204 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.50%] index_select strided 7 : Elapsed 0.032 ms (3.204 ms / 100) 3.196 -> 3.196 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.41% +0.34%] index_select strided 8 : Elapsed 0.032 ms (3.196 ms / 100) 3.196 -> 3.196 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.41% +0.47%] index_select strided 16 : Elapsed 0.032 ms (3.197 ms / 100) 3.206 -> 3.205 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.50% +0.50%] index_select random : Elapsed 0.032 ms (3.207 ms / 100) 3.194 -> 3.193 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.47% +0.47%] index_select random_sorted : Elapsed 0.032 ms (3.194 ms / 100) 3.211 -> 3.212 ( +0.03%) [ +0.06% +0.12% +0.00% / +0.03% +0.31% +0.37%] index_select perm : Elapsed 0.032 ms (3.213 ms / 100) 3.197 -> 3.197 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.34% +0.28%] index_select perm_sorted : Elapsed 0.032 ms (3.198 ms / 100) B = [16, 20, 5, 4] (stride (1, 80, 16, 1600)) A = [40, 20, 5, 4] (stride (400, 20, 4, 1)) dim = 0 2.894 -> 2.893 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.73% +0.62%] index_select const : Elapsed 0.029 ms (2.894 ms / 100) 2.883 -> 2.884 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.69% +0.69%] index_select wrap : Elapsed 0.029 ms (2.883 ms / 100) 2.896 -> 2.896 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.62% +0.66%] index_select linear : Elapsed 0.029 ms (2.897 ms / 100) 2.902 -> 2.902 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.96% +1.00%] index_select reverse : Elapsed 0.029 ms (2.902 ms / 100) 2.881 -> 2.881 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.73% +0.80%] index_select skip64 : Elapsed 0.029 ms (2.882 ms / 100) 2.889 -> 2.890 ( +0.03%) [ +0.14% +0.10% +0.00% / +0.03% +0.87% +0.83%] index_select skip256 : Elapsed 0.029 ms (2.893 ms / 100) 2.891 -> 2.890 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.80% +0.76%] index_select spread : Elapsed 0.029 ms (2.891 ms / 100) 2.884 -> 2.884 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_select strided 3 : Elapsed 0.029 ms (2.884 ms / 100) 2.897 -> 2.897 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.79% +0.76%] index_select strided 5 : Elapsed 0.029 ms (2.897 ms / 100) 2.891 -> 2.891 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.69% +0.69%] index_select strided 7 : Elapsed 0.029 ms (2.893 ms / 100) 2.881 -> 2.881 ( +0.00%) [ +0.07% +0.17% +0.00% / +0.00% +0.94% +0.80%] index_select strided 8 : Elapsed 0.029 ms (2.883 ms / 100) 2.869 -> 2.870 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.77% +0.70%] index_select strided 16 : Elapsed 0.029 ms (2.869 ms / 100) 2.890 -> 2.894 ( +0.14%) [ +0.10% +0.10% +0.00% / +0.14% +0.80% +0.83%] index_select random : Elapsed 0.029 ms (2.893 ms / 100) 2.882 -> 2.882 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.90% +0.87%] index_select random_sorted : Elapsed 0.029 ms (2.882 ms / 100) 2.899 -> 2.899 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.97% +0.93%] index_select perm : Elapsed 0.029 ms (2.900 ms / 100) 2.892 -> 2.893 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.80% +0.86%] index_select perm_sorted : Elapsed 0.029 ms (2.892 ms / 100) B = [16, 20, 5, 4] (stride (1, 80, 16, 1600)) A = [40, 20, 5, 4] (stride (4, 160, 3200, 1)) dim = 0 3.614 -> 3.618 ( +0.11%) [ +0.25% +0.00% +0.30% / +0.11% +0.69% +0.55%] index_select const : Elapsed 0.036 ms (3.623 ms / 100) 3.613 -> 3.612 ( -0.03%) [ +0.14% +0.00% +0.08% / -0.03% +0.64% +0.66%] index_select wrap : Elapsed 0.036 ms (3.618 ms / 100) 3.622 -> 3.631 ( +0.25%) [ +0.22% +0.19% +0.00% / +0.25% +0.72% +0.44%] index_select linear : Elapsed 0.036 ms (3.630 ms / 100) 3.622 -> 3.624 ( +0.06%) [ +0.11% +0.00% +0.08% / +0.06% +0.44% +0.61%] index_select reverse : Elapsed 0.036 ms (3.626 ms / 100) 3.624 -> 3.625 ( +0.03%) [ +0.14% +0.00% +0.08% / +0.03% +0.69% +0.50%] index_select skip64 : Elapsed 0.036 ms (3.629 ms / 100) 3.618 -> 3.624 ( +0.17%) [ +0.17% +0.14% +0.00% / +0.17% +0.39% +0.64%] index_select skip256 : Elapsed 0.036 ms (3.624 ms / 100) 3.608 -> 3.610 ( +0.06%) [ +0.00% +0.14% +0.11% / +0.06% +0.53% +0.53%] index_select spread : Elapsed 0.036 ms (3.608 ms / 100) 3.621 -> 3.623 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.77% +0.50%] index_select strided 3 : Elapsed 0.036 ms (3.624 ms / 100) 3.603 -> 3.603 ( +0.00%) [ +0.19% +0.00% +0.11% / +0.00% +0.53% +0.47%] index_select strided 5 : Elapsed 0.036 ms (3.610 ms / 100) 3.616 -> 3.618 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.55% +0.44%] index_select strided 7 : Elapsed 0.036 ms (3.616 ms / 100) 3.628 -> 3.629 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.44% +0.44%] index_select strided 8 : Elapsed 0.036 ms (3.628 ms / 100) 3.617 -> 3.630 ( +0.36%) [ +0.06% +0.06% +0.00% / +0.36% +0.36% +0.44%] index_select strided 16 : Elapsed 0.036 ms (3.619 ms / 100) 3.613 -> 3.615 ( +0.06%) [ +0.08% +0.00% +0.11% / +0.06% +0.47% +0.42%] index_select random : Elapsed 0.036 ms (3.616 ms / 100) 3.610 -> 3.613 ( +0.08%) [ +0.06% +0.06% +0.00% / +0.08% +0.58% +0.55%] index_select random_sorted : Elapsed 0.036 ms (3.612 ms / 100) 3.623 -> 3.625 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.50% +0.47%] index_select perm : Elapsed 0.036 ms (3.623 ms / 100) 3.604 -> 3.605 ( +0.03%) [ +0.11% +0.17% +0.00% / +0.03% +0.55% +0.47%] index_select perm_sorted : Elapsed 0.036 ms (3.608 ms / 100) out_shape = [40, 16, 5, 4] in_shape = [40, 20, 5, 4] idx_dim = 1 B = [40, 16, 5, 4] (stride (320, 1, 64, 16)) A = [40, 20, 5, 4] (stride (1, 40, 800, 4000)) dim = 1 3.782 -> 3.782 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.85% +0.82%] index_select const : Elapsed 0.038 ms (3.783 ms / 100) 3.806 -> 3.811 ( +0.13%) [ +0.00% +0.08% +0.05% / +0.13% +1.00% +1.00%] index_select wrap : Elapsed 0.038 ms (3.806 ms / 100) 3.814 -> 3.815 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.73% +0.71%] index_select linear : Elapsed 0.038 ms (3.815 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.11% +0.00% +0.03% / +0.05% +0.68% +0.68%] index_select reverse : Elapsed 0.038 ms (3.808 ms / 100) 3.775 -> 3.775 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.72% +0.69%] index_select skip64 : Elapsed 0.038 ms (3.776 ms / 100) 3.789 -> 3.791 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.66% +0.66%] index_select skip256 : Elapsed 0.038 ms (3.789 ms / 100) 3.811 -> 3.815 ( +0.10%) [ +0.21% +0.03% +0.00% / +0.10% +0.79% +0.68%] index_select spread : Elapsed 0.038 ms (3.819 ms / 100) 3.796 -> 3.796 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.66% +0.63%] index_select strided 3 : Elapsed 0.038 ms (3.797 ms / 100) 3.786 -> 3.785 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.61% +0.58%] index_select strided 5 : Elapsed 0.038 ms (3.786 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.79% +0.68%] index_select strided 7 : Elapsed 0.038 ms (3.814 ms / 100) 3.799 -> 3.800 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.71% +0.66%] index_select strided 8 : Elapsed 0.038 ms (3.801 ms / 100) 3.808 -> 3.808 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.68% +0.60%] index_select strided 16 : Elapsed 0.038 ms (3.809 ms / 100) 3.824 -> 3.828 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.60% +0.55%] index_select random : Elapsed 0.038 ms (3.825 ms / 100) 3.794 -> 3.795 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.55% +0.50%] index_select random_sorted : Elapsed 0.038 ms (3.795 ms / 100) 3.808 -> 3.809 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.50% +0.47%] index_select perm : Elapsed 0.038 ms (3.808 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.45% +0.47%] index_select perm_sorted : Elapsed 0.038 ms (3.800 ms / 100) B = [40, 16, 5, 4] (stride (320, 5, 1, 80)) A = [40, 20, 5, 4] (stride (400, 20, 1, 5)) dim = 1 3.850 -> 3.851 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.52% +0.60%] index_select const : Elapsed 0.038 ms (3.850 ms / 100) 3.832 -> 3.834 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.55% +0.52%] index_select wrap : Elapsed 0.038 ms (3.833 ms / 100) 3.837 -> 3.839 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.52% +0.52%] index_select linear : Elapsed 0.038 ms (3.839 ms / 100) 3.846 -> 3.839 ( -0.18%) [ +0.08% +0.05% +0.00% / -0.18% +0.47% +0.42%] index_select reverse : Elapsed 0.038 ms (3.849 ms / 100) 3.857 -> 3.860 ( +0.08%) [ +0.13% +0.00% +0.10% / +0.08% +0.75% +0.60%] index_select skip64 : Elapsed 0.039 ms (3.862 ms / 100) 3.857 -> 3.859 ( +0.05%) [ +0.00% +0.08% +0.00% / +0.05% +0.83% +0.60%] index_select skip256 : Elapsed 0.039 ms (3.857 ms / 100) 3.835 -> 3.837 ( +0.05%) [ +0.13% +0.10% +0.00% / +0.05% +0.57% +0.52%] index_select spread : Elapsed 0.038 ms (3.840 ms / 100) 3.842 -> 3.843 ( +0.03%) [ +0.00% +0.18% +0.10% / +0.03% +0.73% +0.68%] index_select strided 3 : Elapsed 0.038 ms (3.842 ms / 100) 3.850 -> 3.850 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.55% +0.49%] index_select strided 5 : Elapsed 0.039 ms (3.852 ms / 100) 3.834 -> 3.834 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.76% +0.60%] index_select strided 7 : Elapsed 0.038 ms (3.834 ms / 100) 3.836 -> 3.837 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.83% +0.65%] index_select strided 8 : Elapsed 0.038 ms (3.839 ms / 100) 3.826 -> 3.824 ( -0.05%) [ +0.05% +0.03% +0.00% / -0.05% +0.65% +0.63%] index_select strided 16 : Elapsed 0.038 ms (3.828 ms / 100) 3.827 -> 3.836 ( +0.24%) [ +0.24% +0.26% +0.00% / +0.24% +0.81% +0.68%] index_select random : Elapsed 0.038 ms (3.836 ms / 100) 3.830 -> 3.828 ( -0.05%) [ +0.08% +0.05% +0.00% / -0.05% +0.78% +0.73%] index_select random_sorted : Elapsed 0.038 ms (3.833 ms / 100) 3.832 -> 3.838 ( +0.16%) [ +0.05% +0.00% +0.03% / +0.16% +0.84% +0.84%] index_select perm : Elapsed 0.038 ms (3.834 ms / 100) 3.816 -> 3.815 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.63% +0.66%] index_select perm_sorted : Elapsed 0.038 ms (3.818 ms / 100) B = [40, 16, 5, 4] (stride (20, 800, 1, 5)) A = [40, 20, 5, 4] (stride (400, 5, 1, 100)) dim = 1 3.984 -> 3.989 ( +0.13%) [ +0.00% +0.15% +0.13% / +0.13% +0.90% +0.95%] index_select const : Elapsed 0.040 ms (3.984 ms / 100) 3.978 -> 3.977 ( -0.03%) [ +0.10% +0.00% +0.08% / -0.03% +0.83% +0.83%] index_select wrap : Elapsed 0.040 ms (3.982 ms / 100) 3.991 -> 3.996 ( +0.13%) [ +0.15% +0.00% +0.13% / +0.13% +0.88% +0.85%] index_select linear : Elapsed 0.040 ms (3.997 ms / 100) 3.992 -> 3.992 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.88% +0.93%] index_select reverse : Elapsed 0.040 ms (3.997 ms / 100) 3.990 -> 3.990 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.85% +0.83%] index_select skip64 : Elapsed 0.040 ms (3.996 ms / 100) 4.005 -> 4.005 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.57% +0.70%] index_select skip256 : Elapsed 0.040 ms (4.005 ms / 100) 3.986 -> 3.990 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.10% +0.70% +0.70%] index_select spread : Elapsed 0.040 ms (3.986 ms / 100) 3.981 -> 3.984 ( +0.08%) [ +0.00% +0.03% +0.08% / +0.08% +0.75% +0.68%] index_select strided 3 : Elapsed 0.040 ms (3.981 ms / 100) 3.980 -> 3.983 ( +0.08%) [ +0.03% +0.10% +0.00% / +0.08% +0.73% +0.83%] index_select strided 5 : Elapsed 0.040 ms (3.981 ms / 100) 3.996 -> 4.002 ( +0.15%) [ +0.03% +0.00% +0.00% / +0.15% +0.75% +0.58%] index_select strided 7 : Elapsed 0.040 ms (3.997 ms / 100) 3.997 -> 3.998 ( +0.03%) [ +0.08% +0.00% +0.13% / +0.03% +0.63% +0.73%] index_select strided 8 : Elapsed 0.040 ms (4.000 ms / 100) 3.994 -> 3.995 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.63% +0.63%] index_select strided 16 : Elapsed 0.040 ms (3.994 ms / 100) 3.979 -> 3.990 ( +0.28%) [ +0.00% +0.38% +0.18% / +0.28% +0.83% +0.78%] index_select random : Elapsed 0.040 ms (3.979 ms / 100) 3.993 -> 4.004 ( +0.28%) [ +0.25% +0.00% +0.13% / +0.28% +0.78% +0.70%] index_select random_sorted : Elapsed 0.040 ms (4.003 ms / 100) 3.991 -> 3.988 ( -0.08%) [ +0.03% +0.00% +0.13% / -0.08% +0.58% +0.45%] index_select perm : Elapsed 0.040 ms (3.992 ms / 100) 4.003 -> 4.000 ( -0.07%) [ +0.02% +0.00% +0.07% / -0.07% +0.30% +0.37%] index_select perm_sorted : Elapsed 0.040 ms (4.004 ms / 100) B = [40, 16, 5, 4] (stride (1, 800, 40, 200)) A = [40, 20, 5, 4] (stride (80, 4, 3200, 1)) dim = 1 3.954 -> 3.957 ( +0.08%) [ +0.03% +0.13% +0.00% / +0.08% +0.51% +0.51%] index_select const : Elapsed 0.040 ms (3.955 ms / 100) 3.933 -> 3.937 ( +0.10%) [ +0.10% +0.00% +0.13% / +0.10% +0.58% +0.66%] index_select wrap : Elapsed 0.039 ms (3.937 ms / 100) 3.936 -> 3.935 ( -0.03%) [ +0.15% +0.00% +0.00% / -0.03% +0.64% +0.56%] index_select linear : Elapsed 0.039 ms (3.942 ms / 100) 3.934 -> 3.940 ( +0.15%) [ +0.05% +0.18% +0.00% / +0.15% +0.48% +0.56%] index_select reverse : Elapsed 0.039 ms (3.936 ms / 100) 3.952 -> 3.955 ( +0.08%) [ +0.13% +0.00% +0.00% / +0.08% +0.58% +0.30%] index_select skip64 : Elapsed 0.040 ms (3.957 ms / 100) 3.945 -> 3.948 ( +0.08%) [ +0.05% +0.00% +0.05% / +0.08% +0.58% +0.58%] index_select skip256 : Elapsed 0.039 ms (3.947 ms / 100) 3.933 -> 3.936 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.61% +0.64%] index_select spread : Elapsed 0.039 ms (3.933 ms / 100) 3.935 -> 3.939 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.79% +1.04%] index_select strided 3 : Elapsed 0.039 ms (3.937 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.64% +0.58%] index_select strided 5 : Elapsed 0.039 ms (3.934 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.46% +0.58%] index_select strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 3.945 -> 3.948 ( +0.08%) [ +0.05% +0.13% +0.00% / +0.08% +0.79% +0.84%] index_select strided 8 : Elapsed 0.039 ms (3.947 ms / 100) 3.934 -> 3.936 ( +0.05%) [ +0.00% +0.08% +0.13% / +0.05% +0.81% +0.74%] index_select strided 16 : Elapsed 0.039 ms (3.934 ms / 100) 3.937 -> 3.940 ( +0.08%) [ +0.08% +0.13% +0.00% / +0.08% +0.71% +0.71%] index_select random : Elapsed 0.039 ms (3.940 ms / 100) 3.944 -> 3.939 ( -0.13%) [ +0.03% +0.00% +0.00% / -0.13% +0.58% +0.51%] index_select random_sorted : Elapsed 0.039 ms (3.945 ms / 100) 3.935 -> 3.940 ( +0.13%) [ +0.15% +0.18% +0.00% / +0.13% +0.61% +0.79%] index_select perm : Elapsed 0.039 ms (3.941 ms / 100) 3.929 -> 3.932 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.71% +0.71%] index_select perm_sorted : Elapsed 0.039 ms (3.932 ms / 100) B = [40, 16, 5, 4] (stride (80, 5, 1, 3200)) A = [40, 20, 5, 4] (stride (400, 5, 1, 100)) dim = 1 3.989 -> 3.992 ( +0.08%) [ +0.00% +0.10% +0.00% / +0.08% +0.85% +0.85%] index_select const : Elapsed 0.040 ms (3.989 ms / 100) 3.981 -> 3.980 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.60% +0.60%] index_select wrap : Elapsed 0.040 ms (3.983 ms / 100) 3.992 -> 3.998 ( +0.15%) [ +0.15% +0.13% +0.00% / +0.15% +0.80% +0.88%] index_select linear : Elapsed 0.040 ms (3.998 ms / 100) 3.993 -> 3.998 ( +0.13%) [ +0.10% +0.00% +0.13% / +0.13% +0.83% +1.00%] index_select reverse : Elapsed 0.040 ms (3.997 ms / 100) 3.999 -> 3.998 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.65% +0.63%] index_select skip64 : Elapsed 0.040 ms (3.999 ms / 100) 3.998 -> 4.006 ( +0.20%) [ +0.23% +0.00% +0.23% / +0.20% +0.73% +0.75%] index_select skip256 : Elapsed 0.040 ms (4.007 ms / 100) 3.989 -> 3.991 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.55% +0.63%] index_select spread : Elapsed 0.040 ms (3.989 ms / 100) 3.983 -> 3.983 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.63% +0.65%] index_select strided 3 : Elapsed 0.040 ms (3.990 ms / 100) 3.981 -> 3.986 ( +0.13%) [ +0.00% +0.13% +0.10% / +0.13% +0.78% +0.88%] index_select strided 5 : Elapsed 0.040 ms (3.981 ms / 100) 4.000 -> 4.005 ( +0.12%) [ +0.00% +0.05% +0.05% / +0.12% +0.45% +0.50%] index_select strided 7 : Elapsed 0.040 ms (4.000 ms / 100) 3.995 -> 3.995 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.68% +0.63%] index_select strided 8 : Elapsed 0.040 ms (3.997 ms / 100) 3.998 -> 4.000 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.65% +0.60%] index_select strided 16 : Elapsed 0.040 ms (3.998 ms / 100) 3.996 -> 3.995 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.50% +0.48%] index_select random : Elapsed 0.040 ms (3.997 ms / 100) 3.997 -> 3.996 ( -0.03%) [ +0.38% +0.00% +0.18% / -0.03% +0.83% +0.70%] index_select random_sorted : Elapsed 0.040 ms (4.012 ms / 100) 3.986 -> 3.989 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.78% +0.70%] index_select perm : Elapsed 0.040 ms (3.989 ms / 100) 3.996 -> 4.000 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.48% +0.48%] index_select perm_sorted : Elapsed 0.040 ms (4.000 ms / 100) B = [40, 16, 5, 4] (stride (16, 1, 640, 3200)) A = [40, 20, 5, 4] (stride (100, 1, 20, 4000)) dim = 1 4.143 -> 4.142 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.43% +0.41%] index_select const : Elapsed 0.041 ms (4.144 ms / 100) 4.142 -> 4.144 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.53% +0.56%] index_select wrap : Elapsed 0.041 ms (4.145 ms / 100) 4.142 -> 4.141 ( -0.02%) [ +0.02% +0.00% +0.19% / -0.02% +0.43% +0.43%] index_select linear : Elapsed 0.041 ms (4.143 ms / 100) 4.143 -> 4.143 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.43% +0.41%] index_select reverse : Elapsed 0.041 ms (4.145 ms / 100) 4.145 -> 4.145 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.48% +0.46%] index_select skip64 : Elapsed 0.041 ms (4.146 ms / 100) 4.138 -> 4.138 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.58% +0.63%] index_select skip256 : Elapsed 0.041 ms (4.142 ms / 100) 4.142 -> 4.142 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.53% +0.51%] index_select spread : Elapsed 0.041 ms (4.144 ms / 100) 4.148 -> 4.147 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.34% +0.43%] index_select strided 3 : Elapsed 0.041 ms (4.149 ms / 100) 4.141 -> 4.141 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.58% +0.53%] index_select strided 5 : Elapsed 0.041 ms (4.142 ms / 100) 4.139 -> 4.142 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.68% +0.65%] index_select strided 7 : Elapsed 0.041 ms (4.142 ms / 100) 4.136 -> 4.136 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.73% +0.73%] index_select strided 8 : Elapsed 0.041 ms (4.138 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_select strided 16 : Elapsed 0.041 ms (4.140 ms / 100) 4.145 -> 4.141 ( -0.10%) [ +0.00% +0.00% +0.02% / -0.10% +0.53% +0.60%] index_select random : Elapsed 0.041 ms (4.145 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.56% +0.58%] index_select random_sorted : Elapsed 0.041 ms (4.140 ms / 100) 4.140 -> 4.140 ( +0.00%) [ +0.39% +0.02% +0.00% / +0.00% +0.63% +0.63%] index_select perm : Elapsed 0.042 ms (4.156 ms / 100) 4.141 -> 4.146 ( +0.12%) [ +0.17% +0.00% +0.17% / +0.12% +0.70% +0.72%] index_select perm_sorted : Elapsed 0.041 ms (4.148 ms / 100) B = [40, 16, 5, 4] (stride (1, 40, 640, 3200)) A = [40, 20, 5, 4] (stride (400, 5, 1, 100)) dim = 1 3.991 -> 3.994 ( +0.08%) [ +0.00% +0.18% +0.15% / +0.08% +0.90% +0.85%] index_select const : Elapsed 0.040 ms (3.991 ms / 100) 3.984 -> 3.982 ( -0.05%) [ +0.08% +0.03% +0.00% / -0.05% +0.68% +0.75%] index_select wrap : Elapsed 0.040 ms (3.987 ms / 100) 3.996 -> 3.998 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.83% +0.75%] index_select linear : Elapsed 0.040 ms (3.999 ms / 100) 3.997 -> 4.003 ( +0.15%) [ +0.10% +0.18% +0.00% / +0.15% +0.85% +0.85%] index_select reverse : Elapsed 0.040 ms (4.001 ms / 100) 3.995 -> 3.998 ( +0.08%) [ +0.18% +0.00% +0.15% / +0.08% +0.68% +0.68%] index_select skip64 : Elapsed 0.040 ms (4.002 ms / 100) 4.006 -> 4.002 ( -0.10%) [ +0.05% +0.00% +0.05% / -0.10% +0.60% +0.57%] index_select skip256 : Elapsed 0.040 ms (4.008 ms / 100) 3.994 -> 3.994 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.63% +0.58%] index_select spread : Elapsed 0.040 ms (3.994 ms / 100) 3.984 -> 3.991 ( +0.18%) [ +0.23% +0.20% +0.00% / +0.18% +0.70% +0.75%] index_select strided 3 : Elapsed 0.040 ms (3.993 ms / 100) 3.986 -> 3.982 ( -0.10%) [ +0.00% +0.05% +0.00% / -0.10% +0.63% +0.78%] index_select strided 5 : Elapsed 0.040 ms (3.986 ms / 100) 4.003 -> 4.005 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.55% +0.50%] index_select strided 7 : Elapsed 0.040 ms (4.003 ms / 100) 3.999 -> 3.996 ( -0.08%) [ +0.00% +0.03% +0.05% / -0.08% +0.65% +0.60%] index_select strided 8 : Elapsed 0.040 ms (3.999 ms / 100) 4.001 -> 3.999 ( -0.05%) [ +0.00% +0.02% +0.00% / -0.05% +0.60% +0.62%] index_select strided 16 : Elapsed 0.040 ms (4.001 ms / 100) 4.001 -> 3.995 ( -0.15%) [ +0.00% +0.20% +0.10% / -0.15% +0.50% +0.60%] index_select random : Elapsed 0.040 ms (4.001 ms / 100) 3.996 -> 3.997 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.53% +0.50%] index_select random_sorted : Elapsed 0.040 ms (3.999 ms / 100) 3.994 -> 4.004 ( +0.25%) [ +0.28% +0.28% +0.00% / +0.25% +0.75% +0.73%] index_select perm : Elapsed 0.040 ms (4.005 ms / 100) 3.997 -> 3.997 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.00% +0.50% +0.50%] index_select perm_sorted : Elapsed 0.040 ms (3.998 ms / 100) out_shape = [40, 20, 16, 4] in_shape = [40, 20, 5, 4] idx_dim = 2 B = [40, 20, 16, 4] (stride (1280, 64, 4, 1)) A = [40, 20, 5, 4] (stride (400, 1, 20, 100)) dim = 2 2.236 -> 2.236 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +2.15% +2.19%] index_add_ linear : Elapsed 0.022 ms (2.237 ms / 100) 2.191 -> 2.190 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +2.05% +2.28%] index_copy_ linear : Elapsed 0.022 ms (2.191 ms / 100) 2.234 -> 2.238 ( +0.18%) [ +0.22% +0.40% +0.00% / +0.18% +2.28% +2.10%] index_add_ reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.191 -> 2.197 ( +0.27%) [ +0.00% +0.18% +0.05% / +0.27% +1.87% +2.19%] index_copy_ reverse : Elapsed 0.022 ms (2.191 ms / 100) 2.237 -> 2.245 ( +0.36%) [ +0.13% +0.00% +0.18% / +0.36% +2.68% +2.68%] index_add_ spread : Elapsed 0.022 ms (2.240 ms / 100) 2.207 -> 2.207 ( +0.00%) [ +0.00% +0.05% +0.14% / +0.00% +2.49% +2.45%] index_copy_ spread : Elapsed 0.022 ms (2.207 ms / 100) 2.250 -> 2.253 ( +0.13%) [ +0.00% +0.09% +0.13% / +0.13% +2.13% +2.13%] index_add_ strided 3 : Elapsed 0.023 ms (2.250 ms / 100) 2.218 -> 2.222 ( +0.18%) [ +0.05% +0.00% +0.23% / +0.18% +2.07% +2.16%] index_copy_ strided 3 : Elapsed 0.022 ms (2.219 ms / 100) 2.235 -> 2.239 ( +0.18%) [ +0.04% +0.09% +0.00% / +0.18% +2.24% +2.42%] index_add_ strided 5 : Elapsed 0.022 ms (2.236 ms / 100) 2.194 -> 2.200 ( +0.27%) [ +0.00% +0.32% +0.05% / +0.27% +2.37% +2.28%] index_copy_ strided 5 : Elapsed 0.022 ms (2.194 ms / 100) 2.238 -> 2.237 ( -0.04%) [ +0.00% +0.18% +0.22% / -0.04% +2.64% +2.41%] index_add_ strided 7 : Elapsed 0.022 ms (2.238 ms / 100) 2.212 -> 2.213 ( +0.05%) [ +0.05% +0.00% +0.18% / +0.05% +2.31% +2.22%] index_copy_ strided 7 : Elapsed 0.022 ms (2.213 ms / 100) 2.246 -> 2.249 ( +0.13%) [ +0.27% +0.00% +0.04% / +0.13% +2.14% +1.87%] index_add_ perm : Elapsed 0.023 ms (2.252 ms / 100) 2.214 -> 2.213 ( -0.05%) [ +0.18% +0.00% +0.09% / -0.05% +2.17% +1.85%] index_copy_ perm : Elapsed 0.022 ms (2.218 ms / 100) 2.234 -> 2.243 ( +0.40%) [ +0.31% +0.13% +0.00% / +0.40% +2.33% +2.06%] index_add_ perm_sorted : Elapsed 0.022 ms (2.241 ms / 100) 2.202 -> 2.212 ( +0.45%) [ +0.32% +0.00% +0.27% / +0.45% +2.23% +2.45%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.209 ms / 100) 4.575 -> 4.586 ( +0.24%) [ +0.39% +0.37% +0.00% / +0.24% +0.92% +0.90%] index_select const : Elapsed 0.046 ms (4.593 ms / 100) 4.618 -> 4.623 ( +0.11%) [ +0.04% +0.09% +0.00% / +0.11% +1.21% +1.19%] index_select wrap : Elapsed 0.046 ms (4.620 ms / 100) 4.632 -> 4.630 ( -0.04%) [ +0.04% +0.15% +0.00% / -0.04% +1.27% +1.38%] index_select linear : Elapsed 0.046 ms (4.634 ms / 100) 4.626 -> 4.633 ( +0.15%) [ +0.00% +0.17% +0.15% / +0.15% +1.56% +1.58%] index_select reverse : Elapsed 0.046 ms (4.626 ms / 100) 4.586 -> 4.585 ( -0.02%) [ +0.00% +0.07% +0.07% / -0.02% +0.81% +0.85%] index_select skip64 : Elapsed 0.046 ms (4.586 ms / 100) 4.575 -> 4.582 ( +0.15%) [ +0.00% +0.17% +0.15% / +0.15% +0.81% +0.92%] index_select skip256 : Elapsed 0.046 ms (4.575 ms / 100) 4.628 -> 4.625 ( -0.06%) [ +0.00% +0.09% +0.11% / -0.06% +1.71% +1.17%] index_select spread : Elapsed 0.046 ms (4.628 ms / 100) 4.636 -> 4.637 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +1.79% +1.57%] index_select strided 3 : Elapsed 0.046 ms (4.636 ms / 100) 4.632 -> 4.632 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.36% +1.55%] index_select random : Elapsed 0.046 ms (4.632 ms / 100) 4.604 -> 4.608 ( +0.09%) [ +0.11% +0.00% +0.15% / +0.09% +1.54% +1.82%] index_select random_sorted : Elapsed 0.046 ms (4.609 ms / 100) B = [40, 20, 16, 4] (stride (1280, 64, 4, 1)) A = [40, 20, 5, 4] (stride (5, 800, 1, 200)) dim = 2 2.417 -> 2.420 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.37% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.419 ms / 100) 2.348 -> 2.347 ( -0.04%) [ +0.17% +0.00% +0.30% / -0.04% +0.55% +0.30%] index_copy_ linear : Elapsed 0.024 ms (2.352 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.45% +0.41%] index_add_ reverse : Elapsed 0.024 ms (2.420 ms / 100) 2.346 -> 2.350 ( +0.17%) [ +0.26% +0.04% +0.00% / +0.17% +0.51% +0.34%] index_copy_ reverse : Elapsed 0.024 ms (2.352 ms / 100) 2.414 -> 2.420 ( +0.25%) [ +0.12% +0.00% +0.00% / +0.25% +0.54% +0.58%] index_add_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.365 -> 2.365 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.25% +0.34%] index_copy_ spread : Elapsed 0.024 ms (2.366 ms / 100) 2.418 -> 2.425 ( +0.29%) [ +0.17% +0.21% +0.00% / +0.29% +0.33% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.368 -> 2.370 ( +0.08%) [ +0.04% +0.17% +0.00% / +0.08% +0.13% +0.21%] index_copy_ strided 3 : Elapsed 0.024 ms (2.369 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.21% +0.00% +0.04% / +0.08% +0.58% +0.42%] index_add_ strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.345 -> 2.346 ( +0.04%) [ +0.17% +0.00% +0.30% / +0.04% +0.30% +0.21%] index_copy_ strided 5 : Elapsed 0.023 ms (2.349 ms / 100) 2.413 -> 2.411 ( -0.08%) [ +0.00% +0.00% +0.12% / -0.08% +0.12% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.362 -> 2.362 ( +0.00%) [ +0.00% +0.13% +0.21% / +0.08% +0.00% +0.38%] index_copy_ strided 7 : Elapsed 0.024 ms (2.362 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.04% +0.00% +0.04% / +0.17% +0.37% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.343 -> 2.348 ( +0.21%) [ +0.47% +0.00% +0.17% / +0.21% +0.47% +0.47%] index_copy_ perm : Elapsed 0.024 ms (2.354 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.25% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) 2.344 -> 2.347 ( +0.13%) [ +0.00% +0.17% +0.00% / +0.13% +0.60% +0.38%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.344 ms / 100) 5.182 -> 5.185 ( +0.06%) [ +0.04% +0.06% +0.00% / +0.06% +0.50% +0.42%] index_select const : Elapsed 0.052 ms (5.184 ms / 100) 5.172 -> 5.169 ( -0.06%) [ +0.19% +0.10% +0.00% / -0.06% +0.37% +0.52%] index_select wrap : Elapsed 0.052 ms (5.182 ms / 100) 5.174 -> 5.170 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.48% +0.50%] index_select linear : Elapsed 0.052 ms (5.176 ms / 100) 5.162 -> 5.156 ( -0.12%) [ +0.04% +0.00% +0.02% / -0.12% +0.76% +0.48%] index_select reverse : Elapsed 0.052 ms (5.164 ms / 100) 5.167 -> 5.182 ( +0.29%) [ +0.14% +0.17% +0.00% / +0.29% +0.68% +0.68%] index_select skip64 : Elapsed 0.052 ms (5.174 ms / 100) 5.172 -> 5.179 ( +0.14%) [ +0.02% +0.00% +0.21% / +0.14% +0.37% +0.48%] index_select skip256 : Elapsed 0.052 ms (5.173 ms / 100) 5.179 -> 5.177 ( -0.04%) [ +0.06% +0.00% +0.04% / -0.04% +0.29% +0.48%] index_select spread : Elapsed 0.052 ms (5.182 ms / 100) 5.166 -> 5.172 ( +0.12%) [ +0.25% +0.00% +0.17% / +0.12% +0.50% +0.64%] index_select strided 3 : Elapsed 0.052 ms (5.179 ms / 100) 5.166 -> 5.176 ( +0.19%) [ +0.14% +0.00% +0.15% / +0.19% +0.66% +0.83%] index_select random : Elapsed 0.052 ms (5.173 ms / 100) 5.154 -> 5.171 ( +0.33%) [ +0.14% +0.14% +0.00% / +0.33% +0.70% +0.95%] index_select random_sorted : Elapsed 0.052 ms (5.161 ms / 100) B = [40, 20, 16, 4] (stride (1280, 64, 1, 16)) A = [40, 20, 5, 4] (stride (20, 1, 800, 4000)) dim = 2 0.929 -> 0.915 ( -1.51%) [ +0.54% +0.00% +0.32% / +0.32% -0.65% -1.51%] index_add_ linear : Elapsed 0.009 ms (0.934 ms / 100) 0.918 -> 0.897 ( -2.29%) [ +0.33% +0.54% +0.00% / +0.22% -1.85% -2.29%] index_copy_ linear : Elapsed 0.009 ms (0.921 ms / 100) 0.926 -> 0.916 ( -1.08%) [ +0.00% +0.32% +0.54% / +0.22% -0.97% -1.08%] index_add_ reverse : Elapsed 0.009 ms (0.926 ms / 100) 0.913 -> 0.894 ( -2.08%) [ +0.22% +0.22% +0.00% / +0.00% -1.86% -2.08%] index_copy_ reverse : Elapsed 0.009 ms (0.915 ms / 100) 0.956 -> 0.939 ( -1.78%) [ +0.63% +0.31% +0.00% / -0.10% -1.78% -1.36%] index_add_ spread : Elapsed 0.010 ms (0.962 ms / 100) 0.943 -> 0.923 ( -2.12%) [ +0.42% +0.00% +0.42% / +0.00% -2.12% -2.01%] index_copy_ spread : Elapsed 0.009 ms (0.947 ms / 100) 0.954 -> 0.936 ( -1.89%) [ +0.00% +0.42% +0.31% / +0.10% -1.89% -1.26%] index_add_ strided 3 : Elapsed 0.010 ms (0.954 ms / 100) 0.943 -> 0.918 ( -2.65%) [ +0.00% +0.21% +0.11% / -0.21% -2.65% -2.23%] index_copy_ strided 3 : Elapsed 0.009 ms (0.943 ms / 100) 0.952 -> 0.940 ( -1.26%) [ +0.00% +0.00% +0.42% / +0.11% -1.16% -1.26%] index_add_ strided 5 : Elapsed 0.010 ms (0.952 ms / 100) 0.937 -> 0.920 ( -1.81%) [ +0.11% +0.43% +0.00% / +0.53% -1.81% -1.49%] index_copy_ strided 5 : Elapsed 0.009 ms (0.938 ms / 100) 0.956 -> 0.942 ( -1.46%) [ +0.00% +0.21% +0.42% / +0.10% -1.46% -0.94%] index_add_ strided 7 : Elapsed 0.010 ms (0.956 ms / 100) 0.943 -> 0.925 ( -1.91%) [ +0.00% +0.42% +0.42% / +0.00% -1.91% -1.91%] index_copy_ strided 7 : Elapsed 0.009 ms (0.943 ms / 100) 0.954 -> 0.937 ( -1.78%) [ +0.21% +0.10% +0.00% / +0.52% -1.57% -1.78%] index_add_ perm : Elapsed 0.010 ms (0.956 ms / 100) 0.939 -> 0.918 ( -2.24%) [ +0.32% +0.32% +0.00% / +0.21% -2.24% -2.13%] index_copy_ perm : Elapsed 0.009 ms (0.942 ms / 100) 0.954 -> 0.941 ( -1.36%) [ +0.00% +0.52% +0.00% / +0.31% -1.36% -1.26%] index_add_ perm_sorted : Elapsed 0.010 ms (0.954 ms / 100) 0.942 -> 0.922 ( -2.12%) [ +0.00% +0.32% +0.32% / +0.32% -2.02% -2.12%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.942 ms / 100) 1.681 -> 1.686 ( +0.30%) [ +0.24% +0.00% +0.06% / +0.30% +1.07% +0.77%] index_select const : Elapsed 0.017 ms (1.685 ms / 100) 1.719 -> 1.705 ( -0.81%) [ +0.58% +0.00% +0.35% / +0.47% -0.81% -0.81%] index_select wrap : Elapsed 0.017 ms (1.729 ms / 100) 1.717 -> 1.701 ( -0.93%) [ +0.12% +0.29% +0.00% / +0.17% -0.87% -0.93%] index_select linear : Elapsed 0.017 ms (1.719 ms / 100) 1.708 -> 1.700 ( -0.47%) [ +0.41% +0.53% +0.00% / +0.18% -0.41% -0.47%] index_select reverse : Elapsed 0.017 ms (1.715 ms / 100) 1.687 -> 1.685 ( -0.12%) [ +0.00% +0.06% +0.00% / -0.12% +0.89% +0.71%] index_select skip64 : Elapsed 0.017 ms (1.687 ms / 100) 1.683 -> 1.680 ( -0.18%) [ +0.06% +0.00% +0.06% / -0.18% +0.77% +0.89%] index_select skip256 : Elapsed 0.017 ms (1.684 ms / 100) 1.712 -> 1.698 ( -0.82%) [ +0.06% +0.00% +0.06% / -0.06% -0.82% -0.64%] index_select spread : Elapsed 0.017 ms (1.713 ms / 100) 1.733 -> 1.700 ( -1.90%) [ +0.12% +0.23% +0.00% / +0.12% -1.90% -1.62%] index_select strided 3 : Elapsed 0.017 ms (1.735 ms / 100) 1.742 -> 1.700 ( -2.41%) [ +0.00% +0.06% +0.17% / -0.23% -2.41% -2.18%] index_select random : Elapsed 0.017 ms (1.742 ms / 100) 1.716 -> 1.702 ( -0.82%) [ +0.00% +0.06% +0.00% / -0.12% -0.82% -0.52%] index_select random_sorted : Elapsed 0.017 ms (1.716 ms / 100) B = [40, 20, 16, 4] (stride (1280, 1, 20, 320)) A = [40, 20, 5, 4] (stride (5, 800, 1, 200)) dim = 2 2.558 -> 2.558 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.51% +0.51%] index_add_ linear : Elapsed 0.026 ms (2.560 ms / 100) 2.474 -> 2.480 ( +0.24%) [ +0.20% +0.16% +0.00% / +0.24% +0.73% +0.57%] index_copy_ linear : Elapsed 0.025 ms (2.479 ms / 100) 2.559 -> 2.561 ( +0.08%) [ +0.20% +0.04% +0.00% / +0.08% +0.43% +0.66%] index_add_ reverse : Elapsed 0.026 ms (2.564 ms / 100) 2.479 -> 2.476 ( -0.12%) [ +0.00% +0.00% +0.12% / -0.12% +0.48% +0.56%] index_copy_ reverse : Elapsed 0.025 ms (2.479 ms / 100) 2.537 -> 2.539 ( +0.08%) [ +0.28% +0.20% +0.00% / +0.08% +0.67% +0.47%] index_add_ spread : Elapsed 0.025 ms (2.544 ms / 100) 2.466 -> 2.464 ( -0.08%) [ +0.12% +0.00% +0.16% / -0.08% +0.53% +0.49%] index_copy_ spread : Elapsed 0.025 ms (2.469 ms / 100) 2.540 -> 2.542 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.39% +0.43%] index_add_ strided 3 : Elapsed 0.025 ms (2.540 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.16% +0.36%] index_copy_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.556 -> 2.553 ( -0.12%) [ +0.04% +0.12% +0.00% / -0.12% +0.43% +0.51%] index_add_ strided 5 : Elapsed 0.026 ms (2.557 ms / 100) 2.476 -> 2.485 ( +0.36%) [ +0.20% +0.08% +0.00% / +0.36% +0.57% +0.69%] index_copy_ strided 5 : Elapsed 0.025 ms (2.481 ms / 100) 2.552 -> 2.552 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.51% +0.51%] index_add_ strided 7 : Elapsed 0.026 ms (2.552 ms / 100) 2.479 -> 2.481 ( +0.08%) [ +0.00% +0.20% +0.12% / +0.08% +0.56% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.553 -> 2.557 ( +0.16%) [ +0.39% +0.35% +0.00% / +0.16% +0.47% +0.51%] index_add_ perm : Elapsed 0.026 ms (2.563 ms / 100) 2.481 -> 2.482 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.24% +0.20%] index_copy_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.555 -> 2.557 ( +0.08%) [ +0.12% +0.43% +0.00% / +0.08% +0.59% +0.51%] index_add_ perm_sorted : Elapsed 0.026 ms (2.558 ms / 100) 2.479 -> 2.480 ( +0.04%) [ +0.00% +0.56% +0.00% / +0.04% +0.56% +0.40%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.479 ms / 100) 5.620 -> 5.621 ( +0.02%) [ +0.00% +0.30% +0.00% / +0.02% +0.60% +0.57%] index_select const : Elapsed 0.056 ms (5.620 ms / 100) 5.610 -> 5.607 ( -0.05%) [ +0.00% +0.07% +0.02% / -0.05% +0.71% +0.57%] index_select wrap : Elapsed 0.056 ms (5.610 ms / 100) 5.608 -> 5.612 ( +0.07%) [ +0.11% +0.20% +0.00% / +0.07% +0.84% +0.71%] index_select linear : Elapsed 0.056 ms (5.614 ms / 100) 5.614 -> 5.612 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.59% +0.52%] index_select reverse : Elapsed 0.056 ms (5.619 ms / 100) 5.617 -> 5.622 ( +0.09%) [ +0.04% +0.00% +0.04% / +0.09% +0.59% +0.61%] index_select skip64 : Elapsed 0.056 ms (5.619 ms / 100) 5.615 -> 5.619 ( +0.07%) [ +0.07% +0.11% +0.00% / +0.07% +0.66% +0.55%] index_select skip256 : Elapsed 0.056 ms (5.619 ms / 100) 5.615 -> 5.621 ( +0.11%) [ +0.02% +0.09% +0.00% / +0.11% +0.62% +0.62%] index_select spread : Elapsed 0.056 ms (5.616 ms / 100) 5.607 -> 5.608 ( +0.02%) [ +0.09% +0.00% +0.09% / +0.02% +0.78% +0.71%] index_select strided 3 : Elapsed 0.056 ms (5.612 ms / 100) 5.607 -> 5.612 ( +0.09%) [ +0.25% +0.16% +0.00% / +0.09% +0.78% +0.80%] index_select random : Elapsed 0.056 ms (5.621 ms / 100) 5.604 -> 5.604 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.71% +0.73%] index_select random_sorted : Elapsed 0.056 ms (5.604 ms / 100) B = [40, 20, 16, 4] (stride (1, 160, 3200, 40)) A = [40, 20, 5, 4] (stride (400, 20, 1, 5)) dim = 2 2.197 -> 2.205 ( +0.36%) [ +0.00% +0.05% +0.05% / +0.36% +0.73% +1.00%] index_add_ linear : Elapsed 0.022 ms (2.197 ms / 100) 2.155 -> 2.162 ( +0.32%) [ +0.00% +0.32% +0.37% / +0.32% +0.79% +1.25%] index_copy_ linear : Elapsed 0.022 ms (2.155 ms / 100) 2.199 -> 2.198 ( -0.05%) [ +0.05% +0.14% +0.00% / -0.05% +1.00% +1.09%] index_add_ reverse : Elapsed 0.022 ms (2.200 ms / 100) 2.163 -> 2.160 ( -0.14%) [ +0.23% +0.18% +0.00% / -0.14% +0.65% +0.60%] index_copy_ reverse : Elapsed 0.022 ms (2.168 ms / 100) 2.196 -> 2.199 ( +0.14%) [ +0.36% +0.23% +0.00% / +0.14% +1.00% +1.18%] index_add_ spread : Elapsed 0.022 ms (2.204 ms / 100) 2.152 -> 2.159 ( +0.33%) [ +0.33% +0.46% +0.00% / +0.33% +0.98% +1.25%] index_copy_ spread : Elapsed 0.022 ms (2.159 ms / 100) 2.193 -> 2.196 ( +0.14%) [ +0.00% +0.09% +0.14% / +0.14% +1.23% +1.28%] index_add_ strided 3 : Elapsed 0.022 ms (2.193 ms / 100) 2.153 -> 2.155 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.84% +0.98%] index_copy_ strided 3 : Elapsed 0.022 ms (2.155 ms / 100) 2.198 -> 2.201 ( +0.14%) [ +0.27% +0.00% +0.09% / +0.14% +1.23% +0.82%] index_add_ strided 5 : Elapsed 0.022 ms (2.204 ms / 100) 2.163 -> 2.159 ( -0.18%) [ +0.32% +0.18% +0.00% / -0.18% +0.69% +0.65%] index_copy_ strided 5 : Elapsed 0.022 ms (2.170 ms / 100) 2.194 -> 2.198 ( +0.18%) [ +0.23% +0.27% +0.00% / +0.18% +1.14% +1.09%] index_add_ strided 7 : Elapsed 0.022 ms (2.199 ms / 100) 2.156 -> 2.158 ( +0.09%) [ +0.05% +0.23% +0.00% / +0.09% +0.79% +1.07%] index_copy_ strided 7 : Elapsed 0.022 ms (2.157 ms / 100) 2.203 -> 2.204 ( +0.05%) [ +0.00% +0.23% +0.00% / +0.05% +0.86% +0.59%] index_add_ perm : Elapsed 0.022 ms (2.203 ms / 100) 2.158 -> 2.156 ( -0.09%) [ +0.00% +0.37% +0.14% / -0.09% +0.74% +0.70%] index_copy_ perm : Elapsed 0.022 ms (2.158 ms / 100) 2.197 -> 2.202 ( +0.23%) [ +0.50% +0.14% +0.00% / +0.23% +1.14% +1.23%] index_add_ perm_sorted : Elapsed 0.022 ms (2.208 ms / 100) 2.158 -> 2.156 ( -0.09%) [ +0.23% +0.00% +0.05% / -0.09% +0.93% +0.88%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.163 ms / 100) 4.617 -> 4.619 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +2.04% +0.71%] index_select const : Elapsed 0.046 ms (4.617 ms / 100) 4.607 -> 4.610 ( +0.07%) [ +0.20% +0.00% +0.04% / +0.07% +1.09% +0.76%] index_select wrap : Elapsed 0.046 ms (4.616 ms / 100) 4.610 -> 4.617 ( +0.15%) [ +0.00% +0.37% +0.26% / +0.15% +1.00% +1.02%] index_select linear : Elapsed 0.046 ms (4.610 ms / 100) 4.621 -> 4.627 ( +0.13%) [ +0.00% +0.02% +0.06% / +0.13% +0.97% +0.91%] index_select reverse : Elapsed 0.046 ms (4.621 ms / 100) 4.611 -> 4.606 ( -0.11%) [ +0.00% +0.07% +0.02% / -0.11% +0.98% +0.91%] index_select skip64 : Elapsed 0.046 ms (4.611 ms / 100) 4.602 -> 4.612 ( +0.22%) [ +0.20% +0.13% +0.00% / +0.22% +0.89% +0.93%] index_select skip256 : Elapsed 0.046 ms (4.611 ms / 100) 4.615 -> 4.617 ( +0.04%) [ +0.04% +0.00% +0.02% / +0.04% +1.00% +1.13%] index_select spread : Elapsed 0.046 ms (4.617 ms / 100) 4.606 -> 4.606 ( +0.00%) [ +0.41% +0.00% +0.04% / +0.00% +0.87% +1.30%] index_select strided 3 : Elapsed 0.046 ms (4.625 ms / 100) 4.616 -> 4.616 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.84% +1.15%] index_select random : Elapsed 0.046 ms (4.617 ms / 100) 4.608 -> 4.614 ( +0.13%) [ +0.02% +0.00% +0.00% / +0.13% +0.89% +0.91%] index_select random_sorted : Elapsed 0.046 ms (4.609 ms / 100) B = [40, 20, 16, 4] (stride (20, 1, 3200, 800)) A = [40, 20, 5, 4] (stride (20, 800, 4, 1)) dim = 2 2.309 -> 2.302 ( -0.30%) [ +0.00% +0.09% +0.09% / -0.30% -0.13% +0.26%] index_add_ linear : Elapsed 0.023 ms (2.309 ms / 100) 2.240 -> 2.234 ( -0.27%) [ +0.04% +0.09% +0.00% / -0.18% -0.27% -0.27%] index_copy_ linear : Elapsed 0.022 ms (2.241 ms / 100) 2.305 -> 2.295 ( -0.43%) [ +0.22% +0.04% +0.00% / -0.35% -0.17% -0.43%] index_add_ reverse : Elapsed 0.023 ms (2.310 ms / 100) 2.234 -> 2.230 ( -0.18%) [ +0.04% +0.13% +0.00% / -0.18% -0.04% -0.13%] index_copy_ reverse : Elapsed 0.022 ms (2.235 ms / 100) 2.296 -> 2.297 ( +0.04%) [ +0.00% +0.22% +0.30% / +0.22% +0.04% +0.22%] index_add_ spread : Elapsed 0.023 ms (2.296 ms / 100) 2.229 -> 2.233 ( +0.18%) [ +0.00% +0.04% +0.27% / +0.18% +0.22% +0.27%] index_copy_ spread : Elapsed 0.022 ms (2.229 ms / 100) 2.301 -> 2.296 ( -0.22%) [ +0.26% +0.26% +0.00% / -0.22% +0.04% +0.17%] index_add_ strided 3 : Elapsed 0.023 ms (2.307 ms / 100) 2.231 -> 2.230 ( -0.04%) [ +0.31% +0.22% +0.00% / -0.04% +0.27% +0.13%] index_copy_ strided 3 : Elapsed 0.022 ms (2.238 ms / 100) 2.310 -> 2.308 ( -0.09%) [ +0.13% +0.00% +0.09% / -0.09% -0.09% +0.00%] index_add_ strided 5 : Elapsed 0.023 ms (2.313 ms / 100) 2.239 -> 2.237 ( -0.09%) [ +0.04% +0.18% +0.00% / +0.04% -0.04% -0.09%] index_copy_ strided 5 : Elapsed 0.022 ms (2.240 ms / 100) 2.302 -> 2.303 ( +0.04%) [ +0.13% +0.17% +0.00% / +0.04% +0.04% +0.13%] index_add_ strided 7 : Elapsed 0.023 ms (2.305 ms / 100) 2.232 -> 2.229 ( -0.13%) [ +0.00% +0.18% +0.09% / +0.04% -0.13% +0.04%] index_copy_ strided 7 : Elapsed 0.022 ms (2.232 ms / 100) 2.299 -> 2.301 ( +0.09%) [ +0.00% +0.26% +0.09% / +0.09% +0.43% +0.52%] index_add_ perm : Elapsed 0.023 ms (2.299 ms / 100) 2.230 -> 2.234 ( +0.18%) [ +0.00% +0.31% +0.09% / +0.18% +0.36% +0.54%] index_copy_ perm : Elapsed 0.022 ms (2.230 ms / 100) 2.301 -> 2.295 ( -0.26%) [ +0.30% +0.00% +0.22% / -0.26% +0.17% +0.43%] index_add_ perm_sorted : Elapsed 0.023 ms (2.308 ms / 100) 2.230 -> 2.225 ( -0.22%) [ +0.31% +0.00% +0.13% / -0.22% +0.27% +0.13%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.237 ms / 100) 4.676 -> 4.677 ( +0.02%) [ +0.11% +0.00% +0.26% / +0.02% +0.86% +0.86%] index_select const : Elapsed 0.047 ms (4.681 ms / 100) 4.717 -> 4.719 ( +0.04%) [ +0.00% +0.17% +0.04% / +0.04% +0.45% +0.53%] index_select wrap : Elapsed 0.047 ms (4.717 ms / 100) 4.730 -> 4.739 ( +0.19%) [ +0.00% +0.27% +0.21% / +0.19% +0.76% +0.89%] index_select linear : Elapsed 0.047 ms (4.730 ms / 100) 4.716 -> 4.733 ( +0.36%) [ +0.19% +0.17% +0.00% / +0.36% +0.76% +0.89%] index_select reverse : Elapsed 0.047 ms (4.725 ms / 100) 4.680 -> 4.673 ( -0.15%) [ +0.11% +0.06% +0.00% / -0.15% +0.41% +0.62%] index_select skip64 : Elapsed 0.047 ms (4.685 ms / 100) 4.680 -> 4.683 ( +0.06%) [ +0.11% +0.19% +0.00% / +0.06% +0.58% +0.75%] index_select skip256 : Elapsed 0.047 ms (4.685 ms / 100) 4.725 -> 4.731 ( +0.13%) [ +0.00% +0.11% +0.02% / +0.13% +0.66% +0.70%] index_select spread : Elapsed 0.047 ms (4.725 ms / 100) 4.703 -> 4.701 ( -0.04%) [ +0.00% +0.19% +0.00% / -0.04% +0.60% +0.64%] index_select strided 3 : Elapsed 0.047 ms (4.703 ms / 100) 4.719 -> 4.724 ( +0.11%) [ +0.21% +0.19% +0.00% / +0.11% +0.76% +0.81%] index_select random : Elapsed 0.047 ms (4.729 ms / 100) 4.713 -> 4.714 ( +0.02%) [ +0.00% +0.19% +0.08% / +0.02% +0.68% +0.42%] index_select random_sorted : Elapsed 0.047 ms (4.713 ms / 100) B = [40, 20, 16, 4] (stride (16, 640, 1, 12800)) A = [40, 20, 5, 4] (stride (400, 4, 80, 1)) dim = 2 2.350 -> 2.352 ( +0.09%) [ +0.26% +0.21% +0.00% / +0.09% +4.04% +1.53%] index_add_ linear : Elapsed 0.024 ms (2.356 ms / 100) 2.308 -> 2.313 ( +0.22%) [ +0.09% +0.00% +0.17% / +0.22% +1.95% +2.04%] index_copy_ linear : Elapsed 0.023 ms (2.310 ms / 100) 2.350 -> 2.349 ( -0.04%) [ +0.13% +0.00% +0.09% / -0.04% +1.40% +1.19%] index_add_ reverse : Elapsed 0.024 ms (2.353 ms / 100) 2.310 -> 2.319 ( +0.39%) [ +0.17% +0.30% +0.00% / +0.39% +2.03% +1.90%] index_copy_ reverse : Elapsed 0.023 ms (2.314 ms / 100) 2.397 -> 2.395 ( -0.08%) [ +0.29% +0.08% +0.00% / -0.08% +1.08% +1.04%] index_add_ spread : Elapsed 0.024 ms (2.404 ms / 100) 2.385 -> 2.390 ( +0.21%) [ +0.00% +0.04% +0.04% / +0.21% +1.55% +1.76%] index_copy_ spread : Elapsed 0.024 ms (2.385 ms / 100) 2.388 -> 2.389 ( +0.04%) [ +0.13% +0.29% +0.00% / +0.04% +1.55% +1.59%] index_add_ strided 3 : Elapsed 0.024 ms (2.391 ms / 100) 2.385 -> 2.386 ( +0.04%) [ +0.00% +0.13% +0.04% / +0.04% +1.93% +2.01%] index_copy_ strided 3 : Elapsed 0.024 ms (2.385 ms / 100) 2.380 -> 2.381 ( +0.04%) [ +0.21% +0.13% +0.00% / +0.04% +1.76% +1.60%] index_add_ strided 5 : Elapsed 0.024 ms (2.385 ms / 100) 2.373 -> 2.375 ( +0.08%) [ +0.21% +0.04% +0.00% / +0.08% +1.85% +2.02%] index_copy_ strided 5 : Elapsed 0.024 ms (2.378 ms / 100) 2.375 -> 2.381 ( +0.25%) [ +0.17% +0.29% +0.00% / +0.25% +1.98% +1.94%] index_add_ strided 7 : Elapsed 0.024 ms (2.379 ms / 100) 2.371 -> 2.367 ( -0.17%) [ +0.00% +0.17% +0.04% / -0.17% +2.02% +1.94%] index_copy_ strided 7 : Elapsed 0.024 ms (2.371 ms / 100) 2.356 -> 2.359 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +1.78% +1.83%] index_add_ perm : Elapsed 0.024 ms (2.360 ms / 100) 2.318 -> 2.316 ( -0.09%) [ +0.04% +0.04% +0.00% / -0.09% +1.86% +2.16%] index_copy_ perm : Elapsed 0.023 ms (2.319 ms / 100) 2.370 -> 2.372 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.68% +1.01%] index_add_ perm_sorted : Elapsed 0.024 ms (2.370 ms / 100) 2.322 -> 2.325 ( +0.13%) [ +0.22% +0.17% +0.00% / +0.13% +1.64% +1.72%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.327 ms / 100) 4.788 -> 4.792 ( +0.08%) [ +0.13% +0.15% +0.00% / +0.08% +0.69% +0.71%] index_select const : Elapsed 0.048 ms (4.794 ms / 100) 4.821 -> 4.826 ( +0.10%) [ +0.10% +0.12% +0.00% / +0.10% +1.33% +1.41%] index_select wrap : Elapsed 0.048 ms (4.826 ms / 100) 4.847 -> 4.853 ( +0.12%) [ +0.00% +0.27% +0.14% / +0.12% +1.11% +1.03%] index_select linear : Elapsed 0.048 ms (4.847 ms / 100) 4.853 -> 4.856 ( +0.06%) [ +0.25% +0.00% +0.29% / +0.06% +0.95% +1.20%] index_select reverse : Elapsed 0.049 ms (4.865 ms / 100) 4.777 -> 4.783 ( +0.13%) [ +0.10% +0.23% +0.00% / +0.13% +1.11% +1.03%] index_select skip64 : Elapsed 0.048 ms (4.782 ms / 100) 4.780 -> 4.789 ( +0.19%) [ +0.15% +0.21% +0.00% / +0.19% +1.05% +0.98%] index_select skip256 : Elapsed 0.048 ms (4.787 ms / 100) 4.832 -> 4.845 ( +0.27%) [ +0.17% +0.19% +0.00% / +0.27% +1.20% +1.35%] index_select spread : Elapsed 0.048 ms (4.840 ms / 100) 4.847 -> 4.853 ( +0.12%) [ +0.08% +0.00% +0.02% / +0.12% +1.38% +1.30%] index_select strided 3 : Elapsed 0.049 ms (4.851 ms / 100) 4.832 -> 4.837 ( +0.10%) [ +0.00% +0.19% +0.12% / +0.10% +1.66% +1.61%] index_select random : Elapsed 0.048 ms (4.832 ms / 100) 4.822 -> 4.816 ( -0.12%) [ +0.21% +0.00% +0.02% / -0.12% +1.58% +1.35%] index_select random_sorted : Elapsed 0.048 ms (4.832 ms / 100) B = [40, 20, 16, 4] (stride (20, 1, 800, 12800)) A = [40, 20, 5, 4] (stride (1, 200, 40, 4000)) dim = 2 2.403 -> 2.411 ( +0.33%) [ +0.00% +0.37% +0.17% / +0.33% +0.62% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.326 -> 2.328 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.47% +0.52%] index_copy_ linear : Elapsed 0.023 ms (2.327 ms / 100) 2.404 -> 2.406 ( +0.08%) [ +0.17% +0.25% +0.00% / +0.08% +0.62% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.408 ms / 100) 2.324 -> 2.326 ( +0.09%) [ +0.00% +0.09% +0.04% / +0.09% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.023 ms (2.324 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.50% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.323 -> 2.321 ( -0.09%) [ +0.00% +0.22% +0.00% / -0.09% +0.47% +0.43%] index_copy_ spread : Elapsed 0.023 ms (2.323 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.00% +0.21% +0.08% / +0.08% +0.42% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.324 -> 2.327 ( +0.13%) [ +0.04% +0.22% +0.00% / +0.13% +0.52% +0.43%] index_copy_ strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.21% +0.00% +0.04% / +0.04% +0.33% +0.21%] index_add_ strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.09% +0.04%] index_copy_ strided 5 : Elapsed 0.023 ms (2.326 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +4.65% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.410 ms / 100) 2.323 -> 2.327 ( +0.17%) [ +0.00% +0.04% +0.04% / +0.22% +0.34% +0.17%] index_copy_ strided 7 : Elapsed 0.023 ms (2.323 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.04% +0.00% +0.17% / +0.04% +0.29% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.324 -> 2.326 ( +0.09%) [ +0.09% +0.00% +0.13% / +0.09% +0.34% +0.56%] index_copy_ perm : Elapsed 0.023 ms (2.326 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.21% +0.17% +0.00% / +0.12% +0.46% +0.37%] index_add_ perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) 2.321 -> 2.326 ( +0.22%) [ +0.17% +0.04% +0.00% / +0.22% +0.43% +0.47%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.325 ms / 100) 5.159 -> 5.161 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.04% +0.66% +0.68%] index_select const : Elapsed 0.052 ms (5.159 ms / 100) 5.145 -> 5.151 ( +0.12%) [ +0.14% +0.16% +0.00% / +0.12% +0.58% +0.62%] index_select wrap : Elapsed 0.052 ms (5.152 ms / 100) 5.176 -> 5.183 ( +0.14%) [ +0.00% +0.02% +0.14% / +0.14% +0.52% +0.62%] index_select linear : Elapsed 0.052 ms (5.176 ms / 100) 5.157 -> 5.154 ( -0.06%) [ +0.04% +0.00% +0.00% / -0.06% +0.45% +0.56%] index_select reverse : Elapsed 0.052 ms (5.159 ms / 100) 5.130 -> 5.129 ( -0.02%) [ +0.02% +0.02% +0.00% / -0.02% +0.68% +0.64%] index_select skip64 : Elapsed 0.051 ms (5.131 ms / 100) 5.126 -> 5.130 ( +0.08%) [ +0.16% +0.10% +0.00% / +0.08% +2.01% +0.80%] index_select skip256 : Elapsed 0.051 ms (5.134 ms / 100) 5.146 -> 5.147 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.47% +0.51%] index_select spread : Elapsed 0.051 ms (5.146 ms / 100) 5.143 -> 5.151 ( +0.16%) [ +0.00% +0.14% +0.04% / +0.16% +0.66% +0.62%] index_select strided 3 : Elapsed 0.051 ms (5.143 ms / 100) 5.148 -> 5.156 ( +0.16%) [ +0.00% +0.14% +0.25% / +0.16% +0.78% +0.68%] index_select random : Elapsed 0.051 ms (5.148 ms / 100) 5.150 -> 5.147 ( -0.06%) [ +0.00% +0.02% +0.06% / -0.06% +0.54% +0.62%] index_select random_sorted : Elapsed 0.052 ms (5.150 ms / 100) B = [40, 20, 16, 4] (stride (1, 40, 800, 12800)) A = [40, 20, 5, 4] (stride (400, 1, 80, 20)) dim = 2 2.312 -> 2.321 ( +0.39%) [ +0.35% +0.48% +0.00% / +0.39% +2.85% +2.98%] index_add_ linear : Elapsed 0.023 ms (2.320 ms / 100) 2.276 -> 2.277 ( +0.04%) [ +0.00% +0.22% +0.18% / +0.04% +2.64% +2.77%] index_copy_ linear : Elapsed 0.023 ms (2.276 ms / 100) 2.317 -> 2.320 ( +0.13%) [ +0.00% +0.22% +0.04% / +0.13% +2.46% +2.55%] index_add_ reverse : Elapsed 0.023 ms (2.317 ms / 100) 2.276 -> 2.279 ( +0.13%) [ +0.48% +0.13% +0.00% / +0.13% +2.33% +2.46%] index_copy_ reverse : Elapsed 0.023 ms (2.287 ms / 100) 2.335 -> 2.337 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +2.10% +1.97%] index_add_ spread : Elapsed 0.023 ms (2.336 ms / 100) 2.292 -> 2.296 ( +0.17%) [ +0.00% +0.09% +0.00% / +0.17% +2.18% +2.05%] index_copy_ spread : Elapsed 0.023 ms (2.292 ms / 100) 2.333 -> 2.330 ( -0.13%) [ +0.00% +0.26% +0.04% / -0.13% +1.84% +2.06%] index_add_ strided 3 : Elapsed 0.023 ms (2.333 ms / 100) 2.288 -> 2.291 ( +0.13%) [ +0.00% +0.09% +0.17% / +0.13% +1.97% +1.84%] index_copy_ strided 3 : Elapsed 0.023 ms (2.288 ms / 100) 2.321 -> 2.329 ( +0.34%) [ +0.47% +0.26% +0.00% / +0.34% +2.59% +2.24%] index_add_ strided 5 : Elapsed 0.023 ms (2.332 ms / 100) 2.286 -> 2.283 ( -0.13%) [ +0.09% +0.04% +0.00% / -0.13% +2.27% +2.19%] index_copy_ strided 5 : Elapsed 0.023 ms (2.288 ms / 100) 2.325 -> 2.316 ( -0.39%) [ +0.13% +0.22% +0.00% / -0.39% +2.02% +2.06%] index_add_ strided 7 : Elapsed 0.023 ms (2.328 ms / 100) 2.277 -> 2.286 ( +0.40%) [ +0.48% +0.48% +0.00% / +0.40% +2.06% +2.15%] index_copy_ strided 7 : Elapsed 0.023 ms (2.288 ms / 100) 2.332 -> 2.336 ( +0.17%) [ +0.13% +0.21% +0.00% / +0.17% +1.67% +1.50%] index_add_ perm : Elapsed 0.023 ms (2.335 ms / 100) 2.287 -> 2.290 ( +0.13%) [ +0.22% +0.17% +0.00% / +0.13% +2.40% +1.88%] index_copy_ perm : Elapsed 0.023 ms (2.292 ms / 100) 2.335 -> 2.337 ( +0.09%) [ +0.17% +0.00% +0.13% / +0.09% +2.01% +2.06%] index_add_ perm_sorted : Elapsed 0.023 ms (2.339 ms / 100) 2.291 -> 2.290 ( -0.04%) [ +0.09% +0.00% +0.04% / -0.04% +1.92% +2.31%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.293 ms / 100) 4.908 -> 4.915 ( +0.14%) [ +0.00% +0.04% +0.04% / +0.14% +0.59% +0.69%] index_select const : Elapsed 0.049 ms (4.908 ms / 100) 4.925 -> 4.929 ( +0.08%) [ +0.10% +0.24% +0.00% / +0.08% +1.30% +1.28%] index_select wrap : Elapsed 0.049 ms (4.930 ms / 100) 4.948 -> 4.942 ( -0.12%) [ +0.06% +0.34% +0.00% / -0.12% +1.25% +1.31%] index_select linear : Elapsed 0.050 ms (4.951 ms / 100) 4.957 -> 4.952 ( -0.10%) [ +0.00% +0.06% +0.04% / -0.10% +1.21% +1.15%] index_select reverse : Elapsed 0.050 ms (4.957 ms / 100) 4.916 -> 4.925 ( +0.18%) [ +0.00% +0.02% +0.06% / +0.18% +0.65% +0.71%] index_select skip64 : Elapsed 0.049 ms (4.916 ms / 100) 4.906 -> 4.910 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.82% +0.86%] index_select skip256 : Elapsed 0.049 ms (4.910 ms / 100) 4.939 -> 4.943 ( +0.08%) [ +0.00% +0.14% +0.14% / +0.08% +1.54% +1.40%] index_select spread : Elapsed 0.049 ms (4.939 ms / 100) 4.941 -> 4.946 ( +0.10%) [ +0.18% +0.08% +0.00% / +0.10% +3.58% +1.34%] index_select strided 3 : Elapsed 0.049 ms (4.950 ms / 100) 4.934 -> 4.941 ( +0.14%) [ +0.14% +0.00% +0.24% / +0.14% +1.34% +1.60%] index_select random : Elapsed 0.049 ms (4.941 ms / 100) 4.912 -> 4.911 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +1.59% +1.55%] index_select random_sorted : Elapsed 0.049 ms (4.912 ms / 100) out_shape = [40, 20, 5, 16] in_shape = [40, 20, 5, 4] idx_dim = 3 B = [40, 20, 5, 16] (stride (1600, 16, 320, 1)) dim = 3 fill_cnt = 4 1.443 -> 1.446 ( +0.21%) [ +0.69% +0.35% +0.00% / +0.21% +1.32% +1.32%] index_fill_ const : Elapsed 0.015 ms (1.453 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.28% +0.07% +0.00% / +0.00% +1.53% +1.46%] index_fill_ linear : Elapsed 0.014 ms (1.445 ms / 100) 1.439 -> 1.445 ( +0.42%) [ +0.14% +0.00% +0.14% / +0.42% +1.67% +1.74%] index_fill_ reverse : Elapsed 0.014 ms (1.441 ms / 100) 1.442 -> 1.442 ( +0.00%) [ +0.07% +0.21% +0.00% / +0.00% +1.60% +1.39%] index_fill_ skip64 : Elapsed 0.014 ms (1.443 ms / 100) 1.442 -> 1.441 ( -0.07%) [ +0.14% +0.00% +0.14% / -0.07% +1.39% +1.25%] index_fill_ skip256 : Elapsed 0.014 ms (1.444 ms / 100) 1.536 -> 1.536 ( +0.00%) [ +0.13% +0.65% +0.00% / +0.00% +1.69% +1.56%] index_fill_ spread : Elapsed 0.015 ms (1.538 ms / 100) 1.535 -> 1.538 ( +0.20%) [ +0.00% +0.20% +0.07% / +0.20% +1.63% +1.56%] index_fill_ strided 3 : Elapsed 0.015 ms (1.535 ms / 100) 1.533 -> 1.532 ( -0.07%) [ +0.00% +0.13% +0.33% / -0.07% +1.83% +1.70%] index_fill_ strided 5 : Elapsed 0.015 ms (1.533 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.39% +0.00% +0.07% / +0.00% +1.50% +1.63%] index_fill_ strided 7 : Elapsed 0.015 ms (1.540 ms / 100) 1.536 -> 1.536 ( +0.00%) [ +0.20% +0.00% +0.26% / +0.00% +1.56% +1.95%] index_fill_ strided 8 : Elapsed 0.015 ms (1.539 ms / 100) 1.537 -> 1.540 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +1.50% +1.63%] index_fill_ random : Elapsed 0.015 ms (1.538 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.46% +0.00% +0.26% / +0.00% +2.09% +1.83%] index_fill_ random_sorted : Elapsed 0.015 ms (1.541 ms / 100) 1.533 -> 1.533 ( +0.00%) [ +0.00% +0.39% +0.07% / +0.00% +2.28% +1.89%] index_fill_ perm : Elapsed 0.015 ms (1.533 ms / 100) 1.534 -> 1.533 ( -0.07%) [ +0.26% +0.00% +0.13% / -0.07% +1.63% +1.63%] index_fill_ perm_sorted : Elapsed 0.015 ms (1.538 ms / 100) B = [40, 20, 5, 16] (stride (1600, 5, 1, 100)) A = [40, 20, 5, 4] (stride (1, 800, 160, 40)) dim = 3 1.120 -> 1.127 ( +0.62%) [ +0.45% +0.18% +0.00% / +0.62% +1.61% +1.16%] index_add_ linear : Elapsed 0.011 ms (1.125 ms / 100) 1.101 -> 1.109 ( +0.73%) [ +0.54% +0.00% +0.09% / +0.73% +1.91% +1.54%] index_copy_ linear : Elapsed 0.011 ms (1.107 ms / 100) 1.119 -> 1.122 ( +0.27%) [ +0.00% +0.00% +0.09% / +0.27% +1.79% +1.61%] index_add_ reverse : Elapsed 0.011 ms (1.119 ms / 100) 1.099 -> 1.102 ( +0.27%) [ +0.09% +0.27% +0.00% / +0.27% +1.82% +2.00%] index_copy_ reverse : Elapsed 0.011 ms (1.100 ms / 100) 1.133 -> 1.127 ( -0.53%) [ +0.00% +0.26% +0.09% / -0.53% +0.79% +0.79%] index_add_ spread : Elapsed 0.011 ms (1.133 ms / 100) 1.111 -> 1.111 ( +0.00%) [ +0.09% +0.00% +0.18% / +0.00% +1.35% +1.62%] index_copy_ spread : Elapsed 0.011 ms (1.112 ms / 100) 1.124 -> 1.129 ( +0.44%) [ +0.44% +0.00% +0.53% / +0.44% +1.51% +1.96%] index_add_ strided 3 : Elapsed 0.011 ms (1.129 ms / 100) 1.107 -> 1.110 ( +0.27%) [ +0.27% +0.00% +0.09% / +0.27% +1.72% +1.81%] index_copy_ strided 3 : Elapsed 0.011 ms (1.110 ms / 100) 1.130 -> 1.129 ( -0.09%) [ +0.18% +0.27% +0.00% / -0.09% +0.53% +0.53%] index_add_ strided 5 : Elapsed 0.011 ms (1.132 ms / 100) 1.110 -> 1.110 ( +0.00%) [ +0.00% +0.36% +0.09% / +0.00% +0.45% +0.36%] index_copy_ strided 5 : Elapsed 0.011 ms (1.110 ms / 100) 1.125 -> 1.124 ( -0.09%) [ +0.00% +0.62% +0.00% / -0.09% +2.04% +1.33%] index_add_ strided 7 : Elapsed 0.011 ms (1.125 ms / 100) 1.106 -> 1.108 ( +0.18%) [ +0.45% +0.45% +0.00% / +0.18% +1.90% +2.35%] index_copy_ strided 7 : Elapsed 0.011 ms (1.111 ms / 100) 1.127 -> 1.128 ( +0.09%) [ +0.00% +0.18% +0.18% / +0.09% +0.98% +0.80%] index_add_ perm : Elapsed 0.011 ms (1.127 ms / 100) 1.106 -> 1.104 ( -0.18%) [ +0.18% +0.00% +0.27% / -0.18% +0.90% +0.90%] index_copy_ perm : Elapsed 0.011 ms (1.108 ms / 100) 1.124 -> 1.125 ( +0.09%) [ +0.27% +0.00% +0.27% / +0.09% +0.98% +1.07%] index_add_ perm_sorted : Elapsed 0.011 ms (1.127 ms / 100) 1.100 -> 1.103 ( +0.27%) [ +0.64% +0.00% +0.45% / +0.27% +1.45% +1.82%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.107 ms / 100) 1.987 -> 1.992 ( +0.25%) [ +0.25% +0.30% +0.00% / +0.25% +1.56% +1.41%] index_select const : Elapsed 0.020 ms (1.992 ms / 100) 2.010 -> 2.010 ( +0.00%) [ +0.15% +0.05% +0.00% / +0.00% +1.39% +1.49%] index_select wrap : Elapsed 0.020 ms (2.013 ms / 100) 2.023 -> 2.026 ( +0.15%) [ +0.25% +0.05% +0.00% / +0.15% +1.29% +1.29%] index_select linear : Elapsed 0.020 ms (2.028 ms / 100) 1.997 -> 2.001 ( +0.20%) [ +0.00% +0.10% +0.15% / +0.20% +1.05% +1.00%] index_select reverse : Elapsed 0.020 ms (1.997 ms / 100) 1.998 -> 2.000 ( +0.10%) [ +0.00% +0.20% +0.05% / +0.10% +0.50% +0.55%] index_select skip64 : Elapsed 0.020 ms (1.998 ms / 100) 1.996 -> 1.997 ( +0.05%) [ +0.00% +0.15% +0.35% / +0.05% +0.75% +0.60%] index_select skip256 : Elapsed 0.020 ms (1.996 ms / 100) 2.004 -> 2.005 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.70% +0.90%] index_select spread : Elapsed 0.020 ms (2.004 ms / 100) 2.011 -> 2.006 ( -0.25%) [ +0.00% +0.05% +0.05% / -0.25% +1.19% +1.39%] index_select strided 3 : Elapsed 0.020 ms (2.011 ms / 100) 2.015 -> 2.016 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.84% +1.14%] index_select random : Elapsed 0.020 ms (2.017 ms / 100) 2.001 -> 2.002 ( +0.05%) [ +0.00% +0.15% +0.00% / +0.05% +0.80% +1.05%] index_select random_sorted : Elapsed 0.020 ms (2.001 ms / 100) B = [40, 20, 5, 16] (stride (5, 3200, 1, 200)) A = [40, 20, 5, 4] (stride (400, 1, 20, 100)) dim = 3 2.478 -> 2.478 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.40% +0.32%] index_add_ linear : Elapsed 0.025 ms (2.478 ms / 100) 2.428 -> 2.428 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.41% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.477 -> 2.476 ( -0.04%) [ +0.00% +0.00% +0.20% / -0.04% +0.12% +0.08%] index_add_ reverse : Elapsed 0.025 ms (2.477 ms / 100) 2.427 -> 2.430 ( +0.12%) [ +0.08% +0.12% +0.00% / +0.12% +0.29% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.429 ms / 100) 2.475 -> 2.473 ( -0.08%) [ +0.08% +0.04% +0.00% / +0.00% -0.08% +0.04%] index_add_ spread : Elapsed 0.025 ms (2.477 ms / 100) 2.423 -> 2.422 ( -0.04%) [ +0.00% +0.08% +0.04% / +0.08% -0.04% +0.17%] index_copy_ spread : Elapsed 0.024 ms (2.423 ms / 100) 2.482 -> 2.483 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.20% +0.04% +0.04%] index_add_ strided 3 : Elapsed 0.025 ms (2.486 ms / 100) 2.429 -> 2.431 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.16% +0.29%] index_copy_ strided 3 : Elapsed 0.024 ms (2.431 ms / 100) 2.472 -> 2.475 ( +0.12%) [ +0.28% +0.00% +0.24% / +0.20% +0.12% +0.28%] index_add_ strided 5 : Elapsed 0.025 ms (2.479 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.00% +0.00% +0.12% / +0.08% +0.04% +0.08%] index_copy_ strided 5 : Elapsed 0.024 ms (2.425 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.00% +0.08% +0.28% / +0.04% +0.24% +0.12%] index_add_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.29% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.476 -> 2.475 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.36% +0.36%] index_add_ perm : Elapsed 0.025 ms (2.476 ms / 100) 2.429 -> 2.429 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.33% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.431 ms / 100) 2.476 -> 2.472 ( -0.16%) [ +0.20% +0.00% +0.04% / -0.16% +0.08% +0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.481 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.29% +0.00% +0.33% / -0.04% +0.21% +0.12%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.431 ms / 100) 5.301 -> 5.309 ( +0.15%) [ +0.00% +0.28% +0.30% / +0.15% +0.66% +0.62%] index_select const : Elapsed 0.053 ms (5.301 ms / 100) 5.336 -> 5.342 ( +0.11%) [ +0.00% +0.13% +0.19% / +0.11% +0.11% +0.15%] index_select wrap : Elapsed 0.053 ms (5.336 ms / 100) 5.332 -> 5.342 ( +0.19%) [ +0.23% +0.30% +0.00% / +0.19% +0.49% +0.45%] index_select linear : Elapsed 0.053 ms (5.344 ms / 100) 5.331 -> 5.344 ( +0.24%) [ +0.00% +0.00% +0.09% / +0.30% +0.36% +0.24%] index_select reverse : Elapsed 0.053 ms (5.331 ms / 100) 5.302 -> 5.294 ( -0.15%) [ +0.00% +0.15% +0.00% / -0.15% +0.21% +0.19%] index_select skip64 : Elapsed 0.053 ms (5.302 ms / 100) 5.294 -> 5.301 ( +0.13%) [ +0.00% +0.23% +0.08% / +0.13% +0.47% +0.40%] index_select skip256 : Elapsed 0.053 ms (5.294 ms / 100) 5.320 -> 5.321 ( +0.02%) [ +0.19% +0.09% +0.00% / +0.02% +0.24% +0.32%] index_select spread : Elapsed 0.053 ms (5.330 ms / 100) 5.326 -> 5.327 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.24% +0.28%] index_select strided 3 : Elapsed 0.053 ms (5.326 ms / 100) 5.319 -> 5.324 ( +0.09%) [ +0.04% +0.24% +0.00% / +0.09% +0.45% +0.30%] index_select random : Elapsed 0.053 ms (5.321 ms / 100) 5.314 -> 5.319 ( +0.09%) [ +0.28% +0.11% +0.00% / +0.09% +0.68% +0.66%] index_select random_sorted : Elapsed 0.053 ms (5.329 ms / 100) B = [40, 20, 5, 16] (stride (5, 200, 1, 4000)) A = [40, 20, 5, 4] (stride (400, 4, 80, 1)) dim = 3 2.496 -> 2.493 ( -0.12%) [ +0.24% +0.00% +0.08% / -0.12% +0.48% +0.52%] index_add_ linear : Elapsed 0.025 ms (2.502 ms / 100) 2.447 -> 2.442 ( -0.20%) [ +0.00% +0.12% +0.08% / -0.20% +0.25% +0.25%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.495 -> 2.494 ( -0.04%) [ +0.00% +0.16% +0.12% / -0.04% +0.56% +0.44%] index_add_ reverse : Elapsed 0.025 ms (2.495 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.65% +0.49%] index_copy_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.496 -> 2.498 ( +0.08%) [ +0.20% +0.00% +0.12% / +0.08% +0.48% +0.36%] index_add_ spread : Elapsed 0.025 ms (2.501 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.04% +0.08% +0.00% / +0.08% +0.00% -0.04%] index_copy_ spread : Elapsed 0.024 ms (2.447 ms / 100) 2.494 -> 2.498 ( +0.16%) [ +0.16% +0.04% +0.00% / +0.16% +0.52% +0.60%] index_add_ strided 3 : Elapsed 0.025 ms (2.498 ms / 100) 2.443 -> 2.447 ( +0.16%) [ +0.25% +0.29% +0.00% / +0.16% +0.61% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.495 -> 2.497 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.48% +0.28%] index_add_ strided 5 : Elapsed 0.025 ms (2.498 ms / 100) 2.444 -> 2.446 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.45% +0.29%] index_copy_ strided 5 : Elapsed 0.024 ms (2.445 ms / 100) 2.496 -> 2.499 ( +0.12%) [ +0.28% +0.12% +0.00% / +0.12% +0.36% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.503 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.00% +0.20% +0.08% / +0.16% +0.08% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.494 -> 2.496 ( +0.08%) [ +0.24% +0.00% +0.08% / +0.08% +0.12% +0.08%] index_add_ perm : Elapsed 0.025 ms (2.500 ms / 100) 2.442 -> 2.447 ( +0.20%) [ +0.16% +0.00% +0.16% / +0.20% +0.29% +0.20%] index_copy_ perm : Elapsed 0.024 ms (2.446 ms / 100) 2.491 -> 2.495 ( +0.16%) [ +0.16% +0.28% +0.00% / +0.16% +0.40% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.495 ms / 100) 2.438 -> 2.443 ( +0.21%) [ +0.37% +0.45% +0.00% / +0.21% +0.66% +0.49%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 5.466 -> 5.472 ( +0.11%) [ +0.00% +0.13% +0.26% / +0.11% +0.40% +0.51%] index_select const : Elapsed 0.055 ms (5.466 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.02% +0.05% +0.00% / -0.04% +0.37% +0.31%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.463 -> 5.476 ( +0.24%) [ +0.38% +0.00% +0.38% / +0.24% +0.62% +0.64%] index_select linear : Elapsed 0.055 ms (5.484 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.11% +0.11% +0.00% / +0.07% +0.35% +0.31%] index_select reverse : Elapsed 0.055 ms (5.480 ms / 100) 5.479 -> 5.478 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.18% +0.13%] index_select skip64 : Elapsed 0.055 ms (5.479 ms / 100) 5.483 -> 5.474 ( -0.16%) [ +0.00% +0.02% +0.00% / -0.16% +0.31% +0.22%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.465 -> 5.479 ( +0.26%) [ +0.26% +0.09% +0.00% / +0.26% +0.44% +0.38%] index_select spread : Elapsed 0.055 ms (5.479 ms / 100) 5.473 -> 5.484 ( +0.20%) [ +0.00% +0.20% +0.11% / +0.20% +0.27% +0.40%] index_select strided 3 : Elapsed 0.055 ms (5.473 ms / 100) 5.472 -> 5.480 ( +0.15%) [ +0.00% +0.18% +0.13% / +0.15% +0.37% +0.35%] index_select random : Elapsed 0.055 ms (5.472 ms / 100) 5.467 -> 5.467 ( +0.00%) [ +0.20% +0.00% +0.20% / +0.00% +0.29% +0.49%] index_select random_sorted : Elapsed 0.055 ms (5.478 ms / 100) B = [40, 20, 5, 16] (stride (1, 200, 40, 4000)) A = [40, 20, 5, 4] (stride (400, 20, 1, 5)) dim = 3 1.153 -> 1.159 ( +0.52%) [ +0.35% +0.61% +0.00% / +0.52% +2.08% +1.73%] index_add_ linear : Elapsed 0.012 ms (1.157 ms / 100) 1.145 -> 1.148 ( +0.26%) [ +0.00% +0.09% +0.17% / +0.26% +1.75% +1.57%] index_copy_ linear : Elapsed 0.011 ms (1.145 ms / 100) 1.157 -> 1.155 ( -0.17%) [ +0.17% +0.00% +0.09% / -0.17% +1.64% +1.64%] index_add_ reverse : Elapsed 0.012 ms (1.159 ms / 100) 1.143 -> 1.147 ( +0.35%) [ +0.17% +0.00% +0.26% / +0.35% +1.75% +2.01%] index_copy_ reverse : Elapsed 0.011 ms (1.145 ms / 100) 1.138 -> 1.143 ( +0.44%) [ +0.00% +0.35% +0.62% / +0.44% +2.46% +2.99%] index_add_ spread : Elapsed 0.011 ms (1.138 ms / 100) 1.136 -> 1.139 ( +0.26%) [ +0.00% +0.26% +0.09% / +0.26% +2.02% +1.85%] index_copy_ spread : Elapsed 0.011 ms (1.136 ms / 100) 1.155 -> 1.156 ( +0.09%) [ +0.17% +0.35% +0.00% / +0.35% +0.52% +0.09%] index_add_ strided 3 : Elapsed 0.012 ms (1.157 ms / 100) 1.145 -> 1.144 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.35% +0.52%] index_copy_ strided 3 : Elapsed 0.011 ms (1.146 ms / 100) 1.133 -> 1.138 ( +0.44%) [ +0.00% +0.26% +0.35% / +0.44% +2.03% +2.29%] index_add_ strided 5 : Elapsed 0.011 ms (1.133 ms / 100) 1.124 -> 1.128 ( +0.36%) [ +0.27% +0.00% +0.18% / +0.36% +2.14% +1.96%] index_copy_ strided 5 : Elapsed 0.011 ms (1.127 ms / 100) 1.137 -> 1.134 ( -0.26%) [ +0.00% +0.44% +0.00% / -0.26% +2.29% +2.29%] index_add_ strided 7 : Elapsed 0.011 ms (1.137 ms / 100) 1.125 -> 1.124 ( -0.09%) [ +0.27% +0.18% +0.00% / -0.09% +1.60% +2.04%] index_copy_ strided 7 : Elapsed 0.011 ms (1.128 ms / 100) 1.167 -> 1.165 ( -0.17%) [ +0.00% +0.43% +0.00% / -0.17% +0.34% +0.17%] index_add_ perm : Elapsed 0.012 ms (1.167 ms / 100) 1.145 -> 1.149 ( +0.35%) [ +0.00% +0.17% +0.00% / +0.35% +1.66% +1.05%] index_copy_ perm : Elapsed 0.011 ms (1.145 ms / 100) 1.153 -> 1.154 ( +0.09%) [ +0.43% +0.35% +0.00% / +0.09% +2.34% +2.34%] index_add_ perm_sorted : Elapsed 0.012 ms (1.158 ms / 100) 1.144 -> 1.145 ( +0.09%) [ +0.26% +0.00% +0.09% / +0.09% +1.49% +1.31%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.147 ms / 100) 2.051 -> 2.057 ( +0.29%) [ +0.00% +0.24% +0.05% / +0.29% +0.59% +0.68%] index_select const : Elapsed 0.021 ms (2.051 ms / 100) 2.076 -> 2.076 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.58% +0.63%] index_select wrap : Elapsed 0.021 ms (2.076 ms / 100) 2.072 -> 2.077 ( +0.24%) [ +0.05% +0.00% +0.10% / +0.24% +0.58% +0.97%] index_select linear : Elapsed 0.021 ms (2.073 ms / 100) 2.056 -> 2.063 ( +0.34%) [ +0.19% +0.00% +0.39% / +0.34% +0.78% +1.17%] index_select reverse : Elapsed 0.021 ms (2.060 ms / 100) 2.045 -> 2.042 ( -0.15%) [ +0.00% +0.15% +0.00% / -0.15% +0.68% +0.68%] index_select skip64 : Elapsed 0.020 ms (2.045 ms / 100) 2.043 -> 2.045 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.93% +0.98%] index_select skip256 : Elapsed 0.020 ms (2.043 ms / 100) 2.072 -> 2.073 ( +0.05%) [ +0.05% +0.00% +0.29% / +0.05% +0.72% +1.16%] index_select spread : Elapsed 0.021 ms (2.073 ms / 100) 2.064 -> 2.067 ( +0.15%) [ +0.15% +0.00% +0.10% / +0.15% +0.63% +0.63%] index_select strided 3 : Elapsed 0.021 ms (2.067 ms / 100) 2.074 -> 2.077 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.72% +0.92%] index_select random : Elapsed 0.021 ms (2.074 ms / 100) 2.069 -> 2.068 ( -0.05%) [ +0.00% +0.14% +0.10% / -0.05% +1.01% +1.01%] index_select random_sorted : Elapsed 0.021 ms (2.069 ms / 100) B = [40, 20, 5, 16] (stride (1, 40, 800, 4000)) A = [40, 20, 5, 4] (stride (20, 800, 1, 5)) dim = 3 2.528 -> 2.527 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.47% +0.20%] index_add_ linear : Elapsed 0.025 ms (2.528 ms / 100) 2.481 -> 2.484 ( +0.12%) [ +0.24% +0.12% +0.00% / +0.12% +0.40% +0.36%] index_copy_ linear : Elapsed 0.025 ms (2.487 ms / 100) 2.524 -> 2.527 ( +0.12%) [ +0.12% +0.00% +0.24% / +0.12% +0.40% +0.48%] index_add_ reverse : Elapsed 0.025 ms (2.527 ms / 100) 2.478 -> 2.480 ( +0.08%) [ +0.00% +0.20% +0.20% / +0.08% +0.52% +0.61%] index_copy_ reverse : Elapsed 0.025 ms (2.478 ms / 100) 2.522 -> 2.523 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.32% +0.40%] index_add_ spread : Elapsed 0.025 ms (2.523 ms / 100) 2.479 -> 2.480 ( +0.04%) [ +0.00% +0.28% +0.00% / +0.04% +0.32% +0.48%] index_copy_ spread : Elapsed 0.025 ms (2.479 ms / 100) 2.520 -> 2.520 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.67% +0.67%] index_add_ strided 3 : Elapsed 0.025 ms (2.522 ms / 100) 2.475 -> 2.478 ( +0.12%) [ +0.28% +0.12% +0.00% / +0.12% +0.69% +0.61%] index_copy_ strided 3 : Elapsed 0.025 ms (2.482 ms / 100) 2.523 -> 2.522 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.44% +0.36%] index_add_ strided 5 : Elapsed 0.025 ms (2.524 ms / 100) 2.477 -> 2.482 ( +0.20%) [ +0.36% +0.08% +0.00% / +0.20% +0.52% +0.32%] index_copy_ strided 5 : Elapsed 0.025 ms (2.486 ms / 100) 2.526 -> 2.529 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.16% +0.28%] index_add_ strided 7 : Elapsed 0.025 ms (2.528 ms / 100) 2.482 -> 2.484 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.20% +0.08% +0.20%] index_copy_ strided 7 : Elapsed 0.025 ms (2.484 ms / 100) 2.519 -> 2.522 ( +0.12%) [ +0.20% +0.16% +0.00% / +0.12% +0.12% +0.36%] index_add_ perm : Elapsed 0.025 ms (2.524 ms / 100) 2.474 -> 2.479 ( +0.20%) [ +0.40% +0.28% +0.00% / +0.20% +0.44% +0.49%] index_copy_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.524 -> 2.524 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.04% +0.08%] index_add_ perm_sorted : Elapsed 0.025 ms (2.526 ms / 100) 2.480 -> 2.481 ( +0.04%) [ +0.12% +0.20% +0.00% / +0.04% +0.32% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.483 ms / 100) 5.503 -> 5.517 ( +0.25%) [ +0.00% +0.05% +0.11% / +0.25% +0.35% +0.44%] index_select const : Elapsed 0.055 ms (5.503 ms / 100) 5.508 -> 5.508 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.00% +0.45% +0.18%] index_select wrap : Elapsed 0.055 ms (5.510 ms / 100) 5.518 -> 5.534 ( +0.29%) [ +0.25% +0.38% +0.00% / +0.29% +0.43% +0.36%] index_select linear : Elapsed 0.055 ms (5.532 ms / 100) 5.488 -> 5.495 ( +0.13%) [ +0.16% +0.15% +0.00% / +0.13% +0.44% +0.36%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.485 -> 5.502 ( +0.31%) [ +0.11% +0.29% +0.00% / +0.31% +0.33% +0.44%] index_select skip64 : Elapsed 0.055 ms (5.491 ms / 100) 5.476 -> 5.500 ( +0.44%) [ +0.29% +0.00% +0.26% / +0.47% +0.69% +0.44%] index_select skip256 : Elapsed 0.055 ms (5.492 ms / 100) 5.514 -> 5.516 ( +0.04%) [ +0.02% +0.00% +0.02% / +0.04% +0.15% +0.34%] index_select spread : Elapsed 0.055 ms (5.515 ms / 100) 5.496 -> 5.510 ( +0.25%) [ +0.00% +0.25% +0.02% / +0.25% +0.29% +0.44%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.492 -> 5.497 ( +0.09%) [ +0.09% +0.18% +0.00% / +0.09% +0.33% +0.56%] index_select random : Elapsed 0.055 ms (5.497 ms / 100) 5.508 -> 5.515 ( +0.13%) [ +0.07% +0.00% +0.13% / +0.13% +0.31% +0.42%] index_select random_sorted : Elapsed 0.055 ms (5.512 ms / 100) out_shape = [20, 5, 16, 40] in_shape = [4, 5, 16, 40] idx_dim = 0 B = [20, 5, 16, 40] (stride (3200, 1, 200, 5)) A = [4, 5, 16, 40] (stride (80, 1, 5, 320)) dim = 0 2.100 -> 2.098 ( -0.10%) [ +0.29% +0.05% +0.00% / -0.10% +0.10% +0.19%] index_add_ linear : Elapsed 0.021 ms (2.106 ms / 100) 2.036 -> 2.030 ( -0.29%) [ +0.29% +0.00% +0.05% / -0.29% +0.05% +0.29%] index_copy_ linear : Elapsed 0.020 ms (2.042 ms / 100) 2.092 -> 2.096 ( +0.19%) [ +0.24% +0.33% +0.00% / +0.19% +0.53% +0.53%] index_add_ reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.034 -> 2.033 ( -0.05%) [ +0.00% +0.10% +0.10% / -0.05% +0.25% +0.20%] index_copy_ reverse : Elapsed 0.020 ms (2.034 ms / 100) 2.102 -> 2.101 ( -0.05%) [ +0.00% +0.14% +0.00% / -0.05% +0.05% +0.14%] index_add_ spread : Elapsed 0.021 ms (2.102 ms / 100) 2.034 -> 2.041 ( +0.34%) [ +0.29% +0.00% +0.39% / +0.34% +0.34% +0.39%] index_copy_ spread : Elapsed 0.020 ms (2.040 ms / 100) 2.101 -> 2.103 ( +0.10%) [ +0.14% +0.00% +0.14% / +0.10% +0.38% +0.38%] index_add_ strided 3 : Elapsed 0.021 ms (2.104 ms / 100) 2.033 -> 2.040 ( +0.34%) [ +0.05% +0.34% +0.00% / +0.34% +0.49% +0.39%] index_copy_ strided 3 : Elapsed 0.020 ms (2.034 ms / 100) 2.097 -> 2.095 ( -0.10%) [ +0.00% +0.00% +0.14% / -0.10% +0.29% +0.14%] index_add_ strided 7 : Elapsed 0.021 ms (2.097 ms / 100) 2.030 -> 2.034 ( +0.20%) [ +0.00% +0.20% +0.30% / +0.20% +0.59% +0.54%] index_copy_ strided 7 : Elapsed 0.020 ms (2.030 ms / 100) 2.101 -> 2.098 ( -0.14%) [ +0.00% +0.19% +0.19% / +0.29% -0.05% -0.14%] index_add_ perm : Elapsed 0.021 ms (2.101 ms / 100) 2.041 -> 2.038 ( -0.15%) [ +0.00% +0.05% +0.00% / +0.05% -0.15% -0.15%] index_copy_ perm : Elapsed 0.020 ms (2.041 ms / 100) 2.101 -> 2.096 ( -0.24%) [ +0.00% +0.10% +0.00% / +0.05% -0.24% -0.19%] index_add_ perm_sorted : Elapsed 0.021 ms (2.101 ms / 100) 2.037 -> 2.030 ( -0.34%) [ +0.15% +0.00% +0.10% / +0.15% -0.34% -0.20%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.040 ms / 100) 8.729 -> 8.725 ( -0.05%) [ +0.02% +0.06% +0.00% / -0.05% +0.11% +0.15%] index_select const : Elapsed 0.087 ms (8.731 ms / 100) 8.757 -> 8.761 ( +0.05%) [ +0.02% +0.00% +0.03% / +0.05% +0.33% +0.23%] index_select wrap : Elapsed 0.088 ms (8.759 ms / 100) 8.760 -> 8.757 ( -0.03%) [ +0.00% +0.03% +0.19% / -0.03% +0.09% +0.06%] index_select linear : Elapsed 0.088 ms (8.760 ms / 100) 8.769 -> 8.777 ( +0.09%) [ +0.06% +0.00% +0.16% / +0.09% +0.18% +0.24%] index_select reverse : Elapsed 0.088 ms (8.774 ms / 100) 8.722 -> 8.726 ( +0.05%) [ +0.17% +0.00% +0.08% / +0.05% +0.05% +0.18%] index_select skip64 : Elapsed 0.087 ms (8.737 ms / 100) 8.731 -> 8.726 ( -0.06%) [ +0.08% +0.14% +0.00% / -0.01% -0.06% +0.10%] index_select skip256 : Elapsed 0.087 ms (8.738 ms / 100) 8.780 -> 8.789 ( +0.10%) [ +0.00% +0.10% +0.22% / +0.11% +0.10% +0.26%] index_select spread : Elapsed 0.088 ms (8.780 ms / 100) 8.768 -> 8.784 ( +0.18%) [ +0.01% +0.10% +0.00% / +0.26% +0.18% +0.18%] index_select strided 3 : Elapsed 0.088 ms (8.769 ms / 100) 8.757 -> 8.796 ( +0.45%) [ +0.38% +0.00% +0.33% / +0.54% +0.48% +0.45%] index_select random : Elapsed 0.088 ms (8.790 ms / 100) 8.781 -> 8.780 ( -0.01%) [ +0.03% +0.06% +0.00% / -0.01% +0.02% -0.01%] index_select random_sorted : Elapsed 0.088 ms (8.784 ms / 100) B = [20, 5, 16, 40] (stride (1, 12800, 20, 320)) A = [4, 5, 16, 40] (stride (40, 160, 800, 1)) dim = 0 2.123 -> 2.127 ( +0.19%) [ +0.19% +0.00% +0.28% / +0.42% +0.24% +0.19%] index_add_ linear : Elapsed 0.021 ms (2.127 ms / 100) 2.097 -> 2.096 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.19% +0.10%] index_copy_ linear : Elapsed 0.021 ms (2.097 ms / 100) 2.122 -> 2.125 ( +0.14%) [ +0.19% +0.47% +0.00% / +0.19% +0.28% +0.14%] index_add_ reverse : Elapsed 0.021 ms (2.126 ms / 100) 2.094 -> 2.093 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.29% +0.43%] index_copy_ reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.170 -> 2.173 ( +0.14%) [ +0.00% +0.28% +0.18% / +0.14% +0.18% +0.18%] index_add_ spread : Elapsed 0.022 ms (2.170 ms / 100) 2.187 -> 2.187 ( +0.00%) [ +0.32% +0.27% +0.00% / +0.00% +0.37% +0.73%] index_copy_ spread : Elapsed 0.022 ms (2.194 ms / 100) 2.162 -> 2.162 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.46% +0.42%] index_add_ strided 3 : Elapsed 0.022 ms (2.162 ms / 100) 2.162 -> 2.163 ( +0.05%) [ +0.00% +0.32% +0.23% / +0.05% +0.14% +0.51%] index_copy_ strided 3 : Elapsed 0.022 ms (2.162 ms / 100) 2.179 -> 2.174 ( -0.23%) [ +0.18% +0.09% +0.00% / -0.23% +0.28% +0.37%] index_add_ strided 7 : Elapsed 0.022 ms (2.183 ms / 100) 2.200 -> 2.200 ( +0.00%) [ +0.23% +0.00% +0.09% / +0.00% +0.41% +0.36%] index_copy_ strided 7 : Elapsed 0.022 ms (2.205 ms / 100) 2.163 -> 2.157 ( -0.28%) [ +0.05% +0.00% +0.14% / -0.28% +0.00% +0.00%] index_add_ perm : Elapsed 0.022 ms (2.164 ms / 100) 2.157 -> 2.162 ( +0.23%) [ +0.37% +0.51% +0.00% / +0.23% +0.37% +0.37%] index_copy_ perm : Elapsed 0.022 ms (2.165 ms / 100) 2.158 -> 2.163 ( +0.23%) [ +0.23% +0.00% +0.60% / +0.23% +0.46% +0.37%] index_add_ perm_sorted : Elapsed 0.022 ms (2.163 ms / 100) 2.162 -> 2.164 ( +0.09%) [ +0.14% +0.19% +0.00% / +0.19% +0.09% +0.32%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.165 ms / 100) 9.238 -> 9.239 ( +0.01%) [ +0.00% +0.13% +0.18% / +0.01% +0.21% +0.10%] index_select const : Elapsed 0.092 ms (9.238 ms / 100) 9.314 -> 9.315 ( +0.01%) [ +0.01% +0.00% +0.14% / +0.02% +0.04% +0.01%] index_select wrap : Elapsed 0.093 ms (9.315 ms / 100) 9.271 -> 9.275 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.05% +0.08%] index_select linear : Elapsed 0.093 ms (9.271 ms / 100) 9.269 -> 9.296 ( +0.29%) [ +0.00% +0.16% +0.12% / +0.29% +0.47% +0.38%] index_select reverse : Elapsed 0.093 ms (9.269 ms / 100) 9.249 -> 9.249 ( +0.00%) [ +0.19% +0.00% +0.05% / +0.17% +0.00% +0.29%] index_select skip64 : Elapsed 0.093 ms (9.267 ms / 100) 9.239 -> 9.241 ( +0.02%) [ +0.00% +0.24% +0.26% / +0.17% +0.02% +0.29%] index_select skip256 : Elapsed 0.092 ms (9.239 ms / 100) 9.295 -> 9.297 ( +0.02%) [ +0.04% +0.13% +0.00% / +0.27% +0.02% +0.10%] index_select spread : Elapsed 0.093 ms (9.299 ms / 100) 9.302 -> 9.312 ( +0.11%) [ +0.33% +0.25% +0.00% / +0.11% +0.26% +0.22%] index_select strided 3 : Elapsed 0.093 ms (9.333 ms / 100) 9.299 -> 9.301 ( +0.02%) [ +0.12% +0.12% +0.00% / +0.02% +0.27% +0.27%] index_select random : Elapsed 0.093 ms (9.310 ms / 100) 9.296 -> 9.298 ( +0.02%) [ +0.10% +0.00% +0.04% / +0.02% +0.06% +0.11%] index_select random_sorted : Elapsed 0.093 ms (9.305 ms / 100) B = [20, 5, 16, 40] (stride (200, 40, 4000, 1)) dim = 0 fill_cnt = 4 0.974 -> 0.973 ( -0.10%) [ +0.00% +0.10% +0.21% / -0.10% +0.00% +0.41%] index_fill_ const : Elapsed 0.010 ms (0.974 ms / 100) 1.003 -> 1.001 ( -0.20%) [ +0.10% +0.00% +0.10% / -0.20% +0.90% +1.00%] index_fill_ linear : Elapsed 0.010 ms (1.004 ms / 100) 1.004 -> 1.005 ( +0.10%) [ +0.00% +0.40% +0.00% / +0.10% +0.70% +0.60%] index_fill_ reverse : Elapsed 0.010 ms (1.004 ms / 100) 0.974 -> 0.973 ( -0.10%) [ +0.10% +0.21% +0.00% / -0.10% +0.10% +0.51%] index_fill_ skip64 : Elapsed 0.010 ms (0.975 ms / 100) 0.975 -> 0.974 ( -0.10%) [ +0.10% +0.00% +0.00% / -0.10% +0.10% +0.10%] index_fill_ skip256 : Elapsed 0.010 ms (0.976 ms / 100) 0.988 -> 0.985 ( -0.30%) [ +0.00% +0.20% +0.00% / -0.30% +0.20% +0.20%] index_fill_ spread : Elapsed 0.010 ms (0.988 ms / 100) 1.006 -> 1.002 ( -0.40%) [ +0.30% +0.00% +0.10% / -0.30% -0.30% -0.40%] index_fill_ strided 3 : Elapsed 0.010 ms (1.009 ms / 100) 0.986 -> 0.985 ( -0.10%) [ +0.30% +0.20% +0.00% / -0.10% +0.81% +0.20%] index_fill_ strided 5 : Elapsed 0.010 ms (0.989 ms / 100) 0.988 -> 0.988 ( +0.00%) [ +0.10% +0.00% +0.30% / +0.00% +0.91% +0.61%] index_fill_ strided 7 : Elapsed 0.010 ms (0.989 ms / 100) 0.990 -> 0.988 ( -0.20%) [ +0.51% +0.30% +0.00% / +0.40% +0.20% -0.20%] index_fill_ strided 8 : Elapsed 0.010 ms (0.995 ms / 100) 0.986 -> 0.988 ( +0.20%) [ +0.30% +0.20% +0.00% / +0.20% +1.01% +0.30%] index_fill_ strided 16 : Elapsed 0.010 ms (0.989 ms / 100) 0.985 -> 0.988 ( +0.30%) [ +0.10% +0.00% +0.00% / +0.30% +1.02% +0.61%] index_fill_ random : Elapsed 0.010 ms (0.986 ms / 100) 0.985 -> 0.988 ( +0.30%) [ +0.30% +0.51% +0.00% / +0.30% +0.91% +0.81%] index_fill_ random_sorted : Elapsed 0.010 ms (0.988 ms / 100) 0.983 -> 0.988 ( +0.51%) [ +0.61% +0.10% +0.00% / +0.51% +1.53% +1.32%] index_fill_ perm : Elapsed 0.010 ms (0.989 ms / 100) 0.984 -> 0.987 ( +0.30%) [ +0.10% +0.20% +0.00% / +0.30% +1.83% +1.73%] index_fill_ perm_sorted : Elapsed 0.010 ms (0.985 ms / 100) B = [20, 5, 16, 40] (stride (200, 40, 4000, 1)) A = [4, 5, 16, 40] (stride (3200, 640, 40, 1)) dim = 0 1.901 -> 1.872 ( -1.53%) [ +0.00% +0.05% +0.26% / +0.21% -1.37% -1.53%] index_add_ linear : Elapsed 0.019 ms (1.901 ms / 100) 1.858 -> 1.829 ( -1.56%) [ +0.05% +0.00% +0.16% / +0.27% -1.51% -1.56%] index_copy_ linear : Elapsed 0.019 ms (1.859 ms / 100) 1.902 -> 1.877 ( -1.31%) [ +0.11% +0.05% +0.00% / +0.21% -1.31% -1.31%] index_add_ reverse : Elapsed 0.019 ms (1.904 ms / 100) 1.858 -> 1.827 ( -1.67%) [ +0.11% +0.00% +0.11% / +0.00% -1.61% -1.67%] index_copy_ reverse : Elapsed 0.019 ms (1.860 ms / 100) 1.900 -> 1.876 ( -1.26%) [ +0.00% +0.11% +0.00% / +0.05% -1.26% -1.11%] index_add_ spread : Elapsed 0.019 ms (1.900 ms / 100) 1.857 -> 1.832 ( -1.35%) [ +0.00% +0.43% +0.16% / +0.32% -1.35% -1.29%] index_copy_ spread : Elapsed 0.019 ms (1.857 ms / 100) 1.906 -> 1.872 ( -1.78%) [ +0.05% +0.00% +0.00% / -0.05% -1.73% -1.78%] index_add_ strided 3 : Elapsed 0.019 ms (1.907 ms / 100) 1.857 -> 1.830 ( -1.45%) [ +0.43% +0.00% +0.43% / +0.00% -1.45% -1.29%] index_copy_ strided 3 : Elapsed 0.019 ms (1.865 ms / 100) 1.903 -> 1.878 ( -1.31%) [ +0.00% +0.00% +0.05% / -0.05% -1.31% -1.21%] index_add_ strided 7 : Elapsed 0.019 ms (1.903 ms / 100) 1.859 -> 1.832 ( -1.45%) [ +0.05% +0.05% +0.00% / -0.05% -1.45% -1.29%] index_copy_ strided 7 : Elapsed 0.019 ms (1.860 ms / 100) 1.911 -> 1.883 ( -1.47%) [ +0.10% +0.00% +0.05% / -0.05% -1.47% -1.41%] index_add_ perm : Elapsed 0.019 ms (1.913 ms / 100) 1.862 -> 1.837 ( -1.34%) [ +0.11% +0.05% +0.00% / +0.43% -1.18% -1.34%] index_copy_ perm : Elapsed 0.019 ms (1.864 ms / 100) 1.901 -> 1.874 ( -1.42%) [ +0.00% +0.21% +0.05% / +0.21% -1.37% -1.42%] index_add_ perm_sorted : Elapsed 0.019 ms (1.901 ms / 100) 1.859 -> 1.830 ( -1.56%) [ +0.11% +0.00% +0.11% / +0.05% -1.29% -1.56%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.861 ms / 100) 8.284 -> 8.285 ( +0.01%) [ +0.34% +0.00% +0.18% / +0.08% +0.01% +0.01%] index_select const : Elapsed 0.083 ms (8.312 ms / 100) 8.317 -> 8.336 ( +0.23%) [ +0.46% +0.00% +0.24% / +0.23% +0.40% +0.41%] index_select wrap : Elapsed 0.084 ms (8.355 ms / 100) 8.300 -> 8.303 ( +0.04%) [ +0.00% +0.11% +0.33% / +0.04% +0.20% +0.33%] index_select linear : Elapsed 0.083 ms (8.300 ms / 100) 8.304 -> 8.320 ( +0.19%) [ +0.19% +0.12% +0.00% / +0.19% +0.19% +0.39%] index_select reverse : Elapsed 0.083 ms (8.320 ms / 100) 8.281 -> 8.284 ( +0.04%) [ +0.05% +0.00% +0.00% / +0.05% +0.04% +0.06%] index_select skip64 : Elapsed 0.083 ms (8.285 ms / 100) 8.283 -> 8.275 ( -0.10%) [ +0.21% +0.00% +0.22% / +0.10% -0.10% +0.06%] index_select skip256 : Elapsed 0.083 ms (8.300 ms / 100) 8.308 -> 8.326 ( +0.22%) [ +0.22% +0.00% +0.37% / +0.28% +0.22% +0.24%] index_select spread : Elapsed 0.083 ms (8.326 ms / 100) 8.331 -> 8.339 ( +0.10%) [ +0.13% +0.42% +0.00% / +0.10% +0.12% +0.26%] index_select strided 3 : Elapsed 0.083 ms (8.342 ms / 100) 8.332 -> 8.324 ( -0.10%) [ +0.00% +0.08% +0.02% / +0.13% +0.18% -0.10%] index_select random : Elapsed 0.083 ms (8.332 ms / 100) 8.307 -> 8.309 ( +0.02%) [ +0.10% +0.13% +0.00% / +0.06% +0.02% +0.20%] index_select random_sorted : Elapsed 0.083 ms (8.315 ms / 100) B = [20, 5, 16, 40] (stride (1, 800, 4000, 20)) A = [4, 5, 16, 40] (stride (3200, 16, 1, 80)) dim = 0 2.154 -> 2.152 ( -0.09%) [ +0.19% +0.14% +0.00% / -0.09% -0.09% +0.00%] index_add_ linear : Elapsed 0.022 ms (2.158 ms / 100) 2.097 -> 2.099 ( +0.10%) [ +0.00% +0.24% +0.10% / +0.14% +0.10% +0.19%] index_copy_ linear : Elapsed 0.021 ms (2.097 ms / 100) 2.155 -> 2.153 ( -0.09%) [ +0.00% +0.05% +0.05% / -0.09% +0.00% +0.05%] index_add_ reverse : Elapsed 0.022 ms (2.155 ms / 100) 2.098 -> 2.099 ( +0.05%) [ +0.19% +0.24% +0.00% / +0.10% +0.05% +0.14%] index_copy_ reverse : Elapsed 0.021 ms (2.102 ms / 100) 2.195 -> 2.195 ( +0.00%) [ +0.09% +0.14% +0.00% / +0.00% +0.00% +0.36%] index_add_ spread : Elapsed 0.022 ms (2.197 ms / 100) 2.204 -> 2.206 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.14% +0.27%] index_copy_ spread : Elapsed 0.022 ms (2.204 ms / 100) 2.184 -> 2.189 ( +0.23%) [ +0.18% +0.14% +0.00% / +0.23% +0.23% +0.37%] index_add_ strided 3 : Elapsed 0.022 ms (2.188 ms / 100) 2.168 -> 2.170 ( +0.09%) [ +0.23% +0.18% +0.00% / +0.09% +0.23% +0.51%] index_copy_ strided 3 : Elapsed 0.022 ms (2.173 ms / 100) 2.188 -> 2.195 ( +0.32%) [ +0.18% +0.23% +0.00% / +0.32% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.199 -> 2.203 ( +0.18%) [ +0.00% +0.27% +0.18% / +0.18% +0.32% +0.45%] index_copy_ strided 7 : Elapsed 0.022 ms (2.199 ms / 100) 2.189 -> 2.187 ( -0.09%) [ +0.14% +0.14% +0.00% / -0.09% +0.00% +0.05%] index_add_ perm : Elapsed 0.022 ms (2.192 ms / 100) 2.174 -> 2.175 ( +0.05%) [ +0.23% +0.05% +0.00% / +0.14% +0.05% +0.05%] index_copy_ perm : Elapsed 0.022 ms (2.179 ms / 100) 2.182 -> 2.185 ( +0.14%) [ +0.27% +0.09% +0.00% / +0.23% +0.14% +0.37%] index_add_ perm_sorted : Elapsed 0.022 ms (2.188 ms / 100) 2.170 -> 2.169 ( -0.05%) [ +0.00% +0.05% +0.14% / -0.05% +0.09% +0.05%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.170 ms / 100) 8.836 -> 8.848 ( +0.14%) [ +0.00% +0.02% +0.15% / +0.14% +0.23% +0.36%] index_select const : Elapsed 0.088 ms (8.836 ms / 100) 8.889 -> 8.910 ( +0.24%) [ +0.00% +0.03% +0.27% / +0.24% +0.37% +0.26%] index_select wrap : Elapsed 0.089 ms (8.889 ms / 100) 8.873 -> 8.883 ( +0.11%) [ +0.10% +0.09% +0.00% / +0.11% +0.42% +0.27%] index_select linear : Elapsed 0.089 ms (8.882 ms / 100) 8.872 -> 8.882 ( +0.11%) [ +0.06% +0.20% +0.00% / +0.11% +0.36% +0.53%] index_select reverse : Elapsed 0.089 ms (8.877 ms / 100) 8.844 -> 8.832 ( -0.14%) [ +0.01% +0.00% +0.16% / -0.14% +0.11% +0.44%] index_select skip64 : Elapsed 0.088 ms (8.845 ms / 100) 8.838 -> 8.847 ( +0.10%) [ +0.00% +0.14% +0.01% / +0.10% +0.33% +0.25%] index_select skip256 : Elapsed 0.088 ms (8.838 ms / 100) 8.879 -> 8.886 ( +0.08%) [ +0.00% +0.07% +0.06% / +0.08% +0.47% +0.25%] index_select spread : Elapsed 0.089 ms (8.879 ms / 100) 8.887 -> 8.894 ( +0.08%) [ +0.00% +0.12% +0.06% / +0.18% +0.08% +0.33%] index_select strided 3 : Elapsed 0.089 ms (8.887 ms / 100) 8.886 -> 8.884 ( -0.02%) [ +0.00% +0.08% +0.12% / -0.02% +0.43% +0.45%] index_select random : Elapsed 0.089 ms (8.886 ms / 100) 8.873 -> 8.909 ( +0.41%) [ +0.00% +0.21% +0.17% / +0.41% +0.59% +0.51%] index_select random_sorted : Elapsed 0.089 ms (8.873 ms / 100) B = [20, 5, 16, 40] (stride (5, 1, 4000, 100)) A = [4, 5, 16, 40] (stride (640, 2560, 40, 1)) dim = 0 0.796 -> 0.798 ( +0.25%) [ +0.50% +1.13% +0.00% / +0.25% +0.75% +0.63%] index_add_ linear : Elapsed 0.008 ms (0.800 ms / 100) 0.794 -> 0.797 ( +0.38%) [ +0.00% +0.25% +0.25% / +0.38% +2.90% +3.27%] index_copy_ linear : Elapsed 0.008 ms (0.794 ms / 100) 0.802 -> 0.787 ( -1.87%) [ +0.37% +0.00% +0.25% / -0.62% -1.87% -1.25%] index_add_ reverse : Elapsed 0.008 ms (0.805 ms / 100) 0.801 -> 0.801 ( +0.00%) [ +0.25% +0.50% +0.00% / +0.00% +0.12% +0.37%] index_copy_ reverse : Elapsed 0.008 ms (0.803 ms / 100) 0.867 -> 0.838 ( -3.34%) [ +0.12% +0.00% +0.46% / -0.35% -3.34% -3.11%] index_add_ spread : Elapsed 0.009 ms (0.868 ms / 100) 0.868 -> 0.851 ( -1.96%) [ +0.00% +0.00% +0.00% / -0.46% -1.96% -1.50%] index_copy_ spread : Elapsed 0.009 ms (0.868 ms / 100) 0.854 -> 0.827 ( -3.16%) [ +0.12% +0.00% +0.47% / -0.47% -3.16% -2.81%] index_add_ strided 3 : Elapsed 0.009 ms (0.855 ms / 100) 0.846 -> 0.843 ( -0.35%) [ +0.59% +0.00% +0.71% / +0.35% -0.35% +0.24%] index_copy_ strided 3 : Elapsed 0.009 ms (0.851 ms / 100) 0.856 -> 0.831 ( -2.92%) [ +0.70% +0.58% +0.00% / +1.05% -2.92% -2.57%] index_add_ strided 7 : Elapsed 0.009 ms (0.862 ms / 100) 0.862 -> 0.843 ( -2.20%) [ +0.00% +0.46% +0.70% / +1.28% -1.97% -2.20%] index_copy_ strided 7 : Elapsed 0.009 ms (0.862 ms / 100) 0.874 -> 0.846 ( -3.20%) [ +0.34% +0.00% +0.34% / +0.11% -3.20% -3.20%] index_add_ perm : Elapsed 0.009 ms (0.877 ms / 100) 0.865 -> 0.860 ( -0.58%) [ +0.00% +0.12% +0.81% / +0.69% -0.58% -0.58%] index_copy_ perm : Elapsed 0.009 ms (0.865 ms / 100) 0.864 -> 0.845 ( -2.20%) [ +0.69% +0.00% +0.23% / +0.58% +2.08% -2.20%] index_add_ perm_sorted : Elapsed 0.009 ms (0.870 ms / 100) 0.867 -> 0.861 ( -0.69%) [ +0.58% +0.00% +0.12% / -0.23% -0.69% -0.46%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.872 ms / 100) 5.030 -> 5.033 ( +0.06%) [ +0.02% +0.00% +0.10% / +0.06% +0.36% +0.16%] index_select const : Elapsed 0.050 ms (5.031 ms / 100) 5.054 -> 5.049 ( -0.10%) [ +0.24% +0.00% +0.08% / -0.10% +0.18% +0.16%] index_select wrap : Elapsed 0.051 ms (5.066 ms / 100) 5.051 -> 5.041 ( -0.20%) [ +0.04% +0.36% +0.00% / +0.00% -0.20% +0.02%] index_select linear : Elapsed 0.051 ms (5.053 ms / 100) 5.036 -> 5.038 ( +0.04%) [ +0.06% +0.14% +0.00% / +0.16% +0.04% +0.12%] index_select reverse : Elapsed 0.050 ms (5.039 ms / 100) 5.030 -> 5.038 ( +0.16%) [ +0.00% +0.14% +0.14% / +0.28% +0.40% +0.16%] index_select skip64 : Elapsed 0.050 ms (5.030 ms / 100) 5.026 -> 5.031 ( +0.10%) [ +0.00% +0.04% +0.30% / +0.10% +0.18% +0.48%] index_select skip256 : Elapsed 0.050 ms (5.026 ms / 100) 5.037 -> 5.048 ( +0.22%) [ +0.00% +0.40% +0.42% / +0.22% +0.24% +0.22%] index_select spread : Elapsed 0.050 ms (5.037 ms / 100) 5.054 -> 5.051 ( -0.06%) [ +0.12% +0.00% +0.26% / +0.02% -0.06% +0.06%] index_select strided 3 : Elapsed 0.051 ms (5.060 ms / 100) 5.059 -> 5.051 ( -0.16%) [ +0.14% +0.02% +0.00% / +0.16% +0.04% -0.16%] index_select random : Elapsed 0.051 ms (5.066 ms / 100) 5.054 -> 5.042 ( -0.24%) [ +0.00% +0.18% +0.22% / +0.20% -0.24% +0.10%] index_select random_sorted : Elapsed 0.051 ms (5.054 ms / 100) out_shape = [4, 20, 16, 40] in_shape = [4, 5, 16, 40] idx_dim = 1 B = [4, 20, 16, 40] (stride (12800, 40, 800, 1)) A = [4, 5, 16, 40] (stride (3200, 640, 1, 16)) dim = 1 1.853 -> 1.851 ( -0.11%) [ +0.11% +0.00% +0.11% / -0.11% +0.81% +0.81%] index_add_ linear : Elapsed 0.019 ms (1.855 ms / 100) 1.806 -> 1.807 ( +0.06%) [ +0.00% +0.00% +0.11% / +0.06% +0.89% +0.89%] index_copy_ linear : Elapsed 0.018 ms (1.806 ms / 100) 1.855 -> 1.856 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.70% +0.59%] index_add_ reverse : Elapsed 0.019 ms (1.856 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.00% +0.11% +0.00% / +0.39% +1.05% +1.00%] index_copy_ reverse : Elapsed 0.018 ms (1.805 ms / 100) 1.875 -> 1.875 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.53% +0.37%] index_add_ spread : Elapsed 0.019 ms (1.876 ms / 100) 1.825 -> 1.828 ( +0.16%) [ +0.27% +0.00% +0.22% / +0.16% +0.82% +0.99%] index_copy_ spread : Elapsed 0.018 ms (1.830 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.00% +0.05% +0.11% / +0.16% +0.75% +0.64%] index_add_ strided 3 : Elapsed 0.019 ms (1.874 ms / 100) 1.828 -> 1.831 ( +0.16%) [ +0.00% +0.22% +0.16% / +0.16% +0.82% +0.71%] index_copy_ strided 3 : Elapsed 0.018 ms (1.828 ms / 100) 1.865 -> 1.864 ( -0.05%) [ +0.11% +0.00% +0.05% / -0.05% +0.70% +0.97%] index_add_ strided 7 : Elapsed 0.019 ms (1.867 ms / 100) 1.818 -> 1.814 ( -0.22%) [ +0.00% +0.06% +0.00% / -0.22% +0.77% +1.16%] index_copy_ strided 7 : Elapsed 0.018 ms (1.818 ms / 100) 1.869 -> 1.873 ( +0.21%) [ +0.21% +0.11% +0.00% / +0.21% +0.43% +0.70%] index_add_ perm : Elapsed 0.019 ms (1.873 ms / 100) 1.824 -> 1.824 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.66% +0.77%] index_copy_ perm : Elapsed 0.018 ms (1.825 ms / 100) 1.868 -> 1.872 ( +0.21%) [ +0.16% +0.00% +0.05% / +0.21% +0.64% +0.27%] index_add_ perm_sorted : Elapsed 0.019 ms (1.871 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.60% +0.49%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.826 ms / 100) 8.515 -> 8.518 ( +0.04%) [ +0.00% +0.38% +0.04% / +0.04% +0.07% +0.13%] index_select const : Elapsed 0.085 ms (8.515 ms / 100) 8.536 -> 8.535 ( -0.01%) [ +0.00% +0.05% +0.15% / -0.01% +0.16% +0.05%] index_select wrap : Elapsed 0.085 ms (8.536 ms / 100) 8.526 -> 8.542 ( +0.19%) [ +0.01% +0.30% +0.00% / +0.26% +0.34% +0.19%] index_select linear : Elapsed 0.085 ms (8.527 ms / 100) 8.524 -> 8.519 ( -0.06%) [ +0.00% +0.19% +0.07% / -0.06% +0.20% +0.21%] index_select reverse : Elapsed 0.085 ms (8.524 ms / 100) 8.510 -> 8.527 ( +0.20%) [ +0.00% +0.35% +0.11% / +0.34% +0.22% +0.20%] index_select skip64 : Elapsed 0.085 ms (8.510 ms / 100) 8.510 -> 8.513 ( +0.04%) [ +0.09% +0.24% +0.00% / +0.04% +0.41% +0.15%] index_select skip256 : Elapsed 0.085 ms (8.518 ms / 100) 8.533 -> 8.546 ( +0.15%) [ +0.14% +0.05% +0.00% / +0.48% +0.19% +0.15%] index_select spread : Elapsed 0.085 ms (8.545 ms / 100) 8.542 -> 8.545 ( +0.04%) [ +0.15% +0.00% +0.21% / +0.04% +0.35% +0.30%] index_select strided 3 : Elapsed 0.086 ms (8.555 ms / 100) 8.539 -> 8.546 ( +0.08%) [ +0.04% +0.00% +0.20% / +0.11% +0.11% +0.08%] index_select random : Elapsed 0.085 ms (8.542 ms / 100) 8.529 -> 8.543 ( +0.16%) [ +0.00% +0.05% +0.12% / +0.16% +0.23% +0.21%] index_select random_sorted : Elapsed 0.085 ms (8.529 ms / 100) B = [4, 20, 16, 40] (stride (640, 2560, 40, 1)) A = [4, 5, 16, 40] (stride (3200, 1, 5, 80)) dim = 1 1.828 -> 1.834 ( +0.33%) [ +0.27% +0.27% +0.00% / +0.33% +0.60% +0.71%] index_add_ linear : Elapsed 0.018 ms (1.833 ms / 100) 1.773 -> 1.773 ( +0.00%) [ +0.17% +0.00% +0.11% / +0.00% +0.90% +1.02%] index_copy_ linear : Elapsed 0.018 ms (1.776 ms / 100) 1.832 -> 1.830 ( -0.11%) [ +0.00% +0.05% +0.05% / -0.11% +0.55% +0.49%] index_add_ reverse : Elapsed 0.018 ms (1.832 ms / 100) 1.776 -> 1.774 ( -0.11%) [ +0.11% +0.00% +0.00% / -0.11% +0.84% +0.73%] index_copy_ reverse : Elapsed 0.018 ms (1.778 ms / 100) 1.823 -> 1.827 ( +0.22%) [ +0.00% +0.27% +0.11% / +0.22% +0.93% +0.93%] index_add_ spread : Elapsed 0.018 ms (1.823 ms / 100) 1.772 -> 1.772 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +1.07% +0.79%] index_copy_ spread : Elapsed 0.018 ms (1.772 ms / 100) 1.830 -> 1.831 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +0.82% +0.82%] index_add_ strided 3 : Elapsed 0.018 ms (1.830 ms / 100) 1.771 -> 1.771 ( +0.00%) [ +0.28% +0.23% +0.00% / +0.00% +1.19% +1.19%] index_copy_ strided 3 : Elapsed 0.018 ms (1.776 ms / 100) 1.831 -> 1.831 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.33% +0.60%] index_add_ strided 7 : Elapsed 0.018 ms (1.832 ms / 100) 1.773 -> 1.776 ( +0.17%) [ +0.17% +0.00% +0.23% / +0.17% +0.68% +0.68%] index_copy_ strided 7 : Elapsed 0.018 ms (1.776 ms / 100) 1.835 -> 1.834 ( -0.05%) [ +0.16% +0.22% +0.00% / -0.05% +0.27% +0.11%] index_add_ perm : Elapsed 0.018 ms (1.838 ms / 100) 1.776 -> 1.778 ( +0.11%) [ +0.11% +0.00% +0.28% / +0.11% +0.45% +0.34%] index_copy_ perm : Elapsed 0.018 ms (1.778 ms / 100) 1.836 -> 1.833 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.16% +0.11% +0.05%] index_add_ perm_sorted : Elapsed 0.018 ms (1.839 ms / 100) 1.773 -> 1.779 ( +0.34%) [ +0.17% +0.00% +0.23% / +0.34% +0.51% +0.39%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.776 ms / 100) 8.217 -> 8.224 ( +0.09%) [ +0.09% +0.06% +0.00% / +0.10% +0.15% +0.09%] index_select const : Elapsed 0.082 ms (8.224 ms / 100) 8.215 -> 8.215 ( +0.00%) [ +0.00% +0.24% +0.30% / +0.00% +0.15% +0.28%] index_select wrap : Elapsed 0.082 ms (8.215 ms / 100) 8.219 -> 8.225 ( +0.07%) [ +0.22% +0.02% +0.00% / +0.17% +0.27% +0.07%] index_select linear : Elapsed 0.082 ms (8.237 ms / 100) 8.212 -> 8.224 ( +0.15%) [ +0.12% +0.00% +0.19% / +0.19% +0.39% +0.15%] index_select reverse : Elapsed 0.082 ms (8.222 ms / 100) 8.213 -> 8.208 ( -0.06%) [ +0.06% +0.00% +0.04% / -0.06% +0.33% +0.10%] index_select skip64 : Elapsed 0.082 ms (8.218 ms / 100) 8.211 -> 8.222 ( +0.13%) [ +0.06% +0.10% +0.00% / +0.43% +0.24% +0.13%] index_select skip256 : Elapsed 0.082 ms (8.216 ms / 100) 8.204 -> 8.227 ( +0.28%) [ +0.27% +0.00% +0.04% / +0.34% +0.28% +0.30%] index_select spread : Elapsed 0.082 ms (8.226 ms / 100) 8.217 -> 8.216 ( -0.01%) [ +0.09% +0.23% +0.00% / -0.01% +0.09% +0.29%] index_select strided 3 : Elapsed 0.082 ms (8.224 ms / 100) 8.208 -> 8.214 ( +0.07%) [ +0.19% +0.17% +0.00% / +0.07% +0.22% +0.34%] index_select random : Elapsed 0.082 ms (8.224 ms / 100) 8.221 -> 8.216 ( -0.06%) [ +0.17% +0.00% +0.09% / -0.06% -0.02% +0.09%] index_select random_sorted : Elapsed 0.082 ms (8.235 ms / 100) B = [4, 20, 16, 40] (stride (1, 2560, 4, 64)) A = [4, 5, 16, 40] (stride (1, 4, 800, 20)) dim = 1 1.806 -> 1.807 ( +0.06%) [ +0.22% +0.00% +0.22% / +0.06% +0.83% +0.83%] index_add_ linear : Elapsed 0.018 ms (1.810 ms / 100) 1.760 -> 1.756 ( -0.23%) [ +0.11% +0.00% +0.06% / -0.23% +0.51% +0.45%] index_copy_ linear : Elapsed 0.018 ms (1.762 ms / 100) 1.806 -> 1.810 ( +0.22%) [ +0.22% +0.00% +0.00% / +0.22% +0.66% +0.89%] index_add_ reverse : Elapsed 0.018 ms (1.810 ms / 100) 1.755 -> 1.757 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.74% +0.68%] index_copy_ reverse : Elapsed 0.018 ms (1.755 ms / 100) 1.796 -> 1.793 ( -0.17%) [ +0.00% +0.11% +0.11% / -0.17% +1.06% +1.11%] index_add_ spread : Elapsed 0.018 ms (1.796 ms / 100) 1.743 -> 1.745 ( +0.11%) [ +0.00% +0.23% +0.29% / +0.11% +1.26% +1.32%] index_copy_ spread : Elapsed 0.017 ms (1.743 ms / 100) 1.796 -> 1.797 ( +0.06%) [ +0.00% +0.11% +0.11% / +0.06% +1.17% +1.17%] index_add_ strided 3 : Elapsed 0.018 ms (1.796 ms / 100) 1.746 -> 1.749 ( +0.17%) [ +0.00% +0.06% +0.17% / +0.17% +1.26% +1.26%] index_copy_ strided 3 : Elapsed 0.017 ms (1.746 ms / 100) 1.800 -> 1.801 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.33% +0.44%] index_add_ strided 7 : Elapsed 0.018 ms (1.800 ms / 100) 1.748 -> 1.753 ( +0.29%) [ +0.06% +0.00% +0.23% / +0.29% +0.80% +0.63%] index_copy_ strided 7 : Elapsed 0.017 ms (1.749 ms / 100) 1.790 -> 1.799 ( +0.50%) [ +0.00% +0.06% +0.56% / +0.50% +1.73% +1.56%] index_add_ perm : Elapsed 0.018 ms (1.790 ms / 100) 1.741 -> 1.753 ( +0.69%) [ +0.00% +0.11% +0.69% / +0.69% +1.84% +1.84%] index_copy_ perm : Elapsed 0.017 ms (1.741 ms / 100) 1.813 -> 1.815 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.77% +0.94%] index_add_ perm_sorted : Elapsed 0.018 ms (1.813 ms / 100) 1.760 -> 1.764 ( +0.23%) [ +0.00% +0.23% +0.17% / +0.23% +0.97% +1.02%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.760 ms / 100) 8.227 -> 8.228 ( +0.01%) [ +0.06% +0.00% +0.06% / +0.01% +0.10% +0.34%] index_select const : Elapsed 0.082 ms (8.232 ms / 100) 8.237 -> 8.243 ( +0.07%) [ +0.08% +0.04% +0.00% / +0.13% +0.27% +0.07%] index_select wrap : Elapsed 0.082 ms (8.244 ms / 100) 8.232 -> 8.238 ( +0.07%) [ +0.15% +0.00% +0.21% / +0.07% +0.33% +0.16%] index_select linear : Elapsed 0.082 ms (8.244 ms / 100) 8.231 -> 8.239 ( +0.10%) [ +0.33% +0.00% +0.00% / +0.28% +0.10% +0.22%] index_select reverse : Elapsed 0.083 ms (8.258 ms / 100) 8.221 -> 8.241 ( +0.24%) [ +0.19% +0.00% +0.09% / +0.24% +0.34% +0.27%] index_select skip64 : Elapsed 0.082 ms (8.237 ms / 100) 8.222 -> 8.238 ( +0.19%) [ +0.00% +0.06% +0.13% / +0.21% +0.28% +0.19%] index_select skip256 : Elapsed 0.082 ms (8.222 ms / 100) 8.246 -> 8.233 ( -0.16%) [ +0.07% +0.00% +0.01% / -0.16% +0.01% +0.33%] index_select spread : Elapsed 0.083 ms (8.252 ms / 100) 8.239 -> 8.246 ( +0.08%) [ +0.17% +0.00% +0.13% / +0.19% +0.16% +0.08%] index_select strided 3 : Elapsed 0.083 ms (8.253 ms / 100) 8.241 -> 8.239 ( -0.02%) [ +0.06% +0.00% +0.11% / -0.02% -0.01% +0.11%] index_select random : Elapsed 0.082 ms (8.246 ms / 100) 8.237 -> 8.224 ( -0.16%) [ +0.04% +0.00% +0.00% / -0.16% +0.52% +0.21%] index_select random_sorted : Elapsed 0.082 ms (8.240 ms / 100) B = [4, 20, 16, 40] (stride (40, 160, 3200, 1)) A = [4, 5, 16, 40] (stride (640, 2560, 40, 1)) dim = 1 1.702 -> 1.706 ( +0.24%) [ +0.24% +0.29% +0.00% / +0.24% +1.76% +1.65%] index_add_ linear : Elapsed 0.017 ms (1.706 ms / 100) 1.659 -> 1.662 ( +0.18%) [ +0.36% +0.00% +0.18% / +0.18% +1.45% +1.39%] index_copy_ linear : Elapsed 0.017 ms (1.665 ms / 100) 1.704 -> 1.705 ( +0.06%) [ +0.00% +0.23% +0.18% / +0.06% +1.88% +2.05%] index_add_ reverse : Elapsed 0.017 ms (1.704 ms / 100) 1.657 -> 1.665 ( +0.48%) [ +0.00% +0.18% +0.12% / +0.48% +1.93% +1.81%] index_copy_ reverse : Elapsed 0.017 ms (1.657 ms / 100) 1.707 -> 1.706 ( -0.06%) [ +0.18% +0.00% +0.00% / -0.06% +0.76% +0.82%] index_add_ spread : Elapsed 0.017 ms (1.710 ms / 100) 1.657 -> 1.662 ( +0.30%) [ +0.18% +0.36% +0.00% / +0.30% +1.15% +1.27%] index_copy_ spread : Elapsed 0.017 ms (1.660 ms / 100) 1.708 -> 1.706 ( -0.12%) [ +0.12% +0.00% +0.12% / -0.12% +0.59% +0.41%] index_add_ strided 3 : Elapsed 0.017 ms (1.710 ms / 100) 1.660 -> 1.661 ( +0.06%) [ +0.00% +0.12% +0.24% / +0.06% +0.90% +0.96%] index_copy_ strided 3 : Elapsed 0.017 ms (1.660 ms / 100) 1.709 -> 1.712 ( +0.18%) [ +0.35% +0.00% +0.23% / +0.18% +0.29% +0.35%] index_add_ strided 7 : Elapsed 0.017 ms (1.715 ms / 100) 1.664 -> 1.666 ( +0.12%) [ +0.00% +0.12% +0.24% / +0.12% +0.54% +0.24%] index_copy_ strided 7 : Elapsed 0.017 ms (1.664 ms / 100) 1.703 -> 1.700 ( -0.18%) [ +0.23% +0.00% +0.00% / -0.18% +1.00% +0.70%] index_add_ perm : Elapsed 0.017 ms (1.707 ms / 100) 1.663 -> 1.661 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.72% +0.96%] index_copy_ perm : Elapsed 0.017 ms (1.663 ms / 100) 1.701 -> 1.703 ( +0.12%) [ +0.24% +0.00% +0.29% / +0.12% +5.29% +1.00%] index_add_ perm_sorted : Elapsed 0.017 ms (1.705 ms / 100) 1.659 -> 1.663 ( +0.24%) [ +0.06% +0.30% +0.00% / +0.24% +1.33% +0.90%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.660 ms / 100) 8.201 -> 8.214 ( +0.16%) [ +0.04% +0.09% +0.00% / +0.16% +0.28% +0.26%] index_select const : Elapsed 0.082 ms (8.204 ms / 100) 8.246 -> 8.263 ( +0.21%) [ +0.08% +0.04% +0.00% / +0.21% +0.21% +0.25%] index_select wrap : Elapsed 0.083 ms (8.253 ms / 100) 8.230 -> 8.220 ( -0.12%) [ +0.30% +0.00% +0.32% / -0.12% +0.32% +0.39%] index_select linear : Elapsed 0.083 ms (8.255 ms / 100) 8.241 -> 8.230 ( -0.13%) [ +0.35% +0.02% +0.00% / -0.02% +0.18% -0.13%] index_select reverse : Elapsed 0.083 ms (8.270 ms / 100) 8.197 -> 8.205 ( +0.10%) [ +0.00% +0.18% +0.10% / +0.10% +0.11% +0.17%] index_select skip64 : Elapsed 0.082 ms (8.197 ms / 100) 8.196 -> 8.206 ( +0.12%) [ +0.05% +0.00% +0.20% / +0.20% +0.12% +0.31%] index_select skip256 : Elapsed 0.082 ms (8.200 ms / 100) 8.212 -> 8.246 ( +0.41%) [ +0.05% +0.17% +0.00% / +0.41% +0.56% +0.65%] index_select spread : Elapsed 0.082 ms (8.216 ms / 100) 8.239 -> 8.263 ( +0.29%) [ +0.05% +0.00% +0.06% / +0.29% +0.34% +0.33%] index_select strided 3 : Elapsed 0.082 ms (8.243 ms / 100) 8.253 -> 8.241 ( -0.15%) [ +0.00% +0.02% +0.24% / -0.15% +0.21% +0.21%] index_select random : Elapsed 0.083 ms (8.253 ms / 100) 8.225 -> 8.225 ( +0.00%) [ +0.04% +0.01% +0.00% / +0.00% +0.34% +0.35%] index_select random_sorted : Elapsed 0.082 ms (8.228 ms / 100) B = [4, 20, 16, 40] (stride (40, 160, 3200, 1)) A = [4, 5, 16, 40] (stride (1, 160, 800, 4)) dim = 1 1.807 -> 1.813 ( +0.33%) [ +0.00% +0.00% +0.28% / +0.33% +2.60% +2.49%] index_add_ linear : Elapsed 0.018 ms (1.807 ms / 100) 1.760 -> 1.770 ( +0.57%) [ +0.23% +0.00% +0.51% / +0.57% +2.90% +2.61%] index_copy_ linear : Elapsed 0.018 ms (1.764 ms / 100) 1.807 -> 1.814 ( +0.39%) [ +0.06% +0.00% +0.39% / +0.39% +2.77% +2.71%] index_add_ reverse : Elapsed 0.018 ms (1.808 ms / 100) 1.760 -> 1.769 ( +0.51%) [ +0.17% +0.00% +0.45% / +0.51% +2.78% +2.90%] index_copy_ reverse : Elapsed 0.018 ms (1.763 ms / 100) 1.815 -> 1.827 ( +0.66%) [ +0.11% +0.00% +0.77% / +0.66% +1.87% +2.04%] index_add_ spread : Elapsed 0.018 ms (1.817 ms / 100) 1.767 -> 1.782 ( +0.85%) [ +0.00% +0.34% +0.62% / +0.85% +2.09% +1.92%] index_copy_ spread : Elapsed 0.018 ms (1.767 ms / 100) 1.815 -> 1.826 ( +0.61%) [ +0.00% +0.17% +0.39% / +0.61% +1.82% +1.87%] index_add_ strided 3 : Elapsed 0.018 ms (1.815 ms / 100) 1.770 -> 1.779 ( +0.51%) [ +0.00% +0.00% +0.62% / +0.51% +1.81% +1.92%] index_copy_ strided 3 : Elapsed 0.018 ms (1.770 ms / 100) 1.828 -> 1.832 ( +0.22%) [ +0.00% +0.05% +0.33% / +0.22% +1.04% +0.98%] index_add_ strided 7 : Elapsed 0.018 ms (1.828 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.00% +0.00% +0.22% / +0.17% +1.12% +1.01%] index_copy_ strided 7 : Elapsed 0.018 ms (1.782 ms / 100) 1.831 -> 1.831 ( +0.00%) [ +0.16% +0.11% +0.00% / +0.00% +0.93% +0.87%] index_add_ perm : Elapsed 0.018 ms (1.834 ms / 100) 1.785 -> 1.785 ( +0.00%) [ +0.06% +0.00% +0.17% / +0.00% +1.06% +1.01%] index_copy_ perm : Elapsed 0.018 ms (1.786 ms / 100) 1.833 -> 1.834 ( +0.05%) [ +0.16% +0.00% +0.16% / +0.05% +0.71% +0.82%] index_add_ perm_sorted : Elapsed 0.018 ms (1.836 ms / 100) 1.784 -> 1.791 ( +0.39%) [ +0.34% +0.00% +0.22% / +0.39% +0.90% +0.95%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.790 ms / 100) 8.521 -> 8.525 ( +0.05%) [ +0.18% +0.05% +0.00% / +0.05% +0.40% +0.39%] index_select const : Elapsed 0.085 ms (8.536 ms / 100) 8.542 -> 8.548 ( +0.07%) [ +0.21% +0.21% +0.00% / +0.07% +0.13% +0.40%] index_select wrap : Elapsed 0.086 ms (8.560 ms / 100) 8.543 -> 8.538 ( -0.06%) [ +0.12% +0.20% +0.00% / -0.06% +0.61% +0.18%] index_select linear : Elapsed 0.086 ms (8.553 ms / 100) 8.533 -> 8.538 ( +0.06%) [ +0.26% +0.13% +0.00% / +0.06% +0.29% +0.41%] index_select reverse : Elapsed 0.086 ms (8.555 ms / 100) 8.517 -> 8.518 ( +0.01%) [ +0.27% +0.00% +0.22% / +0.22% +0.18% +0.01%] index_select skip64 : Elapsed 0.085 ms (8.540 ms / 100) 8.515 -> 8.526 ( +0.13%) [ +0.21% +0.13% +0.00% / +0.41% +0.13% +0.22%] index_select skip256 : Elapsed 0.085 ms (8.533 ms / 100) 8.540 -> 8.554 ( +0.16%) [ +0.01% +0.08% +0.00% / +0.16% +0.25% +0.22%] index_select spread : Elapsed 0.085 ms (8.541 ms / 100) 8.554 -> 8.549 ( -0.06%) [ +0.07% +0.30% +0.00% / -0.06% +0.09% +0.02%] index_select strided 3 : Elapsed 0.086 ms (8.560 ms / 100) 8.545 -> 8.561 ( +0.19%) [ +0.00% +0.14% +0.36% / +0.19% +0.49% +0.25%] index_select random : Elapsed 0.085 ms (8.545 ms / 100) 8.541 -> 8.536 ( -0.06%) [ +0.12% +0.00% +0.20% / -0.06% +0.18% +0.23%] index_select random_sorted : Elapsed 0.086 ms (8.551 ms / 100) B = [4, 20, 16, 40] (stride (20, 1, 3200, 80)) A = [4, 5, 16, 40] (stride (1, 64, 4, 320)) dim = 1 1.884 -> 1.879 ( -0.27%) [ +0.00% +0.16% +0.32% / -0.16% -0.11% -0.27%] index_add_ linear : Elapsed 0.019 ms (1.884 ms / 100) 1.845 -> 1.837 ( -0.43%) [ +0.05% +0.27% +0.00% / +0.05% -0.38% -0.43%] index_copy_ linear : Elapsed 0.018 ms (1.846 ms / 100) 1.882 -> 1.876 ( -0.32%) [ +0.27% +0.11% +0.00% / +0.32% -0.32% +0.00%] index_add_ reverse : Elapsed 0.019 ms (1.887 ms / 100) 1.848 -> 1.841 ( -0.38%) [ +0.11% +0.00% +0.05% / -0.11% -0.38% -0.32%] index_copy_ reverse : Elapsed 0.018 ms (1.850 ms / 100) 1.910 -> 1.906 ( -0.21%) [ +0.21% +0.26% +0.00% / +0.26% -0.05% -0.21%] index_add_ spread : Elapsed 0.019 ms (1.914 ms / 100) 1.883 -> 1.874 ( -0.48%) [ +0.00% +0.27% +0.05% / +0.32% -0.48% -0.27%] index_copy_ spread : Elapsed 0.019 ms (1.883 ms / 100) 1.904 -> 1.899 ( -0.26%) [ +0.11% +0.21% +0.00% / +0.16% -0.26% -0.11%] index_add_ strided 3 : Elapsed 0.019 ms (1.906 ms / 100) 1.873 -> 1.862 ( -0.59%) [ +0.00% +0.21% +0.11% / +0.21% -0.48% -0.59%] index_copy_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.908 -> 1.904 ( -0.21%) [ +0.00% +0.05% +0.16% / -0.21% +0.16% -0.05%] index_add_ strided 7 : Elapsed 0.019 ms (1.908 ms / 100) 1.873 -> 1.867 ( -0.32%) [ +0.00% +0.16% +0.43% / +0.32% -0.32% -0.21%] index_copy_ strided 7 : Elapsed 0.019 ms (1.873 ms / 100) 1.905 -> 1.903 ( -0.10%) [ +0.31% +0.00% +0.26% / +0.05% +0.00% -0.10%] index_add_ perm : Elapsed 0.019 ms (1.911 ms / 100) 1.873 -> 1.863 ( -0.53%) [ +0.05% +0.43% +0.00% / +0.11% +0.00% -0.53%] index_copy_ perm : Elapsed 0.019 ms (1.874 ms / 100) 1.898 -> 1.897 ( -0.05%) [ +0.58% +0.32% +0.00% / +0.11% -0.05% +0.05%] index_add_ perm_sorted : Elapsed 0.019 ms (1.909 ms / 100) 1.868 -> 1.864 ( -0.21%) [ +0.43% +0.00% +0.37% / +0.11% -0.11% -0.21%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.876 ms / 100) 8.250 -> 8.254 ( +0.05%) [ +0.41% +0.01% +0.00% / +0.05% +0.34% +0.46%] index_select const : Elapsed 0.083 ms (8.284 ms / 100) 8.282 -> 8.295 ( +0.16%) [ +0.00% +0.05% +0.05% / +0.16% +0.35% +0.42%] index_select wrap : Elapsed 0.083 ms (8.282 ms / 100) 8.290 -> 8.285 ( -0.06%) [ +0.06% +0.00% +0.28% / -0.06% +0.33% +0.49%] index_select linear : Elapsed 0.083 ms (8.295 ms / 100) 8.280 -> 8.291 ( +0.13%) [ +0.05% +0.00% +0.37% / +0.13% +0.19% +0.17%] index_select reverse : Elapsed 0.083 ms (8.284 ms / 100) 8.253 -> 8.271 ( +0.22%) [ +0.16% +0.00% +0.11% / +0.33% +0.23% +0.22%] index_select skip64 : Elapsed 0.083 ms (8.266 ms / 100) 8.264 -> 8.271 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.23% +0.11%] index_select skip256 : Elapsed 0.083 ms (8.267 ms / 100) 8.292 -> 8.296 ( +0.05%) [ +0.00% +0.12% +0.14% / +0.05% +0.37% +0.41%] index_select spread : Elapsed 0.083 ms (8.292 ms / 100) 8.294 -> 8.302 ( +0.10%) [ +0.02% +0.00% +0.14% / +0.10% +0.40% +0.14%] index_select strided 3 : Elapsed 0.083 ms (8.296 ms / 100) 8.304 -> 8.308 ( +0.05%) [ +0.07% +0.00% +0.12% / +0.05% +0.39% +0.25%] index_select random : Elapsed 0.083 ms (8.310 ms / 100) 8.300 -> 8.311 ( +0.13%) [ +0.14% +0.00% +0.00% / +0.13% +0.35% +0.39%] index_select random_sorted : Elapsed 0.083 ms (8.312 ms / 100) B = [4, 20, 16, 40] (stride (320, 16, 1, 1280)) A = [4, 5, 16, 40] (stride (640, 2560, 40, 1)) dim = 1 1.732 -> 1.737 ( +0.29%) [ +0.06% +0.00% +0.12% / +0.29% +1.44% +1.33%] index_add_ linear : Elapsed 0.017 ms (1.733 ms / 100) 1.689 -> 1.691 ( +0.12%) [ +0.24% +0.00% +0.06% / +0.12% +1.48% +1.36%] index_copy_ linear : Elapsed 0.017 ms (1.693 ms / 100) 1.733 -> 1.738 ( +0.29%) [ +0.00% +0.00% +0.06% / +0.29% +1.04% +1.10%] index_add_ reverse : Elapsed 0.017 ms (1.733 ms / 100) 1.691 -> 1.695 ( +0.24%) [ +0.18% +0.00% +0.12% / +0.24% +1.30% +1.36%] index_copy_ reverse : Elapsed 0.017 ms (1.694 ms / 100) 1.749 -> 1.754 ( +0.29%) [ +0.11% +0.00% +0.29% / +0.29% +1.14% +1.14%] index_add_ spread : Elapsed 0.018 ms (1.751 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.00% +0.00% +0.12% / +0.12% +1.00% +1.17%] index_copy_ spread : Elapsed 0.017 ms (1.708 ms / 100) 1.747 -> 1.747 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +1.03% +1.14%] index_add_ strided 3 : Elapsed 0.017 ms (1.747 ms / 100) 1.703 -> 1.709 ( +0.35%) [ +0.00% +0.29% +0.18% / +0.35% +1.17% +1.17%] index_copy_ strided 3 : Elapsed 0.017 ms (1.703 ms / 100) 1.741 -> 1.740 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.98% +1.32%] index_add_ strided 7 : Elapsed 0.017 ms (1.741 ms / 100) 1.696 -> 1.699 ( +0.18%) [ +0.00% +0.00% +0.12% / +0.18% +1.47% +1.30%] index_copy_ strided 7 : Elapsed 0.017 ms (1.696 ms / 100) 1.740 -> 1.745 ( +0.29%) [ +0.00% +0.23% +0.29% / +0.29% +1.26% +1.21%] index_add_ perm : Elapsed 0.017 ms (1.740 ms / 100) 1.698 -> 1.702 ( +0.24%) [ +0.00% +0.18% +0.24% / +0.24% +1.18% +0.94%] index_copy_ perm : Elapsed 0.017 ms (1.698 ms / 100) 1.742 -> 1.741 ( -0.06%) [ +0.17% +0.00% +0.34% / -0.06% +1.03% +0.98%] index_add_ perm_sorted : Elapsed 0.017 ms (1.745 ms / 100) 1.700 -> 1.700 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +1.06% +0.88%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.700 ms / 100) 8.223 -> 8.226 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.35% +0.06%] index_select const : Elapsed 0.082 ms (8.223 ms / 100) 8.266 -> 8.254 ( -0.15%) [ +0.00% +0.13% +0.04% / +0.04% +0.04% -0.15%] index_select wrap : Elapsed 0.083 ms (8.266 ms / 100) 8.243 -> 8.242 ( -0.01%) [ +0.02% +0.00% +0.18% / +0.29% -0.01% +0.08%] index_select linear : Elapsed 0.082 ms (8.245 ms / 100) 8.254 -> 8.228 ( -0.31%) [ +0.02% +0.00% +0.10% / -0.31% +0.22% +0.30%] index_select reverse : Elapsed 0.083 ms (8.256 ms / 100) 8.207 -> 8.198 ( -0.11%) [ +0.11% +0.11% +0.00% / -0.11% +0.16% +0.34%] index_select skip64 : Elapsed 0.082 ms (8.216 ms / 100) 8.217 -> 8.219 ( +0.02%) [ +0.12% +0.00% +0.07% / +0.15% +0.02% +0.07%] index_select skip256 : Elapsed 0.082 ms (8.227 ms / 100) 8.244 -> 8.225 ( -0.23%) [ +0.00% +0.07% +0.01% / +0.06% -0.23% -0.12%] index_select spread : Elapsed 0.082 ms (8.244 ms / 100) 8.256 -> 8.258 ( +0.02%) [ +0.25% +0.00% +0.16% / +0.02% +0.23% +0.25%] index_select strided 3 : Elapsed 0.083 ms (8.277 ms / 100) 8.259 -> 8.253 ( -0.07%) [ +0.02% +0.01% +0.00% / -0.07% +0.02% -0.01%] index_select random : Elapsed 0.083 ms (8.261 ms / 100) 8.235 -> 8.237 ( +0.02%) [ +0.12% +0.00% +0.06% / +0.02% +0.27% +0.17%] index_select random_sorted : Elapsed 0.082 ms (8.245 ms / 100) B = [4, 20, 16, 40] (stride (320, 1, 20, 1280)) A = [4, 5, 16, 40] (stride (1, 64, 4, 320)) dim = 1 1.999 -> 1.991 ( -0.40%) [ +0.10% +0.25% +0.00% / +0.05% -0.25% -0.40%] index_add_ linear : Elapsed 0.020 ms (2.001 ms / 100) 1.955 -> 1.945 ( -0.51%) [ +0.10% +0.31% +0.00% / -0.26% -0.31% -0.51%] index_copy_ linear : Elapsed 0.020 ms (1.957 ms / 100) 1.997 -> 1.989 ( -0.40%) [ +0.25% +0.00% +0.20% / +0.00% -0.40% -0.30%] index_add_ reverse : Elapsed 0.020 ms (2.002 ms / 100) 1.952 -> 1.948 ( -0.20%) [ +0.20% +0.05% +0.00% / +0.36% -0.10% -0.20%] index_copy_ reverse : Elapsed 0.020 ms (1.956 ms / 100) 2.016 -> 2.009 ( -0.35%) [ +0.15% +0.00% +0.10% / +0.10% -0.25% -0.35%] index_add_ spread : Elapsed 0.020 ms (2.019 ms / 100) 1.983 -> 1.974 ( -0.45%) [ +0.00% +0.05% +0.10% / +0.10% -0.35% -0.45%] index_copy_ spread : Elapsed 0.020 ms (1.983 ms / 100) 2.014 -> 2.008 ( -0.30%) [ +0.20% +0.15% +0.00% / +0.25% -0.30% -0.20%] index_add_ strided 3 : Elapsed 0.020 ms (2.018 ms / 100) 1.980 -> 1.974 ( -0.30%) [ +0.25% +0.00% +0.10% / +0.15% -0.30% -0.25%] index_copy_ strided 3 : Elapsed 0.020 ms (1.985 ms / 100) 2.018 -> 2.010 ( -0.40%) [ +0.20% +0.05% +0.00% / +0.00% -0.40% -0.10%] index_add_ strided 7 : Elapsed 0.020 ms (2.022 ms / 100) 1.981 -> 1.976 ( -0.25%) [ +0.00% +0.10% +0.20% / +0.30% -0.25% -0.20%] index_copy_ strided 7 : Elapsed 0.020 ms (1.981 ms / 100) 2.011 -> 2.006 ( -0.25%) [ +0.05% +0.00% +0.10% / -0.20% -0.25% -0.20%] index_add_ perm : Elapsed 0.020 ms (2.012 ms / 100) 1.967 -> 1.964 ( -0.15%) [ +0.20% +0.00% +0.25% / +0.51% +0.15% -0.15%] index_copy_ perm : Elapsed 0.020 ms (1.971 ms / 100) 2.007 -> 1.994 ( -0.65%) [ +0.10% +0.40% +0.00% / +0.10% -0.10% -0.65%] index_add_ perm_sorted : Elapsed 0.020 ms (2.009 ms / 100) 1.971 -> 1.960 ( -0.56%) [ +0.05% +0.00% +0.05% / +0.15% -0.56% -0.41%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.972 ms / 100) 8.585 -> 8.588 ( +0.03%) [ +0.12% +0.00% +0.33% / +0.03% +0.45% +0.52%] index_select const : Elapsed 0.086 ms (8.595 ms / 100) 8.621 -> 8.631 ( +0.12%) [ +0.00% +0.13% +0.12% / +0.12% +0.34% +0.38%] index_select wrap : Elapsed 0.086 ms (8.621 ms / 100) 8.621 -> 8.642 ( +0.24%) [ +0.00% +0.08% +0.07% / +0.24% +0.30% +0.29%] index_select linear : Elapsed 0.086 ms (8.621 ms / 100) 8.615 -> 8.631 ( +0.19%) [ +0.22% +0.07% +0.00% / +0.19% +0.57% +0.35%] index_select reverse : Elapsed 0.086 ms (8.634 ms / 100) 8.601 -> 8.598 ( -0.03%) [ +0.08% +0.09% +0.00% / -0.03% +0.21% +0.31%] index_select skip64 : Elapsed 0.086 ms (8.608 ms / 100) 8.599 -> 8.609 ( +0.12%) [ +0.00% +0.00% +0.03% / +0.14% +0.12% +0.21%] index_select skip256 : Elapsed 0.086 ms (8.599 ms / 100) 8.635 -> 8.629 ( -0.07%) [ +0.00% +0.06% +0.10% / -0.07% +0.21% +0.23%] index_select spread : Elapsed 0.086 ms (8.635 ms / 100) 8.631 -> 8.623 ( -0.09%) [ +0.00% +0.01% +0.14% / -0.09% +0.22% +0.57%] index_select strided 3 : Elapsed 0.086 ms (8.631 ms / 100) 8.637 -> 8.643 ( +0.07%) [ +0.23% +0.00% +0.06% / +0.07% +0.46% +0.31%] index_select random : Elapsed 0.087 ms (8.657 ms / 100) 8.642 -> 8.659 ( +0.20%) [ +0.00% +0.09% +0.14% / +0.20% +0.20% +0.21%] index_select random_sorted : Elapsed 0.086 ms (8.642 ms / 100) B = [4, 20, 16, 40] (stride (16, 64, 1, 1280)) A = [4, 5, 16, 40] (stride (16, 64, 1, 320)) dim = 1 1.966 -> 1.967 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.10% +0.15% +0.05%] index_add_ linear : Elapsed 0.020 ms (1.968 ms / 100) 1.922 -> 1.925 ( +0.16%) [ +0.00% +0.00% +0.31% / +0.36% +0.42% +0.16%] index_copy_ linear : Elapsed 0.019 ms (1.922 ms / 100) 1.971 -> 1.975 ( +0.20%) [ +0.15% +0.00% +0.00% / +0.20% +0.36% +0.25%] index_add_ reverse : Elapsed 0.020 ms (1.974 ms / 100) 1.933 -> 1.934 ( +0.05%) [ +0.00% +0.05% +0.52% / +0.36% +0.21% +0.05%] index_copy_ reverse : Elapsed 0.019 ms (1.933 ms / 100) 1.962 -> 1.968 ( +0.31%) [ +0.00% +0.00% +0.25% / +0.31% +0.46% +0.76%] index_add_ spread : Elapsed 0.020 ms (1.962 ms / 100) 1.921 -> 1.923 ( +0.10%) [ +0.00% +0.00% +0.31% / +0.10% +0.62% +0.94%] index_copy_ spread : Elapsed 0.019 ms (1.921 ms / 100) 1.960 -> 1.962 ( +0.10%) [ +0.00% +0.26% +0.46% / +0.10% +0.82% +0.92%] index_add_ strided 3 : Elapsed 0.020 ms (1.960 ms / 100) 1.924 -> 1.928 ( +0.21%) [ +0.00% +0.21% +0.26% / +0.21% +0.94% +1.09%] index_copy_ strided 3 : Elapsed 0.019 ms (1.924 ms / 100) 1.973 -> 1.976 ( +0.15%) [ +0.00% +0.15% +0.25% / +0.15% +0.76% +0.61%] index_add_ strided 7 : Elapsed 0.020 ms (1.973 ms / 100) 1.935 -> 1.936 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.47% +0.36%] index_copy_ strided 7 : Elapsed 0.019 ms (1.936 ms / 100) 1.962 -> 1.967 ( +0.25%) [ +0.10% +0.00% +0.15% / +0.25% +1.43% +1.53%] index_add_ perm : Elapsed 0.020 ms (1.964 ms / 100) 1.921 -> 1.933 ( +0.62%) [ +0.00% +0.21% +0.42% / +0.62% +1.72% +1.67%] index_copy_ perm : Elapsed 0.019 ms (1.921 ms / 100) 1.952 -> 1.962 ( +0.51%) [ +0.05% +0.00% +0.26% / +0.51% +1.84% +1.43%] index_add_ perm_sorted : Elapsed 0.020 ms (1.953 ms / 100) 1.911 -> 1.919 ( +0.42%) [ +0.21% +0.00% +0.16% / +0.42% +1.88% +1.78%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.915 ms / 100) 8.548 -> 8.544 ( -0.05%) [ +0.14% +0.00% +0.11% / +0.15% +0.05% -0.05%] index_select const : Elapsed 0.086 ms (8.560 ms / 100) 8.562 -> 8.571 ( +0.11%) [ +0.00% +0.19% +0.07% / +0.34% +0.11% +0.40%] index_select wrap : Elapsed 0.086 ms (8.562 ms / 100) 8.580 -> 8.558 ( -0.26%) [ +0.02% +0.06% +0.00% / -0.26% +0.21% +0.36%] index_select linear : Elapsed 0.086 ms (8.582 ms / 100) 8.569 -> 8.574 ( +0.06%) [ +0.33% +0.00% +0.05% / +0.12% +0.06% +0.14%] index_select reverse : Elapsed 0.086 ms (8.597 ms / 100) 8.540 -> 8.534 ( -0.07%) [ +0.00% +0.20% +0.15% / -0.07% +0.00% +0.48%] index_select skip64 : Elapsed 0.085 ms (8.540 ms / 100) 8.543 -> 8.555 ( +0.14%) [ +0.05% +0.00% +0.14% / +0.14% +0.29% +0.15%] index_select skip256 : Elapsed 0.085 ms (8.547 ms / 100) 8.580 -> 8.606 ( +0.30%) [ +0.01% +0.00% +0.15% / +0.35% +0.30% +0.30%] index_select spread : Elapsed 0.086 ms (8.581 ms / 100) 8.561 -> 8.572 ( +0.13%) [ +0.00% +0.16% +0.05% / +0.32% +0.13% +0.21%] index_select strided 3 : Elapsed 0.086 ms (8.561 ms / 100) 8.578 -> 8.593 ( +0.17%) [ +0.00% +0.17% +0.07% / +0.17% +0.21% +0.24%] index_select random : Elapsed 0.086 ms (8.578 ms / 100) 8.575 -> 8.580 ( +0.06%) [ +0.24% +0.16% +0.00% / +0.06% +0.30% +0.16%] index_select random_sorted : Elapsed 0.086 ms (8.596 ms / 100) out_shape = [4, 5, 20, 40] in_shape = [4, 5, 16, 40] idx_dim = 2 B = [4, 5, 20, 40] (stride (4000, 800, 40, 1)) A = [4, 5, 16, 40] (stride (640, 2560, 1, 16)) dim = 2 3.885 -> 3.887 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.88% +0.88%] index_add_ linear : Elapsed 0.039 ms (3.886 ms / 100) 3.740 -> 3.742 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.75% +0.59%] index_copy_ linear : Elapsed 0.037 ms (3.740 ms / 100) 3.888 -> 3.888 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.85% +0.80%] index_add_ reverse : Elapsed 0.039 ms (3.888 ms / 100) 3.745 -> 3.746 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.83% +0.85%] index_copy_ reverse : Elapsed 0.037 ms (3.747 ms / 100) 3.890 -> 3.904 ( +0.36%) [ +0.15% +0.10% +0.00% / +0.36% +0.82% +0.85%] index_add_ spread : Elapsed 0.039 ms (3.896 ms / 100) 3.747 -> 3.749 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.64% +0.64%] index_copy_ spread : Elapsed 0.037 ms (3.749 ms / 100) 3.873 -> 3.875 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.85% +0.93%] index_add_ strided 3 : Elapsed 0.039 ms (3.874 ms / 100) 3.731 -> 3.732 ( +0.03%) [ +0.16% +0.00% +0.03% / +0.03% +0.80% +0.72%] index_copy_ strided 3 : Elapsed 0.037 ms (3.737 ms / 100) 3.875 -> 3.876 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +1.08% +0.93%] index_add_ strided 7 : Elapsed 0.039 ms (3.876 ms / 100) 3.733 -> 3.734 ( +0.03%) [ +0.05% +0.11% +0.00% / +0.03% +0.88% +0.67%] index_copy_ strided 7 : Elapsed 0.037 ms (3.735 ms / 100) 3.892 -> 3.894 ( +0.05%) [ +0.23% +0.18% +0.00% / +0.05% +0.77% +0.80%] index_add_ perm : Elapsed 0.039 ms (3.901 ms / 100) 3.750 -> 3.750 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.45% +0.45%] index_copy_ perm : Elapsed 0.038 ms (3.752 ms / 100) 3.888 -> 3.889 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.80% +0.90%] index_add_ perm_sorted : Elapsed 0.039 ms (3.889 ms / 100) 3.746 -> 3.750 ( +0.11%) [ +0.00% +0.03% +0.05% / +0.11% +0.83% +0.77%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.746 ms / 100) 5.461 -> 5.463 ( +0.04%) [ +0.18% +0.02% +0.00% / +0.04% +0.26% +0.09%] index_select const : Elapsed 0.055 ms (5.471 ms / 100) 5.472 -> 5.471 ( -0.02%) [ +0.04% +0.13% +0.00% / -0.02% +0.07% -0.02%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.470 -> 5.472 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.11% +0.15%] index_select linear : Elapsed 0.055 ms (5.472 ms / 100) 5.463 -> 5.464 ( +0.02%) [ +0.22% +0.18% +0.00% / +0.02% +0.18% +0.15%] index_select reverse : Elapsed 0.055 ms (5.475 ms / 100) 5.465 -> 5.464 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.22% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.465 ms / 100) 5.463 -> 5.465 ( +0.04%) [ +0.00% +0.04% +0.05% / +0.04% +0.24% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.463 ms / 100) 5.471 -> 5.468 ( -0.05%) [ +0.00% +0.11% +0.00% / +0.09% -0.05% +0.11%] index_select spread : Elapsed 0.055 ms (5.471 ms / 100) 5.475 -> 5.465 ( -0.18%) [ +0.00% +0.09% +0.16% / +0.04% -0.18% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.475 ms / 100) 5.471 -> 5.476 ( +0.09%) [ +0.15% +0.00% +0.02% / +0.09% +0.11% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.479 ms / 100) 5.464 -> 5.471 ( +0.13%) [ +0.00% +0.27% +0.33% / +0.26% +0.13% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.464 ms / 100) 5.472 -> 5.470 ( -0.04%) [ +0.02% +0.02% +0.00% / -0.04% +0.07% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.473 ms / 100) 5.473 -> 5.468 ( -0.09%) [ +0.00% +0.11% +0.04% / +0.11% -0.09% -0.07%] index_select random : Elapsed 0.055 ms (5.473 ms / 100) 5.473 -> 5.469 ( -0.07%) [ +0.09% +0.07% +0.00% / -0.07% +0.09% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.478 ms / 100) B = [4, 5, 20, 40] (stride (4000, 800, 40, 1)) A = [4, 5, 16, 40] (stride (40, 2560, 160, 1)) dim = 2 3.635 -> 3.635 ( +0.00%) [ +0.03% +0.08% +0.00% / +0.00% +0.80% +0.83%] index_add_ linear : Elapsed 0.036 ms (3.636 ms / 100) 3.503 -> 3.504 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.94% +0.86%] index_copy_ linear : Elapsed 0.035 ms (3.504 ms / 100) 3.631 -> 3.631 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.74% +0.88%] index_add_ reverse : Elapsed 0.036 ms (3.633 ms / 100) 3.502 -> 3.503 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.83% +0.83%] index_copy_ reverse : Elapsed 0.035 ms (3.505 ms / 100) 3.630 -> 3.631 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.77% +0.80%] index_add_ spread : Elapsed 0.036 ms (3.631 ms / 100) 3.512 -> 3.512 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.71% +0.71%] index_copy_ spread : Elapsed 0.035 ms (3.514 ms / 100) 3.630 -> 3.631 ( +0.03%) [ +0.08% +0.06% +0.00% / +0.03% +0.96% +0.94%] index_add_ strided 3 : Elapsed 0.036 ms (3.633 ms / 100) 3.498 -> 3.499 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.91% +0.94%] index_copy_ strided 3 : Elapsed 0.035 ms (3.500 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.91% +0.83%] index_add_ strided 7 : Elapsed 0.036 ms (3.632 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.03% +0.09% +0.00% / +0.00% +0.77% +0.80%] index_copy_ strided 7 : Elapsed 0.035 ms (3.504 ms / 100) 3.638 -> 3.639 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.77% +0.74%] index_add_ perm : Elapsed 0.036 ms (3.641 ms / 100) 3.504 -> 3.507 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.83% +0.83%] index_copy_ perm : Elapsed 0.035 ms (3.505 ms / 100) 3.631 -> 3.633 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.06% +0.80% +0.88%] index_add_ perm_sorted : Elapsed 0.036 ms (3.631 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.00% +0.03% +0.09% / +0.03% +0.89% +0.77%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.501 ms / 100) 5.464 -> 5.467 ( +0.05%) [ +0.11% +0.16% +0.00% / +0.20% +0.20% +0.05%] index_select const : Elapsed 0.055 ms (5.470 ms / 100) 5.483 -> 5.480 ( -0.05%) [ +0.11% +0.05% +0.00% / -0.05% +0.15% +0.00%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.04% +0.00% +0.02% / +0.02% +0.04% +0.20%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.477 -> 5.481 ( +0.07%) [ +0.11% +0.13% +0.00% / +0.07% +0.27% +0.24%] index_select reverse : Elapsed 0.055 ms (5.483 ms / 100) 5.463 -> 5.467 ( +0.07%) [ +0.24% +0.00% +0.04% / +0.15% +0.18% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.469 -> 5.469 ( +0.00%) [ +0.09% +0.02% +0.00% / +0.02% +0.00% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.26% +0.04% +0.00% / +0.05% +0.24% +0.29%] index_select spread : Elapsed 0.055 ms (5.487 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.02% +0.16% +0.00% / -0.02% +0.18% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.485 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.07% +0.00% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.482 ms / 100) 5.478 -> 5.483 ( +0.09%) [ +0.02% +0.00% +0.09% / +0.09% +0.18% +0.22%] index_select strided 7 : Elapsed 0.055 ms (5.479 ms / 100) 5.466 -> 5.471 ( +0.09%) [ +0.18% +0.15% +0.00% / +0.09% +0.29% +0.20%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.478 -> 5.480 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.05% +0.09% +0.04%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.27% +0.00% +0.13% / +0.07% +0.15% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [4, 5, 20, 40] (stride (4000, 800, 40, 1)) A = [4, 5, 16, 40] (stride (1, 4, 800, 20)) dim = 2 3.952 -> 3.954 ( +0.05%) [ +0.03% +0.00% +0.15% / +0.05% +0.78% +0.58%] index_add_ linear : Elapsed 0.040 ms (3.953 ms / 100) 3.793 -> 3.793 ( +0.00%) [ +0.03% +0.00% +0.11% / +0.00% +0.74% +0.53%] index_copy_ linear : Elapsed 0.038 ms (3.794 ms / 100) 3.959 -> 3.964 ( +0.13%) [ +0.15% +0.20% +0.00% / +0.13% +0.78% +0.76%] index_add_ reverse : Elapsed 0.040 ms (3.965 ms / 100) 3.798 -> 3.803 ( +0.13%) [ +0.18% +0.21% +0.00% / +0.13% +0.76% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.805 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +0.79% +0.64%] index_add_ spread : Elapsed 0.039 ms (3.936 ms / 100) 3.777 -> 3.776 ( -0.03%) [ +0.00% +0.00% +0.05% / -0.03% +0.74% +0.69%] index_copy_ spread : Elapsed 0.038 ms (3.777 ms / 100) 3.910 -> 3.920 ( +0.26%) [ +0.36% +0.00% +0.03% / +0.26% +0.90% +0.87%] index_add_ strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 3.767 -> 3.767 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.80% +0.80%] index_copy_ strided 3 : Elapsed 0.038 ms (3.768 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.61% +0.51%] index_add_ strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 3.767 -> 3.770 ( +0.08%) [ +0.00% +0.11% +0.11% / +0.08% +0.69% +0.72%] index_copy_ strided 7 : Elapsed 0.038 ms (3.767 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.69% +0.66%] index_add_ perm : Elapsed 0.039 ms (3.937 ms / 100) 3.777 -> 3.777 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.66% +0.66%] index_copy_ perm : Elapsed 0.038 ms (3.779 ms / 100) 3.961 -> 3.968 ( +0.18%) [ +0.23% +0.00% +0.00% / +0.18% +0.68% +0.81%] index_add_ perm_sorted : Elapsed 0.040 ms (3.970 ms / 100) 3.798 -> 3.806 ( +0.21%) [ +0.24% +0.03% +0.00% / +0.21% +0.68% +0.90%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.807 ms / 100) 5.468 -> 5.465 ( -0.05%) [ +0.02% +0.07% +0.00% / -0.05% +0.04% +0.02%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.09% +0.11%] index_select wrap : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.00% +0.22% +0.18% / +0.18% +0.13% +0.05%] index_select linear : Elapsed 0.055 ms (5.473 ms / 100) 5.471 -> 5.475 ( +0.07%) [ +0.00% +0.07% +0.11% / +0.24% +0.07% +0.22%] index_select reverse : Elapsed 0.055 ms (5.471 ms / 100) 5.468 -> 5.465 ( -0.05%) [ +0.07% +0.09% +0.00% / -0.05% +0.24% +0.13%] index_select skip64 : Elapsed 0.055 ms (5.472 ms / 100) 5.460 -> 5.471 ( +0.20%) [ +0.02% +0.00% +0.24% / +0.20% +0.29% +0.29%] index_select skip256 : Elapsed 0.055 ms (5.461 ms / 100) 5.478 -> 5.479 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.05% +0.02% +0.04%] index_select spread : Elapsed 0.055 ms (5.478 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.24% +0.05% +0.00% / +0.16% +0.07% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.487 ms / 100) 5.480 -> 5.477 ( -0.05%) [ +0.00% +0.11% +0.05% / -0.05% +0.04% -0.02%] index_select strided 5 : Elapsed 0.055 ms (5.480 ms / 100) 5.483 -> 5.476 ( -0.13%) [ +0.00% +0.04% +0.05% / -0.13% -0.04% -0.11%] index_select strided 7 : Elapsed 0.055 ms (5.483 ms / 100) 5.466 -> 5.467 ( +0.02%) [ +0.00% +0.11% +0.07% / +0.02% +0.07% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.466 ms / 100) 5.476 -> 5.470 ( -0.11%) [ +0.00% +0.00% +0.09% / -0.11% -0.07% -0.02%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.472 -> 5.471 ( -0.02%) [ +0.00% +0.16% +0.04% / +0.07% -0.02% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.472 ms / 100) B = [4, 5, 20, 40] (stride (800, 3200, 1, 20)) A = [4, 5, 16, 40] (stride (3200, 640, 40, 1)) dim = 2 3.633 -> 3.634 ( +0.03%) [ +0.06% +0.08% +0.00% / +0.03% +0.91% +0.83%] index_add_ linear : Elapsed 0.036 ms (3.635 ms / 100) 3.507 -> 3.507 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.80% +0.80%] index_copy_ linear : Elapsed 0.035 ms (3.507 ms / 100) 3.632 -> 3.632 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.83% +0.85%] index_add_ reverse : Elapsed 0.036 ms (3.633 ms / 100) 3.502 -> 3.503 ( +0.03%) [ +0.00% +0.09% +0.03% / +0.03% +0.83% +0.86%] index_copy_ reverse : Elapsed 0.035 ms (3.502 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.80% +0.94%] index_add_ spread : Elapsed 0.036 ms (3.632 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.71% +0.74%] index_copy_ spread : Elapsed 0.035 ms (3.513 ms / 100) 3.625 -> 3.625 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.83% +0.83%] index_add_ strided 3 : Elapsed 0.036 ms (3.627 ms / 100) 3.497 -> 3.497 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.94% +0.89%] index_copy_ strided 3 : Elapsed 0.035 ms (3.499 ms / 100) 3.632 -> 3.632 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.91% +0.85%] index_add_ strided 7 : Elapsed 0.036 ms (3.633 ms / 100) 3.504 -> 3.505 ( +0.03%) [ +0.00% +0.09% +0.06% / +0.03% +0.77% +0.77%] index_copy_ strided 7 : Elapsed 0.035 ms (3.504 ms / 100) 3.636 -> 3.635 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.80% +0.80%] index_add_ perm : Elapsed 0.036 ms (3.636 ms / 100) 3.508 -> 3.508 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.80% +0.80%] index_copy_ perm : Elapsed 0.035 ms (3.508 ms / 100) 3.627 -> 3.627 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.77% +0.69%] index_add_ perm_sorted : Elapsed 0.036 ms (3.629 ms / 100) 3.498 -> 3.507 ( +0.26%) [ +0.06% +0.03% +0.00% / +0.26% +0.97% +0.94%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.500 ms / 100) 5.481 -> 5.476 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.04% -0.09% +0.05%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.498 -> 5.498 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.05% +0.07%] index_select wrap : Elapsed 0.055 ms (5.500 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.04% +0.00% +0.07% / +0.00% +0.29% +0.16%] index_select linear : Elapsed 0.055 ms (5.497 ms / 100) 5.494 -> 5.493 ( -0.02%) [ +0.09% +0.00% +0.04% / -0.02% +0.02% +0.04%] index_select reverse : Elapsed 0.055 ms (5.499 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.07% +0.00% +0.09% / -0.02% +0.09% +0.26%] index_select skip64 : Elapsed 0.055 ms (5.484 ms / 100) 5.482 -> 5.473 ( -0.16%) [ +0.07% +0.00% +0.13% / -0.16% +0.07% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.486 ms / 100) 5.496 -> 5.499 ( +0.05%) [ +0.09% +0.00% +0.13% / +0.11% +0.18% +0.05%] index_select spread : Elapsed 0.055 ms (5.501 ms / 100) 5.492 -> 5.495 ( +0.05%) [ +0.16% +0.13% +0.00% / +0.05% +0.15% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.501 ms / 100) 5.498 -> 5.494 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.04% +0.25%] index_select strided 5 : Elapsed 0.055 ms (5.499 ms / 100) 5.497 -> 5.494 ( -0.05%) [ +0.11% +0.11% +0.00% / -0.05% +0.16% +0.07%] index_select strided 7 : Elapsed 0.055 ms (5.503 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.00% +0.05% +0.11% / +0.02% +0.26% +0.26%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.488 -> 5.494 ( +0.11%) [ +0.15% +0.11% +0.00% / +0.15% +0.11% +0.13%] index_select random : Elapsed 0.055 ms (5.496 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.04% +0.00% +0.02% / -0.09% +0.11% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) B = [4, 5, 20, 40] (stride (800, 3200, 1, 20)) A = [4, 5, 16, 40] (stride (5, 1, 800, 20)) dim = 2 3.957 -> 3.959 ( +0.05%) [ +0.00% +0.18% +0.15% / +0.05% +0.86% +0.68%] index_add_ linear : Elapsed 0.040 ms (3.957 ms / 100) 3.804 -> 3.803 ( -0.03%) [ +0.00% +0.18% +0.18% / -0.03% +0.92% +0.71%] index_copy_ linear : Elapsed 0.038 ms (3.804 ms / 100) 3.956 -> 3.960 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.76% +0.81%] index_add_ reverse : Elapsed 0.040 ms (3.960 ms / 100) 3.800 -> 3.802 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.79% +0.79%] index_copy_ reverse : Elapsed 0.038 ms (3.800 ms / 100) 3.913 -> 3.921 ( +0.20%) [ +0.00% +0.28% +0.00% / +0.20% +0.66% +0.92%] index_add_ spread : Elapsed 0.039 ms (3.913 ms / 100) 3.763 -> 3.772 ( +0.24%) [ +0.08% +0.19% +0.00% / +0.24% +0.85% +0.96%] index_copy_ spread : Elapsed 0.038 ms (3.766 ms / 100) 3.914 -> 3.918 ( +0.10%) [ +0.20% +0.00% +0.20% / +0.10% +0.69% +0.89%] index_add_ strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 3.766 -> 3.771 ( +0.13%) [ +0.16% +0.00% +0.13% / +0.13% +0.88% +1.04%] index_copy_ strided 3 : Elapsed 0.038 ms (3.772 ms / 100) 3.917 -> 3.924 ( +0.18%) [ +0.20% +0.15% +0.00% / +0.18% +0.69% +0.66%] index_add_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.775 -> 3.775 ( +0.00%) [ +0.08% +0.11% +0.00% / +0.00% +0.74% +0.69%] index_copy_ strided 7 : Elapsed 0.038 ms (3.778 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.66% +0.66%] index_add_ perm : Elapsed 0.039 ms (3.923 ms / 100) 3.772 -> 3.774 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.72% +0.69%] index_copy_ perm : Elapsed 0.038 ms (3.774 ms / 100) 3.955 -> 3.963 ( +0.20%) [ +0.15% +0.13% +0.00% / +0.20% +0.81% +0.78%] index_add_ perm_sorted : Elapsed 0.040 ms (3.961 ms / 100) 3.800 -> 3.803 ( +0.08%) [ +0.00% +0.03% +0.08% / +0.08% +0.79% +0.71%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.800 ms / 100) 5.476 -> 5.479 ( +0.05%) [ +0.00% +0.16% +0.07% / +0.16% +0.05% +0.11%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.07% +0.00% +0.04% / +0.04% +0.00% +0.05%] index_select wrap : Elapsed 0.055 ms (5.499 ms / 100) 5.494 -> 5.491 ( -0.05%) [ +0.00% +0.05% +0.18% / +0.00% -0.05% +0.13%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.497 -> 5.490 ( -0.13%) [ +0.00% +0.00% +0.00% / +0.00% -0.13% -0.02%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.00% +0.11% +0.02% / +0.13% +0.18% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.481 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.04% +0.07% +0.00% / +0.00% +0.24% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.11% +0.13% +0.00% / +0.02% +0.05% +0.20%] index_select spread : Elapsed 0.055 ms (5.496 ms / 100) 5.503 -> 5.491 ( -0.22%) [ +0.07% +0.04% +0.00% / +0.02% -0.22% -0.15%] index_select strided 3 : Elapsed 0.055 ms (5.507 ms / 100) 5.499 -> 5.492 ( -0.13%) [ +0.16% +0.00% +0.02% / -0.04% -0.09% -0.13%] index_select strided 5 : Elapsed 0.055 ms (5.508 ms / 100) 5.491 -> 5.492 ( +0.02%) [ +0.00% +0.13% +0.07% / +0.02% +0.02% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.476 -> 5.480 ( +0.07%) [ +0.00% +0.02% +0.11% / +0.16% +0.07% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.495 -> 5.486 ( -0.16%) [ +0.09% +0.07% +0.00% / -0.15% -0.16% -0.15%] index_select random : Elapsed 0.055 ms (5.500 ms / 100) 5.494 -> 5.491 ( -0.05%) [ +0.04% +0.02% +0.00% / +0.15% +0.04% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.496 ms / 100) B = [4, 5, 20, 40] (stride (40, 160, 800, 1)) A = [4, 5, 16, 40] (stride (200, 1, 800, 5)) dim = 2 4.050 -> 4.055 ( +0.12%) [ +0.00% +0.07% +0.02% / +0.12% +0.79% +0.77%] index_add_ linear : Elapsed 0.041 ms (4.050 ms / 100) 3.921 -> 3.917 ( -0.10%) [ +0.00% +0.03% +0.05% / -0.10% +0.61% +0.54%] index_copy_ linear : Elapsed 0.039 ms (3.921 ms / 100) 4.047 -> 4.049 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.74% +0.86%] index_add_ reverse : Elapsed 0.040 ms (4.047 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.13% +0.28% +0.00% / +0.15% +0.61% +0.54%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.037 -> 4.040 ( +0.07%) [ +0.05% +0.17% +0.00% / +0.07% +0.84% +0.89%] index_add_ spread : Elapsed 0.040 ms (4.039 ms / 100) 3.909 -> 3.913 ( +0.10%) [ +0.36% +0.00% +0.05% / +0.10% +0.87% +0.84%] index_copy_ spread : Elapsed 0.039 ms (3.923 ms / 100) 4.033 -> 4.035 ( +0.05%) [ +0.10% +0.00% +0.02% / +0.05% +0.79% +0.82%] index_add_ strided 3 : Elapsed 0.040 ms (4.037 ms / 100) 3.907 -> 3.902 ( -0.13%) [ +0.00% +0.05% +0.18% / -0.13% +0.46% +0.41%] index_copy_ strided 3 : Elapsed 0.039 ms (3.907 ms / 100) 4.049 -> 4.045 ( -0.10%) [ +0.02% +0.02% +0.00% / -0.10% +0.79% +0.72%] index_add_ strided 7 : Elapsed 0.040 ms (4.050 ms / 100) 3.915 -> 3.918 ( +0.08%) [ +0.00% +0.03% +0.13% / +0.08% +0.61% +0.64%] index_copy_ strided 7 : Elapsed 0.039 ms (3.915 ms / 100) 4.049 -> 4.053 ( +0.10%) [ +0.15% +0.00% +0.12% / +0.10% +0.77% +0.79%] index_add_ perm : Elapsed 0.041 ms (4.055 ms / 100) 3.917 -> 3.922 ( +0.13%) [ +0.08% +0.13% +0.00% / +0.13% +0.74% +0.71%] index_copy_ perm : Elapsed 0.039 ms (3.920 ms / 100) 4.032 -> 4.032 ( +0.00%) [ +0.12% +0.10% +0.00% / +0.00% +0.69% +0.72%] index_add_ perm_sorted : Elapsed 0.040 ms (4.037 ms / 100) 3.899 -> 3.922 ( +0.59%) [ +0.00% +0.13% +0.44% / +0.59% +0.72% +0.67%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.899 ms / 100) 5.556 -> 5.550 ( -0.11%) [ +0.05% +0.00% +0.13% / -0.02% -0.11% +0.07%] index_select const : Elapsed 0.056 ms (5.559 ms / 100) 5.561 -> 5.568 ( +0.13%) [ +0.00% +0.02% +0.02% / +0.13% +0.13% +0.16%] index_select wrap : Elapsed 0.056 ms (5.561 ms / 100) 5.554 -> 5.553 ( -0.02%) [ +0.11% +0.00% +0.05% / -0.02% +0.14% +0.32%] index_select linear : Elapsed 0.056 ms (5.560 ms / 100) 5.559 -> 5.561 ( +0.04%) [ +0.00% +0.07% +0.07% / +0.04% +0.16% +0.07%] index_select reverse : Elapsed 0.056 ms (5.559 ms / 100) 5.558 -> 5.555 ( -0.05%) [ +0.16% +0.00% +0.02% / -0.05% +0.04% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.553 -> 5.556 ( +0.05%) [ +0.04% +0.00% +0.07% / +0.05% +0.09% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.555 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.04% +0.04% +0.00% / -0.02% -0.02% +0.05%] index_select spread : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.00% +0.05% +0.11% / +0.04% +0.20% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.558 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.04% +0.00% +0.02% / +0.00% +0.02% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.563 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.09% +0.07% +0.00% / +0.05% +0.25% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.563 ms / 100) 5.554 -> 5.555 ( +0.02%) [ +0.00% +0.00% +0.13% / +0.02% +0.05% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.554 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.00% +0.04% +0.16% / +0.00% -0.11% +0.02%] index_select random : Elapsed 0.056 ms (5.563 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.00% +0.05% +0.11% / +0.02% +0.04% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.560 ms / 100) B = [4, 5, 20, 40] (stride (1, 4, 800, 20)) A = [4, 5, 16, 40] (stride (3200, 40, 200, 1)) dim = 2 3.936 -> 3.937 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.71% +0.69%] index_add_ linear : Elapsed 0.039 ms (3.938 ms / 100) 3.810 -> 3.812 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.81% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.812 ms / 100) 3.942 -> 3.942 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.76% +0.76%] index_add_ reverse : Elapsed 0.039 ms (3.944 ms / 100) 3.815 -> 3.815 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.81% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.815 ms / 100) 3.933 -> 3.931 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.74% +0.74%] index_add_ spread : Elapsed 0.039 ms (3.935 ms / 100) 3.806 -> 3.804 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.71% +0.66%] index_copy_ spread : Elapsed 0.038 ms (3.808 ms / 100) 3.935 -> 3.934 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.71% +0.71%] index_add_ strided 3 : Elapsed 0.039 ms (3.935 ms / 100) 3.819 -> 3.819 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.79% +0.65%] index_copy_ strided 3 : Elapsed 0.038 ms (3.819 ms / 100) 3.938 -> 3.938 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.56% +0.56%] index_add_ strided 7 : Elapsed 0.039 ms (3.939 ms / 100) 3.821 -> 3.823 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.68% +0.58%] index_copy_ strided 7 : Elapsed 0.038 ms (3.824 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.74% +0.71%] index_add_ perm : Elapsed 0.039 ms (3.935 ms / 100) 3.806 -> 3.810 ( +0.11%) [ +0.05% +0.00% +0.03% / +0.11% +0.76% +0.68%] index_copy_ perm : Elapsed 0.038 ms (3.808 ms / 100) 3.942 -> 3.944 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.71% +0.71%] index_add_ perm_sorted : Elapsed 0.039 ms (3.945 ms / 100) 3.815 -> 3.815 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.68% +0.68%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.815 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.05% +0.11% +0.00% / +0.09% +0.04% +0.18%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.00% +0.05% +0.00% / +0.04% +0.13% -0.04%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.574 -> 5.575 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.07% +0.02% +0.02%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.573 -> 5.569 ( -0.07%) [ +0.04% +0.02% +0.00% / +0.07% -0.07% +0.00%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.14% +0.02% +0.00% / +0.16% +0.16% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.14% +0.00% +0.05% / +0.05% +0.04% +0.09%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.572 ( +0.07%) [ +0.14% +0.25% +0.00% / +0.07% +0.20% +0.11%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.576 -> 5.573 ( -0.05%) [ +0.00% +0.05% +0.07% / +0.04% +0.05% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.573 -> 5.573 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.00% +0.16% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.09% +0.29%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.556 -> 5.559 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.09% +0.13% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.00% +0.04% +0.09% / +0.00% +0.02% -0.02%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.00% +0.09% +0.00% / -0.04% -0.07% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) out_shape = [4, 5, 16, 20] in_shape = [4, 5, 16, 40] idx_dim = 3 B = [4, 5, 16, 20] (stride (1600, 320, 1, 16)) A = [4, 5, 16, 40] (stride (3200, 640, 40, 1)) dim = 3 2.313 -> 2.313 ( +0.00%) [ +0.00% +0.30% +0.09% / +0.00% +0.43% +0.22%] index_select const : Elapsed 0.023 ms (2.313 ms / 100) 2.321 -> 2.322 ( +0.04%) [ +0.04% +0.00% +0.13% / +0.22% +0.04% +0.09%] index_select wrap : Elapsed 0.023 ms (2.322 ms / 100) 2.320 -> 2.319 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.13% -0.04%] index_select linear : Elapsed 0.023 ms (2.320 ms / 100) 2.321 -> 2.319 ( -0.09%) [ +0.04% +0.30% +0.00% / +0.09% -0.09% +0.17%] index_select reverse : Elapsed 0.023 ms (2.322 ms / 100) 2.314 -> 2.315 ( +0.04%) [ +0.22% +0.22% +0.00% / +0.13% +0.04% +0.09%] index_select skip64 : Elapsed 0.023 ms (2.319 ms / 100) 2.313 -> 2.314 ( +0.04%) [ +0.17% +0.00% +0.13% / +0.04% +0.30% +0.30%] index_select skip256 : Elapsed 0.023 ms (2.317 ms / 100) 2.325 -> 2.326 ( +0.04%) [ +0.13% +0.09% +0.00% / +0.04% +0.09% +0.04%] index_select spread : Elapsed 0.023 ms (2.328 ms / 100) 2.324 -> 2.324 ( +0.00%) [ +0.04% +0.17% +0.00% / +0.30% +0.00% +0.30%] index_select strided 3 : Elapsed 0.023 ms (2.325 ms / 100) 2.325 -> 2.325 ( +0.00%) [ +0.00% +0.04% +0.09% / +0.17% +0.00% +0.22%] index_select strided 5 : Elapsed 0.023 ms (2.325 ms / 100) 2.323 -> 2.323 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.22% +0.22%] index_select strided 7 : Elapsed 0.023 ms (2.325 ms / 100) 2.325 -> 2.327 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +0.17% +0.26%] index_select strided 8 : Elapsed 0.023 ms (2.327 ms / 100) 2.326 -> 2.329 ( +0.13%) [ +0.09% +0.00% +0.00% / +0.13% +0.13% +0.39%] index_select strided 16 : Elapsed 0.023 ms (2.328 ms / 100) 2.322 -> 2.321 ( -0.04%) [ +0.00% +0.09% +0.30% / -0.04% +0.43% +0.34%] index_select random : Elapsed 0.023 ms (2.322 ms / 100) 2.321 -> 2.327 ( +0.26%) [ +0.22% +0.00% +0.22% / +0.26% +0.26% +0.26%] index_select random_sorted : Elapsed 0.023 ms (2.326 ms / 100) 2.325 -> 2.323 ( -0.09%) [ +0.09% +0.00% +0.26% / -0.09% -0.09% +0.17%] index_select perm : Elapsed 0.023 ms (2.327 ms / 100) 2.326 -> 2.321 ( -0.21%) [ +0.00% +0.13% +0.26% / +0.09% -0.17% -0.21%] index_select perm_sorted : Elapsed 0.023 ms (2.326 ms / 100) B = [4, 5, 16, 20] (stride (1600, 1, 100, 5)) A = [4, 5, 16, 40] (stride (1, 64, 4, 320)) dim = 3 2.403 -> 2.408 ( +0.21%) [ +0.29% +0.29% +0.00% / +0.21% +0.33% +0.29%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.418 -> 2.410 ( -0.33%) [ +0.12% +0.00% +0.04% / -0.08% -0.33% -0.25%] index_select wrap : Elapsed 0.024 ms (2.421 ms / 100) 2.414 -> 2.411 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.04% -0.08% -0.12%] index_select linear : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +3.36% +0.21%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.409 -> 2.408 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.12% -0.04% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.411 ( +0.21%) [ +0.04% +0.21% +0.00% / +0.21% +0.33% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.407 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.12% +0.00% / -0.04% +0.08% +0.21%] index_select spread : Elapsed 0.024 ms (2.413 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.21% +0.17% +0.00% / +0.04% +0.29% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.408 -> 2.406 ( -0.08%) [ +0.21% +0.00% +0.12% / -0.08% +0.12% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.413 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +1.99% +0.08%] index_select strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.407 -> 2.411 ( +0.17%) [ +0.29% +0.12% +0.00% / +0.21% +0.17% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.37% +0.37%] index_select strided 16 : Elapsed 0.024 ms (2.408 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.17% +0.12% +0.00% / +0.08% +0.17% +0.17%] index_select random : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.17% +0.21% / +0.08% +0.17% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.21% +0.17% +0.00% / +0.21% -0.04% +0.00%] index_select perm : Elapsed 0.024 ms (2.417 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.08% +0.08% / +0.04% -0.04% +0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) B = [4, 5, 16, 20] (stride (1600, 16, 1, 80)) A = [4, 5, 16, 40] (stride (200, 40, 800, 1)) dim = 3 1.463 -> 1.465 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.27% +0.34%] index_select const : Elapsed 0.015 ms (1.464 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.27% +0.20%] index_select wrap : Elapsed 0.015 ms (1.476 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.41% +0.54%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.472 -> 1.479 ( +0.48%) [ +0.27% +0.00% +0.20% / +0.48% +0.68% +0.61%] index_select reverse : Elapsed 0.015 ms (1.476 ms / 100) 1.463 -> 1.466 ( +0.21%) [ +0.00% +0.14% +0.14% / +0.21% +0.48% +0.21%] index_select skip64 : Elapsed 0.015 ms (1.463 ms / 100) 1.464 -> 1.465 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.82% +0.48%] index_select skip256 : Elapsed 0.015 ms (1.464 ms / 100) 1.487 -> 1.489 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.40% +1.34%] index_select spread : Elapsed 0.015 ms (1.488 ms / 100) 1.485 -> 1.486 ( +0.07%) [ +0.00% +0.20% +0.13% / +0.07% +0.34% +0.34%] index_select strided 3 : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.492 ( +0.47%) [ +0.20% +0.07% +0.00% / +0.47% +0.61% +0.61%] index_select strided 5 : Elapsed 0.015 ms (1.488 ms / 100) 1.483 -> 1.484 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_select strided 7 : Elapsed 0.015 ms (1.484 ms / 100) 1.484 -> 1.485 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.40% +0.54%] index_select strided 8 : Elapsed 0.015 ms (1.484 ms / 100) 1.480 -> 1.483 ( +0.20%) [ +0.27% +0.27% +0.00% / +0.20% +0.74% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.484 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.20% +0.14% +0.00% / +0.00% +0.74% +0.34%] index_select random : Elapsed 0.015 ms (1.480 ms / 100) 1.482 -> 1.482 ( +0.00%) [ +0.20% +0.00% +0.13% / +0.00% +0.20% +0.47%] index_select random_sorted : Elapsed 0.015 ms (1.485 ms / 100) 1.486 -> 1.486 ( +0.00%) [ +0.07% +0.00% +0.34% / +0.00% +0.67% +0.34%] index_select perm : Elapsed 0.015 ms (1.487 ms / 100) 1.483 -> 1.487 ( +0.27%) [ +0.27% +0.27% +0.00% / +0.27% +0.94% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.487 ms / 100) B = [4, 5, 16, 20] (stride (1600, 1, 5, 80)) A = [4, 5, 16, 40] (stride (40, 2560, 160, 1)) dim = 3 2.412 -> 2.413 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.17% +0.04% +0.12%] index_select const : Elapsed 0.024 ms (2.413 ms / 100) 2.423 -> 2.416 ( -0.29%) [ +0.00% +0.04% +0.17% / +0.00% -0.17% -0.29%] index_select wrap : Elapsed 0.024 ms (2.423 ms / 100) 2.423 -> 2.417 ( -0.25%) [ +0.00% +0.21% +0.04% / +0.04% -0.25% -0.12%] index_select linear : Elapsed 0.024 ms (2.423 ms / 100) 2.419 -> 2.421 ( +0.08%) [ +0.00% +0.29% +0.17% / +0.08% +0.12% +0.08%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.08% +0.00% +0.12% / +0.17% +0.04% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.29% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.414 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.33% +0.00% +0.08% / -0.04% +0.33% +0.17%] index_select spread : Elapsed 0.024 ms (2.432 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.08% +0.00% +0.17% / +0.00% +0.33% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.25% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.428 -> 2.427 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.16% +0.00%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.425 -> 2.430 ( +0.21%) [ +0.00% +0.16% +0.04% / +0.21% +0.33% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.425 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.25% +0.12%] index_select strided 16 : Elapsed 0.024 ms (2.426 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.00% +0.12% +0.17% / +0.08% +0.54% +0.37%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.426 -> 2.424 ( -0.08%) [ +0.00% +0.21% +0.08% / +0.04% +0.08% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.426 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.16% +0.00% +0.08% / +0.04% +0.08% +0.33%] index_select perm : Elapsed 0.024 ms (2.429 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.37% +0.08% +0.00% / +0.29% +0.08% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.433 ms / 100) B = [4, 5, 16, 20] (stride (1600, 1, 5, 80)) A = [4, 5, 16, 40] (stride (5, 1, 20, 320)) dim = 3 2.405 -> 2.408 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.12% +0.37% +0.21%] index_select const : Elapsed 0.024 ms (2.408 ms / 100) 2.417 -> 2.414 ( -0.12%) [ +0.00% +0.00% +0.04% / +0.25% -0.12% -0.12%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.12% +0.00% +0.12% / +0.12% +0.00% -0.04%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.04% -0.08% -0.04%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.409 -> 2.407 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.04% +0.00% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.21% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.406 ms / 100) 2.418 -> 2.415 ( -0.12%) [ +0.00% +0.04% +0.08% / -0.12% -0.04% -0.04%] index_select spread : Elapsed 0.024 ms (2.418 ms / 100) 2.415 -> 2.411 ( -0.17%) [ +0.21% +0.08% +0.00% / +0.08% -0.17% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.04% +0.04% +0.00% / +0.08% -0.04% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.411 ( -0.08%) [ +0.04% +0.08% +0.00% / +0.12% +0.00% -0.08%] index_select strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.406 -> 2.406 ( +0.00%) [ +0.29% +0.00% +0.04% / +0.00% +0.00% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.21% +0.08% +0.00% / +0.08% +0.08% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.413 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.12% +0.12%] index_select random : Elapsed 0.024 ms (2.417 ms / 100) 2.416 -> 2.411 ( -0.21%) [ +0.12% +0.08% +0.00% / +0.08% -0.04% -0.21%] index_select random_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.17% +0.00% +0.12% / +0.21% -0.08% +0.00%] index_select perm : Elapsed 0.024 ms (2.420 ms / 100) 2.415 -> 2.407 ( -0.33%) [ +0.29% +0.00% +0.12% / +0.12% -0.33% -0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) B = [4, 5, 16, 20] (stride (1, 1280, 80, 4)) A = [4, 5, 16, 40] (stride (3200, 640, 1, 16)) dim = 3 1.456 -> 1.456 ( +0.00%) [ +0.21% +0.27% +0.00% / +0.00% +0.34% +0.34%] index_select const : Elapsed 0.015 ms (1.459 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.14% +0.27% +0.00% / -0.07% +0.20% +0.14%] index_select wrap : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.34% +0.00% +0.00% / +0.14% +0.07% +0.20%] index_select linear : Elapsed 0.015 ms (1.481 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.34% +0.14%] index_select reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.456 -> 1.456 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.21% +0.34%] index_select skip64 : Elapsed 0.015 ms (1.456 ms / 100) 1.453 -> 1.458 ( +0.34%) [ +0.55% +0.28% +0.00% / +0.34% +0.41% +0.55%] index_select skip256 : Elapsed 0.015 ms (1.461 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.27% +0.00% +0.07% / +0.07% +0.41% +0.68%] index_select spread : Elapsed 0.015 ms (1.480 ms / 100) 1.476 -> 1.482 ( +0.41%) [ +0.00% +0.14% +0.07% / +0.54% +0.54% +0.41%] index_select strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.463 -> 1.469 ( +0.41%) [ +0.27% +0.14% +0.00% / +0.55% +0.48% +0.41%] index_select strided 5 : Elapsed 0.015 ms (1.467 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.20% +0.00% +0.14% / +0.14% -0.07% -0.07%] index_select strided 7 : Elapsed 0.015 ms (1.482 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.14% +0.27% +0.00% / +0.14% +0.27% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.464 ms / 100) 1.463 -> 1.462 ( -0.07%) [ +0.00% +0.14% +0.00% / -0.07% +0.55% +0.48%] index_select strided 16 : Elapsed 0.015 ms (1.463 ms / 100) 1.474 -> 1.477 ( +0.20%) [ +0.07% +0.34% +0.00% / +0.20% +0.27% +0.34%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.34% +0.00% +0.00% / +0.14% +0.00% +0.20%] index_select random_sorted : Elapsed 0.015 ms (1.479 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.27% +0.14% +0.00% / +0.07% +0.41% +0.34%] index_select perm : Elapsed 0.015 ms (1.480 ms / 100) 1.478 -> 1.480 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.27% +0.47%] index_select perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) B = [4, 5, 16, 20] (stride (100, 20, 400, 1)) A = [4, 5, 16, 40] (stride (3200, 640, 1, 16)) dim = 3 1.460 -> 1.451 ( -0.62%) [ +0.21% +0.07% +0.00% / -0.48% -0.62% -0.27%] index_select const : Elapsed 0.015 ms (1.463 ms / 100) 1.479 -> 1.464 ( -1.01%) [ +0.07% +0.27% +0.00% / -1.01% -0.81% -0.54%] index_select wrap : Elapsed 0.015 ms (1.480 ms / 100) 1.481 -> 1.466 ( -1.01%) [ +0.14% +0.00% +0.07% / -1.01% -0.74% -0.74%] index_select linear : Elapsed 0.015 ms (1.483 ms / 100) 1.478 -> 1.462 ( -1.08%) [ +0.00% +0.47% +0.07% / -1.08% -0.68% -0.81%] index_select reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.459 -> 1.453 ( -0.41%) [ +0.34% +0.14% +0.00% / -0.27% -0.41% -0.34%] index_select skip64 : Elapsed 0.015 ms (1.464 ms / 100) 1.459 -> 1.449 ( -0.69%) [ +0.14% +0.07% +0.00% / -0.69% -0.21% -0.07%] index_select skip256 : Elapsed 0.015 ms (1.461 ms / 100) 1.478 -> 1.467 ( -0.74%) [ +0.20% +0.00% +0.00% / -0.74% -0.54% -0.61%] index_select spread : Elapsed 0.015 ms (1.481 ms / 100) 1.480 -> 1.470 ( -0.68%) [ +0.27% +0.27% +0.00% / -0.34% -0.68% -0.54%] index_select strided 3 : Elapsed 0.015 ms (1.484 ms / 100) 1.466 -> 1.463 ( -0.20%) [ +0.27% +0.14% +0.00% / -0.14% -0.20% +0.07%] index_select strided 5 : Elapsed 0.015 ms (1.470 ms / 100) 1.477 -> 1.470 ( -0.47%) [ +0.41% +0.07% +0.00% / -0.47% -0.27% -0.47%] index_select strided 7 : Elapsed 0.015 ms (1.483 ms / 100) 1.466 -> 1.455 ( -0.75%) [ +0.00% +0.14% +0.07% / -0.75% -0.34% -0.34%] index_select strided 8 : Elapsed 0.015 ms (1.466 ms / 100) 1.465 -> 1.457 ( -0.55%) [ +0.14% +0.00% +0.20% / -0.55% -0.48% -0.27%] index_select strided 16 : Elapsed 0.015 ms (1.467 ms / 100) 1.474 -> 1.467 ( -0.47%) [ +0.07% +0.00% +0.00% / -0.47% -0.34% -0.27%] index_select random : Elapsed 0.015 ms (1.475 ms / 100) 1.472 -> 1.465 ( -0.48%) [ +0.34% +0.20% +0.00% / -0.34% -0.41% -0.48%] index_select random_sorted : Elapsed 0.015 ms (1.477 ms / 100) 1.479 -> 1.469 ( -0.68%) [ +0.00% +0.07% +0.14% / -0.61% -0.68% -0.34%] index_select perm : Elapsed 0.015 ms (1.479 ms / 100) 1.479 -> 1.470 ( -0.61%) [ +0.00% +0.07% +0.07% / -0.61% -0.47% -0.61%] index_select perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) B = [4, 5, 16, 20] (stride (80, 1, 5, 320)) A = [4, 5, 16, 40] (stride (3200, 16, 1, 80)) dim = 3 2.403 -> 2.408 ( +0.21%) [ +0.17% +0.08% +0.00% / +0.21% +0.29% +0.46%] index_select const : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.00% +0.21% +0.21% / +0.08% -0.08% +0.04%] index_select wrap : Elapsed 0.024 ms (2.414 ms / 100) 2.419 -> 2.408 ( -0.45%) [ +0.00% +0.04% +0.00% / -0.12% -0.21% -0.45%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.17% +0.00% +0.25% / +0.08% +0.04% +0.08%] index_select reverse : Elapsed 0.024 ms (2.417 ms / 100) 2.407 -> 2.407 ( +0.00%) [ +0.04% +0.21% +0.00% / +0.00% +0.00% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.42% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.406 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.33% +0.12%] index_select spread : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.12% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.08% +0.17% +0.00% / +0.12% +0.25% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.04% +0.12% +0.00% / +0.17% +0.17% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.408 -> 2.409 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.04% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.410 ms / 100) 2.408 -> 2.409 ( +0.04%) [ +0.12% +0.17% +0.00% / +0.04% +0.12% +0.04%] index_select strided 16 : Elapsed 0.024 ms (2.411 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.08% +0.21% +0.21%] index_select random : Elapsed 0.024 ms (2.416 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.12% +0.04% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.12% +0.08% +0.12%] index_select perm : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.00% +0.25% +0.04% / +0.17% +0.21% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [4, 5, 16, 20] (stride (1, 4, 20, 320)) A = [4, 5, 16, 40] (stride (200, 1, 800, 5)) dim = 3 2.446 -> 2.447 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.04% +0.16% +0.41%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.20% +0.04% +0.04%] index_select wrap : Elapsed 0.025 ms (2.462 ms / 100) 2.463 -> 2.461 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.04% -0.08% -0.04%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.08% -0.04% +0.12%] index_select reverse : Elapsed 0.025 ms (2.466 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.12% +0.04% +0.00% / +0.12% +0.12% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.00% +0.12% +0.20% / +0.08% +0.33% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.474 -> 2.475 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.16% +0.08%] index_select spread : Elapsed 0.025 ms (2.475 ms / 100) 2.473 -> 2.472 ( -0.04%) [ +0.20% +0.16% +0.00% / +0.24% +0.04% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.20% +0.04% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.24% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.08% +0.20% +0.00% / -0.04% +0.16% -0.04%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.452 -> 2.456 ( +0.16%) [ +0.08% +0.00% +0.37% / +0.16% +0.24% +0.49%] index_select strided 16 : Elapsed 0.025 ms (2.454 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.16% +0.04% +0.00% / -0.04% +0.45% +0.12%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.12% +0.12% +0.00% / +0.16% -0.04% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.16% +0.00% +0.00%] index_select perm : Elapsed 0.025 ms (2.471 ms / 100) 2.471 -> 2.463 ( -0.32%) [ +0.20% +0.08% +0.00% / +0.08% -0.32% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) out_shape = [20, 5, 40, 16] in_shape = [4, 5, 40, 16] idx_dim = 0 B = [20, 5, 40, 16] (stride (3200, 640, 16, 1)) A = [4, 5, 40, 16] (stride (40, 2560, 1, 160)) dim = 0 1.943 -> 1.942 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.46% +0.57%] index_add_ linear : Elapsed 0.019 ms (1.943 ms / 100) 1.889 -> 1.893 ( +0.21%) [ +0.26% +0.00% +0.26% / +0.21% +0.64% +0.74%] index_copy_ linear : Elapsed 0.019 ms (1.894 ms / 100) 1.939 -> 1.941 ( +0.10%) [ +0.41% +0.00% +0.21% / +0.10% +6.81% +0.83%] index_add_ reverse : Elapsed 0.019 ms (1.947 ms / 100) 1.892 -> 1.895 ( +0.16%) [ +0.00% +0.11% +0.11% / +0.16% +0.63% +0.48%] index_copy_ reverse : Elapsed 0.019 ms (1.892 ms / 100) 1.940 -> 1.941 ( +0.05%) [ +0.41% +0.10% +0.00% / +0.05% +0.72% +0.77%] index_add_ spread : Elapsed 0.019 ms (1.948 ms / 100) 1.894 -> 1.895 ( +0.05%) [ +0.00% +0.16% +0.11% / +0.05% +0.58% +0.37%] index_copy_ spread : Elapsed 0.019 ms (1.894 ms / 100) 1.941 -> 1.944 ( +0.15%) [ +0.21% +0.10% +0.00% / +0.15% +0.72% +0.72%] index_add_ strided 3 : Elapsed 0.019 ms (1.945 ms / 100) 1.889 -> 1.892 ( +0.16%) [ +0.42% +0.32% +0.00% / +0.16% +0.58% +0.64%] index_copy_ strided 3 : Elapsed 0.019 ms (1.897 ms / 100) 1.941 -> 1.940 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.88% +0.72%] index_add_ strided 7 : Elapsed 0.019 ms (1.941 ms / 100) 1.892 -> 1.893 ( +0.05%) [ +0.11% +0.26% +0.00% / +0.05% +0.42% +0.48%] index_copy_ strided 7 : Elapsed 0.019 ms (1.894 ms / 100) 1.938 -> 1.941 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.83% +0.77%] index_add_ perm : Elapsed 0.019 ms (1.940 ms / 100) 1.892 -> 1.893 ( +0.05%) [ +0.21% +0.00% +0.11% / +0.05% +0.37% +0.48%] index_copy_ perm : Elapsed 0.019 ms (1.896 ms / 100) 1.939 -> 1.944 ( +0.26%) [ +0.00% +0.00% +0.05% / +0.26% +0.67% +0.62%] index_add_ perm_sorted : Elapsed 0.019 ms (1.939 ms / 100) 1.889 -> 1.891 ( +0.11%) [ +0.00% +0.21% +0.32% / +0.11% +0.58% +0.69%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.889 ms / 100) 8.298 -> 8.304 ( +0.07%) [ +0.23% +0.00% +0.37% / +0.07% +0.12% +0.30%] index_select const : Elapsed 0.083 ms (8.317 ms / 100) 8.325 -> 8.331 ( +0.07%) [ +0.12% +0.28% +0.00% / +0.07% +0.08% +0.36%] index_select wrap : Elapsed 0.083 ms (8.335 ms / 100) 8.319 -> 8.332 ( +0.16%) [ +0.04% +0.19% +0.00% / +0.17% +0.28% +0.16%] index_select linear : Elapsed 0.083 ms (8.322 ms / 100) 8.319 -> 8.320 ( +0.01%) [ +0.16% +0.00% +0.19% / +0.01% +0.29% +0.24%] index_select reverse : Elapsed 0.083 ms (8.332 ms / 100) 8.302 -> 8.301 ( -0.01%) [ +0.00% +0.01% +0.06% / +0.19% +0.10% -0.01%] index_select skip64 : Elapsed 0.083 ms (8.302 ms / 100) 8.285 -> 8.295 ( +0.12%) [ +0.11% +0.00% +0.23% / +0.12% +0.35% +0.30%] index_select skip256 : Elapsed 0.083 ms (8.294 ms / 100) 8.343 -> 8.345 ( +0.02%) [ +0.20% +0.12% +0.00% / +0.02% +0.26% +0.07%] index_select spread : Elapsed 0.084 ms (8.360 ms / 100) 8.335 -> 8.351 ( +0.19%) [ +0.01% +0.00% +0.05% / +0.19% +0.24% +0.23%] index_select strided 3 : Elapsed 0.083 ms (8.336 ms / 100) 8.329 -> 8.335 ( +0.07%) [ +0.24% +0.00% +0.28% / +0.07% +0.43% +0.16%] index_select random : Elapsed 0.083 ms (8.349 ms / 100) 8.336 -> 8.346 ( +0.12%) [ +0.23% +0.00% +0.11% / +0.12% +0.20% +0.38%] index_select random_sorted : Elapsed 0.084 ms (8.355 ms / 100) B = [20, 5, 40, 16] (stride (3200, 16, 80, 1)) A = [4, 5, 40, 16] (stride (1, 2560, 64, 4)) dim = 0 1.875 -> 1.880 ( +0.27%) [ +0.00% +0.11% +0.05% / +0.27% +0.59% +0.53%] index_add_ linear : Elapsed 0.019 ms (1.875 ms / 100) 1.829 -> 1.829 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.66% +0.49%] index_copy_ linear : Elapsed 0.018 ms (1.830 ms / 100) 1.871 -> 1.871 ( +0.00%) [ +0.16% +0.11% +0.00% / +0.00% +0.59% +0.53%] index_add_ reverse : Elapsed 0.019 ms (1.874 ms / 100) 1.826 -> 1.829 ( +0.16%) [ +0.27% +0.00% +0.05% / +0.16% +0.49% +0.44%] index_copy_ reverse : Elapsed 0.018 ms (1.831 ms / 100) 1.870 -> 1.872 ( +0.11%) [ +0.16% +0.27% +0.00% / +0.11% +0.43% +0.43%] index_add_ spread : Elapsed 0.019 ms (1.873 ms / 100) 1.827 -> 1.825 ( -0.11%) [ +0.00% +0.16% +0.05% / -0.11% +0.55% +0.38%] index_copy_ spread : Elapsed 0.018 ms (1.827 ms / 100) 1.883 -> 1.888 ( +0.27%) [ +0.05% +0.32% +0.00% / +0.27% +0.90% +0.90%] index_add_ strided 3 : Elapsed 0.019 ms (1.884 ms / 100) 1.837 -> 1.841 ( +0.22%) [ +0.00% +0.22% +0.11% / +0.22% +5.39% +0.54%] index_copy_ strided 3 : Elapsed 0.018 ms (1.837 ms / 100) 1.884 -> 1.886 ( +0.11%) [ +0.00% +0.21% +0.00% / +0.11% +0.85% +0.80%] index_add_ strided 7 : Elapsed 0.019 ms (1.884 ms / 100) 1.837 -> 1.839 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.87% +0.87%] index_copy_ strided 7 : Elapsed 0.018 ms (1.837 ms / 100) 1.865 -> 1.866 ( +0.05%) [ +0.21% +0.27% +0.00% / +0.05% +0.64% +1.13%] index_add_ perm : Elapsed 0.019 ms (1.869 ms / 100) 1.819 -> 1.817 ( -0.11%) [ +0.00% +0.55% +0.05% / -0.11% +0.60% +0.71%] index_copy_ perm : Elapsed 0.018 ms (1.819 ms / 100) 1.871 -> 1.868 ( -0.16%) [ +0.11% +0.11% +0.00% / -0.16% +0.69% +0.37%] index_add_ perm_sorted : Elapsed 0.019 ms (1.873 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.27% +0.00% +0.16% / +0.00% +0.66% +0.71%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) 8.307 -> 8.306 ( -0.01%) [ +0.01% +0.00% +0.01% / +0.02% +0.14% -0.01%] index_select const : Elapsed 0.083 ms (8.308 ms / 100) 8.310 -> 8.312 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.05% +0.02% +0.06%] index_select wrap : Elapsed 0.083 ms (8.313 ms / 100) 8.296 -> 8.311 ( +0.18%) [ +0.00% +0.11% +0.23% / +0.40% +0.18% +0.28%] index_select linear : Elapsed 0.083 ms (8.296 ms / 100) 8.301 -> 8.311 ( +0.12%) [ +0.25% +0.07% +0.00% / +0.12% +0.25% +0.23%] index_select reverse : Elapsed 0.083 ms (8.322 ms / 100) 8.301 -> 8.310 ( +0.11%) [ +0.30% +0.00% +0.01% / +0.11% +0.28% +0.30%] index_select skip64 : Elapsed 0.083 ms (8.326 ms / 100) 8.303 -> 8.314 ( +0.13%) [ +0.00% +0.07% +0.08% / +0.13% +0.25% +0.33%] index_select skip256 : Elapsed 0.083 ms (8.303 ms / 100) 8.303 -> 8.316 ( +0.16%) [ +0.13% +0.00% +0.06% / +0.16% +0.18% +0.26%] index_select spread : Elapsed 0.083 ms (8.314 ms / 100) 8.307 -> 8.307 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.06% +0.35% +0.00%] index_select strided 3 : Elapsed 0.083 ms (8.307 ms / 100) 8.307 -> 8.308 ( +0.01%) [ +0.00% +0.04% +0.06% / +0.12% +0.24% +0.01%] index_select random : Elapsed 0.083 ms (8.307 ms / 100) 8.305 -> 8.316 ( +0.13%) [ +0.08% +0.00% +0.01% / +0.16% +0.13% +0.41%] index_select random_sorted : Elapsed 0.083 ms (8.312 ms / 100) B = [20, 5, 40, 16] (stride (3200, 16, 80, 1)) A = [4, 5, 40, 16] (stride (5, 1, 320, 20)) dim = 0 2.076 -> 2.076 ( +0.00%) [ +0.00% +0.29% +0.34% / +0.14% +0.00% +0.19%] index_add_ linear : Elapsed 0.021 ms (2.076 ms / 100) 2.015 -> 2.012 ( -0.15%) [ +0.05% +0.10% +0.00% / -0.15% +0.15% +0.30%] index_copy_ linear : Elapsed 0.020 ms (2.016 ms / 100) 2.058 -> 2.059 ( +0.05%) [ +0.19% +0.00% +0.05% / +0.05% +0.44% +0.24%] index_add_ reverse : Elapsed 0.021 ms (2.062 ms / 100) 2.000 -> 2.002 ( +0.10%) [ +0.00% +0.00% +0.30% / +0.15% +0.10% +0.30%] index_copy_ reverse : Elapsed 0.020 ms (2.000 ms / 100) 2.047 -> 2.049 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.44% +0.54%] index_add_ spread : Elapsed 0.020 ms (2.048 ms / 100) 1.992 -> 1.996 ( +0.20%) [ +0.00% +0.30% +0.00% / +0.20% +0.55% +0.45%] index_copy_ spread : Elapsed 0.020 ms (1.992 ms / 100) 2.076 -> 2.074 ( -0.10%) [ +0.00% +0.24% +0.24% / +0.14% +0.29% -0.10%] index_add_ strided 3 : Elapsed 0.021 ms (2.076 ms / 100) 2.011 -> 2.016 ( +0.25%) [ +0.20% +0.20% +0.00% / +0.25% +0.45% +0.50%] index_copy_ strided 3 : Elapsed 0.020 ms (2.015 ms / 100) 2.073 -> 2.073 ( +0.00%) [ +0.05% +0.19% +0.00% / +0.00% +0.48% +0.34%] index_add_ strided 7 : Elapsed 0.021 ms (2.074 ms / 100) 2.010 -> 2.008 ( -0.10%) [ +0.00% +0.35% +0.00% / -0.10% +0.60% +0.80%] index_copy_ strided 7 : Elapsed 0.020 ms (2.010 ms / 100) 2.076 -> 2.078 ( +0.10%) [ +0.05% +0.39% +0.00% / +0.24% +0.43% +0.10%] index_add_ perm : Elapsed 0.021 ms (2.077 ms / 100) 2.014 -> 2.014 ( +0.00%) [ +0.15% +0.00% +0.05% / +0.00% +0.45% +0.30%] index_copy_ perm : Elapsed 0.020 ms (2.017 ms / 100) 2.074 -> 2.073 ( -0.05%) [ +0.14% +0.00% +0.14% / -0.05% +0.24% +0.34%] index_add_ perm_sorted : Elapsed 0.021 ms (2.077 ms / 100) 2.010 -> 2.011 ( +0.05%) [ +0.35% +0.05% +0.00% / +0.05% +0.55% +0.70%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.017 ms / 100) 8.745 -> 8.753 ( +0.09%) [ +0.29% +0.00% +0.19% / +0.09% +0.39% +0.24%] index_select const : Elapsed 0.088 ms (8.770 ms / 100) 8.758 -> 8.765 ( +0.08%) [ +0.07% +0.00% +0.07% / +0.24% +0.26% +0.08%] index_select wrap : Elapsed 0.088 ms (8.764 ms / 100) 8.766 -> 8.762 ( -0.05%) [ +0.07% +0.00% +0.11% / -0.05% +0.10% +0.08%] index_select linear : Elapsed 0.088 ms (8.772 ms / 100) 8.742 -> 8.754 ( +0.14%) [ +0.10% +0.14% +0.00% / +0.14% +0.40% +0.40%] index_select reverse : Elapsed 0.088 ms (8.751 ms / 100) 8.766 -> 8.751 ( -0.17%) [ +0.08% +0.15% +0.00% / -0.17% +0.17% +0.02%] index_select skip64 : Elapsed 0.088 ms (8.773 ms / 100) 8.762 -> 8.761 ( -0.01%) [ +0.14% +0.00% +0.11% / -0.01% +0.11% +0.05%] index_select skip256 : Elapsed 0.088 ms (8.774 ms / 100) 8.765 -> 8.764 ( -0.01%) [ +0.00% +0.03% +0.01% / -0.01% +0.00% +0.11%] index_select spread : Elapsed 0.088 ms (8.765 ms / 100) 8.754 -> 8.767 ( +0.15%) [ +0.38% +0.18% +0.00% / +0.18% +0.15% +0.15%] index_select strided 3 : Elapsed 0.088 ms (8.787 ms / 100) 8.771 -> 8.751 ( -0.23%) [ +0.11% +0.00% +0.14% / -0.23% +0.18% -0.01%] index_select random : Elapsed 0.088 ms (8.781 ms / 100) 8.757 -> 8.772 ( +0.17%) [ +0.00% +0.39% +0.25% / +0.27% +0.31% +0.17%] index_select random_sorted : Elapsed 0.088 ms (8.757 ms / 100) B = [20, 5, 40, 16] (stride (80, 1, 1600, 5)) A = [4, 5, 40, 16] (stride (1, 4, 320, 20)) dim = 0 2.067 -> 2.072 ( +0.24%) [ +0.00% +0.15% +0.19% / +0.24% +1.11% +0.73%] index_add_ linear : Elapsed 0.021 ms (2.067 ms / 100) 2.005 -> 2.006 ( +0.05%) [ +0.05% +0.20% +0.00% / +0.05% +0.75% +0.65%] index_copy_ linear : Elapsed 0.020 ms (2.006 ms / 100) 2.067 -> 2.069 ( +0.10%) [ +0.10% +0.24% +0.00% / +0.10% +0.63% +0.63%] index_add_ reverse : Elapsed 0.021 ms (2.069 ms / 100) 2.005 -> 2.008 ( +0.15%) [ +0.25% +0.25% +0.00% / +0.15% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.020 ms (2.010 ms / 100) 2.053 -> 2.056 ( +0.15%) [ +0.10% +0.29% +0.00% / +0.15% +1.02% +1.02%] index_add_ spread : Elapsed 0.021 ms (2.055 ms / 100) 1.998 -> 2.004 ( +0.30%) [ +0.00% +0.15% +0.15% / +0.30% +0.70% +0.75%] index_copy_ spread : Elapsed 0.020 ms (1.998 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +0.72% +0.72%] index_add_ strided 3 : Elapsed 0.021 ms (2.079 ms / 100) 2.012 -> 2.020 ( +0.40%) [ +0.35% +0.30% +0.00% / +0.40% +1.09% +1.34%] index_copy_ strided 3 : Elapsed 0.020 ms (2.019 ms / 100) 2.081 -> 2.082 ( +0.05%) [ +0.19% +0.00% +0.00% / +0.05% +0.58% +0.34%] index_add_ strided 7 : Elapsed 0.021 ms (2.085 ms / 100) 2.021 -> 2.022 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.49% +0.59%] index_copy_ strided 7 : Elapsed 0.020 ms (2.023 ms / 100) 2.055 -> 2.056 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.92% +0.97%] index_add_ perm : Elapsed 0.021 ms (2.057 ms / 100) 1.996 -> 1.997 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +1.00% +0.90%] index_copy_ perm : Elapsed 0.020 ms (1.997 ms / 100) 2.065 -> 2.069 ( +0.19%) [ +0.15% +0.00% +0.15% / +0.19% +0.68% +0.77%] index_add_ perm_sorted : Elapsed 0.021 ms (2.068 ms / 100) 2.003 -> 2.007 ( +0.20%) [ +0.20% +0.00% +0.30% / +0.20% +0.65% +0.50%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.007 ms / 100) 8.790 -> 8.786 ( -0.05%) [ +0.00% +0.07% +0.03% / -0.05% +0.26% +0.13%] index_select const : Elapsed 0.088 ms (8.790 ms / 100) 8.778 -> 8.786 ( +0.09%) [ +0.14% +0.00% +0.02% / +0.09% +0.52% +0.28%] index_select wrap : Elapsed 0.088 ms (8.790 ms / 100) 8.793 -> 8.793 ( +0.00%) [ +0.00% +0.11% +0.24% / +0.00% +0.17% +0.10%] index_select linear : Elapsed 0.088 ms (8.793 ms / 100) 8.769 -> 8.773 ( +0.05%) [ +0.21% +0.21% +0.00% / +0.05% +0.52% +0.58%] index_select reverse : Elapsed 0.088 ms (8.787 ms / 100) 8.776 -> 8.776 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.59% +0.42%] index_select skip64 : Elapsed 0.088 ms (8.776 ms / 100) 8.776 -> 8.778 ( +0.02%) [ +0.00% +0.03% +0.11% / +0.02% +0.48% +0.48%] index_select skip256 : Elapsed 0.088 ms (8.776 ms / 100) 8.789 -> 8.786 ( -0.03%) [ +0.11% +0.22% +0.00% / -0.03% +0.17% +0.20%] index_select spread : Elapsed 0.088 ms (8.799 ms / 100) 8.780 -> 8.783 ( +0.03%) [ +0.08% +0.00% +0.08% / +0.03% +0.18% +0.32%] index_select strided 3 : Elapsed 0.088 ms (8.787 ms / 100) 8.775 -> 8.787 ( +0.14%) [ +0.00% +0.00% +0.01% / +0.14% +0.18% +0.47%] index_select random : Elapsed 0.088 ms (8.775 ms / 100) 8.789 -> 8.785 ( -0.05%) [ +0.00% +0.03% +0.07% / -0.05% +0.44% +0.15%] index_select random_sorted : Elapsed 0.088 ms (8.789 ms / 100) B = [20, 5, 40, 16] (stride (1, 20, 1600, 100)) A = [4, 5, 40, 16] (stride (3200, 640, 16, 1)) dim = 0 1.832 -> 1.826 ( -0.33%) [ +0.11% +0.11% +0.00% / +0.11% -0.33% +0.05%] index_add_ linear : Elapsed 0.018 ms (1.834 ms / 100) 1.814 -> 1.803 ( -0.61%) [ +0.00% +0.17% +0.11% / +0.11% -0.44% -0.61%] index_copy_ linear : Elapsed 0.018 ms (1.814 ms / 100) 1.832 -> 1.830 ( -0.11%) [ +0.00% +0.16% +0.00% / -0.05% -0.11% -0.11%] index_add_ reverse : Elapsed 0.018 ms (1.832 ms / 100) 1.810 -> 1.806 ( -0.22%) [ +0.22% +0.17% +0.00% / +0.28% -0.22% -0.11%] index_copy_ reverse : Elapsed 0.018 ms (1.814 ms / 100) 1.877 -> 1.876 ( -0.05%) [ +0.27% +0.37% +0.00% / +0.05% +0.16% -0.05%] index_add_ spread : Elapsed 0.019 ms (1.882 ms / 100) 1.905 -> 1.906 ( +0.05%) [ +0.42% +0.00% +0.26% / +0.05% +0.21% +0.52%] index_copy_ spread : Elapsed 0.019 ms (1.913 ms / 100) 1.870 -> 1.865 ( -0.27%) [ +0.16% +0.16% +0.00% / +0.16% +0.05% -0.27%] index_add_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.879 -> 1.879 ( +0.00%) [ +0.00% +0.27% +0.16% / +0.37% +0.11% +0.00%] index_copy_ strided 3 : Elapsed 0.019 ms (1.879 ms / 100) 1.878 -> 1.872 ( -0.32%) [ +0.00% +0.05% +0.11% / +0.21% -0.32% -0.27%] index_add_ strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.907 -> 1.907 ( +0.00%) [ +0.58% +0.00% +0.16% / +0.16% +0.10% +0.00%] index_copy_ strided 7 : Elapsed 0.019 ms (1.918 ms / 100) 1.870 -> 1.863 ( -0.37%) [ +0.00% +0.11% +0.05% / -0.05% -0.37% -0.11%] index_add_ perm : Elapsed 0.019 ms (1.870 ms / 100) 1.877 -> 1.872 ( -0.27%) [ +0.00% +0.27% +0.27% / +0.21% -0.16% -0.27%] index_copy_ perm : Elapsed 0.019 ms (1.877 ms / 100) 1.866 -> 1.862 ( -0.21%) [ +0.00% +0.38% +0.16% / +0.38% -0.16% -0.21%] index_add_ perm_sorted : Elapsed 0.019 ms (1.866 ms / 100) 1.879 -> 1.871 ( -0.43%) [ +0.00% +0.05% +0.05% / +0.05% -0.27% -0.43%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.879 ms / 100) 7.942 -> 7.954 ( +0.15%) [ +0.13% +0.16% +0.00% / +0.15% +0.44% +0.33%] index_select const : Elapsed 0.080 ms (7.952 ms / 100) 8.008 -> 8.016 ( +0.10%) [ +0.04% +0.12% +0.00% / +0.24% +0.25% +0.10%] index_select wrap : Elapsed 0.080 ms (8.011 ms / 100) 7.988 -> 7.977 ( -0.14%) [ +0.09% +0.06% +0.00% / -0.14% +0.13% +0.34%] index_select linear : Elapsed 0.080 ms (7.995 ms / 100) 7.986 -> 7.990 ( +0.05%) [ +0.00% +0.24% +0.14% / +0.05% +0.31% +0.34%] index_select reverse : Elapsed 0.080 ms (7.986 ms / 100) 7.944 -> 7.952 ( +0.10%) [ +0.19% +0.08% +0.00% / +0.10% +0.28% +0.33%] index_select skip64 : Elapsed 0.080 ms (7.959 ms / 100) 7.951 -> 7.957 ( +0.08%) [ +0.00% +0.19% +0.08% / +0.08% +0.26% +0.11%] index_select skip256 : Elapsed 0.080 ms (7.951 ms / 100) 7.992 -> 7.989 ( -0.04%) [ +0.11% +0.00% +0.05% / -0.04% +0.06% -0.04%] index_select spread : Elapsed 0.080 ms (8.001 ms / 100) 8.004 -> 8.003 ( -0.01%) [ +0.12% +0.31% +0.00% / +0.10% -0.01% +0.34%] index_select strided 3 : Elapsed 0.080 ms (8.014 ms / 100) 7.993 -> 7.999 ( +0.08%) [ +0.25% +0.20% +0.00% / +0.13% +0.08% +0.15%] index_select random : Elapsed 0.080 ms (8.013 ms / 100) 7.983 -> 7.995 ( +0.15%) [ +0.15% +0.00% +0.09% / +0.15% +0.24% +0.30%] index_select random_sorted : Elapsed 0.080 ms (7.995 ms / 100) B = [20, 5, 40, 16] (stride (1, 20, 1600, 100)) A = [4, 5, 40, 16] (stride (80, 16, 320, 1)) dim = 0 2.028 -> 2.029 ( +0.05%) [ +0.00% +0.05% +0.15% / +0.15% +0.05% +0.20%] index_add_ linear : Elapsed 0.020 ms (2.028 ms / 100) 2.010 -> 2.005 ( -0.25%) [ +0.00% +0.00% +0.05% / -0.25% -0.20% +0.15%] index_copy_ linear : Elapsed 0.020 ms (2.010 ms / 100) 2.022 -> 2.028 ( +0.30%) [ +0.15% +0.15% +0.00% / +0.30% +0.59% +0.45%] index_add_ reverse : Elapsed 0.020 ms (2.025 ms / 100) 2.005 -> 2.001 ( -0.20%) [ +0.05% +0.15% +0.00% / -0.20% +0.35% +0.05%] index_copy_ reverse : Elapsed 0.020 ms (2.006 ms / 100) 2.078 -> 2.079 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.14% +0.05%] index_add_ spread : Elapsed 0.021 ms (2.078 ms / 100) 2.100 -> 2.100 ( +0.00%) [ +0.00% +0.05% +0.38% / +0.14% +0.05% +0.00%] index_copy_ spread : Elapsed 0.021 ms (2.100 ms / 100) 2.067 -> 2.061 ( -0.29%) [ +0.10% +0.19% +0.00% / -0.29% +0.44% +0.29%] index_add_ strided 3 : Elapsed 0.021 ms (2.069 ms / 100) 2.069 -> 2.073 ( +0.19%) [ +0.14% +0.39% +0.00% / +0.19% +0.63% +0.29%] index_copy_ strided 3 : Elapsed 0.021 ms (2.072 ms / 100) 2.070 -> 2.077 ( +0.34%) [ +0.34% +0.48% +0.00% / +0.34% +0.68% +0.58%] index_add_ strided 7 : Elapsed 0.021 ms (2.077 ms / 100) 2.104 -> 2.104 ( +0.00%) [ +0.14% +0.10% +0.00% / +0.05% +0.19% +0.00%] index_copy_ strided 7 : Elapsed 0.021 ms (2.107 ms / 100) 2.066 -> 2.063 ( -0.15%) [ +0.00% +0.19% +0.15% / -0.05% +0.00% -0.15%] index_add_ perm : Elapsed 0.021 ms (2.066 ms / 100) 2.071 -> 2.069 ( -0.10%) [ +0.00% +0.29% +0.00% / +0.14% -0.10% +0.05%] index_copy_ perm : Elapsed 0.021 ms (2.071 ms / 100) 2.065 -> 2.064 ( -0.05%) [ +0.00% +0.10% +0.24% / -0.05% +0.29% +0.19%] index_add_ perm_sorted : Elapsed 0.021 ms (2.065 ms / 100) 2.073 -> 2.072 ( -0.05%) [ +0.10% +0.14% +0.00% / +0.14% -0.05% +0.00%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.075 ms / 100) 8.770 -> 8.774 ( +0.05%) [ +0.00% +0.11% +0.13% / +0.05% +0.25% +0.05%] index_select const : Elapsed 0.088 ms (8.770 ms / 100) 8.849 -> 8.846 ( -0.03%) [ +0.02% +0.06% +0.00% / -0.03% +0.09% +0.26%] index_select wrap : Elapsed 0.089 ms (8.851 ms / 100) 8.800 -> 8.806 ( +0.07%) [ +0.12% +0.00% +0.09% / +0.15% +0.07% +0.31%] index_select linear : Elapsed 0.088 ms (8.811 ms / 100) 8.814 -> 8.821 ( +0.08%) [ +0.00% +0.15% +0.14% / +0.08% +0.16% +0.34%] index_select reverse : Elapsed 0.088 ms (8.814 ms / 100) 8.775 -> 8.784 ( +0.10%) [ +0.08% +0.00% +0.13% / +0.14% +0.22% +0.10%] index_select skip64 : Elapsed 0.088 ms (8.782 ms / 100) 8.772 -> 8.783 ( +0.13%) [ +0.00% +0.09% +0.18% / +0.19% +0.27% +0.13%] index_select skip256 : Elapsed 0.088 ms (8.772 ms / 100) 8.832 -> 8.843 ( +0.12%) [ +0.16% +0.11% +0.00% / +0.17% +0.12% +0.15%] index_select spread : Elapsed 0.088 ms (8.846 ms / 100) 8.840 -> 8.855 ( +0.17%) [ +0.27% +0.02% +0.00% / +0.19% +0.23% +0.17%] index_select strided 3 : Elapsed 0.089 ms (8.864 ms / 100) 8.837 -> 8.844 ( +0.08%) [ +0.00% +0.27% +0.19% / +0.08% +0.45% +0.33%] index_select random : Elapsed 0.088 ms (8.837 ms / 100) 8.834 -> 8.826 ( -0.09%) [ +0.15% +0.00% +0.06% / -0.09% -0.03% +0.01%] index_select random_sorted : Elapsed 0.088 ms (8.847 ms / 100) B = [20, 5, 40, 16] (stride (200, 40, 1, 4000)) A = [4, 5, 40, 16] (stride (1, 4, 20, 800)) dim = 0 2.099 -> 2.099 ( +0.00%) [ +0.14% +0.29% +0.00% / +0.05% +0.00% +0.05%] index_add_ linear : Elapsed 0.021 ms (2.102 ms / 100) 2.052 -> 2.051 ( -0.05%) [ +0.24% +0.34% +0.00% / +0.15% -0.05% +0.29%] index_copy_ linear : Elapsed 0.021 ms (2.057 ms / 100) 2.099 -> 2.105 ( +0.29%) [ +0.05% +0.57% +0.00% / +0.29% +0.52% +0.33%] index_add_ reverse : Elapsed 0.021 ms (2.100 ms / 100) 2.050 -> 2.053 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +3.76% +0.24%] index_copy_ reverse : Elapsed 0.021 ms (2.053 ms / 100) 2.094 -> 2.090 ( -0.19%) [ +0.00% +0.19% +0.38% / -0.19% +0.00% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.094 ms / 100) 2.048 -> 2.044 ( -0.20%) [ +0.00% +0.39% +0.24% / -0.20% +0.05% +0.20%] index_copy_ spread : Elapsed 0.020 ms (2.048 ms / 100) 2.101 -> 2.096 ( -0.24%) [ +0.10% +0.29% +0.00% / -0.24% -0.14% +0.05%] index_add_ strided 3 : Elapsed 0.021 ms (2.103 ms / 100) 2.051 -> 2.052 ( +0.05%) [ +0.15% +0.20% +0.00% / +0.05% +0.05% +0.29%] index_copy_ strided 3 : Elapsed 0.021 ms (2.054 ms / 100) 2.094 -> 2.095 ( +0.05%) [ +0.81% +0.86% +0.00% / +0.67% +0.24% +0.05%] index_add_ strided 7 : Elapsed 0.021 ms (2.111 ms / 100) 2.053 -> 2.048 ( -0.24%) [ +0.39% +0.34% +0.00% / +0.24% -0.24% -0.15%] index_copy_ strided 7 : Elapsed 0.021 ms (2.061 ms / 100) 2.101 -> 2.102 ( +0.05%) [ +0.38% +0.14% +0.00% / +0.05% +0.38% +0.14%] index_add_ perm : Elapsed 0.021 ms (2.109 ms / 100) 2.047 -> 2.048 ( +0.05%) [ +0.34% +0.20% +0.00% / +0.05% +0.83% +0.20%] index_copy_ perm : Elapsed 0.021 ms (2.054 ms / 100) 2.101 -> 2.097 ( -0.19%) [ +0.00% +0.24% +0.33% / -0.10% +0.29% -0.19%] index_add_ perm_sorted : Elapsed 0.021 ms (2.101 ms / 100) 2.053 -> 2.047 ( -0.29%) [ +0.05% +0.00% +0.29% / -0.24% +0.19% -0.29%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.054 ms / 100) 8.802 -> 8.795 ( -0.08%) [ +0.17% +0.02% +0.00% / +0.02% +0.44% -0.08%] index_select const : Elapsed 0.088 ms (8.817 ms / 100) 8.782 -> 8.810 ( +0.32%) [ +0.34% +0.23% +0.00% / +0.35% +0.44% +0.32%] index_select wrap : Elapsed 0.088 ms (8.812 ms / 100) 8.806 -> 8.793 ( -0.15%) [ +0.00% +0.01% +0.00% / -0.15% +0.03% +0.00%] index_select linear : Elapsed 0.088 ms (8.806 ms / 100) 8.793 -> 8.806 ( +0.15%) [ +0.28% +0.22% +0.00% / +0.15% +0.33% +0.36%] index_select reverse : Elapsed 0.088 ms (8.818 ms / 100) 8.802 -> 8.803 ( +0.01%) [ +0.00% +0.07% +0.17% / +0.16% +0.28% +0.01%] index_select skip64 : Elapsed 0.088 ms (8.802 ms / 100) 8.807 -> 8.814 ( +0.08%) [ +0.26% +0.00% +0.01% / +0.08% +0.25% +0.25%] index_select skip256 : Elapsed 0.088 ms (8.830 ms / 100) 8.805 -> 8.806 ( +0.01%) [ +0.00% +0.11% +0.19% / +0.03% +0.01% +0.01%] index_select spread : Elapsed 0.088 ms (8.805 ms / 100) 8.786 -> 8.811 ( +0.28%) [ +0.35% +0.00% +0.10% / +0.28% +0.32% +0.34%] index_select strided 3 : Elapsed 0.088 ms (8.817 ms / 100) 8.793 -> 8.801 ( +0.09%) [ +0.15% +0.10% +0.00% / +0.09% +0.40% +0.50%] index_select random : Elapsed 0.088 ms (8.806 ms / 100) 8.804 -> 8.809 ( +0.06%) [ +0.11% +0.01% +0.00% / +0.25% +0.19% +0.06%] index_select random_sorted : Elapsed 0.088 ms (8.814 ms / 100) out_shape = [4, 20, 40, 16] in_shape = [4, 5, 40, 16] idx_dim = 1 B = [4, 20, 40, 16] (stride (12800, 640, 1, 40)) A = [4, 5, 40, 16] (stride (3200, 16, 80, 1)) dim = 1 1.848 -> 1.848 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.81% +0.76%] index_add_ linear : Elapsed 0.019 ms (1.851 ms / 100) 1.804 -> 1.803 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.78% +1.16%] index_copy_ linear : Elapsed 0.018 ms (1.804 ms / 100) 1.850 -> 1.849 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.49% +0.49%] index_add_ reverse : Elapsed 0.019 ms (1.853 ms / 100) 1.808 -> 1.812 ( +0.22%) [ +0.06% +0.06% +0.00% / +0.22% +0.55% +0.50%] index_copy_ reverse : Elapsed 0.018 ms (1.809 ms / 100) 1.848 -> 1.854 ( +0.32%) [ +0.11% +0.00% +0.11% / +0.32% +0.43% +0.43%] index_add_ spread : Elapsed 0.019 ms (1.850 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.17% +0.06% +0.00% / +0.44% +0.39% +0.50%] index_copy_ spread : Elapsed 0.018 ms (1.808 ms / 100) 1.837 -> 1.842 ( +0.27%) [ +0.16% +0.00% +0.11% / +0.27% +1.69% +1.47%] index_add_ strided 3 : Elapsed 0.018 ms (1.840 ms / 100) 1.793 -> 1.798 ( +0.28%) [ +0.00% +0.28% +0.22% / +0.28% +2.06% +1.62%] index_copy_ strided 3 : Elapsed 0.018 ms (1.793 ms / 100) 1.845 -> 1.849 ( +0.22%) [ +0.00% +0.11% +0.16% / +0.22% +1.30% +1.52%] index_add_ strided 7 : Elapsed 0.018 ms (1.845 ms / 100) 1.800 -> 1.799 ( -0.06%) [ +0.00% +0.28% +0.11% / -0.06% +1.67% +1.61%] index_copy_ strided 7 : Elapsed 0.018 ms (1.800 ms / 100) 1.854 -> 1.855 ( +0.05%) [ +0.00% +0.16% +0.00% / +0.22% +0.16% +0.05%] index_add_ perm : Elapsed 0.019 ms (1.854 ms / 100) 1.804 -> 1.808 ( +0.22%) [ +0.22% +0.00% +0.17% / +0.22% +0.72% +0.61%] index_copy_ perm : Elapsed 0.018 ms (1.808 ms / 100) 1.845 -> 1.849 ( +0.22%) [ +0.16% +0.00% +0.22% / +0.22% +2.11% +0.43%] index_add_ perm_sorted : Elapsed 0.018 ms (1.848 ms / 100) 1.800 -> 1.803 ( +0.17%) [ +0.17% +0.00% +0.28% / +0.17% +0.94% +0.61%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.803 ms / 100) 8.526 -> 8.530 ( +0.05%) [ +0.00% +0.18% +0.11% / +0.05% +0.26% +0.19%] index_select const : Elapsed 0.085 ms (8.526 ms / 100) 8.570 -> 8.568 ( -0.02%) [ +0.02% +0.21% +0.00% / +0.07% -0.02% +0.01%] index_select wrap : Elapsed 0.086 ms (8.572 ms / 100) 8.568 -> 8.562 ( -0.07%) [ +0.00% +0.11% +0.12% / +0.13% -0.05% -0.07%] index_select linear : Elapsed 0.086 ms (8.568 ms / 100) 8.561 -> 8.571 ( +0.12%) [ +0.20% +0.09% +0.00% / +0.22% +0.12% +0.40%] index_select reverse : Elapsed 0.086 ms (8.578 ms / 100) 8.524 -> 8.538 ( +0.16%) [ +0.09% +0.00% +0.19% / +0.20% +0.41% +0.16%] index_select skip64 : Elapsed 0.085 ms (8.532 ms / 100) 8.531 -> 8.548 ( +0.20%) [ +0.38% +0.05% +0.00% / +0.21% +0.26% +0.20%] index_select skip256 : Elapsed 0.086 ms (8.563 ms / 100) 8.566 -> 8.566 ( +0.00%) [ +0.18% +0.04% +0.00% / +0.00% +0.08% +0.12%] index_select spread : Elapsed 0.086 ms (8.581 ms / 100) 8.576 -> 8.579 ( +0.03%) [ +0.16% +0.14% +0.00% / +0.27% +0.07% +0.03%] index_select strided 3 : Elapsed 0.086 ms (8.590 ms / 100) 8.569 -> 8.578 ( +0.11%) [ +0.23% +0.00% +0.23% / +0.11% +0.12% +0.21%] index_select random : Elapsed 0.086 ms (8.589 ms / 100) 8.549 -> 8.561 ( +0.14%) [ +0.00% +0.36% +0.14% / +0.14% +0.30% +0.32%] index_select random_sorted : Elapsed 0.085 ms (8.549 ms / 100) B = [4, 20, 40, 16] (stride (12800, 40, 1, 800)) A = [4, 5, 40, 16] (stride (3200, 16, 80, 1)) dim = 1 1.807 -> 1.811 ( +0.22%) [ +0.22% +0.06% +0.00% / +0.22% +0.33% +0.50%] index_add_ linear : Elapsed 0.018 ms (1.811 ms / 100) 1.760 -> 1.763 ( +0.17%) [ +0.28% +0.23% +0.00% / +0.17% +0.57% +0.68%] index_copy_ linear : Elapsed 0.018 ms (1.765 ms / 100) 1.809 -> 1.807 ( -0.11%) [ +0.17% +0.11% +0.00% / -0.11% +0.44% +0.28%] index_add_ reverse : Elapsed 0.018 ms (1.812 ms / 100) 1.761 -> 1.762 ( +0.06%) [ +0.17% +0.34% +0.00% / +0.06% +0.57% +0.68%] index_copy_ reverse : Elapsed 0.018 ms (1.764 ms / 100) 1.825 -> 1.827 ( +0.11%) [ +0.22% +0.27% +0.00% / +0.11% +0.16% +0.55%] index_add_ spread : Elapsed 0.018 ms (1.829 ms / 100) 1.779 -> 1.781 ( +0.11%) [ +0.00% +0.28% +0.06% / +0.11% +0.34% +0.39%] index_copy_ spread : Elapsed 0.018 ms (1.779 ms / 100) 1.832 -> 1.830 ( -0.11%) [ +0.00% +0.11% +0.16% / +0.22% -0.11% +0.00%] index_add_ strided 3 : Elapsed 0.018 ms (1.832 ms / 100) 1.785 -> 1.787 ( +0.11%) [ +0.00% +0.34% +0.11% / +0.17% +0.17% +0.11%] index_copy_ strided 3 : Elapsed 0.018 ms (1.785 ms / 100) 1.815 -> 1.816 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.33% +0.33%] index_add_ strided 7 : Elapsed 0.018 ms (1.817 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.23% +0.23% +0.00% / +0.11% +0.56% +0.51%] index_copy_ strided 7 : Elapsed 0.018 ms (1.774 ms / 100) 1.816 -> 1.817 ( +0.06%) [ +0.17% +0.44% +0.00% / +0.06% +0.50% +0.61%] index_add_ perm : Elapsed 0.018 ms (1.819 ms / 100) 1.772 -> 1.775 ( +0.17%) [ +0.00% +0.45% +0.11% / +0.17% +0.45% +0.56%] index_copy_ perm : Elapsed 0.018 ms (1.772 ms / 100) 1.819 -> 1.817 ( -0.11%) [ +0.00% +0.11% +0.16% / -0.11% +0.38% +0.27%] index_add_ perm_sorted : Elapsed 0.018 ms (1.819 ms / 100) 1.772 -> 1.777 ( +0.28%) [ +0.17% +0.17% +0.00% / +0.28% +0.40% +0.45%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.775 ms / 100) 8.516 -> 8.537 ( +0.25%) [ +0.00% +0.32% +0.13% / +0.25% +0.42% +0.46%] index_select const : Elapsed 0.085 ms (8.516 ms / 100) 8.571 -> 8.552 ( -0.22%) [ +0.00% +0.22% +0.02% / -0.22% +0.06% +0.25%] index_select wrap : Elapsed 0.086 ms (8.571 ms / 100) 8.573 -> 8.567 ( -0.07%) [ +0.08% +0.08% +0.00% / -0.07% +0.05% -0.06%] index_select linear : Elapsed 0.086 ms (8.580 ms / 100) 8.568 -> 8.569 ( +0.01%) [ +0.00% +0.20% +0.08% / +0.01% +0.41% +0.21%] index_select reverse : Elapsed 0.086 ms (8.568 ms / 100) 8.521 -> 8.528 ( +0.08%) [ +0.00% +0.19% +0.12% / +0.08% +0.35% +0.28%] index_select skip64 : Elapsed 0.085 ms (8.521 ms / 100) 8.531 -> 8.534 ( +0.04%) [ +0.00% +0.36% +0.08% / +0.08% +0.04% +0.26%] index_select skip256 : Elapsed 0.085 ms (8.531 ms / 100) 8.549 -> 8.573 ( +0.28%) [ +0.25% +0.18% +0.00% / +0.30% +0.51% +0.28%] index_select spread : Elapsed 0.086 ms (8.570 ms / 100) 8.564 -> 8.566 ( +0.02%) [ +0.00% +0.14% +0.16% / +0.02% +0.13% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.564 ms / 100) 8.584 -> 8.580 ( -0.05%) [ +0.00% +0.10% +0.03% / +0.05% +0.09% -0.05%] index_select random : Elapsed 0.086 ms (8.584 ms / 100) 8.567 -> 8.573 ( +0.07%) [ +0.00% +0.00% +0.11% / +0.07% +0.35% +0.16%] index_select random_sorted : Elapsed 0.086 ms (8.567 ms / 100) B = [4, 20, 40, 16] (stride (1, 2560, 64, 4)) A = [4, 5, 40, 16] (stride (40, 2560, 1, 160)) dim = 1 1.777 -> 1.783 ( +0.34%) [ +0.28% +0.34% +0.00% / +0.34% +0.79% +1.07%] index_add_ linear : Elapsed 0.018 ms (1.782 ms / 100) 1.732 -> 1.733 ( +0.06%) [ +0.00% +0.23% +0.00% / +0.06% +0.75% +0.69%] index_copy_ linear : Elapsed 0.017 ms (1.732 ms / 100) 1.781 -> 1.783 ( +0.11%) [ +0.17% +0.00% +0.17% / +0.11% +0.39% +0.56%] index_add_ reverse : Elapsed 0.018 ms (1.784 ms / 100) 1.733 -> 1.738 ( +0.29%) [ +0.12% +0.06% +0.00% / +0.29% +1.04% +0.58%] index_copy_ reverse : Elapsed 0.017 ms (1.735 ms / 100) 1.765 -> 1.766 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +1.59% +1.81%] index_add_ spread : Elapsed 0.018 ms (1.766 ms / 100) 1.713 -> 1.715 ( +0.12%) [ +0.12% +0.00% +0.18% / +0.12% +1.34% +1.23%] index_copy_ spread : Elapsed 0.017 ms (1.715 ms / 100) 1.763 -> 1.768 ( +0.28%) [ +0.00% +0.23% +0.23% / +0.28% +2.27% +2.38%] index_add_ strided 3 : Elapsed 0.018 ms (1.763 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.00% +0.47% +0.18% / +0.06% +2.22% +2.34%] index_copy_ strided 3 : Elapsed 0.017 ms (1.713 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.28% +0.06% / +0.11% +1.69% +1.53%] index_add_ strided 7 : Elapsed 0.018 ms (1.770 ms / 100) 1.716 -> 1.718 ( +0.12%) [ +0.00% +0.41% +0.17% / +0.12% +1.75% +1.69%] index_copy_ strided 7 : Elapsed 0.017 ms (1.716 ms / 100) 1.750 -> 1.750 ( +0.00%) [ +0.23% +0.00% +0.34% / +0.00% +2.34% +2.34%] index_add_ perm : Elapsed 0.018 ms (1.754 ms / 100) 1.704 -> 1.706 ( +0.12%) [ +0.00% +0.00% +0.06% / +0.12% +2.11% +2.11%] index_copy_ perm : Elapsed 0.017 ms (1.704 ms / 100) 1.754 -> 1.755 ( +0.06%) [ +0.23% +0.00% +0.23% / +0.06% +2.45% +2.57%] index_add_ perm_sorted : Elapsed 0.018 ms (1.758 ms / 100) 1.708 -> 1.711 ( +0.18%) [ +0.18% +0.00% +0.23% / +0.18% +2.46% +2.22%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.711 ms / 100) 8.180 -> 8.205 ( +0.31%) [ +0.00% +0.27% +0.26% / +0.39% +0.70% +0.31%] index_select const : Elapsed 0.082 ms (8.180 ms / 100) 8.224 -> 8.225 ( +0.01%) [ +0.00% +0.06% +0.26% / +0.32% +0.34% +0.01%] index_select wrap : Elapsed 0.082 ms (8.224 ms / 100) 8.235 -> 8.240 ( +0.06%) [ +0.12% +0.00% +0.01% / +0.06% +0.09% +0.16%] index_select linear : Elapsed 0.082 ms (8.245 ms / 100) 8.198 -> 8.221 ( +0.28%) [ +0.00% +0.17% +0.27% / +0.28% +0.46% +0.55%] index_select reverse : Elapsed 0.082 ms (8.198 ms / 100) 8.179 -> 8.196 ( +0.21%) [ +0.44% +0.00% +0.18% / +0.21% +0.34% +0.51%] index_select skip64 : Elapsed 0.082 ms (8.215 ms / 100) 8.196 -> 8.200 ( +0.05%) [ +0.00% +0.05% +0.18% / +0.05% +0.09% +0.27%] index_select skip256 : Elapsed 0.082 ms (8.196 ms / 100) 8.240 -> 8.230 ( -0.12%) [ +0.02% +0.00% +0.01% / +0.15% -0.12% -0.10%] index_select spread : Elapsed 0.082 ms (8.242 ms / 100) 8.233 -> 8.228 ( -0.06%) [ +0.21% +0.33% +0.00% / -0.06% +0.06% -0.04%] index_select strided 3 : Elapsed 0.082 ms (8.250 ms / 100) 8.226 -> 8.248 ( +0.27%) [ +0.21% +0.24% +0.00% / +0.27% +0.33% +0.41%] index_select random : Elapsed 0.082 ms (8.243 ms / 100) 8.234 -> 8.250 ( +0.19%) [ +0.00% +0.22% +0.11% / +0.21% +0.24% +0.19%] index_select random_sorted : Elapsed 0.082 ms (8.234 ms / 100) B = [4, 20, 40, 16] (stride (16, 64, 1280, 1)) A = [4, 5, 40, 16] (stride (40, 2560, 1, 160)) dim = 1 1.878 -> 1.879 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.05% +0.05%] index_add_ linear : Elapsed 0.019 ms (1.880 ms / 100) 1.826 -> 1.828 ( +0.11%) [ +0.11% +0.00% +0.16% / +0.38% +0.38% +0.11%] index_copy_ linear : Elapsed 0.018 ms (1.828 ms / 100) 1.867 -> 1.857 ( -0.54%) [ +0.11% +0.11% +0.00% / +0.21% -0.54% -0.32%] index_add_ reverse : Elapsed 0.019 ms (1.869 ms / 100) 1.819 -> 1.813 ( -0.33%) [ +0.00% +0.05% +0.05% / -0.05% -0.22% -0.33%] index_copy_ reverse : Elapsed 0.018 ms (1.819 ms / 100) 1.863 -> 1.868 ( +0.27%) [ +0.21% +0.05% +0.00% / +0.27% +0.54% +0.48%] index_add_ spread : Elapsed 0.019 ms (1.867 ms / 100) 1.814 -> 1.816 ( +0.11%) [ +0.17% +0.33% +0.00% / +0.11% +0.50% +0.44%] index_copy_ spread : Elapsed 0.018 ms (1.817 ms / 100) 1.876 -> 1.880 ( +0.21%) [ +0.00% +0.16% +0.00% / +0.21% +0.21% +0.37%] index_add_ strided 3 : Elapsed 0.019 ms (1.876 ms / 100) 1.823 -> 1.830 ( +0.38%) [ +0.00% +0.22% +0.22% / +0.38% +0.49% +0.66%] index_copy_ strided 3 : Elapsed 0.018 ms (1.823 ms / 100) 1.858 -> 1.857 ( -0.05%) [ +0.00% +0.11% +0.11% / -0.05% +0.48% +0.43%] index_add_ strided 7 : Elapsed 0.019 ms (1.858 ms / 100) 1.808 -> 1.808 ( +0.00%) [ +0.00% +0.17% +0.06% / +0.00% +0.55% +0.44%] index_copy_ strided 7 : Elapsed 0.018 ms (1.808 ms / 100) 1.850 -> 1.854 ( +0.22%) [ +0.00% +0.38% +0.16% / +0.22% +0.97% +1.03%] index_add_ perm : Elapsed 0.018 ms (1.850 ms / 100) 1.804 -> 1.805 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.94% +1.16%] index_copy_ perm : Elapsed 0.018 ms (1.804 ms / 100) 1.870 -> 1.870 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.86%] index_add_ perm_sorted : Elapsed 0.019 ms (1.870 ms / 100) 1.820 -> 1.821 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.93% +1.04%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.820 ms / 100) 8.505 -> 8.552 ( +0.55%) [ +0.33% +0.40% +0.00% / +0.56% +0.69% +0.55%] index_select const : Elapsed 0.085 ms (8.533 ms / 100) 8.558 -> 8.585 ( +0.32%) [ +0.42% +0.02% +0.00% / +0.32% +0.35% +0.37%] index_select wrap : Elapsed 0.086 ms (8.594 ms / 100) 8.547 -> 8.557 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.41% +0.50%] index_select linear : Elapsed 0.086 ms (8.557 ms / 100) 8.542 -> 8.537 ( -0.06%) [ +0.00% +0.42% +0.25% / -0.06% +0.35% +0.32%] index_select reverse : Elapsed 0.085 ms (8.542 ms / 100) 8.503 -> 8.538 ( +0.41%) [ +0.44% +0.00% +0.32% / +0.48% +0.41% +0.60%] index_select skip64 : Elapsed 0.085 ms (8.540 ms / 100) 8.514 -> 8.514 ( +0.00%) [ +0.01% +0.08% +0.00% / +0.00% +0.45% +0.52%] index_select skip256 : Elapsed 0.085 ms (8.515 ms / 100) 8.553 -> 8.542 ( -0.13%) [ +0.05% +0.00% +0.19% / -0.13% +0.64% +0.68%] index_select spread : Elapsed 0.086 ms (8.557 ms / 100) 8.558 -> 8.551 ( -0.08%) [ +0.00% +0.18% +0.12% / -0.08% +0.54% +0.23%] index_select strided 3 : Elapsed 0.086 ms (8.558 ms / 100) 8.547 -> 8.566 ( +0.22%) [ +0.25% +0.09% +0.00% / +0.22% +0.42% +0.35%] index_select random : Elapsed 0.086 ms (8.568 ms / 100) 8.543 -> 8.547 ( +0.05%) [ +0.00% +0.05% +0.16% / +0.05% +0.52% +0.59%] index_select random_sorted : Elapsed 0.085 ms (8.543 ms / 100) B = [4, 20, 40, 16] (stride (16, 64, 1280, 1)) A = [4, 5, 40, 16] (stride (1, 2560, 4, 160)) dim = 1 1.874 -> 1.876 ( +0.11%) [ +0.00% +0.16% +0.21% / +0.11% +0.69% +0.59%] index_add_ linear : Elapsed 0.019 ms (1.874 ms / 100) 1.823 -> 1.824 ( +0.05%) [ +0.00% +0.11% +0.05% / +0.05% +0.77% +0.99%] index_copy_ linear : Elapsed 0.018 ms (1.823 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.05% +0.21% +0.00% / +0.16% +0.80% +0.64%] index_add_ reverse : Elapsed 0.019 ms (1.875 ms / 100) 1.822 -> 1.824 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.93% +0.93%] index_copy_ reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.874 -> 1.873 ( -0.05%) [ +0.00% +0.05% +0.21% / -0.05% +1.07% +1.07%] index_add_ spread : Elapsed 0.019 ms (1.874 ms / 100) 1.823 -> 1.821 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.93% +0.93%] index_copy_ spread : Elapsed 0.018 ms (1.823 ms / 100) 1.877 -> 1.880 ( +0.16%) [ +0.05% +0.16% +0.00% / +0.16% +1.01% +0.91%] index_add_ strided 3 : Elapsed 0.019 ms (1.878 ms / 100) 1.828 -> 1.832 ( +0.22%) [ +0.00% +0.05% +0.22% / +0.22% +1.04% +1.04%] index_copy_ strided 3 : Elapsed 0.018 ms (1.828 ms / 100) 1.892 -> 1.887 ( -0.26%) [ +0.00% +0.11% +0.05% / -0.26% +0.42% +0.37%] index_add_ strided 7 : Elapsed 0.019 ms (1.892 ms / 100) 1.839 -> 1.838 ( -0.05%) [ +0.00% +0.11% +0.16% / -0.05% +0.49% +0.27%] index_copy_ strided 7 : Elapsed 0.018 ms (1.839 ms / 100) 1.876 -> 1.879 ( +0.16%) [ +0.27% +0.16% +0.00% / +0.16% +0.85% +0.85%] index_add_ perm : Elapsed 0.019 ms (1.881 ms / 100) 1.825 -> 1.831 ( +0.33%) [ +0.00% +0.22% +0.16% / +0.33% +0.88% +0.77%] index_copy_ perm : Elapsed 0.018 ms (1.825 ms / 100) 1.878 -> 1.878 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.53%] index_add_ perm_sorted : Elapsed 0.019 ms (1.878 ms / 100) 1.825 -> 1.830 ( +0.27%) [ +0.16% +0.00% +0.22% / +0.27% +0.71% +0.82%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.828 ms / 100) 8.510 -> 8.536 ( +0.31%) [ +0.06% +0.27% +0.00% / +0.31% +0.54% +0.46%] index_select const : Elapsed 0.085 ms (8.515 ms / 100) 8.551 -> 8.560 ( +0.11%) [ +0.00% +0.08% +0.04% / +0.11% +0.61% +0.64%] index_select wrap : Elapsed 0.086 ms (8.551 ms / 100) 8.539 -> 8.566 ( +0.32%) [ +0.00% +0.28% +0.15% / +0.32% +0.34% +0.80%] index_select linear : Elapsed 0.085 ms (8.539 ms / 100) 8.538 -> 8.543 ( +0.06%) [ +0.00% +0.06% +0.20% / +0.06% +0.47% +0.54%] index_select reverse : Elapsed 0.085 ms (8.538 ms / 100) 8.515 -> 8.517 ( +0.02%) [ +0.35% +0.00% +0.00% / +0.02% +0.34% +0.54%] index_select skip64 : Elapsed 0.085 ms (8.545 ms / 100) 8.512 -> 8.520 ( +0.09%) [ +0.00% +0.08% +0.08% / +0.09% +0.59% +0.41%] index_select skip256 : Elapsed 0.085 ms (8.512 ms / 100) 8.543 -> 8.550 ( +0.08%) [ +0.25% +0.00% +0.36% / +0.08% +0.32% +0.55%] index_select spread : Elapsed 0.086 ms (8.564 ms / 100) 8.553 -> 8.564 ( +0.13%) [ +0.20% +0.00% +0.05% / +0.13% +0.68% +0.41%] index_select strided 3 : Elapsed 0.086 ms (8.570 ms / 100) 8.554 -> 8.569 ( +0.18%) [ +0.12% +0.00% +0.27% / +0.18% +0.62% +0.40%] index_select random : Elapsed 0.086 ms (8.564 ms / 100) 8.564 -> 8.565 ( +0.01%) [ +0.18% +0.00% +0.13% / +0.01% +0.35% +0.27%] index_select random_sorted : Elapsed 0.086 ms (8.579 ms / 100) B = [4, 20, 40, 16] (stride (20, 1, 80, 3200)) A = [4, 5, 40, 16] (stride (80, 16, 320, 1)) dim = 1 1.914 -> 1.895 ( -0.99%) [ +0.31% +0.21% +0.00% / +0.26% -0.99% -0.94%] index_add_ linear : Elapsed 0.019 ms (1.920 ms / 100) 1.876 -> 1.849 ( -1.44%) [ +0.21% +0.32% +0.00% / +0.27% -1.44% -1.39%] index_copy_ linear : Elapsed 0.019 ms (1.880 ms / 100) 1.913 -> 1.897 ( -0.84%) [ +0.00% +0.10% +0.16% / +0.16% -0.52% -0.84%] index_add_ reverse : Elapsed 0.019 ms (1.913 ms / 100) 1.876 -> 1.852 ( -1.28%) [ +0.05% +0.11% +0.00% / +0.05% -1.28% -1.17%] index_copy_ reverse : Elapsed 0.019 ms (1.877 ms / 100) 1.934 -> 1.911 ( -1.19%) [ +0.05% +0.26% +0.00% / +0.47% -1.09% -1.19%] index_add_ spread : Elapsed 0.019 ms (1.935 ms / 100) 1.902 -> 1.882 ( -1.05%) [ +0.00% +0.26% +0.21% / +0.11% -0.68% -1.05%] index_copy_ spread : Elapsed 0.019 ms (1.902 ms / 100) 1.937 -> 1.908 ( -1.50%) [ +0.05% +0.10% +0.00% / -0.10% -1.14% -1.50%] index_add_ strided 3 : Elapsed 0.019 ms (1.938 ms / 100) 1.903 -> 1.877 ( -1.37%) [ +0.00% +0.11% +0.16% / +0.21% -1.16% -1.37%] index_copy_ strided 3 : Elapsed 0.019 ms (1.903 ms / 100) 1.932 -> 1.915 ( -0.88%) [ +0.10% +0.21% +0.00% / +0.05% -0.78% -0.88%] index_add_ strided 7 : Elapsed 0.019 ms (1.934 ms / 100) 1.903 -> 1.879 ( -1.26%) [ +0.11% +0.00% +0.16% / -0.05% -1.21% -1.26%] index_copy_ strided 7 : Elapsed 0.019 ms (1.905 ms / 100) 1.932 -> 1.911 ( -1.09%) [ +0.26% +0.00% +0.10% / +0.05% -0.88% -1.09%] index_add_ perm : Elapsed 0.019 ms (1.937 ms / 100) 1.901 -> 1.880 ( -1.10%) [ +0.00% +0.21% +0.00% / +0.26% -1.05% -1.10%] index_copy_ perm : Elapsed 0.019 ms (1.901 ms / 100) 1.933 -> 1.911 ( -1.14%) [ +0.26% +0.16% +0.00% / +0.05% -1.14% -0.93%] index_add_ perm_sorted : Elapsed 0.019 ms (1.938 ms / 100) 1.900 -> 1.879 ( -1.11%) [ +0.21% +0.16% +0.00% / +0.11% -1.11% -0.84%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.904 ms / 100) 8.578 -> 8.598 ( +0.23%) [ +0.36% +0.19% +0.00% / +0.24% +0.23% +0.45%] index_select const : Elapsed 0.086 ms (8.609 ms / 100) 8.630 -> 8.637 ( +0.08%) [ +0.08% +0.00% +0.02% / +0.08% +0.37% +0.27%] index_select wrap : Elapsed 0.086 ms (8.637 ms / 100) 8.621 -> 8.616 ( -0.06%) [ +0.00% +0.06% +0.21% / -0.06% +0.22% +0.48%] index_select linear : Elapsed 0.086 ms (8.621 ms / 100) 8.626 -> 8.644 ( +0.21%) [ +0.08% +0.16% +0.00% / +0.23% +0.21% +0.21%] index_select reverse : Elapsed 0.086 ms (8.633 ms / 100) 8.579 -> 8.592 ( +0.15%) [ +0.00% +0.16% +0.02% / +0.15% +0.21% +0.34%] index_select skip64 : Elapsed 0.086 ms (8.579 ms / 100) 8.577 -> 8.591 ( +0.16%) [ +0.17% +0.34% +0.00% / +0.16% +0.34% +0.35%] index_select skip256 : Elapsed 0.086 ms (8.592 ms / 100) 8.625 -> 8.629 ( +0.05%) [ +0.16% +0.00% +0.13% / +0.05% +0.19% +0.19%] index_select spread : Elapsed 0.086 ms (8.639 ms / 100) 8.628 -> 8.643 ( +0.17%) [ +0.21% +0.03% +0.00% / +0.17% +0.34% +0.49%] index_select strided 3 : Elapsed 0.086 ms (8.646 ms / 100) 8.640 -> 8.647 ( +0.08%) [ +0.21% +0.22% +0.00% / +0.08% +0.41% +0.45%] index_select random : Elapsed 0.087 ms (8.658 ms / 100) 8.625 -> 8.631 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.57% +0.27%] index_select random_sorted : Elapsed 0.086 ms (8.636 ms / 100) B = [4, 20, 40, 16] (stride (20, 1, 80, 3200)) A = [4, 5, 40, 16] (stride (1, 64, 320, 4)) dim = 1 1.917 -> 1.919 ( +0.10%) [ +0.00% +0.21% +0.26% / +0.10% +0.57% +0.47%] index_add_ linear : Elapsed 0.019 ms (1.917 ms / 100) 1.878 -> 1.884 ( +0.32%) [ +0.16% +0.00% +0.16% / +0.32% +0.91% +0.85%] index_copy_ linear : Elapsed 0.019 ms (1.881 ms / 100) 1.917 -> 1.917 ( +0.00%) [ +0.00% +0.21% +0.05% / +0.00% +0.16% +0.57%] index_add_ reverse : Elapsed 0.019 ms (1.917 ms / 100) 1.883 -> 1.880 ( -0.16%) [ +0.00% +0.11% +0.05% / -0.16% +0.53% +0.80%] index_copy_ reverse : Elapsed 0.019 ms (1.883 ms / 100) 1.933 -> 1.936 ( +0.16%) [ +0.00% +0.21% +0.16% / +0.16% +0.52% +0.52%] index_add_ spread : Elapsed 0.019 ms (1.933 ms / 100) 1.904 -> 1.902 ( -0.11%) [ +0.00% +0.37% +0.26% / -0.11% +0.53% +0.26%] index_copy_ spread : Elapsed 0.019 ms (1.904 ms / 100) 1.934 -> 1.936 ( +0.10%) [ +0.00% +0.47% +0.36% / +0.10% +0.57% +0.47%] index_add_ strided 3 : Elapsed 0.019 ms (1.934 ms / 100) 1.903 -> 1.909 ( +0.32%) [ +0.00% +0.21% +0.16% / +0.32% +0.68% +0.32%] index_copy_ strided 3 : Elapsed 0.019 ms (1.903 ms / 100) 1.936 -> 1.936 ( +0.00%) [ +0.52% +0.00% +0.05% / +0.00% +0.31% +0.36%] index_add_ strided 7 : Elapsed 0.019 ms (1.946 ms / 100) 1.905 -> 1.907 ( +0.10%) [ +0.52% +0.00% +0.05% / +0.26% +0.10% +0.42%] index_copy_ strided 7 : Elapsed 0.019 ms (1.915 ms / 100) 1.937 -> 1.939 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.21% +0.36%] index_add_ perm : Elapsed 0.019 ms (1.937 ms / 100) 1.904 -> 1.908 ( +0.21%) [ +0.16% +0.00% +0.05% / +0.21% +0.42% +0.47%] index_copy_ perm : Elapsed 0.019 ms (1.907 ms / 100) 1.935 -> 1.938 ( +0.16%) [ +0.36% +0.00% +0.26% / +0.16% +0.67% +0.47%] index_add_ perm_sorted : Elapsed 0.019 ms (1.942 ms / 100) 1.903 -> 1.909 ( +0.32%) [ +0.63% +0.11% +0.00% / +0.32% +0.63% +0.47%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.915 ms / 100) 8.574 -> 8.581 ( +0.08%) [ +0.00% +0.00% +0.14% / +0.08% +0.30% +0.19%] index_select const : Elapsed 0.086 ms (8.574 ms / 100) 8.611 -> 8.616 ( +0.06%) [ +0.00% +0.00% +0.14% / +0.06% +0.36% +0.45%] index_select wrap : Elapsed 0.086 ms (8.611 ms / 100) 8.613 -> 8.610 ( -0.03%) [ +0.15% +0.00% +0.01% / -0.03% +0.16% +0.14%] index_select linear : Elapsed 0.086 ms (8.626 ms / 100) 8.586 -> 8.598 ( +0.14%) [ +0.21% +0.00% +0.03% / +0.14% +0.43% +0.63%] index_select reverse : Elapsed 0.086 ms (8.604 ms / 100) 8.575 -> 8.593 ( +0.21%) [ +0.12% +0.00% +0.09% / +0.21% +0.27% +0.35%] index_select skip64 : Elapsed 0.086 ms (8.585 ms / 100) 8.578 -> 8.577 ( -0.01%) [ +0.05% +0.00% +0.02% / -0.01% +0.28% +0.05%] index_select skip256 : Elapsed 0.086 ms (8.582 ms / 100) 8.611 -> 8.616 ( +0.06%) [ +0.00% +0.09% +0.12% / +0.10% +0.06% +0.08%] index_select spread : Elapsed 0.086 ms (8.611 ms / 100) 8.608 -> 8.629 ( +0.24%) [ +0.08% +0.13% +0.00% / +0.41% +0.24% +0.34%] index_select strided 3 : Elapsed 0.086 ms (8.615 ms / 100) 8.595 -> 8.615 ( +0.23%) [ +0.00% +0.10% +0.24% / +0.42% +0.23% +0.45%] index_select random : Elapsed 0.086 ms (8.595 ms / 100) 8.587 -> 8.592 ( +0.06%) [ +0.14% +0.24% +0.00% / +0.06% +0.33% +0.28%] index_select random_sorted : Elapsed 0.086 ms (8.599 ms / 100) B = [4, 20, 40, 16] (stride (1, 4, 80, 3200)) A = [4, 5, 40, 16] (stride (640, 2560, 16, 1)) dim = 1 1.740 -> 1.740 ( +0.00%) [ +0.17% +0.00% +0.06% / +0.00% +0.63% +1.03%] index_add_ linear : Elapsed 0.017 ms (1.743 ms / 100) 1.701 -> 1.706 ( +0.29%) [ +0.18% +0.06% +0.00% / +0.29% +0.53% +0.53%] index_copy_ linear : Elapsed 0.017 ms (1.704 ms / 100) 1.740 -> 1.743 ( +0.17%) [ +0.23% +0.17% +0.00% / +0.17% +0.63% +0.52%] index_add_ reverse : Elapsed 0.017 ms (1.744 ms / 100) 1.703 -> 1.698 ( -0.29%) [ +0.00% +0.12% +0.12% / -0.29% +0.35% +0.06%] index_copy_ reverse : Elapsed 0.017 ms (1.703 ms / 100) 1.785 -> 1.778 ( -0.39%) [ +0.00% +0.17% +0.00% / +0.17% -0.34% -0.39%] index_add_ spread : Elapsed 0.018 ms (1.785 ms / 100) 1.742 -> 1.734 ( -0.46%) [ +0.06% +0.00% +0.00% / +0.17% -0.46% -0.34%] index_copy_ spread : Elapsed 0.017 ms (1.743 ms / 100) 1.783 -> 1.777 ( -0.34%) [ +0.06% +0.06% +0.00% / +0.06% -0.34% -0.34%] index_add_ strided 3 : Elapsed 0.018 ms (1.784 ms / 100) 1.742 -> 1.729 ( -0.75%) [ +0.00% +0.00% +0.11% / -0.06% -0.75% -0.57%] index_copy_ strided 3 : Elapsed 0.017 ms (1.742 ms / 100) 1.767 -> 1.763 ( -0.23%) [ +0.11% +0.00% +0.23% / +0.17% -0.23% +0.06%] index_add_ strided 7 : Elapsed 0.018 ms (1.769 ms / 100) 1.726 -> 1.718 ( -0.46%) [ +0.00% +0.12% +0.12% / +0.17% -0.23% -0.46%] index_copy_ strided 7 : Elapsed 0.017 ms (1.726 ms / 100) 1.761 -> 1.761 ( +0.00%) [ +0.06% +0.00% +0.11% / +0.28% +0.00% +0.34%] index_add_ perm : Elapsed 0.018 ms (1.762 ms / 100) 1.718 -> 1.720 ( +0.12%) [ +0.00% +0.23% +0.06% / +0.12% +0.29% +0.23%] index_copy_ perm : Elapsed 0.017 ms (1.718 ms / 100) 1.761 -> 1.765 ( +0.23%) [ +0.28% +0.23% +0.00% / +0.23% +0.23% +0.28%] index_add_ perm_sorted : Elapsed 0.018 ms (1.766 ms / 100) 1.722 -> 1.716 ( -0.35%) [ +0.00% +0.06% +0.00% / +0.00% -0.17% -0.35%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.722 ms / 100) 8.228 -> 8.239 ( +0.13%) [ +0.24% +0.00% +0.12% / +0.13% +0.38% +0.19%] index_select const : Elapsed 0.082 ms (8.248 ms / 100) 8.277 -> 8.276 ( -0.01%) [ +0.00% +0.19% +0.11% / -0.01% +0.08% +0.12%] index_select wrap : Elapsed 0.083 ms (8.277 ms / 100) 8.261 -> 8.263 ( +0.02%) [ +0.11% +0.17% +0.00% / +0.07% +0.02% +0.08%] index_select linear : Elapsed 0.083 ms (8.270 ms / 100) 8.272 -> 8.265 ( -0.08%) [ +0.12% +0.00% +0.13% / -0.08% +0.13% +0.24%] index_select reverse : Elapsed 0.083 ms (8.282 ms / 100) 8.231 -> 8.235 ( +0.05%) [ +0.01% +0.00% +0.11% / +0.49% +0.05% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.232 ms / 100) 8.235 -> 8.238 ( +0.04%) [ +0.00% +0.10% +0.05% / +0.18% +0.04% +0.24%] index_select skip256 : Elapsed 0.082 ms (8.235 ms / 100) 8.264 -> 8.266 ( +0.02%) [ +0.10% +0.00% +0.01% / +0.10% +0.02% +0.13%] index_select spread : Elapsed 0.083 ms (8.272 ms / 100) 8.278 -> 8.279 ( +0.01%) [ +0.07% +0.00% +0.00% / +0.14% +0.01% +0.19%] index_select strided 3 : Elapsed 0.083 ms (8.284 ms / 100) 8.270 -> 8.261 ( -0.11%) [ +0.16% +0.19% +0.00% / -0.11% +0.15% +0.13%] index_select random : Elapsed 0.083 ms (8.283 ms / 100) 8.260 -> 8.245 ( -0.18%) [ +0.00% +0.02% +0.18% / -0.18% +0.08% +0.05%] index_select random_sorted : Elapsed 0.083 ms (8.260 ms / 100) out_shape = [4, 5, 20, 16] in_shape = [4, 5, 40, 16] idx_dim = 2 B = [4, 5, 20, 16] (stride (1600, 320, 1, 20)) A = [4, 5, 40, 16] (stride (3200, 1, 5, 200)) dim = 2 2.393 -> 2.397 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.38% +0.17%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.17% +0.00% +0.17%] index_select wrap : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.08% +0.21% +0.00% / +0.25% +0.17% +0.00%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.17% +0.08% +0.00% / -0.08% -0.08% +0.08%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.13% +0.13% +0.00% / +0.21% +0.13% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.400 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.21% +0.13% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.419 -> 2.419 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.00% +0.12% +0.21%] index_select spread : Elapsed 0.024 ms (2.419 ms / 100) 2.421 -> 2.417 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.04% -0.17% -0.08%] index_select strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.408 -> 2.407 ( -0.04%) [ +0.29% +0.12% +0.00% / +0.04% -0.04% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.417 ( +0.12%) [ +0.00% +0.21% +0.12% / +0.12% +0.33% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.17% +0.29% +0.00% / +0.08% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.401 ms / 100) 2.400 -> 2.403 ( +0.13%) [ +0.13% +0.08% +0.00% / +0.17% +0.13% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.403 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.00% +0.04% +0.21% / +0.08% +0.29% +0.12%] index_select random : Elapsed 0.024 ms (2.411 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.08% +0.00% / +0.21% +0.04% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.417 -> 2.417 ( +0.00%) [ +0.04% +0.00% +0.17% / +0.08% +0.00% +0.00%] index_select perm : Elapsed 0.024 ms (2.418 ms / 100) 2.419 -> 2.412 ( -0.29%) [ +0.00% +0.21% +0.04% / +0.08% -0.29% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [4, 5, 20, 16] (stride (1600, 320, 1, 20)) A = [4, 5, 40, 16] (stride (16, 2560, 64, 1)) dim = 2 2.392 -> 2.390 ( -0.08%) [ +0.13% +0.00% +0.00% / -0.08% +0.33% +0.08%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.407 -> 2.397 ( -0.42%) [ +0.08% +0.00% +0.08% / +0.04% -0.42% -0.29%] index_select wrap : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.398 ( -0.25%) [ +0.21% +0.00% +0.17% / +0.12% -0.25% -0.08%] index_select linear : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.401 ( -0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.00% -0.17%] index_select reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.397 -> 2.392 ( -0.21%) [ +0.00% +0.00% +0.00% / +0.13% -0.21% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.391 -> 2.394 ( +0.13%) [ +0.08% +0.00% +0.08% / +0.25% +0.13% +0.42%] index_select skip256 : Elapsed 0.024 ms (2.393 ms / 100) 2.403 -> 2.402 ( -0.04%) [ +0.12% +0.00% +0.04% / -0.04% +0.17% +0.25%] index_select spread : Elapsed 0.024 ms (2.406 ms / 100) 2.401 -> 2.404 ( +0.12%) [ +0.21% +0.00% +0.04% / +0.17% +0.17% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.08% +0.00% +0.17% / -0.04% +0.25% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.399 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.17% +0.21% +0.00% / +0.04% +0.17% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.00% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.401 ms / 100) 2.392 -> 2.394 ( +0.08%) [ +0.29% +0.21% +0.00% / +0.08% +0.25% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.00% +0.13% +0.17% / +0.08% +0.29% +0.42%] index_select random : Elapsed 0.024 ms (2.398 ms / 100) 2.402 -> 2.399 ( -0.12%) [ +0.00% +0.04% +0.04% / -0.12% +0.04% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.402 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.12% +0.00% +0.12%] index_select perm : Elapsed 0.024 ms (2.404 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.08% +0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) B = [4, 5, 20, 16] (stride (1600, 1, 80, 5)) dim = 2 fill_cnt = 40 1.691 -> 1.693 ( +0.12%) [ +0.00% +0.06% +0.35% / +0.12% +0.30% +0.30%] index_fill_ const : Elapsed 0.017 ms (1.691 ms / 100) 1.700 -> 1.702 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.29% +0.41%] index_fill_ linear : Elapsed 0.017 ms (1.701 ms / 100) 1.693 -> 1.694 ( +0.06%) [ +0.30% +0.00% +0.00% / +0.06% +0.47% +0.41%] index_fill_ reverse : Elapsed 0.017 ms (1.698 ms / 100) 1.692 -> 1.694 ( +0.12%) [ +0.00% +0.12% +0.18% / +0.12% +0.35% +0.12%] index_fill_ skip64 : Elapsed 0.017 ms (1.692 ms / 100) 1.692 -> 1.693 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.12% +0.30%] index_fill_ skip256 : Elapsed 0.017 ms (1.693 ms / 100) 1.698 -> 1.698 ( +0.00%) [ +0.00% +0.35% +0.00% / +0.12% +0.29% +0.00%] index_fill_ spread : Elapsed 0.017 ms (1.698 ms / 100) 1.698 -> 1.701 ( +0.18%) [ +0.24% +0.00% +0.18% / +0.29% +0.18% +0.35%] index_fill_ strided 3 : Elapsed 0.017 ms (1.702 ms / 100) 1.692 -> 1.696 ( +0.24%) [ +0.18% +0.30% +0.00% / +0.24% +0.35% +0.41%] index_fill_ strided 5 : Elapsed 0.017 ms (1.695 ms / 100) 1.702 -> 1.700 ( -0.12%) [ +0.06% +0.00% +0.00% / +0.18% -0.12% +0.06%] index_fill_ strided 7 : Elapsed 0.017 ms (1.703 ms / 100) 1.696 -> 1.696 ( +0.00%) [ +0.00% +0.12% +0.06% / +0.00% +0.18% +0.12%] index_fill_ strided 8 : Elapsed 0.017 ms (1.696 ms / 100) 1.696 -> 1.698 ( +0.12%) [ +0.00% +0.18% +0.18% / +0.12% +0.24% +0.24%] index_fill_ strided 16 : Elapsed 0.017 ms (1.696 ms / 100) 1.697 -> 1.700 ( +0.18%) [ +0.24% +0.00% +0.00% / +0.18% +6.89% +0.29%] index_fill_ random : Elapsed 0.017 ms (1.701 ms / 100) 1.698 -> 1.698 ( +0.00%) [ +0.00% +0.35% +0.12% / +0.00% +0.29% +0.24%] index_fill_ random_sorted : Elapsed 0.017 ms (1.698 ms / 100) B = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) A = [4, 5, 40, 16] (stride (200, 1, 5, 800)) dim = 2 2.447 -> 2.449 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.33% +0.12%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.465 -> 2.461 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.04% +0.00% -0.16%] index_select wrap : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.464 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.28% +0.08% +0.12%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.20% +0.00% +0.08% / +0.32% +0.08% -0.08%] index_select reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.20% +0.00% +0.12% / +0.16% +0.12% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.452 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.29% +0.33%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.470 -> 2.474 ( +0.16%) [ +0.24% +0.00% +0.20% / +0.20% +0.16% +0.28%] index_select spread : Elapsed 0.025 ms (2.476 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.16% -0.04% +0.04%] index_select strided 3 : Elapsed 0.025 ms (2.474 ms / 100) 2.464 -> 2.462 ( -0.08%) [ +0.00% +0.16% +0.12% / +0.12% -0.08% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.08% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.451 -> 2.456 ( +0.20%) [ +0.04% +0.00% +0.04% / +0.20% +0.33% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.12% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.455 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.24% +0.00% +0.16% / +0.12% +0.24% +0.32%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.12% +0.16%] index_select random_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.08% +0.16% +0.00% / +0.00% -0.04% +0.08%] index_select perm : Elapsed 0.025 ms (2.472 ms / 100) 2.475 -> 2.465 ( -0.40%) [ +0.00% +0.00% +0.00% / -0.08% -0.28% -0.40%] index_select perm_sorted : Elapsed 0.025 ms (2.475 ms / 100) B = [4, 5, 20, 16] (stride (20, 1280, 1, 80)) A = [4, 5, 40, 16] (stride (1, 2560, 64, 4)) dim = 2 2.445 -> 2.447 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.25% +0.25%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.453 -> 2.447 ( -0.24%) [ +0.00% +0.33% +0.08% / +0.00% -0.24% -0.24%] index_select wrap : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.444 ( -0.24%) [ +0.20% +0.00% +0.41% / +0.08% -0.16% -0.24%] index_select linear : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.449 ( -0.04%) [ +0.00% +0.16% +0.00% / -0.04% +0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.00% +0.25% +0.04% / +0.12% +0.04% +0.16%] index_select skip64 : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.29% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.16% +0.20%] index_select spread : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.24% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.449 ( +0.20%) [ +0.29% +0.16% +0.00% / +0.20% +0.20% +0.41%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.12% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.449 -> 2.446 ( -0.12%) [ +0.04% +0.04% +0.00% / +0.04% -0.12% +0.04%] index_select strided 8 : Elapsed 0.024 ms (2.450 ms / 100) 2.444 -> 2.450 ( +0.25%) [ +0.25% +0.00% +0.20% / +0.25% +0.25% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.25% +0.25%] index_select random : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.08% +0.12% +0.00% / -0.08% -0.08% +0.33%] index_select random_sorted : Elapsed 0.025 ms (2.452 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.16% +0.12% +0.00% / +0.12% +0.12% +0.08%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.454 ( +0.16%) [ +0.16% +0.00% +0.04% / +0.16% +0.90% +0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.454 ms / 100) B = [4, 5, 20, 16] (stride (20, 1280, 1, 80)) A = [4, 5, 40, 16] (stride (40, 160, 1, 800)) dim = 2 2.451 -> 2.456 ( +0.20%) [ +0.12% +0.00% +0.08% / +0.20% +0.24% +0.24%] index_select const : Elapsed 0.025 ms (2.454 ms / 100) 2.462 -> 2.465 ( +0.12%) [ +0.32% +0.24% +0.00% / +0.16% +0.12% +0.12%] index_select wrap : Elapsed 0.025 ms (2.470 ms / 100) 2.464 -> 2.465 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.32% +0.04% +0.04%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.464 -> 2.462 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.08% +0.00%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.04% +0.12% +0.00% / +0.08% +0.04% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.456 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.29% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.456 ms / 100) 2.469 -> 2.471 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.16% +0.16% +0.08%] index_select spread : Elapsed 0.025 ms (2.469 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.12% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.08% +0.00% +0.12% / +0.08% +0.08% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.473 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.00% +0.00% +0.24% / -0.04% +0.12% +0.04%] index_select strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.00% +0.12% +0.12% / +0.16% +0.36% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.469 ( +0.00%) [ +0.00% +0.00% +0.41% / +0.00% +0.24% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.469 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.04% +0.16%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.16% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.04% +0.00% +0.16% / +0.12% +0.04% +0.20%] index_select perm : Elapsed 0.025 ms (2.472 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.24% +0.12% +0.00% / +0.16% -0.08% -0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) B = [4, 5, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 5, 40, 16] (stride (3200, 1, 5, 200)) dim = 2 2.449 -> 2.448 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.33% +0.20%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.465 -> 2.459 ( -0.24%) [ +0.16% +0.20% +0.00% / -0.08% -0.12% -0.24%] index_select wrap : Elapsed 0.025 ms (2.469 ms / 100) 2.464 -> 2.459 ( -0.20%) [ +0.00% +0.04% +0.16% / +0.16% -0.20% -0.08%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.464 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.16% +0.20%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.12% +0.33% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.20% +0.29%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.20% +0.28%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.467 -> 2.471 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.36% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.04% +0.08% +0.00% / -0.08% +0.20% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.463 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.08% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.00% +0.24% +0.24% / +0.29% +0.33% +0.37%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.65% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.456 ms / 100) 2.465 -> 2.470 ( +0.20%) [ +0.00% +0.04% +0.04% / +0.20% +0.49% +0.37%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +4.54% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.466 -> 2.471 ( +0.20%) [ +0.00% +0.24% +0.08% / +0.28% +0.28% +0.20%] index_select perm : Elapsed 0.025 ms (2.466 ms / 100) 2.469 -> 2.466 ( -0.12%) [ +0.00% +0.04% +0.08% / -0.12% +0.12% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) B = [4, 5, 20, 16] (stride (80, 16, 320, 1)) A = [4, 5, 40, 16] (stride (3200, 640, 16, 1)) dim = 2 1.452 -> 1.453 ( +0.07%) [ +0.07% +0.00% +0.21% / +0.07% +0.28% +0.48%] index_select const : Elapsed 0.015 ms (1.453 ms / 100) 1.471 -> 1.470 ( -0.07%) [ +0.00% +0.14% +0.14% / -0.07% +0.61% +0.82%] index_select wrap : Elapsed 0.015 ms (1.471 ms / 100) 1.469 -> 1.471 ( +0.14%) [ +0.20% +0.00% +0.07% / +0.14% +0.88% +0.48%] index_select linear : Elapsed 0.015 ms (1.472 ms / 100) 1.470 -> 1.473 ( +0.20%) [ +0.20% +0.27% +0.00% / +0.20% +0.61% +0.54%] index_select reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.451 -> 1.455 ( +0.28%) [ +0.00% +0.14% +0.21% / +0.28% +0.48% +0.34%] index_select skip64 : Elapsed 0.015 ms (1.451 ms / 100) 1.452 -> 1.455 ( +0.21%) [ +0.00% +0.07% +0.21% / +0.21% +0.55% +0.62%] index_select skip256 : Elapsed 0.015 ms (1.452 ms / 100) 1.470 -> 1.473 ( +0.20%) [ +0.00% +0.07% +0.00% / +0.20% +0.82% +0.75%] index_select spread : Elapsed 0.015 ms (1.470 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.00% +0.14% +0.14% / +0.14% +0.07% +0.20%] index_select strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.462 -> 1.462 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.34% +0.41%] index_select strided 5 : Elapsed 0.015 ms (1.464 ms / 100) 1.470 -> 1.468 ( -0.14%) [ +0.00% +0.14% +0.07% / -0.14% +0.75% +0.54%] index_select strided 7 : Elapsed 0.015 ms (1.470 ms / 100) 1.454 -> 1.458 ( +0.28%) [ +0.28% +0.00% +0.28% / +0.28% +0.34% +0.62%] index_select strided 8 : Elapsed 0.015 ms (1.458 ms / 100) 1.457 -> 1.457 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.34% +0.48%] index_select strided 16 : Elapsed 0.015 ms (1.460 ms / 100) 1.468 -> 1.469 ( +0.07%) [ +0.00% +0.14% +0.20% / +0.07% +0.54% +0.41%] index_select random : Elapsed 0.015 ms (1.468 ms / 100) 1.467 -> 1.466 ( -0.07%) [ +0.00% +0.00% +0.27% / -0.07% +0.61% +0.41%] index_select random_sorted : Elapsed 0.015 ms (1.467 ms / 100) 1.469 -> 1.467 ( -0.14%) [ +0.14% +0.14% +0.00% / -0.14% +0.14% +0.27%] index_select perm : Elapsed 0.015 ms (1.471 ms / 100) 1.468 -> 1.467 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.75% +0.68%] index_select perm_sorted : Elapsed 0.015 ms (1.468 ms / 100) B = [4, 5, 20, 16] (stride (16, 64, 320, 1)) A = [4, 5, 40, 16] (stride (1, 4, 320, 20)) dim = 2 2.445 -> 2.447 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.12% +0.16% +0.08%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.455 -> 2.446 ( -0.37%) [ +0.04% +0.00% +0.20% / +0.04% -0.24% -0.37%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.453 -> 2.448 ( -0.20%) [ +0.00% +0.12% +0.08% / +0.20% -0.20% -0.04%] index_select linear : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.16% +0.12% +0.00% / +0.12% +79.18% +0.08%] index_select reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.444 ( -0.08%) [ +0.00% +0.20% +0.16% / +0.00% +0.49% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.448 ( +0.16%) [ +0.04% +0.00% +0.00% / +0.16% +0.53% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.445 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.57% +0.33%] index_select spread : Elapsed 0.025 ms (2.452 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.57% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.12% +0.41% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.12% +0.16% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.447 ( -0.04%) [ +0.12% +0.00% +0.16% / -0.04% +0.16% +0.04%] index_select strided 8 : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.451 ( +0.20%) [ +0.08% +0.00% +0.16% / +0.25% +0.33% +0.20%] index_select strided 16 : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.16% +0.08%] index_select random : Elapsed 0.025 ms (2.452 ms / 100) 2.452 -> 2.451 ( -0.04%) [ +0.08% +0.16% +0.00% / +0.08% +0.08% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.04% +0.00% +0.24% / +0.12% +0.16% +0.12%] index_select perm : Elapsed 0.025 ms (2.451 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.12% +0.04% +0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) B = [4, 5, 20, 16] (stride (1, 64, 320, 4)) A = [4, 5, 40, 16] (stride (80, 16, 320, 1)) dim = 2 2.403 -> 2.409 ( +0.25%) [ +0.12% +0.00% +0.04% / +0.25% +0.29% +0.25%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.04% -0.08% -0.04%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.21% +0.04% +0.04%] index_select linear : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.00% -0.04%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.404 -> 2.402 ( -0.08%) [ +0.00% +0.21% +0.21% / -0.08% +0.08% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.12% +0.00% +0.33% / +0.04% +0.21% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.406 ms / 100) 2.410 -> 2.413 ( +0.12%) [ +0.00% +0.12% +0.21% / +0.17% +0.33% +0.12%] index_select spread : Elapsed 0.024 ms (2.410 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.25% +0.17% / +0.04% -0.04% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.17% -0.04%] index_select strided 5 : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.00% +0.04% +0.21% / +0.17% +0.08% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.408 ( +0.21%) [ +0.00% +0.21% +0.04% / +0.21% +0.29% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.403 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.17% +0.12% +0.00% / +0.12% +0.25% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.17% +0.25%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.17% +0.21% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.408 ( -0.29%) [ +0.00% +0.00% +0.00% / +0.04% -0.29% -0.04%] index_select perm : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.404 ( -0.41%) [ +0.04% +0.08% +0.00% / +0.21% -0.25% -0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) B = [4, 5, 20, 16] (stride (1, 4, 320, 20)) A = [4, 5, 40, 16] (stride (640, 2560, 16, 1)) dim = 2 2.446 -> 2.446 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.00%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.458 -> 2.449 ( -0.37%) [ +0.20% +0.33% +0.00% / +0.08% -0.20% -0.37%] index_select wrap : Elapsed 0.025 ms (2.463 ms / 100) 2.462 -> 2.451 ( -0.45%) [ +0.04% +0.16% +0.00% / -0.04% -0.28% -0.45%] index_select linear : Elapsed 0.025 ms (2.463 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.00% +0.04% +0.04% / +0.00% -0.12% -0.08%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.08% +0.04% +0.00% / -0.16% -0.16% -0.08%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.04% +0.00% +0.08% / +0.20% +0.33% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.456 -> 2.456 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.24% +0.16%] index_select spread : Elapsed 0.025 ms (2.460 ms / 100) 2.452 -> 2.459 ( +0.29%) [ +0.00% +0.16% +0.20% / +0.29% +0.57% +0.33%] index_select strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.20% +0.00% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.459 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.08% +0.04% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.20% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.12% +0.08%] index_select random : Elapsed 0.025 ms (2.455 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.00% +0.00% +0.04% / +0.04% -0.12% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.00% +0.08% +0.00% / +0.00% -0.04% -0.04%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.00% +0.08% +0.12% / +0.16% +0.20% +0.00%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) B = [4, 5, 20, 16] (stride (100, 20, 1, 400)) A = [4, 5, 40, 16] (stride (16, 2560, 64, 1)) dim = 2 2.392 -> 2.394 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.25% +0.25%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.409 -> 2.408 ( -0.04%) [ +0.12% +0.12% +0.00% / +0.00% +0.12% -0.04%] index_select wrap : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.00% +0.25% +0.12% / +0.17% +0.12% +0.12%] index_select linear : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.407 ( +0.00%) [ +0.00% +0.17% +0.08% / +0.21% +0.12% +0.00%] index_select reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.394 -> 2.396 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.08% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.04% +0.17% +0.00% / +0.13% +0.25% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.394 ms / 100) 2.408 -> 2.408 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.04% +0.00% +0.17%] index_select spread : Elapsed 0.024 ms (2.411 ms / 100) 2.414 -> 2.409 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.21% -0.04% +0.00%] index_select strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.401 -> 2.401 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.04% +0.04% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.404 ms / 100) 2.402 -> 2.406 ( +0.17%) [ +0.00% +0.29% +0.21% / +0.17% +0.42% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.396 -> 2.396 ( +0.00%) [ +0.00% +0.08% +0.21% / +0.00% +0.17% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.396 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.25% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.402 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.08% +0.00% +0.12% / -0.04% +0.08% +0.00%] index_select random : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.29% +0.25% +0.00% / +0.21% -0.08% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.407 -> 2.405 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.12% +0.17%] index_select perm : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.401 ( -0.25%) [ +0.25% +0.37% +0.00% / +0.17% -0.21% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) B = [4, 5, 20, 16] (stride (100, 1, 5, 400)) A = [4, 5, 40, 16] (stride (1, 64, 320, 4)) dim = 2 2.448 -> 2.448 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.04% +0.04% +0.00%] index_select const : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.448 ( -0.16%) [ +0.00% +0.37% +0.33% / +0.20% -0.16% +0.12%] index_select wrap : Elapsed 0.025 ms (2.452 ms / 100) 2.457 -> 2.451 ( -0.24%) [ +0.00% +0.00% +0.08% / -0.08% -0.24% -0.24%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.452 -> 2.450 ( -0.08%) [ +0.12% +0.00% +0.08% / +0.20% +0.08% -0.08%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.446 ( -0.16%) [ +0.04% +0.12% +0.00% / -0.16% -0.12% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.12% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.448 ms / 100) 2.454 -> 2.453 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.29% +0.08%] index_select spread : Elapsed 0.025 ms (2.455 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.33% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.448 ( -0.12%) [ +0.08% +0.04% +0.00% / -0.12% +0.20% +0.20%] index_select strided 5 : Elapsed 0.025 ms (2.453 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.08% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.12% +0.12% / +0.00% +0.04% -0.04%] index_select strided 8 : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.16% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.16% +0.29%] index_select random : Elapsed 0.025 ms (2.452 ms / 100) 2.451 -> 2.452 ( +0.04%) [ +0.12% +0.00% +0.24% / +0.04% +0.08% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.20% +0.00% +0.24% / +0.16% +0.41% +0.08%] index_select perm : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.12% +0.00% +0.12% / +0.16% +0.20% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) out_shape = [4, 5, 40, 20] in_shape = [4, 5, 40, 16] idx_dim = 3 B = [4, 5, 40, 20] (stride (4000, 800, 20, 1)) A = [4, 5, 40, 16] (stride (5, 1, 320, 20)) dim = 3 3.422 -> 3.423 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.58% +0.58%] index_add_ linear : Elapsed 0.034 ms (3.423 ms / 100) 3.242 -> 3.242 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.65% +0.59%] index_copy_ linear : Elapsed 0.032 ms (3.243 ms / 100) 3.422 -> 3.433 ( +0.32%) [ +0.12% +0.00% +0.26% / +0.32% +0.85% +0.91%] index_add_ reverse : Elapsed 0.034 ms (3.426 ms / 100) 3.245 -> 3.253 ( +0.25%) [ +0.15% +0.00% +0.15% / +0.25% +0.71% +0.86%] index_copy_ reverse : Elapsed 0.033 ms (3.250 ms / 100) 3.407 -> 3.408 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.85% +0.85%] index_add_ spread : Elapsed 0.034 ms (3.407 ms / 100) 3.239 -> 3.239 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.74% +0.74%] index_copy_ spread : Elapsed 0.032 ms (3.239 ms / 100) 3.403 -> 3.404 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.85% +0.68%] index_add_ strided 3 : Elapsed 0.034 ms (3.406 ms / 100) 3.239 -> 3.239 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.77% +0.71%] index_copy_ strided 3 : Elapsed 0.032 ms (3.239 ms / 100) 3.405 -> 3.407 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.70% +0.73%] index_add_ strided 7 : Elapsed 0.034 ms (3.407 ms / 100) 3.238 -> 3.238 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.77% +0.77%] index_copy_ strided 7 : Elapsed 0.032 ms (3.238 ms / 100) 3.405 -> 3.408 ( +0.09%) [ +0.06% +0.00% +0.12% / +0.09% +0.88% +0.85%] index_add_ perm : Elapsed 0.034 ms (3.407 ms / 100) 3.238 -> 3.238 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.77% +0.77%] index_copy_ perm : Elapsed 0.032 ms (3.238 ms / 100) 3.427 -> 3.431 ( +0.12%) [ +0.09% +0.00% +0.03% / +0.12% +0.53% +0.79%] index_add_ perm_sorted : Elapsed 0.034 ms (3.430 ms / 100) 3.249 -> 3.252 ( +0.09%) [ +0.00% +0.00% +0.03% / +0.09% +0.65% +0.71%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.249 ms / 100) 5.310 -> 5.306 ( -0.08%) [ +0.00% +0.11% +0.06% / -0.08% -0.02% +0.02%] index_select const : Elapsed 0.053 ms (5.310 ms / 100) 5.333 -> 5.333 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.06% +0.00% +0.08%] index_select wrap : Elapsed 0.053 ms (5.340 ms / 100) 5.331 -> 5.328 ( -0.06%) [ +0.09% +0.00% +0.02% / +0.11% +0.24% -0.06%] index_select linear : Elapsed 0.053 ms (5.336 ms / 100) 5.326 -> 5.335 ( +0.17%) [ +0.23% +0.00% +0.23% / +0.32% +0.17% +0.21%] index_select reverse : Elapsed 0.053 ms (5.338 ms / 100) 5.308 -> 5.311 ( +0.06%) [ +0.02% +0.00% +0.11% / +0.06% +0.21% +0.15%] index_select skip64 : Elapsed 0.053 ms (5.309 ms / 100) 5.308 -> 5.308 ( +0.00%) [ +0.13% +0.00% +0.15% / +0.08% +0.00% +0.09%] index_select skip256 : Elapsed 0.053 ms (5.315 ms / 100) 5.331 -> 5.332 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +0.08% +0.11%] index_select spread : Elapsed 0.053 ms (5.331 ms / 100) 5.330 -> 5.327 ( -0.06%) [ +0.13% +0.06% +0.00% / +0.11% -0.06% +0.02%] index_select strided 3 : Elapsed 0.053 ms (5.337 ms / 100) 5.331 -> 5.330 ( -0.02%) [ +0.11% +0.00% +0.13% / +0.04% -0.02% +0.02%] index_select strided 5 : Elapsed 0.053 ms (5.337 ms / 100) 5.328 -> 5.333 ( +0.09%) [ +0.00% +0.15% +0.13% / +0.11% +0.09% +0.19%] index_select strided 7 : Elapsed 0.053 ms (5.328 ms / 100) 5.310 -> 5.308 ( -0.04%) [ +0.15% +0.15% +0.00% / -0.04% +0.11% +0.15%] index_select strided 8 : Elapsed 0.053 ms (5.318 ms / 100) 5.327 -> 5.327 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.13% +0.00% +0.15%] index_select random : Elapsed 0.053 ms (5.331 ms / 100) 5.324 -> 5.331 ( +0.13%) [ +0.11% +0.00% +0.11% / +0.19% +0.13% +0.15%] index_select random_sorted : Elapsed 0.053 ms (5.330 ms / 100) B = [4, 5, 40, 20] (stride (4000, 20, 100, 1)) A = [4, 5, 40, 16] (stride (640, 2560, 1, 40)) dim = 3 3.939 -> 3.940 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.86% +0.74%] index_add_ linear : Elapsed 0.039 ms (3.940 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.73% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.814 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.10% +0.08% +0.00% / +0.03% +0.74% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.936 ms / 100) 3.808 -> 3.809 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.76% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.810 ms / 100) 3.936 -> 3.936 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.69% +0.69%] index_add_ spread : Elapsed 0.039 ms (3.937 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.79% +0.68%] index_copy_ spread : Elapsed 0.038 ms (3.812 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.81% +0.81%] index_add_ strided 3 : Elapsed 0.039 ms (3.935 ms / 100) 3.811 -> 3.811 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.038 ms (3.812 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.79% +0.81%] index_add_ strided 7 : Elapsed 0.039 ms (3.934 ms / 100) 3.808 -> 3.809 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.79% +0.74%] index_copy_ strided 7 : Elapsed 0.038 ms (3.810 ms / 100) 3.941 -> 3.941 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.74% +0.71%] index_add_ perm : Elapsed 0.039 ms (3.942 ms / 100) 3.815 -> 3.816 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.76% +0.71%] index_copy_ perm : Elapsed 0.038 ms (3.816 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.74%] index_add_ perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) 3.809 -> 3.808 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.811 ms / 100) 5.560 -> 5.558 ( -0.04%) [ +0.00% +0.09% +0.05% / -0.04% +0.16% +0.11%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.578 -> 5.583 ( +0.09%) [ +0.02% +0.09% +0.00% / +0.11% +0.11% +0.09%] index_select wrap : Elapsed 0.056 ms (5.579 ms / 100) 5.581 -> 5.576 ( -0.09%) [ +0.00% +0.02% +0.05% / -0.09% +0.13% +0.07%] index_select linear : Elapsed 0.056 ms (5.581 ms / 100) 5.576 -> 5.578 ( +0.04%) [ +0.07% +0.00% +0.09% / +0.04% +0.09% +0.14%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.563 -> 5.560 ( -0.05%) [ +0.00% +0.11% +0.20% / -0.05% +0.07% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.563 ms / 100) 5.561 -> 5.552 ( -0.16%) [ +0.04% +0.02% +0.00% / -0.16% +0.14% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.577 -> 5.577 ( +0.00%) [ +0.00% +0.04% +0.14% / +0.13% +0.25% +0.00%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.578 -> 5.578 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.00% +0.11% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.578 ms / 100) 5.581 -> 5.576 ( -0.09%) [ +0.00% +0.07% +0.09% / -0.09% -0.02% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.575 -> 5.576 ( +0.02%) [ +0.05% +0.04% +0.00% / +0.02% +0.72% +0.25%] index_select strided 7 : Elapsed 0.056 ms (5.578 ms / 100) 5.558 -> 5.565 ( +0.13%) [ +0.20% +0.20% +0.00% / +0.13% +0.16% +0.16%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.573 -> 5.575 ( +0.04%) [ +0.00% +0.04% +0.05% / +0.07% +0.07% +0.04%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.575 -> 5.570 ( -0.09%) [ +0.00% +0.00% +0.05% / -0.09% +0.22% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) B = [4, 5, 40, 20] (stride (4000, 20, 100, 1)) A = [4, 5, 40, 16] (stride (200, 1, 5, 800)) dim = 3 4.042 -> 4.046 ( +0.10%) [ +0.10% +0.12% +0.00% / +0.10% +0.84% +0.82%] index_add_ linear : Elapsed 0.040 ms (4.046 ms / 100) 3.917 -> 3.918 ( +0.03%) [ +0.10% +0.00% +0.18% / +0.03% +0.79% +0.71%] index_copy_ linear : Elapsed 0.039 ms (3.921 ms / 100) 4.046 -> 4.041 ( -0.12%) [ +0.05% +0.00% +0.02% / -0.12% +0.69% +0.59%] index_add_ reverse : Elapsed 0.040 ms (4.048 ms / 100) 3.919 -> 3.917 ( -0.05%) [ +0.08% +0.00% +0.08% / -0.05% +0.74% +0.61%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.036 -> 4.035 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.82% +0.79%] index_add_ spread : Elapsed 0.040 ms (4.038 ms / 100) 3.910 -> 3.915 ( +0.13%) [ +0.00% +0.26% +0.20% / +0.13% +0.66% +0.61%] index_copy_ spread : Elapsed 0.039 ms (3.910 ms / 100) 4.044 -> 4.054 ( +0.25%) [ +0.27% +0.00% +0.02% / +0.25% +0.84% +0.91%] index_add_ strided 3 : Elapsed 0.041 ms (4.055 ms / 100) 3.922 -> 3.926 ( +0.10%) [ +0.18% +0.03% +0.00% / +0.10% +0.79% +0.89%] index_copy_ strided 3 : Elapsed 0.039 ms (3.929 ms / 100) 4.048 -> 4.047 ( -0.02%) [ +0.05% +0.00% +0.10% / -0.02% +0.79% +0.79%] index_add_ strided 7 : Elapsed 0.041 ms (4.050 ms / 100) 3.927 -> 3.925 ( -0.05%) [ +0.00% +0.05% +0.08% / -0.05% +0.71% +0.76%] index_copy_ strided 7 : Elapsed 0.039 ms (3.927 ms / 100) 4.037 -> 4.036 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.77% +0.77%] index_add_ perm : Elapsed 0.040 ms (4.039 ms / 100) 3.907 -> 3.916 ( +0.23%) [ +0.26% +0.38% +0.00% / +0.23% +0.64% +0.67%] index_copy_ perm : Elapsed 0.039 ms (3.917 ms / 100) 4.046 -> 4.045 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.64% +0.62%] index_add_ perm_sorted : Elapsed 0.040 ms (4.047 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.10% +0.13% +0.00% / +0.05% +0.61% +0.54%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.00% +0.09% +0.02% / +0.13% +0.07% +0.09%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.05% +0.00% +0.04% / +0.20% +0.02% +0.09%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.22% +0.02%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.567 -> 5.574 ( +0.13%) [ +0.13% +0.00% +0.07% / +0.13% +0.18% +0.16%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.553 -> 5.560 ( +0.13%) [ +0.14% +0.00% +0.05% / +0.13% +0.25% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.556 -> 5.563 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.16% +0.23%] index_select skip256 : Elapsed 0.056 ms (5.560 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.18% +0.00% +0.07% / +0.05% -0.11% +0.07%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.574 -> 5.569 ( -0.09%) [ +0.05% +0.00% +0.09% / +0.07% -0.09% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.573 -> 5.572 ( -0.02%) [ +0.04% +0.00% +0.16% / -0.02% +0.02% -0.02%] index_select strided 5 : Elapsed 0.056 ms (5.575 ms / 100) 5.573 -> 5.570 ( -0.05%) [ +0.00% +0.02% +0.00% / +0.00% -0.05% -0.02%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.05% +0.00% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.563 -> 5.561 ( -0.04%) [ +0.04% +0.07% +0.00% / +0.00% -0.04% +0.14%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.568 ( +0.18%) [ +0.16% +0.13% +0.00% / +0.18% +0.18% +0.25%] index_select random_sorted : Elapsed 0.056 ms (5.567 ms / 100) B = [4, 5, 40, 20] (stride (800, 3200, 20, 1)) A = [4, 5, 40, 16] (stride (1, 64, 320, 4)) dim = 3 4.452 -> 4.454 ( +0.04%) [ +0.00% +0.07% +0.04% / +0.04% +0.74% +0.74%] index_add_ linear : Elapsed 0.045 ms (4.452 ms / 100) 4.275 -> 4.279 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.94% +0.91%] index_copy_ linear : Elapsed 0.043 ms (4.279 ms / 100) 4.454 -> 4.455 ( +0.02%) [ +0.09% +0.07% +0.00% / +0.02% +0.67% +0.76%] index_add_ reverse : Elapsed 0.045 ms (4.458 ms / 100) 4.282 -> 4.287 ( +0.12%) [ +0.00% +0.07% +0.00% / +0.12% +0.77% +0.77%] index_copy_ reverse : Elapsed 0.043 ms (4.282 ms / 100) 4.466 -> 4.464 ( -0.04%) [ +0.00% +0.07% +0.09% / -0.04% +0.72% +0.76%] index_add_ spread : Elapsed 0.045 ms (4.466 ms / 100) 4.292 -> 4.287 ( -0.12%) [ +0.00% +0.05% +0.02% / -0.12% +0.72% +0.70%] index_copy_ spread : Elapsed 0.043 ms (4.292 ms / 100) 4.459 -> 4.468 ( +0.20%) [ +0.00% +0.16% +0.07% / +0.20% +0.96% +0.92%] index_add_ strided 3 : Elapsed 0.045 ms (4.459 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.00% +0.07% +0.02% / +0.07% +0.79% +0.79%] index_copy_ strided 3 : Elapsed 0.043 ms (4.279 ms / 100) 4.454 -> 4.457 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.07% +0.72% +0.72%] index_add_ strided 7 : Elapsed 0.045 ms (4.454 ms / 100) 4.281 -> 4.288 ( +0.16%) [ +0.14% +0.02% +0.00% / +0.16% +0.89% +0.82%] index_copy_ strided 7 : Elapsed 0.043 ms (4.287 ms / 100) 4.447 -> 4.454 ( +0.16%) [ +0.16% +0.20% +0.00% / +0.16% +0.90% +0.85%] index_add_ perm : Elapsed 0.045 ms (4.454 ms / 100) 4.277 -> 4.277 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.77% +0.75%] index_copy_ perm : Elapsed 0.043 ms (4.278 ms / 100) 4.462 -> 4.465 ( +0.07%) [ +0.22% +0.16% +0.00% / +0.07% +0.72% +0.78%] index_add_ perm_sorted : Elapsed 0.045 ms (4.472 ms / 100) 4.284 -> 4.285 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.68% +0.70%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.285 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.09% +0.00% +0.07% / -0.02% +0.04% -0.04%] index_select const : Elapsed 0.056 ms (5.574 ms / 100) 5.582 -> 5.583 ( +0.02%) [ +0.07% +0.00% +0.09% / +0.02% +0.13% +0.21%] index_select wrap : Elapsed 0.056 ms (5.586 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.20% +0.00% +0.14% / +0.09% +0.27% +0.23%] index_select linear : Elapsed 0.056 ms (5.590 ms / 100) 5.583 -> 5.585 ( +0.04%) [ +0.00% +0.09% +0.02% / +0.04% +0.20% +0.11%] index_select reverse : Elapsed 0.056 ms (5.583 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.09% +0.16% +0.00% / +0.22% +0.25% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.568 -> 5.568 ( +0.00%) [ +0.02% +0.00% +0.11% / +0.05% +0.16% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.584 -> 5.582 ( -0.04%) [ +0.05% +0.00% +0.05% / +0.02% -0.04% +0.00%] index_select spread : Elapsed 0.056 ms (5.587 ms / 100) 5.582 -> 5.589 ( +0.13%) [ +0.00% +0.07% +0.02% / +0.16% +0.16% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.583 -> 5.587 ( +0.07%) [ +0.16% +0.02% +0.00% / +0.11% +0.07% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.592 ms / 100) 5.584 -> 5.582 ( -0.04%) [ +0.11% +0.02% +0.00% / -0.04% +0.25% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.590 ms / 100) 5.564 -> 5.564 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.16% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.579 -> 5.583 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.11% +0.16%] index_select random : Elapsed 0.056 ms (5.579 ms / 100) 5.580 -> 5.575 ( -0.09%) [ +0.05% +0.00% +0.09% / -0.09% +0.05% +0.25%] index_select random_sorted : Elapsed 0.056 ms (5.583 ms / 100) B = [4, 5, 40, 20] (stride (100, 1, 400, 5)) A = [4, 5, 40, 16] (stride (1, 160, 4, 800)) dim = 3 3.645 -> 3.647 ( +0.05%) [ +0.03% +0.11% +0.00% / +0.05% +0.74% +0.80%] index_add_ linear : Elapsed 0.036 ms (3.646 ms / 100) 3.523 -> 3.528 ( +0.14%) [ +0.06% +0.06% +0.00% / +0.14% +0.77% +0.77%] index_copy_ linear : Elapsed 0.035 ms (3.525 ms / 100) 3.649 -> 3.660 ( +0.30%) [ +0.08% +0.00% +0.00% / +0.30% +0.90% +0.88%] index_add_ reverse : Elapsed 0.037 ms (3.652 ms / 100) 3.528 -> 3.530 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.77% +0.77%] index_copy_ reverse : Elapsed 0.035 ms (3.531 ms / 100) 3.647 -> 3.649 ( +0.05%) [ +0.08% +0.14% +0.00% / +0.05% +0.82% +0.66%] index_add_ spread : Elapsed 0.036 ms (3.650 ms / 100) 3.530 -> 3.530 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.68% +0.54%] index_copy_ spread : Elapsed 0.035 ms (3.531 ms / 100) 3.646 -> 3.646 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.69% +0.74%] index_add_ strided 3 : Elapsed 0.036 ms (3.648 ms / 100) 3.525 -> 3.528 ( +0.09%) [ +0.17% +0.00% +0.03% / +0.09% +0.65% +0.68%] index_copy_ strided 3 : Elapsed 0.035 ms (3.531 ms / 100) 3.647 -> 3.648 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.80% +0.63%] index_add_ strided 7 : Elapsed 0.036 ms (3.649 ms / 100) 3.530 -> 3.528 ( -0.06%) [ +0.00% +0.03% +0.06% / -0.06% +0.48% +0.42%] index_copy_ strided 7 : Elapsed 0.035 ms (3.530 ms / 100) 3.650 -> 3.651 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.74% +0.71%] index_add_ perm : Elapsed 0.036 ms (3.650 ms / 100) 3.528 -> 3.533 ( +0.14%) [ +0.11% +0.06% +0.00% / +0.14% +0.57% +0.71%] index_copy_ perm : Elapsed 0.035 ms (3.532 ms / 100) 3.653 -> 3.653 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.71% +0.68%] index_add_ perm_sorted : Elapsed 0.037 ms (3.655 ms / 100) 3.530 -> 3.532 ( +0.06%) [ +0.00% +0.14% +0.06% / +0.06% +0.65% +0.59%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.530 ms / 100) 5.480 -> 5.487 ( +0.13%) [ +0.00% +0.13% +0.02% / +0.15% +0.16% +0.13%] index_select const : Elapsed 0.055 ms (5.480 ms / 100) 5.495 -> 5.488 ( -0.13%) [ +0.18% +0.00% +0.13% / -0.13% +0.00% -0.07%] index_select wrap : Elapsed 0.055 ms (5.505 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.00% +0.05% +0.02% / +0.11% +0.00% +0.00%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.495 -> 5.494 ( -0.02%) [ +0.02% +0.07% +0.00% / +0.11% -0.02% +0.02%] index_select reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.484 -> 5.488 ( +0.07%) [ +0.00% +0.13% +0.05% / +0.07% +0.07% +0.16%] index_select skip64 : Elapsed 0.055 ms (5.484 ms / 100) 5.484 -> 5.485 ( +0.02%) [ +0.00% +0.09% +0.04% / +0.02% +0.16% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.484 ms / 100) 5.491 -> 5.490 ( -0.02%) [ +0.02% +0.15% +0.00% / +0.13% +0.07% -0.02%] index_select spread : Elapsed 0.055 ms (5.492 ms / 100) 5.497 -> 5.491 ( -0.11%) [ +0.00% +0.05% +0.02% / +0.02% -0.11% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.497 ms / 100) 5.495 -> 5.489 ( -0.11%) [ +0.05% +0.00% +0.00% / +0.16% +0.02% -0.11%] index_select strided 5 : Elapsed 0.055 ms (5.498 ms / 100) 5.494 -> 5.493 ( -0.02%) [ +0.04% +0.00% +0.04% / -0.02% +0.04% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.496 ms / 100) 5.487 -> 5.488 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.04% +0.07%] index_select strided 8 : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.488 ( +0.04%) [ +0.00% +0.07% +0.13% / +0.18% +0.11% +0.04%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.491 -> 5.490 ( -0.02%) [ +0.05% +0.00% +0.00% / +0.09% +0.11% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.494 ms / 100) B = [4, 5, 40, 20] (stride (20, 80, 400, 1)) A = [4, 5, 40, 16] (stride (80, 1, 320, 5)) dim = 3 4.455 -> 4.457 ( +0.04%) [ +0.22% +0.00% +0.04% / +0.04% +0.76% +0.63%] index_add_ linear : Elapsed 0.045 ms (4.465 ms / 100) 4.291 -> 4.292 ( +0.02%) [ +0.09% +0.02% +0.00% / +0.02% +0.82% +0.63%] index_copy_ linear : Elapsed 0.043 ms (4.295 ms / 100) 4.441 -> 4.446 ( +0.11%) [ +0.11% +0.00% +0.09% / +0.11% +0.63% +0.68%] index_add_ reverse : Elapsed 0.044 ms (4.446 ms / 100) 4.283 -> 4.283 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.75% +0.58%] index_copy_ reverse : Elapsed 0.043 ms (4.283 ms / 100) 4.450 -> 4.451 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.67% +0.63%] index_add_ spread : Elapsed 0.045 ms (4.452 ms / 100) 4.288 -> 4.294 ( +0.14%) [ +0.14% +0.00% +0.12% / +0.14% +0.75% +0.70%] index_copy_ spread : Elapsed 0.043 ms (4.294 ms / 100) 4.456 -> 4.455 ( -0.02%) [ +0.00% +0.11% +0.00% / -0.02% +0.65% +0.56%] index_add_ strided 3 : Elapsed 0.045 ms (4.456 ms / 100) 4.288 -> 4.289 ( +0.02%) [ +0.02% +0.09% +0.00% / +0.02% +0.65% +0.70%] index_copy_ strided 3 : Elapsed 0.043 ms (4.289 ms / 100) 4.438 -> 4.442 ( +0.09%) [ +0.16% +0.00% +0.00% / +0.09% +0.92% +0.90%] index_add_ strided 7 : Elapsed 0.044 ms (4.445 ms / 100) 4.275 -> 4.281 ( +0.14%) [ +0.16% +0.02% +0.00% / +0.14% +0.96% +0.91%] index_copy_ strided 7 : Elapsed 0.043 ms (4.282 ms / 100) 4.457 -> 4.457 ( +0.00%) [ +0.00% +0.09% +0.11% / +0.00% +0.74% +0.74%] index_add_ perm : Elapsed 0.045 ms (4.457 ms / 100) 4.290 -> 4.293 ( +0.07%) [ +0.05% +0.00% +0.09% / +0.07% +0.84% +0.77%] index_copy_ perm : Elapsed 0.043 ms (4.292 ms / 100) 4.446 -> 4.453 ( +0.16%) [ +0.20% +0.18% +0.00% / +0.16% +0.85% +1.03%] index_add_ perm_sorted : Elapsed 0.045 ms (4.455 ms / 100) 4.283 -> 4.286 ( +0.07%) [ +0.09% +0.05% +0.00% / +0.07% +0.98% +0.93%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.287 ms / 100) 5.576 -> 5.570 ( -0.11%) [ +0.07% +0.05% +0.00% / +0.05% -0.07% -0.11%] index_select const : Elapsed 0.056 ms (5.580 ms / 100) 5.588 -> 5.586 ( -0.04%) [ +0.04% +0.07% +0.00% / +0.13% -0.04% +0.09%] index_select wrap : Elapsed 0.056 ms (5.590 ms / 100) 5.589 -> 5.589 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.13% +0.14%] index_select linear : Elapsed 0.056 ms (5.592 ms / 100) 5.587 -> 5.590 ( +0.05%) [ +0.00% +0.14% +0.16% / +0.05% +0.25% +0.30%] index_select reverse : Elapsed 0.056 ms (5.587 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.09% +0.00% +0.27% / +0.02% +0.14% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.568 -> 5.573 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.22% +0.16% +0.09%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.590 -> 5.592 ( +0.04%) [ +0.05% +0.00% +0.04% / +0.21% +0.04% +0.18%] index_select spread : Elapsed 0.056 ms (5.593 ms / 100) 5.589 -> 5.593 ( +0.07%) [ +0.11% +0.00% +0.04% / +0.07% +0.18% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.595 ms / 100) 5.595 -> 5.590 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% -0.04% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.595 ms / 100) 5.591 -> 5.592 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.02% +0.14% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.593 ms / 100) 5.568 -> 5.576 ( +0.14%) [ +0.09% +0.02% +0.00% / +0.14% +0.32% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.573 ms / 100) 5.585 -> 5.595 ( +0.18%) [ +0.21% +0.00% +0.07% / +0.18% +0.27% +0.27%] index_select random : Elapsed 0.056 ms (5.597 ms / 100) 5.592 -> 5.593 ( +0.02%) [ +0.00% +0.09% +0.00% / +0.02% +0.07% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.592 ms / 100) B = [4, 5, 40, 20] (stride (1, 160, 4, 800)) A = [4, 5, 40, 16] (stride (80, 1, 320, 5)) dim = 3 4.112 -> 4.110 ( -0.05%) [ +0.12% +0.02% +0.00% / -0.05% +0.56% +0.63%] index_add_ linear : Elapsed 0.041 ms (4.117 ms / 100) 3.925 -> 3.922 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.59% +0.84%] index_copy_ linear : Elapsed 0.039 ms (3.925 ms / 100) 4.103 -> 4.105 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.71% +0.88%] index_add_ reverse : Elapsed 0.041 ms (4.106 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.84% +0.97%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.115 -> 4.114 ( -0.02%) [ +0.10% +0.00% +0.15% / -0.02% +0.61% +0.70%] index_add_ spread : Elapsed 0.041 ms (4.119 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.15% +0.00% +0.20% / +0.05% +0.84% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.931 ms / 100) 4.119 -> 4.118 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.58% +0.56%] index_add_ strided 3 : Elapsed 0.041 ms (4.119 ms / 100) 3.932 -> 3.931 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.71% +0.79%] index_copy_ strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 4.117 -> 4.118 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.53% +0.63%] index_add_ strided 7 : Elapsed 0.041 ms (4.118 ms / 100) 3.930 -> 3.933 ( +0.08%) [ +0.00% +0.10% +0.08% / +0.08% +0.71% +0.71%] index_copy_ strided 7 : Elapsed 0.039 ms (3.930 ms / 100) 4.118 -> 4.113 ( -0.12%) [ +0.05% +0.05% +0.00% / -0.12% +0.51% +0.58%] index_add_ perm : Elapsed 0.041 ms (4.120 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.20% +0.00% +0.05% / +0.08% +0.92% +0.76%] index_copy_ perm : Elapsed 0.039 ms (3.931 ms / 100) 4.104 -> 4.107 ( +0.07%) [ +0.10% +0.00% +0.15% / +0.07% +0.71% +0.76%] index_add_ perm_sorted : Elapsed 0.041 ms (4.108 ms / 100) 3.925 -> 3.924 ( -0.03%) [ +0.00% +0.05% +0.00% / -0.03% +0.76% +0.61%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.925 ms / 100) 5.475 -> 5.472 ( -0.05%) [ +0.00% +0.02% +0.09% / +0.04% +0.04% -0.05%] index_select const : Elapsed 0.055 ms (5.475 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.27% +0.04% +0.00% / +0.09% -0.05% +0.09%] index_select wrap : Elapsed 0.055 ms (5.504 ms / 100) 5.487 -> 5.495 ( +0.15%) [ +0.22% +0.04% +0.00% / +0.16% +0.15% +0.16%] index_select linear : Elapsed 0.055 ms (5.499 ms / 100) 5.491 -> 5.497 ( +0.11%) [ +0.00% +0.05% +0.05% / +0.11% +0.15% +0.16%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.471 -> 5.473 ( +0.04%) [ +0.13% +0.07% +0.00% / +0.13% +0.04% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.13% +0.00% +0.05% / -0.02% +0.11% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.478 ms / 100) 5.493 -> 5.484 ( -0.16%) [ +0.16% +0.05% +0.00% / +0.05% +0.07% -0.16%] index_select spread : Elapsed 0.055 ms (5.502 ms / 100) 5.492 -> 5.481 ( -0.20%) [ +0.11% +0.07% +0.00% / -0.07% -0.20% -0.09%] index_select strided 3 : Elapsed 0.055 ms (5.498 ms / 100) 5.496 -> 5.490 ( -0.11%) [ +0.00% +0.04% +0.11% / +0.04% -0.11% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.496 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.07% +0.07%] index_select strided 7 : Elapsed 0.055 ms (5.496 ms / 100) 5.476 -> 5.470 ( -0.11%) [ +0.00% +0.05% +0.02% / -0.02% -0.11% +0.00%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.490 -> 5.486 ( -0.07%) [ +0.04% +0.09% +0.00% / +0.07% +0.20% -0.07%] index_select random : Elapsed 0.055 ms (5.492 ms / 100) 5.503 -> 5.491 ( -0.22%) [ +0.00% +0.02% +0.05% / -0.22% -0.20% -0.16%] index_select random_sorted : Elapsed 0.055 ms (5.503 ms / 100) out_shape = [20, 16, 5, 40] in_shape = [4, 16, 5, 40] idx_dim = 0 B = [20, 16, 5, 40] (stride (3200, 1, 640, 16)) A = [4, 16, 5, 40] (stride (40, 800, 160, 1)) dim = 0 0.760 -> 0.766 ( +0.79%) [ +0.00% +0.13% +0.00% / +0.79% +2.50% +2.50%] index_add_ linear : Elapsed 0.008 ms (0.760 ms / 100) 0.743 -> 0.745 ( +0.27%) [ +0.00% +0.27% +0.13% / +0.27% +3.50% +2.69%] index_copy_ linear : Elapsed 0.007 ms (0.743 ms / 100) 0.766 -> 0.764 ( -0.26%) [ +0.00% +0.13% +0.26% / -0.26% +0.65% +1.44%] index_add_ reverse : Elapsed 0.008 ms (0.766 ms / 100) 0.744 -> 0.747 ( +0.40%) [ +0.54% +0.00% +0.13% / +0.40% +1.75% +1.34%] index_copy_ reverse : Elapsed 0.007 ms (0.748 ms / 100) 0.771 -> 0.771 ( +0.00%) [ +0.65% +0.13% +0.00% / +0.91% +0.91% +0.00%] index_add_ spread : Elapsed 0.008 ms (0.776 ms / 100) 0.758 -> 0.755 ( -0.40%) [ +0.00% +0.13% +0.00% / +0.26% -0.40% -0.40%] index_copy_ spread : Elapsed 0.008 ms (0.758 ms / 100) 0.775 -> 0.769 ( -0.77%) [ +0.13% +0.39% +0.00% / +0.13% -0.77% -0.13%] index_add_ strided 3 : Elapsed 0.008 ms (0.776 ms / 100) 0.755 -> 0.751 ( -0.53%) [ +0.79% +0.93% +0.00% / +0.40% -0.53% -0.53%] index_copy_ strided 3 : Elapsed 0.008 ms (0.761 ms / 100) 0.769 -> 0.770 ( +0.13%) [ +0.13% +0.52% +0.00% / +0.13% +0.39% +0.65%] index_add_ strided 7 : Elapsed 0.008 ms (0.770 ms / 100) 0.747 -> 0.751 ( +0.54%) [ +0.80% +0.94% +0.00% / +0.54% +1.20% +1.20%] index_copy_ strided 7 : Elapsed 0.008 ms (0.753 ms / 100) 0.795 -> 0.777 ( -2.26%) [ +0.00% +1.01% +0.00% / +0.75% -2.26% -1.76%] index_add_ perm : Elapsed 0.008 ms (0.795 ms / 100) 0.789 -> 0.762 ( -3.42%) [ +0.00% +0.00% +0.00% / -0.13% -3.04% -3.42%] index_copy_ perm : Elapsed 0.008 ms (0.789 ms / 100) 0.798 -> 0.778 ( -2.51%) [ +0.00% +0.13% +0.63% / -0.25% -1.63% -2.51%] index_add_ perm_sorted : Elapsed 0.008 ms (0.798 ms / 100) 0.788 -> 0.764 ( -3.05%) [ +0.00% +0.13% +0.63% / +0.13% -3.05% -3.05%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.788 ms / 100) 5.027 -> 5.025 ( -0.04%) [ +0.06% +0.00% +0.22% / -0.04% +0.26% +0.32%] index_select const : Elapsed 0.050 ms (5.030 ms / 100) 5.056 -> 5.053 ( -0.06%) [ +0.16% +0.00% +0.42% / -0.06% +0.26% +0.14%] index_select wrap : Elapsed 0.051 ms (5.064 ms / 100) 5.042 -> 5.039 ( -0.06%) [ +0.00% +0.10% +0.02% / -0.06% +0.14% +0.28%] index_select linear : Elapsed 0.050 ms (5.042 ms / 100) 5.033 -> 5.037 ( +0.08%) [ +0.00% +0.04% +0.26% / +0.16% +0.08% +0.30%] index_select reverse : Elapsed 0.050 ms (5.033 ms / 100) 5.014 -> 5.021 ( +0.14%) [ +0.18% +0.22% +0.00% / +0.34% +0.14% +0.28%] index_select skip64 : Elapsed 0.050 ms (5.023 ms / 100) 5.022 -> 5.032 ( +0.20%) [ +0.14% +0.00% +0.08% / +0.24% +0.44% +0.20%] index_select skip256 : Elapsed 0.050 ms (5.029 ms / 100) 5.049 -> 5.060 ( +0.22%) [ +0.02% +0.08% +0.00% / +0.22% +0.22% +0.26%] index_select spread : Elapsed 0.050 ms (5.050 ms / 100) 5.050 -> 5.055 ( +0.10%) [ +0.00% +0.08% +0.16% / +0.18% +0.10% +0.28%] index_select strided 3 : Elapsed 0.051 ms (5.050 ms / 100) 5.059 -> 5.048 ( -0.22%) [ +0.08% +0.06% +0.00% / -0.04% +0.16% -0.22%] index_select random : Elapsed 0.051 ms (5.063 ms / 100) 5.054 -> 5.061 ( +0.14%) [ +0.18% +0.00% +0.16% / +0.20% +0.14% +0.20%] index_select random_sorted : Elapsed 0.051 ms (5.063 ms / 100) B = [20, 16, 5, 40] (stride (3200, 1, 16, 80)) dim = 0 fill_cnt = 4 0.988 -> 0.990 ( +0.20%) [ +0.00% +0.30% +0.20% / +0.20% +0.61% +0.71%] index_fill_ const : Elapsed 0.010 ms (0.988 ms / 100) 0.996 -> 0.998 ( +0.20%) [ +0.10% +0.20% +0.00% / +0.20% +1.00% +0.80%] index_fill_ linear : Elapsed 0.010 ms (0.997 ms / 100) 0.996 -> 0.996 ( +0.00%) [ +0.10% +0.00% +0.20% / +0.00% +1.10% +0.90%] index_fill_ reverse : Elapsed 0.010 ms (0.997 ms / 100) 0.990 -> 0.989 ( -0.10%) [ +0.00% +0.20% +0.10% / -0.10% +0.20% +0.20%] index_fill_ skip64 : Elapsed 0.010 ms (0.990 ms / 100) 0.988 -> 0.989 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.10% +0.61% +0.51%] index_fill_ skip256 : Elapsed 0.010 ms (0.988 ms / 100) 1.002 -> 1.003 ( +0.10%) [ +0.00% +0.00% +0.20% / +0.10% +0.30% +0.60%] index_fill_ spread : Elapsed 0.010 ms (1.002 ms / 100) 0.998 -> 1.001 ( +0.30%) [ +0.50% +0.00% +0.50% / +0.60% +0.40% +0.30%] index_fill_ strided 3 : Elapsed 0.010 ms (1.003 ms / 100) 1.002 -> 1.005 ( +0.30%) [ +0.10% +0.00% +0.20% / +0.30% +0.40% +0.40%] index_fill_ strided 5 : Elapsed 0.010 ms (1.003 ms / 100) 0.997 -> 0.998 ( +0.10%) [ +0.40% +0.00% +0.40% / +0.10% +1.30% +1.40%] index_fill_ strided 7 : Elapsed 0.010 ms (1.001 ms / 100) 0.999 -> 1.001 ( +0.20%) [ +0.30% +0.20% +0.00% / +0.20% +0.50% +0.30%] index_fill_ strided 8 : Elapsed 0.010 ms (1.002 ms / 100) 1.003 -> 1.004 ( +0.10%) [ +0.10% +0.20% +0.00% / +0.10% +0.30% +0.30%] index_fill_ strided 16 : Elapsed 0.010 ms (1.004 ms / 100) 0.993 -> 0.996 ( +0.30%) [ +0.20% +0.30% +0.00% / +0.30% +1.41% +1.71%] index_fill_ random : Elapsed 0.010 ms (0.995 ms / 100) 0.993 -> 0.997 ( +0.40%) [ +0.40% +0.10% +0.00% / +0.40% +2.01% +1.71%] index_fill_ random_sorted : Elapsed 0.010 ms (0.997 ms / 100) 0.999 -> 1.002 ( +0.30%) [ +0.50% +0.60% +0.00% / +0.30% +0.40% +0.60%] index_fill_ perm : Elapsed 0.010 ms (1.004 ms / 100) 1.000 -> 1.004 ( +0.40%) [ +0.50% +0.20% +0.00% / +0.40% +0.80% +0.40%] index_fill_ perm_sorted : Elapsed 0.010 ms (1.005 ms / 100) B = [20, 16, 5, 40] (stride (1, 4000, 20, 100)) A = [4, 16, 5, 40] (stride (3200, 1, 640, 16)) dim = 0 2.053 -> 2.052 ( -0.05%) [ +0.00% +0.05% +0.24% / -0.05% +0.10% +0.34%] index_add_ linear : Elapsed 0.021 ms (2.053 ms / 100) 2.012 -> 2.015 ( +0.15%) [ +0.00% +0.00% +0.05% / +0.15% +0.40% +0.55%] index_copy_ linear : Elapsed 0.020 ms (2.012 ms / 100) 2.050 -> 2.051 ( +0.05%) [ +0.15% +0.49% +0.00% / +0.05% +0.44% +0.63%] index_add_ reverse : Elapsed 0.021 ms (2.053 ms / 100) 2.010 -> 2.015 ( +0.25%) [ +0.00% +0.05% +0.05% / +0.25% +0.60% +0.35%] index_copy_ reverse : Elapsed 0.020 ms (2.010 ms / 100) 2.090 -> 2.095 ( +0.24%) [ +0.00% +0.00% +0.10% / +0.33% +0.24% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.090 ms / 100) 2.110 -> 2.108 ( -0.09%) [ +0.00% +0.09% +0.14% / -0.09% +0.28% +0.38%] index_copy_ spread : Elapsed 0.021 ms (2.110 ms / 100) 2.084 -> 2.090 ( +0.29%) [ +0.00% +0.05% +0.29% / +0.29% +0.62% +0.58%] index_add_ strided 3 : Elapsed 0.021 ms (2.084 ms / 100) 2.079 -> 2.086 ( +0.34%) [ +0.24% +0.00% +0.05% / +0.43% +0.34% +0.67%] index_copy_ strided 3 : Elapsed 0.021 ms (2.084 ms / 100) 2.087 -> 2.088 ( +0.05%) [ +0.14% +0.14% +0.00% / +0.05% +0.53% +0.62%] index_add_ strided 7 : Elapsed 0.021 ms (2.090 ms / 100) 2.106 -> 2.104 ( -0.09%) [ +0.14% +0.19% +0.00% / -0.09% +0.57% +0.47%] index_copy_ strided 7 : Elapsed 0.021 ms (2.109 ms / 100) 2.088 -> 2.095 ( +0.34%) [ +0.19% +0.24% +0.00% / +0.43% +0.34% +0.34%] index_add_ perm : Elapsed 0.021 ms (2.092 ms / 100) 2.115 -> 2.111 ( -0.19%) [ +0.19% +0.05% +0.00% / -0.19% +0.14% +0.09%] index_copy_ perm : Elapsed 0.021 ms (2.119 ms / 100) 2.091 -> 2.093 ( +0.10%) [ +0.14% +0.00% +0.00% / +0.19% +0.24% +0.10%] index_add_ perm_sorted : Elapsed 0.021 ms (2.094 ms / 100) 2.113 -> 2.113 ( +0.00%) [ +0.00% +0.19% +0.05% / +0.00% +0.28% +0.19%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.113 ms / 100) 8.826 -> 8.833 ( +0.08%) [ +0.00% +0.07% +0.09% / +0.18% +0.08% +0.14%] index_select const : Elapsed 0.088 ms (8.826 ms / 100) 8.863 -> 8.882 ( +0.21%) [ +0.06% +0.00% +0.02% / +0.21% +0.30% +0.41%] index_select wrap : Elapsed 0.089 ms (8.868 ms / 100) 8.855 -> 8.854 ( -0.01%) [ +0.08% +0.09% +0.00% / -0.01% +0.23% +0.14%] index_select linear : Elapsed 0.089 ms (8.862 ms / 100) 8.847 -> 8.833 ( -0.16%) [ +0.06% +0.00% +0.11% / -0.16% +0.45% +0.24%] index_select reverse : Elapsed 0.089 ms (8.852 ms / 100) 8.814 -> 8.832 ( +0.20%) [ +0.05% +0.00% +0.22% / +0.20% +0.34% +0.25%] index_select skip64 : Elapsed 0.088 ms (8.818 ms / 100) 8.821 -> 8.833 ( +0.14%) [ +0.09% +0.00% +0.14% / +0.14% +0.17% +0.35%] index_select skip256 : Elapsed 0.088 ms (8.829 ms / 100) 8.847 -> 8.855 ( +0.09%) [ +0.00% +0.25% +0.12% / +0.09% +0.31% +0.46%] index_select spread : Elapsed 0.088 ms (8.847 ms / 100) 8.870 -> 8.878 ( +0.09%) [ +0.01% +0.00% +0.06% / +0.23% +0.11% +0.09%] index_select strided 3 : Elapsed 0.089 ms (8.871 ms / 100) 8.854 -> 8.871 ( +0.19%) [ +0.01% +0.00% +0.14% / +0.19% +0.33% +0.36%] index_select random : Elapsed 0.089 ms (8.855 ms / 100) 8.839 -> 8.857 ( +0.20%) [ +0.12% +0.00% +0.24% / +0.20% +0.55% +0.29%] index_select random_sorted : Elapsed 0.088 ms (8.850 ms / 100) B = [20, 16, 5, 40] (stride (1, 4000, 20, 100)) A = [4, 16, 5, 40] (stride (1, 800, 4, 20)) dim = 0 2.200 -> 2.203 ( +0.14%) [ +0.09% +0.00% +0.18% / +0.14% +0.45% +0.55%] index_add_ linear : Elapsed 0.022 ms (2.202 ms / 100) 2.160 -> 2.165 ( +0.23%) [ +0.19% +0.23% +0.00% / +0.23% +0.37% +0.32%] index_copy_ linear : Elapsed 0.022 ms (2.164 ms / 100) 2.184 -> 2.187 ( +0.14%) [ +0.00% +0.14% +0.09% / +0.14% +0.64% +0.64%] index_add_ reverse : Elapsed 0.022 ms (2.184 ms / 100) 2.145 -> 2.146 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.70% +0.61%] index_copy_ reverse : Elapsed 0.021 ms (2.145 ms / 100) 2.224 -> 2.225 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.04% +0.58% +0.72%] index_add_ spread : Elapsed 0.022 ms (2.227 ms / 100) 2.247 -> 2.246 ( -0.04%) [ +0.00% +0.18% +0.00% / -0.04% +0.80% +0.76%] index_copy_ spread : Elapsed 0.022 ms (2.247 ms / 100) 2.232 -> 2.235 ( +0.13%) [ +0.22% +0.13% +0.00% / +0.13% +0.49% +0.54%] index_add_ strided 3 : Elapsed 0.022 ms (2.237 ms / 100) 2.230 -> 2.230 ( +0.00%) [ +0.00% +0.09% +0.04% / +0.00% +0.54% +0.58%] index_copy_ strided 3 : Elapsed 0.022 ms (2.230 ms / 100) 2.232 -> 2.236 ( +0.18%) [ +0.00% +0.22% +0.40% / +0.18% +0.49% +0.54%] index_add_ strided 7 : Elapsed 0.022 ms (2.232 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.49% +0.40%] index_copy_ strided 7 : Elapsed 0.023 ms (2.258 ms / 100) 2.228 -> 2.231 ( +0.13%) [ +0.09% +0.00% +0.27% / +0.13% +0.45% +0.81%] index_add_ perm : Elapsed 0.022 ms (2.230 ms / 100) 2.229 -> 2.231 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.49% +0.45%] index_copy_ perm : Elapsed 0.022 ms (2.229 ms / 100) 2.225 -> 2.225 ( +0.00%) [ +0.00% +0.18% +0.04% / +0.00% +0.27% +0.27%] index_add_ perm_sorted : Elapsed 0.022 ms (2.225 ms / 100) 2.220 -> 2.219 ( -0.05%) [ +0.00% +0.36% +0.09% / -0.05% +0.68% +0.45%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.220 ms / 100) 9.305 -> 9.310 ( +0.05%) [ +0.15% +0.13% +0.00% / +0.13% +0.09% +0.05%] index_select const : Elapsed 0.093 ms (9.319 ms / 100) 9.293 -> 9.306 ( +0.14%) [ +0.13% +0.16% +0.00% / +0.26% +0.37% +0.14%] index_select wrap : Elapsed 0.093 ms (9.305 ms / 100) 9.306 -> 9.307 ( +0.01%) [ +0.00% +0.25% +0.01% / +0.04% +0.01% +0.05%] index_select linear : Elapsed 0.093 ms (9.306 ms / 100) 9.295 -> 9.296 ( +0.01%) [ +0.00% +0.20% +0.01% / +0.06% +0.01% +0.20%] index_select reverse : Elapsed 0.093 ms (9.295 ms / 100) 9.307 -> 9.290 ( -0.18%) [ +0.00% +0.20% +0.04% / -0.18% +0.09% +0.04%] index_select skip64 : Elapsed 0.093 ms (9.307 ms / 100) 9.303 -> 9.299 ( -0.04%) [ +0.13% +0.14% +0.00% / +0.02% -0.04% +0.17%] index_select skip256 : Elapsed 0.093 ms (9.315 ms / 100) 9.304 -> 9.305 ( +0.01%) [ +0.10% +0.25% +0.00% / +0.01% +0.29% +0.04%] index_select spread : Elapsed 0.093 ms (9.313 ms / 100) 9.304 -> 9.311 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.10% +0.15%] index_select strided 3 : Elapsed 0.093 ms (9.324 ms / 100) 9.288 -> 9.307 ( +0.20%) [ +0.17% +0.16% +0.00% / +0.20% +0.29% +0.34%] index_select random : Elapsed 0.093 ms (9.304 ms / 100) 9.299 -> 9.300 ( +0.01%) [ +0.00% +0.01% +0.17% / +0.04% +0.05% +0.01%] index_select random_sorted : Elapsed 0.093 ms (9.299 ms / 100) B = [20, 16, 5, 40] (stride (80, 5, 1, 1600)) A = [4, 16, 5, 40] (stride (3200, 200, 1, 5)) dim = 0 1.974 -> 1.977 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.76% +0.66%] index_add_ linear : Elapsed 0.020 ms (1.977 ms / 100) 1.933 -> 1.940 ( +0.36%) [ +0.10% +0.00% +0.26% / +0.36% +1.14% +1.03%] index_copy_ linear : Elapsed 0.019 ms (1.935 ms / 100) 1.973 -> 1.977 ( +0.20%) [ +0.15% +0.30% +0.00% / +0.20% +0.76% +0.56%] index_add_ reverse : Elapsed 0.020 ms (1.976 ms / 100) 1.939 -> 1.939 ( +0.00%) [ +0.00% +0.26% +0.00% / +0.00% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.019 ms (1.939 ms / 100) 1.971 -> 1.971 ( +0.00%) [ +0.25% +0.20% +0.00% / +0.00% +0.66% +0.81%] index_add_ spread : Elapsed 0.020 ms (1.976 ms / 100) 1.934 -> 1.935 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.67% +0.88%] index_copy_ spread : Elapsed 0.019 ms (1.934 ms / 100) 1.973 -> 1.976 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +0.66% +0.81%] index_add_ strided 3 : Elapsed 0.020 ms (1.976 ms / 100) 1.934 -> 1.935 ( +0.05%) [ +0.00% +0.36% +0.00% / +0.05% +0.88% +0.72%] index_copy_ strided 3 : Elapsed 0.019 ms (1.934 ms / 100) 1.970 -> 1.974 ( +0.20%) [ +0.30% +0.00% +0.30% / +0.20% +0.96% +0.61%] index_add_ strided 7 : Elapsed 0.020 ms (1.976 ms / 100) 1.935 -> 1.936 ( +0.05%) [ +0.16% +0.00% +0.21% / +0.05% +0.83% +0.62%] index_copy_ strided 7 : Elapsed 0.019 ms (1.938 ms / 100) 1.972 -> 1.973 ( +0.05%) [ +0.00% +0.00% +0.15% / +0.05% +0.81% +0.81%] index_add_ perm : Elapsed 0.020 ms (1.972 ms / 100) 1.931 -> 1.928 ( -0.16%) [ +0.05% +0.05% +0.00% / -0.16% +0.67% +0.67%] index_copy_ perm : Elapsed 0.019 ms (1.932 ms / 100) 1.972 -> 1.975 ( +0.15%) [ +0.05% +0.15% +0.00% / +0.15% +0.71% +0.61%] index_add_ perm_sorted : Elapsed 0.020 ms (1.973 ms / 100) 1.926 -> 1.933 ( +0.36%) [ +0.00% +0.42% +0.31% / +0.36% +1.25% +1.09%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.926 ms / 100) 8.709 -> 8.702 ( -0.08%) [ +0.14% +0.00% +0.02% / -0.08% +0.14% +0.14%] index_select const : Elapsed 0.087 ms (8.721 ms / 100) 8.756 -> 8.773 ( +0.19%) [ +0.17% +0.00% +0.13% / +0.19% +0.26% +0.24%] index_select wrap : Elapsed 0.088 ms (8.771 ms / 100) 8.745 -> 8.750 ( +0.06%) [ +0.06% +0.00% +0.09% / +0.06% +0.19% +0.07%] index_select linear : Elapsed 0.087 ms (8.750 ms / 100) 8.730 -> 8.732 ( +0.02%) [ +0.09% +0.15% +0.00% / +0.25% +0.36% +0.02%] index_select reverse : Elapsed 0.087 ms (8.738 ms / 100) 8.705 -> 8.710 ( +0.06%) [ +0.03% +0.00% +0.08% / +0.06% +0.46% +0.32%] index_select skip64 : Elapsed 0.087 ms (8.708 ms / 100) 8.717 -> 8.720 ( +0.03%) [ +0.00% +0.00% +0.01% / +0.03% +0.37% +0.20%] index_select skip256 : Elapsed 0.087 ms (8.717 ms / 100) 8.741 -> 8.744 ( +0.03%) [ +0.02% +0.00% +0.02% / +0.03% +0.48% +0.50%] index_select spread : Elapsed 0.087 ms (8.743 ms / 100) 8.771 -> 8.760 ( -0.13%) [ +0.08% +0.03% +0.00% / -0.13% +0.27% +0.16%] index_select strided 3 : Elapsed 0.088 ms (8.778 ms / 100) 8.760 -> 8.758 ( -0.02%) [ +0.10% +0.08% +0.00% / -0.02% +0.30% +0.27%] index_select random : Elapsed 0.088 ms (8.769 ms / 100) 8.738 -> 8.734 ( -0.05%) [ +0.11% +0.07% +0.00% / -0.05% +0.14% +0.26%] index_select random_sorted : Elapsed 0.087 ms (8.748 ms / 100) out_shape = [4, 20, 5, 40] in_shape = [4, 16, 5, 40] idx_dim = 1 B = [4, 20, 5, 40] (stride (4000, 200, 40, 1)) A = [4, 16, 5, 40] (stride (3200, 5, 1, 80)) dim = 1 4.117 -> 4.118 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.63% +0.58%] index_add_ linear : Elapsed 0.041 ms (4.119 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.71% +0.79%] index_copy_ linear : Elapsed 0.039 ms (3.934 ms / 100) 4.107 -> 4.110 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.71% +0.68%] index_add_ reverse : Elapsed 0.041 ms (4.110 ms / 100) 3.926 -> 3.933 ( +0.18%) [ +0.00% +0.25% +0.13% / +0.18% +0.97% +0.97%] index_copy_ reverse : Elapsed 0.039 ms (3.926 ms / 100) 4.098 -> 4.103 ( +0.12%) [ +0.20% +0.12% +0.00% / +0.12% +0.71% +0.56%] index_add_ spread : Elapsed 0.041 ms (4.106 ms / 100) 3.919 -> 3.922 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.59% +0.74%] index_copy_ spread : Elapsed 0.039 ms (3.920 ms / 100) 4.102 -> 4.103 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.66% +0.56%] index_add_ strided 3 : Elapsed 0.041 ms (4.103 ms / 100) 3.926 -> 3.935 ( +0.23%) [ +0.20% +0.05% +0.00% / +0.23% +0.92% +0.82%] index_copy_ strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 4.106 -> 4.104 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.51% +0.54%] index_add_ strided 7 : Elapsed 0.041 ms (4.107 ms / 100) 3.933 -> 3.937 ( +0.10%) [ +0.05% +0.13% +0.00% / +0.10% +0.69% +0.53%] index_copy_ strided 7 : Elapsed 0.039 ms (3.935 ms / 100) 4.099 -> 4.102 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.54% +0.76%] index_add_ perm : Elapsed 0.041 ms (4.108 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.15% +0.00% +0.10% / +0.05% +0.64% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.925 ms / 100) 4.113 -> 4.112 ( -0.02%) [ +0.15% +0.10% +0.00% / -0.02% +0.56% +0.56%] index_add_ perm_sorted : Elapsed 0.041 ms (4.119 ms / 100) 3.935 -> 3.929 ( -0.15%) [ +0.00% +0.03% +0.05% / -0.15% +0.71% +0.53%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) 5.472 -> 5.479 ( +0.13%) [ +0.27% +0.13% +0.00% / +0.13% +0.13% +0.13%] index_select const : Elapsed 0.055 ms (5.487 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.11% +0.00% +0.07% / +0.02% +0.07% +0.16%] index_select wrap : Elapsed 0.055 ms (5.491 ms / 100) 5.487 -> 5.488 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.16% +0.02%] index_select linear : Elapsed 0.055 ms (5.489 ms / 100) 5.484 -> 5.488 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.15% +0.07% +0.18%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.476 -> 5.475 ( -0.02%) [ +0.00% +0.05% +0.02% / -0.02% +0.09% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.00% +0.13% +0.09% / -0.02% +0.13% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.471 ms / 100) 5.482 -> 5.489 ( +0.13%) [ +0.16% +0.00% +0.15% / +0.13% +0.16% +0.16%] index_select spread : Elapsed 0.055 ms (5.491 ms / 100) 5.487 -> 5.482 ( -0.09%) [ +0.09% +0.00% +0.00% / +0.13% +0.05% -0.09%] index_select strided 3 : Elapsed 0.055 ms (5.492 ms / 100) 5.484 -> 5.491 ( +0.13%) [ +0.13% +0.18% +0.00% / +0.15% +0.13% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.487 -> 5.484 ( -0.05%) [ +0.02% +0.00% +0.09% / -0.02% -0.05% -0.02%] index_select strided 7 : Elapsed 0.055 ms (5.488 ms / 100) 5.471 -> 5.469 ( -0.04%) [ +0.02% +0.07% +0.00% / +0.00% +0.07% -0.04%] index_select strided 8 : Elapsed 0.055 ms (5.472 ms / 100) 5.486 -> 5.482 ( -0.07%) [ +0.07% +0.00% +0.04% / +0.09% -0.07% +0.05%] index_select random : Elapsed 0.055 ms (5.490 ms / 100) 5.487 -> 5.483 ( -0.07%) [ +0.04% +0.04% +0.00% / +0.00% -0.07% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [4, 20, 5, 40] (stride (1, 800, 160, 4)) A = [4, 16, 5, 40] (stride (3200, 200, 40, 1)) dim = 1 1.327 -> 1.327 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.90% +0.90%] index_add_ linear : Elapsed 0.013 ms (1.327 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +0.94% +0.86%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.326 -> 1.325 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.90% +0.75%] index_add_ reverse : Elapsed 0.013 ms (1.326 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +1.02%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.329 -> 1.330 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.90% +0.90%] index_add_ spread : Elapsed 0.013 ms (1.330 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.86% +0.94%] index_copy_ spread : Elapsed 0.013 ms (1.280 ms / 100) 1.324 -> 1.323 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +1.13% +1.28%] index_add_ strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +1.41% +1.25%] index_copy_ strided 3 : Elapsed 0.013 ms (1.275 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.15% +0.15% / +0.08% +1.06% +1.13%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.94% +1.02%] index_copy_ strided 7 : Elapsed 0.013 ms (1.279 ms / 100) 1.327 -> 1.326 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.98% +0.98%] index_add_ perm : Elapsed 0.013 ms (1.327 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +1.09% +1.09%] index_copy_ perm : Elapsed 0.013 ms (1.280 ms / 100) 1.327 -> 1.327 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.013 ms (1.327 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.94% +0.78%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) 3.532 -> 3.530 ( -0.06%) [ +0.23% +0.08% +0.00% / +0.11% -0.06% -0.06%] index_select const : Elapsed 0.035 ms (3.540 ms / 100) 3.555 -> 3.550 ( -0.14%) [ +0.00% +0.11% +0.17% / +0.14% -0.03% -0.14%] index_select wrap : Elapsed 0.036 ms (3.555 ms / 100) 3.558 -> 3.554 ( -0.11%) [ +0.20% +0.22% +0.00% / +0.08% -0.06% -0.11%] index_select linear : Elapsed 0.036 ms (3.565 ms / 100) 3.556 -> 3.553 ( -0.08%) [ +0.00% +0.03% +0.31% / +0.06% +0.14% -0.08%] index_select reverse : Elapsed 0.036 ms (3.556 ms / 100) 3.533 -> 3.532 ( -0.03%) [ +0.00% +0.14% +0.14% / +0.20% +0.00% -0.03%] index_select skip64 : Elapsed 0.035 ms (3.533 ms / 100) 3.533 -> 3.533 ( +0.00%) [ +0.20% +0.00% +0.08% / +0.03% +0.03% +0.00%] index_select skip256 : Elapsed 0.035 ms (3.540 ms / 100) 3.562 -> 3.557 ( -0.14%) [ +0.00% +0.22% +0.08% / +0.14% -0.14% -0.14%] index_select spread : Elapsed 0.036 ms (3.562 ms / 100) 3.571 -> 3.547 ( -0.67%) [ +0.14% +0.00% +0.06% / +0.00% -0.67% -0.62%] index_select strided 3 : Elapsed 0.036 ms (3.576 ms / 100) 3.572 -> 3.551 ( -0.59%) [ +0.20% +0.03% +0.00% / -0.08% -0.59% -0.50%] index_select strided 5 : Elapsed 0.036 ms (3.579 ms / 100) 3.549 -> 3.547 ( -0.06%) [ +0.00% +0.39% +0.37% / -0.06% +0.37% +0.14%] index_select strided 7 : Elapsed 0.035 ms (3.549 ms / 100) 3.535 -> 3.535 ( +0.00%) [ +0.00% +0.06% +0.08% / +0.00% +0.14% +0.14%] index_select strided 8 : Elapsed 0.035 ms (3.535 ms / 100) 3.545 -> 3.549 ( +0.11%) [ +0.11% +0.00% +0.17% / +0.11% +0.56% +0.51%] index_select random : Elapsed 0.035 ms (3.549 ms / 100) 3.550 -> 3.546 ( -0.11%) [ +0.00% +0.03% +0.11% / -0.11% +0.48% +0.28%] index_select random_sorted : Elapsed 0.035 ms (3.550 ms / 100) B = [4, 20, 5, 40] (stride (800, 40, 3200, 1)) A = [4, 16, 5, 40] (stride (40, 160, 2560, 1)) dim = 1 3.936 -> 3.938 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.71% +0.71%] index_add_ linear : Elapsed 0.039 ms (3.936 ms / 100) 3.813 -> 3.811 ( -0.05%) [ +0.00% +0.03% +0.05% / -0.05% +0.68% +0.63%] index_copy_ linear : Elapsed 0.038 ms (3.813 ms / 100) 3.943 -> 3.943 ( +0.00%) [ +0.03% +0.10% +0.00% / +0.00% +0.74% +0.71%] index_add_ reverse : Elapsed 0.039 ms (3.944 ms / 100) 3.817 -> 3.818 ( +0.03%) [ +0.00% +0.34% +0.00% / +0.03% +0.76% +0.81%] index_copy_ reverse : Elapsed 0.038 ms (3.817 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.76% +0.79%] index_add_ spread : Elapsed 0.039 ms (3.933 ms / 100) 3.806 -> 3.806 ( +0.00%) [ +0.00% +0.08% +0.03% / +0.00% +0.81% +0.79%] index_copy_ spread : Elapsed 0.038 ms (3.806 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.76% +0.74%] index_add_ strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 3.818 -> 3.817 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.76% +0.79%] index_copy_ strided 3 : Elapsed 0.038 ms (3.818 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.039 ms (3.937 ms / 100) 3.822 -> 3.821 ( -0.03%) [ +0.00% +0.05% +0.00% / -0.03% +0.52% +0.50%] index_copy_ strided 7 : Elapsed 0.038 ms (3.822 ms / 100) 3.934 -> 3.935 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.74% +0.69%] index_add_ perm : Elapsed 0.039 ms (3.935 ms / 100) 3.807 -> 3.811 ( +0.11%) [ +0.11% +0.03% +0.00% / +0.11% +0.74% +0.66%] index_copy_ perm : Elapsed 0.038 ms (3.811 ms / 100) 3.943 -> 3.945 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.039 ms (3.946 ms / 100) 3.819 -> 3.819 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.68% +0.63%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.819 ms / 100) 5.549 -> 5.553 ( +0.07%) [ +0.13% +0.14% +0.00% / +0.07% +0.23% +0.14%] index_select const : Elapsed 0.056 ms (5.556 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.11% +0.00% +0.13% / +0.23% +0.07% +0.04%] index_select wrap : Elapsed 0.056 ms (5.577 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.00% +0.09% +0.02% / +0.11% +0.09% +0.00%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.14% +0.02% / -0.04% +0.14% +0.04%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.555 -> 5.548 ( -0.13%) [ +0.00% +0.14% +0.13% / -0.13% +0.11% +0.18%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.561 -> 5.556 ( -0.09%) [ +0.00% +0.04% +0.00% / -0.09% -0.04% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.572 -> 5.571 ( -0.02%) [ +0.00% +0.11% +0.04% / -0.02% +0.16% +0.13%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.574 -> 5.574 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.00% +0.11% +0.00%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.578 -> 5.570 ( -0.14%) [ +0.07% +0.00% +0.07% / -0.04% -0.14% -0.09%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.574 -> 5.576 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.09% +0.13% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.579 ms / 100) 5.554 -> 5.560 ( +0.11%) [ +0.00% +0.16% +0.09% / +0.18% +0.11% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.554 ms / 100) 5.576 -> 5.567 ( -0.16%) [ +0.00% +0.00% +0.04% / +0.04% -0.16% -0.04%] index_select random : Elapsed 0.056 ms (5.576 ms / 100) 5.577 -> 5.569 ( -0.14%) [ +0.00% +0.32% +0.04% / -0.04% -0.14% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.577 ms / 100) B = [4, 20, 5, 40] (stride (1, 160, 3200, 4)) A = [4, 16, 5, 40] (stride (5, 800, 1, 20)) dim = 1 4.279 -> 4.285 ( +0.14%) [ +0.00% +0.09% +0.00% / +0.14% +0.86% +0.77%] index_add_ linear : Elapsed 0.043 ms (4.279 ms / 100) 4.126 -> 4.129 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.85% +0.75%] index_copy_ linear : Elapsed 0.041 ms (4.126 ms / 100) 4.267 -> 4.263 ( -0.09%) [ +0.12% +0.00% +0.09% / -0.09% +0.96% +0.82%] index_add_ reverse : Elapsed 0.043 ms (4.272 ms / 100) 4.117 -> 4.116 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.73% +0.75%] index_copy_ reverse : Elapsed 0.041 ms (4.119 ms / 100) 4.247 -> 4.245 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.68% +0.71%] index_add_ spread : Elapsed 0.042 ms (4.247 ms / 100) 4.097 -> 4.094 ( -0.07%) [ +0.00% +0.10% +0.00% / -0.07% +0.71% +0.76%] index_copy_ spread : Elapsed 0.041 ms (4.097 ms / 100) 4.237 -> 4.237 ( +0.00%) [ +0.19% +0.31% +0.00% / +0.00% +0.80% +0.94%] index_add_ strided 3 : Elapsed 0.042 ms (4.245 ms / 100) 4.091 -> 4.091 ( +0.00%) [ +0.24% +0.46% +0.00% / +0.00% +0.76% +1.12%] index_copy_ strided 3 : Elapsed 0.041 ms (4.101 ms / 100) 4.265 -> 4.272 ( +0.16%) [ +0.02% +0.16% +0.00% / +0.16% +0.73% +0.73%] index_add_ strided 7 : Elapsed 0.043 ms (4.266 ms / 100) 4.117 -> 4.116 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.73% +0.73%] index_copy_ strided 7 : Elapsed 0.041 ms (4.117 ms / 100) 4.283 -> 4.286 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.77% +0.72%] index_add_ perm : Elapsed 0.043 ms (4.285 ms / 100) 4.128 -> 4.129 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.75% +0.65%] index_copy_ perm : Elapsed 0.041 ms (4.129 ms / 100) 4.243 -> 4.239 ( -0.09%) [ +0.00% +0.00% +0.05% / -0.09% +0.68% +0.82%] index_add_ perm_sorted : Elapsed 0.042 ms (4.243 ms / 100) 4.098 -> 4.093 ( -0.12%) [ +0.02% +0.10% +0.00% / -0.12% +0.76% +0.95%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.099 ms / 100) 5.553 -> 5.556 ( +0.05%) [ +0.00% +0.05% +0.13% / +0.23% +0.05% +0.11%] index_select const : Elapsed 0.056 ms (5.553 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.02% +0.04% / -0.02% +0.04% +0.14%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.11% +0.04% +0.00% / -0.02% +0.16% +0.04%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.05% +0.04% +0.00% / +0.07% +0.13% +0.09%] index_select reverse : Elapsed 0.056 ms (5.565 ms / 100) 5.553 -> 5.555 ( +0.04%) [ +0.00% +0.11% +0.02% / +0.04% +0.14% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.553 ms / 100) 5.555 -> 5.555 ( +0.00%) [ +0.00% +0.11% +0.02% / +0.00% +0.14% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.555 ms / 100) 5.565 -> 5.557 ( -0.14%) [ +0.09% +0.05% +0.00% / -0.14% +0.09% +0.04%] index_select spread : Elapsed 0.056 ms (5.570 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.20% +0.00% +0.13% / +0.07% +0.05% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.07% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.566 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.22% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.569 ms / 100) 5.559 -> 5.554 ( -0.09%) [ +0.00% +0.04% +0.09% / -0.09% +0.16% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.557 -> 5.562 ( +0.09%) [ +0.18% +0.00% +0.00% / +0.09% +0.22% +0.11%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.553 -> 5.565 ( +0.22%) [ +0.00% +0.13% +0.16% / +0.22% +0.23% +0.22%] index_select random_sorted : Elapsed 0.056 ms (5.553 ms / 100) B = [4, 20, 5, 40] (stride (20, 1, 3200, 80)) A = [4, 16, 5, 40] (stride (16, 1, 64, 320)) dim = 1 4.105 -> 4.102 ( -0.07%) [ +0.00% +0.10% +0.02% / -0.07% +0.93% +0.78%] index_add_ linear : Elapsed 0.041 ms (4.105 ms / 100) 3.938 -> 3.942 ( +0.10%) [ +0.13% +0.00% +0.05% / +0.10% +0.74% +0.69%] index_copy_ linear : Elapsed 0.039 ms (3.943 ms / 100) 4.101 -> 4.107 ( +0.15%) [ +0.15% +0.24% +0.00% / +0.15% +0.90% +0.56%] index_add_ reverse : Elapsed 0.041 ms (4.107 ms / 100) 3.941 -> 3.941 ( +0.00%) [ +0.03% +0.08% +0.00% / +0.00% +0.74% +0.69%] index_copy_ reverse : Elapsed 0.039 ms (3.942 ms / 100) 4.106 -> 4.107 ( +0.02%) [ +0.15% +0.05% +0.00% / +0.02% +0.78% +0.63%] index_add_ spread : Elapsed 0.041 ms (4.112 ms / 100) 3.940 -> 3.952 ( +0.30%) [ +0.23% +0.13% +0.00% / +0.30% +0.86% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.949 ms / 100) 4.110 -> 4.119 ( +0.22%) [ +0.07% +0.00% +0.12% / +0.22% +0.58% +0.68%] index_add_ strided 3 : Elapsed 0.041 ms (4.113 ms / 100) 3.939 -> 3.942 ( +0.08%) [ +0.15% +0.00% +0.03% / +0.08% +0.74% +0.74%] index_copy_ strided 3 : Elapsed 0.039 ms (3.945 ms / 100) 4.107 -> 4.105 ( -0.05%) [ +0.32% +0.00% +0.24% / -0.05% +0.75% +0.73%] index_add_ strided 7 : Elapsed 0.041 ms (4.120 ms / 100) 3.940 -> 3.939 ( -0.03%) [ +0.08% +0.00% +0.03% / -0.03% +0.79% +0.71%] index_copy_ strided 7 : Elapsed 0.039 ms (3.943 ms / 100) 4.107 -> 4.108 ( +0.02%) [ +0.12% +0.07% +0.00% / +0.02% +0.49% +0.56%] index_add_ perm : Elapsed 0.041 ms (4.112 ms / 100) 3.943 -> 3.949 ( +0.15%) [ +0.00% +0.20% +0.08% / +0.15% +0.63% +0.81%] index_copy_ perm : Elapsed 0.039 ms (3.943 ms / 100) 4.108 -> 4.112 ( +0.10%) [ +0.15% +0.00% +0.12% / +0.10% +0.73% +0.51%] index_add_ perm_sorted : Elapsed 0.041 ms (4.114 ms / 100) 3.944 -> 3.942 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.56% +0.51%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.15% +0.00% +0.16% / +0.13% +0.11% +0.05%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.495 -> 5.497 ( +0.04%) [ +0.00% +0.05% +0.09% / +0.07% +0.07% +0.04%] index_select wrap : Elapsed 0.055 ms (5.495 ms / 100) 5.496 -> 5.499 ( +0.05%) [ +0.00% +0.18% +0.07% / +0.07% +0.05% +0.15%] index_select linear : Elapsed 0.055 ms (5.496 ms / 100) 5.499 -> 5.492 ( -0.13%) [ +0.05% +0.05% +0.00% / -0.13% -0.07% -0.07%] index_select reverse : Elapsed 0.055 ms (5.502 ms / 100) 5.484 -> 5.489 ( +0.09%) [ +0.13% +0.13% +0.00% / +0.11% +0.09% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.491 ms / 100) 5.490 -> 5.488 ( -0.04%) [ +0.09% +0.02% +0.00% / -0.04% +0.11% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.495 ms / 100) 5.497 -> 5.494 ( -0.05%) [ +0.02% +0.05% +0.00% / +0.07% -0.05% -0.05%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.495 -> 5.498 ( +0.05%) [ +0.20% +0.00% +0.13% / +0.16% +0.09% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.506 ms / 100) 5.500 -> 5.493 ( -0.13%) [ +0.15% +0.07% +0.00% / -0.02% -0.13% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.508 ms / 100) 5.496 -> 5.499 ( +0.05%) [ +0.05% +0.00% +0.11% / +0.11% +0.05% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.499 ms / 100) 5.497 -> 5.494 ( -0.05%) [ +0.05% +0.00% +0.02% / -0.05% +0.07% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.500 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.15% +0.00% +0.13% / +0.00% +0.07% +0.05%] index_select random : Elapsed 0.055 ms (5.502 ms / 100) 5.499 -> 5.495 ( -0.07%) [ +0.02% +0.11% +0.00% / -0.02% -0.04% -0.07%] index_select random_sorted : Elapsed 0.055 ms (5.500 ms / 100) B = [4, 20, 5, 40] (stride (1, 4, 3200, 80)) A = [4, 16, 5, 40] (stride (3200, 1, 640, 16)) dim = 1 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.28% +0.00% / -0.07% +0.77% +0.84%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.457 -> 1.460 ( +0.21%) [ +0.00% +0.00% +0.34% / +0.21% +0.62% +0.62%] index_copy_ linear : Elapsed 0.015 ms (1.457 ms / 100) 1.420 -> 1.420 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.92% +0.63%] index_add_ reverse : Elapsed 0.014 ms (1.420 ms / 100) 1.464 -> 1.466 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +1.43% +1.30%] index_copy_ reverse : Elapsed 0.015 ms (1.465 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.77% +0.77%] index_add_ spread : Elapsed 0.014 ms (1.420 ms / 100) 1.434 -> 1.435 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.63% +0.70%] index_copy_ spread : Elapsed 0.014 ms (1.437 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +1.06% +1.06%] index_add_ strided 3 : Elapsed 0.014 ms (1.420 ms / 100) 1.457 -> 1.458 ( +0.07%) [ +0.21% +0.00% +0.27% / +0.07% +0.41% +0.89%] index_copy_ strided 3 : Elapsed 0.015 ms (1.460 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.420 ms / 100) 1.466 -> 1.467 ( +0.07%) [ +0.20% +0.20% +0.00% / +0.07% +1.09% +1.02%] index_copy_ strided 7 : Elapsed 0.015 ms (1.469 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.77% +0.77%] index_add_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.453 -> 1.457 ( +0.28%) [ +0.00% +0.34% +0.41% / +0.28% +0.55% +0.76%] index_copy_ perm : Elapsed 0.015 ms (1.453 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.77% +0.77%] index_add_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) 1.455 -> 1.458 ( +0.21%) [ +0.07% +0.07% +0.00% / +0.21% +0.82% +0.82%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.456 ms / 100) 3.561 -> 3.551 ( -0.28%) [ +0.00% +0.17% +0.00% / +0.00% -0.17% -0.28%] index_select const : Elapsed 0.036 ms (3.561 ms / 100) 3.568 -> 3.566 ( -0.06%) [ +0.08% +0.00% +0.06% / +0.08% -0.03% -0.06%] index_select wrap : Elapsed 0.036 ms (3.571 ms / 100) 3.563 -> 3.568 ( +0.14%) [ +0.17% +0.45% +0.00% / +0.34% +0.25% +0.14%] index_select linear : Elapsed 0.036 ms (3.569 ms / 100) 3.566 -> 3.571 ( +0.14%) [ +0.00% +0.03% +0.14% / +0.14% +0.25% +0.31%] index_select reverse : Elapsed 0.036 ms (3.566 ms / 100) 3.558 -> 3.550 ( -0.22%) [ +0.03% +0.06% +0.00% / -0.14% -0.22% -0.14%] index_select skip64 : Elapsed 0.036 ms (3.559 ms / 100) 3.555 -> 3.551 ( -0.11%) [ +0.11% +0.00% +0.17% / +0.14% -0.11% -0.11%] index_select skip256 : Elapsed 0.036 ms (3.559 ms / 100) 3.572 -> 3.557 ( -0.42%) [ +0.31% +0.08% +0.00% / +0.17% -0.25% -0.42%] index_select spread : Elapsed 0.036 ms (3.583 ms / 100) 3.581 -> 3.557 ( -0.67%) [ +0.28% +0.08% +0.00% / +0.17% -0.67% -0.50%] index_select strided 3 : Elapsed 0.036 ms (3.591 ms / 100) 3.584 -> 3.557 ( -0.75%) [ +0.08% +0.08% +0.00% / +0.08% -0.75% -0.53%] index_select strided 5 : Elapsed 0.036 ms (3.587 ms / 100) 3.567 -> 3.563 ( -0.11%) [ +0.00% +0.14% +0.14% / +0.00% +0.08% -0.11%] index_select strided 7 : Elapsed 0.036 ms (3.567 ms / 100) 3.569 -> 3.561 ( -0.22%) [ +0.03% +0.00% +0.00% / +0.20% -0.22% -0.20%] index_select strided 8 : Elapsed 0.036 ms (3.570 ms / 100) 3.567 -> 3.568 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.14% +0.22%] index_select random : Elapsed 0.036 ms (3.567 ms / 100) 3.562 -> 3.560 ( -0.06%) [ +0.20% +0.17% +0.00% / -0.06% +0.39% +0.42%] index_select random_sorted : Elapsed 0.036 ms (3.569 ms / 100) B = [4, 20, 5, 40] (stride (20, 1, 80, 400)) A = [4, 16, 5, 40] (stride (5, 20, 1, 320)) dim = 1 4.445 -> 4.446 ( +0.02%) [ +0.04% +0.00% +0.04% / +0.02% +0.76% +0.74%] index_add_ linear : Elapsed 0.044 ms (4.447 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.82% +0.79%] index_copy_ linear : Elapsed 0.043 ms (4.283 ms / 100) 4.450 -> 4.452 ( +0.04%) [ +0.11% +0.11% +0.00% / +0.04% +0.70% +0.58%] index_add_ reverse : Elapsed 0.045 ms (4.455 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.00% +0.00% +0.07% / +0.02% +0.68% +0.56%] index_copy_ reverse : Elapsed 0.043 ms (4.285 ms / 100) 4.451 -> 4.450 ( -0.02%) [ +0.00% +0.07% +0.04% / -0.02% +0.67% +0.67%] index_add_ spread : Elapsed 0.045 ms (4.451 ms / 100) 4.284 -> 4.285 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.68% +0.70%] index_copy_ spread : Elapsed 0.043 ms (4.285 ms / 100) 4.454 -> 4.446 ( -0.18%) [ +0.07% +0.04% +0.00% / -0.18% +0.65% +0.61%] index_add_ strided 3 : Elapsed 0.045 ms (4.457 ms / 100) 4.287 -> 4.289 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.72% +0.70%] index_copy_ strided 3 : Elapsed 0.043 ms (4.290 ms / 100) 4.455 -> 4.454 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.58% +0.61%] index_add_ strided 7 : Elapsed 0.045 ms (4.455 ms / 100) 4.287 -> 4.293 ( +0.14%) [ +0.05% +0.14% +0.00% / +0.14% +0.68% +0.63%] index_copy_ strided 7 : Elapsed 0.043 ms (4.289 ms / 100) 4.453 -> 4.454 ( +0.02%) [ +0.04% +0.09% +0.00% / +0.02% +0.61% +0.63%] index_add_ perm : Elapsed 0.045 ms (4.455 ms / 100) 4.283 -> 4.286 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.07% +0.82% +0.65%] index_copy_ perm : Elapsed 0.043 ms (4.287 ms / 100) 4.452 -> 4.455 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.61% +0.65%] index_add_ perm_sorted : Elapsed 0.045 ms (4.452 ms / 100) 4.285 -> 4.282 ( -0.07%) [ +0.05% +0.00% +0.09% / -0.07% +0.68% +0.70%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.287 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.05% +0.00% +0.04% / +0.02% +0.00% +0.07%] index_select const : Elapsed 0.056 ms (5.575 ms / 100) 5.594 -> 5.587 ( -0.13%) [ +0.09% +0.04% +0.00% / +0.05% -0.13% -0.02%] index_select wrap : Elapsed 0.056 ms (5.599 ms / 100) 5.594 -> 5.594 ( +0.00%) [ +0.07% +0.00% +0.14% / +0.00% +0.00% +0.04%] index_select linear : Elapsed 0.056 ms (5.598 ms / 100) 5.599 -> 5.594 ( -0.09%) [ +0.00% +0.11% +0.13% / +0.00% -0.04% -0.09%] index_select reverse : Elapsed 0.056 ms (5.599 ms / 100) 5.568 -> 5.567 ( -0.02%) [ +0.02% +0.07% +0.00% / -0.02% +0.20% +0.22%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.574 ( +0.14%) [ +0.05% +0.23% +0.00% / +0.14% +0.18% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.596 -> 5.593 ( -0.05%) [ +0.00% +0.09% +0.05% / +0.04% -0.05% +0.07%] index_select spread : Elapsed 0.056 ms (5.596 ms / 100) 5.597 -> 5.594 ( -0.05%) [ +0.11% +0.00% +0.11% / +0.04% -0.04% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.603 ms / 100) 5.594 -> 5.593 ( -0.02%) [ +0.21% +0.07% +0.00% / +0.02% -0.02% -0.02%] index_select strided 5 : Elapsed 0.056 ms (5.606 ms / 100) 5.597 -> 5.594 ( -0.05%) [ +0.13% +0.11% +0.00% / +0.04% +0.05% -0.05%] index_select strided 7 : Elapsed 0.056 ms (5.604 ms / 100) 5.579 -> 5.576 ( -0.05%) [ +0.00% +0.13% +0.02% / -0.02% +0.02% -0.05%] index_select strided 8 : Elapsed 0.056 ms (5.579 ms / 100) 5.594 -> 5.583 ( -0.20%) [ +0.07% +0.02% +0.00% / +0.13% -0.20% +0.04%] index_select random : Elapsed 0.056 ms (5.598 ms / 100) 5.586 -> 5.584 ( -0.04%) [ +0.16% +0.00% +0.11% / +0.07% +0.04% -0.04%] index_select random_sorted : Elapsed 0.056 ms (5.595 ms / 100) out_shape = [4, 16, 20, 40] in_shape = [4, 16, 5, 40] idx_dim = 2 B = [4, 16, 20, 40] (stride (12800, 20, 1, 320)) A = [4, 16, 5, 40] (stride (3200, 1, 16, 80)) dim = 2 2.024 -> 2.028 ( +0.20%) [ +0.15% +0.00% +0.25% / +0.20% +0.64% +0.54%] index_add_ linear : Elapsed 0.020 ms (2.027 ms / 100) 1.968 -> 1.988 ( +1.02%) [ +0.00% +0.05% +0.71% / +1.02% +1.17% +1.37%] index_copy_ linear : Elapsed 0.020 ms (1.968 ms / 100) 2.025 -> 2.023 ( -0.10%) [ +0.00% +0.15% +0.05% / -0.10% +0.59% +0.35%] index_add_ reverse : Elapsed 0.020 ms (2.025 ms / 100) 1.970 -> 1.984 ( +0.71%) [ +0.20% +0.00% +0.91% / +0.71% +1.37% +1.32%] index_copy_ reverse : Elapsed 0.020 ms (1.974 ms / 100) 2.033 -> 2.036 ( +0.15%) [ +0.10% +0.25% +0.00% / +0.15% +0.59% +0.59%] index_add_ spread : Elapsed 0.020 ms (2.035 ms / 100) 1.990 -> 1.996 ( +0.30%) [ +0.00% +0.05% +0.75% / +0.30% +0.75% +0.80%] index_copy_ spread : Elapsed 0.020 ms (1.990 ms / 100) 2.038 -> 2.038 ( +0.00%) [ +0.00% +0.25% +0.05% / +0.00% +0.59% +0.44%] index_add_ strided 3 : Elapsed 0.020 ms (2.038 ms / 100) 1.994 -> 1.998 ( +0.20%) [ +0.25% +0.00% +0.35% / +0.20% +0.95% +1.15%] index_copy_ strided 3 : Elapsed 0.020 ms (1.999 ms / 100) 2.041 -> 2.040 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.20% +0.34%] index_add_ strided 7 : Elapsed 0.020 ms (2.041 ms / 100) 2.000 -> 2.008 ( +0.40%) [ +0.00% +0.00% +0.40% / +0.40% +0.80% +0.80%] index_copy_ strided 7 : Elapsed 0.020 ms (2.000 ms / 100) 2.015 -> 2.012 ( -0.15%) [ +0.05% +0.05% +0.00% / -0.15% +0.50% +0.55%] index_add_ perm : Elapsed 0.020 ms (2.016 ms / 100) 1.964 -> 1.973 ( +0.46%) [ +0.00% +0.05% +0.51% / +0.46% +0.87% +1.02%] index_copy_ perm : Elapsed 0.020 ms (1.964 ms / 100) 2.021 -> 2.029 ( +0.40%) [ +0.35% +0.05% +0.00% / +0.40% +0.59% +0.79%] index_add_ perm_sorted : Elapsed 0.020 ms (2.028 ms / 100) 1.969 -> 1.982 ( +0.66%) [ +0.15% +0.00% +0.76% / +0.66% +1.12% +1.27%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.972 ms / 100) 8.583 -> 8.602 ( +0.22%) [ +0.00% +0.31% +0.01% / +0.30% +0.22% +0.44%] index_select const : Elapsed 0.086 ms (8.583 ms / 100) 8.605 -> 8.610 ( +0.06%) [ +0.00% +0.12% +0.08% / +0.06% +0.35% +0.43%] index_select wrap : Elapsed 0.086 ms (8.605 ms / 100) 8.607 -> 8.636 ( +0.34%) [ +0.22% +0.00% +0.14% / +0.34% +0.42% +0.49%] index_select linear : Elapsed 0.086 ms (8.626 ms / 100) 8.597 -> 8.626 ( +0.34%) [ +0.00% +0.03% +0.12% / +0.37% +0.34% +0.45%] index_select reverse : Elapsed 0.086 ms (8.597 ms / 100) 8.597 -> 8.588 ( -0.10%) [ +0.00% +0.15% +0.03% / -0.10% +0.35% +0.08%] index_select skip64 : Elapsed 0.086 ms (8.597 ms / 100) 8.581 -> 8.582 ( +0.01%) [ +0.00% +0.08% +0.38% / +0.01% +0.50% +0.30%] index_select skip256 : Elapsed 0.086 ms (8.581 ms / 100) 8.620 -> 8.625 ( +0.06%) [ +0.00% +0.15% +0.14% / +0.06% +0.27% +0.22%] index_select spread : Elapsed 0.086 ms (8.620 ms / 100) 8.609 -> 8.620 ( +0.13%) [ +0.13% +0.00% +0.10% / +0.24% +0.31% +0.13%] index_select strided 3 : Elapsed 0.086 ms (8.620 ms / 100) 8.612 -> 8.619 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.33% +0.56%] index_select random : Elapsed 0.086 ms (8.612 ms / 100) 8.611 -> 8.604 ( -0.08%) [ +0.17% +0.28% +0.00% / -0.08% +0.70% +0.43%] index_select random_sorted : Elapsed 0.086 ms (8.626 ms / 100) B = [4, 16, 20, 40] (stride (12800, 1, 16, 320)) A = [4, 16, 5, 40] (stride (1, 160, 2560, 4)) dim = 2 1.819 -> 1.820 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.44% +0.27%] index_add_ linear : Elapsed 0.018 ms (1.820 ms / 100) 1.778 -> 1.779 ( +0.06%) [ +0.00% +0.00% +0.11% / +0.06% +0.22% +0.34%] index_copy_ linear : Elapsed 0.018 ms (1.778 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.00% +0.11% +0.00% / +0.17% +0.50% +0.77%] index_add_ reverse : Elapsed 0.018 ms (1.818 ms / 100) 1.779 -> 1.779 ( +0.00%) [ +0.00% +0.11% +0.06% / +0.00% +0.17% +0.22%] index_copy_ reverse : Elapsed 0.018 ms (1.779 ms / 100) 1.832 -> 1.830 ( -0.11%) [ +0.05% +0.00% +0.22% / -0.11% +0.27% +0.33%] index_add_ spread : Elapsed 0.018 ms (1.833 ms / 100) 1.786 -> 1.788 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +0.50% +0.45%] index_copy_ spread : Elapsed 0.018 ms (1.788 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.44% +0.71%] index_add_ strided 3 : Elapsed 0.018 ms (1.826 ms / 100) 1.783 -> 1.782 ( -0.06%) [ +0.22% +0.00% +0.11% / -0.06% +0.45% +0.56%] index_copy_ strided 3 : Elapsed 0.018 ms (1.787 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.93% +0.60%] index_add_ strided 7 : Elapsed 0.018 ms (1.822 ms / 100) 1.777 -> 1.784 ( +0.39%) [ +0.23% +0.11% +0.00% / +0.39% +0.68% +0.90%] index_copy_ strided 7 : Elapsed 0.018 ms (1.781 ms / 100) 1.831 -> 1.827 ( -0.22%) [ +0.05% +0.00% +0.00% / -0.22% +0.49% +0.38%] index_add_ perm : Elapsed 0.018 ms (1.832 ms / 100) 1.784 -> 1.787 ( +0.17%) [ +0.34% +0.11% +0.00% / +0.17% +0.39% +0.62%] index_copy_ perm : Elapsed 0.018 ms (1.790 ms / 100) 1.829 -> 1.827 ( -0.11%) [ +0.11% +0.22% +0.00% / -0.11% +0.38% +0.60%] index_add_ perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) 1.785 -> 1.787 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.50% +0.45%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.785 ms / 100) 8.529 -> 8.538 ( +0.11%) [ +0.08% +0.00% +0.01% / +0.13% +0.11% +0.26%] index_select const : Elapsed 0.085 ms (8.536 ms / 100) 8.561 -> 8.554 ( -0.08%) [ +0.00% +0.30% +0.07% / -0.08% +0.40% +0.09%] index_select wrap : Elapsed 0.086 ms (8.561 ms / 100) 8.549 -> 8.556 ( +0.08%) [ +0.23% +0.00% +0.15% / +0.08% +0.27% +0.37%] index_select linear : Elapsed 0.086 ms (8.569 ms / 100) 8.535 -> 8.545 ( +0.12%) [ +0.15% +0.00% +0.19% / +0.21% +0.12% +0.21%] index_select reverse : Elapsed 0.085 ms (8.548 ms / 100) 8.528 -> 8.524 ( -0.05%) [ +0.00% +0.14% +0.04% / -0.05% -0.01% -0.04%] index_select skip64 : Elapsed 0.085 ms (8.528 ms / 100) 8.543 -> 8.535 ( -0.09%) [ +0.00% +0.07% +0.07% / +0.05% -0.09% -0.08%] index_select skip256 : Elapsed 0.085 ms (8.543 ms / 100) 8.546 -> 8.529 ( -0.20%) [ +0.00% +0.15% +0.18% / -0.20% +0.18% +0.36%] index_select spread : Elapsed 0.085 ms (8.546 ms / 100) 8.561 -> 8.558 ( -0.04%) [ +0.00% +0.39% +0.25% / -0.04% +0.37% +0.02%] index_select strided 3 : Elapsed 0.086 ms (8.561 ms / 100) 8.555 -> 8.553 ( -0.02%) [ +0.02% +0.16% +0.00% / -0.02% +0.40% +0.02%] index_select random : Elapsed 0.086 ms (8.557 ms / 100) 8.543 -> 8.541 ( -0.02%) [ +0.25% +0.00% +0.07% / -0.02% +0.14% +0.42%] index_select random_sorted : Elapsed 0.086 ms (8.564 ms / 100) B = [4, 16, 20, 40] (stride (20, 3200, 1, 80)) A = [4, 16, 5, 40] (stride (1, 4, 2560, 64)) dim = 2 2.014 -> 2.007 ( -0.35%) [ +0.15% +0.30% +0.00% / +0.40% -0.35% -0.10%] index_add_ linear : Elapsed 0.020 ms (2.017 ms / 100) 1.977 -> 1.967 ( -0.51%) [ +0.10% +0.00% +0.10% / +0.00% -0.35% -0.51%] index_copy_ linear : Elapsed 0.020 ms (1.979 ms / 100) 2.017 -> 2.004 ( -0.64%) [ +0.25% +0.05% +0.00% / +0.05% -0.45% -0.64%] index_add_ reverse : Elapsed 0.020 ms (2.022 ms / 100) 1.972 -> 1.964 ( -0.41%) [ +0.05% +0.20% +0.00% / +0.05% -0.25% -0.41%] index_copy_ reverse : Elapsed 0.020 ms (1.973 ms / 100) 2.025 -> 2.020 ( -0.25%) [ +0.10% +0.15% +0.00% / +0.00% -0.25% +0.15%] index_add_ spread : Elapsed 0.020 ms (2.027 ms / 100) 1.994 -> 1.990 ( -0.20%) [ +0.00% +0.00% +0.05% / -0.10% -0.20% -0.20%] index_copy_ spread : Elapsed 0.020 ms (1.994 ms / 100) 2.033 -> 2.031 ( -0.10%) [ +0.00% +0.00% +0.05% / +0.05% -0.05% -0.10%] index_add_ strided 3 : Elapsed 0.020 ms (2.033 ms / 100) 1.997 -> 1.997 ( +0.00%) [ +0.00% +0.20% +0.15% / +0.25% +0.05% +0.00%] index_copy_ strided 3 : Elapsed 0.020 ms (1.997 ms / 100) 2.018 -> 2.022 ( +0.20%) [ +0.45% +0.00% +0.20% / +0.45% +0.20% +0.30%] index_add_ strided 7 : Elapsed 0.020 ms (2.027 ms / 100) 1.989 -> 1.987 ( -0.10%) [ +0.20% +0.05% +0.00% / +0.15% +0.00% -0.10%] index_copy_ strided 7 : Elapsed 0.020 ms (1.993 ms / 100) 2.023 -> 2.022 ( -0.05%) [ +0.35% +0.25% +0.00% / +0.25% -0.05% +0.00%] index_add_ perm : Elapsed 0.020 ms (2.030 ms / 100) 1.992 -> 1.990 ( -0.10%) [ +0.00% +0.20% +0.15% / +0.20% -0.10% -0.05%] index_copy_ perm : Elapsed 0.020 ms (1.992 ms / 100) 2.038 -> 2.023 ( -0.74%) [ +0.00% +0.10% +0.00% / +0.25% -0.74% -0.74%] index_add_ perm_sorted : Elapsed 0.020 ms (2.038 ms / 100) 1.999 -> 1.987 ( -0.60%) [ +0.10% +0.05% +0.00% / +0.10% -0.60% -0.40%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.001 ms / 100) 8.564 -> 8.573 ( +0.11%) [ +0.30% +0.26% +0.00% / +0.11% +0.50% +0.25%] index_select const : Elapsed 0.086 ms (8.590 ms / 100) 8.607 -> 8.608 ( +0.01%) [ +0.08% +0.01% +0.00% / +0.01% +0.10% +0.20%] index_select wrap : Elapsed 0.086 ms (8.614 ms / 100) 8.609 -> 8.600 ( -0.10%) [ +0.20% +0.00% +0.01% / -0.10% +0.21% +0.07%] index_select linear : Elapsed 0.086 ms (8.626 ms / 100) 8.585 -> 8.597 ( +0.14%) [ +0.23% +0.00% +0.19% / +0.14% +0.56% +0.38%] index_select reverse : Elapsed 0.086 ms (8.605 ms / 100) 8.565 -> 8.552 ( -0.15%) [ +0.05% +0.00% +0.12% / -0.15% +0.21% +0.26%] index_select skip64 : Elapsed 0.086 ms (8.569 ms / 100) 8.564 -> 8.563 ( -0.01%) [ +0.13% +0.12% +0.00% / -0.01% +0.37% +0.48%] index_select skip256 : Elapsed 0.086 ms (8.575 ms / 100) 8.595 -> 8.614 ( +0.22%) [ +0.00% +0.29% +0.17% / +0.24% +0.22% +0.30%] index_select spread : Elapsed 0.086 ms (8.595 ms / 100) 8.606 -> 8.613 ( +0.08%) [ +0.12% +0.20% +0.00% / +0.08% +0.30% +0.14%] index_select strided 3 : Elapsed 0.086 ms (8.616 ms / 100) 8.615 -> 8.602 ( -0.15%) [ +0.00% +0.05% +0.01% / -0.15% +0.35% +0.23%] index_select random : Elapsed 0.086 ms (8.615 ms / 100) 8.596 -> 8.602 ( +0.07%) [ +0.00% +0.52% +0.17% / +0.07% +0.72% +0.35%] index_select random_sorted : Elapsed 0.086 ms (8.596 ms / 100) B = [4, 16, 20, 40] (stride (20, 3200, 1, 80)) A = [4, 16, 5, 40] (stride (1, 20, 4, 320)) dim = 2 2.017 -> 2.014 ( -0.15%) [ +0.00% +0.15% +0.05% / +0.10% +0.05% -0.15%] index_add_ linear : Elapsed 0.020 ms (2.017 ms / 100) 1.968 -> 1.969 ( +0.05%) [ +0.20% +0.05% +0.00% / +0.10% +0.71% +0.05%] index_copy_ linear : Elapsed 0.020 ms (1.972 ms / 100) 2.022 -> 2.015 ( -0.35%) [ +0.20% +0.00% +0.05% / +0.05% -0.35% -0.05%] index_add_ reverse : Elapsed 0.020 ms (2.026 ms / 100) 1.971 -> 1.970 ( -0.05%) [ +0.05% +0.00% +0.46% / +0.10% -0.05% +0.05%] index_copy_ reverse : Elapsed 0.020 ms (1.972 ms / 100) 2.038 -> 2.037 ( -0.05%) [ +0.10% +0.25% +0.00% / -0.05% -0.05% +0.00%] index_add_ spread : Elapsed 0.020 ms (2.040 ms / 100) 1.999 -> 1.992 ( -0.35%) [ +0.05% +0.05% +0.00% / +0.35% -0.05% -0.35%] index_copy_ spread : Elapsed 0.020 ms (2.000 ms / 100) 2.037 -> 2.033 ( -0.20%) [ +0.00% +0.05% +0.10% / +0.10% -0.20% +0.00%] index_add_ strided 3 : Elapsed 0.020 ms (2.037 ms / 100) 2.001 -> 1.996 ( -0.25%) [ +0.00% +0.10% +0.00% / -0.15% -0.25% -0.20%] index_copy_ strided 3 : Elapsed 0.020 ms (2.001 ms / 100) 2.033 -> 2.030 ( -0.15%) [ +0.10% +0.39% +0.00% / +0.34% -0.10% -0.15%] index_add_ strided 7 : Elapsed 0.020 ms (2.035 ms / 100) 1.997 -> 1.995 ( -0.10%) [ +0.25% +0.35% +0.00% / +0.00% +0.05% -0.10%] index_copy_ strided 7 : Elapsed 0.020 ms (2.002 ms / 100) 2.035 -> 2.029 ( -0.29%) [ +0.00% +0.05% +0.25% / +0.39% -0.29% -0.05%] index_add_ perm : Elapsed 0.020 ms (2.035 ms / 100) 1.995 -> 1.994 ( -0.05%) [ +0.05% +0.10% +0.00% / +0.25% +0.25% -0.05%] index_copy_ perm : Elapsed 0.020 ms (1.996 ms / 100) 2.035 -> 2.029 ( -0.29%) [ +0.00% +0.29% +0.05% / +0.15% -0.15% -0.29%] index_add_ perm_sorted : Elapsed 0.020 ms (2.035 ms / 100) 1.997 -> 1.995 ( -0.10%) [ +0.00% +0.00% +0.25% / -0.10% +0.00% +0.00%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.997 ms / 100) 8.602 -> 8.605 ( +0.03%) [ +0.08% +0.20% +0.00% / +0.03% +0.65% +0.10%] index_select const : Elapsed 0.086 ms (8.609 ms / 100) 8.606 -> 8.624 ( +0.21%) [ +0.16% +0.33% +0.00% / +0.21% +0.65% +0.24%] index_select wrap : Elapsed 0.086 ms (8.620 ms / 100) 8.609 -> 8.613 ( +0.05%) [ +0.07% +0.13% +0.00% / +0.05% +0.43% +0.14%] index_select linear : Elapsed 0.086 ms (8.615 ms / 100) 8.615 -> 8.605 ( -0.12%) [ +0.07% +0.00% +0.09% / -0.12% +0.30% +0.31%] index_select reverse : Elapsed 0.086 ms (8.621 ms / 100) 8.608 -> 8.598 ( -0.12%) [ +0.03% +0.00% +0.08% / -0.12% +0.24% +0.29%] index_select skip64 : Elapsed 0.086 ms (8.611 ms / 100) 8.595 -> 8.603 ( +0.09%) [ +0.36% +0.20% +0.00% / +0.09% +0.65% +0.34%] index_select skip256 : Elapsed 0.086 ms (8.626 ms / 100) 8.629 -> 8.633 ( +0.05%) [ +0.14% +0.00% +0.13% / +0.05% +0.19% +0.10%] index_select spread : Elapsed 0.086 ms (8.641 ms / 100) 8.613 -> 8.634 ( +0.24%) [ +0.26% +0.13% +0.00% / +0.24% +0.26% +0.35%] index_select strided 3 : Elapsed 0.086 ms (8.635 ms / 100) 8.611 -> 8.625 ( +0.16%) [ +0.07% +0.01% +0.00% / +0.16% +0.38% +0.51%] index_select random : Elapsed 0.086 ms (8.617 ms / 100) 8.625 -> 8.624 ( -0.01%) [ +0.02% +0.00% +0.13% / -0.01% +0.21% +0.35%] index_select random_sorted : Elapsed 0.086 ms (8.627 ms / 100) B = [4, 16, 20, 40] (stride (1, 4, 2560, 64)) A = [4, 16, 5, 40] (stride (16, 1, 2560, 64)) dim = 2 1.829 -> 1.832 ( +0.16%) [ +0.05% +0.00% +0.05% / +0.16% +0.60% +0.55%] index_add_ linear : Elapsed 0.018 ms (1.830 ms / 100) 1.780 -> 1.781 ( +0.06%) [ +0.00% +0.11% +0.11% / +0.06% +0.62% +0.62%] index_copy_ linear : Elapsed 0.018 ms (1.780 ms / 100) 1.830 -> 1.830 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.55% +0.27%] index_add_ reverse : Elapsed 0.018 ms (1.832 ms / 100) 1.783 -> 1.783 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.62% +0.17%] index_copy_ reverse : Elapsed 0.018 ms (1.783 ms / 100) 1.814 -> 1.819 ( +0.28%) [ +0.06% +0.22% +0.00% / +0.28% +0.99% +0.99%] index_add_ spread : Elapsed 0.018 ms (1.815 ms / 100) 1.768 -> 1.772 ( +0.23%) [ +0.11% +0.23% +0.00% / +0.23% +0.96% +0.96%] index_copy_ spread : Elapsed 0.018 ms (1.770 ms / 100) 1.823 -> 1.827 ( +0.22%) [ +0.16% +0.00% +0.27% / +0.22% +1.43% +1.43%] index_add_ strided 3 : Elapsed 0.018 ms (1.826 ms / 100) 1.769 -> 1.774 ( +0.28%) [ +0.40% +0.45% +0.00% / +0.28% +1.98% +1.81%] index_copy_ strided 3 : Elapsed 0.018 ms (1.776 ms / 100) 1.827 -> 1.832 ( +0.27%) [ +0.00% +0.16% +0.11% / +0.27% +0.71% +0.49%] index_add_ strided 7 : Elapsed 0.018 ms (1.827 ms / 100) 1.777 -> 1.780 ( +0.17%) [ +0.00% +0.28% +0.00% / +0.17% +1.01% +0.73%] index_copy_ strided 7 : Elapsed 0.018 ms (1.777 ms / 100) 1.818 -> 1.820 ( +0.11%) [ +0.00% +0.17% +0.11% / +0.11% +0.99% +0.83%] index_add_ perm : Elapsed 0.018 ms (1.818 ms / 100) 1.770 -> 1.767 ( -0.17%) [ +0.17% +0.00% +0.00% / -0.17% +1.47% +1.02%] index_copy_ perm : Elapsed 0.018 ms (1.773 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.00% +0.00% +0.27% / +0.05% +1.10% +0.93%] index_add_ perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) 1.771 -> 1.774 ( +0.17%) [ +0.00% +0.06% +0.17% / +0.17% +1.30% +0.96%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.771 ms / 100) 8.207 -> 8.204 ( -0.04%) [ +0.01% +0.07% +0.00% / -0.04% +0.16% +0.26%] index_select const : Elapsed 0.082 ms (8.208 ms / 100) 8.245 -> 8.248 ( +0.04%) [ +0.00% +0.10% +0.06% / +0.07% +0.07% +0.04%] index_select wrap : Elapsed 0.082 ms (8.245 ms / 100) 8.245 -> 8.248 ( +0.04%) [ +0.01% +0.30% +0.00% / +0.04% +0.07% +0.18%] index_select linear : Elapsed 0.082 ms (8.246 ms / 100) 8.217 -> 8.234 ( +0.21%) [ +0.23% +0.16% +0.00% / +0.22% +0.27% +0.21%] index_select reverse : Elapsed 0.082 ms (8.236 ms / 100) 8.218 -> 8.220 ( +0.02%) [ +0.18% +0.00% +0.02% / +0.02% +0.04% +0.18%] index_select skip64 : Elapsed 0.082 ms (8.233 ms / 100) 8.202 -> 8.206 ( +0.05%) [ +0.15% +0.00% +0.15% / +0.05% +0.50% +0.37%] index_select skip256 : Elapsed 0.082 ms (8.214 ms / 100) 8.225 -> 8.243 ( +0.22%) [ +0.43% +0.35% +0.00% / +0.22% +0.30% +0.39%] index_select spread : Elapsed 0.083 ms (8.260 ms / 100) 8.241 -> 8.246 ( +0.06%) [ +0.24% +0.25% +0.00% / +0.06% +0.07% +0.19%] index_select strided 3 : Elapsed 0.083 ms (8.261 ms / 100) 8.240 -> 8.248 ( +0.10%) [ +0.07% +0.00% +0.04% / +0.11% +0.10% +0.11%] index_select random : Elapsed 0.082 ms (8.246 ms / 100) 8.238 -> 8.246 ( +0.10%) [ +0.05% +0.06% +0.00% / +0.10% +0.17% +0.32%] index_select random_sorted : Elapsed 0.082 ms (8.242 ms / 100) B = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) A = [4, 16, 5, 40] (stride (3200, 1, 640, 16)) dim = 2 1.759 -> 1.760 ( +0.06%) [ +0.06% +0.00% +0.17% / +0.23% +0.06% +0.45%] index_add_ linear : Elapsed 0.018 ms (1.760 ms / 100) 1.716 -> 1.714 ( -0.12%) [ +0.06% +0.06% +0.00% / +0.17% -0.12% -0.06%] index_copy_ linear : Elapsed 0.017 ms (1.717 ms / 100) 1.761 -> 1.758 ( -0.17%) [ +0.06% +0.00% +0.11% / +0.11% +0.00% -0.17%] index_add_ reverse : Elapsed 0.018 ms (1.762 ms / 100) 1.717 -> 1.711 ( -0.35%) [ +0.00% +0.12% +0.00% / +0.06% -0.35% -0.35%] index_copy_ reverse : Elapsed 0.017 ms (1.717 ms / 100) 1.759 -> 1.760 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.57% +0.91%] index_add_ spread : Elapsed 0.018 ms (1.759 ms / 100) 1.722 -> 1.724 ( +0.12%) [ +0.12% +0.29% +0.00% / +0.12% +0.41% +0.58%] index_copy_ spread : Elapsed 0.017 ms (1.724 ms / 100) 1.751 -> 1.758 ( +0.40%) [ +0.23% +0.29% +0.00% / +0.40% +1.09% +1.60%] index_add_ strided 3 : Elapsed 0.018 ms (1.755 ms / 100) 1.717 -> 1.715 ( -0.12%) [ +0.12% +0.12% +0.00% / -0.12% +0.58% +0.58%] index_copy_ strided 3 : Elapsed 0.017 ms (1.719 ms / 100) 1.760 -> 1.763 ( +0.17%) [ +0.11% +0.11% +0.00% / +0.17% +0.85% +0.91%] index_add_ strided 7 : Elapsed 0.018 ms (1.762 ms / 100) 1.718 -> 1.718 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.47% +0.35%] index_copy_ strided 7 : Elapsed 0.017 ms (1.719 ms / 100) 1.759 -> 1.758 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.97% +0.80%] index_add_ perm : Elapsed 0.018 ms (1.760 ms / 100) 1.717 -> 1.717 ( +0.00%) [ +0.00% +0.23% +0.06% / +0.00% +0.47% +0.47%] index_copy_ perm : Elapsed 0.017 ms (1.717 ms / 100) 1.754 -> 1.758 ( +0.23%) [ +0.00% +0.06% +0.06% / +0.23% +1.14% +1.08%] index_add_ perm_sorted : Elapsed 0.018 ms (1.754 ms / 100) 1.717 -> 1.716 ( -0.06%) [ +0.06% +0.00% +0.29% / -0.06% +0.99% +0.93%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.718 ms / 100) 8.191 -> 8.197 ( +0.07%) [ +0.20% +0.00% +0.21% / +0.24% +0.07% +0.28%] index_select const : Elapsed 0.082 ms (8.207 ms / 100) 8.227 -> 8.224 ( -0.04%) [ +0.13% +0.00% +0.07% / +0.18% +0.32% -0.04%] index_select wrap : Elapsed 0.082 ms (8.238 ms / 100) 8.228 -> 8.224 ( -0.05%) [ +0.06% +0.00% +0.22% / +0.01% -0.05% +0.10%] index_select linear : Elapsed 0.082 ms (8.233 ms / 100) 8.207 -> 8.199 ( -0.10%) [ +0.01% +0.00% +0.37% / +0.33% +0.29% -0.10%] index_select reverse : Elapsed 0.082 ms (8.208 ms / 100) 8.185 -> 8.194 ( +0.11%) [ +0.39% +0.00% +0.10% / +0.11% +0.29% +0.23%] index_select skip64 : Elapsed 0.082 ms (8.217 ms / 100) 8.194 -> 8.203 ( +0.11%) [ +0.07% +0.05% +0.00% / +0.24% +0.11% +0.12%] index_select skip256 : Elapsed 0.082 ms (8.200 ms / 100) 8.198 -> 8.221 ( +0.28%) [ +0.00% +0.32% +0.24% / +0.28% +0.52% +0.38%] index_select spread : Elapsed 0.082 ms (8.198 ms / 100) 8.239 -> 8.221 ( -0.22%) [ +0.00% +0.01% +0.18% / -0.12% -0.15% -0.22%] index_select strided 3 : Elapsed 0.082 ms (8.239 ms / 100) 8.228 -> 8.224 ( -0.05%) [ +0.00% +0.01% +0.09% / +0.01% +0.05% -0.05%] index_select random : Elapsed 0.082 ms (8.228 ms / 100) 8.217 -> 8.222 ( +0.06%) [ +0.21% +0.21% +0.00% / +0.17% +0.06% +0.19%] index_select random_sorted : Elapsed 0.082 ms (8.234 ms / 100) out_shape = [4, 16, 5, 20] in_shape = [4, 16, 5, 40] idx_dim = 3 B = [4, 16, 5, 20] (stride (1600, 5, 1, 80)) A = [4, 16, 5, 40] (stride (1, 4, 2560, 64)) dim = 3 2.390 -> 2.392 ( +0.08%) [ +0.29% +0.21% +0.00% / +0.08% +0.46% +0.38%] index_select const : Elapsed 0.024 ms (2.397 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.12% +0.00% +0.17% / +0.17% +0.12% +0.21%] index_select wrap : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.00% +0.17% +0.17% / +0.08% +0.08% +0.08%] index_select linear : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.08% +0.29% +0.00% / +0.08% -0.04% +0.04%] index_select reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.392 -> 2.396 ( +0.17%) [ +0.00% +0.25% +0.08% / +0.25% +0.17% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.392 ms / 100) 2.393 -> 2.395 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.13% +0.08% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.395 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.04% +0.12% +0.00% / +0.00% +0.00% -0.08%] index_select spread : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.08% +0.00% +0.25% / +0.12% +0.25% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.402 -> 2.400 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% -0.08% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.25% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.393 -> 2.398 ( +0.21%) [ +0.00% +0.08% +0.08% / +0.21% +0.29% +0.38%] index_select strided 8 : Elapsed 0.024 ms (2.393 ms / 100) 2.396 -> 2.399 ( +0.13%) [ +0.04% +0.00% +0.00% / +0.13% +0.21% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.397 ms / 100) 2.399 -> 2.404 ( +0.21%) [ +0.21% +0.04% +0.00% / +0.21% +0.21% +0.25%] index_select random : Elapsed 0.024 ms (2.404 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.08% +0.00% +0.17% / +0.08% -0.04% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.00% +0.17% +0.12% / +0.12% +0.08% +0.04%] index_select perm : Elapsed 0.024 ms (2.404 ms / 100) 2.409 -> 2.396 ( -0.54%) [ +0.08% +0.00% +0.04% / +0.00% -0.54% -0.54%] index_select perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) B = [4, 16, 5, 20] (stride (1, 400, 80, 4)) A = [4, 16, 5, 40] (stride (3200, 40, 640, 1)) dim = 3 2.397 -> 2.399 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.42% +0.21%] index_select const : Elapsed 0.024 ms (2.400 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.21% +0.00% +0.12% / +0.21% -0.12% -0.12%] index_select wrap : Elapsed 0.024 ms (2.414 ms / 100) 2.409 -> 2.405 ( -0.17%) [ +0.08% +0.00% +0.21% / +0.00% -0.17% -0.08%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.408 -> 2.407 ( -0.04%) [ +0.04% +0.21% +0.00% / -0.04% +0.08% +0.08%] index_select reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.399 -> 2.402 ( +0.13%) [ +0.00% +0.08% +0.13% / +0.38% +0.25% +0.13%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.21% +0.00% +0.08% / +0.08% +0.42% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.403 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.00% +0.00% +0.17% / -0.04% +0.12% +0.25%] index_select spread : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.17% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.00% +0.33% +0.08% / +0.04% +0.46% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.04% +0.00% +0.17% / +0.08% +0.17% +0.00%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.00% +0.17% +0.04% / +0.08% +0.29% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.413 ms / 100) 2.415 -> 2.413 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.33% +0.41%] index_select strided 16 : Elapsed 0.024 ms (2.415 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.25% +0.08% +0.00% / +0.17% +0.46% +0.46%] index_select random : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.25% +0.21% +0.00% / +0.08% +0.17% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.17% +0.12% +0.00% / -0.08% +0.17% +0.04%] index_select perm : Elapsed 0.024 ms (2.420 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.17% +0.04% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [4, 16, 5, 20] (stride (1, 400, 4, 20)) A = [4, 16, 5, 40] (stride (1, 160, 2560, 4)) dim = 3 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.08% +0.04% +0.53%] index_select const : Elapsed 0.024 ms (2.450 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.08% +0.08%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.00% +0.04% +0.08% / +0.12% -0.04% +0.00%] index_select linear : Elapsed 0.025 ms (2.463 ms / 100) 2.465 -> 2.460 ( -0.20%) [ +0.00% +0.16% +0.04% / -0.08% -0.16% -0.20%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.454 -> 2.452 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.08% -0.04% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.00% +0.16% +0.16% / +0.20% +0.12% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.00% +0.36% +0.20% / +0.08% +0.24% +0.20%] index_select spread : Elapsed 0.025 ms (2.471 ms / 100) 2.473 -> 2.470 ( -0.12%) [ +0.00% +0.16% +0.00% / +0.00% -0.12% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.473 ms / 100) 2.465 -> 2.461 ( -0.16%) [ +0.04% +0.00% +0.00% / -0.08% +0.04% -0.16%] index_select strided 5 : Elapsed 0.025 ms (2.466 ms / 100) 2.468 -> 2.474 ( +0.24%) [ +0.28% +0.00% +0.12% / +0.28% +0.24% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.475 ms / 100) 2.456 -> 2.460 ( +0.16%) [ +0.04% +0.00% +0.04% / +0.16% +0.29% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.24% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.455 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.24% +0.00% +0.28% / +0.37% +0.12% +0.16%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.08% +0.20% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.08% +0.12% +0.00%] index_select perm : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.465 ( -0.16%) [ +0.16% +0.20% +0.00% / +0.08% -0.16% -0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) B = [4, 16, 5, 20] (stride (1, 80, 1280, 4)) A = [4, 16, 5, 40] (stride (640, 40, 2560, 1)) dim = 3 2.409 -> 2.416 ( +0.29%) [ +0.08% +0.29% +0.00% / +0.29% +0.29% +0.42%] index_select const : Elapsed 0.024 ms (2.411 ms / 100) 2.419 -> 2.413 ( -0.25%) [ +0.00% +0.25% +0.21% / +0.29% -0.25% -0.12%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.415 ( -0.17%) [ +0.12% +0.00% +0.04% / +0.21% -0.12% -0.17%] index_select linear : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.04% +0.04%] index_select reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.00% +0.21% +0.17% / +0.08% +0.12% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.410 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.00% +0.08% +0.21% / +0.17% +0.29% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.410 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.21% +0.00% +0.17% / +0.12% +0.29% +0.45%] index_select spread : Elapsed 0.024 ms (2.426 ms / 100) 2.423 -> 2.428 ( +0.21%) [ +0.08% +0.08% +0.00% / +0.21% +0.33% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.421 -> 2.422 ( +0.04%) [ +0.21% +0.00% +0.04% / +0.04% +0.33% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.426 ms / 100) 2.425 -> 2.428 ( +0.12%) [ +0.00% +0.00% +0.04% / +0.12% +0.25% +0.16%] index_select strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.08% +0.04% +0.16%] index_select strided 8 : Elapsed 0.024 ms (2.429 ms / 100) 2.425 -> 2.424 ( -0.04%) [ +0.16% +0.00% +0.04% / -0.04% +0.16% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.29% +0.29%] index_select random : Elapsed 0.024 ms (2.426 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.12% +0.17% +0.00% / +0.12% +0.25% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.427 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.29% +0.04% +0.00% / +0.12% +0.21% +0.08%] index_select perm : Elapsed 0.024 ms (2.430 ms / 100) 2.422 -> 2.422 ( +0.00%) [ +0.29% +0.08% +0.00% / +0.00% +0.33% +0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) out_shape = [20, 16, 40, 5] in_shape = [4, 16, 40, 5] idx_dim = 0 B = [20, 16, 40, 5] (stride (3200, 5, 80, 1)) A = [4, 16, 40, 5] (stride (1, 4, 320, 64)) dim = 0 2.143 -> 2.143 ( +0.00%) [ +0.33% +0.28% +0.00% / +0.00% +0.33% +0.19%] index_add_ linear : Elapsed 0.022 ms (2.150 ms / 100) 2.074 -> 2.079 ( +0.24%) [ +0.00% +0.19% +0.00% / +0.24% +0.39% +0.24%] index_copy_ linear : Elapsed 0.021 ms (2.074 ms / 100) 2.141 -> 2.143 ( +0.09%) [ +0.14% +0.19% +0.00% / +0.09% +0.51% +0.47%] index_add_ reverse : Elapsed 0.021 ms (2.144 ms / 100) 2.076 -> 2.072 ( -0.19%) [ +0.05% +0.05% +0.00% / -0.19% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.021 ms (2.077 ms / 100) 2.148 -> 2.150 ( +0.09%) [ +0.00% +0.05% +0.09% / +0.09% +0.23% +0.19%] index_add_ spread : Elapsed 0.021 ms (2.148 ms / 100) 2.078 -> 2.081 ( +0.14%) [ +0.10% +0.14% +0.00% / +0.14% +0.34% +0.38%] index_copy_ spread : Elapsed 0.021 ms (2.080 ms / 100) 2.144 -> 2.146 ( +0.09%) [ +0.19% +0.00% +0.14% / +0.09% +0.51% +0.28%] index_add_ strided 3 : Elapsed 0.021 ms (2.148 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.24% +0.14%] index_copy_ strided 3 : Elapsed 0.021 ms (2.079 ms / 100) 2.143 -> 2.144 ( +0.05%) [ +0.19% +0.09% +0.00% / +0.05% +0.42% +0.33%] index_add_ strided 7 : Elapsed 0.021 ms (2.147 ms / 100) 2.076 -> 2.077 ( +0.05%) [ +0.14% +0.05% +0.00% / +0.05% +0.29% +0.29%] index_copy_ strided 7 : Elapsed 0.021 ms (2.079 ms / 100) 2.141 -> 2.140 ( -0.05%) [ +0.14% +0.33% +0.00% / -0.05% +0.05% +0.23%] index_add_ perm : Elapsed 0.021 ms (2.144 ms / 100) 2.072 -> 2.070 ( -0.10%) [ +0.14% +0.00% +0.14% / -0.10% +0.19% +0.29%] index_copy_ perm : Elapsed 0.021 ms (2.075 ms / 100) 2.144 -> 2.141 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +0.05% +0.00%] index_add_ perm_sorted : Elapsed 0.021 ms (2.144 ms / 100) 2.078 -> 2.075 ( -0.14%) [ +0.00% +0.05% +0.24% / -0.14% +0.14% +0.34%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.078 ms / 100) 8.779 -> 8.790 ( +0.13%) [ +0.00% +0.08% +0.31% / +0.13% +0.25% +0.28%] index_select const : Elapsed 0.088 ms (8.779 ms / 100) 8.781 -> 8.793 ( +0.14%) [ +0.00% +0.05% +0.20% / +0.14% +0.24% +0.20%] index_select wrap : Elapsed 0.088 ms (8.781 ms / 100) 8.778 -> 8.788 ( +0.11%) [ +0.07% +0.07% +0.00% / +0.11% +0.40% +0.36%] index_select linear : Elapsed 0.088 ms (8.784 ms / 100) 8.768 -> 8.786 ( +0.21%) [ +0.00% +0.17% +0.08% / +0.21% +0.21% +0.31%] index_select reverse : Elapsed 0.088 ms (8.768 ms / 100) 8.759 -> 8.783 ( +0.27%) [ +0.30% +0.38% +0.00% / +0.27% +0.27% +0.30%] index_select skip64 : Elapsed 0.088 ms (8.785 ms / 100) 8.782 -> 8.784 ( +0.02%) [ +0.00% +0.09% +0.02% / +0.40% +0.18% +0.02%] index_select skip256 : Elapsed 0.088 ms (8.782 ms / 100) 8.775 -> 8.787 ( +0.14%) [ +0.24% +0.23% +0.00% / +0.14% +0.22% +0.14%] index_select spread : Elapsed 0.088 ms (8.796 ms / 100) 8.785 -> 8.785 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.02% +0.09% +0.00%] index_select strided 3 : Elapsed 0.088 ms (8.787 ms / 100) 8.770 -> 8.770 ( +0.00%) [ +0.15% +0.09% +0.00% / +0.00% +0.33% +0.22%] index_select random : Elapsed 0.088 ms (8.783 ms / 100) 8.779 -> 8.770 ( -0.10%) [ +0.00% +0.13% +0.15% / -0.10% +0.16% +0.21%] index_select random_sorted : Elapsed 0.088 ms (8.779 ms / 100) B = [20, 16, 40, 5] (stride (200, 4000, 5, 1)) A = [4, 16, 40, 5] (stride (3200, 40, 1, 640)) dim = 0 0.765 -> 0.769 ( +0.52%) [ +0.26% +0.39% +0.00% / +0.52% +3.40% +3.79%] index_add_ linear : Elapsed 0.008 ms (0.767 ms / 100) 0.749 -> 0.751 ( +0.27%) [ +0.27% +1.34% +0.00% / +0.27% +3.07% +2.67%] index_copy_ linear : Elapsed 0.008 ms (0.751 ms / 100) 0.771 -> 0.772 ( +0.13%) [ +0.26% +0.52% +0.00% / +0.13% +3.24% +2.20%] index_add_ reverse : Elapsed 0.008 ms (0.773 ms / 100) 0.760 -> 0.763 ( +0.39%) [ +0.39% +0.00% +0.53% / +0.39% +2.63% +1.18%] index_copy_ reverse : Elapsed 0.008 ms (0.763 ms / 100) 0.751 -> 0.754 ( +0.40%) [ +0.53% +0.67% +0.00% / +0.40% +2.53% +2.53%] index_add_ spread : Elapsed 0.008 ms (0.755 ms / 100) 0.736 -> 0.737 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +2.45% +2.31%] index_copy_ spread : Elapsed 0.007 ms (0.736 ms / 100) 0.776 -> 0.777 ( +0.13%) [ +0.90% +0.00% +0.00% / +0.26% +0.13% +0.64%] index_add_ strided 3 : Elapsed 0.008 ms (0.783 ms / 100) 0.759 -> 0.764 ( +0.66%) [ +0.92% +0.66% +0.00% / +0.66% +1.45% +1.32%] index_copy_ strided 3 : Elapsed 0.008 ms (0.766 ms / 100) 0.754 -> 0.759 ( +0.66%) [ +0.00% +0.40% +0.27% / +0.66% +2.39% +2.65%] index_add_ strided 7 : Elapsed 0.008 ms (0.754 ms / 100) 0.736 -> 0.740 ( +0.54%) [ +0.41% +0.54% +0.00% / +0.54% +3.53% +3.13%] index_copy_ strided 7 : Elapsed 0.007 ms (0.739 ms / 100) 0.747 -> 0.751 ( +0.54%) [ +0.00% +0.40% +0.13% / +0.54% +3.48% +2.95%] index_add_ perm : Elapsed 0.007 ms (0.747 ms / 100) 0.731 -> 0.736 ( +0.68%) [ +0.00% +0.55% +0.68% / +0.68% +2.74% +2.87%] index_copy_ perm : Elapsed 0.007 ms (0.731 ms / 100) 0.749 -> 0.752 ( +0.40%) [ +0.00% +0.27% +0.13% / +0.40% +2.27% +2.80%] index_add_ perm_sorted : Elapsed 0.007 ms (0.749 ms / 100) 0.735 -> 0.736 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +2.18% +2.18%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.735 ms / 100) 4.981 -> 4.974 ( -0.14%) [ +0.30% +0.00% +0.14% / +0.18% +0.14% -0.14%] index_select const : Elapsed 0.050 ms (4.996 ms / 100) 5.015 -> 5.017 ( +0.04%) [ +0.00% +0.14% +0.40% / +0.04% +0.28% +0.24%] index_select wrap : Elapsed 0.050 ms (5.015 ms / 100) 5.014 -> 5.013 ( -0.02%) [ +0.20% +0.00% +0.16% / -0.02% +0.22% +0.18%] index_select linear : Elapsed 0.050 ms (5.024 ms / 100) 5.009 -> 5.012 ( +0.06%) [ +0.00% +0.08% +0.10% / +0.06% +0.24% +0.14%] index_select reverse : Elapsed 0.050 ms (5.009 ms / 100) 4.991 -> 4.980 ( -0.22%) [ +0.14% +0.00% +0.08% / +0.02% -0.12% -0.22%] index_select skip64 : Elapsed 0.050 ms (4.998 ms / 100) 4.976 -> 4.978 ( +0.04%) [ +0.22% +0.00% +0.26% / +0.04% +0.06% +0.08%] index_select skip256 : Elapsed 0.050 ms (4.987 ms / 100) 5.014 -> 5.024 ( +0.20%) [ +0.18% +0.00% +0.16% / +0.20% +0.68% +0.50%] index_select spread : Elapsed 0.050 ms (5.023 ms / 100) 5.019 -> 5.019 ( +0.00%) [ +0.06% +0.00% +0.22% / +0.00% +0.14% +0.22%] index_select strided 3 : Elapsed 0.050 ms (5.022 ms / 100) 5.030 -> 5.026 ( -0.08%) [ +0.02% +0.00% +0.14% / -0.08% +0.30% -0.02%] index_select random : Elapsed 0.050 ms (5.031 ms / 100) 5.016 -> 5.019 ( +0.06%) [ +0.00% +0.04% +0.42% / +0.06% +0.58% +0.30%] index_select random_sorted : Elapsed 0.050 ms (5.016 ms / 100) B = [20, 16, 40, 5] (stride (1, 20, 320, 12800)) A = [4, 16, 40, 5] (stride (16, 1, 64, 2560)) dim = 0 2.261 -> 2.267 ( +0.27%) [ +0.00% +0.00% +0.22% / +0.27% +0.88% +0.71%] index_add_ linear : Elapsed 0.023 ms (2.261 ms / 100) 2.215 -> 2.216 ( +0.05%) [ +0.00% +0.27% +0.18% / +0.05% +0.59% +0.59%] index_copy_ linear : Elapsed 0.022 ms (2.215 ms / 100) 2.264 -> 2.267 ( +0.13%) [ +0.18% +0.00% +0.00% / +0.13% +0.88% +0.88%] index_add_ reverse : Elapsed 0.023 ms (2.268 ms / 100) 2.215 -> 2.218 ( +0.14%) [ +0.18% +0.00% +0.45% / +0.14% +0.86% +0.68%] index_copy_ reverse : Elapsed 0.022 ms (2.219 ms / 100) 2.302 -> 2.300 ( -0.09%) [ +0.30% +0.00% +0.22% / -0.09% +0.87% +1.00%] index_add_ spread : Elapsed 0.023 ms (2.309 ms / 100) 2.311 -> 2.310 ( -0.04%) [ +0.43% +0.00% +0.00% / -0.04% +1.30% +1.25%] index_copy_ spread : Elapsed 0.023 ms (2.321 ms / 100) 2.293 -> 2.293 ( +0.00%) [ +0.00% +0.22% +0.22% / +0.00% +0.61% +0.61%] index_add_ strided 3 : Elapsed 0.023 ms (2.293 ms / 100) 2.283 -> 2.284 ( +0.04%) [ +0.13% +0.09% +0.00% / +0.04% +0.66% +0.79%] index_copy_ strided 3 : Elapsed 0.023 ms (2.286 ms / 100) 2.300 -> 2.308 ( +0.35%) [ +0.22% +0.09% +0.00% / +0.35% +0.74% +0.74%] index_add_ strided 7 : Elapsed 0.023 ms (2.305 ms / 100) 2.319 -> 2.316 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.78% +0.56%] index_copy_ strided 7 : Elapsed 0.023 ms (2.319 ms / 100) 2.297 -> 2.301 ( +0.17%) [ +0.00% +0.17% +0.09% / +0.17% +0.52% +0.52%] index_add_ perm : Elapsed 0.023 ms (2.297 ms / 100) 2.308 -> 2.313 ( +0.22%) [ +0.13% +0.17% +0.00% / +0.22% +1.04% +1.00%] index_copy_ perm : Elapsed 0.023 ms (2.311 ms / 100) 2.296 -> 2.301 ( +0.22%) [ +0.00% +0.30% +0.26% / +0.22% +0.61% +0.52%] index_add_ perm_sorted : Elapsed 0.023 ms (2.296 ms / 100) 2.316 -> 2.317 ( +0.04%) [ +0.00% +0.00% +0.22% / +0.04% +0.30% +0.73%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.316 ms / 100) 9.282 -> 9.280 ( -0.02%) [ +0.03% +0.02% +0.00% / -0.02% -0.01% +0.20%] index_select const : Elapsed 0.093 ms (9.285 ms / 100) 9.312 -> 9.304 ( -0.09%) [ +0.13% +0.24% +0.00% / +0.24% +0.09% -0.09%] index_select wrap : Elapsed 0.093 ms (9.324 ms / 100) 9.293 -> 9.291 ( -0.02%) [ +0.27% +0.11% +0.00% / -0.02% +0.19% +0.32%] index_select linear : Elapsed 0.093 ms (9.318 ms / 100) 9.295 -> 9.302 ( +0.08%) [ +0.00% +0.22% +0.09% / +0.11% +0.08% +0.11%] index_select reverse : Elapsed 0.093 ms (9.295 ms / 100) 9.275 -> 9.259 ( -0.17%) [ +0.13% +0.00% +0.09% / -0.17% +0.27% +0.04%] index_select skip64 : Elapsed 0.093 ms (9.287 ms / 100) 9.280 -> 9.281 ( +0.01%) [ +0.20% +0.00% +0.05% / +0.20% +0.02% +0.01%] index_select skip256 : Elapsed 0.093 ms (9.299 ms / 100) 9.326 -> 9.322 ( -0.04%) [ +0.10% +0.00% +0.16% / -0.04% +0.04% +0.00%] index_select spread : Elapsed 0.093 ms (9.335 ms / 100) 9.319 -> 9.318 ( -0.01%) [ +0.23% +0.01% +0.00% / +0.11% -0.01% +0.01%] index_select strided 3 : Elapsed 0.093 ms (9.340 ms / 100) 9.315 -> 9.326 ( +0.12%) [ +0.11% +0.17% +0.00% / +0.17% +0.12% +0.29%] index_select random : Elapsed 0.093 ms (9.325 ms / 100) 9.316 -> 9.324 ( +0.09%) [ +0.43% +0.00% +0.15% / +0.14% +0.17% +0.09%] index_select random_sorted : Elapsed 0.094 ms (9.356 ms / 100) out_shape = [4, 20, 40, 5] in_shape = [4, 16, 40, 5] idx_dim = 1 B = [4, 20, 40, 5] (stride (4000, 200, 1, 40)) A = [4, 16, 40, 5] (stride (16, 1, 64, 2560)) dim = 1 4.457 -> 4.462 ( +0.11%) [ +0.07% +0.00% +0.07% / +0.11% +0.58% +0.58%] index_add_ linear : Elapsed 0.045 ms (4.460 ms / 100) 4.280 -> 4.283 ( +0.07%) [ +0.00% +0.02% +0.00% / +0.07% +0.75% +0.65%] index_copy_ linear : Elapsed 0.043 ms (4.280 ms / 100) 4.429 -> 4.431 ( +0.05%) [ +0.14% +0.34% +0.00% / +0.05% +0.70% +0.93%] index_add_ reverse : Elapsed 0.044 ms (4.435 ms / 100) 4.265 -> 4.265 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.68% +0.70%] index_copy_ reverse : Elapsed 0.043 ms (4.265 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.09% +0.00% +0.14% / +0.00% +0.77% +0.54%] index_add_ spread : Elapsed 0.044 ms (4.440 ms / 100) 4.268 -> 4.273 ( +0.12%) [ +0.14% +0.00% +0.12% / +0.12% +0.91% +0.66%] index_copy_ spread : Elapsed 0.043 ms (4.274 ms / 100) 4.431 -> 4.434 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.72% +0.84%] index_add_ strided 3 : Elapsed 0.044 ms (4.431 ms / 100) 4.260 -> 4.263 ( +0.07%) [ +0.02% +0.12% +0.00% / +0.07% +0.75% +0.80%] index_copy_ strided 3 : Elapsed 0.043 ms (4.261 ms / 100) 4.431 -> 4.440 ( +0.20%) [ +0.11% +0.14% +0.00% / +0.20% +0.70% +0.52%] index_add_ strided 7 : Elapsed 0.044 ms (4.436 ms / 100) 4.265 -> 4.267 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.68% +0.59%] index_copy_ strided 7 : Elapsed 0.043 ms (4.267 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.00% +0.18% +0.16% / +0.05% +0.52% +0.63%] index_add_ perm : Elapsed 0.044 ms (4.437 ms / 100) 4.276 -> 4.276 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.49% +0.49%] index_copy_ perm : Elapsed 0.043 ms (4.276 ms / 100) 4.434 -> 4.446 ( +0.27%) [ +0.00% +0.25% +0.20% / +0.27% +0.72% +0.83%] index_add_ perm_sorted : Elapsed 0.044 ms (4.434 ms / 100) 4.266 -> 4.268 ( +0.05%) [ +0.00% +0.07% +0.05% / +0.05% +0.61% +0.61%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.266 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.13% +0.04% +0.00% / +0.00% +0.05% -0.09%] index_select const : Elapsed 0.056 ms (5.576 ms / 100) 5.576 -> 5.571 ( -0.09%) [ +0.07% +0.16% +0.00% / +0.02% -0.09% +0.00%] index_select wrap : Elapsed 0.056 ms (5.580 ms / 100) 5.576 -> 5.578 ( +0.04%) [ +0.11% +0.16% +0.00% / +0.27% +0.04% +0.04%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.580 -> 5.575 ( -0.09%) [ +0.00% +0.02% +0.00% / +0.04% -0.07% -0.09%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.16% +0.05% +0.00% / +0.07% +0.29% +0.13%] index_select skip64 : Elapsed 0.056 ms (5.574 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.09% +0.11% +0.00% / +0.07% +0.27% +0.32%] index_select skip256 : Elapsed 0.056 ms (5.568 ms / 100) 5.575 -> 5.574 ( -0.02%) [ +0.11% +0.00% +0.07% / +0.09% -0.02% +0.07%] index_select spread : Elapsed 0.056 ms (5.581 ms / 100) 5.580 -> 5.570 ( -0.18%) [ +0.00% +0.02% +0.09% / +0.14% -0.14% -0.18%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.577 -> 5.574 ( -0.05%) [ +0.07% +0.00% +0.20% / +0.14% +0.00% -0.05%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.07% +0.05% +0.00% / +0.16% +0.05% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.581 ms / 100) 5.576 -> 5.575 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% -0.02% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.576 ms / 100) 5.575 -> 5.572 ( -0.05%) [ +0.23% +0.13% +0.00% / +0.07% -0.05% +0.07%] index_select random : Elapsed 0.056 ms (5.588 ms / 100) 5.567 -> 5.575 ( +0.14%) [ +0.20% +0.25% +0.00% / +0.16% +0.20% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.578 ms / 100) B = [4, 20, 40, 5] (stride (4000, 1, 20, 800)) A = [4, 16, 40, 5] (stride (3200, 5, 80, 1)) dim = 1 4.053 -> 4.052 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.76% +0.69%] index_add_ linear : Elapsed 0.041 ms (4.053 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.77% +0.77%] index_copy_ linear : Elapsed 0.039 ms (3.922 ms / 100) 4.053 -> 4.051 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.69% +0.67%] index_add_ reverse : Elapsed 0.041 ms (4.053 ms / 100) 3.926 -> 3.922 ( -0.10%) [ +0.00% +0.03% +0.00% / -0.10% +0.82% +0.74%] index_copy_ reverse : Elapsed 0.039 ms (3.926 ms / 100) 4.056 -> 4.056 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.74% +0.71%] index_add_ spread : Elapsed 0.041 ms (4.056 ms / 100) 3.933 -> 3.933 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.74% +0.69%] index_copy_ spread : Elapsed 0.039 ms (3.935 ms / 100) 4.057 -> 4.061 ( +0.10%) [ +0.00% +0.00% +0.02% / +0.10% +0.71% +0.71%] index_add_ strided 3 : Elapsed 0.041 ms (4.057 ms / 100) 3.928 -> 3.926 ( -0.05%) [ +0.03% +0.00% +0.05% / -0.05% +0.81% +0.66%] index_copy_ strided 3 : Elapsed 0.039 ms (3.929 ms / 100) 4.050 -> 4.051 ( +0.02%) [ +0.00% +0.10% +0.02% / +0.02% +0.72% +0.77%] index_add_ strided 7 : Elapsed 0.040 ms (4.050 ms / 100) 3.923 -> 3.927 ( +0.10%) [ +0.03% +0.03% +0.00% / +0.10% +0.87% +0.84%] index_copy_ strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 4.054 -> 4.055 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.67% +0.67%] index_add_ perm : Elapsed 0.041 ms (4.054 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.77% +0.77%] index_copy_ perm : Elapsed 0.039 ms (3.920 ms / 100) 4.055 -> 4.057 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.76% +0.74%] index_add_ perm_sorted : Elapsed 0.041 ms (4.056 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.00% +0.08% +0.10% / +0.08% +0.89% +0.97%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 5.560 -> 5.566 ( +0.11%) [ +0.00% +0.07% +0.05% / +0.11% +0.13% +0.11%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.574 -> 5.577 ( +0.05%) [ +0.00% +0.04% +0.11% / +0.05% +0.22% +0.05%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.575 -> 5.572 ( -0.05%) [ +0.00% +0.07% +0.04% / -0.05% +0.14% +0.18%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.575 -> 5.577 ( +0.04%) [ +0.04% +0.00% +0.05% / +0.04% +0.11% +0.13%] index_select reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.00% +0.05% +0.16% / +0.11% +0.13% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.18% +0.00% +0.05% / -0.04% +0.04% +0.09%] index_select skip256 : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.13% +0.25%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.577 -> 5.573 ( -0.07%) [ +0.11% +0.00% +0.02% / -0.07% +0.14% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.583 ms / 100) 5.572 -> 5.580 ( +0.14%) [ +0.00% +0.13% +0.04% / +0.18% +0.18% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.572 -> 5.576 ( +0.07%) [ +0.00% +0.05% +0.04% / +0.07% +0.39% +0.27%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.04% +0.00% +0.05% / -0.05% +0.07% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.568 ms / 100) 5.579 -> 5.575 ( -0.07%) [ +0.00% +0.07% +0.02% / -0.07% -0.05% +0.09%] index_select random : Elapsed 0.056 ms (5.579 ms / 100) 5.575 -> 5.575 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.00% +0.23% +0.30%] index_select random_sorted : Elapsed 0.056 ms (5.576 ms / 100) B = [4, 20, 40, 5] (stride (1, 800, 4, 160)) A = [4, 16, 40, 5] (stride (200, 800, 1, 40)) dim = 1 4.041 -> 4.047 ( +0.15%) [ +0.05% +0.05% +0.00% / +0.15% +0.69% +0.89%] index_add_ linear : Elapsed 0.040 ms (4.043 ms / 100) 3.917 -> 3.916 ( -0.03%) [ +0.05% +0.00% +0.10% / -0.03% +0.61% +0.61%] index_copy_ linear : Elapsed 0.039 ms (3.919 ms / 100) 4.045 -> 4.046 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.67% +0.69%] index_add_ reverse : Elapsed 0.040 ms (4.046 ms / 100) 3.914 -> 3.916 ( +0.05%) [ +0.13% +0.23% +0.00% / +0.05% +0.59% +0.59%] index_copy_ reverse : Elapsed 0.039 ms (3.919 ms / 100) 4.040 -> 4.041 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.69% +0.77%] index_add_ spread : Elapsed 0.040 ms (4.040 ms / 100) 3.913 -> 3.916 ( +0.08%) [ +0.18% +0.00% +0.23% / +0.08% +0.84% +0.69%] index_copy_ spread : Elapsed 0.039 ms (3.920 ms / 100) 4.043 -> 4.036 ( -0.17%) [ +0.05% +0.05% +0.00% / -0.17% +0.59% +0.69%] index_add_ strided 3 : Elapsed 0.040 ms (4.045 ms / 100) 3.922 -> 3.920 ( -0.05%) [ +0.03% +0.00% +0.00% / -0.05% +0.84% +0.82%] index_copy_ strided 3 : Elapsed 0.039 ms (3.923 ms / 100) 4.038 -> 4.045 ( +0.17%) [ +0.00% +0.07% +0.07% / +0.17% +0.79% +0.77%] index_add_ strided 7 : Elapsed 0.040 ms (4.038 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.05% +0.82% +0.79%] index_copy_ strided 7 : Elapsed 0.039 ms (3.921 ms / 100) 4.040 -> 4.045 ( +0.12%) [ +0.00% +0.07% +0.00% / +0.12% +0.62% +0.62%] index_add_ perm : Elapsed 0.040 ms (4.040 ms / 100) 3.917 -> 3.920 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.41% +0.54%] index_copy_ perm : Elapsed 0.039 ms (3.917 ms / 100) 4.046 -> 4.046 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.62% +0.59%] index_add_ perm_sorted : Elapsed 0.040 ms (4.047 ms / 100) 3.918 -> 3.917 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.46% +0.36%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.921 ms / 100) 5.557 -> 5.561 ( +0.07%) [ +0.04% +0.09% +0.00% / +0.09% +0.07% +0.09%] index_select const : Elapsed 0.056 ms (5.559 ms / 100) 5.575 -> 5.574 ( -0.02%) [ +0.02% +0.09% +0.00% / +0.16% +0.09% -0.02%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.574 -> 5.578 ( +0.07%) [ +0.00% +0.02% +0.09% / +0.07% +0.07% +0.13%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.577 -> 5.571 ( -0.11%) [ +0.11% +0.16% +0.00% / -0.11% +0.05% +0.02%] index_select reverse : Elapsed 0.056 ms (5.583 ms / 100) 5.555 -> 5.562 ( +0.13%) [ +0.00% +0.05% +0.11% / +0.13% +0.20% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.558 -> 5.557 ( -0.02%) [ +0.07% +0.04% +0.00% / -0.02% +0.16% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.576 -> 5.578 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.04% +0.16% +0.14%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.578 -> 5.574 ( -0.07%) [ +0.07% +0.16% +0.00% / +0.14% -0.07% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.580 -> 5.572 ( -0.14%) [ +0.02% +0.07% +0.00% / -0.07% -0.14% -0.02%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.573 -> 5.575 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.04% +0.22% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.577 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +0.18% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.00% +0.05% +0.05% / +0.09% -0.07% +0.07%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.573 -> 5.579 ( +0.11%) [ +0.04% +0.00% +0.11% / +0.14% +0.11% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) B = [4, 20, 40, 5] (stride (100, 1, 400, 20)) A = [4, 16, 40, 5] (stride (1, 20, 320, 4)) dim = 1 4.129 -> 4.135 ( +0.15%) [ +0.02% +0.05% +0.00% / +0.15% +0.82% +0.75%] index_add_ linear : Elapsed 0.041 ms (4.130 ms / 100) 3.994 -> 3.994 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.95% +0.75%] index_copy_ linear : Elapsed 0.040 ms (3.994 ms / 100) 4.123 -> 4.125 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.75% +0.65%] index_add_ reverse : Elapsed 0.041 ms (4.124 ms / 100) 3.990 -> 3.991 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.73% +0.63%] index_copy_ reverse : Elapsed 0.040 ms (3.991 ms / 100) 4.123 -> 4.132 ( +0.22%) [ +0.00% +0.24% +0.17% / +0.22% +1.04% +0.99%] index_add_ spread : Elapsed 0.041 ms (4.123 ms / 100) 3.987 -> 3.994 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +0.93% +0.90%] index_copy_ spread : Elapsed 0.040 ms (3.987 ms / 100) 4.133 -> 4.133 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.75% +0.80%] index_add_ strided 3 : Elapsed 0.041 ms (4.135 ms / 100) 3.992 -> 3.993 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.73% +0.88%] index_copy_ strided 3 : Elapsed 0.040 ms (3.994 ms / 100) 4.122 -> 4.123 ( +0.02%) [ +0.00% +0.12% +0.05% / +0.02% +0.80% +0.78%] index_add_ strided 7 : Elapsed 0.041 ms (4.122 ms / 100) 3.986 -> 3.989 ( +0.08%) [ +0.13% +0.15% +0.00% / +0.08% +0.95% +0.83%] index_copy_ strided 7 : Elapsed 0.040 ms (3.991 ms / 100) 4.136 -> 4.136 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.00% +0.65% +0.68%] index_add_ perm : Elapsed 0.041 ms (4.137 ms / 100) 3.995 -> 3.999 ( +0.10%) [ +0.13% +0.00% +0.05% / +0.10% +0.78% +0.80%] index_copy_ perm : Elapsed 0.040 ms (4.000 ms / 100) 4.132 -> 4.137 ( +0.12%) [ +0.07% +0.05% +0.00% / +0.12% +0.87% +0.73%] index_add_ perm_sorted : Elapsed 0.041 ms (4.135 ms / 100) 3.994 -> 3.998 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.73% +0.78%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.994 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.00% +0.05% +0.00% / +0.04% +0.22% +0.13%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.578 -> 5.574 ( -0.07%) [ +0.13% +0.00% +0.07% / -0.07% +0.11% +0.20%] index_select wrap : Elapsed 0.056 ms (5.585 ms / 100) 5.573 -> 5.576 ( +0.05%) [ +0.02% +0.18% +0.00% / +0.11% +0.05% +0.23%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.579 -> 5.571 ( -0.14%) [ +0.02% +0.00% +0.00% / -0.14% +0.23% -0.02%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.00% +0.13% +0.07% / +0.05% +0.14% +0.13%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.13% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.575 -> 5.573 ( -0.04%) [ +0.02% +0.05% +0.00% / -0.04% +0.07% +0.07%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.579 -> 5.576 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.09% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.573 -> 5.579 ( +0.11%) [ +0.00% +0.11% +0.14% / +0.11% +0.20% +0.29%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.20% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.583 ms / 100) 5.559 -> 5.567 ( +0.14%) [ +0.00% +0.16% +0.07% / +0.14% +0.25% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.570 -> 5.571 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.16% +0.14%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.07% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [4, 20, 40, 5] (stride (800, 40, 1, 3200)) A = [4, 16, 40, 5] (stride (1, 4, 320, 64)) dim = 1 4.090 -> 4.091 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.68% +0.68%] index_add_ linear : Elapsed 0.041 ms (4.091 ms / 100) 3.925 -> 3.932 ( +0.18%) [ +0.03% +0.00% +0.00% / +0.18% +0.84% +0.82%] index_copy_ linear : Elapsed 0.039 ms (3.926 ms / 100) 4.104 -> 4.104 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_add_ reverse : Elapsed 0.041 ms (4.105 ms / 100) 3.939 -> 3.939 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.79% +0.74%] index_copy_ reverse : Elapsed 0.039 ms (3.939 ms / 100) 4.065 -> 4.066 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.64% +0.64%] index_add_ spread : Elapsed 0.041 ms (4.066 ms / 100) 3.891 -> 3.892 ( +0.03%) [ +0.26% +0.05% +0.00% / +0.03% +0.69% +0.72%] index_copy_ spread : Elapsed 0.039 ms (3.901 ms / 100) 4.079 -> 4.079 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.66% +0.64%] index_add_ strided 3 : Elapsed 0.041 ms (4.079 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.49% +0.51%] index_copy_ strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 4.079 -> 4.080 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.041 ms (4.079 ms / 100) 3.916 -> 3.918 ( +0.05%) [ +0.13% +0.15% +0.00% / +0.05% +0.54% +0.49%] index_copy_ strided 7 : Elapsed 0.039 ms (3.921 ms / 100) 4.067 -> 4.068 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.52% +0.52%] index_add_ perm : Elapsed 0.041 ms (4.069 ms / 100) 3.898 -> 3.918 ( +0.51%) [ +0.03% +0.00% +0.31% / +0.56% +0.51% +0.56%] index_copy_ perm : Elapsed 0.039 ms (3.899 ms / 100) 4.103 -> 4.105 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.63% +0.61%] index_add_ perm_sorted : Elapsed 0.041 ms (4.105 ms / 100) 3.940 -> 3.939 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.71% +0.63%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.940 ms / 100) 5.486 -> 5.485 ( -0.02%) [ +0.02% +0.00% +0.02% / +0.04% +0.02% -0.02%] index_select const : Elapsed 0.055 ms (5.487 ms / 100) 5.497 -> 5.489 ( -0.15%) [ +0.13% +0.05% +0.00% / -0.05% -0.07% -0.15%] index_select wrap : Elapsed 0.055 ms (5.504 ms / 100) 5.499 -> 5.498 ( -0.02%) [ +0.00% +0.02% +0.07% / +0.04% -0.02% +0.04%] index_select linear : Elapsed 0.055 ms (5.499 ms / 100) 5.497 -> 5.496 ( -0.02%) [ +0.04% +0.00% +0.00% / +0.15% +0.02% -0.02%] index_select reverse : Elapsed 0.055 ms (5.499 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.02% +0.00% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.02% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.489 ms / 100) 5.501 -> 5.500 ( -0.02%) [ +0.04% +0.09% +0.00% / -0.02% +0.00% +0.00%] index_select spread : Elapsed 0.055 ms (5.503 ms / 100) 5.493 -> 5.493 ( +0.00%) [ +0.11% +0.25% +0.00% / +0.07% +0.07% +0.00%] index_select strided 3 : Elapsed 0.055 ms (5.499 ms / 100) 5.500 -> 5.492 ( -0.15%) [ +0.07% +0.04% +0.00% / -0.07% -0.13% -0.15%] index_select strided 5 : Elapsed 0.055 ms (5.504 ms / 100) 5.495 -> 5.492 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.07% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.489 -> 5.480 ( -0.16%) [ +0.00% +0.00% +0.07% / -0.16% -0.02% -0.05%] index_select strided 8 : Elapsed 0.055 ms (5.489 ms / 100) 5.493 -> 5.493 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.16% +0.05% +0.00%] index_select random : Elapsed 0.055 ms (5.493 ms / 100) 5.496 -> 5.495 ( -0.02%) [ +0.02% +0.00% +0.09% / +0.13% +0.09% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.497 ms / 100) B = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) A = [4, 16, 40, 5] (stride (3200, 5, 80, 1)) dim = 1 4.051 -> 4.056 ( +0.12%) [ +0.05% +0.00% +0.10% / +0.12% +0.77% +0.79%] index_add_ linear : Elapsed 0.041 ms (4.053 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.71% +0.74%] index_copy_ linear : Elapsed 0.039 ms (3.921 ms / 100) 4.048 -> 4.051 ( +0.07%) [ +0.05% +0.00% +0.15% / +0.07% +0.82% +0.79%] index_add_ reverse : Elapsed 0.041 ms (4.050 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.79% +0.82%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.055 -> 4.057 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.74% +0.69%] index_add_ spread : Elapsed 0.041 ms (4.055 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.71% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.930 ms / 100) 4.057 -> 4.056 ( -0.02%) [ +0.02% +0.00% +0.05% / -0.02% +0.74% +0.71%] index_add_ strided 3 : Elapsed 0.041 ms (4.058 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.00% +0.03% +0.15% / +0.05% +0.74% +0.74%] index_copy_ strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 4.051 -> 4.050 ( -0.02%) [ +0.00% +0.10% +0.05% / -0.02% +0.72% +0.77%] index_add_ strided 7 : Elapsed 0.041 ms (4.051 ms / 100) 3.918 -> 3.922 ( +0.10%) [ +0.00% +0.18% +0.13% / +0.10% +0.92% +0.89%] index_copy_ strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 4.053 -> 4.054 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.72% +0.72%] index_add_ perm : Elapsed 0.041 ms (4.056 ms / 100) 3.918 -> 3.920 ( +0.05%) [ +0.03% +0.15% +0.00% / +0.05% +0.77% +0.77%] index_copy_ perm : Elapsed 0.039 ms (3.919 ms / 100) 4.056 -> 4.058 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.76% +0.79%] index_add_ perm_sorted : Elapsed 0.041 ms (4.059 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.97% +0.74%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 5.554 -> 5.561 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.18% +0.18% +0.13%] index_select const : Elapsed 0.056 ms (5.556 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.16% +0.00% +0.07% / -0.04% +0.22% +0.20%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.05% +0.00% +0.00% / -0.04% +0.16% +0.14%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.568 -> 5.568 ( +0.00%) [ +0.11% +0.05% +0.00% / +0.07% +0.00% +0.11%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.00% +0.20% +0.04% / +0.14% +0.13% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.551 ms / 100) 5.554 -> 5.556 ( +0.04%) [ +0.09% +0.00% +0.14% / +0.09% +0.11% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.23% +0.16%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.02% +0.00% +0.04% / +0.02% +0.23% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.569 -> 5.575 ( +0.11%) [ +0.09% +0.16% +0.00% / +0.11% +0.16% +0.16%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.09% +0.07% +0.00% / +0.05% +0.05% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.09% +0.00% +0.02% / +0.04% +0.09% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.02% +0.00% +0.07% / +0.09% +0.11% +0.04%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.05% +0.14% +0.00% / -0.04% +0.14% +0.25%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) B = [4, 20, 40, 5] (stride (1, 4, 80, 3200)) A = [4, 16, 40, 5] (stride (16, 1, 320, 64)) dim = 1 4.089 -> 4.092 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.73% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.091 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.89% +0.89%] index_copy_ linear : Elapsed 0.039 ms (3.930 ms / 100) 4.103 -> 4.103 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.68% +0.71%] index_add_ reverse : Elapsed 0.041 ms (4.104 ms / 100) 3.943 -> 3.943 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.74% +0.71%] index_copy_ reverse : Elapsed 0.039 ms (3.943 ms / 100) 4.063 -> 4.063 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.69% +0.66%] index_add_ spread : Elapsed 0.041 ms (4.065 ms / 100) 3.892 -> 3.901 ( +0.23%) [ +0.10% +0.00% +0.00% / +0.23% +0.87% +0.80%] index_copy_ spread : Elapsed 0.039 ms (3.896 ms / 100) 4.077 -> 4.079 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.74% +0.76%] index_add_ strided 3 : Elapsed 0.041 ms (4.081 ms / 100) 3.916 -> 3.921 ( +0.13%) [ +0.00% +0.00% +0.05% / +0.13% +0.61% +0.59%] index_copy_ strided 3 : Elapsed 0.039 ms (3.916 ms / 100) 4.078 -> 4.079 ( +0.02%) [ +0.10% +0.10% +0.00% / +0.02% +0.66% +0.64%] index_add_ strided 7 : Elapsed 0.041 ms (4.082 ms / 100) 3.920 -> 3.917 ( -0.08%) [ +0.10% +0.00% +0.05% / -0.08% +0.51% +0.54%] index_copy_ strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 4.065 -> 4.065 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.54% +0.54%] index_add_ perm : Elapsed 0.041 ms (4.066 ms / 100) 3.896 -> 3.903 ( +0.18%) [ +0.10% +0.28% +0.00% / +0.18% +0.67% +0.59%] index_copy_ perm : Elapsed 0.039 ms (3.900 ms / 100) 4.104 -> 4.104 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.58%] index_add_ perm_sorted : Elapsed 0.041 ms (4.104 ms / 100) 3.944 -> 3.944 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 5.494 -> 5.490 ( -0.07%) [ +0.04% +0.00% +0.09% / +0.04% -0.02% -0.07%] index_select const : Elapsed 0.055 ms (5.496 ms / 100) 5.499 -> 5.498 ( -0.02%) [ +0.13% +0.16% +0.00% / +0.13% +0.02% -0.02%] index_select wrap : Elapsed 0.055 ms (5.506 ms / 100) 5.502 -> 5.500 ( -0.04%) [ +0.18% +0.15% +0.00% / +0.04% +0.13% -0.04%] index_select linear : Elapsed 0.055 ms (5.512 ms / 100) 5.504 -> 5.502 ( -0.04%) [ +0.00% +0.13% +0.15% / +0.00% -0.04% +0.02%] index_select reverse : Elapsed 0.055 ms (5.504 ms / 100) 5.493 -> 5.497 ( +0.07%) [ +0.04% +0.13% +0.00% / +0.13% +0.07% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.495 ms / 100) 5.494 -> 5.497 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.13% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.498 ms / 100) 5.497 -> 5.497 ( +0.00%) [ +0.00% +0.13% +0.02% / +0.05% +0.00% +0.04%] index_select spread : Elapsed 0.055 ms (5.497 ms / 100) 5.507 -> 5.500 ( -0.13%) [ +0.00% +0.09% +0.02% / +0.00% -0.09% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.507 ms / 100) 5.511 -> 5.502 ( -0.16%) [ +0.00% +0.02% +0.02% / -0.04% -0.09% -0.16%] index_select strided 5 : Elapsed 0.055 ms (5.511 ms / 100) 5.500 -> 5.503 ( +0.05%) [ +0.09% +0.00% +0.15% / +0.05% +0.11% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.505 ms / 100) 5.504 -> 5.500 ( -0.07%) [ +0.13% +0.16% +0.00% / +0.02% -0.07% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.511 ms / 100) 5.504 -> 5.506 ( +0.04%) [ +0.05% +0.07% +0.00% / +0.13% +0.09% +0.04%] index_select random : Elapsed 0.055 ms (5.507 ms / 100) 5.502 -> 5.504 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.09% +0.04% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.510 ms / 100) out_shape = [4, 16, 20, 5] in_shape = [4, 16, 40, 5] idx_dim = 2 B = [4, 16, 20, 5] (stride (1600, 5, 80, 1)) A = [4, 16, 40, 5] (stride (3200, 1, 80, 16)) dim = 2 2.439 -> 2.444 ( +0.21%) [ +0.21% +0.00% +0.25% / +0.21% +0.41% +0.37%] index_select const : Elapsed 0.024 ms (2.444 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.08% +0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.455 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.04% +0.12% +0.00%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.16% +0.00% +0.45% / +0.24% +0.16% +0.24%] index_select reverse : Elapsed 0.025 ms (2.453 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.16% +0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.450 ms / 100) 2.446 -> 2.444 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.16% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.450 -> 2.456 ( +0.24%) [ +0.20% +0.08% +0.00% / +0.24% +0.29% +0.24%] index_select spread : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.00% +0.00% +0.12% / +0.04% -0.08% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.29% +0.04% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.20% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.443 -> 2.447 ( +0.16%) [ +0.00% +0.08% +0.12% / +0.16% +0.33% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.443 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.16% +0.20%] index_select strided 16 : Elapsed 0.024 ms (2.447 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.08% +0.04%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.29% +0.00% +0.29% / +0.04% +0.20% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.454 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.08% +0.24% +0.12%] index_select perm : Elapsed 0.025 ms (2.455 ms / 100) 2.458 -> 2.450 ( -0.33%) [ +0.16% +0.04% +0.00% / +0.04% -0.33% -0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.462 ms / 100) B = [4, 16, 20, 5] (stride (1600, 20, 1, 320)) A = [4, 16, 40, 5] (stride (3200, 1, 16, 640)) dim = 2 2.444 -> 2.447 ( +0.12%) [ +0.20% +0.00% +0.04% / +0.16% +0.12% +0.25%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.465 -> 2.458 ( -0.28%) [ +0.00% +0.04% +0.12% / +0.00% -0.24% -0.28%] index_select wrap : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.452 ( -0.41%) [ +0.16% +0.20% +0.00% / +0.37% -0.41% -0.37%] index_select linear : Elapsed 0.025 ms (2.466 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.24% +0.00% +0.08% / +0.16% +0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.08% +0.00% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.25% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.445 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.20% +0.45%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.08% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.452 -> 2.457 ( +0.20%) [ +0.00% +0.00% +0.08% / +0.29% +0.20% +0.20%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.32% +0.41%] index_select strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.16% +0.00% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.20% +0.33%] index_select strided 16 : Elapsed 0.025 ms (2.450 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.28% +0.28%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.457 -> 2.458 ( +0.04%) [ +0.00% +0.12% +0.08% / +0.12% +0.04% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.457 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.16% +0.04% +0.12%] index_select perm : Elapsed 0.025 ms (2.463 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.12% +0.00% +0.33% / +0.08% +0.08% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) B = [4, 16, 20, 5] (stride (20, 400, 1, 80)) A = [4, 16, 40, 5] (stride (200, 800, 5, 1)) dim = 2 2.446 -> 2.450 ( +0.16%) [ +0.04% +0.00% +0.20% / +0.20% +0.20% +0.16%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.468 -> 2.463 ( -0.20%) [ +0.00% +0.04% +0.16% / -0.12% -0.20% -0.20%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.08% +0.00% +0.12%] index_select linear : Elapsed 0.025 ms (2.463 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% -0.04% +0.04%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.12% +0.00% +0.08% / +0.00% +0.29% -0.16%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.24% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.473 -> 2.477 ( +0.16%) [ +0.08% +0.20% +0.00% / +0.16% +0.20% +0.24%] index_select spread : Elapsed 0.025 ms (2.475 ms / 100) 2.475 -> 2.473 ( -0.08%) [ +0.00% +0.12% +0.04% / +0.04% +0.12% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.04% +0.00% +0.24% / +0.00% +0.08% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.00% +0.04% +0.20% / +0.12% +0.32% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.16% +0.00% +0.12% / -0.04% +0.12% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.04% +0.37% +0.24%] index_select strided 16 : Elapsed 0.025 ms (2.458 ms / 100) 2.467 -> 2.468 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.16% +0.12%] index_select random : Elapsed 0.025 ms (2.468 ms / 100) 2.467 -> 2.465 ( -0.08%) [ +0.08% +0.00% +0.12% / +0.04% -0.04% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.468 ( -0.16%) [ +0.08% +0.04% +0.00% / +0.04% -0.04% -0.16%] index_select perm : Elapsed 0.025 ms (2.474 ms / 100) 2.475 -> 2.462 ( -0.53%) [ +0.08% +0.04% +0.00% / -0.08% -0.53% -0.36%] index_select perm_sorted : Elapsed 0.025 ms (2.477 ms / 100) B = [4, 16, 20, 5] (stride (80, 5, 320, 1)) dim = 2 fill_cnt = 40 0.624 -> 0.611 ( -2.08%) [ +0.00% +0.16% +0.16% / -1.76% -1.92% -2.08%] index_fill_ const : Elapsed 0.006 ms (0.624 ms / 100) 0.637 -> 0.622 ( -2.35%) [ +0.00% +0.47% +0.31% / -1.57% -2.35% -1.73%] index_fill_ linear : Elapsed 0.006 ms (0.637 ms / 100) 0.637 -> 0.623 ( -2.20%) [ +0.00% +0.00% +0.63% / -2.04% -2.20% -1.88%] index_fill_ reverse : Elapsed 0.006 ms (0.637 ms / 100) 0.625 -> 0.612 ( -2.08%) [ +0.16% +0.00% +0.32% / -1.92% -1.12% -2.08%] index_fill_ skip64 : Elapsed 0.006 ms (0.626 ms / 100) 0.625 -> 0.612 ( -2.08%) [ +0.16% +0.00% +0.32% / -1.76% -1.44% -2.08%] index_fill_ skip256 : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.625 ( -1.88%) [ +0.00% +0.63% +0.47% / -1.88% -1.73% -1.88%] index_fill_ spread : Elapsed 0.006 ms (0.637 ms / 100) 0.636 -> 0.622 ( -2.20%) [ +0.00% +0.31% +0.79% / -1.57% -1.73% -2.20%] index_fill_ strided 3 : Elapsed 0.006 ms (0.636 ms / 100) 0.629 -> 0.613 ( -2.54%) [ +0.00% +0.16% +0.32% / -1.59% -2.54% -2.23%] index_fill_ strided 5 : Elapsed 0.006 ms (0.629 ms / 100) 0.635 -> 0.623 ( -1.89%) [ +0.47% +0.00% +0.16% / -1.89% -1.57% -1.73%] index_fill_ strided 7 : Elapsed 0.006 ms (0.638 ms / 100) 0.630 -> 0.617 ( -2.06%) [ +0.32% +0.00% +0.48% / -2.06% -2.06% -2.06%] index_fill_ strided 8 : Elapsed 0.006 ms (0.632 ms / 100) 0.629 -> 0.615 ( -2.23%) [ +0.64% +0.16% +0.00% / -2.23% -2.23% -1.91%] index_fill_ strided 16 : Elapsed 0.006 ms (0.633 ms / 100) 0.635 -> 0.622 ( -2.05%) [ +0.63% +0.00% +0.47% / -1.42% -1.57% -2.05%] index_fill_ random : Elapsed 0.006 ms (0.639 ms / 100) 0.639 -> 0.621 ( -2.82%) [ +0.16% +0.00% +0.16% / -1.72% -2.66% -2.82%] index_fill_ random_sorted : Elapsed 0.006 ms (0.640 ms / 100) B = [4, 16, 20, 5] (stride (80, 5, 320, 1)) A = [4, 16, 40, 5] (stride (3200, 200, 5, 1)) dim = 2 1.451 -> 1.454 ( +0.21%) [ +0.07% +0.00% +0.21% / +0.21% +0.41% +0.69%] index_select const : Elapsed 0.015 ms (1.452 ms / 100) 1.475 -> 1.480 ( +0.34%) [ +0.14% +0.00% +0.14% / +0.34% +0.34% +0.34%] index_select wrap : Elapsed 0.015 ms (1.477 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.00% +0.34% +0.20% / +0.14% +0.54% +0.41%] index_select linear : Elapsed 0.015 ms (1.473 ms / 100) 1.472 -> 1.477 ( +0.34%) [ +0.00% +0.00% +0.00% / +0.34% +0.48% +0.48%] index_select reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.454 -> 1.456 ( +0.14%) [ +0.34% +0.00% +0.28% / +0.14% +0.21% +0.48%] index_select skip64 : Elapsed 0.015 ms (1.459 ms / 100) 1.452 -> 1.454 ( +0.14%) [ +0.21% +0.07% +0.00% / +0.14% +0.55% +0.48%] index_select skip256 : Elapsed 0.015 ms (1.455 ms / 100) 1.489 -> 1.490 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.40% +0.54%] index_select spread : Elapsed 0.015 ms (1.490 ms / 100) 1.486 -> 1.485 ( -0.07%) [ +0.13% +0.00% +0.00% / -0.07% +0.74% +0.61%] index_select strided 3 : Elapsed 0.015 ms (1.488 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.07% +0.00% +0.20% / +0.14% +0.41% +0.68%] index_select strided 5 : Elapsed 0.015 ms (1.476 ms / 100) 1.490 -> 1.489 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.27% +0.40%] index_select strided 7 : Elapsed 0.015 ms (1.490 ms / 100) 1.466 -> 1.470 ( +0.27%) [ +0.27% +0.00% +0.48% / +0.27% +0.41% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.470 ms / 100) 1.463 -> 1.466 ( +0.21%) [ +0.21% +0.00% +0.14% / +0.21% +0.68% +0.62%] index_select strided 16 : Elapsed 0.015 ms (1.466 ms / 100) 1.480 -> 1.484 ( +0.27%) [ +0.07% +0.14% +0.00% / +0.27% +0.68% +0.61%] index_select random : Elapsed 0.015 ms (1.481 ms / 100) 1.478 -> 1.475 ( -0.20%) [ +0.07% +0.00% +0.20% / -0.20% +0.27% +0.47%] index_select random_sorted : Elapsed 0.015 ms (1.479 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.07% +0.27% +0.00% / +0.07% +0.68% +0.41%] index_select perm : Elapsed 0.015 ms (1.481 ms / 100) 1.486 -> 1.488 ( +0.13%) [ +0.27% +0.00% +0.07% / +0.13% +0.81% +0.54%] index_select perm_sorted : Elapsed 0.015 ms (1.490 ms / 100) B = [4, 16, 20, 5] (stride (80, 5, 320, 1)) A = [4, 16, 40, 5] (stride (1, 800, 20, 4)) dim = 2 2.391 -> 2.395 ( +0.17%) [ +0.17% +0.13% +0.00% / +0.17% +0.29% +0.17%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.25% +0.17% +0.00% / +0.21% -0.08% +0.17%] index_select wrap : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.402 ( -0.08%) [ +0.00% +0.21% +0.04% / +0.12% -0.08% +0.21%] index_select linear : Elapsed 0.024 ms (2.404 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.08% +0.12% +0.00% / -0.04% -0.08% +0.04%] index_select reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.394 -> 2.393 ( -0.04%) [ +0.13% +0.00% +0.17% / +0.21% -0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.391 -> 2.396 ( +0.21%) [ +0.08% +0.25% +0.00% / +0.21% +0.38% +0.46%] index_select skip256 : Elapsed 0.024 ms (2.393 ms / 100) 2.408 -> 2.404 ( -0.17%) [ +0.04% +0.00% +0.04% / -0.08% -0.04% -0.17%] index_select spread : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.17% +0.17% +0.00% / +0.08% -0.04% +0.04%] index_select strided 3 : Elapsed 0.024 ms (2.410 ms / 100) 2.398 -> 2.399 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.08% +0.04% +0.04%] index_select strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.25% +0.17% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.394 -> 2.392 ( -0.08%) [ +0.21% +0.17% +0.00% / -0.08% +0.38% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.399 ms / 100) 2.393 -> 2.397 ( +0.17%) [ +0.13% +0.00% +0.13% / +0.25% +0.42% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.396 ms / 100) 2.402 -> 2.401 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.04% +0.17%] index_select random : Elapsed 0.024 ms (2.402 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.04% -0.08% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.405 ms / 100) 2.407 -> 2.404 ( -0.12%) [ +0.00% +0.04% +0.04% / +0.00% -0.12% +0.00%] index_select perm : Elapsed 0.024 ms (2.407 ms / 100) 2.408 -> 2.401 ( -0.29%) [ +0.25% +0.12% +0.00% / -0.08% -0.25% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [4, 16, 20, 5] (stride (5, 20, 320, 1)) A = [4, 16, 40, 5] (stride (80, 5, 320, 1)) dim = 2 2.406 -> 2.406 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.04% +0.21%] index_select const : Elapsed 0.024 ms (2.407 ms / 100) 2.415 -> 2.408 ( -0.29%) [ +0.00% +0.12% +0.12% / -0.08% -0.29% -0.25%] index_select wrap : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.407 ( -0.29%) [ +0.29% +0.21% +0.00% / +0.12% -0.08% -0.29%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.412 -> 2.409 ( -0.12%) [ +0.08% +0.00% +0.12% / -0.12% +0.12% +0.04%] index_select reverse : Elapsed 0.024 ms (2.414 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.04% +0.08% +0.00% / +0.04% +0.04% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.12% +0.58% +0.04%] index_select skip256 : Elapsed 0.024 ms (2.407 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.17% +0.12% +0.00% / +0.17% +0.25% +0.17%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.33% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.21% +0.17% +0.00% / +0.17% +0.42% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.08% +0.21% +0.00% / -0.04% +0.17% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.00% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.411 ms / 100) 2.408 -> 2.406 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.17% +0.04%] index_select strided 16 : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.33% +0.12%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.12% +0.08% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.00% +0.00%] index_select perm : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.17% +0.17% +0.00% / +0.08% +0.12% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) B = [4, 16, 20, 5] (stride (320, 20, 1, 1280)) A = [4, 16, 40, 5] (stride (640, 1, 16, 2560)) dim = 2 2.393 -> 2.399 ( +0.25%) [ +0.08% +0.00% +0.00% / +0.25% +0.46% +0.29%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.417 -> 2.414 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% -0.08% +0.04%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.25% +0.08%] index_select linear : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.00% +0.29% +0.29% / +0.00% +0.25% +0.21%] index_select reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.00% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.400 ms / 100) 2.395 -> 2.398 ( +0.13%) [ +0.21% +0.21% +0.00% / +0.17% +0.13% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.400 ms / 100) 2.413 -> 2.418 ( +0.21%) [ +0.21% +0.04% +0.00% / +0.21% +0.21% +0.25%] index_select spread : Elapsed 0.024 ms (2.418 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.17% +0.04% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.406 -> 2.403 ( -0.12%) [ +0.08% +0.00% +0.04% / -0.12% +0.12% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.408 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.12% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.00% +0.33% +0.08% / -0.04% +0.17% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.400 ms / 100) 2.398 -> 2.402 ( +0.17%) [ +0.13% +0.17% +0.00% / +0.17% +0.29% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.401 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.12% +0.08% +0.25%] index_select random : Elapsed 0.024 ms (2.410 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.08% +0.12% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.17% +0.00% +0.12% / +0.17% +0.25% +0.08%] index_select perm : Elapsed 0.024 ms (2.418 ms / 100) 2.417 -> 2.404 ( -0.54%) [ +0.08% +0.00% +0.00% / +0.00% -0.54% -0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [4, 16, 20, 5] (stride (16, 1, 64, 1280)) A = [4, 16, 40, 5] (stride (1, 160, 4, 2560)) dim = 2 2.398 -> 2.400 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.21% +0.21%] index_select const : Elapsed 0.024 ms (2.399 ms / 100) 2.413 -> 2.409 ( -0.17%) [ +0.12% +0.00% +0.08% / -0.04% -0.17% -0.04%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.407 ( -0.17%) [ +0.04% +0.12% +0.00% / +0.33% -0.12% -0.17%] index_select linear : Elapsed 0.024 ms (2.412 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.08% +0.00% +0.00% / +0.17% +0.00% -0.04%] index_select reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.402 -> 2.400 ( -0.08%) [ +0.04% +0.00% +0.00% / +0.08% -0.08% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.401 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.25% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.404 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.17% +0.29%] index_select spread : Elapsed 0.024 ms (2.421 ms / 100) 2.414 -> 2.419 ( +0.21%) [ +0.25% +0.04% +0.00% / +0.21% +0.46% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.29% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.00% +0.21% +0.21% / +0.12% +0.21% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.417 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.04% +0.17% +0.00% / +0.25% +0.08% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.00% +0.08% +0.04% / -0.08% +0.08% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.405 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.33% +0.33%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.00% +0.12% +0.00% / -0.04% +0.21% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.00% +0.12% +0.25% / +0.29% +0.08% +0.08%] index_select perm : Elapsed 0.024 ms (2.410 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.04% +0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) B = [4, 16, 20, 5] (stride (1, 4, 64, 1280)) A = [4, 16, 40, 5] (stride (40, 160, 1, 2560)) dim = 2 2.453 -> 2.452 ( -0.04%) [ +0.04% +0.00% +0.12% / -0.04% +0.33% +0.29%] index_select const : Elapsed 0.025 ms (2.454 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.20% +0.04% +0.16%] index_select wrap : Elapsed 0.025 ms (2.466 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.00% +0.04% +0.00% / +0.24% +0.16% +0.20%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.24% +0.00% +0.04% / +0.00% +0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.470 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.16% +0.12% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.08% +0.00% +0.20% / -0.16% +0.24% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.457 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.16% +0.28%] index_select spread : Elapsed 0.025 ms (2.473 ms / 100) 2.473 -> 2.473 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.04% +0.00% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.474 ms / 100) 2.472 -> 2.473 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.12% +0.20% +0.04%] index_select strided 5 : Elapsed 0.025 ms (2.475 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.32% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.08% +0.00% +0.20% / -0.04% +0.24% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.471 ms / 100) 2.471 -> 2.473 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.12% +0.08% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.471 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.00% +0.08% +0.20% / -0.04% +0.36% +0.28%] index_select random : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.16% +0.08% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.12% +0.08% +0.04%] index_select perm : Elapsed 0.025 ms (2.473 ms / 100) 2.473 -> 2.467 ( -0.24%) [ +0.00% +0.16% +0.08% / +0.12% -0.24% -0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) B = [4, 16, 20, 5] (stride (1, 4, 64, 1280)) A = [4, 16, 40, 5] (stride (16, 1, 64, 2560)) dim = 2 2.407 -> 2.406 ( -0.04%) [ +0.04% +0.17% +0.00% / -0.04% +0.17% +0.29%] index_select const : Elapsed 0.024 ms (2.408 ms / 100) 2.424 -> 2.418 ( -0.25%) [ +0.17% +0.17% +0.00% / +0.17% -0.17% -0.25%] index_select wrap : Elapsed 0.024 ms (2.428 ms / 100) 2.428 -> 2.417 ( -0.45%) [ +0.12% +0.00% +0.00% / -0.12% -0.45% -0.29%] index_select linear : Elapsed 0.024 ms (2.431 ms / 100) 2.427 -> 2.422 ( -0.21%) [ +0.16% +0.08% +0.00% / -0.04% +0.00% -0.21%] index_select reverse : Elapsed 0.024 ms (2.431 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.21% +0.08% +0.00% / +0.08% +0.08% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.414 ms / 100) 2.407 -> 2.411 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.17% +0.21% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.407 ms / 100) 2.418 -> 2.422 ( +0.17%) [ +0.17% +0.00% +0.17% / +0.17% +0.41% +0.37%] index_select spread : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.04% +0.25% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.12% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.21% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.12% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.12% +0.46%] index_select strided 16 : Elapsed 0.024 ms (2.413 ms / 100) 2.419 -> 2.424 ( +0.21%) [ +0.25% +0.21% +0.00% / +0.29% +0.33% +0.21%] index_select random : Elapsed 0.024 ms (2.425 ms / 100) 2.420 -> 2.423 ( +0.12%) [ +0.41% +0.29% +0.00% / +0.29% +0.21% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.430 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.41% +0.29%] index_select perm : Elapsed 0.024 ms (2.424 ms / 100) 2.422 -> 2.419 ( -0.12%) [ +0.00% +0.04% +0.08% / -0.12% +0.33% +0.50%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) out_shape = [4, 16, 40, 20] in_shape = [4, 16, 40, 5] idx_dim = 3 B = [4, 16, 40, 20] (stride (12800, 40, 1, 640)) A = [4, 16, 40, 5] (stride (640, 1, 16, 2560)) dim = 3 1.751 -> 1.749 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% +0.40% +0.23%] index_add_ linear : Elapsed 0.018 ms (1.751 ms / 100) 1.707 -> 1.710 ( +0.18%) [ +0.41% +0.00% +0.06% / +0.18% +0.41% +0.35%] index_copy_ linear : Elapsed 0.017 ms (1.714 ms / 100) 1.750 -> 1.749 ( -0.06%) [ +0.11% +0.06% +0.00% / -0.06% +0.57% +0.29%] index_add_ reverse : Elapsed 0.018 ms (1.752 ms / 100) 1.708 -> 1.710 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.47% +0.41%] index_copy_ reverse : Elapsed 0.017 ms (1.709 ms / 100) 1.756 -> 1.753 ( -0.17%) [ +0.11% +0.00% +0.00% / -0.17% -0.11% -0.06%] index_add_ spread : Elapsed 0.018 ms (1.758 ms / 100) 1.706 -> 1.706 ( +0.00%) [ +0.23% +0.00% +0.23% / +0.00% +0.23% +0.29%] index_copy_ spread : Elapsed 0.017 ms (1.710 ms / 100) 1.744 -> 1.747 ( +0.17%) [ +0.00% +0.23% +0.11% / +0.17% +1.15% +0.92%] index_add_ strided 3 : Elapsed 0.017 ms (1.744 ms / 100) 1.698 -> 1.701 ( +0.18%) [ +0.29% +0.35% +0.00% / +0.18% +1.06% +1.24%] index_copy_ strided 3 : Elapsed 0.017 ms (1.703 ms / 100) 1.764 -> 1.763 ( -0.06%) [ +0.00% +0.17% +0.06% / -0.06% +0.17% +0.23%] index_add_ strided 7 : Elapsed 0.018 ms (1.764 ms / 100) 1.715 -> 1.720 ( +0.29%) [ +0.17% +0.35% +0.00% / +0.29% +0.29% +0.52%] index_copy_ strided 7 : Elapsed 0.017 ms (1.718 ms / 100) 1.743 -> 1.742 ( -0.06%) [ +0.17% +0.00% +0.17% / -0.06% +1.49% +1.26%] index_add_ perm : Elapsed 0.017 ms (1.746 ms / 100) 1.696 -> 1.697 ( +0.06%) [ +0.12% +0.00% +0.18% / +0.06% +1.77% +1.77%] index_copy_ perm : Elapsed 0.017 ms (1.698 ms / 100) 1.747 -> 1.747 ( +0.00%) [ +0.00% +0.11% +0.06% / +0.00% +1.09% +1.03%] index_add_ perm_sorted : Elapsed 0.017 ms (1.747 ms / 100) 1.702 -> 1.699 ( -0.18%) [ +0.00% +0.06% +0.18% / -0.18% +1.35% +1.35%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.702 ms / 100) 8.176 -> 8.190 ( +0.17%) [ +0.00% +0.00% +0.20% / +0.17% +0.26% +0.20%] index_select const : Elapsed 0.082 ms (8.176 ms / 100) 8.192 -> 8.182 ( -0.12%) [ +0.27% +0.00% +0.17% / -0.12% +0.37% +0.18%] index_select wrap : Elapsed 0.082 ms (8.214 ms / 100) 8.195 -> 8.192 ( -0.04%) [ +0.00% +0.11% +0.00% / +0.07% -0.04% +0.02%] index_select linear : Elapsed 0.082 ms (8.195 ms / 100) 8.184 -> 8.188 ( +0.05%) [ +0.35% +0.00% +0.05% / +0.21% +0.05% +0.11%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.176 -> 8.180 ( +0.05%) [ +0.20% +0.00% +0.21% / +0.18% +0.05% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.192 ms / 100) 8.185 -> 8.162 ( -0.28%) [ +0.00% +0.00% +0.04% / -0.28% +0.26% -0.13%] index_select skip256 : Elapsed 0.082 ms (8.185 ms / 100) 8.198 -> 8.190 ( -0.10%) [ +0.05% +0.15% +0.00% / +0.27% -0.04% -0.10%] index_select spread : Elapsed 0.082 ms (8.202 ms / 100) 8.206 -> 8.196 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.28% -0.12% -0.05%] index_select strided 3 : Elapsed 0.082 ms (8.209 ms / 100) 8.203 -> 8.193 ( -0.12%) [ +0.00% +0.29% +0.07% / +0.02% +0.22% -0.12%] index_select random : Elapsed 0.082 ms (8.203 ms / 100) 8.198 -> 8.191 ( -0.09%) [ +0.10% +0.07% +0.00% / -0.09% -0.01% +0.00%] index_select random_sorted : Elapsed 0.082 ms (8.206 ms / 100) B = [4, 16, 40, 20] (stride (1, 3200, 4, 160)) A = [4, 16, 40, 5] (stride (1, 800, 20, 4)) dim = 3 1.782 -> 1.781 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +1.57% +1.40%] index_add_ linear : Elapsed 0.018 ms (1.784 ms / 100) 1.729 -> 1.730 ( +0.06%) [ +0.23% +0.00% +0.23% / +0.06% +1.56% +1.50%] index_copy_ linear : Elapsed 0.017 ms (1.733 ms / 100) 1.778 -> 1.777 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +1.46% +1.41%] index_add_ reverse : Elapsed 0.018 ms (1.780 ms / 100) 1.724 -> 1.725 ( +0.06%) [ +0.00% +0.12% +0.00% / +0.06% +1.74% +1.57%] index_copy_ reverse : Elapsed 0.017 ms (1.724 ms / 100) 1.785 -> 1.786 ( +0.06%) [ +0.06% +0.28% +0.00% / +0.06% +0.67% +0.62%] index_add_ spread : Elapsed 0.018 ms (1.786 ms / 100) 1.731 -> 1.734 ( +0.17%) [ +0.17% +0.23% +0.00% / +0.17% +1.10% +0.87%] index_copy_ spread : Elapsed 0.017 ms (1.734 ms / 100) 1.782 -> 1.784 ( +0.11%) [ +0.06% +0.11% +0.00% / +0.11% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.018 ms (1.783 ms / 100) 1.728 -> 1.729 ( +0.06%) [ +0.00% +0.23% +0.00% / +0.06% +0.98% +0.81%] index_copy_ strided 3 : Elapsed 0.017 ms (1.728 ms / 100) 1.792 -> 1.797 ( +0.28%) [ +0.06% +0.22% +0.00% / +0.28% +0.39% +0.33%] index_add_ strided 7 : Elapsed 0.018 ms (1.793 ms / 100) 1.741 -> 1.742 ( +0.06%) [ +0.06% +0.11% +0.00% / +0.11% +0.06% +0.46%] index_copy_ strided 7 : Elapsed 0.017 ms (1.742 ms / 100) 1.789 -> 1.788 ( -0.06%) [ +0.17% +0.00% +0.06% / -0.06% +0.61% +0.50%] index_add_ perm : Elapsed 0.018 ms (1.792 ms / 100) 1.737 -> 1.737 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.58% +0.69%] index_copy_ perm : Elapsed 0.017 ms (1.738 ms / 100) 1.785 -> 1.790 ( +0.28%) [ +0.06% +0.06% +0.00% / +0.28% +0.34% +0.50%] index_add_ perm_sorted : Elapsed 0.018 ms (1.786 ms / 100) 1.734 -> 1.730 ( -0.23%) [ +0.00% +0.00% +0.00% / -0.23% +0.40% +0.52%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.734 ms / 100) 8.217 -> 8.216 ( -0.01%) [ +0.13% +0.16% +0.00% / -0.01% +0.26% +0.24%] index_select const : Elapsed 0.082 ms (8.228 ms / 100) 8.226 -> 8.211 ( -0.18%) [ +0.10% +0.05% +0.00% / -0.18% +0.17% +0.15%] index_select wrap : Elapsed 0.082 ms (8.234 ms / 100) 8.224 -> 8.235 ( +0.13%) [ +0.17% +0.00% +0.11% / +0.28% +0.23% +0.13%] index_select linear : Elapsed 0.082 ms (8.238 ms / 100) 8.211 -> 8.217 ( +0.07%) [ +0.10% +0.00% +0.19% / +0.07% +0.22% +0.46%] index_select reverse : Elapsed 0.082 ms (8.219 ms / 100) 8.210 -> 8.226 ( +0.19%) [ +0.06% +0.10% +0.00% / +0.28% +0.19% +0.43%] index_select skip64 : Elapsed 0.082 ms (8.215 ms / 100) 8.220 -> 8.222 ( +0.02%) [ +0.00% +0.01% +0.16% / +0.02% +0.30% +0.09%] index_select skip256 : Elapsed 0.082 ms (8.220 ms / 100) 8.218 -> 8.225 ( +0.09%) [ +0.09% +0.00% +0.13% / +0.17% +0.11% +0.09%] index_select spread : Elapsed 0.082 ms (8.225 ms / 100) 8.227 -> 8.239 ( +0.15%) [ +0.05% +0.00% +0.04% / +0.34% +0.16% +0.15%] index_select strided 3 : Elapsed 0.082 ms (8.231 ms / 100) 8.214 -> 8.225 ( +0.13%) [ +0.00% +0.18% +0.18% / +0.13% +0.28% +0.30%] index_select random : Elapsed 0.082 ms (8.214 ms / 100) 8.227 -> 8.230 ( +0.04%) [ +0.01% +0.05% +0.00% / +0.04% +0.16% +0.04%] index_select random_sorted : Elapsed 0.082 ms (8.228 ms / 100) B = [4, 16, 40, 20] (stride (1, 3200, 4, 160)) A = [4, 16, 40, 5] (stride (1, 20, 320, 4)) dim = 3 1.937 -> 1.948 ( +0.57%) [ +0.00% +0.00% +0.62% / +0.57% +2.37% +2.48%] index_add_ linear : Elapsed 0.019 ms (1.937 ms / 100) 1.880 -> 1.893 ( +0.69%) [ +0.00% +0.05% +0.85% / +0.69% +2.45% +2.55%] index_copy_ linear : Elapsed 0.019 ms (1.880 ms / 100) 1.937 -> 1.949 ( +0.62%) [ +0.15% +0.00% +0.62% / +0.62% +2.17% +2.37%] index_add_ reverse : Elapsed 0.019 ms (1.940 ms / 100) 1.880 -> 1.892 ( +0.64%) [ +0.11% +0.00% +0.69% / +0.64% +2.23% +2.29%] index_copy_ reverse : Elapsed 0.019 ms (1.882 ms / 100) 1.952 -> 1.965 ( +0.67%) [ +0.10% +0.00% +0.77% / +0.67% +1.69% +1.64%] index_add_ spread : Elapsed 0.020 ms (1.954 ms / 100) 1.890 -> 1.905 ( +0.79%) [ +0.32% +0.00% +0.74% / +0.79% +1.85% +1.85%] index_copy_ spread : Elapsed 0.019 ms (1.896 ms / 100) 1.954 -> 1.957 ( +0.15%) [ +0.15% +0.00% +0.26% / +0.15% +1.02% +1.18%] index_add_ strided 3 : Elapsed 0.020 ms (1.957 ms / 100) 1.894 -> 1.903 ( +0.48%) [ +0.05% +0.00% +0.42% / +0.48% +1.58% +1.48%] index_copy_ strided 3 : Elapsed 0.019 ms (1.895 ms / 100) 1.958 -> 1.963 ( +0.26%) [ +0.00% +0.00% +0.36% / +0.26% +0.66% +0.92%] index_add_ strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.896 -> 1.906 ( +0.53%) [ +0.00% +0.16% +0.42% / +0.53% +1.05% +1.00%] index_copy_ strided 7 : Elapsed 0.019 ms (1.896 ms / 100) 1.957 -> 1.962 ( +0.26%) [ +0.00% +0.20% +0.41% / +0.26% +1.33% +1.48%] index_add_ perm : Elapsed 0.020 ms (1.957 ms / 100) 1.896 -> 1.903 ( +0.37%) [ +0.00% +0.16% +0.42% / +0.37% +1.42% +1.64%] index_copy_ perm : Elapsed 0.019 ms (1.896 ms / 100) 1.948 -> 1.954 ( +0.31%) [ +0.00% +0.21% +0.36% / +0.31% +1.44% +1.54%] index_add_ perm_sorted : Elapsed 0.019 ms (1.948 ms / 100) 1.890 -> 1.896 ( +0.32%) [ +0.00% +0.11% +0.32% / +0.32% +1.43% +1.43%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.890 ms / 100) 8.557 -> 8.544 ( -0.15%) [ +0.07% +0.13% +0.00% / -0.15% +0.08% -0.06%] index_select const : Elapsed 0.086 ms (8.563 ms / 100) 8.543 -> 8.561 ( +0.21%) [ +0.20% +0.12% +0.00% / +0.21% +0.37% +0.42%] index_select wrap : Elapsed 0.086 ms (8.560 ms / 100) 8.546 -> 8.551 ( +0.06%) [ +0.00% +0.20% +0.18% / +0.06% +0.54% +0.22%] index_select linear : Elapsed 0.085 ms (8.546 ms / 100) 8.550 -> 8.550 ( +0.00%) [ +0.00% +0.14% +0.11% / +0.00% +0.13% +0.26%] index_select reverse : Elapsed 0.085 ms (8.550 ms / 100) 8.527 -> 8.534 ( +0.08%) [ +0.11% +0.00% +0.26% / +0.08% +0.65% +0.35%] index_select skip64 : Elapsed 0.085 ms (8.536 ms / 100) 8.546 -> 8.538 ( -0.09%) [ +0.12% +0.20% +0.00% / -0.09% +0.25% +0.29%] index_select skip256 : Elapsed 0.086 ms (8.556 ms / 100) 8.561 -> 8.575 ( +0.16%) [ +0.00% +0.41% +0.19% / +0.22% +0.16% +0.26%] index_select spread : Elapsed 0.086 ms (8.561 ms / 100) 8.554 -> 8.559 ( +0.06%) [ +0.20% +0.00% +0.14% / +0.12% +0.20% +0.06%] index_select strided 3 : Elapsed 0.086 ms (8.571 ms / 100) 8.555 -> 8.573 ( +0.21%) [ +0.25% +0.32% +0.00% / +0.29% +0.22% +0.21%] index_select random : Elapsed 0.086 ms (8.576 ms / 100) 8.566 -> 8.573 ( +0.08%) [ +0.00% +0.19% +0.14% / +0.08% +0.29% +0.64%] index_select random_sorted : Elapsed 0.086 ms (8.566 ms / 100) B = [4, 16, 40, 20] (stride (20, 80, 1280, 1)) A = [4, 16, 40, 5] (stride (3200, 200, 1, 40)) dim = 3 1.811 -> 1.794 ( -0.94%) [ +0.00% +0.33% +0.00% / +0.06% -0.66% -0.94%] index_add_ linear : Elapsed 0.018 ms (1.811 ms / 100) 1.772 -> 1.754 ( -1.02%) [ +0.00% +0.00% +0.11% / -0.17% -1.02% -1.02%] index_copy_ linear : Elapsed 0.018 ms (1.772 ms / 100) 1.810 -> 1.790 ( -1.10%) [ +0.28% +0.17% +0.00% / -0.17% -1.05% -1.10%] index_add_ reverse : Elapsed 0.018 ms (1.815 ms / 100) 1.767 -> 1.752 ( -0.85%) [ +0.00% +0.51% +0.34% / +0.11% -0.85% -0.79%] index_copy_ reverse : Elapsed 0.018 ms (1.767 ms / 100) 1.828 -> 1.802 ( -1.42%) [ +0.27% +0.38% +0.00% / +0.11% -1.42% -0.88%] index_add_ spread : Elapsed 0.018 ms (1.833 ms / 100) 1.794 -> 1.774 ( -1.11%) [ +0.39% +0.00% +0.11% / +0.11% -1.06% -1.11%] index_copy_ spread : Elapsed 0.018 ms (1.801 ms / 100) 1.832 -> 1.806 ( -1.42%) [ +0.38% +0.11% +0.00% / +0.00% -0.98% -1.42%] index_add_ strided 3 : Elapsed 0.018 ms (1.839 ms / 100) 1.796 -> 1.777 ( -1.06%) [ +0.11% +0.28% +0.00% / +0.17% -0.95% -1.06%] index_copy_ strided 3 : Elapsed 0.018 ms (1.798 ms / 100) 1.833 -> 1.808 ( -1.36%) [ +0.11% +0.22% +0.00% / +0.00% -1.36% -1.25%] index_add_ strided 7 : Elapsed 0.018 ms (1.835 ms / 100) 1.796 -> 1.776 ( -1.11%) [ +0.00% +0.28% +0.22% / +0.17% -0.95% -1.11%] index_copy_ strided 7 : Elapsed 0.018 ms (1.796 ms / 100) 1.823 -> 1.805 ( -0.99%) [ +0.16% +0.00% +0.16% / -0.16% -0.99% -0.77%] index_add_ perm : Elapsed 0.018 ms (1.826 ms / 100) 1.782 -> 1.766 ( -0.90%) [ +0.00% +0.51% +0.34% / +0.34% -0.51% -0.90%] index_copy_ perm : Elapsed 0.018 ms (1.782 ms / 100) 1.821 -> 1.805 ( -0.88%) [ +0.00% +0.11% +0.05% / -0.22% -0.88% -0.82%] index_add_ perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) 1.783 -> 1.765 ( -1.01%) [ +0.00% +0.17% +0.11% / +0.17% -0.95% -1.01%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.783 ms / 100) 8.248 -> 8.261 ( +0.16%) [ +0.19% +0.29% +0.00% / +0.16% +0.45% +0.58%] index_select const : Elapsed 0.083 ms (8.264 ms / 100) 8.299 -> 8.310 ( +0.13%) [ +0.00% +0.16% +0.13% / +0.33% +0.13% +0.61%] index_select wrap : Elapsed 0.083 ms (8.299 ms / 100) 8.296 -> 8.284 ( -0.14%) [ +0.00% +0.04% +0.19% / -0.14% +0.41% +0.40%] index_select linear : Elapsed 0.083 ms (8.296 ms / 100) 8.313 -> 8.323 ( +0.12%) [ +0.12% +0.01% +0.00% / +0.14% +0.19% +0.12%] index_select reverse : Elapsed 0.083 ms (8.323 ms / 100) 8.254 -> 8.264 ( +0.12%) [ +0.06% +0.00% +0.13% / +0.12% +0.47% +0.21%] index_select skip64 : Elapsed 0.083 ms (8.259 ms / 100) 8.260 -> 8.266 ( +0.07%) [ +0.00% +0.02% +0.06% / +0.07% +0.21% +0.22%] index_select skip256 : Elapsed 0.083 ms (8.260 ms / 100) 8.288 -> 8.295 ( +0.08%) [ +0.00% +0.14% +0.18% / +0.08% +0.45% +0.41%] index_select spread : Elapsed 0.083 ms (8.288 ms / 100) 8.295 -> 8.315 ( +0.24%) [ +0.20% +0.00% +0.31% / +0.24% +0.39% +0.30%] index_select strided 3 : Elapsed 0.083 ms (8.312 ms / 100) 8.314 -> 8.325 ( +0.13%) [ +0.22% +0.00% +0.18% / +0.13% +0.38% +0.17%] index_select random : Elapsed 0.083 ms (8.332 ms / 100) 8.308 -> 8.296 ( -0.14%) [ +0.08% +0.01% +0.00% / -0.14% +0.28% +0.30%] index_select random_sorted : Elapsed 0.083 ms (8.315 ms / 100) B = [4, 16, 40, 20] (stride (1, 80, 1280, 4)) A = [4, 16, 40, 5] (stride (3200, 40, 1, 640)) dim = 3 1.766 -> 1.768 ( +0.11%) [ +0.00% +0.00% +0.06% / +0.11% +0.74% +0.57%] index_add_ linear : Elapsed 0.018 ms (1.766 ms / 100) 1.722 -> 1.724 ( +0.12%) [ +0.00% +0.17% +0.23% / +0.12% +0.81% +0.75%] index_copy_ linear : Elapsed 0.017 ms (1.722 ms / 100) 1.774 -> 1.772 ( -0.11%) [ +0.06% +0.00% +0.06% / -0.06% -0.11% +0.17%] index_add_ reverse : Elapsed 0.018 ms (1.775 ms / 100) 1.736 -> 1.728 ( -0.46%) [ +0.12% +0.00% +0.00% / +0.00% -0.40% -0.46%] index_copy_ reverse : Elapsed 0.017 ms (1.738 ms / 100) 1.808 -> 1.807 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.22% +0.11%] index_add_ spread : Elapsed 0.018 ms (1.808 ms / 100) 1.768 -> 1.770 ( +0.11%) [ +0.00% +0.00% +0.06% / +0.11% +0.45% +0.51%] index_copy_ spread : Elapsed 0.018 ms (1.768 ms / 100) 1.802 -> 1.801 ( -0.06%) [ +0.00% +0.06% +0.11% / -0.06% +0.78% +0.50%] index_add_ strided 3 : Elapsed 0.018 ms (1.802 ms / 100) 1.759 -> 1.759 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.91% +0.97%] index_copy_ strided 3 : Elapsed 0.018 ms (1.759 ms / 100) 1.793 -> 1.794 ( +0.06%) [ +0.33% +0.00% +0.06% / +0.06% +0.11% +0.39%] index_add_ strided 7 : Elapsed 0.018 ms (1.799 ms / 100) 1.753 -> 1.755 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +0.51% +0.57%] index_copy_ strided 7 : Elapsed 0.018 ms (1.753 ms / 100) 1.800 -> 1.802 ( +0.11%) [ +0.00% +0.11% +0.28% / +0.11% +0.11% +0.17%] index_add_ perm : Elapsed 0.018 ms (1.800 ms / 100) 1.760 -> 1.764 ( +0.23%) [ +0.06% +0.34% +0.00% / +0.23% +0.57% +0.68%] index_copy_ perm : Elapsed 0.018 ms (1.761 ms / 100) 1.796 -> 1.797 ( +0.06%) [ +0.00% +0.17% +0.11% / +0.06% +0.56% +0.61%] index_add_ perm_sorted : Elapsed 0.018 ms (1.796 ms / 100) 1.757 -> 1.755 ( -0.11%) [ +0.06% +0.11% +0.00% / -0.11% +0.97% +0.91%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.758 ms / 100) 8.230 -> 8.236 ( +0.07%) [ +0.00% +0.11% +0.18% / +0.27% +0.07% +0.07%] index_select const : Elapsed 0.082 ms (8.230 ms / 100) 8.285 -> 8.276 ( -0.11%) [ +0.11% +0.00% +0.08% / +0.12% -0.11% +0.00%] index_select wrap : Elapsed 0.083 ms (8.294 ms / 100) 8.262 -> 8.271 ( +0.11%) [ +0.08% +0.24% +0.00% / +0.11% +0.18% +0.30%] index_select linear : Elapsed 0.083 ms (8.269 ms / 100) 8.272 -> 8.283 ( +0.13%) [ +0.00% +0.11% +0.17% / +0.16% +0.13% +0.21%] index_select reverse : Elapsed 0.083 ms (8.272 ms / 100) 8.225 -> 8.232 ( +0.09%) [ +0.00% +0.40% +0.04% / +0.12% +0.09% +0.13%] index_select skip64 : Elapsed 0.082 ms (8.225 ms / 100) 8.236 -> 8.230 ( -0.07%) [ +0.05% +0.00% +0.04% / +0.29% -0.07% +0.11%] index_select skip256 : Elapsed 0.082 ms (8.240 ms / 100) 8.256 -> 8.276 ( +0.24%) [ +0.00% +0.00% +0.21% / +0.52% +0.45% +0.24%] index_select spread : Elapsed 0.083 ms (8.256 ms / 100) 8.284 -> 8.297 ( +0.16%) [ +0.16% +0.00% +0.11% / +0.23% +0.16% +0.28%] index_select strided 3 : Elapsed 0.083 ms (8.297 ms / 100) 8.272 -> 8.281 ( +0.11%) [ +0.10% +0.02% +0.00% / +0.18% +0.11% +0.37%] index_select random : Elapsed 0.083 ms (8.280 ms / 100) 8.267 -> 8.278 ( +0.13%) [ +0.08% +0.00% +0.01% / +0.19% +0.25% +0.13%] index_select random_sorted : Elapsed 0.083 ms (8.274 ms / 100) B = [4, 16, 40, 20] (stride (640, 40, 1, 2560)) A = [4, 16, 40, 5] (stride (40, 160, 1, 2560)) dim = 3 1.613 -> 1.614 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.93% +0.81%] index_add_ linear : Elapsed 0.016 ms (1.614 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.13% +0.13% +0.00% / +0.06% +0.76% +0.89%] index_copy_ linear : Elapsed 0.016 ms (1.576 ms / 100) 1.609 -> 1.614 ( +0.31%) [ +0.37% +0.12% +0.00% / +0.31% +0.75% +0.81%] index_add_ reverse : Elapsed 0.016 ms (1.615 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.19% +0.00% +0.32% / +0.06% +0.76% +0.57%] index_copy_ reverse : Elapsed 0.016 ms (1.575 ms / 100) 1.598 -> 1.600 ( +0.13%) [ +0.19% +0.31% +0.00% / +0.13% +1.88% +1.38%] index_add_ spread : Elapsed 0.016 ms (1.601 ms / 100) 1.563 -> 1.566 ( +0.19%) [ +0.13% +0.00% +0.19% / +0.19% +1.41% +1.28%] index_copy_ spread : Elapsed 0.016 ms (1.565 ms / 100) 1.612 -> 1.617 ( +0.31%) [ +0.00% +0.25% +0.25% / +0.31% +0.99% +1.12%] index_add_ strided 3 : Elapsed 0.016 ms (1.612 ms / 100) 1.571 -> 1.571 ( +0.00%) [ +0.19% +0.00% +0.19% / +0.00% +1.40% +1.27%] index_copy_ strided 3 : Elapsed 0.016 ms (1.574 ms / 100) 1.605 -> 1.607 ( +0.12%) [ +0.25% +0.44% +0.00% / +0.12% +1.06% +0.69%] index_add_ strided 7 : Elapsed 0.016 ms (1.609 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.06% +0.13% +0.00% / +0.06% +0.38% +0.64%] index_copy_ strided 7 : Elapsed 0.016 ms (1.574 ms / 100) 1.605 -> 1.609 ( +0.25%) [ +0.12% +0.06% +0.00% / +0.25% +1.87% +1.68%] index_add_ perm : Elapsed 0.016 ms (1.607 ms / 100) 1.570 -> 1.569 ( -0.06%) [ +0.06% +0.13% +0.00% / -0.06% +1.46% +1.59%] index_copy_ perm : Elapsed 0.016 ms (1.571 ms / 100) 1.606 -> 1.606 ( +0.00%) [ +0.00% +0.31% +0.06% / +0.00% +1.62% +1.87%] index_add_ perm_sorted : Elapsed 0.016 ms (1.606 ms / 100) 1.567 -> 1.570 ( +0.19%) [ +0.00% +1.02% +0.51% / +0.19% +1.79% +1.79%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.567 ms / 100) 7.864 -> 7.866 ( +0.03%) [ +0.18% +0.15% +0.00% / +0.03% +0.28% +0.18%] index_select const : Elapsed 0.079 ms (7.878 ms / 100) 7.911 -> 7.916 ( +0.06%) [ +0.03% +0.19% +0.00% / +0.06% +0.08% +0.14%] index_select wrap : Elapsed 0.079 ms (7.913 ms / 100) 7.899 -> 7.897 ( -0.03%) [ +0.01% +0.27% +0.00% / -0.03% +0.09% +0.35%] index_select linear : Elapsed 0.079 ms (7.900 ms / 100) 7.916 -> 7.906 ( -0.13%) [ +0.11% +0.09% +0.00% / -0.01% -0.13% +0.27%] index_select reverse : Elapsed 0.079 ms (7.925 ms / 100) 7.871 -> 7.862 ( -0.11%) [ +0.00% +0.23% +0.00% / -0.11% +0.23% +0.05%] index_select skip64 : Elapsed 0.079 ms (7.871 ms / 100) 7.864 -> 7.864 ( +0.00%) [ +0.00% +0.05% +0.32% / +0.00% +0.55% +0.28%] index_select skip256 : Elapsed 0.079 ms (7.864 ms / 100) 7.903 -> 7.907 ( +0.05%) [ +0.11% +0.00% +0.14% / +0.05% +0.15% +0.19%] index_select spread : Elapsed 0.079 ms (7.912 ms / 100) 7.897 -> 7.924 ( +0.34%) [ +0.00% +0.22% +0.18% / +0.34% +0.42% +0.48%] index_select strided 3 : Elapsed 0.079 ms (7.897 ms / 100) 7.898 -> 7.895 ( -0.04%) [ +0.00% +0.13% +0.08% / -0.04% +0.34% +0.27%] index_select random : Elapsed 0.079 ms (7.898 ms / 100) 7.881 -> 7.896 ( +0.19%) [ +0.00% +0.22% +0.29% / +0.44% +0.19% +0.44%] index_select random_sorted : Elapsed 0.079 ms (7.881 ms / 100) B = [4, 16, 40, 20] (stride (40, 160, 1, 2560)) A = [4, 16, 40, 5] (stride (1, 800, 20, 4)) dim = 3 1.800 -> 1.805 ( +0.28%) [ +0.00% +0.06% +0.33% / +0.28% +0.89% +1.17%] index_add_ linear : Elapsed 0.018 ms (1.800 ms / 100) 1.746 -> 1.748 ( +0.11%) [ +0.00% +0.17% +0.29% / +0.11% +0.97% +1.15%] index_copy_ linear : Elapsed 0.017 ms (1.746 ms / 100) 1.800 -> 1.805 ( +0.28%) [ +0.00% +0.06% +0.39% / +0.28% +0.94% +1.00%] index_add_ reverse : Elapsed 0.018 ms (1.800 ms / 100) 1.748 -> 1.747 ( -0.06%) [ +0.00% +0.11% +0.23% / -0.06% +0.80% +0.86%] index_copy_ reverse : Elapsed 0.017 ms (1.748 ms / 100) 1.793 -> 1.791 ( -0.11%) [ +0.00% +0.06% +0.11% / -0.11% +1.23% +1.28%] index_add_ spread : Elapsed 0.018 ms (1.793 ms / 100) 1.735 -> 1.737 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +1.44% +1.44%] index_copy_ spread : Elapsed 0.017 ms (1.737 ms / 100) 1.798 -> 1.799 ( +0.06%) [ +0.00% +0.22% +0.44% / +0.06% +1.45% +1.22%] index_add_ strided 3 : Elapsed 0.018 ms (1.798 ms / 100) 1.743 -> 1.746 ( +0.17%) [ +0.00% +0.06% +0.11% / +0.17% +1.66% +1.61%] index_copy_ strided 3 : Elapsed 0.017 ms (1.743 ms / 100) 1.794 -> 1.803 ( +0.50%) [ +0.06% +0.00% +0.56% / +0.50% +0.89% +1.11%] index_add_ strided 7 : Elapsed 0.018 ms (1.795 ms / 100) 1.741 -> 1.752 ( +0.63%) [ +0.11% +0.00% +0.46% / +0.63% +0.86% +1.15%] index_copy_ strided 7 : Elapsed 0.017 ms (1.743 ms / 100) 1.783 -> 1.795 ( +0.67%) [ +0.06% +0.00% +0.56% / +0.67% +1.79% +1.85%] index_add_ perm : Elapsed 0.018 ms (1.784 ms / 100) 1.730 -> 1.739 ( +0.52%) [ +0.00% +0.17% +0.58% / +0.52% +1.73% +1.56%] index_copy_ perm : Elapsed 0.017 ms (1.730 ms / 100) 1.790 -> 1.800 ( +0.56%) [ +0.00% +0.17% +0.39% / +0.56% +1.68% +1.90%] index_add_ perm_sorted : Elapsed 0.018 ms (1.790 ms / 100) 1.739 -> 1.745 ( +0.35%) [ +0.00% +0.17% +0.35% / +0.35% +1.44% +1.55%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.739 ms / 100) 8.200 -> 8.209 ( +0.11%) [ +0.00% +0.23% +0.28% / +0.11% +0.51% +0.33%] index_select const : Elapsed 0.082 ms (8.200 ms / 100) 8.209 -> 8.231 ( +0.27%) [ +0.29% +0.00% +0.06% / +0.27% +0.54% +0.35%] index_select wrap : Elapsed 0.082 ms (8.233 ms / 100) 8.219 -> 8.213 ( -0.07%) [ +0.01% +0.07% +0.00% / -0.07% +0.17% +0.09%] index_select linear : Elapsed 0.082 ms (8.220 ms / 100) 8.208 -> 8.222 ( +0.17%) [ +0.10% +0.00% +0.27% / +0.26% +0.60% +0.17%] index_select reverse : Elapsed 0.082 ms (8.216 ms / 100) 8.216 -> 8.219 ( +0.04%) [ +0.00% +0.13% +0.19% / +0.04% +0.19% +0.21%] index_select skip64 : Elapsed 0.082 ms (8.216 ms / 100) 8.208 -> 8.224 ( +0.19%) [ +0.00% +0.16% +0.09% / +0.33% +0.19% +0.33%] index_select skip256 : Elapsed 0.082 ms (8.208 ms / 100) 8.223 -> 8.222 ( -0.01%) [ +0.41% +0.00% +0.16% / +0.15% +0.45% -0.01%] index_select spread : Elapsed 0.083 ms (8.257 ms / 100) 8.211 -> 8.216 ( +0.06%) [ +0.06% +0.00% +0.43% / +0.06% +0.35% +0.30%] index_select strided 3 : Elapsed 0.082 ms (8.216 ms / 100) 8.218 -> 8.220 ( +0.02%) [ +0.05% +0.16% +0.00% / +0.07% +0.35% +0.02%] index_select random : Elapsed 0.082 ms (8.222 ms / 100) 8.207 -> 8.238 ( +0.38%) [ +0.00% +0.13% +0.18% / +0.51% +0.45% +0.38%] index_select random_sorted : Elapsed 0.082 ms (8.207 ms / 100) out_shape = [20, 40, 5, 16] in_shape = [4, 40, 5, 16] idx_dim = 0 B = [20, 40, 5, 16] (stride (3200, 1, 640, 40)) A = [4, 40, 5, 16] (stride (3200, 5, 1, 200)) dim = 0 0.770 -> 0.772 ( +0.26%) [ +0.00% +0.26% +0.00% / +0.26% +2.21% +2.34%] index_add_ linear : Elapsed 0.008 ms (0.770 ms / 100) 0.785 -> 0.787 ( +0.25%) [ +0.76% +0.25% +0.00% / +0.25% +2.42% +2.80%] index_copy_ linear : Elapsed 0.008 ms (0.791 ms / 100) 0.769 -> 0.774 ( +0.65%) [ +0.39% +0.00% +0.52% / +0.65% +1.17% +1.17%] index_add_ reverse : Elapsed 0.008 ms (0.772 ms / 100) 0.789 -> 0.789 ( +0.00%) [ +0.00% +0.51% +0.51% / +0.00% +0.63% +0.89%] index_copy_ reverse : Elapsed 0.008 ms (0.789 ms / 100) 0.795 -> 0.774 ( -2.64%) [ +0.25% +0.25% +0.00% / +0.38% -2.39% -2.64%] index_add_ spread : Elapsed 0.008 ms (0.797 ms / 100) 0.813 -> 0.795 ( -2.21%) [ +0.49% +0.00% +0.49% / +0.37% -2.21% -2.09%] index_copy_ spread : Elapsed 0.008 ms (0.817 ms / 100) 0.776 -> 0.771 ( -0.64%) [ +0.52% +0.26% +0.00% / +0.26% +0.13% -0.64%] index_add_ strided 3 : Elapsed 0.008 ms (0.780 ms / 100) 0.795 -> 0.794 ( -0.13%) [ +0.13% +0.13% +0.00% / +0.00% -0.13% -0.13%] index_copy_ strided 3 : Elapsed 0.008 ms (0.796 ms / 100) 0.772 -> 0.772 ( +0.00%) [ +0.52% +0.00% +0.39% / +0.00% +1.04% +0.39%] index_add_ strided 7 : Elapsed 0.008 ms (0.776 ms / 100) 0.794 -> 0.792 ( -0.25%) [ +0.25% +0.00% +0.38% / -0.25% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.008 ms (0.796 ms / 100) 0.772 -> 0.773 ( +0.13%) [ +0.39% +0.00% +0.13% / +0.13% +0.78% +1.04%] index_add_ perm : Elapsed 0.008 ms (0.775 ms / 100) 0.793 -> 0.792 ( -0.13%) [ +0.00% +0.13% +0.38% / -0.13% -0.13% +0.00%] index_copy_ perm : Elapsed 0.008 ms (0.793 ms / 100) 0.771 -> 0.771 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.39% +0.91%] index_add_ perm_sorted : Elapsed 0.008 ms (0.771 ms / 100) 0.788 -> 0.790 ( +0.25%) [ +0.25% +0.63% +0.00% / +0.25% +1.14% +1.14%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.790 ms / 100) 5.056 -> 5.057 ( +0.02%) [ +0.00% +0.24% +0.24% / +0.02% +0.47% +0.42%] index_select const : Elapsed 0.051 ms (5.056 ms / 100) 5.115 -> 5.125 ( +0.20%) [ +0.00% +0.04% +0.18% / +0.20% +0.23% +0.33%] index_select wrap : Elapsed 0.051 ms (5.115 ms / 100) 5.108 -> 5.113 ( +0.10%) [ +0.41% +0.16% +0.00% / +0.10% +0.49% +0.22%] index_select linear : Elapsed 0.051 ms (5.129 ms / 100) 5.097 -> 5.099 ( +0.04%) [ +0.02% +0.00% +0.04% / +0.04% +0.16% +0.04%] index_select reverse : Elapsed 0.051 ms (5.098 ms / 100) 5.069 -> 5.065 ( -0.08%) [ +0.04% +0.00% +0.10% / +0.00% -0.08% +0.04%] index_select skip64 : Elapsed 0.051 ms (5.071 ms / 100) 5.056 -> 5.066 ( +0.20%) [ +0.32% +0.00% +0.42% / +0.20% +0.47% +0.38%] index_select skip256 : Elapsed 0.051 ms (5.072 ms / 100) 5.091 -> 5.098 ( +0.14%) [ +0.00% +0.04% +0.22% / +0.14% +0.31% +0.31%] index_select spread : Elapsed 0.051 ms (5.091 ms / 100) 5.112 -> 5.111 ( -0.02%) [ +0.00% +0.12% +0.06% / -0.02% +0.25% +0.20%] index_select strided 3 : Elapsed 0.051 ms (5.112 ms / 100) 5.105 -> 5.118 ( +0.25%) [ +0.18% +0.12% +0.00% / +0.31% +0.27% +0.25%] index_select random : Elapsed 0.051 ms (5.114 ms / 100) 5.096 -> 5.101 ( +0.10%) [ +0.00% +0.04% +0.20% / +0.10% +0.10% +0.41%] index_select random_sorted : Elapsed 0.051 ms (5.096 ms / 100) B = [20, 40, 5, 16] (stride (80, 1600, 16, 1)) A = [4, 40, 5, 16] (stride (16, 320, 64, 1)) dim = 0 0.737 -> 0.739 ( +0.27%) [ +0.54% +0.54% +0.00% / +0.27% +1.09% +0.41%] index_add_ linear : Elapsed 0.007 ms (0.741 ms / 100) 0.727 -> 0.724 ( -0.41%) [ +0.00% +0.28% +0.28% / -0.41% +0.00% -0.41%] index_copy_ linear : Elapsed 0.007 ms (0.727 ms / 100) 0.748 -> 0.741 ( -0.94%) [ +0.27% +0.13% +0.00% / +0.53% -0.67% -0.94%] index_add_ reverse : Elapsed 0.007 ms (0.750 ms / 100) 0.733 -> 0.723 ( -1.36%) [ +0.41% +0.41% +0.00% / +0.82% -1.36% -0.95%] index_copy_ reverse : Elapsed 0.007 ms (0.736 ms / 100) 0.751 -> 0.751 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.53% +0.27%] index_add_ spread : Elapsed 0.008 ms (0.751 ms / 100) 0.733 -> 0.734 ( +0.14%) [ +0.27% +0.27% +0.00% / +0.14% +0.55% +0.14%] index_copy_ spread : Elapsed 0.007 ms (0.735 ms / 100) 0.749 -> 0.750 ( +0.13%) [ +0.27% +0.00% +0.13% / +0.13% +5.07% +2.94%] index_add_ strided 3 : Elapsed 0.008 ms (0.751 ms / 100) 0.735 -> 0.737 ( +0.27%) [ +0.27% +0.41% +0.00% / +0.27% +3.27% +3.13%] index_copy_ strided 3 : Elapsed 0.007 ms (0.737 ms / 100) 0.745 -> 0.746 ( +0.13%) [ +0.81% +0.40% +0.00% / +0.13% +0.54% +0.67%] index_add_ strided 7 : Elapsed 0.008 ms (0.751 ms / 100) 0.731 -> 0.730 ( -0.14%) [ +0.68% +0.68% +0.00% / +0.68% +0.14% -0.14%] index_copy_ strided 7 : Elapsed 0.007 ms (0.736 ms / 100) 0.747 -> 0.746 ( -0.13%) [ +0.54% +0.27% +0.00% / -0.13% +8.84% +9.10%] index_add_ perm : Elapsed 0.008 ms (0.751 ms / 100) 0.731 -> 0.732 ( +0.14%) [ +0.00% +0.27% +1.78% / +0.14% +7.39% +8.76%] index_copy_ perm : Elapsed 0.007 ms (0.731 ms / 100) 0.745 -> 0.748 ( +0.40%) [ +0.27% +0.27% +0.00% / +0.40% +7.65% +8.86%] index_add_ perm_sorted : Elapsed 0.007 ms (0.747 ms / 100) 0.729 -> 0.733 ( +0.55%) [ +0.00% +0.27% +0.41% / +0.55% +7.96% +8.23%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.729 ms / 100) 4.960 -> 4.962 ( +0.04%) [ +0.08% +0.00% +0.30% / +0.04% +0.40% +0.24%] index_select const : Elapsed 0.050 ms (4.964 ms / 100) 4.999 -> 4.994 ( -0.10%) [ +0.00% +0.04% +0.08% / -0.10% +0.20% +0.24%] index_select wrap : Elapsed 0.050 ms (4.999 ms / 100) 4.986 -> 4.995 ( +0.18%) [ +0.00% +0.02% +0.14% / +0.22% +0.18% +0.40%] index_select linear : Elapsed 0.050 ms (4.986 ms / 100) 4.984 -> 4.997 ( +0.26%) [ +0.00% +0.26% +0.34% / +0.26% +0.28% +0.38%] index_select reverse : Elapsed 0.050 ms (4.984 ms / 100) 4.966 -> 4.961 ( -0.10%) [ +0.12% +0.00% +0.14% / -0.10% +0.02% +0.20%] index_select skip64 : Elapsed 0.050 ms (4.972 ms / 100) 4.955 -> 4.953 ( -0.04%) [ +0.18% +0.00% +0.00% / -0.04% +0.18% +0.32%] index_select skip256 : Elapsed 0.050 ms (4.964 ms / 100) 4.993 -> 4.996 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.20% +0.16%] index_select spread : Elapsed 0.050 ms (4.999 ms / 100) 4.990 -> 4.993 ( +0.06%) [ +0.00% +0.34% +0.14% / +0.06% +0.44% +0.46%] index_select strided 3 : Elapsed 0.050 ms (4.990 ms / 100) 4.995 -> 5.005 ( +0.20%) [ +0.02% +0.00% +0.22% / +0.20% +0.44% +0.36%] index_select random : Elapsed 0.050 ms (4.996 ms / 100) 4.991 -> 4.995 ( +0.08%) [ +0.00% +0.10% +0.22% / +0.08% +0.34% +0.26%] index_select random_sorted : Elapsed 0.050 ms (4.991 ms / 100) B = [20, 40, 5, 16] (stride (16, 1600, 320, 1)) dim = 0 fill_cnt = 4 0.485 -> 0.486 ( +0.21%) [ +0.00% +0.62% +0.21% / +0.21% +0.41% +0.41%] index_fill_ const : Elapsed 0.005 ms (0.485 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.00% +1.03% +6.17% / +0.00% +0.00% +0.21%] index_fill_ linear : Elapsed 0.005 ms (0.486 ms / 100) 0.485 -> 0.486 ( +0.21%) [ +0.21% +0.21% +0.00% / +0.21% +0.41% +0.62%] index_fill_ reverse : Elapsed 0.005 ms (0.486 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.00% +0.00%] index_fill_ skip64 : Elapsed 0.005 ms (0.486 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.21% +0.00% +3.09% / +0.00% +0.21% +0.21%] index_fill_ skip256 : Elapsed 0.005 ms (0.487 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +5.36% +0.21% +0.00% / +0.41% +2.89% +0.41%] index_fill_ spread : Elapsed 0.005 ms (0.511 ms / 100) 0.485 -> 0.485 ( +0.00%) [ +0.21% +0.41% +0.00% / +0.21% +0.00% +0.41%] index_fill_ strided 3 : Elapsed 0.005 ms (0.486 ms / 100) 0.484 -> 0.486 ( +0.41%) [ +0.41% +0.41% +0.00% / +0.41% +0.62% +0.41%] index_fill_ strided 5 : Elapsed 0.005 ms (0.486 ms / 100) 0.485 -> 0.486 ( +0.21%) [ +0.00% +0.41% +0.21% / +0.21% +0.21% +0.21%] index_fill_ strided 7 : Elapsed 0.005 ms (0.485 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.00% +0.00% +1.03% / +0.21% +0.00% +0.21%] index_fill_ strided 8 : Elapsed 0.005 ms (0.486 ms / 100) 0.486 -> 0.485 ( -0.21%) [ +0.00% +0.21% +0.00% / +0.00% -0.21% +0.21%] index_fill_ strided 16 : Elapsed 0.005 ms (0.486 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +3.51% +0.41% +0.00% / +0.41% +0.41% +0.62%] index_fill_ random : Elapsed 0.005 ms (0.502 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.21% +0.21% +0.00% / +4.33% +0.41% +0.41%] index_fill_ random_sorted : Elapsed 0.005 ms (0.486 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.00% +0.00% +5.97% / +6.38% +0.21% +0.00%] index_fill_ perm : Elapsed 0.005 ms (0.486 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.21% +0.00% +0.00%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.486 ms / 100) B = [20, 40, 5, 16] (stride (200, 1, 40, 4000)) A = [4, 40, 5, 16] (stride (80, 320, 16, 1)) dim = 0 1.970 -> 1.975 ( +0.25%) [ +0.36% +0.20% +0.00% / +0.25% +1.27% +1.37%] index_add_ linear : Elapsed 0.020 ms (1.977 ms / 100) 1.929 -> 1.933 ( +0.21%) [ +0.05% +0.21% +0.00% / +0.21% +1.45% +1.66%] index_copy_ linear : Elapsed 0.019 ms (1.930 ms / 100) 1.970 -> 1.971 ( +0.05%) [ +0.20% +0.25% +0.00% / +0.05% +1.62% +1.83%] index_add_ reverse : Elapsed 0.020 ms (1.974 ms / 100) 1.924 -> 1.930 ( +0.31%) [ +0.52% +0.26% +0.00% / +0.31% +2.03% +2.08%] index_copy_ reverse : Elapsed 0.019 ms (1.934 ms / 100) 1.973 -> 1.974 ( +0.05%) [ +0.15% +0.30% +0.00% / +0.05% +1.32% +1.17%] index_add_ spread : Elapsed 0.020 ms (1.976 ms / 100) 1.928 -> 1.931 ( +0.16%) [ +0.10% +0.00% +0.26% / +0.16% +1.71% +1.56%] index_copy_ spread : Elapsed 0.019 ms (1.930 ms / 100) 1.969 -> 1.974 ( +0.25%) [ +0.46% +0.41% +0.00% / +0.25% +1.57% +1.52%] index_add_ strided 3 : Elapsed 0.020 ms (1.978 ms / 100) 1.928 -> 1.929 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +1.56% +1.71%] index_copy_ strided 3 : Elapsed 0.019 ms (1.929 ms / 100) 1.978 -> 1.971 ( -0.35%) [ +0.00% +0.00% +0.10% / -0.35% +1.16% +1.57%] index_add_ strided 7 : Elapsed 0.020 ms (1.978 ms / 100) 1.935 -> 1.931 ( -0.21%) [ +0.26% +0.00% +0.21% / -0.21% +1.60% +2.07%] index_copy_ strided 7 : Elapsed 0.019 ms (1.940 ms / 100) 1.984 -> 1.982 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +1.01% +1.16%] index_add_ perm : Elapsed 0.020 ms (1.986 ms / 100) 1.937 -> 1.942 ( +0.26%) [ +0.00% +0.15% +0.21% / +0.26% +1.76% +1.76%] index_copy_ perm : Elapsed 0.019 ms (1.937 ms / 100) 1.982 -> 1.980 ( -0.10%) [ +0.00% +0.10% +0.05% / -0.10% +1.01% +1.26%] index_add_ perm_sorted : Elapsed 0.020 ms (1.982 ms / 100) 1.939 -> 1.945 ( +0.31%) [ +0.00% +0.15% +0.00% / +0.31% +1.70% +1.55%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.939 ms / 100) 8.753 -> 8.749 ( -0.05%) [ +0.19% +0.00% +0.07% / -0.05% +0.15% +0.14%] index_select const : Elapsed 0.088 ms (8.770 ms / 100) 8.822 -> 8.831 ( +0.10%) [ +0.27% +0.06% +0.00% / +0.10% +0.10% +0.29%] index_select wrap : Elapsed 0.088 ms (8.846 ms / 100) 8.783 -> 8.790 ( +0.08%) [ +0.00% +0.19% +0.19% / +0.08% +0.14% +0.33%] index_select linear : Elapsed 0.088 ms (8.783 ms / 100) 8.797 -> 8.794 ( -0.03%) [ +0.09% +0.00% +0.10% / -0.03% +0.49% +0.30%] index_select reverse : Elapsed 0.088 ms (8.805 ms / 100) 8.745 -> 8.757 ( +0.14%) [ +0.16% +0.00% +0.16% / +0.14% +0.24% +0.27%] index_select skip64 : Elapsed 0.088 ms (8.759 ms / 100) 8.741 -> 8.749 ( +0.09%) [ +0.27% +0.00% +0.39% / +0.31% +0.09% +0.19%] index_select skip256 : Elapsed 0.088 ms (8.765 ms / 100) 8.803 -> 8.804 ( +0.01%) [ +0.03% +0.01% +0.00% / +0.09% +0.01% +0.11%] index_select spread : Elapsed 0.088 ms (8.806 ms / 100) 8.806 -> 8.822 ( +0.18%) [ +0.10% +0.19% +0.00% / +0.18% +0.42% +0.27%] index_select strided 3 : Elapsed 0.088 ms (8.815 ms / 100) 8.814 -> 8.808 ( -0.07%) [ +0.00% +0.25% +0.03% / -0.07% +0.31% +0.08%] index_select random : Elapsed 0.088 ms (8.814 ms / 100) 8.801 -> 8.797 ( -0.05%) [ +0.00% +0.12% +0.26% / +0.00% -0.05% +0.24%] index_select random_sorted : Elapsed 0.088 ms (8.801 ms / 100) out_shape = [4, 20, 5, 16] in_shape = [4, 40, 5, 16] idx_dim = 1 B = [4, 20, 5, 16] (stride (1600, 16, 320, 1)) A = [4, 40, 5, 16] (stride (16, 320, 64, 1)) dim = 1 2.442 -> 2.444 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.33% +0.37%] index_select const : Elapsed 0.024 ms (2.444 ms / 100) 2.458 -> 2.453 ( -0.20%) [ +0.24% +0.00% +0.12% / +0.04% -0.20% -0.12%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.00% +0.20% +0.12% / +0.12% -0.08% +0.16%] index_select linear : Elapsed 0.025 ms (2.455 ms / 100) 2.459 -> 2.452 ( -0.28%) [ +0.12% +0.04% +0.00% / +0.04% -0.08% -0.28%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.446 -> 2.444 ( -0.08%) [ +0.00% +0.16% +0.12% / +0.12% +0.12% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.29% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.445 ms / 100) 2.458 -> 2.454 ( -0.16%) [ +0.04% +0.00% +0.04% / +0.04% -0.16% +0.04%] index_select spread : Elapsed 0.025 ms (2.459 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.12% +0.00% +0.04% / +0.16% -0.08% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.33% +0.12% / +0.12% +0.12% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.08% +0.08% +0.04%] index_select strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.12% +0.37% +0.08%] index_select strided 8 : Elapsed 0.025 ms (2.450 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.04% +0.29% +0.00%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.24% +0.29%] index_select random : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.12% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.457 -> 2.456 ( -0.04%) [ +0.28% +0.00% +0.20% / +0.00% +0.04% -0.04%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.448 ( -0.49%) [ +0.08% +0.12% +0.00% / +0.04% -0.49% -0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.462 ms / 100) B = [4, 20, 5, 16] (stride (16, 320, 64, 1)) A = [4, 40, 5, 16] (stride (1, 4, 2560, 160)) dim = 1 2.408 -> 2.410 ( +0.08%) [ +0.25% +0.00% +0.04% / +0.08% +0.17% +0.21%] index_select const : Elapsed 0.024 ms (2.414 ms / 100) 2.421 -> 2.416 ( -0.21%) [ +0.17% +0.17% +0.00% / +0.08% -0.21% -0.12%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.421 -> 2.415 ( -0.25%) [ +0.04% +0.17% +0.00% / +0.08% -0.25% -0.12%] index_select linear : Elapsed 0.024 ms (2.422 ms / 100) 2.419 -> 2.415 ( -0.17%) [ +0.12% +0.00% +0.21% / -0.17% +0.17% +0.12%] index_select reverse : Elapsed 0.024 ms (2.422 ms / 100) 2.407 -> 2.411 ( +0.17%) [ +0.17% +0.00% +0.12% / +0.37% +0.25% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.411 ms / 100) 2.407 -> 2.414 ( +0.29%) [ +0.08% +0.00% +0.08% / +0.29% +0.29% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.427 -> 2.427 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.41% +0.00%] index_select spread : Elapsed 0.024 ms (2.429 ms / 100) 2.421 -> 2.426 ( +0.21%) [ +0.17% +0.00% +0.04% / +0.21% +0.33% +0.41%] index_select strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.45% +0.00% +0.00% / +0.04% +0.21% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.429 ms / 100) 2.424 -> 2.428 ( +0.17%) [ +0.17% +0.00% +0.12% / +0.17% +0.21% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.12% +0.21% +0.00% / +0.00% +0.25% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.17% +0.00% +0.12% / +0.08% +0.41% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.415 ms / 100) 2.420 -> 2.425 ( +0.21%) [ +0.12% +0.12% +0.00% / +0.21% +0.21% +0.33%] index_select random : Elapsed 0.024 ms (2.423 ms / 100) 2.421 -> 2.423 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.21% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.421 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.33% +0.17% +0.00% / +0.17% +0.21% +0.12%] index_select perm : Elapsed 0.024 ms (2.429 ms / 100) 2.425 -> 2.427 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.12% +0.25% +0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.427 ms / 100) B = [4, 20, 5, 16] (stride (5, 320, 1, 20)) A = [4, 40, 5, 16] (stride (640, 1, 2560, 40)) dim = 1 2.448 -> 2.449 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.16% +0.29%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.12% +0.29% +0.00% / +0.00% +0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.08% +0.00% +0.16%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.12% +0.12% +0.08%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.25% +0.04% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.08% +0.00% +0.08%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.08% +0.08%] index_select spread : Elapsed 0.025 ms (2.464 ms / 100) 2.464 -> 2.461 ( -0.12%) [ +0.00% +0.08% +0.00% / +0.00% -0.04% -0.12%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.12% +0.00% +0.12% / -0.04% -0.04% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.466 ms / 100) 2.459 -> 2.462 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.12% +0.24% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.463 ms / 100) 2.461 -> 2.459 ( -0.08%) [ +0.12% +0.00% +0.16% / -0.08% +0.04% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.24% +0.08% +0.00% / -0.04% +0.04% +0.12%] index_select strided 16 : Elapsed 0.025 ms (2.468 ms / 100) 2.460 -> 2.458 ( -0.08%) [ +0.08% +0.16% +0.00% / +0.00% +0.12% -0.08%] index_select random : Elapsed 0.025 ms (2.462 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.08% +0.00% +0.12% / +0.33% +0.04% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.00% +0.16% +0.04% / +0.12% +0.08% +0.00%] index_select perm : Elapsed 0.025 ms (2.461 ms / 100) 2.462 -> 2.456 ( -0.24%) [ +0.12% +0.32% +0.00% / +0.20% -0.24% -0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) B = [4, 20, 5, 16] (stride (5, 320, 1, 20)) A = [4, 40, 5, 16] (stride (200, 5, 1, 800)) dim = 1 2.448 -> 2.449 ( +0.04%) [ +0.12% +0.16% +0.00% / +0.04% +0.20% +0.16%] index_select const : Elapsed 0.025 ms (2.451 ms / 100) 2.460 -> 2.456 ( -0.16%) [ +0.16% +0.28% +0.00% / +0.28% -0.16% -0.12%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.08% +0.00% +0.12% / +0.12% -0.08% -0.08%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.08% +0.00% +0.20% / +0.12% +0.00% +0.28%] index_select reverse : Elapsed 0.025 ms (2.463 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.04% +0.00% +0.20% / +0.04% +0.12% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.20% +0.20% +0.00% / +0.16% +0.08% +0.08%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.466 -> 2.473 ( +0.28%) [ +0.20% +0.32% +0.00% / +0.28% +0.49% +0.45%] index_select spread : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.00% +0.12% +0.04% / +0.16% +0.24% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.455 -> 2.463 ( +0.33%) [ +0.29% +0.33% +0.00% / +0.33% +0.57% +0.53%] index_select strided 5 : Elapsed 0.025 ms (2.462 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.04% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.08% +0.00% +0.33%] index_select strided 8 : Elapsed 0.025 ms (2.455 ms / 100) 2.452 -> 2.456 ( +0.16%) [ +0.04% +0.00% +0.04% / +0.16% +0.33% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.32% +0.28%] index_select random : Elapsed 0.025 ms (2.466 ms / 100) 2.467 -> 2.469 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.08% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.470 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.04% +0.16%] index_select perm : Elapsed 0.025 ms (2.467 ms / 100) 2.464 -> 2.469 ( +0.20%) [ +0.16% +0.00% +0.16% / +0.20% +0.20% +0.45%] index_select perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) B = [4, 20, 5, 16] (stride (1, 320, 4, 20)) A = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) dim = 1 2.445 -> 2.444 ( -0.04%) [ +0.00% +0.16% +0.00% / -0.04% +0.45% +0.12%] index_select const : Elapsed 0.024 ms (2.445 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.20% +0.16% / +0.08% +0.16% +0.04%] index_select wrap : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.29% +0.24% +0.00% / +0.29% +0.16% +0.33%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.453 -> 2.448 ( -0.20%) [ +0.08% +0.00% +0.16% / -0.04% -0.20% +0.00%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.446 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.04% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.29% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.12% +0.04% +0.29%] index_select spread : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.452 ( -0.08%) [ +0.04% +0.08% +0.00% / -0.04% -0.08% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.25% +0.08% +0.00% / +0.20% +0.20% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.16% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.08% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.12% +0.25% / +0.20% +0.16% +0.12%] index_select strided 16 : Elapsed 0.024 ms (2.446 ms / 100) 2.452 -> 2.451 ( -0.04%) [ +0.00% +0.08% +0.04% / +0.12% +0.00% -0.04%] index_select random : Elapsed 0.025 ms (2.452 ms / 100) 2.451 -> 2.449 ( -0.08%) [ +0.00% +0.20% +0.16% / +0.04% +0.04% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.451 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.08% +0.04% +0.00% / -0.08% +0.04% +0.00%] index_select perm : Elapsed 0.025 ms (2.455 ms / 100) 2.457 -> 2.446 ( -0.45%) [ +0.12% +0.04% +0.00% / -0.08% -0.45% -0.41%] index_select perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) B = [4, 20, 5, 16] (stride (5, 20, 1, 400)) A = [4, 40, 5, 16] (stride (3200, 1, 640, 40)) dim = 1 2.411 -> 2.415 ( +0.17%) [ +0.00% +0.25% +0.08% / +0.17% +0.33% +0.29%] index_select const : Elapsed 0.024 ms (2.411 ms / 100) 2.420 -> 2.415 ( -0.21%) [ +0.17% +0.04% +0.00% / +0.08% -0.04% -0.21%] index_select wrap : Elapsed 0.024 ms (2.424 ms / 100) 2.417 -> 2.418 ( +0.04%) [ +0.00% +0.33% +0.25% / +0.21% +0.08% +0.04%] index_select linear : Elapsed 0.024 ms (2.417 ms / 100) 2.419 -> 2.421 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.17% +0.08%] index_select reverse : Elapsed 0.024 ms (2.420 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.00% +0.17% +0.00% / -0.04% +0.12% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.410 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.33% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.412 ms / 100) 2.422 -> 2.417 ( -0.21%) [ +0.17% +0.00% +0.08% / -0.21% +0.33% +0.17%] index_select spread : Elapsed 0.024 ms (2.426 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.37% +0.33%] index_select strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.25% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.423 ms / 100) 2.423 -> 2.427 ( +0.17%) [ +0.08% +0.12% +0.00% / +0.25% +0.29% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.12% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.426 ms / 100) 2.425 -> 2.424 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.21% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.427 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.29% +0.04% +0.25%] index_select random : Elapsed 0.024 ms (2.426 ms / 100) 2.422 -> 2.423 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.12% +0.04% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.33% +0.08% +0.08%] index_select perm : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.425 ( +0.17%) [ +0.00% +0.21% +0.12% / +0.17% +0.29% +0.37%] index_select perm_sorted : Elapsed 0.024 ms (2.421 ms / 100) B = [4, 20, 5, 16] (stride (5, 20, 1, 400)) A = [4, 40, 5, 16] (stride (200, 5, 1, 800)) dim = 1 2.450 -> 2.452 ( +0.08%) [ +0.20% +0.00% +0.04% / +0.08% +0.29% +0.20%] index_select const : Elapsed 0.025 ms (2.455 ms / 100) 2.469 -> 2.466 ( -0.12%) [ +0.12% +0.08% +0.00% / -0.12% +0.00% -0.08%] index_select wrap : Elapsed 0.025 ms (2.472 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.08% +0.24% +0.16%] index_select linear : Elapsed 0.025 ms (2.466 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.08% -0.04%] index_select reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.04% +0.08% +0.00% / +0.00% -0.08% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.456 ms / 100) 2.450 -> 2.454 ( +0.16%) [ +0.12% +0.00% +0.12% / +0.33% +0.16% +0.41%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.20% +0.20%] index_select spread : Elapsed 0.025 ms (2.479 ms / 100) 2.475 -> 2.478 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.16% +0.24% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.467 -> 2.467 ( +0.00%) [ +0.16% +0.04% +0.00% / +0.20% +0.00% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.471 ms / 100) 2.474 -> 2.475 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.24% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.458 -> 2.461 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.20% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.458 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.20% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.460 ms / 100) 2.469 -> 2.471 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.08% +0.20% +0.24%] index_select random : Elapsed 0.025 ms (2.471 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.20% +0.12% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.474 -> 2.477 ( +0.12%) [ +0.00% +0.32% +0.08% / +0.16% +0.12% +0.16%] index_select perm : Elapsed 0.025 ms (2.474 ms / 100) 2.480 -> 2.470 ( -0.40%) [ +0.00% +0.04% +0.04% / +0.04% -0.40% -0.32%] index_select perm_sorted : Elapsed 0.025 ms (2.480 ms / 100) B = [4, 20, 5, 16] (stride (1, 20, 4, 400)) A = [4, 40, 5, 16] (stride (40, 1, 2560, 160)) dim = 1 2.417 -> 2.417 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.00% +0.12% +0.04%] index_select const : Elapsed 0.024 ms (2.417 ms / 100) 2.429 -> 2.423 ( -0.25%) [ +0.12% +0.04% +0.00% / -0.12% -0.25% -0.16%] index_select wrap : Elapsed 0.024 ms (2.432 ms / 100) 2.426 -> 2.421 ( -0.21%) [ +0.00% +0.21% +0.08% / +0.08% -0.21% -0.12%] index_select linear : Elapsed 0.024 ms (2.426 ms / 100) 2.426 -> 2.421 ( -0.21%) [ +0.00% +0.08% +0.21% / -0.21% +0.08% +0.21%] index_select reverse : Elapsed 0.024 ms (2.426 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.00% +0.08% +0.29% / +0.25% +0.21% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.04% +0.00% +0.21% / +0.08% +0.50% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.415 ms / 100) 2.430 -> 2.429 ( -0.04%) [ +0.08% +0.04% +0.00% / -0.04% +0.33% +0.29%] index_select spread : Elapsed 0.024 ms (2.432 ms / 100) 2.429 -> 2.432 ( +0.12%) [ +0.21% +0.00% +0.04% / +0.21% +0.12% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.434 ms / 100) 2.430 -> 2.428 ( -0.08%) [ +0.12% +0.00% +0.00% / -0.08% +0.29% +0.37%] index_select strided 5 : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.434 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.16% +0.29% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.434 -> 2.431 ( -0.12%) [ +0.04% +0.00% +0.04% / -0.12% +0.12% +0.04%] index_select strided 8 : Elapsed 0.024 ms (2.435 ms / 100) 2.427 -> 2.433 ( +0.25%) [ +0.16% +0.12% +0.00% / +0.25% +0.45% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.429 ( -0.08%) [ +0.00% +0.04% +0.12% / -0.08% +0.41% +0.33%] index_select random : Elapsed 0.024 ms (2.431 ms / 100) 2.431 -> 2.433 ( +0.08%) [ +0.08% +0.00% +0.21% / +0.25% +0.08% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.12% +0.25%] index_select perm : Elapsed 0.024 ms (2.433 ms / 100) 2.433 -> 2.433 ( +0.00%) [ +0.04% +0.00% +0.29% / +0.00% +0.21% +0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.434 ms / 100) B = [4, 20, 5, 16] (stride (1, 20, 4, 400)) A = [4, 40, 5, 16] (stride (1, 20, 4, 800)) dim = 1 2.449 -> 2.450 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.37% +0.37%] index_select const : Elapsed 0.025 ms (2.450 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.12% +0.04% -0.04%] index_select wrap : Elapsed 0.025 ms (2.469 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.08% +0.16% +0.00% / +0.04% +0.28% -0.04%] index_select linear : Elapsed 0.025 ms (2.468 ms / 100) 2.466 -> 2.464 ( -0.08%) [ +0.12% +0.00% +0.12% / -0.08% +0.08% +0.08%] index_select reverse : Elapsed 0.025 ms (2.469 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.08% +0.16% +0.00% / -0.08% +0.04% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.20% +0.00% +0.33% / +0.04% +0.33% +0.33%] index_select skip256 : Elapsed 0.025 ms (2.455 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.08% +0.08% +0.04%] index_select spread : Elapsed 0.025 ms (2.468 ms / 100) 2.468 -> 2.466 ( -0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.04% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.16% +0.16% +0.00% / +0.08% +0.12% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.463 ms / 100) 2.467 -> 2.461 ( -0.24%) [ +0.00% +0.12% +0.00% / -0.24% +0.00% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.12% +0.12% +0.00% / -0.08% +0.24% +0.37%] index_select strided 8 : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.29% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.460 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.12% +0.00% +0.00% / +0.08% +0.20% -0.04%] index_select random : Elapsed 0.025 ms (2.468 ms / 100) 2.464 -> 2.465 ( +0.04%) [ +0.37% +0.16% +0.00% / +0.16% +0.04% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.24% +0.04% +0.16%] index_select perm : Elapsed 0.025 ms (2.465 ms / 100) 2.470 -> 2.462 ( -0.32%) [ +0.12% +0.00% +0.08% / +0.12% -0.28% -0.32%] index_select perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) out_shape = [4, 40, 20, 16] in_shape = [4, 40, 5, 16] idx_dim = 2 B = [4, 40, 20, 16] (stride (12800, 16, 640, 1)) A = [4, 40, 5, 16] (stride (3200, 80, 16, 1)) dim = 2 1.748 -> 1.749 ( +0.06%) [ +0.11% +0.00% +0.06% / +0.06% +0.86% +1.03%] index_add_ linear : Elapsed 0.017 ms (1.750 ms / 100) 1.705 -> 1.709 ( +0.23%) [ +0.12% +0.18% +0.00% / +0.23% +0.47% +0.53%] index_copy_ linear : Elapsed 0.017 ms (1.707 ms / 100) 1.754 -> 1.750 ( -0.23%) [ +0.00% +0.11% +0.00% / -0.23% +0.51% +0.46%] index_add_ reverse : Elapsed 0.018 ms (1.754 ms / 100) 1.704 -> 1.709 ( +0.29%) [ +0.00% +0.23% +0.29% / +0.29% +0.82% +0.53%] index_copy_ reverse : Elapsed 0.017 ms (1.704 ms / 100) 1.749 -> 1.756 ( +0.40%) [ +0.00% +0.11% +0.51% / +0.40% +0.57% +0.74%] index_add_ spread : Elapsed 0.017 ms (1.749 ms / 100) 1.704 -> 1.711 ( +0.41%) [ +0.00% +0.29% +0.18% / +0.41% +0.59% +0.70%] index_copy_ spread : Elapsed 0.017 ms (1.704 ms / 100) 1.736 -> 1.740 ( +0.23%) [ +0.00% +0.12% +0.00% / +0.23% +2.13% +1.67%] index_add_ strided 3 : Elapsed 0.017 ms (1.736 ms / 100) 1.691 -> 1.697 ( +0.35%) [ +0.00% +0.12% +0.18% / +0.35% +1.42% +1.71%] index_copy_ strided 3 : Elapsed 0.017 ms (1.691 ms / 100) 1.750 -> 1.752 ( +0.11%) [ +0.06% +0.00% +0.00% / +0.11% +1.37% +1.83%] index_add_ strided 7 : Elapsed 0.018 ms (1.751 ms / 100) 1.704 -> 1.710 ( +0.35%) [ +0.00% +0.12% +0.18% / +0.35% +1.53% +1.70%] index_copy_ strided 7 : Elapsed 0.017 ms (1.704 ms / 100) 1.731 -> 1.751 ( +1.16%) [ +0.17% +0.00% +1.50% / +1.16% +2.54% +2.66%] index_add_ perm : Elapsed 0.017 ms (1.734 ms / 100) 1.686 -> 1.702 ( +0.95%) [ +0.00% +0.00% +1.07% / +0.95% +2.97% +2.85%] index_copy_ perm : Elapsed 0.017 ms (1.686 ms / 100) 1.730 -> 1.748 ( +1.04%) [ +0.17% +0.00% +1.21% / +1.04% +2.72% +2.49%] index_add_ perm_sorted : Elapsed 0.017 ms (1.733 ms / 100) 1.681 -> 1.704 ( +1.37%) [ +0.24% +0.00% +1.49% / +1.37% +3.39% +3.21%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.685 ms / 100) 8.182 -> 8.204 ( +0.27%) [ +0.21% +0.22% +0.00% / +0.27% +0.46% +0.33%] index_select const : Elapsed 0.082 ms (8.199 ms / 100) 8.239 -> 8.237 ( -0.02%) [ +0.41% +0.17% +0.00% / +0.11% -0.02% +0.07%] index_select wrap : Elapsed 0.083 ms (8.273 ms / 100) 8.228 -> 8.229 ( +0.01%) [ +0.11% +0.00% +0.13% / +0.05% +0.01% +0.06%] index_select linear : Elapsed 0.082 ms (8.237 ms / 100) 8.241 -> 8.245 ( +0.05%) [ +0.07% +0.00% +0.16% / +0.05% +0.06% +0.21%] index_select reverse : Elapsed 0.082 ms (8.247 ms / 100) 8.201 -> 8.201 ( +0.00%) [ +0.00% +0.07% +0.27% / +0.01% +0.00% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.201 ms / 100) 8.196 -> 8.203 ( +0.09%) [ +0.01% +0.07% +0.00% / +0.09% +0.24% +0.16%] index_select skip256 : Elapsed 0.082 ms (8.197 ms / 100) 8.229 -> 8.236 ( +0.09%) [ +0.16% +0.00% +0.26% / +0.16% +0.09% +0.09%] index_select spread : Elapsed 0.082 ms (8.242 ms / 100) 8.237 -> 8.230 ( -0.08%) [ +0.00% +0.28% +0.08% / +0.18% -0.08% +0.21%] index_select strided 3 : Elapsed 0.082 ms (8.237 ms / 100) 8.227 -> 8.236 ( +0.11%) [ +0.16% +0.00% +0.10% / +0.23% +0.11% +0.12%] index_select random : Elapsed 0.082 ms (8.240 ms / 100) 8.237 -> 8.231 ( -0.07%) [ +0.00% +0.19% +0.05% / +0.18% +0.10% -0.07%] index_select random_sorted : Elapsed 0.082 ms (8.237 ms / 100) B = [4, 40, 20, 16] (stride (12800, 16, 640, 1)) A = [4, 40, 5, 16] (stride (200, 1, 40, 800)) dim = 2 1.855 -> 1.857 ( +0.11%) [ +0.00% +0.22% +0.22% / +0.11% +0.43% +0.43%] index_add_ linear : Elapsed 0.019 ms (1.855 ms / 100) 1.810 -> 1.816 ( +0.33%) [ +0.17% +0.00% +0.39% / +0.33% +0.33% +0.39%] index_copy_ linear : Elapsed 0.018 ms (1.813 ms / 100) 1.854 -> 1.853 ( -0.05%) [ +0.11% +0.00% +0.00% / -0.05% +0.27% +0.22%] index_add_ reverse : Elapsed 0.019 ms (1.856 ms / 100) 1.805 -> 1.807 ( +0.11%) [ +0.17% +0.00% +0.33% / +0.11% +0.50% +0.39%] index_copy_ reverse : Elapsed 0.018 ms (1.808 ms / 100) 1.868 -> 1.862 ( -0.32%) [ +0.11% +0.05% +0.00% / -0.16% -0.32% -0.21%] index_add_ spread : Elapsed 0.019 ms (1.870 ms / 100) 1.820 -> 1.821 ( +0.05%) [ +0.16% +0.00% +0.16% / +0.16% +0.05% +0.05%] index_copy_ spread : Elapsed 0.018 ms (1.823 ms / 100) 1.856 -> 1.859 ( +0.16%) [ +0.05% +0.16% +0.00% / +0.16% +0.70% +0.75%] index_add_ strided 3 : Elapsed 0.019 ms (1.857 ms / 100) 1.810 -> 1.816 ( +0.33%) [ +0.17% +0.39% +0.00% / +0.33% +0.50% +0.44%] index_copy_ strided 3 : Elapsed 0.018 ms (1.813 ms / 100) 1.858 -> 1.865 ( +0.38%) [ +0.00% +0.27% +0.00% / +0.38% +0.59% +0.70%] index_add_ strided 7 : Elapsed 0.019 ms (1.858 ms / 100) 1.820 -> 1.825 ( +0.27%) [ +0.11% +0.11% +0.00% / +0.27% +0.77% +0.71%] index_copy_ strided 7 : Elapsed 0.018 ms (1.822 ms / 100) 1.857 -> 1.856 ( -0.05%) [ +0.27% +0.05% +0.00% / -0.05% +0.65% +0.38%] index_add_ perm : Elapsed 0.019 ms (1.862 ms / 100) 1.814 -> 1.815 ( +0.06%) [ +0.22% +0.22% +0.00% / +0.06% +0.55% +0.39%] index_copy_ perm : Elapsed 0.018 ms (1.818 ms / 100) 1.877 -> 1.875 ( -0.11%) [ +0.32% +0.05% +0.00% / +0.11% -0.11% +0.00%] index_add_ perm_sorted : Elapsed 0.019 ms (1.883 ms / 100) 1.830 -> 1.828 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% +0.05% +0.16%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.830 ms / 100) 8.519 -> 8.518 ( -0.01%) [ +0.00% +0.15% +0.18% / +0.13% +0.02% -0.01%] index_select const : Elapsed 0.085 ms (8.519 ms / 100) 8.556 -> 8.554 ( -0.02%) [ +0.00% +0.00% +0.07% / +0.19% -0.02% +0.00%] index_select wrap : Elapsed 0.086 ms (8.556 ms / 100) 8.552 -> 8.544 ( -0.09%) [ +0.00% +0.09% +0.18% / +0.02% -0.09% +0.01%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.551 -> 8.555 ( +0.05%) [ +0.00% +0.04% +0.12% / +0.21% +0.05% +0.07%] index_select reverse : Elapsed 0.086 ms (8.551 ms / 100) 8.521 -> 8.532 ( +0.13%) [ +0.00% +0.38% +0.22% / +0.14% +0.36% +0.13%] index_select skip64 : Elapsed 0.085 ms (8.521 ms / 100) 8.530 -> 8.516 ( -0.16%) [ +0.00% +0.02% +0.07% / +0.27% +0.05% -0.16%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.570 -> 8.564 ( -0.07%) [ +0.21% +0.00% +0.00% / +0.08% -0.07% -0.06%] index_select spread : Elapsed 0.086 ms (8.588 ms / 100) 8.548 -> 8.550 ( +0.02%) [ +0.00% +0.02% +0.27% / +0.08% +0.28% +0.02%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.567 -> 8.547 ( -0.23%) [ +0.05% +0.00% +0.00% / +0.26% +0.28% -0.23%] index_select random : Elapsed 0.086 ms (8.571 ms / 100) 8.574 -> 8.574 ( +0.00%) [ +0.14% +0.02% +0.00% / +0.00% +0.10% +0.23%] index_select random_sorted : Elapsed 0.086 ms (8.586 ms / 100) B = [4, 40, 20, 16] (stride (16, 1280, 64, 1)) A = [4, 40, 5, 16] (stride (80, 320, 16, 1)) dim = 2 1.859 -> 1.863 ( +0.22%) [ +0.00% +0.16% +0.32% / +0.22% +0.70% +0.65%] index_add_ linear : Elapsed 0.019 ms (1.859 ms / 100) 1.809 -> 1.817 ( +0.44%) [ +0.17% +0.00% +0.55% / +0.44% +1.00% +1.11%] index_copy_ linear : Elapsed 0.018 ms (1.812 ms / 100) 1.859 -> 1.866 ( +0.38%) [ +0.05% +0.00% +0.22% / +0.38% +0.59% +0.65%] index_add_ reverse : Elapsed 0.019 ms (1.860 ms / 100) 1.812 -> 1.818 ( +0.33%) [ +0.17% +0.00% +0.22% / +0.33% +0.77% +0.94%] index_copy_ reverse : Elapsed 0.018 ms (1.815 ms / 100) 1.857 -> 1.862 ( +0.27%) [ +0.00% +0.05% +0.00% / +0.27% +0.81% +0.81%] index_add_ spread : Elapsed 0.019 ms (1.857 ms / 100) 1.810 -> 1.813 ( +0.17%) [ +0.00% +0.11% +0.11% / +0.17% +1.05% +0.83%] index_copy_ spread : Elapsed 0.018 ms (1.810 ms / 100) 1.853 -> 1.857 ( +0.22%) [ +0.00% +0.05% +0.22% / +0.22% +1.24% +1.35%] index_add_ strided 3 : Elapsed 0.019 ms (1.853 ms / 100) 1.807 -> 1.812 ( +0.28%) [ +0.06% +0.00% +0.06% / +0.28% +1.27% +1.33%] index_copy_ strided 3 : Elapsed 0.018 ms (1.808 ms / 100) 1.856 -> 1.857 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.92% +1.13%] index_add_ strided 7 : Elapsed 0.019 ms (1.858 ms / 100) 1.811 -> 1.814 ( +0.17%) [ +0.06% +0.00% +0.11% / +0.17% +0.88% +0.94%] index_copy_ strided 7 : Elapsed 0.018 ms (1.812 ms / 100) 1.853 -> 1.860 ( +0.38%) [ +0.00% +0.22% +0.38% / +0.38% +1.19% +1.19%] index_add_ perm : Elapsed 0.019 ms (1.853 ms / 100) 1.806 -> 1.812 ( +0.33%) [ +0.00% +0.00% +0.28% / +0.33% +1.22% +1.27%] index_copy_ perm : Elapsed 0.018 ms (1.806 ms / 100) 1.849 -> 1.858 ( +0.49%) [ +0.00% +0.16% +0.70% / +0.49% +1.73% +1.57%] index_add_ perm_sorted : Elapsed 0.018 ms (1.849 ms / 100) 1.804 -> 1.812 ( +0.44%) [ +0.00% +0.11% +0.61% / +0.44% +1.55% +1.39%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.804 ms / 100) 8.525 -> 8.532 ( +0.08%) [ +0.00% +0.13% +0.05% / +0.08% +0.21% +0.38%] index_select const : Elapsed 0.085 ms (8.525 ms / 100) 8.562 -> 8.570 ( +0.09%) [ +0.29% +0.15% +0.00% / +0.09% +0.40% +0.77%] index_select wrap : Elapsed 0.086 ms (8.587 ms / 100) 8.552 -> 8.562 ( +0.12%) [ +0.00% +0.12% +0.65% / +0.12% +0.35% +0.61%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.577 -> 8.565 ( -0.14%) [ +0.00% +0.00% +0.08% / -0.14% +0.37% +0.44%] index_select reverse : Elapsed 0.086 ms (8.577 ms / 100) 8.541 -> 8.518 ( -0.27%) [ +0.00% +0.00% +0.08% / -0.27% +0.13% +0.16%] index_select skip64 : Elapsed 0.085 ms (8.541 ms / 100) 8.519 -> 8.529 ( +0.12%) [ +0.21% +0.33% +0.00% / +0.12% +0.43% +0.56%] index_select skip256 : Elapsed 0.085 ms (8.537 ms / 100) 8.558 -> 8.573 ( +0.18%) [ +0.16% +0.14% +0.00% / +0.18% +0.90% +0.47%] index_select spread : Elapsed 0.086 ms (8.572 ms / 100) 8.591 -> 8.579 ( -0.14%) [ +0.00% +0.08% +0.22% / -0.14% +0.35% +0.44%] index_select strided 3 : Elapsed 0.086 ms (8.591 ms / 100) 8.574 -> 8.604 ( +0.35%) [ +0.00% +0.19% +0.23% / +0.35% +0.40% +0.62%] index_select random : Elapsed 0.086 ms (8.574 ms / 100) 8.553 -> 8.583 ( +0.35%) [ +0.00% +0.36% +0.23% / +0.35% +0.65% +0.65%] index_select random_sorted : Elapsed 0.086 ms (8.553 ms / 100) B = [4, 40, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 40, 5, 16] (stride (1, 320, 64, 4)) dim = 2 1.841 -> 1.840 ( -0.05%) [ +0.11% +0.00% +0.05% / -0.05% +0.54% +0.49%] index_add_ linear : Elapsed 0.018 ms (1.843 ms / 100) 1.800 -> 1.800 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.50% +0.56%] index_copy_ linear : Elapsed 0.018 ms (1.801 ms / 100) 1.839 -> 1.838 ( -0.05%) [ +0.22% +0.00% +0.22% / -0.05% +0.54% +0.65%] index_add_ reverse : Elapsed 0.018 ms (1.843 ms / 100) 1.801 -> 1.801 ( +0.00%) [ +0.00% +0.17% +0.11% / +0.00% +0.61% +0.67%] index_copy_ reverse : Elapsed 0.018 ms (1.801 ms / 100) 1.875 -> 1.879 ( +0.21%) [ +0.32% +0.11% +0.00% / +0.27% +0.37% +0.21%] index_add_ spread : Elapsed 0.019 ms (1.881 ms / 100) 1.837 -> 1.840 ( +0.16%) [ +0.33% +0.16% +0.00% / +0.16% +0.33% +0.22%] index_copy_ spread : Elapsed 0.018 ms (1.843 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.37% +0.16% +0.00% / +0.16% +0.43% +0.16%] index_add_ strided 3 : Elapsed 0.019 ms (1.881 ms / 100) 1.838 -> 1.840 ( +0.11%) [ +0.00% +0.11% +0.33% / +0.16% +0.16% +0.11%] index_copy_ strided 3 : Elapsed 0.018 ms (1.838 ms / 100) 1.860 -> 1.863 ( +0.16%) [ +0.05% +0.00% +0.22% / +0.16% +0.22% +0.16%] index_add_ strided 7 : Elapsed 0.019 ms (1.861 ms / 100) 1.822 -> 1.823 ( +0.05%) [ +0.44% +0.00% +0.05% / +0.05% +0.38% +0.55%] index_copy_ strided 7 : Elapsed 0.018 ms (1.830 ms / 100) 1.883 -> 1.882 ( -0.05%) [ +0.11% +0.27% +0.00% / +0.05% -0.05% -0.05%] index_add_ perm : Elapsed 0.019 ms (1.885 ms / 100) 1.839 -> 1.839 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.44% +0.27%] index_copy_ perm : Elapsed 0.018 ms (1.841 ms / 100) 1.876 -> 1.875 ( -0.05%) [ +0.21% +0.00% +0.00% / -0.05% +0.05% +0.11%] index_add_ perm_sorted : Elapsed 0.019 ms (1.880 ms / 100) 1.837 -> 1.838 ( +0.05%) [ +0.27% +0.00% +0.11% / +0.44% +0.05% +0.38%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.842 ms / 100) 8.555 -> 8.551 ( -0.05%) [ +0.02% +0.00% +0.11% / +0.18% -0.05% +0.00%] index_select const : Elapsed 0.086 ms (8.557 ms / 100) 8.590 -> 8.578 ( -0.14%) [ +0.08% +0.06% +0.00% / -0.14% +0.20% +0.16%] index_select wrap : Elapsed 0.086 ms (8.597 ms / 100) 8.568 -> 8.585 ( +0.20%) [ +0.19% +0.00% +0.12% / +0.22% +0.20% +0.41%] index_select linear : Elapsed 0.086 ms (8.584 ms / 100) 8.570 -> 8.554 ( -0.19%) [ +0.00% +0.07% +0.15% / -0.19% -0.09% -0.02%] index_select reverse : Elapsed 0.086 ms (8.570 ms / 100) 8.547 -> 8.561 ( +0.16%) [ +0.00% +0.20% +0.00% / +0.26% +0.16% +0.32%] index_select skip64 : Elapsed 0.085 ms (8.547 ms / 100) 8.542 -> 8.572 ( +0.35%) [ +0.01% +0.00% +0.26% / +0.35% +0.41% +0.42%] index_select skip256 : Elapsed 0.085 ms (8.543 ms / 100) 8.574 -> 8.559 ( -0.17%) [ +0.01% +0.10% +0.00% / +0.28% -0.17% -0.10%] index_select spread : Elapsed 0.086 ms (8.575 ms / 100) 8.591 -> 8.600 ( +0.10%) [ +0.14% +0.00% +0.14% / +0.10% +0.26% +0.20%] index_select strided 3 : Elapsed 0.086 ms (8.603 ms / 100) 8.580 -> 8.594 ( +0.16%) [ +0.14% +0.17% +0.00% / +0.21% +0.16% +0.45%] index_select random : Elapsed 0.086 ms (8.592 ms / 100) 8.573 -> 8.577 ( +0.05%) [ +0.16% +0.19% +0.00% / +0.05% +0.10% +0.06%] index_select random_sorted : Elapsed 0.086 ms (8.587 ms / 100) B = [4, 40, 20, 16] (stride (1, 1280, 4, 80)) A = [4, 40, 5, 16] (stride (200, 1, 40, 800)) dim = 2 1.938 -> 1.936 ( -0.10%) [ +0.15% +0.10% +0.00% / -0.10% +0.72% +0.41%] index_add_ linear : Elapsed 0.019 ms (1.941 ms / 100) 1.904 -> 1.909 ( +0.26%) [ +0.00% +0.00% +0.00% / +0.26% +1.00% +1.16%] index_copy_ linear : Elapsed 0.019 ms (1.904 ms / 100) 1.943 -> 1.946 ( +0.15%) [ +0.15% +0.21% +0.00% / +0.15% +0.51% +0.57%] index_add_ reverse : Elapsed 0.019 ms (1.946 ms / 100) 1.916 -> 1.918 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.47% +0.26%] index_copy_ reverse : Elapsed 0.019 ms (1.917 ms / 100) 1.992 -> 1.993 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.10% +0.05% +0.10%] index_add_ spread : Elapsed 0.020 ms (1.992 ms / 100) 1.951 -> 1.966 ( +0.77%) [ +0.05% +0.00% +0.72% / +0.77% +1.13% +1.08%] index_copy_ spread : Elapsed 0.020 ms (1.952 ms / 100) 1.986 -> 1.989 ( +0.15%) [ +0.30% +0.10% +0.00% / +0.15% +0.35% +0.50%] index_add_ strided 3 : Elapsed 0.020 ms (1.992 ms / 100) 1.943 -> 1.960 ( +0.87%) [ +0.00% +0.15% +0.77% / +0.87% +1.39% +1.65%] index_copy_ strided 3 : Elapsed 0.019 ms (1.943 ms / 100) 1.971 -> 1.973 ( +0.10%) [ +0.00% +0.00% +0.20% / +0.20% +0.30% +0.10%] index_add_ strided 7 : Elapsed 0.020 ms (1.971 ms / 100) 1.947 -> 1.950 ( +0.15%) [ +0.10% +0.00% +0.15% / +0.15% +0.26% +0.56%] index_copy_ strided 7 : Elapsed 0.019 ms (1.949 ms / 100) 1.976 -> 1.973 ( -0.15%) [ +0.00% +0.10% +0.05% / -0.15% +0.25% +0.25%] index_add_ perm : Elapsed 0.020 ms (1.976 ms / 100) 1.941 -> 1.952 ( +0.57%) [ +0.10% +0.00% +0.77% / +0.57% +1.29% +1.18%] index_copy_ perm : Elapsed 0.019 ms (1.943 ms / 100) 1.977 -> 1.977 ( +0.00%) [ +0.30% +0.46% +0.00% / +0.00% +0.00% +0.46%] index_add_ perm_sorted : Elapsed 0.020 ms (1.983 ms / 100) 1.939 -> 1.952 ( +0.67%) [ +0.00% +0.26% +0.67% / +0.67% +0.98% +1.13%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.939 ms / 100) 8.568 -> 8.553 ( -0.18%) [ +0.22% +0.01% +0.00% / +0.00% -0.18% +0.07%] index_select const : Elapsed 0.086 ms (8.587 ms / 100) 8.598 -> 8.598 ( +0.00%) [ +0.23% +0.00% +0.13% / +0.00% +0.26% +0.14%] index_select wrap : Elapsed 0.086 ms (8.618 ms / 100) 8.601 -> 8.596 ( -0.06%) [ +0.00% +0.13% +0.05% / +0.20% -0.06% +0.12%] index_select linear : Elapsed 0.086 ms (8.601 ms / 100) 8.590 -> 8.599 ( +0.10%) [ +0.15% +0.02% +0.00% / +0.10% +0.22% +0.20%] index_select reverse : Elapsed 0.086 ms (8.603 ms / 100) 8.561 -> 8.567 ( +0.07%) [ +0.14% +0.27% +0.00% / +0.19% +0.21% +0.07%] index_select skip64 : Elapsed 0.086 ms (8.573 ms / 100) 8.555 -> 8.555 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.22% +0.18%] index_select skip256 : Elapsed 0.086 ms (8.559 ms / 100) 8.618 -> 8.623 ( +0.06%) [ +0.00% +0.07% +0.02% / +0.09% +0.15% +0.06%] index_select spread : Elapsed 0.086 ms (8.618 ms / 100) 8.603 -> 8.593 ( -0.12%) [ +0.16% +0.09% +0.00% / +0.06% -0.12% +0.28%] index_select strided 3 : Elapsed 0.086 ms (8.617 ms / 100) 8.609 -> 8.621 ( +0.14%) [ +0.00% +0.31% +0.20% / +0.17% +0.62% +0.14%] index_select random : Elapsed 0.086 ms (8.609 ms / 100) 8.611 -> 8.617 ( +0.07%) [ +0.26% +0.24% +0.00% / +0.24% +0.07% +0.48%] index_select random_sorted : Elapsed 0.086 ms (8.633 ms / 100) B = [4, 40, 20, 16] (stride (1, 64, 2560, 4)) A = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) dim = 2 1.871 -> 1.874 ( +0.16%) [ +0.27% +0.00% +0.05% / +0.16% +0.32% +0.16%] index_add_ linear : Elapsed 0.019 ms (1.876 ms / 100) 1.814 -> 1.813 ( -0.06%) [ +0.06% +0.11% +0.00% / -0.06% +0.39% +0.55%] index_copy_ linear : Elapsed 0.018 ms (1.815 ms / 100) 1.868 -> 1.871 ( +0.16%) [ +0.16% +0.00% +0.05% / +0.16% +0.27% +0.27%] index_add_ reverse : Elapsed 0.019 ms (1.871 ms / 100) 1.813 -> 1.813 ( +0.00%) [ +0.11% +0.00% +0.17% / +0.00% +0.39% +0.44%] index_copy_ reverse : Elapsed 0.018 ms (1.815 ms / 100) 1.853 -> 1.853 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +1.46% +1.40%] index_add_ spread : Elapsed 0.019 ms (1.854 ms / 100) 1.801 -> 1.804 ( +0.17%) [ +0.11% +0.00% +0.11% / +0.17% +1.61% +1.39%] index_copy_ spread : Elapsed 0.018 ms (1.803 ms / 100) 1.868 -> 1.870 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.70% +0.96%] index_add_ strided 3 : Elapsed 0.019 ms (1.868 ms / 100) 1.816 -> 1.814 ( -0.11%) [ +0.00% +0.06% +0.17% / -0.11% +1.21% +0.72%] index_copy_ strided 3 : Elapsed 0.018 ms (1.816 ms / 100) 1.860 -> 1.859 ( -0.05%) [ +0.16% +0.00% +0.05% / -0.05% +0.86% +0.65%] index_add_ strided 7 : Elapsed 0.019 ms (1.863 ms / 100) 1.810 -> 1.808 ( -0.11%) [ +0.22% +0.00% +0.00% / -0.11% +0.72% +0.66%] index_copy_ strided 7 : Elapsed 0.018 ms (1.814 ms / 100) 1.864 -> 1.861 ( -0.16%) [ +0.16% +0.11% +0.00% / -0.05% -0.16% +0.00%] index_add_ perm : Elapsed 0.019 ms (1.867 ms / 100) 1.813 -> 1.811 ( -0.11%) [ +0.22% +0.33% +0.00% / -0.11% +0.11% +0.06%] index_copy_ perm : Elapsed 0.018 ms (1.817 ms / 100) 1.863 -> 1.865 ( +0.11%) [ +0.00% +0.27% +0.27% / +0.27% +0.11% +0.16%] index_add_ perm_sorted : Elapsed 0.019 ms (1.863 ms / 100) 1.809 -> 1.812 ( +0.17%) [ +0.22% +0.11% +0.00% / +0.17% +0.44% +0.55%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.813 ms / 100) 8.527 -> 8.518 ( -0.11%) [ +0.00% +0.07% +0.16% / -0.11% +0.01% +0.06%] index_select const : Elapsed 0.085 ms (8.527 ms / 100) 8.555 -> 8.553 ( -0.02%) [ +0.00% +0.16% +0.35% / -0.02% +0.12% +0.16%] index_select wrap : Elapsed 0.086 ms (8.555 ms / 100) 8.562 -> 8.565 ( +0.04%) [ +0.00% +0.05% +0.04% / +0.04% +0.20% +0.08%] index_select linear : Elapsed 0.086 ms (8.562 ms / 100) 8.544 -> 8.564 ( +0.23%) [ +0.00% +0.35% +0.15% / +0.34% +0.23% +0.42%] index_select reverse : Elapsed 0.085 ms (8.544 ms / 100) 8.525 -> 8.534 ( +0.11%) [ +0.15% +0.04% +0.00% / +0.13% +0.11% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.538 ms / 100) 8.528 -> 8.536 ( +0.09%) [ +0.12% +0.04% +0.00% / +0.09% +0.26% +0.23%] index_select skip256 : Elapsed 0.085 ms (8.538 ms / 100) 8.573 -> 8.567 ( -0.07%) [ +0.00% +0.20% +0.13% / -0.07% +0.19% +0.07%] index_select spread : Elapsed 0.086 ms (8.573 ms / 100) 8.558 -> 8.570 ( +0.14%) [ +0.00% +0.23% +0.04% / +0.14% +0.46% +0.51%] index_select strided 3 : Elapsed 0.086 ms (8.558 ms / 100) 8.548 -> 8.565 ( +0.20%) [ +0.14% +0.08% +0.00% / +0.20% +0.51% +0.28%] index_select random : Elapsed 0.086 ms (8.560 ms / 100) 8.568 -> 8.574 ( +0.07%) [ +0.00% +0.04% +0.21% / +0.16% +0.21% +0.07%] index_select random_sorted : Elapsed 0.086 ms (8.568 ms / 100) B = [4, 40, 20, 16] (stride (1, 64, 2560, 4)) A = [4, 40, 5, 16] (stride (200, 1, 40, 800)) dim = 2 1.896 -> 1.898 ( +0.11%) [ +0.00% +0.16% +0.26% / +0.11% +0.90% +1.05%] index_add_ linear : Elapsed 0.019 ms (1.896 ms / 100) 1.845 -> 1.853 ( +0.43%) [ +0.00% +0.16% +0.27% / +0.43% +1.25% +1.36%] index_copy_ linear : Elapsed 0.018 ms (1.845 ms / 100) 1.900 -> 1.905 ( +0.26%) [ +0.00% +0.00% +0.21% / +0.26% +0.68% +0.58%] index_add_ reverse : Elapsed 0.019 ms (1.900 ms / 100) 1.849 -> 1.853 ( +0.22%) [ +0.22% +0.00% +0.43% / +0.22% +0.81% +0.97%] index_copy_ reverse : Elapsed 0.019 ms (1.853 ms / 100) 1.900 -> 1.904 ( +0.21%) [ +0.00% +0.11% +0.21% / +0.21% +0.58% +0.79%] index_add_ spread : Elapsed 0.019 ms (1.900 ms / 100) 1.852 -> 1.853 ( +0.05%) [ +0.00% +0.05% +0.22% / +0.05% +0.92% +0.70%] index_copy_ spread : Elapsed 0.019 ms (1.852 ms / 100) 1.896 -> 1.905 ( +0.47%) [ +0.00% +0.11% +0.37% / +0.47% +1.64% +1.69%] index_add_ strided 3 : Elapsed 0.019 ms (1.896 ms / 100) 1.847 -> 1.856 ( +0.49%) [ +0.05% +0.00% +0.49% / +0.49% +2.11% +1.84%] index_copy_ strided 3 : Elapsed 0.018 ms (1.848 ms / 100) 1.896 -> 1.906 ( +0.53%) [ +0.00% +0.05% +0.47% / +0.53% +1.00% +1.16%] index_add_ strided 7 : Elapsed 0.019 ms (1.896 ms / 100) 1.844 -> 1.854 ( +0.54%) [ +0.00% +0.11% +0.49% / +0.54% +1.41% +1.30%] index_copy_ strided 7 : Elapsed 0.018 ms (1.844 ms / 100) 1.899 -> 1.907 ( +0.42%) [ +0.00% +0.00% +0.26% / +0.42% +1.21% +1.47%] index_add_ perm : Elapsed 0.019 ms (1.899 ms / 100) 1.848 -> 1.855 ( +0.38%) [ +0.22% +0.00% +0.32% / +0.38% +1.79% +1.57%] index_copy_ perm : Elapsed 0.019 ms (1.852 ms / 100) 1.897 -> 1.900 ( +0.16%) [ +0.00% +0.00% +0.05% / +0.16% +1.37% +1.21%] index_add_ perm_sorted : Elapsed 0.019 ms (1.897 ms / 100) 1.850 -> 1.854 ( +0.22%) [ +0.00% +0.16% +0.05% / +0.22% +1.08% +1.35%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.850 ms / 100) 8.533 -> 8.526 ( -0.08%) [ +0.00% +0.09% +0.13% / -0.08% +0.01% +0.27%] index_select const : Elapsed 0.085 ms (8.533 ms / 100) 8.568 -> 8.564 ( -0.05%) [ +0.00% +0.02% +0.04% / -0.05% -0.02% +0.02%] index_select wrap : Elapsed 0.086 ms (8.568 ms / 100) 8.554 -> 8.567 ( +0.15%) [ +0.00% +0.11% +0.02% / +0.21% +0.15% +0.23%] index_select linear : Elapsed 0.086 ms (8.554 ms / 100) 8.544 -> 8.568 ( +0.28%) [ +0.26% +0.00% +0.16% / +0.28% +0.53% +0.48%] index_select reverse : Elapsed 0.086 ms (8.566 ms / 100) 8.539 -> 8.548 ( +0.11%) [ +0.02% +0.04% +0.00% / +0.18% +0.11% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.541 ms / 100) 8.532 -> 8.532 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.00% +1.38% +0.28%] index_select skip256 : Elapsed 0.085 ms (8.542 ms / 100) 8.585 -> 8.583 ( -0.02%) [ +0.07% +0.00% +0.12% / -0.02% +0.09% +0.00%] index_select spread : Elapsed 0.086 ms (8.591 ms / 100) 8.551 -> 8.578 ( +0.32%) [ +0.16% +0.21% +0.00% / +0.37% +0.33% +0.32%] index_select strided 3 : Elapsed 0.086 ms (8.565 ms / 100) 8.582 -> 8.584 ( +0.02%) [ +0.00% +0.13% +0.08% / +0.02% +0.29% +0.30%] index_select random : Elapsed 0.086 ms (8.582 ms / 100) 8.572 -> 8.581 ( +0.10%) [ +0.00% +0.36% +0.26% / +0.10% +0.17% +0.50%] index_select random_sorted : Elapsed 0.086 ms (8.572 ms / 100) B = [4, 40, 20, 16] (stride (40, 1, 2560, 160)) A = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) dim = 2 1.714 -> 1.719 ( +0.29%) [ +0.29% +0.18% +0.00% / +0.29% +0.76% +0.82%] index_add_ linear : Elapsed 0.017 ms (1.719 ms / 100) 1.671 -> 1.676 ( +0.30%) [ +0.00% +0.24% +0.24% / +0.30% +0.84% +0.84%] index_copy_ linear : Elapsed 0.017 ms (1.671 ms / 100) 1.716 -> 1.715 ( -0.06%) [ +0.17% +0.06% +0.00% / -0.06% +0.17% +0.35%] index_add_ reverse : Elapsed 0.017 ms (1.719 ms / 100) 1.669 -> 1.674 ( +0.30%) [ +0.00% +0.18% +0.36% / +0.30% +0.84% +0.72%] index_copy_ reverse : Elapsed 0.017 ms (1.669 ms / 100) 1.704 -> 1.704 ( +0.00%) [ +0.12% +0.00% +0.06% / +0.00% +1.53% +1.35%] index_add_ spread : Elapsed 0.017 ms (1.706 ms / 100) 1.658 -> 1.663 ( +0.30%) [ +0.12% +0.00% +0.06% / +0.30% +1.63% +1.69%] index_copy_ spread : Elapsed 0.017 ms (1.660 ms / 100) 1.711 -> 1.717 ( +0.35%) [ +0.00% +0.35% +0.18% / +0.35% +1.17% +1.34%] index_add_ strided 3 : Elapsed 0.017 ms (1.711 ms / 100) 1.670 -> 1.670 ( +0.00%) [ +0.00% +0.24% +0.18% / +0.00% +1.32% +1.08%] index_copy_ strided 3 : Elapsed 0.017 ms (1.670 ms / 100) 1.706 -> 1.710 ( +0.23%) [ +0.29% +0.06% +0.00% / +0.23% +1.00% +1.11%] index_add_ strided 7 : Elapsed 0.017 ms (1.711 ms / 100) 1.666 -> 1.669 ( +0.18%) [ +0.24% +0.18% +0.00% / +0.18% +0.96% +0.84%] index_copy_ strided 7 : Elapsed 0.017 ms (1.670 ms / 100) 1.708 -> 1.709 ( +0.06%) [ +0.18% +0.06% +0.00% / +0.06% +1.17% +0.76%] index_add_ perm : Elapsed 0.017 ms (1.711 ms / 100) 1.664 -> 1.663 ( -0.06%) [ +0.12% +0.00% +0.12% / -0.06% +1.02% +1.02%] index_copy_ perm : Elapsed 0.017 ms (1.666 ms / 100) 1.704 -> 1.709 ( +0.29%) [ +0.23% +0.35% +0.00% / +0.29% +1.12% +1.00%] index_add_ perm_sorted : Elapsed 0.017 ms (1.708 ms / 100) 1.663 -> 1.665 ( +0.12%) [ +0.00% +0.18% +0.12% / +0.12% +1.20% +1.14%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.663 ms / 100) 8.196 -> 8.193 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.01% +0.26%] index_select const : Elapsed 0.082 ms (8.196 ms / 100) 8.222 -> 8.224 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.22% +0.17% +0.02%] index_select wrap : Elapsed 0.082 ms (8.226 ms / 100) 8.204 -> 8.227 ( +0.28%) [ +0.00% +0.30% +0.46% / +0.28% +0.52% +0.43%] index_select linear : Elapsed 0.082 ms (8.204 ms / 100) 8.202 -> 8.196 ( -0.07%) [ +0.15% +0.06% +0.00% / -0.07% +0.38% +0.15%] index_select reverse : Elapsed 0.082 ms (8.214 ms / 100) 8.191 -> 8.182 ( -0.11%) [ +0.10% +0.07% +0.00% / -0.11% +0.11% +0.39%] index_select skip64 : Elapsed 0.082 ms (8.199 ms / 100) 8.188 -> 8.195 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.10% +0.42% +0.09%] index_select skip256 : Elapsed 0.082 ms (8.188 ms / 100) 8.200 -> 8.207 ( +0.09%) [ +0.00% +0.17% +0.09% / +0.09% +0.24% +0.34%] index_select spread : Elapsed 0.082 ms (8.200 ms / 100) 8.224 -> 8.215 ( -0.11%) [ +0.00% +0.26% +0.00% / -0.11% +0.18% +0.24%] index_select strided 3 : Elapsed 0.082 ms (8.224 ms / 100) 8.221 -> 8.220 ( -0.01%) [ +0.00% +0.12% +0.09% / -0.01% +0.57% +0.39%] index_select random : Elapsed 0.082 ms (8.221 ms / 100) 8.209 -> 8.224 ( +0.18%) [ +0.00% +0.02% +0.16% / +0.21% +0.39% +0.18%] index_select random_sorted : Elapsed 0.082 ms (8.209 ms / 100) B = [4, 40, 20, 16] (stride (1, 4, 2560, 160)) A = [4, 40, 5, 16] (stride (3200, 1, 640, 40)) dim = 2 1.867 -> 1.868 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.05% +0.59% +0.59%] index_add_ linear : Elapsed 0.019 ms (1.870 ms / 100) 1.837 -> 1.841 ( +0.22%) [ +0.00% +0.00% +0.05% / +0.22% +0.49% +0.49%] index_copy_ linear : Elapsed 0.018 ms (1.837 ms / 100) 1.870 -> 1.870 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.53% +0.53%] index_add_ reverse : Elapsed 0.019 ms (1.870 ms / 100) 1.839 -> 1.840 ( +0.05%) [ +0.00% +0.00% +0.22% / +0.05% +0.76% +0.82%] index_copy_ reverse : Elapsed 0.018 ms (1.839 ms / 100) 1.866 -> 1.868 ( +0.11%) [ +0.00% +0.16% +0.21% / +0.11% +0.75% +0.96%] index_add_ spread : Elapsed 0.019 ms (1.866 ms / 100) 1.835 -> 1.835 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.27% +0.11%] index_copy_ spread : Elapsed 0.018 ms (1.835 ms / 100) 1.865 -> 1.868 ( +0.16%) [ +0.11% +0.00% +0.00% / +0.16% +1.07% +1.07%] index_add_ strided 3 : Elapsed 0.019 ms (1.867 ms / 100) 1.831 -> 1.834 ( +0.16%) [ +0.22% +0.00% +0.11% / +0.16% +0.76% +0.66%] index_copy_ strided 3 : Elapsed 0.018 ms (1.835 ms / 100) 1.866 -> 1.865 ( -0.05%) [ +0.05% +0.00% +0.11% / -0.05% +0.54% +0.48%] index_add_ strided 7 : Elapsed 0.019 ms (1.867 ms / 100) 1.834 -> 1.836 ( +0.11%) [ +0.00% +0.16% +0.16% / +0.11% +0.60% +0.33%] index_copy_ strided 7 : Elapsed 0.018 ms (1.834 ms / 100) 1.870 -> 1.871 ( +0.05%) [ +0.11% +0.21% +0.00% / +0.05% +1.23% +1.12%] index_add_ perm : Elapsed 0.019 ms (1.872 ms / 100) 1.843 -> 1.847 ( +0.22%) [ +0.16% +0.22% +0.00% / +0.22% +1.09% +0.98%] index_copy_ perm : Elapsed 0.018 ms (1.846 ms / 100) 1.866 -> 1.869 ( +0.16%) [ +0.00% +0.21% +0.11% / +0.16% +1.29% +1.34%] index_add_ perm_sorted : Elapsed 0.019 ms (1.866 ms / 100) 1.840 -> 1.845 ( +0.27%) [ +0.00% +0.05% +0.11% / +0.27% +1.14% +1.14%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.840 ms / 100) 8.513 -> 8.519 ( +0.07%) [ +0.00% +0.18% +0.21% / +0.07% +0.35% +0.29%] index_select const : Elapsed 0.085 ms (8.513 ms / 100) 8.548 -> 8.555 ( +0.08%) [ +0.11% +0.19% +0.00% / +0.08% +0.12% +0.30%] index_select wrap : Elapsed 0.086 ms (8.557 ms / 100) 8.543 -> 8.543 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.33% +0.27%] index_select linear : Elapsed 0.085 ms (8.543 ms / 100) 8.537 -> 8.533 ( -0.05%) [ +0.14% +0.04% +0.00% / -0.05% +0.12% +0.27%] index_select reverse : Elapsed 0.085 ms (8.549 ms / 100) 8.519 -> 8.529 ( +0.12%) [ +0.00% +0.32% +0.05% / +0.26% +0.12% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.519 ms / 100) 8.511 -> 8.516 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.46% +0.16%] index_select skip256 : Elapsed 0.085 ms (8.516 ms / 100) 8.553 -> 8.531 ( -0.26%) [ +0.08% +0.06% +0.00% / -0.26% +0.19% +0.39%] index_select spread : Elapsed 0.086 ms (8.560 ms / 100) 8.551 -> 8.553 ( +0.02%) [ +0.00% +0.18% +0.09% / +0.02% +0.18% +0.13%] index_select strided 3 : Elapsed 0.086 ms (8.551 ms / 100) 8.555 -> 8.549 ( -0.07%) [ +0.16% +0.13% +0.00% / -0.07% +0.05% +0.06%] index_select random : Elapsed 0.086 ms (8.569 ms / 100) 8.531 -> 8.564 ( +0.39%) [ +0.00% +0.15% +0.07% / +0.39% +0.46% +0.49%] index_select random_sorted : Elapsed 0.085 ms (8.531 ms / 100) B = [4, 40, 20, 16] (stride (800, 1, 40, 3200)) A = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) dim = 2 1.809 -> 1.810 ( +0.06%) [ +0.17% +0.00% +0.28% / +0.06% +0.55% +0.55%] index_add_ linear : Elapsed 0.018 ms (1.812 ms / 100) 1.770 -> 1.768 ( -0.11%) [ +0.00% +0.06% +0.11% / -0.11% +0.34% +0.34%] index_copy_ linear : Elapsed 0.018 ms (1.770 ms / 100) 1.810 -> 1.807 ( -0.17%) [ +0.11% +0.00% +0.17% / -0.17% +0.55% +0.39%] index_add_ reverse : Elapsed 0.018 ms (1.812 ms / 100) 1.767 -> 1.769 ( +0.11%) [ +0.06% +0.17% +0.00% / +0.11% +0.62% +0.57%] index_copy_ reverse : Elapsed 0.018 ms (1.768 ms / 100) 1.827 -> 1.825 ( -0.11%) [ +0.05% +0.00% +0.11% / -0.11% +0.60% +0.33%] index_add_ spread : Elapsed 0.018 ms (1.828 ms / 100) 1.782 -> 1.784 ( +0.11%) [ +0.17% +0.00% +0.17% / +0.11% +0.56% +0.45%] index_copy_ spread : Elapsed 0.018 ms (1.785 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.16% +0.05% / +0.11% +0.38% +0.44%] index_add_ strided 3 : Elapsed 0.018 ms (1.824 ms / 100) 1.781 -> 1.782 ( +0.06%) [ +0.06% +0.17% +0.00% / +0.06% +0.51% +0.51%] index_copy_ strided 3 : Elapsed 0.018 ms (1.782 ms / 100) 1.819 -> 1.819 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.55% +0.71%] index_add_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.773 -> 1.775 ( +0.11%) [ +0.17% +0.17% +0.00% / +0.11% +0.73% +0.85%] index_copy_ strided 7 : Elapsed 0.018 ms (1.776 ms / 100) 1.827 -> 1.826 ( -0.05%) [ +0.00% +0.33% +0.00% / -0.05% +0.33% +0.55%] index_add_ perm : Elapsed 0.018 ms (1.827 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.11% +0.06% +0.00% / +0.11% +0.45% +0.50%] index_copy_ perm : Elapsed 0.018 ms (1.785 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.71% +0.55%] index_add_ perm_sorted : Elapsed 0.018 ms (1.824 ms / 100) 1.781 -> 1.785 ( +0.22%) [ +0.06% +0.17% +0.00% / +0.22% +0.84% +0.56%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.782 ms / 100) 8.516 -> 8.527 ( +0.13%) [ +0.26% +0.31% +0.00% / +0.13% +0.19% +0.59%] index_select const : Elapsed 0.085 ms (8.538 ms / 100) 8.553 -> 8.545 ( -0.09%) [ +0.01% +0.08% +0.00% / -0.09% +0.08% +0.47%] index_select wrap : Elapsed 0.086 ms (8.554 ms / 100) 8.550 -> 8.551 ( +0.01%) [ +0.28% +0.05% +0.00% / +0.01% +0.07% +0.19%] index_select linear : Elapsed 0.086 ms (8.574 ms / 100) 8.523 -> 8.530 ( +0.08%) [ +0.00% +0.23% +0.19% / +0.35% +0.47% +0.08%] index_select reverse : Elapsed 0.085 ms (8.523 ms / 100) 8.530 -> 8.521 ( -0.11%) [ +0.15% +0.07% +0.00% / -0.11% +0.23% -0.11%] index_select skip64 : Elapsed 0.085 ms (8.543 ms / 100) 8.532 -> 8.529 ( -0.04%) [ +0.09% +0.14% +0.00% / -0.04% +0.18% +0.00%] index_select skip256 : Elapsed 0.085 ms (8.540 ms / 100) 8.527 -> 8.547 ( +0.23%) [ +0.21% +0.00% +0.25% / +0.23% +0.48% +0.40%] index_select spread : Elapsed 0.085 ms (8.545 ms / 100) 8.558 -> 8.560 ( +0.02%) [ +0.09% +0.08% +0.00% / +0.02% +0.22% +0.27%] index_select strided 3 : Elapsed 0.086 ms (8.566 ms / 100) 8.563 -> 8.556 ( -0.08%) [ +0.00% +0.05% +0.00% / -0.08% +0.09% +0.11%] index_select random : Elapsed 0.086 ms (8.563 ms / 100) 8.549 -> 8.536 ( -0.15%) [ +0.00% +0.06% +0.14% / -0.15% +0.21% +0.19%] index_select random_sorted : Elapsed 0.085 ms (8.549 ms / 100) out_shape = [4, 40, 5, 20] in_shape = [4, 40, 5, 16] idx_dim = 3 B = [4, 40, 5, 20] (stride (4000, 100, 20, 1)) A = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) dim = 3 3.743 -> 3.742 ( -0.03%) [ +0.19% +0.00% +0.11% / -0.03% +0.83% +0.72%] index_add_ linear : Elapsed 0.037 ms (3.750 ms / 100) 3.569 -> 3.572 ( +0.08%) [ +0.11% +0.00% +0.11% / +0.08% +0.87% +0.84%] index_copy_ linear : Elapsed 0.036 ms (3.573 ms / 100) 3.750 -> 3.746 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% +0.77% +0.61%] index_add_ reverse : Elapsed 0.037 ms (3.750 ms / 100) 3.573 -> 3.571 ( -0.06%) [ +0.03% +0.06% +0.00% / -0.06% +0.76% +0.64%] index_copy_ reverse : Elapsed 0.036 ms (3.574 ms / 100) 3.750 -> 3.753 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.72% +0.75%] index_add_ spread : Elapsed 0.038 ms (3.751 ms / 100) 3.574 -> 3.575 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.76% +0.73%] index_copy_ spread : Elapsed 0.036 ms (3.575 ms / 100) 3.757 -> 3.763 ( +0.16%) [ +0.00% +0.16% +0.13% / +0.16% +0.83% +0.67%] index_add_ strided 3 : Elapsed 0.038 ms (3.757 ms / 100) 3.572 -> 3.577 ( +0.14%) [ +0.00% +0.11% +0.14% / +0.14% +0.76% +0.70%] index_copy_ strided 3 : Elapsed 0.036 ms (3.572 ms / 100) 3.762 -> 3.762 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.64% +0.66%] index_add_ strided 7 : Elapsed 0.038 ms (3.762 ms / 100) 3.576 -> 3.574 ( -0.06%) [ +0.00% +0.03% +0.00% / -0.06% +0.73% +0.59%] index_copy_ strided 7 : Elapsed 0.036 ms (3.576 ms / 100) 3.753 -> 3.752 ( -0.03%) [ +0.00% +0.03% +0.05% / -0.03% +0.59% +0.59%] index_add_ perm : Elapsed 0.038 ms (3.753 ms / 100) 3.575 -> 3.576 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.67% +0.70%] index_copy_ perm : Elapsed 0.036 ms (3.575 ms / 100) 3.754 -> 3.755 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.56% +0.61%] index_add_ perm_sorted : Elapsed 0.038 ms (3.754 ms / 100) 3.576 -> 3.574 ( -0.06%) [ +0.00% +0.03% +0.00% / -0.06% +0.59% +0.62%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.576 ms / 100) 5.389 -> 5.384 ( -0.09%) [ +0.07% +0.06% +0.00% / +0.07% -0.09% +0.00%] index_select const : Elapsed 0.054 ms (5.393 ms / 100) 5.402 -> 5.397 ( -0.09%) [ +0.00% +0.06% +0.02% / +0.04% -0.09% +0.09%] index_select wrap : Elapsed 0.054 ms (5.402 ms / 100) 5.400 -> 5.401 ( +0.02%) [ +0.07% +0.00% +0.06% / +0.22% +0.07% +0.02%] index_select linear : Elapsed 0.054 ms (5.404 ms / 100) 5.398 -> 5.400 ( +0.04%) [ +0.02% +0.13% +0.00% / +0.07% +0.07% +0.04%] index_select reverse : Elapsed 0.054 ms (5.399 ms / 100) 5.383 -> 5.393 ( +0.19%) [ +0.13% +0.00% +0.11% / +0.19% +0.24% +0.20%] index_select skip64 : Elapsed 0.054 ms (5.390 ms / 100) 5.384 -> 5.386 ( +0.04%) [ +0.13% +0.30% +0.00% / +0.19% +0.04% +0.11%] index_select skip256 : Elapsed 0.054 ms (5.391 ms / 100) 5.400 -> 5.399 ( -0.02%) [ +0.00% +0.00% +0.13% / +0.06% -0.02% -0.02%] index_select spread : Elapsed 0.054 ms (5.400 ms / 100) 5.404 -> 5.404 ( +0.00%) [ +0.00% +0.06% +0.09% / +0.06% +0.00% +0.02%] index_select strided 3 : Elapsed 0.054 ms (5.404 ms / 100) 5.406 -> 5.395 ( -0.20%) [ +0.00% +0.07% +0.00% / -0.02% +0.04% -0.20%] index_select strided 5 : Elapsed 0.054 ms (5.406 ms / 100) 5.405 -> 5.405 ( +0.00%) [ +0.04% +0.07% +0.00% / +0.00% +0.02% +0.06%] index_select strided 7 : Elapsed 0.054 ms (5.407 ms / 100) 5.388 -> 5.386 ( -0.04%) [ +0.19% +0.06% +0.00% / +0.02% -0.04% -0.02%] index_select strided 8 : Elapsed 0.054 ms (5.398 ms / 100) 5.400 -> 5.401 ( +0.02%) [ +0.00% +0.22% +0.07% / +0.11% +0.02% +0.15%] index_select random : Elapsed 0.054 ms (5.400 ms / 100) 5.403 -> 5.399 ( -0.07%) [ +0.04% +0.15% +0.00% / -0.07% +0.02% +0.02%] index_select random_sorted : Elapsed 0.054 ms (5.405 ms / 100) B = [4, 40, 5, 20] (stride (4000, 1, 40, 200)) A = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) dim = 3 4.045 -> 4.050 ( +0.12%) [ +0.00% +0.07% +0.10% / +0.12% +0.89% +0.84%] index_add_ linear : Elapsed 0.040 ms (4.045 ms / 100) 3.918 -> 3.917 ( -0.03%) [ +0.03% +0.00% +0.10% / -0.03% +0.69% +0.61%] index_copy_ linear : Elapsed 0.039 ms (3.919 ms / 100) 4.052 -> 4.052 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.74% +0.62%] index_add_ reverse : Elapsed 0.041 ms (4.054 ms / 100) 3.918 -> 3.921 ( +0.08%) [ +0.05% +0.00% +0.05% / +0.08% +0.64% +0.46%] index_copy_ reverse : Elapsed 0.039 ms (3.920 ms / 100) 4.051 -> 4.046 ( -0.12%) [ +0.00% +0.10% +0.00% / -0.12% +0.62% +0.74%] index_add_ spread : Elapsed 0.041 ms (4.051 ms / 100) 3.926 -> 3.927 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.69% +0.53%] index_copy_ spread : Elapsed 0.039 ms (3.926 ms / 100) 4.049 -> 4.049 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.82% +0.79%] index_add_ strided 3 : Elapsed 0.041 ms (4.051 ms / 100) 3.918 -> 3.920 ( +0.05%) [ +0.08% +0.00% +0.10% / +0.05% +0.82% +0.77%] index_copy_ strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 4.051 -> 4.052 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.84% +0.74%] index_add_ strided 7 : Elapsed 0.041 ms (4.054 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.039 ms (3.920 ms / 100) 4.049 -> 4.048 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.74% +0.57%] index_add_ perm : Elapsed 0.040 ms (4.049 ms / 100) 3.918 -> 3.916 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.61% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.920 ms / 100) 4.048 -> 4.049 ( +0.02%) [ +0.00% +0.10% +0.05% / +0.02% +0.72% +0.69%] index_add_ perm_sorted : Elapsed 0.040 ms (4.048 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.74% +0.69%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.919 ms / 100) 5.551 -> 5.550 ( -0.02%) [ +0.20% +0.09% +0.00% / -0.02% +0.14% -0.02%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.570 -> 5.573 ( +0.05%) [ +0.00% +0.18% +0.09% / +0.05% +0.14% +0.09%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.570 -> 5.560 ( -0.18%) [ +0.00% +0.11% +0.07% / -0.18% +0.02% +0.14%] index_select linear : Elapsed 0.056 ms (5.570 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.16% +0.25%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.549 -> 5.561 ( +0.22%) [ +0.00% +0.20% +0.14% / +0.23% +0.22% +0.22%] index_select skip64 : Elapsed 0.055 ms (5.549 ms / 100) 5.559 -> 5.548 ( -0.20%) [ +0.02% +0.05% +0.00% / -0.20% +0.04% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.560 ms / 100) 5.574 -> 5.567 ( -0.13%) [ +0.00% +0.00% +0.04% / -0.13% +0.02% +0.11%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.00% +0.16% +0.07% / +0.02% +0.11% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.07% +0.13% / -0.04% +0.11% -0.02%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.567 -> 5.576 ( +0.16%) [ +0.00% +0.14% +0.00% / +0.23% +0.16% +0.29%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.554 -> 5.562 ( +0.14%) [ +0.00% +0.09% +0.09% / +0.14% +0.29% +0.16%] index_select strided 8 : Elapsed 0.056 ms (5.554 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.00% +0.14% +0.07% / +0.02% +0.16% +0.13%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.561 -> 5.570 ( +0.16%) [ +0.02% +0.11% +0.00% / +0.16% +0.20% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.562 ms / 100) B = [4, 40, 5, 20] (stride (1, 400, 80, 4)) A = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) dim = 3 3.728 -> 3.731 ( +0.08%) [ +0.05% +0.08% +0.00% / +0.08% +0.80% +0.64%] index_add_ linear : Elapsed 0.037 ms (3.730 ms / 100) 3.592 -> 3.593 ( +0.03%) [ +0.22% +0.11% +0.00% / +0.03% +0.75% +0.67%] index_copy_ linear : Elapsed 0.036 ms (3.600 ms / 100) 3.734 -> 3.736 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.78% +0.78%] index_add_ reverse : Elapsed 0.037 ms (3.738 ms / 100) 3.595 -> 3.597 ( +0.06%) [ +0.11% +0.00% +0.06% / +0.06% +0.81% +0.78%] index_copy_ reverse : Elapsed 0.036 ms (3.599 ms / 100) 3.740 -> 3.740 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.70% +0.67%] index_add_ spread : Elapsed 0.037 ms (3.740 ms / 100) 3.593 -> 3.595 ( +0.06%) [ +0.08% +0.11% +0.00% / +0.06% +0.83% +0.83%] index_copy_ spread : Elapsed 0.036 ms (3.596 ms / 100) 3.731 -> 3.731 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.72%] index_add_ strided 3 : Elapsed 0.037 ms (3.731 ms / 100) 3.597 -> 3.601 ( +0.11%) [ +0.14% +0.11% +0.00% / +0.11% +0.81% +0.72%] index_copy_ strided 3 : Elapsed 0.036 ms (3.602 ms / 100) 3.735 -> 3.734 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.62% +0.62%] index_add_ strided 7 : Elapsed 0.037 ms (3.737 ms / 100) 3.602 -> 3.602 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.64% +0.42%] index_copy_ strided 7 : Elapsed 0.036 ms (3.603 ms / 100) 3.742 -> 3.745 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.53% +0.69%] index_add_ perm : Elapsed 0.037 ms (3.743 ms / 100) 3.597 -> 3.600 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.58% +0.58%] index_copy_ perm : Elapsed 0.036 ms (3.598 ms / 100) 3.736 -> 3.738 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.72% +0.56%] index_add_ perm_sorted : Elapsed 0.037 ms (3.737 ms / 100) 3.599 -> 3.600 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.67% +0.53%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.599 ms / 100) 5.469 -> 5.469 ( +0.00%) [ +0.00% +0.27% +0.09% / +0.00% +0.15% +0.15%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.484 -> 5.488 ( +0.07%) [ +0.22% +0.11% +0.00% / +0.09% +0.07% +0.20%] index_select wrap : Elapsed 0.055 ms (5.496 ms / 100) 5.487 -> 5.490 ( +0.05%) [ +0.00% +0.04% +0.09% / +0.05% +0.13% +0.05%] index_select linear : Elapsed 0.055 ms (5.487 ms / 100) 5.486 -> 5.489 ( +0.05%) [ +0.00% +0.09% +0.04% / +0.05% +0.07% +0.09%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.470 -> 5.473 ( +0.05%) [ +0.00% +0.04% +0.04% / +0.05% +0.18% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.470 ms / 100) 5.472 -> 5.468 ( -0.07%) [ +0.00% +0.09% +0.09% / -0.07% +0.11% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.472 ms / 100) 5.489 -> 5.484 ( -0.09%) [ +0.05% +0.00% +0.09% / +0.00% -0.09% +0.02%] index_select spread : Elapsed 0.055 ms (5.492 ms / 100) 5.489 -> 5.489 ( +0.00%) [ +0.00% +0.11% +0.04% / +0.15% +0.11% +0.00%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.490 -> 5.488 ( -0.04%) [ +0.11% +0.00% +0.13% / +0.00% +0.02% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.496 ms / 100) 5.487 -> 5.489 ( +0.04%) [ +0.09% +0.00% +0.05% / +0.13% +0.04% +0.07%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.471 -> 5.475 ( +0.07%) [ +0.07% +0.09% +0.00% / +0.07% +0.16% +0.07%] index_select strided 8 : Elapsed 0.055 ms (5.475 ms / 100) 5.483 -> 5.479 ( -0.07%) [ +0.13% +0.04% +0.00% / -0.07% +0.04% -0.07%] index_select random : Elapsed 0.055 ms (5.490 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.09% +0.02% +0.00% / +0.05% +0.04% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.486 ms / 100) B = [4, 40, 5, 20] (stride (800, 1, 3200, 40)) A = [4, 40, 5, 16] (stride (16, 64, 2560, 1)) dim = 3 4.444 -> 4.446 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.72% +0.72%] index_add_ linear : Elapsed 0.044 ms (4.446 ms / 100) 4.285 -> 4.282 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.65% +0.70%] index_copy_ linear : Elapsed 0.043 ms (4.286 ms / 100) 4.445 -> 4.453 ( +0.18%) [ +0.20% +0.13% +0.00% / +0.18% +0.83% +0.72%] index_add_ reverse : Elapsed 0.045 ms (4.454 ms / 100) 4.279 -> 4.286 ( +0.16%) [ +0.19% +0.02% +0.00% / +0.16% +0.86% +0.79%] index_copy_ reverse : Elapsed 0.043 ms (4.287 ms / 100) 4.449 -> 4.449 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.90% +0.72%] index_add_ spread : Elapsed 0.044 ms (4.449 ms / 100) 4.280 -> 4.285 ( +0.12%) [ +0.12% +0.14% +0.00% / +0.12% +0.82% +0.77%] index_copy_ spread : Elapsed 0.043 ms (4.285 ms / 100) 4.448 -> 4.455 ( +0.16%) [ +0.20% +0.00% +0.04% / +0.16% +0.90% +0.81%] index_add_ strided 3 : Elapsed 0.045 ms (4.457 ms / 100) 4.273 -> 4.280 ( +0.16%) [ +0.16% +0.00% +0.19% / +0.16% +0.98% +0.91%] index_copy_ strided 3 : Elapsed 0.043 ms (4.280 ms / 100) 4.453 -> 4.450 ( -0.07%) [ +0.00% +0.04% +0.02% / -0.07% +0.67% +0.67%] index_add_ strided 7 : Elapsed 0.045 ms (4.453 ms / 100) 4.281 -> 4.287 ( +0.14%) [ +0.12% +0.14% +0.00% / +0.14% +0.82% +0.75%] index_copy_ strided 7 : Elapsed 0.043 ms (4.286 ms / 100) 4.442 -> 4.445 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.83% +0.79%] index_add_ perm : Elapsed 0.044 ms (4.445 ms / 100) 4.281 -> 4.286 ( +0.12%) [ +0.00% +0.12% +0.14% / +0.12% +0.82% +0.75%] index_copy_ perm : Elapsed 0.043 ms (4.281 ms / 100) 4.448 -> 4.449 ( +0.02%) [ +0.16% +0.00% +0.11% / +0.02% +0.83% +0.76%] index_add_ perm_sorted : Elapsed 0.045 ms (4.455 ms / 100) 4.277 -> 4.272 ( -0.12%) [ +0.09% +0.07% +0.00% / -0.12% +0.79% +0.82%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.281 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.13% +0.02% +0.04%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.580 -> 5.575 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.05% -0.09% -0.04%] index_select wrap : Elapsed 0.056 ms (5.581 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.05% +0.18%] index_select linear : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.14% +0.16%] index_select reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.11% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.573 ms / 100) 5.567 -> 5.567 ( +0.00%) [ +0.00% +0.09% +0.02% / +0.04% +0.18% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.00% +0.07% +0.11% / +0.00% +0.14% +0.13%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.00% +0.13% +0.11% / +0.05% +0.00% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.00% +0.07% +0.05% / +0.09% +0.05% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.576 -> 5.573 ( -0.05%) [ +0.00% +0.04% +0.02% / -0.05% +0.18% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.576 ms / 100) 5.575 -> 5.566 ( -0.16%) [ +0.00% +0.04% +0.05% / -0.16% -0.02% +0.16%] index_select strided 8 : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.575 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.31% +0.25%] index_select random : Elapsed 0.056 ms (5.575 ms / 100) 5.570 -> 5.570 ( +0.00%) [ +0.00% +0.13% +0.16% / +0.00% +0.23% +0.36%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) B = [4, 40, 5, 20] (stride (1, 4, 3200, 160)) A = [4, 40, 5, 16] (stride (3200, 16, 640, 1)) dim = 3 4.277 -> 4.281 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.09% +0.72% +0.70%] index_add_ linear : Elapsed 0.043 ms (4.283 ms / 100) 4.126 -> 4.126 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.63% +0.63%] index_copy_ linear : Elapsed 0.041 ms (4.126 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.00% +0.09% +0.07% / +0.02% +0.72% +0.72%] index_add_ reverse : Elapsed 0.043 ms (4.282 ms / 100) 4.127 -> 4.129 ( +0.05%) [ +0.00% +0.15% +0.12% / +0.05% +0.58% +0.61%] index_copy_ reverse : Elapsed 0.041 ms (4.127 ms / 100) 4.266 -> 4.280 ( +0.33%) [ +0.40% +0.40% +0.00% / +0.33% +0.89% +1.13%] index_add_ spread : Elapsed 0.043 ms (4.283 ms / 100) 4.128 -> 4.125 ( -0.07%) [ +0.22% +0.05% +0.00% / -0.07% +0.73% +0.75%] index_copy_ spread : Elapsed 0.041 ms (4.137 ms / 100) 4.264 -> 4.265 ( +0.02%) [ +0.30% +0.00% +0.09% / +0.02% +0.87% +0.77%] index_add_ strided 3 : Elapsed 0.043 ms (4.277 ms / 100) 4.118 -> 4.119 ( +0.02%) [ +0.27% +0.00% +0.17% / +0.02% +0.90% +0.70%] index_copy_ strided 3 : Elapsed 0.041 ms (4.129 ms / 100) 4.276 -> 4.269 ( -0.16%) [ +0.00% +0.12% +0.00% / -0.16% +0.70% +0.44%] index_add_ strided 7 : Elapsed 0.043 ms (4.276 ms / 100) 4.129 -> 4.121 ( -0.19%) [ +0.02% +0.05% +0.00% / -0.19% +0.68% +0.48%] index_copy_ strided 7 : Elapsed 0.041 ms (4.130 ms / 100) 4.275 -> 4.287 ( +0.28%) [ +0.28% +0.26% +0.00% / +0.28% +0.77% +0.42%] index_add_ perm : Elapsed 0.043 ms (4.287 ms / 100) 4.134 -> 4.136 ( +0.05%) [ +0.10% +0.02% +0.00% / +0.05% +0.46% +0.27%] index_copy_ perm : Elapsed 0.041 ms (4.138 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.30% +0.33% +0.00% / +0.05% +0.91% +0.98%] index_add_ perm_sorted : Elapsed 0.043 ms (4.285 ms / 100) 4.124 -> 4.119 ( -0.12%) [ +0.15% +0.10% +0.00% / -0.12% +0.68% +0.58%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.130 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.00% +0.05% +0.11% / -0.02% +0.09% +0.11%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.07% +0.05% +0.00% / +0.04% +0.02% -0.07%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.572 -> 5.566 ( -0.11%) [ +0.13% +0.00% +0.07% / -0.11% +0.04% +0.11%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.04% +0.00% +0.11% / +0.09% +0.02% +0.00%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.562 ( -0.04%) [ +0.00% +0.04% +0.02% / +0.05% -0.04% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.564 -> 5.563 ( -0.02%) [ +0.09% +0.05% +0.00% / -0.02% +0.02% +0.16%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.573 -> 5.574 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.13% +0.02% +0.04%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.580 -> 5.572 ( -0.14%) [ +0.02% +0.00% +0.02% / +0.02% -0.14% -0.14%] index_select strided 3 : Elapsed 0.056 ms (5.581 ms / 100) 5.577 -> 5.573 ( -0.07%) [ +0.04% +0.00% +0.02% / -0.04% -0.07% -0.07%] index_select strided 5 : Elapsed 0.056 ms (5.579 ms / 100) 5.571 -> 5.570 ( -0.02%) [ +0.00% +0.02% +0.18% / +0.18% -0.02% +0.00%] index_select strided 7 : Elapsed 0.056 ms (5.571 ms / 100) 5.574 -> 5.576 ( +0.04%) [ +0.00% +0.02% +0.09% / +0.05% +0.04% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.574 ms / 100) 5.573 -> 5.570 ( -0.05%) [ +0.11% +0.00% +0.11% / -0.05% +0.09% +0.09%] index_select random : Elapsed 0.056 ms (5.579 ms / 100) 5.576 -> 5.569 ( -0.13%) [ +0.11% +0.14% +0.00% / -0.07% -0.13% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) B = [4, 40, 5, 20] (stride (200, 5, 1, 800)) A = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) dim = 3 3.736 -> 3.741 ( +0.13%) [ +0.00% +0.32% +0.27% / +0.13% +0.96% +0.94%] index_add_ linear : Elapsed 0.037 ms (3.736 ms / 100) 3.558 -> 3.563 ( +0.14%) [ +0.00% +0.17% +0.08% / +0.14% +0.90% +0.93%] index_copy_ linear : Elapsed 0.036 ms (3.558 ms / 100) 3.732 -> 3.735 ( +0.08%) [ +0.19% +0.00% +0.19% / +0.08% +0.91% +0.72%] index_add_ reverse : Elapsed 0.037 ms (3.739 ms / 100) 3.561 -> 3.565 ( +0.11%) [ +0.11% +0.00% +0.03% / +0.11% +0.84% +0.79%] index_copy_ reverse : Elapsed 0.036 ms (3.565 ms / 100) 3.743 -> 3.743 ( +0.00%) [ +0.27% +0.00% +0.24% / +0.00% +1.07% +1.10%] index_add_ spread : Elapsed 0.038 ms (3.753 ms / 100) 3.575 -> 3.572 ( -0.08%) [ +0.11% +0.00% +0.06% / -0.08% +0.98% +0.90%] index_copy_ spread : Elapsed 0.036 ms (3.579 ms / 100) 3.750 -> 3.751 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.77% +0.75%] index_add_ strided 3 : Elapsed 0.038 ms (3.752 ms / 100) 3.572 -> 3.576 ( +0.11%) [ +0.08% +0.14% +0.00% / +0.11% +0.84% +0.90%] index_copy_ strided 3 : Elapsed 0.036 ms (3.575 ms / 100) 3.738 -> 3.739 ( +0.03%) [ +0.11% +0.00% +0.11% / +0.03% +0.75% +0.62%] index_add_ strided 7 : Elapsed 0.037 ms (3.742 ms / 100) 3.563 -> 3.563 ( +0.00%) [ +0.06% +0.00% +0.08% / +0.00% +0.76% +0.73%] index_copy_ strided 7 : Elapsed 0.036 ms (3.565 ms / 100) 3.742 -> 3.748 ( +0.16%) [ +0.13% +0.05% +0.00% / +0.16% +0.94% +0.80%] index_add_ perm : Elapsed 0.037 ms (3.747 ms / 100) 3.562 -> 3.564 ( +0.06%) [ +0.14% +0.06% +0.00% / +0.06% +0.81% +0.73%] index_copy_ perm : Elapsed 0.036 ms (3.567 ms / 100) 3.746 -> 3.753 ( +0.19%) [ +0.00% +0.11% +0.21% / +0.19% +0.96% +0.91%] index_add_ perm_sorted : Elapsed 0.037 ms (3.746 ms / 100) 3.570 -> 3.580 ( +0.28%) [ +0.00% +0.17% +0.31% / +0.28% +1.06% +0.84%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.570 ms / 100) 5.388 -> 5.381 ( -0.13%) [ +0.20% +0.00% +0.07% / +0.00% -0.13% +0.04%] index_select const : Elapsed 0.054 ms (5.399 ms / 100) 5.391 -> 5.393 ( +0.04%) [ +0.15% +0.24% +0.00% / +0.07% +0.13% +0.04%] index_select wrap : Elapsed 0.054 ms (5.399 ms / 100) 5.396 -> 5.401 ( +0.09%) [ +0.06% +0.09% +0.00% / +0.09% +0.15% +0.26%] index_select linear : Elapsed 0.054 ms (5.399 ms / 100) 5.397 -> 5.400 ( +0.06%) [ +0.07% +0.11% +0.00% / +0.06% +0.09% +0.15%] index_select reverse : Elapsed 0.054 ms (5.401 ms / 100) 5.385 -> 5.387 ( +0.04%) [ +0.02% +0.00% +0.06% / +0.04% +0.04% +0.04%] index_select skip64 : Elapsed 0.054 ms (5.386 ms / 100) 5.388 -> 5.384 ( -0.07%) [ +0.00% +0.09% +0.02% / -0.07% +0.07% +0.06%] index_select skip256 : Elapsed 0.054 ms (5.388 ms / 100) 5.390 -> 5.396 ( +0.11%) [ +0.24% +0.11% +0.00% / +0.17% +0.17% +0.11%] index_select spread : Elapsed 0.054 ms (5.403 ms / 100) 5.399 -> 5.396 ( -0.06%) [ +0.06% +0.00% +0.07% / -0.06% +0.00% +0.06%] index_select strided 3 : Elapsed 0.054 ms (5.402 ms / 100) 5.395 -> 5.394 ( -0.02%) [ +0.06% +0.00% +0.09% / +0.11% +0.15% -0.02%] index_select strided 5 : Elapsed 0.054 ms (5.398 ms / 100) 5.394 -> 5.397 ( +0.06%) [ +0.00% +0.04% +0.17% / +0.06% +0.09% +0.33%] index_select strided 7 : Elapsed 0.054 ms (5.394 ms / 100) 5.382 -> 5.385 ( +0.06%) [ +0.06% +0.00% +0.15% / +0.06% +0.11% +0.20%] index_select strided 8 : Elapsed 0.054 ms (5.385 ms / 100) 5.394 -> 5.395 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.09% +0.19%] index_select random : Elapsed 0.054 ms (5.395 ms / 100) 5.397 -> 5.395 ( -0.04%) [ +0.00% +0.02% +0.11% / -0.04% +0.24% +0.20%] index_select random_sorted : Elapsed 0.054 ms (5.397 ms / 100) B = [4, 40, 5, 20] (stride (5, 20, 1, 800)) A = [4, 40, 5, 16] (stride (3200, 1, 640, 40)) dim = 3 4.052 -> 4.054 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.79% +0.72%] index_add_ linear : Elapsed 0.041 ms (4.052 ms / 100) 3.916 -> 3.917 ( +0.03%) [ +0.00% +0.26% +0.05% / +0.03% +0.84% +0.64%] index_copy_ linear : Elapsed 0.039 ms (3.916 ms / 100) 4.054 -> 4.055 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.81% +0.76%] index_add_ reverse : Elapsed 0.041 ms (4.056 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.89% +0.82%] index_copy_ reverse : Elapsed 0.039 ms (3.925 ms / 100) 4.048 -> 4.052 ( +0.10%) [ +0.05% +0.12% +0.00% / +0.10% +0.74% +0.64%] index_add_ spread : Elapsed 0.041 ms (4.050 ms / 100) 3.916 -> 3.917 ( +0.03%) [ +0.08% +0.13% +0.00% / +0.03% +0.82% +0.79%] index_copy_ spread : Elapsed 0.039 ms (3.919 ms / 100) 4.053 -> 4.055 ( +0.05%) [ +0.00% +0.07% +0.05% / +0.05% +0.69% +0.72%] index_add_ strided 3 : Elapsed 0.041 ms (4.053 ms / 100) 3.917 -> 3.918 ( +0.03%) [ +0.13% +0.08% +0.00% / +0.03% +0.69% +0.71%] index_copy_ strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 4.055 -> 4.056 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.67% +0.57%] index_add_ strided 7 : Elapsed 0.041 ms (4.055 ms / 100) 3.916 -> 3.918 ( +0.05%) [ +0.00% +0.08% +0.13% / +0.05% +0.66% +0.59%] index_copy_ strided 7 : Elapsed 0.039 ms (3.916 ms / 100) 4.051 -> 4.056 ( +0.12%) [ +0.00% +0.10% +0.07% / +0.12% +0.54% +0.57%] index_add_ perm : Elapsed 0.041 ms (4.051 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.00% +0.13% +0.10% / +0.13% +0.66% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.919 ms / 100) 4.056 -> 4.056 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_add_ perm_sorted : Elapsed 0.041 ms (4.056 ms / 100) 3.924 -> 3.928 ( +0.10%) [ +0.00% +0.13% +0.13% / +0.10% +0.79% +0.79%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.924 ms / 100) 5.556 -> 5.554 ( -0.04%) [ +0.07% +0.09% +0.00% / -0.02% -0.04% +0.00%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.579 -> 5.567 ( -0.22%) [ +0.05% +0.00% +0.13% / +0.11% -0.22% -0.05%] index_select wrap : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.580 ( +0.02%) [ +0.00% +0.09% +0.11% / +0.02% +0.09% +0.09%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.577 -> 5.583 ( +0.11%) [ +0.13% +0.22% +0.00% / +0.11% +0.14% +0.13%] index_select reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.551 -> 5.553 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.11% +0.38%] index_select skip64 : Elapsed 0.056 ms (5.553 ms / 100) 5.554 -> 5.554 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.04% +0.22%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.587 -> 5.576 ( -0.20%) [ +0.02% +0.00% +0.05% / -0.05% -0.20% -0.05%] index_select spread : Elapsed 0.056 ms (5.588 ms / 100) 5.583 -> 5.576 ( -0.13%) [ +0.13% +0.00% +0.18% / +0.05% -0.07% -0.13%] index_select strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.583 -> 5.578 ( -0.09%) [ +0.00% +0.07% +0.05% / -0.02% -0.05% -0.09%] index_select strided 5 : Elapsed 0.056 ms (5.583 ms / 100) 5.578 -> 5.579 ( +0.02%) [ +0.18% +0.09% +0.00% / +0.02% +0.14% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.588 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.00% +0.04% +0.02% / +0.13% +0.07% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.577 -> 5.575 ( -0.04%) [ +0.16% +0.05% +0.00% / -0.02% -0.04% +0.07%] index_select random : Elapsed 0.056 ms (5.586 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.00% +0.13% +0.04% / -0.04% +0.02% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.578 ms / 100) B = [4, 40, 5, 20] (stride (1, 20, 4, 800)) A = [4, 40, 5, 16] (stride (640, 16, 2560, 1)) dim = 3 1.417 -> 1.420 ( +0.21%) [ +0.14% +0.00% +0.21% / +0.21% +0.85% +0.92%] index_add_ linear : Elapsed 0.014 ms (1.419 ms / 100) 1.368 -> 1.372 ( +0.29%) [ +0.15% +0.00% +0.29% / +0.29% +0.88% +1.02%] index_copy_ linear : Elapsed 0.014 ms (1.370 ms / 100) 1.413 -> 1.413 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.85% +0.78%] index_add_ reverse : Elapsed 0.014 ms (1.414 ms / 100) 1.366 -> 1.368 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.95% +0.95%] index_copy_ reverse : Elapsed 0.014 ms (1.366 ms / 100) 1.412 -> 1.414 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +0.92% +0.92%] index_add_ spread : Elapsed 0.014 ms (1.413 ms / 100) 1.364 -> 1.367 ( +0.22%) [ +0.15% +0.15% +0.00% / +0.22% +1.25% +1.32%] index_copy_ spread : Elapsed 0.014 ms (1.366 ms / 100) 1.417 -> 1.421 ( +0.28%) [ +0.28% +0.28% +0.00% / +0.28% +0.85% +0.99%] index_add_ strided 3 : Elapsed 0.014 ms (1.421 ms / 100) 1.369 -> 1.373 ( +0.29%) [ +0.29% +0.37% +0.00% / +0.29% +0.95% +1.02%] index_copy_ strided 3 : Elapsed 0.014 ms (1.373 ms / 100) 1.413 -> 1.414 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.85% +0.92%] index_add_ strided 7 : Elapsed 0.014 ms (1.414 ms / 100) 1.365 -> 1.368 ( +0.22%) [ +0.15% +0.22% +0.00% / +0.22% +1.03% +1.17%] index_copy_ strided 7 : Elapsed 0.014 ms (1.367 ms / 100) 1.418 -> 1.415 ( -0.21%) [ +0.14% +0.00% +0.21% / -0.21% +0.92% +0.78%] index_add_ perm : Elapsed 0.014 ms (1.420 ms / 100) 1.369 -> 1.368 ( -0.07%) [ +0.15% +0.00% +0.00% / -0.07% +1.10% +0.88%] index_copy_ perm : Elapsed 0.014 ms (1.371 ms / 100) 1.419 -> 1.417 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +1.06% +0.70%] index_add_ perm_sorted : Elapsed 0.014 ms (1.419 ms / 100) 1.369 -> 1.368 ( -0.07%) [ +0.00% +0.15% +0.22% / -0.07% +1.17% +0.88%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) 3.550 -> 3.545 ( -0.14%) [ +0.00% +0.03% +0.03% / +0.11% -0.08% -0.14%] index_select const : Elapsed 0.035 ms (3.550 ms / 100) 3.561 -> 3.553 ( -0.22%) [ +0.00% +0.17% +0.20% / +0.00% -0.22% -0.22%] index_select wrap : Elapsed 0.036 ms (3.561 ms / 100) 3.567 -> 3.555 ( -0.34%) [ +0.00% +0.06% +0.00% / -0.20% -0.34% -0.11%] index_select linear : Elapsed 0.036 ms (3.567 ms / 100) 3.560 -> 3.556 ( -0.11%) [ +0.00% +0.06% +0.14% / +0.17% -0.11% -0.03%] index_select reverse : Elapsed 0.036 ms (3.560 ms / 100) 3.550 -> 3.550 ( +0.00%) [ +0.14% +0.08% +0.00% / +0.08% +0.00% +0.00%] index_select skip64 : Elapsed 0.036 ms (3.555 ms / 100) 3.552 -> 3.545 ( -0.20%) [ +0.03% +0.00% +0.03% / -0.11% -0.20% -0.17%] index_select skip256 : Elapsed 0.036 ms (3.553 ms / 100) 3.565 -> 3.552 ( -0.36%) [ +0.31% +0.17% +0.00% / +0.22% -0.25% -0.36%] index_select spread : Elapsed 0.036 ms (3.576 ms / 100) 3.575 -> 3.550 ( -0.70%) [ +0.00% +0.06% +0.08% / +0.11% -0.70% -0.62%] index_select strided 3 : Elapsed 0.036 ms (3.575 ms / 100) 3.575 -> 3.553 ( -0.62%) [ +0.00% +0.14% +0.17% / +0.11% -0.56% -0.62%] index_select strided 5 : Elapsed 0.036 ms (3.575 ms / 100) 3.561 -> 3.554 ( -0.20%) [ +0.00% +0.03% +0.08% / +0.11% -0.20% -0.11%] index_select strided 7 : Elapsed 0.036 ms (3.561 ms / 100) 3.551 -> 3.556 ( +0.14%) [ +0.00% +0.37% +0.17% / +0.28% +0.31% +0.14%] index_select strided 8 : Elapsed 0.036 ms (3.551 ms / 100) 3.560 -> 3.561 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.39% +0.25%] index_select random : Elapsed 0.036 ms (3.561 ms / 100) 3.557 -> 3.560 ( +0.08%) [ +0.08% +0.06% +0.00% / +0.08% +0.20% +0.28%] index_select random_sorted : Elapsed 0.036 ms (3.560 ms / 100) out_shape = [20, 40, 16, 5] in_shape = [4, 40, 16, 5] idx_dim = 0 B = [20, 40, 16, 5] (stride (3200, 5, 200, 1)) A = [4, 40, 16, 5] (stride (3200, 1, 200, 40)) dim = 0 2.129 -> 2.132 ( +0.14%) [ +0.14% +0.19% +0.00% / +0.14% +0.19% +0.23%] index_add_ linear : Elapsed 0.021 ms (2.132 ms / 100) 2.066 -> 2.068 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.15% +0.15%] index_copy_ linear : Elapsed 0.021 ms (2.068 ms / 100) 2.132 -> 2.134 ( +0.09%) [ +0.14% +0.00% +0.23% / +0.09% +0.56% +0.38%] index_add_ reverse : Elapsed 0.021 ms (2.135 ms / 100) 2.066 -> 2.069 ( +0.15%) [ +0.10% +0.00% +0.00% / +0.15% +0.39% +0.34%] index_copy_ reverse : Elapsed 0.021 ms (2.068 ms / 100) 2.135 -> 2.137 ( +0.09%) [ +0.05% +0.00% +0.00% / +0.09% +0.28% +0.33%] index_add_ spread : Elapsed 0.021 ms (2.136 ms / 100) 2.068 -> 2.065 ( -0.15%) [ +0.00% +0.10% +0.15% / -0.15% +0.63% +0.53%] index_copy_ spread : Elapsed 0.021 ms (2.068 ms / 100) 2.131 -> 2.135 ( +0.19%) [ +0.00% +0.00% +0.00% / +0.19% +0.19% +0.19%] index_add_ strided 3 : Elapsed 0.021 ms (2.131 ms / 100) 2.064 -> 2.067 ( +0.15%) [ +0.05% +0.10% +0.00% / +0.15% +0.48% +0.34%] index_copy_ strided 3 : Elapsed 0.021 ms (2.065 ms / 100) 2.136 -> 2.137 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.23% +0.23%] index_add_ strided 7 : Elapsed 0.021 ms (2.138 ms / 100) 2.068 -> 2.069 ( +0.05%) [ +0.00% +0.10% +0.19% / +0.05% +0.29% +0.34%] index_copy_ strided 7 : Elapsed 0.021 ms (2.068 ms / 100) 2.122 -> 2.124 ( +0.09%) [ +0.19% +0.24% +0.00% / +0.09% +0.75% +0.75%] index_add_ perm : Elapsed 0.021 ms (2.126 ms / 100) 2.056 -> 2.058 ( +0.10%) [ +0.15% +0.29% +0.00% / +0.10% +0.73% +0.88%] index_copy_ perm : Elapsed 0.021 ms (2.059 ms / 100) 2.129 -> 2.130 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.42% +0.61%] index_add_ perm_sorted : Elapsed 0.021 ms (2.129 ms / 100) 2.063 -> 2.063 ( +0.00%) [ +0.05% +0.19% +0.00% / +0.00% +0.63% +0.58%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.064 ms / 100) 8.712 -> 8.737 ( +0.29%) [ +0.00% +0.21% +0.20% / +0.29% +0.48% +0.41%] index_select const : Elapsed 0.087 ms (8.712 ms / 100) 8.757 -> 8.772 ( +0.17%) [ +0.00% +0.31% +0.32% / +0.24% +0.34% +0.17%] index_select wrap : Elapsed 0.088 ms (8.757 ms / 100) 8.746 -> 8.744 ( -0.02%) [ +0.00% +0.07% +0.16% / -0.02% +0.30% +0.45%] index_select linear : Elapsed 0.087 ms (8.746 ms / 100) 8.767 -> 8.775 ( +0.09%) [ +0.00% +0.14% +0.24% / +0.11% +0.09% +0.18%] index_select reverse : Elapsed 0.088 ms (8.767 ms / 100) 8.724 -> 8.734 ( +0.11%) [ +0.08% +0.18% +0.00% / +0.11% +0.26% +0.29%] index_select skip64 : Elapsed 0.087 ms (8.731 ms / 100) 8.723 -> 8.744 ( +0.24%) [ +0.00% +0.30% +0.08% / +0.24% +0.44% +0.40%] index_select skip256 : Elapsed 0.087 ms (8.723 ms / 100) 8.771 -> 8.769 ( -0.02%) [ +0.17% +0.00% +0.22% / +0.11% -0.02% +0.24%] index_select spread : Elapsed 0.088 ms (8.786 ms / 100) 8.772 -> 8.774 ( +0.02%) [ +0.00% +0.17% +0.11% / +0.11% +0.33% +0.02%] index_select strided 3 : Elapsed 0.088 ms (8.772 ms / 100) 8.792 -> 8.774 ( -0.20%) [ +0.00% +0.06% +0.00% / -0.06% -0.20% -0.09%] index_select random : Elapsed 0.088 ms (8.792 ms / 100) 8.777 -> 8.775 ( -0.02%) [ +0.00% +0.24% +0.07% / +0.07% +0.26% -0.02%] index_select random_sorted : Elapsed 0.088 ms (8.777 ms / 100) B = [20, 40, 16, 5] (stride (3200, 1, 40, 640)) A = [4, 40, 16, 5] (stride (80, 320, 5, 1)) dim = 0 1.996 -> 1.996 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +1.00% +1.10%] index_add_ linear : Elapsed 0.020 ms (1.997 ms / 100) 1.955 -> 1.954 ( -0.05%) [ +0.00% +0.20% +0.10% / -0.05% +1.48% +1.89%] index_copy_ linear : Elapsed 0.020 ms (1.955 ms / 100) 1.994 -> 1.998 ( +0.20%) [ +0.00% +0.20% +0.05% / +0.20% +1.15% +1.00%] index_add_ reverse : Elapsed 0.020 ms (1.994 ms / 100) 1.955 -> 1.956 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +1.28% +1.33%] index_copy_ reverse : Elapsed 0.020 ms (1.958 ms / 100) 2.005 -> 2.001 ( -0.20%) [ +0.10% +0.00% +0.05% / -0.20% +0.60% +0.95%] index_add_ spread : Elapsed 0.020 ms (2.007 ms / 100) 1.964 -> 1.962 ( -0.10%) [ +0.10% +0.05% +0.00% / -0.10% +0.97% +0.92%] index_copy_ spread : Elapsed 0.020 ms (1.966 ms / 100) 1.997 -> 1.998 ( +0.05%) [ +0.10% +0.00% +0.20% / +0.05% +1.15% +1.00%] index_add_ strided 3 : Elapsed 0.020 ms (1.999 ms / 100) 1.961 -> 1.964 ( +0.15%) [ +0.00% +0.00% +0.25% / +0.15% +1.12% +1.27%] index_copy_ strided 3 : Elapsed 0.020 ms (1.961 ms / 100) 2.004 -> 2.005 ( +0.05%) [ +0.10% +0.00% +0.35% / +0.05% +1.05% +1.05%] index_add_ strided 7 : Elapsed 0.020 ms (2.006 ms / 100) 1.965 -> 1.972 ( +0.36%) [ +0.05% +0.25% +0.00% / +0.36% +1.12% +1.02%] index_copy_ strided 7 : Elapsed 0.020 ms (1.966 ms / 100) 1.994 -> 1.998 ( +0.20%) [ +0.10% +0.25% +0.00% / +0.20% +0.95% +0.90%] index_add_ perm : Elapsed 0.020 ms (1.996 ms / 100) 1.959 -> 1.961 ( +0.10%) [ +0.00% +0.26% +0.00% / +0.10% +1.17% +1.02%] index_copy_ perm : Elapsed 0.020 ms (1.959 ms / 100) 1.987 -> 1.991 ( +0.20%) [ +0.35% +0.00% +0.25% / +0.20% +1.41% +1.11%] index_add_ perm_sorted : Elapsed 0.020 ms (1.994 ms / 100) 1.957 -> 1.955 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +1.02% +1.02%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.959 ms / 100) 8.727 -> 8.732 ( +0.06%) [ +0.13% +0.00% +0.06% / +0.06% +0.39% +0.21%] index_select const : Elapsed 0.087 ms (8.738 ms / 100) 8.795 -> 8.780 ( -0.17%) [ +0.13% +0.00% +0.03% / -0.17% +0.51% +0.33%] index_select wrap : Elapsed 0.088 ms (8.806 ms / 100) 8.745 -> 8.762 ( +0.19%) [ +0.24% +0.24% +0.00% / +0.19% +0.25% +0.41%] index_select linear : Elapsed 0.088 ms (8.766 ms / 100) 8.764 -> 8.758 ( -0.07%) [ +0.10% +0.00% +0.03% / -0.07% +0.33% +0.37%] index_select reverse : Elapsed 0.088 ms (8.773 ms / 100) 8.724 -> 8.729 ( +0.06%) [ +0.00% +0.30% +0.09% / +0.06% +0.49% +0.32%] index_select skip64 : Elapsed 0.087 ms (8.724 ms / 100) 8.725 -> 8.725 ( +0.00%) [ +0.00% +0.00% +0.19% / +0.05% +0.24% +0.00%] index_select skip256 : Elapsed 0.087 ms (8.725 ms / 100) 8.774 -> 8.785 ( +0.13%) [ +0.08% +0.00% +0.07% / +0.13% +0.18% +0.21%] index_select spread : Elapsed 0.088 ms (8.781 ms / 100) 8.788 -> 8.795 ( +0.08%) [ +0.16% +0.00% +0.07% / +0.08% +0.40% +0.33%] index_select strided 3 : Elapsed 0.088 ms (8.802 ms / 100) 8.784 -> 8.805 ( +0.24%) [ +0.20% +0.00% +0.31% / +0.24% +0.36% +0.31%] index_select random : Elapsed 0.088 ms (8.802 ms / 100) 8.766 -> 8.771 ( +0.06%) [ +0.23% +0.00% +0.05% / +0.06% +0.22% +0.08%] index_select random_sorted : Elapsed 0.088 ms (8.786 ms / 100) B = [20, 40, 16, 5] (stride (1, 1600, 100, 20)) A = [4, 40, 16, 5] (stride (1, 320, 4, 64)) dim = 0 1.902 -> 1.899 ( -0.16%) [ +0.11% +0.00% +0.11% / -0.16% +0.42% +0.53%] index_add_ linear : Elapsed 0.019 ms (1.904 ms / 100) 1.861 -> 1.863 ( +0.11%) [ +0.16% +0.05% +0.00% / +0.11% +0.54% +0.43%] index_copy_ linear : Elapsed 0.019 ms (1.864 ms / 100) 1.902 -> 1.903 ( +0.05%) [ +0.00% +0.00% +0.16% / +0.05% +0.84% +0.79%] index_add_ reverse : Elapsed 0.019 ms (1.902 ms / 100) 1.859 -> 1.867 ( +0.43%) [ +0.43% +0.11% +0.00% / +0.43% +0.91% +1.02%] index_copy_ reverse : Elapsed 0.019 ms (1.867 ms / 100) 1.936 -> 1.936 ( +0.00%) [ +0.00% +0.52% +0.36% / +0.00% +0.77% +0.93%] index_add_ spread : Elapsed 0.019 ms (1.936 ms / 100) 1.957 -> 1.966 ( +0.46%) [ +0.00% +0.31% +0.36% / +0.46% +0.97% +1.02%] index_copy_ spread : Elapsed 0.020 ms (1.957 ms / 100) 1.926 -> 1.928 ( +0.10%) [ +0.36% +0.36% +0.00% / +0.10% +0.99% +1.04%] index_add_ strided 3 : Elapsed 0.019 ms (1.933 ms / 100) 1.925 -> 1.929 ( +0.21%) [ +0.00% +0.21% +0.16% / +0.21% +0.94% +1.04%] index_copy_ strided 3 : Elapsed 0.019 ms (1.925 ms / 100) 1.942 -> 1.943 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.51% +0.72%] index_add_ strided 7 : Elapsed 0.019 ms (1.943 ms / 100) 1.960 -> 1.962 ( +0.10%) [ +0.15% +0.36% +0.00% / +0.10% +0.92% +0.66%] index_copy_ strided 7 : Elapsed 0.020 ms (1.963 ms / 100) 1.933 -> 1.930 ( -0.16%) [ +0.00% +0.10% +0.10% / -0.16% +0.36% +0.36%] index_add_ perm : Elapsed 0.019 ms (1.933 ms / 100) 1.925 -> 1.929 ( +0.21%) [ +0.00% +0.26% +0.16% / +0.21% +0.57% +0.52%] index_copy_ perm : Elapsed 0.019 ms (1.925 ms / 100) 1.932 -> 1.938 ( +0.31%) [ +0.26% +0.26% +0.00% / +0.31% +0.57% +0.62%] index_add_ perm_sorted : Elapsed 0.019 ms (1.937 ms / 100) 1.931 -> 1.930 ( -0.05%) [ +0.16% +0.16% +0.00% / -0.05% +0.88% +0.78%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.934 ms / 100) 8.406 -> 8.422 ( +0.19%) [ +0.24% +0.00% +0.23% / +0.19% +0.31% +0.30%] index_select const : Elapsed 0.084 ms (8.426 ms / 100) 8.413 -> 8.423 ( +0.12%) [ +0.05% +0.00% +0.17% / +0.17% +0.38% +0.12%] index_select wrap : Elapsed 0.084 ms (8.417 ms / 100) 8.412 -> 8.416 ( +0.05%) [ +0.00% +0.12% +0.07% / +0.05% +0.19% +0.15%] index_select linear : Elapsed 0.084 ms (8.412 ms / 100) 8.411 -> 8.416 ( +0.06%) [ +0.21% +0.00% +0.18% / +0.14% +0.13% +0.06%] index_select reverse : Elapsed 0.084 ms (8.429 ms / 100) 8.406 -> 8.407 ( +0.01%) [ +0.00% +0.18% +0.36% / +0.17% +0.24% +0.01%] index_select skip64 : Elapsed 0.084 ms (8.406 ms / 100) 8.404 -> 8.411 ( +0.08%) [ +0.00% +0.12% +0.18% / +0.19% +0.38% +0.08%] index_select skip256 : Elapsed 0.084 ms (8.404 ms / 100) 8.413 -> 8.403 ( -0.12%) [ +0.00% +0.11% +0.05% / +0.08% -0.12% +0.15%] index_select spread : Elapsed 0.084 ms (8.413 ms / 100) 8.418 -> 8.420 ( +0.02%) [ +0.00% +0.06% +0.08% / +0.02% +0.08% +0.10%] index_select strided 3 : Elapsed 0.084 ms (8.418 ms / 100) 8.408 -> 8.411 ( +0.04%) [ +0.08% +0.00% +0.20% / +0.13% +0.26% +0.04%] index_select random : Elapsed 0.084 ms (8.415 ms / 100) 8.420 -> 8.409 ( -0.13%) [ +0.05% +0.00% +0.00% / -0.13% -0.05% +0.14%] index_select random_sorted : Elapsed 0.084 ms (8.424 ms / 100) B = [20, 40, 16, 5] (stride (200, 1, 4000, 40)) A = [4, 40, 16, 5] (stride (640, 16, 1, 2560)) dim = 0 2.027 -> 2.031 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.74% +0.54%] index_add_ linear : Elapsed 0.020 ms (2.031 ms / 100) 1.993 -> 1.996 ( +0.15%) [ +0.05% +0.00% +0.10% / +0.15% +0.40% +0.45%] index_copy_ linear : Elapsed 0.020 ms (1.994 ms / 100) 2.025 -> 2.028 ( +0.15%) [ +0.25% +0.10% +0.00% / +0.15% +0.69% +0.59%] index_add_ reverse : Elapsed 0.020 ms (2.030 ms / 100) 1.987 -> 1.995 ( +0.40%) [ +0.45% +0.25% +0.00% / +0.45% +0.40% +0.65%] index_copy_ reverse : Elapsed 0.020 ms (1.996 ms / 100) 2.019 -> 2.027 ( +0.40%) [ +0.30% +0.30% +0.00% / +0.40% +0.84% +0.94%] index_add_ spread : Elapsed 0.020 ms (2.025 ms / 100) 1.984 -> 1.993 ( +0.45%) [ +0.20% +0.25% +0.00% / +0.45% +0.81% +0.66%] index_copy_ spread : Elapsed 0.020 ms (1.988 ms / 100) 2.018 -> 2.019 ( +0.05%) [ +0.05% +0.35% +0.00% / +0.05% +0.64% +0.79%] index_add_ strided 3 : Elapsed 0.020 ms (2.019 ms / 100) 1.981 -> 1.985 ( +0.20%) [ +0.00% +0.20% +0.20% / +0.20% +0.61% +0.76%] index_copy_ strided 3 : Elapsed 0.020 ms (1.981 ms / 100) 2.016 -> 2.020 ( +0.20%) [ +0.35% +0.55% +0.00% / +0.20% +0.60% +0.69%] index_add_ strided 7 : Elapsed 0.020 ms (2.023 ms / 100) 1.987 -> 1.984 ( -0.15%) [ +0.10% +0.00% +0.25% / -0.15% +0.65% +0.40%] index_copy_ strided 7 : Elapsed 0.020 ms (1.989 ms / 100) 2.038 -> 2.039 ( +0.05%) [ +0.15% +0.00% +0.00% / +0.05% +0.74% +0.25%] index_add_ perm : Elapsed 0.020 ms (2.041 ms / 100) 1.992 -> 1.994 ( +0.10%) [ +0.35% +0.00% +0.10% / +0.10% +1.05% +1.05%] index_copy_ perm : Elapsed 0.020 ms (1.999 ms / 100) 2.025 -> 2.022 ( -0.15%) [ +0.10% +0.25% +0.00% / -0.15% +0.74% +0.79%] index_add_ perm_sorted : Elapsed 0.020 ms (2.027 ms / 100) 1.985 -> 1.987 ( +0.10%) [ +0.25% +0.05% +0.00% / +0.10% +0.71% +0.91%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.990 ms / 100) 8.754 -> 8.764 ( +0.11%) [ +0.06% +0.06% +0.00% / +0.11% +0.18% +0.14%] index_select const : Elapsed 0.088 ms (8.759 ms / 100) 8.819 -> 8.827 ( +0.09%) [ +0.07% +0.00% +0.10% / +0.09% +0.49% +0.39%] index_select wrap : Elapsed 0.088 ms (8.825 ms / 100) 8.794 -> 8.790 ( -0.05%) [ +0.00% +0.32% +0.16% / -0.05% +0.52% +0.75%] index_select linear : Elapsed 0.088 ms (8.794 ms / 100) 8.791 -> 8.819 ( +0.32%) [ +0.00% +0.46% +0.11% / +0.32% +0.65% +0.41%] index_select reverse : Elapsed 0.088 ms (8.791 ms / 100) 8.733 -> 8.763 ( +0.34%) [ +0.00% +0.14% +0.44% / +0.34% +0.60% +0.70%] index_select skip64 : Elapsed 0.087 ms (8.733 ms / 100) 8.745 -> 8.769 ( +0.27%) [ +0.30% +0.02% +0.00% / +0.27% +0.42% +0.31%] index_select skip256 : Elapsed 0.088 ms (8.771 ms / 100) 8.813 -> 8.820 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.49% +0.33%] index_select spread : Elapsed 0.088 ms (8.813 ms / 100) 8.838 -> 8.820 ( -0.20%) [ +0.00% +0.02% +0.09% / -0.20% +0.44% +0.46%] index_select strided 3 : Elapsed 0.088 ms (8.838 ms / 100) 8.829 -> 8.837 ( +0.09%) [ +0.24% +0.00% +0.02% / +0.09% +0.61% +0.14%] index_select random : Elapsed 0.088 ms (8.850 ms / 100) 8.804 -> 8.814 ( +0.11%) [ +0.10% +0.00% +0.14% / +0.11% +0.36% +0.66%] index_select random_sorted : Elapsed 0.088 ms (8.813 ms / 100) out_shape = [4, 20, 16, 5] in_shape = [4, 40, 16, 5] idx_dim = 1 B = [4, 20, 16, 5] (stride (1600, 5, 100, 1)) A = [4, 40, 16, 5] (stride (1, 20, 800, 4)) dim = 1 2.443 -> 2.438 ( -0.20%) [ +0.16% +0.00% +0.16% / -0.20% +0.33% +0.33%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.456 -> 2.456 ( +0.00%) [ +0.16% +0.33% +0.00% / +0.16% +0.16% +0.00%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.00% +0.16% +0.04% / +0.08% -0.04% +0.04%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.16% +0.08% +0.00% / +0.00% -0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.00% +0.12% +0.00% / +0.16% +0.16% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.445 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.33% +0.12% +0.00% / +0.12% +0.20% -0.04%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.460 -> 2.456 ( -0.16%) [ +0.00% +0.00% +0.04% / -0.04% +0.00% -0.16%] index_select spread : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.459 ( -0.08%) [ +0.16% +0.00% +0.12% / -0.04% -0.08% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.04% +0.00% +0.24% / +0.20% +0.16% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.16% +0.00% +0.00% / +0.04% +0.16% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.12% +0.29% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.446 ms / 100) 2.449 -> 2.448 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.08% +0.16%] index_select strided 16 : Elapsed 0.024 ms (2.449 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.04% +0.00% +0.12%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.457 -> 2.453 ( -0.16%) [ +0.08% +0.20% +0.00% / +0.20% +0.04% -0.16%] index_select random_sorted : Elapsed 0.025 ms (2.459 ms / 100) 2.458 -> 2.454 ( -0.16%) [ +0.08% +0.00% +0.04% / -0.16% +0.00% +0.00%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.464 -> 2.452 ( -0.49%) [ +0.00% +0.04% +0.04% / -0.12% -0.45% -0.49%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) B = [4, 20, 16, 5] (stride (5, 320, 20, 1)) A = [4, 40, 16, 5] (stride (640, 1, 40, 2560)) dim = 1 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.37%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.456 -> 2.450 ( -0.24%) [ +0.12% +0.08% +0.00% / +0.16% -0.12% -0.24%] index_select wrap : Elapsed 0.025 ms (2.459 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.12% +0.08% +0.00% / +0.16% -0.04% -0.04%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.04% +0.00% +0.16%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.20% +0.04% +0.00% / +0.16% +0.29% +0.33%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.04% +0.16% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.28% +0.45%] index_select spread : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.461 ( +0.12%) [ +0.08% +0.20% +0.00% / +0.12% +0.28% +0.45%] index_select strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.463 ( +0.20%) [ +0.16% +0.20% +0.00% / +0.20% +0.28% +0.49%] index_select strided 5 : Elapsed 0.025 ms (2.462 ms / 100) 2.460 -> 2.469 ( +0.37%) [ +0.20% +0.00% +0.00% / +0.41% +0.37% +0.37%] index_select strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.12% +0.37%] index_select strided 8 : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.20% +0.00% +0.04% / +0.00% +0.28% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.465 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.20% +0.24%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.12% +0.00% +0.24% / +0.16% +0.20% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.12% +0.00% +0.16% / +0.16% +0.24% +0.12%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.459 -> 2.466 ( +0.28%) [ +0.16% +0.00% +0.08% / +0.33% +0.28% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.463 ms / 100) B = [4, 20, 16, 5] (stride (1, 320, 20, 4)) A = [4, 40, 16, 5] (stride (1, 64, 4, 2560)) dim = 1 2.394 -> 2.391 ( -0.13%) [ +0.00% +0.04% +0.04% / -0.13% +0.21% +0.17%] index_select const : Elapsed 0.024 ms (2.394 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.04% +0.00% +0.17% / +0.17% +0.17% -0.04%] index_select wrap : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.17% +0.04% +0.00% / +0.17% +0.08% +0.17%] index_select linear : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.402 ( -0.25%) [ +0.04% +0.00% +0.21% / -0.04% -0.21% -0.25%] index_select reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.00% +0.04% +0.00% / +0.04% -0.04% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.17% +0.00% +0.21% / +0.04% +0.38% +0.38%] index_select skip256 : Elapsed 0.024 ms (2.397 ms / 100) 2.405 -> 2.406 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.21% +0.08%] index_select spread : Elapsed 0.024 ms (2.405 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.08% +0.08% -0.12%] index_select strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.29% +0.08% +0.00% / +0.21% -0.04% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.00% +0.21% +0.29% / +0.17% +0.17% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.391 -> 2.396 ( +0.21%) [ +0.00% +0.33% +0.13% / +0.21% +0.42% +0.42%] index_select strided 8 : Elapsed 0.024 ms (2.391 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.00% +0.38%] index_select strided 16 : Elapsed 0.024 ms (2.399 ms / 100) 2.401 -> 2.401 ( +0.00%) [ +0.04% +0.21% +0.00% / +0.17% +0.00% +0.29%] index_select random : Elapsed 0.024 ms (2.402 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.00% +0.17% +0.12% / +0.04% -0.08% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.17% +0.04% +0.00% / +0.21% +0.00% -0.08%] index_select perm : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.396 ( -0.50%) [ +0.17% +0.00% +0.21% / +0.12% -0.50% -0.50%] index_select perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) B = [4, 20, 16, 5] (stride (100, 5, 400, 1)) A = [4, 40, 16, 5] (stride (16, 64, 1, 2560)) dim = 1 2.446 -> 2.446 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.16% +0.00% +0.29%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.463 -> 2.456 ( -0.28%) [ +0.16% +0.12% +0.00% / +0.16% -0.28% -0.24%] index_select wrap : Elapsed 0.025 ms (2.467 ms / 100) 2.467 -> 2.455 ( -0.49%) [ +0.00% +0.08% +0.12% / -0.08% -0.45% -0.49%] index_select linear : Elapsed 0.025 ms (2.467 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.16% +0.04% +0.00% / +0.04% +0.04% -0.04%] index_select reverse : Elapsed 0.025 ms (2.463 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.12% +0.04% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.00% +0.12% +0.16% / -0.04% +0.16% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.08% +0.20% +0.00% / +0.08% +0.24% +0.24%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.08% +0.16%] index_select strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.08% +0.29%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.20% +0.24% +0.00% / +0.12% +0.28% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.04% +0.00% +0.04%] index_select strided 8 : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.00% +0.24% +0.08% / +0.00% +0.20% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.449 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.29% +0.16%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.12% +0.00% +0.00% / +0.04% -0.04% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.462 ms / 100) 2.462 -> 2.459 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.20% -0.12% +0.04%] index_select perm : Elapsed 0.025 ms (2.463 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.16% +0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) B = [4, 20, 16, 5] (stride (1, 4, 400, 80)) A = [4, 40, 16, 5] (stride (200, 5, 800, 1)) dim = 1 2.396 -> 2.395 ( -0.04%) [ +0.08% +0.13% +0.00% / -0.04% +0.42% +0.17%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.416 -> 2.413 ( -0.12%) [ +0.00% +0.00% +0.00% / +0.08% -0.12% -0.08%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.413 ( -0.08%) [ +0.21% +0.00% +0.54% / -0.04% +0.04% -0.08%] index_select linear : Elapsed 0.024 ms (2.420 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.12% +0.21% +0.00% / +0.29% +0.00% +0.25%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.25% +0.17% +0.00% / +0.04% +0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.405 ms / 100) 2.398 -> 2.399 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.21% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.429 -> 2.426 ( -0.12%) [ +0.00% +0.08% +0.12% / -0.12% +0.04% -0.04%] index_select spread : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.08% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.428 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.08% +0.33% / +0.21% +0.17% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.412 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.25% +0.08%] index_select strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.403 -> 2.407 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.17% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.405 ms / 100) 2.408 -> 2.405 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.08% +0.21% -0.12%] index_select strided 16 : Elapsed 0.024 ms (2.409 ms / 100) 2.421 -> 2.418 ( -0.12%) [ +0.00% +0.00% +0.12% / -0.12% +0.12% -0.12%] index_select random : Elapsed 0.024 ms (2.421 ms / 100) 2.423 -> 2.419 ( -0.17%) [ +0.00% +0.17% +0.08% / -0.08% -0.08% -0.17%] index_select random_sorted : Elapsed 0.024 ms (2.423 ms / 100) 2.421 -> 2.422 ( +0.04%) [ +0.04% +0.25% +0.00% / +0.29% +0.12% +0.04%] index_select perm : Elapsed 0.024 ms (2.422 ms / 100) 2.426 -> 2.414 ( -0.49%) [ +0.04% +0.00% +0.12% / +0.04% -0.49% -0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.427 ms / 100) B = [4, 20, 16, 5] (stride (1, 4, 400, 80)) A = [4, 40, 16, 5] (stride (40, 1, 800, 160)) dim = 1 1.473 -> 1.474 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.41% +0.41%] index_select const : Elapsed 0.015 ms (1.473 ms / 100) 1.486 -> 1.487 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.40% +0.13%] index_select wrap : Elapsed 0.015 ms (1.486 ms / 100) 1.486 -> 1.483 ( -0.20%) [ +0.13% +0.00% +0.13% / -0.20% +0.47% +0.47%] index_select linear : Elapsed 0.015 ms (1.488 ms / 100) 1.484 -> 1.486 ( +0.13%) [ +0.20% +0.07% +0.00% / +0.13% +0.74% +0.67%] index_select reverse : Elapsed 0.015 ms (1.487 ms / 100) 1.474 -> 1.471 ( -0.20%) [ +0.07% +0.00% +0.14% / -0.20% +0.34% +0.27%] index_select skip64 : Elapsed 0.015 ms (1.475 ms / 100) 1.472 -> 1.475 ( +0.20%) [ +0.00% +0.00% +0.20% / +0.20% +0.48% +0.48%] index_select skip256 : Elapsed 0.015 ms (1.472 ms / 100) 1.490 -> 1.497 ( +0.47%) [ +0.00% +0.47% +0.40% / +0.47% +0.74% +0.87%] index_select spread : Elapsed 0.015 ms (1.490 ms / 100) 1.494 -> 1.494 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.80% +0.80%] index_select strided 3 : Elapsed 0.015 ms (1.495 ms / 100) 1.494 -> 1.500 ( +0.40%) [ +0.27% +0.00% +0.07% / +0.40% +0.94% +0.94%] index_select strided 5 : Elapsed 0.015 ms (1.498 ms / 100) 1.492 -> 1.494 ( +0.13%) [ +0.00% +0.13% +0.07% / +0.13% +1.07% +0.67%] index_select strided 7 : Elapsed 0.015 ms (1.492 ms / 100) 1.492 -> 1.498 ( +0.40%) [ +0.27% +0.00% +0.07% / +0.40% +1.07% +0.60%] index_select strided 8 : Elapsed 0.015 ms (1.496 ms / 100) 1.492 -> 1.499 ( +0.47%) [ +0.00% +0.07% +0.27% / +0.47% +1.01% +0.80%] index_select strided 16 : Elapsed 0.015 ms (1.492 ms / 100) 1.494 -> 1.498 ( +0.27%) [ +0.40% +0.00% +0.27% / +0.27% +1.20% +1.00%] index_select random : Elapsed 0.015 ms (1.500 ms / 100) 1.492 -> 1.494 ( +0.13%) [ +0.00% +0.07% +0.13% / +0.13% +0.94% +0.60%] index_select random_sorted : Elapsed 0.015 ms (1.492 ms / 100) 1.492 -> 1.495 ( +0.20%) [ +0.20% +0.00% +0.27% / +0.20% +0.94% +0.60%] index_select perm : Elapsed 0.015 ms (1.495 ms / 100) 1.498 -> 1.505 ( +0.47%) [ +0.00% +0.13% +0.20% / +0.73% +0.47% +0.47%] index_select perm_sorted : Elapsed 0.015 ms (1.498 ms / 100) B = [4, 20, 16, 5] (stride (320, 1, 20, 1280)) A = [4, 40, 16, 5] (stride (3200, 80, 5, 1)) dim = 1 2.404 -> 2.405 ( +0.04%) [ +0.00% +0.21% +0.00% / +0.04% +0.21% +0.29%] index_select const : Elapsed 0.024 ms (2.404 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.17% +0.21% +0.00% / +0.08% +0.04% +0.21%] index_select wrap : Elapsed 0.024 ms (2.418 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.17% +0.00% / +0.08% +0.21% +0.29%] index_select linear : Elapsed 0.024 ms (2.414 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.33% +0.00% +0.17% / +0.12% +0.21% +0.04%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.405 -> 2.405 ( +0.00%) [ +0.29% +0.00% +0.17% / +0.00% +0.17% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.412 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.00% +0.00% +0.25% / +0.17% +0.17% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.405 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.21% +0.21%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.415 -> 2.412 ( -0.12%) [ +0.04% +0.29% +0.00% / +0.12% -0.12% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.12% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.411 -> 2.416 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.37% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.407 -> 2.403 ( -0.17%) [ +0.08% +0.04% +0.00% / -0.17% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.17% +0.21% +0.00% / +0.17% +0.37% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.409 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.25% +0.21%] index_select random : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.00% +0.04% +0.08% / +0.21% -0.04% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.411 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.08% +0.08%] index_select perm : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.410 ( -0.21%) [ +0.04% +0.12% +0.00% / +0.17% -0.21% -0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) out_shape = [4, 40, 20, 5] in_shape = [4, 40, 16, 5] idx_dim = 2 B = [4, 40, 20, 5] (stride (4000, 1, 200, 40)) A = [4, 40, 16, 5] (stride (200, 5, 800, 1)) dim = 2 3.632 -> 3.631 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.85% +0.91%] index_add_ linear : Elapsed 0.036 ms (3.633 ms / 100) 3.501 -> 3.501 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.89% +0.86%] index_copy_ linear : Elapsed 0.035 ms (3.503 ms / 100) 3.637 -> 3.636 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.71% +0.77%] index_add_ reverse : Elapsed 0.036 ms (3.637 ms / 100) 3.510 -> 3.509 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.68% +0.68%] index_copy_ reverse : Elapsed 0.035 ms (3.510 ms / 100) 3.623 -> 3.623 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_add_ spread : Elapsed 0.036 ms (3.623 ms / 100) 3.495 -> 3.497 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +1.06% +0.97%] index_copy_ spread : Elapsed 0.035 ms (3.496 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.08% +0.00% +0.03% / +0.00% +0.77% +0.72%] index_add_ strided 3 : Elapsed 0.036 ms (3.627 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.80% +0.86%] index_copy_ strided 3 : Elapsed 0.035 ms (3.503 ms / 100) 3.625 -> 3.628 ( +0.08%) [ +0.00% +0.03% +0.08% / +0.08% +0.74% +0.61%] index_add_ strided 7 : Elapsed 0.036 ms (3.625 ms / 100) 3.501 -> 3.505 ( +0.11%) [ +0.00% +0.00% +0.09% / +0.11% +0.91% +0.69%] index_copy_ strided 7 : Elapsed 0.035 ms (3.501 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.63%] index_add_ perm : Elapsed 0.036 ms (3.625 ms / 100) 3.495 -> 3.497 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.72% +0.86%] index_copy_ perm : Elapsed 0.035 ms (3.498 ms / 100) 3.636 -> 3.639 ( +0.08%) [ +0.00% +0.11% +0.06% / +0.08% +0.77% +0.77%] index_add_ perm_sorted : Elapsed 0.036 ms (3.636 ms / 100) 3.508 -> 3.512 ( +0.11%) [ +0.00% +0.11% +0.09% / +0.11% +0.68% +0.66%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.508 ms / 100) 5.479 -> 5.481 ( +0.04%) [ +0.07% +0.22% +0.00% / +0.04% +0.16% +0.13%] index_select const : Elapsed 0.055 ms (5.483 ms / 100) 5.491 -> 5.493 ( +0.04%) [ +0.05% +0.02% +0.00% / +0.04% +0.05% +0.05%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.494 -> 5.488 ( -0.11%) [ +0.00% +0.05% +0.09% / -0.11% +0.09% +0.15%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.491 -> 5.490 ( -0.02%) [ +0.09% +0.07% +0.00% / -0.02% +0.05% +0.15%] index_select reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.24% +0.00% +0.22% / +0.07% +0.20% +0.31%] index_select skip64 : Elapsed 0.055 ms (5.486 ms / 100) 5.479 -> 5.481 ( +0.04%) [ +0.07% +0.00% +0.11% / +0.04% +0.15% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.15% +0.22% +0.00% / +0.13% -0.02% +0.07%] index_select spread : Elapsed 0.055 ms (5.496 ms / 100) 5.495 -> 5.491 ( -0.07%) [ +0.02% +0.05% +0.00% / +0.04% +0.02% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.493 -> 5.489 ( -0.07%) [ +0.00% +0.09% +0.04% / -0.05% +0.09% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.493 ms / 100) 5.493 -> 5.487 ( -0.11%) [ +0.00% +0.09% +0.02% / -0.11% -0.05% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.478 -> 5.483 ( +0.09%) [ +0.00% +0.09% +0.05% / +0.09% +0.11% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.478 ms / 100) 5.488 -> 5.488 ( +0.00%) [ +0.00% +0.16% +0.07% / +0.15% +0.04% +0.00%] index_select random : Elapsed 0.055 ms (5.488 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.00% +0.09% +0.11% / +0.20% +0.02% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.485 ms / 100) B = [4, 40, 20, 5] (stride (5, 400, 20, 1)) A = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) dim = 2 3.722 -> 3.722 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.81% +0.78%] index_add_ linear : Elapsed 0.037 ms (3.723 ms / 100) 3.590 -> 3.590 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.72%] index_copy_ linear : Elapsed 0.036 ms (3.590 ms / 100) 3.734 -> 3.734 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.80% +0.80%] index_add_ reverse : Elapsed 0.037 ms (3.736 ms / 100) 3.601 -> 3.601 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.81% +0.78%] index_copy_ reverse : Elapsed 0.036 ms (3.601 ms / 100) 3.720 -> 3.721 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.67% +0.67%] index_add_ spread : Elapsed 0.037 ms (3.720 ms / 100) 3.585 -> 3.584 ( -0.03%) [ +0.06% +0.00% +0.03% / -0.03% +0.92% +0.86%] index_copy_ spread : Elapsed 0.036 ms (3.587 ms / 100) 3.715 -> 3.723 ( +0.22%) [ +0.30% +0.03% +0.00% / +0.22% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.037 ms (3.726 ms / 100) 3.583 -> 3.588 ( +0.14%) [ +0.14% +0.00% +0.03% / +0.14% +0.67% +0.64%] index_copy_ strided 3 : Elapsed 0.036 ms (3.588 ms / 100) 3.736 -> 3.737 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.70% +0.70%] index_add_ strided 7 : Elapsed 0.037 ms (3.737 ms / 100) 3.602 -> 3.603 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.78% +0.72%] index_copy_ strided 7 : Elapsed 0.036 ms (3.602 ms / 100) 3.723 -> 3.723 ( +0.00%) [ +0.03% +0.08% +0.00% / +0.00% +0.75% +0.75%] index_add_ perm : Elapsed 0.037 ms (3.724 ms / 100) 3.589 -> 3.590 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.81% +0.86%] index_copy_ perm : Elapsed 0.036 ms (3.590 ms / 100) 3.715 -> 3.715 ( +0.00%) [ +0.22% +0.16% +0.00% / +0.00% +1.00% +0.97%] index_add_ perm_sorted : Elapsed 0.037 ms (3.723 ms / 100) 3.581 -> 3.580 ( -0.03%) [ +0.14% +0.14% +0.00% / -0.03% +0.98% +0.95%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.586 ms / 100) 5.481 -> 5.479 ( -0.04%) [ +0.05% +0.07% +0.00% / +0.02% +0.02% -0.04%] index_select const : Elapsed 0.055 ms (5.484 ms / 100) 5.499 -> 5.497 ( -0.04%) [ +0.09% +0.00% +0.20% / +0.07% -0.04% +0.24%] index_select wrap : Elapsed 0.055 ms (5.504 ms / 100) 5.499 -> 5.500 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.11% +0.18%] index_select linear : Elapsed 0.055 ms (5.503 ms / 100) 5.499 -> 5.490 ( -0.16%) [ +0.00% +0.04% +0.04% / -0.16% +0.11% +0.11%] index_select reverse : Elapsed 0.055 ms (5.499 ms / 100) 5.480 -> 5.478 ( -0.04%) [ +0.00% +0.02% +0.04% / -0.04% +0.04% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.480 ms / 100) 5.482 -> 5.475 ( -0.13%) [ +0.02% +0.00% +0.02% / -0.13% -0.05% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.495 -> 5.501 ( +0.11%) [ +0.00% +0.09% +0.22% / +0.11% +0.13% +0.15%] index_select spread : Elapsed 0.055 ms (5.495 ms / 100) 5.500 -> 5.495 ( -0.09%) [ +0.09% +0.00% +0.05% / -0.09% +0.05% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.505 ms / 100) 5.500 -> 5.501 ( +0.02%) [ +0.04% +0.20% +0.00% / +0.02% +0.16% +0.05%] index_select strided 5 : Elapsed 0.055 ms (5.502 ms / 100) 5.497 -> 5.499 ( +0.04%) [ +0.05% +0.00% +0.05% / +0.11% +0.04% +0.29%] index_select strided 7 : Elapsed 0.055 ms (5.500 ms / 100) 5.479 -> 5.483 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.15% +0.20%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.496 -> 5.497 ( +0.02%) [ +0.00% +0.07% +0.04% / +0.02% +0.09% +0.02%] index_select random : Elapsed 0.055 ms (5.496 ms / 100) 5.495 -> 5.492 ( -0.05%) [ +0.00% +0.16% +0.05% / +0.09% -0.05% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [4, 40, 20, 5] (stride (800, 20, 1, 3200)) A = [4, 40, 16, 5] (stride (80, 320, 1, 16)) dim = 2 3.949 -> 3.951 ( +0.05%) [ +0.10% +0.00% +0.03% / +0.05% +0.81% +0.79%] index_add_ linear : Elapsed 0.040 ms (3.953 ms / 100) 3.793 -> 3.792 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.92% +0.92%] index_copy_ linear : Elapsed 0.038 ms (3.795 ms / 100) 3.954 -> 3.964 ( +0.25%) [ +0.08% +0.10% +0.00% / +0.25% +0.71% +0.66%] index_add_ reverse : Elapsed 0.040 ms (3.957 ms / 100) 3.793 -> 3.796 ( +0.08%) [ +0.11% +0.05% +0.00% / +0.08% +0.69% +0.69%] index_copy_ reverse : Elapsed 0.038 ms (3.797 ms / 100) 3.958 -> 3.950 ( -0.20%) [ +0.05% +0.05% +0.00% / -0.20% +0.63% +0.61%] index_add_ spread : Elapsed 0.040 ms (3.960 ms / 100) 3.794 -> 3.792 ( -0.05%) [ +0.00% +0.11% +0.05% / -0.05% +0.74% +0.66%] index_copy_ spread : Elapsed 0.038 ms (3.794 ms / 100) 3.957 -> 3.957 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.81% +0.76%] index_add_ strided 3 : Elapsed 0.040 ms (3.961 ms / 100) 3.797 -> 3.799 ( +0.05%) [ +0.11% +0.18% +0.00% / +0.05% +0.76% +0.79%] index_copy_ strided 3 : Elapsed 0.038 ms (3.801 ms / 100) 3.960 -> 3.962 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.58% +0.68%] index_add_ strided 7 : Elapsed 0.040 ms (3.963 ms / 100) 3.801 -> 3.802 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.74% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.804 ms / 100) 3.951 -> 3.958 ( +0.18%) [ +0.28% +0.25% +0.00% / +0.18% +0.68% +0.66%] index_add_ perm : Elapsed 0.040 ms (3.962 ms / 100) 3.790 -> 3.795 ( +0.13%) [ +0.18% +0.16% +0.00% / +0.13% +0.79% +0.74%] index_copy_ perm : Elapsed 0.038 ms (3.797 ms / 100) 3.956 -> 3.958 ( +0.05%) [ +0.30% +0.00% +0.20% / +0.05% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.040 ms (3.968 ms / 100) 3.793 -> 3.795 ( +0.05%) [ +0.18% +0.00% +0.11% / +0.05% +0.69% +0.69%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.800 ms / 100) 5.477 -> 5.475 ( -0.04%) [ +0.16% +0.05% +0.00% / -0.04% +0.15% +0.00%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.04% +0.02%] index_select wrap : Elapsed 0.055 ms (5.488 ms / 100) 5.480 -> 5.486 ( +0.11%) [ +0.13% +0.07% +0.00% / +0.11% +0.24% +0.13%] index_select linear : Elapsed 0.055 ms (5.487 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.05% +0.00% +0.18% / +0.15% +0.27% +0.09%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.476 -> 5.473 ( -0.05%) [ +0.00% +0.05% +0.22% / -0.05% +0.13% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.479 -> 5.477 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.02% -0.04% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.484 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.05% +0.00% +0.16% / -0.09% -0.09% -0.04%] index_select spread : Elapsed 0.055 ms (5.489 ms / 100) 5.489 -> 5.482 ( -0.13%) [ +0.00% +0.04% +0.02% / +0.00% +0.00% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.484 -> 5.482 ( -0.04%) [ +0.11% +0.00% +0.11% / -0.04% +0.13% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.490 ms / 100) 5.487 -> 5.480 ( -0.13%) [ +0.11% +0.00% +0.05% / -0.13% +0.07% -0.13%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.482 -> 5.487 ( +0.09%) [ +0.11% +0.00% +0.04% / +0.09% +0.16% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.488 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.00% +0.05% +0.22% / -0.04% +0.00% +0.07%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.483 -> 5.480 ( -0.05%) [ +0.00% +0.11% +0.20% / +0.11% +0.07% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.483 ms / 100) B = [4, 40, 20, 5] (stride (800, 1, 40, 3200)) A = [4, 40, 16, 5] (stride (3200, 80, 1, 16)) dim = 2 3.910 -> 3.924 ( +0.36%) [ +0.00% +0.23% +0.00% / +0.36% +0.49% +0.46%] index_add_ linear : Elapsed 0.039 ms (3.910 ms / 100) 3.758 -> 3.759 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.61% +0.61%] index_copy_ linear : Elapsed 0.038 ms (3.759 ms / 100) 3.893 -> 3.892 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.64% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.894 ms / 100) 3.747 -> 3.747 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.64% +0.64%] index_copy_ reverse : Elapsed 0.037 ms (3.748 ms / 100) 3.891 -> 3.896 ( +0.13%) [ +0.00% +0.08% +0.05% / +0.13% +0.77% +0.75%] index_add_ spread : Elapsed 0.039 ms (3.891 ms / 100) 3.749 -> 3.755 ( +0.16%) [ +0.03% +0.05% +0.00% / +0.16% +0.80% +0.77%] index_copy_ spread : Elapsed 0.037 ms (3.750 ms / 100) 3.909 -> 3.913 ( +0.10%) [ +0.28% +0.00% +0.20% / +0.10% +0.56% +0.59%] index_add_ strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 3.755 -> 3.756 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.85% +0.85%] index_copy_ strided 3 : Elapsed 0.038 ms (3.755 ms / 100) 3.891 -> 3.892 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.82% +0.72%] index_add_ strided 7 : Elapsed 0.039 ms (3.892 ms / 100) 3.748 -> 3.749 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.56% +0.59%] index_copy_ strided 7 : Elapsed 0.037 ms (3.748 ms / 100) 3.904 -> 3.899 ( -0.13%) [ +0.20% +0.15% +0.00% / -0.13% +0.64% +0.59%] index_add_ perm : Elapsed 0.039 ms (3.912 ms / 100) 3.757 -> 3.757 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.64% +0.67%] index_copy_ perm : Elapsed 0.038 ms (3.758 ms / 100) 3.899 -> 3.909 ( +0.26%) [ +0.21% +0.00% +0.13% / +0.26% +0.69% +0.72%] index_add_ perm_sorted : Elapsed 0.039 ms (3.907 ms / 100) 3.750 -> 3.751 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.88% +0.85%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.752 ms / 100) 5.475 -> 5.475 ( +0.00%) [ +0.02% +0.13% +0.00% / +0.33% +0.27% +0.00%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.490 -> 5.478 ( -0.22%) [ +0.07% +0.00% +0.02% / -0.22% -0.02% -0.09%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.15% +0.04% +0.00% / +0.11% +0.07% +0.22%] index_select linear : Elapsed 0.055 ms (5.489 ms / 100) 5.483 -> 5.484 ( +0.02%) [ +0.00% +0.04% +0.13% / +0.02% +0.05% +0.02%] index_select reverse : Elapsed 0.055 ms (5.483 ms / 100) 5.479 -> 5.484 ( +0.09%) [ +0.04% +0.07% +0.00% / +0.18% +0.09% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.481 ms / 100) 5.479 -> 5.475 ( -0.07%) [ +0.00% +0.00% +0.02% / -0.07% +0.18% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.00% +0.11% +0.13% / +0.13% +0.04% +0.22%] index_select spread : Elapsed 0.055 ms (5.481 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.00% +0.02% +0.04% / -0.04% -0.02% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.485 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.16% +0.15% +0.00% / +0.05% +0.05% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.490 ms / 100) 5.482 -> 5.484 ( +0.04%) [ +0.00% +0.09% +0.16% / +0.04% +0.07% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.480 -> 5.489 ( +0.16%) [ +0.15% +0.04% +0.00% / +0.16% +0.16% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.488 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.00% +0.00% +0.05% / +0.09% +0.07% +0.07%] index_select random : Elapsed 0.055 ms (5.483 ms / 100) 5.487 -> 5.480 ( -0.13%) [ +0.05% +0.02% +0.00% / -0.13% -0.05% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.490 ms / 100) B = [4, 40, 20, 5] (stride (40, 1, 160, 3200)) A = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) dim = 2 1.350 -> 1.350 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.67% +0.67%] index_add_ linear : Elapsed 0.014 ms (1.350 ms / 100) 1.341 -> 1.341 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.97% +0.97%] index_copy_ linear : Elapsed 0.013 ms (1.342 ms / 100) 1.352 -> 1.353 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +1.26% +0.89%] index_add_ reverse : Elapsed 0.014 ms (1.354 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.74% +0.67%] index_copy_ reverse : Elapsed 0.013 ms (1.345 ms / 100) 1.353 -> 1.356 ( +0.22%) [ +0.07% +0.22% +0.00% / +0.22% +0.96% +0.96%] index_add_ spread : Elapsed 0.014 ms (1.354 ms / 100) 1.343 -> 1.344 ( +0.07%) [ +0.00% +0.07% +0.15% / +0.07% +0.89% +0.89%] index_copy_ spread : Elapsed 0.013 ms (1.343 ms / 100) 1.351 -> 1.351 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.96% +0.96%] index_add_ strided 3 : Elapsed 0.014 ms (1.351 ms / 100) 1.341 -> 1.341 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.97% +0.97%] index_copy_ strided 3 : Elapsed 0.013 ms (1.342 ms / 100) 1.356 -> 1.356 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.37% +0.37%] index_add_ strided 7 : Elapsed 0.014 ms (1.356 ms / 100) 1.343 -> 1.344 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.67% +0.60%] index_copy_ strided 7 : Elapsed 0.013 ms (1.343 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.66% +0.59%] index_add_ perm : Elapsed 0.014 ms (1.357 ms / 100) 1.347 -> 1.347 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.52% +0.59%] index_copy_ perm : Elapsed 0.013 ms (1.347 ms / 100) 1.354 -> 1.355 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.59% +0.66%] index_add_ perm_sorted : Elapsed 0.014 ms (1.355 ms / 100) 1.345 -> 1.348 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.67% +0.59%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.346 ms / 100) 3.542 -> 3.530 ( -0.34%) [ +0.11% +0.17% +0.00% / +0.14% -0.34% -0.31%] index_select const : Elapsed 0.035 ms (3.546 ms / 100) 3.554 -> 3.556 ( +0.06%) [ +0.23% +0.28% +0.00% / +0.17% +0.17% +0.06%] index_select wrap : Elapsed 0.036 ms (3.562 ms / 100) 3.559 -> 3.553 ( -0.17%) [ +0.06% +0.00% +0.00% / -0.17% -0.11% -0.03%] index_select linear : Elapsed 0.036 ms (3.561 ms / 100) 3.555 -> 3.550 ( -0.14%) [ +0.20% +0.11% +0.00% / +0.03% -0.14% +0.14%] index_select reverse : Elapsed 0.036 ms (3.562 ms / 100) 3.536 -> 3.526 ( -0.28%) [ +0.20% +0.08% +0.00% / +0.17% -0.11% -0.28%] index_select skip64 : Elapsed 0.035 ms (3.543 ms / 100) 3.540 -> 3.527 ( -0.37%) [ +0.00% +0.00% +0.25% / -0.08% -0.28% -0.37%] index_select skip256 : Elapsed 0.035 ms (3.540 ms / 100) 3.570 -> 3.552 ( -0.50%) [ +0.00% +0.14% +0.03% / +0.11% -0.08% -0.50%] index_select spread : Elapsed 0.036 ms (3.570 ms / 100) 3.555 -> 3.556 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.06% +0.17% +0.03%] index_select strided 3 : Elapsed 0.036 ms (3.555 ms / 100) 3.555 -> 3.554 ( -0.03%) [ +0.14% +0.20% +0.00% / +0.28% -0.03% +0.11%] index_select strided 5 : Elapsed 0.036 ms (3.560 ms / 100) 3.552 -> 3.552 ( +0.00%) [ +0.00% +0.06% +0.08% / +0.00% +0.51% +0.56%] index_select strided 7 : Elapsed 0.036 ms (3.552 ms / 100) 3.540 -> 3.534 ( -0.17%) [ +0.00% +0.00% +0.11% / -0.06% -0.06% -0.17%] index_select strided 8 : Elapsed 0.035 ms (3.540 ms / 100) 3.550 -> 3.549 ( -0.03%) [ +0.06% +0.00% +0.06% / -0.03% +0.31% +0.23%] index_select random : Elapsed 0.036 ms (3.552 ms / 100) 3.551 -> 3.551 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.34% +0.31%] index_select random_sorted : Elapsed 0.036 ms (3.551 ms / 100) out_shape = [4, 40, 16, 20] in_shape = [4, 40, 16, 5] idx_dim = 3 B = [4, 40, 16, 20] (stride (12800, 320, 20, 1)) A = [4, 40, 16, 5] (stride (5, 320, 20, 1)) dim = 3 1.658 -> 1.659 ( +0.06%) [ +0.06% +0.18% +0.00% / +0.06% +0.24% +0.18%] index_add_ linear : Elapsed 0.017 ms (1.659 ms / 100) 1.607 -> 1.613 ( +0.37%) [ +0.00% +0.19% +0.19% / +0.37% +0.56% +0.75%] index_copy_ linear : Elapsed 0.016 ms (1.607 ms / 100) 1.659 -> 1.659 ( +0.00%) [ +0.00% +0.12% +0.06% / +0.24% +0.00% +0.12%] index_add_ reverse : Elapsed 0.017 ms (1.659 ms / 100) 1.611 -> 1.616 ( +0.31%) [ +0.00% +0.19% +0.25% / +0.62% +0.31% +0.31%] index_copy_ reverse : Elapsed 0.016 ms (1.611 ms / 100) 1.673 -> 1.670 ( -0.18%) [ +0.12% +0.00% +0.06% / +0.18% -0.18% +0.00%] index_add_ spread : Elapsed 0.017 ms (1.675 ms / 100) 1.636 -> 1.637 ( +0.06%) [ +0.12% +0.00% +0.31% / +0.24% +0.06% +0.06%] index_copy_ spread : Elapsed 0.016 ms (1.638 ms / 100) 1.672 -> 1.668 ( -0.24%) [ +0.12% +0.00% +0.06% / +0.06% -0.06% -0.24%] index_add_ strided 3 : Elapsed 0.017 ms (1.674 ms / 100) 1.636 -> 1.636 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.18% +0.06%] index_copy_ strided 3 : Elapsed 0.016 ms (1.636 ms / 100) 1.674 -> 1.669 ( -0.30%) [ +0.24% +0.00% +0.24% / -0.12% -0.30% -0.18%] index_add_ strided 7 : Elapsed 0.017 ms (1.678 ms / 100) 1.637 -> 1.637 ( +0.00%) [ +0.00% +0.24% +0.24% / +0.18% +0.00% +0.00%] index_copy_ strided 7 : Elapsed 0.016 ms (1.637 ms / 100) 1.673 -> 1.670 ( -0.18%) [ +0.24% +0.18% +0.00% / +0.30% +0.06% -0.18%] index_add_ perm : Elapsed 0.017 ms (1.677 ms / 100) 1.637 -> 1.638 ( +0.06%) [ +0.06% +0.00% +0.12% / +0.18% +0.06% +0.18%] index_copy_ perm : Elapsed 0.016 ms (1.638 ms / 100) 1.675 -> 1.675 ( +0.00%) [ +0.48% +0.30% +0.00% / +0.06% +0.00% +0.30%] index_add_ perm_sorted : Elapsed 0.017 ms (1.683 ms / 100) 1.639 -> 1.640 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.24% +0.24% +0.06%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.639 ms / 100) 7.651 -> 7.665 ( +0.18%) [ +0.12% +0.00% +0.33% / +0.22% +0.18% +0.35%] index_select const : Elapsed 0.077 ms (7.660 ms / 100) 7.649 -> 7.653 ( +0.05%) [ +0.00% +0.20% +0.01% / +0.05% +0.34% +0.18%] index_select wrap : Elapsed 0.076 ms (7.649 ms / 100) 7.659 -> 7.669 ( +0.13%) [ +0.08% +0.00% +0.17% / +0.13% +0.44% +0.35%] index_select linear : Elapsed 0.077 ms (7.665 ms / 100) 7.650 -> 7.649 ( -0.01%) [ +0.07% +0.17% +0.00% / -0.01% +0.34% +0.27%] index_select reverse : Elapsed 0.077 ms (7.655 ms / 100) 7.650 -> 7.660 ( +0.13%) [ +0.13% +0.00% +0.08% / +0.13% +0.31% +0.33%] index_select skip64 : Elapsed 0.077 ms (7.660 ms / 100) 7.653 -> 7.652 ( -0.01%) [ +0.13% +0.00% +0.18% / -0.01% +0.30% +0.05%] index_select skip256 : Elapsed 0.077 ms (7.663 ms / 100) 7.662 -> 7.658 ( -0.05%) [ +0.14% +0.00% +0.08% / -0.04% -0.05% +0.26%] index_select spread : Elapsed 0.077 ms (7.673 ms / 100) 7.652 -> 7.655 ( +0.04%) [ +0.00% +0.20% +0.07% / +0.04% +0.17% +0.44%] index_select strided 3 : Elapsed 0.077 ms (7.652 ms / 100) 7.657 -> 7.663 ( +0.08%) [ +0.01% +0.00% +0.14% / +0.08% +0.42% +0.35%] index_select random : Elapsed 0.077 ms (7.658 ms / 100) 7.651 -> 7.659 ( +0.10%) [ +0.07% +0.00% +0.30% / +0.10% +0.20% +0.29%] index_select random_sorted : Elapsed 0.077 ms (7.656 ms / 100) B = [4, 40, 16, 20] (stride (12800, 320, 1, 16)) A = [4, 40, 16, 5] (stride (1, 64, 4, 2560)) dim = 3 0.666 -> 0.667 ( +0.15%) [ +0.30% +0.00% +0.15% / +0.15% +1.20% +1.35%] index_add_ linear : Elapsed 0.007 ms (0.668 ms / 100) 0.647 -> 0.647 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.24% +1.24%] index_copy_ linear : Elapsed 0.006 ms (0.647 ms / 100) 0.669 -> 0.669 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.15% +0.00% +0.00%] index_add_ reverse : Elapsed 0.007 ms (0.670 ms / 100) 0.652 -> 0.651 ( -0.15%) [ +0.00% +0.15% +0.00% / +0.15% -0.15% -0.15%] index_copy_ reverse : Elapsed 0.007 ms (0.652 ms / 100) 0.670 -> 0.670 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.00%] index_add_ spread : Elapsed 0.007 ms (0.670 ms / 100) 0.652 -> 0.652 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.31% +0.00% +0.00%] index_copy_ spread : Elapsed 0.007 ms (0.652 ms / 100) 0.670 -> 0.669 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% -0.15% -0.15%] index_add_ strided 3 : Elapsed 0.007 ms (0.670 ms / 100) 0.652 -> 0.650 ( -0.31%) [ +0.00% +0.31% +0.31% / +0.15% -0.31% -0.15%] index_copy_ strided 3 : Elapsed 0.007 ms (0.652 ms / 100) 0.669 -> 0.668 ( -0.15%) [ +0.00% +0.15% +0.00% / +0.15% -0.15% -0.15%] index_add_ strided 7 : Elapsed 0.007 ms (0.669 ms / 100) 0.651 -> 0.650 ( -0.15%) [ +0.00% +0.46% +0.00% / +0.31% -0.15% -0.15%] index_copy_ strided 7 : Elapsed 0.007 ms (0.651 ms / 100) 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.45% +0.45%] index_add_ perm : Elapsed 0.007 ms (0.668 ms / 100) 0.649 -> 0.650 ( +0.15%) [ +0.00% +0.00% +0.62% / +0.15% +0.62% +0.62%] index_copy_ perm : Elapsed 0.006 ms (0.649 ms / 100) 0.668 -> 0.668 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.45% +0.30%] index_add_ perm_sorted : Elapsed 0.007 ms (0.668 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.31% +0.15% / +0.00% +0.77% +0.93%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.648 ms / 100) 4.814 -> 4.804 ( -0.21%) [ +0.00% +0.10% +0.08% / +0.08% -0.21% -0.08%] index_select const : Elapsed 0.048 ms (4.814 ms / 100) 4.841 -> 4.846 ( +0.10%) [ +0.00% +0.14% +0.12% / +0.12% +0.10% +0.12%] index_select wrap : Elapsed 0.048 ms (4.841 ms / 100) 4.844 -> 4.841 ( -0.06%) [ +0.08% +0.00% +0.08% / +0.04% -0.06% +0.12%] index_select linear : Elapsed 0.048 ms (4.848 ms / 100) 4.834 -> 4.836 ( +0.04%) [ +0.00% +0.08% +0.02% / +0.23% +0.19% +0.04%] index_select reverse : Elapsed 0.048 ms (4.834 ms / 100) 4.814 -> 4.810 ( -0.08%) [ +0.21% +0.00% +0.10% / +0.08% +0.00% -0.08%] index_select skip64 : Elapsed 0.048 ms (4.824 ms / 100) 4.808 -> 4.809 ( +0.02%) [ +0.17% +0.23% +0.00% / +0.02% +0.08% +0.08%] index_select skip256 : Elapsed 0.048 ms (4.816 ms / 100) 4.824 -> 4.835 ( +0.23%) [ +0.00% +0.27% +0.10% / +0.33% +0.23% +0.37%] index_select spread : Elapsed 0.048 ms (4.824 ms / 100) 4.839 -> 4.840 ( +0.02%) [ +0.12% +0.02% +0.00% / +0.12% +0.02% +0.19%] index_select strided 3 : Elapsed 0.048 ms (4.845 ms / 100) 4.842 -> 4.843 ( +0.02%) [ +0.12% +0.12% +0.00% / +0.12% +0.02% +0.08%] index_select random : Elapsed 0.048 ms (4.848 ms / 100) 4.840 -> 4.839 ( -0.02%) [ +0.00% +0.10% +0.14% / +0.14% -0.02% +0.25%] index_select random_sorted : Elapsed 0.048 ms (4.840 ms / 100) B = [4, 40, 16, 20] (stride (12800, 1, 800, 40)) A = [4, 40, 16, 5] (stride (16, 64, 1, 2560)) dim = 3 1.818 -> 1.819 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +1.82% +1.71%] index_add_ linear : Elapsed 0.018 ms (1.818 ms / 100) 1.772 -> 1.773 ( +0.06%) [ +0.17% +0.00% +0.28% / +0.06% +1.92% +1.64%] index_copy_ linear : Elapsed 0.018 ms (1.775 ms / 100) 1.817 -> 1.822 ( +0.28%) [ +0.28% +0.00% +0.11% / +0.28% +1.87% +1.60%] index_add_ reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.772 -> 1.775 ( +0.17%) [ +0.23% +0.00% +0.34% / +0.17% +1.81% +1.92%] index_copy_ reverse : Elapsed 0.018 ms (1.776 ms / 100) 1.847 -> 1.848 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.05% +0.97% +0.92%] index_add_ spread : Elapsed 0.018 ms (1.850 ms / 100) 1.804 -> 1.804 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.94% +0.89%] index_copy_ spread : Elapsed 0.018 ms (1.804 ms / 100) 1.847 -> 1.849 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.76% +0.81%] index_add_ strided 3 : Elapsed 0.018 ms (1.847 ms / 100) 1.805 -> 1.804 ( -0.06%) [ +0.00% +0.00% +0.11% / -0.06% +0.94% +0.83%] index_copy_ strided 3 : Elapsed 0.018 ms (1.805 ms / 100) 1.836 -> 1.840 ( +0.22%) [ +0.00% +0.16% +0.00% / +0.22% +1.25% +1.09%] index_add_ strided 7 : Elapsed 0.018 ms (1.836 ms / 100) 1.792 -> 1.791 ( -0.06%) [ +0.00% +0.17% +0.22% / -0.06% +1.28% +1.34%] index_copy_ strided 7 : Elapsed 0.018 ms (1.792 ms / 100) 1.842 -> 1.841 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +1.47% +1.47%] index_add_ perm : Elapsed 0.018 ms (1.842 ms / 100) 1.799 -> 1.799 ( +0.00%) [ +0.11% +0.06% +0.00% / +0.00% +1.33% +1.17%] index_copy_ perm : Elapsed 0.018 ms (1.801 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.05% +0.27% +0.00% / +0.11% +1.19% +1.36%] index_add_ perm_sorted : Elapsed 0.018 ms (1.843 ms / 100) 1.801 -> 1.807 ( +0.33%) [ +0.00% +0.00% +0.06% / +0.33% +1.17% +1.11%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.801 ms / 100) 8.529 -> 8.534 ( +0.06%) [ +0.00% +0.07% +0.25% / +0.06% +0.16% +0.08%] index_select const : Elapsed 0.085 ms (8.529 ms / 100) 8.573 -> 8.574 ( +0.01%) [ +0.07% +0.00% +0.28% / +0.01% +0.13% +0.16%] index_select wrap : Elapsed 0.086 ms (8.579 ms / 100) 8.552 -> 8.558 ( +0.07%) [ +0.00% +0.05% +0.19% / +0.07% +0.20% +0.15%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.561 -> 8.578 ( +0.20%) [ +0.08% +0.16% +0.00% / +0.29% +0.33% +0.20%] index_select reverse : Elapsed 0.086 ms (8.568 ms / 100) 8.528 -> 8.520 ( -0.09%) [ +0.04% +0.00% +0.00% / -0.09% +0.30% +0.11%] index_select skip64 : Elapsed 0.085 ms (8.531 ms / 100) 8.527 -> 8.528 ( +0.01%) [ +0.07% +0.12% +0.00% / +0.01% +0.25% +0.07%] index_select skip256 : Elapsed 0.085 ms (8.533 ms / 100) 8.566 -> 8.562 ( -0.05%) [ +0.04% +0.00% +0.08% / -0.02% +0.12% -0.05%] index_select spread : Elapsed 0.086 ms (8.569 ms / 100) 8.576 -> 8.588 ( +0.14%) [ +0.20% +0.03% +0.00% / +0.27% +0.20% +0.14%] index_select strided 3 : Elapsed 0.086 ms (8.593 ms / 100) 8.567 -> 8.558 ( -0.11%) [ +0.20% +0.00% +0.08% / -0.11% +0.28% +0.09%] index_select random : Elapsed 0.086 ms (8.584 ms / 100) 8.549 -> 8.544 ( -0.06%) [ +0.01% +0.00% +0.07% / -0.06% +0.14% +0.25%] index_select random_sorted : Elapsed 0.085 ms (8.550 ms / 100) B = [4, 40, 16, 20] (stride (320, 1280, 1, 16)) A = [4, 40, 16, 5] (stride (3200, 1, 40, 640)) dim = 3 1.863 -> 1.866 ( +0.16%) [ +0.00% +0.27% +0.05% / +0.16% +0.48% +0.59%] index_add_ linear : Elapsed 0.019 ms (1.863 ms / 100) 1.815 -> 1.817 ( +0.11%) [ +0.17% +0.17% +0.00% / +0.11% +0.44% +0.50%] index_copy_ linear : Elapsed 0.018 ms (1.818 ms / 100) 1.861 -> 1.863 ( +0.11%) [ +0.00% +0.05% +0.00% / +0.11% +0.43% +0.32%] index_add_ reverse : Elapsed 0.019 ms (1.861 ms / 100) 1.816 -> 1.817 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.28% +0.33%] index_copy_ reverse : Elapsed 0.018 ms (1.818 ms / 100) 1.874 -> 1.875 ( +0.05%) [ +0.27% +0.05% +0.00% / +0.05% +0.37% +0.53%] index_add_ spread : Elapsed 0.019 ms (1.879 ms / 100) 1.833 -> 1.835 ( +0.11%) [ +0.00% +0.11% +0.22% / +0.11% +0.38% +0.33%] index_copy_ spread : Elapsed 0.018 ms (1.833 ms / 100) 1.874 -> 1.875 ( +0.05%) [ +0.32% +0.00% +0.21% / +0.05% +0.64% +0.53%] index_add_ strided 3 : Elapsed 0.019 ms (1.880 ms / 100) 1.834 -> 1.833 ( -0.05%) [ +0.00% +0.00% +0.22% / -0.05% +0.33% +0.27%] index_copy_ strided 3 : Elapsed 0.018 ms (1.834 ms / 100) 1.871 -> 1.873 ( +0.11%) [ +0.32% +0.05% +0.00% / +0.11% +0.37% +0.32%] index_add_ strided 7 : Elapsed 0.019 ms (1.877 ms / 100) 1.824 -> 1.822 ( -0.11%) [ +0.00% +0.05% +0.11% / -0.11% +0.60% +0.44%] index_copy_ strided 7 : Elapsed 0.018 ms (1.824 ms / 100) 1.870 -> 1.871 ( +0.05%) [ +0.00% +0.16% +0.00% / +0.05% +0.11% +0.32%] index_add_ perm : Elapsed 0.019 ms (1.870 ms / 100) 1.819 -> 1.826 ( +0.38%) [ +0.00% +0.33% +0.44% / +0.38% +0.60% +0.60%] index_copy_ perm : Elapsed 0.018 ms (1.819 ms / 100) 1.873 -> 1.876 ( +0.16%) [ +0.05% +0.27% +0.00% / +0.16% +0.16% +0.21%] index_add_ perm_sorted : Elapsed 0.019 ms (1.874 ms / 100) 1.830 -> 1.829 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.05% -0.05%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.830 ms / 100) 8.517 -> 8.521 ( +0.05%) [ +0.00% +0.06% +0.11% / +0.05% +0.26% +0.11%] index_select const : Elapsed 0.085 ms (8.517 ms / 100) 8.539 -> 8.539 ( +0.00%) [ +0.22% +0.00% +0.05% / +0.19% +0.02% +0.00%] index_select wrap : Elapsed 0.086 ms (8.558 ms / 100) 8.544 -> 8.545 ( +0.01%) [ +0.00% +0.12% +0.18% / +0.05% +0.01% +0.18%] index_select linear : Elapsed 0.085 ms (8.544 ms / 100) 8.514 -> 8.526 ( +0.14%) [ +0.41% +0.00% +0.28% / +0.32% +0.19% +0.14%] index_select reverse : Elapsed 0.085 ms (8.549 ms / 100) 8.509 -> 8.518 ( +0.11%) [ +0.34% +0.00% +0.14% / +0.35% +0.11% +0.15%] index_select skip64 : Elapsed 0.085 ms (8.538 ms / 100) 8.522 -> 8.518 ( -0.05%) [ +0.09% +0.00% +0.04% / +0.00% +0.00% -0.05%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.545 -> 8.541 ( -0.05%) [ +0.00% +0.00% +0.02% / +0.18% +0.07% -0.05%] index_select spread : Elapsed 0.085 ms (8.545 ms / 100) 8.536 -> 8.543 ( +0.08%) [ +0.16% +0.00% +0.15% / +0.35% +0.14% +0.08%] index_select strided 3 : Elapsed 0.085 ms (8.550 ms / 100) 8.542 -> 8.552 ( +0.12%) [ +0.00% +0.18% +0.43% / +0.16% +0.12% +0.19%] index_select random : Elapsed 0.085 ms (8.542 ms / 100) 8.550 -> 8.554 ( +0.05%) [ +0.12% +0.05% +0.00% / +0.07% +0.08% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.560 ms / 100) B = [4, 40, 16, 20] (stride (20, 80, 3200, 1)) A = [4, 40, 16, 5] (stride (3200, 16, 1, 640)) dim = 3 1.815 -> 1.810 ( -0.28%) [ +0.39% +0.06% +0.00% / +0.39% -0.06% -0.28%] index_add_ linear : Elapsed 0.018 ms (1.822 ms / 100) 1.791 -> 1.775 ( -0.89%) [ +0.11% +0.11% +0.00% / -0.22% -0.89% -0.78%] index_copy_ linear : Elapsed 0.018 ms (1.793 ms / 100) 1.823 -> 1.812 ( -0.60%) [ +0.22% +0.00% +0.11% / -0.11% -0.60% -0.60%] index_add_ reverse : Elapsed 0.018 ms (1.827 ms / 100) 1.794 -> 1.774 ( -1.11%) [ +0.11% +0.00% +0.06% / -0.28% -1.00% -1.11%] index_copy_ reverse : Elapsed 0.018 ms (1.796 ms / 100) 1.838 -> 1.828 ( -0.54%) [ +0.22% +0.05% +0.00% / -0.11% -0.33% -0.54%] index_add_ spread : Elapsed 0.018 ms (1.842 ms / 100) 1.817 -> 1.800 ( -0.94%) [ +0.00% +0.11% +0.00% / +0.00% -0.94% -0.66%] index_copy_ spread : Elapsed 0.018 ms (1.817 ms / 100) 1.834 -> 1.827 ( -0.38%) [ +0.44% +0.49% +0.00% / +0.27% -0.27% -0.38%] index_add_ strided 3 : Elapsed 0.018 ms (1.842 ms / 100) 1.816 -> 1.802 ( -0.77%) [ +0.28% +0.00% +0.06% / -0.11% -0.72% -0.77%] index_copy_ strided 3 : Elapsed 0.018 ms (1.821 ms / 100) 1.838 -> 1.823 ( -0.82%) [ +0.11% +0.16% +0.00% / +0.16% -0.82% -0.54%] index_add_ strided 7 : Elapsed 0.018 ms (1.840 ms / 100) 1.815 -> 1.799 ( -0.88%) [ +0.11% +0.17% +0.00% / +0.33% -0.50% -0.88%] index_copy_ strided 7 : Elapsed 0.018 ms (1.817 ms / 100) 1.830 -> 1.821 ( -0.49%) [ +0.22% +0.16% +0.00% / +0.05% -0.49% -0.49%] index_add_ perm : Elapsed 0.018 ms (1.834 ms / 100) 1.808 -> 1.792 ( -0.88%) [ +0.06% +0.17% +0.00% / -0.17% -0.88% -0.77%] index_copy_ perm : Elapsed 0.018 ms (1.809 ms / 100) 1.831 -> 1.820 ( -0.60%) [ +0.05% +0.05% +0.00% / +0.05% -0.60% -0.49%] index_add_ perm_sorted : Elapsed 0.018 ms (1.832 ms / 100) 1.803 -> 1.790 ( -0.72%) [ +0.22% +0.00% +0.11% / +0.17% -0.72% -0.67%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.807 ms / 100) 8.248 -> 8.265 ( +0.21%) [ +0.00% +0.16% +0.16% / +0.21% +0.40% +0.35%] index_select const : Elapsed 0.082 ms (8.248 ms / 100) 8.310 -> 8.296 ( -0.17%) [ +0.08% +0.02% +0.00% / -0.17% +0.19% +0.10%] index_select wrap : Elapsed 0.083 ms (8.317 ms / 100) 8.281 -> 8.287 ( +0.07%) [ +0.00% +0.42% +0.21% / +0.07% +0.27% +0.50%] index_select linear : Elapsed 0.083 ms (8.281 ms / 100) 8.295 -> 8.296 ( +0.01%) [ +0.04% +0.13% +0.00% / +0.01% +0.25% +0.20%] index_select reverse : Elapsed 0.083 ms (8.298 ms / 100) 8.250 -> 8.243 ( -0.08%) [ +0.00% +0.06% +0.19% / -0.08% +0.13% +0.27%] index_select skip64 : Elapsed 0.082 ms (8.250 ms / 100) 8.233 -> 8.269 ( +0.44%) [ +0.16% +0.22% +0.00% / +0.44% +0.67% +0.55%] index_select skip256 : Elapsed 0.082 ms (8.246 ms / 100) 8.292 -> 8.294 ( +0.02%) [ +0.14% +0.07% +0.00% / +0.02% +0.02% +0.35%] index_select spread : Elapsed 0.083 ms (8.304 ms / 100) 8.300 -> 8.298 ( -0.02%) [ +0.20% +0.10% +0.00% / -0.02% +0.46% +0.18%] index_select strided 3 : Elapsed 0.083 ms (8.317 ms / 100) 8.308 -> 8.305 ( -0.04%) [ +0.00% +0.06% +0.10% / -0.04% +0.22% +0.31%] index_select random : Elapsed 0.083 ms (8.308 ms / 100) 8.292 -> 8.292 ( +0.00%) [ +0.16% +0.00% +0.14% / +0.00% +0.29% +0.24%] index_select random_sorted : Elapsed 0.083 ms (8.305 ms / 100) B = [4, 40, 16, 20] (stride (40, 1, 3200, 160)) A = [4, 40, 16, 5] (stride (1, 320, 4, 64)) dim = 3 1.708 -> 1.710 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +1.23% +1.41%] index_add_ linear : Elapsed 0.017 ms (1.709 ms / 100) 1.663 -> 1.665 ( +0.12%) [ +0.24% +0.00% +0.06% / +0.12% +1.44% +1.44%] index_copy_ linear : Elapsed 0.017 ms (1.667 ms / 100) 1.709 -> 1.709 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +1.52% +1.46%] index_add_ reverse : Elapsed 0.017 ms (1.709 ms / 100) 1.665 -> 1.667 ( +0.12%) [ +0.12% +0.00% +0.06% / +0.12% +1.98% +1.68%] index_copy_ reverse : Elapsed 0.017 ms (1.667 ms / 100) 1.714 -> 1.713 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.64% +0.82%] index_add_ spread : Elapsed 0.017 ms (1.714 ms / 100) 1.668 -> 1.667 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +1.02% +0.96%] index_copy_ spread : Elapsed 0.017 ms (1.669 ms / 100) 1.708 -> 1.708 ( +0.00%) [ +0.23% +0.06% +0.00% / +0.00% +0.76% +0.94%] index_add_ strided 3 : Elapsed 0.017 ms (1.712 ms / 100) 1.665 -> 1.665 ( +0.00%) [ +0.00% +0.30% +0.06% / +0.00% +1.20% +1.20%] index_copy_ strided 3 : Elapsed 0.017 ms (1.665 ms / 100) 1.714 -> 1.715 ( +0.06%) [ +0.00% +0.29% +0.12% / +0.06% +0.29% +0.29%] index_add_ strided 7 : Elapsed 0.017 ms (1.714 ms / 100) 1.667 -> 1.670 ( +0.18%) [ +0.00% +0.42% +0.30% / +0.18% +0.78% +0.72%] index_copy_ strided 7 : Elapsed 0.017 ms (1.667 ms / 100) 1.709 -> 1.709 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.53% +0.64%] index_add_ perm : Elapsed 0.017 ms (1.711 ms / 100) 1.664 -> 1.669 ( +0.30%) [ +0.18% +0.18% +0.00% / +0.30% +0.66% +0.60%] index_copy_ perm : Elapsed 0.017 ms (1.667 ms / 100) 1.707 -> 1.711 ( +0.23%) [ +0.06% +0.35% +0.00% / +0.23% +0.59% +0.59%] index_add_ perm_sorted : Elapsed 0.017 ms (1.708 ms / 100) 1.666 -> 1.668 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.60% +0.66%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.666 ms / 100) 8.183 -> 8.199 ( +0.20%) [ +0.16% +0.00% +0.17% / +0.20% +0.61% +0.34%] index_select const : Elapsed 0.082 ms (8.196 ms / 100) 8.221 -> 8.229 ( +0.10%) [ +0.00% +0.11% +0.16% / +0.10% +0.33% +0.35%] index_select wrap : Elapsed 0.082 ms (8.221 ms / 100) 8.230 -> 8.222 ( -0.10%) [ +0.11% +0.00% +0.13% / -0.10% +0.22% +0.34%] index_select linear : Elapsed 0.082 ms (8.239 ms / 100) 8.202 -> 8.202 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.26% +0.17%] index_select reverse : Elapsed 0.082 ms (8.209 ms / 100) 8.185 -> 8.197 ( +0.15%) [ +0.26% +0.00% +0.00% / +0.15% +0.29% +0.49%] index_select skip64 : Elapsed 0.082 ms (8.206 ms / 100) 8.182 -> 8.204 ( +0.27%) [ +0.11% +0.00% +0.04% / +0.27% +0.56% +0.51%] index_select skip256 : Elapsed 0.082 ms (8.191 ms / 100) 8.211 -> 8.221 ( +0.12%) [ +0.21% +0.00% +0.12% / +0.12% +0.26% +0.35%] index_select spread : Elapsed 0.082 ms (8.228 ms / 100) 8.230 -> 8.235 ( +0.06%) [ +0.05% +0.04% +0.00% / +0.06% +0.15% +0.07%] index_select strided 3 : Elapsed 0.082 ms (8.234 ms / 100) 8.219 -> 8.219 ( +0.00%) [ +0.00% +0.22% +0.17% / +0.00% +0.32% +0.55%] index_select random : Elapsed 0.082 ms (8.219 ms / 100) 8.210 -> 8.219 ( +0.11%) [ +0.02% +0.00% +0.19% / +0.11% +0.34% +0.17%] index_select random_sorted : Elapsed 0.082 ms (8.212 ms / 100) B = [4, 40, 16, 20] (stride (640, 16, 1, 2560)) A = [4, 40, 16, 5] (stride (40, 1, 800, 160)) dim = 3 1.582 -> 1.591 ( +0.57%) [ +0.00% +0.32% +0.32% / +0.57% +1.07% +1.33%] index_add_ linear : Elapsed 0.016 ms (1.582 ms / 100) 1.533 -> 1.536 ( +0.20%) [ +0.00% +0.20% +0.20% / +0.20% +1.30% +1.37%] index_copy_ linear : Elapsed 0.015 ms (1.533 ms / 100) 1.585 -> 1.587 ( +0.13%) [ +0.19% +0.00% +0.13% / +0.13% +0.95% +1.20%] index_add_ reverse : Elapsed 0.016 ms (1.588 ms / 100) 1.533 -> 1.534 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +1.30% +1.17%] index_copy_ reverse : Elapsed 0.015 ms (1.534 ms / 100) 1.580 -> 1.584 ( +0.25%) [ +0.00% +0.06% +0.13% / +0.25% +1.01% +1.01%] index_add_ spread : Elapsed 0.016 ms (1.580 ms / 100) 1.525 -> 1.532 ( +0.46%) [ +0.00% +0.39% +0.46% / +0.46% +1.70% +1.64%] index_copy_ spread : Elapsed 0.015 ms (1.525 ms / 100) 1.579 -> 1.586 ( +0.44%) [ +0.00% +0.06% +0.44% / +0.44% +2.09% +2.15%] index_add_ strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.527 -> 1.538 ( +0.72%) [ +0.00% +0.20% +0.79% / +0.72% +2.88% +2.75%] index_copy_ strided 3 : Elapsed 0.015 ms (1.527 ms / 100) 1.587 -> 1.594 ( +0.44%) [ +0.00% +0.13% +0.76% / +0.44% +1.01% +1.07%] index_add_ strided 7 : Elapsed 0.016 ms (1.587 ms / 100) 1.535 -> 1.545 ( +0.65%) [ +0.20% +0.00% +0.46% / +0.65% +0.78% +1.24%] index_copy_ strided 7 : Elapsed 0.015 ms (1.538 ms / 100) 1.559 -> 1.584 ( +1.60%) [ +0.00% +0.51% +1.41% / +1.60% +3.66% +3.66%] index_add_ perm : Elapsed 0.016 ms (1.559 ms / 100) 1.517 -> 1.531 ( +0.92%) [ +0.07% +0.00% +0.73% / +0.92% +3.63% +3.30%] index_copy_ perm : Elapsed 0.015 ms (1.518 ms / 100) 1.569 -> 1.583 ( +0.89%) [ +0.25% +0.00% +0.83% / +0.89% +3.12% +3.06%] index_add_ perm_sorted : Elapsed 0.016 ms (1.573 ms / 100) 1.519 -> 1.532 ( +0.86%) [ +0.07% +0.00% +0.79% / +0.86% +3.36% +3.23%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 7.574 -> 7.572 ( -0.03%) [ +0.37% +0.00% +0.13% / -0.03% +0.26% +0.11%] index_select const : Elapsed 0.076 ms (7.602 ms / 100) 7.615 -> 7.617 ( +0.03%) [ +0.14% +0.00% +0.03% / +0.03% +0.20% +0.18%] index_select wrap : Elapsed 0.076 ms (7.626 ms / 100) 7.614 -> 7.607 ( -0.09%) [ +0.07% +0.00% +0.12% / -0.09% +0.37% +0.18%] index_select linear : Elapsed 0.076 ms (7.619 ms / 100) 7.590 -> 7.602 ( +0.16%) [ +0.18% +0.18% +0.00% / +0.16% +0.30% +0.63%] index_select reverse : Elapsed 0.076 ms (7.604 ms / 100) 7.571 -> 7.567 ( -0.05%) [ +0.01% +0.24% +0.00% / +0.08% -0.05% +0.50%] index_select skip64 : Elapsed 0.076 ms (7.572 ms / 100) 7.569 -> 7.569 ( +0.00%) [ +0.00% +0.12% +0.09% / +0.00% +0.24% +0.42%] index_select skip256 : Elapsed 0.076 ms (7.569 ms / 100) 7.610 -> 7.629 ( +0.25%) [ +0.00% +0.13% +0.18% / +0.25% +0.26% +0.33%] index_select spread : Elapsed 0.076 ms (7.610 ms / 100) 7.616 -> 7.616 ( +0.00%) [ +0.21% +0.12% +0.00% / +0.00% +0.39% +0.05%] index_select strided 3 : Elapsed 0.076 ms (7.632 ms / 100) 7.600 -> 7.613 ( +0.17%) [ +0.16% +0.26% +0.00% / +0.17% +0.59% +0.54%] index_select random : Elapsed 0.076 ms (7.612 ms / 100) 7.610 -> 7.625 ( +0.20%) [ +0.16% +0.18% +0.00% / +0.20% +0.42% +0.33%] index_select random_sorted : Elapsed 0.076 ms (7.622 ms / 100) out_shape = [20, 4, 16, 40] in_shape = [5, 4, 16, 40] idx_dim = 0 B = [20, 4, 16, 40] (stride (40, 12800, 800, 1)) A = [5, 4, 16, 40] (stride (1, 3200, 200, 5)) dim = 0 1.518 -> 1.520 ( +0.13%) [ +0.00% +0.00% +0.26% / +0.13% +1.12% +0.99%] index_add_ linear : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +1.09% +1.22%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.92% +0.99%] index_add_ reverse : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +1.09% +1.09%] index_copy_ reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.536 -> 1.535 ( -0.07%) [ +0.07% +0.13% +0.00% / -0.07% +0.65% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.537 ms / 100) 1.494 -> 1.496 ( +0.13%) [ +0.07% +0.00% +0.20% / +0.13% +0.74% +0.94%] index_copy_ spread : Elapsed 0.015 ms (1.495 ms / 100) 1.536 -> 1.537 ( +0.07%) [ +0.00% +0.20% +0.07% / +0.07% +0.72% +1.04%] index_add_ strided 3 : Elapsed 0.015 ms (1.536 ms / 100) 1.491 -> 1.505 ( +0.94%) [ +0.00% +0.20% +0.13% / +0.94% +1.14% +1.07%] index_copy_ strided 3 : Elapsed 0.015 ms (1.491 ms / 100) 1.526 -> 1.529 ( +0.20%) [ +0.00% +0.13% +0.07% / +0.20% +1.18% +0.98%] index_add_ strided 7 : Elapsed 0.015 ms (1.526 ms / 100) 1.483 -> 1.483 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +1.01% +1.15%] index_copy_ strided 7 : Elapsed 0.015 ms (1.483 ms / 100) 1.534 -> 1.537 ( +0.20%) [ +0.46% +0.00% +0.07% / +0.20% +0.72% +0.46%] index_add_ perm : Elapsed 0.015 ms (1.541 ms / 100) 1.490 -> 1.494 ( +0.27%) [ +0.20% +0.00% +0.40% / +0.27% +1.14% +0.87%] index_copy_ perm : Elapsed 0.015 ms (1.493 ms / 100) 1.535 -> 1.536 ( +0.07%) [ +0.00% +0.00% +0.13% / +0.07% +0.78% +0.72%] index_add_ perm_sorted : Elapsed 0.015 ms (1.535 ms / 100) 1.492 -> 1.494 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.74% +0.94%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.492 ms / 100) 7.570 -> 7.601 ( +0.41%) [ +0.00% +0.33% +0.30% / +0.41% +0.48% +0.54%] index_select const : Elapsed 0.076 ms (7.570 ms / 100) 7.572 -> 7.574 ( +0.03%) [ +0.12% +0.20% +0.00% / +0.03% +0.05% +0.28%] index_select wrap : Elapsed 0.076 ms (7.581 ms / 100) 7.570 -> 7.581 ( +0.15%) [ +0.04% +0.01% +0.00% / +0.15% +0.28% +0.32%] index_select linear : Elapsed 0.076 ms (7.573 ms / 100) 7.565 -> 7.562 ( -0.04%) [ +0.00% +0.26% +0.21% / -0.04% +0.46% +0.29%] index_select reverse : Elapsed 0.076 ms (7.565 ms / 100) 7.570 -> 7.588 ( +0.24%) [ +0.24% +0.00% +0.08% / +0.24% +0.38% +0.32%] index_select skip64 : Elapsed 0.076 ms (7.588 ms / 100) 7.566 -> 7.578 ( +0.16%) [ +0.15% +0.00% +0.26% / +0.16% +0.34% +0.26%] index_select skip256 : Elapsed 0.076 ms (7.577 ms / 100) 7.567 -> 7.581 ( +0.19%) [ +0.00% +0.15% +0.19% / +0.19% +0.52% +0.38%] index_select spread : Elapsed 0.076 ms (7.567 ms / 100) 7.576 -> 7.583 ( +0.09%) [ +0.09% +0.01% +0.00% / +0.09% +0.18% +0.12%] index_select strided 3 : Elapsed 0.076 ms (7.583 ms / 100) 7.571 -> 7.574 ( +0.04%) [ +0.00% +0.16% +0.05% / +0.04% +0.29% +0.24%] index_select random : Elapsed 0.076 ms (7.571 ms / 100) 7.575 -> 7.578 ( +0.04%) [ +0.04% +0.01% +0.00% / +0.04% +0.22% +0.42%] index_select random_sorted : Elapsed 0.076 ms (7.578 ms / 100) B = [20, 4, 16, 40] (stride (160, 1, 3200, 4)) A = [5, 4, 16, 40] (stride (2560, 1, 4, 64)) dim = 0 1.918 -> 1.923 ( +0.26%) [ +0.16% +0.10% +0.00% / +0.26% +1.25% +1.20%] index_add_ linear : Elapsed 0.019 ms (1.921 ms / 100) 1.865 -> 1.867 ( +0.11%) [ +0.16% +0.00% +0.00% / +0.11% +1.13% +1.18%] index_copy_ linear : Elapsed 0.019 ms (1.868 ms / 100) 1.926 -> 1.926 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +1.30% +1.19%] index_add_ reverse : Elapsed 0.019 ms (1.927 ms / 100) 1.872 -> 1.873 ( +0.05%) [ +0.16% +0.00% +0.00% / +0.05% +1.34% +1.23%] index_copy_ reverse : Elapsed 0.019 ms (1.875 ms / 100) 1.930 -> 1.931 ( +0.05%) [ +0.36% +0.10% +0.00% / +0.05% +0.73% +0.88%] index_add_ spread : Elapsed 0.019 ms (1.937 ms / 100) 1.871 -> 1.875 ( +0.21%) [ +0.16% +0.00% +0.37% / +0.21% +1.02% +0.96%] index_copy_ spread : Elapsed 0.019 ms (1.874 ms / 100) 1.919 -> 1.920 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.73% +0.78%] index_add_ strided 3 : Elapsed 0.019 ms (1.920 ms / 100) 1.864 -> 1.867 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.75% +0.80%] index_copy_ strided 3 : Elapsed 0.019 ms (1.864 ms / 100) 1.932 -> 1.935 ( +0.16%) [ +0.10% +0.26% +0.00% / +0.16% +0.47% +0.62%] index_add_ strided 7 : Elapsed 0.019 ms (1.934 ms / 100) 1.882 -> 1.883 ( +0.05%) [ +0.16% +0.05% +0.00% / +0.05% +0.58% +0.43%] index_copy_ strided 7 : Elapsed 0.019 ms (1.885 ms / 100) 1.930 -> 1.929 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.47% +0.52%] index_add_ perm : Elapsed 0.019 ms (1.932 ms / 100) 1.876 -> 1.878 ( +0.11%) [ +0.16% +0.00% +0.00% / +0.11% +0.85% +0.69%] index_copy_ perm : Elapsed 0.019 ms (1.879 ms / 100) 1.917 -> 1.919 ( +0.10%) [ +0.00% +0.16% +0.16% / +0.10% +0.63% +0.83%] index_add_ perm_sorted : Elapsed 0.019 ms (1.917 ms / 100) 1.862 -> 1.865 ( +0.16%) [ +0.21% +0.27% +0.00% / +0.16% +0.70% +0.75%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.866 ms / 100) 8.521 -> 8.506 ( -0.18%) [ +0.12% +0.11% +0.00% / -0.18% +0.45% +0.50%] index_select const : Elapsed 0.085 ms (8.531 ms / 100) 8.543 -> 8.551 ( +0.09%) [ +0.09% +0.15% +0.00% / +0.09% +0.30% +0.50%] index_select wrap : Elapsed 0.086 ms (8.551 ms / 100) 8.550 -> 8.560 ( +0.12%) [ +0.02% +0.30% +0.00% / +0.12% +0.29% +0.16%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.538 -> 8.543 ( +0.06%) [ +0.30% +0.49% +0.00% / +0.06% +0.33% +0.15%] index_select reverse : Elapsed 0.086 ms (8.564 ms / 100) 8.523 -> 8.519 ( -0.05%) [ +0.19% +0.00% +0.04% / -0.05% +0.21% +0.39%] index_select skip64 : Elapsed 0.085 ms (8.539 ms / 100) 8.521 -> 8.540 ( +0.22%) [ +0.16% +0.00% +0.08% / +0.22% +0.42% +0.25%] index_select skip256 : Elapsed 0.085 ms (8.535 ms / 100) 8.550 -> 8.549 ( -0.01%) [ +0.13% +0.04% +0.00% / -0.01% +0.39% +0.30%] index_select spread : Elapsed 0.086 ms (8.561 ms / 100) 8.549 -> 8.544 ( -0.06%) [ +0.42% +0.00% +0.07% / -0.06% +0.39% +0.18%] index_select strided 3 : Elapsed 0.086 ms (8.585 ms / 100) 8.543 -> 8.559 ( +0.19%) [ +0.01% +0.00% +0.15% / +0.19% +0.32% +0.41%] index_select random : Elapsed 0.085 ms (8.544 ms / 100) 8.549 -> 8.544 ( -0.06%) [ +0.00% +0.15% +0.19% / -0.06% +0.74% +0.35%] index_select random_sorted : Elapsed 0.085 ms (8.549 ms / 100) B = [20, 4, 16, 40] (stride (1, 20, 80, 1280)) A = [5, 4, 16, 40] (stride (2560, 1, 160, 4)) dim = 0 1.797 -> 1.796 ( -0.06%) [ +0.00% +0.28% +0.17% / +0.17% -0.06% +0.00%] index_add_ linear : Elapsed 0.018 ms (1.797 ms / 100) 1.760 -> 1.758 ( -0.11%) [ +0.23% +0.00% +0.06% / +0.06% -0.11% +0.06%] index_copy_ linear : Elapsed 0.018 ms (1.764 ms / 100) 1.796 -> 1.791 ( -0.28%) [ +0.28% +0.00% +0.06% / +0.22% -0.11% -0.28%] index_add_ reverse : Elapsed 0.018 ms (1.801 ms / 100) 1.765 -> 1.759 ( -0.34%) [ +0.11% +0.40% +0.00% / +0.11% -0.34% -0.28%] index_copy_ reverse : Elapsed 0.018 ms (1.767 ms / 100) 1.815 -> 1.807 ( -0.44%) [ +0.11% +0.00% +0.17% / -0.17% -0.44% -0.11%] index_add_ spread : Elapsed 0.018 ms (1.817 ms / 100) 1.786 -> 1.783 ( -0.17%) [ +0.39% +0.39% +0.00% / +0.39% -0.17% +0.39%] index_copy_ spread : Elapsed 0.018 ms (1.793 ms / 100) 1.819 -> 1.811 ( -0.44%) [ +0.00% +0.00% +0.05% / -0.22% -0.44% -0.27%] index_add_ strided 3 : Elapsed 0.018 ms (1.819 ms / 100) 1.794 -> 1.782 ( -0.67%) [ +0.06% +0.06% +0.00% / -0.22% -0.67% -0.28%] index_copy_ strided 3 : Elapsed 0.018 ms (1.795 ms / 100) 1.816 -> 1.809 ( -0.39%) [ +0.06% +0.33% +0.00% / +0.44% -0.39% -0.33%] index_add_ strided 7 : Elapsed 0.018 ms (1.817 ms / 100) 1.787 -> 1.784 ( -0.17%) [ +0.45% +0.00% +0.34% / +0.34% -0.17% +0.00%] index_copy_ strided 7 : Elapsed 0.018 ms (1.795 ms / 100) 1.808 -> 1.806 ( -0.11%) [ +0.22% +0.39% +0.00% / -0.06% -0.11% -0.06%] index_add_ perm : Elapsed 0.018 ms (1.812 ms / 100) 1.781 -> 1.773 ( -0.45%) [ +0.17% +0.00% +0.00% / +0.06% -0.45% -0.28%] index_copy_ perm : Elapsed 0.018 ms (1.784 ms / 100) 1.809 -> 1.799 ( -0.55%) [ +0.11% +0.00% +0.17% / +0.06% -0.28% -0.55%] index_add_ perm_sorted : Elapsed 0.018 ms (1.811 ms / 100) 1.781 -> 1.773 ( -0.45%) [ +0.06% +0.06% +0.00% / -0.17% -0.45% -0.28%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.782 ms / 100) 8.242 -> 8.264 ( +0.27%) [ +0.32% +0.08% +0.00% / +0.27% +0.46% +0.35%] index_select const : Elapsed 0.083 ms (8.268 ms / 100) 8.293 -> 8.293 ( +0.00%) [ +0.00% +0.10% +0.01% / +0.00% +0.53% +0.22%] index_select wrap : Elapsed 0.083 ms (8.293 ms / 100) 8.287 -> 8.297 ( +0.12%) [ +0.05% +0.12% +0.00% / +0.13% +0.25% +0.12%] index_select linear : Elapsed 0.083 ms (8.291 ms / 100) 8.275 -> 8.272 ( -0.04%) [ +0.00% +0.08% +0.02% / -0.04% +0.19% +0.12%] index_select reverse : Elapsed 0.083 ms (8.275 ms / 100) 8.253 -> 8.266 ( +0.16%) [ +0.13% +0.01% +0.00% / +0.16% +0.44% +0.52%] index_select skip64 : Elapsed 0.083 ms (8.264 ms / 100) 8.260 -> 8.267 ( +0.08%) [ +0.06% +0.01% +0.00% / +0.08% +0.46% +0.15%] index_select skip256 : Elapsed 0.083 ms (8.265 ms / 100) 8.278 -> 8.302 ( +0.29%) [ +0.00% +0.13% +0.23% / +0.29% +0.35% +0.57%] index_select spread : Elapsed 0.083 ms (8.278 ms / 100) 8.300 -> 8.297 ( -0.04%) [ +0.11% +0.22% +0.00% / -0.04% +0.43% +0.20%] index_select strided 3 : Elapsed 0.083 ms (8.309 ms / 100) 8.283 -> 8.281 ( -0.02%) [ +0.00% +0.35% +0.00% / -0.02% +0.43% +0.43%] index_select random : Elapsed 0.083 ms (8.283 ms / 100) 8.277 -> 8.278 ( +0.01%) [ +0.00% +0.02% +0.10% / +0.01% +0.45% +0.37%] index_select random_sorted : Elapsed 0.083 ms (8.277 ms / 100) out_shape = [5, 20, 16, 40] in_shape = [5, 4, 16, 40] idx_dim = 1 B = [5, 20, 16, 40] (stride (12800, 640, 1, 16)) A = [5, 4, 16, 40] (stride (1, 3200, 5, 80)) dim = 1 2.224 -> 2.225 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.49% +0.49%] index_add_ linear : Elapsed 0.022 ms (2.225 ms / 100) 2.160 -> 2.164 ( +0.19%) [ +0.09% +0.19% +0.00% / +0.19% +0.51% +0.60%] index_copy_ linear : Elapsed 0.022 ms (2.162 ms / 100) 2.219 -> 2.221 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.59% +0.63%] index_add_ reverse : Elapsed 0.022 ms (2.221 ms / 100) 2.158 -> 2.161 ( +0.14%) [ +0.32% +0.05% +0.00% / +0.14% +0.46% +0.56%] index_copy_ reverse : Elapsed 0.022 ms (2.165 ms / 100) 2.222 -> 2.221 ( -0.05%) [ +0.05% +0.14% +0.00% / -0.05% +0.45% +0.63%] index_add_ spread : Elapsed 0.022 ms (2.223 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.19% +0.37%] index_copy_ spread : Elapsed 0.022 ms (2.161 ms / 100) 2.218 -> 2.222 ( +0.18%) [ +0.14% +0.00% +0.09% / +0.18% +0.68% +0.72%] index_add_ strided 3 : Elapsed 0.022 ms (2.221 ms / 100) 2.159 -> 2.161 ( +0.09%) [ +0.19% +0.46% +0.00% / +0.09% +0.65% +0.83%] index_copy_ strided 3 : Elapsed 0.022 ms (2.163 ms / 100) 2.223 -> 2.222 ( -0.04%) [ +0.09% +0.00% +0.00% / -0.04% +0.54% +0.49%] index_add_ strided 7 : Elapsed 0.022 ms (2.225 ms / 100) 2.160 -> 2.161 ( +0.05%) [ +0.00% +0.19% +0.05% / +0.05% +0.60% +0.46%] index_copy_ strided 7 : Elapsed 0.022 ms (2.160 ms / 100) 2.220 -> 2.222 ( +0.09%) [ +0.18% +0.23% +0.00% / +0.09% +0.54% +0.50%] index_add_ perm : Elapsed 0.022 ms (2.224 ms / 100) 2.158 -> 2.158 ( +0.00%) [ +0.14% +0.28% +0.00% / +0.00% +0.60% +0.56%] index_copy_ perm : Elapsed 0.022 ms (2.161 ms / 100) 2.221 -> 2.226 ( +0.23%) [ +0.00% +0.14% +0.05% / +0.23% +0.50% +0.54%] index_add_ perm_sorted : Elapsed 0.022 ms (2.221 ms / 100) 2.158 -> 2.164 ( +0.28%) [ +0.19% +0.00% +0.14% / +0.28% +0.46% +0.70%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.162 ms / 100) 9.171 -> 9.148 ( -0.25%) [ +0.00% +0.11% +0.10% / -0.25% -0.01% +0.07%] index_select const : Elapsed 0.092 ms (9.171 ms / 100) 9.227 -> 9.210 ( -0.18%) [ +0.12% +0.03% +0.00% / -0.13% -0.18% -0.03%] index_select wrap : Elapsed 0.092 ms (9.238 ms / 100) 9.203 -> 9.199 ( -0.04%) [ +0.17% +0.00% +0.02% / -0.04% +0.04% +0.24%] index_select linear : Elapsed 0.092 ms (9.219 ms / 100) 9.195 -> 9.211 ( +0.17%) [ +0.01% +0.10% +0.00% / +0.26% +0.17% +0.41%] index_select reverse : Elapsed 0.092 ms (9.196 ms / 100) 9.160 -> 9.171 ( +0.12%) [ +0.00% +0.31% +0.10% / +0.31% +0.12% +0.19%] index_select skip64 : Elapsed 0.092 ms (9.160 ms / 100) 9.164 -> 9.168 ( +0.04%) [ +0.02% +0.02% +0.00% / +0.10% +0.04% +0.14%] index_select skip256 : Elapsed 0.092 ms (9.166 ms / 100) 9.216 -> 9.206 ( -0.11%) [ +0.27% +0.28% +0.00% / -0.09% +0.02% -0.11%] index_select spread : Elapsed 0.092 ms (9.241 ms / 100) 9.210 -> 9.214 ( +0.04%) [ +0.14% +0.21% +0.00% / +0.13% +0.04% +0.17%] index_select strided 3 : Elapsed 0.092 ms (9.223 ms / 100) 9.207 -> 9.214 ( +0.08%) [ +0.11% +0.00% +0.48% / +0.10% +0.20% +0.08%] index_select random : Elapsed 0.092 ms (9.217 ms / 100) 9.197 -> 9.207 ( +0.11%) [ +0.33% +0.16% +0.00% / +0.24% +0.11% +0.14%] index_select random_sorted : Elapsed 0.092 ms (9.227 ms / 100) B = [5, 20, 16, 40] (stride (12800, 1, 800, 20)) A = [5, 4, 16, 40] (stride (40, 200, 800, 1)) dim = 1 2.010 -> 2.012 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.30% +0.15%] index_add_ linear : Elapsed 0.020 ms (2.012 ms / 100) 1.983 -> 1.982 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.10% +0.15%] index_copy_ linear : Elapsed 0.020 ms (1.984 ms / 100) 2.011 -> 2.007 ( -0.20%) [ +0.05% +0.15% +0.00% / -0.20% +0.05% +0.20%] index_add_ reverse : Elapsed 0.020 ms (2.012 ms / 100) 1.980 -> 1.982 ( +0.10%) [ +0.00% +0.20% +0.00% / +0.25% +0.20% +0.10%] index_copy_ reverse : Elapsed 0.020 ms (1.980 ms / 100) 2.054 -> 2.056 ( +0.10%) [ +0.15% +0.39% +0.00% / +0.10% +0.49% +0.29%] index_add_ spread : Elapsed 0.021 ms (2.057 ms / 100) 2.078 -> 2.077 ( -0.05%) [ +0.10% +0.00% +0.29% / -0.05% +0.00% +0.14%] index_copy_ spread : Elapsed 0.021 ms (2.080 ms / 100) 2.052 -> 2.048 ( -0.19%) [ +0.10% +0.10% +0.00% / -0.19% -0.15% +0.10%] index_add_ strided 3 : Elapsed 0.021 ms (2.054 ms / 100) 2.049 -> 2.052 ( +0.15%) [ +0.24% +0.00% +0.00% / +0.24% +0.15% +0.15%] index_copy_ strided 3 : Elapsed 0.021 ms (2.054 ms / 100) 2.054 -> 2.057 ( +0.15%) [ +0.05% +0.29% +0.00% / +0.24% +0.15% +0.15%] index_add_ strided 7 : Elapsed 0.021 ms (2.055 ms / 100) 2.084 -> 2.081 ( -0.14%) [ +0.00% +0.10% +0.14% / -0.14% +0.00% +0.00%] index_copy_ strided 7 : Elapsed 0.021 ms (2.084 ms / 100) 2.060 -> 2.056 ( -0.19%) [ +0.00% +0.05% +0.10% / -0.19% +0.29% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.060 ms / 100) 2.083 -> 2.081 ( -0.10%) [ +0.14% +0.19% +0.00% / -0.10% +0.00% +0.00%] index_copy_ perm : Elapsed 0.021 ms (2.086 ms / 100) 2.057 -> 2.058 ( +0.05%) [ +0.00% +0.39% +0.24% / +0.05% +0.29% +0.29%] index_add_ perm_sorted : Elapsed 0.021 ms (2.057 ms / 100) 2.084 -> 2.080 ( -0.19%) [ +0.14% +0.24% +0.00% / +0.24% -0.19% +0.00%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.087 ms / 100) 8.771 -> 8.777 ( +0.07%) [ +0.00% +0.14% +0.03% / +0.07% +0.36% +0.22%] index_select const : Elapsed 0.088 ms (8.771 ms / 100) 8.843 -> 8.872 ( +0.33%) [ +0.19% +0.11% +0.00% / +0.33% +0.44% +0.38%] index_select wrap : Elapsed 0.089 ms (8.860 ms / 100) 8.823 -> 8.819 ( -0.05%) [ +0.11% +0.08% +0.00% / -0.05% +0.12% +0.00%] index_select linear : Elapsed 0.088 ms (8.833 ms / 100) 8.823 -> 8.830 ( +0.08%) [ +0.00% +0.24% +0.18% / +0.25% +0.08% +0.14%] index_select reverse : Elapsed 0.088 ms (8.823 ms / 100) 8.778 -> 8.779 ( +0.01%) [ +0.10% +0.02% +0.00% / +0.01% +0.14% +0.24%] index_select skip64 : Elapsed 0.088 ms (8.787 ms / 100) 8.771 -> 8.784 ( +0.15%) [ +0.23% +0.10% +0.00% / +0.15% +0.60% +0.31%] index_select skip256 : Elapsed 0.088 ms (8.791 ms / 100) 8.836 -> 8.836 ( +0.00%) [ +0.07% +0.00% +0.06% / +0.00% +0.02% +0.36%] index_select spread : Elapsed 0.088 ms (8.842 ms / 100) 8.853 -> 8.860 ( +0.08%) [ +0.00% +0.23% +0.27% / +0.08% +0.28% +0.24%] index_select strided 3 : Elapsed 0.089 ms (8.853 ms / 100) 8.861 -> 8.877 ( +0.18%) [ +0.16% +0.01% +0.00% / +0.18% +0.20% +0.25%] index_select random : Elapsed 0.089 ms (8.875 ms / 100) 8.836 -> 8.831 ( -0.06%) [ +0.00% +0.24% +0.03% / -0.06% +0.01% +0.23%] index_select random_sorted : Elapsed 0.088 ms (8.836 ms / 100) B = [5, 20, 16, 40] (stride (640, 3200, 40, 1)) A = [5, 4, 16, 40] (stride (40, 200, 800, 1)) dim = 1 1.986 -> 1.991 ( +0.25%) [ +0.20% +0.20% +0.00% / +0.25% +0.40% +0.50%] index_add_ linear : Elapsed 0.020 ms (1.990 ms / 100) 1.951 -> 1.954 ( +0.15%) [ +0.00% +0.21% +0.00% / +0.15% +0.51% +0.41%] index_copy_ linear : Elapsed 0.020 ms (1.951 ms / 100) 1.987 -> 1.981 ( -0.30%) [ +0.00% +0.05% +0.00% / +0.15% +0.10% -0.30%] index_add_ reverse : Elapsed 0.020 ms (1.987 ms / 100) 1.951 -> 1.949 ( -0.10%) [ +0.00% +0.05% +0.00% / -0.10% +0.36% +0.21%] index_copy_ reverse : Elapsed 0.020 ms (1.951 ms / 100) 1.990 -> 1.979 ( -0.55%) [ +0.20% +0.20% +0.00% / -0.05% -0.55% +0.00%] index_add_ spread : Elapsed 0.020 ms (1.994 ms / 100) 1.959 -> 1.951 ( -0.41%) [ +0.10% +0.00% +0.00% / +0.00% -0.41% -0.05%] index_copy_ spread : Elapsed 0.020 ms (1.961 ms / 100) 1.992 -> 1.992 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.15% +0.40%] index_add_ strided 3 : Elapsed 0.020 ms (1.997 ms / 100) 1.951 -> 1.955 ( +0.21%) [ +0.05% +0.00% +0.10% / +0.21% +0.41% +0.97%] index_copy_ strided 3 : Elapsed 0.020 ms (1.952 ms / 100) 1.986 -> 1.983 ( -0.15%) [ +0.05% +0.05% +0.00% / -0.10% -0.05% -0.15%] index_add_ strided 7 : Elapsed 0.020 ms (1.987 ms / 100) 1.944 -> 1.945 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.51% +0.93%] index_copy_ strided 7 : Elapsed 0.019 ms (1.947 ms / 100) 1.982 -> 1.988 ( +0.30%) [ +0.00% +0.30% +0.10% / +0.30% +0.50% +0.55%] index_add_ perm : Elapsed 0.020 ms (1.982 ms / 100) 1.951 -> 1.954 ( +0.15%) [ +0.10% +0.00% +0.05% / +0.15% +0.36% +0.46%] index_copy_ perm : Elapsed 0.020 ms (1.953 ms / 100) 1.979 -> 1.974 ( -0.25%) [ +0.10% +0.00% +0.00% / +0.10% +0.05% -0.25%] index_add_ perm_sorted : Elapsed 0.020 ms (1.981 ms / 100) 1.945 -> 1.942 ( -0.15%) [ +0.26% +0.00% +0.36% / +0.10% +0.57% -0.15%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.950 ms / 100) 8.672 -> 8.669 ( -0.03%) [ +0.10% +0.00% +0.08% / +0.22% -0.03% +0.32%] index_select const : Elapsed 0.087 ms (8.681 ms / 100) 8.738 -> 8.744 ( +0.07%) [ +0.00% +0.22% +0.19% / +0.13% +0.09% +0.07%] index_select wrap : Elapsed 0.087 ms (8.738 ms / 100) 8.713 -> 8.703 ( -0.11%) [ +0.00% +0.13% +0.15% / +0.07% -0.11% +0.09%] index_select linear : Elapsed 0.087 ms (8.713 ms / 100) 8.719 -> 8.716 ( -0.03%) [ +0.02% +0.16% +0.00% / +0.22% -0.03% +0.10%] index_select reverse : Elapsed 0.087 ms (8.721 ms / 100) 8.672 -> 8.681 ( +0.10%) [ +0.00% +0.28% +0.24% / +0.13% +0.27% +0.10%] index_select skip64 : Elapsed 0.087 ms (8.672 ms / 100) 8.673 -> 8.665 ( -0.09%) [ +0.18% +0.07% +0.00% / +0.17% -0.09% +0.03%] index_select skip256 : Elapsed 0.087 ms (8.689 ms / 100) 8.714 -> 8.697 ( -0.20%) [ +0.08% +0.00% +0.10% / -0.20% -0.14% +0.08%] index_select spread : Elapsed 0.087 ms (8.721 ms / 100) 8.737 -> 8.729 ( -0.09%) [ +0.22% +0.00% +0.10% / -0.09% +0.01% +0.18%] index_select strided 3 : Elapsed 0.088 ms (8.756 ms / 100) 8.719 -> 8.722 ( +0.03%) [ +0.00% +0.05% +0.37% / +0.08% +0.03% +0.05%] index_select random : Elapsed 0.087 ms (8.719 ms / 100) 8.717 -> 8.708 ( -0.10%) [ +0.00% +0.11% +0.25% / +0.11% -0.10% +0.05%] index_select random_sorted : Elapsed 0.087 ms (8.717 ms / 100) B = [5, 20, 16, 40] (stride (1, 3200, 200, 5)) A = [5, 4, 16, 40] (stride (2560, 40, 160, 1)) dim = 1 1.979 -> 1.981 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +1.16% +1.06%] index_add_ linear : Elapsed 0.020 ms (1.980 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.21% +0.00% +0.16% / +0.00% +1.60% +1.65%] index_copy_ linear : Elapsed 0.019 ms (1.939 ms / 100) 1.974 -> 1.979 ( +0.25%) [ +0.25% +0.00% +0.15% / +0.25% +1.27% +1.42%] index_add_ reverse : Elapsed 0.020 ms (1.979 ms / 100) 1.932 -> 1.930 ( -0.10%) [ +0.00% +0.00% +0.16% / -0.10% +1.76% +1.81%] index_copy_ reverse : Elapsed 0.019 ms (1.932 ms / 100) 1.981 -> 1.981 ( +0.00%) [ +0.35% +0.00% +0.15% / +0.00% +1.06% +0.91%] index_add_ spread : Elapsed 0.020 ms (1.988 ms / 100) 1.939 -> 1.944 ( +0.26%) [ +0.00% +0.00% +0.10% / +0.26% +1.39% +1.55%] index_copy_ spread : Elapsed 0.019 ms (1.939 ms / 100) 1.984 -> 1.988 ( +0.20%) [ +0.00% +0.00% +0.05% / +0.20% +0.91% +0.91%] index_add_ strided 3 : Elapsed 0.020 ms (1.984 ms / 100) 1.939 -> 1.940 ( +0.05%) [ +0.05% +0.31% +0.00% / +0.05% +1.50% +1.19%] index_copy_ strided 3 : Elapsed 0.019 ms (1.940 ms / 100) 1.989 -> 1.983 ( -0.30%) [ +0.00% +0.10% +0.20% / -0.30% +0.60% +0.70%] index_add_ strided 7 : Elapsed 0.020 ms (1.989 ms / 100) 1.947 -> 1.940 ( -0.36%) [ +0.10% +0.05% +0.00% / -0.36% +0.92% +1.13%] index_copy_ strided 7 : Elapsed 0.019 ms (1.949 ms / 100) 1.977 -> 1.983 ( +0.30%) [ +0.10% +0.00% +0.00% / +0.30% +0.56% +0.71%] index_add_ perm : Elapsed 0.020 ms (1.979 ms / 100) 1.939 -> 1.940 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.98% +1.08%] index_copy_ perm : Elapsed 0.019 ms (1.939 ms / 100) 1.978 -> 1.978 ( +0.00%) [ +0.35% +0.20% +0.00% / +0.00% +0.96% +0.96%] index_add_ perm_sorted : Elapsed 0.020 ms (1.985 ms / 100) 1.935 -> 1.938 ( +0.16%) [ +0.21% +0.16% +0.00% / +0.16% +1.29% +1.60%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.939 ms / 100) 8.696 -> 8.701 ( +0.06%) [ +0.15% +0.00% +0.17% / +0.21% +0.23% +0.06%] index_select const : Elapsed 0.087 ms (8.709 ms / 100) 8.768 -> 8.771 ( +0.03%) [ +0.18% +0.00% +0.13% / +0.03% +0.08% +0.09%] index_select wrap : Elapsed 0.088 ms (8.784 ms / 100) 8.731 -> 8.734 ( +0.03%) [ +0.06% +0.00% +0.11% / +0.13% +0.03% +0.44%] index_select linear : Elapsed 0.087 ms (8.736 ms / 100) 8.740 -> 8.740 ( +0.00%) [ +0.08% +0.00% +0.23% / +0.00% +0.06% +0.37%] index_select reverse : Elapsed 0.087 ms (8.747 ms / 100) 8.700 -> 8.700 ( +0.00%) [ +0.00% +0.07% +0.10% / +0.00% +0.15% +0.05%] index_select skip64 : Elapsed 0.087 ms (8.700 ms / 100) 8.692 -> 8.693 ( +0.01%) [ +0.00% +0.16% +0.13% / +0.07% +0.15% +0.01%] index_select skip256 : Elapsed 0.087 ms (8.692 ms / 100) 8.748 -> 8.739 ( -0.10%) [ +0.00% +0.16% +0.05% / +0.15% +0.11% -0.10%] index_select spread : Elapsed 0.087 ms (8.748 ms / 100) 8.764 -> 8.767 ( +0.03%) [ +0.02% +0.00% +0.19% / +0.15% +0.03% +0.22%] index_select strided 3 : Elapsed 0.088 ms (8.766 ms / 100) 8.766 -> 8.759 ( -0.08%) [ +0.00% +0.00% +0.18% / +0.03% +0.11% -0.08%] index_select random : Elapsed 0.088 ms (8.766 ms / 100) 8.753 -> 8.742 ( -0.13%) [ +0.15% +0.00% +0.15% / +0.06% +0.21% -0.13%] index_select random_sorted : Elapsed 0.088 ms (8.766 ms / 100) B = [5, 20, 16, 40] (stride (1, 200, 4000, 5)) dim = 1 fill_cnt = 4 0.974 -> 0.973 ( -0.10%) [ +0.00% +0.10% +0.00% / -0.10% +0.00% +0.41%] index_fill_ const : Elapsed 0.010 ms (0.974 ms / 100) 1.001 -> 1.005 ( +0.40%) [ +0.10% +0.00% +0.00% / +0.90% +0.40% +0.90%] index_fill_ linear : Elapsed 0.010 ms (1.002 ms / 100) 1.001 -> 1.002 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.90% +1.10%] index_fill_ reverse : Elapsed 0.010 ms (1.001 ms / 100) 0.973 -> 0.972 ( -0.10%) [ +0.31% +0.31% +0.00% / -0.10% +0.10% +0.21%] index_fill_ skip64 : Elapsed 0.010 ms (0.976 ms / 100) 0.975 -> 0.974 ( -0.10%) [ +0.21% +0.00% +0.10% / +0.10% -0.10% +0.00%] index_fill_ skip256 : Elapsed 0.010 ms (0.977 ms / 100) 0.981 -> 0.984 ( +0.31%) [ +0.31% +0.31% +0.00% / +0.31% +0.82% +1.02%] index_fill_ spread : Elapsed 0.010 ms (0.984 ms / 100) 1.004 -> 0.998 ( -0.60%) [ +0.20% +0.20% +0.00% / -0.20% -0.60% -0.50%] index_fill_ strided 3 : Elapsed 0.010 ms (1.006 ms / 100) 0.982 -> 0.982 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.61% +0.92%] index_fill_ strided 5 : Elapsed 0.010 ms (0.983 ms / 100) 0.984 -> 0.988 ( +0.41%) [ +0.10% +0.81% +0.00% / +0.41% +1.22% +1.22%] index_fill_ strided 7 : Elapsed 0.010 ms (0.985 ms / 100) 0.989 -> 0.988 ( -0.10%) [ +0.00% +0.10% +0.00% / +0.20% +0.10% -0.10%] index_fill_ strided 8 : Elapsed 0.010 ms (0.989 ms / 100) 0.984 -> 0.985 ( +0.10%) [ +0.10% +0.41% +0.00% / +0.10% +0.71% +0.30%] index_fill_ strided 16 : Elapsed 0.010 ms (0.985 ms / 100) 0.987 -> 0.989 ( +0.20%) [ +0.00% +0.30% +0.20% / +0.41% +0.20% +0.20%] index_fill_ random : Elapsed 0.010 ms (0.987 ms / 100) 0.989 -> 0.988 ( -0.10%) [ +0.40% +0.10% +0.00% / +0.00% +0.10% -0.10%] index_fill_ random_sorted : Elapsed 0.010 ms (0.993 ms / 100) 0.990 -> 0.990 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.40% +0.20%] index_fill_ perm : Elapsed 0.010 ms (0.990 ms / 100) 0.992 -> 0.991 ( -0.10%) [ +0.10% +0.20% +0.00% / +0.20% +0.10% -0.10%] index_fill_ perm_sorted : Elapsed 0.010 ms (0.993 ms / 100) B = [5, 20, 16, 40] (stride (16, 80, 1, 1600)) A = [5, 4, 16, 40] (stride (2560, 16, 1, 64)) dim = 1 2.255 -> 2.257 ( +0.09%) [ +0.04% +0.00% +0.04% / +0.09% +0.49% +0.62%] index_add_ linear : Elapsed 0.023 ms (2.256 ms / 100) 2.194 -> 2.196 ( +0.09%) [ +0.05% +0.00% +0.00% / +0.09% +0.55% +0.64%] index_copy_ linear : Elapsed 0.022 ms (2.195 ms / 100) 2.257 -> 2.256 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.66% +0.80%] index_add_ reverse : Elapsed 0.023 ms (2.259 ms / 100) 2.195 -> 2.197 ( +0.09%) [ +0.23% +0.00% +0.05% / +0.09% +0.64% +0.55%] index_copy_ reverse : Elapsed 0.022 ms (2.200 ms / 100) 2.251 -> 2.255 ( +0.18%) [ +0.00% +0.09% +0.13% / +0.18% +0.67% +0.84%] index_add_ spread : Elapsed 0.023 ms (2.251 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.14% +0.00% +0.14% / +0.05% +0.87% +0.87%] index_copy_ spread : Elapsed 0.022 ms (2.196 ms / 100) 2.253 -> 2.255 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +0.58% +0.71%] index_add_ strided 3 : Elapsed 0.023 ms (2.255 ms / 100) 2.197 -> 2.197 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.46% +0.55%] index_copy_ strided 3 : Elapsed 0.022 ms (2.198 ms / 100) 2.253 -> 2.257 ( +0.18%) [ +0.09% +0.27% +0.00% / +0.18% +0.93% +0.58%] index_add_ strided 7 : Elapsed 0.023 ms (2.255 ms / 100) 2.194 -> 2.197 ( +0.14%) [ +0.23% +0.14% +0.00% / +0.14% +0.73% +0.55%] index_copy_ strided 7 : Elapsed 0.022 ms (2.199 ms / 100) 2.254 -> 2.256 ( +0.09%) [ +0.13% +0.18% +0.00% / +0.09% +0.67% +0.67%] index_add_ perm : Elapsed 0.023 ms (2.257 ms / 100) 2.194 -> 2.199 ( +0.23%) [ +0.09% +0.00% +0.14% / +0.23% +0.59% +0.73%] index_copy_ perm : Elapsed 0.022 ms (2.196 ms / 100) 2.254 -> 2.256 ( +0.09%) [ +0.09% +0.27% +0.00% / +0.09% +0.58% +0.67%] index_add_ perm_sorted : Elapsed 0.023 ms (2.256 ms / 100) 2.196 -> 2.196 ( +0.00%) [ +0.00% +0.00% +0.23% / +0.00% +0.64% +0.64%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.196 ms / 100) 9.190 -> 9.207 ( +0.18%) [ +0.08% +0.03% +0.00% / +0.18% +0.23% +0.38%] index_select const : Elapsed 0.092 ms (9.197 ms / 100) 9.212 -> 9.229 ( +0.18%) [ +0.00% +0.12% +0.15% / +0.18% +0.41% +0.39%] index_select wrap : Elapsed 0.092 ms (9.212 ms / 100) 9.201 -> 9.201 ( +0.00%) [ +0.00% +0.23% +0.09% / +0.00% +0.65% +0.51%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.204 -> 9.207 ( +0.03%) [ +0.02% +0.12% +0.00% / +0.03% +0.48% +0.38%] index_select reverse : Elapsed 0.092 ms (9.206 ms / 100) 9.181 -> 9.203 ( +0.24%) [ +0.23% +0.24% +0.00% / +0.24% +0.58% +0.27%] index_select skip64 : Elapsed 0.092 ms (9.202 ms / 100) 9.186 -> 9.195 ( +0.10%) [ +0.08% +0.00% +0.13% / +0.10% +0.37% +0.42%] index_select skip256 : Elapsed 0.092 ms (9.193 ms / 100) 9.233 -> 9.234 ( +0.01%) [ +0.00% +0.03% +0.14% / +0.01% +0.13% +0.28%] index_select spread : Elapsed 0.092 ms (9.233 ms / 100) 9.215 -> 9.214 ( -0.01%) [ +0.14% +0.00% +0.07% / -0.01% +0.27% +0.39%] index_select strided 3 : Elapsed 0.092 ms (9.228 ms / 100) 9.220 -> 9.226 ( +0.07%) [ +0.10% +0.00% +0.11% / +0.07% +0.27% +0.18%] index_select random : Elapsed 0.092 ms (9.229 ms / 100) 9.228 -> 9.235 ( +0.08%) [ +0.00% +0.01% +0.07% / +0.08% +0.35% +0.29%] index_select random_sorted : Elapsed 0.092 ms (9.228 ms / 100) out_shape = [5, 4, 20, 40] in_shape = [5, 4, 16, 40] idx_dim = 2 B = [5, 4, 20, 40] (stride (3200, 1, 160, 4)) dim = 2 fill_cnt = 16 2.068 -> 2.069 ( +0.05%) [ +0.19% +0.05% +0.00% / +0.05% +0.92% +0.92%] index_fill_ const : Elapsed 0.021 ms (2.072 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.87% +0.87%] index_fill_ linear : Elapsed 0.021 ms (2.069 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.63% +0.68%] index_fill_ reverse : Elapsed 0.021 ms (2.071 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.68% +0.68%] index_fill_ skip64 : Elapsed 0.021 ms (2.072 ms / 100) 2.066 -> 2.066 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.21% +1.11%] index_fill_ skip256 : Elapsed 0.021 ms (2.066 ms / 100) 2.065 -> 2.066 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +1.40% +1.21%] index_fill_ spread : Elapsed 0.021 ms (2.066 ms / 100) 2.065 -> 2.064 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +1.50% +1.26%] index_fill_ strided 3 : Elapsed 0.021 ms (2.065 ms / 100) 2.065 -> 2.064 ( -0.05%) [ +0.10% +0.00% +0.00% / -0.05% +1.21% +1.36%] index_fill_ strided 5 : Elapsed 0.021 ms (2.067 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.72% +0.77%] index_fill_ strided 7 : Elapsed 0.021 ms (2.069 ms / 100) 2.068 -> 2.069 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.82% +0.82%] index_fill_ strided 8 : Elapsed 0.021 ms (2.069 ms / 100) 2.067 -> 2.067 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.02% +0.97%] index_fill_ strided 16 : Elapsed 0.021 ms (2.067 ms / 100) 2.066 -> 2.068 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.97% +1.16%] index_fill_ random : Elapsed 0.021 ms (2.067 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.68% +0.68%] index_fill_ random_sorted : Elapsed 0.021 ms (2.070 ms / 100) 2.070 -> 2.070 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.68% +0.68%] index_fill_ perm : Elapsed 0.021 ms (2.070 ms / 100) 2.073 -> 2.074 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.43% +0.43%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.074 ms / 100) B = [5, 4, 20, 40] (stride (40, 4000, 200, 1)) A = [5, 4, 16, 40] (stride (160, 1, 800, 4)) dim = 2 3.967 -> 3.960 ( -0.18%) [ +0.08% +0.00% +0.00% / -0.18% +0.71% +0.50%] index_add_ linear : Elapsed 0.040 ms (3.970 ms / 100) 3.832 -> 3.832 ( +0.00%) [ +0.13% +0.03% +0.00% / +0.00% +0.81% +0.68%] index_copy_ linear : Elapsed 0.038 ms (3.837 ms / 100) 3.960 -> 3.961 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.78% +0.78%] index_add_ reverse : Elapsed 0.040 ms (3.961 ms / 100) 3.831 -> 3.835 ( +0.10%) [ +0.13% +0.10% +0.00% / +0.10% +0.86% +0.86%] index_copy_ reverse : Elapsed 0.038 ms (3.836 ms / 100) 3.959 -> 3.959 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.73% +0.71%] index_add_ spread : Elapsed 0.040 ms (3.959 ms / 100) 3.829 -> 3.829 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.68%] index_copy_ spread : Elapsed 0.038 ms (3.829 ms / 100) 3.963 -> 3.963 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.73% +0.78%] index_add_ strided 3 : Elapsed 0.040 ms (3.964 ms / 100) 3.844 -> 3.844 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.78% +0.81%] index_copy_ strided 3 : Elapsed 0.038 ms (3.845 ms / 100) 3.959 -> 3.964 ( +0.13%) [ +0.00% +0.10% +0.13% / +0.13% +0.81% +0.83%] index_add_ strided 7 : Elapsed 0.040 ms (3.959 ms / 100) 3.847 -> 3.848 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.57% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.849 ms / 100) 3.962 -> 3.964 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.58% +0.56%] index_add_ perm : Elapsed 0.040 ms (3.963 ms / 100) 3.833 -> 3.832 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.57% +0.52%] index_copy_ perm : Elapsed 0.038 ms (3.834 ms / 100) 3.960 -> 3.962 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.76% +0.68%] index_add_ perm_sorted : Elapsed 0.040 ms (3.963 ms / 100) 3.833 -> 3.837 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.78% +0.78%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.837 ms / 100) 5.558 -> 5.550 ( -0.14%) [ +0.00% +0.05% +0.02% / +0.00% -0.05% -0.14%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.23% +0.00% +0.05% / -0.02% -0.13% +0.04%] index_select wrap : Elapsed 0.056 ms (5.577 ms / 100) 5.561 -> 5.565 ( +0.07%) [ +0.00% +0.14% +0.05% / +0.11% +0.07% +0.18%] index_select linear : Elapsed 0.056 ms (5.561 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.07% +0.22%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.553 -> 5.548 ( -0.09%) [ +0.13% +0.02% +0.00% / -0.09% +0.14% +0.29%] index_select skip64 : Elapsed 0.056 ms (5.560 ms / 100) 5.558 -> 5.558 ( +0.00%) [ +0.02% +0.00% +0.14% / +0.05% +0.11% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.561 -> 5.562 ( +0.02%) [ +0.04% +0.00% +0.14% / +0.02% +0.02% +0.09%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.563 ( -0.07%) [ +0.00% +0.11% +0.07% / -0.07% -0.04% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.07% +0.00% +0.05% / -0.05% +0.09% +0.09%] index_select strided 5 : Elapsed 0.056 ms (5.571 ms / 100) 5.567 -> 5.563 ( -0.07%) [ +0.00% +0.09% +0.04% / -0.07% +0.00% -0.04%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.25% +0.13% +0.00% / +0.14% +0.14% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.562 -> 5.557 ( -0.09%) [ +0.02% +0.00% +0.11% / -0.02% -0.09% +0.04%] index_select random : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.05% +0.00% +0.04% / -0.07% +0.00% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.566 ms / 100) B = [5, 4, 20, 40] (stride (1, 200, 800, 5)) A = [5, 4, 16, 40] (stride (40, 3200, 200, 1)) dim = 2 3.940 -> 3.941 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.79% +0.79%] index_add_ linear : Elapsed 0.039 ms (3.942 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.79% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.816 ms / 100) 3.934 -> 3.935 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.81% +0.84%] index_add_ reverse : Elapsed 0.039 ms (3.938 ms / 100) 3.806 -> 3.808 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.81% +0.81%] index_copy_ reverse : Elapsed 0.038 ms (3.809 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.76% +0.74%] index_add_ spread : Elapsed 0.039 ms (3.939 ms / 100) 3.837 -> 3.838 ( +0.03%) [ +0.00% +0.08% +0.05% / +0.03% +0.65% +0.78%] index_copy_ spread : Elapsed 0.038 ms (3.837 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.81%] index_add_ strided 3 : Elapsed 0.039 ms (3.935 ms / 100) 3.809 -> 3.809 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.74% +0.76%] index_copy_ strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.76% +0.74%] index_add_ strided 7 : Elapsed 0.039 ms (3.937 ms / 100) 3.810 -> 3.808 ( -0.05%) [ +0.00% +0.08% +0.00% / -0.05% +0.71% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.810 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.71% +0.74%] index_add_ perm : Elapsed 0.039 ms (3.942 ms / 100) 3.814 -> 3.818 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.73% +0.71%] index_copy_ perm : Elapsed 0.038 ms (3.815 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.039 ms (3.938 ms / 100) 3.811 -> 3.811 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.63%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.811 ms / 100) 5.558 -> 5.552 ( -0.11%) [ +0.05% +0.00% +0.11% / +0.02% +0.00% -0.11%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.574 -> 5.572 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.04% +0.05% +0.04%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.568 ( +0.07%) [ +0.09% +0.14% +0.00% / +0.07% +0.22% +0.25%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.564 ( -0.13%) [ +0.00% +0.11% +0.04% / -0.13% +0.20% +0.02%] index_select reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.556 -> 5.552 ( -0.07%) [ +0.00% +0.04% +0.04% / +0.04% +0.02% -0.07%] index_select skip64 : Elapsed 0.056 ms (5.556 ms / 100) 5.553 -> 5.555 ( +0.04%) [ +0.02% +0.18% +0.00% / +0.07% +0.04% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.554 ms / 100) 5.573 -> 5.572 ( -0.02%) [ +0.05% +0.00% +0.02% / +0.00% -0.02% +0.07%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.568 -> 5.572 ( +0.07%) [ +0.00% +0.13% +0.05% / +0.07% +0.20% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.04% +0.07% +0.00% / +0.04% +0.07% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.570 -> 5.570 ( +0.00%) [ +0.04% +0.00% +0.07% / +0.02% +0.23% +0.00%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.554 -> 5.558 ( +0.07%) [ +0.09% +0.00% +0.23% / +0.07% +0.13% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.00% +0.04% +0.00% / +0.02% +0.18% +0.13%] index_select random : Elapsed 0.056 ms (5.564 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.00% +0.09% +0.04% / -0.07% +0.14% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.566 ms / 100) B = [5, 4, 20, 40] (stride (1, 100, 5, 400)) A = [5, 4, 16, 40] (stride (64, 1, 4, 320)) dim = 2 4.445 -> 4.443 ( -0.04%) [ +0.02% +0.00% +0.00% / -0.04% +0.72% +0.67%] index_add_ linear : Elapsed 0.044 ms (4.446 ms / 100) 4.272 -> 4.273 ( +0.02%) [ +0.00% +0.05% +0.07% / +0.02% +0.96% +0.91%] index_copy_ linear : Elapsed 0.043 ms (4.272 ms / 100) 4.437 -> 4.445 ( +0.18%) [ +0.09% +0.16% +0.00% / +0.18% +0.83% +0.86%] index_add_ reverse : Elapsed 0.044 ms (4.441 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.02% +0.12% +0.00% / +0.05% +0.84% +0.82%] index_copy_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.447 -> 4.447 ( +0.00%) [ +0.09% +0.07% +0.00% / +0.00% +0.72% +0.79%] index_add_ spread : Elapsed 0.045 ms (4.451 ms / 100) 4.275 -> 4.280 ( +0.12%) [ +0.09% +0.09% +0.00% / +0.12% +0.75% +0.91%] index_copy_ spread : Elapsed 0.043 ms (4.279 ms / 100) 4.444 -> 4.445 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.72% +0.63%] index_add_ strided 3 : Elapsed 0.044 ms (4.445 ms / 100) 4.272 -> 4.276 ( +0.09%) [ +0.02% +0.00% +0.09% / +0.09% +0.84% +0.77%] index_copy_ strided 3 : Elapsed 0.043 ms (4.273 ms / 100) 4.444 -> 4.444 ( +0.00%) [ +0.00% +0.25% +0.02% / +0.00% +0.54% +0.61%] index_add_ strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.275 -> 4.275 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.65% +0.70%] index_copy_ strided 7 : Elapsed 0.043 ms (4.278 ms / 100) 4.444 -> 4.447 ( +0.07%) [ +0.14% +0.00% +0.18% / +0.07% +0.72% +0.81%] index_add_ perm : Elapsed 0.044 ms (4.450 ms / 100) 4.277 -> 4.282 ( +0.12%) [ +0.07% +0.00% +0.19% / +0.12% +0.77% +0.77%] index_copy_ perm : Elapsed 0.043 ms (4.280 ms / 100) 4.444 -> 4.444 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.61% +0.52%] index_add_ perm_sorted : Elapsed 0.044 ms (4.444 ms / 100) 4.280 -> 4.283 ( +0.07%) [ +0.00% +0.05% +0.00% / +0.07% +0.79% +0.79%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.280 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.09% +0.18% +0.00% / +0.05% +0.13% +0.07%] index_select const : Elapsed 0.056 ms (5.574 ms / 100) 5.591 -> 5.590 ( -0.02%) [ +0.11% +0.05% +0.00% / +0.18% +0.00% -0.02%] index_select wrap : Elapsed 0.056 ms (5.597 ms / 100) 5.589 -> 5.594 ( +0.09%) [ +0.21% +0.13% +0.00% / +0.09% +0.14% +0.13%] index_select linear : Elapsed 0.056 ms (5.601 ms / 100) 5.592 -> 5.588 ( -0.07%) [ +0.13% +0.09% +0.00% / +0.05% -0.07% +0.04%] index_select reverse : Elapsed 0.056 ms (5.599 ms / 100) 5.564 -> 5.573 ( +0.16%) [ +0.13% +0.00% +0.09% / +0.16% +0.18% +0.27%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.07% +0.00% +0.00% / -0.04% +0.07% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.576 ms / 100) 5.594 -> 5.585 ( -0.16%) [ +0.02% +0.07% +0.00% / +0.07% -0.07% -0.16%] index_select spread : Elapsed 0.056 ms (5.595 ms / 100) 5.594 -> 5.593 ( -0.02%) [ +0.00% +0.20% +0.20% / +0.09% -0.02% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.595 -> 5.588 ( -0.13%) [ +0.07% +0.20% +0.00% / +0.04% -0.07% -0.13%] index_select strided 5 : Elapsed 0.056 ms (5.599 ms / 100) 5.593 -> 5.594 ( +0.02%) [ +0.00% +0.07% +0.18% / +0.02% +0.02% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.593 ms / 100) 5.576 -> 5.575 ( -0.02%) [ +0.09% +0.05% +0.00% / +0.00% -0.02% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.581 ms / 100) 5.596 -> 5.591 ( -0.09%) [ +0.00% +0.02% +0.02% / -0.07% -0.07% -0.09%] index_select random : Elapsed 0.056 ms (5.596 ms / 100) 5.589 -> 5.592 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.13% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.594 ms / 100) out_shape = [5, 4, 16, 20] in_shape = [5, 4, 16, 40] idx_dim = 3 B = [5, 4, 16, 20] (stride (1280, 320, 1, 16)) A = [5, 4, 16, 40] (stride (2560, 640, 40, 1)) dim = 3 2.315 -> 2.317 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.17% +0.22%] index_select const : Elapsed 0.023 ms (2.315 ms / 100) 2.321 -> 2.322 ( +0.04%) [ +0.13% +0.17% +0.00% / +0.13% +0.09% +0.04%] index_select wrap : Elapsed 0.023 ms (2.324 ms / 100) 2.321 -> 2.320 ( -0.04%) [ +0.17% +0.00% +0.22% / +0.04% -0.04% +0.04%] index_select linear : Elapsed 0.023 ms (2.325 ms / 100) 2.322 -> 2.320 ( -0.09%) [ +0.13% +0.00% +0.09% / -0.09% -0.04% -0.04%] index_select reverse : Elapsed 0.023 ms (2.325 ms / 100) 2.315 -> 2.316 ( +0.04%) [ +0.00% +0.30% +0.09% / +0.13% +0.17% +0.04%] index_select skip64 : Elapsed 0.023 ms (2.315 ms / 100) 2.311 -> 2.318 ( +0.30%) [ +0.39% +0.00% +0.22% / +0.30% +0.30% +0.56%] index_select skip256 : Elapsed 0.023 ms (2.320 ms / 100) 2.327 -> 2.328 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.09% +0.04% +0.13%] index_select spread : Elapsed 0.023 ms (2.329 ms / 100) 2.326 -> 2.330 ( +0.17%) [ +0.09% +0.09% +0.00% / +0.17% +0.21% +0.26%] index_select strided 3 : Elapsed 0.023 ms (2.328 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.13% +0.00% +0.30% / +0.30% +0.04% +0.04%] index_select strided 5 : Elapsed 0.023 ms (2.327 ms / 100) 2.322 -> 2.326 ( +0.17%) [ +0.22% +0.00% +0.13% / +0.17% +0.43% +0.22%] index_select strided 7 : Elapsed 0.023 ms (2.327 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.04% +0.22% +0.00% / +0.04% +0.22% +0.17%] index_select strided 8 : Elapsed 0.023 ms (2.325 ms / 100) 2.323 -> 2.327 ( +0.17%) [ +0.30% +0.00% +0.17% / +0.17% +0.34% +0.52%] index_select strided 16 : Elapsed 0.023 ms (2.330 ms / 100) 2.324 -> 2.325 ( +0.04%) [ +0.04% +0.00% +0.13% / +0.04% +0.22% +0.30%] index_select random : Elapsed 0.023 ms (2.325 ms / 100) 2.323 -> 2.326 ( +0.13%) [ +0.30% +0.00% +0.30% / +0.26% +0.13% +0.22%] index_select random_sorted : Elapsed 0.023 ms (2.330 ms / 100) 2.328 -> 2.328 ( +0.00%) [ +0.00% +0.21% +0.04% / +0.00% +0.04% +0.13%] index_select perm : Elapsed 0.023 ms (2.328 ms / 100) 2.326 -> 2.314 ( -0.52%) [ +0.26% +0.13% +0.00% / +0.13% -0.13% -0.52%] index_select perm_sorted : Elapsed 0.023 ms (2.332 ms / 100) B = [5, 4, 16, 20] (stride (320, 1600, 1, 16)) A = [5, 4, 16, 40] (stride (640, 3200, 40, 1)) dim = 3 2.447 -> 2.450 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.12% +0.16%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.12% +0.20% +0.00% / +0.12% -0.16% -0.08%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.454 -> 2.448 ( -0.24%) [ +0.12% +0.00% +0.20% / +0.08% -0.16% -0.24%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.16% +0.00% +0.08%] index_select reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.450 -> 2.447 ( -0.12%) [ +0.04% +0.00% +0.12% / +0.04% -0.12% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.452 ( +0.25%) [ +0.16% +0.00% +0.12% / +0.25% +0.29% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.457 ( +0.20%) [ +0.24% +0.20% +0.00% / +0.20% +0.41% +0.45%] index_select spread : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.00% +0.20% +0.20% / +0.08% +0.45% +0.49%] index_select strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.00% +0.24% +0.08% / +0.12% +0.49% +0.45%] index_select strided 5 : Elapsed 0.025 ms (2.454 ms / 100) 2.461 -> 2.457 ( -0.16%) [ +0.04% +0.08% +0.00% / -0.16% +0.28% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.00% +0.12% +0.08% / -0.04% +0.04% +0.33%] index_select strided 8 : Elapsed 0.025 ms (2.460 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.16% +0.04% +0.00% / +0.04% +0.37% +0.33%] index_select strided 16 : Elapsed 0.025 ms (2.463 ms / 100) 2.455 -> 2.459 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.29% +0.53%] index_select random : Elapsed 0.025 ms (2.459 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.04% +0.00% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.08% +0.12%] index_select perm : Elapsed 0.025 ms (2.461 ms / 100) 2.459 -> 2.462 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.24% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [5, 4, 16, 20] (stride (20, 1600, 100, 1)) A = [5, 4, 16, 40] (stride (16, 80, 1, 320)) dim = 3 2.393 -> 2.396 ( +0.13%) [ +0.08% +0.21% +0.00% / +0.13% +0.21% +0.17%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.406 -> 2.402 ( -0.17%) [ +0.00% +0.21% +0.00% / +0.17% -0.17% -0.04%] index_select wrap : Elapsed 0.024 ms (2.406 ms / 100) 2.407 -> 2.404 ( -0.12%) [ +0.04% +0.04% +0.00% / +0.04% -0.08% -0.12%] index_select linear : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.405 ( -0.17%) [ +0.04% +0.12% +0.00% / +0.12% -0.12% -0.17%] index_select reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.396 -> 2.395 ( -0.04%) [ +0.08% +0.17% +0.00% / +0.17% +0.21% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.398 ms / 100) 2.395 -> 2.394 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.13% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.396 ms / 100) 2.404 -> 2.401 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.12% +0.21% +0.37%] index_select spread : Elapsed 0.024 ms (2.405 ms / 100) 2.408 -> 2.406 ( -0.08%) [ +0.00% +0.12% +0.17% / +0.08% -0.08% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.400 -> 2.399 ( -0.04%) [ +0.13% +0.04% +0.00% / +0.13% +0.00% -0.04%] index_select strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.17% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.395 -> 2.399 ( +0.17%) [ +0.13% +0.00% +0.00% / +0.17% +0.38% +0.46%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.395 -> 2.396 ( +0.04%) [ +0.00% +0.21% +0.13% / +0.04% +0.29% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.395 ms / 100) 2.398 -> 2.399 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.58% +0.29%] index_select random : Elapsed 0.024 ms (2.402 ms / 100) 2.397 -> 2.401 ( +0.17%) [ +0.00% +0.00% +0.25% / +0.21% +0.17% +0.29%] index_select random_sorted : Elapsed 0.024 ms (2.397 ms / 100) 2.406 -> 2.402 ( -0.17%) [ +0.00% +0.04% +0.04% / +0.17% -0.04% -0.17%] index_select perm : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.400 ( -0.17%) [ +0.12% +0.25% +0.00% / +0.08% -0.12% -0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) B = [5, 4, 16, 20] (stride (80, 20, 400, 1)) A = [5, 4, 16, 40] (stride (2560, 40, 160, 1)) dim = 3 2.398 -> 2.401 ( +0.13%) [ +0.00% +0.17% +0.00% / +0.13% +0.17% +0.33%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.408 -> 2.404 ( -0.17%) [ +0.17% +0.00% +0.00% / +0.08% -0.17% -0.04%] index_select wrap : Elapsed 0.024 ms (2.412 ms / 100) 2.408 -> 2.405 ( -0.12%) [ +0.00% +0.17% +0.08% / +0.12% -0.08% -0.12%] index_select linear : Elapsed 0.024 ms (2.408 ms / 100) 2.406 -> 2.403 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +0.21% +0.21%] index_select reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.04% +0.17% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.402 ms / 100) 2.399 -> 2.397 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.21% +0.42%] index_select skip256 : Elapsed 0.024 ms (2.401 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.46% +0.41%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.25% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.21% +0.04% +0.00% / +0.17% +0.33% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.12% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.415 -> 2.417 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.17% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.17% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.25% +0.33%] index_select random : Elapsed 0.024 ms (2.414 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.04% +0.25% +0.00% / +0.12% +0.29% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.12% +0.21%] index_select perm : Elapsed 0.024 ms (2.414 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.00% +0.25% +0.04% / +0.04% +0.12% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) B = [5, 4, 16, 20] (stride (80, 1, 400, 4)) A = [5, 4, 16, 40] (stride (640, 3200, 40, 1)) dim = 3 2.452 -> 2.450 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.04% -0.08% +0.04%] index_select const : Elapsed 0.025 ms (2.454 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% -0.08% +0.24%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.459 -> 2.459 ( +0.00%) [ +0.12% +0.20% +0.00% / +0.16% +0.00% +0.12%] index_select linear : Elapsed 0.025 ms (2.462 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.00% +0.04% +0.24% / +0.08% +0.00% -0.04%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.455 -> 2.450 ( -0.20%) [ +0.00% +0.08% +0.04% / -0.08% -0.20% -0.20%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.04% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.00% +0.16% +0.04% / +0.00% +0.16% +0.12%] index_select spread : Elapsed 0.025 ms (2.464 ms / 100) 2.465 -> 2.468 ( +0.12%) [ +0.00% +0.04% +0.04% / +0.16% +0.12% +0.49%] index_select strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.16% +0.16% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.466 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.08% +0.20% +0.00% / -0.04% +0.16% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.20% +0.00%] index_select strided 8 : Elapsed 0.025 ms (2.465 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.20% +0.08% +0.00% / +0.12% +0.00% +0.08%] index_select strided 16 : Elapsed 0.025 ms (2.470 ms / 100) 2.462 -> 2.462 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +0.08% +0.20%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.464 ( -0.08%) [ +0.12% +0.20% +0.00% / +0.00% -0.08% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.32% +0.00% +0.12% / +0.24% +0.16% +0.00%] index_select perm : Elapsed 0.025 ms (2.473 ms / 100) 2.467 -> 2.457 ( -0.41%) [ +0.00% +0.08% +0.04% / +0.00% -0.32% -0.41%] index_select perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) B = [5, 4, 16, 20] (stride (1, 80, 5, 320)) A = [5, 4, 16, 40] (stride (4, 1, 20, 320)) dim = 3 1.451 -> 1.450 ( -0.07%) [ +0.00% +0.21% +0.41% / -0.07% +0.00% +0.21%] index_select const : Elapsed 0.015 ms (1.451 ms / 100) 1.460 -> 1.466 ( +0.41%) [ +0.00% +0.48% +0.41% / +0.68% +0.41% +0.41%] index_select wrap : Elapsed 0.015 ms (1.460 ms / 100) 1.466 -> 1.466 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.07% +0.00% +0.27%] index_select linear : Elapsed 0.015 ms (1.467 ms / 100) 1.464 -> 1.465 ( +0.07%) [ +0.14% +0.00% +0.20% / +0.14% +0.07% +0.27%] index_select reverse : Elapsed 0.015 ms (1.466 ms / 100) 1.452 -> 1.451 ( -0.07%) [ +0.00% +0.07% +0.07% / +0.21% -0.07% +0.14%] index_select skip64 : Elapsed 0.015 ms (1.452 ms / 100) 1.451 -> 1.450 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.28% +0.07%] index_select skip256 : Elapsed 0.015 ms (1.451 ms / 100) 1.470 -> 1.468 ( -0.14%) [ +0.00% +0.14% +0.14% / -0.07% +0.00% -0.14%] index_select spread : Elapsed 0.015 ms (1.470 ms / 100) 1.470 -> 1.474 ( +0.27%) [ +0.07% +0.00% +0.14% / +0.27% +0.48% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.471 ms / 100) 1.456 -> 1.458 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.14% +1.24% +1.10%] index_select strided 5 : Elapsed 0.015 ms (1.459 ms / 100) 1.470 -> 1.473 ( +0.20%) [ +0.00% +0.41% +0.20% / +0.20% +0.41% +0.75%] index_select strided 7 : Elapsed 0.015 ms (1.470 ms / 100) 1.454 -> 1.456 ( +0.14%) [ +0.21% +0.00% +0.00% / +0.14% +0.41% +0.41%] index_select strided 8 : Elapsed 0.015 ms (1.457 ms / 100) 1.453 -> 1.455 ( +0.14%) [ +0.00% +0.21% +0.00% / +0.14% +0.55% +0.48%] index_select strided 16 : Elapsed 0.015 ms (1.453 ms / 100) 1.465 -> 1.465 ( +0.00%) [ +0.27% +0.14% +0.00% / +0.00% +0.48% +0.34%] index_select random : Elapsed 0.015 ms (1.469 ms / 100) 1.468 -> 1.468 ( +0.00%) [ +0.20% +0.07% +0.00% / +0.00% +0.27% +0.48%] index_select random_sorted : Elapsed 0.015 ms (1.471 ms / 100) 1.465 -> 1.467 ( +0.14%) [ +0.27% +0.00% +0.20% / +0.14% +0.20% +0.41%] index_select perm : Elapsed 0.015 ms (1.469 ms / 100) 1.468 -> 1.470 ( +0.14%) [ +0.20% +0.00% +0.27% / +0.41% +0.34% +0.14%] index_select perm_sorted : Elapsed 0.015 ms (1.471 ms / 100) out_shape = [20, 4, 40, 16] in_shape = [5, 4, 40, 16] idx_dim = 0 B = [20, 4, 40, 16] (stride (2560, 1, 64, 4)) A = [5, 4, 40, 16] (stride (4, 1, 20, 800)) dim = 0 1.816 -> 1.826 ( +0.55%) [ +0.00% +0.06% +0.50% / +0.55% +1.43% +1.38%] index_add_ linear : Elapsed 0.018 ms (1.816 ms / 100) 1.761 -> 1.771 ( +0.57%) [ +0.00% +0.23% +0.51% / +0.57% +1.14% +1.36%] index_copy_ linear : Elapsed 0.018 ms (1.761 ms / 100) 1.821 -> 1.827 ( +0.33%) [ +0.22% +0.00% +0.27% / +0.33% +0.93% +1.04%] index_add_ reverse : Elapsed 0.018 ms (1.825 ms / 100) 1.768 -> 1.775 ( +0.40%) [ +0.11% +0.00% +0.34% / +0.40% +0.79% +1.13%] index_copy_ reverse : Elapsed 0.018 ms (1.770 ms / 100) 1.815 -> 1.820 ( +0.28%) [ +0.39% +0.00% +0.28% / +0.28% +1.32% +1.32%] index_add_ spread : Elapsed 0.018 ms (1.822 ms / 100) 1.763 -> 1.768 ( +0.28%) [ +0.00% +0.06% +0.11% / +0.28% +1.08% +1.36%] index_copy_ spread : Elapsed 0.018 ms (1.763 ms / 100) 1.816 -> 1.818 ( +0.11%) [ +0.06% +0.00% +0.33% / +0.11% +1.54% +1.71%] index_add_ strided 3 : Elapsed 0.018 ms (1.817 ms / 100) 1.760 -> 1.766 ( +0.34%) [ +0.28% +0.00% +0.28% / +0.34% +1.76% +1.76%] index_copy_ strided 3 : Elapsed 0.018 ms (1.765 ms / 100) 1.815 -> 1.823 ( +0.44%) [ +0.00% +0.11% +0.44% / +0.44% +1.27% +1.05%] index_add_ strided 7 : Elapsed 0.018 ms (1.815 ms / 100) 1.756 -> 1.768 ( +0.68%) [ +0.00% +0.11% +0.80% / +0.68% +1.37% +1.42%] index_copy_ strided 7 : Elapsed 0.018 ms (1.756 ms / 100) 1.821 -> 1.826 ( +0.27%) [ +0.22% +0.00% +0.27% / +0.27% +0.88% +0.77%] index_add_ perm : Elapsed 0.018 ms (1.825 ms / 100) 1.764 -> 1.775 ( +0.62%) [ +0.00% +0.06% +0.34% / +0.62% +0.85% +0.96%] index_copy_ perm : Elapsed 0.018 ms (1.764 ms / 100) 1.818 -> 1.828 ( +0.55%) [ +0.00% +0.22% +0.39% / +0.55% +0.99% +0.77%] index_add_ perm_sorted : Elapsed 0.018 ms (1.818 ms / 100) 1.761 -> 1.772 ( +0.62%) [ +0.00% +0.06% +0.51% / +0.62% +1.02% +0.85%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.761 ms / 100) 8.216 -> 8.220 ( +0.05%) [ +0.00% +0.15% +0.17% / +0.16% +0.05% +0.06%] index_select const : Elapsed 0.082 ms (8.216 ms / 100) 8.213 -> 8.222 ( +0.11%) [ +0.30% +0.00% +0.10% / +0.11% +0.35% +0.17%] index_select wrap : Elapsed 0.082 ms (8.238 ms / 100) 8.225 -> 8.224 ( -0.01%) [ +0.00% +0.16% +0.07% / -0.01% +0.17% +0.40%] index_select linear : Elapsed 0.082 ms (8.225 ms / 100) 8.217 -> 8.231 ( +0.17%) [ +0.29% +0.00% +0.09% / +0.19% +0.17% +0.19%] index_select reverse : Elapsed 0.082 ms (8.241 ms / 100) 8.218 -> 8.209 ( -0.11%) [ +0.00% +0.01% +0.33% / -0.11% +0.27% +0.05%] index_select skip64 : Elapsed 0.082 ms (8.218 ms / 100) 8.207 -> 8.214 ( +0.09%) [ +0.23% +0.29% +0.00% / +0.09% +0.35% +0.38%] index_select skip256 : Elapsed 0.082 ms (8.226 ms / 100) 8.223 -> 8.241 ( +0.22%) [ +0.00% +0.33% +0.23% / +0.34% +0.22% +0.33%] index_select spread : Elapsed 0.082 ms (8.223 ms / 100) 8.216 -> 8.224 ( +0.10%) [ +0.05% +0.00% +0.09% / +0.30% +0.60% +0.10%] index_select strided 3 : Elapsed 0.082 ms (8.220 ms / 100) 8.216 -> 8.238 ( +0.27%) [ +0.12% +0.00% +0.24% / +0.30% +0.40% +0.27%] index_select random : Elapsed 0.082 ms (8.226 ms / 100) 8.223 -> 8.240 ( +0.21%) [ +0.00% +0.27% +0.17% / +0.21% +0.44% +0.50%] index_select random_sorted : Elapsed 0.082 ms (8.223 ms / 100) B = [20, 4, 40, 16] (stride (640, 12800, 1, 40)) A = [5, 4, 40, 16] (stride (64, 1, 320, 4)) dim = 0 1.805 -> 1.809 ( +0.22%) [ +0.00% +0.17% +0.11% / +0.22% +0.72% +0.61%] index_add_ linear : Elapsed 0.018 ms (1.805 ms / 100) 1.767 -> 1.767 ( +0.00%) [ +0.11% +0.06% +0.00% / +0.00% +0.40% +0.51%] index_copy_ linear : Elapsed 0.018 ms (1.769 ms / 100) 1.807 -> 1.807 ( +0.00%) [ +0.00% +0.06% +0.22% / +0.00% +0.61% +0.50%] index_add_ reverse : Elapsed 0.018 ms (1.807 ms / 100) 1.766 -> 1.768 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.51% +0.34%] index_copy_ reverse : Elapsed 0.018 ms (1.766 ms / 100) 1.812 -> 1.810 ( -0.11%) [ +0.22% +0.00% +0.11% / +0.00% -0.06% -0.11%] index_add_ spread : Elapsed 0.018 ms (1.816 ms / 100) 1.771 -> 1.766 ( -0.28%) [ +0.11% +0.00% +0.00% / -0.17% -0.28% -0.11%] index_copy_ spread : Elapsed 0.018 ms (1.773 ms / 100) 1.807 -> 1.805 ( -0.11%) [ +0.22% +0.00% +0.06% / -0.11% +0.77% +1.05%] index_add_ strided 3 : Elapsed 0.018 ms (1.811 ms / 100) 1.762 -> 1.761 ( -0.06%) [ +0.23% +0.00% +0.17% / -0.06% +0.74% +1.19%] index_copy_ strided 3 : Elapsed 0.018 ms (1.766 ms / 100) 1.827 -> 1.827 ( +0.00%) [ +0.11% +0.27% +0.00% / +0.00% +0.22% +0.33%] index_add_ strided 7 : Elapsed 0.018 ms (1.829 ms / 100) 1.787 -> 1.790 ( +0.17%) [ +0.00% +0.06% +0.11% / +0.17% +0.34% +1.12%] index_copy_ strided 7 : Elapsed 0.018 ms (1.787 ms / 100) 1.813 -> 1.817 ( +0.22%) [ +0.11% +0.22% +0.00% / +0.22% +0.77% +0.66%] index_add_ perm : Elapsed 0.018 ms (1.815 ms / 100) 1.770 -> 1.774 ( +0.23%) [ +0.00% +0.28% +0.28% / +0.23% +0.68% +1.19%] index_copy_ perm : Elapsed 0.018 ms (1.770 ms / 100) 1.816 -> 1.816 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.44% +0.44%] index_add_ perm_sorted : Elapsed 0.018 ms (1.817 ms / 100) 1.770 -> 1.778 ( +0.45%) [ +0.23% +0.00% +0.34% / +0.45% +0.79% +0.73%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.774 ms / 100) 8.506 -> 8.533 ( +0.32%) [ +0.26% +0.16% +0.00% / +0.32% +0.33% +0.43%] index_select const : Elapsed 0.085 ms (8.528 ms / 100) 8.552 -> 8.552 ( +0.00%) [ +0.00% +0.08% +0.09% / +0.02% +0.26% +0.00%] index_select wrap : Elapsed 0.086 ms (8.552 ms / 100) 8.546 -> 8.549 ( +0.04%) [ +0.06% +0.00% +0.01% / +0.04% +0.11% +0.23%] index_select linear : Elapsed 0.086 ms (8.551 ms / 100) 8.520 -> 8.545 ( +0.29%) [ +0.12% +0.00% +0.06% / +0.34% +0.38% +0.29%] index_select reverse : Elapsed 0.085 ms (8.530 ms / 100) 8.509 -> 8.515 ( +0.07%) [ +0.00% +0.11% +0.06% / +0.19% +0.45% +0.07%] index_select skip64 : Elapsed 0.085 ms (8.509 ms / 100) 8.515 -> 8.533 ( +0.21%) [ +0.18% +0.00% +0.22% / +0.21% +0.33% +0.39%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.534 -> 8.542 ( +0.09%) [ +0.13% +0.15% +0.00% / +0.09% +0.09% +0.28%] index_select spread : Elapsed 0.085 ms (8.545 ms / 100) 8.547 -> 8.557 ( +0.12%) [ +0.20% +0.00% +0.27% / +0.12% +0.29% +0.53%] index_select strided 3 : Elapsed 0.086 ms (8.564 ms / 100) 8.557 -> 8.558 ( +0.01%) [ +0.04% +0.00% +0.14% / +0.18% +0.09% +0.01%] index_select random : Elapsed 0.086 ms (8.560 ms / 100) 8.541 -> 8.539 ( -0.02%) [ +0.00% +0.15% +0.14% / -0.02% +0.19% +0.08%] index_select random_sorted : Elapsed 0.085 ms (8.541 ms / 100) B = [20, 4, 40, 16] (stride (1, 12800, 20, 800)) A = [5, 4, 40, 16] (stride (16, 3200, 80, 1)) dim = 0 1.845 -> 1.842 ( -0.16%) [ +0.11% +0.00% +0.16% / +0.05% -0.16% +0.22%] index_add_ linear : Elapsed 0.018 ms (1.847 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.11% +0.00% +0.33% / -0.06% +0.00% +0.11%] index_copy_ linear : Elapsed 0.018 ms (1.815 ms / 100) 1.848 -> 1.845 ( -0.16%) [ +0.32% +0.00% +0.11% / +0.11% -0.16% -0.11%] index_add_ reverse : Elapsed 0.019 ms (1.854 ms / 100) 1.813 -> 1.812 ( -0.06%) [ +0.17% +0.06% +0.00% / +0.00% +0.06% -0.06%] index_copy_ reverse : Elapsed 0.018 ms (1.816 ms / 100) 1.867 -> 1.860 ( -0.37%) [ +0.00% +0.16% +0.27% / -0.05% -0.37% -0.16%] index_add_ spread : Elapsed 0.019 ms (1.867 ms / 100) 1.839 -> 1.839 ( +0.00%) [ +0.00% +0.38% +0.44% / +0.05% +0.05% +0.00%] index_copy_ spread : Elapsed 0.018 ms (1.839 ms / 100) 1.864 -> 1.860 ( -0.21%) [ +0.27% +0.00% +0.16% / +0.27% -0.11% -0.21%] index_add_ strided 3 : Elapsed 0.019 ms (1.869 ms / 100) 1.839 -> 1.836 ( -0.16%) [ +0.22% +0.00% +0.00% / +0.16% -0.16% -0.16%] index_copy_ strided 3 : Elapsed 0.018 ms (1.843 ms / 100) 1.865 -> 1.860 ( -0.27%) [ +0.11% +0.05% +0.00% / +0.27% -0.11% -0.27%] index_add_ strided 7 : Elapsed 0.019 ms (1.867 ms / 100) 1.840 -> 1.835 ( -0.27%) [ +0.11% +0.16% +0.00% / +0.05% -0.27% -0.27%] index_copy_ strided 7 : Elapsed 0.018 ms (1.842 ms / 100) 1.859 -> 1.856 ( -0.16%) [ +0.11% +0.22% +0.00% / +0.00% +0.00% -0.16%] index_add_ perm : Elapsed 0.019 ms (1.861 ms / 100) 1.831 -> 1.827 ( -0.22%) [ +0.00% +0.11% +0.00% / -0.05% -0.22% -0.22%] index_copy_ perm : Elapsed 0.018 ms (1.831 ms / 100) 1.858 -> 1.853 ( -0.27%) [ +0.43% +0.00% +0.16% / +0.11% -0.11% -0.27%] index_add_ perm_sorted : Elapsed 0.019 ms (1.866 ms / 100) 1.826 -> 1.823 ( -0.16%) [ +0.27% +0.27% +0.00% / +0.22% +0.05% -0.16%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) 8.281 -> 8.285 ( +0.05%) [ +0.04% +0.02% +0.00% / +0.05% +0.27% +0.45%] index_select const : Elapsed 0.083 ms (8.284 ms / 100) 8.312 -> 8.330 ( +0.22%) [ +0.05% +0.00% +0.12% / +0.22% +0.67% +0.66%] index_select wrap : Elapsed 0.083 ms (8.316 ms / 100) 8.303 -> 8.300 ( -0.04%) [ +0.26% +0.00% +0.13% / -0.04% +0.49% +0.41%] index_select linear : Elapsed 0.083 ms (8.325 ms / 100) 8.307 -> 8.308 ( +0.01%) [ +0.00% +0.16% +0.06% / +0.01% +0.72% +0.53%] index_select reverse : Elapsed 0.083 ms (8.307 ms / 100) 8.266 -> 8.291 ( +0.30%) [ +0.11% +0.04% +0.00% / +0.30% +0.39% +0.58%] index_select skip64 : Elapsed 0.083 ms (8.275 ms / 100) 8.276 -> 8.264 ( -0.14%) [ +0.05% +0.00% +0.06% / -0.14% +0.50% +0.45%] index_select skip256 : Elapsed 0.083 ms (8.280 ms / 100) 8.301 -> 8.288 ( -0.16%) [ +0.29% +0.29% +0.00% / -0.16% +0.35% +0.61%] index_select spread : Elapsed 0.083 ms (8.325 ms / 100) 8.314 -> 8.338 ( +0.29%) [ +0.20% +0.00% +0.20% / +0.38% +0.29% +0.46%] index_select strided 3 : Elapsed 0.083 ms (8.331 ms / 100) 8.316 -> 8.318 ( +0.02%) [ +0.14% +0.10% +0.00% / +0.02% +0.48% +0.25%] index_select random : Elapsed 0.083 ms (8.328 ms / 100) 8.295 -> 8.289 ( -0.07%) [ +0.00% +0.23% +0.24% / -0.07% +0.74% +0.49%] index_select random_sorted : Elapsed 0.083 ms (8.295 ms / 100) B = [20, 4, 40, 16] (stride (1, 12800, 20, 800)) A = [5, 4, 40, 16] (stride (1, 3200, 80, 5)) dim = 0 1.701 -> 1.688 ( -0.76%) [ +0.00% +0.00% +0.00% / +0.06% -0.76% -0.53%] index_add_ linear : Elapsed 0.017 ms (1.701 ms / 100) 1.663 -> 1.654 ( -0.54%) [ +0.00% +0.24% +0.12% / +0.24% -0.30% -0.54%] index_copy_ linear : Elapsed 0.017 ms (1.663 ms / 100) 1.700 -> 1.685 ( -0.88%) [ +0.00% +0.06% +0.06% / -0.24% -0.88% -0.47%] index_add_ reverse : Elapsed 0.017 ms (1.700 ms / 100) 1.662 -> 1.650 ( -0.72%) [ +0.12% +0.00% +0.06% / +0.36% -0.60% -0.72%] index_copy_ reverse : Elapsed 0.017 ms (1.664 ms / 100) 1.717 -> 1.704 ( -0.76%) [ +0.00% +0.17% +0.17% / -0.17% -0.47% -0.76%] index_add_ spread : Elapsed 0.017 ms (1.717 ms / 100) 1.693 -> 1.674 ( -1.12%) [ +0.00% +0.06% +0.00% / +0.12% -1.12% -1.12%] index_copy_ spread : Elapsed 0.017 ms (1.693 ms / 100) 1.718 -> 1.708 ( -0.58%) [ +0.00% +0.17% +0.17% / +0.12% -0.58% -0.58%] index_add_ strided 3 : Elapsed 0.017 ms (1.718 ms / 100) 1.689 -> 1.672 ( -1.01%) [ +0.06% +0.12% +0.00% / +0.00% -0.41% -1.01%] index_copy_ strided 3 : Elapsed 0.017 ms (1.690 ms / 100) 1.714 -> 1.702 ( -0.70%) [ +0.35% +0.35% +0.00% / +0.18% -0.35% -0.70%] index_add_ strided 7 : Elapsed 0.017 ms (1.720 ms / 100) 1.691 -> 1.681 ( -0.59%) [ +0.06% +0.47% +0.00% / +0.24% -0.59% -0.53%] index_copy_ strided 7 : Elapsed 0.017 ms (1.692 ms / 100) 1.711 -> 1.709 ( -0.12%) [ +0.18% +0.41% +0.00% / +0.35% -0.12% +0.12%] index_add_ perm : Elapsed 0.017 ms (1.714 ms / 100) 1.687 -> 1.682 ( -0.30%) [ +0.30% +0.06% +0.00% / +0.41% -0.30% +0.00%] index_copy_ perm : Elapsed 0.017 ms (1.692 ms / 100) 1.714 -> 1.705 ( -0.53%) [ +0.18% +0.00% +0.35% / +0.00% -0.53% -0.41%] index_add_ perm_sorted : Elapsed 0.017 ms (1.717 ms / 100) 1.693 -> 1.677 ( -0.95%) [ +0.00% +0.00% +0.00% / -0.30% -0.53% -0.95%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.693 ms / 100) 7.961 -> 7.961 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.46% +0.54%] index_select const : Elapsed 0.080 ms (7.967 ms / 100) 7.965 -> 7.963 ( -0.03%) [ +0.21% +0.00% +0.00% / -0.03% +0.51% +0.43%] index_select wrap : Elapsed 0.080 ms (7.982 ms / 100) 7.964 -> 7.970 ( +0.08%) [ +0.00% +0.15% +0.19% / +0.08% +0.29% +0.50%] index_select linear : Elapsed 0.080 ms (7.964 ms / 100) 7.960 -> 7.952 ( -0.10%) [ +0.15% +0.00% +0.13% / -0.10% +0.53% +0.44%] index_select reverse : Elapsed 0.080 ms (7.972 ms / 100) 7.964 -> 7.960 ( -0.05%) [ +0.00% +0.16% +0.25% / -0.05% +0.58% +0.51%] index_select skip64 : Elapsed 0.080 ms (7.964 ms / 100) 7.968 -> 7.974 ( +0.08%) [ +0.00% +0.03% +0.39% / +0.08% +0.35% +0.33%] index_select skip256 : Elapsed 0.080 ms (7.968 ms / 100) 7.949 -> 7.963 ( +0.18%) [ +0.00% +0.18% +0.15% / +0.18% +0.57% +0.69%] index_select spread : Elapsed 0.079 ms (7.949 ms / 100) 7.964 -> 7.968 ( +0.05%) [ +0.18% +0.00% +0.18% / +0.05% +0.31% +0.68%] index_select strided 3 : Elapsed 0.080 ms (7.978 ms / 100) 7.967 -> 7.964 ( -0.04%) [ +0.04% +0.03% +0.00% / -0.04% +0.26% +0.55%] index_select random : Elapsed 0.080 ms (7.970 ms / 100) 7.968 -> 7.972 ( +0.05%) [ +0.18% +0.00% +0.09% / +0.05% +0.43% +0.65%] index_select random_sorted : Elapsed 0.080 ms (7.982 ms / 100) B = [20, 4, 40, 16] (stride (64, 1, 1280, 4)) A = [5, 4, 40, 16] (stride (640, 3200, 1, 40)) dim = 0 1.867 -> 1.867 ( +0.00%) [ +0.00% +0.16% +0.05% / +0.00% +0.75% +0.70%] index_add_ linear : Elapsed 0.019 ms (1.867 ms / 100) 1.820 -> 1.823 ( +0.16%) [ +0.00% +0.05% +0.05% / +0.16% +0.66% +0.82%] index_copy_ linear : Elapsed 0.018 ms (1.820 ms / 100) 1.867 -> 1.867 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.64% +0.80%] index_add_ reverse : Elapsed 0.019 ms (1.869 ms / 100) 1.820 -> 1.822 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +1.10% +0.93%] index_copy_ reverse : Elapsed 0.018 ms (1.820 ms / 100) 1.878 -> 1.877 ( -0.05%) [ +0.05% +0.00% +0.21% / -0.05% +0.37% +0.32%] index_add_ spread : Elapsed 0.019 ms (1.879 ms / 100) 1.826 -> 1.828 ( +0.11%) [ +0.16% +0.00% +0.33% / +0.11% +0.49% +0.49%] index_copy_ spread : Elapsed 0.018 ms (1.829 ms / 100) 1.873 -> 1.875 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.53% +0.85%] index_add_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.823 -> 1.824 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +0.60% +0.66%] index_copy_ strided 3 : Elapsed 0.018 ms (1.823 ms / 100) 1.874 -> 1.876 ( +0.11%) [ +0.00% +0.05% +0.21% / +0.11% +0.64% +0.48%] index_add_ strided 7 : Elapsed 0.019 ms (1.874 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.00% +0.16% +0.16% / +0.05% +0.55% +0.38%] index_copy_ strided 7 : Elapsed 0.018 ms (1.826 ms / 100) 1.871 -> 1.871 ( +0.00%) [ +0.11% +0.16% +0.00% / +0.00% +0.91% +0.75%] index_add_ perm : Elapsed 0.019 ms (1.873 ms / 100) 1.822 -> 1.826 ( +0.22%) [ +0.00% +0.22% +0.11% / +0.22% +0.88% +0.77%] index_copy_ perm : Elapsed 0.018 ms (1.822 ms / 100) 1.872 -> 1.872 ( +0.00%) [ +0.05% +0.21% +0.00% / +0.00% +0.75% +0.91%] index_add_ perm_sorted : Elapsed 0.019 ms (1.873 ms / 100) 1.824 -> 1.827 ( +0.16%) [ +0.27% +0.00% +0.05% / +0.16% +0.77% +0.88%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.829 ms / 100) 8.520 -> 8.528 ( +0.09%) [ +0.00% +0.04% +0.20% / +0.26% +0.13% +0.09%] index_select const : Elapsed 0.085 ms (8.520 ms / 100) 8.547 -> 8.530 ( -0.20%) [ +0.00% +0.30% +0.05% / -0.20% +0.23% +0.27%] index_select wrap : Elapsed 0.085 ms (8.547 ms / 100) 8.534 -> 8.549 ( +0.18%) [ +0.35% +0.27% +0.00% / +0.18% +0.55% +0.30%] index_select linear : Elapsed 0.086 ms (8.564 ms / 100) 8.535 -> 8.542 ( +0.08%) [ +0.05% +0.01% +0.00% / +0.08% +0.27% +0.18%] index_select reverse : Elapsed 0.085 ms (8.539 ms / 100) 8.521 -> 8.536 ( +0.18%) [ +0.11% +0.11% +0.00% / +0.18% +0.34% +0.36%] index_select skip64 : Elapsed 0.085 ms (8.530 ms / 100) 8.516 -> 8.520 ( +0.05%) [ +0.22% +0.00% +0.04% / +0.05% +0.26% +0.46%] index_select skip256 : Elapsed 0.085 ms (8.535 ms / 100) 8.530 -> 8.543 ( +0.15%) [ +0.19% +0.00% +0.22% / +0.15% +0.41% +0.45%] index_select spread : Elapsed 0.085 ms (8.546 ms / 100) 8.538 -> 8.533 ( -0.06%) [ +0.35% +0.00% +0.11% / -0.06% +0.36% +0.41%] index_select strided 3 : Elapsed 0.086 ms (8.568 ms / 100) 8.547 -> 8.570 ( +0.27%) [ +0.00% +0.02% +0.11% / +0.27% +0.44% +0.40%] index_select random : Elapsed 0.085 ms (8.547 ms / 100) 8.548 -> 8.550 ( +0.02%) [ +0.16% +0.00% +0.07% / +0.02% +0.25% +0.36%] index_select random_sorted : Elapsed 0.086 ms (8.562 ms / 100) B = [20, 4, 40, 16] (stride (64, 1, 1280, 4)) A = [5, 4, 40, 16] (stride (1, 3200, 80, 5)) dim = 0 1.615 -> 1.611 ( -0.25%) [ +0.00% +0.06% +0.19% / +0.25% -0.06% -0.25%] index_add_ linear : Elapsed 0.016 ms (1.615 ms / 100) 1.571 -> 1.570 ( -0.06%) [ +0.45% +0.06% +0.00% / -0.06% +0.13% +0.13%] index_copy_ linear : Elapsed 0.016 ms (1.578 ms / 100) 1.617 -> 1.614 ( -0.19%) [ +0.19% +0.00% +0.12% / +0.25% -0.19% +0.00%] index_add_ reverse : Elapsed 0.016 ms (1.620 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.13% +0.06% +0.00% / +0.32% +0.13% +0.32%] index_copy_ reverse : Elapsed 0.016 ms (1.574 ms / 100) 1.616 -> 1.615 ( -0.06%) [ +0.00% +0.19% +0.06% / +0.00% -0.06% +0.25%] index_add_ spread : Elapsed 0.016 ms (1.616 ms / 100) 1.573 -> 1.571 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.25% +0.25%] index_copy_ spread : Elapsed 0.016 ms (1.573 ms / 100) 1.615 -> 1.614 ( -0.06%) [ +0.19% +0.12% +0.00% / -0.06% +0.19% +0.31%] index_add_ strided 3 : Elapsed 0.016 ms (1.618 ms / 100) 1.569 -> 1.571 ( +0.13%) [ +0.51% +0.06% +0.00% / +0.13% +0.32% +0.70%] index_copy_ strided 3 : Elapsed 0.016 ms (1.577 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.00% +0.25% +0.00% / +0.06% +0.43% +0.37%] index_add_ strided 7 : Elapsed 0.016 ms (1.614 ms / 100) 1.568 -> 1.574 ( +0.38%) [ +0.13% +0.06% +0.00% / +0.38% +0.45% +0.70%] index_copy_ strided 7 : Elapsed 0.016 ms (1.570 ms / 100) 1.616 -> 1.615 ( -0.06%) [ +0.19% +0.19% +0.00% / +0.00% -0.06% +0.06%] index_add_ perm : Elapsed 0.016 ms (1.619 ms / 100) 1.574 -> 1.571 ( -0.19%) [ +0.06% +0.13% +0.00% / -0.19% -0.19% +0.00%] index_copy_ perm : Elapsed 0.016 ms (1.575 ms / 100) 1.616 -> 1.617 ( +0.06%) [ +0.19% +0.12% +0.00% / +0.12% +0.06% +0.12%] index_add_ perm_sorted : Elapsed 0.016 ms (1.619 ms / 100) 1.571 -> 1.574 ( +0.19%) [ +0.06% +0.13% +0.00% / +0.32% +0.19% +0.25%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.572 ms / 100) 7.886 -> 7.896 ( +0.13%) [ +0.13% +0.00% +0.06% / +0.13% +0.44% +0.58%] index_select const : Elapsed 0.079 ms (7.896 ms / 100) 7.882 -> 7.890 ( +0.10%) [ +0.25% +0.13% +0.00% / +0.10% +0.61% +0.49%] index_select wrap : Elapsed 0.079 ms (7.902 ms / 100) 7.887 -> 7.898 ( +0.14%) [ +0.10% +0.00% +0.06% / +0.14% +0.47% +0.37%] index_select linear : Elapsed 0.079 ms (7.895 ms / 100) 7.878 -> 7.896 ( +0.23%) [ +0.23% +0.00% +0.01% / +0.23% +0.61% +0.50%] index_select reverse : Elapsed 0.079 ms (7.896 ms / 100) 7.878 -> 7.889 ( +0.14%) [ +0.25% +0.00% +0.11% / +0.14% +0.47% +0.56%] index_select skip64 : Elapsed 0.079 ms (7.898 ms / 100) 7.886 -> 7.891 ( +0.06%) [ +0.04% +0.13% +0.00% / +0.06% +0.53% +0.36%] index_select skip256 : Elapsed 0.079 ms (7.889 ms / 100) 7.877 -> 7.874 ( -0.04%) [ +0.00% +0.10% +0.05% / -0.04% +0.33% +0.34%] index_select spread : Elapsed 0.079 ms (7.877 ms / 100) 7.890 -> 7.874 ( -0.20%) [ +0.01% +0.00% +0.05% / -0.20% +0.20% +0.41%] index_select strided 3 : Elapsed 0.079 ms (7.891 ms / 100) 7.881 -> 7.889 ( +0.10%) [ +0.16% +0.22% +0.00% / +0.10% +0.55% +0.41%] index_select random : Elapsed 0.079 ms (7.894 ms / 100) 7.884 -> 7.895 ( +0.14%) [ +0.00% +0.13% +0.03% / +0.14% +0.43% +0.43%] index_select random_sorted : Elapsed 0.079 ms (7.884 ms / 100) B = [20, 4, 40, 16] (stride (16, 320, 1280, 1)) A = [5, 4, 40, 16] (stride (2560, 40, 1, 160)) dim = 0 1.795 -> 1.801 ( +0.33%) [ +0.00% +0.22% +0.17% / +0.33% +0.67% +0.72%] index_add_ linear : Elapsed 0.018 ms (1.795 ms / 100) 1.745 -> 1.749 ( +0.23%) [ +0.23% +0.29% +0.00% / +0.23% +0.74% +0.86%] index_copy_ linear : Elapsed 0.017 ms (1.749 ms / 100) 1.795 -> 1.796 ( +0.06%) [ +0.00% +0.00% +0.17% / +0.06% +0.84% +0.72%] index_add_ reverse : Elapsed 0.018 ms (1.795 ms / 100) 1.750 -> 1.749 ( -0.06%) [ +0.17% +0.11% +0.00% / -0.06% +0.51% +0.63%] index_copy_ reverse : Elapsed 0.018 ms (1.753 ms / 100) 1.809 -> 1.812 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.17% +0.44% +0.61%] index_add_ spread : Elapsed 0.018 ms (1.813 ms / 100) 1.764 -> 1.765 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.34% +0.45%] index_copy_ spread : Elapsed 0.018 ms (1.764 ms / 100) 1.806 -> 1.810 ( +0.22%) [ +0.06% +0.00% +0.11% / +0.22% +0.72% +0.94%] index_add_ strided 3 : Elapsed 0.018 ms (1.807 ms / 100) 1.762 -> 1.762 ( +0.00%) [ +0.00% +0.11% +0.23% / +0.00% +0.45% +0.68%] index_copy_ strided 3 : Elapsed 0.018 ms (1.762 ms / 100) 1.800 -> 1.802 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.61% +0.83%] index_add_ strided 7 : Elapsed 0.018 ms (1.802 ms / 100) 1.756 -> 1.756 ( +0.00%) [ +0.06% +0.17% +0.00% / +0.00% +0.51% +0.63%] index_copy_ strided 7 : Elapsed 0.018 ms (1.757 ms / 100) 1.802 -> 1.802 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.61% +0.67%] index_add_ perm : Elapsed 0.018 ms (1.803 ms / 100) 1.755 -> 1.756 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.40% +0.68%] index_copy_ perm : Elapsed 0.018 ms (1.756 ms / 100) 1.808 -> 1.807 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.44% +0.61%] index_add_ perm_sorted : Elapsed 0.018 ms (1.809 ms / 100) 1.758 -> 1.760 ( +0.11%) [ +0.00% +0.06% +0.28% / +0.11% +0.91% +0.80%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.758 ms / 100) 8.209 -> 8.209 ( +0.00%) [ +0.00% +0.24% +0.10% / +0.00% +0.22% +0.09%] index_select const : Elapsed 0.082 ms (8.209 ms / 100) 8.269 -> 8.249 ( -0.24%) [ +0.02% +0.05% +0.00% / -0.18% -0.24% -0.13%] index_select wrap : Elapsed 0.083 ms (8.271 ms / 100) 8.251 -> 8.248 ( -0.04%) [ +0.00% +0.23% +0.18% / +0.29% +0.33% -0.04%] index_select linear : Elapsed 0.083 ms (8.251 ms / 100) 8.236 -> 8.250 ( +0.17%) [ +0.00% +0.13% +0.04% / +0.18% +0.24% +0.17%] index_select reverse : Elapsed 0.082 ms (8.236 ms / 100) 8.212 -> 8.214 ( +0.02%) [ +0.13% +0.11% +0.00% / +0.02% +0.39% +0.26%] index_select skip64 : Elapsed 0.082 ms (8.223 ms / 100) 8.201 -> 8.220 ( +0.23%) [ +0.00% +0.20% +0.43% / +0.23% +0.35% +0.26%] index_select skip256 : Elapsed 0.082 ms (8.201 ms / 100) 8.248 -> 8.248 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.32% +0.21% +0.00%] index_select spread : Elapsed 0.083 ms (8.266 ms / 100) 8.256 -> 8.253 ( -0.04%) [ +0.23% +0.00% +0.25% / +0.06% +0.04% -0.04%] index_select strided 3 : Elapsed 0.083 ms (8.275 ms / 100) 8.265 -> 8.256 ( -0.11%) [ +0.18% +0.00% +0.00% / +0.23% -0.11% -0.06%] index_select random : Elapsed 0.083 ms (8.280 ms / 100) 8.259 -> 8.253 ( -0.07%) [ +0.13% +0.21% +0.00% / -0.07% +0.06% -0.01%] index_select random_sorted : Elapsed 0.083 ms (8.270 ms / 100) B = [20, 4, 40, 16] (stride (1, 320, 1280, 20)) A = [5, 4, 40, 16] (stride (40, 200, 1, 800)) dim = 0 1.955 -> 1.937 ( -0.92%) [ +0.15% +0.15% +0.00% / +0.05% -0.92% -0.92%] index_add_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.915 -> 1.898 ( -0.89%) [ +0.05% +0.16% +0.00% / +0.10% -0.84% -0.89%] index_copy_ linear : Elapsed 0.019 ms (1.916 ms / 100) 1.946 -> 1.928 ( -0.92%) [ +0.21% +0.26% +0.00% / +0.26% -0.72% -0.92%] index_add_ reverse : Elapsed 0.020 ms (1.950 ms / 100) 1.903 -> 1.899 ( -0.21%) [ +0.00% +0.32% +0.32% / +0.21% -0.21% -0.16%] index_copy_ reverse : Elapsed 0.019 ms (1.903 ms / 100) 1.971 -> 1.949 ( -1.12%) [ +0.05% +0.25% +0.00% / +0.05% -1.12% -0.91%] index_add_ spread : Elapsed 0.020 ms (1.972 ms / 100) 1.946 -> 1.916 ( -1.54%) [ +0.15% +0.00% +0.15% / +0.31% -1.23% -1.54%] index_copy_ spread : Elapsed 0.019 ms (1.949 ms / 100) 1.973 -> 1.951 ( -1.12%) [ +0.15% +0.00% +0.00% / -0.05% -1.12% -1.01%] index_add_ strided 3 : Elapsed 0.020 ms (1.976 ms / 100) 1.945 -> 1.917 ( -1.44%) [ +0.00% +0.00% +0.00% / +0.21% -1.23% -1.44%] index_copy_ strided 3 : Elapsed 0.019 ms (1.945 ms / 100) 1.963 -> 1.942 ( -1.07%) [ +0.31% +0.00% +0.15% / +0.31% -1.07% -1.02%] index_add_ strided 7 : Elapsed 0.020 ms (1.969 ms / 100) 1.937 -> 1.912 ( -1.29%) [ +0.21% +0.15% +0.00% / +0.10% -1.19% -1.29%] index_copy_ strided 7 : Elapsed 0.019 ms (1.941 ms / 100) 1.956 -> 1.939 ( -0.87%) [ +0.20% +0.00% +0.10% / +0.26% -0.77% -0.87%] index_add_ perm : Elapsed 0.020 ms (1.960 ms / 100) 1.927 -> 1.903 ( -1.25%) [ +0.05% +0.21% +0.00% / +0.00% -1.25% -1.19%] index_copy_ perm : Elapsed 0.019 ms (1.928 ms / 100) 1.965 -> 1.943 ( -1.12%) [ +0.31% +0.15% +0.00% / -0.05% -0.97% -1.12%] index_add_ perm_sorted : Elapsed 0.020 ms (1.971 ms / 100) 1.933 -> 1.907 ( -1.35%) [ +0.10% +0.00% +0.10% / -0.05% -1.24% -1.35%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.935 ms / 100) 8.591 -> 8.588 ( -0.03%) [ +0.00% +0.06% +0.05% / -0.03% +0.27% +0.51%] index_select const : Elapsed 0.086 ms (8.591 ms / 100) 8.643 -> 8.651 ( +0.09%) [ +0.00% +0.10% +0.17% / +0.09% +0.17% +0.17%] index_select wrap : Elapsed 0.086 ms (8.643 ms / 100) 8.632 -> 8.631 ( -0.01%) [ +0.06% +0.00% +0.03% / -0.01% +0.37% +0.30%] index_select linear : Elapsed 0.086 ms (8.637 ms / 100) 8.635 -> 8.637 ( +0.02%) [ +0.01% +0.10% +0.00% / +0.02% +0.31% +0.31%] index_select reverse : Elapsed 0.086 ms (8.636 ms / 100) 8.584 -> 8.593 ( +0.10%) [ +0.00% +0.20% +0.01% / +0.10% +0.34% +0.36%] index_select skip64 : Elapsed 0.086 ms (8.584 ms / 100) 8.615 -> 8.592 ( -0.27%) [ +0.00% +0.07% +0.07% / -0.27% +0.19% +0.15%] index_select skip256 : Elapsed 0.086 ms (8.615 ms / 100) 8.641 -> 8.660 ( +0.22%) [ +0.00% +0.16% +0.14% / +0.22% +0.35% +0.36%] index_select spread : Elapsed 0.086 ms (8.641 ms / 100) 8.636 -> 8.652 ( +0.19%) [ +0.00% +0.06% +0.10% / +0.19% +0.44% +0.24%] index_select strided 3 : Elapsed 0.086 ms (8.636 ms / 100) 8.629 -> 8.646 ( +0.20%) [ +0.19% +0.00% +0.06% / +0.20% +0.25% +0.44%] index_select random : Elapsed 0.086 ms (8.645 ms / 100) 8.639 -> 8.656 ( +0.20%) [ +0.00% +0.16% +0.23% / +0.20% +0.29% +0.53%] index_select random_sorted : Elapsed 0.086 ms (8.639 ms / 100) B = [20, 4, 40, 16] (stride (160, 1, 4, 3200)) A = [5, 4, 40, 16] (stride (640, 3200, 1, 40)) dim = 0 1.848 -> 1.852 ( +0.22%) [ +0.11% +0.05% +0.00% / +0.22% +1.84% +1.84%] index_add_ linear : Elapsed 0.018 ms (1.850 ms / 100) 1.801 -> 1.815 ( +0.78%) [ +0.28% +0.00% +0.78% / +0.78% +2.61% +2.67%] index_copy_ linear : Elapsed 0.018 ms (1.806 ms / 100) 1.844 -> 1.851 ( +0.38%) [ +0.16% +0.00% +0.38% / +0.38% +2.17% +2.33%] index_add_ reverse : Elapsed 0.018 ms (1.847 ms / 100) 1.807 -> 1.816 ( +0.50%) [ +0.00% +0.11% +0.44% / +0.50% +2.38% +2.43%] index_copy_ reverse : Elapsed 0.018 ms (1.807 ms / 100) 1.875 -> 1.875 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.69% +0.64%] index_add_ spread : Elapsed 0.019 ms (1.878 ms / 100) 1.848 -> 1.843 ( -0.27%) [ +0.00% +0.11% +0.00% / -0.27% +0.38% +0.65%] index_copy_ spread : Elapsed 0.018 ms (1.848 ms / 100) 1.866 -> 1.868 ( +0.11%) [ +0.21% +0.00% +0.11% / +0.11% +0.75% +0.86%] index_add_ strided 3 : Elapsed 0.019 ms (1.870 ms / 100) 1.835 -> 1.833 ( -0.11%) [ +0.05% +0.16% +0.00% / -0.11% +0.60% +0.60%] index_copy_ strided 3 : Elapsed 0.018 ms (1.836 ms / 100) 1.870 -> 1.869 ( -0.05%) [ +0.00% +0.21% +0.11% / -0.05% +0.86% +0.80%] index_add_ strided 7 : Elapsed 0.019 ms (1.870 ms / 100) 1.834 -> 1.832 ( -0.11%) [ +0.00% +0.00% +0.11% / -0.11% +0.93% +0.87%] index_copy_ strided 7 : Elapsed 0.018 ms (1.834 ms / 100) 1.864 -> 1.868 ( +0.21%) [ +0.21% +0.21% +0.00% / +0.21% +1.39% +1.50%] index_add_ perm : Elapsed 0.019 ms (1.868 ms / 100) 1.834 -> 1.837 ( +0.16%) [ +0.22% +0.11% +0.00% / +0.16% +1.31% +1.36%] index_copy_ perm : Elapsed 0.018 ms (1.838 ms / 100) 1.862 -> 1.865 ( +0.16%) [ +0.11% +0.05% +0.00% / +0.16% +1.29% +1.34%] index_add_ perm_sorted : Elapsed 0.019 ms (1.864 ms / 100) 1.835 -> 1.836 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +1.09% +1.04%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.835 ms / 100) 8.526 -> 8.528 ( +0.02%) [ +0.00% +0.11% +0.25% / +0.02% +0.40% +0.35%] index_select const : Elapsed 0.085 ms (8.526 ms / 100) 8.555 -> 8.564 ( +0.11%) [ +0.00% +0.16% +0.35% / +0.11% +0.26% +0.28%] index_select wrap : Elapsed 0.086 ms (8.555 ms / 100) 8.552 -> 8.551 ( -0.01%) [ +0.00% +0.05% +0.18% / -0.01% +0.37% +0.35%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.549 -> 8.554 ( +0.06%) [ +0.21% +0.00% +0.14% / +0.18% +0.06% +0.07%] index_select reverse : Elapsed 0.086 ms (8.567 ms / 100) 8.523 -> 8.530 ( +0.08%) [ +0.22% +0.00% +0.22% / +0.08% +0.31% +0.48%] index_select skip64 : Elapsed 0.085 ms (8.542 ms / 100) 8.514 -> 8.527 ( +0.15%) [ +0.21% +0.12% +0.00% / +0.15% +0.72% +0.42%] index_select skip256 : Elapsed 0.085 ms (8.532 ms / 100) 8.541 -> 8.552 ( +0.13%) [ +0.00% +0.27% +0.27% / +0.13% +0.39% +0.43%] index_select spread : Elapsed 0.085 ms (8.541 ms / 100) 8.571 -> 8.566 ( -0.06%) [ +0.01% +0.19% +0.00% / -0.06% +0.11% +0.15%] index_select strided 3 : Elapsed 0.086 ms (8.572 ms / 100) 8.564 -> 8.561 ( -0.04%) [ +0.02% +0.12% +0.00% / -0.04% +0.05% +0.23%] index_select random : Elapsed 0.086 ms (8.566 ms / 100) 8.550 -> 8.564 ( +0.16%) [ +0.07% +0.19% +0.00% / +0.16% +0.29% +0.27%] index_select random_sorted : Elapsed 0.086 ms (8.556 ms / 100) B = [20, 4, 40, 16] (stride (1, 800, 20, 3200)) A = [5, 4, 40, 16] (stride (1, 5, 20, 800)) dim = 0 1.902 -> 1.901 ( -0.05%) [ +0.00% +0.21% +0.16% / +0.11% -0.05% +0.11%] index_add_ linear : Elapsed 0.019 ms (1.902 ms / 100) 1.853 -> 1.853 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.05% +0.00%] index_copy_ linear : Elapsed 0.019 ms (1.853 ms / 100) 1.902 -> 1.900 ( -0.11%) [ +0.05% +0.00% +0.00% / +0.05% -0.11% +0.00%] index_add_ reverse : Elapsed 0.019 ms (1.903 ms / 100) 1.849 -> 1.846 ( -0.16%) [ +0.00% +0.05% +0.16% / -0.16% +0.16% +0.38%] index_copy_ reverse : Elapsed 0.018 ms (1.849 ms / 100) 1.919 -> 1.915 ( -0.21%) [ +0.00% +0.00% +0.00% / +0.00% -0.21% -0.10%] index_add_ spread : Elapsed 0.019 ms (1.919 ms / 100) 1.878 -> 1.877 ( -0.05%) [ +0.11% +0.00% +0.16% / +0.05% +0.00% -0.05%] index_copy_ spread : Elapsed 0.019 ms (1.880 ms / 100) 1.915 -> 1.916 ( +0.05%) [ +0.26% +0.00% +0.31% / +0.21% +0.26% +0.05%] index_add_ strided 3 : Elapsed 0.019 ms (1.920 ms / 100) 1.879 -> 1.874 ( -0.27%) [ +0.11% +0.00% +0.05% / -0.11% -0.27% +0.05%] index_copy_ strided 3 : Elapsed 0.019 ms (1.881 ms / 100) 1.912 -> 1.911 ( -0.05%) [ +0.26% +0.37% +0.00% / +0.05% -0.05% +0.00%] index_add_ strided 7 : Elapsed 0.019 ms (1.917 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.21% +0.16% +0.00% / +0.37% +0.37% +0.16%] index_copy_ strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.915 -> 1.915 ( +0.00%) [ +0.00% +0.26% +0.00% / +0.00% +0.10% +0.10%] index_add_ perm : Elapsed 0.019 ms (1.915 ms / 100) 1.878 -> 1.874 ( -0.21%) [ +0.00% +0.05% +0.00% / -0.21% +0.11% -0.05%] index_copy_ perm : Elapsed 0.019 ms (1.878 ms / 100) 1.914 -> 1.915 ( +0.05%) [ +0.16% +0.10% +0.00% / +0.05% +0.26% +0.21%] index_add_ perm_sorted : Elapsed 0.019 ms (1.917 ms / 100) 1.873 -> 1.876 ( +0.16%) [ +0.00% +0.32% +0.11% / +0.16% +0.27% +0.27%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.873 ms / 100) 8.296 -> 8.302 ( +0.07%) [ +0.00% +0.23% +0.11% / +0.24% +0.07% +0.45%] index_select const : Elapsed 0.083 ms (8.296 ms / 100) 8.298 -> 8.320 ( +0.27%) [ +0.11% +0.00% +0.10% / +0.28% +0.27% +0.34%] index_select wrap : Elapsed 0.083 ms (8.307 ms / 100) 8.306 -> 8.295 ( -0.13%) [ +0.04% +0.02% +0.00% / -0.13% +0.46% +0.49%] index_select linear : Elapsed 0.083 ms (8.309 ms / 100) 8.302 -> 8.318 ( +0.19%) [ +0.00% +0.11% +0.04% / +0.19% +0.26% +0.42%] index_select reverse : Elapsed 0.083 ms (8.302 ms / 100) 8.301 -> 8.301 ( +0.00%) [ +0.08% +0.00% +0.05% / +0.00% +0.48% +0.30%] index_select skip64 : Elapsed 0.083 ms (8.308 ms / 100) 8.307 -> 8.303 ( -0.05%) [ +0.00% +0.07% +0.16% / -0.05% +0.31% +0.51%] index_select skip256 : Elapsed 0.083 ms (8.307 ms / 100) 8.312 -> 8.319 ( +0.08%) [ +0.19% +0.06% +0.00% / +0.08% +0.16% +0.24%] index_select spread : Elapsed 0.083 ms (8.328 ms / 100) 8.301 -> 8.311 ( +0.12%) [ +0.14% +0.00% +0.25% / +0.12% +0.49% +0.42%] index_select strided 3 : Elapsed 0.083 ms (8.313 ms / 100) 8.291 -> 8.304 ( +0.16%) [ +0.00% +0.24% +0.28% / +0.16% +0.49% +0.47%] index_select random : Elapsed 0.083 ms (8.291 ms / 100) 8.296 -> 8.312 ( +0.19%) [ +0.11% +0.00% +0.10% / +0.19% +0.40% +0.42%] index_select random_sorted : Elapsed 0.083 ms (8.305 ms / 100) B = [20, 4, 40, 16] (stride (1, 20, 80, 3200)) A = [5, 4, 40, 16] (stride (2560, 16, 64, 1)) dim = 0 1.920 -> 1.915 ( -0.26%) [ +0.16% +0.21% +0.00% / +0.47% +0.10% -0.26%] index_add_ linear : Elapsed 0.019 ms (1.923 ms / 100) 1.891 -> 1.883 ( -0.42%) [ +0.16% +0.00% +0.16% / +0.16% -0.42% -0.32%] index_copy_ linear : Elapsed 0.019 ms (1.894 ms / 100) 1.926 -> 1.917 ( -0.47%) [ +0.00% +0.10% +0.16% / +0.00% -0.47% -0.47%] index_add_ reverse : Elapsed 0.019 ms (1.926 ms / 100) 1.891 -> 1.885 ( -0.32%) [ +0.26% +0.00% +0.11% / +0.05% -0.32% -0.21%] index_copy_ reverse : Elapsed 0.019 ms (1.896 ms / 100) 1.945 -> 1.930 ( -0.77%) [ +0.15% +0.31% +0.00% / +0.00% -0.77% -0.51%] index_add_ spread : Elapsed 0.019 ms (1.948 ms / 100) 1.910 -> 1.902 ( -0.42%) [ +0.42% +0.31% +0.00% / +0.79% -0.21% -0.42%] index_copy_ spread : Elapsed 0.019 ms (1.918 ms / 100) 1.950 -> 1.930 ( -1.03%) [ +0.21% +0.00% +0.00% / -0.10% -1.03% -0.62%] index_add_ strided 3 : Elapsed 0.020 ms (1.954 ms / 100) 1.911 -> 1.903 ( -0.42%) [ +0.26% +0.26% +0.00% / +0.37% -0.42% -0.37%] index_copy_ strided 3 : Elapsed 0.019 ms (1.916 ms / 100) 1.947 -> 1.929 ( -0.92%) [ +0.31% +0.00% +0.21% / +0.15% -0.77% -0.92%] index_add_ strided 7 : Elapsed 0.020 ms (1.953 ms / 100) 1.914 -> 1.904 ( -0.52%) [ +0.16% +0.16% +0.00% / -0.16% -0.26% -0.52%] index_copy_ strided 7 : Elapsed 0.019 ms (1.917 ms / 100) 1.942 -> 1.925 ( -0.88%) [ +0.26% +0.15% +0.00% / +0.05% -0.36% -0.88%] index_add_ perm : Elapsed 0.019 ms (1.947 ms / 100) 1.904 -> 1.896 ( -0.42%) [ +0.11% +0.00% +0.37% / +0.11% -0.37% -0.42%] index_copy_ perm : Elapsed 0.019 ms (1.906 ms / 100) 1.937 -> 1.928 ( -0.46%) [ +0.26% +0.21% +0.00% / +0.26% -0.10% -0.46%] index_add_ perm_sorted : Elapsed 0.019 ms (1.942 ms / 100) 1.909 -> 1.901 ( -0.42%) [ +0.05% +0.00% +0.00% / +0.00% -0.26% -0.42%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.910 ms / 100) 8.573 -> 8.586 ( +0.15%) [ +0.00% +0.07% +0.17% / +0.15% +0.42% +0.27%] index_select const : Elapsed 0.086 ms (8.573 ms / 100) 8.620 -> 8.619 ( -0.01%) [ +0.24% +0.00% +0.36% / -0.01% +0.20% +0.24%] index_select wrap : Elapsed 0.086 ms (8.641 ms / 100) 8.620 -> 8.605 ( -0.17%) [ +0.05% +0.00% +0.00% / -0.17% +0.14% -0.05%] index_select linear : Elapsed 0.086 ms (8.624 ms / 100) 8.616 -> 8.600 ( -0.19%) [ +0.01% +0.05% +0.00% / -0.19% +0.58% +0.45%] index_select reverse : Elapsed 0.086 ms (8.617 ms / 100) 8.573 -> 8.586 ( +0.15%) [ +0.08% +0.14% +0.00% / +0.15% +0.35% +0.38%] index_select skip64 : Elapsed 0.086 ms (8.580 ms / 100) 8.571 -> 8.579 ( +0.09%) [ +0.00% +0.08% +0.11% / +0.09% +0.46% +0.28%] index_select skip256 : Elapsed 0.086 ms (8.571 ms / 100) 8.602 -> 8.609 ( +0.08%) [ +0.23% +0.00% +0.08% / +0.08% +0.41% +0.24%] index_select spread : Elapsed 0.086 ms (8.622 ms / 100) 8.630 -> 8.613 ( -0.20%) [ +0.00% +0.01% +0.05% / -0.20% +0.03% +0.27%] index_select strided 3 : Elapsed 0.086 ms (8.630 ms / 100) 8.618 -> 8.615 ( -0.03%) [ +0.00% +0.21% +0.16% / -0.03% +0.31% +0.09%] index_select random : Elapsed 0.086 ms (8.618 ms / 100) 8.595 -> 8.611 ( +0.19%) [ +0.03% +0.08% +0.00% / +0.19% +0.34% +0.36%] index_select random_sorted : Elapsed 0.086 ms (8.598 ms / 100) out_shape = [5, 20, 40, 16] in_shape = [5, 4, 40, 16] idx_dim = 1 B = [5, 20, 40, 16] (stride (12800, 1, 20, 800)) A = [5, 4, 40, 16] (stride (640, 3200, 1, 40)) dim = 1 2.170 -> 2.171 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.05% +0.41% +0.32%] index_add_ linear : Elapsed 0.022 ms (2.172 ms / 100) 2.140 -> 2.139 ( -0.05%) [ +0.14% +0.28% +0.00% / +0.09% +0.00% -0.05%] index_copy_ linear : Elapsed 0.021 ms (2.143 ms / 100) 2.161 -> 2.165 ( +0.19%) [ +0.00% +0.14% +0.09% / +0.19% +0.69% +0.56%] index_add_ reverse : Elapsed 0.022 ms (2.161 ms / 100) 2.140 -> 2.139 ( -0.05%) [ +0.37% +0.00% +0.05% / -0.05% +0.14% +0.37%] index_copy_ reverse : Elapsed 0.021 ms (2.148 ms / 100) 2.209 -> 2.210 ( +0.05%) [ +0.00% +0.09% +0.14% / +0.05% +0.45% +0.45%] index_add_ spread : Elapsed 0.022 ms (2.209 ms / 100) 2.240 -> 2.239 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.31% +0.54%] index_copy_ spread : Elapsed 0.022 ms (2.240 ms / 100) 2.203 -> 2.199 ( -0.18%) [ +0.14% +0.05% +0.00% / -0.18% +0.36% +0.59%] index_add_ strided 3 : Elapsed 0.022 ms (2.206 ms / 100) 2.203 -> 2.211 ( +0.36%) [ +0.50% +0.45% +0.00% / +0.36% +0.50% +0.68%] index_copy_ strided 3 : Elapsed 0.022 ms (2.214 ms / 100) 2.216 -> 2.215 ( -0.05%) [ +0.14% +0.00% +0.09% / +0.05% -0.05% +0.05%] index_add_ strided 7 : Elapsed 0.022 ms (2.219 ms / 100) 2.246 -> 2.248 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.13% +0.31%] index_copy_ strided 7 : Elapsed 0.022 ms (2.248 ms / 100) 2.211 -> 2.214 ( +0.14%) [ +0.27% +0.00% +0.00% / +0.14% +0.36% +0.27%] index_add_ perm : Elapsed 0.022 ms (2.217 ms / 100) 2.244 -> 2.244 ( +0.00%) [ +0.00% +0.18% +0.18% / +0.00% +0.04% +0.00%] index_copy_ perm : Elapsed 0.022 ms (2.244 ms / 100) 2.212 -> 2.215 ( +0.14%) [ +0.14% +0.00% +0.18% / +0.14% +0.27% +0.18%] index_add_ perm_sorted : Elapsed 0.022 ms (2.215 ms / 100) 2.248 -> 2.245 ( -0.13%) [ +0.22% +0.09% +0.00% / +0.00% -0.13% +0.04%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.253 ms / 100) 9.250 -> 9.254 ( +0.04%) [ +0.00% +0.04% +0.11% / +0.04% +0.28% +0.13%] index_select const : Elapsed 0.092 ms (9.250 ms / 100) 9.299 -> 9.318 ( +0.20%) [ +0.12% +0.09% +0.00% / +0.20% +0.26% +0.27%] index_select wrap : Elapsed 0.093 ms (9.310 ms / 100) 9.288 -> 9.318 ( +0.32%) [ +0.00% +0.18% +0.13% / +0.32% +0.36% +0.50%] index_select linear : Elapsed 0.093 ms (9.288 ms / 100) 9.278 -> 9.279 ( +0.01%) [ +0.00% +0.16% +0.09% / +0.01% +0.18% +0.39%] index_select reverse : Elapsed 0.093 ms (9.278 ms / 100) 9.256 -> 9.251 ( -0.05%) [ +0.05% +0.00% +0.01% / -0.05% -0.03% -0.04%] index_select skip64 : Elapsed 0.093 ms (9.261 ms / 100) 9.245 -> 9.254 ( +0.10%) [ +0.15% +0.04% +0.00% / +0.28% +0.10% +0.24%] index_select skip256 : Elapsed 0.093 ms (9.259 ms / 100) 9.299 -> 9.300 ( +0.01%) [ +0.47% +0.12% +0.00% / +0.01% +0.11% +0.29%] index_select spread : Elapsed 0.093 ms (9.343 ms / 100) 9.315 -> 9.310 ( -0.05%) [ +0.20% +0.16% +0.00% / -0.05% +0.21% +0.20%] index_select strided 3 : Elapsed 0.093 ms (9.334 ms / 100) 9.313 -> 9.304 ( -0.10%) [ +0.00% +0.11% +0.03% / -0.10% -0.04% +0.06%] index_select random : Elapsed 0.093 ms (9.313 ms / 100) 9.293 -> 9.290 ( -0.03%) [ +0.00% +0.01% +0.16% / -0.03% +0.16% +0.05%] index_select random_sorted : Elapsed 0.093 ms (9.293 ms / 100) B = [5, 20, 40, 16] (stride (1, 3200, 5, 200)) A = [5, 4, 40, 16] (stride (1, 5, 320, 20)) dim = 1 2.070 -> 2.073 ( +0.14%) [ +0.00% +0.19% +0.00% / +0.14% +0.24% +0.19%] index_add_ linear : Elapsed 0.021 ms (2.070 ms / 100) 2.013 -> 2.012 ( -0.05%) [ +0.00% +0.25% +0.05% / -0.05% +0.40% +0.65%] index_copy_ linear : Elapsed 0.020 ms (2.013 ms / 100) 2.062 -> 2.063 ( +0.05%) [ +0.34% +0.00% +0.19% / +0.05% +0.58% +0.53%] index_add_ reverse : Elapsed 0.021 ms (2.069 ms / 100) 2.011 -> 2.012 ( +0.05%) [ +0.25% +0.00% +0.10% / +0.05% +0.40% +0.55%] index_copy_ reverse : Elapsed 0.020 ms (2.016 ms / 100) 2.054 -> 2.052 ( -0.10%) [ +0.00% +0.00% +0.10% / -0.10% +0.39% +0.39%] index_add_ spread : Elapsed 0.021 ms (2.054 ms / 100) 2.004 -> 2.008 ( +0.20%) [ +0.00% +0.10% +0.10% / +0.20% +0.70% +0.80%] index_copy_ spread : Elapsed 0.020 ms (2.004 ms / 100) 2.087 -> 2.088 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.19% +0.10%] index_add_ strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.028 -> 2.027 ( -0.05%) [ +0.15% +0.05% +0.00% / -0.05% +0.49% +0.59%] index_copy_ strided 3 : Elapsed 0.020 ms (2.031 ms / 100) 2.074 -> 2.079 ( +0.24%) [ +0.05% +0.39% +0.00% / +0.24% +0.63% +0.39%] index_add_ strided 7 : Elapsed 0.021 ms (2.075 ms / 100) 2.022 -> 2.025 ( +0.15%) [ +0.25% +0.20% +0.00% / +0.15% +0.79% +0.84%] index_copy_ strided 7 : Elapsed 0.020 ms (2.027 ms / 100) 2.069 -> 2.057 ( -0.58%) [ +0.00% +0.00% +0.24% / +0.14% -0.39% -0.58%] index_add_ perm : Elapsed 0.021 ms (2.069 ms / 100) 2.019 -> 2.013 ( -0.30%) [ +0.15% +0.15% +0.00% / +0.05% -0.05% -0.30%] index_copy_ perm : Elapsed 0.020 ms (2.022 ms / 100) 2.074 -> 2.072 ( -0.10%) [ +0.05% +0.00% +0.00% / +0.14% +0.00% -0.10%] index_add_ perm_sorted : Elapsed 0.021 ms (2.075 ms / 100) 2.018 -> 2.021 ( +0.15%) [ +0.00% +0.25% +0.10% / +0.15% +0.25% +0.15%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.018 ms / 100) 8.788 -> 8.801 ( +0.15%) [ +0.15% +0.00% +0.01% / +0.15% +0.25% +0.48%] index_select const : Elapsed 0.088 ms (8.801 ms / 100) 8.800 -> 8.803 ( +0.03%) [ +0.01% +0.19% +0.00% / +0.03% +0.10% +0.25%] index_select wrap : Elapsed 0.088 ms (8.801 ms / 100) 8.788 -> 8.801 ( +0.15%) [ +0.28% +0.00% +0.01% / +0.15% +0.34% +0.17%] index_select linear : Elapsed 0.088 ms (8.813 ms / 100) 8.806 -> 8.799 ( -0.08%) [ +0.00% +0.05% +0.10% / -0.08% +0.03% +0.07%] index_select reverse : Elapsed 0.088 ms (8.806 ms / 100) 8.792 -> 8.808 ( +0.18%) [ +0.00% +0.08% +0.22% / +0.18% +0.33% +0.43%] index_select skip64 : Elapsed 0.088 ms (8.792 ms / 100) 8.791 -> 8.791 ( +0.00%) [ +0.00% +0.05% +0.07% / +0.00% +0.30% +0.18%] index_select skip256 : Elapsed 0.088 ms (8.791 ms / 100) 8.790 -> 8.793 ( +0.03%) [ +0.15% +0.31% +0.00% / +0.03% +0.46% +0.33%] index_select spread : Elapsed 0.088 ms (8.803 ms / 100) 8.795 -> 8.803 ( +0.09%) [ +0.00% +0.17% +0.16% / +0.10% +0.15% +0.09%] index_select strided 3 : Elapsed 0.088 ms (8.795 ms / 100) 8.781 -> 8.804 ( +0.26%) [ +0.06% +0.19% +0.00% / +0.26% +0.57% +0.40%] index_select random : Elapsed 0.088 ms (8.786 ms / 100) 8.785 -> 8.795 ( +0.11%) [ +0.00% +0.16% +0.19% / +0.11% +0.48% +0.46%] index_select random_sorted : Elapsed 0.088 ms (8.785 ms / 100) B = [5, 20, 40, 16] (stride (20, 1, 1600, 100)) A = [5, 4, 40, 16] (stride (2560, 640, 16, 1)) dim = 1 0.883 -> 0.856 ( -3.06%) [ +0.11% +0.79% +0.00% / +0.45% -2.72% -3.06%] index_add_ linear : Elapsed 0.009 ms (0.884 ms / 100) 0.885 -> 0.867 ( -2.03%) [ +0.00% +0.56% +0.56% / +0.23% -2.03% -1.69%] index_copy_ linear : Elapsed 0.009 ms (0.885 ms / 100) 0.886 -> 0.852 ( -3.84%) [ +0.34% +0.23% +0.00% / -0.11% -3.84% -3.72%] index_add_ reverse : Elapsed 0.009 ms (0.889 ms / 100) 0.886 -> 0.862 ( -2.71%) [ +0.45% +0.45% +0.00% / +0.34% -2.03% -2.71%] index_copy_ reverse : Elapsed 0.009 ms (0.890 ms / 100) 0.929 -> 0.886 ( -4.63%) [ +0.00% +0.11% +0.32% / +0.43% -4.63% -4.31%] index_add_ spread : Elapsed 0.009 ms (0.929 ms / 100) 0.935 -> 0.900 ( -3.74%) [ +0.53% +0.00% +0.00% / +0.96% -3.74% -3.53%] index_copy_ spread : Elapsed 0.009 ms (0.940 ms / 100) 0.917 -> 0.880 ( -4.03%) [ +0.00% +0.44% +0.22% / +0.22% -4.03% -3.71%] index_add_ strided 3 : Elapsed 0.009 ms (0.917 ms / 100) 0.925 -> 0.895 ( -3.24%) [ +0.54% +0.00% +0.00% / +0.43% -3.24% -3.03%] index_copy_ strided 3 : Elapsed 0.009 ms (0.930 ms / 100) 0.930 -> 0.890 ( -4.30%) [ +0.11% +0.00% +0.00% / +0.32% -4.30% -3.87%] index_add_ strided 7 : Elapsed 0.009 ms (0.931 ms / 100) 0.939 -> 0.902 ( -3.94%) [ +0.53% +0.00% +0.43% / +0.00% -3.73% -3.94%] index_copy_ strided 7 : Elapsed 0.009 ms (0.944 ms / 100) 0.934 -> 0.891 ( -4.60%) [ +0.11% +0.00% +0.00% / +0.96% -4.60% -4.18%] index_add_ perm : Elapsed 0.009 ms (0.935 ms / 100) 0.938 -> 0.899 ( -4.16%) [ +0.00% +0.53% +0.75% / +0.21% -4.16% -3.41%] index_copy_ perm : Elapsed 0.009 ms (0.938 ms / 100) 0.926 -> 0.888 ( -4.10%) [ +0.65% +0.00% +0.11% / +0.22% -4.10% -4.10%] index_add_ perm_sorted : Elapsed 0.009 ms (0.932 ms / 100) 0.931 -> 0.901 ( -3.22%) [ +0.75% +0.21% +0.00% / -0.11% -3.22% -2.79%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.938 ms / 100) 5.041 -> 4.967 ( -1.47%) [ +0.00% +0.06% +0.18% / -1.47% -1.17% -1.25%] index_select const : Elapsed 0.050 ms (5.041 ms / 100) 5.079 -> 5.061 ( -0.35%) [ +0.00% +0.00% +0.18% / -0.35% -0.24% -0.22%] index_select wrap : Elapsed 0.051 ms (5.079 ms / 100) 5.064 -> 5.051 ( -0.26%) [ +0.00% +0.04% +0.16% / -0.26% +0.10% +0.04%] index_select linear : Elapsed 0.051 ms (5.064 ms / 100) 5.065 -> 5.053 ( -0.24%) [ +0.08% +0.06% +0.00% / -0.18% -0.12% -0.24%] index_select reverse : Elapsed 0.051 ms (5.069 ms / 100) 5.056 -> 4.972 ( -1.66%) [ +0.10% +0.00% +0.08% / -1.62% -1.38% -1.66%] index_select skip64 : Elapsed 0.051 ms (5.061 ms / 100) 5.048 -> 4.963 ( -1.68%) [ +0.00% +0.14% +0.04% / -1.68% -1.29% -1.47%] index_select skip256 : Elapsed 0.050 ms (5.048 ms / 100) 5.060 -> 5.061 ( +0.02%) [ +0.00% +0.36% +0.24% / +0.02% +0.12% +0.14%] index_select spread : Elapsed 0.051 ms (5.060 ms / 100) 5.072 -> 5.064 ( -0.16%) [ +0.00% +0.14% +0.04% / -0.16% -0.16% -0.02%] index_select strided 3 : Elapsed 0.051 ms (5.072 ms / 100) 5.076 -> 5.059 ( -0.33%) [ +0.02% +0.00% +0.20% / -0.32% -0.33% -0.26%] index_select random : Elapsed 0.051 ms (5.077 ms / 100) 5.067 -> 5.058 ( -0.18%) [ +0.28% +0.00% +0.14% / -0.08% -0.14% -0.18%] index_select random_sorted : Elapsed 0.051 ms (5.081 ms / 100) B = [5, 20, 40, 16] (stride (20, 1, 1600, 100)) A = [5, 4, 40, 16] (stride (2560, 640, 1, 40)) dim = 1 2.062 -> 2.062 ( +0.00%) [ +0.34% +0.48% +0.00% / +0.00% +0.58% +0.63%] index_add_ linear : Elapsed 0.021 ms (2.069 ms / 100) 2.036 -> 2.041 ( +0.25%) [ +0.15% +0.10% +0.00% / +0.25% +0.44% +0.49%] index_copy_ linear : Elapsed 0.020 ms (2.039 ms / 100) 2.061 -> 2.067 ( +0.29%) [ +0.19% +0.00% +0.00% / +0.29% +0.73% +0.53%] index_add_ reverse : Elapsed 0.021 ms (2.065 ms / 100) 2.033 -> 2.036 ( +0.15%) [ +0.00% +0.15% +0.05% / +0.15% +0.49% +0.54%] index_copy_ reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.101 -> 2.104 ( +0.14%) [ +0.05% +0.00% +0.24% / +0.14% +0.48% +0.43%] index_add_ spread : Elapsed 0.021 ms (2.102 ms / 100) 2.131 -> 2.135 ( +0.19%) [ +0.14% +0.14% +0.00% / +0.19% +0.47% +0.52%] index_copy_ spread : Elapsed 0.021 ms (2.134 ms / 100) 2.094 -> 2.098 ( +0.19%) [ +0.05% +0.10% +0.00% / +0.19% +0.72% +0.57%] index_add_ strided 3 : Elapsed 0.021 ms (2.095 ms / 100) 2.101 -> 2.103 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.62% +0.67%] index_copy_ strided 3 : Elapsed 0.021 ms (2.101 ms / 100) 2.105 -> 2.103 ( -0.10%) [ +0.10% +0.00% +0.00% / -0.10% +0.76% +0.52%] index_add_ strided 7 : Elapsed 0.021 ms (2.107 ms / 100) 2.133 -> 2.135 ( +0.09%) [ +0.14% +0.00% +0.14% / +0.09% +0.61% +0.75%] index_copy_ strided 7 : Elapsed 0.021 ms (2.136 ms / 100) 2.096 -> 2.096 ( +0.00%) [ +0.00% +0.19% +0.10% / +0.00% +0.33% +0.43%] index_add_ perm : Elapsed 0.021 ms (2.096 ms / 100) 2.102 -> 2.104 ( +0.10%) [ +0.19% +0.00% +0.10% / +0.10% +0.33% +0.71%] index_copy_ perm : Elapsed 0.021 ms (2.106 ms / 100) 2.098 -> 2.099 ( +0.05%) [ +0.19% +0.00% +0.00% / +0.05% +0.38% +0.43%] index_add_ perm_sorted : Elapsed 0.021 ms (2.102 ms / 100) 2.106 -> 2.103 ( -0.14%) [ +0.14% +0.05% +0.00% / -0.14% +0.14% +0.24%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.109 ms / 100) 8.787 -> 8.791 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.07% +0.05% +0.14%] index_select const : Elapsed 0.088 ms (8.787 ms / 100) 8.845 -> 8.853 ( +0.09%) [ +0.10% +0.00% +0.23% / +0.16% +0.15% +0.09%] index_select wrap : Elapsed 0.089 ms (8.854 ms / 100) 8.826 -> 8.811 ( -0.17%) [ +0.01% +0.11% +0.00% / -0.01% -0.17% +0.29%] index_select linear : Elapsed 0.088 ms (8.827 ms / 100) 8.807 -> 8.815 ( +0.09%) [ +0.07% +0.00% +0.08% / +0.17% +0.25% +0.09%] index_select reverse : Elapsed 0.088 ms (8.813 ms / 100) 8.794 -> 8.790 ( -0.05%) [ +0.00% +0.08% +0.11% / -0.05% +0.17% -0.01%] index_select skip64 : Elapsed 0.088 ms (8.794 ms / 100) 8.800 -> 8.788 ( -0.14%) [ +0.00% +0.26% +0.22% / +0.00% -0.08% -0.14%] index_select skip256 : Elapsed 0.088 ms (8.800 ms / 100) 8.830 -> 8.820 ( -0.11%) [ +0.00% +0.03% +0.19% / -0.11% +0.06% +0.14%] index_select spread : Elapsed 0.088 ms (8.830 ms / 100) 8.843 -> 8.843 ( +0.00%) [ +0.14% +0.00% +0.06% / +0.18% +0.00% +0.00%] index_select strided 3 : Elapsed 0.089 ms (8.855 ms / 100) 8.851 -> 8.843 ( -0.09%) [ +0.00% +0.10% +0.12% / -0.09% +0.05% +0.11%] index_select random : Elapsed 0.089 ms (8.851 ms / 100) 8.823 -> 8.830 ( +0.08%) [ +0.14% +0.00% +0.16% / +0.14% +0.17% +0.08%] index_select random_sorted : Elapsed 0.088 ms (8.835 ms / 100) B = [5, 20, 40, 16] (stride (1, 5, 1600, 100)) A = [5, 4, 40, 16] (stride (640, 3200, 16, 1)) dim = 1 0.778 -> 0.778 ( +0.00%) [ +0.00% +0.64% +1.29% / +0.00% +2.70% +2.57%] index_add_ linear : Elapsed 0.008 ms (0.778 ms / 100) 0.793 -> 0.795 ( +0.25%) [ +0.76% +0.13% +0.00% / +0.25% +2.90% +2.52%] index_copy_ linear : Elapsed 0.008 ms (0.799 ms / 100) 0.784 -> 0.784 ( +0.00%) [ +0.13% +0.00% +0.51% / +0.26% +0.26% +0.00%] index_add_ reverse : Elapsed 0.008 ms (0.785 ms / 100) 0.796 -> 0.798 ( +0.25%) [ +0.00% +0.50% +0.13% / +0.50% +1.13% +0.25%] index_copy_ reverse : Elapsed 0.008 ms (0.796 ms / 100) 0.844 -> 0.839 ( -0.59%) [ +0.00% +1.18% +0.00% / +0.36% -0.59% -0.47%] index_add_ spread : Elapsed 0.008 ms (0.844 ms / 100) 0.854 -> 0.850 ( -0.47%) [ +0.59% +0.00% +0.35% / +0.12% -0.12% -0.47%] index_copy_ spread : Elapsed 0.009 ms (0.859 ms / 100) 0.832 -> 0.828 ( -0.48%) [ +0.12% +0.00% +0.12% / -0.36% -0.24% -0.48%] index_add_ strided 3 : Elapsed 0.008 ms (0.833 ms / 100) 0.844 -> 0.843 ( -0.12%) [ +0.00% +0.12% +0.12% / -0.12% -0.12% +0.12%] index_copy_ strided 3 : Elapsed 0.008 ms (0.844 ms / 100) 0.833 -> 0.825 ( -0.96%) [ +0.36% +0.00% +0.36% / +0.48% -0.36% -0.96%] index_add_ strided 7 : Elapsed 0.008 ms (0.836 ms / 100) 0.847 -> 0.844 ( -0.35%) [ +0.35% +0.00% +0.24% / +0.59% -0.35% -0.24%] index_copy_ strided 7 : Elapsed 0.009 ms (0.850 ms / 100) 0.852 -> 0.844 ( -0.94%) [ +0.59% +0.70% +0.00% / +0.12% -0.59% -0.94%] index_add_ perm : Elapsed 0.009 ms (0.857 ms / 100) 0.862 -> 0.861 ( -0.12%) [ +0.00% +0.12% +0.12% / +0.00% -0.12% -0.12%] index_copy_ perm : Elapsed 0.009 ms (0.862 ms / 100) 0.852 -> 0.840 ( -1.41%) [ +0.00% +0.35% +0.59% / -0.12% -1.41% -0.47%] index_add_ perm_sorted : Elapsed 0.009 ms (0.852 ms / 100) 0.857 -> 0.858 ( +0.12%) [ +0.23% +0.00% +0.58% / +0.70% +0.35% +0.12%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.859 ms / 100) 5.028 -> 5.018 ( -0.20%) [ +0.00% +0.06% +0.06% / -0.20% +0.18% +0.00%] index_select const : Elapsed 0.050 ms (5.028 ms / 100) 5.062 -> 5.050 ( -0.24%) [ +0.08% +0.12% +0.00% / -0.24% +0.08% +0.06%] index_select wrap : Elapsed 0.051 ms (5.066 ms / 100) 5.043 -> 5.051 ( +0.16%) [ +0.24% +0.00% +0.14% / +0.20% +0.16% +0.24%] index_select linear : Elapsed 0.051 ms (5.055 ms / 100) 5.053 -> 5.045 ( -0.16%) [ +0.08% +0.14% +0.00% / +0.14% -0.12% -0.16%] index_select reverse : Elapsed 0.051 ms (5.057 ms / 100) 5.032 -> 5.028 ( -0.08%) [ +0.00% +0.18% +0.04% / -0.08% +0.14% +0.06%] index_select skip64 : Elapsed 0.050 ms (5.032 ms / 100) 5.023 -> 5.025 ( +0.04%) [ +0.22% +0.30% +0.00% / +0.16% +0.32% +0.04%] index_select skip256 : Elapsed 0.050 ms (5.034 ms / 100) 5.034 -> 5.042 ( +0.16%) [ +0.00% +0.12% +0.10% / +0.18% +0.16% +0.24%] index_select spread : Elapsed 0.050 ms (5.034 ms / 100) 5.050 -> 5.054 ( +0.08%) [ +0.28% +0.08% +0.00% / +0.08% +0.22% +0.28%] index_select strided 3 : Elapsed 0.051 ms (5.064 ms / 100) 5.056 -> 5.059 ( +0.06%) [ +0.42% +0.00% +0.28% / +0.08% +0.06% +0.20%] index_select random : Elapsed 0.051 ms (5.077 ms / 100) 5.054 -> 5.046 ( -0.16%) [ +0.20% +0.00% +0.14% / +0.14% -0.16% -0.08%] index_select random_sorted : Elapsed 0.051 ms (5.064 ms / 100) B = [5, 20, 40, 16] (stride (800, 40, 1, 4000)) A = [5, 4, 40, 16] (stride (2560, 1, 4, 160)) dim = 1 2.192 -> 2.197 ( +0.23%) [ +0.14% +0.00% +0.00% / +0.23% +0.68% +0.59%] index_add_ linear : Elapsed 0.022 ms (2.195 ms / 100) 2.139 -> 2.147 ( +0.37%) [ +0.23% +0.23% +0.00% / +0.37% +0.70% +0.65%] index_copy_ linear : Elapsed 0.021 ms (2.144 ms / 100) 2.190 -> 2.191 ( +0.05%) [ +0.05% +0.18% +0.00% / +0.05% +0.68% +0.68%] index_add_ reverse : Elapsed 0.022 ms (2.191 ms / 100) 2.140 -> 2.142 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.47% +0.65%] index_copy_ reverse : Elapsed 0.021 ms (2.140 ms / 100) 2.188 -> 2.192 ( +0.18%) [ +0.27% +0.37% +0.00% / +0.18% +0.78% +0.59%] index_add_ spread : Elapsed 0.022 ms (2.194 ms / 100) 2.133 -> 2.142 ( +0.42%) [ +0.14% +0.61% +0.00% / +0.42% +0.70% +0.84%] index_copy_ spread : Elapsed 0.021 ms (2.136 ms / 100) 2.184 -> 2.189 ( +0.23%) [ +0.18% +0.18% +0.00% / +0.23% +0.92% +0.92%] index_add_ strided 3 : Elapsed 0.022 ms (2.188 ms / 100) 2.130 -> 2.134 ( +0.19%) [ +0.09% +0.19% +0.00% / +0.19% +0.94% +1.13%] index_copy_ strided 3 : Elapsed 0.021 ms (2.132 ms / 100) 2.189 -> 2.188 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.022 ms (2.189 ms / 100) 2.131 -> 2.135 ( +0.19%) [ +0.00% +0.19% +0.14% / +0.19% +0.75% +0.94%] index_copy_ strided 7 : Elapsed 0.021 ms (2.131 ms / 100) 2.205 -> 2.201 ( -0.18%) [ +0.27% +0.09% +0.00% / +0.05% -0.18% -0.09%] index_add_ perm : Elapsed 0.022 ms (2.211 ms / 100) 2.150 -> 2.151 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.05% +0.09%] index_copy_ perm : Elapsed 0.022 ms (2.152 ms / 100) 2.190 -> 2.192 ( +0.09%) [ +0.00% +0.23% +0.05% / +0.09% +0.59% +0.68%] index_add_ perm_sorted : Elapsed 0.022 ms (2.190 ms / 100) 2.139 -> 2.140 ( +0.05%) [ +0.00% +0.05% +0.23% / +0.05% +0.70% +0.28%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.139 ms / 100) 9.270 -> 9.237 ( -0.36%) [ +0.15% +0.03% +0.00% / +0.05% -0.36% -0.22%] index_select const : Elapsed 0.093 ms (9.284 ms / 100) 9.253 -> 9.234 ( -0.21%) [ +0.27% +0.00% +0.14% / -0.04% -0.18% -0.21%] index_select wrap : Elapsed 0.093 ms (9.278 ms / 100) 9.260 -> 9.233 ( -0.29%) [ +0.16% +0.00% +0.15% / +0.23% -0.29% -0.28%] index_select linear : Elapsed 0.093 ms (9.275 ms / 100) 9.248 -> 9.238 ( -0.11%) [ +0.00% +0.16% +0.18% / -0.11% -0.11% -0.05%] index_select reverse : Elapsed 0.092 ms (9.248 ms / 100) 9.255 -> 9.238 ( -0.18%) [ +0.17% +0.00% +0.15% / +0.03% -0.18% -0.16%] index_select skip64 : Elapsed 0.093 ms (9.271 ms / 100) 9.255 -> 9.240 ( -0.16%) [ +0.21% +0.09% +0.00% / +0.08% +0.01% -0.16%] index_select skip256 : Elapsed 0.093 ms (9.274 ms / 100) 9.252 -> 9.213 ( -0.42%) [ +0.00% +0.00% +0.12% / +0.13% -0.06% -0.42%] index_select spread : Elapsed 0.093 ms (9.252 ms / 100) 9.248 -> 9.234 ( -0.15%) [ +0.10% +0.00% +0.09% / +0.32% -0.04% -0.15%] index_select strided 3 : Elapsed 0.093 ms (9.257 ms / 100) 9.253 -> 9.230 ( -0.25%) [ +0.00% +0.14% +0.08% / +0.10% -0.18% -0.25%] index_select random : Elapsed 0.093 ms (9.253 ms / 100) 9.254 -> 9.231 ( -0.25%) [ +0.00% +0.05% +0.15% / +0.15% -0.25% -0.02%] index_select random_sorted : Elapsed 0.093 ms (9.254 ms / 100) out_shape = [5, 4, 20, 16] in_shape = [5, 4, 40, 16] idx_dim = 2 B = [5, 4, 20, 16] (stride (1280, 320, 16, 1)) A = [5, 4, 40, 16] (stride (160, 1, 4, 800)) dim = 2 2.395 -> 2.395 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.33% +0.33%] index_select const : Elapsed 0.024 ms (2.397 ms / 100) 2.407 -> 2.412 ( +0.21%) [ +0.25% +0.08% +0.00% / +0.21% +0.29% +0.33%] index_select wrap : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.12% +0.12%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.04% +0.04% +0.00% / +0.17% +0.04% -0.04%] index_select reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.00% +0.08% +0.17% / -0.04% -0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.17% +0.00% +0.04% / +0.08% +0.21% +0.13%] index_select skip256 : Elapsed 0.024 ms (2.401 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.12% +0.25%] index_select spread : Elapsed 0.024 ms (2.425 ms / 100) 2.419 -> 2.414 ( -0.21%) [ +0.25% +0.41% +0.00% / +0.25% -0.21% +0.00%] index_select strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.413 -> 2.410 ( -0.12%) [ +0.12% +0.00% +0.04% / +0.00% -0.08% -0.12%] index_select strided 5 : Elapsed 0.024 ms (2.416 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.21% +0.04%] index_select strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.29% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.21% -0.04%] index_select strided 16 : Elapsed 0.024 ms (2.407 ms / 100) 2.416 -> 2.416 ( +0.00%) [ +0.29% +0.12% +0.00% / +0.00% +0.00% +0.08%] index_select random : Elapsed 0.024 ms (2.423 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.04% -0.08% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.415 -> 2.420 ( +0.21%) [ +0.21% +0.21% +0.00% / +0.25% +0.21% +0.25%] index_select perm : Elapsed 0.024 ms (2.420 ms / 100) 2.420 -> 2.412 ( -0.33%) [ +0.17% +0.00% +0.08% / +0.12% -0.21% -0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) A = [5, 4, 40, 16] (stride (16, 80, 320, 1)) dim = 2 2.392 -> 2.392 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.00% +0.21% +0.21%] index_select const : Elapsed 0.024 ms (2.393 ms / 100) 2.405 -> 2.398 ( -0.29%) [ +0.00% +0.04% +0.04% / -0.04% -0.12% -0.29%] index_select wrap : Elapsed 0.024 ms (2.405 ms / 100) 2.403 -> 2.397 ( -0.25%) [ +0.00% +0.12% +0.04% / +0.12% +0.04% -0.25%] index_select linear : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.405 ( +0.17%) [ +0.00% +0.08% +0.04% / +0.17% +0.21% +0.33%] index_select reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.392 -> 2.393 ( +0.04%) [ +0.17% +0.00% +0.21% / +0.08% +0.08% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.396 ms / 100) 2.392 -> 2.391 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.38% +0.13%] index_select skip256 : Elapsed 0.024 ms (2.393 ms / 100) 2.399 -> 2.402 ( +0.13%) [ +0.04% +0.13% +0.00% / +0.17% +0.13% +0.21%] index_select spread : Elapsed 0.024 ms (2.400 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.21% +0.13% +0.00% / +0.00% +0.42% +0.46%] index_select strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.13% +0.00% +0.29% / +0.13% +0.29% +0.38%] index_select strided 5 : Elapsed 0.024 ms (2.396 ms / 100) 2.399 -> 2.402 ( +0.13%) [ +0.00% +0.17% +0.17% / +0.21% +0.13% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.399 ms / 100) 2.395 -> 2.394 ( -0.04%) [ +0.13% +0.00% +0.08% / +0.04% -0.04% +0.00%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.395 -> 2.395 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.13% +0.13%] index_select strided 16 : Elapsed 0.024 ms (2.397 ms / 100) 2.397 -> 2.395 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.21% +0.25%] index_select random : Elapsed 0.024 ms (2.398 ms / 100) 2.397 -> 2.400 ( +0.13%) [ +0.17% +0.00% +0.17% / +0.21% +0.21% +0.13%] index_select random_sorted : Elapsed 0.024 ms (2.401 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.04% +0.04%] index_select perm : Elapsed 0.024 ms (2.404 ms / 100) 2.398 -> 2.401 ( +0.13%) [ +0.25% +0.04% +0.00% / +0.25% +0.29% +0.13%] index_select perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) B = [5, 4, 20, 16] (stride (1280, 16, 64, 1)) A = [5, 4, 40, 16] (stride (40, 200, 1, 800)) dim = 2 2.449 -> 2.452 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.12% +0.20% +0.16%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.460 -> 2.457 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.12% -0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.459 -> 2.457 ( -0.08%) [ +0.04% +0.12% +0.00% / +0.16% -0.08% +0.12%] index_select linear : Elapsed 0.025 ms (2.460 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.04% +0.00% +0.12% / +0.12% -0.04% -0.04%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.16% +0.04% +0.00% / +0.20% +0.12% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.20% +0.08% / +0.08% -0.04% +0.20%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.28% +0.16%] index_select spread : Elapsed 0.025 ms (2.465 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.12% +0.00% +0.04% / +0.04% -0.04% +0.04%] index_select strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.04% +0.00% +0.04%] index_select strided 5 : Elapsed 0.025 ms (2.471 ms / 100) 2.464 -> 2.465 ( +0.04%) [ +0.00% +0.12% +0.00% / +0.04% +0.16% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.24% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.463 ms / 100) 2.468 -> 2.464 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.00% +0.08%] index_select strided 16 : Elapsed 0.025 ms (2.468 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.08%] index_select random : Elapsed 0.025 ms (2.467 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.08% +0.08% +0.00% / +0.00% +0.32% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.00% +0.00% +0.08% / +0.00% -0.04% -0.04%] index_select perm : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.459 ( -0.41%) [ +0.04% +0.00% +0.00% / +0.08% -0.24% -0.41%] index_select perm_sorted : Elapsed 0.025 ms (2.470 ms / 100) B = [5, 4, 20, 16] (stride (320, 1600, 1, 20)) A = [5, 4, 40, 16] (stride (1, 3200, 80, 5)) dim = 2 2.441 -> 2.444 ( +0.12%) [ +0.20% +0.00% +0.25% / +0.29% +0.33% +0.12%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.450 -> 2.442 ( -0.33%) [ +0.00% +0.08% +0.00% / +0.20% -0.16% -0.33%] index_select wrap : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.446 ( -0.24%) [ +0.04% +0.00% +0.08% / -0.08% -0.04% -0.24%] index_select linear : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.446 ( -0.08%) [ +0.08% +0.00% +0.08% / +0.37% +0.00% -0.08%] index_select reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.447 -> 2.444 ( -0.12%) [ +0.00% +0.08% +0.00% / -0.08% -0.12% -0.12%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.444 -> 2.446 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.16% +0.08% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.444 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.16% +0.29% +0.33%] index_select spread : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.12% +0.16%] index_select strided 3 : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.20% +0.25% +0.00% / +0.00% +0.16% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.453 ( +0.20%) [ +0.08% +0.00% +0.29% / +0.25% +0.25% +0.20%] index_select strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.20% -0.04%] index_select strided 8 : Elapsed 0.024 ms (2.446 ms / 100) 2.442 -> 2.447 ( +0.20%) [ +0.00% +0.16% +0.08% / +0.29% +0.20% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.442 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.00% +0.25% +0.08% / +0.00% +0.20% +0.45%] index_select random : Elapsed 0.024 ms (2.446 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.25% +0.25% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.447 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.20% +0.00% +0.00%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.16% +0.12% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) B = [5, 4, 20, 16] (stride (20, 1600, 1, 100)) A = [5, 4, 40, 16] (stride (64, 16, 320, 1)) dim = 2 2.403 -> 2.403 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.00% +0.29% +0.46%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.08% +0.00% +0.21% / +0.08% +0.04% +0.25%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.416 -> 2.416 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.04% +0.00% +0.00%] index_select linear : Elapsed 0.024 ms (2.418 ms / 100) 2.413 -> 2.410 ( -0.12%) [ +0.00% +0.08% +0.08% / +0.04% -0.12% +0.00%] index_select reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% -0.08% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.407 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.00% +0.04%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.412 -> 2.415 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.12% +0.33%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.08% +0.17% +0.00%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.29% +0.04% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.00% +0.08% +0.12% / +0.04% +0.17% +0.08%] index_select strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.17% +0.00% +0.04% / +0.04% +0.21% +0.37%] index_select strided 8 : Elapsed 0.024 ms (2.408 ms / 100) 2.407 -> 2.410 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.17% +0.12% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.407 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.17% +0.00% +0.04% / -0.04% +0.00% +0.29%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.00% +0.00% +0.08% / +0.21% -0.04% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.12% +0.00% +0.21% / +0.04% +0.00% +0.00%] index_select perm : Elapsed 0.024 ms (2.417 ms / 100) 2.416 -> 2.409 ( -0.29%) [ +0.12% +0.00% +0.00% / +0.04% -0.25% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [5, 4, 20, 16] (stride (1, 5, 320, 20)) A = [5, 4, 40, 16] (stride (2560, 640, 1, 40)) dim = 2 2.408 -> 2.411 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.17% +0.12% +0.17%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.416 -> 2.409 ( -0.29%) [ +0.04% +0.12% +0.00% / -0.04% -0.29% +0.04%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.417 -> 2.414 ( -0.12%) [ +0.08% +0.04% +0.00% / +0.00% -0.12% -0.08%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.414 -> 2.417 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.17% +0.17%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.409 -> 2.410 ( +0.04%) [ +0.12% +0.21% +0.00% / +0.04% +0.08% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.29% +0.21% +0.00% / +0.08% +0.37% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.413 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.12% +0.25%] index_select spread : Elapsed 0.024 ms (2.420 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.00% +0.17% +0.21% / +0.04% +0.41% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.33% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.418 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.17% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.424 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.21% +0.00% +0.08% / +0.08% +0.04% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.425 ms / 100) 2.415 -> 2.422 ( +0.29%) [ +0.00% +0.17% +0.33% / +0.29% +0.58% +0.58%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.416 -> 2.422 ( +0.25%) [ +0.17% +0.21% +0.00% / +0.29% +0.33% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.420 ms / 100) 2.422 -> 2.420 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.00% +0.08%] index_select perm : Elapsed 0.024 ms (2.423 ms / 100) 2.418 -> 2.422 ( +0.17%) [ +0.29% +0.00% +0.25% / +0.21% +0.17% +0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) B = [5, 4, 20, 16] (stride (1, 100, 5, 400)) A = [5, 4, 40, 16] (stride (1, 3200, 5, 200)) dim = 2 2.452 -> 2.450 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.04% -0.04%] index_select const : Elapsed 0.025 ms (2.453 ms / 100) 2.460 -> 2.463 ( +0.12%) [ +0.16% +0.16% +0.00% / +0.20% +0.12% +0.28%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.465 ( +0.08%) [ +0.04% +0.00% +0.16% / +0.12% +0.08% +0.08%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.12% +0.00% +0.20% / +0.04% +0.16% +0.12%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.16% +0.04% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.449 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.12% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.469 -> 2.471 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.20% +0.28%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.20% +0.12% +0.00% / +0.04% +0.04% +0.04%] index_select strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.37% +0.16% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.464 ms / 100) 2.465 -> 2.469 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.16% +0.32%] index_select strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.20% +0.12% +0.00% / +0.04% +0.33% +0.33%] index_select strided 8 : Elapsed 0.025 ms (2.455 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.16% +0.29%] index_select strided 16 : Elapsed 0.025 ms (2.454 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.04% +0.08%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.20% +0.00% +0.08% / +0.04% +0.12% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.470 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.12% +0.16% +0.08%] index_select perm : Elapsed 0.025 ms (2.468 ms / 100) 2.472 -> 2.465 ( -0.28%) [ +0.00% +0.12% +0.08% / +0.12% -0.16% -0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) B = [5, 4, 20, 16] (stride (4, 1, 20, 400)) A = [5, 4, 40, 16] (stride (1, 5, 20, 800)) dim = 2 2.402 -> 2.402 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.08% +0.17%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.415 -> 2.409 ( -0.25%) [ +0.04% +0.00% +0.00% / -0.04% +0.04% -0.25%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.12% +0.00% +0.21% / +0.25% -0.04% -0.08%] index_select linear : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.12% +0.12%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.402 -> 2.401 ( -0.04%) [ +0.08% +0.00% +0.00% / +0.12% -0.04% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.404 ms / 100) 2.399 -> 2.402 ( +0.13%) [ +0.04% +0.00% +0.00% / +0.13% +0.38% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.400 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.00% +0.12% +0.25% / +0.08% +0.29% +0.41%] index_select spread : Elapsed 0.024 ms (2.413 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.17% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.405 -> 2.404 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.37% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.407 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.25% +0.17% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.403 -> 2.410 ( +0.29%) [ +0.17% +0.00% +0.08% / +0.33% +0.29% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.00% +0.00% +0.04% / +0.12% +0.25% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.405 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.08% +0.00% +0.37% / +0.00% +0.25% +0.46%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.04% +0.00% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.411 ( -0.12%) [ +0.08% +0.04% +0.00% / +0.17% -0.12% +0.00%] index_select perm : Elapsed 0.024 ms (2.416 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.12% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) out_shape = [5, 4, 40, 20] in_shape = [5, 4, 40, 16] idx_dim = 3 B = [5, 4, 40, 20] (stride (3200, 800, 20, 1)) A = [5, 4, 40, 16] (stride (4, 1, 320, 20)) dim = 3 3.422 -> 3.422 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.64% +0.58%] index_add_ linear : Elapsed 0.034 ms (3.423 ms / 100) 3.242 -> 3.241 ( -0.03%) [ +0.09% +0.00% +0.00% / -0.03% +0.62% +0.68%] index_copy_ linear : Elapsed 0.032 ms (3.245 ms / 100) 3.426 -> 3.429 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.82% +0.55%] index_add_ reverse : Elapsed 0.034 ms (3.429 ms / 100) 3.249 -> 3.244 ( -0.15%) [ +0.06% +0.00% +0.00% / -0.15% +0.68% +0.74%] index_copy_ reverse : Elapsed 0.033 ms (3.251 ms / 100) 3.407 -> 3.408 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.85% +0.91%] index_add_ spread : Elapsed 0.034 ms (3.407 ms / 100) 3.237 -> 3.240 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.74% +0.80%] index_copy_ spread : Elapsed 0.032 ms (3.237 ms / 100) 3.402 -> 3.406 ( +0.12%) [ +0.15% +0.09% +0.00% / +0.12% +0.79% +0.82%] index_add_ strided 3 : Elapsed 0.034 ms (3.407 ms / 100) 3.238 -> 3.235 ( -0.09%) [ +0.06% +0.00% +0.03% / -0.09% +0.80% +0.74%] index_copy_ strided 3 : Elapsed 0.032 ms (3.240 ms / 100) 3.404 -> 3.406 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.71% +0.76%] index_add_ strided 7 : Elapsed 0.034 ms (3.407 ms / 100) 3.238 -> 3.241 ( +0.09%) [ +0.00% +0.12% +0.00% / +0.09% +0.71% +0.77%] index_copy_ strided 7 : Elapsed 0.032 ms (3.238 ms / 100) 3.405 -> 3.407 ( +0.06%) [ +0.03% +0.12% +0.00% / +0.06% +0.82% +0.88%] index_add_ perm : Elapsed 0.034 ms (3.406 ms / 100) 3.239 -> 3.239 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.74% +0.71%] index_copy_ perm : Elapsed 0.032 ms (3.239 ms / 100) 3.430 -> 3.423 ( -0.20%) [ +0.06% +0.00% +0.06% / -0.20% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.034 ms (3.432 ms / 100) 3.253 -> 3.248 ( -0.15%) [ +0.00% +0.06% +0.00% / -0.15% +0.55% +0.61%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.253 ms / 100) 5.307 -> 5.306 ( -0.02%) [ +0.08% +0.00% +0.17% / +0.23% +0.09% -0.02%] index_select const : Elapsed 0.053 ms (5.311 ms / 100) 5.334 -> 5.327 ( -0.13%) [ +0.11% +0.09% +0.00% / -0.13% +0.00% -0.07%] index_select wrap : Elapsed 0.053 ms (5.340 ms / 100) 5.331 -> 5.335 ( +0.08%) [ +0.02% +0.00% +0.04% / +0.09% +0.08% +0.17%] index_select linear : Elapsed 0.053 ms (5.332 ms / 100) 5.323 -> 5.333 ( +0.19%) [ +0.21% +0.00% +0.13% / +0.36% +0.34% +0.19%] index_select reverse : Elapsed 0.053 ms (5.334 ms / 100) 5.311 -> 5.306 ( -0.09%) [ +0.04% +0.00% +0.06% / -0.09% -0.02% -0.04%] index_select skip64 : Elapsed 0.053 ms (5.313 ms / 100) 5.303 -> 5.314 ( +0.21%) [ +0.00% +0.26% +0.11% / +0.21% +0.23% +0.40%] index_select skip256 : Elapsed 0.053 ms (5.303 ms / 100) 5.333 -> 5.333 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.08% +0.04% +0.00%] index_select spread : Elapsed 0.053 ms (5.333 ms / 100) 5.335 -> 5.329 ( -0.11%) [ +0.00% +0.11% +0.11% / +0.06% -0.11% -0.06%] index_select strided 3 : Elapsed 0.053 ms (5.335 ms / 100) 5.330 -> 5.333 ( +0.06%) [ +0.17% +0.19% +0.00% / +0.08% +0.13% +0.06%] index_select strided 5 : Elapsed 0.053 ms (5.339 ms / 100) 5.330 -> 5.330 ( +0.00%) [ +0.02% +0.00% +0.19% / +0.00% +0.19% +0.24%] index_select strided 7 : Elapsed 0.053 ms (5.331 ms / 100) 5.319 -> 5.314 ( -0.09%) [ +0.04% +0.02% +0.00% / -0.08% +0.06% -0.09%] index_select strided 8 : Elapsed 0.053 ms (5.321 ms / 100) 5.325 -> 5.328 ( +0.06%) [ +0.32% +0.09% +0.00% / +0.30% +0.06% +0.06%] index_select random : Elapsed 0.053 ms (5.342 ms / 100) 5.328 -> 5.331 ( +0.06%) [ +0.00% +0.19% +0.09% / +0.15% +0.06% +0.19%] index_select random_sorted : Elapsed 0.053 ms (5.328 ms / 100) B = [5, 4, 40, 20] (stride (3200, 800, 1, 40)) A = [5, 4, 40, 16] (stride (2560, 16, 64, 1)) dim = 3 4.113 -> 4.114 ( +0.02%) [ +0.00% +0.05% +0.07% / +0.02% +0.73% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.113 ms / 100) 3.922 -> 3.924 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.74% +0.71%] index_copy_ linear : Elapsed 0.039 ms (3.923 ms / 100) 4.101 -> 4.103 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.76% +0.73%] index_add_ reverse : Elapsed 0.041 ms (4.102 ms / 100) 3.916 -> 3.916 ( +0.00%) [ +0.00% +0.15% +0.18% / +0.00% +0.56% +0.56%] index_copy_ reverse : Elapsed 0.039 ms (3.916 ms / 100) 4.090 -> 4.090 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.71% +0.73%] index_add_ spread : Elapsed 0.041 ms (4.091 ms / 100) 3.921 -> 3.920 ( -0.03%) [ +0.03% +0.00% +0.05% / -0.03% +0.71% +0.74%] index_copy_ spread : Elapsed 0.039 ms (3.922 ms / 100) 4.097 -> 4.097 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.81%] index_add_ strided 3 : Elapsed 0.041 ms (4.097 ms / 100) 3.919 -> 3.919 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.46% +0.46%] index_copy_ strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 4.104 -> 4.104 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.041 ms (4.105 ms / 100) 3.916 -> 3.917 ( +0.03%) [ +0.15% +0.15% +0.00% / +0.03% +0.59% +0.64%] index_copy_ strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 4.115 -> 4.114 ( -0.02%) [ +0.05% +0.00% +0.05% / -0.02% +0.68% +0.70%] index_add_ perm : Elapsed 0.041 ms (4.117 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.76% +0.82%] index_copy_ perm : Elapsed 0.039 ms (3.923 ms / 100) 4.100 -> 4.101 ( +0.02%) [ +0.10% +0.05% +0.00% / +0.02% +0.61% +0.59%] index_add_ perm_sorted : Elapsed 0.041 ms (4.104 ms / 100) 3.916 -> 3.922 ( +0.15%) [ +0.10% +0.03% +0.00% / +0.15% +0.43% +0.38%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 5.469 -> 5.471 ( +0.04%) [ +0.00% +0.04% +0.15% / +0.04% +0.15% +0.18%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.477 -> 5.474 ( -0.05%) [ +0.13% +0.05% +0.00% / -0.05% +0.07% -0.02%] index_select wrap : Elapsed 0.055 ms (5.484 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.04% +0.11% +0.00% / +0.04% +0.20% +0.20%] index_select linear : Elapsed 0.055 ms (5.474 ms / 100) 5.475 -> 5.474 ( -0.02%) [ +0.07% +0.18% +0.00% / -0.02% +0.15% +0.07%] index_select reverse : Elapsed 0.055 ms (5.479 ms / 100) 5.471 -> 5.476 ( +0.09%) [ +0.05% +0.15% +0.00% / +0.11% +0.09% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.474 ms / 100) 5.467 -> 5.472 ( +0.09%) [ +0.15% +0.13% +0.00% / +0.15% +0.16% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.477 -> 5.477 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.11% +0.18%] index_select spread : Elapsed 0.055 ms (5.482 ms / 100) 5.476 -> 5.479 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.09% +0.05% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.477 ms / 100) 5.479 -> 5.471 ( -0.15%) [ +0.09% +0.00% +0.00% / -0.15% +0.04% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.484 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.11% +0.09% +0.00% / +0.04% +0.22% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.11% +0.00% +0.09% / +0.04% +0.15% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.478 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.07% +0.02% +0.04%] index_select random : Elapsed 0.055 ms (5.477 ms / 100) 5.474 -> 5.476 ( +0.04%) [ +0.27% +0.13% +0.00% / +0.04% +0.15% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [5, 4, 40, 20] (stride (3200, 40, 1, 160)) A = [5, 4, 40, 16] (stride (1, 5, 20, 800)) dim = 3 3.948 -> 3.964 ( +0.41%) [ +0.18% +0.38% +0.00% / +0.41% +0.96% +1.04%] index_add_ linear : Elapsed 0.040 ms (3.955 ms / 100) 3.786 -> 3.794 ( +0.21%) [ +0.05% +0.18% +0.00% / +0.21% +0.82% +0.79%] index_copy_ linear : Elapsed 0.038 ms (3.788 ms / 100) 3.961 -> 3.968 ( +0.18%) [ +0.15% +0.00% +0.10% / +0.18% +0.81% +0.68%] index_add_ reverse : Elapsed 0.040 ms (3.967 ms / 100) 3.792 -> 3.800 ( +0.21%) [ +0.13% +0.00% +0.08% / +0.21% +0.90% +0.71%] index_copy_ reverse : Elapsed 0.038 ms (3.797 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.00% +0.10% +0.03% / +0.03% +0.51% +0.53%] index_add_ spread : Elapsed 0.039 ms (3.938 ms / 100) 3.777 -> 3.778 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.03% +0.56% +0.71%] index_copy_ spread : Elapsed 0.038 ms (3.777 ms / 100) 3.919 -> 3.921 ( +0.05%) [ +0.00% +0.03% +0.15% / +0.05% +0.92% +0.87%] index_add_ strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 3.770 -> 3.770 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.88% +0.88%] index_copy_ strided 3 : Elapsed 0.038 ms (3.770 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.69% +0.79%] index_add_ strided 7 : Elapsed 0.039 ms (3.921 ms / 100) 3.770 -> 3.776 ( +0.16%) [ +0.00% +0.03% +0.16% / +0.16% +0.72% +0.80%] index_copy_ strided 7 : Elapsed 0.038 ms (3.770 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.31% +0.00% +0.05% / +0.00% +0.94% +0.69%] index_add_ perm : Elapsed 0.039 ms (3.942 ms / 100) 3.769 -> 3.770 ( +0.03%) [ +0.32% +0.16% +0.00% / +0.03% +0.90% +0.66%] index_copy_ perm : Elapsed 0.038 ms (3.781 ms / 100) 3.962 -> 3.967 ( +0.13%) [ +0.15% +0.03% +0.00% / +0.13% +0.63% +0.63%] index_add_ perm_sorted : Elapsed 0.040 ms (3.968 ms / 100) 3.793 -> 3.798 ( +0.13%) [ +0.11% +0.03% +0.00% / +0.13% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.797 ms / 100) 5.461 -> 5.466 ( +0.09%) [ +0.13% +0.05% +0.00% / +0.09% +0.18% +0.20%] index_select const : Elapsed 0.055 ms (5.468 ms / 100) 5.476 -> 5.472 ( -0.07%) [ +0.00% +0.04% +0.09% / -0.07% +0.18% +0.07%] index_select wrap : Elapsed 0.055 ms (5.476 ms / 100) 5.478 -> 5.479 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.11% +0.02% +0.09%] index_select linear : Elapsed 0.055 ms (5.481 ms / 100) 5.476 -> 5.477 ( +0.02%) [ +0.00% +0.15% +0.04% / +0.02% +0.02% +0.15%] index_select reverse : Elapsed 0.055 ms (5.476 ms / 100) 5.463 -> 5.473 ( +0.18%) [ +0.00% +0.15% +0.15% / +0.18% +0.26% +0.44%] index_select skip64 : Elapsed 0.055 ms (5.463 ms / 100) 5.465 -> 5.468 ( +0.05%) [ +0.16% +0.04% +0.00% / +0.05% +0.20% +0.26%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.474 -> 5.473 ( -0.02%) [ +0.00% +0.18% +0.04% / +0.13% +0.05% -0.02%] index_select spread : Elapsed 0.055 ms (5.474 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.00% +0.15% +0.02% / +0.15% +0.07% -0.02%] index_select strided 3 : Elapsed 0.055 ms (5.480 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.16% +0.26% +0.00% / +0.15% +0.18% +0.07%] index_select strided 5 : Elapsed 0.055 ms (5.483 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.11% +0.13%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.469 -> 5.464 ( -0.09%) [ +0.15% +0.22% +0.00% / -0.09% +0.07% +0.22%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.475 -> 5.475 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.15% +0.09% +0.00%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.480 -> 5.474 ( -0.11%) [ +0.07% +0.04% +0.00% / -0.11% +0.09% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [5, 4, 40, 20] (stride (800, 4000, 20, 1)) A = [5, 4, 40, 16] (stride (1, 80, 320, 5)) dim = 3 4.452 -> 4.451 ( -0.02%) [ +0.00% +0.02% +0.09% / -0.02% +0.79% +0.70%] index_add_ linear : Elapsed 0.045 ms (4.452 ms / 100) 4.280 -> 4.283 ( +0.07%) [ +0.00% +0.12% +0.09% / +0.07% +0.82% +0.82%] index_copy_ linear : Elapsed 0.043 ms (4.280 ms / 100) 4.457 -> 4.457 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.76% +0.74%] index_add_ reverse : Elapsed 0.045 ms (4.457 ms / 100) 4.275 -> 4.277 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.82% +0.77%] index_copy_ reverse : Elapsed 0.043 ms (4.275 ms / 100) 4.451 -> 4.452 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.74% +0.76%] index_add_ spread : Elapsed 0.045 ms (4.453 ms / 100) 4.283 -> 4.284 ( +0.02%) [ +0.00% +0.00% +0.07% / +0.02% +0.65% +0.70%] index_copy_ spread : Elapsed 0.043 ms (4.283 ms / 100) 4.458 -> 4.462 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.09% +0.81% +0.85%] index_add_ strided 3 : Elapsed 0.045 ms (4.462 ms / 100) 4.286 -> 4.288 ( +0.05%) [ +0.00% +0.12% +0.00% / +0.05% +0.75% +0.70%] index_copy_ strided 3 : Elapsed 0.043 ms (4.286 ms / 100) 4.457 -> 4.462 ( +0.11%) [ +0.04% +0.07% +0.00% / +0.11% +0.74% +0.70%] index_add_ strided 7 : Elapsed 0.045 ms (4.459 ms / 100) 4.274 -> 4.278 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.09% +0.91% +0.80%] index_copy_ strided 7 : Elapsed 0.043 ms (4.278 ms / 100) 4.454 -> 4.454 ( +0.00%) [ +0.00% +0.11% +0.07% / +0.00% +0.74% +0.70%] index_add_ perm : Elapsed 0.045 ms (4.454 ms / 100) 4.284 -> 4.285 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.70% +0.68%] index_copy_ perm : Elapsed 0.043 ms (4.284 ms / 100) 4.463 -> 4.467 ( +0.09%) [ +0.04% +0.00% +0.07% / +0.09% +0.67% +0.63%] index_add_ perm_sorted : Elapsed 0.045 ms (4.465 ms / 100) 4.287 -> 4.292 ( +0.12%) [ +0.09% +0.00% +0.02% / +0.12% +0.70% +0.82%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.291 ms / 100) 5.566 -> 5.568 ( +0.04%) [ +0.00% +0.00% +0.18% / +0.13% +0.23% +0.04%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.582 -> 5.582 ( +0.00%) [ +0.00% +0.05% +0.20% / +0.00% +0.02% +0.07%] index_select wrap : Elapsed 0.056 ms (5.582 ms / 100) 5.582 -> 5.582 ( +0.00%) [ +0.11% +0.00% +0.13% / +0.00% +0.21% +0.13%] index_select linear : Elapsed 0.056 ms (5.588 ms / 100) 5.586 -> 5.583 ( -0.05%) [ +0.09% +0.00% +0.02% / -0.05% +0.11% +0.13%] index_select reverse : Elapsed 0.056 ms (5.591 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.00% +0.05% +0.04% / +0.05% +0.09% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.561 ( -0.07%) [ +0.14% +0.00% +0.04% / -0.07% +0.13% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.582 -> 5.580 ( -0.04%) [ +0.00% +0.04% +0.09% / -0.04% +0.14% +0.20%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.29% +0.00% +0.04% / +0.22% +0.18% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.595 ms / 100) 5.584 -> 5.588 ( +0.07%) [ +0.11% +0.00% +0.20% / +0.07% +0.13% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.590 ms / 100) 5.578 -> 5.591 ( +0.23%) [ +0.20% +0.09% +0.00% / +0.23% +0.45% +0.36%] index_select strided 7 : Elapsed 0.056 ms (5.589 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.00% +0.05% +0.04% / -0.05% +0.05% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.567 ms / 100) 5.581 -> 5.589 ( +0.14%) [ +0.22% +0.09% +0.00% / +0.14% +0.18% +0.16%] index_select random : Elapsed 0.056 ms (5.593 ms / 100) 5.580 -> 5.582 ( +0.04%) [ +0.18% +0.14% +0.00% / +0.04% +0.23% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.590 ms / 100) B = [5, 4, 40, 20] (stride (1, 4000, 5, 200)) A = [5, 4, 40, 16] (stride (64, 16, 320, 1)) dim = 3 4.101 -> 4.097 ( -0.10%) [ +0.07% +0.00% +0.20% / -0.10% +0.66% +0.85%] index_add_ linear : Elapsed 0.041 ms (4.104 ms / 100) 3.940 -> 3.944 ( +0.10%) [ +0.00% +0.03% +0.08% / +0.10% +0.53% +0.63%] index_copy_ linear : Elapsed 0.039 ms (3.940 ms / 100) 4.098 -> 4.096 ( -0.05%) [ +0.02% +0.10% +0.00% / -0.05% +0.73% +0.51%] index_add_ reverse : Elapsed 0.041 ms (4.099 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.00% +0.25% +0.13% / +0.00% +0.84% +0.59%] index_copy_ reverse : Elapsed 0.039 ms (3.927 ms / 100) 4.101 -> 4.103 ( +0.05%) [ +0.15% +0.02% +0.00% / +0.05% +0.71% +0.59%] index_add_ spread : Elapsed 0.041 ms (4.107 ms / 100) 3.929 -> 3.935 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.15% +0.76% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.932 ms / 100) 4.092 -> 4.088 ( -0.10%) [ +0.15% +0.00% +0.29% / -0.10% +0.68% +0.71%] index_add_ strided 3 : Elapsed 0.041 ms (4.098 ms / 100) 3.923 -> 3.920 ( -0.08%) [ +0.20% +0.00% +0.23% / -0.08% +0.66% +0.71%] index_copy_ strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 4.096 -> 4.096 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.54% +0.59%] index_add_ strided 7 : Elapsed 0.041 ms (4.097 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.05% +0.10% +0.00% / +0.08% +0.61% +0.61%] index_copy_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 4.096 -> 4.095 ( -0.02%) [ +0.24% +0.00% +0.02% / -0.02% +0.61% +0.56%] index_add_ perm : Elapsed 0.041 ms (4.106 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.20% +0.05% +0.00% / +0.03% +0.64% +0.59%] index_copy_ perm : Elapsed 0.039 ms (3.936 ms / 100) 4.096 -> 4.095 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.71% +0.71%] index_add_ perm_sorted : Elapsed 0.041 ms (4.099 ms / 100) 3.931 -> 3.930 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.76% +0.66%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.933 ms / 100) 5.489 -> 5.488 ( -0.02%) [ +0.00% +0.04% +0.20% / -0.02% +0.09% +0.07%] index_select const : Elapsed 0.055 ms (5.489 ms / 100) 5.503 -> 5.499 ( -0.07%) [ +0.00% +0.16% +0.09% / -0.07% +0.02% +0.07%] index_select wrap : Elapsed 0.055 ms (5.503 ms / 100) 5.499 -> 5.502 ( +0.05%) [ +0.13% +0.15% +0.00% / +0.05% +0.13% +0.05%] index_select linear : Elapsed 0.055 ms (5.506 ms / 100) 5.503 -> 5.499 ( -0.07%) [ +0.00% +0.05% +0.13% / -0.07% +0.13% -0.04%] index_select reverse : Elapsed 0.055 ms (5.503 ms / 100) 5.493 -> 5.488 ( -0.09%) [ +0.04% +0.07% +0.00% / +0.00% -0.09% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.495 ms / 100) 5.493 -> 5.490 ( -0.05%) [ +0.09% +0.07% +0.00% / -0.05% +0.04% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.498 ms / 100) 5.504 -> 5.502 ( -0.04%) [ +0.02% +0.05% +0.00% / +0.05% -0.04% +0.00%] index_select spread : Elapsed 0.055 ms (5.505 ms / 100) 5.505 -> 5.501 ( -0.07%) [ +0.05% +0.09% +0.00% / +0.07% -0.05% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.508 ms / 100) 5.511 -> 5.498 ( -0.24%) [ +0.00% +0.00% +0.00% / +0.00% -0.20% -0.24%] index_select strided 5 : Elapsed 0.055 ms (5.511 ms / 100) 5.505 -> 5.501 ( -0.07%) [ +0.07% +0.05% +0.00% / -0.04% -0.07% -0.02%] index_select strided 7 : Elapsed 0.055 ms (5.509 ms / 100) 5.502 -> 5.497 ( -0.09%) [ +0.07% +0.00% +0.07% / +0.04% -0.09% -0.02%] index_select strided 8 : Elapsed 0.055 ms (5.506 ms / 100) 5.503 -> 5.507 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.15% +0.07%] index_select random : Elapsed 0.055 ms (5.503 ms / 100) 5.498 -> 5.502 ( +0.07%) [ +0.22% +0.25% +0.00% / +0.25% +0.07% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.510 ms / 100) B = [5, 4, 40, 20] (stride (80, 20, 400, 1)) A = [5, 4, 40, 16] (stride (640, 3200, 1, 40)) dim = 3 3.639 -> 3.640 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.69% +0.71%] index_add_ linear : Elapsed 0.036 ms (3.641 ms / 100) 3.507 -> 3.506 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.68% +0.68%] index_copy_ linear : Elapsed 0.035 ms (3.507 ms / 100) 3.631 -> 3.632 ( +0.03%) [ +0.06% +0.00% +0.00% / +0.03% +0.91% +0.91%] index_add_ reverse : Elapsed 0.036 ms (3.633 ms / 100) 3.504 -> 3.510 ( +0.17%) [ +0.00% +0.06% +0.00% / +0.17% +0.80% +0.80%] index_copy_ reverse : Elapsed 0.035 ms (3.504 ms / 100) 3.634 -> 3.640 ( +0.17%) [ +0.03% +0.00% +0.03% / +0.17% +0.85% +0.83%] index_add_ spread : Elapsed 0.036 ms (3.635 ms / 100) 3.507 -> 3.506 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.71% +0.71%] index_copy_ spread : Elapsed 0.035 ms (3.507 ms / 100) 3.635 -> 3.635 ( +0.00%) [ +0.00% +0.03% +0.06% / +0.00% +0.77% +0.83%] index_add_ strided 3 : Elapsed 0.036 ms (3.635 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.86% +0.97%] index_copy_ strided 3 : Elapsed 0.035 ms (3.502 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.91% +0.85%] index_add_ strided 7 : Elapsed 0.036 ms (3.632 ms / 100) 3.503 -> 3.505 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.86% +0.80%] index_copy_ strided 7 : Elapsed 0.035 ms (3.504 ms / 100) 3.640 -> 3.640 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.74% +0.71%] index_add_ perm : Elapsed 0.036 ms (3.641 ms / 100) 3.506 -> 3.506 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.77% +0.74%] index_copy_ perm : Elapsed 0.035 ms (3.506 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.88% +0.88%] index_add_ perm_sorted : Elapsed 0.036 ms (3.633 ms / 100) 3.500 -> 3.502 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.89% +0.86%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.501 ms / 100) 5.469 -> 5.470 ( +0.02%) [ +0.04% +0.00% +0.07% / +0.11% +0.02% +0.16%] index_select const : Elapsed 0.055 ms (5.471 ms / 100) 5.486 -> 5.490 ( +0.07%) [ +0.15% +0.00% +0.05% / +0.07% +0.22% +0.07%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.484 -> 5.489 ( +0.09%) [ +0.20% +0.00% +0.05% / +0.09% +0.11% +0.11%] index_select linear : Elapsed 0.055 ms (5.495 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.15% +0.07% +0.00% / +0.09% +0.31% +0.18%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.474 -> 5.473 ( -0.02%) [ +0.00% +0.02% +0.00% / +0.00% -0.02% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.474 ms / 100) 5.472 -> 5.466 ( -0.11%) [ +0.02% +0.00% +0.05% / -0.11% +0.07% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.473 ms / 100) 5.491 -> 5.490 ( -0.02%) [ +0.09% +0.00% +0.00% / -0.02% +0.05% +0.11%] index_select spread : Elapsed 0.055 ms (5.496 ms / 100) 5.488 -> 5.492 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.11% +0.16%] index_select strided 3 : Elapsed 0.055 ms (5.491 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.05% -0.04% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.494 ms / 100) 5.484 -> 5.489 ( +0.09%) [ +0.13% +0.00% +0.15% / +0.09% +0.29% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.472 -> 5.471 ( -0.02%) [ +0.04% +0.11% +0.00% / -0.02% +0.22% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.474 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.04% +0.00% +0.05% / +0.05% +0.04% +0.05%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.486 -> 5.489 ( +0.05%) [ +0.07% +0.02% +0.00% / +0.05% +0.13% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.490 ms / 100) B = [5, 4, 40, 20] (stride (160, 1, 4, 800)) A = [5, 4, 40, 16] (stride (1, 3200, 5, 200)) dim = 3 4.051 -> 4.047 ( -0.10%) [ +0.02% +0.00% +0.10% / -0.10% +0.74% +0.77%] index_add_ linear : Elapsed 0.041 ms (4.052 ms / 100) 3.916 -> 3.913 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.72% +0.77%] index_copy_ linear : Elapsed 0.039 ms (3.916 ms / 100) 4.055 -> 4.054 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.74% +0.74%] index_add_ reverse : Elapsed 0.041 ms (4.055 ms / 100) 3.917 -> 3.920 ( +0.08%) [ +0.05% +0.00% +0.10% / +0.08% +0.89% +0.92%] index_copy_ reverse : Elapsed 0.039 ms (3.919 ms / 100) 4.041 -> 4.044 ( +0.07%) [ +0.02% +0.10% +0.00% / +0.07% +0.84% +0.72%] index_add_ spread : Elapsed 0.040 ms (4.042 ms / 100) 3.912 -> 3.919 ( +0.18%) [ +0.00% +0.26% +0.03% / +0.18% +0.64% +0.56%] index_copy_ spread : Elapsed 0.039 ms (3.912 ms / 100) 4.051 -> 4.053 ( +0.05%) [ +0.10% +0.00% +0.07% / +0.05% +0.74% +0.77%] index_add_ strided 3 : Elapsed 0.041 ms (4.055 ms / 100) 3.916 -> 3.923 ( +0.18%) [ +0.10% +0.15% +0.00% / +0.18% +0.64% +0.66%] index_copy_ strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 4.053 -> 4.054 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.72% +0.72%] index_add_ strided 7 : Elapsed 0.041 ms (4.054 ms / 100) 3.916 -> 3.919 ( +0.08%) [ +0.05% +0.10% +0.00% / +0.08% +0.64% +0.59%] index_copy_ strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 4.046 -> 4.047 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.62% +0.64%] index_add_ perm : Elapsed 0.040 ms (4.047 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.10% +0.00% +0.10% / +0.03% +0.38% +0.41%] index_copy_ perm : Elapsed 0.039 ms (3.922 ms / 100) 4.054 -> 4.055 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.67% +0.69%] index_add_ perm_sorted : Elapsed 0.041 ms (4.057 ms / 100) 3.917 -> 3.921 ( +0.10%) [ +0.05% +0.15% +0.00% / +0.10% +0.82% +0.82%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.919 ms / 100) 5.553 -> 5.552 ( -0.02%) [ +0.00% +0.14% +0.13% / +0.11% +0.13% -0.02%] index_select const : Elapsed 0.056 ms (5.553 ms / 100) 5.561 -> 5.557 ( -0.07%) [ +0.09% +0.00% +0.07% / -0.02% -0.07% +0.05%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.565 ( +0.11%) [ +0.02% +0.07% +0.00% / +0.11% +0.23% +0.11%] index_select linear : Elapsed 0.056 ms (5.560 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.11% +0.00% +0.04% / -0.04% +0.18% +0.16%] index_select reverse : Elapsed 0.056 ms (5.567 ms / 100) 5.554 -> 5.552 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.07% +0.14%] index_select skip64 : Elapsed 0.056 ms (5.554 ms / 100) 5.556 -> 5.556 ( +0.00%) [ +0.00% +0.13% +0.02% / +0.07% +0.00% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.556 ms / 100) 5.562 -> 5.557 ( -0.09%) [ +0.02% +0.00% +0.04% / +0.11% +0.02% -0.09%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.562 -> 5.560 ( -0.04%) [ +0.00% +0.13% +0.07% / +0.04% -0.02% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.564 -> 5.555 ( -0.16%) [ +0.09% +0.09% +0.00% / +0.00% -0.02% -0.16%] index_select strided 5 : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.559 ( -0.11%) [ +0.04% +0.02% +0.00% / -0.11% +0.00% -0.04%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.553 -> 5.549 ( -0.07%) [ +0.18% +0.00% +0.13% / +0.04% -0.07% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.04% +0.00% +0.00% / -0.11% +0.04% +0.02%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.561 -> 5.555 ( -0.11%) [ +0.13% +0.09% +0.00% / +0.02% -0.11% -0.09%] index_select random_sorted : Elapsed 0.056 ms (5.568 ms / 100) B = [5, 4, 40, 20] (stride (1, 5, 20, 800)) A = [5, 4, 40, 16] (stride (4, 1, 20, 800)) dim = 3 3.959 -> 3.970 ( +0.28%) [ +0.00% +0.13% +0.25% / +0.28% +1.11% +1.06%] index_add_ linear : Elapsed 0.040 ms (3.959 ms / 100) 3.809 -> 3.816 ( +0.18%) [ +0.00% +0.00% +0.11% / +0.18% +0.95% +0.95%] index_copy_ linear : Elapsed 0.038 ms (3.809 ms / 100) 3.979 -> 3.982 ( +0.08%) [ +0.05% +0.20% +0.00% / +0.08% +0.73% +0.73%] index_add_ reverse : Elapsed 0.040 ms (3.981 ms / 100) 3.816 -> 3.825 ( +0.24%) [ +0.24% +0.16% +0.00% / +0.24% +0.97% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.825 ms / 100) 3.913 -> 3.921 ( +0.20%) [ +0.13% +0.00% +0.36% / +0.20% +0.59% +0.54%] index_add_ spread : Elapsed 0.039 ms (3.918 ms / 100) 3.778 -> 3.777 ( -0.03%) [ +0.11% +0.11% +0.00% / -0.03% +0.64% +0.66%] index_copy_ spread : Elapsed 0.038 ms (3.782 ms / 100) 3.898 -> 3.918 ( +0.51%) [ +0.15% +0.00% +0.51% / +0.51% +0.69% +0.74%] index_add_ strided 3 : Elapsed 0.039 ms (3.904 ms / 100) 3.761 -> 3.765 ( +0.11%) [ +0.00% +0.11% +0.08% / +0.11% +0.74% +0.74%] index_copy_ strided 3 : Elapsed 0.038 ms (3.761 ms / 100) 3.981 -> 3.974 ( -0.18%) [ +0.00% +0.03% +0.03% / -0.18% +0.68% +0.55%] index_add_ strided 7 : Elapsed 0.040 ms (3.981 ms / 100) 3.825 -> 3.820 ( -0.13%) [ +0.03% +0.00% +0.05% / -0.13% +0.55% +0.63%] index_copy_ strided 7 : Elapsed 0.038 ms (3.826 ms / 100) 3.963 -> 3.972 ( +0.23%) [ +0.00% +0.30% +0.20% / +0.23% +0.98% +0.83%] index_add_ perm : Elapsed 0.040 ms (3.963 ms / 100) 3.812 -> 3.818 ( +0.16%) [ +0.00% +0.21% +0.13% / +0.16% +0.79% +0.79%] index_copy_ perm : Elapsed 0.038 ms (3.812 ms / 100) 3.896 -> 3.902 ( +0.15%) [ +0.23% +0.15% +0.00% / +0.15% +0.80% +0.80%] index_add_ perm_sorted : Elapsed 0.039 ms (3.905 ms / 100) 3.758 -> 3.761 ( +0.08%) [ +0.11% +0.19% +0.00% / +0.08% +0.96% +1.06%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.762 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.05% +0.11% +0.00% / +0.11% +0.11% +0.05%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.497 -> 5.497 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.05% +0.04%] index_select wrap : Elapsed 0.055 ms (5.501 ms / 100) 5.488 -> 5.494 ( +0.11%) [ +0.00% +0.09% +0.16% / +0.15% +0.11% +0.16%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.493 -> 5.494 ( +0.02%) [ +0.15% +0.02% +0.00% / +0.02% +0.13% +0.07%] index_select reverse : Elapsed 0.055 ms (5.501 ms / 100) 5.480 -> 5.486 ( +0.11%) [ +0.00% +0.15% +0.13% / +0.11% +0.13% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.480 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.11% +0.02% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.489 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.00% +0.00% +0.07% / -0.02% +0.05% +0.09%] index_select spread : Elapsed 0.055 ms (5.492 ms / 100) 5.496 -> 5.491 ( -0.09%) [ +0.00% +0.07% +0.04% / -0.09% +0.05% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.490 -> 5.494 ( +0.07%) [ +0.13% +0.00% +0.04% / +0.07% +0.16% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.497 ms / 100) 5.486 -> 5.499 ( +0.24%) [ +0.11% +0.13% +0.00% / +0.24% +0.31% +0.31%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.00% +0.13% +0.05% / -0.02% +0.18% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.484 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.11% +0.05% +0.00% / -0.05% +0.07% +0.05%] index_select random : Elapsed 0.055 ms (5.497 ms / 100) 5.494 -> 5.486 ( -0.15%) [ +0.00% +0.02% +0.04% / +0.02% -0.09% -0.15%] index_select random_sorted : Elapsed 0.055 ms (5.494 ms / 100) B = [5, 4, 40, 20] (stride (1, 5, 20, 800)) A = [5, 4, 40, 16] (stride (1, 5, 20, 800)) dim = 3 4.277 -> 4.286 ( +0.21%) [ +0.23% +0.07% +0.00% / +0.21% +0.89% +0.91%] index_add_ linear : Elapsed 0.043 ms (4.287 ms / 100) 4.119 -> 4.122 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.80% +0.78%] index_copy_ linear : Elapsed 0.041 ms (4.122 ms / 100) 4.263 -> 4.268 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.82% +0.73%] index_add_ reverse : Elapsed 0.043 ms (4.263 ms / 100) 4.119 -> 4.121 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.78% +0.63%] index_copy_ reverse : Elapsed 0.041 ms (4.119 ms / 100) 4.262 -> 4.263 ( +0.02%) [ +0.00% +0.02% +0.16% / +0.02% +1.03% +1.17%] index_add_ spread : Elapsed 0.043 ms (4.262 ms / 100) 4.107 -> 4.109 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.05% +0.90% +0.95%] index_copy_ spread : Elapsed 0.041 ms (4.108 ms / 100) 4.255 -> 4.269 ( +0.33%) [ +0.33% +0.02% +0.00% / +0.33% +0.87% +1.03%] index_add_ strided 3 : Elapsed 0.043 ms (4.269 ms / 100) 4.096 -> 4.104 ( +0.20%) [ +0.20% +0.05% +0.00% / +0.20% +0.85% +0.88%] index_copy_ strided 3 : Elapsed 0.041 ms (4.104 ms / 100) 4.258 -> 4.270 ( +0.28%) [ +0.00% +0.19% +0.16% / +0.28% +0.63% +0.89%] index_add_ strided 7 : Elapsed 0.043 ms (4.258 ms / 100) 4.098 -> 4.107 ( +0.22%) [ +0.00% +0.20% +0.12% / +0.22% +0.71% +0.81%] index_copy_ strided 7 : Elapsed 0.041 ms (4.098 ms / 100) 4.265 -> 4.276 ( +0.26%) [ +0.00% +0.02% +0.23% / +0.26% +0.94% +0.77%] index_add_ perm : Elapsed 0.043 ms (4.265 ms / 100) 4.107 -> 4.117 ( +0.24%) [ +0.00% +0.02% +0.32% / +0.24% +0.90% +0.73%] index_copy_ perm : Elapsed 0.041 ms (4.107 ms / 100) 4.265 -> 4.272 ( +0.16%) [ +0.14% +0.00% +0.12% / +0.16% +0.77% +0.61%] index_add_ perm_sorted : Elapsed 0.043 ms (4.271 ms / 100) 4.118 -> 4.120 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.75% +0.61%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.120 ms / 100) 5.558 -> 5.555 ( -0.05%) [ +0.00% +0.11% +0.04% / +0.07% +0.07% -0.05%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.13% +0.07% +0.02%] index_select wrap : Elapsed 0.056 ms (5.568 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.00% +0.05% +0.11% / +0.14% +0.02% +0.11%] index_select linear : Elapsed 0.056 ms (5.565 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.00% +0.02% +0.16%] index_select reverse : Elapsed 0.056 ms (5.572 ms / 100) 5.552 -> 5.561 ( +0.16%) [ +0.23% +0.11% +0.00% / +0.16% +0.29% +0.22%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.550 -> 5.561 ( +0.20%) [ +0.20% +0.18% +0.00% / +0.20% +0.23% +0.25%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.16% +0.09% +0.00% / +0.14% +0.07% +0.13%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.575 -> 5.571 ( -0.07%) [ +0.00% +0.00% +0.09% / +0.11% -0.02% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.575 ms / 100) 5.582 -> 5.568 ( -0.25%) [ +0.00% +0.00% +0.00% / +0.02% -0.25% -0.21%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.575 -> 5.570 ( -0.09%) [ +0.00% +0.09% +0.05% / +0.16% -0.09% +0.02%] index_select strided 7 : Elapsed 0.056 ms (5.575 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.11% +0.00% +0.04% / +0.02% -0.02% +0.00%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.570 -> 5.564 ( -0.11%) [ +0.00% +0.00% +0.02% / +0.02% -0.11% +0.07%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.561 ( -0.14%) [ +0.04% +0.13% +0.00% / +0.02% -0.14% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) out_shape = [20, 16, 4, 40] in_shape = [5, 16, 4, 40] idx_dim = 0 B = [20, 16, 4, 40] (stride (2560, 1, 16, 64)) A = [5, 16, 4, 40] (stride (1, 5, 3200, 80)) dim = 0 1.842 -> 1.848 ( +0.33%) [ +0.00% +0.11% +0.22% / +0.33% +0.71% +0.76%] index_add_ linear : Elapsed 0.018 ms (1.842 ms / 100) 1.787 -> 1.793 ( +0.34%) [ +0.00% +0.28% +0.34% / +0.34% +1.01% +1.29%] index_copy_ linear : Elapsed 0.018 ms (1.787 ms / 100) 1.842 -> 1.847 ( +0.27%) [ +0.00% +0.05% +0.27% / +0.27% +0.71% +0.71%] index_add_ reverse : Elapsed 0.018 ms (1.842 ms / 100) 1.789 -> 1.794 ( +0.28%) [ +0.00% +0.11% +0.11% / +0.28% +0.89% +1.01%] index_copy_ reverse : Elapsed 0.018 ms (1.789 ms / 100) 1.837 -> 1.840 ( +0.16%) [ +0.00% +0.05% +0.16% / +0.16% +0.98% +1.09%] index_add_ spread : Elapsed 0.018 ms (1.837 ms / 100) 1.783 -> 1.785 ( +0.11%) [ +0.00% +0.11% +0.17% / +0.11% +1.12% +0.95%] index_copy_ spread : Elapsed 0.018 ms (1.783 ms / 100) 1.842 -> 1.842 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.81% +1.14%] index_add_ strided 3 : Elapsed 0.018 ms (1.842 ms / 100) 1.786 -> 1.789 ( +0.17%) [ +0.28% +0.00% +0.06% / +0.17% +1.23% +1.23%] index_copy_ strided 3 : Elapsed 0.018 ms (1.791 ms / 100) 1.843 -> 1.841 ( -0.11%) [ +0.00% +0.00% +0.22% / -0.11% +0.65% +0.54%] index_add_ strided 7 : Elapsed 0.018 ms (1.843 ms / 100) 1.788 -> 1.791 ( +0.17%) [ +0.06% +0.00% +0.28% / +0.17% +0.78% +0.56%] index_copy_ strided 7 : Elapsed 0.018 ms (1.789 ms / 100) 1.846 -> 1.844 ( -0.11%) [ +0.11% +0.05% +0.00% / -0.11% +0.87% +0.81%] index_add_ perm : Elapsed 0.018 ms (1.848 ms / 100) 1.793 -> 1.797 ( +0.22%) [ +0.00% +0.06% +0.06% / +0.22% +0.73% +0.95%] index_copy_ perm : Elapsed 0.018 ms (1.793 ms / 100) 1.844 -> 1.847 ( +0.16%) [ +0.00% +0.05% +0.00% / +0.16% +0.98% +0.76%] index_add_ perm_sorted : Elapsed 0.018 ms (1.844 ms / 100) 1.791 -> 1.796 ( +0.28%) [ +0.00% +0.17% +0.28% / +0.28% +0.89% +0.95%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.791 ms / 100) 8.249 -> 8.267 ( +0.22%) [ +0.00% +0.19% +0.19% / +0.22% +0.29% +0.32%] index_select const : Elapsed 0.082 ms (8.249 ms / 100) 8.249 -> 8.242 ( -0.08%) [ +0.05% +0.08% +0.00% / -0.08% +0.06% +0.21%] index_select wrap : Elapsed 0.083 ms (8.253 ms / 100) 8.243 -> 8.245 ( +0.02%) [ +0.13% +0.00% +0.11% / +0.13% +0.02% +0.13%] index_select linear : Elapsed 0.083 ms (8.254 ms / 100) 8.246 -> 8.262 ( +0.19%) [ +0.00% +0.23% +0.22% / +0.41% +0.24% +0.19%] index_select reverse : Elapsed 0.082 ms (8.246 ms / 100) 8.247 -> 8.256 ( +0.11%) [ +0.00% +0.23% +0.10% / +0.15% +0.11% +0.17%] index_select skip64 : Elapsed 0.082 ms (8.247 ms / 100) 8.257 -> 8.248 ( -0.11%) [ +0.00% +0.12% +0.01% / -0.05% -0.11% +0.19%] index_select skip256 : Elapsed 0.083 ms (8.257 ms / 100) 8.257 -> 8.246 ( -0.13%) [ +0.01% +0.00% +0.07% / -0.12% +0.21% -0.13%] index_select spread : Elapsed 0.083 ms (8.258 ms / 100) 8.251 -> 8.260 ( +0.11%) [ +0.00% +0.05% +0.21% / +0.11% +0.12% +0.32%] index_select strided 3 : Elapsed 0.083 ms (8.251 ms / 100) 8.260 -> 8.243 ( -0.21%) [ +0.00% +0.00% +0.12% / -0.21% +0.08% +0.06%] index_select random : Elapsed 0.083 ms (8.260 ms / 100) 8.245 -> 8.259 ( +0.17%) [ +0.01% +0.00% +0.12% / +0.17% +0.46% +0.52%] index_select random_sorted : Elapsed 0.082 ms (8.246 ms / 100) B = [20, 16, 4, 40] (stride (640, 1, 12800, 16)) A = [5, 16, 4, 40] (stride (1, 200, 3200, 5)) dim = 0 1.807 -> 1.811 ( +0.22%) [ +0.06% +0.44% +0.00% / +0.22% +0.66% +0.72%] index_add_ linear : Elapsed 0.018 ms (1.808 ms / 100) 1.762 -> 1.766 ( +0.23%) [ +0.06% +0.00% +0.17% / +0.23% +1.31% +0.62%] index_copy_ linear : Elapsed 0.018 ms (1.763 ms / 100) 1.809 -> 1.810 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.06% +0.22% +0.55%] index_add_ reverse : Elapsed 0.018 ms (1.809 ms / 100) 1.763 -> 1.767 ( +0.23%) [ +0.00% +0.17% +0.00% / +0.23% +0.45% +0.34%] index_copy_ reverse : Elapsed 0.018 ms (1.763 ms / 100) 1.815 -> 1.810 ( -0.28%) [ +0.11% +0.00% +0.11% / -0.17% -0.28% +0.00%] index_add_ spread : Elapsed 0.018 ms (1.817 ms / 100) 1.767 -> 1.765 ( -0.11%) [ +0.00% +0.00% +0.23% / +0.17% -0.11% +0.00%] index_copy_ spread : Elapsed 0.018 ms (1.767 ms / 100) 1.807 -> 1.808 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.94% +0.77%] index_add_ strided 3 : Elapsed 0.018 ms (1.807 ms / 100) 1.758 -> 1.761 ( +0.17%) [ +0.51% +0.23% +0.00% / +0.17% +1.14% +0.85%] index_copy_ strided 3 : Elapsed 0.018 ms (1.767 ms / 100) 1.828 -> 1.827 ( -0.05%) [ +0.11% +0.11% +0.00% / -0.05% +0.38% +0.88%] index_add_ strided 7 : Elapsed 0.018 ms (1.830 ms / 100) 1.782 -> 1.782 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +0.28% +0.34%] index_copy_ strided 7 : Elapsed 0.018 ms (1.782 ms / 100) 1.810 -> 1.810 ( +0.00%) [ +0.00% +0.11% +0.17% / +0.00% +0.94% +0.83%] index_add_ perm : Elapsed 0.018 ms (1.810 ms / 100) 1.763 -> 1.765 ( +0.11%) [ +0.17% +0.28% +0.00% / +0.11% +1.08% +1.19%] index_copy_ perm : Elapsed 0.018 ms (1.766 ms / 100) 1.810 -> 1.808 ( -0.11%) [ +0.00% +0.22% +0.00% / -0.11% +1.49% +1.49%] index_add_ perm_sorted : Elapsed 0.018 ms (1.810 ms / 100) 1.764 -> 1.766 ( +0.11%) [ +0.00% +0.23% +0.00% / +0.11% +1.76% +1.59%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.764 ms / 100) 8.544 -> 8.557 ( +0.15%) [ +0.00% +0.15% +0.21% / +0.15% +0.53% +0.22%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.554 -> 8.569 ( +0.18%) [ +0.00% +0.08% +0.02% / +0.18% +0.28% +0.33%] index_select wrap : Elapsed 0.086 ms (8.554 ms / 100) 8.558 -> 8.564 ( +0.07%) [ +0.11% +0.02% +0.00% / +0.12% +0.07% +0.33%] index_select linear : Elapsed 0.086 ms (8.567 ms / 100) 8.546 -> 8.562 ( +0.19%) [ +0.11% +0.05% +0.00% / +0.19% +0.23% +0.21%] index_select reverse : Elapsed 0.086 ms (8.555 ms / 100) 8.557 -> 8.561 ( +0.05%) [ +0.00% +0.04% +0.07% / +0.05% +0.13% +0.21%] index_select skip64 : Elapsed 0.086 ms (8.557 ms / 100) 8.555 -> 8.557 ( +0.02%) [ +0.07% +0.00% +0.05% / +0.02% +0.09% +0.12%] index_select skip256 : Elapsed 0.086 ms (8.561 ms / 100) 8.558 -> 8.544 ( -0.16%) [ +0.19% +0.06% +0.00% / +0.15% +0.02% -0.16%] index_select spread : Elapsed 0.086 ms (8.574 ms / 100) 8.556 -> 8.561 ( +0.06%) [ +0.09% +0.08% +0.00% / +0.06% +0.09% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.564 ms / 100) 8.541 -> 8.563 ( +0.26%) [ +0.00% +0.23% +0.12% / +0.26% +0.36% +0.39%] index_select random : Elapsed 0.085 ms (8.541 ms / 100) 8.565 -> 8.546 ( -0.22%) [ +0.01% +0.00% +0.04% / -0.22% +0.36% +0.07%] index_select random_sorted : Elapsed 0.086 ms (8.566 ms / 100) B = [20, 16, 4, 40] (stride (16, 1, 12800, 320)) A = [5, 16, 4, 40] (stride (1, 800, 5, 20)) dim = 0 1.799 -> 1.802 ( +0.17%) [ +0.17% +0.11% +0.00% / +0.17% +0.89% +1.00%] index_add_ linear : Elapsed 0.018 ms (1.802 ms / 100) 1.752 -> 1.753 ( +0.06%) [ +0.00% +0.29% +0.23% / +0.06% +0.97% +0.97%] index_copy_ linear : Elapsed 0.018 ms (1.752 ms / 100) 1.801 -> 1.801 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.72% +0.61%] index_add_ reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.755 -> 1.760 ( +0.28%) [ +0.11% +0.17% +0.00% / +0.28% +0.74% +0.74%] index_copy_ reverse : Elapsed 0.018 ms (1.757 ms / 100) 1.818 -> 1.821 ( +0.17%) [ +0.11% +0.00% +0.00% / +0.17% +0.50% +0.61%] index_add_ spread : Elapsed 0.018 ms (1.820 ms / 100) 1.774 -> 1.775 ( +0.06%) [ +0.00% +0.00% +0.17% / +0.06% +0.45% +0.45%] index_copy_ spread : Elapsed 0.018 ms (1.774 ms / 100) 1.809 -> 1.808 ( -0.06%) [ +0.11% +0.11% +0.00% / -0.06% +0.61% +0.72%] index_add_ strided 3 : Elapsed 0.018 ms (1.811 ms / 100) 1.765 -> 1.770 ( +0.28%) [ +0.34% +0.28% +0.00% / +0.28% +0.57% +0.79%] index_copy_ strided 3 : Elapsed 0.018 ms (1.771 ms / 100) 1.804 -> 1.806 ( +0.11%) [ +0.22% +0.28% +0.00% / +0.11% +0.72% +0.67%] index_add_ strided 7 : Elapsed 0.018 ms (1.808 ms / 100) 1.763 -> 1.763 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.28% +0.34%] index_copy_ strided 7 : Elapsed 0.018 ms (1.763 ms / 100) 1.818 -> 1.818 ( +0.00%) [ +0.06% +0.22% +0.00% / +0.00% +0.44% +0.55%] index_add_ perm : Elapsed 0.018 ms (1.819 ms / 100) 1.772 -> 1.776 ( +0.23%) [ +0.28% +0.00% +0.00% / +0.23% +0.56% +0.34%] index_copy_ perm : Elapsed 0.018 ms (1.777 ms / 100) 1.809 -> 1.808 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +0.83% +0.66%] index_add_ perm_sorted : Elapsed 0.018 ms (1.810 ms / 100) 1.763 -> 1.770 ( +0.40%) [ +0.06% +0.28% +0.00% / +0.40% +0.68% +0.74%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.764 ms / 100) 8.225 -> 8.238 ( +0.16%) [ +0.00% +0.16% +0.24% / +0.34% +0.39% +0.16%] index_select const : Elapsed 0.082 ms (8.225 ms / 100) 8.236 -> 8.241 ( +0.06%) [ +0.06% +0.00% +0.33% / +0.21% +0.13% +0.06%] index_select wrap : Elapsed 0.082 ms (8.241 ms / 100) 8.226 -> 8.238 ( +0.15%) [ +0.29% +0.00% +0.19% / +0.34% +0.23% +0.15%] index_select linear : Elapsed 0.083 ms (8.250 ms / 100) 8.242 -> 8.222 ( -0.24%) [ +0.00% +0.23% +0.01% / -0.02% -0.24% +0.18%] index_select reverse : Elapsed 0.082 ms (8.242 ms / 100) 8.235 -> 8.235 ( +0.00%) [ +0.19% +0.18% +0.00% / +0.16% +0.24% +0.00%] index_select skip64 : Elapsed 0.083 ms (8.251 ms / 100) 8.238 -> 8.245 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.15% +0.22%] index_select skip256 : Elapsed 0.083 ms (8.251 ms / 100) 8.235 -> 8.246 ( +0.13%) [ +0.04% +0.00% +0.15% / +0.13% +0.21% +0.13%] index_select spread : Elapsed 0.082 ms (8.238 ms / 100) 8.233 -> 8.239 ( +0.07%) [ +0.34% +0.00% +0.13% / +0.15% +0.07% +0.12%] index_select strided 3 : Elapsed 0.083 ms (8.261 ms / 100) 8.232 -> 8.234 ( +0.02%) [ +0.00% +0.09% +0.27% / +0.02% +0.27% +0.41%] index_select random : Elapsed 0.082 ms (8.232 ms / 100) 8.235 -> 8.238 ( +0.04%) [ +0.17% +0.00% +0.09% / +0.04% +0.24% +0.29%] index_select random_sorted : Elapsed 0.082 ms (8.249 ms / 100) B = [20, 16, 4, 40] (stride (64, 1, 16, 1280)) A = [5, 16, 4, 40] (stride (2560, 1, 640, 16)) dim = 0 1.759 -> 1.762 ( +0.17%) [ +0.06% +0.06% +0.00% / +0.17% +0.17% +0.28%] index_add_ linear : Elapsed 0.018 ms (1.760 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.17% +0.23% +0.00% / +0.17% +0.06% +0.17%] index_copy_ linear : Elapsed 0.017 ms (1.718 ms / 100) 1.759 -> 1.755 ( -0.23%) [ +0.00% +0.34% +0.00% / +0.17% -0.23% -0.17%] index_add_ reverse : Elapsed 0.018 ms (1.759 ms / 100) 1.719 -> 1.712 ( -0.41%) [ +0.00% +0.06% +0.00% / +0.00% -0.41% -0.35%] index_copy_ reverse : Elapsed 0.017 ms (1.719 ms / 100) 1.756 -> 1.760 ( +0.23%) [ +0.17% +0.11% +0.00% / +0.23% +0.46% +0.51%] index_add_ spread : Elapsed 0.018 ms (1.759 ms / 100) 1.722 -> 1.721 ( -0.06%) [ +0.06% +0.06% +0.00% / +0.06% -0.06% +0.23%] index_copy_ spread : Elapsed 0.017 ms (1.723 ms / 100) 1.752 -> 1.757 ( +0.29%) [ +0.00% +0.06% +0.00% / +0.29% +0.86% +0.80%] index_add_ strided 3 : Elapsed 0.018 ms (1.752 ms / 100) 1.715 -> 1.717 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.12% +0.52% +0.64%] index_copy_ strided 3 : Elapsed 0.017 ms (1.717 ms / 100) 1.757 -> 1.761 ( +0.23%) [ +0.28% +0.17% +0.00% / +0.23% +0.74% +0.74%] index_add_ strided 7 : Elapsed 0.018 ms (1.762 ms / 100) 1.720 -> 1.719 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +0.35% +0.52%] index_copy_ strided 7 : Elapsed 0.017 ms (1.721 ms / 100) 1.758 -> 1.758 ( +0.00%) [ +0.06% +0.00% +0.11% / +0.00% +0.85% +0.80%] index_add_ perm : Elapsed 0.018 ms (1.759 ms / 100) 1.727 -> 1.726 ( -0.06%) [ +0.12% +0.17% +0.00% / -0.06% +0.35% +0.29%] index_copy_ perm : Elapsed 0.017 ms (1.729 ms / 100) 1.756 -> 1.758 ( +0.11%) [ +0.23% +0.17% +0.00% / +0.11% +0.57% +0.80%] index_add_ perm_sorted : Elapsed 0.018 ms (1.760 ms / 100) 1.724 -> 1.722 ( -0.12%) [ +0.17% +0.00% +0.00% / +0.12% -0.12% +0.00%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.727 ms / 100) 8.219 -> 8.225 ( +0.07%) [ +0.00% +0.43% +0.06% / +0.07% +0.19% +0.10%] index_select const : Elapsed 0.082 ms (8.219 ms / 100) 8.250 -> 8.250 ( +0.00%) [ +0.29% +0.12% +0.00% / +0.00% +0.22% +0.07%] index_select wrap : Elapsed 0.083 ms (8.274 ms / 100) 8.249 -> 8.246 ( -0.04%) [ +0.11% +0.01% +0.00% / +0.00% +0.11% -0.04%] index_select linear : Elapsed 0.083 ms (8.258 ms / 100) 8.226 -> 8.229 ( +0.04%) [ +0.30% +0.28% +0.00% / +0.04% +0.23% +0.33%] index_select reverse : Elapsed 0.083 ms (8.251 ms / 100) 8.214 -> 8.216 ( +0.02%) [ +0.00% +0.24% +0.33% / +0.27% +0.23% +0.02%] index_select skip64 : Elapsed 0.082 ms (8.214 ms / 100) 8.217 -> 8.228 ( +0.13%) [ +0.12% +0.00% +0.17% / +0.13% +0.13% +0.35%] index_select skip256 : Elapsed 0.082 ms (8.227 ms / 100) 8.222 -> 8.241 ( +0.23%) [ +0.54% +0.00% +0.34% / +0.40% +0.32% +0.23%] index_select spread : Elapsed 0.083 ms (8.266 ms / 100) 8.248 -> 8.236 ( -0.15%) [ +0.33% +0.22% +0.00% / +0.13% -0.05% -0.15%] index_select strided 3 : Elapsed 0.083 ms (8.275 ms / 100) 8.235 -> 8.232 ( -0.04%) [ +0.06% +0.35% +0.00% / -0.04% +0.11% +0.09%] index_select random : Elapsed 0.082 ms (8.240 ms / 100) 8.236 -> 8.229 ( -0.08%) [ +0.13% +0.08% +0.00% / -0.08% +0.01% +0.35%] index_select random_sorted : Elapsed 0.082 ms (8.247 ms / 100) out_shape = [5, 20, 4, 40] in_shape = [5, 16, 4, 40] idx_dim = 1 B = [5, 20, 4, 40] (stride (3200, 160, 1, 4)) A = [5, 16, 4, 40] (stride (2560, 160, 1, 4)) dim = 1 3.955 -> 3.954 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.66% +0.68%] index_add_ linear : Elapsed 0.040 ms (3.955 ms / 100) 3.841 -> 3.846 ( +0.13%) [ +0.13% +0.08% +0.00% / +0.13% +0.68% +0.70%] index_copy_ linear : Elapsed 0.038 ms (3.846 ms / 100) 3.952 -> 3.954 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.78% +0.81%] index_add_ reverse : Elapsed 0.040 ms (3.953 ms / 100) 3.836 -> 3.836 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.81%] index_copy_ reverse : Elapsed 0.038 ms (3.836 ms / 100) 3.954 -> 3.953 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.66% +0.71%] index_add_ spread : Elapsed 0.040 ms (3.954 ms / 100) 3.825 -> 3.825 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.68% +0.71%] index_copy_ spread : Elapsed 0.038 ms (3.825 ms / 100) 3.951 -> 3.950 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.71% +0.73%] index_add_ strided 3 : Elapsed 0.040 ms (3.952 ms / 100) 3.826 -> 3.829 ( +0.08%) [ +0.00% +0.00% +0.10% / +0.08% +0.97% +0.73%] index_copy_ strided 3 : Elapsed 0.038 ms (3.826 ms / 100) 3.953 -> 3.953 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.61% +0.63%] index_add_ strided 7 : Elapsed 0.040 ms (3.955 ms / 100) 3.831 -> 3.833 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.55% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.833 ms / 100) 3.956 -> 3.957 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.61% +0.61%] index_add_ perm : Elapsed 0.040 ms (3.958 ms / 100) 3.828 -> 3.831 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_copy_ perm : Elapsed 0.038 ms (3.829 ms / 100) 3.953 -> 3.953 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.71% +0.71%] index_add_ perm_sorted : Elapsed 0.040 ms (3.955 ms / 100) 3.838 -> 3.837 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.65% +0.65%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.838 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.00% +0.11% +0.05% / +0.13% +0.22% +0.11%] index_select const : Elapsed 0.056 ms (5.551 ms / 100) 5.559 -> 5.566 ( +0.13%) [ +0.07% +0.09% +0.00% / +0.13% +0.14% +0.13%] index_select wrap : Elapsed 0.056 ms (5.563 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.16% +0.00% +0.04% / +0.02% +0.02% +0.11%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.16% +0.22% +0.00% / +0.07% +0.14% +0.20%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.558 -> 5.552 ( -0.11%) [ +0.04% +0.00% +0.07% / -0.11% +0.11% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.560 ms / 100) 5.553 -> 5.554 ( +0.02%) [ +0.00% +0.05% +0.16% / +0.02% +0.05% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.553 ms / 100) 5.562 -> 5.554 ( -0.14%) [ +0.00% +0.04% +0.05% / +0.16% -0.14% +0.04%] index_select spread : Elapsed 0.056 ms (5.562 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.07% +0.02% +0.04%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.567 -> 5.559 ( -0.14%) [ +0.02% +0.00% +0.00% / +0.05% -0.14% +0.02%] index_select strided 5 : Elapsed 0.056 ms (5.568 ms / 100) 5.558 -> 5.564 ( +0.11%) [ +0.07% +0.00% +0.20% / +0.11% +0.13% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.562 ms / 100) 5.553 -> 5.551 ( -0.04%) [ +0.00% +0.14% +0.02% / -0.04% +0.22% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.553 ms / 100) 5.558 -> 5.565 ( +0.13%) [ +0.00% +0.16% +0.00% / +0.13% +0.13% +0.13%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.05% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.561 ms / 100) B = [5, 20, 4, 40] (stride (160, 800, 40, 1)) A = [5, 16, 4, 40] (stride (64, 1, 16, 320)) dim = 1 4.080 -> 4.088 ( +0.20%) [ +0.17% +0.15% +0.00% / +0.20% +0.96% +0.69%] index_add_ linear : Elapsed 0.041 ms (4.087 ms / 100) 3.907 -> 3.911 ( +0.10%) [ +0.26% +0.41% +0.00% / +0.10% +0.87% +0.87%] index_copy_ linear : Elapsed 0.039 ms (3.917 ms / 100) 4.074 -> 4.082 ( +0.20%) [ +0.27% +0.20% +0.00% / +0.20% +0.93% +0.91%] index_add_ reverse : Elapsed 0.041 ms (4.085 ms / 100) 3.906 -> 3.920 ( +0.36%) [ +0.08% +0.00% +0.23% / +0.36% +0.74% +0.74%] index_copy_ reverse : Elapsed 0.039 ms (3.909 ms / 100) 4.074 -> 4.081 ( +0.17%) [ +0.00% +0.22% +0.17% / +0.17% +1.01% +0.93%] index_add_ spread : Elapsed 0.041 ms (4.074 ms / 100) 3.916 -> 3.922 ( +0.15%) [ +0.00% +0.20% +0.18% / +0.15% +0.51% +0.64%] index_copy_ spread : Elapsed 0.039 ms (3.916 ms / 100) 4.081 -> 4.081 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.041 ms (4.081 ms / 100) 3.920 -> 3.918 ( -0.05%) [ +0.05% +0.08% +0.00% / -0.05% +0.51% +0.43%] index_copy_ strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 4.074 -> 4.080 ( +0.15%) [ +0.20% +0.12% +0.00% / +0.15% +0.81% +0.91%] index_add_ strided 7 : Elapsed 0.041 ms (4.082 ms / 100) 3.904 -> 3.904 ( +0.00%) [ +0.28% +0.00% +0.20% / +0.00% +0.79% +0.85%] index_copy_ strided 7 : Elapsed 0.039 ms (3.915 ms / 100) 4.081 -> 4.083 ( +0.05%) [ +0.02% +0.10% +0.00% / +0.05% +0.83% +0.86%] index_add_ perm : Elapsed 0.041 ms (4.082 ms / 100) 3.916 -> 3.919 ( +0.08%) [ +0.00% +0.05% +0.03% / +0.08% +0.64% +0.54%] index_copy_ perm : Elapsed 0.039 ms (3.916 ms / 100) 4.079 -> 4.081 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.69% +0.78%] index_add_ perm_sorted : Elapsed 0.041 ms (4.079 ms / 100) 3.911 -> 3.920 ( +0.23%) [ +0.28% +0.00% +0.26% / +0.23% +0.82% +0.79%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.13% +0.00% +0.16% / +0.00% +0.09% +0.07%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.489 -> 5.483 ( -0.11%) [ +0.00% +0.11% +0.02% / -0.11% -0.09% +0.05%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.485 -> 5.481 ( -0.07%) [ +0.07% +0.02% +0.00% / -0.07% +0.15% +0.15%] index_select linear : Elapsed 0.055 ms (5.489 ms / 100) 5.483 -> 5.489 ( +0.11%) [ +0.05% +0.00% +0.07% / +0.16% +0.33% +0.11%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.479 -> 5.478 ( -0.02%) [ +0.07% +0.00% +0.09% / +0.07% -0.02% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.476 -> 5.480 ( +0.07%) [ +0.00% +0.04% +0.15% / +0.07% +0.11% +0.22%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.484 -> 5.486 ( +0.04%) [ +0.26% +0.11% +0.00% / +0.16% +0.04% +0.13%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.09% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.488 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.00% +0.15% +0.00% / -0.02% -0.05% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.489 ms / 100) 5.486 -> 5.487 ( +0.02%) [ +0.00% +0.15% +0.05% / +0.02% +0.27% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.486 ms / 100) 5.480 -> 5.491 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +0.24% +0.27%] index_select strided 8 : Elapsed 0.055 ms (5.491 ms / 100) 5.488 -> 5.491 ( +0.05%) [ +0.00% +0.02% +0.11% / +0.05% +0.11% +0.11%] index_select random : Elapsed 0.055 ms (5.488 ms / 100) 5.484 -> 5.485 ( +0.02%) [ +0.00% +0.20% +0.04% / +0.04% +0.16% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [5, 20, 4, 40] (stride (160, 800, 1, 4)) A = [5, 16, 4, 40] (stride (2560, 40, 640, 1)) dim = 1 3.936 -> 3.937 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.76% +0.74%] index_add_ linear : Elapsed 0.039 ms (3.938 ms / 100) 3.817 -> 3.817 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.73% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.817 ms / 100) 3.944 -> 3.943 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.74% +0.71%] index_add_ reverse : Elapsed 0.039 ms (3.945 ms / 100) 3.818 -> 3.819 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.71% +0.73%] index_copy_ reverse : Elapsed 0.038 ms (3.818 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.76% +0.79%] index_add_ spread : Elapsed 0.039 ms (3.935 ms / 100) 3.808 -> 3.811 ( +0.08%) [ +0.05% +0.16% +0.00% / +0.08% +0.74% +0.74%] index_copy_ spread : Elapsed 0.038 ms (3.810 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.76% +0.79%] index_add_ strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 3.811 -> 3.812 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.76% +0.73%] index_copy_ strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.039 ms (3.938 ms / 100) 3.815 -> 3.815 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.58% +0.58%] index_copy_ strided 7 : Elapsed 0.038 ms (3.815 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.64% +0.64%] index_add_ perm : Elapsed 0.039 ms (3.937 ms / 100) 3.811 -> 3.812 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.66% +0.66%] index_copy_ perm : Elapsed 0.038 ms (3.811 ms / 100) 3.943 -> 3.945 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.66% +0.71%] index_add_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 3.817 -> 3.817 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.65% +0.65%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.818 ms / 100) 5.551 -> 5.550 ( -0.02%) [ +0.00% +0.20% +0.18% / +0.09% -0.02% +0.05%] index_select const : Elapsed 0.056 ms (5.551 ms / 100) 5.572 -> 5.568 ( -0.07%) [ +0.00% +0.05% +0.05% / +0.04% -0.07% -0.04%] index_select wrap : Elapsed 0.056 ms (5.572 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.04% +0.05% / -0.02% -0.04% +0.07%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.573 -> 5.569 ( -0.07%) [ +0.13% +0.05% +0.00% / -0.07% +0.09% +0.05%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.552 -> 5.558 ( +0.11%) [ +0.05% +0.09% +0.00% / +0.11% +0.32% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.553 -> 5.556 ( +0.05%) [ +0.09% +0.16% +0.00% / +0.05% +0.11% +0.22%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.04% +0.11% +0.00% / -0.04% -0.04% +0.11%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.579 -> 5.571 ( -0.14%) [ +0.05% +0.00% +0.05% / -0.13% -0.14% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.574 -> 5.568 ( -0.11%) [ +0.13% +0.09% +0.00% / -0.11% +0.16% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.09% +0.18% +0.00% / +0.13% +0.14% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.576 ms / 100) 5.555 -> 5.556 ( +0.02%) [ +0.09% +0.05% +0.00% / +0.09% +0.02% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.00% +0.09% +0.11% / +0.04% -0.04% +0.14%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.568 ( -0.02%) [ +0.02% +0.05% +0.00% / -0.02% +0.05% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) B = [5, 20, 4, 40] (stride (160, 800, 1, 4)) A = [5, 16, 4, 40] (stride (4, 800, 1, 20)) dim = 1 4.280 -> 4.280 ( +0.00%) [ +0.07% +0.16% +0.00% / +0.00% +0.82% +0.79%] index_add_ linear : Elapsed 0.043 ms (4.283 ms / 100) 4.126 -> 4.124 ( -0.05%) [ +0.05% +0.07% +0.00% / -0.05% +0.78% +0.80%] index_copy_ linear : Elapsed 0.041 ms (4.128 ms / 100) 4.267 -> 4.271 ( +0.09%) [ +0.00% +0.12% +0.09% / +0.09% +0.68% +0.68%] index_add_ reverse : Elapsed 0.043 ms (4.267 ms / 100) 4.118 -> 4.116 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.68% +0.63%] index_copy_ reverse : Elapsed 0.041 ms (4.118 ms / 100) 4.246 -> 4.247 ( +0.02%) [ +0.07% +0.12% +0.00% / +0.02% +0.61% +0.64%] index_add_ spread : Elapsed 0.042 ms (4.249 ms / 100) 4.095 -> 4.097 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.59% +0.66%] index_copy_ spread : Elapsed 0.041 ms (4.098 ms / 100) 4.242 -> 4.252 ( +0.24%) [ +0.07% +0.00% +0.12% / +0.24% +0.83% +0.75%] index_add_ strided 3 : Elapsed 0.042 ms (4.245 ms / 100) 4.099 -> 4.110 ( +0.27%) [ +0.10% +0.00% +0.24% / +0.27% +0.93% +0.71%] index_copy_ strided 3 : Elapsed 0.041 ms (4.103 ms / 100) 4.267 -> 4.271 ( +0.09%) [ +0.00% +0.02% +0.07% / +0.09% +0.94% +0.66%] index_add_ strided 7 : Elapsed 0.043 ms (4.267 ms / 100) 4.117 -> 4.119 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.70% +0.73%] index_copy_ strided 7 : Elapsed 0.041 ms (4.119 ms / 100) 4.279 -> 4.280 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.86% +0.89%] index_add_ perm : Elapsed 0.043 ms (4.280 ms / 100) 4.125 -> 4.125 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.82% +0.73%] index_copy_ perm : Elapsed 0.041 ms (4.126 ms / 100) 4.240 -> 4.247 ( +0.17%) [ +0.00% +0.09% +0.21% / +0.17% +0.61% +0.75%] index_add_ perm_sorted : Elapsed 0.042 ms (4.240 ms / 100) 4.093 -> 4.109 ( +0.39%) [ +0.00% +0.22% +0.37% / +0.39% +0.66% +0.86%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.093 ms / 100) 5.555 -> 5.554 ( -0.02%) [ +0.11% +0.04% +0.00% / +0.05% -0.02% +0.02%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.566 -> 5.569 ( +0.05%) [ +0.00% +0.02% +0.04% / +0.05% +0.05% +0.18%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.562 ( +0.05%) [ +0.20% +0.00% +0.13% / +0.05% +0.20% +0.14%] index_select linear : Elapsed 0.056 ms (5.570 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.02% +0.02% +0.00% / +0.04% +0.09% +0.11%] index_select reverse : Elapsed 0.056 ms (5.563 ms / 100) 5.560 -> 5.551 ( -0.16%) [ +0.05% +0.02% +0.00% / -0.16% -0.14% -0.09%] index_select skip64 : Elapsed 0.056 ms (5.563 ms / 100) 5.558 -> 5.553 ( -0.09%) [ +0.00% +0.09% +0.04% / -0.09% +0.05% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.567 -> 5.561 ( -0.11%) [ +0.00% +0.00% +0.02% / +0.02% +0.09% -0.11%] index_select spread : Elapsed 0.056 ms (5.567 ms / 100) 5.561 -> 5.565 ( +0.07%) [ +0.07% +0.00% +0.02% / +0.09% +0.16% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.00% +0.04% +0.00% / -0.11% +0.02% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.564 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.07% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.565 ms / 100) 5.555 -> 5.560 ( +0.09%) [ +0.13% +0.05% +0.00% / +0.09% +0.20% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.05% +0.00% +0.07% / -0.04% -0.05% +0.04%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.05% +0.11% +0.00% / +0.02% +0.16% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.565 ms / 100) B = [5, 20, 4, 40] (stride (1, 200, 4000, 5)) A = [5, 16, 4, 40] (stride (40, 800, 200, 1)) dim = 1 3.937 -> 3.941 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.71% +0.69%] index_add_ linear : Elapsed 0.039 ms (3.938 ms / 100) 3.813 -> 3.811 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.87% +0.84%] index_copy_ linear : Elapsed 0.038 ms (3.813 ms / 100) 3.940 -> 3.941 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.76% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.942 ms / 100) 3.812 -> 3.816 ( +0.10%) [ +0.13% +0.00% +0.03% / +0.10% +0.84% +0.84%] index_copy_ reverse : Elapsed 0.038 ms (3.817 ms / 100) 3.934 -> 3.935 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.76% +0.76%] index_add_ spread : Elapsed 0.039 ms (3.937 ms / 100) 3.808 -> 3.808 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.74%] index_copy_ spread : Elapsed 0.038 ms (3.808 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.71% +0.71%] index_add_ strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 3.813 -> 3.816 ( +0.08%) [ +0.08% +0.21% +0.00% / +0.08% +0.76% +0.89%] index_copy_ strided 3 : Elapsed 0.038 ms (3.816 ms / 100) 3.937 -> 3.938 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.58% +0.61%] index_add_ strided 7 : Elapsed 0.039 ms (3.937 ms / 100) 3.815 -> 3.819 ( +0.10%) [ +0.16% +0.00% +0.16% / +0.10% +0.79% +0.66%] index_copy_ strided 7 : Elapsed 0.038 ms (3.821 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.69%] index_add_ perm : Elapsed 0.039 ms (3.937 ms / 100) 3.810 -> 3.812 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.68% +0.68%] index_copy_ perm : Elapsed 0.038 ms (3.811 ms / 100) 3.940 -> 3.940 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.63% +0.74%] index_add_ perm_sorted : Elapsed 0.039 ms (3.942 ms / 100) 3.816 -> 3.818 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.58% +0.68%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.816 ms / 100) 5.558 -> 5.554 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% -0.05% +0.04%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.572 -> 5.571 ( -0.02%) [ +0.04% +0.00% +0.14% / +0.05% -0.02% +0.13%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.573 -> 5.579 ( +0.11%) [ +0.05% +0.00% +0.07% / +0.11% +0.22% +0.23%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.00% +0.04% +0.11% / +0.04% -0.04% +0.04%] index_select reverse : Elapsed 0.056 ms (5.572 ms / 100) 5.552 -> 5.552 ( +0.00%) [ +0.13% +0.02% +0.00% / +0.00% +0.11% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.559 ms / 100) 5.559 -> 5.557 ( -0.04%) [ +0.00% +0.02% +0.00% / +0.09% -0.04% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.04% +0.00% +0.04% / -0.04% +0.16% -0.05%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.577 -> 5.575 ( -0.04%) [ +0.00% +0.11% +0.20% / +0.05% -0.04% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.575 -> 5.575 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.00% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.571 ( +0.05%) [ +0.04% +0.13% +0.00% / +0.05% +0.11% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.570 ms / 100) 5.551 -> 5.550 ( -0.02%) [ +0.00% +0.11% +0.05% / -0.02% +0.13% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.551 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.00% +0.07% +0.13% / +0.13% +0.05% +0.05%] index_select random : Elapsed 0.056 ms (5.563 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.00% +0.02% +0.04% / +0.00% -0.09% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [5, 20, 4, 40] (stride (80, 1, 20, 400)) A = [5, 16, 4, 40] (stride (64, 4, 1, 320)) dim = 1 4.446 -> 4.457 ( +0.25%) [ +0.22% +0.36% +0.00% / +0.25% +1.08% +0.83%] index_add_ linear : Elapsed 0.045 ms (4.456 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.21% +0.21% +0.00% / +0.05% +0.96% +0.75%] index_copy_ linear : Elapsed 0.043 ms (4.290 ms / 100) 4.445 -> 4.449 ( +0.09%) [ +0.04% +0.20% +0.00% / +0.09% +0.83% +0.81%] index_add_ reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.278 -> 4.285 ( +0.16%) [ +0.00% +0.21% +0.00% / +0.16% +0.86% +0.82%] index_copy_ reverse : Elapsed 0.043 ms (4.278 ms / 100) 4.446 -> 4.450 ( +0.09%) [ +0.11% +0.16% +0.00% / +0.09% +0.58% +0.88%] index_add_ spread : Elapsed 0.045 ms (4.451 ms / 100) 4.276 -> 4.282 ( +0.14%) [ +0.05% +0.19% +0.00% / +0.14% +0.65% +0.89%] index_copy_ spread : Elapsed 0.043 ms (4.278 ms / 100) 4.447 -> 4.448 ( +0.02%) [ +0.04% +0.00% +0.04% / +0.02% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.044 ms (4.449 ms / 100) 4.278 -> 4.276 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.75% +0.70%] index_copy_ strided 3 : Elapsed 0.043 ms (4.278 ms / 100) 4.448 -> 4.449 ( +0.02%) [ +0.16% +0.00% +0.02% / +0.02% +0.76% +0.83%] index_add_ strided 7 : Elapsed 0.045 ms (4.455 ms / 100) 4.279 -> 4.283 ( +0.09%) [ +0.21% +0.00% +0.00% / +0.09% +0.79% +0.91%] index_copy_ strided 7 : Elapsed 0.043 ms (4.288 ms / 100) 4.446 -> 4.458 ( +0.27%) [ +0.00% +0.40% +0.20% / +0.27% +0.83% +0.85%] index_add_ perm : Elapsed 0.044 ms (4.446 ms / 100) 4.285 -> 4.289 ( +0.09%) [ +0.00% +0.09% +0.05% / +0.09% +0.68% +0.65%] index_copy_ perm : Elapsed 0.043 ms (4.285 ms / 100) 4.438 -> 4.445 ( +0.16%) [ +0.18% +0.16% +0.00% / +0.16% +0.92% +0.97%] index_add_ perm_sorted : Elapsed 0.044 ms (4.446 ms / 100) 4.276 -> 4.276 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.87% +0.70%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.276 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.16% +0.07%] index_select const : Elapsed 0.056 ms (5.580 ms / 100) 5.589 -> 5.591 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.14% +0.11%] index_select wrap : Elapsed 0.056 ms (5.591 ms / 100) 5.584 -> 5.592 ( +0.14%) [ +0.23% +0.00% +0.14% / +0.14% +0.27% +0.23%] index_select linear : Elapsed 0.056 ms (5.597 ms / 100) 5.588 -> 5.591 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.18% +0.14%] index_select reverse : Elapsed 0.056 ms (5.588 ms / 100) 5.570 -> 5.571 ( +0.02%) [ +0.11% +0.00% +0.09% / +0.07% +0.04% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.576 ms / 100) 5.570 -> 5.567 ( -0.05%) [ +0.00% +0.07% +0.07% / -0.05% +0.13% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.591 -> 5.586 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.05% +0.09%] index_select spread : Elapsed 0.056 ms (5.591 ms / 100) 5.588 -> 5.594 ( +0.11%) [ +0.04% +0.00% +0.04% / +0.11% +0.20% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.591 -> 5.594 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.29% +0.21%] index_select strided 5 : Elapsed 0.056 ms (5.592 ms / 100) 5.592 -> 5.598 ( +0.11%) [ +0.00% +0.02% +0.09% / +0.11% +0.21% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.592 ms / 100) 5.569 -> 5.573 ( +0.07%) [ +0.05% +0.00% +0.07% / +0.07% +0.14% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.572 ms / 100) 5.589 -> 5.594 ( +0.09%) [ +0.07% +0.00% +0.07% / +0.09% +0.14% +0.16%] index_select random : Elapsed 0.056 ms (5.593 ms / 100) 5.596 -> 5.595 ( -0.02%) [ +0.14% +0.00% +0.02% / +0.05% +0.02% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.604 ms / 100) B = [5, 20, 4, 40] (stride (1, 20, 5, 400)) A = [5, 16, 4, 40] (stride (16, 1, 3200, 80)) dim = 1 4.110 -> 4.111 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.68% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.110 ms / 100) 3.947 -> 3.951 ( +0.10%) [ +0.13% +0.15% +0.00% / +0.10% +0.66% +0.79%] index_copy_ linear : Elapsed 0.040 ms (3.952 ms / 100) 4.102 -> 4.100 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.59% +0.61%] index_add_ reverse : Elapsed 0.041 ms (4.102 ms / 100) 3.952 -> 3.945 ( -0.18%) [ +0.05% +0.00% +0.03% / -0.18% +0.51% +0.48%] index_copy_ reverse : Elapsed 0.040 ms (3.954 ms / 100) 4.100 -> 4.109 ( +0.22%) [ +0.07% +0.00% +0.20% / +0.22% +0.66% +0.76%] index_add_ spread : Elapsed 0.041 ms (4.103 ms / 100) 3.937 -> 3.949 ( +0.30%) [ +0.23% +0.00% +0.23% / +0.30% +0.69% +0.79%] index_copy_ spread : Elapsed 0.039 ms (3.946 ms / 100) 4.110 -> 4.113 ( +0.07%) [ +0.12% +0.15% +0.00% / +0.07% +0.58% +0.85%] index_add_ strided 3 : Elapsed 0.041 ms (4.115 ms / 100) 3.945 -> 3.949 ( +0.10%) [ +0.25% +0.18% +0.00% / +0.10% +0.81% +0.89%] index_copy_ strided 3 : Elapsed 0.040 ms (3.955 ms / 100) 4.111 -> 4.113 ( +0.05%) [ +0.00% +0.00% +0.17% / +0.05% +0.71% +0.58%] index_add_ strided 7 : Elapsed 0.041 ms (4.111 ms / 100) 3.951 -> 3.958 ( +0.18%) [ +0.00% +0.03% +0.13% / +0.18% +0.81% +0.61%] index_copy_ strided 7 : Elapsed 0.040 ms (3.951 ms / 100) 4.101 -> 4.105 ( +0.10%) [ +0.10% +0.22% +0.00% / +0.10% +0.83% +0.73%] index_add_ perm : Elapsed 0.041 ms (4.105 ms / 100) 3.937 -> 3.948 ( +0.28%) [ +0.18% +0.28% +0.00% / +0.28% +0.97% +0.97%] index_copy_ perm : Elapsed 0.039 ms (3.944 ms / 100) 4.097 -> 4.101 ( +0.10%) [ +0.00% +0.24% +0.17% / +0.10% +0.76% +0.78%] index_add_ perm_sorted : Elapsed 0.041 ms (4.097 ms / 100) 3.946 -> 3.954 ( +0.20%) [ +0.00% +0.20% +0.08% / +0.20% +0.53% +0.58%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.946 ms / 100) 5.493 -> 5.492 ( -0.02%) [ +0.09% +0.18% +0.00% / +0.05% -0.02% +0.07%] index_select const : Elapsed 0.055 ms (5.498 ms / 100) 5.505 -> 5.506 ( +0.02%) [ +0.00% +0.09% +0.04% / +0.13% +0.04% +0.02%] index_select wrap : Elapsed 0.055 ms (5.505 ms / 100) 5.505 -> 5.506 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.05% +0.11% +0.02%] index_select linear : Elapsed 0.055 ms (5.508 ms / 100) 5.508 -> 5.504 ( -0.07%) [ +0.09% +0.02% +0.00% / +0.16% -0.07% -0.02%] index_select reverse : Elapsed 0.055 ms (5.513 ms / 100) 5.491 -> 5.493 ( +0.04%) [ +0.18% +0.25% +0.00% / +0.20% +0.13% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.501 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.09% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.494 ms / 100) 5.501 -> 5.501 ( +0.00%) [ +0.00% +0.15% +0.09% / +0.20% +0.05% +0.00%] index_select spread : Elapsed 0.055 ms (5.501 ms / 100) 5.508 -> 5.503 ( -0.09%) [ +0.00% +0.00% +0.07% / +0.07% -0.09% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.508 ms / 100) 5.509 -> 5.499 ( -0.18%) [ +0.05% +0.02% +0.00% / +0.02% -0.18% -0.18%] index_select strided 5 : Elapsed 0.055 ms (5.512 ms / 100) 5.504 -> 5.499 ( -0.09%) [ +0.00% +0.11% +0.05% / +0.13% -0.09% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.504 ms / 100) 5.509 -> 5.500 ( -0.16%) [ +0.04% +0.02% +0.00% / -0.04% -0.16% -0.13%] index_select strided 8 : Elapsed 0.055 ms (5.511 ms / 100) 5.502 -> 5.502 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.11% +0.07% +0.00%] index_select random : Elapsed 0.055 ms (5.506 ms / 100) 5.500 -> 5.499 ( -0.02%) [ +0.13% +0.13% +0.00% / +0.11% +0.09% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.507 ms / 100) B = [5, 20, 4, 40] (stride (20, 1, 100, 400)) A = [5, 16, 4, 40] (stride (2560, 4, 1, 64)) dim = 1 4.443 -> 4.445 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.61% +0.59%] index_add_ linear : Elapsed 0.044 ms (4.444 ms / 100) 4.282 -> 4.281 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.77% +0.70%] index_copy_ linear : Elapsed 0.043 ms (4.282 ms / 100) 4.426 -> 4.427 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.72% +0.72%] index_add_ reverse : Elapsed 0.044 ms (4.426 ms / 100) 4.269 -> 4.270 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.80% +0.77%] index_copy_ reverse : Elapsed 0.043 ms (4.270 ms / 100) 4.411 -> 4.412 ( +0.02%) [ +0.09% +0.07% +0.00% / +0.02% +0.84% +0.82%] index_add_ spread : Elapsed 0.044 ms (4.415 ms / 100) 4.249 -> 4.249 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.73% +0.73%] index_copy_ spread : Elapsed 0.042 ms (4.249 ms / 100) 4.420 -> 4.421 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.77% +0.79%] index_add_ strided 3 : Elapsed 0.044 ms (4.421 ms / 100) 4.263 -> 4.265 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.82% +0.80%] index_copy_ strided 3 : Elapsed 0.043 ms (4.264 ms / 100) 4.428 -> 4.428 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.65% +0.65%] index_add_ strided 7 : Elapsed 0.044 ms (4.428 ms / 100) 4.270 -> 4.272 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.68% +0.68%] index_copy_ strided 7 : Elapsed 0.043 ms (4.270 ms / 100) 4.444 -> 4.442 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.56% +0.59%] index_add_ perm : Elapsed 0.044 ms (4.444 ms / 100) 4.282 -> 4.284 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.75% +0.72%] index_copy_ perm : Elapsed 0.043 ms (4.284 ms / 100) 4.422 -> 4.422 ( +0.00%) [ +0.05% +0.14% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.044 ms (4.424 ms / 100) 4.265 -> 4.267 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.63% +0.61%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.267 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.16% +0.05% +0.00% / -0.04% +0.09% +0.04%] index_select const : Elapsed 0.056 ms (5.578 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.09% +0.07% +0.00% / +0.04% +0.11% +0.20%] index_select wrap : Elapsed 0.056 ms (5.582 ms / 100) 5.575 -> 5.575 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.14% +0.00% +0.23%] index_select linear : Elapsed 0.056 ms (5.580 ms / 100) 5.578 -> 5.579 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.04% +0.16%] index_select reverse : Elapsed 0.056 ms (5.579 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.16% +0.00% +0.11% / +0.20% +0.04% +0.00%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.571 -> 5.570 ( -0.02%) [ +0.04% +0.00% +0.05% / -0.02% +0.00% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.577 -> 5.578 ( +0.02%) [ +0.04% +0.07% +0.00% / +0.02% +0.13% +0.02%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.11% +0.00% +0.00% / +0.04% +0.05% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.583 ms / 100) 5.578 -> 5.575 ( -0.05%) [ +0.04% +0.02% +0.00% / -0.05% +0.11% +0.23%] index_select strided 5 : Elapsed 0.056 ms (5.580 ms / 100) 5.576 -> 5.568 ( -0.14%) [ +0.09% +0.02% +0.00% / -0.14% +0.11% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.581 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.20% +0.00% +0.16% / +0.07% +0.22% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.574 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.27% +0.13%] index_select random : Elapsed 0.056 ms (5.577 ms / 100) 5.577 -> 5.576 ( -0.02%) [ +0.09% +0.00% +0.07% / -0.02% +0.09% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.582 ms / 100) B = [5, 20, 4, 40] (stride (20, 1, 100, 400)) A = [5, 16, 4, 40] (stride (1, 800, 5, 20)) dim = 1 4.272 -> 4.276 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.98% +0.96%] index_add_ linear : Elapsed 0.043 ms (4.276 ms / 100) 4.122 -> 4.127 ( +0.12%) [ +0.12% +0.10% +0.00% / +0.12% +0.78% +0.82%] index_copy_ linear : Elapsed 0.041 ms (4.127 ms / 100) 4.256 -> 4.260 ( +0.09%) [ +0.02% +0.19% +0.00% / +0.09% +0.70% +0.70%] index_add_ reverse : Elapsed 0.043 ms (4.257 ms / 100) 4.117 -> 4.118 ( +0.02%) [ +0.02% +0.19% +0.00% / +0.02% +0.68% +0.70%] index_copy_ reverse : Elapsed 0.041 ms (4.118 ms / 100) 4.236 -> 4.237 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.76% +0.80%] index_add_ spread : Elapsed 0.042 ms (4.237 ms / 100) 4.106 -> 4.119 ( +0.32%) [ +0.02% +0.27% +0.00% / +0.32% +0.78% +0.73%] index_copy_ spread : Elapsed 0.041 ms (4.107 ms / 100) 4.236 -> 4.237 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.71% +0.73%] index_add_ strided 3 : Elapsed 0.042 ms (4.238 ms / 100) 4.092 -> 4.092 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.83% +0.78%] index_copy_ strided 3 : Elapsed 0.041 ms (4.097 ms / 100) 4.237 -> 4.237 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.73%] index_add_ strided 7 : Elapsed 0.042 ms (4.240 ms / 100) 4.095 -> 4.096 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.66% +0.63%] index_copy_ strided 7 : Elapsed 0.041 ms (4.096 ms / 100) 4.237 -> 4.238 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.61% +0.71%] index_add_ perm : Elapsed 0.042 ms (4.237 ms / 100) 4.105 -> 4.112 ( +0.17%) [ +0.00% +0.15% +0.07% / +0.17% +0.71% +0.68%] index_copy_ perm : Elapsed 0.041 ms (4.105 ms / 100) 4.264 -> 4.262 ( -0.05%) [ +0.12% +0.00% +0.00% / -0.05% +0.63% +0.45%] index_add_ perm_sorted : Elapsed 0.043 ms (4.269 ms / 100) 4.124 -> 4.124 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.68% +0.46%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.125 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.14% +0.05% +0.00% / +0.18% +0.13% +0.07%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.575 -> 5.577 ( +0.04%) [ +0.00% +0.00% +0.05% / +0.04% +0.04% +0.04%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.575 -> 5.578 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.11% +0.07%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.574 -> 5.573 ( -0.02%) [ +0.05% +0.00% +0.07% / +0.20% -0.02% +0.07%] index_select reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.556 -> 5.566 ( +0.18%) [ +0.11% +0.00% +0.11% / +0.20% +0.18% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.569 ( +0.18%) [ +0.07% +0.09% +0.00% / +0.18% +0.29% +0.22%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.579 -> 5.570 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% -0.05% -0.07%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.579 -> 5.575 ( -0.07%) [ +0.02% +0.05% +0.00% / +0.02% +0.00% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.20% +0.20% +0.00% / +0.20% +0.09% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.574 -> 5.579 ( +0.09%) [ +0.00% +0.00% +0.05% / +0.09% +0.09% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.563 ( -0.02%) [ +0.09% +0.00% +0.07% / -0.02% +0.02% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.565 ( -0.11%) [ +0.05% +0.02% +0.00% / -0.11% -0.02% -0.04%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.571 -> 5.571 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.05% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.579 ms / 100) B = [5, 20, 4, 40] (stride (1, 5, 100, 400)) A = [5, 16, 4, 40] (stride (64, 1, 16, 320)) dim = 1 4.427 -> 4.429 ( +0.05%) [ +0.00% +0.09% +0.02% / +0.05% +0.68% +0.70%] index_add_ linear : Elapsed 0.044 ms (4.427 ms / 100) 4.262 -> 4.264 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.80% +0.68%] index_copy_ linear : Elapsed 0.043 ms (4.264 ms / 100) 4.433 -> 4.442 ( +0.20%) [ +0.07% +0.00% +0.25% / +0.20% +0.92% +0.86%] index_add_ reverse : Elapsed 0.044 ms (4.436 ms / 100) 4.277 -> 4.282 ( +0.12%) [ +0.02% +0.00% +0.07% / +0.12% +0.87% +0.87%] index_copy_ reverse : Elapsed 0.043 ms (4.278 ms / 100) 4.420 -> 4.423 ( +0.07%) [ +0.25% +0.00% +0.02% / +0.07% +0.95% +1.02%] index_add_ spread : Elapsed 0.044 ms (4.431 ms / 100) 4.253 -> 4.258 ( +0.12%) [ +0.16% +0.00% +0.02% / +0.12% +1.03% +0.96%] index_copy_ spread : Elapsed 0.043 ms (4.260 ms / 100) 4.432 -> 4.435 ( +0.07%) [ +0.18% +0.05% +0.00% / +0.07% +0.72% +0.81%] index_add_ strided 3 : Elapsed 0.044 ms (4.440 ms / 100) 4.276 -> 4.281 ( +0.12%) [ +0.12% +0.00% +0.09% / +0.12% +0.72% +0.75%] index_copy_ strided 3 : Elapsed 0.043 ms (4.281 ms / 100) 4.440 -> 4.444 ( +0.09%) [ +0.07% +0.05% +0.00% / +0.09% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.281 -> 4.282 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.70% +0.75%] index_copy_ strided 7 : Elapsed 0.043 ms (4.281 ms / 100) 4.426 -> 4.424 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.75% +0.70%] index_add_ perm : Elapsed 0.044 ms (4.430 ms / 100) 4.261 -> 4.261 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.00% +0.87% +0.80%] index_copy_ perm : Elapsed 0.043 ms (4.263 ms / 100) 4.432 -> 4.437 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.11% +0.90% +0.93%] index_add_ perm_sorted : Elapsed 0.044 ms (4.432 ms / 100) 4.274 -> 4.285 ( +0.26%) [ +0.05% +0.09% +0.00% / +0.26% +0.94% +0.94%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.276 ms / 100) 5.578 -> 5.574 ( -0.07%) [ +0.04% +0.00% +0.04% / -0.07% +0.02% +0.05%] index_select const : Elapsed 0.056 ms (5.580 ms / 100) 5.589 -> 5.590 ( +0.02%) [ +0.11% +0.00% +0.00% / +0.05% +0.02% +0.04%] index_select wrap : Elapsed 0.056 ms (5.595 ms / 100) 5.586 -> 5.587 ( +0.02%) [ +0.00% +0.04% +0.07% / +0.05% +0.21% +0.02%] index_select linear : Elapsed 0.056 ms (5.586 ms / 100) 5.585 -> 5.592 ( +0.13%) [ +0.14% +0.00% +0.04% / +0.18% +0.13% +0.21%] index_select reverse : Elapsed 0.056 ms (5.593 ms / 100) 5.572 -> 5.577 ( +0.09%) [ +0.00% +0.02% +0.20% / +0.16% +0.16% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.577 -> 5.580 ( +0.05%) [ +0.16% +0.00% +0.04% / +0.05% +0.14% +0.20%] index_select skip256 : Elapsed 0.056 ms (5.586 ms / 100) 5.582 -> 5.589 ( +0.13%) [ +0.16% +0.00% +0.30% / +0.13% +0.18% +0.14%] index_select spread : Elapsed 0.056 ms (5.591 ms / 100) 5.584 -> 5.589 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.09% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.586 -> 5.591 ( +0.09%) [ +0.07% +0.04% +0.00% / +0.09% +0.13% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.590 ms / 100) 5.584 -> 5.587 ( +0.05%) [ +0.00% +0.09% +0.04% / +0.05% +0.20% +0.25%] index_select strided 7 : Elapsed 0.056 ms (5.584 ms / 100) 5.586 -> 5.586 ( +0.00%) [ +0.00% +0.05% +0.13% / +0.00% +0.07% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.586 ms / 100) 5.587 -> 5.586 ( -0.02%) [ +0.13% +0.00% +0.09% / +0.07% +0.11% -0.02%] index_select random : Elapsed 0.056 ms (5.594 ms / 100) 5.582 -> 5.587 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.20% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.592 ms / 100) out_shape = [5, 16, 20, 40] in_shape = [5, 16, 4, 40] idx_dim = 2 B = [5, 16, 20, 40] (stride (12800, 40, 640, 1)) A = [5, 16, 4, 40] (stride (2560, 160, 1, 4)) dim = 2 1.972 -> 1.979 ( +0.35%) [ +0.20% +0.30% +0.00% / +0.35% +0.61% +0.61%] index_add_ linear : Elapsed 0.020 ms (1.976 ms / 100) 1.930 -> 1.935 ( +0.26%) [ +0.31% +0.10% +0.00% / +0.26% +0.47% +0.47%] index_copy_ linear : Elapsed 0.019 ms (1.936 ms / 100) 1.956 -> 1.960 ( +0.20%) [ +0.00% +0.10% +0.05% / +0.20% +0.77% +0.82%] index_add_ reverse : Elapsed 0.020 ms (1.956 ms / 100) 1.914 -> 1.914 ( +0.00%) [ +0.00% +0.00% +0.21% / +0.00% +0.68% +0.73%] index_copy_ reverse : Elapsed 0.019 ms (1.914 ms / 100) 1.949 -> 1.951 ( +0.10%) [ +0.26% +0.00% +0.10% / +0.10% +0.82% +0.87%] index_add_ spread : Elapsed 0.020 ms (1.954 ms / 100) 1.910 -> 1.917 ( +0.37%) [ +0.10% +0.00% +0.31% / +0.37% +0.84% +0.84%] index_copy_ spread : Elapsed 0.019 ms (1.912 ms / 100) 1.971 -> 1.977 ( +0.30%) [ +0.00% +0.10% +0.15% / +0.30% +0.81% +0.71%] index_add_ strided 3 : Elapsed 0.020 ms (1.971 ms / 100) 1.928 -> 1.928 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.57% +0.47%] index_copy_ strided 3 : Elapsed 0.019 ms (1.931 ms / 100) 1.967 -> 1.968 ( +0.05%) [ +0.20% +0.00% +0.15% / +0.05% +0.66% +0.86%] index_add_ strided 7 : Elapsed 0.020 ms (1.971 ms / 100) 1.920 -> 1.923 ( +0.16%) [ +0.10% +0.00% +0.05% / +0.16% +0.78% +0.63%] index_copy_ strided 7 : Elapsed 0.019 ms (1.922 ms / 100) 1.970 -> 1.972 ( +0.10%) [ +0.15% +0.00% +0.05% / +0.10% +0.71% +0.56%] index_add_ perm : Elapsed 0.020 ms (1.973 ms / 100) 1.931 -> 1.933 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.41% +0.16%] index_copy_ perm : Elapsed 0.019 ms (1.932 ms / 100) 1.964 -> 1.967 ( +0.15%) [ +0.20% +0.15% +0.00% / +0.15% +0.81% +0.61%] index_add_ perm_sorted : Elapsed 0.020 ms (1.968 ms / 100) 1.922 -> 1.925 ( +0.16%) [ +0.21% +0.05% +0.00% / +0.16% +0.57% +0.52%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.926 ms / 100) 8.748 -> 8.743 ( -0.06%) [ +0.07% +0.19% +0.00% / +0.18% -0.05% -0.06%] index_select const : Elapsed 0.088 ms (8.754 ms / 100) 8.743 -> 8.731 ( -0.14%) [ +0.10% +0.00% +0.10% / +0.00% -0.14% +0.09%] index_select wrap : Elapsed 0.088 ms (8.752 ms / 100) 8.747 -> 8.736 ( -0.13%) [ +0.08% +0.00% +0.03% / -0.05% +0.09% -0.13%] index_select linear : Elapsed 0.088 ms (8.754 ms / 100) 8.752 -> 8.735 ( -0.19%) [ +0.00% +0.00% +0.09% / -0.05% -0.19% -0.15%] index_select reverse : Elapsed 0.088 ms (8.752 ms / 100) 8.745 -> 8.729 ( -0.18%) [ +0.14% +0.00% +0.01% / +0.09% -0.03% -0.18%] index_select skip64 : Elapsed 0.088 ms (8.757 ms / 100) 8.745 -> 8.740 ( -0.06%) [ +0.16% +0.00% +0.18% / +0.07% -0.06% -0.05%] index_select skip256 : Elapsed 0.088 ms (8.759 ms / 100) 8.744 -> 8.728 ( -0.18%) [ +0.00% +0.43% +0.07% / +0.07% -0.11% -0.18%] index_select spread : Elapsed 0.087 ms (8.744 ms / 100) 8.740 -> 8.737 ( -0.03%) [ +0.06% +0.33% +0.00% / +0.11% -0.03% +0.07%] index_select strided 3 : Elapsed 0.087 ms (8.745 ms / 100) 8.748 -> 8.755 ( +0.08%) [ +0.13% +0.05% +0.00% / +0.11% +0.08% +0.10%] index_select random : Elapsed 0.088 ms (8.759 ms / 100) 8.734 -> 8.746 ( +0.14%) [ +0.22% +0.00% +0.10% / +0.19% +0.40% +0.14%] index_select random_sorted : Elapsed 0.088 ms (8.753 ms / 100) B = [5, 16, 20, 40] (stride (1, 4000, 200, 5)) A = [5, 16, 4, 40] (stride (2560, 1, 16, 64)) dim = 2 2.264 -> 2.263 ( -0.04%) [ +0.13% +0.00% +0.00% / -0.04% +0.27% +0.31%] index_add_ linear : Elapsed 0.023 ms (2.267 ms / 100) 2.194 -> 2.195 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +0.27% +0.46%] index_copy_ linear : Elapsed 0.022 ms (2.197 ms / 100) 2.262 -> 2.261 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.18% +0.49%] index_add_ reverse : Elapsed 0.023 ms (2.263 ms / 100) 2.193 -> 2.196 ( +0.14%) [ +0.09% +0.00% +0.00% / +0.14% +0.50% +0.27%] index_copy_ reverse : Elapsed 0.022 ms (2.195 ms / 100) 2.256 -> 2.257 ( +0.04%) [ +0.13% +0.09% +0.00% / +0.04% +0.35% +0.53%] index_add_ spread : Elapsed 0.023 ms (2.259 ms / 100) 2.190 -> 2.191 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.46% +0.55%] index_copy_ spread : Elapsed 0.022 ms (2.191 ms / 100) 2.258 -> 2.263 ( +0.22%) [ +0.27% +0.13% +0.00% / +0.22% +0.49% +0.58%] index_add_ strided 3 : Elapsed 0.023 ms (2.264 ms / 100) 2.190 -> 2.196 ( +0.27%) [ +0.14% +0.27% +0.00% / +0.27% +0.46% +0.32%] index_copy_ strided 3 : Elapsed 0.022 ms (2.193 ms / 100) 2.257 -> 2.264 ( +0.31%) [ +0.13% +0.00% +0.13% / +0.31% +0.35% +0.58%] index_add_ strided 7 : Elapsed 0.023 ms (2.260 ms / 100) 2.190 -> 2.193 ( +0.14%) [ +0.09% +0.18% +0.00% / +0.14% +0.32% +0.46%] index_copy_ strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.262 -> 2.263 ( +0.04%) [ +0.00% +0.09% +0.04% / +0.04% +0.49% +0.44%] index_add_ perm : Elapsed 0.023 ms (2.262 ms / 100) 2.188 -> 2.190 ( +0.09%) [ +0.00% +0.14% +0.05% / +0.09% +0.87% +0.69%] index_copy_ perm : Elapsed 0.022 ms (2.188 ms / 100) 2.258 -> 2.262 ( +0.18%) [ +0.09% +0.09% +0.00% / +0.18% +0.62% +0.71%] index_add_ perm_sorted : Elapsed 0.023 ms (2.260 ms / 100) 2.188 -> 2.189 ( +0.05%) [ +0.23% +0.14% +0.00% / +0.05% +0.64% +0.59%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.193 ms / 100) 9.209 -> 9.209 ( +0.00%) [ +0.00% +0.09% +0.03% / +0.11% +0.00% +0.12%] index_select const : Elapsed 0.092 ms (9.209 ms / 100) 9.241 -> 9.247 ( +0.06%) [ +0.00% +0.22% +0.03% / +0.06% +0.19% +0.30%] index_select wrap : Elapsed 0.092 ms (9.241 ms / 100) 9.227 -> 9.248 ( +0.23%) [ +0.04% +0.21% +0.00% / +0.28% +0.39% +0.23%] index_select linear : Elapsed 0.092 ms (9.231 ms / 100) 9.216 -> 9.237 ( +0.23%) [ +0.00% +0.12% +0.11% / +0.23% +0.39% +0.47%] index_select reverse : Elapsed 0.092 ms (9.216 ms / 100) 9.202 -> 9.207 ( +0.05%) [ +0.08% +0.00% +0.10% / +0.05% +0.16% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.209 ms / 100) 9.209 -> 9.222 ( +0.14%) [ +0.00% +0.08% +0.02% / +0.16% +0.14% +0.20%] index_select skip256 : Elapsed 0.092 ms (9.209 ms / 100) 9.244 -> 9.251 ( +0.08%) [ +0.05% +0.08% +0.00% / +0.08% +0.27% +0.23%] index_select spread : Elapsed 0.092 ms (9.249 ms / 100) 9.243 -> 9.237 ( -0.06%) [ +0.18% +0.00% +0.03% / -0.06% +0.27% +0.21%] index_select strided 3 : Elapsed 0.093 ms (9.260 ms / 100) 9.237 -> 9.242 ( +0.05%) [ +0.09% +0.00% +0.03% / +0.05% +0.32% +0.41%] index_select random : Elapsed 0.092 ms (9.245 ms / 100) 9.252 -> 9.268 ( +0.17%) [ +0.13% +0.12% +0.00% / +0.17% +0.38% +0.35%] index_select random_sorted : Elapsed 0.093 ms (9.264 ms / 100) B = [5, 16, 20, 40] (stride (40, 200, 3200, 1)) A = [5, 16, 4, 40] (stride (2560, 40, 640, 1)) dim = 2 2.038 -> 2.037 ( -0.05%) [ +0.15% +0.10% +0.00% / -0.05% +1.62% +1.82%] index_add_ linear : Elapsed 0.020 ms (2.041 ms / 100) 1.994 -> 1.991 ( -0.15%) [ +0.15% +0.05% +0.00% / -0.15% +1.40% +1.35%] index_copy_ linear : Elapsed 0.020 ms (1.997 ms / 100) 2.037 -> 2.039 ( +0.10%) [ +0.10% +0.20% +0.00% / +0.10% +1.87% +1.67%] index_add_ reverse : Elapsed 0.020 ms (2.039 ms / 100) 1.992 -> 1.991 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +1.51% +1.46%] index_copy_ reverse : Elapsed 0.020 ms (1.993 ms / 100) 2.052 -> 2.056 ( +0.19%) [ +0.10% +0.15% +0.00% / +0.19% +1.51% +1.32%] index_add_ spread : Elapsed 0.021 ms (2.054 ms / 100) 2.004 -> 2.004 ( +0.00%) [ +0.25% +0.10% +0.00% / +0.00% +1.90% +1.90%] index_copy_ spread : Elapsed 0.020 ms (2.009 ms / 100) 2.041 -> 2.046 ( +0.24%) [ +0.20% +0.20% +0.00% / +0.24% +1.47% +1.52%] index_add_ strided 3 : Elapsed 0.020 ms (2.045 ms / 100) 1.996 -> 2.000 ( +0.20%) [ +0.00% +0.20% +0.05% / +0.20% +1.35% +1.40%] index_copy_ strided 3 : Elapsed 0.020 ms (1.996 ms / 100) 2.037 -> 2.041 ( +0.20%) [ +0.00% +0.15% +0.05% / +0.20% +2.26% +2.06%] index_add_ strided 7 : Elapsed 0.020 ms (2.037 ms / 100) 1.991 -> 1.994 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +2.11% +1.91%] index_copy_ strided 7 : Elapsed 0.020 ms (1.991 ms / 100) 2.052 -> 2.052 ( +0.00%) [ +0.00% +0.05% +0.29% / +0.00% +1.07% +0.97%] index_add_ perm : Elapsed 0.021 ms (2.052 ms / 100) 2.008 -> 2.006 ( -0.10%) [ +0.00% +0.10% +0.20% / -0.10% +1.10% +0.95%] index_copy_ perm : Elapsed 0.020 ms (2.008 ms / 100) 2.050 -> 2.052 ( +0.10%) [ +0.15% +0.10% +0.00% / +0.10% +1.17% +1.02%] index_add_ perm_sorted : Elapsed 0.021 ms (2.053 ms / 100) 2.006 -> 2.005 ( -0.05%) [ +0.35% +0.20% +0.00% / -0.05% +1.15% +1.25%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.013 ms / 100) 9.110 -> 9.124 ( +0.15%) [ +0.21% +0.22% +0.00% / +0.18% +0.18% +0.15%] index_select const : Elapsed 0.091 ms (9.129 ms / 100) 9.214 -> 9.204 ( -0.11%) [ +0.01% +0.00% +0.20% / -0.11% +0.02% -0.03%] index_select wrap : Elapsed 0.092 ms (9.215 ms / 100) 9.147 -> 9.165 ( +0.20%) [ +0.00% +0.09% +0.26% / +0.20% +0.24% +0.21%] index_select linear : Elapsed 0.091 ms (9.147 ms / 100) 9.156 -> 9.154 ( -0.02%) [ +0.11% +0.00% +0.04% / -0.02% +0.13% +0.01%] index_select reverse : Elapsed 0.092 ms (9.166 ms / 100) 9.112 -> 9.112 ( +0.00%) [ +0.15% +0.24% +0.00% / +0.00% +0.03% +0.20%] index_select skip64 : Elapsed 0.091 ms (9.126 ms / 100) 9.116 -> 9.126 ( +0.11%) [ +0.00% +0.04% +0.10% / +0.11% +0.34% +0.14%] index_select skip256 : Elapsed 0.091 ms (9.116 ms / 100) 9.168 -> 9.169 ( +0.01%) [ +0.26% +0.00% +0.15% / +0.37% +0.01% +0.21%] index_select spread : Elapsed 0.092 ms (9.192 ms / 100) 9.202 -> 9.187 ( -0.16%) [ +0.08% +0.00% +0.04% / +0.09% -0.16% +0.05%] index_select strided 3 : Elapsed 0.092 ms (9.209 ms / 100) 9.194 -> 9.198 ( +0.04%) [ +0.23% +0.00% +0.22% / +0.09% +0.21% +0.04%] index_select random : Elapsed 0.092 ms (9.215 ms / 100) 9.184 -> 9.172 ( -0.13%) [ +0.22% +0.07% +0.00% / +0.22% -0.13% -0.02%] index_select random_sorted : Elapsed 0.092 ms (9.204 ms / 100) B = [5, 16, 20, 40] (stride (1, 5, 3200, 80)) A = [5, 16, 4, 40] (stride (160, 800, 1, 4)) dim = 2 2.068 -> 2.065 ( -0.15%) [ +0.15% +0.00% +0.05% / -0.15% +0.29% +0.39%] index_add_ linear : Elapsed 0.021 ms (2.071 ms / 100) 2.028 -> 2.027 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.59% +0.44%] index_copy_ linear : Elapsed 0.020 ms (2.028 ms / 100) 2.062 -> 2.062 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.48% +0.58%] index_add_ reverse : Elapsed 0.021 ms (2.063 ms / 100) 2.022 -> 2.026 ( +0.20%) [ +0.15% +0.10% +0.00% / +0.20% +0.69% +0.74%] index_copy_ reverse : Elapsed 0.020 ms (2.025 ms / 100) 2.062 -> 2.065 ( +0.15%) [ +0.10% +0.00% +0.19% / +0.15% +0.34% +0.53%] index_add_ spread : Elapsed 0.021 ms (2.064 ms / 100) 2.022 -> 2.027 ( +0.25%) [ +0.40% +0.10% +0.00% / +0.25% +0.64% +0.69%] index_copy_ spread : Elapsed 0.020 ms (2.030 ms / 100) 2.080 -> 2.080 ( +0.00%) [ +0.00% +0.10% +0.14% / +0.00% +0.43% +0.43%] index_add_ strided 3 : Elapsed 0.021 ms (2.080 ms / 100) 2.037 -> 2.038 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +0.49% +0.39%] index_copy_ strided 3 : Elapsed 0.020 ms (2.037 ms / 100) 2.073 -> 2.073 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.021 ms (2.075 ms / 100) 2.029 -> 2.034 ( +0.25%) [ +0.00% +0.10% +0.15% / +0.25% +0.74% +0.59%] index_copy_ strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 2.063 -> 2.066 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.15% +0.34% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.063 ms / 100) 2.023 -> 2.031 ( +0.40%) [ +0.20% +0.00% +0.20% / +0.54% +0.40% +0.44%] index_copy_ perm : Elapsed 0.020 ms (2.027 ms / 100) 2.064 -> 2.064 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.29% +0.29%] index_add_ perm_sorted : Elapsed 0.021 ms (2.067 ms / 100) 2.029 -> 2.030 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.15% +0.25%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.029 ms / 100) 9.214 -> 9.207 ( -0.08%) [ +0.11% +0.00% +0.31% / +0.11% -0.08% -0.04%] index_select const : Elapsed 0.092 ms (9.224 ms / 100) 9.211 -> 9.221 ( +0.11%) [ +0.07% +0.14% +0.00% / +0.11% +0.21% +0.27%] index_select wrap : Elapsed 0.092 ms (9.217 ms / 100) 9.217 -> 9.203 ( -0.15%) [ +0.07% +0.23% +0.00% / -0.10% +0.07% -0.15%] index_select linear : Elapsed 0.092 ms (9.223 ms / 100) 9.211 -> 9.213 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.24% +0.02% +0.07%] index_select reverse : Elapsed 0.092 ms (9.216 ms / 100) 9.205 -> 9.212 ( +0.08%) [ +0.35% +0.12% +0.00% / +0.08% +0.46% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.237 ms / 100) 9.193 -> 9.212 ( +0.21%) [ +0.18% +0.00% +0.36% / +0.21% +0.46% +0.37%] index_select skip256 : Elapsed 0.092 ms (9.210 ms / 100) 9.207 -> 9.212 ( +0.05%) [ +0.16% +0.39% +0.00% / +0.12% +0.05% +0.27%] index_select spread : Elapsed 0.092 ms (9.222 ms / 100) 9.215 -> 9.209 ( -0.07%) [ +0.00% +0.26% +0.09% / +0.34% -0.07% -0.03%] index_select strided 3 : Elapsed 0.092 ms (9.215 ms / 100) 9.214 -> 9.205 ( -0.10%) [ +0.04% +0.07% +0.00% / -0.10% +0.31% +0.18%] index_select random : Elapsed 0.092 ms (9.218 ms / 100) 9.204 -> 9.215 ( +0.12%) [ +0.18% +0.00% +0.23% / +0.15% +0.12% +0.32%] index_select random_sorted : Elapsed 0.092 ms (9.221 ms / 100) out_shape = [5, 16, 4, 20] in_shape = [5, 16, 4, 40] idx_dim = 3 B = [5, 16, 4, 20] (stride (1280, 20, 320, 1)) A = [5, 16, 4, 40] (stride (1, 20, 5, 320)) dim = 3 2.402 -> 2.403 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.04% +0.50% +0.29%] index_select const : Elapsed 0.024 ms (2.402 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.00% +0.04%] index_select wrap : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.29% +0.21% +0.00% / +0.25% -0.04% +0.17%] index_select linear : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.29% +0.17% +0.00% / +0.04% -0.08% -0.04%] index_select reverse : Elapsed 0.024 ms (2.418 ms / 100) 2.407 -> 2.407 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.04% +0.00% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.407 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.25% +0.00% +0.12% / +0.12% +0.25% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.411 ( +0.17%) [ +0.25% +0.00% +0.29% / +0.17% +0.25% +0.37%] index_select spread : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.08% +0.21% +0.00% / +0.00% +0.12% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.413 ms / 100) 2.409 -> 2.408 ( -0.04%) [ +0.00% +0.08% +0.04% / -0.04% +0.04% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.412 ( +0.17%) [ +0.04% +0.04% +0.00% / +0.21% +0.17% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.12% +0.00% +0.17% / +0.08% +0.21% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.12% +0.00%] index_select strided 16 : Elapsed 0.024 ms (2.409 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.12% +0.00% +0.17%] index_select random : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.409 ( -0.04%) [ +0.00% +0.25% +0.04% / +0.12% -0.04% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.409 ( -0.12%) [ +0.00% +0.00% +0.04% / +0.12% -0.12% +0.08%] index_select perm : Elapsed 0.024 ms (2.412 ms / 100) 2.413 -> 2.406 ( -0.29%) [ +0.04% +0.12% +0.00% / +0.00% -0.25% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [5, 16, 4, 20] (stride (20, 400, 100, 1)) A = [5, 16, 4, 40] (stride (4, 20, 1, 320)) dim = 3 2.392 -> 2.394 ( +0.08%) [ +0.13% +0.25% +0.00% / +0.08% +0.13% +0.17%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.405 -> 2.399 ( -0.25%) [ +0.00% +0.04% +0.04% / -0.04% -0.12% -0.25%] index_select wrap : Elapsed 0.024 ms (2.405 ms / 100) 2.407 -> 2.399 ( -0.33%) [ +0.00% +0.00% +0.00% / -0.04% -0.17% -0.33%] index_select linear : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.403 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.17% +0.25%] index_select reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.394 -> 2.395 ( +0.04%) [ +0.25% +0.13% +0.00% / +0.13% +0.29% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.400 ms / 100) 2.393 -> 2.395 ( +0.08%) [ +0.00% +0.04% +0.21% / +0.08% +0.29% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.393 ms / 100) 2.400 -> 2.403 ( +0.13%) [ +0.04% +0.00% +0.08% / +0.13% +0.21% +0.21%] index_select spread : Elapsed 0.024 ms (2.401 ms / 100) 2.402 -> 2.406 ( +0.17%) [ +0.00% +0.08% +0.04% / +0.25% +0.17% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.396 -> 2.399 ( +0.13%) [ +0.17% +0.00% +0.04% / +0.13% +0.17% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.400 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.12% +0.00% +0.04%] index_select strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.397 -> 2.397 ( +0.00%) [ +0.29% +0.00% +0.21% / +0.17% +0.04% +0.00%] index_select strided 8 : Elapsed 0.024 ms (2.404 ms / 100) 2.397 -> 2.396 ( -0.04%) [ +0.21% +0.00% +0.17% / +0.00% -0.04% +0.00%] index_select strided 16 : Elapsed 0.024 ms (2.402 ms / 100) 2.399 -> 2.403 ( +0.17%) [ +0.17% +0.00% +0.04% / +0.17% +0.29% +0.21%] index_select random : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.08% +0.00% +0.04% / +0.00% +0.00% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.405 ms / 100) 2.402 -> 2.405 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.12% +0.17% +0.12%] index_select perm : Elapsed 0.024 ms (2.405 ms / 100) 2.402 -> 2.404 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.17% +0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) B = [5, 16, 4, 20] (stride (320, 20, 1600, 1)) A = [5, 16, 4, 40] (stride (640, 1, 3200, 16)) dim = 3 2.396 -> 2.394 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% +0.13% +0.04%] index_select const : Elapsed 0.024 ms (2.396 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.00% +0.00%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.417 -> 2.413 ( -0.17%) [ +0.04% +0.00% +0.08% / -0.17% +0.17% +0.08%] index_select linear : Elapsed 0.024 ms (2.418 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.04% +0.00% +0.08% / +0.21% +0.17% +0.29%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.397 -> 2.395 ( -0.08%) [ +0.08% +0.00% +0.17% / +0.21% -0.08% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.395 -> 2.400 ( +0.21%) [ +0.08% +0.08% +0.00% / +0.21% +0.38% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.397 ms / 100) 2.416 -> 2.417 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.04% +0.12% +0.17%] index_select spread : Elapsed 0.024 ms (2.419 ms / 100) 2.416 -> 2.417 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.04% +0.04%] index_select strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.17% +0.00% +0.17% / +0.12% +0.29% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.419 ( +0.25%) [ +0.08% +0.17% +0.00% / +0.25% +0.29% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.400 -> 2.398 ( -0.08%) [ +0.17% +0.00% +0.00% / -0.08% +0.25% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.404 ms / 100) 2.401 -> 2.403 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.12% +0.08% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.401 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.21% +0.04% +0.00% / +0.04% +0.33% +0.17%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.407 -> 2.414 ( +0.29%) [ +0.29% +0.17% +0.00% / +0.29% +0.37% +0.33%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.33% +0.25% / +0.21% +0.12% +0.08%] index_select perm : Elapsed 0.024 ms (2.414 ms / 100) 2.419 -> 2.413 ( -0.25%) [ +0.00% +0.08% +0.00% / -0.04% -0.17% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [5, 16, 4, 20] (stride (320, 1, 1600, 16)) A = [5, 16, 4, 40] (stride (2560, 160, 40, 1)) dim = 3 2.365 -> 2.364 ( -0.04%) [ +0.00% +0.17% +0.08% / +0.08% +0.17% -0.04%] index_select const : Elapsed 0.024 ms (2.365 ms / 100) 2.373 -> 2.368 ( -0.21%) [ +0.13% +0.21% +0.00% / +0.13% -0.08% -0.21%] index_select wrap : Elapsed 0.024 ms (2.376 ms / 100) 2.374 -> 2.367 ( -0.29%) [ +0.00% +0.00% +0.08% / -0.04% -0.29% -0.13%] index_select linear : Elapsed 0.024 ms (2.374 ms / 100) 2.370 -> 2.368 ( -0.08%) [ +0.25% +0.00% +0.08% / -0.08% +0.30% +0.04%] index_select reverse : Elapsed 0.024 ms (2.376 ms / 100) 2.367 -> 2.365 ( -0.08%) [ +0.13% +0.00% +0.13% / +0.04% -0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.370 ms / 100) 2.366 -> 2.365 ( -0.04%) [ +0.13% +0.00% +0.21% / -0.04% +0.34% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.369 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.21% +0.21%] index_select spread : Elapsed 0.024 ms (2.380 ms / 100) 2.375 -> 2.375 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.13% +0.34%] index_select strided 3 : Elapsed 0.024 ms (2.375 ms / 100) 2.375 -> 2.377 ( +0.08%) [ +0.04% +0.00% +0.13% / +0.08% +0.25% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.376 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.17% +0.00% +0.00%] index_select strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.379 -> 2.376 ( -0.13%) [ +0.00% +0.08% +0.00% / -0.13% +0.17% +0.00%] index_select strided 8 : Elapsed 0.024 ms (2.379 ms / 100) 2.375 -> 2.377 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.34% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.375 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.04% +0.21% +0.00%] index_select random : Elapsed 0.024 ms (2.377 ms / 100) 2.377 -> 2.373 ( -0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.21% -0.17%] index_select random_sorted : Elapsed 0.024 ms (2.377 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.17% +0.00% +0.04% / +0.00% +0.25% +0.13%] index_select perm : Elapsed 0.024 ms (2.382 ms / 100) 2.375 -> 2.376 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.13% +0.34% +0.04%] index_select perm_sorted : Elapsed 0.024 ms (2.378 ms / 100) out_shape = [20, 16, 40, 4] in_shape = [5, 16, 40, 4] idx_dim = 0 B = [20, 16, 40, 4] (stride (2560, 4, 64, 1)) dim = 0 fill_cnt = 5 0.936 -> 0.935 ( -0.11%) [ +0.00% +0.21% +0.00% / -0.11% +0.53% +0.53%] index_fill_ const : Elapsed 0.009 ms (0.936 ms / 100) 0.936 -> 0.937 ( +0.11%) [ +0.00% +0.21% +0.00% / +0.11% +0.64% +0.53%] index_fill_ linear : Elapsed 0.009 ms (0.936 ms / 100) 0.936 -> 0.937 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.75% +0.53%] index_fill_ reverse : Elapsed 0.009 ms (0.937 ms / 100) 0.935 -> 0.936 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.64% +0.53%] index_fill_ skip64 : Elapsed 0.009 ms (0.936 ms / 100) 0.935 -> 0.936 ( +0.11%) [ +0.21% +0.11% +0.00% / +0.11% +0.64% +0.75%] index_fill_ skip256 : Elapsed 0.009 ms (0.937 ms / 100) 0.935 -> 0.937 ( +0.21%) [ +0.11% +0.11% +0.00% / +0.21% +0.86% +0.75%] index_fill_ spread : Elapsed 0.009 ms (0.936 ms / 100) 0.935 -> 0.934 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% +0.75% +0.75%] index_fill_ strided 3 : Elapsed 0.009 ms (0.935 ms / 100) 0.934 -> 0.934 ( +0.00%) [ +0.11% +0.21% +0.00% / +0.00% +0.75% +0.75%] index_fill_ strided 5 : Elapsed 0.009 ms (0.935 ms / 100) 0.936 -> 0.936 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.53% +0.64%] index_fill_ strided 7 : Elapsed 0.009 ms (0.936 ms / 100) 0.936 -> 0.936 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.53% +0.64%] index_fill_ strided 8 : Elapsed 0.009 ms (0.937 ms / 100) 0.936 -> 0.937 ( +0.11%) [ +0.21% +0.21% +0.00% / +0.11% +0.64% +0.64%] index_fill_ strided 16 : Elapsed 0.009 ms (0.938 ms / 100) 0.935 -> 0.935 ( +0.00%) [ +0.11% +0.21% +0.00% / +0.00% +0.86% +0.64%] index_fill_ random : Elapsed 0.009 ms (0.936 ms / 100) 0.935 -> 0.935 ( +0.00%) [ +0.11% +0.21% +0.00% / +0.00% +0.86% +0.64%] index_fill_ random_sorted : Elapsed 0.009 ms (0.936 ms / 100) 0.934 -> 0.936 ( +0.21%) [ +0.11% +0.32% +0.00% / +0.21% +0.86% +0.86%] index_fill_ perm : Elapsed 0.009 ms (0.935 ms / 100) 0.935 -> 0.935 ( +0.00%) [ +0.11% +0.32% +0.00% / +0.00% +0.75% +0.86%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.936 ms / 100) B = [20, 16, 40, 4] (stride (64, 4, 1280, 1)) A = [5, 16, 40, 4] (stride (1, 200, 5, 3200)) dim = 0 1.737 -> 1.740 ( +0.17%) [ +0.00% +0.29% +0.23% / +0.58% +0.23% +0.17%] index_add_ linear : Elapsed 0.017 ms (1.737 ms / 100) 1.693 -> 1.692 ( -0.06%) [ +0.00% +0.06% +0.30% / -0.06% +0.41% +0.18%] index_copy_ linear : Elapsed 0.017 ms (1.693 ms / 100) 1.735 -> 1.740 ( +0.29%) [ +0.00% +0.35% +0.23% / +0.29% +0.58% +0.40%] index_add_ reverse : Elapsed 0.017 ms (1.735 ms / 100) 1.693 -> 1.696 ( +0.18%) [ +0.00% +0.06% +0.12% / +0.18% +0.24% +0.18%] index_copy_ reverse : Elapsed 0.017 ms (1.693 ms / 100) 1.728 -> 1.730 ( +0.12%) [ +0.17% +0.12% +0.00% / +0.12% +0.35% +0.35%] index_add_ spread : Elapsed 0.017 ms (1.731 ms / 100) 1.683 -> 1.688 ( +0.30%) [ +0.00% +0.12% +0.30% / +0.30% +0.48% +0.42%] index_copy_ spread : Elapsed 0.017 ms (1.683 ms / 100) 1.728 -> 1.729 ( +0.06%) [ +0.12% +0.00% +0.29% / +0.06% +0.69% +0.58%] index_add_ strided 3 : Elapsed 0.017 ms (1.730 ms / 100) 1.685 -> 1.685 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.83% +0.65%] index_copy_ strided 3 : Elapsed 0.017 ms (1.685 ms / 100) 1.737 -> 1.736 ( -0.06%) [ +0.17% +0.00% +0.17% / -0.06% +0.23% +0.17%] index_add_ strided 7 : Elapsed 0.017 ms (1.740 ms / 100) 1.690 -> 1.689 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.36% +0.24%] index_copy_ strided 7 : Elapsed 0.017 ms (1.690 ms / 100) 1.726 -> 1.731 ( +0.29%) [ +0.23% +0.00% +0.12% / +0.29% +1.04% +0.70%] index_add_ perm : Elapsed 0.017 ms (1.730 ms / 100) 1.678 -> 1.686 ( +0.48%) [ +0.24% +0.00% +0.36% / +0.48% +1.07% +0.95%] index_copy_ perm : Elapsed 0.017 ms (1.682 ms / 100) 1.729 -> 1.733 ( +0.23%) [ +0.00% +0.23% +0.29% / +0.23% +1.21% +1.16%] index_add_ perm_sorted : Elapsed 0.017 ms (1.729 ms / 100) 1.685 -> 1.692 ( +0.42%) [ +0.00% +0.24% +0.18% / +0.42% +0.95% +1.31%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.685 ms / 100) 8.248 -> 8.240 ( -0.10%) [ +0.12% +0.02% +0.00% / -0.10% +0.22% +0.17%] index_select const : Elapsed 0.083 ms (8.258 ms / 100) 8.238 -> 8.243 ( +0.06%) [ +0.00% +0.00% +0.08% / +0.06% +0.32% +0.61%] index_select wrap : Elapsed 0.082 ms (8.238 ms / 100) 8.260 -> 8.252 ( -0.10%) [ +0.00% +0.02% +0.01% / -0.02% -0.10% -0.05%] index_select linear : Elapsed 0.083 ms (8.260 ms / 100) 8.240 -> 8.260 ( +0.24%) [ +0.01% +0.07% +0.00% / +0.25% +0.29% +0.24%] index_select reverse : Elapsed 0.082 ms (8.241 ms / 100) 8.248 -> 8.240 ( -0.10%) [ +0.00% +0.00% +0.21% / -0.10% +0.13% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.248 ms / 100) 8.234 -> 8.263 ( +0.35%) [ +0.05% +0.00% +0.40% / +0.44% +0.35% +0.43%] index_select skip256 : Elapsed 0.082 ms (8.238 ms / 100) 8.237 -> 8.257 ( +0.24%) [ +0.13% +0.12% +0.00% / +0.24% +0.27% +0.44%] index_select spread : Elapsed 0.082 ms (8.248 ms / 100) 8.245 -> 8.252 ( +0.08%) [ +0.00% +0.05% +0.13% / +0.12% +0.22% +0.08%] index_select strided 3 : Elapsed 0.082 ms (8.245 ms / 100) 8.239 -> 8.238 ( -0.01%) [ +0.00% +0.13% +0.33% / -0.01% +0.39% +0.44%] index_select random : Elapsed 0.082 ms (8.239 ms / 100) 8.231 -> 8.235 ( +0.05%) [ +0.00% +0.41% +0.22% / +0.05% +0.43% +0.43%] index_select random_sorted : Elapsed 0.082 ms (8.231 ms / 100) B = [20, 16, 40, 4] (stride (1, 80, 1280, 20)) A = [5, 16, 40, 4] (stride (160, 800, 4, 1)) dim = 0 1.799 -> 1.785 ( -0.78%) [ +0.00% +0.39% +0.33% / +0.00% -0.78% -0.44%] index_add_ linear : Elapsed 0.018 ms (1.799 ms / 100) 1.755 -> 1.745 ( -0.57%) [ +0.11% +0.28% +0.00% / +0.28% -0.51% -0.57%] index_copy_ linear : Elapsed 0.018 ms (1.757 ms / 100) 1.799 -> 1.788 ( -0.61%) [ +0.28% +0.00% +0.17% / +0.28% -0.56% -0.61%] index_add_ reverse : Elapsed 0.018 ms (1.804 ms / 100) 1.752 -> 1.745 ( -0.40%) [ +0.00% +0.34% +0.46% / +0.11% -0.34% -0.40%] index_copy_ reverse : Elapsed 0.018 ms (1.752 ms / 100) 1.819 -> 1.806 ( -0.71%) [ +0.00% +0.00% +0.05% / -0.16% -0.71% -0.71%] index_add_ spread : Elapsed 0.018 ms (1.819 ms / 100) 1.781 -> 1.774 ( -0.39%) [ +0.22% +0.00% +0.06% / +0.17% -0.39% -0.39%] index_copy_ spread : Elapsed 0.018 ms (1.785 ms / 100) 1.815 -> 1.803 ( -0.66%) [ +0.55% +0.00% +0.17% / +0.28% -0.55% -0.66%] index_add_ strided 3 : Elapsed 0.018 ms (1.825 ms / 100) 1.782 -> 1.767 ( -0.84%) [ +0.00% +0.11% +0.17% / +0.11% -0.84% -0.67%] index_copy_ strided 3 : Elapsed 0.018 ms (1.782 ms / 100) 1.815 -> 1.802 ( -0.72%) [ +0.00% +0.33% +0.00% / +0.06% -0.44% -0.72%] index_add_ strided 7 : Elapsed 0.018 ms (1.815 ms / 100) 1.782 -> 1.772 ( -0.56%) [ +0.22% +0.00% +0.17% / +0.39% -0.56% -0.51%] index_copy_ strided 7 : Elapsed 0.018 ms (1.786 ms / 100) 1.806 -> 1.798 ( -0.44%) [ +0.00% +0.06% +0.39% / +0.17% -0.17% -0.44%] index_add_ perm : Elapsed 0.018 ms (1.806 ms / 100) 1.769 -> 1.764 ( -0.28%) [ +0.34% +0.17% +0.00% / +0.11% +0.11% -0.28%] index_copy_ perm : Elapsed 0.018 ms (1.775 ms / 100) 1.803 -> 1.804 ( +0.06%) [ +0.33% +0.00% +0.33% / +0.39% +0.06% +0.22%] index_add_ perm_sorted : Elapsed 0.018 ms (1.809 ms / 100) 1.770 -> 1.760 ( -0.56%) [ +0.00% +0.17% +0.34% / +0.06% -0.56% -0.45%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.770 ms / 100) 8.254 -> 8.262 ( +0.10%) [ +0.00% +0.01% +0.16% / +0.10% +0.25% +0.12%] index_select const : Elapsed 0.083 ms (8.254 ms / 100) 8.314 -> 8.309 ( -0.06%) [ +0.04% +0.00% +0.34% / -0.06% +0.34% +0.11%] index_select wrap : Elapsed 0.083 ms (8.317 ms / 100) 8.301 -> 8.309 ( +0.10%) [ +0.10% +0.23% +0.00% / +0.10% +0.23% +0.14%] index_select linear : Elapsed 0.083 ms (8.309 ms / 100) 8.308 -> 8.315 ( +0.08%) [ +0.14% +0.01% +0.00% / +0.20% +0.08% +0.19%] index_select reverse : Elapsed 0.083 ms (8.320 ms / 100) 8.261 -> 8.257 ( -0.05%) [ +0.08% +0.05% +0.00% / -0.05% +0.50% +0.27%] index_select skip64 : Elapsed 0.083 ms (8.268 ms / 100) 8.268 -> 8.250 ( -0.22%) [ +0.11% +0.00% +0.21% / -0.22% +0.23% +0.04%] index_select skip256 : Elapsed 0.083 ms (8.277 ms / 100) 8.290 -> 8.290 ( +0.00%) [ +0.01% +0.00% +0.01% / +0.00% +0.21% +0.17%] index_select spread : Elapsed 0.083 ms (8.291 ms / 100) 8.314 -> 8.327 ( +0.16%) [ +0.10% +0.08% +0.00% / +0.16% +0.24% +0.17%] index_select strided 3 : Elapsed 0.083 ms (8.322 ms / 100) 8.305 -> 8.319 ( +0.17%) [ +0.08% +0.00% +0.05% / +0.17% +0.42% +0.20%] index_select random : Elapsed 0.083 ms (8.312 ms / 100) 8.280 -> 8.289 ( +0.11%) [ +0.06% +0.00% +0.07% / +0.11% +0.59% +0.39%] index_select random_sorted : Elapsed 0.083 ms (8.285 ms / 100) B = [20, 16, 40, 4] (stride (1, 20, 1280, 320)) A = [5, 16, 40, 4] (stride (1, 200, 5, 3200)) dim = 0 0.674 -> 0.675 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.45% +0.74%] index_add_ linear : Elapsed 0.007 ms (0.674 ms / 100) 0.688 -> 0.689 ( +0.15%) [ +0.29% +0.29% +0.00% / +0.15% +0.87% +1.02%] index_copy_ linear : Elapsed 0.007 ms (0.690 ms / 100) 0.677 -> 0.676 ( -0.15%) [ +0.30% +0.00% +0.15% / +0.15% -0.15% +0.00%] index_add_ reverse : Elapsed 0.007 ms (0.679 ms / 100) 0.692 -> 0.692 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.14% +0.00% +0.14%] index_copy_ reverse : Elapsed 0.007 ms (0.693 ms / 100) 0.678 -> 0.678 ( +0.00%) [ +0.29% +0.00% +0.00% / +0.15% +0.00% +0.00%] index_add_ spread : Elapsed 0.007 ms (0.680 ms / 100) 0.693 -> 0.693 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.00% +0.29% +0.14%] index_copy_ spread : Elapsed 0.007 ms (0.693 ms / 100) 0.676 -> 0.678 ( +0.30%) [ +0.00% +0.30% +0.00% / +0.30% +0.89% +0.89%] index_add_ strided 3 : Elapsed 0.007 ms (0.676 ms / 100) 0.689 -> 0.690 ( +0.15%) [ +0.29% +0.00% +0.15% / +0.15% +1.16% +1.31%] index_copy_ strided 3 : Elapsed 0.007 ms (0.691 ms / 100) 0.677 -> 0.678 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.74% +1.33%] index_add_ strided 7 : Elapsed 0.007 ms (0.679 ms / 100) 0.691 -> 0.693 ( +0.29%) [ +0.14% +0.00% +0.14% / +0.29% +1.16% +1.30%] index_copy_ strided 7 : Elapsed 0.007 ms (0.692 ms / 100) 0.678 -> 0.678 ( +0.00%) [ +0.00% +0.29% +0.15% / +0.00% +0.29% +0.29%] index_add_ perm : Elapsed 0.007 ms (0.678 ms / 100) 0.693 -> 0.694 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.58% +0.29%] index_copy_ perm : Elapsed 0.007 ms (0.694 ms / 100) 0.678 -> 0.678 ( +0.00%) [ +0.29% +0.15% +0.00% / +0.00% +0.29% +0.29%] index_add_ perm_sorted : Elapsed 0.007 ms (0.680 ms / 100) 0.692 -> 0.694 ( +0.29%) [ +0.29% +0.29% +0.00% / +0.29% +0.72% +0.43%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.694 ms / 100) 4.946 -> 4.874 ( -1.46%) [ +0.14% +0.00% +0.06% / -1.37% -1.25% -1.46%] index_select const : Elapsed 0.050 ms (4.953 ms / 100) 4.942 -> 4.868 ( -1.50%) [ +0.10% +0.28% +0.00% / -1.23% -1.27% -1.50%] index_select wrap : Elapsed 0.049 ms (4.947 ms / 100) 4.953 -> 4.873 ( -1.62%) [ +0.08% +0.00% +0.06% / -1.47% -1.49% -1.62%] index_select linear : Elapsed 0.050 ms (4.957 ms / 100) 4.951 -> 4.873 ( -1.58%) [ +0.16% +0.00% +0.08% / -1.37% -1.58% -1.47%] index_select reverse : Elapsed 0.050 ms (4.959 ms / 100) 4.944 -> 4.881 ( -1.27%) [ +0.00% +0.08% +0.12% / -0.95% -1.03% -1.27%] index_select skip64 : Elapsed 0.049 ms (4.944 ms / 100) 4.937 -> 4.881 ( -1.13%) [ +0.20% +0.00% +0.26% / -0.93% -1.01% -1.13%] index_select skip256 : Elapsed 0.049 ms (4.947 ms / 100) 4.947 -> 4.871 ( -1.54%) [ +0.00% +0.02% +0.12% / -1.15% -1.27% -1.54%] index_select spread : Elapsed 0.049 ms (4.947 ms / 100) 4.938 -> 4.874 ( -1.30%) [ +0.22% +0.14% +0.00% / -1.24% -1.22% -1.30%] index_select strided 3 : Elapsed 0.049 ms (4.949 ms / 100) 4.945 -> 4.876 ( -1.40%) [ +0.24% +0.00% +0.12% / -1.05% -1.19% -1.40%] index_select random : Elapsed 0.050 ms (4.957 ms / 100) 4.945 -> 4.874 ( -1.44%) [ +0.00% +0.12% +0.22% / -1.11% -1.44% -1.38%] index_select random_sorted : Elapsed 0.049 ms (4.945 ms / 100) B = [20, 16, 40, 4] (stride (640, 1, 16, 12800)) A = [5, 16, 40, 4] (stride (640, 40, 1, 3200)) dim = 0 1.707 -> 1.708 ( +0.06%) [ +0.00% +0.12% +0.18% / +0.06% +0.41% +0.47%] index_add_ linear : Elapsed 0.017 ms (1.707 ms / 100) 1.665 -> 1.668 ( +0.18%) [ +0.00% +0.00% +0.06% / +0.18% +0.66% +0.30%] index_copy_ linear : Elapsed 0.017 ms (1.665 ms / 100) 1.709 -> 1.710 ( +0.06%) [ +0.00% +0.18% +0.00% / +0.06% +0.35% +0.23%] index_add_ reverse : Elapsed 0.017 ms (1.709 ms / 100) 1.669 -> 1.667 ( -0.12%) [ +0.06% +0.00% +0.18% / -0.12% +0.42% +0.24%] index_copy_ reverse : Elapsed 0.017 ms (1.670 ms / 100) 1.710 -> 1.708 ( -0.12%) [ +0.18% +0.23% +0.00% / +0.00% +0.12% -0.12%] index_add_ spread : Elapsed 0.017 ms (1.713 ms / 100) 1.671 -> 1.668 ( -0.18%) [ +0.00% +0.00% +0.12% / -0.18% -0.06% -0.12%] index_copy_ spread : Elapsed 0.017 ms (1.671 ms / 100) 1.707 -> 1.707 ( +0.00%) [ +0.18% +0.06% +0.00% / +0.00% +0.88% +0.76%] index_add_ strided 3 : Elapsed 0.017 ms (1.710 ms / 100) 1.660 -> 1.662 ( +0.12%) [ +0.36% +0.36% +0.00% / +0.12% +1.20% +1.02%] index_copy_ strided 3 : Elapsed 0.017 ms (1.666 ms / 100) 1.726 -> 1.727 ( +0.06%) [ +0.00% +0.12% +0.29% / +0.06% +0.23% +0.23%] index_add_ strided 7 : Elapsed 0.017 ms (1.726 ms / 100) 1.685 -> 1.689 ( +0.24%) [ +0.00% +0.24% +0.30% / +0.24% +0.59% +0.71%] index_copy_ strided 7 : Elapsed 0.017 ms (1.685 ms / 100) 1.723 -> 1.724 ( +0.06%) [ +0.00% +0.06% +0.12% / +0.12% +0.06% +0.12%] index_add_ perm : Elapsed 0.017 ms (1.723 ms / 100) 1.682 -> 1.683 ( +0.06%) [ +0.18% +0.00% +0.12% / +0.12% +0.06% +0.12%] index_copy_ perm : Elapsed 0.017 ms (1.685 ms / 100) 1.723 -> 1.724 ( +0.06%) [ +0.00% +0.29% +0.00% / +0.12% +0.06% +0.29%] index_add_ perm_sorted : Elapsed 0.017 ms (1.723 ms / 100) 1.686 -> 1.684 ( -0.12%) [ +0.06% +0.00% +0.24% / +0.24% +0.06% -0.12%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.687 ms / 100) 8.207 -> 8.212 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.32% +0.15%] index_select const : Elapsed 0.082 ms (8.212 ms / 100) 8.274 -> 8.279 ( +0.06%) [ +0.02% +0.00% +0.18% / +0.06% +0.39% +0.27%] index_select wrap : Elapsed 0.083 ms (8.276 ms / 100) 8.257 -> 8.266 ( +0.11%) [ +0.19% +0.00% +0.31% / +0.11% +0.21% +0.17%] index_select linear : Elapsed 0.083 ms (8.273 ms / 100) 8.269 -> 8.260 ( -0.11%) [ +0.15% +0.17% +0.00% / +0.22% +0.02% -0.11%] index_select reverse : Elapsed 0.083 ms (8.281 ms / 100) 8.202 -> 8.204 ( +0.02%) [ +0.22% +0.00% +0.21% / +0.02% +0.34% +0.35%] index_select skip64 : Elapsed 0.082 ms (8.220 ms / 100) 8.201 -> 8.208 ( +0.09%) [ +0.00% +0.15% +0.28% / +0.09% +0.37% +0.33%] index_select skip256 : Elapsed 0.082 ms (8.201 ms / 100) 8.244 -> 8.258 ( +0.17%) [ +0.00% +0.22% +0.16% / +0.17% +0.17% +0.52%] index_select spread : Elapsed 0.082 ms (8.244 ms / 100) 8.277 -> 8.272 ( -0.06%) [ +0.00% +0.11% +0.25% / +0.33% +0.18% -0.06%] index_select strided 3 : Elapsed 0.083 ms (8.277 ms / 100) 8.268 -> 8.267 ( -0.01%) [ +0.12% +0.28% +0.00% / +0.13% +0.35% -0.01%] index_select random : Elapsed 0.083 ms (8.278 ms / 100) 8.265 -> 8.272 ( +0.08%) [ +0.15% +0.04% +0.00% / +0.23% +0.21% +0.08%] index_select random_sorted : Elapsed 0.083 ms (8.277 ms / 100) out_shape = [5, 20, 40, 4] in_shape = [5, 16, 40, 4] idx_dim = 1 B = [5, 20, 40, 4] (stride (3200, 4, 80, 1)) A = [5, 16, 40, 4] (stride (16, 1, 80, 3200)) dim = 1 4.448 -> 4.453 ( +0.11%) [ +0.04% +0.09% +0.00% / +0.11% +0.79% +0.79%] index_add_ linear : Elapsed 0.045 ms (4.450 ms / 100) 4.284 -> 4.286 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.72% +0.70%] index_copy_ linear : Elapsed 0.043 ms (4.287 ms / 100) 4.443 -> 4.444 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.72% +0.65%] index_add_ reverse : Elapsed 0.044 ms (4.443 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.00% +0.09% +0.07% / +0.02% +0.61% +0.75%] index_copy_ reverse : Elapsed 0.043 ms (4.282 ms / 100) 4.443 -> 4.441 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.05% +0.83% +0.65%] index_add_ spread : Elapsed 0.044 ms (4.446 ms / 100) 4.282 -> 4.274 ( -0.19%) [ +0.12% +0.00% +0.12% / -0.19% +0.77% +0.75%] index_copy_ spread : Elapsed 0.043 ms (4.287 ms / 100) 4.443 -> 4.450 ( +0.16%) [ +0.07% +0.11% +0.00% / +0.16% +0.79% +0.88%] index_add_ strided 3 : Elapsed 0.044 ms (4.446 ms / 100) 4.279 -> 4.287 ( +0.19%) [ +0.05% +0.02% +0.00% / +0.19% +0.84% +0.82%] index_copy_ strided 3 : Elapsed 0.043 ms (4.281 ms / 100) 4.448 -> 4.445 ( -0.07%) [ +0.09% +0.18% +0.00% / -0.07% +0.45% +0.61%] index_add_ strided 7 : Elapsed 0.045 ms (4.452 ms / 100) 4.285 -> 4.284 ( -0.02%) [ +0.00% +0.16% +0.00% / -0.02% +0.40% +0.44%] index_copy_ strided 7 : Elapsed 0.043 ms (4.285 ms / 100) 4.446 -> 4.452 ( +0.13%) [ +0.02% +0.00% +0.13% / +0.13% +0.49% +0.58%] index_add_ perm : Elapsed 0.044 ms (4.447 ms / 100) 4.280 -> 4.290 ( +0.23%) [ +0.19% +0.00% +0.19% / +0.23% +0.70% +0.77%] index_copy_ perm : Elapsed 0.043 ms (4.288 ms / 100) 4.452 -> 4.448 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.38% +0.56%] index_add_ perm_sorted : Elapsed 0.045 ms (4.452 ms / 100) 4.292 -> 4.289 ( -0.07%) [ +0.05% +0.00% +0.00% / -0.07% +0.54% +0.58%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.294 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.04% +0.05%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.00% +0.13% +0.00% / +0.04% +0.09% -0.07%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.573 -> 5.567 ( -0.11%) [ +0.00% +0.11% +0.02% / +0.07% +0.13% -0.11%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.575 -> 5.575 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.09% +0.00% +0.02%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.00% +0.13% +0.05% / +0.09% +0.25% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.09% +0.07% +0.00% / +0.07% +0.16% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.576 -> 5.566 ( -0.18%) [ +0.00% +0.05% +0.05% / -0.18% +0.02% +0.00%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.574 -> 5.568 ( -0.11%) [ +0.11% +0.07% +0.00% / -0.07% -0.11% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.578 -> 5.573 ( -0.09%) [ +0.00% +0.02% +0.05% / -0.07% -0.09% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.578 ms / 100) 5.571 -> 5.570 ( -0.02%) [ +0.02% +0.13% +0.00% / -0.02% +0.13% +0.02%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.574 -> 5.573 ( -0.02%) [ +0.18% +0.00% +0.02% / +0.14% -0.02% -0.02%] index_select strided 8 : Elapsed 0.056 ms (5.584 ms / 100) 5.577 -> 5.568 ( -0.16%) [ +0.14% +0.04% +0.00% / +0.07% -0.16% -0.02%] index_select random : Elapsed 0.056 ms (5.585 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.16% +0.23% +0.00% / +0.16% +0.20% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.578 ms / 100) B = [5, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [5, 16, 40, 4] (stride (640, 40, 1, 3200)) dim = 1 3.953 -> 3.954 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.73% +0.71%] index_add_ linear : Elapsed 0.040 ms (3.954 ms / 100) 3.827 -> 3.830 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.78% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.827 ms / 100) 3.958 -> 3.957 ( -0.03%) [ +0.10% +0.13% +0.00% / -0.03% +0.73% +0.86%] index_add_ reverse : Elapsed 0.040 ms (3.962 ms / 100) 3.834 -> 3.834 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.76% +0.81%] index_copy_ reverse : Elapsed 0.038 ms (3.837 ms / 100) 3.951 -> 3.954 ( +0.08%) [ +0.15% +0.05% +0.00% / +0.08% +0.89% +0.86%] index_add_ spread : Elapsed 0.040 ms (3.957 ms / 100) 3.826 -> 3.829 ( +0.08%) [ +0.13% +0.05% +0.00% / +0.08% +0.97% +0.91%] index_copy_ spread : Elapsed 0.038 ms (3.831 ms / 100) 3.953 -> 3.953 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.86% +0.83%] index_add_ strided 3 : Elapsed 0.040 ms (3.954 ms / 100) 3.831 -> 3.831 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.81% +0.81%] index_copy_ strided 3 : Elapsed 0.038 ms (3.831 ms / 100) 3.962 -> 3.963 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.71% +0.63%] index_add_ strided 7 : Elapsed 0.040 ms (3.963 ms / 100) 3.836 -> 3.838 ( +0.05%) [ +0.00% +0.08% +0.00% / +0.05% +0.76% +0.65%] index_copy_ strided 7 : Elapsed 0.038 ms (3.836 ms / 100) 3.953 -> 3.954 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.78% +0.73%] index_add_ perm : Elapsed 0.040 ms (3.954 ms / 100) 3.829 -> 3.829 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.76% +0.73%] index_copy_ perm : Elapsed 0.038 ms (3.829 ms / 100) 3.957 -> 3.958 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.61% +0.63%] index_add_ perm_sorted : Elapsed 0.040 ms (3.959 ms / 100) 3.834 -> 3.838 ( +0.10%) [ +0.03% +0.08% +0.00% / +0.10% +0.63% +0.63%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.835 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.09% +0.13% +0.00% / +0.13% +0.25% +0.02%] index_select const : Elapsed 0.056 ms (5.570 ms / 100) 5.591 -> 5.591 ( +0.00%) [ +0.00% +0.11% +0.05% / +0.00% +0.04% +0.23%] index_select wrap : Elapsed 0.056 ms (5.591 ms / 100) 5.589 -> 5.593 ( +0.07%) [ +0.14% +0.09% +0.00% / +0.09% +0.07% +0.20%] index_select linear : Elapsed 0.056 ms (5.597 ms / 100) 5.590 -> 5.587 ( -0.05%) [ +0.00% +0.04% +0.00% / -0.05% +0.18% +0.29%] index_select reverse : Elapsed 0.056 ms (5.590 ms / 100) 5.561 -> 5.566 ( +0.09%) [ +0.14% +0.14% +0.00% / +0.09% +0.27% +0.13%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.00% +0.00% +0.11% / -0.04% +0.11% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.590 -> 5.590 ( +0.00%) [ +0.00% +0.05% +0.04% / +0.00% +0.09% +0.27%] index_select spread : Elapsed 0.056 ms (5.590 ms / 100) 5.589 -> 5.596 ( +0.13%) [ +0.02% +0.00% +0.09% / +0.13% +0.23% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.588 -> 5.590 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.23% +0.16%] index_select strided 5 : Elapsed 0.056 ms (5.593 ms / 100) 5.588 -> 5.595 ( +0.13%) [ +0.16% +0.00% +0.02% / +0.13% +0.23% +0.34%] index_select strided 7 : Elapsed 0.056 ms (5.597 ms / 100) 5.565 -> 5.564 ( -0.02%) [ +0.13% +0.16% +0.00% / -0.02% +0.31% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.572 ms / 100) 5.587 -> 5.580 ( -0.13%) [ +0.00% +0.02% +0.04% / -0.13% +0.14% +0.02%] index_select random : Elapsed 0.056 ms (5.587 ms / 100) 5.580 -> 5.583 ( +0.05%) [ +0.00% +0.00% +0.11% / +0.05% +0.09% +0.23%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) B = [5, 20, 40, 4] (stride (1, 800, 5, 200)) A = [5, 16, 40, 4] (stride (2560, 40, 1, 640)) dim = 1 3.951 -> 3.952 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.89% +0.81%] index_add_ linear : Elapsed 0.040 ms (3.954 ms / 100) 3.835 -> 3.834 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.76% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.835 ms / 100) 3.952 -> 3.953 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.76% +0.76%] index_add_ reverse : Elapsed 0.040 ms (3.953 ms / 100) 3.828 -> 3.829 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.73% +0.65%] index_copy_ reverse : Elapsed 0.038 ms (3.828 ms / 100) 3.958 -> 3.960 ( +0.05%) [ +0.08% +0.25% +0.00% / +0.05% +0.78% +0.81%] index_add_ spread : Elapsed 0.040 ms (3.961 ms / 100) 3.825 -> 3.824 ( -0.03%) [ +0.05% +0.08% +0.00% / -0.03% +0.81% +0.78%] index_copy_ spread : Elapsed 0.038 ms (3.827 ms / 100) 3.952 -> 3.953 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.78% +0.81%] index_add_ strided 3 : Elapsed 0.040 ms (3.954 ms / 100) 3.833 -> 3.833 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.70% +0.70%] index_copy_ strided 3 : Elapsed 0.038 ms (3.833 ms / 100) 3.953 -> 3.954 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.71% +0.73%] index_add_ strided 7 : Elapsed 0.040 ms (3.956 ms / 100) 3.833 -> 3.836 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.76% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.836 ms / 100) 3.962 -> 3.962 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.66% +0.68%] index_add_ perm : Elapsed 0.040 ms (3.962 ms / 100) 3.828 -> 3.829 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.65% +0.73%] index_copy_ perm : Elapsed 0.038 ms (3.828 ms / 100) 3.952 -> 3.953 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.040 ms (3.955 ms / 100) 3.828 -> 3.828 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.68%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.828 ms / 100) 5.559 -> 5.551 ( -0.14%) [ +0.05% +0.05% +0.00% / -0.14% +0.22% +0.11%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.587 -> 5.586 ( -0.02%) [ +0.00% +0.20% +0.18% / +0.21% +0.05% -0.02%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.586 -> 5.589 ( +0.05%) [ +0.16% +0.00% +0.13% / +0.13% +0.05% +0.11%] index_select linear : Elapsed 0.056 ms (5.595 ms / 100) 5.588 -> 5.591 ( +0.05%) [ +0.00% +0.04% +0.00% / +0.05% +0.07% +0.07%] index_select reverse : Elapsed 0.056 ms (5.588 ms / 100) 5.560 -> 5.562 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.07% +0.18%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.20% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.591 -> 5.591 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.14% +0.00% +0.04%] index_select spread : Elapsed 0.056 ms (5.591 ms / 100) 5.592 -> 5.589 ( -0.05%) [ +0.00% +0.18% +0.09% / +0.07% -0.05% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.592 ms / 100) 5.591 -> 5.589 ( -0.04%) [ +0.05% +0.07% +0.00% / +0.09% +0.02% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.594 ms / 100) 5.594 -> 5.586 ( -0.14%) [ +0.00% +0.05% +0.02% / -0.05% -0.14% -0.05%] index_select strided 7 : Elapsed 0.056 ms (5.594 ms / 100) 5.568 -> 5.568 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.00% +0.07% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.589 -> 5.583 ( -0.11%) [ +0.16% +0.20% +0.00% / -0.05% -0.05% -0.11%] index_select random : Elapsed 0.056 ms (5.598 ms / 100) 5.585 -> 5.588 ( +0.05%) [ +0.00% +0.02% +0.11% / +0.05% +0.07% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.585 ms / 100) B = [5, 20, 40, 4] (stride (40, 200, 1, 4000)) A = [5, 16, 40, 4] (stride (40, 200, 1, 3200)) dim = 1 3.966 -> 3.966 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.81% +0.81%] index_add_ linear : Elapsed 0.040 ms (3.966 ms / 100) 3.842 -> 3.844 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.96% +0.96%] index_copy_ linear : Elapsed 0.038 ms (3.842 ms / 100) 3.975 -> 3.976 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.75% +0.73%] index_add_ reverse : Elapsed 0.040 ms (3.978 ms / 100) 3.847 -> 3.845 ( -0.05%) [ +0.00% +0.05% +0.03% / -0.05% +0.83% +0.73%] index_copy_ reverse : Elapsed 0.038 ms (3.847 ms / 100) 3.961 -> 3.960 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.86% +0.86%] index_add_ spread : Elapsed 0.040 ms (3.961 ms / 100) 3.837 -> 3.836 ( -0.03%) [ +0.00% +0.03% +0.10% / -0.03% +0.96% +0.96%] index_copy_ spread : Elapsed 0.038 ms (3.837 ms / 100) 3.974 -> 3.972 ( -0.05%) [ +0.03% +0.10% +0.00% / -0.05% +0.70% +0.68%] index_add_ strided 3 : Elapsed 0.040 ms (3.975 ms / 100) 3.846 -> 3.845 ( -0.03%) [ +0.00% +0.10% +0.03% / -0.03% +0.73% +0.68%] index_copy_ strided 3 : Elapsed 0.038 ms (3.846 ms / 100) 3.975 -> 3.978 ( +0.08%) [ +0.00% +0.10% +0.03% / +0.08% +0.73% +0.75%] index_add_ strided 7 : Elapsed 0.040 ms (3.975 ms / 100) 3.846 -> 3.849 ( +0.08%) [ +0.03% +0.16% +0.00% / +0.08% +0.83% +0.88%] index_copy_ strided 7 : Elapsed 0.038 ms (3.847 ms / 100) 3.964 -> 3.966 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.86% +0.76%] index_add_ perm : Elapsed 0.040 ms (3.964 ms / 100) 3.841 -> 3.845 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +1.04% +0.78%] index_copy_ perm : Elapsed 0.038 ms (3.845 ms / 100) 3.972 -> 3.972 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.78% +0.70%] index_add_ perm_sorted : Elapsed 0.040 ms (3.972 ms / 100) 3.844 -> 3.847 ( +0.08%) [ +0.00% +0.00% +0.05% / +0.08% +0.81% +0.70%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.844 ms / 100) 5.558 -> 5.554 ( -0.07%) [ +0.04% +0.07% +0.00% / +0.09% -0.07% +0.00%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.578 -> 5.583 ( +0.09%) [ +0.16% +0.11% +0.00% / +0.09% +0.23% +0.23%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.585 -> 5.582 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.02% +0.16%] index_select linear : Elapsed 0.056 ms (5.585 ms / 100) 5.578 -> 5.583 ( +0.09%) [ +0.02% +0.05% +0.00% / +0.09% +0.23% +0.23%] index_select reverse : Elapsed 0.056 ms (5.579 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.27% +0.00% +0.05% / +0.02% +0.13% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.560 -> 5.560 ( +0.00%) [ +0.02% +0.04% +0.00% / +0.02% +0.00% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.580 -> 5.576 ( -0.07%) [ +0.00% +0.18% +0.04% / -0.07% +0.16% +0.07%] index_select spread : Elapsed 0.056 ms (5.580 ms / 100) 5.577 -> 5.586 ( +0.16%) [ +0.16% +0.00% +0.05% / +0.16% +0.22% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.586 ms / 100) 5.581 -> 5.577 ( -0.07%) [ +0.02% +0.16% +0.00% / -0.07% +0.09% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.02% +0.00% +0.11% / +0.09% +0.18% +0.29%] index_select strided 7 : Elapsed 0.056 ms (5.580 ms / 100) 5.558 -> 5.566 ( +0.14%) [ +0.02% +0.00% +0.11% / +0.14% +0.23% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.09% +0.00% +0.02% / +0.09% +0.23% +0.20%] index_select random : Elapsed 0.056 ms (5.584 ms / 100) 5.583 -> 5.583 ( +0.00%) [ +0.04% +0.00% +0.05% / +0.00% +0.04% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.585 ms / 100) B = [5, 20, 40, 4] (stride (20, 1, 100, 4000)) A = [5, 16, 40, 4] (stride (2560, 1, 64, 16)) dim = 1 3.895 -> 3.902 ( +0.18%) [ +0.26% +0.05% +0.00% / +0.18% +0.72% +0.64%] index_add_ linear : Elapsed 0.039 ms (3.905 ms / 100) 3.756 -> 3.758 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.61% +0.56%] index_copy_ linear : Elapsed 0.038 ms (3.757 ms / 100) 3.903 -> 3.911 ( +0.20%) [ +0.23% +0.00% +0.10% / +0.20% +0.61% +0.59%] index_add_ reverse : Elapsed 0.039 ms (3.912 ms / 100) 3.760 -> 3.761 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.74% +0.69%] index_copy_ reverse : Elapsed 0.038 ms (3.760 ms / 100) 3.923 -> 3.912 ( -0.28%) [ +0.00% +0.00% +0.00% / -0.28% +0.00% -0.13%] index_add_ spread : Elapsed 0.039 ms (3.923 ms / 100) 3.763 -> 3.766 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.43% +0.45%] index_copy_ spread : Elapsed 0.038 ms (3.765 ms / 100) 3.885 -> 3.885 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.85% +0.98%] index_add_ strided 3 : Elapsed 0.039 ms (3.885 ms / 100) 3.752 -> 3.752 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.64% +0.67%] index_copy_ strided 3 : Elapsed 0.038 ms (3.752 ms / 100) 3.888 -> 3.889 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.90% +0.77%] index_add_ strided 7 : Elapsed 0.039 ms (3.892 ms / 100) 3.752 -> 3.756 ( +0.11%) [ +0.05% +0.03% +0.00% / +0.11% +0.61% +0.53%] index_copy_ strided 7 : Elapsed 0.038 ms (3.754 ms / 100) 3.917 -> 3.916 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.05% +0.13%] index_add_ perm : Elapsed 0.039 ms (3.917 ms / 100) 3.766 -> 3.767 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.32% +0.29%] index_copy_ perm : Elapsed 0.038 ms (3.768 ms / 100) 3.915 -> 3.918 ( +0.08%) [ +0.05% +0.00% +0.20% / +0.08% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.039 ms (3.917 ms / 100) 3.763 -> 3.766 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.64% +0.61%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.763 ms / 100) 5.483 -> 5.479 ( -0.07%) [ +0.05% +0.07% +0.00% / +0.11% -0.07% -0.02%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.488 -> 5.489 ( +0.02%) [ +0.11% +0.00% +0.18% / +0.09% +0.02% +0.13%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.488 -> 5.490 ( +0.04%) [ +0.00% +0.11% +0.07% / +0.04% +0.07% +0.05%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.486 -> 5.490 ( +0.07%) [ +0.07% +0.00% +0.13% / +0.11% +0.07% +0.22%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.483 -> 5.489 ( +0.11%) [ +0.04% +0.00% +0.15% / +0.11% +0.11% +0.26%] index_select skip64 : Elapsed 0.055 ms (5.485 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.00% +0.07% +0.04% / +0.18% +0.07% +0.16%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.491 -> 5.487 ( -0.07%) [ +0.20% +0.09% +0.00% / +0.09% -0.05% -0.07%] index_select spread : Elapsed 0.055 ms (5.502 ms / 100) 5.494 -> 5.490 ( -0.07%) [ +0.04% +0.02% +0.00% / +0.02% -0.07% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.26% +0.15% +0.00% / +0.05% -0.05% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.503 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.05% +0.00% +0.09% / -0.02% -0.05% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.489 -> 5.491 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.18% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.491 ms / 100) 5.494 -> 5.491 ( -0.05%) [ +0.02% +0.00% +0.04% / -0.05% -0.04% -0.04%] index_select random : Elapsed 0.055 ms (5.495 ms / 100) 5.495 -> 5.490 ( -0.09%) [ +0.00% +0.00% +0.07% / -0.07% -0.09% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) out_shape = [5, 16, 20, 4] in_shape = [5, 16, 40, 4] idx_dim = 2 B = [5, 16, 20, 4] (stride (1280, 1, 64, 16)) A = [5, 16, 40, 4] (stride (160, 800, 1, 40)) dim = 2 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.12% +0.12% +0.16%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.12% +0.33% +0.12%] index_select wrap : Elapsed 0.025 ms (2.454 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.00% +0.20% +0.08% / +0.16% +0.16% +0.20%] index_select linear : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.20% +0.16%] index_select reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.12% +0.20% +0.00% / +0.00% +0.12% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.444 -> 2.449 ( +0.20%) [ +0.00% +0.08% +0.20% / +0.20% +0.33% +0.41%] index_select skip256 : Elapsed 0.024 ms (2.444 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.00% +0.12% +0.04% / -0.08% -0.04% +0.08%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.00% +0.00% +0.16% / +0.04% -0.04% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.461 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.00% -0.08%] index_select strided 5 : Elapsed 0.025 ms (2.466 ms / 100) 2.461 -> 2.457 ( -0.16%) [ +0.00% +0.08% +0.00% / -0.16% +0.04% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.37% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.462 ms / 100) 2.463 -> 2.464 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.24% +0.08%] index_select strided 16 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.461 ( -0.08%) [ +0.04% +0.00% +0.08% / +0.04% +0.24% -0.08%] index_select random : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.461 ( +0.04%) [ +0.04% +0.00% +0.16% / +0.16% +0.12% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.464 -> 2.460 ( -0.16%) [ +0.00% +0.08% +0.12% / -0.08% -0.16% -0.08%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.457 ( -0.24%) [ +0.04% +0.08% +0.00% / +0.20% -0.20% -0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) B = [5, 16, 20, 4] (stride (1, 5, 320, 80)) A = [5, 16, 40, 4] (stride (1, 5, 320, 80)) dim = 2 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.08% +0.00% +0.16%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.457 -> 2.449 ( -0.33%) [ +0.16% +0.04% +0.00% / -0.08% -0.33% -0.16%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.456 -> 2.449 ( -0.29%) [ +0.00% +0.04% +0.04% / +0.04% -0.29% -0.12%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.20% +0.24%] index_select reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.16% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.04% +0.37% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.12% +0.08% +0.00% / +0.00% +0.33% +0.24%] index_select spread : Elapsed 0.025 ms (2.453 ms / 100) 2.452 -> 2.456 ( +0.16%) [ +0.00% +0.12% +0.12% / +0.16% +0.29% +0.37%] index_select strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.450 -> 2.445 ( -0.20%) [ +0.16% +0.04% +0.00% / -0.20% +0.20% +0.29%] index_select strided 5 : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.16% +0.24% +0.00% / +0.20% +0.20% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.00% +0.08% +0.20% / +0.04% +0.04% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.08% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.448 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.00% +0.04% +0.00% / -0.12% +0.16% -0.04%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.16% +0.00% +0.00% / -0.04% +0.04% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.00% +0.29% +0.04% / +0.37% -0.08% +0.08%] index_select perm : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.453 ( -0.12%) [ +0.00% +0.16% +0.08% / -0.04% +0.00% -0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) B = [5, 16, 20, 4] (stride (320, 20, 1, 1600)) A = [5, 16, 40, 4] (stride (1, 200, 5, 3200)) dim = 2 2.397 -> 2.396 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.17% +0.25%] index_select const : Elapsed 0.024 ms (2.397 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.17% +0.04% +0.00% / +0.12% -0.04% +0.00%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.08% +0.00% +0.00% / +0.21% +0.04% +0.12%] index_select linear : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.21% +0.00% +0.08% / +0.08% +0.00% +0.04%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.08% +0.00% +0.17% / +0.08% +0.04% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.401 ms / 100) 2.396 -> 2.399 ( +0.13%) [ +0.04% +0.17% +0.00% / +0.13% +0.21% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.397 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.17% +0.08%] index_select spread : Elapsed 0.024 ms (2.420 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.04% -0.08% +0.04%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.411 -> 2.412 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.17% +0.04% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.417 -> 2.419 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.08% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.400 -> 2.402 ( +0.08%) [ +0.00% +0.25% +0.00% / +0.13% +0.13% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.400 ms / 100) 2.400 -> 2.404 ( +0.17%) [ +0.29% +0.08% +0.00% / +0.17% +0.21% +0.37%] index_select strided 16 : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.418 ( +0.17%) [ +0.12% +0.00% +0.08% / +0.17% +0.21% +0.17%] index_select random : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.21% +0.04% +0.00% / +0.08% +0.00% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.417 -> 2.415 ( -0.08%) [ +0.00% +0.00% +0.12% / +0.21% -0.08% +0.04%] index_select perm : Elapsed 0.024 ms (2.417 ms / 100) 2.419 -> 2.413 ( -0.25%) [ +0.00% +0.08% +0.12% / +0.08% +0.00% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [5, 16, 20, 4] (stride (20, 100, 1, 1600)) A = [5, 16, 40, 4] (stride (1, 20, 320, 5)) dim = 2 2.446 -> 2.445 ( -0.04%) [ +0.00% +0.00% +0.20% / +0.00% -0.04% +0.00%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.455 -> 2.450 ( -0.20%) [ +0.04% +0.00% +0.29% / +0.08% -0.16% -0.20%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.452 -> 2.446 ( -0.24%) [ +0.08% +0.04% +0.00% / -0.08% -0.24% -0.24%] index_select linear : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.04% +0.16%] index_select reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.04% +0.08% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.445 -> 2.445 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.00% +0.20% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.33% +0.08%] index_select spread : Elapsed 0.024 ms (2.449 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.16% +0.16% / +0.08% +0.29% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.12% +0.20% / +0.20% +0.12% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.446 ms / 100) 2.451 -> 2.452 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.08% +0.04% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.16% +0.08% / +0.16% +0.12% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.446 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.12% +0.16%] index_select strided 16 : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.00% +0.08%] index_select random : Elapsed 0.025 ms (2.451 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.00% +0.04% +0.08% / +0.04% -0.08% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.450 ms / 100) 2.448 -> 2.452 ( +0.16%) [ +0.41% +0.20% +0.00% / +0.33% +0.37% +0.16%] index_select perm : Elapsed 0.025 ms (2.458 ms / 100) 2.451 -> 2.456 ( +0.20%) [ +0.24% +0.04% +0.00% / +0.57% +0.20% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) B = [5, 16, 20, 4] (stride (1, 100, 5, 1600)) A = [5, 16, 40, 4] (stride (1, 20, 320, 5)) dim = 2 2.446 -> 2.445 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.33% +0.12%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.457 -> 2.454 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% -0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.16% +0.16% +0.08%] index_select linear : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.450 ( -0.24%) [ +0.04% +0.00% +0.04% / +0.08% -0.24% -0.12%] index_select reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.00% +0.12% +0.04% / +0.00% +0.04% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.00% +0.04% +0.12% / +0.20% +0.20% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.04% +0.00% +0.29% / +0.12% +0.20% +0.20%] index_select spread : Elapsed 0.025 ms (2.453 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.00% +0.00% +0.08% / +0.20% +0.16% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.25% +0.00% +0.33% / +0.25% +0.20% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.04% +0.16% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.16% +0.16% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.12% +0.20% +0.00% / +0.20% +0.12% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.451 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.00% +0.33% +0.20%] index_select random : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.24% +0.08% +0.00% / +0.00% +0.04% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.449 ( -0.16%) [ +0.00% +0.16% +0.08% / +0.00% -0.16% +0.04%] index_select perm : Elapsed 0.025 ms (2.453 ms / 100) 2.457 -> 2.451 ( -0.24%) [ +0.08% +0.24% +0.00% / -0.04% -0.24% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) out_shape = [5, 16, 40, 20] in_shape = [5, 16, 40, 4] idx_dim = 3 B = [5, 16, 40, 20] (stride (12800, 20, 320, 1)) A = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) dim = 3 2.240 -> 2.242 ( +0.09%) [ +0.13% +0.00% +0.18% / +0.09% +0.22% +0.31%] index_add_ linear : Elapsed 0.022 ms (2.243 ms / 100) 2.194 -> 2.200 ( +0.27%) [ +0.36% +0.00% +0.14% / +0.27% +0.59% +0.50%] index_copy_ linear : Elapsed 0.022 ms (2.202 ms / 100) 2.237 -> 2.244 ( +0.31%) [ +0.04% +0.31% +0.00% / +0.31% +0.89% +0.89%] index_add_ reverse : Elapsed 0.022 ms (2.238 ms / 100) 2.194 -> 2.201 ( +0.32%) [ +0.00% +0.23% +0.09% / +0.46% +0.50% +0.32%] index_copy_ reverse : Elapsed 0.022 ms (2.194 ms / 100) 2.276 -> 2.285 ( +0.40%) [ +0.00% +0.44% +0.35% / +0.40% +0.66% +0.88%] index_add_ spread : Elapsed 0.023 ms (2.276 ms / 100) 2.293 -> 2.294 ( +0.04%) [ +0.00% +0.22% +0.09% / +0.04% +0.65% +0.74%] index_copy_ spread : Elapsed 0.023 ms (2.293 ms / 100) 2.273 -> 2.277 ( +0.18%) [ +0.00% +0.00% +0.57% / +0.18% +0.53% +0.48%] index_add_ strided 3 : Elapsed 0.023 ms (2.273 ms / 100) 2.263 -> 2.270 ( +0.31%) [ +0.00% +0.18% +0.18% / +0.31% +0.71% +0.49%] index_copy_ strided 3 : Elapsed 0.023 ms (2.263 ms / 100) 2.280 -> 2.288 ( +0.35%) [ +0.18% +0.00% +0.00% / +0.35% +0.66% +0.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.284 ms / 100) 2.293 -> 2.300 ( +0.31%) [ +0.00% +0.00% +0.00% / +0.31% +0.78% +0.70%] index_copy_ strided 7 : Elapsed 0.023 ms (2.293 ms / 100) 2.280 -> 2.279 ( -0.04%) [ +0.35% +0.04% +0.00% / -0.04% +0.22% +0.35%] index_add_ perm : Elapsed 0.023 ms (2.288 ms / 100) 2.294 -> 2.295 ( +0.04%) [ +0.17% +0.00% +0.00% / +0.04% +0.35% +0.48%] index_copy_ perm : Elapsed 0.023 ms (2.298 ms / 100) 2.278 -> 2.287 ( +0.40%) [ +0.22% +0.00% +0.35% / +0.40% +0.70% +0.40%] index_add_ perm_sorted : Elapsed 0.023 ms (2.283 ms / 100) 2.294 -> 2.301 ( +0.31%) [ +0.17% +0.22% +0.00% / +0.39% +0.31% +0.35%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.298 ms / 100) 9.243 -> 9.253 ( +0.11%) [ +0.10% +0.08% +0.00% / +0.22% +0.29% +0.11%] index_select const : Elapsed 0.093 ms (9.252 ms / 100) 9.306 -> 9.307 ( +0.01%) [ +0.05% +0.00% +0.14% / +0.03% +0.01% +0.09%] index_select wrap : Elapsed 0.093 ms (9.311 ms / 100) 9.284 -> 9.290 ( +0.06%) [ +0.12% +0.02% +0.00% / +0.12% +0.06% +0.30%] index_select linear : Elapsed 0.093 ms (9.295 ms / 100) 9.297 -> 9.298 ( +0.01%) [ +0.09% +0.00% +0.06% / +0.01% +0.16% +0.12%] index_select reverse : Elapsed 0.093 ms (9.305 ms / 100) 9.228 -> 9.237 ( +0.10%) [ +0.38% +0.34% +0.00% / +0.10% +0.39% +0.47%] index_select skip64 : Elapsed 0.093 ms (9.263 ms / 100) 9.246 -> 9.258 ( +0.13%) [ +0.11% +0.00% +0.05% / +0.13% +0.35% +0.15%] index_select skip256 : Elapsed 0.093 ms (9.256 ms / 100) 9.304 -> 9.294 ( -0.11%) [ +0.03% +0.06% +0.00% / -0.11% -0.03% -0.06%] index_select spread : Elapsed 0.093 ms (9.307 ms / 100) 9.303 -> 9.299 ( -0.04%) [ +0.02% +0.16% +0.00% / +0.04% -0.03% -0.04%] index_select strided 3 : Elapsed 0.093 ms (9.305 ms / 100) 9.301 -> 9.289 ( -0.13%) [ +0.15% +0.00% +0.05% / +0.12% +0.04% -0.13%] index_select random : Elapsed 0.093 ms (9.315 ms / 100) 9.280 -> 9.293 ( +0.14%) [ +0.06% +0.00% +0.23% / +0.15% +0.26% +0.14%] index_select random_sorted : Elapsed 0.093 ms (9.286 ms / 100) B = [5, 16, 40, 20] (stride (800, 4000, 1, 40)) A = [5, 16, 40, 4] (stride (2560, 160, 1, 40)) dim = 3 1.999 -> 1.997 ( -0.10%) [ +0.15% +0.00% +0.05% / -0.10% +0.85% +0.85%] index_add_ linear : Elapsed 0.020 ms (2.002 ms / 100) 1.950 -> 1.953 ( +0.15%) [ +0.05% +0.00% +0.21% / +0.15% +1.18% +1.33%] index_copy_ linear : Elapsed 0.020 ms (1.951 ms / 100) 1.992 -> 1.998 ( +0.30%) [ +0.00% +0.30% +0.40% / +0.30% +0.85% +1.05%] index_add_ reverse : Elapsed 0.020 ms (1.992 ms / 100) 1.949 -> 1.951 ( +0.10%) [ +0.31% +0.00% +0.26% / +0.10% +1.13% +1.03%] index_copy_ reverse : Elapsed 0.020 ms (1.955 ms / 100) 1.994 -> 1.996 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.90% +0.75%] index_add_ spread : Elapsed 0.020 ms (1.996 ms / 100) 1.956 -> 1.955 ( -0.05%) [ +0.36% +0.31% +0.00% / -0.05% +1.02% +1.07%] index_copy_ spread : Elapsed 0.020 ms (1.963 ms / 100) 2.002 -> 2.004 ( +0.10%) [ +0.00% +0.25% +0.00% / +0.10% +1.00% +0.80%] index_add_ strided 3 : Elapsed 0.020 ms (2.002 ms / 100) 1.962 -> 1.963 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +1.12% +0.87%] index_copy_ strided 3 : Elapsed 0.020 ms (1.963 ms / 100) 1.996 -> 2.003 ( +0.35%) [ +0.45% +0.25% +0.00% / +0.35% +1.05% +1.00%] index_add_ strided 7 : Elapsed 0.020 ms (2.005 ms / 100) 1.951 -> 1.957 ( +0.31%) [ +0.51% +0.21% +0.00% / +0.31% +1.28% +1.08%] index_copy_ strided 7 : Elapsed 0.020 ms (1.961 ms / 100) 1.997 -> 1.999 ( +0.10%) [ +0.05% +0.15% +0.00% / +0.10% +0.80% +0.80%] index_add_ perm : Elapsed 0.020 ms (1.998 ms / 100) 1.960 -> 1.964 ( +0.20%) [ +0.05% +0.00% +0.00% / +0.20% +0.87% +0.82%] index_copy_ perm : Elapsed 0.020 ms (1.961 ms / 100) 2.005 -> 2.002 ( -0.15%) [ +0.10% +0.00% +0.00% / -0.15% +0.60% +0.65%] index_add_ perm_sorted : Elapsed 0.020 ms (2.007 ms / 100) 1.963 -> 1.966 ( +0.15%) [ +0.20% +0.10% +0.00% / +0.15% +0.82% +0.71%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.967 ms / 100) 8.724 -> 8.718 ( -0.07%) [ +0.05% +0.00% +0.08% / -0.07% +0.16% +0.07%] index_select const : Elapsed 0.087 ms (8.728 ms / 100) 8.780 -> 8.782 ( +0.02%) [ +0.17% +0.09% +0.00% / +0.19% +0.16% +0.02%] index_select wrap : Elapsed 0.088 ms (8.795 ms / 100) 8.746 -> 8.750 ( +0.05%) [ +0.26% +0.00% +0.16% / +0.05% +0.15% +0.11%] index_select linear : Elapsed 0.088 ms (8.769 ms / 100) 8.767 -> 8.771 ( +0.05%) [ +0.00% +0.06% +0.11% / +0.05% +0.09% +0.17%] index_select reverse : Elapsed 0.088 ms (8.767 ms / 100) 8.719 -> 8.713 ( -0.07%) [ +0.09% +0.15% +0.00% / -0.07% +0.24% +0.14%] index_select skip64 : Elapsed 0.087 ms (8.727 ms / 100) 8.716 -> 8.715 ( -0.01%) [ +0.16% +0.00% +0.07% / -0.01% +0.01% +0.05%] index_select skip256 : Elapsed 0.087 ms (8.730 ms / 100) 8.770 -> 8.772 ( +0.02%) [ +0.00% +0.11% +0.21% / +0.13% +0.02% +0.16%] index_select spread : Elapsed 0.088 ms (8.770 ms / 100) 8.785 -> 8.779 ( -0.07%) [ +0.00% +0.05% +0.03% / -0.06% -0.07% -0.03%] index_select strided 3 : Elapsed 0.088 ms (8.785 ms / 100) 8.788 -> 8.780 ( -0.09%) [ +0.01% +0.00% +0.02% / +0.15% +0.00% -0.09%] index_select random : Elapsed 0.088 ms (8.789 ms / 100) 8.765 -> 8.767 ( +0.02%) [ +0.09% +0.00% +0.19% / +0.06% +0.08% +0.02%] index_select random_sorted : Elapsed 0.088 ms (8.773 ms / 100) B = [5, 16, 40, 20] (stride (1, 4000, 5, 200)) A = [5, 16, 40, 4] (stride (4, 20, 320, 1)) dim = 3 2.233 -> 2.237 ( +0.18%) [ +0.09% +0.27% +0.00% / +0.40% +0.22% +0.18%] index_add_ linear : Elapsed 0.022 ms (2.235 ms / 100) 2.167 -> 2.174 ( +0.32%) [ +0.00% +0.42% +0.18% / +0.42% +0.32% +0.42%] index_copy_ linear : Elapsed 0.022 ms (2.167 ms / 100) 2.239 -> 2.239 ( +0.00%) [ +0.00% +0.27% +0.13% / +0.00% +0.31% +0.00%] index_add_ reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.173 -> 2.171 ( -0.09%) [ +0.09% +0.00% +0.14% / -0.09% +0.09% +0.23%] index_copy_ reverse : Elapsed 0.022 ms (2.175 ms / 100) 2.234 -> 2.237 ( +0.13%) [ +0.00% +0.31% +0.00% / +0.13% +0.36% +0.36%] index_add_ spread : Elapsed 0.022 ms (2.234 ms / 100) 2.166 -> 2.172 ( +0.28%) [ +0.00% +0.42% +0.05% / +0.37% +0.28% +0.60%] index_copy_ spread : Elapsed 0.022 ms (2.166 ms / 100) 2.234 -> 2.233 ( -0.04%) [ +0.09% +0.00% +0.18% / +0.18% -0.04% +0.40%] index_add_ strided 3 : Elapsed 0.022 ms (2.236 ms / 100) 2.170 -> 2.169 ( -0.05%) [ +0.23% +0.00% +0.14% / +0.41% -0.05% +0.32%] index_copy_ strided 3 : Elapsed 0.022 ms (2.175 ms / 100) 2.235 -> 2.234 ( -0.04%) [ +0.13% +0.00% +0.09% / +0.04% -0.04% +0.04%] index_add_ strided 7 : Elapsed 0.022 ms (2.238 ms / 100) 2.172 -> 2.172 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.14% +0.00% +0.14%] index_copy_ strided 7 : Elapsed 0.022 ms (2.172 ms / 100) 2.232 -> 2.235 ( +0.13%) [ +0.22% +0.00% +0.18% / +0.27% +0.22% +0.13%] index_add_ perm : Elapsed 0.022 ms (2.237 ms / 100) 2.166 -> 2.170 ( +0.18%) [ +0.00% +0.05% +0.32% / +0.18% +0.28% +0.23%] index_copy_ perm : Elapsed 0.022 ms (2.166 ms / 100) 2.234 -> 2.235 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.09% +0.04% +0.22%] index_add_ perm_sorted : Elapsed 0.022 ms (2.236 ms / 100) 2.168 -> 2.167 ( -0.05%) [ +0.00% +0.14% +0.28% / -0.05% +0.14% +0.23%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.168 ms / 100) 9.251 -> 9.246 ( -0.05%) [ +0.00% +0.01% +0.04% / -0.03% -0.05% +0.08%] index_select const : Elapsed 0.093 ms (9.251 ms / 100) 9.250 -> 9.246 ( -0.04%) [ +0.04% +0.04% +0.00% / +0.13% +0.03% -0.04%] index_select wrap : Elapsed 0.093 ms (9.254 ms / 100) 9.237 -> 9.256 ( +0.21%) [ +0.00% +0.42% +0.15% / +0.21% +0.31% +0.22%] index_select linear : Elapsed 0.092 ms (9.237 ms / 100) 9.249 -> 9.255 ( +0.06%) [ +0.14% +0.01% +0.00% / +0.10% +0.06% +0.06%] index_select reverse : Elapsed 0.093 ms (9.262 ms / 100) 9.245 -> 9.254 ( +0.10%) [ +0.03% +0.18% +0.00% / +0.10% +0.23% +0.10%] index_select skip64 : Elapsed 0.092 ms (9.248 ms / 100) 9.247 -> 9.245 ( -0.02%) [ +0.01% +0.00% +0.10% / +0.10% -0.02% +0.18%] index_select skip256 : Elapsed 0.092 ms (9.248 ms / 100) 9.251 -> 9.235 ( -0.17%) [ +0.02% +0.00% +0.02% / -0.17% +0.31% +0.04%] index_select spread : Elapsed 0.093 ms (9.253 ms / 100) 9.245 -> 9.243 ( -0.02%) [ +0.26% +0.00% +0.04% / +0.00% +0.15% -0.02%] index_select strided 3 : Elapsed 0.093 ms (9.269 ms / 100) 9.243 -> 9.238 ( -0.05%) [ +0.01% +0.00% +0.13% / -0.05% +0.21% +0.05%] index_select random : Elapsed 0.092 ms (9.244 ms / 100) 9.235 -> 9.247 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.26% +0.27%] index_select random_sorted : Elapsed 0.092 ms (9.247 ms / 100) B = [5, 16, 40, 20] (stride (1, 4000, 5, 200)) A = [5, 16, 40, 4] (stride (40, 200, 1, 3200)) dim = 3 2.077 -> 2.079 ( +0.10%) [ +0.24% +0.29% +0.00% / +0.10% +0.29% +0.43%] index_add_ linear : Elapsed 0.021 ms (2.082 ms / 100) 2.027 -> 2.031 ( +0.20%) [ +0.25% +0.00% +0.15% / +0.20% +0.89% +0.99%] index_copy_ linear : Elapsed 0.020 ms (2.032 ms / 100) 2.078 -> 2.077 ( -0.05%) [ +0.00% +0.10% +0.00% / -0.05% +0.43% +0.34%] index_add_ reverse : Elapsed 0.021 ms (2.078 ms / 100) 2.031 -> 2.029 ( -0.10%) [ +0.10% +0.00% +0.00% / -0.10% +0.54% +0.74%] index_copy_ reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.075 -> 2.075 ( +0.00%) [ +0.00% +0.24% +0.19% / +0.00% +0.10% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.075 ms / 100) 2.031 -> 2.031 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.54% +0.69%] index_copy_ spread : Elapsed 0.020 ms (2.032 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.29% +0.34%] index_add_ strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.041 -> 2.040 ( -0.05%) [ +0.15% +0.00% +0.00% / -0.05% +0.64% +0.78%] index_copy_ strided 3 : Elapsed 0.020 ms (2.044 ms / 100) 2.084 -> 2.082 ( -0.10%) [ +0.05% +0.00% +0.24% / -0.10% +0.14% +0.29%] index_add_ strided 7 : Elapsed 0.021 ms (2.085 ms / 100) 2.031 -> 2.032 ( +0.05%) [ +0.00% +0.30% +0.15% / +0.05% +1.08% +0.64%] index_copy_ strided 7 : Elapsed 0.020 ms (2.031 ms / 100) 2.079 -> 2.078 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.43% +0.48%] index_add_ perm : Elapsed 0.021 ms (2.081 ms / 100) 2.034 -> 2.036 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.88% +0.74%] index_copy_ perm : Elapsed 0.020 ms (2.035 ms / 100) 2.073 -> 2.076 ( +0.14%) [ +0.34% +0.00% +0.05% / +0.14% +0.48% +0.68%] index_add_ perm_sorted : Elapsed 0.021 ms (2.080 ms / 100) 2.029 -> 2.032 ( +0.15%) [ +0.00% +0.30% +0.05% / +0.15% +0.84% +0.84%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.029 ms / 100) 9.172 -> 9.177 ( +0.05%) [ +0.07% +0.01% +0.00% / +0.05% +0.32% +0.05%] index_select const : Elapsed 0.092 ms (9.178 ms / 100) 9.241 -> 9.250 ( +0.10%) [ +0.00% +0.17% +0.02% / +0.10% +0.58% +0.42%] index_select wrap : Elapsed 0.092 ms (9.241 ms / 100) 9.215 -> 9.212 ( -0.03%) [ +0.00% +0.05% +0.05% / -0.03% +0.14% +0.05%] index_select linear : Elapsed 0.092 ms (9.215 ms / 100) 9.214 -> 9.216 ( +0.02%) [ +0.05% +0.10% +0.00% / +0.02% +0.30% +0.34%] index_select reverse : Elapsed 0.092 ms (9.219 ms / 100) 9.163 -> 9.163 ( +0.00%) [ +0.00% +0.08% +0.11% / +0.00% +0.48% +0.20%] index_select skip64 : Elapsed 0.092 ms (9.163 ms / 100) 9.167 -> 9.179 ( +0.13%) [ +0.03% +0.00% +0.16% / +0.13% +0.14% +0.19%] index_select skip256 : Elapsed 0.092 ms (9.170 ms / 100) 9.212 -> 9.224 ( +0.13%) [ +0.04% +0.00% +0.05% / +0.13% +0.63% +0.59%] index_select spread : Elapsed 0.092 ms (9.216 ms / 100) 9.250 -> 9.260 ( +0.11%) [ +0.15% +0.13% +0.00% / +0.11% +0.29% +0.29%] index_select strided 3 : Elapsed 0.093 ms (9.264 ms / 100) 9.244 -> 9.263 ( +0.21%) [ +0.16% +0.14% +0.00% / +0.21% +0.50% +0.28%] index_select random : Elapsed 0.093 ms (9.259 ms / 100) 9.210 -> 9.229 ( +0.21%) [ +0.00% +0.11% +0.09% / +0.21% +0.42% +0.48%] index_select random_sorted : Elapsed 0.092 ms (9.210 ms / 100) B = [5, 16, 40, 20] (stride (640, 1, 16, 3200)) A = [5, 16, 40, 4] (stride (160, 800, 4, 1)) dim = 3 2.070 -> 2.073 ( +0.14%) [ +0.34% +0.43% +0.00% / +0.14% +0.29% +0.14%] index_add_ linear : Elapsed 0.021 ms (2.077 ms / 100) 2.020 -> 2.024 ( +0.20%) [ +0.15% +0.35% +0.00% / +0.25% +0.20% +0.20%] index_copy_ linear : Elapsed 0.020 ms (2.023 ms / 100) 2.047 -> 2.045 ( -0.10%) [ +0.00% +0.15% +0.00% / -0.10% +0.39% +0.44%] index_add_ reverse : Elapsed 0.020 ms (2.047 ms / 100) 2.003 -> 2.003 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +0.50% +0.40%] index_copy_ reverse : Elapsed 0.020 ms (2.004 ms / 100) 2.065 -> 2.059 ( -0.29%) [ +0.00% +0.00% +0.24% / -0.05% -0.29% -0.10%] index_add_ spread : Elapsed 0.021 ms (2.065 ms / 100) 2.011 -> 2.011 ( +0.00%) [ +0.25% +0.10% +0.00% / +0.10% +0.00% +0.25%] index_copy_ spread : Elapsed 0.020 ms (2.016 ms / 100) 2.072 -> 2.073 ( +0.05%) [ +0.00% +0.19% +0.14% / +0.24% +0.29% +0.05%] index_add_ strided 3 : Elapsed 0.021 ms (2.072 ms / 100) 2.023 -> 2.027 ( +0.20%) [ +0.40% +0.49% +0.00% / +0.30% +0.40% +0.20%] index_copy_ strided 3 : Elapsed 0.020 ms (2.031 ms / 100) 2.062 -> 2.064 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.53% +0.73%] index_add_ strided 7 : Elapsed 0.021 ms (2.064 ms / 100) 2.015 -> 2.018 ( +0.15%) [ +0.15% +0.25% +0.00% / +0.15% +0.45% +0.45%] index_copy_ strided 7 : Elapsed 0.020 ms (2.018 ms / 100) 2.059 -> 2.065 ( +0.29%) [ +0.00% +0.00% +0.10% / +0.29% +0.49% +0.63%] index_add_ perm : Elapsed 0.021 ms (2.059 ms / 100) 2.011 -> 2.012 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.40% +0.60%] index_copy_ perm : Elapsed 0.020 ms (2.011 ms / 100) 2.058 -> 2.059 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.78% +0.49%] index_add_ perm_sorted : Elapsed 0.021 ms (2.059 ms / 100) 2.007 -> 2.012 ( +0.25%) [ +0.00% +0.20% +0.15% / +0.25% +0.70% +0.85%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.007 ms / 100) 9.187 -> 9.176 ( -0.12%) [ +0.00% +0.07% +0.20% / +0.11% -0.02% -0.12%] index_select const : Elapsed 0.092 ms (9.187 ms / 100) 9.189 -> 9.191 ( +0.02%) [ +0.02% +0.00% +0.08% / +0.09% +0.02% +0.12%] index_select wrap : Elapsed 0.092 ms (9.191 ms / 100) 9.177 -> 9.174 ( -0.03%) [ +0.26% +0.00% +0.33% / -0.03% +0.05% +0.09%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.176 -> 9.178 ( +0.02%) [ +0.17% +0.00% +0.17% / +0.02% +0.23% +0.03%] index_select reverse : Elapsed 0.092 ms (9.192 ms / 100) 9.177 -> 9.170 ( -0.08%) [ +0.00% +0.04% +0.22% / +0.19% +0.17% -0.08%] index_select skip64 : Elapsed 0.092 ms (9.177 ms / 100) 9.176 -> 9.186 ( +0.11%) [ +0.00% +0.11% +0.23% / +0.24% +0.27% +0.11%] index_select skip256 : Elapsed 0.092 ms (9.176 ms / 100) 9.180 -> 9.175 ( -0.05%) [ +0.13% +0.00% +0.24% / +0.17% +0.07% -0.05%] index_select spread : Elapsed 0.092 ms (9.192 ms / 100) 9.192 -> 9.192 ( +0.00%) [ +0.22% +0.04% +0.00% / +0.17% +0.00% +0.05%] index_select strided 3 : Elapsed 0.092 ms (9.212 ms / 100) 9.182 -> 9.187 ( +0.05%) [ +0.28% +0.27% +0.00% / +0.05% +0.08% +0.09%] index_select random : Elapsed 0.092 ms (9.208 ms / 100) 9.178 -> 9.180 ( +0.02%) [ +0.10% +0.20% +0.00% / +0.08% +0.02% +0.27%] index_select random_sorted : Elapsed 0.092 ms (9.187 ms / 100) out_shape = [20, 40, 4, 16] in_shape = [5, 40, 4, 16] idx_dim = 0 B = [20, 40, 4, 16] (stride (2560, 1, 640, 40)) A = [5, 40, 4, 16] (stride (40, 1, 200, 800)) dim = 0 1.848 -> 1.851 ( +0.16%) [ +0.00% +0.00% +0.11% / +0.16% +0.70% +0.49%] index_add_ linear : Elapsed 0.018 ms (1.848 ms / 100) 1.794 -> 1.807 ( +0.72%) [ +0.00% +0.17% +0.67% / +0.72% +1.00% +1.06%] index_copy_ linear : Elapsed 0.018 ms (1.794 ms / 100) 1.849 -> 1.853 ( +0.22%) [ +0.05% +0.00% +0.16% / +0.22% +0.65% +0.43%] index_add_ reverse : Elapsed 0.019 ms (1.850 ms / 100) 1.801 -> 1.807 ( +0.33%) [ +0.06% +0.00% +0.33% / +0.33% +0.89% +0.78%] index_copy_ reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.831 -> 1.833 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +1.20% +0.98%] index_add_ spread : Elapsed 0.018 ms (1.831 ms / 100) 1.782 -> 1.788 ( +0.34%) [ +0.00% +0.06% +0.22% / +0.34% +1.29% +1.52%] index_copy_ spread : Elapsed 0.018 ms (1.782 ms / 100) 1.833 -> 1.836 ( +0.16%) [ +0.00% +0.27% +0.27% / +0.16% +1.47% +1.47%] index_add_ strided 3 : Elapsed 0.018 ms (1.833 ms / 100) 1.784 -> 1.790 ( +0.34%) [ +0.06% +0.00% +0.50% / +0.34% +1.91% +1.96%] index_copy_ strided 3 : Elapsed 0.018 ms (1.785 ms / 100) 1.828 -> 1.836 ( +0.44%) [ +0.27% +0.00% +0.49% / +0.44% +1.20% +1.09%] index_add_ strided 7 : Elapsed 0.018 ms (1.833 ms / 100) 1.780 -> 1.791 ( +0.62%) [ +0.06% +0.00% +0.62% / +0.62% +1.35% +1.01%] index_copy_ strided 7 : Elapsed 0.018 ms (1.781 ms / 100) 1.821 -> 1.829 ( +0.44%) [ +0.00% +0.00% +0.49% / +0.44% +2.20% +2.20%] index_add_ perm : Elapsed 0.018 ms (1.821 ms / 100) 1.769 -> 1.781 ( +0.68%) [ +0.00% +0.06% +0.68% / +0.68% +2.66% +2.43%] index_copy_ perm : Elapsed 0.018 ms (1.769 ms / 100) 1.835 -> 1.838 ( +0.16%) [ +0.00% +0.11% +0.33% / +0.16% +1.58% +1.74%] index_add_ perm_sorted : Elapsed 0.018 ms (1.835 ms / 100) 1.774 -> 1.798 ( +1.35%) [ +0.00% +0.39% +0.96% / +1.35% +2.65% +2.71%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.774 ms / 100) 8.201 -> 8.211 ( +0.12%) [ +0.00% +0.16% +0.13% / +0.23% +0.24% +0.12%] index_select const : Elapsed 0.082 ms (8.201 ms / 100) 8.232 -> 8.231 ( -0.01%) [ +0.09% +0.21% +0.00% / +0.17% -0.01% +0.19%] index_select wrap : Elapsed 0.082 ms (8.239 ms / 100) 8.231 -> 8.231 ( +0.00%) [ +0.06% +0.36% +0.00% / +0.00% +0.19% +0.13%] index_select linear : Elapsed 0.082 ms (8.236 ms / 100) 8.221 -> 8.239 ( +0.22%) [ +0.00% +0.26% +0.15% / +0.28% +0.22% +0.38%] index_select reverse : Elapsed 0.082 ms (8.221 ms / 100) 8.205 -> 8.199 ( -0.07%) [ +0.04% +0.00% +0.13% / -0.07% +0.20% +0.34%] index_select skip64 : Elapsed 0.082 ms (8.208 ms / 100) 8.202 -> 8.202 ( +0.00%) [ +0.10% +0.00% +0.09% / +0.00% +0.29% +0.06%] index_select skip256 : Elapsed 0.082 ms (8.210 ms / 100) 8.234 -> 8.240 ( +0.07%) [ +0.11% +0.15% +0.00% / +0.35% +0.07% +0.60%] index_select spread : Elapsed 0.082 ms (8.243 ms / 100) 8.230 -> 8.234 ( +0.05%) [ +0.00% +0.33% +0.09% / +0.23% +0.13% +0.05%] index_select strided 3 : Elapsed 0.082 ms (8.230 ms / 100) 8.227 -> 8.242 ( +0.18%) [ +0.23% +0.15% +0.00% / +0.18% +0.41% +0.35%] index_select random : Elapsed 0.082 ms (8.246 ms / 100) 8.235 -> 8.249 ( +0.17%) [ +0.07% +0.15% +0.00% / +0.21% +0.17% +0.26%] index_select random_sorted : Elapsed 0.082 ms (8.241 ms / 100) B = [20, 40, 4, 16] (stride (2560, 4, 1, 160)) A = [5, 40, 4, 16] (stride (2560, 1, 40, 160)) dim = 0 1.824 -> 1.829 ( +0.27%) [ +0.00% +0.33% +0.16% / +0.27% +0.66% +0.55%] index_add_ linear : Elapsed 0.018 ms (1.824 ms / 100) 1.771 -> 1.771 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.34% +0.62%] index_copy_ linear : Elapsed 0.018 ms (1.771 ms / 100) 1.828 -> 1.829 ( +0.05%) [ +0.05% +0.27% +0.00% / +0.05% +0.38% +0.38%] index_add_ reverse : Elapsed 0.018 ms (1.829 ms / 100) 1.768 -> 1.769 ( +0.06%) [ +0.06% +0.00% +0.17% / +0.06% +0.68% +0.79%] index_copy_ reverse : Elapsed 0.018 ms (1.769 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.00% +0.22% +0.05% / +0.05% +1.10% +1.37%] index_add_ spread : Elapsed 0.018 ms (1.826 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.90% +1.02%] index_copy_ spread : Elapsed 0.018 ms (1.772 ms / 100) 1.823 -> 1.822 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.93% +0.82%] index_add_ strided 3 : Elapsed 0.018 ms (1.826 ms / 100) 1.770 -> 1.770 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +1.02% +0.79%] index_copy_ strided 3 : Elapsed 0.018 ms (1.770 ms / 100) 1.829 -> 1.831 ( +0.11%) [ +0.22% +0.00% +0.05% / +0.11% +0.93% +0.66%] index_add_ strided 7 : Elapsed 0.018 ms (1.833 ms / 100) 1.775 -> 1.776 ( +0.06%) [ +0.11% +0.06% +0.00% / +0.06% +0.39% +0.45%] index_copy_ strided 7 : Elapsed 0.018 ms (1.777 ms / 100) 1.823 -> 1.825 ( +0.11%) [ +0.00% +0.11% +0.05% / +0.11% +0.88% +0.99%] index_add_ perm : Elapsed 0.018 ms (1.823 ms / 100) 1.771 -> 1.774 ( +0.17%) [ +0.06% +0.06% +0.00% / +0.17% +0.73% +0.79%] index_copy_ perm : Elapsed 0.018 ms (1.772 ms / 100) 1.820 -> 1.818 ( -0.11%) [ +0.11% +0.22% +0.00% / -0.11% +0.82% +0.71%] index_add_ perm_sorted : Elapsed 0.018 ms (1.822 ms / 100) 1.765 -> 1.766 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.91% +0.85%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.765 ms / 100) 8.201 -> 8.209 ( +0.10%) [ +0.04% +0.32% +0.00% / +0.16% +0.23% +0.10%] index_select const : Elapsed 0.082 ms (8.204 ms / 100) 8.221 -> 8.220 ( -0.01%) [ +0.30% +0.00% +0.10% / -0.01% +0.18% +0.24%] index_select wrap : Elapsed 0.082 ms (8.246 ms / 100) 8.228 -> 8.207 ( -0.26%) [ +0.16% +0.13% +0.00% / -0.26% +0.33% +0.21%] index_select linear : Elapsed 0.082 ms (8.241 ms / 100) 8.214 -> 8.215 ( +0.01%) [ +0.15% +0.06% +0.00% / +0.30% +0.01% +0.23%] index_select reverse : Elapsed 0.082 ms (8.226 ms / 100) 8.190 -> 8.196 ( +0.07%) [ +0.17% +0.00% +0.15% / +0.07% +0.45% +0.35%] index_select skip64 : Elapsed 0.082 ms (8.204 ms / 100) 8.197 -> 8.210 ( +0.16%) [ +0.11% +0.00% +0.10% / +0.17% +0.16% +0.18%] index_select skip256 : Elapsed 0.082 ms (8.206 ms / 100) 8.203 -> 8.208 ( +0.06%) [ +0.30% +0.35% +0.00% / +0.06% +0.41% +0.50%] index_select spread : Elapsed 0.082 ms (8.228 ms / 100) 8.229 -> 8.228 ( -0.01%) [ +0.00% +0.02% +0.10% / +0.00% -0.01% +0.10%] index_select strided 3 : Elapsed 0.082 ms (8.229 ms / 100) 8.242 -> 8.229 ( -0.16%) [ +0.10% +0.00% +0.07% / -0.08% +0.08% -0.16%] index_select random : Elapsed 0.083 ms (8.250 ms / 100) 8.224 -> 8.232 ( +0.10%) [ +0.02% +0.19% +0.00% / +0.11% +0.10% +0.19%] index_select random_sorted : Elapsed 0.082 ms (8.226 ms / 100) B = [20, 40, 4, 16] (stride (1, 1280, 320, 20)) A = [5, 40, 4, 16] (stride (2560, 64, 16, 1)) dim = 0 0.619 -> 0.620 ( +0.16%) [ +0.32% +0.32% +0.00% / +0.16% +0.81% +1.29%] index_add_ linear : Elapsed 0.006 ms (0.621 ms / 100) 0.619 -> 0.621 ( +0.32%) [ +0.32% +0.32% +0.00% / +0.32% +0.65% +0.65%] index_copy_ linear : Elapsed 0.006 ms (0.621 ms / 100) 0.623 -> 0.621 ( -0.32%) [ +0.32% +0.32% +0.00% / +0.16% -0.32% -0.32%] index_add_ reverse : Elapsed 0.006 ms (0.625 ms / 100) 0.623 -> 0.622 ( -0.16%) [ +0.32% +0.32% +0.00% / +0.00% +0.00% -0.16%] index_copy_ reverse : Elapsed 0.006 ms (0.625 ms / 100) 0.622 -> 0.622 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.16% +0.16%] index_add_ spread : Elapsed 0.006 ms (0.623 ms / 100) 0.626 -> 0.626 ( +0.00%) [ +0.00% +0.32% +0.00% / +0.16% +0.32% +0.00%] index_copy_ spread : Elapsed 0.006 ms (0.626 ms / 100) 0.620 -> 0.620 ( +0.00%) [ +0.00% +0.32% +0.16% / +0.00% +0.81% +0.97%] index_add_ strided 3 : Elapsed 0.006 ms (0.620 ms / 100) 0.623 -> 0.623 ( +0.00%) [ +0.48% +0.16% +0.00% / +0.00% +0.96% +1.12%] index_copy_ strided 3 : Elapsed 0.006 ms (0.626 ms / 100) 0.620 -> 0.622 ( +0.32%) [ +0.00% +0.00% +0.00% / +0.32% +0.97% +0.81%] index_add_ strided 7 : Elapsed 0.006 ms (0.620 ms / 100) 0.622 -> 0.623 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +1.29% +1.13%] index_copy_ strided 7 : Elapsed 0.006 ms (0.623 ms / 100) 0.622 -> 0.623 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.16% +0.32%] index_add_ perm : Elapsed 0.006 ms (0.623 ms / 100) 0.626 -> 0.626 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.16% +0.16%] index_copy_ perm : Elapsed 0.006 ms (0.626 ms / 100) 0.621 -> 0.621 ( +0.00%) [ +0.64% +0.16% +0.00% / +0.00% +0.32% +0.32%] index_add_ perm_sorted : Elapsed 0.006 ms (0.625 ms / 100) 0.627 -> 0.628 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.628 ms / 100) 4.640 -> 4.538 ( -2.20%) [ +0.04% +0.13% +0.00% / -1.85% -2.20% -1.98%] index_select const : Elapsed 0.046 ms (4.642 ms / 100) 4.656 -> 4.604 ( -1.12%) [ +0.11% +0.00% +0.19% / -0.84% -1.12% -1.07%] index_select wrap : Elapsed 0.047 ms (4.661 ms / 100) 4.656 -> 4.601 ( -1.18%) [ +0.06% +0.00% +0.11% / -0.79% -0.92% -1.18%] index_select linear : Elapsed 0.047 ms (4.659 ms / 100) 4.657 -> 4.606 ( -1.10%) [ +0.00% +0.02% +0.11% / -0.82% -1.01% -1.10%] index_select reverse : Elapsed 0.047 ms (4.657 ms / 100) 4.633 -> 4.545 ( -1.90%) [ +0.13% +0.02% +0.00% / -1.68% -1.90% -1.83%] index_select skip64 : Elapsed 0.046 ms (4.639 ms / 100) 4.630 -> 4.536 ( -2.03%) [ +0.09% +0.00% +0.15% / -2.03% -1.77% -1.68%] index_select skip256 : Elapsed 0.046 ms (4.634 ms / 100) 4.656 -> 4.607 ( -1.05%) [ +0.04% +0.17% +0.00% / -0.99% -1.05% -0.97%] index_select spread : Elapsed 0.047 ms (4.658 ms / 100) 4.658 -> 4.612 ( -0.99%) [ +0.15% +0.19% +0.00% / -0.84% -0.99% -0.79%] index_select strided 3 : Elapsed 0.047 ms (4.665 ms / 100) 4.648 -> 4.603 ( -0.97%) [ +0.22% +0.09% +0.00% / -0.62% -0.88% -0.97%] index_select random : Elapsed 0.047 ms (4.658 ms / 100) 4.654 -> 4.605 ( -1.05%) [ +0.19% +0.00% +0.13% / -0.67% -1.05% -1.03%] index_select random_sorted : Elapsed 0.047 ms (4.663 ms / 100) B = [20, 40, 4, 16] (stride (160, 1, 40, 3200)) A = [5, 40, 4, 16] (stride (40, 1, 200, 800)) dim = 0 1.920 -> 1.922 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +1.20% +1.25%] index_add_ linear : Elapsed 0.019 ms (1.921 ms / 100) 1.867 -> 1.872 ( +0.27%) [ +0.05% +0.16% +0.00% / +0.27% +1.18% +1.29%] index_copy_ linear : Elapsed 0.019 ms (1.868 ms / 100) 1.918 -> 1.919 ( +0.05%) [ +0.05% +0.21% +0.00% / +0.05% +1.15% +1.46%] index_add_ reverse : Elapsed 0.019 ms (1.919 ms / 100) 1.867 -> 1.872 ( +0.27%) [ +0.16% +0.00% +0.21% / +0.27% +1.07% +1.23%] index_copy_ reverse : Elapsed 0.019 ms (1.870 ms / 100) 1.936 -> 1.945 ( +0.46%) [ +0.15% +0.00% +0.00% / +0.46% +0.72% +0.67%] index_add_ spread : Elapsed 0.019 ms (1.939 ms / 100) 1.883 -> 1.883 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.74% +0.53%] index_copy_ spread : Elapsed 0.019 ms (1.883 ms / 100) 1.921 -> 1.921 ( +0.00%) [ +0.21% +0.05% +0.00% / +0.00% +0.94% +0.99%] index_add_ strided 3 : Elapsed 0.019 ms (1.925 ms / 100) 1.872 -> 1.874 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.96% +1.28%] index_copy_ strided 3 : Elapsed 0.019 ms (1.874 ms / 100) 1.939 -> 1.935 ( -0.21%) [ +0.05% +0.00% +0.05% / -0.21% +0.31% +0.31%] index_add_ strided 7 : Elapsed 0.019 ms (1.940 ms / 100) 1.888 -> 1.886 ( -0.11%) [ +0.05% +0.16% +0.00% / -0.11% +0.53% +0.58%] index_copy_ strided 7 : Elapsed 0.019 ms (1.889 ms / 100) 1.950 -> 1.945 ( -0.26%) [ +0.15% +0.10% +0.00% / -0.10% -0.26% -0.10%] index_add_ perm : Elapsed 0.020 ms (1.953 ms / 100) 1.893 -> 1.896 ( +0.16%) [ +0.16% +0.32% +0.00% / +0.16% +0.37% +0.63%] index_copy_ perm : Elapsed 0.019 ms (1.896 ms / 100) 1.931 -> 1.931 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.31% +0.52%] index_add_ perm_sorted : Elapsed 0.019 ms (1.932 ms / 100) 1.882 -> 1.883 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.48% +0.27%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.882 ms / 100) 8.538 -> 8.546 ( +0.09%) [ +0.05% +0.25% +0.00% / +0.09% +0.25% +0.32%] index_select const : Elapsed 0.085 ms (8.542 ms / 100) 8.570 -> 8.569 ( -0.01%) [ +0.00% +0.05% +0.07% / -0.01% +0.11% +0.35%] index_select wrap : Elapsed 0.086 ms (8.570 ms / 100) 8.564 -> 8.571 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.42% +0.56%] index_select linear : Elapsed 0.086 ms (8.577 ms / 100) 8.571 -> 8.564 ( -0.08%) [ +0.18% +0.00% +0.01% / -0.08% +0.28% +0.34%] index_select reverse : Elapsed 0.086 ms (8.586 ms / 100) 8.532 -> 8.558 ( +0.30%) [ +0.13% +0.14% +0.00% / +0.30% +0.48% +0.36%] index_select skip64 : Elapsed 0.085 ms (8.543 ms / 100) 8.551 -> 8.541 ( -0.12%) [ +0.06% +0.00% +0.06% / -0.12% +0.30% +0.16%] index_select skip256 : Elapsed 0.086 ms (8.556 ms / 100) 8.567 -> 8.575 ( +0.09%) [ +0.15% +0.27% +0.00% / +0.09% +0.55% +0.32%] index_select spread : Elapsed 0.086 ms (8.580 ms / 100) 8.559 -> 8.572 ( +0.15%) [ +0.35% +0.00% +0.18% / +0.15% +0.43% +0.34%] index_select strided 3 : Elapsed 0.086 ms (8.589 ms / 100) 8.574 -> 8.584 ( +0.12%) [ +0.24% +0.00% +0.17% / +0.12% +0.33% +0.37%] index_select random : Elapsed 0.086 ms (8.595 ms / 100) 8.577 -> 8.584 ( +0.08%) [ +0.00% +0.05% +0.26% / +0.08% +0.16% +0.37%] index_select random_sorted : Elapsed 0.086 ms (8.577 ms / 100) B = [20, 40, 4, 16] (stride (40, 1, 800, 3200)) A = [5, 40, 4, 16] (stride (16, 320, 80, 1)) dim = 0 1.747 -> 1.750 ( +0.17%) [ +0.06% +0.00% +0.06% / +0.17% +1.20% +1.14%] index_add_ linear : Elapsed 0.017 ms (1.748 ms / 100) 1.703 -> 1.709 ( +0.35%) [ +0.00% +0.12% +0.12% / +0.35% +1.12% +1.17%] index_copy_ linear : Elapsed 0.017 ms (1.703 ms / 100) 1.751 -> 1.753 ( +0.11%) [ +0.11% +0.00% +0.17% / +0.11% +0.91% +0.86%] index_add_ reverse : Elapsed 0.018 ms (1.753 ms / 100) 1.709 -> 1.711 ( +0.12%) [ +0.06% +0.00% +0.29% / +0.12% +0.88% +0.94%] index_copy_ reverse : Elapsed 0.017 ms (1.710 ms / 100) 1.762 -> 1.764 ( +0.11%) [ +0.00% +0.00% +0.17% / +0.11% +0.74% +0.79%] index_add_ spread : Elapsed 0.018 ms (1.762 ms / 100) 1.716 -> 1.722 ( +0.35%) [ +0.23% +0.00% +0.35% / +0.35% +0.99% +1.11%] index_copy_ spread : Elapsed 0.017 ms (1.720 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.11% +0.00% +0.00% / +0.06% +0.62% +0.34%] index_add_ strided 3 : Elapsed 0.018 ms (1.772 ms / 100) 1.725 -> 1.723 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.70% +0.46%] index_copy_ strided 3 : Elapsed 0.017 ms (1.726 ms / 100) 1.757 -> 1.762 ( +0.28%) [ +0.06% +0.00% +0.17% / +0.28% +0.97% +1.14%] index_add_ strided 7 : Elapsed 0.018 ms (1.758 ms / 100) 1.714 -> 1.715 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.99% +0.99%] index_copy_ strided 7 : Elapsed 0.017 ms (1.714 ms / 100) 1.764 -> 1.766 ( +0.11%) [ +0.00% +0.23% +0.23% / +0.11% +0.57% +0.45%] index_add_ perm : Elapsed 0.018 ms (1.764 ms / 100) 1.720 -> 1.721 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.41% +0.64%] index_copy_ perm : Elapsed 0.017 ms (1.721 ms / 100) 1.766 -> 1.769 ( +0.17%) [ +0.00% +0.23% +0.06% / +0.17% +0.40% +0.51%] index_add_ perm_sorted : Elapsed 0.018 ms (1.766 ms / 100) 1.719 -> 1.723 ( +0.23%) [ +0.00% +0.00% +0.23% / +0.23% +0.58% +0.52%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.719 ms / 100) 8.220 -> 8.242 ( +0.27%) [ +0.06% +0.00% +0.15% / +0.30% +0.35% +0.27%] index_select const : Elapsed 0.082 ms (8.225 ms / 100) 8.257 -> 8.257 ( +0.00%) [ +0.00% +0.06% +0.08% / +0.04% +0.00% +0.08%] index_select wrap : Elapsed 0.083 ms (8.257 ms / 100) 8.253 -> 8.243 ( -0.12%) [ +0.00% +0.08% +0.00% / -0.12% -0.01% -0.05%] index_select linear : Elapsed 0.083 ms (8.253 ms / 100) 8.261 -> 8.272 ( +0.13%) [ +0.00% +0.06% +0.13% / +0.13% +0.39% +0.19%] index_select reverse : Elapsed 0.083 ms (8.261 ms / 100) 8.212 -> 8.227 ( +0.18%) [ +0.21% +0.01% +0.00% / +0.26% +0.34% +0.18%] index_select skip64 : Elapsed 0.082 ms (8.229 ms / 100) 8.201 -> 8.221 ( +0.24%) [ +0.10% +0.30% +0.00% / +0.24% +0.39% +0.34%] index_select skip256 : Elapsed 0.082 ms (8.209 ms / 100) 8.248 -> 8.257 ( +0.11%) [ +0.17% +0.00% +0.27% / +0.11% +0.13% +0.24%] index_select spread : Elapsed 0.083 ms (8.262 ms / 100) 8.265 -> 8.267 ( +0.02%) [ +0.04% +0.00% +0.11% / +0.13% +0.02% +0.11%] index_select strided 3 : Elapsed 0.083 ms (8.268 ms / 100) 8.250 -> 8.269 ( +0.23%) [ +0.00% +0.06% +0.06% / +0.28% +0.46% +0.23%] index_select random : Elapsed 0.083 ms (8.250 ms / 100) 8.253 -> 8.260 ( +0.08%) [ +0.00% +0.10% +0.00% / +0.27% +0.17% +0.08%] index_select random_sorted : Elapsed 0.083 ms (8.253 ms / 100) B = [20, 40, 4, 16] (stride (40, 1, 800, 3200)) A = [5, 40, 4, 16] (stride (1, 80, 3200, 5)) dim = 0 1.810 -> 1.813 ( +0.17%) [ +0.06% +0.11% +0.00% / +0.17% +0.50% +0.44%] index_add_ linear : Elapsed 0.018 ms (1.811 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.40% +0.45%] index_copy_ linear : Elapsed 0.018 ms (1.770 ms / 100) 1.809 -> 1.811 ( +0.11%) [ +0.11% +0.28% +0.00% / +0.11% +0.61% +0.44%] index_add_ reverse : Elapsed 0.018 ms (1.811 ms / 100) 1.770 -> 1.770 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.62% +0.56%] index_copy_ reverse : Elapsed 0.018 ms (1.771 ms / 100) 1.828 -> 1.827 ( -0.05%) [ +0.00% +0.16% +0.11% / -0.05% +0.27% +0.27%] index_add_ spread : Elapsed 0.018 ms (1.828 ms / 100) 1.782 -> 1.786 ( +0.22%) [ +0.22% +0.28% +0.00% / +0.22% +0.67% +0.62%] index_copy_ spread : Elapsed 0.018 ms (1.786 ms / 100) 1.826 -> 1.825 ( -0.05%) [ +0.22% +0.11% +0.00% / -0.05% +0.71% +0.55%] index_add_ strided 3 : Elapsed 0.018 ms (1.830 ms / 100) 1.783 -> 1.784 ( +0.06%) [ +0.06% +0.00% +0.17% / +0.06% +0.95% +0.56%] index_copy_ strided 3 : Elapsed 0.018 ms (1.784 ms / 100) 1.819 -> 1.818 ( -0.05%) [ +0.00% +0.11% +0.11% / -0.05% +0.55% +0.55%] index_add_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.777 -> 1.781 ( +0.23%) [ +0.00% +0.34% +0.11% / +0.23% +0.39% +0.68%] index_copy_ strided 7 : Elapsed 0.018 ms (1.777 ms / 100) 1.817 -> 1.820 ( +0.17%) [ +0.17% +0.06% +0.00% / +0.17% +0.88% +0.77%] index_add_ perm : Elapsed 0.018 ms (1.820 ms / 100) 1.776 -> 1.775 ( -0.06%) [ +0.11% +0.28% +0.00% / -0.06% +0.96% +0.73%] index_copy_ perm : Elapsed 0.018 ms (1.778 ms / 100) 1.816 -> 1.817 ( +0.06%) [ +0.22% +0.28% +0.00% / +0.06% +0.61% +1.05%] index_add_ perm_sorted : Elapsed 0.018 ms (1.820 ms / 100) 1.775 -> 1.781 ( +0.34%) [ +0.00% +0.34% +0.17% / +0.34% +0.73% +0.79%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.775 ms / 100) 8.560 -> 8.581 ( +0.25%) [ +0.19% +0.00% +0.01% / +0.25% +0.25% +0.42%] index_select const : Elapsed 0.086 ms (8.576 ms / 100) 8.561 -> 8.570 ( +0.11%) [ +0.05% +0.00% +0.02% / +0.11% +0.42% +0.14%] index_select wrap : Elapsed 0.086 ms (8.565 ms / 100) 8.565 -> 8.568 ( +0.04%) [ +0.02% +0.06% +0.00% / +0.04% +0.18% +0.26%] index_select linear : Elapsed 0.086 ms (8.567 ms / 100) 8.558 -> 8.577 ( +0.22%) [ +0.07% +0.00% +0.11% / +0.22% +0.37% +0.36%] index_select reverse : Elapsed 0.086 ms (8.564 ms / 100) 8.570 -> 8.568 ( -0.02%) [ +0.00% +0.13% +0.06% / -0.02% +0.32% +0.09%] index_select skip64 : Elapsed 0.086 ms (8.570 ms / 100) 8.566 -> 8.569 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.09% +0.04% +0.21%] index_select skip256 : Elapsed 0.086 ms (8.568 ms / 100) 8.561 -> 8.568 ( +0.08%) [ +0.00% +0.20% +0.07% / +0.14% +0.44% +0.08%] index_select spread : Elapsed 0.086 ms (8.561 ms / 100) 8.571 -> 8.572 ( +0.01%) [ +0.05% +0.00% +0.00% / +0.01% +0.29% +0.33%] index_select strided 3 : Elapsed 0.086 ms (8.575 ms / 100) 8.549 -> 8.569 ( +0.23%) [ +0.23% +0.18% +0.00% / +0.23% +0.36% +0.41%] index_select random : Elapsed 0.086 ms (8.569 ms / 100) 8.549 -> 8.563 ( +0.16%) [ +0.21% +0.00% +0.20% / +0.16% +0.23% +0.47%] index_select random_sorted : Elapsed 0.086 ms (8.567 ms / 100) B = [20, 40, 4, 16] (stride (1, 20, 800, 3200)) A = [5, 40, 4, 16] (stride (16, 320, 80, 1)) dim = 0 1.844 -> 1.843 ( -0.05%) [ +0.11% +0.00% +0.27% / +0.16% -0.05% +0.00%] index_add_ linear : Elapsed 0.018 ms (1.846 ms / 100) 1.814 -> 1.812 ( -0.11%) [ +0.00% +0.06% +0.17% / +0.22% +0.00% -0.11%] index_copy_ linear : Elapsed 0.018 ms (1.814 ms / 100) 1.847 -> 1.844 ( -0.16%) [ +0.00% +0.22% +0.05% / +0.22% -0.11% -0.16%] index_add_ reverse : Elapsed 0.018 ms (1.847 ms / 100) 1.813 -> 1.811 ( -0.11%) [ +0.00% +0.28% +0.33% / +0.00% +0.17% -0.11%] index_copy_ reverse : Elapsed 0.018 ms (1.813 ms / 100) 1.865 -> 1.862 ( -0.16%) [ +0.00% +0.00% +0.21% / +0.16% +0.05% -0.16%] index_add_ spread : Elapsed 0.019 ms (1.865 ms / 100) 1.844 -> 1.835 ( -0.49%) [ +0.00% +0.00% +0.11% / -0.11% -0.49% -0.27%] index_copy_ spread : Elapsed 0.018 ms (1.844 ms / 100) 1.867 -> 1.863 ( -0.21%) [ +0.21% +0.00% +0.00% / -0.05% -0.16% -0.21%] index_add_ strided 3 : Elapsed 0.019 ms (1.871 ms / 100) 1.842 -> 1.840 ( -0.11%) [ +0.11% +0.22% +0.00% / -0.11% +0.00% +0.00%] index_copy_ strided 3 : Elapsed 0.018 ms (1.844 ms / 100) 1.867 -> 1.861 ( -0.32%) [ +0.32% +0.16% +0.00% / -0.05% -0.21% -0.32%] index_add_ strided 7 : Elapsed 0.019 ms (1.873 ms / 100) 1.841 -> 1.836 ( -0.27%) [ +0.11% +0.05% +0.00% / +0.22% +0.00% -0.27%] index_copy_ strided 7 : Elapsed 0.018 ms (1.843 ms / 100) 1.858 -> 1.859 ( +0.05%) [ +0.00% +0.32% +0.00% / +0.22% +0.11% +0.05%] index_add_ perm : Elapsed 0.019 ms (1.858 ms / 100) 1.828 -> 1.827 ( -0.05%) [ +0.00% +0.05% +0.05% / +0.16% -0.05% +0.05%] index_copy_ perm : Elapsed 0.018 ms (1.828 ms / 100) 1.860 -> 1.856 ( -0.22%) [ +0.00% +0.05% +0.16% / -0.05% -0.22% -0.22%] index_add_ perm_sorted : Elapsed 0.019 ms (1.860 ms / 100) 1.831 -> 1.826 ( -0.27%) [ +0.16% +0.00% +0.11% / +0.16% -0.27% -0.05%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.834 ms / 100) 8.276 -> 8.293 ( +0.21%) [ +0.00% +0.11% +0.08% / +0.22% +0.37% +0.21%] index_select const : Elapsed 0.083 ms (8.276 ms / 100) 8.307 -> 8.328 ( +0.25%) [ +0.07% +0.11% +0.00% / +0.30% +0.25% +0.40%] index_select wrap : Elapsed 0.083 ms (8.313 ms / 100) 8.298 -> 8.300 ( +0.02%) [ +0.11% +0.00% +0.02% / +0.02% +0.28% +0.31%] index_select linear : Elapsed 0.083 ms (8.307 ms / 100) 8.310 -> 8.316 ( +0.07%) [ +0.13% +0.00% +0.14% / +0.07% +0.61% +0.41%] index_select reverse : Elapsed 0.083 ms (8.321 ms / 100) 8.269 -> 8.291 ( +0.27%) [ +0.04% +0.13% +0.00% / +0.27% +0.41% +0.60%] index_select skip64 : Elapsed 0.083 ms (8.272 ms / 100) 8.269 -> 8.273 ( +0.05%) [ +0.17% +0.15% +0.00% / +0.05% +0.31% +0.22%] index_select skip256 : Elapsed 0.083 ms (8.283 ms / 100) 8.304 -> 8.309 ( +0.06%) [ +0.00% +0.04% +0.06% / +0.06% +0.34% +0.16%] index_select spread : Elapsed 0.083 ms (8.304 ms / 100) 8.321 -> 8.322 ( +0.01%) [ +0.00% +0.06% +0.02% / +0.07% +0.08% +0.01%] index_select strided 3 : Elapsed 0.083 ms (8.321 ms / 100) 8.325 -> 8.327 ( +0.02%) [ +0.01% +0.14% +0.00% / +0.05% +0.14% +0.02%] index_select random : Elapsed 0.083 ms (8.326 ms / 100) 8.304 -> 8.318 ( +0.17%) [ +0.14% +0.34% +0.00% / +0.17% +0.28% +0.30%] index_select random_sorted : Elapsed 0.083 ms (8.316 ms / 100) out_shape = [5, 20, 4, 16] in_shape = [5, 40, 4, 16] idx_dim = 1 B = [5, 20, 4, 16] (stride (1280, 64, 1, 4)) A = [5, 40, 4, 16] (stride (2560, 16, 640, 1)) dim = 1 2.441 -> 2.446 ( +0.20%) [ +0.20% +0.16% +0.00% / +0.20% +0.29% +0.29%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.24% +0.00% +0.16% / +0.12% +0.12% +0.12%] index_select wrap : Elapsed 0.025 ms (2.463 ms / 100) 2.459 -> 2.456 ( -0.12%) [ +0.00% +0.00% +0.04% / -0.12% +0.16% +0.04%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.08% -0.08%] index_select reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.444 -> 2.444 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.00% +0.00% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.442 -> 2.445 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.45% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.445 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.24% +0.20%] index_select spread : Elapsed 0.025 ms (2.459 ms / 100) 2.459 -> 2.457 ( -0.08%) [ +0.00% +0.04% +0.16% / +0.16% -0.08% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.459 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.20% +0.20% +0.00% / +0.33% +0.20% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.29% +0.00% +0.20% / +0.12% +0.33% +0.29%] index_select strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.20% +0.16%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.20% +0.08% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.04% +0.04% +0.08%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.00% +0.33% +0.16% / +0.12% +0.04% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.460 -> 2.457 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.08% +0.00% -0.12%] index_select perm : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.449 ( -0.41%) [ +0.00% +0.24% +0.12% / +0.08% -0.41% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [5, 20, 4, 16] (stride (1280, 16, 320, 1)) A = [5, 40, 4, 16] (stride (1, 80, 3200, 5)) dim = 1 2.442 -> 2.445 ( +0.12%) [ +0.16% +0.00% +0.20% / +0.12% +0.16% +0.29%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.456 -> 2.445 ( -0.45%) [ +0.08% +0.04% +0.00% / -0.04% -0.41% -0.45%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.452 -> 2.446 ( -0.24%) [ +0.12% +0.00% +0.08% / +0.16% -0.24% -0.24%] index_select linear : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.447 ( -0.12%) [ +0.00% +0.12% +0.04% / +0.16% +0.16% -0.12%] index_select reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.37% +0.00% +0.20% / +0.00% -0.04% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.448 -> 2.445 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.08% -0.12% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.449 -> 2.446 ( -0.12%) [ +0.08% +0.00% +0.04% / -0.12% +0.33% +0.24%] index_select spread : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.33% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.25% +0.20%] index_select strided 5 : Elapsed 0.024 ms (2.448 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.16% +0.12% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.25% +0.04% +0.20%] index_select strided 8 : Elapsed 0.024 ms (2.445 ms / 100) 2.445 -> 2.445 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.00% +0.12% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.445 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.12% +0.20% +0.16%] index_select random : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.448 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.04% -0.12% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.453 ms / 100) 2.452 -> 2.451 ( -0.04%) [ +0.08% +0.00% +0.00% / +0.04% +0.00% -0.04%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.449 ( -0.08%) [ +0.00% +0.04% +0.00% / +0.00% +0.00% -0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.451 ms / 100) B = [5, 20, 4, 16] (stride (1280, 4, 1, 80)) A = [5, 40, 4, 16] (stride (2560, 16, 640, 1)) dim = 1 2.445 -> 2.450 ( +0.20%) [ +0.00% +0.12% +0.08% / +0.20% +0.20% +0.20%] index_select const : Elapsed 0.024 ms (2.445 ms / 100) 2.458 -> 2.462 ( +0.16%) [ +0.12% +0.20% +0.00% / +0.20% +0.28% +0.16%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.00% +0.12% +0.08% / -0.04% +0.04% +0.12%] index_select linear : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.463 ( +0.16%) [ +0.16% +0.12% +0.00% / +0.16% +0.20% +0.16%] index_select reverse : Elapsed 0.025 ms (2.463 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.20% +0.00% +0.08% / +0.12% +0.04% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.29% +0.00% +0.16% / +0.08% +0.37% +0.25%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.20% +0.08% +0.00% / +0.08% +0.24% +0.33%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.460 -> 2.464 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.20% +0.20% +0.16%] index_select strided 3 : Elapsed 0.025 ms (2.462 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.00% +0.16% +0.00% / -0.04% +0.00% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.456 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.08% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.449 -> 2.449 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.00% +0.33% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.00% +0.04% +0.12% / +0.12% +0.20% +0.37%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.12% +0.16% +0.00% / +0.08% +0.16% +0.33%] index_select random : Elapsed 0.025 ms (2.460 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.04% +0.16% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.462 -> 2.459 ( -0.12%) [ +0.24% +0.00% +0.04% / +0.20% -0.12% -0.04%] index_select perm : Elapsed 0.025 ms (2.468 ms / 100) 2.462 -> 2.455 ( -0.28%) [ +0.16% +0.00% +0.41% / +0.08% -0.28% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) B = [5, 20, 4, 16] (stride (4, 320, 1, 20)) A = [5, 40, 4, 16] (stride (160, 1, 40, 800)) dim = 1 2.450 -> 2.450 ( +0.00%) [ +0.12% +0.16% +0.00% / +0.00% +0.20% +0.41%] index_select const : Elapsed 0.025 ms (2.453 ms / 100) 2.459 -> 2.459 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.16% +0.53% +0.00%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.460 -> 2.457 ( -0.12%) [ +0.00% +0.08% +0.20% / +0.04% -0.08% -0.12%] index_select linear : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.08% +0.16% +0.08%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.454 -> 2.453 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% -0.04% +0.20%] index_select skip64 : Elapsed 0.025 ms (2.455 ms / 100) 2.454 -> 2.451 ( -0.12%) [ +0.04% +0.04% +0.00% / -0.12% +1.83% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.455 ms / 100) 2.461 -> 2.466 ( +0.20%) [ +0.24% +0.20% +0.00% / +0.20% +0.57% +0.37%] index_select spread : Elapsed 0.025 ms (2.467 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.28% +0.24% +0.00% / +0.16% +0.37% +0.37%] index_select strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.466 ( +0.12%) [ +0.16% +0.12% +0.00% / +0.12% +0.16% +0.28%] index_select strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.04% +0.20% / +0.00% +0.24% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.463 -> 2.468 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.53% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.468 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.20% +0.00% +0.00% / +0.12% +0.41% +0.24%] index_select strided 16 : Elapsed 0.025 ms (2.469 ms / 100) 2.465 -> 2.469 ( +0.16%) [ +0.04% +0.00% +0.20% / +0.16% +0.28% +0.20%] index_select random : Elapsed 0.025 ms (2.466 ms / 100) 2.467 -> 2.463 ( -0.16%) [ +0.04% +0.00% +0.08% / -0.16% +0.08% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.467 -> 2.467 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.12% +0.16%] index_select perm : Elapsed 0.025 ms (2.467 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.16% +0.08% +0.00% / +0.12% +0.37% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) B = [5, 20, 4, 16] (stride (1, 80, 1600, 5)) A = [5, 40, 4, 16] (stride (40, 1, 200, 800)) dim = 1 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.29% +0.33%] index_select const : Elapsed 0.025 ms (2.451 ms / 100) 2.462 -> 2.459 ( -0.12%) [ +0.24% +0.20% +0.00% / +0.20% -0.12% +0.00%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.463 -> 2.461 ( -0.08%) [ +0.16% +0.08% +0.00% / +0.08% -0.08% +0.04%] index_select linear : Elapsed 0.025 ms (2.467 ms / 100) 2.461 -> 2.464 ( +0.12%) [ +0.00% +0.24% +0.16% / +0.16% +0.16% +0.12%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.24% +0.12% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.457 ms / 100) 2.451 -> 2.455 ( +0.16%) [ +0.08% +0.20% +0.00% / +0.16% +0.24% +0.29%] index_select skip256 : Elapsed 0.025 ms (2.453 ms / 100) 2.466 -> 2.470 ( +0.16%) [ +0.08% +0.00% +0.24% / +0.24% +0.16% +0.20%] index_select spread : Elapsed 0.025 ms (2.468 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.08% +0.00% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.471 -> 2.469 ( -0.08%) [ +0.08% +0.00% +0.04% / -0.04% +0.08% -0.08%] index_select strided 5 : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.00% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.469 -> 2.467 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% +0.08% +0.08%] index_select strided 8 : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.16% +0.00% +0.04%] index_select strided 16 : Elapsed 0.025 ms (2.470 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.12% +0.00% +0.16% / -0.04% +0.08% +0.24%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.469 -> 2.470 ( +0.04%) [ +0.00% +0.08% +0.16% / +0.16% +0.04% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.468 ( -0.16%) [ +0.00% +0.12% +0.04% / -0.16% -0.04% -0.08%] index_select perm : Elapsed 0.025 ms (2.472 ms / 100) 2.474 -> 2.463 ( -0.44%) [ +0.00% +0.00% +0.04% / -0.12% -0.44% -0.36%] index_select perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) B = [5, 20, 4, 16] (stride (4, 20, 1, 400)) A = [5, 40, 4, 16] (stride (160, 1, 40, 800)) dim = 1 2.455 -> 2.454 ( -0.04%) [ +0.20% +0.12% +0.00% / -0.04% +0.29% +0.37%] index_select const : Elapsed 0.025 ms (2.460 ms / 100) 2.466 -> 2.457 ( -0.36%) [ +0.00% +0.16% +0.00% / +0.00% -0.24% -0.36%] index_select wrap : Elapsed 0.025 ms (2.466 ms / 100) 2.463 -> 2.460 ( -0.12%) [ +0.00% +0.28% +0.16% / +0.08% -0.12% +0.12%] index_select linear : Elapsed 0.025 ms (2.463 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.08% +0.00% +0.12% / -0.04% +0.20% +0.28%] index_select reverse : Elapsed 0.025 ms (2.466 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.20% +0.24%] index_select skip64 : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.462 ( +0.20%) [ +0.12% +0.00% +0.00% / +0.20% +0.33% +0.20%] index_select skip256 : Elapsed 0.025 ms (2.460 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.16% +0.24% +0.00% / +0.12% +0.24% +0.36%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.465 -> 2.471 ( +0.24%) [ +0.32% +0.00% +0.08% / +0.24% +0.32% +0.41%] index_select strided 3 : Elapsed 0.025 ms (2.473 ms / 100) 2.468 -> 2.470 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.20% +0.20%] index_select strided 5 : Elapsed 0.025 ms (2.469 ms / 100) 2.471 -> 2.472 ( +0.04%) [ +0.00% +0.08% +0.08% / +0.04% +0.12% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.468 ( -0.08%) [ +0.00% +0.08% +0.04% / -0.08% +0.24% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.470 ms / 100) 2.469 -> 2.471 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.24% +0.24%] index_select strided 16 : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.472 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.28% +0.12%] index_select random : Elapsed 0.025 ms (2.470 ms / 100) 2.473 -> 2.469 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% -0.04% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.00% +0.08% +0.04% / +0.04% +0.00% +0.12%] index_select perm : Elapsed 0.025 ms (2.470 ms / 100) 2.471 -> 2.471 ( +0.00%) [ +0.00% +0.16% +0.04% / +0.08% +0.00% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) out_shape = [5, 40, 20, 16] in_shape = [5, 40, 4, 16] idx_dim = 2 B = [5, 40, 20, 16] (stride (20, 1600, 1, 100)) A = [5, 40, 4, 16] (stride (1, 320, 80, 5)) dim = 2 2.098 -> 2.097 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.10% +0.24%] index_add_ linear : Elapsed 0.021 ms (2.100 ms / 100) 2.065 -> 2.063 ( -0.10%) [ +0.00% +0.10% +0.05% / -0.10% +0.44% +0.15%] index_copy_ linear : Elapsed 0.021 ms (2.065 ms / 100) 2.083 -> 2.086 ( +0.14%) [ +0.00% +0.10% +0.24% / +0.14% +0.62% +0.77%] index_add_ reverse : Elapsed 0.021 ms (2.083 ms / 100) 2.053 -> 2.053 ( +0.00%) [ +0.29% +0.19% +0.00% / +0.00% +0.73% +0.88%] index_copy_ reverse : Elapsed 0.021 ms (2.059 ms / 100) 2.114 -> 2.117 ( +0.14%) [ +0.00% +0.05% +0.05% / +0.14% +0.99% +0.90%] index_add_ spread : Elapsed 0.021 ms (2.114 ms / 100) 2.140 -> 2.147 ( +0.33%) [ +0.00% +0.05% +0.05% / +0.33% +0.89% +1.12%] index_copy_ spread : Elapsed 0.021 ms (2.140 ms / 100) 2.122 -> 2.134 ( +0.57%) [ +0.05% +0.28% +0.00% / +0.57% +0.75% +0.75%] index_add_ strided 3 : Elapsed 0.021 ms (2.123 ms / 100) 2.129 -> 2.136 ( +0.33%) [ +0.09% +0.05% +0.00% / +0.33% +0.52% +0.56%] index_copy_ strided 3 : Elapsed 0.021 ms (2.131 ms / 100) 2.131 -> 2.136 ( +0.23%) [ +0.00% +0.00% +0.05% / +0.23% +0.47% +0.38%] index_add_ strided 7 : Elapsed 0.021 ms (2.131 ms / 100) 2.158 -> 2.160 ( +0.09%) [ +0.09% +0.00% +0.19% / +0.09% +0.65% +0.56%] index_copy_ strided 7 : Elapsed 0.022 ms (2.160 ms / 100) 2.122 -> 2.128 ( +0.28%) [ +0.19% +0.00% +0.00% / +0.28% +0.38% +0.42%] index_add_ perm : Elapsed 0.021 ms (2.126 ms / 100) 2.128 -> 2.129 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.09% +0.38%] index_copy_ perm : Elapsed 0.021 ms (2.128 ms / 100) 2.125 -> 2.129 ( +0.19%) [ +0.00% +0.05% +0.00% / +0.19% +0.33% +0.24%] index_add_ perm_sorted : Elapsed 0.021 ms (2.125 ms / 100) 2.121 -> 2.128 ( +0.33%) [ +0.52% +0.00% +0.24% / +0.33% +0.75% +0.75%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.132 ms / 100) 9.242 -> 9.256 ( +0.15%) [ +0.03% +0.03% +0.00% / +0.15% +0.19% +0.17%] index_select const : Elapsed 0.092 ms (9.245 ms / 100) 9.308 -> 9.296 ( -0.13%) [ +0.16% +0.00% +0.02% / -0.13% +0.01% -0.03%] index_select wrap : Elapsed 0.093 ms (9.323 ms / 100) 9.259 -> 9.263 ( +0.04%) [ +0.00% +0.16% +0.22% / +0.04% +0.30% +0.37%] index_select linear : Elapsed 0.093 ms (9.259 ms / 100) 9.261 -> 9.258 ( -0.03%) [ +0.02% +0.00% +0.11% / -0.03% +0.15% +0.00%] index_select reverse : Elapsed 0.093 ms (9.263 ms / 100) 9.233 -> 9.244 ( +0.12%) [ +0.09% +0.00% +0.16% / +0.13% +0.12% +0.26%] index_select skip64 : Elapsed 0.092 ms (9.241 ms / 100) 9.235 -> 9.233 ( -0.02%) [ +0.00% +0.10% +0.09% / +0.05% +0.34% -0.02%] index_select skip256 : Elapsed 0.092 ms (9.235 ms / 100) 9.270 -> 9.276 ( +0.06%) [ +0.17% +0.24% +0.00% / +0.19% +0.06% +0.13%] index_select spread : Elapsed 0.093 ms (9.286 ms / 100) 9.307 -> 9.295 ( -0.13%) [ +0.10% +0.14% +0.00% / -0.13% +0.03% +0.00%] index_select strided 3 : Elapsed 0.093 ms (9.316 ms / 100) 9.279 -> 9.288 ( +0.10%) [ +0.09% +0.00% +0.12% / +0.17% +0.15% +0.10%] index_select random : Elapsed 0.093 ms (9.287 ms / 100) 9.264 -> 9.267 ( +0.03%) [ +0.18% +0.06% +0.00% / +0.03% +0.37% +0.14%] index_select random_sorted : Elapsed 0.093 ms (9.281 ms / 100) B = [5, 40, 20, 16] (stride (640, 16, 3200, 1)) A = [5, 40, 4, 16] (stride (1, 80, 3200, 5)) dim = 2 1.958 -> 1.957 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.36% +0.20%] index_add_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.907 -> 1.907 ( +0.00%) [ +0.00% +0.16% +0.10% / +0.00% +0.47% +0.73%] index_copy_ linear : Elapsed 0.019 ms (1.907 ms / 100) 1.953 -> 1.953 ( +0.00%) [ +0.00% +0.05% +0.15% / +0.00% +0.61% +0.41%] index_add_ reverse : Elapsed 0.020 ms (1.953 ms / 100) 1.902 -> 1.908 ( +0.32%) [ +0.00% +0.11% +0.11% / +0.42% +0.32% +0.63%] index_copy_ reverse : Elapsed 0.019 ms (1.902 ms / 100) 1.947 -> 1.950 ( +0.15%) [ +0.10% +0.00% +0.10% / +0.15% +0.56% +0.46%] index_add_ spread : Elapsed 0.019 ms (1.949 ms / 100) 1.902 -> 1.903 ( +0.05%) [ +0.21% +0.16% +0.00% / +0.05% +0.63% +0.74%] index_copy_ spread : Elapsed 0.019 ms (1.906 ms / 100) 1.968 -> 1.969 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.30% +0.10%] index_add_ strided 3 : Elapsed 0.020 ms (1.968 ms / 100) 1.915 -> 1.919 ( +0.21%) [ +0.16% +0.00% +0.16% / +0.21% +0.57% +0.63%] index_copy_ strided 3 : Elapsed 0.019 ms (1.918 ms / 100) 1.954 -> 1.955 ( +0.05%) [ +0.20% +0.20% +0.00% / +0.05% +0.72% +0.67%] index_add_ strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.909 -> 1.909 ( +0.00%) [ +0.31% +0.10% +0.00% / +0.00% +0.68% +0.89%] index_copy_ strided 7 : Elapsed 0.019 ms (1.915 ms / 100) 1.952 -> 1.949 ( -0.15%) [ +0.05% +0.00% +0.05% / -0.15% +0.10% +0.31%] index_add_ perm : Elapsed 0.020 ms (1.953 ms / 100) 1.902 -> 1.907 ( +0.26%) [ +0.00% +0.21% +0.11% / +0.26% +0.26% +0.58%] index_copy_ perm : Elapsed 0.019 ms (1.902 ms / 100) 1.959 -> 1.958 ( -0.05%) [ +0.00% +0.10% +0.00% / -0.05% +0.10% +0.00%] index_add_ perm_sorted : Elapsed 0.020 ms (1.959 ms / 100) 1.904 -> 1.909 ( +0.26%) [ +0.11% +0.11% +0.00% / +0.42% +0.26% +0.42%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.906 ms / 100) 8.662 -> 8.662 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.23% +0.24% +0.00%] index_select const : Elapsed 0.087 ms (8.668 ms / 100) 8.721 -> 8.709 ( -0.14%) [ +0.00% +0.02% +0.28% / +0.09% -0.14% +0.00%] index_select wrap : Elapsed 0.087 ms (8.721 ms / 100) 8.697 -> 8.697 ( +0.00%) [ +0.15% +0.01% +0.00% / +0.28% +0.01% +0.00%] index_select linear : Elapsed 0.087 ms (8.710 ms / 100) 8.682 -> 8.682 ( +0.00%) [ +0.41% +0.00% +0.12% / +0.30% +0.16% +0.00%] index_select reverse : Elapsed 0.087 ms (8.718 ms / 100) 8.674 -> 8.677 ( +0.03%) [ +0.01% +0.18% +0.00% / +0.03% +0.20% +0.22%] index_select skip64 : Elapsed 0.087 ms (8.675 ms / 100) 8.677 -> 8.665 ( -0.14%) [ +0.06% +0.03% +0.00% / -0.09% +0.12% -0.14%] index_select skip256 : Elapsed 0.087 ms (8.682 ms / 100) 8.697 -> 8.696 ( -0.01%) [ +0.00% +0.26% +0.22% / +0.13% -0.01% +0.08%] index_select spread : Elapsed 0.087 ms (8.697 ms / 100) 8.715 -> 8.712 ( -0.03%) [ +0.25% +0.36% +0.00% / -0.01% +0.00% -0.03%] index_select strided 3 : Elapsed 0.087 ms (8.737 ms / 100) 8.716 -> 8.725 ( +0.10%) [ +0.02% +0.00% +0.25% / +0.11% +0.13% +0.10%] index_select random : Elapsed 0.087 ms (8.718 ms / 100) 8.713 -> 8.708 ( -0.06%) [ +0.00% +0.14% +0.00% / +0.03% +0.11% -0.06%] index_select random_sorted : Elapsed 0.087 ms (8.713 ms / 100) B = [5, 40, 20, 16] (stride (40, 1, 3200, 200)) A = [5, 40, 4, 16] (stride (1, 320, 80, 5)) dim = 2 1.967 -> 1.970 ( +0.15%) [ +0.31% +0.31% +0.00% / +0.15% +0.20% +0.20%] index_add_ linear : Elapsed 0.020 ms (1.973 ms / 100) 1.927 -> 1.924 ( -0.16%) [ +0.31% +0.10% +0.00% / +0.21% -0.16% +0.26%] index_copy_ linear : Elapsed 0.019 ms (1.933 ms / 100) 1.961 -> 1.961 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.36% +0.25%] index_add_ reverse : Elapsed 0.020 ms (1.962 ms / 100) 1.917 -> 1.918 ( +0.05%) [ +0.21% +0.05% +0.00% / +0.05% +0.37% +0.47%] index_copy_ reverse : Elapsed 0.019 ms (1.921 ms / 100) 1.952 -> 1.956 ( +0.20%) [ +0.31% +0.20% +0.00% / +0.20% +0.51% +0.67%] index_add_ spread : Elapsed 0.020 ms (1.958 ms / 100) 1.909 -> 1.911 ( +0.10%) [ +0.16% +0.10% +0.00% / +0.10% +0.26% +0.68%] index_copy_ spread : Elapsed 0.019 ms (1.912 ms / 100) 1.976 -> 1.973 ( -0.15%) [ +0.05% +0.00% +0.00% / +0.05% -0.15% -0.10%] index_add_ strided 3 : Elapsed 0.020 ms (1.977 ms / 100) 1.927 -> 1.934 ( +0.36%) [ +0.42% +0.21% +0.00% / +0.42% +0.62% +0.36%] index_copy_ strided 3 : Elapsed 0.019 ms (1.935 ms / 100) 1.963 -> 1.963 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.41% +0.66%] index_add_ strided 7 : Elapsed 0.020 ms (1.966 ms / 100) 1.917 -> 1.916 ( -0.05%) [ +0.00% +0.10% +0.10% / -0.05% +0.63% +0.68%] index_copy_ strided 7 : Elapsed 0.019 ms (1.917 ms / 100) 1.962 -> 1.969 ( +0.36%) [ +0.00% +0.20% +0.10% / +0.36% +0.56% +0.71%] index_add_ perm : Elapsed 0.020 ms (1.962 ms / 100) 1.916 -> 1.924 ( +0.42%) [ +0.37% +0.21% +0.00% / +0.42% +0.89% +0.84%] index_copy_ perm : Elapsed 0.019 ms (1.923 ms / 100) 1.958 -> 1.958 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +0.46% +0.41%] index_add_ perm_sorted : Elapsed 0.020 ms (1.959 ms / 100) 1.911 -> 1.913 ( +0.10%) [ +0.00% +0.16% +0.26% / +0.10% +0.73% +1.10%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.911 ms / 100) 8.703 -> 8.698 ( -0.06%) [ +0.08% +0.22% +0.00% / -0.06% +0.15% +0.01%] index_select const : Elapsed 0.087 ms (8.710 ms / 100) 8.730 -> 8.733 ( +0.03%) [ +0.00% +0.40% +0.06% / +0.03% +0.45% +0.48%] index_select wrap : Elapsed 0.087 ms (8.730 ms / 100) 8.726 -> 8.732 ( +0.07%) [ +0.00% +0.16% +0.07% / +0.36% +0.07% +0.32%] index_select linear : Elapsed 0.087 ms (8.726 ms / 100) 8.727 -> 8.728 ( +0.01%) [ +0.00% +0.02% +0.17% / +0.01% +0.29% +0.13%] index_select reverse : Elapsed 0.087 ms (8.727 ms / 100) 8.704 -> 8.697 ( -0.08%) [ +0.06% +0.03% +0.00% / -0.03% -0.08% +0.18%] index_select skip64 : Elapsed 0.087 ms (8.709 ms / 100) 8.695 -> 8.704 ( +0.10%) [ +0.21% +0.00% +0.08% / +0.10% +0.32% +0.29%] index_select skip256 : Elapsed 0.087 ms (8.713 ms / 100) 8.730 -> 8.738 ( +0.09%) [ +0.00% +0.01% +0.06% / +0.09% +0.23% +0.22%] index_select spread : Elapsed 0.087 ms (8.730 ms / 100) 8.746 -> 8.753 ( +0.08%) [ +0.09% +0.07% +0.00% / +0.21% +0.38% +0.08%] index_select strided 3 : Elapsed 0.088 ms (8.754 ms / 100) 8.750 -> 8.732 ( -0.21%) [ +0.00% +0.02% +0.06% / -0.21% -0.02% +0.08%] index_select random : Elapsed 0.088 ms (8.750 ms / 100) 8.732 -> 8.742 ( +0.11%) [ +0.14% +0.00% +0.08% / +0.11% +0.16% +0.29%] index_select random_sorted : Elapsed 0.087 ms (8.744 ms / 100) B = [5, 40, 20, 16] (stride (1, 5, 3200, 200)) A = [5, 40, 4, 16] (stride (2560, 1, 640, 40)) dim = 2 2.151 -> 2.150 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.70% +0.74%] index_add_ linear : Elapsed 0.022 ms (2.151 ms / 100) 2.108 -> 2.107 ( -0.05%) [ +0.09% +0.00% +0.05% / -0.05% +1.38% +0.95%] index_copy_ linear : Elapsed 0.021 ms (2.110 ms / 100) 2.144 -> 2.145 ( +0.05%) [ +0.19% +0.19% +0.00% / +0.05% +0.98% +1.03%] index_add_ reverse : Elapsed 0.021 ms (2.148 ms / 100) 2.102 -> 2.109 ( +0.33%) [ +0.00% +0.00% +0.38% / +0.33% +1.24% +1.14%] index_copy_ reverse : Elapsed 0.021 ms (2.102 ms / 100) 2.155 -> 2.158 ( +0.14%) [ +0.14% +0.05% +0.00% / +0.14% +0.97% +0.70%] index_add_ spread : Elapsed 0.022 ms (2.158 ms / 100) 2.107 -> 2.108 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +1.04% +1.04%] index_copy_ spread : Elapsed 0.021 ms (2.108 ms / 100) 2.151 -> 2.153 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.70% +0.51%] index_add_ strided 3 : Elapsed 0.022 ms (2.153 ms / 100) 2.102 -> 2.107 ( +0.24%) [ +0.00% +0.33% +0.10% / +0.24% +1.14% +1.19%] index_copy_ strided 3 : Elapsed 0.021 ms (2.102 ms / 100) 2.155 -> 2.154 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.84% +0.88%] index_add_ strided 7 : Elapsed 0.022 ms (2.157 ms / 100) 2.111 -> 2.114 ( +0.14%) [ +0.00% +0.00% +0.14% / +0.14% +0.95% +0.66%] index_copy_ strided 7 : Elapsed 0.021 ms (2.111 ms / 100) 2.160 -> 2.156 ( -0.19%) [ +0.05% +0.00% +0.14% / -0.19% +0.14% +0.09%] index_add_ perm : Elapsed 0.022 ms (2.161 ms / 100) 2.112 -> 2.113 ( +0.05%) [ +0.00% +0.00% +0.19% / +0.05% +0.43% +0.33%] index_copy_ perm : Elapsed 0.021 ms (2.112 ms / 100) 2.154 -> 2.152 ( -0.09%) [ +0.05% +0.00% +0.05% / +0.09% +0.09% -0.09%] index_add_ perm_sorted : Elapsed 0.022 ms (2.155 ms / 100) 2.118 -> 2.117 ( -0.05%) [ +0.00% +0.09% +0.09% / +0.05% +0.19% -0.05%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.118 ms / 100) 9.165 -> 9.171 ( +0.07%) [ +0.13% +0.00% +0.21% / +0.28% +0.07% +0.17%] index_select const : Elapsed 0.092 ms (9.177 ms / 100) 9.221 -> 9.228 ( +0.08%) [ +0.10% +0.13% +0.00% / +0.08% +0.08% +0.14%] index_select wrap : Elapsed 0.092 ms (9.230 ms / 100) 9.203 -> 9.204 ( +0.01%) [ +0.00% +0.10% +0.14% / +0.10% +0.01% +0.12%] index_select linear : Elapsed 0.092 ms (9.203 ms / 100) 9.181 -> 9.191 ( +0.11%) [ +0.08% +0.00% +0.10% / +0.15% +0.11% +0.11%] index_select reverse : Elapsed 0.092 ms (9.188 ms / 100) 9.171 -> 9.174 ( +0.03%) [ +0.10% +0.08% +0.00% / +0.03% +0.05% +0.08%] index_select skip64 : Elapsed 0.092 ms (9.180 ms / 100) 9.164 -> 9.171 ( +0.08%) [ +0.14% +0.02% +0.00% / +0.08% +0.17% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.177 ms / 100) 9.213 -> 9.211 ( -0.02%) [ +0.20% +0.09% +0.00% / -0.02% +0.14% +0.04%] index_select spread : Elapsed 0.092 ms (9.231 ms / 100) 9.230 -> 9.233 ( +0.03%) [ +0.07% +0.00% +0.09% / +0.20% +0.21% +0.03%] index_select strided 3 : Elapsed 0.092 ms (9.236 ms / 100) 9.222 -> 9.201 ( -0.23%) [ +0.05% +0.00% +0.01% / -0.23% -0.02% -0.05%] index_select random : Elapsed 0.092 ms (9.227 ms / 100) 9.209 -> 9.208 ( -0.01%) [ +0.01% +0.00% +0.09% / +0.05% +0.09% -0.01%] index_select random_sorted : Elapsed 0.092 ms (9.210 ms / 100) B = [5, 40, 20, 16] (stride (800, 1, 40, 4000)) A = [5, 40, 4, 16] (stride (640, 1, 3200, 40)) dim = 2 2.162 -> 2.161 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.32% +0.28%] index_add_ linear : Elapsed 0.022 ms (2.165 ms / 100) 2.114 -> 2.110 ( -0.19%) [ +0.00% +0.00% +0.05% / +0.14% -0.19% +0.00%] index_copy_ linear : Elapsed 0.021 ms (2.114 ms / 100) 2.153 -> 2.156 ( +0.14%) [ +0.14% +0.23% +0.00% / +0.14% +0.56% +0.37%] index_add_ reverse : Elapsed 0.022 ms (2.156 ms / 100) 2.108 -> 2.110 ( +0.09%) [ +0.19% +0.00% +0.19% / +0.09% +0.09% +0.19%] index_copy_ reverse : Elapsed 0.021 ms (2.112 ms / 100) 2.153 -> 2.152 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.74% +0.42%] index_add_ spread : Elapsed 0.022 ms (2.153 ms / 100) 2.111 -> 2.112 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.33% +0.33%] index_copy_ spread : Elapsed 0.021 ms (2.113 ms / 100) 2.158 -> 2.161 ( +0.14%) [ +0.00% +0.19% +0.05% / +0.14% +0.56% +0.37%] index_add_ strided 3 : Elapsed 0.022 ms (2.158 ms / 100) 2.112 -> 2.115 ( +0.14%) [ +0.05% +0.00% +0.00% / +0.19% +0.14% +0.24%] index_copy_ strided 3 : Elapsed 0.021 ms (2.113 ms / 100) 2.172 -> 2.165 ( -0.32%) [ +0.18% +0.09% +0.00% / -0.23% -0.23% -0.32%] index_add_ strided 7 : Elapsed 0.022 ms (2.176 ms / 100) 2.123 -> 2.114 ( -0.42%) [ +0.00% +0.14% +0.00% / -0.05% -0.42% -0.28%] index_copy_ strided 7 : Elapsed 0.021 ms (2.123 ms / 100) 2.162 -> 2.163 ( +0.05%) [ +0.46% +0.00% +0.00% / +0.05% +1.57% +0.23%] index_add_ perm : Elapsed 0.022 ms (2.172 ms / 100) 2.113 -> 2.112 ( -0.05%) [ +0.19% +0.00% +0.09% / +0.19% +0.33% -0.05%] index_copy_ perm : Elapsed 0.021 ms (2.117 ms / 100) 2.170 -> 2.161 ( -0.41%) [ +0.09% +0.00% +0.05% / -0.09% -0.41% -0.14%] index_add_ perm_sorted : Elapsed 0.022 ms (2.172 ms / 100) 2.120 -> 2.112 ( -0.38%) [ +0.00% +0.19% +0.05% / +0.05% -0.24% -0.38%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.120 ms / 100) 9.153 -> 9.155 ( +0.02%) [ +0.00% +0.17% +0.03% / +0.13% +0.02% +0.09%] index_select const : Elapsed 0.092 ms (9.153 ms / 100) 9.214 -> 9.208 ( -0.07%) [ +0.12% +0.02% +0.00% / -0.02% -0.01% -0.07%] index_select wrap : Elapsed 0.092 ms (9.225 ms / 100) 9.196 -> 9.189 ( -0.08%) [ +0.16% +0.00% +0.03% / -0.08% -0.01% +0.11%] index_select linear : Elapsed 0.092 ms (9.211 ms / 100) 9.180 -> 9.194 ( +0.15%) [ +0.11% +0.29% +0.00% / +0.15% +0.34% +0.28%] index_select reverse : Elapsed 0.092 ms (9.190 ms / 100) 9.161 -> 9.177 ( +0.17%) [ +0.00% +0.11% +0.17% / +0.25% +0.21% +0.17%] index_select skip64 : Elapsed 0.092 ms (9.161 ms / 100) 9.156 -> 9.160 ( +0.04%) [ +0.00% +0.04% +0.07% / +0.10% +0.04% +0.13%] index_select skip256 : Elapsed 0.092 ms (9.156 ms / 100) 9.197 -> 9.190 ( -0.08%) [ +0.22% +0.23% +0.00% / +0.13% -0.08% +0.09%] index_select spread : Elapsed 0.092 ms (9.217 ms / 100) 9.206 -> 9.209 ( +0.03%) [ +0.29% +0.00% +0.25% / +0.17% +0.07% +0.03%] index_select strided 3 : Elapsed 0.092 ms (9.233 ms / 100) 9.204 -> 9.210 ( +0.07%) [ +0.00% +0.20% +0.11% / +0.14% +0.07% +0.08%] index_select random : Elapsed 0.092 ms (9.204 ms / 100) 9.204 -> 9.189 ( -0.16%) [ +0.00% +0.15% +0.11% / -0.12% -0.13% -0.16%] index_select random_sorted : Elapsed 0.092 ms (9.204 ms / 100) out_shape = [5, 40, 4, 20] in_shape = [5, 40, 4, 16] idx_dim = 3 B = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) A = [5, 40, 4, 16] (stride (16, 320, 80, 1)) dim = 3 3.416 -> 3.413 ( -0.09%) [ +0.00% +0.15% +0.09% / -0.09% +0.91% +0.70%] index_add_ linear : Elapsed 0.034 ms (3.416 ms / 100) 3.229 -> 3.233 ( +0.12%) [ +0.00% +0.19% +0.03% / +0.12% +0.84% +0.87%] index_copy_ linear : Elapsed 0.032 ms (3.229 ms / 100) 3.425 -> 3.412 ( -0.38%) [ +0.15% +0.00% +0.03% / -0.38% +0.76% +0.47%] index_add_ reverse : Elapsed 0.034 ms (3.430 ms / 100) 3.230 -> 3.230 ( +0.00%) [ +0.12% +0.00% +0.09% / +0.00% +0.93% +0.80%] index_copy_ reverse : Elapsed 0.032 ms (3.234 ms / 100) 3.397 -> 3.402 ( +0.15%) [ +0.21% +0.18% +0.00% / +0.15% +0.77% +0.74%] index_add_ spread : Elapsed 0.034 ms (3.404 ms / 100) 3.220 -> 3.219 ( -0.03%) [ +0.28% +0.12% +0.00% / -0.03% +0.71% +0.78%] index_copy_ spread : Elapsed 0.032 ms (3.229 ms / 100) 3.414 -> 3.418 ( +0.12%) [ +0.29% +0.29% +0.00% / +0.12% +0.85% +1.00%] index_add_ strided 3 : Elapsed 0.034 ms (3.424 ms / 100) 3.236 -> 3.235 ( -0.03%) [ +0.12% +0.00% +0.06% / -0.03% +0.74% +0.80%] index_copy_ strided 3 : Elapsed 0.032 ms (3.240 ms / 100) 3.416 -> 3.427 ( +0.32%) [ +0.18% +0.12% +0.00% / +0.32% +0.88% +0.61%] index_add_ strided 7 : Elapsed 0.034 ms (3.422 ms / 100) 3.238 -> 3.243 ( +0.15%) [ +0.00% +0.03% +0.09% / +0.15% +0.71% +0.56%] index_copy_ strided 7 : Elapsed 0.032 ms (3.238 ms / 100) 3.402 -> 3.404 ( +0.06%) [ +0.12% +0.00% +0.09% / +0.06% +0.73% +0.65%] index_add_ perm : Elapsed 0.034 ms (3.406 ms / 100) 3.225 -> 3.227 ( +0.06%) [ +0.12% +0.03% +0.00% / +0.06% +0.87% +0.50%] index_copy_ perm : Elapsed 0.032 ms (3.229 ms / 100) 3.414 -> 3.429 ( +0.44%) [ +0.00% +0.26% +0.26% / +0.44% +0.94% +0.88%] index_add_ perm_sorted : Elapsed 0.034 ms (3.414 ms / 100) 3.230 -> 3.236 ( +0.19%) [ +0.09% +0.00% +0.19% / +0.19% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.233 ms / 100) 5.309 -> 5.306 ( -0.06%) [ +0.08% +0.13% +0.00% / -0.06% +0.13% +0.09%] index_select const : Elapsed 0.053 ms (5.313 ms / 100) 5.323 -> 5.314 ( -0.17%) [ +0.00% +0.02% +0.00% / -0.13% -0.17% -0.04%] index_select wrap : Elapsed 0.053 ms (5.323 ms / 100) 5.320 -> 5.322 ( +0.04%) [ +0.09% +0.00% +0.13% / +0.04% +0.13% +0.15%] index_select linear : Elapsed 0.053 ms (5.325 ms / 100) 5.318 -> 5.317 ( -0.02%) [ +0.11% +0.00% +0.21% / -0.02% +0.04% +0.11%] index_select reverse : Elapsed 0.053 ms (5.324 ms / 100) 5.310 -> 5.311 ( +0.02%) [ +0.00% +0.00% +0.19% / +0.02% +0.15% +0.11%] index_select skip64 : Elapsed 0.053 ms (5.310 ms / 100) 5.304 -> 5.316 ( +0.23%) [ +0.11% +0.00% +0.17% / +0.23% +0.23% +0.32%] index_select skip256 : Elapsed 0.053 ms (5.310 ms / 100) 5.321 -> 5.318 ( -0.06%) [ +0.02% +0.00% +0.17% / +0.11% -0.04% -0.06%] index_select spread : Elapsed 0.053 ms (5.322 ms / 100) 5.323 -> 5.315 ( -0.15%) [ +0.09% +0.00% +0.08% / +0.04% -0.09% -0.15%] index_select strided 3 : Elapsed 0.053 ms (5.328 ms / 100) 5.321 -> 5.319 ( -0.04%) [ +0.15% +0.00% +0.02% / +0.06% -0.04% +0.11%] index_select strided 5 : Elapsed 0.053 ms (5.329 ms / 100) 5.321 -> 5.316 ( -0.09%) [ +0.06% +0.02% +0.00% / +0.09% -0.09% +0.00%] index_select strided 7 : Elapsed 0.053 ms (5.324 ms / 100) 5.318 -> 5.320 ( +0.04%) [ +0.15% +0.00% +0.06% / +0.32% +0.04% +0.17%] index_select strided 8 : Elapsed 0.053 ms (5.326 ms / 100) 5.317 -> 5.316 ( -0.02%) [ +0.15% +0.00% +0.11% / +0.17% -0.02% +0.00%] index_select random : Elapsed 0.053 ms (5.325 ms / 100) 5.317 -> 5.322 ( +0.09%) [ +0.11% +0.00% +0.09% / +0.09% +0.19% +0.19%] index_select random_sorted : Elapsed 0.053 ms (5.323 ms / 100) B = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) A = [5, 40, 4, 16] (stride (1, 5, 3200, 200)) dim = 3 3.445 -> 3.445 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.81% +0.75%] index_add_ linear : Elapsed 0.034 ms (3.445 ms / 100) 3.308 -> 3.309 ( +0.03%) [ +0.12% +0.00% +0.03% / +0.03% +0.94% +0.82%] index_copy_ linear : Elapsed 0.033 ms (3.312 ms / 100) 3.445 -> 3.445 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.78% +0.73%] index_add_ reverse : Elapsed 0.034 ms (3.446 ms / 100) 3.317 -> 3.319 ( +0.06%) [ +0.00% +0.09% +0.06% / +0.06% +0.81% +0.75%] index_copy_ reverse : Elapsed 0.033 ms (3.317 ms / 100) 3.443 -> 3.443 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.78% +0.76%] index_add_ spread : Elapsed 0.034 ms (3.443 ms / 100) 3.303 -> 3.304 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.88% +0.88%] index_copy_ spread : Elapsed 0.033 ms (3.303 ms / 100) 3.444 -> 3.448 ( +0.12%) [ +0.09% +0.12% +0.00% / +0.12% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.034 ms (3.447 ms / 100) 3.311 -> 3.312 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.85% +0.82%] index_copy_ strided 3 : Elapsed 0.033 ms (3.312 ms / 100) 3.444 -> 3.446 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.75% +0.70%] index_add_ strided 7 : Elapsed 0.034 ms (3.444 ms / 100) 3.318 -> 3.320 ( +0.06%) [ +0.00% +0.09% +0.06% / +0.06% +0.72% +0.69%] index_copy_ strided 7 : Elapsed 0.033 ms (3.318 ms / 100) 3.447 -> 3.447 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_add_ perm : Elapsed 0.034 ms (3.447 ms / 100) 3.307 -> 3.310 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.91% +0.79%] index_copy_ perm : Elapsed 0.033 ms (3.309 ms / 100) 3.445 -> 3.447 ( +0.06%) [ +0.09% +0.09% +0.00% / +0.06% +0.78% +0.78%] index_add_ perm_sorted : Elapsed 0.034 ms (3.448 ms / 100) 3.309 -> 3.309 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.91% +0.82%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.311 ms / 100) 5.380 -> 5.377 ( -0.06%) [ +0.09% +0.07% +0.00% / +0.24% +0.02% -0.06%] index_select const : Elapsed 0.054 ms (5.385 ms / 100) 5.392 -> 5.391 ( -0.02%) [ +0.00% +0.06% +0.09% / -0.02% +0.00% +0.11%] index_select wrap : Elapsed 0.054 ms (5.392 ms / 100) 5.390 -> 5.392 ( +0.04%) [ +0.02% +0.00% +0.11% / +0.04% +0.11% +0.19%] index_select linear : Elapsed 0.054 ms (5.391 ms / 100) 5.386 -> 5.386 ( +0.00%) [ +0.11% +0.00% +0.09% / +0.00% +0.20% +0.20%] index_select reverse : Elapsed 0.054 ms (5.392 ms / 100) 5.378 -> 5.381 ( +0.06%) [ +0.13% +0.00% +0.07% / +0.07% +0.06% +0.13%] index_select skip64 : Elapsed 0.054 ms (5.385 ms / 100) 5.379 -> 5.383 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.09% +0.11% +0.07%] index_select skip256 : Elapsed 0.054 ms (5.386 ms / 100) 5.389 -> 5.393 ( +0.07%) [ +0.00% +0.11% +0.15% / +0.07% +0.11% +0.17%] index_select spread : Elapsed 0.054 ms (5.389 ms / 100) 5.392 -> 5.384 ( -0.15%) [ +0.00% +0.19% +0.06% / -0.15% +0.07% +0.09%] index_select strided 3 : Elapsed 0.054 ms (5.392 ms / 100) 5.387 -> 5.393 ( +0.11%) [ +0.13% +0.00% +0.07% / +0.11% +0.13% +0.22%] index_select strided 5 : Elapsed 0.054 ms (5.394 ms / 100) 5.388 -> 5.395 ( +0.13%) [ +0.00% +0.09% +0.00% / +0.13% +0.15% +0.22%] index_select strided 7 : Elapsed 0.054 ms (5.388 ms / 100) 5.378 -> 5.379 ( +0.02%) [ +0.17% +0.07% +0.00% / +0.04% +0.02% +0.19%] index_select strided 8 : Elapsed 0.054 ms (5.387 ms / 100) 5.385 -> 5.389 ( +0.07%) [ +0.09% +0.07% +0.00% / +0.07% +0.15% +0.09%] index_select random : Elapsed 0.054 ms (5.390 ms / 100) 5.383 -> 5.385 ( +0.04%) [ +0.09% +0.00% +0.11% / +0.06% +0.17% +0.04%] index_select random_sorted : Elapsed 0.054 ms (5.388 ms / 100) B = [5, 40, 4, 20] (stride (3200, 1, 40, 160)) A = [5, 40, 4, 16] (stride (1, 320, 80, 5)) dim = 3 4.110 -> 4.112 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.78% +0.68%] index_add_ linear : Elapsed 0.041 ms (4.112 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.00% +0.03% +0.08% / +0.00% +0.76% +0.64%] index_copy_ linear : Elapsed 0.039 ms (3.932 ms / 100) 4.119 -> 4.119 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.53% +0.44%] index_add_ reverse : Elapsed 0.041 ms (4.120 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.53% +0.63%] index_copy_ reverse : Elapsed 0.039 ms (3.941 ms / 100) 4.099 -> 4.103 ( +0.10%) [ +0.00% +0.10% +0.07% / +0.10% +0.76% +0.81%] index_add_ spread : Elapsed 0.041 ms (4.099 ms / 100) 3.924 -> 3.928 ( +0.10%) [ +0.00% +0.05% +0.03% / +0.10% +0.99% +0.76%] index_copy_ spread : Elapsed 0.039 ms (3.924 ms / 100) 4.093 -> 4.098 ( +0.12%) [ +0.07% +0.10% +0.00% / +0.12% +0.81% +0.78%] index_add_ strided 3 : Elapsed 0.041 ms (4.096 ms / 100) 3.923 -> 3.929 ( +0.15%) [ +0.03% +0.03% +0.00% / +0.15% +0.76% +0.79%] index_copy_ strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 4.096 -> 4.099 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.68% +0.66%] index_add_ strided 7 : Elapsed 0.041 ms (4.097 ms / 100) 3.922 -> 3.926 ( +0.10%) [ +0.10% +0.08% +0.00% / +0.10% +0.82% +0.82%] index_copy_ strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 4.103 -> 4.107 ( +0.10%) [ +0.07% +0.17% +0.00% / +0.10% +0.49% +0.68%] index_add_ perm : Elapsed 0.041 ms (4.106 ms / 100) 3.933 -> 3.932 ( -0.03%) [ +0.00% +0.08% +0.08% / -0.03% +0.51% +0.58%] index_copy_ perm : Elapsed 0.039 ms (3.933 ms / 100) 4.115 -> 4.114 ( -0.02%) [ +0.10% +0.10% +0.00% / -0.02% +0.75% +0.63%] index_add_ perm_sorted : Elapsed 0.041 ms (4.119 ms / 100) 3.941 -> 3.940 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.79% +0.61%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.942 ms / 100) 5.488 -> 5.481 ( -0.13%) [ +0.07% +0.05% +0.00% / -0.05% -0.13% +0.05%] index_select const : Elapsed 0.055 ms (5.492 ms / 100) 5.496 -> 5.498 ( +0.04%) [ +0.09% +0.13% +0.00% / +0.18% +0.13% +0.04%] index_select wrap : Elapsed 0.055 ms (5.501 ms / 100) 5.499 -> 5.496 ( -0.05%) [ +0.00% +0.05% +0.04% / -0.05% +0.02% +0.05%] index_select linear : Elapsed 0.055 ms (5.499 ms / 100) 5.492 -> 5.490 ( -0.04%) [ +0.00% +0.18% +0.24% / +0.11% +0.13% -0.04%] index_select reverse : Elapsed 0.055 ms (5.492 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.05% +0.09% +0.00% / +0.02% +0.16% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.487 -> 5.480 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% -0.02% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.501 -> 5.491 ( -0.18%) [ +0.02% +0.00% +0.02% / -0.04% -0.04% -0.18%] index_select spread : Elapsed 0.055 ms (5.502 ms / 100) 5.502 -> 5.493 ( -0.16%) [ +0.07% +0.00% +0.04% / -0.11% -0.16% -0.11%] index_select strided 3 : Elapsed 0.055 ms (5.506 ms / 100) 5.497 -> 5.499 ( +0.04%) [ +0.00% +0.27% +0.13% / +0.11% +0.04% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.497 ms / 100) 5.498 -> 5.498 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.13% +0.00% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.498 ms / 100) 5.476 -> 5.484 ( +0.15%) [ +0.15% +0.20% +0.00% / +0.15% +0.16% +0.16%] index_select strided 8 : Elapsed 0.055 ms (5.484 ms / 100) 5.496 -> 5.490 ( -0.11%) [ +0.05% +0.04% +0.00% / -0.11% +0.02% +0.07%] index_select random : Elapsed 0.055 ms (5.499 ms / 100) 5.496 -> 5.493 ( -0.05%) [ +0.16% +0.00% +0.09% / +0.07% -0.05% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.505 ms / 100) B = [5, 40, 4, 20] (stride (4, 400, 1, 20)) A = [5, 40, 4, 16] (stride (640, 1, 3200, 40)) dim = 3 3.952 -> 3.954 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.73% +0.76%] index_add_ linear : Elapsed 0.040 ms (3.954 ms / 100) 3.826 -> 3.827 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.78% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.827 ms / 100) 3.966 -> 3.967 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.93% +0.78%] index_add_ reverse : Elapsed 0.040 ms (3.967 ms / 100) 3.836 -> 3.840 ( +0.10%) [ +0.00% +0.00% +0.08% / +0.10% +0.89% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.836 ms / 100) 3.968 -> 3.969 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.86% +0.76%] index_add_ spread : Elapsed 0.040 ms (3.971 ms / 100) 3.835 -> 3.843 ( +0.21%) [ +0.18% +0.05% +0.00% / +0.21% +0.94% +0.83%] index_copy_ spread : Elapsed 0.038 ms (3.842 ms / 100) 3.958 -> 3.964 ( +0.15%) [ +0.15% +0.03% +0.00% / +0.15% +0.83% +0.91%] index_add_ strided 3 : Elapsed 0.040 ms (3.964 ms / 100) 3.829 -> 3.834 ( +0.13%) [ +0.13% +0.05% +0.00% / +0.13% +0.86% +0.89%] index_copy_ strided 3 : Elapsed 0.038 ms (3.834 ms / 100) 3.962 -> 3.968 ( +0.15%) [ +0.03% +0.13% +0.00% / +0.15% +0.73% +0.76%] index_add_ strided 7 : Elapsed 0.040 ms (3.963 ms / 100) 3.831 -> 3.836 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.91% +0.84%] index_copy_ strided 7 : Elapsed 0.038 ms (3.836 ms / 100) 3.954 -> 3.954 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.71% +0.73%] index_add_ perm : Elapsed 0.040 ms (3.955 ms / 100) 3.825 -> 3.826 ( +0.03%) [ +0.08% +0.10% +0.00% / +0.03% +0.78% +0.84%] index_copy_ perm : Elapsed 0.038 ms (3.828 ms / 100) 3.962 -> 3.967 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.63% +0.63%] index_add_ perm_sorted : Elapsed 0.040 ms (3.967 ms / 100) 3.834 -> 3.836 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.63% +0.60%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.835 ms / 100) 5.558 -> 5.559 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.56% +0.23%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.578 -> 5.583 ( +0.09%) [ +0.02% +0.00% +0.05% / +0.13% +0.18% +0.09%] index_select wrap : Elapsed 0.056 ms (5.579 ms / 100) 5.582 -> 5.579 ( -0.05%) [ +0.00% +0.07% +0.07% / -0.05% +0.04% +0.05%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.580 -> 5.587 ( +0.13%) [ +0.00% +0.07% +0.20% / +0.13% +0.13% +0.23%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.553 -> 5.552 ( -0.02%) [ +0.09% +0.00% +0.09% / -0.02% +0.13% +0.20%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.555 -> 5.558 ( +0.05%) [ +0.11% +0.13% +0.00% / +0.07% +0.05% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.576 -> 5.576 ( +0.00%) [ +0.11% +0.00% +0.09% / +0.00% +0.16% +0.14%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.577 -> 5.580 ( +0.05%) [ +0.00% +0.11% +0.16% / +0.05% +0.20% +0.22%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.581 -> 5.589 ( +0.14%) [ +0.00% +0.13% +0.00% / +0.14% +0.18% +0.16%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.575 -> 5.586 ( +0.20%) [ +0.20% +0.00% +0.07% / +0.20% +0.20% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.586 ms / 100) 5.558 -> 5.565 ( +0.13%) [ +0.05% +0.00% +0.09% / +0.13% +0.18% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.561 ms / 100) 5.577 -> 5.580 ( +0.05%) [ +0.02% +0.09% +0.00% / +0.11% +0.22% +0.05%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.13% +0.00% +0.18% / +0.07% +0.34% +0.29%] index_select random_sorted : Elapsed 0.056 ms (5.577 ms / 100) B = [5, 40, 4, 20] (stride (20, 100, 4000, 1)) A = [5, 40, 4, 16] (stride (16, 80, 3200, 1)) dim = 3 4.461 -> 4.466 ( +0.11%) [ +0.16% +0.09% +0.00% / +0.11% +0.76% +0.74%] index_add_ linear : Elapsed 0.045 ms (4.468 ms / 100) 4.286 -> 4.285 ( -0.02%) [ +0.02% +0.00% +0.05% / -0.02% +0.70% +0.75%] index_copy_ linear : Elapsed 0.043 ms (4.287 ms / 100) 4.445 -> 4.451 ( +0.13%) [ +0.00% +0.11% +0.04% / +0.13% +0.67% +0.83%] index_add_ reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.96% +0.79%] index_copy_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.445 -> 4.452 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.88% +0.49%] index_add_ spread : Elapsed 0.044 ms (4.445 ms / 100) 4.272 -> 4.275 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +1.01% +0.77%] index_copy_ spread : Elapsed 0.043 ms (4.275 ms / 100) 4.453 -> 4.453 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.00% +0.79% +0.67%] index_add_ strided 3 : Elapsed 0.045 ms (4.459 ms / 100) 4.286 -> 4.283 ( -0.07%) [ +0.05% +0.02% +0.00% / -0.07% +0.63% +0.65%] index_copy_ strided 3 : Elapsed 0.043 ms (4.288 ms / 100) 4.457 -> 4.455 ( -0.04%) [ +0.09% +0.00% +0.07% / -0.04% +0.61% +0.47%] index_add_ strided 7 : Elapsed 0.045 ms (4.461 ms / 100) 4.287 -> 4.284 ( -0.07%) [ +0.12% +0.00% +0.12% / -0.07% +0.65% +0.47%] index_copy_ strided 7 : Elapsed 0.043 ms (4.292 ms / 100) 4.438 -> 4.445 ( +0.16%) [ +0.41% +0.34% +0.00% / +0.16% +0.77% +0.79%] index_add_ perm : Elapsed 0.045 ms (4.456 ms / 100) 4.274 -> 4.277 ( +0.07%) [ +0.09% +0.12% +0.00% / +0.07% +0.66% +0.63%] index_copy_ perm : Elapsed 0.043 ms (4.278 ms / 100) 4.447 -> 4.451 ( +0.09%) [ +0.02% +0.00% +0.00% / +0.09% +0.63% +0.67%] index_add_ perm_sorted : Elapsed 0.044 ms (4.448 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.07% +0.84% +0.77%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.280 ms / 100) 5.575 -> 5.571 ( -0.07%) [ +0.04% +0.05% +0.00% / +0.05% -0.07% -0.02%] index_select const : Elapsed 0.056 ms (5.577 ms / 100) 5.582 -> 5.579 ( -0.05%) [ +0.00% +0.07% +0.13% / +0.04% -0.05% +0.14%] index_select wrap : Elapsed 0.056 ms (5.582 ms / 100) 5.581 -> 5.584 ( +0.05%) [ +0.11% +0.00% +0.11% / +0.05% +0.16% +0.14%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.586 -> 5.582 ( -0.07%) [ +0.00% +0.09% +0.05% / +0.11% -0.02% -0.07%] index_select reverse : Elapsed 0.056 ms (5.586 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.00% +0.02% +0.00% / +0.14% +0.14% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.05% +0.00% +0.09% / +0.13% +0.27% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.584 -> 5.578 ( -0.11%) [ +0.00% +0.02% +0.07% / +0.11% -0.11% +0.00%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.587 -> 5.584 ( -0.05%) [ +0.04% +0.00% +0.13% / +0.11% -0.05% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.589 ms / 100) 5.587 -> 5.587 ( +0.00%) [ +0.09% +0.16% +0.00% / +0.20% +0.04% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.592 ms / 100) 5.582 -> 5.582 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.00% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.584 ms / 100) 5.585 -> 5.583 ( -0.04%) [ +0.07% +0.13% +0.00% / +0.11% -0.04% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.589 ms / 100) 5.582 -> 5.586 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.09% +0.07% +0.14%] index_select random : Elapsed 0.056 ms (5.585 ms / 100) 5.583 -> 5.584 ( +0.02%) [ +0.11% +0.00% +0.11% / +0.04% +0.02% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.589 ms / 100) B = [5, 40, 4, 20] (stride (1, 5, 200, 800)) A = [5, 40, 4, 16] (stride (2560, 4, 1, 160)) dim = 3 3.633 -> 3.635 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.83% +0.88%] index_add_ linear : Elapsed 0.036 ms (3.634 ms / 100) 3.506 -> 3.507 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.71% +0.71%] index_copy_ linear : Elapsed 0.035 ms (3.506 ms / 100) 3.628 -> 3.631 ( +0.08%) [ +0.11% +0.11% +0.00% / +0.08% +0.74% +0.85%] index_add_ reverse : Elapsed 0.036 ms (3.632 ms / 100) 3.497 -> 3.499 ( +0.06%) [ +0.09% +0.09% +0.00% / +0.06% +1.03% +0.97%] index_copy_ reverse : Elapsed 0.035 ms (3.500 ms / 100) 3.628 -> 3.628 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.77% +0.77%] index_add_ spread : Elapsed 0.036 ms (3.629 ms / 100) 3.497 -> 3.501 ( +0.11%) [ +0.00% +0.09% +0.06% / +0.11% +1.00% +0.97%] index_copy_ spread : Elapsed 0.035 ms (3.497 ms / 100) 3.620 -> 3.624 ( +0.11%) [ +0.08% +0.11% +0.00% / +0.11% +0.83% +0.83%] index_add_ strided 3 : Elapsed 0.036 ms (3.623 ms / 100) 3.494 -> 3.493 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.77% +0.86%] index_copy_ strided 3 : Elapsed 0.035 ms (3.494 ms / 100) 3.628 -> 3.632 ( +0.11%) [ +0.11% +0.06% +0.00% / +0.11% +0.88% +0.74%] index_add_ strided 7 : Elapsed 0.036 ms (3.632 ms / 100) 3.499 -> 3.500 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +1.00% +0.91%] index_copy_ strided 7 : Elapsed 0.035 ms (3.501 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.91% +0.91%] index_add_ perm : Elapsed 0.036 ms (3.633 ms / 100) 3.506 -> 3.505 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.74% +0.71%] index_copy_ perm : Elapsed 0.035 ms (3.507 ms / 100) 3.623 -> 3.624 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.77% +0.80%] index_add_ perm_sorted : Elapsed 0.036 ms (3.624 ms / 100) 3.490 -> 3.491 ( +0.03%) [ +0.09% +0.11% +0.00% / +0.03% +1.00% +1.17%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.493 ms / 100) 5.483 -> 5.480 ( -0.05%) [ +0.00% +0.09% +0.04% / +0.11% +0.05% -0.05%] index_select const : Elapsed 0.055 ms (5.483 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.16% +0.16% +0.00% / +0.11% +0.15% -0.02%] index_select wrap : Elapsed 0.055 ms (5.501 ms / 100) 5.488 -> 5.497 ( +0.16%) [ +0.11% +0.00% +0.20% / +0.16% +0.18% +0.16%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.493 -> 5.491 ( -0.04%) [ +0.00% +0.04% +0.02% / -0.04% +0.24% +0.05%] index_select reverse : Elapsed 0.055 ms (5.493 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.11% +0.00% +0.02% / -0.02% +0.07% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.490 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.00% +0.07% +0.04% / +0.13% +0.11% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.494 -> 5.492 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.05% +0.11%] index_select spread : Elapsed 0.055 ms (5.499 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.18% +0.15% +0.00% / -0.09% +0.16% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.501 ms / 100) 5.491 -> 5.494 ( +0.05%) [ +0.13% +0.00% +0.02% / +0.05% +0.11% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.498 ms / 100) 5.493 -> 5.494 ( +0.02%) [ +0.15% +0.02% +0.00% / +0.02% +0.15% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.501 ms / 100) 5.481 -> 5.488 ( +0.13%) [ +0.00% +0.09% +0.05% / +0.15% +0.29% +0.13%] index_select strided 8 : Elapsed 0.055 ms (5.481 ms / 100) 5.485 -> 5.491 ( +0.11%) [ +0.11% +0.00% +0.07% / +0.15% +0.24% +0.11%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.486 -> 5.492 ( +0.11%) [ +0.16% +0.00% +0.13% / +0.11% +0.18% +0.18%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [5, 40, 4, 20] (stride (1, 5, 200, 800)) A = [5, 40, 4, 16] (stride (1, 5, 3200, 200)) dim = 3 4.060 -> 4.058 ( -0.05%) [ +0.10% +0.00% +0.02% / -0.05% +0.84% +0.76%] index_add_ linear : Elapsed 0.041 ms (4.064 ms / 100) 3.925 -> 3.924 ( -0.03%) [ +0.03% +0.00% +0.05% / -0.03% +0.79% +0.87%] index_copy_ linear : Elapsed 0.039 ms (3.926 ms / 100) 4.067 -> 4.069 ( +0.05%) [ +0.17% +0.00% +0.05% / +0.05% +0.79% +0.79%] index_add_ reverse : Elapsed 0.041 ms (4.074 ms / 100) 3.923 -> 3.922 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.79% +0.82%] index_copy_ reverse : Elapsed 0.039 ms (3.925 ms / 100) 4.072 -> 4.081 ( +0.22%) [ +0.20% +0.00% +0.17% / +0.22% +0.74% +0.81%] index_add_ spread : Elapsed 0.041 ms (4.080 ms / 100) 3.929 -> 3.931 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.84% +0.79%] index_copy_ spread : Elapsed 0.039 ms (3.931 ms / 100) 4.057 -> 4.060 ( +0.07%) [ +0.10% +0.10% +0.00% / +0.07% +0.81% +0.81%] index_add_ strided 3 : Elapsed 0.041 ms (4.061 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.84% +0.77%] index_copy_ strided 3 : Elapsed 0.039 ms (3.924 ms / 100) 4.059 -> 4.064 ( +0.12%) [ +0.05% +0.10% +0.00% / +0.12% +0.76% +0.71%] index_add_ strided 7 : Elapsed 0.041 ms (4.061 ms / 100) 3.922 -> 3.930 ( +0.20%) [ +0.05% +0.03% +0.00% / +0.20% +0.76% +0.71%] index_copy_ strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 4.074 -> 4.077 ( +0.07%) [ +0.00% +0.05% +0.17% / +0.07% +0.71% +0.74%] index_add_ perm : Elapsed 0.041 ms (4.074 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.66% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.931 ms / 100) 4.070 -> 4.072 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.66% +0.59%] index_add_ perm_sorted : Elapsed 0.041 ms (4.072 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.61% +0.71%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.924 ms / 100) 5.565 -> 5.553 ( -0.22%) [ +0.00% +0.02% +0.16% / -0.22% -0.11% -0.02%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.574 -> 5.569 ( -0.09%) [ +0.00% +0.11% +0.00% / +0.07% -0.09% -0.04%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.00% +0.05% +0.02% / +0.04% +0.07% +0.05%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.20% +0.00% +0.04% / +0.04% +0.00% +0.04%] index_select reverse : Elapsed 0.056 ms (5.583 ms / 100) 5.558 -> 5.563 ( +0.09%) [ +0.07% +0.13% +0.00% / +0.11% +0.20% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.557 ( -0.07%) [ +0.00% +0.11% +0.04% / -0.07% +0.07% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.09% +0.25% +0.00% / -0.05% -0.02% -0.04%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.567 ( -0.09%) [ +0.05% +0.00% +0.13% / +0.13% -0.09% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.575 ms / 100) 5.574 -> 5.564 ( -0.18%) [ +0.07% +0.13% +0.00% / +0.09% -0.18% -0.07%] index_select strided 5 : Elapsed 0.056 ms (5.578 ms / 100) 5.574 -> 5.565 ( -0.16%) [ +0.00% +0.11% +0.05% / +0.05% -0.16% +0.02%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.560 -> 5.557 ( -0.05%) [ +0.00% +0.18% +0.02% / -0.05% +0.02% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.563 -> 5.570 ( +0.13%) [ +0.14% +0.09% +0.00% / +0.13% +0.18% +0.18%] index_select random : Elapsed 0.056 ms (5.571 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.09% +0.05% +0.00% / -0.07% +0.09% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) out_shape = [20, 40, 16, 4] in_shape = [5, 40, 16, 4] idx_dim = 0 B = [20, 40, 16, 4] (stride (2560, 1, 160, 40)) A = [5, 40, 16, 4] (stride (1, 5, 800, 200)) dim = 0 0.700 -> 0.702 ( +0.29%) [ +0.14% +0.57% +0.00% / +0.29% +1.57% +1.43%] index_add_ linear : Elapsed 0.007 ms (0.701 ms / 100) 0.713 -> 0.713 ( +0.00%) [ +0.00% +0.28% +0.00% / +0.00% +1.26% +0.84%] index_copy_ linear : Elapsed 0.007 ms (0.713 ms / 100) 0.711 -> 0.704 ( -0.98%) [ +0.00% +0.00% +0.00% / +0.00% -0.98% -0.84%] index_add_ reverse : Elapsed 0.007 ms (0.711 ms / 100) 0.725 -> 0.720 ( -0.69%) [ +0.14% +0.00% +0.00% / +0.00% -0.55% -0.69%] index_copy_ reverse : Elapsed 0.007 ms (0.726 ms / 100) 0.709 -> 0.702 ( -0.99%) [ +0.00% +0.00% +0.00% / +0.00% -0.99% -0.99%] index_add_ spread : Elapsed 0.007 ms (0.709 ms / 100) 0.719 -> 0.713 ( -0.83%) [ +0.00% +0.14% +0.00% / +0.14% -0.83% -0.83%] index_copy_ spread : Elapsed 0.007 ms (0.719 ms / 100) 0.702 -> 0.703 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.28% +0.28%] index_add_ strided 3 : Elapsed 0.007 ms (0.703 ms / 100) 0.710 -> 0.711 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +0.42% +0.28%] index_copy_ strided 3 : Elapsed 0.007 ms (0.712 ms / 100) 0.701 -> 0.703 ( +0.29%) [ +0.00% +0.29% +0.00% / +0.29% +0.71% +0.57%] index_add_ strided 7 : Elapsed 0.007 ms (0.701 ms / 100) 0.711 -> 0.711 ( +0.00%) [ +0.14% +0.28% +0.00% / +0.00% +0.42% +0.42%] index_copy_ strided 7 : Elapsed 0.007 ms (0.712 ms / 100) 0.700 -> 0.700 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.43% +1.43%] index_add_ perm : Elapsed 0.007 ms (0.700 ms / 100) 0.709 -> 0.709 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.27% +0.99%] index_copy_ perm : Elapsed 0.007 ms (0.709 ms / 100) 0.701 -> 0.701 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.43% +1.43%] index_add_ perm_sorted : Elapsed 0.007 ms (0.701 ms / 100) 0.711 -> 0.712 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +1.27% +0.98%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.713 ms / 100) 4.966 -> 4.960 ( -0.12%) [ +0.00% +0.20% +0.12% / -0.04% -0.12% -0.12%] index_select const : Elapsed 0.050 ms (4.966 ms / 100) 4.958 -> 4.943 ( -0.30%) [ +0.20% +0.18% +0.00% / +0.10% -0.22% -0.30%] index_select wrap : Elapsed 0.050 ms (4.968 ms / 100) 4.966 -> 4.946 ( -0.40%) [ +0.14% +0.00% +0.16% / +0.14% +0.10% -0.40%] index_select linear : Elapsed 0.050 ms (4.973 ms / 100) 4.965 -> 4.946 ( -0.38%) [ +0.18% +0.00% +0.20% / -0.06% -0.38% -0.20%] index_select reverse : Elapsed 0.050 ms (4.974 ms / 100) 4.953 -> 4.957 ( +0.08%) [ +0.00% +0.18% +0.26% / +0.24% +0.08% +0.12%] index_select skip64 : Elapsed 0.050 ms (4.953 ms / 100) 4.953 -> 4.944 ( -0.18%) [ +0.06% +0.08% +0.00% / +0.00% -0.18% -0.16%] index_select skip256 : Elapsed 0.050 ms (4.956 ms / 100) 4.963 -> 4.947 ( -0.32%) [ +0.00% +0.08% +0.02% / +0.02% -0.04% -0.32%] index_select spread : Elapsed 0.050 ms (4.963 ms / 100) 4.950 -> 4.951 ( +0.02%) [ +0.22% +0.22% +0.00% / +0.32% +0.02% +0.14%] index_select strided 3 : Elapsed 0.050 ms (4.961 ms / 100) 4.957 -> 4.953 ( -0.08%) [ +0.08% +0.00% +0.26% / -0.08% -0.06% -0.08%] index_select random : Elapsed 0.050 ms (4.961 ms / 100) 4.965 -> 4.946 ( -0.38%) [ +0.08% +0.10% +0.00% / -0.16% -0.28% -0.38%] index_select random_sorted : Elapsed 0.050 ms (4.969 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) A = [5, 40, 16, 4] (stride (4, 320, 20, 1)) dim = 0 1.715 -> 1.718 ( +0.17%) [ +0.41% +0.17% +0.00% / +0.17% +1.11% +1.22%] index_add_ linear : Elapsed 0.017 ms (1.722 ms / 100) 1.671 -> 1.669 ( -0.12%) [ +0.00% +0.30% +0.00% / -0.12% +0.60% +0.78%] index_copy_ linear : Elapsed 0.017 ms (1.671 ms / 100) 1.714 -> 1.716 ( +0.12%) [ +0.23% +0.18% +0.00% / +0.12% +0.35% +0.12%] index_add_ reverse : Elapsed 0.017 ms (1.718 ms / 100) 1.670 -> 1.673 ( +0.18%) [ +0.00% +0.18% +0.06% / +0.18% +0.42% +0.30%] index_copy_ reverse : Elapsed 0.017 ms (1.670 ms / 100) 1.702 -> 1.700 ( -0.12%) [ +0.00% +0.06% +0.06% / -0.12% +1.82% +2.00%] index_add_ spread : Elapsed 0.017 ms (1.702 ms / 100) 1.656 -> 1.657 ( +0.06%) [ +0.24% +0.00% +0.00% / +0.06% +1.63% +1.81%] index_copy_ spread : Elapsed 0.017 ms (1.660 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.18% +0.06% +0.00% / +0.00% +1.52% +1.23%] index_add_ strided 3 : Elapsed 0.017 ms (1.717 ms / 100) 1.669 -> 1.665 ( -0.24%) [ +0.06% +0.24% +0.00% / -0.24% +1.20% +1.02%] index_copy_ strided 3 : Elapsed 0.017 ms (1.670 ms / 100) 1.705 -> 1.706 ( +0.06%) [ +0.00% +0.23% +0.23% / +0.06% +1.70% +1.47%] index_add_ strided 7 : Elapsed 0.017 ms (1.705 ms / 100) 1.665 -> 1.666 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.90% +0.90%] index_copy_ strided 7 : Elapsed 0.017 ms (1.666 ms / 100) 1.701 -> 1.703 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.71% +0.65%] index_add_ perm : Elapsed 0.017 ms (1.703 ms / 100) 1.656 -> 1.659 ( +0.18%) [ +0.36% +0.00% +0.06% / +0.18% +0.79% +0.79%] index_copy_ perm : Elapsed 0.017 ms (1.662 ms / 100) 1.702 -> 1.702 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.94% +0.41%] index_add_ perm_sorted : Elapsed 0.017 ms (1.703 ms / 100) 1.660 -> 1.661 ( +0.06%) [ +0.00% +0.00% +0.18% / +0.06% +0.66% +0.42%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.660 ms / 100) 8.237 -> 8.232 ( -0.06%) [ +0.00% +0.13% +0.10% / -0.06% +0.32% +0.27%] index_select const : Elapsed 0.082 ms (8.237 ms / 100) 8.256 -> 8.265 ( +0.11%) [ +0.16% +0.06% +0.00% / +0.11% +0.31% +0.23%] index_select wrap : Elapsed 0.083 ms (8.269 ms / 100) 8.254 -> 8.254 ( +0.00%) [ +0.00% +0.00% +0.18% / +0.00% +0.15% +0.48%] index_select linear : Elapsed 0.083 ms (8.254 ms / 100) 8.253 -> 8.264 ( +0.13%) [ +0.29% +0.00% +0.27% / +0.27% +0.13% +0.41%] index_select reverse : Elapsed 0.083 ms (8.277 ms / 100) 8.238 -> 8.233 ( -0.06%) [ +0.08% +0.00% +0.16% / -0.06% +0.21% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.245 ms / 100) 8.224 -> 8.240 ( +0.19%) [ +0.40% +0.00% +0.40% / +0.19% +0.55% +0.32%] index_select skip256 : Elapsed 0.083 ms (8.257 ms / 100) 8.247 -> 8.260 ( +0.16%) [ +0.11% +0.00% +0.24% / +0.40% +0.16% +0.23%] index_select spread : Elapsed 0.083 ms (8.256 ms / 100) 8.248 -> 8.250 ( +0.02%) [ +0.05% +0.00% +0.15% / +0.02% +0.21% +0.33%] index_select strided 3 : Elapsed 0.083 ms (8.252 ms / 100) 8.243 -> 8.254 ( +0.13%) [ +0.27% +0.35% +0.00% / +0.13% +0.25% +0.42%] index_select random : Elapsed 0.083 ms (8.265 ms / 100) 8.241 -> 8.266 ( +0.30%) [ +0.08% +0.30% +0.00% / +0.30% +0.34% +0.44%] index_select random_sorted : Elapsed 0.082 ms (8.248 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) A = [5, 40, 16, 4] (stride (4, 20, 800, 1)) dim = 0 1.847 -> 1.851 ( +0.22%) [ +0.00% +0.16% +0.16% / +0.22% +0.87% +0.92%] index_add_ linear : Elapsed 0.018 ms (1.847 ms / 100) 1.802 -> 1.806 ( +0.22%) [ +0.00% +0.06% +0.28% / +0.22% +1.11% +1.17%] index_copy_ linear : Elapsed 0.018 ms (1.802 ms / 100) 1.848 -> 1.852 ( +0.22%) [ +0.00% +0.16% +0.11% / +0.22% +0.81% +0.81%] index_add_ reverse : Elapsed 0.018 ms (1.848 ms / 100) 1.806 -> 1.810 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.89% +0.89%] index_copy_ reverse : Elapsed 0.018 ms (1.806 ms / 100) 1.844 -> 1.846 ( +0.11%) [ +0.27% +0.11% +0.00% / +0.11% +0.98% +1.08%] index_add_ spread : Elapsed 0.018 ms (1.849 ms / 100) 1.805 -> 1.802 ( -0.17%) [ +0.06% +0.06% +0.00% / -0.17% +1.05% +1.16%] index_copy_ spread : Elapsed 0.018 ms (1.806 ms / 100) 1.843 -> 1.839 ( -0.22%) [ +0.33% +0.00% +0.00% / -0.22% +1.36% +1.57%] index_add_ strided 3 : Elapsed 0.018 ms (1.849 ms / 100) 1.801 -> 1.801 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +1.61% +1.50%] index_copy_ strided 3 : Elapsed 0.018 ms (1.801 ms / 100) 1.840 -> 1.844 ( +0.22%) [ +0.00% +0.16% +0.11% / +0.22% +0.82% +0.98%] index_add_ strided 7 : Elapsed 0.018 ms (1.840 ms / 100) 1.797 -> 1.801 ( +0.22%) [ +0.00% +0.22% +0.17% / +0.22% +0.83% +1.11%] index_copy_ strided 7 : Elapsed 0.018 ms (1.797 ms / 100) 1.817 -> 1.840 ( +1.27%) [ +0.06% +0.00% +1.10% / +1.27% +2.15% +2.09%] index_add_ perm : Elapsed 0.018 ms (1.818 ms / 100) 1.772 -> 1.793 ( +1.19%) [ +0.00% +0.11% +1.24% / +1.19% +2.54% +2.48%] index_copy_ perm : Elapsed 0.018 ms (1.772 ms / 100) 1.841 -> 1.846 ( +0.27%) [ +0.00% +0.00% +0.22% / +0.27% +0.81% +0.76%] index_add_ perm_sorted : Elapsed 0.018 ms (1.841 ms / 100) 1.800 -> 1.806 ( +0.33%) [ +0.00% +0.00% +0.06% / +0.33% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.800 ms / 100) 8.570 -> 8.573 ( +0.04%) [ +0.00% +0.08% +0.11% / +0.04% +0.05% +0.18%] index_select const : Elapsed 0.086 ms (8.570 ms / 100) 8.577 -> 8.572 ( -0.06%) [ +0.19% +0.00% +0.31% / -0.06% +0.47% +0.14%] index_select wrap : Elapsed 0.086 ms (8.593 ms / 100) 8.580 -> 8.584 ( +0.05%) [ +0.17% +0.01% +0.00% / +0.05% +0.17% +0.29%] index_select linear : Elapsed 0.086 ms (8.595 ms / 100) 8.604 -> 8.579 ( -0.29%) [ +0.00% +0.15% +0.05% / -0.29% +0.10% +0.13%] index_select reverse : Elapsed 0.086 ms (8.604 ms / 100) 8.570 -> 8.572 ( +0.02%) [ +0.06% +0.00% +0.11% / +0.27% +0.04% +0.02%] index_select skip64 : Elapsed 0.086 ms (8.575 ms / 100) 8.567 -> 8.571 ( +0.05%) [ +0.13% +0.00% +0.08% / +0.13% +0.16% +0.05%] index_select skip256 : Elapsed 0.086 ms (8.578 ms / 100) 8.601 -> 8.602 ( +0.01%) [ +0.09% +0.00% +0.15% / +0.01% +0.17% +0.23%] index_select spread : Elapsed 0.086 ms (8.609 ms / 100) 8.578 -> 8.593 ( +0.17%) [ +0.00% +0.29% +0.16% / +0.17% +0.28% +0.48%] index_select strided 3 : Elapsed 0.086 ms (8.578 ms / 100) 8.585 -> 8.582 ( -0.03%) [ +0.00% +0.09% +0.14% / -0.03% +0.56% +0.24%] index_select random : Elapsed 0.086 ms (8.585 ms / 100) 8.594 -> 8.597 ( +0.03%) [ +0.00% +0.05% +0.33% / +0.03% +0.19% +0.31%] index_select random_sorted : Elapsed 0.086 ms (8.594 ms / 100) B = [20, 40, 16, 4] (stride (1, 1280, 20, 320)) A = [5, 40, 16, 4] (stride (1, 5, 800, 200)) dim = 0 1.908 -> 1.908 ( +0.00%) [ +0.21% +0.10% +0.00% / +0.21% +0.00% +0.05%] index_add_ linear : Elapsed 0.019 ms (1.912 ms / 100) 1.866 -> 1.867 ( +0.05%) [ +0.16% +0.05% +0.00% / +0.27% +0.05% +0.11%] index_copy_ linear : Elapsed 0.019 ms (1.869 ms / 100) 1.911 -> 1.908 ( -0.16%) [ +0.00% +0.26% +0.16% / -0.16% -0.16% +0.05%] index_add_ reverse : Elapsed 0.019 ms (1.911 ms / 100) 1.866 -> 1.862 ( -0.21%) [ +0.00% +0.05% +0.11% / +0.16% -0.16% -0.21%] index_copy_ reverse : Elapsed 0.019 ms (1.866 ms / 100) 1.928 -> 1.926 ( -0.10%) [ +0.26% +0.05% +0.00% / +0.05% +0.21% -0.10%] index_add_ spread : Elapsed 0.019 ms (1.933 ms / 100) 1.895 -> 1.892 ( -0.16%) [ +0.00% +0.00% +0.11% / +0.16% +0.00% -0.16%] index_copy_ spread : Elapsed 0.019 ms (1.895 ms / 100) 1.925 -> 1.923 ( -0.10%) [ +0.00% +0.47% +0.21% / +0.26% -0.10% +0.00%] index_add_ strided 3 : Elapsed 0.019 ms (1.925 ms / 100) 1.896 -> 1.890 ( -0.32%) [ +0.11% +0.05% +0.00% / -0.05% -0.32% -0.26%] index_copy_ strided 3 : Elapsed 0.019 ms (1.898 ms / 100) 1.921 -> 1.922 ( +0.05%) [ +0.31% +0.47% +0.00% / +0.26% +0.31% +0.05%] index_add_ strided 7 : Elapsed 0.019 ms (1.927 ms / 100) 1.893 -> 1.892 ( -0.05%) [ +0.16% +0.42% +0.00% / +0.00% -0.05% +0.00%] index_copy_ strided 7 : Elapsed 0.019 ms (1.896 ms / 100) 1.926 -> 1.923 ( -0.16%) [ +0.05% +0.00% +0.05% / +0.05% -0.10% -0.16%] index_add_ perm : Elapsed 0.019 ms (1.927 ms / 100) 1.890 -> 1.892 ( +0.11%) [ +0.48% +0.11% +0.00% / +0.48% +0.21% +0.11%] index_copy_ perm : Elapsed 0.019 ms (1.899 ms / 100) 1.928 -> 1.927 ( -0.05%) [ +0.00% +0.05% +0.21% / +0.10% -0.05% +0.05%] index_add_ perm_sorted : Elapsed 0.019 ms (1.928 ms / 100) 1.895 -> 1.893 ( -0.11%) [ +0.00% +0.05% +0.05% / +0.37% -0.05% -0.11%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.895 ms / 100) 8.314 -> 8.333 ( +0.23%) [ +0.00% +0.18% +0.10% / +0.31% +0.23% +0.45%] index_select const : Elapsed 0.083 ms (8.314 ms / 100) 8.323 -> 8.320 ( -0.04%) [ +0.01% +0.10% +0.00% / -0.04% +0.30% +0.23%] index_select wrap : Elapsed 0.083 ms (8.324 ms / 100) 8.316 -> 8.306 ( -0.12%) [ +0.04% +0.02% +0.00% / -0.12% +0.42% +0.43%] index_select linear : Elapsed 0.083 ms (8.319 ms / 100) 8.320 -> 8.332 ( +0.14%) [ +0.00% +0.01% +0.23% / +0.14% +0.48% +0.32%] index_select reverse : Elapsed 0.083 ms (8.320 ms / 100) 8.325 -> 8.336 ( +0.13%) [ +0.01% +0.12% +0.00% / +0.20% +0.41% +0.13%] index_select skip64 : Elapsed 0.083 ms (8.326 ms / 100) 8.314 -> 8.336 ( +0.26%) [ +0.00% +0.17% +0.02% / +0.26% +0.53% +0.36%] index_select skip256 : Elapsed 0.083 ms (8.314 ms / 100) 8.310 -> 8.325 ( +0.18%) [ +0.12% +0.00% +0.20% / +0.18% +0.58% +0.37%] index_select spread : Elapsed 0.083 ms (8.320 ms / 100) 8.317 -> 8.321 ( +0.05%) [ +0.12% +0.00% +0.01% / +0.05% +0.26% +0.53%] index_select strided 3 : Elapsed 0.083 ms (8.327 ms / 100) 8.318 -> 8.305 ( -0.16%) [ +0.00% +0.08% +0.02% / -0.16% +0.22% +0.26%] index_select random : Elapsed 0.083 ms (8.318 ms / 100) 8.321 -> 8.319 ( -0.02%) [ +0.12% +0.18% +0.00% / -0.02% +0.40% +0.56%] index_select random_sorted : Elapsed 0.083 ms (8.331 ms / 100) B = [20, 40, 16, 4] (stride (160, 1, 3200, 40)) A = [5, 40, 16, 4] (stride (2560, 16, 1, 640)) dim = 0 1.709 -> 1.718 ( +0.53%) [ +0.41% +0.00% +0.59% / +0.53% +4.33% +4.21%] index_add_ linear : Elapsed 0.017 ms (1.716 ms / 100) 1.669 -> 1.676 ( +0.42%) [ +0.00% +0.06% +0.60% / +0.42% +4.25% +4.19%] index_copy_ linear : Elapsed 0.017 ms (1.669 ms / 100) 1.711 -> 1.717 ( +0.35%) [ +0.00% +0.18% +0.58% / +0.35% +4.21% +4.73%] index_add_ reverse : Elapsed 0.017 ms (1.711 ms / 100) 1.664 -> 1.682 ( +1.08%) [ +0.18% +0.00% +0.90% / +1.08% +4.69% +4.69%] index_copy_ reverse : Elapsed 0.017 ms (1.667 ms / 100) 1.746 -> 1.746 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +1.20% +1.32%] index_add_ spread : Elapsed 0.017 ms (1.746 ms / 100) 1.706 -> 1.708 ( +0.12%) [ +0.00% +0.12% +0.23% / +0.12% +1.11% +1.06%] index_copy_ spread : Elapsed 0.017 ms (1.706 ms / 100) 1.736 -> 1.738 ( +0.12%) [ +0.35% +0.00% +0.12% / +0.12% +1.50% +1.44%] index_add_ strided 3 : Elapsed 0.017 ms (1.742 ms / 100) 1.693 -> 1.699 ( +0.35%) [ +0.00% +0.53% +0.35% / +0.35% +1.77% +1.59%] index_copy_ strided 3 : Elapsed 0.017 ms (1.693 ms / 100) 1.742 -> 1.744 ( +0.11%) [ +0.00% +0.17% +0.17% / +0.11% +1.26% +1.03%] index_add_ strided 7 : Elapsed 0.017 ms (1.742 ms / 100) 1.702 -> 1.706 ( +0.24%) [ +0.00% +0.24% +0.06% / +0.24% +1.00% +1.00%] index_copy_ strided 7 : Elapsed 0.017 ms (1.702 ms / 100) 1.730 -> 1.741 ( +0.64%) [ +0.00% +0.23% +0.35% / +0.64% +2.14% +1.97%] index_add_ perm : Elapsed 0.017 ms (1.730 ms / 100) 1.688 -> 1.702 ( +0.83%) [ +0.24% +0.00% +0.59% / +0.83% +2.13% +1.72%] index_copy_ perm : Elapsed 0.017 ms (1.692 ms / 100) 1.733 -> 1.741 ( +0.46%) [ +0.00% +0.00% +0.17% / +0.46% +1.73% +1.79%] index_add_ perm_sorted : Elapsed 0.017 ms (1.733 ms / 100) 1.690 -> 1.696 ( +0.36%) [ +0.00% +0.00% +0.41% / +0.36% +1.89% +1.66%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.690 ms / 100) 8.209 -> 8.228 ( +0.23%) [ +0.28% +0.00% +0.35% / +0.24% +0.43% +0.23%] index_select const : Elapsed 0.082 ms (8.232 ms / 100) 8.282 -> 8.286 ( +0.05%) [ +0.07% +0.17% +0.00% / +0.05% +0.39% +0.25%] index_select wrap : Elapsed 0.083 ms (8.288 ms / 100) 8.271 -> 8.268 ( -0.04%) [ +0.35% +0.00% +0.11% / +0.07% +0.21% -0.04%] index_select linear : Elapsed 0.083 ms (8.300 ms / 100) 8.267 -> 8.277 ( +0.12%) [ +0.17% +0.00% +0.07% / +0.12% +0.51% +0.58%] index_select reverse : Elapsed 0.083 ms (8.281 ms / 100) 8.215 -> 8.214 ( -0.01%) [ +0.07% +0.04% +0.00% / -0.01% +0.27% +0.18%] index_select skip64 : Elapsed 0.082 ms (8.221 ms / 100) 8.211 -> 8.211 ( +0.00%) [ +0.00% +0.13% +0.21% / +0.00% +0.46% +0.22%] index_select skip256 : Elapsed 0.082 ms (8.211 ms / 100) 8.274 -> 8.267 ( -0.08%) [ +0.01% +0.00% +0.07% / -0.08% +0.06% +0.16%] index_select spread : Elapsed 0.083 ms (8.275 ms / 100) 8.290 -> 8.290 ( +0.00%) [ +0.25% +0.00% +0.17% / +0.00% +0.06% +0.16%] index_select strided 3 : Elapsed 0.083 ms (8.311 ms / 100) 8.289 -> 8.289 ( +0.00%) [ +0.10% +0.37% +0.00% / +0.00% +0.30% +0.40%] index_select random : Elapsed 0.083 ms (8.297 ms / 100) 8.258 -> 8.274 ( +0.19%) [ +0.00% +0.11% +0.27% / +0.19% +0.35% +0.22%] index_select random_sorted : Elapsed 0.083 ms (8.258 ms / 100) B = [20, 40, 16, 4] (stride (1, 320, 20, 12800)) A = [5, 40, 16, 4] (stride (40, 1, 200, 3200)) dim = 0 1.895 -> 1.888 ( -0.37%) [ +0.05% +0.11% +0.00% / +0.00% -0.37% -0.37%] index_add_ linear : Elapsed 0.019 ms (1.896 ms / 100) 1.841 -> 1.842 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.16% +0.05% +0.05%] index_copy_ linear : Elapsed 0.018 ms (1.843 ms / 100) 1.896 -> 1.884 ( -0.63%) [ +0.32% +0.00% +0.11% / +0.05% -0.53% -0.63%] index_add_ reverse : Elapsed 0.019 ms (1.902 ms / 100) 1.843 -> 1.839 ( -0.22%) [ +0.11% +0.27% +0.00% / -0.05% -0.11% -0.22%] index_copy_ reverse : Elapsed 0.018 ms (1.845 ms / 100) 1.917 -> 1.912 ( -0.26%) [ +0.00% +0.10% +0.00% / +0.05% -0.26% -0.16%] index_add_ spread : Elapsed 0.019 ms (1.917 ms / 100) 1.882 -> 1.877 ( -0.27%) [ +0.16% +0.11% +0.00% / +0.00% -0.11% -0.27%] index_copy_ spread : Elapsed 0.019 ms (1.885 ms / 100) 1.905 -> 1.905 ( +0.00%) [ +0.26% +0.16% +0.00% / +0.16% +0.00% +0.10%] index_add_ strided 3 : Elapsed 0.019 ms (1.910 ms / 100) 1.870 -> 1.865 ( -0.27%) [ +0.05% +0.00% +0.11% / +0.11% -0.27% -0.11%] index_copy_ strided 3 : Elapsed 0.019 ms (1.871 ms / 100) 1.913 -> 1.910 ( -0.16%) [ +0.00% +0.05% +0.10% / +0.21% -0.16% -0.16%] index_add_ strided 7 : Elapsed 0.019 ms (1.913 ms / 100) 1.874 -> 1.871 ( -0.16%) [ +0.00% +0.00% +0.05% / +0.21% -0.05% -0.16%] index_copy_ strided 7 : Elapsed 0.019 ms (1.874 ms / 100) 1.912 -> 1.914 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.16% +0.10% +0.10%] index_add_ perm : Elapsed 0.019 ms (1.912 ms / 100) 1.872 -> 1.872 ( +0.00%) [ +0.05% +0.21% +0.00% / +0.00% +0.16% +0.05%] index_copy_ perm : Elapsed 0.019 ms (1.873 ms / 100) 1.908 -> 1.903 ( -0.26%) [ +0.05% +0.00% +0.05% / +0.10% -0.26% -0.16%] index_add_ perm_sorted : Elapsed 0.019 ms (1.909 ms / 100) 1.869 -> 1.868 ( -0.05%) [ +0.37% +0.11% +0.00% / +0.21% -0.05% +0.00%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.876 ms / 100) 8.267 -> 8.282 ( +0.18%) [ +0.00% +0.28% +0.12% / +0.18% +0.25% +0.70%] index_select const : Elapsed 0.083 ms (8.267 ms / 100) 8.296 -> 8.320 ( +0.29%) [ +0.11% +0.00% +0.08% / +0.29% +0.37% +0.37%] index_select wrap : Elapsed 0.083 ms (8.305 ms / 100) 8.294 -> 8.321 ( +0.33%) [ +0.01% +0.05% +0.00% / +0.33% +0.51% +0.53%] index_select linear : Elapsed 0.083 ms (8.295 ms / 100) 8.303 -> 8.305 ( +0.02%) [ +0.08% +0.14% +0.00% / +0.02% +0.41% +0.19%] index_select reverse : Elapsed 0.083 ms (8.310 ms / 100) 8.276 -> 8.289 ( +0.16%) [ +0.19% +0.00% +0.04% / +0.28% +0.16% +0.18%] index_select skip64 : Elapsed 0.083 ms (8.292 ms / 100) 8.272 -> 8.268 ( -0.05%) [ +0.18% +0.11% +0.00% / -0.05% +0.42% +0.29%] index_select skip256 : Elapsed 0.083 ms (8.287 ms / 100) 8.309 -> 8.304 ( -0.06%) [ +0.01% +0.00% +0.10% / -0.06% +0.29% +0.20%] index_select spread : Elapsed 0.083 ms (8.310 ms / 100) 8.296 -> 8.301 ( +0.06%) [ +0.00% +0.17% +0.13% / +0.06% +0.20% +0.52%] index_select strided 3 : Elapsed 0.083 ms (8.296 ms / 100) 8.300 -> 8.293 ( -0.08%) [ +0.11% +0.00% +0.12% / -0.08% +0.30% +0.36%] index_select random : Elapsed 0.083 ms (8.309 ms / 100) 8.307 -> 8.315 ( +0.10%) [ +0.02% +0.05% +0.00% / +0.10% +0.53% +0.53%] index_select random_sorted : Elapsed 0.083 ms (8.309 ms / 100) out_shape = [5, 20, 16, 4] in_shape = [5, 40, 16, 4] idx_dim = 1 B = [5, 20, 16, 4] (stride (64, 320, 1, 16)) A = [5, 40, 16, 4] (stride (64, 320, 1, 16)) dim = 1 2.443 -> 2.444 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.20% +0.20%] index_select const : Elapsed 0.024 ms (2.444 ms / 100) 2.456 -> 2.451 ( -0.20%) [ +0.00% +0.00% +0.16% / -0.20% -0.12% -0.04%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.456 -> 2.451 ( -0.20%) [ +0.12% +0.04% +0.00% / +0.00% -0.12% -0.20%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.456 -> 2.453 ( -0.12%) [ +0.00% +0.00% +0.04% / +0.00% -0.04% -0.12%] index_select reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.08% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.444 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.00% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.450 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.00% +0.04% +0.12% / +0.12% +0.20% +0.04%] index_select spread : Elapsed 0.025 ms (2.453 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.08% +0.00% +0.12% / -0.08% -0.04% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.459 ms / 100) 2.445 -> 2.450 ( +0.20%) [ +0.00% +0.04% +0.20% / +0.20% +0.20% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.445 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.00% +0.08% +0.08% / -0.12% +0.04% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.446 -> 2.445 ( -0.04%) [ +0.12% +0.00% +0.00% / -0.04% +0.37% +0.20%] index_select strided 8 : Elapsed 0.024 ms (2.449 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.00% +0.20% +0.08% / +0.12% +0.20% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.445 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.16% +0.12% +0.12%] index_select random : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.16% +0.00% +0.12% / +0.16% +0.24% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.452 ( +0.04%) [ +0.04% +0.00% +0.20% / +0.04% +0.04% +0.16%] index_select perm : Elapsed 0.025 ms (2.452 ms / 100) 2.455 -> 2.449 ( -0.24%) [ +0.00% +0.08% +0.08% / +0.12% -0.24% -0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) B = [5, 20, 16, 4] (stride (4, 320, 20, 1)) A = [5, 40, 16, 4] (stride (640, 16, 1, 3200)) dim = 1 2.444 -> 2.445 ( +0.04%) [ +0.08% +0.00% +0.20% / +0.04% +0.53% +0.29%] index_select const : Elapsed 0.024 ms (2.446 ms / 100) 2.464 -> 2.457 ( -0.28%) [ +0.16% +0.08% +0.00% / +0.08% -0.28% -0.28%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.461 -> 2.456 ( -0.20%) [ +0.00% +0.28% +0.37% / +0.08% -0.16% -0.20%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.464 -> 2.460 ( -0.16%) [ +0.00% +0.04% +0.00% / -0.12% -0.08% -0.16%] index_select reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.12% +0.04% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.443 -> 2.447 ( +0.16%) [ +0.12% +0.25% +0.00% / +0.16% +0.45% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.12% +0.08%] index_select spread : Elapsed 0.025 ms (2.465 ms / 100) 2.464 -> 2.463 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.08% +0.28%] index_select strided 3 : Elapsed 0.025 ms (2.466 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.08% +0.08% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.456 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.24% +0.00% +0.04% / +0.33% +0.37% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.451 -> 2.451 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.16% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.451 ms / 100) 2.449 -> 2.451 ( +0.08%) [ +0.04% +0.00% +0.16% / +0.08% +0.16% +0.45%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.456 -> 2.457 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.37% +0.29%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.460 -> 2.458 ( -0.08%) [ +0.20% +0.00% +0.04% / +0.00% -0.04% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.465 -> 2.461 ( -0.16%) [ +0.32% +0.00% +0.00% / +0.04% -0.16% +0.04%] index_select perm : Elapsed 0.025 ms (2.473 ms / 100) 2.463 -> 2.464 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.32% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) B = [5, 20, 16, 4] (stride (1, 20, 400, 5)) A = [5, 40, 16, 4] (stride (1, 320, 20, 5)) dim = 1 2.405 -> 2.409 ( +0.17%) [ +0.00% +0.08% +0.12% / +0.17% +0.21% +0.29%] index_select const : Elapsed 0.024 ms (2.405 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.12% +0.04% +0.00% / -0.08% +0.04% -0.04%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.08% +0.00% +0.17% / +0.00% +0.17% -0.04%] index_select linear : Elapsed 0.024 ms (2.416 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.12% +0.12%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.00% +0.17% +0.21% / +0.17% +0.21% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.407 ms / 100) 2.404 -> 2.409 ( +0.21%) [ +0.12% +0.00% +0.17% / +0.21% +0.25% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.407 ms / 100) 2.413 -> 2.416 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.25% +0.17% +0.12%] index_select spread : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.410 ( -0.12%) [ +0.08% +0.00% +0.17% / +0.04% -0.12% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.21% +0.00% +0.08% / +0.04% +0.00% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.00% +0.21% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.407 -> 2.410 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.21% +0.17% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.410 ms / 100) 2.410 -> 2.408 ( -0.08%) [ +0.00% +0.04% +0.04% / -0.08% +0.04% +0.12%] index_select strided 16 : Elapsed 0.024 ms (2.410 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.08% +0.08%] index_select random : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.17% +0.08% +0.17%] index_select random_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.00% +0.04% +0.12% / +0.17% +0.04% -0.04%] index_select perm : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.407 ( -0.21%) [ +0.08% +0.29% +0.00% / +0.29% -0.21% -0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [5, 20, 16, 4] (stride (20, 1, 400, 100)) A = [5, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 1 1.460 -> 1.450 ( -0.68%) [ +0.00% +0.21% +0.07% / -0.68% -0.27% -0.34%] index_select const : Elapsed 0.015 ms (1.460 ms / 100) 1.473 -> 1.468 ( -0.34%) [ +0.00% +0.00% +0.00% / -0.34% +0.07% +0.41%] index_select wrap : Elapsed 0.015 ms (1.473 ms / 100) 1.472 -> 1.470 ( -0.14%) [ +0.20% +0.07% +0.00% / -0.14% +0.00% +0.41%] index_select linear : Elapsed 0.015 ms (1.475 ms / 100) 1.470 -> 1.472 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.20% +0.48% +0.14%] index_select reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.458 -> 1.450 ( -0.55%) [ +0.14% +0.00% +0.00% / -0.55% +0.21% -0.21%] index_select skip64 : Elapsed 0.015 ms (1.460 ms / 100) 1.457 -> 1.453 ( -0.27%) [ +0.00% +0.21% +0.07% / -0.27% +0.14% +0.27%] index_select skip256 : Elapsed 0.015 ms (1.457 ms / 100) 1.472 -> 1.465 ( -0.48%) [ +0.00% +0.14% +0.14% / -0.48% +0.27% +0.20%] index_select spread : Elapsed 0.015 ms (1.472 ms / 100) 1.471 -> 1.471 ( +0.00%) [ +0.27% +0.00% +0.07% / +0.00% +0.20% +0.54%] index_select strided 3 : Elapsed 0.015 ms (1.475 ms / 100) 1.460 -> 1.459 ( -0.07%) [ +0.55% +0.00% +0.27% / -0.07% +0.21% +0.34%] index_select strided 5 : Elapsed 0.015 ms (1.468 ms / 100) 1.474 -> 1.468 ( -0.41%) [ +0.00% +0.00% +0.07% / -0.41% -0.14% +0.20%] index_select strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.458 -> 1.460 ( +0.14%) [ +0.00% +0.14% +0.21% / +0.14% +0.27% +0.21%] index_select strided 8 : Elapsed 0.015 ms (1.458 ms / 100) 1.458 -> 1.457 ( -0.07%) [ +0.14% +0.00% +0.21% / -0.07% +0.34% +0.27%] index_select strided 16 : Elapsed 0.015 ms (1.460 ms / 100) 1.467 -> 1.460 ( -0.48%) [ +0.41% +0.14% +0.00% / -0.48% +0.20% +0.27%] index_select random : Elapsed 0.015 ms (1.473 ms / 100) 1.465 -> 1.462 ( -0.20%) [ +0.00% +0.27% +0.20% / -0.20% +0.48% +0.48%] index_select random_sorted : Elapsed 0.015 ms (1.465 ms / 100) 1.471 -> 1.470 ( -0.07%) [ +0.20% +0.00% +0.07% / +0.00% -0.07% +0.20%] index_select perm : Elapsed 0.015 ms (1.474 ms / 100) 1.473 -> 1.471 ( -0.14%) [ +0.00% +0.14% +0.27% / +0.00% +0.14% -0.14%] index_select perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) B = [5, 20, 16, 4] (stride (320, 1, 20, 1600)) A = [5, 40, 16, 4] (stride (1, 20, 800, 5)) dim = 1 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.12% +0.00% / -0.08% +0.25% +0.20%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.16% +0.12% +0.00% / +0.00% +0.08% +0.12%] index_select wrap : Elapsed 0.025 ms (2.459 ms / 100) 2.457 -> 2.458 ( +0.04%) [ +0.00% +0.12% +0.16% / +0.04% +0.16% +0.12%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.458 -> 2.453 ( -0.20%) [ +0.12% +0.00% +0.00% / +0.04% -0.20% -0.08%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.29% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.04% +0.08% +0.24%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.20% +0.33% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.04% +0.08% +0.00% / -0.04% -0.12% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.454 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.20% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.452 ( +0.20%) [ +0.00% +0.16% +0.16% / +0.25% +0.37% +0.20%] index_select strided 16 : Elapsed 0.024 ms (2.447 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.33% +0.29%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.16% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.12% -0.04%] index_select perm : Elapsed 0.025 ms (2.457 ms / 100) 2.462 -> 2.452 ( -0.41%) [ +0.08% +0.08% +0.00% / +0.04% -0.28% -0.41%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) out_shape = [5, 40, 20, 4] in_shape = [5, 40, 16, 4] idx_dim = 2 B = [5, 40, 20, 4] (stride (3200, 1, 40, 800)) A = [5, 40, 16, 4] (stride (4, 320, 20, 1)) dim = 2 4.050 -> 4.049 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.77% +0.77%] index_add_ linear : Elapsed 0.041 ms (4.050 ms / 100) 3.916 -> 3.920 ( +0.10%) [ +0.00% +0.15% +0.13% / +0.10% +0.77% +0.66%] index_copy_ linear : Elapsed 0.039 ms (3.916 ms / 100) 4.061 -> 4.063 ( +0.05%) [ +0.00% +0.07% +0.02% / +0.05% +0.64% +0.71%] index_add_ reverse : Elapsed 0.041 ms (4.061 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.00% +0.10% +0.08% / +0.10% +0.69% +0.77%] index_copy_ reverse : Elapsed 0.039 ms (3.920 ms / 100) 4.041 -> 4.041 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_add_ spread : Elapsed 0.040 ms (4.042 ms / 100) 3.912 -> 3.918 ( +0.15%) [ +0.28% +0.23% +0.00% / +0.15% +0.56% +0.61%] index_copy_ spread : Elapsed 0.039 ms (3.923 ms / 100) 4.050 -> 4.055 ( +0.12%) [ +0.15% +0.07% +0.00% / +0.12% +0.77% +0.74%] index_add_ strided 3 : Elapsed 0.041 ms (4.056 ms / 100) 3.915 -> 3.919 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.79% +0.82%] index_copy_ strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 4.055 -> 4.055 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.67% +0.64%] index_add_ strided 7 : Elapsed 0.041 ms (4.056 ms / 100) 3.918 -> 3.920 ( +0.05%) [ +0.00% +0.10% +0.13% / +0.05% +0.64% +0.61%] index_copy_ strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 4.041 -> 4.044 ( +0.07%) [ +0.07% +0.02% +0.00% / +0.07% +0.69% +0.64%] index_add_ perm : Elapsed 0.040 ms (4.044 ms / 100) 3.916 -> 3.915 ( -0.03%) [ +0.05% +0.10% +0.00% / -0.03% +0.46% +0.41%] index_copy_ perm : Elapsed 0.039 ms (3.918 ms / 100) 4.062 -> 4.063 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.64% +0.59%] index_add_ perm_sorted : Elapsed 0.041 ms (4.062 ms / 100) 3.920 -> 3.923 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.74% +0.66%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 5.548 -> 5.561 ( +0.23%) [ +0.27% +0.00% +0.31% / +0.27% +0.23% +0.34%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.583 -> 5.582 ( -0.02%) [ +0.11% +0.00% +0.09% / +0.04% +0.02% -0.02%] index_select wrap : Elapsed 0.056 ms (5.589 ms / 100) 5.583 -> 5.584 ( +0.02%) [ +0.07% +0.00% +0.04% / +0.09% +0.02% +0.02%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.583 -> 5.583 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.27% +0.00% +0.11%] index_select reverse : Elapsed 0.056 ms (5.587 ms / 100) 5.561 -> 5.557 ( -0.07%) [ +0.05% +0.00% +0.04% / -0.07% +0.00% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.09% +0.00% +0.04% / -0.02% +0.18% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.567 ms / 100) 5.584 -> 5.574 ( -0.18%) [ +0.00% +0.04% +0.05% / -0.18% -0.14% +0.07%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.581 -> 5.580 ( -0.02%) [ +0.11% +0.18% +0.00% / +0.05% +0.11% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.587 ms / 100) 5.588 -> 5.578 ( -0.18%) [ +0.00% +0.05% +0.05% / -0.05% +0.04% -0.18%] index_select strided 5 : Elapsed 0.056 ms (5.588 ms / 100) 5.587 -> 5.582 ( -0.09%) [ +0.09% +0.02% +0.00% / +0.04% -0.09% -0.05%] index_select strided 7 : Elapsed 0.056 ms (5.592 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.16% +0.00% +0.05% / +0.02% +0.13% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.571 ms / 100) 5.573 -> 5.575 ( +0.04%) [ +0.25% +0.22% +0.00% / +0.16% +0.04% +0.14%] index_select random : Elapsed 0.056 ms (5.587 ms / 100) 5.579 -> 5.576 ( -0.05%) [ +0.00% +0.11% +0.23% / +0.05% +0.07% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.579 ms / 100) B = [5, 40, 20, 4] (stride (800, 1, 40, 4000)) A = [5, 40, 16, 4] (stride (16, 80, 1, 3200)) dim = 2 4.452 -> 4.452 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.70% +0.70%] index_add_ linear : Elapsed 0.045 ms (4.452 ms / 100) 4.273 -> 4.274 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.75% +0.82%] index_copy_ linear : Elapsed 0.043 ms (4.275 ms / 100) 4.443 -> 4.444 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.61% +0.56%] index_add_ reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.266 -> 4.267 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.70% +0.66%] index_copy_ reverse : Elapsed 0.043 ms (4.267 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.09% +0.05% +0.00% / +0.02% +0.61% +0.68%] index_add_ spread : Elapsed 0.044 ms (4.444 ms / 100) 4.264 -> 4.271 ( +0.16%) [ +0.02% +0.16% +0.00% / +0.16% +0.73% +0.66%] index_copy_ spread : Elapsed 0.043 ms (4.265 ms / 100) 4.449 -> 4.447 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.72% +0.72%] index_add_ strided 3 : Elapsed 0.045 ms (4.451 ms / 100) 4.270 -> 4.271 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.96% +0.96%] index_copy_ strided 3 : Elapsed 0.043 ms (4.271 ms / 100) 4.445 -> 4.443 ( -0.04%) [ +0.02% +0.02% +0.00% / -0.04% +0.58% +0.63%] index_add_ strided 7 : Elapsed 0.044 ms (4.446 ms / 100) 4.266 -> 4.269 ( +0.07%) [ +0.09% +0.00% +0.07% / +0.07% +0.68% +0.66%] index_copy_ strided 7 : Elapsed 0.043 ms (4.270 ms / 100) 4.449 -> 4.450 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.72% +0.70%] index_add_ perm : Elapsed 0.045 ms (4.451 ms / 100) 4.273 -> 4.272 ( -0.02%) [ +0.00% +0.05% +0.07% / -0.02% +0.68% +0.66%] index_copy_ perm : Elapsed 0.043 ms (4.273 ms / 100) 4.448 -> 4.453 ( +0.11%) [ +0.11% +0.13% +0.00% / +0.11% +0.65% +0.56%] index_add_ perm_sorted : Elapsed 0.045 ms (4.453 ms / 100) 4.275 -> 4.277 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.77% +0.70%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.275 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.00% +0.04% +0.14% / +0.05% +0.04% +0.05%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.574 -> 5.575 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.11% +0.02% +0.04%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.11% +0.00% +0.09%] index_select linear : Elapsed 0.056 ms (5.571 ms / 100) 5.567 -> 5.573 ( +0.11%) [ +0.27% +0.09% +0.00% / +0.11% +0.23% +0.22%] index_select reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.565 -> 5.571 ( +0.11%) [ +0.00% +0.00% +0.05% / +0.13% +0.11% +0.13%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.11% +0.02% +0.00% / +0.00% +0.18% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.568 ms / 100) 5.571 -> 5.570 ( -0.02%) [ +0.11% +0.14% +0.00% / -0.02% +0.14% +0.22%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.575 -> 5.575 ( +0.00%) [ +0.04% +0.00% +0.05% / +0.00% +0.07% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.575 -> 5.567 ( -0.14%) [ +0.00% +0.07% +0.00% / -0.14% +0.05% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.575 ms / 100) 5.572 -> 5.567 ( -0.09%) [ +0.02% +0.16% +0.00% / -0.09% +0.22% +0.18%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.11% +0.00% +0.13% / +0.05% +0.27% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.575 ms / 100) 5.565 -> 5.573 ( +0.14%) [ +0.18% +0.16% +0.00% / +0.20% +0.14% +0.25%] index_select random : Elapsed 0.056 ms (5.575 ms / 100) 5.567 -> 5.575 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.16% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) out_shape = [5, 40, 16, 20] in_shape = [5, 40, 16, 4] idx_dim = 3 B = [5, 40, 16, 20] (stride (12800, 16, 1, 640)) A = [5, 40, 16, 4] (stride (2560, 16, 1, 640)) dim = 3 0.739 -> 0.741 ( +0.27%) [ +0.14% +0.14% +0.00% / +0.27% +0.54% +0.54%] index_add_ linear : Elapsed 0.007 ms (0.740 ms / 100) 0.726 -> 0.723 ( -0.41%) [ +0.28% +0.55% +0.00% / +0.41% +0.14% -0.41%] index_copy_ linear : Elapsed 0.007 ms (0.728 ms / 100) 0.742 -> 0.736 ( -0.81%) [ +0.54% +0.27% +0.00% / +0.27% -0.81% -0.81%] index_add_ reverse : Elapsed 0.007 ms (0.746 ms / 100) 0.731 -> 0.719 ( -1.64%) [ +0.14% +0.00% +0.00% / +0.00% -1.50% -1.64%] index_copy_ reverse : Elapsed 0.007 ms (0.732 ms / 100) 0.741 -> 0.741 ( +0.00%) [ +0.00% +0.54% +0.00% / +0.27% +0.54% +0.00%] index_add_ spread : Elapsed 0.007 ms (0.741 ms / 100) 0.730 -> 0.730 ( +0.00%) [ +0.00% +0.00% +0.27% / +0.00% +0.00% +0.14%] index_copy_ spread : Elapsed 0.007 ms (0.730 ms / 100) 0.741 -> 0.741 ( +0.00%) [ +0.00% +0.27% +0.00% / +0.00% +0.54% +0.13%] index_add_ strided 3 : Elapsed 0.007 ms (0.741 ms / 100) 0.721 -> 0.726 ( +0.69%) [ +0.42% +0.00% +0.28% / +0.69% +1.53% +0.69%] index_copy_ strided 3 : Elapsed 0.007 ms (0.724 ms / 100) 0.735 -> 0.737 ( +0.27%) [ +0.54% +0.68% +0.00% / +0.27% +0.27% +0.27%] index_add_ strided 7 : Elapsed 0.007 ms (0.739 ms / 100) 0.724 -> 0.725 ( +0.14%) [ +0.00% +0.14% +0.00% / +1.10% +0.41% +0.14%] index_copy_ strided 7 : Elapsed 0.007 ms (0.724 ms / 100) 0.746 -> 0.742 ( -0.54%) [ +0.00% +0.13% +0.27% / +0.54% -0.13% -0.54%] index_add_ perm : Elapsed 0.007 ms (0.746 ms / 100) 0.736 -> 0.728 ( -1.09%) [ +0.27% +0.00% +0.41% / +0.14% -1.09% -1.09%] index_copy_ perm : Elapsed 0.007 ms (0.738 ms / 100) 0.750 -> 0.746 ( -0.53%) [ +0.13% +0.27% +0.00% / +0.13% -0.53% -0.53%] index_add_ perm_sorted : Elapsed 0.008 ms (0.751 ms / 100) 0.736 -> 0.731 ( -0.68%) [ +0.54% +0.27% +0.00% / +0.82% -0.68% -0.68%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.740 ms / 100) 4.915 -> 4.930 ( +0.31%) [ +0.14% +0.00% +0.20% / +0.31% +0.37% +0.53%] index_select const : Elapsed 0.049 ms (4.922 ms / 100) 4.946 -> 4.948 ( +0.04%) [ +0.28% +0.04% +0.00% / +0.04% +0.44% +0.10%] index_select wrap : Elapsed 0.050 ms (4.960 ms / 100) 4.939 -> 4.941 ( +0.04%) [ +0.00% +0.20% +0.14% / +0.14% +0.14% +0.04%] index_select linear : Elapsed 0.049 ms (4.939 ms / 100) 4.948 -> 4.939 ( -0.18%) [ +0.00% +0.06% +0.24% / +0.16% +0.14% -0.18%] index_select reverse : Elapsed 0.049 ms (4.948 ms / 100) 4.923 -> 4.937 ( +0.28%) [ +0.06% +0.28% +0.00% / +0.41% +0.39% +0.28%] index_select skip64 : Elapsed 0.049 ms (4.926 ms / 100) 4.915 -> 4.920 ( +0.10%) [ +0.16% +0.00% +0.28% / +0.10% +0.39% +0.57%] index_select skip256 : Elapsed 0.049 ms (4.923 ms / 100) 4.939 -> 4.943 ( +0.08%) [ +0.24% +0.00% +0.06% / +0.16% +0.16% +0.08%] index_select spread : Elapsed 0.050 ms (4.951 ms / 100) 4.948 -> 4.939 ( -0.18%) [ +0.00% +0.02% +0.06% / -0.08% +0.18% -0.18%] index_select strided 3 : Elapsed 0.049 ms (4.948 ms / 100) 4.951 -> 4.943 ( -0.16%) [ +0.16% +0.00% +0.10% / -0.16% +0.08% -0.02%] index_select random : Elapsed 0.050 ms (4.959 ms / 100) 4.949 -> 4.944 ( -0.10%) [ +0.04% +0.00% +0.26% / -0.10% +0.26% +0.10%] index_select random_sorted : Elapsed 0.050 ms (4.951 ms / 100) B = [5, 40, 16, 20] (stride (12800, 16, 1, 640)) A = [5, 40, 16, 4] (stride (1, 5, 800, 200)) dim = 3 2.048 -> 2.045 ( -0.15%) [ +0.05% +0.20% +0.00% / -0.15% +0.34% +0.20%] index_add_ linear : Elapsed 0.020 ms (2.049 ms / 100) 1.998 -> 1.996 ( -0.10%) [ +0.00% +0.10% +0.05% / -0.10% +0.05% +0.30%] index_copy_ linear : Elapsed 0.020 ms (1.998 ms / 100) 2.041 -> 2.045 ( +0.20%) [ +0.49% +0.15% +0.00% / +0.20% +0.49% +0.39%] index_add_ reverse : Elapsed 0.021 ms (2.051 ms / 100) 1.993 -> 1.996 ( +0.15%) [ +0.40% +0.15% +0.00% / +0.20% +0.15% +0.35%] index_copy_ reverse : Elapsed 0.020 ms (2.001 ms / 100) 2.047 -> 2.049 ( +0.10%) [ +0.00% +0.15% +0.05% / +0.15% +0.39% +0.10%] index_add_ spread : Elapsed 0.020 ms (2.047 ms / 100) 1.993 -> 1.998 ( +0.25%) [ +0.00% +0.30% +0.20% / +0.25% +0.80% +0.30%] index_copy_ spread : Elapsed 0.020 ms (1.993 ms / 100) 2.043 -> 2.043 ( +0.00%) [ +0.44% +0.39% +0.00% / +0.00% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.021 ms (2.052 ms / 100) 1.995 -> 1.994 ( -0.05%) [ +0.25% +0.10% +0.00% / -0.05% +0.40% +0.45%] index_copy_ strided 3 : Elapsed 0.020 ms (2.000 ms / 100) 2.053 -> 2.053 ( +0.00%) [ +0.19% +0.24% +0.00% / +0.10% +0.00% +0.39%] index_add_ strided 7 : Elapsed 0.021 ms (2.057 ms / 100) 1.998 -> 1.997 ( -0.05%) [ +0.00% +0.15% +0.05% / +0.30% -0.05% +0.60%] index_copy_ strided 7 : Elapsed 0.020 ms (1.998 ms / 100) 2.045 -> 2.047 ( +0.10%) [ +0.34% +0.00% +0.24% / +0.10% +0.15% +0.39%] index_add_ perm : Elapsed 0.021 ms (2.052 ms / 100) 1.994 -> 1.993 ( -0.05%) [ +0.20% +0.00% +0.35% / -0.05% +0.05% +0.40%] index_copy_ perm : Elapsed 0.020 ms (1.998 ms / 100) 2.041 -> 2.045 ( +0.20%) [ +0.24% +0.39% +0.00% / +0.29% +0.20% +0.69%] index_add_ perm_sorted : Elapsed 0.020 ms (2.046 ms / 100) 1.993 -> 1.995 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.45% +0.55%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.994 ms / 100) 8.703 -> 8.694 ( -0.10%) [ +0.01% +0.00% +0.07% / +0.07% -0.10% +0.00%] index_select const : Elapsed 0.087 ms (8.704 ms / 100) 8.752 -> 8.730 ( -0.25%) [ +0.07% +0.01% +0.00% / +0.03% -0.09% -0.25%] index_select wrap : Elapsed 0.088 ms (8.758 ms / 100) 8.737 -> 8.725 ( -0.14%) [ +0.39% +0.22% +0.00% / +0.19% -0.06% -0.14%] index_select linear : Elapsed 0.088 ms (8.771 ms / 100) 8.743 -> 8.726 ( -0.19%) [ +0.00% +0.02% +0.05% / +0.27% -0.13% -0.19%] index_select reverse : Elapsed 0.087 ms (8.743 ms / 100) 8.689 -> 8.707 ( +0.21%) [ +0.00% +0.17% +0.07% / +0.21% +0.24% +0.23%] index_select skip64 : Elapsed 0.087 ms (8.689 ms / 100) 8.694 -> 8.708 ( +0.16%) [ +0.02% +0.00% +0.22% / +0.29% +0.23% +0.16%] index_select skip256 : Elapsed 0.087 ms (8.696 ms / 100) 8.748 -> 8.727 ( -0.24%) [ +0.48% +0.09% +0.00% / +0.24% -0.24% -0.06%] index_select spread : Elapsed 0.088 ms (8.790 ms / 100) 8.753 -> 8.726 ( -0.31%) [ +0.31% +0.00% +0.14% / +0.15% -0.08% -0.31%] index_select strided 3 : Elapsed 0.088 ms (8.780 ms / 100) 8.754 -> 8.754 ( +0.00%) [ +0.34% +0.06% +0.00% / +0.01% +0.00% +0.05%] index_select random : Elapsed 0.088 ms (8.784 ms / 100) 8.739 -> 8.757 ( +0.21%) [ +0.17% +0.06% +0.00% / +0.21% +0.22% +0.22%] index_select random_sorted : Elapsed 0.088 ms (8.754 ms / 100) B = [5, 40, 16, 20] (stride (1, 1600, 5, 80)) A = [5, 40, 16, 4] (stride (1, 320, 20, 5)) dim = 3 2.074 -> 2.076 ( +0.10%) [ +0.19% +0.19% +0.00% / +0.10% +0.19% +0.58%] index_add_ linear : Elapsed 0.021 ms (2.078 ms / 100) 2.017 -> 2.016 ( -0.05%) [ +0.00% +0.10% +0.10% / -0.05% +0.50% +0.45%] index_copy_ linear : Elapsed 0.020 ms (2.017 ms / 100) 2.064 -> 2.063 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.58% +0.44%] index_add_ reverse : Elapsed 0.021 ms (2.065 ms / 100) 2.001 -> 2.000 ( -0.05%) [ +0.10% +0.20% +0.00% / -0.05% +0.50% +0.40%] index_copy_ reverse : Elapsed 0.020 ms (2.003 ms / 100) 2.038 -> 2.039 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.05% +0.79% +0.69%] index_add_ spread : Elapsed 0.020 ms (2.038 ms / 100) 1.989 -> 1.988 ( -0.05%) [ +0.00% +0.15% +0.10% / -0.05% +0.60% +0.60%] index_copy_ spread : Elapsed 0.020 ms (1.989 ms / 100) 2.076 -> 2.077 ( +0.05%) [ +0.05% +0.19% +0.00% / +0.05% +0.29% +0.29%] index_add_ strided 3 : Elapsed 0.021 ms (2.077 ms / 100) 2.013 -> 2.019 ( +0.30%) [ +0.00% +0.05% +0.05% / +0.30% +0.60% +0.55%] index_copy_ strided 3 : Elapsed 0.020 ms (2.013 ms / 100) 2.070 -> 2.069 ( -0.05%) [ +0.24% +0.00% +0.00% / -0.05% +0.39% +0.39%] index_add_ strided 7 : Elapsed 0.021 ms (2.075 ms / 100) 2.011 -> 2.016 ( +0.25%) [ +0.20% +0.00% +0.05% / +0.25% +0.55% +0.50%] index_copy_ strided 7 : Elapsed 0.020 ms (2.015 ms / 100) 2.075 -> 2.072 ( -0.14%) [ +0.00% +0.14% +0.05% / -0.14% +0.29% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.075 ms / 100) 2.010 -> 2.010 ( +0.00%) [ +0.30% +0.40% +0.00% / +0.00% +0.70% +0.60%] index_copy_ perm : Elapsed 0.020 ms (2.016 ms / 100) 2.068 -> 2.071 ( +0.15%) [ +0.00% +0.29% +0.05% / +0.15% +0.34% +0.24%] index_add_ perm_sorted : Elapsed 0.021 ms (2.068 ms / 100) 2.011 -> 2.010 ( -0.05%) [ +0.10% +0.30% +0.00% / -0.05% +0.50% +0.45%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.013 ms / 100) 8.769 -> 8.773 ( +0.05%) [ +0.00% +0.10% +0.24% / +0.05% +0.21% +0.11%] index_select const : Elapsed 0.088 ms (8.769 ms / 100) 8.783 -> 8.783 ( +0.00%) [ +0.02% +0.00% +0.08% / +0.00% +0.14% +0.31%] index_select wrap : Elapsed 0.088 ms (8.785 ms / 100) 8.769 -> 8.765 ( -0.05%) [ +0.17% +0.03% +0.00% / -0.05% +0.27% +0.44%] index_select linear : Elapsed 0.088 ms (8.784 ms / 100) 8.777 -> 8.775 ( -0.02%) [ +0.11% +0.05% +0.00% / -0.02% +0.33% +0.32%] index_select reverse : Elapsed 0.088 ms (8.787 ms / 100) 8.776 -> 8.780 ( +0.05%) [ +0.09% +0.00% +0.02% / +0.05% +0.08% +0.17%] index_select skip64 : Elapsed 0.088 ms (8.784 ms / 100) 8.766 -> 8.776 ( +0.11%) [ +0.00% +0.07% +0.17% / +0.18% +0.11% +0.15%] index_select skip256 : Elapsed 0.088 ms (8.766 ms / 100) 8.776 -> 8.786 ( +0.11%) [ +0.26% +0.00% +0.39% / +0.11% +0.35% +0.21%] index_select spread : Elapsed 0.088 ms (8.799 ms / 100) 8.768 -> 8.773 ( +0.06%) [ +0.07% +0.01% +0.00% / +0.06% +0.59% +0.24%] index_select strided 3 : Elapsed 0.088 ms (8.774 ms / 100) 8.780 -> 8.783 ( +0.03%) [ +0.00% +0.10% +0.03% / +0.03% +0.50% +0.14%] index_select random : Elapsed 0.088 ms (8.780 ms / 100) 8.769 -> 8.781 ( +0.14%) [ +0.00% +0.07% +0.06% / +0.14% +0.32% +0.24%] index_select random_sorted : Elapsed 0.088 ms (8.769 ms / 100) B = [5, 40, 16, 20] (stride (40, 1, 4000, 200)) A = [5, 40, 16, 4] (stride (1, 320, 5, 80)) dim = 3 1.963 -> 1.964 ( +0.05%) [ +0.25% +0.20% +0.00% / +0.25% +0.05% +0.25%] index_add_ linear : Elapsed 0.020 ms (1.968 ms / 100) 1.925 -> 1.927 ( +0.10%) [ +0.26% +0.00% +0.00% / +0.10% +0.31% +0.26%] index_copy_ linear : Elapsed 0.019 ms (1.930 ms / 100) 1.965 -> 1.965 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.10% +0.00% +0.10%] index_add_ reverse : Elapsed 0.020 ms (1.965 ms / 100) 1.925 -> 1.927 ( +0.10%) [ +0.16% +0.10% +0.00% / +0.10% +0.31% +0.16%] index_copy_ reverse : Elapsed 0.019 ms (1.928 ms / 100) 1.956 -> 1.959 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.15% +0.20% +0.15%] index_add_ spread : Elapsed 0.020 ms (1.956 ms / 100) 1.910 -> 1.912 ( +0.10%) [ +0.37% +0.00% +0.05% / +0.10% +0.52% +0.52%] index_copy_ spread : Elapsed 0.019 ms (1.917 ms / 100) 1.971 -> 1.971 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.25% +0.20%] index_add_ strided 3 : Elapsed 0.020 ms (1.971 ms / 100) 1.925 -> 1.930 ( +0.26%) [ +0.21% +0.16% +0.00% / +0.36% +0.26% +0.57%] index_copy_ strided 3 : Elapsed 0.019 ms (1.929 ms / 100) 1.967 -> 1.968 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.05% +0.41%] index_add_ strided 7 : Elapsed 0.020 ms (1.967 ms / 100) 1.922 -> 1.929 ( +0.36%) [ +0.00% +0.52% +0.26% / +0.36% +0.52% +0.36%] index_copy_ strided 7 : Elapsed 0.019 ms (1.922 ms / 100) 1.957 -> 1.955 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.10% +0.46% +0.41%] index_add_ perm : Elapsed 0.020 ms (1.958 ms / 100) 1.914 -> 1.912 ( -0.10%) [ +0.00% +0.16% +0.10% / -0.10% +0.63% +0.57%] index_copy_ perm : Elapsed 0.019 ms (1.914 ms / 100) 1.961 -> 1.960 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.61% +0.36%] index_add_ perm_sorted : Elapsed 0.020 ms (1.961 ms / 100) 1.916 -> 1.917 ( +0.05%) [ +0.00% +0.16% +0.16% / +0.05% +0.78% +0.94%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.916 ms / 100) 8.734 -> 8.743 ( +0.10%) [ +0.00% +0.01% +0.13% / +0.10% +0.26% +0.18%] index_select const : Elapsed 0.087 ms (8.734 ms / 100) 8.764 -> 8.788 ( +0.27%) [ +0.08% +0.00% +0.24% / +0.27% +0.29% +0.39%] index_select wrap : Elapsed 0.088 ms (8.771 ms / 100) 8.750 -> 8.778 ( +0.32%) [ +0.03% +0.00% +0.19% / +0.45% +0.41% +0.32%] index_select linear : Elapsed 0.088 ms (8.753 ms / 100) 8.731 -> 8.755 ( +0.27%) [ +0.27% +0.09% +0.00% / +0.27% +0.27% +0.44%] index_select reverse : Elapsed 0.088 ms (8.755 ms / 100) 8.729 -> 8.735 ( +0.07%) [ +0.09% +0.15% +0.00% / +0.07% +0.13% +0.26%] index_select skip64 : Elapsed 0.087 ms (8.737 ms / 100) 8.737 -> 8.727 ( -0.11%) [ +0.00% +0.19% +0.13% / -0.11% +0.30% +0.07%] index_select skip256 : Elapsed 0.087 ms (8.737 ms / 100) 8.764 -> 8.754 ( -0.11%) [ +0.09% +0.00% +0.00% / -0.11% +0.05% +0.08%] index_select spread : Elapsed 0.088 ms (8.772 ms / 100) 8.783 -> 8.770 ( -0.15%) [ +0.11% +0.16% +0.00% / +0.09% +0.13% -0.15%] index_select strided 3 : Elapsed 0.088 ms (8.793 ms / 100) 8.768 -> 8.779 ( +0.13%) [ +0.32% +0.00% +0.21% / +0.13% +0.26% +0.15%] index_select random : Elapsed 0.088 ms (8.796 ms / 100) 8.762 -> 8.761 ( -0.01%) [ +0.00% +0.00% +0.13% / -0.01% +0.09% +0.18%] index_select random_sorted : Elapsed 0.088 ms (8.762 ms / 100) B = [5, 40, 16, 20] (stride (40, 1, 4000, 200)) A = [5, 40, 16, 4] (stride (16, 80, 1, 3200)) dim = 3 1.982 -> 1.958 ( -1.21%) [ +0.30% +0.10% +0.00% / +0.10% -0.96% -1.21%] index_add_ linear : Elapsed 0.020 ms (1.988 ms / 100) 1.938 -> 1.919 ( -0.98%) [ +0.00% +0.10% +0.05% / +0.05% -0.98% -0.88%] index_copy_ linear : Elapsed 0.019 ms (1.938 ms / 100) 1.978 -> 1.955 ( -1.16%) [ +0.35% +0.05% +0.00% / +0.00% -1.16% -1.06%] index_add_ reverse : Elapsed 0.020 ms (1.985 ms / 100) 1.934 -> 1.913 ( -1.09%) [ +0.05% +0.00% +0.26% / +0.26% -1.09% -0.52%] index_copy_ reverse : Elapsed 0.019 ms (1.935 ms / 100) 1.979 -> 1.963 ( -0.81%) [ +0.20% +0.15% +0.00% / +0.25% -0.76% -0.81%] index_add_ spread : Elapsed 0.020 ms (1.983 ms / 100) 1.936 -> 1.919 ( -0.88%) [ +0.00% +0.00% +0.05% / -0.10% -0.88% -0.67%] index_copy_ spread : Elapsed 0.019 ms (1.936 ms / 100) 1.985 -> 1.962 ( -1.16%) [ +0.20% +0.30% +0.00% / +0.10% -0.91% -1.16%] index_add_ strided 3 : Elapsed 0.020 ms (1.989 ms / 100) 1.944 -> 1.924 ( -1.03%) [ +0.10% +0.00% +0.05% / -0.10% -1.03% -0.98%] index_copy_ strided 3 : Elapsed 0.019 ms (1.946 ms / 100) 1.979 -> 1.967 ( -0.61%) [ +0.30% +0.10% +0.00% / +0.05% -0.56% -0.61%] index_add_ strided 7 : Elapsed 0.020 ms (1.985 ms / 100) 1.933 -> 1.920 ( -0.67%) [ +0.10% +0.00% +0.31% / +0.16% -0.67% -0.47%] index_copy_ strided 7 : Elapsed 0.019 ms (1.935 ms / 100) 1.989 -> 1.968 ( -1.06%) [ +0.25% +0.00% +0.40% / +0.20% -0.85% -1.06%] index_add_ perm : Elapsed 0.020 ms (1.994 ms / 100) 1.942 -> 1.922 ( -1.03%) [ +0.31% +0.00% +0.10% / +0.00% -1.03% -0.88%] index_copy_ perm : Elapsed 0.019 ms (1.948 ms / 100) 1.987 -> 1.967 ( -1.01%) [ +0.00% +0.05% +0.05% / +0.05% -1.01% -0.91%] index_add_ perm_sorted : Elapsed 0.020 ms (1.987 ms / 100) 1.940 -> 1.924 ( -0.82%) [ +0.15% +0.21% +0.00% / +0.05% -0.41% -0.82%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.943 ms / 100) 8.714 -> 8.720 ( +0.07%) [ +0.15% +0.00% +0.16% / +0.23% +0.09% +0.07%] index_select const : Elapsed 0.087 ms (8.727 ms / 100) 8.787 -> 8.788 ( +0.01%) [ +0.00% +0.02% +0.05% / +0.01% +0.23% +0.03%] index_select wrap : Elapsed 0.088 ms (8.787 ms / 100) 8.741 -> 8.753 ( +0.14%) [ +0.00% +0.14% +0.21% / +0.14% +0.41% +0.34%] index_select linear : Elapsed 0.087 ms (8.741 ms / 100) 8.734 -> 8.747 ( +0.15%) [ +0.46% +0.00% +0.25% / +0.15% +0.58% +0.31%] index_select reverse : Elapsed 0.088 ms (8.774 ms / 100) 8.710 -> 8.716 ( +0.07%) [ +0.10% +0.40% +0.00% / +0.07% +0.25% +0.36%] index_select skip64 : Elapsed 0.087 ms (8.719 ms / 100) 8.715 -> 8.710 ( -0.06%) [ +0.08% +0.00% +0.16% / -0.06% +0.09% +0.31%] index_select skip256 : Elapsed 0.087 ms (8.722 ms / 100) 8.762 -> 8.767 ( +0.06%) [ +0.00% +0.19% +0.30% / +0.13% +0.06% +0.34%] index_select spread : Elapsed 0.088 ms (8.762 ms / 100) 8.778 -> 8.794 ( +0.18%) [ +0.00% +0.24% +0.08% / +0.18% +0.21% +0.42%] index_select strided 3 : Elapsed 0.088 ms (8.778 ms / 100) 8.781 -> 8.783 ( +0.02%) [ +0.00% +0.28% +0.00% / +0.02% +0.07% +0.05%] index_select random : Elapsed 0.088 ms (8.781 ms / 100) 8.765 -> 8.769 ( +0.05%) [ +0.00% +0.09% +0.07% / +0.24% +0.27% +0.05%] index_select random_sorted : Elapsed 0.088 ms (8.765 ms / 100) B = [5, 40, 16, 20] (stride (1, 5, 4000, 200)) A = [5, 40, 16, 4] (stride (2560, 64, 4, 1)) dim = 3 1.874 -> 1.877 ( +0.16%) [ +0.21% +0.00% +0.11% / +0.16% +0.80% +0.48%] index_add_ linear : Elapsed 0.019 ms (1.878 ms / 100) 1.842 -> 1.845 ( +0.16%) [ +0.05% +0.00% +0.00% / +0.16% +0.60% +0.33%] index_copy_ linear : Elapsed 0.018 ms (1.843 ms / 100) 1.877 -> 1.879 ( +0.11%) [ +0.05% +0.05% +0.00% / +0.11% +0.37% +0.48%] index_add_ reverse : Elapsed 0.019 ms (1.878 ms / 100) 1.843 -> 1.841 ( -0.11%) [ +0.05% +0.00% +0.00% / -0.11% +0.43% +0.43%] index_copy_ reverse : Elapsed 0.018 ms (1.844 ms / 100) 1.870 -> 1.872 ( +0.11%) [ +0.05% +0.00% +0.27% / +0.11% +0.11% +0.21%] index_add_ spread : Elapsed 0.019 ms (1.871 ms / 100) 1.832 -> 1.837 ( +0.27%) [ +0.27% +0.00% +0.33% / +0.27% +0.27% +0.44%] index_copy_ spread : Elapsed 0.018 ms (1.837 ms / 100) 1.887 -> 1.894 ( +0.37%) [ +0.26% +0.26% +0.00% / +0.37% +0.90% +0.85%] index_add_ strided 3 : Elapsed 0.019 ms (1.892 ms / 100) 1.850 -> 1.853 ( +0.16%) [ +0.11% +0.00% +0.05% / +0.16% +0.97% +0.97%] index_copy_ strided 3 : Elapsed 0.019 ms (1.852 ms / 100) 1.889 -> 1.890 ( +0.05%) [ +0.00% +0.11% +0.32% / +0.05% +0.69% +0.79%] index_add_ strided 7 : Elapsed 0.019 ms (1.889 ms / 100) 1.852 -> 1.854 ( +0.11%) [ +0.16% +0.00% +0.11% / +0.11% +0.70% +0.59%] index_copy_ strided 7 : Elapsed 0.019 ms (1.855 ms / 100) 1.875 -> 1.874 ( -0.05%) [ +0.05% +0.00% +0.16% / +0.11% -0.05% +0.00%] index_add_ perm : Elapsed 0.019 ms (1.876 ms / 100) 1.837 -> 1.834 ( -0.16%) [ +0.00% +0.05% +0.05% / -0.16% +0.22% +0.00%] index_copy_ perm : Elapsed 0.018 ms (1.837 ms / 100) 1.877 -> 1.880 ( +0.16%) [ +0.05% +0.00% +0.05% / +0.21% +0.16% +0.16%] index_add_ perm_sorted : Elapsed 0.019 ms (1.878 ms / 100) 1.837 -> 1.841 ( +0.22%) [ +0.00% +0.11% +0.16% / +0.22% +0.54% +0.60%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.837 ms / 100) 8.346 -> 8.340 ( -0.07%) [ +0.00% +0.24% +0.11% / -0.07% +0.58% +0.64%] index_select const : Elapsed 0.083 ms (8.346 ms / 100) 8.365 -> 8.351 ( -0.17%) [ +0.04% +0.00% +0.07% / -0.17% +0.20% +0.17%] index_select wrap : Elapsed 0.084 ms (8.368 ms / 100) 8.352 -> 8.359 ( +0.08%) [ +0.00% +0.05% +0.12% / +0.08% +0.19% +0.38%] index_select linear : Elapsed 0.084 ms (8.352 ms / 100) 8.352 -> 8.363 ( +0.13%) [ +0.05% +0.02% +0.00% / +0.13% +0.32% +0.59%] index_select reverse : Elapsed 0.084 ms (8.356 ms / 100) 8.353 -> 8.354 ( +0.01%) [ +0.12% +0.08% +0.00% / +0.01% +0.29% +0.29%] index_select skip64 : Elapsed 0.084 ms (8.363 ms / 100) 8.347 -> 8.360 ( +0.16%) [ +0.20% +0.14% +0.00% / +0.16% +0.46% +0.40%] index_select skip256 : Elapsed 0.084 ms (8.364 ms / 100) 8.350 -> 8.360 ( +0.12%) [ +0.00% +0.02% +0.01% / +0.12% +0.29% +0.31%] index_select spread : Elapsed 0.084 ms (8.350 ms / 100) 8.344 -> 8.359 ( +0.18%) [ +0.31% +0.00% +0.14% / +0.18% +0.77% +0.56%] index_select strided 3 : Elapsed 0.084 ms (8.370 ms / 100) 8.348 -> 8.372 ( +0.29%) [ +0.26% +0.10% +0.00% / +0.31% +0.53% +0.29%] index_select random : Elapsed 0.084 ms (8.370 ms / 100) 8.348 -> 8.362 ( +0.17%) [ +0.00% +0.07% +0.16% / +0.17% +0.36% +0.44%] index_select random_sorted : Elapsed 0.083 ms (8.348 ms / 100) B = [5, 40, 16, 20] (stride (1, 80, 5, 3200)) A = [5, 40, 16, 4] (stride (160, 4, 800, 1)) dim = 3 0.769 -> 0.772 ( +0.39%) [ +0.52% +0.26% +0.00% / +0.39% +1.82% +1.69%] index_add_ linear : Elapsed 0.008 ms (0.773 ms / 100) 0.754 -> 0.755 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +2.25% +2.39%] index_copy_ linear : Elapsed 0.008 ms (0.755 ms / 100) 0.769 -> 0.771 ( +0.26%) [ +0.52% +0.39% +0.00% / +0.26% +0.39% +0.26%] index_add_ reverse : Elapsed 0.008 ms (0.773 ms / 100) 0.756 -> 0.755 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.93% +1.72%] index_copy_ reverse : Elapsed 0.008 ms (0.756 ms / 100) 0.798 -> 0.775 ( -2.88%) [ +0.13% +0.00% +0.13% / +0.00% -2.63% -2.88%] index_add_ spread : Elapsed 0.008 ms (0.799 ms / 100) 0.785 -> 0.762 ( -2.93%) [ +0.00% +0.13% +0.00% / +0.51% -2.93% -2.93%] index_copy_ spread : Elapsed 0.008 ms (0.785 ms / 100) 0.777 -> 0.770 ( -0.90%) [ +0.00% +1.16% +0.13% / +0.90% -0.90% -0.39%] index_add_ strided 3 : Elapsed 0.008 ms (0.777 ms / 100) 0.764 -> 0.760 ( -0.52%) [ +0.00% +0.00% +0.13% / +0.13% -0.52% -0.39%] index_copy_ strided 3 : Elapsed 0.008 ms (0.764 ms / 100) 0.771 -> 0.770 ( -0.13%) [ +0.26% +0.65% +0.00% / -0.13% +0.91% +0.65%] index_add_ strided 7 : Elapsed 0.008 ms (0.773 ms / 100) 0.757 -> 0.758 ( +0.13%) [ +0.26% +0.00% +0.00% / +0.13% +0.92% +1.19%] index_copy_ strided 7 : Elapsed 0.008 ms (0.759 ms / 100) 0.775 -> 0.775 ( +0.00%) [ +0.00% +0.39% +0.00% / +0.00% +2.84% +2.32%] index_add_ perm : Elapsed 0.008 ms (0.775 ms / 100) 0.766 -> 0.762 ( -0.52%) [ +0.00% +0.13% +0.26% / -0.52% +0.91% +1.44%] index_copy_ perm : Elapsed 0.008 ms (0.766 ms / 100) 0.771 -> 0.777 ( +0.78%) [ +0.00% +0.26% +0.26% / +0.78% +3.50% +3.24%] index_add_ perm_sorted : Elapsed 0.008 ms (0.771 ms / 100) 0.764 -> 0.764 ( +0.00%) [ +0.26% +0.00% +0.00% / +0.00% +1.05% +1.57%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.766 ms / 100) 5.069 -> 5.064 ( -0.10%) [ +0.00% +0.04% +0.04% / -0.10% +0.22% +0.39%] index_select const : Elapsed 0.051 ms (5.069 ms / 100) 5.071 -> 5.068 ( -0.06%) [ +0.18% +0.06% +0.00% / -0.06% +0.45% +0.37%] index_select wrap : Elapsed 0.051 ms (5.080 ms / 100) 5.067 -> 5.064 ( -0.06%) [ +0.18% +0.14% +0.00% / -0.06% +0.26% +0.37%] index_select linear : Elapsed 0.051 ms (5.076 ms / 100) 5.068 -> 5.064 ( -0.08%) [ +0.12% +0.00% +0.00% / -0.08% +0.12% +0.28%] index_select reverse : Elapsed 0.051 ms (5.074 ms / 100) 5.062 -> 5.067 ( +0.10%) [ +0.00% +0.30% +0.10% / +0.22% +0.34% +0.10%] index_select skip64 : Elapsed 0.051 ms (5.062 ms / 100) 5.066 -> 5.062 ( -0.08%) [ +0.00% +0.00% +0.26% / -0.08% +0.47% +0.26%] index_select skip256 : Elapsed 0.051 ms (5.066 ms / 100) 5.064 -> 5.075 ( +0.22%) [ +0.16% +0.00% +0.28% / +0.22% +0.55% +0.53%] index_select spread : Elapsed 0.051 ms (5.072 ms / 100) 5.069 -> 5.072 ( +0.06%) [ +0.00% +0.00% +0.14% / +0.06% +0.30% +0.24%] index_select strided 3 : Elapsed 0.051 ms (5.069 ms / 100) 5.070 -> 5.069 ( -0.02%) [ +0.06% +0.00% +0.22% / -0.02% +0.24% +0.22%] index_select random : Elapsed 0.051 ms (5.073 ms / 100) 5.066 -> 5.078 ( +0.24%) [ +0.26% +0.00% +0.18% / +0.24% +0.39% +0.30%] index_select random_sorted : Elapsed 0.051 ms (5.079 ms / 100) B = [5, 40, 16, 20] (stride (40, 1, 200, 3200)) A = [5, 40, 16, 4] (stride (1, 20, 800, 5)) dim = 3 2.117 -> 2.122 ( +0.24%) [ +0.19% +0.00% +0.00% / +0.24% +0.38% +0.28%] index_add_ linear : Elapsed 0.021 ms (2.121 ms / 100) 2.057 -> 2.063 ( +0.29%) [ +0.34% +0.05% +0.00% / +0.29% +0.58% +0.73%] index_copy_ linear : Elapsed 0.021 ms (2.064 ms / 100) 2.106 -> 2.108 ( +0.09%) [ +0.00% +0.43% +0.43% / +0.09% +1.19% +1.14%] index_add_ reverse : Elapsed 0.021 ms (2.106 ms / 100) 2.057 -> 2.055 ( -0.10%) [ +0.05% +0.39% +0.00% / -0.10% +0.49% +0.53%] index_copy_ reverse : Elapsed 0.021 ms (2.058 ms / 100) 2.117 -> 2.122 ( +0.24%) [ +0.28% +0.09% +0.00% / +0.24% +0.24% +0.43%] index_add_ spread : Elapsed 0.021 ms (2.123 ms / 100) 2.061 -> 2.064 ( +0.15%) [ +0.15% +0.00% +0.10% / +0.15% +0.19% +0.19%] index_copy_ spread : Elapsed 0.021 ms (2.064 ms / 100) 2.114 -> 2.120 ( +0.28%) [ +0.00% +0.24% +0.33% / +0.28% +0.99% +0.43%] index_add_ strided 3 : Elapsed 0.021 ms (2.114 ms / 100) 2.057 -> 2.061 ( +0.19%) [ +0.19% +0.00% +0.49% / +0.19% +0.78% +0.39%] index_copy_ strided 3 : Elapsed 0.021 ms (2.061 ms / 100) 2.110 -> 2.115 ( +0.24%) [ +0.05% +0.14% +0.00% / +0.24% +1.18% +0.62%] index_add_ strided 7 : Elapsed 0.021 ms (2.111 ms / 100) 2.054 -> 2.058 ( +0.19%) [ +0.00% +0.24% +0.19% / +0.19% +1.31% +0.49%] index_copy_ strided 7 : Elapsed 0.021 ms (2.054 ms / 100) 2.115 -> 2.116 ( +0.05%) [ +0.33% +0.00% +0.05% / +0.05% +0.14% +0.14%] index_add_ perm : Elapsed 0.021 ms (2.122 ms / 100) 2.054 -> 2.062 ( +0.39%) [ +0.34% +0.00% +0.29% / +0.39% +0.54% +0.44%] index_copy_ perm : Elapsed 0.021 ms (2.061 ms / 100) 2.105 -> 2.107 ( +0.10%) [ +0.24% +0.71% +0.00% / +0.10% +0.67% +0.76%] index_add_ perm_sorted : Elapsed 0.021 ms (2.110 ms / 100) 2.059 -> 2.057 ( -0.10%) [ +0.00% +0.10% +0.00% / -0.10% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.059 ms / 100) 8.771 -> 8.785 ( +0.16%) [ +0.09% +0.00% +0.09% / +0.16% +0.17% +0.22%] index_select const : Elapsed 0.088 ms (8.779 ms / 100) 8.785 -> 8.781 ( -0.05%) [ +0.00% +0.30% +0.00% / +0.05% -0.05% +0.15%] index_select wrap : Elapsed 0.088 ms (8.785 ms / 100) 8.783 -> 8.775 ( -0.09%) [ +0.03% +0.00% +0.19% / +0.08% -0.09% +0.03%] index_select linear : Elapsed 0.088 ms (8.786 ms / 100) 8.776 -> 8.794 ( +0.21%) [ +0.00% +0.54% +0.23% / +0.43% +0.21% +0.36%] index_select reverse : Elapsed 0.088 ms (8.776 ms / 100) 8.775 -> 8.770 ( -0.06%) [ +0.00% +0.11% +0.11% / -0.06% +0.01% -0.01%] index_select skip64 : Elapsed 0.088 ms (8.775 ms / 100) 8.777 -> 8.780 ( +0.03%) [ +0.16% +0.07% +0.00% / +0.10% +0.03% +0.18%] index_select skip256 : Elapsed 0.088 ms (8.791 ms / 100) 8.774 -> 8.795 ( +0.24%) [ +0.00% +0.46% +0.23% / +0.27% +0.40% +0.24%] index_select spread : Elapsed 0.088 ms (8.774 ms / 100) 8.770 -> 8.778 ( +0.09%) [ +0.13% +0.08% +0.00% / +0.09% +0.33% +0.19%] index_select strided 3 : Elapsed 0.088 ms (8.781 ms / 100) 8.770 -> 8.777 ( +0.08%) [ +0.00% +0.07% +0.18% / +0.14% +0.25% +0.08%] index_select random : Elapsed 0.088 ms (8.770 ms / 100) 8.801 -> 8.801 ( +0.00%) [ +0.00% +0.16% +0.01% / +0.00% +0.00% +0.15%] index_select random_sorted : Elapsed 0.088 ms (8.801 ms / 100) B = [5, 40, 16, 20] (stride (1, 5, 200, 3200)) A = [5, 40, 16, 4] (stride (2560, 4, 160, 1)) dim = 3 2.187 -> 2.189 ( +0.09%) [ +0.14% +0.23% +0.00% / +0.09% +0.50% +0.32%] index_add_ linear : Elapsed 0.022 ms (2.190 ms / 100) 2.136 -> 2.138 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +0.28% +0.23%] index_copy_ linear : Elapsed 0.021 ms (2.140 ms / 100) 2.179 -> 2.183 ( +0.18%) [ +0.00% +0.18% +0.14% / +0.18% +0.96% +0.87%] index_add_ reverse : Elapsed 0.022 ms (2.179 ms / 100) 2.133 -> 2.132 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.75% +0.75%] index_copy_ reverse : Elapsed 0.021 ms (2.133 ms / 100) 2.184 -> 2.182 ( -0.09%) [ +0.14% +0.00% +0.09% / -0.09% +0.37% +0.41%] index_add_ spread : Elapsed 0.022 ms (2.187 ms / 100) 2.130 -> 2.134 ( +0.19%) [ +0.14% +0.19% +0.00% / +0.19% +0.66% +0.52%] index_copy_ spread : Elapsed 0.021 ms (2.133 ms / 100) 2.188 -> 2.190 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.41% +0.32%] index_add_ strided 3 : Elapsed 0.022 ms (2.190 ms / 100) 2.138 -> 2.145 ( +0.33%) [ +0.05% +0.09% +0.00% / +0.37% +0.33% +0.42%] index_copy_ strided 3 : Elapsed 0.021 ms (2.139 ms / 100) 2.186 -> 2.181 ( -0.23%) [ +0.27% +0.00% +0.00% / -0.23% +0.50% +0.69%] index_add_ strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.129 -> 2.130 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.61% +0.70%] index_copy_ strided 7 : Elapsed 0.021 ms (2.131 ms / 100) 2.190 -> 2.192 ( +0.09%) [ +0.00% +0.14% +0.09% / +0.09% +0.46% +0.46%] index_add_ perm : Elapsed 0.022 ms (2.190 ms / 100) 2.140 -> 2.143 ( +0.14%) [ +0.00% +0.14% +0.05% / +0.14% +0.47% +0.37%] index_copy_ perm : Elapsed 0.021 ms (2.140 ms / 100) 2.192 -> 2.193 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.50% +0.55%] index_add_ perm_sorted : Elapsed 0.022 ms (2.194 ms / 100) 2.138 -> 2.134 ( -0.19%) [ +0.14% +0.00% +0.00% / -0.19% +0.37% +0.28%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.141 ms / 100) 9.211 -> 9.221 ( +0.11%) [ +0.12% +0.00% +0.18% / +0.23% +0.41% +0.11%] index_select const : Elapsed 0.092 ms (9.222 ms / 100) 9.217 -> 9.219 ( +0.02%) [ +0.16% +0.00% +0.05% / +0.02% +0.04% +0.11%] index_select wrap : Elapsed 0.092 ms (9.232 ms / 100) 9.208 -> 9.213 ( +0.05%) [ +0.05% +0.17% +0.00% / +0.33% +0.05% +0.24%] index_select linear : Elapsed 0.092 ms (9.213 ms / 100) 9.206 -> 9.216 ( +0.11%) [ +0.28% +0.18% +0.00% / +0.12% +0.22% +0.11%] index_select reverse : Elapsed 0.092 ms (9.232 ms / 100) 9.213 -> 9.219 ( +0.07%) [ +0.01% +0.00% +0.23% / +0.07% +0.35% +0.23%] index_select skip64 : Elapsed 0.092 ms (9.214 ms / 100) 9.218 -> 9.211 ( -0.08%) [ +0.00% +0.14% +0.04% / -0.08% +0.23% +0.13%] index_select skip256 : Elapsed 0.092 ms (9.218 ms / 100) 9.220 -> 9.222 ( +0.02%) [ +0.34% +0.00% +0.04% / +0.08% +0.02% +0.03%] index_select spread : Elapsed 0.093 ms (9.251 ms / 100) 9.229 -> 9.206 ( -0.25%) [ +0.00% +0.01% +0.17% / -0.12% -0.03% -0.25%] index_select strided 3 : Elapsed 0.092 ms (9.229 ms / 100) 9.216 -> 9.216 ( +0.00%) [ +0.11% +0.10% +0.00% / +0.00% +0.24% +0.11%] index_select random : Elapsed 0.092 ms (9.226 ms / 100) 9.216 -> 9.223 ( +0.08%) [ +0.00% +0.16% +0.04% / +0.11% +0.23% +0.08%] index_select random_sorted : Elapsed 0.092 ms (9.216 ms / 100) out_shape = [20, 4, 5, 40] in_shape = [16, 4, 5, 40] idx_dim = 0 B = [20, 4, 5, 40] (stride (800, 5, 1, 20)) A = [16, 4, 5, 40] (stride (800, 1, 4, 20)) dim = 0 3.947 -> 3.950 ( +0.08%) [ +0.08% +0.20% +0.00% / +0.08% +0.81% +0.79%] index_add_ linear : Elapsed 0.040 ms (3.950 ms / 100) 3.777 -> 3.780 ( +0.08%) [ +0.05% +0.13% +0.00% / +0.08% +0.85% +0.58%] index_copy_ linear : Elapsed 0.038 ms (3.779 ms / 100) 3.962 -> 3.964 ( +0.05%) [ +0.00% +0.03% +0.08% / +0.05% +0.71% +0.83%] index_add_ reverse : Elapsed 0.040 ms (3.962 ms / 100) 3.796 -> 3.798 ( +0.05%) [ +0.00% +0.00% +0.08% / +0.05% +0.90% +0.87%] index_copy_ reverse : Elapsed 0.038 ms (3.796 ms / 100) 3.947 -> 3.943 ( -0.10%) [ +0.00% +0.00% +0.03% / -0.10% +0.63% +0.66%] index_add_ spread : Elapsed 0.039 ms (3.947 ms / 100) 3.769 -> 3.768 ( -0.03%) [ +0.32% +0.00% +0.24% / -0.03% +0.77% +0.77%] index_copy_ spread : Elapsed 0.038 ms (3.781 ms / 100) 3.937 -> 3.938 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.84% +0.79%] index_add_ strided 3 : Elapsed 0.039 ms (3.941 ms / 100) 3.777 -> 3.783 ( +0.16%) [ +0.00% +0.08% +0.11% / +0.16% +0.69% +0.66%] index_copy_ strided 3 : Elapsed 0.038 ms (3.777 ms / 100) 3.934 -> 3.943 ( +0.23%) [ +0.00% +0.08% +0.18% / +0.23% +0.84% +0.84%] index_add_ strided 7 : Elapsed 0.039 ms (3.934 ms / 100) 3.774 -> 3.786 ( +0.32%) [ +0.00% +0.05% +0.21% / +0.32% +0.79% +0.79%] index_copy_ strided 7 : Elapsed 0.038 ms (3.774 ms / 100) 3.945 -> 3.948 ( +0.08%) [ +0.18% +0.00% +0.00% / +0.08% +0.74% +0.71%] index_add_ perm : Elapsed 0.040 ms (3.952 ms / 100) 3.768 -> 3.781 ( +0.35%) [ +0.40% +0.11% +0.00% / +0.35% +0.96% +0.93%] index_copy_ perm : Elapsed 0.038 ms (3.783 ms / 100) 3.962 -> 3.963 ( +0.03%) [ +0.00% +0.10% +0.05% / +0.03% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.040 ms (3.962 ms / 100) 3.797 -> 3.796 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.79% +0.79%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.799 ms / 100) 5.469 -> 5.468 ( -0.02%) [ +0.04% +0.00% +0.05% / -0.02% +0.26% +0.04%] index_select const : Elapsed 0.055 ms (5.471 ms / 100) 5.479 -> 5.481 ( +0.04%) [ +0.11% +0.05% +0.00% / +0.09% +0.04% +0.05%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.478 -> 5.480 ( +0.04%) [ +0.18% +0.00% +0.09% / +0.05% +0.18% +0.04%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.478 -> 5.480 ( +0.04%) [ +0.15% +0.00% +0.11% / +0.04% +0.05% +0.13%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.468 -> 5.468 ( +0.00%) [ +0.09% +0.00% +0.07% / +0.00% +0.00% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.473 ms / 100) 5.468 -> 5.475 ( +0.13%) [ +0.07% +0.00% +0.02% / +0.13% +0.18% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.472 ms / 100) 5.479 -> 5.487 ( +0.15%) [ +0.05% +0.22% +0.00% / +0.18% +0.15% +0.20%] index_select spread : Elapsed 0.055 ms (5.482 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.09% +0.00% +0.07% / +0.16% +0.07% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.486 ms / 100) 5.483 -> 5.479 ( -0.07%) [ +0.05% +0.13% +0.00% / +0.02% -0.05% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.484 -> 5.479 ( -0.09%) [ +0.09% +0.09% +0.00% / +0.07% -0.09% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.489 ms / 100) 5.469 -> 5.473 ( +0.07%) [ +0.13% +0.11% +0.00% / +0.07% +0.22% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.479 -> 5.474 ( -0.09%) [ +0.07% +0.00% +0.00% / -0.09% -0.02% +0.07%] index_select random : Elapsed 0.055 ms (5.483 ms / 100) 5.483 -> 5.478 ( -0.09%) [ +0.04% +0.02% +0.00% / -0.04% -0.07% -0.09%] index_select random_sorted : Elapsed 0.055 ms (5.485 ms / 100) B = [20, 4, 5, 40] (stride (1, 800, 3200, 20)) A = [16, 4, 5, 40] (stride (800, 40, 160, 1)) dim = 0 3.933 -> 3.933 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.79% +0.86%] index_add_ linear : Elapsed 0.039 ms (3.934 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.84% +0.87%] index_copy_ linear : Elapsed 0.038 ms (3.807 ms / 100) 3.934 -> 3.935 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.74% +0.76%] index_add_ reverse : Elapsed 0.039 ms (3.935 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.74% +0.71%] index_copy_ reverse : Elapsed 0.038 ms (3.803 ms / 100) 3.936 -> 3.935 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.79% +0.79%] index_add_ spread : Elapsed 0.039 ms (3.938 ms / 100) 3.807 -> 3.807 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.84% +0.84%] index_copy_ spread : Elapsed 0.038 ms (3.807 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.76% +0.71%] index_add_ strided 3 : Elapsed 0.039 ms (3.936 ms / 100) 3.810 -> 3.810 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.73% +0.71%] index_copy_ strided 3 : Elapsed 0.038 ms (3.811 ms / 100) 3.933 -> 3.933 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.039 ms (3.934 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.76% +0.76%] index_copy_ strided 7 : Elapsed 0.038 ms (3.802 ms / 100) 3.931 -> 3.934 ( +0.08%) [ +0.00% +0.13% +0.08% / +0.08% +0.92% +0.84%] index_add_ perm : Elapsed 0.039 ms (3.931 ms / 100) 3.805 -> 3.807 ( +0.05%) [ +0.00% +0.11% +0.03% / +0.05% +0.79% +0.71%] index_copy_ perm : Elapsed 0.038 ms (3.805 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.76% +0.76%] index_add_ perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) 3.810 -> 3.811 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.66% +0.73%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.810 ms / 100) 5.558 -> 5.553 ( -0.09%) [ +0.00% +0.05% +0.02% / -0.09% -0.05% +0.04%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.570 -> 5.572 ( +0.04%) [ +0.00% +0.02% +0.13% / +0.04% +0.05% +0.16%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.04% +0.25% +0.40%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.570 -> 5.573 ( +0.05%) [ +0.00% +0.04% +0.00% / +0.05% +0.14% +0.20%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.555 -> 5.554 ( -0.02%) [ +0.00% +0.09% +0.05% / -0.02% +0.14% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.553 -> 5.560 ( +0.13%) [ +0.20% +0.02% +0.00% / +0.13% +0.13% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.572 -> 5.577 ( +0.09%) [ +0.04% +0.00% +0.04% / +0.09% +0.18% +0.16%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.02% +0.20% +0.22%] index_select strided 3 : Elapsed 0.056 ms (5.571 ms / 100) 5.566 -> 5.575 ( +0.16%) [ +0.09% +0.05% +0.00% / +0.16% +0.29% +0.31%] index_select strided 5 : Elapsed 0.056 ms (5.571 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.00% +0.05% +0.04% / +0.05% +0.07% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.571 ms / 100) 5.555 -> 5.561 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.32% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.555 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.05% +0.00% +0.00% / -0.04% +0.14% +0.04%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.572 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.20% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.564 ms / 100) B = [20, 4, 5, 40] (stride (20, 5, 1, 400)) A = [16, 4, 5, 40] (stride (800, 200, 40, 1)) dim = 0 3.009 -> 3.015 ( +0.20%) [ +0.00% +0.07% +0.17% / +0.20% +0.90% +0.73%] index_add_ linear : Elapsed 0.030 ms (3.009 ms / 100) 2.880 -> 2.885 ( +0.17%) [ +0.00% +0.03% +0.21% / +0.17% +0.97% +0.80%] index_copy_ linear : Elapsed 0.029 ms (2.880 ms / 100) 3.023 -> 3.024 ( +0.03%) [ +0.00% +0.07% +0.03% / +0.03% +0.83% +0.83%] index_add_ reverse : Elapsed 0.030 ms (3.023 ms / 100) 2.895 -> 2.895 ( +0.00%) [ +0.03% +0.00% +0.07% / +0.00% +0.83% +0.79%] index_copy_ reverse : Elapsed 0.029 ms (2.896 ms / 100) 3.012 -> 3.013 ( +0.03%) [ +0.00% +0.07% +0.03% / +0.03% +0.86% +0.66%] index_add_ spread : Elapsed 0.030 ms (3.012 ms / 100) 2.881 -> 2.883 ( +0.07%) [ +0.10% +0.14% +0.00% / +0.07% +0.97% +0.94%] index_copy_ spread : Elapsed 0.029 ms (2.884 ms / 100) 3.010 -> 3.015 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.76% +1.20%] index_add_ strided 3 : Elapsed 0.030 ms (3.015 ms / 100) 2.882 -> 2.887 ( +0.17%) [ +0.24% +0.21% +0.00% / +0.17% +0.87% +1.25%] index_copy_ strided 3 : Elapsed 0.029 ms (2.889 ms / 100) 3.017 -> 3.013 ( -0.13%) [ +0.00% +0.03% +0.03% / -0.13% +0.63% +0.53%] index_add_ strided 7 : Elapsed 0.030 ms (3.017 ms / 100) 2.889 -> 2.886 ( -0.10%) [ +0.00% +0.03% +0.00% / -0.10% +0.69% +0.62%] index_copy_ strided 7 : Elapsed 0.029 ms (2.889 ms / 100) 3.013 -> 3.013 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.83% +0.83%] index_add_ perm : Elapsed 0.030 ms (3.013 ms / 100) 2.881 -> 2.884 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.10% +0.94% +0.97%] index_copy_ perm : Elapsed 0.029 ms (2.884 ms / 100) 3.022 -> 3.027 ( +0.17%) [ +0.17% +0.00% +0.13% / +0.17% +0.73% +0.79%] index_add_ perm_sorted : Elapsed 0.030 ms (3.027 ms / 100) 2.894 -> 2.898 ( +0.14%) [ +0.21% +0.00% +0.14% / +0.14% +0.79% +0.79%] index_copy_ perm_sorted : Elapsed 0.029 ms (2.900 ms / 100) 5.302 -> 5.302 ( +0.00%) [ +0.00% +0.13% +0.06% / +0.08% +0.00% +0.13%] index_select const : Elapsed 0.053 ms (5.302 ms / 100) 5.320 -> 5.316 ( -0.08%) [ +0.00% +0.02% +0.04% / -0.08% +0.00% +0.09%] index_select wrap : Elapsed 0.053 ms (5.320 ms / 100) 5.318 -> 5.322 ( +0.08%) [ +0.09% +0.13% +0.00% / +0.08% +0.15% +0.08%] index_select linear : Elapsed 0.053 ms (5.323 ms / 100) 5.319 -> 5.323 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.15% +0.09% +0.08%] index_select reverse : Elapsed 0.053 ms (5.319 ms / 100) 5.303 -> 5.305 ( +0.04%) [ +0.00% +0.09% +0.15% / +0.04% +0.13% +0.08%] index_select skip64 : Elapsed 0.053 ms (5.303 ms / 100) 5.303 -> 5.302 ( -0.02%) [ +0.21% +0.04% +0.00% / -0.02% +0.21% +0.06%] index_select skip256 : Elapsed 0.053 ms (5.314 ms / 100) 5.312 -> 5.317 ( +0.09%) [ +0.17% +0.19% +0.00% / +0.13% +0.15% +0.09%] index_select spread : Elapsed 0.053 ms (5.321 ms / 100) 5.322 -> 5.318 ( -0.08%) [ +0.08% +0.00% +0.06% / +0.02% -0.08% +0.06%] index_select strided 3 : Elapsed 0.053 ms (5.326 ms / 100) 5.319 -> 5.321 ( +0.04%) [ +0.11% +0.00% +0.13% / +0.11% +0.06% +0.04%] index_select strided 5 : Elapsed 0.053 ms (5.325 ms / 100) 5.318 -> 5.314 ( -0.08%) [ +0.00% +0.06% +0.04% / -0.08% +0.00% +0.11%] index_select strided 7 : Elapsed 0.053 ms (5.318 ms / 100) 5.310 -> 5.306 ( -0.08%) [ +0.00% +0.02% +0.00% / +0.08% +0.02% -0.08%] index_select strided 8 : Elapsed 0.053 ms (5.310 ms / 100) 5.312 -> 5.309 ( -0.06%) [ +0.19% +0.00% +0.11% / +0.13% -0.06% +0.00%] index_select random : Elapsed 0.053 ms (5.322 ms / 100) 5.308 -> 5.319 ( +0.21%) [ +0.21% +0.00% +0.17% / +0.24% +0.21% +0.21%] index_select random_sorted : Elapsed 0.053 ms (5.319 ms / 100) B = [20, 4, 5, 40] (stride (20, 5, 1, 400)) A = [16, 4, 5, 40] (stride (160, 1, 2560, 4)) dim = 0 3.649 -> 3.650 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.79% +0.79%] index_add_ linear : Elapsed 0.036 ms (3.649 ms / 100) 3.529 -> 3.529 ( +0.00%) [ +0.00% +0.06% +0.09% / +0.00% +0.82% +0.74%] index_copy_ linear : Elapsed 0.035 ms (3.529 ms / 100) 3.644 -> 3.650 ( +0.16%) [ +0.00% +0.11% +0.11% / +0.16% +0.96% +0.91%] index_add_ reverse : Elapsed 0.036 ms (3.644 ms / 100) 3.519 -> 3.531 ( +0.34%) [ +0.00% +0.17% +0.17% / +0.34% +0.88% +0.88%] index_copy_ reverse : Elapsed 0.035 ms (3.519 ms / 100) 3.651 -> 3.653 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.85% +0.85%] index_add_ spread : Elapsed 0.037 ms (3.652 ms / 100) 3.525 -> 3.529 ( +0.11%) [ +0.17% +0.23% +0.00% / +0.11% +0.79% +0.88%] index_copy_ spread : Elapsed 0.035 ms (3.531 ms / 100) 3.651 -> 3.656 ( +0.14%) [ +0.16% +0.05% +0.00% / +0.14% +0.79% +0.90%] index_add_ strided 3 : Elapsed 0.037 ms (3.657 ms / 100) 3.530 -> 3.531 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.62% +0.71%] index_copy_ strided 3 : Elapsed 0.035 ms (3.531 ms / 100) 3.647 -> 3.652 ( +0.14%) [ +0.00% +0.11% +0.14% / +0.14% +0.88% +0.77%] index_add_ strided 7 : Elapsed 0.036 ms (3.647 ms / 100) 3.523 -> 3.527 ( +0.11%) [ +0.06% +0.00% +0.06% / +0.11% +0.79% +0.68%] index_copy_ strided 7 : Elapsed 0.035 ms (3.525 ms / 100) 3.650 -> 3.650 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.71% +0.74%] index_add_ perm : Elapsed 0.037 ms (3.650 ms / 100) 3.527 -> 3.529 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.79% +0.82%] index_copy_ perm : Elapsed 0.035 ms (3.530 ms / 100) 3.653 -> 3.656 ( +0.08%) [ +0.03% +0.00% +0.14% / +0.08% +0.68% +0.74%] index_add_ perm_sorted : Elapsed 0.037 ms (3.654 ms / 100) 3.527 -> 3.528 ( +0.03%) [ +0.11% +0.11% +0.00% / +0.03% +0.68% +0.65%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.531 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.04% +0.04% +0.00% / +0.07% +0.05% +0.07%] index_select const : Elapsed 0.055 ms (5.474 ms / 100) 5.486 -> 5.480 ( -0.11%) [ +0.02% +0.00% +0.07% / -0.05% -0.11% +0.07%] index_select wrap : Elapsed 0.055 ms (5.487 ms / 100) 5.479 -> 5.477 ( -0.04%) [ +0.00% +0.07% +0.09% / -0.04% +0.11% +0.11%] index_select linear : Elapsed 0.055 ms (5.479 ms / 100) 5.477 -> 5.480 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.13% +0.35%] index_select reverse : Elapsed 0.055 ms (5.482 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.05% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.474 -> 5.476 ( +0.04%) [ +0.18% +0.00% +0.07% / +0.04% +0.09% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.484 ms / 100) 5.481 -> 5.477 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.13% +0.11%] index_select spread : Elapsed 0.055 ms (5.481 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.00% +0.09% +0.11% / +0.04% +0.09% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.480 ms / 100) 5.476 -> 5.480 ( +0.07%) [ +0.18% +0.00% +0.04% / +0.07% +0.15% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.00% +0.09% +0.07% / +0.00% +0.26% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.481 ms / 100) 5.474 -> 5.476 ( +0.04%) [ +0.05% +0.00% +0.07% / +0.04% +0.16% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.477 -> 5.476 ( -0.02%) [ +0.16% +0.00% +0.05% / -0.02% +0.13% +0.11%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.483 -> 5.481 ( -0.04%) [ +0.00% +0.00% +0.09% / -0.04% +0.09% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.483 ms / 100) B = [20, 4, 5, 40] (stride (20, 1, 4, 400)) A = [16, 4, 5, 40] (stride (800, 5, 1, 20)) dim = 0 3.939 -> 3.934 ( -0.13%) [ +0.23% +0.20% +0.00% / -0.13% +0.96% +0.91%] index_add_ linear : Elapsed 0.039 ms (3.948 ms / 100) 3.791 -> 3.788 ( -0.08%) [ +0.08% +0.11% +0.00% / -0.08% +0.82% +0.71%] index_copy_ linear : Elapsed 0.038 ms (3.794 ms / 100) 3.947 -> 3.948 ( +0.03%) [ +0.13% +0.00% +0.25% / +0.03% +0.79% +0.81%] index_add_ reverse : Elapsed 0.040 ms (3.952 ms / 100) 3.806 -> 3.804 ( -0.05%) [ +0.13% +0.00% +0.11% / -0.05% +0.84% +0.81%] index_copy_ reverse : Elapsed 0.038 ms (3.811 ms / 100) 3.912 -> 3.923 ( +0.28%) [ +0.00% +0.08% +0.13% / +0.28% +0.79% +0.97%] index_add_ spread : Elapsed 0.039 ms (3.912 ms / 100) 3.767 -> 3.774 ( +0.19%) [ +0.00% +0.03% +0.03% / +0.19% +0.80% +1.09%] index_copy_ spread : Elapsed 0.038 ms (3.767 ms / 100) 3.936 -> 3.937 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.64% +0.58%] index_add_ strided 3 : Elapsed 0.039 ms (3.936 ms / 100) 3.783 -> 3.783 ( +0.00%) [ +0.03% +0.08% +0.00% / +0.00% +0.90% +0.77%] index_copy_ strided 3 : Elapsed 0.038 ms (3.784 ms / 100) 3.935 -> 3.937 ( +0.05%) [ +0.03% +0.00% +0.13% / +0.05% +0.61% +0.58%] index_add_ strided 7 : Elapsed 0.039 ms (3.936 ms / 100) 3.781 -> 3.786 ( +0.13%) [ +0.00% +0.05% +0.03% / +0.13% +0.95% +0.63%] index_copy_ strided 7 : Elapsed 0.038 ms (3.781 ms / 100) 3.915 -> 3.923 ( +0.20%) [ +0.00% +0.05% +0.20% / +0.20% +0.61% +0.82%] index_add_ perm : Elapsed 0.039 ms (3.915 ms / 100) 3.769 -> 3.776 ( +0.19%) [ +0.00% +0.03% +0.19% / +0.19% +0.72% +0.82%] index_copy_ perm : Elapsed 0.038 ms (3.769 ms / 100) 3.952 -> 3.949 ( -0.08%) [ +0.00% +0.00% +0.10% / -0.08% +0.56% +0.76%] index_add_ perm_sorted : Elapsed 0.040 ms (3.952 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.00% +0.03% +0.18% / +0.03% +0.81% +0.79%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.807 ms / 100) 5.489 -> 5.491 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.11% +0.04% +0.04%] index_select const : Elapsed 0.055 ms (5.493 ms / 100) 5.501 -> 5.494 ( -0.13%) [ +0.00% +0.00% +0.04% / -0.09% -0.13% +0.09%] index_select wrap : Elapsed 0.055 ms (5.501 ms / 100) 5.498 -> 5.496 ( -0.04%) [ +0.07% +0.00% +0.04% / +0.02% +0.09% -0.04%] index_select linear : Elapsed 0.055 ms (5.502 ms / 100) 5.497 -> 5.501 ( +0.07%) [ +0.18% +0.05% +0.00% / +0.07% +0.07% +0.15%] index_select reverse : Elapsed 0.055 ms (5.507 ms / 100) 5.485 -> 5.492 ( +0.13%) [ +0.00% +0.05% +0.07% / +0.15% +0.13% +0.24%] index_select skip64 : Elapsed 0.055 ms (5.485 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.09% +0.00% +0.05% / +0.09% +0.05% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.490 ms / 100) 5.503 -> 5.495 ( -0.15%) [ +0.02% +0.00% +0.07% / +0.02% -0.15% -0.02%] index_select spread : Elapsed 0.055 ms (5.504 ms / 100) 5.505 -> 5.493 ( -0.22%) [ +0.04% +0.02% +0.00% / +0.09% -0.18% -0.22%] index_select strided 3 : Elapsed 0.055 ms (5.507 ms / 100) 5.501 -> 5.493 ( -0.15%) [ +0.15% +0.00% +0.04% / +0.07% -0.04% -0.15%] index_select strided 5 : Elapsed 0.055 ms (5.509 ms / 100) 5.499 -> 5.499 ( +0.00%) [ +0.00% +0.15% +0.09% / +0.05% +0.04% +0.00%] index_select strided 7 : Elapsed 0.055 ms (5.499 ms / 100) 5.492 -> 5.490 ( -0.04%) [ +0.00% +0.04% +0.02% / -0.04% +0.00% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.492 ms / 100) 5.491 -> 5.490 ( -0.02%) [ +0.18% +0.00% +0.11% / +0.07% +0.09% -0.02%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.499 -> 5.491 ( -0.15%) [ +0.00% +0.05% +0.02% / -0.11% -0.05% -0.15%] index_select random_sorted : Elapsed 0.055 ms (5.499 ms / 100) B = [20, 4, 5, 40] (stride (1, 20, 80, 400)) A = [16, 4, 5, 40] (stride (1, 16, 64, 320)) dim = 0 4.439 -> 4.443 ( +0.09%) [ +0.07% +0.00% +0.07% / +0.09% +0.72% +0.74%] index_add_ linear : Elapsed 0.044 ms (4.442 ms / 100) 4.280 -> 4.283 ( +0.07%) [ +0.02% +0.00% +0.02% / +0.07% +0.79% +0.72%] index_copy_ linear : Elapsed 0.043 ms (4.281 ms / 100) 4.444 -> 4.444 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.77%] index_add_ reverse : Elapsed 0.044 ms (4.444 ms / 100) 4.273 -> 4.279 ( +0.14%) [ +0.00% +0.02% +0.02% / +0.14% +0.82% +0.91%] index_copy_ reverse : Elapsed 0.043 ms (4.273 ms / 100) 4.445 -> 4.451 ( +0.13%) [ +0.16% +0.00% +0.09% / +0.13% +0.79% +0.67%] index_add_ spread : Elapsed 0.045 ms (4.452 ms / 100) 4.281 -> 4.277 ( -0.09%) [ +0.07% +0.05% +0.00% / -0.09% +0.65% +0.72%] index_copy_ spread : Elapsed 0.043 ms (4.284 ms / 100) 4.439 -> 4.439 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.74% +0.99%] index_add_ strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.278 -> 4.276 ( -0.05%) [ +0.05% +0.09% +0.00% / -0.05% +0.70% +0.86%] index_copy_ strided 3 : Elapsed 0.043 ms (4.280 ms / 100) 4.445 -> 4.445 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.00% +0.76% +0.58%] index_add_ strided 7 : Elapsed 0.044 ms (4.447 ms / 100) 4.275 -> 4.277 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +0.91% +0.65%] index_copy_ strided 7 : Elapsed 0.043 ms (4.279 ms / 100) 4.440 -> 4.443 ( +0.07%) [ +0.11% +0.09% +0.00% / +0.07% +0.77% +0.63%] index_add_ perm : Elapsed 0.044 ms (4.445 ms / 100) 4.281 -> 4.281 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.70% +0.72%] index_copy_ perm : Elapsed 0.043 ms (4.283 ms / 100) 4.444 -> 4.444 ( +0.00%) [ +0.00% +0.16% +0.07% / +0.00% +0.74% +0.65%] index_add_ perm_sorted : Elapsed 0.044 ms (4.444 ms / 100) 4.277 -> 4.282 ( +0.12%) [ +0.00% +0.16% +0.09% / +0.12% +0.77% +0.79%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.277 ms / 100) 5.572 -> 5.578 ( +0.11%) [ +0.00% +0.23% +0.16% / +0.16% +0.16% +0.11%] index_select const : Elapsed 0.056 ms (5.572 ms / 100) 5.583 -> 5.589 ( +0.11%) [ +0.04% +0.14% +0.00% / +0.16% +0.18% +0.11%] index_select wrap : Elapsed 0.056 ms (5.585 ms / 100) 5.586 -> 5.583 ( -0.05%) [ +0.02% +0.09% +0.00% / -0.05% +0.09% +0.14%] index_select linear : Elapsed 0.056 ms (5.587 ms / 100) 5.581 -> 5.589 ( +0.14%) [ +0.13% +0.14% +0.00% / +0.14% +0.23% +0.16%] index_select reverse : Elapsed 0.056 ms (5.588 ms / 100) 5.575 -> 5.580 ( +0.09%) [ +0.00% +0.16% +0.11% / +0.13% +0.09% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.575 ms / 100) 5.574 -> 5.580 ( +0.11%) [ +0.04% +0.00% +0.14% / +0.13% +0.11% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.576 ms / 100) 5.583 -> 5.587 ( +0.07%) [ +0.09% +0.11% +0.00% / +0.07% +0.14% +0.07%] index_select spread : Elapsed 0.056 ms (5.588 ms / 100) 5.588 -> 5.589 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.14% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.588 ms / 100) 5.584 -> 5.584 ( +0.00%) [ +0.00% +0.04% +0.05% / +0.00% +0.14% +0.21%] index_select strided 5 : Elapsed 0.056 ms (5.584 ms / 100) 5.590 -> 5.591 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.02% +0.09% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.590 ms / 100) 5.587 -> 5.584 ( -0.05%) [ +0.02% +0.00% +0.04% / -0.05% +0.16% +0.16%] index_select strided 8 : Elapsed 0.056 ms (5.588 ms / 100) 5.585 -> 5.582 ( -0.05%) [ +0.00% +0.07% +0.07% / -0.05% +0.11% +0.16%] index_select random : Elapsed 0.056 ms (5.585 ms / 100) 5.585 -> 5.587 ( +0.04%) [ +0.02% +0.00% +0.02% / +0.04% +0.20% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.586 ms / 100) out_shape = [16, 20, 5, 40] in_shape = [16, 4, 5, 40] idx_dim = 1 B = [16, 20, 5, 40] (stride (5, 80, 1, 1600)) A = [16, 4, 5, 40] (stride (800, 1, 4, 20)) dim = 1 2.175 -> 2.179 ( +0.18%) [ +0.23% +0.14% +0.00% / +0.18% +0.69% +0.60%] index_add_ linear : Elapsed 0.022 ms (2.180 ms / 100) 2.125 -> 2.129 ( +0.19%) [ +0.05% +0.05% +0.00% / +0.19% +0.80% +0.66%] index_copy_ linear : Elapsed 0.021 ms (2.126 ms / 100) 2.162 -> 2.166 ( +0.19%) [ +0.09% +0.14% +0.00% / +0.19% +1.16% +1.06%] index_add_ reverse : Elapsed 0.022 ms (2.164 ms / 100) 2.111 -> 2.119 ( +0.38%) [ +0.09% +0.19% +0.00% / +0.38% +1.28% +1.18%] index_copy_ reverse : Elapsed 0.021 ms (2.113 ms / 100) 2.164 -> 2.163 ( -0.05%) [ +0.00% +0.00% +0.18% / -0.05% +1.02% +0.88%] index_add_ spread : Elapsed 0.022 ms (2.164 ms / 100) 2.108 -> 2.104 ( -0.19%) [ +0.14% +0.00% +0.00% / -0.19% +1.14% +1.28%] index_copy_ spread : Elapsed 0.021 ms (2.111 ms / 100) 2.176 -> 2.179 ( +0.14%) [ +0.00% +0.14% +0.23% / +0.14% +0.55% +0.46%] index_add_ strided 3 : Elapsed 0.022 ms (2.176 ms / 100) 2.122 -> 2.123 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.05% +0.61% +0.90%] index_copy_ strided 3 : Elapsed 0.021 ms (2.124 ms / 100) 2.174 -> 2.179 ( +0.23%) [ +0.09% +0.00% +0.05% / +0.23% +0.60% +0.51%] index_add_ strided 7 : Elapsed 0.022 ms (2.176 ms / 100) 2.118 -> 2.120 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.52% +0.61%] index_copy_ strided 7 : Elapsed 0.021 ms (2.120 ms / 100) 2.180 -> 2.179 ( -0.05%) [ +0.09% +0.14% +0.00% / -0.05% +0.41% +0.64%] index_add_ perm : Elapsed 0.022 ms (2.182 ms / 100) 2.125 -> 2.127 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.09% +0.80% +0.75%] index_copy_ perm : Elapsed 0.021 ms (2.128 ms / 100) 2.175 -> 2.178 ( +0.14%) [ +0.23% +0.14% +0.00% / +0.14% +0.51% +0.74%] index_add_ perm_sorted : Elapsed 0.022 ms (2.180 ms / 100) 2.121 -> 2.120 ( -0.05%) [ +0.00% +0.09% +0.00% / -0.05% +0.57% +0.57%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.121 ms / 100) 9.203 -> 9.226 ( +0.25%) [ +0.28% +0.12% +0.00% / +0.36% +0.55% +0.25%] index_select const : Elapsed 0.092 ms (9.229 ms / 100) 9.215 -> 9.216 ( +0.01%) [ +0.05% +0.00% +0.14% / +0.01% +0.22% +0.34%] index_select wrap : Elapsed 0.092 ms (9.220 ms / 100) 9.210 -> 9.231 ( +0.23%) [ +0.00% +0.17% +0.04% / +0.25% +0.23% +0.58%] index_select linear : Elapsed 0.092 ms (9.210 ms / 100) 9.206 -> 9.210 ( +0.04%) [ +0.09% +0.00% +0.03% / +0.04% +0.58% +0.43%] index_select reverse : Elapsed 0.092 ms (9.214 ms / 100) 9.209 -> 9.223 ( +0.15%) [ +0.00% +0.05% +0.23% / +0.15% +0.34% +0.29%] index_select skip64 : Elapsed 0.092 ms (9.209 ms / 100) 9.233 -> 9.227 ( -0.06%) [ +0.19% +0.10% +0.00% / -0.06% +0.17% +0.03%] index_select skip256 : Elapsed 0.093 ms (9.251 ms / 100) 9.219 -> 9.238 ( +0.21%) [ +0.04% +0.05% +0.00% / +0.21% +0.21% +0.33%] index_select spread : Elapsed 0.092 ms (9.223 ms / 100) 9.222 -> 9.218 ( -0.04%) [ +0.04% +0.12% +0.00% / -0.04% +0.23% +0.15%] index_select strided 3 : Elapsed 0.092 ms (9.226 ms / 100) 9.221 -> 9.212 ( -0.10%) [ +0.09% +0.04% +0.00% / -0.10% +0.14% +0.11%] index_select random : Elapsed 0.092 ms (9.229 ms / 100) 9.213 -> 9.239 ( +0.28%) [ +0.00% +0.11% +0.17% / +0.34% +0.28% +0.47%] index_select random_sorted : Elapsed 0.092 ms (9.213 ms / 100) B = [16, 20, 5, 40] (stride (5, 80, 1, 1600)) A = [16, 4, 5, 40] (stride (1, 16, 2560, 64)) dim = 1 2.134 -> 2.137 ( +0.14%) [ +0.23% +0.19% +0.00% / +0.14% +0.56% +0.47%] index_add_ linear : Elapsed 0.021 ms (2.139 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.00% +0.14% +0.00% / +0.10% +0.34% +0.38%] index_copy_ linear : Elapsed 0.021 ms (2.085 ms / 100) 2.134 -> 2.136 ( +0.09%) [ +0.09% +0.23% +0.00% / +0.09% +0.61% +0.56%] index_add_ reverse : Elapsed 0.021 ms (2.136 ms / 100) 2.084 -> 2.086 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.29% +0.43%] index_copy_ reverse : Elapsed 0.021 ms (2.084 ms / 100) 2.135 -> 2.136 ( +0.05%) [ +0.23% +0.14% +0.00% / +0.05% +0.42% +0.23%] index_add_ spread : Elapsed 0.021 ms (2.140 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.34% +0.29%] index_copy_ spread : Elapsed 0.021 ms (2.087 ms / 100) 2.134 -> 2.135 ( +0.05%) [ +0.00% +0.14% +0.00% / +0.05% +0.28% +0.37%] index_add_ strided 3 : Elapsed 0.021 ms (2.134 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.14% +0.10% +0.00% / +0.00% +0.72% +0.29%] index_copy_ strided 3 : Elapsed 0.021 ms (2.082 ms / 100) 2.136 -> 2.135 ( -0.05%) [ +0.14% +0.09% +0.00% / -0.05% +0.37% +0.42%] index_add_ strided 7 : Elapsed 0.021 ms (2.139 ms / 100) 2.083 -> 2.088 ( +0.24%) [ +0.19% +0.10% +0.00% / +0.24% +0.38% +0.29%] index_copy_ strided 7 : Elapsed 0.021 ms (2.087 ms / 100) 2.135 -> 2.138 ( +0.14%) [ +0.05% +0.23% +0.00% / +0.14% +0.33% +0.52%] index_add_ perm : Elapsed 0.021 ms (2.136 ms / 100) 2.086 -> 2.086 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.14% +0.14%] index_copy_ perm : Elapsed 0.021 ms (2.088 ms / 100) 2.138 -> 2.137 ( -0.05%) [ +0.14% +0.00% +0.00% / -0.05% +0.23% +0.19%] index_add_ perm_sorted : Elapsed 0.021 ms (2.141 ms / 100) 2.085 -> 2.087 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.14% +0.24% +0.10%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.087 ms / 100) 8.772 -> 8.786 ( +0.16%) [ +0.00% +0.13% +0.01% / +0.16% +0.38% +0.49%] index_select const : Elapsed 0.088 ms (8.772 ms / 100) 8.798 -> 8.806 ( +0.09%) [ +0.08% +0.00% +0.23% / +0.09% +0.42% +0.42%] index_select wrap : Elapsed 0.088 ms (8.805 ms / 100) 8.792 -> 8.805 ( +0.15%) [ +0.02% +0.00% +0.19% / +0.15% +0.26% +0.23%] index_select linear : Elapsed 0.088 ms (8.794 ms / 100) 8.798 -> 8.787 ( -0.13%) [ +0.01% +0.14% +0.00% / -0.13% +0.23% +0.20%] index_select reverse : Elapsed 0.088 ms (8.799 ms / 100) 8.765 -> 8.770 ( +0.06%) [ +0.00% +0.08% +0.29% / +0.06% +0.32% +0.48%] index_select skip64 : Elapsed 0.088 ms (8.765 ms / 100) 8.770 -> 8.775 ( +0.06%) [ +0.01% +0.00% +0.19% / +0.06% +0.31% +0.27%] index_select skip256 : Elapsed 0.088 ms (8.771 ms / 100) 8.794 -> 8.809 ( +0.17%) [ +0.01% +0.00% +0.16% / +0.17% +0.68% +0.50%] index_select spread : Elapsed 0.088 ms (8.795 ms / 100) 8.797 -> 8.811 ( +0.16%) [ +0.01% +0.00% +0.13% / +0.16% +0.33% +0.31%] index_select strided 3 : Elapsed 0.088 ms (8.798 ms / 100) 8.806 -> 8.805 ( -0.01%) [ +0.00% +0.06% +0.00% / -0.01% +0.47% +0.30%] index_select random : Elapsed 0.088 ms (8.806 ms / 100) 8.799 -> 8.814 ( +0.17%) [ +0.09% +0.00% +0.06% / +0.17% +0.57% +0.50%] index_select random_sorted : Elapsed 0.088 ms (8.807 ms / 100) out_shape = [16, 4, 20, 40] in_shape = [16, 4, 5, 40] idx_dim = 2 B = [16, 4, 20, 40] (stride (3200, 40, 160, 1)) A = [16, 4, 5, 40] (stride (1, 640, 2560, 16)) dim = 2 1.842 -> 1.847 ( +0.27%) [ +0.00% +0.22% +0.16% / +0.27% +1.68% +1.57%] index_add_ linear : Elapsed 0.018 ms (1.842 ms / 100) 1.798 -> 1.801 ( +0.17%) [ +0.17% +0.28% +0.00% / +0.17% +1.84% +1.78%] index_copy_ linear : Elapsed 0.018 ms (1.801 ms / 100) 1.841 -> 1.841 ( +0.00%) [ +0.16% +0.38% +0.00% / +0.00% +1.47% +1.47%] index_add_ reverse : Elapsed 0.018 ms (1.844 ms / 100) 1.797 -> 1.798 ( +0.06%) [ +0.33% +0.00% +0.22% / +0.06% +1.95% +1.73%] index_copy_ reverse : Elapsed 0.018 ms (1.803 ms / 100) 1.844 -> 1.843 ( -0.05%) [ +0.00% +0.22% +0.22% / -0.05% +0.92% +0.87%] index_add_ spread : Elapsed 0.018 ms (1.844 ms / 100) 1.801 -> 1.803 ( +0.11%) [ +0.17% +0.22% +0.00% / +0.11% +1.17% +0.89%] index_copy_ spread : Elapsed 0.018 ms (1.804 ms / 100) 1.845 -> 1.846 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.92% +0.98%] index_add_ strided 3 : Elapsed 0.018 ms (1.845 ms / 100) 1.799 -> 1.804 ( +0.28%) [ +0.11% +0.06% +0.00% / +0.28% +1.28% +1.28%] index_copy_ strided 3 : Elapsed 0.018 ms (1.801 ms / 100) 1.850 -> 1.854 ( +0.22%) [ +0.16% +0.00% +0.22% / +0.22% +0.22% +0.22%] index_add_ strided 7 : Elapsed 0.019 ms (1.853 ms / 100) 1.805 -> 1.814 ( +0.50%) [ +0.00% +0.17% +0.22% / +0.61% +0.50% +0.78%] index_copy_ strided 7 : Elapsed 0.018 ms (1.805 ms / 100) 1.844 -> 1.845 ( +0.05%) [ +0.16% +0.00% +0.16% / +0.05% +0.49% +0.65%] index_add_ perm : Elapsed 0.018 ms (1.847 ms / 100) 1.799 -> 1.807 ( +0.44%) [ +0.17% +0.00% +0.17% / +0.44% +0.61% +0.94%] index_copy_ perm : Elapsed 0.018 ms (1.802 ms / 100) 1.844 -> 1.843 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.54% +0.60%] index_add_ perm_sorted : Elapsed 0.018 ms (1.844 ms / 100) 1.800 -> 1.802 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +0.61% +0.67%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.800 ms / 100) 8.500 -> 8.525 ( +0.29%) [ +0.36% +0.19% +0.00% / +0.29% +0.48% +0.36%] index_select const : Elapsed 0.085 ms (8.531 ms / 100) 8.543 -> 8.530 ( -0.15%) [ +0.00% +0.01% +0.12% / -0.15% +0.09% +0.27%] index_select wrap : Elapsed 0.085 ms (8.543 ms / 100) 8.532 -> 8.538 ( +0.07%) [ +0.00% +0.07% +0.33% / +0.07% +0.21% +0.27%] index_select linear : Elapsed 0.085 ms (8.532 ms / 100) 8.535 -> 8.539 ( +0.05%) [ +0.00% +0.21% +0.13% / +0.12% +0.22% +0.05%] index_select reverse : Elapsed 0.085 ms (8.535 ms / 100) 8.518 -> 8.505 ( -0.15%) [ +0.07% +0.00% +0.05% / -0.15% +0.33% +0.23%] index_select skip64 : Elapsed 0.085 ms (8.524 ms / 100) 8.507 -> 8.517 ( +0.12%) [ +0.05% +0.02% +0.00% / +0.12% +0.39% +0.28%] index_select skip256 : Elapsed 0.085 ms (8.511 ms / 100) 8.528 -> 8.536 ( +0.09%) [ +0.16% +0.00% +0.15% / +0.09% +0.16% +0.23%] index_select spread : Elapsed 0.085 ms (8.542 ms / 100) 8.538 -> 8.538 ( +0.00%) [ +0.12% +0.00% +0.27% / +0.14% +0.05% +0.00%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.534 -> 8.530 ( -0.05%) [ +0.05% +0.18% +0.00% / -0.05% +0.15% +0.20%] index_select random : Elapsed 0.085 ms (8.538 ms / 100) 8.528 -> 8.540 ( +0.14%) [ +0.16% +0.00% +0.21% / +0.14% +0.54% +0.36%] index_select random_sorted : Elapsed 0.085 ms (8.542 ms / 100) B = [16, 4, 20, 40] (stride (3200, 1, 4, 80)) A = [16, 4, 5, 40] (stride (800, 5, 1, 20)) dim = 2 1.921 -> 1.920 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.47% +0.42%] index_add_ linear : Elapsed 0.019 ms (1.921 ms / 100) 1.876 -> 1.872 ( -0.21%) [ +0.21% +0.00% +0.11% / -0.21% +0.53% +0.48%] index_copy_ linear : Elapsed 0.019 ms (1.880 ms / 100) 1.923 -> 1.920 ( -0.16%) [ +0.05% +0.21% +0.00% / -0.16% +0.42% +0.36%] index_add_ reverse : Elapsed 0.019 ms (1.924 ms / 100) 1.874 -> 1.878 ( +0.21%) [ +0.27% +0.48% +0.00% / +0.21% +0.85% +0.53%] index_copy_ reverse : Elapsed 0.019 ms (1.879 ms / 100) 1.950 -> 1.950 ( +0.00%) [ +0.00% +0.21% +0.05% / +0.15% +0.00% +0.10%] index_add_ spread : Elapsed 0.019 ms (1.950 ms / 100) 1.905 -> 1.909 ( +0.21%) [ +0.00% +0.21% +0.10% / +0.26% +0.31% +0.21%] index_copy_ spread : Elapsed 0.019 ms (1.905 ms / 100) 1.950 -> 1.948 ( -0.10%) [ +0.10% +0.05% +0.00% / +0.15% -0.05% -0.10%] index_add_ strided 3 : Elapsed 0.020 ms (1.952 ms / 100) 1.903 -> 1.905 ( +0.11%) [ +0.16% +0.00% +0.05% / +0.11% +0.26% +0.21%] index_copy_ strided 3 : Elapsed 0.019 ms (1.906 ms / 100) 1.936 -> 1.937 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.15% +0.05%] index_add_ strided 7 : Elapsed 0.019 ms (1.936 ms / 100) 1.895 -> 1.894 ( -0.05%) [ +0.00% +0.16% +0.11% / +0.00% +0.00% -0.05%] index_copy_ strided 7 : Elapsed 0.019 ms (1.895 ms / 100) 1.943 -> 1.945 ( +0.10%) [ +0.00% +0.41% +0.00% / +0.10% +0.21% +0.21%] index_add_ perm : Elapsed 0.019 ms (1.943 ms / 100) 1.901 -> 1.902 ( +0.05%) [ +0.00% +0.16% +0.11% / +0.05% +0.37% +0.21%] index_copy_ perm : Elapsed 0.019 ms (1.901 ms / 100) 1.942 -> 1.941 ( -0.05%) [ +0.21% +0.00% +0.21% / +0.15% -0.05% +0.10%] index_add_ perm_sorted : Elapsed 0.019 ms (1.946 ms / 100) 1.901 -> 1.901 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.26% +0.11%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.901 ms / 100) 8.584 -> 8.587 ( +0.03%) [ +0.00% +0.08% +0.24% / +0.15% +0.03% +0.35%] index_select const : Elapsed 0.086 ms (8.584 ms / 100) 8.585 -> 8.586 ( +0.01%) [ +0.12% +0.12% +0.00% / +0.01% +0.06% +0.06%] index_select wrap : Elapsed 0.086 ms (8.595 ms / 100) 8.592 -> 8.578 ( -0.16%) [ +0.01% +0.02% +0.00% / +0.06% -0.16% -0.03%] index_select linear : Elapsed 0.086 ms (8.593 ms / 100) 8.580 -> 8.589 ( +0.10%) [ +0.06% +0.00% +0.22% / +0.15% +0.10% +0.22%] index_select reverse : Elapsed 0.086 ms (8.585 ms / 100) 8.577 -> 8.585 ( +0.09%) [ +0.19% +0.00% +0.16% / +0.09% +0.38% +0.12%] index_select skip64 : Elapsed 0.086 ms (8.593 ms / 100) 8.592 -> 8.583 ( -0.10%) [ +0.00% +0.00% +0.19% / -0.10% +0.27% +0.00%] index_select skip256 : Elapsed 0.086 ms (8.592 ms / 100) 8.593 -> 8.579 ( -0.16%) [ +0.00% +0.08% +0.06% / +0.02% +0.19% -0.16%] index_select spread : Elapsed 0.086 ms (8.593 ms / 100) 8.578 -> 8.583 ( +0.06%) [ +0.00% +0.26% +0.20% / +0.06% +0.16% +0.31%] index_select strided 3 : Elapsed 0.086 ms (8.578 ms / 100) 8.592 -> 8.598 ( +0.07%) [ +0.00% +0.15% +0.13% / +0.08% +0.09% +0.07%] index_select random : Elapsed 0.086 ms (8.592 ms / 100) 8.594 -> 8.584 ( -0.12%) [ +0.05% +0.00% +0.26% / -0.07% +0.14% -0.12%] index_select random_sorted : Elapsed 0.086 ms (8.598 ms / 100) B = [16, 4, 20, 40] (stride (1, 12800, 16, 320)) A = [16, 4, 5, 40] (stride (1, 80, 16, 320)) dim = 2 1.960 -> 1.960 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.66% +0.61%] index_add_ linear : Elapsed 0.020 ms (1.960 ms / 100) 1.905 -> 1.907 ( +0.10%) [ +0.05% +0.00% +0.21% / +0.10% +1.26% +0.94%] index_copy_ linear : Elapsed 0.019 ms (1.906 ms / 100) 1.961 -> 1.965 ( +0.20%) [ +0.00% +0.00% +0.20% / +0.20% +0.76% +0.92%] index_add_ reverse : Elapsed 0.020 ms (1.961 ms / 100) 1.911 -> 1.913 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.89% +0.94%] index_copy_ reverse : Elapsed 0.019 ms (1.911 ms / 100) 1.980 -> 1.981 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.76% +0.61%] index_add_ spread : Elapsed 0.020 ms (1.980 ms / 100) 1.926 -> 1.931 ( +0.26%) [ +0.16% +0.00% +0.26% / +0.26% +0.83% +1.04%] index_copy_ spread : Elapsed 0.019 ms (1.929 ms / 100) 1.972 -> 1.977 ( +0.25%) [ +0.25% +0.00% +0.25% / +0.25% +0.86% +0.91%] index_add_ strided 3 : Elapsed 0.020 ms (1.977 ms / 100) 1.927 -> 1.929 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.78% +0.78%] index_copy_ strided 3 : Elapsed 0.019 ms (1.927 ms / 100) 1.967 -> 1.971 ( +0.20%) [ +0.20% +0.05% +0.00% / +0.20% +0.92% +0.97%] index_add_ strided 7 : Elapsed 0.020 ms (1.971 ms / 100) 1.916 -> 1.920 ( +0.21%) [ +0.05% +0.00% +0.26% / +0.21% +0.99% +0.84%] index_copy_ strided 7 : Elapsed 0.019 ms (1.917 ms / 100) 1.968 -> 1.974 ( +0.30%) [ +0.00% +0.25% +0.05% / +0.30% +0.91% +0.86%] index_add_ perm : Elapsed 0.020 ms (1.968 ms / 100) 1.919 -> 1.925 ( +0.31%) [ +0.00% +0.16% +0.21% / +0.31% +0.68% +0.73%] index_copy_ perm : Elapsed 0.019 ms (1.919 ms / 100) 1.968 -> 1.970 ( +0.10%) [ +0.15% +0.00% +0.15% / +0.10% +0.71% +0.71%] index_add_ perm_sorted : Elapsed 0.020 ms (1.971 ms / 100) 1.918 -> 1.920 ( +0.10%) [ +0.00% +0.10% +0.42% / +0.10% +0.78% +0.83%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.918 ms / 100) 8.530 -> 8.540 ( +0.12%) [ +0.11% +0.00% +0.18% / +0.41% +0.12% +0.42%] index_select const : Elapsed 0.085 ms (8.539 ms / 100) 8.552 -> 8.579 ( +0.32%) [ +0.00% +0.48% +0.23% / +0.32% +0.41% +0.32%] index_select wrap : Elapsed 0.086 ms (8.552 ms / 100) 8.568 -> 8.560 ( -0.09%) [ +0.00% +0.07% +0.11% / +0.13% -0.09% +0.07%] index_select linear : Elapsed 0.086 ms (8.568 ms / 100) 8.567 -> 8.562 ( -0.06%) [ +0.13% +0.14% +0.00% / +0.06% +0.13% -0.06%] index_select reverse : Elapsed 0.086 ms (8.578 ms / 100) 8.548 -> 8.531 ( -0.20%) [ +0.00% +0.07% +0.11% / +0.16% -0.20% +0.25%] index_select skip64 : Elapsed 0.085 ms (8.548 ms / 100) 8.531 -> 8.551 ( +0.23%) [ +0.29% +0.00% +0.09% / +0.29% +0.23% +0.29%] index_select skip256 : Elapsed 0.086 ms (8.556 ms / 100) 8.584 -> 8.583 ( -0.01%) [ +0.06% +0.00% +0.17% / -0.01% +0.20% +0.16%] index_select spread : Elapsed 0.086 ms (8.589 ms / 100) 8.570 -> 8.560 ( -0.12%) [ +0.00% +0.11% +0.05% / -0.12% +0.02% +0.02%] index_select strided 3 : Elapsed 0.086 ms (8.570 ms / 100) 8.579 -> 8.566 ( -0.15%) [ +0.13% +0.00% +0.22% / -0.15% +0.13% +0.10%] index_select random : Elapsed 0.086 ms (8.590 ms / 100) 8.581 -> 8.573 ( -0.09%) [ +0.02% +0.00% +0.01% / -0.09% +0.10% +0.20%] index_select random_sorted : Elapsed 0.086 ms (8.583 ms / 100) B = [16, 4, 20, 40] (stride (1, 640, 2560, 16)) A = [16, 4, 5, 40] (stride (800, 40, 160, 1)) dim = 2 1.815 -> 1.814 ( -0.06%) [ +0.00% +0.33% +0.00% / -0.06% +1.38% +1.21%] index_add_ linear : Elapsed 0.018 ms (1.815 ms / 100) 1.766 -> 1.769 ( +0.17%) [ +0.00% +0.11% +0.23% / +0.17% +0.85% +1.02%] index_copy_ linear : Elapsed 0.018 ms (1.766 ms / 100) 1.813 -> 1.815 ( +0.11%) [ +0.00% +0.22% +0.33% / +0.17% +0.11% +0.33%] index_add_ reverse : Elapsed 0.018 ms (1.813 ms / 100) 1.767 -> 1.767 ( +0.00%) [ +0.00% +0.23% +0.06% / +0.00% +0.17% +0.23%] index_copy_ reverse : Elapsed 0.018 ms (1.767 ms / 100) 1.798 -> 1.801 ( +0.17%) [ +0.06% +0.00% +0.00% / +0.17% +1.95% +2.22%] index_add_ spread : Elapsed 0.018 ms (1.799 ms / 100) 1.754 -> 1.751 ( -0.17%) [ +0.06% +0.00% +0.06% / -0.17% +1.71% +1.60%] index_copy_ spread : Elapsed 0.018 ms (1.755 ms / 100) 1.815 -> 1.810 ( -0.28%) [ +0.00% +0.06% +0.11% / -0.28% +1.65% +1.38%] index_add_ strided 3 : Elapsed 0.018 ms (1.815 ms / 100) 1.767 -> 1.765 ( -0.11%) [ +0.00% +0.11% +0.06% / -0.11% +1.36% +1.08%] index_copy_ strided 3 : Elapsed 0.018 ms (1.767 ms / 100) 1.802 -> 1.804 ( +0.11%) [ +0.22% +0.33% +0.00% / +0.11% +1.94% +1.44%] index_add_ strided 7 : Elapsed 0.018 ms (1.806 ms / 100) 1.759 -> 1.757 ( -0.11%) [ +0.23% +0.06% +0.00% / -0.11% +0.97% +1.08%] index_copy_ strided 7 : Elapsed 0.018 ms (1.763 ms / 100) 1.802 -> 1.804 ( +0.11%) [ +0.17% +0.06% +0.00% / +0.11% +0.55% +0.55%] index_add_ perm : Elapsed 0.018 ms (1.805 ms / 100) 1.756 -> 1.759 ( +0.17%) [ +0.06% +0.17% +0.00% / +0.17% +0.63% +0.34%] index_copy_ perm : Elapsed 0.018 ms (1.757 ms / 100) 1.799 -> 1.805 ( +0.33%) [ +0.11% +0.39% +0.00% / +0.33% +0.89% +0.61%] index_add_ perm_sorted : Elapsed 0.018 ms (1.801 ms / 100) 1.758 -> 1.755 ( -0.17%) [ +0.06% +0.06% +0.00% / -0.17% +0.34% +0.51%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.759 ms / 100) 8.516 -> 8.518 ( +0.02%) [ +0.32% +0.15% +0.00% / +0.22% +0.22% +0.02%] index_select const : Elapsed 0.085 ms (8.543 ms / 100) 8.563 -> 8.568 ( +0.06%) [ +0.06% +0.18% +0.00% / +0.11% +0.06% +0.34%] index_select wrap : Elapsed 0.086 ms (8.568 ms / 100) 8.550 -> 8.552 ( +0.02%) [ +0.11% +0.00% +0.28% / +0.07% +0.02% +0.16%] index_select linear : Elapsed 0.086 ms (8.559 ms / 100) 8.561 -> 8.565 ( +0.05%) [ +0.11% +0.00% +0.22% / +0.11% +0.05% +0.05%] index_select reverse : Elapsed 0.086 ms (8.570 ms / 100) 8.516 -> 8.529 ( +0.15%) [ +0.15% +0.18% +0.00% / +0.16% +0.22% +0.15%] index_select skip64 : Elapsed 0.085 ms (8.529 ms / 100) 8.526 -> 8.511 ( -0.18%) [ +0.25% +0.00% +0.11% / +0.16% -0.18% +0.26%] index_select skip256 : Elapsed 0.085 ms (8.547 ms / 100) 8.534 -> 8.541 ( +0.08%) [ +0.12% +0.02% +0.00% / +0.08% +0.40% +0.36%] index_select spread : Elapsed 0.085 ms (8.544 ms / 100) 8.570 -> 8.577 ( +0.08%) [ +0.22% +0.08% +0.00% / +0.08% +0.11% +0.22%] index_select strided 3 : Elapsed 0.086 ms (8.589 ms / 100) 8.572 -> 8.564 ( -0.09%) [ +0.13% +0.27% +0.00% / -0.09% +0.16% +0.22%] index_select random : Elapsed 0.086 ms (8.583 ms / 100) 8.534 -> 8.554 ( +0.23%) [ +0.00% +0.33% +0.26% / +0.23% +0.34% +0.42%] index_select random_sorted : Elapsed 0.085 ms (8.534 ms / 100) B = [16, 4, 20, 40] (stride (1, 16, 2560, 64)) A = [16, 4, 5, 40] (stride (1, 80, 16, 320)) dim = 2 1.949 -> 1.958 ( +0.46%) [ +0.10% +0.00% +0.21% / +0.46% +1.03% +0.97%] index_add_ linear : Elapsed 0.020 ms (1.951 ms / 100) 1.898 -> 1.909 ( +0.58%) [ +0.00% +0.16% +0.32% / +0.58% +1.00% +1.05%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.957 -> 1.959 ( +0.10%) [ +0.00% +0.00% +0.20% / +0.10% +0.36% +0.56%] index_add_ reverse : Elapsed 0.020 ms (1.957 ms / 100) 1.905 -> 1.909 ( +0.21%) [ +0.00% +0.10% +0.16% / +0.21% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.019 ms (1.905 ms / 100) 1.949 -> 1.952 ( +0.15%) [ +0.00% +0.00% +0.21% / +0.15% +0.77% +0.82%] index_add_ spread : Elapsed 0.019 ms (1.949 ms / 100) 1.900 -> 1.902 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.68% +0.63%] index_copy_ spread : Elapsed 0.019 ms (1.900 ms / 100) 1.945 -> 1.944 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +1.34% +1.34%] index_add_ strided 3 : Elapsed 0.019 ms (1.946 ms / 100) 1.892 -> 1.899 ( +0.37%) [ +0.00% +0.05% +0.37% / +0.37% +1.59% +1.64%] index_copy_ strided 3 : Elapsed 0.019 ms (1.892 ms / 100) 1.942 -> 1.951 ( +0.46%) [ +0.00% +0.05% +0.31% / +0.46% +1.08% +1.13%] index_add_ strided 7 : Elapsed 0.019 ms (1.942 ms / 100) 1.891 -> 1.899 ( +0.42%) [ +0.00% +0.16% +0.63% / +0.42% +1.11% +0.95%] index_copy_ strided 7 : Elapsed 0.019 ms (1.891 ms / 100) 1.943 -> 1.948 ( +0.26%) [ +0.00% +0.21% +0.36% / +0.26% +1.44% +1.54%] index_add_ perm : Elapsed 0.019 ms (1.943 ms / 100) 1.894 -> 1.898 ( +0.21%) [ +0.00% +0.16% +0.26% / +0.21% +1.74% +1.64%] index_copy_ perm : Elapsed 0.019 ms (1.894 ms / 100) 1.947 -> 1.952 ( +0.26%) [ +0.15% +0.15% +0.00% / +0.26% +1.44% +1.39%] index_add_ perm_sorted : Elapsed 0.020 ms (1.950 ms / 100) 1.901 -> 1.904 ( +0.16%) [ +0.00% +0.11% +0.11% / +0.16% +1.53% +1.47%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.901 ms / 100) 8.523 -> 8.536 ( +0.15%) [ +0.16% +0.29% +0.00% / +0.22% +0.33% +0.15%] index_select const : Elapsed 0.085 ms (8.537 ms / 100) 8.545 -> 8.551 ( +0.07%) [ +0.00% +0.35% +0.27% / +0.07% +0.20% +0.13%] index_select wrap : Elapsed 0.085 ms (8.545 ms / 100) 8.550 -> 8.552 ( +0.02%) [ +0.04% +0.18% +0.00% / +0.12% +0.02% +0.11%] index_select linear : Elapsed 0.086 ms (8.553 ms / 100) 8.544 -> 8.546 ( +0.02%) [ +0.22% +0.12% +0.00% / +0.02% +0.22% +0.20%] index_select reverse : Elapsed 0.086 ms (8.563 ms / 100) 8.531 -> 8.535 ( +0.05%) [ +0.00% +0.01% +0.20% / +0.05% +0.18% +0.12%] index_select skip64 : Elapsed 0.085 ms (8.531 ms / 100) 8.531 -> 8.524 ( -0.08%) [ +0.00% +0.08% +0.19% / -0.08% +0.08% +0.12%] index_select skip256 : Elapsed 0.085 ms (8.531 ms / 100) 8.567 -> 8.582 ( +0.18%) [ +0.20% +0.00% +0.13% / +0.18% +0.36% +0.41%] index_select spread : Elapsed 0.086 ms (8.584 ms / 100) 8.558 -> 8.551 ( -0.08%) [ +0.00% +0.01% +0.14% / -0.08% +0.15% +0.04%] index_select strided 3 : Elapsed 0.086 ms (8.558 ms / 100) 8.557 -> 8.570 ( +0.15%) [ +0.11% +0.00% +0.25% / +0.15% +0.35% +0.19%] index_select random : Elapsed 0.086 ms (8.566 ms / 100) 8.562 -> 8.586 ( +0.28%) [ +0.00% +0.12% +0.06% / +0.28% +0.32% +0.34%] index_select random_sorted : Elapsed 0.086 ms (8.562 ms / 100) out_shape = [16, 4, 5, 20] in_shape = [16, 4, 5, 40] idx_dim = 3 B = [16, 4, 5, 20] (stride (400, 100, 1, 5)) A = [16, 4, 5, 40] (stride (1, 640, 2560, 16)) dim = 3 2.394 -> 2.396 ( +0.08%) [ +0.00% +0.00% +0.21% / +0.08% +0.17% +0.25%] index_select const : Elapsed 0.024 ms (2.394 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.00% +0.12% +0.17% / +0.17% +0.04% +0.08%] index_select wrap : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.00% +0.29% +0.12% / +0.12% +0.21% +0.25%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.00% +0.04% +0.17% / +0.04% +0.04% +0.04%] index_select reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.396 -> 2.399 ( +0.13%) [ +0.13% +0.00% +0.08% / +0.13% +0.21% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.00% +0.08% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.400 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.17% +0.29% +0.00% / +0.25% +0.04% +0.29%] index_select spread : Elapsed 0.024 ms (2.416 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.17% +0.25% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.12% +0.17% +0.00% / +0.17% +0.12% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.04% +0.21% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.400 -> 2.400 ( +0.00%) [ +0.13% +0.08% +0.00% / +0.00% +0.21% +0.04%] index_select strided 8 : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.00% +0.08% +0.12% / -0.04% +0.29% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.401 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.00% +0.08% +0.25% / +0.08% +0.17% -0.04%] index_select random : Elapsed 0.024 ms (2.411 ms / 100) 2.410 -> 2.413 ( +0.12%) [ +0.25% +0.17% +0.00% / +0.33% +0.17% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.412 ( -0.12%) [ +0.17% +0.04% +0.00% / +0.00% -0.08% -0.12%] index_select perm : Elapsed 0.024 ms (2.419 ms / 100) 2.415 -> 2.403 ( -0.50%) [ +0.00% +0.12% +0.12% / +0.00% -0.50% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) B = [16, 4, 5, 20] (stride (80, 1, 1280, 4)) A = [16, 4, 5, 40] (stride (800, 200, 40, 1)) dim = 3 2.366 -> 2.368 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.17% +0.13%] index_select const : Elapsed 0.024 ms (2.367 ms / 100) 2.373 -> 2.367 ( -0.25%) [ +0.00% +0.04% +0.04% / +0.00% -0.25% -0.08%] index_select wrap : Elapsed 0.024 ms (2.373 ms / 100) 2.371 -> 2.367 ( -0.17%) [ +0.00% +0.04% +0.08% / +0.08% -0.17% -0.13%] index_select linear : Elapsed 0.024 ms (2.371 ms / 100) 2.371 -> 2.370 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.04% +0.04%] index_select reverse : Elapsed 0.024 ms (2.372 ms / 100) 2.366 -> 2.368 ( +0.08%) [ +0.08% +0.13% +0.00% / +0.08% +0.21% +0.13%] index_select skip64 : Elapsed 0.024 ms (2.368 ms / 100) 2.364 -> 2.362 ( -0.08%) [ +0.00% +0.08% +0.13% / -0.08% +0.21% +0.34%] index_select skip256 : Elapsed 0.024 ms (2.364 ms / 100) 2.373 -> 2.377 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.34% +0.17% +0.25%] index_select spread : Elapsed 0.024 ms (2.373 ms / 100) 2.373 -> 2.373 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.21% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.373 ms / 100) 2.372 -> 2.376 ( +0.17%) [ +0.04% +0.00% +0.17% / +0.17% +0.46% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.373 ms / 100) 2.376 -> 2.377 ( +0.04%) [ +0.13% +0.21% +0.00% / +0.04% +0.04% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.379 ms / 100) 2.376 -> 2.374 ( -0.08%) [ +0.17% +0.00% +0.04% / -0.08% +0.21% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.380 ms / 100) 2.375 -> 2.376 ( +0.04%) [ +0.34% +0.00% +0.00% / +0.17% +0.04% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.383 ms / 100) 2.375 -> 2.376 ( +0.04%) [ +0.13% +0.00% +0.13% / +0.04% +0.25% +0.13%] index_select random : Elapsed 0.024 ms (2.378 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.04% +0.00% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.378 ms / 100) 2.376 -> 2.376 ( +0.00%) [ +0.21% +0.04% +0.00% / +0.00% +0.00% +0.17%] index_select perm : Elapsed 0.024 ms (2.381 ms / 100) 2.379 -> 2.374 ( -0.21%) [ +0.08% +0.00% +0.00% / -0.21% -0.04% -0.13%] index_select perm_sorted : Elapsed 0.024 ms (2.381 ms / 100) B = [16, 4, 5, 20] (stride (1, 320, 1280, 16)) A = [16, 4, 5, 40] (stride (4, 1, 64, 320)) dim = 3 2.407 -> 2.411 ( +0.17%) [ +0.00% +0.17% +0.21% / +0.17% +0.42% +0.37%] index_select const : Elapsed 0.024 ms (2.407 ms / 100) 2.422 -> 2.417 ( -0.21%) [ +0.08% +0.12% +0.00% / +0.00% -0.08% -0.21%] index_select wrap : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.00% +0.25% +0.17% / +0.29% -0.08% +0.12%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.424 -> 2.422 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.04% -0.08% +0.25%] index_select reverse : Elapsed 0.024 ms (2.425 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.17% +0.08% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.12% +0.12% +0.00% / -0.08% +0.29% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.414 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.00% +0.12% +0.17% / -0.04% +0.33% +0.08%] index_select spread : Elapsed 0.024 ms (2.420 ms / 100) 2.422 -> 2.423 ( +0.04%) [ +0.21% +0.00% +0.08% / +0.04% +0.12% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.08% +0.17% +0.00% / -0.04% -0.04% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.418 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.08% +0.00% +0.25% / +0.00% +0.04% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.08% +0.29%] index_select strided 8 : Elapsed 0.024 ms (2.412 ms / 100) 2.410 -> 2.414 ( +0.17%) [ +0.33% +0.25% +0.00% / +0.17% +0.21% +0.41%] index_select strided 16 : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.420 ( +0.17%) [ +0.00% +0.25% +0.12% / +0.17% +0.17% +0.25%] index_select random : Elapsed 0.024 ms (2.416 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.25% +0.29% +0.00% / +0.08% +0.29% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.420 ms / 100) 2.419 -> 2.421 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.12% +0.08% +0.25%] index_select perm : Elapsed 0.024 ms (2.421 ms / 100) 2.421 -> 2.415 ( -0.25%) [ +0.04% +0.25% +0.00% / +0.04% -0.21% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) out_shape = [20, 4, 40, 5] in_shape = [16, 4, 40, 5] idx_dim = 0 B = [20, 4, 40, 5] (stride (800, 1, 20, 4)) A = [16, 4, 40, 5] (stride (800, 1, 4, 160)) dim = 0 3.768 -> 3.770 ( +0.05%) [ +0.00% +0.16% +0.11% / +0.05% +0.82% +0.98%] index_add_ linear : Elapsed 0.038 ms (3.768 ms / 100) 3.627 -> 3.630 ( +0.08%) [ +0.00% +0.08% +0.11% / +0.08% +1.02% +0.91%] index_copy_ linear : Elapsed 0.036 ms (3.627 ms / 100) 3.784 -> 3.783 ( -0.03%) [ +0.16% +0.08% +0.00% / -0.03% +0.69% +0.69%] index_add_ reverse : Elapsed 0.038 ms (3.790 ms / 100) 3.641 -> 3.644 ( +0.08%) [ +0.00% +0.03% +0.00% / +0.08% +0.63% +0.69%] index_copy_ reverse : Elapsed 0.036 ms (3.641 ms / 100) 3.776 -> 3.781 ( +0.13%) [ +0.19% +0.26% +0.00% / +0.13% +0.79% +0.87%] index_add_ spread : Elapsed 0.038 ms (3.783 ms / 100) 3.631 -> 3.633 ( +0.06%) [ +0.17% +0.19% +0.00% / +0.06% +0.85% +0.83%] index_copy_ spread : Elapsed 0.036 ms (3.637 ms / 100) 3.772 -> 3.773 ( +0.03%) [ +0.16% +0.00% +0.08% / +0.03% +0.72% +0.61%] index_add_ strided 3 : Elapsed 0.038 ms (3.778 ms / 100) 3.641 -> 3.639 ( -0.05%) [ +0.00% +0.05% +0.03% / -0.05% +0.77% +0.63%] index_copy_ strided 3 : Elapsed 0.036 ms (3.641 ms / 100) 3.768 -> 3.773 ( +0.13%) [ +0.00% +0.00% +0.11% / +0.13% +0.88% +0.90%] index_add_ strided 7 : Elapsed 0.038 ms (3.768 ms / 100) 3.642 -> 3.643 ( +0.03%) [ +0.03% +0.00% +0.05% / +0.03% +0.58% +0.69%] index_copy_ strided 7 : Elapsed 0.036 ms (3.643 ms / 100) 3.776 -> 3.785 ( +0.24%) [ +0.19% +0.00% +0.11% / +0.24% +1.01% +0.72%] index_add_ perm : Elapsed 0.038 ms (3.783 ms / 100) 3.633 -> 3.637 ( +0.11%) [ +0.11% +0.00% +0.08% / +0.11% +0.80% +0.72%] index_copy_ perm : Elapsed 0.036 ms (3.637 ms / 100) 3.783 -> 3.784 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.74% +0.66%] index_add_ perm_sorted : Elapsed 0.038 ms (3.786 ms / 100) 3.641 -> 3.641 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.77% +0.63%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.641 ms / 100) 5.460 -> 5.465 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +0.15% +0.15%] index_select const : Elapsed 0.055 ms (5.462 ms / 100) 5.480 -> 5.473 ( -0.13%) [ +0.13% +0.09% +0.00% / -0.04% -0.13% -0.09%] index_select wrap : Elapsed 0.055 ms (5.487 ms / 100) 5.469 -> 5.476 ( +0.13%) [ +0.16% +0.22% +0.00% / +0.24% +0.22% +0.13%] index_select linear : Elapsed 0.055 ms (5.478 ms / 100) 5.478 -> 5.481 ( +0.05%) [ +0.05% +0.15% +0.00% / +0.13% +0.05% +0.11%] index_select reverse : Elapsed 0.055 ms (5.481 ms / 100) 5.458 -> 5.460 ( +0.04%) [ +0.00% +0.15% +0.13% / +0.04% +0.15% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.458 ms / 100) 5.464 -> 5.462 ( -0.04%) [ +0.00% +0.07% +0.02% / -0.04% +0.29% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.464 ms / 100) 5.476 -> 5.472 ( -0.07%) [ +0.15% +0.02% +0.00% / -0.07% +0.09% +0.07%] index_select spread : Elapsed 0.055 ms (5.484 ms / 100) 5.474 -> 5.479 ( +0.09%) [ +0.09% +0.00% +0.16% / +0.13% +0.09% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.479 ms / 100) 5.479 -> 5.474 ( -0.09%) [ +0.00% +0.04% +0.07% / +0.02% +0.04% -0.09%] index_select strided 5 : Elapsed 0.055 ms (5.479 ms / 100) 5.470 -> 5.475 ( +0.09%) [ +0.00% +0.24% +0.05% / +0.09% +0.11% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.470 ms / 100) 5.463 -> 5.466 ( +0.05%) [ +0.13% +0.11% +0.00% / +0.05% +0.05% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.466 -> 5.477 ( +0.20%) [ +0.26% +0.20% +0.00% / +0.24% +0.24% +0.20%] index_select random : Elapsed 0.055 ms (5.480 ms / 100) 5.471 -> 5.473 ( +0.04%) [ +0.02% +0.11% +0.00% / +0.07% +0.16% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.472 ms / 100) B = [20, 4, 40, 5] (stride (800, 1, 20, 4)) A = [16, 4, 40, 5] (stride (5, 80, 320, 1)) dim = 0 3.751 -> 3.755 ( +0.11%) [ +0.08% +0.00% +0.00% / +0.11% +0.85% +0.85%] index_add_ linear : Elapsed 0.038 ms (3.754 ms / 100) 3.611 -> 3.610 ( -0.03%) [ +0.00% +0.08% +0.00% / -0.03% +0.78% +0.78%] index_copy_ linear : Elapsed 0.036 ms (3.611 ms / 100) 3.753 -> 3.754 ( +0.03%) [ +0.00% +0.11% +0.00% / +0.03% +0.69% +0.80%] index_add_ reverse : Elapsed 0.038 ms (3.753 ms / 100) 3.614 -> 3.617 ( +0.08%) [ +0.03% +0.19% +0.00% / +0.08% +0.77% +0.86%] index_copy_ reverse : Elapsed 0.036 ms (3.615 ms / 100) 3.752 -> 3.751 ( -0.03%) [ +0.00% +0.11% +0.00% / -0.03% +0.72% +0.67%] index_add_ spread : Elapsed 0.038 ms (3.752 ms / 100) 3.612 -> 3.611 ( -0.03%) [ +0.00% +0.28% +0.14% / -0.03% +0.75% +0.83%] index_copy_ spread : Elapsed 0.036 ms (3.612 ms / 100) 3.742 -> 3.743 ( +0.03%) [ +0.00% +0.13% +0.03% / +0.03% +0.75% +0.75%] index_add_ strided 3 : Elapsed 0.037 ms (3.742 ms / 100) 3.608 -> 3.611 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.72% +0.69%] index_copy_ strided 3 : Elapsed 0.036 ms (3.608 ms / 100) 3.753 -> 3.752 ( -0.03%) [ +0.11% +0.13% +0.00% / -0.03% +0.69% +0.75%] index_add_ strided 7 : Elapsed 0.038 ms (3.757 ms / 100) 3.612 -> 3.617 ( +0.14%) [ +0.06% +0.33% +0.00% / +0.14% +0.83% +0.78%] index_copy_ strided 7 : Elapsed 0.036 ms (3.614 ms / 100) 3.751 -> 3.755 ( +0.11%) [ +0.11% +0.16% +0.00% / +0.11% +0.85% +0.85%] index_add_ perm : Elapsed 0.038 ms (3.755 ms / 100) 3.608 -> 3.608 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.86% +0.89%] index_copy_ perm : Elapsed 0.036 ms (3.609 ms / 100) 3.740 -> 3.742 ( +0.05%) [ +0.08% +0.13% +0.00% / +0.05% +0.78% +0.83%] index_add_ perm_sorted : Elapsed 0.037 ms (3.743 ms / 100) 3.610 -> 3.611 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.61% +0.66%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.610 ms / 100) 5.464 -> 5.471 ( +0.13%) [ +0.15% +0.20% +0.00% / +0.13% +0.16% +0.20%] index_select const : Elapsed 0.055 ms (5.472 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.09% +0.15% +0.00% / +0.13% +0.07% +0.22%] index_select wrap : Elapsed 0.055 ms (5.486 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.00% +0.04% +0.07% / -0.02% +0.13% +0.11%] index_select linear : Elapsed 0.055 ms (5.484 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.05% +0.13% +0.00% / +0.05% +0.13% +0.05%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.466 -> 5.466 ( +0.00%) [ +0.00% +0.15% +0.11% / +0.00% +0.11% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.466 ms / 100) 5.463 -> 5.464 ( +0.02%) [ +0.00% +0.09% +0.00% / +0.02% +0.18% +0.27%] index_select skip256 : Elapsed 0.055 ms (5.463 ms / 100) 5.480 -> 5.485 ( +0.09%) [ +0.00% +0.07% +0.07% / +0.11% +0.29% +0.09%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.00% +0.04% +0.02% / -0.02% +0.05% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.483 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.02% +0.16% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.481 -> 5.487 ( +0.11%) [ +0.09% +0.15% +0.00% / +0.11% +0.16% +0.18%] index_select strided 7 : Elapsed 0.055 ms (5.486 ms / 100) 5.463 -> 5.472 ( +0.16%) [ +0.00% +0.11% +0.20% / +0.16% +0.24% +0.40%] index_select strided 8 : Elapsed 0.055 ms (5.463 ms / 100) 5.484 -> 5.487 ( +0.05%) [ +0.16% +0.04% +0.00% / +0.05% +0.13% +0.13%] index_select random : Elapsed 0.055 ms (5.493 ms / 100) 5.484 -> 5.488 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.15% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.487 ms / 100) B = [20, 4, 40, 5] (stride (1, 4000, 20, 800)) A = [16, 4, 40, 5] (stride (1, 16, 64, 2560)) dim = 0 4.459 -> 4.460 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.67% +0.54%] index_add_ linear : Elapsed 0.045 ms (4.460 ms / 100) 4.283 -> 4.285 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.72% +0.72%] index_copy_ linear : Elapsed 0.043 ms (4.284 ms / 100) 4.428 -> 4.440 ( +0.27%) [ +0.00% +0.32% +0.34% / +0.27% +0.99% +0.65%] index_add_ reverse : Elapsed 0.044 ms (4.428 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.00% +0.12% +0.02% / +0.05% +0.86% +0.75%] index_copy_ reverse : Elapsed 0.043 ms (4.278 ms / 100) 4.435 -> 4.438 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.83% +0.72%] index_add_ spread : Elapsed 0.044 ms (4.435 ms / 100) 4.279 -> 4.283 ( +0.09%) [ +0.07% +0.14% +0.00% / +0.09% +0.82% +0.75%] index_copy_ spread : Elapsed 0.043 ms (4.282 ms / 100) 4.433 -> 4.433 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.00% +0.77% +0.83%] index_add_ strided 3 : Elapsed 0.044 ms (4.435 ms / 100) 4.292 -> 4.295 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.63% +0.54%] index_copy_ strided 3 : Elapsed 0.043 ms (4.292 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.18% +0.00% +0.02% / +0.07% +0.54% +0.59%] index_add_ strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.292 -> 4.290 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.63% +0.56%] index_copy_ strided 7 : Elapsed 0.043 ms (4.293 ms / 100) 4.442 -> 4.444 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.47% +0.61%] index_add_ perm : Elapsed 0.044 ms (4.442 ms / 100) 4.279 -> 4.283 ( +0.09%) [ +0.05% +0.30% +0.00% / +0.09% +0.84% +0.77%] index_copy_ perm : Elapsed 0.043 ms (4.281 ms / 100) 4.436 -> 4.444 ( +0.18%) [ +0.16% +0.09% +0.00% / +0.18% +0.72% +0.72%] index_add_ perm_sorted : Elapsed 0.044 ms (4.443 ms / 100) 4.280 -> 4.284 ( +0.09%) [ +0.00% +0.02% +0.07% / +0.09% +0.72% +0.65%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.280 ms / 100) 5.577 -> 5.573 ( -0.07%) [ +0.04% +0.00% +0.00% / +0.04% -0.02% -0.07%] index_select const : Elapsed 0.056 ms (5.579 ms / 100) 5.580 -> 5.580 ( +0.00%) [ +0.00% +0.13% +0.11% / +0.02% +0.00% +0.00%] index_select wrap : Elapsed 0.056 ms (5.580 ms / 100) 5.584 -> 5.578 ( -0.11%) [ +0.00% +0.14% +0.02% / +0.05% +0.07% -0.11%] index_select linear : Elapsed 0.056 ms (5.584 ms / 100) 5.582 -> 5.582 ( +0.00%) [ +0.04% +0.00% +0.20% / +0.09% +0.05% +0.00%] index_select reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.16% +0.00% +0.14% / +0.05% +0.18% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.580 ms / 100) 5.565 -> 5.579 ( +0.25%) [ +0.20% +0.29% +0.00% / +0.27% +0.25% +0.25%] index_select skip256 : Elapsed 0.056 ms (5.576 ms / 100) 5.585 -> 5.585 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.05% +0.00% +0.02%] index_select spread : Elapsed 0.056 ms (5.587 ms / 100) 5.587 -> 5.583 ( -0.07%) [ +0.13% +0.07% +0.00% / -0.07% -0.07% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.586 -> 5.581 ( -0.09%) [ +0.00% +0.09% +0.13% / +0.02% -0.09% -0.05%] index_select strided 5 : Elapsed 0.056 ms (5.586 ms / 100) 5.587 -> 5.581 ( -0.11%) [ +0.07% +0.11% +0.00% / +0.04% -0.11% -0.07%] index_select strided 7 : Elapsed 0.056 ms (5.591 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.16% +0.00% +0.18% / +0.18% +0.04% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.588 ms / 100) 5.584 -> 5.579 ( -0.09%) [ +0.00% +0.16% +0.07% / +0.07% -0.09% -0.02%] index_select random : Elapsed 0.056 ms (5.584 ms / 100) 5.578 -> 5.580 ( +0.04%) [ +0.22% +0.00% +0.09% / +0.16% +0.04% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.590 ms / 100) B = [20, 4, 40, 5] (stride (20, 5, 400, 1)) A = [16, 4, 40, 5] (stride (800, 1, 20, 4)) dim = 0 3.646 -> 3.644 ( -0.05%) [ +0.00% +0.08% +0.00% / -0.05% +0.74% +0.77%] index_add_ linear : Elapsed 0.036 ms (3.646 ms / 100) 3.518 -> 3.523 ( +0.14%) [ +0.14% +0.34% +0.00% / +0.14% +0.74% +0.77%] index_copy_ linear : Elapsed 0.035 ms (3.523 ms / 100) 3.649 -> 3.648 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.77% +0.74%] index_add_ reverse : Elapsed 0.037 ms (3.651 ms / 100) 3.519 -> 3.517 ( -0.06%) [ +0.00% +0.11% +0.03% / -0.06% +0.74% +0.74%] index_copy_ reverse : Elapsed 0.035 ms (3.519 ms / 100) 3.646 -> 3.647 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.69% +0.71%] index_add_ spread : Elapsed 0.036 ms (3.647 ms / 100) 3.529 -> 3.532 ( +0.09%) [ +0.03% +0.06% +0.00% / +0.09% +0.79% +0.79%] index_copy_ spread : Elapsed 0.035 ms (3.530 ms / 100) 3.649 -> 3.652 ( +0.08%) [ +0.14% +0.03% +0.00% / +0.08% +0.88% +0.85%] index_add_ strided 3 : Elapsed 0.037 ms (3.654 ms / 100) 3.534 -> 3.532 ( -0.06%) [ +0.00% +0.11% +0.08% / -0.06% +0.93% +0.82%] index_copy_ strided 3 : Elapsed 0.035 ms (3.534 ms / 100) 3.649 -> 3.650 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.74% +0.71%] index_add_ strided 7 : Elapsed 0.037 ms (3.650 ms / 100) 3.518 -> 3.518 ( +0.00%) [ +0.06% +0.11% +0.00% / +0.00% +0.82% +0.88%] index_copy_ strided 7 : Elapsed 0.035 ms (3.520 ms / 100) 3.646 -> 3.645 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.74% +0.77%] index_add_ perm : Elapsed 0.036 ms (3.646 ms / 100) 3.519 -> 3.522 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.77% +0.74%] index_copy_ perm : Elapsed 0.035 ms (3.522 ms / 100) 3.653 -> 3.652 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.66% +0.77%] index_add_ perm_sorted : Elapsed 0.037 ms (3.654 ms / 100) 3.533 -> 3.540 ( +0.20%) [ +0.00% +0.23% +0.23% / +0.20% +0.93% +0.79%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.533 ms / 100) 5.478 -> 5.466 ( -0.22%) [ +0.00% +0.05% +0.04% / -0.22% -0.04% -0.04%] index_select const : Elapsed 0.055 ms (5.478 ms / 100) 5.484 -> 5.481 ( -0.05%) [ +0.04% +0.00% +0.07% / -0.05% +0.00% +0.13%] index_select wrap : Elapsed 0.055 ms (5.486 ms / 100) 5.478 -> 5.483 ( +0.09%) [ +0.00% +0.18% +0.16% / +0.09% +0.18% +0.27%] index_select linear : Elapsed 0.055 ms (5.478 ms / 100) 5.483 -> 5.484 ( +0.02%) [ +0.00% +0.09% +0.00% / +0.02% +0.07% +0.07%] index_select reverse : Elapsed 0.055 ms (5.483 ms / 100) 5.475 -> 5.473 ( -0.04%) [ +0.00% +0.15% +0.04% / -0.04% +0.18% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.475 ms / 100) 5.479 -> 5.474 ( -0.09%) [ +0.00% +0.04% +0.04% / -0.09% +0.00% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.00% +0.07% +0.09% / +0.04% +0.05% +0.07%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.02% +0.00% +0.05% / +0.07% +0.20% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.482 ms / 100) 5.481 -> 5.479 ( -0.04%) [ +0.00% +0.15% +0.11% / +0.16% +0.07% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.484 -> 5.481 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.11% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.473 -> 5.481 ( +0.15%) [ +0.00% +0.09% +0.02% / +0.15% +0.24% +0.27%] index_select strided 8 : Elapsed 0.055 ms (5.473 ms / 100) 5.477 -> 5.476 ( -0.02%) [ +0.13% +0.00% +0.26% / -0.02% +0.04% +0.15%] index_select random : Elapsed 0.055 ms (5.484 ms / 100) 5.477 -> 5.483 ( +0.11%) [ +0.07% +0.04% +0.00% / +0.11% +0.20% +0.22%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [20, 4, 40, 5] (stride (5, 100, 400, 1)) A = [16, 4, 40, 5] (stride (800, 200, 1, 40)) dim = 0 4.048 -> 4.055 ( +0.17%) [ +0.07% +0.20% +0.00% / +0.17% +0.79% +0.84%] index_add_ linear : Elapsed 0.041 ms (4.051 ms / 100) 3.913 -> 3.912 ( -0.03%) [ +0.00% +0.26% +0.20% / -0.03% +0.66% +0.64%] index_copy_ linear : Elapsed 0.039 ms (3.913 ms / 100) 4.038 -> 4.037 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.74% +0.72%] index_add_ reverse : Elapsed 0.040 ms (4.038 ms / 100) 3.906 -> 3.912 ( +0.15%) [ +0.20% +0.05% +0.00% / +0.15% +0.72% +0.84%] index_copy_ reverse : Elapsed 0.039 ms (3.914 ms / 100) 4.038 -> 4.037 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.69% +0.72%] index_add_ spread : Elapsed 0.040 ms (4.038 ms / 100) 3.910 -> 3.909 ( -0.03%) [ +0.00% +0.03% +0.31% / -0.03% +0.59% +0.49%] index_copy_ spread : Elapsed 0.039 ms (3.910 ms / 100) 4.031 -> 4.029 ( -0.05%) [ +0.00% +0.02% +0.00% / -0.05% +0.79% +0.74%] index_add_ strided 3 : Elapsed 0.040 ms (4.031 ms / 100) 3.900 -> 3.908 ( +0.21%) [ +0.08% +0.15% +0.00% / +0.21% +0.69% +0.64%] index_copy_ strided 3 : Elapsed 0.039 ms (3.903 ms / 100) 4.030 -> 4.030 ( +0.00%) [ +0.05% +0.22% +0.00% / +0.00% +0.77% +0.77%] index_add_ strided 7 : Elapsed 0.040 ms (4.032 ms / 100) 3.902 -> 3.900 ( -0.05%) [ +0.00% +0.31% +0.00% / -0.05% +0.62% +0.62%] index_copy_ strided 7 : Elapsed 0.039 ms (3.902 ms / 100) 4.042 -> 4.038 ( -0.10%) [ +0.00% +0.02% +0.00% / -0.10% +0.62% +0.59%] index_add_ perm : Elapsed 0.040 ms (4.042 ms / 100) 3.908 -> 3.915 ( +0.18%) [ +0.08% +0.36% +0.00% / +0.18% +0.64% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.911 ms / 100) 4.039 -> 4.038 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.59% +0.67%] index_add_ perm_sorted : Elapsed 0.040 ms (4.039 ms / 100) 3.913 -> 3.921 ( +0.20%) [ +0.13% +0.31% +0.00% / +0.20% +0.61% +0.59%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.918 ms / 100) 5.550 -> 5.551 ( +0.02%) [ +0.09% +0.14% +0.00% / +0.16% +0.02% +0.11%] index_select const : Elapsed 0.056 ms (5.555 ms / 100) 5.566 -> 5.568 ( +0.04%) [ +0.09% +0.11% +0.00% / +0.13% +0.09% +0.04%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% -0.02% +0.04%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.11% +0.00% +0.05% / +0.00% -0.07% +0.16%] index_select reverse : Elapsed 0.056 ms (5.576 ms / 100) 5.554 -> 5.551 ( -0.05%) [ +0.00% +0.05% +0.02% / -0.05% +0.05% +0.20%] index_select skip64 : Elapsed 0.056 ms (5.554 ms / 100) 5.551 -> 5.548 ( -0.05%) [ +0.00% +0.11% +0.05% / -0.05% +0.18% +0.20%] index_select skip256 : Elapsed 0.056 ms (5.551 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.13% +0.11% +0.00% / +0.16% +0.13% +0.09%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.564 ( -0.13%) [ +0.02% +0.04% +0.00% / -0.04% -0.13% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.00% +0.04% +0.04% / -0.05% -0.05% -0.07%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.570 ( +0.05%) [ +0.09% +0.00% +0.04% / +0.05% +0.14% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.557 -> 5.549 ( -0.14%) [ +0.07% +0.04% +0.00% / -0.14% +0.11% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.561 ms / 100) 5.567 -> 5.571 ( +0.07%) [ +0.00% +0.07% +0.04% / +0.07% +0.13% +0.07%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.570 -> 5.565 ( -0.09%) [ +0.02% +0.00% +0.07% / -0.09% +0.00% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [20, 4, 40, 5] (stride (40, 800, 1, 3200)) A = [16, 4, 40, 5] (stride (40, 640, 1, 2560)) dim = 0 4.046 -> 4.045 ( -0.02%) [ +0.02% +0.07% +0.00% / -0.02% +0.82% +0.72%] index_add_ linear : Elapsed 0.040 ms (4.047 ms / 100) 3.915 -> 3.911 ( -0.10%) [ +0.00% +0.13% +0.05% / -0.10% +0.64% +0.56%] index_copy_ linear : Elapsed 0.039 ms (3.915 ms / 100) 4.040 -> 4.042 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.67% +0.67%] index_add_ reverse : Elapsed 0.040 ms (4.043 ms / 100) 3.911 -> 3.925 ( +0.36%) [ +0.00% +0.08% +0.15% / +0.36% +0.61% +0.49%] index_copy_ reverse : Elapsed 0.039 ms (3.911 ms / 100) 4.054 -> 4.059 ( +0.12%) [ +0.12% +0.00% +0.10% / +0.12% +0.76% +0.76%] index_add_ spread : Elapsed 0.041 ms (4.059 ms / 100) 3.916 -> 3.919 ( +0.08%) [ +0.00% +0.05% +0.15% / +0.08% +0.84% +0.79%] index_copy_ spread : Elapsed 0.039 ms (3.916 ms / 100) 4.056 -> 4.064 ( +0.20%) [ +0.02% +0.15% +0.00% / +0.20% +0.71% +0.74%] index_add_ strided 3 : Elapsed 0.041 ms (4.057 ms / 100) 3.917 -> 3.919 ( +0.05%) [ +0.00% +0.03% +0.13% / +0.05% +0.69% +0.69%] index_copy_ strided 3 : Elapsed 0.039 ms (3.917 ms / 100) 4.039 -> 4.040 ( +0.02%) [ +0.00% +0.10% +0.00% / +0.02% +0.67% +0.74%] index_add_ strided 7 : Elapsed 0.040 ms (4.039 ms / 100) 3.906 -> 3.911 ( +0.13%) [ +0.08% +0.10% +0.00% / +0.13% +0.67% +0.69%] index_copy_ strided 7 : Elapsed 0.039 ms (3.909 ms / 100) 4.046 -> 4.049 ( +0.07%) [ +0.12% +0.12% +0.00% / +0.07% +0.72% +0.72%] index_add_ perm : Elapsed 0.041 ms (4.051 ms / 100) 3.913 -> 3.912 ( -0.03%) [ +0.23% +0.20% +0.00% / -0.03% +0.66% +0.74%] index_copy_ perm : Elapsed 0.039 ms (3.922 ms / 100) 4.056 -> 4.062 ( +0.15%) [ +0.12% +0.07% +0.00% / +0.15% +0.81% +0.74%] index_add_ perm_sorted : Elapsed 0.041 ms (4.061 ms / 100) 3.915 -> 3.922 ( +0.18%) [ +0.18% +0.20% +0.00% / +0.18% +0.72% +0.74%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.00% +0.11% +0.02% / -0.05% +0.18% +0.02%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.586 -> 5.578 ( -0.14%) [ +0.02% +0.00% +0.07% / -0.14% -0.05% +0.05%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.575 -> 5.580 ( +0.09%) [ +0.00% +0.18% +0.07% / +0.09% +0.27% +0.20%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.569 -> 5.581 ( +0.22%) [ +0.22% +0.29% +0.00% / +0.22% +0.34% +0.36%] index_select reverse : Elapsed 0.056 ms (5.581 ms / 100) 5.554 -> 5.562 ( +0.14%) [ +0.00% +0.25% +0.13% / +0.14% +0.27% +0.22%] index_select skip64 : Elapsed 0.056 ms (5.554 ms / 100) 5.557 -> 5.560 ( +0.05%) [ +0.00% +0.25% +0.02% / +0.05% +0.29% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.581 -> 5.584 ( +0.05%) [ +0.00% +0.05% +0.16% / +0.05% +0.18% +0.13%] index_select spread : Elapsed 0.056 ms (5.581 ms / 100) 5.577 -> 5.584 ( +0.13%) [ +0.20% +0.00% +0.18% / +0.13% +0.23% +0.16%] index_select strided 3 : Elapsed 0.056 ms (5.588 ms / 100) 5.577 -> 5.581 ( +0.07%) [ +0.09% +0.20% +0.00% / +0.07% +0.14% +0.23%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.584 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.09% +0.25% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.587 ms / 100) 5.557 -> 5.565 ( +0.14%) [ +0.00% +0.07% +0.09% / +0.16% +0.14% +0.38%] index_select strided 8 : Elapsed 0.056 ms (5.557 ms / 100) 5.576 -> 5.575 ( -0.02%) [ +0.13% +0.00% +0.05% / -0.02% +0.18% +0.09%] index_select random : Elapsed 0.056 ms (5.583 ms / 100) 5.580 -> 5.572 ( -0.14%) [ +0.00% +0.04% +0.00% / -0.14% +0.04% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) out_shape = [16, 20, 40, 5] in_shape = [16, 4, 40, 5] idx_dim = 1 B = [16, 20, 40, 5] (stride (4000, 5, 100, 1)) A = [16, 4, 40, 5] (stride (1, 3200, 80, 16)) dim = 1 2.028 -> 2.030 ( +0.10%) [ +0.00% +0.30% +0.10% / +0.10% +0.64% +0.49%] index_add_ linear : Elapsed 0.020 ms (2.028 ms / 100) 1.980 -> 1.980 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.35% +0.35%] index_copy_ linear : Elapsed 0.020 ms (1.981 ms / 100) 2.027 -> 2.028 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.59% +0.59%] index_add_ reverse : Elapsed 0.020 ms (2.027 ms / 100) 1.980 -> 1.982 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.30% +0.30%] index_copy_ reverse : Elapsed 0.020 ms (1.981 ms / 100) 2.017 -> 2.015 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.64% +0.59%] index_add_ spread : Elapsed 0.020 ms (2.018 ms / 100) 1.987 -> 1.987 ( +0.00%) [ +0.00% +0.10% +0.15% / +0.00% +0.40% +0.55%] index_copy_ spread : Elapsed 0.020 ms (1.987 ms / 100) 2.031 -> 2.032 ( +0.05%) [ +0.30% +0.25% +0.00% / +0.05% +0.84% +0.79%] index_add_ strided 3 : Elapsed 0.020 ms (2.037 ms / 100) 2.002 -> 2.005 ( +0.15%) [ +0.00% +0.15% +0.05% / +0.15% +0.75% +0.90%] index_copy_ strided 3 : Elapsed 0.020 ms (2.002 ms / 100) 2.006 -> 2.007 ( +0.05%) [ +0.05% +0.15% +0.00% / +0.05% +0.55% +0.60%] index_add_ strided 7 : Elapsed 0.020 ms (2.007 ms / 100) 1.972 -> 1.974 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.56% +0.51%] index_copy_ strided 7 : Elapsed 0.020 ms (1.972 ms / 100) 2.031 -> 2.028 ( -0.15%) [ +0.10% +0.05% +0.00% / -0.15% +0.34% +0.54%] index_add_ perm : Elapsed 0.020 ms (2.033 ms / 100) 1.997 -> 1.999 ( +0.10%) [ +0.20% +0.00% +0.05% / +0.10% +0.75% +0.80%] index_copy_ perm : Elapsed 0.020 ms (2.001 ms / 100) 2.028 -> 2.028 ( +0.00%) [ +0.05% +0.35% +0.00% / +0.00% +0.39% +0.49%] index_add_ perm_sorted : Elapsed 0.020 ms (2.029 ms / 100) 1.994 -> 1.997 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.50% +0.60%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.997 ms / 100) 8.802 -> 8.807 ( +0.06%) [ +0.01% +0.11% +0.00% / +0.06% +0.27% +0.45%] index_select const : Elapsed 0.088 ms (8.803 ms / 100) 8.851 -> 8.858 ( +0.08%) [ +0.21% +0.06% +0.00% / +0.08% +0.42% +0.21%] index_select wrap : Elapsed 0.089 ms (8.870 ms / 100) 8.840 -> 8.835 ( -0.06%) [ +0.00% +0.31% +0.02% / -0.06% +0.17% +0.41%] index_select linear : Elapsed 0.088 ms (8.840 ms / 100) 8.851 -> 8.841 ( -0.11%) [ +0.05% +0.03% +0.00% / -0.11% +0.16% +0.25%] index_select reverse : Elapsed 0.089 ms (8.855 ms / 100) 8.814 -> 8.818 ( +0.05%) [ +0.09% +0.08% +0.00% / +0.05% +0.22% +0.26%] index_select skip64 : Elapsed 0.088 ms (8.822 ms / 100) 8.800 -> 8.827 ( +0.31%) [ +0.00% +0.15% +0.02% / +0.31% +0.49% +0.44%] index_select skip256 : Elapsed 0.088 ms (8.800 ms / 100) 8.855 -> 8.851 ( -0.05%) [ +0.06% +0.00% +0.17% / -0.05% +0.10% +0.23%] index_select spread : Elapsed 0.089 ms (8.860 ms / 100) 8.858 -> 8.864 ( +0.07%) [ +0.00% +0.21% +0.14% / +0.07% +0.27% +0.17%] index_select strided 3 : Elapsed 0.089 ms (8.858 ms / 100) 8.865 -> 8.860 ( -0.06%) [ +0.00% +0.03% +0.03% / -0.05% +0.27% -0.06%] index_select random : Elapsed 0.089 ms (8.865 ms / 100) 8.859 -> 8.869 ( +0.11%) [ +0.00% +0.15% +0.06% / +0.11% +0.15% +0.17%] index_select random_sorted : Elapsed 0.089 ms (8.859 ms / 100) B = [16, 20, 40, 5] (stride (5, 3200, 80, 1)) A = [16, 4, 40, 5] (stride (1, 3200, 80, 16)) dim = 1 2.031 -> 2.031 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.49% +0.20%] index_add_ linear : Elapsed 0.020 ms (2.032 ms / 100) 1.973 -> 1.976 ( +0.15%) [ +0.00% +0.10% +0.00% / +0.15% +0.46% +0.56%] index_copy_ linear : Elapsed 0.020 ms (1.973 ms / 100) 2.025 -> 2.027 ( +0.10%) [ +0.00% +0.20% +0.25% / +0.10% +0.54% +0.59%] index_add_ reverse : Elapsed 0.020 ms (2.025 ms / 100) 1.972 -> 1.976 ( +0.20%) [ +0.00% +0.20% +0.00% / +0.20% +0.35% +0.35%] index_copy_ reverse : Elapsed 0.020 ms (1.972 ms / 100) 2.031 -> 2.033 ( +0.10%) [ +0.00% +0.34% +0.00% / +0.10% +0.34% +0.34%] index_add_ spread : Elapsed 0.020 ms (2.031 ms / 100) 1.968 -> 1.974 ( +0.30%) [ +0.36% +0.20% +0.00% / +0.30% +0.66% +0.71%] index_copy_ spread : Elapsed 0.020 ms (1.975 ms / 100) 2.034 -> 2.036 ( +0.10%) [ +0.10% +0.00% +0.20% / +0.10% +0.10% +0.29%] index_add_ strided 3 : Elapsed 0.020 ms (2.036 ms / 100) 1.977 -> 1.981 ( +0.20%) [ +0.15% +0.00% +0.15% / +0.20% +0.56% +0.40%] index_copy_ strided 3 : Elapsed 0.020 ms (1.980 ms / 100) 2.028 -> 2.023 ( -0.25%) [ +0.05% +0.10% +0.00% / -0.25% +0.49% +0.20%] index_add_ strided 7 : Elapsed 0.020 ms (2.029 ms / 100) 1.967 -> 1.970 ( +0.15%) [ +0.00% +0.31% +0.05% / +0.15% +0.56% +0.66%] index_copy_ strided 7 : Elapsed 0.020 ms (1.967 ms / 100) 2.028 -> 2.033 ( +0.25%) [ +0.25% +0.15% +0.00% / +0.25% +0.44% +0.25%] index_add_ perm : Elapsed 0.020 ms (2.033 ms / 100) 1.971 -> 1.972 ( +0.05%) [ +0.41% +0.10% +0.00% / +0.05% +0.30% +0.30%] index_copy_ perm : Elapsed 0.020 ms (1.979 ms / 100) 2.030 -> 2.029 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.15% +0.15%] index_add_ perm_sorted : Elapsed 0.020 ms (2.031 ms / 100) 1.974 -> 1.971 ( -0.15%) [ +0.05% +0.00% +0.10% / +0.15% +0.15% -0.15%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.975 ms / 100) 8.728 -> 8.723 ( -0.06%) [ +0.00% +0.02% +0.16% / -0.06% +0.33% +0.38%] index_select const : Elapsed 0.087 ms (8.728 ms / 100) 8.774 -> 8.761 ( -0.15%) [ +0.11% +0.00% +0.19% / -0.15% +0.41% +0.36%] index_select wrap : Elapsed 0.088 ms (8.784 ms / 100) 8.754 -> 8.765 ( +0.13%) [ +0.05% +0.10% +0.00% / +0.13% +0.45% +0.70%] index_select linear : Elapsed 0.088 ms (8.758 ms / 100) 8.760 -> 8.770 ( +0.11%) [ +0.13% +0.10% +0.00% / +0.11% +0.13% +0.30%] index_select reverse : Elapsed 0.088 ms (8.771 ms / 100) 8.736 -> 8.731 ( -0.06%) [ +0.14% +0.00% +0.13% / -0.06% +0.39% +0.17%] index_select skip64 : Elapsed 0.087 ms (8.748 ms / 100) 8.730 -> 8.734 ( +0.05%) [ +0.07% +0.22% +0.00% / +0.05% +0.27% +0.45%] index_select skip256 : Elapsed 0.087 ms (8.736 ms / 100) 8.769 -> 8.774 ( +0.06%) [ +0.14% +0.00% +0.06% / +0.06% +0.10% +0.16%] index_select spread : Elapsed 0.088 ms (8.781 ms / 100) 8.774 -> 8.779 ( +0.06%) [ +0.09% +0.18% +0.00% / +0.18% +0.06% +0.28%] index_select strided 3 : Elapsed 0.088 ms (8.782 ms / 100) 8.760 -> 8.790 ( +0.34%) [ +0.00% +0.37% +0.32% / +0.34% +0.40% +0.34%] index_select random : Elapsed 0.088 ms (8.760 ms / 100) 8.764 -> 8.761 ( -0.03%) [ +0.00% +0.22% +0.34% / -0.03% +0.19% +0.38%] index_select random_sorted : Elapsed 0.088 ms (8.764 ms / 100) B = [16, 20, 40, 5] (stride (5, 3200, 80, 1)) A = [16, 4, 40, 5] (stride (4, 1, 64, 2560)) dim = 1 2.250 -> 2.250 ( +0.00%) [ +0.00% +0.18% +0.13% / +0.18% +0.13% +0.00%] index_add_ linear : Elapsed 0.023 ms (2.250 ms / 100) 2.180 -> 2.182 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.09% +0.23%] index_copy_ linear : Elapsed 0.022 ms (2.182 ms / 100) 2.250 -> 2.249 ( -0.04%) [ +0.18% +0.00% +0.13% / -0.04% +0.13% +0.22%] index_add_ reverse : Elapsed 0.023 ms (2.254 ms / 100) 2.181 -> 2.182 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.32% +0.28%] index_copy_ reverse : Elapsed 0.022 ms (2.181 ms / 100) 2.250 -> 2.250 ( +0.00%) [ +0.27% +0.18% +0.00% / +0.00% +0.18% +0.22%] index_add_ spread : Elapsed 0.023 ms (2.256 ms / 100) 2.182 -> 2.184 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.37% +0.23%] index_copy_ spread : Elapsed 0.022 ms (2.184 ms / 100) 2.249 -> 2.247 ( -0.09%) [ +0.31% +0.18% +0.00% / -0.09% +0.36% +0.31%] index_add_ strided 3 : Elapsed 0.023 ms (2.256 ms / 100) 2.182 -> 2.184 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.14% +0.09% +0.27%] index_copy_ strided 3 : Elapsed 0.022 ms (2.184 ms / 100) 2.247 -> 2.248 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.45% +0.22%] index_add_ strided 7 : Elapsed 0.022 ms (2.247 ms / 100) 2.180 -> 2.179 ( -0.05%) [ +0.00% +0.18% +0.05% / -0.05% +0.23% +0.23%] index_copy_ strided 7 : Elapsed 0.022 ms (2.180 ms / 100) 2.246 -> 2.251 ( +0.22%) [ +0.00% +0.13% +0.09% / +0.22% +0.45% +0.40%] index_add_ perm : Elapsed 0.022 ms (2.246 ms / 100) 2.179 -> 2.182 ( +0.14%) [ +0.05% +0.23% +0.00% / +0.14% +0.37% +0.37%] index_copy_ perm : Elapsed 0.022 ms (2.180 ms / 100) 2.249 -> 2.249 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.36% +0.22%] index_add_ perm_sorted : Elapsed 0.023 ms (2.250 ms / 100) 2.185 -> 2.182 ( -0.14%) [ +0.05% +0.00% +0.00% / -0.14% +0.18% +0.18%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.186 ms / 100) 9.211 -> 9.202 ( -0.10%) [ +0.00% +0.01% +0.03% / -0.10% +0.13% +0.11%] index_select const : Elapsed 0.092 ms (9.211 ms / 100) 9.202 -> 9.205 ( +0.03%) [ +0.29% +0.02% +0.00% / +0.07% +0.07% +0.03%] index_select wrap : Elapsed 0.092 ms (9.229 ms / 100) 9.201 -> 9.194 ( -0.08%) [ +0.22% +0.00% +0.25% / -0.08% +0.33% +0.13%] index_select linear : Elapsed 0.092 ms (9.221 ms / 100) 9.218 -> 9.205 ( -0.14%) [ +0.07% +0.04% +0.00% / -0.14% +0.00% +0.05%] index_select reverse : Elapsed 0.092 ms (9.224 ms / 100) 9.209 -> 9.204 ( -0.05%) [ +0.17% +0.00% +0.07% / +0.22% +0.10% -0.05%] index_select skip64 : Elapsed 0.092 ms (9.225 ms / 100) 9.206 -> 9.205 ( -0.01%) [ +0.00% +0.09% +0.11% / +0.17% +0.11% -0.01%] index_select skip256 : Elapsed 0.092 ms (9.206 ms / 100) 9.202 -> 9.212 ( +0.11%) [ +0.17% +0.22% +0.00% / +0.32% +0.11% +0.12%] index_select spread : Elapsed 0.092 ms (9.218 ms / 100) 9.203 -> 9.221 ( +0.20%) [ +0.10% +0.25% +0.00% / +0.23% +0.20% +0.21%] index_select strided 3 : Elapsed 0.092 ms (9.212 ms / 100) 9.200 -> 9.208 ( +0.09%) [ +0.00% +0.21% +0.14% / +0.09% +0.13% +0.28%] index_select random : Elapsed 0.092 ms (9.200 ms / 100) 9.198 -> 9.206 ( +0.09%) [ +0.00% +0.24% +0.22% / +0.09% +0.12% +0.21%] index_select random_sorted : Elapsed 0.092 ms (9.198 ms / 100) B = [16, 20, 40, 5] (stride (40, 3200, 1, 640)) A = [16, 4, 40, 5] (stride (800, 5, 20, 1)) dim = 1 2.090 -> 2.091 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.81% +0.91%] index_add_ linear : Elapsed 0.021 ms (2.090 ms / 100) 2.039 -> 2.036 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +1.37% +1.32%] index_copy_ linear : Elapsed 0.020 ms (2.039 ms / 100) 2.087 -> 2.086 ( -0.05%) [ +0.29% +0.10% +0.00% / -0.05% +1.05% +0.86%] index_add_ reverse : Elapsed 0.021 ms (2.093 ms / 100) 2.036 -> 2.036 ( +0.00%) [ +0.25% +0.05% +0.00% / +0.00% +1.33% +1.33%] index_copy_ reverse : Elapsed 0.020 ms (2.041 ms / 100) 2.087 -> 2.090 ( +0.14%) [ +0.10% +0.10% +0.00% / +0.14% +0.81% +1.01%] index_add_ spread : Elapsed 0.021 ms (2.089 ms / 100) 2.033 -> 2.035 ( +0.10%) [ +0.00% +0.15% +0.20% / +0.10% +1.13% +1.18%] index_copy_ spread : Elapsed 0.020 ms (2.033 ms / 100) 2.096 -> 2.096 ( +0.00%) [ +0.14% +0.00% +0.05% / +0.00% +1.24% +1.05%] index_add_ strided 3 : Elapsed 0.021 ms (2.099 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.24% +0.29% +0.00% / +0.05% +1.56% +1.42%] index_copy_ strided 3 : Elapsed 0.021 ms (2.051 ms / 100) 2.095 -> 2.094 ( -0.05%) [ +0.19% +0.24% +0.00% / -0.05% +1.29% +1.24%] index_add_ strided 7 : Elapsed 0.021 ms (2.099 ms / 100) 2.047 -> 2.044 ( -0.15%) [ +0.10% +0.00% +0.10% / -0.15% +1.22% +1.27%] index_copy_ strided 7 : Elapsed 0.020 ms (2.049 ms / 100) 2.081 -> 2.083 ( +0.10%) [ +0.00% +0.14% +0.05% / +0.10% +0.82% +0.91%] index_add_ perm : Elapsed 0.021 ms (2.081 ms / 100) 2.031 -> 2.033 ( +0.10%) [ +0.34% +0.00% +0.15% / +0.10% +1.23% +1.38%] index_copy_ perm : Elapsed 0.020 ms (2.038 ms / 100) 2.086 -> 2.087 ( +0.05%) [ +0.00% +0.14% +0.10% / +0.05% +1.25% +1.15%] index_add_ perm_sorted : Elapsed 0.021 ms (2.086 ms / 100) 2.034 -> 2.037 ( +0.15%) [ +0.00% +0.20% +0.29% / +0.15% +1.38% +1.62%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.034 ms / 100) 9.202 -> 9.195 ( -0.08%) [ +0.00% +0.05% +0.03% / -0.08% +0.00% +0.00%] index_select const : Elapsed 0.092 ms (9.202 ms / 100) 9.202 -> 9.205 ( +0.03%) [ +0.00% +0.18% +0.26% / +0.08% +0.03% +0.03%] index_select wrap : Elapsed 0.092 ms (9.202 ms / 100) 9.201 -> 9.202 ( +0.01%) [ +0.07% +0.00% +0.11% / +0.01% +0.16% +0.07%] index_select linear : Elapsed 0.092 ms (9.207 ms / 100) 9.198 -> 9.211 ( +0.14%) [ +0.05% +0.00% +0.01% / +0.16% +0.14% +0.15%] index_select reverse : Elapsed 0.092 ms (9.203 ms / 100) 9.193 -> 9.177 ( -0.17%) [ +0.14% +0.00% +0.02% / -0.02% -0.17% +0.01%] index_select skip64 : Elapsed 0.092 ms (9.206 ms / 100) 9.173 -> 9.193 ( +0.22%) [ +0.19% +0.29% +0.00% / +0.22% +0.32% +0.26%] index_select skip256 : Elapsed 0.092 ms (9.190 ms / 100) 9.210 -> 9.205 ( -0.05%) [ +0.02% +0.13% +0.00% / +0.11% +0.01% -0.05%] index_select spread : Elapsed 0.092 ms (9.212 ms / 100) 9.210 -> 9.209 ( -0.01%) [ +0.00% +0.01% +0.02% / -0.01% +0.21% +0.08%] index_select strided 3 : Elapsed 0.092 ms (9.210 ms / 100) 9.197 -> 9.207 ( +0.11%) [ +0.07% +0.14% +0.00% / +0.11% +0.15% +0.20%] index_select random : Elapsed 0.092 ms (9.203 ms / 100) 9.210 -> 9.199 ( -0.12%) [ +0.00% +0.08% +0.27% / -0.12% +0.01% -0.03%] index_select random_sorted : Elapsed 0.092 ms (9.210 ms / 100) B = [16, 20, 40, 5] (stride (100, 5, 1600, 1)) A = [16, 4, 40, 5] (stride (1, 3200, 80, 16)) dim = 1 2.028 -> 2.037 ( +0.44%) [ +0.10% +0.00% +0.20% / +0.44% +1.04% +0.94%] index_add_ linear : Elapsed 0.020 ms (2.030 ms / 100) 1.980 -> 1.982 ( +0.10%) [ +0.05% +0.20% +0.00% / +0.10% +0.66% +0.61%] index_copy_ linear : Elapsed 0.020 ms (1.981 ms / 100) 2.026 -> 2.029 ( +0.15%) [ +0.25% +0.15% +0.00% / +0.15% +1.09% +1.09%] index_add_ reverse : Elapsed 0.020 ms (2.031 ms / 100) 1.979 -> 1.977 ( -0.10%) [ +0.10% +0.00% +0.20% / -0.10% +0.40% +0.51%] index_copy_ reverse : Elapsed 0.020 ms (1.981 ms / 100) 2.013 -> 2.018 ( +0.25%) [ +0.30% +0.15% +0.00% / +0.25% +1.14% +0.94%] index_add_ spread : Elapsed 0.020 ms (2.019 ms / 100) 1.985 -> 1.988 ( +0.15%) [ +0.00% +0.00% +0.25% / +0.15% +0.86% +0.76%] index_copy_ spread : Elapsed 0.020 ms (1.985 ms / 100) 2.031 -> 2.034 ( +0.15%) [ +0.00% +0.44% +0.00% / +0.15% +1.08% +0.98%] index_add_ strided 3 : Elapsed 0.020 ms (2.031 ms / 100) 2.004 -> 2.002 ( -0.10%) [ +0.15% +0.25% +0.00% / -0.10% +0.95% +0.75%] index_copy_ strided 3 : Elapsed 0.020 ms (2.007 ms / 100) 2.006 -> 2.009 ( +0.15%) [ +0.10% +0.10% +0.00% / +0.15% +0.75% +0.70%] index_add_ strided 7 : Elapsed 0.020 ms (2.008 ms / 100) 1.971 -> 1.970 ( -0.05%) [ +0.15% +0.20% +0.00% / -0.05% +0.66% +0.51%] index_copy_ strided 7 : Elapsed 0.020 ms (1.974 ms / 100) 2.030 -> 2.029 ( -0.05%) [ +0.15% +0.00% +0.05% / -0.05% +0.49% +0.44%] index_add_ perm : Elapsed 0.020 ms (2.033 ms / 100) 1.998 -> 2.000 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.55% +0.65%] index_copy_ perm : Elapsed 0.020 ms (2.000 ms / 100) 2.029 -> 2.029 ( +0.00%) [ +0.25% +0.10% +0.00% / +0.00% +0.39% +0.49%] index_add_ perm_sorted : Elapsed 0.020 ms (2.034 ms / 100) 1.992 -> 1.998 ( +0.30%) [ +0.10% +0.00% +0.00% / +0.30% +0.40% +0.65%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.994 ms / 100) 8.801 -> 8.818 ( +0.19%) [ +0.18% +0.00% +0.03% / +0.30% +0.28% +0.19%] index_select const : Elapsed 0.088 ms (8.817 ms / 100) 8.875 -> 8.862 ( -0.15%) [ +0.00% +0.01% +0.00% / -0.15% +0.12% -0.12%] index_select wrap : Elapsed 0.089 ms (8.875 ms / 100) 8.856 -> 8.853 ( -0.03%) [ +0.01% +0.00% +0.12% / -0.03% +0.12% +0.23%] index_select linear : Elapsed 0.089 ms (8.857 ms / 100) 8.845 -> 8.857 ( +0.14%) [ +0.16% +0.09% +0.00% / +0.34% +0.26% +0.14%] index_select reverse : Elapsed 0.089 ms (8.859 ms / 100) 8.796 -> 8.827 ( +0.35%) [ +0.02% +0.00% +0.35% / +0.41% +0.35% +0.61%] index_select skip64 : Elapsed 0.088 ms (8.798 ms / 100) 8.794 -> 8.812 ( +0.20%) [ +0.24% +0.14% +0.00% / +0.20% +0.43% +0.57%] index_select skip256 : Elapsed 0.088 ms (8.815 ms / 100) 8.858 -> 8.867 ( +0.10%) [ +0.14% +0.08% +0.00% / +0.12% +0.10% +0.18%] index_select spread : Elapsed 0.089 ms (8.870 ms / 100) 8.864 -> 8.862 ( -0.02%) [ +0.11% +0.07% +0.00% / +0.10% +0.12% -0.02%] index_select strided 3 : Elapsed 0.089 ms (8.874 ms / 100) 8.870 -> 8.859 ( -0.12%) [ +0.00% +0.15% +0.05% / +0.02% +0.21% -0.12%] index_select random : Elapsed 0.089 ms (8.870 ms / 100) 8.872 -> 8.862 ( -0.11%) [ +0.17% +0.00% +0.07% / -0.01% -0.11% +0.05%] index_select random_sorted : Elapsed 0.089 ms (8.887 ms / 100) B = [16, 20, 40, 5] (stride (1, 640, 16, 12800)) A = [16, 4, 40, 5] (stride (4, 1, 64, 2560)) dim = 1 2.248 -> 2.254 ( +0.27%) [ +0.27% +0.40% +0.00% / +0.27% +0.71% +0.67%] index_add_ linear : Elapsed 0.023 ms (2.254 ms / 100) 2.199 -> 2.200 ( +0.05%) [ +0.18% +0.00% +0.05% / +0.05% +0.32% +0.32%] index_copy_ linear : Elapsed 0.022 ms (2.203 ms / 100) 2.252 -> 2.254 ( +0.09%) [ +0.00% +0.27% +0.04% / +0.09% +0.22% +0.09%] index_add_ reverse : Elapsed 0.023 ms (2.252 ms / 100) 2.198 -> 2.198 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.09% +0.27%] index_copy_ reverse : Elapsed 0.022 ms (2.198 ms / 100) 2.250 -> 2.252 ( +0.09%) [ +0.13% +0.00% +0.18% / +0.09% +0.58% +0.67%] index_add_ spread : Elapsed 0.023 ms (2.253 ms / 100) 2.198 -> 2.194 ( -0.18%) [ +0.00% +0.05% +0.05% / -0.18% +0.09% +0.32%] index_copy_ spread : Elapsed 0.022 ms (2.198 ms / 100) 2.250 -> 2.249 ( -0.04%) [ +0.04% +0.22% +0.00% / -0.04% +0.67% +0.62%] index_add_ strided 3 : Elapsed 0.023 ms (2.251 ms / 100) 2.195 -> 2.191 ( -0.18%) [ +0.27% +0.09% +0.00% / -0.18% +0.73% +0.59%] index_copy_ strided 3 : Elapsed 0.022 ms (2.201 ms / 100) 2.251 -> 2.251 ( +0.00%) [ +0.13% +0.18% +0.00% / +0.00% +0.44% +0.44%] index_add_ strided 7 : Elapsed 0.023 ms (2.254 ms / 100) 2.200 -> 2.199 ( -0.05%) [ +0.00% +0.00% +0.09% / -0.05% +0.18% +0.09%] index_copy_ strided 7 : Elapsed 0.022 ms (2.200 ms / 100) 2.248 -> 2.252 ( +0.18%) [ +0.13% +0.04% +0.00% / +0.18% +0.62% +0.71%] index_add_ perm : Elapsed 0.023 ms (2.251 ms / 100) 2.194 -> 2.196 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.77% +0.68%] index_copy_ perm : Elapsed 0.022 ms (2.196 ms / 100) 2.251 -> 2.253 ( +0.09%) [ +0.40% +0.04% +0.00% / +0.09% +0.58% +0.76%] index_add_ perm_sorted : Elapsed 0.023 ms (2.260 ms / 100) 2.192 -> 2.198 ( +0.27%) [ +0.41% +0.00% +0.23% / +0.27% +0.91% +0.78%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.201 ms / 100) 9.216 -> 9.248 ( +0.35%) [ +0.13% +0.20% +0.00% / +0.35% +0.36% +0.35%] index_select const : Elapsed 0.092 ms (9.228 ms / 100) 9.233 -> 9.239 ( +0.06%) [ +0.13% +0.10% +0.00% / +0.06% +0.18% +0.15%] index_select wrap : Elapsed 0.092 ms (9.245 ms / 100) 9.225 -> 9.231 ( +0.07%) [ +0.02% +0.00% +0.09% / +0.07% +0.23% +0.38%] index_select linear : Elapsed 0.092 ms (9.227 ms / 100) 9.225 -> 9.229 ( +0.04%) [ +0.20% +0.00% +0.03% / +0.04% +0.28% +0.15%] index_select reverse : Elapsed 0.092 ms (9.243 ms / 100) 9.207 -> 9.234 ( +0.29%) [ +0.16% +0.42% +0.00% / +0.31% +0.29% +0.56%] index_select skip64 : Elapsed 0.092 ms (9.222 ms / 100) 9.237 -> 9.243 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.11% +0.10% +0.06%] index_select skip256 : Elapsed 0.092 ms (9.237 ms / 100) 9.224 -> 9.236 ( +0.13%) [ +0.00% +0.26% +0.04% / +0.13% +0.47% +0.20%] index_select spread : Elapsed 0.092 ms (9.224 ms / 100) 9.232 -> 9.239 ( +0.08%) [ +0.00% +0.12% +0.21% / +0.27% +0.14% +0.08%] index_select strided 3 : Elapsed 0.092 ms (9.232 ms / 100) 9.235 -> 9.236 ( +0.01%) [ +0.01% +0.00% +0.09% / +0.01% +0.08% +0.22%] index_select random : Elapsed 0.092 ms (9.236 ms / 100) 9.230 -> 9.238 ( +0.09%) [ +0.03% +0.17% +0.00% / +0.09% +0.27% +0.17%] index_select random_sorted : Elapsed 0.092 ms (9.233 ms / 100) out_shape = [16, 4, 20, 5] in_shape = [16, 4, 40, 5] idx_dim = 2 B = [16, 4, 20, 5] (stride (1, 1600, 80, 16)) A = [16, 4, 40, 5] (stride (200, 3200, 1, 40)) dim = 2 2.449 -> 2.450 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.20% +0.16%] index_select const : Elapsed 0.025 ms (2.453 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.00% +0.08% +0.12% / +0.08% +0.04% +0.04%] index_select wrap : Elapsed 0.025 ms (2.459 ms / 100) 2.459 -> 2.459 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.16% +0.04% +0.00%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.20% +0.20% +0.00% / +0.20% +0.16% +0.12%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.04% +0.00% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.08% +0.20% +0.00% / +0.00% +0.41% +0.37%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.463 -> 2.466 ( +0.12%) [ +0.00% +0.28% +0.04% / +0.12% +0.24% +0.20%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.00% +0.12% +0.08% / +0.04% +0.20% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.466 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.20% +0.04% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.465 -> 2.467 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.08% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.12% +0.00%] index_select strided 8 : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.466 ( -0.12%) [ +0.12% +0.00% +0.08% / -0.12% +0.00% -0.12%] index_select strided 16 : Elapsed 0.025 ms (2.472 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.16% +0.00% +0.24% / +0.00% +0.04% +0.16%] index_select random : Elapsed 0.025 ms (2.469 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.16% +0.00% +0.12% / +0.20% +0.12% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.468 -> 2.466 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% -0.04% -0.08%] index_select perm : Elapsed 0.025 ms (2.471 ms / 100) 2.467 -> 2.459 ( -0.32%) [ +0.00% +0.16% +0.12% / +0.16% -0.32% -0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) B = [16, 4, 20, 5] (stride (5, 80, 320, 1)) A = [16, 4, 40, 5] (stride (5, 80, 320, 1)) dim = 2 2.447 -> 2.444 ( -0.12%) [ +0.12% +0.16% +0.00% / -0.12% +0.25% +0.12%] index_select const : Elapsed 0.025 ms (2.450 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.00% +0.16% +0.16% / +0.24% -0.08% +0.16%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.04% +0.00% +0.04% / +0.16% -0.12% -0.08%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.12%] index_select reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.16% +0.25% +0.00% / -0.08% +0.12% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.16% +0.00% +0.08% / +0.12% +0.25% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.16% +0.04% +0.29%] index_select spread : Elapsed 0.025 ms (2.455 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.29% +0.33%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.447 ( -0.12%) [ +0.00% +0.12% +0.16% / -0.12% +0.20% +0.16%] index_select strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.04% +0.20% +0.00% / +0.00% +0.29% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.12% +0.29% +0.00% / +0.08% +0.20% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.450 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.12% +0.08% +0.00% / +0.16% +0.20% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.12% +0.20% +0.00% / +0.12% +0.04% +0.24%] index_select random : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.00% +0.04% +0.00% / -0.16% +0.00% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.08% +0.20%] index_select perm : Elapsed 0.025 ms (2.458 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.00% +0.04% +0.24% / +0.08% +0.24% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.454 ms / 100) B = [16, 4, 20, 5] (stride (1, 16, 320, 64)) A = [16, 4, 40, 5] (stride (20, 1, 320, 4)) dim = 2 2.442 -> 2.442 ( +0.00%) [ +0.25% +0.00% +0.20% / +0.00% +0.33% +0.33%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.00% +0.08% +0.00% / +0.00% -0.04% -0.12%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.29% +0.12% +0.00% / +0.00% +0.16% +0.00%] index_select linear : Elapsed 0.025 ms (2.462 ms / 100) 2.458 -> 2.454 ( -0.16%) [ +0.00% +0.04% +0.12% / +0.04% -0.16% -0.16%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.12% +0.04% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.29% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.452 -> 2.457 ( +0.20%) [ +0.00% +0.12% +0.16% / +0.20% +0.20% +0.24%] index_select spread : Elapsed 0.025 ms (2.452 ms / 100) 2.461 -> 2.453 ( -0.33%) [ +0.08% +0.00% +0.00% / +0.04% -0.28% -0.33%] index_select strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.04% +0.20% +0.00% / +0.24% -0.04% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.00% +0.00% +0.16% / +0.12% +0.04% +0.04%] index_select strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.446 ( -0.08%) [ +0.00% +0.12% +0.16% / -0.08% +0.25% +0.16%] index_select strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.20% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.449 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.29% +0.00% +0.04% / +0.12% +0.12% +0.24%] index_select random : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.453 ( -0.24%) [ +0.08% +0.00% +0.12% / +0.12% -0.24% -0.20%] index_select random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.457 -> 2.456 ( -0.04%) [ +0.16% +0.00% +0.12% / +0.20% +0.00% -0.04%] index_select perm : Elapsed 0.025 ms (2.461 ms / 100) 2.457 -> 2.448 ( -0.37%) [ +0.00% +0.28% +0.08% / +0.16% -0.37% -0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) B = [16, 4, 20, 5] (stride (80, 20, 1, 1280)) A = [16, 4, 40, 5] (stride (1, 640, 16, 2560)) dim = 2 2.393 -> 2.399 ( +0.25%) [ +0.21% +0.00% +0.13% / +0.25% +0.42% +0.46%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.410 -> 2.403 ( -0.29%) [ +0.12% +0.00% +0.12% / +0.17% -0.29% -0.21%] index_select wrap : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.405 ( -0.25%) [ +0.17% +0.00% +0.04% / +0.12% -0.25% +0.00%] index_select linear : Elapsed 0.024 ms (2.415 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.21% +0.12% +0.25%] index_select reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.398 -> 2.395 ( -0.13%) [ +0.04% +0.08% +0.00% / +0.00% +0.21% -0.13%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.396 -> 2.395 ( -0.04%) [ +0.21% +0.00% +0.00% / -0.04% +0.29% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.401 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.17% +0.12% +0.00% / +0.00% +0.25% +0.25%] index_select spread : Elapsed 0.024 ms (2.415 ms / 100) 2.409 -> 2.413 ( +0.17%) [ +0.12% +0.08% +0.00% / +0.17% +0.17% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.412 ms / 100) 2.401 -> 2.401 ( +0.00%) [ +0.00% +0.25% +0.25% / +0.00% +0.29% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.401 ms / 100) 2.412 -> 2.415 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.21% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.401 -> 2.400 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.04% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.397 -> 2.402 ( +0.21%) [ +0.00% +0.29% +0.17% / +0.21% +0.29% +0.42%] index_select strided 16 : Elapsed 0.024 ms (2.397 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.17% +0.00% +0.04% / +0.12% +0.17% +0.37%] index_select random : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.408 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.21% +0.04% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.12% +0.08% / +0.00% +0.17% +0.04%] index_select perm : Elapsed 0.024 ms (2.412 ms / 100) 2.414 -> 2.414 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.04% +0.12% +0.00%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) B = [16, 4, 20, 5] (stride (1, 320, 16, 1280)) A = [16, 4, 40, 5] (stride (1, 640, 16, 2560)) dim = 2 2.451 -> 2.454 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.12% +0.24% +0.29%] index_select const : Elapsed 0.025 ms (2.451 ms / 100) 2.470 -> 2.470 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.00%] index_select wrap : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.469 ( -0.04%) [ +0.04% +0.00% +0.08% / -0.04% +0.12% +0.04%] index_select linear : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.00% +0.12% +0.08% / +0.04% +0.12% +0.08%] index_select reverse : Elapsed 0.025 ms (2.468 ms / 100) 2.454 -> 2.451 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.04% -0.12% -0.04%] index_select skip64 : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.24% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.470 -> 2.471 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.28% +0.04% +0.08%] index_select spread : Elapsed 0.025 ms (2.471 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.04% -0.04% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.16% +0.00% +0.33% / +0.37% +0.24% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.462 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.12% +0.36%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.16% +0.00% +0.33% / +0.16% +0.24% +0.37%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.12% +0.16% +0.12%] index_select strided 16 : Elapsed 0.025 ms (2.460 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.04% +0.12% +0.20%] index_select random : Elapsed 0.025 ms (2.466 ms / 100) 2.468 -> 2.469 ( +0.04%) [ +0.00% +0.20% +0.04% / +0.04% +0.28% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.469 -> 2.468 ( -0.04%) [ +0.28% +0.20% +0.00% / -0.04% +0.04% +0.20%] index_select perm : Elapsed 0.025 ms (2.476 ms / 100) 2.473 -> 2.464 ( -0.36%) [ +0.04% +0.00% +0.00% / +0.00% -0.36% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) out_shape = [16, 4, 40, 20] in_shape = [16, 4, 40, 5] idx_dim = 3 B = [16, 4, 40, 20] (stride (3200, 20, 80, 1)) A = [16, 4, 40, 5] (stride (1, 80, 320, 16)) dim = 3 2.036 -> 2.033 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% -0.15% +0.05%] index_add_ linear : Elapsed 0.020 ms (2.036 ms / 100) 1.982 -> 1.989 ( +0.35%) [ +0.00% +0.10% +0.45% / +0.35% +0.76% +0.55%] index_copy_ linear : Elapsed 0.020 ms (1.982 ms / 100) 2.034 -> 2.035 ( +0.05%) [ +0.00% +0.15% +0.05% / +0.05% +0.05% +0.10%] index_add_ reverse : Elapsed 0.020 ms (2.034 ms / 100) 1.982 -> 1.984 ( +0.10%) [ +0.00% +0.10% +0.35% / +0.10% +0.55% +0.40%] index_copy_ reverse : Elapsed 0.020 ms (1.982 ms / 100) 2.048 -> 2.048 ( +0.00%) [ +0.05% +0.15% +0.00% / +0.00% +0.24% +0.29%] index_add_ spread : Elapsed 0.020 ms (2.049 ms / 100) 2.010 -> 2.013 ( +0.15%) [ +0.00% +0.20% +0.10% / +0.15% +0.30% +0.20%] index_copy_ spread : Elapsed 0.020 ms (2.010 ms / 100) 2.046 -> 2.047 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.15% +0.05% +0.34%] index_add_ strided 3 : Elapsed 0.020 ms (2.048 ms / 100) 2.008 -> 2.010 ( +0.10%) [ +0.00% +0.05% +0.00% / +0.40% +0.30% +0.10%] index_copy_ strided 3 : Elapsed 0.020 ms (2.008 ms / 100) 2.047 -> 2.044 ( -0.15%) [ +0.05% +0.24% +0.00% / -0.15% +0.24% +0.15%] index_add_ strided 7 : Elapsed 0.020 ms (2.048 ms / 100) 2.004 -> 2.011 ( +0.35%) [ +0.00% +0.15% +0.40% / +0.35% +0.45% +0.50%] index_copy_ strided 7 : Elapsed 0.020 ms (2.004 ms / 100) 2.050 -> 2.051 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.24% +0.05% +0.15%] index_add_ perm : Elapsed 0.021 ms (2.050 ms / 100) 2.006 -> 2.013 ( +0.35%) [ +0.00% +0.35% +0.45% / +0.40% +0.35% +0.55%] index_copy_ perm : Elapsed 0.020 ms (2.006 ms / 100) 2.053 -> 2.050 ( -0.15%) [ +0.00% +0.15% +0.10% / -0.15% +0.10% +0.15%] index_add_ perm_sorted : Elapsed 0.021 ms (2.053 ms / 100) 2.011 -> 2.008 ( -0.15%) [ +0.05% +0.00% +0.00% / -0.15% +0.25% +0.10%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.012 ms / 100) 8.587 -> 8.594 ( +0.08%) [ +0.05% +0.16% +0.00% / +0.08% +0.33% +0.34%] index_select const : Elapsed 0.086 ms (8.591 ms / 100) 8.611 -> 8.626 ( +0.17%) [ +0.06% +0.28% +0.00% / +0.17% +0.43% +0.30%] index_select wrap : Elapsed 0.086 ms (8.616 ms / 100) 8.609 -> 8.618 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.26% +0.60%] index_select linear : Elapsed 0.086 ms (8.618 ms / 100) 8.601 -> 8.598 ( -0.03%) [ +0.20% +0.10% +0.00% / -0.03% +0.53% +0.34%] index_select reverse : Elapsed 0.086 ms (8.618 ms / 100) 8.581 -> 8.578 ( -0.03%) [ +0.10% +0.00% +0.06% / -0.03% +0.40% +0.47%] index_select skip64 : Elapsed 0.086 ms (8.590 ms / 100) 8.580 -> 8.596 ( +0.19%) [ +0.00% +0.17% +0.05% / +0.19% +0.38% +0.58%] index_select skip256 : Elapsed 0.086 ms (8.580 ms / 100) 8.629 -> 8.651 ( +0.25%) [ +0.05% +0.22% +0.00% / +0.25% +0.65% +0.54%] index_select spread : Elapsed 0.086 ms (8.633 ms / 100) 8.611 -> 8.620 ( +0.10%) [ +0.05% +0.08% +0.00% / +0.10% +0.23% +0.22%] index_select strided 3 : Elapsed 0.086 ms (8.615 ms / 100) 8.630 -> 8.637 ( +0.08%) [ +0.14% +0.00% +0.20% / +0.08% +0.44% +0.61%] index_select random : Elapsed 0.086 ms (8.642 ms / 100) 8.630 -> 8.648 ( +0.21%) [ +0.16% +0.00% +0.16% / +0.21% +0.46% +0.37%] index_select random_sorted : Elapsed 0.086 ms (8.644 ms / 100) B = [16, 4, 40, 20] (stride (3200, 1, 80, 4)) A = [16, 4, 40, 5] (stride (800, 200, 5, 1)) dim = 3 1.643 -> 1.643 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.30% +0.49%] index_add_ linear : Elapsed 0.016 ms (1.644 ms / 100) 1.598 -> 1.604 ( +0.38%) [ +0.38% +0.38% +0.00% / +0.38% +0.81% +0.94%] index_copy_ linear : Elapsed 0.016 ms (1.604 ms / 100) 1.642 -> 1.639 ( -0.18%) [ +0.18% +0.06% +0.00% / -0.18% +0.49% +0.49%] index_add_ reverse : Elapsed 0.016 ms (1.645 ms / 100) 1.602 -> 1.603 ( +0.06%) [ +0.25% +0.06% +0.00% / +0.06% +0.19% +0.44%] index_copy_ reverse : Elapsed 0.016 ms (1.606 ms / 100) 1.679 -> 1.672 ( -0.42%) [ +0.06% +0.12% +0.00% / +0.36% -0.30% -0.42%] index_add_ spread : Elapsed 0.017 ms (1.680 ms / 100) 1.642 -> 1.637 ( -0.30%) [ +0.00% +0.30% +0.06% / +0.00% -0.30% -0.06%] index_copy_ spread : Elapsed 0.016 ms (1.642 ms / 100) 1.682 -> 1.672 ( -0.59%) [ +0.06% +0.00% +0.06% / -0.06% -0.59% -0.54%] index_add_ strided 3 : Elapsed 0.017 ms (1.683 ms / 100) 1.641 -> 1.637 ( -0.24%) [ +0.24% +0.06% +0.00% / +0.00% -0.18% -0.24%] index_copy_ strided 3 : Elapsed 0.016 ms (1.645 ms / 100) 1.667 -> 1.658 ( -0.54%) [ +0.00% +0.12% +0.06% / +0.12% -0.48% -0.54%] index_add_ strided 7 : Elapsed 0.017 ms (1.667 ms / 100) 1.627 -> 1.621 ( -0.37%) [ +0.00% +0.25% +0.00% / +0.06% -0.25% -0.37%] index_copy_ strided 7 : Elapsed 0.016 ms (1.627 ms / 100) 1.675 -> 1.667 ( -0.48%) [ +0.00% +0.12% +0.00% / +0.00% -0.24% -0.48%] index_add_ perm : Elapsed 0.017 ms (1.675 ms / 100) 1.637 -> 1.630 ( -0.43%) [ +0.00% +0.12% +0.06% / -0.12% -0.43% -0.31%] index_copy_ perm : Elapsed 0.016 ms (1.637 ms / 100) 1.673 -> 1.667 ( -0.36%) [ +0.00% +0.12% +0.18% / -0.24% -0.36% -0.24%] index_add_ perm_sorted : Elapsed 0.017 ms (1.673 ms / 100) 1.632 -> 1.628 ( -0.25%) [ +0.18% +0.12% +0.00% / +0.00% -0.06% -0.25%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.635 ms / 100) 7.940 -> 7.941 ( +0.01%) [ +0.00% +0.01% +0.00% / +0.01% +0.18% +0.33%] index_select const : Elapsed 0.079 ms (7.940 ms / 100) 7.930 -> 7.952 ( +0.28%) [ +0.00% +0.14% +0.08% / +0.28% +0.29% +0.32%] index_select wrap : Elapsed 0.079 ms (7.930 ms / 100) 7.939 -> 7.942 ( +0.04%) [ +0.03% +0.00% +0.10% / +0.15% +0.15% +0.04%] index_select linear : Elapsed 0.079 ms (7.941 ms / 100) 7.938 -> 7.941 ( +0.04%) [ +0.10% +0.00% +0.31% / +0.06% +0.08% +0.04%] index_select reverse : Elapsed 0.079 ms (7.946 ms / 100) 7.925 -> 7.934 ( +0.11%) [ +0.30% +0.19% +0.00% / +0.11% +0.26% +0.29%] index_select skip64 : Elapsed 0.079 ms (7.949 ms / 100) 7.931 -> 7.948 ( +0.21%) [ +0.38% +0.10% +0.00% / +0.21% +0.28% +0.35%] index_select skip256 : Elapsed 0.080 ms (7.961 ms / 100) 7.937 -> 7.931 ( -0.08%) [ +0.16% +0.05% +0.00% / -0.08% +0.16% +0.21%] index_select spread : Elapsed 0.079 ms (7.950 ms / 100) 7.938 -> 7.948 ( +0.13%) [ +0.13% +0.00% +0.08% / +0.13% +0.30% +0.30%] index_select strided 3 : Elapsed 0.079 ms (7.948 ms / 100) 7.932 -> 7.944 ( +0.15%) [ +0.00% +0.04% +0.00% / +0.15% +0.15% +0.33%] index_select random : Elapsed 0.079 ms (7.932 ms / 100) 7.931 -> 7.944 ( +0.16%) [ +0.00% +0.15% +0.08% / +0.24% +0.30% +0.16%] index_select random_sorted : Elapsed 0.079 ms (7.931 ms / 100) B = [16, 4, 40, 20] (stride (1, 12800, 320, 16)) A = [16, 4, 40, 5] (stride (5, 80, 320, 1)) dim = 3 1.856 -> 1.859 ( +0.16%) [ +0.05% +0.00% +0.16% / +0.16% +1.13% +1.13%] index_add_ linear : Elapsed 0.019 ms (1.857 ms / 100) 1.802 -> 1.805 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +1.05% +0.89%] index_copy_ linear : Elapsed 0.018 ms (1.802 ms / 100) 1.856 -> 1.858 ( +0.11%) [ +0.00% +0.16% +0.22% / +0.11% +1.19% +0.92%] index_add_ reverse : Elapsed 0.019 ms (1.856 ms / 100) 1.802 -> 1.805 ( +0.17%) [ +0.00% +0.11% +0.11% / +0.17% +0.78% +1.11%] index_copy_ reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.870 -> 1.877 ( +0.37%) [ +0.00% +0.16% +0.21% / +0.37% +0.75% +0.75%] index_add_ spread : Elapsed 0.019 ms (1.870 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.00% +0.11% +0.22% / +0.17% +0.94% +0.77%] index_copy_ spread : Elapsed 0.018 ms (1.813 ms / 100) 1.863 -> 1.865 ( +0.11%) [ +0.05% +0.00% +0.21% / +0.11% +1.07% +0.91%] index_add_ strided 3 : Elapsed 0.019 ms (1.864 ms / 100) 1.809 -> 1.811 ( +0.11%) [ +0.17% +0.00% +0.00% / +0.11% +1.00% +0.88%] index_copy_ strided 3 : Elapsed 0.018 ms (1.812 ms / 100) 1.861 -> 1.863 ( +0.11%) [ +0.11% +0.00% +0.21% / +0.11% +1.02% +1.18%] index_add_ strided 7 : Elapsed 0.019 ms (1.863 ms / 100) 1.805 -> 1.810 ( +0.28%) [ +0.17% +0.00% +0.28% / +0.28% +1.00% +1.16%] index_copy_ strided 7 : Elapsed 0.018 ms (1.808 ms / 100) 1.863 -> 1.867 ( +0.21%) [ +0.00% +0.16% +0.21% / +0.21% +0.97% +0.91%] index_add_ perm : Elapsed 0.019 ms (1.863 ms / 100) 1.806 -> 1.809 ( +0.17%) [ +0.00% +0.06% +0.22% / +0.17% +1.05% +1.00%] index_copy_ perm : Elapsed 0.018 ms (1.806 ms / 100) 1.861 -> 1.867 ( +0.32%) [ +0.11% +0.00% +0.27% / +0.32% +0.97% +1.02%] index_add_ perm_sorted : Elapsed 0.019 ms (1.863 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.00% +0.22% +0.33% / +0.39% +1.11% +1.00%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.805 ms / 100) 8.258 -> 8.240 ( -0.22%) [ +0.04% +0.00% +0.04% / +0.04% -0.22% -0.07%] index_select const : Elapsed 0.083 ms (8.261 ms / 100) 8.236 -> 8.237 ( +0.01%) [ +0.10% +0.00% +0.12% / +0.01% +0.13% +0.01%] index_select wrap : Elapsed 0.082 ms (8.244 ms / 100) 8.250 -> 8.244 ( -0.07%) [ +0.01% +0.00% +0.05% / -0.02% -0.07% +0.10%] index_select linear : Elapsed 0.083 ms (8.251 ms / 100) 8.244 -> 8.237 ( -0.08%) [ +0.06% +0.00% +0.17% / -0.08% +0.11% +0.21%] index_select reverse : Elapsed 0.082 ms (8.249 ms / 100) 8.239 -> 8.246 ( +0.08%) [ +0.08% +0.00% +0.02% / +0.08% +0.11% +0.25%] index_select skip64 : Elapsed 0.082 ms (8.246 ms / 100) 8.237 -> 8.247 ( +0.12%) [ +0.00% +0.39% +0.21% / +0.12% +0.19% +0.39%] index_select skip256 : Elapsed 0.082 ms (8.237 ms / 100) 8.251 -> 8.240 ( -0.13%) [ +0.00% +0.10% +0.24% / -0.13% +0.02% +0.08%] index_select spread : Elapsed 0.083 ms (8.251 ms / 100) 8.245 -> 8.252 ( +0.08%) [ +0.19% +0.00% +0.02% / +0.19% +0.08% +0.24%] index_select strided 3 : Elapsed 0.083 ms (8.261 ms / 100) 8.250 -> 8.240 ( -0.12%) [ +0.33% +0.00% +0.06% / +0.04% +0.19% -0.12%] index_select random : Elapsed 0.083 ms (8.277 ms / 100) 8.247 -> 8.246 ( -0.01%) [ +0.00% +0.22% +0.16% / -0.01% +0.21% +0.00%] index_select random_sorted : Elapsed 0.082 ms (8.247 ms / 100) B = [16, 4, 40, 20] (stride (80, 20, 1280, 1)) A = [16, 4, 40, 5] (stride (160, 1, 4, 2560)) dim = 3 1.801 -> 1.790 ( -0.61%) [ +0.11% +0.00% +0.06% / +0.00% -0.61% -0.56%] index_add_ linear : Elapsed 0.018 ms (1.803 ms / 100) 1.760 -> 1.757 ( -0.17%) [ +0.17% +0.00% +0.28% / +0.11% +0.00% -0.17%] index_copy_ linear : Elapsed 0.018 ms (1.763 ms / 100) 1.794 -> 1.785 ( -0.50%) [ +0.00% +0.17% +0.17% / -0.17% -0.17% -0.50%] index_add_ reverse : Elapsed 0.018 ms (1.794 ms / 100) 1.760 -> 1.749 ( -0.62%) [ +0.00% +0.40% +0.11% / +0.00% -0.62% -0.28%] index_copy_ reverse : Elapsed 0.018 ms (1.760 ms / 100) 1.815 -> 1.809 ( -0.33%) [ +0.00% +0.00% +0.17% / -0.17% -0.33% -0.33%] index_add_ spread : Elapsed 0.018 ms (1.815 ms / 100) 1.785 -> 1.779 ( -0.34%) [ +0.34% +0.00% +0.17% / +0.22% -0.22% -0.34%] index_copy_ spread : Elapsed 0.018 ms (1.791 ms / 100) 1.811 -> 1.806 ( -0.28%) [ +0.11% +0.00% +0.06% / +0.17% -0.17% -0.28%] index_add_ strided 3 : Elapsed 0.018 ms (1.813 ms / 100) 1.791 -> 1.781 ( -0.56%) [ +0.00% +0.00% +0.00% / -0.11% -0.56% -0.50%] index_copy_ strided 3 : Elapsed 0.018 ms (1.791 ms / 100) 1.809 -> 1.807 ( -0.11%) [ +0.11% +0.17% +0.00% / +0.17% -0.11% -0.06%] index_add_ strided 7 : Elapsed 0.018 ms (1.811 ms / 100) 1.786 -> 1.779 ( -0.39%) [ +0.00% +0.28% +0.00% / -0.39% -0.34% -0.34%] index_copy_ strided 7 : Elapsed 0.018 ms (1.786 ms / 100) 1.814 -> 1.804 ( -0.55%) [ +0.06% +0.00% +0.17% / +0.00% -0.55% -0.33%] index_add_ perm : Elapsed 0.018 ms (1.815 ms / 100) 1.785 -> 1.776 ( -0.50%) [ +0.00% +0.17% +0.34% / +0.06% -0.28% -0.50%] index_copy_ perm : Elapsed 0.018 ms (1.785 ms / 100) 1.814 -> 1.809 ( -0.28%) [ +0.00% +0.06% +0.33% / +0.06% -0.28% -0.17%] index_add_ perm_sorted : Elapsed 0.018 ms (1.814 ms / 100) 1.788 -> 1.781 ( -0.39%) [ +0.11% +0.06% +0.00% / -0.17% -0.06% -0.39%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.790 ms / 100) 8.248 -> 8.242 ( -0.07%) [ +0.01% +0.00% +0.05% / -0.07% +0.33% +0.13%] index_select const : Elapsed 0.082 ms (8.249 ms / 100) 8.279 -> 8.292 ( +0.16%) [ +0.00% +0.07% +0.50% / +0.16% +0.53% +0.54%] index_select wrap : Elapsed 0.083 ms (8.279 ms / 100) 8.275 -> 8.276 ( +0.01%) [ +0.25% +0.06% +0.00% / +0.01% +0.36% +0.48%] index_select linear : Elapsed 0.083 ms (8.296 ms / 100) 8.264 -> 8.261 ( -0.04%) [ +0.00% +0.05% +0.05% / -0.04% +0.16% +0.12%] index_select reverse : Elapsed 0.083 ms (8.264 ms / 100) 8.242 -> 8.260 ( +0.22%) [ +0.16% +0.16% +0.00% / +0.22% +0.24% +0.27%] index_select skip64 : Elapsed 0.083 ms (8.255 ms / 100) 8.245 -> 8.256 ( +0.13%) [ +0.00% +0.28% +0.24% / +0.13% +0.30% +0.45%] index_select skip256 : Elapsed 0.082 ms (8.245 ms / 100) 8.253 -> 8.266 ( +0.16%) [ +0.30% +0.00% +0.28% / +0.16% +0.78% +0.52%] index_select spread : Elapsed 0.083 ms (8.278 ms / 100) 8.283 -> 8.291 ( +0.10%) [ +0.00% +0.12% +0.10% / +0.10% +0.43% +0.21%] index_select strided 3 : Elapsed 0.083 ms (8.283 ms / 100) 8.276 -> 8.286 ( +0.12%) [ +0.06% +0.18% +0.00% / +0.12% +0.48% +0.53%] index_select random : Elapsed 0.083 ms (8.281 ms / 100) 8.265 -> 8.275 ( +0.12%) [ +0.07% +0.00% +0.04% / +0.12% +0.46% +0.38%] index_select random_sorted : Elapsed 0.083 ms (8.271 ms / 100) B = [16, 4, 40, 20] (stride (4, 1, 1280, 64)) A = [16, 4, 40, 5] (stride (1, 3200, 16, 640)) dim = 3 1.771 -> 1.771 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.17% +0.00% +0.00%] index_add_ linear : Elapsed 0.018 ms (1.771 ms / 100) 1.727 -> 1.725 ( -0.12%) [ +0.00% +0.00% +0.06% / -0.12% +0.17% -0.06%] index_copy_ linear : Elapsed 0.017 ms (1.727 ms / 100) 1.781 -> 1.781 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.00% +0.17%] index_add_ reverse : Elapsed 0.018 ms (1.783 ms / 100) 1.742 -> 1.738 ( -0.23%) [ +0.06% +0.06% +0.00% / +0.11% -0.06% -0.23%] index_copy_ reverse : Elapsed 0.017 ms (1.743 ms / 100) 1.774 -> 1.777 ( +0.17%) [ +0.34% +0.23% +0.00% / +0.23% +0.17% +0.17%] index_add_ spread : Elapsed 0.018 ms (1.780 ms / 100) 1.734 -> 1.729 ( -0.29%) [ +0.12% +0.06% +0.00% / -0.29% +0.12% -0.23%] index_copy_ spread : Elapsed 0.017 ms (1.736 ms / 100) 1.771 -> 1.776 ( +0.28%) [ +0.51% +0.56% +0.00% / +0.28% +0.56% +0.85%] index_add_ strided 3 : Elapsed 0.018 ms (1.780 ms / 100) 1.730 -> 1.731 ( +0.06%) [ +0.12% +0.23% +0.00% / +0.06% +0.58% +0.40%] index_copy_ strided 3 : Elapsed 0.017 ms (1.732 ms / 100) 1.776 -> 1.775 ( -0.06%) [ +0.00% +0.23% +0.00% / -0.06% +0.34% +0.34%] index_add_ strided 7 : Elapsed 0.018 ms (1.776 ms / 100) 1.729 -> 1.732 ( +0.17%) [ +0.23% +0.06% +0.00% / +0.17% +0.29% +0.40%] index_copy_ strided 7 : Elapsed 0.017 ms (1.733 ms / 100) 1.760 -> 1.763 ( +0.17%) [ +0.06% +0.23% +0.00% / +0.17% +0.45% +0.34%] index_add_ perm : Elapsed 0.018 ms (1.761 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.35% +0.35%] index_copy_ perm : Elapsed 0.017 ms (1.715 ms / 100) 1.762 -> 1.763 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.51% +0.51%] index_add_ perm_sorted : Elapsed 0.018 ms (1.762 ms / 100) 1.718 -> 1.719 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.41% +0.35%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.719 ms / 100) 8.192 -> 8.204 ( +0.15%) [ +0.22% +0.28% +0.00% / +0.15% +0.16% +0.15%] index_select const : Elapsed 0.082 ms (8.210 ms / 100) 8.230 -> 8.222 ( -0.10%) [ +0.16% +0.00% +0.10% / -0.10% -0.07% +0.11%] index_select wrap : Elapsed 0.082 ms (8.243 ms / 100) 8.210 -> 8.221 ( +0.13%) [ +0.00% +0.07% +0.17% / +0.13% +0.21% +0.44%] index_select linear : Elapsed 0.082 ms (8.210 ms / 100) 8.212 -> 8.211 ( -0.01%) [ +0.27% +0.00% +0.19% / +0.18% -0.01% -0.01%] index_select reverse : Elapsed 0.082 ms (8.234 ms / 100) 8.183 -> 8.196 ( +0.16%) [ +0.00% +0.26% +0.10% / +0.27% +0.29% +0.16%] index_select skip64 : Elapsed 0.082 ms (8.183 ms / 100) 8.190 -> 8.193 ( +0.04%) [ +0.00% +0.10% +0.24% / +0.04% +0.07% +0.05%] index_select skip256 : Elapsed 0.082 ms (8.190 ms / 100) 8.217 -> 8.223 ( +0.07%) [ +0.18% +0.00% +0.21% / +0.07% +0.12% +0.16%] index_select spread : Elapsed 0.082 ms (8.232 ms / 100) 8.219 -> 8.224 ( +0.06%) [ +0.27% +0.13% +0.00% / +0.18% +0.06% +0.24%] index_select strided 3 : Elapsed 0.082 ms (8.241 ms / 100) 8.219 -> 8.221 ( +0.02%) [ +0.28% +0.29% +0.00% / +0.27% +0.02% +0.10%] index_select random : Elapsed 0.082 ms (8.242 ms / 100) 8.209 -> 8.212 ( +0.04%) [ +0.18% +0.00% +0.18% / +0.21% +0.04% +0.34%] index_select random_sorted : Elapsed 0.082 ms (8.224 ms / 100) B = [16, 4, 40, 20] (stride (4, 1, 1280, 64)) A = [16, 4, 40, 5] (stride (4, 1, 320, 64)) dim = 3 0.712 -> 0.713 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +1.83% +1.83%] index_add_ linear : Elapsed 0.007 ms (0.712 ms / 100) 0.740 -> 0.740 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +1.49% +1.49%] index_copy_ linear : Elapsed 0.007 ms (0.740 ms / 100) 0.710 -> 0.708 ( -0.28%) [ +0.14% +0.42% +0.00% / +0.28% -0.28% +0.00%] index_add_ reverse : Elapsed 0.007 ms (0.711 ms / 100) 0.730 -> 0.729 ( -0.14%) [ +0.00% +0.14% +0.00% / +0.14% -0.14% -0.14%] index_copy_ reverse : Elapsed 0.007 ms (0.730 ms / 100) 0.711 -> 0.711 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.14% +0.00% +0.00%] index_add_ spread : Elapsed 0.007 ms (0.711 ms / 100) 0.745 -> 0.745 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.67% +0.40%] index_copy_ spread : Elapsed 0.007 ms (0.746 ms / 100) 0.712 -> 0.710 ( -0.28%) [ +0.28% +0.28% +0.00% / +0.14% -0.28% -0.14%] index_add_ strided 3 : Elapsed 0.007 ms (0.714 ms / 100) 0.752 -> 0.749 ( -0.40%) [ +0.00% +0.13% +0.00% / +0.27% -0.27% -0.40%] index_copy_ strided 3 : Elapsed 0.008 ms (0.752 ms / 100) 0.718 -> 0.715 ( -0.42%) [ +0.28% +0.42% +0.00% / +0.14% -0.28% -0.42%] index_add_ strided 7 : Elapsed 0.007 ms (0.720 ms / 100) 0.746 -> 0.742 ( -0.54%) [ +0.00% +0.00% +0.00% / +0.13% -0.54% -0.27%] index_copy_ strided 7 : Elapsed 0.007 ms (0.746 ms / 100) 0.710 -> 0.710 ( +0.00%) [ +0.28% +0.00% +0.14% / +0.00% +0.28% +0.42%] index_add_ perm : Elapsed 0.007 ms (0.712 ms / 100) 0.736 -> 0.736 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.82% +0.68%] index_copy_ perm : Elapsed 0.007 ms (0.736 ms / 100) 0.707 -> 0.708 ( +0.14%) [ +0.28% +0.28% +0.00% / +0.14% +0.57% +0.42%] index_add_ perm_sorted : Elapsed 0.007 ms (0.709 ms / 100) 0.739 -> 0.739 ( +0.00%) [ +0.27% +0.27% +0.00% / +0.00% +0.95% +0.68%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.741 ms / 100) 4.964 -> 4.963 ( -0.02%) [ +0.16% +0.00% +0.14% / -0.02% +0.06% +0.10%] index_select const : Elapsed 0.050 ms (4.972 ms / 100) 4.984 -> 4.983 ( -0.02%) [ +0.20% +0.04% +0.00% / -0.02% +0.26% +0.04%] index_select wrap : Elapsed 0.050 ms (4.994 ms / 100) 4.996 -> 4.999 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.18% +0.14%] index_select linear : Elapsed 0.050 ms (5.002 ms / 100) 4.949 -> 4.948 ( -0.02%) [ +0.00% +0.34% +0.32% / -0.02% +0.06% +0.08%] index_select reverse : Elapsed 0.049 ms (4.949 ms / 100) 4.967 -> 4.962 ( -0.10%) [ +0.00% +0.02% +0.26% / +0.10% +0.00% -0.10%] index_select skip64 : Elapsed 0.050 ms (4.967 ms / 100) 4.963 -> 4.963 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.02% +0.00% +0.08%] index_select skip256 : Elapsed 0.050 ms (4.963 ms / 100) 4.984 -> 4.982 ( -0.04%) [ +0.06% +0.00% +0.06% / -0.02% +0.04% -0.04%] index_select spread : Elapsed 0.050 ms (4.987 ms / 100) 4.985 -> 4.981 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.18% +0.02%] index_select strided 3 : Elapsed 0.050 ms (4.985 ms / 100) 4.984 -> 4.984 ( +0.00%) [ +0.00% +0.20% +0.06% / +0.00% +0.14% +0.10%] index_select random : Elapsed 0.050 ms (4.984 ms / 100) 4.973 -> 4.975 ( +0.04%) [ +0.00% +0.06% +0.18% / +0.12% +0.12% +0.04%] index_select random_sorted : Elapsed 0.050 ms (4.973 ms / 100) B = [16, 4, 40, 20] (stride (1, 16, 1280, 64)) A = [16, 4, 40, 5] (stride (1, 16, 320, 64)) dim = 3 1.962 -> 1.963 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.36% +0.25%] index_add_ linear : Elapsed 0.020 ms (1.962 ms / 100) 1.916 -> 1.913 ( -0.16%) [ +0.00% +0.10% +0.21% / -0.16% +0.16% -0.10%] index_copy_ linear : Elapsed 0.019 ms (1.916 ms / 100) 1.973 -> 1.974 ( +0.05%) [ +0.10% +0.00% +0.15% / +0.25% +0.35% +0.05%] index_add_ reverse : Elapsed 0.020 ms (1.975 ms / 100) 1.933 -> 1.928 ( -0.26%) [ +0.10% +0.00% +0.16% / +0.10% -0.26% -0.26%] index_copy_ reverse : Elapsed 0.019 ms (1.935 ms / 100) 1.965 -> 1.967 ( +0.10%) [ +0.00% +0.10% +0.31% / +0.10% +0.41% +0.56%] index_add_ spread : Elapsed 0.020 ms (1.965 ms / 100) 1.918 -> 1.918 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.63% +0.47%] index_copy_ spread : Elapsed 0.019 ms (1.920 ms / 100) 1.962 -> 1.965 ( +0.15%) [ +0.00% +0.15% +0.25% / +0.15% +0.82% +0.92%] index_add_ strided 3 : Elapsed 0.020 ms (1.962 ms / 100) 1.918 -> 1.923 ( +0.26%) [ +0.10% +0.00% +0.10% / +0.26% +0.89% +0.94%] index_copy_ strided 3 : Elapsed 0.019 ms (1.920 ms / 100) 1.983 -> 1.985 ( +0.10%) [ +0.00% +0.00% +0.10% / +0.10% +0.25% +0.45%] index_add_ strided 7 : Elapsed 0.020 ms (1.983 ms / 100) 1.930 -> 1.934 ( +0.21%) [ +0.10% +0.05% +0.00% / +0.21% +0.41% +0.21%] index_copy_ strided 7 : Elapsed 0.019 ms (1.932 ms / 100) 1.961 -> 1.966 ( +0.25%) [ +0.00% +0.05% +0.20% / +0.25% +0.71% +0.76%] index_add_ perm : Elapsed 0.020 ms (1.961 ms / 100) 1.913 -> 1.919 ( +0.31%) [ +0.10% +0.00% +0.10% / +0.31% +0.73% +0.63%] index_copy_ perm : Elapsed 0.019 ms (1.915 ms / 100) 1.957 -> 1.961 ( +0.20%) [ +0.00% +0.10% +0.10% / +0.20% +0.72% +0.72%] index_add_ perm_sorted : Elapsed 0.020 ms (1.957 ms / 100) 1.913 -> 1.912 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.37% +0.37%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.913 ms / 100) 8.545 -> 8.530 ( -0.18%) [ +0.13% +0.19% +0.00% / +0.19% -0.18% -0.13%] index_select const : Elapsed 0.086 ms (8.556 ms / 100) 8.570 -> 8.567 ( -0.04%) [ +0.14% +0.21% +0.00% / -0.04% +0.18% +0.16%] index_select wrap : Elapsed 0.086 ms (8.582 ms / 100) 8.562 -> 8.560 ( -0.02%) [ +0.22% +0.00% +0.22% / +0.30% +0.14% -0.02%] index_select linear : Elapsed 0.086 ms (8.581 ms / 100) 8.563 -> 8.573 ( +0.12%) [ +0.09% +0.14% +0.00% / +0.13% +0.12% +0.13%] index_select reverse : Elapsed 0.086 ms (8.571 ms / 100) 8.533 -> 8.546 ( +0.15%) [ +0.00% +0.40% +0.30% / +0.28% +0.15% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.533 ms / 100) 8.537 -> 8.530 ( -0.08%) [ +0.00% +0.14% +0.15% / +0.08% +0.01% -0.08%] index_select skip256 : Elapsed 0.085 ms (8.537 ms / 100) 8.573 -> 8.594 ( +0.24%) [ +0.01% +0.26% +0.00% / +0.28% +0.24% +0.28%] index_select spread : Elapsed 0.086 ms (8.574 ms / 100) 8.563 -> 8.569 ( +0.07%) [ +0.00% +0.23% +0.39% / +0.25% +0.07% +0.13%] index_select strided 3 : Elapsed 0.086 ms (8.563 ms / 100) 8.555 -> 8.546 ( -0.11%) [ +0.16% +0.21% +0.00% / +0.23% -0.11% +0.22%] index_select random : Elapsed 0.086 ms (8.569 ms / 100) 8.564 -> 8.564 ( +0.00%) [ +0.16% +0.21% +0.00% / +0.26% +0.00% +0.27%] index_select random_sorted : Elapsed 0.086 ms (8.578 ms / 100) B = [16, 4, 40, 20] (stride (4, 1, 64, 2560)) A = [16, 4, 40, 5] (stride (800, 1, 4, 160)) dim = 3 1.713 -> 1.713 ( +0.00%) [ +0.06% +0.00% +0.35% / +0.00% +0.93% +0.88%] index_add_ linear : Elapsed 0.017 ms (1.714 ms / 100) 1.672 -> 1.674 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +1.20% +0.96%] index_copy_ linear : Elapsed 0.017 ms (1.674 ms / 100) 1.712 -> 1.716 ( +0.23%) [ +0.18% +0.53% +0.00% / +0.23% +0.53% +0.35%] index_add_ reverse : Elapsed 0.017 ms (1.715 ms / 100) 1.672 -> 1.671 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.30% +0.42%] index_copy_ reverse : Elapsed 0.017 ms (1.672 ms / 100) 1.701 -> 1.704 ( +0.18%) [ +0.18% +0.12% +0.00% / +0.18% +1.94% +2.23%] index_add_ spread : Elapsed 0.017 ms (1.704 ms / 100) 1.657 -> 1.657 ( +0.00%) [ +0.36% +0.18% +0.00% / +0.00% +1.69% +1.93%] index_copy_ spread : Elapsed 0.017 ms (1.663 ms / 100) 1.713 -> 1.720 ( +0.41%) [ +0.00% +0.12% +0.06% / +0.41% +1.23% +1.52%] index_add_ strided 3 : Elapsed 0.017 ms (1.713 ms / 100) 1.669 -> 1.675 ( +0.36%) [ +0.00% +0.30% +0.06% / +0.36% +1.50% +1.56%] index_copy_ strided 3 : Elapsed 0.017 ms (1.669 ms / 100) 1.710 -> 1.710 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.99% +1.17%] index_add_ strided 7 : Elapsed 0.017 ms (1.711 ms / 100) 1.666 -> 1.665 ( -0.06%) [ +0.06% +0.00% +0.12% / -0.06% +1.08% +1.14%] index_copy_ strided 7 : Elapsed 0.017 ms (1.667 ms / 100) 1.710 -> 1.712 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.18% +0.23%] index_add_ perm : Elapsed 0.017 ms (1.711 ms / 100) 1.666 -> 1.668 ( +0.12%) [ +0.30% +0.18% +0.00% / +0.12% +0.36% +0.54%] index_copy_ perm : Elapsed 0.017 ms (1.671 ms / 100) 1.711 -> 1.711 ( +0.00%) [ +0.00% +0.00% +0.18% / +0.00% +0.23% +0.29%] index_add_ perm_sorted : Elapsed 0.017 ms (1.711 ms / 100) 1.665 -> 1.666 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.24% +0.54%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.667 ms / 100) 8.176 -> 8.187 ( +0.13%) [ +0.09% +0.23% +0.00% / +0.13% +0.18% +0.54%] index_select const : Elapsed 0.082 ms (8.183 ms / 100) 8.213 -> 8.231 ( +0.22%) [ +0.00% +0.21% +0.21% / +0.22% +0.26% +0.23%] index_select wrap : Elapsed 0.082 ms (8.213 ms / 100) 8.217 -> 8.226 ( +0.11%) [ +0.10% +0.00% +0.01% / +0.11% +0.39% +0.15%] index_select linear : Elapsed 0.082 ms (8.225 ms / 100) 8.211 -> 8.201 ( -0.12%) [ +0.02% +0.00% +0.04% / -0.09% -0.12% -0.07%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.169 -> 8.197 ( +0.34%) [ +0.00% +0.13% +0.02% / +0.34% +0.61% +0.37%] index_select skip64 : Elapsed 0.082 ms (8.169 ms / 100) 8.183 -> 8.186 ( +0.04%) [ +0.00% +0.05% +0.02% / +0.04% +0.56% +0.06%] index_select skip256 : Elapsed 0.082 ms (8.183 ms / 100) 8.204 -> 8.201 ( -0.04%) [ +0.06% +0.00% +0.00% / -0.04% +0.28% +0.12%] index_select spread : Elapsed 0.082 ms (8.209 ms / 100) 8.212 -> 8.216 ( +0.05%) [ +0.27% +0.46% +0.00% / +0.05% +0.18% +0.22%] index_select strided 3 : Elapsed 0.082 ms (8.234 ms / 100) 8.218 -> 8.220 ( +0.02%) [ +0.07% +0.19% +0.00% / +0.02% +0.39% +0.30%] index_select random : Elapsed 0.082 ms (8.224 ms / 100) 8.186 -> 8.193 ( +0.09%) [ +0.29% +0.00% +0.12% / +0.09% +0.49% +0.43%] index_select random_sorted : Elapsed 0.082 ms (8.210 ms / 100) out_shape = [20, 5, 4, 40] in_shape = [16, 5, 4, 40] idx_dim = 0 B = [20, 5, 4, 40] (stride (800, 40, 200, 1)) A = [16, 5, 4, 40] (stride (1, 2560, 16, 64)) dim = 0 4.419 -> 4.418 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.57% +0.54%] index_add_ linear : Elapsed 0.044 ms (4.420 ms / 100) 4.258 -> 4.259 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.59% +0.63%] index_copy_ linear : Elapsed 0.043 ms (4.259 ms / 100) 4.434 -> 4.435 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.74% +0.72%] index_add_ reverse : Elapsed 0.044 ms (4.434 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.91% +0.84%] index_copy_ reverse : Elapsed 0.043 ms (4.274 ms / 100) 4.421 -> 4.422 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.63% +0.63%] index_add_ spread : Elapsed 0.044 ms (4.423 ms / 100) 4.258 -> 4.259 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.70% +0.66%] index_copy_ spread : Elapsed 0.043 ms (4.260 ms / 100) 4.427 -> 4.426 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.77% +0.70%] index_add_ strided 3 : Elapsed 0.044 ms (4.429 ms / 100) 4.259 -> 4.259 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.77% +0.77%] index_copy_ strided 3 : Elapsed 0.043 ms (4.260 ms / 100) 4.428 -> 4.429 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.02% +0.63% +0.68%] index_add_ strided 7 : Elapsed 0.044 ms (4.428 ms / 100) 4.260 -> 4.261 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.75% +0.73%] index_copy_ strided 7 : Elapsed 0.043 ms (4.261 ms / 100) 4.424 -> 4.426 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.50% +0.52%] index_add_ perm : Elapsed 0.044 ms (4.426 ms / 100) 4.263 -> 4.262 ( -0.02%) [ +0.09% +0.07% +0.00% / -0.02% +0.49% +0.49%] index_copy_ perm : Elapsed 0.043 ms (4.267 ms / 100) 4.438 -> 4.436 ( -0.05%) [ +0.00% +0.02% +0.05% / -0.05% +0.59% +0.59%] index_add_ perm_sorted : Elapsed 0.044 ms (4.438 ms / 100) 4.273 -> 4.273 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.66% +0.63%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.273 ms / 100) 5.559 -> 5.557 ( -0.04%) [ +0.16% +0.00% +0.22% / +0.16% +0.13% -0.04%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.566 ( +0.04%) [ +0.22% +0.20% +0.00% / +0.14% +0.18% +0.04%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.569 ( -0.05%) [ +0.00% +0.14% +0.00% / +0.09% -0.05% -0.04%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.572 -> 5.568 ( -0.07%) [ +0.04% +0.00% +0.07% / +0.00% -0.07% -0.05%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.563 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.00% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.556 -> 5.562 ( +0.11%) [ +0.09% +0.16% +0.00% / +0.11% +0.20% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.568 -> 5.565 ( -0.05%) [ +0.16% +0.11% +0.00% / +0.09% +0.11% -0.05%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.11% +0.00% +0.02% / +0.25% +0.02% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.577 -> 5.569 ( -0.14%) [ +0.00% +0.02% +0.00% / +0.02% -0.02% -0.14%] index_select strided 5 : Elapsed 0.056 ms (5.577 ms / 100) 5.574 -> 5.577 ( +0.05%) [ +0.07% +0.00% +0.04% / +0.09% +0.05% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.578 ms / 100) 5.575 -> 5.573 ( -0.04%) [ +0.00% +0.02% +0.04% / +0.02% +0.07% -0.04%] index_select strided 8 : Elapsed 0.056 ms (5.575 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.20% +0.05% +0.00% / +0.00% +0.07% +0.05%] index_select random : Elapsed 0.056 ms (5.580 ms / 100) 5.571 -> 5.567 ( -0.07%) [ +0.04% +0.05% +0.00% / +0.05% +0.04% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [20, 5, 4, 40] (stride (800, 40, 200, 1)) A = [16, 5, 4, 40] (stride (20, 4, 1, 320)) dim = 0 4.087 -> 4.088 ( +0.02%) [ +0.05% +0.12% +0.00% / +0.02% +0.66% +0.78%] index_add_ linear : Elapsed 0.041 ms (4.089 ms / 100) 3.915 -> 3.913 ( -0.05%) [ +0.23% +0.08% +0.00% / -0.05% +0.61% +0.66%] index_copy_ linear : Elapsed 0.039 ms (3.924 ms / 100) 4.102 -> 4.103 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.66% +0.71%] index_add_ reverse : Elapsed 0.041 ms (4.103 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.74% +0.74%] index_copy_ reverse : Elapsed 0.039 ms (3.924 ms / 100) 4.081 -> 4.081 ( +0.00%) [ +0.00% +0.20% +0.02% / +0.00% +0.81% +0.88%] index_add_ spread : Elapsed 0.041 ms (4.081 ms / 100) 3.917 -> 3.916 ( -0.03%) [ +0.00% +0.13% +0.13% / -0.03% +0.64% +0.59%] index_copy_ spread : Elapsed 0.039 ms (3.917 ms / 100) 4.088 -> 4.088 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.76% +0.78%] index_add_ strided 3 : Elapsed 0.041 ms (4.090 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.08% +0.13% +0.00% / +0.03% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 4.101 -> 4.102 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.76% +0.71%] index_add_ strided 7 : Elapsed 0.041 ms (4.101 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.00% +0.08% +0.05% / +0.05% +0.74% +0.79%] index_copy_ strided 7 : Elapsed 0.039 ms (3.923 ms / 100) 4.084 -> 4.087 ( +0.07%) [ +0.12% +0.10% +0.00% / +0.07% +0.83% +0.73%] index_add_ perm : Elapsed 0.041 ms (4.089 ms / 100) 3.914 -> 3.916 ( +0.05%) [ +0.00% +0.13% +0.15% / +0.05% +0.72% +0.61%] index_copy_ perm : Elapsed 0.039 ms (3.914 ms / 100) 4.087 -> 4.088 ( +0.02%) [ +0.10% +0.00% +0.00% / +0.02% +0.78% +0.73%] index_add_ perm_sorted : Elapsed 0.041 ms (4.091 ms / 100) 3.918 -> 3.922 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.59% +0.59%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.00% +0.09% +0.09% / +0.04% -0.02% +0.02%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.508 -> 5.509 ( +0.02%) [ +0.16% +0.00% +0.02% / +0.02% +0.11% +0.04%] index_select wrap : Elapsed 0.055 ms (5.517 ms / 100) 5.505 -> 5.500 ( -0.09%) [ +0.07% +0.00% +0.04% / -0.09% +0.13% +0.15%] index_select linear : Elapsed 0.055 ms (5.509 ms / 100) 5.506 -> 5.505 ( -0.02%) [ +0.02% +0.00% +0.04% / -0.02% +0.15% +0.16%] index_select reverse : Elapsed 0.055 ms (5.507 ms / 100) 5.487 -> 5.486 ( -0.02%) [ +0.09% +0.00% +0.05% / -0.02% +0.09% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.492 ms / 100) 5.485 -> 5.492 ( +0.13%) [ +0.00% +0.11% +0.15% / +0.13% +0.15% +0.24%] index_select skip256 : Elapsed 0.055 ms (5.485 ms / 100) 5.502 -> 5.508 ( +0.11%) [ +0.00% +0.11% +0.18% / +0.11% +0.11% +0.25%] index_select spread : Elapsed 0.055 ms (5.502 ms / 100) 5.509 -> 5.505 ( -0.07%) [ +0.05% +0.05% +0.00% / -0.07% +0.05% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.512 ms / 100) 5.506 -> 5.501 ( -0.09%) [ +0.00% +0.02% +0.02% / -0.09% +0.27% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.506 ms / 100) 5.508 -> 5.507 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.04% +0.00%] index_select strided 7 : Elapsed 0.055 ms (5.512 ms / 100) 5.491 -> 5.487 ( -0.07%) [ +0.07% +0.02% +0.00% / -0.07% +0.16% +0.07%] index_select strided 8 : Elapsed 0.055 ms (5.495 ms / 100) 5.504 -> 5.505 ( +0.02%) [ +0.05% +0.20% +0.00% / +0.02% +0.02% +0.11%] index_select random : Elapsed 0.055 ms (5.507 ms / 100) 5.505 -> 5.503 ( -0.04%) [ +0.00% +0.00% +0.04% / -0.02% -0.04% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.505 ms / 100) B = [20, 5, 4, 40] (stride (800, 4, 1, 20)) A = [16, 5, 4, 40] (stride (800, 40, 200, 1)) dim = 0 3.628 -> 3.629 ( +0.03%) [ +0.06% +0.08% +0.00% / +0.03% +0.83% +0.99%] index_add_ linear : Elapsed 0.036 ms (3.630 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.83% +0.83%] index_copy_ linear : Elapsed 0.035 ms (3.502 ms / 100) 3.634 -> 3.636 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.80% +0.83%] index_add_ reverse : Elapsed 0.036 ms (3.634 ms / 100) 3.503 -> 3.506 ( +0.09%) [ +0.00% +0.00% +0.20% / +0.09% +0.74% +0.77%] index_copy_ reverse : Elapsed 0.035 ms (3.503 ms / 100) 3.632 -> 3.632 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.91% +0.85%] index_add_ spread : Elapsed 0.036 ms (3.633 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.80% +0.77%] index_copy_ spread : Elapsed 0.035 ms (3.503 ms / 100) 3.632 -> 3.631 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.91% +0.88%] index_add_ strided 3 : Elapsed 0.036 ms (3.633 ms / 100) 3.508 -> 3.509 ( +0.03%) [ +0.00% +0.03% +0.06% / +0.03% +0.74% +0.71%] index_copy_ strided 3 : Elapsed 0.035 ms (3.508 ms / 100) 3.635 -> 3.634 ( -0.03%) [ +0.00% +0.03% +0.06% / -0.03% +0.66% +0.80%] index_add_ strided 7 : Elapsed 0.036 ms (3.635 ms / 100) 3.510 -> 3.509 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.74% +0.68%] index_copy_ strided 7 : Elapsed 0.035 ms (3.510 ms / 100) 3.635 -> 3.634 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.77% +0.83%] index_add_ perm : Elapsed 0.036 ms (3.635 ms / 100) 3.507 -> 3.508 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.74% +0.68%] index_copy_ perm : Elapsed 0.035 ms (3.507 ms / 100) 3.634 -> 3.635 ( +0.03%) [ +0.00% +0.08% +0.03% / +0.03% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.036 ms (3.634 ms / 100) 3.503 -> 3.503 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.80% +0.86%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.503 ms / 100) 5.464 -> 5.468 ( +0.07%) [ +0.09% +0.15% +0.00% / +0.20% +0.24% +0.07%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.02% +0.02% +0.00% / +0.07% +0.18% -0.02%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.478 -> 5.489 ( +0.20%) [ +0.18% +0.00% +0.22% / +0.27% +0.20% +0.26%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.484 -> 5.480 ( -0.07%) [ +0.04% +0.00% +0.00% / -0.07% +0.15% +0.13%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.463 -> 5.471 ( +0.15%) [ +0.02% +0.15% +0.00% / +0.15% +0.18% +0.22%] index_select skip64 : Elapsed 0.055 ms (5.464 ms / 100) 5.462 -> 5.468 ( +0.11%) [ +0.00% +0.18% +0.05% / +0.11% +0.22% +0.22%] index_select skip256 : Elapsed 0.055 ms (5.462 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.00% +0.11% +0.00% / +0.02% -0.05% +0.02%] index_select spread : Elapsed 0.055 ms (5.486 ms / 100) 5.486 -> 5.479 ( -0.13%) [ +0.18% +0.00% +0.02% / +0.09% -0.07% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.487 -> 5.481 ( -0.11%) [ +0.00% +0.09% +0.07% / +0.02% -0.04% -0.11%] index_select strided 5 : Elapsed 0.055 ms (5.487 ms / 100) 5.479 -> 5.482 ( +0.05%) [ +0.00% +0.02% +0.07% / +0.11% +0.05% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.479 ms / 100) 5.463 -> 5.465 ( +0.04%) [ +0.13% +0.18% +0.00% / +0.04% +0.20% +0.16%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.484 -> 5.475 ( -0.16%) [ +0.13% +0.05% +0.00% / -0.16% -0.05% -0.16%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.479 -> 5.480 ( +0.02%) [ +0.04% +0.04% +0.00% / +0.16% +0.15% +0.02%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 4000, 100)) A = [16, 5, 4, 40] (stride (800, 160, 1, 4)) dim = 0 3.647 -> 3.647 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.74% +0.69%] index_add_ linear : Elapsed 0.036 ms (3.647 ms / 100) 3.525 -> 3.526 ( +0.03%) [ +0.09% +0.00% +0.20% / +0.03% +0.71% +0.60%] index_copy_ linear : Elapsed 0.035 ms (3.528 ms / 100) 3.646 -> 3.646 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.74% +0.77%] index_add_ reverse : Elapsed 0.036 ms (3.646 ms / 100) 3.518 -> 3.521 ( +0.09%) [ +0.00% +0.06% +0.00% / +0.09% +0.77% +0.71%] index_copy_ reverse : Elapsed 0.035 ms (3.518 ms / 100) 3.643 -> 3.646 ( +0.08%) [ +0.00% +0.05% +0.03% / +0.08% +0.82% +0.85%] index_add_ spread : Elapsed 0.036 ms (3.643 ms / 100) 3.520 -> 3.523 ( +0.09%) [ +0.09% +0.00% +0.23% / +0.09% +0.74% +0.77%] index_copy_ spread : Elapsed 0.035 ms (3.523 ms / 100) 3.651 -> 3.650 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.79% +0.85%] index_add_ strided 3 : Elapsed 0.037 ms (3.652 ms / 100) 3.525 -> 3.525 ( +0.00%) [ +0.03% +0.14% +0.00% / +0.00% +0.74% +0.71%] index_copy_ strided 3 : Elapsed 0.035 ms (3.526 ms / 100) 3.648 -> 3.647 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.69% +0.69%] index_add_ strided 7 : Elapsed 0.036 ms (3.648 ms / 100) 3.519 -> 3.519 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.65% +0.74%] index_copy_ strided 7 : Elapsed 0.035 ms (3.521 ms / 100) 3.647 -> 3.646 ( -0.03%) [ +0.05% +0.00% +0.03% / -0.03% +0.74% +0.74%] index_add_ perm : Elapsed 0.036 ms (3.649 ms / 100) 3.523 -> 3.529 ( +0.17%) [ +0.14% +0.00% +0.03% / +0.17% +0.71% +0.68%] index_copy_ perm : Elapsed 0.035 ms (3.528 ms / 100) 3.653 -> 3.654 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.66% +0.63%] index_add_ perm_sorted : Elapsed 0.037 ms (3.654 ms / 100) 3.528 -> 3.530 ( +0.06%) [ +0.06% +0.00% +0.03% / +0.06% +0.48% +0.51%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.530 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.00% +0.05% +0.13% / +0.07% +0.00% +0.04%] index_select const : Elapsed 0.055 ms (5.471 ms / 100) 5.475 -> 5.481 ( +0.11%) [ +0.15% +0.16% +0.00% / +0.26% +0.26% +0.11%] index_select wrap : Elapsed 0.055 ms (5.483 ms / 100) 5.475 -> 5.483 ( +0.15%) [ +0.05% +0.13% +0.00% / +0.15% +0.24% +0.35%] index_select linear : Elapsed 0.055 ms (5.478 ms / 100) 5.477 -> 5.484 ( +0.13%) [ +0.09% +0.07% +0.00% / +0.15% +0.13% +0.15%] index_select reverse : Elapsed 0.055 ms (5.482 ms / 100) 5.468 -> 5.468 ( +0.00%) [ +0.00% +0.20% +0.09% / +0.00% +0.22% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.468 ms / 100) 5.472 -> 5.473 ( +0.02%) [ +0.05% +0.11% +0.00% / +0.07% +0.02% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.07% +0.00% +0.15% / +0.09% +0.04% +0.31%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.481 -> 5.482 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.11% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.481 ms / 100) 5.477 -> 5.482 ( +0.09%) [ +0.07% +0.00% +0.11% / +0.09% +0.18% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.481 -> 5.477 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.13% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.470 -> 5.474 ( +0.07%) [ +0.00% +0.15% +0.11% / +0.07% +0.20% +0.44%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.474 -> 5.474 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.09% +0.15%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.474 -> 5.480 ( +0.11%) [ +0.09% +0.00% +0.02% / +0.11% +0.15% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.479 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 4000, 100)) A = [16, 5, 4, 40] (stride (800, 4, 1, 20)) dim = 0 1.418 -> 1.421 ( +0.21%) [ +0.35% +0.14% +0.00% / +0.21% +1.06% +0.99%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.455 -> 1.459 ( +0.27%) [ +0.34% +0.21% +0.00% / +0.27% +1.17% +1.17%] index_copy_ linear : Elapsed 0.015 ms (1.460 ms / 100) 1.416 -> 1.417 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +1.06% +0.85%] index_add_ reverse : Elapsed 0.014 ms (1.418 ms / 100) 1.455 -> 1.454 ( -0.07%) [ +0.14% +0.07% +0.00% / -0.07% +1.24% +1.24%] index_copy_ reverse : Elapsed 0.015 ms (1.457 ms / 100) 1.416 -> 1.416 ( +0.00%) [ +0.00% +0.35% +0.00% / +0.00% +1.06% +1.06%] index_add_ spread : Elapsed 0.014 ms (1.416 ms / 100) 1.447 -> 1.448 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +1.38% +1.38%] index_copy_ spread : Elapsed 0.014 ms (1.447 ms / 100) 1.414 -> 1.415 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.78% +0.85%] index_add_ strided 3 : Elapsed 0.014 ms (1.416 ms / 100) 1.445 -> 1.447 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.97% +0.97%] index_copy_ strided 3 : Elapsed 0.014 ms (1.446 ms / 100) 1.414 -> 1.418 ( +0.28%) [ +0.07% +0.00% +0.28% / +0.28% +0.85% +0.85%] index_add_ strided 7 : Elapsed 0.014 ms (1.415 ms / 100) 1.445 -> 1.449 ( +0.28%) [ +0.07% +0.07% +0.00% / +0.28% +0.62% +0.90%] index_copy_ strided 7 : Elapsed 0.014 ms (1.446 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.56% +0.42%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.451 -> 1.452 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.76% +0.90%] index_copy_ perm : Elapsed 0.015 ms (1.452 ms / 100) 1.417 -> 1.417 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.64% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.418 ms / 100) 1.457 -> 1.461 ( +0.27%) [ +0.00% +0.00% +0.27% / +0.27% +0.69% +0.82%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.457 ms / 100) 3.545 -> 3.536 ( -0.25%) [ +0.08% +0.00% +0.23% / +0.03% -0.25% +0.03%] index_select const : Elapsed 0.035 ms (3.548 ms / 100) 3.577 -> 3.572 ( -0.14%) [ +0.00% +0.06% +0.11% / -0.14% -0.06% -0.03%] index_select wrap : Elapsed 0.036 ms (3.577 ms / 100) 3.571 -> 3.574 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.17% +0.14%] index_select linear : Elapsed 0.036 ms (3.572 ms / 100) 3.569 -> 3.568 ( -0.03%) [ +0.03% +0.00% +0.06% / -0.03% +0.20% +0.08%] index_select reverse : Elapsed 0.036 ms (3.570 ms / 100) 3.534 -> 3.538 ( +0.11%) [ +0.00% +0.11% +0.25% / +0.20% +0.11% +0.31%] index_select skip64 : Elapsed 0.035 ms (3.534 ms / 100) 3.537 -> 3.531 ( -0.17%) [ +0.06% +0.00% +0.11% / -0.17% +0.08% +0.14%] index_select skip256 : Elapsed 0.035 ms (3.539 ms / 100) 3.581 -> 3.570 ( -0.31%) [ +0.03% +0.34% +0.00% / +0.28% -0.31% -0.28%] index_select spread : Elapsed 0.036 ms (3.582 ms / 100) 3.570 -> 3.568 ( -0.06%) [ +0.08% +0.22% +0.00% / +0.06% +0.31% -0.06%] index_select strided 3 : Elapsed 0.036 ms (3.573 ms / 100) 3.567 -> 3.573 ( +0.17%) [ +0.25% +0.31% +0.00% / +0.17% +0.25% +0.25%] index_select strided 5 : Elapsed 0.036 ms (3.576 ms / 100) 3.563 -> 3.572 ( +0.25%) [ +0.34% +0.31% +0.00% / +0.25% +0.62% +0.67%] index_select strided 7 : Elapsed 0.036 ms (3.575 ms / 100) 3.533 -> 3.536 ( +0.08%) [ +0.11% +0.14% +0.00% / +0.08% +0.85% +0.79%] index_select strided 8 : Elapsed 0.035 ms (3.537 ms / 100) 3.559 -> 3.554 ( -0.14%) [ +0.03% +0.00% +0.08% / -0.14% +0.62% +0.42%] index_select random : Elapsed 0.036 ms (3.560 ms / 100) 3.559 -> 3.554 ( -0.14%) [ +0.17% +0.00% +0.20% / -0.14% +0.65% +0.65%] index_select random_sorted : Elapsed 0.036 ms (3.565 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 4000, 100)) A = [16, 5, 4, 40] (stride (1, 640, 3200, 16)) dim = 0 3.893 -> 3.895 ( +0.05%) [ +0.10% +0.03% +0.00% / +0.05% +0.74% +0.85%] index_add_ linear : Elapsed 0.039 ms (3.897 ms / 100) 3.744 -> 3.747 ( +0.08%) [ +0.00% +0.05% +0.13% / +0.08% +0.75% +0.83%] index_copy_ linear : Elapsed 0.037 ms (3.744 ms / 100) 3.887 -> 3.887 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.90% +0.90%] index_add_ reverse : Elapsed 0.039 ms (3.887 ms / 100) 3.739 -> 3.740 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.75% +0.75%] index_copy_ reverse : Elapsed 0.037 ms (3.742 ms / 100) 3.881 -> 3.885 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.90% +1.03%] index_add_ spread : Elapsed 0.039 ms (3.881 ms / 100) 3.750 -> 3.752 ( +0.05%) [ +0.00% +0.13% +0.05% / +0.05% +0.80% +1.12%] index_copy_ spread : Elapsed 0.038 ms (3.750 ms / 100) 3.914 -> 3.914 ( +0.00%) [ +0.05% +0.00% +0.15% / +0.00% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.039 ms (3.916 ms / 100) 3.754 -> 3.759 ( +0.13%) [ +0.11% +0.05% +0.00% / +0.13% +0.69% +0.75%] index_copy_ strided 3 : Elapsed 0.038 ms (3.758 ms / 100) 3.889 -> 3.890 ( +0.03%) [ +0.00% +0.10% +0.05% / +0.03% +0.87% +0.77%] index_add_ strided 7 : Elapsed 0.039 ms (3.889 ms / 100) 3.741 -> 3.742 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.03% +0.72% +0.67%] index_copy_ strided 7 : Elapsed 0.037 ms (3.741 ms / 100) 3.890 -> 3.891 ( +0.03%) [ +0.13% +0.21% +0.00% / +0.03% +0.90% +0.87%] index_add_ perm : Elapsed 0.039 ms (3.895 ms / 100) 3.745 -> 3.748 ( +0.08%) [ +0.00% +0.08% +0.05% / +0.08% +0.77% +0.69%] index_copy_ perm : Elapsed 0.037 ms (3.745 ms / 100) 3.909 -> 3.917 ( +0.20%) [ +0.28% +0.36% +0.00% / +0.20% +0.54% +0.51%] index_add_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 3.755 -> 3.756 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.61% +0.59%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.755 ms / 100) 5.471 -> 5.474 ( +0.05%) [ +0.02% +0.07% +0.00% / +0.05% +0.13% +0.07%] index_select const : Elapsed 0.055 ms (5.472 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.07% +0.00% +0.00% / -0.05% +0.00% +0.15%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.482 -> 5.479 ( -0.05%) [ +0.15% +0.00% +0.00% / -0.05% +0.02% +0.05%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.470 -> 5.479 ( +0.16%) [ +0.15% +0.13% +0.00% / +0.16% +0.20% +0.18%] index_select reverse : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.09% +0.09% +0.00% / +0.04% -0.04% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.469 -> 5.471 ( +0.04%) [ +0.00% +0.16% +0.13% / +0.04% +0.16% +0.22%] index_select skip256 : Elapsed 0.055 ms (5.469 ms / 100) 5.475 -> 5.478 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.09% +0.05% +0.11%] index_select spread : Elapsed 0.055 ms (5.479 ms / 100) 5.477 -> 5.479 ( +0.04%) [ +0.05% +0.00% +0.18% / +0.04% +0.11% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.480 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.00% +0.02% +0.11% / +0.00% +0.09% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.474 -> 5.479 ( +0.09%) [ +0.00% +0.00% +0.11% / +0.09% +0.16% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.474 ms / 100) 5.478 -> 5.475 ( -0.05%) [ +0.02% +0.00% +0.00% / -0.05% +0.15% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.479 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.11% +0.02% +0.00% / +0.11% +0.00% +0.00%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.480 -> 5.480 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.04% +0.07% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.482 ms / 100) B = [20, 5, 4, 40] (stride (1, 80, 20, 400)) A = [16, 5, 4, 40] (stride (1, 16, 80, 320)) dim = 0 4.105 -> 4.110 ( +0.12%) [ +0.00% +0.10% +0.07% / +0.12% +0.83% +0.78%] index_add_ linear : Elapsed 0.041 ms (4.105 ms / 100) 3.932 -> 3.936 ( +0.10%) [ +0.00% +0.08% +0.08% / +0.10% +0.79% +0.74%] index_copy_ linear : Elapsed 0.039 ms (3.932 ms / 100) 4.100 -> 4.095 ( -0.12%) [ +0.07% +0.00% +0.12% / -0.12% +0.83% +0.73%] index_add_ reverse : Elapsed 0.041 ms (4.103 ms / 100) 3.932 -> 3.928 ( -0.10%) [ +0.00% +0.03% +0.10% / -0.10% +0.76% +0.66%] index_copy_ reverse : Elapsed 0.039 ms (3.932 ms / 100) 4.086 -> 4.095 ( +0.22%) [ +0.12% +0.00% +0.15% / +0.22% +0.59% +0.61%] index_add_ spread : Elapsed 0.041 ms (4.091 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.13% +0.00% +0.10% / +0.10% +0.71% +0.74%] index_copy_ spread : Elapsed 0.039 ms (3.925 ms / 100) 4.103 -> 4.104 ( +0.02%) [ +0.24% +0.19% +0.00% / +0.02% +0.71% +0.66%] index_add_ strided 3 : Elapsed 0.041 ms (4.113 ms / 100) 3.935 -> 3.933 ( -0.05%) [ +0.13% +0.13% +0.00% / -0.05% +0.74% +0.56%] index_copy_ strided 3 : Elapsed 0.039 ms (3.940 ms / 100) 4.103 -> 4.105 ( +0.05%) [ +0.22% +0.19% +0.00% / +0.05% +0.83% +0.76%] index_add_ strided 7 : Elapsed 0.041 ms (4.112 ms / 100) 3.939 -> 3.941 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.74% +0.58%] index_copy_ strided 7 : Elapsed 0.039 ms (3.940 ms / 100) 4.086 -> 4.090 ( +0.10%) [ +0.00% +0.27% +0.15% / +0.10% +0.71% +0.83%] index_add_ perm : Elapsed 0.041 ms (4.086 ms / 100) 3.924 -> 3.924 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.56% +0.64%] index_copy_ perm : Elapsed 0.039 ms (3.924 ms / 100) 4.105 -> 4.104 ( -0.02%) [ +0.00% +0.05% +0.02% / -0.02% +0.32% +0.63%] index_add_ perm_sorted : Elapsed 0.041 ms (4.105 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.56% +0.69%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.934 ms / 100) 5.485 -> 5.493 ( +0.15%) [ +0.11% +0.11% +0.00% / +0.20% +0.15% +0.18%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.493 -> 5.492 ( -0.02%) [ +0.11% +0.00% +0.16% / +0.05% -0.02% +0.05%] index_select wrap : Elapsed 0.055 ms (5.499 ms / 100) 5.495 -> 5.497 ( +0.04%) [ +0.00% +0.05% +0.02% / +0.04% +0.05% +0.20%] index_select linear : Elapsed 0.055 ms (5.495 ms / 100) 5.499 -> 5.491 ( -0.15%) [ +0.02% +0.00% +0.09% / +0.09% -0.04% -0.15%] index_select reverse : Elapsed 0.055 ms (5.500 ms / 100) 5.487 -> 5.488 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.02% +0.13% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.487 ms / 100) 5.488 -> 5.491 ( +0.05%) [ +0.00% +0.02% +0.04% / +0.05% +0.15% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.488 ms / 100) 5.495 -> 5.494 ( -0.02%) [ +0.24% +0.00% +0.04% / +0.04% -0.02% +0.04%] index_select spread : Elapsed 0.055 ms (5.508 ms / 100) 5.499 -> 5.501 ( +0.04%) [ +0.04% +0.00% +0.13% / +0.04% +0.09% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.501 ms / 100) 5.500 -> 5.494 ( -0.11%) [ +0.07% +0.00% +0.09% / +0.13% -0.11% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.504 ms / 100) 5.496 -> 5.496 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.13% +0.13%] index_select strided 7 : Elapsed 0.055 ms (5.501 ms / 100) 5.501 -> 5.496 ( -0.09%) [ +0.05% +0.00% +0.04% / -0.05% -0.09% -0.05%] index_select strided 8 : Elapsed 0.055 ms (5.504 ms / 100) 5.499 -> 5.494 ( -0.09%) [ +0.04% +0.07% +0.00% / -0.09% -0.02% +0.15%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.499 -> 5.494 ( -0.09%) [ +0.15% +0.00% +0.02% / +0.00% -0.09% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.507 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 100, 400)) A = [16, 5, 4, 40] (stride (20, 1, 5, 320)) dim = 0 4.450 -> 4.448 ( -0.04%) [ +0.00% +0.09% +0.07% / -0.04% +0.72% +0.74%] index_add_ linear : Elapsed 0.045 ms (4.450 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.68% +0.65%] index_copy_ linear : Elapsed 0.043 ms (4.287 ms / 100) 4.462 -> 4.465 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.74% +0.72%] index_add_ reverse : Elapsed 0.045 ms (4.466 ms / 100) 4.290 -> 4.292 ( +0.05%) [ +0.12% +0.00% +0.00% / +0.05% +0.72% +0.68%] index_copy_ reverse : Elapsed 0.043 ms (4.295 ms / 100) 4.435 -> 4.437 ( +0.05%) [ +0.23% +0.00% +0.07% / +0.05% +0.72% +0.77%] index_add_ spread : Elapsed 0.044 ms (4.445 ms / 100) 4.272 -> 4.271 ( -0.02%) [ +0.05% +0.05% +0.00% / -0.02% +0.84% +0.87%] index_copy_ spread : Elapsed 0.043 ms (4.274 ms / 100) 4.450 -> 4.455 ( +0.11%) [ +0.09% +0.00% +0.07% / +0.11% +0.63% +0.74%] index_add_ strided 3 : Elapsed 0.045 ms (4.454 ms / 100) 4.271 -> 4.276 ( +0.12%) [ +0.00% +0.09% +0.05% / +0.12% +1.03% +0.89%] index_copy_ strided 3 : Elapsed 0.043 ms (4.271 ms / 100) 4.463 -> 4.466 ( +0.07%) [ +0.02% +0.09% +0.00% / +0.07% +0.65% +0.69%] index_add_ strided 7 : Elapsed 0.045 ms (4.464 ms / 100) 4.289 -> 4.295 ( +0.14%) [ +0.00% +0.14% +0.02% / +0.14% +0.58% +0.70%] index_copy_ strided 7 : Elapsed 0.043 ms (4.289 ms / 100) 4.450 -> 4.449 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.79% +0.67%] index_add_ perm : Elapsed 0.045 ms (4.450 ms / 100) 4.286 -> 4.287 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.70% +0.70%] index_copy_ perm : Elapsed 0.043 ms (4.286 ms / 100) 4.449 -> 4.449 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.72% +0.65%] index_add_ perm_sorted : Elapsed 0.045 ms (4.451 ms / 100) 4.275 -> 4.275 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.65% +0.63%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.276 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.13% +0.04% +0.00% / -0.04% +0.07% +0.00%] index_select const : Elapsed 0.056 ms (5.579 ms / 100) 5.590 -> 5.584 ( -0.11%) [ +0.04% +0.00% +0.07% / +0.07% +0.11% -0.11%] index_select wrap : Elapsed 0.056 ms (5.592 ms / 100) 5.586 -> 5.588 ( +0.04%) [ +0.07% +0.00% +0.16% / +0.04% +0.18% +0.29%] index_select linear : Elapsed 0.056 ms (5.590 ms / 100) 5.584 -> 5.595 ( +0.20%) [ +0.00% +0.05% +0.09% / +0.20% +0.27% +0.32%] index_select reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.04% +0.14% +0.00% / -0.13% +0.13% +0.14%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.577 ( +0.09%) [ +0.14% +0.00% +0.04% / +0.09% +0.23% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.580 ms / 100) 5.590 -> 5.584 ( -0.11%) [ +0.11% +0.00% +0.00% / -0.11% +0.18% +0.25%] index_select spread : Elapsed 0.056 ms (5.596 ms / 100) 5.586 -> 5.587 ( +0.02%) [ +0.00% +0.09% +0.02% / +0.02% +0.21% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.586 ms / 100) 5.587 -> 5.593 ( +0.11%) [ +0.13% +0.00% +0.02% / +0.11% +0.11% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.594 ms / 100) 5.585 -> 5.586 ( +0.02%) [ +0.16% +0.00% +0.04% / +0.02% +0.16% +0.32%] index_select strided 7 : Elapsed 0.056 ms (5.594 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.02% +0.05% +0.00% / -0.07% +0.14% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.575 ms / 100) 5.588 -> 5.581 ( -0.13%) [ +0.04% +0.07% +0.00% / -0.13% +0.04% +0.16%] index_select random : Elapsed 0.056 ms (5.590 ms / 100) 5.580 -> 5.578 ( -0.04%) [ +0.00% +0.22% +0.16% / -0.04% +0.27% +0.30%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) out_shape = [16, 20, 4, 40] in_shape = [16, 5, 4, 40] idx_dim = 1 B = [16, 20, 4, 40] (stride (3200, 40, 800, 1)) A = [16, 5, 4, 40] (stride (160, 2560, 1, 4)) dim = 1 1.814 -> 1.813 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.94% +0.88%] index_add_ linear : Elapsed 0.018 ms (1.815 ms / 100) 1.771 -> 1.767 ( -0.23%) [ +0.00% +0.06% +0.00% / -0.23% +1.13% +0.90%] index_copy_ linear : Elapsed 0.018 ms (1.771 ms / 100) 1.814 -> 1.814 ( +0.00%) [ +0.00% +0.06% +0.11% / +0.00% +1.05% +0.83%] index_add_ reverse : Elapsed 0.018 ms (1.814 ms / 100) 1.767 -> 1.769 ( +0.11%) [ +0.40% +0.06% +0.00% / +0.11% +1.08% +1.08%] index_copy_ reverse : Elapsed 0.018 ms (1.774 ms / 100) 1.835 -> 1.834 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.60% +0.76%] index_add_ spread : Elapsed 0.018 ms (1.836 ms / 100) 1.792 -> 1.789 ( -0.17%) [ +0.00% +0.06% +0.00% / -0.17% +0.78% +0.61%] index_copy_ spread : Elapsed 0.018 ms (1.792 ms / 100) 1.833 -> 1.833 ( +0.00%) [ +0.27% +0.00% +0.16% / +0.00% +0.71% +0.76%] index_add_ strided 3 : Elapsed 0.018 ms (1.838 ms / 100) 1.788 -> 1.791 ( +0.17%) [ +0.00% +0.22% +0.17% / +0.17% +0.78% +0.78%] index_copy_ strided 3 : Elapsed 0.018 ms (1.788 ms / 100) 1.823 -> 1.829 ( +0.33%) [ +0.00% +0.11% +0.16% / +0.33% +1.10% +0.99%] index_add_ strided 7 : Elapsed 0.018 ms (1.823 ms / 100) 1.781 -> 1.782 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +1.18% +1.12%] index_copy_ strided 7 : Elapsed 0.018 ms (1.782 ms / 100) 1.827 -> 1.828 ( +0.05%) [ +0.00% +0.22% +0.27% / +0.05% +1.26% +1.37%] index_add_ perm : Elapsed 0.018 ms (1.827 ms / 100) 1.784 -> 1.789 ( +0.28%) [ +0.00% +0.22% +0.39% / +0.28% +1.23% +1.35%] index_copy_ perm : Elapsed 0.018 ms (1.784 ms / 100) 1.828 -> 1.830 ( +0.11%) [ +0.00% +0.05% +0.11% / +0.11% +1.20% +1.04%] index_add_ perm_sorted : Elapsed 0.018 ms (1.828 ms / 100) 1.787 -> 1.787 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +1.29% +1.23%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.790 ms / 100) 8.510 -> 8.526 ( +0.19%) [ +0.00% +0.16% +0.08% / +0.19% +0.33% +0.39%] index_select const : Elapsed 0.085 ms (8.510 ms / 100) 8.554 -> 8.554 ( +0.00%) [ +0.00% +0.04% +0.06% / +0.00% +0.14% +0.22%] index_select wrap : Elapsed 0.086 ms (8.554 ms / 100) 8.538 -> 8.540 ( +0.02%) [ +0.25% +0.00% +0.07% / +0.02% +0.18% +0.16%] index_select linear : Elapsed 0.086 ms (8.559 ms / 100) 8.526 -> 8.517 ( -0.11%) [ +0.14% +0.00% +0.12% / -0.02% -0.11% +0.06%] index_select reverse : Elapsed 0.085 ms (8.538 ms / 100) 8.507 -> 8.528 ( +0.25%) [ +0.21% +0.00% +0.18% / +0.25% +0.31% +0.27%] index_select skip64 : Elapsed 0.085 ms (8.525 ms / 100) 8.498 -> 8.512 ( +0.16%) [ +0.00% +0.11% +0.25% / +0.16% +0.32% +0.49%] index_select skip256 : Elapsed 0.085 ms (8.498 ms / 100) 8.530 -> 8.533 ( +0.04%) [ +0.46% +0.09% +0.00% / +0.04% +0.34% +0.21%] index_select spread : Elapsed 0.086 ms (8.569 ms / 100) 8.555 -> 8.549 ( -0.07%) [ +0.02% +0.00% +0.06% / -0.07% -0.06% +0.11%] index_select strided 3 : Elapsed 0.086 ms (8.557 ms / 100) 8.533 -> 8.556 ( +0.27%) [ +0.00% +0.02% +0.21% / +0.28% +0.27% +0.39%] index_select random : Elapsed 0.085 ms (8.533 ms / 100) 8.539 -> 8.537 ( -0.02%) [ +0.05% +0.04% +0.00% / +0.02% +0.29% -0.02%] index_select random_sorted : Elapsed 0.085 ms (8.543 ms / 100) B = [16, 20, 4, 40] (stride (3200, 1, 20, 80)) A = [16, 5, 4, 40] (stride (1, 2560, 16, 64)) dim = 1 2.002 -> 2.000 ( -0.10%) [ +0.00% +0.05% +0.05% / -0.10% -0.10% -0.10%] index_add_ linear : Elapsed 0.020 ms (2.002 ms / 100) 1.953 -> 1.949 ( -0.20%) [ +0.05% +0.20% +0.00% / -0.20% -0.05% +0.10%] index_copy_ linear : Elapsed 0.020 ms (1.954 ms / 100) 1.999 -> 2.001 ( +0.10%) [ +0.00% +0.15% +0.00% / +0.10% +0.10% +0.10%] index_add_ reverse : Elapsed 0.020 ms (1.999 ms / 100) 1.953 -> 1.953 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.36% +0.00% +0.20%] index_copy_ reverse : Elapsed 0.020 ms (1.953 ms / 100) 2.023 -> 2.020 ( -0.15%) [ +0.20% +0.15% +0.00% / +0.00% -0.15% -0.05%] index_add_ spread : Elapsed 0.020 ms (2.027 ms / 100) 1.982 -> 1.985 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +0.15% +0.30%] index_copy_ spread : Elapsed 0.020 ms (1.982 ms / 100) 2.012 -> 2.014 ( +0.10%) [ +0.00% +0.45% +0.15% / +0.15% +0.10% +0.15%] index_add_ strided 3 : Elapsed 0.020 ms (2.012 ms / 100) 1.979 -> 1.978 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.05% +0.00%] index_copy_ strided 3 : Elapsed 0.020 ms (1.980 ms / 100) 2.023 -> 2.024 ( +0.05%) [ +0.00% +0.15% +0.20% / +0.35% +0.05% +0.35%] index_add_ strided 7 : Elapsed 0.020 ms (2.023 ms / 100) 1.990 -> 1.991 ( +0.05%) [ +0.20% +0.05% +0.00% / +0.05% +0.10% +0.05%] index_copy_ strided 7 : Elapsed 0.020 ms (1.994 ms / 100) 2.024 -> 2.023 ( -0.05%) [ +0.05% +0.00% +0.05% / +0.20% -0.05% +0.10%] index_add_ perm : Elapsed 0.020 ms (2.025 ms / 100) 1.987 -> 1.989 ( +0.10%) [ +0.00% +0.25% +0.00% / +0.25% +0.10% +0.45%] index_copy_ perm : Elapsed 0.020 ms (1.987 ms / 100) 2.012 -> 2.015 ( +0.15%) [ +0.30% +0.30% +0.00% / +0.15% +0.35% +0.20%] index_add_ perm_sorted : Elapsed 0.020 ms (2.018 ms / 100) 1.977 -> 1.978 ( +0.05%) [ +0.15% +0.10% +0.00% / +0.05% +0.30% +0.20%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.980 ms / 100) 8.572 -> 8.576 ( +0.05%) [ +0.00% +0.42% +0.21% / +0.05% +0.59% +0.23%] index_select const : Elapsed 0.086 ms (8.572 ms / 100) 8.607 -> 8.600 ( -0.08%) [ +0.00% +0.23% +0.19% / -0.08% +0.22% +0.23%] index_select wrap : Elapsed 0.086 ms (8.607 ms / 100) 8.611 -> 8.621 ( +0.12%) [ +0.03% +0.00% +0.13% / +0.12% +0.23% +0.49%] index_select linear : Elapsed 0.086 ms (8.614 ms / 100) 8.591 -> 8.599 ( +0.09%) [ +0.20% +0.00% +0.16% / +0.09% +0.43% +0.28%] index_select reverse : Elapsed 0.086 ms (8.608 ms / 100) 8.569 -> 8.584 ( +0.18%) [ +0.13% +0.20% +0.00% / +0.18% +0.50% +0.41%] index_select skip64 : Elapsed 0.086 ms (8.580 ms / 100) 8.582 -> 8.578 ( -0.05%) [ +0.00% +0.08% +0.01% / -0.05% +0.44% +0.28%] index_select skip256 : Elapsed 0.086 ms (8.582 ms / 100) 8.600 -> 8.605 ( +0.06%) [ +0.00% +0.19% +0.28% / +0.06% +0.51% +0.33%] index_select spread : Elapsed 0.086 ms (8.600 ms / 100) 8.600 -> 8.615 ( +0.17%) [ +0.00% +0.20% +0.09% / +0.17% +0.19% +0.44%] index_select strided 3 : Elapsed 0.086 ms (8.600 ms / 100) 8.596 -> 8.604 ( +0.09%) [ +0.30% +0.00% +0.13% / +0.09% +0.34% +0.45%] index_select random : Elapsed 0.086 ms (8.622 ms / 100) 8.591 -> 8.604 ( +0.15%) [ +0.05% +0.00% +0.21% / +0.15% +0.57% +0.57%] index_select random_sorted : Elapsed 0.086 ms (8.595 ms / 100) B = [16, 20, 4, 40] (stride (160, 2560, 40, 1)) A = [16, 5, 4, 40] (stride (1, 16, 3200, 80)) dim = 1 0.700 -> 0.700 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.71% +1.00%] index_add_ linear : Elapsed 0.007 ms (0.700 ms / 100) 0.680 -> 0.681 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.74% +0.88%] index_copy_ linear : Elapsed 0.007 ms (0.680 ms / 100) 0.705 -> 0.700 ( -0.71%) [ +0.28% +0.28% +0.00% / +0.00% -0.71% -0.43%] index_add_ reverse : Elapsed 0.007 ms (0.707 ms / 100) 0.685 -> 0.682 ( -0.44%) [ +0.29% +0.00% +0.29% / +0.15% -0.44% -0.44%] index_copy_ reverse : Elapsed 0.007 ms (0.687 ms / 100) 0.699 -> 0.695 ( -0.57%) [ +0.29% +0.57% +0.00% / +0.29% -0.14% -0.57%] index_add_ spread : Elapsed 0.007 ms (0.701 ms / 100) 0.681 -> 0.676 ( -0.73%) [ +0.15% +0.44% +0.00% / +0.29% -0.73% -0.73%] index_copy_ spread : Elapsed 0.007 ms (0.682 ms / 100) 0.698 -> 0.699 ( +0.14%) [ +0.00% +0.00% +0.14% / +0.14% +0.29% +0.43%] index_add_ strided 3 : Elapsed 0.007 ms (0.698 ms / 100) 0.678 -> 0.679 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.44% +0.29%] index_copy_ strided 3 : Elapsed 0.007 ms (0.678 ms / 100) 0.698 -> 0.699 ( +0.14%) [ +0.14% +0.00% +0.29% / +0.14% +0.29% +1.00%] index_add_ strided 7 : Elapsed 0.007 ms (0.699 ms / 100) 0.679 -> 0.677 ( -0.29%) [ +0.00% +0.00% +0.29% / -0.29% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.007 ms (0.679 ms / 100) 0.693 -> 0.694 ( +0.14%) [ +0.29% +0.29% +0.00% / +0.14% +1.15% +1.15%] index_add_ perm : Elapsed 0.007 ms (0.695 ms / 100) 0.674 -> 0.675 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +1.48% +1.34%] index_copy_ perm : Elapsed 0.007 ms (0.675 ms / 100) 0.701 -> 0.700 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +0.71% +0.71%] index_add_ perm_sorted : Elapsed 0.007 ms (0.701 ms / 100) 0.680 -> 0.680 ( +0.00%) [ +0.15% +0.29% +0.00% / +0.00% +1.03% +1.03%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.681 ms / 100) 4.854 -> 4.844 ( -0.21%) [ +0.10% +0.08% +0.00% / -0.21% -0.14% -0.16%] index_select const : Elapsed 0.049 ms (4.859 ms / 100) 4.886 -> 4.867 ( -0.39%) [ +0.20% +0.00% +0.51% / -0.16% -0.39% -0.18%] index_select wrap : Elapsed 0.049 ms (4.896 ms / 100) 4.886 -> 4.876 ( -0.20%) [ +0.04% +0.00% +0.16% / +0.12% -0.14% -0.20%] index_select linear : Elapsed 0.049 ms (4.888 ms / 100) 4.866 -> 4.864 ( -0.04%) [ +0.06% +0.41% +0.00% / +0.23% -0.04% +0.14%] index_select reverse : Elapsed 0.049 ms (4.869 ms / 100) 4.842 -> 4.842 ( +0.00%) [ +0.00% +0.25% +0.10% / +0.00% +0.02% +0.10%] index_select skip64 : Elapsed 0.048 ms (4.842 ms / 100) 4.844 -> 4.840 ( -0.08%) [ +0.00% +0.04% +0.14% / -0.08% +0.08% +0.02%] index_select skip256 : Elapsed 0.048 ms (4.844 ms / 100) 4.884 -> 4.877 ( -0.14%) [ +0.00% +0.10% +0.10% / +0.23% -0.14% +0.10%] index_select spread : Elapsed 0.049 ms (4.884 ms / 100) 4.878 -> 4.884 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.14% +0.14% +0.12%] index_select strided 3 : Elapsed 0.049 ms (4.878 ms / 100) 4.869 -> 4.873 ( +0.08%) [ +0.16% +0.00% +0.27% / +0.41% +0.12% +0.08%] index_select random : Elapsed 0.049 ms (4.877 ms / 100) 4.870 -> 4.876 ( +0.12%) [ +0.00% +0.37% +0.43% / +0.25% +0.12% +0.14%] index_select random_sorted : Elapsed 0.049 ms (4.870 ms / 100) B = [16, 20, 4, 40] (stride (800, 40, 12800, 1)) A = [16, 5, 4, 40] (stride (160, 2560, 40, 1)) dim = 1 1.706 -> 1.711 ( +0.29%) [ +0.12% +0.00% +0.18% / +0.29% +0.41% +0.59%] index_add_ linear : Elapsed 0.017 ms (1.708 ms / 100) 1.661 -> 1.664 ( +0.18%) [ +0.00% +0.06% +0.36% / +0.18% +0.60% +0.66%] index_copy_ linear : Elapsed 0.017 ms (1.661 ms / 100) 1.705 -> 1.705 ( +0.00%) [ +0.00% +0.18% +0.18% / +0.00% +0.47% +0.59%] index_add_ reverse : Elapsed 0.017 ms (1.705 ms / 100) 1.661 -> 1.664 ( +0.18%) [ +0.18% +0.00% +0.06% / +0.18% +0.42% +0.66%] index_copy_ reverse : Elapsed 0.017 ms (1.664 ms / 100) 1.728 -> 1.729 ( +0.06%) [ +0.00% +0.06% +0.23% / +0.06% +0.29% +0.41%] index_add_ spread : Elapsed 0.017 ms (1.728 ms / 100) 1.690 -> 1.688 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.18% +0.06%] index_copy_ spread : Elapsed 0.017 ms (1.690 ms / 100) 1.724 -> 1.726 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.29% +0.41%] index_add_ strided 3 : Elapsed 0.017 ms (1.725 ms / 100) 1.686 -> 1.685 ( -0.06%) [ +0.06% +0.00% +0.18% / -0.06% +0.24% +0.59%] index_copy_ strided 3 : Elapsed 0.017 ms (1.687 ms / 100) 1.714 -> 1.717 ( +0.18%) [ +0.18% +0.23% +0.00% / +0.18% +0.58% +0.70%] index_add_ strided 7 : Elapsed 0.017 ms (1.717 ms / 100) 1.674 -> 1.675 ( +0.06%) [ +0.24% +0.06% +0.00% / +0.06% +0.60% +0.72%] index_copy_ strided 7 : Elapsed 0.017 ms (1.678 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.53% +0.12% +0.00% / +0.06% +0.64% +0.64%] index_add_ perm : Elapsed 0.017 ms (1.722 ms / 100) 1.672 -> 1.672 ( +0.00%) [ +0.00% +0.06% +0.12% / +0.00% +0.48% +0.60%] index_copy_ perm : Elapsed 0.017 ms (1.672 ms / 100) 1.712 -> 1.714 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.70% +0.47%] index_add_ perm_sorted : Elapsed 0.017 ms (1.713 ms / 100) 1.670 -> 1.671 ( +0.06%) [ +0.00% +0.06% +0.18% / +0.06% +0.78% +0.66%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.670 ms / 100) 8.175 -> 8.185 ( +0.12%) [ +0.00% +0.00% +0.40% / +0.12% +0.40% +0.37%] index_select const : Elapsed 0.082 ms (8.175 ms / 100) 8.231 -> 8.236 ( +0.06%) [ +0.00% +0.15% +0.22% / +0.15% +0.06% +0.21%] index_select wrap : Elapsed 0.082 ms (8.231 ms / 100) 8.222 -> 8.225 ( +0.04%) [ +0.10% +0.00% +0.07% / +0.04% +0.26% +0.15%] index_select linear : Elapsed 0.082 ms (8.230 ms / 100) 8.234 -> 8.224 ( -0.12%) [ +0.07% +0.00% +0.11% / +0.12% -0.12% -0.01%] index_select reverse : Elapsed 0.082 ms (8.240 ms / 100) 8.177 -> 8.197 ( +0.24%) [ +0.16% +0.00% +0.15% / +0.27% +0.44% +0.24%] index_select skip64 : Elapsed 0.082 ms (8.190 ms / 100) 8.185 -> 8.182 ( -0.04%) [ +0.13% +0.10% +0.00% / -0.04% +0.27% +0.42%] index_select skip256 : Elapsed 0.082 ms (8.196 ms / 100) 8.207 -> 8.204 ( -0.04%) [ +0.00% +0.16% +0.22% / -0.04% +0.32% +0.26%] index_select spread : Elapsed 0.082 ms (8.207 ms / 100) 8.233 -> 8.252 ( +0.23%) [ +0.18% +0.12% +0.00% / +0.26% +0.41% +0.23%] index_select strided 3 : Elapsed 0.082 ms (8.248 ms / 100) 8.232 -> 8.226 ( -0.07%) [ +0.00% +0.01% +0.09% / -0.07% +0.10% +0.11%] index_select random : Elapsed 0.082 ms (8.232 ms / 100) 8.212 -> 8.224 ( +0.15%) [ +0.02% +0.00% +0.02% / +0.15% +0.44% +0.32%] index_select random_sorted : Elapsed 0.082 ms (8.214 ms / 100) B = [16, 20, 4, 40] (stride (20, 1, 12800, 320)) A = [16, 5, 4, 40] (stride (160, 2560, 40, 1)) dim = 1 0.668 -> 0.668 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.15% +0.45%] index_add_ linear : Elapsed 0.007 ms (0.668 ms / 100) 0.682 -> 0.683 ( +0.15%) [ +0.29% +0.00% +0.29% / +0.15% +0.88% +0.88%] index_copy_ linear : Elapsed 0.007 ms (0.684 ms / 100) 0.669 -> 0.669 ( +0.00%) [ +0.30% +0.30% +0.00% / +0.15% +0.00% +0.00%] index_add_ reverse : Elapsed 0.007 ms (0.671 ms / 100) 0.685 -> 0.684 ( -0.15%) [ +0.15% +0.00% +0.00% / +0.00% +0.15% -0.15%] index_copy_ reverse : Elapsed 0.007 ms (0.686 ms / 100) 0.671 -> 0.671 ( +0.00%) [ +0.30% +0.00% +0.15% / +0.15% +0.00% +0.30%] index_add_ spread : Elapsed 0.007 ms (0.673 ms / 100) 0.686 -> 0.685 ( -0.15%) [ +0.00% +0.00% +0.44% / +0.00% +0.15% -0.15%] index_copy_ spread : Elapsed 0.007 ms (0.686 ms / 100) 0.668 -> 0.669 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +1.05% +1.05%] index_add_ strided 3 : Elapsed 0.007 ms (0.669 ms / 100) 0.684 -> 0.684 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.02% +1.02%] index_copy_ strided 3 : Elapsed 0.007 ms (0.684 ms / 100) 0.668 -> 0.670 ( +0.30%) [ +0.00% +0.45% +0.15% / +0.30% +1.20% +1.20%] index_add_ strided 7 : Elapsed 0.007 ms (0.668 ms / 100) 0.684 -> 0.684 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.88% +1.02%] index_copy_ strided 7 : Elapsed 0.007 ms (0.685 ms / 100) 0.673 -> 0.673 ( +0.00%) [ +0.00% +0.30% +0.00% / +0.00% +0.45% +0.30%] index_add_ perm : Elapsed 0.007 ms (0.673 ms / 100) 0.685 -> 0.685 ( +0.00%) [ +0.29% +0.00% +0.44% / +0.00% +0.73% +0.44%] index_copy_ perm : Elapsed 0.007 ms (0.687 ms / 100) 0.671 -> 0.674 ( +0.45%) [ +0.00% +0.15% +0.15% / +0.45% +0.45% +0.45%] index_add_ perm_sorted : Elapsed 0.007 ms (0.671 ms / 100) 0.686 -> 0.686 ( +0.00%) [ +0.15% +0.44% +0.00% / +0.00% +0.44% +0.44%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.687 ms / 100) 4.879 -> 4.811 ( -1.39%) [ +0.00% +0.04% +0.18% / -1.23% -1.39% -1.39%] index_select const : Elapsed 0.049 ms (4.879 ms / 100) 4.902 -> 4.881 ( -0.43%) [ +0.16% +0.00% +0.31% / -0.22% -0.39% -0.43%] index_select wrap : Elapsed 0.049 ms (4.910 ms / 100) 4.901 -> 4.874 ( -0.55%) [ +0.00% +0.02% +0.16% / -0.16% -0.47% -0.55%] index_select linear : Elapsed 0.049 ms (4.901 ms / 100) 4.900 -> 4.884 ( -0.33%) [ +0.00% +0.06% +0.10% / -0.10% -0.33% -0.22%] index_select reverse : Elapsed 0.049 ms (4.900 ms / 100) 4.871 -> 4.811 ( -1.23%) [ +0.25% +0.00% +0.10% / -1.23% -1.07% -1.01%] index_select skip64 : Elapsed 0.049 ms (4.883 ms / 100) 4.865 -> 4.809 ( -1.15%) [ +0.25% +0.00% +0.12% / -1.15% -1.07% -0.82%] index_select skip256 : Elapsed 0.049 ms (4.877 ms / 100) 4.885 -> 4.883 ( -0.04%) [ +0.00% +0.39% +0.25% / -0.02% -0.04% +0.14%] index_select spread : Elapsed 0.049 ms (4.885 ms / 100) 4.906 -> 4.885 ( -0.43%) [ +0.12% +0.12% +0.00% / -0.24% -0.43% -0.29%] index_select strided 3 : Elapsed 0.049 ms (4.912 ms / 100) 4.907 -> 4.883 ( -0.49%) [ +0.00% +0.06% +0.16% / -0.14% -0.49% -0.31%] index_select random : Elapsed 0.049 ms (4.907 ms / 100) 4.903 -> 4.891 ( -0.24%) [ +0.08% +0.22% +0.00% / -0.24% -0.24% -0.20%] index_select random_sorted : Elapsed 0.049 ms (4.907 ms / 100) B = [16, 20, 4, 40] (stride (20, 1, 12800, 320)) A = [16, 5, 4, 40] (stride (160, 2560, 1, 4)) dim = 1 1.792 -> 1.787 ( -0.28%) [ +0.00% +0.33% +0.22% / +0.11% -0.28% -0.17%] index_add_ linear : Elapsed 0.018 ms (1.792 ms / 100) 1.761 -> 1.756 ( -0.28%) [ +0.23% +0.17% +0.00% / -0.06% -0.28% -0.23%] index_copy_ linear : Elapsed 0.018 ms (1.765 ms / 100) 1.791 -> 1.788 ( -0.17%) [ +0.34% +0.67% +0.00% / +0.11% -0.17% -0.06%] index_add_ reverse : Elapsed 0.018 ms (1.797 ms / 100) 1.761 -> 1.752 ( -0.51%) [ +0.11% +0.28% +0.00% / -0.28% -0.23% -0.51%] index_copy_ reverse : Elapsed 0.018 ms (1.763 ms / 100) 1.811 -> 1.804 ( -0.39%) [ +0.00% +0.06% +0.06% / +0.17% +0.00% -0.39%] index_add_ spread : Elapsed 0.018 ms (1.811 ms / 100) 1.787 -> 1.780 ( -0.39%) [ +0.00% +0.06% +0.11% / +0.28% -0.28% -0.39%] index_copy_ spread : Elapsed 0.018 ms (1.787 ms / 100) 1.808 -> 1.808 ( +0.00%) [ +0.55% +0.00% +0.39% / +0.33% +0.00% +0.22%] index_add_ strided 3 : Elapsed 0.018 ms (1.818 ms / 100) 1.787 -> 1.779 ( -0.45%) [ +0.06% +0.00% +0.17% / +0.00% -0.39% -0.45%] index_copy_ strided 3 : Elapsed 0.018 ms (1.788 ms / 100) 1.811 -> 1.804 ( -0.39%) [ +0.00% +0.22% +0.06% / +0.17% -0.39% -0.22%] index_add_ strided 7 : Elapsed 0.018 ms (1.811 ms / 100) 1.782 -> 1.780 ( -0.11%) [ +0.00% +0.34% +0.45% / +0.39% -0.11% -0.11%] index_copy_ strided 7 : Elapsed 0.018 ms (1.782 ms / 100) 1.808 -> 1.805 ( -0.17%) [ +0.50% +0.00% +0.17% / +0.44% +0.11% -0.17%] index_add_ perm : Elapsed 0.018 ms (1.817 ms / 100) 1.785 -> 1.779 ( -0.34%) [ +0.00% +0.06% +0.22% / +0.22% -0.06% -0.34%] index_copy_ perm : Elapsed 0.018 ms (1.785 ms / 100) 1.812 -> 1.806 ( -0.33%) [ +0.17% +0.28% +0.00% / +0.00% -0.33% -0.22%] index_add_ perm_sorted : Elapsed 0.018 ms (1.815 ms / 100) 1.783 -> 1.778 ( -0.28%) [ +0.34% +0.00% +0.17% / +0.39% -0.28% -0.11%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.789 ms / 100) 8.247 -> 8.265 ( +0.22%) [ +0.07% +0.00% +0.13% / +0.29% +0.22% +0.22%] index_select const : Elapsed 0.083 ms (8.253 ms / 100) 8.294 -> 8.291 ( -0.04%) [ +0.17% +0.08% +0.00% / -0.01% -0.04% +0.07%] index_select wrap : Elapsed 0.083 ms (8.308 ms / 100) 8.278 -> 8.285 ( +0.08%) [ +0.16% +0.06% +0.00% / +0.14% +0.08% +0.14%] index_select linear : Elapsed 0.083 ms (8.291 ms / 100) 8.266 -> 8.253 ( -0.16%) [ +0.06% +0.00% +0.24% / -0.16% -0.15% +0.13%] index_select reverse : Elapsed 0.083 ms (8.271 ms / 100) 8.241 -> 8.258 ( +0.21%) [ +0.00% +0.29% +0.06% / +0.21% +0.55% +0.36%] index_select skip64 : Elapsed 0.082 ms (8.241 ms / 100) 8.245 -> 8.235 ( -0.12%) [ +0.00% +0.07% +0.21% / -0.12% +0.12% +0.13%] index_select skip256 : Elapsed 0.082 ms (8.245 ms / 100) 8.267 -> 8.244 ( -0.28%) [ +0.12% +0.01% +0.00% / -0.28% +0.25% +0.41%] index_select spread : Elapsed 0.083 ms (8.277 ms / 100) 8.284 -> 8.281 ( -0.04%) [ +0.29% +0.08% +0.00% / -0.04% +0.33% +0.35%] index_select strided 3 : Elapsed 0.083 ms (8.308 ms / 100) 8.285 -> 8.272 ( -0.16%) [ +0.00% +0.07% +0.01% / -0.16% +0.25% +0.34%] index_select random : Elapsed 0.083 ms (8.285 ms / 100) 8.264 -> 8.269 ( +0.06%) [ +0.08% +0.00% +0.10% / +0.06% +0.36% +0.30%] index_select random_sorted : Elapsed 0.083 ms (8.271 ms / 100) B = [16, 20, 4, 40] (stride (1, 64, 16, 1280)) A = [16, 5, 4, 40] (stride (800, 4, 1, 20)) dim = 1 1.922 -> 1.925 ( +0.16%) [ +0.05% +0.00% +0.26% / +0.42% +0.21% +0.16%] index_add_ linear : Elapsed 0.019 ms (1.923 ms / 100) 1.873 -> 1.880 ( +0.37%) [ +0.05% +0.00% +0.59% / +0.59% +0.43% +0.37%] index_copy_ linear : Elapsed 0.019 ms (1.874 ms / 100) 1.929 -> 1.927 ( -0.10%) [ +0.00% +0.10% +0.16% / +0.21% -0.05% -0.10%] index_add_ reverse : Elapsed 0.019 ms (1.929 ms / 100) 1.881 -> 1.882 ( +0.05%) [ +0.16% +0.00% +0.16% / +0.21% +0.05% +0.21%] index_copy_ reverse : Elapsed 0.019 ms (1.884 ms / 100) 1.917 -> 1.921 ( +0.21%) [ +0.26% +0.00% +0.78% / +0.21% +0.89% +0.78%] index_add_ spread : Elapsed 0.019 ms (1.922 ms / 100) 1.876 -> 1.879 ( +0.16%) [ +0.05% +0.00% +0.21% / +0.16% +0.75% +0.80%] index_copy_ spread : Elapsed 0.019 ms (1.877 ms / 100) 1.916 -> 1.918 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.89% +0.89%] index_add_ strided 3 : Elapsed 0.019 ms (1.917 ms / 100) 1.873 -> 1.877 ( +0.21%) [ +0.00% +0.00% +0.21% / +0.21% +0.59% +0.75%] index_copy_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.920 -> 1.917 ( -0.16%) [ +0.00% +0.00% +0.26% / -0.16% +0.68% +0.57%] index_add_ strided 7 : Elapsed 0.019 ms (1.920 ms / 100) 1.878 -> 1.877 ( -0.05%) [ +0.00% +0.11% +0.11% / -0.05% +0.64% +0.59%] index_copy_ strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.924 -> 1.925 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +1.09% +0.88%] index_add_ perm : Elapsed 0.019 ms (1.924 ms / 100) 1.880 -> 1.882 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +1.01% +1.01%] index_copy_ perm : Elapsed 0.019 ms (1.880 ms / 100) 1.913 -> 1.911 ( -0.10%) [ +0.16% +0.00% +0.00% / -0.10% +1.15% +1.20%] index_add_ perm_sorted : Elapsed 0.019 ms (1.916 ms / 100) 1.868 -> 1.875 ( +0.37%) [ +0.05% +0.00% +0.00% / +0.37% +1.39% +1.34%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.869 ms / 100) 8.563 -> 8.564 ( +0.01%) [ +0.00% +0.13% +0.18% / +0.05% +0.01% +0.18%] index_select const : Elapsed 0.086 ms (8.563 ms / 100) 8.558 -> 8.566 ( +0.09%) [ +0.20% +0.22% +0.00% / +0.46% +0.15% +0.09%] index_select wrap : Elapsed 0.086 ms (8.575 ms / 100) 8.559 -> 8.571 ( +0.14%) [ +0.08% +0.00% +0.27% / +0.18% +0.14% +0.21%] index_select linear : Elapsed 0.086 ms (8.566 ms / 100) 8.574 -> 8.554 ( -0.23%) [ +0.00% +0.00% +0.06% / +0.20% -0.06% -0.23%] index_select reverse : Elapsed 0.086 ms (8.574 ms / 100) 8.546 -> 8.556 ( +0.12%) [ +0.00% +0.22% +0.09% / +0.12% +0.34% +0.43%] index_select skip64 : Elapsed 0.085 ms (8.546 ms / 100) 8.546 -> 8.551 ( +0.06%) [ +0.04% +0.00% +0.39% / +0.30% +0.06% +0.11%] index_select skip256 : Elapsed 0.085 ms (8.549 ms / 100) 8.567 -> 8.565 ( -0.02%) [ +0.00% +0.07% +0.02% / +0.20% -0.02% +0.13%] index_select spread : Elapsed 0.086 ms (8.567 ms / 100) 8.568 -> 8.558 ( -0.12%) [ +0.06% +0.00% +0.16% / -0.07% +0.06% -0.12%] index_select strided 3 : Elapsed 0.086 ms (8.573 ms / 100) 8.545 -> 8.570 ( +0.29%) [ +0.22% +0.51% +0.00% / +0.34% +0.61% +0.29%] index_select random : Elapsed 0.086 ms (8.564 ms / 100) 8.564 -> 8.568 ( +0.05%) [ +0.26% +0.00% +0.06% / +0.05% +0.32% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.586 ms / 100) B = [16, 20, 4, 40] (stride (20, 1, 320, 1280)) A = [16, 5, 4, 40] (stride (160, 2560, 1, 4)) dim = 1 1.896 -> 1.885 ( -0.58%) [ +0.11% +0.00% +0.05% / +0.26% -0.58% -0.11%] index_add_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.858 -> 1.856 ( -0.11%) [ +0.00% +0.43% +0.05% / +0.27% -0.11% -0.11%] index_copy_ linear : Elapsed 0.019 ms (1.858 ms / 100) 1.900 -> 1.890 ( -0.53%) [ +0.16% +0.00% +0.11% / -0.16% -0.47% -0.53%] index_add_ reverse : Elapsed 0.019 ms (1.903 ms / 100) 1.859 -> 1.856 ( -0.16%) [ +0.11% +0.59% +0.00% / +0.11% -0.16% -0.05%] index_copy_ reverse : Elapsed 0.019 ms (1.861 ms / 100) 1.908 -> 1.905 ( -0.16%) [ +0.26% +0.42% +0.00% / +0.10% -0.05% -0.16%] index_add_ spread : Elapsed 0.019 ms (1.913 ms / 100) 1.888 -> 1.876 ( -0.64%) [ +0.11% +0.21% +0.00% / -0.32% -0.64% -0.64%] index_copy_ spread : Elapsed 0.019 ms (1.890 ms / 100) 1.909 -> 1.907 ( -0.10%) [ +0.21% +0.58% +0.00% / +0.21% -0.10% -0.05%] index_add_ strided 3 : Elapsed 0.019 ms (1.913 ms / 100) 1.884 -> 1.880 ( -0.21%) [ +0.37% +0.27% +0.00% / +0.16% -0.05% -0.21%] index_copy_ strided 3 : Elapsed 0.019 ms (1.891 ms / 100) 1.909 -> 1.904 ( -0.26%) [ +0.21% +0.26% +0.00% / +0.05% -0.05% -0.26%] index_add_ strided 7 : Elapsed 0.019 ms (1.913 ms / 100) 1.885 -> 1.881 ( -0.21%) [ +0.05% +0.00% +0.48% / -0.05% -0.21% -0.21%] index_copy_ strided 7 : Elapsed 0.019 ms (1.886 ms / 100) 1.903 -> 1.899 ( -0.21%) [ +0.05% +0.00% +0.11% / +0.21% +0.16% -0.21%] index_add_ perm : Elapsed 0.019 ms (1.904 ms / 100) 1.871 -> 1.874 ( +0.16%) [ +0.64% +0.00% +0.37% / +0.69% +0.37% +0.16%] index_copy_ perm : Elapsed 0.019 ms (1.883 ms / 100) 1.902 -> 1.899 ( -0.16%) [ +0.00% +0.21% +0.16% / +0.11% -0.16% +0.16%] index_add_ perm_sorted : Elapsed 0.019 ms (1.902 ms / 100) 1.877 -> 1.870 ( -0.37%) [ +0.00% +0.27% +0.05% / +0.00% -0.37% -0.21%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.877 ms / 100) 8.579 -> 8.577 ( -0.02%) [ +0.05% +0.00% +0.03% / -0.02% +0.30% +0.15%] index_select const : Elapsed 0.086 ms (8.583 ms / 100) 8.627 -> 8.622 ( -0.06%) [ +0.14% +0.13% +0.00% / -0.06% +0.46% +0.22%] index_select wrap : Elapsed 0.086 ms (8.639 ms / 100) 8.600 -> 8.607 ( +0.08%) [ +0.01% +0.20% +0.00% / +0.08% +0.51% +0.28%] index_select linear : Elapsed 0.086 ms (8.601 ms / 100) 8.591 -> 8.602 ( +0.13%) [ +0.00% +0.12% +0.19% / +0.20% +0.13% +0.13%] index_select reverse : Elapsed 0.086 ms (8.591 ms / 100) 8.580 -> 8.567 ( -0.15%) [ +0.02% +0.00% +0.07% / -0.15% +0.34% +0.05%] index_select skip64 : Elapsed 0.086 ms (8.582 ms / 100) 8.578 -> 8.575 ( -0.03%) [ +0.07% +0.14% +0.00% / +0.05% +0.07% -0.03%] index_select skip256 : Elapsed 0.086 ms (8.584 ms / 100) 8.582 -> 8.615 ( +0.38%) [ +0.00% +0.16% +0.45% / +0.42% +0.59% +0.38%] index_select spread : Elapsed 0.086 ms (8.582 ms / 100) 8.611 -> 8.618 ( +0.08%) [ +0.00% +0.09% +0.00% / +0.08% +0.19% +0.26%] index_select strided 3 : Elapsed 0.086 ms (8.611 ms / 100) 8.607 -> 8.606 ( -0.01%) [ +0.00% +0.15% +0.01% / -0.01% +0.22% +0.24%] index_select random : Elapsed 0.086 ms (8.607 ms / 100) 8.602 -> 8.588 ( -0.16%) [ +0.20% +0.00% +0.08% / -0.16% +0.31% +0.31%] index_select random_sorted : Elapsed 0.086 ms (8.619 ms / 100) B = [16, 20, 4, 40] (stride (1, 16, 320, 1280)) A = [16, 5, 4, 40] (stride (200, 40, 3200, 1)) dim = 1 1.859 -> 1.860 ( +0.05%) [ +0.00% +0.00% +0.27% / +0.05% +0.81% +0.81%] index_add_ linear : Elapsed 0.019 ms (1.859 ms / 100) 1.813 -> 1.820 ( +0.39%) [ +0.00% +0.11% +0.28% / +0.39% +1.10% +1.16%] index_copy_ linear : Elapsed 0.018 ms (1.813 ms / 100) 1.859 -> 1.862 ( +0.16%) [ +0.05% +0.00% +0.22% / +0.16% +1.08% +1.13%] index_add_ reverse : Elapsed 0.019 ms (1.860 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.06% +0.00% +0.22% / +0.17% +1.43% +1.38%] index_copy_ reverse : Elapsed 0.018 ms (1.814 ms / 100) 1.883 -> 1.886 ( +0.16%) [ +0.00% +0.00% +0.21% / +0.16% +0.32% +0.42%] index_add_ spread : Elapsed 0.019 ms (1.883 ms / 100) 1.839 -> 1.840 ( +0.05%) [ +0.00% +0.00% +0.16% / +0.05% +0.44% +0.87%] index_copy_ spread : Elapsed 0.018 ms (1.839 ms / 100) 1.876 -> 1.875 ( -0.05%) [ +0.00% +0.05% +0.21% / -0.05% +0.48% +0.53%] index_add_ strided 3 : Elapsed 0.019 ms (1.876 ms / 100) 1.831 -> 1.838 ( +0.38%) [ +0.11% +0.00% +0.11% / +0.38% +0.60% +0.87%] index_copy_ strided 3 : Elapsed 0.018 ms (1.833 ms / 100) 1.870 -> 1.874 ( +0.21%) [ +0.00% +0.00% +0.11% / +0.21% +0.70% +0.64%] index_add_ strided 7 : Elapsed 0.019 ms (1.870 ms / 100) 1.826 -> 1.832 ( +0.33%) [ +0.55% +0.00% +0.11% / +0.33% +0.60% +0.82%] index_copy_ strided 7 : Elapsed 0.018 ms (1.836 ms / 100) 1.880 -> 1.884 ( +0.21%) [ +0.00% +0.05% +0.21% / +0.21% +0.48% +0.43%] index_add_ perm : Elapsed 0.019 ms (1.880 ms / 100) 1.837 -> 1.838 ( +0.05%) [ +0.00% +0.22% +0.11% / +0.05% +0.60% +0.54%] index_copy_ perm : Elapsed 0.018 ms (1.837 ms / 100) 1.880 -> 1.881 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.16% +0.48%] index_add_ perm_sorted : Elapsed 0.019 ms (1.882 ms / 100) 1.837 -> 1.842 ( +0.27%) [ +0.00% +0.05% +0.00% / +0.27% +0.60% +0.60%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.837 ms / 100) 8.539 -> 8.540 ( +0.01%) [ +0.16% +0.02% +0.00% / +0.13% +0.09% +0.01%] index_select const : Elapsed 0.086 ms (8.553 ms / 100) 8.594 -> 8.576 ( -0.21%) [ +0.00% +0.07% +0.06% / +0.01% -0.21% +0.14%] index_select wrap : Elapsed 0.086 ms (8.594 ms / 100) 8.571 -> 8.560 ( -0.13%) [ +0.00% +0.13% +0.06% / +0.15% -0.13% +0.28%] index_select linear : Elapsed 0.086 ms (8.571 ms / 100) 8.586 -> 8.591 ( +0.06%) [ +0.00% +0.12% +0.10% / +0.26% +0.06% +0.09%] index_select reverse : Elapsed 0.086 ms (8.586 ms / 100) 8.537 -> 8.533 ( -0.05%) [ +0.00% +0.13% +0.06% / -0.05% +0.20% +0.22%] index_select skip64 : Elapsed 0.085 ms (8.537 ms / 100) 8.527 -> 8.537 ( +0.12%) [ +0.09% +0.25% +0.00% / +0.12% +0.52% +0.18%] index_select skip256 : Elapsed 0.085 ms (8.535 ms / 100) 8.565 -> 8.578 ( +0.15%) [ +0.00% +0.29% +0.44% / +0.15% +0.28% +0.36%] index_select spread : Elapsed 0.086 ms (8.565 ms / 100) 8.595 -> 8.578 ( -0.20%) [ +0.00% +0.05% +0.10% / -0.14% +0.17% -0.20%] index_select strided 3 : Elapsed 0.086 ms (8.595 ms / 100) 8.580 -> 8.594 ( +0.16%) [ +0.30% +0.26% +0.00% / +0.36% +0.28% +0.16%] index_select random : Elapsed 0.086 ms (8.606 ms / 100) 8.561 -> 8.573 ( +0.14%) [ +0.00% +0.16% +0.42% / +0.14% +0.54% +0.33%] index_select random_sorted : Elapsed 0.086 ms (8.561 ms / 100) out_shape = [16, 5, 20, 40] in_shape = [16, 5, 4, 40] idx_dim = 2 B = [16, 5, 20, 40] (stride (4000, 1, 200, 5)) A = [16, 5, 4, 40] (stride (4, 64, 1, 320)) dim = 2 2.249 -> 2.246 ( -0.13%) [ +0.00% +0.13% +0.18% / -0.04% -0.13% +0.09%] index_add_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.184 -> 2.176 ( -0.37%) [ +0.05% +0.14% +0.00% / -0.37% -0.14% -0.14%] index_copy_ linear : Elapsed 0.022 ms (2.185 ms / 100) 2.250 -> 2.249 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.31% +0.09%] index_add_ reverse : Elapsed 0.023 ms (2.252 ms / 100) 2.182 -> 2.182 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.00% +0.14% +0.09%] index_copy_ reverse : Elapsed 0.022 ms (2.184 ms / 100) 2.245 -> 2.247 ( +0.09%) [ +0.13% +0.09% +0.00% / +0.18% +0.27% +0.09%] index_add_ spread : Elapsed 0.022 ms (2.248 ms / 100) 2.177 -> 2.180 ( +0.14%) [ +0.00% +0.09% +0.23% / +0.14% +0.23% +0.23%] index_copy_ spread : Elapsed 0.022 ms (2.177 ms / 100) 2.244 -> 2.246 ( +0.09%) [ +0.31% +0.45% +0.00% / +0.18% +0.09% +0.31%] index_add_ strided 3 : Elapsed 0.023 ms (2.251 ms / 100) 2.178 -> 2.182 ( +0.18%) [ +0.23% +0.32% +0.00% / +0.23% +0.18% +0.32%] index_copy_ strided 3 : Elapsed 0.022 ms (2.183 ms / 100) 2.249 -> 2.249 ( +0.00%) [ +0.09% +0.31% +0.00% / +0.00% +0.22% +0.09%] index_add_ strided 7 : Elapsed 0.023 ms (2.251 ms / 100) 2.185 -> 2.183 ( -0.09%) [ +0.00% +0.14% +0.09% / +0.00% -0.05% -0.09%] index_copy_ strided 7 : Elapsed 0.022 ms (2.185 ms / 100) 2.249 -> 2.248 ( -0.04%) [ +0.09% +0.44% +0.00% / -0.04% -0.04% +0.22%] index_add_ perm : Elapsed 0.023 ms (2.251 ms / 100) 2.182 -> 2.182 ( +0.00%) [ +0.00% +0.37% +0.00% / +0.00% +0.00% +0.18%] index_copy_ perm : Elapsed 0.022 ms (2.182 ms / 100) 2.252 -> 2.249 ( -0.13%) [ +0.13% +0.00% +0.09% / -0.09% -0.13% +0.09%] index_add_ perm_sorted : Elapsed 0.023 ms (2.255 ms / 100) 2.186 -> 2.186 ( +0.00%) [ +0.14% +0.00% +0.05% / +0.09% +0.00% +0.05%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.189 ms / 100) 9.218 -> 9.224 ( +0.07%) [ +0.00% +0.03% +0.02% / +0.07% +0.20% +0.11%] index_select const : Elapsed 0.092 ms (9.218 ms / 100) 9.226 -> 9.233 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.26% +0.15%] index_select wrap : Elapsed 0.092 ms (9.229 ms / 100) 9.218 -> 9.231 ( +0.14%) [ +0.13% +0.00% +0.02% / +0.14% +0.14% +0.60%] index_select linear : Elapsed 0.092 ms (9.230 ms / 100) 9.212 -> 9.219 ( +0.08%) [ +0.34% +0.00% +0.07% / +0.08% +0.63% +0.29%] index_select reverse : Elapsed 0.092 ms (9.243 ms / 100) 9.211 -> 9.218 ( +0.08%) [ +0.22% +0.17% +0.00% / +0.20% +0.39% +0.08%] index_select skip64 : Elapsed 0.092 ms (9.231 ms / 100) 9.209 -> 9.227 ( +0.20%) [ +0.37% +0.00% +0.26% / +0.21% +0.24% +0.20%] index_select skip256 : Elapsed 0.092 ms (9.243 ms / 100) 9.226 -> 9.220 ( -0.07%) [ +0.00% +0.00% +0.15% / +0.01% +0.21% -0.07%] index_select spread : Elapsed 0.092 ms (9.226 ms / 100) 9.228 -> 9.217 ( -0.12%) [ +0.00% +0.23% +0.04% / -0.03% -0.04% -0.12%] index_select strided 3 : Elapsed 0.092 ms (9.228 ms / 100) 9.210 -> 9.206 ( -0.04%) [ +0.00% +0.09% +0.11% / +0.36% -0.04% +0.27%] index_select random : Elapsed 0.092 ms (9.210 ms / 100) 9.217 -> 9.224 ( +0.08%) [ +0.00% +0.17% +0.18% / +0.16% +0.08% +0.34%] index_select random_sorted : Elapsed 0.092 ms (9.217 ms / 100) B = [16, 5, 20, 40] (stride (4000, 1, 5, 100)) A = [16, 5, 4, 40] (stride (40, 640, 3200, 1)) dim = 2 2.104 -> 2.102 ( -0.10%) [ +0.24% +0.14% +0.00% / -0.10% +0.57% +0.48%] index_add_ linear : Elapsed 0.021 ms (2.109 ms / 100) 2.067 -> 2.071 ( +0.19%) [ +0.00% +0.00% +0.05% / +0.19% +0.58% +0.63%] index_copy_ linear : Elapsed 0.021 ms (2.067 ms / 100) 2.101 -> 2.106 ( +0.24%) [ +0.19% +0.00% +0.10% / +0.24% +0.52% +0.38%] index_add_ reverse : Elapsed 0.021 ms (2.105 ms / 100) 2.066 -> 2.069 ( +0.15%) [ +0.39% +0.24% +0.00% / +0.15% +0.58% +0.39%] index_copy_ reverse : Elapsed 0.021 ms (2.074 ms / 100) 2.106 -> 2.103 ( -0.14%) [ +0.00% +0.05% +0.00% / -0.14% +0.47% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.106 ms / 100) 2.087 -> 2.090 ( +0.14%) [ +0.24% +0.00% +0.10% / +0.14% +0.38% +0.38%] index_copy_ spread : Elapsed 0.021 ms (2.092 ms / 100) 2.124 -> 2.127 ( +0.14%) [ +0.14% +0.42% +0.00% / +0.14% +0.47% +0.94%] index_add_ strided 3 : Elapsed 0.021 ms (2.127 ms / 100) 2.101 -> 2.100 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.57% +0.81%] index_copy_ strided 3 : Elapsed 0.021 ms (2.104 ms / 100) 2.094 -> 2.097 ( +0.14%) [ +0.29% +0.00% +0.05% / +0.14% +0.29% +0.53%] index_add_ strided 7 : Elapsed 0.021 ms (2.100 ms / 100) 2.074 -> 2.075 ( +0.05%) [ +0.00% +0.10% +0.24% / +0.05% +0.34% +0.48%] index_copy_ strided 7 : Elapsed 0.021 ms (2.074 ms / 100) 2.114 -> 2.118 ( +0.19%) [ +0.05% +0.24% +0.00% / +0.19% +0.61% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.115 ms / 100) 2.091 -> 2.096 ( +0.24%) [ +0.14% +0.10% +0.00% / +0.24% +0.67% +0.48%] index_copy_ perm : Elapsed 0.021 ms (2.094 ms / 100) 2.118 -> 2.120 ( +0.09%) [ +0.33% +0.09% +0.00% / +0.09% +0.52% +0.14%] index_add_ perm_sorted : Elapsed 0.021 ms (2.125 ms / 100) 2.094 -> 2.094 ( +0.00%) [ +0.29% +0.00% +0.00% / +0.00% +0.38% +0.67%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.100 ms / 100) 9.231 -> 9.209 ( -0.24%) [ +0.00% +0.45% +0.01% / +0.22% -0.11% -0.24%] index_select const : Elapsed 0.092 ms (9.231 ms / 100) 9.302 -> 9.307 ( +0.05%) [ +0.09% +0.03% +0.00% / +0.05% +0.22% +0.25%] index_select wrap : Elapsed 0.093 ms (9.310 ms / 100) 9.260 -> 9.264 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.05% +0.04% +0.08%] index_select linear : Elapsed 0.093 ms (9.262 ms / 100) 9.280 -> 9.282 ( +0.02%) [ +0.10% +0.16% +0.00% / +0.02% +0.20% +0.04%] index_select reverse : Elapsed 0.093 ms (9.289 ms / 100) 9.231 -> 9.232 ( +0.01%) [ +0.00% +0.09% +0.12% / +0.04% +0.04% +0.01%] index_select skip64 : Elapsed 0.092 ms (9.231 ms / 100) 9.225 -> 9.216 ( -0.10%) [ +0.00% +0.47% +0.20% / +0.03% +0.01% -0.10%] index_select skip256 : Elapsed 0.092 ms (9.225 ms / 100) 9.268 -> 9.265 ( -0.03%) [ +0.15% +0.24% +0.00% / -0.03% +0.24% +0.38%] index_select spread : Elapsed 0.093 ms (9.282 ms / 100) 9.306 -> 9.307 ( +0.01%) [ +0.00% +0.20% +0.06% / +0.01% +0.14% +0.39%] index_select strided 3 : Elapsed 0.093 ms (9.306 ms / 100) 9.306 -> 9.304 ( -0.02%) [ +0.08% +0.00% +0.08% / +0.01% -0.02% +0.13%] index_select random : Elapsed 0.093 ms (9.313 ms / 100) 9.271 -> 9.270 ( -0.01%) [ +0.02% +0.12% +0.00% / -0.01% +0.22% +0.05%] index_select random_sorted : Elapsed 0.093 ms (9.273 ms / 100) B = [16, 5, 20, 40] (stride (20, 12800, 1, 320)) A = [16, 5, 4, 40] (stride (5, 1, 80, 320)) dim = 2 2.141 -> 2.147 ( +0.28%) [ +0.28% +0.00% +0.23% / +0.28% +0.51% +0.70%] index_add_ linear : Elapsed 0.021 ms (2.147 ms / 100) 2.095 -> 2.099 ( +0.19%) [ +0.00% +0.38% +0.29% / +0.19% +0.76% +0.81%] index_copy_ linear : Elapsed 0.021 ms (2.095 ms / 100) 2.137 -> 2.138 ( +0.05%) [ +0.19% +0.00% +0.14% / +0.05% +1.03% +1.26%] index_add_ reverse : Elapsed 0.021 ms (2.141 ms / 100) 2.097 -> 2.098 ( +0.05%) [ +0.00% +0.29% +0.29% / +0.05% +1.14% +1.14%] index_copy_ reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.180 -> 2.179 ( -0.05%) [ +0.14% +0.18% +0.00% / -0.05% +0.83% +0.73%] index_add_ spread : Elapsed 0.022 ms (2.183 ms / 100) 2.197 -> 2.194 ( -0.14%) [ +0.27% +0.14% +0.00% / -0.14% +0.96% +0.96%] index_copy_ spread : Elapsed 0.022 ms (2.203 ms / 100) 2.173 -> 2.176 ( +0.14%) [ +0.18% +0.00% +0.18% / +0.14% +0.74% +1.01%] index_add_ strided 3 : Elapsed 0.022 ms (2.177 ms / 100) 2.169 -> 2.172 ( +0.14%) [ +0.00% +0.00% +0.09% / +0.14% +0.69% +0.83%] index_copy_ strided 3 : Elapsed 0.022 ms (2.169 ms / 100) 2.182 -> 2.186 ( +0.18%) [ +0.37% +0.05% +0.00% / +0.18% +0.64% +0.82%] index_add_ strided 7 : Elapsed 0.022 ms (2.190 ms / 100) 2.198 -> 2.200 ( +0.09%) [ +0.05% +0.23% +0.00% / +0.09% +0.73% +0.77%] index_copy_ strided 7 : Elapsed 0.022 ms (2.199 ms / 100) 2.159 -> 2.161 ( +0.09%) [ +0.23% +0.00% +0.14% / +0.09% +0.56% +0.65%] index_add_ perm : Elapsed 0.022 ms (2.164 ms / 100) 2.134 -> 2.138 ( +0.19%) [ +0.00% +0.23% +0.09% / +0.28% +0.19% +0.47%] index_copy_ perm : Elapsed 0.021 ms (2.134 ms / 100) 2.162 -> 2.162 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.42% +0.42%] index_add_ perm_sorted : Elapsed 0.022 ms (2.163 ms / 100) 2.133 -> 2.135 ( +0.09%) [ +0.09% +0.23% +0.00% / +0.09% +0.42% +0.56%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.135 ms / 100) 8.840 -> 8.836 ( -0.05%) [ +0.00% +0.07% +0.02% / +0.01% +0.34% -0.05%] index_select const : Elapsed 0.088 ms (8.840 ms / 100) 8.891 -> 8.894 ( +0.03%) [ +0.30% +0.00% +0.02% / +0.10% +0.18% +0.03%] index_select wrap : Elapsed 0.089 ms (8.918 ms / 100) 8.870 -> 8.875 ( +0.06%) [ +0.00% +0.37% +0.20% / +0.06% +0.19% +0.44%] index_select linear : Elapsed 0.089 ms (8.870 ms / 100) 8.886 -> 8.879 ( -0.08%) [ +0.08% +0.00% +0.23% / -0.08% +0.14% +0.26%] index_select reverse : Elapsed 0.089 ms (8.893 ms / 100) 8.831 -> 8.849 ( +0.20%) [ +0.08% +0.34% +0.00% / +0.20% +0.48% +0.22%] index_select skip64 : Elapsed 0.088 ms (8.838 ms / 100) 8.840 -> 8.843 ( +0.03%) [ +0.02% +0.00% +0.20% / +0.03% +0.17% +0.15%] index_select skip256 : Elapsed 0.088 ms (8.842 ms / 100) 8.913 -> 8.901 ( -0.13%) [ +0.00% +0.07% +0.26% / +0.15% +0.15% -0.13%] index_select spread : Elapsed 0.089 ms (8.913 ms / 100) 8.897 -> 8.883 ( -0.16%) [ +0.01% +0.03% +0.00% / +0.13% +0.26% -0.16%] index_select strided 3 : Elapsed 0.089 ms (8.898 ms / 100) 8.890 -> 8.891 ( +0.01%) [ +0.01% +0.00% +0.07% / +0.24% +0.13% +0.01%] index_select random : Elapsed 0.089 ms (8.891 ms / 100) 8.908 -> 8.902 ( -0.07%) [ +0.00% +0.03% +0.09% / +0.07% +0.21% -0.07%] index_select random_sorted : Elapsed 0.089 ms (8.908 ms / 100) B = [16, 5, 20, 40] (stride (200, 1, 3200, 5)) A = [16, 5, 4, 40] (stride (800, 1, 200, 5)) dim = 2 2.078 -> 2.079 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.82% +0.96%] index_add_ linear : Elapsed 0.021 ms (2.079 ms / 100) 2.023 -> 2.030 ( +0.35%) [ +0.15% +0.00% +0.44% / +0.35% +1.19% +1.19%] index_copy_ linear : Elapsed 0.020 ms (2.026 ms / 100) 2.076 -> 2.075 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.05% +0.77% +0.87%] index_add_ reverse : Elapsed 0.021 ms (2.078 ms / 100) 2.025 -> 2.023 ( -0.10%) [ +0.00% +0.20% +0.00% / -0.10% +0.79% +0.74%] index_copy_ reverse : Elapsed 0.020 ms (2.025 ms / 100) 2.081 -> 2.082 ( +0.05%) [ +0.00% +0.19% +0.14% / +0.05% +0.77% +0.91%] index_add_ spread : Elapsed 0.021 ms (2.081 ms / 100) 2.030 -> 2.029 ( -0.05%) [ +0.00% +0.15% +0.10% / -0.05% +0.79% +0.69%] index_copy_ spread : Elapsed 0.020 ms (2.030 ms / 100) 2.085 -> 2.082 ( -0.14%) [ +0.00% +0.00% +0.05% / -0.14% +0.48% +0.48%] index_add_ strided 3 : Elapsed 0.021 ms (2.085 ms / 100) 2.032 -> 2.033 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.84% +0.79%] index_copy_ strided 3 : Elapsed 0.020 ms (2.035 ms / 100) 2.078 -> 2.077 ( -0.05%) [ +0.19% +0.00% +0.00% / -0.05% +0.77% +0.53%] index_add_ strided 7 : Elapsed 0.021 ms (2.082 ms / 100) 2.028 -> 2.026 ( -0.10%) [ +0.25% +0.00% +0.20% / -0.10% +0.84% +0.99%] index_copy_ strided 7 : Elapsed 0.020 ms (2.033 ms / 100) 2.085 -> 2.081 ( -0.19%) [ +0.05% +0.00% +0.00% / -0.19% +0.14% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.086 ms / 100) 2.030 -> 2.031 ( +0.05%) [ +0.20% +0.10% +0.00% / +0.05% +0.15% +0.39%] index_copy_ perm : Elapsed 0.020 ms (2.034 ms / 100) 2.081 -> 2.078 ( -0.14%) [ +0.00% +0.29% +0.10% / -0.14% +0.29% +0.14%] index_add_ perm_sorted : Elapsed 0.021 ms (2.081 ms / 100) 2.029 -> 2.028 ( -0.05%) [ +0.00% +0.25% +0.20% / -0.05% +0.39% +0.30%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.029 ms / 100) 9.135 -> 9.140 ( +0.05%) [ +0.11% +0.00% +0.30% / +0.27% +0.13% +0.05%] index_select const : Elapsed 0.091 ms (9.145 ms / 100) 9.201 -> 9.179 ( -0.24%) [ +0.09% +0.00% +0.18% / -0.23% +0.13% -0.24%] index_select wrap : Elapsed 0.092 ms (9.209 ms / 100) 9.165 -> 9.171 ( +0.07%) [ +0.00% +0.00% +0.21% / +0.07% +0.13% +0.17%] index_select linear : Elapsed 0.092 ms (9.165 ms / 100) 9.145 -> 9.159 ( +0.15%) [ +0.00% +0.35% +0.21% / +0.26% +0.16% +0.15%] index_select reverse : Elapsed 0.091 ms (9.145 ms / 100) 9.126 -> 9.133 ( +0.08%) [ +0.00% +0.21% +0.11% / +0.15% +0.08% +0.24%] index_select skip64 : Elapsed 0.091 ms (9.126 ms / 100) 9.130 -> 9.120 ( -0.11%) [ +0.09% +0.01% +0.00% / +0.20% +0.20% -0.11%] index_select skip256 : Elapsed 0.091 ms (9.138 ms / 100) 9.173 -> 9.175 ( +0.02%) [ +0.03% +0.00% +0.23% / +0.29% +0.07% +0.02%] index_select spread : Elapsed 0.092 ms (9.176 ms / 100) 9.197 -> 9.181 ( -0.17%) [ +0.00% +0.23% +0.10% / +0.16% -0.07% -0.17%] index_select strided 3 : Elapsed 0.092 ms (9.197 ms / 100) 9.183 -> 9.189 ( +0.07%) [ +0.00% +0.02% +0.21% / +0.19% +0.07% +0.14%] index_select random : Elapsed 0.092 ms (9.183 ms / 100) 9.175 -> 9.163 ( -0.13%) [ +0.02% +0.00% +0.21% / +0.21% -0.13% -0.01%] index_select random_sorted : Elapsed 0.092 ms (9.177 ms / 100) B = [16, 5, 20, 40] (stride (1, 16, 3200, 80)) A = [16, 5, 4, 40] (stride (160, 2560, 40, 1)) dim = 2 2.100 -> 2.100 ( +0.00%) [ +0.00% +0.48% +0.29% / +0.24% +0.05% +0.00%] index_add_ linear : Elapsed 0.021 ms (2.100 ms / 100) 2.064 -> 2.060 ( -0.19%) [ +0.05% +0.10% +0.00% / -0.19% -0.10% -0.15%] index_copy_ linear : Elapsed 0.021 ms (2.065 ms / 100) 2.085 -> 2.089 ( +0.19%) [ +0.43% +0.29% +0.00% / +0.19% +0.43% +0.34%] index_add_ reverse : Elapsed 0.021 ms (2.094 ms / 100) 2.045 -> 2.046 ( +0.05%) [ +0.10% +0.20% +0.00% / +0.15% +0.05% +0.39%] index_copy_ reverse : Elapsed 0.020 ms (2.047 ms / 100) 2.088 -> 2.096 ( +0.38%) [ +0.34% +0.38% +0.00% / +0.43% +0.38% +0.43%] index_add_ spread : Elapsed 0.021 ms (2.095 ms / 100) 2.053 -> 2.053 ( +0.00%) [ +0.00% +0.15% +0.05% / +0.00% +0.15% +0.15%] index_copy_ spread : Elapsed 0.021 ms (2.053 ms / 100) 2.105 -> 2.095 ( -0.48%) [ +0.10% +0.00% +0.10% / +0.05% -0.48% -0.33%] index_add_ strided 3 : Elapsed 0.021 ms (2.107 ms / 100) 2.063 -> 2.057 ( -0.29%) [ +0.19% +0.05% +0.00% / +0.34% -0.29% +0.05%] index_copy_ strided 3 : Elapsed 0.021 ms (2.067 ms / 100) 2.099 -> 2.101 ( +0.10%) [ +0.19% +0.24% +0.00% / +0.10% +0.57% +0.52%] index_add_ strided 7 : Elapsed 0.021 ms (2.103 ms / 100) 2.058 -> 2.063 ( +0.24%) [ +0.29% +0.10% +0.00% / +0.24% +0.58% +0.58%] index_copy_ strided 7 : Elapsed 0.021 ms (2.064 ms / 100) 2.096 -> 2.098 ( +0.10%) [ +0.19% +0.05% +0.00% / +0.10% +0.19% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.100 ms / 100) 2.062 -> 2.056 ( -0.29%) [ +0.29% +0.10% +0.00% / +0.05% -0.29% -0.05%] index_copy_ perm : Elapsed 0.021 ms (2.068 ms / 100) 2.098 -> 2.096 ( -0.10%) [ +0.19% +0.19% +0.00% / +0.14% +0.29% -0.10%] index_add_ perm_sorted : Elapsed 0.021 ms (2.102 ms / 100) 2.063 -> 2.060 ( -0.15%) [ +0.05% +0.00% +0.15% / +0.00% +0.19% -0.15%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.064 ms / 100) 9.152 -> 9.169 ( +0.19%) [ +0.14% +0.16% +0.00% / +0.19% +0.33% +0.36%] index_select const : Elapsed 0.092 ms (9.165 ms / 100) 9.217 -> 9.224 ( +0.08%) [ +0.05% +0.17% +0.00% / +0.08% +0.09% +0.20%] index_select wrap : Elapsed 0.092 ms (9.222 ms / 100) 9.188 -> 9.198 ( +0.11%) [ +0.26% +0.07% +0.00% / +0.21% +0.11% +0.25%] index_select linear : Elapsed 0.092 ms (9.212 ms / 100) 9.204 -> 9.204 ( +0.00%) [ +0.02% +0.00% +0.10% / +0.22% +0.00% +0.04%] index_select reverse : Elapsed 0.092 ms (9.206 ms / 100) 9.148 -> 9.141 ( -0.08%) [ +0.05% +0.21% +0.00% / -0.08% +0.36% +0.33%] index_select skip64 : Elapsed 0.092 ms (9.153 ms / 100) 9.147 -> 9.158 ( +0.12%) [ +0.00% +0.24% +0.10% / +0.12% +0.23% +0.27%] index_select skip256 : Elapsed 0.091 ms (9.147 ms / 100) 9.209 -> 9.206 ( -0.03%) [ +0.11% +0.09% +0.00% / -0.03% +0.02% +0.23%] index_select spread : Elapsed 0.092 ms (9.219 ms / 100) 9.227 -> 9.219 ( -0.09%) [ +0.00% +0.05% +0.14% / -0.09% +0.05% +0.04%] index_select strided 3 : Elapsed 0.092 ms (9.227 ms / 100) 9.222 -> 9.225 ( +0.03%) [ +0.21% +0.07% +0.00% / +0.13% +0.03% +0.13%] index_select random : Elapsed 0.092 ms (9.241 ms / 100) 9.196 -> 9.202 ( +0.07%) [ +0.00% +0.12% +0.18% / +0.07% +0.18% +0.39%] index_select random_sorted : Elapsed 0.092 ms (9.196 ms / 100) out_shape = [16, 5, 4, 20] in_shape = [16, 5, 4, 40] idx_dim = 3 B = [16, 5, 4, 20] (stride (400, 80, 20, 1)) A = [16, 5, 4, 40] (stride (800, 40, 200, 1)) dim = 3 2.343 -> 2.344 ( +0.04%) [ +0.34% +0.09% +0.00% / +0.04% +0.43% +0.21%] index_select const : Elapsed 0.024 ms (2.351 ms / 100) 2.351 -> 2.351 ( +0.00%) [ +0.26% +0.04% +0.00% / +0.09% +0.17% +0.00%] index_select wrap : Elapsed 0.024 ms (2.357 ms / 100) 2.354 -> 2.351 ( -0.13%) [ +0.38% +0.00% +0.04% / -0.13% -0.04% +0.08%] index_select linear : Elapsed 0.024 ms (2.363 ms / 100) 2.352 -> 2.353 ( +0.04%) [ +0.09% +0.04% +0.00% / +0.21% +0.04% +0.26%] index_select reverse : Elapsed 0.024 ms (2.354 ms / 100) 2.349 -> 2.347 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.17% +0.09%] index_select skip64 : Elapsed 0.023 ms (2.349 ms / 100) 2.346 -> 2.345 ( -0.04%) [ +0.17% +0.00% +0.04% / -0.04% +0.17% +0.47%] index_select skip256 : Elapsed 0.024 ms (2.350 ms / 100) 2.356 -> 2.358 ( +0.08%) [ +0.30% +0.17% +0.00% / +0.13% +0.21% +0.08%] index_select spread : Elapsed 0.024 ms (2.363 ms / 100) 2.357 -> 2.356 ( -0.04%) [ +0.00% +0.25% +0.04% / +0.17% -0.04% +0.13%] index_select strided 3 : Elapsed 0.024 ms (2.357 ms / 100) 2.361 -> 2.360 ( -0.04%) [ +0.25% +0.00% +0.04% / -0.04% +0.04% +0.04%] index_select strided 5 : Elapsed 0.024 ms (2.367 ms / 100) 2.357 -> 2.357 ( +0.00%) [ +0.13% +0.08% +0.00% / +0.00% +0.13% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.360 ms / 100) 2.358 -> 2.356 ( -0.08%) [ +0.08% +0.25% +0.00% / -0.08% +0.21% +0.13%] index_select strided 8 : Elapsed 0.024 ms (2.360 ms / 100) 2.357 -> 2.360 ( +0.13%) [ +0.13% +0.00% +0.21% / +0.13% +0.25% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.360 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.13% +0.13% +0.08%] index_select random : Elapsed 0.024 ms (2.359 ms / 100) 2.360 -> 2.358 ( -0.08%) [ +0.08% +0.00% +0.08% / +0.08% -0.04% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.362 ms / 100) 2.359 -> 2.359 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.17% +0.00% +0.04%] index_select perm : Elapsed 0.024 ms (2.363 ms / 100) 2.361 -> 2.356 ( -0.21%) [ +0.00% +0.04% +0.04% / +0.00% +0.00% -0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.361 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 100, 5)) A = [16, 5, 4, 40] (stride (20, 1, 5, 320)) dim = 3 2.445 -> 2.443 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% +0.16% +0.25%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.457 -> 2.452 ( -0.20%) [ +0.00% +0.24% +0.00% / +0.20% -0.20% -0.20%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.456 -> 2.452 ( -0.16%) [ +0.08% +0.08% +0.00% / +0.08% +0.04% -0.16%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.00% +0.20% +0.12%] index_select reverse : Elapsed 0.025 ms (2.459 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.29% +0.20% +0.00% / +0.12% +0.04% +0.00%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.04% +0.37% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.444 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.24% +0.08% +0.16%] index_select spread : Elapsed 0.025 ms (2.457 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.24% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.08% +0.00% +0.25% / +0.04% +0.25% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.449 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.12% +0.04% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.04% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.00% +0.20% +0.08% / -0.04% +0.08% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.447 ms / 100) 2.453 -> 2.451 ( -0.08%) [ +0.20% +0.08% +0.00% / -0.08% +0.08% +0.00%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.04% +0.04% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.04% +0.12% +0.00% / -0.04% -0.04% +0.00%] index_select perm : Elapsed 0.025 ms (2.456 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.04% +0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.454 ms / 100) B = [16, 5, 4, 20] (stride (400, 4, 1, 20)) A = [16, 5, 4, 40] (stride (40, 2560, 640, 1)) dim = 3 1.460 -> 1.464 ( +0.27%) [ +0.62% +0.41% +0.00% / +0.27% +0.62% +0.75%] index_select const : Elapsed 0.015 ms (1.469 ms / 100) 1.473 -> 1.472 ( -0.07%) [ +0.14% +0.00% +0.07% / -0.07% +0.54% +0.54%] index_select wrap : Elapsed 0.015 ms (1.475 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.27% +0.81%] index_select linear : Elapsed 0.015 ms (1.477 ms / 100) 1.473 -> 1.476 ( +0.20%) [ +0.00% +0.48% +0.34% / +0.20% +0.81% +0.61%] index_select reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.462 -> 1.462 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +0.55% +0.21%] index_select skip64 : Elapsed 0.015 ms (1.462 ms / 100) 1.462 -> 1.460 ( -0.14%) [ +0.00% +0.00% +0.27% / -0.14% +0.62% +0.34%] index_select skip256 : Elapsed 0.015 ms (1.462 ms / 100) 1.482 -> 1.489 ( +0.47%) [ +0.00% +0.20% +0.34% / +0.47% +0.74% +0.74%] index_select spread : Elapsed 0.015 ms (1.482 ms / 100) 1.487 -> 1.487 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.07% +0.40% +0.00%] index_select strided 3 : Elapsed 0.015 ms (1.488 ms / 100) 1.486 -> 1.490 ( +0.27%) [ +0.00% +0.13% +0.13% / +0.27% +0.54% +0.40%] index_select strided 5 : Elapsed 0.015 ms (1.486 ms / 100) 1.483 -> 1.482 ( -0.07%) [ +0.00% +0.27% +0.20% / -0.07% +0.40% +0.67%] index_select strided 7 : Elapsed 0.015 ms (1.483 ms / 100) 1.481 -> 1.484 ( +0.20%) [ +0.41% +0.00% +0.20% / +0.20% +0.47% +0.47%] index_select strided 8 : Elapsed 0.015 ms (1.487 ms / 100) 1.481 -> 1.484 ( +0.20%) [ +0.14% +0.27% +0.00% / +0.20% +0.74% +0.74%] index_select strided 16 : Elapsed 0.015 ms (1.483 ms / 100) 1.487 -> 1.492 ( +0.34%) [ +0.00% +0.20% +0.07% / +0.34% +0.40% +0.61%] index_select random : Elapsed 0.015 ms (1.487 ms / 100) 1.484 -> 1.483 ( -0.07%) [ +0.00% +0.20% +0.20% / -0.07% +0.94% +0.54%] index_select random_sorted : Elapsed 0.015 ms (1.484 ms / 100) 1.484 -> 1.491 ( +0.47%) [ +0.20% +0.20% +0.00% / +0.47% +0.67% +0.54%] index_select perm : Elapsed 0.015 ms (1.487 ms / 100) 1.483 -> 1.486 ( +0.20%) [ +0.07% +0.00% +0.27% / +0.20% +0.54% +0.74%] index_select perm_sorted : Elapsed 0.015 ms (1.484 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 5, 20)) A = [16, 5, 4, 40] (stride (1, 2560, 640, 16)) dim = 3 2.406 -> 2.412 ( +0.25%) [ +0.00% +0.25% +0.17% / +0.25% +0.37% +0.25%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.423 -> 2.419 ( -0.17%) [ +0.12% +0.00% +0.12% / +0.29% -0.17% -0.12%] index_select wrap : Elapsed 0.024 ms (2.426 ms / 100) 2.424 -> 2.419 ( -0.21%) [ +0.08% +0.00% +0.00% / +0.04% -0.21% -0.17%] index_select linear : Elapsed 0.024 ms (2.426 ms / 100) 2.423 -> 2.422 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.00% +0.04%] index_select reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.29% +0.00% +0.08% / +0.12% +0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.418 ms / 100) 2.409 -> 2.411 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.37% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.411 ms / 100) 2.421 -> 2.426 ( +0.21%) [ +0.21% +0.00% +0.04% / +0.25% +0.29% +0.21%] index_select spread : Elapsed 0.024 ms (2.426 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.08% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.415 -> 2.416 ( +0.04%) [ +0.17% +0.17% +0.00% / +0.04% +0.04% +0.04%] index_select strided 5 : Elapsed 0.024 ms (2.419 ms / 100) 2.422 -> 2.426 ( +0.17%) [ +0.25% +0.00% +0.08% / +0.21% +0.17% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.12% +0.21% +0.00% / +0.00% +0.04% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.415 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.04% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.418 ms / 100) 2.421 -> 2.419 ( -0.08%) [ +0.12% +0.17% +0.00% / -0.08% +0.33% +0.25%] index_select random : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.25% +0.12% +0.00% / +0.17% +0.12% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.427 ms / 100) 2.426 -> 2.425 ( -0.04%) [ +0.08% +0.00% +0.00% / -0.04% +0.08% +0.08%] index_select perm : Elapsed 0.024 ms (2.428 ms / 100) 2.424 -> 2.424 ( +0.00%) [ +0.00% +0.08% +0.12% / +0.00% +0.04% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 5, 20)) A = [16, 5, 4, 40] (stride (1, 2560, 16, 64)) dim = 3 2.445 -> 2.448 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.41% +0.37%] index_select const : Elapsed 0.024 ms (2.445 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.00% +0.24% +0.20% / +0.12% +0.20% +0.08%] index_select wrap : Elapsed 0.025 ms (2.456 ms / 100) 2.455 -> 2.458 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.33% +0.20% +0.12%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.16% +0.08% +0.00% / +0.08% -0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.445 -> 2.450 ( +0.20%) [ +0.00% +0.25% +0.25% / +0.29% +0.25% +0.20%] index_select skip64 : Elapsed 0.024 ms (2.445 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.04% +0.24%] index_select skip256 : Elapsed 0.024 ms (2.450 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.20% +0.20%] index_select spread : Elapsed 0.025 ms (2.458 ms / 100) 2.459 -> 2.456 ( -0.12%) [ +0.00% +0.04% +0.04% / -0.12% +0.12% +0.04%] index_select strided 3 : Elapsed 0.025 ms (2.459 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.16% +0.20% +0.00% / +0.12% +0.24% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.455 ms / 100) 2.452 -> 2.458 ( +0.24%) [ +0.08% +0.00% +0.08% / +0.24% +0.33% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.29% +0.25%] index_select strided 8 : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.20% +0.12% +0.00% / +0.16% +0.20% +0.29%] index_select strided 16 : Elapsed 0.025 ms (2.454 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.20% +0.29%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.04% +0.12% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% -0.04% +0.04%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.457 -> 2.450 ( -0.28%) [ +0.16% +0.00% +0.16% / +0.20% -0.28% -0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) B = [16, 5, 4, 20] (stride (400, 1, 5, 20)) A = [16, 5, 4, 40] (stride (4, 64, 1, 320)) dim = 3 2.448 -> 2.448 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.04% +0.00% +0.12%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.459 -> 2.454 ( -0.20%) [ +0.04% +0.00% +0.00% / -0.04% -0.20% -0.12%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.12% +0.00% +0.20% / +0.20% +0.08% +0.04%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.12% -0.08% +0.04%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.29% +0.29% +0.00% / +0.16% +0.12% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.445 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.04% +0.33% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.445 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.20% +0.08% +0.00% / +0.12% +0.29% +0.29%] index_select spread : Elapsed 0.025 ms (2.458 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.00% +0.20% +0.04% / +0.08% +0.33% +0.41%] index_select strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.16% +0.20%] index_select strided 5 : Elapsed 0.024 ms (2.450 ms / 100) 2.456 -> 2.457 ( +0.04%) [ +0.04% +0.00% +0.12% / +0.04% +0.12% +0.04%] index_select strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.25% +0.20% +0.00% / +0.25% +0.12% +0.08%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.29% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.451 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.00% +0.04% +0.08% / +0.00% +0.00% -0.08%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.00% +0.20% +0.16% / +0.24% -0.04% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.12% +0.00% +0.00% / +0.28% -0.08% +0.00%] index_select perm : Elapsed 0.025 ms (2.461 ms / 100) 2.457 -> 2.454 ( -0.12%) [ +0.00% +0.08% +0.08% / -0.04% -0.12% -0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.457 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 20, 1)) A = [16, 5, 4, 40] (stride (1, 2560, 16, 64)) dim = 3 2.441 -> 2.443 ( +0.08%) [ +0.29% +0.00% +0.33% / +0.08% +0.41% +0.37%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.20% +0.20% +0.00% / +0.24% +0.24% +0.08%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% -0.04% +0.12%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.451 ( -0.20%) [ +0.08% +0.16% +0.00% / +0.08% -0.20% -0.08%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.16% +0.00% / -0.08% +0.12% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.445 -> 2.444 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% +0.20% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.448 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.16% +0.20% +0.00% / +0.00% +0.16% +0.24%] index_select spread : Elapsed 0.025 ms (2.456 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.08% +0.08% +0.00% / +0.04% +0.00% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.29% +0.04% / +0.12% +0.20% +0.04%] index_select strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.33% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.16% +0.04% +0.20%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.448 ( +0.08%) [ +0.25% +0.00% +0.16% / +0.08% +0.25% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.04% +0.00% +0.00% / +0.04% +0.12% -0.12%] index_select random : Elapsed 0.025 ms (2.454 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.04% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.453 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.04% -0.04%] index_select perm : Elapsed 0.025 ms (2.455 ms / 100) 2.456 -> 2.448 ( -0.33%) [ +0.12% +0.00% +0.12% / +0.04% -0.33% -0.29%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [16, 5, 4, 20] (stride (80, 1280, 1, 4)) A = [16, 5, 4, 40] (stride (200, 1, 3200, 5)) dim = 3 2.449 -> 2.446 ( -0.12%) [ +0.00% +0.00% +0.08% / -0.12% +0.16% +0.16%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.463 -> 2.457 ( -0.24%) [ +0.08% +0.00% +0.08% / +0.08% -0.24% -0.12%] index_select wrap : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.457 ( -0.36%) [ +0.00% +0.12% +0.08% / +0.00% -0.32% -0.36%] index_select linear : Elapsed 0.025 ms (2.466 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.16% +0.08% +0.00%] index_select reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.12% +0.12% +0.16%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.12% +0.04% +0.00% / -0.16% +0.16% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.452 ms / 100) 2.472 -> 2.474 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.16% +0.44%] index_select spread : Elapsed 0.025 ms (2.473 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.00% +0.08% +0.00% / -0.04% +0.16% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.12% +0.24% +0.24%] index_select strided 5 : Elapsed 0.025 ms (2.457 ms / 100) 2.468 -> 2.471 ( +0.12%) [ +0.00% +0.16% +0.16% / +0.20% +0.24% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.468 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.16% +0.12% +0.00% / +0.08% +0.16% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.00% +0.16% +0.00% / -0.08% +0.29% +0.24%] index_select strided 16 : Elapsed 0.025 ms (2.455 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.16% +0.16% +0.00% / -0.04% +0.08% +0.20%] index_select random : Elapsed 0.025 ms (2.472 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.20% +0.41% +0.00% / +0.12% +0.00% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.466 ( -0.08%) [ +0.00% +0.08% +0.12% / -0.08% +0.00% +0.00%] index_select perm : Elapsed 0.025 ms (2.468 ms / 100) 2.465 -> 2.469 ( +0.16%) [ +0.16% +0.20% +0.00% / +0.24% +0.45% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) B = [16, 5, 4, 20] (stride (20, 1280, 320, 1)) A = [16, 5, 4, 40] (stride (5, 1, 3200, 80)) dim = 3 1.460 -> 1.451 ( -0.62%) [ +0.00% +0.07% +0.14% / -0.62% -0.62% -0.48%] index_select const : Elapsed 0.015 ms (1.460 ms / 100) 1.486 -> 1.475 ( -0.74%) [ +0.13% +0.34% +0.00% / -0.54% -0.54% -0.74%] index_select wrap : Elapsed 0.015 ms (1.488 ms / 100) 1.486 -> 1.475 ( -0.74%) [ +0.13% +0.00% +0.00% / -0.47% -0.74% -0.54%] index_select linear : Elapsed 0.015 ms (1.488 ms / 100) 1.484 -> 1.477 ( -0.47%) [ +0.13% +0.00% +0.07% / -0.47% -0.47% -0.40%] index_select reverse : Elapsed 0.015 ms (1.486 ms / 100) 1.456 -> 1.448 ( -0.55%) [ +0.00% +0.34% +0.07% / -0.55% +0.14% -0.27%] index_select skip64 : Elapsed 0.015 ms (1.456 ms / 100) 1.459 -> 1.451 ( -0.55%) [ +0.07% +0.21% +0.00% / -0.55% -0.41% -0.21%] index_select skip256 : Elapsed 0.015 ms (1.460 ms / 100) 1.485 -> 1.475 ( -0.67%) [ +0.00% +0.13% +0.00% / -0.67% -0.27% -0.20%] index_select spread : Elapsed 0.015 ms (1.485 ms / 100) 1.483 -> 1.475 ( -0.54%) [ +0.00% +0.27% +0.13% / -0.54% -0.47% -0.27%] index_select strided 3 : Elapsed 0.015 ms (1.483 ms / 100) 1.471 -> 1.464 ( -0.48%) [ +0.14% +0.07% +0.00% / -0.48% -0.34% -0.34%] index_select strided 5 : Elapsed 0.015 ms (1.473 ms / 100) 1.484 -> 1.477 ( -0.47%) [ +0.20% +0.00% +0.07% / -0.40% -0.34% -0.47%] index_select strided 7 : Elapsed 0.015 ms (1.487 ms / 100) 1.465 -> 1.459 ( -0.41%) [ +0.20% +0.07% +0.00% / -0.41% -0.07% +0.14%] index_select strided 8 : Elapsed 0.015 ms (1.468 ms / 100) 1.466 -> 1.456 ( -0.68%) [ +0.00% +0.07% +0.20% / -0.68% -0.20% +0.00%] index_select strided 16 : Elapsed 0.015 ms (1.466 ms / 100) 1.479 -> 1.468 ( -0.74%) [ +0.00% +0.34% +0.14% / -0.74% +0.00% -0.27%] index_select random : Elapsed 0.015 ms (1.479 ms / 100) 1.478 -> 1.470 ( -0.54%) [ +0.00% +0.20% +0.07% / -0.54% -0.27% -0.27%] index_select random_sorted : Elapsed 0.015 ms (1.478 ms / 100) 1.485 -> 1.476 ( -0.61%) [ +0.00% +0.00% +0.07% / -0.61% -0.47% -0.61%] index_select perm : Elapsed 0.015 ms (1.485 ms / 100) 1.485 -> 1.475 ( -0.67%) [ +0.00% +0.20% +0.13% / -0.34% -0.61% -0.67%] index_select perm_sorted : Elapsed 0.015 ms (1.485 ms / 100) B = [16, 5, 4, 20] (stride (1, 1280, 16, 64)) A = [16, 5, 4, 40] (stride (40, 2560, 640, 1)) dim = 3 2.413 -> 2.413 ( +0.00%) [ +0.17% +0.04% +0.00% / +0.00% +0.17% +0.12%] index_select const : Elapsed 0.024 ms (2.417 ms / 100) 2.423 -> 2.417 ( -0.25%) [ +0.17% +0.04% +0.00% / +0.12% -0.25% -0.12%] index_select wrap : Elapsed 0.024 ms (2.427 ms / 100) 2.421 -> 2.417 ( -0.17%) [ +0.04% +0.00% +0.00% / +0.25% -0.17% -0.04%] index_select linear : Elapsed 0.024 ms (2.422 ms / 100) 2.422 -> 2.421 ( -0.04%) [ +0.08% +0.21% +0.00% / +0.17% +0.25% -0.04%] index_select reverse : Elapsed 0.024 ms (2.424 ms / 100) 2.413 -> 2.418 ( +0.21%) [ +0.12% +0.00% +0.21% / +0.21% +0.33% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.418 ( +0.29%) [ +0.04% +0.00% +0.12% / +0.37% +0.29% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.412 ms / 100) 2.429 -> 2.426 ( -0.12%) [ +0.04% +0.00% +0.04% / -0.12% +0.25% +0.33%] index_select spread : Elapsed 0.024 ms (2.430 ms / 100) 2.426 -> 2.426 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.41% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.425 -> 2.430 ( +0.21%) [ +0.21% +0.25% +0.00% / +0.25% +0.21% +0.41%] index_select strided 5 : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.429 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.33% +0.45%] index_select strided 7 : Elapsed 0.024 ms (2.429 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.16% +0.21% +0.00% / +0.04% +0.21% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.431 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.04% +0.33% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.429 ms / 100) 2.429 -> 2.429 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.41%] index_select random : Elapsed 0.024 ms (2.429 ms / 100) 2.427 -> 2.429 ( +0.08%) [ +0.00% +0.21% +0.25% / +0.08% +0.29% +0.25%] index_select random_sorted : Elapsed 0.024 ms (2.427 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.00% +0.25% +0.29% / +0.12% +0.29% +0.29%] index_select perm : Elapsed 0.024 ms (2.426 ms / 100) 2.427 -> 2.431 ( +0.16%) [ +0.25% +0.00% +0.25% / +0.16% +0.21% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.433 ms / 100) B = [16, 5, 4, 20] (stride (5, 1, 1600, 80)) A = [16, 5, 4, 40] (stride (1, 640, 3200, 16)) dim = 3 2.396 -> 2.399 ( +0.13%) [ +0.08% +0.00% +0.04% / +0.21% +0.13% +0.50%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.00% +0.08% +0.17% / +0.00% -0.04% +0.04%] index_select wrap : Elapsed 0.024 ms (2.416 ms / 100) 2.417 -> 2.417 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.12% +0.00% +0.00%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.414 ( -0.21%) [ +0.08% +0.00% +0.00% / -0.04% -0.17% -0.21%] index_select reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.402 -> 2.401 ( -0.04%) [ +0.00% +0.04% +0.12% / -0.04% +0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.402 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.00% +0.17% +0.08% / +0.00% +0.25% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.00% +0.12% +0.04% / +0.12% -0.04% +0.12%] index_select spread : Elapsed 0.024 ms (2.415 ms / 100) 2.417 -> 2.416 ( -0.04%) [ +0.00% +0.04% +0.08% / +0.00% +0.08% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.17% +0.12% +0.00% / +0.12% -0.04% -0.04%] index_select strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.415 -> 2.417 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.12% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.400 -> 2.402 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.21% +0.29% +0.08%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.402 -> 2.405 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.17% +0.12% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.404 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.04% +0.17% +0.00% / +0.25% +0.12% +0.21%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.04% +0.08% +0.00% / +0.12% -0.04% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.00% +0.12% +0.29% / +0.17% -0.04% +0.17%] index_select perm : Elapsed 0.024 ms (2.414 ms / 100) 2.419 -> 2.410 ( -0.37%) [ +0.12% +0.00% +0.00% / +0.08% -0.37% -0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) B = [16, 5, 4, 20] (stride (5, 1, 1600, 80)) A = [16, 5, 4, 40] (stride (5, 1, 80, 320)) dim = 3 1.444 -> 1.448 ( +0.28%) [ +0.48% +0.35% +0.00% / +0.55% +0.35% +0.28%] index_select const : Elapsed 0.015 ms (1.451 ms / 100) 1.462 -> 1.461 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.07% +0.34%] index_select wrap : Elapsed 0.015 ms (1.462 ms / 100) 1.463 -> 1.468 ( +0.34%) [ +0.07% +0.00% +0.14% / +0.34% +0.41% +0.34%] index_select linear : Elapsed 0.015 ms (1.464 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.14% +0.27%] index_select reverse : Elapsed 0.015 ms (1.465 ms / 100) 1.449 -> 1.451 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.14% +0.14%] index_select skip64 : Elapsed 0.015 ms (1.450 ms / 100) 1.449 -> 1.448 ( -0.07%) [ +0.00% +0.14% +0.41% / -0.07% +0.00% +0.07%] index_select skip256 : Elapsed 0.014 ms (1.449 ms / 100) 1.466 -> 1.468 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.14% +0.20%] index_select spread : Elapsed 0.015 ms (1.467 ms / 100) 1.468 -> 1.472 ( +0.27%) [ +0.14% +0.34% +0.00% / +0.27% +0.27% +0.61%] index_select strided 3 : Elapsed 0.015 ms (1.470 ms / 100) 1.455 -> 1.457 ( +0.14%) [ +0.21% +0.00% +0.21% / +0.14% +1.17% +1.24%] index_select strided 5 : Elapsed 0.015 ms (1.458 ms / 100) 1.466 -> 1.470 ( +0.27%) [ +0.14% +0.00% +0.14% / +0.27% +0.34% +0.41%] index_select strided 7 : Elapsed 0.015 ms (1.468 ms / 100) 1.452 -> 1.453 ( +0.07%) [ +0.07% +0.21% +0.00% / +0.07% +0.34% +0.48%] index_select strided 8 : Elapsed 0.015 ms (1.453 ms / 100) 1.451 -> 1.453 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.14% +0.34% +0.55%] index_select strided 16 : Elapsed 0.015 ms (1.454 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.27% +0.07% +0.00% / +0.34% +0.34% +0.14%] index_select random : Elapsed 0.015 ms (1.466 ms / 100) 1.463 -> 1.461 ( -0.14%) [ +0.41% +0.00% +0.41% / +0.07% -0.14% -0.14%] index_select random_sorted : Elapsed 0.015 ms (1.469 ms / 100) 1.466 -> 1.471 ( +0.34%) [ +0.34% +0.00% +0.41% / +0.34% +0.61% +0.75%] index_select perm : Elapsed 0.015 ms (1.471 ms / 100) 1.471 -> 1.471 ( +0.00%) [ +0.20% +0.00% +0.34% / +0.34% +0.27% +0.00%] index_select perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) B = [16, 5, 4, 20] (stride (20, 4, 1, 320)) A = [16, 5, 4, 40] (stride (5, 1, 3200, 80)) dim = 3 2.305 -> 2.309 ( +0.17%) [ +0.00% +0.13% +0.00% / +0.17% +0.43% +0.26%] index_select const : Elapsed 0.023 ms (2.305 ms / 100) 2.324 -> 2.326 ( +0.09%) [ +0.22% +0.09% +0.00% / +0.17% +0.17% +0.09%] index_select wrap : Elapsed 0.023 ms (2.329 ms / 100) 2.326 -> 2.325 ( -0.04%) [ +0.00% +0.09% +0.00% / +0.09% +0.00% -0.04%] index_select linear : Elapsed 0.023 ms (2.326 ms / 100) 2.325 -> 2.326 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.09% +0.04% +0.13%] index_select reverse : Elapsed 0.023 ms (2.326 ms / 100) 2.309 -> 2.307 ( -0.09%) [ +0.04% +0.09% +0.00% / -0.09% +0.04% +0.00%] index_select skip64 : Elapsed 0.023 ms (2.310 ms / 100) 2.304 -> 2.304 ( +0.00%) [ +0.26% +0.00% +0.00% / +0.00% +0.39% +0.35%] index_select skip256 : Elapsed 0.023 ms (2.310 ms / 100) 2.322 -> 2.326 ( +0.17%) [ +0.00% +0.13% +0.04% / +0.17% +0.30% +0.22%] index_select spread : Elapsed 0.023 ms (2.322 ms / 100) 2.323 -> 2.326 ( +0.13%) [ +0.26% +0.09% +0.00% / +0.13% +0.34% +0.22%] index_select strided 3 : Elapsed 0.023 ms (2.329 ms / 100) 2.312 -> 2.315 ( +0.13%) [ +0.00% +0.17% +0.13% / +0.13% +0.17% +0.22%] index_select strided 5 : Elapsed 0.023 ms (2.312 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.13% +0.17% +0.00% / +0.13% +0.13% +0.09%] index_select strided 7 : Elapsed 0.023 ms (2.326 ms / 100) 2.308 -> 2.310 ( +0.09%) [ +0.04% +0.00% +0.00% / +0.09% +0.43% +0.39%] index_select strided 8 : Elapsed 0.023 ms (2.309 ms / 100) 2.305 -> 2.311 ( +0.26%) [ +0.35% +0.00% +0.13% / +0.26% +0.61% +0.56%] index_select strided 16 : Elapsed 0.023 ms (2.313 ms / 100) 2.323 -> 2.324 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.04% +0.13% +0.30%] index_select random : Elapsed 0.023 ms (2.324 ms / 100) 2.323 -> 2.322 ( -0.04%) [ +0.00% +0.04% +0.09% / +0.04% -0.04% +0.00%] index_select random_sorted : Elapsed 0.023 ms (2.323 ms / 100) 2.320 -> 2.324 ( +0.17%) [ +0.22% +0.00% +0.00% / +0.17% +0.34% +0.17%] index_select perm : Elapsed 0.023 ms (2.325 ms / 100) 2.323 -> 2.318 ( -0.22%) [ +0.17% +0.00% +0.04% / +0.13% -0.13% -0.22%] index_select perm_sorted : Elapsed 0.023 ms (2.327 ms / 100) out_shape = [20, 5, 40, 4] in_shape = [16, 5, 40, 4] idx_dim = 0 B = [20, 5, 40, 4] (stride (800, 160, 4, 1)) A = [16, 5, 40, 4] (stride (40, 640, 1, 3200)) dim = 0 3.336 -> 3.338 ( +0.06%) [ +0.00% +0.00% +0.03% / +0.06% +0.72% +0.69%] index_add_ linear : Elapsed 0.033 ms (3.336 ms / 100) 3.206 -> 3.206 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.72% +0.69%] index_copy_ linear : Elapsed 0.032 ms (3.206 ms / 100) 3.336 -> 3.339 ( +0.09%) [ +0.09% +0.00% +0.03% / +0.09% +0.69% +0.66%] index_add_ reverse : Elapsed 0.033 ms (3.339 ms / 100) 3.211 -> 3.211 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.72% +0.69%] index_copy_ reverse : Elapsed 0.032 ms (3.212 ms / 100) 3.338 -> 3.339 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.57% +0.57%] index_add_ spread : Elapsed 0.033 ms (3.339 ms / 100) 3.206 -> 3.207 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.75% +0.75%] index_copy_ spread : Elapsed 0.032 ms (3.207 ms / 100) 3.340 -> 3.340 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.78%] index_add_ strided 3 : Elapsed 0.033 ms (3.340 ms / 100) 3.213 -> 3.214 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.75% +0.75%] index_copy_ strided 3 : Elapsed 0.032 ms (3.213 ms / 100) 3.344 -> 3.345 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.57% +0.63%] index_add_ strided 7 : Elapsed 0.033 ms (3.346 ms / 100) 3.217 -> 3.219 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.59% +0.50%] index_copy_ strided 7 : Elapsed 0.032 ms (3.219 ms / 100) 3.338 -> 3.339 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.57% +0.57%] index_add_ perm : Elapsed 0.033 ms (3.339 ms / 100) 3.212 -> 3.212 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.50% +0.50%] index_copy_ perm : Elapsed 0.032 ms (3.212 ms / 100) 3.335 -> 3.338 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.033 ms (3.337 ms / 100) 3.211 -> 3.212 ( +0.03%) [ +0.00% +0.12% +0.00% / +0.03% +0.72% +0.69%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.211 ms / 100) 5.376 -> 5.383 ( +0.13%) [ +0.00% +0.19% +0.15% / +0.13% +0.15% +0.17%] index_select const : Elapsed 0.054 ms (5.376 ms / 100) 5.406 -> 5.405 ( -0.02%) [ +0.00% +0.13% +0.11% / -0.02% +0.11% +0.04%] index_select wrap : Elapsed 0.054 ms (5.406 ms / 100) 5.405 -> 5.405 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.02% +0.00% +0.17%] index_select linear : Elapsed 0.054 ms (5.412 ms / 100) 5.403 -> 5.405 ( +0.04%) [ +0.13% +0.00% +0.07% / +0.04% +0.04% +0.19%] index_select reverse : Elapsed 0.054 ms (5.410 ms / 100) 5.383 -> 5.386 ( +0.06%) [ +0.00% +0.02% +0.04% / +0.06% +0.15% +0.11%] index_select skip64 : Elapsed 0.054 ms (5.383 ms / 100) 5.380 -> 5.380 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.13% +0.00% +0.13%] index_select skip256 : Elapsed 0.054 ms (5.388 ms / 100) 5.407 -> 5.411 ( +0.07%) [ +0.04% +0.09% +0.00% / +0.11% +0.20% +0.07%] index_select spread : Elapsed 0.054 ms (5.409 ms / 100) 5.405 -> 5.407 ( +0.04%) [ +0.07% +0.00% +0.07% / +0.07% +0.04% +0.07%] index_select strided 3 : Elapsed 0.054 ms (5.409 ms / 100) 5.408 -> 5.407 ( -0.02%) [ +0.00% +0.17% +0.11% / -0.02% +0.02% +0.15%] index_select strided 5 : Elapsed 0.054 ms (5.408 ms / 100) 5.403 -> 5.406 ( +0.06%) [ +0.04% +0.15% +0.00% / +0.06% +0.06% +0.26%] index_select strided 7 : Elapsed 0.054 ms (5.405 ms / 100) 5.388 -> 5.381 ( -0.13%) [ +0.09% +0.00% +0.02% / +0.04% +0.04% -0.13%] index_select strided 8 : Elapsed 0.054 ms (5.393 ms / 100) 5.399 -> 5.400 ( +0.02%) [ +0.00% +0.13% +0.15% / +0.07% +0.09% +0.02%] index_select random : Elapsed 0.054 ms (5.399 ms / 100) 5.406 -> 5.405 ( -0.02%) [ +0.00% +0.07% +0.06% / -0.02% +0.13% +0.02%] index_select random_sorted : Elapsed 0.054 ms (5.406 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 20, 5)) A = [16, 5, 40, 4] (stride (5, 1, 80, 3200)) dim = 0 4.110 -> 4.119 ( +0.22%) [ +0.02% +0.07% +0.00% / +0.22% +0.68% +0.71%] index_add_ linear : Elapsed 0.041 ms (4.111 ms / 100) 3.936 -> 3.933 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.61% +0.61%] index_copy_ linear : Elapsed 0.039 ms (3.936 ms / 100) 4.101 -> 4.102 ( +0.02%) [ +0.00% +0.10% +0.07% / +0.02% +0.80% +0.66%] index_add_ reverse : Elapsed 0.041 ms (4.101 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.69% +0.79%] index_copy_ reverse : Elapsed 0.039 ms (3.927 ms / 100) 4.113 -> 4.112 ( -0.02%) [ +0.07% +0.10% +0.00% / -0.02% +0.58% +0.63%] index_add_ spread : Elapsed 0.041 ms (4.116 ms / 100) 3.929 -> 3.926 ( -0.08%) [ +0.10% +0.00% +0.05% / -0.08% +0.69% +0.76%] index_copy_ spread : Elapsed 0.039 ms (3.933 ms / 100) 4.110 -> 4.112 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.041 ms (4.111 ms / 100) 3.922 -> 3.925 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.84% +0.79%] index_copy_ strided 3 : Elapsed 0.039 ms (3.923 ms / 100) 4.103 -> 4.106 ( +0.07%) [ +0.00% +0.02% +0.10% / +0.07% +0.78% +0.61%] index_add_ strided 7 : Elapsed 0.041 ms (4.103 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.03% +0.10% +0.00% / -0.03% +0.61% +0.71%] index_copy_ strided 7 : Elapsed 0.039 ms (3.927 ms / 100) 4.107 -> 4.107 ( +0.00%) [ +0.15% +0.00% +0.12% / +0.00% +0.80% +0.66%] index_add_ perm : Elapsed 0.041 ms (4.113 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.89% +0.86%] index_copy_ perm : Elapsed 0.039 ms (3.933 ms / 100) 4.107 -> 4.109 ( +0.05%) [ +0.27% +0.29% +0.00% / +0.05% +0.75% +0.80%] index_add_ perm_sorted : Elapsed 0.041 ms (4.118 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.74% +0.66%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 5.468 -> 5.473 ( +0.09%) [ +0.00% +0.20% +0.20% / +0.13% +0.18% +0.09%] index_select const : Elapsed 0.055 ms (5.468 ms / 100) 5.481 -> 5.479 ( -0.04%) [ +0.00% +0.24% +0.11% / -0.04% +0.05% +0.11%] index_select wrap : Elapsed 0.055 ms (5.481 ms / 100) 5.482 -> 5.476 ( -0.11%) [ +0.00% +0.13% +0.16% / -0.11% +0.05% -0.02%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.09% +0.07% +0.13%] index_select reverse : Elapsed 0.055 ms (5.487 ms / 100) 5.468 -> 5.461 ( -0.13%) [ +0.00% +0.09% +0.16% / -0.13% +0.09% +0.26%] index_select skip64 : Elapsed 0.055 ms (5.468 ms / 100) 5.474 -> 5.471 ( -0.05%) [ +0.00% +0.05% +0.02% / +0.02% +0.04% -0.05%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.09% +0.05% +0.00%] index_select spread : Elapsed 0.055 ms (5.487 ms / 100) 5.477 -> 5.486 ( +0.16%) [ +0.00% +0.16% +0.11% / +0.26% +0.16% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.477 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.11% +0.09% +0.00% / +0.05% +0.13% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.484 -> 5.482 ( -0.04%) [ +0.05% +0.09% +0.00% / -0.04% +0.15% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.464 -> 5.470 ( +0.11%) [ +0.11% +0.00% +0.15% / +0.11% +0.27% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.05% +0.22%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.05% +0.13% +0.00% / +0.00% +0.13% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [20, 5, 40, 4] (stride (20, 1, 400, 5)) A = [16, 5, 40, 4] (stride (40, 2560, 1, 640)) dim = 0 3.949 -> 3.950 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.76% +0.71%] index_add_ linear : Elapsed 0.040 ms (3.950 ms / 100) 3.820 -> 3.819 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.79% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.821 ms / 100) 3.947 -> 3.948 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.76% +0.73%] index_add_ reverse : Elapsed 0.039 ms (3.947 ms / 100) 3.822 -> 3.823 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.03% +0.65% +0.71%] index_copy_ reverse : Elapsed 0.038 ms (3.822 ms / 100) 3.949 -> 3.950 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.81% +0.86%] index_add_ spread : Elapsed 0.040 ms (3.950 ms / 100) 3.825 -> 3.828 ( +0.08%) [ +0.00% +0.08% +0.05% / +0.08% +0.92% +0.86%] index_copy_ spread : Elapsed 0.038 ms (3.825 ms / 100) 3.943 -> 3.944 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.84% +0.84%] index_add_ strided 3 : Elapsed 0.039 ms (3.944 ms / 100) 3.825 -> 3.826 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.84% +0.81%] index_copy_ strided 3 : Elapsed 0.038 ms (3.826 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.039 ms (3.946 ms / 100) 3.825 -> 3.827 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.05% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.038 ms (3.828 ms / 100) 3.952 -> 3.955 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.63% +0.63%] index_add_ perm : Elapsed 0.040 ms (3.954 ms / 100) 3.825 -> 3.828 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.81% +0.78%] index_copy_ perm : Elapsed 0.038 ms (3.828 ms / 100) 3.947 -> 3.947 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.039 ms (3.948 ms / 100) 3.821 -> 3.822 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.68% +0.68%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.824 ms / 100) 5.549 -> 5.558 ( +0.16%) [ +0.22% +0.18% +0.00% / +0.16% +0.16% +0.27%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.587 -> 5.584 ( -0.05%) [ +0.00% +0.14% +0.07% / +0.00% -0.05% -0.04%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.587 -> 5.585 ( -0.04%) [ +0.14% +0.00% +0.04% / -0.04% +0.04% +0.02%] index_select linear : Elapsed 0.056 ms (5.595 ms / 100) 5.583 -> 5.582 ( -0.02%) [ +0.20% +0.09% +0.00% / -0.02% +0.20% +0.04%] index_select reverse : Elapsed 0.056 ms (5.594 ms / 100) 5.553 -> 5.552 ( -0.02%) [ +0.07% +0.07% +0.00% / -0.02% +0.14% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.553 -> 5.557 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.22% +0.22%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.588 -> 5.584 ( -0.07%) [ +0.05% +0.00% +0.04% / -0.02% -0.07% -0.02%] index_select spread : Elapsed 0.056 ms (5.591 ms / 100) 5.589 -> 5.585 ( -0.07%) [ +0.04% +0.09% +0.00% / -0.07% -0.02% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.591 ms / 100) 5.589 -> 5.578 ( -0.20%) [ +0.02% +0.07% +0.00% / +0.05% +0.02% -0.20%] index_select strided 5 : Elapsed 0.056 ms (5.590 ms / 100) 5.580 -> 5.583 ( +0.05%) [ +0.20% +0.00% +0.16% / +0.05% +0.14% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.591 ms / 100) 5.559 -> 5.561 ( +0.04%) [ +0.09% +0.00% +0.11% / +0.11% +0.04% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.577 -> 5.575 ( -0.04%) [ +0.09% +0.00% +0.11% / -0.04% +0.07% +0.14%] index_select random : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.18% +0.11% +0.00% / +0.04% +0.05% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.589 ms / 100) B = [20, 5, 40, 4] (stride (1, 80, 400, 20)) dim = 0 fill_cnt = 16 2.074 -> 2.074 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +1.21% +1.21%] index_fill_ const : Elapsed 0.021 ms (2.075 ms / 100) 2.076 -> 2.081 ( +0.24%) [ +0.05% +0.00% +0.14% / +0.24% +1.11% +1.01%] index_fill_ linear : Elapsed 0.021 ms (2.077 ms / 100) 2.082 -> 2.083 ( +0.05%) [ +0.19% +0.05% +0.00% / +0.05% +0.96% +1.06%] index_fill_ reverse : Elapsed 0.021 ms (2.086 ms / 100) 2.073 -> 2.074 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +1.21% +1.21%] index_fill_ skip64 : Elapsed 0.021 ms (2.073 ms / 100) 2.077 -> 2.075 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.96% +0.96%] index_fill_ skip256 : Elapsed 0.021 ms (2.078 ms / 100) 2.079 -> 2.079 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.91% +0.87%] index_fill_ spread : Elapsed 0.021 ms (2.083 ms / 100) 2.079 -> 2.080 ( +0.05%) [ +0.05% +0.14% +0.00% / +0.05% +0.82% +0.82%] index_fill_ strided 3 : Elapsed 0.021 ms (2.080 ms / 100) 2.086 -> 2.086 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.62% +0.86%] index_fill_ strided 5 : Elapsed 0.021 ms (2.086 ms / 100) 2.083 -> 2.083 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.72% +0.72%] index_fill_ strided 7 : Elapsed 0.021 ms (2.083 ms / 100) 2.082 -> 2.082 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.77% +0.67%] index_fill_ strided 8 : Elapsed 0.021 ms (2.083 ms / 100) 2.080 -> 2.079 ( -0.05%) [ +0.10% +0.00% +0.05% / -0.05% +0.82% +0.87%] index_fill_ strided 16 : Elapsed 0.021 ms (2.082 ms / 100) 2.081 -> 2.083 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.72% +0.82%] index_fill_ random : Elapsed 0.021 ms (2.081 ms / 100) 2.083 -> 2.084 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.67% +0.58%] index_fill_ random_sorted : Elapsed 0.021 ms (2.086 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.05% +0.10% +0.00% / -0.05% +0.38% +0.33%] index_fill_ perm : Elapsed 0.021 ms (2.091 ms / 100) 2.084 -> 2.086 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.10% +0.48% +0.62%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.086 ms / 100) B = [20, 5, 40, 4] (stride (5, 1, 400, 100)) dim = 0 fill_cnt = 16 1.057 -> 1.058 ( +0.09%) [ +0.38% +0.19% +0.00% / +0.09% +1.14% +1.23%] index_fill_ const : Elapsed 0.011 ms (1.061 ms / 100) 1.058 -> 1.058 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.13% +1.13%] index_fill_ linear : Elapsed 0.011 ms (1.058 ms / 100) 1.057 -> 1.057 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +1.23% +1.23%] index_fill_ reverse : Elapsed 0.011 ms (1.057 ms / 100) 1.056 -> 1.056 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +1.23% +1.42%] index_fill_ skip64 : Elapsed 0.011 ms (1.057 ms / 100) 1.059 -> 1.059 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +1.04% +0.85%] index_fill_ skip256 : Elapsed 0.011 ms (1.059 ms / 100) 1.059 -> 1.060 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.94% +0.85%] index_fill_ spread : Elapsed 0.011 ms (1.059 ms / 100) 1.059 -> 1.059 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.85% +0.85%] index_fill_ strided 3 : Elapsed 0.011 ms (1.059 ms / 100) 1.059 -> 1.059 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.85% +0.85%] index_fill_ strided 5 : Elapsed 0.011 ms (1.059 ms / 100) 1.060 -> 1.061 ( +0.09%) [ +0.09% +0.19% +0.00% / +0.09% +0.94% +0.94%] index_fill_ strided 7 : Elapsed 0.011 ms (1.061 ms / 100) 1.060 -> 1.062 ( +0.19%) [ +0.19% +0.09% +0.00% / +0.19% +0.94% +0.85%] index_fill_ strided 8 : Elapsed 0.011 ms (1.062 ms / 100) 1.059 -> 1.060 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.85% +0.94%] index_fill_ strided 16 : Elapsed 0.011 ms (1.060 ms / 100) 1.060 -> 1.061 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.94% +0.94%] index_fill_ random : Elapsed 0.011 ms (1.060 ms / 100) 1.063 -> 1.064 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.38% +0.38%] index_fill_ random_sorted : Elapsed 0.011 ms (1.064 ms / 100) 1.063 -> 1.064 ( +0.09%) [ +0.09% +0.19% +0.00% / +0.09% +0.38% +0.38%] index_fill_ perm : Elapsed 0.011 ms (1.064 ms / 100) 1.064 -> 1.064 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.28% +0.28%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.064 ms / 100) out_shape = [16, 20, 40, 4] in_shape = [16, 5, 40, 4] idx_dim = 1 B = [16, 20, 40, 4] (stride (3200, 1, 80, 20)) A = [16, 5, 40, 4] (stride (20, 4, 320, 1)) dim = 1 1.839 -> 1.844 ( +0.27%) [ +0.22% +0.27% +0.00% / +0.38% +0.27% +0.38%] index_add_ linear : Elapsed 0.018 ms (1.843 ms / 100) 1.789 -> 1.799 ( +0.56%) [ +0.22% +0.00% +0.61% / +0.56% +1.17% +1.06%] index_copy_ linear : Elapsed 0.018 ms (1.793 ms / 100) 1.837 -> 1.839 ( +0.11%) [ +0.00% +0.16% +0.49% / +0.11% +0.49% +0.54%] index_add_ reverse : Elapsed 0.018 ms (1.837 ms / 100) 1.788 -> 1.799 ( +0.62%) [ +0.00% +0.11% +0.67% / +0.62% +0.95% +1.17%] index_copy_ reverse : Elapsed 0.018 ms (1.788 ms / 100) 1.859 -> 1.859 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.22% +0.11%] index_add_ spread : Elapsed 0.019 ms (1.861 ms / 100) 1.824 -> 1.826 ( +0.11%) [ +0.00% +0.05% +0.22% / +0.55% +0.27% +0.11%] index_copy_ spread : Elapsed 0.018 ms (1.824 ms / 100) 1.863 -> 1.861 ( -0.11%) [ +0.05% +0.05% +0.00% / +0.05% +0.00% -0.11%] index_add_ strided 3 : Elapsed 0.019 ms (1.864 ms / 100) 1.821 -> 1.830 ( +0.49%) [ +0.00% +0.38% +0.44% / +0.49% +0.55% +0.60%] index_copy_ strided 3 : Elapsed 0.018 ms (1.821 ms / 100) 1.860 -> 1.862 ( +0.11%) [ +0.00% +0.11% +0.27% / +0.16% +0.11% +0.32%] index_add_ strided 7 : Elapsed 0.019 ms (1.860 ms / 100) 1.821 -> 1.827 ( +0.33%) [ +0.27% +0.00% +0.55% / +0.33% +0.66% +0.60%] index_copy_ strided 7 : Elapsed 0.018 ms (1.826 ms / 100) 1.853 -> 1.850 ( -0.16%) [ +0.00% +0.00% +0.00% / +0.05% -0.16% +0.22%] index_add_ perm : Elapsed 0.019 ms (1.853 ms / 100) 1.809 -> 1.818 ( +0.50%) [ +0.22% +0.00% +0.44% / +0.50% +0.66% +0.61%] index_copy_ perm : Elapsed 0.018 ms (1.813 ms / 100) 1.850 -> 1.851 ( +0.05%) [ +0.22% +0.00% +0.27% / +0.05% +0.32% +0.32%] index_add_ perm_sorted : Elapsed 0.019 ms (1.854 ms / 100) 1.810 -> 1.819 ( +0.50%) [ +0.06% +0.00% +0.55% / +0.50% +0.88% +0.61%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.811 ms / 100) 8.302 -> 8.303 ( +0.01%) [ +0.26% +0.00% +0.01% / +0.01% +0.33% +0.30%] index_select const : Elapsed 0.083 ms (8.324 ms / 100) 8.311 -> 8.320 ( +0.11%) [ +0.00% +0.06% +0.13% / +0.11% +0.37% +0.31%] index_select wrap : Elapsed 0.083 ms (8.311 ms / 100) 8.324 -> 8.323 ( -0.01%) [ +0.04% +0.05% +0.00% / -0.01% +0.08% +0.04%] index_select linear : Elapsed 0.083 ms (8.327 ms / 100) 8.326 -> 8.333 ( +0.08%) [ +0.00% +0.12% +0.06% / +0.08% +0.36% +0.18%] index_select reverse : Elapsed 0.083 ms (8.326 ms / 100) 8.304 -> 8.316 ( +0.14%) [ +0.00% +0.01% +0.10% / +0.17% +0.23% +0.14%] index_select skip64 : Elapsed 0.083 ms (8.304 ms / 100) 8.304 -> 8.304 ( +0.00%) [ +0.06% +0.00% +0.10% / +0.00% +0.29% +0.23%] index_select skip256 : Elapsed 0.083 ms (8.309 ms / 100) 8.332 -> 8.332 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_select spread : Elapsed 0.083 ms (8.348 ms / 100) 8.317 -> 8.325 ( +0.10%) [ +0.14% +0.20% +0.00% / +0.18% +0.11% +0.10%] index_select strided 3 : Elapsed 0.083 ms (8.329 ms / 100) 8.322 -> 8.315 ( -0.08%) [ +0.07% +0.00% +0.08% / -0.08% +0.18% +0.11%] index_select random : Elapsed 0.083 ms (8.328 ms / 100) 8.317 -> 8.333 ( +0.19%) [ +0.25% +0.00% +0.24% / +0.19% +0.79% +0.57%] index_select random_sorted : Elapsed 0.083 ms (8.338 ms / 100) B = [16, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [16, 5, 40, 4] (stride (20, 1, 320, 5)) dim = 1 1.932 -> 1.925 ( -0.36%) [ +0.00% +0.47% +0.36% / +0.16% -0.05% -0.36%] index_add_ linear : Elapsed 0.019 ms (1.932 ms / 100) 1.898 -> 1.889 ( -0.47%) [ +0.00% +0.16% +0.05% / +0.11% -0.26% -0.47%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.934 -> 1.927 ( -0.36%) [ +0.00% +0.00% +0.10% / -0.21% -0.31% -0.36%] index_add_ reverse : Elapsed 0.019 ms (1.934 ms / 100) 1.894 -> 1.889 ( -0.26%) [ +0.00% +0.16% +0.32% / -0.05% -0.05% -0.26%] index_copy_ reverse : Elapsed 0.019 ms (1.894 ms / 100) 1.952 -> 1.943 ( -0.46%) [ +0.15% +0.10% +0.00% / +0.20% -0.46% -0.26%] index_add_ spread : Elapsed 0.020 ms (1.955 ms / 100) 1.915 -> 1.914 ( -0.05%) [ +0.00% +0.26% +0.31% / +0.63% +0.10% -0.05%] index_copy_ spread : Elapsed 0.019 ms (1.915 ms / 100) 1.952 -> 1.945 ( -0.36%) [ +0.10% +0.00% +0.26% / +0.31% -0.36% -0.20%] index_add_ strided 3 : Elapsed 0.020 ms (1.954 ms / 100) 1.919 -> 1.911 ( -0.42%) [ +0.00% +0.42% +0.21% / -0.10% -0.42% -0.21%] index_copy_ strided 3 : Elapsed 0.019 ms (1.919 ms / 100) 1.952 -> 1.948 ( -0.20%) [ +0.00% +0.20% +0.20% / +0.26% -0.20% +0.00%] index_add_ strided 7 : Elapsed 0.020 ms (1.952 ms / 100) 1.925 -> 1.915 ( -0.52%) [ +0.16% +0.21% +0.00% / +0.16% -0.21% -0.52%] index_copy_ strided 7 : Elapsed 0.019 ms (1.928 ms / 100) 1.954 -> 1.944 ( -0.51%) [ +0.00% +0.00% +0.00% / +0.10% -0.51% -0.15%] index_add_ perm : Elapsed 0.020 ms (1.954 ms / 100) 1.925 -> 1.917 ( -0.42%) [ +0.00% +0.26% +0.05% / +0.00% -0.26% -0.42%] index_copy_ perm : Elapsed 0.019 ms (1.925 ms / 100) 1.951 -> 1.944 ( -0.36%) [ +0.21% +0.00% +0.31% / +0.10% -0.36% -0.21%] index_add_ perm_sorted : Elapsed 0.020 ms (1.955 ms / 100) 1.917 -> 1.914 ( -0.16%) [ +0.00% +0.47% +0.26% / +0.26% -0.10% -0.16%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.917 ms / 100) 8.628 -> 8.645 ( +0.20%) [ +0.00% +0.12% +0.14% / +0.27% +0.20% +0.32%] index_select const : Elapsed 0.086 ms (8.628 ms / 100) 8.622 -> 8.641 ( +0.22%) [ +0.00% +0.16% +0.27% / +0.22% +0.68% +0.64%] index_select wrap : Elapsed 0.086 ms (8.622 ms / 100) 8.641 -> 8.641 ( +0.00%) [ +0.16% +0.00% +0.07% / +0.00% +0.58% +0.41%] index_select linear : Elapsed 0.087 ms (8.655 ms / 100) 8.636 -> 8.638 ( +0.02%) [ +0.00% +0.06% +0.13% / +0.02% +0.23% +0.43%] index_select reverse : Elapsed 0.086 ms (8.636 ms / 100) 8.630 -> 8.644 ( +0.16%) [ +0.38% +0.00% +0.00% / +0.21% +0.30% +0.16%] index_select skip64 : Elapsed 0.087 ms (8.663 ms / 100) 8.641 -> 8.650 ( +0.10%) [ +0.07% +0.00% +0.03% / +0.13% +0.31% +0.10%] index_select skip256 : Elapsed 0.086 ms (8.647 ms / 100) 8.639 -> 8.634 ( -0.06%) [ +0.00% +0.00% +0.05% / -0.06% +0.34% +0.25%] index_select spread : Elapsed 0.086 ms (8.639 ms / 100) 8.629 -> 8.636 ( +0.08%) [ +0.38% +0.06% +0.00% / +0.08% +0.23% +0.34%] index_select strided 3 : Elapsed 0.087 ms (8.662 ms / 100) 8.629 -> 8.650 ( +0.24%) [ +0.15% +0.01% +0.00% / +0.34% +0.24% +0.60%] index_select random : Elapsed 0.086 ms (8.642 ms / 100) 8.629 -> 8.641 ( +0.14%) [ +0.05% +0.00% +0.08% / +0.14% +0.39% +0.31%] index_select random_sorted : Elapsed 0.086 ms (8.633 ms / 100) B = [16, 20, 40, 4] (stride (1, 2560, 64, 16)) A = [16, 5, 40, 4] (stride (1, 2560, 16, 640)) dim = 1 1.754 -> 1.754 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.97% +0.74%] index_add_ linear : Elapsed 0.018 ms (1.754 ms / 100) 1.714 -> 1.713 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.35% +0.29%] index_copy_ linear : Elapsed 0.017 ms (1.714 ms / 100) 1.757 -> 1.761 ( +0.23%) [ +0.17% +0.00% +0.28% / +0.23% +0.57% +0.28%] index_add_ reverse : Elapsed 0.018 ms (1.760 ms / 100) 1.712 -> 1.716 ( +0.23%) [ +0.00% +0.29% +0.12% / +0.23% +0.35% +0.23%] index_copy_ reverse : Elapsed 0.017 ms (1.712 ms / 100) 1.744 -> 1.747 ( +0.17%) [ +0.00% +0.00% +0.06% / +0.17% +1.49% +1.43%] index_add_ spread : Elapsed 0.017 ms (1.744 ms / 100) 1.695 -> 1.698 ( +0.18%) [ +0.18% +0.00% +0.00% / +0.18% +1.65% +1.59%] index_copy_ spread : Elapsed 0.017 ms (1.698 ms / 100) 1.749 -> 1.752 ( +0.17%) [ +0.00% +0.00% +0.06% / +0.17% +1.54% +1.66%] index_add_ strided 3 : Elapsed 0.017 ms (1.749 ms / 100) 1.698 -> 1.699 ( +0.06%) [ +0.12% +0.29% +0.00% / +0.06% +1.47% +1.65%] index_copy_ strided 3 : Elapsed 0.017 ms (1.700 ms / 100) 1.749 -> 1.748 ( -0.06%) [ +0.06% +0.11% +0.00% / -0.06% +1.14% +1.09%] index_add_ strided 7 : Elapsed 0.017 ms (1.750 ms / 100) 1.700 -> 1.701 ( +0.06%) [ +0.00% +0.18% +0.12% / +0.06% +0.94% +1.12%] index_copy_ strided 7 : Elapsed 0.017 ms (1.700 ms / 100) 1.752 -> 1.755 ( +0.17%) [ +0.17% +0.06% +0.00% / +0.17% +0.80% +0.74%] index_add_ perm : Elapsed 0.018 ms (1.755 ms / 100) 1.705 -> 1.705 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.76% +0.82%] index_copy_ perm : Elapsed 0.017 ms (1.705 ms / 100) 1.755 -> 1.760 ( +0.28%) [ +0.17% +0.17% +0.00% / +0.28% +0.57% +0.57%] index_add_ perm_sorted : Elapsed 0.018 ms (1.758 ms / 100) 1.712 -> 1.714 ( +0.12%) [ +0.00% +0.06% +0.06% / +0.12% +0.29% +0.12%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.712 ms / 100) 8.174 -> 8.198 ( +0.29%) [ +0.12% +0.00% +0.28% / +0.32% +0.76% +0.29%] index_select const : Elapsed 0.082 ms (8.184 ms / 100) 8.208 -> 8.211 ( +0.04%) [ +0.00% +0.04% +0.49% / +0.24% +0.29% +0.04%] index_select wrap : Elapsed 0.082 ms (8.208 ms / 100) 8.208 -> 8.227 ( +0.23%) [ +0.00% +0.05% +0.13% / +0.26% +0.23% +0.23%] index_select linear : Elapsed 0.082 ms (8.208 ms / 100) 8.199 -> 8.191 ( -0.10%) [ +0.17% +0.12% +0.00% / -0.10% +0.26% +0.09%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.182 -> 8.186 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.33% +0.34%] index_select skip64 : Elapsed 0.082 ms (8.182 ms / 100) 8.194 -> 8.192 ( -0.02%) [ +0.05% +0.00% +0.05% / -0.02% +0.15% +0.10%] index_select skip256 : Elapsed 0.082 ms (8.198 ms / 100) 8.211 -> 8.217 ( +0.07%) [ +0.00% +0.19% +0.21% / +0.18% +0.07% +0.30%] index_select spread : Elapsed 0.082 ms (8.211 ms / 100) 8.210 -> 8.218 ( +0.10%) [ +0.10% +0.18% +0.00% / +0.11% +0.27% +0.10%] index_select strided 3 : Elapsed 0.082 ms (8.218 ms / 100) 8.212 -> 8.210 ( -0.02%) [ +0.07% +0.34% +0.00% / -0.02% +0.28% +0.40%] index_select random : Elapsed 0.082 ms (8.218 ms / 100) 8.205 -> 8.209 ( +0.05%) [ +0.20% +0.28% +0.00% / +0.05% +0.38% +0.32%] index_select random_sorted : Elapsed 0.082 ms (8.221 ms / 100) B = [16, 20, 40, 4] (stride (4, 64, 1280, 1)) A = [16, 5, 40, 4] (stride (1, 2560, 16, 640)) dim = 1 1.850 -> 1.851 ( +0.05%) [ +0.00% +0.05% +0.16% / +0.05% +0.11% +0.05%] index_add_ linear : Elapsed 0.018 ms (1.850 ms / 100) 1.805 -> 1.805 ( +0.00%) [ +0.00% +0.22% +0.11% / +0.06% +0.06% +0.00%] index_copy_ linear : Elapsed 0.018 ms (1.805 ms / 100) 1.852 -> 1.848 ( -0.22%) [ +0.00% +0.05% +0.05% / +0.11% -0.22% -0.11%] index_add_ reverse : Elapsed 0.019 ms (1.852 ms / 100) 1.806 -> 1.802 ( -0.22%) [ +0.00% +0.00% +0.06% / +0.06% +0.11% -0.22%] index_copy_ reverse : Elapsed 0.018 ms (1.806 ms / 100) 1.848 -> 1.847 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.05% +0.54%] index_add_ spread : Elapsed 0.019 ms (1.851 ms / 100) 1.803 -> 1.801 ( -0.11%) [ +0.00% +0.06% +0.00% / -0.11% +0.28% +0.17%] index_copy_ spread : Elapsed 0.018 ms (1.803 ms / 100) 1.848 -> 1.851 ( +0.16%) [ +0.00% +0.05% +0.05% / +0.16% +0.49% +0.43%] index_add_ strided 3 : Elapsed 0.018 ms (1.848 ms / 100) 1.798 -> 1.802 ( +0.22%) [ +0.28% +0.22% +0.00% / +0.22% +0.61% +0.83%] index_copy_ strided 3 : Elapsed 0.018 ms (1.803 ms / 100) 1.851 -> 1.851 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.54% +0.43%] index_add_ strided 7 : Elapsed 0.019 ms (1.851 ms / 100) 1.804 -> 1.805 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.28% +0.44%] index_copy_ strided 7 : Elapsed 0.018 ms (1.805 ms / 100) 1.854 -> 1.851 ( -0.16%) [ +0.00% +0.16% +0.00% / -0.05% -0.16% +0.00%] index_add_ perm : Elapsed 0.019 ms (1.854 ms / 100) 1.804 -> 1.803 ( -0.06%) [ +0.00% +0.17% +0.11% / +0.55% +0.11% -0.06%] index_copy_ perm : Elapsed 0.018 ms (1.804 ms / 100) 1.848 -> 1.854 ( +0.32%) [ +0.22% +0.38% +0.00% / +0.32% +0.49% +0.49%] index_add_ perm_sorted : Elapsed 0.019 ms (1.852 ms / 100) 1.803 -> 1.806 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.28% +0.22%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.806 ms / 100) 8.521 -> 8.535 ( +0.16%) [ +0.00% +0.08% +0.05% / +0.16% +0.22% +0.48%] index_select const : Elapsed 0.085 ms (8.521 ms / 100) 8.538 -> 8.538 ( +0.00%) [ +0.12% +0.45% +0.00% / +0.00% +0.46% +0.40%] index_select wrap : Elapsed 0.085 ms (8.548 ms / 100) 8.557 -> 8.550 ( -0.08%) [ +0.00% +0.23% +0.14% / -0.08% +0.06% +0.13%] index_select linear : Elapsed 0.086 ms (8.557 ms / 100) 8.533 -> 8.550 ( +0.20%) [ +0.04% +0.00% +0.12% / +0.20% +0.36% +0.28%] index_select reverse : Elapsed 0.085 ms (8.536 ms / 100) 8.503 -> 8.531 ( +0.33%) [ +0.35% +0.27% +0.00% / +0.33% +0.65% +0.53%] index_select skip64 : Elapsed 0.085 ms (8.533 ms / 100) 8.516 -> 8.542 ( +0.31%) [ +0.07% +0.12% +0.00% / +0.42% +0.45% +0.31%] index_select skip256 : Elapsed 0.085 ms (8.522 ms / 100) 8.538 -> 8.548 ( +0.12%) [ +0.12% +0.02% +0.00% / +0.12% +0.22% +0.67%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.557 -> 8.549 ( -0.09%) [ +0.08% +0.08% +0.00% / -0.09% +0.41% +0.37%] index_select strided 3 : Elapsed 0.086 ms (8.564 ms / 100) 8.559 -> 8.564 ( +0.06%) [ +0.04% +0.05% +0.00% / +0.15% +0.12% +0.06%] index_select random : Elapsed 0.086 ms (8.562 ms / 100) 8.538 -> 8.543 ( +0.06%) [ +0.06% +0.15% +0.00% / +0.06% +0.32% +0.54%] index_select random_sorted : Elapsed 0.085 ms (8.543 ms / 100) out_shape = [16, 5, 20, 4] in_shape = [16, 5, 40, 4] idx_dim = 2 B = [16, 5, 20, 4] (stride (4, 1280, 64, 1)) A = [16, 5, 40, 4] (stride (200, 40, 1, 3200)) dim = 2 2.405 -> 2.411 ( +0.25%) [ +0.37% +0.00% +0.17% / +0.25% +0.46% +0.46%] index_select const : Elapsed 0.024 ms (2.414 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.04% +0.12%] index_select wrap : Elapsed 0.024 ms (2.418 ms / 100) 2.417 -> 2.416 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.08% +0.08%] index_select linear : Elapsed 0.024 ms (2.419 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.25% +0.08% +0.12%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.411 -> 2.408 ( -0.12%) [ +0.04% +0.00% +0.12% / -0.12% +0.21% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.412 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.25% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.413 ms / 100) 2.422 -> 2.420 ( -0.08%) [ +0.00% +0.21% +0.17% / -0.08% +0.12% +0.21%] index_select spread : Elapsed 0.024 ms (2.422 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.12% +0.08% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.25% +0.00% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.427 ms / 100) 2.423 -> 2.421 ( -0.08%) [ +0.08% +0.00% +0.04% / -0.08% +0.25% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.08% +0.04% +0.00% / +0.04% +0.25% +0.45%] index_select strided 8 : Elapsed 0.024 ms (2.422 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.00% +0.04% +0.08% / -0.04% +0.08% +0.04%] index_select strided 16 : Elapsed 0.024 ms (2.424 ms / 100) 2.424 -> 2.422 ( -0.08%) [ +0.17% +0.00% +0.12% / +0.00% +0.00% -0.08%] index_select random : Elapsed 0.024 ms (2.428 ms / 100) 2.425 -> 2.423 ( -0.08%) [ +0.12% +0.04% +0.00% / +0.12% +0.08% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.425 -> 2.421 ( -0.16%) [ +0.16% +0.00% +0.04% / -0.16% +0.04% +0.16%] index_select perm : Elapsed 0.024 ms (2.429 ms / 100) 2.426 -> 2.421 ( -0.21%) [ +0.08% +0.04% +0.00% / +0.12% -0.21% -0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.428 ms / 100) B = [16, 5, 20, 4] (stride (20, 1280, 1, 320)) A = [16, 5, 40, 4] (stride (800, 40, 1, 200)) dim = 2 2.449 -> 2.450 ( +0.04%) [ +0.20% +0.00% +0.08% / +0.04% +0.33% +0.16%] index_select const : Elapsed 0.025 ms (2.454 ms / 100) 2.457 -> 2.454 ( -0.12%) [ +0.20% +0.00% +0.04% / +0.41% -0.12% -0.04%] index_select wrap : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.452 ( -0.28%) [ +0.16% +0.00% +0.04% / +0.04% -0.12% -0.28%] index_select linear : Elapsed 0.025 ms (2.463 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.33% +0.33% +0.00% / +0.24% +0.24% +0.16%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.12% +0.04% +0.12%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.451 -> 2.451 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.24% +0.41%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.461 -> 2.465 ( +0.16%) [ +0.12% +0.28% +0.00% / +0.28% +0.16% +0.28%] index_select spread : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.00% +0.08% +0.28% / +0.08% +0.28% +0.37%] index_select strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.04% +0.12% +0.00% / -0.04% +0.37% +0.33%] index_select strided 5 : Elapsed 0.025 ms (2.461 ms / 100) 2.464 -> 2.465 ( +0.04%) [ +0.00% +0.04% +0.20% / +0.16% +0.08% +0.04%] index_select strided 7 : Elapsed 0.025 ms (2.464 ms / 100) 2.465 -> 2.461 ( -0.16%) [ +0.00% +0.12% +0.04% / -0.16% +0.12% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.12% +0.20% +0.00% / -0.04% +0.37% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.461 ( +0.04%) [ +0.16% +0.00% +0.08% / +0.04% +0.53% +0.28%] index_select random : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.12% +0.04% +0.00% / -0.04% -0.04% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.04% +0.12% +0.00%] index_select perm : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.463 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.12% +0.24% +0.24%] index_select perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) B = [16, 5, 20, 4] (stride (20, 4, 320, 1)) A = [16, 5, 40, 4] (stride (1, 2560, 16, 640)) dim = 2 2.391 -> 2.395 ( +0.17%) [ +0.08% +0.21% +0.00% / +0.17% +0.50% +0.42%] index_select const : Elapsed 0.024 ms (2.393 ms / 100) 2.414 -> 2.410 ( -0.17%) [ +0.00% +0.00% +0.04% / -0.08% -0.17% +0.08%] index_select wrap : Elapsed 0.024 ms (2.414 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.00% +0.12% +0.00% / +0.00% +0.08% -0.04%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.29% +0.12%] index_select reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.33% +0.33% +0.00% / +0.21% +0.13% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.401 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.21% +0.17% +0.00% / +0.33% +0.13% +0.38%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.00% +0.21% +0.00% / +0.29% +0.17% +0.04%] index_select spread : Elapsed 0.024 ms (2.410 ms / 100) 2.409 -> 2.414 ( +0.21%) [ +0.37% +0.29% +0.00% / +0.33% +0.33% +0.21%] index_select strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.00% +0.17% +0.08% / +0.08% -0.08% +0.04%] index_select strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.408 -> 2.412 ( +0.17%) [ +0.00% +0.12% +0.04% / +0.17% +0.29% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.397 -> 2.394 ( -0.13%) [ +0.04% +0.21% +0.00% / -0.13% +0.21% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.397 -> 2.402 ( +0.21%) [ +0.42% +0.17% +0.00% / +0.33% +0.25% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.407 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.21% +0.04%] index_select random : Elapsed 0.024 ms (2.410 ms / 100) 2.413 -> 2.409 ( -0.17%) [ +0.00% +0.08% +0.00% / +0.00% -0.17% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.414 -> 2.409 ( -0.21%) [ +0.00% +0.08% +0.04% / +0.12% -0.04% -0.21%] index_select perm : Elapsed 0.024 ms (2.414 ms / 100) 2.414 -> 2.407 ( -0.29%) [ +0.00% +0.08% +0.33% / -0.08% -0.25% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) out_shape = [16, 5, 40, 20] in_shape = [16, 5, 40, 4] idx_dim = 3 B = [16, 5, 40, 20] (stride (4000, 800, 20, 1)) A = [16, 5, 40, 4] (stride (1, 640, 16, 3200)) dim = 3 1.837 -> 1.836 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.00% +0.33%] index_add_ linear : Elapsed 0.018 ms (1.837 ms / 100) 1.784 -> 1.786 ( +0.11%) [ +0.06% +0.22% +0.00% / +0.11% +0.34% +0.22%] index_copy_ linear : Elapsed 0.018 ms (1.785 ms / 100) 1.837 -> 1.834 ( -0.16%) [ +0.00% +0.05% +0.00% / -0.16% +0.27% +0.49%] index_add_ reverse : Elapsed 0.018 ms (1.837 ms / 100) 1.784 -> 1.785 ( +0.06%) [ +0.06% +0.17% +0.00% / +0.06% +0.73% +0.45%] index_copy_ reverse : Elapsed 0.018 ms (1.785 ms / 100) 1.873 -> 1.872 ( -0.05%) [ +0.00% +0.27% +0.05% / -0.05% +0.43% +0.53%] index_add_ spread : Elapsed 0.019 ms (1.873 ms / 100) 1.881 -> 1.884 ( +0.16%) [ +0.21% +0.00% +0.37% / +0.16% +1.06% +1.12%] index_copy_ spread : Elapsed 0.019 ms (1.885 ms / 100) 1.869 -> 1.869 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.43% +0.48%] index_add_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.855 -> 1.852 ( -0.16%) [ +0.11% +0.11% +0.00% / -0.16% +0.22% +0.32%] index_copy_ strided 3 : Elapsed 0.019 ms (1.857 ms / 100) 1.872 -> 1.869 ( -0.16%) [ +0.32% +0.37% +0.00% / -0.16% +0.21% +0.59%] index_add_ strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.886 -> 1.893 ( +0.37%) [ +0.05% +0.00% +0.05% / +0.37% +0.74% +0.74%] index_copy_ strided 7 : Elapsed 0.019 ms (1.887 ms / 100) 1.871 -> 1.877 ( +0.32%) [ +0.32% +0.00% +0.11% / +0.32% +0.53% +0.32%] index_add_ perm : Elapsed 0.019 ms (1.877 ms / 100) 1.889 -> 1.891 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +0.48% +0.42%] index_copy_ perm : Elapsed 0.019 ms (1.891 ms / 100) 1.874 -> 1.879 ( +0.27%) [ +0.27% +0.43% +0.00% / +0.27% +0.32% +0.59%] index_add_ perm_sorted : Elapsed 0.019 ms (1.879 ms / 100) 1.890 -> 1.894 ( +0.21%) [ +0.11% +0.05% +0.00% / +0.21% +0.37% +0.63%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.892 ms / 100) 8.010 -> 8.009 ( -0.01%) [ +0.14% +0.02% +0.00% / +0.10% -0.01% +0.06%] index_select const : Elapsed 0.080 ms (8.021 ms / 100) 8.027 -> 8.038 ( +0.14%) [ +0.17% +0.25% +0.00% / +0.15% +0.14% +0.17%] index_select wrap : Elapsed 0.080 ms (8.041 ms / 100) 8.022 -> 8.023 ( +0.01%) [ +0.00% +0.24% +0.16% / +0.01% +0.42% +0.36%] index_select linear : Elapsed 0.080 ms (8.022 ms / 100) 8.020 -> 8.027 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.31% +0.10%] index_select reverse : Elapsed 0.080 ms (8.020 ms / 100) 8.003 -> 7.998 ( -0.06%) [ +0.01% +0.00% +0.26% / -0.06% +0.12% +0.19%] index_select skip64 : Elapsed 0.080 ms (8.004 ms / 100) 8.005 -> 8.004 ( -0.01%) [ +0.00% +0.19% +0.06% / -0.01% +0.26% +0.05%] index_select skip256 : Elapsed 0.080 ms (8.005 ms / 100) 8.028 -> 8.040 ( +0.15%) [ +0.14% +0.00% +0.16% / +0.20% +0.19% +0.15%] index_select spread : Elapsed 0.080 ms (8.039 ms / 100) 8.042 -> 8.042 ( +0.00%) [ +0.22% +0.20% +0.00% / +0.12% +0.00% +0.14%] index_select strided 3 : Elapsed 0.081 ms (8.060 ms / 100) 8.026 -> 8.038 ( +0.15%) [ +0.00% +0.41% +0.27% / +0.29% +0.15% +0.29%] index_select random : Elapsed 0.080 ms (8.026 ms / 100) 8.029 -> 8.035 ( +0.07%) [ +0.15% +0.00% +0.21% / +0.07% +0.25% +0.22%] index_select random_sorted : Elapsed 0.080 ms (8.041 ms / 100) B = [16, 5, 40, 20] (stride (800, 12800, 20, 1)) A = [16, 5, 40, 4] (stride (1, 2560, 64, 16)) dim = 3 2.157 -> 2.155 ( -0.09%) [ +0.28% +0.00% +0.09% / +0.19% +0.32% -0.09%] index_add_ linear : Elapsed 0.022 ms (2.163 ms / 100) 2.101 -> 2.101 ( +0.00%) [ +0.00% +0.24% +0.14% / +0.10% +0.00% +0.10%] index_copy_ linear : Elapsed 0.021 ms (2.101 ms / 100) 2.157 -> 2.161 ( +0.19%) [ +0.09% +0.19% +0.00% / +0.19% +0.19% +0.42%] index_add_ reverse : Elapsed 0.022 ms (2.159 ms / 100) 2.099 -> 2.098 ( -0.05%) [ +0.24% +0.05% +0.00% / +0.19% -0.05% +0.33%] index_copy_ reverse : Elapsed 0.021 ms (2.104 ms / 100) 2.199 -> 2.197 ( -0.09%) [ +0.00% +0.18% +0.00% / +0.05% +0.05% -0.09%] index_add_ spread : Elapsed 0.022 ms (2.199 ms / 100) 2.201 -> 2.202 ( +0.05%) [ +0.14% +0.27% +0.00% / +0.05% +0.05% +0.05%] index_copy_ spread : Elapsed 0.022 ms (2.204 ms / 100) 2.185 -> 2.187 ( +0.09%) [ +0.14% +0.32% +0.00% / +0.09% +0.32% +0.27%] index_add_ strided 3 : Elapsed 0.022 ms (2.188 ms / 100) 2.167 -> 2.169 ( +0.09%) [ +0.18% +0.28% +0.00% / +0.09% +0.09% +0.28%] index_copy_ strided 3 : Elapsed 0.022 ms (2.171 ms / 100) 2.195 -> 2.198 ( +0.14%) [ +0.05% +0.00% +0.14% / +0.14% +0.27% +0.36%] index_add_ strided 7 : Elapsed 0.022 ms (2.196 ms / 100) 2.205 -> 2.201 ( -0.18%) [ +0.05% +0.00% +0.09% / +0.00% -0.18% -0.05%] index_copy_ strided 7 : Elapsed 0.022 ms (2.206 ms / 100) 2.199 -> 2.204 ( +0.23%) [ +0.27% +0.00% +0.32% / +0.32% +0.23% +0.32%] index_add_ perm : Elapsed 0.022 ms (2.205 ms / 100) 2.212 -> 2.206 ( -0.27%) [ +0.05% +0.00% +0.00% / -0.09% -0.27% -0.14%] index_copy_ perm : Elapsed 0.022 ms (2.213 ms / 100) 2.195 -> 2.200 ( +0.23%) [ +0.00% +0.18% +0.27% / +0.36% +0.23% +0.27%] index_add_ perm_sorted : Elapsed 0.022 ms (2.195 ms / 100) 2.203 -> 2.201 ( -0.09%) [ +0.00% +0.18% +0.14% / +0.23% -0.09% +0.14%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.203 ms / 100) 8.843 -> 8.859 ( +0.18%) [ +0.34% +0.00% +0.24% / +0.18% +0.28% +0.35%] index_select const : Elapsed 0.089 ms (8.873 ms / 100) 8.880 -> 8.883 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.25% +0.18%] index_select wrap : Elapsed 0.089 ms (8.880 ms / 100) 8.856 -> 8.881 ( +0.28%) [ +0.00% +0.34% +0.36% / +0.28% +0.53% +0.43%] index_select linear : Elapsed 0.089 ms (8.856 ms / 100) 8.860 -> 8.867 ( +0.08%) [ +0.20% +0.19% +0.00% / +0.08% +0.65% +0.68%] index_select reverse : Elapsed 0.089 ms (8.878 ms / 100) 8.835 -> 8.861 ( +0.29%) [ +0.35% +0.00% +0.02% / +0.38% +0.29% +0.34%] index_select skip64 : Elapsed 0.089 ms (8.866 ms / 100) 8.848 -> 8.854 ( +0.07%) [ +0.17% +0.00% +0.08% / +0.07% +0.19% +0.17%] index_select skip256 : Elapsed 0.089 ms (8.863 ms / 100) 8.889 -> 8.885 ( -0.04%) [ +0.21% +0.00% +0.01% / -0.04% +0.26% +0.10%] index_select spread : Elapsed 0.089 ms (8.908 ms / 100) 8.878 -> 8.879 ( +0.01%) [ +0.00% +0.03% +0.00% / +0.01% +0.08% +0.38%] index_select strided 3 : Elapsed 0.089 ms (8.878 ms / 100) 8.879 -> 8.891 ( +0.14%) [ +0.00% +0.11% +0.06% / +0.14% +0.27% +0.36%] index_select random : Elapsed 0.089 ms (8.879 ms / 100) 8.879 -> 8.890 ( +0.12%) [ +0.06% +0.20% +0.00% / +0.12% +0.46% +0.20%] index_select random_sorted : Elapsed 0.089 ms (8.884 ms / 100) B = [16, 5, 40, 20] (stride (40, 12800, 1, 640)) A = [16, 5, 40, 4] (stride (1, 64, 320, 16)) dim = 3 2.237 -> 2.237 ( +0.00%) [ +0.00% +0.22% +0.04% / +0.13% +0.27% +0.00%] index_add_ linear : Elapsed 0.022 ms (2.237 ms / 100) 2.170 -> 2.171 ( +0.05%) [ +0.14% +0.00% +0.00% / +0.05% +0.14% +0.05%] index_copy_ linear : Elapsed 0.022 ms (2.173 ms / 100) 2.239 -> 2.237 ( -0.09%) [ +0.00% +0.00% +0.04% / -0.09% +0.22% -0.04%] index_add_ reverse : Elapsed 0.022 ms (2.239 ms / 100) 2.172 -> 2.173 ( +0.05%) [ +0.05% +0.09% +0.00% / +0.05% +0.23% +0.09%] index_copy_ reverse : Elapsed 0.022 ms (2.173 ms / 100) 2.235 -> 2.238 ( +0.13%) [ +0.13% +0.09% +0.00% / +0.13% +0.36% +0.67%] index_add_ spread : Elapsed 0.022 ms (2.238 ms / 100) 2.171 -> 2.172 ( +0.05%) [ +0.18% +0.00% +0.00% / +0.09% +0.05% +0.64%] index_copy_ spread : Elapsed 0.022 ms (2.175 ms / 100) 2.235 -> 2.234 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.45% +0.85%] index_add_ strided 3 : Elapsed 0.022 ms (2.235 ms / 100) 2.167 -> 2.168 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.05% +0.46% +0.60%] index_copy_ strided 3 : Elapsed 0.022 ms (2.167 ms / 100) 2.234 -> 2.240 ( +0.27%) [ +0.27% +0.18% +0.00% / +0.27% +0.49% +0.27%] index_add_ strided 7 : Elapsed 0.022 ms (2.240 ms / 100) 2.172 -> 2.172 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.28% +0.23%] index_copy_ strided 7 : Elapsed 0.022 ms (2.174 ms / 100) 2.230 -> 2.237 ( +0.31%) [ +0.00% +0.09% +0.09% / +0.31% +0.67% +0.63%] index_add_ perm : Elapsed 0.022 ms (2.230 ms / 100) 2.162 -> 2.167 ( +0.23%) [ +0.00% +0.23% +0.09% / +0.23% +0.88% +0.51%] index_copy_ perm : Elapsed 0.022 ms (2.162 ms / 100) 2.234 -> 2.234 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_add_ perm_sorted : Elapsed 0.022 ms (2.236 ms / 100) 2.162 -> 2.174 ( +0.56%) [ +0.23% +0.09% +0.00% / +0.56% +0.79% +1.11%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.167 ms / 100) 9.186 -> 9.163 ( -0.25%) [ +0.00% +0.00% +0.02% / -0.11% -0.25% -0.11%] index_select const : Elapsed 0.092 ms (9.186 ms / 100) 9.213 -> 9.195 ( -0.20%) [ +0.00% +0.07% +0.02% / +0.17% +0.18% -0.20%] index_select wrap : Elapsed 0.092 ms (9.213 ms / 100) 9.201 -> 9.197 ( -0.04%) [ +0.00% +0.03% +0.12% / -0.04% +0.00% -0.04%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.207 -> 9.198 ( -0.10%) [ +0.09% +0.00% +0.08% / +0.08% +0.14% -0.10%] index_select reverse : Elapsed 0.092 ms (9.215 ms / 100) 9.176 -> 9.177 ( +0.01%) [ +0.15% +0.10% +0.00% / +0.12% +0.01% +0.04%] index_select skip64 : Elapsed 0.092 ms (9.190 ms / 100) 9.175 -> 9.170 ( -0.05%) [ +0.00% +0.10% +0.13% / -0.03% -0.05% +0.03%] index_select skip256 : Elapsed 0.092 ms (9.175 ms / 100) 9.222 -> 9.217 ( -0.05%) [ +0.00% +0.07% +0.13% / +0.10% -0.03% -0.05%] index_select spread : Elapsed 0.092 ms (9.222 ms / 100) 9.221 -> 9.194 ( -0.29%) [ +0.07% +0.09% +0.00% / +0.08% -0.29% -0.20%] index_select strided 3 : Elapsed 0.092 ms (9.227 ms / 100) 9.207 -> 9.197 ( -0.11%) [ +0.09% +0.13% +0.00% / -0.02% -0.11% -0.11%] index_select random : Elapsed 0.092 ms (9.215 ms / 100) 9.221 -> 9.210 ( -0.12%) [ +0.00% +0.02% +0.09% / -0.10% +0.01% -0.12%] index_select random_sorted : Elapsed 0.092 ms (9.221 ms / 100) B = [16, 5, 40, 20] (stride (100, 20, 1600, 1)) A = [16, 5, 40, 4] (stride (5, 1, 80, 3200)) dim = 3 0.890 -> 0.891 ( +0.11%) [ +0.00% +0.67% +0.67% / +0.11% +3.37% +3.37%] index_add_ linear : Elapsed 0.009 ms (0.890 ms / 100) 0.894 -> 0.896 ( +0.22%) [ +0.45% +0.67% +0.00% / +0.22% +1.79% +1.90%] index_copy_ linear : Elapsed 0.009 ms (0.898 ms / 100) 0.892 -> 0.899 ( +0.78%) [ +1.01% +0.45% +0.00% / +0.78% +2.69% +2.58%] index_add_ reverse : Elapsed 0.009 ms (0.901 ms / 100) 0.898 -> 0.903 ( +0.56%) [ +0.33% +0.00% +0.22% / +0.56% +1.22% +1.67%] index_copy_ reverse : Elapsed 0.009 ms (0.901 ms / 100) 0.933 -> 0.931 ( -0.21%) [ +0.21% +0.21% +0.00% / -0.21% +2.36% +2.36%] index_add_ spread : Elapsed 0.009 ms (0.935 ms / 100) 0.944 -> 0.947 ( +0.32%) [ +0.00% +0.32% +0.32% / +0.32% +0.74% +0.53%] index_copy_ spread : Elapsed 0.009 ms (0.944 ms / 100) 0.924 -> 0.922 ( -0.22%) [ +0.00% +0.22% +0.43% / -0.22% +2.38% +2.16%] index_add_ strided 3 : Elapsed 0.009 ms (0.924 ms / 100) 0.931 -> 0.932 ( +0.11%) [ +0.00% +0.11% +0.32% / +0.11% +1.29% +1.72%] index_copy_ strided 3 : Elapsed 0.009 ms (0.931 ms / 100) 0.932 -> 0.934 ( +0.21%) [ +0.32% +0.00% +0.11% / +0.21% +2.04% +2.15%] index_add_ strided 7 : Elapsed 0.009 ms (0.935 ms / 100) 0.942 -> 0.945 ( +0.32%) [ +0.21% +0.00% +0.21% / +0.32% +1.49% +1.17%] index_copy_ strided 7 : Elapsed 0.009 ms (0.944 ms / 100) 0.930 -> 0.930 ( +0.00%) [ +0.32% +0.00% +0.11% / +0.00% +2.90% +2.47%] index_add_ perm : Elapsed 0.009 ms (0.933 ms / 100) 0.937 -> 0.940 ( +0.32%) [ +0.32% +0.21% +0.00% / +0.32% +2.45% +2.13%] index_copy_ perm : Elapsed 0.009 ms (0.940 ms / 100) 0.930 -> 0.933 ( +0.32%) [ +0.43% +0.22% +0.00% / +0.32% +3.01% +2.58%] index_add_ perm_sorted : Elapsed 0.009 ms (0.934 ms / 100) 0.937 -> 0.936 ( -0.11%) [ +0.00% +0.21% +0.53% / -0.11% +2.13% +2.24%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.937 ms / 100) 5.166 -> 4.987 ( -3.46%) [ +0.19% +0.29% +0.00% / -3.04% -3.41% -3.46%] index_select const : Elapsed 0.052 ms (5.176 ms / 100) 5.197 -> 5.068 ( -2.48%) [ +0.04% +0.00% +0.12% / -2.48% -2.35% -2.39%] index_select wrap : Elapsed 0.052 ms (5.199 ms / 100) 5.207 -> 5.066 ( -2.71%) [ +0.15% +0.00% +0.15% / -2.71% -2.63% -2.48%] index_select linear : Elapsed 0.052 ms (5.215 ms / 100) 5.161 -> 5.058 ( -2.00%) [ +0.00% +0.23% +0.10% / -2.00% -1.67% -1.84%] index_select reverse : Elapsed 0.052 ms (5.161 ms / 100) 5.166 -> 4.993 ( -3.35%) [ +0.00% +0.29% +0.21% / -3.21% -3.35% -3.31%] index_select skip64 : Elapsed 0.052 ms (5.166 ms / 100) 5.171 -> 4.994 ( -3.42%) [ +0.04% +0.02% +0.00% / -3.11% -3.42% -3.38%] index_select skip256 : Elapsed 0.052 ms (5.173 ms / 100) 5.176 -> 5.060 ( -2.24%) [ +0.35% +0.06% +0.00% / -2.24% -2.01% -1.93%] index_select spread : Elapsed 0.052 ms (5.194 ms / 100) 5.185 -> 5.072 ( -2.18%) [ +0.04% +0.00% +0.00% / -2.18% -2.04% -1.95%] index_select strided 3 : Elapsed 0.052 ms (5.187 ms / 100) 5.190 -> 5.059 ( -2.52%) [ +0.25% +0.00% +0.25% / -2.52% -2.22% -2.27%] index_select random : Elapsed 0.052 ms (5.203 ms / 100) 5.172 -> 5.059 ( -2.18%) [ +0.08% +0.08% +0.00% / -2.18% -1.88% -1.88%] index_select random_sorted : Elapsed 0.052 ms (5.176 ms / 100) B = [16, 5, 40, 20] (stride (20, 320, 1600, 1)) A = [16, 5, 40, 4] (stride (800, 4, 20, 1)) dim = 3 2.199 -> 2.196 ( -0.14%) [ +0.00% +0.14% +0.00% / -0.14% +0.73% +0.86%] index_add_ linear : Elapsed 0.022 ms (2.199 ms / 100) 2.154 -> 2.158 ( +0.19%) [ +0.28% +0.19% +0.00% / +0.19% +0.84% +0.93%] index_copy_ linear : Elapsed 0.022 ms (2.160 ms / 100) 2.184 -> 2.183 ( -0.05%) [ +0.14% +0.14% +0.00% / -0.05% +1.01% +1.10%] index_add_ reverse : Elapsed 0.022 ms (2.187 ms / 100) 2.148 -> 2.151 ( +0.14%) [ +0.00% +0.09% +0.14% / +0.14% +0.98% +0.84%] index_copy_ reverse : Elapsed 0.021 ms (2.148 ms / 100) 2.218 -> 2.223 ( +0.23%) [ +0.36% +0.27% +0.00% / +0.23% +1.26% +1.35%] index_add_ spread : Elapsed 0.022 ms (2.226 ms / 100) 2.243 -> 2.248 ( +0.22%) [ +0.00% +0.22% +0.13% / +0.22% +1.25% +1.29%] index_copy_ spread : Elapsed 0.022 ms (2.243 ms / 100) 2.226 -> 2.227 ( +0.04%) [ +0.09% +0.40% +0.00% / +0.04% +1.17% +1.08%] index_add_ strided 3 : Elapsed 0.022 ms (2.228 ms / 100) 2.228 -> 2.228 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.00% +1.03% +0.72%] index_copy_ strided 3 : Elapsed 0.022 ms (2.230 ms / 100) 2.233 -> 2.235 ( +0.09%) [ +0.13% +0.09% +0.00% / +0.09% +1.03% +0.90%] index_add_ strided 7 : Elapsed 0.022 ms (2.236 ms / 100) 2.252 -> 2.255 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +1.15% +1.11%] index_copy_ strided 7 : Elapsed 0.023 ms (2.254 ms / 100) 2.220 -> 2.225 ( +0.23%) [ +0.00% +0.05% +0.00% / +0.23% +0.23% +0.50%] index_add_ perm : Elapsed 0.022 ms (2.220 ms / 100) 2.195 -> 2.197 ( +0.09%) [ +0.00% +0.27% +0.14% / +0.09% +0.55% +0.59%] index_copy_ perm : Elapsed 0.022 ms (2.195 ms / 100) 2.209 -> 2.210 ( +0.05%) [ +0.27% +0.00% +0.14% / +0.05% +0.72% +0.91%] index_add_ perm_sorted : Elapsed 0.022 ms (2.215 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.09% +0.14% +0.00% / +0.23% +0.69% +1.01%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.188 ms / 100) 9.286 -> 9.290 ( +0.04%) [ +0.00% +0.05% +0.12% / +0.04% +0.15% +0.17%] index_select const : Elapsed 0.093 ms (9.286 ms / 100) 9.289 -> 9.294 ( +0.05%) [ +0.00% +0.17% +0.08% / +0.05% +0.15% +0.39%] index_select wrap : Elapsed 0.093 ms (9.289 ms / 100) 9.293 -> 9.290 ( -0.03%) [ +0.08% +0.00% +0.11% / -0.03% +0.20% +0.17%] index_select linear : Elapsed 0.093 ms (9.300 ms / 100) 9.288 -> 9.290 ( +0.02%) [ +0.15% +0.04% +0.00% / +0.03% +0.02% +0.22%] index_select reverse : Elapsed 0.093 ms (9.302 ms / 100) 9.287 -> 9.288 ( +0.01%) [ +0.00% +0.01% +0.13% / +0.08% +0.23% +0.01%] index_select skip64 : Elapsed 0.093 ms (9.287 ms / 100) 9.285 -> 9.288 ( +0.03%) [ +0.17% +0.25% +0.00% / +0.19% +0.18% +0.03%] index_select skip256 : Elapsed 0.093 ms (9.301 ms / 100) 9.286 -> 9.293 ( +0.08%) [ +0.00% +0.38% +0.17% / +0.26% +0.16% +0.08%] index_select spread : Elapsed 0.093 ms (9.286 ms / 100) 9.291 -> 9.296 ( +0.05%) [ +0.15% +0.00% +0.10% / +0.15% +0.17% +0.05%] index_select strided 3 : Elapsed 0.093 ms (9.305 ms / 100) 9.278 -> 9.286 ( +0.09%) [ +0.24% +0.25% +0.00% / +0.40% +0.16% +0.09%] index_select random : Elapsed 0.093 ms (9.300 ms / 100) 9.281 -> 9.299 ( +0.19%) [ +0.06% +0.00% +0.03% / +0.23% +0.19% +0.25%] index_select random_sorted : Elapsed 0.093 ms (9.287 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 1600, 80)) A = [16, 5, 40, 4] (stride (1, 2560, 16, 640)) dim = 3 2.131 -> 2.133 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.33% +0.19%] index_add_ linear : Elapsed 0.021 ms (2.133 ms / 100) 2.085 -> 2.085 ( +0.00%) [ +0.19% +0.24% +0.00% / +0.00% +0.29% +0.19%] index_copy_ linear : Elapsed 0.021 ms (2.089 ms / 100) 2.131 -> 2.131 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.38% +0.14%] index_add_ reverse : Elapsed 0.021 ms (2.133 ms / 100) 2.084 -> 2.083 ( -0.05%) [ +0.14% +0.00% +0.00% / -0.05% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.021 ms (2.087 ms / 100) 2.113 -> 2.118 ( +0.24%) [ +0.09% +0.24% +0.00% / +0.24% +0.52% +0.66%] index_add_ spread : Elapsed 0.021 ms (2.115 ms / 100) 2.064 -> 2.070 ( +0.29%) [ +0.19% +0.15% +0.00% / +0.29% +0.68% +0.58%] index_copy_ spread : Elapsed 0.021 ms (2.068 ms / 100) 2.136 -> 2.142 ( +0.28%) [ +0.19% +0.00% +0.05% / +0.28% +0.70% +0.37%] index_add_ strided 3 : Elapsed 0.021 ms (2.140 ms / 100) 2.084 -> 2.087 ( +0.14%) [ +0.05% +0.05% +0.00% / +0.14% +0.86% +0.38%] index_copy_ strided 3 : Elapsed 0.021 ms (2.085 ms / 100) 2.127 -> 2.129 ( +0.09%) [ +0.19% +0.24% +0.00% / +0.09% +0.56% +0.47%] index_add_ strided 7 : Elapsed 0.021 ms (2.131 ms / 100) 2.075 -> 2.079 ( +0.19%) [ +0.10% +0.10% +0.00% / +0.19% +0.63% +0.48%] index_copy_ strided 7 : Elapsed 0.021 ms (2.077 ms / 100) 2.115 -> 2.120 ( +0.24%) [ +0.19% +0.00% +0.05% / +0.24% +0.33% +0.47%] index_add_ perm : Elapsed 0.021 ms (2.119 ms / 100) 2.066 -> 2.067 ( +0.05%) [ +0.10% +0.24% +0.00% / +0.05% +0.58% +0.48%] index_copy_ perm : Elapsed 0.021 ms (2.068 ms / 100) 2.129 -> 2.130 ( +0.05%) [ +0.19% +0.19% +0.00% / +0.05% +0.47% +0.23%] index_add_ perm_sorted : Elapsed 0.021 ms (2.133 ms / 100) 2.080 -> 2.085 ( +0.24%) [ +0.14% +0.10% +0.00% / +0.24% +0.24% +0.43%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.083 ms / 100) 9.174 -> 9.172 ( -0.02%) [ +0.36% +0.11% +0.00% / -0.02% +0.40% +0.26%] index_select const : Elapsed 0.092 ms (9.207 ms / 100) 9.224 -> 9.213 ( -0.12%) [ +0.00% +0.03% +0.24% / -0.12% +0.16% +0.23%] index_select wrap : Elapsed 0.092 ms (9.224 ms / 100) 9.207 -> 9.209 ( +0.02%) [ +0.07% +0.00% +0.23% / +0.02% +0.27% +0.21%] index_select linear : Elapsed 0.092 ms (9.213 ms / 100) 9.202 -> 9.208 ( +0.07%) [ +0.02% +0.11% +0.00% / +0.07% +0.30% +0.10%] index_select reverse : Elapsed 0.092 ms (9.204 ms / 100) 9.166 -> 9.172 ( +0.07%) [ +0.10% +0.14% +0.00% / +0.07% +0.16% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.175 ms / 100) 9.168 -> 9.156 ( -0.13%) [ +0.00% +0.05% +0.04% / -0.13% +0.21% +0.17%] index_select skip256 : Elapsed 0.092 ms (9.168 ms / 100) 9.197 -> 9.197 ( +0.00%) [ +0.00% +0.16% +0.13% / +0.00% +0.65% +0.53%] index_select spread : Elapsed 0.092 ms (9.197 ms / 100) 9.223 -> 9.228 ( +0.05%) [ +0.09% +0.00% +0.13% / +0.24% +0.05% +0.10%] index_select strided 3 : Elapsed 0.092 ms (9.231 ms / 100) 9.203 -> 9.206 ( +0.03%) [ +0.00% +0.02% +0.12% / +0.03% +0.32% +0.41%] index_select random : Elapsed 0.092 ms (9.203 ms / 100) 9.209 -> 9.202 ( -0.08%) [ +0.00% +0.21% +0.11% / -0.08% +0.53% +0.38%] index_select random_sorted : Elapsed 0.092 ms (9.209 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 1600, 80)) A = [16, 5, 40, 4] (stride (1, 16, 320, 80)) dim = 3 2.211 -> 2.213 ( +0.09%) [ +0.23% +0.23% +0.00% / +0.09% +0.36% +0.54%] index_add_ linear : Elapsed 0.022 ms (2.216 ms / 100) 2.159 -> 2.160 ( +0.05%) [ +0.09% +0.19% +0.00% / +0.05% +0.37% +0.37%] index_copy_ linear : Elapsed 0.022 ms (2.161 ms / 100) 2.213 -> 2.214 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.36% +0.54%] index_add_ reverse : Elapsed 0.022 ms (2.215 ms / 100) 2.157 -> 2.157 ( +0.00%) [ +0.19% +0.00% +0.32% / +0.00% +0.60% +0.46%] index_copy_ reverse : Elapsed 0.022 ms (2.161 ms / 100) 2.214 -> 2.216 ( +0.09%) [ +0.14% +0.27% +0.00% / +0.09% +0.63% +0.36%] index_add_ spread : Elapsed 0.022 ms (2.217 ms / 100) 2.159 -> 2.162 ( +0.14%) [ +0.00% +0.09% +0.19% / +0.14% +0.56% +0.42%] index_copy_ spread : Elapsed 0.022 ms (2.159 ms / 100) 2.212 -> 2.215 ( +0.14%) [ +0.09% +0.14% +0.00% / +0.14% +0.41% +0.41%] index_add_ strided 3 : Elapsed 0.022 ms (2.214 ms / 100) 2.160 -> 2.161 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.28% +0.28%] index_copy_ strided 3 : Elapsed 0.022 ms (2.160 ms / 100) 2.216 -> 2.218 ( +0.09%) [ +0.18% +0.00% +0.05% / +0.09% +0.41% +0.32%] index_add_ strided 7 : Elapsed 0.022 ms (2.220 ms / 100) 2.158 -> 2.160 ( +0.09%) [ +0.00% +0.23% +0.23% / +0.09% +0.46% +0.42%] index_copy_ strided 7 : Elapsed 0.022 ms (2.158 ms / 100) 2.209 -> 2.213 ( +0.18%) [ +0.27% +0.00% +0.14% / +0.18% +0.45% +0.45%] index_add_ perm : Elapsed 0.022 ms (2.215 ms / 100) 2.157 -> 2.159 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.19% +0.42%] index_copy_ perm : Elapsed 0.022 ms (2.159 ms / 100) 2.215 -> 2.213 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.23% +0.18%] index_add_ perm_sorted : Elapsed 0.022 ms (2.215 ms / 100) 2.159 -> 2.162 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.23% +0.37%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.161 ms / 100) 9.187 -> 9.188 ( +0.01%) [ +0.07% +0.00% +0.22% / +0.01% +0.34% +0.29%] index_select const : Elapsed 0.092 ms (9.193 ms / 100) 9.233 -> 9.251 ( +0.19%) [ +0.08% +0.00% +0.14% / +0.47% +0.19% +0.24%] index_select wrap : Elapsed 0.092 ms (9.240 ms / 100) 9.214 -> 9.219 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.33% +0.48%] index_select linear : Elapsed 0.092 ms (9.217 ms / 100) 9.238 -> 9.244 ( +0.06%) [ +0.01% +0.10% +0.00% / +0.06% +0.11% +0.29%] index_select reverse : Elapsed 0.092 ms (9.239 ms / 100) 9.187 -> 9.191 ( +0.04%) [ +0.03% +0.13% +0.00% / +0.04% +0.36% +0.09%] index_select skip64 : Elapsed 0.092 ms (9.190 ms / 100) 9.188 -> 9.197 ( +0.10%) [ +0.00% +0.10% +0.09% / +0.10% +0.50% +0.50%] index_select skip256 : Elapsed 0.092 ms (9.188 ms / 100) 9.234 -> 9.260 ( +0.28%) [ +0.00% +0.28% +0.19% / +0.37% +0.42% +0.28%] index_select spread : Elapsed 0.092 ms (9.234 ms / 100) 9.230 -> 9.235 ( +0.05%) [ +0.41% +0.00% +0.08% / +0.05% +0.40% +0.15%] index_select strided 3 : Elapsed 0.093 ms (9.268 ms / 100) 9.236 -> 9.252 ( +0.17%) [ +0.24% +0.25% +0.00% / +0.17% +0.28% +0.36%] index_select random : Elapsed 0.093 ms (9.258 ms / 100) 9.253 -> 9.247 ( -0.06%) [ +0.24% +0.02% +0.00% / -0.06% +0.12% +0.04%] index_select random_sorted : Elapsed 0.093 ms (9.275 ms / 100) out_shape = [20, 40, 4, 5] in_shape = [16, 40, 4, 5] idx_dim = 0 B = [20, 40, 4, 5] (stride (800, 1, 200, 40)) A = [16, 40, 4, 5] (stride (1, 320, 80, 16)) dim = 0 3.253 -> 3.255 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.43% +0.40%] index_add_ linear : Elapsed 0.033 ms (3.255 ms / 100) 3.109 -> 3.110 ( +0.03%) [ +0.16% +0.00% +0.06% / +0.03% +0.93% +0.96%] index_copy_ linear : Elapsed 0.031 ms (3.114 ms / 100) 3.248 -> 3.250 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.74% +0.77%] index_add_ reverse : Elapsed 0.032 ms (3.249 ms / 100) 3.112 -> 3.115 ( +0.10%) [ +0.00% +0.16% +0.26% / +0.10% +0.80% +0.87%] index_copy_ reverse : Elapsed 0.031 ms (3.112 ms / 100) 3.277 -> 3.278 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.34% +0.37%] index_add_ spread : Elapsed 0.033 ms (3.278 ms / 100) 3.131 -> 3.130 ( -0.03%) [ +0.19% +0.00% +0.06% / -0.03% +0.35% +0.19%] index_copy_ spread : Elapsed 0.031 ms (3.137 ms / 100) 3.276 -> 3.277 ( +0.03%) [ +0.12% +0.00% +0.00% / +0.03% +0.31% +0.37%] index_add_ strided 3 : Elapsed 0.033 ms (3.280 ms / 100) 3.116 -> 3.139 ( +0.74%) [ +0.71% +0.32% +0.00% / +0.77% +0.74% +0.77%] index_copy_ strided 3 : Elapsed 0.031 ms (3.138 ms / 100) 3.278 -> 3.279 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.24% +0.27%] index_add_ strided 7 : Elapsed 0.033 ms (3.279 ms / 100) 3.130 -> 3.123 ( -0.22%) [ +0.32% +0.10% +0.00% / -0.22% +0.32% +0.26%] index_copy_ strided 7 : Elapsed 0.031 ms (3.140 ms / 100) 3.280 -> 3.279 ( -0.03%) [ +0.00% +0.06% +0.00% / -0.03% +0.24% +0.21%] index_add_ perm : Elapsed 0.033 ms (3.280 ms / 100) 3.135 -> 3.135 ( +0.00%) [ +0.10% +0.26% +0.00% / +0.00% +0.22% +0.16%] index_copy_ perm : Elapsed 0.031 ms (3.138 ms / 100) 3.251 -> 3.253 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.68% +0.65%] index_add_ perm_sorted : Elapsed 0.033 ms (3.253 ms / 100) 3.114 -> 3.116 ( +0.06%) [ +0.26% +0.00% +0.35% / +0.06% +0.64% +0.90%] index_copy_ perm_sorted : Elapsed 0.031 ms (3.122 ms / 100) 5.307 -> 5.300 ( -0.13%) [ +0.09% +0.06% +0.00% / +0.00% -0.13% +0.04%] index_select const : Elapsed 0.053 ms (5.312 ms / 100) 5.308 -> 5.308 ( +0.00%) [ +0.19% +0.11% +0.00% / +0.09% +0.00% +0.06%] index_select wrap : Elapsed 0.053 ms (5.318 ms / 100) 5.316 -> 5.312 ( -0.08%) [ +0.00% +0.00% +0.02% / +0.00% -0.08% +0.00%] index_select linear : Elapsed 0.053 ms (5.316 ms / 100) 5.318 -> 5.311 ( -0.13%) [ +0.04% +0.00% +0.04% / -0.04% -0.13% -0.08%] index_select reverse : Elapsed 0.053 ms (5.320 ms / 100) 5.306 -> 5.308 ( +0.04%) [ +0.11% +0.00% +0.04% / +0.04% +0.09% +0.06%] index_select skip64 : Elapsed 0.053 ms (5.312 ms / 100) 5.301 -> 5.302 ( +0.02%) [ +0.19% +0.00% +0.15% / +0.19% +0.30% +0.02%] index_select skip256 : Elapsed 0.053 ms (5.311 ms / 100) 5.311 -> 5.309 ( -0.04%) [ +0.06% +0.00% +0.09% / +0.09% +0.02% -0.04%] index_select spread : Elapsed 0.053 ms (5.314 ms / 100) 5.315 -> 5.309 ( -0.11%) [ +0.06% +0.15% +0.00% / +0.06% -0.11% +0.09%] index_select strided 3 : Elapsed 0.053 ms (5.318 ms / 100) 5.316 -> 5.309 ( -0.13%) [ +0.00% +0.08% +0.06% / +0.17% +0.08% -0.13%] index_select strided 5 : Elapsed 0.053 ms (5.316 ms / 100) 5.313 -> 5.313 ( +0.00%) [ +0.06% +0.00% +0.02% / +0.11% +0.00% +0.11%] index_select strided 7 : Elapsed 0.053 ms (5.316 ms / 100) 5.308 -> 5.307 ( -0.02%) [ +0.00% +0.19% +0.17% / +0.34% +0.06% -0.02%] index_select strided 8 : Elapsed 0.053 ms (5.308 ms / 100) 5.314 -> 5.307 ( -0.13%) [ +0.06% +0.00% +0.02% / +0.00% -0.09% -0.13%] index_select random : Elapsed 0.053 ms (5.317 ms / 100) 5.313 -> 5.309 ( -0.08%) [ +0.04% +0.00% +0.13% / -0.06% -0.08% -0.08%] index_select random_sorted : Elapsed 0.053 ms (5.315 ms / 100) B = [20, 40, 4, 5] (stride (800, 4, 1, 160)) A = [16, 40, 4, 5] (stride (800, 5, 200, 1)) dim = 0 3.687 -> 3.687 ( +0.00%) [ +0.05% +0.00% +0.08% / +0.00% +0.76% +0.81%] index_add_ linear : Elapsed 0.037 ms (3.689 ms / 100) 3.549 -> 3.551 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.76% +0.79%] index_copy_ linear : Elapsed 0.036 ms (3.551 ms / 100) 3.696 -> 3.701 ( +0.14%) [ +0.11% +0.05% +0.00% / +0.14% +0.92% +0.70%] index_add_ reverse : Elapsed 0.037 ms (3.700 ms / 100) 3.557 -> 3.559 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.84% +0.70%] index_copy_ reverse : Elapsed 0.036 ms (3.560 ms / 100) 3.692 -> 3.694 ( +0.05%) [ +0.00% +0.08% +0.03% / +0.05% +0.73% +0.81%] index_add_ spread : Elapsed 0.037 ms (3.692 ms / 100) 3.562 -> 3.565 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.73% +0.84%] index_copy_ spread : Elapsed 0.036 ms (3.562 ms / 100) 3.697 -> 3.702 ( +0.14%) [ +0.19% +0.14% +0.00% / +0.14% +0.95% +0.92%] index_add_ strided 3 : Elapsed 0.037 ms (3.704 ms / 100) 3.563 -> 3.567 ( +0.11%) [ +0.11% +0.06% +0.00% / +0.11% +0.87% +0.81%] index_copy_ strided 3 : Elapsed 0.036 ms (3.567 ms / 100) 3.695 -> 3.693 ( -0.05%) [ +0.16% +0.11% +0.00% / -0.05% +0.97% +0.76%] index_add_ strided 7 : Elapsed 0.037 ms (3.701 ms / 100) 3.557 -> 3.556 ( -0.03%) [ +0.03% +0.06% +0.00% / -0.03% +0.82% +0.73%] index_copy_ strided 7 : Elapsed 0.036 ms (3.558 ms / 100) 3.686 -> 3.691 ( +0.14%) [ +0.11% +0.00% +0.08% / +0.14% +0.87% +0.76%] index_add_ perm : Elapsed 0.037 ms (3.690 ms / 100) 3.549 -> 3.551 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.82% +0.73%] index_copy_ perm : Elapsed 0.035 ms (3.550 ms / 100) 3.702 -> 3.701 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.68% +0.78%] index_add_ perm_sorted : Elapsed 0.037 ms (3.702 ms / 100) 3.562 -> 3.567 ( +0.14%) [ +0.11% +0.14% +0.00% / +0.14% +0.87% +0.93%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.566 ms / 100) 5.468 -> 5.467 ( -0.02%) [ +0.11% +0.02% +0.00% / +0.00% -0.02% +0.07%] index_select const : Elapsed 0.055 ms (5.474 ms / 100) 5.483 -> 5.489 ( +0.11%) [ +0.13% +0.09% +0.00% / +0.11% +0.20% +0.22%] index_select wrap : Elapsed 0.055 ms (5.490 ms / 100) 5.485 -> 5.489 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.09% +0.07% +0.09%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.488 -> 5.488 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.09% +0.04%] index_select reverse : Elapsed 0.055 ms (5.489 ms / 100) 5.468 -> 5.465 ( -0.05%) [ +0.13% +0.00% +0.02% / -0.05% +0.20% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.475 ms / 100) 5.463 -> 5.467 ( +0.07%) [ +0.11% +0.15% +0.00% / +0.07% +0.27% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.469 ms / 100) 5.482 -> 5.492 ( +0.18%) [ +0.09% +0.16% +0.00% / +0.26% +0.18% +0.24%] index_select spread : Elapsed 0.055 ms (5.487 ms / 100) 5.486 -> 5.491 ( +0.09%) [ +0.02% +0.07% +0.00% / +0.15% +0.15% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.487 ms / 100) 5.483 -> 5.484 ( +0.02%) [ +0.00% +0.13% +0.05% / +0.02% +0.20% +0.26%] index_select strided 5 : Elapsed 0.055 ms (5.483 ms / 100) 5.484 -> 5.478 ( -0.11%) [ +0.04% +0.11% +0.00% / -0.11% +0.22% +0.24%] index_select strided 7 : Elapsed 0.055 ms (5.486 ms / 100) 5.470 -> 5.472 ( +0.04%) [ +0.02% +0.00% +0.15% / +0.09% +0.09% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.471 ms / 100) 5.479 -> 5.483 ( +0.07%) [ +0.00% +0.07% +0.09% / +0.07% +0.13% +0.09%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.484 -> 5.486 ( +0.04%) [ +0.00% +0.11% +0.04% / +0.07% +0.04% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [20, 40, 4, 5] (stride (1, 400, 100, 20)) A = [16, 40, 4, 5] (stride (5, 320, 80, 1)) dim = 0 3.112 -> 3.119 ( +0.22%) [ +0.00% +0.22% +0.22% / +0.22% +0.96% +0.87%] index_add_ linear : Elapsed 0.031 ms (3.112 ms / 100) 2.987 -> 2.988 ( +0.03%) [ +0.00% +0.10% +0.13% / +0.03% +0.77% +0.77%] index_copy_ linear : Elapsed 0.030 ms (2.987 ms / 100) 3.115 -> 3.125 ( +0.32%) [ +0.55% +0.00% +0.35% / +0.32% +1.03% +0.90%] index_add_ reverse : Elapsed 0.031 ms (3.132 ms / 100) 2.986 -> 2.990 ( +0.13%) [ +0.07% +0.00% +0.10% / +0.13% +1.11% +0.80%] index_copy_ reverse : Elapsed 0.030 ms (2.988 ms / 100) 3.119 -> 3.121 ( +0.06%) [ +0.06% +0.00% +0.22% / +0.06% +0.71% +0.71%] index_add_ spread : Elapsed 0.031 ms (3.121 ms / 100) 2.985 -> 2.984 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +1.04% +0.97%] index_copy_ spread : Elapsed 0.030 ms (2.986 ms / 100) 3.123 -> 3.127 ( +0.13%) [ +0.16% +0.10% +0.00% / +0.13% +0.67% +0.67%] index_add_ strided 3 : Elapsed 0.031 ms (3.128 ms / 100) 2.986 -> 2.985 ( -0.03%) [ +0.07% +0.03% +0.00% / -0.03% +0.80% +0.90%] index_copy_ strided 3 : Elapsed 0.030 ms (2.988 ms / 100) 3.130 -> 3.132 ( +0.06%) [ +0.19% +0.32% +0.00% / +0.06% +0.45% +0.45%] index_add_ strided 7 : Elapsed 0.031 ms (3.136 ms / 100) 2.987 -> 2.990 ( +0.10%) [ +0.00% +0.17% +0.17% / +0.10% +0.87% +0.80%] index_copy_ strided 7 : Elapsed 0.030 ms (2.987 ms / 100) 3.124 -> 3.124 ( +0.00%) [ +0.48% +0.00% +0.16% / +0.00% +0.42% +0.51%] index_add_ perm : Elapsed 0.031 ms (3.139 ms / 100) 2.986 -> 2.988 ( +0.07%) [ +0.03% +0.00% +0.10% / +0.07% +0.74% +0.84%] index_copy_ perm : Elapsed 0.030 ms (2.987 ms / 100) 3.125 -> 3.141 ( +0.51%) [ +0.00% +0.38% +0.38% / +0.51% +0.51% +0.54%] index_add_ perm_sorted : Elapsed 0.031 ms (3.125 ms / 100) 2.989 -> 2.992 ( +0.10%) [ +0.03% +0.00% +0.03% / +0.10% +0.74% +0.74%] index_copy_ perm_sorted : Elapsed 0.030 ms (2.990 ms / 100) 5.304 -> 5.300 ( -0.08%) [ +0.00% +0.06% +0.00% / -0.06% -0.08% -0.04%] index_select const : Elapsed 0.053 ms (5.304 ms / 100) 5.310 -> 5.311 ( +0.02%) [ +0.00% +0.08% +0.13% / +0.28% +0.02% +0.24%] index_select wrap : Elapsed 0.053 ms (5.310 ms / 100) 5.316 -> 5.317 ( +0.02%) [ +0.09% +0.00% +0.02% / +0.02% +0.02% +0.19%] index_select linear : Elapsed 0.053 ms (5.321 ms / 100) 5.312 -> 5.311 ( -0.02%) [ +0.21% +0.13% +0.00% / +0.23% +0.11% -0.02%] index_select reverse : Elapsed 0.053 ms (5.323 ms / 100) 5.302 -> 5.302 ( +0.00%) [ +0.06% +0.02% +0.00% / +0.04% +0.00% +0.08%] index_select skip64 : Elapsed 0.053 ms (5.305 ms / 100) 5.293 -> 5.294 ( +0.02%) [ +0.17% +0.00% +0.17% / +0.11% +0.17% +0.02%] index_select skip256 : Elapsed 0.053 ms (5.302 ms / 100) 5.315 -> 5.313 ( -0.04%) [ +0.17% +0.00% +0.11% / +0.15% +0.04% -0.04%] index_select spread : Elapsed 0.053 ms (5.324 ms / 100) 5.314 -> 5.311 ( -0.06%) [ +0.00% +0.08% +0.17% / +0.08% -0.06% +0.08%] index_select strided 3 : Elapsed 0.053 ms (5.314 ms / 100) 5.321 -> 5.314 ( -0.13%) [ +0.06% +0.00% +0.08% / +0.00% -0.13% -0.04%] index_select strided 5 : Elapsed 0.053 ms (5.324 ms / 100) 5.312 -> 5.316 ( +0.08%) [ +0.09% +0.00% +0.08% / +0.15% +0.09% +0.08%] index_select strided 7 : Elapsed 0.053 ms (5.317 ms / 100) 5.299 -> 5.300 ( +0.02%) [ +0.11% +0.00% +0.04% / +0.02% +0.23% +0.08%] index_select strided 8 : Elapsed 0.053 ms (5.305 ms / 100) 5.309 -> 5.311 ( +0.04%) [ +0.04% +0.00% +0.23% / +0.04% +0.21% +0.13%] index_select random : Elapsed 0.053 ms (5.311 ms / 100) 5.315 -> 5.319 ( +0.08%) [ +0.13% +0.19% +0.00% / +0.08% +0.08% +0.15%] index_select random_sorted : Elapsed 0.053 ms (5.322 ms / 100) B = [20, 40, 4, 5] (stride (200, 5, 4000, 1)) A = [16, 40, 4, 5] (stride (5, 80, 3200, 1)) dim = 0 4.059 -> 4.073 ( +0.34%) [ +0.27% +0.00% +0.27% / +0.34% +1.08% +0.94%] index_add_ linear : Elapsed 0.041 ms (4.070 ms / 100) 3.924 -> 3.933 ( +0.23%) [ +0.23% +0.00% +0.23% / +0.23% +1.10% +0.97%] index_copy_ linear : Elapsed 0.039 ms (3.933 ms / 100) 4.059 -> 4.063 ( +0.10%) [ +0.00% +0.10% +0.17% / +0.10% +0.86% +0.84%] index_add_ reverse : Elapsed 0.041 ms (4.059 ms / 100) 3.920 -> 3.928 ( +0.20%) [ +0.00% +0.05% +0.15% / +0.20% +0.84% +0.84%] index_copy_ reverse : Elapsed 0.039 ms (3.920 ms / 100) 4.060 -> 4.061 ( +0.02%) [ +0.10% +0.05% +0.00% / +0.02% +0.79% +0.81%] index_add_ spread : Elapsed 0.041 ms (4.064 ms / 100) 3.924 -> 3.925 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.74% +0.76%] index_copy_ spread : Elapsed 0.039 ms (3.927 ms / 100) 4.070 -> 4.071 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.66% +0.69%] index_add_ strided 3 : Elapsed 0.041 ms (4.071 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 4.060 -> 4.061 ( +0.02%) [ +0.05% +0.10% +0.00% / +0.02% +0.69% +0.79%] index_add_ strided 7 : Elapsed 0.041 ms (4.062 ms / 100) 3.923 -> 3.926 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.69% +0.84%] index_copy_ strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 4.064 -> 4.079 ( +0.37%) [ +0.12% +0.15% +0.00% / +0.37% +1.11% +0.81%] index_add_ perm : Elapsed 0.041 ms (4.069 ms / 100) 3.929 -> 3.936 ( +0.18%) [ +0.05% +0.03% +0.00% / +0.18% +1.09% +0.87%] index_copy_ perm : Elapsed 0.039 ms (3.931 ms / 100) 4.070 -> 4.072 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.66% +0.69%] index_add_ perm_sorted : Elapsed 0.041 ms (4.071 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.69% +0.71%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.932 ms / 100) 5.558 -> 5.555 ( -0.05%) [ +0.05% +0.00% +0.11% / +0.05% -0.02% -0.05%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.570 -> 5.569 ( -0.02%) [ +0.07% +0.09% +0.00% / -0.02% +0.22% +0.16%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.574 -> 5.567 ( -0.13%) [ +0.14% +0.00% +0.05% / -0.13% +0.14% +0.11%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.576 -> 5.574 ( -0.04%) [ +0.00% +0.04% +0.02% / -0.04% +0.07% +0.05%] index_select reverse : Elapsed 0.056 ms (5.576 ms / 100) 5.558 -> 5.558 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.13% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.555 -> 5.556 ( +0.02%) [ +0.07% +0.14% +0.00% / +0.02% +0.13% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.573 -> 5.578 ( +0.09%) [ +0.00% +0.02% +0.02% / +0.20% +0.09% +0.11%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.00% +0.22% +0.11% / -0.07% +0.25% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.570 ms / 100) 5.574 -> 5.575 ( +0.02%) [ +0.13% +0.04% +0.00% / +0.02% +0.09% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.27% +0.18% +0.00% / +0.02% +0.36% +0.34%] index_select strided 7 : Elapsed 0.056 ms (5.582 ms / 100) 5.559 -> 5.556 ( -0.05%) [ +0.09% +0.00% +0.07% / -0.05% +0.11% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.572 -> 5.578 ( +0.11%) [ +0.07% +0.04% +0.00% / +0.11% +0.13% +0.14%] index_select random : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.577 ( +0.09%) [ +0.00% +0.04% +0.02% / +0.16% +0.20% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) B = [20, 40, 4, 5] (stride (40, 1, 4000, 800)) A = [16, 40, 4, 5] (stride (40, 1, 640, 2560)) dim = 0 3.933 -> 3.937 ( +0.10%) [ +0.15% +0.10% +0.00% / +0.10% +0.74% +0.76%] index_add_ linear : Elapsed 0.039 ms (3.939 ms / 100) 3.792 -> 3.793 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.82% +0.79%] index_copy_ linear : Elapsed 0.038 ms (3.792 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.87% +0.79%] index_add_ reverse : Elapsed 0.039 ms (3.931 ms / 100) 3.782 -> 3.781 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.82% +0.79%] index_copy_ reverse : Elapsed 0.038 ms (3.783 ms / 100) 3.931 -> 3.930 ( -0.03%) [ +0.08% +0.00% +0.15% / -0.03% +0.71% +0.89%] index_add_ spread : Elapsed 0.039 ms (3.934 ms / 100) 3.785 -> 3.786 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.71% +0.71%] index_copy_ spread : Elapsed 0.038 ms (3.786 ms / 100) 3.938 -> 3.943 ( +0.13%) [ +0.08% +0.00% +0.23% / +0.13% +0.86% +0.99%] index_add_ strided 3 : Elapsed 0.039 ms (3.941 ms / 100) 3.786 -> 3.787 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.03% +0.74% +0.85%] index_copy_ strided 3 : Elapsed 0.038 ms (3.787 ms / 100) 3.943 -> 3.951 ( +0.20%) [ +0.18% +0.18% +0.00% / +0.20% +0.76% +0.74%] index_add_ strided 7 : Elapsed 0.040 ms (3.950 ms / 100) 3.791 -> 3.794 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.58% +0.58%] index_copy_ strided 7 : Elapsed 0.038 ms (3.793 ms / 100) 3.933 -> 3.936 ( +0.08%) [ +0.00% +0.10% +0.03% / +0.08% +0.74% +0.74%] index_add_ perm : Elapsed 0.039 ms (3.933 ms / 100) 3.787 -> 3.788 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.61% +0.66%] index_copy_ perm : Elapsed 0.038 ms (3.788 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.69% +0.76%] index_add_ perm_sorted : Elapsed 0.039 ms (3.930 ms / 100) 3.784 -> 3.785 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.786 ms / 100) 5.481 -> 5.479 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.04% +0.05%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.506 -> 5.503 ( -0.05%) [ +0.00% +0.09% +0.09% / +0.16% -0.05% -0.04%] index_select wrap : Elapsed 0.055 ms (5.506 ms / 100) 5.500 -> 5.504 ( +0.07%) [ +0.11% +0.00% +0.09% / +0.20% +0.07% +0.16%] index_select linear : Elapsed 0.055 ms (5.506 ms / 100) 5.503 -> 5.505 ( +0.04%) [ +0.18% +0.07% +0.00% / +0.04% +0.04% +0.15%] index_select reverse : Elapsed 0.055 ms (5.513 ms / 100) 5.479 -> 5.475 ( -0.07%) [ +0.07% +0.02% +0.00% / -0.07% +0.05% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.477 -> 5.480 ( +0.05%) [ +0.00% +0.09% +0.02% / +0.05% +0.20% +0.16%] index_select skip256 : Elapsed 0.055 ms (5.477 ms / 100) 5.505 -> 5.499 ( -0.11%) [ +0.00% +0.02% +0.00% / +0.04% -0.02% -0.11%] index_select spread : Elapsed 0.055 ms (5.505 ms / 100) 5.508 -> 5.501 ( -0.13%) [ +0.07% +0.00% +0.00% / +0.07% -0.13% -0.09%] index_select strided 3 : Elapsed 0.055 ms (5.512 ms / 100) 5.508 -> 5.502 ( -0.11%) [ +0.05% +0.00% +0.05% / -0.11% -0.05% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.511 ms / 100) 5.502 -> 5.503 ( +0.02%) [ +0.24% +0.16% +0.00% / +0.05% +0.07% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.515 ms / 100) 5.480 -> 5.483 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.16% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.480 ms / 100) 5.501 -> 5.506 ( +0.09%) [ +0.00% +0.07% +0.05% / +0.09% +0.09% +0.16%] index_select random : Elapsed 0.055 ms (5.501 ms / 100) 5.495 -> 5.497 ( +0.04%) [ +0.07% +0.00% +0.09% / +0.09% +0.09% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.499 ms / 100) B = [20, 40, 4, 5] (stride (1, 20, 4000, 800)) A = [16, 40, 4, 5] (stride (1, 80, 3200, 16)) dim = 0 3.946 -> 3.938 ( -0.20%) [ +0.03% +0.10% +0.00% / -0.20% +0.91% +0.86%] index_add_ linear : Elapsed 0.039 ms (3.947 ms / 100) 3.797 -> 3.793 ( -0.11%) [ +0.16% +0.29% +0.00% / -0.11% +1.13% +0.95%] index_copy_ linear : Elapsed 0.038 ms (3.803 ms / 100) 3.949 -> 3.948 ( -0.03%) [ +0.05% +0.00% +0.15% / -0.03% +1.04% +0.94%] index_add_ reverse : Elapsed 0.040 ms (3.951 ms / 100) 3.789 -> 3.789 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.00% +1.00% +0.87%] index_copy_ reverse : Elapsed 0.038 ms (3.790 ms / 100) 3.944 -> 3.951 ( +0.18%) [ +0.00% +0.10% +0.13% / +0.18% +0.96% +0.96%] index_add_ spread : Elapsed 0.039 ms (3.944 ms / 100) 3.792 -> 3.803 ( +0.29%) [ +0.16% +0.24% +0.00% / +0.29% +0.87% +0.82%] index_copy_ spread : Elapsed 0.038 ms (3.798 ms / 100) 3.947 -> 3.943 ( -0.10%) [ +0.08% +0.03% +0.00% / -0.10% +0.96% +0.86%] index_add_ strided 3 : Elapsed 0.040 ms (3.950 ms / 100) 3.789 -> 3.792 ( +0.08%) [ +0.13% +0.05% +0.00% / +0.08% +0.90% +0.87%] index_copy_ strided 3 : Elapsed 0.038 ms (3.794 ms / 100) 3.948 -> 3.950 ( +0.05%) [ +0.13% +0.18% +0.00% / +0.05% +0.96% +0.99%] index_add_ strided 7 : Elapsed 0.040 ms (3.953 ms / 100) 3.787 -> 3.789 ( +0.05%) [ +0.16% +0.24% +0.00% / +0.05% +0.90% +1.03%] index_copy_ strided 7 : Elapsed 0.038 ms (3.793 ms / 100) 3.946 -> 3.948 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.86% +0.96%] index_add_ perm : Elapsed 0.039 ms (3.946 ms / 100) 3.799 -> 3.805 ( +0.16%) [ +0.08% +0.00% +0.11% / +0.16% +1.16% +1.05%] index_copy_ perm : Elapsed 0.038 ms (3.802 ms / 100) 3.944 -> 3.944 ( +0.00%) [ +0.10% +0.13% +0.00% / +0.00% +0.99% +0.89%] index_add_ perm_sorted : Elapsed 0.039 ms (3.948 ms / 100) 3.791 -> 3.794 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.90% +0.76%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.791 ms / 100) 5.478 -> 5.481 ( +0.05%) [ +0.02% +0.15% +0.00% / +0.13% +0.07% +0.05%] index_select const : Elapsed 0.055 ms (5.479 ms / 100) 5.485 -> 5.480 ( -0.09%) [ +0.15% +0.00% +0.09% / -0.09% +0.15% +0.16%] index_select wrap : Elapsed 0.055 ms (5.493 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.04% +0.15% +0.00% / +0.07% +0.11% +0.18%] index_select linear : Elapsed 0.055 ms (5.485 ms / 100) 5.479 -> 5.482 ( +0.05%) [ +0.00% +0.05% +0.11% / +0.05% +0.05% +0.24%] index_select reverse : Elapsed 0.055 ms (5.479 ms / 100) 5.472 -> 5.470 ( -0.04%) [ +0.15% +0.13% +0.00% / +0.04% +0.26% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.480 ms / 100) 5.477 -> 5.481 ( +0.07%) [ +0.04% +0.02% +0.00% / +0.26% +0.16% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.00% +0.05% +0.04% / -0.02% +0.16% +0.00%] index_select spread : Elapsed 0.055 ms (5.483 ms / 100) 5.480 -> 5.483 ( +0.05%) [ +0.13% +0.05% +0.00% / +0.16% +0.05% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.487 ms / 100) 5.479 -> 5.486 ( +0.13%) [ +0.00% +0.15% +0.04% / +0.13% +0.24% +0.29%] index_select strided 5 : Elapsed 0.055 ms (5.479 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.07% +0.00% +0.15% / +0.09% +0.27% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.00% +0.16% +0.05% / -0.02% +0.15% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.483 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.02% +0.00% +0.02% / -0.04% +0.07% +0.02%] index_select random : Elapsed 0.055 ms (5.487 ms / 100) 5.489 -> 5.485 ( -0.07%) [ +0.00% +0.04% +0.02% / -0.07% +0.00% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [20, 40, 4, 5] (stride (1, 80, 20, 3200)) A = [16, 40, 4, 5] (stride (800, 20, 5, 1)) dim = 0 3.009 -> 3.010 ( +0.03%) [ +0.00% +0.07% +0.00% / +0.03% +0.66% +0.70%] index_add_ linear : Elapsed 0.030 ms (3.009 ms / 100) 2.879 -> 2.881 ( +0.07%) [ +0.03% +0.00% +0.00% / +0.07% +0.83% +0.76%] index_copy_ linear : Elapsed 0.029 ms (2.880 ms / 100) 3.020 -> 3.019 ( -0.03%) [ +0.10% +0.07% +0.00% / -0.03% +0.86% +0.99%] index_add_ reverse : Elapsed 0.030 ms (3.023 ms / 100) 2.894 -> 2.894 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.83% +0.83%] index_copy_ reverse : Elapsed 0.029 ms (2.894 ms / 100) 3.009 -> 3.013 ( +0.13%) [ +0.17% +0.00% +0.13% / +0.13% +0.90% +1.03%] index_add_ spread : Elapsed 0.030 ms (3.014 ms / 100) 2.881 -> 2.882 ( +0.03%) [ +0.10% +0.00% +0.14% / +0.03% +0.97% +0.94%] index_copy_ spread : Elapsed 0.029 ms (2.884 ms / 100) 3.014 -> 3.014 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.83% +0.80%] index_add_ strided 3 : Elapsed 0.030 ms (3.015 ms / 100) 2.887 -> 2.886 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.80% +0.83%] index_copy_ strided 3 : Elapsed 0.029 ms (2.887 ms / 100) 3.016 -> 3.017 ( +0.03%) [ +0.07% +0.00% +0.00% / +0.03% +0.90% +0.66%] index_add_ strided 7 : Elapsed 0.030 ms (3.018 ms / 100) 2.889 -> 2.892 ( +0.10%) [ +0.07% +0.03% +0.00% / +0.10% +0.93% +0.69%] index_copy_ strided 7 : Elapsed 0.029 ms (2.891 ms / 100) 3.013 -> 3.014 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.86% +0.63%] index_add_ perm : Elapsed 0.030 ms (3.013 ms / 100) 2.880 -> 2.885 ( +0.17%) [ +0.10% +0.24% +0.00% / +0.17% +1.08% +0.87%] index_copy_ perm : Elapsed 0.029 ms (2.883 ms / 100) 3.022 -> 3.023 ( +0.03%) [ +0.13% +0.17% +0.00% / +0.03% +0.69% +0.73%] index_add_ perm_sorted : Elapsed 0.030 ms (3.026 ms / 100) 2.895 -> 2.896 ( +0.03%) [ +0.10% +0.07% +0.00% / +0.03% +0.66% +0.62%] index_copy_ perm_sorted : Elapsed 0.029 ms (2.898 ms / 100) 5.301 -> 5.301 ( +0.00%) [ +0.02% +0.11% +0.00% / +0.06% +0.00% +0.11%] index_select const : Elapsed 0.053 ms (5.302 ms / 100) 5.316 -> 5.312 ( -0.08%) [ +0.02% +0.08% +0.00% / -0.08% +0.04% +0.00%] index_select wrap : Elapsed 0.053 ms (5.317 ms / 100) 5.316 -> 5.318 ( +0.04%) [ +0.00% +0.09% +0.21% / +0.15% +0.04% +0.11%] index_select linear : Elapsed 0.053 ms (5.316 ms / 100) 5.320 -> 5.317 ( -0.06%) [ +0.00% +0.06% +0.08% / -0.06% -0.04% -0.02%] index_select reverse : Elapsed 0.053 ms (5.320 ms / 100) 5.299 -> 5.303 ( +0.08%) [ +0.00% +0.04% +0.26% / +0.09% +0.08% +0.15%] index_select skip64 : Elapsed 0.053 ms (5.299 ms / 100) 5.304 -> 5.304 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.02% +0.04%] index_select skip256 : Elapsed 0.053 ms (5.304 ms / 100) 5.313 -> 5.316 ( +0.06%) [ +0.04% +0.15% +0.00% / +0.17% +0.11% +0.06%] index_select spread : Elapsed 0.053 ms (5.315 ms / 100) 5.319 -> 5.320 ( +0.02%) [ +0.00% +0.11% +0.02% / +0.21% +0.06% +0.02%] index_select strided 3 : Elapsed 0.053 ms (5.319 ms / 100) 5.321 -> 5.317 ( -0.08%) [ +0.02% +0.02% +0.00% / +0.04% -0.08% -0.04%] index_select strided 5 : Elapsed 0.053 ms (5.322 ms / 100) 5.315 -> 5.315 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.15% +0.21%] index_select strided 7 : Elapsed 0.053 ms (5.315 ms / 100) 5.302 -> 5.300 ( -0.04%) [ +0.00% +0.09% +0.09% / -0.04% +0.19% +0.00%] index_select strided 8 : Elapsed 0.053 ms (5.302 ms / 100) 5.311 -> 5.315 ( +0.08%) [ +0.11% +0.00% +0.08% / +0.11% +0.08% +0.08%] index_select random : Elapsed 0.053 ms (5.317 ms / 100) 5.322 -> 5.315 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.00% -0.13%] index_select random_sorted : Elapsed 0.053 ms (5.322 ms / 100) out_shape = [16, 20, 4, 5] in_shape = [16, 40, 4, 5] idx_dim = 1 B = [16, 20, 4, 5] (stride (400, 1, 100, 20)) A = [16, 40, 4, 5] (stride (200, 5, 3200, 1)) dim = 1 2.394 -> 2.398 ( +0.17%) [ +0.00% +0.04% +0.00% / +0.17% +0.21% +0.21%] index_select const : Elapsed 0.024 ms (2.394 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.08% +0.12%] index_select wrap : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.21% +0.00% +0.00%] index_select linear : Elapsed 0.024 ms (2.411 ms / 100) 2.410 -> 2.408 ( -0.08%) [ +0.21% +0.12% +0.00% / +0.17% -0.08% +0.17%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.395 -> 2.397 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.17% +0.13% +0.08%] index_select skip64 : Elapsed 0.024 ms (2.397 ms / 100) 2.394 -> 2.396 ( +0.08%) [ +0.00% +0.00% +0.13% / +0.17% +0.29% +0.08%] index_select skip256 : Elapsed 0.024 ms (2.394 ms / 100) 2.420 -> 2.423 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.29% +0.29%] index_select spread : Elapsed 0.024 ms (2.423 ms / 100) 2.422 -> 2.418 ( -0.17%) [ +0.04% +0.21% +0.00% / +0.04% -0.17% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.405 -> 2.407 ( +0.08%) [ +0.25% +0.21% +0.00% / +0.37% +0.37% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.411 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.12% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.401 -> 2.404 ( +0.12%) [ +0.17% +0.00% +0.08% / +0.12% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.405 ms / 100) 2.400 -> 2.406 ( +0.25%) [ +0.13% +0.00% +0.13% / +0.37% +0.33% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.403 ms / 100) 2.412 -> 2.418 ( +0.25%) [ +0.00% +0.17% +0.08% / +0.25% +0.25% +0.25%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.417 -> 2.416 ( -0.04%) [ +0.00% +0.08% +0.08% / +0.17% +0.04% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.418 -> 2.414 ( -0.17%) [ +0.21% +0.00% +0.08% / -0.17% -0.04% -0.04%] index_select perm : Elapsed 0.024 ms (2.423 ms / 100) 2.417 -> 2.415 ( -0.08%) [ +0.00% +0.08% +0.21% / +0.17% -0.08% -0.08%] index_select perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) B = [16, 20, 4, 5] (stride (400, 4, 1, 80)) A = [16, 40, 4, 5] (stride (800, 1, 200, 40)) dim = 1 2.407 -> 2.411 ( +0.17%) [ +0.25% +0.00% +0.04% / +0.17% +0.21% +0.29%] index_select const : Elapsed 0.024 ms (2.413 ms / 100) 2.416 -> 2.413 ( -0.12%) [ +0.12% +0.04% +0.00% / +0.17% -0.12% -0.12%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.417 -> 2.411 ( -0.25%) [ +0.17% +0.04% +0.00% / -0.08% -0.25% -0.25%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.12% +0.12% +0.08%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.08% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.410 ms / 100) 2.409 -> 2.412 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.12% +0.17% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.411 ms / 100) 2.418 -> 2.418 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.17% +0.29%] index_select spread : Elapsed 0.024 ms (2.418 ms / 100) 2.420 -> 2.418 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.25% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.00% +0.50% +0.00% / +0.12% +0.29% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.21% +0.00% +0.29% / +0.17% +0.74% +0.37%] index_select strided 7 : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.12% +0.00% +0.12% / +0.21% -0.04% +0.00%] index_select strided 8 : Elapsed 0.024 ms (2.423 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.12% +0.00% +0.04% / +0.04% +0.33% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.421 ms / 100) 2.418 -> 2.421 ( +0.12%) [ +0.04% +0.00% +0.17% / +0.12% +0.29% +0.25%] index_select random : Elapsed 0.024 ms (2.419 ms / 100) 2.420 -> 2.418 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.04% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.421 ms / 100) 2.418 -> 2.420 ( +0.08%) [ +0.21% +0.08% +0.00% / +0.08% +0.17% +0.17%] index_select perm : Elapsed 0.024 ms (2.423 ms / 100) 2.420 -> 2.421 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.17% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) B = [16, 20, 4, 5] (stride (20, 320, 1, 4)) A = [16, 40, 4, 5] (stride (5, 320, 80, 1)) dim = 1 2.444 -> 2.443 ( -0.04%) [ +0.00% +0.16% +0.12% / -0.04% +0.04% +0.12%] index_select const : Elapsed 0.024 ms (2.444 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.00% +0.12% +0.04% / -0.04% -0.04% +0.12%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.00% +0.12% +0.29% / +0.12% +0.08% -0.04%] index_select linear : Elapsed 0.025 ms (2.456 ms / 100) 2.456 -> 2.452 ( -0.16%) [ +0.33% +0.16% +0.00% / +0.00% -0.16% +0.04%] index_select reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.444 -> 2.444 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.12% +0.00% +0.29%] index_select skip64 : Elapsed 0.024 ms (2.444 ms / 100) 2.443 -> 2.445 ( +0.08%) [ +0.12% +0.04% +0.00% / +0.08% +0.33% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.08% +0.00% +0.16% / +0.08% +0.04% +0.08%] index_select spread : Elapsed 0.025 ms (2.457 ms / 100) 2.459 -> 2.457 ( -0.08%) [ +0.08% +0.00% +0.04% / +0.04% +0.04% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.00% +0.04% +0.08% / +0.24% -0.08% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.08% +0.20% +0.00% / +0.04% +0.00% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.445 -> 2.449 ( +0.16%) [ +0.08% +0.12% +0.00% / +0.16% +0.25% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.29% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.453 ( +0.20%) [ +0.00% +0.20% +0.16% / +0.25% +0.37% +0.20%] index_select random : Elapsed 0.024 ms (2.448 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.29% +0.08% / +0.20% +0.12% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.452 ms / 100) 2.460 -> 2.456 ( -0.16%) [ +0.08% +0.00% +0.12% / +0.04% -0.12% -0.16%] index_select perm : Elapsed 0.025 ms (2.462 ms / 100) 2.464 -> 2.451 ( -0.53%) [ +0.12% +0.04% +0.00% / +0.04% -0.53% -0.45%] index_select perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) B = [16, 20, 4, 5] (stride (4, 320, 1, 64)) A = [16, 40, 4, 5] (stride (800, 1, 40, 160)) dim = 1 2.447 -> 2.452 ( +0.20%) [ +0.12% +0.00% +0.00% / +0.20% +0.29% +0.33%] index_select const : Elapsed 0.024 ms (2.450 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.12% +0.08% +0.00% / +0.00% -0.12% -0.12%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.20% +0.08% +0.00% / +0.16% -0.08% -0.04%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.20% +0.12% +0.45%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.16% +0.00% +0.08% / +0.20% +0.04% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.45% +0.24%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.20% +0.16% +0.00% / +0.04% +0.45% +0.33%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.460 -> 2.464 ( +0.16%) [ +0.00% +0.12% +0.16% / +0.16% +0.28% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.00% +0.12% +0.04% / +0.08% +0.33% +0.41%] index_select strided 5 : Elapsed 0.025 ms (2.458 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.04% +0.08% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.00% +0.04% +0.20% / +0.04% +0.08% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.461 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.49% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.459 ms / 100) 2.459 -> 2.462 ( +0.12%) [ +0.00% +0.20% +0.08% / +0.12% +0.37% +0.33%] index_select random : Elapsed 0.025 ms (2.459 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.00% +0.28% +0.20% / +0.00% +0.16% +0.37%] index_select random_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.459 -> 2.465 ( +0.24%) [ +0.00% +0.20% +0.37% / +0.24% +0.24% +0.41%] index_select perm : Elapsed 0.025 ms (2.459 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.00% +0.12% +0.12% / +0.45% +0.08% +0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.462 ms / 100) B = [16, 20, 4, 5] (stride (1, 320, 16, 64)) A = [16, 40, 4, 5] (stride (160, 4, 1, 2560)) dim = 1 2.446 -> 2.449 ( +0.12%) [ +0.04% +0.29% +0.00% / +0.33% +0.25% +0.12%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.466 -> 2.466 ( +0.00%) [ +0.00% +0.12% +0.20% / +0.08% +0.08% +0.00%] index_select wrap : Elapsed 0.025 ms (2.466 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.16% +0.12% +0.00% / -0.04% +0.04% +0.04%] index_select linear : Elapsed 0.025 ms (2.469 ms / 100) 2.467 -> 2.465 ( -0.08%) [ +0.16% +0.00% +0.12% / -0.04% +0.04% -0.08%] index_select reverse : Elapsed 0.025 ms (2.471 ms / 100) 2.448 -> 2.451 ( +0.12%) [ +0.16% +0.00% +0.20% / +0.12% +0.25% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.448 -> 2.452 ( +0.16%) [ +0.04% +0.00% +0.16% / +0.20% +0.29% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.24% +0.00% +0.12%] index_select spread : Elapsed 0.025 ms (2.478 ms / 100) 2.475 -> 2.474 ( -0.04%) [ +0.20% +0.08% +0.00% / +0.16% -0.04% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.480 ms / 100) 2.466 -> 2.464 ( -0.08%) [ +0.04% +0.00% +0.08% / +0.20% -0.08% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.474 -> 2.476 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.12% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.12% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.08% +0.00% +0.08%] index_select strided 16 : Elapsed 0.025 ms (2.458 ms / 100) 2.468 -> 2.467 ( -0.04%) [ +0.00% +0.08% +0.24% / +0.08% -0.04% +0.08%] index_select random : Elapsed 0.025 ms (2.468 ms / 100) 2.470 -> 2.467 ( -0.12%) [ +0.04% +0.00% +0.08% / +0.16% -0.12% -0.12%] index_select random_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.470 ( -0.08%) [ +0.00% +0.12% +0.04% / +0.00% +0.00% -0.08%] index_select perm : Elapsed 0.025 ms (2.472 ms / 100) 2.477 -> 2.468 ( -0.36%) [ +0.12% +0.20% +0.00% / +0.08% -0.36% +0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.480 ms / 100) B = [16, 20, 4, 5] (stride (1, 320, 16, 64)) A = [16, 40, 4, 5] (stride (4, 64, 1, 2560)) dim = 1 2.447 -> 2.447 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.12%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.463 -> 2.457 ( -0.24%) [ +0.00% +0.12% +0.24% / +0.04% -0.24% -0.20%] index_select wrap : Elapsed 0.025 ms (2.463 ms / 100) 2.462 -> 2.459 ( -0.12%) [ +0.00% +0.12% +0.24% / +0.16% -0.12% -0.12%] index_select linear : Elapsed 0.025 ms (2.462 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.04% +0.00% +0.24% / +0.28% +0.00% +0.04%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.449 -> 2.448 ( -0.04%) [ +0.00% +0.20% +0.00% / +0.00% +0.12% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.20% +0.00% +0.12% / +0.12% +0.04% +0.25%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.459 -> 2.463 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.37% +0.37%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.12% +0.04% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.451 -> 2.455 ( +0.16%) [ +0.00% +0.12% +0.12% / +0.16% +0.29% +0.33%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.462 -> 2.464 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.12% +0.08% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.464 ms / 100) 2.454 -> 2.448 ( -0.24%) [ +0.12% +0.20% +0.00% / +0.04% -0.24% -0.08%] index_select strided 8 : Elapsed 0.025 ms (2.457 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.16% +0.00% +0.04% / +0.20% +0.12% +0.29%] index_select strided 16 : Elapsed 0.025 ms (2.454 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.24% +0.20%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.458 -> 2.459 ( +0.04%) [ +0.00% +0.04% +0.20% / +0.04% +0.16% +0.16%] index_select random_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.461 -> 2.462 ( +0.04%) [ +0.12% +0.00% +0.12% / +0.04% +0.16% +0.20%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.464 -> 2.462 ( -0.08%) [ +0.04% +0.00% +0.08% / +0.04% +0.08% -0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) B = [16, 20, 4, 5] (stride (5, 80, 1600, 1)) A = [16, 40, 4, 5] (stride (1, 64, 16, 2560)) dim = 1 2.447 -> 2.447 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.04% +0.16% +0.00%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.00% +0.20% +0.04% / -0.08% +0.00% +0.12%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.04% +0.00% +0.04%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.457 ( +0.04%) [ +0.20% +0.37% +0.00% / +0.08% +0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.449 -> 2.447 ( -0.08%) [ +0.00% +0.00% +0.16% / +0.12% -0.08% -0.08%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.446 ( -0.04%) [ +0.08% +0.00% +0.12% / -0.04% +0.16% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.460 -> 2.457 ( -0.12%) [ +0.04% +0.00% +0.00% / -0.12% -0.08% -0.04%] index_select spread : Elapsed 0.025 ms (2.461 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.12% +0.20% +0.00% / +0.24% +0.00% +0.29%] index_select strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.12% +0.00% +0.16% / +0.24% +0.24% +0.12%] index_select strided 5 : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.12% +0.20% +0.00% / -0.04% +0.12% +0.29%] index_select strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.446 -> 2.446 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.37% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.446 ms / 100) 2.450 -> 2.449 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.00% +0.20%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.452 -> 2.452 ( +0.00%) [ +0.16% +0.20% +0.00% / +0.00% +0.16% +0.20%] index_select random : Elapsed 0.025 ms (2.456 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.08% +0.24% +0.00% / +0.04% -0.08% -0.16%] index_select random_sorted : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.457 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.12% +0.20% +0.20%] index_select perm : Elapsed 0.025 ms (2.456 ms / 100) 2.460 -> 2.451 ( -0.37%) [ +0.00% +0.16% +0.04% / +0.00% -0.37% -0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) B = [16, 20, 4, 5] (stride (1, 80, 1600, 16)) A = [16, 40, 4, 5] (stride (20, 320, 5, 1)) dim = 1 2.406 -> 2.408 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.17% +0.21%] index_select const : Elapsed 0.024 ms (2.409 ms / 100) 2.419 -> 2.410 ( -0.37%) [ +0.08% +0.00% +0.00% / -0.04% -0.37% -0.21%] index_select wrap : Elapsed 0.024 ms (2.421 ms / 100) 2.417 -> 2.413 ( -0.17%) [ +0.17% +0.00% +0.12% / +0.17% -0.17% -0.04%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.17% +0.00% +0.04% / +0.12% +0.12% +0.12%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.410 -> 2.404 ( -0.25%) [ +0.00% +0.17% +0.00% / +0.00% +0.00% -0.25%] index_select skip64 : Elapsed 0.024 ms (2.410 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.00% +0.17% +0.21% / +0.17% +0.12% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.408 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.21% +0.12% +0.08%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.417 -> 2.415 ( -0.08%) [ +0.04% +0.00% +0.12% / -0.08% -0.08% +0.00%] index_select strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.12% +0.00% +0.21% / +0.04% +0.08% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.413 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.04% -0.04% -0.08%] index_select strided 7 : Elapsed 0.024 ms (2.419 ms / 100) 2.410 -> 2.407 ( -0.12%) [ +0.21% +0.04% +0.00% / +0.08% +0.17% -0.12%] index_select strided 8 : Elapsed 0.024 ms (2.415 ms / 100) 2.411 -> 2.410 ( -0.04%) [ +0.00% +0.00% +0.00% / +0.08% -0.04% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.411 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.08% +0.00% +0.17% / +0.21% +0.12% +0.37%] index_select random : Elapsed 0.024 ms (2.413 ms / 100) 2.416 -> 2.410 ( -0.25%) [ +0.04% +0.00% +0.08% / +0.04% -0.25% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.418 -> 2.412 ( -0.25%) [ +0.04% +0.17% +0.00% / +0.08% -0.25% -0.12%] index_select perm : Elapsed 0.024 ms (2.419 ms / 100) 2.417 -> 2.414 ( -0.12%) [ +0.08% +0.12% +0.00% / +0.08% -0.12% -0.04%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [16, 20, 4, 5] (stride (4, 64, 1, 1280)) A = [16, 40, 4, 5] (stride (1, 16, 640, 2560)) dim = 1 2.448 -> 2.452 ( +0.16%) [ +0.00% +0.00% +0.04% / +0.16% +0.25% +0.25%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.467 -> 2.466 ( -0.04%) [ +0.24% +0.00% +0.12% / -0.04% +0.08% +0.16%] index_select wrap : Elapsed 0.025 ms (2.473 ms / 100) 2.469 -> 2.465 ( -0.16%) [ +0.00% +0.04% +0.00% / -0.16% +0.04% -0.04%] index_select linear : Elapsed 0.025 ms (2.469 ms / 100) 2.467 -> 2.463 ( -0.16%) [ +0.12% +0.00% +0.04% / +0.08% -0.16% +0.00%] index_select reverse : Elapsed 0.025 ms (2.470 ms / 100) 2.453 -> 2.450 ( -0.12%) [ +0.04% +0.00% +0.08% / -0.12% -0.04% -0.12%] index_select skip64 : Elapsed 0.025 ms (2.454 ms / 100) 2.450 -> 2.453 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.16% +0.20% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.08% +0.20% +0.00% / +0.16% +0.00% +0.08%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.12% +0.00% +0.16% / +0.16% +0.16% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.16% +0.08% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.460 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.28% +0.28%] index_select strided 7 : Elapsed 0.025 ms (2.466 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.00% +0.08% +0.20% / +0.16% +0.16% +0.16%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.00% +0.08% +0.12% / +0.16% +0.12% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.456 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.20% +0.20% +0.00% / +0.16% +0.04% +0.41%] index_select random : Elapsed 0.025 ms (2.467 ms / 100) 2.464 -> 2.461 ( -0.12%) [ +0.00% +0.12% +0.04% / -0.12% +0.04% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.466 -> 2.468 ( +0.08%) [ +0.00% +0.12% +0.24% / +0.08% +0.20% +0.08%] index_select perm : Elapsed 0.025 ms (2.466 ms / 100) 2.469 -> 2.462 ( -0.28%) [ +0.12% +0.00% +0.12% / +0.20% -0.16% -0.28%] index_select perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) out_shape = [16, 40, 20, 5] in_shape = [16, 40, 4, 5] idx_dim = 2 B = [16, 40, 20, 5] (stride (4000, 5, 200, 1)) A = [16, 40, 4, 5] (stride (1, 320, 16, 64)) dim = 2 2.241 -> 2.240 ( -0.04%) [ +0.00% +0.18% +0.13% / +0.09% +0.09% -0.04%] index_add_ linear : Elapsed 0.022 ms (2.241 ms / 100) 2.174 -> 2.175 ( +0.05%) [ +0.14% +0.14% +0.00% / +0.09% +0.14% +0.05%] index_copy_ linear : Elapsed 0.022 ms (2.177 ms / 100) 2.240 -> 2.240 ( +0.00%) [ +0.00% +0.18% +0.09% / +0.00% +0.18% +0.27%] index_add_ reverse : Elapsed 0.022 ms (2.240 ms / 100) 2.176 -> 2.173 ( -0.14%) [ +0.00% +0.05% +0.05% / +0.05% +0.09% -0.14%] index_copy_ reverse : Elapsed 0.022 ms (2.176 ms / 100) 2.239 -> 2.238 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.31% +0.36%] index_add_ spread : Elapsed 0.022 ms (2.240 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.23% +0.23%] index_copy_ spread : Elapsed 0.022 ms (2.174 ms / 100) 2.238 -> 2.243 ( +0.22%) [ +0.00% +0.31% +0.04% / +0.22% +0.27% +0.31%] index_add_ strided 3 : Elapsed 0.022 ms (2.238 ms / 100) 2.175 -> 2.175 ( +0.00%) [ +0.00% +0.05% +0.14% / +0.00% +0.23% +0.18%] index_copy_ strided 3 : Elapsed 0.022 ms (2.175 ms / 100) 2.239 -> 2.242 ( +0.13%) [ +0.22% +0.09% +0.00% / +0.13% +0.18% +0.13%] index_add_ strided 7 : Elapsed 0.022 ms (2.244 ms / 100) 2.176 -> 2.176 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.05% +0.05%] index_copy_ strided 7 : Elapsed 0.022 ms (2.178 ms / 100) 2.236 -> 2.238 ( +0.09%) [ +0.00% +0.09% +0.22% / +0.09% +0.63% +0.54%] index_add_ perm : Elapsed 0.022 ms (2.236 ms / 100) 2.172 -> 2.176 ( +0.18%) [ +0.09% +0.00% +0.09% / +0.18% +0.28% +0.37%] index_copy_ perm : Elapsed 0.022 ms (2.174 ms / 100) 2.239 -> 2.239 ( +0.00%) [ +0.00% +0.04% +0.13% / +0.00% +0.63% +0.49%] index_add_ perm_sorted : Elapsed 0.022 ms (2.239 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.23% +0.37%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.175 ms / 100) 9.180 -> 9.178 ( -0.02%) [ +0.00% +0.14% +0.28% / -0.02% +0.35% +0.34%] index_select const : Elapsed 0.092 ms (9.180 ms / 100) 9.206 -> 9.213 ( +0.08%) [ +0.08% +0.09% +0.00% / +0.09% +0.15% +0.08%] index_select wrap : Elapsed 0.092 ms (9.213 ms / 100) 9.195 -> 9.189 ( -0.07%) [ +0.00% +0.24% +0.01% / -0.07% +0.35% +0.36%] index_select linear : Elapsed 0.092 ms (9.195 ms / 100) 9.223 -> 9.218 ( -0.05%) [ +0.12% +0.02% +0.00% / +0.12% -0.05% +0.12%] index_select reverse : Elapsed 0.092 ms (9.234 ms / 100) 9.189 -> 9.184 ( -0.05%) [ +0.12% +0.07% +0.00% / -0.05% +0.11% +0.35%] index_select skip64 : Elapsed 0.092 ms (9.200 ms / 100) 9.189 -> 9.186 ( -0.03%) [ +0.00% +0.13% +0.17% / -0.03% +0.22% +0.11%] index_select skip256 : Elapsed 0.092 ms (9.189 ms / 100) 9.223 -> 9.217 ( -0.07%) [ +0.05% +0.00% +0.14% / -0.07% +0.08% +0.05%] index_select spread : Elapsed 0.092 ms (9.228 ms / 100) 9.203 -> 9.204 ( +0.01%) [ +0.00% +0.36% +0.17% / +0.01% +0.25% +0.33%] index_select strided 3 : Elapsed 0.092 ms (9.203 ms / 100) 9.206 -> 9.216 ( +0.11%) [ +0.04% +0.00% +0.04% / +0.13% +0.21% +0.11%] index_select random : Elapsed 0.092 ms (9.210 ms / 100) 9.220 -> 9.235 ( +0.16%) [ +0.00% +0.09% +0.08% / +0.21% +0.16% +0.20%] index_select random_sorted : Elapsed 0.092 ms (9.220 ms / 100) B = [16, 40, 20, 5] (stride (1, 1600, 80, 16)) A = [16, 40, 4, 5] (stride (20, 320, 5, 1)) dim = 2 2.145 -> 2.149 ( +0.19%) [ +0.00% +0.19% +0.14% / +0.19% +0.84% +0.56%] index_add_ linear : Elapsed 0.021 ms (2.145 ms / 100) 2.094 -> 2.093 ( -0.05%) [ +0.19% +0.24% +0.00% / -0.05% +0.81% +0.76%] index_copy_ linear : Elapsed 0.021 ms (2.098 ms / 100) 2.144 -> 2.145 ( +0.05%) [ +0.19% +0.00% +0.09% / +0.05% +0.47% +0.51%] index_add_ reverse : Elapsed 0.021 ms (2.148 ms / 100) 2.096 -> 2.097 ( +0.05%) [ +0.00% +0.05% +0.19% / +0.05% +0.81% +0.57%] index_copy_ reverse : Elapsed 0.021 ms (2.096 ms / 100) 2.145 -> 2.146 ( +0.05%) [ +0.00% +0.19% +0.05% / +0.05% +0.37% +0.70%] index_add_ spread : Elapsed 0.021 ms (2.145 ms / 100) 2.090 -> 2.094 ( +0.19%) [ +0.24% +0.00% +0.10% / +0.19% +0.57% +0.57%] index_copy_ spread : Elapsed 0.021 ms (2.095 ms / 100) 2.151 -> 2.153 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.09% +0.74% +0.65%] index_add_ strided 3 : Elapsed 0.022 ms (2.154 ms / 100) 2.095 -> 2.098 ( +0.14%) [ +0.00% +0.38% +0.19% / +0.14% +0.76% +0.86%] index_copy_ strided 3 : Elapsed 0.021 ms (2.095 ms / 100) 2.148 -> 2.147 ( -0.05%) [ +0.19% +0.00% +0.19% / -0.05% +0.61% +0.47%] index_add_ strided 7 : Elapsed 0.022 ms (2.152 ms / 100) 2.095 -> 2.093 ( -0.10%) [ +0.10% +0.00% +0.19% / -0.10% +0.24% +0.67%] index_copy_ strided 7 : Elapsed 0.021 ms (2.097 ms / 100) 2.143 -> 2.138 ( -0.23%) [ +0.00% +0.23% +0.33% / -0.23% +0.56% +0.33%] index_add_ perm : Elapsed 0.021 ms (2.143 ms / 100) 2.090 -> 2.093 ( +0.14%) [ +0.05% +0.00% +0.48% / +0.14% +0.81% +0.38%] index_copy_ perm : Elapsed 0.021 ms (2.091 ms / 100) 2.144 -> 2.146 ( +0.09%) [ +0.00% +0.37% +0.05% / +0.09% +0.70% +0.79%] index_add_ perm_sorted : Elapsed 0.021 ms (2.144 ms / 100) 2.093 -> 2.097 ( +0.19%) [ +0.00% +0.19% +0.05% / +0.19% +0.57% +0.67%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.093 ms / 100) 9.235 -> 9.222 ( -0.14%) [ +0.00% +0.05% +0.03% / -0.14% +0.26% +0.10%] index_select const : Elapsed 0.092 ms (9.235 ms / 100) 9.246 -> 9.245 ( -0.01%) [ +0.17% +0.25% +0.00% / -0.01% +0.06% +0.14%] index_select wrap : Elapsed 0.093 ms (9.262 ms / 100) 9.240 -> 9.241 ( +0.01%) [ +0.13% +0.00% +0.09% / +0.03% +0.17% +0.01%] index_select linear : Elapsed 0.093 ms (9.252 ms / 100) 9.249 -> 9.249 ( +0.00%) [ +0.19% +0.13% +0.00% / +0.00% +0.12% +0.42%] index_select reverse : Elapsed 0.093 ms (9.267 ms / 100) 9.234 -> 9.235 ( +0.01%) [ +0.00% +0.15% +0.06% / +0.01% +0.21% +0.16%] index_select skip64 : Elapsed 0.092 ms (9.234 ms / 100) 9.231 -> 9.225 ( -0.06%) [ +0.05% +0.00% +0.09% / -0.06% +0.27% +0.23%] index_select skip256 : Elapsed 0.092 ms (9.236 ms / 100) 9.252 -> 9.269 ( +0.18%) [ +0.00% +0.27% +0.46% / +0.18% +0.39% +0.26%] index_select spread : Elapsed 0.093 ms (9.252 ms / 100) 9.242 -> 9.247 ( +0.05%) [ +0.00% +0.08% +0.17% / +0.05% +0.24% +0.25%] index_select strided 3 : Elapsed 0.092 ms (9.242 ms / 100) 9.241 -> 9.254 ( +0.14%) [ +0.04% +0.18% +0.00% / +0.14% +0.30% +0.27%] index_select random : Elapsed 0.092 ms (9.245 ms / 100) 9.265 -> 9.271 ( +0.06%) [ +0.00% +0.13% +0.09% / +0.06% +0.32% +0.19%] index_select random_sorted : Elapsed 0.093 ms (9.265 ms / 100) B = [16, 40, 20, 5] (stride (1, 80, 3200, 16)) A = [16, 40, 4, 5] (stride (800, 20, 1, 4)) dim = 2 1.976 -> 1.980 ( +0.20%) [ +0.00% +0.20% +0.25% / +0.20% +0.40% +0.46%] index_add_ linear : Elapsed 0.020 ms (1.976 ms / 100) 1.928 -> 1.931 ( +0.16%) [ +0.00% +0.05% +0.16% / +0.16% +0.57% +0.57%] index_copy_ linear : Elapsed 0.019 ms (1.928 ms / 100) 1.959 -> 1.960 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +0.77% +0.92%] index_add_ reverse : Elapsed 0.020 ms (1.959 ms / 100) 1.910 -> 1.911 ( +0.05%) [ +0.10% +0.00% +0.16% / +0.05% +0.99% +0.79%] index_copy_ reverse : Elapsed 0.019 ms (1.912 ms / 100) 1.960 -> 1.965 ( +0.26%) [ +0.20% +0.00% +0.15% / +0.26% +0.66% +0.61%] index_add_ spread : Elapsed 0.020 ms (1.964 ms / 100) 1.912 -> 1.915 ( +0.16%) [ +0.26% +0.21% +0.00% / +0.16% +0.78% +0.58%] index_copy_ spread : Elapsed 0.019 ms (1.917 ms / 100) 1.980 -> 1.979 ( -0.05%) [ +0.00% +0.00% +0.20% / -0.05% +0.30% +0.51%] index_add_ strided 3 : Elapsed 0.020 ms (1.980 ms / 100) 1.930 -> 1.930 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.47% +0.41%] index_copy_ strided 3 : Elapsed 0.019 ms (1.932 ms / 100) 1.966 -> 1.968 ( +0.10%) [ +0.10% +0.20% +0.00% / +0.10% +1.27% +1.22%] index_add_ strided 7 : Elapsed 0.020 ms (1.968 ms / 100) 1.915 -> 1.918 ( +0.16%) [ +0.00% +0.05% +0.31% / +0.16% +1.62% +1.15%] index_copy_ strided 7 : Elapsed 0.019 ms (1.915 ms / 100) 1.979 -> 1.981 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.81% +0.86%] index_add_ perm : Elapsed 0.020 ms (1.980 ms / 100) 1.927 -> 1.926 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.88% +0.88%] index_copy_ perm : Elapsed 0.019 ms (1.930 ms / 100) 1.968 -> 1.969 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +1.17% +1.22%] index_add_ perm_sorted : Elapsed 0.020 ms (1.968 ms / 100) 1.911 -> 1.918 ( +0.37%) [ +0.00% +0.26% +0.16% / +0.37% +1.36% +1.52%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.911 ms / 100) 8.763 -> 8.770 ( +0.08%) [ +0.29% +0.00% +0.17% / +0.11% +0.25% +0.08%] index_select const : Elapsed 0.088 ms (8.788 ms / 100) 8.766 -> 8.768 ( +0.02%) [ +0.11% +0.00% +0.03% / +0.02% +0.07% +0.10%] index_select wrap : Elapsed 0.088 ms (8.776 ms / 100) 8.765 -> 8.762 ( -0.03%) [ +0.19% +0.07% +0.00% / -0.03% +0.30% +0.16%] index_select linear : Elapsed 0.088 ms (8.782 ms / 100) 8.777 -> 8.780 ( +0.03%) [ +0.07% +0.00% +0.07% / +0.14% +0.03% +0.07%] index_select reverse : Elapsed 0.088 ms (8.783 ms / 100) 8.758 -> 8.774 ( +0.18%) [ +0.00% +0.14% +0.05% / +0.22% +0.18% +0.41%] index_select skip64 : Elapsed 0.088 ms (8.758 ms / 100) 8.765 -> 8.768 ( +0.03%) [ +0.00% +0.18% +0.21% / +0.06% +0.19% +0.03%] index_select skip256 : Elapsed 0.088 ms (8.765 ms / 100) 8.780 -> 8.768 ( -0.14%) [ +0.07% +0.00% +0.23% / -0.14% +0.05% +0.13%] index_select spread : Elapsed 0.088 ms (8.786 ms / 100) 8.770 -> 8.774 ( +0.05%) [ +0.13% +0.00% +0.06% / +0.05% +0.15% +0.16%] index_select strided 3 : Elapsed 0.088 ms (8.781 ms / 100) 8.767 -> 8.766 ( -0.01%) [ +0.00% +0.24% +0.18% / -0.01% +0.16% +0.10%] index_select random : Elapsed 0.088 ms (8.767 ms / 100) 8.758 -> 8.776 ( +0.21%) [ +0.19% +0.00% +0.08% / +0.21% +0.31% +0.24%] index_select random_sorted : Elapsed 0.088 ms (8.775 ms / 100) B = [16, 40, 20, 5] (stride (1, 16, 640, 12800)) A = [16, 40, 4, 5] (stride (1, 320, 16, 64)) dim = 2 2.246 -> 2.250 ( +0.18%) [ +0.13% +0.27% +0.00% / +0.18% +0.49% +0.36%] index_add_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.192 -> 2.194 ( +0.09%) [ +0.00% +0.23% +0.14% / +0.09% +0.27% +0.14%] index_copy_ linear : Elapsed 0.022 ms (2.192 ms / 100) 2.251 -> 2.249 ( -0.09%) [ +0.00% +0.13% +0.09% / +0.18% +0.00% -0.09%] index_add_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.00% +0.32% +0.23% / +0.05% +0.05% +0.09%] index_copy_ reverse : Elapsed 0.022 ms (2.193 ms / 100) 2.243 -> 2.247 ( +0.18%) [ +0.27% +0.27% +0.00% / +0.18% +0.67% +0.53%] index_add_ spread : Elapsed 0.022 ms (2.249 ms / 100) 2.191 -> 2.191 ( +0.00%) [ +0.14% +0.27% +0.00% / +0.00% +0.32% +0.46%] index_copy_ spread : Elapsed 0.022 ms (2.194 ms / 100) 2.243 -> 2.244 ( +0.04%) [ +0.09% +0.00% +0.09% / +0.04% +0.71% +0.53%] index_add_ strided 3 : Elapsed 0.022 ms (2.245 ms / 100) 2.188 -> 2.190 ( +0.09%) [ +0.18% +0.05% +0.00% / +0.09% +0.41% +0.50%] index_copy_ strided 3 : Elapsed 0.022 ms (2.192 ms / 100) 2.248 -> 2.248 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.40% +0.27%] index_add_ strided 7 : Elapsed 0.022 ms (2.248 ms / 100) 2.193 -> 2.193 ( +0.00%) [ +0.00% +0.23% +0.36% / +0.00% +0.09% +0.14%] index_copy_ strided 7 : Elapsed 0.022 ms (2.193 ms / 100) 2.243 -> 2.247 ( +0.18%) [ +0.22% +0.22% +0.00% / +0.18% +0.53% +0.58%] index_add_ perm : Elapsed 0.022 ms (2.248 ms / 100) 2.188 -> 2.187 ( -0.05%) [ +0.00% +0.09% +0.09% / -0.05% +0.64% +0.78%] index_copy_ perm : Elapsed 0.022 ms (2.188 ms / 100) 2.245 -> 2.248 ( +0.13%) [ +0.18% +0.00% +0.09% / +0.13% +0.58% +0.62%] index_add_ perm_sorted : Elapsed 0.022 ms (2.249 ms / 100) 2.188 -> 2.192 ( +0.18%) [ +0.27% +0.00% +0.00% / +0.18% +0.50% +0.59%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.194 ms / 100) 9.181 -> 9.197 ( +0.17%) [ +0.02% +0.00% +0.00% / +0.32% +0.17% +0.24%] index_select const : Elapsed 0.092 ms (9.183 ms / 100) 9.219 -> 9.230 ( +0.12%) [ +0.08% +0.00% +0.05% / +0.12% +0.20% +0.16%] index_select wrap : Elapsed 0.092 ms (9.226 ms / 100) 9.210 -> 9.194 ( -0.17%) [ +0.00% +0.09% +0.15% / -0.17% +0.18% +0.04%] index_select linear : Elapsed 0.092 ms (9.210 ms / 100) 9.201 -> 9.189 ( -0.13%) [ +0.08% +0.22% +0.00% / -0.13% +0.46% +0.38%] index_select reverse : Elapsed 0.092 ms (9.208 ms / 100) 9.173 -> 9.173 ( +0.00%) [ +0.25% +0.25% +0.00% / +0.00% +0.43% +0.48%] index_select skip64 : Elapsed 0.092 ms (9.196 ms / 100) 9.177 -> 9.197 ( +0.22%) [ +0.00% +0.09% +0.36% / +0.22% +0.38% +0.32%] index_select skip256 : Elapsed 0.092 ms (9.177 ms / 100) 9.222 -> 9.224 ( +0.02%) [ +0.02% +0.01% +0.00% / +0.18% +0.35% +0.02%] index_select spread : Elapsed 0.092 ms (9.224 ms / 100) 9.225 -> 9.225 ( +0.00%) [ +0.00% +0.03% +0.16% / +0.00% +0.25% +0.14%] index_select strided 3 : Elapsed 0.092 ms (9.225 ms / 100) 9.213 -> 9.228 ( +0.16%) [ +0.00% +0.11% +0.13% / +0.16% +0.28% +0.17%] index_select random : Elapsed 0.092 ms (9.213 ms / 100) 9.223 -> 9.233 ( +0.11%) [ +0.16% +0.10% +0.00% / +0.11% +0.23% +0.47%] index_select random_sorted : Elapsed 0.092 ms (9.238 ms / 100) out_shape = [16, 40, 4, 20] in_shape = [16, 40, 4, 5] idx_dim = 3 B = [16, 40, 4, 20] (stride (80, 1280, 20, 1)) A = [16, 40, 4, 5] (stride (200, 5, 3200, 1)) dim = 3 1.817 -> 1.812 ( -0.28%) [ +0.06% +0.22% +0.00% / +0.28% -0.28% +0.17%] index_add_ linear : Elapsed 0.018 ms (1.818 ms / 100) 1.783 -> 1.779 ( -0.22%) [ +0.06% +0.22% +0.00% / +0.34% -0.11% -0.22%] index_copy_ linear : Elapsed 0.018 ms (1.784 ms / 100) 1.814 -> 1.816 ( +0.11%) [ +0.44% +0.00% +0.06% / +0.33% +0.11% +0.17%] index_add_ reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.783 -> 1.780 ( -0.17%) [ +0.11% +0.34% +0.00% / +0.06% +0.00% -0.17%] index_copy_ reverse : Elapsed 0.018 ms (1.785 ms / 100) 1.832 -> 1.828 ( -0.22%) [ +0.11% +0.11% +0.00% / -0.11% -0.22% -0.11%] index_add_ spread : Elapsed 0.018 ms (1.834 ms / 100) 1.808 -> 1.801 ( -0.39%) [ +0.00% +0.39% +0.33% / +0.17% -0.17% -0.39%] index_copy_ spread : Elapsed 0.018 ms (1.808 ms / 100) 1.833 -> 1.835 ( +0.11%) [ +0.33% +0.00% +0.05% / +0.27% +0.11% +0.11%] index_add_ strided 3 : Elapsed 0.018 ms (1.839 ms / 100) 1.813 -> 1.807 ( -0.33%) [ +0.00% +0.17% +0.33% / -0.06% -0.17% -0.33%] index_copy_ strided 3 : Elapsed 0.018 ms (1.813 ms / 100) 1.840 -> 1.833 ( -0.38%) [ +0.05% +0.22% +0.00% / +0.11% -0.33% -0.38%] index_add_ strided 7 : Elapsed 0.018 ms (1.841 ms / 100) 1.817 -> 1.805 ( -0.66%) [ +0.17% +0.33% +0.00% / +0.06% -0.66% -0.61%] index_copy_ strided 7 : Elapsed 0.018 ms (1.820 ms / 100) 1.832 -> 1.829 ( -0.16%) [ +0.00% +0.22% +0.05% / +0.16% -0.16% +0.00%] index_add_ perm : Elapsed 0.018 ms (1.832 ms / 100) 1.807 -> 1.803 ( -0.22%) [ +0.33% +0.72% +0.00% / +0.11% -0.17% -0.22%] index_copy_ perm : Elapsed 0.018 ms (1.813 ms / 100) 1.835 -> 1.831 ( -0.22%) [ +0.38% +0.05% +0.00% / +0.05% -0.16% -0.22%] index_add_ perm_sorted : Elapsed 0.018 ms (1.842 ms / 100) 1.815 -> 1.806 ( -0.50%) [ +0.00% +0.00% +0.11% / -0.17% -0.44% -0.50%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.815 ms / 100) 8.305 -> 8.307 ( +0.02%) [ +0.45% +0.13% +0.00% / +0.02% +0.48% +0.42%] index_select const : Elapsed 0.083 ms (8.342 ms / 100) 8.300 -> 8.308 ( +0.10%) [ +0.20% +0.00% +0.16% / +0.10% +0.42% +0.81%] index_select wrap : Elapsed 0.083 ms (8.317 ms / 100) 8.323 -> 8.324 ( +0.01%) [ +0.00% +0.07% +0.22% / +0.01% +0.08% +0.47%] index_select linear : Elapsed 0.083 ms (8.323 ms / 100) 8.313 -> 8.308 ( -0.06%) [ +0.04% +0.00% +0.06% / -0.06% +0.41% +0.29%] index_select reverse : Elapsed 0.083 ms (8.316 ms / 100) 8.310 -> 8.332 ( +0.26%) [ +0.05% +0.10% +0.00% / +0.26% +0.41% +0.52%] index_select skip64 : Elapsed 0.083 ms (8.314 ms / 100) 8.316 -> 8.315 ( -0.01%) [ +0.00% +0.01% +0.04% / -0.01% +0.11% +0.07%] index_select skip256 : Elapsed 0.083 ms (8.316 ms / 100) 8.306 -> 8.321 ( +0.18%) [ +0.00% +0.25% +0.18% / +0.22% +0.18% +0.18%] index_select spread : Elapsed 0.083 ms (8.306 ms / 100) 8.308 -> 8.321 ( +0.16%) [ +0.00% +0.24% +0.34% / +0.34% +0.16% +0.41%] index_select strided 3 : Elapsed 0.083 ms (8.308 ms / 100) 8.311 -> 8.328 ( +0.20%) [ +0.13% +0.02% +0.00% / +0.20% +0.35% +0.20%] index_select random : Elapsed 0.083 ms (8.322 ms / 100) 8.302 -> 8.305 ( +0.04%) [ +0.00% +0.19% +0.08% / +0.04% +0.63% +0.23%] index_select random_sorted : Elapsed 0.083 ms (8.302 ms / 100) B = [16, 40, 4, 20] (stride (20, 1280, 320, 1)) A = [16, 40, 4, 5] (stride (1, 16, 3200, 640)) dim = 3 1.826 -> 1.819 ( -0.38%) [ +0.33% +0.00% +0.22% / +0.00% -0.27% -0.38%] index_add_ linear : Elapsed 0.018 ms (1.832 ms / 100) 1.791 -> 1.783 ( -0.45%) [ +0.17% +0.06% +0.00% / +0.11% -0.45% -0.22%] index_copy_ linear : Elapsed 0.018 ms (1.794 ms / 100) 1.829 -> 1.823 ( -0.33%) [ +0.22% +0.00% +0.11% / -0.16% -0.05% -0.33%] index_add_ reverse : Elapsed 0.018 ms (1.833 ms / 100) 1.790 -> 1.788 ( -0.11%) [ +0.45% +0.06% +0.00% / +0.50% +0.06% -0.11%] index_copy_ reverse : Elapsed 0.018 ms (1.798 ms / 100) 1.845 -> 1.837 ( -0.43%) [ +0.00% +0.16% +0.43% / +0.00% -0.43% -0.05%] index_add_ spread : Elapsed 0.018 ms (1.845 ms / 100) 1.816 -> 1.811 ( -0.28%) [ +0.00% +0.11% +0.39% / +0.28% -0.17% -0.28%] index_copy_ spread : Elapsed 0.018 ms (1.816 ms / 100) 1.846 -> 1.839 ( -0.38%) [ +0.11% +0.11% +0.00% / -0.33% -0.27% -0.38%] index_add_ strided 3 : Elapsed 0.018 ms (1.848 ms / 100) 1.815 -> 1.811 ( -0.22%) [ +0.06% +0.17% +0.00% / +0.00% -0.22% -0.22%] index_copy_ strided 3 : Elapsed 0.018 ms (1.816 ms / 100) 1.842 -> 1.839 ( -0.16%) [ +0.05% +0.00% +0.11% / +0.11% +0.05% -0.16%] index_add_ strided 7 : Elapsed 0.018 ms (1.843 ms / 100) 1.813 -> 1.809 ( -0.22%) [ +0.39% +0.33% +0.00% / -0.22% -0.22% -0.17%] index_copy_ strided 7 : Elapsed 0.018 ms (1.820 ms / 100) 1.842 -> 1.838 ( -0.22%) [ +0.54% +0.27% +0.00% / +0.16% -0.16% -0.22%] index_add_ perm : Elapsed 0.019 ms (1.852 ms / 100) 1.813 -> 1.810 ( -0.17%) [ +0.44% +0.22% +0.00% / +0.17% -0.06% -0.17%] index_copy_ perm : Elapsed 0.018 ms (1.821 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.27% +0.00% +0.27% / +0.27% +0.11% +0.11%] index_add_ perm_sorted : Elapsed 0.018 ms (1.847 ms / 100) 1.815 -> 1.812 ( -0.17%) [ +0.00% +0.06% +0.11% / +0.28% -0.17% -0.17%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.815 ms / 100) 8.244 -> 8.246 ( +0.02%) [ +0.19% +0.15% +0.00% / +0.02% +0.41% +0.21%] index_select const : Elapsed 0.083 ms (8.260 ms / 100) 8.270 -> 8.277 ( +0.08%) [ +0.31% +0.00% +0.02% / +0.08% +0.30% +0.17%] index_select wrap : Elapsed 0.083 ms (8.296 ms / 100) 8.274 -> 8.279 ( +0.06%) [ +0.21% +0.10% +0.00% / +0.06% +0.35% +0.39%] index_select linear : Elapsed 0.083 ms (8.291 ms / 100) 8.255 -> 8.268 ( +0.16%) [ +0.00% +0.19% +0.48% / +0.16% +0.22% +0.39%] index_select reverse : Elapsed 0.083 ms (8.255 ms / 100) 8.250 -> 8.251 ( +0.01%) [ +0.11% +0.00% +0.05% / +0.01% +0.08% +0.48%] index_select skip64 : Elapsed 0.083 ms (8.259 ms / 100) 8.250 -> 8.253 ( +0.04%) [ +0.16% +0.00% +0.05% / +0.04% +0.27% +0.16%] index_select skip256 : Elapsed 0.083 ms (8.263 ms / 100) 8.259 -> 8.273 ( +0.17%) [ +0.28% +0.00% +0.18% / +0.17% +0.51% +0.44%] index_select spread : Elapsed 0.083 ms (8.282 ms / 100) 8.268 -> 8.276 ( +0.10%) [ +0.06% +0.00% +0.47% / +0.10% +0.53% +0.44%] index_select strided 3 : Elapsed 0.083 ms (8.273 ms / 100) 8.277 -> 8.278 ( +0.01%) [ +0.06% +0.00% +0.02% / +0.01% +0.31% +0.28%] index_select random : Elapsed 0.083 ms (8.282 ms / 100) 8.247 -> 8.274 ( +0.33%) [ +0.15% +0.11% +0.00% / +0.33% +0.57% +0.58%] index_select random_sorted : Elapsed 0.083 ms (8.259 ms / 100) B = [16, 40, 4, 20] (stride (800, 1, 12800, 40)) A = [16, 40, 4, 5] (stride (800, 4, 1, 160)) dim = 3 1.719 -> 1.733 ( +0.81%) [ +0.00% +0.17% +0.70% / +0.81% +2.73% +2.68%] index_add_ linear : Elapsed 0.017 ms (1.719 ms / 100) 1.674 -> 1.689 ( +0.90%) [ +0.06% +0.00% +0.66% / +0.90% +2.63% +2.69%] index_copy_ linear : Elapsed 0.017 ms (1.675 ms / 100) 1.717 -> 1.732 ( +0.87%) [ +0.00% +0.17% +0.93% / +0.87% +2.97% +2.85%] index_add_ reverse : Elapsed 0.017 ms (1.717 ms / 100) 1.672 -> 1.691 ( +1.14%) [ +0.30% +0.00% +0.90% / +1.14% +2.69% +2.69%] index_copy_ reverse : Elapsed 0.017 ms (1.677 ms / 100) 1.761 -> 1.764 ( +0.17%) [ +0.34% +0.23% +0.00% / +0.17% +0.80% +0.97%] index_add_ spread : Elapsed 0.018 ms (1.767 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.00% +0.12% +0.06% / +0.06% +0.93% +0.88%] index_copy_ spread : Elapsed 0.017 ms (1.713 ms / 100) 1.752 -> 1.754 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +1.60% +1.60%] index_add_ strided 3 : Elapsed 0.018 ms (1.752 ms / 100) 1.702 -> 1.708 ( +0.35%) [ +0.18% +0.00% +0.24% / +0.35% +1.70% +1.70%] index_copy_ strided 3 : Elapsed 0.017 ms (1.705 ms / 100) 1.740 -> 1.744 ( +0.23%) [ +0.00% +0.11% +0.17% / +0.23% +1.95% +2.01%] index_add_ strided 7 : Elapsed 0.017 ms (1.740 ms / 100) 1.689 -> 1.699 ( +0.59%) [ +0.53% +0.00% +0.77% / +0.59% +2.19% +2.19%] index_copy_ strided 7 : Elapsed 0.017 ms (1.698 ms / 100) 1.740 -> 1.743 ( +0.17%) [ +0.00% +0.00% +0.29% / +0.17% +1.72% +1.72%] index_add_ perm : Elapsed 0.017 ms (1.740 ms / 100) 1.692 -> 1.698 ( +0.35%) [ +0.00% +0.18% +0.35% / +0.35% +1.89% +1.71%] index_copy_ perm : Elapsed 0.017 ms (1.692 ms / 100) 1.736 -> 1.742 ( +0.35%) [ +0.06% +0.00% +0.46% / +0.35% +1.84% +1.90%] index_add_ perm_sorted : Elapsed 0.017 ms (1.737 ms / 100) 1.689 -> 1.700 ( +0.65%) [ +0.12% +0.00% +0.36% / +0.65% +1.66% +1.89%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.691 ms / 100) 8.195 -> 8.179 ( -0.20%) [ +0.06% +0.00% +0.11% / +0.09% +0.23% -0.20%] index_select const : Elapsed 0.082 ms (8.200 ms / 100) 8.244 -> 8.250 ( +0.07%) [ +0.00% +0.18% +0.13% / +0.07% +0.35% +0.11%] index_select wrap : Elapsed 0.082 ms (8.244 ms / 100) 8.221 -> 8.234 ( +0.16%) [ +0.06% +0.00% +0.17% / +0.21% +0.16% +0.19%] index_select linear : Elapsed 0.082 ms (8.226 ms / 100) 8.230 -> 8.232 ( +0.02%) [ +0.11% +0.05% +0.00% / +0.02% +0.19% +0.15%] index_select reverse : Elapsed 0.082 ms (8.239 ms / 100) 8.190 -> 8.195 ( +0.06%) [ +0.00% +0.31% +0.01% / +0.06% +0.07% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.190 ms / 100) 8.182 -> 8.188 ( +0.07%) [ +0.12% +0.04% +0.00% / +0.07% +0.51% +0.22%] index_select skip256 : Elapsed 0.082 ms (8.192 ms / 100) 8.203 -> 8.224 ( +0.26%) [ +0.29% +0.43% +0.00% / +0.35% +0.32% +0.26%] index_select spread : Elapsed 0.082 ms (8.227 ms / 100) 8.247 -> 8.250 ( +0.04%) [ +0.18% +0.11% +0.00% / +0.16% +0.24% +0.04%] index_select strided 3 : Elapsed 0.083 ms (8.262 ms / 100) 8.235 -> 8.225 ( -0.12%) [ +0.05% +0.00% +0.13% / -0.12% +0.49% +0.40%] index_select random : Elapsed 0.082 ms (8.239 ms / 100) 8.224 -> 8.216 ( -0.10%) [ +0.02% +0.07% +0.00% / -0.10% +0.06% +0.29%] index_select random_sorted : Elapsed 0.082 ms (8.226 ms / 100) B = [16, 40, 4, 20] (stride (20, 320, 12800, 1)) A = [16, 40, 4, 5] (stride (800, 20, 1, 4)) dim = 3 1.815 -> 1.791 ( -1.32%) [ +0.22% +0.00% +0.11% / +0.44% -1.32% -0.83%] index_add_ linear : Elapsed 0.018 ms (1.819 ms / 100) 1.774 -> 1.748 ( -1.47%) [ +0.23% +0.06% +0.00% / +0.23% -1.47% -1.30%] index_copy_ linear : Elapsed 0.018 ms (1.778 ms / 100) 1.817 -> 1.790 ( -1.49%) [ +0.00% +0.11% +0.11% / +0.11% -1.49% -1.10%] index_add_ reverse : Elapsed 0.018 ms (1.817 ms / 100) 1.774 -> 1.751 ( -1.30%) [ +0.00% +0.11% +0.00% / +0.00% -1.30% -1.24%] index_copy_ reverse : Elapsed 0.018 ms (1.774 ms / 100) 1.838 -> 1.810 ( -1.52%) [ +0.00% +0.16% +0.22% / +0.00% -1.52% -1.52%] index_add_ spread : Elapsed 0.018 ms (1.838 ms / 100) 1.801 -> 1.774 ( -1.50%) [ +0.17% +0.00% +0.28% / +0.22% -1.50% -1.39%] index_copy_ spread : Elapsed 0.018 ms (1.804 ms / 100) 1.834 -> 1.811 ( -1.25%) [ +0.11% +0.22% +0.00% / -0.22% -1.25% -1.20%] index_add_ strided 3 : Elapsed 0.018 ms (1.836 ms / 100) 1.801 -> 1.773 ( -1.55%) [ +0.28% +0.00% +0.11% / -0.28% -1.50% -1.55%] index_copy_ strided 3 : Elapsed 0.018 ms (1.806 ms / 100) 1.835 -> 1.808 ( -1.47%) [ +0.27% +0.00% +0.00% / -0.11% -1.47% -1.47%] index_add_ strided 7 : Elapsed 0.018 ms (1.840 ms / 100) 1.799 -> 1.773 ( -1.45%) [ +0.17% +0.00% +0.17% / +0.11% -1.17% -1.45%] index_copy_ strided 7 : Elapsed 0.018 ms (1.802 ms / 100) 1.832 -> 1.807 ( -1.36%) [ +0.00% +0.27% +0.38% / +0.05% -1.36% -0.93%] index_add_ perm : Elapsed 0.018 ms (1.832 ms / 100) 1.796 -> 1.777 ( -1.06%) [ +0.11% +0.00% +0.17% / +0.00% -1.06% -0.84%] index_copy_ perm : Elapsed 0.018 ms (1.798 ms / 100) 1.833 -> 1.812 ( -1.15%) [ +0.22% +0.05% +0.00% / +0.11% -0.98% -1.15%] index_add_ perm_sorted : Elapsed 0.018 ms (1.837 ms / 100) 1.804 -> 1.777 ( -1.50%) [ +0.00% +0.00% +0.22% / -0.28% -1.50% -1.50%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.804 ms / 100) 8.292 -> 8.313 ( +0.25%) [ +0.14% +0.25% +0.00% / +0.25% +0.58% +0.25%] index_select const : Elapsed 0.083 ms (8.304 ms / 100) 8.322 -> 8.315 ( -0.08%) [ +0.16% +0.00% +0.06% / -0.06% +0.30% -0.08%] index_select wrap : Elapsed 0.083 ms (8.335 ms / 100) 8.307 -> 8.308 ( +0.01%) [ +0.00% +0.07% +0.10% / +0.01% +0.05% +0.08%] index_select linear : Elapsed 0.083 ms (8.307 ms / 100) 8.316 -> 8.324 ( +0.10%) [ +0.07% +0.00% +0.13% / +0.28% +0.10% +0.28%] index_select reverse : Elapsed 0.083 ms (8.322 ms / 100) 8.297 -> 8.297 ( +0.00%) [ +0.19% +0.00% +0.13% / +0.01% +0.28% +0.00%] index_select skip64 : Elapsed 0.083 ms (8.313 ms / 100) 8.294 -> 8.294 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.22% +0.17%] index_select skip256 : Elapsed 0.083 ms (8.294 ms / 100) 8.303 -> 8.316 ( +0.16%) [ +0.00% +0.05% +0.17% / +0.16% +0.41% +0.45%] index_select spread : Elapsed 0.083 ms (8.303 ms / 100) 8.307 -> 8.302 ( -0.06%) [ +0.06% +0.00% +0.24% / -0.06% +0.25% +0.17%] index_select strided 3 : Elapsed 0.083 ms (8.312 ms / 100) 8.315 -> 8.306 ( -0.11%) [ +0.10% +0.00% +0.02% / -0.11% +0.16% +0.35%] index_select random : Elapsed 0.083 ms (8.323 ms / 100) 8.298 -> 8.299 ( +0.01%) [ +0.00% +0.08% +0.20% / +0.01% +0.33% +0.57%] index_select random_sorted : Elapsed 0.083 ms (8.298 ms / 100) B = [16, 40, 4, 20] (stride (160, 4, 1, 2560)) A = [16, 40, 4, 5] (stride (800, 4, 1, 160)) dim = 3 1.538 -> 1.541 ( +0.20%) [ +0.20% +0.00% +0.13% / +0.20% +2.15% +1.95%] index_add_ linear : Elapsed 0.015 ms (1.541 ms / 100) 1.490 -> 1.493 ( +0.20%) [ +0.00% +0.20% +0.13% / +0.20% +2.15% +2.21%] index_copy_ linear : Elapsed 0.015 ms (1.490 ms / 100) 1.534 -> 1.539 ( +0.33%) [ +0.26% +0.00% +0.13% / +0.33% +2.15% +1.96%] index_add_ reverse : Elapsed 0.015 ms (1.538 ms / 100) 1.489 -> 1.492 ( +0.20%) [ +0.27% +0.00% +0.07% / +0.20% +2.22% +2.22%] index_copy_ reverse : Elapsed 0.015 ms (1.493 ms / 100) 1.530 -> 1.533 ( +0.20%) [ +0.07% +0.00% +0.13% / +0.20% +2.48% +1.90%] index_add_ spread : Elapsed 0.015 ms (1.531 ms / 100) 1.486 -> 1.488 ( +0.13%) [ +0.00% +0.27% +0.13% / +0.13% +2.29% +2.36%] index_copy_ spread : Elapsed 0.015 ms (1.486 ms / 100) 1.532 -> 1.540 ( +0.52%) [ +0.00% +0.07% +0.00% / +0.52% +2.28% +2.87%] index_add_ strided 3 : Elapsed 0.015 ms (1.532 ms / 100) 1.487 -> 1.490 ( +0.20%) [ +0.07% +0.00% +0.07% / +0.20% +2.69% +2.69%] index_copy_ strided 3 : Elapsed 0.015 ms (1.488 ms / 100) 1.533 -> 1.544 ( +0.72%) [ +0.07% +0.00% +0.52% / +0.72% +1.89% +1.83%] index_add_ strided 7 : Elapsed 0.015 ms (1.534 ms / 100) 1.493 -> 1.501 ( +0.54%) [ +0.07% +0.00% +0.27% / +0.54% +1.47% +1.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.516 -> 1.527 ( +0.73%) [ +0.00% +0.46% +0.73% / +0.73% +3.56% +3.43%] index_add_ perm : Elapsed 0.015 ms (1.516 ms / 100) 1.476 -> 1.483 ( +0.47%) [ +0.00% +0.34% +0.27% / +0.47% +3.39% +3.18%] index_copy_ perm : Elapsed 0.015 ms (1.476 ms / 100) 1.519 -> 1.530 ( +0.72%) [ +0.07% +0.00% +0.39% / +0.72% +3.23% +3.03%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.483 ( +0.54%) [ +0.00% +0.34% +0.75% / +0.54% +3.19% +3.19%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.475 ms / 100) 7.554 -> 7.560 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +0.25% +0.15%] index_select const : Elapsed 0.076 ms (7.554 ms / 100) 7.604 -> 7.599 ( -0.07%) [ +0.05% +0.00% +0.07% / -0.07% +0.34% +0.17%] index_select wrap : Elapsed 0.076 ms (7.608 ms / 100) 7.590 -> 7.591 ( +0.01%) [ +0.21% +0.00% +0.20% / +0.01% +0.49% +0.26%] index_select linear : Elapsed 0.076 ms (7.606 ms / 100) 7.589 -> 7.608 ( +0.25%) [ +0.21% +0.00% +0.29% / +0.25% +0.62% +0.36%] index_select reverse : Elapsed 0.076 ms (7.605 ms / 100) 7.553 -> 7.567 ( +0.19%) [ +0.29% +0.26% +0.00% / +0.19% +0.29% +0.29%] index_select skip64 : Elapsed 0.076 ms (7.575 ms / 100) 7.556 -> 7.556 ( +0.00%) [ +0.12% +0.05% +0.00% / +0.00% +0.22% +0.20%] index_select skip256 : Elapsed 0.076 ms (7.565 ms / 100) 7.586 -> 7.576 ( -0.13%) [ +0.15% +0.00% +0.04% / -0.13% +0.09% +0.22%] index_select spread : Elapsed 0.076 ms (7.597 ms / 100) 7.603 -> 7.612 ( +0.12%) [ +0.00% +0.45% +0.22% / +0.12% +0.16% +0.17%] index_select strided 3 : Elapsed 0.076 ms (7.603 ms / 100) 7.613 -> 7.598 ( -0.20%) [ +0.05% +0.01% +0.00% / -0.20% +0.04% +0.07%] index_select random : Elapsed 0.076 ms (7.617 ms / 100) 7.583 -> 7.583 ( +0.00%) [ +0.17% +0.00% +0.30% / +0.00% +0.49% +0.07%] index_select random_sorted : Elapsed 0.076 ms (7.596 ms / 100) out_shape = [20, 40, 5, 4] in_shape = [16, 40, 5, 4] idx_dim = 0 B = [20, 40, 5, 4] (stride (800, 20, 4, 1)) A = [16, 40, 5, 4] (stride (800, 1, 160, 40)) dim = 0 3.262 -> 3.263 ( +0.03%) [ +0.09% +0.00% +0.03% / +0.03% +0.92% +0.86%] index_add_ linear : Elapsed 0.033 ms (3.265 ms / 100) 3.109 -> 3.112 ( +0.10%) [ +0.06% +0.10% +0.00% / +0.10% +1.09% +1.03%] index_copy_ linear : Elapsed 0.031 ms (3.111 ms / 100) 3.250 -> 3.252 ( +0.06%) [ +0.03% +0.00% +0.09% / +0.06% +0.83% +0.77%] index_add_ reverse : Elapsed 0.033 ms (3.251 ms / 100) 3.103 -> 3.106 ( +0.10%) [ +0.00% +0.03% +0.00% / +0.10% +1.03% +1.06%] index_copy_ reverse : Elapsed 0.031 ms (3.103 ms / 100) 3.254 -> 3.254 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_add_ spread : Elapsed 0.033 ms (3.255 ms / 100) 3.113 -> 3.130 ( +0.55%) [ +0.16% +0.16% +0.00% / +0.55% +0.90% +0.71%] index_copy_ spread : Elapsed 0.031 ms (3.118 ms / 100) 3.290 -> 3.291 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.73% +0.76%] index_add_ strided 3 : Elapsed 0.033 ms (3.292 ms / 100) 3.137 -> 3.135 ( -0.06%) [ +0.10% +0.16% +0.00% / -0.06% +0.48% +0.48%] index_copy_ strided 3 : Elapsed 0.031 ms (3.140 ms / 100) 3.294 -> 3.297 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.49% +0.52%] index_add_ strided 7 : Elapsed 0.033 ms (3.300 ms / 100) 3.134 -> 3.134 ( +0.00%) [ +0.00% +0.19% +0.03% / +0.00% +0.45% +0.41%] index_copy_ strided 7 : Elapsed 0.031 ms (3.134 ms / 100) 3.256 -> 3.257 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.68% +0.64%] index_add_ perm : Elapsed 0.033 ms (3.257 ms / 100) 3.121 -> 3.125 ( +0.13%) [ +0.35% +0.19% +0.00% / +0.13% +0.54% +0.64%] index_copy_ perm : Elapsed 0.031 ms (3.132 ms / 100) 3.253 -> 3.255 ( +0.06%) [ +0.09% +0.03% +0.00% / +0.06% +0.58% +0.61%] index_add_ perm_sorted : Elapsed 0.033 ms (3.256 ms / 100) 3.108 -> 3.109 ( +0.03%) [ +0.03% +0.00% +0.06% / +0.03% +0.84% +0.93%] index_copy_ perm_sorted : Elapsed 0.031 ms (3.109 ms / 100) 5.302 -> 5.299 ( -0.06%) [ +0.00% +0.06% +0.02% / +0.08% -0.06% -0.06%] index_select const : Elapsed 0.053 ms (5.302 ms / 100) 5.314 -> 5.309 ( -0.09%) [ +0.08% +0.08% +0.00% / +0.02% -0.09% +0.09%] index_select wrap : Elapsed 0.053 ms (5.318 ms / 100) 5.315 -> 5.312 ( -0.06%) [ +0.08% +0.00% +0.02% / -0.04% -0.06% +0.04%] index_select linear : Elapsed 0.053 ms (5.319 ms / 100) 5.315 -> 5.306 ( -0.17%) [ +0.00% +0.08% +0.08% / -0.17% +0.08% +0.13%] index_select reverse : Elapsed 0.053 ms (5.315 ms / 100) 5.299 -> 5.302 ( +0.06%) [ +0.02% +0.00% +0.04% / +0.13% +0.09% +0.06%] index_select skip64 : Elapsed 0.053 ms (5.300 ms / 100) 5.298 -> 5.300 ( +0.04%) [ +0.11% +0.00% +0.09% / +0.04% +0.04% +0.15%] index_select skip256 : Elapsed 0.053 ms (5.304 ms / 100) 5.311 -> 5.314 ( +0.06%) [ +0.00% +0.11% +0.06% / +0.11% +0.08% +0.06%] index_select spread : Elapsed 0.053 ms (5.311 ms / 100) 5.313 -> 5.315 ( +0.04%) [ +0.08% +0.00% +0.06% / +0.13% +0.08% +0.04%] index_select strided 3 : Elapsed 0.053 ms (5.317 ms / 100) 5.319 -> 5.315 ( -0.08%) [ +0.02% +0.00% +0.04% / -0.08% -0.04% -0.02%] index_select strided 5 : Elapsed 0.053 ms (5.320 ms / 100) 5.314 -> 5.309 ( -0.09%) [ +0.19% +0.00% +0.00% / +0.11% -0.09% -0.09%] index_select strided 7 : Elapsed 0.053 ms (5.324 ms / 100) 5.299 -> 5.297 ( -0.04%) [ +0.00% +0.15% +0.04% / -0.04% +0.11% +0.04%] index_select strided 8 : Elapsed 0.053 ms (5.299 ms / 100) 5.305 -> 5.312 ( +0.13%) [ +0.00% +0.08% +0.09% / +0.26% +0.13% +0.21%] index_select random : Elapsed 0.053 ms (5.305 ms / 100) 5.310 -> 5.309 ( -0.02%) [ +0.02% +0.00% +0.06% / +0.06% +0.11% -0.02%] index_select random_sorted : Elapsed 0.053 ms (5.311 ms / 100) B = [20, 40, 5, 4] (stride (800, 1, 40, 200)) A = [16, 40, 5, 4] (stride (1, 64, 2560, 16)) dim = 0 4.232 -> 4.235 ( +0.07%) [ +0.26% +0.14% +0.00% / +0.07% +0.73% +0.73%] index_add_ linear : Elapsed 0.042 ms (4.243 ms / 100) 4.102 -> 4.111 ( +0.22%) [ +0.29% +0.20% +0.00% / +0.22% +0.63% +0.71%] index_copy_ linear : Elapsed 0.041 ms (4.114 ms / 100) 4.226 -> 4.231 ( +0.12%) [ +0.14% +0.02% +0.00% / +0.12% +0.78% +0.88%] index_add_ reverse : Elapsed 0.042 ms (4.232 ms / 100) 4.091 -> 4.096 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.98% +0.93%] index_copy_ reverse : Elapsed 0.041 ms (4.096 ms / 100) 4.220 -> 4.221 ( +0.02%) [ +0.12% +0.00% +0.19% / +0.02% +0.85% +0.81%] index_add_ spread : Elapsed 0.042 ms (4.225 ms / 100) 4.075 -> 4.077 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.83% +0.74%] index_copy_ spread : Elapsed 0.041 ms (4.079 ms / 100) 4.220 -> 4.222 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.62% +0.62%] index_add_ strided 3 : Elapsed 0.042 ms (4.221 ms / 100) 4.078 -> 4.084 ( +0.15%) [ +0.05% +0.05% +0.00% / +0.15% +0.81% +0.66%] index_copy_ strided 3 : Elapsed 0.041 ms (4.080 ms / 100) 4.226 -> 4.230 ( +0.09%) [ +0.09% +0.12% +0.00% / +0.09% +0.85% +0.85%] index_add_ strided 7 : Elapsed 0.042 ms (4.230 ms / 100) 4.088 -> 4.097 ( +0.22%) [ +0.07% +0.20% +0.00% / +0.22% +1.00% +1.03%] index_copy_ strided 7 : Elapsed 0.041 ms (4.091 ms / 100) 4.235 -> 4.236 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.54% +0.59%] index_add_ perm : Elapsed 0.042 ms (4.235 ms / 100) 4.098 -> 4.103 ( +0.12%) [ +0.00% +0.22% +0.29% / +0.12% +0.78% +0.85%] index_copy_ perm : Elapsed 0.041 ms (4.098 ms / 100) 4.215 -> 4.219 ( +0.09%) [ +0.00% +0.09% +0.07% / +0.09% +0.71% +0.69%] index_add_ perm_sorted : Elapsed 0.042 ms (4.215 ms / 100) 4.077 -> 4.081 ( +0.10%) [ +0.00% +0.02% +0.10% / +0.10% +0.74% +0.78%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.077 ms / 100) 5.571 -> 5.566 ( -0.09%) [ +0.04% +0.00% +0.05% / +0.04% +0.02% -0.09%] index_select const : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.14% +0.04% +0.00% / +0.00% +0.14% +0.18%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.571 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.34% +0.14%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.575 ( +0.20%) [ +0.22% +0.22% +0.00% / +0.20% +0.29% +0.22%] index_select reverse : Elapsed 0.056 ms (5.576 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.22% +0.11% +0.00% / +0.05% +0.20% +0.25%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.00% +0.02% +0.14% / +0.04% +0.16% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.13% +0.00% +0.14% / +0.11% +0.09% +0.11%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.569 ( +0.05%) [ +0.04% +0.18% +0.00% / +0.05% +0.14% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.572 ( +0.14%) [ +0.09% +0.00% +0.16% / +0.23% +0.31% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.569 ms / 100) 5.572 -> 5.571 ( -0.02%) [ +0.05% +0.00% +0.11% / -0.02% +0.07% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.575 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.07% +0.32% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.571 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.00% +0.02% +0.14% / +0.05% +0.14% +0.11%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.574 -> 5.564 ( -0.18%) [ +0.00% +0.02% +0.00% / -0.18% +0.07% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [20, 40, 5, 4] (stride (5, 100, 1, 4000)) A = [16, 40, 5, 4] (stride (1, 320, 64, 16)) dim = 0 3.576 -> 3.578 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.36% +0.36%] index_add_ linear : Elapsed 0.036 ms (3.577 ms / 100) 3.429 -> 3.433 ( +0.12%) [ +0.09% +0.15% +0.00% / +0.12% +0.50% +0.47%] index_copy_ linear : Elapsed 0.034 ms (3.432 ms / 100) 3.564 -> 3.563 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.76% +0.70%] index_add_ reverse : Elapsed 0.036 ms (3.565 ms / 100) 3.429 -> 3.429 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.034 ms (3.429 ms / 100) 3.591 -> 3.591 ( +0.00%) [ +0.00% +0.06% +0.03% / +0.00% +0.39% +0.42%] index_add_ spread : Elapsed 0.036 ms (3.591 ms / 100) 3.447 -> 3.447 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.32% +0.38%] index_copy_ spread : Elapsed 0.034 ms (3.447 ms / 100) 3.597 -> 3.601 ( +0.11%) [ +0.03% +0.03% +0.00% / +0.11% +0.44% +0.44%] index_add_ strided 3 : Elapsed 0.036 ms (3.598 ms / 100) 3.438 -> 3.439 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.52% +0.47%] index_copy_ strided 3 : Elapsed 0.034 ms (3.439 ms / 100) 3.600 -> 3.602 ( +0.06%) [ +0.06% +0.00% +0.03% / +0.06% +0.31% +0.33%] index_add_ strided 7 : Elapsed 0.036 ms (3.602 ms / 100) 3.441 -> 3.443 ( +0.06%) [ +0.09% +0.03% +0.00% / +0.06% +0.38% +0.38%] index_copy_ strided 7 : Elapsed 0.034 ms (3.444 ms / 100) 3.594 -> 3.595 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.31% +0.28%] index_add_ perm : Elapsed 0.036 ms (3.595 ms / 100) 3.448 -> 3.450 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.06% +0.29% +0.26%] index_copy_ perm : Elapsed 0.035 ms (3.451 ms / 100) 3.567 -> 3.567 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.67% +0.64%] index_add_ perm_sorted : Elapsed 0.036 ms (3.570 ms / 100) 3.434 -> 3.434 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.58%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.434 ms / 100) 5.390 -> 5.392 ( +0.04%) [ +0.00% +0.13% +0.20% / +0.04% +0.07% +0.09%] index_select const : Elapsed 0.054 ms (5.390 ms / 100) 5.392 -> 5.392 ( +0.00%) [ +0.00% +0.06% +0.20% / +0.15% +0.15% +0.00%] index_select wrap : Elapsed 0.054 ms (5.392 ms / 100) 5.400 -> 5.395 ( -0.09%) [ +0.04% +0.00% +0.04% / -0.04% -0.09% -0.09%] index_select linear : Elapsed 0.054 ms (5.402 ms / 100) 5.401 -> 5.397 ( -0.07%) [ +0.06% +0.00% +0.02% / -0.07% -0.04% +0.04%] index_select reverse : Elapsed 0.054 ms (5.404 ms / 100) 5.390 -> 5.394 ( +0.07%) [ +0.09% +0.00% +0.15% / +0.13% +0.07% +0.19%] index_select skip64 : Elapsed 0.054 ms (5.395 ms / 100) 5.396 -> 5.392 ( -0.07%) [ +0.06% +0.02% +0.00% / -0.07% +0.06% -0.07%] index_select skip256 : Elapsed 0.054 ms (5.399 ms / 100) 5.400 -> 5.398 ( -0.04%) [ +0.20% +0.02% +0.00% / +0.13% -0.04% +0.15%] index_select spread : Elapsed 0.054 ms (5.411 ms / 100) 5.401 -> 5.394 ( -0.13%) [ +0.00% +0.07% +0.13% / +0.11% -0.13% -0.06%] index_select strided 3 : Elapsed 0.054 ms (5.401 ms / 100) 5.400 -> 5.392 ( -0.15%) [ +0.13% +0.07% +0.00% / +0.13% -0.15% +0.02%] index_select strided 5 : Elapsed 0.054 ms (5.407 ms / 100) 5.396 -> 5.393 ( -0.06%) [ +0.13% +0.00% +0.07% / -0.06% -0.02% +0.02%] index_select strided 7 : Elapsed 0.054 ms (5.403 ms / 100) 5.401 -> 5.396 ( -0.09%) [ +0.00% +0.00% +0.04% / +0.06% +0.02% -0.09%] index_select strided 8 : Elapsed 0.054 ms (5.401 ms / 100) 5.397 -> 5.400 ( +0.06%) [ +0.09% +0.06% +0.00% / +0.07% +0.07% +0.06%] index_select random : Elapsed 0.054 ms (5.402 ms / 100) 5.398 -> 5.398 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.17% +0.00% +0.02%] index_select random_sorted : Elapsed 0.054 ms (5.399 ms / 100) B = [20, 40, 5, 4] (stride (40, 1, 800, 4000)) A = [16, 40, 5, 4] (stride (800, 20, 4, 1)) dim = 0 3.325 -> 3.325 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.81% +0.75%] index_add_ linear : Elapsed 0.033 ms (3.325 ms / 100) 3.197 -> 3.199 ( +0.06%) [ +0.00% +0.09% +0.09% / +0.06% +0.88% +0.69%] index_copy_ linear : Elapsed 0.032 ms (3.197 ms / 100) 3.327 -> 3.330 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.75% +0.78%] index_add_ reverse : Elapsed 0.033 ms (3.328 ms / 100) 3.195 -> 3.196 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.75% +0.81%] index_copy_ reverse : Elapsed 0.032 ms (3.197 ms / 100) 3.322 -> 3.324 ( +0.06%) [ +0.06% +0.09% +0.00% / +0.06% +0.84% +0.84%] index_add_ spread : Elapsed 0.033 ms (3.324 ms / 100) 3.195 -> 3.197 ( +0.06%) [ +0.13% +0.16% +0.00% / +0.06% +0.75% +0.78%] index_copy_ spread : Elapsed 0.032 ms (3.199 ms / 100) 3.311 -> 3.311 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.82% +0.82%] index_add_ strided 3 : Elapsed 0.033 ms (3.311 ms / 100) 3.187 -> 3.189 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.85% +0.82%] index_copy_ strided 3 : Elapsed 0.032 ms (3.187 ms / 100) 3.328 -> 3.328 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.69% +0.69%] index_add_ strided 7 : Elapsed 0.033 ms (3.329 ms / 100) 3.197 -> 3.198 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.72% +0.72%] index_copy_ strided 7 : Elapsed 0.032 ms (3.197 ms / 100) 3.325 -> 3.324 ( -0.03%) [ +0.06% +0.03% +0.00% / -0.03% +0.81% +0.72%] index_add_ perm : Elapsed 0.033 ms (3.327 ms / 100) 3.197 -> 3.196 ( -0.03%) [ +0.09% +0.13% +0.00% / -0.03% +0.88% +0.72%] index_copy_ perm : Elapsed 0.032 ms (3.200 ms / 100) 3.314 -> 3.315 ( +0.03%) [ +0.06% +0.06% +0.00% / +0.03% +0.75% +0.72%] index_add_ perm_sorted : Elapsed 0.033 ms (3.316 ms / 100) 3.190 -> 3.187 ( -0.09%) [ +0.03% +0.00% +0.00% / -0.09% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.191 ms / 100) 5.401 -> 5.399 ( -0.04%) [ +0.00% +0.07% +0.06% / -0.04% +0.06% +0.13%] index_select const : Elapsed 0.054 ms (5.401 ms / 100) 5.407 -> 5.407 ( +0.00%) [ +0.04% +0.13% +0.00% / +0.15% +0.00% +0.04%] index_select wrap : Elapsed 0.054 ms (5.409 ms / 100) 5.407 -> 5.411 ( +0.07%) [ +0.09% +0.11% +0.00% / +0.07% +0.07% +0.07%] index_select linear : Elapsed 0.054 ms (5.412 ms / 100) 5.407 -> 5.409 ( +0.04%) [ +0.00% +0.07% +0.11% / +0.04% +0.18% +0.09%] index_select reverse : Elapsed 0.054 ms (5.407 ms / 100) 5.395 -> 5.401 ( +0.11%) [ +0.13% +0.00% +0.11% / +0.11% +0.13% +0.17%] index_select skip64 : Elapsed 0.054 ms (5.402 ms / 100) 5.394 -> 5.400 ( +0.11%) [ +0.15% +0.00% +0.09% / +0.11% +0.15% +0.28%] index_select skip256 : Elapsed 0.054 ms (5.402 ms / 100) 5.414 -> 5.410 ( -0.07%) [ +0.04% +0.00% +0.13% / -0.07% +0.13% +0.00%] index_select spread : Elapsed 0.054 ms (5.416 ms / 100) 5.410 -> 5.414 ( +0.07%) [ +0.06% +0.11% +0.00% / +0.07% +0.07% +0.15%] index_select strided 3 : Elapsed 0.054 ms (5.413 ms / 100) 5.409 -> 5.412 ( +0.06%) [ +0.02% +0.18% +0.00% / +0.09% +0.07% +0.06%] index_select strided 5 : Elapsed 0.054 ms (5.410 ms / 100) 5.410 -> 5.417 ( +0.13%) [ +0.00% +0.09% +0.00% / +0.13% +0.15% +0.13%] index_select strided 7 : Elapsed 0.054 ms (5.410 ms / 100) 5.397 -> 5.399 ( +0.04%) [ +0.07% +0.04% +0.00% / +0.04% +0.15% +0.13%] index_select strided 8 : Elapsed 0.054 ms (5.401 ms / 100) 5.404 -> 5.409 ( +0.09%) [ +0.00% +0.06% +0.15% / +0.11% +0.11% +0.09%] index_select random : Elapsed 0.054 ms (5.404 ms / 100) 5.399 -> 5.412 ( +0.24%) [ +0.00% +0.22% +0.17% / +0.26% +0.26% +0.24%] index_select random_sorted : Elapsed 0.054 ms (5.399 ms / 100) out_shape = [16, 20, 5, 4] in_shape = [16, 40, 5, 4] idx_dim = 1 B = [16, 20, 5, 4] (stride (400, 1, 80, 20)) A = [16, 40, 5, 4] (stride (1, 16, 2560, 640)) dim = 1 1.462 -> 1.451 ( -0.75%) [ +0.27% +0.07% +0.00% / -0.75% -0.55% -0.48%] index_select const : Elapsed 0.015 ms (1.466 ms / 100) 1.485 -> 1.469 ( -1.08%) [ +0.00% +0.27% +0.00% / -0.88% -0.88% -1.08%] index_select wrap : Elapsed 0.015 ms (1.485 ms / 100) 1.483 -> 1.467 ( -1.08%) [ +0.20% +0.00% +0.20% / -1.08% -0.88% -0.74%] index_select linear : Elapsed 0.015 ms (1.486 ms / 100) 1.484 -> 1.468 ( -1.08%) [ +0.13% +0.07% +0.00% / -0.88% -0.88% -1.08%] index_select reverse : Elapsed 0.015 ms (1.486 ms / 100) 1.459 -> 1.453 ( -0.41%) [ +0.00% +0.14% +0.21% / -0.41% -0.27% -0.41%] index_select skip64 : Elapsed 0.015 ms (1.459 ms / 100) 1.461 -> 1.451 ( -0.68%) [ +0.00% +0.07% +0.07% / -0.68% +0.07% -0.07%] index_select skip256 : Elapsed 0.015 ms (1.461 ms / 100) 1.482 -> 1.470 ( -0.81%) [ +0.00% +0.20% +0.07% / -0.81% -0.27% -0.40%] index_select spread : Elapsed 0.015 ms (1.482 ms / 100) 1.481 -> 1.472 ( -0.61%) [ +1.08% +0.47% +0.00% / -0.27% -0.07% -0.61%] index_select strided 3 : Elapsed 0.015 ms (1.497 ms / 100) 1.475 -> 1.466 ( -0.61%) [ +0.20% +0.00% +0.00% / -0.47% -0.61% -0.27%] index_select strided 5 : Elapsed 0.015 ms (1.478 ms / 100) 1.484 -> 1.474 ( -0.67%) [ +0.00% +0.07% +0.00% / -0.67% -0.67% -0.40%] index_select strided 7 : Elapsed 0.015 ms (1.484 ms / 100) 1.467 -> 1.462 ( -0.34%) [ +0.00% +0.14% +0.07% / -0.34% -0.20% -0.14%] index_select strided 8 : Elapsed 0.015 ms (1.467 ms / 100) 1.467 -> 1.460 ( -0.48%) [ +0.20% +0.41% +0.00% / -0.48% +0.00% +0.00%] index_select strided 16 : Elapsed 0.015 ms (1.470 ms / 100) 1.482 -> 1.466 ( -1.08%) [ +0.07% +0.00% +0.07% / -1.08% -0.88% -0.74%] index_select random : Elapsed 0.015 ms (1.483 ms / 100) 1.479 -> 1.471 ( -0.54%) [ +0.34% +0.00% +0.34% / -0.54% -0.54% -0.54%] index_select random_sorted : Elapsed 0.015 ms (1.484 ms / 100) 1.486 -> 1.469 ( -1.14%) [ +0.00% +0.20% +0.00% / -1.14% -0.67% -0.81%] index_select perm : Elapsed 0.015 ms (1.486 ms / 100) 1.485 -> 1.472 ( -0.88%) [ +0.07% +0.00% +0.00% / -0.88% -0.88% -0.81%] index_select perm_sorted : Elapsed 0.015 ms (1.486 ms / 100) B = [16, 20, 5, 4] (stride (20, 320, 4, 1)) A = [16, 40, 5, 4] (stride (200, 1, 40, 3200)) dim = 1 2.395 -> 2.398 ( +0.13%) [ +0.00% +0.13% +0.17% / +0.13% +0.17% +0.21%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.403 -> 2.397 ( -0.25%) [ +0.00% +0.04% +0.12% / +0.08% -0.25% -0.08%] index_select wrap : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.398 ( -0.25%) [ +0.21% +0.00% +0.12% / +0.04% -0.08% -0.25%] index_select linear : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.401 ( -0.17%) [ +0.04% +0.04% +0.00% / -0.17% -0.04% -0.17%] index_select reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.396 -> 2.395 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.00% -0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.396 ms / 100) 2.395 -> 2.395 ( +0.00%) [ +0.00% +0.25% +0.13% / +0.08% +0.21% +0.00%] index_select skip256 : Elapsed 0.024 ms (2.395 ms / 100) 2.406 -> 2.404 ( -0.08%) [ +0.04% +0.04% +0.00% / -0.08% +0.29% +0.21%] index_select spread : Elapsed 0.024 ms (2.407 ms / 100) 2.408 -> 2.406 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% +0.08% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.403 -> 2.406 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.12% +0.42% +0.42%] index_select strided 5 : Elapsed 0.024 ms (2.403 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.25% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.409 -> 2.407 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.21% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.411 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.08% +0.00% +0.12% / -0.04% +0.25% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.407 ( +0.04%) [ +0.17% +0.08% +0.00% / +0.04% +0.12% +0.29%] index_select random : Elapsed 0.024 ms (2.410 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.17% +0.12% +0.00% / +0.21% +0.46% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.408 -> 2.408 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.29% +0.04% +0.00%] index_select perm : Elapsed 0.024 ms (2.411 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.08% +0.17% +0.00% / +0.12% +0.25% +0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) B = [16, 20, 5, 4] (stride (5, 320, 1, 80)) A = [16, 40, 5, 4] (stride (800, 20, 1, 5)) dim = 1 2.443 -> 2.444 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.25% +0.25%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.04% +0.24% +0.00% / +0.08% +0.04% +0.12%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.461 -> 2.458 ( -0.12%) [ +0.16% +0.00% +0.00% / -0.12% -0.08% +0.04%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.460 -> 2.460 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.12% +0.08% +0.00%] index_select reverse : Elapsed 0.025 ms (2.463 ms / 100) 2.446 -> 2.447 ( +0.04%) [ +0.25% +0.00% +0.12% / +0.08% +0.25% +0.04%] index_select skip64 : Elapsed 0.025 ms (2.452 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.16% +0.08% +0.00% / +0.04% +0.25% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.460 -> 2.463 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.12% +0.20% +0.20%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.28% +0.00% +0.04% / +0.00% -0.04% +0.00%] index_select strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.00% +0.24% +0.20% / +0.12% +0.04% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.452 ms / 100) 2.457 -> 2.458 ( +0.04%) [ +0.16% +0.04% +0.00% / +0.04% +0.24% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.443 -> 2.450 ( +0.29%) [ +0.16% +0.33% +0.00% / +0.29% +0.57% +0.61%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.20% +0.00% +0.04% / +0.04% +0.25% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.459 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.16% +0.12% +0.24%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.457 ( +0.00%) [ +0.00% +0.04% +0.20% / +0.08% +0.04% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.457 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.12% +0.16% +0.00% / -0.04% +0.00% +0.16%] index_select perm : Elapsed 0.025 ms (2.463 ms / 100) 2.463 -> 2.454 ( -0.37%) [ +0.00% +0.20% +0.00% / +0.16% -0.32% -0.37%] index_select perm_sorted : Elapsed 0.025 ms (2.463 ms / 100) B = [16, 20, 5, 4] (stride (1, 320, 16, 80)) A = [16, 40, 5, 4] (stride (5, 80, 1, 3200)) dim = 1 2.447 -> 2.445 ( -0.08%) [ +0.33% +0.00% +0.08% / -0.08% +0.20% +0.16%] index_select const : Elapsed 0.025 ms (2.455 ms / 100) 2.463 -> 2.458 ( -0.20%) [ +0.16% +0.00% +0.12% / +0.16% -0.16% -0.20%] index_select wrap : Elapsed 0.025 ms (2.467 ms / 100) 2.466 -> 2.457 ( -0.36%) [ +0.12% +0.00% +0.12% / +0.08% -0.28% -0.36%] index_select linear : Elapsed 0.025 ms (2.469 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.00% +0.04% +0.16% / +0.00% -0.04% +0.00%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.00% +0.16% +0.00% / +0.20% +0.04% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.447 -> 2.450 ( +0.12%) [ +0.04% +0.16% +0.00% / +0.12% +0.29% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.448 ms / 100) 2.466 -> 2.465 ( -0.04%) [ +0.04% +0.16% +0.00% / -0.04% -0.04% +0.08%] index_select spread : Elapsed 0.025 ms (2.467 ms / 100) 2.462 -> 2.466 ( +0.16%) [ +0.04% +0.00% +0.12% / +0.20% +0.32% +0.16%] index_select strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.451 -> 2.456 ( +0.20%) [ +0.20% +0.00% +0.20% / +0.37% +0.20% +0.29%] index_select strided 5 : Elapsed 0.025 ms (2.456 ms / 100) 2.463 -> 2.465 ( +0.08%) [ +0.16% +0.00% +0.12% / +0.08% +0.16% +0.20%] index_select strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.16% +0.29% +0.00% / +0.04% +0.04% +0.12%] index_select strided 8 : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.451 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.37% +0.41%] index_select strided 16 : Elapsed 0.025 ms (2.454 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.12% +0.08% +0.00% / +0.08% +0.37% +0.33%] index_select random : Elapsed 0.025 ms (2.461 ms / 100) 2.459 -> 2.462 ( +0.12%) [ +0.24% +0.00% +0.28% / +0.16% +0.24% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.464 ( -0.08%) [ +0.16% +0.00% +0.00% / -0.08% -0.04% +0.00%] index_select perm : Elapsed 0.025 ms (2.470 ms / 100) 2.467 -> 2.464 ( -0.12%) [ +0.12% +0.08% +0.00% / -0.12% -0.08% +0.00%] index_select perm_sorted : Elapsed 0.025 ms (2.470 ms / 100) B = [16, 20, 5, 4] (stride (100, 5, 1, 1600)) A = [16, 40, 5, 4] (stride (160, 4, 2560, 1)) dim = 1 2.447 -> 2.452 ( +0.20%) [ +0.04% +0.00% +0.04% / +0.25% +0.20% +0.33%] index_select const : Elapsed 0.024 ms (2.448 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.20% +0.04% +0.00% / +0.24% +0.00% +0.08%] index_select wrap : Elapsed 0.025 ms (2.468 ms / 100) 2.464 -> 2.462 ( -0.08%) [ +0.08% +0.12% +0.00% / +0.08% -0.04% -0.08%] index_select linear : Elapsed 0.025 ms (2.466 ms / 100) 2.465 -> 2.462 ( -0.12%) [ +0.00% +0.04% +0.20% / +0.12% +0.08% -0.12%] index_select reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.20% +0.08% +0.00%] index_select skip64 : Elapsed 0.025 ms (2.450 ms / 100) 2.449 -> 2.450 ( +0.04%) [ +0.08% +0.12% +0.00% / +0.04% +0.29% +0.20%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.477 -> 2.472 ( -0.20%) [ +0.00% +0.20% +0.20% / -0.20% +0.08% +0.16%] index_select spread : Elapsed 0.025 ms (2.477 ms / 100) 2.475 -> 2.471 ( -0.16%) [ +0.00% +0.00% +0.28% / +0.12% -0.16% +0.08%] index_select strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.00% +0.08% +0.08% / +0.00% -0.04% +0.00%] index_select strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.476 -> 2.476 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.08% +0.00% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.478 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.00% +0.08% +0.00% / +0.12% +0.04% -0.04%] index_select strided 8 : Elapsed 0.025 ms (2.458 ms / 100) 2.461 -> 2.459 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.04% -0.08% -0.04%] index_select strided 16 : Elapsed 0.025 ms (2.462 ms / 100) 2.471 -> 2.469 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.00% +0.00%] index_select random : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.468 ( -0.16%) [ +0.04% +0.00% +0.00% / +0.08% -0.16% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.471 -> 2.467 ( -0.16%) [ +0.00% +0.08% +0.00% / +0.20% -0.16% +0.00%] index_select perm : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.464 ( -0.24%) [ +0.16% +0.00% +0.32% / +0.28% -0.24% -0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) B = [16, 20, 5, 4] (stride (1, 80, 16, 1600)) A = [16, 40, 5, 4] (stride (200, 1, 40, 3200)) dim = 1 2.454 -> 2.453 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.00% +0.20%] index_select const : Elapsed 0.025 ms (2.455 ms / 100) 2.460 -> 2.458 ( -0.08%) [ +0.04% +0.00% +0.20% / +0.20% -0.08% -0.08%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.462 -> 2.456 ( -0.24%) [ +0.12% +0.04% +0.00% / -0.08% -0.20% -0.24%] index_select linear : Elapsed 0.025 ms (2.465 ms / 100) 2.459 -> 2.458 ( -0.04%) [ +0.12% +0.00% +0.20% / -0.04% -0.04% +0.12%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.456 -> 2.453 ( -0.12%) [ +0.00% +0.00% +0.12% / -0.12% +0.20% +0.00%] index_select skip64 : Elapsed 0.025 ms (2.456 ms / 100) 2.449 -> 2.454 ( +0.20%) [ +0.00% +0.16% +0.12% / +0.24% +0.20% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.462 -> 2.463 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.08% +0.04% +0.37%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.464 -> 2.467 ( +0.12%) [ +0.00% +0.16% +0.04% / +0.12% +0.16% +0.20%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.459 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.12% +0.28%] index_select strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.467 -> 2.467 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.00% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.468 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.28% +0.28%] index_select strided 8 : Elapsed 0.025 ms (2.465 ms / 100) 2.464 -> 2.466 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.28% +0.24%] index_select strided 16 : Elapsed 0.025 ms (2.464 ms / 100) 2.463 -> 2.466 ( +0.12%) [ +0.00% +0.12% +0.08% / +0.12% +0.16% +0.32%] index_select random : Elapsed 0.025 ms (2.463 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.28% +0.04% +0.00% / +0.24% -0.08% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.465 -> 2.466 ( +0.04%) [ +0.20% +0.20% +0.00% / +0.12% +0.04% +0.12%] index_select perm : Elapsed 0.025 ms (2.470 ms / 100) 2.465 -> 2.467 ( +0.08%) [ +0.28% +0.08% +0.00% / +0.08% +0.28% +0.16%] index_select perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) out_shape = [16, 40, 20, 4] in_shape = [16, 40, 5, 4] idx_dim = 2 B = [16, 40, 20, 4] (stride (3200, 4, 160, 1)) A = [16, 40, 5, 4] (stride (1, 16, 640, 3200)) dim = 2 1.842 -> 1.844 ( +0.11%) [ +0.16% +0.16% +0.00% / +0.11% +1.41% +1.47%] index_add_ linear : Elapsed 0.018 ms (1.845 ms / 100) 1.797 -> 1.799 ( +0.11%) [ +0.11% +0.00% +0.06% / +0.11% +1.89% +1.61%] index_copy_ linear : Elapsed 0.018 ms (1.799 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.11% +1.47% +1.25%] index_add_ reverse : Elapsed 0.018 ms (1.844 ms / 100) 1.795 -> 1.802 ( +0.39%) [ +0.00% +0.11% +0.00% / +0.39% +2.06% +1.89%] index_copy_ reverse : Elapsed 0.018 ms (1.795 ms / 100) 1.855 -> 1.855 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.65% +0.65%] index_add_ spread : Elapsed 0.019 ms (1.855 ms / 100) 1.806 -> 1.806 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.89% +0.89%] index_copy_ spread : Elapsed 0.018 ms (1.809 ms / 100) 1.846 -> 1.844 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.92% +0.92%] index_add_ strided 3 : Elapsed 0.018 ms (1.846 ms / 100) 1.803 -> 1.800 ( -0.17%) [ +0.00% +0.22% +0.11% / -0.17% +1.00% +1.05%] index_copy_ strided 3 : Elapsed 0.018 ms (1.803 ms / 100) 1.852 -> 1.854 ( +0.11%) [ +0.05% +0.00% +0.16% / +0.11% +0.32% +0.49%] index_add_ strided 7 : Elapsed 0.019 ms (1.853 ms / 100) 1.808 -> 1.811 ( +0.17%) [ +0.00% +0.22% +0.11% / +0.17% +0.55% +0.66%] index_copy_ strided 7 : Elapsed 0.018 ms (1.808 ms / 100) 1.842 -> 1.840 ( -0.11%) [ +0.05% +0.05% +0.00% / -0.11% +1.14% +1.36%] index_add_ perm : Elapsed 0.018 ms (1.843 ms / 100) 1.797 -> 1.803 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.33% +1.95% +1.61%] index_copy_ perm : Elapsed 0.018 ms (1.797 ms / 100) 1.842 -> 1.844 ( +0.11%) [ +0.16% +0.00% +0.05% / +0.11% +1.14% +1.41%] index_add_ perm_sorted : Elapsed 0.018 ms (1.845 ms / 100) 1.796 -> 1.801 ( +0.28%) [ +0.22% +0.00% +0.11% / +0.28% +2.00% +1.67%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.800 ms / 100) 8.505 -> 8.525 ( +0.24%) [ +0.00% +0.25% +0.38% / +0.24% +0.51% +0.26%] index_select const : Elapsed 0.085 ms (8.505 ms / 100) 8.533 -> 8.532 ( -0.01%) [ +0.00% +0.07% +0.08% / -0.01% +0.22% +0.05%] index_select wrap : Elapsed 0.085 ms (8.533 ms / 100) 8.532 -> 8.551 ( +0.22%) [ +0.00% +0.30% +0.08% / +0.35% +0.42% +0.22%] index_select linear : Elapsed 0.085 ms (8.532 ms / 100) 8.529 -> 8.523 ( -0.07%) [ +0.00% +0.13% +0.04% / -0.07% +0.09% +0.52%] index_select reverse : Elapsed 0.085 ms (8.529 ms / 100) 8.520 -> 8.528 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +0.16% +0.29%] index_select skip64 : Elapsed 0.085 ms (8.523 ms / 100) 8.521 -> 8.511 ( -0.12%) [ +0.05% +0.00% +0.01% / -0.12% +0.09% +0.20%] index_select skip256 : Elapsed 0.085 ms (8.525 ms / 100) 8.532 -> 8.551 ( +0.22%) [ +0.07% +0.09% +0.00% / +0.27% +0.22% +0.30%] index_select spread : Elapsed 0.085 ms (8.538 ms / 100) 8.546 -> 8.535 ( -0.13%) [ +0.08% +0.00% +0.07% / -0.13% -0.01% +0.07%] index_select strided 3 : Elapsed 0.086 ms (8.553 ms / 100) 8.528 -> 8.536 ( +0.09%) [ +0.00% +0.29% +0.15% / +0.09% +0.23% +0.23%] index_select random : Elapsed 0.085 ms (8.528 ms / 100) 8.535 -> 8.543 ( +0.09%) [ +0.14% +0.01% +0.00% / +0.09% +0.45% +0.50%] index_select random_sorted : Elapsed 0.085 ms (8.547 ms / 100) B = [16, 40, 20, 4] (stride (1, 64, 2560, 16)) dim = 2 fill_cnt = 5 0.936 -> 0.936 ( +0.00%) [ +0.11% +0.21% +0.00% / +0.00% +0.21% +0.32%] index_fill_ const : Elapsed 0.009 ms (0.937 ms / 100) 0.937 -> 0.938 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.21% +0.32%] index_fill_ linear : Elapsed 0.009 ms (0.938 ms / 100) 0.937 -> 0.939 ( +0.21%) [ +0.32% +0.11% +0.00% / +0.21% +0.21% +0.32%] index_fill_ reverse : Elapsed 0.009 ms (0.940 ms / 100) 0.937 -> 0.937 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.21% +0.21%] index_fill_ skip64 : Elapsed 0.009 ms (0.937 ms / 100) 0.937 -> 0.938 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.21% +0.32% +0.11%] index_fill_ skip256 : Elapsed 0.009 ms (0.937 ms / 100) 0.937 -> 0.937 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.21% +0.21%] index_fill_ spread : Elapsed 0.009 ms (0.938 ms / 100) 0.937 -> 0.938 ( +0.11%) [ +0.00% +0.11% +0.11% / +0.11% +0.32% +0.32%] index_fill_ strided 3 : Elapsed 0.009 ms (0.937 ms / 100) 0.936 -> 0.938 ( +0.21%) [ +0.11% +0.11% +0.00% / +0.21% +0.32% +0.32%] index_fill_ strided 5 : Elapsed 0.009 ms (0.937 ms / 100) 0.938 -> 0.938 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.32% +0.32%] index_fill_ strided 7 : Elapsed 0.009 ms (0.938 ms / 100) 0.939 -> 0.938 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% -0.11% +0.00%] index_fill_ strided 8 : Elapsed 0.009 ms (0.939 ms / 100) 0.938 -> 0.938 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.21% +0.11%] index_fill_ strided 16 : Elapsed 0.009 ms (0.938 ms / 100) 0.937 -> 0.937 ( +0.00%) [ +0.21% +0.11% +0.00% / +0.00% +0.21% +0.21%] index_fill_ random : Elapsed 0.009 ms (0.939 ms / 100) 0.936 -> 0.938 ( +0.21%) [ +0.21% +0.32% +0.00% / +0.21% +0.32% +0.32%] index_fill_ random_sorted : Elapsed 0.009 ms (0.938 ms / 100) 0.938 -> 0.938 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.32% +0.21%] index_fill_ perm : Elapsed 0.009 ms (0.938 ms / 100) 0.936 -> 0.937 ( +0.11%) [ +0.21% +0.43% +0.00% / +0.11% +0.32% +0.43%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.938 ms / 100) B = [16, 40, 20, 4] (stride (1, 16, 2560, 640)) A = [16, 40, 5, 4] (stride (40, 1, 2560, 640)) dim = 2 1.715 -> 1.716 ( +0.06%) [ +0.23% +0.00% +0.06% / +0.06% +0.87% +0.87%] index_add_ linear : Elapsed 0.017 ms (1.719 ms / 100) 1.673 -> 1.672 ( -0.06%) [ +0.24% +0.00% +0.06% / -0.06% +1.02% +0.66%] index_copy_ linear : Elapsed 0.017 ms (1.677 ms / 100) 1.714 -> 1.710 ( -0.23%) [ +0.06% +0.00% +0.00% / -0.23% +0.29% +0.41%] index_add_ reverse : Elapsed 0.017 ms (1.715 ms / 100) 1.671 -> 1.672 ( +0.06%) [ +0.00% +0.36% +0.12% / +0.12% +0.36% +0.06%] index_copy_ reverse : Elapsed 0.017 ms (1.671 ms / 100) 1.702 -> 1.701 ( -0.06%) [ +0.29% +0.00% +0.00% / -0.06% +1.47% +1.23%] index_add_ spread : Elapsed 0.017 ms (1.707 ms / 100) 1.657 -> 1.658 ( +0.06%) [ +0.00% +0.30% +0.12% / +0.06% +1.57% +1.27%] index_copy_ spread : Elapsed 0.017 ms (1.657 ms / 100) 1.714 -> 1.714 ( +0.00%) [ +0.06% +0.00% +0.23% / +0.00% +0.93% +1.05%] index_add_ strided 3 : Elapsed 0.017 ms (1.715 ms / 100) 1.673 -> 1.674 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.90% +1.08%] index_copy_ strided 3 : Elapsed 0.017 ms (1.673 ms / 100) 1.706 -> 1.704 ( -0.12%) [ +0.12% +0.23% +0.00% / -0.12% +0.76% +1.17%] index_add_ strided 7 : Elapsed 0.017 ms (1.708 ms / 100) 1.665 -> 1.666 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.84% +1.20%] index_copy_ strided 7 : Elapsed 0.017 ms (1.665 ms / 100) 1.707 -> 1.710 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +0.53% +0.64%] index_add_ perm : Elapsed 0.017 ms (1.707 ms / 100) 1.661 -> 1.665 ( +0.24%) [ +0.24% +0.00% +0.30% / +0.24% +0.84% +0.60%] index_copy_ perm : Elapsed 0.017 ms (1.665 ms / 100) 1.705 -> 1.708 ( +0.18%) [ +0.00% +0.18% +0.06% / +0.18% +1.11% +1.06%] index_add_ perm_sorted : Elapsed 0.017 ms (1.705 ms / 100) 1.664 -> 1.666 ( +0.12%) [ +0.24% +0.00% +0.06% / +0.12% +1.20% +0.96%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.668 ms / 100) 8.215 -> 8.207 ( -0.10%) [ +0.02% +0.00% +0.05% / -0.01% -0.10% -0.05%] index_select const : Elapsed 0.082 ms (8.217 ms / 100) 8.265 -> 8.271 ( +0.07%) [ +0.42% +0.06% +0.00% / +0.07% +0.41% +0.35%] index_select wrap : Elapsed 0.083 ms (8.300 ms / 100) 8.256 -> 8.256 ( +0.00%) [ +0.02% +0.00% +0.41% / +0.00% +0.24% +0.27%] index_select linear : Elapsed 0.083 ms (8.258 ms / 100) 8.268 -> 8.285 ( +0.21%) [ +0.17% +0.05% +0.00% / +0.21% +0.21% +0.21%] index_select reverse : Elapsed 0.083 ms (8.282 ms / 100) 8.207 -> 8.226 ( +0.23%) [ +0.21% +0.10% +0.00% / +0.23% +0.27% +0.28%] index_select skip64 : Elapsed 0.082 ms (8.224 ms / 100) 8.214 -> 8.221 ( +0.09%) [ +0.00% +0.09% +0.04% / +0.27% +0.09% +0.15%] index_select skip256 : Elapsed 0.082 ms (8.214 ms / 100) 8.260 -> 8.265 ( +0.06%) [ +0.07% +0.00% +0.01% / +0.15% +0.23% +0.06%] index_select spread : Elapsed 0.083 ms (8.266 ms / 100) 8.273 -> 8.279 ( +0.07%) [ +0.27% +0.00% +0.33% / +0.07% +0.29% +0.17%] index_select strided 3 : Elapsed 0.083 ms (8.295 ms / 100) 8.270 -> 8.277 ( +0.08%) [ +0.25% +0.00% +0.00% / +0.08% +0.25% +0.29%] index_select random : Elapsed 0.083 ms (8.291 ms / 100) 8.254 -> 8.263 ( +0.11%) [ +0.00% +0.21% +0.01% / +0.11% +0.19% +0.28%] index_select random_sorted : Elapsed 0.083 ms (8.254 ms / 100) B = [16, 40, 20, 4] (stride (1, 320, 16, 12800)) A = [16, 40, 5, 4] (stride (1, 80, 16, 3200)) dim = 2 1.948 -> 1.948 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.87% +1.03%] index_add_ linear : Elapsed 0.020 ms (1.951 ms / 100) 1.898 -> 1.899 ( +0.05%) [ +0.00% +0.11% +0.00% / +0.05% +0.74% +0.74%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.953 -> 1.953 ( +0.00%) [ +0.00% +0.26% +0.10% / +0.00% +0.82% +0.77%] index_add_ reverse : Elapsed 0.020 ms (1.953 ms / 100) 1.904 -> 1.904 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.53%] index_copy_ reverse : Elapsed 0.019 ms (1.904 ms / 100) 1.956 -> 1.955 ( -0.05%) [ +0.15% +0.05% +0.00% / -0.05% +0.26% +0.26%] index_add_ spread : Elapsed 0.020 ms (1.959 ms / 100) 1.904 -> 1.902 ( -0.11%) [ +0.00% +0.05% +0.21% / -0.11% +0.53% +0.47%] index_copy_ spread : Elapsed 0.019 ms (1.904 ms / 100) 1.966 -> 1.968 ( +0.10%) [ +0.25% +0.20% +0.00% / +0.10% +0.15% +0.66%] index_add_ strided 3 : Elapsed 0.020 ms (1.971 ms / 100) 1.914 -> 1.914 ( +0.00%) [ +0.00% +0.16% +0.05% / +0.00% +0.42% +0.84%] index_copy_ strided 3 : Elapsed 0.019 ms (1.914 ms / 100) 1.955 -> 1.959 ( +0.20%) [ +0.00% +0.31% +0.26% / +0.20% +0.56% +0.61%] index_add_ strided 7 : Elapsed 0.020 ms (1.955 ms / 100) 1.901 -> 1.902 ( +0.05%) [ +0.00% +0.26% +0.32% / +0.05% +0.68% +0.79%] index_copy_ strided 7 : Elapsed 0.019 ms (1.901 ms / 100) 1.947 -> 1.951 ( +0.21%) [ +0.41% +0.26% +0.00% / +0.21% +0.67% +0.72%] index_add_ perm : Elapsed 0.020 ms (1.955 ms / 100) 1.898 -> 1.897 ( -0.05%) [ +0.32% +0.00% +0.05% / -0.05% +0.37% +0.37%] index_copy_ perm : Elapsed 0.019 ms (1.904 ms / 100) 1.967 -> 1.980 ( +0.66%) [ +0.05% +0.10% +0.00% / +0.66% +0.81% +0.81%] index_add_ perm_sorted : Elapsed 0.020 ms (1.968 ms / 100) 1.911 -> 1.913 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.89% +0.84%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.912 ms / 100) 8.532 -> 8.545 ( +0.15%) [ +0.18% +0.04% +0.00% / +0.28% +0.23% +0.15%] index_select const : Elapsed 0.085 ms (8.547 ms / 100) 8.560 -> 8.565 ( +0.06%) [ +0.15% +0.04% +0.00% / +0.06% +0.23% +0.25%] index_select wrap : Elapsed 0.086 ms (8.573 ms / 100) 8.556 -> 8.572 ( +0.19%) [ +0.19% +0.00% +0.12% / +0.19% +0.33% +0.28%] index_select linear : Elapsed 0.086 ms (8.572 ms / 100) 8.544 -> 8.555 ( +0.13%) [ +0.20% +0.29% +0.00% / +0.13% +0.23% +0.29%] index_select reverse : Elapsed 0.086 ms (8.561 ms / 100) 8.538 -> 8.536 ( -0.02%) [ +0.00% +0.04% +0.00% / +0.04% -0.02% +0.25%] index_select skip64 : Elapsed 0.085 ms (8.538 ms / 100) 8.528 -> 8.549 ( +0.25%) [ +0.32% +0.00% +0.23% / +0.54% +0.25% +0.34%] index_select skip256 : Elapsed 0.086 ms (8.555 ms / 100) 8.564 -> 8.583 ( +0.22%) [ +0.00% +0.25% +0.06% / +0.48% +0.22% +0.43%] index_select spread : Elapsed 0.086 ms (8.564 ms / 100) 8.557 -> 8.563 ( +0.07%) [ +0.00% +0.07% +0.18% / +0.07% +0.28% +0.11%] index_select strided 3 : Elapsed 0.086 ms (8.557 ms / 100) 8.568 -> 8.575 ( +0.08%) [ +0.07% +0.14% +0.00% / +0.19% +0.16% +0.08%] index_select random : Elapsed 0.086 ms (8.574 ms / 100) 8.574 -> 8.562 ( -0.14%) [ +0.05% +0.00% +0.07% / -0.14% +0.13% +0.43%] index_select random_sorted : Elapsed 0.086 ms (8.578 ms / 100) out_shape = [16, 40, 5, 20] in_shape = [16, 40, 5, 4] idx_dim = 3 B = [16, 40, 5, 20] (stride (4000, 20, 800, 1)) A = [16, 40, 5, 4] (stride (800, 1, 40, 200)) dim = 3 2.111 -> 2.106 ( -0.24%) [ +0.00% +0.09% +0.14% / -0.24% +0.90% +0.62%] index_add_ linear : Elapsed 0.021 ms (2.111 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.24% +0.29% +0.00% / +0.19% +1.01% +1.06%] index_copy_ linear : Elapsed 0.021 ms (2.085 ms / 100) 2.098 -> 2.104 ( +0.29%) [ +0.00% +0.10% +0.14% / +0.29% +1.19% +1.57%] index_add_ reverse : Elapsed 0.021 ms (2.098 ms / 100) 2.073 -> 2.078 ( +0.24%) [ +0.05% +0.00% +0.10% / +0.24% +1.16% +1.11%] index_copy_ reverse : Elapsed 0.021 ms (2.074 ms / 100) 2.143 -> 2.152 ( +0.42%) [ +0.33% +0.19% +0.00% / +0.42% +1.59% +1.54%] index_add_ spread : Elapsed 0.021 ms (2.150 ms / 100) 2.166 -> 2.169 ( +0.14%) [ +0.05% +0.28% +0.00% / +0.14% +1.99% +1.99%] index_copy_ spread : Elapsed 0.022 ms (2.167 ms / 100) 2.145 -> 2.145 ( +0.00%) [ +0.00% +0.14% +0.33% / +0.00% +0.79% +1.07%] index_add_ strided 3 : Elapsed 0.021 ms (2.145 ms / 100) 2.146 -> 2.149 ( +0.14%) [ +0.05% +0.05% +0.00% / +0.14% +1.21% +1.26%] index_copy_ strided 3 : Elapsed 0.021 ms (2.147 ms / 100) 2.153 -> 2.150 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.60% +0.98%] index_add_ strided 7 : Elapsed 0.022 ms (2.153 ms / 100) 2.176 -> 2.179 ( +0.14%) [ +0.00% +0.18% +0.23% / +0.14% +1.33% +1.70%] index_copy_ strided 7 : Elapsed 0.022 ms (2.176 ms / 100) 2.151 -> 2.148 ( -0.14%) [ +0.05% +0.00% +0.05% / -0.14% +1.02% +0.98%] index_add_ perm : Elapsed 0.022 ms (2.152 ms / 100) 2.174 -> 2.168 ( -0.28%) [ +0.00% +0.14% +0.05% / -0.28% +1.20% +1.20%] index_copy_ perm : Elapsed 0.022 ms (2.174 ms / 100) 2.148 -> 2.157 ( +0.42%) [ +0.37% +0.19% +0.00% / +0.42% +1.21% +1.35%] index_add_ perm_sorted : Elapsed 0.022 ms (2.156 ms / 100) 2.171 -> 2.182 ( +0.51%) [ +0.00% +0.46% +0.37% / +0.51% +1.38% +1.52%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.171 ms / 100) 9.252 -> 9.249 ( -0.03%) [ +0.06% +0.00% +0.08% / -0.03% +0.23% +0.21%] index_select const : Elapsed 0.093 ms (9.258 ms / 100) 9.300 -> 9.323 ( +0.25%) [ +0.27% +0.05% +0.00% / +0.25% +0.37% +0.34%] index_select wrap : Elapsed 0.093 ms (9.325 ms / 100) 9.287 -> 9.287 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.00% +0.17% +0.37%] index_select linear : Elapsed 0.093 ms (9.300 ms / 100) 9.284 -> 9.294 ( +0.11%) [ +0.26% +0.00% +0.04% / +0.13% +0.12% +0.11%] index_select reverse : Elapsed 0.093 ms (9.308 ms / 100) 9.238 -> 9.263 ( +0.27%) [ +0.35% +0.15% +0.00% / +0.27% +0.44% +0.43%] index_select skip64 : Elapsed 0.093 ms (9.270 ms / 100) 9.260 -> 9.254 ( -0.06%) [ +0.05% +0.00% +0.43% / -0.06% +0.17% +0.17%] index_select skip256 : Elapsed 0.093 ms (9.265 ms / 100) 9.285 -> 9.305 ( +0.22%) [ +0.11% +0.00% +0.17% / +0.27% +0.22% +0.24%] index_select spread : Elapsed 0.093 ms (9.295 ms / 100) 9.307 -> 9.315 ( +0.09%) [ +0.00% +0.15% +0.15% / +0.19% +0.38% +0.09%] index_select strided 3 : Elapsed 0.093 ms (9.307 ms / 100) 9.305 -> 9.303 ( -0.02%) [ +0.00% +0.16% +0.15% / -0.02% +0.20% +0.42%] index_select random : Elapsed 0.093 ms (9.305 ms / 100) 9.286 -> 9.292 ( +0.06%) [ +0.20% +0.00% +0.29% / +0.06% +0.48% +0.37%] index_select random_sorted : Elapsed 0.093 ms (9.305 ms / 100) B = [16, 40, 5, 20] (stride (4000, 5, 1, 200)) A = [16, 40, 5, 4] (stride (20, 320, 4, 1)) dim = 3 2.021 -> 2.027 ( +0.30%) [ +0.30% +0.20% +0.00% / +0.30% +0.64% +0.40%] index_add_ linear : Elapsed 0.020 ms (2.027 ms / 100) 1.966 -> 1.971 ( +0.25%) [ +0.10% +0.31% +0.00% / +0.25% +0.36% +0.41%] index_copy_ linear : Elapsed 0.020 ms (1.968 ms / 100) 2.022 -> 2.024 ( +0.10%) [ +0.20% +0.00% +0.00% / +0.10% +0.54% +0.30%] index_add_ reverse : Elapsed 0.020 ms (2.026 ms / 100) 1.969 -> 1.971 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.36% +0.15%] index_copy_ reverse : Elapsed 0.020 ms (1.969 ms / 100) 2.014 -> 2.019 ( +0.25%) [ +0.20% +0.20% +0.00% / +0.25% +0.30% +0.74%] index_add_ spread : Elapsed 0.020 ms (2.018 ms / 100) 1.964 -> 1.971 ( +0.36%) [ +0.10% +0.00% +0.10% / +0.36% +0.36% +0.36%] index_copy_ spread : Elapsed 0.020 ms (1.966 ms / 100) 2.018 -> 2.022 ( +0.20%) [ +0.45% +0.00% +0.20% / +0.20% +0.59% +0.40%] index_add_ strided 3 : Elapsed 0.020 ms (2.027 ms / 100) 1.964 -> 1.964 ( +0.00%) [ +0.46% +0.10% +0.00% / +0.00% +0.61% +0.56%] index_copy_ strided 3 : Elapsed 0.020 ms (1.973 ms / 100) 2.023 -> 2.024 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.44% +0.44%] index_add_ strided 7 : Elapsed 0.020 ms (2.023 ms / 100) 1.967 -> 1.968 ( +0.05%) [ +0.10% +0.00% +0.20% / +0.05% +0.36% +0.66%] index_copy_ strided 7 : Elapsed 0.020 ms (1.969 ms / 100) 2.014 -> 2.018 ( +0.20%) [ +0.00% +0.35% +0.25% / +0.20% +0.55% +0.45%] index_add_ perm : Elapsed 0.020 ms (2.014 ms / 100) 1.961 -> 1.967 ( +0.31%) [ +0.15% +0.41% +0.00% / +0.31% +0.61% +0.66%] index_copy_ perm : Elapsed 0.020 ms (1.964 ms / 100) 2.023 -> 2.025 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.79% +0.30%] index_add_ perm_sorted : Elapsed 0.020 ms (2.023 ms / 100) 1.967 -> 1.972 ( +0.25%) [ +0.20% +0.10% +0.00% / +0.25% +0.56% +0.46%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.971 ms / 100) 8.769 -> 8.756 ( -0.15%) [ +0.02% +0.00% +0.13% / -0.02% -0.15% +0.01%] index_select const : Elapsed 0.088 ms (8.771 ms / 100) 8.762 -> 8.769 ( +0.08%) [ +0.30% +0.19% +0.00% / +0.11% +0.16% +0.08%] index_select wrap : Elapsed 0.088 ms (8.788 ms / 100) 8.760 -> 8.755 ( -0.06%) [ +0.17% +0.00% +0.07% / +0.11% -0.01% -0.06%] index_select linear : Elapsed 0.088 ms (8.775 ms / 100) 8.776 -> 8.754 ( -0.25%) [ +0.01% +0.09% +0.00% / -0.25% +0.25% +0.06%] index_select reverse : Elapsed 0.088 ms (8.777 ms / 100) 8.762 -> 8.759 ( -0.03%) [ +0.23% +0.00% +0.02% / -0.03% +0.14% +0.21%] index_select skip64 : Elapsed 0.088 ms (8.782 ms / 100) 8.759 -> 8.760 ( +0.01%) [ +0.00% +0.10% +0.18% / +0.08% +0.01% +0.09%] index_select skip256 : Elapsed 0.088 ms (8.759 ms / 100) 8.764 -> 8.771 ( +0.08%) [ +0.22% +0.00% +0.22% / +0.15% +0.08% +0.09%] index_select spread : Elapsed 0.088 ms (8.783 ms / 100) 8.762 -> 8.770 ( +0.09%) [ +0.15% +0.00% +0.11% / +0.09% +0.18% +0.10%] index_select strided 3 : Elapsed 0.088 ms (8.775 ms / 100) 8.751 -> 8.767 ( +0.18%) [ +0.08% +0.00% +0.16% / +0.22% +0.18% +0.31%] index_select random : Elapsed 0.088 ms (8.758 ms / 100) 8.761 -> 8.754 ( -0.08%) [ +0.25% +0.06% +0.00% / +0.22% -0.08% +0.10%] index_select random_sorted : Elapsed 0.088 ms (8.783 ms / 100) B = [16, 40, 5, 20] (stride (200, 1, 40, 3200)) A = [16, 40, 5, 4] (stride (800, 1, 160, 40)) dim = 3 2.094 -> 2.094 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +1.34% +1.29%] index_add_ linear : Elapsed 0.021 ms (2.094 ms / 100) 2.047 -> 2.047 ( +0.00%) [ +0.15% +0.05% +0.00% / +0.00% +1.47% +1.27%] index_copy_ linear : Elapsed 0.020 ms (2.050 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.00% +0.10% +0.14% / +0.05% +1.63% +1.77%] index_add_ reverse : Elapsed 0.021 ms (2.089 ms / 100) 2.040 -> 2.044 ( +0.20%) [ +0.15% +0.15% +0.00% / +0.20% +1.91% +1.67%] index_copy_ reverse : Elapsed 0.020 ms (2.043 ms / 100) 2.093 -> 2.097 ( +0.19%) [ +0.00% +0.38% +0.57% / +0.19% +1.58% +1.15%] index_add_ spread : Elapsed 0.021 ms (2.093 ms / 100) 2.047 -> 2.047 ( +0.00%) [ +0.00% +0.05% +0.29% / +0.00% +1.56% +1.32%] index_copy_ spread : Elapsed 0.020 ms (2.047 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.29% +0.00% +0.10% / +0.00% +1.19% +1.81%] index_add_ strided 3 : Elapsed 0.021 ms (2.101 ms / 100) 2.050 -> 2.050 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +1.22% +1.41%] index_copy_ strided 3 : Elapsed 0.021 ms (2.051 ms / 100) 2.098 -> 2.103 ( +0.24%) [ +0.00% +0.05% +0.10% / +0.24% +1.72% +1.86%] index_add_ strided 7 : Elapsed 0.021 ms (2.098 ms / 100) 2.051 -> 2.052 ( +0.05%) [ +0.20% +0.00% +0.10% / +0.05% +1.61% +1.51%] index_copy_ strided 7 : Elapsed 0.021 ms (2.055 ms / 100) 2.094 -> 2.097 ( +0.14%) [ +0.33% +0.05% +0.00% / +0.14% +1.00% +1.29%] index_add_ perm : Elapsed 0.021 ms (2.101 ms / 100) 2.051 -> 2.050 ( -0.05%) [ +0.24% +0.00% +0.10% / -0.05% +1.17% +1.07%] index_copy_ perm : Elapsed 0.021 ms (2.056 ms / 100) 2.104 -> 2.109 ( +0.24%) [ +0.05% +0.05% +0.00% / +0.24% +1.38% +1.05%] index_add_ perm_sorted : Elapsed 0.021 ms (2.105 ms / 100) 2.057 -> 2.060 ( +0.15%) [ +0.15% +0.00% +0.19% / +0.15% +1.12% +1.07%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.060 ms / 100) 9.144 -> 9.144 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.15% +0.28% +0.00%] index_select const : Elapsed 0.092 ms (9.156 ms / 100) 9.211 -> 9.215 ( +0.04%) [ +0.17% +0.35% +0.00% / +0.11% +0.20% +0.04%] index_select wrap : Elapsed 0.092 ms (9.227 ms / 100) 9.191 -> 9.181 ( -0.11%) [ +0.00% +0.18% +0.07% / +0.03% -0.11% +0.20%] index_select linear : Elapsed 0.092 ms (9.191 ms / 100) 9.180 -> 9.170 ( -0.11%) [ +0.03% +0.09% +0.00% / +0.24% -0.01% -0.11%] index_select reverse : Elapsed 0.092 ms (9.183 ms / 100) 9.140 -> 9.151 ( +0.12%) [ +0.10% +0.00% +0.03% / +0.12% +0.13% +0.46%] index_select skip64 : Elapsed 0.091 ms (9.149 ms / 100) 9.128 -> 9.145 ( +0.19%) [ +0.19% +0.00% +0.21% / +0.21% +0.19% +0.21%] index_select skip256 : Elapsed 0.091 ms (9.145 ms / 100) 9.201 -> 9.206 ( +0.05%) [ +0.15% +0.14% +0.00% / +0.05% +0.17% +0.33%] index_select spread : Elapsed 0.092 ms (9.215 ms / 100) 9.193 -> 9.194 ( +0.01%) [ +0.10% +0.15% +0.00% / +0.08% +0.01% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.202 ms / 100) 9.198 -> 9.205 ( +0.08%) [ +0.00% +0.11% +0.38% / +0.08% +0.22% +0.10%] index_select random : Elapsed 0.092 ms (9.198 ms / 100) 9.208 -> 9.190 ( -0.20%) [ +0.00% +0.04% +0.08% / +0.05% +0.14% -0.20%] index_select random_sorted : Elapsed 0.092 ms (9.208 ms / 100) B = [16, 40, 5, 20] (stride (1, 80, 16, 3200)) A = [16, 40, 5, 4] (stride (1, 320, 64, 16)) dim = 3 0.800 -> 0.800 ( +0.00%) [ +0.00% +0.38% +0.00% / +0.00% +0.75% +0.75%] index_add_ linear : Elapsed 0.008 ms (0.800 ms / 100) 0.785 -> 0.788 ( +0.38%) [ +0.51% +0.51% +0.00% / +0.76% +1.27% +0.38%] index_copy_ linear : Elapsed 0.008 ms (0.789 ms / 100) 0.806 -> 0.806 ( +0.00%) [ +0.00% +0.37% +0.12% / +0.00% +0.00% +0.25%] index_add_ reverse : Elapsed 0.008 ms (0.806 ms / 100) 0.788 -> 0.792 ( +0.51%) [ +0.89% +0.63% +0.00% / +0.76% +0.63% +0.51%] index_copy_ reverse : Elapsed 0.008 ms (0.795 ms / 100) 0.816 -> 0.801 ( -1.84%) [ +0.25% +0.00% +0.25% / +0.00% -1.84% -1.59%] index_add_ spread : Elapsed 0.008 ms (0.818 ms / 100) 0.798 -> 0.792 ( -0.75%) [ +0.50% +0.00% +0.00% / +0.50% -0.25% -0.75%] index_copy_ spread : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.797 ( -0.62%) [ +0.25% +0.00% +0.25% / +0.00% -0.62% -0.37%] index_add_ strided 3 : Elapsed 0.008 ms (0.804 ms / 100) 0.791 -> 0.786 ( -0.63%) [ +0.38% +0.00% +0.38% / -0.38% -0.63% +0.51%] index_copy_ strided 3 : Elapsed 0.008 ms (0.794 ms / 100) 0.798 -> 0.799 ( +0.13%) [ +0.00% +0.00% +0.50% / +0.13% +1.50% +1.13%] index_add_ strided 7 : Elapsed 0.008 ms (0.798 ms / 100) 0.787 -> 0.792 ( +0.64%) [ +0.00% +0.25% +0.89% / +0.64% +0.89% +0.76%] index_copy_ strided 7 : Elapsed 0.008 ms (0.787 ms / 100) 0.798 -> 0.798 ( +0.00%) [ +0.50% +0.50% +0.00% / +0.00% +1.75% +1.63%] index_add_ perm : Elapsed 0.008 ms (0.802 ms / 100) 0.785 -> 0.795 ( +1.27%) [ +0.00% +1.27% +1.02% / +1.27% +2.17% +1.66%] index_copy_ perm : Elapsed 0.008 ms (0.785 ms / 100) 0.798 -> 0.801 ( +0.38%) [ +0.25% +0.00% +0.00% / +0.38% +1.63% +1.75%] index_add_ perm_sorted : Elapsed 0.008 ms (0.800 ms / 100) 0.786 -> 0.795 ( +1.15%) [ +1.15% +0.76% +0.00% / +1.15% +1.65% +1.53%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.795 ms / 100) 5.074 -> 5.066 ( -0.16%) [ +0.08% +0.02% +0.00% / -0.16% +0.26% +0.26%] index_select const : Elapsed 0.051 ms (5.078 ms / 100) 5.108 -> 5.105 ( -0.06%) [ +0.00% +0.02% +0.06% / -0.06% +0.20% +0.18%] index_select wrap : Elapsed 0.051 ms (5.108 ms / 100) 5.094 -> 5.100 ( +0.12%) [ +0.39% +0.00% +0.27% / +0.12% +0.26% +0.57%] index_select linear : Elapsed 0.051 ms (5.114 ms / 100) 5.089 -> 5.089 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.00% +0.35% +0.37%] index_select reverse : Elapsed 0.051 ms (5.097 ms / 100) 5.069 -> 5.077 ( +0.16%) [ +0.00% +0.16% +0.06% / +0.16% +0.24% +0.28%] index_select skip64 : Elapsed 0.051 ms (5.069 ms / 100) 5.062 -> 5.070 ( +0.16%) [ +0.30% +0.00% +0.06% / +0.16% +0.47% +0.45%] index_select skip256 : Elapsed 0.051 ms (5.077 ms / 100) 5.089 -> 5.106 ( +0.33%) [ +0.28% +0.20% +0.00% / +0.33% +0.71% +0.59%] index_select spread : Elapsed 0.051 ms (5.103 ms / 100) 5.093 -> 5.105 ( +0.24%) [ +0.14% +0.27% +0.00% / +0.39% +0.43% +0.24%] index_select strided 3 : Elapsed 0.051 ms (5.100 ms / 100) 5.104 -> 5.107 ( +0.06%) [ +0.00% +0.02% +0.10% / +0.37% +0.29% +0.06%] index_select random : Elapsed 0.051 ms (5.104 ms / 100) 5.095 -> 5.099 ( +0.08%) [ +0.18% +0.00% +0.18% / +0.08% +0.41% +0.47%] index_select random_sorted : Elapsed 0.051 ms (5.104 ms / 100) out_shape = [20, 4, 5, 16] in_shape = [40, 4, 5, 16] idx_dim = 0 B = [20, 4, 5, 16] (stride (320, 80, 1, 5)) A = [40, 4, 5, 16] (stride (5, 3200, 1, 200)) dim = 0 2.447 -> 2.444 ( -0.12%) [ +0.12% +0.00% +0.00% / -0.12% +0.16% +0.20%] index_select const : Elapsed 0.024 ms (2.450 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.04% +0.24%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.461 -> 2.462 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.24% +0.20% +0.04%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.08% +0.00% +0.04%] index_select reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.12% +0.00% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.450 ms / 100) 2.446 -> 2.449 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.29% +0.41%] index_select skip256 : Elapsed 0.024 ms (2.446 ms / 100) 2.470 -> 2.473 ( +0.12%) [ +0.00% +0.08% +0.12% / +0.12% +0.24% +0.24%] index_select spread : Elapsed 0.025 ms (2.470 ms / 100) 2.471 -> 2.470 ( -0.04%) [ +0.16% +0.00% +0.12% / +0.12% +0.04% -0.04%] index_select strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.461 -> 2.460 ( -0.04%) [ +0.37% +0.33% +0.00% / +0.12% +0.24% -0.04%] index_select strided 5 : Elapsed 0.025 ms (2.470 ms / 100) 2.467 -> 2.470 ( +0.12%) [ +0.12% +0.00% +0.24% / +0.12% +0.12% +0.32%] index_select strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.00% +0.12% +0.04% / +0.04% -0.04% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.04% +0.00% +0.12% / +0.24% +0.24% +0.12%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.470 -> 2.465 ( -0.20%) [ +0.04% +0.04% +0.00% / -0.20% -0.08% +0.00%] index_select random : Elapsed 0.025 ms (2.471 ms / 100) 2.468 -> 2.468 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.08% +0.16% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.466 -> 2.467 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.20% +0.12% +0.04%] index_select perm : Elapsed 0.025 ms (2.468 ms / 100) 2.467 -> 2.459 ( -0.32%) [ +0.20% +0.24% +0.00% / +0.28% -0.32% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) B = [20, 4, 5, 16] (stride (320, 1, 4, 20)) A = [40, 4, 5, 16] (stride (16, 3200, 640, 1)) dim = 0 2.403 -> 2.407 ( +0.17%) [ +0.12% +0.00% +0.04% / +0.21% +0.37% +0.17%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.422 -> 2.410 ( -0.50%) [ +0.08% +0.00% +0.08% / +0.04% -0.50% -0.29%] index_select wrap : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.414 ( -0.29%) [ +0.00% +0.00% +0.21% / +0.04% -0.29% -0.29%] index_select linear : Elapsed 0.024 ms (2.421 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.12% +0.17% +0.00% / +0.00% -0.08% -0.04%] index_select reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.404 -> 2.408 ( +0.17%) [ +0.25% +0.29% +0.00% / +0.17% +0.37% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.410 ms / 100) 2.405 -> 2.409 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.42% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.406 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.21% +0.08% +0.21%] index_select spread : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.420 ( +0.21%) [ +0.21% +0.08% +0.00% / +0.21% +0.41% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.00% +0.12% +0.17% / +0.04% +0.17% +0.33%] index_select strided 5 : Elapsed 0.024 ms (2.410 ms / 100) 2.419 -> 2.417 ( -0.08%) [ +0.04% +0.08% +0.00% / +0.17% -0.08% +0.04%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.412 -> 2.411 ( -0.04%) [ +0.21% +0.00% +0.12% / -0.04% +0.08% -0.04%] index_select strided 8 : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.00% +0.21% +0.04% / +0.08% +0.29% +0.21%] index_select strided 16 : Elapsed 0.024 ms (2.410 ms / 100) 2.415 -> 2.421 ( +0.25%) [ +0.00% +0.08% +0.04% / +0.25% +0.37% +0.29%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.418 -> 2.416 ( -0.08%) [ +0.00% +0.04% +0.08% / +0.04% +0.00% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.00% +0.17% +0.00% / +0.08% +0.04% +0.12%] index_select perm : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.419 ( +0.00%) [ +0.00% +0.21% +0.08% / +0.25% +0.04% +0.00%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [20, 4, 5, 16] (stride (1, 1600, 320, 20)) A = [40, 4, 5, 16] (stride (5, 200, 1, 800)) dim = 0 2.342 -> 2.348 ( +0.26%) [ +0.43% +0.00% +0.17% / +0.34% +0.51% +0.26%] index_select const : Elapsed 0.024 ms (2.352 ms / 100) 2.362 -> 2.361 ( -0.04%) [ +0.00% +0.04% +0.00% / +0.00% -0.04% +0.00%] index_select wrap : Elapsed 0.024 ms (2.362 ms / 100) 2.357 -> 2.361 ( +0.17%) [ +0.13% +0.21% +0.00% / +0.17% +0.30% +0.25%] index_select linear : Elapsed 0.024 ms (2.360 ms / 100) 2.358 -> 2.360 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.17% +0.13%] index_select reverse : Elapsed 0.024 ms (2.361 ms / 100) 2.347 -> 2.345 ( -0.09%) [ +0.00% +0.09% +0.13% / +0.09% +0.09% -0.09%] index_select skip64 : Elapsed 0.023 ms (2.347 ms / 100) 2.344 -> 2.348 ( +0.17%) [ +0.30% +0.13% +0.00% / +0.26% +0.26% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.351 ms / 100) 2.368 -> 2.372 ( +0.17%) [ +0.08% +0.04% +0.00% / +0.17% +0.21% +0.34%] index_select spread : Elapsed 0.024 ms (2.370 ms / 100) 2.367 -> 2.369 ( +0.08%) [ +0.00% +0.13% +0.25% / +0.38% +0.08% +0.17%] index_select strided 3 : Elapsed 0.024 ms (2.367 ms / 100) 2.359 -> 2.361 ( +0.08%) [ +0.00% +0.08% +0.13% / +0.08% +0.13% +0.08%] index_select strided 5 : Elapsed 0.024 ms (2.359 ms / 100) 2.367 -> 2.364 ( -0.13%) [ +0.17% +0.00% +0.00% / -0.13% +0.21% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.371 ms / 100) 2.348 -> 2.350 ( +0.09%) [ +0.47% +0.21% +0.00% / +0.09% +0.17% +0.30%] index_select strided 8 : Elapsed 0.024 ms (2.359 ms / 100) 2.353 -> 2.353 ( +0.00%) [ +0.08% +0.04% +0.00% / +0.04% +0.00% +0.00%] index_select strided 16 : Elapsed 0.024 ms (2.355 ms / 100) 2.364 -> 2.363 ( -0.04%) [ +0.04% +0.21% +0.00% / +0.08% -0.04% +0.17%] index_select random : Elapsed 0.024 ms (2.365 ms / 100) 2.364 -> 2.365 ( +0.04%) [ +0.00% +0.08% +0.13% / +0.13% +0.04% +0.13%] index_select random_sorted : Elapsed 0.024 ms (2.364 ms / 100) 2.365 -> 2.364 ( -0.04%) [ +0.00% +0.25% +0.13% / +0.08% +0.08% -0.04%] index_select perm : Elapsed 0.024 ms (2.365 ms / 100) 2.367 -> 2.361 ( -0.25%) [ +0.04% +0.00% +0.13% / +0.08% -0.21% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.368 ms / 100) B = [20, 4, 5, 16] (stride (1, 1600, 20, 100)) A = [40, 4, 5, 16] (stride (320, 5, 1, 20)) dim = 0 2.404 -> 2.407 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.17% +0.12% +0.21%] index_select const : Elapsed 0.024 ms (2.404 ms / 100) 2.415 -> 2.409 ( -0.25%) [ +0.17% +0.00% +0.17% / +0.17% -0.21% -0.25%] index_select wrap : Elapsed 0.024 ms (2.419 ms / 100) 2.415 -> 2.412 ( -0.12%) [ +0.04% +0.00% +0.17% / +0.04% -0.12% -0.04%] index_select linear : Elapsed 0.024 ms (2.416 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.12% +0.00% +0.00% / +0.04% +0.04% -0.04%] index_select reverse : Elapsed 0.024 ms (2.418 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.08% +0.12% +0.00% / -0.04% -0.04% -0.12%] index_select skip64 : Elapsed 0.024 ms (2.411 ms / 100) 2.405 -> 2.408 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.21% +0.21%] index_select skip256 : Elapsed 0.024 ms (2.408 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.29% +0.00% +0.08% / +0.08% +0.41% +0.41%] index_select spread : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.413 ( +0.08%) [ +0.17% +0.12% +0.00% / +0.08% +0.25% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.29% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.29% +0.12% +0.00% / +0.04% +0.08% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.410 -> 2.408 ( -0.08%) [ +0.00% +0.00% +0.04% / -0.04% -0.08% +0.04%] index_select strided 8 : Elapsed 0.024 ms (2.410 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.08% +0.12% +0.12%] index_select strided 16 : Elapsed 0.024 ms (2.408 ms / 100) 2.413 -> 2.409 ( -0.17%) [ +0.08% +0.00% +0.00% / -0.17% +0.08% +0.04%] index_select random : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.12% +0.00% +0.04% / -0.08% -0.04% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.410 ( -0.17%) [ +0.04% +0.00% +0.21% / +0.21% +0.04% -0.17%] index_select perm : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.413 ( +0.00%) [ +0.17% +0.00% +0.04% / +0.00% +0.17% +0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) B = [20, 4, 5, 16] (stride (64, 1, 1280, 4)) A = [40, 4, 5, 16] (stride (1, 40, 2560, 160)) dim = 0 2.410 -> 2.408 ( -0.08%) [ +0.00% +0.00% +0.17% / -0.08% +0.25% +0.29%] index_select const : Elapsed 0.024 ms (2.410 ms / 100) 2.421 -> 2.421 ( +0.00%) [ +0.00% +0.29% +0.00% / +0.00% +0.04% +0.04%] index_select wrap : Elapsed 0.024 ms (2.421 ms / 100) 2.420 -> 2.419 ( -0.04%) [ +0.12% +0.12% +0.00% / +0.04% -0.04% +0.04%] index_select linear : Elapsed 0.024 ms (2.423 ms / 100) 2.418 -> 2.419 ( +0.04%) [ +0.17% +0.04% +0.00% / +0.21% +0.04% +0.17%] index_select reverse : Elapsed 0.024 ms (2.422 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.08% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.411 ms / 100) 2.410 -> 2.410 ( +0.00%) [ +0.17% +0.00% +0.25% / +0.00% +0.37% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.414 ms / 100) 2.425 -> 2.426 ( +0.04%) [ +0.21% +0.21% +0.00% / +0.04% +0.21% +0.25%] index_select spread : Elapsed 0.024 ms (2.430 ms / 100) 2.428 -> 2.427 ( -0.04%) [ +0.08% +0.00% +0.08% / +0.08% +0.00% -0.04%] index_select strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.426 ( -0.04%) [ +0.29% +0.00% +0.21% / +0.08% -0.04% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.434 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.21% +0.33%] index_select strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.00% +0.17% +0.29% / +0.08% +0.33% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.422 ms / 100) 2.424 -> 2.427 ( +0.12%) [ +0.00% +0.17% +0.08% / +0.12% +0.21% +0.41%] index_select strided 16 : Elapsed 0.024 ms (2.424 ms / 100) 2.427 -> 2.428 ( +0.04%) [ +0.00% +0.12% +0.21% / +0.04% +0.21% +0.04%] index_select random : Elapsed 0.024 ms (2.427 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.08% +0.00% +0.25% / +0.04% +0.04% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.16% +0.12% +0.12%] index_select perm : Elapsed 0.024 ms (2.429 ms / 100) 2.430 -> 2.424 ( -0.25%) [ +0.00% +0.00% +0.00% / -0.04% -0.25% -0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) out_shape = [40, 20, 5, 16] in_shape = [40, 4, 5, 16] idx_dim = 1 B = [40, 20, 5, 16] (stride (1600, 80, 16, 1)) A = [40, 4, 5, 16] (stride (320, 16, 64, 1)) dim = 1 1.959 -> 1.962 ( +0.15%) [ +0.00% +0.05% +0.05% / +0.15% +1.07% +1.33%] index_add_ linear : Elapsed 0.020 ms (1.959 ms / 100) 1.914 -> 1.919 ( +0.26%) [ +0.10% +0.00% +0.00% / +0.26% +1.62% +1.67%] index_copy_ linear : Elapsed 0.019 ms (1.916 ms / 100) 1.955 -> 1.960 ( +0.26%) [ +0.31% +0.00% +0.10% / +0.26% +1.43% +1.33%] index_add_ reverse : Elapsed 0.020 ms (1.961 ms / 100) 1.911 -> 1.920 ( +0.47%) [ +0.00% +0.31% +0.16% / +0.47% +1.94% +1.99%] index_copy_ reverse : Elapsed 0.019 ms (1.911 ms / 100) 1.961 -> 1.964 ( +0.15%) [ +0.10% +0.00% +0.00% / +0.15% +1.02% +0.97%] index_add_ spread : Elapsed 0.020 ms (1.963 ms / 100) 1.916 -> 1.921 ( +0.26%) [ +0.00% +0.47% +0.31% / +0.26% +1.77% +1.77%] index_copy_ spread : Elapsed 0.019 ms (1.916 ms / 100) 1.961 -> 1.958 ( -0.15%) [ +0.15% +0.25% +0.00% / -0.15% +1.02% +1.17%] index_add_ strided 3 : Elapsed 0.020 ms (1.964 ms / 100) 1.923 -> 1.922 ( -0.05%) [ +0.26% +0.16% +0.00% / -0.05% +1.35% +1.14%] index_copy_ strided 3 : Elapsed 0.019 ms (1.928 ms / 100) 1.962 -> 1.967 ( +0.25%) [ +0.05% +0.00% +0.25% / +0.25% +0.97% +1.12%] index_add_ strided 7 : Elapsed 0.020 ms (1.963 ms / 100) 1.920 -> 1.925 ( +0.26%) [ +0.63% +0.21% +0.00% / +0.26% +1.67% +1.61%] index_copy_ strided 7 : Elapsed 0.019 ms (1.932 ms / 100) 1.962 -> 1.965 ( +0.15%) [ +0.00% +0.31% +0.25% / +0.15% +1.17% +1.27%] index_add_ perm : Elapsed 0.020 ms (1.962 ms / 100) 1.922 -> 1.920 ( -0.10%) [ +0.00% +0.10% +0.16% / -0.10% +1.56% +1.35%] index_copy_ perm : Elapsed 0.019 ms (1.922 ms / 100) 1.961 -> 1.966 ( +0.25%) [ +0.00% +0.05% +0.15% / +0.25% +1.17% +0.87%] index_add_ perm_sorted : Elapsed 0.020 ms (1.961 ms / 100) 1.918 -> 1.924 ( +0.31%) [ +0.00% +0.47% +0.16% / +0.31% +1.88% +1.77%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.918 ms / 100) 8.688 -> 8.688 ( +0.00%) [ +0.00% +0.28% +0.12% / +0.00% +0.24% +0.23%] index_select const : Elapsed 0.087 ms (8.688 ms / 100) 8.734 -> 8.741 ( +0.08%) [ +0.00% +0.11% +0.01% / +0.08% +0.23% +0.46%] index_select wrap : Elapsed 0.087 ms (8.734 ms / 100) 8.712 -> 8.726 ( +0.16%) [ +0.00% +0.08% +0.07% / +0.22% +0.36% +0.16%] index_select linear : Elapsed 0.087 ms (8.712 ms / 100) 8.724 -> 8.714 ( -0.11%) [ +0.00% +0.15% +0.07% / -0.11% +0.28% +0.05%] index_select reverse : Elapsed 0.087 ms (8.724 ms / 100) 8.690 -> 8.702 ( +0.14%) [ +0.14% +0.00% +0.06% / +0.14% +0.35% +0.15%] index_select skip64 : Elapsed 0.087 ms (8.702 ms / 100) 8.683 -> 8.683 ( +0.00%) [ +0.29% +0.24% +0.00% / +0.00% +0.35% +0.20%] index_select skip256 : Elapsed 0.087 ms (8.708 ms / 100) 8.725 -> 8.721 ( -0.05%) [ +0.14% +0.00% +0.10% / -0.05% +0.44% +0.31%] index_select spread : Elapsed 0.087 ms (8.737 ms / 100) 8.735 -> 8.740 ( +0.06%) [ +0.02% +0.00% +0.01% / +0.06% +0.16% +0.27%] index_select strided 3 : Elapsed 0.087 ms (8.737 ms / 100) 8.720 -> 8.734 ( +0.16%) [ +0.00% +0.19% +0.13% / +0.16% +0.30% +0.22%] index_select random : Elapsed 0.087 ms (8.720 ms / 100) 8.724 -> 8.730 ( +0.07%) [ +0.03% +0.05% +0.00% / +0.07% +0.31% +0.34%] index_select random_sorted : Elapsed 0.087 ms (8.727 ms / 100) B = [40, 20, 5, 16] (stride (1600, 80, 16, 1)) A = [40, 4, 5, 16] (stride (64, 1, 2560, 4)) dim = 1 1.960 -> 1.959 ( -0.05%) [ +0.20% +0.00% +0.05% / -0.05% +0.46% +0.77%] index_add_ linear : Elapsed 0.020 ms (1.964 ms / 100) 1.912 -> 1.912 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.52% +0.94%] index_copy_ linear : Elapsed 0.019 ms (1.913 ms / 100) 1.961 -> 1.962 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.31% +0.56%] index_add_ reverse : Elapsed 0.020 ms (1.963 ms / 100) 1.913 -> 1.917 ( +0.21%) [ +0.26% +0.00% +0.21% / +0.21% +0.68% +0.63%] index_copy_ reverse : Elapsed 0.019 ms (1.918 ms / 100) 1.949 -> 1.955 ( +0.31%) [ +0.10% +0.00% +0.05% / +0.31% +0.62% +0.56%] index_add_ spread : Elapsed 0.020 ms (1.951 ms / 100) 1.905 -> 1.908 ( +0.16%) [ +0.10% +0.00% +0.00% / +0.16% +0.79% +0.58%] index_copy_ spread : Elapsed 0.019 ms (1.907 ms / 100) 1.965 -> 1.967 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.66% +0.56%] index_add_ strided 3 : Elapsed 0.020 ms (1.967 ms / 100) 1.918 -> 1.923 ( +0.26%) [ +0.31% +0.00% +0.00% / +0.26% +0.73% +0.68%] index_copy_ strided 3 : Elapsed 0.019 ms (1.924 ms / 100) 1.956 -> 1.964 ( +0.41%) [ +0.10% +0.20% +0.00% / +0.41% +0.66% +0.77%] index_add_ strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.912 -> 1.920 ( +0.42%) [ +0.05% +0.31% +0.00% / +0.42% +0.52% +0.63%] index_copy_ strided 7 : Elapsed 0.019 ms (1.913 ms / 100) 1.952 -> 1.953 ( +0.05%) [ +0.15% +0.20% +0.00% / +0.05% +0.61% +0.36%] index_add_ perm : Elapsed 0.020 ms (1.955 ms / 100) 1.904 -> 1.904 ( +0.00%) [ +0.00% +0.11% +0.05% / +0.00% +0.84% +0.58%] index_copy_ perm : Elapsed 0.019 ms (1.904 ms / 100) 1.963 -> 1.963 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.25% +0.05%] index_add_ perm_sorted : Elapsed 0.020 ms (1.965 ms / 100) 1.915 -> 1.912 ( -0.16%) [ +0.05% +0.16% +0.00% / -0.16% +0.52% +0.47%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.916 ms / 100) 8.716 -> 8.731 ( +0.17%) [ +0.00% +0.20% +0.29% / +0.17% +0.31% +0.28%] index_select const : Elapsed 0.087 ms (8.716 ms / 100) 8.728 -> 8.731 ( +0.03%) [ +0.00% +0.15% +0.07% / +0.03% +0.32% +0.16%] index_select wrap : Elapsed 0.087 ms (8.728 ms / 100) 8.726 -> 8.723 ( -0.03%) [ +0.03% +0.00% +0.10% / -0.03% +0.31% +0.18%] index_select linear : Elapsed 0.087 ms (8.729 ms / 100) 8.723 -> 8.729 ( +0.07%) [ +0.10% +0.00% +0.17% / +0.07% +0.34% +0.28%] index_select reverse : Elapsed 0.087 ms (8.732 ms / 100) 8.731 -> 8.733 ( +0.02%) [ +0.00% +0.24% +0.07% / +0.02% +0.13% +0.02%] index_select skip64 : Elapsed 0.087 ms (8.731 ms / 100) 8.732 -> 8.744 ( +0.14%) [ +0.02% +0.19% +0.00% / +0.33% +0.14% +0.26%] index_select skip256 : Elapsed 0.087 ms (8.734 ms / 100) 8.741 -> 8.741 ( +0.00%) [ +0.03% +0.13% +0.00% / +0.07% +0.05% +0.00%] index_select spread : Elapsed 0.087 ms (8.744 ms / 100) 8.725 -> 8.729 ( +0.05%) [ +0.02% +0.00% +0.13% / +0.05% +0.10% +0.22%] index_select strided 3 : Elapsed 0.087 ms (8.727 ms / 100) 8.729 -> 8.738 ( +0.10%) [ +0.06% +0.00% +0.01% / +0.10% +0.14% +0.18%] index_select random : Elapsed 0.087 ms (8.734 ms / 100) 8.734 -> 8.740 ( +0.07%) [ +0.00% +0.16% +0.02% / +0.07% +0.22% +0.07%] index_select random_sorted : Elapsed 0.087 ms (8.734 ms / 100) B = [40, 20, 5, 16] (stride (1600, 5, 1, 100)) A = [40, 4, 5, 16] (stride (16, 3200, 640, 1)) dim = 1 2.095 -> 2.097 ( +0.10%) [ +0.00% +0.14% +0.10% / +0.24% +0.10% +1.43%] index_add_ linear : Elapsed 0.021 ms (2.095 ms / 100) 2.058 -> 2.049 ( -0.44%) [ +0.00% +0.19% +0.39% / +0.05% -0.44% +0.05%] index_copy_ linear : Elapsed 0.021 ms (2.058 ms / 100) 2.094 -> 2.089 ( -0.24%) [ +0.00% +0.14% +0.05% / -0.24% +0.29% +0.19%] index_add_ reverse : Elapsed 0.021 ms (2.094 ms / 100) 2.052 -> 2.052 ( +0.00%) [ +0.34% +0.05% +0.00% / +0.10% +0.00% +0.05%] index_copy_ reverse : Elapsed 0.021 ms (2.059 ms / 100) 2.099 -> 2.105 ( +0.29%) [ +0.00% +0.19% +0.29% / +0.29% +0.48% +0.57%] index_add_ spread : Elapsed 0.021 ms (2.099 ms / 100) 2.077 -> 2.074 ( -0.14%) [ +0.00% +0.24% +0.10% / +0.05% -0.14% +0.05%] index_copy_ spread : Elapsed 0.021 ms (2.077 ms / 100) 2.107 -> 2.110 ( +0.14%) [ +0.00% +0.38% +0.14% / +0.14% +0.52% +0.38%] index_add_ strided 3 : Elapsed 0.021 ms (2.107 ms / 100) 2.086 -> 2.087 ( +0.05%) [ +0.19% +0.14% +0.00% / +0.05% +0.48% +0.19%] index_copy_ strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.095 -> 2.095 ( +0.00%) [ +0.10% +0.00% +0.14% / +0.10% +0.00% +0.05%] index_add_ strided 7 : Elapsed 0.021 ms (2.097 ms / 100) 2.065 -> 2.064 ( -0.05%) [ +0.00% +0.10% +0.48% / +0.48% -0.05% -0.05%] index_copy_ strided 7 : Elapsed 0.021 ms (2.065 ms / 100) 2.095 -> 2.101 ( +0.29%) [ +0.00% +0.43% +0.38% / +0.53% +0.53% +0.29%] index_add_ perm : Elapsed 0.021 ms (2.095 ms / 100) 2.073 -> 2.071 ( -0.10%) [ +0.24% +0.29% +0.00% / +0.39% -0.10% -0.10%] index_copy_ perm : Elapsed 0.021 ms (2.078 ms / 100) 2.103 -> 2.100 ( -0.14%) [ +0.00% +0.05% +0.10% / -0.14% +0.00% -0.10%] index_add_ perm_sorted : Elapsed 0.021 ms (2.103 ms / 100) 2.073 -> 2.071 ( -0.10%) [ +0.05% +0.00% +0.43% / +0.10% -0.10% +0.05%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.074 ms / 100) 9.228 -> 9.220 ( -0.09%) [ +0.00% +0.13% +0.15% / -0.09% +0.15% +0.24%] index_select const : Elapsed 0.092 ms (9.228 ms / 100) 9.319 -> 9.322 ( +0.03%) [ +0.09% +0.00% +0.02% / +0.03% +0.06% +0.09%] index_select wrap : Elapsed 0.093 ms (9.327 ms / 100) 9.273 -> 9.265 ( -0.09%) [ +0.04% +0.00% +0.03% / -0.09% +0.22% +0.30%] index_select linear : Elapsed 0.093 ms (9.277 ms / 100) 9.283 -> 9.283 ( +0.00%) [ +0.00% +0.19% +0.02% / +0.00% +0.12% +0.10%] index_select reverse : Elapsed 0.093 ms (9.283 ms / 100) 9.221 -> 9.220 ( -0.01%) [ +0.00% +0.02% +0.02% / -0.01% +0.38% +0.11%] index_select skip64 : Elapsed 0.092 ms (9.221 ms / 100) 9.230 -> 9.232 ( +0.02%) [ +0.00% +0.11% +0.10% / +0.02% +0.04% +0.08%] index_select skip256 : Elapsed 0.092 ms (9.230 ms / 100) 9.290 -> 9.283 ( -0.08%) [ +0.13% +0.03% +0.00% / -0.08% +0.11% -0.06%] index_select spread : Elapsed 0.093 ms (9.302 ms / 100) 9.315 -> 9.311 ( -0.04%) [ +0.00% +0.01% +0.11% / -0.04% +0.16% +0.02%] index_select strided 3 : Elapsed 0.093 ms (9.315 ms / 100) 9.296 -> 9.305 ( +0.10%) [ +0.00% +0.22% +0.17% / +0.10% +0.32% +0.34%] index_select random : Elapsed 0.093 ms (9.296 ms / 100) 9.279 -> 9.280 ( +0.01%) [ +0.00% +0.17% +0.05% / +0.01% +0.03% +0.05%] index_select random_sorted : Elapsed 0.093 ms (9.279 ms / 100) B = [40, 20, 5, 16] (stride (80, 3200, 1, 5)) A = [40, 4, 5, 16] (stride (320, 1, 64, 4)) dim = 1 1.967 -> 1.971 ( +0.20%) [ +0.05% +0.00% +0.15% / +0.20% +0.81% +0.76%] index_add_ linear : Elapsed 0.020 ms (1.968 ms / 100) 1.917 -> 1.919 ( +0.10%) [ +0.00% +0.31% +0.26% / +0.10% +0.94% +0.83%] index_copy_ linear : Elapsed 0.019 ms (1.917 ms / 100) 1.961 -> 1.966 ( +0.25%) [ +0.25% +0.31% +0.00% / +0.25% +0.92% +0.87%] index_add_ reverse : Elapsed 0.020 ms (1.966 ms / 100) 1.913 -> 1.921 ( +0.42%) [ +0.31% +0.37% +0.00% / +0.42% +0.84% +0.89%] index_copy_ reverse : Elapsed 0.019 ms (1.919 ms / 100) 1.958 -> 1.962 ( +0.20%) [ +0.00% +0.20% +0.36% / +0.20% +0.66% +0.72%] index_add_ spread : Elapsed 0.020 ms (1.958 ms / 100) 1.912 -> 1.918 ( +0.31%) [ +0.00% +0.16% +0.31% / +0.31% +0.47% +0.68%] index_copy_ spread : Elapsed 0.019 ms (1.912 ms / 100) 1.982 -> 1.985 ( +0.15%) [ +0.15% +0.10% +0.00% / +0.15% +0.66% +0.81%] index_add_ strided 3 : Elapsed 0.020 ms (1.985 ms / 100) 1.930 -> 1.936 ( +0.31%) [ +0.00% +0.21% +0.10% / +0.31% +0.78% +0.57%] index_copy_ strided 3 : Elapsed 0.019 ms (1.930 ms / 100) 1.974 -> 1.979 ( +0.25%) [ +0.25% +0.10% +0.00% / +0.25% +0.81% +0.56%] index_add_ strided 7 : Elapsed 0.020 ms (1.979 ms / 100) 1.921 -> 1.924 ( +0.16%) [ +0.21% +0.16% +0.00% / +0.16% +1.04% +0.99%] index_copy_ strided 7 : Elapsed 0.019 ms (1.925 ms / 100) 1.959 -> 1.960 ( +0.05%) [ +0.10% +0.15% +0.00% / +0.05% +0.77% +0.61%] index_add_ perm : Elapsed 0.020 ms (1.961 ms / 100) 1.913 -> 1.911 ( -0.10%) [ +0.37% +0.00% +0.21% / -0.10% +0.73% +0.63%] index_copy_ perm : Elapsed 0.019 ms (1.920 ms / 100) 1.962 -> 1.966 ( +0.20%) [ +0.36% +0.20% +0.00% / +0.20% +0.82% +0.87%] index_add_ perm_sorted : Elapsed 0.020 ms (1.969 ms / 100) 1.911 -> 1.917 ( +0.31%) [ +0.47% +0.16% +0.00% / +0.31% +0.99% +0.99%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.920 ms / 100) 8.745 -> 8.744 ( -0.01%) [ +0.10% +0.07% +0.00% / -0.01% +0.03% -0.01%] index_select const : Elapsed 0.088 ms (8.754 ms / 100) 8.740 -> 8.742 ( +0.02%) [ +0.46% +0.11% +0.00% / +0.05% +0.02% +0.39%] index_select wrap : Elapsed 0.088 ms (8.780 ms / 100) 8.744 -> 8.758 ( +0.16%) [ +0.01% +0.21% +0.00% / +0.16% +0.32% +0.27%] index_select linear : Elapsed 0.087 ms (8.745 ms / 100) 8.752 -> 8.757 ( +0.06%) [ +0.00% +0.07% +0.03% / +0.06% +0.07% +0.18%] index_select reverse : Elapsed 0.088 ms (8.752 ms / 100) 8.744 -> 8.755 ( +0.13%) [ +0.15% +0.18% +0.00% / +0.13% +0.19% +0.14%] index_select skip64 : Elapsed 0.088 ms (8.757 ms / 100) 8.746 -> 8.743 ( -0.03%) [ +0.00% +0.18% +0.06% / -0.03% +0.00% +0.25%] index_select skip256 : Elapsed 0.087 ms (8.746 ms / 100) 8.752 -> 8.743 ( -0.10%) [ +0.00% +0.08% +0.22% / +0.08% -0.10% +0.15%] index_select spread : Elapsed 0.088 ms (8.752 ms / 100) 8.748 -> 8.757 ( +0.10%) [ +0.17% +0.01% +0.00% / +0.13% +0.27% +0.10%] index_select strided 3 : Elapsed 0.088 ms (8.763 ms / 100) 8.753 -> 8.739 ( -0.16%) [ +0.00% +0.03% +0.07% / +0.07% -0.10% -0.16%] index_select random : Elapsed 0.088 ms (8.753 ms / 100) 8.745 -> 8.732 ( -0.15%) [ +0.00% +0.08% +0.13% / -0.15% +0.16% +0.22%] index_select random_sorted : Elapsed 0.087 ms (8.745 ms / 100) B = [40, 20, 5, 16] (stride (16, 3200, 640, 1)) A = [40, 4, 5, 16] (stride (1, 640, 2560, 40)) dim = 1 2.247 -> 2.248 ( +0.04%) [ +0.09% +0.00% +0.04% / +0.04% +0.22% +0.27%] index_add_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.177 -> 2.177 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.23% +0.23%] index_copy_ linear : Elapsed 0.022 ms (2.177 ms / 100) 2.246 -> 2.242 ( -0.18%) [ +0.09% +0.22% +0.00% / -0.18% +0.36% +0.27%] index_add_ reverse : Elapsed 0.022 ms (2.248 ms / 100) 2.177 -> 2.182 ( +0.23%) [ +0.05% +0.00% +0.05% / +0.23% +0.28% +0.32%] index_copy_ reverse : Elapsed 0.022 ms (2.178 ms / 100) 2.248 -> 2.249 ( +0.04%) [ +0.04% +0.00% +0.13% / +0.04% +0.31% +0.27%] index_add_ spread : Elapsed 0.022 ms (2.249 ms / 100) 2.177 -> 2.180 ( +0.14%) [ +0.00% +0.09% +0.14% / +0.14% +0.37% +0.28%] index_copy_ spread : Elapsed 0.022 ms (2.177 ms / 100) 2.246 -> 2.248 ( +0.09%) [ +0.13% +0.04% +0.00% / +0.09% +0.31% +0.22%] index_add_ strided 3 : Elapsed 0.022 ms (2.249 ms / 100) 2.176 -> 2.179 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.37% +0.37%] index_copy_ strided 3 : Elapsed 0.022 ms (2.179 ms / 100) 2.244 -> 2.243 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.53% +0.45%] index_add_ strided 7 : Elapsed 0.022 ms (2.245 ms / 100) 2.172 -> 2.174 ( +0.09%) [ +0.00% +0.14% +0.14% / +0.09% +0.64% +0.64%] index_copy_ strided 7 : Elapsed 0.022 ms (2.172 ms / 100) 2.245 -> 2.247 ( +0.09%) [ +0.22% +0.00% +0.13% / +0.09% +0.09% +0.09%] index_add_ perm : Elapsed 0.022 ms (2.250 ms / 100) 2.180 -> 2.180 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.14% +0.00% +0.00%] index_copy_ perm : Elapsed 0.022 ms (2.183 ms / 100) 2.246 -> 2.245 ( -0.04%) [ +0.04% +0.09% +0.00% / +0.00% -0.04% +0.00%] index_add_ perm_sorted : Elapsed 0.022 ms (2.247 ms / 100) 2.180 -> 2.178 ( -0.09%) [ +0.05% +0.09% +0.00% / -0.09% +0.09% +0.00%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.181 ms / 100) 9.128 -> 9.132 ( +0.04%) [ +0.00% +0.18% +0.45% / +0.42% +0.30% +0.04%] index_select const : Elapsed 0.091 ms (9.128 ms / 100) 9.185 -> 9.181 ( -0.04%) [ +0.00% +0.26% +0.16% / -0.01% -0.04% +0.08%] index_select wrap : Elapsed 0.092 ms (9.185 ms / 100) 9.174 -> 9.171 ( -0.03%) [ +0.13% +0.01% +0.00% / -0.03% +0.21% -0.01%] index_select linear : Elapsed 0.092 ms (9.186 ms / 100) 9.179 -> 9.178 ( -0.01%) [ +0.15% +0.00% +0.26% / +0.21% -0.01% +0.12%] index_select reverse : Elapsed 0.092 ms (9.193 ms / 100) 9.146 -> 9.144 ( -0.02%) [ +0.03% +0.00% +0.00% / +0.02% -0.02% +0.17%] index_select skip64 : Elapsed 0.091 ms (9.149 ms / 100) 9.150 -> 9.140 ( -0.11%) [ +0.00% +0.19% +0.02% / -0.11% +0.08% -0.07%] index_select skip256 : Elapsed 0.091 ms (9.150 ms / 100) 9.202 -> 9.193 ( -0.10%) [ +0.00% +0.05% +0.04% / -0.10% -0.03% +0.00%] index_select spread : Elapsed 0.092 ms (9.202 ms / 100) 9.187 -> 9.183 ( -0.04%) [ +0.02% +0.20% +0.00% / +0.16% -0.04% -0.01%] index_select strided 3 : Elapsed 0.092 ms (9.189 ms / 100) 9.194 -> 9.178 ( -0.17%) [ +0.00% +0.04% +0.12% / +0.05% -0.17% -0.15%] index_select random : Elapsed 0.092 ms (9.194 ms / 100) 9.197 -> 9.203 ( +0.07%) [ +0.10% +0.00% +0.17% / +0.12% +0.11% +0.07%] index_select random_sorted : Elapsed 0.092 ms (9.206 ms / 100) B = [40, 20, 5, 16] (stride (16, 640, 12800, 1)) A = [40, 4, 5, 16] (stride (20, 1, 4, 800)) dim = 1 2.177 -> 2.175 ( -0.09%) [ +0.14% +0.00% +0.14% / +0.00% +0.14% -0.09%] index_add_ linear : Elapsed 0.022 ms (2.180 ms / 100) 2.124 -> 2.124 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.09% +0.05%] index_copy_ linear : Elapsed 0.021 ms (2.127 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.28% +0.00% +0.09% / +0.18% +0.09% -0.05%] index_add_ reverse : Elapsed 0.022 ms (2.180 ms / 100) 2.123 -> 2.121 ( -0.09%) [ +0.19% +0.24% +0.00% / +0.19% +0.09% -0.09%] index_copy_ reverse : Elapsed 0.021 ms (2.127 ms / 100) 2.167 -> 2.171 ( +0.18%) [ +0.09% +0.14% +0.00% / +0.18% +0.37% +0.23%] index_add_ spread : Elapsed 0.022 ms (2.169 ms / 100) 2.110 -> 2.118 ( +0.38%) [ +0.00% +0.24% +0.19% / +0.38% +0.43% +0.47%] index_copy_ spread : Elapsed 0.021 ms (2.110 ms / 100) 2.183 -> 2.187 ( +0.18%) [ +0.00% +0.18% +0.05% / +0.18% +0.55% +0.64%] index_add_ strided 3 : Elapsed 0.022 ms (2.183 ms / 100) 2.126 -> 2.128 ( +0.09%) [ +0.00% +0.05% +0.00% / +0.09% +0.66% +0.61%] index_copy_ strided 3 : Elapsed 0.021 ms (2.126 ms / 100) 2.174 -> 2.175 ( +0.05%) [ +0.28% +0.00% +0.09% / +0.05% +0.46% +0.55%] index_add_ strided 7 : Elapsed 0.022 ms (2.180 ms / 100) 2.117 -> 2.116 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.19% +0.47%] index_copy_ strided 7 : Elapsed 0.021 ms (2.117 ms / 100) 2.168 -> 2.170 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.42% +0.46%] index_add_ perm : Elapsed 0.022 ms (2.172 ms / 100) 2.111 -> 2.112 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.62% +0.62%] index_copy_ perm : Elapsed 0.021 ms (2.111 ms / 100) 2.178 -> 2.176 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.09% +0.14%] index_add_ perm_sorted : Elapsed 0.022 ms (2.178 ms / 100) 2.119 -> 2.123 ( +0.19%) [ +0.28% +0.05% +0.00% / +0.19% +0.47% +0.33%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.125 ms / 100) 9.222 -> 9.218 ( -0.04%) [ +0.05% +0.03% +0.00% / -0.04% +0.13% +0.05%] index_select const : Elapsed 0.092 ms (9.227 ms / 100) 9.221 -> 9.228 ( +0.08%) [ +0.00% +0.07% +0.09% / +0.16% +0.08% +0.13%] index_select wrap : Elapsed 0.092 ms (9.221 ms / 100) 9.224 -> 9.228 ( +0.04%) [ +0.00% +0.05% +0.04% / +0.31% +0.04% +0.07%] index_select linear : Elapsed 0.092 ms (9.224 ms / 100) 9.221 -> 9.216 ( -0.05%) [ +0.16% +0.00% +0.08% / +0.15% -0.05% -0.02%] index_select reverse : Elapsed 0.092 ms (9.236 ms / 100) 9.223 -> 9.223 ( +0.00%) [ +0.00% +0.20% +0.04% / +0.13% +0.05% +0.00%] index_select skip64 : Elapsed 0.092 ms (9.223 ms / 100) 9.223 -> 9.214 ( -0.10%) [ +0.00% +0.02% +0.07% / -0.10% +0.20% -0.08%] index_select skip256 : Elapsed 0.092 ms (9.223 ms / 100) 9.221 -> 9.214 ( -0.08%) [ +0.04% +0.13% +0.00% / +0.30% -0.08% +0.24%] index_select spread : Elapsed 0.092 ms (9.225 ms / 100) 9.214 -> 9.227 ( +0.14%) [ +0.23% +0.00% +0.20% / +0.28% +0.14% +0.20%] index_select strided 3 : Elapsed 0.092 ms (9.235 ms / 100) 9.217 -> 9.220 ( +0.03%) [ +0.00% +0.15% +0.15% / +0.03% +0.22% +0.15%] index_select random : Elapsed 0.092 ms (9.217 ms / 100) 9.231 -> 9.218 ( -0.14%) [ +0.15% +0.00% +0.12% / -0.14% -0.05% +0.22%] index_select random_sorted : Elapsed 0.092 ms (9.245 ms / 100) B = [40, 20, 5, 16] (stride (5, 200, 1, 4000)) A = [40, 4, 5, 16] (stride (20, 1, 4, 800)) dim = 1 2.183 -> 2.185 ( +0.09%) [ +0.00% +0.14% +0.18% / +0.09% +0.32% +0.18%] index_add_ linear : Elapsed 0.022 ms (2.183 ms / 100) 2.133 -> 2.133 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.00% +0.00%] index_copy_ linear : Elapsed 0.021 ms (2.135 ms / 100) 2.181 -> 2.180 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.23% +0.05%] index_add_ reverse : Elapsed 0.022 ms (2.181 ms / 100) 2.129 -> 2.134 ( +0.23%) [ +0.14% +0.00% +0.09% / +0.23% +0.52% +0.23%] index_copy_ reverse : Elapsed 0.021 ms (2.132 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.09% +0.00% +0.05% / -0.05% +0.28% +0.51%] index_add_ spread : Elapsed 0.022 ms (2.176 ms / 100) 2.123 -> 2.125 ( +0.09%) [ +0.00% +0.14% +0.09% / +0.09% +0.33% +0.38%] index_copy_ spread : Elapsed 0.021 ms (2.123 ms / 100) 2.186 -> 2.187 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.46% +0.46%] index_add_ strided 3 : Elapsed 0.022 ms (2.186 ms / 100) 2.132 -> 2.134 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.14% +0.09% +0.23%] index_copy_ strided 3 : Elapsed 0.021 ms (2.136 ms / 100) 2.192 -> 2.192 ( +0.00%) [ +0.23% +0.27% +0.00% / +0.23% +0.00% +0.32%] index_add_ strided 7 : Elapsed 0.022 ms (2.197 ms / 100) 2.141 -> 2.141 ( +0.00%) [ +0.23% +0.09% +0.00% / +0.19% +0.00% +0.00%] index_copy_ strided 7 : Elapsed 0.021 ms (2.146 ms / 100) 2.187 -> 2.188 ( +0.05%) [ +0.00% +0.18% +0.05% / +0.05% +0.18% +0.32%] index_add_ perm : Elapsed 0.022 ms (2.187 ms / 100) 2.131 -> 2.134 ( +0.14%) [ +0.00% +0.09% +0.09% / +0.23% +0.14% +0.56%] index_copy_ perm : Elapsed 0.021 ms (2.131 ms / 100) 2.192 -> 2.197 ( +0.23%) [ +0.23% +0.00% +0.14% / +0.23% +0.36% +0.23%] index_add_ perm_sorted : Elapsed 0.022 ms (2.197 ms / 100) 2.141 -> 2.142 ( +0.05%) [ +0.00% +0.19% +0.05% / +0.05% +0.05% +0.23%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.141 ms / 100) 9.255 -> 9.265 ( +0.11%) [ +0.18% +0.00% +0.03% / +0.11% +0.25% +0.23%] index_select const : Elapsed 0.093 ms (9.272 ms / 100) 9.262 -> 9.253 ( -0.10%) [ +0.11% +0.00% +0.06% / -0.10% +0.50% +0.23%] index_select wrap : Elapsed 0.093 ms (9.272 ms / 100) 9.256 -> 9.261 ( +0.05%) [ +0.00% +0.16% +0.03% / +0.05% +0.40% +0.09%] index_select linear : Elapsed 0.093 ms (9.256 ms / 100) 9.248 -> 9.272 ( +0.26%) [ +0.26% +0.00% +0.10% / +0.26% +0.28% +0.38%] index_select reverse : Elapsed 0.093 ms (9.272 ms / 100) 9.249 -> 9.258 ( +0.10%) [ +0.16% +0.00% +0.17% / +0.10% +0.35% +0.21%] index_select skip64 : Elapsed 0.093 ms (9.264 ms / 100) 9.264 -> 9.266 ( +0.02%) [ +0.26% +0.12% +0.00% / +0.02% +0.13% +0.32%] index_select skip256 : Elapsed 0.093 ms (9.288 ms / 100) 9.247 -> 9.255 ( +0.09%) [ +0.00% +0.11% +0.25% / +0.09% +0.25% +0.34%] index_select spread : Elapsed 0.092 ms (9.247 ms / 100) 9.248 -> 9.267 ( +0.21%) [ +0.35% +0.00% +0.36% / +0.23% +0.43% +0.21%] index_select strided 3 : Elapsed 0.093 ms (9.280 ms / 100) 9.263 -> 9.277 ( +0.15%) [ +0.11% +0.03% +0.00% / +0.15% +0.29% +0.15%] index_select random : Elapsed 0.093 ms (9.273 ms / 100) 9.265 -> 9.256 ( -0.10%) [ +0.14% +0.02% +0.00% / -0.10% +0.21% +0.09%] index_select random_sorted : Elapsed 0.093 ms (9.278 ms / 100) B = [40, 20, 5, 16] (stride (1, 40, 800, 4000)) A = [40, 4, 5, 16] (stride (320, 80, 1, 5)) dim = 1 2.065 -> 2.073 ( +0.39%) [ +0.00% +0.24% +0.34% / +0.39% +0.58% +0.73%] index_add_ linear : Elapsed 0.021 ms (2.065 ms / 100) 2.032 -> 2.031 ( -0.05%) [ +0.30% +0.00% +0.30% / -0.05% +0.84% +1.08%] index_copy_ linear : Elapsed 0.020 ms (2.038 ms / 100) 2.066 -> 2.068 ( +0.10%) [ +0.48% +0.00% +0.19% / +0.10% +0.53% +0.44%] index_add_ reverse : Elapsed 0.021 ms (2.076 ms / 100) 2.033 -> 2.034 ( +0.05%) [ +0.00% +0.10% +0.00% / +0.05% +0.69% +0.54%] index_copy_ reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.056 -> 2.057 ( +0.05%) [ +0.19% +0.10% +0.00% / +0.05% +0.68% +0.78%] index_add_ spread : Elapsed 0.021 ms (2.060 ms / 100) 2.016 -> 2.022 ( +0.30%) [ +0.25% +0.20% +0.00% / +0.30% +1.04% +1.19%] index_copy_ spread : Elapsed 0.020 ms (2.021 ms / 100) 2.064 -> 2.070 ( +0.29%) [ +0.29% +0.39% +0.00% / +0.29% +0.78% +0.73%] index_add_ strided 3 : Elapsed 0.021 ms (2.070 ms / 100) 2.030 -> 2.026 ( -0.20%) [ +0.00% +0.10% +0.15% / -0.20% +0.84% +0.74%] index_copy_ strided 3 : Elapsed 0.020 ms (2.030 ms / 100) 2.077 -> 2.081 ( +0.19%) [ +0.29% +0.19% +0.00% / +0.19% +0.63% +0.53%] index_add_ strided 7 : Elapsed 0.021 ms (2.083 ms / 100) 2.041 -> 2.040 ( -0.05%) [ +0.10% +0.15% +0.00% / -0.05% +0.88% +0.73%] index_copy_ strided 7 : Elapsed 0.020 ms (2.043 ms / 100) 2.065 -> 2.064 ( -0.05%) [ +0.10% +0.00% +0.00% / -0.05% +0.58% +0.48%] index_add_ perm : Elapsed 0.021 ms (2.067 ms / 100) 2.023 -> 2.025 ( +0.10%) [ +0.15% +0.15% +0.00% / +0.10% +0.89% +0.94%] index_copy_ perm : Elapsed 0.020 ms (2.026 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.10% +0.19% +0.00% / +0.10% +0.48% +0.58%] index_add_ perm_sorted : Elapsed 0.021 ms (2.073 ms / 100) 2.040 -> 2.041 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.44% +0.49%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.041 ms / 100) 9.155 -> 9.148 ( -0.08%) [ +0.09% +0.21% +0.00% / -0.08% +0.05% +0.07%] index_select const : Elapsed 0.092 ms (9.163 ms / 100) 9.213 -> 9.218 ( +0.05%) [ +0.03% +0.22% +0.00% / +0.22% +0.05% +0.16%] index_select wrap : Elapsed 0.092 ms (9.216 ms / 100) 9.187 -> 9.173 ( -0.15%) [ +0.00% +0.15% +0.05% / +0.12% -0.15% -0.12%] index_select linear : Elapsed 0.092 ms (9.187 ms / 100) 9.165 -> 9.169 ( +0.04%) [ +0.00% +0.25% +0.17% / +0.04% +0.25% +0.07%] index_select reverse : Elapsed 0.092 ms (9.165 ms / 100) 9.151 -> 9.166 ( +0.16%) [ +0.11% +0.34% +0.00% / +0.20% +0.19% +0.16%] index_select skip64 : Elapsed 0.092 ms (9.161 ms / 100) 9.160 -> 9.150 ( -0.11%) [ +0.14% +0.02% +0.00% / -0.02% -0.11% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.173 ms / 100) 9.206 -> 9.194 ( -0.13%) [ +0.15% +0.00% +0.01% / +0.12% +0.16% -0.13%] index_select spread : Elapsed 0.092 ms (9.220 ms / 100) 9.222 -> 9.204 ( -0.20%) [ +0.17% +0.17% +0.00% / +0.12% -0.16% -0.20%] index_select strided 3 : Elapsed 0.092 ms (9.238 ms / 100) 9.226 -> 9.205 ( -0.23%) [ +0.05% +0.00% +0.07% / -0.16% -0.23% -0.07%] index_select random : Elapsed 0.092 ms (9.231 ms / 100) 9.186 -> 9.196 ( +0.11%) [ +0.00% +0.16% +0.17% / +0.11% +0.16% +0.11%] index_select random_sorted : Elapsed 0.092 ms (9.186 ms / 100) out_shape = [40, 4, 20, 16] in_shape = [40, 4, 5, 16] idx_dim = 2 B = [40, 4, 20, 16] (stride (1280, 1, 64, 4)) A = [40, 4, 5, 16] (stride (5, 200, 1, 800)) dim = 2 1.958 -> 1.967 ( +0.46%) [ +0.00% +0.26% +0.51% / +0.46% +0.46% +0.61%] index_add_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.898 -> 1.910 ( +0.63%) [ +0.00% +0.26% +0.47% / +0.63% +0.74% +0.68%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.959 -> 1.967 ( +0.41%) [ +0.00% +0.05% +0.41% / +0.41% +0.51% +0.56%] index_add_ reverse : Elapsed 0.020 ms (1.959 ms / 100) 1.903 -> 1.910 ( +0.37%) [ +0.00% +0.05% +0.26% / +0.37% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.019 ms (1.903 ms / 100) 1.951 -> 1.962 ( +0.56%) [ +0.10% +0.00% +0.41% / +0.56% +0.97% +0.82%] index_add_ spread : Elapsed 0.020 ms (1.953 ms / 100) 1.895 -> 1.904 ( +0.47%) [ +0.00% +0.21% +0.47% / +0.47% +0.63% +0.79%] index_copy_ spread : Elapsed 0.019 ms (1.895 ms / 100) 1.951 -> 1.960 ( +0.46%) [ +0.05% +0.00% +0.31% / +0.46% +0.87% +0.87%] index_add_ strided 3 : Elapsed 0.020 ms (1.952 ms / 100) 1.895 -> 1.903 ( +0.42%) [ +0.00% +0.16% +0.47% / +0.42% +0.79% +0.79%] index_copy_ strided 3 : Elapsed 0.019 ms (1.895 ms / 100) 1.956 -> 1.967 ( +0.56%) [ +0.00% +0.10% +0.51% / +0.56% +1.12% +0.87%] index_add_ strided 7 : Elapsed 0.020 ms (1.956 ms / 100) 1.896 -> 1.906 ( +0.53%) [ +0.05% +0.00% +0.53% / +0.53% +1.16% +0.95%] index_copy_ strided 7 : Elapsed 0.019 ms (1.897 ms / 100) 1.951 -> 1.961 ( +0.51%) [ +0.05% +0.00% +0.36% / +0.51% +0.77% +0.77%] index_add_ perm : Elapsed 0.020 ms (1.952 ms / 100) 1.894 -> 1.902 ( +0.42%) [ +0.00% +0.05% +0.32% / +0.42% +0.74% +0.74%] index_copy_ perm : Elapsed 0.019 ms (1.894 ms / 100) 1.952 -> 1.959 ( +0.36%) [ +0.15% +0.00% +0.36% / +0.36% +0.97% +0.77%] index_add_ perm_sorted : Elapsed 0.020 ms (1.955 ms / 100) 1.896 -> 1.904 ( +0.42%) [ +0.00% +0.16% +0.32% / +0.42% +0.90% +1.00%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.896 ms / 100) 8.544 -> 8.540 ( -0.05%) [ +0.00% +0.13% +0.02% / -0.05% +0.33% +0.41%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.529 -> 8.537 ( +0.09%) [ +0.00% +0.16% +0.40% / +0.09% +0.67% +0.64%] index_select wrap : Elapsed 0.085 ms (8.529 ms / 100) 8.525 -> 8.538 ( +0.15%) [ +0.00% +0.07% +0.28% / +0.15% +0.54% +0.82%] index_select linear : Elapsed 0.085 ms (8.525 ms / 100) 8.527 -> 8.552 ( +0.29%) [ +0.09% +0.00% +0.19% / +0.29% +0.60% +0.88%] index_select reverse : Elapsed 0.085 ms (8.535 ms / 100) 8.540 -> 8.546 ( +0.07%) [ +0.20% +0.05% +0.00% / +0.07% +0.55% +0.63%] index_select skip64 : Elapsed 0.086 ms (8.557 ms / 100) 8.540 -> 8.574 ( +0.40%) [ +0.13% +0.00% +0.16% / +0.40% +0.42% +0.60%] index_select skip256 : Elapsed 0.086 ms (8.551 ms / 100) 8.528 -> 8.542 ( +0.16%) [ +0.30% +0.12% +0.00% / +0.16% +0.94% +0.59%] index_select spread : Elapsed 0.086 ms (8.554 ms / 100) 8.536 -> 8.535 ( -0.01%) [ +0.08% +0.00% +0.01% / -0.01% +0.35% +0.52%] index_select strided 3 : Elapsed 0.085 ms (8.543 ms / 100) 8.532 -> 8.528 ( -0.05%) [ +0.00% +0.22% +0.06% / -0.05% +0.68% +0.80%] index_select random : Elapsed 0.085 ms (8.532 ms / 100) 8.543 -> 8.528 ( -0.18%) [ +0.18% +0.18% +0.00% / -0.18% +0.39% +0.54%] index_select random_sorted : Elapsed 0.086 ms (8.558 ms / 100) B = [40, 4, 20, 16] (stride (320, 12800, 16, 1)) A = [40, 4, 5, 16] (stride (16, 3200, 640, 1)) dim = 2 1.819 -> 1.819 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.00% +0.16% +0.22%] index_add_ linear : Elapsed 0.018 ms (1.820 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.00% +0.11% +0.06% / +0.28% +0.11% +0.28%] index_copy_ linear : Elapsed 0.018 ms (1.777 ms / 100) 1.821 -> 1.822 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.11% +0.16% +0.05%] index_add_ reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.777 -> 1.776 ( -0.06%) [ +0.00% +0.23% +0.06% / -0.06% +0.34% +0.39%] index_copy_ reverse : Elapsed 0.018 ms (1.777 ms / 100) 1.829 -> 1.830 ( +0.05%) [ +0.00% +0.22% +0.22% / +0.05% +0.55% +0.71%] index_add_ spread : Elapsed 0.018 ms (1.829 ms / 100) 1.793 -> 1.796 ( +0.17%) [ +0.00% +0.06% +0.33% / +0.17% +0.67% +0.50%] index_copy_ spread : Elapsed 0.018 ms (1.793 ms / 100) 1.829 -> 1.831 ( +0.11%) [ +0.16% +0.05% +0.00% / +0.11% +0.44% +0.55%] index_add_ strided 3 : Elapsed 0.018 ms (1.832 ms / 100) 1.793 -> 1.797 ( +0.22%) [ +0.11% +0.00% +0.11% / +0.22% +0.73% +0.67%] index_copy_ strided 3 : Elapsed 0.018 ms (1.795 ms / 100) 1.825 -> 1.826 ( +0.05%) [ +0.00% +0.11% +0.05% / +0.05% +0.49% +0.44%] index_add_ strided 7 : Elapsed 0.018 ms (1.825 ms / 100) 1.784 -> 1.787 ( +0.17%) [ +0.00% +0.06% +0.11% / +0.17% +0.39% +0.28%] index_copy_ strided 7 : Elapsed 0.018 ms (1.784 ms / 100) 1.835 -> 1.836 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.44% +0.22% +0.05%] index_add_ perm : Elapsed 0.018 ms (1.836 ms / 100) 1.797 -> 1.801 ( +0.22%) [ +0.00% +0.11% +0.22% / +0.22% +0.45% +0.22%] index_copy_ perm : Elapsed 0.018 ms (1.797 ms / 100) 1.836 -> 1.834 ( -0.11%) [ +0.00% +0.33% +0.11% / +0.05% -0.05% -0.11%] index_add_ perm_sorted : Elapsed 0.018 ms (1.836 ms / 100) 1.798 -> 1.801 ( +0.17%) [ +0.00% +0.28% +0.17% / +0.17% +0.22% +0.28%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.798 ms / 100) 8.533 -> 8.535 ( +0.02%) [ +0.08% +0.07% +0.00% / +0.07% +0.02% +0.02%] index_select const : Elapsed 0.085 ms (8.540 ms / 100) 8.591 -> 8.581 ( -0.12%) [ +0.00% +0.06% +0.08% / +0.19% -0.03% -0.12%] index_select wrap : Elapsed 0.086 ms (8.591 ms / 100) 8.568 -> 8.556 ( -0.14%) [ +0.01% +0.00% +0.09% / -0.05% -0.14% +0.13%] index_select linear : Elapsed 0.086 ms (8.569 ms / 100) 8.571 -> 8.565 ( -0.07%) [ +0.00% +0.09% +0.14% / +0.21% +0.12% -0.07%] index_select reverse : Elapsed 0.086 ms (8.571 ms / 100) 8.511 -> 8.533 ( +0.26%) [ +0.00% +0.15% +0.28% / +0.45% +0.32% +0.26%] index_select skip64 : Elapsed 0.085 ms (8.511 ms / 100) 8.520 -> 8.529 ( +0.11%) [ +0.39% +0.00% +0.09% / +0.11% +0.26% +0.18%] index_select skip256 : Elapsed 0.086 ms (8.553 ms / 100) 8.556 -> 8.558 ( +0.02%) [ +0.18% +0.00% +0.16% / +0.25% +0.13% +0.02%] index_select spread : Elapsed 0.086 ms (8.571 ms / 100) 8.594 -> 8.584 ( -0.12%) [ +0.06% +0.00% +0.24% / -0.12% +0.16% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.599 ms / 100) 8.579 -> 8.584 ( +0.06%) [ +0.00% +0.07% +0.17% / +0.14% +0.24% +0.06%] index_select random : Elapsed 0.086 ms (8.579 ms / 100) 8.570 -> 8.580 ( +0.12%) [ +0.00% +0.05% +0.00% / +0.15% +0.15% +0.12%] index_select random_sorted : Elapsed 0.086 ms (8.570 ms / 100) B = [40, 4, 20, 16] (stride (320, 12800, 16, 1)) A = [40, 4, 5, 16] (stride (16, 640, 2560, 1)) dim = 2 1.851 -> 1.856 ( +0.27%) [ +0.22% +0.16% +0.00% / +0.27% +0.54% +0.81%] index_add_ linear : Elapsed 0.019 ms (1.855 ms / 100) 1.810 -> 1.809 ( -0.06%) [ +0.17% +0.00% +0.06% / -0.06% +0.61% +0.44%] index_copy_ linear : Elapsed 0.018 ms (1.813 ms / 100) 1.853 -> 1.853 ( +0.00%) [ +0.00% +0.27% +0.16% / +0.00% +0.54% +0.54%] index_add_ reverse : Elapsed 0.019 ms (1.853 ms / 100) 1.811 -> 1.813 ( +0.11%) [ +0.00% +0.11% +0.06% / +0.11% +0.44% +0.66%] index_copy_ reverse : Elapsed 0.018 ms (1.811 ms / 100) 1.865 -> 1.867 ( +0.11%) [ +0.16% +0.00% +0.05% / +0.11% +1.07% +1.18%] index_add_ spread : Elapsed 0.019 ms (1.868 ms / 100) 1.822 -> 1.827 ( +0.27%) [ +0.00% +0.05% +0.27% / +0.27% +0.99% +1.04%] index_copy_ spread : Elapsed 0.018 ms (1.822 ms / 100) 1.871 -> 1.871 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.75% +0.69%] index_add_ strided 3 : Elapsed 0.019 ms (1.871 ms / 100) 1.826 -> 1.829 ( +0.16%) [ +0.22% +0.00% +0.22% / +0.16% +0.88% +0.88%] index_copy_ strided 3 : Elapsed 0.018 ms (1.830 ms / 100) 1.855 -> 1.859 ( +0.22%) [ +0.05% +0.00% +0.11% / +0.22% +0.97% +1.13%] index_add_ strided 7 : Elapsed 0.019 ms (1.856 ms / 100) 1.811 -> 1.813 ( +0.11%) [ +0.00% +0.06% +0.17% / +0.11% +1.10% +1.10%] index_copy_ strided 7 : Elapsed 0.018 ms (1.811 ms / 100) 1.856 -> 1.856 ( +0.00%) [ +0.00% +0.11% +0.05% / +0.00% +0.92% +1.02%] index_add_ perm : Elapsed 0.019 ms (1.856 ms / 100) 1.813 -> 1.816 ( +0.17%) [ +0.00% +0.06% +0.22% / +0.17% +0.99% +0.99%] index_copy_ perm : Elapsed 0.018 ms (1.813 ms / 100) 1.857 -> 1.856 ( -0.05%) [ +0.00% +0.11% +0.05% / -0.05% +1.02% +0.86%] index_add_ perm_sorted : Elapsed 0.019 ms (1.857 ms / 100) 1.811 -> 1.815 ( +0.22%) [ +0.00% +0.22% +0.28% / +0.22% +1.27% +1.38%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.811 ms / 100) 8.535 -> 8.536 ( +0.01%) [ +0.11% +0.23% +0.00% / +0.01% +0.11% +0.05%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.582 -> 8.584 ( +0.02%) [ +0.41% +0.00% +0.24% / +0.15% +0.02% +0.03%] index_select wrap : Elapsed 0.086 ms (8.617 ms / 100) 8.581 -> 8.560 ( -0.24%) [ +0.17% +0.00% +0.12% / -0.01% +0.05% -0.24%] index_select linear : Elapsed 0.086 ms (8.596 ms / 100) 8.586 -> 8.571 ( -0.17%) [ +0.22% +0.00% +0.07% / -0.17% +0.07% +0.23%] index_select reverse : Elapsed 0.086 ms (8.605 ms / 100) 8.529 -> 8.528 ( -0.01%) [ +0.08% +0.15% +0.00% / -0.01% +0.09% +0.07%] index_select skip64 : Elapsed 0.085 ms (8.536 ms / 100) 8.520 -> 8.522 ( +0.02%) [ +0.00% +0.18% +0.38% / +0.40% +0.31% +0.02%] index_select skip256 : Elapsed 0.085 ms (8.520 ms / 100) 8.578 -> 8.541 ( -0.43%) [ +0.00% +0.05% +0.07% / -0.26% -0.43% -0.23%] index_select spread : Elapsed 0.086 ms (8.578 ms / 100) 8.594 -> 8.574 ( -0.23%) [ +0.00% +0.01% +0.07% / +0.02% -0.23% +0.07%] index_select strided 3 : Elapsed 0.086 ms (8.594 ms / 100) 8.572 -> 8.588 ( +0.19%) [ +0.00% +0.13% +0.22% / +0.42% +0.19% +0.33%] index_select random : Elapsed 0.086 ms (8.572 ms / 100) 8.567 -> 8.561 ( -0.07%) [ +0.26% +0.12% +0.00% / -0.04% -0.07% +0.14%] index_select random_sorted : Elapsed 0.086 ms (8.589 ms / 100) B = [40, 4, 20, 16] (stride (320, 12800, 16, 1)) A = [40, 4, 5, 16] (stride (1, 40, 2560, 160)) dim = 2 1.937 -> 1.939 ( +0.10%) [ +0.10% +0.00% +0.00% / +0.10% +0.77% +0.77%] index_add_ linear : Elapsed 0.019 ms (1.939 ms / 100) 1.889 -> 1.887 ( -0.11%) [ +0.00% +0.00% +0.05% / -0.11% +0.58% +0.37%] index_copy_ linear : Elapsed 0.019 ms (1.889 ms / 100) 1.940 -> 1.938 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +0.52% +0.72%] index_add_ reverse : Elapsed 0.019 ms (1.942 ms / 100) 1.886 -> 1.886 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.48% +0.37%] index_copy_ reverse : Elapsed 0.019 ms (1.889 ms / 100) 1.956 -> 1.961 ( +0.26%) [ +0.10% +0.15% +0.00% / +0.26% +0.51% +0.36%] index_add_ spread : Elapsed 0.020 ms (1.958 ms / 100) 1.909 -> 1.913 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.58% +0.26%] index_copy_ spread : Elapsed 0.019 ms (1.909 ms / 100) 1.946 -> 1.948 ( +0.10%) [ +0.21% +0.00% +0.15% / +0.10% +0.67% +0.72%] index_add_ strided 3 : Elapsed 0.019 ms (1.950 ms / 100) 1.900 -> 1.901 ( +0.05%) [ +0.05% +0.11% +0.00% / +0.05% +0.63% +0.47%] index_copy_ strided 3 : Elapsed 0.019 ms (1.901 ms / 100) 1.944 -> 1.946 ( +0.10%) [ +0.15% +0.26% +0.00% / +0.10% +0.77% +0.82%] index_add_ strided 7 : Elapsed 0.019 ms (1.947 ms / 100) 1.895 -> 1.899 ( +0.21%) [ +0.00% +0.16% +0.05% / +0.21% +0.74% +0.63%] index_copy_ strided 7 : Elapsed 0.019 ms (1.895 ms / 100) 1.947 -> 1.950 ( +0.15%) [ +0.00% +0.10% +0.15% / +0.15% +0.41% +0.21%] index_add_ perm : Elapsed 0.019 ms (1.947 ms / 100) 1.894 -> 1.894 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.37% +0.26%] index_copy_ perm : Elapsed 0.019 ms (1.894 ms / 100) 1.943 -> 1.946 ( +0.15%) [ +0.26% +0.00% +0.15% / +0.15% +0.62% +0.41%] index_add_ perm_sorted : Elapsed 0.019 ms (1.948 ms / 100) 1.891 -> 1.890 ( -0.05%) [ +0.11% +0.21% +0.00% / -0.05% +0.26% +0.42%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.893 ms / 100) 8.516 -> 8.507 ( -0.11%) [ +0.19% +0.08% +0.00% / -0.11% +0.42% -0.05%] index_select const : Elapsed 0.085 ms (8.532 ms / 100) 8.540 -> 8.544 ( +0.05%) [ +0.00% +0.44% +0.12% / +0.05% +0.20% +0.25%] index_select wrap : Elapsed 0.085 ms (8.540 ms / 100) 8.538 -> 8.545 ( +0.08%) [ +0.06% +0.13% +0.00% / +0.36% +0.08% +0.13%] index_select linear : Elapsed 0.085 ms (8.543 ms / 100) 8.533 -> 8.543 ( +0.12%) [ +0.05% +0.15% +0.00% / +0.25% +0.12% +0.26%] index_select reverse : Elapsed 0.085 ms (8.537 ms / 100) 8.528 -> 8.513 ( -0.18%) [ +0.05% +0.00% +0.18% / +0.06% -0.13% -0.18%] index_select skip64 : Elapsed 0.085 ms (8.532 ms / 100) 8.517 -> 8.517 ( +0.00%) [ +0.00% +0.05% +0.41% / +0.04% +0.27% +0.00%] index_select skip256 : Elapsed 0.085 ms (8.517 ms / 100) 8.547 -> 8.544 ( -0.04%) [ +0.01% +0.02% +0.00% / -0.04% +0.12% +0.00%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.554 -> 8.540 ( -0.16%) [ +0.11% +0.01% +0.00% / -0.12% -0.16% +0.01%] index_select strided 3 : Elapsed 0.086 ms (8.563 ms / 100) 8.543 -> 8.549 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.19% +0.08% +0.07%] index_select random : Elapsed 0.086 ms (8.554 ms / 100) 8.553 -> 8.552 ( -0.01%) [ +0.09% +0.01% +0.00% / +0.09% -0.01% +0.21%] index_select random_sorted : Elapsed 0.086 ms (8.561 ms / 100) B = [40, 4, 20, 16] (stride (16, 12800, 640, 1)) A = [40, 4, 5, 16] (stride (20, 1, 4, 800)) dim = 2 1.883 -> 1.898 ( +0.80%) [ +0.11% +0.00% +0.48% / +0.80% +1.43% +1.43%] index_add_ linear : Elapsed 0.019 ms (1.885 ms / 100) 1.832 -> 1.843 ( +0.60%) [ +0.00% +0.00% +0.66% / +0.60% +1.64% +1.97%] index_copy_ linear : Elapsed 0.018 ms (1.832 ms / 100) 1.882 -> 1.895 ( +0.69%) [ +0.00% +0.00% +0.64% / +0.69% +1.59% +1.54%] index_add_ reverse : Elapsed 0.019 ms (1.882 ms / 100) 1.832 -> 1.848 ( +0.87%) [ +0.33% +0.00% +0.82% / +0.87% +1.69% +1.86%] index_copy_ reverse : Elapsed 0.018 ms (1.838 ms / 100) 1.873 -> 1.899 ( +1.39%) [ +0.00% +0.05% +1.33% / +1.39% +1.76% +1.92%] index_add_ spread : Elapsed 0.019 ms (1.873 ms / 100) 1.822 -> 1.849 ( +1.48%) [ +0.00% +0.11% +1.43% / +1.48% +2.03% +1.98%] index_copy_ spread : Elapsed 0.018 ms (1.822 ms / 100) 1.871 -> 1.892 ( +1.12%) [ +0.00% +0.05% +1.07% / +1.12% +2.35% +2.46%] index_add_ strided 3 : Elapsed 0.019 ms (1.871 ms / 100) 1.820 -> 1.847 ( +1.48%) [ +0.00% +0.33% +1.26% / +1.48% +2.53% +2.58%] index_copy_ strided 3 : Elapsed 0.018 ms (1.820 ms / 100) 1.891 -> 1.899 ( +0.42%) [ +0.16% +0.00% +0.63% / +0.42% +1.48% +1.69%] index_add_ strided 7 : Elapsed 0.019 ms (1.894 ms / 100) 1.843 -> 1.847 ( +0.22%) [ +0.00% +0.00% +0.38% / +0.22% +1.47% +1.57%] index_copy_ strided 7 : Elapsed 0.018 ms (1.843 ms / 100) 1.893 -> 1.899 ( +0.32%) [ +0.00% +0.05% +0.11% / +0.32% +1.16% +1.32%] index_add_ perm : Elapsed 0.019 ms (1.893 ms / 100) 1.845 -> 1.851 ( +0.33%) [ +0.00% +0.05% +0.22% / +0.33% +1.25% +1.52%] index_copy_ perm : Elapsed 0.018 ms (1.845 ms / 100) 1.877 -> 1.896 ( +1.01%) [ +0.11% +0.00% +1.01% / +1.01% +2.02% +2.18%] index_add_ perm_sorted : Elapsed 0.019 ms (1.879 ms / 100) 1.829 -> 1.848 ( +1.04%) [ +0.00% +0.00% +1.04% / +1.04% +2.35% +2.24%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.829 ms / 100) 8.543 -> 8.550 ( +0.08%) [ +0.00% +0.29% +0.23% / +0.64% +0.09% +0.08%] index_select const : Elapsed 0.085 ms (8.543 ms / 100) 8.573 -> 8.559 ( -0.16%) [ +0.00% +0.02% +0.05% / +0.14% -0.16% +0.13%] index_select wrap : Elapsed 0.086 ms (8.573 ms / 100) 8.569 -> 8.554 ( -0.18%) [ +0.11% +0.00% +0.12% / +0.22% -0.18% -0.01%] index_select linear : Elapsed 0.086 ms (8.578 ms / 100) 8.586 -> 8.574 ( -0.14%) [ +0.00% +0.09% +0.15% / -0.07% -0.14% +0.01%] index_select reverse : Elapsed 0.086 ms (8.586 ms / 100) 8.557 -> 8.552 ( -0.06%) [ +0.20% +0.00% +0.23% / +0.21% -0.06% -0.05%] index_select skip64 : Elapsed 0.086 ms (8.574 ms / 100) 8.568 -> 8.542 ( -0.30%) [ +0.16% +0.00% +0.08% / -0.09% -0.30% -0.25%] index_select skip256 : Elapsed 0.086 ms (8.582 ms / 100) 8.580 -> 8.582 ( +0.02%) [ +0.17% +0.00% +0.24% / +0.31% +0.05% +0.02%] index_select spread : Elapsed 0.086 ms (8.595 ms / 100) 8.581 -> 8.564 ( -0.20%) [ +0.02% +0.28% +0.00% / -0.01% +0.01% -0.20%] index_select strided 3 : Elapsed 0.086 ms (8.583 ms / 100) 8.572 -> 8.565 ( -0.08%) [ +0.28% +0.00% +0.05% / +0.12% -0.03% -0.08%] index_select random : Elapsed 0.086 ms (8.596 ms / 100) 8.581 -> 8.585 ( +0.05%) [ +0.14% +0.06% +0.00% / +0.08% +0.38% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.593 ms / 100) B = [40, 4, 20, 16] (stride (20, 12800, 1, 800)) dim = 2 fill_cnt = 5 0.951 -> 0.952 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.63% +0.53%] index_fill_ const : Elapsed 0.010 ms (0.952 ms / 100) 0.951 -> 0.957 ( +0.63%) [ +0.11% +0.21% +0.00% / +0.63% +0.74% +0.95%] index_fill_ linear : Elapsed 0.010 ms (0.952 ms / 100) 0.953 -> 0.953 ( +0.00%) [ +0.21% +0.10% +0.00% / +0.00% +0.21% +0.31%] index_fill_ reverse : Elapsed 0.010 ms (0.955 ms / 100) 0.952 -> 0.952 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.42% +0.00%] index_fill_ skip64 : Elapsed 0.010 ms (0.952 ms / 100) 0.951 -> 0.952 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.11% +0.32%] index_fill_ skip256 : Elapsed 0.010 ms (0.951 ms / 100) 0.958 -> 0.958 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.00% +0.10%] index_fill_ spread : Elapsed 0.010 ms (0.958 ms / 100) 0.959 -> 0.957 ( -0.21%) [ +0.00% +0.00% +0.00% / -0.21% +0.10% +0.42%] index_fill_ strided 3 : Elapsed 0.010 ms (0.959 ms / 100) 0.958 -> 0.957 ( -0.10%) [ +0.00% +0.21% +0.00% / -0.10% +0.10% +0.21%] index_fill_ strided 5 : Elapsed 0.010 ms (0.958 ms / 100) 0.957 -> 0.958 ( +0.10%) [ +0.10% +0.31% +0.00% / +0.10% +0.21% +0.21%] index_fill_ strided 7 : Elapsed 0.010 ms (0.958 ms / 100) 0.958 -> 0.959 ( +0.10%) [ +0.00% +0.21% +0.10% / +0.10% +0.10% +0.10%] index_fill_ strided 8 : Elapsed 0.010 ms (0.958 ms / 100) 0.958 -> 0.958 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.21% +0.00%] index_fill_ strided 16 : Elapsed 0.010 ms (0.958 ms / 100) 0.956 -> 0.958 ( +0.21%) [ +0.00% +0.31% +0.21% / +0.42% +0.21% +0.31%] index_fill_ random : Elapsed 0.010 ms (0.956 ms / 100) 0.958 -> 0.959 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.10% +0.10%] index_fill_ random_sorted : Elapsed 0.010 ms (0.959 ms / 100) 0.955 -> 0.957 ( +0.21%) [ +0.00% +0.00% +0.21% / +0.21% +0.42% +0.31%] index_fill_ perm : Elapsed 0.010 ms (0.955 ms / 100) 0.953 -> 0.957 ( +0.42%) [ +0.00% +0.00% +0.31% / +0.42% +0.52% +0.52%] index_fill_ perm_sorted : Elapsed 0.010 ms (0.953 ms / 100) B = [40, 4, 20, 16] (stride (20, 12800, 1, 800)) A = [40, 4, 5, 16] (stride (1, 200, 40, 800)) dim = 2 1.999 -> 1.999 ( +0.00%) [ +0.40% +0.15% +0.00% / +0.15% +0.00% +0.10%] index_add_ linear : Elapsed 0.020 ms (2.007 ms / 100) 1.956 -> 1.960 ( +0.20%) [ +0.10% +0.15% +0.00% / +0.20% +0.36% +0.20%] index_copy_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.998 -> 1.986 ( -0.60%) [ +0.05% +0.00% +0.00% / -0.05% -0.60% -0.40%] index_add_ reverse : Elapsed 0.020 ms (1.999 ms / 100) 1.953 -> 1.950 ( -0.15%) [ +0.31% +0.31% +0.00% / +0.15% +0.00% -0.15%] index_copy_ reverse : Elapsed 0.020 ms (1.959 ms / 100) 2.031 -> 2.019 ( -0.59%) [ +0.30% +0.05% +0.00% / +0.25% -0.39% -0.59%] index_add_ spread : Elapsed 0.020 ms (2.037 ms / 100) 1.990 -> 1.987 ( -0.15%) [ +0.05% +0.00% +0.30% / +0.10% -0.15% +0.20%] index_copy_ spread : Elapsed 0.020 ms (1.991 ms / 100) 2.019 -> 2.015 ( -0.20%) [ +0.20% +0.15% +0.00% / +0.05% -0.20% +0.15%] index_add_ strided 3 : Elapsed 0.020 ms (2.023 ms / 100) 1.986 -> 1.983 ( -0.15%) [ +0.15% +0.15% +0.00% / -0.10% -0.05% -0.15%] index_copy_ strided 3 : Elapsed 0.020 ms (1.989 ms / 100) 2.027 -> 2.028 ( +0.05%) [ +0.05% +0.00% +0.25% / +0.44% +0.05% +0.10%] index_add_ strided 7 : Elapsed 0.020 ms (2.028 ms / 100) 1.994 -> 1.987 ( -0.35%) [ +0.00% +0.10% +0.25% / +0.10% -0.20% -0.35%] index_copy_ strided 7 : Elapsed 0.020 ms (1.994 ms / 100) 2.021 -> 2.020 ( -0.05%) [ +0.15% +0.15% +0.00% / +0.25% +0.15% -0.05%] index_add_ perm : Elapsed 0.020 ms (2.024 ms / 100) 1.984 -> 1.982 ( -0.10%) [ +0.00% +0.00% +0.15% / +0.10% +0.00% -0.10%] index_copy_ perm : Elapsed 0.020 ms (1.984 ms / 100) 2.012 -> 2.009 ( -0.15%) [ +0.20% +0.00% +0.00% / -0.15% -0.10% -0.15%] index_add_ perm_sorted : Elapsed 0.020 ms (2.016 ms / 100) 1.972 -> 1.975 ( +0.15%) [ +0.05% +0.00% +0.25% / +0.20% +0.15% +0.30%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.973 ms / 100) 8.589 -> 8.609 ( +0.23%) [ +0.07% +0.19% +0.00% / +0.23% +0.45% +0.40%] index_select const : Elapsed 0.086 ms (8.595 ms / 100) 8.628 -> 8.627 ( -0.01%) [ +0.02% +0.00% +0.07% / -0.01% +0.17% +0.24%] index_select wrap : Elapsed 0.086 ms (8.630 ms / 100) 8.619 -> 8.618 ( -0.01%) [ +0.00% +0.12% +0.07% / -0.01% +0.32% +0.32%] index_select linear : Elapsed 0.086 ms (8.619 ms / 100) 8.608 -> 8.618 ( +0.12%) [ +0.06% +0.00% +0.21% / +0.12% +0.48% +0.60%] index_select reverse : Elapsed 0.086 ms (8.613 ms / 100) 8.595 -> 8.590 ( -0.06%) [ +0.10% +0.07% +0.00% / -0.06% +0.34% +0.24%] index_select skip64 : Elapsed 0.086 ms (8.604 ms / 100) 8.598 -> 8.604 ( +0.07%) [ +0.00% +0.00% +0.10% / +0.07% +0.28% +0.19%] index_select skip256 : Elapsed 0.086 ms (8.598 ms / 100) 8.621 -> 8.640 ( +0.22%) [ +0.00% +0.19% +0.06% / +0.28% +0.22% +0.29%] index_select spread : Elapsed 0.086 ms (8.621 ms / 100) 8.625 -> 8.613 ( -0.14%) [ +0.28% +0.00% +0.13% / -0.14% +0.29% +0.17%] index_select strided 3 : Elapsed 0.086 ms (8.649 ms / 100) 8.622 -> 8.625 ( +0.03%) [ +0.00% +0.07% +0.14% / +0.03% +0.32% +0.46%] index_select random : Elapsed 0.086 ms (8.622 ms / 100) 8.624 -> 8.639 ( +0.17%) [ +0.00% +0.20% +0.01% / +0.17% +0.32% +0.49%] index_select random_sorted : Elapsed 0.086 ms (8.624 ms / 100) B = [40, 4, 20, 16] (stride (1, 800, 40, 3200)) A = [40, 4, 5, 16] (stride (320, 5, 1, 20)) dim = 2 1.818 -> 1.821 ( +0.17%) [ +0.00% +0.11% +0.00% / +0.17% +1.16% +1.16%] index_add_ linear : Elapsed 0.018 ms (1.818 ms / 100) 1.775 -> 1.777 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +1.13% +1.24%] index_copy_ linear : Elapsed 0.018 ms (1.777 ms / 100) 1.822 -> 1.820 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% +0.82% +0.82%] index_add_ reverse : Elapsed 0.018 ms (1.822 ms / 100) 1.778 -> 1.779 ( +0.06%) [ +0.00% +0.17% +0.06% / +0.06% +0.96% +0.79%] index_copy_ reverse : Elapsed 0.018 ms (1.778 ms / 100) 1.832 -> 1.839 ( +0.38%) [ +0.00% +0.11% +0.05% / +0.38% +0.93% +1.15%] index_add_ spread : Elapsed 0.018 ms (1.832 ms / 100) 1.790 -> 1.789 ( -0.06%) [ +0.00% +0.22% +0.17% / -0.06% +0.95% +1.06%] index_copy_ spread : Elapsed 0.018 ms (1.790 ms / 100) 1.837 -> 1.836 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.71% +0.76%] index_add_ strided 3 : Elapsed 0.018 ms (1.837 ms / 100) 1.797 -> 1.797 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_copy_ strided 3 : Elapsed 0.018 ms (1.797 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +1.04% +1.31%] index_add_ strided 7 : Elapsed 0.018 ms (1.827 ms / 100) 1.784 -> 1.784 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +1.12% +1.29%] index_copy_ strided 7 : Elapsed 0.018 ms (1.785 ms / 100) 1.837 -> 1.835 ( -0.11%) [ +0.00% +0.05% +0.05% / -0.11% +0.76% +0.82%] index_add_ perm : Elapsed 0.018 ms (1.837 ms / 100) 1.795 -> 1.797 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +0.78% +0.78%] index_copy_ perm : Elapsed 0.018 ms (1.795 ms / 100) 1.834 -> 1.833 ( -0.05%) [ +0.11% +0.00% +0.05% / -0.05% +0.93% +0.65%] index_add_ perm_sorted : Elapsed 0.018 ms (1.836 ms / 100) 1.796 -> 1.797 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.61% +0.56%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.798 ms / 100) 8.564 -> 8.573 ( +0.11%) [ +0.02% +0.05% +0.00% / +0.23% +0.21% +0.11%] index_select const : Elapsed 0.086 ms (8.566 ms / 100) 8.543 -> 8.559 ( +0.19%) [ +0.00% +0.25% +0.15% / +0.19% +0.22% +0.61%] index_select wrap : Elapsed 0.085 ms (8.543 ms / 100) 8.561 -> 8.570 ( +0.11%) [ +0.04% +0.13% +0.00% / +0.11% +0.11% +0.26%] index_select linear : Elapsed 0.086 ms (8.564 ms / 100) 8.561 -> 8.567 ( +0.07%) [ +0.18% +0.20% +0.00% / +0.07% +0.09% +0.25%] index_select reverse : Elapsed 0.086 ms (8.576 ms / 100) 8.552 -> 8.565 ( +0.15%) [ +0.26% +0.25% +0.00% / +0.47% +0.26% +0.15%] index_select skip64 : Elapsed 0.086 ms (8.574 ms / 100) 8.557 -> 8.567 ( +0.12%) [ +0.26% +0.00% +0.26% / +0.12% +0.21% +0.41%] index_select skip256 : Elapsed 0.086 ms (8.579 ms / 100) 8.556 -> 8.560 ( +0.05%) [ +0.22% +0.00% +0.12% / +0.05% +0.20% +0.36%] index_select spread : Elapsed 0.086 ms (8.575 ms / 100) 8.566 -> 8.552 ( -0.16%) [ +0.32% +0.00% +0.08% / -0.16% -0.01% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.593 ms / 100) 8.566 -> 8.551 ( -0.18%) [ +0.07% +0.11% +0.00% / -0.18% -0.02% +0.13%] index_select random : Elapsed 0.086 ms (8.572 ms / 100) 8.550 -> 8.561 ( +0.13%) [ +0.14% +0.14% +0.00% / +0.13% +0.27% +0.32%] index_select random_sorted : Elapsed 0.086 ms (8.562 ms / 100) B = [40, 4, 20, 16] (stride (1, 800, 40, 3200)) A = [40, 4, 5, 16] (stride (1, 3200, 40, 200)) dim = 2 1.922 -> 1.924 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.47% +0.57%] index_add_ linear : Elapsed 0.019 ms (1.923 ms / 100) 1.876 -> 1.876 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.37% +0.48%] index_copy_ linear : Elapsed 0.019 ms (1.876 ms / 100) 1.911 -> 1.914 ( +0.16%) [ +0.31% +0.10% +0.00% / +0.16% +0.52% +0.58%] index_add_ reverse : Elapsed 0.019 ms (1.917 ms / 100) 1.867 -> 1.867 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.54% +0.48%] index_copy_ reverse : Elapsed 0.019 ms (1.868 ms / 100) 1.946 -> 1.940 ( -0.31%) [ +0.00% +0.21% +0.21% / -0.31% +0.10% +0.36%] index_add_ spread : Elapsed 0.019 ms (1.946 ms / 100) 1.890 -> 1.894 ( +0.21%) [ +0.00% +0.32% +0.16% / +0.21% +0.69% +0.90%] index_copy_ spread : Elapsed 0.019 ms (1.890 ms / 100) 1.943 -> 1.942 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.26% +0.21%] index_add_ strided 3 : Elapsed 0.019 ms (1.943 ms / 100) 1.894 -> 1.896 ( +0.11%) [ +0.11% +0.16% +0.00% / +0.16% +0.11% +0.16%] index_copy_ strided 3 : Elapsed 0.019 ms (1.896 ms / 100) 1.940 -> 1.940 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.67% +0.72%] index_add_ strided 7 : Elapsed 0.019 ms (1.941 ms / 100) 1.888 -> 1.890 ( +0.11%) [ +0.21% +0.05% +0.00% / +0.11% +0.85% +0.74%] index_copy_ strided 7 : Elapsed 0.019 ms (1.892 ms / 100) 1.943 -> 1.946 ( +0.15%) [ +0.15% +0.10% +0.00% / +0.15% +0.62% +0.57%] index_add_ perm : Elapsed 0.019 ms (1.946 ms / 100) 1.893 -> 1.897 ( +0.21%) [ +0.05% +0.00% +0.11% / +0.21% +0.74% +0.63%] index_copy_ perm : Elapsed 0.019 ms (1.894 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.72% +0.57%] index_add_ perm_sorted : Elapsed 0.019 ms (1.935 ms / 100) 1.887 -> 1.887 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.53% +0.48%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.888 ms / 100) 8.531 -> 8.539 ( +0.09%) [ +0.13% +0.00% +0.11% / +0.19% +0.14% +0.09%] index_select const : Elapsed 0.085 ms (8.542 ms / 100) 8.554 -> 8.553 ( -0.01%) [ +0.20% +0.01% +0.00% / -0.01% +0.13% +0.00%] index_select wrap : Elapsed 0.086 ms (8.571 ms / 100) 8.547 -> 8.547 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.35% +0.43%] index_select linear : Elapsed 0.086 ms (8.560 ms / 100) 8.547 -> 8.564 ( +0.20%) [ +0.07% +0.00% +0.11% / +0.23% +0.20% +0.50%] index_select reverse : Elapsed 0.086 ms (8.553 ms / 100) 8.534 -> 8.528 ( -0.07%) [ +0.02% +0.14% +0.00% / -0.07% +0.39% +0.42%] index_select skip64 : Elapsed 0.085 ms (8.536 ms / 100) 8.533 -> 8.543 ( +0.12%) [ +0.13% +0.13% +0.00% / +0.12% +0.20% +0.18%] index_select skip256 : Elapsed 0.085 ms (8.544 ms / 100) 8.565 -> 8.569 ( +0.05%) [ +0.15% +0.09% +0.00% / +0.05% +0.22% +0.22%] index_select spread : Elapsed 0.086 ms (8.578 ms / 100) 8.549 -> 8.547 ( -0.02%) [ +0.02% +0.00% +0.06% / -0.02% +0.16% +0.12%] index_select strided 3 : Elapsed 0.086 ms (8.551 ms / 100) 8.565 -> 8.562 ( -0.04%) [ +0.01% +0.22% +0.00% / -0.04% +0.11% +0.08%] index_select random : Elapsed 0.086 ms (8.566 ms / 100) 8.569 -> 8.562 ( -0.08%) [ +0.07% +0.28% +0.00% / -0.08% +0.30% +0.11%] index_select random_sorted : Elapsed 0.086 ms (8.575 ms / 100) B = [40, 4, 20, 16] (stride (4, 1, 160, 3200)) A = [40, 4, 5, 16] (stride (320, 80, 1, 5)) dim = 2 0.667 -> 0.668 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.75% +0.90%] index_add_ linear : Elapsed 0.007 ms (0.668 ms / 100) 0.684 -> 0.684 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +1.02% +0.88%] index_copy_ linear : Elapsed 0.007 ms (0.684 ms / 100) 0.673 -> 0.667 ( -0.89%) [ +0.15% +0.15% +0.00% / +0.00% -0.59% -0.89%] index_add_ reverse : Elapsed 0.007 ms (0.674 ms / 100) 0.690 -> 0.684 ( -0.87%) [ +0.00% +0.14% +0.00% / +0.00% -0.72% -0.87%] index_copy_ reverse : Elapsed 0.007 ms (0.690 ms / 100) 0.676 -> 0.671 ( -0.74%) [ +0.30% +0.30% +0.00% / +0.15% -0.74% -0.74%] index_add_ spread : Elapsed 0.007 ms (0.678 ms / 100) 0.698 -> 0.693 ( -0.72%) [ +0.00% +0.00% +0.00% / +0.00% -0.72% -0.72%] index_copy_ spread : Elapsed 0.007 ms (0.698 ms / 100) 0.667 -> 0.669 ( +0.30%) [ +0.00% +0.15% +0.00% / +0.30% +0.45% +0.45%] index_add_ strided 3 : Elapsed 0.007 ms (0.667 ms / 100) 0.686 -> 0.686 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.15% +0.15%] index_copy_ strided 3 : Elapsed 0.007 ms (0.686 ms / 100) 0.672 -> 0.673 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.60% +0.45%] index_add_ strided 7 : Elapsed 0.007 ms (0.672 ms / 100) 0.695 -> 0.697 ( +0.29%) [ +0.14% +0.00% +0.00% / +0.29% +0.58% +0.43%] index_copy_ strided 7 : Elapsed 0.007 ms (0.696 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +1.20% +1.05%] index_add_ perm : Elapsed 0.007 ms (0.667 ms / 100) 0.688 -> 0.688 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.87% +0.87%] index_copy_ perm : Elapsed 0.007 ms (0.689 ms / 100) 0.667 -> 0.667 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +1.05% +1.20%] index_add_ perm_sorted : Elapsed 0.007 ms (0.668 ms / 100) 0.687 -> 0.686 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.73% +0.73%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.687 ms / 100) 4.922 -> 4.909 ( -0.26%) [ +0.04% +0.00% +0.06% / +0.12% -0.04% -0.26%] index_select const : Elapsed 0.049 ms (4.924 ms / 100) 4.909 -> 4.912 ( +0.06%) [ +0.00% +0.06% +0.22% / +0.12% +0.06% +0.31%] index_select wrap : Elapsed 0.049 ms (4.909 ms / 100) 4.913 -> 4.910 ( -0.06%) [ +0.00% +0.00% +0.08% / -0.06% +0.02% +0.14%] index_select linear : Elapsed 0.049 ms (4.913 ms / 100) 4.910 -> 4.904 ( -0.12%) [ +0.06% +0.10% +0.00% / +0.14% -0.12% +0.14%] index_select reverse : Elapsed 0.049 ms (4.913 ms / 100) 4.910 -> 4.916 ( +0.12%) [ +0.10% +0.20% +0.00% / +0.12% +0.39% +0.20%] index_select skip64 : Elapsed 0.049 ms (4.915 ms / 100) 4.913 -> 4.911 ( -0.04%) [ +0.00% +0.08% +0.08% / -0.04% +0.14% +0.18%] index_select skip256 : Elapsed 0.049 ms (4.913 ms / 100) 4.909 -> 4.904 ( -0.10%) [ +0.29% +0.00% +0.02% / -0.10% +0.18% -0.10%] index_select spread : Elapsed 0.049 ms (4.923 ms / 100) 4.907 -> 4.916 ( +0.18%) [ +0.22% +0.00% +0.04% / +0.29% +0.18% +0.39%] index_select strided 3 : Elapsed 0.049 ms (4.918 ms / 100) 4.911 -> 4.913 ( +0.04%) [ +0.02% +0.08% +0.00% / +0.16% +0.04% +0.12%] index_select random : Elapsed 0.049 ms (4.912 ms / 100) 4.907 -> 4.901 ( -0.12%) [ +0.31% +0.00% +0.20% / -0.12% +0.26% +0.10%] index_select random_sorted : Elapsed 0.049 ms (4.922 ms / 100) out_shape = [40, 4, 5, 20] in_shape = [40, 4, 5, 16] idx_dim = 3 B = [40, 4, 5, 20] (stride (400, 5, 1, 20)) A = [40, 4, 5, 16] (stride (16, 640, 2560, 1)) dim = 3 4.056 -> 4.063 ( +0.17%) [ +0.00% +0.37% +0.37% / +0.17% +0.69% +0.86%] index_add_ linear : Elapsed 0.041 ms (4.056 ms / 100) 3.885 -> 3.888 ( +0.08%) [ +0.00% +0.28% +0.15% / +0.08% +0.98% +1.03%] index_copy_ linear : Elapsed 0.039 ms (3.885 ms / 100) 4.065 -> 4.072 ( +0.17%) [ +0.17% +0.07% +0.00% / +0.17% +0.59% +0.84%] index_add_ reverse : Elapsed 0.041 ms (4.072 ms / 100) 3.885 -> 3.899 ( +0.36%) [ +0.10% +0.15% +0.00% / +0.36% +1.00% +0.90%] index_copy_ reverse : Elapsed 0.039 ms (3.889 ms / 100) 4.044 -> 4.054 ( +0.25%) [ +0.25% +0.12% +0.00% / +0.25% +0.89% +1.06%] index_add_ spread : Elapsed 0.041 ms (4.054 ms / 100) 3.878 -> 3.887 ( +0.23%) [ +0.10% +0.05% +0.00% / +0.23% +1.01% +1.19%] index_copy_ spread : Elapsed 0.039 ms (3.882 ms / 100) 4.051 -> 4.053 ( +0.05%) [ +0.10% +0.07% +0.00% / +0.05% +0.72% +0.59%] index_add_ strided 3 : Elapsed 0.041 ms (4.055 ms / 100) 3.884 -> 3.884 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.98% +0.64%] index_copy_ strided 3 : Elapsed 0.039 ms (3.884 ms / 100) 4.055 -> 4.058 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.52% +0.67%] index_add_ strided 7 : Elapsed 0.041 ms (4.055 ms / 100) 3.886 -> 3.887 ( +0.03%) [ +0.00% +0.08% +0.03% / +0.03% +0.57% +1.06%] index_copy_ strided 7 : Elapsed 0.039 ms (3.886 ms / 100) 4.049 -> 4.054 ( +0.12%) [ +0.07% +0.27% +0.00% / +0.12% +0.62% +0.42%] index_add_ perm : Elapsed 0.041 ms (4.052 ms / 100) 3.879 -> 3.889 ( +0.26%) [ +0.00% +0.26% +0.08% / +0.26% +0.70% +0.49%] index_copy_ perm : Elapsed 0.039 ms (3.879 ms / 100) 4.066 -> 4.076 ( +0.25%) [ +0.05% +0.00% +0.00% / +0.25% +0.64% +0.64%] index_add_ perm_sorted : Elapsed 0.041 ms (4.068 ms / 100) 3.887 -> 3.892 ( +0.13%) [ +0.13% +0.03% +0.00% / +0.13% +0.67% +0.77%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.892 ms / 100) 5.477 -> 5.470 ( -0.13%) [ +0.04% +0.04% +0.00% / +0.02% -0.13% +0.07%] index_select const : Elapsed 0.055 ms (5.479 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.07% +0.02% +0.00% / +0.04% -0.04% +0.02%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.479 -> 5.485 ( +0.11%) [ +0.18% +0.15% +0.00% / +0.11% +0.13% +0.15%] index_select linear : Elapsed 0.055 ms (5.489 ms / 100) 5.487 -> 5.487 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +0.07% +0.09%] index_select reverse : Elapsed 0.055 ms (5.487 ms / 100) 5.472 -> 5.471 ( -0.02%) [ +0.00% +0.04% +0.00% / +0.05% -0.02% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.472 ms / 100) 5.474 -> 5.475 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.13% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.481 -> 5.482 ( +0.02%) [ +0.18% +0.00% +0.02% / +0.09% +0.11% +0.02%] index_select spread : Elapsed 0.055 ms (5.491 ms / 100) 5.490 -> 5.482 ( -0.15%) [ +0.00% +0.02% +0.09% / -0.11% -0.15% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.07% +0.00% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.18% +0.04% +0.00% / +0.11% +0.00% +0.00%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.488 -> 5.481 ( -0.13%) [ +0.02% +0.00% +0.05% / -0.09% -0.05% -0.13%] index_select strided 8 : Elapsed 0.055 ms (5.489 ms / 100) 5.476 -> 5.479 ( +0.05%) [ +0.00% +0.09% +0.15% / +0.16% +0.05% +0.11%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.05% +0.02% +0.00% / -0.09% +0.04% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [40, 4, 5, 20] (stride (1, 800, 3200, 40)) A = [40, 4, 5, 16] (stride (1, 3200, 640, 40)) dim = 3 3.973 -> 3.973 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.73%] index_add_ linear : Elapsed 0.040 ms (3.973 ms / 100) 3.810 -> 3.811 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.79% +0.81%] index_copy_ linear : Elapsed 0.038 ms (3.810 ms / 100) 3.960 -> 3.960 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.76% +0.73%] index_add_ reverse : Elapsed 0.040 ms (3.960 ms / 100) 3.808 -> 3.808 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.71% +0.74%] index_copy_ reverse : Elapsed 0.038 ms (3.808 ms / 100) 3.960 -> 3.961 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.71% +0.68%] index_add_ spread : Elapsed 0.040 ms (3.961 ms / 100) 3.804 -> 3.805 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.71% +0.68%] index_copy_ spread : Elapsed 0.038 ms (3.806 ms / 100) 3.982 -> 3.980 ( -0.05%) [ +0.00% +0.03% +0.00% / -0.05% +0.70% +0.73%] index_add_ strided 3 : Elapsed 0.040 ms (3.982 ms / 100) 3.824 -> 3.825 ( +0.03%) [ +0.13% +0.05% +0.00% / +0.03% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.038 ms (3.829 ms / 100) 3.961 -> 3.960 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.73% +0.73%] index_add_ strided 7 : Elapsed 0.040 ms (3.962 ms / 100) 3.808 -> 3.808 ( +0.00%) [ +0.00% +0.05% +0.08% / +0.00% +0.76% +0.74%] index_copy_ strided 7 : Elapsed 0.038 ms (3.808 ms / 100) 3.973 -> 3.973 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.040 ms (3.974 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.03% +0.71% +0.73%] index_copy_ perm : Elapsed 0.038 ms (3.814 ms / 100) 3.982 -> 3.983 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.73% +0.75%] index_add_ perm_sorted : Elapsed 0.040 ms (3.982 ms / 100) 3.825 -> 3.825 ( +0.00%) [ +0.00% +0.10% +0.03% / +0.00% +0.76% +0.78%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.825 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.00% +0.16% +0.11% / +0.00% +0.07% +0.15%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.519 -> 5.518 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.05% +0.05%] index_select wrap : Elapsed 0.055 ms (5.519 ms / 100) 5.515 -> 5.516 ( +0.02%) [ +0.09% +0.04% +0.00% / +0.02% +0.18% +0.05%] index_select linear : Elapsed 0.055 ms (5.520 ms / 100) 5.517 -> 5.515 ( -0.04%) [ +0.07% +0.00% +0.09% / -0.04% +0.13% +0.16%] index_select reverse : Elapsed 0.055 ms (5.521 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.02% +0.15% +0.00% / -0.02% +0.16% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.493 ms / 100) 5.491 -> 5.494 ( +0.05%) [ +0.00% +0.09% +0.04% / +0.11% +0.07% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.491 ms / 100) 5.513 -> 5.523 ( +0.18%) [ +0.05% +0.00% +0.09% / +0.22% +0.18% +0.18%] index_select spread : Elapsed 0.055 ms (5.516 ms / 100) 5.518 -> 5.518 ( +0.00%) [ +0.02% +0.13% +0.00% / +0.02% +0.00% +0.18%] index_select strided 3 : Elapsed 0.055 ms (5.519 ms / 100) 5.514 -> 5.518 ( +0.07%) [ +0.00% +0.05% +0.07% / +0.16% +0.13% +0.07%] index_select strided 5 : Elapsed 0.055 ms (5.514 ms / 100) 5.514 -> 5.517 ( +0.05%) [ +0.07% +0.11% +0.00% / +0.05% +0.25% +0.13%] index_select strided 7 : Elapsed 0.055 ms (5.518 ms / 100) 5.492 -> 5.494 ( +0.04%) [ +0.07% +0.09% +0.00% / +0.04% +0.24% +0.31%] index_select strided 8 : Elapsed 0.055 ms (5.496 ms / 100) 5.508 -> 5.515 ( +0.13%) [ +0.04% +0.16% +0.00% / +0.13% +0.20% +0.15%] index_select random : Elapsed 0.055 ms (5.510 ms / 100) 5.514 -> 5.506 ( -0.15%) [ +0.00% +0.00% +0.07% / -0.15% -0.05% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.514 ms / 100) B = [40, 4, 5, 20] (stride (20, 5, 1, 800)) A = [40, 4, 5, 16] (stride (320, 80, 16, 1)) dim = 3 1.254 -> 1.253 ( -0.08%) [ +0.08% +0.40% +0.00% / +0.16% +0.00% -0.08%] index_add_ linear : Elapsed 0.013 ms (1.255 ms / 100) 1.210 -> 1.210 ( +0.00%) [ +0.08% +0.41% +0.00% / +0.33% +0.25% +0.00%] index_copy_ linear : Elapsed 0.012 ms (1.211 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.31% +0.31% +0.00% / +0.00% +0.00% +0.00%] index_add_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.241 -> 1.241 ( +0.00%) [ +0.08% +0.56% +0.00% / +0.00% +0.32% +0.16%] index_copy_ reverse : Elapsed 0.012 ms (1.242 ms / 100) 1.262 -> 1.261 ( -0.08%) [ +0.16% +0.00% +0.08% / -0.08% +0.16% +0.24%] index_add_ spread : Elapsed 0.013 ms (1.264 ms / 100) 1.221 -> 1.221 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.25% +0.41%] index_copy_ spread : Elapsed 0.012 ms (1.223 ms / 100) 1.252 -> 1.252 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.72% +0.80%] index_add_ strided 3 : Elapsed 0.013 ms (1.254 ms / 100) 1.205 -> 1.207 ( +0.17%) [ +0.25% +0.00% +0.08% / +0.17% +1.16% +0.91%] index_copy_ strided 3 : Elapsed 0.012 ms (1.208 ms / 100) 1.254 -> 1.256 ( +0.16%) [ +0.00% +0.32% +0.00% / +0.16% +0.48% +0.56%] index_add_ strided 7 : Elapsed 0.013 ms (1.254 ms / 100) 1.206 -> 1.209 ( +0.25%) [ +0.08% +0.50% +0.00% / +0.25% +0.91% +0.75%] index_copy_ strided 7 : Elapsed 0.012 ms (1.207 ms / 100) 1.263 -> 1.263 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.08% +0.24%] index_add_ perm : Elapsed 0.013 ms (1.263 ms / 100) 1.224 -> 1.225 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.25% +0.08% +0.08%] index_copy_ perm : Elapsed 0.012 ms (1.224 ms / 100) 1.273 -> 1.269 ( -0.31%) [ +0.08% +0.16% +0.00% / -0.16% -0.31% -0.24%] index_add_ perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) 1.246 -> 1.240 ( -0.48%) [ +0.00% +0.16% +0.00% / -0.48% -0.48% -0.48%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.246 ms / 100) 3.451 -> 3.442 ( -0.26%) [ +0.09% +0.00% +0.09% / -0.12% -0.26% -0.12%] index_select const : Elapsed 0.035 ms (3.454 ms / 100) 3.459 -> 3.457 ( -0.06%) [ +0.00% +0.12% +0.17% / +0.00% -0.06% -0.06%] index_select wrap : Elapsed 0.035 ms (3.459 ms / 100) 3.457 -> 3.456 ( -0.03%) [ +0.03% +0.00% +0.12% / +0.09% +0.03% -0.03%] index_select linear : Elapsed 0.035 ms (3.458 ms / 100) 3.456 -> 3.454 ( -0.06%) [ +0.09% +0.09% +0.00% / -0.03% +0.00% -0.06%] index_select reverse : Elapsed 0.035 ms (3.459 ms / 100) 3.441 -> 3.447 ( +0.17%) [ +0.23% +0.17% +0.00% / +0.32% +0.20% +0.17%] index_select skip64 : Elapsed 0.034 ms (3.449 ms / 100) 3.444 -> 3.436 ( -0.23%) [ +0.03% +0.00% +0.09% / +0.03% -0.23% +0.00%] index_select skip256 : Elapsed 0.034 ms (3.445 ms / 100) 3.467 -> 3.452 ( -0.43%) [ +0.09% +0.00% +0.32% / +0.06% -0.43% -0.43%] index_select spread : Elapsed 0.035 ms (3.470 ms / 100) 3.458 -> 3.453 ( -0.14%) [ +0.03% +0.03% +0.00% / +0.20% -0.03% -0.14%] index_select strided 3 : Elapsed 0.035 ms (3.459 ms / 100) 3.457 -> 3.454 ( -0.09%) [ +0.03% +0.06% +0.00% / +0.14% -0.09% -0.06%] index_select strided 5 : Elapsed 0.035 ms (3.458 ms / 100) 3.455 -> 3.459 ( +0.12%) [ +0.00% +0.12% +0.23% / +0.12% +0.20% +0.12%] index_select strided 7 : Elapsed 0.035 ms (3.455 ms / 100) 3.453 -> 3.455 ( +0.06%) [ +0.23% +0.20% +0.00% / +0.06% +0.14% +0.12%] index_select strided 8 : Elapsed 0.035 ms (3.461 ms / 100) 3.456 -> 3.455 ( -0.03%) [ +0.03% +0.06% +0.00% / -0.03% +0.06% +0.43%] index_select random : Elapsed 0.035 ms (3.457 ms / 100) 3.453 -> 3.454 ( +0.03%) [ +0.17% +0.03% +0.00% / +0.03% +0.38% +0.32%] index_select random_sorted : Elapsed 0.035 ms (3.459 ms / 100) out_shape = [20, 4, 16, 5] in_shape = [40, 4, 16, 5] idx_dim = 0 B = [20, 4, 16, 5] (stride (320, 1, 20, 4)) A = [40, 4, 16, 5] (stride (320, 1, 4, 64)) dim = 0 2.392 -> 2.394 ( +0.08%) [ +0.04% +0.00% +0.17% / +0.08% +0.21% +0.17%] index_select const : Elapsed 0.024 ms (2.393 ms / 100) 2.402 -> 2.400 ( -0.08%) [ +0.00% +0.17% +0.04% / +0.17% +0.08% -0.08%] index_select wrap : Elapsed 0.024 ms (2.402 ms / 100) 2.403 -> 2.401 ( -0.08%) [ +0.00% +0.04% +0.12% / +0.00% -0.08% -0.04%] index_select linear : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.398 ( -0.17%) [ +0.12% +0.00% +0.04% / +0.08% -0.04% -0.17%] index_select reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.393 -> 2.395 ( +0.08%) [ +0.25% +0.00% +0.17% / +0.08% +0.25% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.393 -> 2.396 ( +0.13%) [ +0.08% +0.21% +0.00% / +0.17% +0.13% +0.38%] index_select skip256 : Elapsed 0.024 ms (2.395 ms / 100) 2.400 -> 2.401 ( +0.04%) [ +0.00% +0.17% +0.08% / +0.17% +0.04% +0.13%] index_select spread : Elapsed 0.024 ms (2.400 ms / 100) 2.404 -> 2.402 ( -0.08%) [ +0.08% +0.00% +0.12% / +0.04% -0.08% -0.08%] index_select strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.398 -> 2.398 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.08% +0.00% +0.04%] index_select strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.00% +0.13% +0.04% / +0.08% +0.13% +0.13%] index_select strided 7 : Elapsed 0.024 ms (2.398 ms / 100) 2.393 -> 2.398 ( +0.21%) [ +0.00% +0.04% +0.00% / +0.21% +0.29% +0.46%] index_select strided 8 : Elapsed 0.024 ms (2.393 ms / 100) 2.392 -> 2.395 ( +0.13%) [ +0.33% +0.25% +0.00% / +0.13% +0.25% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.400 ms / 100) 2.396 -> 2.400 ( +0.17%) [ +0.00% +0.13% +0.13% / +0.17% +0.42% +0.25%] index_select random : Elapsed 0.024 ms (2.396 ms / 100) 2.399 -> 2.399 ( +0.00%) [ +0.00% +0.04% +0.13% / +0.08% +0.13% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.399 ms / 100) 2.399 -> 2.405 ( +0.25%) [ +0.17% +0.04% +0.00% / +0.25% +0.29% +0.29%] index_select perm : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.396 ( -0.33%) [ +0.08% +0.00% +0.04% / -0.08% -0.29% -0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) B = [20, 4, 16, 5] (stride (320, 1, 4, 64)) A = [40, 4, 16, 5] (stride (1, 3200, 200, 40)) dim = 0 2.364 -> 2.367 ( +0.13%) [ +0.00% +0.00% +0.08% / +0.17% +0.17% +0.13%] index_select const : Elapsed 0.024 ms (2.364 ms / 100) 2.373 -> 2.368 ( -0.21%) [ +0.21% +0.13% +0.00% / +0.04% -0.21% -0.13%] index_select wrap : Elapsed 0.024 ms (2.378 ms / 100) 2.375 -> 2.368 ( -0.29%) [ +0.00% +0.17% +0.08% / +0.04% -0.21% -0.29%] index_select linear : Elapsed 0.024 ms (2.375 ms / 100) 2.373 -> 2.373 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.08% +0.00% +0.04%] index_select reverse : Elapsed 0.024 ms (2.373 ms / 100) 2.368 -> 2.366 ( -0.08%) [ +0.00% +0.17% +0.08% / +0.17% -0.08% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.368 ms / 100) 2.366 -> 2.365 ( -0.04%) [ +0.08% +0.00% +0.08% / -0.04% +0.17% +0.30%] index_select skip256 : Elapsed 0.024 ms (2.368 ms / 100) 2.374 -> 2.378 ( +0.17%) [ +0.08% +0.00% +0.04% / +0.17% +0.29% +0.29%] index_select spread : Elapsed 0.024 ms (2.376 ms / 100) 2.374 -> 2.375 ( +0.04%) [ +0.08% +0.00% +0.08% / +0.04% +0.38% +0.29%] index_select strided 3 : Elapsed 0.024 ms (2.376 ms / 100) 2.374 -> 2.378 ( +0.17%) [ +0.00% +0.00% +0.04% / +0.17% +0.42% +0.42%] index_select strided 5 : Elapsed 0.024 ms (2.374 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.08% +0.00% +0.34%] index_select strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.381 -> 2.379 ( -0.08%) [ +0.00% +0.04% +0.00% / -0.08% +0.08% -0.04%] index_select strided 8 : Elapsed 0.024 ms (2.381 ms / 100) 2.376 -> 2.378 ( +0.08%) [ +0.00% +0.13% +0.04% / +0.08% +0.29% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.376 ms / 100) 2.376 -> 2.375 ( -0.04%) [ +0.08% +0.00% +0.13% / -0.04% +0.21% +0.00%] index_select random : Elapsed 0.024 ms (2.378 ms / 100) 2.378 -> 2.375 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.04% -0.13% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.378 ms / 100) 2.375 -> 2.380 ( +0.21%) [ +0.25% +0.00% +0.17% / +0.42% +0.21% +0.34%] index_select perm : Elapsed 0.024 ms (2.381 ms / 100) 2.378 -> 2.378 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.00% +0.13%] index_select perm_sorted : Elapsed 0.024 ms (2.380 ms / 100) B = [20, 4, 16, 5] (stride (80, 1600, 5, 1)) A = [40, 4, 16, 5] (stride (5, 200, 800, 1)) dim = 0 2.397 -> 2.394 ( -0.13%) [ +0.00% +0.13% +0.00% / -0.13% +0.21% +0.17%] index_select const : Elapsed 0.024 ms (2.397 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.00% +0.04% +0.17% / +0.00% +0.21% +0.00%] index_select wrap : Elapsed 0.024 ms (2.412 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.00% +0.04% +0.17% / +0.08% +0.12% +0.08%] index_select linear : Elapsed 0.024 ms (2.410 ms / 100) 2.411 -> 2.411 ( +0.00%) [ +0.00% +0.04% +0.08% / +0.12% +0.00% +0.12%] index_select reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.398 -> 2.397 ( -0.04%) [ +0.00% +0.13% +0.00% / +0.04% -0.04% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.398 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.04% +0.00% +0.13% / +0.29% +0.29% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.395 ms / 100) 2.421 -> 2.420 ( -0.04%) [ +0.00% +0.08% +0.17% / -0.04% +0.37% +0.17%] index_select spread : Elapsed 0.024 ms (2.421 ms / 100) 2.423 -> 2.423 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.08% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.415 -> 2.408 ( -0.29%) [ +0.00% +0.04% +0.00% / -0.12% -0.29% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.415 ms / 100) 2.419 -> 2.423 ( +0.17%) [ +0.00% +0.21% +0.04% / +0.21% +0.25% +0.17%] index_select strided 7 : Elapsed 0.024 ms (2.419 ms / 100) 2.401 -> 2.402 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.04% +0.29% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.402 ms / 100) 2.405 -> 2.405 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.08% +0.00% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.405 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.12% +0.12% / +0.08% +0.21% +0.29%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.414 -> 2.413 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.00% +0.08% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.421 -> 2.420 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.00% +0.21% -0.04%] index_select perm : Elapsed 0.024 ms (2.421 ms / 100) 2.419 -> 2.414 ( -0.21%) [ +0.12% +0.04% +0.00% / -0.04% -0.12% -0.21%] index_select perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) B = [20, 4, 16, 5] (stride (80, 1600, 1, 16)) A = [40, 4, 16, 5] (stride (1, 200, 800, 40)) dim = 0 2.449 -> 2.451 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.12% +0.12%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.00% +0.28% +0.08% / +0.16% -0.08% -0.08%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.459 -> 2.451 ( -0.33%) [ +0.00% +0.00% +0.00% / +0.04% -0.24% -0.33%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.20% +0.00% +0.29% / +0.29% +0.20% +0.16%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.12% +0.12% +0.00% / -0.08% +0.20% +0.16%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.450 ( +0.12%) [ +0.16% +0.00% +0.12% / +0.12% +0.37% +0.45%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.459 -> 2.464 ( +0.20%) [ +0.12% +0.20% +0.00% / +0.20% +0.33% +0.41%] index_select spread : Elapsed 0.025 ms (2.462 ms / 100) 2.460 -> 2.464 ( +0.16%) [ +0.12% +0.00% +0.08% / +0.16% +0.41% +0.37%] index_select strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.459 -> 2.463 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.37% +0.49%] index_select strided 5 : Elapsed 0.025 ms (2.462 ms / 100) 2.465 -> 2.463 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.04% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.466 ms / 100) 2.462 -> 2.465 ( +0.12%) [ +0.00% +0.08% +0.00% / +0.12% +0.28% +0.24%] index_select strided 8 : Elapsed 0.025 ms (2.462 ms / 100) 2.462 -> 2.460 ( -0.08%) [ +0.16% +0.00% +0.04% / -0.08% +0.32% +0.49%] index_select strided 16 : Elapsed 0.025 ms (2.466 ms / 100) 2.460 -> 2.464 ( +0.16%) [ +0.20% +0.08% +0.00% / +0.16% +0.33% +0.37%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.463 -> 2.464 ( +0.04%) [ +0.00% +0.12% +0.04% / +0.08% +0.20% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.463 ms / 100) 2.463 -> 2.459 ( -0.16%) [ +0.04% +0.12% +0.00% / +0.12% +0.16% -0.16%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.459 -> 2.463 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.16% +0.33%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [20, 4, 16, 5] (stride (5, 1600, 100, 1)) A = [40, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 0 2.392 -> 2.392 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.33% +0.13%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.00% +0.00% +0.21% / +0.12% -0.04% -0.08%] index_select wrap : Elapsed 0.024 ms (2.405 ms / 100) 2.402 -> 2.406 ( +0.17%) [ +0.42% +0.17% +0.00% / +0.17% +0.21% +0.21%] index_select linear : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.403 ( -0.12%) [ +0.04% +0.00% +0.17% / +0.21% -0.12% -0.08%] index_select reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.38% +0.17% +0.00% / +0.08% +0.04% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.402 ms / 100) 2.393 -> 2.394 ( +0.04%) [ +0.21% +0.13% +0.00% / +0.04% +0.21% +0.38%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.400 -> 2.401 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.04% +0.37% +0.17%] index_select spread : Elapsed 0.024 ms (2.403 ms / 100) 2.409 -> 2.402 ( -0.29%) [ +0.00% +0.00% +0.04% / +0.00% -0.17% -0.29%] index_select strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.398 -> 2.400 ( +0.08%) [ +0.00% +0.38% +0.00% / +0.21% +0.08% +0.21%] index_select strided 5 : Elapsed 0.024 ms (2.398 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.25% +0.08% +0.00% / +0.08% +0.00% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.396 -> 2.399 ( +0.13%) [ +0.00% +0.04% +0.00% / +0.17% +0.13% +0.13%] index_select strided 8 : Elapsed 0.024 ms (2.396 ms / 100) 2.396 -> 2.397 ( +0.04%) [ +0.13% +0.08% +0.00% / +0.04% +0.21% +0.29%] index_select strided 16 : Elapsed 0.024 ms (2.399 ms / 100) 2.399 -> 2.400 ( +0.04%) [ +0.17% +0.29% +0.00% / +0.04% +0.33% +0.04%] index_select random : Elapsed 0.024 ms (2.403 ms / 100) 2.400 -> 2.402 ( +0.08%) [ +0.00% +0.04% +0.08% / +0.08% +0.08% +0.13%] index_select random_sorted : Elapsed 0.024 ms (2.400 ms / 100) 2.401 -> 2.401 ( +0.00%) [ +0.00% +0.08% +0.17% / +0.04% +0.04% +0.00%] index_select perm : Elapsed 0.024 ms (2.401 ms / 100) 2.403 -> 2.400 ( -0.12%) [ +0.12% +0.17% +0.00% / +0.21% -0.08% -0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) B = [20, 4, 16, 5] (stride (16, 1600, 1, 320)) A = [40, 4, 16, 5] (stride (20, 1, 800, 4)) dim = 0 2.448 -> 2.448 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.16% +0.00% +0.12%] index_select const : Elapsed 0.024 ms (2.449 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.16% +0.20% +0.00% / +0.08% +0.04% -0.04%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.460 -> 2.455 ( -0.20%) [ +0.00% +0.08% +0.04% / +0.16% -0.08% -0.20%] index_select linear : Elapsed 0.025 ms (2.460 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.12% +0.08% +0.20%] index_select reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.16% +0.25%] index_select skip64 : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.00% +0.25% +0.12% / +0.16% +0.33% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.459 -> 2.460 ( +0.04%) [ +0.00% +0.16% +0.08% / +0.04% +0.16% +0.41%] index_select spread : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.16% +0.33% +0.00% / +0.08% +0.33% +0.57%] index_select strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.447 -> 2.450 ( +0.12%) [ +0.00% +0.20% +0.16% / +0.12% +0.33% +0.37%] index_select strided 5 : Elapsed 0.024 ms (2.447 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.08% +0.20% +0.00% / +0.08% +0.16% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.449 -> 2.452 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.16% +0.37% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.20% +0.12%] index_select strided 16 : Elapsed 0.025 ms (2.452 ms / 100) 2.455 -> 2.460 ( +0.20%) [ +0.08% +0.04% +0.00% / +0.20% +0.24% +0.24%] index_select random : Elapsed 0.025 ms (2.457 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.00% +0.08% +0.24% / +0.08% +0.16% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.456 ms / 100) 2.457 -> 2.460 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.16% +0.12% +0.12%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.456 -> 2.458 ( +0.08%) [ +0.12% +0.00% +0.16% / +0.08% +0.37% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) A = [40, 4, 16, 5] (stride (320, 1, 4, 64)) dim = 0 2.396 -> 2.399 ( +0.13%) [ +0.00% +0.04% +0.21% / +0.13% +0.17% +0.33%] index_select const : Elapsed 0.024 ms (2.396 ms / 100) 2.408 -> 2.407 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.12% -0.04% +0.00%] index_select wrap : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.405 ( -0.08%) [ +0.04% +0.08% +0.00% / -0.08% -0.08% +0.04%] index_select linear : Elapsed 0.024 ms (2.408 ms / 100) 2.407 -> 2.406 ( -0.04%) [ +0.17% +0.08% +0.00% / +0.04% -0.04% -0.04%] index_select reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.399 -> 2.400 ( +0.04%) [ +0.04% +0.25% +0.00% / +0.08% +0.04% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.400 ms / 100) 2.396 -> 2.394 ( -0.08%) [ +0.08% +0.00% +0.21% / -0.08% +0.29% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.405 -> 2.404 ( -0.04%) [ +0.12% +0.00% +0.12% / -0.04% +0.25% +0.12%] index_select spread : Elapsed 0.024 ms (2.408 ms / 100) 2.410 -> 2.407 ( -0.12%) [ +0.21% +0.00% +0.00% / -0.12% +0.00% -0.12%] index_select strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.401 -> 2.399 ( -0.08%) [ +0.21% +0.04% +0.00% / -0.04% -0.08% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.403 ( -0.04%) [ +0.04% +0.08% +0.00% / -0.04% +0.12% +0.08%] index_select strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.398 -> 2.399 ( +0.04%) [ +0.00% +0.08% +0.13% / +0.04% +0.29% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.398 ms / 100) 2.399 -> 2.401 ( +0.08%) [ +0.00% +0.08% +0.04% / +0.17% +0.08% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.399 ms / 100) 2.402 -> 2.406 ( +0.17%) [ +0.04% +0.00% +0.00% / +0.17% +0.25% +0.33%] index_select random : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.405 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.08% +0.04% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.404 ms / 100) 2.409 -> 2.402 ( -0.29%) [ +0.00% +0.12% +0.17% / -0.04% -0.12% -0.29%] index_select perm : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.400 ( -0.33%) [ +0.12% +0.08% +0.00% / +0.12% -0.33% -0.29%] index_select perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) B = [20, 4, 16, 5] (stride (1, 20, 80, 1280)) A = [40, 4, 16, 5] (stride (4, 1, 800, 160)) dim = 0 2.409 -> 2.412 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.17% +0.12%] index_select const : Elapsed 0.024 ms (2.413 ms / 100) 2.423 -> 2.416 ( -0.29%) [ +0.04% +0.04% +0.00% / +0.08% -0.21% -0.29%] index_select wrap : Elapsed 0.024 ms (2.424 ms / 100) 2.419 -> 2.415 ( -0.17%) [ +0.21% +0.17% +0.00% / +0.29% -0.17% +0.00%] index_select linear : Elapsed 0.024 ms (2.424 ms / 100) 2.419 -> 2.417 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.21% +0.08% -0.08%] index_select reverse : Elapsed 0.024 ms (2.421 ms / 100) 2.413 -> 2.411 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.04% -0.08% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.04% +0.17% +0.00% / +0.08% +0.17% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.424 -> 2.429 ( +0.21%) [ +0.17% +0.08% +0.00% / +0.21% +0.50% +0.25%] index_select spread : Elapsed 0.024 ms (2.428 ms / 100) 2.422 -> 2.424 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.08% +0.37% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.416 -> 2.418 ( +0.08%) [ +0.04% +0.00% +0.12% / +0.08% +0.29% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.417 ms / 100) 2.426 -> 2.427 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.16% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.21% +0.17%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.00% +0.04% +0.21% / +0.04% +0.33% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.413 ms / 100) 2.418 -> 2.423 ( +0.21%) [ +0.00% +0.08% +0.08% / +0.21% +0.25% +0.29%] index_select random : Elapsed 0.024 ms (2.418 ms / 100) 2.424 -> 2.423 ( -0.04%) [ +0.08% +0.08% +0.00% / +0.04% -0.04% +0.04%] index_select random_sorted : Elapsed 0.024 ms (2.426 ms / 100) 2.425 -> 2.425 ( +0.00%) [ +0.04% +0.25% +0.00% / +0.08% +0.00% +0.08%] index_select perm : Elapsed 0.024 ms (2.426 ms / 100) 2.423 -> 2.424 ( +0.04%) [ +0.12% +0.12% +0.00% / +0.21% +0.25% +0.04%] index_select perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) out_shape = [40, 20, 16, 5] in_shape = [40, 4, 16, 5] idx_dim = 1 B = [40, 20, 16, 5] (stride (1600, 16, 1, 320)) A = [40, 4, 16, 5] (stride (1, 40, 800, 160)) dim = 1 2.133 -> 2.135 ( +0.09%) [ +0.14% +0.23% +0.00% / +0.09% +0.42% +0.52%] index_add_ linear : Elapsed 0.021 ms (2.136 ms / 100) 2.072 -> 2.074 ( +0.10%) [ +0.00% +0.10% +0.14% / +0.10% +0.58% +0.48%] index_copy_ linear : Elapsed 0.021 ms (2.072 ms / 100) 2.133 -> 2.136 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.28% +0.33%] index_add_ reverse : Elapsed 0.021 ms (2.136 ms / 100) 2.072 -> 2.074 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.48% +0.53%] index_copy_ reverse : Elapsed 0.021 ms (2.074 ms / 100) 2.127 -> 2.129 ( +0.09%) [ +0.14% +0.24% +0.00% / +0.09% +0.47% +0.42%] index_add_ spread : Elapsed 0.021 ms (2.130 ms / 100) 2.064 -> 2.066 ( +0.10%) [ +0.00% +0.24% +0.10% / +0.10% +0.44% +0.44%] index_copy_ spread : Elapsed 0.021 ms (2.064 ms / 100) 2.127 -> 2.124 ( -0.14%) [ +0.09% +0.28% +0.00% / -0.14% +0.33% +0.28%] index_add_ strided 3 : Elapsed 0.021 ms (2.129 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.00% +0.19% +0.10% / +0.10% +0.44% +0.24%] index_copy_ strided 3 : Elapsed 0.021 ms (2.063 ms / 100) 2.133 -> 2.131 ( -0.09%) [ +0.09% +0.00% +0.14% / -0.09% +0.28% +0.23%] index_add_ strided 7 : Elapsed 0.021 ms (2.135 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.29% +0.29%] index_copy_ strided 7 : Elapsed 0.021 ms (2.071 ms / 100) 2.131 -> 2.132 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.38% +0.19%] index_add_ perm : Elapsed 0.021 ms (2.131 ms / 100) 2.067 -> 2.065 ( -0.10%) [ +0.00% +0.00% +0.05% / -0.10% +0.48% +0.44%] index_copy_ perm : Elapsed 0.021 ms (2.067 ms / 100) 2.132 -> 2.140 ( +0.38%) [ +0.38% +0.00% +0.23% / +0.38% +0.38% +0.42%] index_add_ perm_sorted : Elapsed 0.021 ms (2.140 ms / 100) 2.069 -> 2.077 ( +0.39%) [ +0.10% +0.00% +0.29% / +0.39% +0.43% +0.53%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.071 ms / 100) 8.782 -> 8.765 ( -0.19%) [ +0.00% +0.03% +0.00% / +0.20% -0.19% -0.09%] index_select const : Elapsed 0.088 ms (8.782 ms / 100) 8.803 -> 8.797 ( -0.07%) [ +0.00% +0.12% +0.26% / -0.07% +0.09% +0.03%] index_select wrap : Elapsed 0.088 ms (8.803 ms / 100) 8.790 -> 8.796 ( +0.07%) [ +0.10% +0.08% +0.00% / +0.07% +0.23% +0.39%] index_select linear : Elapsed 0.088 ms (8.799 ms / 100) 8.801 -> 8.804 ( +0.03%) [ +0.17% +0.11% +0.00% / +0.27% +0.03% +0.09%] index_select reverse : Elapsed 0.088 ms (8.816 ms / 100) 8.771 -> 8.761 ( -0.11%) [ +0.05% +0.09% +0.00% / +0.07% +0.03% -0.11%] index_select skip64 : Elapsed 0.088 ms (8.775 ms / 100) 8.773 -> 8.779 ( +0.07%) [ +0.00% +0.27% +0.00% / +0.21% +0.08% +0.07%] index_select skip256 : Elapsed 0.088 ms (8.773 ms / 100) 8.822 -> 8.811 ( -0.12%) [ +0.20% +0.07% +0.00% / +0.17% +0.09% -0.12%] index_select spread : Elapsed 0.088 ms (8.840 ms / 100) 8.813 -> 8.792 ( -0.24%) [ +0.05% +0.06% +0.00% / -0.05% -0.24% -0.08%] index_select strided 3 : Elapsed 0.088 ms (8.817 ms / 100) 8.806 -> 8.803 ( -0.03%) [ +0.00% +0.01% +0.22% / +0.10% -0.01% -0.03%] index_select random : Elapsed 0.088 ms (8.806 ms / 100) 8.808 -> 8.817 ( +0.10%) [ +0.00% +0.22% +0.24% / +0.11% +0.28% +0.10%] index_select random_sorted : Elapsed 0.088 ms (8.808 ms / 100) B = [40, 20, 16, 5] (stride (80, 3200, 1, 16)) A = [40, 4, 16, 5] (stride (320, 80, 1, 16)) dim = 1 2.078 -> 2.079 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.63% +0.77%] index_add_ linear : Elapsed 0.021 ms (2.078 ms / 100) 2.026 -> 2.027 ( +0.05%) [ +0.30% +0.10% +0.00% / +0.05% +1.23% +1.14%] index_copy_ linear : Elapsed 0.020 ms (2.032 ms / 100) 2.074 -> 2.074 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.77% +0.82%] index_add_ reverse : Elapsed 0.021 ms (2.075 ms / 100) 2.024 -> 2.028 ( +0.20%) [ +0.10% +0.20% +0.00% / +0.20% +1.14% +0.99%] index_copy_ reverse : Elapsed 0.020 ms (2.026 ms / 100) 2.069 -> 2.070 ( +0.05%) [ +0.24% +0.14% +0.00% / +0.05% +0.92% +0.92%] index_add_ spread : Elapsed 0.021 ms (2.074 ms / 100) 2.021 -> 2.024 ( +0.15%) [ +0.20% +0.00% +0.10% / +0.15% +1.09% +0.94%] index_copy_ spread : Elapsed 0.020 ms (2.025 ms / 100) 2.083 -> 2.087 ( +0.19%) [ +0.10% +0.29% +0.00% / +0.19% +0.77% +0.62%] index_add_ strided 3 : Elapsed 0.021 ms (2.085 ms / 100) 2.033 -> 2.036 ( +0.15%) [ +0.00% +0.39% +0.00% / +0.15% +0.93% +0.98%] index_copy_ strided 3 : Elapsed 0.020 ms (2.033 ms / 100) 2.088 -> 2.091 ( +0.14%) [ +0.00% +0.00% +0.05% / +0.14% +0.67% +0.86%] index_add_ strided 7 : Elapsed 0.021 ms (2.088 ms / 100) 2.041 -> 2.038 ( -0.15%) [ +0.00% +0.15% +0.39% / -0.15% +0.83% +0.78%] index_copy_ strided 7 : Elapsed 0.020 ms (2.041 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.00% +0.10% +0.14% / +0.00% +0.29% +0.43%] index_add_ perm : Elapsed 0.021 ms (2.069 ms / 100) 2.023 -> 2.026 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.64% +0.30%] index_copy_ perm : Elapsed 0.020 ms (2.026 ms / 100) 2.079 -> 2.083 ( +0.19%) [ +0.00% +0.05% +0.14% / +0.19% +0.29% +0.34%] index_add_ perm_sorted : Elapsed 0.021 ms (2.079 ms / 100) 2.037 -> 2.038 ( +0.05%) [ +0.00% +0.15% +0.00% / +0.05% +0.15% +0.15%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.037 ms / 100) 9.132 -> 9.134 ( +0.02%) [ +0.00% +0.03% +0.03% / +0.02% +0.18% +0.03%] index_select const : Elapsed 0.091 ms (9.132 ms / 100) 9.191 -> 9.185 ( -0.07%) [ +0.22% +0.20% +0.00% / +0.38% -0.07% +0.16%] index_select wrap : Elapsed 0.092 ms (9.211 ms / 100) 9.170 -> 9.155 ( -0.16%) [ +0.07% +0.02% +0.00% / +0.12% -0.16% +0.11%] index_select linear : Elapsed 0.092 ms (9.176 ms / 100) 9.152 -> 9.161 ( +0.10%) [ +0.00% +0.09% +0.11% / +0.22% +0.10% +0.30%] index_select reverse : Elapsed 0.092 ms (9.152 ms / 100) 9.138 -> 9.125 ( -0.14%) [ +0.07% +0.02% +0.00% / -0.14% +0.07% +0.05%] index_select skip64 : Elapsed 0.091 ms (9.144 ms / 100) 9.127 -> 9.127 ( +0.00%) [ +0.02% +0.00% +0.10% / +0.00% +0.10% +0.19%] index_select skip256 : Elapsed 0.091 ms (9.129 ms / 100) 9.201 -> 9.168 ( -0.36%) [ +0.00% +0.00% +0.16% / -0.26% -0.36% -0.34%] index_select spread : Elapsed 0.092 ms (9.201 ms / 100) 9.201 -> 9.189 ( -0.13%) [ +0.00% +0.18% +0.05% / +0.09% +0.32% -0.13%] index_select strided 3 : Elapsed 0.092 ms (9.201 ms / 100) 9.215 -> 9.197 ( -0.20%) [ +0.09% +0.00% +0.07% / +0.01% -0.20% -0.17%] index_select random : Elapsed 0.092 ms (9.223 ms / 100) 9.188 -> 9.182 ( -0.07%) [ +0.02% +0.15% +0.00% / +0.07% -0.07% +0.03%] index_select random_sorted : Elapsed 0.092 ms (9.190 ms / 100) B = [40, 20, 16, 5] (stride (80, 3200, 1, 16)) A = [40, 4, 16, 5] (stride (320, 1, 20, 4)) dim = 1 1.976 -> 1.979 ( +0.15%) [ +0.10% +0.00% +0.00% / +0.15% +0.61% +0.46%] index_add_ linear : Elapsed 0.020 ms (1.978 ms / 100) 1.921 -> 1.927 ( +0.31%) [ +0.21% +0.21% +0.00% / +0.31% +0.73% +0.57%] index_copy_ linear : Elapsed 0.019 ms (1.925 ms / 100) 1.953 -> 1.954 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.97% +1.02%] index_add_ reverse : Elapsed 0.020 ms (1.955 ms / 100) 1.903 -> 1.903 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.00% +1.00% +0.84%] index_copy_ reverse : Elapsed 0.019 ms (1.905 ms / 100) 1.958 -> 1.959 ( +0.05%) [ +0.00% +0.00% +0.20% / +0.05% +0.51% +0.61%] index_add_ spread : Elapsed 0.020 ms (1.958 ms / 100) 1.905 -> 1.908 ( +0.16%) [ +0.00% +0.21% +0.37% / +0.16% +0.89% +0.63%] index_copy_ spread : Elapsed 0.019 ms (1.905 ms / 100) 1.979 -> 1.980 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.45% +0.51%] index_add_ strided 3 : Elapsed 0.020 ms (1.980 ms / 100) 1.922 -> 1.921 ( -0.05%) [ +0.00% +0.21% +0.26% / -0.05% +0.78% +0.62%] index_copy_ strided 3 : Elapsed 0.019 ms (1.922 ms / 100) 1.964 -> 1.965 ( +0.05%) [ +0.00% +0.15% +0.10% / +0.05% +1.02% +1.07%] index_add_ strided 7 : Elapsed 0.020 ms (1.964 ms / 100) 1.910 -> 1.914 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.94% +1.20%] index_copy_ strided 7 : Elapsed 0.019 ms (1.910 ms / 100) 1.970 -> 1.974 ( +0.20%) [ +0.15% +0.10% +0.00% / +0.20% +0.41% +0.51%] index_add_ perm : Elapsed 0.020 ms (1.973 ms / 100) 1.920 -> 1.921 ( +0.05%) [ +0.00% +0.36% +0.05% / +0.05% +0.31% +0.52%] index_copy_ perm : Elapsed 0.019 ms (1.920 ms / 100) 1.964 -> 1.967 ( +0.15%) [ +0.05% +0.20% +0.00% / +0.15% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.020 ms (1.965 ms / 100) 1.912 -> 1.914 ( +0.10%) [ +0.16% +0.05% +0.00% / +0.10% +0.84% +0.84%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.915 ms / 100) 8.748 -> 8.755 ( +0.08%) [ +0.27% +0.08% +0.00% / +0.10% +0.08% +0.23%] index_select const : Elapsed 0.088 ms (8.772 ms / 100) 8.746 -> 8.746 ( +0.00%) [ +0.01% +0.07% +0.00% / +0.00% +0.43% +0.14%] index_select wrap : Elapsed 0.087 ms (8.747 ms / 100) 8.741 -> 8.740 ( -0.01%) [ +0.00% +0.32% +0.19% / +0.19% -0.01% +0.19%] index_select linear : Elapsed 0.087 ms (8.741 ms / 100) 8.743 -> 8.739 ( -0.05%) [ +0.09% +0.00% +0.06% / -0.05% +0.24% +0.10%] index_select reverse : Elapsed 0.088 ms (8.751 ms / 100) 8.744 -> 8.745 ( +0.01%) [ +0.15% +0.00% +0.03% / +0.03% +0.01% +0.14%] index_select skip64 : Elapsed 0.088 ms (8.757 ms / 100) 8.747 -> 8.739 ( -0.09%) [ +0.07% +0.00% +0.05% / +0.22% -0.09% -0.01%] index_select skip256 : Elapsed 0.088 ms (8.753 ms / 100) 8.758 -> 8.739 ( -0.22%) [ +0.05% +0.02% +0.00% / -0.02% +0.01% -0.22%] index_select spread : Elapsed 0.088 ms (8.762 ms / 100) 8.739 -> 8.747 ( +0.09%) [ +0.00% +0.23% +0.09% / +0.24% +0.09% +0.22%] index_select strided 3 : Elapsed 0.087 ms (8.739 ms / 100) 8.728 -> 8.732 ( +0.05%) [ +0.00% +0.37% +0.42% / +0.18% +0.05% +0.11%] index_select random : Elapsed 0.087 ms (8.728 ms / 100) 8.734 -> 8.753 ( +0.22%) [ +0.37% +0.00% +0.21% / +0.22% +0.22% +0.23%] index_select random_sorted : Elapsed 0.088 ms (8.766 ms / 100) B = [40, 20, 16, 5] (stride (1, 3200, 200, 40)) dim = 1 fill_cnt = 4 0.554 -> 0.554 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_fill_ const : Elapsed 0.006 ms (0.554 ms / 100) 0.554 -> 0.554 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +1.08% +0.72%] index_fill_ linear : Elapsed 0.006 ms (0.554 ms / 100) 0.553 -> 0.555 ( +0.36%) [ +0.18% +0.18% +0.00% / +0.36% +0.90% +1.27%] index_fill_ reverse : Elapsed 0.006 ms (0.554 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +0.00% +0.00% +0.00% / +0.18% +1.26% +0.90%] index_fill_ skip64 : Elapsed 0.006 ms (0.554 ms / 100) 0.553 -> 0.554 ( +0.18%) [ +0.18% +0.36% +0.00% / +0.18% +1.27% +1.27%] index_fill_ skip256 : Elapsed 0.006 ms (0.554 ms / 100) 0.555 -> 0.556 ( +0.18%) [ +0.18% +0.00% +0.00% / +0.18% +0.72% +0.54%] index_fill_ spread : Elapsed 0.006 ms (0.556 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +1.08% +0.36% +0.00% / +0.72% +0.72% +0.72%] index_fill_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.552 -> 0.553 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +1.09% +1.45%] index_fill_ strided 5 : Elapsed 0.006 ms (0.553 ms / 100) 0.560 -> 0.560 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.36% +0.54%] index_fill_ strided 7 : Elapsed 0.006 ms (0.560 ms / 100) 0.559 -> 0.560 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +1.25% +0.89%] index_fill_ strided 8 : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.557 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +1.44% +1.26%] index_fill_ strided 16 : Elapsed 0.006 ms (0.556 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.18% +0.00% +0.00% / +0.36% +1.26% +1.26%] index_fill_ random : Elapsed 0.006 ms (0.555 ms / 100) 0.553 -> 0.555 ( +0.36%) [ +0.00% +0.36% +0.36% / +0.36% +1.45% +1.27%] index_fill_ random_sorted : Elapsed 0.006 ms (0.553 ms / 100) 0.555 -> 0.554 ( -0.18%) [ +0.36% +0.54% +0.00% / -0.18% +1.44% +1.62%] index_fill_ perm : Elapsed 0.006 ms (0.557 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +0.54% +0.36%] index_fill_ perm_sorted : Elapsed 0.006 ms (0.554 ms / 100) B = [40, 20, 16, 5] (stride (16, 3200, 1, 640)) A = [40, 4, 16, 5] (stride (320, 80, 1, 16)) dim = 1 2.083 -> 2.084 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.62% +0.58%] index_add_ linear : Elapsed 0.021 ms (2.083 ms / 100) 2.028 -> 2.029 ( +0.05%) [ +0.10% +0.25% +0.00% / +0.05% +1.18% +1.23%] index_copy_ linear : Elapsed 0.020 ms (2.030 ms / 100) 2.073 -> 2.075 ( +0.10%) [ +0.29% +0.24% +0.00% / +0.10% +1.16% +0.96%] index_add_ reverse : Elapsed 0.021 ms (2.079 ms / 100) 2.026 -> 2.033 ( +0.35%) [ +0.10% +0.25% +0.00% / +0.35% +1.23% +1.09%] index_copy_ reverse : Elapsed 0.020 ms (2.028 ms / 100) 2.069 -> 2.073 ( +0.19%) [ +0.00% +0.29% +0.14% / +0.19% +1.06% +1.01%] index_add_ spread : Elapsed 0.021 ms (2.069 ms / 100) 2.023 -> 2.028 ( +0.25%) [ +0.20% +0.00% +0.10% / +0.25% +1.38% +1.19%] index_copy_ spread : Elapsed 0.020 ms (2.027 ms / 100) 2.086 -> 2.085 ( -0.05%) [ +0.24% +0.00% +0.05% / -0.05% +0.91% +0.67%] index_add_ strided 3 : Elapsed 0.021 ms (2.091 ms / 100) 2.036 -> 2.041 ( +0.25%) [ +0.20% +0.00% +0.15% / +0.25% +1.03% +1.18%] index_copy_ strided 3 : Elapsed 0.020 ms (2.040 ms / 100) 2.090 -> 2.089 ( -0.05%) [ +0.00% +0.24% +0.14% / -0.05% +0.86% +0.72%] index_add_ strided 7 : Elapsed 0.021 ms (2.090 ms / 100) 2.042 -> 2.043 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.83% +1.08%] index_copy_ strided 7 : Elapsed 0.020 ms (2.042 ms / 100) 2.071 -> 2.070 ( -0.05%) [ +0.00% +0.10% +0.14% / -0.05% +0.34% +0.53%] index_add_ perm : Elapsed 0.021 ms (2.071 ms / 100) 2.024 -> 2.027 ( +0.15%) [ +0.00% +0.10% +0.44% / +0.15% +0.59% +0.44%] index_copy_ perm : Elapsed 0.020 ms (2.024 ms / 100) 2.076 -> 2.080 ( +0.19%) [ +0.00% +0.34% +0.14% / +0.19% +0.63% +0.43%] index_add_ perm_sorted : Elapsed 0.021 ms (2.076 ms / 100) 2.036 -> 2.038 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.39% +0.34%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.037 ms / 100) 9.141 -> 9.155 ( +0.15%) [ +0.00% +0.09% +0.01% / +0.22% +0.15% +0.27%] index_select const : Elapsed 0.091 ms (9.141 ms / 100) 9.219 -> 9.208 ( -0.12%) [ +0.08% +0.14% +0.00% / -0.05% -0.12% +0.07%] index_select wrap : Elapsed 0.092 ms (9.226 ms / 100) 9.169 -> 9.180 ( +0.12%) [ +0.00% +0.03% +0.04% / +0.31% +0.12% +0.25%] index_select linear : Elapsed 0.092 ms (9.169 ms / 100) 9.164 -> 9.173 ( +0.10%) [ +0.22% +0.00% +0.00% / +0.10% +0.10% +0.22%] index_select reverse : Elapsed 0.092 ms (9.184 ms / 100) 9.135 -> 9.135 ( +0.00%) [ +0.33% +0.00% +0.11% / +0.00% +0.14% +0.28%] index_select skip64 : Elapsed 0.092 ms (9.165 ms / 100) 9.140 -> 9.148 ( +0.09%) [ +0.00% +0.04% +0.19% / +0.10% +0.09% +0.22%] index_select skip256 : Elapsed 0.091 ms (9.140 ms / 100) 9.194 -> 9.194 ( +0.00%) [ +0.07% +0.18% +0.00% / +0.00% +0.16% +0.00%] index_select spread : Elapsed 0.092 ms (9.200 ms / 100) 9.207 -> 9.214 ( +0.08%) [ +0.05% +0.16% +0.00% / +0.10% +0.29% +0.08%] index_select strided 3 : Elapsed 0.092 ms (9.212 ms / 100) 9.201 -> 9.197 ( -0.04%) [ +0.00% +0.17% +0.24% / -0.04% +0.08% +0.07%] index_select random : Elapsed 0.092 ms (9.201 ms / 100) 9.178 -> 9.191 ( +0.14%) [ +0.00% +0.20% +0.15% / +0.22% +0.31% +0.14%] index_select random_sorted : Elapsed 0.092 ms (9.178 ms / 100) B = [40, 20, 16, 5] (stride (1, 200, 4000, 40)) A = [40, 4, 16, 5] (stride (320, 80, 1, 16)) dim = 1 2.069 -> 2.069 ( +0.00%) [ +0.00% +0.34% +0.10% / +0.00% +0.72% +0.48%] index_add_ linear : Elapsed 0.021 ms (2.069 ms / 100) 2.033 -> 2.037 ( +0.20%) [ +0.00% +0.39% +0.39% / +0.20% +0.69% +0.84%] index_copy_ linear : Elapsed 0.020 ms (2.033 ms / 100) 2.052 -> 2.052 ( +0.00%) [ +0.00% +0.19% +0.19% / +0.00% +1.61% +1.27%] index_add_ reverse : Elapsed 0.021 ms (2.052 ms / 100) 2.019 -> 2.021 ( +0.10%) [ +0.10% +0.54% +0.00% / +0.10% +1.78% +1.39%] index_copy_ reverse : Elapsed 0.020 ms (2.021 ms / 100) 2.046 -> 2.046 ( +0.00%) [ +0.15% +0.00% +0.10% / +0.00% +1.22% +1.08%] index_add_ spread : Elapsed 0.020 ms (2.049 ms / 100) 2.020 -> 2.023 ( +0.15%) [ +0.15% +0.20% +0.00% / +0.15% +1.04% +1.39%] index_copy_ spread : Elapsed 0.020 ms (2.023 ms / 100) 2.067 -> 2.075 ( +0.39%) [ +0.63% +0.10% +0.00% / +0.39% +0.92% +0.92%] index_add_ strided 3 : Elapsed 0.021 ms (2.080 ms / 100) 2.032 -> 2.041 ( +0.44%) [ +0.34% +0.05% +0.00% / +0.44% +0.89% +1.03%] index_copy_ strided 3 : Elapsed 0.020 ms (2.039 ms / 100) 2.059 -> 2.059 ( +0.00%) [ +0.15% +0.39% +0.00% / +0.00% +1.46% +1.46%] index_add_ strided 7 : Elapsed 0.021 ms (2.062 ms / 100) 2.019 -> 2.022 ( +0.15%) [ +0.45% +0.59% +0.00% / +0.15% +2.03% +1.88%] index_copy_ strided 7 : Elapsed 0.020 ms (2.028 ms / 100) 2.072 -> 2.079 ( +0.34%) [ +0.00% +0.34% +0.43% / +0.34% +0.92% +0.97%] index_add_ perm : Elapsed 0.021 ms (2.072 ms / 100) 2.039 -> 2.038 ( -0.05%) [ +0.00% +0.20% +0.10% / -0.05% +0.74% +0.83%] index_copy_ perm : Elapsed 0.020 ms (2.039 ms / 100) 2.064 -> 2.061 ( -0.15%) [ +0.00% +0.10% +0.00% / -0.15% +1.26% +1.11%] index_add_ perm_sorted : Elapsed 0.021 ms (2.064 ms / 100) 2.022 -> 2.022 ( +0.00%) [ +0.00% +0.45% +0.05% / +0.00% +1.68% +1.63%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.022 ms / 100) 9.197 -> 9.187 ( -0.11%) [ +0.00% +0.00% +0.09% / -0.11% -0.05% +0.15%] index_select const : Elapsed 0.092 ms (9.197 ms / 100) 9.238 -> 9.260 ( +0.24%) [ +0.00% +0.14% +0.17% / +0.24% +0.41% +0.53%] index_select wrap : Elapsed 0.092 ms (9.238 ms / 100) 9.217 -> 9.225 ( +0.09%) [ +0.10% +0.00% +0.09% / +0.09% +0.17% +0.26%] index_select linear : Elapsed 0.092 ms (9.226 ms / 100) 9.213 -> 9.212 ( -0.01%) [ +0.00% +0.02% +0.18% / -0.01% +0.05% +0.13%] index_select reverse : Elapsed 0.092 ms (9.213 ms / 100) 9.192 -> 9.197 ( +0.05%) [ +0.00% +0.13% +0.04% / +0.05% +0.14% +0.07%] index_select skip64 : Elapsed 0.092 ms (9.192 ms / 100) 9.181 -> 9.189 ( +0.09%) [ +0.12% +0.00% +0.27% / +0.16% +0.33% +0.09%] index_select skip256 : Elapsed 0.092 ms (9.192 ms / 100) 9.224 -> 9.232 ( +0.09%) [ +0.18% +0.23% +0.00% / +0.09% +0.14% +0.15%] index_select spread : Elapsed 0.092 ms (9.241 ms / 100) 9.251 -> 9.252 ( +0.01%) [ +0.02% +0.10% +0.00% / +0.01% +0.26% +0.11%] index_select strided 3 : Elapsed 0.093 ms (9.253 ms / 100) 9.229 -> 9.229 ( +0.00%) [ +0.07% +0.20% +0.00% / +0.00% +0.13% +0.14%] index_select random : Elapsed 0.092 ms (9.235 ms / 100) 9.209 -> 9.214 ( +0.05%) [ +0.00% +0.37% +0.12% / +0.05% +0.17% +0.26%] index_select random_sorted : Elapsed 0.092 ms (9.209 ms / 100) B = [40, 20, 16, 5] (stride (1, 200, 4000, 40)) A = [40, 4, 16, 5] (stride (16, 640, 1, 2560)) dim = 1 2.117 -> 2.121 ( +0.19%) [ +0.19% +0.00% +0.24% / +0.19% +0.33% +0.57%] index_add_ linear : Elapsed 0.021 ms (2.121 ms / 100) 2.085 -> 2.089 ( +0.19%) [ +0.00% +0.14% +0.00% / +0.19% +0.43% +0.48%] index_copy_ linear : Elapsed 0.021 ms (2.085 ms / 100) 2.114 -> 2.115 ( +0.05%) [ +0.24% +0.24% +0.00% / +0.05% +0.52% +0.38%] index_add_ reverse : Elapsed 0.021 ms (2.119 ms / 100) 2.083 -> 2.083 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +0.38% +0.43%] index_copy_ reverse : Elapsed 0.021 ms (2.085 ms / 100) 2.113 -> 2.108 ( -0.24%) [ +0.00% +0.00% +0.05% / -0.24% +0.52% +0.24%] index_add_ spread : Elapsed 0.021 ms (2.113 ms / 100) 2.075 -> 2.080 ( +0.24%) [ +0.00% +0.48% +0.34% / +0.24% +0.82% +0.67%] index_copy_ spread : Elapsed 0.021 ms (2.075 ms / 100) 2.124 -> 2.131 ( +0.33%) [ +0.14% +0.14% +0.00% / +0.33% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.021 ms (2.127 ms / 100) 2.087 -> 2.086 ( -0.05%) [ +0.10% +0.00% +0.10% / -0.05% +0.67% +0.43%] index_copy_ strided 3 : Elapsed 0.021 ms (2.089 ms / 100) 2.126 -> 2.126 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.19% +0.28%] index_add_ strided 7 : Elapsed 0.021 ms (2.128 ms / 100) 2.088 -> 2.090 ( +0.10%) [ +0.10% +0.14% +0.00% / +0.10% +0.29% +0.34%] index_copy_ strided 7 : Elapsed 0.021 ms (2.090 ms / 100) 2.117 -> 2.118 ( +0.05%) [ +0.38% +0.00% +0.00% / +0.05% +0.66% +1.13%] index_add_ perm : Elapsed 0.021 ms (2.125 ms / 100) 2.076 -> 2.081 ( +0.24%) [ +0.19% +0.39% +0.00% / +0.24% +1.11% +1.06%] index_copy_ perm : Elapsed 0.021 ms (2.080 ms / 100) 2.121 -> 2.124 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.75% +0.99%] index_add_ perm_sorted : Elapsed 0.021 ms (2.124 ms / 100) 2.081 -> 2.081 ( +0.00%) [ +0.19% +0.00% +0.19% / +0.00% +0.86% +1.01%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.085 ms / 100) 9.189 -> 9.182 ( -0.08%) [ +0.11% +0.00% +0.28% / -0.08% +0.19% +0.17%] index_select const : Elapsed 0.092 ms (9.199 ms / 100) 9.262 -> 9.279 ( +0.18%) [ +0.19% +0.00% +0.03% / +0.18% +0.41% +0.41%] index_select wrap : Elapsed 0.093 ms (9.280 ms / 100) 9.236 -> 9.234 ( -0.02%) [ +0.13% +0.00% +0.18% / -0.02% +0.35% +0.19%] index_select linear : Elapsed 0.092 ms (9.248 ms / 100) 9.235 -> 9.251 ( +0.17%) [ +0.05% +0.27% +0.00% / +0.17% +0.37% +0.32%] index_select reverse : Elapsed 0.092 ms (9.240 ms / 100) 9.187 -> 9.183 ( -0.04%) [ +0.21% +0.14% +0.00% / -0.04% +0.38% +0.46%] index_select skip64 : Elapsed 0.092 ms (9.206 ms / 100) 9.182 -> 9.194 ( +0.13%) [ +0.03% +0.00% +0.16% / +0.13% +0.35% +0.40%] index_select skip256 : Elapsed 0.092 ms (9.185 ms / 100) 9.235 -> 9.249 ( +0.15%) [ +0.18% +0.00% +0.05% / +0.15% +0.63% +0.58%] index_select spread : Elapsed 0.093 ms (9.252 ms / 100) 9.256 -> 9.276 ( +0.22%) [ +0.00% +0.23% +0.19% / +0.22% +0.58% +0.38%] index_select strided 3 : Elapsed 0.093 ms (9.256 ms / 100) 9.260 -> 9.282 ( +0.24%) [ +0.41% +0.18% +0.00% / +0.24% +0.51% +0.40%] index_select random : Elapsed 0.093 ms (9.298 ms / 100) 9.224 -> 9.252 ( +0.30%) [ +0.00% +0.25% +0.20% / +0.30% +0.59% +0.56%] index_select random_sorted : Elapsed 0.092 ms (9.224 ms / 100) out_shape = [40, 4, 20, 5] in_shape = [40, 4, 16, 5] idx_dim = 2 B = [40, 4, 20, 5] (stride (100, 4000, 5, 1)) A = [40, 4, 16, 5] (stride (1, 3200, 200, 40)) dim = 2 4.260 -> 4.262 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.59% +0.61%] index_add_ linear : Elapsed 0.043 ms (4.260 ms / 100) 4.099 -> 4.101 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.05% +0.63% +0.66%] index_copy_ linear : Elapsed 0.041 ms (4.100 ms / 100) 4.279 -> 4.280 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.84% +0.79%] index_add_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.117 -> 4.120 ( +0.07%) [ +0.00% +0.02% +0.12% / +0.07% +0.87% +0.85%] index_copy_ reverse : Elapsed 0.041 ms (4.117 ms / 100) 4.222 -> 4.221 ( -0.02%) [ +0.00% +0.09% +0.09% / -0.02% +0.69% +0.73%] index_add_ spread : Elapsed 0.042 ms (4.222 ms / 100) 4.083 -> 4.082 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.76% +0.78%] index_copy_ spread : Elapsed 0.041 ms (4.083 ms / 100) 4.250 -> 4.251 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.043 ms (4.251 ms / 100) 4.082 -> 4.083 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.76% +0.73%] index_copy_ strided 3 : Elapsed 0.041 ms (4.085 ms / 100) 4.254 -> 4.254 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.49% +0.54%] index_add_ strided 7 : Elapsed 0.043 ms (4.254 ms / 100) 4.086 -> 4.084 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.69% +0.69%] index_copy_ strided 7 : Elapsed 0.041 ms (4.086 ms / 100) 4.225 -> 4.224 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.62% +0.64%] index_add_ perm : Elapsed 0.042 ms (4.227 ms / 100) 4.082 -> 4.083 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.91% +0.73%] index_copy_ perm : Elapsed 0.041 ms (4.083 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.72% +0.70%] index_add_ perm_sorted : Elapsed 0.043 ms (4.283 ms / 100) 4.122 -> 4.122 ( +0.00%) [ +0.02% +0.00% +0.10% / +0.00% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.123 ms / 100) 5.557 -> 5.554 ( -0.05%) [ +0.05% +0.05% +0.00% / +0.11% -0.02% -0.05%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.575 -> 5.572 ( -0.05%) [ +0.20% +0.00% +0.11% / +0.00% -0.05% -0.05%] index_select wrap : Elapsed 0.056 ms (5.586 ms / 100) 5.572 -> 5.571 ( -0.02%) [ +0.11% +0.00% +0.04% / +0.02% +0.11% -0.02%] index_select linear : Elapsed 0.056 ms (5.578 ms / 100) 5.569 -> 5.573 ( +0.07%) [ +0.27% +0.04% +0.00% / +0.07% +0.11% +0.07%] index_select reverse : Elapsed 0.056 ms (5.584 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.09% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.559 -> 5.551 ( -0.14%) [ +0.04% +0.02% +0.00% / -0.14% -0.11% +0.09%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.20% +0.05% +0.00% / +0.04% -0.04% +0.07%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.574 -> 5.571 ( -0.05%) [ +0.05% +0.14% +0.00% / +0.11% -0.05% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.577 -> 5.572 ( -0.09%) [ +0.00% +0.05% +0.04% / +0.16% -0.09% -0.05%] index_select strided 5 : Elapsed 0.056 ms (5.577 ms / 100) 5.576 -> 5.570 ( -0.11%) [ +0.07% +0.00% +0.07% / +0.04% -0.11% -0.04%] index_select strided 7 : Elapsed 0.056 ms (5.580 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.23% +0.00% +0.11% / +0.00% -0.07% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.568 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.22% +0.14% +0.00%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.566 -> 5.568 ( +0.04%) [ +0.05% +0.07% +0.00% / +0.05% +0.14% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [40, 4, 20, 5] (stride (1, 4000, 40, 800)) A = [40, 4, 16, 5] (stride (320, 16, 1, 64)) dim = 2 4.292 -> 4.290 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.63% +0.65%] index_add_ linear : Elapsed 0.043 ms (4.292 ms / 100) 4.139 -> 4.137 ( -0.05%) [ +0.00% +0.27% +0.00% / -0.05% +0.60% +0.72%] index_copy_ linear : Elapsed 0.041 ms (4.139 ms / 100) 4.282 -> 4.289 ( +0.16%) [ +0.00% +0.02% +0.00% / +0.16% +0.56% +0.61%] index_add_ reverse : Elapsed 0.043 ms (4.282 ms / 100) 4.128 -> 4.134 ( +0.15%) [ +0.00% +0.02% +0.00% / +0.15% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.041 ms (4.128 ms / 100) 4.279 -> 4.280 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.68% +0.86%] index_add_ spread : Elapsed 0.043 ms (4.280 ms / 100) 4.124 -> 4.124 ( +0.00%) [ +0.00% +0.02% +0.10% / +0.00% +0.80% +0.87%] index_copy_ spread : Elapsed 0.041 ms (4.124 ms / 100) 4.273 -> 4.274 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.80% +0.80%] index_add_ strided 3 : Elapsed 0.043 ms (4.276 ms / 100) 4.120 -> 4.122 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.68% +0.95%] index_copy_ strided 3 : Elapsed 0.041 ms (4.120 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.16% +0.00% +0.07% / +0.05% +0.79% +0.82%] index_add_ strided 7 : Elapsed 0.043 ms (4.286 ms / 100) 4.128 -> 4.128 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.73% +0.70%] index_copy_ strided 7 : Elapsed 0.041 ms (4.129 ms / 100) 4.289 -> 4.290 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.02% +0.75% +0.65%] index_add_ perm : Elapsed 0.043 ms (4.291 ms / 100) 4.138 -> 4.136 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.80% +0.65%] index_copy_ perm : Elapsed 0.041 ms (4.138 ms / 100) 4.268 -> 4.282 ( +0.33%) [ +0.19% +0.00% +0.35% / +0.33% +1.01% +0.73%] index_add_ perm_sorted : Elapsed 0.043 ms (4.276 ms / 100) 4.122 -> 4.130 ( +0.19%) [ +0.05% +0.00% +0.12% / +0.19% +0.90% +0.70%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.124 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.13% +0.09% +0.11%] index_select const : Elapsed 0.056 ms (5.572 ms / 100) 5.569 -> 5.575 ( +0.11%) [ +0.13% +0.04% +0.00% / +0.14% +0.11% +0.13%] index_select wrap : Elapsed 0.056 ms (5.576 ms / 100) 5.560 -> 5.570 ( +0.18%) [ +0.29% +0.00% +0.16% / +0.18% +0.36% +0.27%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.00% +0.04% +0.05% / +0.04% +0.05% +0.20%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.559 -> 5.571 ( +0.22%) [ +0.05% +0.09% +0.00% / +0.23% +0.22% +0.32%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.560 -> 5.567 ( +0.13%) [ +0.00% +0.13% +0.07% / +0.13% +0.20% +0.16%] index_select skip256 : Elapsed 0.056 ms (5.560 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.00% +0.16% +0.09% / -0.02% +0.18% -0.04%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.570 -> 5.575 ( +0.09%) [ +0.05% +0.00% +0.05% / +0.09% +0.18% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.573 ( +0.13%) [ +0.18% +0.00% +0.13% / +0.18% +0.13% +0.27%] index_select strided 5 : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.569 ( -0.05%) [ +0.05% +0.00% +0.04% / -0.05% +0.18% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.02% +0.00% +0.00% / +0.04% +0.34% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.09% +0.07% +0.25%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.11% +0.13% +0.00% / +0.11% +0.09% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [40, 4, 20, 5] (stride (5, 200, 800, 1)) A = [40, 4, 16, 5] (stride (320, 80, 1, 16)) dim = 2 3.902 -> 3.896 ( -0.15%) [ +0.00% +0.18% +0.00% / -0.15% +0.41% +0.49%] index_add_ linear : Elapsed 0.039 ms (3.902 ms / 100) 3.749 -> 3.750 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.69% +0.75%] index_copy_ linear : Elapsed 0.037 ms (3.749 ms / 100) 3.904 -> 3.919 ( +0.38%) [ +0.23% +0.00% +0.31% / +0.38% +0.59% +0.59%] index_add_ reverse : Elapsed 0.039 ms (3.913 ms / 100) 3.755 -> 3.756 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.72% +0.75%] index_copy_ reverse : Elapsed 0.038 ms (3.755 ms / 100) 3.916 -> 3.917 ( +0.03%) [ +0.03% +0.00% +0.18% / +0.03% +0.05% +0.13%] index_add_ spread : Elapsed 0.039 ms (3.917 ms / 100) 3.758 -> 3.758 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.40% +0.40%] index_copy_ spread : Elapsed 0.038 ms (3.758 ms / 100) 3.893 -> 3.914 ( +0.54%) [ +0.10% +0.15% +0.00% / +0.54% +0.62% +0.64%] index_add_ strided 3 : Elapsed 0.039 ms (3.897 ms / 100) 3.749 -> 3.750 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.67% +0.69%] index_copy_ strided 3 : Elapsed 0.037 ms (3.749 ms / 100) 3.899 -> 3.907 ( +0.21%) [ +0.00% +0.08% +0.56% / +0.21% +0.46% +0.56%] index_add_ strided 7 : Elapsed 0.039 ms (3.899 ms / 100) 3.750 -> 3.751 ( +0.03%) [ +0.00% +0.11% +0.03% / +0.03% +0.59% +0.56%] index_copy_ strided 7 : Elapsed 0.038 ms (3.750 ms / 100) 3.910 -> 3.917 ( +0.18%) [ +0.20% +0.00% +0.15% / +0.33% +0.20% +0.18%] index_add_ perm : Elapsed 0.039 ms (3.918 ms / 100) 3.761 -> 3.762 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.29% +0.27%] index_copy_ perm : Elapsed 0.038 ms (3.761 ms / 100) 3.910 -> 3.915 ( +0.13%) [ +0.26% +0.00% +0.05% / +0.13% +0.41% +0.43%] index_add_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 3.758 -> 3.759 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.64% +0.67%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.758 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.07% +0.07%] index_select const : Elapsed 0.055 ms (5.478 ms / 100) 5.482 -> 5.481 ( -0.02%) [ +0.02% +0.16% +0.00% / +0.02% -0.02% +0.09%] index_select wrap : Elapsed 0.055 ms (5.483 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.00% +0.04% +0.09%] index_select linear : Elapsed 0.055 ms (5.486 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.13% +0.15%] index_select reverse : Elapsed 0.055 ms (5.483 ms / 100) 5.476 -> 5.475 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +0.02% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.480 ms / 100) 5.475 -> 5.474 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.18% +0.11%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.486 -> 5.482 ( -0.07%) [ +0.04% +0.00% +0.09% / +0.00% -0.04% -0.07%] index_select spread : Elapsed 0.055 ms (5.488 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.09% +0.00% +0.00% / -0.05% +0.13% -0.02%] index_select strided 3 : Elapsed 0.055 ms (5.491 ms / 100) 5.484 -> 5.480 ( -0.07%) [ +0.00% +0.09% +0.05% / +0.11% -0.07% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.484 ms / 100) 5.484 -> 5.482 ( -0.04%) [ +0.11% +0.00% +0.07% / +0.00% +0.07% -0.04%] index_select strided 7 : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.00% +0.05% +0.00% / +0.00% -0.09% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.481 ( -0.07%) [ +0.05% +0.00% +0.02% / +0.04% -0.07% -0.04%] index_select random : Elapsed 0.055 ms (5.488 ms / 100) 5.481 -> 5.482 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.02% +0.07% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [40, 4, 20, 5] (stride (1, 200, 800, 40)) dim = 2 fill_cnt = 16 2.102 -> 2.103 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.86% +0.76%] index_fill_ const : Elapsed 0.021 ms (2.103 ms / 100) 2.101 -> 2.103 ( +0.10%) [ +0.05% +0.29% +0.00% / +0.10% +0.81% +0.81%] index_fill_ linear : Elapsed 0.021 ms (2.102 ms / 100) 2.100 -> 2.098 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.86% +0.76%] index_fill_ reverse : Elapsed 0.021 ms (2.101 ms / 100) 2.102 -> 2.102 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.86% +0.81%] index_fill_ skip64 : Elapsed 0.021 ms (2.102 ms / 100) 2.105 -> 2.106 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.05% +0.67% +0.76%] index_fill_ skip256 : Elapsed 0.021 ms (2.106 ms / 100) 2.101 -> 2.105 ( +0.19%) [ +0.05% +0.10% +0.00% / +0.19% +0.71% +0.76%] index_fill_ spread : Elapsed 0.021 ms (2.102 ms / 100) 2.105 -> 2.106 ( +0.05%) [ +0.24% +0.00% +0.00% / +0.05% +0.71% +0.76%] index_fill_ strided 3 : Elapsed 0.021 ms (2.110 ms / 100) 2.104 -> 2.105 ( +0.05%) [ +0.00% +0.14% +0.05% / +0.05% +0.86% +0.76%] index_fill_ strided 5 : Elapsed 0.021 ms (2.104 ms / 100) 2.104 -> 2.110 ( +0.29%) [ +0.10% +0.05% +0.00% / +0.29% +0.71% +0.62%] index_fill_ strided 7 : Elapsed 0.021 ms (2.106 ms / 100) 2.100 -> 2.102 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.95% +0.90%] index_fill_ strided 8 : Elapsed 0.021 ms (2.102 ms / 100) 2.104 -> 2.106 ( +0.10%) [ +0.10% +0.19% +0.00% / +0.10% +0.71% +0.76%] index_fill_ strided 16 : Elapsed 0.021 ms (2.106 ms / 100) 2.106 -> 2.106 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.57% +0.57%] index_fill_ random : Elapsed 0.021 ms (2.107 ms / 100) 2.108 -> 2.108 ( +0.00%) [ +0.00% +0.19% +0.14% / +0.00% +0.38% +0.43%] index_fill_ random_sorted : Elapsed 0.021 ms (2.108 ms / 100) 2.103 -> 2.104 ( +0.05%) [ +0.05% +0.00% +0.19% / +0.05% +0.52% +0.57%] index_fill_ perm : Elapsed 0.021 ms (2.104 ms / 100) 2.110 -> 2.107 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.33% +0.43%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.110 ms / 100) out_shape = [40, 4, 16, 20] in_shape = [40, 4, 16, 5] idx_dim = 3 B = [40, 4, 16, 20] (stride (16, 12800, 1, 640)) A = [40, 4, 16, 5] (stride (20, 5, 800, 1)) dim = 3 1.791 -> 1.798 ( +0.39%) [ +0.00% +0.00% +0.34% / +0.39% +0.95% +0.78%] index_add_ linear : Elapsed 0.018 ms (1.791 ms / 100) 1.736 -> 1.749 ( +0.75%) [ +0.29% +0.00% +0.58% / +0.75% +1.09% +1.04%] index_copy_ linear : Elapsed 0.017 ms (1.741 ms / 100) 1.790 -> 1.801 ( +0.61%) [ +0.00% +0.06% +0.50% / +0.61% +0.95% +0.89%] index_add_ reverse : Elapsed 0.018 ms (1.790 ms / 100) 1.735 -> 1.750 ( +0.86%) [ +0.00% +0.12% +0.75% / +0.86% +1.15% +1.10%] index_copy_ reverse : Elapsed 0.017 ms (1.735 ms / 100) 1.794 -> 1.805 ( +0.61%) [ +0.00% +0.11% +0.72% / +0.61% +0.84% +0.72%] index_add_ spread : Elapsed 0.018 ms (1.794 ms / 100) 1.739 -> 1.752 ( +0.75%) [ +0.00% +0.06% +0.92% / +0.75% +0.98% +1.04%] index_copy_ spread : Elapsed 0.017 ms (1.739 ms / 100) 1.791 -> 1.800 ( +0.50%) [ +0.00% +0.11% +0.73% / +0.50% +1.45% +1.45%] index_add_ strided 3 : Elapsed 0.018 ms (1.791 ms / 100) 1.738 -> 1.748 ( +0.58%) [ +0.23% +0.00% +0.58% / +0.58% +1.55% +1.84%] index_copy_ strided 3 : Elapsed 0.017 ms (1.742 ms / 100) 1.805 -> 1.812 ( +0.39%) [ +0.00% +0.00% +0.61% / +0.39% +0.89% +0.72%] index_add_ strided 7 : Elapsed 0.018 ms (1.805 ms / 100) 1.750 -> 1.758 ( +0.46%) [ +0.06% +0.00% +0.57% / +0.46% +1.14% +1.14%] index_copy_ strided 7 : Elapsed 0.018 ms (1.751 ms / 100) 1.795 -> 1.801 ( +0.33%) [ +0.00% +0.11% +0.33% / +0.33% +1.34% +1.39%] index_add_ perm : Elapsed 0.018 ms (1.795 ms / 100) 1.738 -> 1.746 ( +0.46%) [ +0.00% +0.17% +0.63% / +0.46% +1.55% +1.67%] index_copy_ perm : Elapsed 0.017 ms (1.738 ms / 100) 1.782 -> 1.791 ( +0.51%) [ +0.00% +0.11% +0.51% / +0.51% +1.68% +1.68%] index_add_ perm_sorted : Elapsed 0.018 ms (1.782 ms / 100) 1.733 -> 1.742 ( +0.52%) [ +0.06% +0.00% +0.63% / +0.52% +1.73% +1.79%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.734 ms / 100) 8.243 -> 8.236 ( -0.08%) [ +0.01% +0.00% +0.04% / -0.01% +0.18% -0.08%] index_select const : Elapsed 0.082 ms (8.244 ms / 100) 8.238 -> 8.238 ( +0.00%) [ +0.27% +0.16% +0.00% / +0.08% +0.19% +0.00%] index_select wrap : Elapsed 0.083 ms (8.260 ms / 100) 8.236 -> 8.248 ( +0.15%) [ +0.05% +0.00% +0.13% / +0.32% +0.18% +0.15%] index_select linear : Elapsed 0.082 ms (8.240 ms / 100) 8.243 -> 8.237 ( -0.07%) [ +0.15% +0.06% +0.00% / -0.07% +0.22% -0.06%] index_select reverse : Elapsed 0.083 ms (8.255 ms / 100) 8.245 -> 8.240 ( -0.06%) [ +0.00% +0.18% +0.13% / +0.06% -0.04% -0.06%] index_select skip64 : Elapsed 0.082 ms (8.245 ms / 100) 8.228 -> 8.242 ( +0.17%) [ +0.18% +0.00% +0.35% / +0.34% +0.17% +0.17%] index_select skip256 : Elapsed 0.082 ms (8.243 ms / 100) 8.239 -> 8.230 ( -0.11%) [ +0.30% +0.24% +0.00% / +0.23% +0.39% -0.11%] index_select spread : Elapsed 0.083 ms (8.264 ms / 100) 8.240 -> 8.237 ( -0.04%) [ +0.33% +0.27% +0.00% / +0.04% +0.02% -0.04%] index_select strided 3 : Elapsed 0.083 ms (8.267 ms / 100) 8.244 -> 8.240 ( -0.05%) [ +0.06% +0.00% +0.01% / +0.08% -0.05% +0.10%] index_select random : Elapsed 0.082 ms (8.249 ms / 100) 8.237 -> 8.241 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.16% +0.25% +0.05%] index_select random_sorted : Elapsed 0.082 ms (8.241 ms / 100) B = [40, 4, 16, 20] (stride (80, 1, 3200, 4)) A = [40, 4, 16, 5] (stride (1, 640, 40, 2560)) dim = 3 1.854 -> 1.850 ( -0.22%) [ +0.16% +0.00% +0.05% / -0.22% +0.59% +0.65%] index_add_ linear : Elapsed 0.019 ms (1.857 ms / 100) 1.804 -> 1.804 ( +0.00%) [ +0.11% +0.06% +0.00% / +0.00% +0.55% +0.44%] index_copy_ linear : Elapsed 0.018 ms (1.806 ms / 100) 1.853 -> 1.852 ( -0.05%) [ +0.00% +0.16% +0.00% / -0.05% +0.43% +0.49%] index_add_ reverse : Elapsed 0.019 ms (1.853 ms / 100) 1.800 -> 1.804 ( +0.22%) [ +0.28% +0.17% +0.00% / +0.22% +0.44% +0.44%] index_copy_ reverse : Elapsed 0.018 ms (1.805 ms / 100) 1.894 -> 1.890 ( -0.21%) [ +0.16% +0.16% +0.00% / +0.21% -0.21% +0.00%] index_add_ spread : Elapsed 0.019 ms (1.897 ms / 100) 1.847 -> 1.842 ( -0.27%) [ +0.11% +0.16% +0.00% / +0.11% -0.27% -0.05%] index_copy_ spread : Elapsed 0.018 ms (1.849 ms / 100) 1.885 -> 1.884 ( -0.05%) [ +0.27% +0.00% +0.21% / +0.05% -0.05% +0.11%] index_add_ strided 3 : Elapsed 0.019 ms (1.890 ms / 100) 1.838 -> 1.837 ( -0.05%) [ +0.00% +0.22% +0.11% / +0.05% -0.05% +0.00%] index_copy_ strided 3 : Elapsed 0.018 ms (1.838 ms / 100) 1.873 -> 1.873 ( +0.00%) [ +0.05% +0.21% +0.00% / +0.16% +0.05% +0.00%] index_add_ strided 7 : Elapsed 0.019 ms (1.874 ms / 100) 1.822 -> 1.822 ( +0.00%) [ +0.00% +0.11% +0.22% / +0.22% +0.00% +0.11%] index_copy_ strided 7 : Elapsed 0.018 ms (1.822 ms / 100) 1.876 -> 1.877 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.27% +0.21%] index_add_ perm : Elapsed 0.019 ms (1.876 ms / 100) 1.827 -> 1.825 ( -0.11%) [ +0.00% +0.00% +0.00% / +0.05% -0.11% -0.05%] index_copy_ perm : Elapsed 0.018 ms (1.827 ms / 100) 1.871 -> 1.870 ( -0.05%) [ +0.16% +0.00% +0.16% / -0.05% +0.37% +0.21%] index_add_ perm_sorted : Elapsed 0.019 ms (1.874 ms / 100) 1.824 -> 1.824 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.44% +0.16%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.828 ms / 100) 8.231 -> 8.231 ( +0.00%) [ +0.29% +0.06% +0.00% / +0.00% +0.39% +0.18%] index_select const : Elapsed 0.083 ms (8.255 ms / 100) 8.264 -> 8.257 ( -0.08%) [ +0.00% +0.24% +0.04% / -0.08% +0.10% -0.02%] index_select wrap : Elapsed 0.083 ms (8.264 ms / 100) 8.249 -> 8.263 ( +0.17%) [ +0.05% +0.27% +0.00% / +0.19% +0.40% +0.17%] index_select linear : Elapsed 0.083 ms (8.253 ms / 100) 8.254 -> 8.249 ( -0.06%) [ +0.00% +0.01% +0.06% / +0.22% -0.06% +0.18%] index_select reverse : Elapsed 0.083 ms (8.254 ms / 100) 8.230 -> 8.229 ( -0.01%) [ +0.00% +0.22% +0.18% / -0.01% +0.33% +0.17%] index_select skip64 : Elapsed 0.082 ms (8.230 ms / 100) 8.235 -> 8.234 ( -0.01%) [ +0.18% +0.00% +0.04% / +0.17% -0.01% +0.00%] index_select skip256 : Elapsed 0.082 ms (8.250 ms / 100) 8.245 -> 8.260 ( +0.18%) [ +0.00% +0.17% +0.30% / +0.18% +0.35% +0.44%] index_select spread : Elapsed 0.082 ms (8.245 ms / 100) 8.262 -> 8.262 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +0.17% +0.16%] index_select strided 3 : Elapsed 0.083 ms (8.270 ms / 100) 8.262 -> 8.262 ( +0.00%) [ +0.12% +0.00% +0.11% / +0.00% +0.23% +0.38%] index_select random : Elapsed 0.083 ms (8.272 ms / 100) 8.247 -> 8.259 ( +0.15%) [ +0.15% +0.00% +0.19% / +0.18% +0.45% +0.15%] index_select random_sorted : Elapsed 0.083 ms (8.259 ms / 100) B = [40, 4, 16, 20] (stride (16, 640, 1, 2560)) A = [40, 4, 16, 5] (stride (1, 40, 800, 160)) dim = 3 1.945 -> 1.947 ( +0.10%) [ +0.00% +0.05% +0.15% / +0.10% +0.57% +0.72%] index_add_ linear : Elapsed 0.019 ms (1.945 ms / 100) 1.895 -> 1.898 ( +0.16%) [ +0.05% +0.00% +0.11% / +0.16% +0.42% +0.47%] index_copy_ linear : Elapsed 0.019 ms (1.896 ms / 100) 1.945 -> 1.945 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.72% +0.67%] index_add_ reverse : Elapsed 0.019 ms (1.945 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.00% +0.11% +0.32% / +0.05% +0.69% +0.63%] index_copy_ reverse : Elapsed 0.019 ms (1.893 ms / 100) 1.936 -> 1.937 ( +0.05%) [ +0.10% +0.00% +0.05% / +0.05% +0.72% +0.77%] index_add_ spread : Elapsed 0.019 ms (1.938 ms / 100) 1.879 -> 1.886 ( +0.37%) [ +0.00% +0.00% +0.27% / +0.37% +0.96% +1.17%] index_copy_ spread : Elapsed 0.019 ms (1.879 ms / 100) 1.944 -> 1.950 ( +0.31%) [ +0.00% +0.31% +0.36% / +0.31% +1.39% +1.44%] index_add_ strided 3 : Elapsed 0.019 ms (1.944 ms / 100) 1.889 -> 1.897 ( +0.42%) [ +0.00% +0.21% +0.53% / +0.42% +1.22% +1.22%] index_copy_ strided 3 : Elapsed 0.019 ms (1.889 ms / 100) 1.935 -> 1.941 ( +0.31%) [ +0.00% +0.10% +0.21% / +0.31% +0.72% +0.72%] index_add_ strided 7 : Elapsed 0.019 ms (1.935 ms / 100) 1.885 -> 1.893 ( +0.42%) [ +0.00% +0.00% +0.37% / +0.42% +0.80% +0.80%] index_copy_ strided 7 : Elapsed 0.019 ms (1.885 ms / 100) 1.938 -> 1.941 ( +0.15%) [ +0.10% +0.00% +0.26% / +0.15% +0.41% +0.57%] index_add_ perm : Elapsed 0.019 ms (1.940 ms / 100) 1.884 -> 1.889 ( +0.27%) [ +0.00% +0.00% +0.27% / +0.27% +0.74% +0.85%] index_copy_ perm : Elapsed 0.019 ms (1.884 ms / 100) 1.937 -> 1.943 ( +0.31%) [ +0.26% +0.00% +0.15% / +0.31% +0.72% +0.57%] index_add_ perm_sorted : Elapsed 0.019 ms (1.942 ms / 100) 1.886 -> 1.889 ( +0.16%) [ +0.16% +0.00% +0.42% / +0.16% +0.85% +0.80%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.889 ms / 100) 8.524 -> 8.521 ( -0.04%) [ +0.00% +0.01% +0.20% / +0.04% +0.45% -0.04%] index_select const : Elapsed 0.085 ms (8.524 ms / 100) 8.544 -> 8.549 ( +0.06%) [ +0.04% +0.25% +0.00% / +0.07% +0.06% +0.26%] index_select wrap : Elapsed 0.085 ms (8.547 ms / 100) 8.549 -> 8.545 ( -0.05%) [ +0.15% +0.00% +0.09% / -0.05% +0.07% -0.01%] index_select linear : Elapsed 0.086 ms (8.562 ms / 100) 8.537 -> 8.537 ( +0.00%) [ +0.00% +0.25% +0.02% / +0.00% +0.21% +0.26%] index_select reverse : Elapsed 0.085 ms (8.537 ms / 100) 8.524 -> 8.517 ( -0.08%) [ +0.00% +0.06% +0.20% / +0.28% -0.08% +0.19%] index_select skip64 : Elapsed 0.085 ms (8.524 ms / 100) 8.510 -> 8.520 ( +0.12%) [ +0.00% +0.36% +0.29% / +0.32% +0.16% +0.12%] index_select skip256 : Elapsed 0.085 ms (8.510 ms / 100) 8.557 -> 8.559 ( +0.02%) [ +0.05% +0.36% +0.00% / +0.22% +0.35% +0.02%] index_select spread : Elapsed 0.086 ms (8.561 ms / 100) 8.550 -> 8.559 ( +0.11%) [ +0.00% +0.28% +0.23% / +0.29% +0.11% +0.15%] index_select strided 3 : Elapsed 0.086 ms (8.550 ms / 100) 8.549 -> 8.552 ( +0.04%) [ +0.00% +0.13% +0.29% / +0.04% +0.39% +0.28%] index_select random : Elapsed 0.085 ms (8.549 ms / 100) 8.553 -> 8.558 ( +0.06%) [ +0.09% +0.00% +0.21% / +0.06% +0.18% +0.18%] index_select random_sorted : Elapsed 0.086 ms (8.561 ms / 100) out_shape = [20, 5, 4, 16] in_shape = [40, 5, 4, 16] idx_dim = 0 B = [20, 5, 4, 16] (stride (320, 64, 1, 4)) A = [40, 5, 4, 16] (stride (5, 1, 200, 800)) dim = 0 2.445 -> 2.452 ( +0.29%) [ +0.00% +0.08% +0.16% / +0.57% +0.37% +0.29%] index_select const : Elapsed 0.024 ms (2.445 ms / 100) 2.462 -> 2.459 ( -0.12%) [ +0.00% +0.12% +0.12% / +0.04% -0.12% -0.08%] index_select wrap : Elapsed 0.025 ms (2.462 ms / 100) 2.461 -> 2.460 ( -0.04%) [ +0.00% +0.12% +0.16% / -0.04% +0.08% +0.12%] index_select linear : Elapsed 0.025 ms (2.461 ms / 100) 2.461 -> 2.458 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.04% -0.12% +0.08%] index_select reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.449 -> 2.448 ( -0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.33% +0.16%] index_select skip256 : Elapsed 0.024 ms (2.449 ms / 100) 2.466 -> 2.469 ( +0.12%) [ +0.28% +0.20% +0.00% / +0.12% +0.24% +0.28%] index_select spread : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.466 ( -0.16%) [ +0.00% +0.08% +0.00% / +0.00% +0.00% -0.16%] index_select strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.33% +0.00% +0.24% / +0.08% +0.12% +0.20%] index_select strided 5 : Elapsed 0.025 ms (2.465 ms / 100) 2.463 -> 2.466 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.32% +0.41%] index_select strided 7 : Elapsed 0.025 ms (2.466 ms / 100) 2.450 -> 2.452 ( +0.08%) [ +0.00% +0.08% +0.12% / +0.08% +0.24% +0.37%] index_select strided 8 : Elapsed 0.025 ms (2.450 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.12% +0.08% +0.12%] index_select strided 16 : Elapsed 0.025 ms (2.452 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.00% +0.12% +0.04% / -0.04% +0.08% +0.24%] index_select random : Elapsed 0.025 ms (2.465 ms / 100) 2.465 -> 2.464 ( -0.04%) [ +0.20% +0.00% +0.00% / +0.08% +0.08% -0.04%] index_select random_sorted : Elapsed 0.025 ms (2.470 ms / 100) 2.463 -> 2.463 ( +0.00%) [ +0.08% +0.00% +0.20% / +0.08% +0.16% +0.00%] index_select perm : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.460 ( -0.36%) [ +0.24% +0.08% +0.00% / +0.04% -0.36% -0.32%] index_select perm_sorted : Elapsed 0.025 ms (2.475 ms / 100) B = [20, 5, 4, 16] (stride (320, 16, 80, 1)) A = [40, 5, 4, 16] (stride (16, 640, 3200, 1)) dim = 0 2.446 -> 2.447 ( +0.04%) [ +0.04% +0.16% +0.00% / +0.16% +0.08% +0.04%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.460 -> 2.450 ( -0.41%) [ +0.04% +0.00% +0.04% / +0.08% -0.41% -0.37%] index_select wrap : Elapsed 0.025 ms (2.461 ms / 100) 2.459 -> 2.451 ( -0.33%) [ +0.12% +0.20% +0.00% / +0.20% -0.33% -0.20%] index_select linear : Elapsed 0.025 ms (2.462 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.12% +0.00% +0.29% / +0.20% -0.08% +0.04%] index_select reverse : Elapsed 0.025 ms (2.459 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.08% +0.20% +0.00% / +0.04% -0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.449 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.29% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.459 -> 2.459 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.16% +0.04%] index_select spread : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.08% +0.24% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.447 -> 2.454 ( +0.29%) [ +0.00% +0.12% +0.12% / +0.29% +0.33% +0.29%] index_select strided 5 : Elapsed 0.024 ms (2.447 ms / 100) 2.460 -> 2.456 ( -0.16%) [ +0.00% +0.04% +0.04% / -0.16% +0.00% +0.08%] index_select strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.448 -> 2.449 ( +0.04%) [ +0.00% +0.00% +0.08% / +0.04% +0.16% +0.12%] index_select strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.25% +0.16%] index_select strided 16 : Elapsed 0.024 ms (2.449 ms / 100) 2.457 -> 2.454 ( -0.12%) [ +0.20% +0.00% +0.00% / -0.12% +0.08% +0.28%] index_select random : Elapsed 0.025 ms (2.462 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.04% +0.16% -0.08%] index_select random_sorted : Elapsed 0.025 ms (2.456 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.04% +0.00% +0.24% / +0.12% +0.20% +0.08%] index_select perm : Elapsed 0.025 ms (2.459 ms / 100) 2.458 -> 2.456 ( -0.08%) [ +0.04% +0.00% +0.16% / +0.24% +0.08% -0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [20, 5, 4, 16] (stride (320, 1, 80, 5)) A = [40, 5, 4, 16] (stride (320, 16, 80, 1)) dim = 0 2.392 -> 2.393 ( +0.04%) [ +0.00% +0.04% +0.17% / +0.04% +0.29% +0.46%] index_select const : Elapsed 0.024 ms (2.392 ms / 100) 2.403 -> 2.404 ( +0.04%) [ +0.21% +0.00% +0.08% / +0.12% +0.17% +0.04%] index_select wrap : Elapsed 0.024 ms (2.408 ms / 100) 2.403 -> 2.405 ( +0.08%) [ +0.12% +0.00% +0.04% / +0.12% +0.17% +0.08%] index_select linear : Elapsed 0.024 ms (2.406 ms / 100) 2.405 -> 2.403 ( -0.08%) [ +0.08% +0.12% +0.00% / +0.21% -0.08% -0.08%] index_select reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.392 -> 2.395 ( +0.13%) [ +0.13% +0.00% +0.25% / +0.13% +0.25% +0.21%] index_select skip64 : Elapsed 0.024 ms (2.395 ms / 100) 2.393 -> 2.399 ( +0.25%) [ +0.00% +0.08% +0.04% / +0.33% +0.33% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.393 ms / 100) 2.403 -> 2.403 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.21% +0.21%] index_select spread : Elapsed 0.024 ms (2.404 ms / 100) 2.407 -> 2.403 ( -0.17%) [ +0.08% +0.00% +0.04% / +0.08% -0.17% +0.04%] index_select strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.399 -> 2.398 ( -0.04%) [ +0.21% +0.04% +0.00% / +0.00% +0.00% -0.04%] index_select strided 5 : Elapsed 0.024 ms (2.404 ms / 100) 2.401 -> 2.406 ( +0.21%) [ +0.17% +0.08% +0.00% / +0.21% +0.25% +0.25%] index_select strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.394 -> 2.393 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.21% +0.33%] index_select strided 8 : Elapsed 0.024 ms (2.395 ms / 100) 2.393 -> 2.397 ( +0.17%) [ +0.29% +0.29% +0.00% / +0.17% +0.42% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.400 ms / 100) 2.401 -> 2.401 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.08% +0.04%] index_select random : Elapsed 0.024 ms (2.401 ms / 100) 2.403 -> 2.402 ( -0.04%) [ +0.04% +0.21% +0.00% / +0.04% +0.00% -0.04%] index_select random_sorted : Elapsed 0.024 ms (2.404 ms / 100) 2.409 -> 2.406 ( -0.12%) [ +0.00% +0.00% +0.25% / -0.12% -0.12% -0.12%] index_select perm : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.398 ( -0.33%) [ +0.00% +0.08% +0.04% / +0.04% -0.33% -0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) B = [20, 5, 4, 16] (stride (1, 20, 1600, 100)) A = [40, 5, 4, 16] (stride (4, 2560, 1, 160)) dim = 0 2.397 -> 2.397 ( +0.00%) [ +0.04% +0.08% +0.00% / +0.00% +0.25% +0.17%] index_select const : Elapsed 0.024 ms (2.398 ms / 100) 2.411 -> 2.405 ( -0.25%) [ +0.00% +0.12% +0.00% / +0.08% -0.12% -0.25%] index_select wrap : Elapsed 0.024 ms (2.411 ms / 100) 2.412 -> 2.403 ( -0.37%) [ +0.04% +0.00% +0.04% / -0.08% -0.17% -0.37%] index_select linear : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.408 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.08% +0.04%] index_select reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.400 -> 2.400 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.04% +0.04% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.401 ms / 100) 2.397 -> 2.399 ( +0.08%) [ +0.04% +0.00% +0.08% / +0.08% +0.33% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.398 ms / 100) 2.417 -> 2.419 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +0.33% +0.29%] index_select spread : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.417 ( +0.12%) [ +0.21% +0.00% +0.12% / +0.12% +0.37% +0.25%] index_select strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.407 -> 2.408 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.04% +0.25% +0.25%] index_select strided 5 : Elapsed 0.024 ms (2.407 ms / 100) 2.416 -> 2.420 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.37% +0.29%] index_select strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.12% +0.12% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.407 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.25% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.407 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.17% +0.00% +0.08% / +0.04% +0.25% +0.33%] index_select random : Elapsed 0.024 ms (2.416 ms / 100) 2.414 -> 2.410 ( -0.17%) [ +0.21% +0.04% +0.00% / -0.04% -0.17% +0.00%] index_select random_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.413 -> 2.415 ( +0.08%) [ +0.21% +0.00% +0.25% / +0.21% +0.08% +0.21%] index_select perm : Elapsed 0.024 ms (2.418 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.04% +0.17% +0.00% / -0.04% +0.21% +0.17%] index_select perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) B = [20, 5, 4, 16] (stride (1, 20, 1600, 100)) A = [40, 5, 4, 16] (stride (5, 1, 3200, 200)) dim = 0 1.461 -> 1.457 ( -0.27%) [ +0.41% +0.21% +0.00% / -0.27% +0.07% -0.27%] index_select const : Elapsed 0.015 ms (1.467 ms / 100) 1.484 -> 1.465 ( -1.28%) [ +0.07% +0.13% +0.00% / -1.21% -1.28% -0.94%] index_select wrap : Elapsed 0.015 ms (1.485 ms / 100) 1.482 -> 1.465 ( -1.15%) [ +0.00% +0.00% +0.00% / -1.15% -0.94% -0.88%] index_select linear : Elapsed 0.015 ms (1.482 ms / 100) 1.484 -> 1.461 ( -1.55%) [ +0.00% +0.00% +0.07% / -1.55% -1.08% -1.35%] index_select reverse : Elapsed 0.015 ms (1.484 ms / 100) 1.463 -> 1.456 ( -0.48%) [ +0.00% +0.07% +0.00% / -0.34% -0.48% -0.41%] index_select skip64 : Elapsed 0.015 ms (1.463 ms / 100) 1.464 -> 1.456 ( -0.55%) [ +0.07% +0.07% +0.00% / -0.55% -0.48% -0.27%] index_select skip256 : Elapsed 0.015 ms (1.465 ms / 100) 1.499 -> 1.473 ( -1.73%) [ +0.00% +0.07% +0.00% / -1.73% -1.53% -1.20%] index_select spread : Elapsed 0.015 ms (1.499 ms / 100) 1.501 -> 1.478 ( -1.53%) [ +0.00% +0.00% +0.00% / -1.53% -1.07% -1.13%] index_select strided 3 : Elapsed 0.015 ms (1.501 ms / 100) 1.490 -> 1.471 ( -1.28%) [ +0.00% +0.00% +0.00% / -1.28% -0.81% -1.07%] index_select strided 5 : Elapsed 0.015 ms (1.490 ms / 100) 1.495 -> 1.478 ( -1.14%) [ +0.00% +0.20% +0.00% / -1.14% -0.67% -0.54%] index_select strided 7 : Elapsed 0.015 ms (1.495 ms / 100) 1.476 -> 1.461 ( -1.02%) [ +0.14% +0.07% +0.00% / -1.02% -0.61% -0.54%] index_select strided 8 : Elapsed 0.015 ms (1.478 ms / 100) 1.474 -> 1.462 ( -0.81%) [ +0.00% +0.20% +0.00% / -0.81% -0.68% -0.41%] index_select strided 16 : Elapsed 0.015 ms (1.474 ms / 100) 1.489 -> 1.477 ( -0.81%) [ +0.27% +0.40% +0.00% / -0.81% -0.47% -0.54%] index_select random : Elapsed 0.015 ms (1.493 ms / 100) 1.491 -> 1.474 ( -1.14%) [ +0.00% +0.00% +0.07% / -1.07% -1.14% -1.07%] index_select random_sorted : Elapsed 0.015 ms (1.491 ms / 100) 1.494 -> 1.479 ( -1.00%) [ +0.13% +0.00% +0.13% / -0.94% -0.67% -1.00%] index_select perm : Elapsed 0.015 ms (1.496 ms / 100) 1.494 -> 1.476 ( -1.20%) [ +0.27% +0.07% +0.00% / -1.20% -0.94% -0.87%] index_select perm_sorted : Elapsed 0.015 ms (1.498 ms / 100) B = [20, 5, 4, 16] (stride (4, 80, 1, 400)) A = [40, 5, 4, 16] (stride (1, 160, 40, 800)) dim = 0 2.416 -> 2.412 ( -0.17%) [ +0.08% +0.00% +0.00% / -0.17% +0.25% +0.12%] index_select const : Elapsed 0.024 ms (2.418 ms / 100) 2.424 -> 2.419 ( -0.21%) [ +0.00% +0.08% +0.00% / +0.04% -0.08% -0.21%] index_select wrap : Elapsed 0.024 ms (2.424 ms / 100) 2.420 -> 2.420 ( +0.00%) [ +0.00% +0.21% +0.17% / +0.17% +0.12% +0.00%] index_select linear : Elapsed 0.024 ms (2.420 ms / 100) 2.419 -> 2.422 ( +0.12%) [ +0.00% +0.08% +0.21% / +0.33% +0.12% +0.12%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.08% +0.12% +0.17%] index_select skip64 : Elapsed 0.024 ms (2.419 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.17% +0.17% +0.00% / +0.12% +0.25% +0.33%] index_select skip256 : Elapsed 0.024 ms (2.415 ms / 100) 2.427 -> 2.426 ( -0.04%) [ +0.00% +0.00% +0.08% / -0.04% +0.21% +0.21%] index_select spread : Elapsed 0.024 ms (2.427 ms / 100) 2.429 -> 2.428 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.12% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.427 ( +0.00%) [ +0.04% +0.21% +0.00% / +0.00% +0.04% +0.12%] index_select strided 5 : Elapsed 0.024 ms (2.428 ms / 100) 2.429 -> 2.430 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.04% +0.04% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.429 ms / 100) 2.430 -> 2.430 ( +0.00%) [ +0.00% +0.08% +0.12% / +0.00% +0.25% +0.16%] index_select strided 8 : Elapsed 0.024 ms (2.430 ms / 100) 2.428 -> 2.425 ( -0.12%) [ +0.12% +0.00% +0.04% / -0.12% +0.21% +0.25%] index_select strided 16 : Elapsed 0.024 ms (2.431 ms / 100) 2.427 -> 2.427 ( +0.00%) [ +0.00% +0.04% +0.12% / +0.00% +0.29% +0.37%] index_select random : Elapsed 0.024 ms (2.427 ms / 100) 2.425 -> 2.429 ( +0.16%) [ +0.00% +0.21% +0.04% / +0.29% +0.16% +0.21%] index_select random_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.424 -> 2.429 ( +0.21%) [ +0.45% +0.33% +0.00% / +0.25% +0.21% +0.33%] index_select perm : Elapsed 0.024 ms (2.435 ms / 100) 2.427 -> 2.427 ( +0.00%) [ +0.12% +0.04% +0.00% / +0.00% +0.12% +0.25%] index_select perm_sorted : Elapsed 0.024 ms (2.430 ms / 100) out_shape = [40, 20, 4, 16] in_shape = [40, 5, 4, 16] idx_dim = 1 B = [40, 20, 4, 16] (stride (1280, 1, 320, 20)) A = [40, 5, 4, 16] (stride (320, 4, 1, 20)) dim = 1 1.833 -> 1.833 ( +0.00%) [ +0.00% +0.27% +0.11% / +0.00% +0.16% +0.05%] index_add_ linear : Elapsed 0.018 ms (1.833 ms / 100) 1.790 -> 1.792 ( +0.11%) [ +0.00% +0.22% +0.34% / +0.11% +0.45% +0.73%] index_copy_ linear : Elapsed 0.018 ms (1.790 ms / 100) 1.834 -> 1.835 ( +0.05%) [ +0.22% +0.00% +0.16% / +0.05% +0.05% +0.16%] index_add_ reverse : Elapsed 0.018 ms (1.838 ms / 100) 1.788 -> 1.795 ( +0.39%) [ +0.00% +0.06% +0.50% / +0.39% +0.73% +0.67%] index_copy_ reverse : Elapsed 0.018 ms (1.788 ms / 100) 1.852 -> 1.855 ( +0.16%) [ +0.00% +0.16% +0.11% / +0.16% +0.22% +0.16%] index_add_ spread : Elapsed 0.019 ms (1.852 ms / 100) 1.818 -> 1.826 ( +0.44%) [ +0.11% +0.00% +0.11% / +0.44% +0.50% +0.66%] index_copy_ spread : Elapsed 0.018 ms (1.820 ms / 100) 1.852 -> 1.852 ( +0.00%) [ +0.16% +0.05% +0.00% / +0.00% +0.22% +0.05%] index_add_ strided 3 : Elapsed 0.019 ms (1.855 ms / 100) 1.818 -> 1.822 ( +0.22%) [ +0.00% +0.11% +0.11% / +0.22% +0.50% +0.33%] index_copy_ strided 3 : Elapsed 0.018 ms (1.818 ms / 100) 1.849 -> 1.853 ( +0.22%) [ +0.27% +0.27% +0.00% / +0.43% +0.22% +0.27%] index_add_ strided 7 : Elapsed 0.019 ms (1.854 ms / 100) 1.819 -> 1.825 ( +0.33%) [ +0.00% +0.05% +0.22% / +0.55% +0.49% +0.33%] index_copy_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.853 -> 1.854 ( +0.05%) [ +0.16% +0.00% +0.05% / +0.16% +0.05% +0.27%] index_add_ perm : Elapsed 0.019 ms (1.856 ms / 100) 1.820 -> 1.824 ( +0.22%) [ +0.00% +0.27% +0.16% / +0.22% +0.44% +0.38%] index_copy_ perm : Elapsed 0.018 ms (1.820 ms / 100) 1.850 -> 1.851 ( +0.05%) [ +0.00% +0.22% +0.16% / +0.05% +0.05% +0.16%] index_add_ perm_sorted : Elapsed 0.019 ms (1.850 ms / 100) 1.821 -> 1.824 ( +0.16%) [ +0.00% +0.22% +0.05% / +0.16% +0.27% +0.27%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.821 ms / 100) 8.289 -> 8.288 ( -0.01%) [ +0.00% +0.02% +0.05% / -0.01% +0.24% +0.40%] index_select const : Elapsed 0.083 ms (8.289 ms / 100) 8.296 -> 8.316 ( +0.24%) [ +0.00% +0.12% +0.12% / +0.24% +0.35% +0.39%] index_select wrap : Elapsed 0.083 ms (8.296 ms / 100) 8.281 -> 8.294 ( +0.16%) [ +0.00% +0.37% +0.13% / +0.16% +0.47% +0.57%] index_select linear : Elapsed 0.083 ms (8.281 ms / 100) 8.284 -> 8.302 ( +0.22%) [ +0.00% +0.18% +0.17% / +0.22% +0.49% +0.40%] index_select reverse : Elapsed 0.083 ms (8.284 ms / 100) 8.284 -> 8.292 ( +0.10%) [ +0.06% +0.36% +0.00% / +0.10% +0.45% +0.23%] index_select skip64 : Elapsed 0.083 ms (8.289 ms / 100) 8.290 -> 8.280 ( -0.12%) [ +0.00% +0.01% +0.06% / -0.12% +0.17% +0.29%] index_select skip256 : Elapsed 0.083 ms (8.290 ms / 100) 8.288 -> 8.305 ( +0.21%) [ +0.00% +0.47% +0.19% / +0.27% +0.21% +0.51%] index_select spread : Elapsed 0.083 ms (8.288 ms / 100) 8.299 -> 8.310 ( +0.13%) [ +0.18% +0.00% +0.04% / +0.28% +0.13% +0.39%] index_select strided 3 : Elapsed 0.083 ms (8.314 ms / 100) 8.301 -> 8.303 ( +0.02%) [ +0.08% +0.10% +0.00% / +0.02% +0.52% +0.24%] index_select random : Elapsed 0.083 ms (8.308 ms / 100) 8.287 -> 8.295 ( +0.10%) [ +0.00% +0.21% +0.08% / +0.10% +0.74% +0.69%] index_select random_sorted : Elapsed 0.083 ms (8.287 ms / 100) B = [40, 20, 4, 16] (stride (16, 2560, 640, 1)) A = [40, 5, 4, 16] (stride (320, 1, 80, 5)) dim = 1 1.714 -> 1.715 ( +0.06%) [ +0.06% +0.18% +0.00% / +0.06% +0.47% +0.47%] index_add_ linear : Elapsed 0.017 ms (1.715 ms / 100) 1.670 -> 1.673 ( +0.18%) [ +0.00% +0.24% +0.54% / +0.18% +0.60% +0.78%] index_copy_ linear : Elapsed 0.017 ms (1.670 ms / 100) 1.715 -> 1.716 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.12% +0.17%] index_add_ reverse : Elapsed 0.017 ms (1.715 ms / 100) 1.670 -> 1.672 ( +0.12%) [ +0.00% +0.30% +0.00% / +0.12% +0.60% +0.42%] index_copy_ reverse : Elapsed 0.017 ms (1.670 ms / 100) 1.700 -> 1.701 ( +0.06%) [ +0.00% +0.12% +0.29% / +0.06% +1.06% +1.12%] index_add_ spread : Elapsed 0.017 ms (1.700 ms / 100) 1.658 -> 1.658 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.39% +1.15%] index_copy_ spread : Elapsed 0.017 ms (1.658 ms / 100) 1.711 -> 1.709 ( -0.12%) [ +0.12% +0.23% +0.00% / -0.12% +1.05% +0.76%] index_add_ strided 3 : Elapsed 0.017 ms (1.713 ms / 100) 1.671 -> 1.670 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.017 ms (1.671 ms / 100) 1.708 -> 1.707 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.59% +0.70%] index_add_ strided 7 : Elapsed 0.017 ms (1.709 ms / 100) 1.665 -> 1.669 ( +0.24%) [ +0.24% +0.36% +0.00% / +0.24% +0.72% +0.78%] index_copy_ strided 7 : Elapsed 0.017 ms (1.669 ms / 100) 1.704 -> 1.706 ( +0.12%) [ +0.29% +0.35% +0.00% / +0.12% +0.59% +0.59%] index_add_ perm : Elapsed 0.017 ms (1.709 ms / 100) 1.661 -> 1.662 ( +0.06%) [ +0.00% +0.18% +0.06% / +0.06% +0.54% +0.84%] index_copy_ perm : Elapsed 0.017 ms (1.661 ms / 100) 1.705 -> 1.706 ( +0.06%) [ +0.06% +0.23% +0.00% / +0.06% +0.88% +0.94%] index_add_ perm_sorted : Elapsed 0.017 ms (1.706 ms / 100) 1.662 -> 1.661 ( -0.06%) [ +0.06% +0.12% +0.00% / -0.06% +0.78% +1.08%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.663 ms / 100) 8.239 -> 8.241 ( +0.02%) [ +0.04% +0.00% +0.35% / +0.02% +0.05% +0.12%] index_select const : Elapsed 0.082 ms (8.242 ms / 100) 8.244 -> 8.252 ( +0.10%) [ +0.00% +0.06% +0.05% / +0.19% +0.10% +0.25%] index_select wrap : Elapsed 0.082 ms (8.244 ms / 100) 8.238 -> 8.231 ( -0.08%) [ +0.00% +0.01% +0.32% / +0.10% +0.07% -0.08%] index_select linear : Elapsed 0.082 ms (8.238 ms / 100) 8.237 -> 8.239 ( +0.02%) [ +0.10% +0.00% +0.10% / +0.02% +0.13% +0.21%] index_select reverse : Elapsed 0.082 ms (8.245 ms / 100) 8.236 -> 8.239 ( +0.04%) [ +0.00% +0.29% +0.34% / +0.06% +0.04% +0.28%] index_select skip64 : Elapsed 0.082 ms (8.236 ms / 100) 8.246 -> 8.225 ( -0.25%) [ +0.00% +0.04% +0.08% / -0.25% +0.00% -0.11%] index_select skip256 : Elapsed 0.082 ms (8.246 ms / 100) 8.240 -> 8.237 ( -0.04%) [ +0.00% +0.12% +0.15% / -0.04% +0.19% +0.06%] index_select spread : Elapsed 0.082 ms (8.240 ms / 100) 8.239 -> 8.224 ( -0.18%) [ +0.00% +0.06% +0.04% / -0.18% +0.16% +0.24%] index_select strided 3 : Elapsed 0.082 ms (8.239 ms / 100) 8.238 -> 8.244 ( +0.07%) [ +0.00% +0.07% +0.24% / +0.11% +0.07% +0.11%] index_select random : Elapsed 0.082 ms (8.238 ms / 100) 8.227 -> 8.245 ( +0.22%) [ +0.00% +0.26% +0.16% / +0.22% +0.27% +0.41%] index_select random_sorted : Elapsed 0.082 ms (8.227 ms / 100) B = [40, 20, 4, 16] (stride (16, 2560, 640, 1)) A = [40, 5, 4, 16] (stride (20, 1, 5, 800)) dim = 1 1.899 -> 1.907 ( +0.42%) [ +0.21% +0.00% +0.42% / +0.42% +1.11% +1.11%] index_add_ linear : Elapsed 0.019 ms (1.903 ms / 100) 1.847 -> 1.853 ( +0.32%) [ +0.00% +0.27% +0.43% / +0.32% +1.25% +0.92%] index_copy_ linear : Elapsed 0.018 ms (1.847 ms / 100) 1.904 -> 1.907 ( +0.16%) [ +0.00% +0.00% +0.11% / +0.16% +1.00% +0.79%] index_add_ reverse : Elapsed 0.019 ms (1.904 ms / 100) 1.853 -> 1.857 ( +0.22%) [ +0.00% +0.00% +0.27% / +0.22% +1.30% +0.70%] index_copy_ reverse : Elapsed 0.019 ms (1.853 ms / 100) 1.903 -> 1.908 ( +0.26%) [ +0.05% +0.00% +0.05% / +0.26% +0.79% +0.68%] index_add_ spread : Elapsed 0.019 ms (1.904 ms / 100) 1.852 -> 1.857 ( +0.27%) [ +0.00% +0.16% +0.22% / +0.27% +1.13% +0.92%] index_copy_ spread : Elapsed 0.019 ms (1.852 ms / 100) 1.902 -> 1.906 ( +0.21%) [ +0.11% +0.00% +0.11% / +0.21% +1.26% +1.05%] index_add_ strided 3 : Elapsed 0.019 ms (1.904 ms / 100) 1.847 -> 1.854 ( +0.38%) [ +0.11% +0.00% +0.05% / +0.38% +1.41% +1.46%] index_copy_ strided 3 : Elapsed 0.018 ms (1.849 ms / 100) 1.903 -> 1.908 ( +0.26%) [ +0.05% +0.00% +0.16% / +0.26% +0.68% +0.58%] index_add_ strided 7 : Elapsed 0.019 ms (1.904 ms / 100) 1.848 -> 1.857 ( +0.49%) [ +0.05% +0.00% +0.38% / +0.49% +0.92% +0.81%] index_copy_ strided 7 : Elapsed 0.018 ms (1.849 ms / 100) 1.896 -> 1.907 ( +0.58%) [ +0.11% +0.00% +0.58% / +0.58% +1.37% +1.42%] index_add_ perm : Elapsed 0.019 ms (1.898 ms / 100) 1.842 -> 1.858 ( +0.87%) [ +0.22% +0.00% +0.92% / +0.87% +2.28% +1.63%] index_copy_ perm : Elapsed 0.018 ms (1.846 ms / 100) 1.903 -> 1.903 ( +0.00%) [ +0.21% +0.00% +0.26% / +0.00% +0.89% +0.79%] index_add_ perm_sorted : Elapsed 0.019 ms (1.907 ms / 100) 1.848 -> 1.854 ( +0.32%) [ +0.00% +0.27% +0.16% / +0.32% +1.03% +1.19%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.848 ms / 100) 8.578 -> 8.570 ( -0.09%) [ +0.13% +0.01% +0.00% / +0.14% -0.09% -0.02%] index_select const : Elapsed 0.086 ms (8.589 ms / 100) 8.574 -> 8.579 ( +0.06%) [ +0.21% +0.00% +0.15% / +0.28% +0.35% +0.06%] index_select wrap : Elapsed 0.086 ms (8.592 ms / 100) 8.571 -> 8.572 ( +0.01%) [ +0.00% +0.19% +0.01% / +0.01% +0.27% +0.12%] index_select linear : Elapsed 0.086 ms (8.571 ms / 100) 8.565 -> 8.573 ( +0.09%) [ +0.04% +0.18% +0.00% / +0.35% +0.22% +0.09%] index_select reverse : Elapsed 0.086 ms (8.568 ms / 100) 8.585 -> 8.569 ( -0.19%) [ +0.17% +0.27% +0.00% / +0.07% -0.19% -0.15%] index_select skip64 : Elapsed 0.086 ms (8.600 ms / 100) 8.562 -> 8.579 ( +0.20%) [ +0.32% +0.00% +0.04% / +0.20% +0.23% +0.30%] index_select skip256 : Elapsed 0.086 ms (8.589 ms / 100) 8.564 -> 8.565 ( +0.01%) [ +0.00% +0.06% +0.42% / +0.30% +0.01% +0.42%] index_select spread : Elapsed 0.086 ms (8.564 ms / 100) 8.587 -> 8.563 ( -0.28%) [ +0.02% +0.01% +0.00% / -0.20% -0.28% +0.03%] index_select strided 3 : Elapsed 0.086 ms (8.589 ms / 100) 8.571 -> 8.575 ( +0.05%) [ +0.18% +0.00% +0.15% / +0.15% +0.07% +0.05%] index_select random : Elapsed 0.086 ms (8.586 ms / 100) 8.575 -> 8.579 ( +0.05%) [ +0.19% +0.01% +0.00% / +0.05% +0.08% +0.17%] index_select random_sorted : Elapsed 0.086 ms (8.591 ms / 100) B = [40, 20, 4, 16] (stride (1, 2560, 640, 40)) A = [40, 5, 4, 16] (stride (4, 2560, 1, 160)) dim = 1 1.775 -> 1.780 ( +0.28%) [ +0.00% +0.06% +0.00% / +0.28% +0.39% +0.56%] index_add_ linear : Elapsed 0.018 ms (1.775 ms / 100) 1.730 -> 1.735 ( +0.29%) [ +0.35% +0.00% +0.35% / +0.29% +0.35% +0.52%] index_copy_ linear : Elapsed 0.017 ms (1.736 ms / 100) 1.755 -> 1.757 ( +0.11%) [ +0.28% +0.00% +0.06% / +0.11% +0.46% +0.23%] index_add_ reverse : Elapsed 0.018 ms (1.760 ms / 100) 1.713 -> 1.714 ( +0.06%) [ +0.00% +0.12% +0.06% / +0.12% +0.35% +0.06%] index_copy_ reverse : Elapsed 0.017 ms (1.713 ms / 100) 1.747 -> 1.745 ( -0.11%) [ +0.11% +0.06% +0.00% / -0.11% +1.26% +1.09%] index_add_ spread : Elapsed 0.017 ms (1.749 ms / 100) 1.699 -> 1.698 ( -0.06%) [ +0.29% +0.24% +0.00% / -0.06% +1.18% +1.35%] index_copy_ spread : Elapsed 0.017 ms (1.704 ms / 100) 1.769 -> 1.771 ( +0.11%) [ +0.45% +0.11% +0.00% / +0.11% +1.36% +1.19%] index_add_ strided 3 : Elapsed 0.018 ms (1.777 ms / 100) 1.728 -> 1.729 ( +0.06%) [ +0.46% +0.00% +0.06% / +0.06% +0.46% +0.75%] index_copy_ strided 3 : Elapsed 0.017 ms (1.736 ms / 100) 1.752 -> 1.754 ( +0.11%) [ +0.29% +0.23% +0.00% / +0.11% +0.68% +0.91%] index_add_ strided 7 : Elapsed 0.018 ms (1.757 ms / 100) 1.704 -> 1.703 ( -0.06%) [ +0.18% +0.23% +0.00% / -0.06% +0.70% +0.76%] index_copy_ strided 7 : Elapsed 0.017 ms (1.707 ms / 100) 1.766 -> 1.764 ( -0.11%) [ +0.00% +0.23% +0.06% / +0.11% -0.11% +0.17%] index_add_ perm : Elapsed 0.018 ms (1.766 ms / 100) 1.723 -> 1.714 ( -0.52%) [ +0.12% +0.12% +0.00% / +0.00% -0.46% -0.52%] index_copy_ perm : Elapsed 0.017 ms (1.725 ms / 100) 1.770 -> 1.771 ( +0.06%) [ +0.00% +0.23% +0.00% / +0.06% +0.40% +0.23%] index_add_ perm_sorted : Elapsed 0.018 ms (1.770 ms / 100) 1.730 -> 1.732 ( +0.12%) [ +0.23% +0.17% +0.00% / +0.12% +0.29% +0.23%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.734 ms / 100) 8.193 -> 8.212 ( +0.23%) [ +0.02% +0.05% +0.00% / +0.23% +0.44% +0.28%] index_select const : Elapsed 0.082 ms (8.195 ms / 100) 8.238 -> 8.235 ( -0.04%) [ +0.11% +0.00% +0.07% / -0.04% -0.01% +0.22%] index_select wrap : Elapsed 0.082 ms (8.247 ms / 100) 8.224 -> 8.234 ( +0.12%) [ +0.00% +0.07% +0.17% / +0.12% +0.33% +0.36%] index_select linear : Elapsed 0.082 ms (8.224 ms / 100) 8.210 -> 8.216 ( +0.07%) [ +0.17% +0.00% +0.04% / +0.11% +0.07% +0.17%] index_select reverse : Elapsed 0.082 ms (8.224 ms / 100) 8.204 -> 8.193 ( -0.13%) [ +0.11% +0.00% +0.00% / -0.13% -0.02% +0.12%] index_select skip64 : Elapsed 0.082 ms (8.213 ms / 100) 8.187 -> 8.200 ( +0.16%) [ +0.01% +0.00% +0.17% / +0.22% +0.20% +0.16%] index_select skip256 : Elapsed 0.082 ms (8.188 ms / 100) 8.228 -> 8.241 ( +0.16%) [ +0.00% +0.09% +0.04% / +0.16% +0.18% +0.17%] index_select spread : Elapsed 0.082 ms (8.228 ms / 100) 8.234 -> 8.241 ( +0.09%) [ +0.00% +0.21% +0.05% / +0.09% +0.28% +0.18%] index_select strided 3 : Elapsed 0.082 ms (8.234 ms / 100) 8.242 -> 8.234 ( -0.10%) [ +0.15% +0.17% +0.00% / -0.10% +0.16% +0.21%] index_select random : Elapsed 0.083 ms (8.254 ms / 100) 8.219 -> 8.215 ( -0.05%) [ +0.06% +0.13% +0.00% / -0.05% +0.19% +0.46%] index_select random_sorted : Elapsed 0.082 ms (8.224 ms / 100) B = [40, 20, 4, 16] (stride (1, 2560, 40, 160)) A = [40, 5, 4, 16] (stride (1, 2560, 640, 40)) dim = 1 1.826 -> 1.828 ( +0.11%) [ +0.38% +0.27% +0.00% / +0.11% +0.49% +0.55%] index_add_ linear : Elapsed 0.018 ms (1.833 ms / 100) 1.776 -> 1.780 ( +0.23%) [ +0.00% +0.23% +0.23% / +0.23% +0.45% +0.45%] index_copy_ linear : Elapsed 0.018 ms (1.776 ms / 100) 1.831 -> 1.831 ( +0.00%) [ +0.00% +0.05% +0.22% / +0.38% +0.05% +0.00%] index_add_ reverse : Elapsed 0.018 ms (1.831 ms / 100) 1.777 -> 1.779 ( +0.11%) [ +0.00% +0.28% +0.17% / +0.11% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.018 ms (1.777 ms / 100) 1.807 -> 1.806 ( -0.06%) [ +0.06% +0.00% +0.11% / -0.06% +1.66% +1.49%] index_add_ spread : Elapsed 0.018 ms (1.808 ms / 100) 1.757 -> 1.758 ( +0.06%) [ +0.00% +0.23% +0.11% / +0.06% +1.37% +1.31%] index_copy_ spread : Elapsed 0.018 ms (1.757 ms / 100) 1.824 -> 1.827 ( +0.16%) [ +0.11% +0.00% +0.00% / +0.16% +1.21% +0.99%] index_add_ strided 3 : Elapsed 0.018 ms (1.826 ms / 100) 1.770 -> 1.772 ( +0.11%) [ +0.23% +0.11% +0.00% / +0.11% +1.53% +1.58%] index_copy_ strided 3 : Elapsed 0.018 ms (1.774 ms / 100) 1.819 -> 1.820 ( +0.05%) [ +0.00% +0.05% +0.16% / +0.05% +0.49% +0.60%] index_add_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.767 -> 1.775 ( +0.45%) [ +0.17% +0.23% +0.00% / +0.45% +0.68% +0.74%] index_copy_ strided 7 : Elapsed 0.018 ms (1.770 ms / 100) 1.815 -> 1.818 ( +0.17%) [ +0.00% +0.11% +0.17% / +0.17% +0.99% +0.99%] index_add_ perm : Elapsed 0.018 ms (1.815 ms / 100) 1.765 -> 1.766 ( +0.06%) [ +0.28% +0.17% +0.00% / +0.06% +1.02% +1.19%] index_copy_ perm : Elapsed 0.018 ms (1.770 ms / 100) 1.827 -> 1.827 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.00% +0.71% +0.49%] index_add_ perm_sorted : Elapsed 0.018 ms (1.827 ms / 100) 1.773 -> 1.774 ( +0.06%) [ +0.28% +0.23% +0.00% / +0.06% +0.90% +0.79%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.778 ms / 100) 8.206 -> 8.202 ( -0.05%) [ +0.13% +0.00% +0.02% / -0.05% +0.28% +0.22%] index_select const : Elapsed 0.082 ms (8.217 ms / 100) 8.242 -> 8.239 ( -0.04%) [ +0.24% +0.07% +0.00% / -0.02% -0.04% +0.08%] index_select wrap : Elapsed 0.083 ms (8.262 ms / 100) 8.229 -> 8.218 ( -0.13%) [ +0.12% +0.07% +0.00% / -0.13% +0.43% +0.28%] index_select linear : Elapsed 0.082 ms (8.239 ms / 100) 8.208 -> 8.218 ( +0.12%) [ +0.44% +0.32% +0.00% / +0.12% +0.22% +0.29%] index_select reverse : Elapsed 0.082 ms (8.244 ms / 100) 8.197 -> 8.195 ( -0.02%) [ +0.07% +0.22% +0.00% / -0.02% +0.33% +0.12%] index_select skip64 : Elapsed 0.082 ms (8.203 ms / 100) 8.211 -> 8.209 ( -0.02%) [ +0.12% +0.00% +0.10% / -0.02% +0.13% +0.12%] index_select skip256 : Elapsed 0.082 ms (8.221 ms / 100) 8.226 -> 8.242 ( +0.19%) [ +0.00% +0.09% +0.17% / +0.21% +0.34% +0.19%] index_select spread : Elapsed 0.082 ms (8.226 ms / 100) 8.229 -> 8.263 ( +0.41%) [ +0.00% +0.24% +0.36% / +0.41% +0.50% +0.44%] index_select strided 3 : Elapsed 0.082 ms (8.229 ms / 100) 8.235 -> 8.247 ( +0.15%) [ +0.00% +0.06% +0.16% / +0.16% +0.15% +0.24%] index_select random : Elapsed 0.082 ms (8.235 ms / 100) 8.238 -> 8.241 ( +0.04%) [ +0.00% +0.06% +0.05% / +0.04% +0.07% +0.15%] index_select random_sorted : Elapsed 0.082 ms (8.238 ms / 100) B = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) A = [40, 5, 4, 16] (stride (320, 1, 80, 5)) dim = 1 1.704 -> 1.706 ( +0.12%) [ +0.18% +0.00% +0.18% / +0.12% +0.41% +0.23%] index_add_ linear : Elapsed 0.017 ms (1.707 ms / 100) 1.659 -> 1.662 ( +0.18%) [ +0.18% +0.12% +0.00% / +0.18% +0.48% +0.24%] index_copy_ linear : Elapsed 0.017 ms (1.662 ms / 100) 1.702 -> 1.706 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +0.59% +0.35%] index_add_ reverse : Elapsed 0.017 ms (1.704 ms / 100) 1.663 -> 1.664 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.017 ms (1.663 ms / 100) 1.709 -> 1.706 ( -0.18%) [ +0.00% +0.12% +0.29% / -0.12% +0.00% -0.18%] index_add_ spread : Elapsed 0.017 ms (1.709 ms / 100) 1.666 -> 1.665 ( -0.06%) [ +0.00% +0.12% +0.24% / +0.12% -0.06% -0.06%] index_copy_ spread : Elapsed 0.017 ms (1.666 ms / 100) 1.699 -> 1.705 ( +0.35%) [ +0.35% +0.29% +0.00% / +0.35% +1.06% +1.18%] index_add_ strided 3 : Elapsed 0.017 ms (1.705 ms / 100) 1.659 -> 1.659 ( +0.00%) [ +0.18% +0.00% +0.06% / +0.00% +1.21% +0.96%] index_copy_ strided 3 : Elapsed 0.017 ms (1.662 ms / 100) 1.720 -> 1.719 ( -0.06%) [ +0.29% +0.17% +0.00% / -0.06% +0.29% +0.12%] index_add_ strided 7 : Elapsed 0.017 ms (1.725 ms / 100) 1.681 -> 1.676 ( -0.30%) [ +0.12% +0.12% +0.00% / -0.30% +0.06% -0.06%] index_copy_ strided 7 : Elapsed 0.017 ms (1.683 ms / 100) 1.703 -> 1.707 ( +0.23%) [ +0.29% +0.41% +0.00% / +0.23% +1.17% +1.12%] index_add_ perm : Elapsed 0.017 ms (1.708 ms / 100) 1.663 -> 1.667 ( +0.24%) [ +0.00% +0.18% +0.06% / +0.24% +1.14% +1.14%] index_copy_ perm : Elapsed 0.017 ms (1.663 ms / 100) 1.706 -> 1.706 ( +0.00%) [ +0.00% +0.12% +0.18% / +0.00% +0.88% +1.00%] index_add_ perm_sorted : Elapsed 0.017 ms (1.706 ms / 100) 1.662 -> 1.666 ( +0.24%) [ +0.00% +0.06% +0.18% / +0.24% +1.32% +1.08%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.662 ms / 100) 8.234 -> 8.220 ( -0.17%) [ +0.00% +0.07% +0.01% / -0.17% +0.17% -0.07%] index_select const : Elapsed 0.082 ms (8.234 ms / 100) 8.222 -> 8.226 ( +0.05%) [ +0.18% +0.30% +0.00% / +0.05% +0.17% +0.41%] index_select wrap : Elapsed 0.082 ms (8.237 ms / 100) 8.227 -> 8.227 ( +0.00%) [ +0.18% +0.00% +0.11% / +0.00% +0.10% +0.26%] index_select linear : Elapsed 0.082 ms (8.242 ms / 100) 8.227 -> 8.235 ( +0.10%) [ +0.10% +0.00% +0.22% / +0.11% +0.10% +0.30%] index_select reverse : Elapsed 0.082 ms (8.235 ms / 100) 8.230 -> 8.212 ( -0.22%) [ +0.22% +0.06% +0.00% / +0.27% -0.22% +0.30%] index_select skip64 : Elapsed 0.082 ms (8.248 ms / 100) 8.232 -> 8.218 ( -0.17%) [ +0.34% +0.01% +0.00% / +0.19% -0.17% +0.10%] index_select skip256 : Elapsed 0.083 ms (8.260 ms / 100) 8.242 -> 8.230 ( -0.15%) [ +0.00% +0.08% +0.11% / -0.06% +0.08% -0.15%] index_select spread : Elapsed 0.082 ms (8.242 ms / 100) 8.235 -> 8.235 ( +0.00%) [ +0.23% +0.00% +0.12% / +0.17% +0.00% +0.21%] index_select strided 3 : Elapsed 0.083 ms (8.254 ms / 100) 8.234 -> 8.225 ( -0.11%) [ +0.04% +0.00% +0.05% / -0.11% +0.24% +0.17%] index_select random : Elapsed 0.082 ms (8.237 ms / 100) 8.236 -> 8.224 ( -0.15%) [ +0.02% +0.01% +0.00% / -0.15% -0.10% +0.16%] index_select random_sorted : Elapsed 0.082 ms (8.238 ms / 100) B = [40, 20, 4, 16] (stride (16, 640, 12800, 1)) A = [40, 5, 4, 16] (stride (16, 2560, 640, 1)) dim = 1 1.835 -> 1.839 ( +0.22%) [ +0.27% +0.27% +0.00% / +0.22% +2.02% +0.87%] index_add_ linear : Elapsed 0.018 ms (1.840 ms / 100) 1.793 -> 1.793 ( +0.00%) [ +0.17% +0.00% +0.11% / +0.00% +0.78% +0.84%] index_copy_ linear : Elapsed 0.018 ms (1.796 ms / 100) 1.837 -> 1.838 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +1.25% +1.03%] index_add_ reverse : Elapsed 0.018 ms (1.837 ms / 100) 1.790 -> 1.792 ( +0.11%) [ +0.00% +0.11% +0.22% / +0.11% +1.23% +1.17%] index_copy_ reverse : Elapsed 0.018 ms (1.790 ms / 100) 1.832 -> 1.841 ( +0.49%) [ +0.16% +0.00% +0.38% / +0.49% +1.04% +1.09%] index_add_ spread : Elapsed 0.018 ms (1.835 ms / 100) 1.786 -> 1.794 ( +0.45%) [ +0.17% +0.00% +0.45% / +0.45% +1.01% +1.29%] index_copy_ spread : Elapsed 0.018 ms (1.789 ms / 100) 1.820 -> 1.827 ( +0.38%) [ +0.00% +0.00% +0.27% / +0.38% +2.64% +1.92%] index_add_ strided 3 : Elapsed 0.018 ms (1.820 ms / 100) 1.772 -> 1.779 ( +0.40%) [ +0.17% +0.00% +0.56% / +0.40% +2.20% +2.43%] index_copy_ strided 3 : Elapsed 0.018 ms (1.775 ms / 100) 1.844 -> 1.847 ( +0.16%) [ +0.00% +0.11% +0.43% / +0.16% +1.14% +1.08%] index_add_ strided 7 : Elapsed 0.018 ms (1.844 ms / 100) 1.797 -> 1.800 ( +0.17%) [ +0.33% +0.00% +0.39% / +0.17% +1.28% +1.34%] index_copy_ strided 7 : Elapsed 0.018 ms (1.803 ms / 100) 1.837 -> 1.839 ( +0.11%) [ +0.22% +0.00% +0.16% / +0.11% +0.93% +0.87%] index_add_ perm : Elapsed 0.018 ms (1.841 ms / 100) 1.788 -> 1.790 ( +0.11%) [ +0.00% +0.17% +0.34% / +0.11% +1.12% +1.29%] index_copy_ perm : Elapsed 0.018 ms (1.788 ms / 100) 1.831 -> 1.837 ( +0.33%) [ +0.16% +0.00% +0.16% / +0.33% +1.58% +1.69%] index_add_ perm_sorted : Elapsed 0.018 ms (1.834 ms / 100) 1.786 -> 1.790 ( +0.22%) [ +0.00% +0.11% +0.17% / +0.22% +1.57% +1.79%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.786 ms / 100) 8.525 -> 8.513 ( -0.14%) [ +0.00% +0.21% +0.12% / -0.14% +0.07% +0.06%] index_select const : Elapsed 0.085 ms (8.525 ms / 100) 8.599 -> 8.574 ( -0.29%) [ +0.00% +0.06% +0.02% / -0.10% +0.08% -0.29%] index_select wrap : Elapsed 0.086 ms (8.599 ms / 100) 8.567 -> 8.566 ( -0.01%) [ +0.07% +0.00% +0.01% / +0.16% +0.09% -0.01%] index_select linear : Elapsed 0.086 ms (8.573 ms / 100) 8.575 -> 8.562 ( -0.15%) [ +0.06% +0.02% +0.00% / -0.15% +0.19% +0.07%] index_select reverse : Elapsed 0.086 ms (8.580 ms / 100) 8.533 -> 8.530 ( -0.04%) [ +0.23% +0.00% +0.07% / -0.04% +0.27% +0.01%] index_select skip64 : Elapsed 0.086 ms (8.553 ms / 100) 8.520 -> 8.536 ( +0.19%) [ +0.02% +0.00% +0.20% / +0.49% +0.33% +0.19%] index_select skip256 : Elapsed 0.085 ms (8.522 ms / 100) 8.554 -> 8.544 ( -0.12%) [ +0.00% +0.12% +0.21% / +0.06% -0.12% -0.12%] index_select spread : Elapsed 0.086 ms (8.554 ms / 100) 8.576 -> 8.576 ( +0.00%) [ +0.07% +0.23% +0.00% / +0.19% +0.00% +0.01%] index_select strided 3 : Elapsed 0.086 ms (8.582 ms / 100) 8.581 -> 8.580 ( -0.01%) [ +0.06% +0.00% +0.06% / -0.01% +0.00% +0.13%] index_select random : Elapsed 0.086 ms (8.586 ms / 100) 8.558 -> 8.562 ( +0.05%) [ +0.20% +0.00% +0.00% / +0.05% +0.05% +0.13%] index_select random_sorted : Elapsed 0.086 ms (8.575 ms / 100) B = [40, 20, 4, 16] (stride (80, 4, 1, 3200)) A = [40, 5, 4, 16] (stride (320, 1, 5, 20)) dim = 1 1.841 -> 1.838 ( -0.16%) [ +0.00% +0.22% +0.00% / -0.16% +0.76% +0.81%] index_add_ linear : Elapsed 0.018 ms (1.841 ms / 100) 1.800 -> 1.801 ( +0.06%) [ +0.17% +0.17% +0.00% / +0.06% +0.78% +0.72%] index_copy_ linear : Elapsed 0.018 ms (1.803 ms / 100) 1.840 -> 1.842 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.54% +0.65%] index_add_ reverse : Elapsed 0.018 ms (1.840 ms / 100) 1.800 -> 1.802 ( +0.11%) [ +0.11% +0.17% +0.00% / +0.11% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.875 -> 1.879 ( +0.21%) [ +0.00% +0.27% +0.05% / +0.21% +0.43% +0.37%] index_add_ spread : Elapsed 0.019 ms (1.875 ms / 100) 1.837 -> 1.842 ( +0.27%) [ +0.00% +0.05% +0.00% / +0.27% +0.33% +0.38%] index_copy_ spread : Elapsed 0.018 ms (1.837 ms / 100) 1.874 -> 1.877 ( +0.16%) [ +0.16% +0.32% +0.00% / +0.21% +0.48% +0.16%] index_add_ strided 3 : Elapsed 0.019 ms (1.877 ms / 100) 1.835 -> 1.838 ( +0.16%) [ +0.22% +0.11% +0.00% / +0.16% +0.33% +0.33%] index_copy_ strided 3 : Elapsed 0.018 ms (1.839 ms / 100) 1.862 -> 1.862 ( +0.00%) [ +0.11% +0.05% +0.00% / +0.00% +0.00% +0.05%] index_add_ strided 7 : Elapsed 0.019 ms (1.864 ms / 100) 1.822 -> 1.823 ( +0.05%) [ +0.05% +0.00% +0.27% / +0.05% +0.38% +0.27%] index_copy_ strided 7 : Elapsed 0.018 ms (1.823 ms / 100) 1.863 -> 1.862 ( -0.05%) [ +0.21% +0.16% +0.00% / -0.05% +0.21% +0.00%] index_add_ perm : Elapsed 0.019 ms (1.867 ms / 100) 1.827 -> 1.826 ( -0.05%) [ +0.22% +0.00% +0.05% / +0.11% +0.11% -0.05%] index_copy_ perm : Elapsed 0.018 ms (1.831 ms / 100) 1.864 -> 1.864 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.11% +0.05% +0.00%] index_add_ perm_sorted : Elapsed 0.019 ms (1.867 ms / 100) 1.827 -> 1.822 ( -0.27%) [ +0.00% +0.05% +0.00% / -0.27% +0.05% +0.05%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.827 ms / 100) 8.592 -> 8.589 ( -0.03%) [ +0.22% +0.00% +0.24% / -0.03% +0.01% +0.06%] index_select const : Elapsed 0.086 ms (8.611 ms / 100) 8.601 -> 8.594 ( -0.08%) [ +0.10% +0.02% +0.00% / -0.08% +0.03% +0.13%] index_select wrap : Elapsed 0.086 ms (8.610 ms / 100) 8.598 -> 8.593 ( -0.06%) [ +0.20% +0.00% +0.05% / -0.06% +0.19% +0.20%] index_select linear : Elapsed 0.086 ms (8.615 ms / 100) 8.590 -> 8.596 ( +0.07%) [ +0.07% +0.00% +0.10% / +0.07% +0.07% +0.27%] index_select reverse : Elapsed 0.086 ms (8.596 ms / 100) 8.585 -> 8.598 ( +0.15%) [ +0.20% +0.00% +0.20% / +0.15% +0.26% +0.30%] index_select skip64 : Elapsed 0.086 ms (8.602 ms / 100) 8.602 -> 8.595 ( -0.08%) [ +0.00% +0.01% +0.07% / -0.06% -0.08% +0.09%] index_select skip256 : Elapsed 0.086 ms (8.602 ms / 100) 8.585 -> 8.593 ( +0.09%) [ +0.00% +0.20% +0.10% / +0.30% +0.09% +0.19%] index_select spread : Elapsed 0.086 ms (8.585 ms / 100) 8.580 -> 8.600 ( +0.23%) [ +0.09% +0.00% +0.07% / +0.37% +0.24% +0.23%] index_select strided 3 : Elapsed 0.086 ms (8.588 ms / 100) 8.590 -> 8.595 ( +0.06%) [ +0.00% +0.35% +0.14% / +0.06% +0.20% +0.29%] index_select random : Elapsed 0.086 ms (8.590 ms / 100) 8.588 -> 8.603 ( +0.17%) [ +0.00% +0.13% +0.22% / +0.17% +0.24% +0.22%] index_select random_sorted : Elapsed 0.086 ms (8.588 ms / 100) B = [40, 20, 4, 16] (stride (80, 4, 1, 3200)) A = [40, 5, 4, 16] (stride (20, 1, 5, 800)) dim = 1 1.941 -> 1.948 ( +0.36%) [ +0.05% +0.00% +0.21% / +0.36% +0.93% +0.72%] index_add_ linear : Elapsed 0.019 ms (1.942 ms / 100) 1.897 -> 1.900 ( +0.16%) [ +0.05% +0.00% +0.37% / +0.16% +0.53% +0.63%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.941 -> 1.946 ( +0.26%) [ +0.00% +0.00% +0.21% / +0.26% +0.82% +0.72%] index_add_ reverse : Elapsed 0.019 ms (1.941 ms / 100) 1.899 -> 1.902 ( +0.16%) [ +0.26% +0.00% +0.00% / +0.16% +0.68% +0.63%] index_copy_ reverse : Elapsed 0.019 ms (1.904 ms / 100) 1.967 -> 1.972 ( +0.25%) [ +0.00% +0.25% +0.20% / +0.25% +0.71% +0.71%] index_add_ spread : Elapsed 0.020 ms (1.967 ms / 100) 1.931 -> 1.937 ( +0.31%) [ +0.05% +0.00% +0.16% / +0.31% +0.62% +0.47%] index_copy_ spread : Elapsed 0.019 ms (1.932 ms / 100) 1.968 -> 1.974 ( +0.30%) [ +0.15% +0.15% +0.00% / +0.30% +0.71% +0.46%] index_add_ strided 3 : Elapsed 0.020 ms (1.971 ms / 100) 1.927 -> 1.935 ( +0.42%) [ +0.16% +0.00% +0.36% / +0.42% +0.73% +0.62%] index_copy_ strided 3 : Elapsed 0.019 ms (1.930 ms / 100) 1.958 -> 1.958 ( +0.00%) [ +0.15% +0.00% +0.20% / +0.00% +0.15% +0.36%] index_add_ strided 7 : Elapsed 0.020 ms (1.961 ms / 100) 1.914 -> 1.921 ( +0.37%) [ +0.10% +0.00% +0.26% / +0.37% +0.47% +0.52%] index_copy_ strided 7 : Elapsed 0.019 ms (1.916 ms / 100) 1.961 -> 1.967 ( +0.31%) [ +0.20% +0.00% +0.41% / +0.31% +0.46% +0.71%] index_add_ perm : Elapsed 0.020 ms (1.965 ms / 100) 1.923 -> 1.928 ( +0.26%) [ +0.00% +0.00% +0.10% / +0.26% +0.31% +0.57%] index_copy_ perm : Elapsed 0.019 ms (1.923 ms / 100) 1.957 -> 1.964 ( +0.36%) [ +0.00% +0.31% +0.05% / +0.36% +0.82% +0.56%] index_add_ perm_sorted : Elapsed 0.020 ms (1.957 ms / 100) 1.915 -> 1.920 ( +0.26%) [ +0.00% +0.00% +0.10% / +0.26% +0.52% +0.37%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.915 ms / 100) 8.598 -> 8.612 ( +0.16%) [ +0.26% +0.28% +0.00% / +0.16% +0.33% +0.40%] index_select const : Elapsed 0.086 ms (8.620 ms / 100) 8.614 -> 8.609 ( -0.06%) [ +0.00% +0.14% +0.09% / -0.05% +0.16% -0.06%] index_select wrap : Elapsed 0.086 ms (8.614 ms / 100) 8.624 -> 8.598 ( -0.30%) [ +0.05% +0.00% +0.00% / -0.30% -0.13% +0.08%] index_select linear : Elapsed 0.086 ms (8.628 ms / 100) 8.607 -> 8.615 ( +0.09%) [ +0.00% +0.00% +0.34% / +0.20% +0.09% +0.13%] index_select reverse : Elapsed 0.086 ms (8.607 ms / 100) 8.601 -> 8.611 ( +0.12%) [ +0.00% +0.37% +0.06% / +0.12% +0.28% +0.15%] index_select skip64 : Elapsed 0.086 ms (8.601 ms / 100) 8.609 -> 8.610 ( +0.01%) [ +0.00% +0.05% +0.20% / +0.26% +0.19% +0.01%] index_select skip256 : Elapsed 0.086 ms (8.609 ms / 100) 8.613 -> 8.618 ( +0.06%) [ +0.15% +0.00% +0.26% / +0.06% +0.19% +0.15%] index_select spread : Elapsed 0.086 ms (8.626 ms / 100) 8.612 -> 8.621 ( +0.10%) [ +0.29% +0.00% +0.06% / +0.23% +0.23% +0.10%] index_select strided 3 : Elapsed 0.086 ms (8.637 ms / 100) 8.603 -> 8.607 ( +0.05%) [ +0.16% +0.00% +0.40% / +0.08% +0.05% +0.31%] index_select random : Elapsed 0.086 ms (8.617 ms / 100) 8.612 -> 8.607 ( -0.06%) [ +0.00% +0.16% +0.16% / -0.02% -0.06% +0.12%] index_select random_sorted : Elapsed 0.086 ms (8.612 ms / 100) B = [40, 20, 4, 16] (stride (4, 160, 1, 3200)) A = [40, 5, 4, 16] (stride (4, 2560, 1, 160)) dim = 1 1.876 -> 1.876 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +1.23% +1.07%] index_add_ linear : Elapsed 0.019 ms (1.876 ms / 100) 1.824 -> 1.825 ( +0.05%) [ +0.00% +0.05% +0.27% / +0.05% +1.32% +1.37%] index_copy_ linear : Elapsed 0.018 ms (1.824 ms / 100) 1.855 -> 1.856 ( +0.05%) [ +0.16% +0.00% +0.16% / +0.05% +1.78% +1.83%] index_add_ reverse : Elapsed 0.019 ms (1.858 ms / 100) 1.809 -> 1.817 ( +0.44%) [ +0.17% +0.11% +0.00% / +0.44% +1.77% +1.55%] index_copy_ reverse : Elapsed 0.018 ms (1.812 ms / 100) 1.862 -> 1.860 ( -0.11%) [ +0.05% +0.11% +0.00% / -0.11% +0.91% +0.97%] index_add_ spread : Elapsed 0.019 ms (1.863 ms / 100) 1.818 -> 1.818 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.83% +0.88%] index_copy_ spread : Elapsed 0.018 ms (1.818 ms / 100) 1.873 -> 1.874 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.85% +0.85%] index_add_ strided 3 : Elapsed 0.019 ms (1.873 ms / 100) 1.826 -> 1.827 ( +0.05%) [ +0.05% +0.00% +0.11% / +0.05% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.018 ms (1.827 ms / 100) 1.862 -> 1.861 ( -0.05%) [ +0.00% +0.11% +0.43% / -0.05% +0.05% +0.11%] index_add_ strided 7 : Elapsed 0.019 ms (1.862 ms / 100) 1.817 -> 1.818 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.06% +0.11%] index_copy_ strided 7 : Elapsed 0.018 ms (1.817 ms / 100) 1.855 -> 1.856 ( +0.05%) [ +0.22% +0.00% +0.43% / +0.05% +0.92% +1.08%] index_add_ perm : Elapsed 0.019 ms (1.859 ms / 100) 1.806 -> 1.810 ( +0.22%) [ +0.06% +0.00% +0.33% / +0.22% +1.27% +1.22%] index_copy_ perm : Elapsed 0.018 ms (1.807 ms / 100) 1.870 -> 1.872 ( +0.11%) [ +0.00% +0.11% +0.05% / +0.11% +1.07% +1.02%] index_add_ perm_sorted : Elapsed 0.019 ms (1.870 ms / 100) 1.826 -> 1.826 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.88% +0.99%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.826 ms / 100) 8.529 -> 8.543 ( +0.16%) [ +0.00% +0.08% +0.27% / +0.16% +0.40% +0.21%] index_select const : Elapsed 0.085 ms (8.529 ms / 100) 8.576 -> 8.576 ( +0.00%) [ +0.06% +0.05% +0.00% / +0.00% +0.23% +0.24%] index_select wrap : Elapsed 0.086 ms (8.581 ms / 100) 8.565 -> 8.569 ( +0.05%) [ +0.09% +0.00% +0.37% / +0.05% +0.27% +0.25%] index_select linear : Elapsed 0.086 ms (8.573 ms / 100) 8.551 -> 8.564 ( +0.15%) [ +0.00% +0.15% +0.28% / +0.16% +0.15% +0.23%] index_select reverse : Elapsed 0.086 ms (8.551 ms / 100) 8.543 -> 8.549 ( +0.07%) [ +0.02% +0.00% +0.04% / +0.13% +0.07% +0.35%] index_select skip64 : Elapsed 0.085 ms (8.545 ms / 100) 8.528 -> 8.539 ( +0.13%) [ +0.21% +0.00% +0.23% / +0.13% +0.27% +0.14%] index_select skip256 : Elapsed 0.085 ms (8.546 ms / 100) 8.569 -> 8.581 ( +0.14%) [ +0.15% +0.00% +0.18% / +0.14% +0.23% +0.61%] index_select spread : Elapsed 0.086 ms (8.582 ms / 100) 8.568 -> 8.572 ( +0.05%) [ +0.13% +0.00% +0.15% / +0.05% +0.58% +0.22%] index_select strided 3 : Elapsed 0.086 ms (8.579 ms / 100) 8.574 -> 8.607 ( +0.38%) [ +0.00% +0.12% +0.28% / +0.48% +0.38% +0.48%] index_select random : Elapsed 0.086 ms (8.574 ms / 100) 8.557 -> 8.575 ( +0.21%) [ +0.00% +0.23% +0.28% / +0.21% +0.41% +0.46%] index_select random_sorted : Elapsed 0.086 ms (8.557 ms / 100) out_shape = [40, 5, 20, 16] in_shape = [40, 5, 4, 16] idx_dim = 2 B = [40, 5, 20, 16] (stride (1600, 1, 80, 5)) A = [40, 5, 4, 16] (stride (1, 40, 200, 800)) dim = 2 2.243 -> 2.246 ( +0.13%) [ +0.00% +0.27% +0.13% / +0.13% +0.53% +0.40%] index_add_ linear : Elapsed 0.022 ms (2.243 ms / 100) 2.176 -> 2.182 ( +0.28%) [ +0.32% +0.28% +0.00% / +0.28% +0.46% +0.51%] index_copy_ linear : Elapsed 0.022 ms (2.183 ms / 100) 2.250 -> 2.254 ( +0.18%) [ +0.04% +0.04% +0.00% / +0.18% +0.27% +0.36%] index_add_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.178 -> 2.187 ( +0.41%) [ +0.18% +0.00% +0.41% / +0.41% +0.46% +0.64%] index_copy_ reverse : Elapsed 0.022 ms (2.182 ms / 100) 2.247 -> 2.249 ( +0.09%) [ +0.22% +0.18% +0.00% / +0.09% +0.40% +0.58%] index_add_ spread : Elapsed 0.023 ms (2.252 ms / 100) 2.179 -> 2.183 ( +0.18%) [ +0.00% +0.14% +0.05% / +0.18% +0.41% +0.46%] index_copy_ spread : Elapsed 0.022 ms (2.179 ms / 100) 2.246 -> 2.247 ( +0.04%) [ +0.04% +0.00% +0.09% / +0.04% +0.36% +0.13%] index_add_ strided 3 : Elapsed 0.022 ms (2.247 ms / 100) 2.180 -> 2.178 ( -0.09%) [ +0.09% +0.05% +0.00% / -0.09% +0.14% +0.14%] index_copy_ strided 3 : Elapsed 0.022 ms (2.182 ms / 100) 2.251 -> 2.251 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.13% +0.71%] index_add_ strided 7 : Elapsed 0.023 ms (2.254 ms / 100) 2.182 -> 2.184 ( +0.09%) [ +0.09% +0.05% +0.00% / +0.09% +0.37% +0.37%] index_copy_ strided 7 : Elapsed 0.022 ms (2.184 ms / 100) 2.250 -> 2.249 ( -0.04%) [ +0.04% +0.04% +0.00% / -0.04% +0.04% +0.27%] index_add_ perm : Elapsed 0.023 ms (2.251 ms / 100) 2.181 -> 2.183 ( +0.09%) [ +0.00% +0.05% +0.05% / +0.09% +0.28% +0.18%] index_copy_ perm : Elapsed 0.022 ms (2.181 ms / 100) 2.249 -> 2.250 ( +0.04%) [ +0.18% +0.13% +0.00% / +0.04% +0.31% +0.31%] index_add_ perm_sorted : Elapsed 0.023 ms (2.253 ms / 100) 2.179 -> 2.184 ( +0.23%) [ +0.18% +0.09% +0.00% / +0.23% +0.28% +0.50%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.183 ms / 100) 9.162 -> 9.152 ( -0.11%) [ +0.04% +0.17% +0.00% / -0.11% +0.04% +0.31%] index_select const : Elapsed 0.092 ms (9.166 ms / 100) 9.201 -> 9.202 ( +0.01%) [ +0.17% +0.18% +0.00% / +0.01% +0.34% +0.03%] index_select wrap : Elapsed 0.092 ms (9.217 ms / 100) 9.166 -> 9.179 ( +0.14%) [ +0.00% +0.36% +0.13% / +0.14% +0.58% +0.32%] index_select linear : Elapsed 0.092 ms (9.166 ms / 100) 9.185 -> 9.192 ( +0.08%) [ +0.22% +0.00% +0.13% / +0.08% +0.22% +0.32%] index_select reverse : Elapsed 0.092 ms (9.205 ms / 100) 9.156 -> 9.161 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.44% +0.20%] index_select skip64 : Elapsed 0.092 ms (9.158 ms / 100) 9.154 -> 9.163 ( +0.10%) [ +0.00% +0.03% +0.17% / +0.19% +0.27% +0.10%] index_select skip256 : Elapsed 0.092 ms (9.154 ms / 100) 9.196 -> 9.196 ( +0.00%) [ +0.00% +0.40% +0.12% / +0.00% +0.38% +0.33%] index_select spread : Elapsed 0.092 ms (9.196 ms / 100) 9.193 -> 9.206 ( +0.14%) [ +0.00% +0.17% +0.21% / +0.15% +0.20% +0.14%] index_select strided 3 : Elapsed 0.092 ms (9.193 ms / 100) 9.210 -> 9.202 ( -0.09%) [ +0.00% +0.01% +0.04% / -0.09% +0.03% -0.05%] index_select random : Elapsed 0.092 ms (9.210 ms / 100) 9.204 -> 9.211 ( +0.08%) [ +0.00% +0.15% +0.22% / +0.08% +0.34% +0.23%] index_select random_sorted : Elapsed 0.092 ms (9.204 ms / 100) B = [40, 5, 20, 16] (stride (320, 12800, 1, 20)) A = [40, 5, 4, 16] (stride (80, 1, 3200, 5)) dim = 2 2.074 -> 2.075 ( +0.05%) [ +0.14% +0.24% +0.00% / +0.05% +0.48% +0.43%] index_add_ linear : Elapsed 0.021 ms (2.077 ms / 100) 2.040 -> 2.038 ( -0.10%) [ +0.00% +0.25% +0.05% / -0.10% +0.20% +0.39%] index_copy_ linear : Elapsed 0.020 ms (2.040 ms / 100) 2.077 -> 2.078 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +0.29% +0.05%] index_add_ reverse : Elapsed 0.021 ms (2.077 ms / 100) 2.038 -> 2.041 ( +0.15%) [ +0.00% +0.20% +0.15% / +0.15% +0.64% +0.39%] index_copy_ reverse : Elapsed 0.020 ms (2.038 ms / 100) 2.108 -> 2.107 ( -0.05%) [ +0.00% +0.09% +0.28% / -0.05% +0.43% +0.47%] index_add_ spread : Elapsed 0.021 ms (2.108 ms / 100) 2.130 -> 2.126 ( -0.19%) [ +0.00% +0.23% +0.52% / -0.19% +0.42% +0.56%] index_copy_ spread : Elapsed 0.021 ms (2.130 ms / 100) 2.116 -> 2.111 ( -0.24%) [ +0.00% +0.05% +0.24% / -0.24% +0.19% +0.47%] index_add_ strided 3 : Elapsed 0.021 ms (2.116 ms / 100) 2.111 -> 2.112 ( +0.05%) [ +0.00% +0.19% +0.66% / +0.05% +0.71% +0.62%] index_copy_ strided 3 : Elapsed 0.021 ms (2.111 ms / 100) 2.112 -> 2.117 ( +0.24%) [ +0.14% +0.00% +0.14% / +0.24% +0.71% +0.80%] index_add_ strided 7 : Elapsed 0.021 ms (2.115 ms / 100) 2.133 -> 2.139 ( +0.28%) [ +0.23% +0.00% +0.23% / +0.28% +0.84% +0.75%] index_copy_ strided 7 : Elapsed 0.021 ms (2.138 ms / 100) 2.108 -> 2.111 ( +0.14%) [ +0.19% +0.24% +0.00% / +0.14% +0.47% +0.43%] index_add_ perm : Elapsed 0.021 ms (2.112 ms / 100) 2.136 -> 2.133 ( -0.14%) [ +0.00% +0.09% +0.14% / -0.14% +0.28% +0.23%] index_copy_ perm : Elapsed 0.021 ms (2.136 ms / 100) 2.119 -> 2.118 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.05% +0.09%] index_add_ perm_sorted : Elapsed 0.021 ms (2.120 ms / 100) 2.141 -> 2.142 ( +0.05%) [ +0.09% +0.00% +0.70% / +0.05% +0.14% +0.33%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.143 ms / 100) 9.244 -> 9.236 ( -0.09%) [ +0.03% +0.00% +0.28% / +0.05% +0.10% -0.09%] index_select const : Elapsed 0.092 ms (9.247 ms / 100) 9.293 -> 9.309 ( +0.17%) [ +0.22% +0.00% +0.08% / +0.17% +0.30% +0.54%] index_select wrap : Elapsed 0.093 ms (9.313 ms / 100) 9.282 -> 9.267 ( -0.16%) [ +0.13% +0.00% +0.30% / -0.16% +0.01% +0.08%] index_select linear : Elapsed 0.093 ms (9.294 ms / 100) 9.271 -> 9.264 ( -0.08%) [ +0.00% +0.05% +0.11% / -0.08% +0.03% +0.09%] index_select reverse : Elapsed 0.093 ms (9.271 ms / 100) 9.257 -> 9.244 ( -0.14%) [ +0.14% +0.00% +0.10% / -0.14% +0.16% -0.01%] index_select skip64 : Elapsed 0.093 ms (9.270 ms / 100) 9.247 -> 9.247 ( +0.00%) [ +0.29% +0.00% +0.49% / +0.19% +0.03% +0.00%] index_select skip256 : Elapsed 0.093 ms (9.274 ms / 100) 9.282 -> 9.290 ( +0.09%) [ +0.05% +0.00% +0.29% / +0.09% +0.26% +0.11%] index_select spread : Elapsed 0.093 ms (9.287 ms / 100) 9.312 -> 9.314 ( +0.02%) [ +0.16% +0.00% +0.19% / +0.02% +0.03% +0.13%] index_select strided 3 : Elapsed 0.093 ms (9.327 ms / 100) 9.298 -> 9.299 ( +0.01%) [ +0.00% +0.08% +0.09% / +0.01% +0.27% +0.24%] index_select random : Elapsed 0.093 ms (9.298 ms / 100) 9.299 -> 9.290 ( -0.10%) [ +0.11% +0.01% +0.00% / -0.10% -0.03% +0.20%] index_select random_sorted : Elapsed 0.093 ms (9.309 ms / 100) B = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) A = [40, 5, 4, 16] (stride (320, 1, 5, 20)) dim = 2 2.178 -> 2.183 ( +0.23%) [ +0.23% +0.41% +0.00% / +0.23% +0.73% +0.41%] index_add_ linear : Elapsed 0.022 ms (2.183 ms / 100) 2.143 -> 2.146 ( +0.14%) [ +0.05% +0.00% +0.28% / +0.14% +0.42% +0.51%] index_copy_ linear : Elapsed 0.021 ms (2.144 ms / 100) 2.169 -> 2.171 ( +0.09%) [ +0.37% +0.65% +0.00% / +0.09% +1.15% +1.06%] index_add_ reverse : Elapsed 0.022 ms (2.177 ms / 100) 2.134 -> 2.136 ( +0.09%) [ +0.14% +0.47% +0.00% / +0.09% +0.66% +0.75%] index_copy_ reverse : Elapsed 0.021 ms (2.137 ms / 100) 2.208 -> 2.209 ( +0.05%) [ +0.00% +0.18% +0.32% / +0.05% +1.18% +1.45%] index_add_ spread : Elapsed 0.022 ms (2.208 ms / 100) 2.226 -> 2.236 ( +0.45%) [ +0.00% +0.45% +0.63% / +0.45% +1.48% +1.48%] index_copy_ spread : Elapsed 0.022 ms (2.226 ms / 100) 2.209 -> 2.213 ( +0.18%) [ +0.14% +0.14% +0.00% / +0.18% +0.45% +0.59%] index_add_ strided 3 : Elapsed 0.022 ms (2.212 ms / 100) 2.211 -> 2.216 ( +0.23%) [ +0.00% +0.18% +0.14% / +0.23% +0.68% +0.36%] index_copy_ strided 3 : Elapsed 0.022 ms (2.211 ms / 100) 2.218 -> 2.222 ( +0.18%) [ +0.27% +0.18% +0.00% / +0.18% +0.72% +0.68%] index_add_ strided 7 : Elapsed 0.022 ms (2.224 ms / 100) 2.240 -> 2.242 ( +0.09%) [ +0.31% +0.00% +0.18% / +0.09% +0.94% +0.85%] index_copy_ strided 7 : Elapsed 0.022 ms (2.247 ms / 100) 2.208 -> 2.205 ( -0.14%) [ +0.00% +0.32% +0.41% / -0.14% +0.59% +0.54%] index_add_ perm : Elapsed 0.022 ms (2.208 ms / 100) 2.210 -> 2.215 ( +0.23%) [ +0.00% +0.18% +0.45% / +0.27% +0.50% +0.23%] index_copy_ perm : Elapsed 0.022 ms (2.210 ms / 100) 2.211 -> 2.215 ( +0.18%) [ +0.00% +0.00% +0.09% / +0.18% +0.54% +0.54%] index_add_ perm_sorted : Elapsed 0.022 ms (2.211 ms / 100) 2.211 -> 2.208 ( -0.14%) [ +0.00% +0.18% +0.09% / -0.14% +0.54% +0.36%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.211 ms / 100) 9.280 -> 9.283 ( +0.03%) [ +0.00% +0.39% +0.11% / +0.19% +0.03% +0.04%] index_select const : Elapsed 0.093 ms (9.280 ms / 100) 9.290 -> 9.288 ( -0.02%) [ +0.13% +0.00% +0.12% / -0.02% +0.10% +0.11%] index_select wrap : Elapsed 0.093 ms (9.302 ms / 100) 9.284 -> 9.300 ( +0.17%) [ +0.39% +0.01% +0.00% / +0.20% +0.17% +0.29%] index_select linear : Elapsed 0.093 ms (9.320 ms / 100) 9.282 -> 9.292 ( +0.11%) [ +0.16% +0.06% +0.00% / +0.11% +0.34% +0.29%] index_select reverse : Elapsed 0.093 ms (9.297 ms / 100) 9.284 -> 9.287 ( +0.03%) [ +0.17% +0.00% +0.06% / +0.03% +0.17% +0.03%] index_select skip64 : Elapsed 0.093 ms (9.300 ms / 100) 9.286 -> 9.290 ( +0.04%) [ +0.32% +0.00% +0.18% / +0.04% +0.26% +0.17%] index_select skip256 : Elapsed 0.093 ms (9.316 ms / 100) 9.305 -> 9.305 ( +0.00%) [ +0.21% +0.00% +0.03% / +0.02% +0.12% +0.00%] index_select spread : Elapsed 0.093 ms (9.325 ms / 100) 9.304 -> 9.287 ( -0.18%) [ +0.26% +0.02% +0.00% / -0.18% -0.02% -0.17%] index_select strided 3 : Elapsed 0.093 ms (9.328 ms / 100) 9.286 -> 9.289 ( +0.03%) [ +0.08% +0.09% +0.00% / +0.25% +0.13% +0.03%] index_select random : Elapsed 0.093 ms (9.293 ms / 100) 9.303 -> 9.311 ( +0.09%) [ +0.06% +0.06% +0.00% / +0.09% +0.17% +0.09%] index_select random_sorted : Elapsed 0.093 ms (9.309 ms / 100) B = [40, 5, 20, 16] (stride (1, 12800, 40, 800)) A = [40, 5, 4, 16] (stride (320, 1, 80, 5)) dim = 2 2.068 -> 2.063 ( -0.24%) [ +0.19% +0.19% +0.00% / -0.24% +0.53% +0.48%] index_add_ linear : Elapsed 0.021 ms (2.072 ms / 100) 2.032 -> 2.034 ( +0.10%) [ +0.00% +0.05% +0.00% / +0.10% +0.89% +0.69%] index_copy_ linear : Elapsed 0.020 ms (2.032 ms / 100) 2.068 -> 2.068 ( +0.00%) [ +0.29% +0.00% +0.05% / +0.00% +0.44% +0.39%] index_add_ reverse : Elapsed 0.021 ms (2.074 ms / 100) 2.032 -> 2.033 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.89% +0.74%] index_copy_ reverse : Elapsed 0.020 ms (2.033 ms / 100) 2.057 -> 2.059 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.78% +0.53%] index_add_ spread : Elapsed 0.021 ms (2.059 ms / 100) 2.018 -> 2.017 ( -0.05%) [ +0.00% +0.15% +0.35% / -0.05% +0.84% +0.99%] index_copy_ spread : Elapsed 0.020 ms (2.018 ms / 100) 2.066 -> 2.068 ( +0.10%) [ +0.15% +0.00% +0.15% / +0.10% +0.58% +0.73%] index_add_ strided 3 : Elapsed 0.021 ms (2.069 ms / 100) 2.029 -> 2.030 ( +0.05%) [ +0.00% +0.15% +0.15% / +0.05% +1.03% +0.94%] index_copy_ strided 3 : Elapsed 0.020 ms (2.029 ms / 100) 2.074 -> 2.072 ( -0.10%) [ +0.10% +0.10% +0.00% / -0.10% +0.63% +0.68%] index_add_ strided 7 : Elapsed 0.021 ms (2.076 ms / 100) 2.037 -> 2.042 ( +0.25%) [ +0.15% +0.29% +0.00% / +0.25% +0.98% +0.83%] index_copy_ strided 7 : Elapsed 0.020 ms (2.040 ms / 100) 2.063 -> 2.066 ( +0.15%) [ +0.24% +0.05% +0.00% / +0.15% +0.63% +0.73%] index_add_ perm : Elapsed 0.021 ms (2.068 ms / 100) 2.028 -> 2.030 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.54% +0.79%] index_copy_ perm : Elapsed 0.020 ms (2.028 ms / 100) 2.068 -> 2.067 ( -0.05%) [ +0.10% +0.10% +0.00% / -0.05% +0.39% +0.63%] index_add_ perm_sorted : Elapsed 0.021 ms (2.070 ms / 100) 2.034 -> 2.034 ( +0.00%) [ +0.25% +0.00% +0.05% / +0.00% +0.39% +0.54%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.039 ms / 100) 9.164 -> 9.161 ( -0.03%) [ +0.27% +0.00% +0.04% / +0.05% +0.03% -0.03%] index_select const : Elapsed 0.092 ms (9.189 ms / 100) 9.213 -> 9.209 ( -0.04%) [ +0.00% +0.17% +0.10% / +0.16% -0.04% -0.03%] index_select wrap : Elapsed 0.092 ms (9.213 ms / 100) 9.183 -> 9.192 ( +0.10%) [ +0.00% +0.10% +0.08% / +0.10% +0.19% +0.34%] index_select linear : Elapsed 0.092 ms (9.183 ms / 100) 9.174 -> 9.181 ( +0.08%) [ +0.05% +0.00% +0.08% / +0.08% +0.33% +0.13%] index_select reverse : Elapsed 0.092 ms (9.179 ms / 100) 9.149 -> 9.168 ( +0.21%) [ +0.24% +0.00% +0.01% / +0.27% +0.21% +0.70%] index_select skip64 : Elapsed 0.092 ms (9.171 ms / 100) 9.160 -> 9.157 ( -0.03%) [ +0.00% +0.08% +0.04% / -0.01% -0.03% +0.07%] index_select skip256 : Elapsed 0.092 ms (9.160 ms / 100) 9.206 -> 9.192 ( -0.15%) [ +0.00% +0.02% +0.04% / +0.13% -0.03% -0.15%] index_select spread : Elapsed 0.092 ms (9.206 ms / 100) 9.227 -> 9.203 ( -0.26%) [ +0.04% +0.00% +0.05% / +0.05% -0.26% -0.14%] index_select strided 3 : Elapsed 0.092 ms (9.231 ms / 100) 9.212 -> 9.195 ( -0.18%) [ +0.11% +0.25% +0.00% / +0.08% -0.18% +0.00%] index_select random : Elapsed 0.092 ms (9.222 ms / 100) 9.196 -> 9.198 ( +0.02%) [ +0.00% +0.01% +0.00% / +0.07% +0.02% +0.09%] index_select random_sorted : Elapsed 0.092 ms (9.196 ms / 100) B = [40, 5, 20, 16] (stride (80, 1, 3200, 5)) A = [40, 5, 4, 16] (stride (5, 1, 200, 800)) dim = 2 2.089 -> 2.088 ( -0.05%) [ +0.00% +0.10% +0.00% / +0.05% -0.05% -0.05%] index_add_ linear : Elapsed 0.021 ms (2.089 ms / 100) 2.024 -> 2.024 ( +0.00%) [ +0.20% +0.30% +0.00% / +0.00% +0.30% +0.05%] index_copy_ linear : Elapsed 0.020 ms (2.028 ms / 100) 2.093 -> 2.093 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.05% +0.00% +0.10%] index_add_ reverse : Elapsed 0.021 ms (2.093 ms / 100) 2.031 -> 2.034 ( +0.15%) [ +0.05% +0.00% +0.34% / +0.30% +0.15% +0.20%] index_copy_ reverse : Elapsed 0.020 ms (2.032 ms / 100) 2.083 -> 2.081 ( -0.10%) [ +0.19% +0.00% +0.05% / -0.10% +0.05% +0.29%] index_add_ spread : Elapsed 0.021 ms (2.087 ms / 100) 2.023 -> 2.026 ( +0.15%) [ +0.10% +0.20% +0.00% / +0.15% +0.40% +0.40%] index_copy_ spread : Elapsed 0.020 ms (2.025 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.05% +0.14% +0.00% / +0.00% +0.19% +0.05%] index_add_ strided 3 : Elapsed 0.021 ms (2.090 ms / 100) 2.031 -> 2.029 ( -0.10%) [ +0.15% +0.34% +0.00% / -0.10% -0.05% +0.10%] index_copy_ strided 3 : Elapsed 0.020 ms (2.034 ms / 100) 2.079 -> 2.082 ( +0.14%) [ +0.00% +0.05% +0.43% / +0.14% +0.29% +0.38%] index_add_ strided 7 : Elapsed 0.021 ms (2.079 ms / 100) 2.019 -> 2.024 ( +0.25%) [ +0.00% +0.15% +0.05% / +0.25% +0.50% +0.25%] index_copy_ strided 7 : Elapsed 0.020 ms (2.019 ms / 100) 2.080 -> 2.084 ( +0.19%) [ +0.05% +0.00% +0.14% / +0.19% +0.29% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.081 ms / 100) 2.023 -> 2.024 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.10% +0.30%] index_copy_ perm : Elapsed 0.020 ms (2.023 ms / 100) 2.073 -> 2.076 ( +0.14%) [ +0.29% +0.00% +0.19% / +0.14% +0.14% +0.34%] index_add_ perm_sorted : Elapsed 0.021 ms (2.079 ms / 100) 2.018 -> 2.017 ( -0.05%) [ +0.00% +0.10% +0.05% / -0.05% +0.15% +0.20%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.018 ms / 100) 8.729 -> 8.722 ( -0.08%) [ +0.01% +0.00% +0.14% / -0.08% +0.31% +0.29%] index_select const : Elapsed 0.087 ms (8.730 ms / 100) 8.768 -> 8.786 ( +0.21%) [ +0.10% +0.00% +0.17% / +0.31% +0.25% +0.21%] index_select wrap : Elapsed 0.088 ms (8.777 ms / 100) 8.758 -> 8.760 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.10% +0.17%] index_select linear : Elapsed 0.088 ms (8.762 ms / 100) 8.757 -> 8.750 ( -0.08%) [ +0.00% +0.11% +0.00% / -0.08% +0.31% -0.08%] index_select reverse : Elapsed 0.088 ms (8.757 ms / 100) 8.728 -> 8.721 ( -0.08%) [ +0.07% +0.00% +0.07% / -0.08% +0.06% +0.09%] index_select skip64 : Elapsed 0.087 ms (8.734 ms / 100) 8.734 -> 8.717 ( -0.19%) [ +0.00% +0.02% +0.05% / -0.19% +0.03% -0.03%] index_select skip256 : Elapsed 0.087 ms (8.734 ms / 100) 8.779 -> 8.765 ( -0.16%) [ +0.06% +0.00% +0.21% / -0.16% +0.07% -0.09%] index_select spread : Elapsed 0.088 ms (8.784 ms / 100) 8.764 -> 8.755 ( -0.10%) [ +0.00% +0.40% +0.13% / +0.19% -0.10% +0.14%] index_select strided 3 : Elapsed 0.088 ms (8.764 ms / 100) 8.785 -> 8.779 ( -0.07%) [ +0.05% +0.00% +0.18% / -0.07% +0.06% +0.20%] index_select random : Elapsed 0.088 ms (8.789 ms / 100) 8.775 -> 8.764 ( -0.13%) [ +0.09% +0.00% +0.10% / -0.13% +0.36% +0.25%] index_select random_sorted : Elapsed 0.088 ms (8.783 ms / 100) B = [40, 5, 20, 16] (stride (100, 20, 1, 4000)) A = [40, 5, 4, 16] (stride (320, 16, 80, 1)) dim = 2 2.025 -> 2.023 ( -0.10%) [ +0.35% +0.59% +0.00% / +0.44% -0.10% +0.05%] index_add_ linear : Elapsed 0.020 ms (2.032 ms / 100) 2.006 -> 2.000 ( -0.30%) [ +0.00% +0.00% +0.05% / +0.25% -0.20% -0.30%] index_copy_ linear : Elapsed 0.020 ms (2.006 ms / 100) 2.027 -> 2.021 ( -0.30%) [ +0.25% +0.10% +0.00% / +0.00% -0.20% -0.30%] index_add_ reverse : Elapsed 0.020 ms (2.032 ms / 100) 2.003 -> 1.999 ( -0.20%) [ +0.00% +0.05% +0.15% / +0.20% +0.10% -0.20%] index_copy_ reverse : Elapsed 0.020 ms (2.003 ms / 100) 2.077 -> 2.072 ( -0.24%) [ +0.10% +0.24% +0.00% / +0.34% -0.05% -0.24%] index_add_ spread : Elapsed 0.021 ms (2.079 ms / 100) 2.097 -> 2.094 ( -0.14%) [ +0.00% +0.29% +0.19% / -0.10% -0.14% +0.10%] index_copy_ spread : Elapsed 0.021 ms (2.097 ms / 100) 2.063 -> 2.059 ( -0.19%) [ +0.00% +0.39% +0.15% / +0.34% +0.19% -0.19%] index_add_ strided 3 : Elapsed 0.021 ms (2.063 ms / 100) 2.065 -> 2.069 ( +0.19%) [ +0.39% +0.39% +0.00% / +0.34% +0.19% +0.19%] index_copy_ strided 3 : Elapsed 0.021 ms (2.073 ms / 100) 2.074 -> 2.074 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.29% +0.00% +0.05%] index_add_ strided 7 : Elapsed 0.021 ms (2.074 ms / 100) 2.099 -> 2.100 ( +0.05%) [ +0.00% +0.00% +0.29% / +0.24% +0.05% +0.10%] index_copy_ strided 7 : Elapsed 0.021 ms (2.099 ms / 100) 2.077 -> 2.068 ( -0.43%) [ +0.10% +0.10% +0.00% / +0.10% -0.34% -0.43%] index_add_ perm : Elapsed 0.021 ms (2.079 ms / 100) 2.102 -> 2.096 ( -0.29%) [ +0.00% +0.10% +0.19% / -0.29% -0.05% -0.14%] index_copy_ perm : Elapsed 0.021 ms (2.102 ms / 100) 2.076 -> 2.072 ( -0.19%) [ +0.10% +0.24% +0.00% / +0.05% -0.19% -0.10%] index_add_ perm_sorted : Elapsed 0.021 ms (2.078 ms / 100) 2.095 -> 2.094 ( -0.05%) [ +0.33% +0.67% +0.00% / +0.43% -0.05% +0.05%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.102 ms / 100) 8.779 -> 8.780 ( +0.01%) [ +0.00% +0.07% +0.08% / +0.01% +0.01% +0.08%] index_select const : Elapsed 0.088 ms (8.779 ms / 100) 8.836 -> 8.849 ( +0.15%) [ +0.24% +0.00% +0.46% / +0.20% +0.15% +0.41%] index_select wrap : Elapsed 0.089 ms (8.857 ms / 100) 8.811 -> 8.806 ( -0.06%) [ +0.00% +0.08% +0.11% / +0.01% -0.06% +0.03%] index_select linear : Elapsed 0.088 ms (8.811 ms / 100) 8.811 -> 8.825 ( +0.16%) [ +0.17% +0.00% +0.01% / +0.16% +0.44% +0.36%] index_select reverse : Elapsed 0.088 ms (8.826 ms / 100) 8.763 -> 8.779 ( +0.18%) [ +0.16% +0.00% +0.29% / +0.18% +0.31% +0.18%] index_select skip64 : Elapsed 0.088 ms (8.777 ms / 100) 8.781 -> 8.782 ( +0.01%) [ +0.00% +0.06% +0.11% / +0.11% +0.01% +0.07%] index_select skip256 : Elapsed 0.088 ms (8.781 ms / 100) 8.836 -> 8.839 ( +0.03%) [ +0.00% +0.07% +0.07% / +0.03% +0.12% +0.06%] index_select spread : Elapsed 0.088 ms (8.836 ms / 100) 8.849 -> 8.851 ( +0.02%) [ +0.00% +0.00% +0.14% / +0.02% +0.12% +0.43%] index_select strided 3 : Elapsed 0.088 ms (8.849 ms / 100) 8.853 -> 8.846 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.05% +0.21%] index_select random : Elapsed 0.089 ms (8.853 ms / 100) 8.832 -> 8.836 ( +0.05%) [ +0.17% +0.00% +0.28% / +0.14% +0.05% +0.12%] index_select random_sorted : Elapsed 0.088 ms (8.847 ms / 100) B = [40, 5, 20, 16] (stride (20, 800, 1, 4000)) A = [40, 5, 4, 16] (stride (5, 1, 200, 800)) dim = 2 2.107 -> 2.108 ( +0.05%) [ +0.00% +0.38% +0.14% / +0.19% +0.05% +0.09%] index_add_ linear : Elapsed 0.021 ms (2.107 ms / 100) 2.085 -> 2.082 ( -0.14%) [ +0.00% +0.29% +0.00% / -0.05% +0.10% -0.14%] index_copy_ linear : Elapsed 0.021 ms (2.085 ms / 100) 2.114 -> 2.113 ( -0.05%) [ +0.14% +0.00% +0.09% / -0.05% +0.52% +0.14%] index_add_ reverse : Elapsed 0.021 ms (2.117 ms / 100) 2.089 -> 2.089 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.24% +0.10%] index_copy_ reverse : Elapsed 0.021 ms (2.090 ms / 100) 2.150 -> 2.150 ( +0.00%) [ +0.00% +0.09% +0.14% / +0.00% +0.09% +0.33%] index_add_ spread : Elapsed 0.022 ms (2.150 ms / 100) 2.175 -> 2.176 ( +0.05%) [ +0.18% +0.09% +0.00% / +0.05% +0.41% +0.28%] index_copy_ spread : Elapsed 0.022 ms (2.179 ms / 100) 2.148 -> 2.145 ( -0.14%) [ +0.00% +0.05% +0.09% / -0.14% +0.33% +0.19%] index_add_ strided 3 : Elapsed 0.021 ms (2.148 ms / 100) 2.149 -> 2.145 ( -0.19%) [ +0.09% +0.00% +0.00% / -0.19% +0.09% +0.00%] index_copy_ strided 3 : Elapsed 0.022 ms (2.151 ms / 100) 2.146 -> 2.145 ( -0.05%) [ +0.37% +0.00% +0.00% / -0.05% +0.09% +0.09%] index_add_ strided 7 : Elapsed 0.022 ms (2.154 ms / 100) 2.173 -> 2.174 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.05% +0.18% +0.23%] index_copy_ strided 7 : Elapsed 0.022 ms (2.175 ms / 100) 2.147 -> 2.148 ( +0.05%) [ +0.00% +0.14% +0.09% / +0.19% +0.19% +0.05%] index_add_ perm : Elapsed 0.021 ms (2.147 ms / 100) 2.151 -> 2.146 ( -0.23%) [ +0.00% +0.00% +0.19% / +0.09% -0.23% -0.09%] index_copy_ perm : Elapsed 0.022 ms (2.151 ms / 100) 2.138 -> 2.139 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.09% +0.33%] index_add_ perm_sorted : Elapsed 0.021 ms (2.139 ms / 100) 2.139 -> 2.142 ( +0.14%) [ +0.00% +0.33% +0.00% / +0.14% +0.14% +0.33%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.139 ms / 100) 8.838 -> 8.851 ( +0.15%) [ +0.16% +0.00% +0.16% / +0.15% +0.19% +0.21%] index_select const : Elapsed 0.089 ms (8.852 ms / 100) 8.902 -> 8.888 ( -0.16%) [ +0.00% +0.06% +0.02% / -0.08% -0.03% -0.16%] index_select wrap : Elapsed 0.089 ms (8.902 ms / 100) 8.871 -> 8.887 ( +0.18%) [ +0.00% +0.21% +0.01% / +0.18% +0.39% +0.19%] index_select linear : Elapsed 0.089 ms (8.871 ms / 100) 8.860 -> 8.862 ( +0.02%) [ +0.20% +0.00% +0.18% / +0.02% +0.43% +0.34%] index_select reverse : Elapsed 0.089 ms (8.878 ms / 100) 8.835 -> 8.841 ( +0.07%) [ +0.18% +0.03% +0.00% / +0.07% +0.12% +0.25%] index_select skip64 : Elapsed 0.089 ms (8.851 ms / 100) 8.838 -> 8.833 ( -0.06%) [ +0.15% +0.00% +0.23% / +0.09% -0.06% +0.06%] index_select skip256 : Elapsed 0.089 ms (8.851 ms / 100) 8.902 -> 8.898 ( -0.04%) [ +0.00% +0.26% +0.08% / -0.04% +0.03% +0.13%] index_select spread : Elapsed 0.089 ms (8.902 ms / 100) 8.888 -> 8.887 ( -0.01%) [ +0.25% +0.00% +0.26% / -0.01% +0.20% +0.05%] index_select strided 3 : Elapsed 0.089 ms (8.910 ms / 100) 8.895 -> 8.878 ( -0.19%) [ +0.00% +0.11% +0.02% / -0.19% +0.24% -0.04%] index_select random : Elapsed 0.089 ms (8.895 ms / 100) 8.885 -> 8.892 ( +0.08%) [ +0.10% +0.00% +0.16% / +0.08% +0.10% +0.09%] index_select random_sorted : Elapsed 0.089 ms (8.894 ms / 100) out_shape = [40, 5, 4, 20] in_shape = [40, 5, 4, 16] idx_dim = 3 B = [40, 5, 4, 20] (stride (400, 1, 100, 5)) A = [40, 5, 4, 16] (stride (320, 64, 16, 1)) dim = 3 3.579 -> 3.581 ( +0.06%) [ +0.08% +0.08% +0.00% / +0.06% +0.50% +0.53%] index_add_ linear : Elapsed 0.036 ms (3.582 ms / 100) 3.433 -> 3.436 ( +0.09%) [ +0.03% +0.00% +0.00% / +0.09% +0.61% +0.61%] index_copy_ linear : Elapsed 0.034 ms (3.434 ms / 100) 3.577 -> 3.577 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.84% +0.78%] index_add_ reverse : Elapsed 0.036 ms (3.577 ms / 100) 3.436 -> 3.435 ( -0.03%) [ +0.00% +0.00% +0.15% / -0.03% +0.79% +0.67%] index_copy_ reverse : Elapsed 0.034 ms (3.436 ms / 100) 3.597 -> 3.597 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.36% +0.39%] index_add_ spread : Elapsed 0.036 ms (3.600 ms / 100) 3.448 -> 3.453 ( +0.15%) [ +0.00% +0.09% +0.09% / +0.15% +0.38% +0.32%] index_copy_ spread : Elapsed 0.034 ms (3.448 ms / 100) 3.602 -> 3.600 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.28% +0.33%] index_add_ strided 3 : Elapsed 0.036 ms (3.602 ms / 100) 3.436 -> 3.440 ( +0.12%) [ +0.00% +0.09% +0.26% / +0.12% +0.76% +0.70%] index_copy_ strided 3 : Elapsed 0.034 ms (3.436 ms / 100) 3.602 -> 3.604 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.31% +0.28%] index_add_ strided 7 : Elapsed 0.036 ms (3.603 ms / 100) 3.440 -> 3.443 ( +0.09%) [ +0.00% +0.09% +0.03% / +0.09% +0.58% +0.55%] index_copy_ strided 7 : Elapsed 0.034 ms (3.440 ms / 100) 3.600 -> 3.600 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.31% +0.28%] index_add_ perm : Elapsed 0.036 ms (3.600 ms / 100) 3.453 -> 3.456 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.17% +0.14%] index_copy_ perm : Elapsed 0.035 ms (3.453 ms / 100) 3.582 -> 3.582 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.64% +0.64%] index_add_ perm_sorted : Elapsed 0.036 ms (3.582 ms / 100) 3.440 -> 3.440 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.55%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.440 ms / 100) 5.389 -> 5.388 ( -0.02%) [ +0.13% +0.13% +0.00% / +0.15% -0.02% +0.06%] index_select const : Elapsed 0.054 ms (5.396 ms / 100) 5.402 -> 5.396 ( -0.11%) [ +0.02% +0.13% +0.00% / -0.02% -0.07% -0.11%] index_select wrap : Elapsed 0.054 ms (5.403 ms / 100) 5.395 -> 5.397 ( +0.04%) [ +0.00% +0.06% +0.20% / +0.04% +0.09% +0.04%] index_select linear : Elapsed 0.054 ms (5.395 ms / 100) 5.400 -> 5.396 ( -0.07%) [ +0.00% +0.02% +0.17% / -0.07% +0.13% -0.02%] index_select reverse : Elapsed 0.054 ms (5.400 ms / 100) 5.390 -> 5.389 ( -0.02%) [ +0.07% +0.00% +0.11% / -0.02% +0.17% +0.07%] index_select skip64 : Elapsed 0.054 ms (5.394 ms / 100) 5.390 -> 5.394 ( +0.07%) [ +0.06% +0.00% +0.04% / +0.07% +0.26% +0.09%] index_select skip256 : Elapsed 0.054 ms (5.393 ms / 100) 5.403 -> 5.396 ( -0.13%) [ +0.02% +0.11% +0.00% / -0.06% +0.04% -0.13%] index_select spread : Elapsed 0.054 ms (5.404 ms / 100) 5.400 -> 5.394 ( -0.11%) [ +0.15% +0.00% +0.07% / +0.11% +0.06% -0.11%] index_select strided 3 : Elapsed 0.054 ms (5.408 ms / 100) 5.402 -> 5.395 ( -0.13%) [ +0.04% +0.00% +0.09% / +0.13% -0.13% -0.02%] index_select strided 5 : Elapsed 0.054 ms (5.404 ms / 100) 5.403 -> 5.392 ( -0.20%) [ +0.11% +0.07% +0.00% / +0.02% -0.06% -0.20%] index_select strided 7 : Elapsed 0.054 ms (5.409 ms / 100) 5.400 -> 5.396 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% -0.02% +0.04%] index_select strided 8 : Elapsed 0.054 ms (5.401 ms / 100) 5.396 -> 5.394 ( -0.04%) [ +0.09% +0.04% +0.00% / +0.07% -0.04% +0.00%] index_select random : Elapsed 0.054 ms (5.401 ms / 100) 5.402 -> 5.398 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% -0.04% +0.02%] index_select random_sorted : Elapsed 0.054 ms (5.403 ms / 100) B = [40, 5, 4, 20] (stride (400, 1, 100, 5)) A = [40, 5, 4, 16] (stride (20, 4, 1, 800)) dim = 3 3.329 -> 3.337 ( +0.24%) [ +0.00% +0.15% +0.21% / +0.24% +0.75% +0.87%] index_add_ linear : Elapsed 0.033 ms (3.329 ms / 100) 3.200 -> 3.206 ( +0.19%) [ +0.00% +0.19% +0.22% / +0.19% +0.63% +0.97%] index_copy_ linear : Elapsed 0.032 ms (3.200 ms / 100) 3.327 -> 3.328 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.78% +0.75%] index_add_ reverse : Elapsed 0.033 ms (3.327 ms / 100) 3.195 -> 3.198 ( +0.09%) [ +0.03% +0.00% +0.03% / +0.09% +0.88% +0.75%] index_copy_ reverse : Elapsed 0.032 ms (3.196 ms / 100) 3.325 -> 3.324 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.75% +0.72%] index_add_ spread : Elapsed 0.033 ms (3.325 ms / 100) 3.197 -> 3.197 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.75% +0.66%] index_copy_ spread : Elapsed 0.032 ms (3.197 ms / 100) 3.311 -> 3.311 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.82% +0.85%] index_add_ strided 3 : Elapsed 0.033 ms (3.311 ms / 100) 3.182 -> 3.183 ( +0.03%) [ +0.00% +0.09% +0.03% / +0.03% +0.88% +0.82%] index_copy_ strided 3 : Elapsed 0.032 ms (3.182 ms / 100) 3.327 -> 3.328 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.72% +0.69%] index_add_ strided 7 : Elapsed 0.033 ms (3.328 ms / 100) 3.196 -> 3.196 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.75% +0.75%] index_copy_ strided 7 : Elapsed 0.032 ms (3.197 ms / 100) 3.330 -> 3.323 ( -0.21%) [ +0.00% +0.21% +0.21% / -0.21% +0.84% +0.72%] index_add_ perm : Elapsed 0.033 ms (3.330 ms / 100) 3.201 -> 3.197 ( -0.12%) [ +0.00% +0.19% +0.16% / -0.12% +0.94% +0.75%] index_copy_ perm : Elapsed 0.032 ms (3.201 ms / 100) 3.311 -> 3.312 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.91% +0.82%] index_add_ perm_sorted : Elapsed 0.033 ms (3.311 ms / 100) 3.184 -> 3.187 ( +0.09%) [ +0.00% +0.03% +0.03% / +0.09% +0.72% +0.75%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.184 ms / 100) 5.395 -> 5.390 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.07% -0.07% -0.09%] index_select const : Elapsed 0.054 ms (5.396 ms / 100) 5.405 -> 5.405 ( +0.00%) [ +0.00% +0.07% +0.19% / +0.00% +0.09% +0.04%] index_select wrap : Elapsed 0.054 ms (5.405 ms / 100) 5.404 -> 5.400 ( -0.07%) [ +0.04% +0.00% +0.07% / -0.07% +0.09% +0.11%] index_select linear : Elapsed 0.054 ms (5.406 ms / 100) 5.403 -> 5.404 ( +0.02%) [ +0.00% +0.04% +0.07% / +0.02% +0.07% +0.09%] index_select reverse : Elapsed 0.054 ms (5.403 ms / 100) 5.390 -> 5.387 ( -0.06%) [ +0.11% +0.06% +0.00% / -0.06% +0.15% +0.15%] index_select skip64 : Elapsed 0.054 ms (5.396 ms / 100) 5.389 -> 5.390 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.17% +0.26%] index_select skip256 : Elapsed 0.054 ms (5.393 ms / 100) 5.397 -> 5.404 ( +0.13%) [ +0.02% +0.00% +0.26% / +0.13% +0.19% +0.28%] index_select spread : Elapsed 0.054 ms (5.398 ms / 100) 5.401 -> 5.403 ( +0.04%) [ +0.09% +0.00% +0.28% / +0.04% +0.26% +0.09%] index_select strided 3 : Elapsed 0.054 ms (5.406 ms / 100) 5.401 -> 5.402 ( +0.02%) [ +0.07% +0.00% +0.19% / +0.02% +0.20% +0.13%] index_select strided 5 : Elapsed 0.054 ms (5.405 ms / 100) 5.401 -> 5.400 ( -0.02%) [ +0.07% +0.02% +0.00% / -0.02% +0.06% +0.22%] index_select strided 7 : Elapsed 0.054 ms (5.405 ms / 100) 5.390 -> 5.389 ( -0.02%) [ +0.00% +0.06% +0.13% / -0.02% +0.28% +0.22%] index_select strided 8 : Elapsed 0.054 ms (5.390 ms / 100) 5.397 -> 5.402 ( +0.09%) [ +0.17% +0.00% +0.15% / +0.09% +0.17% +0.20%] index_select random : Elapsed 0.054 ms (5.406 ms / 100) 5.403 -> 5.403 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.04% +0.13% +0.00%] index_select random_sorted : Elapsed 0.054 ms (5.404 ms / 100) B = [40, 5, 4, 20] (stride (20, 800, 4000, 1)) A = [40, 5, 4, 16] (stride (80, 16, 3200, 1)) dim = 3 3.880 -> 3.882 ( +0.05%) [ +0.03% +0.00% +0.34% / +0.05% +0.93% +0.95%] index_add_ linear : Elapsed 0.039 ms (3.881 ms / 100) 3.749 -> 3.749 ( +0.00%) [ +0.00% +0.00% +0.21% / +0.00% +0.75% +0.67%] index_copy_ linear : Elapsed 0.037 ms (3.749 ms / 100) 3.890 -> 3.890 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.87% +0.82%] index_add_ reverse : Elapsed 0.039 ms (3.890 ms / 100) 3.756 -> 3.757 ( +0.03%) [ +0.00% +0.00% +0.05% / +0.03% +0.72% +0.72%] index_copy_ reverse : Elapsed 0.038 ms (3.756 ms / 100) 3.889 -> 3.889 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.85% +0.75%] index_add_ spread : Elapsed 0.039 ms (3.889 ms / 100) 3.753 -> 3.754 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.03% +0.67% +0.67%] index_copy_ spread : Elapsed 0.038 ms (3.753 ms / 100) 3.896 -> 3.895 ( -0.03%) [ +0.00% +0.00% +0.13% / -0.03% +0.62% +0.64%] index_add_ strided 3 : Elapsed 0.039 ms (3.896 ms / 100) 3.740 -> 3.741 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.83% +0.78%] index_copy_ strided 3 : Elapsed 0.037 ms (3.742 ms / 100) 3.896 -> 3.899 ( +0.08%) [ +0.13% +0.00% +0.26% / +0.08% +0.67% +0.59%] index_add_ strided 7 : Elapsed 0.039 ms (3.901 ms / 100) 3.744 -> 3.746 ( +0.05%) [ +0.08% +0.00% +0.13% / +0.05% +0.64% +0.61%] index_copy_ strided 7 : Elapsed 0.037 ms (3.747 ms / 100) 3.891 -> 3.896 ( +0.13%) [ +0.08% +0.21% +0.00% / +0.13% +0.80% +0.80%] index_add_ perm : Elapsed 0.039 ms (3.894 ms / 100) 3.755 -> 3.755 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.53% +0.48%] index_copy_ perm : Elapsed 0.038 ms (3.756 ms / 100) 3.891 -> 3.891 ( +0.00%) [ +0.00% +0.08% +0.23% / +0.00% +0.69% +0.82%] index_add_ perm_sorted : Elapsed 0.039 ms (3.891 ms / 100) 3.757 -> 3.758 ( +0.03%) [ +0.00% +0.11% +0.59% / +0.03% +0.67% +0.61%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.757 ms / 100) 5.487 -> 5.483 ( -0.07%) [ +0.07% +0.00% +0.13% / -0.02% +0.15% -0.07%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.497 -> 5.491 ( -0.11%) [ +0.00% +0.07% +0.20% / +0.09% +0.09% -0.11%] index_select wrap : Elapsed 0.055 ms (5.497 ms / 100) 5.496 -> 5.494 ( -0.04%) [ +0.00% +0.24% +0.05% / -0.04% +0.05% +0.13%] index_select linear : Elapsed 0.055 ms (5.496 ms / 100) 5.496 -> 5.495 ( -0.02%) [ +0.00% +0.02% +0.09% / -0.02% +0.15% +0.02%] index_select reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.490 -> 5.489 ( -0.02%) [ +0.13% +0.00% +0.11% / +0.02% -0.02% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.497 ms / 100) 5.493 -> 5.491 ( -0.04%) [ +0.00% +0.00% +0.00% / -0.04% +0.02% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.493 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.11% +0.00% +0.02%] index_select spread : Elapsed 0.055 ms (5.495 ms / 100) 5.498 -> 5.491 ( -0.13%) [ +0.00% +0.02% +0.18% / +0.15% -0.11% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.498 ms / 100) 5.498 -> 5.494 ( -0.07%) [ +0.13% +0.13% +0.00% / +0.04% +0.02% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.505 ms / 100) 5.493 -> 5.491 ( -0.04%) [ +0.00% +0.13% +0.13% / +0.11% -0.04% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.495 -> 5.498 ( +0.05%) [ +0.02% +0.00% +0.18% / +0.05% +0.13% +0.07%] index_select strided 8 : Elapsed 0.055 ms (5.496 ms / 100) 5.495 -> 5.495 ( +0.00%) [ +0.05% +0.00% +0.11% / +0.04% +0.00% +0.04%] index_select random : Elapsed 0.055 ms (5.498 ms / 100) 5.499 -> 5.496 ( -0.05%) [ +0.00% +0.05% +0.24% / +0.04% -0.04% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.499 ms / 100) B = [40, 5, 4, 20] (stride (20, 800, 4000, 1)) A = [40, 5, 4, 16] (stride (1, 40, 3200, 200)) dim = 3 4.284 -> 4.282 ( -0.05%) [ +0.02% +0.00% +0.07% / -0.05% +0.70% +0.72%] index_add_ linear : Elapsed 0.043 ms (4.285 ms / 100) 4.138 -> 4.136 ( -0.05%) [ +0.02% +0.00% +0.12% / -0.05% +0.77% +0.75%] index_copy_ linear : Elapsed 0.041 ms (4.139 ms / 100) 4.279 -> 4.279 ( +0.00%) [ +0.00% +0.02% +0.12% / +0.00% +0.84% +0.84%] index_add_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.136 -> 4.137 ( +0.02%) [ +0.00% +0.00% +0.36% / +0.02% +0.68% +0.65%] index_copy_ reverse : Elapsed 0.041 ms (4.136 ms / 100) 4.270 -> 4.272 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.75% +0.75%] index_add_ spread : Elapsed 0.043 ms (4.270 ms / 100) 4.127 -> 4.128 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.75% +0.75%] index_copy_ spread : Elapsed 0.041 ms (4.127 ms / 100) 4.287 -> 4.287 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.75% +0.72%] index_add_ strided 3 : Elapsed 0.043 ms (4.288 ms / 100) 4.142 -> 4.143 ( +0.02%) [ +0.00% +0.05% +0.07% / +0.02% +0.89% +0.89%] index_copy_ strided 3 : Elapsed 0.041 ms (4.142 ms / 100) 4.280 -> 4.280 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.77% +0.77%] index_add_ strided 7 : Elapsed 0.043 ms (4.280 ms / 100) 4.136 -> 4.136 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.70% +0.68%] index_copy_ strided 7 : Elapsed 0.041 ms (4.136 ms / 100) 4.285 -> 4.285 ( +0.00%) [ +0.02% +0.00% +0.19% / +0.00% +0.75% +0.70%] index_add_ perm : Elapsed 0.043 ms (4.286 ms / 100) 4.137 -> 4.138 ( +0.02%) [ +0.00% +0.07% +0.07% / +0.02% +0.75% +0.70%] index_copy_ perm : Elapsed 0.041 ms (4.137 ms / 100) 4.290 -> 4.289 ( -0.02%) [ +0.02% +0.02% +0.00% / -0.02% +0.65% +0.63%] index_add_ perm_sorted : Elapsed 0.043 ms (4.291 ms / 100) 4.146 -> 4.147 ( +0.02%) [ +0.02% +0.00% +0.05% / +0.02% +0.72% +0.68%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.147 ms / 100) 5.564 -> 5.563 ( -0.02%) [ +0.00% +0.18% +0.13% / +0.13% -0.02% +0.02%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.582 -> 5.576 ( -0.11%) [ +0.07% +0.00% +0.07% / +0.07% -0.11% +0.04%] index_select wrap : Elapsed 0.056 ms (5.586 ms / 100) 5.575 -> 5.579 ( +0.07%) [ +0.25% +0.11% +0.00% / +0.07% +0.23% +0.18%] index_select linear : Elapsed 0.056 ms (5.589 ms / 100) 5.580 -> 5.581 ( +0.02%) [ +0.02% +0.00% +0.04% / +0.02% +0.18% +0.14%] index_select reverse : Elapsed 0.056 ms (5.581 ms / 100) 5.564 -> 5.564 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.02% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.569 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.13% +0.13% +0.23%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.18% +0.00% +0.11% / +0.04% +0.16% +0.09%] index_select spread : Elapsed 0.056 ms (5.587 ms / 100) 5.578 -> 5.580 ( +0.04%) [ +0.05% +0.00% +0.05% / +0.04% +0.25% +0.32%] index_select strided 3 : Elapsed 0.056 ms (5.581 ms / 100) 5.582 -> 5.583 ( +0.02%) [ +0.14% +0.00% +0.04% / +0.02% +0.14% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.590 ms / 100) 5.574 -> 5.579 ( +0.09%) [ +0.07% +0.00% +0.14% / +0.09% +0.34% +0.27%] index_select strided 7 : Elapsed 0.056 ms (5.578 ms / 100) 5.564 -> 5.566 ( +0.04%) [ +0.00% +0.11% +0.11% / +0.04% +0.22% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.582 -> 5.580 ( -0.04%) [ +0.11% +0.13% +0.00% / +0.05% +0.09% -0.04%] index_select random : Elapsed 0.056 ms (5.588 ms / 100) 5.574 -> 5.574 ( +0.00%) [ +0.11% +0.20% +0.00% / +0.00% +0.18% +0.23%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) B = [40, 5, 4, 20] (stride (1, 800, 4000, 40)) A = [40, 5, 4, 16] (stride (4, 2560, 1, 160)) dim = 3 3.999 -> 3.999 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.83% +0.83%] index_add_ linear : Elapsed 0.040 ms (4.001 ms / 100) 3.876 -> 3.876 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.85% +1.03%] index_copy_ linear : Elapsed 0.039 ms (3.876 ms / 100) 4.011 -> 4.012 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.67% +0.65%] index_add_ reverse : Elapsed 0.040 ms (4.013 ms / 100) 3.879 -> 3.879 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.98% +0.98%] index_copy_ reverse : Elapsed 0.039 ms (3.879 ms / 100) 4.011 -> 4.011 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_add_ spread : Elapsed 0.040 ms (4.012 ms / 100) 3.879 -> 3.880 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.98% +1.11%] index_copy_ spread : Elapsed 0.039 ms (3.879 ms / 100) 4.011 -> 4.013 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.75% +0.75%] index_add_ strided 3 : Elapsed 0.040 ms (4.012 ms / 100) 3.871 -> 3.871 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +1.03% +1.27%] index_copy_ strided 3 : Elapsed 0.039 ms (3.871 ms / 100) 4.016 -> 4.017 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.55% +0.52%] index_add_ strided 7 : Elapsed 0.040 ms (4.017 ms / 100) 3.875 -> 3.875 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +1.03% +0.70%] index_copy_ strided 7 : Elapsed 0.039 ms (3.876 ms / 100) 4.013 -> 4.014 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.65% +0.60%] index_add_ perm : Elapsed 0.040 ms (4.014 ms / 100) 3.882 -> 3.884 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +1.03% +1.03%] index_copy_ perm : Elapsed 0.039 ms (3.882 ms / 100) 4.012 -> 4.012 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.70% +0.62%] index_add_ perm_sorted : Elapsed 0.040 ms (4.013 ms / 100) 3.879 -> 3.881 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.80% +0.98%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.880 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.09% +0.02% +0.00% / +0.11% +0.11% +0.05%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.589 -> 5.589 ( +0.00%) [ +0.09% +0.23% +0.00% / +0.00% +0.04% +0.14%] index_select wrap : Elapsed 0.056 ms (5.594 ms / 100) 5.591 -> 5.592 ( +0.02%) [ +0.00% +0.13% +0.02% / +0.02% +0.11% +0.04%] index_select linear : Elapsed 0.056 ms (5.591 ms / 100) 5.589 -> 5.591 ( +0.04%) [ +0.00% +0.09% +0.13% / +0.07% +0.21% +0.04%] index_select reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.09% +0.05% +0.00% / -0.04% +0.11% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.570 ms / 100) 5.560 -> 5.566 ( +0.11%) [ +0.11% +0.00% +0.14% / +0.11% +0.18% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.589 -> 5.589 ( +0.00%) [ +0.00% +0.16% +0.13% / +0.18% +0.21% +0.00%] index_select spread : Elapsed 0.056 ms (5.589 ms / 100) 5.594 -> 5.594 ( +0.00%) [ +0.00% +0.16% +0.11% / +0.02% +0.00% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.589 -> 5.593 ( +0.07%) [ +0.00% +0.11% +0.14% / +0.11% +0.21% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.589 ms / 100) 5.590 -> 5.591 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.09% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.594 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.02% +0.04% -0.04%] index_select strided 8 : Elapsed 0.056 ms (5.573 ms / 100) 5.585 -> 5.588 ( +0.05%) [ +0.20% +0.00% +0.18% / +0.07% +0.13% +0.05%] index_select random : Elapsed 0.056 ms (5.596 ms / 100) 5.582 -> 5.589 ( +0.13%) [ +0.14% +0.00% +0.11% / +0.13% +0.14% +0.27%] index_select random_sorted : Elapsed 0.056 ms (5.590 ms / 100) B = [40, 5, 4, 20] (stride (1, 40, 4000, 200)) A = [40, 5, 4, 16] (stride (64, 2560, 16, 1)) dim = 3 4.234 -> 4.246 ( +0.28%) [ +0.00% +0.28% +0.07% / +0.28% +0.80% +0.73%] index_add_ linear : Elapsed 0.042 ms (4.234 ms / 100) 4.109 -> 4.117 ( +0.19%) [ +0.00% +0.24% +0.07% / +0.19% +0.73% +0.68%] index_copy_ linear : Elapsed 0.041 ms (4.109 ms / 100) 4.238 -> 4.242 ( +0.09%) [ +0.17% +0.00% +0.09% / +0.09% +1.01% +0.80%] index_add_ reverse : Elapsed 0.042 ms (4.245 ms / 100) 4.091 -> 4.096 ( +0.12%) [ +0.17% +0.00% +0.10% / +0.12% +1.12% +0.76%] index_copy_ reverse : Elapsed 0.041 ms (4.098 ms / 100) 4.227 -> 4.227 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.76% +0.76%] index_add_ spread : Elapsed 0.042 ms (4.229 ms / 100) 4.090 -> 4.091 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.78% +0.71%] index_copy_ spread : Elapsed 0.041 ms (4.091 ms / 100) 4.220 -> 4.217 ( -0.07%) [ +0.02% +0.07% +0.00% / -0.07% +0.69% +0.73%] index_add_ strided 3 : Elapsed 0.042 ms (4.221 ms / 100) 4.091 -> 4.092 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.041 ms (4.091 ms / 100) 4.245 -> 4.241 ( -0.09%) [ +0.02% +0.02% +0.00% / -0.09% +0.61% +0.73%] index_add_ strided 7 : Elapsed 0.042 ms (4.246 ms / 100) 4.108 -> 4.114 ( +0.15%) [ +0.07% +0.00% +0.22% / +0.15% +0.56% +0.51%] index_copy_ strided 7 : Elapsed 0.041 ms (4.111 ms / 100) 4.237 -> 4.247 ( +0.24%) [ +0.21% +0.24% +0.00% / +0.24% +0.90% +0.90%] index_add_ perm : Elapsed 0.042 ms (4.246 ms / 100) 4.095 -> 4.100 ( +0.12%) [ +0.17% +0.15% +0.00% / +0.12% +0.90% +0.88%] index_copy_ perm : Elapsed 0.041 ms (4.102 ms / 100) 4.220 -> 4.220 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.042 ms (4.221 ms / 100) 4.097 -> 4.096 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.66% +0.56%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.098 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.00% +0.02% +0.05% / -0.07% +0.02% +0.05%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.568 -> 5.576 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +0.20% +0.14%] index_select wrap : Elapsed 0.056 ms (5.572 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.13% +0.11% +0.00% / +0.04% +0.11% +0.05%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.05% +0.11% +0.00% / +0.05% +0.13% +0.20%] index_select reverse : Elapsed 0.056 ms (5.572 ms / 100) 5.558 -> 5.568 ( +0.18%) [ +0.13% +0.22% +0.00% / +0.18% +0.31% +0.27%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.16% -0.02%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.566 -> 5.576 ( +0.18%) [ +0.07% +0.14% +0.00% / +0.36% +0.18% +0.23%] index_select spread : Elapsed 0.056 ms (5.570 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.22% +0.00% +0.16% / +0.02% +0.23% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.567 -> 5.577 ( +0.18%) [ +0.18% +0.05% +0.00% / +0.25% +0.20% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.577 ms / 100) 5.568 -> 5.577 ( +0.16%) [ +0.16% +0.00% +0.07% / +0.16% +0.23% +0.27%] index_select strided 7 : Elapsed 0.056 ms (5.577 ms / 100) 5.570 -> 5.569 ( -0.02%) [ +0.05% +0.13% +0.00% / -0.02% +0.16% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.575 ( +0.16%) [ +0.07% +0.00% +0.05% / +0.16% +0.27% +0.29%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.564 -> 5.573 ( +0.16%) [ +0.07% +0.00% +0.05% / +0.22% +0.16% +0.29%] index_select random_sorted : Elapsed 0.056 ms (5.568 ms / 100) B = [40, 5, 4, 20] (stride (1, 40, 4000, 200)) A = [40, 5, 4, 16] (stride (20, 4, 1, 800)) dim = 3 3.328 -> 3.328 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.66% +0.66%] index_add_ linear : Elapsed 0.033 ms (3.329 ms / 100) 3.224 -> 3.224 ( +0.00%) [ +0.09% +0.00% +0.06% / +0.00% +0.68% +0.71%] index_copy_ linear : Elapsed 0.032 ms (3.227 ms / 100) 3.330 -> 3.338 ( +0.24%) [ +0.18% +0.12% +0.00% / +0.24% +0.81% +0.84%] index_add_ reverse : Elapsed 0.033 ms (3.336 ms / 100) 3.228 -> 3.227 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.84% +0.77%] index_copy_ reverse : Elapsed 0.032 ms (3.228 ms / 100) 3.309 -> 3.314 ( +0.15%) [ +0.24% +0.21% +0.00% / +0.15% +0.94% +0.94%] index_add_ spread : Elapsed 0.033 ms (3.317 ms / 100) 3.212 -> 3.217 ( +0.16%) [ +0.00% +0.12% +0.06% / +0.16% +1.09% +0.84%] index_copy_ spread : Elapsed 0.032 ms (3.212 ms / 100) 3.324 -> 3.316 ( -0.24%) [ +0.00% +0.00% +0.00% / -0.24% +0.51% +0.51%] index_add_ strided 3 : Elapsed 0.033 ms (3.324 ms / 100) 3.213 -> 3.217 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.87% +0.84%] index_copy_ strided 3 : Elapsed 0.032 ms (3.215 ms / 100) 3.320 -> 3.324 ( +0.12%) [ +0.06% +0.00% +0.12% / +0.12% +0.72% +0.66%] index_add_ strided 7 : Elapsed 0.033 ms (3.322 ms / 100) 3.218 -> 3.219 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.59% +0.44%] index_copy_ strided 7 : Elapsed 0.032 ms (3.218 ms / 100) 3.316 -> 3.315 ( -0.03%) [ +0.06% +0.06% +0.00% / -0.03% +0.66% +0.72%] index_add_ perm : Elapsed 0.033 ms (3.318 ms / 100) 3.208 -> 3.202 ( -0.19%) [ +0.06% +0.00% +0.09% / -0.19% +0.75% +0.75%] index_copy_ perm : Elapsed 0.032 ms (3.210 ms / 100) 3.331 -> 3.330 ( -0.03%) [ +0.00% +0.24% +0.00% / -0.03% +0.78% +0.78%] index_add_ perm_sorted : Elapsed 0.033 ms (3.331 ms / 100) 3.238 -> 3.239 ( +0.03%) [ +0.00% +0.15% +0.03% / +0.03% +0.68% +0.71%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.238 ms / 100) 5.398 -> 5.391 ( -0.13%) [ +0.06% +0.06% +0.00% / +0.00% -0.13% +0.11%] index_select const : Elapsed 0.054 ms (5.401 ms / 100) 5.411 -> 5.417 ( +0.11%) [ +0.04% +0.00% +0.22% / +0.11% +0.11% +0.15%] index_select wrap : Elapsed 0.054 ms (5.413 ms / 100) 5.410 -> 5.412 ( +0.04%) [ +0.22% +0.09% +0.00% / +0.07% +0.04% +0.18%] index_select linear : Elapsed 0.054 ms (5.422 ms / 100) 5.410 -> 5.412 ( +0.04%) [ +0.00% +0.07% +0.07% / +0.04% +0.07% +0.13%] index_select reverse : Elapsed 0.054 ms (5.410 ms / 100) 5.399 -> 5.403 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +0.09% +0.11%] index_select skip64 : Elapsed 0.054 ms (5.403 ms / 100) 5.396 -> 5.395 ( -0.02%) [ +0.17% +0.06% +0.00% / +0.22% +0.06% -0.02%] index_select skip256 : Elapsed 0.054 ms (5.405 ms / 100) 5.410 -> 5.410 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.02% +0.09%] index_select spread : Elapsed 0.054 ms (5.410 ms / 100) 5.413 -> 5.410 ( -0.06%) [ +0.00% +0.07% +0.07% / +0.06% +0.00% -0.06%] index_select strided 3 : Elapsed 0.054 ms (5.413 ms / 100) 5.413 -> 5.408 ( -0.09%) [ +0.00% +0.02% +0.04% / +0.11% -0.09% +0.22%] index_select strided 5 : Elapsed 0.054 ms (5.413 ms / 100) 5.407 -> 5.413 ( +0.11%) [ +0.00% +0.07% +0.09% / +0.20% +0.17% +0.11%] index_select strided 7 : Elapsed 0.054 ms (5.407 ms / 100) 5.393 -> 5.395 ( +0.04%) [ +0.00% +0.17% +0.20% / +0.04% +0.22% +0.20%] index_select strided 8 : Elapsed 0.054 ms (5.393 ms / 100) 5.412 -> 5.404 ( -0.15%) [ +0.00% +0.15% +0.07% / +0.00% +0.04% -0.15%] index_select random : Elapsed 0.054 ms (5.412 ms / 100) 5.407 -> 5.406 ( -0.02%) [ +0.00% +0.04% +0.11% / +0.02% -0.02% +0.15%] index_select random_sorted : Elapsed 0.054 ms (5.407 ms / 100) B = [40, 5, 4, 20] (stride (4, 160, 1, 800)) A = [40, 5, 4, 16] (stride (80, 16, 3200, 1)) dim = 3 3.886 -> 3.886 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.98% +0.93%] index_add_ linear : Elapsed 0.039 ms (3.886 ms / 100) 3.742 -> 3.742 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.80% +0.75%] index_copy_ linear : Elapsed 0.037 ms (3.742 ms / 100) 3.875 -> 3.876 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +1.19% +1.14%] index_add_ reverse : Elapsed 0.039 ms (3.875 ms / 100) 3.731 -> 3.733 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.91% +0.80%] index_copy_ reverse : Elapsed 0.037 ms (3.731 ms / 100) 3.889 -> 3.890 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.85% +0.85%] index_add_ spread : Elapsed 0.039 ms (3.890 ms / 100) 3.732 -> 3.733 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.91% +0.94%] index_copy_ spread : Elapsed 0.037 ms (3.734 ms / 100) 3.888 -> 3.887 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.82% +0.85%] index_add_ strided 3 : Elapsed 0.039 ms (3.888 ms / 100) 3.747 -> 3.745 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.75% +0.75%] index_copy_ strided 3 : Elapsed 0.037 ms (3.749 ms / 100) 3.877 -> 3.877 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +1.01% +1.06%] index_add_ strided 7 : Elapsed 0.039 ms (3.878 ms / 100) 3.732 -> 3.732 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.78% +0.80%] index_copy_ strided 7 : Elapsed 0.037 ms (3.735 ms / 100) 3.886 -> 3.887 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.82% +0.90%] index_add_ perm : Elapsed 0.039 ms (3.888 ms / 100) 3.743 -> 3.743 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.75% +0.72%] index_copy_ perm : Elapsed 0.037 ms (3.744 ms / 100) 3.888 -> 3.888 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.80% +0.82%] index_add_ perm_sorted : Elapsed 0.039 ms (3.891 ms / 100) 3.747 -> 3.748 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.61% +0.59%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.749 ms / 100) 5.478 -> 5.474 ( -0.07%) [ +0.05% +0.07% +0.00% / -0.04% +0.00% -0.07%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.07% +0.09% +0.00% / -0.05% +0.11% +0.04%] index_select wrap : Elapsed 0.055 ms (5.490 ms / 100) 5.478 -> 5.486 ( +0.15%) [ +0.16% +0.22% +0.00% / +0.15% +0.16% +0.16%] index_select linear : Elapsed 0.055 ms (5.487 ms / 100) 5.482 -> 5.484 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.05% +0.04% +0.18%] index_select reverse : Elapsed 0.055 ms (5.489 ms / 100) 5.479 -> 5.476 ( -0.05%) [ +0.00% +0.02% +0.05% / -0.05% +0.02% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.479 ms / 100) 5.477 -> 5.481 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.26% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.477 ms / 100) 5.484 -> 5.485 ( +0.02%) [ +0.00% +0.00% +0.13% / +0.11% +0.05% +0.02%] index_select spread : Elapsed 0.055 ms (5.484 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.11% +0.05% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.483 ms / 100) 5.483 -> 5.485 ( +0.04%) [ +0.04% +0.11% +0.00% / +0.04% +0.04% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.485 ms / 100) 5.484 -> 5.484 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.09% +0.15% +0.00%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.487 -> 5.481 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% -0.05% -0.02%] index_select strided 8 : Elapsed 0.055 ms (5.487 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.02% +0.04%] index_select random : Elapsed 0.055 ms (5.485 ms / 100) 5.484 -> 5.485 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.13% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.485 ms / 100) B = [40, 5, 4, 20] (stride (1, 40, 200, 800)) A = [40, 5, 4, 16] (stride (320, 64, 16, 1)) dim = 3 3.581 -> 3.581 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.45% +0.45%] index_add_ linear : Elapsed 0.036 ms (3.582 ms / 100) 3.434 -> 3.434 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.61% +0.64%] index_copy_ linear : Elapsed 0.034 ms (3.434 ms / 100) 3.577 -> 3.577 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.81% +0.81%] index_add_ reverse : Elapsed 0.036 ms (3.578 ms / 100) 3.438 -> 3.439 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.87% +0.70%] index_copy_ reverse : Elapsed 0.034 ms (3.438 ms / 100) 3.597 -> 3.596 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.56% +0.36%] index_add_ spread : Elapsed 0.036 ms (3.598 ms / 100) 3.452 -> 3.454 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.49% +0.35%] index_copy_ spread : Elapsed 0.035 ms (3.453 ms / 100) 3.600 -> 3.601 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.42% +0.39%] index_add_ strided 3 : Elapsed 0.036 ms (3.600 ms / 100) 3.439 -> 3.440 ( +0.03%) [ +0.00% +0.09% +0.03% / +0.03% +0.90% +0.81%] index_copy_ strided 3 : Elapsed 0.034 ms (3.439 ms / 100) 3.603 -> 3.603 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.22% +0.28%] index_add_ strided 7 : Elapsed 0.036 ms (3.604 ms / 100) 3.443 -> 3.443 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.64% +0.61%] index_copy_ strided 7 : Elapsed 0.034 ms (3.443 ms / 100) 3.600 -> 3.600 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.33% +0.28%] index_add_ perm : Elapsed 0.036 ms (3.601 ms / 100) 3.452 -> 3.454 ( +0.06%) [ +0.06% +0.14% +0.00% / +0.06% +0.38% +0.23%] index_copy_ perm : Elapsed 0.035 ms (3.454 ms / 100) 3.582 -> 3.581 ( -0.03%) [ +0.00% +0.06% +0.00% / -0.03% +0.70% +0.64%] index_add_ perm_sorted : Elapsed 0.036 ms (3.582 ms / 100) 3.443 -> 3.442 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.58% +0.55%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.443 ms / 100) 5.394 -> 5.399 ( +0.09%) [ +0.09% +0.00% +0.11% / +0.09% +0.19% +0.15%] index_select const : Elapsed 0.054 ms (5.399 ms / 100) 5.407 -> 5.404 ( -0.06%) [ +0.00% +0.04% +0.00% / +0.04% +0.00% -0.06%] index_select wrap : Elapsed 0.054 ms (5.407 ms / 100) 5.404 -> 5.403 ( -0.02%) [ +0.07% +0.00% +0.11% / +0.09% +0.07% -0.02%] index_select linear : Elapsed 0.054 ms (5.408 ms / 100) 5.407 -> 5.406 ( -0.02%) [ +0.04% +0.00% +0.06% / -0.02% +0.09% -0.02%] index_select reverse : Elapsed 0.054 ms (5.409 ms / 100) 5.402 -> 5.394 ( -0.15%) [ +0.04% +0.07% +0.00% / -0.15% +0.02% -0.09%] index_select skip64 : Elapsed 0.054 ms (5.404 ms / 100) 5.397 -> 5.400 ( +0.06%) [ +0.00% +0.07% +0.11% / +0.06% +0.15% +0.06%] index_select skip256 : Elapsed 0.054 ms (5.397 ms / 100) 5.410 -> 5.401 ( -0.17%) [ +0.02% +0.00% +0.00% / -0.17% +0.00% -0.15%] index_select spread : Elapsed 0.054 ms (5.411 ms / 100) 5.407 -> 5.403 ( -0.07%) [ +0.18% +0.00% +0.13% / +0.04% -0.07% +0.04%] index_select strided 3 : Elapsed 0.054 ms (5.417 ms / 100) 5.410 -> 5.402 ( -0.15%) [ +0.00% +0.02% +0.02% / -0.02% -0.15% -0.15%] index_select strided 5 : Elapsed 0.054 ms (5.410 ms / 100) 5.400 -> 5.407 ( +0.13%) [ +0.19% +0.06% +0.00% / +0.24% +0.13% +0.20%] index_select strided 7 : Elapsed 0.054 ms (5.410 ms / 100) 5.409 -> 5.407 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.02% -0.04% -0.04%] index_select strided 8 : Elapsed 0.054 ms (5.409 ms / 100) 5.404 -> 5.401 ( -0.06%) [ +0.00% +0.06% +0.04% / +0.04% +0.00% -0.06%] index_select random : Elapsed 0.054 ms (5.404 ms / 100) 5.407 -> 5.402 ( -0.09%) [ +0.04% +0.04% +0.00% / +0.04% -0.07% -0.09%] index_select random_sorted : Elapsed 0.054 ms (5.409 ms / 100) out_shape = [20, 5, 16, 4] in_shape = [40, 5, 16, 4] idx_dim = 0 B = [20, 5, 16, 4] (stride (4, 80, 400, 1)) A = [40, 5, 16, 4] (stride (320, 1, 5, 80)) dim = 0 2.444 -> 2.447 ( +0.12%) [ +0.04% +0.00% +0.00% / +0.12% +0.29% +0.25%] index_select const : Elapsed 0.024 ms (2.445 ms / 100) 2.455 -> 2.455 ( +0.00%) [ +0.08% +0.00% +0.24% / +0.20% +0.12% +0.00%] index_select wrap : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.453 ( -0.04%) [ +0.04% +0.08% +0.00% / +0.08% +0.00% -0.04%] index_select linear : Elapsed 0.025 ms (2.455 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.12% +0.04% +0.00% / +0.00% -0.04% +0.04%] index_select reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.00% +0.20% +0.29% / +0.16% -0.08% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.445 -> 2.447 ( +0.08%) [ +0.20% +0.00% +0.12% / +0.08% +0.37% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.450 ms / 100) 2.453 -> 2.455 ( +0.08%) [ +0.00% +0.04% +0.00% / +0.08% +0.20% +0.24%] index_select spread : Elapsed 0.025 ms (2.453 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.00% +0.12% +0.08% / +0.00% -0.04% -0.16%] index_select strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.454 ( +0.20%) [ +0.00% +0.24% +0.20% / +0.20% +0.53% +0.24%] index_select strided 5 : Elapsed 0.024 ms (2.449 ms / 100) 2.451 -> 2.451 ( +0.00%) [ +0.12% +0.00% +0.04% / +0.12% +0.57% +0.00%] index_select strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.04% +0.04% +0.00% / +0.16% +0.29% +0.20%] index_select strided 8 : Elapsed 0.024 ms (2.447 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.08% +0.12% +0.00% / +0.08% +0.20% +0.16%] index_select strided 16 : Elapsed 0.024 ms (2.450 ms / 100) 2.451 -> 2.453 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.16% +0.16% +0.08%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.450 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.24% +0.00% +0.00%] index_select random_sorted : Elapsed 0.025 ms (2.450 ms / 100) 2.453 -> 2.453 ( +0.00%) [ +0.16% +0.00% +0.04% / +0.04% +0.41% +0.00%] index_select perm : Elapsed 0.025 ms (2.457 ms / 100) 2.458 -> 2.447 ( -0.45%) [ +0.00% +0.08% +0.00% / -0.08% -0.16% -0.45%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) B = [20, 5, 16, 4] (stride (16, 320, 1, 1600)) A = [40, 5, 16, 4] (stride (320, 4, 20, 1)) dim = 0 2.445 -> 2.447 ( +0.08%) [ +0.25% +0.25% +0.00% / +0.12% +0.45% +0.08%] index_select const : Elapsed 0.025 ms (2.451 ms / 100) 2.458 -> 2.452 ( -0.24%) [ +0.00% +0.12% +0.00% / +0.12% -0.08% -0.24%] index_select wrap : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.453 ( -0.16%) [ +0.08% +0.12% +0.00% / +0.08% +0.04% -0.16%] index_select linear : Elapsed 0.025 ms (2.459 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.04% +0.04% +0.00% / +0.00% +0.33% +0.41%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.445 -> 2.445 ( +0.00%) [ +0.08% +0.20% +0.00% / +0.16% +0.04% +0.00%] index_select skip64 : Elapsed 0.024 ms (2.447 ms / 100) 2.443 -> 2.450 ( +0.29%) [ +0.00% +0.20% +0.25% / +0.29% +0.78% +0.37%] index_select skip256 : Elapsed 0.024 ms (2.443 ms / 100) 2.455 -> 2.456 ( +0.04%) [ +0.12% +0.00% +0.08% / +0.04% +0.08% +0.08%] index_select spread : Elapsed 0.025 ms (2.458 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.00% +0.04% +0.04% / +0.20% +0.29% +0.33%] index_select strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.00% +0.20% +0.20% / +0.16% +0.70% +0.37%] index_select strided 5 : Elapsed 0.024 ms (2.446 ms / 100) 2.453 -> 2.457 ( +0.16%) [ +0.08% +0.00% +0.12% / +0.29% +0.29% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.29% +0.16% +0.00% / +0.16% +0.20% -0.08%] index_select strided 8 : Elapsed 0.025 ms (2.454 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.04% +0.04% +0.00% / -0.16% +0.08% +0.16%] index_select strided 16 : Elapsed 0.025 ms (2.450 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.20% +0.00% / +0.04% +0.33% +0.20%] index_select random : Elapsed 0.025 ms (2.450 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.16% +0.12% +0.12%] index_select random_sorted : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.455 ( +0.04%) [ +0.00% +0.04% +0.08% / +0.20% +0.24% +0.04%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.455 -> 2.457 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.24% +0.33% +0.08%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) B = [20, 5, 16, 4] (stride (1, 320, 20, 1600)) A = [40, 5, 16, 4] (stride (320, 1, 20, 5)) dim = 0 1.459 -> 1.451 ( -0.55%) [ +0.00% +0.14% +0.00% / -0.55% -0.48% -0.48%] index_select const : Elapsed 0.015 ms (1.459 ms / 100) 1.472 -> 1.471 ( -0.07%) [ +0.00% +0.20% +0.00% / +0.07% +0.20% -0.07%] index_select wrap : Elapsed 0.015 ms (1.472 ms / 100) 1.474 -> 1.469 ( -0.34%) [ +0.00% +0.27% +0.00% / -0.34% -0.14% -0.27%] index_select linear : Elapsed 0.015 ms (1.474 ms / 100) 1.468 -> 1.468 ( +0.00%) [ +0.27% +0.20% +0.00% / +0.41% +0.20% +0.00%] index_select reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.461 -> 1.447 ( -0.96%) [ +0.27% +0.00% +0.27% / -0.62% -0.62% -0.96%] index_select skip64 : Elapsed 0.015 ms (1.465 ms / 100) 1.460 -> 1.451 ( -0.62%) [ +0.00% +0.14% +0.07% / -0.62% -0.07% -0.62%] index_select skip256 : Elapsed 0.015 ms (1.460 ms / 100) 1.471 -> 1.472 ( +0.07%) [ +0.00% +0.07% +0.20% / +0.07% +0.61% +0.27%] index_select spread : Elapsed 0.015 ms (1.471 ms / 100) 1.470 -> 1.472 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.20% +0.48% +0.14%] index_select strided 3 : Elapsed 0.015 ms (1.472 ms / 100) 1.461 -> 1.458 ( -0.21%) [ +0.14% +0.27% +0.00% / -0.21% +0.21% -0.14%] index_select strided 5 : Elapsed 0.015 ms (1.463 ms / 100) 1.470 -> 1.474 ( +0.27%) [ +0.00% +0.00% +0.20% / +0.27% +0.95% +0.61%] index_select strided 7 : Elapsed 0.015 ms (1.470 ms / 100) 1.463 -> 1.457 ( -0.41%) [ +0.00% +0.07% +0.00% / -0.41% -0.41% -0.27%] index_select strided 8 : Elapsed 0.015 ms (1.463 ms / 100) 1.462 -> 1.455 ( -0.48%) [ +0.07% +0.21% +0.00% / -0.27% -0.48% -0.34%] index_select strided 16 : Elapsed 0.015 ms (1.463 ms / 100) 1.470 -> 1.464 ( -0.41%) [ +0.00% +0.00% +0.07% / -0.41% +0.14% +0.14%] index_select random : Elapsed 0.015 ms (1.470 ms / 100) 1.469 -> 1.466 ( -0.20%) [ +0.20% +0.07% +0.00% / -0.20% +0.00% +0.00%] index_select random_sorted : Elapsed 0.015 ms (1.472 ms / 100) 1.466 -> 1.471 ( +0.34%) [ +0.34% +0.00% +0.00% / +0.41% +0.48% +0.34%] index_select perm : Elapsed 0.015 ms (1.471 ms / 100) 1.468 -> 1.471 ( +0.20%) [ +0.00% +0.54% +0.20% / +0.54% +0.61% +0.20%] index_select perm_sorted : Elapsed 0.015 ms (1.468 ms / 100) out_shape = [40, 20, 16, 4] in_shape = [40, 5, 16, 4] idx_dim = 1 B = [40, 20, 16, 4] (stride (80, 4, 3200, 1)) A = [40, 5, 16, 4] (stride (16, 640, 1, 3200)) dim = 1 1.891 -> 1.895 ( +0.21%) [ +0.16% +0.00% +0.16% / +0.21% +0.53% +0.69%] index_add_ linear : Elapsed 0.019 ms (1.894 ms / 100) 1.839 -> 1.844 ( +0.27%) [ +0.00% +0.16% +0.22% / +0.27% +1.52% +1.20%] index_copy_ linear : Elapsed 0.018 ms (1.839 ms / 100) 1.890 -> 1.891 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.79% +1.06%] index_add_ reverse : Elapsed 0.019 ms (1.891 ms / 100) 1.844 -> 1.847 ( +0.16%) [ +0.00% +0.11% +0.05% / +0.16% +1.08% +1.14%] index_copy_ reverse : Elapsed 0.018 ms (1.844 ms / 100) 1.931 -> 1.935 ( +0.21%) [ +0.10% +0.00% +0.10% / +0.21% +0.98% +0.98%] index_add_ spread : Elapsed 0.019 ms (1.933 ms / 100) 1.889 -> 1.898 ( +0.48%) [ +0.26% +0.00% +0.16% / +0.48% +0.85% +0.74%] index_copy_ spread : Elapsed 0.019 ms (1.894 ms / 100) 1.930 -> 1.936 ( +0.31%) [ +0.21% +0.41% +0.00% / +0.31% +1.04% +1.14%] index_add_ strided 3 : Elapsed 0.019 ms (1.934 ms / 100) 1.882 -> 1.891 ( +0.48%) [ +0.00% +0.32% +0.32% / +0.48% +1.06% +1.01%] index_copy_ strided 3 : Elapsed 0.019 ms (1.882 ms / 100) 1.915 -> 1.917 ( +0.10%) [ +0.31% +0.26% +0.00% / +0.10% +1.10% +0.89%] index_add_ strided 7 : Elapsed 0.019 ms (1.921 ms / 100) 1.877 -> 1.872 ( -0.27%) [ +0.00% +0.05% +0.00% / -0.27% +0.64% +1.01%] index_copy_ strided 7 : Elapsed 0.019 ms (1.877 ms / 100) 1.917 -> 1.933 ( +0.83%) [ +0.00% +0.00% +0.16% / +0.83% +0.99% +0.94%] index_add_ perm : Elapsed 0.019 ms (1.917 ms / 100) 1.878 -> 1.880 ( +0.11%) [ +0.43% +0.21% +0.00% / +0.11% +1.12% +1.12%] index_copy_ perm : Elapsed 0.019 ms (1.886 ms / 100) 1.917 -> 1.917 ( +0.00%) [ +0.21% +0.10% +0.00% / +0.00% +0.94% +0.83%] index_add_ perm_sorted : Elapsed 0.019 ms (1.921 ms / 100) 1.884 -> 1.882 ( -0.11%) [ +0.00% +0.05% +0.00% / -0.11% +0.80% +0.64%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.884 ms / 100) 8.558 -> 8.557 ( -0.01%) [ +0.02% +0.07% +0.00% / +0.06% -0.01% +0.12%] index_select const : Elapsed 0.086 ms (8.560 ms / 100) 8.635 -> 8.617 ( -0.21%) [ +0.00% +0.03% +0.00% / +0.06% +0.01% -0.21%] index_select wrap : Elapsed 0.086 ms (8.635 ms / 100) 8.618 -> 8.600 ( -0.21%) [ +0.06% +0.24% +0.00% / +0.35% -0.21% -0.21%] index_select linear : Elapsed 0.086 ms (8.623 ms / 100) 8.611 -> 8.625 ( +0.16%) [ +0.15% +0.23% +0.00% / +0.20% +0.23% +0.16%] index_select reverse : Elapsed 0.086 ms (8.624 ms / 100) 8.553 -> 8.552 ( -0.01%) [ +0.48% +0.21% +0.00% / +0.23% +0.04% -0.01%] index_select skip64 : Elapsed 0.086 ms (8.594 ms / 100) 8.557 -> 8.550 ( -0.08%) [ +0.20% +0.00% +0.08% / +0.05% -0.08% -0.05%] index_select skip256 : Elapsed 0.086 ms (8.574 ms / 100) 8.604 -> 8.616 ( +0.14%) [ +0.19% +0.00% +0.26% / +0.14% +0.30% +0.19%] index_select spread : Elapsed 0.086 ms (8.620 ms / 100) 8.631 -> 8.626 ( -0.06%) [ +0.07% +0.00% +0.12% / +0.06% -0.02% -0.06%] index_select strided 3 : Elapsed 0.086 ms (8.637 ms / 100) 8.617 -> 8.625 ( +0.09%) [ +0.22% +0.20% +0.00% / +0.09% +0.34% +0.19%] index_select random : Elapsed 0.086 ms (8.636 ms / 100) 8.608 -> 8.614 ( +0.07%) [ +0.00% +0.10% +0.12% / +0.12% +0.23% +0.07%] index_select random_sorted : Elapsed 0.086 ms (8.608 ms / 100) B = [40, 20, 16, 4] (stride (80, 1, 3200, 20)) A = [40, 5, 16, 4] (stride (64, 2560, 1, 16)) dim = 1 1.895 -> 1.890 ( -0.26%) [ +0.26% +0.11% +0.00% / +0.21% -0.26% -0.05%] index_add_ linear : Elapsed 0.019 ms (1.900 ms / 100) 1.861 -> 1.856 ( -0.27%) [ +0.00% +0.11% +0.11% / -0.11% -0.21% -0.27%] index_copy_ linear : Elapsed 0.019 ms (1.861 ms / 100) 1.895 -> 1.898 ( +0.16%) [ +0.00% +0.32% +0.00% / +0.16% +0.26% +0.16%] index_add_ reverse : Elapsed 0.019 ms (1.895 ms / 100) 1.858 -> 1.857 ( -0.05%) [ +0.00% +0.16% +0.11% / +0.16% +0.22% -0.05%] index_copy_ reverse : Elapsed 0.019 ms (1.858 ms / 100) 1.910 -> 1.912 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.26% +0.10%] index_add_ spread : Elapsed 0.019 ms (1.910 ms / 100) 1.884 -> 1.883 ( -0.05%) [ +0.27% +0.00% +0.37% / +0.11% +0.00% -0.05%] index_copy_ spread : Elapsed 0.019 ms (1.889 ms / 100) 1.910 -> 1.907 ( -0.16%) [ +0.00% +0.00% +0.99% / +0.05% +0.10% -0.16%] index_add_ strided 3 : Elapsed 0.019 ms (1.910 ms / 100) 1.884 -> 1.878 ( -0.32%) [ +0.32% +0.00% +0.00% / +0.27% -0.27% -0.32%] index_copy_ strided 3 : Elapsed 0.019 ms (1.890 ms / 100) 1.907 -> 1.908 ( +0.05%) [ +0.16% +0.16% +0.00% / +0.16% +0.05% +0.31%] index_add_ strided 7 : Elapsed 0.019 ms (1.910 ms / 100) 1.885 -> 1.882 ( -0.16%) [ +0.27% +0.21% +0.00% / -0.05% -0.16% -0.16%] index_copy_ strided 7 : Elapsed 0.019 ms (1.890 ms / 100) 1.902 -> 1.907 ( +0.26%) [ +0.05% +0.00% +0.32% / +0.26% +0.32% +0.26%] index_add_ perm : Elapsed 0.019 ms (1.903 ms / 100) 1.874 -> 1.871 ( -0.16%) [ +0.05% +0.00% +0.27% / +0.53% +0.05% -0.16%] index_copy_ perm : Elapsed 0.019 ms (1.875 ms / 100) 1.904 -> 1.899 ( -0.26%) [ +0.16% +0.11% +0.00% / -0.26% +0.21% +0.26%] index_add_ perm_sorted : Elapsed 0.019 ms (1.907 ms / 100) 1.873 -> 1.875 ( +0.11%) [ +0.27% +0.43% +0.00% / +0.11% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.878 ms / 100) 8.589 -> 8.600 ( +0.13%) [ +0.00% +0.12% +0.08% / +0.13% +0.35% +0.28%] index_select const : Elapsed 0.086 ms (8.589 ms / 100) 8.637 -> 8.650 ( +0.15%) [ +0.10% +0.00% +0.01% / +0.15% +0.23% +0.19%] index_select wrap : Elapsed 0.086 ms (8.646 ms / 100) 8.621 -> 8.615 ( -0.07%) [ +0.09% +0.05% +0.00% / -0.07% +0.24% +0.36%] index_select linear : Elapsed 0.086 ms (8.629 ms / 100) 8.633 -> 8.638 ( +0.06%) [ +0.00% +0.02% +0.19% / +0.29% +0.07% +0.06%] index_select reverse : Elapsed 0.086 ms (8.633 ms / 100) 8.578 -> 8.581 ( +0.03%) [ +0.00% +0.31% +0.31% / +0.03% +0.29% +0.10%] index_select skip64 : Elapsed 0.086 ms (8.578 ms / 100) 8.585 -> 8.584 ( -0.01%) [ +0.16% +0.00% +0.29% / -0.01% +0.23% +0.26%] index_select skip256 : Elapsed 0.086 ms (8.599 ms / 100) 8.598 -> 8.620 ( +0.26%) [ +0.00% +0.14% +0.30% / +0.26% +0.43% +0.52%] index_select spread : Elapsed 0.086 ms (8.598 ms / 100) 8.618 -> 8.638 ( +0.23%) [ +0.13% +0.00% +0.29% / +0.23% +0.43% +0.39%] index_select strided 3 : Elapsed 0.086 ms (8.629 ms / 100) 8.627 -> 8.648 ( +0.24%) [ +0.00% +0.08% +0.05% / +0.28% +0.37% +0.24%] index_select random : Elapsed 0.086 ms (8.627 ms / 100) 8.619 -> 8.625 ( +0.07%) [ +0.03% +0.09% +0.00% / +0.07% +0.56% +0.30%] index_select random_sorted : Elapsed 0.086 ms (8.622 ms / 100) B = [40, 20, 16, 4] (stride (4, 160, 3200, 1)) A = [40, 5, 16, 4] (stride (4, 2560, 160, 1)) dim = 1 1.807 -> 1.815 ( +0.44%) [ +0.00% +0.06% +0.33% / +0.44% +3.54% +3.54%] index_add_ linear : Elapsed 0.018 ms (1.807 ms / 100) 1.760 -> 1.765 ( +0.28%) [ +0.11% +0.00% +0.23% / +0.28% +3.47% +3.69%] index_copy_ linear : Elapsed 0.018 ms (1.762 ms / 100) 1.807 -> 1.815 ( +0.44%) [ +0.00% +0.00% +0.39% / +0.44% +3.60% +3.71%] index_add_ reverse : Elapsed 0.018 ms (1.807 ms / 100) 1.759 -> 1.767 ( +0.45%) [ +0.17% +0.00% +0.40% / +0.45% +3.70% +3.98%] index_copy_ reverse : Elapsed 0.018 ms (1.762 ms / 100) 1.836 -> 1.837 ( +0.05%) [ +0.05% +0.00% +0.27% / +0.05% +1.58% +1.74%] index_add_ spread : Elapsed 0.018 ms (1.837 ms / 100) 1.789 -> 1.793 ( +0.22%) [ +0.22% +0.00% +0.11% / +0.22% +1.90% +1.96%] index_copy_ spread : Elapsed 0.018 ms (1.793 ms / 100) 1.829 -> 1.831 ( +0.11%) [ +0.00% +0.00% +0.33% / +0.11% +1.75% +1.64%] index_add_ strided 3 : Elapsed 0.018 ms (1.829 ms / 100) 1.782 -> 1.785 ( +0.17%) [ +0.00% +0.00% +0.28% / +0.17% +1.85% +2.02%] index_copy_ strided 3 : Elapsed 0.018 ms (1.782 ms / 100) 1.848 -> 1.849 ( +0.05%) [ +0.00% +0.16% +0.00% / +0.05% +0.54% +0.49%] index_add_ strided 7 : Elapsed 0.018 ms (1.848 ms / 100) 1.802 -> 1.806 ( +0.22%) [ +0.06% +0.00% +0.11% / +0.22% +0.50% +0.39%] index_copy_ strided 7 : Elapsed 0.018 ms (1.803 ms / 100) 1.837 -> 1.839 ( +0.11%) [ +0.00% +0.16% +0.16% / +0.11% +1.25% +1.09%] index_add_ perm : Elapsed 0.018 ms (1.837 ms / 100) 1.790 -> 1.791 ( +0.06%) [ +0.11% +0.00% +0.17% / +0.06% +1.28% +1.12%] index_copy_ perm : Elapsed 0.018 ms (1.792 ms / 100) 1.838 -> 1.840 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.87% +0.92%] index_add_ perm_sorted : Elapsed 0.018 ms (1.839 ms / 100) 1.793 -> 1.792 ( -0.06%) [ +0.00% +0.17% +0.00% / -0.06% +0.95% +0.95%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.793 ms / 100) 8.523 -> 8.542 ( +0.22%) [ +0.28% +0.00% +0.18% / +0.22% +0.33% +0.38%] index_select const : Elapsed 0.085 ms (8.547 ms / 100) 8.584 -> 8.560 ( -0.28%) [ +0.00% +0.08% +0.09% / -0.28% +0.31% -0.16%] index_select wrap : Elapsed 0.086 ms (8.584 ms / 100) 8.569 -> 8.580 ( +0.13%) [ +0.05% +0.04% +0.00% / +0.13% +0.20% +0.23%] index_select linear : Elapsed 0.086 ms (8.573 ms / 100) 8.551 -> 8.563 ( +0.14%) [ +0.07% +0.00% +0.15% / +0.14% +0.69% +0.20%] index_select reverse : Elapsed 0.086 ms (8.557 ms / 100) 8.526 -> 8.537 ( +0.13%) [ +0.04% +0.22% +0.00% / +0.13% +0.20% +0.25%] index_select skip64 : Elapsed 0.085 ms (8.529 ms / 100) 8.522 -> 8.523 ( +0.01%) [ +0.00% +0.12% +0.16% / +0.01% +0.33% +0.20%] index_select skip256 : Elapsed 0.085 ms (8.522 ms / 100) 8.563 -> 8.556 ( -0.08%) [ +0.14% +0.00% +0.22% / +0.32% -0.08% +0.15%] index_select spread : Elapsed 0.086 ms (8.575 ms / 100) 8.575 -> 8.576 ( +0.01%) [ +0.00% +0.05% +0.09% / +0.01% +0.02% +0.07%] index_select strided 3 : Elapsed 0.086 ms (8.575 ms / 100) 8.578 -> 8.560 ( -0.21%) [ +0.21% +0.00% +0.26% / -0.21% +0.20% +0.27%] index_select random : Elapsed 0.086 ms (8.596 ms / 100) 8.553 -> 8.570 ( +0.20%) [ +0.00% +0.29% +0.23% / +0.20% +0.57% +0.41%] index_select random_sorted : Elapsed 0.086 ms (8.553 ms / 100) B = [40, 20, 16, 4] (stride (1, 40, 800, 12800)) A = [40, 5, 16, 4] (stride (1, 2560, 160, 40)) dim = 1 1.823 -> 1.829 ( +0.33%) [ +0.22% +0.16% +0.00% / +0.33% +0.55% +0.77%] index_add_ linear : Elapsed 0.018 ms (1.827 ms / 100) 1.777 -> 1.777 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.45% +0.79%] index_copy_ linear : Elapsed 0.018 ms (1.778 ms / 100) 1.825 -> 1.827 ( +0.11%) [ +0.00% +0.11% +0.16% / +0.11% +0.49% +0.55%] index_add_ reverse : Elapsed 0.018 ms (1.825 ms / 100) 1.774 -> 1.776 ( +0.11%) [ +0.00% +0.23% +0.23% / +0.11% +0.56% +0.51%] index_copy_ reverse : Elapsed 0.018 ms (1.774 ms / 100) 1.845 -> 1.845 ( +0.00%) [ +0.11% +0.00% +0.16% / +0.00% +0.38% +0.27%] index_add_ spread : Elapsed 0.018 ms (1.847 ms / 100) 1.796 -> 1.800 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.33% +0.28%] index_copy_ spread : Elapsed 0.018 ms (1.796 ms / 100) 1.840 -> 1.843 ( +0.16%) [ +0.11% +0.22% +0.00% / +0.16% +0.65% +0.87%] index_add_ strided 3 : Elapsed 0.018 ms (1.842 ms / 100) 1.791 -> 1.792 ( +0.06%) [ +0.00% +0.22% +0.17% / +0.06% +0.56% +0.39%] index_copy_ strided 3 : Elapsed 0.018 ms (1.791 ms / 100) 1.833 -> 1.836 ( +0.16%) [ +0.22% +0.33% +0.00% / +0.16% +0.76% +0.60%] index_add_ strided 7 : Elapsed 0.018 ms (1.837 ms / 100) 1.783 -> 1.783 ( +0.00%) [ +0.22% +0.00% +0.06% / +0.00% +0.79% +0.50%] index_copy_ strided 7 : Elapsed 0.018 ms (1.787 ms / 100) 1.831 -> 1.835 ( +0.22%) [ +0.05% +0.33% +0.00% / +0.22% +0.82% +0.76%] index_add_ perm : Elapsed 0.018 ms (1.832 ms / 100) 1.781 -> 1.784 ( +0.17%) [ +0.00% +0.34% +0.06% / +0.17% +0.95% +0.95%] index_copy_ perm : Elapsed 0.018 ms (1.781 ms / 100) 1.831 -> 1.831 ( +0.00%) [ +0.00% +0.16% +0.05% / +0.00% +0.98% +0.93%] index_add_ perm_sorted : Elapsed 0.018 ms (1.831 ms / 100) 1.781 -> 1.783 ( +0.11%) [ +0.11% +0.17% +0.00% / +0.11% +1.07% +0.90%] index_copy_ perm_sorted : Elapsed 0.018 ms (1.783 ms / 100) 8.205 -> 8.219 ( +0.17%) [ +0.05% +0.10% +0.00% / +0.17% +0.22% +0.27%] index_select const : Elapsed 0.082 ms (8.209 ms / 100) 8.223 -> 8.242 ( +0.23%) [ +0.00% +0.21% +0.21% / +0.34% +0.23% +0.40%] index_select wrap : Elapsed 0.082 ms (8.223 ms / 100) 8.237 -> 8.230 ( -0.08%) [ +0.00% +0.04% +0.16% / -0.08% +0.28% +0.11%] index_select linear : Elapsed 0.082 ms (8.237 ms / 100) 8.226 -> 8.229 ( +0.04%) [ +0.00% +0.29% +0.13% / +0.04% +0.30% +0.33%] index_select reverse : Elapsed 0.082 ms (8.226 ms / 100) 8.198 -> 8.207 ( +0.11%) [ +0.34% +0.10% +0.00% / +0.11% +0.60% +0.32%] index_select skip64 : Elapsed 0.082 ms (8.226 ms / 100) 8.217 -> 8.209 ( -0.10%) [ +0.21% +0.24% +0.00% / -0.10% +0.10% +0.05%] index_select skip256 : Elapsed 0.082 ms (8.234 ms / 100) 8.232 -> 8.233 ( +0.01%) [ +0.26% +0.00% +0.11% / +0.01% +0.15% +0.52%] index_select spread : Elapsed 0.083 ms (8.253 ms / 100) 8.231 -> 8.235 ( +0.05%) [ +0.10% +0.47% +0.00% / +0.11% +0.24% +0.05%] index_select strided 3 : Elapsed 0.082 ms (8.239 ms / 100) 8.253 -> 8.242 ( -0.13%) [ +0.01% +0.00% +0.11% / -0.13% +0.15% -0.07%] index_select random : Elapsed 0.083 ms (8.254 ms / 100) 8.235 -> 8.234 ( -0.01%) [ +0.00% +0.09% +0.13% / -0.01% +0.19% +0.12%] index_select random_sorted : Elapsed 0.082 ms (8.235 ms / 100) out_shape = [40, 5, 20, 4] in_shape = [40, 5, 16, 4] idx_dim = 2 B = [40, 5, 20, 4] (stride (400, 1, 5, 100)) A = [40, 5, 16, 4] (stride (16, 640, 1, 3200)) dim = 2 4.404 -> 4.416 ( +0.27%) [ +0.00% +0.39% +0.05% / +0.27% +0.68% +0.79%] index_add_ linear : Elapsed 0.044 ms (4.404 ms / 100) 4.233 -> 4.245 ( +0.28%) [ +0.00% +0.28% +0.19% / +0.28% +0.94% +0.76%] index_copy_ linear : Elapsed 0.042 ms (4.233 ms / 100) 4.408 -> 4.406 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.61% +0.34%] index_add_ reverse : Elapsed 0.044 ms (4.408 ms / 100) 4.244 -> 4.249 ( +0.12%) [ +0.12% +0.05% +0.00% / +0.12% +0.71% +0.49%] index_copy_ reverse : Elapsed 0.042 ms (4.249 ms / 100) 4.406 -> 4.404 ( -0.05%) [ +0.05% +0.00% +0.14% / -0.05% +0.77% +0.75%] index_add_ spread : Elapsed 0.044 ms (4.408 ms / 100) 4.247 -> 4.247 ( +0.00%) [ +0.09% +0.00% +0.12% / +0.00% +0.87% +0.82%] index_copy_ spread : Elapsed 0.043 ms (4.251 ms / 100) 4.402 -> 4.410 ( +0.18%) [ +0.09% +0.00% +0.09% / +0.18% +0.82% +0.95%] index_add_ strided 3 : Elapsed 0.044 ms (4.406 ms / 100) 4.236 -> 4.244 ( +0.19%) [ +0.07% +0.00% +0.07% / +0.19% +0.92% +0.92%] index_copy_ strided 3 : Elapsed 0.042 ms (4.239 ms / 100) 4.406 -> 4.404 ( -0.05%) [ +0.11% +0.00% +0.14% / -0.05% +0.73% +0.79%] index_add_ strided 7 : Elapsed 0.044 ms (4.411 ms / 100) 4.240 -> 4.245 ( +0.12%) [ +0.00% +0.12% +0.17% / +0.12% +0.80% +0.75%] index_copy_ strided 7 : Elapsed 0.042 ms (4.240 ms / 100) 4.406 -> 4.410 ( +0.09%) [ +0.02% +0.00% +0.09% / +0.09% +0.79% +0.75%] index_add_ perm : Elapsed 0.044 ms (4.407 ms / 100) 4.249 -> 4.250 ( +0.02%) [ +0.00% +0.12% +0.09% / +0.02% +1.13% +0.61%] index_copy_ perm : Elapsed 0.042 ms (4.249 ms / 100) 4.400 -> 4.408 ( +0.18%) [ +0.30% +0.23% +0.00% / +0.18% +0.84% +0.80%] index_add_ perm_sorted : Elapsed 0.044 ms (4.413 ms / 100) 4.242 -> 4.246 ( +0.09%) [ +0.12% +0.17% +0.00% / +0.09% +0.83% +0.75%] index_copy_ perm_sorted : Elapsed 0.042 ms (4.247 ms / 100) 5.575 -> 5.569 ( -0.11%) [ +0.05% +0.00% +0.00% / +0.04% +0.02% -0.11%] index_select const : Elapsed 0.056 ms (5.578 ms / 100) 5.578 -> 5.579 ( +0.02%) [ +0.14% +0.00% +0.02% / +0.11% +0.02% +0.09%] index_select wrap : Elapsed 0.056 ms (5.586 ms / 100) 5.579 -> 5.576 ( -0.05%) [ +0.04% +0.07% +0.00% / +0.00% -0.05% -0.04%] index_select linear : Elapsed 0.056 ms (5.581 ms / 100) 5.582 -> 5.575 ( -0.13%) [ +0.14% +0.05% +0.00% / +0.07% -0.13% +0.05%] index_select reverse : Elapsed 0.056 ms (5.590 ms / 100) 5.570 -> 5.571 ( +0.02%) [ +0.11% +0.07% +0.00% / +0.16% +0.02% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.576 ms / 100) 5.563 -> 5.574 ( +0.20%) [ +0.27% +0.11% +0.00% / +0.20% +0.27% +0.29%] index_select skip256 : Elapsed 0.056 ms (5.578 ms / 100) 5.580 -> 5.578 ( -0.04%) [ +0.00% +0.13% +0.11% / +0.11% +0.09% -0.04%] index_select spread : Elapsed 0.056 ms (5.580 ms / 100) 5.585 -> 5.580 ( -0.09%) [ +0.07% +0.00% +0.00% / +0.07% -0.09% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.589 ms / 100) 5.581 -> 5.580 ( -0.02%) [ +0.00% +0.05% +0.05% / +0.22% -0.02% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.581 ms / 100) 5.581 -> 5.580 ( -0.02%) [ +0.16% +0.00% +0.05% / +0.04% +0.04% -0.02%] index_select strided 7 : Elapsed 0.056 ms (5.590 ms / 100) 5.581 -> 5.580 ( -0.02%) [ +0.02% +0.04% +0.00% / -0.02% +0.04% -0.02%] index_select strided 8 : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.579 ( +0.00%) [ +0.02% +0.00% +0.09% / +0.05% +0.00% +0.04%] index_select random : Elapsed 0.056 ms (5.580 ms / 100) 5.583 -> 5.579 ( -0.07%) [ +0.02% +0.07% +0.00% / +0.09% -0.05% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.584 ms / 100) B = [40, 5, 20, 4] (stride (1, 40, 800, 200)) A = [40, 5, 16, 4] (stride (16, 2560, 1, 640)) dim = 2 4.390 -> 4.397 ( +0.16%) [ +0.00% +0.09% +0.14% / +0.16% +0.96% +0.77%] index_add_ linear : Elapsed 0.044 ms (4.390 ms / 100) 4.230 -> 4.241 ( +0.26%) [ +0.14% +0.00% +0.14% / +0.26% +0.97% +0.80%] index_copy_ linear : Elapsed 0.042 ms (4.236 ms / 100) 4.393 -> 4.395 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.05% +0.66% +0.66%] index_add_ reverse : Elapsed 0.044 ms (4.393 ms / 100) 4.236 -> 4.238 ( +0.05%) [ +0.05% +0.17% +0.00% / +0.05% +0.78% +0.71%] index_copy_ reverse : Elapsed 0.042 ms (4.238 ms / 100) 4.400 -> 4.399 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.68% +0.77%] index_add_ spread : Elapsed 0.044 ms (4.400 ms / 100) 4.243 -> 4.246 ( +0.07%) [ +0.00% +0.00% +0.26% / +0.07% +0.61% +0.61%] index_copy_ spread : Elapsed 0.042 ms (4.243 ms / 100) 4.402 -> 4.395 ( -0.16%) [ +0.02% +0.07% +0.00% / -0.16% +0.77% +0.55%] index_add_ strided 3 : Elapsed 0.044 ms (4.403 ms / 100) 4.237 -> 4.242 ( +0.12%) [ +0.00% +0.05% +0.07% / +0.12% +0.85% +0.68%] index_copy_ strided 3 : Elapsed 0.042 ms (4.237 ms / 100) 4.393 -> 4.391 ( -0.05%) [ +0.09% +0.00% +0.09% / -0.05% +0.68% +0.73%] index_add_ strided 7 : Elapsed 0.044 ms (4.397 ms / 100) 4.240 -> 4.236 ( -0.09%) [ +0.12% +0.07% +0.00% / -0.09% +0.54% +0.57%] index_copy_ strided 7 : Elapsed 0.042 ms (4.245 ms / 100) 4.401 -> 4.389 ( -0.27%) [ +0.05% +0.02% +0.00% / -0.27% +0.32% +0.61%] index_add_ perm : Elapsed 0.044 ms (4.403 ms / 100) 4.235 -> 4.236 ( +0.02%) [ +0.09% +0.07% +0.00% / +0.02% +0.54% +0.73%] index_copy_ perm : Elapsed 0.042 ms (4.239 ms / 100) 4.399 -> 4.408 ( +0.20%) [ +0.14% +0.00% +0.11% / +0.20% +0.75% +0.75%] index_add_ perm_sorted : Elapsed 0.044 ms (4.405 ms / 100) 4.244 -> 4.239 ( -0.12%) [ +0.02% +0.00% +0.02% / -0.12% +0.66% +0.54%] index_copy_ perm_sorted : Elapsed 0.042 ms (4.245 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.07% +0.16% +0.00% / -0.04% +0.04% +0.02%] index_select const : Elapsed 0.056 ms (5.582 ms / 100) 5.584 -> 5.581 ( -0.05%) [ +0.05% +0.00% +0.13% / +0.04% +0.02% -0.05%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.580 -> 5.586 ( +0.11%) [ +0.04% +0.00% +0.14% / +0.11% +0.14% +0.16%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.583 -> 5.581 ( -0.04%) [ +0.13% +0.00% +0.13% / -0.04% +0.29% +0.11%] index_select reverse : Elapsed 0.056 ms (5.590 ms / 100) 5.578 -> 5.574 ( -0.07%) [ +0.00% +0.00% +0.02% / -0.07% -0.02% +0.00%] index_select skip64 : Elapsed 0.056 ms (5.578 ms / 100) 5.573 -> 5.575 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.04% +0.05% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.583 -> 5.586 ( +0.05%) [ +0.00% +0.00% +0.07% / +0.05% +0.18% +0.16%] index_select spread : Elapsed 0.056 ms (5.583 ms / 100) 5.582 -> 5.588 ( +0.11%) [ +0.00% +0.05% +0.11% / +0.11% +0.20% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.582 ms / 100) 5.585 -> 5.584 ( -0.02%) [ +0.05% +0.00% +0.04% / -0.02% +0.20% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.588 ms / 100) 5.583 -> 5.585 ( +0.04%) [ +0.11% +0.13% +0.00% / +0.04% +0.14% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.589 ms / 100) 5.582 -> 5.587 ( +0.09%) [ +0.11% +0.04% +0.00% / +0.09% +0.25% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.588 ms / 100) 5.580 -> 5.588 ( +0.14%) [ +0.02% +0.00% +0.00% / +0.14% +0.27% +0.20%] index_select random : Elapsed 0.056 ms (5.581 ms / 100) 5.584 -> 5.583 ( -0.02%) [ +0.13% +0.00% +0.11% / -0.02% +0.13% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.591 ms / 100) out_shape = [40, 5, 16, 20] in_shape = [40, 5, 16, 4] idx_dim = 3 B = [40, 5, 16, 20] (stride (320, 12800, 20, 1)) A = [40, 5, 16, 4] (stride (1, 40, 200, 3200)) dim = 3 2.251 -> 2.250 ( -0.04%) [ +0.00% +0.13% +0.09% / -0.04% +0.31% +0.36%] index_add_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.198 -> 2.198 ( +0.00%) [ +0.18% +0.14% +0.00% / +0.00% +0.18% +0.14%] index_copy_ linear : Elapsed 0.022 ms (2.202 ms / 100) 2.251 -> 2.257 ( +0.27%) [ +0.00% +0.22% +0.09% / +0.27% +0.62% +0.53%] index_add_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.199 -> 2.204 ( +0.23%) [ +0.00% +0.05% +0.00% / +0.23% +0.73% +0.41%] index_copy_ reverse : Elapsed 0.022 ms (2.199 ms / 100) 2.290 -> 2.293 ( +0.13%) [ +0.09% +0.26% +0.00% / +0.13% +0.44% +0.66%] index_add_ spread : Elapsed 0.023 ms (2.292 ms / 100) 2.297 -> 2.301 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.17% +0.65% +0.65%] index_copy_ spread : Elapsed 0.023 ms (2.297 ms / 100) 2.279 -> 2.282 ( +0.13%) [ +0.18% +0.13% +0.00% / +0.13% +0.44% +0.48%] index_add_ strided 3 : Elapsed 0.023 ms (2.283 ms / 100) 2.262 -> 2.267 ( +0.22%) [ +0.35% +0.00% +0.13% / +0.22% +0.62% +0.62%] index_copy_ strided 3 : Elapsed 0.023 ms (2.270 ms / 100) 2.286 -> 2.296 ( +0.44%) [ +0.00% +0.26% +0.13% / +0.44% +0.44% +0.48%] index_add_ strided 7 : Elapsed 0.023 ms (2.286 ms / 100) 2.296 -> 2.301 ( +0.22%) [ +0.09% +0.39% +0.00% / +0.22% +0.52% +0.61%] index_copy_ strided 7 : Elapsed 0.023 ms (2.298 ms / 100) 2.291 -> 2.298 ( +0.31%) [ +0.17% +0.00% +0.09% / +0.39% +0.31% +0.39%] index_add_ perm : Elapsed 0.023 ms (2.295 ms / 100) 2.299 -> 2.299 ( +0.00%) [ +0.26% +0.13% +0.00% / +0.00% +0.52% +0.48%] index_copy_ perm : Elapsed 0.023 ms (2.305 ms / 100) 2.290 -> 2.293 ( +0.13%) [ +0.13% +0.09% +0.00% / +0.13% +0.31% +0.26%] index_add_ perm_sorted : Elapsed 0.023 ms (2.293 ms / 100) 2.294 -> 2.298 ( +0.17%) [ +0.35% +0.31% +0.00% / +0.17% +0.74% +0.74%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.302 ms / 100) 9.259 -> 9.258 ( -0.01%) [ +0.14% +0.01% +0.00% / -0.01% +0.06% +0.02%] index_select const : Elapsed 0.093 ms (9.272 ms / 100) 9.308 -> 9.327 ( +0.20%) [ +0.15% +0.00% +0.21% / +0.35% +0.31% +0.20%] index_select wrap : Elapsed 0.093 ms (9.322 ms / 100) 9.304 -> 9.301 ( -0.03%) [ +0.03% +0.12% +0.00% / +0.08% -0.03% +0.09%] index_select linear : Elapsed 0.093 ms (9.307 ms / 100) 9.304 -> 9.311 ( +0.08%) [ +0.03% +0.00% +0.19% / +0.34% +0.08% +0.10%] index_select reverse : Elapsed 0.093 ms (9.307 ms / 100) 9.258 -> 9.267 ( +0.10%) [ +0.06% +0.00% +0.13% / +0.21% +0.10% +0.30%] index_select skip64 : Elapsed 0.093 ms (9.264 ms / 100) 9.258 -> 9.260 ( +0.02%) [ +0.00% +0.01% +0.33% / +0.05% +0.16% +0.02%] index_select skip256 : Elapsed 0.093 ms (9.258 ms / 100) 9.309 -> 9.307 ( -0.02%) [ +0.06% +0.00% +0.19% / +0.20% +0.31% -0.02%] index_select spread : Elapsed 0.093 ms (9.315 ms / 100) 9.315 -> 9.318 ( +0.03%) [ +0.17% +0.03% +0.00% / +0.15% +0.15% +0.03%] index_select strided 3 : Elapsed 0.093 ms (9.331 ms / 100) 9.316 -> 9.330 ( +0.15%) [ +0.19% +0.00% +0.14% / +0.16% +0.20% +0.15%] index_select random : Elapsed 0.093 ms (9.334 ms / 100) 9.304 -> 9.311 ( +0.08%) [ +0.02% +0.17% +0.00% / +0.10% +0.13% +0.08%] index_select random_sorted : Elapsed 0.093 ms (9.306 ms / 100) B = [40, 5, 16, 20] (stride (1, 12800, 800, 40)) A = [40, 5, 16, 4] (stride (320, 1, 20, 5)) dim = 3 2.049 -> 2.046 ( -0.15%) [ +0.98% +0.00% +0.05% / -0.15% +0.68% +0.63%] index_add_ linear : Elapsed 0.021 ms (2.069 ms / 100) 1.997 -> 2.001 ( +0.20%) [ +0.25% +0.05% +0.00% / +0.20% +1.25% +0.85%] index_copy_ linear : Elapsed 0.020 ms (2.002 ms / 100) 2.048 -> 2.049 ( +0.05%) [ +0.00% +0.24% +0.34% / +0.05% +0.63% +0.49%] index_add_ reverse : Elapsed 0.020 ms (2.048 ms / 100) 1.997 -> 2.002 ( +0.25%) [ +0.00% +0.40% +0.65% / +0.25% +0.85% +0.90%] index_copy_ reverse : Elapsed 0.020 ms (1.997 ms / 100) 2.042 -> 2.045 ( +0.15%) [ +0.00% +0.39% +0.05% / +0.15% +1.32% +0.93%] index_add_ spread : Elapsed 0.020 ms (2.042 ms / 100) 1.995 -> 1.999 ( +0.20%) [ +0.05% +0.60% +0.00% / +0.20% +1.05% +1.10%] index_copy_ spread : Elapsed 0.020 ms (1.996 ms / 100) 2.044 -> 2.047 ( +0.15%) [ +0.24% +0.54% +0.00% / +0.15% +1.13% +0.93%] index_add_ strided 3 : Elapsed 0.020 ms (2.049 ms / 100) 1.999 -> 1.998 ( -0.05%) [ +0.00% +0.15% +0.00% / -0.05% +1.10% +0.95%] index_copy_ strided 3 : Elapsed 0.020 ms (1.999 ms / 100) 2.045 -> 2.047 ( +0.10%) [ +0.34% +0.54% +0.00% / +0.10% +1.08% +0.88%] index_add_ strided 7 : Elapsed 0.021 ms (2.052 ms / 100) 1.999 -> 2.005 ( +0.30%) [ +0.15% +0.40% +0.00% / +0.30% +1.25% +1.20%] index_copy_ strided 7 : Elapsed 0.020 ms (2.002 ms / 100) 2.051 -> 2.058 ( +0.34%) [ +0.44% +0.10% +0.00% / +0.34% +1.12% +0.93%] index_add_ perm : Elapsed 0.021 ms (2.060 ms / 100) 2.002 -> 2.007 ( +0.25%) [ +0.35% +0.15% +0.00% / +0.25% +1.35% +1.05%] index_copy_ perm : Elapsed 0.020 ms (2.009 ms / 100) 2.048 -> 2.052 ( +0.20%) [ +0.00% +0.24% +0.29% / +0.20% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.020 ms (2.048 ms / 100) 2.001 -> 2.006 ( +0.25%) [ +0.00% +0.15% +0.20% / +0.25% +0.85% +0.70%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.001 ms / 100) 8.758 -> 8.739 ( -0.22%) [ +0.00% +0.15% +0.07% / -0.06% -0.06% -0.22%] index_select const : Elapsed 0.088 ms (8.758 ms / 100) 8.767 -> 8.754 ( -0.15%) [ +0.09% +0.00% +0.07% / +0.05% -0.15% -0.02%] index_select wrap : Elapsed 0.088 ms (8.775 ms / 100) 8.764 -> 8.757 ( -0.08%) [ +0.18% +0.00% +0.23% / +0.08% +0.02% -0.08%] index_select linear : Elapsed 0.088 ms (8.780 ms / 100) 8.758 -> 8.748 ( -0.11%) [ +0.08% +0.02% +0.00% / +0.21% +0.09% -0.11%] index_select reverse : Elapsed 0.088 ms (8.765 ms / 100) 8.751 -> 8.746 ( -0.06%) [ +0.21% +0.00% +0.03% / +0.18% -0.06% -0.02%] index_select skip64 : Elapsed 0.088 ms (8.769 ms / 100) 8.761 -> 8.750 ( -0.13%) [ +0.02% +0.02% +0.00% / -0.13% -0.11% -0.02%] index_select skip256 : Elapsed 0.088 ms (8.763 ms / 100) 8.757 -> 8.747 ( -0.11%) [ +0.15% +0.07% +0.00% / +0.24% -0.09% -0.11%] index_select spread : Elapsed 0.088 ms (8.770 ms / 100) 8.765 -> 8.764 ( -0.01%) [ +0.06% +0.00% +0.09% / -0.01% +0.06% +0.11%] index_select strided 3 : Elapsed 0.088 ms (8.770 ms / 100) 8.762 -> 8.770 ( +0.09%) [ +0.00% +0.22% +0.07% / +0.18% +0.09% +0.11%] index_select random : Elapsed 0.088 ms (8.762 ms / 100) 8.767 -> 8.755 ( -0.14%) [ +0.10% +0.00% +0.11% / +0.15% +0.22% -0.14%] index_select random_sorted : Elapsed 0.088 ms (8.776 ms / 100) B = [40, 5, 16, 20] (stride (100, 20, 4000, 1)) A = [40, 5, 16, 4] (stride (320, 64, 4, 1)) dim = 3 1.813 -> 1.817 ( +0.22%) [ +0.28% +0.44% +0.00% / +0.39% +0.28% +0.22%] index_add_ linear : Elapsed 0.018 ms (1.818 ms / 100) 1.784 -> 1.788 ( +0.22%) [ +0.39% +0.17% +0.00% / +0.22% +0.22% +0.28%] index_copy_ linear : Elapsed 0.018 ms (1.791 ms / 100) 1.799 -> 1.796 ( -0.17%) [ +0.00% +0.22% +0.00% / -0.17% +0.94% +0.78%] index_add_ reverse : Elapsed 0.018 ms (1.799 ms / 100) 1.769 -> 1.771 ( +0.11%) [ +0.00% +0.34% +0.28% / +0.11% +1.02% +0.73%] index_copy_ reverse : Elapsed 0.018 ms (1.769 ms / 100) 1.837 -> 1.838 ( +0.05%) [ +0.11% +0.38% +0.00% / +0.05% +0.60% +0.71%] index_add_ spread : Elapsed 0.018 ms (1.839 ms / 100) 1.873 -> 1.878 ( +0.27%) [ +0.05% +0.00% +0.05% / +0.27% +1.33% +1.07%] index_copy_ spread : Elapsed 0.019 ms (1.874 ms / 100) 1.848 -> 1.852 ( +0.22%) [ +0.05% +0.00% +0.05% / +0.22% +0.27% +0.43%] index_add_ strided 3 : Elapsed 0.018 ms (1.849 ms / 100) 1.851 -> 1.854 ( +0.16%) [ +0.22% +0.00% +0.16% / +0.16% +0.43% +0.76%] index_copy_ strided 3 : Elapsed 0.019 ms (1.855 ms / 100) 1.845 -> 1.846 ( +0.05%) [ +0.00% +0.11% +0.05% / +0.05% +0.76% +0.70%] index_add_ strided 7 : Elapsed 0.018 ms (1.845 ms / 100) 1.878 -> 1.882 ( +0.21%) [ +0.00% +0.37% +0.21% / +0.21% +1.06% +1.06%] index_copy_ strided 7 : Elapsed 0.019 ms (1.878 ms / 100) 1.855 -> 1.854 ( -0.05%) [ +0.22% +0.22% +0.00% / -0.05% +0.43% +0.54%] index_add_ perm : Elapsed 0.019 ms (1.859 ms / 100) 1.884 -> 1.888 ( +0.21%) [ +0.21% +0.27% +0.00% / +0.21% +0.74% +0.53%] index_copy_ perm : Elapsed 0.019 ms (1.888 ms / 100) 1.846 -> 1.850 ( +0.22%) [ +0.11% +0.00% +0.22% / +0.22% +0.49% +0.60%] index_add_ perm_sorted : Elapsed 0.018 ms (1.848 ms / 100) 1.879 -> 1.885 ( +0.32%) [ +0.21% +0.00% +0.21% / +0.32% +1.22% +1.01%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.883 ms / 100) 8.004 -> 8.012 ( +0.10%) [ +0.00% +0.20% +0.05% / +0.12% +0.11% +0.10%] index_select const : Elapsed 0.080 ms (8.004 ms / 100) 8.006 -> 8.025 ( +0.24%) [ +0.09% +0.00% +0.00% / +0.30% +0.35% +0.24%] index_select wrap : Elapsed 0.080 ms (8.013 ms / 100) 8.008 -> 8.016 ( +0.10%) [ +0.19% +0.00% +0.06% / +0.19% +0.22% +0.10%] index_select linear : Elapsed 0.080 ms (8.023 ms / 100) 8.008 -> 8.015 ( +0.09%) [ +0.05% +0.01% +0.00% / +0.09% +0.51% +0.14%] index_select reverse : Elapsed 0.080 ms (8.012 ms / 100) 8.013 -> 8.008 ( -0.06%) [ +0.16% +0.00% +0.00% / -0.06% +0.32% +0.02%] index_select skip64 : Elapsed 0.080 ms (8.026 ms / 100) 8.013 -> 8.022 ( +0.11%) [ +0.09% +0.12% +0.00% / +0.11% +0.35% +0.20%] index_select skip256 : Elapsed 0.080 ms (8.020 ms / 100) 8.007 -> 8.013 ( +0.07%) [ +0.12% +0.10% +0.00% / +0.16% +0.27% +0.07%] index_select spread : Elapsed 0.080 ms (8.017 ms / 100) 8.015 -> 8.014 ( -0.01%) [ +0.07% +0.01% +0.00% / -0.01% +0.06% +0.20%] index_select strided 3 : Elapsed 0.080 ms (8.021 ms / 100) 8.010 -> 8.010 ( +0.00%) [ +0.01% +0.00% +0.04% / +0.26% +0.00% +0.35%] index_select random : Elapsed 0.080 ms (8.011 ms / 100) 8.009 -> 8.014 ( +0.06%) [ +0.06% +0.01% +0.00% / +0.06% +0.14% +0.17%] index_select random_sorted : Elapsed 0.080 ms (8.014 ms / 100) B = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) A = [40, 5, 16, 4] (stride (320, 4, 20, 1)) dim = 3 2.162 -> 2.160 ( -0.09%) [ +0.00% +0.23% +0.23% / -0.09% -0.05% +0.56%] index_add_ linear : Elapsed 0.022 ms (2.162 ms / 100) 2.118 -> 2.115 ( -0.14%) [ +0.05% +0.33% +0.00% / +0.14% -0.14% +0.52%] index_copy_ linear : Elapsed 0.021 ms (2.119 ms / 100) 2.156 -> 2.157 ( +0.05%) [ +0.00% +0.19% +0.28% / +0.05% +0.32% +0.46%] index_add_ reverse : Elapsed 0.022 ms (2.156 ms / 100) 2.111 -> 2.117 ( +0.28%) [ +0.05% +0.00% +0.33% / +0.28% +0.38% +0.76%] index_copy_ reverse : Elapsed 0.021 ms (2.112 ms / 100) 2.135 -> 2.139 ( +0.19%) [ +0.89% +0.00% +0.61% / +0.19% +0.56% +1.03%] index_add_ spread : Elapsed 0.022 ms (2.154 ms / 100) 2.114 -> 2.115 ( +0.05%) [ +0.85% +0.00% +0.57% / +0.05% +0.85% +0.95%] index_copy_ spread : Elapsed 0.021 ms (2.132 ms / 100) 2.173 -> 2.173 ( +0.00%) [ +0.00% +0.05% +0.09% / +0.00% +0.23% +0.41%] index_add_ strided 3 : Elapsed 0.022 ms (2.173 ms / 100) 2.143 -> 2.144 ( +0.05%) [ +0.05% +0.37% +0.00% / +0.05% +0.61% +0.28%] index_copy_ strided 3 : Elapsed 0.021 ms (2.144 ms / 100) 2.142 -> 2.142 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.14% +0.37% +0.00%] index_add_ strided 7 : Elapsed 0.021 ms (2.142 ms / 100) 2.110 -> 2.117 ( +0.33%) [ +0.28% +0.38% +0.00% / +0.33% +0.71% +0.43%] index_copy_ strided 7 : Elapsed 0.021 ms (2.116 ms / 100) 2.135 -> 2.147 ( +0.56%) [ +0.00% +0.84% +0.28% / +0.94% +0.56% +0.66%] index_add_ perm : Elapsed 0.021 ms (2.135 ms / 100) 2.113 -> 2.124 ( +0.52%) [ +0.00% +0.71% +0.47% / +0.80% +0.52% +0.71%] index_copy_ perm : Elapsed 0.021 ms (2.113 ms / 100) 2.145 -> 2.137 ( -0.37%) [ +0.05% +0.00% +0.23% / +0.28% +0.00% -0.37%] index_add_ perm_sorted : Elapsed 0.021 ms (2.146 ms / 100) 2.125 -> 2.113 ( -0.56%) [ +0.14% +0.14% +0.00% / +0.05% +0.33% -0.56%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.128 ms / 100) 9.298 -> 9.301 ( +0.03%) [ +0.24% +0.03% +0.00% / +0.03% +0.09% +0.33%] index_select const : Elapsed 0.093 ms (9.320 ms / 100) 9.292 -> 9.296 ( +0.04%) [ +0.00% +0.34% +0.23% / +0.04% +0.24% +0.06%] index_select wrap : Elapsed 0.093 ms (9.292 ms / 100) 9.295 -> 9.294 ( -0.01%) [ +0.05% +0.20% +0.00% / +0.11% +0.17% -0.01%] index_select linear : Elapsed 0.093 ms (9.300 ms / 100) 9.296 -> 9.289 ( -0.08%) [ +0.01% +0.01% +0.00% / -0.08% +0.02% -0.01%] index_select reverse : Elapsed 0.093 ms (9.297 ms / 100) 9.282 -> 9.297 ( +0.16%) [ +0.05% +0.30% +0.00% / +0.23% +0.23% +0.16%] index_select skip64 : Elapsed 0.093 ms (9.287 ms / 100) 9.290 -> 9.298 ( +0.09%) [ +0.00% +0.02% +0.06% / +0.29% +0.16% +0.09%] index_select skip256 : Elapsed 0.093 ms (9.290 ms / 100) 9.293 -> 9.293 ( +0.00%) [ +0.11% +0.00% +0.15% / +0.24% +0.02% +0.00%] index_select spread : Elapsed 0.093 ms (9.303 ms / 100) 9.288 -> 9.283 ( -0.05%) [ +0.00% +0.25% +0.22% / +0.01% +0.20% -0.05%] index_select strided 3 : Elapsed 0.093 ms (9.288 ms / 100) 9.293 -> 9.298 ( +0.05%) [ +0.01% +0.00% +0.05% / +0.32% +0.05% +0.06%] index_select random : Elapsed 0.093 ms (9.294 ms / 100) 9.290 -> 9.297 ( +0.08%) [ +0.00% +0.09% +0.16% / +0.11% +0.31% +0.08%] index_select random_sorted : Elapsed 0.093 ms (9.290 ms / 100) B = [40, 5, 16, 20] (stride (1, 640, 40, 3200)) A = [40, 5, 16, 4] (stride (4, 160, 800, 1)) dim = 3 2.138 -> 2.136 ( -0.09%) [ +0.09% +0.05% +0.00% / +0.00% +0.05% -0.09%] index_add_ linear : Elapsed 0.021 ms (2.140 ms / 100) 2.089 -> 2.090 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.10% +0.05% +0.10%] index_copy_ linear : Elapsed 0.021 ms (2.090 ms / 100) 2.131 -> 2.130 ( -0.05%) [ +0.05% +0.09% +0.00% / -0.05% +0.38% +0.28%] index_add_ reverse : Elapsed 0.021 ms (2.132 ms / 100) 2.087 -> 2.084 ( -0.14%) [ +0.00% +0.00% +0.05% / -0.14% +0.43% +0.14%] index_copy_ reverse : Elapsed 0.021 ms (2.087 ms / 100) 2.136 -> 2.134 ( -0.09%) [ +0.00% +0.14% +0.00% / -0.09% +0.14% +0.23%] index_add_ spread : Elapsed 0.021 ms (2.136 ms / 100) 2.088 -> 2.089 ( +0.05%) [ +0.00% +0.19% +0.10% / +0.05% +0.10% +0.10%] index_copy_ spread : Elapsed 0.021 ms (2.088 ms / 100) 2.140 -> 2.138 ( -0.09%) [ +0.05% +0.19% +0.00% / +0.05% -0.09% -0.05%] index_add_ strided 3 : Elapsed 0.021 ms (2.141 ms / 100) 2.090 -> 2.090 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.10% +0.14% +0.00%] index_copy_ strided 3 : Elapsed 0.021 ms (2.092 ms / 100) 2.130 -> 2.128 ( -0.09%) [ +0.00% +0.00% +0.05% / -0.09% +0.19% +0.28%] index_add_ strided 7 : Elapsed 0.021 ms (2.130 ms / 100) 2.085 -> 2.084 ( -0.05%) [ +0.05% +0.00% +0.10% / -0.05% +0.29% +0.19%] index_copy_ strided 7 : Elapsed 0.021 ms (2.086 ms / 100) 2.132 -> 2.130 ( -0.09%) [ +0.00% +0.05% +0.00% / -0.09% +0.28% +0.38%] index_add_ perm : Elapsed 0.021 ms (2.132 ms / 100) 2.087 -> 2.089 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.19% +0.14%] index_copy_ perm : Elapsed 0.021 ms (2.087 ms / 100) 2.130 -> 2.129 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.33% +0.42%] index_add_ perm_sorted : Elapsed 0.021 ms (2.133 ms / 100) 2.086 -> 2.086 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.10% +0.24%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.086 ms / 100) 8.756 -> 8.764 ( +0.09%) [ +0.00% +0.37% +0.21% / +0.15% +0.24% +0.09%] index_select const : Elapsed 0.088 ms (8.756 ms / 100) 8.755 -> 8.770 ( +0.17%) [ +0.00% +0.06% +0.11% / +0.23% +0.17% +0.30%] index_select wrap : Elapsed 0.088 ms (8.755 ms / 100) 8.756 -> 8.768 ( +0.14%) [ +0.15% +0.00% +0.08% / +0.24% +0.14% +0.25%] index_select linear : Elapsed 0.088 ms (8.769 ms / 100) 8.757 -> 8.755 ( -0.02%) [ +0.00% +0.19% +0.23% / +0.00% -0.02% +0.10%] index_select reverse : Elapsed 0.088 ms (8.757 ms / 100) 8.758 -> 8.766 ( +0.09%) [ +0.00% +0.00% +0.02% / +0.15% +0.45% +0.09%] index_select skip64 : Elapsed 0.088 ms (8.758 ms / 100) 8.744 -> 8.760 ( +0.18%) [ +0.17% +0.00% +0.30% / +0.47% +0.18% +0.38%] index_select skip256 : Elapsed 0.088 ms (8.759 ms / 100) 8.765 -> 8.771 ( +0.07%) [ +0.10% +0.00% +0.08% / +0.07% +0.13% +0.25%] index_select spread : Elapsed 0.088 ms (8.774 ms / 100) 8.759 -> 8.761 ( +0.02%) [ +0.05% +0.00% +0.26% / +0.38% +0.02% +0.14%] index_select strided 3 : Elapsed 0.088 ms (8.763 ms / 100) 8.763 -> 8.753 ( -0.11%) [ +0.09% +0.09% +0.00% / -0.11% +0.29% +0.09%] index_select random : Elapsed 0.088 ms (8.771 ms / 100) 8.761 -> 8.758 ( -0.03%) [ +0.06% +0.16% +0.00% / -0.03% +0.18% +0.01%] index_select random_sorted : Elapsed 0.088 ms (8.766 ms / 100) B = [40, 5, 16, 20] (stride (1, 640, 40, 3200)) A = [40, 5, 16, 4] (stride (1, 640, 40, 3200)) dim = 3 0.780 -> 0.788 ( +1.03%) [ +0.00% +0.90% +2.05% / +1.28% +1.03% +2.05%] index_add_ linear : Elapsed 0.008 ms (0.780 ms / 100) 0.799 -> 0.805 ( +0.75%) [ +0.13% +0.13% +0.00% / +0.75% +1.38% +1.75%] index_copy_ linear : Elapsed 0.008 ms (0.800 ms / 100) 0.791 -> 0.795 ( +0.51%) [ +0.00% +0.88% +0.00% / +0.51% +0.76% +1.01%] index_add_ reverse : Elapsed 0.008 ms (0.791 ms / 100) 0.809 -> 0.808 ( -0.12%) [ +0.00% +0.12% +0.12% / -0.12% +0.74% +0.37%] index_copy_ reverse : Elapsed 0.008 ms (0.809 ms / 100) 0.797 -> 0.785 ( -1.51%) [ +0.00% +0.13% +0.13% / +0.38% -1.51% -0.75%] index_add_ spread : Elapsed 0.008 ms (0.797 ms / 100) 0.815 -> 0.805 ( -1.23%) [ +0.61% +0.00% +0.37% / +0.98% -1.23% -0.86%] index_copy_ spread : Elapsed 0.008 ms (0.820 ms / 100) 0.792 -> 0.781 ( -1.39%) [ +1.01% +0.13% +0.00% / +0.51% -1.39% -0.63%] index_add_ strided 3 : Elapsed 0.008 ms (0.800 ms / 100) 0.807 -> 0.800 ( -0.87%) [ +0.12% +0.12% +0.00% / +0.00% -0.62% -0.87%] index_copy_ strided 3 : Elapsed 0.008 ms (0.808 ms / 100) 0.786 -> 0.788 ( +0.25%) [ +0.00% +0.00% +0.76% / +0.25% +1.65% +1.27%] index_add_ strided 7 : Elapsed 0.008 ms (0.786 ms / 100) 0.803 -> 0.804 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +1.25% +1.25%] index_copy_ strided 7 : Elapsed 0.008 ms (0.803 ms / 100) 0.782 -> 0.785 ( +0.38%) [ +0.77% +0.26% +0.00% / +0.38% +2.56% +2.05%] index_add_ perm : Elapsed 0.008 ms (0.788 ms / 100) 0.801 -> 0.802 ( +0.12%) [ +0.00% +0.50% +0.37% / +0.12% +4.99% +2.87%] index_copy_ perm : Elapsed 0.008 ms (0.801 ms / 100) 0.783 -> 0.788 ( +0.64%) [ +1.92% +0.64% +0.00% / +0.64% +2.04% +2.04%] index_add_ perm_sorted : Elapsed 0.008 ms (0.798 ms / 100) 0.804 -> 0.802 ( -0.25%) [ +0.00% +0.12% +0.12% / -0.25% +2.36% +1.74%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.804 ms / 100) 5.098 -> 5.105 ( +0.14%) [ +0.26% +0.00% +0.27% / +0.35% +0.14% +0.18%] index_select const : Elapsed 0.051 ms (5.111 ms / 100) 5.136 -> 5.130 ( -0.12%) [ +0.16% +0.16% +0.00% / -0.04% +0.12% -0.12%] index_select wrap : Elapsed 0.051 ms (5.144 ms / 100) 5.145 -> 5.148 ( +0.06%) [ +0.10% +0.00% +0.12% / +0.17% +0.27% +0.06%] index_select linear : Elapsed 0.052 ms (5.150 ms / 100) 5.107 -> 5.103 ( -0.08%) [ +0.00% +0.08% +0.14% / -0.08% +0.35% +0.41%] index_select reverse : Elapsed 0.051 ms (5.107 ms / 100) 5.104 -> 5.102 ( -0.04%) [ +0.06% +0.00% +0.08% / +0.08% +0.10% -0.04%] index_select skip64 : Elapsed 0.051 ms (5.107 ms / 100) 5.098 -> 5.104 ( +0.12%) [ +0.20% +0.00% +0.33% / +0.18% +0.12% +0.18%] index_select skip256 : Elapsed 0.051 ms (5.108 ms / 100) 5.124 -> 5.126 ( +0.04%) [ +0.04% +0.06% +0.00% / +0.04% +0.31% +0.33%] index_select spread : Elapsed 0.051 ms (5.126 ms / 100) 5.129 -> 5.134 ( +0.10%) [ +0.18% +0.00% +0.16% / +0.10% +0.12% +0.35%] index_select strided 3 : Elapsed 0.051 ms (5.138 ms / 100) 5.132 -> 5.116 ( -0.31%) [ +0.12% +0.00% +0.14% / -0.31% +0.18% +0.10%] index_select random : Elapsed 0.051 ms (5.138 ms / 100) 5.116 -> 5.131 ( +0.29%) [ +0.31% +0.00% +0.22% / +0.29% +0.35% +0.41%] index_select random_sorted : Elapsed 0.051 ms (5.132 ms / 100) B = [40, 5, 16, 20] (stride (5, 1, 200, 3200)) dim = 3 fill_cnt = 4 0.557 -> 0.558 ( +0.18%) [ +0.18% +0.36% +0.00% / +0.18% +0.72% +0.72%] index_fill_ const : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.556 ( +0.18%) [ +0.00% +0.00% +0.18% / +0.18% +0.72% +0.54%] index_fill_ linear : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.551 ( +0.18%) [ +0.36% +0.18% +0.00% / +0.18% +0.91% +0.91%] index_fill_ reverse : Elapsed 0.006 ms (0.552 ms / 100) 0.555 -> 0.556 ( +0.18%) [ +0.36% +0.18% +0.00% / +0.18% +0.36% +0.36%] index_fill_ skip64 : Elapsed 0.006 ms (0.557 ms / 100) 0.556 -> 0.556 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.18% +0.36%] index_fill_ skip256 : Elapsed 0.006 ms (0.556 ms / 100) 0.556 -> 0.556 ( +0.00%) [ +0.00% +0.36% +0.18% / +0.00% +2.70% +0.00%] index_fill_ spread : Elapsed 0.006 ms (0.556 ms / 100) 0.558 -> 0.557 ( -0.18%) [ +0.00% +0.36% +1.25% / +0.18% -0.18% +0.00%] index_fill_ strided 3 : Elapsed 0.006 ms (0.558 ms / 100) 0.557 -> 0.556 ( -0.18%) [ +0.36% +0.36% +0.00% / +0.36% -0.18% +0.00%] index_fill_ strided 5 : Elapsed 0.006 ms (0.559 ms / 100) 0.565 -> 0.560 ( -0.88%) [ +0.00% +0.18% +0.18% / +0.00% -0.88% -0.71%] index_fill_ strided 7 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +0.18% +0.36% +0.00% / +0.18% +0.36% +0.18%] index_fill_ strided 8 : Elapsed 0.006 ms (0.562 ms / 100) 0.557 -> 0.558 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +0.54% +0.72%] index_fill_ strided 16 : Elapsed 0.006 ms (0.557 ms / 100) 0.554 -> 0.551 ( -0.54%) [ +0.00% +0.36% +0.00% / +0.00% -0.36% -0.54%] index_fill_ random : Elapsed 0.006 ms (0.554 ms / 100) 0.557 -> 0.556 ( -0.18%) [ +0.00% +0.18% +0.18% / +0.36% +0.00% -0.18%] index_fill_ random_sorted : Elapsed 0.006 ms (0.557 ms / 100) 0.554 -> 0.554 ( +0.00%) [ +0.00% +0.36% +0.00% / +0.00% +0.00% +0.18%] index_fill_ perm : Elapsed 0.006 ms (0.554 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +0.00% +0.36% +0.00% / +0.18% +0.54% +0.18%] index_fill_ perm_sorted : Elapsed 0.006 ms (0.554 ms / 100) out_shape = [20, 16, 4, 5] in_shape = [40, 16, 4, 5] idx_dim = 0 B = [20, 16, 4, 5] (stride (20, 400, 1, 4)) A = [40, 16, 4, 5] (stride (80, 5, 3200, 1)) dim = 0 2.446 -> 2.446 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.29% +0.45%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.465 -> 2.465 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.08% +0.16% +0.00%] index_select wrap : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.463 ( -0.12%) [ +0.08% +0.00% +0.04% / +0.08% -0.12% -0.12%] index_select linear : Elapsed 0.025 ms (2.468 ms / 100) 2.468 -> 2.464 ( -0.16%) [ +0.00% +0.04% +0.08% / +0.04% -0.16% -0.12%] index_select reverse : Elapsed 0.025 ms (2.468 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.20% +0.12% +0.00% / +0.00% +0.16% +0.25%] index_select skip64 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.445 ( -0.12%) [ +0.00% +0.16% +0.00% / -0.12% +0.16% +0.20%] index_select skip256 : Elapsed 0.024 ms (2.448 ms / 100) 2.463 -> 2.462 ( -0.04%) [ +0.00% +0.08% +0.20% / +0.12% +0.08% -0.04%] index_select spread : Elapsed 0.025 ms (2.463 ms / 100) 2.464 -> 2.464 ( +0.00%) [ +0.12% +0.00% +0.08% / +0.00% +0.16% +0.04%] index_select strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.24% +0.20% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.454 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.16% +0.00% +0.24% / +0.08% +0.24% +0.24%] index_select strided 7 : Elapsed 0.025 ms (2.464 ms / 100) 2.452 -> 2.450 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.08% +0.20% +0.04%] index_select strided 8 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.453 ( +0.20%) [ +0.29% +0.04% +0.00% / +0.20% +0.25% +0.33%] index_select strided 16 : Elapsed 0.025 ms (2.455 ms / 100) 2.459 -> 2.456 ( -0.12%) [ +0.00% +0.04% +0.12% / -0.12% +0.28% -0.04%] index_select random : Elapsed 0.025 ms (2.459 ms / 100) 2.460 -> 2.458 ( -0.08%) [ +0.16% +0.04% +0.00% / +0.16% -0.08% +0.04%] index_select random_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.462 -> 2.461 ( -0.04%) [ +0.08% +0.20% +0.00% / +0.04% -0.04% +0.12%] index_select perm : Elapsed 0.025 ms (2.464 ms / 100) 2.465 -> 2.454 ( -0.45%) [ +0.00% +0.20% +0.16% / +0.00% -0.45% -0.20%] index_select perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) B = [20, 16, 4, 5] (stride (4, 400, 1, 80)) A = [40, 16, 4, 5] (stride (16, 1, 640, 2560)) dim = 0 2.447 -> 2.448 ( +0.04%) [ +0.16% +0.12% +0.00% / +0.04% +0.25% +0.29%] index_select const : Elapsed 0.025 ms (2.451 ms / 100) 2.460 -> 2.452 ( -0.33%) [ +0.16% +0.00% +0.37% / +0.16% -0.33% -0.04%] index_select wrap : Elapsed 0.025 ms (2.464 ms / 100) 2.461 -> 2.457 ( -0.16%) [ +0.12% +0.00% +0.24% / +0.12% -0.16% +0.04%] index_select linear : Elapsed 0.025 ms (2.464 ms / 100) 2.461 -> 2.461 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.12% +0.16% +0.00%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.446 -> 2.450 ( +0.16%) [ +0.00% +0.20% +0.08% / +0.16% +0.25% +0.25%] index_select skip64 : Elapsed 0.024 ms (2.446 ms / 100) 2.449 -> 2.445 ( -0.16%) [ +0.04% +0.04% +0.00% / -0.16% +0.24% +0.16%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.461 -> 2.463 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.33% +0.16%] index_select spread : Elapsed 0.025 ms (2.461 ms / 100) 2.460 -> 2.462 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.08% +0.28% +0.45%] index_select strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.16% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.456 ms / 100) 2.465 -> 2.462 ( -0.12%) [ +0.00% +0.00% +0.00% / +0.00% -0.12% +0.16%] index_select strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.24% +0.04% +0.33%] index_select strided 8 : Elapsed 0.025 ms (2.455 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.08% +0.00% +0.04% / -0.04% +0.41% +0.37%] index_select strided 16 : Elapsed 0.025 ms (2.453 ms / 100) 2.458 -> 2.458 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.53% +0.45%] index_select random : Elapsed 0.025 ms (2.458 ms / 100) 2.463 -> 2.464 ( +0.04%) [ +0.08% +0.08% +0.00% / +0.08% +0.04% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.462 -> 2.465 ( +0.12%) [ +0.00% +0.08% +0.08% / +0.20% +0.12% +0.12%] index_select perm : Elapsed 0.025 ms (2.462 ms / 100) 2.465 -> 2.462 ( -0.12%) [ +0.04% +0.00% +0.16% / -0.12% -0.04% +0.04%] index_select perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) B = [20, 16, 4, 5] (stride (1, 400, 20, 80)) A = [40, 16, 4, 5] (stride (320, 20, 1, 4)) dim = 0 2.442 -> 2.444 ( +0.08%) [ +0.04% +0.12% +0.00% / +0.08% +0.37% +0.25%] index_select const : Elapsed 0.024 ms (2.443 ms / 100) 2.454 -> 2.454 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.04% +0.04% +0.00%] index_select wrap : Elapsed 0.025 ms (2.454 ms / 100) 2.456 -> 2.455 ( -0.04%) [ +0.08% +0.12% +0.00% / +0.12% -0.04% -0.04%] index_select linear : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.451 ( -0.24%) [ +0.20% +0.08% +0.00% / +0.12% +0.00% -0.24%] index_select reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.445 -> 2.446 ( +0.04%) [ +0.00% +0.12% +0.20% / +0.12% +0.20% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.445 ms / 100) 2.442 -> 2.445 ( +0.12%) [ +0.20% +0.00% +0.25% / +0.16% +0.12% +0.12%] index_select skip256 : Elapsed 0.024 ms (2.447 ms / 100) 2.452 -> 2.455 ( +0.12%) [ +0.16% +0.00% +0.16% / +0.12% +0.20% +0.33%] index_select spread : Elapsed 0.025 ms (2.456 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.28% +0.00% +0.16% / +0.12% +0.08% -0.08%] index_select strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.04% +0.16%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.08% +0.00% +0.12% / +0.08% +0.08% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.448 -> 2.448 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.12% +0.20%] index_select strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.450 ( +0.08%) [ +0.04% +0.04% +0.00% / +0.16% +0.08% +0.20%] index_select strided 16 : Elapsed 0.024 ms (2.449 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.00% +0.08% +0.33% / +0.12% +0.24% +0.24%] index_select random : Elapsed 0.025 ms (2.453 ms / 100) 2.457 -> 2.455 ( -0.08%) [ +0.12% +0.00% +0.08% / +0.04% -0.08% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.458 -> 2.455 ( -0.12%) [ +0.08% +0.00% +0.12% / -0.12% -0.12% -0.04%] index_select perm : Elapsed 0.025 ms (2.460 ms / 100) 2.457 -> 2.448 ( -0.37%) [ +0.04% +0.00% +0.16% / +0.08% -0.28% -0.37%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) B = [20, 16, 4, 5] (stride (80, 5, 1600, 1)) A = [40, 16, 4, 5] (stride (20, 800, 5, 1)) dim = 0 2.406 -> 2.409 ( +0.12%) [ +0.00% +0.12% +0.17% / +0.12% +0.25% +0.12%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.421 -> 2.415 ( -0.25%) [ +0.17% +0.00% +0.12% / +0.25% -0.25% -0.12%] index_select wrap : Elapsed 0.024 ms (2.425 ms / 100) 2.420 -> 2.415 ( -0.21%) [ +0.17% +0.17% +0.00% / +0.12% -0.21% -0.17%] index_select linear : Elapsed 0.024 ms (2.424 ms / 100) 2.421 -> 2.420 ( -0.04%) [ +0.08% +0.04% +0.00% / +0.17% +0.00% -0.04%] index_select reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.411 -> 2.407 ( -0.17%) [ +0.17% +0.08% +0.00% / -0.04% -0.17% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.415 ms / 100) 2.406 -> 2.410 ( +0.17%) [ +0.12% +0.00% +0.29% / +0.25% +0.17% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.421 -> 2.422 ( +0.04%) [ +0.12% +0.00% +0.00% / +0.12% +0.04% +0.08%] index_select spread : Elapsed 0.024 ms (2.424 ms / 100) 2.420 -> 2.417 ( -0.12%) [ +0.00% +0.33% +0.17% / +0.04% -0.12% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.413 -> 2.412 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.00% +0.08% -0.04%] index_select strided 5 : Elapsed 0.024 ms (2.413 ms / 100) 2.424 -> 2.421 ( -0.12%) [ +0.00% +0.04% +0.12% / -0.12% -0.12% -0.04%] index_select strided 7 : Elapsed 0.024 ms (2.424 ms / 100) 2.414 -> 2.408 ( -0.25%) [ +0.00% +0.00% +0.12% / +0.17% -0.04% -0.25%] index_select strided 8 : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.08% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.414 ms / 100) 2.418 -> 2.421 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.17% +0.12% +0.12%] index_select random : Elapsed 0.024 ms (2.418 ms / 100) 2.423 -> 2.418 ( -0.21%) [ +0.00% +0.00% +0.08% / +0.04% -0.21% -0.12%] index_select random_sorted : Elapsed 0.024 ms (2.423 ms / 100) 2.424 -> 2.421 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.04% -0.12% +0.04%] index_select perm : Elapsed 0.024 ms (2.424 ms / 100) 2.423 -> 2.419 ( -0.17%) [ +0.04% +0.12% +0.00% / -0.12% -0.17% -0.04%] index_select perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [20, 16, 4, 5] (stride (1, 100, 1600, 20)) A = [40, 16, 4, 5] (stride (320, 4, 1, 64)) dim = 0 2.405 -> 2.405 ( +0.00%) [ +0.04% +0.12% +0.00% / +0.00% +0.17% +0.25%] index_select const : Elapsed 0.024 ms (2.406 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.04% +0.00% +0.21% / +0.04% +0.12% +0.08%] index_select wrap : Elapsed 0.024 ms (2.414 ms / 100) 2.417 -> 2.415 ( -0.08%) [ +0.04% +0.00% +0.12% / +0.17% -0.08% -0.08%] index_select linear : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.12% +0.08% +0.00% / +0.08% -0.04% +0.08%] index_select reverse : Elapsed 0.024 ms (2.419 ms / 100) 2.406 -> 2.405 ( -0.04%) [ +0.00% +0.08% +0.12% / +0.12% +0.04% -0.04%] index_select skip64 : Elapsed 0.024 ms (2.406 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.08% +0.17%] index_select skip256 : Elapsed 0.024 ms (2.409 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.21% +0.12%] index_select spread : Elapsed 0.024 ms (2.414 ms / 100) 2.416 -> 2.415 ( -0.04%) [ +0.00% +0.12% +0.04% / -0.04% -0.04% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.412 -> 2.412 ( +0.00%) [ +0.04% +0.00% +0.08% / +0.12% +0.25% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.413 ms / 100) 2.412 -> 2.414 ( +0.08%) [ +0.00% +0.12% +0.17% / +0.08% +0.12% +0.12%] index_select strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.33% +0.25%] index_select strided 8 : Elapsed 0.024 ms (2.406 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.21% +0.17% +0.00% / +0.12% +0.08% +0.33%] index_select strided 16 : Elapsed 0.024 ms (2.413 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.08% +0.12%] index_select random : Elapsed 0.024 ms (2.414 ms / 100) 2.415 -> 2.413 ( -0.08%) [ +0.08% +0.25% +0.00% / -0.04% -0.04% -0.08%] index_select random_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.00% +0.08% +0.12% / +0.08% -0.04% -0.04%] index_select perm : Elapsed 0.024 ms (2.415 ms / 100) 2.418 -> 2.408 ( -0.41%) [ +0.00% +0.12% +0.04% / +0.00% -0.21% -0.41%] index_select perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) B = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) A = [40, 16, 4, 5] (stride (80, 1, 3200, 16)) dim = 0 2.449 -> 2.449 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.04% +0.00%] index_select const : Elapsed 0.024 ms (2.450 ms / 100) 2.458 -> 2.448 ( -0.41%) [ +0.04% +0.00% +0.00% / +0.08% -0.33% -0.41%] index_select wrap : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.451 ( -0.16%) [ +0.08% +0.00% +0.12% / +0.16% -0.16% -0.12%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.455 -> 2.454 ( -0.04%) [ +0.00% +0.20% +0.29% / +0.04% -0.04% +0.08%] index_select reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.04% +0.16% / +0.04% +0.04% +0.08%] index_select skip64 : Elapsed 0.025 ms (2.450 ms / 100) 2.448 -> 2.447 ( -0.04%) [ +0.08% +0.04% +0.00% / +0.00% -0.04% +0.08%] index_select skip256 : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.08% +0.08% +0.41%] index_select spread : Elapsed 0.025 ms (2.456 ms / 100) 2.453 -> 2.452 ( -0.04%) [ +0.12% +0.00% +0.00% / -0.04% +0.20% +0.24%] index_select strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.450 -> 2.448 ( -0.08%) [ +0.04% +0.00% +0.20% / -0.08% +0.08% +0.20%] index_select strided 5 : Elapsed 0.025 ms (2.451 ms / 100) 2.452 -> 2.460 ( +0.33%) [ +0.00% +0.12% +0.29% / +0.37% +0.33% +0.33%] index_select strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.451 -> 2.450 ( -0.04%) [ +0.00% +0.04% +0.00% / +0.12% +0.08% -0.04%] index_select strided 8 : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.449 ( +0.08%) [ +0.00% +0.04% +0.12% / +0.08% +0.29% +0.20%] index_select strided 16 : Elapsed 0.024 ms (2.447 ms / 100) 2.453 -> 2.454 ( +0.04%) [ +0.12% +0.00% +0.20% / +0.04% +0.16% +0.24%] index_select random : Elapsed 0.025 ms (2.456 ms / 100) 2.452 -> 2.454 ( +0.08%) [ +0.00% +0.04% +0.16% / +0.16% +0.08% +0.08%] index_select random_sorted : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.456 ( +0.08%) [ +0.00% +0.00% +0.04% / +0.08% +0.12% +0.08%] index_select perm : Elapsed 0.025 ms (2.454 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.12% +0.16% +0.00% / -0.08% +0.29% +0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) out_shape = [40, 20, 4, 5] in_shape = [40, 16, 4, 5] idx_dim = 1 B = [40, 20, 4, 5] (stride (400, 4, 1, 80)) A = [40, 16, 4, 5] (stride (320, 4, 1, 64)) dim = 1 4.149 -> 4.152 ( +0.07%) [ +0.10% +0.07% +0.00% / +0.07% +0.67% +0.82%] index_add_ linear : Elapsed 0.042 ms (4.153 ms / 100) 4.012 -> 4.014 ( +0.05%) [ +0.00% +0.07% +0.15% / +0.05% +0.82% +0.72%] index_copy_ linear : Elapsed 0.040 ms (4.012 ms / 100) 4.148 -> 4.147 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.89% +0.75%] index_add_ reverse : Elapsed 0.042 ms (4.150 ms / 100) 4.009 -> 4.011 ( +0.05%) [ +0.00% +0.12% +0.12% / +0.05% +0.77% +0.70%] index_copy_ reverse : Elapsed 0.040 ms (4.009 ms / 100) 4.151 -> 4.149 ( -0.05%) [ +0.02% +0.00% +0.00% / -0.05% +0.75% +0.79%] index_add_ spread : Elapsed 0.042 ms (4.152 ms / 100) 4.013 -> 4.016 ( +0.07%) [ +0.05% +0.00% +0.00% / +0.07% +0.82% +1.02%] index_copy_ spread : Elapsed 0.040 ms (4.015 ms / 100) 4.153 -> 4.148 ( -0.12%) [ +0.00% +0.07% +0.02% / -0.12% +0.77% +0.67%] index_add_ strided 3 : Elapsed 0.042 ms (4.153 ms / 100) 4.012 -> 4.017 ( +0.12%) [ +0.20% +0.00% +0.22% / +0.12% +0.82% +0.70%] index_copy_ strided 3 : Elapsed 0.040 ms (4.020 ms / 100) 4.148 -> 4.150 ( +0.05%) [ +0.00% +0.12% +0.12% / +0.05% +0.80% +0.80%] index_add_ strided 7 : Elapsed 0.041 ms (4.148 ms / 100) 4.014 -> 4.020 ( +0.15%) [ +0.15% +0.00% +0.17% / +0.15% +0.80% +0.80%] index_copy_ strided 7 : Elapsed 0.040 ms (4.020 ms / 100) 4.154 -> 4.153 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.65% +0.60%] index_add_ perm : Elapsed 0.042 ms (4.154 ms / 100) 4.016 -> 4.021 ( +0.12%) [ +0.05% +0.02% +0.00% / +0.12% +0.62% +0.70%] index_copy_ perm : Elapsed 0.040 ms (4.018 ms / 100) 4.152 -> 4.147 ( -0.12%) [ +0.00% +0.00% +0.02% / -0.12% +0.79% +0.60%] index_add_ perm_sorted : Elapsed 0.042 ms (4.152 ms / 100) 4.013 -> 4.009 ( -0.10%) [ +0.02% +0.00% +0.00% / -0.10% +0.57% +0.47%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.014 ms / 100) 5.559 -> 5.556 ( -0.05%) [ +0.04% +0.07% +0.00% / -0.05% +0.05% -0.04%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.573 -> 5.574 ( +0.02%) [ +0.09% +0.13% +0.00% / +0.11% +0.11% +0.02%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.575 -> 5.574 ( -0.02%) [ +0.11% +0.16% +0.00% / -0.02% +0.07% +0.02%] index_select linear : Elapsed 0.056 ms (5.581 ms / 100) 5.574 -> 5.575 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.13% +0.13% +0.02%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.552 -> 5.557 ( +0.09%) [ +0.13% +0.18% +0.00% / +0.09% +0.25% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.559 ms / 100) 5.557 -> 5.557 ( +0.00%) [ +0.09% +0.00% +0.07% / +0.07% +0.02% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.576 -> 5.571 ( -0.09%) [ +0.00% +0.00% +0.05% / +0.04% -0.05% -0.09%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.579 -> 5.574 ( -0.09%) [ +0.00% +0.02% +0.09% / -0.02% -0.09% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.11% +0.00% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.580 ms / 100) 5.570 -> 5.573 ( +0.05%) [ +0.25% +0.22% +0.00% / +0.20% +0.16% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.584 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.05% +0.13% +0.00% / +0.00% +0.07% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.18% +0.00% +0.20% / +0.11% +0.05% -0.04%] index_select random : Elapsed 0.056 ms (5.582 ms / 100) 5.577 -> 5.578 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.04% +0.04% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.577 ms / 100) B = [40, 20, 4, 5] (stride (400, 4, 1, 80)) A = [40, 16, 4, 5] (stride (1, 160, 40, 2560)) dim = 1 4.284 -> 4.284 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.72% +0.77%] index_add_ linear : Elapsed 0.043 ms (4.285 ms / 100) 4.138 -> 4.139 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.75% +0.82%] index_copy_ linear : Elapsed 0.041 ms (4.138 ms / 100) 4.244 -> 4.245 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.66% +0.66%] index_add_ reverse : Elapsed 0.042 ms (4.245 ms / 100) 4.093 -> 4.092 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.71% +0.71%] index_copy_ reverse : Elapsed 0.041 ms (4.094 ms / 100) 4.257 -> 4.257 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.82% +0.78%] index_add_ spread : Elapsed 0.043 ms (4.257 ms / 100) 4.114 -> 4.114 ( +0.00%) [ +0.02% +0.00% +0.10% / +0.00% +0.88% +0.78%] index_copy_ spread : Elapsed 0.041 ms (4.115 ms / 100) 4.282 -> 4.283 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.84% +0.82%] index_add_ strided 3 : Elapsed 0.043 ms (4.284 ms / 100) 4.122 -> 4.123 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.82% +0.82%] index_copy_ strided 3 : Elapsed 0.041 ms (4.122 ms / 100) 4.244 -> 4.243 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.85% +0.73%] index_add_ strided 7 : Elapsed 0.042 ms (4.244 ms / 100) 4.094 -> 4.095 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +1.05% +0.71%] index_copy_ strided 7 : Elapsed 0.041 ms (4.095 ms / 100) 4.285 -> 4.286 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.72% +0.68%] index_add_ perm : Elapsed 0.043 ms (4.287 ms / 100) 4.140 -> 4.140 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.70% +0.65%] index_copy_ perm : Elapsed 0.041 ms (4.140 ms / 100) 4.287 -> 4.288 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.63% +0.61%] index_add_ perm_sorted : Elapsed 0.043 ms (4.287 ms / 100) 4.127 -> 4.130 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.128 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.05% +0.04% +0.00%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.571 -> 5.577 ( +0.11%) [ +0.20% +0.00% +0.14% / +0.11% +0.20% +0.16%] index_select wrap : Elapsed 0.056 ms (5.582 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.11% +0.07% +0.00% / -0.04% +0.20% +0.23%] index_select linear : Elapsed 0.056 ms (5.577 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.07% +0.20% +0.25%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.14% +0.22% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.560 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.04% +0.00% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.573 -> 5.579 ( +0.11%) [ +0.05% +0.07% +0.00% / +0.11% +0.20% +0.22%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.572 -> 5.576 ( +0.07%) [ +0.09% +0.00% +0.11% / +0.07% +0.27% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.575 ( +0.07%) [ +0.09% +0.04% +0.00% / +0.07% +0.13% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.576 ms / 100) 5.574 -> 5.569 ( -0.09%) [ +0.16% +0.07% +0.00% / -0.09% +0.18% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.583 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.04% +0.11% +0.00% / +0.05% +0.34% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.570 -> 5.575 ( +0.09%) [ +0.05% +0.00% +0.04% / +0.13% +0.11% +0.09%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.05% +0.00% +0.04% / +0.04% +0.20% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) B = [40, 20, 4, 5] (stride (1, 800, 40, 160)) A = [40, 16, 4, 5] (stride (5, 800, 200, 1)) dim = 1 4.010 -> 4.006 ( -0.10%) [ +0.02% +0.02% +0.00% / -0.10% +0.70% +0.70%] index_add_ linear : Elapsed 0.040 ms (4.011 ms / 100) 3.886 -> 3.883 ( -0.08%) [ +0.00% +0.03% +0.00% / -0.08% +0.93% +0.75%] index_copy_ linear : Elapsed 0.039 ms (3.886 ms / 100) 4.004 -> 4.011 ( +0.17%) [ +0.00% +0.07% +0.17% / +0.17% +0.85% +0.80%] index_add_ reverse : Elapsed 0.040 ms (4.004 ms / 100) 3.871 -> 3.881 ( +0.26%) [ +0.00% +0.08% +0.15% / +0.26% +1.16% +0.83%] index_copy_ reverse : Elapsed 0.039 ms (3.871 ms / 100) 3.994 -> 3.990 ( -0.10%) [ +0.13% +0.00% +0.05% / -0.10% +0.83% +0.83%] index_add_ spread : Elapsed 0.040 ms (3.999 ms / 100) 3.861 -> 3.863 ( +0.05%) [ +0.18% +0.00% +0.10% / +0.05% +0.93% +0.88%] index_copy_ spread : Elapsed 0.039 ms (3.868 ms / 100) 4.009 -> 4.008 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.72% +0.75%] index_add_ strided 3 : Elapsed 0.040 ms (4.009 ms / 100) 3.886 -> 3.891 ( +0.13%) [ +0.13% +0.10% +0.00% / +0.13% +0.82% +0.75%] index_copy_ strided 3 : Elapsed 0.039 ms (3.891 ms / 100) 4.009 -> 4.009 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.00% +0.55% +0.77%] index_add_ strided 7 : Elapsed 0.040 ms (4.013 ms / 100) 3.893 -> 3.890 ( -0.08%) [ +0.00% +0.00% +0.10% / -0.08% +0.46% +0.67%] index_copy_ strided 7 : Elapsed 0.039 ms (3.893 ms / 100) 3.996 -> 3.999 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.83% +0.70%] index_add_ perm : Elapsed 0.040 ms (3.997 ms / 100) 3.885 -> 3.892 ( +0.18%) [ +0.18% +0.08% +0.00% / +0.18% +0.90% +0.95%] index_copy_ perm : Elapsed 0.039 ms (3.892 ms / 100) 4.007 -> 4.009 ( +0.05%) [ +0.15% +0.07% +0.00% / +0.05% +0.60% +0.80%] index_add_ perm_sorted : Elapsed 0.040 ms (4.013 ms / 100) 3.878 -> 3.880 ( +0.05%) [ +0.08% +0.00% +0.08% / +0.05% +0.75% +0.95%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.881 ms / 100) 5.559 -> 5.562 ( +0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.27% +0.11%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.588 -> 5.586 ( -0.04%) [ +0.00% +0.09% +0.00% / -0.04% +0.07% +0.20%] index_select wrap : Elapsed 0.056 ms (5.588 ms / 100) 5.581 -> 5.584 ( +0.05%) [ +0.16% +0.07% +0.00% / +0.16% +0.18% +0.05%] index_select linear : Elapsed 0.056 ms (5.590 ms / 100) 5.588 -> 5.581 ( -0.13%) [ +0.02% +0.00% +0.04% / -0.13% +0.09% +0.02%] index_select reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.560 -> 5.562 ( +0.04%) [ +0.14% +0.00% +0.14% / +0.04% +0.18% +0.20%] index_select skip64 : Elapsed 0.056 ms (5.568 ms / 100) 5.559 -> 5.560 ( +0.02%) [ +0.02% +0.09% +0.00% / +0.02% +0.11% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.560 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.16% +0.14% +0.00% / +0.25% +0.14% +0.04%] index_select spread : Elapsed 0.056 ms (5.588 ms / 100) 5.581 -> 5.588 ( +0.13%) [ +0.23% +0.22% +0.00% / +0.23% +0.25% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.594 ms / 100) 5.585 -> 5.585 ( +0.00%) [ +0.18% +0.09% +0.00% / +0.16% +0.07% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.595 ms / 100) 5.580 -> 5.582 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.20% +0.20% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.580 ms / 100) 5.561 -> 5.563 ( +0.04%) [ +0.02% +0.04% +0.00% / +0.14% +0.04% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.581 -> 5.576 ( -0.09%) [ +0.11% +0.00% +0.04% / +0.14% -0.02% -0.09%] index_select random : Elapsed 0.056 ms (5.587 ms / 100) 5.579 -> 5.577 ( -0.04%) [ +0.00% +0.00% +0.11% / +0.05% -0.04% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.579 ms / 100) B = [40, 20, 4, 5] (stride (1, 40, 4000, 800)) A = [40, 16, 4, 5] (stride (1, 160, 40, 2560)) dim = 1 3.973 -> 3.974 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.88% +0.81%] index_add_ linear : Elapsed 0.040 ms (3.974 ms / 100) 3.807 -> 3.808 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.89% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.807 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.05% +0.00% +0.10% / +0.03% +0.61% +0.64%] index_add_ reverse : Elapsed 0.039 ms (3.920 ms / 100) 3.774 -> 3.777 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.74% +0.72%] index_copy_ reverse : Elapsed 0.038 ms (3.775 ms / 100) 3.943 -> 3.943 ( +0.00%) [ +0.03% +0.20% +0.00% / +0.00% +0.86% +0.86%] index_add_ spread : Elapsed 0.039 ms (3.944 ms / 100) 3.793 -> 3.795 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.90% +0.87%] index_copy_ spread : Elapsed 0.038 ms (3.794 ms / 100) 3.939 -> 3.941 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.79% +0.71%] index_add_ strided 3 : Elapsed 0.039 ms (3.940 ms / 100) 3.783 -> 3.784 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.79% +0.77%] index_copy_ strided 3 : Elapsed 0.038 ms (3.785 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.74% +0.66%] index_add_ strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.775 -> 3.775 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.77% +0.79%] index_copy_ strided 7 : Elapsed 0.038 ms (3.777 ms / 100) 3.975 -> 3.975 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.78% +0.73%] index_add_ perm : Elapsed 0.040 ms (3.976 ms / 100) 3.808 -> 3.810 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.68% +0.74%] index_copy_ perm : Elapsed 0.038 ms (3.808 ms / 100) 3.939 -> 3.941 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.96% +0.71%] index_add_ perm_sorted : Elapsed 0.039 ms (3.942 ms / 100) 3.788 -> 3.789 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.90% +0.63%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.789 ms / 100) 5.472 -> 5.473 ( +0.02%) [ +0.16% +0.09% +0.00% / +0.11% +0.02% +0.02%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.492 -> 5.495 ( +0.05%) [ +0.05% +0.00% +0.04% / +0.05% +0.15% +0.05%] index_select wrap : Elapsed 0.055 ms (5.495 ms / 100) 5.490 -> 5.498 ( +0.15%) [ +0.00% +0.11% +0.11% / +0.15% +0.15% +0.18%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.491 -> 5.493 ( +0.04%) [ +0.00% +0.05% +0.16% / +0.04% +0.18% +0.24%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.476 -> 5.474 ( -0.04%) [ +0.13% +0.15% +0.00% / +0.11% +0.04% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.475 -> 5.483 ( +0.15%) [ +0.09% +0.00% +0.15% / +0.22% +0.15% +0.15%] index_select skip256 : Elapsed 0.055 ms (5.480 ms / 100) 5.489 -> 5.494 ( +0.09%) [ +0.16% +0.00% +0.04% / +0.20% +0.13% +0.09%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.487 -> 5.491 ( +0.07%) [ +0.13% +0.00% +0.02% / +0.15% +0.07% +0.29%] index_select strided 3 : Elapsed 0.055 ms (5.494 ms / 100) 5.490 -> 5.492 ( +0.04%) [ +0.11% +0.00% +0.05% / +0.04% +0.16% +0.07%] index_select strided 5 : Elapsed 0.055 ms (5.496 ms / 100) 5.488 -> 5.485 ( -0.05%) [ +0.00% +0.05% +0.02% / -0.05% +0.26% +0.22%] index_select strided 7 : Elapsed 0.055 ms (5.488 ms / 100) 5.477 -> 5.477 ( +0.00%) [ +0.00% +0.04% +0.05% / +0.00% +0.07% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.00% +0.05% +0.02% / -0.09% +0.07% +0.05%] index_select random : Elapsed 0.055 ms (5.490 ms / 100) 5.488 -> 5.492 ( +0.07%) [ +0.09% +0.02% +0.00% / +0.09% +0.18% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) B = [40, 20, 4, 5] (stride (1, 40, 800, 3200)) A = [40, 16, 4, 5] (stride (320, 20, 5, 1)) dim = 1 3.632 -> 3.634 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.80% +0.85%] index_add_ linear : Elapsed 0.036 ms (3.633 ms / 100) 3.516 -> 3.526 ( +0.28%) [ +0.00% +0.31% +0.14% / +0.28% +0.94% +0.80%] index_copy_ linear : Elapsed 0.035 ms (3.516 ms / 100) 3.638 -> 3.638 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.71% +0.74%] index_add_ reverse : Elapsed 0.036 ms (3.640 ms / 100) 3.512 -> 3.514 ( +0.06%) [ +0.09% +0.00% +0.06% / +0.06% +0.80% +0.71%] index_copy_ reverse : Elapsed 0.035 ms (3.515 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.86% +0.77%] index_add_ spread : Elapsed 0.036 ms (3.625 ms / 100) 3.496 -> 3.498 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +1.03% +0.94%] index_copy_ spread : Elapsed 0.035 ms (3.498 ms / 100) 3.631 -> 3.632 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.77% +0.83%] index_add_ strided 3 : Elapsed 0.036 ms (3.632 ms / 100) 3.517 -> 3.518 ( +0.03%) [ +0.11% +0.11% +0.00% / +0.03% +1.02% +0.74%] index_copy_ strided 3 : Elapsed 0.035 ms (3.521 ms / 100) 3.632 -> 3.633 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.72% +0.74%] index_add_ strided 7 : Elapsed 0.036 ms (3.632 ms / 100) 3.517 -> 3.523 ( +0.17%) [ +0.20% +0.00% +0.23% / +0.17% +0.88% +0.71%] index_copy_ strided 7 : Elapsed 0.035 ms (3.524 ms / 100) 3.627 -> 3.629 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.63% +0.66%] index_add_ perm : Elapsed 0.036 ms (3.628 ms / 100) 3.499 -> 3.500 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.91% +0.94%] index_copy_ perm : Elapsed 0.035 ms (3.500 ms / 100) 3.637 -> 3.641 ( +0.11%) [ +0.00% +0.11% +0.03% / +0.11% +0.77% +0.71%] index_add_ perm_sorted : Elapsed 0.036 ms (3.637 ms / 100) 3.512 -> 3.515 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.63% +0.65%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.512 ms / 100) 5.486 -> 5.488 ( +0.04%) [ +0.05% +0.00% +0.18% / +0.04% +0.13% +0.07%] index_select const : Elapsed 0.055 ms (5.489 ms / 100) 5.512 -> 5.503 ( -0.16%) [ +0.00% +0.00% +0.02% / -0.05% -0.09% -0.16%] index_select wrap : Elapsed 0.055 ms (5.512 ms / 100) 5.509 -> 5.504 ( -0.09%) [ +0.04% +0.00% +0.07% / -0.09% -0.05% +0.04%] index_select linear : Elapsed 0.055 ms (5.511 ms / 100) 5.505 -> 5.507 ( +0.04%) [ +0.02% +0.11% +0.00% / +0.04% +0.13% +0.24%] index_select reverse : Elapsed 0.055 ms (5.506 ms / 100) 5.488 -> 5.491 ( +0.05%) [ +0.00% +0.07% +0.11% / +0.07% +0.05% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.488 ms / 100) 5.488 -> 5.492 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.18% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.489 ms / 100) 5.505 -> 5.509 ( +0.07%) [ +0.04% +0.05% +0.00% / +0.07% +0.20% +0.11%] index_select spread : Elapsed 0.055 ms (5.507 ms / 100) 5.508 -> 5.512 ( +0.07%) [ +0.16% +0.15% +0.00% / +0.07% +0.07% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.517 ms / 100) 5.511 -> 5.509 ( -0.04%) [ +0.00% +0.09% +0.02% / +0.05% -0.02% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.511 ms / 100) 5.507 -> 5.509 ( +0.04%) [ +0.05% +0.00% +0.13% / +0.13% +0.04% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.510 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.09% +0.00% +0.00% / +0.07% -0.05% -0.05%] index_select strided 8 : Elapsed 0.055 ms (5.496 ms / 100) 5.503 -> 5.506 ( +0.05%) [ +0.07% +0.00% +0.13% / +0.07% +0.05% +0.09%] index_select random : Elapsed 0.055 ms (5.507 ms / 100) 5.505 -> 5.506 ( +0.02%) [ +0.18% +0.00% +0.02% / +0.02% +0.05% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.515 ms / 100) out_shape = [40, 16, 20, 5] in_shape = [40, 16, 4, 5] idx_dim = 2 B = [40, 16, 20, 5] (stride (100, 4000, 5, 1)) A = [40, 16, 4, 5] (stride (64, 4, 1, 2560)) dim = 2 2.038 -> 2.039 ( +0.05%) [ +0.39% +0.20% +0.00% / +0.59% +0.05% +0.64%] index_add_ linear : Elapsed 0.020 ms (2.046 ms / 100) 1.981 -> 1.993 ( +0.61%) [ +0.61% +0.50% +0.00% / +0.91% +0.61% +0.81%] index_copy_ linear : Elapsed 0.020 ms (1.993 ms / 100) 2.040 -> 2.038 ( -0.10%) [ +0.34% +0.00% +0.10% / -0.10% +0.10% +0.34%] index_add_ reverse : Elapsed 0.020 ms (2.047 ms / 100) 1.988 -> 1.983 ( -0.25%) [ +0.05% +0.20% +0.00% / -0.25% +0.25% +0.00%] index_copy_ reverse : Elapsed 0.020 ms (1.989 ms / 100) 2.026 -> 2.031 ( +0.25%) [ +0.00% +0.39% +0.25% / +0.25% +0.39% +0.59%] index_add_ spread : Elapsed 0.020 ms (2.026 ms / 100) 1.997 -> 1.997 ( +0.00%) [ +0.00% +0.30% +0.35% / +0.00% +0.70% +0.95%] index_copy_ spread : Elapsed 0.020 ms (1.997 ms / 100) 2.041 -> 2.048 ( +0.34%) [ +0.00% +0.59% +0.24% / +0.34% +0.64% +0.69%] index_add_ strided 3 : Elapsed 0.020 ms (2.041 ms / 100) 2.014 -> 2.018 ( +0.20%) [ +0.00% +0.55% +0.15% / +0.20% +0.60% +0.50%] index_copy_ strided 3 : Elapsed 0.020 ms (2.014 ms / 100) 2.018 -> 2.019 ( +0.05%) [ +0.35% +0.20% +0.00% / +0.05% +0.20% +0.20%] index_add_ strided 7 : Elapsed 0.020 ms (2.025 ms / 100) 1.987 -> 1.989 ( +0.10%) [ +0.10% +0.05% +0.00% / +0.10% +0.25% +0.35%] index_copy_ strided 7 : Elapsed 0.020 ms (1.989 ms / 100) 2.038 -> 2.046 ( +0.39%) [ +0.00% +0.15% +0.25% / +0.39% +0.54% +0.44%] index_add_ perm : Elapsed 0.020 ms (2.038 ms / 100) 2.012 -> 2.011 ( -0.05%) [ +0.00% +0.10% +0.00% / -0.05% +0.10% +0.05%] index_copy_ perm : Elapsed 0.020 ms (2.012 ms / 100) 2.030 -> 2.038 ( +0.39%) [ +0.10% +0.30% +0.00% / +0.39% +0.49% +0.64%] index_add_ perm_sorted : Elapsed 0.020 ms (2.032 ms / 100) 2.001 -> 2.003 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +0.45% +0.70%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.001 ms / 100) 8.865 -> 8.873 ( +0.09%) [ +0.00% +0.02% +0.08% / +0.17% +0.26% +0.09%] index_select const : Elapsed 0.089 ms (8.865 ms / 100) 8.853 -> 8.874 ( +0.24%) [ +0.19% +0.00% +0.27% / +0.24% +0.34% +0.26%] index_select wrap : Elapsed 0.089 ms (8.870 ms / 100) 8.862 -> 8.876 ( +0.16%) [ +0.07% +0.00% +0.06% / +0.16% +0.38% +0.25%] index_select linear : Elapsed 0.089 ms (8.868 ms / 100) 8.853 -> 8.866 ( +0.15%) [ +0.35% +0.24% +0.00% / +0.15% +0.25% +0.45%] index_select reverse : Elapsed 0.089 ms (8.884 ms / 100) 8.859 -> 8.871 ( +0.14%) [ +0.21% +0.00% +0.08% / +0.14% +0.29% +0.40%] index_select skip64 : Elapsed 0.089 ms (8.878 ms / 100) 8.859 -> 8.868 ( +0.10%) [ +0.00% +0.16% +0.10% / +0.10% +0.23% +0.21%] index_select skip256 : Elapsed 0.089 ms (8.859 ms / 100) 8.858 -> 8.872 ( +0.16%) [ +0.00% +0.20% +0.11% / +0.16% +0.17% +0.19%] index_select spread : Elapsed 0.089 ms (8.858 ms / 100) 8.859 -> 8.855 ( -0.05%) [ +0.00% +0.17% +0.23% / -0.05% +0.23% +0.14%] index_select strided 3 : Elapsed 0.089 ms (8.859 ms / 100) 8.860 -> 8.856 ( -0.05%) [ +0.03% +0.00% +0.15% / -0.05% +0.07% +0.29%] index_select random : Elapsed 0.089 ms (8.863 ms / 100) 8.851 -> 8.858 ( +0.08%) [ +0.19% +0.17% +0.00% / +0.08% +0.27% +0.51%] index_select random_sorted : Elapsed 0.089 ms (8.868 ms / 100) B = [40, 16, 20, 5] (stride (5, 4000, 200, 1)) A = [40, 16, 4, 5] (stride (4, 160, 1, 2560)) dim = 2 2.254 -> 2.252 ( -0.09%) [ +0.00% +0.00% +0.18% / -0.09% +0.35% +0.35%] index_add_ linear : Elapsed 0.023 ms (2.254 ms / 100) 2.187 -> 2.190 ( +0.14%) [ +0.23% +0.00% +0.27% / +0.14% +0.37% +0.50%] index_copy_ linear : Elapsed 0.022 ms (2.192 ms / 100) 2.249 -> 2.254 ( +0.22%) [ +0.13% +0.36% +0.00% / +0.22% +0.53% +0.49%] index_add_ reverse : Elapsed 0.023 ms (2.252 ms / 100) 2.188 -> 2.193 ( +0.23%) [ +0.09% +0.18% +0.00% / +0.23% +0.23% +0.32%] index_copy_ reverse : Elapsed 0.022 ms (2.190 ms / 100) 2.249 -> 2.251 ( +0.09%) [ +0.00% +0.31% +0.13% / +0.09% +0.58% +0.67%] index_add_ spread : Elapsed 0.022 ms (2.249 ms / 100) 2.185 -> 2.187 ( +0.09%) [ +0.14% +0.05% +0.00% / +0.09% +0.55% +0.50%] index_copy_ spread : Elapsed 0.022 ms (2.188 ms / 100) 2.253 -> 2.254 ( +0.04%) [ +0.09% +0.18% +0.00% / +0.04% +0.27% +0.40%] index_add_ strided 3 : Elapsed 0.023 ms (2.255 ms / 100) 2.190 -> 2.189 ( -0.05%) [ +0.05% +0.09% +0.00% / -0.05% +0.32% +0.27%] index_copy_ strided 3 : Elapsed 0.022 ms (2.191 ms / 100) 2.249 -> 2.254 ( +0.22%) [ +0.13% +0.00% +0.00% / +0.22% +0.36% +0.49%] index_add_ strided 7 : Elapsed 0.023 ms (2.252 ms / 100) 2.186 -> 2.191 ( +0.23%) [ +0.27% +0.09% +0.00% / +0.23% +0.69% +0.32%] index_copy_ strided 7 : Elapsed 0.022 ms (2.192 ms / 100) 2.252 -> 2.254 ( +0.09%) [ +0.18% +0.04% +0.00% / +0.09% +0.49% +0.67%] index_add_ perm : Elapsed 0.023 ms (2.256 ms / 100) 2.186 -> 2.190 ( +0.18%) [ +0.14% +0.18% +0.00% / +0.18% +0.27% +0.50%] index_copy_ perm : Elapsed 0.022 ms (2.189 ms / 100) 2.246 -> 2.252 ( +0.27%) [ +0.36% +0.27% +0.00% / +0.27% +0.62% +0.67%] index_add_ perm_sorted : Elapsed 0.023 ms (2.254 ms / 100) 2.182 -> 2.190 ( +0.37%) [ +0.14% +0.23% +0.00% / +0.37% +0.55% +0.55%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.185 ms / 100) 9.236 -> 9.235 ( -0.01%) [ +0.00% +0.18% +0.15% / -0.01% +0.34% +0.39%] index_select const : Elapsed 0.092 ms (9.236 ms / 100) 9.233 -> 9.254 ( +0.23%) [ +0.02% +0.03% +0.00% / +0.26% +0.26% +0.23%] index_select wrap : Elapsed 0.092 ms (9.235 ms / 100) 9.233 -> 9.231 ( -0.02%) [ +0.37% +0.04% +0.00% / -0.02% +0.49% +0.29%] index_select linear : Elapsed 0.093 ms (9.267 ms / 100) 9.239 -> 9.230 ( -0.10%) [ +0.00% +0.21% +0.08% / -0.10% +0.45% +0.22%] index_select reverse : Elapsed 0.092 ms (9.239 ms / 100) 9.236 -> 9.243 ( +0.08%) [ +0.15% +0.00% +0.13% / +0.08% +0.48% +0.35%] index_select skip64 : Elapsed 0.092 ms (9.250 ms / 100) 9.244 -> 9.255 ( +0.12%) [ +0.06% +0.08% +0.00% / +0.14% +0.12% +0.29%] index_select skip256 : Elapsed 0.092 ms (9.250 ms / 100) 9.249 -> 9.256 ( +0.08%) [ +0.24% +0.04% +0.00% / +0.08% +0.28% +0.36%] index_select spread : Elapsed 0.093 ms (9.271 ms / 100) 9.238 -> 9.243 ( +0.05%) [ +0.25% +0.24% +0.00% / +0.05% +0.35% +0.57%] index_select strided 3 : Elapsed 0.093 ms (9.261 ms / 100) 9.229 -> 9.248 ( +0.21%) [ +0.12% +0.00% +0.15% / +0.21% +0.42% +0.41%] index_select random : Elapsed 0.092 ms (9.240 ms / 100) 9.243 -> 9.237 ( -0.06%) [ +0.02% +0.05% +0.00% / -0.06% +0.29% +0.25%] index_select random_sorted : Elapsed 0.092 ms (9.245 ms / 100) B = [40, 16, 20, 5] (stride (1, 4000, 200, 40)) A = [40, 16, 4, 5] (stride (64, 4, 1, 2560)) dim = 2 2.020 -> 2.019 ( -0.05%) [ +0.45% +0.79% +0.00% / -0.05% +0.25% +0.45%] index_add_ linear : Elapsed 0.020 ms (2.029 ms / 100) 1.984 -> 1.982 ( -0.10%) [ +0.50% +0.25% +0.00% / -0.05% -0.05% -0.10%] index_copy_ linear : Elapsed 0.020 ms (1.994 ms / 100) 2.022 -> 2.024 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.15% +0.10% +0.10%] index_add_ reverse : Elapsed 0.020 ms (2.024 ms / 100) 1.981 -> 1.986 ( +0.25%) [ +0.25% +0.50% +0.00% / +0.35% +0.35% +0.25%] index_copy_ reverse : Elapsed 0.020 ms (1.986 ms / 100) 2.022 -> 2.029 ( +0.35%) [ +0.49% +0.20% +0.00% / +0.49% +0.49% +0.35%] index_add_ spread : Elapsed 0.020 ms (2.032 ms / 100) 1.978 -> 1.987 ( +0.46%) [ +0.40% +0.00% +0.35% / +0.51% +0.46% +0.46%] index_copy_ spread : Elapsed 0.020 ms (1.986 ms / 100) 2.023 -> 2.026 ( +0.15%) [ +0.64% +0.49% +0.00% / +0.15% +0.25% +0.35%] index_add_ strided 3 : Elapsed 0.020 ms (2.036 ms / 100) 1.983 -> 1.984 ( +0.05%) [ +0.35% +0.30% +0.00% / +0.10% +0.05% +0.30%] index_copy_ strided 3 : Elapsed 0.020 ms (1.990 ms / 100) 2.026 -> 2.019 ( -0.35%) [ +0.05% +0.00% +0.00% / +0.05% -0.25% -0.35%] index_add_ strided 7 : Elapsed 0.020 ms (2.027 ms / 100) 1.984 -> 1.979 ( -0.25%) [ +0.05% +0.00% +0.05% / +0.00% -0.10% -0.25%] index_copy_ strided 7 : Elapsed 0.020 ms (1.985 ms / 100) 2.019 -> 2.018 ( -0.05%) [ +0.25% +0.64% +0.00% / +0.20% +0.54% -0.05%] index_add_ perm : Elapsed 0.020 ms (2.024 ms / 100) 1.980 -> 1.980 ( +0.00%) [ +0.10% +0.10% +0.00% / +0.05% +0.35% +0.00%] index_copy_ perm : Elapsed 0.020 ms (1.982 ms / 100) 2.020 -> 2.024 ( +0.20%) [ +0.00% +0.45% +0.40% / +0.25% +0.59% +0.20%] index_add_ perm_sorted : Elapsed 0.020 ms (2.020 ms / 100) 1.981 -> 1.981 ( +0.00%) [ +0.00% +0.20% +0.25% / +0.30% +0.10% +0.00%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.981 ms / 100) 8.805 -> 8.833 ( +0.32%) [ +0.00% +0.12% +0.15% / +0.33% +0.32% +0.36%] index_select const : Elapsed 0.088 ms (8.805 ms / 100) 8.816 -> 8.810 ( -0.07%) [ +0.00% +0.15% +0.03% / -0.07% +0.11% +0.15%] index_select wrap : Elapsed 0.088 ms (8.816 ms / 100) 8.819 -> 8.823 ( +0.05%) [ +0.00% +0.18% +0.11% / +0.05% +0.32% +0.16%] index_select linear : Elapsed 0.088 ms (8.819 ms / 100) 8.801 -> 8.814 ( +0.15%) [ +0.00% +0.15% +0.14% / +0.15% +0.25% +0.25%] index_select reverse : Elapsed 0.088 ms (8.801 ms / 100) 8.830 -> 8.824 ( -0.07%) [ +0.03% +0.01% +0.00% / -0.07% +0.08% -0.06%] index_select skip64 : Elapsed 0.088 ms (8.833 ms / 100) 8.808 -> 8.822 ( +0.16%) [ +0.09% +0.00% +0.22% / +0.32% +0.37% +0.16%] index_select skip256 : Elapsed 0.088 ms (8.816 ms / 100) 8.815 -> 8.833 ( +0.20%) [ +0.00% +0.29% +0.23% / +0.20% +0.20% +0.22%] index_select spread : Elapsed 0.088 ms (8.815 ms / 100) 8.815 -> 8.818 ( +0.03%) [ +0.26% +0.01% +0.00% / +0.08% +0.07% +0.03%] index_select strided 3 : Elapsed 0.088 ms (8.838 ms / 100) 8.806 -> 8.821 ( +0.17%) [ +0.17% +0.00% +0.39% / +0.17% +0.40% +0.37%] index_select random : Elapsed 0.088 ms (8.821 ms / 100) 8.814 -> 8.808 ( -0.07%) [ +0.08% +0.00% +0.27% / -0.07% +0.11% +0.33%] index_select random_sorted : Elapsed 0.088 ms (8.821 ms / 100) B = [40, 16, 20, 5] (stride (80, 5, 3200, 1)) A = [40, 16, 4, 5] (stride (1, 40, 640, 2560)) dim = 2 2.128 -> 2.130 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.33% +0.28%] index_add_ linear : Elapsed 0.021 ms (2.128 ms / 100) 2.062 -> 2.069 ( +0.34%) [ +0.00% +0.15% +0.05% / +0.34% +0.34% +0.58%] index_copy_ linear : Elapsed 0.021 ms (2.062 ms / 100) 2.120 -> 2.123 ( +0.14%) [ +0.24% +0.24% +0.00% / +0.14% +0.75% +0.57%] index_add_ reverse : Elapsed 0.021 ms (2.125 ms / 100) 2.059 -> 2.065 ( +0.29%) [ +0.05% +0.49% +0.00% / +0.29% +0.68% +0.53%] index_copy_ reverse : Elapsed 0.021 ms (2.060 ms / 100) 2.125 -> 2.124 ( -0.05%) [ +0.00% +0.24% +0.05% / -0.05% +0.52% +0.42%] index_add_ spread : Elapsed 0.021 ms (2.125 ms / 100) 2.063 -> 2.064 ( +0.05%) [ +0.19% +0.29% +0.00% / +0.05% +0.92% +0.53%] index_copy_ spread : Elapsed 0.021 ms (2.067 ms / 100) 2.127 -> 2.127 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.38% +0.47%] index_add_ strided 3 : Elapsed 0.021 ms (2.127 ms / 100) 2.062 -> 2.064 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.48% +0.82%] index_copy_ strided 3 : Elapsed 0.021 ms (2.063 ms / 100) 2.123 -> 2.125 ( +0.09%) [ +0.00% +0.14% +0.00% / +0.09% +0.61% +0.66%] index_add_ strided 7 : Elapsed 0.021 ms (2.123 ms / 100) 2.057 -> 2.060 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.92% +0.92%] index_copy_ strided 7 : Elapsed 0.021 ms (2.060 ms / 100) 2.125 -> 2.125 ( +0.00%) [ +0.19% +0.00% +0.00% / +0.00% +0.33% +0.24%] index_add_ perm : Elapsed 0.021 ms (2.129 ms / 100) 2.063 -> 2.066 ( +0.15%) [ +0.19% +0.15% +0.00% / +0.15% +0.19% +0.39%] index_copy_ perm : Elapsed 0.021 ms (2.067 ms / 100) 2.125 -> 2.131 ( +0.28%) [ +0.19% +0.00% +0.09% / +0.28% +0.38% +0.33%] index_add_ perm_sorted : Elapsed 0.021 ms (2.129 ms / 100) 2.062 -> 2.070 ( +0.39%) [ +0.29% +0.29% +0.00% / +0.44% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.068 ms / 100) 8.679 -> 8.677 ( -0.02%) [ +0.00% +0.30% +0.16% / +0.15% -0.02% +0.08%] index_select const : Elapsed 0.087 ms (8.679 ms / 100) 8.721 -> 8.719 ( -0.02%) [ +0.37% +0.10% +0.00% / +0.28% -0.02% +0.06%] index_select wrap : Elapsed 0.088 ms (8.753 ms / 100) 8.726 -> 8.719 ( -0.08%) [ +0.00% +0.09% +0.00% / -0.01% -0.08% -0.01%] index_select linear : Elapsed 0.087 ms (8.726 ms / 100) 8.730 -> 8.705 ( -0.29%) [ +0.06% +0.00% +0.02% / -0.17% -0.21% -0.29%] index_select reverse : Elapsed 0.087 ms (8.735 ms / 100) 8.691 -> 8.694 ( +0.03%) [ +0.03% +0.00% +0.08% / +0.06% +0.07% +0.03%] index_select skip64 : Elapsed 0.087 ms (8.694 ms / 100) 8.684 -> 8.684 ( +0.00%) [ +0.00% +0.08% +0.07% / +0.18% +0.26% +0.00%] index_select skip256 : Elapsed 0.087 ms (8.684 ms / 100) 8.735 -> 8.730 ( -0.06%) [ +0.00% +0.06% +0.07% / -0.06% +0.06% -0.01%] index_select spread : Elapsed 0.087 ms (8.735 ms / 100) 8.728 -> 8.713 ( -0.17%) [ +0.11% +0.14% +0.00% / +0.32% +0.42% -0.17%] index_select strided 3 : Elapsed 0.087 ms (8.738 ms / 100) 8.730 -> 8.719 ( -0.13%) [ +0.07% +0.00% +0.36% / +0.18% +0.18% -0.13%] index_select random : Elapsed 0.087 ms (8.736 ms / 100) 8.729 -> 8.734 ( +0.06%) [ +0.00% +0.09% +0.09% / +0.13% +0.06% +0.14%] index_select random_sorted : Elapsed 0.087 ms (8.729 ms / 100) B = [40, 16, 20, 5] (stride (320, 1, 16, 12800)) A = [40, 16, 4, 5] (stride (16, 1, 3200, 640)) dim = 2 2.019 -> 2.024 ( +0.25%) [ +0.40% +0.15% +0.00% / +0.30% +0.45% +0.25%] index_add_ linear : Elapsed 0.020 ms (2.027 ms / 100) 1.970 -> 1.972 ( +0.10%) [ +0.25% +0.05% +0.00% / +0.25% +0.25% +0.10%] index_copy_ linear : Elapsed 0.020 ms (1.975 ms / 100) 2.015 -> 2.015 ( +0.00%) [ +0.30% +0.30% +0.00% / +0.00% +0.74% +0.50%] index_add_ reverse : Elapsed 0.020 ms (2.021 ms / 100) 1.966 -> 1.967 ( +0.05%) [ +0.00% +0.00% +0.10% / +0.05% +0.20% +0.20%] index_copy_ reverse : Elapsed 0.020 ms (1.966 ms / 100) 2.008 -> 2.007 ( -0.05%) [ +0.35% +0.20% +0.00% / -0.05% +0.35% +0.35%] index_add_ spread : Elapsed 0.020 ms (2.015 ms / 100) 1.961 -> 1.958 ( -0.15%) [ +0.00% +0.00% +0.00% / +0.10% +0.10% -0.15%] index_copy_ spread : Elapsed 0.020 ms (1.961 ms / 100) 2.016 -> 2.018 ( +0.10%) [ +0.00% +0.10% +0.25% / +0.10% +0.50% +0.45%] index_add_ strided 3 : Elapsed 0.020 ms (2.016 ms / 100) 1.966 -> 1.968 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.61% +0.10%] index_copy_ strided 3 : Elapsed 0.020 ms (1.967 ms / 100) 2.020 -> 2.017 ( -0.15%) [ +0.15% +0.35% +0.00% / +0.10% -0.15% +0.00%] index_add_ strided 7 : Elapsed 0.020 ms (2.023 ms / 100) 1.972 -> 1.967 ( -0.25%) [ +0.00% +0.05% +0.15% / +0.10% -0.25% -0.10%] index_copy_ strided 7 : Elapsed 0.020 ms (1.972 ms / 100) 2.017 -> 2.014 ( -0.15%) [ +0.00% +0.05% +0.00% / -0.15% +0.05% +0.30%] index_add_ perm : Elapsed 0.020 ms (2.017 ms / 100) 1.964 -> 1.962 ( -0.10%) [ +0.20% +0.15% +0.00% / -0.05% +0.10% -0.10%] index_copy_ perm : Elapsed 0.020 ms (1.968 ms / 100) 2.017 -> 2.016 ( -0.05%) [ +0.20% +0.20% +0.00% / +0.10% -0.05% -0.05%] index_add_ perm_sorted : Elapsed 0.020 ms (2.021 ms / 100) 1.960 -> 1.961 ( +0.05%) [ +0.51% +0.71% +0.00% / +0.46% +0.05% +0.26%] index_copy_ perm_sorted : Elapsed 0.020 ms (1.970 ms / 100) 8.738 -> 8.744 ( +0.07%) [ +0.06% +0.00% +0.10% / +0.09% +0.11% +0.07%] index_select const : Elapsed 0.087 ms (8.743 ms / 100) 8.825 -> 8.807 ( -0.20%) [ +0.00% +0.02% +0.07% / -0.06% -0.09% -0.20%] index_select wrap : Elapsed 0.088 ms (8.825 ms / 100) 8.779 -> 8.781 ( +0.02%) [ +0.22% +0.09% +0.00% / +0.02% +0.27% +0.10%] index_select linear : Elapsed 0.088 ms (8.798 ms / 100) 8.796 -> 8.768 ( -0.32%) [ +0.06% +0.00% +0.06% / +0.09% -0.32% -0.28%] index_select reverse : Elapsed 0.088 ms (8.801 ms / 100) 8.737 -> 8.744 ( +0.08%) [ +0.01% +0.00% +0.08% / +0.08% +0.31% +0.24%] index_select skip64 : Elapsed 0.087 ms (8.738 ms / 100) 8.729 -> 8.742 ( +0.15%) [ +0.21% +0.01% +0.00% / +0.15% +0.23% +0.30%] index_select skip256 : Elapsed 0.087 ms (8.747 ms / 100) 8.812 -> 8.794 ( -0.20%) [ +0.02% +0.01% +0.00% / -0.10% -0.08% -0.20%] index_select spread : Elapsed 0.088 ms (8.814 ms / 100) 8.817 -> 8.814 ( -0.03%) [ +0.00% +0.01% +0.11% / -0.01% -0.03% +0.09%] index_select strided 3 : Elapsed 0.088 ms (8.817 ms / 100) 8.815 -> 8.803 ( -0.14%) [ +0.15% +0.18% +0.00% / +0.09% -0.14% -0.09%] index_select random : Elapsed 0.088 ms (8.828 ms / 100) 8.789 -> 8.785 ( -0.05%) [ +0.03% +0.00% +0.16% / +0.11% +0.34% -0.05%] index_select random_sorted : Elapsed 0.088 ms (8.792 ms / 100) out_shape = [40, 16, 4, 20] in_shape = [40, 16, 4, 5] idx_dim = 3 B = [40, 16, 4, 20] (stride (1280, 1, 320, 16)) A = [40, 16, 4, 5] (stride (1, 200, 3200, 40)) dim = 3 1.952 -> 1.954 ( +0.10%) [ +0.00% +0.15% +0.20% / +0.10% +0.92% +0.97%] index_add_ linear : Elapsed 0.020 ms (1.952 ms / 100) 1.899 -> 1.899 ( +0.00%) [ +0.00% +0.05% +0.11% / +0.00% +1.11% +0.84%] index_copy_ linear : Elapsed 0.019 ms (1.899 ms / 100) 1.952 -> 1.955 ( +0.15%) [ +0.05% +0.00% +0.20% / +0.15% +0.87% +0.92%] index_add_ reverse : Elapsed 0.020 ms (1.953 ms / 100) 1.897 -> 1.897 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.74% +0.79%] index_copy_ reverse : Elapsed 0.019 ms (1.898 ms / 100) 1.961 -> 1.962 ( +0.05%) [ +0.00% +0.00% +0.15% / +0.05% +0.61% +0.66%] index_add_ spread : Elapsed 0.020 ms (1.961 ms / 100) 1.911 -> 1.911 ( +0.00%) [ +0.10% +0.00% +0.05% / +0.00% +0.63% +0.52%] index_copy_ spread : Elapsed 0.019 ms (1.913 ms / 100) 1.978 -> 1.986 ( +0.40%) [ +0.00% +0.15% +0.10% / +0.40% +0.91% +0.96%] index_add_ strided 3 : Elapsed 0.020 ms (1.978 ms / 100) 1.922 -> 1.924 ( +0.10%) [ +0.00% +0.16% +0.10% / +0.10% +0.94% +0.99%] index_copy_ strided 3 : Elapsed 0.019 ms (1.922 ms / 100) 1.949 -> 1.952 ( +0.15%) [ +0.10% +0.00% +0.15% / +0.15% +1.03% +0.97%] index_add_ strided 7 : Elapsed 0.020 ms (1.951 ms / 100) 1.894 -> 1.897 ( +0.16%) [ +0.00% +0.32% +0.37% / +0.16% +0.79% +0.90%] index_copy_ strided 7 : Elapsed 0.019 ms (1.894 ms / 100) 1.955 -> 1.955 ( +0.00%) [ +0.31% +0.00% +0.15% / +0.00% +0.87% +0.87%] index_add_ perm : Elapsed 0.020 ms (1.961 ms / 100) 1.904 -> 1.909 ( +0.26%) [ +0.00% +0.21% +0.16% / +0.26% +0.89% +0.95%] index_copy_ perm : Elapsed 0.019 ms (1.904 ms / 100) 1.968 -> 1.966 ( -0.10%) [ +0.00% +0.05% +0.15% / -0.10% +0.81% +0.76%] index_add_ perm_sorted : Elapsed 0.020 ms (1.968 ms / 100) 1.910 -> 1.914 ( +0.21%) [ +0.00% +0.05% +0.21% / +0.21% +1.05% +1.20%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.910 ms / 100) 8.531 -> 8.548 ( +0.20%) [ +0.14% +0.00% +0.02% / +0.29% +0.22% +0.20%] index_select const : Elapsed 0.085 ms (8.543 ms / 100) 8.552 -> 8.556 ( +0.05%) [ +0.00% +0.14% +0.30% / +0.05% +0.22% +0.22%] index_select wrap : Elapsed 0.086 ms (8.552 ms / 100) 8.552 -> 8.541 ( -0.13%) [ +0.00% +0.05% +0.32% / -0.06% +0.20% -0.13%] index_select linear : Elapsed 0.086 ms (8.552 ms / 100) 8.540 -> 8.546 ( +0.07%) [ +0.09% +0.30% +0.00% / +0.07% +0.42% +0.28%] index_select reverse : Elapsed 0.085 ms (8.548 ms / 100) 8.530 -> 8.529 ( -0.01%) [ +0.00% +0.35% +0.08% / +0.02% -0.01% +0.22%] index_select skip64 : Elapsed 0.085 ms (8.530 ms / 100) 8.526 -> 8.540 ( +0.16%) [ +0.13% +0.29% +0.00% / +0.22% +0.38% +0.16%] index_select skip256 : Elapsed 0.085 ms (8.537 ms / 100) 8.556 -> 8.571 ( +0.18%) [ +0.00% +0.37% +0.13% / +0.22% +0.18% +0.27%] index_select spread : Elapsed 0.086 ms (8.556 ms / 100) 8.549 -> 8.557 ( +0.09%) [ +0.16% +0.30% +0.00% / +0.15% +0.09% +0.18%] index_select strided 3 : Elapsed 0.086 ms (8.563 ms / 100) 8.564 -> 8.553 ( -0.13%) [ +0.08% +0.00% +0.02% / -0.13% +0.00% +0.15%] index_select random : Elapsed 0.086 ms (8.571 ms / 100) 8.564 -> 8.567 ( +0.04%) [ +0.00% +0.16% +0.11% / +0.50% +0.09% +0.04%] index_select random_sorted : Elapsed 0.086 ms (8.564 ms / 100) B = [40, 16, 4, 20] (stride (80, 3200, 1, 4)) A = [40, 16, 4, 5] (stride (1, 200, 3200, 40)) dim = 3 1.955 -> 1.958 ( +0.15%) [ +0.15% +0.05% +0.00% / +0.15% +0.56% +0.61%] index_add_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.901 -> 1.905 ( +0.21%) [ +0.00% +0.11% +0.05% / +0.21% +0.68% +0.53%] index_copy_ linear : Elapsed 0.019 ms (1.901 ms / 100) 1.944 -> 1.944 ( +0.00%) [ +0.00% +0.36% +0.26% / +0.00% +0.72% +0.62%] index_add_ reverse : Elapsed 0.019 ms (1.944 ms / 100) 1.893 -> 1.894 ( +0.05%) [ +0.11% +0.32% +0.00% / +0.05% +0.48% +0.32%] index_copy_ reverse : Elapsed 0.019 ms (1.895 ms / 100) 1.989 -> 1.992 ( +0.15%) [ +0.00% +0.00% +0.05% / +0.25% +0.25% +0.15%] index_add_ spread : Elapsed 0.020 ms (1.989 ms / 100) 1.939 -> 1.939 ( +0.00%) [ +0.36% +0.41% +0.00% / +0.00% +0.21% +0.46%] index_copy_ spread : Elapsed 0.019 ms (1.946 ms / 100) 1.992 -> 1.989 ( -0.15%) [ +0.00% +0.00% +0.00% / +0.10% +0.05% -0.15%] index_add_ strided 3 : Elapsed 0.020 ms (1.992 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.00% +0.21% +0.21% / +0.00% +0.36% +0.26%] index_copy_ strided 3 : Elapsed 0.019 ms (1.935 ms / 100) 1.985 -> 1.985 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.35% +0.05% +0.00%] index_add_ strided 7 : Elapsed 0.020 ms (1.986 ms / 100) 1.928 -> 1.926 ( -0.10%) [ +0.00% +0.05% +0.00% / -0.10% +0.00% +0.05%] index_copy_ strided 7 : Elapsed 0.019 ms (1.928 ms / 100) 1.998 -> 1.999 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.10% +0.15%] index_add_ perm : Elapsed 0.020 ms (1.999 ms / 100) 1.943 -> 1.945 ( +0.10%) [ +0.00% +0.10% +0.15% / +0.21% +0.41% +0.10%] index_copy_ perm : Elapsed 0.019 ms (1.943 ms / 100) 1.983 -> 1.987 ( +0.20%) [ +0.15% +0.15% +0.00% / +0.20% +0.30% +0.30%] index_add_ perm_sorted : Elapsed 0.020 ms (1.986 ms / 100) 1.932 -> 1.937 ( +0.26%) [ +0.05% +0.21% +0.00% / +0.26% +0.26% +0.31%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.933 ms / 100) 8.555 -> 8.552 ( -0.04%) [ +0.16% +0.20% +0.00% / -0.04% +0.14% +0.20%] index_select const : Elapsed 0.086 ms (8.569 ms / 100) 8.577 -> 8.566 ( -0.13%) [ +0.16% +0.00% +0.09% / +0.15% -0.13% +0.24%] index_select wrap : Elapsed 0.086 ms (8.591 ms / 100) 8.579 -> 8.576 ( -0.03%) [ +0.31% +0.00% +0.27% / +0.03% +0.08% -0.03%] index_select linear : Elapsed 0.086 ms (8.606 ms / 100) 8.582 -> 8.579 ( -0.03%) [ +0.16% +0.00% +0.10% / -0.03% +0.09% +0.00%] index_select reverse : Elapsed 0.086 ms (8.596 ms / 100) 8.557 -> 8.553 ( -0.05%) [ +0.04% +0.12% +0.00% / -0.02% -0.05% +0.20%] index_select skip64 : Elapsed 0.086 ms (8.560 ms / 100) 8.553 -> 8.554 ( +0.01%) [ +0.02% +0.00% +0.21% / +0.13% +0.02% +0.01%] index_select skip256 : Elapsed 0.086 ms (8.555 ms / 100) 8.597 -> 8.583 ( -0.16%) [ +0.02% +0.08% +0.00% / +0.07% +0.07% -0.16%] index_select spread : Elapsed 0.086 ms (8.599 ms / 100) 8.572 -> 8.570 ( -0.02%) [ +0.29% +0.06% +0.00% / +0.09% +0.06% -0.02%] index_select strided 3 : Elapsed 0.086 ms (8.597 ms / 100) 8.580 -> 8.584 ( +0.05%) [ +0.00% +0.12% +0.23% / +0.10% +0.09% +0.05%] index_select random : Elapsed 0.086 ms (8.580 ms / 100) 8.584 -> 8.590 ( +0.07%) [ +0.07% +0.17% +0.00% / +0.09% +0.07% +0.10%] index_select random_sorted : Elapsed 0.086 ms (8.590 ms / 100) B = [40, 16, 4, 20] (stride (20, 800, 12800, 1)) A = [40, 16, 4, 5] (stride (320, 1, 16, 64)) dim = 3 1.935 -> 1.936 ( +0.05%) [ +0.00% +0.26% +0.10% / +0.05% +0.26% +0.47%] index_add_ linear : Elapsed 0.019 ms (1.935 ms / 100) 1.904 -> 1.906 ( +0.11%) [ +0.00% +0.05% +0.05% / +0.11% +0.21% +0.26%] index_copy_ linear : Elapsed 0.019 ms (1.904 ms / 100) 1.935 -> 1.943 ( +0.41%) [ +0.16% +0.21% +0.00% / +0.41% +0.62% +0.41%] index_add_ reverse : Elapsed 0.019 ms (1.938 ms / 100) 1.905 -> 1.905 ( +0.00%) [ +0.26% +0.00% +0.16% / +0.00% +0.21% +0.31%] index_copy_ reverse : Elapsed 0.019 ms (1.910 ms / 100) 1.958 -> 1.961 ( +0.15%) [ +0.26% +0.15% +0.00% / +0.15% +0.15% +0.31%] index_add_ spread : Elapsed 0.020 ms (1.963 ms / 100) 1.927 -> 1.932 ( +0.26%) [ +0.00% +0.26% +0.16% / +0.26% +0.57% +0.47%] index_copy_ spread : Elapsed 0.019 ms (1.927 ms / 100) 1.959 -> 1.958 ( -0.05%) [ +0.20% +0.00% +0.00% / -0.05% +0.15% +0.15%] index_add_ strided 3 : Elapsed 0.020 ms (1.963 ms / 100) 1.931 -> 1.934 ( +0.16%) [ +0.05% +0.00% +0.00% / +0.26% +0.31% +0.16%] index_copy_ strided 3 : Elapsed 0.019 ms (1.932 ms / 100) 1.960 -> 1.959 ( -0.05%) [ +0.10% +0.00% +0.05% / -0.05% +0.20% +0.15%] index_add_ strided 7 : Elapsed 0.020 ms (1.962 ms / 100) 1.935 -> 1.935 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.26% +0.16%] index_copy_ strided 7 : Elapsed 0.019 ms (1.936 ms / 100) 1.958 -> 1.958 ( +0.00%) [ +0.10% +0.15% +0.00% / +0.00% +0.10% +0.05%] index_add_ perm : Elapsed 0.020 ms (1.960 ms / 100) 1.928 -> 1.934 ( +0.31%) [ +0.31% +0.16% +0.00% / +0.31% +0.52% +0.47%] index_copy_ perm : Elapsed 0.019 ms (1.934 ms / 100) 1.958 -> 1.955 ( -0.15%) [ +0.05% +0.10% +0.00% / -0.15% +0.05% -0.05%] index_add_ perm_sorted : Elapsed 0.020 ms (1.959 ms / 100) 1.932 -> 1.937 ( +0.26%) [ +0.00% +0.00% +0.00% / +0.41% +0.26% +0.26%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.932 ms / 100) 8.581 -> 8.593 ( +0.14%) [ +0.19% +0.00% +0.05% / +0.24% +0.14% +0.21%] index_select const : Elapsed 0.086 ms (8.597 ms / 100) 8.633 -> 8.645 ( +0.14%) [ +0.20% +0.07% +0.00% / +0.14% +0.16% +0.29%] index_select wrap : Elapsed 0.086 ms (8.650 ms / 100) 8.616 -> 8.614 ( -0.02%) [ +0.05% +0.01% +0.00% / -0.02% +0.26% +0.19%] index_select linear : Elapsed 0.086 ms (8.620 ms / 100) 8.618 -> 8.637 ( +0.22%) [ +0.00% +0.23% +0.21% / +0.27% +0.27% +0.22%] index_select reverse : Elapsed 0.086 ms (8.618 ms / 100) 8.584 -> 8.596 ( +0.14%) [ +0.08% +0.00% +0.10% / +0.14% +0.24% +0.19%] index_select skip64 : Elapsed 0.086 ms (8.591 ms / 100) 8.582 -> 8.575 ( -0.08%) [ +0.00% +0.13% +0.16% / -0.08% +0.43% +0.09%] index_select skip256 : Elapsed 0.086 ms (8.582 ms / 100) 8.605 -> 8.603 ( -0.02%) [ +0.00% +0.19% +0.09% / -0.02% +0.22% +0.36%] index_select spread : Elapsed 0.086 ms (8.605 ms / 100) 8.630 -> 8.620 ( -0.12%) [ +0.09% +0.00% +0.06% / -0.12% +0.35% +0.29%] index_select strided 3 : Elapsed 0.086 ms (8.638 ms / 100) 8.623 -> 8.629 ( +0.07%) [ +0.38% +0.00% +0.17% / +0.07% +0.50% +0.35%] index_select random : Elapsed 0.087 ms (8.656 ms / 100) 8.616 -> 8.617 ( +0.01%) [ +0.09% +0.00% +0.03% / +0.01% +0.30% +0.34%] index_select random_sorted : Elapsed 0.086 ms (8.624 ms / 100) B = [40, 16, 4, 20] (stride (1, 40, 640, 2560)) dim = 3 fill_cnt = 5 0.946 -> 0.945 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% +0.53% +0.63%] index_fill_ const : Elapsed 0.009 ms (0.946 ms / 100) 0.948 -> 0.949 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.21% +0.42%] index_fill_ linear : Elapsed 0.009 ms (0.948 ms / 100) 0.947 -> 0.949 ( +0.21%) [ +0.32% +0.53% +0.00% / +0.21% +0.32% +0.53%] index_fill_ reverse : Elapsed 0.009 ms (0.950 ms / 100) 0.946 -> 0.947 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.53% +0.53%] index_fill_ skip64 : Elapsed 0.009 ms (0.946 ms / 100) 0.947 -> 0.948 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.63% +0.11%] index_fill_ skip256 : Elapsed 0.009 ms (0.948 ms / 100) 0.947 -> 0.948 ( +0.11%) [ +0.42% +0.32% +0.00% / +0.11% +0.53% +0.53%] index_fill_ spread : Elapsed 0.010 ms (0.951 ms / 100) 0.946 -> 0.947 ( +0.11%) [ +0.21% +0.00% +0.00% / +0.11% +0.53% +0.63%] index_fill_ strided 3 : Elapsed 0.009 ms (0.948 ms / 100) 0.946 -> 0.947 ( +0.11%) [ +0.11% +0.21% +0.00% / +0.11% +0.63% +0.42%] index_fill_ strided 5 : Elapsed 0.009 ms (0.947 ms / 100) 0.947 -> 0.949 ( +0.21%) [ +0.32% +0.21% +0.00% / +0.21% +0.42% +0.53%] index_fill_ strided 7 : Elapsed 0.009 ms (0.950 ms / 100) 0.947 -> 0.948 ( +0.11%) [ +0.21% +0.11% +0.00% / +0.11% +0.42% +0.53%] index_fill_ strided 8 : Elapsed 0.009 ms (0.949 ms / 100) 0.948 -> 0.949 ( +0.11%) [ +0.00% +0.00% +0.11% / +0.11% +0.42% +0.42%] index_fill_ strided 16 : Elapsed 0.009 ms (0.948 ms / 100) 0.946 -> 0.947 ( +0.11%) [ +0.11% +0.21% +0.00% / +0.11% +0.32% +0.32%] index_fill_ random : Elapsed 0.009 ms (0.947 ms / 100) 0.945 -> 0.946 ( +0.11%) [ +0.11% +0.32% +0.00% / +0.11% +0.63% +0.63%] index_fill_ random_sorted : Elapsed 0.009 ms (0.946 ms / 100) 0.947 -> 0.947 ( +0.00%) [ +0.32% +0.11% +0.00% / +0.00% +0.32% +0.53%] index_fill_ perm : Elapsed 0.010 ms (0.950 ms / 100) 0.947 -> 0.947 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.42% +0.53%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.948 ms / 100) out_shape = [20, 16, 5, 4] in_shape = [40, 16, 5, 4] idx_dim = 0 B = [20, 16, 5, 4] (stride (64, 4, 1280, 1)) A = [40, 16, 5, 4] (stride (320, 1, 64, 16)) dim = 0 2.403 -> 2.409 ( +0.25%) [ +0.33% +0.25% +0.00% / +0.25% +0.29% +0.33%] index_select const : Elapsed 0.024 ms (2.411 ms / 100) 2.416 -> 2.414 ( -0.08%) [ +0.04% +0.00% +0.00% / -0.08% +0.08% -0.08%] index_select wrap : Elapsed 0.024 ms (2.417 ms / 100) 2.416 -> 2.417 ( +0.04%) [ +0.04% +0.17% +0.00% / +0.04% +0.12% +0.12%] index_select linear : Elapsed 0.024 ms (2.417 ms / 100) 2.415 -> 2.414 ( -0.04%) [ +0.00% +0.12% +0.17% / +0.04% +0.17% -0.04%] index_select reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.408 -> 2.406 ( -0.08%) [ +0.00% +0.17% +0.12% / +0.12% -0.08% +0.12%] index_select skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.408 -> 2.410 ( +0.08%) [ +0.00% +0.12% +0.12% / +0.08% +0.25% +0.25%] index_select skip256 : Elapsed 0.024 ms (2.408 ms / 100) 2.414 -> 2.415 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.21% +0.25%] index_select spread : Elapsed 0.024 ms (2.415 ms / 100) 2.416 -> 2.416 ( +0.00%) [ +0.04% +0.00% +0.12% / +0.00% +0.04% +0.12%] index_select strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.412 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.21% +0.08% +0.17%] index_select strided 5 : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.413 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.04% +0.21% +0.21%] index_select strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.407 -> 2.409 ( +0.08%) [ +0.25% +0.12% +0.00% / +0.08% +0.37% +0.21%] index_select strided 8 : Elapsed 0.024 ms (2.413 ms / 100) 2.410 -> 2.411 ( +0.04%) [ +0.08% +0.00% +0.17% / +0.04% +0.08% +0.17%] index_select strided 16 : Elapsed 0.024 ms (2.412 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.12% +0.08% +0.00% / -0.08% +0.33% +0.21%] index_select random : Elapsed 0.024 ms (2.414 ms / 100) 2.411 -> 2.414 ( +0.12%) [ +0.17% +0.00% +0.08% / +0.17% +0.17% +0.12%] index_select random_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.12% +0.21% +0.00% / +0.21% +0.04% +0.21%] index_select perm : Elapsed 0.024 ms (2.416 ms / 100) 2.417 -> 2.413 ( -0.17%) [ +0.08% +0.12% +0.00% / +0.00% -0.17% -0.12%] index_select perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) B = [20, 16, 5, 4] (stride (1, 80, 1280, 20)) A = [40, 16, 5, 4] (stride (320, 4, 64, 1)) dim = 0 2.445 -> 2.443 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.00% +0.00% -0.08%] index_select const : Elapsed 0.024 ms (2.447 ms / 100) 2.456 -> 2.450 ( -0.24%) [ +0.16% +0.12% +0.00% / +0.04% +0.04% -0.24%] index_select wrap : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.453 ( -0.08%) [ +0.08% +0.20% +0.00% / -0.04% +0.04% -0.08%] index_select linear : Elapsed 0.025 ms (2.457 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.29% +0.08% +0.00% / +0.04% +0.20% +0.16%] index_select reverse : Elapsed 0.025 ms (2.459 ms / 100) 2.447 -> 2.445 ( -0.08%) [ +0.04% +0.12% +0.00% / -0.08% -0.04% +0.16%] index_select skip64 : Elapsed 0.024 ms (2.448 ms / 100) 2.445 -> 2.443 ( -0.08%) [ +0.25% +0.00% +0.04% / -0.08% +0.04% +0.12%] index_select skip256 : Elapsed 0.025 ms (2.451 ms / 100) 2.453 -> 2.456 ( +0.12%) [ +0.04% +0.08% +0.00% / +0.12% +0.12% +0.24%] index_select spread : Elapsed 0.025 ms (2.454 ms / 100) 2.452 -> 2.450 ( -0.08%) [ +0.00% +0.04% +0.08% / -0.08% +0.24% +0.12%] index_select strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.450 -> 2.451 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.12% +0.08%] index_select strided 5 : Elapsed 0.025 ms (2.450 ms / 100) 2.452 -> 2.453 ( +0.04%) [ +0.12% +0.33% +0.00% / +0.04% +0.20% +0.12%] index_select strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.448 -> 2.446 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.08% +0.20%] index_select strided 8 : Elapsed 0.025 ms (2.450 ms / 100) 2.447 -> 2.448 ( +0.04%) [ +0.04% +0.00% +0.08% / +0.04% +0.16% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.454 ( +0.12%) [ +0.16% +0.00% +0.24% / +0.12% +0.16% +0.33%] index_select random : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.455 ( +0.20%) [ +0.20% +0.12% +0.00% / +0.20% +0.20% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.456 -> 2.454 ( -0.08%) [ +0.08% +0.16% +0.00% / -0.08% -0.04% +0.00%] index_select perm : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.454 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.08% -0.08% -0.12%] index_select perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) B = [20, 16, 5, 4] (stride (1, 20, 1280, 320)) A = [40, 16, 5, 4] (stride (64, 4, 2560, 1)) dim = 0 2.394 -> 2.397 ( +0.13%) [ +0.04% +0.00% +0.25% / +0.13% +0.25% +0.29%] index_select const : Elapsed 0.024 ms (2.395 ms / 100) 2.410 -> 2.415 ( +0.21%) [ +0.33% +0.29% +0.00% / +0.21% +0.33% +0.25%] index_select wrap : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.413 ( -0.12%) [ +0.08% +0.08% +0.00% / -0.12% +0.04% +0.17%] index_select linear : Elapsed 0.024 ms (2.418 ms / 100) 2.416 -> 2.416 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.04% +0.04%] index_select reverse : Elapsed 0.024 ms (2.416 ms / 100) 2.396 -> 2.397 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.17% +0.04% +0.04%] index_select skip64 : Elapsed 0.024 ms (2.399 ms / 100) 2.395 -> 2.394 ( -0.04%) [ +0.04% +0.13% +0.00% / -0.04% +0.21% +0.29%] index_select skip256 : Elapsed 0.024 ms (2.396 ms / 100) 2.416 -> 2.413 ( -0.12%) [ +0.08% +0.17% +0.00% / +0.04% -0.12% +0.04%] index_select spread : Elapsed 0.024 ms (2.418 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.08% +0.00% +0.04% / +0.08% +0.08% +0.08%] index_select strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.404 -> 2.404 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.08% +0.21% +0.00%] index_select strided 5 : Elapsed 0.024 ms (2.406 ms / 100) 2.405 -> 2.410 ( +0.21%) [ +0.12% +0.17% +0.00% / +0.21% +0.58% +0.58%] index_select strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.399 -> 2.400 ( +0.04%) [ +0.00% +0.08% +0.04% / +0.17% +0.21% +0.04%] index_select strided 8 : Elapsed 0.024 ms (2.399 ms / 100) 2.400 -> 2.400 ( +0.00%) [ +0.00% +0.21% +0.13% / +0.00% +0.21% +0.08%] index_select strided 16 : Elapsed 0.024 ms (2.400 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.04% +0.00% +0.04% / -0.08% +0.17% +0.33%] index_select random : Elapsed 0.024 ms (2.412 ms / 100) 2.413 -> 2.414 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.17% +0.08%] index_select random_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.418 -> 2.410 ( -0.33%) [ +0.08% +0.00% +0.17% / +0.04% -0.21% -0.33%] index_select perm : Elapsed 0.024 ms (2.420 ms / 100) 2.420 -> 2.407 ( -0.54%) [ +0.12% +0.12% +0.00% / -0.12% -0.54% -0.33%] index_select perm_sorted : Elapsed 0.024 ms (2.423 ms / 100) out_shape = [40, 20, 5, 4] in_shape = [40, 16, 5, 4] idx_dim = 1 B = [40, 20, 5, 4] (stride (4, 800, 160, 1)) A = [40, 16, 5, 4] (stride (320, 20, 1, 5)) dim = 1 3.944 -> 3.945 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.81% +0.79%] index_add_ linear : Elapsed 0.039 ms (3.945 ms / 100) 3.826 -> 3.830 ( +0.10%) [ +0.05% +0.13% +0.00% / +0.10% +1.05% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.828 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.89% +0.84%] index_add_ reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.820 -> 3.822 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.94% +0.84%] index_copy_ reverse : Elapsed 0.038 ms (3.820 ms / 100) 3.948 -> 3.948 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.79% +0.73%] index_add_ spread : Elapsed 0.039 ms (3.949 ms / 100) 3.822 -> 3.822 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.73% +0.84%] index_copy_ spread : Elapsed 0.038 ms (3.825 ms / 100) 3.937 -> 3.938 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.81% +0.79%] index_add_ strided 3 : Elapsed 0.039 ms (3.939 ms / 100) 3.832 -> 3.838 ( +0.16%) [ +0.10% +0.10% +0.00% / +0.16% +0.99% +0.86%] index_copy_ strided 3 : Elapsed 0.038 ms (3.836 ms / 100) 3.940 -> 3.940 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.71% +0.69%] index_add_ strided 7 : Elapsed 0.039 ms (3.941 ms / 100) 3.836 -> 3.837 ( +0.03%) [ +0.00% +0.10% +0.03% / +0.03% +0.65% +0.68%] index_copy_ strided 7 : Elapsed 0.038 ms (3.836 ms / 100) 3.950 -> 3.949 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.71% +0.71%] index_add_ perm : Elapsed 0.040 ms (3.951 ms / 100) 3.824 -> 3.823 ( -0.03%) [ +0.00% +0.05% +0.00% / -0.03% +0.76% +0.63%] index_copy_ perm : Elapsed 0.038 ms (3.824 ms / 100) 3.949 -> 3.950 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.63% +0.66%] index_add_ perm_sorted : Elapsed 0.039 ms (3.950 ms / 100) 3.824 -> 3.825 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.68% +0.65%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.824 ms / 100) 5.557 -> 5.553 ( -0.07%) [ +0.04% +0.02% +0.00% / +0.07% -0.07% +0.05%] index_select const : Elapsed 0.056 ms (5.559 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.05% +0.00% +0.05% / +0.22% +0.02% +0.07%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.570 -> 5.577 ( +0.13%) [ +0.09% +0.07% +0.00% / +0.14% +0.18% +0.13%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.571 -> 5.575 ( +0.07%) [ +0.16% +0.14% +0.00% / +0.07% +0.09% +0.14%] index_select reverse : Elapsed 0.056 ms (5.580 ms / 100) 5.557 -> 5.553 ( -0.07%) [ +0.00% +0.09% +0.11% / -0.04% +0.09% -0.07%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.554 -> 5.553 ( -0.02%) [ +0.05% +0.00% +0.09% / -0.02% +0.04% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.14% +0.00% +0.13% / +0.07% +0.05% +0.11%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.577 -> 5.570 ( -0.13%) [ +0.04% +0.00% +0.04% / +0.00% -0.13% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.576 -> 5.575 ( -0.02%) [ +0.00% +0.02% +0.05% / +0.00% -0.02% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.576 ms / 100) 5.569 -> 5.574 ( +0.09%) [ +0.09% +0.02% +0.00% / +0.09% +0.22% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.553 -> 5.557 ( +0.07%) [ +0.16% +0.07% +0.00% / +0.14% +0.07% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.567 -> 5.567 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.20% +0.00% +0.13%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.567 ( -0.02%) [ +0.02% +0.07% +0.00% / +0.04% -0.02% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [40, 20, 5, 4] (stride (80, 1, 3200, 20)) A = [40, 16, 5, 4] (stride (1, 40, 640, 3200)) dim = 1 4.312 -> 4.313 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.74% +0.74%] index_add_ linear : Elapsed 0.043 ms (4.314 ms / 100) 4.155 -> 4.157 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.75% +0.75%] index_copy_ linear : Elapsed 0.042 ms (4.156 ms / 100) 4.279 -> 4.279 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.82% +0.82%] index_add_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.122 -> 4.124 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.75% +0.70%] index_copy_ reverse : Elapsed 0.041 ms (4.123 ms / 100) 4.288 -> 4.289 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.86% +0.84%] index_add_ spread : Elapsed 0.043 ms (4.290 ms / 100) 4.134 -> 4.139 ( +0.12%) [ +0.02% +0.02% +0.00% / +0.12% +0.85% +0.82%] index_copy_ spread : Elapsed 0.041 ms (4.135 ms / 100) 4.309 -> 4.308 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.65% +0.63%] index_add_ strided 3 : Elapsed 0.043 ms (4.309 ms / 100) 4.140 -> 4.145 ( +0.12%) [ +0.17% +0.10% +0.00% / +0.12% +0.77% +0.77%] index_copy_ strided 3 : Elapsed 0.041 ms (4.147 ms / 100) 4.278 -> 4.279 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.84% +0.82%] index_add_ strided 7 : Elapsed 0.043 ms (4.280 ms / 100) 4.121 -> 4.123 ( +0.05%) [ +0.07% +0.05% +0.00% / +0.05% +0.78% +0.75%] index_copy_ strided 7 : Elapsed 0.041 ms (4.124 ms / 100) 4.312 -> 4.315 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.72% +0.72%] index_add_ perm : Elapsed 0.043 ms (4.312 ms / 100) 4.158 -> 4.158 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.67%] index_copy_ perm : Elapsed 0.042 ms (4.158 ms / 100) 4.304 -> 4.306 ( +0.05%) [ +0.07% +0.21% +0.00% / +0.05% +0.77% +0.77%] index_add_ perm_sorted : Elapsed 0.043 ms (4.307 ms / 100) 4.141 -> 4.143 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.87% +0.85%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.143 ms / 100) 5.556 -> 5.559 ( +0.05%) [ +0.23% +0.07% +0.00% / +0.05% +0.14% +0.23%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.580 -> 5.589 ( +0.16%) [ +0.23% +0.22% +0.00% / +0.16% +0.34% +0.32%] index_select wrap : Elapsed 0.056 ms (5.593 ms / 100) 5.582 -> 5.589 ( +0.13%) [ +0.00% +0.14% +0.07% / +0.13% +0.25% +0.20%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.589 -> 5.584 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.04% +0.21%] index_select reverse : Elapsed 0.056 ms (5.589 ms / 100) 5.559 -> 5.561 ( +0.04%) [ +0.05% +0.11% +0.00% / +0.04% +0.09% +0.14%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.564 -> 5.559 ( -0.09%) [ +0.04% +0.00% +0.09% / -0.09% +0.13% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.583 -> 5.585 ( +0.04%) [ +0.13% +0.05% +0.00% / +0.04% +0.14% +0.09%] index_select spread : Elapsed 0.056 ms (5.590 ms / 100) 5.589 -> 5.588 ( -0.02%) [ +0.00% +0.11% +0.04% / -0.02% +0.13% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.589 ms / 100) 5.585 -> 5.588 ( +0.05%) [ +0.07% +0.00% +0.07% / +0.05% +0.09% +0.23%] index_select strided 5 : Elapsed 0.056 ms (5.589 ms / 100) 5.589 -> 5.582 ( -0.13%) [ +0.00% +0.09% +0.14% / -0.13% +0.09% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.589 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.02% +0.00% +0.13% / -0.02% +0.16% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.567 ms / 100) 5.584 -> 5.588 ( +0.07%) [ +0.00% +0.00% +0.04% / +0.07% +0.11% +0.21%] index_select random : Elapsed 0.056 ms (5.584 ms / 100) 5.583 -> 5.579 ( -0.07%) [ +0.13% +0.00% +0.07% / -0.07% +0.14% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.590 ms / 100) B = [40, 20, 5, 4] (stride (5, 200, 1, 4000)) A = [40, 16, 5, 4] (stride (80, 1, 16, 3200)) dim = 1 4.206 -> 4.207 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.71% +0.67%] index_add_ linear : Elapsed 0.042 ms (4.207 ms / 100) 4.059 -> 4.060 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.67% +0.64%] index_copy_ linear : Elapsed 0.041 ms (4.059 ms / 100) 4.214 -> 4.214 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.74% +0.74%] index_add_ reverse : Elapsed 0.042 ms (4.214 ms / 100) 4.066 -> 4.066 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.84% +0.76%] index_copy_ reverse : Elapsed 0.041 ms (4.066 ms / 100) 4.208 -> 4.210 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.71% +0.69%] index_add_ spread : Elapsed 0.042 ms (4.210 ms / 100) 4.065 -> 4.066 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.66% +0.64%] index_copy_ spread : Elapsed 0.041 ms (4.065 ms / 100) 4.198 -> 4.199 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.76% +0.71%] index_add_ strided 3 : Elapsed 0.042 ms (4.200 ms / 100) 4.051 -> 4.052 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.89% +0.74%] index_copy_ strided 3 : Elapsed 0.041 ms (4.053 ms / 100) 4.201 -> 4.203 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.71% +0.67%] index_add_ strided 7 : Elapsed 0.042 ms (4.203 ms / 100) 4.052 -> 4.054 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.74% +0.77%] index_copy_ strided 7 : Elapsed 0.041 ms (4.054 ms / 100) 4.212 -> 4.212 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm : Elapsed 0.042 ms (4.214 ms / 100) 4.067 -> 4.068 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.59% +0.54%] index_copy_ perm : Elapsed 0.041 ms (4.068 ms / 100) 4.215 -> 4.215 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.64% +0.64%] index_add_ perm_sorted : Elapsed 0.042 ms (4.216 ms / 100) 4.066 -> 4.069 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.69% +0.76%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.066 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% -0.04% -0.04%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.00% +0.16% +0.02% / +0.23% +0.07% +0.16%] index_select wrap : Elapsed 0.056 ms (5.559 ms / 100) 5.565 -> 5.560 ( -0.09%) [ +0.07% +0.00% +0.11% / -0.02% -0.09% +0.00%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.564 -> 5.568 ( +0.07%) [ +0.02% +0.09% +0.00% / +0.09% +0.25% +0.07%] index_select reverse : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.551 ( -0.13%) [ +0.00% +0.13% +0.07% / -0.13% +0.13% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.551 -> 5.559 ( +0.14%) [ +0.00% +0.16% +0.22% / +0.14% +0.16% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.551 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.13% +0.00% +0.09% / +0.20% -0.07% +0.05%] index_select spread : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.11% +0.05% +0.00% / +0.02% -0.11% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.575 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.00% +0.13% +0.00% / -0.11% +0.02% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.566 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.07% +0.00% +0.00% / +0.11% +0.14% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.00% +0.11% +0.14% / +0.20% +0.09% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.04% +0.00% +0.00% / -0.02% +0.09% +0.09%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.04% +0.09% +0.00% / +0.11% +0.00% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.568 ms / 100) out_shape = [40, 16, 20, 4] in_shape = [40, 16, 5, 4] idx_dim = 2 B = [40, 16, 20, 4] (stride (1280, 80, 4, 1)) A = [40, 16, 5, 4] (stride (64, 1, 2560, 16)) dim = 2 1.770 -> 1.768 ( -0.11%) [ +0.00% +0.06% +0.00% / -0.11% +0.68% +0.73%] index_add_ linear : Elapsed 0.018 ms (1.770 ms / 100) 1.717 -> 1.722 ( +0.29%) [ +0.00% +0.00% +0.06% / +0.29% +1.46% +1.22%] index_copy_ linear : Elapsed 0.017 ms (1.717 ms / 100) 1.772 -> 1.771 ( -0.06%) [ +0.00% +0.17% +0.11% / -0.06% +0.62% +0.51%] index_add_ reverse : Elapsed 0.018 ms (1.772 ms / 100) 1.728 -> 1.728 ( +0.00%) [ +0.00% +0.29% +0.29% / +0.00% +0.41% +0.58%] index_copy_ reverse : Elapsed 0.017 ms (1.728 ms / 100) 1.812 -> 1.811 ( -0.06%) [ +0.11% +0.00% +0.17% / -0.06% +0.39% +0.33%] index_add_ spread : Elapsed 0.018 ms (1.814 ms / 100) 1.766 -> 1.772 ( +0.34%) [ +0.11% +0.34% +0.00% / +0.34% +0.74% +0.79%] index_copy_ spread : Elapsed 0.018 ms (1.768 ms / 100) 1.812 -> 1.814 ( +0.11%) [ +0.28% +0.00% +0.06% / +0.11% +0.39% +0.50%] index_add_ strided 3 : Elapsed 0.018 ms (1.817 ms / 100) 1.770 -> 1.774 ( +0.23%) [ +0.11% +0.17% +0.00% / +0.23% +0.51% +0.90%] index_copy_ strided 3 : Elapsed 0.018 ms (1.772 ms / 100) 1.798 -> 1.798 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.17% +0.56%] index_add_ strided 7 : Elapsed 0.018 ms (1.798 ms / 100) 1.756 -> 1.755 ( -0.06%) [ +0.00% +0.17% +0.11% / -0.06% +0.57% +0.57%] index_copy_ strided 7 : Elapsed 0.018 ms (1.756 ms / 100) 1.791 -> 1.793 ( +0.11%) [ +0.00% +0.06% +0.00% / +0.11% +0.56% +0.56%] index_add_ perm : Elapsed 0.018 ms (1.791 ms / 100) 1.747 -> 1.751 ( +0.23%) [ +0.17% +0.06% +0.00% / +0.23% +0.74% +0.86%] index_copy_ perm : Elapsed 0.018 ms (1.750 ms / 100) 1.791 -> 1.794 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.39% +0.34%] index_add_ perm_sorted : Elapsed 0.018 ms (1.794 ms / 100) 1.745 -> 1.749 ( +0.23%) [ +0.00% +0.23% +0.34% / +0.23% +0.74% +0.80%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.745 ms / 100) 8.219 -> 8.220 ( +0.01%) [ +0.01% +0.00% +0.06% / +0.01% +0.18% +0.01%] index_select const : Elapsed 0.082 ms (8.220 ms / 100) 8.262 -> 8.272 ( +0.12%) [ +0.16% +0.00% +0.21% / +0.31% +0.12% +0.25%] index_select wrap : Elapsed 0.083 ms (8.275 ms / 100) 8.254 -> 8.255 ( +0.01%) [ +0.12% +0.13% +0.00% / +0.17% +0.01% +0.30%] index_select linear : Elapsed 0.083 ms (8.264 ms / 100) 8.260 -> 8.251 ( -0.11%) [ +0.15% +0.13% +0.00% / +0.16% +0.16% -0.11%] index_select reverse : Elapsed 0.083 ms (8.272 ms / 100) 8.219 -> 8.220 ( +0.01%) [ +0.00% +0.19% +0.27% / +0.37% +0.01% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.219 ms / 100) 8.212 -> 8.214 ( +0.02%) [ +0.10% +0.30% +0.00% / +0.02% +0.11% +0.29%] index_select skip256 : Elapsed 0.082 ms (8.220 ms / 100) 8.248 -> 8.235 ( -0.16%) [ +0.18% +0.13% +0.00% / +0.13% -0.16% -0.06%] index_select spread : Elapsed 0.083 ms (8.263 ms / 100) 8.258 -> 8.255 ( -0.04%) [ +0.11% +0.08% +0.00% / +0.21% +0.24% -0.04%] index_select strided 3 : Elapsed 0.083 ms (8.267 ms / 100) 8.264 -> 8.263 ( -0.01%) [ +0.15% +0.00% +0.05% / -0.01% +0.31% +0.06%] index_select random : Elapsed 0.083 ms (8.276 ms / 100) 8.247 -> 8.245 ( -0.02%) [ +0.13% +0.16% +0.00% / +0.17% -0.02% +0.19%] index_select random_sorted : Elapsed 0.083 ms (8.258 ms / 100) B = [40, 16, 20, 4] (stride (1280, 1, 16, 320)) A = [40, 16, 5, 4] (stride (5, 200, 1, 3200)) dim = 2 1.953 -> 1.956 ( +0.15%) [ +0.26% +0.15% +0.00% / +0.15% +0.72% +0.56%] index_add_ linear : Elapsed 0.020 ms (1.958 ms / 100) 1.898 -> 1.900 ( +0.11%) [ +0.00% +0.11% +0.05% / +0.11% +0.47% +0.47%] index_copy_ linear : Elapsed 0.019 ms (1.898 ms / 100) 1.955 -> 1.952 ( -0.15%) [ +0.00% +0.10% +0.20% / -0.15% +0.46% +0.36%] index_add_ reverse : Elapsed 0.020 ms (1.955 ms / 100) 1.894 -> 1.893 ( -0.05%) [ +0.26% +0.00% +0.05% / -0.05% +0.79% +0.53%] index_copy_ reverse : Elapsed 0.019 ms (1.899 ms / 100) 1.968 -> 1.967 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.51% +0.36%] index_add_ spread : Elapsed 0.020 ms (1.968 ms / 100) 1.909 -> 1.909 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.31% +0.31%] index_copy_ spread : Elapsed 0.019 ms (1.910 ms / 100) 1.961 -> 1.963 ( +0.10%) [ +0.00% +0.15% +0.10% / +0.10% +0.92% +0.66%] index_add_ strided 3 : Elapsed 0.020 ms (1.961 ms / 100) 1.903 -> 1.903 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.68% +0.79%] index_copy_ strided 3 : Elapsed 0.019 ms (1.905 ms / 100) 1.956 -> 1.959 ( +0.15%) [ +0.10% +0.31% +0.00% / +0.15% +0.77% +0.87%] index_add_ strided 7 : Elapsed 0.020 ms (1.958 ms / 100) 1.898 -> 1.902 ( +0.21%) [ +0.11% +0.26% +0.00% / +0.21% +1.26% +1.00%] index_copy_ strided 7 : Elapsed 0.019 ms (1.900 ms / 100) 1.959 -> 1.963 ( +0.20%) [ +0.10% +0.00% +0.26% / +0.20% +0.56% +0.46%] index_add_ perm : Elapsed 0.020 ms (1.961 ms / 100) 1.903 -> 1.904 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.05% +0.26% +0.42%] index_copy_ perm : Elapsed 0.019 ms (1.905 ms / 100) 1.961 -> 1.960 ( -0.05%) [ +0.25% +0.15% +0.00% / -0.05% +0.51% +0.51%] index_add_ perm_sorted : Elapsed 0.020 ms (1.966 ms / 100) 1.902 -> 1.909 ( +0.37%) [ +0.00% +0.05% +0.05% / +0.37% +0.42% +0.53%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.902 ms / 100) 8.558 -> 8.554 ( -0.05%) [ +0.19% +0.00% +0.01% / -0.02% -0.05% +0.07%] index_select const : Elapsed 0.086 ms (8.574 ms / 100) 8.551 -> 8.554 ( +0.04%) [ +0.00% +0.37% +0.20% / +0.04% +0.07% +0.12%] index_select wrap : Elapsed 0.086 ms (8.551 ms / 100) 8.566 -> 8.548 ( -0.21%) [ +0.12% +0.00% +0.00% / -0.21% +0.27% +0.01%] index_select linear : Elapsed 0.086 ms (8.576 ms / 100) 8.558 -> 8.560 ( +0.02%) [ +0.00% +0.13% +0.18% / +0.02% +0.21% +0.11%] index_select reverse : Elapsed 0.086 ms (8.558 ms / 100) 8.551 -> 8.554 ( +0.04%) [ +0.07% +0.00% +0.18% / +0.04% +0.32% +0.22%] index_select skip64 : Elapsed 0.086 ms (8.557 ms / 100) 8.554 -> 8.563 ( +0.11%) [ +0.00% +0.20% +0.01% / +0.11% +0.27% +0.14%] index_select skip256 : Elapsed 0.086 ms (8.554 ms / 100) 8.559 -> 8.554 ( -0.06%) [ +0.14% +0.00% +0.04% / +0.15% -0.06% -0.05%] index_select spread : Elapsed 0.086 ms (8.571 ms / 100) 8.549 -> 8.561 ( +0.14%) [ +0.06% +0.32% +0.00% / +0.20% +0.34% +0.14%] index_select strided 3 : Elapsed 0.086 ms (8.554 ms / 100) 8.561 -> 8.561 ( +0.00%) [ +0.00% +0.05% +0.13% / +0.23% +0.00% +0.16%] index_select random : Elapsed 0.086 ms (8.561 ms / 100) 8.554 -> 8.554 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.19% +0.09% +0.00%] index_select random_sorted : Elapsed 0.086 ms (8.560 ms / 100) B = [40, 16, 20, 4] (stride (4, 3200, 160, 1)) A = [40, 16, 5, 4] (stride (1, 200, 40, 3200)) dim = 2 1.922 -> 1.936 ( +0.73%) [ +0.26% +0.00% +0.62% / +0.73% +2.45% +2.45%] index_add_ linear : Elapsed 0.019 ms (1.927 ms / 100) 1.874 -> 1.881 ( +0.37%) [ +0.05% +0.00% +0.37% / +0.37% +1.92% +1.65%] index_copy_ linear : Elapsed 0.019 ms (1.875 ms / 100) 1.926 -> 1.937 ( +0.57%) [ +0.05% +0.00% +0.47% / +0.57% +2.18% +2.02%] index_add_ reverse : Elapsed 0.019 ms (1.927 ms / 100) 1.877 -> 1.882 ( +0.27%) [ +0.00% +0.16% +0.43% / +0.27% +1.86% +1.86%] index_copy_ reverse : Elapsed 0.019 ms (1.877 ms / 100) 1.937 -> 1.935 ( -0.10%) [ +0.05% +0.00% +0.05% / -0.10% +0.77% +0.72%] index_add_ spread : Elapsed 0.019 ms (1.938 ms / 100) 1.888 -> 1.887 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +1.01% +0.69%] index_copy_ spread : Elapsed 0.019 ms (1.889 ms / 100) 1.945 -> 1.959 ( +0.72%) [ +0.15% +0.00% +0.21% / +0.72% +1.08% +1.23%] index_add_ strided 3 : Elapsed 0.019 ms (1.948 ms / 100) 1.891 -> 1.897 ( +0.32%) [ +0.00% +0.00% +0.26% / +0.32% +0.95% +0.95%] index_copy_ strided 3 : Elapsed 0.019 ms (1.891 ms / 100) 1.935 -> 1.941 ( +0.31%) [ +0.21% +0.26% +0.00% / +0.31% +0.62% +0.93%] index_add_ strided 7 : Elapsed 0.019 ms (1.939 ms / 100) 1.882 -> 1.886 ( +0.21%) [ +0.11% +0.00% +0.32% / +0.21% +0.64% +0.80%] index_copy_ strided 7 : Elapsed 0.019 ms (1.884 ms / 100) 1.931 -> 1.936 ( +0.26%) [ +0.05% +0.10% +0.00% / +0.26% +1.04% +0.88%] index_add_ perm : Elapsed 0.019 ms (1.932 ms / 100) 1.883 -> 1.884 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.74% +0.96%] index_copy_ perm : Elapsed 0.019 ms (1.883 ms / 100) 1.932 -> 1.942 ( +0.52%) [ +0.16% +0.00% +0.41% / +0.52% +1.29% +1.14%] index_add_ perm_sorted : Elapsed 0.019 ms (1.935 ms / 100) 1.886 -> 1.891 ( +0.27%) [ +0.05% +0.00% +0.21% / +0.27% +0.95% +0.90%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.887 ms / 100) 8.537 -> 8.531 ( -0.07%) [ +0.00% +0.00% +0.18% / -0.07% -0.05% +0.02%] index_select const : Elapsed 0.085 ms (8.537 ms / 100) 8.554 -> 8.554 ( +0.00%) [ +0.20% +0.06% +0.00% / +0.00% +0.34% +0.12%] index_select wrap : Elapsed 0.086 ms (8.571 ms / 100) 8.547 -> 8.558 ( +0.13%) [ +0.29% +0.19% +0.00% / +0.41% +0.48% +0.13%] index_select linear : Elapsed 0.086 ms (8.572 ms / 100) 8.530 -> 8.552 ( +0.26%) [ +0.40% +0.00% +0.63% / +0.26% +0.42% +0.57%] index_select reverse : Elapsed 0.086 ms (8.564 ms / 100) 8.523 -> 8.518 ( -0.06%) [ +0.25% +0.00% +0.11% / -0.06% +0.32% +0.25%] index_select skip64 : Elapsed 0.085 ms (8.544 ms / 100) 8.533 -> 8.532 ( -0.01%) [ +0.00% +0.00% +0.05% / -0.01% +0.25% +0.09%] index_select skip256 : Elapsed 0.085 ms (8.533 ms / 100) 8.566 -> 8.574 ( +0.09%) [ +0.15% +0.00% +0.05% / +0.29% +0.27% +0.09%] index_select spread : Elapsed 0.086 ms (8.579 ms / 100) 8.552 -> 8.569 ( +0.20%) [ +0.20% +0.00% +0.04% / +0.20% +0.28% +0.36%] index_select strided 3 : Elapsed 0.086 ms (8.569 ms / 100) 8.556 -> 8.556 ( +0.00%) [ +0.22% +0.11% +0.00% / +0.00% +0.36% +0.21%] index_select random : Elapsed 0.086 ms (8.575 ms / 100) 8.553 -> 8.582 ( +0.34%) [ +0.55% +0.00% +0.26% / +0.48% +0.34% +0.61%] index_select random_sorted : Elapsed 0.086 ms (8.600 ms / 100) B = [40, 16, 20, 4] (stride (1, 3200, 160, 40)) A = [40, 16, 5, 4] (stride (320, 20, 4, 1)) dim = 2 1.709 -> 1.710 ( +0.06%) [ +0.00% +0.18% +0.06% / +0.06% +1.35% +1.35%] index_add_ linear : Elapsed 0.017 ms (1.709 ms / 100) 1.663 -> 1.661 ( -0.12%) [ +0.00% +0.18% +0.00% / -0.12% +1.50% +1.50%] index_copy_ linear : Elapsed 0.017 ms (1.663 ms / 100) 1.705 -> 1.704 ( -0.06%) [ +0.12% +0.23% +0.00% / -0.06% +2.11% +2.29%] index_add_ reverse : Elapsed 0.017 ms (1.707 ms / 100) 1.662 -> 1.663 ( +0.06%) [ +0.30% +0.00% +0.00% / +0.06% +2.11% +1.87%] index_copy_ reverse : Elapsed 0.017 ms (1.667 ms / 100) 1.707 -> 1.710 ( +0.18%) [ +0.00% +0.35% +0.12% / +0.18% +0.94% +1.00%] index_add_ spread : Elapsed 0.017 ms (1.707 ms / 100) 1.662 -> 1.666 ( +0.24%) [ +0.12% +0.18% +0.00% / +0.24% +0.96% +1.14%] index_copy_ spread : Elapsed 0.017 ms (1.664 ms / 100) 1.700 -> 1.706 ( +0.35%) [ +0.35% +0.41% +0.00% / +0.35% +1.12% +1.29%] index_add_ strided 3 : Elapsed 0.017 ms (1.706 ms / 100) 1.661 -> 1.659 ( -0.12%) [ +0.18% +0.00% +0.00% / -0.12% +1.08% +1.20%] index_copy_ strided 3 : Elapsed 0.017 ms (1.664 ms / 100) 1.712 -> 1.713 ( +0.06%) [ +0.41% +0.12% +0.00% / +0.06% +0.29% +0.06%] index_add_ strided 7 : Elapsed 0.017 ms (1.719 ms / 100) 1.668 -> 1.669 ( +0.06%) [ +0.18% +0.00% +0.24% / +0.06% +0.30% +0.36%] index_copy_ strided 7 : Elapsed 0.017 ms (1.671 ms / 100) 1.704 -> 1.703 ( -0.06%) [ +0.06% +0.00% +0.23% / -0.06% +0.76% +0.82%] index_add_ perm : Elapsed 0.017 ms (1.705 ms / 100) 1.663 -> 1.665 ( +0.12%) [ +0.18% +0.48% +0.00% / +0.12% +0.84% +0.60%] index_copy_ perm : Elapsed 0.017 ms (1.666 ms / 100) 1.708 -> 1.717 ( +0.53%) [ +0.00% +0.00% +0.12% / +0.53% +0.59% +0.53%] index_add_ perm_sorted : Elapsed 0.017 ms (1.708 ms / 100) 1.665 -> 1.671 ( +0.36%) [ +0.00% +0.12% +0.00% / +0.36% +0.72% +0.66%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.665 ms / 100) 8.250 -> 8.243 ( -0.08%) [ +0.04% +0.15% +0.00% / -0.07% +0.10% -0.08%] index_select const : Elapsed 0.083 ms (8.253 ms / 100) 8.249 -> 8.248 ( -0.01%) [ +0.06% +0.11% +0.00% / -0.01% +0.23% +0.46%] index_select wrap : Elapsed 0.083 ms (8.254 ms / 100) 8.254 -> 8.253 ( -0.01%) [ +0.17% +0.00% +0.07% / -0.01% +0.17% +0.23%] index_select linear : Elapsed 0.083 ms (8.268 ms / 100) 8.265 -> 8.269 ( +0.05%) [ +0.12% +0.12% +0.00% / +0.05% +0.28% +0.22%] index_select reverse : Elapsed 0.083 ms (8.275 ms / 100) 8.232 -> 8.252 ( +0.24%) [ +0.00% +0.18% +0.29% / +0.35% +0.35% +0.24%] index_select skip64 : Elapsed 0.082 ms (8.232 ms / 100) 8.242 -> 8.247 ( +0.06%) [ +0.23% +0.22% +0.00% / +0.06% +0.25% +0.22%] index_select skip256 : Elapsed 0.083 ms (8.261 ms / 100) 8.261 -> 8.265 ( +0.05%) [ +0.00% +0.29% +0.01% / +0.05% +0.22% +0.10%] index_select spread : Elapsed 0.083 ms (8.261 ms / 100) 8.249 -> 8.258 ( +0.11%) [ +0.34% +0.06% +0.00% / +0.11% +0.24% +0.22%] index_select strided 3 : Elapsed 0.083 ms (8.277 ms / 100) 8.265 -> 8.258 ( -0.08%) [ +0.02% +0.00% +0.04% / -0.08% -0.01% +0.31%] index_select random : Elapsed 0.083 ms (8.267 ms / 100) 8.266 -> 8.255 ( -0.13%) [ +0.00% +0.01% +0.02% / -0.01% -0.05% -0.13%] index_select random_sorted : Elapsed 0.083 ms (8.266 ms / 100) B = [40, 16, 20, 4] (stride (1, 40, 2560, 640)) A = [40, 16, 5, 4] (stride (16, 1, 2560, 640)) dim = 2 1.742 -> 1.753 ( +0.63%) [ +0.06% +0.00% +0.11% / +0.63% +1.89% +1.55%] index_add_ linear : Elapsed 0.017 ms (1.743 ms / 100) 1.704 -> 1.709 ( +0.29%) [ +0.00% +0.23% +0.59% / +0.29% +1.06% +1.41%] index_copy_ linear : Elapsed 0.017 ms (1.704 ms / 100) 1.747 -> 1.749 ( +0.11%) [ +0.00% +0.17% +0.00% / +0.11% +0.46% +0.63%] index_add_ reverse : Elapsed 0.017 ms (1.747 ms / 100) 1.705 -> 1.712 ( +0.41%) [ +0.00% +0.00% +0.53% / +0.41% +1.11% +0.82%] index_copy_ reverse : Elapsed 0.017 ms (1.705 ms / 100) 1.724 -> 1.730 ( +0.35%) [ +0.00% +0.12% +0.52% / +0.35% +3.07% +2.96%] index_add_ spread : Elapsed 0.017 ms (1.724 ms / 100) 1.687 -> 1.695 ( +0.47%) [ +0.00% +0.24% +0.53% / +0.47% +2.13% +2.61%] index_copy_ spread : Elapsed 0.017 ms (1.687 ms / 100) 1.733 -> 1.745 ( +0.69%) [ +0.00% +0.46% +0.35% / +0.69% +2.60% +2.77%] index_add_ strided 3 : Elapsed 0.017 ms (1.733 ms / 100) 1.694 -> 1.707 ( +0.77%) [ +0.12% +0.00% +0.77% / +0.77% +1.83% +1.77%] index_copy_ strided 3 : Elapsed 0.017 ms (1.696 ms / 100) 1.728 -> 1.734 ( +0.35%) [ +0.00% +0.06% +0.29% / +0.35% +2.55% +2.43%] index_add_ strided 7 : Elapsed 0.017 ms (1.728 ms / 100) 1.688 -> 1.693 ( +0.30%) [ +0.00% +0.00% +0.24% / +0.30% +1.72% +1.95%] index_copy_ strided 7 : Elapsed 0.017 ms (1.688 ms / 100) 1.713 -> 1.723 ( +0.58%) [ +0.00% +0.06% +0.76% / +0.58% +2.45% +2.86%] index_add_ perm : Elapsed 0.017 ms (1.713 ms / 100) 1.673 -> 1.688 ( +0.90%) [ +0.00% +0.18% +0.72% / +0.90% +2.51% +2.93%] index_copy_ perm : Elapsed 0.017 ms (1.673 ms / 100) 1.706 -> 1.714 ( +0.47%) [ +0.12% +0.00% +0.59% / +0.47% +3.34% +3.17%] index_add_ perm_sorted : Elapsed 0.017 ms (1.708 ms / 100) 1.664 -> 1.674 ( +0.60%) [ +0.06% +0.00% +0.54% / +0.60% +3.61% +3.25%] index_copy_ perm_sorted : Elapsed 0.017 ms (1.665 ms / 100) 8.226 -> 8.223 ( -0.04%) [ +0.00% +0.07% +0.04% / +0.01% +0.34% -0.04%] index_select const : Elapsed 0.082 ms (8.226 ms / 100) 8.282 -> 8.291 ( +0.11%) [ +0.08% +0.18% +0.00% / +0.11% +0.11% +0.22%] index_select wrap : Elapsed 0.083 ms (8.289 ms / 100) 8.265 -> 8.261 ( -0.05%) [ +0.00% +0.15% +0.01% / +0.05% -0.05% +0.19%] index_select linear : Elapsed 0.083 ms (8.265 ms / 100) 8.259 -> 8.274 ( +0.18%) [ +0.19% +0.00% +0.19% / +0.18% +0.30% +0.46%] index_select reverse : Elapsed 0.083 ms (8.275 ms / 100) 8.220 -> 8.219 ( -0.01%) [ +0.11% +0.00% +0.11% / -0.01% +0.40% +0.16%] index_select skip64 : Elapsed 0.082 ms (8.229 ms / 100) 8.202 -> 8.216 ( +0.17%) [ +0.22% +0.00% +0.15% / +0.17% +0.24% +0.43%] index_select skip256 : Elapsed 0.082 ms (8.220 ms / 100) 8.277 -> 8.263 ( -0.17%) [ +0.00% +0.06% +0.05% / -0.17% -0.01% +0.04%] index_select spread : Elapsed 0.083 ms (8.277 ms / 100) 8.285 -> 8.284 ( -0.01%) [ +0.19% +0.33% +0.00% / +0.00% +0.19% -0.01%] index_select strided 3 : Elapsed 0.083 ms (8.301 ms / 100) 8.273 -> 8.291 ( +0.22%) [ +0.00% +0.11% +0.00% / +0.35% +0.44% +0.22%] index_select random : Elapsed 0.083 ms (8.273 ms / 100) 8.261 -> 8.258 ( -0.04%) [ +0.10% +0.31% +0.00% / -0.04% +0.17% +0.05%] index_select random_sorted : Elapsed 0.083 ms (8.269 ms / 100) B = [40, 16, 20, 4] (stride (20, 800, 1, 12800)) A = [40, 16, 5, 4] (stride (5, 200, 1, 3200)) dim = 2 2.026 -> 2.022 ( -0.20%) [ +0.25% +0.00% +0.15% / +0.15% -0.20% +0.05%] index_add_ linear : Elapsed 0.020 ms (2.031 ms / 100) 1.978 -> 1.978 ( +0.00%) [ +0.51% +0.00% +0.10% / +0.05% +0.00% +0.05%] index_copy_ linear : Elapsed 0.020 ms (1.988 ms / 100) 2.027 -> 2.023 ( -0.20%) [ +0.35% +0.05% +0.00% / -0.10% -0.20% -0.20%] index_add_ reverse : Elapsed 0.020 ms (2.034 ms / 100) 1.979 -> 1.975 ( -0.20%) [ +0.05% +0.10% +0.00% / +0.10% +0.00% -0.20%] index_copy_ reverse : Elapsed 0.020 ms (1.980 ms / 100) 2.043 -> 2.041 ( -0.10%) [ +0.20% +0.24% +0.00% / +0.05% -0.10% -0.05%] index_add_ spread : Elapsed 0.020 ms (2.047 ms / 100) 2.007 -> 2.003 ( -0.20%) [ +0.15% +0.25% +0.00% / +0.15% -0.20% -0.15%] index_copy_ spread : Elapsed 0.020 ms (2.010 ms / 100) 2.045 -> 2.042 ( -0.15%) [ +0.00% +0.49% +0.20% / -0.05% -0.05% -0.15%] index_add_ strided 3 : Elapsed 0.020 ms (2.045 ms / 100) 2.005 -> 2.005 ( +0.00%) [ +0.00% +0.45% +0.05% / +0.00% +0.05% +0.15%] index_copy_ strided 3 : Elapsed 0.020 ms (2.005 ms / 100) 2.043 -> 2.039 ( -0.20%) [ +0.05% +0.00% +0.20% / +0.00% -0.20% -0.20%] index_add_ strided 7 : Elapsed 0.020 ms (2.044 ms / 100) 2.003 -> 2.003 ( +0.00%) [ +0.05% +0.20% +0.00% / +0.20% +0.10% +0.00%] index_copy_ strided 7 : Elapsed 0.020 ms (2.004 ms / 100) 2.041 -> 2.039 ( -0.10%) [ +0.29% +0.10% +0.00% / +0.29% -0.10% +0.00%] index_add_ perm : Elapsed 0.020 ms (2.047 ms / 100) 2.006 -> 2.003 ( -0.15%) [ +0.00% +0.45% +0.20% / +0.20% -0.15% +0.10%] index_copy_ perm : Elapsed 0.020 ms (2.006 ms / 100) 2.045 -> 2.038 ( -0.34%) [ +0.15% +0.00% +0.00% / +0.10% -0.15% -0.34%] index_add_ perm_sorted : Elapsed 0.020 ms (2.048 ms / 100) 2.004 -> 2.003 ( -0.05%) [ +0.00% +0.15% +0.05% / +0.30% +0.25% -0.05%] index_copy_ perm_sorted : Elapsed 0.020 ms (2.004 ms / 100) 8.626 -> 8.630 ( +0.05%) [ +0.00% +0.03% +0.21% / +0.05% +0.24% +0.34%] index_select const : Elapsed 0.086 ms (8.626 ms / 100) 8.627 -> 8.629 ( +0.02%) [ +0.10% +0.23% +0.00% / +0.02% +0.27% +0.22%] index_select wrap : Elapsed 0.086 ms (8.636 ms / 100) 8.630 -> 8.627 ( -0.03%) [ +0.06% +0.07% +0.00% / -0.03% +0.66% +0.08%] index_select linear : Elapsed 0.086 ms (8.635 ms / 100) 8.633 -> 8.632 ( -0.01%) [ +0.17% +0.21% +0.00% / -0.01% +0.36% +0.27%] index_select reverse : Elapsed 0.086 ms (8.648 ms / 100) 8.635 -> 8.620 ( -0.17%) [ +0.00% +0.12% +0.12% / -0.17% +0.41% +0.13%] index_select skip64 : Elapsed 0.086 ms (8.635 ms / 100) 8.621 -> 8.635 ( +0.16%) [ +0.00% +0.20% +0.15% / +0.16% +0.22% +0.38%] index_select skip256 : Elapsed 0.086 ms (8.621 ms / 100) 8.615 -> 8.619 ( +0.05%) [ +0.09% +0.19% +0.00% / +0.05% +0.51% +0.37%] index_select spread : Elapsed 0.086 ms (8.623 ms / 100) 8.621 -> 8.638 ( +0.20%) [ +0.05% +0.12% +0.00% / +0.20% +0.27% +0.53%] index_select strided 3 : Elapsed 0.086 ms (8.625 ms / 100) 8.620 -> 8.624 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.36% +0.36%] index_select random : Elapsed 0.086 ms (8.624 ms / 100) 8.623 -> 8.636 ( +0.15%) [ +0.13% +0.16% +0.00% / +0.15% +0.55% +0.36%] index_select random_sorted : Elapsed 0.086 ms (8.634 ms / 100) out_shape = [40, 16, 5, 20] in_shape = [40, 16, 5, 4] idx_dim = 3 B = [40, 16, 5, 20] (stride (1600, 20, 320, 1)) A = [40, 16, 5, 4] (stride (320, 5, 1, 80)) dim = 3 2.017 -> 2.013 ( -0.20%) [ +0.25% +0.15% +0.00% / -0.20% +0.99% +0.89%] index_add_ linear : Elapsed 0.020 ms (2.022 ms / 100) 1.986 -> 1.982 ( -0.20%) [ +0.00% +0.10% +0.05% / -0.20% +1.01% +1.26%] index_copy_ linear : Elapsed 0.020 ms (1.986 ms / 100) 2.015 -> 2.014 ( -0.05%) [ +0.00% +0.00% +0.10% / -0.05% +0.99% +0.94%] index_add_ reverse : Elapsed 0.020 ms (2.015 ms / 100) 1.980 -> 1.981 ( +0.05%) [ +0.00% +0.10% +0.20% / +0.05% +1.67% +1.52%] index_copy_ reverse : Elapsed 0.020 ms (1.980 ms / 100) 2.057 -> 2.058 ( +0.05%) [ +0.58% +0.44% +0.00% / +0.05% +1.65% +1.46%] index_add_ spread : Elapsed 0.021 ms (2.069 ms / 100) 2.083 -> 2.082 ( -0.05%) [ +0.00% +0.14% +0.05% / -0.05% +1.34% +1.49%] index_copy_ spread : Elapsed 0.021 ms (2.083 ms / 100) 2.048 -> 2.053 ( +0.24%) [ +0.10% +0.00% +0.10% / +0.24% +1.37% +0.93%] index_add_ strided 3 : Elapsed 0.021 ms (2.050 ms / 100) 2.049 -> 2.050 ( +0.05%) [ +0.00% +0.20% +0.10% / +0.05% +1.56% +1.56%] index_copy_ strided 3 : Elapsed 0.020 ms (2.049 ms / 100) 2.060 -> 2.061 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +1.21% +1.12%] index_add_ strided 7 : Elapsed 0.021 ms (2.060 ms / 100) 2.079 -> 2.085 ( +0.29%) [ +0.29% +0.00% +0.43% / +0.29% +1.64% +1.54%] index_copy_ strided 7 : Elapsed 0.021 ms (2.085 ms / 100) 2.057 -> 2.061 ( +0.19%) [ +0.15% +0.00% +0.24% / +0.19% +1.26% +1.26%] index_add_ perm : Elapsed 0.021 ms (2.060 ms / 100) 2.081 -> 2.078 ( -0.14%) [ +0.00% +0.00% +0.14% / -0.14% +1.35% +1.20%] index_copy_ perm : Elapsed 0.021 ms (2.081 ms / 100) 2.066 -> 2.068 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.87% +0.97%] index_add_ perm_sorted : Elapsed 0.021 ms (2.068 ms / 100) 2.076 -> 2.088 ( +0.58%) [ +0.10% +0.72% +0.00% / +0.58% +1.69% +1.69%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.078 ms / 100) 8.809 -> 8.810 ( +0.01%) [ +0.15% +0.15% +0.00% / +0.01% +0.45% +0.22%] index_select const : Elapsed 0.088 ms (8.822 ms / 100) 8.878 -> 8.903 ( +0.28%) [ +0.05% +0.23% +0.00% / +0.28% +0.33% +0.35%] index_select wrap : Elapsed 0.089 ms (8.882 ms / 100) 8.840 -> 8.861 ( +0.24%) [ +0.00% +0.27% +0.14% / +0.24% +0.36% +0.40%] index_select linear : Elapsed 0.088 ms (8.840 ms / 100) 8.853 -> 8.866 ( +0.15%) [ +0.15% +0.12% +0.00% / +0.40% +0.15% +0.20%] index_select reverse : Elapsed 0.089 ms (8.866 ms / 100) 8.799 -> 8.802 ( +0.03%) [ +0.13% +0.00% +0.17% / +0.39% +0.03% +0.36%] index_select skip64 : Elapsed 0.088 ms (8.810 ms / 100) 8.796 -> 8.821 ( +0.28%) [ +0.00% +0.38% +0.24% / +0.42% +0.28% +0.33%] index_select skip256 : Elapsed 0.088 ms (8.796 ms / 100) 8.859 -> 8.861 ( +0.02%) [ +0.00% +0.03% +0.05% / +0.02% +0.09% +0.03%] index_select spread : Elapsed 0.089 ms (8.859 ms / 100) 8.880 -> 8.883 ( +0.03%) [ +0.00% +0.30% +0.03% / +0.37% +0.10% +0.03%] index_select strided 3 : Elapsed 0.089 ms (8.880 ms / 100) 8.888 -> 8.893 ( +0.06%) [ +0.19% +0.00% +0.11% / +0.06% +0.30% +0.10%] index_select random : Elapsed 0.089 ms (8.905 ms / 100) 8.868 -> 8.863 ( -0.06%) [ +0.00% +0.07% +0.01% / -0.06% +0.11% +0.11%] index_select random_sorted : Elapsed 0.089 ms (8.868 ms / 100) B = [40, 16, 5, 20] (stride (1600, 1, 320, 16)) A = [40, 16, 5, 4] (stride (320, 1, 64, 16)) dim = 3 2.112 -> 2.118 ( +0.28%) [ +0.00% +0.24% +0.24% / +0.28% +0.57% +0.90%] index_add_ linear : Elapsed 0.021 ms (2.112 ms / 100) 2.063 -> 2.068 ( +0.24%) [ +0.15% +0.00% +0.15% / +0.24% +0.82% +0.82%] index_copy_ linear : Elapsed 0.021 ms (2.066 ms / 100) 2.111 -> 2.123 ( +0.57%) [ +0.09% +0.00% +0.14% / +1.09% +0.71% +0.57%] index_add_ reverse : Elapsed 0.021 ms (2.113 ms / 100) 2.063 -> 2.068 ( +0.24%) [ +0.05% +0.00% +0.15% / +0.24% +0.92% +0.63%] index_copy_ reverse : Elapsed 0.021 ms (2.064 ms / 100) 2.105 -> 2.111 ( +0.29%) [ +0.24% +0.00% +0.19% / +0.29% +0.76% +0.43%] index_add_ spread : Elapsed 0.021 ms (2.110 ms / 100) 2.057 -> 2.060 ( +0.15%) [ +0.05% +0.00% +0.00% / +0.15% +0.49% +0.34%] index_copy_ spread : Elapsed 0.021 ms (2.058 ms / 100) 2.103 -> 2.112 ( +0.43%) [ +0.10% +0.14% +0.00% / +0.43% +0.62% +1.14%] index_add_ strided 3 : Elapsed 0.021 ms (2.105 ms / 100) 2.056 -> 2.062 ( +0.29%) [ +0.05% +0.00% +0.05% / +0.29% +0.73% +0.92%] index_copy_ strided 3 : Elapsed 0.021 ms (2.057 ms / 100) 2.112 -> 2.111 ( -0.05%) [ +0.28% +0.00% +0.09% / -0.05% +1.28% +0.95%] index_add_ strided 7 : Elapsed 0.021 ms (2.118 ms / 100) 2.070 -> 2.064 ( -0.29%) [ +0.05% +0.05% +0.00% / -0.29% +0.77% +0.87%] index_copy_ strided 7 : Elapsed 0.021 ms (2.071 ms / 100) 2.114 -> 2.118 ( +0.19%) [ +0.00% +0.00% +0.09% / +0.19% +0.28% +0.33%] index_add_ perm : Elapsed 0.021 ms (2.114 ms / 100) 2.061 -> 2.063 ( +0.10%) [ +0.00% +0.15% +0.00% / +0.10% +0.39% +0.29%] index_copy_ perm : Elapsed 0.021 ms (2.061 ms / 100) 2.102 -> 2.113 ( +0.52%) [ +0.10% +0.57% +0.00% / +0.52% +0.90% +0.86%] index_add_ perm_sorted : Elapsed 0.021 ms (2.104 ms / 100) 2.051 -> 2.060 ( +0.44%) [ +0.39% +0.10% +0.00% / +0.44% +0.88% +0.78%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.059 ms / 100) 9.178 -> 9.178 ( +0.00%) [ +0.08% +0.12% +0.00% / +0.09% +0.00% +0.12%] index_select const : Elapsed 0.092 ms (9.185 ms / 100) 9.230 -> 9.227 ( -0.03%) [ +0.13% +0.12% +0.00% / +0.00% +0.00% -0.03%] index_select wrap : Elapsed 0.092 ms (9.242 ms / 100) 9.218 -> 9.208 ( -0.11%) [ +0.10% +0.00% +0.04% / +0.01% -0.11% +0.01%] index_select linear : Elapsed 0.092 ms (9.227 ms / 100) 9.208 -> 9.209 ( +0.01%) [ +0.00% +0.12% +0.04% / +0.12% +0.01% +0.02%] index_select reverse : Elapsed 0.092 ms (9.208 ms / 100) 9.185 -> 9.183 ( -0.02%) [ +0.25% +0.13% +0.00% / -0.02% +0.14% +0.14%] index_select skip64 : Elapsed 0.092 ms (9.208 ms / 100) 9.195 -> 9.183 ( -0.13%) [ +0.07% +0.00% +0.25% / -0.12% -0.13% -0.04%] index_select skip256 : Elapsed 0.092 ms (9.201 ms / 100) 9.241 -> 9.218 ( -0.25%) [ +0.29% +0.14% +0.00% / +0.09% +0.02% -0.25%] index_select spread : Elapsed 0.093 ms (9.268 ms / 100) 9.245 -> 9.230 ( -0.16%) [ +0.03% +0.04% +0.00% / +0.24% -0.16% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.248 ms / 100) 9.238 -> 9.235 ( -0.03%) [ +0.22% +0.06% +0.00% / +0.09% -0.03% -0.01%] index_select random : Elapsed 0.093 ms (9.258 ms / 100) 9.218 -> 9.219 ( +0.01%) [ +0.07% +0.18% +0.00% / +0.07% +0.01% +0.08%] index_select random_sorted : Elapsed 0.092 ms (9.224 ms / 100) B = [40, 16, 5, 20] (stride (1600, 1, 16, 80)) A = [40, 16, 5, 4] (stride (20, 800, 4, 1)) dim = 3 2.130 -> 2.127 ( -0.14%) [ +0.00% +0.19% +0.14% / -0.14% +0.23% +0.28%] index_add_ linear : Elapsed 0.021 ms (2.130 ms / 100) 2.075 -> 2.072 ( -0.14%) [ +0.00% +0.34% +0.10% / -0.14% +0.48% +0.43%] index_copy_ linear : Elapsed 0.021 ms (2.075 ms / 100) 2.133 -> 2.133 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.56% +0.38%] index_add_ reverse : Elapsed 0.021 ms (2.133 ms / 100) 2.075 -> 2.076 ( +0.05%) [ +0.19% +0.24% +0.00% / +0.05% +0.92% +0.58%] index_copy_ reverse : Elapsed 0.021 ms (2.079 ms / 100) 2.130 -> 2.130 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.56% +0.42%] index_add_ spread : Elapsed 0.021 ms (2.133 ms / 100) 2.071 -> 2.074 ( +0.14%) [ +0.48% +0.00% +0.48% / +0.14% +0.82% +0.87%] index_copy_ spread : Elapsed 0.021 ms (2.081 ms / 100) 2.134 -> 2.136 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.23% +0.14%] index_add_ strided 3 : Elapsed 0.021 ms (2.134 ms / 100) 2.076 -> 2.077 ( +0.05%) [ +0.00% +0.10% +0.19% / +0.05% +0.24% +0.53%] index_copy_ strided 3 : Elapsed 0.021 ms (2.076 ms / 100) 2.132 -> 2.133 ( +0.05%) [ +0.00% +0.19% +0.05% / +0.05% +0.28% +0.14%] index_add_ strided 7 : Elapsed 0.021 ms (2.132 ms / 100) 2.077 -> 2.078 ( +0.05%) [ +0.05% +0.00% +0.10% / +0.19% +0.19% +0.05%] index_copy_ strided 7 : Elapsed 0.021 ms (2.078 ms / 100) 2.130 -> 2.130 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.38% +0.00% +0.61%] index_add_ perm : Elapsed 0.021 ms (2.131 ms / 100) 2.074 -> 2.075 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.24% +0.05% +0.39%] index_copy_ perm : Elapsed 0.021 ms (2.074 ms / 100) 2.132 -> 2.132 ( +0.00%) [ +0.00% +0.19% +0.14% / +0.00% +0.42% +0.28%] index_add_ perm_sorted : Elapsed 0.021 ms (2.132 ms / 100) 2.078 -> 2.075 ( -0.14%) [ +0.05% +0.00% +0.34% / -0.14% +0.24% +0.34%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.079 ms / 100) 9.195 -> 9.186 ( -0.10%) [ +0.02% +0.09% +0.00% / -0.04% -0.10% +0.20%] index_select const : Elapsed 0.092 ms (9.197 ms / 100) 9.194 -> 9.195 ( +0.01%) [ +0.18% +0.15% +0.00% / +0.01% +0.23% +0.25%] index_select wrap : Elapsed 0.092 ms (9.211 ms / 100) 9.188 -> 9.189 ( +0.01%) [ +0.14% +0.13% +0.00% / +0.17% +0.24% +0.01%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.180 -> 9.190 ( +0.11%) [ +0.15% +0.10% +0.00% / +0.11% +0.15% +0.23%] index_select reverse : Elapsed 0.092 ms (9.194 ms / 100) 9.175 -> 9.204 ( +0.32%) [ +0.27% +0.00% +0.23% / +0.35% +0.41% +0.32%] index_select skip64 : Elapsed 0.092 ms (9.200 ms / 100) 9.181 -> 9.199 ( +0.20%) [ +0.00% +0.32% +0.19% / +0.20% +0.48% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.181 ms / 100) 9.197 -> 9.204 ( +0.08%) [ +0.05% +0.00% +0.07% / +0.08% +0.14% +0.14%] index_select spread : Elapsed 0.092 ms (9.202 ms / 100) 9.191 -> 9.200 ( +0.10%) [ +0.11% +0.01% +0.00% / +0.10% +0.20% +0.18%] index_select strided 3 : Elapsed 0.092 ms (9.201 ms / 100) 9.179 -> 9.181 ( +0.02%) [ +0.00% +0.24% +0.14% / +0.02% +0.09% +0.23%] index_select random : Elapsed 0.092 ms (9.179 ms / 100) 9.189 -> 9.195 ( +0.07%) [ +0.10% +0.03% +0.00% / +0.13% +0.07% +0.47%] index_select random_sorted : Elapsed 0.092 ms (9.198 ms / 100) B = [40, 16, 5, 20] (stride (1, 4000, 800, 40)) A = [40, 16, 5, 4] (stride (320, 1, 16, 80)) dim = 3 1.985 -> 1.986 ( +0.05%) [ +0.00% +0.45% +0.15% / +0.15% +0.30% +0.05%] index_add_ linear : Elapsed 0.020 ms (1.985 ms / 100) 1.948 -> 1.947 ( -0.05%) [ +0.26% +0.00% +0.05% / -0.05% +0.51% +0.36%] index_copy_ linear : Elapsed 0.020 ms (1.953 ms / 100) 1.987 -> 1.990 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.15% +0.40% +0.20%] index_add_ reverse : Elapsed 0.020 ms (1.987 ms / 100) 1.948 -> 1.943 ( -0.26%) [ +0.05% +0.00% +0.05% / -0.26% +0.46% +0.56%] index_copy_ reverse : Elapsed 0.019 ms (1.949 ms / 100) 1.972 -> 1.976 ( +0.20%) [ +0.30% +0.00% +0.20% / +0.20% +0.41% +0.76%] index_add_ spread : Elapsed 0.020 ms (1.978 ms / 100) 1.932 -> 1.939 ( +0.36%) [ +0.26% +0.31% +0.00% / +0.36% +0.83% +1.09%] index_copy_ spread : Elapsed 0.019 ms (1.937 ms / 100) 1.985 -> 1.986 ( +0.05%) [ +0.05% +0.20% +0.00% / +0.05% +0.76% +0.76%] index_add_ strided 3 : Elapsed 0.020 ms (1.986 ms / 100) 1.943 -> 1.943 ( +0.00%) [ +0.21% +0.26% +0.00% / +0.00% +0.93% +0.98%] index_copy_ strided 3 : Elapsed 0.019 ms (1.947 ms / 100) 1.987 -> 1.991 ( +0.20%) [ +0.10% +0.00% +0.20% / +0.20% +0.91% +0.50%] index_add_ strided 7 : Elapsed 0.020 ms (1.989 ms / 100) 1.947 -> 1.952 ( +0.26%) [ +0.10% +0.00% +0.05% / +0.26% +1.23% +1.13%] index_copy_ strided 7 : Elapsed 0.019 ms (1.949 ms / 100) 1.980 -> 1.983 ( +0.15%) [ +0.05% +0.00% +0.40% / +0.15% +0.61% +0.45%] index_add_ perm : Elapsed 0.020 ms (1.981 ms / 100) 1.940 -> 1.940 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.52% +0.57%] index_copy_ perm : Elapsed 0.019 ms (1.940 ms / 100) 1.989 -> 1.990 ( +0.05%) [ +0.25% +0.45% +0.00% / +0.20% +0.35% +0.05%] index_add_ perm_sorted : Elapsed 0.020 ms (1.994 ms / 100) 1.949 -> 1.948 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.67% +0.36%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.949 ms / 100) 8.702 -> 8.718 ( +0.18%) [ +0.24% +0.00% +0.10% / +0.31% +0.24% +0.18%] index_select const : Elapsed 0.087 ms (8.723 ms / 100) 8.777 -> 8.753 ( -0.27%) [ +0.00% +0.02% +0.09% / -0.15% -0.27% +0.07%] index_select wrap : Elapsed 0.088 ms (8.777 ms / 100) 8.738 -> 8.733 ( -0.06%) [ +0.13% +0.18% +0.00% / +0.19% +0.13% -0.06%] index_select linear : Elapsed 0.087 ms (8.749 ms / 100) 8.725 -> 8.728 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.26% +0.25%] index_select reverse : Elapsed 0.087 ms (8.725 ms / 100) 8.701 -> 8.720 ( +0.22%) [ +0.06% +0.00% +0.14% / +0.33% +0.23% +0.22%] index_select skip64 : Elapsed 0.087 ms (8.706 ms / 100) 8.697 -> 8.708 ( +0.13%) [ +0.00% +0.15% +0.25% / +0.16% +0.13% +0.23%] index_select skip256 : Elapsed 0.087 ms (8.697 ms / 100) 8.750 -> 8.752 ( +0.02%) [ +0.06% +0.02% +0.00% / +0.05% +0.24% +0.02%] index_select spread : Elapsed 0.088 ms (8.755 ms / 100) 8.773 -> 8.763 ( -0.11%) [ +0.01% +0.03% +0.00% / +0.00% -0.09% -0.11%] index_select strided 3 : Elapsed 0.088 ms (8.774 ms / 100) 8.754 -> 8.760 ( +0.07%) [ +0.00% +0.09% +0.05% / +0.18% +0.18% +0.07%] index_select random : Elapsed 0.088 ms (8.754 ms / 100) 8.745 -> 8.746 ( +0.01%) [ +0.08% +0.03% +0.00% / +0.01% +0.29% +0.21%] index_select random_sorted : Elapsed 0.088 ms (8.752 ms / 100) B = [40, 16, 5, 20] (stride (1, 4000, 40, 200)) A = [40, 16, 5, 4] (stride (1, 40, 2560, 640)) dim = 3 2.245 -> 2.249 ( +0.18%) [ +0.31% +0.27% +0.00% / +0.49% +0.27% +0.18%] index_add_ linear : Elapsed 0.023 ms (2.252 ms / 100) 2.193 -> 2.194 ( +0.05%) [ +0.00% +0.09% +0.27% / +0.05% +0.23% +0.18%] index_copy_ linear : Elapsed 0.022 ms (2.193 ms / 100) 2.250 -> 2.257 ( +0.31%) [ +0.22% +0.36% +0.00% / +0.31% +0.40% +0.40%] index_add_ reverse : Elapsed 0.023 ms (2.255 ms / 100) 2.198 -> 2.200 ( +0.09%) [ +0.14% +0.00% +0.18% / +0.09% +0.68% +0.27%] index_copy_ reverse : Elapsed 0.022 ms (2.201 ms / 100) 2.246 -> 2.248 ( +0.09%) [ +0.27% +0.00% +0.18% / +0.09% +0.45% +0.45%] index_add_ spread : Elapsed 0.023 ms (2.252 ms / 100) 2.191 -> 2.193 ( +0.09%) [ +0.00% +0.18% +0.14% / +0.09% +0.46% +0.46%] index_copy_ spread : Elapsed 0.022 ms (2.191 ms / 100) 2.245 -> 2.252 ( +0.31%) [ +0.40% +0.09% +0.00% / +0.36% +0.36% +0.31%] index_add_ strided 3 : Elapsed 0.023 ms (2.254 ms / 100) 2.194 -> 2.194 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.05% +0.27%] index_copy_ strided 3 : Elapsed 0.022 ms (2.194 ms / 100) 2.249 -> 2.248 ( -0.04%) [ +0.00% +0.04% +0.22% / -0.04% +0.22% +0.18%] index_add_ strided 7 : Elapsed 0.022 ms (2.249 ms / 100) 2.198 -> 2.197 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.23% -0.05%] index_copy_ strided 7 : Elapsed 0.022 ms (2.198 ms / 100) 2.252 -> 2.253 ( +0.04%) [ +0.09% +0.13% +0.00% / +0.04% +0.36% +0.40%] index_add_ perm : Elapsed 0.023 ms (2.254 ms / 100) 2.191 -> 2.192 ( +0.05%) [ +0.27% +0.23% +0.00% / +0.05% +0.64% +0.73%] index_copy_ perm : Elapsed 0.022 ms (2.197 ms / 100) 2.251 -> 2.248 ( -0.13%) [ +0.04% +0.09% +0.00% / -0.13% +0.40% +0.44%] index_add_ perm_sorted : Elapsed 0.023 ms (2.252 ms / 100) 2.192 -> 2.195 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.55% +0.73%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.192 ms / 100) 9.184 -> 9.186 ( +0.02%) [ +0.09% +0.00% +0.38% / +0.02% +0.54% +0.20%] index_select const : Elapsed 0.092 ms (9.192 ms / 100) 9.250 -> 9.236 ( -0.15%) [ +0.00% +0.15% +0.15% / -0.15% +0.19% +0.09%] index_select wrap : Elapsed 0.092 ms (9.250 ms / 100) 9.216 -> 9.243 ( +0.29%) [ +0.00% +0.35% +0.25% / +0.29% +0.42% +0.42%] index_select linear : Elapsed 0.092 ms (9.216 ms / 100) 9.242 -> 9.247 ( +0.05%) [ +0.24% +0.00% +0.10% / +0.05% +0.06% +0.17%] index_select reverse : Elapsed 0.093 ms (9.264 ms / 100) 9.197 -> 9.194 ( -0.03%) [ +0.00% +0.08% +0.04% / -0.03% +0.28% +0.45%] index_select skip64 : Elapsed 0.092 ms (9.197 ms / 100) 9.191 -> 9.194 ( +0.03%) [ +0.05% +0.00% +0.28% / +0.03% +0.36% +0.17%] index_select skip256 : Elapsed 0.092 ms (9.196 ms / 100) 9.253 -> 9.246 ( -0.08%) [ +0.03% +0.00% +0.01% / +0.04% -0.08% +0.12%] index_select spread : Elapsed 0.093 ms (9.256 ms / 100) 9.247 -> 9.247 ( +0.00%) [ +0.00% +0.16% +0.22% / +0.09% +0.00% +0.10%] index_select strided 3 : Elapsed 0.092 ms (9.247 ms / 100) 9.249 -> 9.257 ( +0.09%) [ +0.00% +0.09% +0.11% / +0.09% +0.27% +0.22%] index_select random : Elapsed 0.092 ms (9.249 ms / 100) 9.255 -> 9.256 ( +0.01%) [ +0.06% +0.00% +0.19% / +0.01% +0.05% +0.15%] index_select random_sorted : Elapsed 0.093 ms (9.261 ms / 100) B = [40, 16, 5, 20] (stride (320, 1, 12800, 16)) A = [40, 16, 5, 4] (stride (1, 160, 2560, 40)) dim = 3 2.247 -> 2.249 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.45% +0.45%] index_add_ linear : Elapsed 0.022 ms (2.249 ms / 100) 2.181 -> 2.181 ( +0.00%) [ +0.05% +0.14% +0.00% / +0.00% +0.55% +0.55%] index_copy_ linear : Elapsed 0.022 ms (2.182 ms / 100) 2.248 -> 2.251 ( +0.13%) [ +0.22% +0.00% +0.00% / +0.13% +0.62% +0.44%] index_add_ reverse : Elapsed 0.023 ms (2.253 ms / 100) 2.183 -> 2.183 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.00% +0.46% +0.41%] index_copy_ reverse : Elapsed 0.022 ms (2.184 ms / 100) 2.236 -> 2.235 ( -0.04%) [ +0.22% +0.36% +0.00% / -0.04% +0.49% +0.63%] index_add_ spread : Elapsed 0.022 ms (2.241 ms / 100) 2.173 -> 2.175 ( +0.09%) [ +0.09% +0.23% +0.00% / +0.09% +0.18% +0.46%] index_copy_ spread : Elapsed 0.022 ms (2.175 ms / 100) 2.235 -> 2.231 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +0.49% +0.36%] index_add_ strided 3 : Elapsed 0.022 ms (2.235 ms / 100) 2.168 -> 2.169 ( +0.05%) [ +0.00% +0.00% +0.18% / +0.05% +0.65% +0.37%] index_copy_ strided 3 : Elapsed 0.022 ms (2.168 ms / 100) 2.243 -> 2.247 ( +0.18%) [ +0.31% +0.27% +0.00% / +0.18% +0.36% +0.58%] index_add_ strided 7 : Elapsed 0.023 ms (2.250 ms / 100) 2.177 -> 2.185 ( +0.37%) [ +0.09% +0.14% +0.00% / +0.37% +0.51% +0.55%] index_copy_ strided 7 : Elapsed 0.022 ms (2.179 ms / 100) 2.240 -> 2.242 ( +0.09%) [ +0.00% +0.04% +0.00% / +0.09% +0.40% +0.54%] index_add_ perm : Elapsed 0.022 ms (2.240 ms / 100) 2.174 -> 2.173 ( -0.05%) [ +0.00% +0.09% +0.18% / -0.05% +0.41% +0.60%] index_copy_ perm : Elapsed 0.022 ms (2.174 ms / 100) 2.239 -> 2.243 ( +0.18%) [ +0.00% +0.09% +0.09% / +0.18% +0.36% +0.54%] index_add_ perm_sorted : Elapsed 0.022 ms (2.239 ms / 100) 2.174 -> 2.175 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.05% +0.46% +0.51%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.176 ms / 100) 9.181 -> 9.196 ( +0.16%) [ +0.16% +0.00% +0.21% / +0.16% +0.27% +0.23%] index_select const : Elapsed 0.092 ms (9.196 ms / 100) 9.236 -> 9.224 ( -0.13%) [ +0.15% +0.00% +0.19% / +0.05% +0.08% -0.13%] index_select wrap : Elapsed 0.093 ms (9.250 ms / 100) 9.222 -> 9.211 ( -0.12%) [ +0.00% +0.10% +0.10% / -0.08% -0.12% +0.23%] index_select linear : Elapsed 0.092 ms (9.222 ms / 100) 9.242 -> 9.227 ( -0.16%) [ +0.19% +0.00% +0.05% / +0.01% -0.04% -0.16%] index_select reverse : Elapsed 0.093 ms (9.260 ms / 100) 9.197 -> 9.186 ( -0.12%) [ +0.00% +0.03% +0.14% / -0.12% +0.16% +0.04%] index_select skip64 : Elapsed 0.092 ms (9.197 ms / 100) 9.192 -> 9.182 ( -0.11%) [ +0.00% +0.04% +0.21% / -0.11% -0.02% +0.03%] index_select skip256 : Elapsed 0.092 ms (9.192 ms / 100) 9.252 -> 9.246 ( -0.06%) [ +0.00% +0.35% +0.19% / +0.18% +0.23% -0.06%] index_select spread : Elapsed 0.093 ms (9.252 ms / 100) 9.241 -> 9.231 ( -0.11%) [ +0.14% +0.00% +0.09% / +0.14% -0.11% +0.25%] index_select strided 3 : Elapsed 0.093 ms (9.254 ms / 100) 9.249 -> 9.250 ( +0.01%) [ +0.08% +0.00% +0.05% / +0.21% +0.01% +0.05%] index_select random : Elapsed 0.093 ms (9.256 ms / 100) 9.261 -> 9.259 ( -0.02%) [ +0.00% +0.10% +0.01% / +0.09% -0.02% +0.08%] index_select random_sorted : Elapsed 0.093 ms (9.261 ms / 100) B = [40, 16, 5, 20] (stride (20, 800, 12800, 1)) A = [40, 16, 5, 4] (stride (1, 800, 40, 200)) dim = 3 2.266 -> 2.271 ( +0.22%) [ +0.26% +0.40% +0.00% / +0.22% +0.44% +0.40%] index_add_ linear : Elapsed 0.023 ms (2.272 ms / 100) 2.222 -> 2.223 ( +0.05%) [ +0.00% +0.18% +0.09% / +0.14% +0.05% +0.14%] index_copy_ linear : Elapsed 0.022 ms (2.222 ms / 100) 2.269 -> 2.276 ( +0.31%) [ +0.04% +0.00% +0.04% / +0.40% +0.57% +0.31%] index_add_ reverse : Elapsed 0.023 ms (2.270 ms / 100) 2.224 -> 2.222 ( -0.09%) [ +0.00% +0.00% +0.13% / -0.09% +0.27% -0.04%] index_copy_ reverse : Elapsed 0.022 ms (2.224 ms / 100) 2.309 -> 2.318 ( +0.39%) [ +0.39% +0.00% +0.22% / +0.39% +0.52% +0.52%] index_add_ spread : Elapsed 0.023 ms (2.318 ms / 100) 2.319 -> 2.327 ( +0.34%) [ +0.17% +0.00% +0.22% / +0.34% +0.52% +0.60%] index_copy_ spread : Elapsed 0.023 ms (2.323 ms / 100) 2.301 -> 2.301 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.26% +0.09%] index_add_ strided 3 : Elapsed 0.023 ms (2.301 ms / 100) 2.290 -> 2.289 ( -0.04%) [ +0.00% +0.09% +0.09% / +0.04% +0.26% -0.04%] index_copy_ strided 3 : Elapsed 0.023 ms (2.290 ms / 100) 2.307 -> 2.311 ( +0.17%) [ +0.00% +0.00% +0.26% / +0.17% +0.56% +0.17%] index_add_ strided 7 : Elapsed 0.023 ms (2.307 ms / 100) 2.321 -> 2.327 ( +0.26%) [ +0.00% +0.22% +0.26% / +0.26% +0.39% +0.43%] index_copy_ strided 7 : Elapsed 0.023 ms (2.321 ms / 100) 2.298 -> 2.304 ( +0.26%) [ +0.00% +0.00% +0.04% / +0.26% +0.39% +0.30%] index_add_ perm : Elapsed 0.023 ms (2.298 ms / 100) 2.284 -> 2.290 ( +0.26%) [ +0.00% +0.22% +0.26% / +0.39% +0.35% +0.26%] index_copy_ perm : Elapsed 0.023 ms (2.284 ms / 100) 2.301 -> 2.301 ( +0.00%) [ +0.26% +0.00% +0.09% / +0.00% +0.22% +0.13%] index_add_ perm_sorted : Elapsed 0.023 ms (2.307 ms / 100) 2.292 -> 2.290 ( -0.09%) [ +0.17% +0.04% +0.00% / +0.04% -0.09% +0.26%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.296 ms / 100) 9.244 -> 9.253 ( +0.10%) [ +0.01% +0.00% +0.09% / +0.16% +0.10% +0.21%] index_select const : Elapsed 0.092 ms (9.245 ms / 100) 9.296 -> 9.305 ( +0.10%) [ +0.03% +0.11% +0.00% / +0.14% +0.10% +0.10%] index_select wrap : Elapsed 0.093 ms (9.299 ms / 100) 9.284 -> 9.297 ( +0.14%) [ +0.00% +0.06% +0.04% / +0.14% +0.46% +0.16%] index_select linear : Elapsed 0.093 ms (9.284 ms / 100) 9.275 -> 9.279 ( +0.04%) [ +0.04% +0.12% +0.00% / +0.05% +0.20% +0.04%] index_select reverse : Elapsed 0.093 ms (9.279 ms / 100) 9.237 -> 9.252 ( +0.16%) [ +0.04% +0.32% +0.00% / +0.16% +0.17% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.241 ms / 100) 9.250 -> 9.264 ( +0.15%) [ +0.00% +0.11% +0.14% / +0.18% +0.36% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.250 ms / 100) 9.297 -> 9.302 ( +0.05%) [ +0.13% +0.11% +0.00% / +0.31% +0.18% +0.05%] index_select spread : Elapsed 0.093 ms (9.309 ms / 100) 9.296 -> 9.300 ( +0.04%) [ +0.40% +0.16% +0.00% / +0.09% +0.44% +0.04%] index_select strided 3 : Elapsed 0.093 ms (9.333 ms / 100) 9.299 -> 9.305 ( +0.06%) [ +0.00% +0.05% +0.17% / +0.11% +0.06% +0.06%] index_select random : Elapsed 0.093 ms (9.299 ms / 100) 9.291 -> 9.305 ( +0.15%) [ +0.01% +0.00% +0.11% / +0.15% +0.22% +0.31%] index_select random_sorted : Elapsed 0.093 ms (9.292 ms / 100) B = [40, 16, 5, 20] (stride (1, 800, 12800, 40)) A = [40, 16, 5, 4] (stride (1, 40, 2560, 640)) dim = 3 2.256 -> 2.255 ( -0.04%) [ +0.09% +0.09% +0.00% / -0.04% +0.31% +0.44%] index_add_ linear : Elapsed 0.023 ms (2.258 ms / 100) 2.196 -> 2.197 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +0.55% +0.46%] index_copy_ linear : Elapsed 0.022 ms (2.198 ms / 100) 2.253 -> 2.254 ( +0.04%) [ +0.36% +0.00% +0.13% / +0.04% +0.71% +0.27%] index_add_ reverse : Elapsed 0.023 ms (2.261 ms / 100) 2.193 -> 2.198 ( +0.23%) [ +0.32% +0.00% +0.27% / +0.23% +0.55% +0.46%] index_copy_ reverse : Elapsed 0.022 ms (2.200 ms / 100) 2.248 -> 2.248 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.40% +0.49%] index_add_ spread : Elapsed 0.023 ms (2.251 ms / 100) 2.191 -> 2.194 ( +0.14%) [ +0.23% +0.05% +0.00% / +0.14% +0.41% +0.50%] index_copy_ spread : Elapsed 0.022 ms (2.196 ms / 100) 2.250 -> 2.251 ( +0.04%) [ +0.00% +0.09% +0.09% / +0.04% +0.53% +0.40%] index_add_ strided 3 : Elapsed 0.023 ms (2.250 ms / 100) 2.190 -> 2.193 ( +0.14%) [ +0.18% +0.05% +0.00% / +0.14% +0.64% +0.50%] index_copy_ strided 3 : Elapsed 0.022 ms (2.194 ms / 100) 2.248 -> 2.250 ( +0.09%) [ +0.31% +0.22% +0.00% / +0.09% +0.67% +0.67%] index_add_ strided 7 : Elapsed 0.023 ms (2.255 ms / 100) 2.193 -> 2.192 ( -0.05%) [ +0.14% +0.05% +0.00% / -0.05% +0.32% +0.46%] index_copy_ strided 7 : Elapsed 0.022 ms (2.196 ms / 100) 2.255 -> 2.253 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.27% +0.40%] index_add_ perm : Elapsed 0.023 ms (2.255 ms / 100) 2.190 -> 2.194 ( +0.18%) [ +0.18% +0.00% +0.37% / +0.18% +0.78% +0.59%] index_copy_ perm : Elapsed 0.022 ms (2.194 ms / 100) 2.259 -> 2.255 ( -0.18%) [ +0.00% +0.13% +0.18% / -0.18% +0.44% +0.40%] index_add_ perm_sorted : Elapsed 0.023 ms (2.259 ms / 100) 2.196 -> 2.199 ( +0.14%) [ +0.18% +0.00% +0.18% / +0.14% +0.55% +0.46%] index_copy_ perm_sorted : Elapsed 0.022 ms (2.200 ms / 100) 9.174 -> 9.172 ( -0.02%) [ +0.00% +0.07% +0.07% / +0.11% +0.05% -0.02%] index_select const : Elapsed 0.092 ms (9.174 ms / 100) 9.234 -> 9.219 ( -0.16%) [ +0.13% +0.10% +0.00% / +0.06% -0.16% -0.01%] index_select wrap : Elapsed 0.092 ms (9.246 ms / 100) 9.213 -> 9.211 ( -0.02%) [ +0.00% +0.05% +0.11% / +0.10% +0.10% -0.02%] index_select linear : Elapsed 0.092 ms (9.213 ms / 100) 9.203 -> 9.199 ( -0.04%) [ +0.00% +0.09% +0.17% / -0.04% +0.21% -0.01%] index_select reverse : Elapsed 0.092 ms (9.203 ms / 100) 9.180 -> 9.177 ( -0.03%) [ +0.00% +0.00% +0.08% / +0.10% +0.03% -0.03%] index_select skip64 : Elapsed 0.092 ms (9.180 ms / 100) 9.177 -> 9.172 ( -0.05%) [ +0.00% +0.39% +0.02% / +0.04% +0.10% -0.05%] index_select skip256 : Elapsed 0.092 ms (9.177 ms / 100) 9.219 -> 9.215 ( -0.04%) [ +0.10% +0.12% +0.00% / +0.10% +0.15% -0.04%] index_select spread : Elapsed 0.092 ms (9.228 ms / 100) 9.238 -> 9.207 ( -0.34%) [ +0.16% +0.15% +0.00% / -0.05% -0.34% -0.29%] index_select strided 3 : Elapsed 0.093 ms (9.253 ms / 100) 9.227 -> 9.218 ( -0.10%) [ +0.28% +0.10% +0.00% / +0.14% +0.03% -0.10%] index_select random : Elapsed 0.093 ms (9.253 ms / 100) 9.221 -> 9.221 ( +0.00%) [ +0.17% +0.00% +0.17% / +0.11% +0.12% +0.00%] index_select random_sorted : Elapsed 0.092 ms (9.237 ms / 100) B = [40, 16, 5, 20] (stride (5, 200, 1, 3200)) A = [40, 16, 5, 4] (stride (320, 5, 1, 80)) dim = 3 1.974 -> 1.975 ( +0.05%) [ +0.10% +0.00% +0.15% / +0.05% +1.17% +1.42%] index_add_ linear : Elapsed 0.020 ms (1.976 ms / 100) 1.923 -> 1.916 ( -0.36%) [ +0.16% +0.00% +0.31% / -0.36% +1.61% +1.46%] index_copy_ linear : Elapsed 0.019 ms (1.926 ms / 100) 1.968 -> 1.974 ( +0.30%) [ +0.15% +0.15% +0.00% / +0.30% +1.27% +1.58%] index_add_ reverse : Elapsed 0.020 ms (1.971 ms / 100) 1.919 -> 1.920 ( +0.05%) [ +0.05% +0.00% +0.16% / +0.05% +1.46% +1.41%] index_copy_ reverse : Elapsed 0.019 ms (1.920 ms / 100) 1.985 -> 1.989 ( +0.20%) [ +0.00% +0.05% +0.05% / +0.20% +1.06% +0.81%] index_add_ spread : Elapsed 0.020 ms (1.985 ms / 100) 1.935 -> 1.937 ( +0.10%) [ +0.00% +0.26% +0.00% / +0.10% +1.40% +1.40%] index_copy_ spread : Elapsed 0.019 ms (1.935 ms / 100) 1.975 -> 1.973 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.10% +1.01% +0.96%] index_add_ strided 3 : Elapsed 0.020 ms (1.976 ms / 100) 1.920 -> 1.925 ( +0.26%) [ +0.00% +0.10% +0.57% / +0.26% +1.61% +1.61%] index_copy_ strided 3 : Elapsed 0.019 ms (1.920 ms / 100) 1.973 -> 1.972 ( -0.05%) [ +0.15% +0.00% +0.10% / -0.05% +1.57% +1.57%] index_add_ strided 7 : Elapsed 0.020 ms (1.976 ms / 100) 1.915 -> 1.920 ( +0.26%) [ +0.00% +0.16% +0.21% / +0.26% +2.19% +2.51%] index_copy_ strided 7 : Elapsed 0.019 ms (1.915 ms / 100) 1.977 -> 1.980 ( +0.15%) [ +0.00% +0.25% +0.20% / +0.15% +1.11% +1.32%] index_add_ perm : Elapsed 0.020 ms (1.977 ms / 100) 1.925 -> 1.926 ( +0.05%) [ +0.21% +0.00% +0.10% / +0.05% +1.82% +1.71%] index_copy_ perm : Elapsed 0.019 ms (1.929 ms / 100) 1.978 -> 1.978 ( +0.00%) [ +0.00% +0.15% +0.20% / +0.00% +1.37% +1.16%] index_add_ perm_sorted : Elapsed 0.020 ms (1.978 ms / 100) 1.924 -> 1.927 ( +0.16%) [ +0.21% +0.10% +0.00% / +0.16% +2.23% +1.92%] index_copy_ perm_sorted : Elapsed 0.019 ms (1.928 ms / 100) 8.712 -> 8.718 ( +0.07%) [ +0.00% +0.00% +0.24% / +0.11% +0.07% +0.13%] index_select const : Elapsed 0.087 ms (8.712 ms / 100) 8.769 -> 8.770 ( +0.01%) [ +0.33% +0.16% +0.00% / +0.01% +0.29% +0.16%] index_select wrap : Elapsed 0.088 ms (8.798 ms / 100) 8.728 -> 8.750 ( +0.25%) [ +0.00% +0.42% +0.46% / +0.25% +0.31% +0.63%] index_select linear : Elapsed 0.087 ms (8.728 ms / 100) 8.756 -> 8.745 ( -0.13%) [ +0.00% +0.06% +0.06% / -0.13% +0.14% +0.02%] index_select reverse : Elapsed 0.088 ms (8.756 ms / 100) 8.715 -> 8.706 ( -0.10%) [ +0.07% +0.00% +0.07% / -0.10% +0.02% +0.36%] index_select skip64 : Elapsed 0.087 ms (8.721 ms / 100) 8.705 -> 8.710 ( +0.06%) [ +0.15% +0.18% +0.00% / +0.24% +0.30% +0.06%] index_select skip256 : Elapsed 0.087 ms (8.718 ms / 100) 8.751 -> 8.756 ( +0.06%) [ +0.10% +0.00% +0.11% / +0.06% +0.17% +0.11%] index_select spread : Elapsed 0.088 ms (8.760 ms / 100) 8.789 -> 8.784 ( -0.06%) [ +0.00% +0.01% +0.01% / -0.06% +0.00% +0.20%] index_select strided 3 : Elapsed 0.088 ms (8.789 ms / 100) 8.764 -> 8.782 ( +0.21%) [ +0.00% +0.16% +0.19% / +0.21% +0.31% +0.21%] index_select random : Elapsed 0.088 ms (8.764 ms / 100) 8.742 -> 8.769 ( +0.31%) [ +0.00% +0.01% +0.18% / +0.35% +0.31% +0.32%] index_select random_sorted : Elapsed 0.087 ms (8.742 ms / 100) out_shape = [40, 5, 16, 20] in_shape = [4, 5, 16, 20] idx_dim = 0 B = [40, 5, 16, 20] (stride (1600, 20, 100, 1)) A = [4, 5, 16, 20] (stride (80, 16, 1, 320)) dim = 0 1.282 -> 1.281 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.39% +0.39%] index_add_ linear : Elapsed 0.013 ms (1.283 ms / 100) 1.237 -> 1.238 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.49% +0.40%] index_copy_ linear : Elapsed 0.012 ms (1.237 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.39% +0.47%] index_add_ reverse : Elapsed 0.013 ms (1.282 ms / 100) 1.237 -> 1.236 ( -0.08%) [ +0.00% +0.00% +0.49% / -0.08% +0.40% +0.49%] index_copy_ reverse : Elapsed 0.012 ms (1.237 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.47% +0.47%] index_add_ spread : Elapsed 0.013 ms (1.283 ms / 100) 1.236 -> 1.236 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.57% +0.57%] index_copy_ spread : Elapsed 0.012 ms (1.236 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.62% +0.62%] index_add_ strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.65% +0.65%] index_copy_ strided 3 : Elapsed 0.012 ms (1.235 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ strided 7 : Elapsed 0.013 ms (1.287 ms / 100) 1.242 -> 1.242 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.56% +0.48%] index_copy_ strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.281 -> 1.287 ( +0.47%) [ +0.00% +0.00% +0.00% / +0.47% +0.62% +0.55%] index_add_ perm : Elapsed 0.013 ms (1.281 ms / 100) 1.235 -> 1.240 ( +0.40%) [ +0.08% +0.00% +0.00% / +0.40% +0.65% +0.65%] index_copy_ perm : Elapsed 0.012 ms (1.236 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.55% +0.47%] index_add_ perm_sorted : Elapsed 0.013 ms (1.282 ms / 100) 1.236 -> 1.235 ( -0.08%) [ +0.24% +0.00% +0.24% / -0.08% +0.57% +0.57%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.239 ms / 100) 8.720 -> 8.726 ( +0.07%) [ +0.00% +0.08% +0.18% / +0.07% +0.40% +0.28%] index_select const : Elapsed 0.087 ms (8.720 ms / 100) 8.741 -> 8.749 ( +0.09%) [ +0.00% +0.05% +0.03% / +0.09% +0.50% +0.45%] index_select wrap : Elapsed 0.087 ms (8.741 ms / 100) 8.733 -> 8.754 ( +0.24%) [ +0.03% +0.00% +0.19% / +0.24% +0.48% +0.24%] index_select linear : Elapsed 0.087 ms (8.736 ms / 100) 8.743 -> 8.741 ( -0.02%) [ +0.22% +0.00% +0.22% / -0.02% +0.29% +0.30%] index_select reverse : Elapsed 0.088 ms (8.762 ms / 100) 8.740 -> 8.737 ( -0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.31% -0.03%] index_select skip64 : Elapsed 0.087 ms (8.740 ms / 100) 8.722 -> 8.744 ( +0.25%) [ +0.08% +0.00% +0.17% / +0.31% +0.25% +0.32%] index_select skip256 : Elapsed 0.087 ms (8.729 ms / 100) 8.755 -> 8.749 ( -0.07%) [ +0.16% +0.07% +0.00% / -0.07% +0.34% +0.37%] index_select spread : Elapsed 0.088 ms (8.769 ms / 100) 8.737 -> 8.748 ( +0.13%) [ +0.11% +0.00% +0.41% / +0.13% +0.33% +0.37%] index_select strided 3 : Elapsed 0.087 ms (8.747 ms / 100) 8.742 -> 8.741 ( -0.01%) [ +0.00% +0.16% +0.33% / -0.01% +0.32% +0.46%] index_select random : Elapsed 0.087 ms (8.742 ms / 100) 8.747 -> 8.753 ( +0.07%) [ +0.19% +0.00% +0.02% / +0.07% +0.39% +0.34%] index_select random_sorted : Elapsed 0.088 ms (8.764 ms / 100) B = [40, 5, 16, 20] (stride (1600, 20, 100, 1)) A = [4, 5, 16, 20] (stride (80, 1, 5, 320)) dim = 0 1.372 -> 1.375 ( +0.22%) [ +0.00% +0.22% +0.36% / +0.22% +0.80% +0.87%] index_add_ linear : Elapsed 0.014 ms (1.372 ms / 100) 1.331 -> 1.332 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.53% +0.45%] index_copy_ linear : Elapsed 0.013 ms (1.331 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.73% +0.73%] index_add_ reverse : Elapsed 0.014 ms (1.375 ms / 100) 1.329 -> 1.333 ( +0.30%) [ +0.00% +0.23% +0.30% / +0.30% +0.60% +0.68%] index_copy_ reverse : Elapsed 0.013 ms (1.329 ms / 100) 1.373 -> 1.376 ( +0.22%) [ +0.00% +0.73% +0.15% / +0.22% +0.80% +0.87%] index_add_ spread : Elapsed 0.014 ms (1.373 ms / 100) 1.328 -> 1.331 ( +0.23%) [ +0.00% +0.83% +0.23% / +0.23% +1.05% +1.20%] index_copy_ spread : Elapsed 0.013 ms (1.328 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.51% +0.65%] index_add_ strided 3 : Elapsed 0.014 ms (1.383 ms / 100) 1.338 -> 1.342 ( +0.30%) [ +0.15% +0.00% +0.52% / +0.30% +0.60% +0.82%] index_copy_ strided 3 : Elapsed 0.013 ms (1.340 ms / 100) 1.374 -> 1.379 ( +0.36%) [ +0.00% +0.22% +0.00% / +0.36% +0.58% +0.73%] index_add_ strided 7 : Elapsed 0.014 ms (1.374 ms / 100) 1.333 -> 1.336 ( +0.23%) [ +0.00% +0.08% +0.00% / +0.23% +0.68% +0.53%] index_copy_ strided 7 : Elapsed 0.013 ms (1.333 ms / 100) 1.381 -> 1.380 ( -0.07%) [ +0.14% +0.14% +0.00% / -0.07% +0.22% +0.07%] index_add_ perm : Elapsed 0.014 ms (1.383 ms / 100) 1.335 -> 1.333 ( -0.15%) [ +0.15% +0.07% +0.00% / -0.15% +0.30% +0.15%] index_copy_ perm : Elapsed 0.013 ms (1.337 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.36% +0.66%] index_add_ perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.330 -> 1.331 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.45% +0.60%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.331 ms / 100) 9.156 -> 9.162 ( +0.07%) [ +0.04% +0.03% +0.00% / +0.17% +0.07% +0.24%] index_select const : Elapsed 0.092 ms (9.160 ms / 100) 9.188 -> 9.192 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.04% +0.19% +0.21%] index_select wrap : Elapsed 0.092 ms (9.192 ms / 100) 9.189 -> 9.195 ( +0.07%) [ +0.11% +0.00% +0.01% / +0.07% +0.15% +0.15%] index_select linear : Elapsed 0.092 ms (9.199 ms / 100) 9.172 -> 9.193 ( +0.23%) [ +0.17% +0.14% +0.00% / +0.31% +0.23% +0.41%] index_select reverse : Elapsed 0.092 ms (9.188 ms / 100) 9.162 -> 9.150 ( -0.13%) [ +0.11% +0.00% +0.11% / -0.13% +0.04% +0.27%] index_select skip64 : Elapsed 0.092 ms (9.172 ms / 100) 9.165 -> 9.161 ( -0.04%) [ +0.09% +0.00% +0.07% / -0.04% +0.15% +0.09%] index_select skip256 : Elapsed 0.092 ms (9.173 ms / 100) 9.202 -> 9.186 ( -0.17%) [ +0.05% +0.00% +0.09% / +0.23% +0.00% -0.17%] index_select spread : Elapsed 0.092 ms (9.207 ms / 100) 9.184 -> 9.187 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.05% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.189 ms / 100) 9.170 -> 9.198 ( +0.31%) [ +0.17% +0.25% +0.00% / +0.32% +0.31% +0.65%] index_select random : Elapsed 0.092 ms (9.186 ms / 100) 9.199 -> 9.190 ( -0.10%) [ +0.01% +0.00% +0.03% / -0.10% -0.02% +0.07%] index_select random_sorted : Elapsed 0.092 ms (9.200 ms / 100) B = [40, 5, 16, 20] (stride (1600, 16, 1, 80)) A = [4, 5, 16, 20] (stride (100, 20, 400, 1)) dim = 0 1.227 -> 1.227 ( +0.00%) [ +0.24% +0.16% +0.00% / +0.00% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.65% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.00% +0.59% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.76% +0.59%] index_copy_ spread : Elapsed 0.012 ms (1.189 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.65% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.82% +0.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.227 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +0.93% +0.93%] index_copy_ strided 7 : Elapsed 0.012 ms (1.187 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.227 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.59% +0.76%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.73% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.691 -> 8.699 ( +0.09%) [ +0.33% +0.02% +0.00% / +0.09% +0.31% +0.09%] index_select const : Elapsed 0.087 ms (8.720 ms / 100) 8.713 -> 8.714 ( +0.01%) [ +0.21% +0.13% +0.00% / +0.13% +0.14% +0.01%] index_select wrap : Elapsed 0.087 ms (8.731 ms / 100) 8.702 -> 8.704 ( +0.02%) [ +0.00% +0.05% +0.10% / +0.02% +0.10% +0.34%] index_select linear : Elapsed 0.087 ms (8.702 ms / 100) 8.708 -> 8.718 ( +0.11%) [ +0.14% +0.17% +0.00% / +0.11% +0.37% +0.15%] index_select reverse : Elapsed 0.087 ms (8.720 ms / 100) 8.682 -> 8.696 ( +0.16%) [ +0.00% +0.17% +0.01% / +0.16% +0.18% +0.30%] index_select skip64 : Elapsed 0.087 ms (8.682 ms / 100) 8.682 -> 8.700 ( +0.21%) [ +0.14% +0.16% +0.00% / +0.21% +0.23% +0.25%] index_select skip256 : Elapsed 0.087 ms (8.694 ms / 100) 8.712 -> 8.716 ( +0.05%) [ +0.22% +0.11% +0.00% / +0.06% +0.05% +0.10%] index_select spread : Elapsed 0.087 ms (8.731 ms / 100) 8.718 -> 8.726 ( +0.09%) [ +0.00% +0.18% +0.24% / +0.16% +0.15% +0.09%] index_select strided 3 : Elapsed 0.087 ms (8.718 ms / 100) 8.688 -> 8.703 ( +0.17%) [ +0.37% +0.00% +0.18% / +0.17% +0.61% +0.55%] index_select random : Elapsed 0.087 ms (8.720 ms / 100) 8.722 -> 8.696 ( -0.30%) [ +0.00% +0.03% +0.02% / -0.30% -0.06% +0.06%] index_select random_sorted : Elapsed 0.087 ms (8.722 ms / 100) B = [40, 5, 16, 20] (stride (1600, 16, 1, 80)) A = [4, 5, 16, 20] (stride (1, 64, 4, 320)) dim = 0 0.584 -> 0.584 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.68%] index_add_ linear : Elapsed 0.006 ms (0.584 ms / 100) 0.596 -> 0.598 ( +0.34%) [ +0.34% +0.17% +0.00% / +0.34% +0.50% +0.67%] index_copy_ linear : Elapsed 0.006 ms (0.598 ms / 100) 0.584 -> 0.584 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +0.51% +0.17%] index_add_ reverse : Elapsed 0.006 ms (0.584 ms / 100) 0.596 -> 0.597 ( +0.17%) [ +0.34% +0.17% +0.00% / +0.17% +0.34% +0.50%] index_copy_ reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.17% +0.34%] index_add_ spread : Elapsed 0.006 ms (0.587 ms / 100) 0.597 -> 0.599 ( +0.34%) [ +0.00% +0.34% +0.17% / +0.34% +0.34% +0.34%] index_copy_ spread : Elapsed 0.006 ms (0.597 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.17% +0.17%] index_add_ strided 3 : Elapsed 0.006 ms (0.586 ms / 100) 0.596 -> 0.598 ( +0.34%) [ +0.34% +0.67% +0.00% / +0.34% +0.50% +0.34%] index_copy_ strided 3 : Elapsed 0.006 ms (0.598 ms / 100) 0.587 -> 0.587 ( +0.00%) [ +0.17% +0.34% +0.00% / +0.00% +0.17% +0.34%] index_add_ strided 7 : Elapsed 0.006 ms (0.588 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.17% +0.33% +0.00% / +0.33% +0.50% +0.50%] index_copy_ strided 7 : Elapsed 0.006 ms (0.600 ms / 100) 0.587 -> 0.586 ( -0.17%) [ +1.02% +0.00% +0.00% / +0.17% -0.17% -0.17%] index_add_ perm : Elapsed 0.006 ms (0.593 ms / 100) 0.597 -> 0.597 ( +0.00%) [ +0.00% +0.34% +0.34% / +0.34% +0.00% +0.00%] index_copy_ perm : Elapsed 0.006 ms (0.597 ms / 100) 0.586 -> 0.585 ( -0.17%) [ +0.17% +0.34% +0.00% / +0.17% -0.17% +0.00%] index_add_ perm_sorted : Elapsed 0.006 ms (0.587 ms / 100) 0.598 -> 0.597 ( -0.17%) [ +0.17% +0.00% +0.00% / +0.33% +0.17% -0.17%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.599 ms / 100) 5.089 -> 5.102 ( +0.26%) [ +0.29% +0.18% +0.00% / +0.26% +0.73% +0.63%] index_select const : Elapsed 0.051 ms (5.104 ms / 100) 5.096 -> 5.097 ( +0.02%) [ +0.00% +0.26% +0.27% / +0.02% +0.37% +0.27%] index_select wrap : Elapsed 0.051 ms (5.096 ms / 100) 5.097 -> 5.096 ( -0.02%) [ +0.00% +0.16% +0.22% / -0.02% +0.39% +0.08%] index_select linear : Elapsed 0.051 ms (5.097 ms / 100) 5.093 -> 5.094 ( +0.02%) [ +0.00% +0.00% +0.20% / +0.02% +0.43% +0.35%] index_select reverse : Elapsed 0.051 ms (5.093 ms / 100) 5.090 -> 5.093 ( +0.06%) [ +0.20% +0.02% +0.00% / +0.06% +0.47% +0.49%] index_select skip64 : Elapsed 0.051 ms (5.100 ms / 100) 5.087 -> 5.101 ( +0.28%) [ +0.26% +0.06% +0.00% / +0.28% +0.57% +0.47%] index_select skip256 : Elapsed 0.051 ms (5.100 ms / 100) 5.092 -> 5.092 ( +0.00%) [ +0.00% +0.26% +0.12% / +0.00% +0.37% +0.45%] index_select spread : Elapsed 0.051 ms (5.092 ms / 100) 5.092 -> 5.095 ( +0.06%) [ +0.10% +0.00% +0.02% / +0.06% +0.41% +0.53%] index_select strided 3 : Elapsed 0.051 ms (5.097 ms / 100) 5.096 -> 5.096 ( +0.00%) [ +0.00% +0.16% +0.12% / +0.00% +0.22% +0.16%] index_select random : Elapsed 0.051 ms (5.096 ms / 100) 5.093 -> 5.102 ( +0.18%) [ +0.00% +0.04% +0.08% / +0.18% +0.29% +0.31%] index_select random_sorted : Elapsed 0.051 ms (5.093 ms / 100) B = [40, 5, 16, 20] (stride (16, 12800, 1, 640)) A = [4, 5, 16, 20] (stride (16, 1280, 1, 64)) dim = 0 1.354 -> 1.354 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.44%] index_add_ linear : Elapsed 0.014 ms (1.354 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.68% +0.68%] index_copy_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.30% +0.07% +0.00% / +0.00% +0.82% +0.82%] index_add_ reverse : Elapsed 0.013 ms (1.349 ms / 100) 1.309 -> 1.308 ( -0.08%) [ +0.15% +0.00% +0.15% / -0.08% +0.53% +0.53%] index_copy_ reverse : Elapsed 0.013 ms (1.311 ms / 100) 1.348 -> 1.347 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.52% +0.59%] index_add_ spread : Elapsed 0.013 ms (1.348 ms / 100) 1.309 -> 1.311 ( +0.15%) [ +0.46% +0.15% +0.00% / +0.15% +0.61% +0.69%] index_copy_ spread : Elapsed 0.013 ms (1.315 ms / 100) 1.353 -> 1.352 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.59% +0.52%] index_add_ strided 3 : Elapsed 0.014 ms (1.353 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.23% +0.15% +0.00% / +0.15% +0.91% +0.84%] index_copy_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.349 -> 1.350 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.013 ms (1.350 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.30% +0.00% +0.23% / +0.23% +0.46% +0.46%] index_copy_ strided 7 : Elapsed 0.013 ms (1.316 ms / 100) 1.347 -> 1.349 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.59% +0.67%] index_add_ perm : Elapsed 0.013 ms (1.348 ms / 100) 1.310 -> 1.314 ( +0.31%) [ +0.00% +0.31% +0.23% / +0.31% +0.61% +0.46%] index_copy_ perm : Elapsed 0.013 ms (1.310 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.82% +0.89%] index_add_ perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.54% +0.69%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.308 ms / 100) 9.186 -> 9.176 ( -0.11%) [ +0.00% +0.12% +0.03% / -0.11% +0.27% +0.28%] index_select const : Elapsed 0.092 ms (9.186 ms / 100) 9.180 -> 9.176 ( -0.04%) [ +0.08% +0.00% +0.09% / -0.04% +0.44% +0.52%] index_select wrap : Elapsed 0.092 ms (9.187 ms / 100) 9.201 -> 9.182 ( -0.21%) [ +0.00% +0.16% +0.07% / -0.21% +0.15% +0.15%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.186 -> 9.191 ( +0.05%) [ +0.15% +0.00% +0.25% / +0.09% +0.39% +0.05%] index_select reverse : Elapsed 0.092 ms (9.200 ms / 100) 9.170 -> 9.186 ( +0.17%) [ +0.33% +0.16% +0.00% / +0.17% +0.32% +0.51%] index_select skip64 : Elapsed 0.092 ms (9.200 ms / 100) 9.170 -> 9.172 ( +0.02%) [ +0.14% +0.10% +0.00% / +0.02% +0.19% +0.34%] index_select skip256 : Elapsed 0.092 ms (9.183 ms / 100) 9.183 -> 9.204 ( +0.23%) [ +0.00% +0.16% +0.33% / +0.23% +0.52% +0.44%] index_select spread : Elapsed 0.092 ms (9.183 ms / 100) 9.195 -> 9.181 ( -0.15%) [ +0.05% +0.00% +0.10% / -0.15% +0.35% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.200 ms / 100) 9.189 -> 9.189 ( +0.00%) [ +0.00% +0.01% +0.16% / +0.00% +0.41% +0.19%] index_select random : Elapsed 0.092 ms (9.189 ms / 100) 9.188 -> 9.180 ( -0.09%) [ +0.19% +0.17% +0.00% / -0.09% +0.44% +0.48%] index_select random_sorted : Elapsed 0.092 ms (9.205 ms / 100) B = [40, 5, 16, 20] (stride (100, 1, 4000, 5)) A = [4, 5, 16, 20] (stride (1600, 320, 20, 1)) dim = 0 1.146 -> 1.148 ( +0.17%) [ +0.00% +0.17% +0.09% / +0.17% +0.70% +0.70%] index_add_ linear : Elapsed 0.011 ms (1.146 ms / 100) 1.108 -> 1.110 ( +0.18%) [ +0.00% +0.00% +0.09% / +0.18% +0.72% +0.54%] index_copy_ linear : Elapsed 0.011 ms (1.108 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.35% +0.44%] index_add_ reverse : Elapsed 0.011 ms (1.149 ms / 100) 1.108 -> 1.109 ( +0.09%) [ +0.00% +0.18% +0.09% / +0.09% +0.45% +0.45%] index_copy_ reverse : Elapsed 0.011 ms (1.108 ms / 100) 1.147 -> 1.148 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.35% +0.35%] index_add_ spread : Elapsed 0.011 ms (1.148 ms / 100) 1.108 -> 1.109 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.45% +0.45%] index_copy_ spread : Elapsed 0.011 ms (1.108 ms / 100) 1.147 -> 1.149 ( +0.17%) [ +0.17% +0.09% +0.00% / +0.17% +0.52% +0.52%] index_add_ strided 3 : Elapsed 0.011 ms (1.149 ms / 100) 1.108 -> 1.107 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.45% +0.54%] index_copy_ strided 3 : Elapsed 0.011 ms (1.109 ms / 100) 1.148 -> 1.147 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.44% +0.52%] index_add_ strided 7 : Elapsed 0.011 ms (1.148 ms / 100) 1.108 -> 1.110 ( +0.18%) [ +0.09% +0.27% +0.00% / +0.18% +0.99% +0.54%] index_copy_ strided 7 : Elapsed 0.011 ms (1.109 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.44% +0.44%] index_add_ perm : Elapsed 0.011 ms (1.148 ms / 100) 1.108 -> 1.110 ( +0.18%) [ +0.00% +0.18% +0.27% / +0.18% +0.54% +0.36%] index_copy_ perm : Elapsed 0.011 ms (1.108 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.35%] index_add_ perm_sorted : Elapsed 0.011 ms (1.148 ms / 100) 1.108 -> 1.108 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.45% +0.36%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.108 ms / 100) 8.260 -> 8.283 ( +0.28%) [ +0.13% +0.13% +0.00% / +0.28% +0.31% +0.58%] index_select const : Elapsed 0.083 ms (8.271 ms / 100) 8.289 -> 8.306 ( +0.21%) [ +0.00% +0.11% +0.00% / +0.21% +0.84% +0.49%] index_select wrap : Elapsed 0.083 ms (8.289 ms / 100) 8.283 -> 8.297 ( +0.17%) [ +0.05% +0.11% +0.00% / +0.17% +0.35% +0.22%] index_select linear : Elapsed 0.083 ms (8.287 ms / 100) 8.310 -> 8.297 ( -0.16%) [ +0.05% +0.00% +0.08% / -0.16% +0.14% +0.18%] index_select reverse : Elapsed 0.083 ms (8.314 ms / 100) 8.257 -> 8.282 ( +0.30%) [ +0.00% +0.11% +0.35% / +0.30% +0.58% +0.35%] index_select skip64 : Elapsed 0.083 ms (8.257 ms / 100) 8.274 -> 8.272 ( -0.02%) [ +0.00% +0.05% +0.08% / -0.02% +0.47% +0.22%] index_select skip256 : Elapsed 0.083 ms (8.274 ms / 100) 8.296 -> 8.298 ( +0.02%) [ +0.16% +0.00% +0.11% / +0.06% +0.02% +0.17%] index_select spread : Elapsed 0.083 ms (8.309 ms / 100) 8.296 -> 8.321 ( +0.30%) [ +0.13% +0.00% +0.10% / +0.37% +0.30% +0.63%] index_select strided 3 : Elapsed 0.083 ms (8.307 ms / 100) 8.295 -> 8.299 ( +0.05%) [ +0.20% +0.00% +0.05% / +0.05% +0.46% +0.18%] index_select random : Elapsed 0.083 ms (8.312 ms / 100) 8.292 -> 8.309 ( +0.21%) [ +0.06% +0.05% +0.00% / +0.33% +0.21% +0.24%] index_select random_sorted : Elapsed 0.083 ms (8.297 ms / 100) B = [40, 5, 16, 20] (stride (1, 40, 4000, 200)) A = [4, 5, 16, 20] (stride (1, 1280, 4, 64)) dim = 0 1.284 -> 1.285 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +0.55% +0.55%] index_add_ linear : Elapsed 0.013 ms (1.287 ms / 100) 1.242 -> 1.240 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.16% +0.24%] index_copy_ linear : Elapsed 0.012 ms (1.242 ms / 100) 1.276 -> 1.281 ( +0.39%) [ +0.00% +0.00% +0.16% / +0.39% +0.55% +0.71%] index_add_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.232 -> 1.234 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.73% +0.57%] index_copy_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.39% +0.31%] index_add_ spread : Elapsed 0.013 ms (1.284 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.08% +0.00% +0.24% / +0.16% +0.32% +0.32%] index_copy_ spread : Elapsed 0.012 ms (1.242 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.00% +0.08% +0.47% / +0.00% +0.62% +0.86%] index_add_ strided 3 : Elapsed 0.013 ms (1.286 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.72% +0.81%] index_copy_ strided 3 : Elapsed 0.012 ms (1.243 ms / 100) 1.278 -> 1.281 ( +0.23%) [ +0.00% +0.23% +0.16% / +0.23% +0.70% +0.78%] index_add_ strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.235 -> 1.238 ( +0.24%) [ +0.00% +0.16% +0.32% / +0.24% +0.65% +0.73%] index_copy_ strided 7 : Elapsed 0.012 ms (1.235 ms / 100) 1.279 -> 1.278 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.63% +0.63%] index_add_ perm : Elapsed 0.013 ms (1.280 ms / 100) 1.236 -> 1.237 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.65% +0.65%] index_copy_ perm : Elapsed 0.012 ms (1.238 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.94% +0.70%] index_add_ perm_sorted : Elapsed 0.013 ms (1.278 ms / 100) 1.234 -> 1.237 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.24% +0.81% +0.81%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.236 ms / 100) 8.805 -> 8.816 ( +0.12%) [ +0.09% +0.41% +0.00% / +0.20% +0.12% +0.35%] index_select const : Elapsed 0.088 ms (8.813 ms / 100) 8.812 -> 8.805 ( -0.08%) [ +0.08% +0.00% +0.17% / -0.03% +0.02% -0.08%] index_select wrap : Elapsed 0.088 ms (8.819 ms / 100) 8.811 -> 8.818 ( +0.08%) [ +0.12% +0.00% +0.06% / +0.17% +0.15% +0.08%] index_select linear : Elapsed 0.088 ms (8.822 ms / 100) 8.815 -> 8.807 ( -0.09%) [ +0.00% +0.07% +0.18% / +0.25% -0.09% +0.02%] index_select reverse : Elapsed 0.088 ms (8.815 ms / 100) 8.810 -> 8.811 ( +0.01%) [ +0.00% +0.17% +0.18% / +0.01% +0.28% +0.06%] index_select skip64 : Elapsed 0.088 ms (8.810 ms / 100) 8.803 -> 8.819 ( +0.18%) [ +0.09% +0.00% +0.14% / +0.18% +0.26% +0.34%] index_select skip256 : Elapsed 0.088 ms (8.811 ms / 100) 8.794 -> 8.814 ( +0.23%) [ +0.00% +0.34% +0.26% / +0.25% +0.26% +0.23%] index_select spread : Elapsed 0.088 ms (8.794 ms / 100) 8.805 -> 8.817 ( +0.14%) [ +0.11% +0.11% +0.00% / +0.30% +0.15% +0.14%] index_select strided 3 : Elapsed 0.088 ms (8.815 ms / 100) 8.802 -> 8.810 ( +0.09%) [ +0.00% +0.17% +0.31% / +0.23% +0.09% +0.12%] index_select random : Elapsed 0.088 ms (8.802 ms / 100) 8.798 -> 8.813 ( +0.17%) [ +0.27% +0.00% +0.15% / +0.24% +0.17% +0.20%] index_select random_sorted : Elapsed 0.088 ms (8.822 ms / 100) B = [40, 5, 16, 20] (stride (5, 1, 200, 3200)) A = [4, 5, 16, 20] (stride (1, 1280, 4, 64)) dim = 0 1.362 -> 1.362 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.59% +0.66%] index_add_ linear : Elapsed 0.014 ms (1.363 ms / 100) 1.324 -> 1.324 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.68% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.324 ms / 100) 1.362 -> 1.363 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.51% +0.44%] index_add_ reverse : Elapsed 0.014 ms (1.363 ms / 100) 1.324 -> 1.325 ( +0.08%) [ +0.00% +0.08% +0.15% / +0.08% +0.53% +0.53%] index_copy_ reverse : Elapsed 0.013 ms (1.324 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.44% +0.59%] index_add_ spread : Elapsed 0.014 ms (1.360 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.60%] index_copy_ spread : Elapsed 0.013 ms (1.324 ms / 100) 1.354 -> 1.355 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.37% +0.59%] index_add_ strided 3 : Elapsed 0.014 ms (1.356 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.61% +0.61%] index_copy_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.52% +0.44%] index_add_ strided 7 : Elapsed 0.014 ms (1.360 ms / 100) 1.321 -> 1.323 ( +0.15%) [ +0.08% +0.23% +0.00% / +0.15% +0.53% +0.53%] index_copy_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.52% +0.59%] index_add_ perm : Elapsed 0.014 ms (1.361 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.91% +0.61%] index_copy_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.365 -> 1.366 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.37% +0.37%] index_add_ perm_sorted : Elapsed 0.014 ms (1.365 ms / 100) 1.327 -> 1.329 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.327 ms / 100) 9.227 -> 9.216 ( -0.12%) [ +0.07% +0.10% +0.00% / +0.07% -0.12% +0.13%] index_select const : Elapsed 0.092 ms (9.233 ms / 100) 9.224 -> 9.216 ( -0.09%) [ +0.05% +0.00% +0.00% / -0.09% -0.03% +0.07%] index_select wrap : Elapsed 0.092 ms (9.229 ms / 100) 9.233 -> 9.223 ( -0.11%) [ +0.06% +0.00% +0.06% / -0.11% +0.10% +0.13%] index_select linear : Elapsed 0.092 ms (9.239 ms / 100) 9.209 -> 9.218 ( +0.10%) [ +0.17% +0.16% +0.00% / +0.33% +0.13% +0.10%] index_select reverse : Elapsed 0.092 ms (9.225 ms / 100) 9.217 -> 9.225 ( +0.09%) [ +0.11% +0.12% +0.00% / +0.09% +0.27% +0.26%] index_select skip64 : Elapsed 0.092 ms (9.227 ms / 100) 9.228 -> 9.235 ( +0.08%) [ +0.02% +0.00% +0.03% / +0.26% +0.08% +0.09%] index_select skip256 : Elapsed 0.092 ms (9.230 ms / 100) 9.226 -> 9.224 ( -0.02%) [ +0.03% +0.15% +0.00% / +0.14% -0.02% +0.07%] index_select spread : Elapsed 0.092 ms (9.229 ms / 100) 9.225 -> 9.219 ( -0.07%) [ +0.28% +0.14% +0.00% / -0.07% +0.09% -0.03%] index_select strided 3 : Elapsed 0.093 ms (9.251 ms / 100) 9.220 -> 9.220 ( +0.00%) [ +0.10% +0.00% +0.08% / +0.40% +0.41% +0.00%] index_select random : Elapsed 0.092 ms (9.229 ms / 100) 9.222 -> 9.237 ( +0.16%) [ +0.13% +0.00% +0.18% / +0.20% +0.16% +0.17%] index_select random_sorted : Elapsed 0.092 ms (9.234 ms / 100) B = [40, 5, 16, 20] (stride (1, 40, 200, 3200)) A = [4, 5, 16, 20] (stride (80, 1, 5, 320)) dim = 0 1.372 -> 1.371 ( -0.07%) [ +0.15% +0.00% +0.00% / -0.07% +0.66% +0.95%] index_add_ linear : Elapsed 0.014 ms (1.374 ms / 100) 1.330 -> 1.335 ( +0.38%) [ +0.23% +0.15% +0.00% / +0.38% +0.53% +0.60%] index_copy_ linear : Elapsed 0.013 ms (1.333 ms / 100) 1.375 -> 1.380 ( +0.36%) [ +0.00% +0.00% +0.36% / +0.36% +0.65% +0.73%] index_add_ reverse : Elapsed 0.014 ms (1.375 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.52% +0.30%] index_copy_ reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.15% +0.15% +0.00% / -0.07% +0.36% +0.44%] index_add_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.336 -> 1.336 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.07% +0.07% +0.00%] index_copy_ spread : Elapsed 0.013 ms (1.337 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.80% +0.73%] index_add_ strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.333 -> 1.337 ( +0.30%) [ +0.00% +0.23% +0.38% / +0.38% +0.53% +0.30%] index_copy_ strided 3 : Elapsed 0.013 ms (1.333 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.87% +0.80%] index_add_ strided 7 : Elapsed 0.014 ms (1.373 ms / 100) 1.332 -> 1.334 ( +0.15%) [ +0.15% +0.38% +0.00% / +0.15% +0.53% +0.38%] index_copy_ strided 7 : Elapsed 0.013 ms (1.334 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.66% +0.80%] index_add_ perm : Elapsed 0.014 ms (1.375 ms / 100) 1.329 -> 1.329 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.60% +0.90%] index_copy_ perm : Elapsed 0.013 ms (1.331 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.00% +0.07% +0.29% / +0.29% +0.65% +0.36%] index_add_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 1.337 -> 1.338 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.75% +0.37%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.337 ms / 100) 9.244 -> 9.251 ( +0.08%) [ +0.03% +0.14% +0.00% / +0.21% +0.28% +0.08%] index_select const : Elapsed 0.092 ms (9.247 ms / 100) 9.263 -> 9.285 ( +0.24%) [ +0.09% +0.31% +0.00% / +0.24% +0.36% +0.26%] index_select wrap : Elapsed 0.093 ms (9.271 ms / 100) 9.261 -> 9.260 ( -0.01%) [ +0.05% +0.04% +0.00% / -0.01% +0.11% +0.13%] index_select linear : Elapsed 0.093 ms (9.266 ms / 100) 9.263 -> 9.270 ( +0.08%) [ +0.00% +0.06% +0.26% / +0.08% +0.09% +0.33%] index_select reverse : Elapsed 0.093 ms (9.263 ms / 100) 9.245 -> 9.248 ( +0.03%) [ +0.10% +0.16% +0.00% / +0.17% +0.10% +0.03%] index_select skip64 : Elapsed 0.093 ms (9.254 ms / 100) 9.245 -> 9.246 ( +0.01%) [ +0.17% +0.00% +0.02% / +0.14% +0.02% +0.01%] index_select skip256 : Elapsed 0.093 ms (9.261 ms / 100) 9.268 -> 9.278 ( +0.11%) [ +0.00% +0.14% +0.11% / +0.14% +0.38% +0.11%] index_select spread : Elapsed 0.093 ms (9.268 ms / 100) 9.281 -> 9.278 ( -0.03%) [ +0.00% +0.16% +0.02% / +0.06% +0.09% -0.03%] index_select strided 3 : Elapsed 0.093 ms (9.281 ms / 100) 9.268 -> 9.278 ( +0.11%) [ +0.00% +0.03% +0.10% / +0.18% +0.23% +0.11%] index_select random : Elapsed 0.093 ms (9.268 ms / 100) 9.279 -> 9.279 ( +0.00%) [ +0.03% +0.28% +0.00% / +0.00% +0.16% +0.22%] index_select random_sorted : Elapsed 0.093 ms (9.282 ms / 100) out_shape = [4, 40, 16, 20] in_shape = [4, 5, 16, 20] idx_dim = 1 B = [4, 40, 16, 20] (stride (20, 80, 3200, 1)) dim = 1 fill_cnt = 5 0.893 -> 0.894 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.67% +0.67%] index_fill_ const : Elapsed 0.009 ms (0.894 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.78% +0.78%] index_fill_ linear : Elapsed 0.009 ms (0.894 ms / 100) 0.893 -> 0.896 ( +0.34%) [ +0.22% +0.11% +0.00% / +0.34% +0.67% +0.67%] index_fill_ reverse : Elapsed 0.009 ms (0.895 ms / 100) 0.892 -> 0.893 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +0.78% +0.78%] index_fill_ skip64 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.67% +0.67%] index_fill_ skip256 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.78% +1.34%] index_fill_ spread : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.893 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.90% +0.90%] index_fill_ strided 3 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.892 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.78% +0.78%] index_fill_ strided 5 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.892 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.90% +0.67%] index_fill_ strided 7 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.893 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.78%] index_fill_ strided 8 : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.894 ( +0.22%) [ +0.22% +0.11% +0.00% / +0.22% +0.78% +1.01%] index_fill_ strided 16 : Elapsed 0.009 ms (0.894 ms / 100) 0.892 -> 0.893 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.78% +0.78%] index_fill_ random : Elapsed 0.009 ms (0.894 ms / 100) 0.892 -> 0.892 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.78% +0.78%] index_fill_ random_sorted : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.895 ( +0.34%) [ +0.00% +0.11% +0.00% / +0.34% +0.90% +0.78%] index_fill_ perm : Elapsed 0.009 ms (0.892 ms / 100) 0.892 -> 0.892 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.90% +0.78%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.892 ms / 100) B = [4, 40, 16, 20] (stride (40, 1, 3200, 160)) A = [4, 5, 16, 20] (stride (100, 20, 400, 1)) dim = 1 1.422 -> 1.420 ( -0.14%) [ +0.07% +0.07% +0.00% / -0.14% +0.28% +0.28%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.375 -> 1.380 ( +0.36%) [ +0.07% +0.00% +0.00% / +0.36% +0.58% +0.65%] index_copy_ linear : Elapsed 0.014 ms (1.376 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.21% / -0.07% +0.28% +0.21%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.374 -> 1.379 ( +0.36%) [ +0.00% +0.22% +0.15% / +0.36% +0.58% +0.58%] index_copy_ reverse : Elapsed 0.014 ms (1.374 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.28% +0.35%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.36% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.379 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.35% +0.42%] index_add_ strided 3 : Elapsed 0.014 ms (1.423 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.73% +0.51%] index_copy_ strided 3 : Elapsed 0.014 ms (1.375 ms / 100) 1.420 -> 1.426 ( +0.42%) [ +0.21% +0.14% +0.00% / +0.42% +0.42% +0.42%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.36%] index_copy_ strided 7 : Elapsed 0.014 ms (1.376 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.35% +0.35%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.51% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.375 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.07% +0.28% +0.00% / +0.07% +0.35% +0.42%] index_add_ perm_sorted : Elapsed 0.014 ms (1.421 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.15% +0.00% / +0.07% +0.58% +0.51%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) 8.236 -> 8.231 ( -0.06%) [ +0.00% +0.00% +0.15% / -0.06% +0.34% +0.12%] index_select const : Elapsed 0.082 ms (8.236 ms / 100) 8.263 -> 8.269 ( +0.07%) [ +0.00% +0.34% +0.22% / +0.18% +0.13% +0.07%] index_select wrap : Elapsed 0.083 ms (8.263 ms / 100) 8.273 -> 8.263 ( -0.12%) [ +0.17% +0.17% +0.00% / +0.08% -0.11% -0.12%] index_select linear : Elapsed 0.083 ms (8.287 ms / 100) 8.261 -> 8.268 ( +0.08%) [ +0.12% +0.00% +0.11% / +0.08% +0.24% +0.46%] index_select reverse : Elapsed 0.083 ms (8.271 ms / 100) 8.239 -> 8.251 ( +0.15%) [ +0.08% +0.00% +0.06% / +0.21% +0.19% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.246 ms / 100) 8.244 -> 8.236 ( -0.10%) [ +0.13% +0.00% +0.06% / -0.10% +0.11% +0.44%] index_select skip256 : Elapsed 0.083 ms (8.255 ms / 100) 8.266 -> 8.260 ( -0.07%) [ +0.25% +0.00% +0.02% / -0.07% -0.01% -0.04%] index_select spread : Elapsed 0.083 ms (8.287 ms / 100) 8.266 -> 8.270 ( +0.05%) [ +0.12% +0.00% +0.11% / +0.13% +0.05% +0.24%] index_select strided 3 : Elapsed 0.083 ms (8.276 ms / 100) 8.268 -> 8.262 ( -0.07%) [ +0.00% +0.02% +0.00% / +0.10% -0.07% +0.27%] index_select random : Elapsed 0.083 ms (8.268 ms / 100) 8.261 -> 8.260 ( -0.01%) [ +0.29% +0.00% +0.06% / +0.18% -0.01% +0.28%] index_select random_sorted : Elapsed 0.083 ms (8.285 ms / 100) out_shape = [4, 5, 40, 20] in_shape = [4, 5, 16, 20] idx_dim = 2 B = [4, 5, 40, 20] (stride (40, 3200, 1, 160)) A = [4, 5, 16, 20] (stride (20, 80, 400, 1)) dim = 2 3.928 -> 3.931 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.59% +0.61%] index_add_ linear : Elapsed 0.039 ms (3.929 ms / 100) 3.803 -> 3.806 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.60% +0.63%] index_copy_ linear : Elapsed 0.038 ms (3.804 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.51% +0.48%] index_add_ reverse : Elapsed 0.039 ms (3.945 ms / 100) 3.819 -> 3.820 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.50% +0.45%] index_copy_ reverse : Elapsed 0.038 ms (3.820 ms / 100) 3.930 -> 3.929 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.53% +0.53%] index_add_ spread : Elapsed 0.039 ms (3.930 ms / 100) 3.808 -> 3.810 ( +0.05%) [ +0.00% +0.08% +0.00% / +0.05% +0.50% +0.50%] index_copy_ spread : Elapsed 0.038 ms (3.808 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.53% +0.48%] index_add_ strided 3 : Elapsed 0.039 ms (3.936 ms / 100) 3.809 -> 3.811 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.55% +0.53%] index_copy_ strided 3 : Elapsed 0.038 ms (3.810 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_add_ strided 7 : Elapsed 0.039 ms (3.946 ms / 100) 3.819 -> 3.820 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.58% +0.60%] index_copy_ strided 7 : Elapsed 0.038 ms (3.820 ms / 100) 3.929 -> 3.929 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.56% +0.51%] index_add_ perm : Elapsed 0.039 ms (3.931 ms / 100) 3.803 -> 3.805 ( +0.05%) [ +0.11% +0.03% +0.00% / +0.05% +0.55% +0.58%] index_copy_ perm : Elapsed 0.038 ms (3.807 ms / 100) 3.929 -> 3.930 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.46% +0.48%] index_add_ perm_sorted : Elapsed 0.039 ms (3.930 ms / 100) 3.803 -> 3.806 ( +0.08%) [ +0.05% +0.16% +0.00% / +0.08% +0.53% +0.53%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.805 ms / 100) 5.556 -> 5.556 ( +0.00%) [ +0.04% +0.00% +0.13% / +0.00% +0.22% +0.23%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.561 -> 5.568 ( +0.13%) [ +0.07% +0.04% +0.00% / +0.13% +0.31% +0.45%] index_select wrap : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.572 ( +0.11%) [ +0.00% +0.02% +0.13% / +0.11% +0.20% +0.14%] index_select linear : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.568 ( +0.07%) [ +0.00% +0.11% +0.07% / +0.07% +0.16% +0.29%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.567 -> 5.560 ( -0.13%) [ +0.07% +0.00% +0.02% / +0.09% -0.05% -0.13%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.05% +0.00% +0.04% / +0.00% +0.00% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.567 ( -0.02%) [ +0.05% +0.00% +0.09% / +0.02% +0.09% -0.02%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.557 -> 5.566 ( +0.16%) [ +0.00% +0.20% +0.05% / +0.16% +0.31% +0.38%] index_select strided 3 : Elapsed 0.056 ms (5.557 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.02% +0.00% +0.22% / +0.05% +0.16% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.564 ms / 100) 5.560 -> 5.565 ( +0.09%) [ +0.13% +0.13% +0.00% / +0.09% +0.18% +0.32%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.558 -> 5.563 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.11% +0.09% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.568 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.02% +0.16%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.11% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.566 ms / 100) B = [4, 5, 40, 20] (stride (1, 3200, 4, 160)) A = [4, 5, 16, 20] (stride (1, 80, 400, 4)) dim = 2 4.000 -> 4.002 ( +0.05%) [ +0.00% +0.08% +0.05% / +0.05% +0.88% +0.88%] index_add_ linear : Elapsed 0.040 ms (4.000 ms / 100) 3.869 -> 3.870 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.85% +0.83%] index_copy_ linear : Elapsed 0.039 ms (3.870 ms / 100) 3.990 -> 3.996 ( +0.15%) [ +0.13% +0.00% +0.00% / +0.15% +0.90% +0.68%] index_add_ reverse : Elapsed 0.040 ms (3.995 ms / 100) 3.861 -> 3.875 ( +0.36%) [ +0.13% +0.00% +0.08% / +0.36% +0.98% +0.65%] index_copy_ reverse : Elapsed 0.039 ms (3.866 ms / 100) 3.989 -> 3.995 ( +0.15%) [ +0.00% +0.18% +0.18% / +0.15% +0.85% +0.88%] index_add_ spread : Elapsed 0.040 ms (3.989 ms / 100) 3.867 -> 3.868 ( +0.03%) [ +0.00% +0.10% +0.00% / +0.03% +0.88% +0.96%] index_copy_ spread : Elapsed 0.039 ms (3.867 ms / 100) 3.981 -> 3.989 ( +0.20%) [ +0.20% +0.05% +0.00% / +0.20% +0.83% +0.73%] index_add_ strided 3 : Elapsed 0.040 ms (3.989 ms / 100) 3.855 -> 3.860 ( +0.13%) [ +0.08% +0.00% +0.00% / +0.13% +0.80% +0.70%] index_copy_ strided 3 : Elapsed 0.039 ms (3.858 ms / 100) 3.989 -> 3.990 ( +0.03%) [ +0.18% +0.00% +0.15% / +0.03% +0.83% +0.80%] index_add_ strided 7 : Elapsed 0.040 ms (3.996 ms / 100) 3.865 -> 3.863 ( -0.05%) [ +0.08% +0.00% +0.05% / -0.05% +0.67% +0.65%] index_copy_ strided 7 : Elapsed 0.039 ms (3.868 ms / 100) 4.008 -> 4.004 ( -0.10%) [ +0.05% +0.00% +0.00% / -0.10% +0.72% +0.62%] index_add_ perm : Elapsed 0.040 ms (4.010 ms / 100) 3.873 -> 3.873 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.85% +0.70%] index_copy_ perm : Elapsed 0.039 ms (3.875 ms / 100) 3.982 -> 3.984 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.88% +0.78%] index_add_ perm_sorted : Elapsed 0.040 ms (3.985 ms / 100) 3.852 -> 3.853 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.86% +0.83%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.854 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.02% -0.02% -0.05%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.562 ( -0.11%) [ +0.00% +0.00% +0.04% / +0.00% -0.11% +0.04%] index_select wrap : Elapsed 0.056 ms (5.568 ms / 100) 5.571 -> 5.562 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.04% -0.09%] index_select linear : Elapsed 0.056 ms (5.571 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.00% +0.02% +0.13% / -0.04% +0.04% +0.09%] index_select reverse : Elapsed 0.056 ms (5.565 ms / 100) 5.561 -> 5.563 ( +0.04%) [ +0.00% +0.00% +0.02% / +0.04% +0.07% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.560 -> 5.557 ( -0.05%) [ +0.07% +0.00% +0.02% / -0.05% +0.16% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.572 -> 5.559 ( -0.23%) [ +0.00% +0.09% +0.04% / +0.07% -0.23% -0.18%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.02% +0.04% +0.00% / -0.09% -0.07% -0.13%] index_select strided 3 : Elapsed 0.056 ms (5.570 ms / 100) 5.570 -> 5.564 ( -0.11%) [ +0.04% +0.00% +0.02% / -0.05% +0.00% -0.11%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.00% +0.16% +0.14% / +0.05% +0.25% +0.29%] index_select strided 7 : Elapsed 0.056 ms (5.561 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.04% +0.13% +0.00% / +0.07% +0.07% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.00% +0.14% +0.11% / +0.07% +0.13% -0.04%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.00% +0.13% +0.00% / +0.04% -0.04% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [4, 5, 40, 20] (stride (100, 20, 400, 1)) A = [4, 5, 16, 20] (stride (1600, 16, 1, 80)) dim = 2 4.019 -> 4.021 ( +0.05%) [ +0.00% +0.37% +0.07% / +0.05% +0.57% +0.50%] index_add_ linear : Elapsed 0.040 ms (4.019 ms / 100) 3.846 -> 3.851 ( +0.13%) [ +0.00% +0.39% +0.13% / +0.13% +0.60% +0.49%] index_copy_ linear : Elapsed 0.038 ms (3.846 ms / 100) 3.997 -> 4.007 ( +0.25%) [ +0.60% +0.00% +0.43% / +0.25% +0.78% +0.73%] index_add_ reverse : Elapsed 0.040 ms (4.021 ms / 100) 3.836 -> 3.843 ( +0.18%) [ +0.57% +0.00% +0.36% / +0.18% +0.70% +0.68%] index_copy_ reverse : Elapsed 0.039 ms (3.858 ms / 100) 4.027 -> 4.027 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.60% +0.42%] index_add_ spread : Elapsed 0.040 ms (4.029 ms / 100) 3.857 -> 3.858 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.49% +0.52%] index_copy_ spread : Elapsed 0.039 ms (3.857 ms / 100) 4.032 -> 4.032 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.52% +0.55%] index_add_ strided 3 : Elapsed 0.040 ms (4.033 ms / 100) 3.858 -> 3.858 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.039 ms (3.858 ms / 100) 4.009 -> 4.020 ( +0.27%) [ +0.00% +0.00% +0.27% / +0.27% +0.80% +0.57%] index_add_ strided 7 : Elapsed 0.040 ms (4.009 ms / 100) 3.843 -> 3.859 ( +0.42%) [ +0.08% +0.00% +0.39% / +0.42% +0.81% +0.60%] index_copy_ strided 7 : Elapsed 0.038 ms (3.846 ms / 100) 4.021 -> 4.022 ( +0.02%) [ +0.37% +0.00% +0.05% / +0.02% +0.75% +0.40%] index_add_ perm : Elapsed 0.040 ms (4.036 ms / 100) 3.845 -> 3.846 ( +0.03%) [ +0.39% +0.00% +0.10% / +0.03% +0.83% +0.49%] index_copy_ perm : Elapsed 0.039 ms (3.860 ms / 100) 4.022 -> 4.035 ( +0.32%) [ +0.00% +0.05% +0.05% / +0.35% +0.82% +0.32%] index_add_ perm_sorted : Elapsed 0.040 ms (4.022 ms / 100) 3.848 -> 3.863 ( +0.39%) [ +0.00% +0.05% +0.13% / +0.39% +0.86% +0.39%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.848 ms / 100) 5.470 -> 5.474 ( +0.07%) [ +0.02% +0.22% +0.00% / +0.09% +0.07% +0.16%] index_select const : Elapsed 0.055 ms (5.471 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.00% +0.09% +0.02% / -0.04% +0.18% +0.09%] index_select wrap : Elapsed 0.055 ms (5.473 ms / 100) 5.474 -> 5.476 ( +0.04%) [ +0.05% +0.00% +0.09% / +0.04% +0.16% +0.24%] index_select linear : Elapsed 0.055 ms (5.477 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.00% +0.13% +0.11% / +0.05% +0.22% +0.18%] index_select reverse : Elapsed 0.055 ms (5.472 ms / 100) 5.477 -> 5.467 ( -0.18%) [ +0.02% +0.15% +0.00% / +0.02% -0.09% -0.18%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.477 -> 5.470 ( -0.13%) [ +0.05% +0.00% +0.02% / -0.04% -0.13% -0.09%] index_select skip256 : Elapsed 0.055 ms (5.480 ms / 100) 5.469 -> 5.471 ( +0.04%) [ +0.00% +0.07% +0.00% / +0.27% +0.04% +0.26%] index_select spread : Elapsed 0.055 ms (5.469 ms / 100) 5.468 -> 5.468 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +0.20% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.468 ms / 100) 5.472 -> 5.469 ( -0.05%) [ +0.00% +0.04% +0.07% / -0.05% +0.11% +0.26%] index_select strided 5 : Elapsed 0.055 ms (5.472 ms / 100) 5.473 -> 5.479 ( +0.11%) [ +0.15% +0.00% +0.07% / +0.11% +0.13% +0.22%] index_select strided 7 : Elapsed 0.055 ms (5.481 ms / 100) 5.469 -> 5.475 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.22% +0.13%] index_select strided 8 : Elapsed 0.055 ms (5.472 ms / 100) 5.474 -> 5.471 ( -0.05%) [ +0.04% +0.05% +0.00% / -0.05% +0.16% +0.16%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.474 -> 5.472 ( -0.04%) [ +0.16% +0.00% +0.00% / -0.04% +0.13% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.483 ms / 100) B = [4, 5, 40, 20] (stride (100, 1, 400, 5)) A = [4, 5, 16, 20] (stride (320, 1280, 1, 16)) dim = 2 4.232 -> 4.233 ( +0.02%) [ +0.09% +0.09% +0.00% / +0.02% +0.69% +0.69%] index_add_ linear : Elapsed 0.042 ms (4.236 ms / 100) 4.084 -> 4.095 ( +0.27%) [ +0.10% +0.12% +0.00% / +0.27% +0.69% +0.69%] index_copy_ linear : Elapsed 0.041 ms (4.088 ms / 100) 4.212 -> 4.213 ( +0.02%) [ +0.07% +0.05% +0.00% / +0.02% +0.69% +0.66%] index_add_ reverse : Elapsed 0.042 ms (4.215 ms / 100) 4.063 -> 4.063 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.64% +0.62%] index_copy_ reverse : Elapsed 0.041 ms (4.064 ms / 100) 4.220 -> 4.217 ( -0.07%) [ +0.05% +0.12% +0.00% / -0.07% +0.43% +0.64%] index_add_ spread : Elapsed 0.042 ms (4.222 ms / 100) 4.072 -> 4.069 ( -0.07%) [ +0.00% +0.02% +0.00% / -0.07% +0.61% +0.64%] index_copy_ spread : Elapsed 0.041 ms (4.072 ms / 100) 4.209 -> 4.210 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.62% +0.67%] index_add_ strided 3 : Elapsed 0.042 ms (4.209 ms / 100) 4.062 -> 4.063 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.71% +0.69%] index_copy_ strided 3 : Elapsed 0.041 ms (4.062 ms / 100) 4.209 -> 4.213 ( +0.10%) [ +0.10% +0.07% +0.00% / +0.10% +0.78% +0.83%] index_add_ strided 7 : Elapsed 0.042 ms (4.213 ms / 100) 4.062 -> 4.065 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.07% +0.69% +0.74%] index_copy_ strided 7 : Elapsed 0.041 ms (4.063 ms / 100) 4.234 -> 4.236 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.59% +0.73%] index_add_ perm : Elapsed 0.042 ms (4.236 ms / 100) 4.085 -> 4.088 ( +0.07%) [ +0.15% +0.02% +0.00% / +0.07% +0.71% +0.83%] index_copy_ perm : Elapsed 0.041 ms (4.091 ms / 100) 4.209 -> 4.209 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.64% +0.64%] index_add_ perm_sorted : Elapsed 0.042 ms (4.209 ms / 100) 4.062 -> 4.062 ( +0.00%) [ +0.00% +0.05% +0.07% / +0.00% +0.69% +0.62%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.062 ms / 100) 5.562 -> 5.554 ( -0.14%) [ +0.00% +0.05% +0.05% / -0.14% -0.09% +0.00%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.556 -> 5.553 ( -0.05%) [ +0.18% +0.32% +0.00% / +0.22% +0.00% -0.05%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.00% +0.11% +0.05% / +0.09% +0.04% +0.02%] index_select linear : Elapsed 0.056 ms (5.560 ms / 100) 5.560 -> 5.560 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.05% +0.05% +0.00%] index_select reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.556 -> 5.555 ( -0.02%) [ +0.11% +0.16% +0.00% / -0.02% +0.00% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.549 -> 5.555 ( +0.11%) [ +0.23% +0.09% +0.00% / +0.11% +0.20% +0.16%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.558 -> 5.558 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.13% +0.00% +0.13%] index_select spread : Elapsed 0.056 ms (5.561 ms / 100) 5.561 -> 5.558 ( -0.05%) [ +0.00% +0.07% +0.11% / +0.04% -0.05% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.561 ms / 100) 5.560 -> 5.552 ( -0.14%) [ +0.00% +0.05% +0.04% / +0.07% -0.14% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.560 ms / 100) 5.557 -> 5.557 ( +0.00%) [ +0.04% +0.13% +0.00% / +0.00% +0.14% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.559 ms / 100) 5.557 -> 5.554 ( -0.05%) [ +0.05% +0.00% +0.16% / -0.05% +0.07% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.566 -> 5.559 ( -0.13%) [ +0.00% +0.13% +0.02% / -0.13% +0.00% -0.13%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.558 ( -0.02%) [ +0.07% +0.00% +0.00% / +0.02% -0.02% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [4, 5, 40, 20] (stride (20, 80, 400, 1)) dim = 2 fill_cnt = 16 2.064 -> 2.065 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.97% +0.97%] index_fill_ const : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.064 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +1.02% +0.97%] index_fill_ linear : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.064 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +1.16% +1.07%] index_fill_ reverse : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.065 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +1.07% +1.07%] index_fill_ skip64 : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.064 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.92% +0.87%] index_fill_ skip256 : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.065 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.92% +0.92%] index_fill_ spread : Elapsed 0.021 ms (2.065 ms / 100) 2.065 -> 2.066 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +0.82% +0.87%] index_fill_ strided 3 : Elapsed 0.021 ms (2.068 ms / 100) 2.065 -> 2.065 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.82% +0.82%] index_fill_ strided 5 : Elapsed 0.021 ms (2.066 ms / 100) 2.066 -> 2.066 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.73% +0.68%] index_fill_ strided 7 : Elapsed 0.021 ms (2.066 ms / 100) 2.066 -> 2.067 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.68% +0.68%] index_fill_ strided 8 : Elapsed 0.021 ms (2.066 ms / 100) 2.069 -> 2.068 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.53% +0.53%] index_fill_ strided 16 : Elapsed 0.021 ms (2.069 ms / 100) 2.068 -> 2.069 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.58% +0.58%] index_fill_ random : Elapsed 0.021 ms (2.069 ms / 100) 2.066 -> 2.067 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.77% +0.73%] index_fill_ random_sorted : Elapsed 0.021 ms (2.066 ms / 100) 2.066 -> 2.065 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.77% +0.73%] index_fill_ perm : Elapsed 0.021 ms (2.066 ms / 100) 2.064 -> 2.066 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.97% +0.92%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.065 ms / 100) B = [4, 5, 40, 20] (stride (5, 1, 400, 20)) dim = 2 fill_cnt = 16 1.030 -> 1.031 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +1.07% +0.97%] index_fill_ const : Elapsed 0.010 ms (1.031 ms / 100) 1.024 -> 1.023 ( -0.10%) [ +0.10% +0.00% +0.20% / -0.10% +0.98% +1.07%] index_fill_ linear : Elapsed 0.010 ms (1.025 ms / 100) 1.026 -> 1.026 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.88%] index_fill_ reverse : Elapsed 0.010 ms (1.026 ms / 100) 1.030 -> 1.031 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.87% +0.78%] index_fill_ skip64 : Elapsed 0.010 ms (1.031 ms / 100) 1.028 -> 1.030 ( +0.19%) [ +0.19% +0.39% +0.00% / +0.19% +1.65% +1.75%] index_fill_ skip256 : Elapsed 0.010 ms (1.030 ms / 100) 1.023 -> 1.023 ( +0.00%) [ +0.20% +0.10% +0.00% / +0.00% +1.37% +1.47%] index_fill_ spread : Elapsed 0.010 ms (1.025 ms / 100) 1.020 -> 1.021 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +1.57% +1.47%] index_fill_ strided 3 : Elapsed 0.010 ms (1.021 ms / 100) 1.021 -> 1.024 ( +0.29%) [ +0.00% +0.10% +0.10% / +0.29% +1.67% +1.57%] index_fill_ strided 5 : Elapsed 0.010 ms (1.021 ms / 100) 1.024 -> 1.024 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.98% +1.66%] index_fill_ strided 7 : Elapsed 0.010 ms (1.024 ms / 100) 1.026 -> 1.026 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +1.07% +0.97%] index_fill_ strided 8 : Elapsed 0.010 ms (1.026 ms / 100) 1.024 -> 1.024 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +1.37% +1.27%] index_fill_ strided 16 : Elapsed 0.010 ms (1.025 ms / 100) 1.022 -> 1.023 ( +0.10%) [ +0.20% +0.00% +0.00% / +0.10% +1.57% +1.47%] index_fill_ random : Elapsed 0.010 ms (1.024 ms / 100) 1.027 -> 1.027 ( +0.00%) [ +0.00% +0.10% +0.19% / +0.00% +0.78% +0.68%] index_fill_ random_sorted : Elapsed 0.010 ms (1.027 ms / 100) 1.027 -> 1.028 ( +0.10%) [ +0.00% +0.00% +0.19% / +0.10% +0.78% +0.58%] index_fill_ perm : Elapsed 0.010 ms (1.027 ms / 100) 1.028 -> 1.028 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.39% +0.49%] index_fill_ perm_sorted : Elapsed 0.010 ms (1.028 ms / 100) B = [4, 5, 40, 20] (stride (1, 4, 400, 20)) A = [4, 5, 16, 20] (stride (100, 1, 400, 5)) dim = 2 3.975 -> 3.972 ( -0.08%) [ +0.08% +0.00% +0.03% / -0.08% +0.63% +0.40%] index_add_ linear : Elapsed 0.040 ms (3.978 ms / 100) 3.844 -> 3.843 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.60% +0.55%] index_copy_ linear : Elapsed 0.038 ms (3.846 ms / 100) 3.965 -> 3.971 ( +0.15%) [ +0.00% +0.13% +0.15% / +0.15% +0.68% +0.63%] index_add_ reverse : Elapsed 0.040 ms (3.965 ms / 100) 3.837 -> 3.841 ( +0.10%) [ +0.00% +0.05% +0.08% / +0.10% +0.60% +0.52%] index_copy_ reverse : Elapsed 0.038 ms (3.837 ms / 100) 3.967 -> 3.974 ( +0.18%) [ +0.00% +0.23% +0.18% / +0.18% +0.68% +0.60%] index_add_ spread : Elapsed 0.040 ms (3.967 ms / 100) 3.840 -> 3.848 ( +0.21%) [ +0.00% +0.18% +0.13% / +0.21% +0.63% +0.65%] index_copy_ spread : Elapsed 0.038 ms (3.840 ms / 100) 3.968 -> 3.969 ( +0.03%) [ +0.00% +0.15% +0.13% / +0.03% +0.66% +0.60%] index_add_ strided 3 : Elapsed 0.040 ms (3.968 ms / 100) 3.844 -> 3.840 ( -0.10%) [ +0.00% +0.00% +0.00% / -0.10% +0.55% +0.49%] index_copy_ strided 3 : Elapsed 0.038 ms (3.844 ms / 100) 3.968 -> 3.968 ( +0.00%) [ +0.05% +0.18% +0.00% / +0.00% +0.53% +0.45%] index_add_ strided 7 : Elapsed 0.040 ms (3.970 ms / 100) 3.835 -> 3.838 ( +0.08%) [ +0.05% +0.23% +0.00% / +0.08% +0.63% +0.52%] index_copy_ strided 7 : Elapsed 0.038 ms (3.837 ms / 100) 3.971 -> 3.980 ( +0.23%) [ +0.05% +0.28% +0.00% / +0.23% +0.48% +0.68%] index_add_ perm : Elapsed 0.040 ms (3.973 ms / 100) 3.841 -> 3.849 ( +0.21%) [ +0.00% +0.23% +0.03% / +0.21% +0.52% +0.60%] index_copy_ perm : Elapsed 0.038 ms (3.841 ms / 100) 3.973 -> 3.976 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.40% +0.55%] index_add_ perm_sorted : Elapsed 0.040 ms (3.976 ms / 100) 3.842 -> 3.844 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.05% +0.44% +0.55%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.845 ms / 100) 5.559 -> 5.553 ( -0.11%) [ +0.07% +0.05% +0.00% / -0.11% +0.23% +0.09%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.561 -> 5.556 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.09% +0.16% +0.09%] index_select wrap : Elapsed 0.056 ms (5.562 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.00% +0.04% +0.02% / +0.02% -0.02% -0.02%] index_select linear : Elapsed 0.056 ms (5.562 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.04% +0.11% +0.00% / +0.05% +0.13% +0.09%] index_select reverse : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.00% +0.04% +0.04% / +0.09% -0.02% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.00% +0.07% +0.05% / +0.07% -0.04% -0.13%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.05% +0.00% +0.07%] index_select spread : Elapsed 0.056 ms (5.568 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.00% +0.07% +0.07% / +0.04% +0.25% +0.22%] index_select strided 3 : Elapsed 0.056 ms (5.556 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.00% +0.07% +0.11% / +0.00% +0.00% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.561 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.09% +0.07% +0.00% / +0.02% +0.09% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.555 -> 5.559 ( +0.07%) [ +0.16% +0.02% +0.00% / +0.18% +0.07% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.560 -> 5.560 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.05% +0.05%] index_select random : Elapsed 0.056 ms (5.560 ms / 100) 5.559 -> 5.562 ( +0.05%) [ +0.07% +0.04% +0.00% / +0.05% +0.05% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [4, 5, 40, 20] (stride (1, 4, 20, 800)) A = [4, 5, 16, 20] (stride (100, 1, 400, 5)) dim = 2 3.967 -> 3.968 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.73% +0.71%] index_add_ linear : Elapsed 0.040 ms (3.967 ms / 100) 3.841 -> 3.843 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.70% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.842 ms / 100) 3.975 -> 3.982 ( +0.18%) [ +0.20% +0.20% +0.00% / +0.18% +0.70% +0.91%] index_add_ reverse : Elapsed 0.040 ms (3.983 ms / 100) 3.850 -> 3.851 ( +0.03%) [ +0.10% +0.10% +0.00% / +0.03% +0.81% +0.81%] index_copy_ reverse : Elapsed 0.039 ms (3.854 ms / 100) 3.972 -> 3.978 ( +0.15%) [ +0.03% +0.08% +0.00% / +0.15% +0.81% +0.73%] index_add_ spread : Elapsed 0.040 ms (3.973 ms / 100) 3.857 -> 3.858 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.78% +0.75%] index_copy_ spread : Elapsed 0.039 ms (3.860 ms / 100) 3.978 -> 3.976 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.65% +0.80%] index_add_ strided 3 : Elapsed 0.040 ms (3.978 ms / 100) 3.848 -> 3.846 ( -0.05%) [ +0.05% +0.00% +0.13% / -0.05% +0.70% +0.68%] index_copy_ strided 3 : Elapsed 0.038 ms (3.850 ms / 100) 3.977 -> 3.982 ( +0.13%) [ +0.10% +0.05% +0.00% / +0.13% +0.58% +0.88%] index_add_ strided 7 : Elapsed 0.040 ms (3.981 ms / 100) 3.850 -> 3.855 ( +0.13%) [ +0.00% +0.08% +0.13% / +0.13% +0.75% +0.91%] index_copy_ strided 7 : Elapsed 0.039 ms (3.850 ms / 100) 3.966 -> 3.966 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.040 ms (3.967 ms / 100) 3.840 -> 3.841 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.78% +0.78%] index_copy_ perm : Elapsed 0.038 ms (3.840 ms / 100) 3.970 -> 3.980 ( +0.25%) [ +0.00% +0.23% +0.43% / +0.25% +0.93% +1.01%] index_add_ perm_sorted : Elapsed 0.040 ms (3.970 ms / 100) 3.839 -> 3.846 ( +0.18%) [ +0.00% +0.13% +0.60% / +0.18% +0.91% +0.96%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.839 ms / 100) 5.561 -> 5.562 ( +0.02%) [ +0.05% +0.00% +0.14% / +0.05% +0.20% +0.02%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.571 -> 5.561 ( -0.18%) [ +0.00% +0.07% +0.14% / +0.00% -0.18% -0.09%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.566 -> 5.570 ( +0.07%) [ +0.14% +0.11% +0.00% / +0.07% +0.14% +0.14%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.00% +0.11% +0.11% / +0.07% +0.05% +0.00%] index_select reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.02% +0.00% +0.18% / +0.16% +0.09% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.559 -> 5.562 ( +0.05%) [ +0.00% +0.16% +0.07% / +0.13% +0.16% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.565 -> 5.562 ( -0.05%) [ +0.16% +0.00% +0.07% / +0.13% +0.05% -0.05%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.16% +0.16% +0.00% / +0.04% +0.05% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.09% +0.00% / -0.02% +0.14% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.566 ms / 100) 5.565 -> 5.572 ( +0.13%) [ +0.00% +0.02% +0.18% / +0.16% +0.13% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.565 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.07% +0.02% +0.00% / -0.07% +0.18% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.11% +0.02% +0.00% / +0.04% -0.13% -0.05%] index_select random : Elapsed 0.056 ms (5.575 ms / 100) 5.571 -> 5.569 ( -0.04%) [ +0.00% +0.00% +0.09% / +0.00% -0.04% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) out_shape = [4, 5, 16, 40] in_shape = [4, 5, 16, 20] idx_dim = 3 B = [4, 5, 16, 40] (stride (3200, 640, 1, 16)) A = [4, 5, 16, 20] (stride (100, 1, 400, 5)) dim = 3 2.400 -> 2.412 ( +0.50%) [ +0.00% +0.13% +0.13% / +0.50% +1.00% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.400 ms / 100) 2.393 -> 2.408 ( +0.63%) [ +0.08% +0.08% +0.00% / +0.63% +1.00% +1.17%] index_copy_ linear : Elapsed 0.024 ms (2.395 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.00% +0.08% +0.08% / +0.54% +0.58% +0.54%] index_add_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.394 -> 2.409 ( +0.63%) [ +0.00% +0.00% +0.13% / +0.63% +0.79% +0.75%] index_copy_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.403 -> 2.416 ( +0.54%) [ +0.00% +0.08% +0.00% / +0.54% +0.62% +0.54%] index_add_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.391 -> 2.404 ( +0.54%) [ +0.25% +0.25% +0.00% / +0.54% +0.79% +0.79%] index_copy_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.00% +0.00% +0.12% / +0.42% +0.67% +0.62%] index_add_ strided 3 : Elapsed 0.024 ms (2.404 ms / 100) 2.393 -> 2.406 ( +0.54%) [ +0.13% +0.00% +0.21% / +0.54% +0.79% +0.63%] index_copy_ strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.21% +0.17% +0.00% / +0.75% +0.54% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.393 -> 2.406 ( +0.54%) [ +0.04% +0.00% +0.04% / +0.67% +0.54% +0.59%] index_copy_ strided 7 : Elapsed 0.024 ms (2.394 ms / 100) 2.402 -> 2.414 ( +0.50%) [ +0.00% +0.00% +0.04% / +0.50% +0.67% +0.79%] index_add_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.394 -> 2.408 ( +0.58%) [ +0.08% +0.04% +0.00% / +0.58% +0.84% +0.92%] index_copy_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.400 -> 2.416 ( +0.67%) [ +0.00% +0.25% +0.08% / +0.67% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.400 ms / 100) 2.390 -> 2.410 ( +0.84%) [ +0.25% +0.00% +0.38% / +0.84% +0.92% +0.96%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 4.412 -> 4.419 ( +0.16%) [ +0.00% +0.09% +0.29% / +0.16% +0.18% +0.29%] index_select const : Elapsed 0.044 ms (4.412 ms / 100) 4.422 -> 4.428 ( +0.14%) [ +0.11% +0.07% +0.00% / +0.14% +0.32% +0.29%] index_select wrap : Elapsed 0.044 ms (4.427 ms / 100) 4.427 -> 4.426 ( -0.02%) [ +0.05% +0.14% +0.00% / -0.02% +0.09% +0.18%] index_select linear : Elapsed 0.044 ms (4.429 ms / 100) 4.425 -> 4.427 ( +0.05%) [ +0.11% +0.25% +0.00% / +0.09% +0.05% +0.18%] index_select reverse : Elapsed 0.044 ms (4.430 ms / 100) 4.415 -> 4.422 ( +0.16%) [ +0.14% +0.00% +0.00% / +0.20% +0.16% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.421 ms / 100) 4.409 -> 4.419 ( +0.23%) [ +0.16% +0.00% +0.27% / +0.23% +0.25% +0.36%] index_select skip256 : Elapsed 0.044 ms (4.416 ms / 100) 4.425 -> 4.424 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.29% +0.20%] index_select spread : Elapsed 0.044 ms (4.425 ms / 100) 4.421 -> 4.430 ( +0.20%) [ +0.11% +0.00% +0.23% / +0.23% +0.20% +0.23%] index_select strided 3 : Elapsed 0.044 ms (4.426 ms / 100) 4.420 -> 4.425 ( +0.11%) [ +0.02% +0.00% +0.02% / +0.11% +0.20% +0.36%] index_select strided 5 : Elapsed 0.044 ms (4.421 ms / 100) 4.418 -> 4.426 ( +0.18%) [ +0.00% +0.23% +0.05% / +0.36% +0.43% +0.18%] index_select strided 7 : Elapsed 0.044 ms (4.418 ms / 100) 4.421 -> 4.424 ( +0.07%) [ +0.11% +0.09% +0.00% / +0.07% +0.29% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.426 ms / 100) 4.417 -> 4.424 ( +0.16%) [ +0.20% +0.41% +0.00% / +0.16% +0.34% +0.34%] index_select strided 16 : Elapsed 0.044 ms (4.426 ms / 100) 4.423 -> 4.431 ( +0.18%) [ +0.00% +0.02% +0.09% / +0.18% +0.20% +0.27%] index_select random : Elapsed 0.044 ms (4.423 ms / 100) 4.423 -> 4.428 ( +0.11%) [ +0.02% +0.02% +0.00% / +0.11% +0.23% +0.34%] index_select random_sorted : Elapsed 0.044 ms (4.424 ms / 100) B = [4, 5, 16, 40] (stride (3200, 1, 5, 80)) A = [4, 5, 16, 20] (stride (1, 1280, 80, 4)) dim = 3 2.404 -> 2.414 ( +0.42%) [ +0.00% +0.33% +0.04% / +0.42% +0.62% +0.71%] index_add_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.406 -> 2.416 ( +0.42%) [ +0.12% +0.04% +0.00% / +0.42% +0.62% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.398 -> 2.415 ( +0.71%) [ +0.17% +0.29% +0.00% / +0.71% +0.88% +0.96%] index_add_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.400 -> 2.414 ( +0.58%) [ +0.00% +0.13% +0.21% / +0.58% +1.00% +0.96%] index_copy_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.399 -> 2.412 ( +0.54%) [ +0.00% +0.04% +0.17% / +0.54% +1.08% +1.08%] index_add_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.397 -> 2.412 ( +0.63%) [ +0.04% +0.00% +0.21% / +0.63% +1.17% +1.13%] index_copy_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.12% +0.00% +0.29% / +0.58% +0.71% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.413 ( +0.29%) [ +0.00% +0.04% +0.12% / +0.37% +0.29% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.21% +0.25% +0.00% / +0.54% +0.75% +0.75%] index_add_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.419 ( +0.62%) [ +0.04% +0.25% +0.00% / +0.62% +0.67% +0.79%] index_copy_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.406 -> 2.413 ( +0.29%) [ +0.00% +0.17% +0.62% / +0.62% +0.29% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.408 -> 2.413 ( +0.21%) [ +0.00% +0.04% +0.21% / +0.62% +0.21% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.407 -> 2.410 ( +0.12%) [ +0.08% +0.00% +0.04% / +0.46% +0.21% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.412 ( +0.33%) [ +0.12% +0.12% +0.00% / +0.92% +0.54% +0.33%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) 4.433 -> 4.432 ( -0.02%) [ +0.07% +0.00% +0.02% / +0.23% -0.02% +0.25%] index_select const : Elapsed 0.044 ms (4.436 ms / 100) 4.444 -> 4.444 ( +0.00%) [ +0.09% +0.00% +0.05% / +0.00% +0.05% +0.09%] index_select wrap : Elapsed 0.044 ms (4.448 ms / 100) 4.442 -> 4.442 ( +0.00%) [ +0.00% +0.16% +0.11% / +0.00% +0.07% +0.05%] index_select linear : Elapsed 0.044 ms (4.442 ms / 100) 4.450 -> 4.448 ( -0.04%) [ +0.00% +0.02% +0.04% / -0.04% -0.02% +0.00%] index_select reverse : Elapsed 0.044 ms (4.450 ms / 100) 4.436 -> 4.434 ( -0.05%) [ +0.00% +0.05% +0.23% / +0.05% +0.02% -0.05%] index_select skip64 : Elapsed 0.044 ms (4.436 ms / 100) 4.430 -> 4.432 ( +0.05%) [ +0.00% +0.25% +0.23% / +0.05% +0.09% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.430 ms / 100) 4.440 -> 4.446 ( +0.14%) [ +0.00% +0.14% +0.11% / +0.14% +0.29% +0.23%] index_select spread : Elapsed 0.044 ms (4.440 ms / 100) 4.440 -> 4.448 ( +0.18%) [ +0.14% +0.00% +0.07% / +0.18% +0.20% +0.23%] index_select strided 3 : Elapsed 0.044 ms (4.446 ms / 100) 4.436 -> 4.438 ( +0.05%) [ +0.20% +0.00% +0.18% / +0.05% +0.11% +0.18%] index_select strided 5 : Elapsed 0.044 ms (4.445 ms / 100) 4.438 -> 4.446 ( +0.18%) [ +0.07% +0.05% +0.00% / +0.18% +0.18% +0.29%] index_select strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.18% +0.09% +0.00% / +0.18% +0.11% +0.20%] index_select strided 8 : Elapsed 0.044 ms (4.444 ms / 100) 4.439 -> 4.435 ( -0.09%) [ +0.05% +0.00% +0.00% / -0.09% +0.09% +0.11%] index_select strided 16 : Elapsed 0.044 ms (4.441 ms / 100) 4.449 -> 4.442 ( -0.16%) [ +0.02% +0.11% +0.00% / -0.02% -0.16% -0.02%] index_select random : Elapsed 0.045 ms (4.450 ms / 100) 4.445 -> 4.445 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.09% +0.00% +0.11%] index_select random_sorted : Elapsed 0.044 ms (4.447 ms / 100) B = [4, 5, 16, 40] (stride (640, 2560, 40, 1)) A = [4, 5, 16, 20] (stride (1600, 1, 5, 80)) dim = 3 2.440 -> 2.450 ( +0.41%) [ +0.12% +0.20% +0.00% / +0.41% +0.61% +0.86%] index_add_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.452 -> 2.461 ( +0.37%) [ +0.04% +0.00% +0.08% / +0.37% +0.57% +0.69%] index_copy_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.445 -> 2.451 ( +0.25%) [ +0.00% +0.20% +0.08% / +0.25% +0.45% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.445 ms / 100) 2.453 -> 2.463 ( +0.41%) [ +0.08% +0.08% +0.00% / +0.41% +0.53% +0.53%] index_copy_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.455 -> 2.464 ( +0.37%) [ +0.00% +0.41% +0.16% / +0.45% +0.45% +0.37%] index_add_ spread : Elapsed 0.025 ms (2.455 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.00% +0.04% +0.08% / +0.45% +0.57% +0.28%] index_copy_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.457 -> 2.464 ( +0.28%) [ +0.00% +0.00% +0.04% / +0.61% +0.28% +0.53%] index_add_ strided 3 : Elapsed 0.025 ms (2.457 ms / 100) 2.467 -> 2.479 ( +0.49%) [ +0.00% +0.04% +0.04% / +0.49% +0.53% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.458 -> 2.460 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.53% +0.08% +0.45%] index_add_ strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.467 -> 2.478 ( +0.45%) [ +0.00% +0.08% +0.12% / +0.57% +0.53% +0.45%] index_copy_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.454 -> 2.467 ( +0.53%) [ +0.12% +0.08% +0.00% / +0.57% +0.73% +0.53%] index_add_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.469 -> 2.478 ( +0.36%) [ +0.08% +0.12% +0.00% / +0.36% +0.77% +0.41%] index_copy_ perm : Elapsed 0.025 ms (2.471 ms / 100) 2.454 -> 2.467 ( +0.53%) [ +0.04% +0.04% +0.00% / +0.61% +0.53% +0.65%] index_add_ perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) 2.468 -> 2.478 ( +0.41%) [ +0.24% +0.12% +0.00% / +0.57% +0.41% +0.81%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) 4.493 -> 4.489 ( -0.09%) [ +0.11% +0.16% +0.00% / -0.09% -0.09% +0.02%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.497 -> 4.496 ( -0.02%) [ +0.13% +0.13% +0.00% / +0.04% +0.18% -0.02%] index_select wrap : Elapsed 0.045 ms (4.503 ms / 100) 4.495 -> 4.498 ( +0.07%) [ +0.13% +0.00% +0.09% / +0.07% +0.22% +0.13%] index_select linear : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.498 ( -0.04%) [ +0.20% +0.18% +0.00% / -0.04% +0.27% +0.07%] index_select reverse : Elapsed 0.045 ms (4.509 ms / 100) 4.499 -> 4.493 ( -0.13%) [ +0.00% +0.04% +0.02% / -0.04% +0.04% -0.13%] index_select skip64 : Elapsed 0.045 ms (4.499 ms / 100) 4.495 -> 4.490 ( -0.11%) [ +0.20% +0.00% +0.07% / +0.11% -0.11% -0.04%] index_select skip256 : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.505 ( +0.11%) [ +0.09% +0.00% +0.04% / +0.16% +0.11% +0.22%] index_select spread : Elapsed 0.045 ms (4.504 ms / 100) 4.501 -> 4.498 ( -0.07%) [ +0.04% +0.00% +0.00% / -0.07% -0.04% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.503 ms / 100) 4.494 -> 4.490 ( -0.09%) [ +0.09% +0.00% +0.20% / +0.07% +0.07% -0.09%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.20% +0.20%] index_select strided 7 : Elapsed 0.045 ms (4.497 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.09% +0.00% +0.07% / +0.13% +0.11% +0.22%] index_select strided 8 : Elapsed 0.045 ms (4.497 ms / 100) 4.494 -> 4.496 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.09% +0.29% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.494 ms / 100) 4.495 -> 4.501 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.13% +0.16% +0.33%] index_select random : Elapsed 0.045 ms (4.497 ms / 100) 4.500 -> 4.497 ( -0.07%) [ +0.13% +0.04% +0.00% / -0.07% +0.09% -0.07%] index_select random_sorted : Elapsed 0.045 ms (4.506 ms / 100) B = [4, 5, 16, 40] (stride (40, 2560, 160, 1)) A = [4, 5, 16, 20] (stride (80, 1, 5, 320)) dim = 3 2.394 -> 2.399 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.21% +0.75% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.398 -> 2.411 ( +0.54%) [ +0.17% +0.00% +0.00% / +0.54% +0.79% +0.83%] index_copy_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.387 -> 2.404 ( +0.71%) [ +0.13% +0.04% +0.00% / +0.71% +0.88% +1.13%] index_add_ reverse : Elapsed 0.024 ms (2.390 ms / 100) 2.392 -> 2.405 ( +0.54%) [ +0.25% +0.00% +0.00% / +0.54% +1.05% +0.92%] index_copy_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.402 -> 2.415 ( +0.54%) [ +0.00% +0.00% +0.00% / +0.54% +0.92% +0.87%] index_add_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.411 -> 2.425 ( +0.58%) [ +0.04% +0.00% +0.04% / +0.58% +1.20% +1.24%] index_copy_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.17% +0.00% +0.08% / +0.50% +0.46% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.416 -> 2.430 ( +0.58%) [ +0.00% +0.00% +0.00% / +0.58% +0.91% +0.83%] index_copy_ strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.409 -> 2.418 ( +0.37%) [ +0.08% +0.00% +0.17% / +0.37% +0.50% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.417 -> 2.429 ( +0.50%) [ +0.17% +0.00% +0.04% / +0.50% +0.58% +0.74%] index_copy_ strided 7 : Elapsed 0.024 ms (2.421 ms / 100) 2.414 -> 2.412 ( -0.08%) [ +0.00% +0.08% +0.00% / +0.41% -0.08% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.419 -> 2.424 ( +0.21%) [ +0.00% +0.04% +0.04% / +0.66% +0.33% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.419 ms / 100) 2.409 -> 2.415 ( +0.25%) [ +0.25% +0.12% +0.00% / +0.37% +0.25% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.421 -> 2.427 ( +0.25%) [ +0.12% +0.00% +0.00% / +0.54% +0.45% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) 4.422 -> 4.421 ( -0.02%) [ +0.00% +0.07% +0.05% / -0.02% +0.02% +0.02%] index_select const : Elapsed 0.044 ms (4.422 ms / 100) 4.429 -> 4.424 ( -0.11%) [ +0.02% +0.16% +0.00% / +0.09% -0.09% -0.11%] index_select wrap : Elapsed 0.044 ms (4.430 ms / 100) 4.424 -> 4.417 ( -0.16%) [ +0.07% +0.00% +0.05% / -0.16% +0.18% +0.09%] index_select linear : Elapsed 0.044 ms (4.427 ms / 100) 4.427 -> 4.424 ( -0.07%) [ +0.14% +0.14% +0.00% / +0.16% -0.02% -0.07%] index_select reverse : Elapsed 0.044 ms (4.433 ms / 100) 4.422 -> 4.418 ( -0.09%) [ +0.00% +0.02% +0.07% / +0.02% -0.09% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.422 ms / 100) 4.420 -> 4.413 ( -0.16%) [ +0.00% +0.07% +0.00% / -0.16% +0.00% +0.00%] index_select skip256 : Elapsed 0.044 ms (4.420 ms / 100) 4.424 -> 4.429 ( +0.11%) [ +0.05% +0.00% +0.05% / +0.11% +0.27% +0.23%] index_select spread : Elapsed 0.044 ms (4.426 ms / 100) 4.423 -> 4.429 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.20% +0.23%] index_select strided 3 : Elapsed 0.044 ms (4.429 ms / 100) 4.414 -> 4.412 ( -0.05%) [ +0.18% +0.14% +0.00% / -0.05% +0.18% +0.25%] index_select strided 5 : Elapsed 0.044 ms (4.422 ms / 100) 4.423 -> 4.426 ( +0.07%) [ +0.00% +0.07% +0.16% / +0.07% +0.09% +0.18%] index_select strided 7 : Elapsed 0.044 ms (4.423 ms / 100) 4.421 -> 4.413 ( -0.18%) [ +0.18% +0.14% +0.00% / +0.05% -0.02% -0.18%] index_select strided 8 : Elapsed 0.044 ms (4.429 ms / 100) 4.423 -> 4.420 ( -0.07%) [ +0.18% +0.00% +0.05% / -0.05% +0.07% -0.07%] index_select strided 16 : Elapsed 0.044 ms (4.431 ms / 100) 4.429 -> 4.425 ( -0.09%) [ +0.07% +0.16% +0.00% / +0.05% -0.05% -0.09%] index_select random : Elapsed 0.044 ms (4.432 ms / 100) 4.431 -> 4.423 ( -0.18%) [ +0.05% +0.00% +0.14% / +0.02% -0.09% -0.18%] index_select random_sorted : Elapsed 0.044 ms (4.433 ms / 100) B = [4, 5, 16, 40] (stride (1, 4, 800, 20)) A = [4, 5, 16, 20] (stride (1600, 1, 100, 5)) dim = 3 2.448 -> 2.459 ( +0.45%) [ +0.00% +0.20% +0.16% / +0.45% +0.69% +0.69%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.466 ( +0.74%) [ +0.08% +0.04% +0.00% / +0.78% +0.74% +0.90%] index_copy_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.462 ( +0.49%) [ +0.00% +0.24% +0.00% / +0.49% +0.61% +0.53%] index_add_ reverse : Elapsed 0.025 ms (2.450 ms / 100) 2.451 -> 2.462 ( +0.45%) [ +0.08% +0.00% +0.16% / +0.45% +0.57% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.453 ms / 100) 2.456 -> 2.470 ( +0.57%) [ +0.12% +0.29% +0.00% / +0.65% +0.57% +0.61%] index_add_ spread : Elapsed 0.025 ms (2.459 ms / 100) 2.459 -> 2.474 ( +0.61%) [ +0.00% +0.20% +0.24% / +0.61% +0.89% +0.89%] index_copy_ spread : Elapsed 0.025 ms (2.459 ms / 100) 2.451 -> 2.468 ( +0.69%) [ +0.00% +0.08% +0.37% / +0.69% +0.78% +0.69%] index_add_ strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.458 -> 2.472 ( +0.57%) [ +0.08% +0.08% +0.00% / +0.57% +0.77% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.464 ( +0.37%) [ +0.04% +0.00% +0.00% / +0.61% +0.65% +0.37%] index_add_ strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.460 -> 2.471 ( +0.45%) [ +0.00% +0.12% +0.12% / +0.53% +0.69% +0.45%] index_copy_ strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.453 -> 2.467 ( +0.57%) [ +0.00% +0.24% +0.16% / +0.57% +0.69% +0.65%] index_add_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.451 -> 2.474 ( +0.94%) [ +0.00% +0.24% +0.12% / +1.02% +0.94% +0.94%] index_copy_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.454 -> 2.466 ( +0.49%) [ +0.16% +0.00% +0.16% / +0.61% +0.57% +0.49%] index_add_ perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.451 -> 2.471 ( +0.82%) [ +0.00% +0.49% +0.24% / +0.82% +0.90% +0.86%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.451 ms / 100) 4.494 -> 4.501 ( +0.16%) [ +0.00% +0.09% +0.24% / +0.22% +0.29% +0.16%] index_select const : Elapsed 0.045 ms (4.494 ms / 100) 4.500 -> 4.504 ( +0.09%) [ +0.00% +0.00% +0.13% / +0.09% +0.20% +0.20%] index_select wrap : Elapsed 0.045 ms (4.500 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.09% +0.13% +0.00% / +0.22% +0.27% +0.07%] index_select linear : Elapsed 0.045 ms (4.509 ms / 100) 4.501 -> 4.509 ( +0.18%) [ +0.29% +0.18% +0.00% / +0.18% +0.42% +0.31%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.502 -> 4.499 ( -0.07%) [ +0.07% +0.04% +0.00% / -0.07% +0.00% -0.04%] index_select skip64 : Elapsed 0.045 ms (4.505 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.00% +0.04% +0.13% / +0.07% +0.20% +0.20%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.00% +0.04% +0.13% / -0.02% +0.13% +0.31%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.02% +0.00% +0.09% / +0.07% +0.29% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.502 ms / 100) 4.495 -> 4.505 ( +0.22%) [ +0.20% +0.09% +0.00% / +0.22% +0.29% +0.29%] index_select strided 5 : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.506 ( +0.13%) [ +0.00% +0.13% +0.29% / +0.31% +0.13% +0.31%] index_select strided 7 : Elapsed 0.045 ms (4.500 ms / 100) 4.495 -> 4.501 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.31% +0.38%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.496 -> 4.494 ( -0.04%) [ +0.18% +0.16% +0.00% / -0.04% +0.40% +0.38%] index_select strided 16 : Elapsed 0.045 ms (4.504 ms / 100) 4.505 -> 4.497 ( -0.18%) [ +0.04% +0.04% +0.00% / -0.18% +0.31% +0.20%] index_select random : Elapsed 0.045 ms (4.507 ms / 100) 4.506 -> 4.502 ( -0.09%) [ +0.04% +0.00% +0.02% / -0.09% +0.16% +0.24%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [4, 5, 16, 40] (stride (1, 4, 800, 20)) A = [4, 5, 16, 20] (stride (1, 4, 20, 320)) dim = 3 2.446 -> 2.459 ( +0.53%) [ +0.08% +0.08% +0.00% / +0.57% +0.61% +0.53%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.442 -> 2.454 ( +0.49%) [ +0.04% +0.29% +0.00% / +0.49% +1.02% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.443 -> 2.455 ( +0.49%) [ +0.00% +0.16% +0.16% / +0.49% +0.90% +0.82%] index_add_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.448 ( +0.45%) [ +0.08% +0.00% +0.21% / +0.45% +1.15% +1.19%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.450 -> 2.461 ( +0.45%) [ +0.12% +0.00% +0.08% / +0.45% +0.73% +0.78%] index_add_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.464 ( +0.45%) [ +0.00% +0.08% +0.04% / +0.45% +1.02% +0.94%] index_copy_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.454 -> 2.464 ( +0.41%) [ +0.41% +0.12% +0.00% / +0.53% +0.45% +0.41%] index_add_ strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.454 -> 2.467 ( +0.53%) [ +0.04% +0.00% +0.04% / +0.53% +0.69% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.456 -> 2.466 ( +0.41%) [ +0.04% +0.00% +0.04% / +0.41% +0.49% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.457 -> 2.470 ( +0.53%) [ +0.16% +0.00% +0.12% / +0.53% +0.57% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.452 -> 2.461 ( +0.37%) [ +0.00% +0.08% +0.04% / +0.65% +0.45% +0.37%] index_add_ perm : Elapsed 0.025 ms (2.452 ms / 100) 2.456 -> 2.465 ( +0.37%) [ +0.00% +0.12% +0.24% / +0.57% +0.37% +0.49%] index_copy_ perm : Elapsed 0.025 ms (2.456 ms / 100) 2.458 -> 2.457 ( -0.04%) [ +0.04% +0.12% +0.00% / +0.45% +0.08% -0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.466 ( +0.45%) [ +0.16% +0.00% +0.16% / +0.73% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) 4.496 -> 4.498 ( +0.04%) [ +0.18% +0.07% +0.00% / +0.09% +0.24% +0.04%] index_select const : Elapsed 0.045 ms (4.504 ms / 100) 4.505 -> 4.505 ( +0.00%) [ +0.11% +0.13% +0.00% / +0.02% +0.00% +0.07%] index_select wrap : Elapsed 0.045 ms (4.510 ms / 100) 4.506 -> 4.508 ( +0.04%) [ +0.07% +0.00% +0.07% / +0.04% +0.04% +0.16%] index_select linear : Elapsed 0.045 ms (4.509 ms / 100) 4.505 -> 4.502 ( -0.07%) [ +0.29% +0.07% +0.00% / +0.20% -0.07% +0.16%] index_select reverse : Elapsed 0.045 ms (4.518 ms / 100) 4.499 -> 4.495 ( -0.09%) [ +0.00% +0.02% +0.00% / +0.09% +0.13% -0.09%] index_select skip64 : Elapsed 0.045 ms (4.499 ms / 100) 4.496 -> 4.499 ( +0.07%) [ +0.00% +0.09% +0.11% / +0.18% +0.20% +0.07%] index_select skip256 : Elapsed 0.045 ms (4.496 ms / 100) 4.508 -> 4.507 ( -0.02%) [ +0.07% +0.00% +0.02% / -0.02% +0.07% +0.22%] index_select spread : Elapsed 0.045 ms (4.511 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.27% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.503 ms / 100) 4.496 -> 4.500 ( +0.09%) [ +0.13% +0.00% +0.07% / +0.09% +0.29% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.503 -> 4.503 ( +0.00%) [ +0.07% +0.04% +0.00% / +0.00% +0.18% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.506 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.18% +0.18% +0.00% / -0.09% +0.11% +0.13%] index_select strided 8 : Elapsed 0.045 ms (4.506 ms / 100) 4.503 -> 4.499 ( -0.09%) [ +0.04% +0.00% +0.07% / +0.16% -0.09% +0.09%] index_select strided 16 : Elapsed 0.045 ms (4.505 ms / 100) 4.506 -> 4.507 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.20% +0.02% +0.04%] index_select random : Elapsed 0.045 ms (4.506 ms / 100) 4.510 -> 4.504 ( -0.13%) [ +0.00% +0.02% +0.09% / +0.09% -0.13% -0.13%] index_select random_sorted : Elapsed 0.045 ms (4.510 ms / 100) B = [4, 5, 16, 40] (stride (16, 64, 1, 320)) A = [4, 5, 16, 20] (stride (1, 1280, 4, 64)) dim = 3 2.439 -> 2.448 ( +0.37%) [ +0.08% +0.00% +0.12% / +0.37% +0.53% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.441 ms / 100) 2.440 -> 2.449 ( +0.37%) [ +0.04% +0.00% +0.04% / +0.37% +0.45% +0.61%] index_copy_ linear : Elapsed 0.024 ms (2.441 ms / 100) 2.441 -> 2.450 ( +0.37%) [ +0.12% +0.00% +0.04% / +0.49% +0.41% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.444 ms / 100) 2.439 -> 2.452 ( +0.53%) [ +0.12% +0.00% +0.04% / +0.53% +0.70% +0.66%] index_copy_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.442 -> 2.453 ( +0.45%) [ +0.00% +0.16% +0.08% / +0.74% +0.45% +0.49%] index_add_ spread : Elapsed 0.024 ms (2.442 ms / 100) 2.441 -> 2.448 ( +0.29%) [ +0.00% +0.00% +0.12% / +0.57% +0.29% +0.45%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.444 -> 2.450 ( +0.25%) [ +0.04% +0.00% +0.00% / +0.49% +0.37% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.440 -> 2.448 ( +0.33%) [ +0.08% +0.12% +0.00% / +0.61% +0.33% +0.41%] index_copy_ strided 3 : Elapsed 0.024 ms (2.442 ms / 100) 2.445 -> 2.452 ( +0.29%) [ +0.08% +0.00% +0.20% / +0.45% +0.29% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.439 -> 2.451 ( +0.49%) [ +0.00% +0.08% +0.00% / +0.57% +0.62% +0.49%] index_copy_ strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.442 -> 2.452 ( +0.41%) [ +0.04% +0.00% +0.00% / +0.49% +0.41% +0.53%] index_add_ perm : Elapsed 0.024 ms (2.443 ms / 100) 2.437 -> 2.452 ( +0.62%) [ +0.21% +0.00% +0.25% / +0.62% +0.74% +0.70%] index_copy_ perm : Elapsed 0.024 ms (2.442 ms / 100) 2.438 -> 2.451 ( +0.53%) [ +0.25% +0.12% +0.00% / +0.53% +0.74% +0.62%] index_add_ perm_sorted : Elapsed 0.024 ms (2.444 ms / 100) 2.438 -> 2.451 ( +0.53%) [ +0.04% +0.16% +0.00% / +0.53% +0.74% +0.86%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 4.490 -> 4.496 ( +0.13%) [ +0.20% +0.00% +0.00% / +0.22% +0.22% +0.13%] index_select const : Elapsed 0.045 ms (4.499 ms / 100) 4.496 -> 4.491 ( -0.11%) [ +0.00% +0.09% +0.02% / -0.11% +0.13% -0.02%] index_select wrap : Elapsed 0.045 ms (4.496 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.20% +0.13% +0.11%] index_select linear : Elapsed 0.045 ms (4.498 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.02% +0.18% +0.00% / +0.11% +0.13% +0.22%] index_select reverse : Elapsed 0.045 ms (4.494 ms / 100) 4.491 -> 4.495 ( +0.09%) [ +0.02% +0.00% +0.18% / +0.16% +0.22% +0.09%] index_select skip64 : Elapsed 0.045 ms (4.492 ms / 100) 4.485 -> 4.490 ( +0.11%) [ +0.25% +0.18% +0.00% / +0.11% +0.20% +0.25%] index_select skip256 : Elapsed 0.045 ms (4.496 ms / 100) 4.493 -> 4.494 ( +0.02%) [ +0.13% +0.00% +0.00% / +0.11% +0.02% +0.13%] index_select spread : Elapsed 0.045 ms (4.499 ms / 100) 4.492 -> 4.494 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.22% +0.04% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.492 ms / 100) 4.491 -> 4.486 ( -0.11%) [ +0.00% +0.11% +0.16% / -0.11% +0.09% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.491 ms / 100) 4.494 -> 4.497 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.09% +0.07% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.497 ms / 100) 4.491 -> 4.498 ( +0.16%) [ +0.00% +0.22% +0.00% / +0.22% +0.16% +0.27%] index_select strided 8 : Elapsed 0.045 ms (4.491 ms / 100) 4.494 -> 4.488 ( -0.13%) [ +0.13% +0.07% +0.00% / -0.13% +0.00% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.500 ms / 100) 4.493 -> 4.494 ( +0.02%) [ +0.18% +0.09% +0.00% / +0.02% +0.22% +0.27%] index_select random : Elapsed 0.045 ms (4.501 ms / 100) 4.491 -> 4.494 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.27% +0.07% +0.33%] index_select random_sorted : Elapsed 0.045 ms (4.494 ms / 100) out_shape = [40, 5, 20, 16] in_shape = [4, 5, 20, 16] idx_dim = 0 B = [40, 5, 20, 16] (stride (1600, 320, 16, 1)) A = [4, 5, 20, 16] (stride (1600, 16, 80, 1)) dim = 0 1.145 -> 1.145 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ linear : Elapsed 0.011 ms (1.146 ms / 100) 1.108 -> 1.108 ( +0.00%) [ +0.27% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_copy_ linear : Elapsed 0.011 ms (1.111 ms / 100) 1.145 -> 1.144 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.70% +0.61%] index_add_ reverse : Elapsed 0.011 ms (1.145 ms / 100) 1.108 -> 1.107 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.45% +0.45%] index_copy_ reverse : Elapsed 0.011 ms (1.109 ms / 100) 1.145 -> 1.145 ( +0.00%) [ +0.17% +0.09% +0.00% / +0.00% +0.87% +0.79%] index_add_ spread : Elapsed 0.011 ms (1.147 ms / 100) 1.106 -> 1.107 ( +0.09%) [ +0.00% +0.18% +0.18% / +0.09% +0.72% +0.63%] index_copy_ spread : Elapsed 0.011 ms (1.106 ms / 100) 1.144 -> 1.145 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.79% +0.79%] index_add_ strided 3 : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.108 ( +0.09%) [ +0.00% +0.27% +0.09% / +0.09% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.011 ms (1.107 ms / 100) 1.144 -> 1.145 ( +0.09%) [ +0.17% +0.00% +0.00% / +0.09% +0.79% +0.79%] index_add_ strided 7 : Elapsed 0.011 ms (1.146 ms / 100) 1.108 -> 1.107 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.011 ms (1.108 ms / 100) 1.145 -> 1.145 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ perm : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.00% +0.18% +0.27% / +0.00% +0.54% +0.54%] index_copy_ perm : Elapsed 0.011 ms (1.107 ms / 100) 1.145 -> 1.145 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.011 ms (1.146 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.72% +0.63%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.107 ms / 100) 8.251 -> 8.261 ( +0.12%) [ +0.05% +0.00% +0.28% / +0.12% +0.15% +0.12%] index_select const : Elapsed 0.083 ms (8.255 ms / 100) 8.277 -> 8.281 ( +0.05%) [ +0.05% +0.14% +0.00% / +0.08% +0.27% +0.05%] index_select wrap : Elapsed 0.083 ms (8.281 ms / 100) 8.271 -> 8.267 ( -0.05%) [ +0.27% +0.12% +0.00% / -0.05% +0.18% +0.18%] index_select linear : Elapsed 0.083 ms (8.293 ms / 100) 8.276 -> 8.288 ( +0.14%) [ +0.00% +0.24% +0.07% / +0.21% +0.17% +0.14%] index_select reverse : Elapsed 0.083 ms (8.276 ms / 100) 8.245 -> 8.245 ( +0.00%) [ +0.12% +0.13% +0.00% / +0.00% +0.22% +0.27%] index_select skip64 : Elapsed 0.083 ms (8.255 ms / 100) 8.240 -> 8.252 ( +0.15%) [ +0.00% +0.34% +0.23% / +0.25% +0.15% +0.21%] index_select skip256 : Elapsed 0.082 ms (8.240 ms / 100) 8.275 -> 8.273 ( -0.02%) [ +0.00% +0.01% +0.00% / -0.02% +0.13% +0.21%] index_select spread : Elapsed 0.083 ms (8.275 ms / 100) 8.284 -> 8.268 ( -0.19%) [ +0.14% +0.00% +0.06% / -0.11% +0.21% -0.19%] index_select strided 3 : Elapsed 0.083 ms (8.296 ms / 100) 8.274 -> 8.293 ( +0.23%) [ +0.06% +0.00% +0.39% / +0.29% +0.23% +0.23%] index_select random : Elapsed 0.083 ms (8.279 ms / 100) 8.269 -> 8.284 ( +0.18%) [ +0.00% +0.15% +0.05% / +0.18% +0.25% +0.19%] index_select random_sorted : Elapsed 0.083 ms (8.269 ms / 100) B = [40, 5, 20, 16] (stride (1600, 320, 16, 1)) A = [4, 5, 20, 16] (stride (1600, 1, 80, 5)) dim = 0 1.067 -> 1.068 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.66% +0.66%] index_add_ linear : Elapsed 0.011 ms (1.068 ms / 100) 1.029 -> 1.029 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +1.07% +0.49%] index_copy_ linear : Elapsed 0.010 ms (1.030 ms / 100) 1.068 -> 1.069 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.37% +0.47%] index_add_ reverse : Elapsed 0.011 ms (1.069 ms / 100) 1.030 -> 1.030 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.010 ms (1.031 ms / 100) 1.068 -> 1.068 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_add_ spread : Elapsed 0.011 ms (1.069 ms / 100) 1.030 -> 1.029 ( -0.10%) [ +0.00% +0.00% +0.10% / -0.10% +0.39% +0.29%] index_copy_ spread : Elapsed 0.010 ms (1.030 ms / 100) 1.067 -> 1.068 ( +0.09%) [ +0.09% +0.28% +0.00% / +0.09% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.011 ms (1.068 ms / 100) 1.030 -> 1.029 ( -0.10%) [ +0.00% +0.10% +0.00% / -0.10% +0.39% +0.39%] index_copy_ strided 3 : Elapsed 0.010 ms (1.030 ms / 100) 1.068 -> 1.068 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_add_ strided 7 : Elapsed 0.011 ms (1.068 ms / 100) 1.029 -> 1.030 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.10% +0.49% +0.49%] index_copy_ strided 7 : Elapsed 0.010 ms (1.029 ms / 100) 1.068 -> 1.069 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.37% +0.37%] index_add_ perm : Elapsed 0.011 ms (1.068 ms / 100) 1.030 -> 1.030 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.29%] index_copy_ perm : Elapsed 0.010 ms (1.030 ms / 100) 1.068 -> 1.067 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.011 ms (1.068 ms / 100) 1.029 -> 1.029 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.030 ms / 100) 7.885 -> 7.894 ( +0.11%) [ +0.10% +0.09% +0.00% / +0.16% +0.11% +0.34%] index_select const : Elapsed 0.079 ms (7.893 ms / 100) 7.894 -> 7.898 ( +0.05%) [ +0.00% +0.22% +0.13% / +0.05% +0.13% +0.20%] index_select wrap : Elapsed 0.079 ms (7.894 ms / 100) 7.898 -> 7.889 ( -0.11%) [ +0.14% +0.00% +0.11% / -0.11% +0.13% +0.20%] index_select linear : Elapsed 0.079 ms (7.909 ms / 100) 7.890 -> 7.891 ( +0.01%) [ +0.11% +0.00% +0.01% / +0.13% +0.01% +0.19%] index_select reverse : Elapsed 0.079 ms (7.899 ms / 100) 7.882 -> 7.878 ( -0.05%) [ +0.08% +0.00% +0.28% / -0.05% +0.15% +0.24%] index_select skip64 : Elapsed 0.079 ms (7.888 ms / 100) 7.893 -> 7.890 ( -0.04%) [ +0.00% +0.09% +0.03% / -0.04% -0.04% -0.04%] index_select skip256 : Elapsed 0.079 ms (7.893 ms / 100) 7.889 -> 7.894 ( +0.06%) [ +0.08% +0.00% +0.18% / +0.06% +0.13% +0.20%] index_select spread : Elapsed 0.079 ms (7.895 ms / 100) 7.901 -> 7.896 ( -0.06%) [ +0.28% +0.00% +0.18% / +0.09% -0.06% -0.03%] index_select strided 3 : Elapsed 0.079 ms (7.923 ms / 100) 7.896 -> 7.913 ( +0.22%) [ +0.03% +0.09% +0.00% / +0.35% +0.22% +0.38%] index_select random : Elapsed 0.079 ms (7.898 ms / 100) 7.885 -> 7.890 ( +0.06%) [ +0.00% +0.05% +0.16% / +0.06% +0.28% +0.20%] index_select random_sorted : Elapsed 0.079 ms (7.885 ms / 100) B = [40, 5, 20, 16] (stride (1600, 320, 16, 1)) A = [4, 5, 20, 16] (stride (80, 16, 320, 1)) dim = 0 1.146 -> 1.146 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.52% +0.61%] index_add_ linear : Elapsed 0.011 ms (1.147 ms / 100) 1.108 -> 1.108 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.54% +0.54%] index_copy_ linear : Elapsed 0.011 ms (1.108 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.70% +0.70%] index_add_ reverse : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.108 ( +0.09%) [ +0.18% +0.00% +0.18% / +0.09% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.011 ms (1.109 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.79% +0.70%] index_add_ spread : Elapsed 0.011 ms (1.145 ms / 100) 1.108 -> 1.109 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.54% +0.54%] index_copy_ spread : Elapsed 0.011 ms (1.108 ms / 100) 1.145 -> 1.145 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.00% +0.18% +0.09% / +0.00% +0.63% +0.54%] index_copy_ strided 3 : Elapsed 0.011 ms (1.107 ms / 100) 1.145 -> 1.144 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.70% +0.79%] index_add_ strided 7 : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.108 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.011 ms (1.107 ms / 100) 1.145 -> 1.146 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.70% +0.70%] index_add_ perm : Elapsed 0.011 ms (1.145 ms / 100) 1.108 -> 1.109 ( +0.09%) [ +0.27% +0.00% +0.00% / +0.09% +0.63% +0.54%] index_copy_ perm : Elapsed 0.011 ms (1.111 ms / 100) 1.144 -> 1.144 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.011 ms (1.145 ms / 100) 1.107 -> 1.107 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.63% +0.63%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.107 ms / 100) 8.251 -> 8.258 ( +0.08%) [ +0.21% +0.18% +0.00% / +0.11% +0.08% +0.13%] index_select const : Elapsed 0.083 ms (8.268 ms / 100) 8.276 -> 8.280 ( +0.05%) [ +0.00% +0.11% +0.07% / +0.28% +0.05% +0.40%] index_select wrap : Elapsed 0.083 ms (8.276 ms / 100) 8.279 -> 8.281 ( +0.02%) [ +0.11% +0.00% +0.02% / +0.02% +0.25% +0.33%] index_select linear : Elapsed 0.083 ms (8.288 ms / 100) 8.283 -> 8.289 ( +0.07%) [ +0.13% +0.18% +0.00% / +0.07% +0.19% +0.45%] index_select reverse : Elapsed 0.083 ms (8.294 ms / 100) 8.258 -> 8.267 ( +0.11%) [ +0.00% +0.39% +0.07% / +0.16% +0.11% +0.16%] index_select skip64 : Elapsed 0.083 ms (8.258 ms / 100) 8.260 -> 8.253 ( -0.08%) [ +0.00% +0.07% +0.00% / +0.19% -0.08% -0.05%] index_select skip256 : Elapsed 0.083 ms (8.260 ms / 100) 8.285 -> 8.275 ( -0.12%) [ +0.00% +0.07% +0.04% / -0.07% +0.07% -0.12%] index_select spread : Elapsed 0.083 ms (8.285 ms / 100) 8.271 -> 8.283 ( +0.15%) [ +0.19% +0.23% +0.00% / +0.23% +0.15% +0.22%] index_select strided 3 : Elapsed 0.083 ms (8.287 ms / 100) 8.276 -> 8.281 ( +0.06%) [ +0.00% +0.08% +0.25% / +0.06% +0.34% +0.29%] index_select random : Elapsed 0.083 ms (8.276 ms / 100) 8.281 -> 8.272 ( -0.11%) [ +0.00% +0.10% +0.07% / -0.11% +0.00% +0.21%] index_select random_sorted : Elapsed 0.083 ms (8.281 ms / 100) B = [40, 5, 20, 16] (stride (1600, 1, 80, 5)) A = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) dim = 0 0.566 -> 0.566 ( +0.00%) [ +0.18% +0.35% +0.00% / +0.00% +0.88% +0.71%] index_add_ linear : Elapsed 0.006 ms (0.567 ms / 100) 0.559 -> 0.559 ( +0.00%) [ +0.54% +0.00% +0.18% / +0.00% +0.54% +0.54%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.567 -> 0.567 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.35% +0.00%] index_add_ reverse : Elapsed 0.006 ms (0.567 ms / 100) 0.559 -> 0.558 ( -0.18%) [ +0.00% +0.00% +0.18% / -0.18% +0.36% +0.18%] index_copy_ reverse : Elapsed 0.006 ms (0.559 ms / 100) 0.567 -> 0.567 ( +0.00%) [ +0.00% +0.35% +0.00% / +0.00% +0.35% +0.35%] index_add_ spread : Elapsed 0.006 ms (0.567 ms / 100) 0.559 -> 0.559 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +0.36% +0.18%] index_copy_ spread : Elapsed 0.006 ms (0.560 ms / 100) 0.566 -> 0.567 ( +0.18%) [ +0.35% +0.18% +0.00% / +0.18% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.559 -> 0.560 ( +0.18%) [ +0.00% +0.36% +0.18% / +0.18% +0.36% +0.18%] index_copy_ strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.569 -> 0.569 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.18% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.569 ms / 100) 0.559 -> 0.560 ( +0.18%) [ +0.00% +0.89% +0.18% / +0.18% +0.36% +0.36%] index_copy_ strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.578 -> 0.577 ( -0.17%) [ +1.73% +0.17% +0.00% / +0.00% -0.17% -0.17%] index_add_ perm : Elapsed 0.006 ms (0.588 ms / 100) 0.570 -> 0.569 ( -0.18%) [ +0.18% +0.00% +0.18% / +0.88% -0.18% +0.00%] index_copy_ perm : Elapsed 0.006 ms (0.571 ms / 100) 0.577 -> 0.576 ( -0.17%) [ +0.17% +0.35% +0.00% / +0.00% -0.17% -0.17%] index_add_ perm_sorted : Elapsed 0.006 ms (0.578 ms / 100) 0.567 -> 0.566 ( -0.18%) [ +0.00% +0.53% +0.00% / +0.00% -0.18% -0.18%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.567 ms / 100) 4.961 -> 4.961 ( +0.00%) [ +0.18% +0.00% +0.38% / +0.00% +0.60% +0.10%] index_select const : Elapsed 0.050 ms (4.970 ms / 100) 4.985 -> 4.984 ( -0.02%) [ +0.20% +0.00% +0.14% / +0.24% +0.24% -0.02%] index_select wrap : Elapsed 0.050 ms (4.995 ms / 100) 4.977 -> 4.964 ( -0.26%) [ +0.00% +0.02% +0.16% / -0.26% +0.34% +0.22%] index_select linear : Elapsed 0.050 ms (4.977 ms / 100) 4.967 -> 4.975 ( +0.16%) [ +0.02% +0.00% +0.00% / +0.16% +0.40% +0.30%] index_select reverse : Elapsed 0.050 ms (4.968 ms / 100) 4.955 -> 4.972 ( +0.34%) [ +0.10% +0.00% +0.06% / +0.48% +0.34% +0.36%] index_select skip64 : Elapsed 0.050 ms (4.960 ms / 100) 4.955 -> 4.954 ( -0.02%) [ +0.02% +0.14% +0.00% / -0.02% +0.32% +0.34%] index_select skip256 : Elapsed 0.050 ms (4.956 ms / 100) 4.982 -> 4.985 ( +0.06%) [ +0.00% +0.06% +0.04% / +0.06% +0.16% +0.18%] index_select spread : Elapsed 0.050 ms (4.982 ms / 100) 4.978 -> 4.978 ( +0.00%) [ +0.16% +0.00% +0.10% / +0.00% +0.30% +0.40%] index_select strided 3 : Elapsed 0.050 ms (4.986 ms / 100) 4.989 -> 4.987 ( -0.04%) [ +0.10% +0.00% +0.06% / -0.04% +0.12% +0.08%] index_select random : Elapsed 0.050 ms (4.994 ms / 100) 4.971 -> 4.972 ( +0.02%) [ +0.20% +0.12% +0.00% / +0.02% +0.38% +0.34%] index_select random_sorted : Elapsed 0.050 ms (4.981 ms / 100) B = [40, 5, 20, 16] (stride (1600, 1, 5, 100)) A = [4, 5, 20, 16] (stride (1, 64, 320, 4)) dim = 0 1.313 -> 1.317 ( +0.30%) [ +0.38% +0.00% +0.15% / +0.30% +0.30% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.47% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.275 ms / 100) 1.315 -> 1.316 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.08% +0.08% +0.15%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.313 -> 1.318 ( +0.38%) [ +0.15% +0.00% +0.00% / +0.38% +0.38% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.315 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.00% +0.47% +0.00% / +0.00% +0.63% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.312 -> 1.316 ( +0.30%) [ +0.15% +0.00% +0.38% / +0.30% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.314 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.79% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.313 -> 1.314 ( +0.08%) [ +0.08% +0.30% +0.00% / +0.08% +0.46% +0.38%] index_add_ strided 7 : Elapsed 0.013 ms (1.314 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.313 -> 1.313 ( +0.00%) [ +0.23% +0.15% +0.00% / +0.00% +0.30% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.316 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.86% +0.78%] index_copy_ perm : Elapsed 0.013 ms (1.276 ms / 100) 1.313 -> 1.311 ( -0.15%) [ +0.23% +0.00% +0.23% / -0.15% +0.30% +0.38%] index_add_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.71% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 9.158 -> 9.158 ( +0.00%) [ +0.19% +0.17% +0.00% / +0.00% +0.23% +0.38%] index_select const : Elapsed 0.092 ms (9.175 ms / 100) 9.151 -> 9.150 ( -0.01%) [ +0.00% +0.08% +0.00% / -0.01% +0.14% +0.08%] index_select wrap : Elapsed 0.092 ms (9.151 ms / 100) 9.150 -> 9.158 ( +0.09%) [ +0.00% +0.01% +0.20% / +0.16% +0.09% +0.17%] index_select linear : Elapsed 0.092 ms (9.150 ms / 100) 9.154 -> 9.158 ( +0.04%) [ +0.01% +0.19% +0.00% / +0.04% +0.11% +0.14%] index_select reverse : Elapsed 0.092 ms (9.155 ms / 100) 9.151 -> 9.165 ( +0.15%) [ +0.03% +0.20% +0.00% / +0.15% +0.16% +0.37%] index_select skip64 : Elapsed 0.092 ms (9.154 ms / 100) 9.145 -> 9.170 ( +0.27%) [ +0.23% +0.00% +0.15% / +0.27% +0.51% +0.33%] index_select skip256 : Elapsed 0.092 ms (9.166 ms / 100) 9.146 -> 9.178 ( +0.35%) [ +0.00% +0.11% +0.23% / +0.36% +0.40% +0.35%] index_select spread : Elapsed 0.091 ms (9.146 ms / 100) 9.160 -> 9.163 ( +0.03%) [ +0.00% +0.08% +0.13% / +0.15% +0.16% +0.03%] index_select strided 3 : Elapsed 0.092 ms (9.160 ms / 100) 9.147 -> 9.154 ( +0.08%) [ +0.00% +0.16% +0.21% / +0.17% +0.08% +0.22%] index_select random : Elapsed 0.091 ms (9.147 ms / 100) 9.165 -> 9.164 ( -0.01%) [ +0.00% +0.14% +0.03% / -0.01% +0.12% +0.25%] index_select random_sorted : Elapsed 0.092 ms (9.165 ms / 100) B = [40, 5, 20, 16] (stride (320, 12800, 16, 1)) A = [4, 5, 20, 16] (stride (1, 1280, 4, 80)) dim = 0 1.265 -> 1.266 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_add_ linear : Elapsed 0.013 ms (1.266 ms / 100) 1.224 -> 1.224 ( +0.00%) [ +0.00% +0.41% +0.00% / +0.00% +0.57% +0.49%] index_copy_ linear : Elapsed 0.012 ms (1.224 ms / 100) 1.265 -> 1.265 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.47% +0.47%] index_add_ reverse : Elapsed 0.013 ms (1.266 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.41% +0.41%] index_copy_ reverse : Elapsed 0.012 ms (1.225 ms / 100) 1.266 -> 1.266 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_add_ spread : Elapsed 0.013 ms (1.267 ms / 100) 1.221 -> 1.221 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.57% +0.57%] index_copy_ spread : Elapsed 0.012 ms (1.221 ms / 100) 1.265 -> 1.266 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.24% +0.32%] index_add_ strided 3 : Elapsed 0.013 ms (1.265 ms / 100) 1.221 -> 1.221 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.33% +0.25%] index_copy_ strided 3 : Elapsed 0.012 ms (1.223 ms / 100) 1.267 -> 1.267 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.39% +0.55%] index_add_ strided 7 : Elapsed 0.013 ms (1.268 ms / 100) 1.222 -> 1.223 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.57% +0.57%] index_copy_ strided 7 : Elapsed 0.012 ms (1.222 ms / 100) 1.267 -> 1.266 ( -0.08%) [ +0.00% +0.47% +0.00% / -0.08% +0.32% +0.32%] index_add_ perm : Elapsed 0.013 ms (1.267 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.00% +0.41% +0.00% / +0.00% +0.57% +0.33%] index_copy_ perm : Elapsed 0.012 ms (1.222 ms / 100) 1.266 -> 1.266 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.26% +0.32%] index_add_ perm_sorted : Elapsed 0.013 ms (1.266 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.33%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) 8.709 -> 8.710 ( +0.01%) [ +0.03% +0.16% +0.00% / +0.20% +0.10% +0.01%] index_select const : Elapsed 0.087 ms (8.712 ms / 100) 8.720 -> 8.706 ( -0.16%) [ +0.01% +0.24% +0.00% / -0.13% -0.16% +0.00%] index_select wrap : Elapsed 0.087 ms (8.721 ms / 100) 8.719 -> 8.698 ( -0.24%) [ +0.00% +0.03% +0.08% / +0.14% -0.13% -0.24%] index_select linear : Elapsed 0.087 ms (8.719 ms / 100) 8.719 -> 8.713 ( -0.07%) [ +0.10% +0.11% +0.00% / -0.02% -0.07% +0.05%] index_select reverse : Elapsed 0.087 ms (8.728 ms / 100) 8.710 -> 8.701 ( -0.10%) [ +0.00% +0.33% +0.00% / -0.10% -0.02% +0.07%] index_select skip64 : Elapsed 0.087 ms (8.710 ms / 100) 8.694 -> 8.699 ( +0.06%) [ +0.38% +0.00% +0.20% / +0.29% +0.10% +0.06%] index_select skip256 : Elapsed 0.087 ms (8.727 ms / 100) 8.722 -> 8.705 ( -0.19%) [ +0.02% +0.01% +0.00% / +0.00% -0.15% -0.19%] index_select spread : Elapsed 0.087 ms (8.724 ms / 100) 8.716 -> 8.699 ( -0.20%) [ +0.07% +0.02% +0.00% / +0.14% -0.13% -0.20%] index_select strided 3 : Elapsed 0.087 ms (8.722 ms / 100) 8.712 -> 8.708 ( -0.05%) [ +0.15% +0.13% +0.00% / -0.01% -0.03% -0.05%] index_select random : Elapsed 0.087 ms (8.725 ms / 100) 8.715 -> 8.708 ( -0.08%) [ +0.16% +0.20% +0.00% / +0.13% -0.08% +0.02%] index_select random_sorted : Elapsed 0.087 ms (8.729 ms / 100) B = [40, 5, 20, 16] (stride (20, 12800, 1, 800)) A = [4, 5, 20, 16] (stride (20, 80, 1, 400)) dim = 0 1.353 -> 1.354 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.44% +0.52%] index_add_ linear : Elapsed 0.014 ms (1.353 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.15% +0.00% / -0.08% +0.53% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.340 -> 1.341 ( +0.07%) [ +0.00% +0.22% +0.22% / +0.07% +0.60% +0.60%] index_add_ reverse : Elapsed 0.013 ms (1.340 ms / 100) 1.301 -> 1.302 ( +0.08%) [ +0.00% +0.54% +0.31% / +0.08% +0.69% +0.69%] index_copy_ reverse : Elapsed 0.013 ms (1.301 ms / 100) 1.337 -> 1.338 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.52% +0.60%] index_add_ spread : Elapsed 0.013 ms (1.338 ms / 100) 1.299 -> 1.302 ( +0.23%) [ +0.00% +0.23% +0.23% / +0.23% +0.62% +0.69%] index_copy_ spread : Elapsed 0.013 ms (1.299 ms / 100) 1.353 -> 1.354 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.81% +0.59%] index_add_ strided 3 : Elapsed 0.014 ms (1.353 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.00% +0.15% +0.08% / +0.00% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.345 -> 1.344 ( -0.07%) [ +0.00% +0.22% +0.07% / -0.07% +0.67% +0.89%] index_add_ strided 7 : Elapsed 0.013 ms (1.345 ms / 100) 1.304 -> 1.307 ( +0.23%) [ +0.00% +0.61% +0.77% / +0.23% +1.00% +0.84%] index_copy_ strided 7 : Elapsed 0.013 ms (1.304 ms / 100) 1.336 -> 1.338 ( +0.15%) [ +0.15% +0.22% +0.00% / +0.15% +0.60% +0.75%] index_add_ perm : Elapsed 0.013 ms (1.338 ms / 100) 1.298 -> 1.300 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.69% +0.77%] index_copy_ perm : Elapsed 0.013 ms (1.300 ms / 100) 1.341 -> 1.341 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.67% +0.75%] index_add_ perm_sorted : Elapsed 0.013 ms (1.341 ms / 100) 1.301 -> 1.303 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.92% +1.23%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.302 ms / 100) 9.175 -> 9.169 ( -0.07%) [ +0.02% +0.24% +0.00% / -0.07% +0.09% +0.31%] index_select const : Elapsed 0.092 ms (9.177 ms / 100) 9.178 -> 9.196 ( +0.20%) [ +0.00% +0.31% +0.24% / +0.22% +0.20% +0.25%] index_select wrap : Elapsed 0.092 ms (9.178 ms / 100) 9.198 -> 9.193 ( -0.05%) [ +0.09% +0.00% +0.04% / +0.05% +0.12% -0.05%] index_select linear : Elapsed 0.092 ms (9.206 ms / 100) 9.187 -> 9.188 ( +0.01%) [ +0.42% +0.00% +0.13% / +0.21% +0.01% +0.05%] index_select reverse : Elapsed 0.092 ms (9.226 ms / 100) 9.189 -> 9.184 ( -0.05%) [ +0.09% +0.14% +0.00% / +0.03% -0.05% +0.00%] index_select skip64 : Elapsed 0.092 ms (9.197 ms / 100) 9.176 -> 9.186 ( +0.11%) [ +0.00% +0.07% +0.14% / +0.17% +0.11% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.176 ms / 100) 9.211 -> 9.214 ( +0.03%) [ +0.13% +0.02% +0.00% / +0.12% +0.03% +0.03%] index_select spread : Elapsed 0.092 ms (9.223 ms / 100) 9.181 -> 9.195 ( +0.15%) [ +0.19% +0.25% +0.00% / +0.15% +0.23% +0.19%] index_select strided 3 : Elapsed 0.092 ms (9.198 ms / 100) 9.194 -> 9.200 ( +0.07%) [ +0.00% +0.17% +0.07% / +0.11% +0.07% +0.28%] index_select random : Elapsed 0.092 ms (9.194 ms / 100) 9.203 -> 9.219 ( +0.17%) [ +0.12% +0.02% +0.00% / +0.17% +0.26% +0.24%] index_select random_sorted : Elapsed 0.092 ms (9.214 ms / 100) B = [40, 5, 20, 16] (stride (1, 12800, 40, 800)) A = [4, 5, 20, 16] (stride (1600, 20, 1, 100)) dim = 0 1.259 -> 1.259 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.71%] index_add_ linear : Elapsed 0.013 ms (1.259 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.65% +0.74%] index_copy_ linear : Elapsed 0.012 ms (1.223 ms / 100) 1.259 -> 1.260 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +0.71% +0.64%] index_add_ reverse : Elapsed 0.013 ms (1.261 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.08% +0.33% +0.00% / +0.00% +0.98% +0.65%] index_copy_ reverse : Elapsed 0.012 ms (1.223 ms / 100) 1.248 -> 1.247 ( -0.08%) [ +0.08% +0.16% +0.00% / -0.08% +0.96% -0.08%] index_add_ spread : Elapsed 0.012 ms (1.249 ms / 100) 1.210 -> 1.212 ( +0.17%) [ +0.00% +0.17% +0.08% / +0.17% +1.07% +0.17%] index_copy_ spread : Elapsed 0.012 ms (1.210 ms / 100) 1.255 -> 1.256 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.48% +0.32%] index_add_ strided 3 : Elapsed 0.013 ms (1.256 ms / 100) 1.216 -> 1.215 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.58% +0.49%] index_copy_ strided 3 : Elapsed 0.012 ms (1.218 ms / 100) 1.248 -> 1.250 ( +0.16%) [ +0.32% +0.16% +0.00% / +0.16% +0.48% +0.64%] index_add_ strided 7 : Elapsed 0.013 ms (1.252 ms / 100) 1.211 -> 1.213 ( +0.17%) [ +0.00% +0.08% +0.25% / +0.17% +0.33% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.211 ms / 100) 1.243 -> 1.243 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.32% +0.16%] index_add_ perm : Elapsed 0.012 ms (1.244 ms / 100) 1.205 -> 1.206 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +0.50% +0.25%] index_copy_ perm : Elapsed 0.012 ms (1.205 ms / 100) 1.264 -> 1.263 ( -0.08%) [ +0.00% +0.16% +0.08% / -0.08% +0.47% +0.63%] index_add_ perm_sorted : Elapsed 0.013 ms (1.264 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.90% +0.65%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) 8.815 -> 8.829 ( +0.16%) [ +0.14% +0.00% +0.05% / +0.23% +0.16% +0.20%] index_select const : Elapsed 0.088 ms (8.827 ms / 100) 8.851 -> 8.838 ( -0.15%) [ +0.06% +0.00% +0.18% / -0.03% -0.15% -0.12%] index_select wrap : Elapsed 0.089 ms (8.856 ms / 100) 8.839 -> 8.842 ( +0.03%) [ +0.34% +0.00% +0.19% / +0.06% +0.03% +0.18%] index_select linear : Elapsed 0.089 ms (8.869 ms / 100) 8.831 -> 8.832 ( +0.01%) [ +0.00% +0.05% +0.02% / +0.33% +0.14% +0.01%] index_select reverse : Elapsed 0.088 ms (8.831 ms / 100) 8.812 -> 8.810 ( -0.02%) [ +0.19% +0.20% +0.00% / -0.02% +0.05% +0.28%] index_select skip64 : Elapsed 0.088 ms (8.829 ms / 100) 8.817 -> 8.826 ( +0.10%) [ +0.18% +0.00% +0.09% / +0.10% +0.29% +0.24%] index_select skip256 : Elapsed 0.088 ms (8.833 ms / 100) 8.847 -> 8.845 ( -0.02%) [ +0.45% +0.10% +0.00% / +0.09% -0.02% +0.01%] index_select spread : Elapsed 0.089 ms (8.887 ms / 100) 8.843 -> 8.852 ( +0.10%) [ +0.12% +0.15% +0.00% / +0.17% +0.14% +0.10%] index_select strided 3 : Elapsed 0.089 ms (8.854 ms / 100) 8.862 -> 8.842 ( -0.23%) [ +0.10% +0.00% +0.00% / +0.02% -0.07% -0.23%] index_select random : Elapsed 0.089 ms (8.871 ms / 100) 8.840 -> 8.844 ( +0.05%) [ +0.20% +0.11% +0.00% / +0.26% +0.05% +0.48%] index_select random_sorted : Elapsed 0.089 ms (8.858 ms / 100) B = [40, 5, 20, 16] (stride (80, 16, 3200, 1)) A = [4, 5, 20, 16] (stride (20, 80, 1, 400)) dim = 0 1.352 -> 1.352 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.52% +0.52%] index_add_ linear : Elapsed 0.014 ms (1.353 ms / 100) 1.316 -> 1.315 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.53% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.342 -> 1.339 ( -0.22%) [ +0.07% +0.07% +0.00% / -0.22% +0.60% +0.45%] index_add_ reverse : Elapsed 0.013 ms (1.343 ms / 100) 1.301 -> 1.300 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +1.31% +0.77%] index_copy_ reverse : Elapsed 0.013 ms (1.301 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.52% +0.82%] index_add_ spread : Elapsed 0.013 ms (1.336 ms / 100) 1.297 -> 1.298 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.54% +0.85%] index_copy_ spread : Elapsed 0.013 ms (1.297 ms / 100) 1.350 -> 1.351 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.67% +0.89%] index_add_ strided 3 : Elapsed 0.014 ms (1.352 ms / 100) 1.315 -> 1.314 ( -0.08%) [ +0.08% +0.00% +0.15% / -0.08% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.013 ms (1.316 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.52% +0.74%] index_add_ strided 7 : Elapsed 0.013 ms (1.346 ms / 100) 1.306 -> 1.313 ( +0.54%) [ +0.38% +0.38% +0.00% / +0.54% +0.92% +0.92%] index_copy_ strided 7 : Elapsed 0.013 ms (1.311 ms / 100) 1.335 -> 1.336 ( +0.07%) [ +0.00% +0.22% +0.00% / +0.07% +0.67% +0.82%] index_add_ perm : Elapsed 0.013 ms (1.335 ms / 100) 1.295 -> 1.297 ( +0.15%) [ +0.00% +0.15% +0.23% / +0.15% +0.77% +0.77%] index_copy_ perm : Elapsed 0.013 ms (1.295 ms / 100) 1.339 -> 1.338 ( -0.07%) [ +0.15% +0.00% +0.15% / -0.07% +0.67% +0.52%] index_add_ perm_sorted : Elapsed 0.013 ms (1.341 ms / 100) 1.302 -> 1.301 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.61% +0.54%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.302 ms / 100) 9.160 -> 9.173 ( +0.14%) [ +0.28% +0.03% +0.00% / +0.17% +0.14% +0.19%] index_select const : Elapsed 0.092 ms (9.186 ms / 100) 9.171 -> 9.172 ( +0.01%) [ +0.28% +0.24% +0.00% / +0.02% +0.01% +0.02%] index_select wrap : Elapsed 0.092 ms (9.197 ms / 100) 9.178 -> 9.184 ( +0.07%) [ +0.31% +0.05% +0.00% / +0.08% +0.07% +0.49%] index_select linear : Elapsed 0.092 ms (9.206 ms / 100) 9.168 -> 9.185 ( +0.19%) [ +0.35% +0.00% +0.28% / +0.40% +0.19% +0.26%] index_select reverse : Elapsed 0.092 ms (9.200 ms / 100) 9.156 -> 9.164 ( +0.09%) [ +0.15% +0.05% +0.00% / +0.09% +0.24% +0.23%] index_select skip64 : Elapsed 0.092 ms (9.170 ms / 100) 9.167 -> 9.177 ( +0.11%) [ +0.15% +0.00% +0.05% / +0.11% +0.31% +0.24%] index_select skip256 : Elapsed 0.092 ms (9.181 ms / 100) 9.192 -> 9.196 ( +0.04%) [ +0.11% +0.11% +0.00% / +0.14% +0.04% +0.11%] index_select spread : Elapsed 0.092 ms (9.202 ms / 100) 9.187 -> 9.172 ( -0.16%) [ +0.00% +0.08% +0.16% / -0.04% -0.10% -0.16%] index_select strided 3 : Elapsed 0.092 ms (9.187 ms / 100) 9.183 -> 9.183 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.11% +0.07% +0.00%] index_select random : Elapsed 0.092 ms (9.195 ms / 100) 9.187 -> 9.190 ( +0.03%) [ +0.13% +0.08% +0.00% / +0.05% +0.03% +0.34%] index_select random_sorted : Elapsed 0.092 ms (9.199 ms / 100) B = [40, 5, 20, 16] (stride (1, 640, 3200, 40)) A = [4, 5, 20, 16] (stride (80, 1, 320, 5)) dim = 0 1.317 -> 1.317 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.30% +0.23%] index_add_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +2.19%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.08% +0.23%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.31% +0.31%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.329 -> 1.328 ( -0.08%) [ +0.00% +0.00% +0.23% / -0.08% -0.08% +0.23%] index_add_ spread : Elapsed 0.013 ms (1.329 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.23% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.53% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.55% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.320 -> 1.319 ( -0.08%) [ +0.00% +0.15% +0.08% / -0.08% +0.23% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.320 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.47% +0.39%] index_copy_ strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.45% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.324 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.47% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.322 -> 1.324 ( +0.15%) [ +0.08% +0.23% +0.00% / +0.15% +0.38% +0.38%] index_add_ perm_sorted : Elapsed 0.013 ms (1.323 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.08% +0.39% +0.00%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.286 ms / 100) 9.217 -> 9.218 ( +0.01%) [ +0.03% +0.03% +0.00% / +0.15% +0.23% +0.01%] index_select const : Elapsed 0.092 ms (9.220 ms / 100) 9.252 -> 9.254 ( +0.02%) [ +0.08% +0.15% +0.00% / +0.14% +0.02% +0.03%] index_select wrap : Elapsed 0.093 ms (9.259 ms / 100) 9.236 -> 9.241 ( +0.05%) [ +0.00% +0.02% +0.06% / +0.12% +0.05% +0.14%] index_select linear : Elapsed 0.092 ms (9.236 ms / 100) 9.234 -> 9.228 ( -0.06%) [ +0.18% +0.03% +0.00% / -0.06% +0.29% -0.02%] index_select reverse : Elapsed 0.093 ms (9.251 ms / 100) 9.208 -> 9.218 ( +0.11%) [ +0.02% +0.21% +0.00% / +0.11% +0.54% +0.38%] index_select skip64 : Elapsed 0.092 ms (9.210 ms / 100) 9.212 -> 9.205 ( -0.08%) [ +0.00% +0.04% +0.18% / -0.08% +0.21% +0.12%] index_select skip256 : Elapsed 0.092 ms (9.212 ms / 100) 9.267 -> 9.249 ( -0.19%) [ +0.00% +0.01% +0.14% / -0.19% -0.17% -0.08%] index_select spread : Elapsed 0.093 ms (9.267 ms / 100) 9.262 -> 9.252 ( -0.11%) [ +0.10% +0.00% +0.10% / -0.02% +0.04% -0.11%] index_select strided 3 : Elapsed 0.093 ms (9.271 ms / 100) 9.245 -> 9.247 ( +0.02%) [ +0.00% +0.16% +0.30% / +0.09% +0.23% +0.02%] index_select random : Elapsed 0.092 ms (9.245 ms / 100) 9.245 -> 9.249 ( +0.04%) [ +0.00% +0.05% +0.01% / +0.04% +0.06% +0.24%] index_select random_sorted : Elapsed 0.092 ms (9.245 ms / 100) B = [40, 5, 20, 16] (stride (5, 1, 200, 4000)) A = [4, 5, 20, 16] (stride (1, 1280, 64, 4)) dim = 0 1.150 -> 1.150 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.35% +0.43%] index_add_ linear : Elapsed 0.012 ms (1.151 ms / 100) 1.112 -> 1.112 ( +0.00%) [ +0.09% +0.18% +0.00% / +0.00% +0.54% +0.63%] index_copy_ linear : Elapsed 0.011 ms (1.113 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.52%] index_add_ reverse : Elapsed 0.011 ms (1.149 ms / 100) 1.112 -> 1.111 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.54% +0.54%] index_copy_ reverse : Elapsed 0.011 ms (1.112 ms / 100) 1.151 -> 1.150 ( -0.09%) [ +0.17% +0.00% +0.00% / -0.09% +0.43% +0.43%] index_add_ spread : Elapsed 0.012 ms (1.153 ms / 100) 1.115 -> 1.116 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.72% +0.45%] index_copy_ spread : Elapsed 0.011 ms (1.115 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.26% +0.00% +0.00% / +0.00% +0.70% +0.61%] index_add_ strided 3 : Elapsed 0.012 ms (1.152 ms / 100) 1.111 -> 1.113 ( +0.18%) [ +0.00% +0.09% +0.00% / +0.18% +0.72% +0.54%] index_copy_ strided 3 : Elapsed 0.011 ms (1.111 ms / 100) 1.150 -> 1.148 ( -0.17%) [ +0.09% +0.00% +0.09% / -0.17% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.012 ms (1.151 ms / 100) 1.113 -> 1.115 ( +0.18%) [ +0.00% +0.27% +0.09% / +0.18% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.011 ms (1.113 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.17% +0.00% +0.44% / +0.09% +0.70% +0.70%] index_add_ perm : Elapsed 0.011 ms (1.150 ms / 100) 1.111 -> 1.111 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.72% +0.63%] index_copy_ perm : Elapsed 0.011 ms (1.112 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.17% +0.09% +0.00% / +0.00% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.011 ms (1.150 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.18% +0.00% +0.00% / +0.09% +0.72% +0.63%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.113 ms / 100) 8.324 -> 8.332 ( +0.10%) [ +0.06% +0.00% +0.07% / +0.10% +0.18% +0.17%] index_select const : Elapsed 0.083 ms (8.329 ms / 100) 8.333 -> 8.338 ( +0.06%) [ +0.06% +0.19% +0.00% / +0.07% +0.06% +0.18%] index_select wrap : Elapsed 0.083 ms (8.338 ms / 100) 8.337 -> 8.330 ( -0.08%) [ +0.00% +0.05% +0.13% / -0.08% -0.06% +0.29%] index_select linear : Elapsed 0.083 ms (8.337 ms / 100) 8.335 -> 8.335 ( +0.00%) [ +0.08% +0.00% +0.06% / +0.06% +0.07% +0.00%] index_select reverse : Elapsed 0.083 ms (8.342 ms / 100) 8.334 -> 8.321 ( -0.16%) [ +0.04% +0.00% +0.02% / +0.19% -0.16% +0.30%] index_select skip64 : Elapsed 0.083 ms (8.337 ms / 100) 8.331 -> 8.327 ( -0.05%) [ +0.13% +0.00% +0.10% / -0.05% +0.11% +0.40%] index_select skip256 : Elapsed 0.083 ms (8.342 ms / 100) 8.326 -> 8.340 ( +0.17%) [ +0.00% +0.34% +0.07% / +0.17% +0.36% +0.32%] index_select spread : Elapsed 0.083 ms (8.326 ms / 100) 8.335 -> 8.333 ( -0.02%) [ +0.08% +0.00% +0.02% / -0.02% +0.04% +0.06%] index_select strided 3 : Elapsed 0.083 ms (8.342 ms / 100) 8.318 -> 8.335 ( +0.20%) [ +0.16% +0.37% +0.00% / +0.22% +0.20% +0.42%] index_select random : Elapsed 0.083 ms (8.331 ms / 100) 8.318 -> 8.327 ( +0.11%) [ +0.23% +0.00% +0.17% / +0.16% +0.11% +0.34%] index_select random_sorted : Elapsed 0.083 ms (8.337 ms / 100) out_shape = [4, 40, 20, 16] in_shape = [4, 5, 20, 16] idx_dim = 1 B = [4, 40, 20, 16] (stride (16, 1280, 64, 1)) A = [4, 5, 20, 16] (stride (80, 1, 320, 5)) dim = 1 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.39% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.14% +0.20% +0.00% / +0.00% +0.41% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.53% +0.53%] index_add_ reverse : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.34% +0.47%] index_copy_ reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.46% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.41% +0.41%] index_copy_ spread : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.61% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.46% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.47% +0.41%] index_copy_ strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.59% +0.53%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.41%] index_copy_ perm : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.520 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.53% +0.46%] index_add_ perm_sorted : Elapsed 0.015 ms (1.521 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 8.535 -> 8.548 ( +0.15%) [ +0.00% +0.19% +0.06% / +0.22% +0.39% +0.15%] index_select const : Elapsed 0.085 ms (8.535 ms / 100) 8.535 -> 8.541 ( +0.07%) [ +0.15% +0.18% +0.00% / +0.18% +0.18% +0.07%] index_select wrap : Elapsed 0.085 ms (8.548 ms / 100) 8.542 -> 8.543 ( +0.01%) [ +0.19% +0.00% +0.08% / +0.01% +0.01% +0.06%] index_select linear : Elapsed 0.086 ms (8.558 ms / 100) 8.548 -> 8.534 ( -0.16%) [ +0.00% +0.13% +0.07% / +0.11% +0.02% -0.16%] index_select reverse : Elapsed 0.085 ms (8.548 ms / 100) 8.533 -> 8.532 ( -0.01%) [ +0.00% +0.07% +0.06% / -0.01% +0.09% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.533 ms / 100) 8.535 -> 8.544 ( +0.11%) [ +0.00% +0.07% +0.05% / +0.25% +0.11% +0.12%] index_select skip256 : Elapsed 0.085 ms (8.535 ms / 100) 8.525 -> 8.539 ( +0.16%) [ +0.00% +0.27% +0.27% / +0.25% +0.26% +0.16%] index_select spread : Elapsed 0.085 ms (8.525 ms / 100) 8.553 -> 8.541 ( -0.14%) [ +0.01% +0.00% +0.07% / -0.09% -0.14% -0.05%] index_select strided 3 : Elapsed 0.086 ms (8.554 ms / 100) 8.537 -> 8.534 ( -0.04%) [ +0.28% +0.13% +0.00% / +0.13% +0.20% -0.04%] index_select random : Elapsed 0.086 ms (8.561 ms / 100) 8.551 -> 8.544 ( -0.08%) [ +0.07% +0.12% +0.00% / -0.01% -0.08% +0.12%] index_select random_sorted : Elapsed 0.086 ms (8.557 ms / 100) B = [4, 40, 20, 16] (stride (1, 4, 2560, 160)) A = [4, 5, 20, 16] (stride (1, 1280, 4, 80)) dim = 1 1.473 -> 1.474 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_add_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.427 -> 1.427 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.56% +0.63%] index_copy_ linear : Elapsed 0.014 ms (1.427 ms / 100) 1.477 -> 1.474 ( -0.20%) [ +0.07% +0.07% +0.00% / -0.20% +0.61% +0.34%] index_add_ reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.425 -> 1.423 ( -0.14%) [ +0.00% +0.00% +0.14% / -0.14% +0.70% +0.56%] index_copy_ reverse : Elapsed 0.014 ms (1.425 ms / 100) 1.472 -> 1.471 ( -0.07%) [ +0.14% +0.00% +0.00% / -0.07% +0.68% +0.68%] index_add_ spread : Elapsed 0.015 ms (1.474 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.77% +0.63%] index_copy_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.61%] index_add_ strided 3 : Elapsed 0.015 ms (1.475 ms / 100) 1.427 -> 1.427 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.014 ms (1.428 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.20% +0.00% +0.07% / +0.07% +0.75% +0.61%] index_add_ strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.84% +0.77%] index_copy_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.61% +0.61%] index_add_ perm : Elapsed 0.015 ms (1.474 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.77% +0.70%] index_copy_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) 1.426 -> 1.427 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.70% +0.70%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.428 ms / 100) 8.194 -> 8.198 ( +0.05%) [ +0.16% +0.13% +0.00% / +0.13% +0.26% +0.05%] index_select const : Elapsed 0.082 ms (8.207 ms / 100) 8.217 -> 8.214 ( -0.04%) [ +0.00% +0.11% +0.00% / -0.04% +0.29% +0.24%] index_select wrap : Elapsed 0.082 ms (8.217 ms / 100) 8.214 -> 8.221 ( +0.09%) [ +0.43% +0.12% +0.00% / +0.09% +0.16% +0.13%] index_select linear : Elapsed 0.082 ms (8.249 ms / 100) 8.204 -> 8.189 ( -0.18%) [ +0.11% +0.22% +0.00% / -0.18% +0.09% +0.17%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.197 -> 8.193 ( -0.05%) [ +0.30% +0.00% +0.01% / -0.05% +0.18% +0.41%] index_select skip64 : Elapsed 0.082 ms (8.222 ms / 100) 8.206 -> 8.194 ( -0.15%) [ +0.07% +0.00% +0.06% / -0.15% +0.01% +0.11%] index_select skip256 : Elapsed 0.082 ms (8.212 ms / 100) 8.211 -> 8.213 ( +0.02%) [ +0.24% +0.00% +0.09% / +0.02% +0.28% +0.19%] index_select spread : Elapsed 0.082 ms (8.231 ms / 100) 8.215 -> 8.221 ( +0.07%) [ +0.15% +0.06% +0.00% / +0.10% +0.16% +0.07%] index_select strided 3 : Elapsed 0.082 ms (8.227 ms / 100) 8.209 -> 8.223 ( +0.17%) [ +0.12% +0.06% +0.00% / +0.19% +0.17% +0.38%] index_select random : Elapsed 0.082 ms (8.219 ms / 100) 8.222 -> 8.209 ( -0.16%) [ +0.00% +0.18% +0.17% / -0.16% +0.17% +0.19%] index_select random_sorted : Elapsed 0.082 ms (8.222 ms / 100) out_shape = [4, 5, 40, 16] in_shape = [4, 5, 20, 16] idx_dim = 2 B = [4, 5, 40, 16] (stride (3200, 1, 5, 200)) A = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) dim = 2 2.460 -> 2.470 ( +0.41%) [ +0.16% +0.00% +0.04% / +0.41% +0.49% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.464 ms / 100) 2.450 -> 2.462 ( +0.49%) [ +0.08% +0.00% +0.00% / +0.49% +0.69% +0.86%] index_copy_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.462 -> 2.472 ( +0.41%) [ +0.20% +0.28% +0.00% / +0.41% +0.53% +0.61%] index_add_ reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.453 -> 2.465 ( +0.49%) [ +0.04% +0.12% +0.00% / +0.49% +0.69% +0.61%] index_copy_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.479 -> 2.491 ( +0.48%) [ +0.04% +0.12% +0.00% / +0.52% +0.61% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.480 ms / 100) 2.478 -> 2.496 ( +0.73%) [ +0.12% +0.20% +0.00% / +0.73% +0.73% +0.81%] index_copy_ spread : Elapsed 0.025 ms (2.481 ms / 100) 2.476 -> 2.485 ( +0.36%) [ +0.12% +0.00% +0.08% / +0.40% +0.40% +0.36%] index_add_ strided 3 : Elapsed 0.025 ms (2.479 ms / 100) 2.476 -> 2.488 ( +0.48%) [ +0.04% +0.00% +0.08% / +0.48% +0.65% +0.48%] index_copy_ strided 3 : Elapsed 0.025 ms (2.477 ms / 100) 2.476 -> 2.485 ( +0.36%) [ +0.04% +0.00% +0.08% / +0.36% +0.44% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.476 -> 2.486 ( +0.40%) [ +0.12% +0.00% +0.08% / +0.44% +0.40% +0.81%] index_copy_ strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.470 -> 2.486 ( +0.65%) [ +0.24% +0.08% +0.00% / +0.65% +0.73% +0.65%] index_add_ perm : Elapsed 0.025 ms (2.476 ms / 100) 2.470 -> 2.480 ( +0.40%) [ +0.00% +0.04% +0.08% / +0.40% +0.77% +0.65%] index_copy_ perm : Elapsed 0.025 ms (2.470 ms / 100) 2.472 -> 2.487 ( +0.61%) [ +0.00% +0.20% +0.12% / +0.61% +0.77% +0.61%] index_add_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 2.473 -> 2.482 ( +0.36%) [ +0.12% +0.12% +0.00% / +0.36% +0.57% +0.73%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) 4.490 -> 4.498 ( +0.18%) [ +0.20% +0.07% +0.00% / +0.18% +0.18% +0.20%] index_select const : Elapsed 0.045 ms (4.499 ms / 100) 4.504 -> 4.501 ( -0.07%) [ +0.02% +0.04% +0.00% / -0.07% +0.07% +0.13%] index_select wrap : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.508 ( +0.11%) [ +0.09% +0.00% +0.11% / +0.13% +0.11% +0.24%] index_select linear : Elapsed 0.045 ms (4.507 ms / 100) 4.508 -> 4.508 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.02% +0.16%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.495 -> 4.498 ( +0.07%) [ +0.20% +0.22% +0.00% / +0.07% +0.27% +0.07%] index_select skip64 : Elapsed 0.045 ms (4.504 ms / 100) 4.492 -> 4.497 ( +0.11%) [ +0.13% +0.27% +0.00% / +0.13% +0.18% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.502 -> 4.510 ( +0.18%) [ +0.00% +0.09% +0.09% / +0.20% +0.18% +0.18%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.504 -> 4.506 ( +0.04%) [ +0.00% +0.20% +0.18% / +0.20% +0.04% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.504 ms / 100) 4.494 -> 4.499 ( +0.11%) [ +0.00% +0.00% +0.31% / +0.11% +0.11% +0.31%] index_select strided 5 : Elapsed 0.045 ms (4.494 ms / 100) 4.500 -> 4.510 ( +0.22%) [ +0.00% +0.29% +0.07% / +0.22% +0.29% +0.29%] index_select strided 7 : Elapsed 0.045 ms (4.500 ms / 100) 4.499 -> 4.498 ( -0.02%) [ +0.00% +0.09% +0.02% / -0.02% +0.13% +0.18%] index_select strided 8 : Elapsed 0.045 ms (4.499 ms / 100) 4.497 -> 4.498 ( +0.02%) [ +0.07% +0.00% +0.04% / +0.02% +0.04% +0.22%] index_select strided 16 : Elapsed 0.045 ms (4.500 ms / 100) 4.501 -> 4.507 ( +0.13%) [ +0.00% +0.24% +0.04% / +0.20% +0.27% +0.13%] index_select random : Elapsed 0.045 ms (4.501 ms / 100) 4.506 -> 4.508 ( +0.04%) [ +0.11% +0.00% +0.00% / +0.04% +0.24% +0.16%] index_select random_sorted : Elapsed 0.045 ms (4.511 ms / 100) B = [4, 5, 40, 16] (stride (1, 2560, 4, 160)) A = [4, 5, 20, 16] (stride (320, 1280, 16, 1)) dim = 2 2.460 -> 2.470 ( +0.41%) [ +0.12% +0.00% +0.00% / +0.41% +0.65% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.463 ms / 100) 2.448 -> 2.459 ( +0.45%) [ +0.12% +0.00% +0.33% / +0.45% +0.78% +0.78%] index_copy_ linear : Elapsed 0.025 ms (2.451 ms / 100) 2.453 -> 2.462 ( +0.37%) [ +0.08% +0.08% +0.00% / +0.37% +0.98% +1.06%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.443 -> 2.459 ( +0.65%) [ +0.00% +0.04% +0.08% / +0.65% +1.06% +0.94%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.473 -> 2.480 ( +0.28%) [ +0.08% +0.04% +0.00% / +0.28% +1.01% +0.81%] index_add_ spread : Elapsed 0.025 ms (2.475 ms / 100) 2.474 -> 2.486 ( +0.49%) [ +0.04% +0.00% +0.00% / +0.49% +1.09% +0.89%] index_copy_ spread : Elapsed 0.025 ms (2.475 ms / 100) 2.470 -> 2.484 ( +0.57%) [ +0.12% +0.24% +0.00% / +0.65% +0.69% +0.57%] index_add_ strided 3 : Elapsed 0.025 ms (2.473 ms / 100) 2.470 -> 2.483 ( +0.53%) [ +0.00% +0.08% +0.00% / +0.53% +0.85% +0.77%] index_copy_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.471 -> 2.481 ( +0.40%) [ +0.00% +0.24% +0.16% / +0.40% +0.61% +0.69%] index_add_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.476 -> 2.485 ( +0.36%) [ +0.00% +0.04% +0.12% / +0.36% +0.48% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.469 -> 2.474 ( +0.20%) [ +0.04% +0.08% +0.00% / +0.65% +0.36% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.470 ms / 100) 2.468 -> 2.476 ( +0.32%) [ +0.04% +0.04% +0.00% / +0.49% +0.36% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.478 ( +0.24%) [ +0.04% +0.00% +0.00% / +0.61% +0.24% +0.44%] index_add_ perm_sorted : Elapsed 0.025 ms (2.473 ms / 100) 2.469 -> 2.476 ( +0.28%) [ +0.04% +0.28% +0.00% / +0.57% +0.28% +0.57%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.470 ms / 100) 4.498 -> 4.497 ( -0.02%) [ +0.04% +0.16% +0.00% / -0.02% +0.02% +0.11%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.510 -> 4.503 ( -0.16%) [ +0.04% +0.00% +0.00% / -0.02% +0.13% -0.16%] index_select wrap : Elapsed 0.045 ms (4.512 ms / 100) 4.507 -> 4.509 ( +0.04%) [ +0.00% +0.13% +0.20% / +0.22% +0.04% +0.13%] index_select linear : Elapsed 0.045 ms (4.507 ms / 100) 4.503 -> 4.505 ( +0.04%) [ +0.24% +0.24% +0.00% / +0.09% +0.04% +0.31%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.498 -> 4.496 ( -0.04%) [ +0.04% +0.11% +0.00% / +0.07% +0.09% -0.04%] index_select skip64 : Elapsed 0.045 ms (4.500 ms / 100) 4.499 -> 4.493 ( -0.13%) [ +0.02% +0.00% +0.02% / +0.07% -0.13% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.500 ms / 100) 4.502 -> 4.512 ( +0.22%) [ +0.02% +0.00% +0.16% / +0.22% +0.24% +0.33%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.506 -> 4.512 ( +0.13%) [ +0.02% +0.13% +0.00% / +0.13% +0.22% +0.27%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.494 -> 4.500 ( +0.13%) [ +0.18% +0.00% +0.09% / +0.13% +0.18% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.18% +0.00% +0.07% / +0.07% +0.29% +0.22%] index_select strided 7 : Elapsed 0.045 ms (4.511 ms / 100) 4.498 -> 4.500 ( +0.04%) [ +0.02% +0.09% +0.00% / +0.04% +0.07% +0.22%] index_select strided 8 : Elapsed 0.045 ms (4.499 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.18% +0.13% +0.00% / +0.09% +0.02% +0.18%] index_select strided 16 : Elapsed 0.045 ms (4.507 ms / 100) 4.508 -> 4.506 ( -0.04%) [ +0.02% +0.02% +0.00% / +0.22% +0.09% -0.04%] index_select random : Elapsed 0.045 ms (4.509 ms / 100) 4.508 -> 4.506 ( -0.04%) [ +0.00% +0.13% +0.16% / +0.18% -0.04% +0.18%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [4, 5, 40, 16] (stride (80, 1, 320, 5)) A = [4, 5, 20, 16] (stride (1600, 20, 1, 100)) dim = 2 2.439 -> 2.450 ( +0.45%) [ +0.25% +0.00% +0.16% / +0.45% +0.78% +0.90%] index_add_ linear : Elapsed 0.024 ms (2.445 ms / 100) 2.437 -> 2.453 ( +0.66%) [ +0.00% +0.37% +0.16% / +0.66% +0.94% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.437 ms / 100) 2.439 -> 2.450 ( +0.45%) [ +0.00% +0.08% +0.08% / +0.45% +0.82% +0.74%] index_add_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.00% +0.12% +0.37% / +0.66% +0.78% +0.66%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.444 -> 2.453 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.37% +0.49% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.441 -> 2.455 ( +0.57%) [ +0.16% +0.08% +0.00% / +0.57% +0.78% +0.61%] index_copy_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.446 -> 2.451 ( +0.20%) [ +0.00% +0.20% +0.08% / +0.20% +0.41% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.00% +0.04% +0.12% / +0.53% +0.49% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.457 ( +0.49%) [ +0.12% +0.16% +0.00% / +0.49% +0.57% +0.53%] index_add_ strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.440 -> 2.452 ( +0.49%) [ +0.08% +0.00% +0.16% / +0.78% +0.49% +0.57%] index_copy_ strided 7 : Elapsed 0.024 ms (2.442 ms / 100) 2.444 -> 2.456 ( +0.49%) [ +0.00% +0.16% +0.12% / +0.53% +0.49% +0.61%] index_add_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.439 -> 2.453 ( +0.57%) [ +0.21% +0.00% +0.21% / +0.57% +0.66% +0.86%] index_copy_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.00% +0.16% +0.12% / +0.66% +0.90% +0.78%] index_add_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 2.439 -> 2.452 ( +0.53%) [ +0.12% +0.00% +0.00% / +0.53% +0.82% +0.86%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.442 ms / 100) 4.496 -> 4.495 ( -0.02%) [ +0.04% +0.00% +0.00% / +0.02% -0.02% +0.09%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.497 -> 4.498 ( +0.02%) [ +0.11% +0.20% +0.00% / +0.13% +0.02% +0.29%] index_select wrap : Elapsed 0.045 ms (4.502 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.18% +0.18% +0.00% / +0.22% +0.04% +0.18%] index_select linear : Elapsed 0.045 ms (4.503 ms / 100) 4.495 -> 4.501 ( +0.13%) [ +0.11% +0.16% +0.00% / +0.13% +0.22% +0.36%] index_select reverse : Elapsed 0.045 ms (4.500 ms / 100) 4.489 -> 4.499 ( +0.22%) [ +0.16% +0.00% +0.09% / +0.22% +0.29% +0.27%] index_select skip64 : Elapsed 0.045 ms (4.496 ms / 100) 4.486 -> 4.491 ( +0.11%) [ +0.00% +0.29% +0.09% / +0.11% +0.29% +0.40%] index_select skip256 : Elapsed 0.045 ms (4.486 ms / 100) 4.503 -> 4.497 ( -0.13%) [ +0.00% +0.07% +0.00% / -0.13% +0.02% +0.07%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.496 -> 4.501 ( +0.11%) [ +0.11% +0.13% +0.00% / +0.16% +0.11% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.501 ms / 100) 4.493 -> 4.499 ( +0.13%) [ +0.00% +0.09% +0.18% / +0.20% +0.13% +0.29%] index_select strided 5 : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.501 ( +0.18%) [ +0.20% +0.07% +0.00% / +0.18% +0.29% +0.31%] index_select strided 7 : Elapsed 0.045 ms (4.502 ms / 100) 4.497 -> 4.494 ( -0.07%) [ +0.09% +0.00% +0.02% / -0.07% +0.33% +0.24%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.02% +0.09% +0.00% / +0.04% +0.18% +0.38%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.495 -> 4.500 ( +0.11%) [ +0.00% +0.09% +0.13% / +0.11% +0.16% +0.18%] index_select random : Elapsed 0.045 ms (4.495 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.09% +0.00% +0.04% / +0.02% +0.20% +0.29%] index_select random_sorted : Elapsed 0.045 ms (4.505 ms / 100) B = [4, 5, 40, 16] (stride (5, 1, 20, 800)) A = [4, 5, 20, 16] (stride (80, 16, 320, 1)) dim = 2 1.532 -> 1.501 ( -2.02%) [ +0.20% +0.26% +0.00% / -2.02% -1.76% -1.89%] index_add_ linear : Elapsed 0.015 ms (1.535 ms / 100) 1.492 -> 1.458 ( -2.28%) [ +0.00% +0.40% +0.07% / -2.28% -2.14% -1.94%] index_copy_ linear : Elapsed 0.015 ms (1.492 ms / 100) 1.534 -> 1.502 ( -2.09%) [ +0.07% +0.33% +0.00% / -1.83% -2.09% -2.09%] index_add_ reverse : Elapsed 0.015 ms (1.535 ms / 100) 1.498 -> 1.459 ( -2.60%) [ +0.07% +0.00% +0.07% / -2.20% -2.40% -2.60%] index_copy_ reverse : Elapsed 0.015 ms (1.499 ms / 100) 1.532 -> 1.500 ( -2.09%) [ +0.00% +0.13% +0.07% / -2.09% -1.76% -1.83%] index_add_ spread : Elapsed 0.015 ms (1.532 ms / 100) 1.499 -> 1.464 ( -2.33%) [ +0.00% +0.27% +0.40% / -2.33% -2.33% -2.27%] index_copy_ spread : Elapsed 0.015 ms (1.499 ms / 100) 1.536 -> 1.502 ( -2.21%) [ +0.00% +0.00% +0.00% / -2.15% -2.21% -2.15%] index_add_ strided 3 : Elapsed 0.015 ms (1.536 ms / 100) 1.499 -> 1.460 ( -2.60%) [ +0.20% +0.13% +0.00% / -2.40% -2.54% -2.60%] index_copy_ strided 3 : Elapsed 0.015 ms (1.502 ms / 100) 1.531 -> 1.505 ( -1.70%) [ +0.07% +0.33% +0.00% / -1.70% -1.70% -1.50%] index_add_ strided 7 : Elapsed 0.015 ms (1.532 ms / 100) 1.501 -> 1.467 ( -2.27%) [ +0.00% +0.27% +0.13% / -2.27% -2.20% -2.27%] index_copy_ strided 7 : Elapsed 0.015 ms (1.501 ms / 100) 1.527 -> 1.503 ( -1.57%) [ +0.20% +0.39% +0.00% / -1.57% -1.11% -1.31%] index_add_ perm : Elapsed 0.015 ms (1.530 ms / 100) 1.492 -> 1.460 ( -2.14%) [ +0.27% +0.40% +0.00% / -2.14% -1.47% -1.47%] index_copy_ perm : Elapsed 0.015 ms (1.496 ms / 100) 1.532 -> 1.503 ( -1.89%) [ +0.07% +0.00% +0.07% / -1.89% -1.63% -1.57%] index_add_ perm_sorted : Elapsed 0.015 ms (1.533 ms / 100) 1.495 -> 1.461 ( -2.27%) [ +0.00% +0.07% +0.13% / -2.27% -1.94% -2.01%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.495 ms / 100) 2.876 -> 2.883 ( +0.24%) [ +0.31% +0.17% +0.00% / +0.59% +0.24% +0.31%] index_select const : Elapsed 0.029 ms (2.885 ms / 100) 2.889 -> 2.890 ( +0.03%) [ +0.00% +0.10% +0.03% / +0.03% +0.55% +0.55%] index_select wrap : Elapsed 0.029 ms (2.889 ms / 100) 2.891 -> 2.892 ( +0.03%) [ +0.00% +0.03% +0.17% / +0.03% +0.66% +0.66%] index_select linear : Elapsed 0.029 ms (2.891 ms / 100) 2.883 -> 2.884 ( +0.03%) [ +0.03% +0.00% +0.14% / +0.28% +0.03% +0.03%] index_select reverse : Elapsed 0.029 ms (2.884 ms / 100) 2.880 -> 2.881 ( +0.03%) [ +0.10% +0.00% +0.07% / +0.07% +0.14% +0.03%] index_select skip64 : Elapsed 0.029 ms (2.883 ms / 100) 2.898 -> 2.882 ( -0.55%) [ +0.21% +0.00% +0.21% / +0.31% -0.55% -0.41%] index_select skip256 : Elapsed 0.029 ms (2.904 ms / 100) 2.887 -> 2.878 ( -0.31%) [ +0.00% +0.24% +0.31% / -0.03% -0.14% -0.31%] index_select spread : Elapsed 0.029 ms (2.887 ms / 100) 2.887 -> 2.887 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.10% +0.07%] index_select strided 3 : Elapsed 0.029 ms (2.892 ms / 100) 2.881 -> 2.879 ( -0.07%) [ +0.00% +0.03% +0.03% / +0.00% +0.07% -0.07%] index_select strided 5 : Elapsed 0.029 ms (2.881 ms / 100) 2.884 -> 2.880 ( -0.14%) [ +0.17% +0.00% +0.21% / +0.42% +0.10% -0.14%] index_select strided 7 : Elapsed 0.029 ms (2.889 ms / 100) 2.897 -> 2.884 ( -0.45%) [ +0.00% +0.14% +0.14% / +0.14% -0.45% -0.21%] index_select strided 8 : Elapsed 0.029 ms (2.897 ms / 100) 2.899 -> 2.885 ( -0.48%) [ +0.07% +0.00% +0.17% / +0.03% -0.31% -0.48%] index_select strided 16 : Elapsed 0.029 ms (2.901 ms / 100) 2.886 -> 2.884 ( -0.07%) [ +0.00% +0.10% +0.24% / +0.07% +0.03% -0.07%] index_select random : Elapsed 0.029 ms (2.886 ms / 100) 2.889 -> 2.882 ( -0.24%) [ +0.00% +0.03% +0.17% / -0.17% -0.24% -0.07%] index_select random_sorted : Elapsed 0.029 ms (2.889 ms / 100) out_shape = [4, 5, 20, 40] in_shape = [4, 5, 20, 16] idx_dim = 3 B = [4, 5, 20, 40] (stride (1, 3200, 4, 80)) A = [4, 5, 20, 16] (stride (80, 16, 320, 1)) dim = 3 4.004 -> 4.012 ( +0.20%) [ +0.00% +0.27% +0.00% / +0.20% +0.60% +0.50%] index_add_ linear : Elapsed 0.040 ms (4.004 ms / 100) 3.844 -> 3.855 ( +0.29%) [ +0.00% +0.34% +0.08% / +0.29% +0.75% +0.62%] index_copy_ linear : Elapsed 0.038 ms (3.844 ms / 100) 4.026 -> 4.025 ( -0.02%) [ +0.02% +0.00% +0.10% / -0.02% +0.57% +0.55%] index_add_ reverse : Elapsed 0.040 ms (4.027 ms / 100) 3.859 -> 3.860 ( +0.03%) [ +0.00% +0.00% +0.08% / +0.03% +0.62% +0.70%] index_copy_ reverse : Elapsed 0.039 ms (3.859 ms / 100) 4.011 -> 4.028 ( +0.42%) [ +0.00% +0.40% +0.07% / +0.42% +0.67% +0.55%] index_add_ spread : Elapsed 0.040 ms (4.011 ms / 100) 3.857 -> 3.873 ( +0.41%) [ +0.00% +0.34% +0.16% / +0.41% +0.73% +0.62%] index_copy_ spread : Elapsed 0.039 ms (3.857 ms / 100) 4.017 -> 4.028 ( +0.27%) [ +0.02% +0.05% +0.00% / +0.27% +0.82% +0.37%] index_add_ strided 3 : Elapsed 0.040 ms (4.018 ms / 100) 3.860 -> 3.875 ( +0.39%) [ +0.08% +0.00% +0.13% / +0.47% +1.22% +0.39%] index_copy_ strided 3 : Elapsed 0.039 ms (3.863 ms / 100) 4.025 -> 4.028 ( +0.07%) [ +0.00% +0.12% +0.15% / +0.07% +0.50% +0.60%] index_add_ strided 7 : Elapsed 0.040 ms (4.025 ms / 100) 3.861 -> 3.863 ( +0.05%) [ +0.00% +0.10% +0.10% / +0.05% +0.52% +0.57%] index_copy_ strided 7 : Elapsed 0.039 ms (3.861 ms / 100) 4.007 -> 4.009 ( +0.05%) [ +0.00% +0.20% +0.25% / +0.05% +0.47% +0.45%] index_add_ perm : Elapsed 0.040 ms (4.007 ms / 100) 3.850 -> 3.857 ( +0.18%) [ +0.00% +0.16% +0.21% / +0.18% +0.36% +0.34%] index_copy_ perm : Elapsed 0.038 ms (3.850 ms / 100) 4.015 -> 4.014 ( -0.02%) [ +0.02% +0.12% +0.00% / -0.02% +0.42% +0.20%] index_add_ perm_sorted : Elapsed 0.040 ms (4.016 ms / 100) 3.851 -> 3.852 ( +0.03%) [ +0.18% +0.23% +0.00% / +0.03% +0.44% +0.29%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.858 ms / 100) 5.480 -> 5.477 ( -0.05%) [ +0.11% +0.02% +0.00% / -0.05% +0.22% +0.18%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.479 -> 5.482 ( +0.05%) [ +0.09% +0.18% +0.00% / +0.05% +0.22% +0.18%] index_select wrap : Elapsed 0.055 ms (5.484 ms / 100) 5.483 -> 5.483 ( +0.00%) [ +0.05% +0.00% +0.20% / +0.00% +0.11% +0.00%] index_select linear : Elapsed 0.055 ms (5.486 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.16% +0.07% +0.00% / +0.15% +0.27% +0.05%] index_select reverse : Elapsed 0.055 ms (5.492 ms / 100) 5.486 -> 5.479 ( -0.13%) [ +0.02% +0.05% +0.00% / +0.02% -0.13% +0.00%] index_select skip64 : Elapsed 0.055 ms (5.487 ms / 100) 5.483 -> 5.483 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.04% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.484 -> 5.485 ( +0.02%) [ +0.02% +0.00% +0.16% / +0.02% +0.04% +0.13%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.04% +0.00% +0.05% / +0.00% +0.07% -0.02%] index_select strided 3 : Elapsed 0.055 ms (5.490 ms / 100) 5.479 -> 5.487 ( +0.15%) [ +0.04% +0.00% +0.13% / +0.16% +0.15% +0.15%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.485 -> 5.479 ( -0.11%) [ +0.05% +0.15% +0.00% / -0.11% +0.22% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.488 ms / 100) 5.482 -> 5.482 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.09% +0.00% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.481 -> 5.485 ( +0.07%) [ +0.00% +0.16% +0.22% / +0.07% +0.26% +0.11%] index_select random : Elapsed 0.055 ms (5.481 ms / 100) 5.484 -> 5.479 ( -0.09%) [ +0.13% +0.00% +0.02% / -0.09% +0.07% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.491 ms / 100) B = [4, 5, 20, 40] (stride (200, 40, 800, 1)) A = [4, 5, 20, 16] (stride (1, 4, 320, 20)) dim = 3 3.992 -> 3.991 ( -0.03%) [ +0.00% +0.05% +0.05% / -0.03% +0.45% +0.68%] index_add_ linear : Elapsed 0.040 ms (3.992 ms / 100) 3.826 -> 3.826 ( +0.00%) [ +0.00% +0.05% +0.18% / +0.00% +0.73% +0.65%] index_copy_ linear : Elapsed 0.038 ms (3.826 ms / 100) 4.003 -> 4.018 ( +0.37%) [ +0.00% +0.35% +0.55% / +0.37% +0.52% +0.50%] index_add_ reverse : Elapsed 0.040 ms (4.003 ms / 100) 3.838 -> 3.849 ( +0.29%) [ +0.00% +0.26% +0.42% / +0.29% +0.65% +0.70%] index_copy_ reverse : Elapsed 0.038 ms (3.838 ms / 100) 4.010 -> 4.009 ( -0.02%) [ +0.00% +0.02% +0.05% / -0.02% +0.52% +0.60%] index_add_ spread : Elapsed 0.040 ms (4.010 ms / 100) 3.839 -> 3.837 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.52% +0.57%] index_copy_ spread : Elapsed 0.038 ms (3.839 ms / 100) 3.995 -> 3.994 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.63% +0.68%] index_add_ strided 3 : Elapsed 0.040 ms (3.995 ms / 100) 3.826 -> 3.832 ( +0.16%) [ +0.00% +0.08% +0.37% / +0.16% +0.68% +0.76%] index_copy_ strided 3 : Elapsed 0.038 ms (3.826 ms / 100) 3.996 -> 3.996 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.83% +1.18%] index_add_ strided 7 : Elapsed 0.040 ms (3.998 ms / 100) 3.835 -> 3.838 ( +0.08%) [ +0.00% +0.03% +0.00% / +0.08% +0.76% +1.02%] index_copy_ strided 7 : Elapsed 0.038 ms (3.835 ms / 100) 3.984 -> 3.982 ( -0.05%) [ +0.03% +0.00% +0.20% / -0.05% +0.95% +0.75%] index_add_ perm : Elapsed 0.040 ms (3.985 ms / 100) 3.825 -> 3.821 ( -0.10%) [ +0.00% +0.05% +0.08% / -0.10% +0.78% +0.65%] index_copy_ perm : Elapsed 0.038 ms (3.825 ms / 100) 3.988 -> 3.993 ( +0.13%) [ +0.13% +0.18% +0.00% / +0.13% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.040 ms (3.993 ms / 100) 3.827 -> 3.827 ( +0.00%) [ +0.03% +0.18% +0.00% / +0.00% +0.71% +0.65%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.828 ms / 100) 5.478 -> 5.476 ( -0.04%) [ +0.18% +0.00% +0.11% / -0.04% -0.02% +0.09%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.486 -> 5.485 ( -0.02%) [ +0.11% +0.18% +0.00% / +0.09% +0.02% -0.02%] index_select wrap : Elapsed 0.055 ms (5.492 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.04% +0.00% +0.00%] index_select linear : Elapsed 0.055 ms (5.492 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.11% +0.15% +0.00% / +0.13% -0.04% -0.11%] index_select reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.472 -> 5.479 ( +0.13%) [ +0.07% +0.00% +0.04% / +0.13% +0.16% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.07% +0.00% +0.13% / +0.11% +0.07% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.00% +0.26% +0.02% / +0.09% +0.00% -0.04%] index_select spread : Elapsed 0.055 ms (5.486 ms / 100) 5.487 -> 5.483 ( -0.07%) [ +0.00% +0.00% +0.04% / +0.04% +0.02% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.487 ms / 100) 5.484 -> 5.483 ( -0.02%) [ +0.00% +0.13% +0.16% / +0.00% -0.02% -0.02%] index_select strided 5 : Elapsed 0.055 ms (5.484 ms / 100) 5.484 -> 5.487 ( +0.05%) [ +0.13% +0.07% +0.00% / +0.05% +0.11% +0.07%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.476 -> 5.480 ( +0.07%) [ +0.02% +0.00% +0.18% / +0.07% +0.15% +0.22%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.485 -> 5.481 ( -0.07%) [ +0.00% +0.22% +0.20% / +0.09% -0.07% +0.09%] index_select random : Elapsed 0.055 ms (5.485 ms / 100) 5.488 -> 5.485 ( -0.05%) [ +0.00% +0.05% +0.24% / +0.04% +0.04% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.488 ms / 100) B = [4, 5, 20, 40] (stride (200, 1, 800, 5)) A = [4, 5, 20, 16] (stride (5, 1, 320, 20)) dim = 3 3.958 -> 3.960 ( +0.05%) [ +0.10% +0.00% +0.03% / +0.05% +0.63% +0.66%] index_add_ linear : Elapsed 0.040 ms (3.962 ms / 100) 3.803 -> 3.810 ( +0.18%) [ +0.13% +0.00% +0.18% / +0.18% +0.63% +0.71%] index_copy_ linear : Elapsed 0.038 ms (3.808 ms / 100) 3.955 -> 3.955 ( +0.00%) [ +0.20% +0.00% +0.23% / +0.00% +0.71% +0.56%] index_add_ reverse : Elapsed 0.040 ms (3.963 ms / 100) 3.813 -> 3.812 ( -0.03%) [ +0.00% +0.00% +0.08% / -0.03% +0.60% +0.52%] index_copy_ reverse : Elapsed 0.038 ms (3.813 ms / 100) 3.951 -> 3.952 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.51% +0.53%] index_add_ spread : Elapsed 0.040 ms (3.951 ms / 100) 3.806 -> 3.807 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.55% +0.53%] index_copy_ spread : Elapsed 0.038 ms (3.806 ms / 100) 3.943 -> 3.946 ( +0.08%) [ +0.00% +0.10% +0.13% / +0.08% +0.63% +0.61%] index_add_ strided 3 : Elapsed 0.039 ms (3.943 ms / 100) 3.809 -> 3.811 ( +0.05%) [ +0.00% +0.05% +0.13% / +0.05% +0.68% +0.58%] index_copy_ strided 3 : Elapsed 0.038 ms (3.809 ms / 100) 3.962 -> 3.964 ( +0.05%) [ +0.00% +0.15% +0.03% / +0.05% +0.45% +0.50%] index_add_ strided 7 : Elapsed 0.040 ms (3.962 ms / 100) 3.820 -> 3.823 ( +0.08%) [ +0.00% +0.08% +0.03% / +0.08% +0.39% +0.39%] index_copy_ strided 7 : Elapsed 0.038 ms (3.820 ms / 100) 3.962 -> 3.961 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.50% +0.50%] index_add_ perm : Elapsed 0.040 ms (3.963 ms / 100) 3.809 -> 3.810 ( +0.03%) [ +0.00% +0.05% +0.05% / +0.03% +0.53% +0.45%] index_copy_ perm : Elapsed 0.038 ms (3.809 ms / 100) 3.960 -> 3.965 ( +0.13%) [ +0.10% +0.20% +0.00% / +0.13% +0.51% +0.43%] index_add_ perm_sorted : Elapsed 0.040 ms (3.964 ms / 100) 3.807 -> 3.813 ( +0.16%) [ +0.11% +0.13% +0.00% / +0.16% +0.50% +0.45%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.811 ms / 100) 5.486 -> 5.491 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.15% +0.09% +0.20%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.494 -> 5.499 ( +0.09%) [ +0.04% +0.00% +0.05% / +0.13% +0.24% +0.09%] index_select wrap : Elapsed 0.055 ms (5.496 ms / 100) 5.497 -> 5.499 ( +0.04%) [ +0.11% +0.00% +0.02% / +0.04% +0.09% +0.11%] index_select linear : Elapsed 0.055 ms (5.503 ms / 100) 5.499 -> 5.499 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.05% +0.07%] index_select reverse : Elapsed 0.055 ms (5.499 ms / 100) 5.494 -> 5.484 ( -0.18%) [ +0.00% +0.02% +0.00% / -0.09% -0.18% -0.09%] index_select skip64 : Elapsed 0.055 ms (5.494 ms / 100) 5.493 -> 5.487 ( -0.11%) [ +0.07% +0.00% +0.07% / +0.00% -0.07% -0.11%] index_select skip256 : Elapsed 0.055 ms (5.497 ms / 100) 5.497 -> 5.493 ( -0.07%) [ +0.02% +0.07% +0.00% / +0.13% +0.00% -0.07%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.493 -> 5.495 ( +0.04%) [ +0.05% +0.13% +0.00% / +0.04% +0.04% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.496 ms / 100) 5.493 -> 5.495 ( +0.04%) [ +0.05% +0.09% +0.00% / +0.04% +0.11% +0.15%] index_select strided 5 : Elapsed 0.055 ms (5.496 ms / 100) 5.493 -> 5.492 ( -0.02%) [ +0.00% +0.07% +0.11% / -0.02% +0.09% +0.18%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.00% +0.04% +0.11% / +0.02% +0.02% +0.07%] index_select strided 8 : Elapsed 0.055 ms (5.490 ms / 100) 5.490 -> 5.499 ( +0.16%) [ +0.13% +0.00% +0.15% / +0.26% +0.16% +0.29%] index_select random : Elapsed 0.055 ms (5.497 ms / 100) 5.491 -> 5.495 ( +0.07%) [ +0.20% +0.05% +0.00% / +0.07% +0.11% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.502 ms / 100) B = [4, 5, 20, 40] (stride (20, 80, 1, 400)) A = [4, 5, 20, 16] (stride (1600, 20, 1, 100)) dim = 3 3.633 -> 3.635 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.85% +0.85%] index_add_ linear : Elapsed 0.036 ms (3.633 ms / 100) 3.507 -> 3.508 ( +0.03%) [ +0.00% +0.06% +0.06% / +0.03% +0.83% +0.80%] index_copy_ linear : Elapsed 0.035 ms (3.507 ms / 100) 3.628 -> 3.629 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.69% +0.69%] index_add_ reverse : Elapsed 0.036 ms (3.629 ms / 100) 3.496 -> 3.497 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.97% +0.97%] index_copy_ reverse : Elapsed 0.035 ms (3.497 ms / 100) 3.628 -> 3.628 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.63% +0.63%] index_add_ spread : Elapsed 0.036 ms (3.628 ms / 100) 3.500 -> 3.502 ( +0.06%) [ +0.06% +0.00% +0.03% / +0.06% +0.74% +0.91%] index_copy_ spread : Elapsed 0.035 ms (3.502 ms / 100) 3.622 -> 3.623 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.80% +0.80%] index_add_ strided 3 : Elapsed 0.036 ms (3.623 ms / 100) 3.496 -> 3.496 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.72% +0.92%] index_copy_ strided 3 : Elapsed 0.035 ms (3.496 ms / 100) 3.626 -> 3.627 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.77% +0.77%] index_add_ strided 7 : Elapsed 0.036 ms (3.627 ms / 100) 3.496 -> 3.498 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.92% +0.94%] index_copy_ strided 7 : Elapsed 0.035 ms (3.497 ms / 100) 3.636 -> 3.633 ( -0.08%) [ +0.03% +0.00% +0.00% / -0.08% +0.77% +0.72%] index_add_ perm : Elapsed 0.036 ms (3.637 ms / 100) 3.509 -> 3.507 ( -0.06%) [ +0.00% +0.00% +0.03% / -0.06% +0.77% +0.77%] index_copy_ perm : Elapsed 0.035 ms (3.509 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.77% +0.80%] index_add_ perm_sorted : Elapsed 0.036 ms (3.624 ms / 100) 3.493 -> 3.494 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +1.03% +1.12%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.494 ms / 100) 5.475 -> 5.477 ( +0.04%) [ +0.15% +0.00% +0.11% / +0.16% +0.04% +0.05%] index_select const : Elapsed 0.055 ms (5.483 ms / 100) 5.489 -> 5.483 ( -0.11%) [ +0.13% +0.11% +0.00% / +0.09% -0.11% -0.09%] index_select wrap : Elapsed 0.055 ms (5.496 ms / 100) 5.491 -> 5.484 ( -0.13%) [ +0.04% +0.00% +0.00% / +0.05% -0.13% +0.05%] index_select linear : Elapsed 0.055 ms (5.493 ms / 100) 5.486 -> 5.482 ( -0.07%) [ +0.09% +0.15% +0.00% / +0.11% -0.07% +0.00%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.477 -> 5.481 ( +0.07%) [ +0.00% +0.02% +0.11% / +0.16% +0.07% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.477 ms / 100) 5.475 -> 5.472 ( -0.05%) [ +0.26% +0.09% +0.00% / -0.05% +0.13% +0.18%] index_select skip256 : Elapsed 0.055 ms (5.489 ms / 100) 5.487 -> 5.483 ( -0.07%) [ +0.05% +0.07% +0.00% / +0.16% -0.07% -0.02%] index_select spread : Elapsed 0.055 ms (5.490 ms / 100) 5.488 -> 5.485 ( -0.05%) [ +0.07% +0.07% +0.00% / -0.02% -0.05% +0.00%] index_select strided 3 : Elapsed 0.055 ms (5.492 ms / 100) 5.491 -> 5.485 ( -0.11%) [ +0.00% +0.04% +0.04% / +0.05% -0.11% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.483 -> 5.485 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.09% +0.33% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.479 -> 5.476 ( -0.05%) [ +0.11% +0.05% +0.00% / +0.02% -0.05% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.485 ms / 100) 5.485 -> 5.482 ( -0.05%) [ +0.16% +0.00% +0.05% / +0.11% +0.13% -0.05%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.490 -> 5.489 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.02% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.490 ms / 100) B = [4, 5, 20, 40] (stride (20, 80, 1, 400)) A = [4, 5, 20, 16] (stride (5, 1, 320, 20)) dim = 3 3.950 -> 3.956 ( +0.15%) [ +0.03% +0.15% +0.00% / +0.15% +0.68% +0.71%] index_add_ linear : Elapsed 0.040 ms (3.951 ms / 100) 3.794 -> 3.801 ( +0.18%) [ +0.03% +0.11% +0.00% / +0.18% +0.69% +0.69%] index_copy_ linear : Elapsed 0.038 ms (3.795 ms / 100) 3.952 -> 3.951 ( -0.03%) [ +0.18% +0.00% +0.10% / -0.03% +0.66% +0.63%] index_add_ reverse : Elapsed 0.040 ms (3.959 ms / 100) 3.807 -> 3.806 ( -0.03%) [ +0.11% +0.00% +0.03% / -0.03% +0.55% +0.53%] index_copy_ reverse : Elapsed 0.038 ms (3.811 ms / 100) 3.945 -> 3.946 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.56% +0.56%] index_add_ spread : Elapsed 0.039 ms (3.945 ms / 100) 3.792 -> 3.795 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.58% +0.53%] index_copy_ spread : Elapsed 0.038 ms (3.792 ms / 100) 3.944 -> 3.944 ( +0.00%) [ +0.00% +0.03% +0.08% / +0.00% +0.48% +0.63%] index_add_ strided 3 : Elapsed 0.039 ms (3.944 ms / 100) 3.804 -> 3.805 ( +0.03%) [ +0.00% +0.11% +0.11% / +0.03% +0.60% +0.63%] index_copy_ strided 3 : Elapsed 0.038 ms (3.804 ms / 100) 3.956 -> 3.961 ( +0.13%) [ +0.10% +0.10% +0.00% / +0.13% +0.56% +0.38%] index_add_ strided 7 : Elapsed 0.040 ms (3.960 ms / 100) 3.811 -> 3.813 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.50% +0.34%] index_copy_ strided 7 : Elapsed 0.038 ms (3.812 ms / 100) 3.954 -> 3.958 ( +0.10%) [ +0.00% +0.13% +0.00% / +0.10% +0.43% +0.43%] index_add_ perm : Elapsed 0.040 ms (3.954 ms / 100) 3.795 -> 3.799 ( +0.11%) [ +0.03% +0.18% +0.00% / +0.11% +0.47% +0.50%] index_copy_ perm : Elapsed 0.038 ms (3.796 ms / 100) 3.958 -> 3.959 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.040 ms (3.959 ms / 100) 3.801 -> 3.803 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.42% +0.39%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.802 ms / 100) 5.482 -> 5.482 ( +0.00%) [ +0.00% +0.07% +0.15% / +0.00% +0.00% +0.29%] index_select const : Elapsed 0.055 ms (5.482 ms / 100) 5.484 -> 5.487 ( +0.05%) [ +0.11% +0.00% +0.04% / +0.05% +0.18% +0.13%] index_select wrap : Elapsed 0.055 ms (5.490 ms / 100) 5.487 -> 5.490 ( +0.05%) [ +0.18% +0.15% +0.00% / +0.09% +0.40% +0.05%] index_select linear : Elapsed 0.055 ms (5.497 ms / 100) 5.488 -> 5.486 ( -0.04%) [ +0.13% +0.09% +0.00% / -0.04% +0.29% +0.18%] index_select reverse : Elapsed 0.055 ms (5.495 ms / 100) 5.480 -> 5.486 ( +0.11%) [ +0.20% +0.00% +0.18% / +0.15% +0.20% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.491 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.05% +0.04% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.485 ms / 100) 5.486 -> 5.492 ( +0.11%) [ +0.02% +0.00% +0.16% / +0.11% +0.16% +0.16%] index_select spread : Elapsed 0.055 ms (5.487 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.00% +0.04% +0.07% / -0.05% +0.18% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.486 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.15% +0.11% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.489 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.05% +0.02% +0.00% / -0.04% +0.22% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.488 ms / 100) 5.482 -> 5.483 ( +0.02%) [ +0.00% +0.24% +0.04% / +0.02% +0.05% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.482 -> 5.487 ( +0.09%) [ +0.16% +0.00% +0.33% / +0.11% +0.09% +0.18%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.00% +0.07% +0.02% / -0.02% +0.09% -0.09%] index_select random_sorted : Elapsed 0.055 ms (5.490 ms / 100) B = [4, 5, 20, 40] (stride (1, 80, 4, 400)) A = [4, 5, 20, 16] (stride (1, 80, 4, 400)) dim = 3 1.344 -> 1.344 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.82% +0.82%] index_add_ linear : Elapsed 0.013 ms (1.346 ms / 100) 1.294 -> 1.296 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +1.00% +0.77%] index_copy_ linear : Elapsed 0.013 ms (1.295 ms / 100) 1.341 -> 1.344 ( +0.22%) [ +0.30% +0.30% +0.00% / +0.22% +1.04% +1.04%] index_add_ reverse : Elapsed 0.013 ms (1.345 ms / 100) 1.292 -> 1.293 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +1.08% +1.08%] index_copy_ reverse : Elapsed 0.013 ms (1.292 ms / 100) 1.342 -> 1.344 ( +0.15%) [ +0.22% +0.22% +0.00% / +0.15% +0.89% +0.89%] index_add_ spread : Elapsed 0.013 ms (1.345 ms / 100) 1.292 -> 1.292 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.00% +0.77% +0.93%] index_copy_ spread : Elapsed 0.013 ms (1.293 ms / 100) 1.343 -> 1.344 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +2.16% +0.97%] index_add_ strided 3 : Elapsed 0.013 ms (1.343 ms / 100) 1.293 -> 1.294 ( +0.08%) [ +0.00% +0.15% +0.08% / +0.08% +1.31% +1.08%] index_copy_ strided 3 : Elapsed 0.013 ms (1.293 ms / 100) 1.342 -> 1.342 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.89% +0.97%] index_add_ strided 7 : Elapsed 0.013 ms (1.342 ms / 100) 1.292 -> 1.293 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +1.08% +1.01%] index_copy_ strided 7 : Elapsed 0.013 ms (1.292 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.89% +0.97%] index_add_ perm : Elapsed 0.013 ms (1.344 ms / 100) 1.294 -> 1.292 ( -0.15%) [ +0.70% +0.00% +0.08% / -0.15% +1.70% +1.08%] index_copy_ perm : Elapsed 0.013 ms (1.303 ms / 100) 1.344 -> 1.343 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.82% +0.82%] index_add_ perm_sorted : Elapsed 0.013 ms (1.344 ms / 100) 1.293 -> 1.295 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.93% +1.16%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.293 ms / 100) 3.527 -> 3.532 ( +0.14%) [ +0.00% +0.26% +0.20% / +0.26% +0.14% +0.17%] index_select const : Elapsed 0.035 ms (3.527 ms / 100) 3.539 -> 3.541 ( +0.06%) [ +0.00% +0.08% +0.06% / +0.06% +0.06% +0.14%] index_select wrap : Elapsed 0.035 ms (3.539 ms / 100) 3.531 -> 3.545 ( +0.40%) [ +0.23% +0.00% +0.25% / +0.40% +0.42% +0.45%] index_select linear : Elapsed 0.035 ms (3.539 ms / 100) 3.539 -> 3.542 ( +0.08%) [ +0.08% +0.00% +0.11% / +0.08% +0.20% +0.37%] index_select reverse : Elapsed 0.035 ms (3.542 ms / 100) 3.531 -> 3.533 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.17% +0.08%] index_select skip64 : Elapsed 0.035 ms (3.534 ms / 100) 3.531 -> 3.531 ( +0.00%) [ +0.17% +0.14% +0.00% / +0.11% +0.06% +0.00%] index_select skip256 : Elapsed 0.035 ms (3.537 ms / 100) 3.541 -> 3.538 ( -0.08%) [ +0.00% +0.08% +0.00% / +0.14% +0.00% -0.08%] index_select spread : Elapsed 0.035 ms (3.541 ms / 100) 3.555 -> 3.534 ( -0.59%) [ +0.00% +0.08% +0.06% / +0.06% -0.53% -0.59%] index_select strided 3 : Elapsed 0.036 ms (3.555 ms / 100) 3.559 -> 3.539 ( -0.56%) [ +0.08% +0.17% +0.00% / +0.20% -0.56% -0.51%] index_select strided 5 : Elapsed 0.036 ms (3.562 ms / 100) 3.538 -> 3.533 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.31% +0.14%] index_select strided 7 : Elapsed 0.035 ms (3.538 ms / 100) 3.530 -> 3.529 ( -0.03%) [ +0.00% +0.03% +0.11% / -0.03% +0.31% +0.20%] index_select strided 8 : Elapsed 0.035 ms (3.530 ms / 100) 3.536 -> 3.532 ( -0.11%) [ +0.00% +0.00% +0.08% / -0.11% +0.57% +0.85%] index_select random : Elapsed 0.035 ms (3.536 ms / 100) 3.532 -> 3.534 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.91% +0.85%] index_select random_sorted : Elapsed 0.035 ms (3.534 ms / 100) out_shape = [40, 16, 5, 20] in_shape = [4, 16, 5, 20] idx_dim = 0 B = [40, 16, 5, 20] (stride (1600, 1, 16, 80)) A = [4, 16, 5, 20] (stride (100, 400, 20, 1)) dim = 0 1.228 -> 1.228 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.41%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.59% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.57% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.65% +0.57%] index_add_ spread : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.67% +0.50%] index_copy_ spread : Elapsed 0.012 ms (1.189 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.82% +0.82%] index_add_ strided 3 : Elapsed 0.012 ms (1.227 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.84%] index_copy_ strided 3 : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.98% +0.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.226 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.84%] index_copy_ perm : Elapsed 0.012 ms (1.188 ms / 100) 1.227 -> 1.226 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.57% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.188 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.59% +0.50%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.706 -> 8.697 ( -0.10%) [ +0.09% +0.00% +0.09% / -0.10% +0.28% +0.26%] index_select const : Elapsed 0.087 ms (8.714 ms / 100) 8.731 -> 8.739 ( +0.09%) [ +0.24% +0.00% +0.10% / +0.42% +0.25% +0.09%] index_select wrap : Elapsed 0.088 ms (8.752 ms / 100) 8.716 -> 8.745 ( +0.33%) [ +0.07% +0.15% +0.00% / +0.39% +0.33% +0.33%] index_select linear : Elapsed 0.087 ms (8.722 ms / 100) 8.741 -> 8.752 ( +0.13%) [ +0.00% +0.10% +0.15% / +0.13% +0.23% +0.22%] index_select reverse : Elapsed 0.087 ms (8.741 ms / 100) 8.699 -> 8.699 ( +0.00%) [ +0.26% +0.16% +0.00% / +0.00% +0.53% +0.29%] index_select skip64 : Elapsed 0.087 ms (8.722 ms / 100) 8.708 -> 8.700 ( -0.09%) [ +0.10% +0.00% +0.00% / -0.09% +0.17% +0.25%] index_select skip256 : Elapsed 0.087 ms (8.717 ms / 100) 8.731 -> 8.739 ( +0.09%) [ +0.07% +0.00% +0.18% / +0.18% +0.47% +0.09%] index_select spread : Elapsed 0.087 ms (8.737 ms / 100) 8.742 -> 8.741 ( -0.01%) [ +0.00% +0.01% +0.13% / -0.01% +0.24% +0.05%] index_select strided 3 : Elapsed 0.087 ms (8.742 ms / 100) 8.733 -> 8.735 ( +0.02%) [ +0.06% +0.27% +0.00% / +0.02% +0.40% +0.46%] index_select random : Elapsed 0.087 ms (8.738 ms / 100) 8.738 -> 8.730 ( -0.09%) [ +0.00% +0.07% +0.05% / -0.09% +0.27% +0.29%] index_select random_sorted : Elapsed 0.087 ms (8.738 ms / 100) B = [40, 16, 5, 20] (stride (100, 4000, 1, 5)) A = [4, 16, 5, 20] (stride (1600, 20, 320, 1)) dim = 0 1.313 -> 1.317 ( +0.30%) [ +0.15% +0.23% +0.00% / +0.30% +0.38% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.55% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.313 -> 1.313 ( +0.00%) [ +0.30% +0.15% +0.00% / +0.00% +0.61% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.271 -> 1.275 ( +0.31%) [ +0.00% +0.08% +0.16% / +0.31% +0.94% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.271 ms / 100) 1.313 -> 1.317 ( +0.30%) [ +0.08% +0.00% +0.08% / +0.30% +0.46% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.314 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.55% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.271 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.00% +0.15%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.39% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.310 -> 1.317 ( +0.53%) [ +0.38% +0.08% +0.00% / +0.53% +0.69% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.315 ms / 100) 1.271 -> 1.270 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.63% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.271 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.00% +0.08% +0.30% / +0.15% +0.61% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.312 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.00% +0.23% +0.30% / +0.15% +0.69% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.312 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.125 -> 9.143 ( +0.20%) [ +0.43% +0.00% +0.16% / +0.20% +0.38% +0.34%] index_select const : Elapsed 0.092 ms (9.164 ms / 100) 9.182 -> 9.180 ( -0.02%) [ +0.10% +0.00% +0.05% / +0.23% +0.28% -0.02%] index_select wrap : Elapsed 0.092 ms (9.191 ms / 100) 9.164 -> 9.164 ( +0.00%) [ +0.23% +0.01% +0.00% / +0.28% +0.00% +0.07%] index_select linear : Elapsed 0.092 ms (9.185 ms / 100) 9.172 -> 9.195 ( +0.25%) [ +0.27% +0.13% +0.00% / +0.48% +0.25% +0.29%] index_select reverse : Elapsed 0.092 ms (9.197 ms / 100) 9.134 -> 9.153 ( +0.21%) [ +0.00% +0.14% +0.12% / +0.21% +0.23% +0.25%] index_select skip64 : Elapsed 0.091 ms (9.134 ms / 100) 9.137 -> 9.149 ( +0.13%) [ +0.04% +0.18% +0.00% / +0.15% +0.13% +0.18%] index_select skip256 : Elapsed 0.091 ms (9.141 ms / 100) 9.170 -> 9.166 ( -0.04%) [ +0.00% +0.23% +0.10% / -0.04% +0.05% +0.09%] index_select spread : Elapsed 0.092 ms (9.170 ms / 100) 9.169 -> 9.177 ( +0.09%) [ +0.26% +0.00% +0.27% / +0.09% +0.10% +0.16%] index_select strided 3 : Elapsed 0.092 ms (9.193 ms / 100) 9.166 -> 9.180 ( +0.15%) [ +0.00% +0.02% +0.04% / +0.15% +0.34% +0.53%] index_select random : Elapsed 0.092 ms (9.166 ms / 100) 9.163 -> 9.172 ( +0.10%) [ +0.12% +0.00% +0.25% / +0.14% +0.20% +0.10%] index_select random_sorted : Elapsed 0.092 ms (9.174 ms / 100) B = [40, 16, 5, 20] (stride (20, 800, 12800, 1)) A = [4, 16, 5, 20] (stride (1600, 1, 16, 80)) dim = 0 1.392 -> 1.393 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.57% +0.57%] index_add_ linear : Elapsed 0.014 ms (1.393 ms / 100) 1.351 -> 1.353 ( +0.15%) [ +0.44% +0.15% +0.00% / +0.15% +0.44% +0.67%] index_copy_ linear : Elapsed 0.014 ms (1.357 ms / 100) 1.388 -> 1.390 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.14% +0.65% +0.65%] index_add_ reverse : Elapsed 0.014 ms (1.390 ms / 100) 1.344 -> 1.345 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.74% +0.82%] index_copy_ reverse : Elapsed 0.013 ms (1.344 ms / 100) 1.395 -> 1.396 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.57% +0.50%] index_add_ spread : Elapsed 0.014 ms (1.396 ms / 100) 1.349 -> 1.349 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.44% +0.37%] index_copy_ spread : Elapsed 0.013 ms (1.349 ms / 100) 1.391 -> 1.390 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +2.30% +0.65%] index_add_ strided 3 : Elapsed 0.014 ms (1.391 ms / 100) 1.350 -> 1.350 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.81% +0.67%] index_copy_ strided 3 : Elapsed 0.014 ms (1.352 ms / 100) 1.387 -> 1.391 ( +0.29%) [ +0.14% +0.00% +0.07% / +0.29% +0.72% +0.58%] index_add_ strided 7 : Elapsed 0.014 ms (1.389 ms / 100) 1.342 -> 1.344 ( +0.15%) [ +0.07% +0.00% +0.00% / +0.15% +0.82% +0.89%] index_copy_ strided 7 : Elapsed 0.013 ms (1.343 ms / 100) 1.394 -> 1.395 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.57% +0.72%] index_add_ perm : Elapsed 0.014 ms (1.394 ms / 100) 1.347 -> 1.349 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.67% +0.59%] index_copy_ perm : Elapsed 0.013 ms (1.347 ms / 100) 1.388 -> 1.389 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.58% +0.86%] index_add_ perm_sorted : Elapsed 0.014 ms (1.390 ms / 100) 1.342 -> 1.344 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.82% +0.82%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) 9.157 -> 9.164 ( +0.08%) [ +0.00% +0.20% +0.12% / +0.08% +0.23% +0.20%] index_select const : Elapsed 0.092 ms (9.157 ms / 100) 9.173 -> 9.185 ( +0.13%) [ +0.02% +0.00% +0.02% / +0.13% +0.15% +0.15%] index_select wrap : Elapsed 0.092 ms (9.175 ms / 100) 9.168 -> 9.171 ( +0.03%) [ +0.00% +0.11% +0.14% / +0.15% +0.03% +0.29%] index_select linear : Elapsed 0.092 ms (9.168 ms / 100) 9.175 -> 9.173 ( -0.02%) [ +0.08% +0.07% +0.00% / -0.02% +0.07% +0.09%] index_select reverse : Elapsed 0.092 ms (9.182 ms / 100) 9.152 -> 9.153 ( +0.01%) [ +0.07% +0.00% +0.16% / +0.34% +0.39% +0.01%] index_select skip64 : Elapsed 0.092 ms (9.158 ms / 100) 9.144 -> 9.156 ( +0.13%) [ +0.14% +0.00% +0.16% / +0.28% +0.13% +0.17%] index_select skip256 : Elapsed 0.092 ms (9.157 ms / 100) 9.174 -> 9.178 ( +0.04%) [ +0.03% +0.00% +0.05% / +0.04% +0.04% +0.15%] index_select spread : Elapsed 0.092 ms (9.177 ms / 100) 9.169 -> 9.171 ( +0.02%) [ +0.31% +0.00% +0.14% / +0.02% +0.23% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.197 ms / 100) 9.172 -> 9.183 ( +0.12%) [ +0.03% +0.20% +0.00% / +0.12% +0.24% +0.16%] index_select random : Elapsed 0.092 ms (9.175 ms / 100) 9.177 -> 9.166 ( -0.12%) [ +0.22% +0.00% +0.02% / -0.12% +0.22% -0.02%] index_select random_sorted : Elapsed 0.092 ms (9.197 ms / 100) B = [40, 16, 5, 20] (stride (1, 40, 12800, 640)) A = [4, 16, 5, 20] (stride (320, 20, 1280, 1)) dim = 0 1.233 -> 1.232 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.195 -> 1.195 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.33%] index_copy_ linear : Elapsed 0.012 ms (1.195 ms / 100) 1.233 -> 1.234 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.234 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.08% +0.42% +0.00%] index_copy_ reverse : Elapsed 0.012 ms (1.197 ms / 100) 1.244 -> 1.244 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.00% +0.00%] index_add_ spread : Elapsed 0.012 ms (1.245 ms / 100) 1.205 -> 1.204 ( -0.08%) [ +0.00% +0.17% +0.17% / +0.00% +0.00% -0.08%] index_copy_ spread : Elapsed 0.012 ms (1.205 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +0.65% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.237 ms / 100) 1.197 -> 1.200 ( +0.25%) [ +0.00% +0.17% +0.08% / +0.25% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.012 ms (1.197 ms / 100) 1.238 -> 1.238 ( +0.00%) [ +0.16% +0.24% +0.00% / +0.00% +0.40% +0.40%] index_add_ strided 7 : Elapsed 0.012 ms (1.240 ms / 100) 1.199 -> 1.200 ( +0.08%) [ +0.00% +0.08% +0.17% / +0.08% +0.33% +0.42%] index_copy_ strided 7 : Elapsed 0.012 ms (1.199 ms / 100) 1.244 -> 1.244 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.00% +0.08%] index_add_ perm : Elapsed 0.012 ms (1.245 ms / 100) 1.205 -> 1.205 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.08% +0.17% +0.00%] index_copy_ perm : Elapsed 0.012 ms (1.207 ms / 100) 1.244 -> 1.243 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.00% +0.00%] index_add_ perm_sorted : Elapsed 0.012 ms (1.245 ms / 100) 1.204 -> 1.204 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.17% +0.00% +0.17%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.205 ms / 100) 8.756 -> 8.764 ( +0.09%) [ +0.10% +0.00% +0.16% / +0.40% +0.11% +0.09%] index_select const : Elapsed 0.088 ms (8.765 ms / 100) 8.787 -> 8.781 ( -0.07%) [ +0.13% +0.07% +0.00% / -0.02% -0.07% +0.17%] index_select wrap : Elapsed 0.088 ms (8.798 ms / 100) 8.790 -> 8.796 ( +0.07%) [ +0.00% +0.00% +0.08% / +0.08% +0.10% +0.07%] index_select linear : Elapsed 0.088 ms (8.790 ms / 100) 8.795 -> 8.796 ( +0.01%) [ +0.05% +0.01% +0.00% / +0.01% +0.18% +0.02%] index_select reverse : Elapsed 0.088 ms (8.799 ms / 100) 8.748 -> 8.748 ( +0.00%) [ +0.23% +0.18% +0.00% / +0.00% +0.25% +0.11%] index_select skip64 : Elapsed 0.088 ms (8.768 ms / 100) 8.763 -> 8.755 ( -0.09%) [ +0.10% +0.03% +0.00% / +0.07% +0.03% -0.09%] index_select skip256 : Elapsed 0.088 ms (8.772 ms / 100) 8.802 -> 8.776 ( -0.30%) [ +0.00% +0.06% +0.09% / -0.12% -0.30% -0.22%] index_select spread : Elapsed 0.088 ms (8.802 ms / 100) 8.795 -> 8.781 ( -0.16%) [ +0.00% +0.11% +0.10% / +0.13% -0.07% -0.16%] index_select strided 3 : Elapsed 0.088 ms (8.795 ms / 100) 8.785 -> 8.790 ( +0.06%) [ +0.30% +0.15% +0.00% / +0.07% +0.06% +0.28%] index_select random : Elapsed 0.088 ms (8.811 ms / 100) 8.779 -> 8.789 ( +0.11%) [ +0.08% +0.00% +0.10% / +0.18% +0.13% +0.11%] index_select random_sorted : Elapsed 0.088 ms (8.786 ms / 100) B = [40, 16, 5, 20] (stride (5, 200, 1, 3200)) dim = 0 fill_cnt = 4 0.801 -> 0.802 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.62% +0.50%] index_fill_ const : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_fill_ linear : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.25% +0.37%] index_fill_ reverse : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.37% +0.37%] index_fill_ skip64 : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.37% +0.37%] index_fill_ skip256 : Elapsed 0.008 ms (0.802 ms / 100) 0.803 -> 0.802 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.25% +0.25%] index_fill_ spread : Elapsed 0.008 ms (0.803 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.25% +0.37%] index_fill_ strided 3 : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.25% +0.50%] index_fill_ strided 5 : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.12%] index_fill_ strided 7 : Elapsed 0.008 ms (0.803 ms / 100) 0.801 -> 0.803 ( +0.25%) [ +0.12% +0.12% +0.00% / +0.25% +0.50% +0.50%] index_fill_ strided 8 : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.37%] index_fill_ strided 16 : Elapsed 0.008 ms (0.802 ms / 100) 0.801 -> 0.802 ( +0.12%) [ +0.25% +0.25% +0.00% / +0.12% +0.75% +0.50%] index_fill_ random : Elapsed 0.008 ms (0.803 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.25% +0.37%] index_fill_ random_sorted : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.25%] index_fill_ perm : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.25% / +0.00% +0.12% +0.12%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.803 ms / 100) B = [40, 16, 5, 20] (stride (5, 200, 1, 3200)) A = [4, 16, 5, 20] (stride (5, 400, 1, 20)) dim = 0 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.53% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.47% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.281 ms / 100) 1.335 -> 1.336 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.52% +0.52%] index_add_ reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.295 -> 1.302 ( +0.54%) [ +0.15% +0.23% +0.00% / +0.54% +0.62% +0.69%] index_copy_ reverse : Elapsed 0.013 ms (1.297 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.15% +0.08% / +0.08% +0.60% +0.60%] index_add_ spread : Elapsed 0.013 ms (1.323 ms / 100) 1.286 -> 1.285 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.62% +0.54%] index_copy_ spread : Elapsed 0.013 ms (1.286 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.76% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.78% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.330 -> 1.334 ( +0.30%) [ +0.30% +0.00% +0.30% / +0.30% +0.38% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.334 ms / 100) 1.288 -> 1.295 ( +0.54%) [ +0.47% +0.00% +0.47% / +0.54% +0.70% +0.70%] index_copy_ strided 7 : Elapsed 0.013 ms (1.294 ms / 100) 1.323 -> 1.322 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.60% +0.68%] index_add_ perm : Elapsed 0.013 ms (1.324 ms / 100) 1.285 -> 1.284 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.78% +0.70%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.337 -> 1.338 ( +0.07%) [ +0.00% +0.37% +0.07% / +0.07% +0.82% +0.90%] index_add_ perm_sorted : Elapsed 0.013 ms (1.337 ms / 100) 1.297 -> 1.297 ( +0.00%) [ +0.00% +0.39% +0.08% / +0.00% +1.23% +0.85%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.297 ms / 100) 9.220 -> 9.215 ( -0.05%) [ +0.27% +0.00% +0.11% / -0.05% +0.31% +0.47%] index_select const : Elapsed 0.092 ms (9.245 ms / 100) 9.219 -> 9.211 ( -0.09%) [ +0.27% +0.00% +0.01% / -0.09% +0.16% +0.25%] index_select wrap : Elapsed 0.092 ms (9.244 ms / 100) 9.220 -> 9.224 ( +0.04%) [ +0.14% +0.09% +0.00% / +0.16% +0.04% +0.18%] index_select linear : Elapsed 0.092 ms (9.233 ms / 100) 9.226 -> 9.220 ( -0.07%) [ +0.18% +0.00% +0.05% / -0.07% +0.05% +0.09%] index_select reverse : Elapsed 0.092 ms (9.243 ms / 100) 9.212 -> 9.212 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.27% +0.28%] index_select skip64 : Elapsed 0.092 ms (9.232 ms / 100) 9.209 -> 9.217 ( +0.09%) [ +0.00% +0.04% +0.13% / +0.14% +0.26% +0.09%] index_select skip256 : Elapsed 0.092 ms (9.209 ms / 100) 9.215 -> 9.231 ( +0.17%) [ +0.00% +0.10% +0.14% / +0.17% +0.26% +0.21%] index_select spread : Elapsed 0.092 ms (9.215 ms / 100) 9.210 -> 9.229 ( +0.21%) [ +0.05% +0.00% +0.20% / +0.21% +0.23% +0.35%] index_select strided 3 : Elapsed 0.092 ms (9.215 ms / 100) 9.223 -> 9.232 ( +0.10%) [ +0.08% +0.01% +0.00% / +0.10% +0.34% +0.26%] index_select random : Elapsed 0.092 ms (9.230 ms / 100) 9.209 -> 9.221 ( +0.13%) [ +0.18% +0.00% +0.13% / +0.47% +0.40% +0.13%] index_select random_sorted : Elapsed 0.092 ms (9.226 ms / 100) out_shape = [4, 40, 5, 20] in_shape = [4, 16, 5, 20] idx_dim = 1 B = [4, 40, 5, 20] (stride (4000, 100, 20, 1)) A = [4, 16, 5, 20] (stride (5, 400, 1, 20)) dim = 1 3.838 -> 3.840 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.63% +0.65%] index_add_ linear : Elapsed 0.038 ms (3.839 ms / 100) 3.690 -> 3.691 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.73% +0.65%] index_copy_ linear : Elapsed 0.037 ms (3.690 ms / 100) 3.842 -> 3.841 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.52% +0.55%] index_add_ reverse : Elapsed 0.038 ms (3.842 ms / 100) 3.683 -> 3.682 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.57% +0.60%] index_copy_ reverse : Elapsed 0.037 ms (3.683 ms / 100) 3.829 -> 3.829 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.50% +0.52%] index_add_ spread : Elapsed 0.038 ms (3.830 ms / 100) 3.688 -> 3.688 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.49% +0.46%] index_copy_ spread : Elapsed 0.037 ms (3.689 ms / 100) 3.839 -> 3.839 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.55% +0.49%] index_add_ strided 3 : Elapsed 0.038 ms (3.839 ms / 100) 3.693 -> 3.695 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.70% +0.57%] index_copy_ strided 3 : Elapsed 0.037 ms (3.695 ms / 100) 3.845 -> 3.846 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.39% +0.39%] index_add_ strided 7 : Elapsed 0.038 ms (3.846 ms / 100) 3.686 -> 3.688 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.38% +0.38%] index_copy_ strided 7 : Elapsed 0.037 ms (3.688 ms / 100) 3.840 -> 3.841 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.42% +0.47%] index_add_ perm : Elapsed 0.038 ms (3.841 ms / 100) 3.693 -> 3.694 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.43% +0.49%] index_copy_ perm : Elapsed 0.037 ms (3.693 ms / 100) 3.840 -> 3.842 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.47% +0.42%] index_add_ perm_sorted : Elapsed 0.038 ms (3.844 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.43% +0.43%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.695 ms / 100) 5.463 -> 5.465 ( +0.04%) [ +0.00% +0.13% +0.05% / +0.04% +0.26% +0.11%] index_select const : Elapsed 0.055 ms (5.463 ms / 100) 5.462 -> 5.472 ( +0.18%) [ +0.00% +0.00% +0.11% / +0.18% +0.49% +0.29%] index_select wrap : Elapsed 0.055 ms (5.462 ms / 100) 5.470 -> 5.467 ( -0.05%) [ +0.00% +0.09% +0.00% / -0.05% +0.15% -0.02%] index_select linear : Elapsed 0.055 ms (5.470 ms / 100) 5.469 -> 5.473 ( +0.07%) [ +0.24% +0.00% +0.00% / +0.07% +0.11% +0.29%] index_select reverse : Elapsed 0.055 ms (5.482 ms / 100) 5.467 -> 5.469 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.07% +0.04% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.475 ms / 100) 5.460 -> 5.461 ( +0.02%) [ +0.00% +0.26% +0.26% / +0.09% +0.02% +0.11%] index_select skip256 : Elapsed 0.055 ms (5.460 ms / 100) 5.466 -> 5.467 ( +0.02%) [ +0.15% +0.18% +0.00% / +0.02% +0.05% +0.16%] index_select spread : Elapsed 0.055 ms (5.474 ms / 100) 5.464 -> 5.471 ( +0.13%) [ +0.09% +0.00% +0.13% / +0.13% +0.22% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.469 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.04% +0.00% +0.07% / -0.02% +0.11% +0.05%] index_select strided 5 : Elapsed 0.055 ms (5.473 ms / 100) 5.466 -> 5.470 ( +0.07%) [ +0.07% +0.09% +0.00% / +0.07% +0.16% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.470 ms / 100) 5.463 -> 5.467 ( +0.07%) [ +0.15% +0.00% +0.05% / +0.13% +0.07% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.471 ms / 100) 5.462 -> 5.470 ( +0.15%) [ +0.09% +0.07% +0.00% / +0.15% +0.27% +0.20%] index_select random : Elapsed 0.055 ms (5.467 ms / 100) 5.472 -> 5.468 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.04% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.473 ms / 100) B = [4, 40, 5, 20] (stride (4000, 20, 800, 1)) A = [4, 16, 5, 20] (stride (16, 1, 1280, 64)) dim = 1 4.096 -> 4.097 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.73% +0.71%] index_add_ linear : Elapsed 0.041 ms (4.097 ms / 100) 3.934 -> 3.934 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.86% +0.81%] index_copy_ linear : Elapsed 0.039 ms (3.934 ms / 100) 4.058 -> 4.074 ( +0.39%) [ +0.00% +0.42% +0.32% / +0.39% +1.03% +0.91%] index_add_ reverse : Elapsed 0.041 ms (4.058 ms / 100) 3.891 -> 3.906 ( +0.39%) [ +0.00% +0.59% +0.26% / +0.39% +1.11% +0.87%] index_copy_ reverse : Elapsed 0.039 ms (3.891 ms / 100) 4.088 -> 4.082 ( -0.15%) [ +0.00% +0.17% +0.12% / -0.15% +0.86% +0.51%] index_add_ spread : Elapsed 0.041 ms (4.088 ms / 100) 3.926 -> 3.923 ( -0.08%) [ +0.00% +0.20% +0.10% / -0.08% +0.89% +0.59%] index_copy_ spread : Elapsed 0.039 ms (3.926 ms / 100) 4.069 -> 4.086 ( +0.42%) [ +0.00% +0.25% +0.12% / +0.42% +0.88% +0.88%] index_add_ strided 3 : Elapsed 0.041 ms (4.069 ms / 100) 3.901 -> 3.918 ( +0.44%) [ +0.00% +0.26% +0.21% / +0.44% +0.92% +1.00%] index_copy_ strided 3 : Elapsed 0.039 ms (3.901 ms / 100) 4.058 -> 4.068 ( +0.25%) [ +0.15% +0.00% +0.12% / +0.25% +0.67% +0.89%] index_add_ strided 7 : Elapsed 0.041 ms (4.064 ms / 100) 3.891 -> 3.902 ( +0.28%) [ +0.21% +0.00% +0.18% / +0.28% +0.69% +0.87%] index_copy_ strided 7 : Elapsed 0.039 ms (3.899 ms / 100) 4.097 -> 4.096 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.71% +0.71%] index_add_ perm : Elapsed 0.041 ms (4.097 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.76%] index_copy_ perm : Elapsed 0.039 ms (3.935 ms / 100) 4.081 -> 4.076 ( -0.12%) [ +0.00% +0.17% +0.12% / -0.12% +0.64% +0.32%] index_add_ perm_sorted : Elapsed 0.041 ms (4.081 ms / 100) 3.912 -> 3.913 ( +0.03%) [ +0.00% +0.26% +0.31% / +0.03% +0.74% +0.41%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.912 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.15% +0.05% +0.00% / +0.11% +0.11% -0.02%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.491 -> 5.487 ( -0.07%) [ +0.02% +0.11% +0.00% / +0.15% -0.07% -0.05%] index_select wrap : Elapsed 0.055 ms (5.492 ms / 100) 5.493 -> 5.483 ( -0.18%) [ +0.09% +0.18% +0.00% / -0.02% -0.11% -0.18%] index_select linear : Elapsed 0.055 ms (5.498 ms / 100) 5.490 -> 5.490 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.16% +0.00% +0.04%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.487 -> 5.481 ( -0.11%) [ +0.00% +0.05% +0.04% / -0.09% -0.11% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.487 ms / 100) 5.487 -> 5.480 ( -0.13%) [ +0.00% +0.05% +0.15% / -0.11% +0.00% -0.13%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.488 -> 5.486 ( -0.04%) [ +0.09% +0.05% +0.00% / +0.11% -0.04% +0.00%] index_select spread : Elapsed 0.055 ms (5.493 ms / 100) 5.489 -> 5.489 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.02% +0.09% +0.00%] index_select strided 3 : Elapsed 0.055 ms (5.492 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.07% +0.02% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.494 ms / 100) 5.489 -> 5.491 ( +0.04%) [ +0.00% +0.05% +0.05% / +0.09% +0.09% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.489 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.04% +0.11% +0.00% / +0.02% +0.04% +0.16%] index_select strided 8 : Elapsed 0.055 ms (5.492 ms / 100) 5.487 -> 5.492 ( +0.09%) [ +0.13% +0.11% +0.00% / +0.13% +0.11% +0.09%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.00% +0.09% +0.02% / +0.00% -0.04% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.491 ms / 100) B = [4, 40, 5, 20] (stride (4000, 5, 1, 200)) A = [4, 16, 5, 20] (stride (1600, 1, 16, 80)) dim = 1 4.382 -> 4.382 ( +0.00%) [ +0.02% +0.18% +0.00% / +0.00% +0.66% +0.68%] index_add_ linear : Elapsed 0.044 ms (4.383 ms / 100) 4.218 -> 4.221 ( +0.07%) [ +0.05% +0.31% +0.00% / +0.07% +0.71% +0.71%] index_copy_ linear : Elapsed 0.042 ms (4.220 ms / 100) 4.366 -> 4.352 ( -0.32%) [ +0.00% +0.02% +0.07% / -0.32% +0.55% +0.48%] index_add_ reverse : Elapsed 0.044 ms (4.366 ms / 100) 4.208 -> 4.197 ( -0.26%) [ +0.00% +0.02% +0.07% / -0.26% +0.59% +0.52%] index_copy_ reverse : Elapsed 0.042 ms (4.208 ms / 100) 4.384 -> 4.385 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.48% +0.48%] index_add_ spread : Elapsed 0.044 ms (4.385 ms / 100) 4.225 -> 4.226 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.57% +0.52%] index_copy_ spread : Elapsed 0.042 ms (4.226 ms / 100) 4.359 -> 4.359 ( +0.00%) [ +0.14% +0.02% +0.00% / +0.00% +0.46% +0.44%] index_add_ strided 3 : Elapsed 0.044 ms (4.365 ms / 100) 4.202 -> 4.203 ( +0.02%) [ +0.17% +0.00% +0.02% / +0.02% +0.57% +0.52%] index_copy_ strided 3 : Elapsed 0.042 ms (4.209 ms / 100) 4.371 -> 4.365 ( -0.14%) [ +0.05% +0.11% +0.00% / -0.14% +0.48% +0.34%] index_add_ strided 7 : Elapsed 0.044 ms (4.373 ms / 100) 4.214 -> 4.213 ( -0.02%) [ +0.00% +0.12% +0.00% / -0.02% +0.50% +0.40%] index_copy_ strided 7 : Elapsed 0.042 ms (4.214 ms / 100) 4.387 -> 4.386 ( -0.02%) [ +0.00% +0.00% +0.09% / -0.02% +0.46% +0.41%] index_add_ perm : Elapsed 0.044 ms (4.387 ms / 100) 4.222 -> 4.225 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.54% +0.50%] index_copy_ perm : Elapsed 0.042 ms (4.222 ms / 100) 4.387 -> 4.388 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +1.30% +0.52%] index_add_ perm_sorted : Elapsed 0.044 ms (4.388 ms / 100) 4.224 -> 4.223 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +0.43% +0.54%] index_copy_ perm_sorted : Elapsed 0.042 ms (4.227 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.23% +0.05%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.09% +0.16% +0.00% / -0.11% +0.09% +0.18%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.570 ( +0.07%) [ +0.22% +0.20% +0.00% / +0.07% +0.14% +0.22%] index_select linear : Elapsed 0.056 ms (5.578 ms / 100) 5.573 -> 5.573 ( +0.00%) [ +0.00% +0.04% +0.07% / +0.00% +0.05% +0.07%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.571 -> 5.567 ( -0.07%) [ +0.14% +0.02% +0.00% / -0.07% +0.00% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.579 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.18% +0.23% +0.00% / +0.14% -0.04% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.576 ms / 100) 5.570 -> 5.569 ( -0.02%) [ +0.09% +0.05% +0.00% / +0.05% -0.02% +0.02%] index_select spread : Elapsed 0.056 ms (5.575 ms / 100) 5.567 -> 5.575 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.23% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.571 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.02% +0.11% +0.09%] index_select strided 5 : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.570 ( +0.07%) [ +0.05% +0.13% +0.00% / +0.07% +0.07% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.569 ms / 100) 5.569 -> 5.568 ( -0.02%) [ +0.00% +0.04% +0.00% / -0.02% +0.20% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.570 -> 5.569 ( -0.02%) [ +0.11% +0.11% +0.00% / -0.02% +0.16% +0.00%] index_select random : Elapsed 0.056 ms (5.576 ms / 100) 5.566 -> 5.572 ( +0.11%) [ +0.13% +0.14% +0.00% / +0.11% +0.14% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [4, 40, 5, 20] (stride (1, 400, 4, 20)) A = [4, 16, 5, 20] (stride (1, 400, 4, 20)) dim = 1 4.126 -> 4.125 ( -0.02%) [ +0.05% +0.02% +0.00% / -0.02% +0.68% +0.68%] index_add_ linear : Elapsed 0.041 ms (4.128 ms / 100) 3.985 -> 3.986 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.75% +0.75%] index_copy_ linear : Elapsed 0.040 ms (3.986 ms / 100) 4.147 -> 4.147 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.77% +0.68%] index_add_ reverse : Elapsed 0.041 ms (4.149 ms / 100) 3.999 -> 4.000 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.83% +0.73%] index_copy_ reverse : Elapsed 0.040 ms (4.001 ms / 100) 4.150 -> 4.151 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.65% +0.77%] index_add_ spread : Elapsed 0.042 ms (4.151 ms / 100) 4.012 -> 4.014 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.70% +0.60%] index_copy_ spread : Elapsed 0.040 ms (4.013 ms / 100) 4.140 -> 4.141 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.89% +0.89%] index_add_ strided 3 : Elapsed 0.041 ms (4.141 ms / 100) 3.999 -> 3.998 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.65% +0.65%] index_copy_ strided 3 : Elapsed 0.040 ms (3.999 ms / 100) 4.146 -> 4.145 ( -0.02%) [ +0.02% +0.02% +0.00% / -0.02% +0.80% +0.75%] index_add_ strided 7 : Elapsed 0.041 ms (4.147 ms / 100) 3.998 -> 3.999 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.78% +0.75%] index_copy_ strided 7 : Elapsed 0.040 ms (4.000 ms / 100) 4.124 -> 4.125 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.73% +0.73%] index_add_ perm : Elapsed 0.041 ms (4.126 ms / 100) 3.984 -> 3.986 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.73% +0.75%] index_copy_ perm : Elapsed 0.040 ms (3.984 ms / 100) 4.146 -> 4.148 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.58% +0.63%] index_add_ perm_sorted : Elapsed 0.041 ms (4.148 ms / 100) 3.996 -> 3.997 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.78% +0.78%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.996 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.00% +0.07% +0.14% / +0.16% -0.04% +0.04%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.567 -> 5.558 ( -0.16%) [ +0.07% +0.00% +0.11% / +0.02% -0.09% -0.16%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.563 -> 5.558 ( -0.09%) [ +0.05% +0.09% +0.00% / +0.09% +0.22% -0.09%] index_select linear : Elapsed 0.056 ms (5.566 ms / 100) 5.567 -> 5.567 ( +0.00%) [ +0.14% +0.00% +0.09% / +0.05% +0.00% +0.00%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.555 -> 5.562 ( +0.13%) [ +0.00% +0.27% +0.18% / +0.13% +0.20% +0.25%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.557 -> 5.562 ( +0.09%) [ +0.00% +0.09% +0.04% / +0.09% +0.13% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.571 -> 5.565 ( -0.11%) [ +0.11% +0.09% +0.00% / +0.00% -0.11% -0.02%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.00% +0.07% +0.07% / -0.11% -0.02% -0.11%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.09% +0.09% +0.00% / +0.09% +0.02% +0.02%] index_select strided 5 : Elapsed 0.056 ms (5.568 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.23% +0.00% +0.16% / +0.07% +0.13% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.578 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.00% +0.14% +0.13% / +0.04% +0.13% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.558 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.05% +0.00% +0.04% / +0.14% +0.09% +0.02%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.570 -> 5.571 ( +0.02%) [ +0.07% +0.05% +0.00% / +0.04% +0.09% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [4, 40, 5, 20] (stride (20, 80, 3200, 1)) A = [4, 16, 5, 20] (stride (5, 20, 1, 320)) dim = 1 4.289 -> 4.289 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.54%] index_add_ linear : Elapsed 0.043 ms (4.289 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.48% +0.39%] index_copy_ linear : Elapsed 0.041 ms (4.139 ms / 100) 4.275 -> 4.278 ( +0.07%) [ +0.05% +0.12% +0.00% / +0.07% +0.61% +0.80%] index_add_ reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.134 -> 4.135 ( +0.02%) [ +0.02% +0.10% +0.00% / +0.02% +0.70% +0.63%] index_copy_ reverse : Elapsed 0.041 ms (4.135 ms / 100) 4.294 -> 4.297 ( +0.07%) [ +0.00% +0.26% +0.07% / +0.07% +0.56% +0.49%] index_add_ spread : Elapsed 0.043 ms (4.294 ms / 100) 4.123 -> 4.126 ( +0.07%) [ +0.07% +0.02% +0.00% / +0.07% +0.70% +0.51%] index_copy_ spread : Elapsed 0.041 ms (4.126 ms / 100) 4.292 -> 4.295 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.58% +0.56%] index_add_ strided 3 : Elapsed 0.043 ms (4.296 ms / 100) 4.148 -> 4.149 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.72% +0.68%] index_copy_ strided 3 : Elapsed 0.041 ms (4.148 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.33% +0.42%] index_add_ strided 7 : Elapsed 0.043 ms (4.281 ms / 100) 4.138 -> 4.141 ( +0.07%) [ +0.05% +0.00% +0.05% / +0.07% +0.46% +0.48%] index_copy_ strided 7 : Elapsed 0.041 ms (4.140 ms / 100) 4.289 -> 4.289 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.61% +0.61%] index_add_ perm : Elapsed 0.043 ms (4.290 ms / 100) 4.135 -> 4.141 ( +0.15%) [ +0.05% +0.00% +0.02% / +0.15% +0.65% +0.58%] index_copy_ perm : Elapsed 0.041 ms (4.137 ms / 100) 4.290 -> 4.292 ( +0.05%) [ +0.00% +0.07% +0.02% / +0.05% +0.70% +0.56%] index_add_ perm_sorted : Elapsed 0.043 ms (4.290 ms / 100) 4.139 -> 4.144 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.53% +0.41%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.139 ms / 100) 5.559 -> 5.557 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.25% +0.04%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.00% +0.00% +0.02% / +0.04% +0.18% +0.16%] index_select wrap : Elapsed 0.056 ms (5.565 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.23% +0.00% +0.04% / +0.07% +0.09% -0.04%] index_select linear : Elapsed 0.056 ms (5.580 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.00% +0.02% +0.14% / +0.07% +0.22% +0.14%] index_select reverse : Elapsed 0.056 ms (5.563 ms / 100) 5.557 -> 5.556 ( -0.02%) [ +0.00% +0.16% +0.05% / +0.22% +0.05% -0.02%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.09% +0.16% +0.00% / +0.16% +0.11% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.25% +0.02% +0.00% / +0.32% +0.05% +0.00%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.559 -> 5.564 ( +0.09%) [ +0.00% +0.20% +0.11% / +0.09% +0.16% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.559 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.18% +0.22% +0.00% / +0.34% +0.13% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.568 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.00% +0.11% +0.07% / +0.02% +0.18% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.560 ms / 100) 5.557 -> 5.560 ( +0.05%) [ +0.00% +0.13% +0.00% / +0.05% +0.22% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.557 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.11% +0.22% +0.00% / +0.27% +0.07% +0.27%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.565 -> 5.562 ( -0.05%) [ +0.00% +0.07% +0.05% / -0.05% +0.04% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.565 ms / 100) B = [4, 40, 5, 20] (stride (1, 4, 3200, 160)) A = [4, 16, 5, 20] (stride (1600, 100, 1, 5)) dim = 1 3.679 -> 3.673 ( -0.16%) [ +0.03% +0.03% +0.00% / -0.16% +0.73% +0.73%] index_add_ linear : Elapsed 0.037 ms (3.680 ms / 100) 3.546 -> 3.544 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.76% +0.73%] index_copy_ linear : Elapsed 0.035 ms (3.548 ms / 100) 3.680 -> 3.681 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.68% +0.68%] index_add_ reverse : Elapsed 0.037 ms (3.680 ms / 100) 3.554 -> 3.557 ( +0.08%) [ +0.03% +0.00% +0.31% / +0.08% +0.68% +0.70%] index_copy_ reverse : Elapsed 0.036 ms (3.555 ms / 100) 3.668 -> 3.668 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.68% +0.74%] index_add_ spread : Elapsed 0.037 ms (3.670 ms / 100) 3.541 -> 3.545 ( +0.11%) [ +0.03% +0.00% +0.00% / +0.11% +0.71% +0.76%] index_copy_ spread : Elapsed 0.035 ms (3.542 ms / 100) 3.682 -> 3.682 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +1.20% +0.68%] index_add_ strided 3 : Elapsed 0.037 ms (3.684 ms / 100) 3.553 -> 3.556 ( +0.08%) [ +0.03% +0.00% +0.08% / +0.08% +0.73% +0.65%] index_copy_ strided 3 : Elapsed 0.036 ms (3.554 ms / 100) 3.677 -> 3.682 ( +0.14%) [ +0.05% +0.08% +0.00% / +0.14% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.037 ms (3.679 ms / 100) 3.553 -> 3.559 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.79% +0.73%] index_copy_ strided 7 : Elapsed 0.036 ms (3.553 ms / 100) 3.670 -> 3.673 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.82% +1.04%] index_add_ perm : Elapsed 0.037 ms (3.671 ms / 100) 3.539 -> 3.542 ( +0.08%) [ +0.03% +0.00% +0.06% / +0.08% +0.76% +0.96%] index_copy_ perm : Elapsed 0.035 ms (3.540 ms / 100) 3.676 -> 3.678 ( +0.05%) [ +0.03% +0.11% +0.00% / +0.05% +0.92% +0.90%] index_add_ perm_sorted : Elapsed 0.037 ms (3.677 ms / 100) 3.549 -> 3.551 ( +0.06%) [ +0.00% +0.08% +0.00% / +0.06% +0.82% +0.79%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.549 ms / 100) 5.477 -> 5.469 ( -0.15%) [ +0.09% +0.00% +0.02% / +0.04% -0.15% -0.11%] index_select const : Elapsed 0.055 ms (5.482 ms / 100) 5.478 -> 5.477 ( -0.02%) [ +0.05% +0.05% +0.00% / +0.15% -0.02% +0.02%] index_select wrap : Elapsed 0.055 ms (5.481 ms / 100) 5.479 -> 5.471 ( -0.15%) [ +0.00% +0.07% +0.05% / -0.15% +0.05% -0.05%] index_select linear : Elapsed 0.055 ms (5.479 ms / 100) 5.474 -> 5.477 ( +0.05%) [ +0.20% +0.11% +0.00% / +0.18% +0.16% +0.05%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.02% +0.18% +0.00% / +0.00% +0.02% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.472 ms / 100) 5.468 -> 5.474 ( +0.11%) [ +0.00% +0.13% +0.07% / +0.11% +0.20% +0.15%] index_select skip256 : Elapsed 0.055 ms (5.468 ms / 100) 5.484 -> 5.475 ( -0.16%) [ +0.04% +0.00% +0.05% / -0.15% -0.16% -0.05%] index_select spread : Elapsed 0.055 ms (5.486 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.05% +0.02% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.478 ms / 100) 5.480 -> 5.477 ( -0.05%) [ +0.00% +0.09% +0.09% / +0.05% -0.05% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.480 ms / 100) 5.475 -> 5.480 ( +0.09%) [ +0.00% +0.02% +0.00% / +0.09% +0.09% +0.26%] index_select strided 7 : Elapsed 0.055 ms (5.475 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.07% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.11% +0.11% +0.04%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.07% +0.09% +0.00% / +0.00% +0.09% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.480 ms / 100) B = [4, 40, 5, 20] (stride (5, 20, 1, 800)) A = [4, 16, 5, 20] (stride (1, 4, 1280, 64)) dim = 1 4.072 -> 4.083 ( +0.27%) [ +0.02% +0.22% +0.00% / +0.27% +0.59% +0.59%] index_add_ linear : Elapsed 0.041 ms (4.073 ms / 100) 3.904 -> 3.909 ( +0.13%) [ +0.03% +0.10% +0.00% / +0.13% +0.61% +0.59%] index_copy_ linear : Elapsed 0.039 ms (3.905 ms / 100) 4.091 -> 4.092 ( +0.02%) [ +0.02% +0.15% +0.00% / +0.02% +0.56% +0.44%] index_add_ reverse : Elapsed 0.041 ms (4.092 ms / 100) 3.930 -> 3.933 ( +0.08%) [ +0.00% +0.23% +0.10% / +0.08% +0.51% +0.53%] index_copy_ reverse : Elapsed 0.039 ms (3.930 ms / 100) 4.090 -> 4.093 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.54% +0.59%] index_add_ spread : Elapsed 0.041 ms (4.092 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.53% +0.48%] index_copy_ spread : Elapsed 0.039 ms (3.928 ms / 100) 4.103 -> 4.104 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.44% +0.41%] index_add_ strided 3 : Elapsed 0.041 ms (4.103 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.46% +0.41%] index_copy_ strided 3 : Elapsed 0.039 ms (3.937 ms / 100) 4.090 -> 4.093 ( +0.07%) [ +0.05% +0.10% +0.00% / +0.07% +0.51% +0.49%] index_add_ strided 7 : Elapsed 0.041 ms (4.092 ms / 100) 3.930 -> 3.934 ( +0.10%) [ +0.13% +0.15% +0.00% / +0.10% +0.48% +0.56%] index_copy_ strided 7 : Elapsed 0.039 ms (3.935 ms / 100) 4.074 -> 4.076 ( +0.05%) [ +0.02% +0.29% +0.00% / +0.05% +0.76% +0.49%] index_add_ perm : Elapsed 0.041 ms (4.075 ms / 100) 3.907 -> 3.911 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.59% +0.38%] index_copy_ perm : Elapsed 0.039 ms (3.908 ms / 100) 4.076 -> 4.076 ( +0.00%) [ +0.00% +0.27% +0.22% / +0.00% +0.44% +0.42%] index_add_ perm_sorted : Elapsed 0.041 ms (4.076 ms / 100) 3.908 -> 3.914 ( +0.15%) [ +0.00% +0.15% +0.08% / +0.15% +0.46% +0.38%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.908 ms / 100) 5.485 -> 5.488 ( +0.05%) [ +0.00% +0.11% +0.20% / +0.05% +0.29% +0.29%] index_select const : Elapsed 0.055 ms (5.485 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.09% +0.13% +0.00% / -0.02% +0.13% +0.00%] index_select wrap : Elapsed 0.055 ms (5.497 ms / 100) 5.494 -> 5.495 ( +0.02%) [ +0.00% +0.09% +0.02% / +0.02% +0.16% +0.16%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.00% +0.13% +0.15% / +0.00% +0.09% +0.07%] index_select reverse : Elapsed 0.055 ms (5.494 ms / 100) 5.498 -> 5.493 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.04% -0.09% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.498 ms / 100) 5.495 -> 5.490 ( -0.09%) [ +0.00% +0.07% +0.02% / -0.09% +0.02% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.495 ms / 100) 5.495 -> 5.496 ( +0.02%) [ +0.16% +0.00% +0.02% / +0.15% +0.02% +0.05%] index_select spread : Elapsed 0.055 ms (5.504 ms / 100) 5.499 -> 5.494 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.02% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.499 ms / 100) 5.491 -> 5.494 ( +0.05%) [ +0.02% +0.00% +0.16% / +0.18% +0.05% +0.11%] index_select strided 5 : Elapsed 0.055 ms (5.492 ms / 100) 5.494 -> 5.496 ( +0.04%) [ +0.09% +0.00% +0.05% / +0.04% +0.13% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.499 ms / 100) 5.487 -> 5.495 ( +0.15%) [ +0.05% +0.04% +0.00% / +0.27% +0.20% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.490 ms / 100) 5.494 -> 5.493 ( -0.02%) [ +0.00% +0.04% +0.15% / +0.22% +0.04% -0.02%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.09% +0.00% +0.11% / +0.13% +0.00% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.496 ms / 100) B = [4, 40, 5, 20] (stride (40, 1, 160, 800)) A = [4, 16, 5, 20] (stride (5, 20, 1, 320)) dim = 1 4.275 -> 4.279 ( +0.09%) [ +0.00% +0.07% +0.07% / +0.09% +0.65% +0.91%] index_add_ linear : Elapsed 0.043 ms (4.275 ms / 100) 4.143 -> 4.159 ( +0.39%) [ +0.00% +0.00% +0.02% / +0.39% +0.65% +0.63%] index_copy_ linear : Elapsed 0.041 ms (4.143 ms / 100) 4.284 -> 4.287 ( +0.07%) [ +0.00% +0.02% +0.00% / +0.07% +0.75% +0.70%] index_add_ reverse : Elapsed 0.043 ms (4.284 ms / 100) 4.146 -> 4.152 ( +0.14%) [ +0.02% +0.10% +0.00% / +0.14% +0.96% +0.77%] index_copy_ reverse : Elapsed 0.041 ms (4.147 ms / 100) 4.263 -> 4.262 ( -0.02%) [ +0.07% +0.00% +0.09% / -0.02% +0.80% +0.82%] index_add_ spread : Elapsed 0.043 ms (4.266 ms / 100) 4.118 -> 4.122 ( +0.10%) [ +0.00% +0.00% +0.00% / +0.10% +0.75% +0.75%] index_copy_ spread : Elapsed 0.041 ms (4.118 ms / 100) 4.271 -> 4.280 ( +0.21%) [ +0.12% +0.14% +0.00% / +0.21% +0.70% +0.80%] index_add_ strided 3 : Elapsed 0.043 ms (4.276 ms / 100) 4.118 -> 4.123 ( +0.12%) [ +0.05% +0.12% +0.00% / +0.12% +0.78% +0.75%] index_copy_ strided 3 : Elapsed 0.041 ms (4.120 ms / 100) 4.284 -> 4.290 ( +0.14%) [ +0.07% +0.02% +0.00% / +0.14% +0.70% +0.79%] index_add_ strided 7 : Elapsed 0.043 ms (4.287 ms / 100) 4.146 -> 4.154 ( +0.19%) [ +0.14% +0.05% +0.00% / +0.19% +0.89% +0.89%] index_copy_ strided 7 : Elapsed 0.042 ms (4.152 ms / 100) 4.274 -> 4.278 ( +0.09%) [ +0.00% +0.09% +0.12% / +0.09% +0.96% +0.91%] index_add_ perm : Elapsed 0.043 ms (4.274 ms / 100) 4.143 -> 4.146 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.68% +0.68%] index_copy_ perm : Elapsed 0.041 ms (4.143 ms / 100) 4.268 -> 4.272 ( +0.09%) [ +0.09% +0.02% +0.00% / +0.09% +0.82% +0.87%] index_add_ perm_sorted : Elapsed 0.043 ms (4.272 ms / 100) 4.117 -> 4.121 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.78% +0.70%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.119 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.23% +0.00% +0.07% / +0.14% +0.04% +0.14%] index_select const : Elapsed 0.056 ms (5.578 ms / 100) 5.577 -> 5.574 ( -0.05%) [ +0.13% +0.00% +0.07% / +0.04% +0.00% -0.05%] index_select wrap : Elapsed 0.056 ms (5.584 ms / 100) 5.583 -> 5.573 ( -0.18%) [ +0.05% +0.00% +0.00% / +0.05% -0.02% -0.18%] index_select linear : Elapsed 0.056 ms (5.586 ms / 100) 5.581 -> 5.573 ( -0.14%) [ +0.18% +0.05% +0.00% / +0.09% -0.14% -0.14%] index_select reverse : Elapsed 0.056 ms (5.591 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.04% +0.00% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.564 ( -0.11%) [ +0.00% +0.05% +0.02% / -0.11% +0.04% +0.09%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.576 -> 5.571 ( -0.09%) [ +0.14% +0.00% +0.16% / +0.05% -0.04% -0.09%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.577 -> 5.572 ( -0.09%) [ +0.00% +0.14% +0.02% / +0.11% -0.09% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.578 -> 5.569 ( -0.16%) [ +0.00% +0.13% +0.00% / +0.05% -0.16% -0.05%] index_select strided 5 : Elapsed 0.056 ms (5.578 ms / 100) 5.574 -> 5.579 ( +0.09%) [ +0.00% +0.13% +0.07% / +0.09% +0.11% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.572 -> 5.567 ( -0.09%) [ +0.00% +0.09% +0.02% / -0.09% +0.09% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.572 ms / 100) 5.578 -> 5.573 ( -0.09%) [ +0.11% +0.09% +0.00% / -0.09% -0.05% -0.05%] index_select random : Elapsed 0.056 ms (5.584 ms / 100) 5.577 -> 5.579 ( +0.04%) [ +0.22% +0.00% +0.09% / +0.05% +0.04% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.589 ms / 100) out_shape = [4, 16, 40, 20] in_shape = [4, 16, 5, 20] idx_dim = 2 B = [4, 16, 40, 20] (stride (12800, 1, 16, 640)) A = [4, 16, 5, 20] (stride (1600, 5, 1, 80)) dim = 2 1.617 -> 1.619 ( +0.12%) [ +0.00% +0.00% +0.12% / +0.12% +0.62% +0.62%] index_add_ linear : Elapsed 0.016 ms (1.617 ms / 100) 1.573 -> 1.572 ( -0.06%) [ +0.00% +0.00% +0.25% / -0.06% +0.19% +0.25%] index_copy_ linear : Elapsed 0.016 ms (1.573 ms / 100) 1.616 -> 1.617 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.50% +0.50%] index_add_ reverse : Elapsed 0.016 ms (1.616 ms / 100) 1.575 -> 1.571 ( -0.25%) [ +0.13% +0.06% +0.00% / -0.13% +0.00% -0.25%] index_copy_ reverse : Elapsed 0.016 ms (1.577 ms / 100) 1.615 -> 1.616 ( +0.06%) [ +0.00% +0.25% +0.06% / +0.06% +0.62% +0.31%] index_add_ spread : Elapsed 0.016 ms (1.615 ms / 100) 1.567 -> 1.571 ( +0.26%) [ +0.45% +0.45% +0.00% / +0.64% +0.26% +0.26%] index_copy_ spread : Elapsed 0.016 ms (1.574 ms / 100) 1.614 -> 1.616 ( +0.12%) [ +0.25% +0.00% +0.19% / +0.12% +0.62% +0.50%] index_add_ strided 3 : Elapsed 0.016 ms (1.618 ms / 100) 1.566 -> 1.571 ( +0.32%) [ +0.57% +0.38% +0.00% / +0.64% +0.57% +0.32%] index_copy_ strided 3 : Elapsed 0.016 ms (1.575 ms / 100) 1.616 -> 1.618 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.50% +0.56%] index_add_ strided 7 : Elapsed 0.016 ms (1.617 ms / 100) 1.572 -> 1.575 ( +0.19%) [ +0.32% +0.06% +0.00% / +0.25% +0.19% +0.32%] index_copy_ strided 7 : Elapsed 0.016 ms (1.577 ms / 100) 1.617 -> 1.621 ( +0.25%) [ +0.06% +0.00% +0.12% / +0.25% +0.68% +0.49%] index_add_ perm : Elapsed 0.016 ms (1.618 ms / 100) 1.570 -> 1.570 ( +0.00%) [ +0.00% +0.19% +0.00% / +0.00% +0.64% +0.38%] index_copy_ perm : Elapsed 0.016 ms (1.570 ms / 100) 1.616 -> 1.614 ( -0.12%) [ +0.12% +0.00% +0.00% / -0.12% +0.56% +0.37%] index_add_ perm_sorted : Elapsed 0.016 ms (1.618 ms / 100) 1.570 -> 1.571 ( +0.06%) [ +0.00% +0.32% +0.19% / +0.06% +0.13% +0.38%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.570 ms / 100) 8.532 -> 8.543 ( +0.13%) [ +0.26% +0.00% +0.23% / +0.13% +0.40% +0.28%] index_select const : Elapsed 0.086 ms (8.554 ms / 100) 8.539 -> 8.538 ( -0.01%) [ +0.07% +0.00% +0.18% / +0.04% -0.01% +0.18%] index_select wrap : Elapsed 0.085 ms (8.545 ms / 100) 8.540 -> 8.542 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.02% +0.25% +0.19%] index_select linear : Elapsed 0.085 ms (8.540 ms / 100) 8.533 -> 8.544 ( +0.13%) [ +0.00% +0.19% +0.21% / +0.13% +0.45% +0.22%] index_select reverse : Elapsed 0.085 ms (8.533 ms / 100) 8.536 -> 8.535 ( -0.01%) [ +0.00% +0.05% +0.34% / -0.01% +0.01% +0.08%] index_select skip64 : Elapsed 0.085 ms (8.536 ms / 100) 8.553 -> 8.541 ( -0.14%) [ +0.01% +0.00% +0.05% / +0.07% -0.08% -0.14%] index_select skip256 : Elapsed 0.086 ms (8.554 ms / 100) 8.534 -> 8.549 ( +0.18%) [ +0.30% +0.00% +0.25% / +0.18% +0.41% +0.32%] index_select spread : Elapsed 0.086 ms (8.560 ms / 100) 8.536 -> 8.542 ( +0.07%) [ +0.23% +0.09% +0.00% / +0.07% +0.35% +0.15%] index_select strided 3 : Elapsed 0.086 ms (8.556 ms / 100) 8.540 -> 8.538 ( -0.02%) [ +0.23% +0.00% +0.02% / -0.02% +0.18% +0.23%] index_select random : Elapsed 0.086 ms (8.560 ms / 100) 8.540 -> 8.533 ( -0.08%) [ +0.07% +0.04% +0.00% / -0.08% +0.08% -0.06%] index_select random_sorted : Elapsed 0.085 ms (8.546 ms / 100) B = [4, 16, 40, 20] (stride (1, 3200, 80, 4)) A = [4, 16, 5, 20] (stride (1, 20, 4, 320)) dim = 2 1.631 -> 1.635 ( +0.25%) [ +0.12% +0.18% +0.00% / +0.25% +0.86% +0.74%] index_add_ linear : Elapsed 0.016 ms (1.633 ms / 100) 1.579 -> 1.583 ( +0.25%) [ +0.06% +0.06% +0.00% / +0.25% +0.89% +0.76%] index_copy_ linear : Elapsed 0.016 ms (1.580 ms / 100) 1.635 -> 1.635 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.55% +0.73%] index_add_ reverse : Elapsed 0.016 ms (1.635 ms / 100) 1.581 -> 1.582 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.70% +0.63%] index_copy_ reverse : Elapsed 0.016 ms (1.581 ms / 100) 1.632 -> 1.635 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.67% +0.74%] index_add_ spread : Elapsed 0.016 ms (1.635 ms / 100) 1.578 -> 1.579 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.76% +0.63%] index_copy_ spread : Elapsed 0.016 ms (1.578 ms / 100) 1.633 -> 1.635 ( +0.12%) [ +0.00% +0.00% +0.12% / +0.12% +0.61% +0.61%] index_add_ strided 3 : Elapsed 0.016 ms (1.633 ms / 100) 1.579 -> 1.581 ( +0.13%) [ +0.19% +0.00% +0.06% / +0.13% +0.70% +0.57%] index_copy_ strided 3 : Elapsed 0.016 ms (1.582 ms / 100) 1.640 -> 1.642 ( +0.12%) [ +0.00% +0.12% +0.12% / +0.12% +0.61% +0.67%] index_add_ strided 7 : Elapsed 0.016 ms (1.640 ms / 100) 1.583 -> 1.591 ( +0.51%) [ +0.06% +0.00% +0.13% / +0.51% +0.95% +0.76%] index_copy_ strided 7 : Elapsed 0.016 ms (1.584 ms / 100) 1.639 -> 1.639 ( +0.00%) [ +0.31% +0.37% +0.00% / +0.00% +0.73% +0.61%] index_add_ perm : Elapsed 0.016 ms (1.644 ms / 100) 1.583 -> 1.584 ( +0.06%) [ +0.06% +0.19% +0.00% / +0.06% +0.88% +0.76%] index_copy_ perm : Elapsed 0.016 ms (1.584 ms / 100) 1.632 -> 1.635 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +0.80% +0.74%] index_add_ perm_sorted : Elapsed 0.016 ms (1.635 ms / 100) 1.580 -> 1.583 ( +0.19%) [ +0.06% +0.00% +0.00% / +0.19% +0.76% +0.63%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.581 ms / 100) 8.512 -> 8.531 ( +0.22%) [ +0.32% +0.47% +0.00% / +0.58% +0.33% +0.22%] index_select const : Elapsed 0.085 ms (8.539 ms / 100) 8.530 -> 8.542 ( +0.14%) [ +0.00% +0.34% +0.19% / +0.14% +0.42% +0.27%] index_select wrap : Elapsed 0.085 ms (8.530 ms / 100) 8.527 -> 8.542 ( +0.18%) [ +0.09% +0.04% +0.00% / +0.18% +0.32% +0.48%] index_select linear : Elapsed 0.085 ms (8.535 ms / 100) 8.521 -> 8.547 ( +0.31%) [ +0.07% +0.15% +0.00% / +0.31% +0.63% +0.34%] index_select reverse : Elapsed 0.085 ms (8.527 ms / 100) 8.512 -> 8.533 ( +0.25%) [ +0.40% +0.00% +0.18% / +0.25% +0.49% +0.55%] index_select skip64 : Elapsed 0.085 ms (8.546 ms / 100) 8.522 -> 8.534 ( +0.14%) [ +0.18% +0.00% +0.19% / +0.14% +0.20% +0.45%] index_select skip256 : Elapsed 0.085 ms (8.537 ms / 100) 8.536 -> 8.548 ( +0.14%) [ +0.27% +0.01% +0.00% / +0.14% +0.41% +0.33%] index_select spread : Elapsed 0.086 ms (8.559 ms / 100) 8.517 -> 8.548 ( +0.36%) [ +0.00% +0.18% +0.21% / +0.36% +0.69% +0.58%] index_select strided 3 : Elapsed 0.085 ms (8.517 ms / 100) 8.524 -> 8.536 ( +0.14%) [ +0.12% +0.04% +0.00% / +0.14% +0.36% +0.43%] index_select random : Elapsed 0.085 ms (8.534 ms / 100) 8.540 -> 8.532 ( -0.09%) [ +0.00% +0.12% +0.14% / -0.09% +0.08% +0.30%] index_select random_sorted : Elapsed 0.085 ms (8.540 ms / 100) B = [4, 16, 40, 20] (stride (1, 3200, 4, 160)) A = [4, 16, 5, 20] (stride (16, 1, 1280, 64)) dim = 2 1.518 -> 1.520 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.40% +0.40%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.469 -> 1.470 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.68% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.469 ms / 100) 1.513 -> 1.514 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.40% +0.33%] index_add_ reverse : Elapsed 0.015 ms (1.513 ms / 100) 1.465 -> 1.468 ( +0.20%) [ +0.00% +0.07% +0.07% / +0.20% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.015 ms (1.465 ms / 100) 1.506 -> 1.506 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.40%] index_add_ spread : Elapsed 0.015 ms (1.506 ms / 100) 1.453 -> 1.454 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.41% +0.55%] index_copy_ spread : Elapsed 0.015 ms (1.454 ms / 100) 1.505 -> 1.505 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.47%] index_add_ strided 3 : Elapsed 0.015 ms (1.506 ms / 100) 1.452 -> 1.453 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.55% +0.62%] index_copy_ strided 3 : Elapsed 0.015 ms (1.452 ms / 100) 1.511 -> 1.512 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.512 ms / 100) 1.463 -> 1.467 ( +0.27%) [ +0.14% +0.21% +0.00% / +0.27% +0.62% +0.62%] index_copy_ strided 7 : Elapsed 0.015 ms (1.465 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.46% +0.46%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.469 -> 1.469 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_copy_ perm : Elapsed 0.015 ms (1.469 ms / 100) 1.512 -> 1.513 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.46% +0.53%] index_add_ perm_sorted : Elapsed 0.015 ms (1.514 ms / 100) 1.465 -> 1.465 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.41% +0.48%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.465 ms / 100) 8.223 -> 8.218 ( -0.06%) [ +0.06% +0.00% +0.15% / -0.06% +0.00% +0.01%] index_select const : Elapsed 0.082 ms (8.228 ms / 100) 8.235 -> 8.235 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.07% +0.00% +0.18%] index_select wrap : Elapsed 0.082 ms (8.241 ms / 100) 8.234 -> 8.227 ( -0.09%) [ +0.22% +0.13% +0.00% / -0.09% +0.34% +0.04%] index_select linear : Elapsed 0.083 ms (8.252 ms / 100) 8.230 -> 8.216 ( -0.17%) [ +0.07% +0.09% +0.00% / -0.17% +0.32% -0.01%] index_select reverse : Elapsed 0.082 ms (8.236 ms / 100) 8.206 -> 8.223 ( +0.21%) [ +0.39% +0.19% +0.00% / +0.21% +0.27% +0.26%] index_select skip64 : Elapsed 0.082 ms (8.238 ms / 100) 8.213 -> 8.209 ( -0.05%) [ +0.02% +0.00% +0.13% / -0.05% +0.12% +0.10%] index_select skip256 : Elapsed 0.082 ms (8.215 ms / 100) 8.226 -> 8.216 ( -0.12%) [ +0.00% +0.24% +0.10% / -0.12% +0.34% +0.06%] index_select spread : Elapsed 0.082 ms (8.226 ms / 100) 8.234 -> 8.227 ( -0.09%) [ +0.13% +0.00% +0.01% / -0.09% +0.09% +0.06%] index_select strided 3 : Elapsed 0.082 ms (8.245 ms / 100) 8.232 -> 8.236 ( +0.05%) [ +0.00% +0.11% +0.34% / +0.38% +0.05% +0.18%] index_select random : Elapsed 0.082 ms (8.232 ms / 100) 8.224 -> 8.237 ( +0.16%) [ +0.00% +0.24% +0.06% / +0.17% +0.30% +0.16%] index_select random_sorted : Elapsed 0.082 ms (8.224 ms / 100) out_shape = [4, 16, 5, 40] in_shape = [4, 16, 5, 20] idx_dim = 3 B = [4, 16, 5, 40] (stride (3200, 200, 1, 5)) A = [4, 16, 5, 20] (stride (80, 1, 16, 320)) dim = 3 2.398 -> 2.411 ( +0.54%) [ +0.29% +0.00% +0.17% / +0.54% +1.25% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.405 ms / 100) 2.388 -> 2.404 ( +0.67%) [ +0.00% +0.17% +0.17% / +0.67% +0.88% +1.05%] index_copy_ linear : Elapsed 0.024 ms (2.388 ms / 100) 2.402 -> 2.413 ( +0.46%) [ +0.04% +0.29% +0.00% / +0.46% +0.75% +0.54%] index_add_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.395 -> 2.404 ( +0.38%) [ +0.04% +0.08% +0.00% / +0.38% +1.09% +0.71%] index_copy_ reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.421 -> 2.430 ( +0.37%) [ +0.00% +0.25% +0.12% / +0.58% +0.37% +0.41%] index_add_ spread : Elapsed 0.024 ms (2.421 ms / 100) 2.423 -> 2.441 ( +0.74%) [ +0.25% +0.00% +0.25% / +0.74% +0.74% +0.78%] index_copy_ spread : Elapsed 0.024 ms (2.429 ms / 100) 2.416 -> 2.429 ( +0.54%) [ +0.25% +0.12% +0.00% / +0.66% +0.58% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.434 ( +0.58%) [ +0.00% +0.04% +0.04% / +0.66% +0.58% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.416 -> 2.427 ( +0.46%) [ +0.00% +0.17% +0.04% / +0.50% +0.50% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.419 -> 2.433 ( +0.58%) [ +0.08% +0.04% +0.00% / +0.66% +0.83% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.421 ms / 100) 2.410 -> 2.420 ( +0.41%) [ +0.04% +0.17% +0.00% / +0.75% +0.50% +0.41%] index_add_ perm : Elapsed 0.024 ms (2.411 ms / 100) 2.413 -> 2.426 ( +0.54%) [ +0.08% +0.08% +0.00% / +0.62% +0.70% +0.54%] index_copy_ perm : Elapsed 0.024 ms (2.415 ms / 100) 2.412 -> 2.422 ( +0.41%) [ +0.08% +0.04% +0.00% / +0.62% +0.46% +0.41%] index_add_ perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.430 ( +0.70%) [ +0.00% +0.00% +0.12% / +0.75% +0.70% +0.70%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 4.411 -> 4.412 ( +0.02%) [ +0.00% +0.09% +0.23% / +0.02% +0.11% +0.07%] index_select const : Elapsed 0.044 ms (4.411 ms / 100) 4.421 -> 4.425 ( +0.09%) [ +0.02% +0.09% +0.00% / +0.29% +0.32% +0.09%] index_select wrap : Elapsed 0.044 ms (4.422 ms / 100) 4.418 -> 4.423 ( +0.11%) [ +0.11% +0.09% +0.00% / +0.18% +0.11% +0.16%] index_select linear : Elapsed 0.044 ms (4.423 ms / 100) 4.417 -> 4.419 ( +0.05%) [ +0.00% +0.09% +0.20% / +0.09% +0.20% +0.05%] index_select reverse : Elapsed 0.044 ms (4.417 ms / 100) 4.412 -> 4.406 ( -0.14%) [ +0.00% +0.02% +0.09% / -0.14% +0.02% +0.11%] index_select skip64 : Elapsed 0.044 ms (4.412 ms / 100) 4.408 -> 4.412 ( +0.09%) [ +0.14% +0.00% +0.05% / +0.16% +0.09% +0.23%] index_select skip256 : Elapsed 0.044 ms (4.414 ms / 100) 4.417 -> 4.420 ( +0.07%) [ +0.05% +0.00% +0.11% / +0.09% +0.07% +0.32%] index_select spread : Elapsed 0.044 ms (4.419 ms / 100) 4.419 -> 4.421 ( +0.05%) [ +0.00% +0.00% +0.09% / +0.05% +0.16% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.419 ms / 100) 4.416 -> 4.414 ( -0.05%) [ +0.00% +0.14% +0.05% / +0.00% -0.05% +0.00%] index_select strided 5 : Elapsed 0.044 ms (4.416 ms / 100) 4.419 -> 4.423 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.18% +0.16%] index_select strided 7 : Elapsed 0.044 ms (4.423 ms / 100) 4.410 -> 4.414 ( +0.09%) [ +0.02% +0.00% +0.14% / +0.14% +0.09% +0.23%] index_select strided 8 : Elapsed 0.044 ms (4.411 ms / 100) 4.409 -> 4.410 ( +0.02%) [ +0.09% +0.00% +0.07% / +0.02% +0.27% +0.11%] index_select strided 16 : Elapsed 0.044 ms (4.413 ms / 100) 4.415 -> 4.422 ( +0.16%) [ +0.14% +0.00% +0.14% / +0.16% +0.32% +0.23%] index_select random : Elapsed 0.044 ms (4.421 ms / 100) 4.411 -> 4.418 ( +0.16%) [ +0.25% +0.18% +0.00% / +0.23% +0.16% +0.23%] index_select random_sorted : Elapsed 0.044 ms (4.422 ms / 100) B = [4, 16, 5, 40] (stride (3200, 1, 640, 16)) A = [4, 16, 5, 20] (stride (1600, 100, 1, 5)) dim = 3 2.412 -> 2.427 ( +0.62%) [ +0.04% +0.08% +0.00% / +0.62% +0.75% +0.87%] index_add_ linear : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.420 ( +0.50%) [ +0.08% +0.00% +0.25% / +0.50% +0.66% +0.58%] index_copy_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.404 -> 2.420 ( +0.67%) [ +0.00% +0.04% +0.12% / +0.67% +1.16% +1.29%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.402 -> 2.425 ( +0.96%) [ +0.04% +0.04% +0.00% / +1.25% +0.96% +1.29%] index_copy_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.406 -> 2.426 ( +0.83%) [ +0.21% +0.33% +0.00% / +0.83% +1.04% +1.16%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.405 -> 2.414 ( +0.37%) [ +0.00% +0.00% +0.00% / +0.37% +1.04% +1.00%] index_copy_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.414 -> 2.425 ( +0.46%) [ +0.21% +0.08% +0.00% / +0.58% +0.70% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.407 -> 2.426 ( +0.79%) [ +0.00% +0.08% +0.25% / +0.79% +0.87% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.428 ( +0.58%) [ +0.00% +0.04% +0.04% / +0.66% +0.70% +0.58%] index_add_ strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.407 -> 2.426 ( +0.79%) [ +0.21% +0.17% +0.00% / +0.87% +0.79% +0.96%] index_copy_ strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.04% +0.00% +0.17% / +1.04% +0.41% +0.50%] index_add_ perm : Elapsed 0.024 ms (2.415 ms / 100) 2.411 -> 2.418 ( +0.29%) [ +0.21% +0.00% +0.12% / +0.58% +0.29% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.416 ms / 100) 2.419 -> 2.420 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.50% +0.04% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.412 -> 2.422 ( +0.41%) [ +0.12% +0.00% +0.04% / +0.50% +1.33% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 4.433 -> 4.434 ( +0.02%) [ +0.02% +0.00% +0.09% / +0.23% +0.05% +0.02%] index_select const : Elapsed 0.044 ms (4.434 ms / 100) 4.440 -> 4.444 ( +0.09%) [ +0.11% +0.16% +0.00% / +0.14% +0.16% +0.09%] index_select wrap : Elapsed 0.044 ms (4.445 ms / 100) 4.440 -> 4.443 ( +0.07%) [ +0.09% +0.00% +0.16% / +0.25% +0.27% +0.07%] index_select linear : Elapsed 0.044 ms (4.444 ms / 100) 4.444 -> 4.440 ( -0.09%) [ +0.07% +0.20% +0.00% / +0.05% -0.09% -0.07%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.427 -> 4.431 ( +0.09%) [ +0.27% +0.00% +0.32% / +0.27% +0.09% +0.23%] index_select skip64 : Elapsed 0.044 ms (4.439 ms / 100) 4.432 -> 4.438 ( +0.14%) [ +0.09% +0.11% +0.00% / +0.14% +0.16% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.436 ms / 100) 4.441 -> 4.437 ( -0.09%) [ +0.09% +0.07% +0.00% / -0.09% +0.18% +0.25%] index_select spread : Elapsed 0.044 ms (4.445 ms / 100) 4.439 -> 4.446 ( +0.16%) [ +0.02% +0.07% +0.00% / +0.16% +0.20% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.440 ms / 100) 4.434 -> 4.437 ( +0.07%) [ +0.00% +0.07% +0.02% / +0.07% +0.27% +0.18%] index_select strided 5 : Elapsed 0.044 ms (4.434 ms / 100) 4.433 -> 4.443 ( +0.23%) [ +0.23% +0.00% +0.20% / +0.23% +0.43% +0.36%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.439 -> 4.433 ( -0.14%) [ +0.14% +0.00% +0.05% / -0.14% +0.02% +0.07%] index_select strided 8 : Elapsed 0.044 ms (4.445 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.07% +0.00%] index_select strided 16 : Elapsed 0.044 ms (4.442 ms / 100) 4.443 -> 4.443 ( +0.00%) [ +0.00% +0.20% +0.02% / +0.11% +0.00% +0.07%] index_select random : Elapsed 0.044 ms (4.443 ms / 100) 4.444 -> 4.443 ( -0.02%) [ +0.00% +0.05% +0.09% / -0.02% +0.05% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.444 ms / 100) B = [4, 16, 5, 40] (stride (5, 20, 1, 320)) A = [4, 16, 5, 20] (stride (5, 400, 1, 20)) dim = 3 2.446 -> 2.458 ( +0.49%) [ +0.08% +0.00% +0.04% / +0.49% +0.78% +0.65%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.443 -> 2.456 ( +0.53%) [ +0.00% +0.20% +0.12% / +0.53% +0.78% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.449 -> 2.460 ( +0.45%) [ +0.16% +0.08% +0.00% / +0.57% +0.61% +0.45%] index_add_ reverse : Elapsed 0.025 ms (2.453 ms / 100) 2.449 -> 2.460 ( +0.45%) [ +0.00% +0.08% +0.00% / +0.49% +0.69% +0.45%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.461 ( +0.45%) [ +0.16% +0.00% +0.04% / +0.53% +0.45% +0.53%] index_add_ spread : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.462 ( +0.65%) [ +0.16% +0.25% +0.00% / +0.65% +0.70% +0.78%] index_copy_ spread : Elapsed 0.025 ms (2.450 ms / 100) 2.448 -> 2.460 ( +0.49%) [ +0.20% +0.29% +0.00% / +0.74% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.448 -> 2.460 ( +0.49%) [ +0.04% +0.00% +0.04% / +0.57% +0.49% +0.53%] index_copy_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.452 -> 2.461 ( +0.37%) [ +0.00% +0.00% +0.08% / +0.37% +0.57% +0.45%] index_add_ strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.446 -> 2.455 ( +0.37%) [ +0.20% +0.00% +0.08% / +0.37% +0.53% +0.82%] index_copy_ strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.460 ( +0.49%) [ +0.12% +0.29% +0.00% / +0.49% +0.82% +0.69%] index_add_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.445 -> 2.457 ( +0.49%) [ +0.12% +0.08% +0.00% / +0.49% +1.10% +0.78%] index_copy_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.461 ( +0.53%) [ +0.16% +0.00% +0.04% / +0.53% +0.61% +0.74%] index_add_ perm_sorted : Elapsed 0.025 ms (2.452 ms / 100) 2.446 -> 2.458 ( +0.49%) [ +0.00% +0.00% +0.00% / +0.49% +0.78% +0.61%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.446 ms / 100) 4.493 -> 4.489 ( -0.09%) [ +0.00% +0.04% +0.07% / +0.02% -0.09% -0.09%] index_select const : Elapsed 0.045 ms (4.493 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.04% +0.00% +0.00% / +0.02% +0.09% +0.13%] index_select wrap : Elapsed 0.045 ms (4.501 ms / 100) 4.502 -> 4.499 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.20% +0.16%] index_select linear : Elapsed 0.045 ms (4.502 ms / 100) 4.500 -> 4.506 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.13% +0.22% +0.33%] index_select reverse : Elapsed 0.045 ms (4.502 ms / 100) 4.495 -> 4.491 ( -0.09%) [ +0.02% +0.04% +0.00% / -0.07% -0.09% -0.07%] index_select skip64 : Elapsed 0.045 ms (4.496 ms / 100) 4.489 -> 4.494 ( +0.11%) [ +0.20% +0.16% +0.00% / +0.18% +0.11% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.00% +0.11% +0.04% / +0.02% +0.07% +0.18%] index_select spread : Elapsed 0.045 ms (4.500 ms / 100) 4.495 -> 4.499 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.09% +0.33% +0.29%] index_select strided 3 : Elapsed 0.045 ms (4.499 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.00% +0.04% +0.04% / +0.11% +0.16% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.493 ms / 100) 4.498 -> 4.505 ( +0.16%) [ +0.07% +0.00% +0.02% / +0.18% +0.24% +0.16%] index_select strided 7 : Elapsed 0.045 ms (4.501 ms / 100) 4.487 -> 4.496 ( +0.20%) [ +0.00% +0.22% +0.20% / +0.20% +0.27% +0.40%] index_select strided 8 : Elapsed 0.045 ms (4.487 ms / 100) 4.495 -> 4.489 ( -0.13%) [ +0.02% +0.00% +0.02% / -0.13% +0.16% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.496 -> 4.510 ( +0.31%) [ +0.31% +0.11% +0.00% / +0.31% +0.33% +0.40%] index_select random : Elapsed 0.045 ms (4.510 ms / 100) 4.496 -> 4.503 ( +0.16%) [ +0.00% +0.13% +0.16% / +0.22% +0.16% +0.31%] index_select random_sorted : Elapsed 0.045 ms (4.496 ms / 100) out_shape = [40, 16, 20, 5] in_shape = [4, 16, 20, 5] idx_dim = 0 B = [40, 16, 20, 5] (stride (100, 4000, 5, 1)) A = [4, 16, 20, 5] (stride (5, 400, 20, 1)) dim = 0 0.568 -> 0.568 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +0.35% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.569 ms / 100) 0.554 -> 0.554 ( +0.00%) [ +0.18% +0.00% +0.18% / +0.18% +0.00% +0.36%] index_copy_ linear : Elapsed 0.006 ms (0.555 ms / 100) 0.567 -> 0.566 ( -0.18%) [ +0.00% +1.06% +0.00% / -0.18% +1.41% +1.06%] index_add_ reverse : Elapsed 0.006 ms (0.567 ms / 100) 0.552 -> 0.553 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +1.99% +0.91%] index_copy_ reverse : Elapsed 0.006 ms (0.552 ms / 100) 0.566 -> 0.568 ( +0.35%) [ +0.18% +1.06% +0.00% / +0.35% +1.06% +1.06%] index_add_ spread : Elapsed 0.006 ms (0.567 ms / 100) 0.553 -> 0.556 ( +0.54%) [ +0.00% +0.18% +0.00% / +2.17% +0.54% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.553 ms / 100) 0.567 -> 0.568 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +1.23% +1.06%] index_add_ strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.553 -> 0.555 ( +0.36%) [ +0.00% +0.00% +0.00% / +0.36% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.553 ms / 100) 0.566 -> 0.573 ( +1.24%) [ +0.35% +0.88% +0.00% / +3.00% +1.41% +1.24%] index_add_ strided 7 : Elapsed 0.006 ms (0.568 ms / 100) 0.552 -> 0.553 ( +0.18%) [ +4.35% +0.36% +0.00% / +0.18% +0.91% +0.91%] index_copy_ strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.566 -> 0.568 ( +0.35%) [ +0.18% +0.18% +0.00% / +0.35% +1.06% +0.53%] index_add_ perm : Elapsed 0.006 ms (0.567 ms / 100) 0.553 -> 0.553 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.54% +0.36%] index_copy_ perm : Elapsed 0.006 ms (0.554 ms / 100) 0.568 -> 0.568 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.35%] index_add_ perm_sorted : Elapsed 0.006 ms (0.568 ms / 100) 0.553 -> 0.554 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +0.18% +0.36%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.553 ms / 100) 4.989 -> 4.988 ( -0.02%) [ +0.10% +0.00% +0.14% / +0.00% -0.02% +0.16%] index_select const : Elapsed 0.050 ms (4.994 ms / 100) 4.985 -> 4.991 ( +0.12%) [ +0.00% +0.24% +0.08% / +0.16% +0.18% +0.12%] index_select wrap : Elapsed 0.050 ms (4.985 ms / 100) 4.991 -> 4.990 ( -0.02%) [ +0.38% +0.16% +0.00% / +0.12% +0.10% -0.02%] index_select linear : Elapsed 0.050 ms (5.010 ms / 100) 4.979 -> 4.977 ( -0.04%) [ +0.12% +0.00% +0.24% / -0.04% +0.36% +0.30%] index_select reverse : Elapsed 0.050 ms (4.985 ms / 100) 4.983 -> 4.978 ( -0.10%) [ +0.06% +0.00% +0.04% / -0.10% +0.20% +0.20%] index_select skip64 : Elapsed 0.050 ms (4.986 ms / 100) 4.977 -> 4.991 ( +0.28%) [ +0.08% +0.08% +0.00% / +0.30% +0.46% +0.28%] index_select skip256 : Elapsed 0.050 ms (4.981 ms / 100) 4.984 -> 4.992 ( +0.16%) [ +0.00% +0.22% +0.10% / +0.16% +0.38% +0.62%] index_select spread : Elapsed 0.050 ms (4.984 ms / 100) 4.985 -> 4.999 ( +0.28%) [ +0.00% +0.14% +0.18% / +0.30% +0.28% +0.32%] index_select strided 3 : Elapsed 0.050 ms (4.985 ms / 100) 4.990 -> 4.992 ( +0.04%) [ +0.18% +0.08% +0.00% / +0.04% +0.06% +0.10%] index_select random : Elapsed 0.050 ms (4.999 ms / 100) 4.983 -> 5.001 ( +0.36%) [ +0.00% +0.12% +0.18% / +0.36% +0.44% +0.40%] index_select random_sorted : Elapsed 0.050 ms (4.983 ms / 100) B = [40, 16, 20, 5] (stride (80, 5, 3200, 1)) A = [4, 16, 20, 5] (stride (1600, 1, 16, 320)) dim = 0 1.345 -> 1.351 ( +0.45%) [ +0.30% +0.00% +0.07% / +0.45% +0.67% +0.89%] index_add_ linear : Elapsed 0.013 ms (1.349 ms / 100) 1.309 -> 1.315 ( +0.46%) [ +0.23% +0.00% +0.08% / +0.46% +0.53% +0.69%] index_copy_ linear : Elapsed 0.013 ms (1.312 ms / 100) 1.348 -> 1.356 ( +0.59%) [ +0.37% +0.45% +0.00% / +0.82% +0.59% +0.67%] index_add_ reverse : Elapsed 0.014 ms (1.353 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.23% +0.23% +0.00% / +0.23% +0.38% +0.38%] index_copy_ reverse : Elapsed 0.013 ms (1.316 ms / 100) 1.356 -> 1.353 ( -0.22%) [ +0.00% +0.00% +0.00% / -0.22% +0.29% +0.22%] index_add_ spread : Elapsed 0.014 ms (1.356 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.38% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.317 ms / 100) 1.353 -> 1.356 ( +0.22%) [ +0.00% +0.22% +0.15% / +0.22% +0.44% +0.37%] index_add_ strided 3 : Elapsed 0.014 ms (1.353 ms / 100) 1.317 -> 1.320 ( +0.23%) [ +0.00% +0.15% +0.08% / +0.23% +0.53% +0.46%] index_copy_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.349 -> 1.350 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.37% +0.52%] index_add_ strided 7 : Elapsed 0.013 ms (1.350 ms / 100) 1.314 -> 1.314 ( +0.00%) [ +0.23% +0.08% +0.00% / +0.00% +0.15% +0.15%] index_copy_ strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.352 -> 1.356 ( +0.30%) [ +0.22% +0.37% +0.00% / +0.30% +0.52% +0.59%] index_add_ perm : Elapsed 0.014 ms (1.355 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.08% +0.15% +0.00% / +0.15% +0.30% +0.46%] index_copy_ perm : Elapsed 0.013 ms (1.316 ms / 100) 1.349 -> 1.350 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.44% +0.52%] index_add_ perm_sorted : Elapsed 0.014 ms (1.352 ms / 100) 1.314 -> 1.312 ( -0.15%) [ +0.00% +0.23% +0.08% / -0.15% +0.30% +0.30%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.314 ms / 100) 9.155 -> 9.168 ( +0.14%) [ +0.00% +0.09% +0.07% / +0.22% +0.14% +0.28%] index_select const : Elapsed 0.092 ms (9.155 ms / 100) 9.179 -> 9.181 ( +0.02%) [ +0.31% +0.04% +0.00% / +0.02% +0.12% +0.04%] index_select wrap : Elapsed 0.092 ms (9.207 ms / 100) 9.174 -> 9.170 ( -0.04%) [ +0.00% +0.09% +0.14% / -0.04% +0.09% +0.09%] index_select linear : Elapsed 0.092 ms (9.174 ms / 100) 9.167 -> 9.172 ( +0.05%) [ +0.00% +0.01% +0.01% / +0.13% +0.05% +0.15%] index_select reverse : Elapsed 0.092 ms (9.167 ms / 100) 9.142 -> 9.165 ( +0.25%) [ +0.10% +0.00% +0.18% / +0.26% +0.30% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.151 ms / 100) 9.154 -> 9.167 ( +0.14%) [ +0.12% +0.02% +0.00% / +0.19% +0.21% +0.14%] index_select skip256 : Elapsed 0.092 ms (9.165 ms / 100) 9.182 -> 9.175 ( -0.08%) [ +0.00% +0.13% +0.00% / -0.07% +0.08% -0.08%] index_select spread : Elapsed 0.092 ms (9.182 ms / 100) 9.181 -> 9.178 ( -0.03%) [ +0.00% +0.10% +0.04% / +0.10% +0.26% -0.03%] index_select strided 3 : Elapsed 0.092 ms (9.181 ms / 100) 9.174 -> 9.186 ( +0.13%) [ +0.00% +0.05% +0.05% / +0.13% +0.23% +0.13%] index_select random : Elapsed 0.092 ms (9.174 ms / 100) 9.171 -> 9.183 ( +0.13%) [ +0.11% +0.28% +0.00% / +0.21% +0.27% +0.13%] index_select random_sorted : Elapsed 0.092 ms (9.181 ms / 100) B = [40, 16, 20, 5] (stride (80, 1, 3200, 16)) A = [4, 16, 20, 5] (stride (1600, 1, 16, 320)) dim = 0 1.350 -> 1.351 ( +0.07%) [ +0.15% +0.00% +0.22% / +0.07% +0.52% +0.74%] index_add_ linear : Elapsed 0.014 ms (1.352 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.15% +0.08% +0.00% / +0.23% +0.53% +0.76%] index_copy_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.357 -> 1.359 ( +0.15%) [ +0.44% +0.22% +0.00% / +0.15% +0.66% +0.66%] index_add_ reverse : Elapsed 0.014 ms (1.363 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.30% +0.08% +0.00% / +0.00% +0.68% +0.76%] index_copy_ reverse : Elapsed 0.013 ms (1.320 ms / 100) 1.352 -> 1.354 ( +0.15%) [ +0.00% +0.22% +0.15% / +0.15% +0.44% +0.44%] index_add_ spread : Elapsed 0.014 ms (1.352 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.30% +0.15%] index_copy_ spread : Elapsed 0.013 ms (1.316 ms / 100) 1.350 -> 1.351 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.59% +0.52%] index_add_ strided 3 : Elapsed 0.014 ms (1.351 ms / 100) 1.315 -> 1.316 ( +0.08%) [ +0.00% +0.15% +0.15% / +0.08% +0.46% +0.30%] index_copy_ strided 3 : Elapsed 0.013 ms (1.315 ms / 100) 1.350 -> 1.351 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.52% +0.81%] index_add_ strided 7 : Elapsed 0.014 ms (1.350 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.08% +0.00% +0.15% / +0.00% +0.30% +0.53%] index_copy_ strided 7 : Elapsed 0.013 ms (1.316 ms / 100) 1.350 -> 1.353 ( +0.22%) [ +0.00% +0.00% +0.07% / +0.22% +0.59% +0.67%] index_add_ perm : Elapsed 0.014 ms (1.350 ms / 100) 1.310 -> 1.317 ( +0.53%) [ +0.53% +0.53% +0.00% / +0.53% +0.61% +0.92%] index_copy_ perm : Elapsed 0.013 ms (1.317 ms / 100) 1.357 -> 1.359 ( +0.15%) [ +0.00% +0.37% +0.00% / +0.15% +0.81% +0.66%] index_add_ perm_sorted : Elapsed 0.014 ms (1.357 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.76% +0.53%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 9.170 -> 9.172 ( +0.02%) [ +0.21% +0.00% +0.40% / +0.11% +0.02% +0.23%] index_select const : Elapsed 0.092 ms (9.189 ms / 100) 9.191 -> 9.186 ( -0.05%) [ +0.03% +0.05% +0.00% / +0.03% -0.05% +0.17%] index_select wrap : Elapsed 0.092 ms (9.194 ms / 100) 9.180 -> 9.186 ( +0.07%) [ +0.00% +0.15% +0.29% / +0.07% +0.07% +0.08%] index_select linear : Elapsed 0.092 ms (9.180 ms / 100) 9.187 -> 9.185 ( -0.02%) [ +0.00% +0.14% +0.11% / -0.02% +0.29% +0.08%] index_select reverse : Elapsed 0.092 ms (9.187 ms / 100) 9.181 -> 9.168 ( -0.14%) [ +0.11% +0.21% +0.00% / -0.14% +0.00% +0.14%] index_select skip64 : Elapsed 0.092 ms (9.191 ms / 100) 9.164 -> 9.182 ( +0.20%) [ +0.04% +0.04% +0.00% / +0.36% +0.20% +0.31%] index_select skip256 : Elapsed 0.092 ms (9.168 ms / 100) 9.195 -> 9.185 ( -0.11%) [ +0.00% +0.03% +0.02% / -0.04% +0.08% -0.11%] index_select spread : Elapsed 0.092 ms (9.195 ms / 100) 9.199 -> 9.188 ( -0.12%) [ +0.01% +0.00% +0.12% / +0.00% -0.12% -0.04%] index_select strided 3 : Elapsed 0.092 ms (9.200 ms / 100) 9.187 -> 9.189 ( +0.02%) [ +0.37% +0.00% +0.20% / +0.11% +0.02% +0.16%] index_select random : Elapsed 0.092 ms (9.221 ms / 100) 9.191 -> 9.179 ( -0.13%) [ +0.35% +0.04% +0.00% / +0.29% -0.13% +0.00%] index_select random_sorted : Elapsed 0.092 ms (9.223 ms / 100) B = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) A = [4, 16, 20, 5] (stride (100, 400, 5, 1)) dim = 0 1.232 -> 1.232 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.57% +0.73%] index_add_ linear : Elapsed 0.012 ms (1.234 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.33% +0.42%] index_copy_ linear : Elapsed 0.012 ms (1.195 ms / 100) 1.232 -> 1.236 ( +0.32%) [ +0.24% +0.08% +0.00% / +0.32% +0.41% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.235 ms / 100) 1.194 -> 1.198 ( +0.34%) [ +0.08% +0.17% +0.00% / +0.42% +0.34% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.195 ms / 100) 1.243 -> 1.245 ( +0.16%) [ +0.00% +0.16% +0.00% / +1.69% +0.24% +0.16%] index_add_ spread : Elapsed 0.012 ms (1.243 ms / 100) 1.205 -> 1.206 ( +0.08%) [ +0.08% +0.08% +0.00% / +1.16% +0.08% +0.17%] index_copy_ spread : Elapsed 0.012 ms (1.206 ms / 100) 1.236 -> 1.242 ( +0.49%) [ +0.24% +0.00% +0.00% / +1.13% +0.49% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.239 ms / 100) 1.198 -> 1.205 ( +0.58%) [ +0.00% +0.00% +0.08% / +0.58% +0.67% +0.58%] index_copy_ strided 3 : Elapsed 0.012 ms (1.198 ms / 100) 1.238 -> 1.245 ( +0.57%) [ +0.08% +0.16% +0.00% / +0.73% +0.65% +0.57%] index_add_ strided 7 : Elapsed 0.012 ms (1.239 ms / 100) 1.200 -> 1.207 ( +0.58%) [ +0.08% +0.08% +0.00% / +1.42% +0.58% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.201 ms / 100) 1.240 -> 1.241 ( +0.08%) [ +0.32% +0.24% +0.00% / +0.65% +0.16% +0.08%] index_add_ perm : Elapsed 0.012 ms (1.244 ms / 100) 1.203 -> 1.204 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.58% +0.08% +0.08%] index_copy_ perm : Elapsed 0.012 ms (1.203 ms / 100) 1.241 -> 1.244 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.81% +0.40% +0.24%] index_add_ perm_sorted : Elapsed 0.012 ms (1.241 ms / 100) 1.202 -> 1.206 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.67% +0.42% +0.33%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.202 ms / 100) 8.796 -> 8.797 ( +0.01%) [ +0.00% +0.13% +0.10% / +0.10% +0.01% +0.08%] index_select const : Elapsed 0.088 ms (8.796 ms / 100) 8.818 -> 8.819 ( +0.01%) [ +0.15% +0.16% +0.00% / +0.31% +0.15% +0.01%] index_select wrap : Elapsed 0.088 ms (8.831 ms / 100) 8.802 -> 8.814 ( +0.14%) [ +0.19% +0.00% +0.15% / +0.33% +0.14% +0.26%] index_select linear : Elapsed 0.088 ms (8.819 ms / 100) 8.810 -> 8.821 ( +0.12%) [ +0.15% +0.00% +0.12% / +0.12% +0.24% +0.45%] index_select reverse : Elapsed 0.088 ms (8.823 ms / 100) 8.795 -> 8.792 ( -0.03%) [ +0.06% +0.00% +0.09% / +0.10% -0.03% +0.10%] index_select skip64 : Elapsed 0.088 ms (8.800 ms / 100) 8.790 -> 8.788 ( -0.02%) [ +0.11% +0.00% +0.00% / +0.18% -0.02% +0.09%] index_select skip256 : Elapsed 0.088 ms (8.800 ms / 100) 8.823 -> 8.802 ( -0.24%) [ +0.17% +0.09% +0.00% / +0.14% +0.08% -0.24%] index_select spread : Elapsed 0.088 ms (8.838 ms / 100) 8.830 -> 8.816 ( -0.16%) [ +0.00% +0.08% +0.18% / +0.02% -0.14% -0.16%] index_select strided 3 : Elapsed 0.088 ms (8.830 ms / 100) 8.828 -> 8.816 ( -0.14%) [ +0.07% +0.00% +0.08% / +0.09% -0.14% -0.03%] index_select random : Elapsed 0.088 ms (8.834 ms / 100) 8.816 -> 8.828 ( +0.14%) [ +0.06% +0.00% +0.07% / +0.14% +0.35% +0.14%] index_select random_sorted : Elapsed 0.088 ms (8.821 ms / 100) B = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) A = [4, 16, 20, 5] (stride (1, 400, 20, 4)) dim = 0 1.152 -> 1.155 ( +0.26%) [ +0.26% +0.09% +0.00% / +0.26% +0.52% +0.52%] index_add_ linear : Elapsed 0.012 ms (1.155 ms / 100) 1.117 -> 1.119 ( +0.18%) [ +0.00% +0.00% +0.00% / +0.18% +0.45% +0.45%] index_copy_ linear : Elapsed 0.011 ms (1.117 ms / 100) 1.153 -> 1.153 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.52% +0.52%] index_add_ reverse : Elapsed 0.012 ms (1.153 ms / 100) 1.117 -> 1.122 ( +0.45%) [ +0.00% +0.09% +0.00% / +0.45% +0.63% +0.45%] index_copy_ reverse : Elapsed 0.011 ms (1.117 ms / 100) 1.162 -> 1.164 ( +0.17%) [ +0.26% +0.26% +0.00% / +0.17% +0.52% +0.34%] index_add_ spread : Elapsed 0.012 ms (1.165 ms / 100) 1.126 -> 1.130 ( +0.36%) [ +0.00% +0.18% +0.09% / +0.98% +0.36% +0.62%] index_copy_ spread : Elapsed 0.011 ms (1.126 ms / 100) 1.153 -> 1.164 ( +0.95%) [ +0.17% +0.26% +0.00% / +1.21% +1.13% +0.95%] index_add_ strided 3 : Elapsed 0.012 ms (1.155 ms / 100) 1.119 -> 1.124 ( +0.45%) [ +0.09% +0.18% +0.00% / +0.45% +0.98% +0.71%] index_copy_ strided 3 : Elapsed 0.011 ms (1.120 ms / 100) 1.157 -> 1.158 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.52% +0.78%] index_add_ strided 7 : Elapsed 0.012 ms (1.158 ms / 100) 1.122 -> 1.122 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.45% +0.53%] index_copy_ strided 7 : Elapsed 0.011 ms (1.123 ms / 100) 1.158 -> 1.159 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.69% +0.43%] index_add_ perm : Elapsed 0.012 ms (1.158 ms / 100) 1.122 -> 1.124 ( +0.18%) [ +0.18% +0.27% +0.00% / +0.18% +0.53% +0.45%] index_copy_ perm : Elapsed 0.011 ms (1.124 ms / 100) 1.159 -> 1.160 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.012 ms (1.159 ms / 100) 1.122 -> 1.126 ( +0.36%) [ +0.00% +0.27% +0.09% / +0.36% +0.80% +0.45%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.122 ms / 100) 8.369 -> 8.392 ( +0.27%) [ +0.10% +0.02% +0.00% / +0.27% +0.37% +0.30%] index_select const : Elapsed 0.084 ms (8.377 ms / 100) 8.366 -> 8.380 ( +0.17%) [ +0.29% +0.06% +0.00% / +0.22% +0.17% +0.35%] index_select wrap : Elapsed 0.084 ms (8.390 ms / 100) 8.373 -> 8.382 ( +0.11%) [ +0.00% +0.11% +0.02% / +0.19% +0.20% +0.11%] index_select linear : Elapsed 0.084 ms (8.373 ms / 100) 8.377 -> 8.377 ( +0.00%) [ +0.04% +0.06% +0.00% / +0.12% +0.00% +0.02%] index_select reverse : Elapsed 0.084 ms (8.380 ms / 100) 8.374 -> 8.374 ( +0.00%) [ +0.10% +0.00% +0.06% / +0.18% +0.00% +0.19%] index_select skip64 : Elapsed 0.084 ms (8.382 ms / 100) 8.371 -> 8.370 ( -0.01%) [ +0.02% +0.00% +0.01% / +0.22% -0.01% +0.16%] index_select skip256 : Elapsed 0.084 ms (8.373 ms / 100) 8.367 -> 8.379 ( +0.14%) [ +0.10% +0.00% +0.11% / +0.17% +0.16% +0.14%] index_select spread : Elapsed 0.084 ms (8.375 ms / 100) 8.369 -> 8.381 ( +0.14%) [ +0.16% +0.32% +0.00% / +0.30% +0.20% +0.14%] index_select strided 3 : Elapsed 0.084 ms (8.382 ms / 100) 8.381 -> 8.374 ( -0.08%) [ +0.05% +0.00% +0.19% / -0.08% +0.05% +0.07%] index_select random : Elapsed 0.084 ms (8.385 ms / 100) 8.381 -> 8.389 ( +0.10%) [ +0.11% +0.00% +0.02% / +0.13% +0.14% +0.10%] index_select random_sorted : Elapsed 0.084 ms (8.390 ms / 100) B = [40, 16, 20, 5] (stride (1, 200, 3200, 40)) A = [4, 16, 20, 5] (stride (20, 400, 1, 80)) dim = 0 1.317 -> 1.319 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.46% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.281 ( +0.23%) [ +0.00% +0.08% +0.00% / +0.23% +0.55% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.38% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.282 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.31% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.326 -> 1.330 ( +0.30%) [ +0.68% +0.45% +0.00% / +0.98% +0.30% +0.60%] index_add_ spread : Elapsed 0.013 ms (1.335 ms / 100) 1.291 -> 1.291 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.46% +0.00% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.291 ms / 100) 1.318 -> 1.324 ( +0.46%) [ +0.15% +0.08% +0.00% / +0.46% +0.61% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.320 ms / 100) 1.282 -> 1.287 ( +0.39%) [ +0.16% +0.08% +0.00% / +0.94% +0.62% +0.39%] index_copy_ strided 3 : Elapsed 0.013 ms (1.284 ms / 100) 1.320 -> 1.326 ( +0.45%) [ +0.23% +0.23% +0.00% / +0.45% +0.53% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.23% +0.39%] index_copy_ strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.38% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.323 ms / 100) 1.282 -> 1.285 ( +0.23%) [ +0.08% +0.08% +0.00% / +0.31% +0.23% +0.47%] index_copy_ perm : Elapsed 0.013 ms (1.283 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.30% +0.30%] index_add_ perm_sorted : Elapsed 0.013 ms (1.323 ms / 100) 1.283 -> 1.283 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.16%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) 9.243 -> 9.230 ( -0.14%) [ +0.01% +0.13% +0.00% / +0.06% +0.16% -0.14%] index_select const : Elapsed 0.092 ms (9.244 ms / 100) 9.257 -> 9.257 ( +0.00%) [ +0.00% +0.10% +0.04% / +0.23% +0.00% +0.01%] index_select wrap : Elapsed 0.093 ms (9.257 ms / 100) 9.249 -> 9.250 ( +0.01%) [ +0.05% +0.09% +0.00% / +0.04% +0.01% +0.04%] index_select linear : Elapsed 0.093 ms (9.254 ms / 100) 9.245 -> 9.253 ( +0.09%) [ +0.00% +0.09% +0.21% / +0.24% +0.29% +0.09%] index_select reverse : Elapsed 0.092 ms (9.245 ms / 100) 9.230 -> 9.231 ( +0.01%) [ +0.04% +0.00% +0.20% / +0.01% +0.26% +0.17%] index_select skip64 : Elapsed 0.092 ms (9.234 ms / 100) 9.228 -> 9.237 ( +0.10%) [ +0.14% +0.15% +0.00% / +0.14% +0.14% +0.10%] index_select skip256 : Elapsed 0.092 ms (9.241 ms / 100) 9.268 -> 9.267 ( -0.01%) [ +0.08% +0.13% +0.00% / +0.09% -0.01% +0.11%] index_select spread : Elapsed 0.093 ms (9.275 ms / 100) 9.265 -> 9.263 ( -0.02%) [ +0.19% +0.04% +0.00% / +0.22% -0.02% +0.11%] index_select strided 3 : Elapsed 0.093 ms (9.283 ms / 100) 9.263 -> 9.248 ( -0.16%) [ +0.14% +0.13% +0.00% / +0.18% -0.16% +0.39%] index_select random : Elapsed 0.093 ms (9.276 ms / 100) 9.264 -> 9.259 ( -0.05%) [ +0.00% +0.00% +0.08% / -0.05% +0.28% +0.13%] index_select random_sorted : Elapsed 0.093 ms (9.264 ms / 100) B = [40, 16, 20, 5] (stride (1, 800, 40, 12800)) A = [4, 16, 20, 5] (stride (16, 1, 320, 64)) dim = 0 0.595 -> 0.597 ( +0.34%) [ +0.17% +0.34% +0.00% / +2.86% +0.34% +0.67%] index_add_ linear : Elapsed 0.006 ms (0.596 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.00% +0.17% +0.00% / +0.67% +0.33% +0.33%] index_copy_ linear : Elapsed 0.006 ms (0.598 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +1.36% +1.02%] index_add_ reverse : Elapsed 0.006 ms (0.588 ms / 100) 0.598 -> 0.598 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.84% +0.50%] index_copy_ reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +0.17% +0.34% +0.00% / +0.17% +0.85% +0.85%] index_add_ spread : Elapsed 0.006 ms (0.589 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.00% +0.00% +0.17% / +0.33% +0.67% +0.67%] index_copy_ spread : Elapsed 0.006 ms (0.598 ms / 100) 0.592 -> 0.594 ( +0.34%) [ +0.00% +0.68% +0.00% / +0.34% +1.69% +1.18%] index_add_ strided 3 : Elapsed 0.006 ms (0.592 ms / 100) 0.598 -> 0.598 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.67% +0.50%] index_copy_ strided 3 : Elapsed 0.006 ms (0.598 ms / 100) 0.591 -> 0.595 ( +0.68%) [ +0.00% +0.17% +0.17% / +1.02% +0.85% +0.68%] index_add_ strided 7 : Elapsed 0.006 ms (0.591 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.00% +0.00% +0.17% / +0.83% +0.33% +0.33%] index_copy_ strided 7 : Elapsed 0.006 ms (0.599 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.68% +0.68%] index_add_ perm : Elapsed 0.006 ms (0.589 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.33% +0.33%] index_copy_ perm : Elapsed 0.006 ms (0.599 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.006 ms (0.589 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.17% +0.33%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.599 ms / 100) 5.165 -> 4.982 ( -3.54%) [ +0.04% +0.15% +0.00% / -3.47% -3.54% -3.48%] index_select const : Elapsed 0.052 ms (5.167 ms / 100) 5.169 -> 4.990 ( -3.46%) [ +0.00% +0.17% +0.10% / -2.92% -3.46% -3.46%] index_select wrap : Elapsed 0.052 ms (5.169 ms / 100) 5.176 -> 4.989 ( -3.61%) [ +0.19% +0.00% +0.12% / -3.25% -3.61% -3.42%] index_select linear : Elapsed 0.052 ms (5.186 ms / 100) 5.157 -> 4.992 ( -3.20%) [ +0.17% +0.08% +0.00% / -2.93% -3.01% -3.20%] index_select reverse : Elapsed 0.052 ms (5.166 ms / 100) 5.163 -> 4.977 ( -3.60%) [ +0.12% +0.12% +0.00% / -3.43% -3.60% -3.53%] index_select skip64 : Elapsed 0.052 ms (5.169 ms / 100) 5.157 -> 4.984 ( -3.35%) [ +0.04% +0.00% +0.21% / -3.28% -3.35% -3.24%] index_select skip256 : Elapsed 0.052 ms (5.159 ms / 100) 5.168 -> 4.998 ( -3.29%) [ +0.00% +0.19% +0.25% / -3.29% -3.27% -3.29%] index_select spread : Elapsed 0.052 ms (5.168 ms / 100) 5.165 -> 5.006 ( -3.08%) [ +0.14% +0.00% +0.10% / -3.08% -3.04% -3.08%] index_select strided 3 : Elapsed 0.052 ms (5.172 ms / 100) 5.167 -> 5.005 ( -3.14%) [ +0.19% +0.00% +0.00% / -3.14% -2.77% -2.83%] index_select random : Elapsed 0.052 ms (5.177 ms / 100) 5.168 -> 5.004 ( -3.17%) [ +0.00% +0.12% +0.02% / -3.04% -3.17% -3.17%] index_select random_sorted : Elapsed 0.052 ms (5.168 ms / 100) out_shape = [4, 40, 20, 5] in_shape = [4, 16, 20, 5] idx_dim = 1 B = [4, 40, 20, 5] (stride (5, 20, 800, 1)) A = [4, 16, 20, 5] (stride (20, 400, 1, 80)) dim = 1 4.031 -> 4.036 ( +0.12%) [ +0.07% +0.02% +0.00% / +0.12% +0.60% +0.60%] index_add_ linear : Elapsed 0.040 ms (4.034 ms / 100) 3.905 -> 3.914 ( +0.23%) [ +0.10% +0.15% +0.00% / +0.23% +0.51% +0.49%] index_copy_ linear : Elapsed 0.039 ms (3.909 ms / 100) 4.022 -> 4.033 ( +0.27%) [ +0.00% +0.35% +0.00% / +0.27% +0.70% +0.67%] index_add_ reverse : Elapsed 0.040 ms (4.022 ms / 100) 3.890 -> 3.901 ( +0.28%) [ +0.03% +0.51% +0.00% / +0.28% +0.80% +0.77%] index_copy_ reverse : Elapsed 0.039 ms (3.891 ms / 100) 4.033 -> 4.038 ( +0.12%) [ +0.00% +0.02% +0.05% / +0.12% +0.52% +0.60%] index_add_ spread : Elapsed 0.040 ms (4.033 ms / 100) 3.907 -> 3.917 ( +0.26%) [ +0.00% +0.10% +0.08% / +0.26% +0.41% +0.56%] index_copy_ spread : Elapsed 0.039 ms (3.907 ms / 100) 4.019 -> 4.022 ( +0.07%) [ +0.02% +0.00% +0.10% / +0.07% +0.45% +0.50%] index_add_ strided 3 : Elapsed 0.040 ms (4.020 ms / 100) 3.893 -> 3.900 ( +0.18%) [ +0.13% +0.00% +0.28% / +0.18% +0.64% +0.64%] index_copy_ strided 3 : Elapsed 0.039 ms (3.898 ms / 100) 4.030 -> 4.030 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.42% +0.47%] index_add_ strided 7 : Elapsed 0.040 ms (4.035 ms / 100) 3.900 -> 3.902 ( +0.05%) [ +0.15% +0.05% +0.00% / +0.05% +0.56% +0.46%] index_copy_ strided 7 : Elapsed 0.039 ms (3.906 ms / 100) 4.034 -> 4.039 ( +0.12%) [ +0.07% +0.00% +0.02% / +0.12% +0.52% +0.55%] index_add_ perm : Elapsed 0.040 ms (4.037 ms / 100) 3.909 -> 3.908 ( -0.03%) [ +0.00% +0.03% +0.13% / -0.03% +0.43% +0.38%] index_copy_ perm : Elapsed 0.039 ms (3.909 ms / 100) 4.034 -> 4.036 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.45% +0.47%] index_add_ perm_sorted : Elapsed 0.040 ms (4.037 ms / 100) 3.917 -> 3.911 ( -0.15%) [ +0.05% +0.05% +0.00% / -0.15% +0.10% +0.08%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.919 ms / 100) 5.548 -> 5.553 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.09% +0.18%] index_select const : Elapsed 0.056 ms (5.553 ms / 100) 5.554 -> 5.551 ( -0.05%) [ +0.14% +0.02% +0.00% / -0.05% +0.22% +0.31%] index_select wrap : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.559 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.16% +0.00% +0.13%] index_select linear : Elapsed 0.056 ms (5.566 ms / 100) 5.561 -> 5.562 ( +0.02%) [ +0.00% +0.09% +0.07% / +0.04% +0.09% +0.02%] index_select reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.559 -> 5.557 ( -0.04%) [ +0.11% +0.07% +0.00% / +0.04% -0.04% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.556 ( -0.04%) [ +0.05% +0.00% +0.04% / +0.14% -0.04% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.558 -> 5.554 ( -0.07%) [ +0.07% +0.18% +0.00% / -0.07% +0.18% +0.13%] index_select spread : Elapsed 0.056 ms (5.562 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.14% +0.00% +0.02% / +0.13% +0.13% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.563 ms / 100) 5.554 -> 5.561 ( +0.13%) [ +0.09% +0.09% +0.00% / +0.13% +0.18% +0.16%] index_select strided 5 : Elapsed 0.056 ms (5.559 ms / 100) 5.556 -> 5.560 ( +0.07%) [ +0.00% +0.05% +0.05% / +0.14% +0.07% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.556 ms / 100) 5.554 -> 5.560 ( +0.11%) [ +0.04% +0.13% +0.00% / +0.11% +0.14% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.556 ms / 100) 5.560 -> 5.562 ( +0.04%) [ +0.00% +0.02% +0.02% / +0.04% +0.11% +0.13%] index_select random : Elapsed 0.056 ms (5.560 ms / 100) 5.554 -> 5.562 ( +0.14%) [ +0.00% +0.07% +0.16% / +0.14% +0.25% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.554 ms / 100) B = [4, 40, 20, 5] (stride (1, 80, 4, 3200)) A = [4, 16, 20, 5] (stride (16, 1, 320, 64)) dim = 1 4.101 -> 4.105 ( +0.10%) [ +0.00% +0.02% +0.00% / +0.10% +0.78% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.101 ms / 100) 3.939 -> 3.945 ( +0.15%) [ +0.00% +0.10% +0.03% / +0.15% +0.84% +0.74%] index_copy_ linear : Elapsed 0.039 ms (3.939 ms / 100) 4.064 -> 4.072 ( +0.20%) [ +0.17% +0.02% +0.00% / +0.20% +0.57% +0.84%] index_add_ reverse : Elapsed 0.041 ms (4.071 ms / 100) 3.900 -> 3.903 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.56% +0.79%] index_copy_ reverse : Elapsed 0.039 ms (3.903 ms / 100) 4.092 -> 4.102 ( +0.24%) [ +0.02% +0.12% +0.00% / +0.24% +0.83% +0.83%] index_add_ spread : Elapsed 0.041 ms (4.093 ms / 100) 3.927 -> 3.936 ( +0.23%) [ +0.00% +0.13% +0.00% / +0.23% +0.84% +0.84%] index_copy_ spread : Elapsed 0.039 ms (3.927 ms / 100) 4.075 -> 4.086 ( +0.27%) [ +0.49% +0.00% +0.12% / +0.27% +1.13% +0.69%] index_add_ strided 3 : Elapsed 0.041 ms (4.095 ms / 100) 3.904 -> 3.913 ( +0.23%) [ +0.44% +0.00% +0.28% / +0.23% +1.20% +0.74%] index_copy_ strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 4.064 -> 4.061 ( -0.07%) [ +0.00% +0.22% +0.17% / -0.07% +0.52% +0.74%] index_add_ strided 7 : Elapsed 0.041 ms (4.064 ms / 100) 3.901 -> 3.894 ( -0.18%) [ +0.00% +0.10% +0.05% / -0.18% +0.51% +0.69%] index_copy_ strided 7 : Elapsed 0.039 ms (3.901 ms / 100) 4.104 -> 4.105 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.68% +0.63%] index_add_ perm : Elapsed 0.041 ms (4.104 ms / 100) 3.940 -> 3.944 ( +0.10%) [ +0.03% +0.05% +0.00% / +0.10% +0.71% +0.71%] index_copy_ perm : Elapsed 0.039 ms (3.941 ms / 100) 4.082 -> 4.092 ( +0.24%) [ +0.00% +0.00% +0.27% / +0.24% +0.56% +0.54%] index_add_ perm_sorted : Elapsed 0.041 ms (4.082 ms / 100) 3.915 -> 3.922 ( +0.18%) [ +0.05% +0.00% +0.18% / +0.18% +0.69% +0.64%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.917 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.05% +0.04% +0.02%] index_select const : Elapsed 0.055 ms (5.490 ms / 100) 5.494 -> 5.487 ( -0.13%) [ +0.00% +0.18% +0.09% / +0.05% -0.13% +0.04%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.496 -> 5.497 ( +0.02%) [ +0.00% +0.04% +0.07% / +0.11% +0.09% +0.02%] index_select linear : Elapsed 0.055 ms (5.496 ms / 100) 5.494 -> 5.492 ( -0.04%) [ +0.07% +0.16% +0.00% / +0.02% +0.00% -0.04%] index_select reverse : Elapsed 0.055 ms (5.498 ms / 100) 5.485 -> 5.488 ( +0.05%) [ +0.13% +0.13% +0.00% / +0.09% +0.05% +0.29%] index_select skip64 : Elapsed 0.055 ms (5.492 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.05% +0.00% +0.24%] index_select skip256 : Elapsed 0.055 ms (5.496 ms / 100) 5.487 -> 5.491 ( +0.07%) [ +0.00% +0.13% +0.20% / +0.18% +0.07% +0.11%] index_select spread : Elapsed 0.055 ms (5.487 ms / 100) 5.496 -> 5.492 ( -0.07%) [ +0.02% +0.00% +0.07% / +0.02% +0.00% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.497 ms / 100) 5.496 -> 5.491 ( -0.09%) [ +0.25% +0.07% +0.00% / +0.05% -0.09% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.510 ms / 100) 5.493 -> 5.497 ( +0.07%) [ +0.04% +0.05% +0.00% / +0.07% +0.07% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.491 -> 5.496 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.20% +0.15% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.491 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.18% +0.05% +0.00%] index_select random : Elapsed 0.055 ms (5.495 ms / 100) 5.496 -> 5.491 ( -0.09%) [ +0.00% +0.09% +0.07% / +0.02% -0.09% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.496 ms / 100) out_shape = [4, 16, 40, 5] in_shape = [4, 16, 20, 5] idx_dim = 2 B = [4, 16, 40, 5] (stride (3200, 200, 5, 1)) A = [4, 16, 20, 5] (stride (80, 5, 320, 1)) dim = 2 1.534 -> 1.505 ( -1.89%) [ +0.07% +0.00% +0.07% / -1.89% -1.30% -1.56%] index_add_ linear : Elapsed 0.015 ms (1.535 ms / 100) 1.496 -> 1.466 ( -2.01%) [ +0.07% +0.13% +0.00% / -2.01% -1.34% -1.40%] index_copy_ linear : Elapsed 0.015 ms (1.497 ms / 100) 1.530 -> 1.504 ( -1.70%) [ +0.33% +0.00% +0.20% / -1.70% -1.05% -1.24%] index_add_ reverse : Elapsed 0.015 ms (1.535 ms / 100) 1.499 -> 1.467 ( -2.13%) [ +0.00% +0.27% +0.00% / -2.13% -1.67% -1.80%] index_copy_ reverse : Elapsed 0.015 ms (1.499 ms / 100) 1.549 -> 1.518 ( -2.00%) [ +0.00% +0.32% +0.32% / -2.00% -1.48% -1.42%] index_add_ spread : Elapsed 0.015 ms (1.549 ms / 100) 1.522 -> 1.488 ( -2.23%) [ +0.00% +0.00% +0.00% / -2.23% -1.97% -2.04%] index_copy_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.552 -> 1.522 ( -1.93%) [ +0.19% +0.00% +0.00% / -1.93% -1.87% -1.74%] index_add_ strided 3 : Elapsed 0.016 ms (1.555 ms / 100) 1.519 -> 1.488 ( -2.04%) [ +0.00% +0.20% +0.07% / -2.04% -1.84% -1.84%] index_copy_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.553 -> 1.524 ( -1.87%) [ +0.00% +0.26% +0.19% / -1.87% -1.35% -1.55%] index_add_ strided 7 : Elapsed 0.016 ms (1.553 ms / 100) 1.519 -> 1.494 ( -1.65%) [ +0.00% +0.46% +0.13% / -1.58% -1.65% -1.58%] index_copy_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.545 -> 1.520 ( -1.62%) [ +0.19% +0.06% +0.00% / -1.62% -1.62% -1.29%] index_add_ perm : Elapsed 0.015 ms (1.548 ms / 100) 1.513 -> 1.477 ( -2.38%) [ +0.00% +0.33% +0.13% / -2.38% -1.72% -1.52%] index_copy_ perm : Elapsed 0.015 ms (1.513 ms / 100) 1.539 -> 1.510 ( -1.88%) [ +0.32% +0.06% +0.00% / -1.88% -1.30% -1.43%] index_add_ perm_sorted : Elapsed 0.015 ms (1.544 ms / 100) 1.506 -> 1.474 ( -2.12%) [ +0.00% +0.27% +0.20% / -2.12% -1.59% -1.86%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.506 ms / 100) 2.870 -> 2.877 ( +0.24%) [ +0.00% +0.24% +0.07% / +0.24% +0.42% +0.38%] index_select const : Elapsed 0.029 ms (2.870 ms / 100) 2.892 -> 2.882 ( -0.35%) [ +0.14% +0.00% +0.14% / +0.31% -0.31% -0.35%] index_select wrap : Elapsed 0.029 ms (2.896 ms / 100) 2.888 -> 2.884 ( -0.14%) [ +0.10% +0.00% +0.28% / +0.28% -0.14% +0.00%] index_select linear : Elapsed 0.029 ms (2.891 ms / 100) 2.888 -> 2.887 ( -0.03%) [ +0.21% +0.17% +0.00% / +0.10% +0.10% -0.03%] index_select reverse : Elapsed 0.029 ms (2.894 ms / 100) 2.873 -> 2.877 ( +0.14%) [ +0.00% +0.14% +0.03% / +0.14% +0.17% +0.31%] index_select skip64 : Elapsed 0.029 ms (2.873 ms / 100) 2.866 -> 2.869 ( +0.10%) [ +0.28% +0.00% +0.24% / +0.10% +0.59% +0.59%] index_select skip256 : Elapsed 0.029 ms (2.874 ms / 100) 2.893 -> 2.888 ( -0.17%) [ +0.00% +0.17% +0.17% / +0.17% -0.14% -0.17%] index_select spread : Elapsed 0.029 ms (2.893 ms / 100) 2.892 -> 2.887 ( -0.17%) [ +0.00% +0.24% +0.28% / -0.07% -0.17% -0.14%] index_select strided 3 : Elapsed 0.029 ms (2.892 ms / 100) 2.872 -> 2.873 ( +0.03%) [ +0.00% +0.17% +0.28% / +0.03% +0.31% +0.63%] index_select strided 5 : Elapsed 0.029 ms (2.872 ms / 100) 2.890 -> 2.889 ( -0.03%) [ +0.28% +0.00% +0.28% / +0.35% -0.03% +0.21%] index_select strided 7 : Elapsed 0.029 ms (2.898 ms / 100) 2.870 -> 2.870 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.28% +0.31%] index_select strided 8 : Elapsed 0.029 ms (2.870 ms / 100) 2.868 -> 2.869 ( +0.03%) [ +0.00% +0.24% +0.21% / +0.03% +0.66% +0.31%] index_select strided 16 : Elapsed 0.029 ms (2.868 ms / 100) 2.890 -> 2.889 ( -0.03%) [ +0.17% +0.00% +0.28% / +0.14% -0.03% +0.00%] index_select random : Elapsed 0.029 ms (2.895 ms / 100) 2.897 -> 2.888 ( -0.31%) [ +0.03% +0.00% +0.00% / -0.31% -0.10% -0.28%] index_select random_sorted : Elapsed 0.029 ms (2.898 ms / 100) B = [4, 16, 40, 5] (stride (3200, 1, 80, 16)) A = [4, 16, 20, 5] (stride (80, 5, 320, 1)) dim = 2 2.417 -> 2.424 ( +0.29%) [ +0.00% +0.04% +0.00% / +0.29% +0.50% +0.58%] index_add_ linear : Elapsed 0.024 ms (2.417 ms / 100) 2.401 -> 2.416 ( +0.62%) [ +0.25% +0.17% +0.00% / +0.62% +0.67% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.407 ms / 100) 2.407 -> 2.420 ( +0.54%) [ +0.17% +0.12% +0.00% / +0.54% +0.87% +0.91%] index_add_ reverse : Elapsed 0.024 ms (2.411 ms / 100) 2.396 -> 2.410 ( +0.58%) [ +0.00% +0.17% +0.13% / +0.58% +0.96% +1.04%] index_copy_ reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.404 -> 2.419 ( +0.62%) [ +0.29% +0.00% +0.00% / +0.62% +1.21% +1.04%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.393 -> 2.406 ( +0.54%) [ +0.00% +0.04% +0.00% / +0.54% +1.25% +1.21%] index_copy_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.410 -> 2.424 ( +0.58%) [ +0.08% +0.00% +0.17% / +0.66% +0.58% +0.71%] index_add_ strided 3 : Elapsed 0.024 ms (2.412 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.12% +0.00% +0.00% / +0.46% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.412 -> 2.421 ( +0.37%) [ +0.00% +0.00% +0.04% / +0.37% +0.50% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.402 -> 2.415 ( +0.54%) [ +0.00% +0.12% +0.04% / +0.54% +0.54% +0.71%] index_copy_ strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.12% +0.00% +0.08% / +0.75% +0.12% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.418 ms / 100) 2.404 -> 2.409 ( +0.21%) [ +0.17% +0.08% +0.00% / +0.67% +0.25% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.417 -> 2.419 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.37% +0.29% +0.08%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.403 -> 2.409 ( +0.25%) [ +0.00% +0.21% +0.08% / +0.79% +0.62% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 4.431 -> 4.430 ( -0.02%) [ +0.02% +0.11% +0.00% / +0.00% +0.00% -0.02%] index_select const : Elapsed 0.044 ms (4.432 ms / 100) 4.436 -> 4.438 ( +0.05%) [ +0.18% +0.00% +0.09% / +0.14% +0.05% +0.11%] index_select wrap : Elapsed 0.044 ms (4.444 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.14% +0.11%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.441 -> 4.441 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.05% +0.07%] index_select reverse : Elapsed 0.044 ms (4.441 ms / 100) 4.432 -> 4.427 ( -0.11%) [ +0.00% +0.07% +0.00% / +0.05% -0.11% -0.09%] index_select skip64 : Elapsed 0.044 ms (4.432 ms / 100) 4.432 -> 4.429 ( -0.07%) [ +0.09% +0.14% +0.00% / -0.07% +0.00% -0.02%] index_select skip256 : Elapsed 0.044 ms (4.436 ms / 100) 4.428 -> 4.439 ( +0.25%) [ +0.20% +0.29% +0.00% / +0.25% +0.32% +0.32%] index_select spread : Elapsed 0.044 ms (4.437 ms / 100) 4.435 -> 4.439 ( +0.09%) [ +0.00% +0.07% +0.02% / +0.09% +0.32% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.435 ms / 100) 4.424 -> 4.432 ( +0.18%) [ +0.14% +0.05% +0.00% / +0.23% +0.18% +0.29%] index_select strided 5 : Elapsed 0.044 ms (4.430 ms / 100) 4.435 -> 4.433 ( -0.05%) [ +0.00% +0.11% +0.07% / -0.05% +0.18% +0.18%] index_select strided 7 : Elapsed 0.044 ms (4.435 ms / 100) 4.433 -> 4.431 ( -0.05%) [ +0.00% +0.16% +0.25% / +0.02% -0.05% +0.07%] index_select strided 8 : Elapsed 0.044 ms (4.433 ms / 100) 4.433 -> 4.426 ( -0.16%) [ +0.00% +0.14% +0.09% / -0.05% -0.16% -0.02%] index_select strided 16 : Elapsed 0.044 ms (4.433 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.20% +0.14% +0.00% / +0.02% +0.16% +0.02%] index_select random : Elapsed 0.044 ms (4.445 ms / 100) 4.439 -> 4.436 ( -0.07%) [ +0.14% +0.05% +0.00% / +0.00% -0.07% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.445 ms / 100) B = [4, 16, 40, 5] (stride (1, 20, 320, 4)) A = [4, 16, 20, 5] (stride (1600, 100, 5, 1)) dim = 2 2.401 -> 2.413 ( +0.50%) [ +0.00% +0.17% +0.04% / +0.50% +0.71% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.400 -> 2.411 ( +0.46%) [ +0.08% +0.00% +0.13% / +0.46% +0.63% +0.71%] index_copy_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.406 -> 2.416 ( +0.42%) [ +0.00% +0.08% +0.12% / +0.42% +0.42% +0.46%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.25% +0.00% +0.08% / +0.46% +0.58% +0.54%] index_copy_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.12% +0.04% +0.00% / +0.42% +0.46% +0.67%] index_add_ spread : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.416 ( +0.58%) [ +0.00% +0.29% +0.08% / +0.62% +0.58% +0.71%] index_copy_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.25% +0.00% +0.00% / +0.46% +0.37% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.04% +0.04% +0.00% / +0.46% +0.58% +0.42%] index_copy_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.406 -> 2.414 ( +0.33%) [ +0.08% +0.00% +0.04% / +0.33% +0.42% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.413 ( +0.37%) [ +0.08% +0.04% +0.00% / +0.37% +0.67% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.404 -> 2.412 ( +0.33%) [ +0.17% +0.00% +0.04% / +0.33% +0.50% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.402 -> 2.415 ( +0.54%) [ +0.00% +0.08% +0.00% / +0.54% +0.79% +0.87%] index_copy_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.402 -> 2.417 ( +0.62%) [ +0.08% +0.00% +0.21% / +0.62% +0.71% +0.62%] index_add_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 2.401 -> 2.416 ( +0.62%) [ +0.17% +0.00% +0.08% / +0.62% +0.92% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.405 ms / 100) 4.427 -> 4.426 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.18% +0.05%] index_select const : Elapsed 0.044 ms (4.427 ms / 100) 4.431 -> 4.433 ( +0.05%) [ +0.00% +0.14% +0.16% / +0.05% +0.23% +0.27%] index_select wrap : Elapsed 0.044 ms (4.431 ms / 100) 4.438 -> 4.439 ( +0.02%) [ +0.11% +0.07% +0.00% / +0.02% +0.14% +0.05%] index_select linear : Elapsed 0.044 ms (4.443 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.18% +0.00% +0.05% / +0.09% +0.07% +0.11%] index_select reverse : Elapsed 0.044 ms (4.444 ms / 100) 4.428 -> 4.425 ( -0.07%) [ +0.00% +0.11% +0.00% / +0.07% -0.07% +0.09%] index_select skip64 : Elapsed 0.044 ms (4.428 ms / 100) 4.426 -> 4.430 ( +0.09%) [ +0.00% +0.18% +0.14% / +0.09% +0.25% +0.14%] index_select skip256 : Elapsed 0.044 ms (4.426 ms / 100) 4.432 -> 4.440 ( +0.18%) [ +0.14% +0.00% +0.16% / +0.29% +0.32% +0.18%] index_select spread : Elapsed 0.044 ms (4.438 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.27% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.437 ms / 100) 4.426 -> 4.431 ( +0.11%) [ +0.34% +0.05% +0.00% / +0.11% +0.27% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.441 ms / 100) 4.437 -> 4.440 ( +0.07%) [ +0.14% +0.00% +0.02% / +0.07% +0.07% +0.32%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.438 -> 4.431 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.05% -0.16% +0.02%] index_select strided 8 : Elapsed 0.044 ms (4.438 ms / 100) 4.422 -> 4.435 ( +0.29%) [ +0.00% +0.25% +0.09% / +0.34% +0.29% +0.52%] index_select strided 16 : Elapsed 0.044 ms (4.422 ms / 100) 4.439 -> 4.439 ( +0.00%) [ +0.16% +0.00% +0.05% / +0.00% +0.05% +0.16%] index_select random : Elapsed 0.044 ms (4.446 ms / 100) 4.435 -> 4.436 ( +0.02%) [ +0.23% +0.00% +0.25% / +0.02% +0.16% +0.29%] index_select random_sorted : Elapsed 0.044 ms (4.445 ms / 100) B = [4, 16, 40, 5] (stride (640, 40, 1, 2560)) A = [4, 16, 20, 5] (stride (1, 20, 320, 4)) dim = 2 2.390 -> 2.402 ( +0.50%) [ +0.00% +0.13% +0.17% / +0.50% +0.79% +0.63%] index_add_ linear : Elapsed 0.024 ms (2.390 ms / 100) 2.396 -> 2.412 ( +0.67%) [ +0.00% +0.08% +0.13% / +0.67% +0.79% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.384 -> 2.395 ( +0.46%) [ +0.00% +0.04% +0.00% / +0.46% +1.05% +1.01%] index_add_ reverse : Elapsed 0.024 ms (2.384 ms / 100) 2.389 -> 2.401 ( +0.50%) [ +0.00% +0.21% +0.04% / +0.50% +1.13% +1.21%] index_copy_ reverse : Elapsed 0.024 ms (2.389 ms / 100) 2.397 -> 2.408 ( +0.46%) [ +0.13% +0.04% +0.00% / +0.46% +0.96% +1.04%] index_add_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.412 -> 2.422 ( +0.41%) [ +0.00% +0.08% +0.04% / +0.41% +0.91% +0.79%] index_copy_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.403 -> 2.413 ( +0.42%) [ +0.08% +0.17% +0.00% / +0.42% +0.67% +0.67%] index_add_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.417 -> 2.424 ( +0.29%) [ +0.08% +0.00% +0.08% / +0.29% +0.70% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.00% +0.17% +0.21% / +0.58% +0.58% +0.42%] index_add_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.416 -> 2.430 ( +0.58%) [ +0.00% +0.08% +0.08% / +0.70% +0.66% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.405 -> 2.411 ( +0.25%) [ +0.08% +0.00% +0.12% / +0.58% +0.25% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.417 -> 2.426 ( +0.37%) [ +0.00% +0.08% +0.08% / +0.62% +0.46% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.417 ms / 100) 2.403 -> 2.408 ( +0.21%) [ +0.37% +0.25% +0.00% / +0.67% +0.42% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) 2.418 -> 2.427 ( +0.37%) [ +0.00% +0.25% +0.12% / +0.62% +0.45% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 4.416 -> 4.421 ( +0.11%) [ +0.09% +0.25% +0.00% / +0.11% +0.14% +0.20%] index_select const : Elapsed 0.044 ms (4.420 ms / 100) 4.425 -> 4.425 ( +0.00%) [ +0.14% +0.00% +0.09% / +0.05% +0.00% +0.05%] index_select wrap : Elapsed 0.044 ms (4.431 ms / 100) 4.425 -> 4.428 ( +0.07%) [ +0.00% +0.14% +0.09% / +0.07% +0.14% +0.07%] index_select linear : Elapsed 0.044 ms (4.425 ms / 100) 4.429 -> 4.429 ( +0.00%) [ +0.14% +0.00% +0.02% / +0.00% +0.27% +0.02%] index_select reverse : Elapsed 0.044 ms (4.435 ms / 100) 4.422 -> 4.424 ( +0.05%) [ +0.02% +0.18% +0.00% / +0.05% +0.45% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.423 ms / 100) 4.418 -> 4.421 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.16% +0.20% +0.07%] index_select skip256 : Elapsed 0.044 ms (4.418 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.09% +0.00% +0.14% / +0.05% +0.16% -0.09%] index_select spread : Elapsed 0.044 ms (4.430 ms / 100) 4.422 -> 4.425 ( +0.07%) [ +0.16% +0.02% +0.00% / +0.20% +0.45% +0.07%] index_select strided 3 : Elapsed 0.044 ms (4.429 ms / 100) 4.413 -> 4.418 ( +0.11%) [ +0.14% +0.18% +0.00% / +0.11% +0.39% +0.23%] index_select strided 5 : Elapsed 0.044 ms (4.419 ms / 100) 4.423 -> 4.428 ( +0.11%) [ +0.07% +0.16% +0.00% / +0.14% +0.23% +0.11%] index_select strided 7 : Elapsed 0.044 ms (4.426 ms / 100) 4.421 -> 4.418 ( -0.07%) [ +0.02% +0.00% +0.05% / +0.05% +0.27% -0.07%] index_select strided 8 : Elapsed 0.044 ms (4.422 ms / 100) 4.420 -> 4.414 ( -0.14%) [ +0.11% +0.00% +0.14% / +0.09% -0.07% -0.14%] index_select strided 16 : Elapsed 0.044 ms (4.425 ms / 100) 4.431 -> 4.421 ( -0.23%) [ +0.11% +0.05% +0.00% / -0.05% -0.02% -0.23%] index_select random : Elapsed 0.044 ms (4.436 ms / 100) 4.429 -> 4.422 ( -0.16%) [ +0.16% +0.00% +0.00% / +0.14% -0.16% -0.11%] index_select random_sorted : Elapsed 0.044 ms (4.436 ms / 100) B = [4, 16, 40, 5] (stride (1, 4, 64, 2560)) A = [4, 16, 20, 5] (stride (1600, 1, 16, 320)) dim = 2 2.453 -> 2.459 ( +0.24%) [ +0.00% +0.04% +0.04% / +0.24% +0.65% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.446 -> 2.460 ( +0.57%) [ +0.16% +0.16% +0.00% / +0.57% +1.02% +0.90%] index_copy_ linear : Elapsed 0.025 ms (2.450 ms / 100) 2.450 -> 2.461 ( +0.45%) [ +0.16% +0.20% +0.00% / +0.45% +0.78% +0.82%] index_add_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.462 ( +0.45%) [ +0.12% +0.29% +0.00% / +0.45% +0.94% +0.57%] index_copy_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.451 -> 2.465 ( +0.57%) [ +0.00% +0.33% +0.16% / +0.61% +0.57% +0.57%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.453 -> 2.466 ( +0.53%) [ +0.00% +0.12% +0.04% / +0.53% +0.73% +0.57%] index_copy_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.466 ( +0.53%) [ +0.12% +0.12% +0.00% / +0.57% +0.53% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.450 -> 2.463 ( +0.53%) [ +0.12% +0.16% +0.00% / +0.53% +0.82% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.457 -> 2.465 ( +0.33%) [ +0.00% +0.04% +0.12% / +0.33% +0.33% +0.41%] index_add_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.452 -> 2.464 ( +0.49%) [ +0.04% +0.12% +0.00% / +0.49% +0.57% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.464 ( +0.45%) [ +0.08% +0.24% +0.00% / +0.45% +0.73% +0.65%] index_add_ perm : Elapsed 0.025 ms (2.455 ms / 100) 2.451 -> 2.467 ( +0.65%) [ +0.00% +0.08% +0.24% / +0.69% +0.73% +0.65%] index_copy_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.453 -> 2.466 ( +0.53%) [ +0.00% +0.08% +0.08% / +0.53% +0.53% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.465 ( +0.49%) [ +0.00% +0.00% +0.16% / +0.61% +0.65% +0.49%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 4.492 -> 4.491 ( -0.02%) [ +0.20% +0.13% +0.00% / +0.16% +0.20% -0.02%] index_select const : Elapsed 0.045 ms (4.501 ms / 100) 4.505 -> 4.506 ( +0.02%) [ +0.11% +0.00% +0.00% / +0.02% +0.20% +0.13%] index_select wrap : Elapsed 0.045 ms (4.510 ms / 100) 4.508 -> 4.505 ( -0.07%) [ +0.09% +0.04% +0.00% / -0.07% +0.13% +0.00%] index_select linear : Elapsed 0.045 ms (4.512 ms / 100) 4.506 -> 4.508 ( +0.04%) [ +0.18% +0.02% +0.00% / +0.04% +0.27% +0.29%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.490 -> 4.502 ( +0.27%) [ +0.27% +0.00% +0.18% / +0.27% +0.36% +0.29%] index_select skip64 : Elapsed 0.045 ms (4.502 ms / 100) 4.489 -> 4.498 ( +0.20%) [ +0.18% +0.00% +0.11% / +0.20% +0.20% +0.33%] index_select skip256 : Elapsed 0.045 ms (4.497 ms / 100) 4.503 -> 4.510 ( +0.16%) [ +0.22% +0.04% +0.00% / +0.16% +0.18% +0.16%] index_select spread : Elapsed 0.045 ms (4.513 ms / 100) 4.506 -> 4.511 ( +0.11%) [ +0.02% +0.00% +0.09% / +0.18% +0.11% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.07% +0.07% +0.00% / +0.18% +0.13% +0.09%] index_select strided 5 : Elapsed 0.045 ms (4.500 ms / 100) 4.507 -> 4.497 ( -0.22%) [ +0.00% +0.02% +0.00% / -0.22% +0.04% +0.11%] index_select strided 7 : Elapsed 0.045 ms (4.507 ms / 100) 4.494 -> 4.491 ( -0.07%) [ +0.00% +0.00% +0.13% / -0.07% +0.27% +0.36%] index_select strided 8 : Elapsed 0.045 ms (4.494 ms / 100) 4.499 -> 4.499 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.07% +0.16% +0.00%] index_select strided 16 : Elapsed 0.045 ms (4.499 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.09% +0.13% +0.00% / +0.02% +0.29% +0.40%] index_select random : Elapsed 0.045 ms (4.504 ms / 100) 4.503 -> 4.508 ( +0.11%) [ +0.11% +0.00% +0.02% / +0.11% +0.13% +0.18%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) out_shape = [4, 16, 20, 40] in_shape = [4, 16, 20, 5] idx_dim = 3 B = [4, 16, 20, 40] (stride (12800, 1, 640, 16)) A = [4, 16, 20, 5] (stride (320, 1, 16, 1280)) dim = 3 1.527 -> 1.528 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.65%] index_add_ linear : Elapsed 0.015 ms (1.528 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +0.54% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.485 ms / 100) 1.526 -> 1.527 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.52% +0.52%] index_add_ reverse : Elapsed 0.015 ms (1.528 ms / 100) 1.485 -> 1.485 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.40%] index_copy_ reverse : Elapsed 0.015 ms (1.485 ms / 100) 1.527 -> 1.527 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.46% +0.52%] index_add_ spread : Elapsed 0.015 ms (1.528 ms / 100) 1.484 -> 1.485 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.47% +0.40%] index_copy_ spread : Elapsed 0.015 ms (1.484 ms / 100) 1.527 -> 1.529 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.59% +0.52%] index_add_ strided 3 : Elapsed 0.015 ms (1.527 ms / 100) 1.484 -> 1.484 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.54% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.484 ms / 100) 1.527 -> 1.527 ( +0.00%) [ +0.26% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.531 ms / 100) 1.483 -> 1.483 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.484 ms / 100) 1.527 -> 1.527 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.528 ms / 100) 1.484 -> 1.484 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.54% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.485 ms / 100) 1.527 -> 1.526 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.52% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.528 ms / 100) 1.483 -> 1.485 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.61% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.485 ms / 100) 8.518 -> 8.519 ( +0.01%) [ +0.00% +0.04% +0.12% / +0.01% +0.46% +0.06%] index_select const : Elapsed 0.085 ms (8.518 ms / 100) 8.531 -> 8.537 ( +0.07%) [ +0.01% +0.14% +0.00% / +0.12% +0.18% +0.07%] index_select wrap : Elapsed 0.085 ms (8.532 ms / 100) 8.533 -> 8.556 ( +0.27%) [ +0.00% +0.15% +0.05% / +0.27% +0.33% +0.38%] index_select linear : Elapsed 0.085 ms (8.533 ms / 100) 8.535 -> 8.524 ( -0.13%) [ +0.11% +0.07% +0.00% / -0.13% +0.02% +0.14%] index_select reverse : Elapsed 0.085 ms (8.544 ms / 100) 8.529 -> 8.520 ( -0.11%) [ +0.00% +0.07% +0.06% / -0.11% +0.05% +0.16%] index_select skip64 : Elapsed 0.085 ms (8.529 ms / 100) 8.513 -> 8.520 ( +0.08%) [ +0.00% +0.04% +0.21% / +0.34% +0.08% +0.23%] index_select skip256 : Elapsed 0.085 ms (8.513 ms / 100) 8.528 -> 8.522 ( -0.07%) [ +0.00% +0.15% +0.11% / -0.07% +0.13% +0.27%] index_select spread : Elapsed 0.085 ms (8.528 ms / 100) 8.537 -> 8.536 ( -0.01%) [ +0.01% +0.13% +0.00% / -0.01% +0.09% +0.09%] index_select strided 3 : Elapsed 0.085 ms (8.538 ms / 100) 8.537 -> 8.545 ( +0.09%) [ +0.19% +0.00% +0.12% / +0.09% +0.12% +0.32%] index_select random : Elapsed 0.086 ms (8.553 ms / 100) 8.533 -> 8.529 ( -0.05%) [ +0.01% +0.25% +0.00% / -0.05% -0.04% +0.33%] index_select random_sorted : Elapsed 0.085 ms (8.534 ms / 100) B = [4, 16, 20, 40] (stride (640, 1, 2560, 16)) A = [4, 16, 20, 5] (stride (1, 4, 64, 1280)) dim = 3 1.617 -> 1.618 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +0.68% +0.68%] index_add_ linear : Elapsed 0.016 ms (1.619 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.00% +0.13% +0.13% / +0.06% +0.57% +0.70%] index_copy_ linear : Elapsed 0.016 ms (1.574 ms / 100) 1.622 -> 1.622 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.55% +0.62%] index_add_ reverse : Elapsed 0.016 ms (1.623 ms / 100) 1.572 -> 1.575 ( +0.19%) [ +0.00% +0.19% +0.32% / +0.19% +0.32% +0.32%] index_copy_ reverse : Elapsed 0.016 ms (1.572 ms / 100) 1.625 -> 1.625 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.49%] index_add_ spread : Elapsed 0.016 ms (1.625 ms / 100) 1.572 -> 1.575 ( +0.19%) [ +0.00% +0.19% +0.00% / +0.32% +0.38% +0.19%] index_copy_ spread : Elapsed 0.016 ms (1.572 ms / 100) 1.618 -> 1.618 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.62% +0.62%] index_add_ strided 3 : Elapsed 0.016 ms (1.619 ms / 100) 1.574 -> 1.574 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.51% +0.51%] index_copy_ strided 3 : Elapsed 0.016 ms (1.574 ms / 100) 1.624 -> 1.625 ( +0.06%) [ +0.18% +0.06% +0.00% / +0.06% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.016 ms (1.627 ms / 100) 1.575 -> 1.578 ( +0.19%) [ +0.00% +0.06% +0.13% / +0.19% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.016 ms (1.575 ms / 100) 1.623 -> 1.624 ( +0.06%) [ +0.18% +0.06% +0.00% / +0.06% +0.92% +0.74%] index_add_ perm : Elapsed 0.016 ms (1.626 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.95% +0.76%] index_copy_ perm : Elapsed 0.016 ms (1.573 ms / 100) 1.618 -> 1.618 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +1.24% +0.68%] index_add_ perm_sorted : Elapsed 0.016 ms (1.618 ms / 100) 1.574 -> 1.575 ( +0.06%) [ +0.00% +0.06% +0.13% / +0.06% +1.91% +0.57%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.574 ms / 100) 8.512 -> 8.518 ( +0.07%) [ +0.00% +0.39% +0.27% / +0.07% +0.61% +0.36%] index_select const : Elapsed 0.085 ms (8.512 ms / 100) 8.538 -> 8.546 ( +0.09%) [ +0.01% +0.26% +0.00% / +0.09% +0.09% +0.09%] index_select wrap : Elapsed 0.085 ms (8.539 ms / 100) 8.533 -> 8.554 ( +0.25%) [ +0.00% +0.09% +0.18% / +0.28% +0.34% +0.25%] index_select linear : Elapsed 0.085 ms (8.533 ms / 100) 8.526 -> 8.531 ( +0.06%) [ +0.00% +0.33% +0.01% / +0.08% +0.06% +0.28%] index_select reverse : Elapsed 0.085 ms (8.526 ms / 100) 8.506 -> 8.521 ( +0.18%) [ +0.13% +0.00% +0.25% / +0.18% +0.42% +0.21%] index_select skip64 : Elapsed 0.085 ms (8.517 ms / 100) 8.514 -> 8.515 ( +0.01%) [ +0.28% +0.00% +0.09% / +0.01% +0.23% +0.26%] index_select skip256 : Elapsed 0.085 ms (8.538 ms / 100) 8.535 -> 8.544 ( +0.11%) [ +0.00% +0.01% +0.00% / +0.20% +0.11% +0.41%] index_select spread : Elapsed 0.085 ms (8.535 ms / 100) 8.551 -> 8.538 ( -0.15%) [ +0.00% +0.08% +0.01% / -0.15% -0.12% +0.08%] index_select strided 3 : Elapsed 0.086 ms (8.551 ms / 100) 8.526 -> 8.540 ( +0.16%) [ +0.00% +0.19% +0.22% / +0.16% +0.39% +0.50%] index_select random : Elapsed 0.085 ms (8.526 ms / 100) 8.529 -> 8.542 ( +0.15%) [ +0.25% +0.00% +0.19% / +0.15% +0.35% +0.18%] index_select random_sorted : Elapsed 0.085 ms (8.550 ms / 100) B = [4, 16, 20, 40] (stride (16, 1, 64, 1280)) A = [4, 16, 20, 5] (stride (20, 400, 1, 80)) dim = 3 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.21% +0.28%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.22% +0.29% +0.00% / +0.07% +0.36% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.35% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.36% +0.29%] index_copy_ reverse : Elapsed 0.014 ms (1.378 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.35% +0.07% +0.00% / +0.07% +0.42% +0.35%] index_add_ spread : Elapsed 0.014 ms (1.425 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.00% +0.07% +0.15% / +0.00% +0.51% +0.65%] index_copy_ spread : Elapsed 0.014 ms (1.376 ms / 100) 1.420 -> 1.422 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.42% +0.42%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.22% +0.15% / +0.07% +0.51% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.375 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.21% +0.00% +0.07% / +0.14% +0.28% +0.28%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.22% +0.29% +0.00% / +0.22% +0.44% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.49% +0.35%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.379 ( +0.36%) [ +0.07% +0.22% +0.00% / +0.36% +0.73% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.375 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.377 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.66% +0.58%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.375 ms / 100) 8.179 -> 8.180 ( +0.01%) [ +0.21% +0.00% +0.01% / +0.01% +0.50% +0.34%] index_select const : Elapsed 0.082 ms (8.196 ms / 100) 8.215 -> 8.207 ( -0.10%) [ +0.00% +0.15% +0.11% / +0.05% +0.00% -0.10%] index_select wrap : Elapsed 0.082 ms (8.215 ms / 100) 8.213 -> 8.217 ( +0.05%) [ +0.18% +0.18% +0.00% / +0.05% +0.10% +0.18%] index_select linear : Elapsed 0.082 ms (8.228 ms / 100) 8.212 -> 8.209 ( -0.04%) [ +0.00% +0.33% +0.13% / +0.10% +0.17% -0.04%] index_select reverse : Elapsed 0.082 ms (8.212 ms / 100) 8.184 -> 8.184 ( +0.00%) [ +0.06% +0.35% +0.00% / +0.10% +0.11% +0.00%] index_select skip64 : Elapsed 0.082 ms (8.189 ms / 100) 8.172 -> 8.185 ( +0.16%) [ +0.00% +0.07% +0.31% / +0.16% +0.20% +0.33%] index_select skip256 : Elapsed 0.082 ms (8.172 ms / 100) 8.204 -> 8.214 ( +0.12%) [ +0.16% +0.18% +0.00% / +0.48% +0.12% +0.37%] index_select spread : Elapsed 0.082 ms (8.217 ms / 100) 8.209 -> 8.213 ( +0.05%) [ +0.27% +0.00% +0.21% / +0.17% +0.05% +0.16%] index_select strided 3 : Elapsed 0.082 ms (8.231 ms / 100) 8.215 -> 8.213 ( -0.02%) [ +0.18% +0.00% +0.17% / +0.17% -0.02% +0.04%] index_select random : Elapsed 0.082 ms (8.230 ms / 100) 8.226 -> 8.221 ( -0.06%) [ +0.10% +0.00% +0.09% / -0.02% +0.00% -0.06%] index_select random_sorted : Elapsed 0.082 ms (8.234 ms / 100) out_shape = [40, 20, 5, 16] in_shape = [4, 20, 5, 16] idx_dim = 0 B = [40, 20, 5, 16] (stride (1600, 80, 16, 1)) A = [4, 20, 5, 16] (stride (20, 1, 1280, 80)) dim = 0 1.140 -> 1.141 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.44% +0.44%] index_add_ linear : Elapsed 0.011 ms (1.140 ms / 100) 1.096 -> 1.099 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +0.55% +0.36%] index_copy_ linear : Elapsed 0.011 ms (1.096 ms / 100) 1.142 -> 1.143 ( +0.09%) [ +0.00% +0.26% +0.00% / +0.09% +0.53% +0.61%] index_add_ reverse : Elapsed 0.011 ms (1.142 ms / 100) 1.096 -> 1.097 ( +0.09%) [ +0.00% +0.27% +0.00% / +0.09% +0.91% +0.55%] index_copy_ reverse : Elapsed 0.011 ms (1.096 ms / 100) 1.140 -> 1.139 ( -0.09%) [ +0.00% +0.09% +0.09% / -0.09% +0.44% +0.61%] index_add_ spread : Elapsed 0.011 ms (1.140 ms / 100) 1.093 -> 1.093 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.64% +0.46%] index_copy_ spread : Elapsed 0.011 ms (1.094 ms / 100) 1.139 -> 1.139 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.53% +0.61%] index_add_ strided 3 : Elapsed 0.011 ms (1.139 ms / 100) 1.094 -> 1.094 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.73% +0.73%] index_copy_ strided 3 : Elapsed 0.011 ms (1.095 ms / 100) 1.139 -> 1.139 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.53% +0.61%] index_add_ strided 7 : Elapsed 0.011 ms (1.140 ms / 100) 1.092 -> 1.092 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.73% +0.92%] index_copy_ strided 7 : Elapsed 0.011 ms (1.092 ms / 100) 1.139 -> 1.139 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.53% +0.53%] index_add_ perm : Elapsed 0.011 ms (1.139 ms / 100) 1.091 -> 1.092 ( +0.09%) [ +0.18% +0.27% +0.00% / +0.09% +0.82% +0.64%] index_copy_ perm : Elapsed 0.011 ms (1.093 ms / 100) 1.141 -> 1.141 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.011 ms (1.143 ms / 100) 1.096 -> 1.096 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.096 ms / 100) 7.928 -> 7.920 ( -0.10%) [ +0.00% +0.03% +0.38% / -0.05% +0.13% -0.10%] index_select const : Elapsed 0.079 ms (7.928 ms / 100) 7.932 -> 7.929 ( -0.04%) [ +0.14% +0.13% +0.00% / -0.01% +0.03% -0.04%] index_select wrap : Elapsed 0.079 ms (7.943 ms / 100) 7.934 -> 7.931 ( -0.04%) [ +0.00% +0.06% +0.04% / +0.08% +0.29% -0.04%] index_select linear : Elapsed 0.079 ms (7.934 ms / 100) 7.932 -> 7.936 ( +0.05%) [ +0.00% +0.10% +0.13% / +0.05% +0.16% +0.09%] index_select reverse : Elapsed 0.079 ms (7.932 ms / 100) 7.922 -> 7.919 ( -0.04%) [ +0.15% +0.05% +0.00% / -0.04% -0.03% +0.09%] index_select skip64 : Elapsed 0.079 ms (7.934 ms / 100) 7.918 -> 7.926 ( +0.10%) [ +0.05% +0.00% +0.16% / +0.18% +0.10% +0.18%] index_select skip256 : Elapsed 0.079 ms (7.922 ms / 100) 7.946 -> 7.933 ( -0.16%) [ +0.01% +0.00% +0.33% / -0.16% -0.08% -0.11%] index_select spread : Elapsed 0.079 ms (7.947 ms / 100) 7.935 -> 7.927 ( -0.10%) [ +0.00% +0.06% +0.21% / +0.03% +0.04% -0.10%] index_select strided 3 : Elapsed 0.079 ms (7.935 ms / 100) 7.922 -> 7.929 ( +0.09%) [ +0.20% +0.08% +0.00% / +0.09% +0.18% +0.24%] index_select random : Elapsed 0.079 ms (7.938 ms / 100) 7.929 -> 7.926 ( -0.04%) [ +0.00% +0.13% +0.15% / +0.09% -0.04% +0.35%] index_select random_sorted : Elapsed 0.079 ms (7.929 ms / 100) B = [40, 20, 5, 16] (stride (1600, 1, 20, 100)) A = [4, 20, 5, 16] (stride (80, 320, 16, 1)) dim = 0 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.41% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.50% +0.34%] index_copy_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.41% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.59% +0.50%] index_copy_ spread : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.49% +0.41%] index_add_ strided 7 : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.51% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.229 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.41% +0.41%] index_add_ perm : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.192 ( +0.25%) [ +0.00% +0.08% +0.08% / +0.25% +0.84% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.191 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.703 -> 8.702 ( -0.01%) [ +0.31% +0.07% +0.00% / -0.01% +0.41% +0.30%] index_select const : Elapsed 0.087 ms (8.730 ms / 100) 8.741 -> 8.747 ( +0.07%) [ +0.21% +0.00% +0.03% / +0.37% +0.07% +0.22%] index_select wrap : Elapsed 0.088 ms (8.759 ms / 100) 8.727 -> 8.736 ( +0.10%) [ +0.19% +0.00% +0.22% / +0.10% +0.11% +0.19%] index_select linear : Elapsed 0.087 ms (8.744 ms / 100) 8.739 -> 8.745 ( +0.07%) [ +0.17% +0.00% +0.06% / +0.07% +0.34% +0.46%] index_select reverse : Elapsed 0.088 ms (8.754 ms / 100) 8.715 -> 8.724 ( +0.10%) [ +0.02% +0.05% +0.00% / +0.10% +0.41% +0.25%] index_select skip64 : Elapsed 0.087 ms (8.717 ms / 100) 8.709 -> 8.701 ( -0.09%) [ +0.08% +0.00% +0.03% / -0.09% +0.34% +0.34%] index_select skip256 : Elapsed 0.087 ms (8.716 ms / 100) 8.732 -> 8.738 ( +0.07%) [ +0.16% +0.00% +0.23% / +0.14% +0.07% +0.49%] index_select spread : Elapsed 0.087 ms (8.746 ms / 100) 8.742 -> 8.758 ( +0.18%) [ +0.06% +0.00% +0.17% / +0.27% +0.18% +0.27%] index_select strided 3 : Elapsed 0.087 ms (8.747 ms / 100) 8.735 -> 8.757 ( +0.25%) [ +0.14% +0.18% +0.00% / +0.41% +0.25% +0.44%] index_select random : Elapsed 0.087 ms (8.747 ms / 100) 8.733 -> 8.739 ( +0.07%) [ +0.17% +0.00% +0.17% / +0.07% +0.22% +0.34%] index_select random_sorted : Elapsed 0.087 ms (8.748 ms / 100) B = [40, 20, 5, 16] (stride (1, 3200, 640, 40)) A = [4, 20, 5, 16] (stride (5, 20, 1, 400)) dim = 0 1.187 -> 1.188 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.51% +0.59%] index_add_ linear : Elapsed 0.012 ms (1.187 ms / 100) 1.148 -> 1.147 ( -0.09%) [ +0.00% +0.00% +0.09% / -0.09% +0.17% +0.17%] index_copy_ linear : Elapsed 0.011 ms (1.148 ms / 100) 1.188 -> 1.187 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.76% +0.59%] index_add_ reverse : Elapsed 0.012 ms (1.189 ms / 100) 1.147 -> 1.149 ( +0.17%) [ +0.09% +0.00% +0.09% / +0.17% +0.70% +0.61%] index_copy_ reverse : Elapsed 0.011 ms (1.148 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.08% +0.00% +0.08%] index_add_ spread : Elapsed 0.012 ms (1.197 ms / 100) 1.154 -> 1.155 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.35% +0.35%] index_copy_ spread : Elapsed 0.012 ms (1.154 ms / 100) 1.189 -> 1.192 ( +0.25%) [ +0.00% +0.00% +0.00% / +0.25% +0.59% +0.67%] index_add_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.147 -> 1.150 ( +0.26%) [ +0.09% +0.26% +0.00% / +0.26% +0.52% +0.61%] index_copy_ strided 3 : Elapsed 0.011 ms (1.148 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.25% +0.17% +0.00% / +0.08% +0.50% +0.59%] index_add_ strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.150 -> 1.149 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.26% +0.43%] index_copy_ strided 7 : Elapsed 0.012 ms (1.151 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.50% +0.59%] index_add_ perm : Elapsed 0.012 ms (1.193 ms / 100) 1.151 -> 1.151 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.78% +0.43%] index_copy_ perm : Elapsed 0.012 ms (1.152 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.25% +0.17% +0.00% / +0.08% +0.50% +0.50%] index_add_ perm_sorted : Elapsed 0.012 ms (1.196 ms / 100) 1.151 -> 1.152 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.70% +0.61%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.151 ms / 100) 8.374 -> 8.393 ( +0.23%) [ +0.31% +0.00% +0.20% / +0.23% +0.55% +0.35%] index_select const : Elapsed 0.084 ms (8.400 ms / 100) 8.396 -> 8.397 ( +0.01%) [ +0.07% +0.00% +0.07% / +0.01% +0.10% +0.06%] index_select wrap : Elapsed 0.084 ms (8.402 ms / 100) 8.384 -> 8.403 ( +0.23%) [ +0.20% +0.07% +0.00% / +0.23% +0.31% +0.49%] index_select linear : Elapsed 0.084 ms (8.401 ms / 100) 8.393 -> 8.394 ( +0.01%) [ +0.15% +0.00% +0.26% / +0.01% +0.20% +0.19%] index_select reverse : Elapsed 0.084 ms (8.406 ms / 100) 8.379 -> 8.388 ( +0.11%) [ +0.16% +0.00% +0.07% / +0.11% +0.17% +0.29%] index_select skip64 : Elapsed 0.084 ms (8.392 ms / 100) 8.397 -> 8.390 ( -0.08%) [ +0.12% +0.00% +0.08% / -0.08% +0.17% +0.05%] index_select skip256 : Elapsed 0.084 ms (8.407 ms / 100) 8.400 -> 8.398 ( -0.02%) [ +0.11% +0.12% +0.00% / -0.01% -0.02% +0.18%] index_select spread : Elapsed 0.084 ms (8.409 ms / 100) 8.396 -> 8.392 ( -0.05%) [ +0.06% +0.00% +0.06% / +0.01% -0.05% +0.05%] index_select strided 3 : Elapsed 0.084 ms (8.401 ms / 100) 8.378 -> 8.389 ( +0.13%) [ +0.30% +0.00% +0.30% / +0.13% +0.16% +0.37%] index_select random : Elapsed 0.084 ms (8.403 ms / 100) 8.401 -> 8.391 ( -0.12%) [ +0.10% +0.00% +0.05% / -0.12% +0.11% +0.04%] index_select random_sorted : Elapsed 0.084 ms (8.409 ms / 100) B = [40, 20, 5, 16] (stride (320, 16, 12800, 1)) A = [4, 20, 5, 16] (stride (1, 64, 1280, 4)) dim = 0 1.313 -> 1.317 ( +0.30%) [ +0.38% +0.00% +0.08% / +0.30% +0.30% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.38% +0.30% +0.00% / +0.23% +0.38% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.31% +0.08% +0.00% / +0.00% +0.63% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.46% +0.00% +0.30% / +0.23% +0.53% +0.46%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.316 ( +0.15%) [ +0.00% +0.00% +0.23% / +0.15% +0.30% +0.30%] index_add_ strided 3 : Elapsed 0.013 ms (1.314 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.23% +0.00% +0.00% / +0.00% +0.30% +0.23%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.315 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.00% +0.15%] index_add_ perm : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.313 ( -0.15%) [ +0.23% +0.00% +0.15% / -0.15% +0.23% +0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.16% +0.16% / +0.08% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 9.155 -> 9.152 ( -0.03%) [ +0.00% +0.20% +0.33% / +0.21% +0.04% -0.03%] index_select const : Elapsed 0.092 ms (9.155 ms / 100) 9.160 -> 9.141 ( -0.21%) [ +0.00% +0.08% +0.00% / +0.04% +0.01% -0.21%] index_select wrap : Elapsed 0.092 ms (9.160 ms / 100) 9.143 -> 9.148 ( +0.05%) [ +0.00% +0.32% +0.25% / +0.07% +0.05% +0.35%] index_select linear : Elapsed 0.091 ms (9.143 ms / 100) 9.151 -> 9.144 ( -0.08%) [ +0.09% +0.27% +0.00% / +0.03% -0.08% +0.01%] index_select reverse : Elapsed 0.092 ms (9.159 ms / 100) 9.148 -> 9.147 ( -0.01%) [ +0.14% +0.00% +0.27% / +0.23% -0.01% +0.31%] index_select skip64 : Elapsed 0.092 ms (9.161 ms / 100) 9.161 -> 9.155 ( -0.07%) [ +0.00% +0.02% +0.04% / +0.17% -0.07% +0.10%] index_select skip256 : Elapsed 0.092 ms (9.161 ms / 100) 9.163 -> 9.143 ( -0.22%) [ +0.04% +0.00% +0.05% / +0.07% -0.22% -0.13%] index_select spread : Elapsed 0.092 ms (9.167 ms / 100) 9.157 -> 9.141 ( -0.17%) [ +0.04% +0.00% +0.11% / +0.11% -0.17% +0.26%] index_select strided 3 : Elapsed 0.092 ms (9.161 ms / 100) 9.155 -> 9.149 ( -0.07%) [ +0.01% +0.35% +0.00% / +0.05% -0.07% +0.19%] index_select random : Elapsed 0.092 ms (9.156 ms / 100) 9.165 -> 9.149 ( -0.17%) [ +0.08% +0.01% +0.00% / +0.16% +0.04% -0.17%] index_select random_sorted : Elapsed 0.092 ms (9.172 ms / 100) B = [40, 20, 5, 16] (stride (1, 200, 40, 4000)) A = [4, 20, 5, 16] (stride (1, 20, 4, 400)) dim = 0 0.582 -> 0.583 ( +0.17%) [ +0.34% +0.00% +0.00% / +0.17% +0.17% +0.17%] index_add_ linear : Elapsed 0.006 ms (0.584 ms / 100) 0.596 -> 0.595 ( -0.17%) [ +0.00% +1.34% +0.00% / -0.17% +0.17% +0.00%] index_copy_ linear : Elapsed 0.006 ms (0.596 ms / 100) 0.579 -> 0.581 ( +0.35%) [ +0.00% +0.17% +0.00% / +0.35% +1.55% +1.21%] index_add_ reverse : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.590 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.53% +1.53%] index_copy_ reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.580 -> 0.579 ( -0.17%) [ +0.17% +0.17% +0.00% / -0.17% +1.03% +1.03%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.591 -> 0.590 ( -0.17%) [ +0.34% +0.17% +0.00% / -0.17% +1.35% +1.02%] index_copy_ spread : Elapsed 0.006 ms (0.593 ms / 100) 0.581 -> 0.582 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.52% +0.34%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.593 -> 0.596 ( +0.51%) [ +0.00% +0.84% +0.17% / +20.07% +0.84% +0.51%] index_copy_ strided 3 : Elapsed 0.006 ms (0.593 ms / 100) 0.581 -> 0.583 ( +0.34%) [ +0.17% +0.17% +0.00% / +0.86% +0.34% +0.52%] index_add_ strided 7 : Elapsed 0.006 ms (0.582 ms / 100) 0.592 -> 0.593 ( +0.17%) [ +0.00% +0.51% +0.51% / +0.17% +1.35% +1.01%] index_copy_ strided 7 : Elapsed 0.006 ms (0.592 ms / 100) 0.581 -> 0.581 ( +0.00%) [ +0.00% +0.00% +0.17% / +0.00% +0.34% +0.52%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.594 -> 0.591 ( -0.51%) [ +0.34% +0.17% +0.00% / -0.51% +1.01% +0.51%] index_copy_ perm : Elapsed 0.006 ms (0.596 ms / 100) 0.579 -> 0.580 ( +0.17%) [ +0.17% +0.35% +0.00% / +0.17% +0.86% +0.86%] index_add_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 0.591 -> 0.592 ( +0.17%) [ +0.17% +0.51% +0.00% / +0.17% +1.35% +1.18%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.592 ms / 100) 5.138 -> 4.978 ( -3.11%) [ +0.06% +0.12% +0.00% / -3.06% -3.04% -3.11%] index_select const : Elapsed 0.051 ms (5.141 ms / 100) 5.137 -> 4.971 ( -3.23%) [ +0.10% +0.00% +0.00% / -2.69% -3.23% -3.21%] index_select wrap : Elapsed 0.051 ms (5.142 ms / 100) 5.130 -> 4.973 ( -3.06%) [ +0.21% +0.12% +0.00% / -2.59% -2.92% -3.06%] index_select linear : Elapsed 0.051 ms (5.141 ms / 100) 5.140 -> 4.969 ( -3.33%) [ +0.00% +0.02% +0.02% / -3.25% -3.29% -3.33%] index_select reverse : Elapsed 0.051 ms (5.140 ms / 100) 5.135 -> 4.982 ( -2.98%) [ +0.00% +0.02% +0.14% / -2.98% -2.80% -2.77%] index_select skip64 : Elapsed 0.051 ms (5.135 ms / 100) 5.129 -> 4.965 ( -3.20%) [ +0.23% +0.00% +0.23% / -3.20% -2.50% -2.71%] index_select skip256 : Elapsed 0.051 ms (5.141 ms / 100) 5.138 -> 4.974 ( -3.19%) [ +0.12% +0.00% +0.06% / -3.19% -2.94% -2.94%] index_select spread : Elapsed 0.051 ms (5.144 ms / 100) 5.125 -> 4.983 ( -2.77%) [ +0.21% +0.00% +0.33% / -2.77% -2.71% -2.67%] index_select strided 3 : Elapsed 0.051 ms (5.136 ms / 100) 5.130 -> 4.990 ( -2.73%) [ +0.00% +0.04% +0.16% / -2.73% -2.48% -2.42%] index_select random : Elapsed 0.051 ms (5.130 ms / 100) 5.124 -> 4.971 ( -2.99%) [ +0.00% +0.00% +0.21% / -2.99% -2.62% -2.77%] index_select random_sorted : Elapsed 0.051 ms (5.124 ms / 100) B = [40, 20, 5, 16] (stride (1, 40, 800, 4000)) A = [4, 20, 5, 16] (stride (16, 320, 64, 1)) dim = 0 1.232 -> 1.232 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.25% +0.42%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.32% +0.41%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.196 ( +0.17%) [ +0.08% +0.00% +0.08% / +0.17% +0.34% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.195 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.32% +0.32%] index_add_ spread : Elapsed 0.012 ms (1.243 ms / 100) 1.204 -> 1.206 ( +0.17%) [ +0.33% +0.00% +0.00% / +0.17% +0.17% +0.25%] index_copy_ spread : Elapsed 0.012 ms (1.208 ms / 100) 1.234 -> 1.236 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.57% +0.81%] index_add_ strided 3 : Elapsed 0.012 ms (1.236 ms / 100) 1.196 -> 1.198 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.67% +0.67%] index_copy_ strided 3 : Elapsed 0.012 ms (1.196 ms / 100) 1.237 -> 1.242 ( +0.40%) [ +0.24% +0.00% +0.16% / +0.40% +0.57% +0.57%] index_add_ strided 7 : Elapsed 0.012 ms (1.240 ms / 100) 1.198 -> 1.201 ( +0.25%) [ +0.17% +0.00% +0.00% / +0.25% +0.58% +0.42%] index_copy_ strided 7 : Elapsed 0.012 ms (1.200 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.16% +0.24%] index_add_ perm : Elapsed 0.012 ms (1.243 ms / 100) 1.204 -> 1.204 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.08% +0.00% +0.00%] index_copy_ perm : Elapsed 0.012 ms (1.206 ms / 100) 1.243 -> 1.242 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.08% +0.16%] index_add_ perm_sorted : Elapsed 0.012 ms (1.243 ms / 100) 1.203 -> 1.205 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.17% +0.17%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.205 ms / 100) 8.797 -> 8.797 ( +0.00%) [ +0.00% +0.26% +0.14% / +0.00% +0.20% +0.25%] index_select const : Elapsed 0.088 ms (8.797 ms / 100) 8.824 -> 8.810 ( -0.16%) [ +0.26% +0.15% +0.00% / +0.07% -0.16% -0.03%] index_select wrap : Elapsed 0.088 ms (8.847 ms / 100) 8.804 -> 8.824 ( +0.23%) [ +0.28% +0.00% +0.07% / +0.23% +0.25% +0.40%] index_select linear : Elapsed 0.088 ms (8.829 ms / 100) 8.811 -> 8.827 ( +0.18%) [ +0.25% +0.07% +0.00% / +0.18% +0.37% +0.47%] index_select reverse : Elapsed 0.088 ms (8.833 ms / 100) 8.790 -> 8.787 ( -0.03%) [ +0.17% +0.00% +0.19% / -0.03% +0.53% +0.13%] index_select skip64 : Elapsed 0.088 ms (8.805 ms / 100) 8.790 -> 8.800 ( +0.11%) [ +0.00% +0.05% +0.22% / +0.11% +0.14% +0.20%] index_select skip256 : Elapsed 0.088 ms (8.790 ms / 100) 8.823 -> 8.813 ( -0.11%) [ +0.15% +0.22% +0.00% / +0.06% -0.01% -0.11%] index_select spread : Elapsed 0.088 ms (8.836 ms / 100) 8.832 -> 8.818 ( -0.16%) [ +0.00% +0.02% +0.00% / +0.10% -0.09% -0.16%] index_select strided 3 : Elapsed 0.088 ms (8.832 ms / 100) 8.821 -> 8.811 ( -0.11%) [ +0.00% +0.01% +0.15% / -0.06% -0.11% +0.11%] index_select random : Elapsed 0.088 ms (8.821 ms / 100) 8.810 -> 8.828 ( +0.20%) [ +0.00% +0.08% +0.23% / +0.24% +0.20% +0.54%] index_select random_sorted : Elapsed 0.088 ms (8.810 ms / 100) out_shape = [4, 40, 5, 16] in_shape = [4, 20, 5, 16] idx_dim = 1 B = [4, 40, 5, 16] (stride (3200, 16, 640, 1)) A = [4, 20, 5, 16] (stride (20, 1, 1280, 80)) dim = 1 2.407 -> 2.416 ( +0.37%) [ +0.08% +0.12% +0.00% / +0.37% +0.62% +0.66%] index_add_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.12% +0.04% +0.00% / +0.54% +0.54% +0.87%] index_copy_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.408 -> 2.418 ( +0.42%) [ +0.04% +0.00% +0.00% / +0.42% +0.62% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.421 ( +0.67%) [ +0.08% +0.04% +0.00% / +0.67% +0.71% +0.71%] index_copy_ reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.410 -> 2.419 ( +0.37%) [ +0.08% +0.21% +0.00% / +0.37% +0.58% +0.58%] index_add_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.401 -> 2.409 ( +0.33%) [ +0.00% +0.04% +0.04% / +0.33% +0.58% +0.50%] index_copy_ spread : Elapsed 0.024 ms (2.401 ms / 100) 2.404 -> 2.416 ( +0.50%) [ +0.21% +0.33% +0.00% / +0.67% +0.50% +0.75%] index_add_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.398 -> 2.408 ( +0.42%) [ +0.00% +0.17% +0.38% / +0.42% +0.88% +0.83%] index_copy_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.407 -> 2.423 ( +0.66%) [ +0.00% +0.21% +0.33% / +0.71% +0.66% +0.66%] index_add_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.412 ( +0.42%) [ +0.00% +0.04% +0.04% / +0.46% +0.46% +0.42%] index_copy_ strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.408 -> 2.422 ( +0.58%) [ +0.12% +0.17% +0.00% / +0.58% +0.58% +0.79%] index_add_ perm : Elapsed 0.024 ms (2.411 ms / 100) 2.402 -> 2.412 ( +0.42%) [ +0.00% +0.00% +0.00% / +0.42% +0.67% +0.58%] index_copy_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.407 -> 2.420 ( +0.54%) [ +0.12% +0.00% +0.00% / +0.54% +0.54% +0.71%] index_add_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.402 -> 2.411 ( +0.37%) [ +0.00% +0.04% +0.00% / +0.37% +0.58% +0.50%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.402 ms / 100) 4.437 -> 4.438 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.07% +0.20% +0.02%] index_select const : Elapsed 0.044 ms (4.440 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.11% -0.07%] index_select wrap : Elapsed 0.044 ms (4.447 ms / 100) 4.439 -> 4.446 ( +0.16%) [ +0.00% +0.16% +0.05% / +0.18% +0.16% +0.23%] index_select linear : Elapsed 0.044 ms (4.439 ms / 100) 4.443 -> 4.439 ( -0.09%) [ +0.09% +0.16% +0.00% / -0.09% +0.20% +0.05%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.09% +0.00% +0.09% / +0.14% +0.07% +0.14%] index_select skip64 : Elapsed 0.044 ms (4.440 ms / 100) 4.430 -> 4.437 ( +0.16%) [ +0.23% +0.07% +0.00% / +0.16% +0.32% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.440 ms / 100) 4.438 -> 4.441 ( +0.07%) [ +0.23% +0.00% +0.14% / +0.07% +0.27% +0.36%] index_select spread : Elapsed 0.044 ms (4.448 ms / 100) 4.442 -> 4.441 ( -0.02%) [ +0.00% +0.09% +0.02% / +0.05% +0.18% -0.02%] index_select strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.442 -> 4.444 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.09% +0.05% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.444 ms / 100) 4.441 -> 4.442 ( +0.02%) [ +0.00% +0.11% +0.02% / +0.11% +0.05% +0.02%] index_select strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.440 -> 4.443 ( +0.07%) [ +0.00% +0.16% +0.07% / +0.07% +0.25% +0.29%] index_select strided 8 : Elapsed 0.044 ms (4.440 ms / 100) 4.435 -> 4.443 ( +0.18%) [ +0.16% +0.11% +0.00% / +0.23% +0.32% +0.18%] index_select strided 16 : Elapsed 0.044 ms (4.442 ms / 100) 4.442 -> 4.441 ( -0.02%) [ +0.00% +0.00% +0.07% / -0.02% +0.07% +0.07%] index_select random : Elapsed 0.044 ms (4.442 ms / 100) 4.442 -> 4.445 ( +0.07%) [ +0.09% +0.00% +0.09% / +0.07% +0.09% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.446 ms / 100) B = [4, 40, 5, 16] (stride (3200, 1, 40, 200)) A = [4, 20, 5, 16] (stride (1600, 16, 320, 1)) dim = 1 2.459 -> 2.470 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.45% +0.45% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.467 ( +0.49%) [ +0.00% +0.20% +0.04% / +0.49% +0.90% +0.81%] index_copy_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.453 -> 2.468 ( +0.61%) [ +0.08% +0.04% +0.00% / +0.61% +0.90% +0.90%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.448 -> 2.466 ( +0.74%) [ +0.04% +0.00% +0.16% / +0.74% +1.14% +1.23%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.463 -> 2.476 ( +0.53%) [ +0.00% +0.16% +0.12% / +0.53% +1.10% +1.14%] index_add_ spread : Elapsed 0.025 ms (2.463 ms / 100) 2.466 -> 2.480 ( +0.57%) [ +0.12% +0.00% +0.20% / +0.57% +1.18% +1.09%] index_copy_ spread : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.480 ( +0.40%) [ +0.04% +0.08% +0.00% / +0.45% +0.73% +0.40%] index_add_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.488 ( +0.65%) [ +0.12% +0.24% +0.00% / +0.65% +0.89% +0.69%] index_copy_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.469 -> 2.485 ( +0.65%) [ +0.00% +0.08% +0.16% / +0.69% +0.65% +0.69%] index_add_ strided 7 : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.488 ( +0.65%) [ +0.00% +0.20% +0.16% / +0.65% +0.89% +0.69%] index_copy_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.472 -> 2.480 ( +0.32%) [ +0.08% +0.00% +0.08% / +0.40% +0.32% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.474 ms / 100) 2.476 -> 2.484 ( +0.32%) [ +0.08% +0.12% +0.00% / +0.61% +0.48% +0.32%] index_copy_ perm : Elapsed 0.025 ms (2.478 ms / 100) 2.473 -> 2.474 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.61% +0.28% +0.04%] index_add_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.00% +0.24% +0.12% / +0.57% +0.40% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.04% +0.00% +0.02% / +0.00% +0.04% +0.00%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.506 -> 4.512 ( +0.13%) [ +0.04% +0.00% +0.27% / +0.13% +0.31% +0.18%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.512 -> 4.510 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% -0.04% +0.16%] index_select linear : Elapsed 0.045 ms (4.514 ms / 100) 4.502 -> 4.508 ( +0.13%) [ +0.22% +0.00% +0.07% / +0.31% +0.24% +0.13%] index_select reverse : Elapsed 0.045 ms (4.512 ms / 100) 4.498 -> 4.495 ( -0.07%) [ +0.00% +0.02% +0.18% / +0.07% +0.09% -0.07%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.00% +0.33% +0.13% / +0.07% +0.31% +0.22%] index_select skip256 : Elapsed 0.045 ms (4.492 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.00% +0.00% +0.04% / +0.13% +0.07% +0.13%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.506 -> 4.509 ( +0.07%) [ +0.00% +0.29% +0.16% / +0.07% +0.13% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.506 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.11% +0.00% +0.13% / +0.02% +0.11% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.505 ms / 100) 4.501 -> 4.509 ( +0.18%) [ +0.00% +0.07% +0.09% / +0.24% +0.38% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.502 ( +0.09%) [ +0.07% +0.00% +0.16% / +0.11% +0.13% +0.09%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.497 -> 4.503 ( +0.13%) [ +0.20% +0.04% +0.00% / +0.13% +0.38% +0.16%] index_select strided 16 : Elapsed 0.045 ms (4.506 ms / 100) 4.514 -> 4.507 ( -0.16%) [ +0.00% +0.02% +0.02% / -0.07% -0.16% -0.09%] index_select random : Elapsed 0.045 ms (4.514 ms / 100) 4.507 -> 4.509 ( +0.04%) [ +0.16% +0.18% +0.00% / +0.04% +0.07% +0.07%] index_select random_sorted : Elapsed 0.045 ms (4.514 ms / 100) B = [4, 40, 5, 16] (stride (80, 320, 16, 1)) A = [4, 20, 5, 16] (stride (1600, 5, 1, 100)) dim = 1 2.389 -> 2.401 ( +0.50%) [ +0.21% +0.00% +0.04% / +0.50% +0.84% +0.96%] index_add_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.384 -> 2.397 ( +0.55%) [ +0.29% +0.17% +0.00% / +0.55% +0.92% +1.05%] index_copy_ linear : Elapsed 0.024 ms (2.391 ms / 100) 2.393 -> 2.403 ( +0.42%) [ +0.17% +0.04% +0.00% / +0.42% +0.67% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.397 ms / 100) 2.388 -> 2.402 ( +0.59%) [ +0.29% +0.00% +0.08% / +0.59% +0.80% +0.63%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.393 -> 2.409 ( +0.67%) [ +0.00% +0.08% +0.00% / +0.71% +0.67% +0.71%] index_add_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.391 -> 2.404 ( +0.54%) [ +0.00% +0.38% +0.08% / +0.54% +0.71% +0.71%] index_copy_ spread : Elapsed 0.024 ms (2.391 ms / 100) 2.392 -> 2.405 ( +0.54%) [ +0.00% +0.33% +0.08% / +0.79% +0.63% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.389 -> 2.403 ( +0.59%) [ +0.00% +0.00% +0.13% / +0.59% +0.75% +0.63%] index_copy_ strided 3 : Elapsed 0.024 ms (2.389 ms / 100) 2.399 -> 2.408 ( +0.38%) [ +0.08% +0.00% +0.21% / +0.38% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.401 ms / 100) 2.384 -> 2.401 ( +0.71%) [ +0.25% +0.00% +0.17% / +0.71% +0.88% +1.05%] index_copy_ strided 7 : Elapsed 0.024 ms (2.390 ms / 100) 2.394 -> 2.406 ( +0.50%) [ +0.08% +0.00% +0.00% / +0.50% +0.54% +0.67%] index_add_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.389 -> 2.398 ( +0.38%) [ +0.04% +0.00% +0.13% / +0.38% +0.75% +0.75%] index_copy_ perm : Elapsed 0.024 ms (2.390 ms / 100) 2.400 -> 2.409 ( +0.37%) [ +0.04% +0.13% +0.00% / +0.46% +0.63% +0.37%] index_add_ perm_sorted : Elapsed 0.024 ms (2.401 ms / 100) 2.388 -> 2.399 ( +0.46%) [ +0.17% +0.08% +0.00% / +0.46% +0.80% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.392 ms / 100) 4.411 -> 4.415 ( +0.09%) [ +0.14% +0.00% +0.02% / +0.34% +0.14% +0.09%] index_select const : Elapsed 0.044 ms (4.417 ms / 100) 4.423 -> 4.423 ( +0.00%) [ +0.16% +0.18% +0.00% / +0.05% +0.00% +0.16%] index_select wrap : Elapsed 0.044 ms (4.430 ms / 100) 4.425 -> 4.427 ( +0.05%) [ +0.18% +0.02% +0.00% / +0.18% +0.05% +0.07%] index_select linear : Elapsed 0.044 ms (4.433 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.00% +0.05% +0.11% / -0.09% +0.14% +0.23%] index_select reverse : Elapsed 0.044 ms (4.426 ms / 100) 4.417 -> 4.418 ( +0.02%) [ +0.18% +0.00% +0.07% / +0.02% +0.09% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.425 ms / 100) 4.416 -> 4.412 ( -0.09%) [ +0.00% +0.07% +0.14% / -0.09% +0.11% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.416 ms / 100) 4.421 -> 4.428 ( +0.16%) [ +0.18% +0.00% +0.02% / +0.16% +0.27% +0.23%] index_select spread : Elapsed 0.044 ms (4.429 ms / 100) 4.420 -> 4.424 ( +0.09%) [ +0.07% +0.00% +0.02% / +0.14% +0.29% +0.09%] index_select strided 3 : Elapsed 0.044 ms (4.423 ms / 100) 4.415 -> 4.419 ( +0.09%) [ +0.34% +0.02% +0.00% / +0.09% +0.34% +0.27%] index_select strided 5 : Elapsed 0.044 ms (4.430 ms / 100) 4.415 -> 4.424 ( +0.20%) [ +0.14% +0.00% +0.07% / +0.20% +0.20% +0.39%] index_select strided 7 : Elapsed 0.044 ms (4.421 ms / 100) 4.418 -> 4.420 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.14% +0.11%] index_select strided 8 : Elapsed 0.044 ms (4.419 ms / 100) 4.418 -> 4.425 ( +0.16%) [ +0.00% +0.05% +0.02% / +0.16% +0.38% +0.25%] index_select strided 16 : Elapsed 0.044 ms (4.418 ms / 100) 4.422 -> 4.424 ( +0.05%) [ +0.25% +0.00% +0.05% / +0.05% +0.20% +0.14%] index_select random : Elapsed 0.044 ms (4.433 ms / 100) 4.423 -> 4.429 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.20% +0.25%] index_select random_sorted : Elapsed 0.044 ms (4.429 ms / 100) B = [4, 40, 5, 16] (stride (80, 320, 1, 5)) A = [4, 20, 5, 16] (stride (1600, 5, 1, 100)) dim = 1 2.439 -> 2.453 ( +0.57%) [ +0.08% +0.00% +0.21% / +0.57% +1.03% +0.98%] index_add_ linear : Elapsed 0.024 ms (2.441 ms / 100) 2.443 -> 2.450 ( +0.29%) [ +0.00% +0.16% +0.04% / +0.29% +0.74% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.432 -> 2.446 ( +0.58%) [ +0.16% +0.00% +0.04% / +0.58% +1.36% +1.27%] index_add_ reverse : Elapsed 0.024 ms (2.436 ms / 100) 2.435 -> 2.448 ( +0.53%) [ +0.00% +0.08% +0.16% / +0.53% +1.19% +1.19%] index_copy_ reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.438 -> 2.453 ( +0.62%) [ +0.08% +0.00% +0.04% / +0.62% +1.23% +0.94%] index_add_ spread : Elapsed 0.024 ms (2.440 ms / 100) 2.436 -> 2.453 ( +0.70%) [ +0.12% +0.08% +0.00% / +0.70% +1.23% +1.07%] index_copy_ spread : Elapsed 0.024 ms (2.439 ms / 100) 2.445 -> 2.456 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.61% +0.45% +0.49%] index_add_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.442 -> 2.457 ( +0.61%) [ +0.08% +0.08% +0.00% / +0.74% +0.74% +0.61%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.459 ( +0.57%) [ +0.25% +0.00% +0.16% / +0.57% +0.70% +0.78%] index_add_ strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.444 -> 2.458 ( +0.57%) [ +0.00% +0.08% +0.25% / +0.57% +0.70% +0.65%] index_copy_ strided 7 : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.451 ( +0.25%) [ +0.25% +0.00% +0.08% / +0.61% +0.53% +0.25%] index_add_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.456 ( +0.41%) [ +0.12% +0.08% +0.00% / +0.53% +0.41% +0.41%] index_copy_ perm : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.454 ( +0.29%) [ +0.12% +0.12% +0.00% / +0.45% +0.37% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 2.448 -> 2.456 ( +0.33%) [ +0.00% +0.08% +0.00% / +0.57% +0.33% +0.33%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.448 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.02% +0.00% +0.22% / +0.04% +0.16% +0.16%] index_select const : Elapsed 0.045 ms (4.496 ms / 100) 4.506 -> 4.506 ( +0.00%) [ +0.00% +0.09% +0.04% / +0.04% +0.00% +0.16%] index_select wrap : Elapsed 0.045 ms (4.506 ms / 100) 4.508 -> 4.503 ( -0.11%) [ +0.00% +0.00% +0.07% / +0.09% -0.11% +0.02%] index_select linear : Elapsed 0.045 ms (4.508 ms / 100) 4.511 -> 4.511 ( +0.00%) [ +0.07% +0.00% +0.04% / +0.13% +0.00% +0.00%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.497 -> 4.492 ( -0.11%) [ +0.02% +0.22% +0.00% / +0.11% -0.11% +0.11%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.498 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.07% +0.22% +0.04%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.508 -> 4.510 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.09% +0.07% +0.04%] index_select spread : Elapsed 0.045 ms (4.508 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.27% +0.13% +0.02%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.501 -> 4.501 ( +0.00%) [ +0.00% +0.07% +0.13% / +0.00% +0.09% +0.02%] index_select strided 5 : Elapsed 0.045 ms (4.501 ms / 100) 4.504 -> 4.506 ( +0.04%) [ +0.00% +0.11% +0.24% / +0.16% +0.16% +0.04%] index_select strided 7 : Elapsed 0.045 ms (4.504 ms / 100) 4.494 -> 4.499 ( +0.11%) [ +0.02% +0.00% +0.02% / +0.11% +0.22% +0.38%] index_select strided 8 : Elapsed 0.045 ms (4.495 ms / 100) 4.500 -> 4.499 ( -0.02%) [ +0.02% +0.29% +0.00% / +0.13% -0.02% +0.00%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.502 -> 4.507 ( +0.11%) [ +0.00% +0.20% +0.13% / +0.42% +0.11% +0.24%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.508 -> 4.506 ( -0.04%) [ +0.02% +0.02% +0.00% / +0.16% -0.04% +0.09%] index_select random_sorted : Elapsed 0.045 ms (4.509 ms / 100) B = [4, 40, 5, 16] (stride (5, 320, 1, 20)) A = [4, 20, 5, 16] (stride (1, 320, 64, 4)) dim = 1 2.396 -> 2.410 ( +0.58%) [ +0.00% +0.38% +0.08% / +0.58% +1.09% +0.96%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.397 -> 2.413 ( +0.67%) [ +0.00% +0.25% +0.25% / +0.67% +0.75% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.17% +0.00% +0.12% / +0.67% +0.67% +0.54%] index_add_ reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.08% +0.00% +0.17% / +0.75% +0.58% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.406 -> 2.410 ( +0.17%) [ +0.00% +0.17% +0.04% / +0.54% +0.33% +0.17%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.402 -> 2.414 ( +0.50%) [ +0.00% +0.29% +0.04% / +0.62% +0.67% +0.50%] index_copy_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.401 -> 2.409 ( +0.33%) [ +0.25% +0.12% +0.00% / +0.42% +0.33% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.409 ( +0.29%) [ +0.00% +0.08% +0.12% / +0.87% +0.29% +0.33%] index_copy_ strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.404 -> 2.415 ( +0.46%) [ +0.12% +0.04% +0.00% / +0.46% +0.50% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.412 ( +0.29%) [ +0.00% +0.12% +0.00% / +0.58% +0.33% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.399 -> 2.413 ( +0.58%) [ +0.29% +0.21% +0.00% / +0.58% +0.67% +0.63%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.21% +0.25% +0.00% / +0.54% +0.54% +0.54%] index_copy_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.08% +0.08% +0.00% / +0.58% +0.58% +0.54%] index_add_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.398 -> 2.413 ( +0.63%) [ +0.42% +0.29% +0.00% / +0.71% +0.67% +0.63%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.05% +0.07%] index_select const : Elapsed 0.044 ms (4.435 ms / 100) 4.434 -> 4.441 ( +0.16%) [ +0.16% +0.09% +0.00% / +0.16% +0.20% +0.16%] index_select wrap : Elapsed 0.044 ms (4.441 ms / 100) 4.439 -> 4.437 ( -0.05%) [ +0.02% +0.00% +0.05% / -0.05% +0.07% +0.02%] index_select linear : Elapsed 0.044 ms (4.440 ms / 100) 4.437 -> 4.435 ( -0.05%) [ +0.05% +0.02% +0.00% / +0.07% +0.14% -0.05%] index_select reverse : Elapsed 0.044 ms (4.439 ms / 100) 4.431 -> 4.431 ( +0.00%) [ +0.14% +0.11% +0.00% / +0.00% +0.14% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.437 ms / 100) 4.431 -> 4.431 ( +0.00%) [ +0.00% +0.05% +0.09% / +0.00% +0.27% +0.11%] index_select skip256 : Elapsed 0.044 ms (4.431 ms / 100) 4.437 -> 4.435 ( -0.05%) [ +0.00% +0.05% +0.18% / -0.05% +0.07% +0.23%] index_select spread : Elapsed 0.044 ms (4.437 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.07% +0.00% +0.20% / +0.09% +0.05% +0.14%] index_select strided 3 : Elapsed 0.044 ms (4.436 ms / 100) 4.435 -> 4.435 ( +0.00%) [ +0.02% +0.09% +0.00% / +0.02% +0.14% +0.00%] index_select strided 5 : Elapsed 0.044 ms (4.436 ms / 100) 4.434 -> 4.437 ( +0.07%) [ +0.00% +0.14% +0.16% / +0.07% +0.27% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.434 ms / 100) 4.429 -> 4.432 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.14% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.435 ms / 100) 4.431 -> 4.431 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.14% +0.00% +0.11%] index_select strided 16 : Elapsed 0.044 ms (4.437 ms / 100) 4.433 -> 4.434 ( +0.02%) [ +0.25% +0.16% +0.00% / +0.02% +0.16% +0.32%] index_select random : Elapsed 0.044 ms (4.444 ms / 100) 4.436 -> 4.431 ( -0.11%) [ +0.00% +0.02% +0.02% / -0.11% +0.29% +0.18%] index_select random_sorted : Elapsed 0.044 ms (4.436 ms / 100) B = [4, 40, 5, 16] (stride (640, 16, 2560, 1)) A = [4, 20, 5, 16] (stride (5, 320, 1, 20)) dim = 1 2.446 -> 2.456 ( +0.41%) [ +0.08% +0.12% +0.00% / +0.41% +0.70% +0.61%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.441 -> 2.451 ( +0.41%) [ +0.08% +0.00% +0.00% / +0.41% +0.90% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.436 -> 2.452 ( +0.66%) [ +0.00% +0.12% +0.16% / +0.66% +1.19% +1.19%] index_add_ reverse : Elapsed 0.024 ms (2.436 ms / 100) 2.432 -> 2.445 ( +0.53%) [ +0.00% +0.00% +0.16% / +0.53% +1.27% +1.11%] index_copy_ reverse : Elapsed 0.024 ms (2.432 ms / 100) 2.439 -> 2.453 ( +0.57%) [ +0.16% +0.00% +0.08% / +0.57% +1.15% +1.03%] index_add_ spread : Elapsed 0.024 ms (2.443 ms / 100) 2.428 -> 2.444 ( +0.66%) [ +0.08% +0.04% +0.00% / +0.66% +1.36% +1.36%] index_copy_ spread : Elapsed 0.024 ms (2.430 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.00% +0.20% +0.12% / +0.53% +0.82% +0.65%] index_add_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.437 -> 2.451 ( +0.57%) [ +0.00% +0.21% +0.12% / +0.57% +0.74% +0.86%] index_copy_ strided 3 : Elapsed 0.024 ms (2.437 ms / 100) 2.446 -> 2.457 ( +0.45%) [ +0.00% +0.04% +0.12% / +0.45% +0.61% +0.57%] index_add_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.435 -> 2.453 ( +0.74%) [ +0.12% +0.16% +0.00% / +0.82% +0.74% +0.86%] index_copy_ strided 7 : Elapsed 0.024 ms (2.438 ms / 100) 2.449 -> 2.453 ( +0.16%) [ +0.20% +0.00% +0.08% / +0.69% +0.33% +0.16%] index_add_ perm : Elapsed 0.025 ms (2.454 ms / 100) 2.440 -> 2.445 ( +0.20%) [ +0.04% +0.00% +0.00% / +0.66% +0.25% +0.20%] index_copy_ perm : Elapsed 0.024 ms (2.441 ms / 100) 2.447 -> 2.455 ( +0.33%) [ +0.00% +0.20% +0.08% / +0.61% +0.45% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 2.441 -> 2.446 ( +0.20%) [ +0.00% +0.00% +0.04% / +0.66% +0.41% +0.20%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.441 ms / 100) 4.491 -> 4.495 ( +0.09%) [ +0.13% +0.07% +0.00% / +0.09% +0.09% +0.24%] index_select const : Elapsed 0.045 ms (4.497 ms / 100) 4.504 -> 4.507 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.11% +0.07% +0.09%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.500 -> 4.503 ( +0.07%) [ +0.09% +0.00% +0.16% / +0.07% +0.13% +0.11%] index_select linear : Elapsed 0.045 ms (4.504 ms / 100) 4.504 -> 4.505 ( +0.02%) [ +0.33% +0.02% +0.00% / +0.07% +0.16% +0.02%] index_select reverse : Elapsed 0.045 ms (4.519 ms / 100) 4.490 -> 4.495 ( +0.11%) [ +0.18% +0.11% +0.00% / +0.33% +0.11% +0.13%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.492 -> 4.494 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.13% +0.04% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.492 ms / 100) 4.502 -> 4.504 ( +0.04%) [ +0.00% +0.13% +0.02% / +0.04% +0.07% +0.27%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.500 -> 4.502 ( +0.04%) [ +0.07% +0.20% +0.00% / +0.04% +0.20% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.503 ms / 100) 4.494 -> 4.490 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.24% +0.24%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.504 ( +0.09%) [ +0.09% +0.07% +0.00% / +0.09% +0.20% +0.20%] index_select strided 7 : Elapsed 0.045 ms (4.504 ms / 100) 4.497 -> 4.496 ( -0.02%) [ +0.11% +0.00% +0.04% / +0.00% -0.02% +0.11%] index_select strided 8 : Elapsed 0.045 ms (4.502 ms / 100) 4.501 -> 4.495 ( -0.13%) [ +0.02% +0.00% +0.02% / +0.20% +0.00% -0.13%] index_select strided 16 : Elapsed 0.045 ms (4.502 ms / 100) 4.505 -> 4.505 ( +0.00%) [ +0.00% +0.13% +0.16% / +0.13% +0.02% +0.00%] index_select random : Elapsed 0.045 ms (4.505 ms / 100) 4.498 -> 4.507 ( +0.20%) [ +0.24% +0.00% +0.18% / +0.31% +0.22% +0.20%] index_select random_sorted : Elapsed 0.045 ms (4.509 ms / 100) B = [4, 40, 5, 16] (stride (1, 64, 2560, 4)) A = [4, 20, 5, 16] (stride (1600, 80, 16, 1)) dim = 1 2.408 -> 2.420 ( +0.50%) [ +0.12% +0.08% +0.00% / +0.50% +0.66% +0.87%] index_add_ linear : Elapsed 0.024 ms (2.411 ms / 100) 2.399 -> 2.410 ( +0.46%) [ +0.13% +0.00% +0.17% / +0.46% +0.63% +0.63%] index_copy_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.409 -> 2.422 ( +0.54%) [ +0.00% +0.21% +0.12% / +0.62% +0.58% +0.54%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.401 -> 2.415 ( +0.58%) [ +0.17% +0.21% +0.00% / +0.58% +0.71% +0.75%] index_copy_ reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.413 -> 2.420 ( +0.29%) [ +0.00% +0.08% +0.12% / +0.41% +0.29% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.411 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.17% +0.42% +0.12%] index_copy_ spread : Elapsed 0.024 ms (2.410 ms / 100) 2.411 -> 2.422 ( +0.46%) [ +0.04% +0.25% +0.00% / +0.50% +0.50% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.412 ms / 100) 2.397 -> 2.409 ( +0.50%) [ +0.00% +0.25% +0.46% / +0.67% +0.75% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.397 ms / 100) 2.413 -> 2.417 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.50% +0.25% +0.17%] index_add_ strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.403 -> 2.413 ( +0.42%) [ +0.00% +0.08% +0.04% / +0.54% +0.42% +0.50%] index_copy_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.410 -> 2.421 ( +0.46%) [ +0.00% +0.12% +0.04% / +0.46% +0.62% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.410 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.08% +0.13% +0.00% / +0.58% +0.71% +0.50%] index_copy_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.409 -> 2.422 ( +0.54%) [ +0.00% +0.21% +0.04% / +0.54% +0.62% +0.58%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.401 -> 2.415 ( +0.58%) [ +0.21% +0.25% +0.00% / +0.58% +0.62% +0.58%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 4.425 -> 4.426 ( +0.02%) [ +0.02% +0.16% +0.00% / +0.02% +0.14% +0.11%] index_select const : Elapsed 0.044 ms (4.426 ms / 100) 4.437 -> 4.434 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% -0.02% +0.09%] index_select wrap : Elapsed 0.044 ms (4.438 ms / 100) 4.432 -> 4.438 ( +0.14%) [ +0.00% +0.27% +0.20% / +0.14% +0.32% +0.34%] index_select linear : Elapsed 0.044 ms (4.432 ms / 100) 4.437 -> 4.438 ( +0.02%) [ +0.11% +0.00% +0.09% / +0.02% +0.43% +0.14%] index_select reverse : Elapsed 0.044 ms (4.442 ms / 100) 4.425 -> 4.427 ( +0.05%) [ +0.00% +0.07% +0.16% / +0.23% +0.09% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.425 ms / 100) 4.423 -> 4.431 ( +0.18%) [ +0.02% +0.00% +0.25% / +0.25% +0.25% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.02% +0.09% +0.20%] index_select spread : Elapsed 0.044 ms (4.436 ms / 100) 4.434 -> 4.437 ( +0.07%) [ +0.16% +0.09% +0.00% / +0.07% +0.09% +0.07%] index_select strided 3 : Elapsed 0.044 ms (4.441 ms / 100) 4.426 -> 4.435 ( +0.20%) [ +0.14% +0.05% +0.00% / +0.27% +0.20% +0.20%] index_select strided 5 : Elapsed 0.044 ms (4.432 ms / 100) 4.435 -> 4.436 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.23% +0.18%] index_select strided 7 : Elapsed 0.044 ms (4.438 ms / 100) 4.431 -> 4.431 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.09% +0.07% +0.00%] index_select strided 8 : Elapsed 0.044 ms (4.431 ms / 100) 4.424 -> 4.433 ( +0.20%) [ +0.07% +0.11% +0.00% / +0.20% +0.32% +0.43%] index_select strided 16 : Elapsed 0.044 ms (4.427 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.05% +0.14%] index_select random : Elapsed 0.044 ms (4.438 ms / 100) 4.437 -> 4.444 ( +0.16%) [ +0.07% +0.16% +0.00% / +0.18% +0.16% +0.23%] index_select random_sorted : Elapsed 0.044 ms (4.440 ms / 100) B = [4, 40, 5, 16] (stride (1, 20, 4, 800)) A = [4, 20, 5, 16] (stride (1600, 1, 20, 100)) dim = 1 2.449 -> 2.461 ( +0.49%) [ +0.04% +0.00% +0.04% / +0.49% +0.69% +0.53%] index_add_ linear : Elapsed 0.025 ms (2.450 ms / 100) 2.448 -> 2.463 ( +0.61%) [ +0.00% +0.08% +0.16% / +0.61% +0.74% +0.78%] index_copy_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.443 -> 2.457 ( +0.57%) [ +0.12% +0.00% +0.12% / +0.57% +1.02% +1.02%] index_add_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.441 -> 2.456 ( +0.61%) [ +0.00% +0.16% +0.12% / +0.61% +1.31% +1.07%] index_copy_ reverse : Elapsed 0.024 ms (2.441 ms / 100) 2.451 -> 2.468 ( +0.69%) [ +0.00% +0.00% +0.08% / +0.69% +0.90% +0.98%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.451 -> 2.470 ( +0.78%) [ +0.00% +0.29% +0.33% / +0.78% +1.18% +1.14%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.452 -> 2.463 ( +0.45%) [ +0.24% +0.08% +0.00% / +0.65% +0.69% +0.45%] index_add_ strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.473 ( +0.65%) [ +0.16% +0.00% +0.00% / +0.69% +0.73% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.454 -> 2.468 ( +0.57%) [ +0.00% +0.16% +0.00% / +0.61% +0.57% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.457 -> 2.472 ( +0.61%) [ +0.00% +0.04% +0.33% / +0.61% +0.65% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.456 -> 2.462 ( +0.24%) [ +0.08% +0.12% +0.00% / +0.61% +0.24% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.457 -> 2.465 ( +0.33%) [ +0.00% +0.12% +0.00% / +0.73% +0.33% +0.33%] index_copy_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.457 -> 2.459 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.77% +0.08% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.453 -> 2.462 ( +0.37%) [ +0.12% +0.04% +0.00% / +1.30% +0.45% +0.37%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) 4.499 -> 4.501 ( +0.04%) [ +0.18% +0.00% +0.20% / +0.16% +0.33% +0.04%] index_select const : Elapsed 0.045 ms (4.507 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.20% +0.27% +0.00% / -0.02% +0.02% +0.40%] index_select wrap : Elapsed 0.045 ms (4.512 ms / 100) 4.508 -> 4.512 ( +0.09%) [ +0.20% +0.00% +0.09% / +0.35% +0.18% +0.09%] index_select linear : Elapsed 0.045 ms (4.517 ms / 100) 4.514 -> 4.509 ( -0.11%) [ +0.07% +0.00% +0.07% / +0.18% -0.07% -0.11%] index_select reverse : Elapsed 0.045 ms (4.517 ms / 100) 4.504 -> 4.493 ( -0.24%) [ +0.00% +0.09% +0.09% / +0.04% -0.24% -0.04%] index_select skip64 : Elapsed 0.045 ms (4.504 ms / 100) 4.497 -> 4.505 ( +0.18%) [ +0.11% +0.16% +0.00% / +0.47% +0.24% +0.18%] index_select skip256 : Elapsed 0.045 ms (4.502 ms / 100) 4.511 -> 4.508 ( -0.07%) [ +0.00% +0.00% +0.04% / -0.07% +0.02% +0.09%] index_select spread : Elapsed 0.045 ms (4.511 ms / 100) 4.504 -> 4.514 ( +0.22%) [ +0.07% +0.00% +0.24% / +0.24% +0.22% +0.29%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.512 -> 4.510 ( -0.04%) [ +0.16% +0.07% +0.00% / +0.27% +0.11% -0.04%] index_select strided 5 : Elapsed 0.045 ms (4.519 ms / 100) 4.508 -> 4.510 ( +0.04%) [ +0.11% +0.02% +0.00% / +0.42% +0.22% +0.04%] index_select strided 7 : Elapsed 0.045 ms (4.513 ms / 100) 4.509 -> 4.507 ( -0.04%) [ +0.16% +0.00% +0.09% / -0.04% +0.07% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.516 ms / 100) 4.511 -> 4.507 ( -0.09%) [ +0.13% +0.00% +0.02% / -0.04% -0.09% -0.04%] index_select strided 16 : Elapsed 0.045 ms (4.517 ms / 100) 4.512 -> 4.505 ( -0.16%) [ +0.13% +0.09% +0.00% / +0.13% +0.04% -0.16%] index_select random : Elapsed 0.045 ms (4.518 ms / 100) 4.513 -> 4.510 ( -0.07%) [ +0.00% +0.09% +0.07% / +0.11% -0.07% -0.04%] index_select random_sorted : Elapsed 0.045 ms (4.513 ms / 100) B = [4, 40, 5, 16] (stride (40, 1, 160, 800)) A = [4, 20, 5, 16] (stride (1600, 80, 16, 1)) dim = 1 2.416 -> 2.431 ( +0.62%) [ +0.25% +0.25% +0.00% / +0.75% +0.62% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.422 ms / 100) 2.415 -> 2.424 ( +0.37%) [ +0.00% +0.00% +0.04% / +0.37% +0.37% +0.58%] index_copy_ linear : Elapsed 0.024 ms (2.415 ms / 100) 2.418 -> 2.428 ( +0.41%) [ +0.21% +0.00% +0.12% / +0.79% +0.50% +0.41%] index_add_ reverse : Elapsed 0.024 ms (2.423 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.04% +0.08% +0.00% / +0.83% +0.41% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.435 -> 2.442 ( +0.29%) [ +0.00% +0.00% +0.08% / +0.53% +0.29% +0.29%] index_add_ spread : Elapsed 0.024 ms (2.435 ms / 100) 2.432 -> 2.445 ( +0.53%) [ +0.12% +0.00% +0.25% / +1.85% +0.53% +0.58%] index_copy_ spread : Elapsed 0.024 ms (2.435 ms / 100) 2.433 -> 2.438 ( +0.21%) [ +0.12% +0.08% +0.00% / +0.86% +0.21% +0.53%] index_add_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.430 -> 2.444 ( +0.58%) [ +0.12% +0.12% +0.00% / +0.86% +0.62% +0.58%] index_copy_ strided 3 : Elapsed 0.024 ms (2.433 ms / 100) 2.432 -> 2.441 ( +0.37%) [ +0.16% +0.00% +0.04% / +0.95% +0.41% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.436 ms / 100) 2.434 -> 2.443 ( +0.37%) [ +0.08% +0.00% +0.00% / +1.36% +0.37% +0.41%] index_copy_ strided 7 : Elapsed 0.024 ms (2.436 ms / 100) 2.431 -> 2.441 ( +0.41%) [ +0.16% +0.21% +0.00% / +0.41% +0.45% +0.53%] index_add_ perm : Elapsed 0.024 ms (2.435 ms / 100) 2.430 -> 2.443 ( +0.53%) [ +0.16% +0.00% +0.04% / +0.62% +0.53% +0.78%] index_copy_ perm : Elapsed 0.024 ms (2.434 ms / 100) 2.430 -> 2.440 ( +0.41%) [ +0.08% +0.12% +0.00% / +0.41% +0.62% +0.45%] index_add_ perm_sorted : Elapsed 0.024 ms (2.432 ms / 100) 2.430 -> 2.446 ( +0.66%) [ +0.25% +0.21% +0.00% / +0.78% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.436 ms / 100) 4.433 -> 4.433 ( +0.00%) [ +0.00% +0.07% +0.18% / +0.43% +0.14% +0.00%] index_select const : Elapsed 0.044 ms (4.433 ms / 100) 4.439 -> 4.443 ( +0.09%) [ +0.07% +0.11% +0.00% / +0.52% +0.11% +0.09%] index_select wrap : Elapsed 0.044 ms (4.442 ms / 100) 4.443 -> 4.442 ( -0.02%) [ +0.00% +0.07% +0.09% / +0.20% +0.05% -0.02%] index_select linear : Elapsed 0.044 ms (4.443 ms / 100) 4.443 -> 4.446 ( +0.07%) [ +0.09% +0.00% +0.18% / +0.23% +0.14% +0.07%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.433 -> 4.431 ( -0.05%) [ +0.09% +0.00% +0.07% / +0.50% -0.05% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.437 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.07% +0.00% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.438 ms / 100) 4.439 -> 4.436 ( -0.07%) [ +0.11% +0.18% +0.00% / +0.18% -0.07% +0.16%] index_select spread : Elapsed 0.044 ms (4.444 ms / 100) 4.437 -> 4.444 ( +0.16%) [ +0.23% +0.14% +0.00% / +0.43% +0.23% +0.16%] index_select strided 3 : Elapsed 0.044 ms (4.447 ms / 100) 4.434 -> 4.437 ( +0.07%) [ +0.11% +0.25% +0.00% / +0.11% +0.20% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.445 ( +0.20%) [ +0.00% +0.27% +0.11% / +0.20% +0.25% +0.43%] index_select strided 7 : Elapsed 0.044 ms (4.436 ms / 100) 4.435 -> 4.437 ( +0.05%) [ +0.00% +0.14% +0.09% / +0.25% +0.20% +0.05%] index_select strided 8 : Elapsed 0.044 ms (4.435 ms / 100) 4.431 -> 4.438 ( +0.16%) [ +0.18% +0.23% +0.00% / +0.16% +0.20% +0.20%] index_select strided 16 : Elapsed 0.044 ms (4.439 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.11% +0.00% +0.11% / -0.05% -0.07% +0.20%] index_select random : Elapsed 0.044 ms (4.449 ms / 100) 4.440 -> 4.443 ( +0.07%) [ +0.23% +0.00% +0.11% / +0.07% +0.11% +0.14%] index_select random_sorted : Elapsed 0.044 ms (4.450 ms / 100) out_shape = [4, 20, 40, 16] in_shape = [4, 20, 5, 16] idx_dim = 2 B = [4, 20, 40, 16] (stride (12800, 640, 16, 1)) A = [4, 20, 5, 16] (stride (1, 64, 1280, 4)) dim = 2 1.422 -> 1.430 ( +0.56%) [ +0.00% +0.07% +0.07% / +0.70% +0.56% +0.56%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.14% +0.00% +0.00% / +0.58% +0.29% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.382 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.56% +0.63%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.29% +0.29%] index_copy_ reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.70% +0.63%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.00% +0.22% +0.22% / +0.22% +0.36% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.430 ( +0.56%) [ +0.00% +0.07% +0.07% / +0.84% +0.63% +0.56%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.15% +0.00% +0.22% / +0.22% +0.44% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.430 ( +0.56%) [ +0.07% +0.00% +0.00% / +0.77% +0.56% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.07% +0.07% +0.00% / +1.09% +0.36% +0.29%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.63% +0.56%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.29% +0.36%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.56% +0.63%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.383 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.29% +0.36%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) 8.198 -> 8.194 ( -0.05%) [ +0.30% +0.06% +0.00% / +0.10% +0.07% -0.05%] index_select const : Elapsed 0.082 ms (8.223 ms / 100) 8.212 -> 8.211 ( -0.01%) [ +0.07% +0.01% +0.00% / -0.01% +0.13% +0.28%] index_select wrap : Elapsed 0.082 ms (8.218 ms / 100) 8.219 -> 8.210 ( -0.11%) [ +0.00% +0.17% +0.07% / +0.13% -0.11% -0.11%] index_select linear : Elapsed 0.082 ms (8.219 ms / 100) 8.210 -> 8.206 ( -0.05%) [ +0.00% +0.04% +0.01% / -0.01% +0.12% -0.05%] index_select reverse : Elapsed 0.082 ms (8.210 ms / 100) 8.204 -> 8.217 ( +0.16%) [ +0.04% +0.04% +0.00% / +0.34% +0.16% +0.18%] index_select skip64 : Elapsed 0.082 ms (8.207 ms / 100) 8.191 -> 8.195 ( +0.05%) [ +0.24% +0.10% +0.00% / +0.38% +0.05% +0.32%] index_select skip256 : Elapsed 0.082 ms (8.211 ms / 100) 8.209 -> 8.211 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.09% +0.06%] index_select spread : Elapsed 0.082 ms (8.213 ms / 100) 8.202 -> 8.208 ( +0.07%) [ +0.00% +0.12% +0.24% / +0.52% +0.07% +0.37%] index_select strided 3 : Elapsed 0.082 ms (8.202 ms / 100) 8.211 -> 8.208 ( -0.04%) [ +0.00% +0.16% +0.04% / +0.51% -0.04% +0.00%] index_select random : Elapsed 0.082 ms (8.211 ms / 100) 8.205 -> 8.205 ( +0.00%) [ +0.00% +0.54% +0.12% / +0.17% +0.17% +0.00%] index_select random_sorted : Elapsed 0.082 ms (8.205 ms / 100) B = [4, 20, 40, 16] (stride (12800, 40, 1, 800)) A = [4, 20, 5, 16] (stride (1600, 80, 16, 1)) dim = 2 1.420 -> 1.420 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.35% +0.35%] index_add_ linear : Elapsed 0.014 ms (1.420 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.58% +0.66%] index_copy_ linear : Elapsed 0.014 ms (1.375 ms / 100) 1.419 -> 1.426 ( +0.49%) [ +0.00% +0.21% +0.00% / +1.06% +0.49% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.419 ms / 100) 1.372 -> 1.379 ( +0.51%) [ +0.00% +0.22% +0.15% / +0.51% +0.66% +0.73%] index_copy_ reverse : Elapsed 0.014 ms (1.372 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.21% +0.00% +0.21% / +0.14% +0.56% +0.63%] index_add_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.373 -> 1.376 ( +0.22%) [ +0.15% +0.07% +0.00% / +0.22% +0.58% +0.58%] index_copy_ spread : Elapsed 0.014 ms (1.375 ms / 100) 1.419 -> 1.424 ( +0.35%) [ +0.00% +0.07% +0.07% / +0.35% +0.42% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.419 ms / 100) 1.372 -> 1.377 ( +0.36%) [ +0.00% +0.07% +0.15% / +0.36% +0.73% +0.66%] index_copy_ strided 3 : Elapsed 0.014 ms (1.372 ms / 100) 1.418 -> 1.422 ( +0.28%) [ +0.14% +0.14% +0.00% / +0.28% +0.63% +0.56%] index_add_ strided 7 : Elapsed 0.014 ms (1.420 ms / 100) 1.374 -> 1.379 ( +0.36%) [ +0.00% +0.51% +0.07% / +0.36% +0.51% +0.66%] index_copy_ strided 7 : Elapsed 0.014 ms (1.374 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.28% +0.28% +0.00% / +0.14% +0.63% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.66% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.374 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.63% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.58% +0.87%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.374 ms / 100) 8.242 -> 8.244 ( +0.02%) [ +0.11% +0.00% +0.27% / +0.02% +0.28% +0.56%] index_select const : Elapsed 0.083 ms (8.251 ms / 100) 8.274 -> 8.276 ( +0.02%) [ +0.05% +0.00% +0.17% / +0.02% +0.05% +0.40%] index_select wrap : Elapsed 0.083 ms (8.278 ms / 100) 8.261 -> 8.276 ( +0.18%) [ +0.00% +0.25% +0.13% / +0.18% +0.28% +0.30%] index_select linear : Elapsed 0.083 ms (8.261 ms / 100) 8.268 -> 8.277 ( +0.11%) [ +0.08% +0.00% +0.08% / +0.11% +0.11% +0.28%] index_select reverse : Elapsed 0.083 ms (8.275 ms / 100) 8.233 -> 8.258 ( +0.30%) [ +0.17% +0.13% +0.00% / +0.30% +0.44% +0.39%] index_select skip64 : Elapsed 0.082 ms (8.247 ms / 100) 8.245 -> 8.257 ( +0.15%) [ +0.00% +0.34% +0.12% / +0.28% +0.24% +0.15%] index_select skip256 : Elapsed 0.082 ms (8.245 ms / 100) 8.258 -> 8.280 ( +0.27%) [ +0.00% +0.01% +0.07% / +0.28% +0.27% +0.41%] index_select spread : Elapsed 0.083 ms (8.258 ms / 100) 8.260 -> 8.278 ( +0.22%) [ +0.19% +0.10% +0.00% / +0.22% +0.44% +0.25%] index_select strided 3 : Elapsed 0.083 ms (8.276 ms / 100) 8.263 -> 8.267 ( +0.05%) [ +0.00% +0.13% +0.15% / +0.05% +0.19% +0.35%] index_select random : Elapsed 0.083 ms (8.263 ms / 100) 8.265 -> 8.275 ( +0.12%) [ +0.00% +0.21% +0.16% / +0.17% +0.12% +0.24%] index_select random_sorted : Elapsed 0.083 ms (8.265 ms / 100) B = [4, 20, 40, 16] (stride (640, 2560, 16, 1)) A = [4, 20, 5, 16] (stride (1, 4, 1280, 80)) dim = 2 1.585 -> 1.586 ( +0.06%) [ +0.06% +0.00% +0.13% / +0.06% +0.50% +0.57%] index_add_ linear : Elapsed 0.016 ms (1.586 ms / 100) 1.536 -> 1.537 ( +0.07%) [ +0.13% +0.00% +0.13% / +0.07% +0.59% +0.46%] index_copy_ linear : Elapsed 0.015 ms (1.538 ms / 100) 1.586 -> 1.587 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.44% +0.44%] index_add_ reverse : Elapsed 0.016 ms (1.586 ms / 100) 1.534 -> 1.535 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.59% +0.46%] index_copy_ reverse : Elapsed 0.015 ms (1.534 ms / 100) 1.583 -> 1.585 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.51% +0.63%] index_add_ spread : Elapsed 0.016 ms (1.585 ms / 100) 1.529 -> 1.536 ( +0.46%) [ +0.20% +0.13% +0.00% / +0.59% +0.52% +0.46%] index_copy_ spread : Elapsed 0.015 ms (1.532 ms / 100) 1.584 -> 1.583 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.51% +0.63%] index_add_ strided 3 : Elapsed 0.016 ms (1.585 ms / 100) 1.529 -> 1.533 ( +0.26%) [ +0.00% +0.07% +0.07% / +0.26% +0.52% +0.52%] index_copy_ strided 3 : Elapsed 0.015 ms (1.529 ms / 100) 1.584 -> 1.586 ( +0.13%) [ +0.13% +0.19% +0.00% / +0.13% +0.63% +0.69%] index_add_ strided 7 : Elapsed 0.016 ms (1.586 ms / 100) 1.533 -> 1.534 ( +0.07%) [ +0.00% +0.20% +0.00% / +0.07% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.015 ms (1.533 ms / 100) 1.585 -> 1.586 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.38% +0.57%] index_add_ perm : Elapsed 0.016 ms (1.586 ms / 100) 1.534 -> 1.536 ( +0.13%) [ +0.00% +0.26% +0.20% / +0.13% +0.52% +0.46%] index_copy_ perm : Elapsed 0.015 ms (1.534 ms / 100) 1.577 -> 1.578 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.57% +0.70%] index_add_ perm_sorted : Elapsed 0.016 ms (1.578 ms / 100) 1.527 -> 1.528 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.59% +0.46%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.527 ms / 100) 8.526 -> 8.532 ( +0.07%) [ +0.20% +0.00% +0.02% / +0.07% +0.09% +0.29%] index_select const : Elapsed 0.085 ms (8.543 ms / 100) 8.545 -> 8.563 ( +0.21%) [ +0.18% +0.02% +0.00% / +0.21% +0.34% +0.27%] index_select wrap : Elapsed 0.086 ms (8.560 ms / 100) 8.542 -> 8.547 ( +0.06%) [ +0.19% +0.06% +0.00% / +0.25% +0.06% +0.18%] index_select linear : Elapsed 0.086 ms (8.558 ms / 100) 8.534 -> 8.537 ( +0.04%) [ +0.11% +0.32% +0.00% / +0.26% +0.04% +0.20%] index_select reverse : Elapsed 0.085 ms (8.543 ms / 100) 8.523 -> 8.533 ( +0.12%) [ +0.00% +0.21% +0.22% / +0.28% +0.12% +0.14%] index_select skip64 : Elapsed 0.085 ms (8.523 ms / 100) 8.530 -> 8.537 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.15% +0.08% +0.08%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.548 -> 8.540 ( -0.09%) [ +0.08% +0.00% +0.06% / -0.02% -0.09% +0.14%] index_select spread : Elapsed 0.086 ms (8.555 ms / 100) 8.536 -> 8.545 ( +0.11%) [ +0.21% +0.00% +0.20% / +0.23% +0.20% +0.11%] index_select strided 3 : Elapsed 0.086 ms (8.554 ms / 100) 8.544 -> 8.538 ( -0.07%) [ +0.00% +0.11% +0.36% / +0.18% +0.00% -0.07%] index_select random : Elapsed 0.085 ms (8.544 ms / 100) 8.535 -> 8.537 ( +0.02%) [ +0.13% +0.00% +0.19% / +0.23% +0.34% +0.02%] index_select random_sorted : Elapsed 0.085 ms (8.546 ms / 100) B = [4, 20, 40, 16] (stride (40, 160, 1, 3200)) A = [4, 20, 5, 16] (stride (1600, 16, 320, 1)) dim = 2 1.517 -> 1.517 ( +0.00%) [ +0.20% +0.13% +0.00% / +0.00% +0.59% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.517 -> 1.518 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.66% +0.73%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.61% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.517 -> 1.521 ( +0.26%) [ +0.20% +0.07% +0.00% / +0.26% +0.79% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.482 ( +0.61%) [ +0.00% +0.00% +0.00% / +0.75% +0.61% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.516 -> 1.522 ( +0.40%) [ +0.20% +0.07% +0.00% / +0.40% +0.66% +0.73%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.474 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.472 ms / 100) 1.516 -> 1.518 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +0.79% +0.73%] index_add_ strided 7 : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.61% +0.75%] index_copy_ strided 7 : Elapsed 0.015 ms (1.472 ms / 100) 1.517 -> 1.518 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.73%] index_add_ perm : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +0.75% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.475 ( +0.20%) [ +0.00% +0.00% +0.07% / +0.20% +0.75% +0.75%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.472 ms / 100) 8.559 -> 8.545 ( -0.16%) [ +0.07% +0.01% +0.00% / -0.16% +0.13% +0.32%] index_select const : Elapsed 0.086 ms (8.565 ms / 100) 8.588 -> 8.596 ( +0.09%) [ +0.01% +0.07% +0.00% / +0.09% +0.13% +0.12%] index_select wrap : Elapsed 0.086 ms (8.589 ms / 100) 8.569 -> 8.589 ( +0.23%) [ +0.08% +0.00% +0.13% / +0.28% +0.23% +0.35%] index_select linear : Elapsed 0.086 ms (8.576 ms / 100) 8.568 -> 8.577 ( +0.11%) [ +0.08% +0.04% +0.00% / +0.11% +0.35% +0.32%] index_select reverse : Elapsed 0.086 ms (8.575 ms / 100) 8.557 -> 8.554 ( -0.04%) [ +0.01% +0.00% +0.02% / +0.02% +0.35% -0.04%] index_select skip64 : Elapsed 0.086 ms (8.558 ms / 100) 8.557 -> 8.564 ( +0.08%) [ +0.00% +0.23% +0.16% / +0.08% +0.23% +0.26%] index_select skip256 : Elapsed 0.086 ms (8.557 ms / 100) 8.559 -> 8.581 ( +0.26%) [ +0.15% +0.23% +0.00% / +0.30% +0.30% +0.26%] index_select spread : Elapsed 0.086 ms (8.572 ms / 100) 8.578 -> 8.590 ( +0.14%) [ +0.09% +0.00% +0.09% / +0.14% +0.41% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.586 ms / 100) 8.574 -> 8.587 ( +0.15%) [ +0.00% +0.30% +0.02% / +0.15% +0.33% +0.35%] index_select random : Elapsed 0.086 ms (8.574 ms / 100) 8.574 -> 8.566 ( -0.09%) [ +0.21% +0.00% +0.27% / -0.09% +0.07% +0.15%] index_select random_sorted : Elapsed 0.086 ms (8.592 ms / 100) out_shape = [4, 20, 5, 40] in_shape = [4, 20, 5, 16] idx_dim = 3 B = [4, 20, 5, 40] (stride (4000, 1, 800, 20)) A = [4, 20, 5, 16] (stride (1600, 80, 1, 5)) dim = 3 3.734 -> 3.736 ( +0.05%) [ +0.00% +0.08% +0.08% / +0.05% +0.70% +0.64%] index_add_ linear : Elapsed 0.037 ms (3.734 ms / 100) 3.602 -> 3.608 ( +0.17%) [ +0.00% +0.03% +0.03% / +0.17% +0.47% +0.44%] index_copy_ linear : Elapsed 0.036 ms (3.602 ms / 100) 3.744 -> 3.752 ( +0.21%) [ +0.00% +0.03% +0.00% / +0.21% +0.59% +0.48%] index_add_ reverse : Elapsed 0.037 ms (3.744 ms / 100) 3.614 -> 3.621 ( +0.19%) [ +0.00% +0.00% +0.00% / +0.19% +0.53% +0.50%] index_copy_ reverse : Elapsed 0.036 ms (3.614 ms / 100) 3.744 -> 3.742 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.43% +0.40%] index_add_ spread : Elapsed 0.037 ms (3.744 ms / 100) 3.610 -> 3.614 ( +0.11%) [ +0.08% +0.00% +0.06% / +0.11% +0.36% +0.36%] index_copy_ spread : Elapsed 0.036 ms (3.613 ms / 100) 3.740 -> 3.740 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.45% +0.48%] index_add_ strided 3 : Elapsed 0.037 ms (3.741 ms / 100) 3.611 -> 3.611 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.53% +0.53%] index_copy_ strided 3 : Elapsed 0.036 ms (3.611 ms / 100) 3.747 -> 3.748 ( +0.03%) [ +0.11% +0.11% +0.00% / +0.03% +0.59% +0.53%] index_add_ strided 7 : Elapsed 0.038 ms (3.751 ms / 100) 3.617 -> 3.620 ( +0.08%) [ +0.00% +0.03% +0.06% / +0.08% +0.47% +0.39%] index_copy_ strided 7 : Elapsed 0.036 ms (3.617 ms / 100) 3.736 -> 3.738 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.56% +0.54%] index_add_ perm : Elapsed 0.037 ms (3.736 ms / 100) 3.601 -> 3.604 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.47% +0.47%] index_copy_ perm : Elapsed 0.036 ms (3.601 ms / 100) 3.740 -> 3.742 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.40% +0.40%] index_add_ perm_sorted : Elapsed 0.037 ms (3.740 ms / 100) 3.602 -> 3.610 ( +0.22%) [ +0.03% +0.00% +0.06% / +0.22% +0.44% +0.36%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.603 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.07% +0.02% +0.00% / +0.04% +0.13% +0.09%] index_select const : Elapsed 0.055 ms (5.484 ms / 100) 5.483 -> 5.491 ( +0.15%) [ +0.00% +0.02% +0.04% / +0.15% +0.20% +0.18%] index_select wrap : Elapsed 0.055 ms (5.483 ms / 100) 5.487 -> 5.486 ( -0.02%) [ +0.05% +0.05% +0.00% / -0.02% +0.15% +0.20%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.00% +0.05% +0.05% / -0.04% +0.07% +0.18%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.485 -> 5.482 ( -0.05%) [ +0.00% +0.16% +0.02% / +0.13% -0.05% +0.00%] index_select skip64 : Elapsed 0.055 ms (5.485 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.04% +0.00% +0.04% / +0.05% +0.00% -0.05%] index_select skip256 : Elapsed 0.055 ms (5.488 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.05% +0.18% +0.00% / +0.09% +0.07% +0.02%] index_select spread : Elapsed 0.055 ms (5.488 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.16% +0.00% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.490 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.02% +0.07% +0.00% / -0.04% +0.02% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.482 -> 5.491 ( +0.16%) [ +0.09% +0.04% +0.00% / +0.16% +0.20% +0.16%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.470 -> 5.483 ( +0.24%) [ +0.16% +0.00% +0.00% / +0.27% +0.24% +0.29%] index_select strided 8 : Elapsed 0.055 ms (5.479 ms / 100) 5.488 -> 5.488 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.04% +0.05% +0.00%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.00% +0.02% +0.04% / +0.11% +0.11% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.483 ms / 100) B = [4, 20, 5, 40] (stride (1, 800, 4, 20)) A = [4, 20, 5, 16] (stride (1600, 5, 1, 100)) dim = 3 3.634 -> 3.639 ( +0.14%) [ +0.00% +0.08% +0.03% / +0.14% +0.83% +0.83%] index_add_ linear : Elapsed 0.036 ms (3.634 ms / 100) 3.507 -> 3.511 ( +0.11%) [ +0.00% +0.11% +0.09% / +0.11% +0.77% +0.80%] index_copy_ linear : Elapsed 0.035 ms (3.507 ms / 100) 3.629 -> 3.631 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.69% +0.66%] index_add_ reverse : Elapsed 0.036 ms (3.630 ms / 100) 3.496 -> 3.497 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +1.03% +0.94%] index_copy_ reverse : Elapsed 0.035 ms (3.497 ms / 100) 3.628 -> 3.631 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.66% +0.66%] index_add_ spread : Elapsed 0.036 ms (3.629 ms / 100) 3.499 -> 3.505 ( +0.17%) [ +0.06% +0.06% +0.00% / +0.17% +0.91% +0.89%] index_copy_ spread : Elapsed 0.035 ms (3.501 ms / 100) 3.624 -> 3.624 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.77% +0.77%] index_add_ strided 3 : Elapsed 0.036 ms (3.624 ms / 100) 3.495 -> 3.497 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.74% +0.83%] index_copy_ strided 3 : Elapsed 0.035 ms (3.496 ms / 100) 3.627 -> 3.629 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.77% +0.80%] index_add_ strided 7 : Elapsed 0.036 ms (3.628 ms / 100) 3.495 -> 3.504 ( +0.26%) [ +0.06% +0.00% +0.03% / +0.26% +0.97% +1.06%] index_copy_ strided 7 : Elapsed 0.035 ms (3.497 ms / 100) 3.636 -> 3.636 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.80% +0.77%] index_add_ perm : Elapsed 0.036 ms (3.637 ms / 100) 3.509 -> 3.510 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.83% +0.77%] index_copy_ perm : Elapsed 0.035 ms (3.509 ms / 100) 3.623 -> 3.626 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.80% +0.77%] index_add_ perm_sorted : Elapsed 0.036 ms (3.623 ms / 100) 3.492 -> 3.498 ( +0.17%) [ +0.03% +0.03% +0.00% / +0.17% +0.95% +0.97%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.493 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.00% +0.00% +0.09% / +0.13% -0.02% +0.04%] index_select const : Elapsed 0.055 ms (5.483 ms / 100) 5.491 -> 5.483 ( -0.15%) [ +0.05% +0.02% +0.00% / +0.11% -0.15% -0.07%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.16% +0.05% +0.00% / +0.11% -0.04% +0.15%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.490 -> 5.490 ( +0.00%) [ +0.02% +0.16% +0.00% / +0.11% +0.00% +0.00%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.482 -> 5.478 ( -0.07%) [ +0.02% +0.00% +0.04% / -0.04% -0.02% -0.07%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.04% +0.00% +0.05% / -0.07% +0.04% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.482 ms / 100) 5.488 -> 5.480 ( -0.15%) [ +0.00% +0.05% +0.02% / +0.04% +0.04% -0.15%] index_select spread : Elapsed 0.055 ms (5.488 ms / 100) 5.487 -> 5.488 ( +0.02%) [ +0.15% +0.02% +0.00% / +0.09% +0.02% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.495 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.05% +0.09% +0.00% / -0.02% +0.00% -0.09%] index_select strided 5 : Elapsed 0.055 ms (5.493 ms / 100) 5.492 -> 5.485 ( -0.13%) [ +0.00% +0.04% +0.05% / -0.13% +0.09% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.478 -> 5.477 ( -0.02%) [ +0.05% +0.00% +0.05% / -0.02% +0.00% +0.20%] index_select strided 8 : Elapsed 0.055 ms (5.481 ms / 100) 5.486 -> 5.485 ( -0.02%) [ +0.07% +0.02% +0.00% / +0.05% +0.02% -0.02%] index_select random : Elapsed 0.055 ms (5.490 ms / 100) 5.490 -> 5.483 ( -0.13%) [ +0.05% +0.15% +0.00% / -0.02% -0.13% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) B = [4, 20, 5, 40] (stride (1, 800, 4, 20)) A = [4, 20, 5, 16] (stride (100, 1, 20, 400)) dim = 3 3.981 -> 3.983 ( +0.05%) [ +0.03% +0.00% +0.05% / +0.05% +0.63% +0.68%] index_add_ linear : Elapsed 0.040 ms (3.982 ms / 100) 3.854 -> 3.859 ( +0.13%) [ +0.10% +0.00% +0.16% / +0.13% +0.54% +0.62%] index_copy_ linear : Elapsed 0.039 ms (3.858 ms / 100) 3.990 -> 3.990 ( +0.00%) [ +0.18% +0.00% +0.13% / +0.00% +0.65% +0.43%] index_add_ reverse : Elapsed 0.040 ms (3.997 ms / 100) 3.860 -> 3.861 ( +0.03%) [ +0.16% +0.00% +0.05% / +0.03% +0.65% +0.39%] index_copy_ reverse : Elapsed 0.039 ms (3.866 ms / 100) 3.977 -> 3.985 ( +0.20%) [ +0.15% +0.20% +0.00% / +0.20% +0.53% +0.40%] index_add_ spread : Elapsed 0.040 ms (3.983 ms / 100) 3.850 -> 3.857 ( +0.18%) [ +0.10% +0.18% +0.00% / +0.18% +0.49% +0.47%] index_copy_ spread : Elapsed 0.039 ms (3.854 ms / 100) 3.984 -> 3.992 ( +0.20%) [ +0.05% +0.25% +0.00% / +0.20% +0.55% +0.70%] index_add_ strided 3 : Elapsed 0.040 ms (3.986 ms / 100) 3.853 -> 3.852 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.60% +0.57%] index_copy_ strided 3 : Elapsed 0.039 ms (3.853 ms / 100) 3.988 -> 3.991 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.08% +0.65% +0.45%] index_add_ strided 7 : Elapsed 0.040 ms (3.997 ms / 100) 3.858 -> 3.860 ( +0.05%) [ +0.23% +0.23% +0.00% / +0.05% +0.73% +0.47%] index_copy_ strided 7 : Elapsed 0.039 ms (3.867 ms / 100) 3.982 -> 3.984 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.05% +0.63% +0.60%] index_add_ perm : Elapsed 0.040 ms (3.985 ms / 100) 3.855 -> 3.858 ( +0.08%) [ +0.03% +0.10% +0.00% / +0.08% +0.67% +0.54%] index_copy_ perm : Elapsed 0.039 ms (3.856 ms / 100) 3.984 -> 3.985 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.35% +0.48%] index_add_ perm_sorted : Elapsed 0.040 ms (3.986 ms / 100) 3.858 -> 3.858 ( +0.00%) [ +0.13% +0.00% +0.08% / +0.00% +0.39% +0.44%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.863 ms / 100) 5.552 -> 5.554 ( +0.04%) [ +0.05% +0.00% +0.25% / +0.04% +0.13% +0.23%] index_select const : Elapsed 0.056 ms (5.555 ms / 100) 5.555 -> 5.556 ( +0.02%) [ +0.05% +0.00% +0.14% / +0.02% +0.22% +0.29%] index_select wrap : Elapsed 0.056 ms (5.558 ms / 100) 5.560 -> 5.550 ( -0.18%) [ +0.05% +0.00% +0.04% / -0.18% +0.13% +0.18%] index_select linear : Elapsed 0.056 ms (5.563 ms / 100) 5.560 -> 5.560 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.04% +0.09%] index_select reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.556 -> 5.550 ( -0.11%) [ +0.11% +0.00% +0.02% / +0.13% -0.02% -0.11%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.555 ( -0.07%) [ +0.20% +0.00% +0.00% / +0.02% -0.02% -0.07%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.562 -> 5.553 ( -0.16%) [ +0.05% +0.05% +0.00% / +0.04% -0.16% +0.11%] index_select spread : Elapsed 0.056 ms (5.565 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.05% +0.00% +0.07% / +0.04% +0.23% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.560 ms / 100) 5.552 -> 5.561 ( +0.16%) [ +0.02% +0.00% +0.14% / +0.16% +0.32% +0.25%] index_select strided 5 : Elapsed 0.056 ms (5.553 ms / 100) 5.553 -> 5.559 ( +0.11%) [ +0.00% +0.07% +0.05% / +0.11% +0.20% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.553 ms / 100) 5.553 -> 5.557 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.16% +0.07% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.554 ms / 100) 5.553 -> 5.563 ( +0.18%) [ +0.07% +0.02% +0.00% / +0.18% +0.20% +0.20%] index_select random : Elapsed 0.056 ms (5.557 ms / 100) 5.554 -> 5.561 ( +0.13%) [ +0.04% +0.00% +0.11% / +0.20% +0.13% +0.29%] index_select random_sorted : Elapsed 0.056 ms (5.556 ms / 100) B = [4, 20, 5, 40] (stride (1, 160, 3200, 4)) A = [4, 20, 5, 16] (stride (1600, 1, 320, 20)) dim = 3 4.063 -> 4.065 ( +0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.81% +0.86%] index_add_ linear : Elapsed 0.041 ms (4.064 ms / 100) 3.922 -> 3.923 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.79% +0.89%] index_copy_ linear : Elapsed 0.039 ms (3.924 ms / 100) 4.049 -> 4.047 ( -0.05%) [ +0.02% +0.00% +0.00% / -0.05% +0.67% +0.77%] index_add_ reverse : Elapsed 0.041 ms (4.050 ms / 100) 3.921 -> 3.918 ( -0.08%) [ +0.05% +0.00% +0.03% / -0.08% +0.54% +0.61%] index_copy_ reverse : Elapsed 0.039 ms (3.923 ms / 100) 4.050 -> 4.053 ( +0.07%) [ +0.00% +0.12% +0.10% / +0.07% +0.64% +0.62%] index_add_ spread : Elapsed 0.040 ms (4.050 ms / 100) 3.918 -> 3.917 ( -0.03%) [ +0.00% +0.03% +0.10% / -0.03% +0.51% +0.54%] index_copy_ spread : Elapsed 0.039 ms (3.918 ms / 100) 4.054 -> 4.056 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.67% +0.67%] index_add_ strided 3 : Elapsed 0.041 ms (4.054 ms / 100) 3.919 -> 3.929 ( +0.26%) [ +0.00% +0.03% +0.03% / +0.26% +0.46% +0.46%] index_copy_ strided 3 : Elapsed 0.039 ms (3.919 ms / 100) 4.047 -> 4.050 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.82% +0.82%] index_add_ strided 7 : Elapsed 0.040 ms (4.047 ms / 100) 3.917 -> 3.925 ( +0.20%) [ +0.00% +0.18% +0.15% / +0.20% +0.69% +0.82%] index_copy_ strided 7 : Elapsed 0.039 ms (3.917 ms / 100) 4.064 -> 4.067 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.74% +0.74%] index_add_ perm : Elapsed 0.041 ms (4.066 ms / 100) 3.921 -> 3.928 ( +0.18%) [ +0.00% +0.05% +0.03% / +0.18% +0.77% +0.74%] index_copy_ perm : Elapsed 0.039 ms (3.921 ms / 100) 4.048 -> 4.052 ( +0.10%) [ +0.05% +0.02% +0.00% / +0.10% +0.82% +0.82%] index_add_ perm_sorted : Elapsed 0.041 ms (4.050 ms / 100) 3.915 -> 3.925 ( +0.26%) [ +0.08% +0.10% +0.00% / +0.26% +0.64% +0.61%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.918 ms / 100) 5.570 -> 5.563 ( -0.13%) [ +0.07% +0.00% +0.02% / +0.00% -0.13% -0.05%] index_select const : Elapsed 0.056 ms (5.574 ms / 100) 5.575 -> 5.570 ( -0.09%) [ +0.00% +0.23% +0.11% / +0.20% +0.00% -0.09%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.580 -> 5.578 ( -0.04%) [ +0.02% +0.07% +0.00% / -0.02% +0.00% -0.04%] index_select linear : Elapsed 0.056 ms (5.581 ms / 100) 5.585 -> 5.578 ( -0.13%) [ +0.05% +0.04% +0.00% / -0.07% -0.04% -0.13%] index_select reverse : Elapsed 0.056 ms (5.588 ms / 100) 5.561 -> 5.565 ( +0.07%) [ +0.09% +0.05% +0.00% / +0.07% +0.09% +0.18%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.16% +0.09% +0.00% / +0.05% -0.04% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.575 -> 5.577 ( +0.04%) [ +0.07% +0.25% +0.00% / +0.18% +0.04% +0.05%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.578 -> 5.577 ( -0.02%) [ +0.22% +0.09% +0.00% / +0.05% -0.02% +0.04%] index_select strided 3 : Elapsed 0.056 ms (5.590 ms / 100) 5.575 -> 5.576 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.25% +0.02% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.579 ms / 100) 5.580 -> 5.577 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.14% -0.05% -0.02%] index_select strided 7 : Elapsed 0.056 ms (5.583 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.00% +0.09% +0.11% / +0.00% +0.23% +0.31%] index_select strided 8 : Elapsed 0.056 ms (5.561 ms / 100) 5.571 -> 5.575 ( +0.07%) [ +0.14% +0.18% +0.00% / +0.20% +0.07% +0.13%] index_select random : Elapsed 0.056 ms (5.579 ms / 100) 5.579 -> 5.574 ( -0.09%) [ +0.07% +0.02% +0.00% / +0.16% -0.05% -0.09%] index_select random_sorted : Elapsed 0.056 ms (5.583 ms / 100) B = [4, 20, 5, 40] (stride (1, 4, 3200, 80)) A = [4, 20, 5, 16] (stride (1, 64, 1280, 4)) dim = 3 4.441 -> 4.444 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.72% +0.68%] index_add_ linear : Elapsed 0.044 ms (4.444 ms / 100) 4.275 -> 4.279 ( +0.09%) [ +0.05% +0.00% +0.02% / +0.09% +0.77% +0.65%] index_copy_ linear : Elapsed 0.043 ms (4.277 ms / 100) 4.467 -> 4.470 ( +0.07%) [ +0.00% +0.00% +0.11% / +0.07% +0.43% +0.36%] index_add_ reverse : Elapsed 0.045 ms (4.467 ms / 100) 4.290 -> 4.291 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.56% +0.51%] index_copy_ reverse : Elapsed 0.043 ms (4.290 ms / 100) 4.443 -> 4.446 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.50% +0.56%] index_add_ spread : Elapsed 0.044 ms (4.444 ms / 100) 4.277 -> 4.282 ( +0.12%) [ +0.00% +0.00% +0.02% / +0.12% +0.47% +0.56%] index_copy_ spread : Elapsed 0.043 ms (4.277 ms / 100) 4.447 -> 4.449 ( +0.04%) [ +0.02% +0.00% +0.02% / +0.04% +0.45% +0.52%] index_add_ strided 3 : Elapsed 0.044 ms (4.448 ms / 100) 4.283 -> 4.285 ( +0.05%) [ +0.00% +0.09% +0.02% / +0.05% +0.58% +0.68%] index_copy_ strided 3 : Elapsed 0.043 ms (4.283 ms / 100) 4.465 -> 4.475 ( +0.22%) [ +0.13% +0.09% +0.00% / +0.22% +0.52% +0.49%] index_add_ strided 7 : Elapsed 0.045 ms (4.471 ms / 100) 4.292 -> 4.301 ( +0.21%) [ +0.00% +0.00% +0.00% / +0.21% +0.51% +0.44%] index_copy_ strided 7 : Elapsed 0.043 ms (4.292 ms / 100) 4.442 -> 4.447 ( +0.11%) [ +0.09% +0.09% +0.00% / +0.11% +0.63% +0.63%] index_add_ perm : Elapsed 0.044 ms (4.446 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.49%] index_copy_ perm : Elapsed 0.043 ms (4.282 ms / 100) 4.443 -> 4.447 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.68% +0.61%] index_add_ perm_sorted : Elapsed 0.044 ms (4.443 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.07% +0.51% +0.49%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.280 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.23% +0.16%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.20% +0.20%] index_select wrap : Elapsed 0.056 ms (5.573 ms / 100) 5.576 -> 5.573 ( -0.05%) [ +0.05% +0.00% +0.02% / -0.05% +0.07% +0.07%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.570 -> 5.573 ( +0.05%) [ +0.00% +0.07% +0.04% / +0.05% +0.22% +0.05%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.00% +0.11% +0.00% / +0.07% +0.04% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.07% +0.02% +0.00% / +0.09% -0.02% -0.05%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.25% +0.22% +0.00% / +0.16% +0.07% +0.16%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.00% +0.00% +0.09% / +0.05% +0.14% +0.02%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.570 -> 5.576 ( +0.11%) [ +0.00% +0.02% +0.02% / +0.11% +0.16% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.07% +0.04% +0.00% / +0.07% +0.02% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.560 -> 5.566 ( +0.11%) [ +0.16% +0.14% +0.00% / +0.11% +0.16% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.00% +0.00% +0.07% / +0.04% +0.13% +0.09%] index_select random : Elapsed 0.056 ms (5.571 ms / 100) 5.573 -> 5.569 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.05% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [4, 20, 5, 40] (stride (20, 1, 80, 400)) A = [4, 20, 5, 16] (stride (1600, 1, 320, 20)) dim = 3 3.742 -> 3.750 ( +0.21%) [ +0.00% +0.16% +0.13% / +0.21% +0.96% +0.94%] index_add_ linear : Elapsed 0.037 ms (3.742 ms / 100) 3.605 -> 3.604 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.69% +0.69%] index_copy_ linear : Elapsed 0.036 ms (3.605 ms / 100) 3.737 -> 3.745 ( +0.21%) [ +0.21% +0.00% +0.13% / +0.21% +0.94% +0.72%] index_add_ reverse : Elapsed 0.037 ms (3.745 ms / 100) 3.602 -> 3.610 ( +0.22%) [ +0.17% +0.00% +0.17% / +0.22% +0.83% +0.75%] index_copy_ reverse : Elapsed 0.036 ms (3.608 ms / 100) 3.744 -> 3.746 ( +0.05%) [ +0.11% +0.03% +0.00% / +0.05% +0.75% +0.80%] index_add_ spread : Elapsed 0.037 ms (3.748 ms / 100) 3.610 -> 3.611 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.72% +0.80%] index_copy_ spread : Elapsed 0.036 ms (3.611 ms / 100) 3.744 -> 3.746 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.05% +0.80% +0.64%] index_add_ strided 3 : Elapsed 0.037 ms (3.750 ms / 100) 3.610 -> 3.609 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.58% +0.53%] index_copy_ strided 3 : Elapsed 0.036 ms (3.610 ms / 100) 3.742 -> 3.746 ( +0.11%) [ +0.11% +0.03% +0.00% / +0.11% +0.72% +0.77%] index_add_ strided 7 : Elapsed 0.037 ms (3.746 ms / 100) 3.604 -> 3.609 ( +0.14%) [ +0.14% +0.06% +0.00% / +0.14% +0.80% +0.78%] index_copy_ strided 7 : Elapsed 0.036 ms (3.609 ms / 100) 3.747 -> 3.747 ( +0.00%) [ +0.11% +0.05% +0.00% / +0.00% +0.83% +0.85%] index_add_ perm : Elapsed 0.038 ms (3.751 ms / 100) 3.604 -> 3.607 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.72% +0.80%] index_copy_ perm : Elapsed 0.036 ms (3.605 ms / 100) 3.740 -> 3.746 ( +0.16%) [ +0.11% +0.08% +0.00% / +0.16% +0.86% +0.91%] index_add_ perm_sorted : Elapsed 0.037 ms (3.744 ms / 100) 3.603 -> 3.605 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.83% +0.80%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.605 ms / 100) 5.472 -> 5.470 ( -0.04%) [ +0.09% +0.15% +0.00% / +0.11% -0.04% +0.05%] index_select const : Elapsed 0.055 ms (5.477 ms / 100) 5.483 -> 5.477 ( -0.11%) [ +0.09% +0.05% +0.00% / -0.02% -0.02% -0.11%] index_select wrap : Elapsed 0.055 ms (5.488 ms / 100) 5.482 -> 5.481 ( -0.02%) [ +0.00% +0.04% +0.07% / -0.02% +0.00% +0.07%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.487 -> 5.481 ( -0.11%) [ +0.05% +0.00% +0.02% / -0.05% -0.05% -0.11%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.471 -> 5.464 ( -0.13%) [ +0.09% +0.00% +0.20% / -0.13% +0.09% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.470 -> 5.470 ( +0.00%) [ +0.11% +0.16% +0.00% / +0.00% +0.07% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.482 ( +0.11%) [ +0.13% +0.00% +0.29% / +0.16% +0.11% +0.18%] index_select spread : Elapsed 0.055 ms (5.483 ms / 100) 5.484 -> 5.475 ( -0.16%) [ +0.00% +0.00% +0.04% / -0.04% -0.16% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.483 -> 5.481 ( -0.04%) [ +0.02% +0.09% +0.00% / -0.04% -0.04% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.484 ms / 100) 5.479 -> 5.480 ( +0.02%) [ +0.00% +0.07% +0.11% / +0.15% +0.05% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.479 ms / 100) 5.475 -> 5.477 ( +0.04%) [ +0.04% +0.00% +0.02% / +0.05% +0.04% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.485 -> 5.476 ( -0.16%) [ +0.02% +0.00% +0.02% / -0.02% -0.16% -0.05%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.480 -> 5.483 ( +0.05%) [ +0.04% +0.00% +0.07% / +0.18% +0.05% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.482 ms / 100) out_shape = [40, 20, 16, 5] in_shape = [4, 20, 16, 5] idx_dim = 0 B = [40, 20, 16, 5] (stride (1600, 5, 100, 1)) A = [4, 20, 16, 5] (stride (1, 320, 20, 4)) dim = 0 1.151 -> 1.152 ( +0.09%) [ +0.26% +0.09% +0.00% / +0.09% +0.35% +0.35%] index_add_ linear : Elapsed 0.012 ms (1.154 ms / 100) 1.112 -> 1.112 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_copy_ linear : Elapsed 0.011 ms (1.112 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.17% +0.00% +0.09% / +0.00% +0.61% +0.61%] index_add_ reverse : Elapsed 0.012 ms (1.151 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.011 ms (1.112 ms / 100) 1.149 -> 1.150 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.61% +0.61%] index_add_ spread : Elapsed 0.012 ms (1.150 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.00% +0.63% +0.00% / +0.09% +0.63% +0.63%] index_copy_ spread : Elapsed 0.011 ms (1.111 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.011 ms (1.149 ms / 100) 1.110 -> 1.112 ( +0.18%) [ +0.00% +0.09% +0.00% / +0.18% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.011 ms (1.110 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +0.17% +0.09% +0.00% / +0.09% +0.70% +0.78%] index_add_ strided 7 : Elapsed 0.012 ms (1.150 ms / 100) 1.110 -> 1.111 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +0.72% +0.72%] index_copy_ strided 7 : Elapsed 0.011 ms (1.110 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.61% +0.52%] index_add_ perm : Elapsed 0.012 ms (1.150 ms / 100) 1.111 -> 1.111 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.54%] index_copy_ perm : Elapsed 0.011 ms (1.111 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.70% +0.61%] index_add_ perm_sorted : Elapsed 0.012 ms (1.150 ms / 100) 1.111 -> 1.110 ( -0.09%) [ +0.18% +0.00% +0.00% / -0.09% +0.63% +0.63%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.113 ms / 100) 8.270 -> 8.291 ( +0.25%) [ +0.18% +0.00% +0.22% / +0.25% +0.30% +0.36%] index_select const : Elapsed 0.083 ms (8.285 ms / 100) 8.278 -> 8.290 ( +0.14%) [ +0.00% +0.17% +0.33% / +0.14% +0.35% +0.18%] index_select wrap : Elapsed 0.083 ms (8.278 ms / 100) 8.277 -> 8.292 ( +0.18%) [ +0.17% +0.00% +0.10% / +0.18% +0.33% +0.23%] index_select linear : Elapsed 0.083 ms (8.291 ms / 100) 8.286 -> 8.281 ( -0.06%) [ +0.01% +0.00% +0.06% / -0.06% +0.10% +0.06%] index_select reverse : Elapsed 0.083 ms (8.287 ms / 100) 8.271 -> 8.274 ( +0.04%) [ +0.00% +0.06% +0.40% / +0.04% +0.36% +0.31%] index_select skip64 : Elapsed 0.083 ms (8.271 ms / 100) 8.273 -> 8.272 ( -0.01%) [ +0.10% +0.00% +0.27% / -0.01% +0.17% +0.15%] index_select skip256 : Elapsed 0.083 ms (8.281 ms / 100) 8.276 -> 8.277 ( +0.01%) [ +0.27% +0.00% +0.05% / +0.01% +0.12% +0.28%] index_select spread : Elapsed 0.083 ms (8.298 ms / 100) 8.271 -> 8.273 ( +0.02%) [ +0.24% +0.00% +0.23% / +0.31% +0.02% +0.22%] index_select strided 3 : Elapsed 0.083 ms (8.291 ms / 100) 8.273 -> 8.268 ( -0.06%) [ +0.00% +0.05% +0.17% / -0.06% +0.21% +0.39%] index_select random : Elapsed 0.083 ms (8.273 ms / 100) 8.277 -> 8.278 ( +0.01%) [ +0.00% +0.34% +0.14% / +0.01% +0.16% +0.16%] index_select random_sorted : Elapsed 0.083 ms (8.277 ms / 100) B = [40, 20, 16, 5] (stride (1600, 16, 1, 320)) A = [4, 20, 16, 5] (stride (16, 320, 1, 64)) dim = 0 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.65% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.25% / +0.08% +0.59% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.49% +0.08% +0.00% / +0.00% +0.41% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.237 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.67% / +0.00% +0.34% +0.50%] index_copy_ reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.34% +0.42%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.32% / +0.00% +0.41% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.00% +0.00% +0.17% / -0.08% +0.42% +0.42%] index_copy_ strided 3 : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.49% +0.41%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.24% +0.41%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.42% / +0.08% +0.50% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.32% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.34%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) 8.699 -> 8.698 ( -0.01%) [ +0.00% +0.21% +0.02% / +0.08% +0.06% -0.01%] index_select const : Elapsed 0.087 ms (8.699 ms / 100) 8.703 -> 8.710 ( +0.08%) [ +0.10% +0.00% +0.22% / +0.08% +0.32% +0.43%] index_select wrap : Elapsed 0.087 ms (8.712 ms / 100) 8.697 -> 8.716 ( +0.22%) [ +0.00% +0.30% +0.11% / +0.38% +0.22% +0.31%] index_select linear : Elapsed 0.087 ms (8.697 ms / 100) 8.689 -> 8.704 ( +0.17%) [ +0.00% +0.09% +0.39% / +0.17% +0.40% +0.40%] index_select reverse : Elapsed 0.087 ms (8.689 ms / 100) 8.686 -> 8.692 ( +0.07%) [ +0.00% +0.26% +0.07% / +0.07% +0.29% +0.21%] index_select skip64 : Elapsed 0.087 ms (8.686 ms / 100) 8.688 -> 8.701 ( +0.15%) [ +0.00% +0.12% +0.03% / +0.15% +0.21% +0.29%] index_select skip256 : Elapsed 0.087 ms (8.688 ms / 100) 8.724 -> 8.722 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +0.01% +0.01%] index_select spread : Elapsed 0.087 ms (8.730 ms / 100) 8.711 -> 8.714 ( +0.03%) [ +0.15% +0.00% +0.01% / +0.18% +0.15% +0.03%] index_select strided 3 : Elapsed 0.087 ms (8.724 ms / 100) 8.713 -> 8.712 ( -0.01%) [ +0.20% +0.00% +0.01% / -0.01% +0.02% +0.17%] index_select random : Elapsed 0.087 ms (8.730 ms / 100) 8.713 -> 8.721 ( +0.09%) [ +0.34% +0.06% +0.00% / +0.09% +0.11% +0.21%] index_select random_sorted : Elapsed 0.087 ms (8.743 ms / 100) B = [40, 20, 16, 5] (stride (80, 3200, 1, 16)) A = [4, 20, 16, 5] (stride (1, 4, 80, 1280)) dim = 0 1.407 -> 1.406 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.50% +0.43%] index_add_ linear : Elapsed 0.014 ms (1.407 ms / 100) 1.360 -> 1.362 ( +0.15%) [ +0.00% +0.22% +0.07% / +0.15% +0.66% +0.66%] index_copy_ linear : Elapsed 0.014 ms (1.360 ms / 100) 1.406 -> 1.407 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.57% +0.71%] index_add_ reverse : Elapsed 0.014 ms (1.407 ms / 100) 1.359 -> 1.361 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.66% +0.88%] index_copy_ reverse : Elapsed 0.014 ms (1.361 ms / 100) 1.406 -> 1.407 ( +0.07%) [ +0.07% +0.21% +0.00% / +0.07% +0.64% +0.64%] index_add_ spread : Elapsed 0.014 ms (1.407 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.59%] index_copy_ spread : Elapsed 0.014 ms (1.360 ms / 100) 1.405 -> 1.406 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.359 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.74% +0.66%] index_copy_ strided 3 : Elapsed 0.014 ms (1.360 ms / 100) 1.406 -> 1.405 ( -0.07%) [ +0.00% +0.00% +0.36% / -0.07% +0.64% +0.71%] index_add_ strided 7 : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.29% / +0.00% +0.66% +0.66%] index_copy_ strided 7 : Elapsed 0.014 ms (1.360 ms / 100) 1.406 -> 1.406 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.57% +0.57%] index_add_ perm : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.66% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.360 ms / 100) 1.406 -> 1.406 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.64% +0.71%] index_add_ perm_sorted : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.359 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.66% +0.59%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.360 ms / 100) 9.175 -> 9.167 ( -0.09%) [ +0.10% +0.00% +0.03% / +0.03% -0.09% -0.04%] index_select const : Elapsed 0.092 ms (9.184 ms / 100) 9.176 -> 9.181 ( +0.05%) [ +0.14% +0.07% +0.00% / +0.05% +0.13% +0.05%] index_select wrap : Elapsed 0.092 ms (9.189 ms / 100) 9.173 -> 9.167 ( -0.07%) [ +0.00% +0.01% +0.12% / +0.08% -0.05% -0.07%] index_select linear : Elapsed 0.092 ms (9.173 ms / 100) 9.163 -> 9.162 ( -0.01%) [ +0.11% +0.14% +0.00% / +0.24% +0.11% -0.01%] index_select reverse : Elapsed 0.092 ms (9.173 ms / 100) 9.187 -> 9.175 ( -0.13%) [ +0.03% +0.05% +0.00% / -0.12% +0.02% -0.13%] index_select skip64 : Elapsed 0.092 ms (9.190 ms / 100) 9.177 -> 9.174 ( -0.03%) [ +0.01% +0.00% +0.25% / -0.02% -0.03% +0.10%] index_select skip256 : Elapsed 0.092 ms (9.178 ms / 100) 9.176 -> 9.182 ( +0.07%) [ +0.05% +0.15% +0.00% / +0.25% +0.07% +0.07%] index_select spread : Elapsed 0.092 ms (9.181 ms / 100) 9.189 -> 9.167 ( -0.24%) [ +0.00% +0.04% +0.00% / -0.05% -0.16% -0.24%] index_select strided 3 : Elapsed 0.092 ms (9.189 ms / 100) 9.179 -> 9.173 ( -0.07%) [ +0.04% +0.00% +0.07% / -0.07% -0.02% +0.04%] index_select random : Elapsed 0.092 ms (9.183 ms / 100) 9.187 -> 9.166 ( -0.23%) [ +0.00% +0.05% +0.22% / -0.14% -0.11% -0.23%] index_select random_sorted : Elapsed 0.092 ms (9.187 ms / 100) B = [40, 20, 16, 5] (stride (5, 3200, 200, 1)) A = [4, 20, 16, 5] (stride (1600, 80, 5, 1)) dim = 0 1.065 -> 1.066 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.56% +0.56%] index_add_ linear : Elapsed 0.011 ms (1.066 ms / 100) 1.028 -> 1.029 ( +0.10%) [ +0.00% +0.19% +0.00% / +0.10% +0.49% +0.49%] index_copy_ linear : Elapsed 0.010 ms (1.028 ms / 100) 1.066 -> 1.068 ( +0.19%) [ +0.09% +0.09% +0.00% / +0.19% +0.38% +0.47%] index_add_ reverse : Elapsed 0.011 ms (1.067 ms / 100) 1.030 -> 1.029 ( -0.10%) [ +0.00% +0.00% +0.29% / -0.10% +0.10% +0.10%] index_copy_ reverse : Elapsed 0.010 ms (1.030 ms / 100) 1.069 -> 1.069 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.37% +0.47%] index_add_ spread : Elapsed 0.011 ms (1.069 ms / 100) 1.030 -> 1.030 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.39% +0.49%] index_copy_ spread : Elapsed 0.010 ms (1.030 ms / 100) 1.066 -> 1.066 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_add_ strided 3 : Elapsed 0.011 ms (1.067 ms / 100) 1.029 -> 1.028 ( -0.10%) [ +0.00% +0.00% +0.10% / -0.10% +0.39% +0.19%] index_copy_ strided 3 : Elapsed 0.010 ms (1.029 ms / 100) 1.068 -> 1.068 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +0.47% +0.47%] index_add_ strided 7 : Elapsed 0.011 ms (1.068 ms / 100) 1.030 -> 1.030 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.49% +0.49%] index_copy_ strided 7 : Elapsed 0.010 ms (1.030 ms / 100) 1.068 -> 1.069 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.19% +0.37%] index_add_ perm : Elapsed 0.011 ms (1.069 ms / 100) 1.030 -> 1.030 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.19% +0.39%] index_copy_ perm : Elapsed 0.010 ms (1.030 ms / 100) 1.068 -> 1.069 ( +0.09%) [ +0.19% +0.09% +0.00% / +0.09% +0.47% +0.37%] index_add_ perm_sorted : Elapsed 0.011 ms (1.070 ms / 100) 1.029 -> 1.033 ( +0.39%) [ +0.10% +0.10% +0.00% / +0.39% +0.49% +0.49%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.030 ms / 100) 7.892 -> 7.893 ( +0.01%) [ +0.04% +0.04% +0.00% / +0.03% +0.01% +0.35%] index_select const : Elapsed 0.079 ms (7.895 ms / 100) 7.905 -> 7.913 ( +0.10%) [ +0.19% +0.00% +0.03% / +0.10% +0.38% +0.52%] index_select wrap : Elapsed 0.079 ms (7.920 ms / 100) 7.903 -> 7.903 ( +0.00%) [ +0.09% +0.00% +0.10% / +0.00% +0.08% +0.20%] index_select linear : Elapsed 0.079 ms (7.910 ms / 100) 7.927 -> 7.931 ( +0.05%) [ +0.11% +0.00% +0.04% / +0.05% +0.29% +0.23%] index_select reverse : Elapsed 0.079 ms (7.936 ms / 100) 7.885 -> 7.875 ( -0.13%) [ +0.10% +0.09% +0.00% / -0.13% +0.24% +0.48%] index_select skip64 : Elapsed 0.079 ms (7.893 ms / 100) 7.879 -> 7.905 ( +0.33%) [ +0.00% +0.23% +0.04% / +0.33% +0.43% +0.42%] index_select skip256 : Elapsed 0.079 ms (7.879 ms / 100) 7.902 -> 7.910 ( +0.10%) [ +0.15% +0.05% +0.00% / +0.10% +0.35% +0.29%] index_select spread : Elapsed 0.079 ms (7.914 ms / 100) 7.923 -> 7.903 ( -0.25%) [ +0.04% +0.00% +0.21% / -0.25% +0.16% +0.19%] index_select strided 3 : Elapsed 0.079 ms (7.926 ms / 100) 7.898 -> 7.923 ( +0.32%) [ +0.19% +0.43% +0.00% / +0.32% +0.52% +0.39%] index_select random : Elapsed 0.079 ms (7.913 ms / 100) 7.896 -> 7.901 ( +0.06%) [ +0.00% +0.18% +0.25% / +0.06% +0.24% +0.39%] index_select random_sorted : Elapsed 0.079 ms (7.896 ms / 100) B = [40, 20, 16, 5] (stride (1, 40, 800, 12800)) A = [4, 20, 16, 5] (stride (16, 64, 1, 1280)) dim = 0 1.317 -> 1.319 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.38% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.55% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.281 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.15% +0.08% / -0.08% +0.38% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.329 -> 1.328 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.53% +0.23%] index_add_ spread : Elapsed 0.013 ms (1.330 ms / 100) 1.288 -> 1.288 ( +0.00%) [ +0.00% +0.31% +0.31% / +0.00% +0.31% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.319 -> 1.318 ( -0.08%) [ +0.15% +0.00% +0.15% / -0.08% +0.76% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.321 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.00% +0.23% +0.08% / +0.16% +0.86% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.321 -> 1.320 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.76% +0.83%] index_add_ strided 7 : Elapsed 0.013 ms (1.321 ms / 100) 1.282 -> 1.285 ( +0.23%) [ +0.00% +0.16% +0.08% / +0.23% +0.62% +0.62%] index_copy_ strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.325 -> 1.328 ( +0.23%) [ +0.00% +0.23% +0.23% / +0.23% +0.53% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.325 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.287 ms / 100) 1.325 -> 1.328 ( +0.23%) [ +0.00% +0.15% +0.08% / +0.23% +0.68% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.325 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.288 ms / 100) 9.238 -> 9.234 ( -0.04%) [ +0.05% +0.00% +0.02% / -0.04% +0.02% +0.19%] index_select const : Elapsed 0.092 ms (9.243 ms / 100) 9.254 -> 9.257 ( +0.03%) [ +0.00% +0.04% +0.12% / +0.03% +0.12% +0.13%] index_select wrap : Elapsed 0.093 ms (9.254 ms / 100) 9.262 -> 9.250 ( -0.13%) [ +0.02% +0.00% +0.10% / -0.13% +0.15% +0.14%] index_select linear : Elapsed 0.093 ms (9.264 ms / 100) 9.233 -> 9.252 ( +0.21%) [ +0.29% +0.14% +0.00% / +0.34% +0.21% +0.36%] index_select reverse : Elapsed 0.093 ms (9.260 ms / 100) 9.235 -> 9.236 ( +0.01%) [ +0.00% +0.16% +0.05% / +0.01% +0.05% +0.35%] index_select skip64 : Elapsed 0.092 ms (9.235 ms / 100) 9.222 -> 9.233 ( +0.12%) [ +0.00% +0.12% +0.33% / +0.12% +0.46% +0.23%] index_select skip256 : Elapsed 0.092 ms (9.222 ms / 100) 9.264 -> 9.268 ( +0.04%) [ +0.00% +0.03% +0.22% / +0.04% +0.24% +0.15%] index_select spread : Elapsed 0.093 ms (9.264 ms / 100) 9.266 -> 9.261 ( -0.05%) [ +0.02% +0.00% +0.03% / -0.05% +0.13% +0.21%] index_select strided 3 : Elapsed 0.093 ms (9.268 ms / 100) 9.250 -> 9.266 ( +0.17%) [ +0.06% +0.03% +0.00% / +0.17% +0.25% +0.39%] index_select random : Elapsed 0.093 ms (9.256 ms / 100) 9.261 -> 9.263 ( +0.02%) [ +0.00% +0.17% +0.22% / +0.02% +0.22% +0.08%] index_select random_sorted : Elapsed 0.093 ms (9.261 ms / 100) out_shape = [4, 40, 16, 5] in_shape = [4, 20, 16, 5] idx_dim = 1 B = [4, 40, 16, 5] (stride (3200, 1, 200, 40)) A = [4, 20, 16, 5] (stride (1600, 1, 20, 320)) dim = 1 2.393 -> 2.404 ( +0.46%) [ +0.25% +0.13% +0.00% / +0.46% +0.71% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.414 ( +0.67%) [ +0.00% +0.00% +0.08% / +0.67% +0.79% +0.92%] index_copy_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.394 -> 2.404 ( +0.42%) [ +0.25% +0.58% +0.00% / +0.58% +0.42% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.401 -> 2.410 ( +0.37%) [ +0.00% +0.08% +0.00% / +0.37% +0.67% +0.71%] index_copy_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.405 -> 2.414 ( +0.37%) [ +0.04% +0.00% +0.04% / +0.50% +0.37% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.419 -> 2.435 ( +0.66%) [ +0.08% +0.00% +0.17% / +0.70% +0.66% +0.66%] index_copy_ spread : Elapsed 0.024 ms (2.421 ms / 100) 2.408 -> 2.418 ( +0.42%) [ +0.12% +0.00% +0.00% / +0.46% +0.50% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.419 -> 2.427 ( +0.33%) [ +0.00% +0.00% +0.04% / +0.33% +0.54% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.409 -> 2.421 ( +0.50%) [ +0.21% +0.00% +0.00% / +0.58% +0.50% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.417 -> 2.437 ( +0.83%) [ +0.21% +0.00% +0.04% / +1.12% +0.87% +0.83%] index_copy_ strided 7 : Elapsed 0.024 ms (2.422 ms / 100) 2.407 -> 2.419 ( +0.50%) [ +0.00% +0.08% +0.00% / +0.50% +0.62% +0.58%] index_add_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.417 -> 2.431 ( +0.58%) [ +0.12% +0.00% +0.12% / +0.58% +0.74% +0.74%] index_copy_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.406 -> 2.416 ( +0.42%) [ +0.00% +0.17% +0.08% / +0.42% +0.67% +0.54%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.416 -> 2.428 ( +0.50%) [ +0.00% +0.08% +0.21% / +0.50% +0.75% +0.83%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.416 ms / 100) 4.421 -> 4.419 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.02% -0.05% +0.09%] index_select const : Elapsed 0.044 ms (4.421 ms / 100) 4.425 -> 4.423 ( -0.05%) [ +0.00% +0.20% +0.05% / +0.09% +0.14% -0.05%] index_select wrap : Elapsed 0.044 ms (4.425 ms / 100) 4.427 -> 4.427 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.07% +0.16%] index_select linear : Elapsed 0.044 ms (4.427 ms / 100) 4.425 -> 4.429 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.11% +0.09% +0.25%] index_select reverse : Elapsed 0.044 ms (4.429 ms / 100) 4.418 -> 4.420 ( +0.05%) [ +0.14% +0.09% +0.00% / +0.16% +0.20% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.424 ms / 100) 4.416 -> 4.424 ( +0.18%) [ +0.18% +0.20% +0.00% / +0.18% +0.54% +0.23%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.429 -> 4.427 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.11% -0.05%] index_select spread : Elapsed 0.044 ms (4.435 ms / 100) 4.425 -> 4.428 ( +0.07%) [ +0.00% +0.14% +0.02% / +0.20% +0.20% +0.07%] index_select strided 3 : Elapsed 0.044 ms (4.425 ms / 100) 4.422 -> 4.428 ( +0.14%) [ +0.25% +0.18% +0.00% / +0.23% +0.14% +0.43%] index_select strided 5 : Elapsed 0.044 ms (4.433 ms / 100) 4.424 -> 4.426 ( +0.05%) [ +0.20% +0.00% +0.11% / +0.05% +0.11% +0.14%] index_select strided 7 : Elapsed 0.044 ms (4.433 ms / 100) 4.423 -> 4.428 ( +0.11%) [ +0.00% +0.09% +0.09% / +0.11% +0.27% +0.23%] index_select strided 8 : Elapsed 0.044 ms (4.423 ms / 100) 4.422 -> 4.425 ( +0.07%) [ +0.00% +0.18% +0.09% / +0.07% +0.27% +0.16%] index_select strided 16 : Elapsed 0.044 ms (4.422 ms / 100) 4.423 -> 4.427 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.11% +0.09% +0.18%] index_select random : Elapsed 0.044 ms (4.427 ms / 100) 4.416 -> 4.425 ( +0.20%) [ +0.32% +0.23% +0.00% / +0.20% +0.29% +0.38%] index_select random_sorted : Elapsed 0.044 ms (4.430 ms / 100) B = [4, 40, 16, 5] (stride (80, 320, 1, 16)) A = [4, 20, 16, 5] (stride (1600, 1, 100, 20)) dim = 1 2.398 -> 2.405 ( +0.29%) [ +0.00% +0.17% +0.33% / +0.29% +0.88% +0.88%] index_add_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.398 -> 2.411 ( +0.54%) [ +0.00% +0.04% +0.08% / +0.54% +0.67% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.392 -> 2.407 ( +0.63%) [ +0.17% +0.00% +0.13% / +0.63% +1.17% +1.17%] index_add_ reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.392 -> 2.406 ( +0.59%) [ +0.21% +0.00% +0.04% / +0.59% +1.05% +1.05%] index_copy_ reverse : Elapsed 0.024 ms (2.397 ms / 100) 2.399 -> 2.414 ( +0.63%) [ +0.04% +0.00% +0.04% / +0.63% +0.92% +0.88%] index_add_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.396 -> 2.412 ( +0.67%) [ +0.13% +0.00% +0.13% / +0.67% +0.96% +1.00%] index_copy_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.403 -> 2.413 ( +0.42%) [ +0.00% +0.25% +0.04% / +0.50% +0.42% +0.67%] index_add_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.13% +0.08% +0.00% / +0.58% +0.63% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.00% +0.17% +0.25% / +0.46% +0.75% +0.62%] index_add_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.411 ( +0.37%) [ +0.00% +0.08% +0.00% / +0.58% +0.37% +0.46%] index_copy_ strided 7 : Elapsed 0.024 ms (2.402 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.00% +0.04% +0.17% / +1.25% +0.50% +0.37%] index_add_ perm : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.410 ( +0.29%) [ +0.00% +0.25% +0.12% / +0.71% +0.29% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.403 ms / 100) 2.408 -> 2.409 ( +0.04%) [ +0.00% +0.08% +0.00% / +0.37% +0.12% +0.04%] index_add_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.403 -> 2.407 ( +0.17%) [ +0.29% +0.00% +0.25% / +0.50% +0.37% +0.17%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 4.435 -> 4.435 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.16% +0.00% +0.07%] index_select const : Elapsed 0.044 ms (4.435 ms / 100) 4.437 -> 4.437 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.14% +0.00% +0.07%] index_select wrap : Elapsed 0.044 ms (4.442 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.00% +0.02% +0.20% / +0.05% +0.05% +0.09%] index_select linear : Elapsed 0.044 ms (4.437 ms / 100) 4.438 -> 4.441 ( +0.07%) [ +0.20% +0.00% +0.16% / +0.07% +0.09% +0.07%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.438 -> 4.427 ( -0.25%) [ +0.00% +0.05% +0.00% / +0.09% -0.25% -0.20%] index_select skip64 : Elapsed 0.044 ms (4.438 ms / 100) 4.434 -> 4.430 ( -0.09%) [ +0.00% +0.07% +0.07% / +0.00% -0.09% +0.05%] index_select skip256 : Elapsed 0.044 ms (4.434 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.18% +0.20% +0.00% / +0.11% +0.29% +0.16%] index_select spread : Elapsed 0.044 ms (4.444 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.07% +0.16% +0.00% / +0.09% +0.16% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.439 ms / 100) 4.435 -> 4.444 ( +0.20%) [ +0.00% +0.05% +0.02% / +0.25% +0.20% +0.20%] index_select strided 5 : Elapsed 0.044 ms (4.435 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.11% +0.14% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.440 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.05% +0.07% +0.02%] index_select strided 8 : Elapsed 0.044 ms (4.440 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.00% +0.18% +0.16% / +0.11% +0.11% +0.38%] index_select strided 16 : Elapsed 0.044 ms (4.436 ms / 100) 4.434 -> 4.440 ( +0.14%) [ +0.27% +0.00% +0.11% / +0.23% +0.18% +0.14%] index_select random : Elapsed 0.044 ms (4.446 ms / 100) 4.441 -> 4.435 ( -0.14%) [ +0.00% +0.07% +0.00% / +0.14% -0.14% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.441 ms / 100) B = [4, 40, 16, 5] (stride (1, 4, 800, 160)) A = [4, 20, 16, 5] (stride (1600, 16, 1, 320)) dim = 1 2.402 -> 2.412 ( +0.42%) [ +0.04% +0.04% +0.00% / +0.42% +0.79% +0.83%] index_add_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.421 ( +0.83%) [ +0.08% +0.08% +0.00% / +1.08% +0.83% +1.04%] index_copy_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.415 ( +0.54%) [ +0.00% +0.37% +0.37% / +0.54% +0.62% +0.62%] index_add_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.400 -> 2.417 ( +0.71%) [ +0.33% +0.00% +0.13% / +0.71% +0.87% +0.75%] index_copy_ reverse : Elapsed 0.024 ms (2.408 ms / 100) 2.422 -> 2.433 ( +0.45%) [ +0.12% +0.33% +0.00% / +0.54% +0.45% +0.62%] index_add_ spread : Elapsed 0.024 ms (2.425 ms / 100) 2.432 -> 2.446 ( +0.58%) [ +0.21% +0.00% +0.12% / +0.58% +0.78% +0.74%] index_copy_ spread : Elapsed 0.024 ms (2.437 ms / 100) 2.417 -> 2.427 ( +0.41%) [ +0.00% +0.00% +0.12% / +0.62% +0.41% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.417 ms / 100) 2.424 -> 2.440 ( +0.66%) [ +0.00% +0.00% +0.00% / +0.95% +0.66% +0.74%] index_copy_ strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.416 -> 2.429 ( +0.54%) [ +0.12% +0.25% +0.00% / +0.70% +0.58% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.419 ms / 100) 2.424 -> 2.439 ( +0.62%) [ +0.00% +0.00% +0.08% / +0.78% +0.95% +0.62%] index_copy_ strided 7 : Elapsed 0.024 ms (2.424 ms / 100) 2.415 -> 2.425 ( +0.41%) [ +0.00% +0.04% +0.00% / +0.41% +0.70% +0.79%] index_add_ perm : Elapsed 0.024 ms (2.415 ms / 100) 2.422 -> 2.432 ( +0.41%) [ +0.00% +0.17% +0.12% / +0.41% +0.87% +0.70%] index_copy_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.414 -> 2.429 ( +0.62%) [ +0.17% +0.00% +0.04% / +0.62% +0.62% +0.70%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.419 -> 2.435 ( +0.66%) [ +0.00% +0.25% +0.17% / +0.66% +0.95% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 4.417 -> 4.423 ( +0.14%) [ +0.20% +0.00% +0.20% / +0.14% +0.20% +0.20%] index_select const : Elapsed 0.044 ms (4.426 ms / 100) 4.429 -> 4.431 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.05% +0.23% +0.29%] index_select wrap : Elapsed 0.044 ms (4.431 ms / 100) 4.427 -> 4.434 ( +0.16%) [ +0.07% +0.00% +0.11% / +0.16% +0.27% +0.36%] index_select linear : Elapsed 0.044 ms (4.430 ms / 100) 4.426 -> 4.432 ( +0.14%) [ +0.27% +0.16% +0.00% / +0.14% +0.41% +0.29%] index_select reverse : Elapsed 0.044 ms (4.438 ms / 100) 4.419 -> 4.417 ( -0.05%) [ +0.00% +0.07% +0.09% / +0.02% -0.05% +0.02%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.418 -> 4.420 ( +0.05%) [ +0.00% +0.02% +0.09% / +0.05% +0.20% +0.14%] index_select skip256 : Elapsed 0.044 ms (4.418 ms / 100) 4.421 -> 4.429 ( +0.18%) [ +0.29% +0.29% +0.00% / +0.18% +0.48% +0.36%] index_select spread : Elapsed 0.044 ms (4.434 ms / 100) 4.431 -> 4.431 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.14% +0.09%] index_select strided 3 : Elapsed 0.044 ms (4.433 ms / 100) 4.421 -> 4.424 ( +0.07%) [ +0.11% +0.00% +0.02% / +0.23% +0.18% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.426 ms / 100) 4.426 -> 4.429 ( +0.07%) [ +0.05% +0.23% +0.00% / +0.07% +0.41% +0.50%] index_select strided 7 : Elapsed 0.044 ms (4.428 ms / 100) 4.414 -> 4.427 ( +0.29%) [ +0.00% +0.27% +0.20% / +0.29% +0.29% +0.32%] index_select strided 8 : Elapsed 0.044 ms (4.414 ms / 100) 4.419 -> 4.418 ( -0.02%) [ +0.05% +0.00% +0.16% / -0.02% +0.09% +0.20%] index_select strided 16 : Elapsed 0.044 ms (4.421 ms / 100) 4.429 -> 4.431 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.25% +0.29%] index_select random : Elapsed 0.044 ms (4.431 ms / 100) 4.426 -> 4.430 ( +0.09%) [ +0.18% +0.05% +0.00% / +0.09% +0.25% +0.34%] index_select random_sorted : Elapsed 0.044 ms (4.434 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) dim = 1 fill_cnt = 20 2.878 -> 2.841 ( -1.29%) [ +0.14% +0.00% +0.42% / -1.18% -1.29% -1.22%] index_fill_ const : Elapsed 0.029 ms (2.882 ms / 100) 2.892 -> 2.850 ( -1.45%) [ +0.14% +0.31% +0.00% / -1.45% -1.28% -1.38%] index_fill_ linear : Elapsed 0.029 ms (2.896 ms / 100) 2.891 -> 2.849 ( -1.45%) [ +0.00% +0.10% +0.10% / -1.07% -1.45% -1.42%] index_fill_ reverse : Elapsed 0.029 ms (2.891 ms / 100) 2.887 -> 2.839 ( -1.66%) [ +0.00% +0.03% +0.10% / -1.42% -1.66% -1.66%] index_fill_ skip64 : Elapsed 0.029 ms (2.887 ms / 100) 2.883 -> 2.843 ( -1.39%) [ +0.03% +0.00% +0.03% / -1.32% -1.39% -1.39%] index_fill_ skip256 : Elapsed 0.029 ms (2.884 ms / 100) 2.913 -> 2.868 ( -1.54%) [ +0.10% +0.03% +0.00% / -1.44% -1.48% -1.54%] index_fill_ spread : Elapsed 0.029 ms (2.916 ms / 100) 2.911 -> 2.867 ( -1.51%) [ +0.00% +0.21% +0.27% / -1.51% -1.31% -1.17%] index_fill_ strided 3 : Elapsed 0.029 ms (2.911 ms / 100) 2.918 -> 2.866 ( -1.78%) [ +0.10% +0.00% +0.03% / -1.54% -1.61% -1.78%] index_fill_ strided 5 : Elapsed 0.029 ms (2.921 ms / 100) 2.913 -> 2.868 ( -1.54%) [ +0.24% +0.00% +0.07% / -1.54% -1.37% -1.48%] index_fill_ strided 7 : Elapsed 0.029 ms (2.920 ms / 100) 2.912 -> 2.869 ( -1.48%) [ +0.17% +0.00% +0.10% / -1.27% -1.48% -1.37%] index_fill_ strided 8 : Elapsed 0.029 ms (2.917 ms / 100) 2.911 -> 2.867 ( -1.51%) [ +0.17% +0.00% +0.17% / -1.31% -1.51% -1.34%] index_fill_ strided 16 : Elapsed 0.029 ms (2.916 ms / 100) 2.912 -> 2.866 ( -1.58%) [ +0.00% +0.24% +0.17% / -1.58% -1.41% -1.27%] index_fill_ random : Elapsed 0.029 ms (2.912 ms / 100) 2.914 -> 2.870 ( -1.51%) [ +0.00% +0.21% +0.00% / -1.51% -1.41% -1.34%] index_fill_ random_sorted : Elapsed 0.029 ms (2.914 ms / 100) 2.916 -> 2.864 ( -1.78%) [ +0.03% +0.34% +0.00% / -1.44% -1.44% -1.78%] index_fill_ perm : Elapsed 0.029 ms (2.917 ms / 100) 2.915 -> 2.869 ( -1.58%) [ +0.14% +0.03% +0.00% / -1.30% -1.44% -1.58%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.919 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) A = [4, 20, 16, 5] (stride (1, 320, 4, 64)) dim = 1 2.452 -> 2.460 ( +0.33%) [ +0.00% +0.08% +0.00% / +0.33% +0.45% +0.45%] index_add_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.466 ( +0.49%) [ +0.04% +0.00% +0.04% / +0.49% +0.57% +0.69%] index_copy_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.441 -> 2.454 ( +0.53%) [ +0.12% +0.04% +0.00% / +0.53% +1.15% +0.98%] index_add_ reverse : Elapsed 0.024 ms (2.444 ms / 100) 2.448 -> 2.457 ( +0.37%) [ +0.00% +0.04% +0.04% / +0.37% +1.02% +1.14%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.452 -> 2.468 ( +0.65%) [ +0.20% +0.12% +0.00% / +0.65% +0.82% +1.02%] index_add_ spread : Elapsed 0.025 ms (2.457 ms / 100) 2.462 -> 2.479 ( +0.69%) [ +0.16% +0.08% +0.00% / +0.69% +1.18% +1.10%] index_copy_ spread : Elapsed 0.025 ms (2.466 ms / 100) 2.463 -> 2.474 ( +0.45%) [ +0.00% +0.04% +0.41% / +0.45% +0.57% +0.53%] index_add_ strided 3 : Elapsed 0.025 ms (2.463 ms / 100) 2.469 -> 2.481 ( +0.49%) [ +0.12% +0.00% +0.24% / +0.49% +0.65% +0.53%] index_copy_ strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.463 -> 2.470 ( +0.28%) [ +0.08% +0.00% +0.04% / +0.28% +0.65% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.470 -> 2.479 ( +0.36%) [ +0.04% +0.04% +0.00% / +0.36% +0.57% +0.53%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.462 -> 2.466 ( +0.16%) [ +0.28% +0.32% +0.00% / +0.57% +0.16% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.478 ( +0.24%) [ +0.04% +0.00% +0.00% / +0.69% +0.24% +0.24%] index_copy_ perm : Elapsed 0.025 ms (2.473 ms / 100) 2.465 -> 2.470 ( +0.20%) [ +0.04% +0.08% +0.00% / +0.45% +0.28% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) 2.473 -> 2.477 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.57% +0.32% +0.16%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.04% +0.02% +0.09%] index_select const : Elapsed 0.045 ms (4.502 ms / 100) 4.504 -> 4.504 ( +0.00%) [ +0.00% +0.20% +0.04% / +0.00% +0.11% +0.09%] index_select wrap : Elapsed 0.045 ms (4.504 ms / 100) 4.501 -> 4.510 ( +0.20%) [ +0.09% +0.00% +0.18% / +0.20% +0.20% +0.22%] index_select linear : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.502 ( -0.07%) [ +0.00% +0.09% +0.09% / +0.04% -0.04% -0.07%] index_select reverse : Elapsed 0.045 ms (4.505 ms / 100) 4.502 -> 4.494 ( -0.18%) [ +0.07% +0.00% +0.02% / +0.07% -0.18% -0.07%] index_select skip64 : Elapsed 0.045 ms (4.505 ms / 100) 4.492 -> 4.502 ( +0.22%) [ +0.24% +0.00% +0.18% / +0.24% +0.22% +0.38%] index_select skip256 : Elapsed 0.045 ms (4.503 ms / 100) 4.504 -> 4.510 ( +0.13%) [ +0.09% +0.02% +0.00% / +0.13% +0.16% +0.18%] index_select spread : Elapsed 0.045 ms (4.508 ms / 100) 4.505 -> 4.502 ( -0.07%) [ +0.18% +0.00% +0.02% / -0.07% +0.09% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.513 ms / 100) 4.496 -> 4.500 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.16% +0.09%] index_select strided 5 : Elapsed 0.045 ms (4.496 ms / 100) 4.499 -> 4.511 ( +0.27%) [ +0.00% +0.13% +0.04% / +0.33% +0.27% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.499 ms / 100) 4.502 -> 4.499 ( -0.07%) [ +0.11% +0.00% +0.16% / +0.09% -0.07% +0.00%] index_select strided 8 : Elapsed 0.045 ms (4.507 ms / 100) 4.502 -> 4.501 ( -0.02%) [ +0.00% +0.02% +0.04% / +0.02% -0.02% +0.00%] index_select strided 16 : Elapsed 0.045 ms (4.502 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.11% +0.18% +0.00% / +0.13% +0.02% +0.20%] index_select random : Elapsed 0.045 ms (4.508 ms / 100) 4.508 -> 4.507 ( -0.02%) [ +0.07% +0.00% +0.02% / +0.07% -0.02% +0.02%] index_select random_sorted : Elapsed 0.045 ms (4.511 ms / 100) B = [4, 40, 16, 5] (stride (40, 1, 160, 2560)) A = [4, 20, 16, 5] (stride (20, 1, 400, 80)) dim = 1 2.409 -> 2.424 ( +0.62%) [ +0.00% +0.00% +0.08% / +0.62% +0.62% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.424 ( +0.46%) [ +0.00% +0.04% +0.21% / +0.46% +0.62% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.413 ms / 100) 2.409 -> 2.424 ( +0.62%) [ +0.25% +0.21% +0.00% / +0.95% +0.62% +0.62%] index_add_ reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.413 -> 2.427 ( +0.58%) [ +0.00% +0.25% +0.08% / +1.20% +0.58% +0.66%] index_copy_ reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.425 -> 2.435 ( +0.41%) [ +0.00% +0.00% +0.00% / +0.41% +0.41% +0.41%] index_add_ spread : Elapsed 0.024 ms (2.425 ms / 100) 2.432 -> 2.445 ( +0.53%) [ +0.21% +0.08% +0.00% / +0.62% +0.53% +0.53%] index_copy_ spread : Elapsed 0.024 ms (2.437 ms / 100) 2.420 -> 2.435 ( +0.62%) [ +0.00% +0.41% +0.12% / +0.70% +0.62% +0.79%] index_add_ strided 3 : Elapsed 0.024 ms (2.420 ms / 100) 2.430 -> 2.445 ( +0.62%) [ +0.25% +0.33% +0.00% / +0.66% +0.74% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.422 -> 2.434 ( +0.50%) [ +0.25% +0.25% +0.00% / +0.70% +0.62% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.432 -> 2.445 ( +0.53%) [ +0.00% +0.04% +0.12% / +0.66% +0.53% +0.74%] index_copy_ strided 7 : Elapsed 0.024 ms (2.432 ms / 100) 2.425 -> 2.437 ( +0.49%) [ +0.08% +0.00% +0.33% / +0.49% +0.82% +0.49%] index_add_ perm : Elapsed 0.024 ms (2.427 ms / 100) 2.431 -> 2.445 ( +0.58%) [ +0.12% +0.12% +0.00% / +0.74% +0.74% +0.58%] index_copy_ perm : Elapsed 0.024 ms (2.434 ms / 100) 2.423 -> 2.434 ( +0.45%) [ +0.08% +0.17% +0.00% / +0.66% +0.66% +0.45%] index_add_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.428 -> 2.450 ( +0.91%) [ +0.00% +0.21% +0.16% / +0.99% +0.91% +0.95%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.428 ms / 100) 4.439 -> 4.441 ( +0.05%) [ +0.09% +0.14% +0.00% / +0.05% +0.05% +0.18%] index_select const : Elapsed 0.044 ms (4.443 ms / 100) 4.440 -> 4.447 ( +0.16%) [ +0.16% +0.25% +0.00% / +0.32% +0.16% +0.18%] index_select wrap : Elapsed 0.044 ms (4.447 ms / 100) 4.445 -> 4.448 ( +0.07%) [ +0.00% +0.09% +0.16% / +0.22% +0.09% +0.07%] index_select linear : Elapsed 0.044 ms (4.445 ms / 100) 4.445 -> 4.450 ( +0.11%) [ +0.00% +0.02% +0.07% / +0.11% +0.11% +0.18%] index_select reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.443 -> 4.441 ( -0.05%) [ +0.16% +0.00% +0.07% / +0.29% -0.05% +0.09%] index_select skip64 : Elapsed 0.045 ms (4.450 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.16% +0.05% +0.00% / +0.16% +0.05% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.444 ms / 100) 4.443 -> 4.449 ( +0.14%) [ +0.09% +0.00% +0.07% / +0.18% +0.27% +0.14%] index_select spread : Elapsed 0.044 ms (4.447 ms / 100) 4.447 -> 4.445 ( -0.04%) [ +0.02% +0.00% +0.02% / +0.20% -0.04% +0.09%] index_select strided 3 : Elapsed 0.044 ms (4.448 ms / 100) 4.449 -> 4.448 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.11% +0.02%] index_select strided 5 : Elapsed 0.044 ms (4.449 ms / 100) 4.447 -> 4.447 ( +0.00%) [ +0.02% +0.00% +0.02% / +0.13% +0.20% +0.00%] index_select strided 7 : Elapsed 0.044 ms (4.448 ms / 100) 4.443 -> 4.445 ( +0.05%) [ +0.20% +0.00% +0.18% / +0.11% +0.05% +0.18%] index_select strided 8 : Elapsed 0.045 ms (4.452 ms / 100) 4.443 -> 4.447 ( +0.09%) [ +0.16% +0.00% +0.00% / +0.20% +0.14% +0.09%] index_select strided 16 : Elapsed 0.045 ms (4.450 ms / 100) 4.450 -> 4.449 ( -0.02%) [ +0.04% +0.04% +0.00% / -0.02% +0.16% +0.04%] index_select random : Elapsed 0.045 ms (4.452 ms / 100) 4.442 -> 4.445 ( +0.07%) [ +0.00% +0.11% +0.16% / +0.16% +0.07% +0.32%] index_select random_sorted : Elapsed 0.044 ms (4.442 ms / 100) out_shape = [4, 20, 40, 5] in_shape = [4, 20, 16, 5] idx_dim = 2 B = [4, 20, 40, 5] (stride (4000, 1, 100, 20)) dim = 2 fill_cnt = 16 2.064 -> 2.078 ( +0.68%) [ +0.05% +0.00% +0.05% / +0.68% +0.92% +0.92%] index_fill_ const : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.072 ( +0.39%) [ +0.05% +0.00% +0.00% / +0.39% +0.97% +0.97%] index_fill_ linear : Elapsed 0.021 ms (2.065 ms / 100) 2.064 -> 2.069 ( +0.24%) [ +0.00% +0.00% +0.05% / +0.24% +1.16% +1.07%] index_fill_ reverse : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.072 ( +0.39%) [ +0.00% +0.00% +0.00% / +0.39% +1.07% +1.07%] index_fill_ skip64 : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.069 ( +0.24%) [ +0.00% +0.05% +0.05% / +0.24% +0.92% +0.92%] index_fill_ skip256 : Elapsed 0.021 ms (2.064 ms / 100) 2.064 -> 2.069 ( +0.24%) [ +0.05% +0.10% +0.00% / +0.24% +0.87% +0.87%] index_fill_ spread : Elapsed 0.021 ms (2.065 ms / 100) 2.066 -> 2.069 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.77% +0.77%] index_fill_ strided 3 : Elapsed 0.021 ms (2.066 ms / 100) 2.065 -> 2.073 ( +0.39%) [ +0.00% +0.05% +0.00% / +0.39% +0.77% +0.82%] index_fill_ strided 5 : Elapsed 0.021 ms (2.065 ms / 100) 2.066 -> 2.072 ( +0.29%) [ +0.15% +0.00% +0.15% / +0.29% +0.68% +0.63%] index_fill_ strided 7 : Elapsed 0.021 ms (2.069 ms / 100) 2.065 -> 2.067 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.73% +0.73%] index_fill_ strided 8 : Elapsed 0.021 ms (2.066 ms / 100) 2.067 -> 2.068 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.05% +0.58% +0.58%] index_fill_ strided 16 : Elapsed 0.021 ms (2.069 ms / 100) 2.068 -> 2.070 ( +0.10%) [ +0.00% +0.05% +0.00% / +0.10% +0.53% +0.53%] index_fill_ random : Elapsed 0.021 ms (2.068 ms / 100) 2.066 -> 2.067 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.77% +0.73%] index_fill_ random_sorted : Elapsed 0.021 ms (2.066 ms / 100) 2.065 -> 2.071 ( +0.29%) [ +0.05% +0.05% +0.00% / +0.29% +0.77% +0.82%] index_fill_ perm : Elapsed 0.021 ms (2.066 ms / 100) 2.064 -> 2.078 ( +0.68%) [ +0.05% +0.00% +0.00% / +0.68% +1.02% +0.92%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.065 ms / 100) B = [4, 20, 40, 5] (stride (4000, 1, 100, 20)) A = [4, 20, 16, 5] (stride (1, 320, 4, 64)) dim = 2 4.430 -> 4.437 ( +0.16%) [ +0.00% +0.11% +0.16% / +0.16% +0.54% +0.59%] index_add_ linear : Elapsed 0.044 ms (4.430 ms / 100) 4.276 -> 4.291 ( +0.35%) [ +0.00% +0.19% +0.14% / +0.35% +0.54% +0.77%] index_copy_ linear : Elapsed 0.043 ms (4.276 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.00% +0.00% +0.02% / +0.09% +0.43% +0.47%] index_add_ reverse : Elapsed 0.044 ms (4.436 ms / 100) 4.278 -> 4.292 ( +0.33%) [ +0.02% +0.00% +0.09% / +0.33% +0.47% +0.77%] index_copy_ reverse : Elapsed 0.043 ms (4.279 ms / 100) 4.435 -> 4.444 ( +0.20%) [ +0.07% +0.00% +0.05% / +0.20% +0.59% +0.54%] index_add_ spread : Elapsed 0.044 ms (4.438 ms / 100) 4.263 -> 4.268 ( +0.12%) [ +0.07% +0.09% +0.00% / +0.12% +0.40% +0.52%] index_copy_ spread : Elapsed 0.043 ms (4.266 ms / 100) 4.431 -> 4.441 ( +0.23%) [ +0.02% +0.11% +0.00% / +0.23% +0.50% +0.43%] index_add_ strided 3 : Elapsed 0.044 ms (4.432 ms / 100) 4.254 -> 4.272 ( +0.42%) [ +0.00% +0.05% +0.12% / +0.42% +0.49% +0.49%] index_copy_ strided 3 : Elapsed 0.043 ms (4.254 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.05% +0.00% +0.09% / +0.05% +0.50% +0.45%] index_add_ strided 7 : Elapsed 0.044 ms (4.439 ms / 100) 4.284 -> 4.293 ( +0.21%) [ +0.02% +0.02% +0.00% / +0.21% +0.44% +0.49%] index_copy_ strided 7 : Elapsed 0.043 ms (4.285 ms / 100) 4.437 -> 4.437 ( +0.00%) [ +0.09% +0.07% +0.00% / +0.00% +0.41% +0.38%] index_add_ perm : Elapsed 0.044 ms (4.441 ms / 100) 4.283 -> 4.277 ( -0.14%) [ +0.00% +0.05% +0.05% / -0.14% +0.70% +0.58%] index_copy_ perm : Elapsed 0.043 ms (4.283 ms / 100) 4.434 -> 4.441 ( +0.16%) [ +0.14% +0.14% +0.00% / +0.16% +0.45% +0.45%] index_add_ perm_sorted : Elapsed 0.044 ms (4.440 ms / 100) 4.280 -> 4.291 ( +0.26%) [ +0.16% +0.09% +0.00% / +0.26% +0.49% +0.70%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.287 ms / 100) 5.563 -> 5.553 ( -0.18%) [ +0.00% +0.07% +0.07% / -0.18% +0.14% +0.02%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.557 -> 5.565 ( +0.14%) [ +0.23% +0.05% +0.00% / +0.14% +0.31% +0.20%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.11% +0.00% +0.05% / +0.07% +0.23% +0.09%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.567 -> 5.563 ( -0.07%) [ +0.04% +0.02% +0.00% / -0.04% -0.07% +0.04%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.567 -> 5.560 ( -0.13%) [ +0.04% +0.00% +0.05% / -0.04% -0.13% +0.00%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.11% +0.09% +0.00% / +0.07% +0.00% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.558 ( -0.18%) [ +0.05% +0.02% +0.00% / +0.11% -0.04% -0.18%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.559 -> 5.565 ( +0.11%) [ +0.05% +0.00% +0.27% / +0.18% +0.11% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.00% +0.07% +0.02% / +0.14% +0.20% +0.09%] index_select strided 5 : Elapsed 0.056 ms (5.562 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.18% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.563 ms / 100) 5.558 -> 5.555 ( -0.05%) [ +0.09% +0.00% +0.04% / -0.05% -0.02% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.565 -> 5.564 ( -0.02%) [ +0.09% +0.02% +0.00% / +0.11% +0.04% -0.02%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.16% +0.05% +0.00% / +0.05% +0.09% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [4, 20, 40, 5] (stride (4000, 1, 100, 20)) A = [4, 20, 16, 5] (stride (20, 1, 80, 1280)) dim = 2 3.727 -> 3.730 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.78% +0.67%] index_add_ linear : Elapsed 0.037 ms (3.727 ms / 100) 3.598 -> 3.598 ( +0.00%) [ +0.08% +0.14% +0.00% / +0.00% +0.81% +0.81%] index_copy_ linear : Elapsed 0.036 ms (3.601 ms / 100) 3.694 -> 3.696 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.79% +0.68%] index_add_ reverse : Elapsed 0.037 ms (3.696 ms / 100) 3.571 -> 3.576 ( +0.14%) [ +0.00% +0.06% +0.08% / +0.14% +0.78% +0.73%] index_copy_ reverse : Elapsed 0.036 ms (3.571 ms / 100) 3.710 -> 3.715 ( +0.13%) [ +0.00% +0.03% +0.11% / +0.13% +0.84% +0.86%] index_add_ spread : Elapsed 0.037 ms (3.710 ms / 100) 3.586 -> 3.590 ( +0.11%) [ +0.00% +0.00% +0.03% / +0.11% +0.95% +0.95%] index_copy_ spread : Elapsed 0.036 ms (3.586 ms / 100) 3.707 -> 3.710 ( +0.08%) [ +0.11% +0.05% +0.00% / +0.08% +0.84% +0.86%] index_add_ strided 3 : Elapsed 0.037 ms (3.711 ms / 100) 3.574 -> 3.581 ( +0.20%) [ +0.14% +0.03% +0.00% / +0.20% +0.87% +0.87%] index_copy_ strided 3 : Elapsed 0.036 ms (3.579 ms / 100) 3.694 -> 3.693 ( -0.03%) [ +0.00% +0.03% +0.08% / -0.03% +0.81% +0.73%] index_add_ strided 7 : Elapsed 0.037 ms (3.694 ms / 100) 3.570 -> 3.576 ( +0.17%) [ +0.00% +0.06% +0.08% / +0.17% +0.81% +0.76%] index_copy_ strided 7 : Elapsed 0.036 ms (3.570 ms / 100) 3.729 -> 3.728 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.72% +0.59%] index_add_ perm : Elapsed 0.037 ms (3.730 ms / 100) 3.601 -> 3.609 ( +0.22%) [ +0.06% +0.00% +0.00% / +0.22% +0.78% +0.78%] index_copy_ perm : Elapsed 0.036 ms (3.603 ms / 100) 3.712 -> 3.711 ( -0.03%) [ +0.03% +0.05% +0.00% / -0.03% +0.67% +0.67%] index_add_ perm_sorted : Elapsed 0.037 ms (3.713 ms / 100) 3.576 -> 3.577 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.73% +0.78%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.578 ms / 100) 5.484 -> 5.476 ( -0.15%) [ +0.04% +0.04% +0.00% / -0.13% -0.15% +0.02%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.496 -> 5.493 ( -0.05%) [ +0.13% +0.07% +0.00% / -0.05% -0.04% +0.09%] index_select wrap : Elapsed 0.055 ms (5.503 ms / 100) 5.500 -> 5.485 ( -0.27%) [ +0.00% +0.15% +0.04% / -0.04% -0.27% -0.07%] index_select linear : Elapsed 0.055 ms (5.500 ms / 100) 5.497 -> 5.485 ( -0.22%) [ +0.00% +0.09% +0.00% / +0.13% -0.07% -0.22%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.478 -> 5.479 ( +0.02%) [ +0.13% +0.00% +0.02% / +0.02% +0.24% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.485 ms / 100) 5.473 -> 5.481 ( +0.15%) [ +0.16% +0.09% +0.00% / +0.15% +0.24% +0.24%] index_select skip256 : Elapsed 0.055 ms (5.482 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.07% +0.00% +0.04% / +0.13% -0.09% +0.04%] index_select spread : Elapsed 0.055 ms (5.495 ms / 100) 5.489 -> 5.493 ( +0.07%) [ +0.00% +0.20% +0.04% / +0.07% +0.07% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.04% +0.07% +0.00% / +0.05% -0.02% -0.02%] index_select strided 5 : Elapsed 0.055 ms (5.494 ms / 100) 5.493 -> 5.495 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.05% +0.04% +0.13%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.479 -> 5.482 ( +0.05%) [ +0.05% +0.00% +0.04% / +0.05% +0.16% +0.35%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.494 -> 5.489 ( -0.09%) [ +0.00% +0.11% +0.07% / +0.07% -0.09% +0.00%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.498 -> 5.493 ( -0.09%) [ +0.00% +0.02% +0.05% / +0.00% -0.09% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.498 ms / 100) B = [4, 20, 40, 5] (stride (5, 800, 20, 1)) A = [4, 20, 16, 5] (stride (80, 320, 1, 16)) dim = 2 4.272 -> 4.279 ( +0.16%) [ +0.12% +0.09% +0.00% / +0.16% +0.66% +0.87%] index_add_ linear : Elapsed 0.043 ms (4.277 ms / 100) 4.119 -> 4.125 ( +0.15%) [ +0.02% +0.07% +0.00% / +0.15% +0.66% +0.66%] index_copy_ linear : Elapsed 0.041 ms (4.120 ms / 100) 4.262 -> 4.273 ( +0.26%) [ +0.12% +0.00% +0.21% / +0.26% +0.66% +0.70%] index_add_ reverse : Elapsed 0.043 ms (4.267 ms / 100) 4.112 -> 4.126 ( +0.34%) [ +0.00% +0.00% +0.12% / +0.34% +0.51% +0.63%] index_copy_ reverse : Elapsed 0.041 ms (4.112 ms / 100) 4.263 -> 4.266 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.07% +0.66% +0.68%] index_add_ spread : Elapsed 0.043 ms (4.265 ms / 100) 4.117 -> 4.114 ( -0.07%) [ +0.02% +0.00% +0.02% / -0.07% +0.49% +0.51%] index_copy_ spread : Elapsed 0.041 ms (4.118 ms / 100) 4.265 -> 4.264 ( -0.02%) [ +0.09% +0.02% +0.00% / -0.02% +0.63% +0.52%] index_add_ strided 3 : Elapsed 0.043 ms (4.269 ms / 100) 4.116 -> 4.129 ( +0.32%) [ +0.00% +0.10% +0.02% / +0.32% +0.58% +0.53%] index_copy_ strided 3 : Elapsed 0.041 ms (4.116 ms / 100) 4.268 -> 4.280 ( +0.28%) [ +0.00% +0.00% +0.07% / +0.28% +0.52% +0.52%] index_add_ strided 7 : Elapsed 0.043 ms (4.268 ms / 100) 4.118 -> 4.132 ( +0.34%) [ +0.00% +0.02% +0.02% / +0.58% +0.34% +0.41%] index_copy_ strided 7 : Elapsed 0.041 ms (4.118 ms / 100) 4.277 -> 4.279 ( +0.05%) [ +0.07% +0.00% +0.00% / +0.05% +0.44% +0.51%] index_add_ perm : Elapsed 0.043 ms (4.280 ms / 100) 4.121 -> 4.131 ( +0.24%) [ +0.10% +0.00% +0.00% / +0.24% +0.51% +0.53%] index_copy_ perm : Elapsed 0.041 ms (4.125 ms / 100) 4.277 -> 4.283 ( +0.14%) [ +0.05% +0.12% +0.00% / +0.14% +0.65% +0.47%] index_add_ perm_sorted : Elapsed 0.043 ms (4.279 ms / 100) 4.118 -> 4.130 ( +0.29%) [ +0.07% +0.22% +0.00% / +0.29% +0.58% +0.58%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.121 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.09% +0.05% +0.00% / -0.07% +0.07% +0.11%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.02% +0.13% +0.00% / +0.04% +0.25% +0.18%] index_select wrap : Elapsed 0.056 ms (5.557 ms / 100) 5.553 -> 5.560 ( +0.13%) [ +0.11% +0.14% +0.00% / +0.13% +0.23% +0.23%] index_select linear : Elapsed 0.056 ms (5.559 ms / 100) 5.555 -> 5.555 ( +0.00%) [ +0.00% +0.18% +0.04% / +0.00% +0.18% +0.18%] index_select reverse : Elapsed 0.056 ms (5.555 ms / 100) 5.556 -> 5.554 ( -0.04%) [ +0.00% +0.16% +0.05% / +0.02% -0.02% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.556 ms / 100) 5.559 -> 5.558 ( -0.02%) [ +0.00% +0.05% +0.14% / +0.07% -0.02% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.555 -> 5.556 ( +0.02%) [ +0.00% +0.07% +0.20% / +0.04% +0.02% +0.05%] index_select spread : Elapsed 0.056 ms (5.555 ms / 100) 5.552 -> 5.558 ( +0.11%) [ +0.14% +0.00% +0.18% / +0.11% +0.34% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.560 ms / 100) 5.555 -> 5.558 ( +0.05%) [ +0.00% +0.23% +0.13% / +0.05% +0.23% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.555 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.00% +0.02% +0.11% / +0.04% +0.14% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.556 ms / 100) 5.555 -> 5.552 ( -0.05%) [ +0.04% +0.05% +0.00% / -0.05% +0.27% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.557 ms / 100) 5.555 -> 5.559 ( +0.07%) [ +0.04% +0.00% +0.22% / +0.32% +0.07% +0.13%] index_select random : Elapsed 0.056 ms (5.557 ms / 100) 5.554 -> 5.557 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.16% +0.05% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.559 ms / 100) B = [4, 20, 40, 5] (stride (100, 5, 400, 1)) A = [4, 20, 16, 5] (stride (16, 320, 1, 64)) dim = 2 4.100 -> 4.106 ( +0.15%) [ +0.07% +0.02% +0.00% / +0.15% +0.76% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.103 ms / 100) 3.923 -> 3.929 ( +0.15%) [ +0.03% +0.00% +0.00% / +0.15% +0.74% +0.71%] index_copy_ linear : Elapsed 0.039 ms (3.924 ms / 100) 4.059 -> 4.083 ( +0.59%) [ +0.47% +0.30% +0.00% / +0.59% +0.94% +1.11%] index_add_ reverse : Elapsed 0.041 ms (4.078 ms / 100) 3.896 -> 3.916 ( +0.51%) [ +0.21% +0.05% +0.00% / +0.51% +0.69% +0.92%] index_copy_ reverse : Elapsed 0.039 ms (3.904 ms / 100) 4.117 -> 4.126 ( +0.22%) [ +0.02% +0.05% +0.00% / +0.22% +0.61% +0.83%] index_add_ spread : Elapsed 0.041 ms (4.118 ms / 100) 3.942 -> 3.950 ( +0.20%) [ +0.00% +0.13% +0.05% / +0.20% +0.81% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.942 ms / 100) 4.087 -> 4.086 ( -0.02%) [ +0.29% +0.00% +0.22% / -0.02% +0.91% +0.61%] index_add_ strided 3 : Elapsed 0.041 ms (4.099 ms / 100) 3.913 -> 3.919 ( +0.15%) [ +0.18% +0.00% +0.26% / +0.15% +0.95% +0.59%] index_copy_ strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 4.058 -> 4.079 ( +0.52%) [ +0.30% +0.54% +0.00% / +0.52% +0.69% +1.13%] index_add_ strided 7 : Elapsed 0.041 ms (4.070 ms / 100) 3.895 -> 3.907 ( +0.31%) [ +0.08% +0.23% +0.00% / +0.31% +0.67% +0.95%] index_copy_ strided 7 : Elapsed 0.039 ms (3.898 ms / 100) 4.103 -> 4.103 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.63% +0.68%] index_add_ perm : Elapsed 0.041 ms (4.103 ms / 100) 3.923 -> 3.928 ( +0.13%) [ +0.00% +0.00% +0.05% / +0.13% +0.74% +0.74%] index_copy_ perm : Elapsed 0.039 ms (3.923 ms / 100) 4.084 -> 4.086 ( +0.05%) [ +0.00% +0.02% +0.29% / +0.05% +0.69% +1.00%] index_add_ perm_sorted : Elapsed 0.041 ms (4.084 ms / 100) 3.912 -> 3.922 ( +0.26%) [ +0.00% +0.15% +0.23% / +0.26% +0.61% +0.84%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.912 ms / 100) 5.471 -> 5.477 ( +0.11%) [ +0.26% +0.18% +0.00% / +0.26% +0.11% +0.15%] index_select const : Elapsed 0.055 ms (5.485 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.04% +0.22% +0.00% / -0.07% -0.04% +0.00%] index_select wrap : Elapsed 0.055 ms (5.482 ms / 100) 5.481 -> 5.475 ( -0.11%) [ +0.05% +0.00% +0.02% / +0.11% -0.11% -0.04%] index_select linear : Elapsed 0.055 ms (5.484 ms / 100) 5.477 -> 5.481 ( +0.07%) [ +0.04% +0.15% +0.00% / +0.20% +0.07% +0.13%] index_select reverse : Elapsed 0.055 ms (5.479 ms / 100) 5.477 -> 5.472 ( -0.09%) [ +0.00% +0.02% +0.02% / +0.02% +0.04% -0.09%] index_select skip64 : Elapsed 0.055 ms (5.477 ms / 100) 5.475 -> 5.471 ( -0.07%) [ +0.11% +0.04% +0.00% / +0.02% +0.05% -0.07%] index_select skip256 : Elapsed 0.055 ms (5.481 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.00% +0.13% +0.04% / +0.00% -0.02% -0.07%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.483 -> 5.475 ( -0.15%) [ +0.02% +0.15% +0.00% / -0.15% -0.11% -0.05%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.22% +0.00% +0.11% / +0.07% +0.00% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.488 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.15% +0.15% +0.00% / +0.07% +0.13% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.485 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.13% +0.00% +0.15% / +0.02% +0.20% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.474 -> 5.477 ( +0.05%) [ +0.05% +0.00% +0.18% / +0.05% +0.13% +0.11%] index_select random : Elapsed 0.055 ms (5.477 ms / 100) 5.478 -> 5.473 ( -0.09%) [ +0.18% +0.13% +0.00% / +0.00% -0.09% +0.11%] index_select random_sorted : Elapsed 0.055 ms (5.488 ms / 100) B = [4, 20, 40, 5] (stride (20, 1, 400, 80)) A = [4, 20, 16, 5] (stride (100, 1, 400, 20)) dim = 2 3.683 -> 3.689 ( +0.16%) [ +0.19% +0.00% +0.05% / +0.16% +0.71% +0.76%] index_add_ linear : Elapsed 0.037 ms (3.690 ms / 100) 3.549 -> 3.572 ( +0.65%) [ +0.11% +0.00% +0.08% / +0.85% +0.76% +0.65%] index_copy_ linear : Elapsed 0.036 ms (3.553 ms / 100) 3.687 -> 3.696 ( +0.24%) [ +0.19% +0.00% +0.16% / +0.24% +0.73% +0.65%] index_add_ reverse : Elapsed 0.037 ms (3.694 ms / 100) 3.549 -> 3.569 ( +0.56%) [ +0.25% +0.00% +0.20% / +0.56% +0.82% +0.65%] index_copy_ reverse : Elapsed 0.036 ms (3.558 ms / 100) 3.670 -> 3.675 ( +0.14%) [ +0.05% +0.00% +0.14% / +0.14% +0.49% +0.54%] index_add_ spread : Elapsed 0.037 ms (3.672 ms / 100) 3.538 -> 3.546 ( +0.23%) [ +0.14% +0.00% +0.08% / +0.23% +0.51% +0.54%] index_copy_ spread : Elapsed 0.035 ms (3.543 ms / 100) 3.678 -> 3.690 ( +0.33%) [ +0.05% +0.00% +0.30% / +0.33% +0.71% +0.57%] index_add_ strided 3 : Elapsed 0.037 ms (3.680 ms / 100) 3.546 -> 3.560 ( +0.39%) [ +0.03% +0.00% +0.25% / +0.39% +0.68% +0.51%] index_copy_ strided 3 : Elapsed 0.035 ms (3.547 ms / 100) 3.695 -> 3.703 ( +0.22%) [ +0.16% +0.00% +0.16% / +0.22% +0.65% +0.60%] index_add_ strided 7 : Elapsed 0.037 ms (3.701 ms / 100) 3.558 -> 3.565 ( +0.20%) [ +0.11% +0.00% +0.11% / +0.20% +0.56% +0.56%] index_copy_ strided 7 : Elapsed 0.036 ms (3.562 ms / 100) 3.690 -> 3.698 ( +0.22%) [ +0.08% +0.11% +0.00% / +0.22% +0.43% +0.57%] index_add_ perm : Elapsed 0.037 ms (3.693 ms / 100) 3.554 -> 3.559 ( +0.14%) [ +0.06% +0.06% +0.00% / +0.14% +0.48% +0.51%] index_copy_ perm : Elapsed 0.036 ms (3.556 ms / 100) 3.685 -> 3.688 ( +0.08%) [ +0.00% +0.24% +0.14% / +0.08% +0.52% +0.35%] index_add_ perm_sorted : Elapsed 0.037 ms (3.685 ms / 100) 3.550 -> 3.560 ( +0.28%) [ +0.00% +0.20% +0.08% / +0.28% +0.54% +0.45%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.550 ms / 100) 5.470 -> 5.471 ( +0.02%) [ +0.00% +0.00% +0.04% / +0.02% +0.13% +0.13%] index_select const : Elapsed 0.055 ms (5.470 ms / 100) 5.468 -> 5.471 ( +0.05%) [ +0.09% +0.00% +0.02% / +0.05% +0.20% +0.20%] index_select wrap : Elapsed 0.055 ms (5.473 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.00% +0.04% +0.04% / +0.05% +0.07% +0.16%] index_select linear : Elapsed 0.055 ms (5.472 ms / 100) 5.477 -> 5.476 ( -0.02%) [ +0.07% +0.00% +0.05% / +0.05% +0.00% -0.02%] index_select reverse : Elapsed 0.055 ms (5.481 ms / 100) 5.472 -> 5.465 ( -0.13%) [ +0.00% +0.04% +0.07% / -0.13% +0.00% -0.07%] index_select skip64 : Elapsed 0.055 ms (5.472 ms / 100) 5.470 -> 5.467 ( -0.05%) [ +0.00% +0.02% +0.04% / -0.02% -0.04% -0.05%] index_select skip256 : Elapsed 0.055 ms (5.470 ms / 100) 5.470 -> 5.473 ( +0.05%) [ +0.04% +0.13% +0.00% / +0.05% +0.15% +0.09%] index_select spread : Elapsed 0.055 ms (5.472 ms / 100) 5.466 -> 5.464 ( -0.04%) [ +0.00% +0.09% +0.00% / -0.04% +0.11% +0.27%] index_select strided 3 : Elapsed 0.055 ms (5.466 ms / 100) 5.471 -> 5.474 ( +0.05%) [ +0.04% +0.13% +0.00% / +0.05% +0.15% +0.11%] index_select strided 5 : Elapsed 0.055 ms (5.473 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.18% +0.26%] index_select strided 7 : Elapsed 0.055 ms (5.472 ms / 100) 5.466 -> 5.469 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.20% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.470 -> 5.468 ( -0.04%) [ +0.02% +0.00% +0.07% / -0.04% +0.07% +0.00%] index_select random : Elapsed 0.055 ms (5.471 ms / 100) 5.465 -> 5.469 ( +0.07%) [ +0.02% +0.00% +0.15% / +0.11% +0.07% +0.11%] index_select random_sorted : Elapsed 0.055 ms (5.466 ms / 100) B = [4, 20, 40, 5] (stride (20, 1, 400, 80)) A = [4, 20, 16, 5] (stride (320, 16, 1, 1280)) dim = 2 1.420 -> 1.421 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.49% +0.42%] index_add_ linear : Elapsed 0.014 ms (1.421 ms / 100) 1.387 -> 1.387 ( +0.00%) [ +0.00% +0.22% +0.07% / +0.00% +0.72% +0.79%] index_copy_ linear : Elapsed 0.014 ms (1.387 ms / 100) 1.409 -> 1.411 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +1.14% +0.92%] index_add_ reverse : Elapsed 0.014 ms (1.409 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.00% +0.22% +0.00% / +0.07% +1.16% +0.94%] index_copy_ reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.414 -> 1.413 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.50% +0.57%] index_add_ spread : Elapsed 0.014 ms (1.415 ms / 100) 1.382 -> 1.380 ( -0.14%) [ +0.07% +0.00% +0.00% / -0.14% +0.65% +0.58%] index_copy_ spread : Elapsed 0.014 ms (1.383 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.85% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.420 ms / 100) 1.388 -> 1.390 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +1.15% +0.86%] index_copy_ strided 3 : Elapsed 0.014 ms (1.389 ms / 100) 1.409 -> 1.411 ( +0.14%) [ +0.28% +0.00% +0.00% / +0.14% +0.85% +1.21%] index_add_ strided 7 : Elapsed 0.014 ms (1.413 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.65% +1.16%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.417 -> 1.418 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.92% +0.78%] index_add_ perm : Elapsed 0.014 ms (1.418 ms / 100) 1.386 -> 1.391 ( +0.36%) [ +0.00% +0.22% +0.00% / +0.36% +1.08% +0.87%] index_copy_ perm : Elapsed 0.014 ms (1.386 ms / 100) 1.416 -> 1.417 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.78% +1.06%] index_add_ perm_sorted : Elapsed 0.014 ms (1.417 ms / 100) 1.386 -> 1.386 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.87% +1.23%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.386 ms / 100) 3.537 -> 3.535 ( -0.06%) [ +0.03% +0.03% +0.00% / +0.00% -0.06% +0.00%] index_select const : Elapsed 0.035 ms (3.538 ms / 100) 3.536 -> 3.537 ( +0.03%) [ +0.20% +0.14% +0.00% / +0.03% +0.14% +0.23%] index_select wrap : Elapsed 0.035 ms (3.543 ms / 100) 3.537 -> 3.541 ( +0.11%) [ +0.08% +0.00% +0.11% / +0.11% +0.48% +0.51%] index_select linear : Elapsed 0.035 ms (3.540 ms / 100) 3.539 -> 3.540 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.34% +0.40%] index_select reverse : Elapsed 0.035 ms (3.540 ms / 100) 3.531 -> 3.533 ( +0.06%) [ +0.00% +0.14% +0.20% / +0.06% +0.08% +0.25%] index_select skip64 : Elapsed 0.035 ms (3.531 ms / 100) 3.532 -> 3.534 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.17% +0.31%] index_select skip256 : Elapsed 0.035 ms (3.532 ms / 100) 3.543 -> 3.542 ( -0.03%) [ +0.00% +0.23% +0.06% / +0.17% +0.17% -0.03%] index_select spread : Elapsed 0.035 ms (3.543 ms / 100) 3.555 -> 3.535 ( -0.56%) [ +0.28% +0.17% +0.00% / +0.20% -0.39% -0.56%] index_select strided 3 : Elapsed 0.036 ms (3.565 ms / 100) 3.558 -> 3.537 ( -0.59%) [ +0.00% +0.11% +0.06% / +0.06% -0.59% -0.51%] index_select strided 5 : Elapsed 0.036 ms (3.558 ms / 100) 3.537 -> 3.536 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.31% +0.20%] index_select strided 7 : Elapsed 0.035 ms (3.537 ms / 100) 3.530 -> 3.535 ( +0.14%) [ +0.00% +0.28% +0.28% / +0.14% +0.42% +0.54%] index_select strided 8 : Elapsed 0.035 ms (3.530 ms / 100) 3.534 -> 3.546 ( +0.34%) [ +0.00% +0.06% +0.14% / +0.34% +0.99% +0.88%] index_select random : Elapsed 0.035 ms (3.534 ms / 100) 3.533 -> 3.532 ( -0.03%) [ +0.00% +0.17% +0.06% / -0.03% +0.76% +0.91%] index_select random_sorted : Elapsed 0.035 ms (3.533 ms / 100) B = [4, 20, 40, 5] (stride (800, 40, 1, 3200)) A = [4, 20, 16, 5] (stride (1, 4, 80, 1280)) dim = 2 3.782 -> 3.782 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.69% +0.56%] index_add_ linear : Elapsed 0.038 ms (3.783 ms / 100) 3.643 -> 3.644 ( +0.03%) [ +0.00% +0.03% +0.14% / +0.03% +0.55% +0.58%] index_copy_ linear : Elapsed 0.036 ms (3.643 ms / 100) 3.778 -> 3.783 ( +0.13%) [ +0.16% +0.00% +0.03% / +0.13% +0.48% +0.48%] index_add_ reverse : Elapsed 0.038 ms (3.784 ms / 100) 3.636 -> 3.647 ( +0.30%) [ +0.11% +0.00% +0.06% / +0.30% +0.55% +0.61%] index_copy_ reverse : Elapsed 0.036 ms (3.640 ms / 100) 3.775 -> 3.782 ( +0.19%) [ +0.00% +0.05% +0.19% / +0.19% +0.58% +0.64%] index_add_ spread : Elapsed 0.038 ms (3.775 ms / 100) 3.637 -> 3.643 ( +0.16%) [ +0.00% +0.05% +0.16% / +0.16% +0.71% +0.71%] index_copy_ spread : Elapsed 0.036 ms (3.637 ms / 100) 3.768 -> 3.770 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.56% +0.53%] index_add_ strided 3 : Elapsed 0.038 ms (3.768 ms / 100) 3.627 -> 3.630 ( +0.08%) [ +0.00% +0.14% +0.08% / +0.08% +0.61% +0.55%] index_copy_ strided 3 : Elapsed 0.036 ms (3.627 ms / 100) 3.779 -> 3.774 ( -0.13%) [ +0.00% +0.03% +0.05% / -0.13% +0.50% +0.56%] index_add_ strided 7 : Elapsed 0.038 ms (3.779 ms / 100) 3.637 -> 3.631 ( -0.16%) [ +0.03% +0.00% +0.05% / -0.16% +0.63% +0.66%] index_copy_ strided 7 : Elapsed 0.036 ms (3.638 ms / 100) 3.783 -> 3.783 ( +0.00%) [ +0.00% +0.24% +0.03% / +0.00% +0.56% +0.50%] index_add_ perm : Elapsed 0.038 ms (3.783 ms / 100) 3.643 -> 3.644 ( +0.03%) [ +0.08% +0.22% +0.00% / +0.03% +0.58% +0.49%] index_copy_ perm : Elapsed 0.036 ms (3.646 ms / 100) 3.782 -> 3.782 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.58% +0.48%] index_add_ perm_sorted : Elapsed 0.038 ms (3.788 ms / 100) 3.644 -> 3.645 ( +0.03%) [ +0.11% +0.03% +0.00% / +0.03% +0.52% +0.41%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.648 ms / 100) 5.467 -> 5.471 ( +0.07%) [ +0.16% +0.00% +0.16% / +0.07% +0.31% +0.33%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.16% +0.04% +0.00% / +0.04% +0.22% +0.11%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.00% +0.02%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.07% +0.07% +0.00% / +0.05% -0.02% +0.07%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.475 -> 5.472 ( -0.05%) [ +0.09% +0.00% +0.24% / +0.13% -0.05% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.480 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.20% +0.00% +0.11% / +0.20% +0.00% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.476 -> 5.477 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.05% +0.22% +0.02%] index_select spread : Elapsed 0.055 ms (5.481 ms / 100) 5.478 -> 5.476 ( -0.04%) [ +0.00% +0.02% +0.07% / -0.04% +0.22% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.478 ms / 100) 5.470 -> 5.468 ( -0.04%) [ +0.16% +0.00% +0.09% / -0.04% +0.22% +0.27%] index_select strided 5 : Elapsed 0.055 ms (5.479 ms / 100) 5.473 -> 5.480 ( +0.13%) [ +0.15% +0.05% +0.00% / +0.13% +0.35% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.481 ms / 100) 5.470 -> 5.472 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.13% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.475 ms / 100) 5.479 -> 5.473 ( -0.11%) [ +0.13% +0.02% +0.00% / -0.11% +0.07% +0.00%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.484 -> 5.480 ( -0.07%) [ +0.00% +0.02% +0.04% / -0.07% -0.05% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [4, 20, 40, 5] (stride (40, 160, 1, 3200)) A = [4, 20, 16, 5] (stride (5, 20, 400, 1)) dim = 2 4.002 -> 4.012 ( +0.25%) [ +0.20% +0.25% +0.00% / +0.25% +0.97% +0.82%] index_add_ linear : Elapsed 0.040 ms (4.010 ms / 100) 3.875 -> 3.881 ( +0.15%) [ +0.08% +0.05% +0.00% / +0.15% +0.90% +0.70%] index_copy_ linear : Elapsed 0.039 ms (3.878 ms / 100) 3.994 -> 3.995 ( +0.03%) [ +0.05% +0.10% +0.00% / +0.03% +0.73% +0.78%] index_add_ reverse : Elapsed 0.040 ms (3.996 ms / 100) 3.863 -> 3.865 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.91% +0.85%] index_copy_ reverse : Elapsed 0.039 ms (3.865 ms / 100) 4.001 -> 4.003 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.80% +0.80%] index_add_ spread : Elapsed 0.040 ms (4.002 ms / 100) 3.872 -> 3.878 ( +0.15%) [ +0.00% +0.08% +0.10% / +0.15% +1.16% +0.88%] index_copy_ spread : Elapsed 0.039 ms (3.872 ms / 100) 3.998 -> 3.999 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.78% +0.75%] index_add_ strided 3 : Elapsed 0.040 ms (3.999 ms / 100) 3.863 -> 3.867 ( +0.10%) [ +0.03% +0.03% +0.00% / +0.10% +0.88% +0.78%] index_copy_ strided 3 : Elapsed 0.039 ms (3.864 ms / 100) 3.994 -> 3.996 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.75% +0.73%] index_add_ strided 7 : Elapsed 0.040 ms (3.995 ms / 100) 3.862 -> 3.875 ( +0.34%) [ +0.05% +0.05% +0.00% / +0.34% +0.73% +0.83%] index_copy_ strided 7 : Elapsed 0.039 ms (3.864 ms / 100) 4.004 -> 4.013 ( +0.22%) [ +0.20% +0.00% +0.17% / +0.22% +0.95% +0.90%] index_add_ perm : Elapsed 0.040 ms (4.012 ms / 100) 3.875 -> 3.878 ( +0.08%) [ +0.03% +0.00% +0.00% / +0.08% +1.16% +0.93%] index_copy_ perm : Elapsed 0.039 ms (3.876 ms / 100) 4.001 -> 4.001 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.75% +0.72%] index_add_ perm_sorted : Elapsed 0.040 ms (4.001 ms / 100) 3.863 -> 3.875 ( +0.31%) [ +0.00% +0.08% +0.05% / +0.31% +0.83% +0.80%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.863 ms / 100) 5.565 -> 5.561 ( -0.07%) [ +0.02% +0.05% +0.00% / -0.07% -0.05% +0.07%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.567 ( -0.05%) [ +0.14% +0.00% +0.04% / +0.04% -0.04% -0.05%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.567 -> 5.573 ( +0.11%) [ +0.16% +0.00% +0.14% / +0.20% +0.13% +0.11%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.563 -> 5.573 ( +0.18%) [ +0.09% +0.00% +0.14% / +0.18% +0.20% +0.22%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.559 -> 5.570 ( +0.20%) [ +0.00% +0.09% +0.11% / +0.20% +0.25% +0.22%] index_select skip64 : Elapsed 0.056 ms (5.559 ms / 100) 5.559 -> 5.559 ( +0.00%) [ +0.00% +0.14% +0.05% / +0.00% +0.13% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.14% +0.00% +0.18% / +0.09% -0.04% +0.00%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.00% +0.11% +0.00% / +0.05% +0.07% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.05% +0.04% +0.00% / +0.13% -0.04% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.575 ms / 100) 5.570 -> 5.570 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.02% +0.00% +0.27%] index_select strided 7 : Elapsed 0.056 ms (5.570 ms / 100) 5.555 -> 5.563 ( +0.14%) [ +0.18% +0.00% +0.22% / +0.14% +0.25% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.573 -> 5.563 ( -0.18%) [ +0.00% +0.11% +0.02% / -0.04% -0.18% -0.07%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.02% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) out_shape = [4, 20, 16, 40] in_shape = [4, 20, 16, 5] idx_dim = 3 B = [4, 20, 16, 40] (stride (12800, 40, 800, 1)) A = [4, 20, 16, 5] (stride (1600, 80, 5, 1)) dim = 3 1.321 -> 1.323 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.45% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.78% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_add_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.45%] index_add_ spread : Elapsed 0.013 ms (1.322 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.55% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.279 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.68% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.55% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.277 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.61% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.276 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.61% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.94% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) 7.928 -> 7.927 ( -0.01%) [ +0.25% +0.01% +0.00% / -0.01% +0.47% +0.40%] index_select const : Elapsed 0.079 ms (7.948 ms / 100) 7.927 -> 7.929 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.39% +0.37%] index_select wrap : Elapsed 0.079 ms (7.927 ms / 100) 7.936 -> 7.935 ( -0.01%) [ +0.00% +0.01% +0.23% / -0.01% +0.33% +0.30%] index_select linear : Elapsed 0.079 ms (7.936 ms / 100) 7.927 -> 7.939 ( +0.15%) [ +0.00% +0.30% +0.25% / +0.15% +0.72% +0.29%] index_select reverse : Elapsed 0.079 ms (7.927 ms / 100) 7.930 -> 7.955 ( +0.32%) [ +0.18% +0.15% +0.00% / +0.32% +0.38% +0.43%] index_select skip64 : Elapsed 0.079 ms (7.944 ms / 100) 7.928 -> 7.939 ( +0.14%) [ +0.00% +0.20% +0.00% / +0.14% +0.57% +0.58%] index_select skip256 : Elapsed 0.079 ms (7.928 ms / 100) 7.931 -> 7.924 ( -0.09%) [ +0.08% +0.20% +0.00% / -0.09% +0.33% +0.35%] index_select spread : Elapsed 0.079 ms (7.937 ms / 100) 7.936 -> 7.939 ( +0.04%) [ +0.16% +0.13% +0.00% / +0.04% +0.33% +0.50%] index_select strided 3 : Elapsed 0.079 ms (7.949 ms / 100) 7.932 -> 7.941 ( +0.11%) [ +0.05% +0.16% +0.00% / +0.11% +0.35% +0.33%] index_select random : Elapsed 0.079 ms (7.936 ms / 100) 7.930 -> 7.941 ( +0.14%) [ +0.06% +0.00% +0.15% / +0.14% +0.33% +0.30%] index_select random_sorted : Elapsed 0.079 ms (7.935 ms / 100) B = [4, 20, 16, 40] (stride (1, 4, 80, 1280)) A = [4, 20, 16, 5] (stride (16, 64, 1, 1280)) dim = 3 1.516 -> 1.517 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.517 ms / 100) 1.478 -> 1.480 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.61% +0.54%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.516 -> 1.517 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.79% +0.79%] index_add_ reverse : Elapsed 0.015 ms (1.518 ms / 100) 1.471 -> 1.473 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.61% +0.61%] index_copy_ reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.515 -> 1.517 ( +0.13%) [ +0.20% +0.20% +0.00% / +0.13% +0.73% +0.79%] index_add_ spread : Elapsed 0.015 ms (1.518 ms / 100) 1.471 -> 1.472 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.68% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.472 ms / 100) 1.516 -> 1.517 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.517 ms / 100) 1.470 -> 1.472 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.470 ms / 100) 1.516 -> 1.517 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_add_ strided 7 : Elapsed 0.015 ms (1.517 ms / 100) 1.471 -> 1.472 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.472 ms / 100) 1.516 -> 1.517 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.66% +0.73%] index_add_ perm : Elapsed 0.015 ms (1.516 ms / 100) 1.470 -> 1.472 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.68% +0.68%] index_copy_ perm : Elapsed 0.015 ms (1.470 ms / 100) 1.515 -> 1.517 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.015 ms (1.517 ms / 100) 1.471 -> 1.471 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.75%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.471 ms / 100) 8.513 -> 8.529 ( +0.19%) [ +0.11% +0.00% +0.06% / +0.19% +0.20% +0.32%] index_select const : Elapsed 0.085 ms (8.522 ms / 100) 8.540 -> 8.541 ( +0.01%) [ +0.00% +0.01% +0.14% / +0.01% +0.09% +0.19%] index_select wrap : Elapsed 0.085 ms (8.540 ms / 100) 8.524 -> 8.536 ( +0.14%) [ +0.00% +0.09% +0.36% / +0.20% +0.16% +0.14%] index_select linear : Elapsed 0.085 ms (8.524 ms / 100) 8.522 -> 8.537 ( +0.18%) [ +0.00% +0.19% +0.09% / +0.18% +0.29% +0.20%] index_select reverse : Elapsed 0.085 ms (8.522 ms / 100) 8.505 -> 8.521 ( +0.19%) [ +0.29% +0.00% +0.35% / +0.25% +0.40% +0.19%] index_select skip64 : Elapsed 0.085 ms (8.530 ms / 100) 8.517 -> 8.517 ( +0.00%) [ +0.15% +0.11% +0.00% / +0.00% +0.08% +0.36%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.541 -> 8.537 ( -0.05%) [ +0.00% +0.04% +0.08% / -0.05% +0.07% +0.18%] index_select spread : Elapsed 0.085 ms (8.541 ms / 100) 8.527 -> 8.540 ( +0.15%) [ +0.30% +0.00% +0.26% / +0.15% +0.32% +0.34%] index_select strided 3 : Elapsed 0.086 ms (8.553 ms / 100) 8.538 -> 8.544 ( +0.07%) [ +0.18% +0.06% +0.00% / +0.07% +0.14% +0.27%] index_select random : Elapsed 0.086 ms (8.553 ms / 100) 8.515 -> 8.534 ( +0.22%) [ +0.31% +0.07% +0.00% / +0.43% +0.22% +0.41%] index_select random_sorted : Elapsed 0.085 ms (8.541 ms / 100) out_shape = [40, 4, 16, 20] in_shape = [5, 4, 16, 20] idx_dim = 0 B = [40, 4, 16, 20] (stride (1280, 16, 1, 64)) A = [5, 4, 16, 20] (stride (1280, 20, 80, 1)) dim = 0 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.28% +0.28%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.36% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.379 ms / 100) 1.421 -> 1.424 ( +0.21%) [ +0.07% +0.21% +0.00% / +0.21% +0.42% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.375 -> 1.378 ( +0.22%) [ +0.15% +0.00% +0.07% / +0.22% +0.51% +0.36%] index_copy_ reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.28% +0.28%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.44% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.377 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.28% +0.35%] index_add_ strided 3 : Elapsed 0.014 ms (1.423 ms / 100) 1.375 -> 1.380 ( +0.36%) [ +0.07% +0.07% +0.00% / +0.36% +0.36% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.420 -> 1.423 ( +0.21%) [ +0.21% +0.14% +0.00% / +0.21% +0.42% +0.42%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.65% +0.07% +0.00% / +0.22% +0.44% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.388 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.42% +0.35%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.380 ( +0.44%) [ +0.07% +0.29% +0.00% / +0.44% +0.58% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.375 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.56% +0.35%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.374 -> 1.379 ( +0.36%) [ +0.00% +0.00% +0.00% / +0.36% +0.51% +0.51%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.374 ms / 100) 8.181 -> 8.184 ( +0.04%) [ +0.05% +0.10% +0.00% / +0.04% +0.29% +0.20%] index_select const : Elapsed 0.082 ms (8.185 ms / 100) 8.202 -> 8.221 ( +0.23%) [ +0.00% +0.04% +0.27% / +0.51% +0.23% +0.34%] index_select wrap : Elapsed 0.082 ms (8.202 ms / 100) 8.214 -> 8.211 ( -0.04%) [ +0.07% +0.00% +0.05% / +0.09% +0.01% -0.04%] index_select linear : Elapsed 0.082 ms (8.220 ms / 100) 8.217 -> 8.211 ( -0.07%) [ +0.12% +0.02% +0.00% / -0.06% -0.07% +0.00%] index_select reverse : Elapsed 0.082 ms (8.227 ms / 100) 8.187 -> 8.176 ( -0.13%) [ +0.00% +0.02% +0.06% / +0.05% -0.13% +0.12%] index_select skip64 : Elapsed 0.082 ms (8.187 ms / 100) 8.185 -> 8.172 ( -0.16%) [ +0.00% +0.29% +0.00% / -0.16% +0.02% +0.26%] index_select skip256 : Elapsed 0.082 ms (8.185 ms / 100) 8.191 -> 8.184 ( -0.09%) [ +0.42% +0.00% +0.35% / -0.09% +0.23% +0.16%] index_select spread : Elapsed 0.082 ms (8.225 ms / 100) 8.200 -> 8.204 ( +0.05%) [ +0.30% +0.00% +0.22% / +0.15% +0.05% +0.09%] index_select strided 3 : Elapsed 0.082 ms (8.225 ms / 100) 8.202 -> 8.199 ( -0.04%) [ +0.34% +0.00% +0.34% / -0.04% +0.11% +0.41%] index_select random : Elapsed 0.082 ms (8.230 ms / 100) 8.204 -> 8.200 ( -0.05%) [ +0.00% +0.06% +0.02% / +0.07% -0.05% +0.00%] index_select random_sorted : Elapsed 0.082 ms (8.204 ms / 100) B = [40, 4, 16, 20] (stride (1, 12800, 800, 40)) A = [5, 4, 16, 20] (stride (1, 100, 400, 5)) dim = 0 1.322 -> 1.323 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.61% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.323 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.23% +0.08% +0.00% / +0.16% +0.55% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.280 ms / 100) 1.328 -> 1.329 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +0.75% +0.75%] index_add_ reverse : Elapsed 0.013 ms (1.331 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.31% +0.16% +0.00% / +0.08% +0.70% +0.78%] index_copy_ reverse : Elapsed 0.013 ms (1.288 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.76% +0.68%] index_add_ spread : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.86% +0.70%] index_copy_ spread : Elapsed 0.013 ms (1.277 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.76% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.78% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.70% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.76% +0.83%] index_add_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.78% +0.70%] index_copy_ perm : Elapsed 0.013 ms (1.278 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.78% +0.78%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) 7.939 -> 7.936 ( -0.04%) [ +0.00% +0.19% +0.03% / -0.04% +0.04% +0.14%] index_select const : Elapsed 0.079 ms (7.939 ms / 100) 7.924 -> 7.938 ( +0.18%) [ +0.14% +0.16% +0.00% / +0.18% +0.32% +0.49%] index_select wrap : Elapsed 0.079 ms (7.935 ms / 100) 7.933 -> 7.944 ( +0.14%) [ +0.06% +0.00% +0.10% / +0.14% +0.16% +0.55%] index_select linear : Elapsed 0.079 ms (7.938 ms / 100) 7.921 -> 7.943 ( +0.28%) [ +0.00% +0.33% +0.18% / +0.28% +0.40% +0.38%] index_select reverse : Elapsed 0.079 ms (7.921 ms / 100) 7.933 -> 7.931 ( -0.03%) [ +0.23% +0.01% +0.00% / -0.03% +0.39% +0.34%] index_select skip64 : Elapsed 0.080 ms (7.951 ms / 100) 7.939 -> 7.945 ( +0.08%) [ +0.15% +0.00% +0.03% / +0.09% +0.16% +0.08%] index_select skip256 : Elapsed 0.080 ms (7.951 ms / 100) 7.917 -> 7.935 ( +0.23%) [ +0.48% +0.11% +0.00% / +0.23% +0.42% +0.57%] index_select spread : Elapsed 0.080 ms (7.955 ms / 100) 7.944 -> 7.936 ( -0.10%) [ +0.11% +0.00% +0.16% / -0.10% +0.04% +0.25%] index_select strided 3 : Elapsed 0.080 ms (7.953 ms / 100) 7.924 -> 7.934 ( +0.13%) [ +0.00% +0.23% +0.15% / +0.13% +0.62% +0.48%] index_select random : Elapsed 0.079 ms (7.924 ms / 100) 7.926 -> 7.936 ( +0.13%) [ +0.18% +0.00% +0.14% / +0.37% +0.34% +0.13%] index_select random_sorted : Elapsed 0.079 ms (7.940 ms / 100) B = [40, 4, 16, 20] (stride (1, 800, 3200, 40)) A = [5, 4, 16, 20] (stride (80, 20, 400, 1)) dim = 0 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.14% +0.20% +0.00% / +0.00% +0.48% +0.48%] index_copy_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.46%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.41%] index_copy_ reverse : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.13% / -0.07% +0.53% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.48%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.07% +0.53% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.48%] index_copy_ strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.66% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.53%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.472 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.48% +0.48%] index_copy_ perm : Elapsed 0.015 ms (1.473 ms / 100) 1.520 -> 1.518 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.39% +0.53%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) 8.558 -> 8.555 ( -0.04%) [ +0.00% +0.23% +0.07% / -0.04% +0.09% +0.26%] index_select const : Elapsed 0.086 ms (8.558 ms / 100) 8.594 -> 8.580 ( -0.16%) [ +0.13% +0.21% +0.00% / -0.16% +0.12% +0.12%] index_select wrap : Elapsed 0.086 ms (8.605 ms / 100) 8.586 -> 8.590 ( +0.05%) [ +0.07% +0.00% +0.21% / +0.10% +0.08% +0.05%] index_select linear : Elapsed 0.086 ms (8.592 ms / 100) 8.590 -> 8.572 ( -0.21%) [ +0.13% +0.00% +0.14% / -0.21% +0.23% +0.12%] index_select reverse : Elapsed 0.086 ms (8.601 ms / 100) 8.564 -> 8.555 ( -0.11%) [ +0.00% +0.18% +0.01% / -0.11% +0.11% +0.11%] index_select skip64 : Elapsed 0.086 ms (8.564 ms / 100) 8.564 -> 8.577 ( +0.15%) [ +0.00% +0.08% +0.11% / +0.15% +0.28% +0.25%] index_select skip256 : Elapsed 0.086 ms (8.564 ms / 100) 8.595 -> 8.593 ( -0.02%) [ +0.00% +0.20% +0.02% / +0.03% +0.07% -0.02%] index_select spread : Elapsed 0.086 ms (8.595 ms / 100) 8.572 -> 8.580 ( +0.09%) [ +0.19% +0.28% +0.00% / +0.09% +0.21% +0.47%] index_select strided 3 : Elapsed 0.086 ms (8.588 ms / 100) 8.587 -> 8.597 ( +0.12%) [ +0.30% +0.00% +0.12% / +0.13% +0.12% +0.14%] index_select random : Elapsed 0.086 ms (8.613 ms / 100) 8.599 -> 8.593 ( -0.07%) [ +0.05% +0.00% +0.06% / -0.07% +0.06% -0.01%] index_select random_sorted : Elapsed 0.086 ms (8.603 ms / 100) B = [40, 4, 16, 20] (stride (16, 640, 1, 2560)) A = [5, 4, 16, 20] (stride (4, 1, 20, 320)) dim = 0 1.638 -> 1.638 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.43% +0.49%] index_add_ linear : Elapsed 0.016 ms (1.639 ms / 100) 1.584 -> 1.584 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.69% +0.63%] index_copy_ linear : Elapsed 0.016 ms (1.584 ms / 100) 1.638 -> 1.637 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.55% +0.73%] index_add_ reverse : Elapsed 0.016 ms (1.639 ms / 100) 1.583 -> 1.584 ( +0.06%) [ +0.06% +0.00% +0.25% / +0.06% +0.82% +0.76%] index_copy_ reverse : Elapsed 0.016 ms (1.584 ms / 100) 1.636 -> 1.638 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.61% +0.67%] index_add_ spread : Elapsed 0.016 ms (1.638 ms / 100) 1.581 -> 1.584 ( +0.19%) [ +0.00% +0.06% +0.06% / +0.19% +0.89% +0.82%] index_copy_ spread : Elapsed 0.016 ms (1.581 ms / 100) 1.636 -> 1.638 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.67% +0.67%] index_add_ strided 3 : Elapsed 0.016 ms (1.637 ms / 100) 1.584 -> 1.584 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.76% +0.69%] index_copy_ strided 3 : Elapsed 0.016 ms (1.584 ms / 100) 1.644 -> 1.644 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.016 ms (1.647 ms / 100) 1.589 -> 1.590 ( +0.06%) [ +0.00% +0.13% +0.06% / +0.06% +0.50% +0.69%] index_copy_ strided 7 : Elapsed 0.016 ms (1.589 ms / 100) 1.643 -> 1.642 ( -0.06%) [ +0.06% +0.00% +0.00% / -0.06% +0.43% +0.67%] index_add_ perm : Elapsed 0.016 ms (1.644 ms / 100) 1.588 -> 1.588 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.76% +0.69%] index_copy_ perm : Elapsed 0.016 ms (1.588 ms / 100) 1.637 -> 1.639 ( +0.12%) [ +0.00% +0.06% +0.00% / +0.12% +0.49% +0.61%] index_add_ perm_sorted : Elapsed 0.016 ms (1.637 ms / 100) 1.583 -> 1.584 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.88% +0.63%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.584 ms / 100) 8.541 -> 8.558 ( +0.20%) [ +0.04% +0.00% +0.12% / +0.22% +0.20% +0.20%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.555 -> 8.544 ( -0.13%) [ +0.00% +0.11% +0.06% / +0.15% +0.19% -0.13%] index_select wrap : Elapsed 0.086 ms (8.555 ms / 100) 8.545 -> 8.557 ( +0.14%) [ +0.28% +0.00% +0.08% / +0.14% +0.44% +0.29%] index_select linear : Elapsed 0.086 ms (8.569 ms / 100) 8.546 -> 8.560 ( +0.16%) [ +0.12% +0.15% +0.00% / +0.29% +0.41% +0.16%] index_select reverse : Elapsed 0.086 ms (8.556 ms / 100) 8.547 -> 8.552 ( +0.06%) [ +0.13% +0.00% +0.08% / +0.06% +0.39% +0.23%] index_select skip64 : Elapsed 0.086 ms (8.558 ms / 100) 8.545 -> 8.554 ( +0.11%) [ +0.08% +0.15% +0.00% / +0.11% +0.36% +0.14%] index_select skip256 : Elapsed 0.086 ms (8.552 ms / 100) 8.555 -> 8.554 ( -0.01%) [ +0.01% +0.26% +0.00% / -0.01% +0.22% +0.43%] index_select spread : Elapsed 0.086 ms (8.556 ms / 100) 8.543 -> 8.545 ( +0.02%) [ +0.20% +0.30% +0.00% / +0.23% +0.39% +0.02%] index_select strided 3 : Elapsed 0.086 ms (8.560 ms / 100) 8.550 -> 8.555 ( +0.06%) [ +0.00% +0.21% +0.16% / +0.06% +0.16% +0.15%] index_select random : Elapsed 0.085 ms (8.550 ms / 100) 8.561 -> 8.555 ( -0.07%) [ +0.00% +0.01% +0.04% / -0.07% +0.41% +0.33%] index_select random_sorted : Elapsed 0.086 ms (8.561 ms / 100) out_shape = [5, 40, 16, 20] in_shape = [5, 4, 16, 20] idx_dim = 1 B = [5, 40, 16, 20] (stride (12800, 1, 800, 40)) A = [5, 4, 16, 20] (stride (64, 16, 1, 320)) dim = 1 1.290 -> 1.290 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_add_ linear : Elapsed 0.013 ms (1.291 ms / 100) 1.253 -> 1.257 ( +0.32%) [ +0.08% +0.00% +0.00% / +0.32% +0.64% +0.64%] index_copy_ linear : Elapsed 0.013 ms (1.254 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.62% +0.47%] index_add_ reverse : Elapsed 0.013 ms (1.286 ms / 100) 1.241 -> 1.242 ( +0.08%) [ +0.00% +0.32% +0.08% / +0.08% +0.73% +0.56%] index_copy_ reverse : Elapsed 0.012 ms (1.241 ms / 100) 1.307 -> 1.307 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.84% +0.77%] index_add_ spread : Elapsed 0.013 ms (1.309 ms / 100) 1.258 -> 1.260 ( +0.16%) [ +0.08% +0.24% +0.00% / +0.16% +0.56% +0.64%] index_copy_ spread : Elapsed 0.013 ms (1.259 ms / 100) 1.291 -> 1.295 ( +0.31%) [ +0.15% +0.00% +0.08% / +0.31% +0.85% +0.77%] index_add_ strided 3 : Elapsed 0.013 ms (1.293 ms / 100) 1.255 -> 1.258 ( +0.24%) [ +0.00% +0.00% +0.08% / +0.24% +2.55% +0.88%] index_copy_ strided 3 : Elapsed 0.013 ms (1.255 ms / 100) 1.288 -> 1.288 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.78% +0.85%] index_add_ strided 7 : Elapsed 0.013 ms (1.288 ms / 100) 1.244 -> 1.245 ( +0.08%) [ +0.00% +0.56% +0.16% / +0.08% +0.88% +0.96%] index_copy_ strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.306 -> 1.309 ( +0.23%) [ +0.08% +0.08% +0.00% / +0.23% +0.84% +0.92%] index_add_ perm : Elapsed 0.013 ms (1.307 ms / 100) 1.256 -> 1.256 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.80% +0.80%] index_copy_ perm : Elapsed 0.013 ms (1.256 ms / 100) 1.290 -> 1.295 ( +0.39%) [ +0.00% +0.00% +0.00% / +0.70% +0.39% +0.62%] index_add_ perm_sorted : Elapsed 0.013 ms (1.290 ms / 100) 1.245 -> 1.250 ( +0.40%) [ +0.00% +0.24% +0.08% / +0.40% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.245 ms / 100) 8.771 -> 8.794 ( +0.26%) [ +0.02% +0.15% +0.00% / +0.27% +0.49% +0.26%] index_select const : Elapsed 0.088 ms (8.773 ms / 100) 8.806 -> 8.806 ( +0.00%) [ +0.00% +0.06% +0.03% / +0.00% +0.00% +0.19%] index_select wrap : Elapsed 0.088 ms (8.806 ms / 100) 8.793 -> 8.814 ( +0.24%) [ +0.23% +0.09% +0.00% / +0.24% +0.30% +0.32%] index_select linear : Elapsed 0.088 ms (8.813 ms / 100) 8.805 -> 8.805 ( +0.00%) [ +0.00% +0.06% +0.07% / +0.05% +0.00% +0.02%] index_select reverse : Elapsed 0.088 ms (8.805 ms / 100) 8.783 -> 8.782 ( -0.01%) [ +0.13% +0.00% +0.18% / -0.01% +0.16% +0.20%] index_select skip64 : Elapsed 0.088 ms (8.794 ms / 100) 8.791 -> 8.797 ( +0.07%) [ +0.00% +0.08% +0.01% / +0.09% +0.07% +0.13%] index_select skip256 : Elapsed 0.088 ms (8.791 ms / 100) 8.810 -> 8.817 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.12% +0.12% +0.08%] index_select spread : Elapsed 0.088 ms (8.813 ms / 100) 8.812 -> 8.811 ( -0.01%) [ +0.14% +0.06% +0.00% / -0.01% +0.35% +0.11%] index_select strided 3 : Elapsed 0.088 ms (8.824 ms / 100) 8.803 -> 8.808 ( +0.06%) [ +0.00% +0.15% +0.03% / +0.15% +0.12% +0.06%] index_select random : Elapsed 0.088 ms (8.803 ms / 100) 8.816 -> 8.814 ( -0.02%) [ +0.06% +0.00% +0.06% / +0.02% +0.34% -0.02%] index_select random_sorted : Elapsed 0.088 ms (8.821 ms / 100) B = [5, 40, 16, 20] (stride (320, 1600, 20, 1)) A = [5, 4, 16, 20] (stride (64, 16, 1, 320)) dim = 1 1.291 -> 1.293 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.54% +0.54%] index_add_ linear : Elapsed 0.013 ms (1.293 ms / 100) 1.249 -> 1.250 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.56% +0.48%] index_copy_ linear : Elapsed 0.012 ms (1.249 ms / 100) 1.292 -> 1.295 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.54% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.292 ms / 100) 1.248 -> 1.250 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.48% +0.48%] index_copy_ reverse : Elapsed 0.012 ms (1.249 ms / 100) 1.290 -> 1.292 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.47% +0.62%] index_add_ spread : Elapsed 0.013 ms (1.292 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.48% +0.56%] index_copy_ spread : Elapsed 0.012 ms (1.242 ms / 100) 1.292 -> 1.295 ( +0.23%) [ +0.08% +0.00% +0.08% / +0.23% +0.31% +0.31%] index_add_ strided 3 : Elapsed 0.013 ms (1.293 ms / 100) 1.242 -> 1.248 ( +0.48%) [ +0.08% +0.08% +0.00% / +0.56% +0.64% +0.48%] index_copy_ strided 3 : Elapsed 0.012 ms (1.243 ms / 100) 1.291 -> 1.293 ( +0.15%) [ +0.15% +0.23% +0.00% / +0.15% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.013 ms (1.293 ms / 100) 1.243 -> 1.247 ( +0.32%) [ +0.00% +0.32% +0.00% / +0.32% +0.56% +0.40%] index_copy_ strided 7 : Elapsed 0.012 ms (1.243 ms / 100) 1.291 -> 1.292 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.54% +0.46%] index_add_ perm : Elapsed 0.013 ms (1.292 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.81% +0.56%] index_copy_ perm : Elapsed 0.012 ms (1.242 ms / 100) 1.292 -> 1.294 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.46% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.292 ms / 100) 1.249 -> 1.250 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.32% +0.32%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.249 ms / 100) 8.699 -> 8.691 ( -0.09%) [ +0.15% +0.21% +0.00% / +0.30% -0.09% +0.08%] index_select const : Elapsed 0.087 ms (8.712 ms / 100) 8.718 -> 8.703 ( -0.17%) [ +0.05% +0.08% +0.00% / -0.17% -0.01% -0.06%] index_select wrap : Elapsed 0.087 ms (8.722 ms / 100) 8.708 -> 8.704 ( -0.05%) [ +0.06% +0.03% +0.00% / +0.09% -0.05% +0.08%] index_select linear : Elapsed 0.087 ms (8.713 ms / 100) 8.710 -> 8.700 ( -0.11%) [ +0.03% +0.10% +0.00% / -0.11% +0.09% +0.23%] index_select reverse : Elapsed 0.087 ms (8.713 ms / 100) 8.688 -> 8.698 ( +0.12%) [ +0.29% +0.24% +0.00% / +0.23% +0.12% +0.18%] index_select skip64 : Elapsed 0.087 ms (8.713 ms / 100) 8.703 -> 8.697 ( -0.07%) [ +0.20% +0.00% +0.15% / +0.17% -0.07% +0.01%] index_select skip256 : Elapsed 0.087 ms (8.720 ms / 100) 8.739 -> 8.719 ( -0.23%) [ +0.01% +0.00% +0.06% / -0.23% -0.16% -0.08%] index_select spread : Elapsed 0.087 ms (8.740 ms / 100) 8.707 -> 8.707 ( +0.00%) [ +0.18% +0.00% +0.05% / +0.15% +0.31% +0.00%] index_select strided 3 : Elapsed 0.087 ms (8.723 ms / 100) 8.709 -> 8.710 ( +0.01%) [ +0.00% +0.13% +0.28% / +0.31% +0.01% +0.17%] index_select random : Elapsed 0.087 ms (8.709 ms / 100) 8.734 -> 8.726 ( -0.09%) [ +0.06% +0.00% +0.02% / -0.09% +0.02% -0.02%] index_select random_sorted : Elapsed 0.087 ms (8.739 ms / 100) B = [5, 40, 16, 20] (stride (800, 20, 4000, 1)) A = [5, 4, 16, 20] (stride (16, 80, 1, 320)) dim = 1 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_add_ linear : Elapsed 0.014 ms (1.369 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.60% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.323 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.58% +0.66%] index_add_ reverse : Elapsed 0.014 ms (1.368 ms / 100) 1.322 -> 1.324 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.68% +0.61%] index_copy_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.368 -> 1.369 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.58% +0.58%] index_add_ spread : Elapsed 0.014 ms (1.369 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.15% +0.30% +0.00% / +0.08% +0.61% +0.61%] index_copy_ spread : Elapsed 0.013 ms (1.324 ms / 100) 1.367 -> 1.371 ( +0.29%) [ +0.07% +0.07% +0.00% / +0.29% +0.88% +0.66%] index_add_ strided 3 : Elapsed 0.014 ms (1.368 ms / 100) 1.321 -> 1.330 ( +0.68%) [ +0.08% +0.15% +0.00% / +0.68% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.80% +0.88%] index_add_ strided 7 : Elapsed 0.014 ms (1.368 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.76%] index_copy_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.368 -> 1.368 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm : Elapsed 0.014 ms (1.369 ms / 100) 1.321 -> 1.325 ( +0.30%) [ +0.08% +0.15% +0.00% / +0.30% +0.83% +0.76%] index_copy_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.367 -> 1.369 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.73% +0.73%] index_add_ perm_sorted : Elapsed 0.014 ms (1.369 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.68% +0.68%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.322 ms / 100) 9.159 -> 9.146 ( -0.14%) [ +0.05% +0.01% +0.00% / +0.23% -0.14% -0.10%] index_select const : Elapsed 0.092 ms (9.164 ms / 100) 9.178 -> 9.155 ( -0.25%) [ +0.00% +0.04% +0.08% / +0.15% +0.03% -0.25%] index_select wrap : Elapsed 0.092 ms (9.178 ms / 100) 9.160 -> 9.167 ( +0.08%) [ +0.00% +0.44% +0.16% / +0.19% +0.08% +0.19%] index_select linear : Elapsed 0.092 ms (9.160 ms / 100) 9.178 -> 9.168 ( -0.11%) [ +0.10% +0.03% +0.00% / +0.10% +0.10% -0.11%] index_select reverse : Elapsed 0.092 ms (9.187 ms / 100) 9.150 -> 9.153 ( +0.03%) [ +0.00% +0.03% +0.24% / +0.22% +0.22% +0.03%] index_select skip64 : Elapsed 0.092 ms (9.150 ms / 100) 9.159 -> 9.147 ( -0.13%) [ +0.00% +0.13% +0.01% / -0.01% +0.40% -0.13%] index_select skip256 : Elapsed 0.092 ms (9.159 ms / 100) 9.184 -> 9.176 ( -0.09%) [ +0.11% +0.00% +0.07% / +0.04% +0.26% -0.09%] index_select spread : Elapsed 0.092 ms (9.194 ms / 100) 9.178 -> 9.169 ( -0.10%) [ +0.04% +0.00% +0.16% / -0.03% +0.00% -0.10%] index_select strided 3 : Elapsed 0.092 ms (9.182 ms / 100) 9.179 -> 9.182 ( +0.03%) [ +0.07% +0.00% +0.09% / +0.04% +0.09% +0.03%] index_select random : Elapsed 0.092 ms (9.185 ms / 100) 9.174 -> 9.190 ( +0.17%) [ +0.00% +0.16% +0.03% / +0.17% +0.29% +0.32%] index_select random_sorted : Elapsed 0.092 ms (9.174 ms / 100) B = [5, 40, 16, 20] (stride (1, 100, 4000, 5)) A = [5, 4, 16, 20] (stride (64, 16, 1, 320)) dim = 1 1.375 -> 1.375 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.58% +0.58%] index_add_ linear : Elapsed 0.014 ms (1.376 ms / 100) 1.330 -> 1.331 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.53% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.330 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.15% +0.00% / +0.07% +0.51% +0.51%] index_add_ reverse : Elapsed 0.014 ms (1.377 ms / 100) 1.331 -> 1.330 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.38% +0.38%] index_copy_ reverse : Elapsed 0.013 ms (1.331 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.65% +0.65%] index_add_ spread : Elapsed 0.014 ms (1.376 ms / 100) 1.330 -> 1.330 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.53% +0.45%] index_copy_ spread : Elapsed 0.013 ms (1.331 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.44% +0.44%] index_add_ strided 3 : Elapsed 0.014 ms (1.377 ms / 100) 1.330 -> 1.337 ( +0.53%) [ +0.00% +0.23% +0.00% / +0.60% +0.53% +0.53%] index_copy_ strided 3 : Elapsed 0.013 ms (1.330 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.15% +0.00% +0.00% / +0.22% +0.36% +0.36%] index_add_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.331 -> 1.332 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.53% +0.45%] index_copy_ strided 7 : Elapsed 0.013 ms (1.332 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.95% +0.65%] index_add_ perm : Elapsed 0.014 ms (1.376 ms / 100) 1.329 -> 1.332 ( +0.23%) [ +0.15% +0.15% +0.00% / +0.23% +0.60% +0.53%] index_copy_ perm : Elapsed 0.013 ms (1.331 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.07% +0.00% +0.15% / +0.15% +0.51% +0.58%] index_add_ perm_sorted : Elapsed 0.014 ms (1.376 ms / 100) 1.331 -> 1.331 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.45% +0.38%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.331 ms / 100) 9.179 -> 9.223 ( +0.48%) [ +0.23% +0.21% +0.00% / +0.53% +0.62% +0.48%] index_select const : Elapsed 0.092 ms (9.200 ms / 100) 9.218 -> 9.222 ( +0.04%) [ +0.00% +0.00% +0.08% / +0.11% +0.04% +0.24%] index_select wrap : Elapsed 0.092 ms (9.218 ms / 100) 9.197 -> 9.199 ( +0.02%) [ +0.04% +0.22% +0.00% / +0.02% +0.34% +0.41%] index_select linear : Elapsed 0.092 ms (9.201 ms / 100) 9.199 -> 9.208 ( +0.10%) [ +0.21% +0.02% +0.00% / +0.10% +0.37% +0.21%] index_select reverse : Elapsed 0.092 ms (9.218 ms / 100) 9.188 -> 9.204 ( +0.17%) [ +0.26% +0.23% +0.00% / +0.23% +0.17% +0.28%] index_select skip64 : Elapsed 0.092 ms (9.212 ms / 100) 9.202 -> 9.209 ( +0.08%) [ +0.01% +0.15% +0.00% / +0.14% +0.25% +0.08%] index_select skip256 : Elapsed 0.092 ms (9.203 ms / 100) 9.229 -> 9.218 ( -0.12%) [ +0.04% +0.00% +0.03% / -0.12% +0.04% +0.03%] index_select spread : Elapsed 0.092 ms (9.233 ms / 100) 9.205 -> 9.212 ( +0.08%) [ +0.02% +0.00% +0.16% / +0.12% +0.08% +0.14%] index_select strided 3 : Elapsed 0.092 ms (9.207 ms / 100) 9.204 -> 9.204 ( +0.00%) [ +0.05% +0.00% +0.10% / +0.00% +0.29% +0.16%] index_select random : Elapsed 0.092 ms (9.209 ms / 100) 9.220 -> 9.233 ( +0.14%) [ +0.05% +0.00% +0.07% / +0.14% +0.30% +0.30%] index_select random_sorted : Elapsed 0.092 ms (9.225 ms / 100) B = [5, 40, 16, 20] (stride (1, 5, 4000, 200)) A = [5, 4, 16, 20] (stride (1, 80, 5, 320)) dim = 1 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.55% +0.55%] index_add_ linear : Elapsed 0.013 ms (1.280 ms / 100) 1.242 -> 1.244 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.48% +0.32%] index_copy_ linear : Elapsed 0.012 ms (1.243 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.23% +0.47% +0.00% / +0.08% +0.78% +0.78%] index_add_ reverse : Elapsed 0.013 ms (1.289 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.56% +0.64%] index_copy_ reverse : Elapsed 0.012 ms (1.247 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.70% +0.70%] index_add_ spread : Elapsed 0.013 ms (1.281 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.56% +0.56%] index_copy_ spread : Elapsed 0.012 ms (1.241 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.013 ms (1.281 ms / 100) 1.241 -> 1.242 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.64% +0.40%] index_copy_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.00% +0.16% +0.16% / +0.08% +1.13% +0.48%] index_copy_ strided 7 : Elapsed 0.012 ms (1.242 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.70%] index_add_ perm : Elapsed 0.013 ms (1.278 ms / 100) 1.239 -> 1.239 ( +0.00%) [ +0.32% +0.00% +0.08% / +0.00% +0.73% +0.73%] index_copy_ perm : Elapsed 0.012 ms (1.243 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.39% +0.00% / +0.00% +0.85% +0.78%] index_add_ perm_sorted : Elapsed 0.013 ms (1.287 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.80% +0.56%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.245 ms / 100) 8.747 -> 8.758 ( +0.13%) [ +0.00% +0.07% +0.01% / +0.13% +0.25% +0.30%] index_select const : Elapsed 0.087 ms (8.747 ms / 100) 8.775 -> 8.777 ( +0.02%) [ +0.03% +0.00% +0.08% / +0.21% +0.02% +0.08%] index_select wrap : Elapsed 0.088 ms (8.778 ms / 100) 8.760 -> 8.774 ( +0.16%) [ +0.00% +0.07% +0.08% / +0.16% +0.16% +0.25%] index_select linear : Elapsed 0.088 ms (8.760 ms / 100) 8.767 -> 8.768 ( +0.01%) [ +0.07% +0.00% +0.11% / +0.13% +0.09% +0.01%] index_select reverse : Elapsed 0.088 ms (8.773 ms / 100) 8.753 -> 8.755 ( +0.02%) [ +0.14% +0.00% +0.01% / +0.02% +0.26% +0.21%] index_select skip64 : Elapsed 0.088 ms (8.765 ms / 100) 8.747 -> 8.746 ( -0.01%) [ +0.17% +0.09% +0.00% / +0.01% -0.01% +0.22%] index_select skip256 : Elapsed 0.088 ms (8.762 ms / 100) 8.774 -> 8.775 ( +0.01%) [ +0.14% +0.02% +0.00% / +0.01% +0.07% +0.14%] index_select spread : Elapsed 0.088 ms (8.786 ms / 100) 8.765 -> 8.780 ( +0.17%) [ +0.00% +0.14% +0.00% / +0.17% +0.21% +0.25%] index_select strided 3 : Elapsed 0.088 ms (8.765 ms / 100) 8.775 -> 8.765 ( -0.11%) [ +0.25% +0.00% +0.06% / +0.06% -0.11% +0.10%] index_select random : Elapsed 0.088 ms (8.797 ms / 100) 8.779 -> 8.780 ( +0.01%) [ +0.00% +0.01% +0.03% / +0.01% +0.16% +0.03%] index_select random_sorted : Elapsed 0.088 ms (8.779 ms / 100) B = [5, 40, 16, 20] (stride (640, 16, 1, 3200)) A = [5, 4, 16, 20] (stride (320, 1600, 1, 16)) dim = 1 1.317 -> 1.316 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.38% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.279 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.15% +0.23%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.23% +0.23%] index_add_ spread : Elapsed 0.013 ms (1.317 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.47% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.279 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.23% +0.38%] index_add_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.47% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.23% +0.23%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.279 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.15% +0.23%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.280 -> 1.279 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.47%] index_copy_ perm : Elapsed 0.013 ms (1.280 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.23% +0.15%] index_add_ perm_sorted : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.279 ms / 100) 9.169 -> 9.163 ( -0.07%) [ +0.00% +0.22% +0.10% / -0.07% +0.23% +0.10%] index_select const : Elapsed 0.092 ms (9.169 ms / 100) 9.180 -> 9.197 ( +0.19%) [ +0.21% +0.00% +0.24% / +0.22% +0.22% +0.19%] index_select wrap : Elapsed 0.092 ms (9.199 ms / 100) 9.189 -> 9.182 ( -0.08%) [ +0.09% +0.00% +0.00% / -0.08% +0.27% +0.28%] index_select linear : Elapsed 0.092 ms (9.197 ms / 100) 9.166 -> 9.183 ( +0.19%) [ +0.22% +0.13% +0.00% / +0.19% +0.23% +0.45%] index_select reverse : Elapsed 0.092 ms (9.186 ms / 100) 9.161 -> 9.163 ( +0.02%) [ +0.21% +0.03% +0.00% / +0.02% +0.26% +0.17%] index_select skip64 : Elapsed 0.092 ms (9.180 ms / 100) 9.173 -> 9.190 ( +0.19%) [ +0.01% +0.09% +0.00% / +0.23% +0.19% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.174 ms / 100) 9.182 -> 9.186 ( +0.04%) [ +0.00% +0.04% +0.02% / +0.04% +0.11% +0.42%] index_select spread : Elapsed 0.092 ms (9.182 ms / 100) 9.190 -> 9.191 ( +0.01%) [ +0.30% +0.00% +0.07% / +0.01% +0.08% +0.02%] index_select strided 3 : Elapsed 0.092 ms (9.218 ms / 100) 9.201 -> 9.187 ( -0.15%) [ +0.02% +0.00% +0.00% / -0.15% +0.04% +0.24%] index_select random : Elapsed 0.092 ms (9.203 ms / 100) 9.169 -> 9.193 ( +0.26%) [ +0.21% +0.16% +0.00% / +0.26% +0.35% +0.32%] index_select random_sorted : Elapsed 0.092 ms (9.188 ms / 100) B = [5, 40, 16, 20] (stride (40, 1, 200, 3200)) A = [5, 4, 16, 20] (stride (20, 1600, 100, 1)) dim = 1 1.315 -> 1.312 ( -0.23%) [ +0.23% +0.08% +0.00% / -0.23% +0.30% +0.23%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.08% +0.16% / -0.08% +0.47% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.275 ms / 100) 1.313 -> 1.317 ( +0.30%) [ +0.30% +0.00% +0.00% / +0.30% +0.38% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.323 -> 1.325 ( +0.15%) [ +0.30% +0.30% +0.00% / +0.15% +0.45% +0.30%] index_add_ spread : Elapsed 0.013 ms (1.327 ms / 100) 1.285 -> 1.287 ( +0.16%) [ +0.23% +0.08% +0.00% / +0.16% +0.16% +0.39%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.61% +0.46%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.70% +0.86%] index_copy_ strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.319 -> 1.318 ( -0.08%) [ +0.00% +0.15% +0.00% / -0.08% +0.45% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.278 ( -0.08%) [ +0.00% +0.08% +0.16% / -0.08% +0.63% +0.70%] index_copy_ strided 7 : Elapsed 0.013 ms (1.279 ms / 100) 1.325 -> 1.326 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.15% +0.08% +0.15%] index_add_ perm : Elapsed 0.013 ms (1.327 ms / 100) 1.286 -> 1.287 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.08% +0.16%] index_copy_ perm : Elapsed 0.013 ms (1.287 ms / 100) 1.323 -> 1.327 ( +0.30%) [ +0.00% +0.00% +0.08% / +0.38% +0.30% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.323 ms / 100) 1.285 -> 1.287 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.285 ms / 100) 9.215 -> 9.214 ( -0.01%) [ +0.00% +0.00% +0.09% / -0.01% +0.25% +0.04%] index_select const : Elapsed 0.092 ms (9.215 ms / 100) 9.238 -> 9.253 ( +0.16%) [ +0.21% +0.13% +0.00% / +0.24% +0.16% +0.49%] index_select wrap : Elapsed 0.093 ms (9.257 ms / 100) 9.241 -> 9.227 ( -0.15%) [ +0.00% +0.19% +0.05% / -0.15% +0.05% +0.21%] index_select linear : Elapsed 0.092 ms (9.241 ms / 100) 9.243 -> 9.248 ( +0.05%) [ +0.24% +0.04% +0.00% / +0.19% +0.22% +0.05%] index_select reverse : Elapsed 0.093 ms (9.265 ms / 100) 9.208 -> 9.223 ( +0.16%) [ +0.00% +0.10% +0.26% / +0.16% +0.38% +0.35%] index_select skip64 : Elapsed 0.092 ms (9.208 ms / 100) 9.217 -> 9.207 ( -0.11%) [ +0.08% +0.18% +0.00% / -0.11% +0.02% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.224 ms / 100) 9.238 -> 9.239 ( +0.01%) [ +0.00% +0.03% +0.00% / +0.01% +0.16% +0.12%] index_select spread : Elapsed 0.092 ms (9.238 ms / 100) 9.243 -> 9.240 ( -0.03%) [ +0.27% +0.00% +0.02% / -0.03% +0.19% +0.06%] index_select strided 3 : Elapsed 0.093 ms (9.268 ms / 100) 9.240 -> 9.248 ( +0.09%) [ +0.00% +0.10% +0.13% / +0.12% +0.09% +0.17%] index_select random : Elapsed 0.092 ms (9.240 ms / 100) 9.232 -> 9.245 ( +0.14%) [ +0.11% +0.13% +0.00% / +0.14% +0.22% +0.48%] index_select random_sorted : Elapsed 0.092 ms (9.242 ms / 100) B = [5, 40, 16, 20] (stride (40, 1, 200, 3200)) A = [5, 4, 16, 20] (stride (16, 80, 1, 320)) dim = 1 1.356 -> 1.356 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.66%] index_add_ linear : Elapsed 0.014 ms (1.356 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.68% +0.68%] index_copy_ linear : Elapsed 0.013 ms (1.321 ms / 100) 1.356 -> 1.356 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.66%] index_add_ reverse : Elapsed 0.014 ms (1.357 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.53% +0.53%] index_copy_ reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.29% +0.00% +0.00%] index_add_ spread : Elapsed 0.014 ms (1.370 ms / 100) 1.334 -> 1.328 ( -0.45%) [ +0.00% +0.00% +0.00% / -0.07% -0.45% -0.15%] index_copy_ spread : Elapsed 0.013 ms (1.334 ms / 100) 1.383 -> 1.384 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.51% +0.65%] index_add_ strided 3 : Elapsed 0.014 ms (1.383 ms / 100) 1.342 -> 1.345 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.67% +0.75%] index_copy_ strided 3 : Elapsed 0.013 ms (1.343 ms / 100) 1.373 -> 1.372 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.73% +0.66%] index_add_ strided 7 : Elapsed 0.014 ms (1.373 ms / 100) 1.335 -> 1.337 ( +0.15%) [ +0.00% +0.22% +0.00% / +0.15% +0.15% +0.22%] index_copy_ strided 7 : Elapsed 0.013 ms (1.335 ms / 100) 1.363 -> 1.366 ( +0.22%) [ +0.00% +0.07% +0.00% / +0.22% +0.29% +0.22%] index_add_ perm : Elapsed 0.014 ms (1.363 ms / 100) 1.323 -> 1.325 ( +0.15%) [ +0.00% +0.23% +0.00% / +0.15% +0.38% +0.38%] index_copy_ perm : Elapsed 0.013 ms (1.323 ms / 100) 1.360 -> 1.362 ( +0.15%) [ +0.00% +0.00% +0.07% / +0.15% +0.37% +0.51%] index_add_ perm_sorted : Elapsed 0.014 ms (1.360 ms / 100) 1.323 -> 1.326 ( +0.23%) [ +0.23% +0.00% +0.08% / +0.23% +0.60% +0.76%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.326 ms / 100) 9.247 -> 9.247 ( +0.00%) [ +0.10% +0.08% +0.00% / +0.00% +0.14% +0.11%] index_select const : Elapsed 0.093 ms (9.256 ms / 100) 9.270 -> 9.273 ( +0.03%) [ +0.16% +0.00% +0.05% / +0.05% +0.20% +0.03%] index_select wrap : Elapsed 0.093 ms (9.285 ms / 100) 9.266 -> 9.261 ( -0.05%) [ +0.10% +0.05% +0.00% / +0.02% -0.05% -0.03%] index_select linear : Elapsed 0.093 ms (9.275 ms / 100) 9.255 -> 9.264 ( +0.10%) [ +0.21% +0.14% +0.00% / +0.10% +0.17% +0.22%] index_select reverse : Elapsed 0.093 ms (9.274 ms / 100) 9.247 -> 9.251 ( +0.04%) [ +0.03% +0.12% +0.00% / +0.31% +0.04% +0.16%] index_select skip64 : Elapsed 0.093 ms (9.250 ms / 100) 9.238 -> 9.243 ( +0.05%) [ +0.12% +0.00% +0.22% / +0.05% +0.08% +0.11%] index_select skip256 : Elapsed 0.092 ms (9.249 ms / 100) 9.285 -> 9.288 ( +0.03%) [ +0.18% +0.00% +0.00% / +0.20% +0.03% +0.08%] index_select spread : Elapsed 0.093 ms (9.302 ms / 100) 9.272 -> 9.277 ( +0.05%) [ +0.11% +0.00% +0.13% / +0.23% +0.20% +0.05%] index_select strided 3 : Elapsed 0.093 ms (9.282 ms / 100) 9.288 -> 9.281 ( -0.08%) [ +0.00% +0.06% +0.02% / -0.06% -0.08% +0.16%] index_select random : Elapsed 0.093 ms (9.288 ms / 100) 9.287 -> 9.285 ( -0.02%) [ +0.00% +0.13% +0.02% / -0.02% -0.01% +0.11%] index_select random_sorted : Elapsed 0.093 ms (9.287 ms / 100) out_shape = [5, 4, 40, 20] in_shape = [5, 4, 16, 20] idx_dim = 2 B = [5, 4, 40, 20] (stride (3200, 1, 4, 160)) A = [5, 4, 16, 20] (stride (320, 1600, 20, 1)) dim = 2 3.928 -> 3.930 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.61% +0.59%] index_add_ linear : Elapsed 0.039 ms (3.931 ms / 100) 3.811 -> 3.811 ( +0.00%) [ +0.13% +0.03% +0.00% / +0.00% +0.79% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.816 ms / 100) 3.942 -> 3.944 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.51% +0.53%] index_add_ reverse : Elapsed 0.039 ms (3.942 ms / 100) 3.816 -> 3.817 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.58% +0.50%] index_copy_ reverse : Elapsed 0.038 ms (3.816 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.48% +0.51%] index_add_ spread : Elapsed 0.039 ms (3.933 ms / 100) 3.822 -> 3.825 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.52% +0.50%] index_copy_ spread : Elapsed 0.038 ms (3.822 ms / 100) 3.932 -> 3.934 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.53% +0.56%] index_add_ strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.00% +0.32% +0.00% / +0.05% +0.58% +0.58%] index_copy_ strided 3 : Elapsed 0.038 ms (3.804 ms / 100) 3.940 -> 3.940 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.63% +0.61%] index_add_ strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 3.814 -> 3.813 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.60% +0.58%] index_copy_ strided 7 : Elapsed 0.038 ms (3.815 ms / 100) 3.930 -> 3.930 ( +0.00%) [ +0.00% +0.03% +0.05% / +0.00% +0.51% +0.53%] index_add_ perm : Elapsed 0.039 ms (3.930 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.58% +0.52%] index_copy_ perm : Elapsed 0.038 ms (3.814 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.51% +0.48%] index_add_ perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) 3.814 -> 3.814 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.63% +0.47%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.816 ms / 100) 5.560 -> 5.565 ( +0.09%) [ +0.11% +0.00% +0.05% / +0.09% +0.14% +0.16%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.04% +0.09% +0.00% / -0.09% +0.25% +0.22%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.567 -> 5.570 ( +0.05%) [ +0.02% +0.00% +0.04% / +0.05% +0.16% +0.27%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.569 -> 5.572 ( +0.05%) [ +0.07% +0.00% +0.04% / +0.05% +0.14% +0.11%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.11% +0.11% +0.00% / +0.07% +0.18% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.572 -> 5.563 ( -0.16%) [ +0.04% +0.02% +0.00% / -0.16% -0.09% -0.11%] index_select skip256 : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.571 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.13% +0.09% +0.09%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.567 -> 5.567 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.18% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.09% +0.00% +0.07% / +0.02% +0.18% +0.23%] index_select strided 5 : Elapsed 0.056 ms (5.569 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.00% +0.05% +0.07% / -0.02% +0.13% +0.31%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.13% +0.00% +0.18% / +0.05% +0.07% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.571 ms / 100) 5.566 -> 5.571 ( +0.09%) [ +0.14% +0.16% +0.00% / +0.18% +0.11% +0.09%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.16% +0.02% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [5, 4, 40, 20] (stride (1, 4000, 100, 5)) A = [5, 4, 16, 20] (stride (20, 100, 400, 1)) dim = 2 3.937 -> 3.940 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.81% +0.76%] index_add_ linear : Elapsed 0.039 ms (3.939 ms / 100) 3.812 -> 3.815 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.81% +0.73%] index_copy_ linear : Elapsed 0.038 ms (3.812 ms / 100) 3.925 -> 3.925 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.71% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.925 ms / 100) 3.799 -> 3.804 ( +0.13%) [ +0.00% +0.11% +0.03% / +0.13% +0.74% +0.76%] index_copy_ reverse : Elapsed 0.038 ms (3.799 ms / 100) 3.932 -> 3.939 ( +0.18%) [ +0.00% +0.00% +0.00% / +0.18% +0.64% +0.61%] index_add_ spread : Elapsed 0.039 ms (3.932 ms / 100) 3.818 -> 3.821 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.52% +0.55%] index_copy_ spread : Elapsed 0.038 ms (3.821 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 3.801 -> 3.804 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.71% +0.68%] index_copy_ strided 3 : Elapsed 0.038 ms (3.802 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.82% +0.82%] index_add_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.799 -> 3.803 ( +0.11%) [ +0.00% +0.03% +0.05% / +0.11% +0.76% +0.71%] index_copy_ strided 7 : Elapsed 0.038 ms (3.799 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.76% +0.74%] index_add_ perm : Elapsed 0.039 ms (3.941 ms / 100) 3.811 -> 3.812 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.03% +0.79% +0.73%] index_copy_ perm : Elapsed 0.038 ms (3.813 ms / 100) 3.927 -> 3.928 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.84% +0.84%] index_add_ perm_sorted : Elapsed 0.039 ms (3.927 ms / 100) 3.799 -> 3.803 ( +0.11%) [ +0.00% +0.03% +0.00% / +0.11% +0.82% +0.82%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.799 ms / 100) 5.554 -> 5.554 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.13% +0.13% +0.00%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.569 -> 5.560 ( -0.16%) [ +0.11% +0.02% +0.00% / -0.11% -0.16% -0.05%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.00% +0.07% +0.04% / +0.07% -0.04% +0.11%] index_select linear : Elapsed 0.056 ms (5.565 ms / 100) 5.564 -> 5.562 ( -0.04%) [ +0.00% +0.09% +0.07% / +0.14% -0.04% +0.11%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.20% +0.13% +0.00% / +0.11% +0.22% +0.14%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.00% +0.05% +0.04% / -0.07% -0.07% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.555 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.13% +0.16% +0.00% / +0.14% +0.09% +0.02%] index_select spread : Elapsed 0.056 ms (5.567 ms / 100) 5.565 -> 5.555 ( -0.18%) [ +0.04% +0.00% +0.05% / +0.11% -0.07% -0.18%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.09% +0.00% +0.00% / +0.00% -0.04% -0.07%] index_select strided 5 : Elapsed 0.056 ms (5.571 ms / 100) 5.561 -> 5.558 ( -0.05%) [ +0.13% +0.14% +0.00% / -0.05% +0.14% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.549 -> 5.552 ( +0.05%) [ +0.16% +0.07% +0.00% / +0.05% +0.18% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.558 ms / 100) 5.569 -> 5.558 ( -0.20%) [ +0.00% +0.00% +0.07% / -0.09% -0.20% -0.16%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.14% +0.14% +0.00% / +0.31% +0.11% +0.22%] index_select random_sorted : Elapsed 0.056 ms (5.565 ms / 100) B = [5, 4, 40, 20] (stride (40, 4000, 1, 200)) A = [5, 4, 16, 20] (stride (1, 5, 400, 20)) dim = 2 4.153 -> 4.153 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.72% +0.75%] index_add_ linear : Elapsed 0.042 ms (4.154 ms / 100) 4.013 -> 4.011 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.60% +0.60%] index_copy_ linear : Elapsed 0.040 ms (4.013 ms / 100) 4.159 -> 4.162 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.60% +0.58%] index_add_ reverse : Elapsed 0.042 ms (4.161 ms / 100) 4.020 -> 4.021 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.55% +0.52%] index_copy_ reverse : Elapsed 0.040 ms (4.021 ms / 100) 4.139 -> 4.140 ( +0.02%) [ +0.00% +0.05% +0.02% / +0.02% +0.56% +0.53%] index_add_ spread : Elapsed 0.041 ms (4.139 ms / 100) 4.011 -> 4.016 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.67% +0.57%] index_copy_ spread : Elapsed 0.040 ms (4.011 ms / 100) 4.147 -> 4.148 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.53% +0.51%] index_add_ strided 3 : Elapsed 0.041 ms (4.148 ms / 100) 4.010 -> 4.014 ( +0.10%) [ +0.02% +0.00% +0.00% / +0.10% +0.55% +0.52%] index_copy_ strided 3 : Elapsed 0.040 ms (4.011 ms / 100) 4.159 -> 4.160 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.58% +0.63%] index_add_ strided 7 : Elapsed 0.042 ms (4.160 ms / 100) 4.021 -> 4.026 ( +0.12%) [ +0.02% +0.05% +0.00% / +0.12% +0.50% +0.50%] index_copy_ strided 7 : Elapsed 0.040 ms (4.022 ms / 100) 4.155 -> 4.160 ( +0.12%) [ +0.05% +0.05% +0.00% / +0.12% +0.48% +0.46%] index_add_ perm : Elapsed 0.042 ms (4.157 ms / 100) 4.014 -> 4.031 ( +0.42%) [ +0.02% +0.00% +0.00% / +0.42% +0.55% +0.50%] index_copy_ perm : Elapsed 0.040 ms (4.015 ms / 100) 4.158 -> 4.159 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.042 ms (4.158 ms / 100) 4.016 -> 4.023 ( +0.17%) [ +0.00% +0.02% +0.02% / +0.17% +0.47% +0.42%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.016 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.09% +0.16% +0.00% / -0.02% +0.11% +0.18%] index_select const : Elapsed 0.056 ms (5.567 ms / 100) 5.558 -> 5.572 ( +0.25%) [ +0.13% +0.00% +0.09% / +0.32% +0.25% +0.27%] index_select wrap : Elapsed 0.056 ms (5.565 ms / 100) 5.569 -> 5.565 ( -0.07%) [ +0.00% +0.02% +0.04% / -0.07% +0.00% +0.09%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.563 -> 5.572 ( +0.16%) [ +0.13% +0.14% +0.00% / +0.16% +0.16% +0.18%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.565 -> 5.560 ( -0.09%) [ +0.13% +0.11% +0.00% / +0.11% -0.09% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.568 -> 5.562 ( -0.11%) [ +0.07% +0.07% +0.00% / +0.02% -0.02% -0.11%] index_select skip256 : Elapsed 0.056 ms (5.572 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.05% +0.05% +0.00% / +0.13% +0.09% -0.11%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.00% +0.14% +0.05% / +0.05% +0.09% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.561 ms / 100) 5.565 -> 5.561 ( -0.07%) [ +0.16% +0.05% +0.00% / -0.07% -0.02% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.557 -> 5.570 ( +0.23%) [ +0.13% +0.14% +0.00% / +0.23% +0.25% +0.31%] index_select strided 7 : Elapsed 0.056 ms (5.564 ms / 100) 5.558 -> 5.566 ( +0.14%) [ +0.18% +0.00% +0.05% / +0.14% +0.27% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.568 ms / 100) 5.559 -> 5.560 ( +0.02%) [ +0.00% +0.22% +0.16% / +0.02% +0.20% +0.09%] index_select random : Elapsed 0.056 ms (5.559 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.02% -0.05% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [5, 4, 40, 20] (stride (1, 5, 400, 20)) A = [5, 4, 16, 20] (stride (4, 1, 20, 320)) dim = 2 3.954 -> 3.956 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.66% +0.73%] index_add_ linear : Elapsed 0.040 ms (3.954 ms / 100) 3.808 -> 3.818 ( +0.26%) [ +0.05% +0.00% +0.08% / +0.26% +0.71% +0.76%] index_copy_ linear : Elapsed 0.038 ms (3.810 ms / 100) 3.973 -> 3.975 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.65% +0.60%] index_add_ reverse : Elapsed 0.040 ms (3.973 ms / 100) 3.811 -> 3.815 ( +0.10%) [ +0.00% +0.03% +0.03% / +0.10% +0.71% +0.66%] index_copy_ reverse : Elapsed 0.038 ms (3.811 ms / 100) 3.957 -> 3.959 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.68% +0.66%] index_add_ spread : Elapsed 0.040 ms (3.958 ms / 100) 3.809 -> 3.814 ( +0.13%) [ +0.00% +0.00% +0.03% / +0.13% +0.66% +0.60%] index_copy_ spread : Elapsed 0.038 ms (3.809 ms / 100) 3.950 -> 3.958 ( +0.20%) [ +0.03% +0.15% +0.00% / +0.20% +0.86% +0.86%] index_add_ strided 3 : Elapsed 0.040 ms (3.951 ms / 100) 3.800 -> 3.810 ( +0.26%) [ +0.05% +0.08% +0.00% / +0.26% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.038 ms (3.802 ms / 100) 3.971 -> 3.973 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.65% +0.71%] index_add_ strided 7 : Elapsed 0.040 ms (3.971 ms / 100) 3.809 -> 3.813 ( +0.11%) [ +0.00% +0.05% +0.03% / +0.11% +0.71% +0.71%] index_copy_ strided 7 : Elapsed 0.038 ms (3.809 ms / 100) 3.953 -> 3.958 ( +0.13%) [ +0.00% +0.08% +0.00% / +0.13% +0.76% +0.73%] index_add_ perm : Elapsed 0.040 ms (3.953 ms / 100) 3.806 -> 3.810 ( +0.11%) [ +0.00% +0.11% +0.03% / +0.11% +0.84% +0.79%] index_copy_ perm : Elapsed 0.038 ms (3.806 ms / 100) 3.949 -> 3.956 ( +0.18%) [ +0.00% +0.08% +0.05% / +0.18% +0.76% +0.96%] index_add_ perm_sorted : Elapsed 0.039 ms (3.949 ms / 100) 3.800 -> 3.803 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.76% +0.79%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.803 ms / 100) 5.491 -> 5.485 ( -0.11%) [ +0.09% +0.02% +0.00% / +0.04% -0.11% -0.11%] index_select const : Elapsed 0.055 ms (5.496 ms / 100) 5.497 -> 5.493 ( -0.07%) [ +0.05% +0.00% +0.05% / +0.15% -0.05% -0.07%] index_select wrap : Elapsed 0.055 ms (5.500 ms / 100) 5.503 -> 5.494 ( -0.16%) [ +0.11% +0.00% +0.02% / +0.04% -0.04% -0.16%] index_select linear : Elapsed 0.055 ms (5.509 ms / 100) 5.494 -> 5.490 ( -0.07%) [ +0.00% +0.22% +0.11% / +0.31% -0.07% -0.02%] index_select reverse : Elapsed 0.055 ms (5.494 ms / 100) 5.490 -> 5.491 ( +0.02%) [ +0.04% +0.00% +0.05% / +0.16% +0.02% +0.13%] index_select skip64 : Elapsed 0.055 ms (5.492 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.00% +0.20% +0.13% / +0.24% +0.09% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.497 -> 5.490 ( -0.13%) [ +0.07% +0.00% +0.09% / +0.02% -0.13% +0.04%] index_select spread : Elapsed 0.055 ms (5.501 ms / 100) 5.496 -> 5.488 ( -0.15%) [ +0.04% +0.00% +0.11% / +0.16% -0.05% -0.15%] index_select strided 3 : Elapsed 0.055 ms (5.498 ms / 100) 5.501 -> 5.494 ( -0.13%) [ +0.00% +0.02% +0.05% / -0.02% -0.07% -0.13%] index_select strided 5 : Elapsed 0.055 ms (5.501 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.18% +0.00% +0.16% / +0.09% +0.00% +0.31%] index_select strided 7 : Elapsed 0.055 ms (5.501 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.04% +0.13% +0.00% / +0.05% +0.00% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.493 ms / 100) 5.497 -> 5.496 ( -0.02%) [ +0.15% +0.07% +0.00% / +0.09% +0.04% -0.02%] index_select random : Elapsed 0.055 ms (5.505 ms / 100) 5.499 -> 5.494 ( -0.09%) [ +0.00% +0.05% +0.20% / -0.02% -0.07% -0.09%] index_select random_sorted : Elapsed 0.055 ms (5.499 ms / 100) B = [5, 4, 40, 20] (stride (160, 40, 1, 800)) A = [5, 4, 16, 20] (stride (1, 100, 400, 5)) dim = 2 3.709 -> 3.712 ( +0.08%) [ +0.00% +0.05% +0.03% / +0.08% +0.57% +0.57%] index_add_ linear : Elapsed 0.037 ms (3.709 ms / 100) 3.571 -> 3.577 ( +0.17%) [ +0.06% +0.11% +0.00% / +0.17% +0.76% +0.64%] index_copy_ linear : Elapsed 0.036 ms (3.573 ms / 100) 3.716 -> 3.724 ( +0.22%) [ +0.00% +0.22% +0.30% / +0.22% +0.78% +0.62%] index_add_ reverse : Elapsed 0.037 ms (3.716 ms / 100) 3.586 -> 3.594 ( +0.22%) [ +0.00% +0.03% +0.06% / +0.22% +0.64% +0.53%] index_copy_ reverse : Elapsed 0.036 ms (3.586 ms / 100) 3.711 -> 3.713 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.49% +0.49%] index_add_ spread : Elapsed 0.037 ms (3.711 ms / 100) 3.580 -> 3.590 ( +0.28%) [ +0.06% +0.06% +0.00% / +0.28% +0.64% +0.53%] index_copy_ spread : Elapsed 0.036 ms (3.582 ms / 100) 3.720 -> 3.721 ( +0.03%) [ +0.16% +0.00% +0.00% / +0.03% +0.48% +0.38%] index_add_ strided 3 : Elapsed 0.037 ms (3.726 ms / 100) 3.585 -> 3.592 ( +0.20%) [ +0.11% +0.00% +0.03% / +0.20% +0.45% +0.45%] index_copy_ strided 3 : Elapsed 0.036 ms (3.589 ms / 100) 3.725 -> 3.729 ( +0.11%) [ +0.00% +0.11% +0.05% / +0.11% +0.30% +0.43%] index_add_ strided 7 : Elapsed 0.037 ms (3.725 ms / 100) 3.589 -> 3.598 ( +0.25%) [ +0.03% +0.03% +0.00% / +0.25% +0.39% +0.56%] index_copy_ strided 7 : Elapsed 0.036 ms (3.590 ms / 100) 3.711 -> 3.713 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.51% +0.49%] index_add_ perm : Elapsed 0.037 ms (3.711 ms / 100) 3.574 -> 3.582 ( +0.22%) [ +0.03% +0.14% +0.00% / +0.22% +0.56% +0.45%] index_copy_ perm : Elapsed 0.036 ms (3.575 ms / 100) 3.709 -> 3.713 ( +0.11%) [ +0.11% +0.05% +0.00% / +0.11% +0.54% +0.51%] index_add_ perm_sorted : Elapsed 0.037 ms (3.713 ms / 100) 3.575 -> 3.583 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.45% +0.42%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.575 ms / 100) 5.470 -> 5.471 ( +0.02%) [ +0.11% +0.05% +0.00% / +0.02% +0.07% +0.22%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.469 -> 5.468 ( -0.02%) [ +0.11% +0.00% +0.04% / -0.02% +0.20% +0.24%] index_select wrap : Elapsed 0.055 ms (5.475 ms / 100) 5.471 -> 5.474 ( +0.05%) [ +0.09% +0.13% +0.00% / +0.15% +0.13% +0.05%] index_select linear : Elapsed 0.055 ms (5.476 ms / 100) 5.465 -> 5.475 ( +0.18%) [ +0.40% +0.15% +0.00% / +0.18% +0.20% +0.18%] index_select reverse : Elapsed 0.055 ms (5.487 ms / 100) 5.478 -> 5.470 ( -0.15%) [ +0.02% +0.00% +0.00% / +0.00% -0.07% -0.15%] index_select skip64 : Elapsed 0.055 ms (5.479 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.11% +0.20% +0.00% / +0.05% -0.04% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.16% +0.13% +0.00% / +0.09% +0.07% +0.07%] index_select spread : Elapsed 0.055 ms (5.482 ms / 100) 5.470 -> 5.473 ( +0.05%) [ +0.11% +0.00% +0.00% / +0.05% +0.20% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.476 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.04% +0.00% +0.09% / +0.00% -0.02% +0.13%] index_select strided 5 : Elapsed 0.055 ms (5.473 ms / 100) 5.463 -> 5.476 ( +0.24%) [ +0.27% +0.00% +0.07% / +0.24% +0.31% +0.24%] index_select strided 7 : Elapsed 0.055 ms (5.478 ms / 100) 5.471 -> 5.475 ( +0.07%) [ +0.00% +0.16% +0.07% / +0.07% +0.18% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.471 ms / 100) 5.471 -> 5.472 ( +0.02%) [ +0.11% +0.13% +0.00% / +0.02% +0.26% +0.05%] index_select random : Elapsed 0.055 ms (5.477 ms / 100) 5.473 -> 5.472 ( -0.02%) [ +0.07% +0.00% +0.09% / +0.13% -0.02% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.477 ms / 100) out_shape = [5, 4, 16, 40] in_shape = [5, 4, 16, 20] idx_dim = 3 B = [5, 4, 16, 40] (stride (2560, 640, 1, 16)) A = [5, 4, 16, 20] (stride (1, 5, 20, 320)) dim = 3 2.397 -> 2.408 ( +0.46%) [ +0.17% +0.04% +0.00% / +0.46% +0.58% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.393 -> 2.404 ( +0.46%) [ +0.00% +0.00% +0.00% / +0.46% +0.63% +0.50%] index_copy_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.397 -> 2.407 ( +0.42%) [ +0.04% +0.00% +0.17% / +0.54% +0.42% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.390 -> 2.403 ( +0.54%) [ +0.04% +0.00% +0.04% / +0.54% +0.79% +0.75%] index_copy_ reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.395 -> 2.406 ( +0.46%) [ +0.46% +0.13% +0.00% / +0.79% +0.58% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.390 -> 2.395 ( +0.21%) [ +0.21% +0.00% +0.04% / +0.46% +0.21% +0.59%] index_copy_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.402 -> 2.409 ( +0.29%) [ +0.00% +0.04% +0.00% / +0.50% +0.33% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.402 ms / 100) 2.385 -> 2.398 ( +0.55%) [ +0.00% +0.25% +0.04% / +0.71% +0.67% +0.55%] index_copy_ strided 3 : Elapsed 0.024 ms (2.385 ms / 100) 2.400 -> 2.407 ( +0.29%) [ +0.04% +0.00% +0.00% / +0.63% +0.29% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.401 ms / 100) 2.386 -> 2.402 ( +0.67%) [ +0.25% +0.00% +0.21% / +0.67% +0.67% +0.67%] index_copy_ strided 7 : Elapsed 0.024 ms (2.392 ms / 100) 2.397 -> 2.408 ( +0.46%) [ +0.00% +0.04% +0.25% / +0.79% +0.63% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.397 ms / 100) 2.387 -> 2.402 ( +0.63%) [ +0.00% +0.08% +0.00% / +0.63% +0.71% +0.67%] index_copy_ perm : Elapsed 0.024 ms (2.387 ms / 100) 2.395 -> 2.407 ( +0.50%) [ +0.13% +0.00% +0.08% / +0.63% +0.50% +0.71%] index_add_ perm_sorted : Elapsed 0.024 ms (2.398 ms / 100) 2.386 -> 2.403 ( +0.71%) [ +0.17% +0.04% +0.00% / +0.71% +0.71% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.390 ms / 100) 4.416 -> 4.412 ( -0.09%) [ +0.05% +0.00% +0.02% / -0.09% +0.05% -0.09%] index_select const : Elapsed 0.044 ms (4.418 ms / 100) 4.418 -> 4.420 ( +0.05%) [ +0.18% +0.23% +0.00% / +0.16% +0.05% +0.16%] index_select wrap : Elapsed 0.044 ms (4.426 ms / 100) 4.417 -> 4.421 ( +0.09%) [ +0.16% +0.18% +0.00% / +0.16% +0.09% +0.16%] index_select linear : Elapsed 0.044 ms (4.424 ms / 100) 4.422 -> 4.429 ( +0.16%) [ +0.09% +0.00% +0.00% / +0.16% +0.18% +0.18%] index_select reverse : Elapsed 0.044 ms (4.426 ms / 100) 4.413 -> 4.413 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.09% +0.00% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.415 -> 4.414 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.07% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.416 ms / 100) 4.420 -> 4.422 ( +0.05%) [ +0.02% +0.00% +0.02% / +0.05% +0.32% +0.20%] index_select spread : Elapsed 0.044 ms (4.421 ms / 100) 4.417 -> 4.418 ( +0.02%) [ +0.00% +0.25% +0.11% / +0.27% +0.27% +0.02%] index_select strided 3 : Elapsed 0.044 ms (4.417 ms / 100) 4.416 -> 4.417 ( +0.02%) [ +0.00% +0.16% +0.11% / +0.20% +0.02% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.416 ms / 100) 4.417 -> 4.421 ( +0.09%) [ +0.14% +0.23% +0.00% / +0.09% +0.14% +0.11%] index_select strided 7 : Elapsed 0.044 ms (4.423 ms / 100) 4.416 -> 4.417 ( +0.02%) [ +0.09% +0.02% +0.00% / +0.02% +0.14% +0.20%] index_select strided 8 : Elapsed 0.044 ms (4.420 ms / 100) 4.416 -> 4.416 ( +0.00%) [ +0.00% +0.00% +0.02% / +0.00% +0.16% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.416 ms / 100) 4.417 -> 4.420 ( +0.07%) [ +0.02% +0.00% +0.09% / +0.07% +0.09% +0.16%] index_select random : Elapsed 0.044 ms (4.418 ms / 100) 4.410 -> 4.425 ( +0.34%) [ +0.29% +0.00% +0.07% / +0.34% +0.39% +0.57%] index_select random_sorted : Elapsed 0.044 ms (4.423 ms / 100) B = [5, 4, 16, 40] (stride (16, 3200, 1, 80)) A = [5, 4, 16, 20] (stride (1, 1600, 100, 5)) dim = 3 2.415 -> 2.424 ( +0.37%) [ +0.12% +0.00% +0.04% / +0.46% +0.50% +0.37%] index_add_ linear : Elapsed 0.024 ms (2.418 ms / 100) 2.409 -> 2.419 ( +0.42%) [ +0.00% +0.08% +0.08% / +0.42% +0.62% +0.66%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.420 ( +0.58%) [ +0.00% +0.12% +0.12% / +0.58% +0.71% +0.83%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.400 -> 2.420 ( +0.83%) [ +0.13% +0.17% +0.00% / +0.83% +1.08% +1.00%] index_copy_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.412 ( +0.42%) [ +0.17% +0.00% +0.04% / +0.42% +1.17% +1.12%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.392 -> 2.415 ( +0.96%) [ +0.50% +0.00% +0.38% / +0.96% +1.30% +1.51%] index_copy_ spread : Elapsed 0.024 ms (2.404 ms / 100) 2.407 -> 2.425 ( +0.75%) [ +0.29% +0.00% +0.25% / +0.79% +0.83% +0.75%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.403 -> 2.420 ( +0.71%) [ +0.00% +0.08% +0.00% / +0.75% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.408 -> 2.424 ( +0.66%) [ +0.12% +0.00% +0.17% / +0.66% +0.83% +0.71%] index_add_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.402 -> 2.417 ( +0.62%) [ +0.17% +0.00% +0.21% / +0.62% +0.79% +0.62%] index_copy_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.408 -> 2.419 ( +0.46%) [ +0.04% +0.12% +0.00% / +0.58% +0.50% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.416 ( +0.42%) [ +0.00% +0.00% +0.08% / +0.46% +0.46% +0.42%] index_copy_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.411 -> 2.416 ( +0.21%) [ +0.00% +0.08% +0.08% / +0.58% +0.50% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) 2.407 -> 2.416 ( +0.37%) [ +0.04% +0.17% +0.00% / +0.62% +0.46% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 4.433 -> 4.440 ( +0.16%) [ +0.00% +0.23% +0.09% / +0.16% +0.20% +0.16%] index_select const : Elapsed 0.044 ms (4.433 ms / 100) 4.453 -> 4.444 ( -0.20%) [ +0.00% +0.02% +0.00% / -0.02% -0.02% -0.20%] index_select wrap : Elapsed 0.045 ms (4.453 ms / 100) 4.446 -> 4.448 ( +0.04%) [ +0.00% +0.02% +0.09% / +0.34% +0.04% +0.16%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.447 -> 4.451 ( +0.09%) [ +0.04% +0.16% +0.00% / +0.09% +0.16% +0.13%] index_select reverse : Elapsed 0.044 ms (4.449 ms / 100) 4.440 -> 4.435 ( -0.11%) [ +0.05% +0.18% +0.00% / -0.11% -0.02% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.442 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.07% +0.23% +0.00% / +0.18% +0.00% +0.23%] index_select skip256 : Elapsed 0.044 ms (4.439 ms / 100) 4.441 -> 4.449 ( +0.18%) [ +0.14% +0.09% +0.00% / +0.18% +0.29% +0.27%] index_select spread : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.448 ( +0.16%) [ +0.23% +0.14% +0.00% / +0.16% +0.41% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.451 ms / 100) 4.445 -> 4.452 ( +0.16%) [ +0.07% +0.16% +0.00% / +0.16% +0.18% +0.16%] index_select strided 5 : Elapsed 0.044 ms (4.448 ms / 100) 4.444 -> 4.443 ( -0.02%) [ +0.16% +0.00% +0.00% / -0.02% +0.25% +0.11%] index_select strided 7 : Elapsed 0.045 ms (4.451 ms / 100) 4.439 -> 4.443 ( +0.09%) [ +0.00% +0.32% +0.14% / +0.09% +0.23% +0.23%] index_select strided 8 : Elapsed 0.044 ms (4.439 ms / 100) 4.439 -> 4.448 ( +0.20%) [ +0.00% +0.11% +0.16% / +0.20% +0.20% +0.25%] index_select strided 16 : Elapsed 0.044 ms (4.439 ms / 100) 4.450 -> 4.453 ( +0.07%) [ +0.00% +0.18% +0.22% / +0.09% +0.13% +0.07%] index_select random : Elapsed 0.044 ms (4.450 ms / 100) 4.447 -> 4.451 ( +0.09%) [ +0.13% +0.18% +0.00% / +0.11% +0.11% +0.09%] index_select random_sorted : Elapsed 0.045 ms (4.453 ms / 100) out_shape = [40, 4, 20, 16] in_shape = [5, 4, 20, 16] idx_dim = 0 B = [40, 4, 20, 16] (stride (1280, 320, 16, 1)) A = [5, 4, 20, 16] (stride (1, 1600, 80, 5)) dim = 0 0.579 -> 0.579 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.00%] index_add_ linear : Elapsed 0.006 ms (0.579 ms / 100) 0.561 -> 0.561 ( +0.00%) [ +0.00% +0.36% +0.71% / +0.00% +0.36% +0.18%] index_copy_ linear : Elapsed 0.006 ms (0.561 ms / 100) 0.579 -> 0.579 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.35% +0.17%] index_add_ reverse : Elapsed 0.006 ms (0.579 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +0.18% +0.00% +0.00% / +0.18% +0.36% +0.18%] index_copy_ reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.578 -> 0.580 ( +0.35%) [ +0.17% +0.17% +0.00% / +9.52% +0.35% +0.35%] index_add_ spread : Elapsed 0.006 ms (0.579 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.18% +0.18% +0.00% / +0.89% +0.36% +0.36%] index_copy_ spread : Elapsed 0.006 ms (0.562 ms / 100) 0.577 -> 0.578 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.52% +0.87%] index_add_ strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +0.71% +0.53%] index_copy_ strided 3 : Elapsed 0.006 ms (0.561 ms / 100) 0.578 -> 0.580 ( +0.35%) [ +0.00% +0.00% +0.17% / +0.35% +0.69% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.578 ms / 100) 0.562 -> 0.561 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +1.25% +0.36%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.578 -> 0.579 ( +0.17%) [ +0.35% +0.17% +0.00% / +0.17% +0.52% +0.52%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.562 -> 0.562 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.00% +0.53%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.579 -> 0.580 ( +0.17%) [ +0.00% +0.35% +0.00% / +0.17% +0.17% +1.04%] index_add_ perm_sorted : Elapsed 0.006 ms (0.579 ms / 100) 0.562 -> 0.563 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.53% +0.18% +0.18%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 4.578 -> 4.576 ( -0.04%) [ +0.04% +0.00% +0.02% / +0.00% +0.09% -0.04%] index_select const : Elapsed 0.046 ms (4.580 ms / 100) 4.566 -> 4.575 ( +0.20%) [ +0.24% +0.39% +0.00% / +0.28% +0.20% +0.31%] index_select wrap : Elapsed 0.046 ms (4.577 ms / 100) 4.574 -> 4.567 ( -0.15%) [ +0.00% +0.31% +0.17% / +0.20% -0.11% -0.15%] index_select linear : Elapsed 0.046 ms (4.574 ms / 100) 4.574 -> 4.575 ( +0.02%) [ +0.31% +0.24% +0.00% / +0.17% +0.04% +0.02%] index_select reverse : Elapsed 0.046 ms (4.588 ms / 100) 4.582 -> 4.575 ( -0.15%) [ +0.04% +0.07% +0.00% / +0.00% -0.15% -0.13%] index_select skip64 : Elapsed 0.046 ms (4.584 ms / 100) 4.582 -> 4.584 ( +0.04%) [ +0.13% +0.00% +0.00% / +0.04% +0.28% +0.20%] index_select skip256 : Elapsed 0.046 ms (4.588 ms / 100) 4.576 -> 4.576 ( +0.00%) [ +0.17% +0.07% +0.00% / +0.20% +0.00% +0.11%] index_select spread : Elapsed 0.046 ms (4.584 ms / 100) 4.572 -> 4.579 ( +0.15%) [ +0.09% +0.00% +0.07% / +0.15% +0.17% +0.37%] index_select strided 3 : Elapsed 0.046 ms (4.576 ms / 100) 4.595 -> 4.578 ( -0.37%) [ +0.00% +0.00% +0.02% / +0.11% -0.37% -0.30%] index_select random : Elapsed 0.046 ms (4.595 ms / 100) 4.592 -> 4.571 ( -0.46%) [ +0.04% +0.00% +0.07% / +0.20% -0.26% -0.46%] index_select random_sorted : Elapsed 0.046 ms (4.594 ms / 100) B = [40, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 0 fill_cnt = 5 0.901 -> 0.905 ( +0.44%) [ +0.22% +0.33% +0.00% / +0.44% +0.55% +0.67%] index_fill_ const : Elapsed 0.009 ms (0.903 ms / 100) 0.903 -> 0.905 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.44% +0.33%] index_fill_ linear : Elapsed 0.009 ms (0.905 ms / 100) 0.904 -> 0.904 ( +0.00%) [ +0.33% +0.22% +0.00% / +0.00% +0.22% +0.22%] index_fill_ reverse : Elapsed 0.009 ms (0.907 ms / 100) 0.901 -> 0.906 ( +0.55%) [ +0.11% +0.11% +0.00% / +0.55% +0.55% +0.55%] index_fill_ skip64 : Elapsed 0.009 ms (0.902 ms / 100) 0.902 -> 0.904 ( +0.22%) [ +0.00% +0.00% +0.11% / +0.22% +0.55% +0.44%] index_fill_ skip256 : Elapsed 0.009 ms (0.902 ms / 100) 0.901 -> 0.901 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.55% +0.55%] index_fill_ spread : Elapsed 0.009 ms (0.902 ms / 100) 0.902 -> 0.906 ( +0.44%) [ +0.00% +0.22% +0.11% / +0.44% +0.55% +0.44%] index_fill_ strided 3 : Elapsed 0.009 ms (0.902 ms / 100) 0.905 -> 0.905 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.33% +0.11%] index_fill_ strided 5 : Elapsed 0.009 ms (0.906 ms / 100) 0.903 -> 0.904 ( +0.11%) [ +0.11% +0.33% +0.00% / +0.11% +0.33% +0.44%] index_fill_ strided 7 : Elapsed 0.009 ms (0.904 ms / 100) 0.902 -> 0.902 ( +0.00%) [ +0.11% +0.22% +0.00% / +0.00% +0.44% +0.55%] index_fill_ strided 8 : Elapsed 0.009 ms (0.903 ms / 100) 0.902 -> 0.903 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.44% +0.55%] index_fill_ strided 16 : Elapsed 0.009 ms (0.903 ms / 100) 0.906 -> 0.905 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% +0.44% +0.33%] index_fill_ random : Elapsed 0.009 ms (0.906 ms / 100) 0.905 -> 0.906 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.55% +0.55%] index_fill_ random_sorted : Elapsed 0.009 ms (0.906 ms / 100) 0.903 -> 0.906 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.33% +0.44% +0.44%] index_fill_ perm : Elapsed 0.009 ms (0.903 ms / 100) 0.906 -> 0.907 ( +0.11%) [ +0.11% +0.00% +0.11% / +0.11% +0.33% +0.22%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.907 ms / 100) B = [40, 4, 20, 16] (stride (1280, 1, 64, 4)) A = [5, 4, 20, 16] (stride (1, 5, 20, 400)) dim = 0 1.516 -> 1.516 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.73% +0.53%] index_add_ linear : Elapsed 0.015 ms (1.517 ms / 100) 1.460 -> 1.461 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.75% +0.62%] index_copy_ linear : Elapsed 0.015 ms (1.462 ms / 100) 1.515 -> 1.517 ( +0.13%) [ +0.00% +0.07% +0.00% / +0.13% +0.53% +0.53%] index_add_ reverse : Elapsed 0.015 ms (1.515 ms / 100) 1.461 -> 1.468 ( +0.48%) [ +0.14% +0.21% +0.00% / +0.48% +0.55% +0.48%] index_copy_ reverse : Elapsed 0.015 ms (1.463 ms / 100) 1.519 -> 1.524 ( +0.33%) [ +0.07% +0.07% +0.00% / +0.33% +0.79% +0.72%] index_add_ spread : Elapsed 0.015 ms (1.520 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.68% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.464 ms / 100) 1.516 -> 1.518 ( +0.13%) [ +0.00% +0.07% +0.07% / +0.13% +0.59% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.516 ms / 100) 1.460 -> 1.468 ( +0.55%) [ +0.00% +0.07% +0.41% / +0.55% +0.62% +0.62%] index_copy_ strided 3 : Elapsed 0.015 ms (1.460 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.465 -> 1.467 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.75% +0.55%] index_copy_ strided 7 : Elapsed 0.015 ms (1.467 ms / 100) 1.518 -> 1.520 ( +0.13%) [ +0.20% +0.00% +0.00% / +0.13% +0.66% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.465 -> 1.471 ( +0.41%) [ +0.00% +0.00% +0.00% / +0.41% +0.68% +0.68%] index_copy_ perm : Elapsed 0.015 ms (1.465 ms / 100) 1.517 -> 1.517 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.518 ms / 100) 1.463 -> 1.462 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.62% +0.48%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.464 ms / 100) 8.194 -> 8.199 ( +0.06%) [ +0.04% +0.00% +0.00% / +0.06% +0.52% +0.45%] index_select const : Elapsed 0.082 ms (8.197 ms / 100) 8.199 -> 8.208 ( +0.11%) [ +0.24% +0.09% +0.00% / +0.11% +0.41% +0.39%] index_select wrap : Elapsed 0.082 ms (8.219 ms / 100) 8.197 -> 8.213 ( +0.20%) [ +0.15% +0.00% +0.06% / +0.34% +0.39% +0.20%] index_select linear : Elapsed 0.082 ms (8.209 ms / 100) 8.188 -> 8.202 ( +0.17%) [ +0.34% +0.24% +0.00% / +0.17% +0.70% +0.26%] index_select reverse : Elapsed 0.082 ms (8.216 ms / 100) 8.179 -> 8.196 ( +0.21%) [ +0.34% +0.00% +0.28% / +0.21% +0.46% +0.53%] index_select skip64 : Elapsed 0.082 ms (8.207 ms / 100) 8.196 -> 8.210 ( +0.17%) [ +0.00% +0.11% +0.21% / +0.17% +0.45% +0.18%] index_select skip256 : Elapsed 0.082 ms (8.196 ms / 100) 8.194 -> 8.187 ( -0.09%) [ +0.16% +0.00% +0.06% / -0.09% +0.35% +0.31%] index_select spread : Elapsed 0.082 ms (8.207 ms / 100) 8.195 -> 8.211 ( +0.20%) [ +0.11% +0.07% +0.00% / +0.20% +0.48% +0.51%] index_select strided 3 : Elapsed 0.082 ms (8.204 ms / 100) 8.185 -> 8.211 ( +0.32%) [ +0.26% +0.35% +0.00% / +0.34% +0.32% +0.40%] index_select random : Elapsed 0.082 ms (8.206 ms / 100) 8.195 -> 8.205 ( +0.12%) [ +0.23% +0.05% +0.00% / +0.12% +0.31% +0.29%] index_select random_sorted : Elapsed 0.082 ms (8.214 ms / 100) B = [40, 4, 20, 16] (stride (1280, 20, 1, 80)) A = [5, 4, 20, 16] (stride (20, 100, 1, 400)) dim = 0 1.487 -> 1.488 ( +0.07%) [ +0.13% +0.00% +0.00% / +0.07% +0.54% +0.61%] index_add_ linear : Elapsed 0.015 ms (1.489 ms / 100) 1.438 -> 1.439 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.83% +0.83%] index_copy_ linear : Elapsed 0.014 ms (1.439 ms / 100) 1.481 -> 1.481 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.41% +0.34%] index_add_ reverse : Elapsed 0.015 ms (1.482 ms / 100) 1.430 -> 1.430 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.42% +0.35%] index_copy_ reverse : Elapsed 0.014 ms (1.431 ms / 100) 1.480 -> 1.485 ( +0.34%) [ +0.14% +0.07% +0.00% / +0.34% +0.54% +0.47%] index_add_ spread : Elapsed 0.015 ms (1.482 ms / 100) 1.431 -> 1.436 ( +0.35%) [ +0.00% +0.00% +0.00% / +0.84% +0.49% +0.35%] index_copy_ spread : Elapsed 0.014 ms (1.431 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.14% +0.27% +0.00% / +0.07% +0.54% +0.54%] index_add_ strided 3 : Elapsed 0.015 ms (1.482 ms / 100) 1.430 -> 1.430 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.56% +0.56%] index_copy_ strided 3 : Elapsed 0.014 ms (1.431 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.015 ms (1.481 ms / 100) 1.439 -> 1.441 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.56% +0.49%] index_copy_ strided 7 : Elapsed 0.014 ms (1.439 ms / 100) 1.488 -> 1.491 ( +0.20%) [ +0.20% +0.07% +0.00% / +0.20% +0.54% +0.40%] index_add_ perm : Elapsed 0.015 ms (1.491 ms / 100) 1.436 -> 1.437 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.77% +0.63%] index_copy_ perm : Elapsed 0.014 ms (1.436 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.00% +0.00% +0.00% / +0.14% +0.47% +0.41%] index_add_ perm_sorted : Elapsed 0.015 ms (1.475 ms / 100) 1.426 -> 1.428 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.35% +0.42%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.427 ms / 100) 8.200 -> 8.210 ( +0.12%) [ +0.04% +0.13% +0.00% / +0.27% +0.12% +0.12%] index_select const : Elapsed 0.082 ms (8.203 ms / 100) 8.218 -> 8.219 ( +0.01%) [ +0.27% +0.00% +0.12% / +0.19% +0.01% +0.15%] index_select wrap : Elapsed 0.082 ms (8.240 ms / 100) 8.219 -> 8.226 ( +0.09%) [ +0.13% +0.30% +0.00% / +0.17% +0.09% +0.28%] index_select linear : Elapsed 0.082 ms (8.230 ms / 100) 8.210 -> 8.224 ( +0.17%) [ +0.00% +0.34% +0.00% / +0.28% +0.19% +0.17%] index_select reverse : Elapsed 0.082 ms (8.210 ms / 100) 8.195 -> 8.204 ( +0.11%) [ +0.00% +0.07% +0.24% / +0.23% +0.13% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.195 ms / 100) 8.197 -> 8.205 ( +0.10%) [ +0.00% +0.20% +0.30% / +0.10% +0.21% +0.45%] index_select skip256 : Elapsed 0.082 ms (8.197 ms / 100) 8.222 -> 8.232 ( +0.12%) [ +0.19% +0.00% +0.28% / +0.12% +0.23% +0.22%] index_select spread : Elapsed 0.082 ms (8.238 ms / 100) 8.220 -> 8.224 ( +0.05%) [ +0.26% +0.00% +0.26% / +0.05% +0.12% +0.12%] index_select strided 3 : Elapsed 0.082 ms (8.241 ms / 100) 8.207 -> 8.219 ( +0.15%) [ +0.00% +0.24% +0.38% / +0.34% +0.15% +0.37%] index_select random : Elapsed 0.082 ms (8.207 ms / 100) 8.230 -> 8.223 ( -0.09%) [ +0.22% +0.12% +0.00% / -0.09% +0.27% +0.17%] index_select random_sorted : Elapsed 0.082 ms (8.248 ms / 100) B = [40, 4, 20, 16] (stride (1280, 1, 4, 80)) A = [5, 4, 20, 16] (stride (16, 80, 320, 1)) dim = 0 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.07% +0.14% +0.00% / +0.20% +0.61% +0.68%] index_copy_ linear : Elapsed 0.015 ms (1.478 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.72% +0.66%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.61% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.20% +0.00% / +0.00% +0.61% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.53% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.00% +0.13% / +0.07% +0.59% +0.72%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.476 ( +0.20%) [ +0.14% +0.00% +0.00% / +0.20% +0.48% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.475 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.471 -> 1.472 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.518 ms / 100) 1.471 -> 1.473 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.75% +0.68%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.471 ms / 100) 8.515 -> 8.533 ( +0.21%) [ +0.21% +0.00% +0.29% / +0.28% +0.21% +0.38%] index_select const : Elapsed 0.085 ms (8.533 ms / 100) 8.532 -> 8.553 ( +0.25%) [ +0.14% +0.15% +0.00% / +0.25% +0.47% +0.33%] index_select wrap : Elapsed 0.085 ms (8.544 ms / 100) 8.537 -> 8.528 ( -0.11%) [ +0.00% +0.07% +0.06% / -0.11% +0.11% +0.30%] index_select linear : Elapsed 0.085 ms (8.537 ms / 100) 8.532 -> 8.540 ( +0.09%) [ +0.14% +0.00% +0.28% / +0.09% +0.42% +0.34%] index_select reverse : Elapsed 0.085 ms (8.544 ms / 100) 8.521 -> 8.528 ( +0.08%) [ +0.02% +0.12% +0.00% / +0.45% +0.23% +0.08%] index_select skip64 : Elapsed 0.085 ms (8.523 ms / 100) 8.528 -> 8.523 ( -0.06%) [ +0.00% +0.20% +0.07% / -0.06% +0.22% +0.22%] index_select skip256 : Elapsed 0.085 ms (8.528 ms / 100) 8.538 -> 8.539 ( +0.01%) [ +0.08% +0.13% +0.00% / +0.01% +0.32% +0.22%] index_select spread : Elapsed 0.085 ms (8.545 ms / 100) 8.540 -> 8.550 ( +0.12%) [ +0.00% +0.09% +0.02% / +0.12% +0.32% +0.40%] index_select strided 3 : Elapsed 0.085 ms (8.540 ms / 100) 8.546 -> 8.536 ( -0.12%) [ +0.00% +0.02% +0.14% / -0.12% +0.23% +0.07%] index_select random : Elapsed 0.085 ms (8.546 ms / 100) 8.535 -> 8.548 ( +0.15%) [ +0.13% +0.00% +0.11% / +0.15% +0.36% +0.48%] index_select random_sorted : Elapsed 0.085 ms (8.546 ms / 100) B = [40, 4, 20, 16] (stride (320, 12800, 1, 20)) A = [5, 4, 20, 16] (stride (1280, 16, 64, 1)) dim = 0 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.48%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.48% +0.54%] index_copy_ reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.41% +0.54%] index_copy_ spread : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.13% +0.00% / +0.07% +0.53% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.520 ( +0.13%) [ +0.07% +0.00% +0.00% / +0.13% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.475 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.53%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.54%] index_copy_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.48% +0.48%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.472 ms / 100) 8.499 -> 8.520 ( +0.25%) [ +0.27% +0.00% +0.19% / +0.40% +0.25% +0.31%] index_select const : Elapsed 0.085 ms (8.522 ms / 100) 8.528 -> 8.548 ( +0.23%) [ +0.14% +0.00% +0.09% / +0.26% +0.23% +0.25%] index_select wrap : Elapsed 0.085 ms (8.540 ms / 100) 8.538 -> 8.540 ( +0.02%) [ +0.00% +0.14% +0.00% / +0.18% +0.02% +0.25%] index_select linear : Elapsed 0.085 ms (8.538 ms / 100) 8.531 -> 8.543 ( +0.14%) [ +0.00% +0.05% +0.13% / +0.16% +0.25% +0.14%] index_select reverse : Elapsed 0.085 ms (8.531 ms / 100) 8.514 -> 8.511 ( -0.04%) [ +0.16% +0.00% +0.29% / +0.04% -0.01% -0.04%] index_select skip64 : Elapsed 0.085 ms (8.528 ms / 100) 8.505 -> 8.509 ( +0.05%) [ +0.00% +0.07% +0.14% / +0.05% +0.20% +0.29%] index_select skip256 : Elapsed 0.085 ms (8.505 ms / 100) 8.520 -> 8.522 ( +0.02%) [ +0.00% +0.21% +0.11% / +0.18% +0.02% +0.42%] index_select spread : Elapsed 0.085 ms (8.520 ms / 100) 8.529 -> 8.543 ( +0.16%) [ +0.35% +0.16% +0.00% / +0.16% +0.27% +0.16%] index_select strided 3 : Elapsed 0.086 ms (8.559 ms / 100) 8.538 -> 8.537 ( -0.01%) [ +0.15% +0.00% +0.16% / +0.07% +0.15% -0.01%] index_select random : Elapsed 0.086 ms (8.551 ms / 100) 8.538 -> 8.529 ( -0.11%) [ +0.00% +0.11% +0.01% / +0.11% -0.11% +0.34%] index_select random_sorted : Elapsed 0.085 ms (8.538 ms / 100) B = [40, 4, 20, 16] (stride (1, 12800, 40, 800)) A = [5, 4, 20, 16] (stride (1, 5, 20, 400)) dim = 0 1.621 -> 1.623 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.68% +0.62%] index_add_ linear : Elapsed 0.016 ms (1.623 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.13% +0.06% +0.00% / +0.13% +0.70% +0.64%] index_copy_ linear : Elapsed 0.016 ms (1.575 ms / 100) 1.619 -> 1.619 ( +0.00%) [ +0.00% +0.06% +0.12% / +0.00% +0.49% +0.68%] index_add_ reverse : Elapsed 0.016 ms (1.619 ms / 100) 1.571 -> 1.570 ( -0.06%) [ +0.32% +0.00% +0.70% / -0.06% +0.45% +0.57%] index_copy_ reverse : Elapsed 0.016 ms (1.576 ms / 100) 1.633 -> 1.634 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.49% +0.73%] index_add_ spread : Elapsed 0.016 ms (1.635 ms / 100) 1.579 -> 1.583 ( +0.25%) [ +0.19% +0.00% +0.25% / +0.25% +0.63% +0.82%] index_copy_ spread : Elapsed 0.016 ms (1.582 ms / 100) 1.622 -> 1.624 ( +0.12%) [ +0.06% +0.12% +0.00% / +0.12% +0.55% +0.55%] index_add_ strided 3 : Elapsed 0.016 ms (1.623 ms / 100) 1.575 -> 1.576 ( +0.06%) [ +0.00% +0.13% +0.00% / +0.06% +0.57% +0.57%] index_copy_ strided 3 : Elapsed 0.016 ms (1.575 ms / 100) 1.632 -> 1.635 ( +0.18%) [ +0.00% +0.18% +0.06% / +0.18% +0.74% +0.67%] index_add_ strided 7 : Elapsed 0.016 ms (1.632 ms / 100) 1.581 -> 1.584 ( +0.19%) [ +0.00% +0.25% +0.06% / +0.19% +0.89% +0.70%] index_copy_ strided 7 : Elapsed 0.016 ms (1.581 ms / 100) 1.633 -> 1.631 ( -0.12%) [ +0.00% +0.12% +0.12% / -0.12% +0.67% +0.73%] index_add_ perm : Elapsed 0.016 ms (1.633 ms / 100) 1.582 -> 1.580 ( -0.13%) [ +0.00% +0.19% +0.63% / -0.13% +0.76% +0.70%] index_copy_ perm : Elapsed 0.016 ms (1.582 ms / 100) 1.622 -> 1.622 ( +0.00%) [ +0.00% +0.12% +0.06% / +0.00% +0.62% +0.62%] index_add_ perm_sorted : Elapsed 0.016 ms (1.622 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.00% +0.06% +0.19% / +0.13% +0.83% +0.64%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.573 ms / 100) 8.594 -> 8.575 ( -0.22%) [ +0.23% +0.05% +0.00% / -0.22% +0.41% +0.13%] index_select const : Elapsed 0.086 ms (8.614 ms / 100) 8.584 -> 8.598 ( +0.16%) [ +0.00% +0.09% +0.15% / +0.16% +0.36% +0.52%] index_select wrap : Elapsed 0.086 ms (8.584 ms / 100) 8.602 -> 8.589 ( -0.15%) [ +0.00% +0.13% +0.03% / -0.15% +0.00% +0.15%] index_select linear : Elapsed 0.086 ms (8.602 ms / 100) 8.594 -> 8.572 ( -0.26%) [ +0.00% +0.21% +0.07% / -0.26% +0.09% +0.49%] index_select reverse : Elapsed 0.086 ms (8.594 ms / 100) 8.595 -> 8.591 ( -0.05%) [ +0.07% +0.06% +0.00% / -0.05% +0.16% +0.05%] index_select skip64 : Elapsed 0.086 ms (8.601 ms / 100) 8.585 -> 8.582 ( -0.03%) [ +0.00% +0.21% +0.26% / -0.03% +0.22% +0.38%] index_select skip256 : Elapsed 0.086 ms (8.585 ms / 100) 8.590 -> 8.595 ( +0.06%) [ +0.13% +0.16% +0.00% / +0.06% +0.22% +0.26%] index_select spread : Elapsed 0.086 ms (8.601 ms / 100) 8.594 -> 8.606 ( +0.14%) [ +0.00% +0.06% +0.07% / +0.27% +0.31% +0.14%] index_select strided 3 : Elapsed 0.086 ms (8.594 ms / 100) 8.584 -> 8.598 ( +0.16%) [ +0.17% +0.05% +0.00% / +0.16% +0.29% +0.34%] index_select random : Elapsed 0.086 ms (8.599 ms / 100) 8.577 -> 8.589 ( +0.14%) [ +0.00% +0.13% +0.13% / +0.14% +0.59% +0.52%] index_select random_sorted : Elapsed 0.086 ms (8.577 ms / 100) B = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) A = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 0 1.425 -> 1.424 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.42% +0.42%] index_add_ linear : Elapsed 0.014 ms (1.425 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.14% +0.00% +0.51% / +0.07% +0.36% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.382 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.42% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.425 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.22% +0.22%] index_copy_ reverse : Elapsed 0.014 ms (1.383 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.56% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.425 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.36% +1.30%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.425 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.43% +0.43%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.07% +0.14% +0.00% / -0.07% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.425 ms / 100) 1.381 -> 1.380 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.29% +0.36%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.49% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.80% / +0.07% +0.36% +0.43%] index_copy_ perm : Elapsed 0.014 ms (1.381 ms / 100) 1.424 -> 1.425 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.49% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.07% +0.22% / +0.07% +0.36% +0.36%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.187 -> 8.207 ( +0.24%) [ +0.10% +0.00% +0.22% / +0.24% +0.39% +0.45%] index_select const : Elapsed 0.082 ms (8.195 ms / 100) 8.219 -> 8.214 ( -0.06%) [ +0.00% +0.06% +0.02% / -0.06% -0.06% +0.19%] index_select wrap : Elapsed 0.082 ms (8.219 ms / 100) 8.219 -> 8.221 ( +0.02%) [ +0.21% +0.00% +0.12% / +0.11% +0.02% +0.23%] index_select linear : Elapsed 0.082 ms (8.236 ms / 100) 8.210 -> 8.218 ( +0.10%) [ +0.12% +0.00% +0.19% / +0.12% +0.13% +0.10%] index_select reverse : Elapsed 0.082 ms (8.220 ms / 100) 8.203 -> 8.209 ( +0.07%) [ +0.00% +0.10% +0.00% / +0.07% +0.38% +0.24%] index_select skip64 : Elapsed 0.082 ms (8.203 ms / 100) 8.193 -> 8.192 ( -0.01%) [ +0.00% +0.11% +0.41% / +0.20% -0.01% +0.28%] index_select skip256 : Elapsed 0.082 ms (8.193 ms / 100) 8.208 -> 8.209 ( +0.01%) [ +0.11% +0.00% +0.16% / +0.01% +0.15% +0.17%] index_select spread : Elapsed 0.082 ms (8.217 ms / 100) 8.218 -> 8.225 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.10% +0.09% +0.22%] index_select strided 3 : Elapsed 0.082 ms (8.221 ms / 100) 8.224 -> 8.212 ( -0.15%) [ +0.07% +0.27% +0.00% / +0.04% -0.04% -0.15%] index_select random : Elapsed 0.082 ms (8.230 ms / 100) 8.207 -> 8.214 ( +0.09%) [ +0.11% +0.00% +0.11% / +0.09% +0.11% +0.22%] index_select random_sorted : Elapsed 0.082 ms (8.216 ms / 100) B = [40, 4, 20, 16] (stride (4, 1, 2560, 160)) A = [5, 4, 20, 16] (stride (64, 16, 320, 1)) dim = 0 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.35% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.421 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.51% +0.66%] index_copy_ linear : Elapsed 0.014 ms (1.373 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.49% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.421 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.00% +0.15% +0.22% / +0.15% +0.66% +0.66%] index_copy_ reverse : Elapsed 0.014 ms (1.373 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.42% +0.42%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.66% +0.66%] index_copy_ spread : Elapsed 0.014 ms (1.375 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.420 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_copy_ strided 3 : Elapsed 0.014 ms (1.375 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.28% +0.14% +0.00% / +0.14% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.375 -> 1.375 ( +0.00%) [ +0.44% +0.00% +0.15% / +0.00% +0.51% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.70% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.372 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.87% +0.80%] index_copy_ perm : Elapsed 0.014 ms (1.373 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.56% +0.63%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.00% +0.07% +0.29% / +0.00% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.373 ms / 100) 8.185 -> 8.186 ( +0.01%) [ +0.00% +0.09% +0.11% / +0.04% +0.37% +0.01%] index_select const : Elapsed 0.082 ms (8.185 ms / 100) 8.211 -> 8.221 ( +0.12%) [ +0.17% +0.00% +0.28% / +0.12% +0.45% +0.28%] index_select wrap : Elapsed 0.082 ms (8.225 ms / 100) 8.202 -> 8.226 ( +0.29%) [ +0.04% +0.00% +0.21% / +0.29% +0.35% +0.35%] index_select linear : Elapsed 0.082 ms (8.205 ms / 100) 8.208 -> 8.205 ( -0.04%) [ +0.04% +0.00% +0.10% / -0.04% +0.33% +0.17%] index_select reverse : Elapsed 0.082 ms (8.211 ms / 100) 8.193 -> 8.186 ( -0.09%) [ +0.00% +0.18% +0.10% / -0.09% +0.20% +0.27%] index_select skip64 : Elapsed 0.082 ms (8.193 ms / 100) 8.187 -> 8.205 ( +0.22%) [ +0.00% +0.01% +0.27% / +0.22% +0.27% +0.50%] index_select skip256 : Elapsed 0.082 ms (8.187 ms / 100) 8.204 -> 8.209 ( +0.06%) [ +0.01% +0.15% +0.00% / +0.15% +0.15% +0.06%] index_select spread : Elapsed 0.082 ms (8.205 ms / 100) 8.209 -> 8.204 ( -0.06%) [ +0.00% +0.12% +0.23% / -0.06% +0.18% +0.30%] index_select strided 3 : Elapsed 0.082 ms (8.209 ms / 100) 8.207 -> 8.216 ( +0.11%) [ +0.00% +0.06% +0.22% / +0.11% +0.32% +0.37%] index_select random : Elapsed 0.082 ms (8.207 ms / 100) 8.213 -> 8.219 ( +0.07%) [ +0.00% +0.13% +0.21% / +0.07% +0.16% +0.16%] index_select random_sorted : Elapsed 0.082 ms (8.213 ms / 100) B = [40, 4, 20, 16] (stride (80, 20, 1, 3200)) A = [5, 4, 20, 16] (stride (320, 1600, 1, 20)) dim = 0 1.424 -> 1.425 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.56% +0.77%] index_add_ linear : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.29% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.380 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.49% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.07% +0.15% / +0.15% +0.51% +0.44%] index_copy_ reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.49% +0.56%] index_add_ spread : Elapsed 0.014 ms (1.424 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.29% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.56% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.43% +0.36%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.56% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.58% +0.51%] index_copy_ strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.56% +0.77%] index_add_ perm : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.36% +0.36%] index_copy_ perm : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.70% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.36% +0.36%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.190 -> 8.183 ( -0.09%) [ +0.07% +0.43% +0.00% / -0.09% +0.50% +0.37%] index_select const : Elapsed 0.082 ms (8.196 ms / 100) 8.209 -> 8.206 ( -0.04%) [ +0.16% +0.07% +0.00% / -0.04% +0.27% +0.23%] index_select wrap : Elapsed 0.082 ms (8.222 ms / 100) 8.204 -> 8.226 ( +0.27%) [ +0.29% +0.02% +0.00% / +0.27% +0.32% +0.38%] index_select linear : Elapsed 0.082 ms (8.228 ms / 100) 8.201 -> 8.198 ( -0.04%) [ +0.17% +0.01% +0.00% / -0.04% +0.11% +0.23%] index_select reverse : Elapsed 0.082 ms (8.215 ms / 100) 8.192 -> 8.206 ( +0.17%) [ +0.02% +0.00% +0.20% / +0.18% +0.17% +0.32%] index_select skip64 : Elapsed 0.082 ms (8.194 ms / 100) 8.185 -> 8.202 ( +0.21%) [ +0.17% +0.00% +0.23% / +0.21% +0.24% +0.33%] index_select skip256 : Elapsed 0.082 ms (8.199 ms / 100) 8.205 -> 8.204 ( -0.01%) [ +0.00% +0.09% +0.10% / -0.01% +0.24% +0.65%] index_select spread : Elapsed 0.082 ms (8.205 ms / 100) 8.216 -> 8.206 ( -0.12%) [ +0.00% +0.35% +0.10% / -0.07% +0.15% -0.12%] index_select strided 3 : Elapsed 0.082 ms (8.216 ms / 100) 8.216 -> 8.215 ( -0.01%) [ +0.00% +0.00% +0.16% / +0.07% +0.00% -0.01%] index_select random : Elapsed 0.082 ms (8.216 ms / 100) 8.209 -> 8.219 ( +0.12%) [ +0.00% +0.12% +0.23% / +0.12% +0.12% +0.12%] index_select random_sorted : Elapsed 0.082 ms (8.209 ms / 100) B = [40, 4, 20, 16] (stride (80, 20, 1, 3200)) A = [5, 4, 20, 16] (stride (64, 1, 320, 4)) dim = 0 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.70% +0.63%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.58% +0.58%] index_copy_ linear : Elapsed 0.014 ms (1.380 ms / 100) 1.423 -> 1.422 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.63% +0.70%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.51% +0.65%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.77% +0.77%] index_add_ spread : Elapsed 0.014 ms (1.424 ms / 100) 1.381 -> 1.383 ( +0.14%) [ +0.14% +0.22% +0.00% / +0.14% +0.87% +0.72%] index_copy_ spread : Elapsed 0.014 ms (1.383 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.28% +0.00% +0.00% / +0.07% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.014 ms (1.425 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.51% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.77% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.07% +0.22% / +0.00% +0.73% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.14% +0.07% +0.00% / -0.07% +0.70% +0.77%] index_add_ perm : Elapsed 0.014 ms (1.424 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.00% +0.07% +0.22% / +0.15% +0.58% +0.65%] index_copy_ perm : Elapsed 0.014 ms (1.378 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.70% +0.77%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.00% +0.15% +0.22% / +0.07% +0.58% +0.73%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.379 ms / 100) 8.192 -> 8.194 ( +0.02%) [ +0.05% +0.11% +0.00% / +0.02% +0.46% +0.26%] index_select const : Elapsed 0.082 ms (8.196 ms / 100) 8.202 -> 8.221 ( +0.23%) [ +0.02% +0.00% +0.05% / +0.33% +0.32% +0.23%] index_select wrap : Elapsed 0.082 ms (8.204 ms / 100) 8.191 -> 8.207 ( +0.20%) [ +0.27% +0.00% +0.04% / +0.20% +0.51% +0.38%] index_select linear : Elapsed 0.082 ms (8.213 ms / 100) 8.193 -> 8.212 ( +0.23%) [ +0.00% +0.10% +0.04% / +0.26% +0.34% +0.23%] index_select reverse : Elapsed 0.082 ms (8.193 ms / 100) 8.188 -> 8.185 ( -0.04%) [ +0.02% +0.17% +0.00% / -0.04% +0.38% +0.21%] index_select skip64 : Elapsed 0.082 ms (8.190 ms / 100) 8.197 -> 8.197 ( +0.00%) [ +0.00% +0.02% +0.09% / +0.02% +0.00% +0.32%] index_select skip256 : Elapsed 0.082 ms (8.197 ms / 100) 8.199 -> 8.210 ( +0.13%) [ +0.00% +0.07% +0.13% / +0.13% +0.33% +0.28%] index_select spread : Elapsed 0.082 ms (8.199 ms / 100) 8.195 -> 8.215 ( +0.24%) [ +0.00% +0.24% +0.10% / +0.24% +0.43% +0.48%] index_select strided 3 : Elapsed 0.082 ms (8.195 ms / 100) 8.198 -> 8.203 ( +0.06%) [ +0.06% +0.00% +0.16% / +0.06% +0.45% +0.32%] index_select random : Elapsed 0.082 ms (8.203 ms / 100) 8.194 -> 8.219 ( +0.31%) [ +0.00% +0.04% +0.09% / +0.31% +0.38% +0.51%] index_select random_sorted : Elapsed 0.082 ms (8.194 ms / 100) B = [40, 4, 20, 16] (stride (20, 800, 1, 3200)) A = [5, 4, 20, 16] (stride (20, 1600, 1, 100)) dim = 0 1.577 -> 1.577 ( +0.00%) [ +0.00% +0.19% +0.13% / +0.00% +0.38% +0.44%] index_add_ linear : Elapsed 0.016 ms (1.577 ms / 100) 1.528 -> 1.529 ( +0.07%) [ +0.00% +0.52% +0.07% / +0.07% +0.72% +0.65%] index_copy_ linear : Elapsed 0.015 ms (1.528 ms / 100) 1.578 -> 1.579 ( +0.06%) [ +0.00% +0.06% +0.13% / +0.06% +0.38% +0.38%] index_add_ reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.525 -> 1.531 ( +0.39%) [ +0.00% +0.13% +0.00% / +0.52% +0.39% +0.52%] index_copy_ reverse : Elapsed 0.015 ms (1.525 ms / 100) 1.577 -> 1.575 ( -0.13%) [ +0.13% +0.13% +0.00% / -0.13% +0.19% +0.19%] index_add_ spread : Elapsed 0.016 ms (1.579 ms / 100) 1.525 -> 1.526 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.46% +0.59%] index_copy_ spread : Elapsed 0.015 ms (1.525 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.45% +0.45% +0.00% / +0.13% +0.51% +0.45%] index_add_ strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.524 -> 1.531 ( +0.46%) [ +0.00% +0.07% +0.07% / +0.46% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.015 ms (1.524 ms / 100) 1.577 -> 1.577 ( +0.00%) [ +0.00% +0.06% +0.19% / +0.00% +0.51% +0.44%] index_add_ strided 7 : Elapsed 0.016 ms (1.577 ms / 100) 1.525 -> 1.528 ( +0.20%) [ +0.13% +0.07% +0.00% / +0.20% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.015 ms (1.527 ms / 100) 1.577 -> 1.577 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.57% +0.44%] index_add_ perm : Elapsed 0.016 ms (1.577 ms / 100) 1.529 -> 1.529 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.00% +0.72% +0.59%] index_copy_ perm : Elapsed 0.015 ms (1.529 ms / 100) 1.576 -> 1.584 ( +0.51%) [ +0.00% +0.06% +0.19% / +0.51% +0.57% +0.63%] index_add_ perm_sorted : Elapsed 0.016 ms (1.576 ms / 100) 1.526 -> 1.529 ( +0.20%) [ +0.00% +0.13% +0.13% / +0.20% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.526 ms / 100) 8.522 -> 8.540 ( +0.21%) [ +0.09% +0.00% +0.20% / +0.21% +0.25% +0.22%] index_select const : Elapsed 0.085 ms (8.530 ms / 100) 8.537 -> 8.542 ( +0.06%) [ +0.21% +0.00% +0.27% / +0.06% +0.54% +0.16%] index_select wrap : Elapsed 0.086 ms (8.555 ms / 100) 8.535 -> 8.555 ( +0.23%) [ +0.14% +0.15% +0.00% / +0.28% +0.23% +0.23%] index_select linear : Elapsed 0.085 ms (8.547 ms / 100) 8.526 -> 8.553 ( +0.32%) [ +0.36% +0.38% +0.00% / +0.32% +0.50% +0.35%] index_select reverse : Elapsed 0.086 ms (8.557 ms / 100) 8.519 -> 8.538 ( +0.22%) [ +0.39% +0.14% +0.00% / +0.22% +0.25% +0.26%] index_select skip64 : Elapsed 0.086 ms (8.552 ms / 100) 8.537 -> 8.532 ( -0.06%) [ +0.01% +0.07% +0.00% / -0.06% +0.11% +0.16%] index_select skip256 : Elapsed 0.085 ms (8.538 ms / 100) 8.548 -> 8.566 ( +0.21%) [ +0.02% +0.12% +0.00% / +0.39% +0.21% +0.33%] index_select spread : Elapsed 0.085 ms (8.550 ms / 100) 8.554 -> 8.543 ( -0.13%) [ +0.00% +0.23% +0.18% / +0.04% -0.02% -0.13%] index_select strided 3 : Elapsed 0.086 ms (8.554 ms / 100) 8.532 -> 8.557 ( +0.29%) [ +0.00% +0.12% +0.09% / +0.29% +0.39% +0.47%] index_select random : Elapsed 0.085 ms (8.532 ms / 100) 8.553 -> 8.557 ( +0.05%) [ +0.00% +0.07% +0.26% / +0.08% +0.15% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.553 ms / 100) B = [40, 4, 20, 16] (stride (1, 800, 40, 3200)) A = [5, 4, 20, 16] (stride (4, 1, 20, 400)) dim = 0 1.515 -> 1.516 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.86% +0.79%] index_add_ linear : Elapsed 0.015 ms (1.515 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.89% +0.68%] index_copy_ linear : Elapsed 0.015 ms (1.465 ms / 100) 1.515 -> 1.518 ( +0.20%) [ +0.13% +0.00% +0.07% / +0.20% +0.66% +0.66%] index_add_ reverse : Elapsed 0.015 ms (1.517 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.55% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.466 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.466 -> 1.469 ( +0.20%) [ +0.07% +0.14% +0.00% / +0.20% +0.68% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.467 ms / 100) 1.516 -> 1.518 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.59% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.518 ms / 100) 1.466 -> 1.466 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.61% +0.55%] index_copy_ strided 3 : Elapsed 0.015 ms (1.466 ms / 100) 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.468 -> 1.470 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.82% +0.68%] index_copy_ strided 7 : Elapsed 0.015 ms (1.469 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +1.58% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.524 ms / 100) 1.467 -> 1.468 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +1.09% +1.16%] index_copy_ perm : Elapsed 0.015 ms (1.467 ms / 100) 1.515 -> 1.516 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.73% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.516 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.61% +0.61%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.465 ms / 100) 8.252 -> 8.257 ( +0.06%) [ +0.00% +0.18% +0.27% / +0.06% +0.35% +0.38%] index_select const : Elapsed 0.083 ms (8.252 ms / 100) 8.261 -> 8.270 ( +0.11%) [ +0.25% +0.10% +0.00% / +0.11% +0.38% +0.29%] index_select wrap : Elapsed 0.083 ms (8.282 ms / 100) 8.264 -> 8.258 ( -0.07%) [ +0.15% +0.10% +0.00% / -0.07% +0.42% +0.16%] index_select linear : Elapsed 0.083 ms (8.276 ms / 100) 8.251 -> 8.254 ( +0.04%) [ +0.00% +0.16% +0.10% / +0.04% +0.39% +0.48%] index_select reverse : Elapsed 0.083 ms (8.251 ms / 100) 8.267 -> 8.247 ( -0.24%) [ +0.02% +0.07% +0.00% / -0.24% +0.06% +0.12%] index_select skip64 : Elapsed 0.083 ms (8.269 ms / 100) 8.246 -> 8.265 ( +0.23%) [ +0.19% +0.46% +0.00% / +0.23% +0.24% +0.35%] index_select skip256 : Elapsed 0.083 ms (8.262 ms / 100) 8.262 -> 8.254 ( -0.10%) [ +0.19% +0.00% +0.19% / -0.10% +0.33% +0.29%] index_select spread : Elapsed 0.083 ms (8.278 ms / 100) 8.256 -> 8.261 ( +0.06%) [ +0.28% +0.04% +0.00% / +0.06% +0.29% +0.42%] index_select strided 3 : Elapsed 0.083 ms (8.279 ms / 100) 8.257 -> 8.275 ( +0.22%) [ +0.11% +0.10% +0.00% / +0.28% +0.22% +0.34%] index_select random : Elapsed 0.083 ms (8.266 ms / 100) 8.277 -> 8.265 ( -0.14%) [ +0.04% +0.00% +0.02% / -0.14% +0.27% +0.40%] index_select random_sorted : Elapsed 0.083 ms (8.280 ms / 100) out_shape = [5, 40, 20, 16] in_shape = [5, 4, 20, 16] idx_dim = 1 B = [5, 40, 20, 16] (stride (12800, 320, 16, 1)) A = [5, 4, 20, 16] (stride (20, 100, 1, 400)) dim = 1 1.244 -> 1.244 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.56%] index_add_ linear : Elapsed 0.012 ms (1.244 ms / 100) 1.204 -> 1.206 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.66% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.204 ms / 100) 1.259 -> 1.261 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.87% +0.71%] index_add_ reverse : Elapsed 0.013 ms (1.261 ms / 100) 1.216 -> 1.216 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.66%] index_copy_ reverse : Elapsed 0.012 ms (1.216 ms / 100) 1.247 -> 1.247 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.64% +0.64%] index_add_ spread : Elapsed 0.012 ms (1.249 ms / 100) 1.204 -> 1.204 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.66%] index_copy_ spread : Elapsed 0.012 ms (1.204 ms / 100) 1.243 -> 1.244 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.64% +0.80%] index_add_ strided 3 : Elapsed 0.012 ms (1.245 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.75% +0.67%] index_copy_ strided 3 : Elapsed 0.012 ms (1.203 ms / 100) 1.241 -> 1.244 ( +0.24%) [ +0.00% +0.32% +0.32% / +0.24% +0.48% +0.97%] index_add_ strided 7 : Elapsed 0.012 ms (1.241 ms / 100) 1.200 -> 1.205 ( +0.42%) [ +0.00% +0.42% +0.42% / +0.42% +0.58% +1.08%] index_copy_ strided 7 : Elapsed 0.012 ms (1.200 ms / 100) 1.248 -> 1.249 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.56% +0.64%] index_add_ perm : Elapsed 0.012 ms (1.249 ms / 100) 1.204 -> 1.206 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.66% +0.50%] index_copy_ perm : Elapsed 0.012 ms (1.204 ms / 100) 1.260 -> 1.262 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.56% +0.56%] index_add_ perm_sorted : Elapsed 0.013 ms (1.262 ms / 100) 1.216 -> 1.216 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.66%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.216 ms / 100) 8.697 -> 8.689 ( -0.09%) [ +0.00% +0.13% +0.14% / -0.05% +0.31% -0.09%] index_select const : Elapsed 0.087 ms (8.697 ms / 100) 8.715 -> 8.700 ( -0.17%) [ +0.00% +0.09% +0.08% / +0.01% +0.06% -0.17%] index_select wrap : Elapsed 0.087 ms (8.715 ms / 100) 8.714 -> 8.700 ( -0.16%) [ +0.05% +0.00% +0.08% / +0.03% -0.07% -0.16%] index_select linear : Elapsed 0.087 ms (8.718 ms / 100) 8.716 -> 8.702 ( -0.16%) [ +0.00% +0.20% +0.02% / -0.01% -0.10% -0.16%] index_select reverse : Elapsed 0.087 ms (8.716 ms / 100) 8.699 -> 8.689 ( -0.11%) [ +0.10% +0.09% +0.00% / -0.07% -0.10% -0.11%] index_select skip64 : Elapsed 0.087 ms (8.708 ms / 100) 8.701 -> 8.686 ( -0.17%) [ +0.16% +0.06% +0.00% / +0.03% -0.17% -0.13%] index_select skip256 : Elapsed 0.087 ms (8.715 ms / 100) 8.723 -> 8.724 ( +0.01%) [ +0.00% +0.00% +0.00% / +0.11% +0.01% +0.03%] index_select spread : Elapsed 0.087 ms (8.723 ms / 100) 8.700 -> 8.707 ( +0.08%) [ +0.17% +0.00% +0.37% / +0.32% +0.08% +0.08%] index_select strided 3 : Elapsed 0.087 ms (8.715 ms / 100) 8.712 -> 8.717 ( +0.06%) [ +0.02% +0.08% +0.00% / +0.10% +0.11% +0.06%] index_select random : Elapsed 0.087 ms (8.714 ms / 100) 8.706 -> 8.725 ( +0.22%) [ +0.00% +0.46% +0.17% / +0.33% +0.33% +0.22%] index_select random_sorted : Elapsed 0.087 ms (8.706 ms / 100) B = [5, 40, 20, 16] (stride (12800, 320, 1, 20)) A = [5, 4, 20, 16] (stride (20, 100, 1, 400)) dim = 1 1.334 -> 1.334 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.52% +0.60%] index_add_ linear : Elapsed 0.013 ms (1.335 ms / 100) 1.293 -> 1.293 ( +0.00%) [ +0.00% +0.08% +0.15% / +0.00% +0.62% +0.62%] index_copy_ linear : Elapsed 0.013 ms (1.293 ms / 100) 1.335 -> 1.335 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_add_ reverse : Elapsed 0.013 ms (1.335 ms / 100) 1.292 -> 1.293 ( +0.08%) [ +0.15% +0.23% +0.00% / +0.08% +0.62% +0.70%] index_copy_ reverse : Elapsed 0.013 ms (1.294 ms / 100) 1.348 -> 1.348 ( +0.00%) [ +0.07% +0.37% +0.00% / +0.00% +0.45% +0.45%] index_add_ spread : Elapsed 0.013 ms (1.349 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.46% +0.15%] index_copy_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.52%] index_add_ strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.00% +0.61% +0.53%] index_copy_ strided 3 : Elapsed 0.013 ms (1.320 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.38% +0.30%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.287 -> 1.287 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.39% +0.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.287 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.52% +0.52%] index_add_ perm : Elapsed 0.013 ms (1.346 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.38% +0.46%] index_copy_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.52% +0.52%] index_add_ perm_sorted : Elapsed 0.013 ms (1.335 ms / 100) 1.301 -> 1.303 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.46% +0.38%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.301 ms / 100) 9.162 -> 9.152 ( -0.11%) [ +0.09% +0.00% +0.09% / +0.37% -0.04% -0.11%] index_select const : Elapsed 0.092 ms (9.170 ms / 100) 9.175 -> 9.173 ( -0.02%) [ +0.24% +0.00% +0.05% / +0.10% -0.02% +0.10%] index_select wrap : Elapsed 0.092 ms (9.197 ms / 100) 9.171 -> 9.177 ( +0.07%) [ +0.09% +0.15% +0.00% / +0.07% +0.16% +0.15%] index_select linear : Elapsed 0.092 ms (9.179 ms / 100) 9.170 -> 9.180 ( +0.11%) [ +0.14% +0.07% +0.00% / +0.11% +0.32% +0.13%] index_select reverse : Elapsed 0.092 ms (9.183 ms / 100) 9.163 -> 9.146 ( -0.19%) [ +0.00% +0.03% +0.19% / -0.19% +0.32% +0.34%] index_select skip64 : Elapsed 0.092 ms (9.163 ms / 100) 9.167 -> 9.159 ( -0.09%) [ +0.13% +0.13% +0.00% / -0.09% +0.40% +0.03%] index_select skip256 : Elapsed 0.092 ms (9.179 ms / 100) 9.191 -> 9.177 ( -0.15%) [ +0.00% +0.05% +0.13% / +0.14% -0.15% -0.05%] index_select spread : Elapsed 0.092 ms (9.191 ms / 100) 9.182 -> 9.175 ( -0.08%) [ +0.03% +0.00% +0.30% / +0.04% -0.08% +0.11%] index_select strided 3 : Elapsed 0.092 ms (9.185 ms / 100) 9.178 -> 9.189 ( +0.12%) [ +0.26% +0.00% +0.04% / +0.20% +0.12% +0.17%] index_select random : Elapsed 0.092 ms (9.202 ms / 100) 9.170 -> 9.182 ( +0.13%) [ +0.11% +0.14% +0.00% / +0.20% +0.13% +0.37%] index_select random_sorted : Elapsed 0.092 ms (9.180 ms / 100) B = [5, 40, 20, 16] (stride (320, 1600, 1, 20)) dim = 1 fill_cnt = 4 0.793 -> 0.793 ( +0.00%) [ +0.00% +0.25% +0.00% / +0.00% +0.50% +0.50%] index_fill_ const : Elapsed 0.008 ms (0.793 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.38% +0.38%] index_fill_ linear : Elapsed 0.008 ms (0.794 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.63% +0.50%] index_fill_ reverse : Elapsed 0.008 ms (0.793 ms / 100) 0.793 -> 0.793 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.38% +0.50%] index_fill_ skip64 : Elapsed 0.008 ms (0.793 ms / 100) 0.793 -> 0.792 ( -0.13%) [ +0.13% +0.88% +0.00% / -0.13% +0.38% +0.38%] index_fill_ skip256 : Elapsed 0.008 ms (0.794 ms / 100) 0.794 -> 0.794 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_fill_ spread : Elapsed 0.008 ms (0.796 ms / 100) 0.795 -> 0.794 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.25% +0.25%] index_fill_ strided 3 : Elapsed 0.008 ms (0.795 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.38% +0.38%] index_fill_ strided 5 : Elapsed 0.008 ms (0.794 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.38% +0.38%] index_fill_ strided 7 : Elapsed 0.008 ms (0.794 ms / 100) 0.793 -> 0.793 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_fill_ strided 8 : Elapsed 0.008 ms (0.793 ms / 100) 0.793 -> 0.792 ( -0.13%) [ +0.25% +0.00% +0.00% / -0.13% +0.38% +0.38%] index_fill_ strided 16 : Elapsed 0.008 ms (0.795 ms / 100) 0.795 -> 0.795 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.38% +0.38%] index_fill_ random : Elapsed 0.008 ms (0.796 ms / 100) 0.794 -> 0.795 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.50% +0.50%] index_fill_ random_sorted : Elapsed 0.008 ms (0.795 ms / 100) 0.797 -> 0.797 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.13% +0.38%] index_fill_ perm : Elapsed 0.008 ms (0.797 ms / 100) 0.795 -> 0.797 ( +0.25%) [ +0.25% +0.13% +0.00% / +0.38% +0.38% +0.25%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.797 ms / 100) B = [5, 40, 20, 16] (stride (16, 1600, 80, 1)) A = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 1 1.316 -> 1.316 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.15% +0.15%] index_add_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.313 -> 1.314 ( +0.08%) [ +0.30% +0.38% +0.00% / +0.08% +0.38% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.312 ( -0.15%) [ +0.30% +0.30% +0.00% / -0.15% +0.38% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.55% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.317 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.30% +0.30%] index_add_ strided 3 : Elapsed 0.013 ms (1.314 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.00% +0.00% +0.30% / +0.15% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.013 ms (1.312 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.86% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.00% +0.38% +0.15% / +0.38% +0.53% +0.53%] index_add_ perm : Elapsed 0.013 ms (1.312 ms / 100) 1.272 -> 1.275 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.24% +0.79% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.275 ms / 100) 1.315 -> 1.316 ( +0.08%) [ +0.23% +0.00% +0.30% / +0.08% +0.30% +0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 9.135 -> 9.145 ( +0.11%) [ +0.15% +0.05% +0.00% / +0.11% +0.20% +0.48%] index_select const : Elapsed 0.091 ms (9.149 ms / 100) 9.151 -> 9.170 ( +0.21%) [ +0.08% +0.00% +0.24% / +0.21% +0.36% +0.27%] index_select wrap : Elapsed 0.092 ms (9.158 ms / 100) 9.163 -> 9.149 ( -0.15%) [ +0.00% +0.08% +0.17% / -0.15% -0.02% -0.01%] index_select linear : Elapsed 0.092 ms (9.163 ms / 100) 9.171 -> 9.133 ( -0.41%) [ +0.09% +0.08% +0.00% / -0.14% -0.10% -0.41%] index_select reverse : Elapsed 0.092 ms (9.179 ms / 100) 9.138 -> 9.160 ( +0.24%) [ +0.05% +0.00% +0.08% / +0.24% +0.26% +0.27%] index_select skip64 : Elapsed 0.091 ms (9.143 ms / 100) 9.137 -> 9.145 ( +0.09%) [ +0.04% +0.09% +0.00% / +0.09% +0.34% +0.31%] index_select skip256 : Elapsed 0.091 ms (9.141 ms / 100) 9.171 -> 9.169 ( -0.02%) [ +0.24% +0.08% +0.00% / -0.01% +0.00% -0.02%] index_select spread : Elapsed 0.092 ms (9.193 ms / 100) 9.159 -> 9.170 ( +0.12%) [ +0.00% +0.37% +0.23% / +0.13% +0.12% +0.37%] index_select strided 3 : Elapsed 0.092 ms (9.159 ms / 100) 9.171 -> 9.180 ( +0.10%) [ +0.05% +0.08% +0.00% / +0.13% +0.12% +0.10%] index_select random : Elapsed 0.092 ms (9.176 ms / 100) 9.167 -> 9.160 ( -0.08%) [ +0.00% +0.16% +0.10% / +0.11% +0.02% -0.08%] index_select random_sorted : Elapsed 0.092 ms (9.167 ms / 100) B = [5, 40, 20, 16] (stride (1, 1600, 80, 5)) A = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 1 1.232 -> 1.232 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.49% +1.14%] index_add_ linear : Elapsed 0.012 ms (1.234 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.59% +1.01%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.41% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.49% +0.65%] index_add_ spread : Elapsed 0.012 ms (1.233 ms / 100) 1.194 -> 1.193 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +0.92%] index_copy_ spread : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.67%] index_copy_ strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.49% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.34% / +0.00% +0.59% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +1.06% +1.06%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.50% +0.84%] index_copy_ perm : Elapsed 0.012 ms (1.195 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.49% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.233 ms / 100) 1.194 -> 1.193 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +0.75%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.194 ms / 100) 8.683 -> 8.698 ( +0.17%) [ +0.32% +0.00% +0.24% / +0.17% +0.36% +0.22%] index_select const : Elapsed 0.087 ms (8.711 ms / 100) 8.712 -> 8.713 ( +0.01%) [ +0.00% +0.02% +0.09% / +0.01% +0.17% +0.11%] index_select wrap : Elapsed 0.087 ms (8.712 ms / 100) 8.714 -> 8.705 ( -0.10%) [ +0.08% +0.00% +0.06% / -0.10% +0.07% +0.16%] index_select linear : Elapsed 0.087 ms (8.721 ms / 100) 8.701 -> 8.696 ( -0.06%) [ +0.00% +0.20% +0.03% / -0.06% +0.28% +0.16%] index_select reverse : Elapsed 0.087 ms (8.701 ms / 100) 8.682 -> 8.688 ( +0.07%) [ +0.09% +0.00% +0.17% / +0.07% +0.15% +0.43%] index_select skip64 : Elapsed 0.087 ms (8.690 ms / 100) 8.675 -> 8.686 ( +0.13%) [ +0.17% +0.30% +0.00% / +0.13% +0.17% +0.39%] index_select skip256 : Elapsed 0.087 ms (8.690 ms / 100) 8.711 -> 8.711 ( +0.00%) [ +0.02% +0.00% +0.17% / +0.03% +0.00% +0.21%] index_select spread : Elapsed 0.087 ms (8.713 ms / 100) 8.709 -> 8.711 ( +0.02%) [ +0.11% +0.09% +0.00% / +0.11% +0.02% +0.09%] index_select strided 3 : Elapsed 0.087 ms (8.719 ms / 100) 8.713 -> 8.721 ( +0.09%) [ +0.06% +0.00% +0.14% / +0.09% +0.24% +0.13%] index_select random : Elapsed 0.087 ms (8.718 ms / 100) 8.715 -> 8.714 ( -0.01%) [ +0.00% +0.07% +0.02% / -0.01% +0.21% +0.15%] index_select random_sorted : Elapsed 0.087 ms (8.715 ms / 100) B = [5, 40, 20, 16] (stride (1, 1600, 5, 100)) dim = 1 fill_cnt = 4 0.798 -> 0.799 ( +0.13%) [ +0.25% +0.25% +0.00% / +0.13% +0.75% +0.88%] index_fill_ const : Elapsed 0.008 ms (0.800 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.00% +0.25% +0.00% / +0.00% +0.38% +0.75%] index_fill_ linear : Elapsed 0.008 ms (0.800 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.25% +0.25% +0.00% / +0.13% +0.50% +0.63%] index_fill_ reverse : Elapsed 0.008 ms (0.802 ms / 100) 0.799 -> 0.800 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.50% +0.75%] index_fill_ skip64 : Elapsed 0.008 ms (0.800 ms / 100) 0.799 -> 0.800 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.63% +0.50%] index_fill_ skip256 : Elapsed 0.008 ms (0.800 ms / 100) 0.801 -> 0.801 ( +0.00%) [ +0.25% +0.12% +0.00% / +0.00% +0.25% +0.37%] index_fill_ spread : Elapsed 0.008 ms (0.803 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.38% +0.13% +0.00% / +0.13% +0.38% +0.50%] index_fill_ strided 3 : Elapsed 0.008 ms (0.803 ms / 100) 0.801 -> 0.801 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.37%] index_fill_ strided 5 : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.25% +0.13% +0.00% / +0.13% +0.38% +0.38%] index_fill_ strided 7 : Elapsed 0.008 ms (0.802 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_fill_ strided 8 : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.50% +0.50%] index_fill_ strided 16 : Elapsed 0.008 ms (0.800 ms / 100) 0.801 -> 0.801 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.00% +0.37% +0.50%] index_fill_ random : Elapsed 0.008 ms (0.802 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.25% +0.00% / +0.00% +0.88% +0.63%] index_fill_ random_sorted : Elapsed 0.008 ms (0.801 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.12%] index_fill_ perm : Elapsed 0.008 ms (0.803 ms / 100) 0.801 -> 0.801 ( +0.00%) [ +0.12% +0.25% +0.00% / +0.00% +0.75% +0.37%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.802 ms / 100) B = [5, 40, 20, 16] (stride (1, 1600, 5, 100)) A = [5, 4, 20, 16] (stride (64, 16, 320, 1)) dim = 1 1.308 -> 1.309 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.69% +0.99%] index_add_ linear : Elapsed 0.013 ms (1.310 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.307 -> 1.308 ( +0.08%) [ +0.00% +0.15% +0.00% / +0.08% +0.77% +0.92%] index_add_ reverse : Elapsed 0.013 ms (1.307 ms / 100) 1.271 -> 1.270 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.271 ms / 100) 1.307 -> 1.308 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.92% +0.92%] index_add_ spread : Elapsed 0.013 ms (1.310 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.71%] index_copy_ spread : Elapsed 0.013 ms (1.271 ms / 100) 1.307 -> 1.307 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.92% +0.99%] index_add_ strided 3 : Elapsed 0.013 ms (1.307 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.87% +0.95%] index_copy_ strided 3 : Elapsed 0.013 ms (1.269 ms / 100) 1.306 -> 1.307 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.92% +0.92%] index_add_ strided 7 : Elapsed 0.013 ms (1.308 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.87% +0.79%] index_copy_ strided 7 : Elapsed 0.013 ms (1.271 ms / 100) 1.308 -> 1.308 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.013 ms (1.308 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.79%] index_copy_ perm : Elapsed 0.013 ms (1.269 ms / 100) 1.308 -> 1.308 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.76% +0.84%] index_add_ perm_sorted : Elapsed 0.013 ms (1.309 ms / 100) 1.270 -> 1.270 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.133 -> 9.126 ( -0.08%) [ +0.00% +0.16% +0.21% / -0.08% +0.14% +0.31%] index_select const : Elapsed 0.091 ms (9.133 ms / 100) 9.164 -> 9.163 ( -0.01%) [ +0.08% +0.00% +0.00% / +0.05% +0.20% -0.01%] index_select wrap : Elapsed 0.092 ms (9.171 ms / 100) 9.147 -> 9.151 ( +0.04%) [ +0.00% +0.13% +0.11% / +0.04% +0.32% +0.44%] index_select linear : Elapsed 0.091 ms (9.147 ms / 100) 9.162 -> 9.171 ( +0.10%) [ +0.11% +0.07% +0.00% / +0.15% +0.16% +0.10%] index_select reverse : Elapsed 0.092 ms (9.172 ms / 100) 9.134 -> 9.152 ( +0.20%) [ +0.28% +0.00% +0.02% / +0.22% +0.22% +0.20%] index_select skip64 : Elapsed 0.092 ms (9.160 ms / 100) 9.133 -> 9.145 ( +0.13%) [ +0.11% +0.00% +0.08% / +0.13% +0.14% +0.24%] index_select skip256 : Elapsed 0.091 ms (9.143 ms / 100) 9.154 -> 9.163 ( +0.10%) [ +0.00% +0.20% +0.17% / +0.10% +0.36% +0.17%] index_select spread : Elapsed 0.092 ms (9.154 ms / 100) 9.169 -> 9.160 ( -0.10%) [ +0.00% +0.07% +0.11% / -0.10% +0.22% +0.20%] index_select strided 3 : Elapsed 0.092 ms (9.169 ms / 100) 9.156 -> 9.159 ( +0.03%) [ +0.34% +0.05% +0.00% / +0.03% +0.28% +0.15%] index_select random : Elapsed 0.092 ms (9.187 ms / 100) 9.155 -> 9.150 ( -0.05%) [ +0.11% +0.00% +0.07% / -0.05% +0.40% +0.24%] index_select random_sorted : Elapsed 0.092 ms (9.165 ms / 100) B = [5, 40, 20, 16] (stride (1, 1600, 5, 100)) A = [5, 4, 20, 16] (stride (1, 5, 320, 20)) dim = 1 1.266 -> 1.264 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.24% +0.39%] index_add_ linear : Elapsed 0.013 ms (1.267 ms / 100) 1.231 -> 1.229 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.41% +0.49%] index_copy_ linear : Elapsed 0.012 ms (1.231 ms / 100) 1.266 -> 1.265 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.24% +0.24%] index_add_ reverse : Elapsed 0.013 ms (1.267 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.24% +0.24%] index_copy_ reverse : Elapsed 0.012 ms (1.227 ms / 100) 1.260 -> 1.260 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.48% +0.79%] index_add_ spread : Elapsed 0.013 ms (1.261 ms / 100) 1.221 -> 1.225 ( +0.33%) [ +0.00% +0.08% +0.08% / +0.33% +0.57% +0.82%] index_copy_ spread : Elapsed 0.012 ms (1.221 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.71% +0.63%] index_add_ strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.239 -> 1.241 ( +0.16%) [ +0.16% +0.24% +0.00% / +0.16% +0.32% +0.32%] index_copy_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_add_ strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.73% +0.81%] index_copy_ strided 7 : Elapsed 0.012 ms (1.235 ms / 100) 1.261 -> 1.261 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.00% +0.40% +0.56%] index_add_ perm : Elapsed 0.013 ms (1.264 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.41%] index_copy_ perm : Elapsed 0.012 ms (1.222 ms / 100) 1.265 -> 1.265 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.32% +0.32%] index_add_ perm_sorted : Elapsed 0.013 ms (1.266 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.33% +0.33%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) 8.755 -> 8.754 ( -0.01%) [ +0.08% +0.00% +0.03% / -0.01% +0.21% +0.09%] index_select const : Elapsed 0.088 ms (8.762 ms / 100) 8.762 -> 8.749 ( -0.15%) [ +0.08% +0.00% +0.03% / -0.15% +0.03% +0.07%] index_select wrap : Elapsed 0.088 ms (8.769 ms / 100) 8.762 -> 8.767 ( +0.06%) [ +0.00% +0.06% +0.16% / +0.06% +0.26% +0.27%] index_select linear : Elapsed 0.088 ms (8.762 ms / 100) 8.754 -> 8.758 ( +0.05%) [ +0.21% +0.03% +0.00% / +0.05% +0.11% +0.25%] index_select reverse : Elapsed 0.088 ms (8.772 ms / 100) 8.759 -> 8.752 ( -0.08%) [ +0.00% +0.05% +0.15% / -0.08% +0.32% +0.06%] index_select skip64 : Elapsed 0.088 ms (8.759 ms / 100) 8.746 -> 8.753 ( +0.08%) [ +0.18% +0.00% +0.33% / +0.27% +0.22% +0.08%] index_select skip256 : Elapsed 0.088 ms (8.762 ms / 100) 8.761 -> 8.759 ( -0.02%) [ +0.00% +0.22% +0.13% / -0.02% +0.16% +0.08%] index_select spread : Elapsed 0.088 ms (8.761 ms / 100) 8.761 -> 8.763 ( +0.02%) [ +0.00% +0.14% +0.05% / +0.02% +0.02% +0.07%] index_select strided 3 : Elapsed 0.088 ms (8.761 ms / 100) 8.751 -> 8.757 ( +0.07%) [ +0.31% +0.00% +0.14% / +0.27% +0.19% +0.07%] index_select random : Elapsed 0.088 ms (8.778 ms / 100) 8.748 -> 8.752 ( +0.05%) [ +0.33% +0.25% +0.00% / +0.05% +0.41% +0.31%] index_select random_sorted : Elapsed 0.088 ms (8.777 ms / 100) B = [5, 40, 20, 16] (stride (16, 80, 3200, 1)) A = [5, 4, 20, 16] (stride (1, 1600, 5, 100)) dim = 1 1.349 -> 1.350 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.67%] index_add_ linear : Elapsed 0.013 ms (1.350 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.23% +0.00% +0.23% / +0.23% +0.53% +0.46%] index_copy_ linear : Elapsed 0.013 ms (1.316 ms / 100) 1.340 -> 1.342 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.60% +0.60%] index_add_ reverse : Elapsed 0.013 ms (1.341 ms / 100) 1.302 -> 1.302 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +1.00% +0.69%] index_copy_ reverse : Elapsed 0.013 ms (1.303 ms / 100) 1.345 -> 1.344 ( -0.07%) [ +0.00% +0.15% +0.00% / -0.07% +0.52% +0.59%] index_add_ spread : Elapsed 0.013 ms (1.345 ms / 100) 1.305 -> 1.306 ( +0.08%) [ +0.00% +0.15% +0.23% / +0.08% +0.84% +0.92%] index_copy_ spread : Elapsed 0.013 ms (1.305 ms / 100) 1.350 -> 1.349 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.52% +0.52%] index_add_ strided 3 : Elapsed 0.014 ms (1.350 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.23% +0.23%] index_copy_ strided 3 : Elapsed 0.013 ms (1.316 ms / 100) 1.354 -> 1.355 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.316 -> 1.315 ( -0.08%) [ +0.08% +0.15% +0.00% / -0.08% +0.53% +0.46%] index_copy_ strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.74%] index_add_ perm : Elapsed 0.013 ms (1.345 ms / 100) 1.304 -> 1.305 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.92% +0.84%] index_copy_ perm : Elapsed 0.013 ms (1.305 ms / 100) 1.341 -> 1.339 ( -0.15%) [ +0.00% +0.07% +0.00% / -0.15% +0.60% +0.52%] index_add_ perm_sorted : Elapsed 0.013 ms (1.341 ms / 100) 1.301 -> 1.302 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.85% +1.15%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.301 ms / 100) 9.151 -> 9.151 ( +0.00%) [ +0.00% +0.21% +0.01% / +0.00% +0.20% +0.04%] index_select const : Elapsed 0.092 ms (9.151 ms / 100) 9.168 -> 9.177 ( +0.10%) [ +0.17% +0.00% +0.12% / +0.11% +0.10% +0.13%] index_select wrap : Elapsed 0.092 ms (9.184 ms / 100) 9.162 -> 9.163 ( +0.01%) [ +0.00% +0.05% +0.05% / +0.01% +0.21% +0.10%] index_select linear : Elapsed 0.092 ms (9.162 ms / 100) 9.175 -> 9.172 ( -0.03%) [ +0.00% +0.09% +0.04% / +0.24% +0.04% -0.03%] index_select reverse : Elapsed 0.092 ms (9.175 ms / 100) 9.143 -> 9.156 ( +0.14%) [ +0.20% +0.02% +0.00% / +0.27% +0.26% +0.14%] index_select skip64 : Elapsed 0.092 ms (9.161 ms / 100) 9.146 -> 9.144 ( -0.02%) [ +0.09% +0.00% +0.32% / +0.16% +0.05% -0.02%] index_select skip256 : Elapsed 0.092 ms (9.154 ms / 100) 9.159 -> 9.163 ( +0.04%) [ +0.12% +0.00% +0.21% / +0.12% +0.17% +0.04%] index_select spread : Elapsed 0.092 ms (9.170 ms / 100) 9.162 -> 9.164 ( +0.02%) [ +0.00% +0.02% +0.16% / +0.19% +0.02% +0.20%] index_select strided 3 : Elapsed 0.092 ms (9.162 ms / 100) 9.179 -> 9.161 ( -0.20%) [ +0.07% +0.00% +0.13% / -0.20% -0.04% +0.12%] index_select random : Elapsed 0.092 ms (9.185 ms / 100) 9.168 -> 9.171 ( +0.03%) [ +0.04% +0.09% +0.00% / +0.05% +0.12% +0.03%] index_select random_sorted : Elapsed 0.092 ms (9.172 ms / 100) out_shape = [5, 4, 40, 16] in_shape = [5, 4, 20, 16] idx_dim = 2 B = [5, 4, 40, 16] (stride (2560, 16, 64, 1)) A = [5, 4, 20, 16] (stride (1280, 1, 64, 4)) dim = 2 2.439 -> 2.450 ( +0.45%) [ +0.16% +0.21% +0.00% / +0.45% +0.66% +0.74%] index_add_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.436 -> 2.444 ( +0.33%) [ +0.04% +0.04% +0.00% / +0.33% +0.49% +0.53%] index_copy_ linear : Elapsed 0.024 ms (2.437 ms / 100) 2.443 -> 2.454 ( +0.45%) [ +0.00% +0.08% +0.08% / +0.45% +0.61% +0.45%] index_add_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.435 -> 2.448 ( +0.53%) [ +0.00% +0.21% +0.08% / +1.11% +0.53% +0.66%] index_copy_ reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.443 -> 2.454 ( +0.45%) [ +0.12% +0.04% +0.00% / +0.53% +0.53% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.446 ms / 100) 2.438 -> 2.447 ( +0.37%) [ +0.00% +0.08% +0.00% / +0.37% +0.49% +0.45%] index_copy_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.440 -> 2.451 ( +0.45%) [ +0.41% +0.25% +0.00% / +0.45% +0.49% +0.53%] index_add_ strided 3 : Elapsed 0.025 ms (2.450 ms / 100) 2.436 -> 2.446 ( +0.41%) [ +0.08% +0.00% +0.08% / +0.57% +0.45% +0.41%] index_copy_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.445 -> 2.452 ( +0.29%) [ +0.04% +0.04% +0.00% / +0.45% +0.33% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.440 -> 2.449 ( +0.37%) [ +0.04% +0.08% +0.00% / +0.53% +0.45% +0.37%] index_copy_ strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.439 -> 2.453 ( +0.57%) [ +0.21% +0.00% +0.08% / +0.57% +0.82% +0.78%] index_add_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.435 -> 2.445 ( +0.41%) [ +0.08% +0.08% +0.00% / +0.41% +0.70% +0.66%] index_copy_ perm : Elapsed 0.024 ms (2.437 ms / 100) 2.440 -> 2.450 ( +0.41%) [ +0.08% +0.00% +0.04% / +0.41% +0.66% +0.82%] index_add_ perm_sorted : Elapsed 0.024 ms (2.442 ms / 100) 2.433 -> 2.447 ( +0.58%) [ +0.12% +0.00% +0.33% / +0.74% +0.66% +0.58%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.436 ms / 100) 4.488 -> 4.493 ( +0.11%) [ +0.38% +0.18% +0.00% / +0.36% +0.11% +0.16%] index_select const : Elapsed 0.045 ms (4.505 ms / 100) 4.495 -> 4.496 ( +0.02%) [ +0.07% +0.13% +0.00% / +0.04% +0.02% +0.16%] index_select wrap : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.497 ( -0.07%) [ +0.04% +0.07% +0.00% / -0.07% +0.16% -0.04%] index_select linear : Elapsed 0.045 ms (4.502 ms / 100) 4.494 -> 4.503 ( +0.20%) [ +0.09% +0.04% +0.00% / +0.45% +0.20% +0.22%] index_select reverse : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.497 ( +0.02%) [ +0.02% +0.11% +0.00% / +0.09% +0.02% +0.11%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.492 -> 4.501 ( +0.20%) [ +0.04% +0.04% +0.00% / +0.24% +0.27% +0.20%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.497 -> 4.497 ( +0.00%) [ +0.11% +0.09% +0.00% / +0.00% +0.27% +0.33%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.497 -> 4.499 ( +0.04%) [ +0.00% +0.00% +0.11% / +0.04% +0.09% +0.22%] index_select strided 3 : Elapsed 0.045 ms (4.497 ms / 100) 4.497 -> 4.500 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.13% +0.11% +0.07%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.497 -> 4.503 ( +0.13%) [ +0.00% +0.02% +0.13% / +0.24% +0.18% +0.13%] index_select strided 7 : Elapsed 0.045 ms (4.497 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.11% +0.07% +0.00% / +0.22% +0.07% +0.18%] index_select strided 8 : Elapsed 0.045 ms (4.497 ms / 100) 4.493 -> 4.494 ( +0.02%) [ +0.13% +0.13% +0.00% / +0.02% +0.18% +0.13%] index_select strided 16 : Elapsed 0.045 ms (4.499 ms / 100) 4.494 -> 4.490 ( -0.09%) [ +0.09% +0.00% +0.02% / -0.09% +0.09% +0.20%] index_select random : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.499 ( +0.07%) [ +0.02% +0.00% +0.04% / +0.07% +0.24% +0.22%] index_select random_sorted : Elapsed 0.045 ms (4.497 ms / 100) B = [5, 4, 40, 16] (stride (2560, 1, 64, 4)) A = [5, 4, 20, 16] (stride (1280, 20, 1, 80)) dim = 2 2.438 -> 2.449 ( +0.45%) [ +0.00% +0.12% +0.21% / +0.45% +0.98% +0.70%] index_add_ linear : Elapsed 0.024 ms (2.438 ms / 100) 2.434 -> 2.445 ( +0.45%) [ +0.00% +0.21% +0.53% / +0.45% +0.86% +1.03%] index_copy_ linear : Elapsed 0.024 ms (2.434 ms / 100) 2.427 -> 2.446 ( +0.78%) [ +0.37% +0.29% +0.00% / +0.78% +1.32% +1.11%] index_add_ reverse : Elapsed 0.024 ms (2.436 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.04% +0.04% +0.00% / +0.21% +0.90% +1.07%] index_copy_ reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.434 -> 2.445 ( +0.45%) [ +0.04% +0.00% +0.08% / +0.45% +1.07% +0.94%] index_add_ spread : Elapsed 0.024 ms (2.435 ms / 100) 2.433 -> 2.443 ( +0.41%) [ +0.00% +0.08% +0.08% / +0.41% +1.11% +1.07%] index_copy_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.444 -> 2.451 ( +0.29%) [ +0.00% +0.00% +0.04% / +0.33% +0.29% +0.53%] index_add_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.438 -> 2.452 ( +0.57%) [ +0.04% +0.04% +0.00% / +0.57% +0.62% +0.70%] index_copy_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.440 -> 2.452 ( +0.49%) [ +0.04% +0.00% +0.04% / +0.49% +0.53% +0.57%] index_add_ strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.439 -> 2.450 ( +0.45%) [ +0.00% +0.08% +0.16% / +0.45% +0.57% +0.57%] index_copy_ strided 7 : Elapsed 0.024 ms (2.439 ms / 100) 2.442 -> 2.447 ( +0.20%) [ +0.08% +0.08% +0.00% / +0.49% +0.20% +0.45%] index_add_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.440 -> 2.450 ( +0.41%) [ +0.00% +0.04% +0.12% / +0.61% +0.41% +0.41%] index_copy_ perm : Elapsed 0.024 ms (2.440 ms / 100) 2.445 -> 2.448 ( +0.12%) [ +0.00% +0.04% +0.00% / +0.65% +0.20% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.439 -> 2.447 ( +0.33%) [ +0.00% +0.08% +0.33% / +0.78% +0.33% +0.70%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 4.498 -> 4.496 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.16% +0.16%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.502 -> 4.506 ( +0.09%) [ +0.07% +0.00% +0.20% / +0.16% +0.09% +0.09%] index_select wrap : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.508 ( +0.11%) [ +0.00% +0.24% +0.31% / +0.11% +0.27% +0.20%] index_select linear : Elapsed 0.045 ms (4.503 ms / 100) 4.508 -> 4.505 ( -0.07%) [ +0.00% +0.09% +0.13% / -0.07% +0.07% +0.00%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.09% +0.20% +0.00%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.497 -> 4.500 ( +0.07%) [ +0.00% +0.07% +0.31% / +0.07% +0.07% +0.13%] index_select skip256 : Elapsed 0.045 ms (4.497 ms / 100) 4.499 -> 4.502 ( +0.07%) [ +0.00% +0.29% +0.16% / +0.07% +0.13% +0.24%] index_select spread : Elapsed 0.045 ms (4.499 ms / 100) 4.500 -> 4.504 ( +0.09%) [ +0.00% +0.20% +0.27% / +0.24% +0.13% +0.09%] index_select strided 3 : Elapsed 0.045 ms (4.500 ms / 100) 4.502 -> 4.504 ( +0.04%) [ +0.00% +0.09% +0.07% / +0.04% +0.20% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.502 -> 4.498 ( -0.09%) [ +0.07% +0.00% +0.22% / -0.09% +0.13% +0.22%] index_select strided 7 : Elapsed 0.045 ms (4.505 ms / 100) 4.500 -> 4.500 ( +0.00%) [ +0.00% +0.09% +0.22% / +0.00% +0.11% +0.22%] index_select strided 8 : Elapsed 0.045 ms (4.500 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.02% +0.00% +0.18% / +0.20% +0.11% +0.07%] index_select strided 16 : Elapsed 0.045 ms (4.502 ms / 100) 4.508 -> 4.502 ( -0.13%) [ +0.00% +0.18% +0.13% / -0.13% +0.09% -0.04%] index_select random : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.505 ( -0.04%) [ +0.02% +0.20% +0.00% / +0.02% -0.04% +0.18%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [5, 4, 40, 16] (stride (1, 3200, 5, 200)) A = [5, 4, 20, 16] (stride (1280, 320, 16, 1)) dim = 2 2.418 -> 2.430 ( +0.50%) [ +0.00% +0.08% +0.08% / +0.50% +0.66% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.418 ms / 100) 2.409 -> 2.420 ( +0.46%) [ +0.00% +0.08% +0.21% / +0.46% +0.83% +0.71%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.420 -> 2.430 ( +0.41%) [ +0.00% +0.08% +0.37% / +0.62% +0.41% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.420 ms / 100) 2.414 -> 2.426 ( +0.50%) [ +0.00% +0.04% +0.66% / +0.58% +0.54% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.414 ms / 100) 2.433 -> 2.447 ( +0.58%) [ +0.41% +0.00% +0.25% / +0.66% +0.58% +0.62%] index_add_ spread : Elapsed 0.024 ms (2.443 ms / 100) 2.443 -> 2.454 ( +0.45%) [ +0.00% +0.04% +0.04% / +0.49% +0.45% +0.61%] index_copy_ spread : Elapsed 0.024 ms (2.443 ms / 100) 2.433 -> 2.443 ( +0.41%) [ +0.00% +0.21% +0.00% / +0.41% +0.70% +0.62%] index_add_ strided 3 : Elapsed 0.024 ms (2.433 ms / 100) 2.436 -> 2.446 ( +0.41%) [ +0.08% +0.00% +0.08% / +0.41% +0.57% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.41% +0.49% +0.21%] index_add_ strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.436 -> 2.448 ( +0.49%) [ +0.08% +0.00% +0.41% / +0.49% +0.62% +0.62%] index_copy_ strided 7 : Elapsed 0.024 ms (2.438 ms / 100) 2.434 -> 2.446 ( +0.49%) [ +0.21% +0.00% +0.08% / +0.49% +0.53% +0.70%] index_add_ perm : Elapsed 0.024 ms (2.439 ms / 100) 2.434 -> 2.446 ( +0.49%) [ +0.04% +0.08% +0.00% / +0.49% +0.82% +0.78%] index_copy_ perm : Elapsed 0.024 ms (2.435 ms / 100) 2.431 -> 2.448 ( +0.70%) [ +0.00% +0.25% +0.08% / +0.74% +0.82% +0.70%] index_add_ perm_sorted : Elapsed 0.024 ms (2.431 ms / 100) 2.437 -> 2.453 ( +0.66%) [ +0.00% +0.04% +0.08% / +0.74% +0.66% +0.90%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.437 ms / 100) 4.432 -> 4.436 ( +0.09%) [ +0.00% +0.09% +0.07% / +0.09% +0.20% +0.20%] index_select const : Elapsed 0.044 ms (4.432 ms / 100) 4.444 -> 4.448 ( +0.09%) [ +0.07% +0.00% +0.02% / +0.09% +0.16% +0.27%] index_select wrap : Elapsed 0.044 ms (4.447 ms / 100) 4.446 -> 4.446 ( +0.00%) [ +0.13% +0.11% +0.00% / +0.00% +0.09% +0.20%] index_select linear : Elapsed 0.045 ms (4.452 ms / 100) 4.445 -> 4.437 ( -0.18%) [ +0.07% +0.16% +0.00% / -0.18% +0.20% +0.20%] index_select reverse : Elapsed 0.044 ms (4.448 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.07% +0.00% +0.14% / +0.02% +0.09% +0.11%] index_select skip64 : Elapsed 0.044 ms (4.439 ms / 100) 4.431 -> 4.433 ( +0.05%) [ +0.05% +0.16% +0.00% / +0.05% +0.16% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.433 ms / 100) 4.443 -> 4.448 ( +0.11%) [ +0.00% +0.11% +0.02% / +0.11% +0.18% +0.14%] index_select spread : Elapsed 0.044 ms (4.443 ms / 100) 4.444 -> 4.448 ( +0.09%) [ +0.11% +0.02% +0.00% / +0.11% +0.09% +0.23%] index_select strided 3 : Elapsed 0.044 ms (4.449 ms / 100) 4.432 -> 4.439 ( +0.16%) [ +0.00% +0.09% +0.27% / +0.16% +0.34% +0.25%] index_select strided 5 : Elapsed 0.044 ms (4.432 ms / 100) 4.443 -> 4.450 ( +0.16%) [ +0.00% +0.11% +0.00% / +0.16% +0.20% +0.23%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.433 -> 4.439 ( +0.14%) [ +0.00% +0.00% +0.16% / +0.20% +0.14% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.433 ms / 100) 4.434 -> 4.441 ( +0.16%) [ +0.00% +0.07% +0.14% / +0.20% +0.32% +0.16%] index_select strided 16 : Elapsed 0.044 ms (4.434 ms / 100) 4.438 -> 4.438 ( +0.00%) [ +0.27% +0.14% +0.00% / +0.00% +0.43% +0.29%] index_select random : Elapsed 0.044 ms (4.450 ms / 100) 4.441 -> 4.447 ( +0.14%) [ +0.07% +0.00% +0.11% / +0.16% +0.25% +0.14%] index_select random_sorted : Elapsed 0.044 ms (4.444 ms / 100) B = [5, 4, 40, 16] (stride (1, 3200, 5, 200)) A = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 2 2.401 -> 2.418 ( +0.71%) [ +0.12% +0.00% +0.21% / +0.71% +0.96% +0.96%] index_add_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.406 -> 2.422 ( +0.67%) [ +0.12% +0.00% +0.33% / +0.67% +0.79% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.397 -> 2.410 ( +0.54%) [ +0.25% +0.00% +0.04% / +0.54% +1.13% +1.17%] index_add_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.401 -> 2.407 ( +0.25%) [ +0.04% +0.08% +0.00% / +0.25% +1.00% +1.04%] index_copy_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.415 -> 2.431 ( +0.66%) [ +0.17% +0.21% +0.00% / +0.66% +0.87% +1.16%] index_add_ spread : Elapsed 0.024 ms (2.419 ms / 100) 2.429 -> 2.440 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.45% +1.15% +1.07%] index_copy_ spread : Elapsed 0.024 ms (2.429 ms / 100) 2.421 -> 2.430 ( +0.37%) [ +0.17% +0.00% +0.04% / +0.37% +0.54% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.431 -> 2.442 ( +0.45%) [ +0.00% +0.00% +0.00% / +0.45% +0.70% +0.53%] index_copy_ strided 3 : Elapsed 0.024 ms (2.431 ms / 100) 2.419 -> 2.432 ( +0.54%) [ +0.00% +0.08% +0.00% / +0.62% +0.54% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.419 ms / 100) 2.429 -> 2.441 ( +0.49%) [ +0.12% +0.08% +0.00% / +0.49% +0.49% +0.74%] index_copy_ strided 7 : Elapsed 0.024 ms (2.432 ms / 100) 2.423 -> 2.425 ( +0.08%) [ +0.17% +0.00% +0.08% / +0.54% +0.29% +0.08%] index_add_ perm : Elapsed 0.024 ms (2.427 ms / 100) 2.429 -> 2.436 ( +0.29%) [ +0.25% +0.00% +0.00% / +0.70% +0.29% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.435 ms / 100) 2.425 -> 2.427 ( +0.08%) [ +0.04% +0.00% +0.04% / +0.54% +0.08% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) 2.432 -> 2.438 ( +0.25%) [ +0.12% +0.00% +0.21% / +0.66% +0.33% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.435 ms / 100) 4.441 -> 4.440 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.09% +0.00%] index_select const : Elapsed 0.044 ms (4.441 ms / 100) 4.440 -> 4.440 ( +0.00%) [ +0.14% +0.25% +0.00% / +0.05% +0.00% +0.18%] index_select wrap : Elapsed 0.044 ms (4.446 ms / 100) 4.436 -> 4.443 ( +0.16%) [ +0.00% +0.25% +0.41% / +0.29% +0.16% +0.18%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.443 -> 4.444 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.05% +0.07% +0.02%] index_select reverse : Elapsed 0.044 ms (4.443 ms / 100) 4.439 -> 4.438 ( -0.02%) [ +0.02% +0.00% +0.05% / +0.07% -0.02% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.440 ms / 100) 4.435 -> 4.433 ( -0.05%) [ +0.00% +0.18% +0.14% / -0.05% +0.18% +0.14%] index_select skip256 : Elapsed 0.044 ms (4.435 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.14% +0.07% +0.00% / +0.09% +0.23% +0.23%] index_select spread : Elapsed 0.044 ms (4.442 ms / 100) 4.447 -> 4.445 ( -0.04%) [ +0.11% +0.00% +0.07% / -0.04% +0.18% +0.04%] index_select strided 3 : Elapsed 0.045 ms (4.452 ms / 100) 4.439 -> 4.439 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.11% +0.09%] index_select strided 5 : Elapsed 0.044 ms (4.443 ms / 100) 4.445 -> 4.440 ( -0.11%) [ +0.04% +0.04% +0.00% / -0.11% +0.09% +0.04%] index_select strided 7 : Elapsed 0.044 ms (4.447 ms / 100) 4.439 -> 4.444 ( +0.11%) [ +0.09% +0.00% +0.18% / +0.11% +0.18% +0.36%] index_select strided 8 : Elapsed 0.044 ms (4.443 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.43% +0.00% +0.11% / +0.16% +0.14% +0.09%] index_select strided 16 : Elapsed 0.045 ms (4.455 ms / 100) 4.443 -> 4.443 ( +0.00%) [ +0.00% +0.14% +0.14% / +0.11% +0.00% +0.02%] index_select random : Elapsed 0.044 ms (4.443 ms / 100) 4.445 -> 4.441 ( -0.09%) [ +0.07% +0.04% +0.00% / +0.00% +0.09% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.448 ms / 100) B = [5, 4, 40, 16] (stride (64, 16, 320, 1)) A = [5, 4, 20, 16] (stride (20, 100, 1, 400)) dim = 2 2.391 -> 2.404 ( +0.54%) [ +0.00% +0.13% +0.13% / +0.54% +0.96% +0.71%] index_add_ linear : Elapsed 0.024 ms (2.391 ms / 100) 2.389 -> 2.402 ( +0.54%) [ +0.00% +0.08% +0.25% / +0.54% +0.92% +0.80%] index_copy_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.390 -> 2.407 ( +0.71%) [ +0.17% +0.04% +0.00% / +0.71% +0.88% +0.75%] index_add_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.392 -> 2.403 ( +0.46%) [ +0.08% +0.00% +0.00% / +0.46% +0.71% +0.75%] index_copy_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.395 -> 2.407 ( +0.50%) [ +0.04% +0.00% +0.17% / +0.71% +0.58% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.396 ms / 100) 2.389 -> 2.402 ( +0.54%) [ +0.00% +0.17% +0.17% / +0.54% +0.71% +0.88%] index_copy_ spread : Elapsed 0.024 ms (2.389 ms / 100) 2.396 -> 2.408 ( +0.50%) [ +0.00% +0.04% +0.17% / +0.54% +0.67% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.396 ms / 100) 2.391 -> 2.405 ( +0.59%) [ +0.04% +0.00% +0.21% / +0.59% +0.71% +0.67%] index_copy_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.399 -> 2.409 ( +0.42%) [ +0.08% +0.00% +0.00% / +0.46% +0.58% +0.42%] index_add_ strided 7 : Elapsed 0.024 ms (2.401 ms / 100) 2.388 -> 2.398 ( +0.42%) [ +0.21% +0.17% +0.00% / +0.42% +0.80% +0.92%] index_copy_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.398 -> 2.409 ( +0.46%) [ +0.00% +0.08% +0.04% / +0.50% +0.50% +0.46%] index_add_ perm : Elapsed 0.024 ms (2.398 ms / 100) 2.390 -> 2.407 ( +0.71%) [ +0.00% +0.04% +0.00% / +0.71% +0.92% +0.88%] index_copy_ perm : Elapsed 0.024 ms (2.390 ms / 100) 2.395 -> 2.404 ( +0.38%) [ +0.13% +0.00% +0.00% / +0.38% +0.58% +0.75%] index_add_ perm_sorted : Elapsed 0.024 ms (2.398 ms / 100) 2.389 -> 2.405 ( +0.67%) [ +0.17% +0.00% +0.08% / +0.67% +0.67% +0.96%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.393 ms / 100) 4.414 -> 4.423 ( +0.20%) [ +0.11% +0.00% +0.27% / +0.20% +0.23% +0.20%] index_select const : Elapsed 0.044 ms (4.419 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.05% +0.09%] index_select wrap : Elapsed 0.044 ms (4.433 ms / 100) 4.420 -> 4.429 ( +0.20%) [ +0.09% +0.00% +0.14% / +0.20% +0.34% +0.25%] index_select linear : Elapsed 0.044 ms (4.424 ms / 100) 4.420 -> 4.428 ( +0.18%) [ +0.00% +0.11% +0.27% / +0.18% +0.43% +0.43%] index_select reverse : Elapsed 0.044 ms (4.420 ms / 100) 4.419 -> 4.415 ( -0.09%) [ +0.00% +0.07% +0.23% / -0.09% +0.18% +0.14%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.421 -> 4.422 ( +0.02%) [ +0.00% +0.14% +0.14% / +0.02% +0.23% +0.07%] index_select skip256 : Elapsed 0.044 ms (4.421 ms / 100) 4.423 -> 4.425 ( +0.05%) [ +0.20% +0.00% +0.16% / +0.05% +0.25% +0.27%] index_select spread : Elapsed 0.044 ms (4.432 ms / 100) 4.426 -> 4.423 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.18% +0.32%] index_select strided 3 : Elapsed 0.044 ms (4.429 ms / 100) 4.427 -> 4.427 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.00% +0.18% +0.18%] index_select strided 5 : Elapsed 0.044 ms (4.428 ms / 100) 4.426 -> 4.426 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +0.07% +0.11%] index_select strided 7 : Elapsed 0.044 ms (4.426 ms / 100) 4.427 -> 4.427 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.05% +0.25%] index_select strided 8 : Elapsed 0.044 ms (4.429 ms / 100) 4.422 -> 4.423 ( +0.02%) [ +0.14% +0.16% +0.00% / +0.02% +0.20% +0.18%] index_select strided 16 : Elapsed 0.044 ms (4.428 ms / 100) 4.426 -> 4.431 ( +0.11%) [ +0.07% +0.09% +0.00% / +0.11% +0.18% +0.45%] index_select random : Elapsed 0.044 ms (4.429 ms / 100) 4.426 -> 4.425 ( -0.02%) [ +0.00% +0.00% +0.02% / -0.02% +0.09% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.426 ms / 100) B = [5, 4, 40, 16] (stride (1, 200, 5, 800)) A = [5, 4, 20, 16] (stride (1280, 320, 1, 20)) dim = 2 2.407 -> 2.415 ( +0.33%) [ +0.04% +0.00% +0.08% / +0.33% +0.71% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.417 ( +0.54%) [ +0.00% +0.08% +0.08% / +0.54% +0.83% +0.92%] index_copy_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.399 -> 2.413 ( +0.58%) [ +0.00% +0.13% +0.08% / +0.58% +1.25% +1.42%] index_add_ reverse : Elapsed 0.024 ms (2.399 ms / 100) 2.398 -> 2.411 ( +0.54%) [ +0.00% +0.29% +0.21% / +0.54% +0.96% +1.29%] index_copy_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.417 -> 2.432 ( +0.62%) [ +0.00% +0.17% +0.21% / +0.62% +1.20% +1.08%] index_add_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.428 -> 2.441 ( +0.54%) [ +0.21% +0.00% +0.12% / +0.54% +1.03% +1.15%] index_copy_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.419 -> 2.437 ( +0.74%) [ +0.00% +0.21% +0.37% / +0.83% +0.79% +0.74%] index_add_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.430 -> 2.443 ( +0.53%) [ +0.00% +0.00% +0.12% / +0.53% +0.53% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.419 -> 2.431 ( +0.50%) [ +0.25% +0.17% +0.00% / +0.79% +0.58% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.431 -> 2.439 ( +0.33%) [ +0.12% +0.04% +0.00% / +0.33% +0.58% +0.53%] index_copy_ strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.418 -> 2.421 ( +0.12%) [ +0.04% +0.21% +0.00% / +0.66% +0.12% +0.12%] index_add_ perm : Elapsed 0.024 ms (2.419 ms / 100) 2.424 -> 2.431 ( +0.29%) [ +0.00% +0.04% +0.04% / +0.50% +0.33% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.424 ms / 100) 2.422 -> 2.429 ( +0.29%) [ +0.00% +0.00% +0.04% / +0.58% +0.29% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 2.431 -> 2.437 ( +0.25%) [ +0.00% +0.04% +0.04% / +0.25% +0.29% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.431 ms / 100) 4.443 -> 4.439 ( -0.09%) [ +0.07% +0.11% +0.00% / -0.05% -0.07% -0.09%] index_select const : Elapsed 0.044 ms (4.446 ms / 100) 4.447 -> 4.441 ( -0.13%) [ +0.13% +0.00% +0.00% / -0.13% +0.02% -0.02%] index_select wrap : Elapsed 0.045 ms (4.453 ms / 100) 4.446 -> 4.443 ( -0.07%) [ +0.00% +0.04% +0.07% / -0.04% -0.07% +0.04%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.447 -> 4.444 ( -0.07%) [ +0.00% +0.00% +0.07% / +0.11% -0.07% -0.04%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.437 -> 4.435 ( -0.05%) [ +0.16% +0.07% +0.00% / -0.05% +0.02% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.444 ms / 100) 4.438 -> 4.438 ( +0.00%) [ +0.09% +0.00% +0.11% / +0.11% +0.00% +0.11%] index_select skip256 : Elapsed 0.044 ms (4.442 ms / 100) 4.439 -> 4.444 ( +0.11%) [ +0.18% +0.11% +0.00% / +0.11% +0.14% +0.27%] index_select spread : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.441 ( +0.00%) [ +0.02% +0.09% +0.00% / +0.00% +0.23% +0.14%] index_select strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.442 -> 4.443 ( +0.02%) [ +0.11% +0.05% +0.00% / +0.02% +0.11% +0.02%] index_select strided 5 : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.442 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.18% +0.23%] index_select strided 7 : Elapsed 0.044 ms (4.442 ms / 100) 4.446 -> 4.439 ( -0.16%) [ +0.00% +0.04% +0.02% / -0.16% -0.02% +0.00%] index_select strided 8 : Elapsed 0.044 ms (4.446 ms / 100) 4.443 -> 4.441 ( -0.05%) [ +0.20% +0.14% +0.00% / -0.05% +0.09% +0.07%] index_select strided 16 : Elapsed 0.045 ms (4.452 ms / 100) 4.443 -> 4.446 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.11% +0.07% +0.36%] index_select random : Elapsed 0.044 ms (4.447 ms / 100) 4.447 -> 4.439 ( -0.18%) [ +0.00% +0.07% +0.04% / +0.09% +0.07% -0.18%] index_select random_sorted : Elapsed 0.044 ms (4.447 ms / 100) out_shape = [5, 4, 20, 40] in_shape = [5, 4, 20, 16] idx_dim = 3 B = [5, 4, 20, 40] (stride (3200, 1, 4, 80)) A = [5, 4, 20, 16] (stride (1, 80, 320, 5)) dim = 3 4.439 -> 4.447 ( +0.18%) [ +0.18% +0.00% +0.05% / +0.18% +0.63% +0.59%] index_add_ linear : Elapsed 0.044 ms (4.447 ms / 100) 4.270 -> 4.280 ( +0.23%) [ +0.00% +0.14% +0.09% / +0.23% +0.70% +0.75%] index_copy_ linear : Elapsed 0.043 ms (4.270 ms / 100) 4.456 -> 4.464 ( +0.18%) [ +0.16% +0.00% +0.16% / +0.18% +0.58% +0.61%] index_add_ reverse : Elapsed 0.045 ms (4.463 ms / 100) 4.278 -> 4.281 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.84% +0.58%] index_copy_ reverse : Elapsed 0.043 ms (4.284 ms / 100) 4.449 -> 4.457 ( +0.18%) [ +0.16% +0.16% +0.00% / +0.18% +0.61% +0.61%] index_add_ spread : Elapsed 0.045 ms (4.456 ms / 100) 4.281 -> 4.287 ( +0.14%) [ +0.12% +0.14% +0.00% / +0.14% +0.70% +0.65%] index_copy_ spread : Elapsed 0.043 ms (4.286 ms / 100) 4.448 -> 4.449 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.02% +0.49% +0.38%] index_add_ strided 3 : Elapsed 0.044 ms (4.448 ms / 100) 4.275 -> 4.276 ( +0.02%) [ +0.02% +0.00% +0.05% / +0.02% +0.47% +0.56%] index_copy_ strided 3 : Elapsed 0.043 ms (4.276 ms / 100) 4.457 -> 4.462 ( +0.11%) [ +0.11% +0.09% +0.00% / +0.11% +0.63% +0.67%] index_add_ strided 7 : Elapsed 0.045 ms (4.462 ms / 100) 4.284 -> 4.284 ( +0.00%) [ +0.09% +0.02% +0.00% / +0.00% +0.72% +0.75%] index_copy_ strided 7 : Elapsed 0.043 ms (4.288 ms / 100) 4.444 -> 4.442 ( -0.05%) [ +0.00% +0.02% +0.16% / -0.05% +0.41% +0.59%] index_add_ perm : Elapsed 0.044 ms (4.444 ms / 100) 4.270 -> 4.277 ( +0.16%) [ +0.00% +0.23% +0.21% / +0.16% +0.63% +0.70%] index_copy_ perm : Elapsed 0.043 ms (4.270 ms / 100) 4.444 -> 4.448 ( +0.09%) [ +0.18% +0.23% +0.00% / +0.09% +0.56% +0.52%] index_add_ perm_sorted : Elapsed 0.045 ms (4.452 ms / 100) 4.273 -> 4.276 ( +0.07%) [ +0.14% +0.26% +0.00% / +0.07% +0.59% +0.47%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.279 ms / 100) 5.558 -> 5.564 ( +0.11%) [ +0.20% +0.02% +0.00% / +0.18% +0.16% +0.11%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.564 -> 5.563 ( -0.02%) [ +0.04% +0.09% +0.00% / -0.02% +0.20% +0.14%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.11% +0.09% +0.00% / -0.04% +0.02% +0.13%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.568 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.00% +0.13% +0.09%] index_select reverse : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.02% +0.11% +0.00% / +0.04% +0.02% -0.02%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.00% +0.11% +0.04% / +0.11% +0.04% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.563 ( -0.07%) [ +0.00% +0.18% +0.05% / -0.07% +0.00% +0.07%] index_select spread : Elapsed 0.056 ms (5.567 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.16% +0.18% +0.00% / +0.02% +0.13% +0.18%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.559 -> 5.567 ( +0.14%) [ +0.00% +0.07% +0.02% / +0.20% +0.14% +0.22%] index_select strided 5 : Elapsed 0.056 ms (5.559 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.13% +0.05% +0.00% / +0.05% +0.31% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.561 -> 5.559 ( -0.04%) [ +0.02% +0.00% +0.04% / +0.02% +0.23% -0.04%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.566 -> 5.561 ( -0.09%) [ +0.00% +0.04% +0.05% / +0.09% -0.09% +0.02%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.00% +0.02% +0.04% / +0.05% +0.05% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.564 ms / 100) B = [5, 4, 20, 40] (stride (1, 200, 800, 5)) A = [5, 4, 20, 16] (stride (1280, 20, 1, 80)) dim = 3 3.632 -> 3.633 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.91% +0.80%] index_add_ linear : Elapsed 0.036 ms (3.633 ms / 100) 3.506 -> 3.509 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.83% +0.74%] index_copy_ linear : Elapsed 0.035 ms (3.507 ms / 100) 3.617 -> 3.618 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.75% +0.77%] index_add_ reverse : Elapsed 0.036 ms (3.618 ms / 100) 3.491 -> 3.491 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_copy_ reverse : Elapsed 0.035 ms (3.491 ms / 100) 3.624 -> 3.625 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.69% +0.69%] index_add_ spread : Elapsed 0.036 ms (3.625 ms / 100) 3.496 -> 3.498 ( +0.06%) [ +0.00% +0.03% +0.09% / +0.06% +0.92% +0.94%] index_copy_ spread : Elapsed 0.035 ms (3.496 ms / 100) 3.622 -> 3.623 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.75% +0.75%] index_add_ strided 3 : Elapsed 0.036 ms (3.623 ms / 100) 3.494 -> 3.494 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.97% +0.74%] index_copy_ strided 3 : Elapsed 0.035 ms (3.497 ms / 100) 3.616 -> 3.615 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.83% +0.77%] index_add_ strided 7 : Elapsed 0.036 ms (3.618 ms / 100) 3.489 -> 3.490 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +1.00% +0.80%] index_copy_ strided 7 : Elapsed 0.035 ms (3.489 ms / 100) 3.631 -> 3.633 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.88% +0.91%] index_add_ perm : Elapsed 0.036 ms (3.632 ms / 100) 3.506 -> 3.506 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_copy_ perm : Elapsed 0.035 ms (3.506 ms / 100) 3.621 -> 3.622 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.77% +0.77%] index_add_ perm_sorted : Elapsed 0.036 ms (3.621 ms / 100) 3.493 -> 3.494 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +1.00% +1.00%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.493 ms / 100) 5.485 -> 5.480 ( -0.09%) [ +0.00% +0.09% +0.00% / -0.09% -0.04% +0.07%] index_select const : Elapsed 0.055 ms (5.485 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.09% +0.00% +0.15% / +0.00% +0.00% +0.02%] index_select wrap : Elapsed 0.055 ms (5.499 ms / 100) 5.492 -> 5.492 ( +0.00%) [ +0.16% +0.13% +0.00% / +0.09% +0.00% +0.11%] index_select linear : Elapsed 0.055 ms (5.501 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.09% +0.20% +0.00% / +0.02% -0.02% +0.13%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.481 -> 5.484 ( +0.05%) [ +0.07% +0.13% +0.00% / +0.05% +0.16% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.485 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.31% +0.16%] index_select skip256 : Elapsed 0.055 ms (5.482 ms / 100) 5.493 -> 5.496 ( +0.05%) [ +0.00% +0.04% +0.11% / +0.13% +0.05% +0.05%] index_select spread : Elapsed 0.055 ms (5.493 ms / 100) 5.495 -> 5.493 ( -0.04%) [ +0.00% +0.05% +0.16% / +0.02% -0.04% +0.25%] index_select strided 3 : Elapsed 0.055 ms (5.495 ms / 100) 5.493 -> 5.491 ( -0.04%) [ +0.04% +0.00% +0.11% / -0.02% -0.04% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.495 ms / 100) 5.495 -> 5.491 ( -0.07%) [ +0.00% +0.09% +0.02% / +0.13% -0.07% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.486 -> 5.477 ( -0.16%) [ +0.00% +0.02% +0.02% / -0.16% +0.07% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.486 ms / 100) 5.497 -> 5.488 ( -0.16%) [ +0.05% +0.11% +0.00% / +0.11% -0.15% -0.16%] index_select random : Elapsed 0.055 ms (5.500 ms / 100) 5.490 -> 5.490 ( +0.00%) [ +0.02% +0.00% +0.13% / +0.09% +0.11% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.491 ms / 100) B = [5, 4, 20, 40] (stride (1, 200, 800, 5)) A = [5, 4, 20, 16] (stride (20, 100, 1, 400)) dim = 3 3.931 -> 3.930 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.64% +0.64%] index_add_ linear : Elapsed 0.039 ms (3.931 ms / 100) 3.804 -> 3.805 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.63% +0.58%] index_copy_ linear : Elapsed 0.038 ms (3.804 ms / 100) 3.945 -> 3.945 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.46% +0.46%] index_add_ reverse : Elapsed 0.039 ms (3.946 ms / 100) 3.819 -> 3.818 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.50% +0.47%] index_copy_ reverse : Elapsed 0.038 ms (3.819 ms / 100) 3.933 -> 3.935 ( +0.05%) [ +0.05% +0.00% +0.03% / +0.05% +0.58% +0.51%] index_add_ spread : Elapsed 0.039 ms (3.935 ms / 100) 3.816 -> 3.819 ( +0.08%) [ +0.05% +0.03% +0.00% / +0.08% +0.55% +0.47%] index_copy_ spread : Elapsed 0.038 ms (3.818 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.58% +0.55%] index_copy_ strided 3 : Elapsed 0.038 ms (3.807 ms / 100) 3.943 -> 3.944 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.63% +0.56%] index_add_ strided 7 : Elapsed 0.039 ms (3.945 ms / 100) 3.818 -> 3.819 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.71% +0.52%] index_copy_ strided 7 : Elapsed 0.038 ms (3.818 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.10% +0.08% +0.00% / +0.03% +0.53% +0.51%] index_add_ perm : Elapsed 0.039 ms (3.936 ms / 100) 3.805 -> 3.805 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.50% +0.53%] index_copy_ perm : Elapsed 0.038 ms (3.805 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) 3.806 -> 3.808 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.55% +0.39%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.807 ms / 100) 5.558 -> 5.557 ( -0.02%) [ +0.00% +0.14% +0.04% / +0.07% +0.27% -0.02%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.560 -> 5.571 ( +0.20%) [ +0.05% +0.00% +0.18% / +0.20% +0.22% +0.22%] index_select wrap : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.570 ( +0.05%) [ +0.14% +0.16% +0.00% / +0.13% +0.22% +0.05%] index_select linear : Elapsed 0.056 ms (5.575 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.00% +0.14% +0.11% / +0.02% +0.27% +0.16%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.00% +0.07% +0.00% / -0.05% -0.11% -0.11%] index_select skip64 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.556 ( -0.18%) [ +0.04% +0.00% +0.05% / +0.05% -0.16% -0.18%] index_select skip256 : Elapsed 0.056 ms (5.568 ms / 100) 5.566 -> 5.566 ( +0.00%) [ +0.13% +0.00% +0.04% / +0.00% +0.14% +0.11%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.07% +0.00% +0.05% / +0.22% +0.09% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.07% +0.00% +0.04% / -0.05% +0.09% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.562 -> 5.568 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.18% +0.25%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.14% +0.05% +0.00% / +0.07% +0.09% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.00% +0.07% +0.05% / +0.07% +0.14% +0.22%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.567 -> 5.567 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.05% +0.00% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.576 ms / 100) B = [5, 4, 20, 40] (stride (4, 1, 800, 20)) dim = 3 fill_cnt = 16 1.062 -> 1.061 ( -0.09%) [ +0.00% +0.19% +0.00% / -0.09% +0.00% +0.09%] index_fill_ const : Elapsed 0.011 ms (1.062 ms / 100) 1.059 -> 1.059 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.09% +0.38%] index_fill_ linear : Elapsed 0.011 ms (1.059 ms / 100) 1.056 -> 1.056 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.28% +0.38%] index_fill_ reverse : Elapsed 0.011 ms (1.057 ms / 100) 1.057 -> 1.057 ( +0.00%) [ +0.00% +0.09% +0.19% / +0.00% +0.47% +0.47%] index_fill_ skip64 : Elapsed 0.011 ms (1.057 ms / 100) 1.058 -> 1.060 ( +0.19%) [ +0.09% +0.19% +0.00% / +0.19% +0.19% +0.28%] index_fill_ skip256 : Elapsed 0.011 ms (1.059 ms / 100) 1.056 -> 1.060 ( +0.38%) [ +0.38% +0.28% +0.00% / +0.38% +0.47% +0.76%] index_fill_ spread : Elapsed 0.011 ms (1.060 ms / 100) 1.059 -> 1.058 ( -0.09%) [ +0.00% +0.09% +0.09% / +0.00% -0.09% +0.00%] index_fill_ strided 3 : Elapsed 0.011 ms (1.059 ms / 100) 1.057 -> 1.057 ( +0.00%) [ +0.19% +0.00% +0.09% / +0.00% +0.19% +0.09%] index_fill_ strided 5 : Elapsed 0.011 ms (1.059 ms / 100) 1.058 -> 1.058 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.09%] index_fill_ strided 7 : Elapsed 0.011 ms (1.058 ms / 100) 1.060 -> 1.060 ( +0.00%) [ +0.00% +0.47% +0.00% / +0.00% +0.00% +0.00%] index_fill_ strided 8 : Elapsed 0.011 ms (1.060 ms / 100) 1.060 -> 1.058 ( -0.19%) [ +0.09% +0.09% +0.00% / +0.09% -0.19% +0.00%] index_fill_ strided 16 : Elapsed 0.011 ms (1.061 ms / 100) 1.059 -> 1.057 ( -0.19%) [ +0.09% +0.00% +0.00% / -0.19% +0.09% +0.00%] index_fill_ random : Elapsed 0.011 ms (1.060 ms / 100) 1.055 -> 1.057 ( +0.19%) [ +0.28% +0.19% +0.00% / +0.19% +0.47% +0.47%] index_fill_ random_sorted : Elapsed 0.011 ms (1.058 ms / 100) 1.057 -> 1.057 ( +0.00%) [ +0.19% +0.28% +0.00% / +0.09% +0.09% +0.00%] index_fill_ perm : Elapsed 0.011 ms (1.059 ms / 100) 1.059 -> 1.058 ( -0.09%) [ +0.09% +0.00% +0.00% / +0.00% -0.09% -0.09%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.060 ms / 100) B = [5, 4, 20, 40] (stride (1, 5, 800, 20)) A = [5, 4, 20, 16] (stride (1, 5, 20, 400)) dim = 3 4.128 -> 4.128 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.75% +0.75%] index_add_ linear : Elapsed 0.041 ms (4.128 ms / 100) 3.993 -> 3.994 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.80% +0.85%] index_copy_ linear : Elapsed 0.040 ms (3.995 ms / 100) 4.150 -> 4.151 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.84% +0.84%] index_add_ reverse : Elapsed 0.042 ms (4.152 ms / 100) 4.008 -> 4.012 ( +0.10%) [ +0.05% +0.07% +0.00% / +0.10% +0.70% +0.70%] index_copy_ reverse : Elapsed 0.040 ms (4.010 ms / 100) 4.140 -> 4.140 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.70% +0.77%] index_add_ spread : Elapsed 0.041 ms (4.141 ms / 100) 4.007 -> 4.006 ( -0.02%) [ +0.00% +0.12% +0.00% / -0.02% +0.72% +0.80%] index_copy_ spread : Elapsed 0.040 ms (4.007 ms / 100) 4.153 -> 4.151 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.72% +0.77%] index_add_ strided 3 : Elapsed 0.042 ms (4.153 ms / 100) 4.001 -> 4.003 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.67% +0.67%] index_copy_ strided 3 : Elapsed 0.040 ms (4.001 ms / 100) 4.151 -> 4.150 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.77% +0.79%] index_add_ strided 7 : Elapsed 0.042 ms (4.151 ms / 100) 4.004 -> 4.006 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.85% +0.90%] index_copy_ strided 7 : Elapsed 0.040 ms (4.010 ms / 100) 4.128 -> 4.131 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.78% +0.75%] index_add_ perm : Elapsed 0.041 ms (4.130 ms / 100) 3.994 -> 3.998 ( +0.10%) [ +0.05% +0.00% +0.05% / +0.10% +0.75% +0.78%] index_copy_ perm : Elapsed 0.040 ms (3.996 ms / 100) 4.148 -> 4.149 ( +0.02%) [ +0.00% +0.05% +0.12% / +0.02% +0.84% +0.94%] index_add_ perm_sorted : Elapsed 0.041 ms (4.148 ms / 100) 3.997 -> 3.997 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.85% +1.13%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.997 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.11% +0.07% +0.11%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.573 -> 5.565 ( -0.14%) [ +0.04% +0.00% +0.04% / +0.11% -0.14% +0.07%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.00% +0.07% +0.09% / +0.07% +0.07% +0.04%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.573 ( +0.09%) [ +0.13% +0.13% +0.00% / +0.09% +0.18% +0.27%] index_select reverse : Elapsed 0.056 ms (5.575 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.04% +0.11% +0.00% / +0.09% +0.07% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.560 -> 5.569 ( +0.16%) [ +0.07% +0.00% +0.02% / +0.16% +0.22% +0.16%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.22% +0.18% +0.00% / +0.13% +0.04% +0.02%] index_select spread : Elapsed 0.056 ms (5.579 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.00% +0.16% +0.02% / +0.04% -0.07% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.568 ( -0.04%) [ +0.00% +0.04% +0.02% / +0.04% -0.04% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.07% +0.05% +0.00% / -0.04% +0.05% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.564 -> 5.566 ( +0.04%) [ +0.00% +0.00% +0.22% / +0.04% +0.16% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.569 -> 5.568 ( -0.02%) [ +0.09% +0.00% +0.11% / +0.05% +0.16% -0.02%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.572 ( +0.11%) [ +0.11% +0.00% +0.05% / +0.13% +0.11% +0.27%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) out_shape = [40, 16, 4, 20] in_shape = [5, 16, 4, 20] idx_dim = 0 B = [40, 16, 4, 20] (stride (1280, 20, 320, 1)) A = [5, 16, 4, 20] (stride (16, 1, 1600, 80)) dim = 0 1.532 -> 1.539 ( +0.46%) [ +0.26% +0.46% +0.00% / +0.59% +0.46% +1.04%] index_add_ linear : Elapsed 0.015 ms (1.536 ms / 100) 1.477 -> 1.482 ( +0.34%) [ +0.27% +0.20% +0.00% / +0.41% +0.34% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.481 ms / 100) 1.514 -> 1.515 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.73% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.515 ms / 100) 1.465 -> 1.469 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +0.48% +0.34%] index_copy_ reverse : Elapsed 0.015 ms (1.465 ms / 100) 1.540 -> 1.545 ( +0.32%) [ +0.91% +0.71% +0.00% / +0.32% +0.45% +0.58%] index_add_ spread : Elapsed 0.016 ms (1.554 ms / 100) 1.481 -> 1.483 ( +0.14%) [ +0.61% +0.54% +0.00% / +0.14% +0.68% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.490 ms / 100) 1.539 -> 1.540 ( +0.06%) [ +0.65% +0.39% +0.00% / +0.06% +0.65% +1.56%] index_add_ strided 3 : Elapsed 0.015 ms (1.549 ms / 100) 1.480 -> 1.481 ( +0.07%) [ +0.61% +0.34% +0.00% / +0.07% +0.68% +1.01%] index_copy_ strided 3 : Elapsed 0.015 ms (1.489 ms / 100) 1.514 -> 1.516 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.59% +0.66%] index_add_ strided 7 : Elapsed 0.015 ms (1.514 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.21% +0.07% +0.00% / +0.14% +0.48% +0.68%] index_copy_ strided 7 : Elapsed 0.015 ms (1.466 ms / 100) 1.531 -> 1.538 ( +0.46%) [ +0.65% +0.20% +0.00% / +0.46% +0.78% +0.65%] index_add_ perm : Elapsed 0.015 ms (1.541 ms / 100) 1.473 -> 1.480 ( +0.48%) [ +0.34% +0.27% +0.00% / +0.48% +0.68% +0.61%] index_copy_ perm : Elapsed 0.015 ms (1.478 ms / 100) 1.525 -> 1.526 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.526 ms / 100) 1.473 -> 1.476 ( +0.20%) [ +0.07% +0.00% +0.07% / +0.20% +0.61% +0.61%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) 8.206 -> 8.214 ( +0.10%) [ +0.07% +0.00% +0.17% / +0.10% +0.32% +0.27%] index_select const : Elapsed 0.082 ms (8.212 ms / 100) 8.212 -> 8.229 ( +0.21%) [ +0.12% +0.16% +0.00% / +0.28% +0.50% +0.21%] index_select wrap : Elapsed 0.082 ms (8.222 ms / 100) 8.228 -> 8.224 ( -0.05%) [ +0.09% +0.00% +0.17% / +0.12% +0.32% -0.05%] index_select linear : Elapsed 0.082 ms (8.235 ms / 100) 8.221 -> 8.224 ( +0.04%) [ +0.00% +0.05% +0.27% / +0.06% +0.57% +0.04%] index_select reverse : Elapsed 0.082 ms (8.221 ms / 100) 8.211 -> 8.217 ( +0.07%) [ +0.00% +0.06% +0.09% / +0.12% +0.24% +0.07%] index_select skip64 : Elapsed 0.082 ms (8.211 ms / 100) 8.198 -> 8.200 ( +0.02%) [ +0.07% +0.17% +0.00% / +0.02% +0.48% +0.38%] index_select skip256 : Elapsed 0.082 ms (8.204 ms / 100) 8.228 -> 8.208 ( -0.24%) [ +0.00% +0.04% +0.16% / -0.24% +0.47% +0.15%] index_select spread : Elapsed 0.082 ms (8.228 ms / 100) 8.214 -> 8.225 ( +0.13%) [ +0.00% +0.33% +0.19% / +0.13% +0.35% +0.21%] index_select strided 3 : Elapsed 0.082 ms (8.214 ms / 100) 8.211 -> 8.229 ( +0.22%) [ +0.11% +0.09% +0.00% / +0.45% +0.44% +0.22%] index_select random : Elapsed 0.082 ms (8.220 ms / 100) 8.220 -> 8.218 ( -0.02%) [ +0.09% +0.04% +0.00% / -0.02% +0.40% +0.33%] index_select random_sorted : Elapsed 0.082 ms (8.227 ms / 100) B = [40, 16, 4, 20] (stride (1, 3200, 800, 40)) A = [5, 16, 4, 20] (stride (4, 400, 1, 20)) dim = 0 1.355 -> 1.354 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.014 ms (1.356 ms / 100) 1.308 -> 1.312 ( +0.31%) [ +0.31% +0.08% +0.00% / +0.31% +0.69% +0.76%] index_copy_ linear : Elapsed 0.013 ms (1.312 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.15% / +0.00% +0.59% +0.81%] index_add_ reverse : Elapsed 0.014 ms (1.360 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.23% +0.00% +0.00% / +0.38% +0.38% +0.69%] index_copy_ reverse : Elapsed 0.013 ms (1.315 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.74% +0.96%] index_add_ spread : Elapsed 0.014 ms (1.362 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.15% +0.00% +0.15% / +0.23% +0.53% +0.61%] index_copy_ spread : Elapsed 0.013 ms (1.315 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.74%] index_add_ strided 3 : Elapsed 0.014 ms (1.355 ms / 100) 1.312 -> 1.309 ( -0.23%) [ +0.15% +0.08% +0.00% / -0.23% +0.30% +0.53%] index_copy_ strided 3 : Elapsed 0.013 ms (1.314 ms / 100) 1.362 -> 1.362 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.29% +0.44%] index_add_ strided 7 : Elapsed 0.014 ms (1.362 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.00% +0.08% +0.08% / +0.23% +0.38% +0.46%] index_copy_ strided 7 : Elapsed 0.013 ms (1.312 ms / 100) 1.358 -> 1.362 ( +0.29%) [ +0.29% +0.07% +0.00% / +0.29% +1.10% +0.81%] index_add_ perm : Elapsed 0.014 ms (1.362 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.69% +0.46%] index_copy_ perm : Elapsed 0.013 ms (1.314 ms / 100) 1.355 -> 1.359 ( +0.30%) [ +0.00% +0.00% +0.07% / +0.30% +0.74% +0.66%] index_add_ perm_sorted : Elapsed 0.014 ms (1.355 ms / 100) 1.309 -> 1.314 ( +0.38%) [ +0.00% +0.46% +0.08% / +0.38% +0.61% +0.69%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.309 ms / 100) 7.929 -> 7.930 ( +0.01%) [ +0.23% +0.21% +0.00% / +0.01% +0.23% +0.45%] index_select const : Elapsed 0.079 ms (7.947 ms / 100) 7.929 -> 7.933 ( +0.05%) [ +0.40% +0.08% +0.00% / +0.05% +0.37% +0.67%] index_select wrap : Elapsed 0.080 ms (7.961 ms / 100) 7.936 -> 7.925 ( -0.14%) [ +0.18% +0.00% +0.26% / -0.14% +0.42% +0.29%] index_select linear : Elapsed 0.079 ms (7.950 ms / 100) 7.931 -> 7.944 ( +0.16%) [ +0.00% +0.03% +0.08% / +0.16% +0.26% +0.25%] index_select reverse : Elapsed 0.079 ms (7.931 ms / 100) 7.933 -> 7.914 ( -0.24%) [ +0.14% +0.04% +0.00% / -0.24% +0.19% +0.23%] index_select skip64 : Elapsed 0.079 ms (7.944 ms / 100) 7.926 -> 7.919 ( -0.09%) [ +0.00% +0.19% +0.00% / -0.09% +0.37% +0.44%] index_select skip256 : Elapsed 0.079 ms (7.926 ms / 100) 7.929 -> 7.944 ( +0.19%) [ +0.00% +0.00% +0.14% / +0.19% +0.39% +0.45%] index_select spread : Elapsed 0.079 ms (7.929 ms / 100) 7.932 -> 7.936 ( +0.05%) [ +0.05% +0.13% +0.00% / +0.05% +0.29% +0.38%] index_select strided 3 : Elapsed 0.079 ms (7.936 ms / 100) 7.932 -> 7.936 ( +0.05%) [ +0.10% +0.00% +0.09% / +0.05% +0.62% +0.47%] index_select random : Elapsed 0.079 ms (7.940 ms / 100) 7.930 -> 7.952 ( +0.28%) [ +0.00% +0.26% +0.23% / +0.28% +0.47% +0.50%] index_select random_sorted : Elapsed 0.079 ms (7.930 ms / 100) B = [40, 16, 4, 20] (stride (4, 3200, 1, 160)) A = [5, 16, 4, 20] (stride (1280, 80, 20, 1)) dim = 0 1.319 -> 1.319 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.68%] index_add_ linear : Elapsed 0.013 ms (1.320 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.39% +0.86%] index_copy_ linear : Elapsed 0.013 ms (1.275 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.45% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.24% +0.00% +0.08% / +0.16% +0.39% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.45% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.320 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.319 -> 1.318 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.68% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.55% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.61% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.47% +1.02%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.61% +0.76%] index_add_ perm : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.47% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.275 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) 7.861 -> 7.869 ( +0.10%) [ +0.17% +0.18% +0.00% / +0.10% +0.37% +0.47%] index_select const : Elapsed 0.079 ms (7.874 ms / 100) 7.900 -> 7.894 ( -0.08%) [ +0.01% +0.00% +0.00% / +0.13% -0.08% +0.16%] index_select wrap : Elapsed 0.079 ms (7.901 ms / 100) 7.885 -> 7.898 ( +0.16%) [ +0.13% +0.00% +0.16% / +0.16% +0.24% +0.20%] index_select linear : Elapsed 0.079 ms (7.895 ms / 100) 7.886 -> 7.886 ( +0.00%) [ +0.00% +0.22% +0.28% / +0.00% +0.15% +0.19%] index_select reverse : Elapsed 0.079 ms (7.886 ms / 100) 7.875 -> 7.882 ( +0.09%) [ +0.03% +0.00% +0.04% / +0.14% +0.09% +0.18%] index_select skip64 : Elapsed 0.079 ms (7.877 ms / 100) 7.860 -> 7.874 ( +0.18%) [ +0.00% +0.05% +0.08% / +0.18% +0.43% +0.31%] index_select skip256 : Elapsed 0.079 ms (7.860 ms / 100) 7.879 -> 7.890 ( +0.14%) [ +0.05% +0.00% +0.08% / +0.14% +0.53% +0.30%] index_select spread : Elapsed 0.079 ms (7.883 ms / 100) 7.897 -> 7.891 ( -0.08%) [ +0.10% +0.00% +0.16% / -0.08% +0.18% -0.01%] index_select strided 3 : Elapsed 0.079 ms (7.905 ms / 100) 7.901 -> 7.890 ( -0.14%) [ +0.00% +0.05% +0.05% / +0.01% +0.15% -0.14%] index_select random : Elapsed 0.079 ms (7.901 ms / 100) 7.884 -> 7.893 ( +0.11%) [ +0.18% +0.00% +0.10% / +0.11% +0.20% +0.16%] index_select random_sorted : Elapsed 0.079 ms (7.898 ms / 100) B = [40, 16, 4, 20] (stride (1, 3200, 40, 160)) A = [5, 16, 4, 20] (stride (1280, 1, 320, 16)) dim = 0 1.461 -> 1.461 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.75% +0.75%] index_add_ linear : Elapsed 0.015 ms (1.462 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.56% +0.56%] index_copy_ linear : Elapsed 0.014 ms (1.418 ms / 100) 1.455 -> 1.456 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.82% +1.17%] index_add_ reverse : Elapsed 0.015 ms (1.457 ms / 100) 1.412 -> 1.413 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.85% +1.13%] index_copy_ reverse : Elapsed 0.014 ms (1.413 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.62% +0.75%] index_add_ spread : Elapsed 0.015 ms (1.466 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.71% +0.63%] index_copy_ spread : Elapsed 0.014 ms (1.420 ms / 100) 1.462 -> 1.461 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.68% +0.68%] index_add_ strided 3 : Elapsed 0.015 ms (1.462 ms / 100) 1.417 -> 1.417 ( +0.00%) [ +0.21% +0.14% +0.00% / +0.00% +0.64% +0.85%] index_copy_ strided 3 : Elapsed 0.014 ms (1.420 ms / 100) 1.468 -> 1.468 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.95% +0.89%] index_add_ strided 7 : Elapsed 0.015 ms (1.468 ms / 100) 1.424 -> 1.425 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.77% +0.77%] index_copy_ strided 7 : Elapsed 0.014 ms (1.425 ms / 100) 1.467 -> 1.467 ( +0.00%) [ +0.20% +0.07% +0.00% / +0.00% +0.89% +1.02%] index_add_ perm : Elapsed 0.015 ms (1.470 ms / 100) 1.423 -> 1.425 ( +0.14%) [ +0.07% +0.00% +0.00% / +0.14% +0.91% +1.19%] index_copy_ perm : Elapsed 0.014 ms (1.424 ms / 100) 1.462 -> 1.462 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.89% +0.82%] index_add_ perm_sorted : Elapsed 0.015 ms (1.462 ms / 100) 1.415 -> 1.416 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.92% +0.85%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.416 ms / 100) 8.244 -> 8.257 ( +0.16%) [ +0.19% +0.00% +0.29% / +0.16% +0.36% +0.33%] index_select const : Elapsed 0.083 ms (8.260 ms / 100) 8.271 -> 8.263 ( -0.10%) [ +0.13% +0.00% +0.11% / -0.10% +0.16% +0.34%] index_select wrap : Elapsed 0.083 ms (8.282 ms / 100) 8.270 -> 8.274 ( +0.05%) [ +0.00% +0.23% +0.13% / +0.05% +0.34% +0.39%] index_select linear : Elapsed 0.083 ms (8.270 ms / 100) 8.257 -> 8.270 ( +0.16%) [ +0.23% +0.18% +0.00% / +0.16% +0.18% +0.36%] index_select reverse : Elapsed 0.083 ms (8.276 ms / 100) 8.264 -> 8.256 ( -0.10%) [ +0.12% +0.00% +0.22% / -0.10% +0.12% -0.01%] index_select skip64 : Elapsed 0.083 ms (8.274 ms / 100) 8.257 -> 8.277 ( +0.24%) [ +0.00% +0.01% +0.06% / +0.29% +0.29% +0.24%] index_select skip256 : Elapsed 0.083 ms (8.257 ms / 100) 8.270 -> 8.264 ( -0.07%) [ +0.00% +0.10% +0.15% / -0.07% +0.05% +0.34%] index_select spread : Elapsed 0.083 ms (8.270 ms / 100) 8.269 -> 8.276 ( +0.08%) [ +0.05% +0.00% +0.24% / +0.08% +0.30% +0.41%] index_select strided 3 : Elapsed 0.083 ms (8.273 ms / 100) 8.268 -> 8.273 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.17% +0.59%] index_select random : Elapsed 0.083 ms (8.268 ms / 100) 8.258 -> 8.260 ( +0.02%) [ +0.11% +0.15% +0.00% / +0.02% +0.44% +0.68%] index_select random_sorted : Elapsed 0.083 ms (8.267 ms / 100) B = [40, 16, 4, 20] (stride (1, 40, 12800, 640)) A = [5, 16, 4, 20] (stride (20, 100, 1600, 1)) dim = 0 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.28% +0.35%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.22% +0.15% +0.00% / +0.07% +0.36% +0.51%] index_copy_ linear : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.28% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.376 -> 1.378 ( +0.15%) [ +0.00% +0.22% +0.00% / +0.15% +0.44% +0.65%] index_copy_ reverse : Elapsed 0.014 ms (1.376 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.28% +0.42%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.44% +0.00% +0.00% / +0.07% +0.44% +0.51%] index_copy_ spread : Elapsed 0.014 ms (1.382 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.42% +0.56%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.375 -> 1.377 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.44% +0.65%] index_copy_ strided 3 : Elapsed 0.014 ms (1.377 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.375 -> 1.379 ( +0.29%) [ +0.22% +0.22% +0.00% / +0.29% +0.58% +0.65%] index_copy_ strided 7 : Elapsed 0.014 ms (1.378 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.56%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.376 -> 1.375 ( -0.07%) [ +0.15% +0.22% +0.00% / -0.07% +0.36% +0.65%] index_copy_ perm : Elapsed 0.014 ms (1.378 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.42% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.36% +0.44%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.377 ms / 100) 8.233 -> 8.247 ( +0.17%) [ +0.00% +0.27% +0.21% / +0.17% +0.21% +0.33%] index_select const : Elapsed 0.082 ms (8.233 ms / 100) 8.264 -> 8.266 ( +0.02%) [ +0.00% +0.05% +0.33% / +0.15% +0.02% +0.51%] index_select wrap : Elapsed 0.083 ms (8.264 ms / 100) 8.256 -> 8.270 ( +0.17%) [ +0.36% +0.00% +0.16% / +0.27% +0.45% +0.17%] index_select linear : Elapsed 0.083 ms (8.286 ms / 100) 8.262 -> 8.264 ( +0.02%) [ +0.00% +0.15% +0.19% / +0.12% +0.02% +0.15%] index_select reverse : Elapsed 0.083 ms (8.262 ms / 100) 8.241 -> 8.246 ( +0.06%) [ +0.07% +0.00% +0.25% / +0.13% +0.06% +0.29%] index_select skip64 : Elapsed 0.082 ms (8.247 ms / 100) 8.242 -> 8.254 ( +0.15%) [ +0.32% +0.08% +0.00% / +0.15% +0.22% +0.40%] index_select skip256 : Elapsed 0.083 ms (8.268 ms / 100) 8.269 -> 8.270 ( +0.01%) [ +0.07% +0.00% +0.18% / +0.01% +0.24% +0.18%] index_select spread : Elapsed 0.083 ms (8.275 ms / 100) 8.264 -> 8.266 ( +0.02%) [ +0.00% +0.27% +0.13% / +0.02% +0.23% +0.05%] index_select strided 3 : Elapsed 0.083 ms (8.264 ms / 100) 8.273 -> 8.272 ( -0.01%) [ +0.05% +0.00% +0.04% / +0.02% +0.25% -0.01%] index_select random : Elapsed 0.083 ms (8.277 ms / 100) 8.259 -> 8.290 ( +0.38%) [ +0.00% +0.29% +0.23% / +0.45% +0.44% +0.38%] index_select random_sorted : Elapsed 0.083 ms (8.259 ms / 100) B = [40, 16, 4, 20] (stride (64, 1, 16, 2560)) A = [5, 16, 4, 20] (stride (1, 400, 100, 5)) dim = 0 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.68% +0.76%] index_add_ linear : Elapsed 0.013 ms (1.321 ms / 100) 1.277 -> 1.276 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.55% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.277 ms / 100) 1.322 -> 1.321 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.61% +0.68%] index_add_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.08% +0.00% +0.39% / +0.00% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.322 -> 1.323 ( +0.08%) [ +0.30% +0.08% +0.00% / +0.08% +0.83% +0.68%] index_add_ spread : Elapsed 0.013 ms (1.326 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.70% +0.70%] index_copy_ spread : Elapsed 0.013 ms (1.284 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.54% +0.62%] index_copy_ strided 7 : Elapsed 0.013 ms (1.290 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.08% +0.00% +0.00% / +0.16% +0.71% +0.71%] index_copy_ perm : Elapsed 0.013 ms (1.277 ms / 100) 1.320 -> 1.322 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.83% +0.76%] index_add_ perm_sorted : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 7.892 -> 7.889 ( -0.04%) [ +0.00% +0.05% +0.06% / -0.04% +0.33% +0.05%] index_select const : Elapsed 0.079 ms (7.892 ms / 100) 7.874 -> 7.895 ( +0.27%) [ +0.10% +0.00% +0.29% / +0.27% +0.38% +0.46%] index_select wrap : Elapsed 0.079 ms (7.882 ms / 100) 7.871 -> 7.870 ( -0.01%) [ +0.00% +0.10% +0.47% / -0.01% +0.52% +0.44%] index_select linear : Elapsed 0.079 ms (7.871 ms / 100) 7.873 -> 7.893 ( +0.25%) [ +0.14% +0.00% +0.06% / +0.25% +0.32% +0.25%] index_select reverse : Elapsed 0.079 ms (7.884 ms / 100) 7.866 -> 7.900 ( +0.43%) [ +0.01% +0.00% +0.00% / +0.43% +0.46% +0.99%] index_select skip64 : Elapsed 0.079 ms (7.867 ms / 100) 7.882 -> 7.895 ( +0.16%) [ +0.27% +0.19% +0.00% / +0.29% +0.16% +0.74%] index_select skip256 : Elapsed 0.079 ms (7.903 ms / 100) 7.879 -> 7.880 ( +0.01%) [ +0.20% +0.00% +0.09% / +0.01% +0.23% +0.53%] index_select spread : Elapsed 0.079 ms (7.895 ms / 100) 7.885 -> 7.891 ( +0.08%) [ +0.06% +0.00% +0.05% / +0.08% +0.44% +0.39%] index_select strided 3 : Elapsed 0.079 ms (7.890 ms / 100) 7.881 -> 7.890 ( +0.11%) [ +0.14% +0.13% +0.00% / +0.11% +0.29% +0.39%] index_select random : Elapsed 0.079 ms (7.892 ms / 100) 7.880 -> 7.899 ( +0.24%) [ +0.00% +0.04% +0.23% / +0.24% +0.52% +0.39%] index_select random_sorted : Elapsed 0.079 ms (7.880 ms / 100) B = [40, 16, 4, 20] (stride (4, 160, 1, 2560)) A = [5, 16, 4, 20] (stride (1280, 80, 1, 4)) dim = 0 1.523 -> 1.522 ( -0.07%) [ +0.00% +0.00% +0.13% / -0.07% +0.39% +1.18%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.41% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.523 -> 1.522 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.39% +1.05%] index_add_ reverse : Elapsed 0.015 ms (1.524 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.41% +0.88%] index_copy_ reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.92%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.481 ( +0.14%) [ +0.00% +0.07% +0.07% / +0.14% +0.34% +1.22%] index_copy_ spread : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.53% +1.31%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.47% +1.35%] index_copy_ strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.523 -> 1.522 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.53% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.14% +0.20% / +0.07% +0.41% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.478 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.59% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.74%] index_copy_ perm : Elapsed 0.015 ms (1.478 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.47% +0.74%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) 8.531 -> 8.522 ( -0.11%) [ +0.00% +0.12% +0.00% / -0.11% +0.25% +0.00%] index_select const : Elapsed 0.085 ms (8.531 ms / 100) 8.543 -> 8.550 ( +0.08%) [ +0.04% +0.32% +0.00% / +0.08% +0.23% +0.30%] index_select wrap : Elapsed 0.085 ms (8.546 ms / 100) 8.541 -> 8.540 ( -0.01%) [ +0.25% +0.11% +0.00% / +0.22% -0.01% +0.05%] index_select linear : Elapsed 0.086 ms (8.562 ms / 100) 8.527 -> 8.532 ( +0.06%) [ +0.19% +0.00% +0.26% / +0.28% +0.68% +0.06%] index_select reverse : Elapsed 0.085 ms (8.543 ms / 100) 8.526 -> 8.528 ( +0.02%) [ +0.14% +0.14% +0.00% / +0.08% +0.16% +0.02%] index_select skip64 : Elapsed 0.085 ms (8.538 ms / 100) 8.530 -> 8.528 ( -0.02%) [ +0.09% +0.01% +0.00% / -0.02% +0.23% +0.04%] index_select skip256 : Elapsed 0.085 ms (8.538 ms / 100) 8.525 -> 8.539 ( +0.16%) [ +0.00% +0.28% +0.41% / +0.16% +0.33% +0.39%] index_select spread : Elapsed 0.085 ms (8.525 ms / 100) 8.545 -> 8.547 ( +0.02%) [ +0.04% +0.00% +0.18% / +0.26% +0.02% +0.39%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.544 -> 8.555 ( +0.13%) [ +0.27% +0.00% +0.08% / +0.13% +0.13% +0.28%] index_select random : Elapsed 0.086 ms (8.567 ms / 100) 8.530 -> 8.535 ( +0.06%) [ +0.21% +0.00% +0.27% / +0.32% +0.06% +0.36%] index_select random_sorted : Elapsed 0.085 ms (8.548 ms / 100) out_shape = [5, 40, 4, 20] in_shape = [5, 16, 4, 20] idx_dim = 1 B = [5, 40, 4, 20] (stride (3200, 80, 20, 1)) A = [5, 16, 4, 20] (stride (1280, 1, 320, 16)) dim = 1 1.410 -> 1.411 ( +0.07%) [ +0.21% +0.00% +0.00% / +0.07% +0.28% +0.28%] index_add_ linear : Elapsed 0.014 ms (1.413 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.29% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.359 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.49% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.371 -> 1.373 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.51% +1.31%] index_copy_ reverse : Elapsed 0.014 ms (1.372 ms / 100) 1.412 -> 1.412 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.28% +0.14%] index_add_ spread : Elapsed 0.014 ms (1.412 ms / 100) 1.362 -> 1.361 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.07% +0.15%] index_copy_ spread : Elapsed 0.014 ms (1.362 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.425 ms / 100) 1.376 -> 1.377 ( +0.07%) [ +0.00% +0.29% +0.00% / +0.07% +0.36% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.376 ms / 100) 1.427 -> 1.427 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.49% +0.42%] index_add_ strided 7 : Elapsed 0.014 ms (1.427 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.29% +0.29%] index_copy_ strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.415 -> 1.413 ( -0.14%) [ +0.00% +0.07% +0.00% / -0.14% +0.35% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.415 ms / 100) 1.362 -> 1.363 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.29% +0.29%] index_copy_ perm : Elapsed 0.014 ms (1.362 ms / 100) 1.415 -> 1.414 ( -0.07%) [ +0.00% +0.00% +0.14% / +0.14% -0.07% +0.00%] index_add_ perm_sorted : Elapsed 0.014 ms (1.415 ms / 100) 1.363 -> 1.362 ( -0.07%) [ +0.07% +0.00% +0.07% / +0.07% -0.07% +0.15%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.364 ms / 100) 3.526 -> 3.524 ( -0.06%) [ +0.09% +0.00% +0.23% / +0.11% -0.06% -0.03%] index_select const : Elapsed 0.035 ms (3.529 ms / 100) 3.538 -> 3.536 ( -0.06%) [ +0.06% +0.17% +0.00% / -0.06% -0.06% +0.00%] index_select wrap : Elapsed 0.035 ms (3.540 ms / 100) 3.534 -> 3.534 ( +0.00%) [ +0.14% +0.00% +0.03% / +0.00% +0.54% +0.54%] index_select linear : Elapsed 0.035 ms (3.539 ms / 100) 3.536 -> 3.538 ( +0.06%) [ +0.00% +0.00% +0.28% / +0.06% +0.48% +0.34%] index_select reverse : Elapsed 0.035 ms (3.536 ms / 100) 3.533 -> 3.527 ( -0.17%) [ +0.06% +0.00% +0.11% / +0.17% -0.03% -0.17%] index_select skip64 : Elapsed 0.035 ms (3.535 ms / 100) 3.531 -> 3.522 ( -0.25%) [ +0.11% +0.14% +0.00% / +0.14% -0.14% -0.25%] index_select skip256 : Elapsed 0.035 ms (3.535 ms / 100) 3.536 -> 3.527 ( -0.25%) [ +0.00% +0.08% +0.17% / +0.17% -0.25% -0.06%] index_select spread : Elapsed 0.035 ms (3.536 ms / 100) 3.533 -> 3.540 ( +0.20%) [ +0.14% +0.37% +0.00% / +0.28% +0.20% +0.25%] index_select strided 3 : Elapsed 0.035 ms (3.538 ms / 100) 3.540 -> 3.537 ( -0.08%) [ +0.06% +0.17% +0.00% / -0.08% -0.06% +0.08%] index_select strided 5 : Elapsed 0.035 ms (3.542 ms / 100) 3.535 -> 3.531 ( -0.11%) [ +0.00% +0.00% +0.28% / +0.14% -0.11% -0.11%] index_select strided 7 : Elapsed 0.035 ms (3.535 ms / 100) 3.534 -> 3.531 ( -0.08%) [ +0.17% +0.00% +0.11% / +0.11% -0.08% +0.00%] index_select strided 8 : Elapsed 0.035 ms (3.540 ms / 100) 3.541 -> 3.525 ( -0.45%) [ +0.00% +0.06% +0.00% / +0.11% -0.45% -0.23%] index_select random : Elapsed 0.035 ms (3.541 ms / 100) 3.539 -> 3.528 ( -0.31%) [ +0.11% +0.14% +0.00% / +0.23% -0.31% -0.23%] index_select random_sorted : Elapsed 0.035 ms (3.543 ms / 100) B = [5, 40, 4, 20] (stride (40, 1, 200, 800)) A = [5, 16, 4, 20] (stride (1, 400, 5, 20)) dim = 1 4.119 -> 4.119 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.63% +0.70%] index_add_ linear : Elapsed 0.041 ms (4.121 ms / 100) 3.983 -> 3.985 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.70% +0.68%] index_copy_ linear : Elapsed 0.040 ms (3.983 ms / 100) 4.141 -> 4.141 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.70% +0.75%] index_add_ reverse : Elapsed 0.041 ms (4.142 ms / 100) 4.007 -> 4.008 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.70% +0.77%] index_copy_ reverse : Elapsed 0.040 ms (4.009 ms / 100) 4.139 -> 4.144 ( +0.12%) [ +0.17% +0.02% +0.00% / +0.12% +0.72% +0.72%] index_add_ spread : Elapsed 0.041 ms (4.146 ms / 100) 4.004 -> 4.010 ( +0.15%) [ +0.12% +0.12% +0.00% / +0.15% +0.77% +0.77%] index_copy_ spread : Elapsed 0.040 ms (4.009 ms / 100) 4.145 -> 4.144 ( -0.02%) [ +0.14% +0.00% +0.02% / -0.02% +0.65% +0.65%] index_add_ strided 3 : Elapsed 0.042 ms (4.151 ms / 100) 3.998 -> 4.001 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.70% +0.65%] index_copy_ strided 3 : Elapsed 0.040 ms (3.999 ms / 100) 4.141 -> 4.140 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.72% +0.72%] index_add_ strided 7 : Elapsed 0.041 ms (4.141 ms / 100) 4.006 -> 4.009 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.72% +0.70%] index_copy_ strided 7 : Elapsed 0.040 ms (4.008 ms / 100) 4.118 -> 4.117 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.70% +0.75%] index_add_ perm : Elapsed 0.041 ms (4.120 ms / 100) 3.983 -> 3.988 ( +0.13%) [ +0.05% +0.03% +0.00% / +0.13% +0.73% +0.70%] index_copy_ perm : Elapsed 0.040 ms (3.985 ms / 100) 4.138 -> 4.141 ( +0.07%) [ +0.10% +0.02% +0.00% / +0.07% +0.94% +0.87%] index_add_ perm_sorted : Elapsed 0.041 ms (4.142 ms / 100) 3.994 -> 3.995 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.85% +0.83%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.997 ms / 100) 5.564 -> 5.559 ( -0.09%) [ +0.09% +0.00% +0.14% / +0.00% -0.09% +0.02%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.566 ( -0.09%) [ +0.00% +0.13% +0.16% / +0.04% -0.09% +0.11%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.563 -> 5.568 ( +0.09%) [ +0.00% +0.18% +0.16% / +0.16% +0.09% +0.27%] index_select linear : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.18% +0.02% +0.00% / +0.22% -0.05% +0.11%] index_select reverse : Elapsed 0.056 ms (5.577 ms / 100) 5.558 -> 5.563 ( +0.09%) [ +0.13% +0.00% +0.18% / +0.09% +0.11% +0.18%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.14% +0.00% +0.02% / -0.02% +0.02% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.568 -> 5.563 ( -0.09%) [ +0.07% +0.09% +0.00% / +0.14% -0.09% -0.09%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.577 -> 5.569 ( -0.14%) [ +0.00% +0.00% +0.00% / +0.00% -0.14% -0.11%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.09% +0.14% +0.00% / +0.04% +0.05% +0.09%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.05% +0.05% / +0.05% -0.02% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.566 ms / 100) 5.560 -> 5.562 ( +0.04%) [ +0.07% +0.00% +0.07% / +0.04% +0.31% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.05% +0.02% +0.00% / +0.02% -0.05% -0.04%] index_select random : Elapsed 0.056 ms (5.574 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.02% +0.11% +0.00% / +0.00% +0.07% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [5, 40, 4, 20] (stride (1, 5, 200, 800)) A = [5, 16, 4, 20] (stride (64, 4, 1, 320)) dim = 1 4.358 -> 4.358 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.50% +0.34%] index_add_ linear : Elapsed 0.044 ms (4.359 ms / 100) 4.198 -> 4.198 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.48% +0.36%] index_copy_ linear : Elapsed 0.042 ms (4.199 ms / 100) 4.353 -> 4.345 ( -0.18%) [ +0.02% +0.02% +0.00% / -0.18% +0.30% +0.30%] index_add_ reverse : Elapsed 0.044 ms (4.354 ms / 100) 4.190 -> 4.185 ( -0.12%) [ +0.07% +0.05% +0.00% / -0.12% +0.38% +0.36%] index_copy_ reverse : Elapsed 0.042 ms (4.193 ms / 100) 4.339 -> 4.347 ( +0.18%) [ +0.21% +0.18% +0.00% / +0.18% +0.81% +0.69%] index_add_ spread : Elapsed 0.043 ms (4.348 ms / 100) 4.188 -> 4.192 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.64% +0.53%] index_copy_ spread : Elapsed 0.042 ms (4.192 ms / 100) 4.352 -> 4.358 ( +0.14%) [ +0.00% +0.11% +0.11% / +0.14% +0.46% +0.46%] index_add_ strided 3 : Elapsed 0.044 ms (4.352 ms / 100) 4.186 -> 4.189 ( +0.07%) [ +0.00% +0.05% +0.07% / +0.07% +0.55% +0.45%] index_copy_ strided 3 : Elapsed 0.042 ms (4.186 ms / 100) 4.350 -> 4.351 ( +0.02%) [ +0.09% +0.02% +0.00% / +0.02% +0.55% +0.57%] index_add_ strided 7 : Elapsed 0.044 ms (4.354 ms / 100) 4.190 -> 4.194 ( +0.10%) [ +0.07% +0.00% +0.00% / +0.10% +0.62% +0.55%] index_copy_ strided 7 : Elapsed 0.042 ms (4.193 ms / 100) 4.354 -> 4.356 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.55% +0.57%] index_add_ perm : Elapsed 0.044 ms (4.356 ms / 100) 4.194 -> 4.194 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.00% +0.57% +0.55%] index_copy_ perm : Elapsed 0.042 ms (4.194 ms / 100) 4.358 -> 4.359 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.48% +0.44%] index_add_ perm_sorted : Elapsed 0.044 ms (4.360 ms / 100) 4.197 -> 4.202 ( +0.12%) [ +0.05% +0.02% +0.00% / +0.12% +0.48% +0.45%] index_copy_ perm_sorted : Elapsed 0.042 ms (4.199 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.00% +0.23% +0.14% / +0.11% +0.27% +0.32%] index_select const : Elapsed 0.056 ms (5.557 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.11% +0.00% +0.02% / +0.09% +0.14% +0.31%] index_select wrap : Elapsed 0.056 ms (5.573 ms / 100) 5.574 -> 5.574 ( +0.00%) [ +0.00% +0.04% +0.02% / +0.00% +0.07% +0.05%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.574 -> 5.579 ( +0.09%) [ +0.00% +0.16% +0.07% / +0.09% +0.09% +0.14%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.572 -> 5.569 ( -0.05%) [ +0.02% +0.00% +0.07% / -0.05% -0.04% -0.05%] index_select skip64 : Elapsed 0.056 ms (5.573 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.04% +0.02% +0.00% / +0.11% +0.02% -0.04%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.18% +0.11% +0.00% / +0.11% +0.05% +0.09%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.05% +0.04% +0.00% / +0.02% +0.18% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.570 -> 5.563 ( -0.13%) [ +0.00% +0.20% +0.14% / -0.13% +0.09% +0.18%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.11% +0.00% +0.22% / +0.09% +0.11% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.04% +0.05% +0.00% / +0.09% +0.09% +0.16%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.568 -> 5.576 ( +0.14%) [ +0.00% +0.09% +0.11% / +0.16% +0.14% +0.18%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.570 -> 5.574 ( +0.07%) [ +0.00% +0.02% +0.07% / +0.07% +0.13% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) out_shape = [5, 16, 40, 20] in_shape = [5, 16, 4, 20] idx_dim = 2 B = [5, 16, 40, 20] (stride (12800, 800, 20, 1)) A = [5, 16, 4, 20] (stride (320, 20, 1600, 1)) dim = 2 0.567 -> 0.566 ( -0.18%) [ +0.00% +0.00% +0.00% / -0.18% +0.00% +0.35%] index_add_ linear : Elapsed 0.006 ms (0.567 ms / 100) 0.552 -> 0.553 ( +0.18%) [ +1.81% +0.00% +0.00% / +0.18% +0.18% +0.36%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.565 -> 0.565 ( +0.00%) [ +0.18% +0.18% +0.00% / +0.00% +1.24% +0.88%] index_add_ reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.550 -> 0.551 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.91% +0.73%] index_copy_ reverse : Elapsed 0.006 ms (0.551 ms / 100) 0.565 -> 0.566 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +1.06% +1.42%] index_add_ spread : Elapsed 0.006 ms (0.565 ms / 100) 0.551 -> 0.551 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.91% +0.91%] index_copy_ spread : Elapsed 0.006 ms (0.551 ms / 100) 0.565 -> 0.565 ( +0.00%) [ +0.18% +0.35% +0.00% / +0.00% +1.24% +0.88%] index_add_ strided 3 : Elapsed 0.006 ms (0.566 ms / 100) 0.551 -> 0.551 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.006 ms (0.551 ms / 100) 0.565 -> 0.567 ( +0.35%) [ +0.00% +0.18% +0.00% / +0.35% +1.06% +0.88%] index_add_ strided 7 : Elapsed 0.006 ms (0.565 ms / 100) 0.551 -> 0.551 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.91% +0.54%] index_copy_ strided 7 : Elapsed 0.006 ms (0.551 ms / 100) 0.566 -> 0.566 ( +0.00%) [ +0.18% +0.35% +0.00% / +0.00% +0.35% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.567 ms / 100) 0.551 -> 0.551 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.73%] index_copy_ perm : Elapsed 0.006 ms (0.551 ms / 100) 0.566 -> 0.566 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.566 ms / 100) 0.550 -> 0.552 ( +0.36%) [ +0.00% +0.18% +0.18% / +0.36% +0.91% +0.73%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.550 ms / 100) 4.948 -> 4.948 ( +0.00%) [ +0.24% +0.20% +0.00% / +0.06% +0.02% +0.00%] index_select const : Elapsed 0.050 ms (4.960 ms / 100) 4.959 -> 4.962 ( +0.06%) [ +0.24% +0.24% +0.00% / +0.16% +0.12% +0.06%] index_select wrap : Elapsed 0.050 ms (4.971 ms / 100) 4.961 -> 4.956 ( -0.10%) [ +0.00% +0.08% +0.12% / -0.10% -0.06% -0.06%] index_select linear : Elapsed 0.050 ms (4.961 ms / 100) 4.960 -> 4.954 ( -0.12%) [ +0.08% +0.02% +0.00% / -0.12% +0.02% +0.00%] index_select reverse : Elapsed 0.050 ms (4.964 ms / 100) 4.937 -> 4.946 ( +0.18%) [ +0.00% +0.20% +0.08% / +0.22% +0.18% +0.43%] index_select skip64 : Elapsed 0.049 ms (4.937 ms / 100) 4.942 -> 4.938 ( -0.08%) [ +0.00% +0.24% +0.04% / -0.08% +0.26% +0.12%] index_select skip256 : Elapsed 0.049 ms (4.942 ms / 100) 4.962 -> 4.970 ( +0.16%) [ +0.00% +0.08% +0.02% / +0.20% +0.16% +0.22%] index_select spread : Elapsed 0.050 ms (4.962 ms / 100) 4.974 -> 4.971 ( -0.06%) [ +0.02% +0.18% +0.00% / +0.04% -0.04% -0.06%] index_select strided 3 : Elapsed 0.050 ms (4.975 ms / 100) 4.966 -> 4.964 ( -0.04%) [ +0.04% +0.00% +0.20% / +0.10% -0.04% +0.16%] index_select random : Elapsed 0.050 ms (4.968 ms / 100) 4.959 -> 4.957 ( -0.04%) [ +0.20% +0.12% +0.00% / +0.14% +0.10% -0.04%] index_select random_sorted : Elapsed 0.050 ms (4.969 ms / 100) B = [5, 16, 40, 20] (stride (12800, 20, 320, 1)) A = [5, 16, 4, 20] (stride (80, 400, 1, 4)) dim = 2 1.312 -> 1.312 ( +0.00%) [ +0.23% +0.08% +0.00% / +0.00% +0.46% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.314 -> 1.316 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.30% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.24% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.313 -> 1.317 ( +0.30%) [ +0.23% +0.00% +0.23% / +0.38% +0.30% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.39% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.311 -> 1.312 ( +0.08%) [ +0.31% +0.00% +0.46% / +0.08% +0.53% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.315 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.55% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.23% +0.00% +0.08% / +0.15% +0.23% +0.23%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.314 ( +0.00%) [ +0.08% +0.00% +0.30% / +0.00% +0.30% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.315 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.31% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.313 ( -0.08%) [ +0.23% +0.15% +0.00% / -0.08% +0.38% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +1.02%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 9.147 -> 9.124 ( -0.25%) [ +0.03% +0.14% +0.00% / +0.12% +0.02% -0.25%] index_select const : Elapsed 0.092 ms (9.150 ms / 100) 9.140 -> 9.126 ( -0.15%) [ +0.08% +0.12% +0.00% / +0.22% -0.15% +0.04%] index_select wrap : Elapsed 0.091 ms (9.147 ms / 100) 9.139 -> 9.128 ( -0.12%) [ +0.00% +0.13% +0.18% / +0.23% -0.12% +0.04%] index_select linear : Elapsed 0.091 ms (9.139 ms / 100) 9.145 -> 9.133 ( -0.13%) [ +0.04% +0.00% +0.32% / -0.11% -0.13% -0.01%] index_select reverse : Elapsed 0.091 ms (9.149 ms / 100) 9.132 -> 9.119 ( -0.14%) [ +0.00% +0.13% +0.16% / +0.45% -0.14% +0.25%] index_select skip64 : Elapsed 0.091 ms (9.132 ms / 100) 9.135 -> 9.124 ( -0.12%) [ +0.00% +0.16% +0.26% / +0.19% -0.12% -0.02%] index_select skip256 : Elapsed 0.091 ms (9.135 ms / 100) 9.137 -> 9.124 ( -0.14%) [ +0.00% +0.27% +0.16% / +0.22% +0.08% -0.14%] index_select spread : Elapsed 0.091 ms (9.137 ms / 100) 9.137 -> 9.130 ( -0.08%) [ +0.00% +0.19% +0.15% / +0.26% -0.03% -0.08%] index_select strided 3 : Elapsed 0.091 ms (9.137 ms / 100) 9.139 -> 9.124 ( -0.16%) [ +0.00% +0.09% +0.10% / +0.07% -0.16% +0.12%] index_select random : Elapsed 0.091 ms (9.139 ms / 100) 9.143 -> 9.130 ( -0.14%) [ +0.14% +0.00% +0.17% / +0.35% -0.14% +0.09%] index_select random_sorted : Elapsed 0.092 ms (9.156 ms / 100) B = [5, 16, 40, 20] (stride (12800, 1, 320, 16)) A = [5, 16, 4, 20] (stride (1280, 80, 20, 1)) dim = 2 1.229 -> 1.229 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.50% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.32% +0.65%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.76%] index_copy_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.231 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.67% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.191 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.81% +0.98%] index_add_ strided 3 : Elapsed 0.012 ms (1.229 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.76% +1.09%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.73% +1.06%] index_add_ strided 7 : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +1.18%] index_copy_ strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.229 -> 1.228 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.73% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.191 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.67% +0.84%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.229 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.65% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.76% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.710 -> 8.717 ( +0.08%) [ +0.10% +0.10% +0.00% / +0.08% +0.44% +0.32%] index_select const : Elapsed 0.087 ms (8.719 ms / 100) 8.739 -> 8.733 ( -0.07%) [ +0.22% +0.00% +0.01% / +0.02% +0.16% -0.07%] index_select wrap : Elapsed 0.088 ms (8.758 ms / 100) 8.723 -> 8.735 ( +0.14%) [ +0.13% +0.41% +0.00% / +0.16% +0.14% +0.38%] index_select linear : Elapsed 0.087 ms (8.734 ms / 100) 8.735 -> 8.736 ( +0.01%) [ +0.24% +0.00% +0.13% / +0.25% +0.01% +0.32%] index_select reverse : Elapsed 0.088 ms (8.756 ms / 100) 8.712 -> 8.720 ( +0.09%) [ +0.17% +0.26% +0.00% / +0.30% +0.34% +0.09%] index_select skip64 : Elapsed 0.087 ms (8.727 ms / 100) 8.712 -> 8.721 ( +0.10%) [ +0.20% +0.00% +0.15% / +0.10% +0.37% +0.40%] index_select skip256 : Elapsed 0.087 ms (8.729 ms / 100) 8.741 -> 8.746 ( +0.06%) [ +0.00% +0.00% +0.15% / +0.11% +0.06% +0.18%] index_select spread : Elapsed 0.087 ms (8.741 ms / 100) 8.746 -> 8.727 ( -0.22%) [ +0.00% +0.09% +0.03% / -0.22% +0.05% +0.17%] index_select strided 3 : Elapsed 0.087 ms (8.746 ms / 100) 8.737 -> 8.726 ( -0.13%) [ +0.01% +0.18% +0.00% / -0.13% +0.22% +0.14%] index_select random : Elapsed 0.087 ms (8.738 ms / 100) 8.728 -> 8.727 ( -0.01%) [ +0.55% +0.09% +0.00% / -0.01% +0.36% +0.18%] index_select random_sorted : Elapsed 0.088 ms (8.776 ms / 100) B = [5, 16, 40, 20] (stride (800, 4000, 1, 40)) A = [5, 16, 4, 20] (stride (1, 100, 1600, 5)) dim = 2 1.313 -> 1.319 ( +0.46%) [ +0.38% +0.00% +0.38% / +0.46% +0.46% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.276 -> 1.279 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.71% +0.78%] index_copy_ linear : Elapsed 0.013 ms (1.277 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.23% +0.23%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.279 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.55% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.327 -> 1.326 ( -0.08%) [ +0.00% +0.08% +0.15% / -0.08% +0.15% +0.15%] index_add_ spread : Elapsed 0.013 ms (1.327 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.31% +0.39%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.08% +0.00% +0.23% / -0.08% +0.53% +1.52%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.70% +1.25%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.30% +0.38%] index_add_ strided 7 : Elapsed 0.013 ms (1.321 ms / 100) 1.284 -> 1.282 ( -0.16%) [ +0.00% +0.08% +0.23% / -0.16% +0.23% +0.31%] index_copy_ strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.330 -> 1.327 ( -0.23%) [ +0.38% +0.15% +0.00% / +0.38% -0.23% +0.15%] index_add_ perm : Elapsed 0.013 ms (1.335 ms / 100) 1.291 -> 1.290 ( -0.08%) [ +0.15% +0.08% +0.00% / -0.08% -0.08% +0.46%] index_copy_ perm : Elapsed 0.013 ms (1.293 ms / 100) 1.327 -> 1.327 ( +0.00%) [ +0.30% +0.23% +0.00% / +0.08% +0.00% +0.08%] index_add_ perm_sorted : Elapsed 0.013 ms (1.331 ms / 100) 1.290 -> 1.288 ( -0.16%) [ +0.08% +0.08% +0.00% / -0.16% -0.16% +0.08%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.291 ms / 100) 9.216 -> 9.223 ( +0.08%) [ +0.28% +0.00% +0.15% / +0.08% +0.30% +0.11%] index_select const : Elapsed 0.092 ms (9.242 ms / 100) 9.244 -> 9.247 ( +0.03%) [ +0.10% +0.06% +0.00% / +0.13% +0.03% +0.03%] index_select wrap : Elapsed 0.093 ms (9.253 ms / 100) 9.240 -> 9.242 ( +0.02%) [ +0.08% +0.08% +0.00% / +0.02% +0.04% +0.04%] index_select linear : Elapsed 0.092 ms (9.247 ms / 100) 9.232 -> 9.226 ( -0.06%) [ +0.03% +0.00% +0.08% / +0.01% -0.06% +0.01%] index_select reverse : Elapsed 0.092 ms (9.235 ms / 100) 9.212 -> 9.216 ( +0.04%) [ +0.12% +0.11% +0.00% / +0.04% +0.33% +0.33%] index_select skip64 : Elapsed 0.092 ms (9.223 ms / 100) 9.220 -> 9.214 ( -0.07%) [ +0.05% +0.00% +0.02% / -0.07% +0.14% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.225 ms / 100) 9.243 -> 9.245 ( +0.02%) [ +0.08% +0.03% +0.00% / +0.08% +0.02% +0.06%] index_select spread : Elapsed 0.092 ms (9.250 ms / 100) 9.252 -> 9.235 ( -0.18%) [ +0.00% +0.12% +0.04% / +0.03% -0.18% -0.13%] index_select strided 3 : Elapsed 0.093 ms (9.252 ms / 100) 9.244 -> 9.259 ( +0.16%) [ +0.00% +0.04% +0.14% / +0.17% +0.16% +0.17%] index_select random : Elapsed 0.092 ms (9.244 ms / 100) 9.230 -> 9.219 ( -0.12%) [ +0.20% +0.25% +0.00% / -0.12% +0.07% +0.17%] index_select random_sorted : Elapsed 0.092 ms (9.248 ms / 100) B = [5, 16, 40, 20] (stride (1, 4000, 5, 200)) A = [5, 16, 4, 20] (stride (320, 1, 1600, 16)) dim = 2 1.318 -> 1.319 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.15% +0.23%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.279 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.30% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.15% +0.30% +0.00% / +0.08% +0.46% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.319 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.47% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.281 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.30% +0.46%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.00% +0.31% +0.08% / -0.08% +0.63% +1.33%] index_copy_ strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.38% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +0.47% +0.62%] index_copy_ strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.53% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.320 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.47% +0.47%] index_copy_ perm : Elapsed 0.013 ms (1.280 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.00% +0.30% +0.00% / +0.00% +0.38% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.55%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) 9.177 -> 9.202 ( +0.27%) [ +0.00% +0.01% +0.19% / +0.39% +0.27% +0.36%] index_select const : Elapsed 0.092 ms (9.177 ms / 100) 9.203 -> 9.224 ( +0.23%) [ +0.03% +0.15% +0.00% / +0.23% +0.33% +0.28%] index_select wrap : Elapsed 0.092 ms (9.206 ms / 100) 9.194 -> 9.201 ( +0.08%) [ +0.44% +0.00% +0.36% / +0.08% +0.18% +0.38%] index_select linear : Elapsed 0.092 ms (9.234 ms / 100) 9.196 -> 9.197 ( +0.01%) [ +0.00% +0.26% +0.20% / +0.01% +0.02% +0.15%] index_select reverse : Elapsed 0.092 ms (9.196 ms / 100) 9.180 -> 9.203 ( +0.25%) [ +0.00% +0.15% +0.03% / +0.41% +0.25% +0.52%] index_select skip64 : Elapsed 0.092 ms (9.180 ms / 100) 9.173 -> 9.178 ( +0.05%) [ +0.19% +0.08% +0.00% / +0.05% +0.17% +0.23%] index_select skip256 : Elapsed 0.092 ms (9.190 ms / 100) 9.200 -> 9.207 ( +0.08%) [ +0.35% +0.00% +0.04% / +0.08% +0.17% +0.15%] index_select spread : Elapsed 0.092 ms (9.232 ms / 100) 9.211 -> 9.212 ( +0.01%) [ +0.12% +0.00% +0.15% / +0.01% +0.04% +0.33%] index_select strided 3 : Elapsed 0.092 ms (9.222 ms / 100) 9.216 -> 9.199 ( -0.18%) [ +0.00% +0.04% +0.04% / -0.18% +0.09% +0.11%] index_select random : Elapsed 0.092 ms (9.216 ms / 100) 9.201 -> 9.200 ( -0.01%) [ +0.07% +0.01% +0.00% / -0.01% +0.11% +0.07%] index_select random_sorted : Elapsed 0.092 ms (9.207 ms / 100) out_shape = [5, 16, 4, 40] in_shape = [5, 16, 4, 20] idx_dim = 3 B = [5, 16, 4, 40] (stride (2560, 160, 1, 4)) A = [5, 16, 4, 20] (stride (16, 1, 1600, 80)) dim = 3 1.533 -> 1.513 ( -1.30%) [ +0.07% +0.33% +0.00% / -1.30% -0.72% -0.52%] index_add_ linear : Elapsed 0.015 ms (1.534 ms / 100) 1.501 -> 1.469 ( -2.13%) [ +0.07% +0.13% +0.00% / -2.13% -1.13% -0.67%] index_copy_ linear : Elapsed 0.015 ms (1.502 ms / 100) 1.539 -> 1.509 ( -1.95%) [ +0.00% +0.00% +0.00% / -1.95% -1.23% -0.84%] index_add_ reverse : Elapsed 0.015 ms (1.539 ms / 100) 1.503 -> 1.472 ( -2.06%) [ +0.13% +0.33% +0.00% / -2.06% -1.40% -1.60%] index_copy_ reverse : Elapsed 0.015 ms (1.505 ms / 100) 1.554 -> 1.525 ( -1.87%) [ +0.06% +0.39% +0.00% / -1.87% -1.09% -1.16%] index_add_ spread : Elapsed 0.016 ms (1.555 ms / 100) 1.523 -> 1.487 ( -2.36%) [ +0.00% +0.13% +0.26% / -2.36% -1.25% -1.44%] index_copy_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.554 -> 1.526 ( -1.80%) [ +0.00% +0.06% +0.06% / -1.80% -1.09% -1.29%] index_add_ strided 3 : Elapsed 0.016 ms (1.554 ms / 100) 1.520 -> 1.487 ( -2.17%) [ +0.07% +0.00% +0.20% / -2.17% -1.05% -1.25%] index_copy_ strided 3 : Elapsed 0.015 ms (1.521 ms / 100) 1.556 -> 1.530 ( -1.67%) [ +0.19% +0.19% +0.00% / -1.67% -0.96% -1.09%] index_add_ strided 7 : Elapsed 0.016 ms (1.559 ms / 100) 1.525 -> 1.497 ( -1.84%) [ +0.13% +0.00% +0.20% / -1.84% -1.31% -1.25%] index_copy_ strided 7 : Elapsed 0.015 ms (1.527 ms / 100) 1.551 -> 1.522 ( -1.87%) [ +0.13% +0.00% +0.13% / -1.87% -0.97% -1.16%] index_add_ perm : Elapsed 0.016 ms (1.553 ms / 100) 1.515 -> 1.483 ( -2.11%) [ +0.13% +0.00% +0.13% / -2.11% -1.12% -1.19%] index_copy_ perm : Elapsed 0.015 ms (1.517 ms / 100) 1.549 -> 1.522 ( -1.74%) [ +0.00% +0.32% +0.13% / -1.74% -1.10% -0.97%] index_add_ perm_sorted : Elapsed 0.015 ms (1.549 ms / 100) 1.513 -> 1.484 ( -1.92%) [ +0.00% +0.26% +0.13% / -1.92% -1.19% -0.99%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.513 ms / 100) 2.870 -> 2.880 ( +0.35%) [ +0.00% +0.17% +0.28% / +0.35% +0.42% +0.42%] index_select const : Elapsed 0.029 ms (2.870 ms / 100) 2.898 -> 2.883 ( -0.52%) [ +0.00% +0.07% +0.07% / +0.35% -0.24% -0.52%] index_select wrap : Elapsed 0.029 ms (2.898 ms / 100) 2.895 -> 2.883 ( -0.41%) [ +0.00% +0.07% +0.10% / +0.03% -0.28% -0.41%] index_select linear : Elapsed 0.029 ms (2.895 ms / 100) 2.898 -> 2.901 ( +0.10%) [ +0.00% +0.07% +0.03% / +0.10% +0.21% +0.17%] index_select reverse : Elapsed 0.029 ms (2.898 ms / 100) 2.874 -> 2.872 ( -0.07%) [ +0.00% +0.14% +0.00% / +0.07% -0.07% +0.21%] index_select skip64 : Elapsed 0.029 ms (2.874 ms / 100) 2.874 -> 2.870 ( -0.14%) [ +0.07% +0.00% +0.07% / -0.14% +0.24% +0.28%] index_select skip256 : Elapsed 0.029 ms (2.876 ms / 100) 2.898 -> 2.894 ( -0.14%) [ +0.14% +0.14% +0.00% / +0.10% -0.14% -0.07%] index_select spread : Elapsed 0.029 ms (2.902 ms / 100) 2.897 -> 2.891 ( -0.21%) [ +0.00% +0.14% +0.14% / +0.10% -0.03% -0.21%] index_select strided 3 : Elapsed 0.029 ms (2.897 ms / 100) 2.887 -> 2.880 ( -0.24%) [ +0.07% +0.00% +0.03% / -0.21% -0.14% -0.24%] index_select strided 5 : Elapsed 0.029 ms (2.889 ms / 100) 2.896 -> 2.898 ( +0.07%) [ +0.24% +0.00% +0.17% / +0.07% +0.07% +0.10%] index_select strided 7 : Elapsed 0.029 ms (2.903 ms / 100) 2.874 -> 2.871 ( -0.10%) [ +0.07% +0.00% +0.07% / -0.10% +0.38% +0.28%] index_select strided 8 : Elapsed 0.029 ms (2.876 ms / 100) 2.867 -> 2.875 ( +0.28%) [ +0.00% +0.35% +0.35% / +0.28% +0.56% +0.70%] index_select strided 16 : Elapsed 0.029 ms (2.867 ms / 100) 2.895 -> 2.892 ( -0.10%) [ +0.00% +0.17% +0.00% / +0.14% -0.10% -0.07%] index_select random : Elapsed 0.029 ms (2.895 ms / 100) 2.896 -> 2.889 ( -0.24%) [ +0.07% +0.00% +0.28% / -0.21% -0.24% +0.07%] index_select random_sorted : Elapsed 0.029 ms (2.898 ms / 100) B = [5, 16, 4, 40] (stride (160, 800, 40, 1)) A = [5, 16, 4, 20] (stride (1, 400, 5, 20)) dim = 3 2.444 -> 2.459 ( +0.61%) [ +0.20% +0.12% +0.00% / +0.61% +0.86% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.453 -> 2.466 ( +0.53%) [ +0.20% +0.08% +0.00% / +0.53% +0.77% +0.86%] index_copy_ linear : Elapsed 0.025 ms (2.458 ms / 100) 2.437 -> 2.449 ( +0.49%) [ +0.25% +0.21% +0.00% / +0.49% +1.19% +1.27%] index_add_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.445 -> 2.461 ( +0.65%) [ +0.16% +0.00% +0.33% / +0.65% +1.19% +1.10%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.448 -> 2.465 ( +0.69%) [ +0.16% +0.20% +0.00% / +0.69% +1.14% +1.18%] index_add_ spread : Elapsed 0.025 ms (2.452 ms / 100) 2.467 -> 2.479 ( +0.49%) [ +0.12% +0.12% +0.00% / +0.49% +0.97% +1.01%] index_copy_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.461 -> 2.471 ( +0.41%) [ +0.04% +0.00% +0.04% / +0.41% +0.53% +0.45%] index_add_ strided 3 : Elapsed 0.025 ms (2.462 ms / 100) 2.470 -> 2.481 ( +0.45%) [ +0.24% +0.00% +0.00% / +0.45% +0.69% +0.89%] index_copy_ strided 3 : Elapsed 0.025 ms (2.476 ms / 100) 2.460 -> 2.471 ( +0.45%) [ +0.12% +0.08% +0.00% / +0.45% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.463 ms / 100) 2.469 -> 2.482 ( +0.53%) [ +0.12% +0.00% +0.32% / +0.53% +0.81% +0.81%] index_copy_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.08% +0.00% +0.04% / +0.65% +0.16% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.466 ms / 100) 2.473 -> 2.475 ( +0.08%) [ +0.00% +0.04% +0.04% / +0.65% +0.08% +0.49%] index_copy_ perm : Elapsed 0.025 ms (2.473 ms / 100) 2.462 -> 2.465 ( +0.12%) [ +0.00% +0.04% +0.08% / +0.49% +0.12% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.462 ms / 100) 2.474 -> 2.478 ( +0.16%) [ +0.28% +0.04% +0.00% / +0.44% +0.16% +0.36%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.481 ms / 100) 4.496 -> 4.499 ( +0.07%) [ +0.13% +0.09% +0.00% / +0.24% +0.07% +0.24%] index_select const : Elapsed 0.045 ms (4.502 ms / 100) 4.508 -> 4.502 ( -0.13%) [ +0.00% +0.09% +0.00% / -0.13% +0.00% +0.18%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.506 -> 4.509 ( +0.07%) [ +0.04% +0.00% +0.24% / +0.07% +0.24% +0.11%] index_select linear : Elapsed 0.045 ms (4.508 ms / 100) 4.510 -> 4.508 ( -0.04%) [ +0.07% +0.00% +0.20% / -0.02% +0.02% -0.04%] index_select reverse : Elapsed 0.045 ms (4.513 ms / 100) 4.498 -> 4.495 ( -0.07%) [ +0.00% +0.09% +0.07% / +0.04% -0.07% -0.07%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.498 ( +0.04%) [ +0.07% +0.00% +0.00% / +0.07% +0.11% +0.04%] index_select skip256 : Elapsed 0.045 ms (4.499 ms / 100) 4.508 -> 4.511 ( +0.07%) [ +0.11% +0.09% +0.00% / +0.16% +0.11% +0.07%] index_select spread : Elapsed 0.045 ms (4.513 ms / 100) 4.506 -> 4.508 ( +0.04%) [ +0.02% +0.00% +0.16% / +0.09% +0.16% +0.04%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.495 -> 4.502 ( +0.16%) [ +0.18% +0.00% +0.16% / +0.16% +0.24% +0.36%] index_select strided 5 : Elapsed 0.045 ms (4.503 ms / 100) 4.507 -> 4.506 ( -0.02%) [ +0.00% +0.16% +0.04% / +0.00% -0.02% +0.07%] index_select strided 7 : Elapsed 0.045 ms (4.507 ms / 100) 4.499 -> 4.501 ( +0.04%) [ +0.11% +0.00% +0.00% / +0.13% +0.04% +0.09%] index_select strided 8 : Elapsed 0.045 ms (4.504 ms / 100) 4.497 -> 4.499 ( +0.04%) [ +0.04% +0.20% +0.00% / +0.04% +0.13% +0.18%] index_select strided 16 : Elapsed 0.045 ms (4.499 ms / 100) 4.510 -> 4.510 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.09% +0.09% +0.00%] index_select random : Elapsed 0.045 ms (4.513 ms / 100) 4.506 -> 4.511 ( +0.11%) [ +0.18% +0.00% +0.07% / +0.18% +0.11% +0.20%] index_select random_sorted : Elapsed 0.045 ms (4.514 ms / 100) B = [5, 16, 4, 40] (stride (64, 4, 1, 320)) A = [5, 16, 4, 20] (stride (1280, 4, 1, 64)) dim = 3 2.309 -> 2.323 ( +0.61%) [ +0.22% +0.00% +0.09% / +0.61% +0.69% +0.82%] index_add_ linear : Elapsed 0.023 ms (2.314 ms / 100) 2.300 -> 2.310 ( +0.43%) [ +0.00% +0.00% +0.00% / +0.43% +0.74% +0.70%] index_copy_ linear : Elapsed 0.023 ms (2.300 ms / 100) 2.315 -> 2.324 ( +0.39%) [ +0.04% +0.22% +0.00% / +0.39% +0.56% +0.52%] index_add_ reverse : Elapsed 0.023 ms (2.316 ms / 100) 2.298 -> 2.311 ( +0.57%) [ +0.00% +0.30% +0.26% / +0.65% +0.57% +0.61%] index_copy_ reverse : Elapsed 0.023 ms (2.298 ms / 100) 2.316 -> 2.323 ( +0.30%) [ +0.00% +0.09% +0.00% / +0.47% +0.30% +0.35%] index_add_ spread : Elapsed 0.023 ms (2.316 ms / 100) 2.301 -> 2.311 ( +0.43%) [ +0.04% +0.17% +0.00% / +0.43% +0.56% +0.56%] index_copy_ spread : Elapsed 0.023 ms (2.302 ms / 100) 2.317 -> 2.325 ( +0.35%) [ +0.00% +0.00% +0.04% / +0.47% +0.39% +0.35%] index_add_ strided 3 : Elapsed 0.023 ms (2.317 ms / 100) 2.299 -> 2.313 ( +0.61%) [ +0.00% +0.04% +0.00% / +0.70% +0.61% +0.65%] index_copy_ strided 3 : Elapsed 0.023 ms (2.299 ms / 100) 2.315 -> 2.327 ( +0.52%) [ +0.22% +0.00% +0.04% / +0.56% +0.52% +0.69%] index_add_ strided 7 : Elapsed 0.023 ms (2.320 ms / 100) 2.299 -> 2.309 ( +0.43%) [ +0.09% +0.09% +0.00% / +0.43% +0.52% +0.52%] index_copy_ strided 7 : Elapsed 0.023 ms (2.301 ms / 100) 2.309 -> 2.323 ( +0.61%) [ +0.00% +0.30% +0.56% / +0.61% +0.69% +1.00%] index_add_ perm : Elapsed 0.023 ms (2.309 ms / 100) 2.299 -> 2.315 ( +0.70%) [ +0.09% +0.13% +0.00% / +0.70% +0.83% +0.70%] index_copy_ perm : Elapsed 0.023 ms (2.301 ms / 100) 2.314 -> 2.325 ( +0.48%) [ +0.09% +0.00% +0.04% / +0.48% +0.78% +0.61%] index_add_ perm_sorted : Elapsed 0.023 ms (2.316 ms / 100) 2.300 -> 2.314 ( +0.61%) [ +0.13% +0.00% +0.17% / +0.61% +0.74% +0.83%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.303 ms / 100) 4.271 -> 4.270 ( -0.02%) [ +0.05% +0.07% +0.00% / -0.02% +0.02% -0.02%] index_select const : Elapsed 0.043 ms (4.273 ms / 100) 4.281 -> 4.277 ( -0.09%) [ +0.07% +0.00% +0.00% / +0.00% -0.09% +0.09%] index_select wrap : Elapsed 0.043 ms (4.284 ms / 100) 4.280 -> 4.279 ( -0.02%) [ +0.09% +0.00% +0.07% / +0.02% -0.02% +0.21%] index_select linear : Elapsed 0.043 ms (4.284 ms / 100) 4.277 -> 4.281 ( +0.09%) [ +0.00% +0.16% +0.16% / +0.09% +0.12% +0.09%] index_select reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.270 -> 4.267 ( -0.07%) [ +0.00% +0.02% +0.05% / +0.16% -0.07% +0.00%] index_select skip64 : Elapsed 0.043 ms (4.270 ms / 100) 4.273 -> 4.270 ( -0.07%) [ +0.00% +0.05% +0.05% / -0.07% -0.05% +0.05%] index_select skip256 : Elapsed 0.043 ms (4.273 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.09% +0.05% +0.00% / +0.05% +0.09% +0.12%] index_select spread : Elapsed 0.043 ms (4.282 ms / 100) 4.273 -> 4.280 ( +0.16%) [ +0.09% +0.21% +0.00% / +0.16% +0.16% +0.30%] index_select strided 3 : Elapsed 0.043 ms (4.277 ms / 100) 4.274 -> 4.273 ( -0.02%) [ +0.07% +0.05% +0.00% / +0.16% +0.00% -0.02%] index_select strided 5 : Elapsed 0.043 ms (4.277 ms / 100) 4.276 -> 4.279 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.30% +0.26%] index_select strided 7 : Elapsed 0.043 ms (4.282 ms / 100) 4.275 -> 4.270 ( -0.12%) [ +0.09% +0.02% +0.00% / -0.12% +0.05% +0.12%] index_select strided 8 : Elapsed 0.043 ms (4.279 ms / 100) 4.272 -> 4.270 ( -0.05%) [ +0.09% +0.02% +0.00% / -0.05% +0.09% +0.21%] index_select strided 16 : Elapsed 0.043 ms (4.276 ms / 100) 4.275 -> 4.273 ( -0.05%) [ +0.14% +0.09% +0.00% / -0.05% +0.07% +0.19%] index_select random : Elapsed 0.043 ms (4.281 ms / 100) 4.274 -> 4.277 ( +0.07%) [ +0.12% +0.07% +0.00% / +0.07% +0.19% +0.30%] index_select random_sorted : Elapsed 0.043 ms (4.279 ms / 100) B = [5, 16, 4, 40] (stride (64, 1, 16, 320)) A = [5, 16, 4, 20] (stride (320, 1, 1600, 16)) dim = 3 2.456 -> 2.468 ( +0.49%) [ +0.00% +0.08% +0.04% / +0.49% +0.90% +0.98%] index_add_ linear : Elapsed 0.025 ms (2.456 ms / 100) 2.449 -> 2.461 ( +0.49%) [ +0.04% +0.08% +0.00% / +0.49% +0.82% +0.78%] index_copy_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.450 -> 2.467 ( +0.69%) [ +0.00% +0.16% +0.04% / +0.69% +1.27% +1.10%] index_add_ reverse : Elapsed 0.025 ms (2.450 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.12% +0.00% +0.21% / +0.66% +1.39% +1.35%] index_copy_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.452 -> 2.466 ( +0.57%) [ +0.00% +0.16% +0.12% / +0.57% +1.22% +1.18%] index_add_ spread : Elapsed 0.025 ms (2.452 ms / 100) 2.438 -> 2.460 ( +0.90%) [ +0.25% +0.21% +0.00% / +0.90% +1.39% +1.31%] index_copy_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.459 -> 2.474 ( +0.61%) [ +0.12% +0.16% +0.00% / +0.73% +0.61% +0.69%] index_add_ strided 3 : Elapsed 0.025 ms (2.462 ms / 100) 2.449 -> 2.464 ( +0.61%) [ +0.00% +0.08% +0.08% / +0.61% +0.61% +0.82%] index_copy_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.463 -> 2.475 ( +0.49%) [ +0.16% +0.20% +0.00% / +0.73% +0.49% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.447 -> 2.465 ( +0.74%) [ +0.12% +0.16% +0.00% / +0.74% +0.86% +0.78%] index_copy_ strided 7 : Elapsed 0.025 ms (2.450 ms / 100) 2.463 -> 2.470 ( +0.28%) [ +0.04% +0.16% +0.00% / +0.73% +0.28% +0.41%] index_add_ perm : Elapsed 0.025 ms (2.464 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.00% +0.04% +0.00% / +0.57% +0.29% +0.45%] index_copy_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.465 -> 2.473 ( +0.32%) [ +0.08% +0.08% +0.00% / +0.57% +0.32% +0.32%] index_add_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 2.456 -> 2.462 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.49% +0.24% +0.37%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) 4.497 -> 4.486 ( -0.24%) [ +0.16% +0.13% +0.00% / -0.24% +0.00% +0.11%] index_select const : Elapsed 0.045 ms (4.504 ms / 100) 4.512 -> 4.513 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.04% +0.07% +0.02%] index_select wrap : Elapsed 0.045 ms (4.513 ms / 100) 4.509 -> 4.511 ( +0.04%) [ +0.22% +0.24% +0.00% / +0.11% +0.04% +0.31%] index_select linear : Elapsed 0.045 ms (4.519 ms / 100) 4.516 -> 4.512 ( -0.09%) [ +0.04% +0.00% +0.09% / +0.09% -0.09% -0.07%] index_select reverse : Elapsed 0.045 ms (4.518 ms / 100) 4.494 -> 4.496 ( +0.04%) [ +0.20% +0.20% +0.00% / +0.07% +0.04% +0.13%] index_select skip64 : Elapsed 0.045 ms (4.503 ms / 100) 4.493 -> 4.497 ( +0.09%) [ +0.13% +0.00% +0.18% / +0.16% +0.22% +0.09%] index_select skip256 : Elapsed 0.045 ms (4.499 ms / 100) 4.507 -> 4.511 ( +0.09%) [ +0.00% +0.16% +0.09% / +0.22% +0.09% +0.33%] index_select spread : Elapsed 0.045 ms (4.507 ms / 100) 4.509 -> 4.513 ( +0.09%) [ +0.00% +0.20% +0.22% / +0.18% +0.09% +0.22%] index_select strided 3 : Elapsed 0.045 ms (4.509 ms / 100) 4.497 -> 4.506 ( +0.20%) [ +0.11% +0.24% +0.00% / +0.24% +0.36% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.511 -> 4.510 ( -0.02%) [ +0.09% +0.00% +0.04% / -0.02% +0.07% +0.22%] index_select strided 7 : Elapsed 0.045 ms (4.515 ms / 100) 4.497 -> 4.500 ( +0.07%) [ +0.11% +0.33% +0.00% / +0.13% +0.13% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.502 ms / 100) 4.496 -> 4.503 ( +0.16%) [ +0.18% +0.00% +0.13% / +0.16% +0.24% +0.36%] index_select strided 16 : Elapsed 0.045 ms (4.504 ms / 100) 4.508 -> 4.512 ( +0.09%) [ +0.07% +0.13% +0.00% / +0.24% +0.13% +0.09%] index_select random : Elapsed 0.045 ms (4.511 ms / 100) 4.511 -> 4.507 ( -0.09%) [ +0.09% +0.16% +0.00% / +0.16% -0.09% +0.20%] index_select random_sorted : Elapsed 0.045 ms (4.515 ms / 100) B = [5, 16, 4, 40] (stride (4, 20, 1, 320)) A = [5, 16, 4, 20] (stride (1, 100, 1600, 5)) dim = 3 2.442 -> 2.451 ( +0.37%) [ +0.00% +0.12% +0.04% / +0.37% +0.98% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.442 ms / 100) 2.444 -> 2.453 ( +0.37%) [ +0.00% +0.04% +0.04% / +0.37% +0.65% +0.94%] index_copy_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.444 -> 2.460 ( +0.65%) [ +0.12% +0.08% +0.00% / +0.65% +0.82% +0.74%] index_add_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.444 -> 2.457 ( +0.53%) [ +0.16% +0.00% +0.20% / +0.53% +0.57% +0.86%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.451 -> 2.458 ( +0.29%) [ +0.00% +0.04% +0.04% / +0.29% +0.45% +0.53%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.459 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.45% +0.49% +0.69%] index_copy_ spread : Elapsed 0.024 ms (2.448 ms / 100) 2.444 -> 2.459 ( +0.61%) [ +0.00% +0.16% +0.08% / +0.78% +0.61% +0.65%] index_add_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.456 ( +0.45%) [ +0.00% +0.08% +0.08% / +0.45% +0.45% +0.57%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.12% +0.00% +0.29% / +0.65% +0.53% +0.74%] index_add_ strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.444 -> 2.459 ( +0.61%) [ +0.12% +0.20% +0.00% / +0.61% +0.65% +0.70%] index_copy_ strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.457 ( +0.45%) [ +0.00% +0.12% +0.04% / +0.45% +0.78% +0.90%] index_add_ perm : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.00% +0.00% +0.16% / +0.45% +0.61% +0.98%] index_copy_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.458 ( +0.53%) [ +0.00% +0.25% +0.12% / +0.53% +0.53% +0.82%] index_add_ perm_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.445 -> 2.458 ( +0.53%) [ +0.00% +0.00% +0.04% / +0.53% +0.65% +0.78%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.445 ms / 100) 4.490 -> 4.493 ( +0.07%) [ +0.13% +0.24% +0.00% / +0.07% +0.11% +0.33%] index_select const : Elapsed 0.045 ms (4.496 ms / 100) 4.494 -> 4.503 ( +0.20%) [ +0.22% +0.00% +0.18% / +0.20% +0.31% +0.36%] index_select wrap : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.04% +0.00% +0.27% / +0.02% +0.29% +0.29%] index_select linear : Elapsed 0.045 ms (4.502 ms / 100) 4.496 -> 4.501 ( +0.11%) [ +0.09% +0.16% +0.00% / +0.11% +0.42% +0.27%] index_select reverse : Elapsed 0.045 ms (4.500 ms / 100) 4.489 -> 4.495 ( +0.13%) [ +0.09% +0.18% +0.00% / +0.18% +0.13% +0.36%] index_select skip64 : Elapsed 0.045 ms (4.493 ms / 100) 4.489 -> 4.501 ( +0.27%) [ +0.00% +0.13% +0.11% / +0.27% +0.42% +0.47%] index_select skip256 : Elapsed 0.045 ms (4.489 ms / 100) 4.499 -> 4.498 ( -0.02%) [ +0.11% +0.02% +0.00% / -0.02% +0.20% +0.22%] index_select spread : Elapsed 0.045 ms (4.504 ms / 100) 4.495 -> 4.502 ( +0.16%) [ +0.16% +0.00% +0.09% / +0.16% +0.20% +0.29%] index_select strided 3 : Elapsed 0.045 ms (4.502 ms / 100) 4.494 -> 4.501 ( +0.16%) [ +0.11% +0.00% +0.13% / +0.16% +0.16% +0.47%] index_select strided 5 : Elapsed 0.045 ms (4.499 ms / 100) 4.495 -> 4.504 ( +0.20%) [ +0.00% +0.24% +0.07% / +0.20% +0.38% +0.33%] index_select strided 7 : Elapsed 0.045 ms (4.495 ms / 100) 4.492 -> 4.498 ( +0.13%) [ +0.00% +0.09% +0.18% / +0.13% +0.24% +0.20%] index_select strided 8 : Elapsed 0.045 ms (4.492 ms / 100) 4.495 -> 4.500 ( +0.11%) [ +0.07% +0.18% +0.00% / +0.11% +0.29% +0.29%] index_select strided 16 : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.500 ( +0.09%) [ +0.20% +0.27% +0.00% / +0.09% +0.22% +0.29%] index_select random : Elapsed 0.045 ms (4.505 ms / 100) 4.496 -> 4.500 ( +0.09%) [ +0.27% +0.00% +0.20% / +0.09% +0.24% +0.36%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [5, 16, 4, 40] (stride (16, 1, 80, 320)) A = [5, 16, 4, 20] (stride (1, 400, 100, 5)) dim = 3 1.518 -> 1.489 ( -1.91%) [ +0.26% +0.26% +0.00% / -1.91% -1.12% -1.32%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.499 -> 1.466 ( -2.20%) [ +0.00% +0.13% +0.00% / -2.20% -1.67% -1.33%] index_copy_ linear : Elapsed 0.015 ms (1.499 ms / 100) 1.519 -> 1.488 ( -2.04%) [ +0.07% +0.00% +0.26% / -2.04% -1.18% -0.79%] index_add_ reverse : Elapsed 0.015 ms (1.520 ms / 100) 1.498 -> 1.470 ( -1.87%) [ +0.00% +0.13% +0.20% / -1.87% -1.74% -1.67%] index_copy_ reverse : Elapsed 0.015 ms (1.498 ms / 100) 1.520 -> 1.492 ( -1.84%) [ +0.00% +0.07% +0.13% / -1.78% -1.84% -1.45%] index_add_ spread : Elapsed 0.015 ms (1.520 ms / 100) 1.496 -> 1.465 ( -2.07%) [ +0.13% +0.00% +0.20% / -2.07% -1.74% -1.54%] index_copy_ spread : Elapsed 0.015 ms (1.498 ms / 100) 1.520 -> 1.490 ( -1.97%) [ +0.39% +0.26% +0.00% / -1.71% -1.97% -1.84%] index_add_ strided 3 : Elapsed 0.015 ms (1.526 ms / 100) 1.503 -> 1.468 ( -2.33%) [ +0.33% +0.00% +0.13% / -2.13% -2.13% -2.33%] index_copy_ strided 3 : Elapsed 0.015 ms (1.508 ms / 100) 1.522 -> 1.495 ( -1.77%) [ +0.20% +0.00% +0.26% / -1.71% -1.77% -1.58%] index_add_ strided 7 : Elapsed 0.015 ms (1.525 ms / 100) 1.501 -> 1.464 ( -2.47%) [ +0.00% +0.00% +0.00% / -2.47% -1.93% -1.80%] index_copy_ strided 7 : Elapsed 0.015 ms (1.501 ms / 100) 1.517 -> 1.486 ( -2.04%) [ +0.00% +0.26% +0.13% / -2.04% -1.38% -1.38%] index_add_ perm : Elapsed 0.015 ms (1.517 ms / 100) 1.493 -> 1.462 ( -2.08%) [ +0.27% +0.47% +0.00% / -2.08% -1.34% -1.41%] index_copy_ perm : Elapsed 0.015 ms (1.497 ms / 100) 1.520 -> 1.492 ( -1.84%) [ +0.00% +0.07% +0.00% / -1.84% -1.51% -1.45%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.499 -> 1.465 ( -2.27%) [ +0.00% +0.40% +0.00% / -2.27% -1.80% -1.80%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.499 ms / 100) 2.871 -> 2.870 ( -0.03%) [ +0.10% +0.03% +0.00% / -0.03% +0.28% +0.07%] index_select const : Elapsed 0.029 ms (2.874 ms / 100) 2.881 -> 2.883 ( +0.07%) [ +0.10% +0.07% +0.00% / +0.07% +0.45% +0.62%] index_select wrap : Elapsed 0.029 ms (2.884 ms / 100) 2.878 -> 2.880 ( +0.07%) [ +0.00% +0.03% +0.14% / +0.07% +0.45% +0.87%] index_select linear : Elapsed 0.029 ms (2.878 ms / 100) 2.880 -> 2.875 ( -0.17%) [ +0.00% +0.10% +0.03% / +0.14% -0.17% +0.14%] index_select reverse : Elapsed 0.029 ms (2.880 ms / 100) 2.871 -> 2.874 ( +0.10%) [ +0.03% +0.10% +0.00% / +0.10% +0.10% +0.10%] index_select skip64 : Elapsed 0.029 ms (2.872 ms / 100) 2.892 -> 2.875 ( -0.59%) [ +0.03% +0.07% +0.00% / +0.00% -0.59% -0.55%] index_select skip256 : Elapsed 0.029 ms (2.893 ms / 100) 2.874 -> 2.875 ( +0.03%) [ +0.14% +0.00% +0.14% / +0.21% +0.03% +0.07%] index_select spread : Elapsed 0.029 ms (2.878 ms / 100) 2.874 -> 2.872 ( -0.07%) [ +0.35% +0.21% +0.00% / +0.24% +0.24% -0.07%] index_select strided 3 : Elapsed 0.029 ms (2.884 ms / 100) 2.876 -> 2.866 ( -0.35%) [ +0.10% +0.00% +0.07% / +0.00% -0.35% -0.03%] index_select strided 5 : Elapsed 0.029 ms (2.879 ms / 100) 2.883 -> 2.876 ( -0.24%) [ +0.00% +0.03% +0.00% / -0.07% -0.24% -0.17%] index_select strided 7 : Elapsed 0.029 ms (2.883 ms / 100) 2.898 -> 2.878 ( -0.69%) [ +0.00% +0.00% +0.00% / +0.14% -0.48% -0.69%] index_select strided 8 : Elapsed 0.029 ms (2.898 ms / 100) 2.901 -> 2.873 ( -0.97%) [ +0.00% +0.07% +0.14% / -0.21% -0.83% -0.97%] index_select strided 16 : Elapsed 0.029 ms (2.901 ms / 100) 2.884 -> 2.873 ( -0.38%) [ +0.00% +0.07% +0.03% / -0.14% +0.00% -0.38%] index_select random : Elapsed 0.029 ms (2.884 ms / 100) 2.880 -> 2.876 ( -0.14%) [ +0.00% +0.03% +0.07% / +0.00% -0.14% -0.03%] index_select random_sorted : Elapsed 0.029 ms (2.880 ms / 100) out_shape = [40, 16, 20, 4] in_shape = [5, 16, 20, 4] idx_dim = 0 B = [40, 16, 20, 4] (stride (1280, 80, 1, 20)) A = [5, 16, 20, 4] (stride (1280, 1, 16, 320)) dim = 0 1.575 -> 1.576 ( +0.06%) [ +0.00% +0.19% +0.00% / +0.06% +0.51% +0.63%] index_add_ linear : Elapsed 0.016 ms (1.575 ms / 100) 1.527 -> 1.529 ( +0.13%) [ +0.00% +0.07% +0.07% / +0.13% +0.52% +0.52%] index_copy_ linear : Elapsed 0.015 ms (1.527 ms / 100) 1.580 -> 1.580 ( +0.00%) [ +0.13% +0.06% +0.00% / +0.00% +0.44% +0.44%] index_add_ reverse : Elapsed 0.016 ms (1.582 ms / 100) 1.530 -> 1.531 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.39% +0.46%] index_copy_ reverse : Elapsed 0.015 ms (1.530 ms / 100) 1.580 -> 1.581 ( +0.06%) [ +0.32% +0.06% +0.00% / +0.06% +0.57% +0.63%] index_add_ spread : Elapsed 0.016 ms (1.585 ms / 100) 1.535 -> 1.537 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.13% +0.85% +0.65%] index_copy_ spread : Elapsed 0.015 ms (1.535 ms / 100) 1.581 -> 1.582 ( +0.06%) [ +0.13% +0.00% +0.00% / +0.06% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.016 ms (1.583 ms / 100) 1.535 -> 1.538 ( +0.20%) [ +0.20% +0.00% +0.13% / +0.20% +1.37% +0.91%] index_copy_ strided 3 : Elapsed 0.015 ms (1.538 ms / 100) 1.578 -> 1.580 ( +0.13%) [ +0.06% +0.13% +0.00% / +0.13% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.016 ms (1.579 ms / 100) 1.529 -> 1.531 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.52% +0.59%] index_copy_ strided 7 : Elapsed 0.015 ms (1.530 ms / 100) 1.576 -> 1.581 ( +0.32%) [ +0.19% +0.00% +0.13% / +0.32% +0.44% +0.57%] index_add_ perm : Elapsed 0.016 ms (1.579 ms / 100) 1.527 -> 1.530 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.59% +0.52%] index_copy_ perm : Elapsed 0.015 ms (1.528 ms / 100) 1.582 -> 1.582 ( +0.00%) [ +0.06% +0.13% +0.00% / +0.00% +0.51% +0.57%] index_add_ perm_sorted : Elapsed 0.016 ms (1.583 ms / 100) 1.530 -> 1.532 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +0.59% +0.65%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.532 ms / 100) 8.516 -> 8.519 ( +0.04%) [ +0.06% +0.34% +0.00% / +0.35% +0.15% +0.04%] index_select const : Elapsed 0.085 ms (8.521 ms / 100) 8.530 -> 8.534 ( +0.05%) [ +0.00% +0.07% +0.06% / +0.09% +0.35% +0.05%] index_select wrap : Elapsed 0.085 ms (8.530 ms / 100) 8.541 -> 8.537 ( -0.05%) [ +0.00% +0.02% +0.02% / +0.07% -0.05% +0.12%] index_select linear : Elapsed 0.085 ms (8.541 ms / 100) 8.534 -> 8.540 ( +0.07%) [ +0.01% +0.00% +0.13% / +0.07% +0.36% +0.27%] index_select reverse : Elapsed 0.085 ms (8.535 ms / 100) 8.505 -> 8.532 ( +0.32%) [ +0.27% +0.00% +0.33% / +0.45% +0.33% +0.32%] index_select skip64 : Elapsed 0.085 ms (8.528 ms / 100) 8.507 -> 8.519 ( +0.14%) [ +0.09% +0.02% +0.00% / +0.14% +0.20% +0.43%] index_select skip256 : Elapsed 0.085 ms (8.515 ms / 100) 8.532 -> 8.540 ( +0.09%) [ +0.19% +0.21% +0.00% / +0.13% +0.09% +0.19%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.520 -> 8.524 ( +0.05%) [ +0.05% +0.00% +0.15% / +0.05% +0.26% +0.43%] index_select strided 3 : Elapsed 0.085 ms (8.524 ms / 100) 8.530 -> 8.522 ( -0.09%) [ +0.27% +0.05% +0.00% / -0.01% +0.20% -0.09%] index_select random : Elapsed 0.086 ms (8.553 ms / 100) 8.534 -> 8.532 ( -0.02%) [ +0.23% +0.00% +0.04% / +0.09% -0.02% +0.30%] index_select random_sorted : Elapsed 0.086 ms (8.554 ms / 100) B = [40, 16, 20, 4] (stride (4, 160, 2560, 1)) A = [5, 16, 20, 4] (stride (1280, 1, 16, 320)) dim = 0 1.574 -> 1.580 ( +0.38%) [ +0.00% +0.32% +0.25% / +0.38% +0.51% +0.89%] index_add_ linear : Elapsed 0.016 ms (1.574 ms / 100) 1.526 -> 1.531 ( +0.33%) [ +0.00% +0.26% +0.20% / +0.33% +0.72% +0.92%] index_copy_ linear : Elapsed 0.015 ms (1.526 ms / 100) 1.581 -> 1.584 ( +0.19%) [ +0.13% +0.00% +0.06% / +0.19% +0.63% +0.70%] index_add_ reverse : Elapsed 0.016 ms (1.583 ms / 100) 1.530 -> 1.534 ( +0.26%) [ +0.07% +0.00% +0.20% / +0.26% +0.72% +0.78%] index_copy_ reverse : Elapsed 0.015 ms (1.531 ms / 100) 1.581 -> 1.580 ( -0.06%) [ +0.19% +0.00% +0.13% / -0.06% +0.70% +0.82%] index_add_ spread : Elapsed 0.016 ms (1.584 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.13% +0.00% +0.20% / +0.00% +0.78% +0.78%] index_copy_ spread : Elapsed 0.015 ms (1.536 ms / 100) 1.576 -> 1.576 ( +0.00%) [ +0.00% +0.25% +0.06% / +0.00% +0.70% +0.89%] index_add_ strided 3 : Elapsed 0.016 ms (1.576 ms / 100) 1.528 -> 1.529 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.65% +0.72%] index_copy_ strided 3 : Elapsed 0.015 ms (1.530 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.32% +0.32% +0.00% / +0.13% +0.32% +0.45%] index_add_ strided 7 : Elapsed 0.016 ms (1.578 ms / 100) 1.524 -> 1.522 ( -0.13%) [ +0.00% +0.07% +0.07% / -0.13% +0.52% +0.59%] index_copy_ strided 7 : Elapsed 0.015 ms (1.524 ms / 100) 1.574 -> 1.572 ( -0.13%) [ +0.00% +0.25% +0.06% / -0.13% +0.44% +0.51%] index_add_ perm : Elapsed 0.016 ms (1.574 ms / 100) 1.524 -> 1.522 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.66% +0.66%] index_copy_ perm : Elapsed 0.015 ms (1.524 ms / 100) 1.577 -> 1.579 ( +0.13%) [ +0.19% +0.00% +0.00% / +0.13% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.016 ms (1.580 ms / 100) 1.528 -> 1.528 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.65% +0.72%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.530 ms / 100) 8.521 -> 8.519 ( -0.02%) [ +0.00% +0.02% +0.26% / -0.02% -0.01% +0.16%] index_select const : Elapsed 0.085 ms (8.521 ms / 100) 8.540 -> 8.530 ( -0.12%) [ +0.04% +0.06% +0.00% / -0.05% +0.12% -0.12%] index_select wrap : Elapsed 0.085 ms (8.543 ms / 100) 8.545 -> 8.534 ( -0.13%) [ +0.05% +0.00% +0.04% / -0.06% +0.27% -0.13%] index_select linear : Elapsed 0.085 ms (8.549 ms / 100) 8.540 -> 8.543 ( +0.04%) [ +0.07% +0.00% +0.08% / +0.11% +0.04% +0.14%] index_select reverse : Elapsed 0.085 ms (8.546 ms / 100) 8.530 -> 8.527 ( -0.04%) [ +0.00% +0.05% +0.13% / -0.04% +0.02% +0.32%] index_select skip64 : Elapsed 0.085 ms (8.530 ms / 100) 8.526 -> 8.526 ( +0.00%) [ +0.02% +0.00% +0.26% / +0.00% +0.14% +0.28%] index_select skip256 : Elapsed 0.085 ms (8.528 ms / 100) 8.542 -> 8.532 ( -0.12%) [ +0.15% +0.00% +0.01% / +0.02% -0.12% +0.12%] index_select spread : Elapsed 0.086 ms (8.555 ms / 100) 8.548 -> 8.546 ( -0.02%) [ +0.09% +0.00% +0.00% / +0.04% +0.07% -0.02%] index_select strided 3 : Elapsed 0.086 ms (8.556 ms / 100) 8.541 -> 8.534 ( -0.08%) [ +0.00% +0.04% +0.06% / -0.08% +0.25% +0.05%] index_select random : Elapsed 0.085 ms (8.541 ms / 100) 8.547 -> 8.558 ( +0.13%) [ +0.11% +0.01% +0.00% / +0.23% +0.13% +0.27%] index_select random_sorted : Elapsed 0.086 ms (8.556 ms / 100) B = [40, 16, 20, 4] (stride (16, 1, 2560, 640)) A = [5, 16, 20, 4] (stride (1, 20, 320, 5)) dim = 0 1.472 -> 1.475 ( +0.20%) [ +0.14% +0.07% +0.00% / +0.20% +0.54% +0.61%] index_add_ linear : Elapsed 0.015 ms (1.474 ms / 100) 1.422 -> 1.424 ( +0.14%) [ +0.07% +0.14% +0.00% / +0.14% +0.49% +0.56%] index_copy_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.471 -> 1.471 ( +0.00%) [ +0.27% +0.27% +0.00% / +0.00% +0.41% +0.75%] index_add_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.21% +0.28% +0.00% / +0.07% +0.49% +0.84%] index_copy_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.470 -> 1.468 ( -0.14%) [ +0.20% +0.07% +0.00% / -0.14% +0.48% +0.27%] index_add_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.421 -> 1.419 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +0.63% +0.35%] index_copy_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.468 -> 1.468 ( +0.00%) [ +0.07% +0.27% +0.00% / +0.00% +0.48% +0.41%] index_add_ strided 3 : Elapsed 0.015 ms (1.469 ms / 100) 1.420 -> 1.420 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.42% +0.49%] index_copy_ strided 3 : Elapsed 0.014 ms (1.421 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.61% +0.75%] index_add_ strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.422 -> 1.424 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.70% +0.63%] index_copy_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.00% +0.54% +0.61%] index_add_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.70% +0.77%] index_copy_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_add_ perm_sorted : Elapsed 0.015 ms (1.472 ms / 100) 1.420 -> 1.420 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.42%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) 8.219 -> 8.223 ( +0.05%) [ +0.10% +0.00% +0.26% / +0.18% +0.05% +0.29%] index_select const : Elapsed 0.082 ms (8.227 ms / 100) 8.224 -> 8.222 ( -0.02%) [ +0.09% +0.05% +0.00% / -0.02% +0.01% -0.01%] index_select wrap : Elapsed 0.082 ms (8.231 ms / 100) 8.213 -> 8.208 ( -0.06%) [ +0.07% +0.00% +0.05% / -0.06% +0.05% +0.30%] index_select linear : Elapsed 0.082 ms (8.219 ms / 100) 8.222 -> 8.234 ( +0.15%) [ +0.13% +0.00% +0.04% / +0.27% +0.15% +0.27%] index_select reverse : Elapsed 0.082 ms (8.233 ms / 100) 8.234 -> 8.219 ( -0.18%) [ +0.00% +0.00% +0.04% / -0.02% -0.18% -0.05%] index_select skip64 : Elapsed 0.082 ms (8.234 ms / 100) 8.208 -> 8.222 ( +0.17%) [ +0.00% +0.23% +0.22% / +0.24% +0.17% +0.30%] index_select skip256 : Elapsed 0.082 ms (8.208 ms / 100) 8.218 -> 8.228 ( +0.12%) [ +0.18% +0.04% +0.00% / +0.18% +0.13% +0.12%] index_select spread : Elapsed 0.082 ms (8.233 ms / 100) 8.217 -> 8.213 ( -0.05%) [ +0.02% +0.16% +0.00% / +0.04% -0.05% +0.28%] index_select strided 3 : Elapsed 0.082 ms (8.219 ms / 100) 8.211 -> 8.231 ( +0.24%) [ +0.00% +0.21% +0.06% / +0.32% +0.50% +0.24%] index_select random : Elapsed 0.082 ms (8.211 ms / 100) 8.209 -> 8.222 ( +0.16%) [ +0.34% +0.32% +0.00% / +0.24% +0.16% +0.32%] index_select random_sorted : Elapsed 0.082 ms (8.237 ms / 100) B = [40, 16, 20, 4] (stride (20, 800, 1, 12800)) A = [5, 16, 20, 4] (stride (20, 100, 1, 1600)) dim = 0 1.522 -> 1.521 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.61% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.476 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.99% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.475 -> 1.478 ( +0.20%) [ +0.07% +0.14% +0.00% / +0.20% +0.75% +0.75%] index_copy_ reverse : Elapsed 0.015 ms (1.476 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.66%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.61% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.75%] index_copy_ strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.61% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.523 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.72% +0.79%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.75% +0.81%] index_copy_ perm : Elapsed 0.015 ms (1.475 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.523 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.81%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.475 ms / 100) 8.514 -> 8.530 ( +0.19%) [ +0.14% +0.00% +0.21% / +0.25% +0.46% +0.19%] index_select const : Elapsed 0.085 ms (8.526 ms / 100) 8.532 -> 8.551 ( +0.22%) [ +0.04% +0.00% +0.13% / +0.23% +0.30% +0.22%] index_select wrap : Elapsed 0.085 ms (8.535 ms / 100) 8.544 -> 8.538 ( -0.07%) [ +0.05% +0.00% +0.05% / -0.07% -0.05% +0.13%] index_select linear : Elapsed 0.085 ms (8.548 ms / 100) 8.545 -> 8.539 ( -0.07%) [ +0.13% +0.00% +0.11% / -0.07% +0.18% +0.09%] index_select reverse : Elapsed 0.086 ms (8.556 ms / 100) 8.523 -> 8.525 ( +0.02%) [ +0.04% +0.06% +0.00% / +0.02% +0.28% +0.08%] index_select skip64 : Elapsed 0.085 ms (8.526 ms / 100) 8.515 -> 8.527 ( +0.14%) [ +0.20% +0.00% +0.19% / +0.14% +0.35% +0.58%] index_select skip256 : Elapsed 0.085 ms (8.532 ms / 100) 8.542 -> 8.546 ( +0.05%) [ +0.01% +0.13% +0.00% / +0.05% +0.27% +0.19%] index_select spread : Elapsed 0.085 ms (8.543 ms / 100) 8.519 -> 8.556 ( +0.43%) [ +0.40% +0.22% +0.00% / +0.45% +0.43% +0.56%] index_select strided 3 : Elapsed 0.086 ms (8.553 ms / 100) 8.533 -> 8.553 ( +0.23%) [ +0.00% +0.30% +0.14% / +0.23% +0.29% +0.35%] index_select random : Elapsed 0.085 ms (8.533 ms / 100) 8.546 -> 8.535 ( -0.13%) [ +0.14% +0.00% +0.09% / -0.13% +0.18% +0.51%] index_select random_sorted : Elapsed 0.086 ms (8.558 ms / 100) out_shape = [5, 40, 20, 4] in_shape = [5, 16, 20, 4] idx_dim = 1 B = [5, 40, 20, 4] (stride (3200, 1, 40, 800)) A = [5, 16, 20, 4] (stride (320, 20, 1, 1600)) dim = 1 4.022 -> 4.026 ( +0.10%) [ +0.05% +0.02% +0.00% / +0.10% +0.50% +0.77%] index_add_ linear : Elapsed 0.040 ms (4.024 ms / 100) 3.889 -> 3.894 ( +0.13%) [ +0.05% +0.10% +0.00% / +0.13% +0.80% +1.11%] index_copy_ linear : Elapsed 0.039 ms (3.891 ms / 100) 4.036 -> 4.036 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.47% +0.40%] index_add_ reverse : Elapsed 0.040 ms (4.037 ms / 100) 3.916 -> 3.920 ( +0.10%) [ +0.03% +0.13% +0.00% / +0.10% +0.31% +0.64%] index_copy_ reverse : Elapsed 0.039 ms (3.917 ms / 100) 4.024 -> 4.028 ( +0.10%) [ +0.00% +0.07% +0.02% / +0.10% +0.50% +0.65%] index_add_ spread : Elapsed 0.040 ms (4.024 ms / 100) 3.888 -> 3.891 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.59% +0.75%] index_copy_ spread : Elapsed 0.039 ms (3.889 ms / 100) 4.026 -> 4.027 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.47% +0.60%] index_add_ strided 3 : Elapsed 0.040 ms (4.027 ms / 100) 3.894 -> 3.896 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.67% +1.05%] index_copy_ strided 3 : Elapsed 0.039 ms (3.896 ms / 100) 4.037 -> 4.037 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.45% +0.47%] index_add_ strided 7 : Elapsed 0.040 ms (4.039 ms / 100) 3.913 -> 3.913 ( +0.00%) [ +0.00% +0.00% +0.23% / +0.00% +0.43% +0.61%] index_copy_ strided 7 : Elapsed 0.039 ms (3.913 ms / 100) 4.026 -> 4.024 ( -0.05%) [ +0.00% +0.02% +0.02% / -0.05% +0.45% +0.37%] index_add_ perm : Elapsed 0.040 ms (4.026 ms / 100) 3.894 -> 3.896 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.67% +0.90%] index_copy_ perm : Elapsed 0.039 ms (3.895 ms / 100) 4.021 -> 4.025 ( +0.10%) [ +0.05% +0.02% +0.00% / +0.10% +0.50% +0.62%] index_add_ perm_sorted : Elapsed 0.040 ms (4.023 ms / 100) 3.892 -> 3.907 ( +0.39%) [ +0.05% +0.10% +0.00% / +0.39% +0.59% +0.90%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.894 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.07% +0.05% +0.00% / +0.02% +0.11% +0.23%] index_select const : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.23% +0.04% / +0.02% +0.23% +0.32%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.572 -> 5.585 ( +0.23%) [ +0.04% +0.02% +0.00% / +0.34% +0.23% +0.31%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.574 ( +0.11%) [ +0.04% +0.16% +0.00% / +0.11% +0.22% +0.27%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.565 -> 5.562 ( -0.05%) [ +0.00% +0.11% +0.05% / +0.14% -0.05% +0.14%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.02% +0.09% +0.00% / +0.05% -0.05% -0.04%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.565 -> 5.571 ( +0.11%) [ +0.22% +0.16% +0.00% / +0.14% +0.11% +0.20%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.568 -> 5.572 ( +0.07%) [ +0.05% +0.00% +0.14% / +0.07% +0.16% +0.45%] index_select strided 3 : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.09% +0.00% +0.07% / -0.04% +0.05% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.577 ms / 100) 5.566 -> 5.576 ( +0.18%) [ +0.05% +0.00% +0.04% / +0.18% +0.20% +0.34%] index_select strided 7 : Elapsed 0.056 ms (5.569 ms / 100) 5.564 -> 5.555 ( -0.16%) [ +0.00% +0.05% +0.02% / +0.04% -0.16% +0.22%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.00% +0.02% +0.13% / +0.04% +0.13% +0.07%] index_select random : Elapsed 0.056 ms (5.572 ms / 100) 5.569 -> 5.574 ( +0.09%) [ +0.07% +0.00% +0.05% / +0.11% +0.09% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [5, 40, 20, 4] (stride (80, 400, 4, 1)) A = [5, 16, 20, 4] (stride (4, 20, 320, 1)) dim = 1 3.728 -> 3.729 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.67% +0.75%] index_add_ linear : Elapsed 0.037 ms (3.728 ms / 100) 3.592 -> 3.594 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.86% +1.14%] index_copy_ linear : Elapsed 0.036 ms (3.593 ms / 100) 3.733 -> 3.737 ( +0.11%) [ +0.05% +0.08% +0.00% / +0.11% +0.67% +0.78%] index_add_ reverse : Elapsed 0.037 ms (3.735 ms / 100) 3.603 -> 3.606 ( +0.08%) [ +0.08% +0.00% +0.03% / +0.08% +0.72% +0.89%] index_copy_ reverse : Elapsed 0.036 ms (3.606 ms / 100) 3.733 -> 3.740 ( +0.19%) [ +0.13% +0.03% +0.00% / +0.19% +0.75% +0.83%] index_add_ spread : Elapsed 0.037 ms (3.738 ms / 100) 3.610 -> 3.613 ( +0.08%) [ +0.06% +0.11% +0.00% / +0.08% +0.69% +0.91%] index_copy_ spread : Elapsed 0.036 ms (3.612 ms / 100) 3.742 -> 3.743 ( +0.03%) [ +0.13% +0.05% +0.00% / +0.03% +0.67% +0.72%] index_add_ strided 3 : Elapsed 0.037 ms (3.747 ms / 100) 3.613 -> 3.615 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.66% +0.91%] index_copy_ strided 3 : Elapsed 0.036 ms (3.613 ms / 100) 3.733 -> 3.734 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.75% +0.80%] index_add_ strided 7 : Elapsed 0.037 ms (3.734 ms / 100) 3.602 -> 3.606 ( +0.11%) [ +0.00% +0.00% +0.03% / +0.11% +0.81% +0.86%] index_copy_ strided 7 : Elapsed 0.036 ms (3.602 ms / 100) 3.728 -> 3.728 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.72% +0.70%] index_add_ perm : Elapsed 0.037 ms (3.728 ms / 100) 3.592 -> 3.594 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.92% +1.03%] index_copy_ perm : Elapsed 0.036 ms (3.593 ms / 100) 3.738 -> 3.738 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.80% +0.83%] index_add_ perm_sorted : Elapsed 0.037 ms (3.740 ms / 100) 3.605 -> 3.610 ( +0.14%) [ +0.14% +0.17% +0.00% / +0.14% +0.89% +1.05%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.610 ms / 100) 5.476 -> 5.468 ( -0.15%) [ +0.07% +0.02% +0.00% / -0.09% -0.02% -0.15%] index_select const : Elapsed 0.055 ms (5.480 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.02% +0.05% +0.00% / +0.22% -0.05% +0.00%] index_select wrap : Elapsed 0.055 ms (5.482 ms / 100) 5.484 -> 5.482 ( -0.04%) [ +0.02% +0.05% +0.00% / +0.07% -0.04% +0.00%] index_select linear : Elapsed 0.055 ms (5.485 ms / 100) 5.479 -> 5.479 ( +0.00%) [ +0.00% +0.33% +0.09% / +0.22% +0.04% +0.00%] index_select reverse : Elapsed 0.055 ms (5.479 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.00% +0.05% +0.11% / +0.00% +0.13% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.471 ms / 100) 5.468 -> 5.472 ( +0.07%) [ +0.13% +0.05% +0.00% / +0.11% +0.07% +0.13%] index_select skip256 : Elapsed 0.055 ms (5.475 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.11% +0.07% +0.00% / +0.07% -0.07% +0.07%] index_select spread : Elapsed 0.055 ms (5.486 ms / 100) 5.480 -> 5.480 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.15% +0.00% +0.16%] index_select strided 3 : Elapsed 0.055 ms (5.486 ms / 100) 5.479 -> 5.479 ( +0.00%) [ +0.13% +0.00% +0.05% / +0.04% +0.00% +0.05%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.480 -> 5.478 ( -0.04%) [ +0.13% +0.00% +0.02% / +0.07% +0.04% -0.04%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.476 -> 5.471 ( -0.09%) [ +0.00% +0.00% +0.18% / -0.09% +0.13% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.482 -> 5.479 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.09% +0.02%] index_select random : Elapsed 0.055 ms (5.485 ms / 100) 5.484 -> 5.478 ( -0.11%) [ +0.00% +0.00% +0.00% / +0.07% -0.07% -0.11%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) B = [5, 40, 20, 4] (stride (20, 400, 1, 100)) A = [5, 16, 20, 4] (stride (1280, 4, 64, 1)) dim = 1 4.027 -> 4.029 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.65% +0.67%] index_add_ linear : Elapsed 0.040 ms (4.029 ms / 100) 3.911 -> 3.900 ( -0.28%) [ +0.00% +0.00% +0.00% / -0.28% +0.28% +0.23%] index_copy_ linear : Elapsed 0.039 ms (3.911 ms / 100) 4.042 -> 4.044 ( +0.05%) [ +0.02% +0.00% +0.00% / +0.05% +0.47% +0.49%] index_add_ reverse : Elapsed 0.040 ms (4.043 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.00% +0.10% +0.10% / +0.15% +0.26% +0.26%] index_copy_ reverse : Elapsed 0.039 ms (3.917 ms / 100) 4.044 -> 4.045 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.49% +0.47%] index_add_ spread : Elapsed 0.040 ms (4.046 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.51% +0.46%] index_copy_ spread : Elapsed 0.039 ms (3.928 ms / 100) 4.042 -> 4.042 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.47% +0.45%] index_add_ strided 3 : Elapsed 0.040 ms (4.042 ms / 100) 3.912 -> 3.918 ( +0.15%) [ +0.15% +0.23% +0.00% / +0.15% +0.31% +0.26%] index_copy_ strided 3 : Elapsed 0.039 ms (3.918 ms / 100) 4.042 -> 4.043 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.59% +0.52%] index_add_ strided 7 : Elapsed 0.040 ms (4.044 ms / 100) 3.914 -> 3.919 ( +0.13%) [ +0.20% +0.00% +0.10% / +0.13% +0.46% +0.41%] index_copy_ strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 4.033 -> 4.033 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.47% +0.45%] index_add_ perm : Elapsed 0.040 ms (4.035 ms / 100) 3.903 -> 3.910 ( +0.18%) [ +0.41% +0.00% +0.38% / +0.18% +0.49% +0.46%] index_copy_ perm : Elapsed 0.039 ms (3.919 ms / 100) 4.034 -> 4.034 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.37% +0.37%] index_add_ perm_sorted : Elapsed 0.040 ms (4.036 ms / 100) 3.905 -> 3.922 ( +0.44%) [ +0.46% +0.00% +0.18% / +0.44% +0.46% +0.44%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 5.552 -> 5.552 ( +0.00%) [ +0.00% +0.02% +0.09% / +0.00% +0.09% +0.14%] index_select const : Elapsed 0.056 ms (5.552 ms / 100) 5.554 -> 5.558 ( +0.07%) [ +0.22% +0.25% +0.00% / +0.07% +0.32% +0.20%] index_select wrap : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.562 ( -0.04%) [ +0.02% +0.00% +0.00% / -0.04% +0.00% +0.00%] index_select linear : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.00% +0.18% +0.11% / +0.14% +0.23% +0.07%] index_select reverse : Elapsed 0.056 ms (5.558 ms / 100) 5.555 -> 5.548 ( -0.13%) [ +0.13% +0.00% +0.05% / +0.14% -0.13% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.551 ( -0.14%) [ +0.09% +0.00% +0.11% / +0.05% -0.04% -0.14%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.554 -> 5.561 ( +0.13%) [ +0.31% +0.00% +0.14% / +0.23% +0.16% +0.13%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.552 -> 5.561 ( +0.16%) [ +0.05% +0.14% +0.00% / +0.16% +0.23% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.555 ms / 100) 5.552 -> 5.556 ( +0.07%) [ +0.00% +0.13% +0.07% / +0.23% +0.07% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.552 ms / 100) 5.560 -> 5.558 ( -0.04%) [ +0.05% +0.00% +0.07% / +0.02% -0.04% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.563 ms / 100) 5.547 -> 5.556 ( +0.16%) [ +0.16% +0.00% +0.20% / +0.16% +0.25% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.556 ms / 100) 5.554 -> 5.559 ( +0.09%) [ +0.07% +0.11% +0.00% / +0.22% +0.13% +0.09%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.558 -> 5.552 ( -0.11%) [ +0.02% +0.04% +0.00% / +0.11% -0.11% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.559 ms / 100) B = [5, 40, 20, 4] (stride (160, 4, 800, 1)) A = [5, 16, 20, 4] (stride (1, 5, 320, 80)) dim = 1 4.099 -> 4.095 ( -0.10%) [ +0.02% +0.02% +0.00% / -0.10% +0.46% +0.71%] index_add_ linear : Elapsed 0.041 ms (4.100 ms / 100) 3.928 -> 3.932 ( +0.10%) [ +0.05% +0.03% +0.00% / +0.10% +0.69% +0.71%] index_copy_ linear : Elapsed 0.039 ms (3.930 ms / 100) 4.089 -> 4.097 ( +0.20%) [ +0.10% +0.02% +0.00% / +0.20% +0.71% +0.66%] index_add_ reverse : Elapsed 0.041 ms (4.093 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.13% +0.00% +0.10% / +0.13% +0.79% +0.84%] index_copy_ reverse : Elapsed 0.039 ms (3.924 ms / 100) 4.102 -> 4.104 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.05% +0.63% +0.49%] index_add_ spread : Elapsed 0.041 ms (4.104 ms / 100) 3.937 -> 3.942 ( +0.13%) [ +0.00% +0.08% +0.13% / +0.13% +0.66% +0.46%] index_copy_ spread : Elapsed 0.039 ms (3.937 ms / 100) 4.091 -> 4.096 ( +0.12%) [ +0.15% +0.00% +0.15% / +0.12% +0.68% +0.68%] index_add_ strided 3 : Elapsed 0.041 ms (4.097 ms / 100) 3.924 -> 3.927 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.61% +0.71%] index_copy_ strided 3 : Elapsed 0.039 ms (3.927 ms / 100) 4.088 -> 4.097 ( +0.22%) [ +0.07% +0.00% +0.10% / +0.22% +0.76% +0.73%] index_add_ strided 7 : Elapsed 0.041 ms (4.091 ms / 100) 3.921 -> 3.927 ( +0.15%) [ +0.10% +0.00% +0.26% / +0.15% +0.66% +0.66%] index_copy_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 4.093 -> 4.100 ( +0.17%) [ +0.15% +0.00% +0.17% / +0.17% +0.86% +0.86%] index_add_ perm : Elapsed 0.041 ms (4.099 ms / 100) 3.929 -> 3.928 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.71% +0.74%] index_copy_ perm : Elapsed 0.039 ms (3.931 ms / 100) 4.087 -> 4.098 ( +0.27%) [ +0.12% +0.17% +0.00% / +0.27% +0.71% +0.88%] index_add_ perm_sorted : Elapsed 0.041 ms (4.092 ms / 100) 3.920 -> 3.926 ( +0.15%) [ +0.05% +0.00% +0.10% / +0.15% +0.74% +1.02%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.11% +0.13% +0.00% / +0.09% +0.04% +0.15%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.493 -> 5.486 ( -0.13%) [ +0.04% +0.04% +0.00% / -0.02% -0.05% -0.13%] index_select wrap : Elapsed 0.055 ms (5.495 ms / 100) 5.496 -> 5.484 ( -0.22%) [ +0.00% +0.02% +0.05% / +0.02% -0.15% -0.22%] index_select linear : Elapsed 0.055 ms (5.496 ms / 100) 5.489 -> 5.491 ( +0.04%) [ +0.04% +0.00% +0.15% / +0.09% +0.04% +0.05%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.07% +0.00% +0.02% / +0.05% +0.15% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.487 ms / 100) 5.482 -> 5.484 ( +0.04%) [ +0.09% +0.05% +0.00% / +0.04% +0.13% +0.11%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.491 -> 5.483 ( -0.15%) [ +0.09% +0.00% +0.05% / +0.15% -0.15% -0.11%] index_select spread : Elapsed 0.055 ms (5.496 ms / 100) 5.490 -> 5.486 ( -0.07%) [ +0.00% +0.04% +0.16% / +0.05% -0.07% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.490 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.00% +0.11% +0.07% / +0.07% -0.04% -0.09%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.489 -> 5.490 ( +0.02%) [ +0.04% +0.00% +0.05% / +0.02% +0.05% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.491 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.00% +0.20% +0.24% / +0.15% +0.04% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.495 -> 5.489 ( -0.11%) [ +0.02% +0.00% +0.11% / +0.04% -0.04% -0.11%] index_select random : Elapsed 0.055 ms (5.496 ms / 100) 5.489 -> 5.488 ( -0.02%) [ +0.05% +0.15% +0.00% / +0.09% -0.02% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.492 ms / 100) B = [5, 40, 20, 4] (stride (160, 1, 800, 40)) A = [5, 16, 20, 4] (stride (1, 100, 5, 1600)) dim = 1 4.087 -> 4.091 ( +0.10%) [ +0.02% +0.00% +0.17% / +0.10% +0.51% +0.81%] index_add_ linear : Elapsed 0.041 ms (4.088 ms / 100) 3.957 -> 3.964 ( +0.18%) [ +0.03% +0.00% +0.66% / +0.18% +0.63% +0.68%] index_copy_ linear : Elapsed 0.040 ms (3.958 ms / 100) 4.092 -> 4.093 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.51% +0.46%] index_add_ reverse : Elapsed 0.041 ms (4.093 ms / 100) 3.956 -> 3.958 ( +0.05%) [ +0.13% +0.00% +0.10% / +0.05% +0.56% +0.48%] index_copy_ reverse : Elapsed 0.040 ms (3.961 ms / 100) 4.071 -> 4.086 ( +0.37%) [ +0.00% +0.12% +0.22% / +0.37% +0.64% +0.47%] index_add_ spread : Elapsed 0.041 ms (4.071 ms / 100) 3.944 -> 3.958 ( +0.35%) [ +0.00% +0.00% +0.15% / +0.35% +0.46% +0.48%] index_copy_ spread : Elapsed 0.039 ms (3.944 ms / 100) 4.082 -> 4.087 ( +0.12%) [ +0.00% +0.07% +0.07% / +0.12% +0.51% +0.54%] index_add_ strided 3 : Elapsed 0.041 ms (4.082 ms / 100) 3.946 -> 3.954 ( +0.20%) [ +0.00% +0.05% +0.00% / +0.20% +0.51% +0.58%] index_copy_ strided 3 : Elapsed 0.039 ms (3.946 ms / 100) 4.094 -> 4.097 ( +0.07%) [ +0.00% +0.02% +0.17% / +0.07% +0.64% +0.56%] index_add_ strided 7 : Elapsed 0.041 ms (4.094 ms / 100) 3.960 -> 3.966 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.51% +0.53%] index_copy_ strided 7 : Elapsed 0.040 ms (3.960 ms / 100) 4.090 -> 4.092 ( +0.05%) [ +0.00% +0.15% +0.00% / +0.05% +0.46% +0.54%] index_add_ perm : Elapsed 0.041 ms (4.090 ms / 100) 3.963 -> 3.969 ( +0.15%) [ +0.00% +0.05% +0.08% / +0.15% +0.53% +0.50%] index_copy_ perm : Elapsed 0.040 ms (3.963 ms / 100) 4.090 -> 4.103 ( +0.32%) [ +0.15% +0.00% +0.17% / +0.32% +0.42% +0.44%] index_add_ perm_sorted : Elapsed 0.041 ms (4.096 ms / 100) 3.963 -> 3.980 ( +0.43%) [ +0.03% +0.00% +0.05% / +1.03% +0.43% +0.50%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.964 ms / 100) 5.559 -> 5.565 ( +0.11%) [ +0.16% +0.05% +0.00% / +0.11% +0.27% +0.11%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.566 -> 5.570 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.13% +0.20% +0.07%] index_select wrap : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.574 ( +0.16%) [ +0.00% +0.05% +0.00% / +0.23% +0.16% +0.20%] index_select linear : Elapsed 0.056 ms (5.565 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.00% +0.13% +0.09% / +0.18% +0.05% +0.27%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.565 -> 5.560 ( -0.09%) [ +0.00% +0.09% +0.04% / +0.49% -0.09% -0.02%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.00% +0.05% +0.05% / +0.18% -0.04% +0.02%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.16% +0.07% +0.00% / +0.16% +0.05% +0.02%] index_select spread : Elapsed 0.056 ms (5.578 ms / 100) 5.570 -> 5.567 ( -0.05%) [ +0.11% +0.04% +0.00% / +0.05% -0.02% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.576 ms / 100) 5.561 -> 5.563 ( +0.04%) [ +0.11% +0.09% +0.00% / +0.20% +0.04% +0.16%] index_select strided 5 : Elapsed 0.056 ms (5.567 ms / 100) 5.564 -> 5.571 ( +0.13%) [ +0.00% +0.16% +0.07% / +0.13% +0.18% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.564 ms / 100) 5.563 -> 5.560 ( -0.05%) [ +0.00% +0.00% +0.04% / -0.05% +0.02% +0.00%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.00% +0.16% +0.16% / +0.20% +0.07% +0.16%] index_select random : Elapsed 0.056 ms (5.562 ms / 100) 5.565 -> 5.570 ( +0.09%) [ +0.00% +0.05% +0.05% / +0.13% +0.09% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.565 ms / 100) B = [5, 40, 20, 4] (stride (1, 5, 800, 200)) A = [5, 16, 20, 4] (stride (80, 400, 4, 1)) dim = 1 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.91% +1.06%] index_add_ linear : Elapsed 0.013 ms (1.323 ms / 100) 1.383 -> 1.382 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +1.01% +1.08%] index_copy_ linear : Elapsed 0.014 ms (1.383 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.46% / +0.08% +0.91% +0.99%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.379 -> 1.378 ( -0.07%) [ +0.07% +0.00% +0.15% / -0.07% +0.87% +0.87%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.91% +0.99%] index_add_ spread : Elapsed 0.013 ms (1.321 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.00% +0.15% +0.22% / +0.29% +0.80% +0.87%] index_copy_ spread : Elapsed 0.014 ms (1.379 ms / 100) 1.325 -> 1.329 ( +0.30%) [ +0.00% +0.08% +0.15% / +0.30% +0.75% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.325 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.00% +0.07% +0.36% / +0.29% +1.09% +1.02%] index_copy_ strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.317 -> 1.323 ( +0.46%) [ +0.15% +0.00% +0.08% / +0.46% +0.84% +0.84%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.380 -> 1.385 ( +0.36%) [ +0.07% +0.07% +0.00% / +0.36% +0.72% +0.72%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.322 -> 1.324 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +1.13% +1.21%] index_add_ perm : Elapsed 0.013 ms (1.324 ms / 100) 1.382 -> 1.382 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +1.09% +1.09%] index_copy_ perm : Elapsed 0.014 ms (1.382 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +1.14% +1.06%] index_add_ perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +1.01% +1.09%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 3.544 -> 3.546 ( +0.06%) [ +0.14% +0.00% +0.08% / +0.08% +0.14% +0.06%] index_select const : Elapsed 0.035 ms (3.549 ms / 100) 3.550 -> 3.551 ( +0.03%) [ +0.17% +0.11% +0.00% / +0.34% +0.03% +0.17%] index_select wrap : Elapsed 0.036 ms (3.556 ms / 100) 3.542 -> 3.554 ( +0.34%) [ +0.25% +0.00% +0.23% / +0.34% +0.65% +0.71%] index_select linear : Elapsed 0.036 ms (3.551 ms / 100) 3.548 -> 3.555 ( +0.20%) [ +0.00% +0.17% +0.08% / +0.20% +0.62% +0.48%] index_select reverse : Elapsed 0.035 ms (3.548 ms / 100) 3.540 -> 3.545 ( +0.14%) [ +0.08% +0.23% +0.00% / +0.23% +0.14% +0.25%] index_select skip64 : Elapsed 0.035 ms (3.543 ms / 100) 3.541 -> 3.548 ( +0.20%) [ +0.11% +0.31% +0.00% / +0.28% +0.20% +0.23%] index_select skip256 : Elapsed 0.035 ms (3.545 ms / 100) 3.559 -> 3.551 ( -0.22%) [ +0.03% +0.00% +0.06% / +0.11% -0.14% -0.22%] index_select spread : Elapsed 0.036 ms (3.560 ms / 100) 3.569 -> 3.547 ( -0.62%) [ +0.06% +0.00% +0.11% / +0.22% -0.39% -0.62%] index_select strided 3 : Elapsed 0.036 ms (3.571 ms / 100) 3.569 -> 3.550 ( -0.53%) [ +0.50% +0.00% +0.03% / +0.25% -0.53% -0.45%] index_select strided 5 : Elapsed 0.036 ms (3.587 ms / 100) 3.546 -> 3.555 ( +0.25%) [ +0.00% +0.11% +0.11% / +0.31% +0.25% +0.34%] index_select strided 7 : Elapsed 0.035 ms (3.546 ms / 100) 3.547 -> 3.546 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.11% +0.06%] index_select strided 8 : Elapsed 0.035 ms (3.547 ms / 100) 3.539 -> 3.549 ( +0.28%) [ +0.14% +0.37% +0.00% / +0.28% +0.88% +0.99%] index_select random : Elapsed 0.035 ms (3.544 ms / 100) 3.546 -> 3.552 ( +0.17%) [ +0.00% +0.03% +0.00% / +0.17% +0.68% +0.71%] index_select random_sorted : Elapsed 0.035 ms (3.546 ms / 100) B = [5, 40, 20, 4] (stride (1, 5, 800, 200)) A = [5, 16, 20, 4] (stride (4, 400, 20, 1)) dim = 1 3.694 -> 3.697 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.46% +0.46%] index_add_ linear : Elapsed 0.037 ms (3.694 ms / 100) 3.567 -> 3.579 ( +0.34%) [ +0.00% +0.08% +0.06% / +0.34% +0.59% +0.50%] index_copy_ linear : Elapsed 0.036 ms (3.567 ms / 100) 3.708 -> 3.712 ( +0.11%) [ +0.00% +0.13% +0.08% / +0.11% +0.57% +0.57%] index_add_ reverse : Elapsed 0.037 ms (3.708 ms / 100) 3.573 -> 3.579 ( +0.17%) [ +0.11% +0.08% +0.00% / +0.17% +0.56% +0.50%] index_copy_ reverse : Elapsed 0.036 ms (3.577 ms / 100) 3.696 -> 3.712 ( +0.43%) [ +0.00% +0.08% +0.00% / +0.46% +0.43% +0.46%] index_add_ spread : Elapsed 0.037 ms (3.696 ms / 100) 3.570 -> 3.589 ( +0.53%) [ +0.08% +0.03% +0.00% / +0.87% +0.53% +0.53%] index_copy_ spread : Elapsed 0.036 ms (3.573 ms / 100) 3.706 -> 3.711 ( +0.13%) [ +0.05% +0.00% +0.11% / +0.13% +0.62% +0.59%] index_add_ strided 3 : Elapsed 0.037 ms (3.708 ms / 100) 3.575 -> 3.583 ( +0.22%) [ +0.03% +0.00% +0.03% / +0.22% +0.56% +0.45%] index_copy_ strided 3 : Elapsed 0.036 ms (3.576 ms / 100) 3.711 -> 3.713 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.51% +0.57%] index_add_ strided 7 : Elapsed 0.037 ms (3.711 ms / 100) 3.576 -> 3.583 ( +0.20%) [ +0.00% +0.00% +0.03% / +0.20% +0.45% +0.50%] index_copy_ strided 7 : Elapsed 0.036 ms (3.576 ms / 100) 3.696 -> 3.697 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.41% +0.43%] index_add_ perm : Elapsed 0.037 ms (3.696 ms / 100) 3.572 -> 3.575 ( +0.08%) [ +0.00% +0.11% +0.00% / +0.08% +0.42% +0.42%] index_copy_ perm : Elapsed 0.036 ms (3.572 ms / 100) 3.695 -> 3.696 ( +0.03%) [ +0.08% +0.00% +0.16% / +0.03% +0.51% +0.49%] index_add_ perm_sorted : Elapsed 0.037 ms (3.698 ms / 100) 3.569 -> 3.578 ( +0.25%) [ +0.11% +0.06% +0.00% / +0.25% +0.48% +0.56%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.573 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.04% +0.09% +0.00% / +0.13% +0.13% +0.07%] index_select const : Elapsed 0.055 ms (5.475 ms / 100) 5.471 -> 5.478 ( +0.13%) [ +0.00% +0.07% +0.16% / +0.13% +0.31% +0.26%] index_select wrap : Elapsed 0.055 ms (5.471 ms / 100) 5.473 -> 5.483 ( +0.18%) [ +0.00% +0.05% +0.09% / +0.33% +0.20% +0.18%] index_select linear : Elapsed 0.055 ms (5.473 ms / 100) 5.472 -> 5.476 ( +0.07%) [ +0.00% +0.11% +0.15% / +0.15% +0.07% +0.26%] index_select reverse : Elapsed 0.055 ms (5.472 ms / 100) 5.473 -> 5.472 ( -0.02%) [ +0.00% +0.07% +0.13% / +0.15% +0.07% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.473 ms / 100) 5.476 -> 5.472 ( -0.07%) [ +0.18% +0.16% +0.00% / +0.05% -0.07% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.486 ms / 100) 5.475 -> 5.480 ( +0.09%) [ +0.04% +0.07% +0.00% / +0.09% +0.11% +0.22%] index_select spread : Elapsed 0.055 ms (5.477 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.20% +0.13% +0.00% / +0.09% +0.16% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.475 -> 5.473 ( -0.04%) [ +0.09% +0.00% +0.15% / +0.07% +0.11% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.480 ms / 100) 5.478 -> 5.476 ( -0.04%) [ +0.00% +0.07% +0.00% / -0.04% +0.15% -0.04%] index_select strided 7 : Elapsed 0.055 ms (5.478 ms / 100) 5.466 -> 5.472 ( +0.11%) [ +0.09% +0.04% +0.00% / +0.18% +0.11% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.471 ms / 100) 5.476 -> 5.479 ( +0.05%) [ +0.00% +0.02% +0.09% / +0.05% +0.09% +0.18%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.469 -> 5.478 ( +0.16%) [ +0.15% +0.00% +0.07% / +0.24% +0.16% +0.24%] index_select random_sorted : Elapsed 0.055 ms (5.477 ms / 100) B = [5, 40, 20, 4] (stride (800, 1, 40, 4000)) A = [5, 16, 20, 4] (stride (1, 400, 5, 100)) dim = 1 4.047 -> 4.052 ( +0.12%) [ +0.00% +0.05% +0.15% / +0.12% +0.82% +0.77%] index_add_ linear : Elapsed 0.040 ms (4.047 ms / 100) 3.916 -> 3.921 ( +0.13%) [ +0.00% +0.08% +0.28% / +0.13% +0.72% +0.66%] index_copy_ linear : Elapsed 0.039 ms (3.916 ms / 100) 4.054 -> 4.056 ( +0.05%) [ +0.00% +0.05% +0.10% / +0.05% +0.64% +0.72%] index_add_ reverse : Elapsed 0.041 ms (4.054 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.00% +0.10% +0.15% / +0.05% +0.76% +0.69%] index_copy_ reverse : Elapsed 0.039 ms (3.925 ms / 100) 4.055 -> 4.055 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.59% +0.59%] index_add_ spread : Elapsed 0.041 ms (4.057 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.00% +0.00% +0.03% / +0.05% +0.71% +0.71%] index_copy_ spread : Elapsed 0.039 ms (3.923 ms / 100) 4.056 -> 4.054 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.59% +0.64%] index_add_ strided 3 : Elapsed 0.041 ms (4.056 ms / 100) 3.924 -> 3.931 ( +0.18%) [ +0.15% +0.15% +0.00% / +0.18% +0.84% +0.82%] index_copy_ strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 4.054 -> 4.056 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.041 ms (4.056 ms / 100) 3.924 -> 3.930 ( +0.15%) [ +0.03% +0.00% +0.05% / +0.15% +0.76% +0.84%] index_copy_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 4.049 -> 4.051 ( +0.05%) [ +0.02% +0.00% +0.07% / +0.05% +0.77% +0.79%] index_add_ perm : Elapsed 0.040 ms (4.050 ms / 100) 3.917 -> 3.926 ( +0.23%) [ +0.15% +0.00% +0.08% / +0.23% +0.77% +0.71%] index_copy_ perm : Elapsed 0.039 ms (3.923 ms / 100) 4.052 -> 4.055 ( +0.07%) [ +0.05% +0.00% +0.02% / +0.07% +0.77% +0.72%] index_add_ perm_sorted : Elapsed 0.041 ms (4.054 ms / 100) 3.923 -> 3.931 ( +0.20%) [ +0.08% +0.00% +0.13% / +0.20% +0.94% +1.02%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) 5.561 -> 5.563 ( +0.04%) [ +0.00% +0.05% +0.07% / +0.04% +0.05% +0.07%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.572 -> 5.564 ( -0.14%) [ +0.00% +0.00% +0.11% / +0.09% -0.14% -0.09%] index_select wrap : Elapsed 0.056 ms (5.572 ms / 100) 5.568 -> 5.565 ( -0.05%) [ +0.16% +0.00% +0.04% / +0.09% -0.05% +0.04%] index_select linear : Elapsed 0.056 ms (5.577 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.09% +0.02% +0.00% / +0.25% +0.14% +0.07%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.568 -> 5.563 ( -0.09%) [ +0.05% +0.02% +0.00% / -0.02% -0.09% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.00% +0.11% +0.02% / +0.09% +0.07% +0.16%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.568 -> 5.561 ( -0.13%) [ +0.14% +0.00% +0.11% / +0.11% -0.13% -0.05%] index_select spread : Elapsed 0.056 ms (5.576 ms / 100) 5.567 -> 5.573 ( +0.11%) [ +0.00% +0.00% +0.02% / +0.22% +0.11% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.11% +0.00% +0.07% / +0.05% -0.04% +0.02%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.567 -> 5.572 ( +0.09%) [ +0.04% +0.00% +0.04% / +0.13% +0.09% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.569 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.00% +0.02% +0.00% / -0.13% +0.07% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.564 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.04% +0.00% +0.14% / +0.07% +0.13% -0.02%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.00% +0.29% +0.13% / +0.05% +0.07% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.564 ms / 100) out_shape = [5, 16, 40, 4] in_shape = [5, 16, 20, 4] idx_dim = 2 B = [5, 16, 40, 4] (stride (4, 20, 320, 1)) A = [5, 16, 20, 4] (stride (4, 20, 320, 1)) dim = 2 2.444 -> 2.453 ( +0.37%) [ +0.00% +0.04% +0.00% / +0.37% +0.65% +0.53%] index_add_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.00% +0.00% +0.04% / +0.65% +0.45% +0.57%] index_copy_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.443 -> 2.457 ( +0.57%) [ +0.00% +0.04% +0.04% / +0.57% +0.61% +0.61%] index_add_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.441 -> 2.454 ( +0.53%) [ +0.08% +0.08% +0.00% / +0.70% +0.57% +0.53%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.449 -> 2.454 ( +0.20%) [ +0.00% +0.16% +0.04% / +0.41% +0.33% +0.20%] index_add_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.444 -> 2.452 ( +0.33%) [ +0.00% +0.08% +0.29% / +0.45% +0.33% +0.57%] index_copy_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.446 -> 2.454 ( +0.33%) [ +0.12% +0.00% +0.53% / +0.45% +0.37% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.440 -> 2.450 ( +0.41%) [ +0.25% +0.00% +0.16% / +0.57% +0.41% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.446 ms / 100) 2.447 -> 2.455 ( +0.33%) [ +0.00% +0.12% +0.04% / +0.61% +0.45% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.441 -> 2.454 ( +0.53%) [ +0.20% +0.00% +0.12% / +0.78% +0.70% +0.53%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.447 -> 2.454 ( +0.29%) [ +0.04% +0.00% +0.12% / +0.29% +0.41% +0.49%] index_add_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.440 -> 2.452 ( +0.49%) [ +0.00% +0.29% +0.25% / +0.61% +0.49% +0.66%] index_copy_ perm : Elapsed 0.024 ms (2.440 ms / 100) 2.445 -> 2.455 ( +0.41%) [ +0.00% +0.20% +0.04% / +0.41% +0.57% +0.49%] index_add_ perm_sorted : Elapsed 0.024 ms (2.445 ms / 100) 2.440 -> 2.457 ( +0.70%) [ +0.00% +0.16% +0.16% / +0.74% +0.74% +0.70%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 4.488 -> 4.489 ( +0.02%) [ +0.00% +0.07% +0.13% / +0.09% +0.13% +0.02%] index_select const : Elapsed 0.045 ms (4.488 ms / 100) 4.496 -> 4.495 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.24% +0.24%] index_select wrap : Elapsed 0.045 ms (4.496 ms / 100) 4.492 -> 4.498 ( +0.13%) [ +0.11% +0.00% +0.11% / +0.13% +0.22% +0.29%] index_select linear : Elapsed 0.045 ms (4.497 ms / 100) 4.494 -> 4.501 ( +0.16%) [ +0.00% +0.11% +0.13% / +0.20% +0.16% +0.22%] index_select reverse : Elapsed 0.045 ms (4.494 ms / 100) 4.487 -> 4.487 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.07% +0.00% +0.04%] index_select skip64 : Elapsed 0.045 ms (4.492 ms / 100) 4.484 -> 4.492 ( +0.18%) [ +0.00% +0.09% +0.22% / +0.18% +0.18% +0.29%] index_select skip256 : Elapsed 0.045 ms (4.484 ms / 100) 4.498 -> 4.492 ( -0.13%) [ +0.00% +0.07% +0.20% / -0.13% +0.11% +0.11%] index_select spread : Elapsed 0.045 ms (4.498 ms / 100) 4.493 -> 4.499 ( +0.13%) [ +0.29% +0.00% +0.20% / +0.13% +0.16% +0.33%] index_select strided 3 : Elapsed 0.045 ms (4.506 ms / 100) 4.490 -> 4.493 ( +0.07%) [ +0.07% +0.00% +0.16% / +0.07% +0.11% +0.11%] index_select strided 5 : Elapsed 0.045 ms (4.493 ms / 100) 4.492 -> 4.497 ( +0.11%) [ +0.00% +0.18% +0.20% / +0.11% +0.24% +0.22%] index_select strided 7 : Elapsed 0.045 ms (4.492 ms / 100) 4.487 -> 4.489 ( +0.04%) [ +0.09% +0.00% +0.18% / +0.04% +0.11% +0.11%] index_select strided 8 : Elapsed 0.045 ms (4.491 ms / 100) 4.490 -> 4.489 ( -0.02%) [ +0.00% +0.22% +0.11% / -0.02% +0.09% +0.24%] index_select strided 16 : Elapsed 0.045 ms (4.490 ms / 100) 4.495 -> 4.498 ( +0.07%) [ +0.16% +0.07% +0.00% / +0.11% +0.33% +0.07%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.497 -> 4.495 ( -0.04%) [ +0.09% +0.04% +0.00% / +0.02% +0.11% -0.04%] index_select random_sorted : Elapsed 0.045 ms (4.501 ms / 100) B = [5, 16, 40, 4] (stride (640, 40, 1, 3200)) A = [5, 16, 20, 4] (stride (20, 400, 1, 100)) dim = 2 2.391 -> 2.405 ( +0.59%) [ +0.00% +0.29% +0.17% / +0.59% +0.96% +0.92%] index_add_ linear : Elapsed 0.024 ms (2.391 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.00% +0.17% +0.08% / +0.54% +0.92% +0.71%] index_copy_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.390 -> 2.404 ( +0.59%) [ +0.21% +0.00% +0.04% / +0.59% +0.79% +1.00%] index_add_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.393 -> 2.411 ( +0.75%) [ +0.33% +0.00% +0.33% / +0.75% +1.21% +1.38%] index_copy_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.00% +0.04% +0.00% / +0.37% +0.79% +0.92%] index_add_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.417 -> 2.429 ( +0.50%) [ +0.00% +0.08% +0.00% / +0.50% +1.16% +0.99%] index_copy_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.421 ( +0.46%) [ +0.17% +0.08% +0.00% / +0.54% +0.46% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.421 -> 2.434 ( +0.54%) [ +0.00% +0.17% +0.17% / +0.54% +0.66% +0.91%] index_copy_ strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.411 -> 2.420 ( +0.37%) [ +0.04% +0.00% +0.00% / +0.37% +0.54% +0.41%] index_add_ strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.422 -> 2.430 ( +0.33%) [ +0.25% +0.00% +0.08% / +0.33% +0.54% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.428 ms / 100) 2.415 -> 2.416 ( +0.04%) [ +0.12% +0.08% +0.00% / +0.50% +0.17% +0.04%] index_add_ perm : Elapsed 0.024 ms (2.418 ms / 100) 2.426 -> 2.429 ( +0.12%) [ +0.08% +0.00% +0.00% / +0.41% +0.33% +0.12%] index_copy_ perm : Elapsed 0.024 ms (2.428 ms / 100) 2.410 -> 2.416 ( +0.25%) [ +0.00% +0.12% +0.12% / +0.58% +0.25% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.425 -> 2.436 ( +0.45%) [ +0.04% +0.00% +0.08% / +0.66% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) 4.431 -> 4.429 ( -0.05%) [ +0.00% +0.11% +0.07% / +0.02% +0.00% -0.05%] index_select const : Elapsed 0.044 ms (4.431 ms / 100) 4.439 -> 4.434 ( -0.11%) [ +0.09% +0.00% +0.02% / -0.11% -0.07% -0.05%] index_select wrap : Elapsed 0.044 ms (4.443 ms / 100) 4.441 -> 4.432 ( -0.20%) [ +0.02% +0.00% +0.00% / +0.02% -0.20% +0.05%] index_select linear : Elapsed 0.044 ms (4.442 ms / 100) 4.437 -> 4.431 ( -0.14%) [ +0.27% +0.23% +0.00% / -0.02% -0.09% -0.14%] index_select reverse : Elapsed 0.044 ms (4.449 ms / 100) 4.427 -> 4.426 ( -0.02%) [ +0.00% +0.07% +0.16% / +0.05% -0.02% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.427 ms / 100) 4.428 -> 4.423 ( -0.11%) [ +0.05% +0.00% +0.00% / +0.07% +0.02% -0.11%] index_select skip256 : Elapsed 0.044 ms (4.430 ms / 100) 4.435 -> 4.435 ( +0.00%) [ +0.07% +0.09% +0.00% / +0.00% +0.00% +0.07%] index_select spread : Elapsed 0.044 ms (4.438 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.05% +0.00% +0.09% / +0.09% +0.05% +0.02%] index_select strided 3 : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.436 ( -0.09%) [ +0.00% +0.00% +0.00% / +0.07% -0.09% -0.02%] index_select strided 5 : Elapsed 0.044 ms (4.440 ms / 100) 4.431 -> 4.441 ( +0.23%) [ +0.00% +0.18% +0.11% / +0.23% +0.25% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.431 ms / 100) 4.436 -> 4.432 ( -0.09%) [ +0.07% +0.00% +0.05% / +0.07% -0.05% -0.09%] index_select strided 8 : Elapsed 0.044 ms (4.439 ms / 100) 4.434 -> 4.434 ( +0.00%) [ +0.16% +0.07% +0.00% / +0.11% +0.00% +0.02%] index_select strided 16 : Elapsed 0.044 ms (4.441 ms / 100) 4.440 -> 4.440 ( +0.00%) [ +0.09% +0.05% +0.00% / +0.02% +0.00% +0.05%] index_select random : Elapsed 0.044 ms (4.444 ms / 100) 4.441 -> 4.430 ( -0.25%) [ +0.05% +0.09% +0.00% / +0.00% -0.25% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.443 ms / 100) B = [5, 16, 40, 4] (stride (640, 1, 16, 3200)) A = [5, 16, 20, 4] (stride (4, 20, 320, 1)) dim = 2 2.450 -> 2.461 ( +0.45%) [ +0.12% +0.16% +0.00% / +0.45% +0.78% +0.82%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.443 -> 2.459 ( +0.65%) [ +0.00% +0.00% +0.08% / +0.65% +0.78% +1.02%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.451 -> 2.466 ( +0.61%) [ +0.37% +0.00% +0.24% / +0.65% +0.61% +0.82%] index_add_ reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.444 -> 2.460 ( +0.65%) [ +0.29% +0.08% +0.00% / +0.70% +0.70% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.455 -> 2.468 ( +0.53%) [ +0.00% +0.04% +0.16% / +0.77% +0.65% +0.53%] index_add_ spread : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.461 ( +0.57%) [ +0.12% +0.00% +0.20% / +0.57% +0.78% +0.82%] index_copy_ spread : Elapsed 0.025 ms (2.450 ms / 100) 2.455 -> 2.467 ( +0.49%) [ +0.00% +0.24% +0.24% / +0.77% +0.49% +0.81%] index_add_ strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.446 -> 2.458 ( +0.49%) [ +0.00% +0.16% +0.04% / +0.78% +0.49% +1.02%] index_copy_ strided 3 : Elapsed 0.024 ms (2.446 ms / 100) 2.456 -> 2.470 ( +0.57%) [ +0.12% +0.16% +0.00% / +0.61% +0.57% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.459 ms / 100) 2.446 -> 2.463 ( +0.70%) [ +0.00% +0.20% +0.08% / +0.70% +0.70% +0.82%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.458 -> 2.465 ( +0.28%) [ +0.00% +0.04% +0.00% / +0.77% +0.28% +0.33%] index_add_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.448 -> 2.461 ( +0.53%) [ +0.08% +0.08% +0.00% / +0.74% +0.53% +0.69%] index_copy_ perm : Elapsed 0.025 ms (2.450 ms / 100) 2.454 -> 2.469 ( +0.61%) [ +0.16% +0.04% +0.00% / +0.65% +0.61% +0.77%] index_add_ perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.446 -> 2.461 ( +0.61%) [ +0.00% +0.04% +0.04% / +0.65% +0.74% +0.61%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.446 ms / 100) 4.491 -> 4.491 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.16% +0.00% +0.11%] index_select const : Elapsed 0.045 ms (4.495 ms / 100) 4.500 -> 4.499 ( -0.02%) [ +0.00% +0.02% +0.04% / -0.02% +0.16% +0.16%] index_select wrap : Elapsed 0.045 ms (4.500 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.11% +0.00% +0.07% / +0.09% +0.13% +0.13%] index_select linear : Elapsed 0.045 ms (4.502 ms / 100) 4.501 -> 4.505 ( +0.09%) [ +0.04% +0.00% +0.04% / +0.11% +0.09% +0.24%] index_select reverse : Elapsed 0.045 ms (4.503 ms / 100) 4.488 -> 4.487 ( -0.02%) [ +0.22% +0.00% +0.00% / +0.18% -0.02% +0.31%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.495 -> 4.493 ( -0.04%) [ +0.00% +0.00% +0.04% / +0.11% -0.04% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.495 ms / 100) 4.493 -> 4.501 ( +0.18%) [ +0.00% +0.24% +0.22% / +0.18% +0.36% +0.29%] index_select spread : Elapsed 0.045 ms (4.493 ms / 100) 4.496 -> 4.502 ( +0.13%) [ +0.07% +0.11% +0.00% / +0.16% +0.22% +0.13%] index_select strided 3 : Elapsed 0.045 ms (4.499 ms / 100) 4.493 -> 4.492 ( -0.02%) [ +0.13% +0.00% +0.13% / +0.09% -0.02% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.499 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.27% +0.16% +0.00% / +0.11% +0.33% +0.40%] index_select strided 7 : Elapsed 0.045 ms (4.505 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.00% +0.09% +0.00% / +0.22% +0.11% +0.18%] index_select strided 8 : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.494 ( +0.02%) [ +0.00% +0.11% +0.11% / +0.02% +0.09% +0.11%] index_select strided 16 : Elapsed 0.045 ms (4.493 ms / 100) 4.496 -> 4.496 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.00% +0.11% +0.09%] index_select random : Elapsed 0.045 ms (4.496 ms / 100) 4.489 -> 4.499 ( +0.22%) [ +0.18% +0.00% +0.16% / +0.22% +0.40% +0.40%] index_select random_sorted : Elapsed 0.045 ms (4.497 ms / 100) B = [5, 16, 40, 4] (stride (16, 1, 80, 3200)) A = [5, 16, 20, 4] (stride (1280, 1, 16, 320)) dim = 2 2.423 -> 2.436 ( +0.54%) [ +0.25% +0.12% +0.00% / +0.54% +0.58% +0.54%] index_add_ linear : Elapsed 0.024 ms (2.429 ms / 100) 2.406 -> 2.422 ( +0.67%) [ +0.00% +0.00% +0.12% / +0.87% +0.67% +0.87%] index_copy_ linear : Elapsed 0.024 ms (2.406 ms / 100) 2.416 -> 2.429 ( +0.54%) [ +0.04% +0.12% +0.00% / +0.54% +0.91% +0.83%] index_add_ reverse : Elapsed 0.024 ms (2.417 ms / 100) 2.399 -> 2.412 ( +0.54%) [ +0.04% +0.04% +0.00% / +0.54% +1.13% +1.17%] index_copy_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.410 -> 2.436 ( +1.08%) [ +0.25% +0.17% +0.00% / +1.08% +1.20% +1.24%] index_add_ spread : Elapsed 0.024 ms (2.416 ms / 100) 2.398 -> 2.415 ( +0.71%) [ +0.08% +0.08% +0.00% / +0.71% +0.92% +1.08%] index_copy_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.417 -> 2.436 ( +0.79%) [ +0.25% +0.21% +0.00% / +0.79% +0.91% +0.79%] index_add_ strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.406 -> 2.420 ( +0.58%) [ +0.00% +0.08% +0.04% / +0.58% +0.79% +0.83%] index_copy_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.423 -> 2.429 ( +0.25%) [ +0.00% +0.00% +0.08% / +0.25% +0.50% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.404 -> 2.415 ( +0.46%) [ +0.00% +0.00% +0.12% / +0.46% +0.87% +0.92%] index_copy_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.420 -> 2.429 ( +0.37%) [ +0.00% +0.21% +0.08% / +0.54% +0.37% +0.41%] index_add_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.404 -> 2.416 ( +0.50%) [ +0.00% +0.04% +0.12% / +0.54% +0.50% +0.54%] index_copy_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.423 -> 2.426 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.70% +0.25% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.425 ms / 100) 2.404 -> 2.416 ( +0.50%) [ +0.08% +0.04% +0.00% / +1.04% +0.50% +0.67%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 4.417 -> 4.418 ( +0.02%) [ +0.14% +0.00% +0.02% / +0.02% +0.20% +0.02%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.437 -> 4.436 ( -0.02%) [ +0.05% +0.00% +0.05% / +0.14% -0.02% -0.02%] index_select wrap : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.431 ( -0.11%) [ +0.00% +0.02% +0.07% / +0.25% +0.11% -0.11%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.438 -> 4.434 ( -0.09%) [ +0.09% +0.16% +0.00% / +0.11% -0.09% +0.09%] index_select reverse : Elapsed 0.044 ms (4.442 ms / 100) 4.421 -> 4.414 ( -0.16%) [ +0.00% +0.14% +0.20% / -0.05% -0.16% -0.05%] index_select skip64 : Elapsed 0.044 ms (4.421 ms / 100) 4.419 -> 4.413 ( -0.14%) [ +0.18% +0.00% +0.07% / -0.14% +0.20% +0.05%] index_select skip256 : Elapsed 0.044 ms (4.427 ms / 100) 4.434 -> 4.440 ( +0.14%) [ +0.09% +0.07% +0.00% / +0.14% +0.16% +0.23%] index_select spread : Elapsed 0.044 ms (4.438 ms / 100) 4.426 -> 4.430 ( +0.09%) [ +0.00% +0.18% +0.34% / +0.09% +0.38% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.426 ms / 100) 4.420 -> 4.419 ( -0.02%) [ +0.00% +0.41% +0.16% / -0.02% +0.16% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.420 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.00% +0.18% +0.20% / +0.09% +0.11% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.434 ms / 100) 4.417 -> 4.426 ( +0.20%) [ +0.00% +0.27% +0.23% / +0.20% +0.38% +0.48%] index_select strided 8 : Elapsed 0.044 ms (4.417 ms / 100) 4.422 -> 4.425 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.11% +0.07% +0.14%] index_select strided 16 : Elapsed 0.044 ms (4.426 ms / 100) 4.435 -> 4.433 ( -0.05%) [ +0.02% +0.00% +0.16% / +0.11% -0.05% +0.00%] index_select random : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.430 ( -0.14%) [ +0.07% +0.00% +0.29% / +0.23% -0.14% +0.20%] index_select random_sorted : Elapsed 0.044 ms (4.439 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) A = [5, 16, 20, 4] (stride (1280, 80, 4, 1)) dim = 2 2.413 -> 2.417 ( +0.17%) [ +0.12% +0.04% +0.00% / +0.25% +0.25% +0.17%] index_add_ linear : Elapsed 0.024 ms (2.416 ms / 100) 2.409 -> 2.415 ( +0.25%) [ +0.08% +0.00% +0.00% / +0.25% +0.46% +0.42%] index_copy_ linear : Elapsed 0.024 ms (2.411 ms / 100) 2.412 -> 2.418 ( +0.25%) [ +0.08% +0.04% +0.00% / +0.33% +0.25% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.08% +0.17% +0.00% / +0.41% +0.29% +0.33%] index_copy_ reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.423 ( +0.71%) [ +0.21% +0.04% +0.00% / +0.71% +0.75% +1.08%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.407 -> 2.422 ( +0.62%) [ +0.00% +0.17% +0.08% / +0.62% +0.83% +1.12%] index_copy_ spread : Elapsed 0.024 ms (2.407 ms / 100) 2.403 -> 2.416 ( +0.54%) [ +0.25% +0.00% +0.33% / +0.75% +0.67% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.408 -> 2.417 ( +0.37%) [ +0.04% +0.00% +0.04% / +0.54% +0.37% +0.46%] index_copy_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.407 -> 2.416 ( +0.37%) [ +0.04% +0.00% +0.08% / +0.50% +0.42% +0.37%] index_add_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.416 ( +0.29%) [ +0.00% +0.04% +0.17% / +0.50% +0.46% +0.29%] index_copy_ strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.405 -> 2.420 ( +0.62%) [ +0.12% +0.04% +0.00% / +0.62% +0.75% +1.25%] index_add_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.424 ( +0.83%) [ +0.17% +0.00% +0.17% / +0.83% +0.83% +1.00%] index_copy_ perm : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.417 ( +0.33%) [ +0.08% +0.00% +0.12% / +0.33% +0.42% +0.54%] index_add_ perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) 2.409 -> 2.420 ( +0.46%) [ +0.00% +0.04% +0.00% / +0.58% +0.58% +0.46%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 4.431 -> 4.434 ( +0.07%) [ +0.07% +0.00% +0.09% / +0.07% +0.18% +0.11%] index_select const : Elapsed 0.044 ms (4.434 ms / 100) 4.443 -> 4.444 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.05% +0.14% +0.02%] index_select wrap : Elapsed 0.044 ms (4.443 ms / 100) 4.438 -> 4.447 ( +0.20%) [ +0.00% +0.14% +0.09% / +0.20% +0.25% +0.23%] index_select linear : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.446 ( +0.14%) [ +0.00% +0.05% +0.27% / +0.14% +0.20% +0.18%] index_select reverse : Elapsed 0.044 ms (4.440 ms / 100) 4.426 -> 4.437 ( +0.25%) [ +0.00% +0.23% +0.23% / +0.25% +0.36% +0.34%] index_select skip64 : Elapsed 0.044 ms (4.426 ms / 100) 4.433 -> 4.436 ( +0.07%) [ +0.14% +0.02% +0.00% / +0.11% +0.09% +0.07%] index_select skip256 : Elapsed 0.044 ms (4.439 ms / 100) 4.438 -> 4.444 ( +0.14%) [ +0.20% +0.16% +0.00% / +0.14% +0.29% +0.27%] index_select spread : Elapsed 0.044 ms (4.447 ms / 100) 4.442 -> 4.447 ( +0.11%) [ +0.00% +0.11% +0.09% / +0.11% +0.14% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.14% +0.00% +0.20% / +0.05% +0.05% +0.23%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.438 -> 4.443 ( +0.11%) [ +0.07% +0.32% +0.00% / +0.11% +0.16% +0.23%] index_select strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.432 -> 4.436 ( +0.09%) [ +0.14% +0.09% +0.00% / +0.11% +0.27% +0.09%] index_select strided 8 : Elapsed 0.044 ms (4.438 ms / 100) 4.435 -> 4.440 ( +0.11%) [ +0.00% +0.18% +0.00% / +0.11% +0.20% +0.14%] index_select strided 16 : Elapsed 0.044 ms (4.435 ms / 100) 4.445 -> 4.448 ( +0.07%) [ +0.00% +0.16% +0.04% / +0.07% +0.16% +0.18%] index_select random : Elapsed 0.044 ms (4.445 ms / 100) 4.439 -> 4.436 ( -0.07%) [ +0.09% +0.00% +0.00% / -0.07% +0.02% +0.34%] index_select random_sorted : Elapsed 0.044 ms (4.443 ms / 100) B = [5, 16, 40, 4] (stride (1, 5, 80, 3200)) A = [5, 16, 20, 4] (stride (1280, 80, 1, 20)) dim = 2 2.406 -> 2.416 ( +0.42%) [ +0.00% +0.17% +0.17% / +0.42% +0.42% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.420 ( +0.58%) [ +0.08% +0.04% +0.00% / +0.58% +0.62% +0.58%] index_copy_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.404 -> 2.420 ( +0.67%) [ +0.00% +0.12% +0.00% / +0.79% +0.87% +0.67%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.397 -> 2.422 ( +1.04%) [ +0.00% +0.04% +0.21% / +1.25% +1.25% +1.04%] index_copy_ reverse : Elapsed 0.024 ms (2.397 ms / 100) 2.399 -> 2.413 ( +0.58%) [ +0.00% +0.00% +0.00% / +0.58% +0.96% +1.04%] index_add_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.397 -> 2.418 ( +0.88%) [ +0.08% +0.00% +0.04% / +0.88% +1.21% +1.08%] index_copy_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.406 -> 2.415 ( +0.37%) [ +0.00% +0.04% +0.00% / +0.71% +0.37% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.402 -> 2.420 ( +0.75%) [ +0.21% +0.00% +0.21% / +0.75% +0.83% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.419 ( +0.58%) [ +0.00% +0.00% +0.17% / +0.71% +0.58% +0.67%] index_add_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.403 -> 2.417 ( +0.58%) [ +0.00% +0.08% +0.25% / +1.58% +0.58% +0.92%] index_copy_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.21% +0.00% +0.04% / +0.67% +0.50% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.410 ms / 100) 2.406 -> 2.414 ( +0.33%) [ +0.00% +0.17% +0.12% / +0.58% +0.33% +0.33%] index_copy_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.406 -> 2.409 ( +0.12%) [ +0.00% +0.12% +0.25% / +0.87% +0.12% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.00% +0.42% +0.25% / +0.96% +0.46% +0.67%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 4.434 -> 4.427 ( -0.16%) [ +0.18% +0.00% +0.18% / +0.23% -0.16% +0.11%] index_select const : Elapsed 0.044 ms (4.442 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.11% +0.25% +0.00% / +0.07% +0.25% +0.20%] index_select wrap : Elapsed 0.044 ms (4.441 ms / 100) 4.438 -> 4.443 ( +0.11%) [ +0.16% +0.09% +0.00% / +0.25% +0.11% +0.25%] index_select linear : Elapsed 0.044 ms (4.445 ms / 100) 4.448 -> 4.443 ( -0.11%) [ +0.00% +0.00% +0.04% / -0.11% -0.09% -0.11%] index_select reverse : Elapsed 0.044 ms (4.448 ms / 100) 4.435 -> 4.435 ( +0.00%) [ +0.00% +0.09% +0.07% / +0.16% +0.00% +0.18%] index_select skip64 : Elapsed 0.044 ms (4.435 ms / 100) 4.433 -> 4.437 ( +0.09%) [ +0.02% +0.16% +0.00% / +0.09% +0.20% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.434 ms / 100) 4.441 -> 4.435 ( -0.14%) [ +0.05% +0.00% +0.07% / -0.14% +0.02% +0.11%] index_select spread : Elapsed 0.044 ms (4.443 ms / 100) 4.441 -> 4.443 ( +0.05%) [ +0.00% +0.16% +0.07% / +0.14% +0.05% +0.16%] index_select strided 3 : Elapsed 0.044 ms (4.441 ms / 100) 4.438 -> 4.442 ( +0.09%) [ +0.02% +0.00% +0.14% / +0.16% +0.11% +0.09%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.18% +0.11% +0.00% / +0.14% +0.11% +0.11%] index_select strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.443 -> 4.446 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.07% +0.07%] index_select strided 8 : Elapsed 0.044 ms (4.445 ms / 100) 4.442 -> 4.441 ( -0.02%) [ +0.00% +0.09% +0.09% / +0.14% -0.02% +0.02%] index_select strided 16 : Elapsed 0.044 ms (4.442 ms / 100) 4.445 -> 4.444 ( -0.02%) [ +0.07% +0.11% +0.00% / +0.02% -0.02% +0.02%] index_select random : Elapsed 0.044 ms (4.448 ms / 100) 4.442 -> 4.437 ( -0.11%) [ +0.00% +0.09% +0.18% / +0.14% -0.11% +0.16%] index_select random_sorted : Elapsed 0.044 ms (4.442 ms / 100) out_shape = [5, 16, 20, 40] in_shape = [5, 16, 20, 4] idx_dim = 3 B = [5, 16, 20, 40] (stride (12800, 40, 640, 1)) dim = 3 fill_cnt = 4 0.802 -> 0.802 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.50%] index_fill_ const : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.802 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.37% +0.37%] index_fill_ linear : Elapsed 0.008 ms (0.803 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.12% +0.37% +0.62%] index_fill_ reverse : Elapsed 0.008 ms (0.803 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.37% +0.37%] index_fill_ skip64 : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.37% +0.50%] index_fill_ skip256 : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.37% +0.25%] index_fill_ spread : Elapsed 0.008 ms (0.804 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.12% +0.25%] index_fill_ strided 3 : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.25% +0.25%] index_fill_ strided 5 : Elapsed 0.008 ms (0.804 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.25% +0.37%] index_fill_ strided 7 : Elapsed 0.008 ms (0.804 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.50% +0.75%] index_fill_ strided 8 : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.804 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.50% +0.50%] index_fill_ strided 16 : Elapsed 0.008 ms (0.803 ms / 100) 0.802 -> 0.804 ( +0.25%) [ +0.37% +0.12% +0.00% / +0.25% +0.37% +0.37%] index_fill_ random : Elapsed 0.008 ms (0.805 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +0.37% +0.50%] index_fill_ random_sorted : Elapsed 0.008 ms (0.803 ms / 100) 0.804 -> 0.803 ( -0.12%) [ +0.00% +0.00% +0.12% / -0.12% +0.12% +0.25%] index_fill_ perm : Elapsed 0.008 ms (0.804 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.25% +0.12%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.803 ms / 100) B = [5, 16, 20, 40] (stride (1, 4000, 5, 100)) A = [5, 16, 20, 4] (stride (1280, 1, 64, 16)) dim = 3 1.357 -> 1.358 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.52% +0.44%] index_add_ linear : Elapsed 0.014 ms (1.358 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.53% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.347 -> 1.347 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_add_ reverse : Elapsed 0.013 ms (1.347 ms / 100) 1.309 -> 1.311 ( +0.15%) [ +0.08% +0.15% +0.00% / +0.15% +0.53% +0.61%] index_copy_ reverse : Elapsed 0.013 ms (1.310 ms / 100) 1.348 -> 1.349 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.59% +0.67%] index_add_ spread : Elapsed 0.013 ms (1.350 ms / 100) 1.313 -> 1.312 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.30% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.313 ms / 100) 1.356 -> 1.355 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.81% +0.74%] index_add_ strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.00% +0.23% +0.15% / +0.00% +0.84% +0.76%] index_copy_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.00% +0.08% +0.30% / +0.00% +0.68% +0.53%] index_copy_ strided 7 : Elapsed 0.013 ms (1.316 ms / 100) 1.348 -> 1.348 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm : Elapsed 0.013 ms (1.350 ms / 100) 1.310 -> 1.315 ( +0.38%) [ +0.08% +0.23% +0.00% / +0.46% +0.53% +0.38%] index_copy_ perm : Elapsed 0.013 ms (1.311 ms / 100) 1.346 -> 1.349 ( +0.22%) [ +0.00% +0.00% +0.00% / +0.22% +0.67% +0.89%] index_add_ perm_sorted : Elapsed 0.013 ms (1.346 ms / 100) 1.310 -> 1.315 ( +0.38%) [ +0.31% +0.15% +0.00% / +0.69% +0.38% +0.53%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.314 ms / 100) 9.176 -> 9.200 ( +0.26%) [ +0.00% +0.40% +0.13% / +0.26% +0.62% +0.27%] index_select const : Elapsed 0.092 ms (9.176 ms / 100) 9.196 -> 9.198 ( +0.02%) [ +0.25% +0.00% +0.20% / +0.02% +0.21% +0.18%] index_select wrap : Elapsed 0.092 ms (9.219 ms / 100) 9.184 -> 9.203 ( +0.21%) [ +0.00% +0.17% +0.09% / +0.38% +0.33% +0.21%] index_select linear : Elapsed 0.092 ms (9.184 ms / 100) 9.189 -> 9.191 ( +0.02%) [ +0.09% +0.24% +0.00% / +0.08% +0.02% +0.12%] index_select reverse : Elapsed 0.092 ms (9.197 ms / 100) 9.194 -> 9.189 ( -0.05%) [ +0.09% +0.03% +0.00% / -0.03% -0.05% +0.05%] index_select skip64 : Elapsed 0.092 ms (9.202 ms / 100) 9.176 -> 9.181 ( +0.05%) [ +0.00% +0.38% +0.04% / +0.05% +0.40% +0.20%] index_select skip256 : Elapsed 0.092 ms (9.176 ms / 100) 9.196 -> 9.201 ( +0.05%) [ +0.00% +0.05% +0.25% / +0.05% +0.40% +0.35%] index_select spread : Elapsed 0.092 ms (9.196 ms / 100) 9.213 -> 9.206 ( -0.08%) [ +0.01% +0.00% +0.01% / -0.08% +0.05% +0.01%] index_select strided 3 : Elapsed 0.092 ms (9.214 ms / 100) 9.207 -> 9.228 ( +0.23%) [ +0.00% +0.08% +0.09% / +0.25% +0.23% +0.25%] index_select random : Elapsed 0.092 ms (9.207 ms / 100) 9.194 -> 9.198 ( +0.04%) [ +0.00% +0.23% +0.14% / +0.04% +0.36% +0.22%] index_select random_sorted : Elapsed 0.092 ms (9.194 ms / 100) B = [5, 16, 20, 40] (stride (640, 40, 3200, 1)) A = [5, 16, 20, 4] (stride (1, 5, 80, 1600)) dim = 3 1.285 -> 1.285 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.47% +0.54%] index_add_ linear : Elapsed 0.013 ms (1.286 ms / 100) 1.242 -> 1.242 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.64% +0.72%] index_copy_ linear : Elapsed 0.012 ms (1.242 ms / 100) 1.285 -> 1.286 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.31% +0.54%] index_add_ reverse : Elapsed 0.013 ms (1.285 ms / 100) 1.244 -> 1.243 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.24% +0.40%] index_copy_ reverse : Elapsed 0.012 ms (1.246 ms / 100) 1.295 -> 1.293 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.23% +0.08%] index_add_ spread : Elapsed 0.013 ms (1.295 ms / 100) 1.251 -> 1.253 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.24% +0.16% +0.24%] index_copy_ spread : Elapsed 0.013 ms (1.251 ms / 100) 1.291 -> 1.292 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.54% +0.46%] index_add_ strided 3 : Elapsed 0.013 ms (1.292 ms / 100) 1.247 -> 1.253 ( +0.48%) [ +0.16% +0.40% +0.00% / +0.48% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.012 ms (1.249 ms / 100) 1.295 -> 1.294 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.54% +0.46%] index_add_ strided 7 : Elapsed 0.013 ms (1.295 ms / 100) 1.252 -> 1.255 ( +0.24%) [ +0.08% +0.00% +0.00% / +0.24% +0.64% +0.72%] index_copy_ strided 7 : Elapsed 0.013 ms (1.253 ms / 100) 1.292 -> 1.292 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.08% +0.00% +0.39%] index_add_ perm : Elapsed 0.013 ms (1.294 ms / 100) 1.249 -> 1.249 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.08% +0.00% +0.24%] index_copy_ perm : Elapsed 0.012 ms (1.249 ms / 100) 1.287 -> 1.291 ( +0.31%) [ +0.39% +0.47% +0.00% / +0.31% +0.47% +0.62%] index_add_ perm_sorted : Elapsed 0.013 ms (1.292 ms / 100) 1.247 -> 1.250 ( +0.24%) [ +0.08% +0.16% +0.00% / +0.24% +0.32% +0.40%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.248 ms / 100) 8.776 -> 8.779 ( +0.03%) [ +0.00% +0.10% +0.19% / +0.03% +0.05% +0.24%] index_select const : Elapsed 0.088 ms (8.776 ms / 100) 8.790 -> 8.788 ( -0.02%) [ +0.00% +0.39% +0.00% / +0.09% -0.02% +0.27%] index_select wrap : Elapsed 0.088 ms (8.790 ms / 100) 8.794 -> 8.792 ( -0.02%) [ +0.00% +0.36% +0.02% / +0.00% -0.02% +0.16%] index_select linear : Elapsed 0.088 ms (8.794 ms / 100) 8.785 -> 8.793 ( +0.09%) [ +0.00% +0.07% +0.18% / +0.09% +0.09% +0.25%] index_select reverse : Elapsed 0.088 ms (8.785 ms / 100) 8.777 -> 8.769 ( -0.09%) [ +0.00% +0.09% +0.08% / -0.09% +0.11% +0.13%] index_select skip64 : Elapsed 0.088 ms (8.777 ms / 100) 8.772 -> 8.783 ( +0.13%) [ +0.00% +0.13% +0.22% / +0.13% +0.18% +0.21%] index_select skip256 : Elapsed 0.088 ms (8.772 ms / 100) 8.801 -> 8.778 ( -0.26%) [ +0.05% +0.22% +0.00% / +0.02% -0.26% -0.05%] index_select spread : Elapsed 0.088 ms (8.805 ms / 100) 8.800 -> 8.788 ( -0.14%) [ +0.05% +0.00% +0.10% / +0.01% -0.14% +0.01%] index_select strided 3 : Elapsed 0.088 ms (8.804 ms / 100) 8.805 -> 8.800 ( -0.06%) [ +0.05% +0.00% +0.17% / +0.02% -0.02% -0.06%] index_select random : Elapsed 0.088 ms (8.809 ms / 100) 8.790 -> 8.802 ( +0.14%) [ +0.00% +0.35% +0.34% / +0.35% +0.27% +0.14%] index_select random_sorted : Elapsed 0.088 ms (8.790 ms / 100) B = [5, 16, 20, 40] (stride (20, 100, 1, 1600)) A = [5, 16, 20, 4] (stride (1280, 1, 64, 16)) dim = 3 1.355 -> 1.356 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.014 ms (1.356 ms / 100) 1.317 -> 1.320 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.53% +0.53%] index_copy_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.345 -> 1.344 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.59% +0.52%] index_add_ reverse : Elapsed 0.013 ms (1.346 ms / 100) 1.309 -> 1.314 ( +0.38%) [ +0.00% +0.08% +0.00% / +0.38% +0.61% +0.53%] index_copy_ reverse : Elapsed 0.013 ms (1.309 ms / 100) 1.346 -> 1.347 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.74% +0.74%] index_add_ spread : Elapsed 0.013 ms (1.347 ms / 100) 1.307 -> 1.316 ( +0.69%) [ +0.00% +0.23% +0.31% / +1.15% +0.69% +0.69%] index_copy_ spread : Elapsed 0.013 ms (1.307 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.81% +0.74%] index_add_ strided 3 : Elapsed 0.014 ms (1.356 ms / 100) 1.315 -> 1.316 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.84% +0.76%] index_copy_ strided 3 : Elapsed 0.013 ms (1.315 ms / 100) 1.354 -> 1.356 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.59% +0.74%] index_add_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.00% +0.15% +0.15% / +0.23% +0.61% +0.61%] index_copy_ strided 7 : Elapsed 0.013 ms (1.315 ms / 100) 1.347 -> 1.346 ( -0.07%) [ +0.15% +0.07% +0.00% / -0.07% +0.59% +0.59%] index_add_ perm : Elapsed 0.013 ms (1.349 ms / 100) 1.309 -> 1.313 ( +0.31%) [ +0.15% +0.00% +0.08% / +0.31% +0.61% +0.53%] index_copy_ perm : Elapsed 0.013 ms (1.311 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.74%] index_add_ perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) 1.309 -> 1.309 ( +0.00%) [ +0.00% +0.38% +0.15% / +0.00% +0.46% +0.53%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.309 ms / 100) 9.148 -> 9.141 ( -0.08%) [ +0.01% +0.15% +0.00% / -0.08% +0.33% +0.19%] index_select const : Elapsed 0.091 ms (9.149 ms / 100) 9.150 -> 9.151 ( +0.01%) [ +0.00% +0.25% +0.09% / +0.01% +0.22% +0.30%] index_select wrap : Elapsed 0.091 ms (9.150 ms / 100) 9.151 -> 9.161 ( +0.11%) [ +0.17% +0.39% +0.00% / +0.11% +0.33% +0.27%] index_select linear : Elapsed 0.092 ms (9.167 ms / 100) 9.165 -> 9.155 ( -0.11%) [ +0.00% +0.02% +0.11% / -0.11% +0.11% +0.20%] index_select reverse : Elapsed 0.092 ms (9.165 ms / 100) 9.137 -> 9.156 ( +0.21%) [ +0.00% +0.27% +0.02% / +0.21% +0.23% +0.32%] index_select skip64 : Elapsed 0.091 ms (9.137 ms / 100) 9.137 -> 9.134 ( -0.03%) [ +0.00% +0.21% +0.15% / -0.03% +0.33% +0.33%] index_select skip256 : Elapsed 0.091 ms (9.137 ms / 100) 9.157 -> 9.171 ( +0.15%) [ +0.12% +0.15% +0.00% / +0.15% +0.40% +0.37%] index_select spread : Elapsed 0.092 ms (9.168 ms / 100) 9.146 -> 9.164 ( +0.20%) [ +0.00% +0.34% +0.15% / +0.23% +0.20% +0.33%] index_select strided 3 : Elapsed 0.091 ms (9.146 ms / 100) 9.151 -> 9.160 ( +0.10%) [ +0.04% +0.00% +0.05% / +0.10% +0.21% +0.47%] index_select random : Elapsed 0.092 ms (9.155 ms / 100) 9.156 -> 9.152 ( -0.04%) [ +0.15% +0.25% +0.00% / -0.04% +0.34% +0.28%] index_select random_sorted : Elapsed 0.092 ms (9.170 ms / 100) B = [5, 16, 20, 40] (stride (1, 5, 80, 1600)) A = [5, 16, 20, 4] (stride (4, 400, 20, 1)) dim = 3 1.248 -> 1.249 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.56% +0.48%] index_add_ linear : Elapsed 0.012 ms (1.249 ms / 100) 1.222 -> 1.224 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.41% +0.49%] index_copy_ linear : Elapsed 0.012 ms (1.222 ms / 100) 1.248 -> 1.253 ( +0.40%) [ +0.00% +0.08% +0.08% / +0.40% +0.48% +0.56%] index_add_ reverse : Elapsed 0.012 ms (1.248 ms / 100) 1.220 -> 1.223 ( +0.25%) [ +0.00% +0.16% +0.16% / +0.25% +0.41% +0.41%] index_copy_ reverse : Elapsed 0.012 ms (1.220 ms / 100) 1.241 -> 1.242 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.40% +0.89%] index_add_ spread : Elapsed 0.012 ms (1.242 ms / 100) 1.222 -> 1.225 ( +0.25%) [ +0.00% +0.00% +0.08% / +0.25% +0.41% +0.65%] index_copy_ spread : Elapsed 0.012 ms (1.222 ms / 100) 1.275 -> 1.282 ( +0.55%) [ +0.16% +0.16% +0.00% / +0.63% +0.55% +0.55%] index_add_ strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.240 -> 1.242 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.24% +0.24%] index_copy_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.63%] index_add_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.57% +0.97%] index_copy_ strided 7 : Elapsed 0.012 ms (1.235 ms / 100) 1.241 -> 1.246 ( +0.40%) [ +0.08% +0.00% +0.00% / +0.48% +0.40% +0.56%] index_add_ perm : Elapsed 0.012 ms (1.242 ms / 100) 1.221 -> 1.226 ( +0.41%) [ +0.00% +0.16% +0.00% / +0.41% +0.82% +0.41%] index_copy_ perm : Elapsed 0.012 ms (1.221 ms / 100) 1.249 -> 1.251 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.32% +0.32%] index_add_ perm_sorted : Elapsed 0.012 ms (1.250 ms / 100) 1.222 -> 1.223 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.33% +0.33%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.222 ms / 100) 8.749 -> 8.758 ( +0.10%) [ +0.02% +0.24% +0.00% / +0.18% +0.10% +0.10%] index_select const : Elapsed 0.088 ms (8.751 ms / 100) 8.745 -> 8.769 ( +0.27%) [ +0.10% +0.09% +0.00% / +0.27% +0.29% +0.29%] index_select wrap : Elapsed 0.088 ms (8.754 ms / 100) 8.756 -> 8.773 ( +0.19%) [ +0.14% +0.00% +0.01% / +0.22% +0.22% +0.19%] index_select linear : Elapsed 0.088 ms (8.768 ms / 100) 8.748 -> 8.764 ( +0.18%) [ +0.09% +0.00% +0.07% / +0.18% +0.30% +0.38%] index_select reverse : Elapsed 0.088 ms (8.756 ms / 100) 8.740 -> 8.760 ( +0.23%) [ +0.00% +0.24% +0.27% / +0.23% +0.37% +0.54%] index_select skip64 : Elapsed 0.087 ms (8.740 ms / 100) 8.749 -> 8.757 ( +0.09%) [ +0.18% +0.00% +0.07% / +0.09% +0.22% +0.26%] index_select skip256 : Elapsed 0.088 ms (8.765 ms / 100) 8.750 -> 8.759 ( +0.10%) [ +0.08% +0.00% +0.22% / +0.22% +0.38% +0.10%] index_select spread : Elapsed 0.088 ms (8.757 ms / 100) 8.742 -> 8.752 ( +0.11%) [ +0.53% +0.00% +0.22% / +0.25% +0.67% +0.11%] index_select strided 3 : Elapsed 0.088 ms (8.788 ms / 100) 8.751 -> 8.757 ( +0.07%) [ +0.00% +0.08% +0.09% / +0.09% +0.39% +0.07%] index_select random : Elapsed 0.088 ms (8.751 ms / 100) 8.753 -> 8.756 ( +0.03%) [ +0.15% +0.11% +0.00% / +0.03% +0.14% +0.03%] index_select random_sorted : Elapsed 0.088 ms (8.766 ms / 100) out_shape = [40, 20, 4, 16] in_shape = [5, 20, 4, 16] idx_dim = 0 B = [40, 20, 4, 16] (stride (1280, 1, 320, 20)) A = [5, 20, 4, 16] (stride (1280, 1, 20, 80)) dim = 0 1.513 -> 1.514 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.40% +0.33%] index_add_ linear : Elapsed 0.015 ms (1.515 ms / 100) 1.458 -> 1.460 ( +0.14%) [ +0.07% +0.21% +0.00% / +0.14% +0.69% +0.75%] index_copy_ linear : Elapsed 0.015 ms (1.459 ms / 100) 1.513 -> 1.514 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.40% +0.46%] index_add_ reverse : Elapsed 0.015 ms (1.515 ms / 100) 1.461 -> 1.466 ( +0.34%) [ +0.21% +0.07% +0.00% / +0.34% +0.48% +0.48%] index_copy_ reverse : Elapsed 0.015 ms (1.464 ms / 100) 1.518 -> 1.521 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.33% +0.40%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.464 -> 1.466 ( +0.14%) [ +0.20% +0.14% +0.00% / +0.14% +0.41% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.467 ms / 100) 1.517 -> 1.521 ( +0.26%) [ +0.07% +0.13% +0.00% / +0.26% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.518 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.14% +0.21% +0.00% / +0.14% +0.55% +0.48%] index_copy_ strided 3 : Elapsed 0.015 ms (1.464 ms / 100) 1.513 -> 1.514 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.53% +0.46%] index_add_ strided 7 : Elapsed 0.015 ms (1.514 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.41% +0.62%] index_copy_ strided 7 : Elapsed 0.015 ms (1.464 ms / 100) 1.513 -> 1.512 ( -0.07%) [ +0.13% +0.00% +0.00% / -0.07% +0.53% +0.46%] index_add_ perm : Elapsed 0.015 ms (1.515 ms / 100) 1.464 -> 1.466 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.48% +0.55%] index_copy_ perm : Elapsed 0.015 ms (1.464 ms / 100) 1.512 -> 1.514 ( +0.13%) [ +0.00% +0.26% +0.07% / +0.13% +0.46% +0.46%] index_add_ perm_sorted : Elapsed 0.015 ms (1.512 ms / 100) 1.466 -> 1.469 ( +0.20%) [ +0.00% +0.07% +0.00% / +0.20% +0.41% +0.61%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.466 ms / 100) 8.187 -> 8.194 ( +0.09%) [ +0.00% +0.23% +0.13% / +0.09% +0.27% +0.37%] index_select const : Elapsed 0.082 ms (8.187 ms / 100) 8.202 -> 8.193 ( -0.11%) [ +0.00% +0.01% +0.17% / +0.15% -0.11% -0.01%] index_select wrap : Elapsed 0.082 ms (8.202 ms / 100) 8.210 -> 8.201 ( -0.11%) [ +0.00% +0.33% +0.23% / -0.09% -0.11% +0.00%] index_select linear : Elapsed 0.082 ms (8.210 ms / 100) 8.184 -> 8.199 ( +0.18%) [ +0.00% +0.51% +0.22% / +0.38% +0.18% +0.43%] index_select reverse : Elapsed 0.082 ms (8.184 ms / 100) 8.186 -> 8.199 ( +0.16%) [ +0.21% +0.00% +0.13% / +0.17% +0.16% +0.28%] index_select skip64 : Elapsed 0.082 ms (8.203 ms / 100) 8.186 -> 8.189 ( +0.04%) [ +0.12% +0.07% +0.00% / +0.04% +0.22% +0.06%] index_select skip256 : Elapsed 0.082 ms (8.196 ms / 100) 8.202 -> 8.197 ( -0.06%) [ +0.17% +0.00% +0.00% / -0.06% +0.09% +0.09%] index_select spread : Elapsed 0.082 ms (8.216 ms / 100) 8.194 -> 8.189 ( -0.06%) [ +0.00% +0.27% +0.09% / +0.38% +0.22% -0.06%] index_select strided 3 : Elapsed 0.082 ms (8.194 ms / 100) 8.200 -> 8.213 ( +0.16%) [ +0.18% +0.16% +0.00% / +0.17% +0.49% +0.16%] index_select random : Elapsed 0.082 ms (8.215 ms / 100) 8.206 -> 8.214 ( +0.10%) [ +0.00% +0.04% +0.02% / +0.10% +0.13% +0.16%] index_select random_sorted : Elapsed 0.082 ms (8.206 ms / 100) B = [40, 20, 4, 16] (stride (1280, 1, 20, 80)) A = [5, 20, 4, 16] (stride (20, 1, 100, 400)) dim = 0 1.649 -> 1.649 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.67%] index_add_ linear : Elapsed 0.016 ms (1.649 ms / 100) 1.599 -> 1.599 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.38% +1.00%] index_copy_ linear : Elapsed 0.016 ms (1.601 ms / 100) 1.649 -> 1.650 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.49% +0.55%] index_add_ reverse : Elapsed 0.017 ms (1.650 ms / 100) 1.597 -> 1.597 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.81% +0.63%] index_copy_ reverse : Elapsed 0.016 ms (1.599 ms / 100) 1.653 -> 1.654 ( +0.06%) [ +0.12% +0.00% +0.06% / +0.06% +0.67% +0.73%] index_add_ spread : Elapsed 0.017 ms (1.655 ms / 100) 1.598 -> 1.601 ( +0.19%) [ +0.13% +0.13% +0.00% / +0.19% +0.63% +0.69%] index_copy_ spread : Elapsed 0.016 ms (1.600 ms / 100) 1.646 -> 1.646 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.73% +0.67%] index_add_ strided 3 : Elapsed 0.016 ms (1.647 ms / 100) 1.595 -> 1.596 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.63% +0.56%] index_copy_ strided 3 : Elapsed 0.016 ms (1.596 ms / 100) 1.640 -> 1.641 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.85% +0.79%] index_add_ strided 7 : Elapsed 0.016 ms (1.642 ms / 100) 1.595 -> 1.596 ( +0.06%) [ +0.06% +0.13% +0.00% / +0.06% +1.13% +1.13%] index_copy_ strided 7 : Elapsed 0.016 ms (1.596 ms / 100) 1.640 -> 1.641 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.55% +0.61%] index_add_ perm : Elapsed 0.016 ms (1.642 ms / 100) 1.596 -> 1.596 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.56% +0.63%] index_copy_ perm : Elapsed 0.016 ms (1.597 ms / 100) 1.646 -> 1.647 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.67% +0.67%] index_add_ perm_sorted : Elapsed 0.016 ms (1.648 ms / 100) 1.595 -> 1.596 ( +0.06%) [ +0.00% +0.06% +0.13% / +0.06% +0.69% +0.75%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.595 ms / 100) 8.524 -> 8.528 ( +0.05%) [ +0.16% +0.28% +0.00% / +0.05% +0.25% +0.27%] index_select const : Elapsed 0.085 ms (8.538 ms / 100) 8.532 -> 8.543 ( +0.13%) [ +0.18% +0.30% +0.00% / +0.13% +0.32% +0.40%] index_select wrap : Elapsed 0.085 ms (8.547 ms / 100) 8.537 -> 8.550 ( +0.15%) [ +0.00% +0.21% +0.00% / +0.15% +0.19% +0.16%] index_select linear : Elapsed 0.085 ms (8.537 ms / 100) 8.537 -> 8.531 ( -0.07%) [ +0.00% +0.16% +0.11% / -0.07% +0.34% +0.22%] index_select reverse : Elapsed 0.085 ms (8.537 ms / 100) 8.519 -> 8.537 ( +0.21%) [ +0.00% +0.35% +0.35% / +0.31% +0.21% +0.26%] index_select skip64 : Elapsed 0.085 ms (8.519 ms / 100) 8.525 -> 8.529 ( +0.05%) [ +0.15% +0.18% +0.00% / +0.26% +0.36% +0.05%] index_select skip256 : Elapsed 0.085 ms (8.538 ms / 100) 8.554 -> 8.552 ( -0.02%) [ +0.18% +0.00% +0.04% / +0.02% -0.02% +0.18%] index_select spread : Elapsed 0.086 ms (8.569 ms / 100) 8.533 -> 8.535 ( +0.02%) [ +0.21% +0.00% +0.18% / +0.29% +0.02% +0.23%] index_select strided 3 : Elapsed 0.086 ms (8.551 ms / 100) 8.534 -> 8.545 ( +0.13%) [ +0.18% +0.21% +0.00% / +0.13% +0.29% +0.22%] index_select random : Elapsed 0.085 ms (8.549 ms / 100) 8.553 -> 8.546 ( -0.08%) [ +0.00% +0.02% +0.04% / -0.08% +0.18% +0.47%] index_select random_sorted : Elapsed 0.086 ms (8.553 ms / 100) B = [40, 20, 4, 16] (stride (4, 2560, 1, 160)) A = [5, 20, 4, 16] (stride (1, 80, 1600, 5)) dim = 0 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.20% +0.00% / +0.00% +0.53% +0.53%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.14% +0.00% +0.00% / +0.20% +0.47% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.07% +0.20% +0.00% / +0.13% +0.53% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.521 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.27% +0.27%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.46% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.47% +0.47%] index_copy_ spread : Elapsed 0.015 ms (1.477 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.47% +0.41%] index_copy_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.07% +0.13% +0.00% / +0.13% +0.66% +0.66%] index_add_ strided 7 : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.478 ms / 100) 1.519 -> 1.521 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.66% +0.86%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.47% +0.88%] index_copy_ perm : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.54% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) 8.541 -> 8.530 ( -0.13%) [ +0.00% +0.00% +0.29% / -0.13% +0.43% +0.27%] index_select const : Elapsed 0.085 ms (8.541 ms / 100) 8.536 -> 8.537 ( +0.01%) [ +0.00% +0.01% +0.02% / +0.01% +0.20% +0.27%] index_select wrap : Elapsed 0.085 ms (8.536 ms / 100) 8.537 -> 8.555 ( +0.21%) [ +0.00% +0.04% +0.09% / +0.21% +0.25% +0.28%] index_select linear : Elapsed 0.085 ms (8.537 ms / 100) 8.550 -> 8.551 ( +0.01%) [ +0.07% +0.02% +0.00% / +0.08% +0.16% +0.01%] index_select reverse : Elapsed 0.086 ms (8.556 ms / 100) 8.546 -> 8.551 ( +0.06%) [ +0.00% +0.01% +0.21% / +0.06% +0.50% +0.22%] index_select skip64 : Elapsed 0.085 ms (8.546 ms / 100) 8.546 -> 8.549 ( +0.04%) [ +0.00% +0.06% +0.23% / +0.04% +0.08% +0.11%] index_select skip256 : Elapsed 0.085 ms (8.546 ms / 100) 8.543 -> 8.543 ( +0.00%) [ +0.14% +0.00% +0.01% / +0.19% +0.19% +0.00%] index_select spread : Elapsed 0.086 ms (8.555 ms / 100) 8.539 -> 8.530 ( -0.11%) [ +0.11% +0.00% +0.16% / -0.11% +0.21% +0.19%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.541 -> 8.552 ( +0.13%) [ +0.08% +0.00% +0.29% / +0.13% +0.16% +0.13%] index_select random : Elapsed 0.085 ms (8.548 ms / 100) 8.542 -> 8.550 ( +0.09%) [ +0.15% +0.00% +0.02% / +0.09% +0.40% +0.25%] index_select random_sorted : Elapsed 0.086 ms (8.555 ms / 100) B = [40, 20, 4, 16] (stride (4, 2560, 1, 160)) A = [5, 20, 4, 16] (stride (1, 20, 5, 400)) dim = 0 1.494 -> 1.494 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.67% +0.87%] index_add_ linear : Elapsed 0.015 ms (1.494 ms / 100) 1.442 -> 1.448 ( +0.42%) [ +0.21% +0.00% +0.28% / +0.42% +0.69% +0.69%] index_copy_ linear : Elapsed 0.014 ms (1.445 ms / 100) 1.491 -> 1.491 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.60% +0.60%] index_add_ reverse : Elapsed 0.015 ms (1.491 ms / 100) 1.444 -> 1.445 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.69% +0.76%] index_copy_ reverse : Elapsed 0.014 ms (1.447 ms / 100) 1.491 -> 1.492 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.60% +0.60%] index_add_ spread : Elapsed 0.015 ms (1.492 ms / 100) 1.441 -> 1.442 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.69% +0.69%] index_copy_ spread : Elapsed 0.014 ms (1.441 ms / 100) 1.493 -> 1.495 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.54% +0.74%] index_add_ strided 3 : Elapsed 0.015 ms (1.495 ms / 100) 1.441 -> 1.444 ( +0.21%) [ +0.00% +0.14% +0.14% / +0.21% +0.42% +0.62%] index_copy_ strided 3 : Elapsed 0.014 ms (1.441 ms / 100) 1.494 -> 1.495 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.67% +0.67%] index_add_ strided 7 : Elapsed 0.015 ms (1.495 ms / 100) 1.447 -> 1.449 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.76% +0.76%] index_copy_ strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.87% +0.80%] index_add_ perm : Elapsed 0.015 ms (1.494 ms / 100) 1.447 -> 1.446 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.69% +0.76%] index_copy_ perm : Elapsed 0.014 ms (1.447 ms / 100) 1.493 -> 1.492 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.60% +0.74%] index_add_ perm_sorted : Elapsed 0.015 ms (1.494 ms / 100) 1.441 -> 1.441 ( +0.00%) [ +0.42% +0.00% +0.21% / +0.00% +0.62% +0.62%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.447 ms / 100) 8.234 -> 8.237 ( +0.04%) [ +0.00% +0.15% +0.06% / +0.26% +0.04% +0.05%] index_select const : Elapsed 0.082 ms (8.234 ms / 100) 8.235 -> 8.241 ( +0.07%) [ +0.30% +0.00% +0.13% / +0.26% +0.07% +0.19%] index_select wrap : Elapsed 0.083 ms (8.260 ms / 100) 8.230 -> 8.234 ( +0.05%) [ +0.01% +0.12% +0.00% / +0.05% +0.45% +0.24%] index_select linear : Elapsed 0.082 ms (8.231 ms / 100) 8.237 -> 8.242 ( +0.06%) [ +0.10% +0.06% +0.00% / +0.10% +0.16% +0.06%] index_select reverse : Elapsed 0.082 ms (8.245 ms / 100) 8.226 -> 8.237 ( +0.13%) [ +0.00% +0.06% +0.24% / +0.13% +0.18% +0.17%] index_select skip64 : Elapsed 0.082 ms (8.226 ms / 100) 8.234 -> 8.231 ( -0.04%) [ +0.05% +0.13% +0.00% / -0.04% +0.10% +0.11%] index_select skip256 : Elapsed 0.082 ms (8.238 ms / 100) 8.236 -> 8.241 ( +0.06%) [ +0.15% +0.00% +0.29% / +0.18% +0.06% +0.21%] index_select spread : Elapsed 0.082 ms (8.248 ms / 100) 8.232 -> 8.237 ( +0.06%) [ +0.16% +0.02% +0.00% / +0.12% +0.06% +0.53%] index_select strided 3 : Elapsed 0.082 ms (8.245 ms / 100) 8.233 -> 8.257 ( +0.29%) [ +0.00% +0.00% +0.06% / +0.32% +0.39% +0.29%] index_select random : Elapsed 0.082 ms (8.233 ms / 100) 8.236 -> 8.250 ( +0.17%) [ +0.00% +0.27% +0.17% / +0.17% +0.36% +0.29%] index_select random_sorted : Elapsed 0.082 ms (8.236 ms / 100) B = [40, 20, 4, 16] (stride (4, 160, 1, 3200)) A = [5, 20, 4, 16] (stride (1, 80, 1600, 5)) dim = 0 1.520 -> 1.522 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +0.53% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.20% +0.34% +0.00% / +0.07% +0.47% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.480 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.20% +0.13% +0.00% / +0.13% +0.59% +0.53%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.480 -> 1.478 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.27% +0.27%] index_copy_ reverse : Elapsed 0.015 ms (1.480 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.07% +0.13% +0.00% / -0.07% +0.53% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.477 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.41% +0.34%] index_copy_ spread : Elapsed 0.015 ms (1.479 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.59% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.41% / +0.00% +0.41% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.66% +0.72%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.479 ( +0.20%) [ +0.07% +0.20% +0.00% / +0.20% +0.68% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.53% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.20% +0.07% / +0.00% +0.54% +0.61%] index_copy_ perm : Elapsed 0.015 ms (1.477 ms / 100) 1.519 -> 1.521 ( +0.13%) [ +0.20% +0.20% +0.00% / +0.13% +0.72% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.47% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.478 ms / 100) 8.548 -> 8.543 ( -0.06%) [ +0.00% +0.14% +0.19% / -0.06% +0.30% +0.06%] index_select const : Elapsed 0.085 ms (8.548 ms / 100) 8.537 -> 8.553 ( +0.19%) [ +0.26% +0.09% +0.00% / +0.20% +0.19% +0.28%] index_select wrap : Elapsed 0.086 ms (8.559 ms / 100) 8.546 -> 8.540 ( -0.07%) [ +0.01% +0.02% +0.00% / -0.07% +0.14% +0.35%] index_select linear : Elapsed 0.085 ms (8.547 ms / 100) 8.535 -> 8.547 ( +0.14%) [ +0.29% +0.21% +0.00% / +0.14% +0.55% +0.21%] index_select reverse : Elapsed 0.086 ms (8.560 ms / 100) 8.539 -> 8.539 ( +0.00%) [ +0.00% +0.05% +0.07% / +0.02% +0.25% +0.00%] index_select skip64 : Elapsed 0.085 ms (8.539 ms / 100) 8.540 -> 8.551 ( +0.13%) [ +0.19% +0.00% +0.08% / +0.39% +0.18% +0.13%] index_select skip256 : Elapsed 0.086 ms (8.556 ms / 100) 8.541 -> 8.538 ( -0.04%) [ +0.07% +0.09% +0.00% / -0.04% +0.32% +0.23%] index_select spread : Elapsed 0.085 ms (8.547 ms / 100) 8.543 -> 8.546 ( +0.04%) [ +0.06% +0.00% +0.02% / +0.04% +0.14% +0.13%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.547 -> 8.541 ( -0.07%) [ +0.00% +0.02% +0.05% / -0.02% -0.07% +0.05%] index_select random : Elapsed 0.085 ms (8.547 ms / 100) 8.537 -> 8.527 ( -0.12%) [ +0.00% +0.07% +0.29% / -0.12% +0.14% +0.30%] index_select random_sorted : Elapsed 0.085 ms (8.537 ms / 100) B = [40, 20, 4, 16] (stride (1, 160, 40, 3200)) A = [5, 20, 4, 16] (stride (1, 20, 5, 400)) dim = 0 0.652 -> 0.650 ( -0.31%) [ +0.00% +0.15% +0.15% / -0.31% +0.61% +0.46%] index_add_ linear : Elapsed 0.007 ms (0.652 ms / 100) 0.663 -> 0.664 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.30% +0.30% +0.15%] index_copy_ linear : Elapsed 0.007 ms (0.664 ms / 100) 0.653 -> 0.653 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.31% +0.46%] index_add_ reverse : Elapsed 0.007 ms (0.654 ms / 100) 0.662 -> 0.665 ( +0.45%) [ +0.30% +0.30% +0.00% / +0.45% +0.60% +0.60%] index_copy_ reverse : Elapsed 0.007 ms (0.664 ms / 100) 0.654 -> 0.655 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.46% +0.31%] index_add_ spread : Elapsed 0.007 ms (0.654 ms / 100) 0.663 -> 0.664 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.45% +0.30%] index_copy_ spread : Elapsed 0.007 ms (0.665 ms / 100) 0.653 -> 0.653 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.61% +0.61%] index_add_ strided 3 : Elapsed 0.007 ms (0.654 ms / 100) 0.661 -> 0.663 ( +0.30%) [ +0.00% +0.30% +0.45% / +0.30% +0.76% +0.91%] index_copy_ strided 3 : Elapsed 0.007 ms (0.661 ms / 100) 0.652 -> 0.653 ( +0.15%) [ +0.00% +0.46% +0.31% / +0.15% +1.07% +0.92%] index_add_ strided 7 : Elapsed 0.007 ms (0.652 ms / 100) 0.662 -> 0.664 ( +0.30%) [ +0.45% +0.00% +0.15% / +0.30% +0.76% +0.60%] index_copy_ strided 7 : Elapsed 0.007 ms (0.665 ms / 100) 0.651 -> 0.651 ( +0.00%) [ +0.31% +0.31% +0.00% / +0.00% +1.08% +1.08%] index_add_ perm : Elapsed 0.007 ms (0.653 ms / 100) 0.662 -> 0.663 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.45% +0.60%] index_copy_ perm : Elapsed 0.007 ms (0.662 ms / 100) 0.651 -> 0.650 ( -0.15%) [ +0.00% +0.00% +0.31% / -0.15% +0.92% +0.61%] index_add_ perm_sorted : Elapsed 0.007 ms (0.651 ms / 100) 0.660 -> 0.666 ( +0.91%) [ +0.30% +0.00% +0.45% / +3.18% +0.91% +0.91%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.662 ms / 100) 4.942 -> 4.842 ( -2.02%) [ +0.26% +0.00% +0.16% / -1.92% -1.84% -2.02%] index_select const : Elapsed 0.050 ms (4.955 ms / 100) 4.964 -> 4.831 ( -2.68%) [ +0.04% +0.00% +0.06% / -2.68% -2.54% -2.66%] index_select wrap : Elapsed 0.050 ms (4.966 ms / 100) 4.960 -> 4.837 ( -2.48%) [ +0.24% +0.00% +0.08% / -2.40% -2.48% -2.38%] index_select linear : Elapsed 0.050 ms (4.972 ms / 100) 4.949 -> 4.833 ( -2.34%) [ +0.26% +0.26% +0.00% / -2.34% -2.28% -2.28%] index_select reverse : Elapsed 0.050 ms (4.962 ms / 100) 4.956 -> 4.833 ( -2.48%) [ +0.06% +0.02% +0.00% / -2.48% -2.16% -2.32%] index_select skip64 : Elapsed 0.050 ms (4.959 ms / 100) 4.955 -> 4.838 ( -2.36%) [ +0.06% +0.14% +0.00% / -2.32% -2.36% -2.32%] index_select skip256 : Elapsed 0.050 ms (4.958 ms / 100) 4.945 -> 4.833 ( -2.26%) [ +0.00% +0.08% +0.12% / -2.26% -2.26% -2.10%] index_select spread : Elapsed 0.049 ms (4.945 ms / 100) 4.949 -> 4.826 ( -2.49%) [ +0.00% +0.24% +0.14% / -2.36% -2.16% -2.49%] index_select strided 3 : Elapsed 0.049 ms (4.949 ms / 100) 4.951 -> 4.824 ( -2.57%) [ +0.00% +0.24% +0.10% / -2.08% -2.57% -2.36%] index_select random : Elapsed 0.050 ms (4.951 ms / 100) 4.951 -> 4.828 ( -2.48%) [ +0.24% +0.00% +0.16% / -2.44% -2.48% -2.34%] index_select random_sorted : Elapsed 0.050 ms (4.963 ms / 100) out_shape = [5, 40, 4, 16] in_shape = [5, 20, 4, 16] idx_dim = 1 B = [5, 40, 4, 16] (stride (2560, 1, 40, 160)) A = [5, 20, 4, 16] (stride (20, 1, 100, 400)) dim = 1 2.449 -> 2.465 ( +0.65%) [ +0.04% +0.04% +0.00% / +0.65% +0.86% +0.90%] index_add_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.460 -> 2.467 ( +0.28%) [ +0.04% +0.00% +0.04% / +0.28% +0.61% +0.49%] index_copy_ linear : Elapsed 0.025 ms (2.461 ms / 100) 2.453 -> 2.463 ( +0.41%) [ +0.08% +0.20% +0.00% / +0.57% +0.57% +0.41%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.460 -> 2.469 ( +0.37%) [ +0.08% +0.49% +0.00% / +0.37% +0.53% +0.57%] index_copy_ reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.465 -> 2.472 ( +0.28%) [ +0.08% +0.00% +0.00% / +0.28% +0.45% +0.57%] index_add_ spread : Elapsed 0.025 ms (2.467 ms / 100) 2.476 -> 2.489 ( +0.53%) [ +0.00% +0.16% +0.00% / +0.65% +0.53% +0.77%] index_copy_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.463 -> 2.471 ( +0.32%) [ +0.24% +0.12% +0.00% / +0.41% +0.32% +0.37%] index_add_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.472 -> 2.486 ( +0.57%) [ +0.00% +0.04% +0.12% / +0.65% +0.89% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.468 -> 2.473 ( +0.20%) [ +0.08% +0.00% +0.00% / +0.32% +0.20% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.474 -> 2.490 ( +0.65%) [ +0.08% +0.00% +0.00% / +0.65% +0.65% +0.69%] index_copy_ strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.465 -> 2.475 ( +0.41%) [ +0.08% +0.08% +0.00% / +0.45% +0.53% +0.41%] index_add_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.486 ( +0.69%) [ +0.32% +0.24% +0.00% / +0.69% +1.05% +1.01%] index_copy_ perm : Elapsed 0.025 ms (2.477 ms / 100) 2.464 -> 2.470 ( +0.24%) [ +0.04% +0.20% +0.00% / +0.24% +0.45% +0.41%] index_add_ perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.475 -> 2.484 ( +0.36%) [ +0.04% +0.00% +0.00% / +0.36% +0.69% +0.85%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) 4.505 -> 4.507 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.04% +0.07% +0.09%] index_select const : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.18% +0.16% +0.00% / +0.07% +0.18% +0.27%] index_select wrap : Elapsed 0.045 ms (4.511 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.00% +0.04% +0.24% / +0.11% +0.02% +0.40%] index_select linear : Elapsed 0.045 ms (4.503 ms / 100) 4.507 -> 4.511 ( +0.09%) [ +0.29% +0.24% +0.00% / +0.09% +0.09% +0.40%] index_select reverse : Elapsed 0.045 ms (4.520 ms / 100) 4.495 -> 4.507 ( +0.27%) [ +0.00% +0.07% +0.29% / +0.36% +0.27% +0.29%] index_select skip64 : Elapsed 0.045 ms (4.495 ms / 100) 4.502 -> 4.499 ( -0.07%) [ +0.11% +0.00% +0.11% / +0.02% -0.07% +0.20%] index_select skip256 : Elapsed 0.045 ms (4.507 ms / 100) 4.504 -> 4.508 ( +0.09%) [ +0.22% +0.00% +0.18% / +0.09% +0.31% +0.22%] index_select spread : Elapsed 0.045 ms (4.514 ms / 100) 4.503 -> 4.507 ( +0.09%) [ +0.22% +0.18% +0.00% / +0.09% +0.09% +0.24%] index_select strided 3 : Elapsed 0.045 ms (4.513 ms / 100) 4.506 -> 4.507 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.02% +0.27% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.510 ms / 100) 4.507 -> 4.508 ( +0.02%) [ +0.11% +0.04% +0.00% / +0.02% +0.13% +0.31%] index_select strided 7 : Elapsed 0.045 ms (4.512 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.16% +0.00% +0.07% / +0.07% +0.33% +0.24%] index_select strided 8 : Elapsed 0.045 ms (4.510 ms / 100) 4.504 -> 4.503 ( -0.02%) [ +0.02% +0.00% +0.04% / -0.02% +0.18% +0.18%] index_select strided 16 : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.505 ( +0.00%) [ +0.02% +0.20% +0.00% / +0.00% +0.24% +0.31%] index_select random : Elapsed 0.045 ms (4.506 ms / 100) 4.507 -> 4.510 ( +0.07%) [ +0.00% +0.09% +0.13% / +0.07% +0.31% +0.07%] index_select random_sorted : Elapsed 0.045 ms (4.507 ms / 100) B = [5, 40, 4, 16] (stride (64, 320, 1, 4)) A = [5, 20, 4, 16] (stride (1280, 1, 320, 20)) dim = 1 2.396 -> 2.410 ( +0.58%) [ +0.04% +0.25% +0.00% / +0.58% +0.96% +0.83%] index_add_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.398 -> 2.410 ( +0.50%) [ +0.00% +0.04% +0.04% / +0.50% +0.83% +0.67%] index_copy_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.393 -> 2.401 ( +0.33%) [ +0.00% +0.17% +0.13% / +0.33% +0.92% +1.17%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.391 -> 2.401 ( +0.42%) [ +0.08% +0.17% +0.00% / +0.42% +1.13% +1.13%] index_copy_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.392 -> 2.407 ( +0.63%) [ +0.00% +0.33% +0.29% / +0.63% +1.00% +1.38%] index_add_ spread : Elapsed 0.024 ms (2.392 ms / 100) 2.393 -> 2.411 ( +0.75%) [ +0.25% +0.04% +0.00% / +0.75% +0.88% +1.17%] index_copy_ spread : Elapsed 0.024 ms (2.399 ms / 100) 2.403 -> 2.411 ( +0.33%) [ +0.00% +0.08% +0.08% / +0.33% +0.46% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.397 -> 2.412 ( +0.63%) [ +0.00% +0.29% +0.21% / +0.83% +0.63% +0.67%] index_copy_ strided 3 : Elapsed 0.024 ms (2.397 ms / 100) 2.402 -> 2.410 ( +0.33%) [ +0.12% +0.00% +0.08% / +0.58% +0.54% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.400 -> 2.409 ( +0.37%) [ +0.13% +0.00% +0.00% / +0.46% +0.50% +0.37%] index_copy_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.407 ( +0.21%) [ +0.00% +0.04% +0.08% / +0.67% +0.21% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.400 -> 2.406 ( +0.25%) [ +0.13% +0.17% +0.00% / +0.79% +0.25% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.403 ms / 100) 2.400 -> 2.406 ( +0.25%) [ +0.33% +0.33% +0.00% / +0.58% +0.33% +0.25%] index_add_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 2.402 -> 2.405 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.58% +0.12% +0.12%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.402 ms / 100) 4.436 -> 4.433 ( -0.07%) [ +0.02% +0.09% +0.00% / -0.07% +0.05% -0.05%] index_select const : Elapsed 0.044 ms (4.437 ms / 100) 4.438 -> 4.436 ( -0.05%) [ +0.07% +0.00% +0.09% / +0.07% -0.05% +0.14%] index_select wrap : Elapsed 0.044 ms (4.441 ms / 100) 4.440 -> 4.436 ( -0.09%) [ +0.14% +0.02% +0.00% / +0.02% -0.09% +0.09%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.441 -> 4.441 ( +0.00%) [ +0.00% +0.05% +0.11% / +0.02% +0.00% +0.02%] index_select reverse : Elapsed 0.044 ms (4.441 ms / 100) 4.431 -> 4.433 ( +0.05%) [ +0.18% +0.00% +0.00% / +0.16% +0.23% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.439 ms / 100) 4.434 -> 4.431 ( -0.07%) [ +0.16% +0.07% +0.00% / +0.00% +0.16% -0.07%] index_select skip256 : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.00% +0.02% +0.07% / +0.23% +0.11% +0.25%] index_select spread : Elapsed 0.044 ms (4.436 ms / 100) 4.434 -> 4.440 ( +0.14%) [ +0.09% +0.00% +0.20% / +0.14% +0.29% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.438 ms / 100) 4.435 -> 4.431 ( -0.09%) [ +0.11% +0.29% +0.00% / -0.09% +0.16% +0.02%] index_select strided 5 : Elapsed 0.044 ms (4.440 ms / 100) 4.438 -> 4.438 ( +0.00%) [ +0.07% +0.11% +0.00% / +0.09% +0.18% +0.00%] index_select strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.438 -> 4.440 ( +0.05%) [ +0.05% +0.00% +0.11% / +0.09% +0.07% +0.05%] index_select strided 8 : Elapsed 0.044 ms (4.440 ms / 100) 4.434 -> 4.434 ( +0.00%) [ +0.00% +0.02% +0.11% / +0.00% +0.05% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.434 ms / 100) 4.440 -> 4.436 ( -0.09%) [ +0.00% +0.02% +0.18% / +0.00% -0.09% +0.05%] index_select random : Elapsed 0.044 ms (4.440 ms / 100) 4.440 -> 4.435 ( -0.11%) [ +0.11% +0.00% +0.05% / +0.02% -0.11% -0.02%] index_select random_sorted : Elapsed 0.044 ms (4.445 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 80, 5)) A = [5, 20, 4, 16] (stride (1280, 16, 320, 1)) dim = 1 2.399 -> 2.412 ( +0.54%) [ +0.08% +0.04% +0.00% / +0.54% +0.83% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.390 -> 2.405 ( +0.63%) [ +0.13% +0.00% +0.21% / +0.63% +0.79% +0.71%] index_copy_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.25% +0.00% +0.12% / +0.50% +0.67% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.391 -> 2.405 ( +0.59%) [ +0.29% +0.00% +0.21% / +0.59% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.406 -> 2.414 ( +0.33%) [ +0.00% +0.17% +0.00% / +0.54% +0.33% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.406 ms / 100) 2.396 -> 2.405 ( +0.38%) [ +0.08% +0.04% +0.00% / +0.38% +0.54% +0.38%] index_copy_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.406 -> 2.414 ( +0.33%) [ +0.00% +0.08% +0.00% / +0.37% +0.33% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.395 -> 2.404 ( +0.38%) [ +0.13% +0.00% +0.00% / +0.46% +0.42% +0.38%] index_copy_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.405 -> 2.414 ( +0.37%) [ +0.00% +0.21% +0.04% / +0.37% +0.50% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.405 ms / 100) 2.394 -> 2.405 ( +0.46%) [ +0.00% +0.04% +0.25% / +0.46% +0.58% +0.67%] index_copy_ strided 7 : Elapsed 0.024 ms (2.394 ms / 100) 2.402 -> 2.416 ( +0.58%) [ +0.00% +0.12% +0.08% / +0.71% +0.58% +0.79%] index_add_ perm : Elapsed 0.024 ms (2.402 ms / 100) 2.392 -> 2.407 ( +0.63%) [ +0.00% +0.08% +0.13% / +0.63% +0.75% +0.71%] index_copy_ perm : Elapsed 0.024 ms (2.392 ms / 100) 2.400 -> 2.414 ( +0.58%) [ +0.00% +0.17% +0.17% / +0.58% +0.71% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.400 ms / 100) 2.393 -> 2.402 ( +0.38%) [ +0.13% +0.00% +0.00% / +0.38% +0.71% +0.79%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 4.413 -> 4.410 ( -0.07%) [ +0.18% +0.00% +0.16% / -0.07% +0.16% +0.07%] index_select const : Elapsed 0.044 ms (4.421 ms / 100) 4.421 -> 4.417 ( -0.09%) [ +0.11% +0.00% +0.00% / -0.09% +0.14% +0.16%] index_select wrap : Elapsed 0.044 ms (4.426 ms / 100) 4.420 -> 4.421 ( +0.02%) [ +0.20% +0.00% +0.05% / +0.02% +0.16% +0.20%] index_select linear : Elapsed 0.044 ms (4.429 ms / 100) 4.422 -> 4.420 ( -0.05%) [ +0.07% +0.02% +0.00% / -0.05% +0.27% +0.18%] index_select reverse : Elapsed 0.044 ms (4.425 ms / 100) 4.408 -> 4.415 ( +0.16%) [ +0.11% +0.11% +0.00% / +0.18% +0.16% +0.29%] index_select skip64 : Elapsed 0.044 ms (4.413 ms / 100) 4.411 -> 4.413 ( +0.05%) [ +0.00% +0.09% +0.14% / +0.05% +0.09% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.411 ms / 100) 4.419 -> 4.420 ( +0.02%) [ +0.00% +0.16% +0.27% / +0.02% +0.14% +0.23%] index_select spread : Elapsed 0.044 ms (4.419 ms / 100) 4.420 -> 4.423 ( +0.07%) [ +0.05% +0.00% +0.18% / +0.07% +0.23% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.422 ms / 100) 4.409 -> 4.415 ( +0.14%) [ +0.00% +0.29% +0.16% / +0.14% +0.20% +0.27%] index_select strided 5 : Elapsed 0.044 ms (4.409 ms / 100) 4.418 -> 4.422 ( +0.09%) [ +0.07% +0.14% +0.00% / +0.09% +0.27% +0.20%] index_select strided 7 : Elapsed 0.044 ms (4.421 ms / 100) 4.413 -> 4.416 ( +0.07%) [ +0.00% +0.00% +0.05% / +0.20% +0.25% +0.07%] index_select strided 8 : Elapsed 0.044 ms (4.413 ms / 100) 4.417 -> 4.413 ( -0.09%) [ +0.07% +0.00% +0.09% / -0.07% +0.07% -0.09%] index_select strided 16 : Elapsed 0.044 ms (4.420 ms / 100) 4.417 -> 4.418 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.02% +0.25% +0.27%] index_select random : Elapsed 0.044 ms (4.421 ms / 100) 4.414 -> 4.417 ( +0.07%) [ +0.00% +0.11% +0.20% / +0.07% +0.23% +0.29%] index_select random_sorted : Elapsed 0.044 ms (4.414 ms / 100) B = [5, 40, 4, 16] (stride (1, 320, 5, 20)) A = [5, 20, 4, 16] (stride (1280, 4, 1, 80)) dim = 1 2.440 -> 2.452 ( +0.49%) [ +0.12% +0.00% +0.00% / +0.49% +0.78% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.442 -> 2.454 ( +0.49%) [ +0.00% +0.12% +0.08% / +0.49% +0.86% +0.78%] index_copy_ linear : Elapsed 0.024 ms (2.442 ms / 100) 2.432 -> 2.444 ( +0.49%) [ +0.00% +0.12% +0.16% / +0.49% +1.15% +1.40%] index_add_ reverse : Elapsed 0.024 ms (2.432 ms / 100) 2.435 -> 2.449 ( +0.57%) [ +0.00% +0.12% +0.25% / +0.57% +1.11% +1.07%] index_copy_ reverse : Elapsed 0.024 ms (2.435 ms / 100) 2.435 -> 2.451 ( +0.66%) [ +0.12% +0.45% +0.00% / +0.66% +1.15% +1.27%] index_add_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.436 -> 2.449 ( +0.53%) [ +0.04% +0.00% +0.04% / +0.53% +1.19% +1.15%] index_copy_ spread : Elapsed 0.024 ms (2.437 ms / 100) 2.442 -> 2.452 ( +0.41%) [ +0.16% +0.33% +0.00% / +0.41% +0.66% +0.70%] index_add_ strided 3 : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.454 ( +0.41%) [ +0.00% +0.16% +0.08% / +0.41% +0.65% +0.45%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.447 -> 2.456 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.37% +0.49% +0.78%] index_add_ strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.440 -> 2.456 ( +0.66%) [ +0.20% +0.00% +0.25% / +0.66% +0.74% +0.78%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.442 -> 2.451 ( +0.37%) [ +0.29% +0.00% +0.12% / +0.66% +0.45% +0.37%] index_add_ perm : Elapsed 0.024 ms (2.449 ms / 100) 2.446 -> 2.454 ( +0.33%) [ +0.00% +0.04% +0.08% / +0.61% +0.49% +0.33%] index_copy_ perm : Elapsed 0.024 ms (2.446 ms / 100) 2.452 -> 2.457 ( +0.20%) [ +0.08% +0.00% +0.12% / +0.61% +0.20% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.454 ms / 100) 2.447 -> 2.454 ( +0.29%) [ +0.08% +0.08% +0.00% / +0.53% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.449 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.16% +0.00% +0.09% / +0.13% +0.18% +0.11%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.507 -> 4.503 ( -0.09%) [ +0.16% +0.00% +0.02% / +0.09% +0.07% -0.09%] index_select wrap : Elapsed 0.045 ms (4.514 ms / 100) 4.510 -> 4.503 ( -0.16%) [ +0.09% +0.02% +0.00% / +0.13% -0.16% -0.09%] index_select linear : Elapsed 0.045 ms (4.514 ms / 100) 4.507 -> 4.506 ( -0.02%) [ +0.16% +0.09% +0.00% / +0.20% -0.02% +0.13%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.501 -> 4.496 ( -0.11%) [ +0.00% +0.07% +0.09% / -0.02% +0.02% -0.11%] index_select skip64 : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.07% +0.16% +0.00% / +0.00% +0.02% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.501 ms / 100) 4.508 -> 4.504 ( -0.09%) [ +0.09% +0.02% +0.00% / -0.09% +0.07% +0.07%] index_select spread : Elapsed 0.045 ms (4.512 ms / 100) 4.506 -> 4.508 ( +0.04%) [ +0.00% +0.04% +0.07% / +0.04% +0.13% +0.27%] index_select strided 3 : Elapsed 0.045 ms (4.506 ms / 100) 4.497 -> 4.503 ( +0.13%) [ +0.00% +0.13% +0.27% / +0.16% +0.13% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.497 ms / 100) 4.507 -> 4.500 ( -0.16%) [ +0.00% +0.00% +0.04% / -0.16% +0.04% +0.16%] index_select strided 7 : Elapsed 0.045 ms (4.507 ms / 100) 4.501 -> 4.500 ( -0.02%) [ +0.11% +0.16% +0.00% / -0.02% +0.11% +0.09%] index_select strided 8 : Elapsed 0.045 ms (4.506 ms / 100) 4.504 -> 4.496 ( -0.18%) [ +0.09% +0.00% +0.00% / -0.07% -0.18% -0.13%] index_select strided 16 : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.510 ( +0.07%) [ +0.04% +0.18% +0.00% / +0.16% +0.07% +0.09%] index_select random : Elapsed 0.045 ms (4.509 ms / 100) 4.510 -> 4.503 ( -0.16%) [ +0.11% +0.00% +0.09% / +0.04% -0.16% -0.04%] index_select random_sorted : Elapsed 0.045 ms (4.515 ms / 100) B = [5, 40, 4, 16] (stride (640, 1, 3200, 40)) A = [5, 20, 4, 16] (stride (1, 320, 80, 5)) dim = 1 2.399 -> 2.409 ( +0.42%) [ +0.13% +0.08% +0.00% / +0.42% +0.54% +0.58%] index_add_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.407 -> 2.424 ( +0.71%) [ +0.00% +0.08% +0.21% / +0.71% +0.71% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.410 ( +0.21%) [ +0.04% +0.04% +0.00% / +0.29% +0.29% +0.21%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.410 -> 2.420 ( +0.41%) [ +0.00% +0.17% +0.08% / +0.54% +0.41% +0.46%] index_copy_ reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.414 -> 2.422 ( +0.33%) [ +0.12% +0.12% +0.00% / +0.46% +0.46% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.427 -> 2.439 ( +0.49%) [ +0.21% +0.00% +0.08% / +0.58% +0.49% +0.54%] index_copy_ spread : Elapsed 0.024 ms (2.432 ms / 100) 2.414 -> 2.421 ( +0.29%) [ +0.08% +0.12% +0.00% / +0.46% +0.29% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.416 ms / 100) 2.425 -> 2.433 ( +0.33%) [ +0.00% +0.21% +0.12% / +0.37% +0.33% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.425 ms / 100) 2.415 -> 2.423 ( +0.33%) [ +0.21% +0.17% +0.00% / +0.33% +0.66% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.427 -> 2.435 ( +0.33%) [ +0.00% +0.00% +0.08% / +0.49% +0.45% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.411 -> 2.422 ( +0.46%) [ +0.00% +0.25% +0.08% / +0.46% +0.83% +0.66%] index_add_ perm : Elapsed 0.024 ms (2.411 ms / 100) 2.427 -> 2.437 ( +0.41%) [ +0.08% +0.00% +0.16% / +0.41% +0.54% +0.58%] index_copy_ perm : Elapsed 0.024 ms (2.429 ms / 100) 2.411 -> 2.424 ( +0.54%) [ +0.25% +0.00% +0.00% / +0.54% +0.62% +0.62%] index_add_ perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.422 -> 2.440 ( +0.74%) [ +0.29% +0.33% +0.00% / +0.78% +0.78% +0.74%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 4.435 -> 4.430 ( -0.11%) [ +0.09% +0.05% +0.00% / -0.05% +0.02% -0.11%] index_select const : Elapsed 0.044 ms (4.439 ms / 100) 4.434 -> 4.437 ( +0.07%) [ +0.00% +0.09% +0.16% / +0.23% +0.07% +0.09%] index_select wrap : Elapsed 0.044 ms (4.434 ms / 100) 4.440 -> 4.435 ( -0.11%) [ +0.00% +0.00% +0.07% / -0.11% -0.05% +0.09%] index_select linear : Elapsed 0.044 ms (4.440 ms / 100) 4.430 -> 4.438 ( +0.18%) [ +0.20% +0.00% +0.23% / +0.29% +0.18% +0.34%] index_select reverse : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.430 ( -0.14%) [ +0.00% +0.02% +0.00% / +0.05% -0.05% -0.14%] index_select skip64 : Elapsed 0.044 ms (4.436 ms / 100) 4.430 -> 4.438 ( +0.18%) [ +0.00% +0.11% +0.05% / +0.23% +0.25% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.430 ms / 100) 4.435 -> 4.442 ( +0.16%) [ +0.00% +0.05% +0.18% / +0.16% +0.20% +0.23%] index_select spread : Elapsed 0.044 ms (4.435 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.11% +0.09% +0.00% / +0.36% +0.09% +0.14%] index_select strided 3 : Elapsed 0.044 ms (4.439 ms / 100) 4.439 -> 4.435 ( -0.09%) [ +0.00% +0.07% +0.00% / -0.09% -0.07% +0.00%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.435 -> 4.437 ( +0.05%) [ +0.11% +0.00% +0.05% / +0.05% +0.25% +0.07%] index_select strided 7 : Elapsed 0.044 ms (4.440 ms / 100) 4.430 -> 4.427 ( -0.07%) [ +0.14% +0.00% +0.23% / -0.05% -0.07% +0.16%] index_select strided 8 : Elapsed 0.044 ms (4.436 ms / 100) 4.432 -> 4.432 ( +0.00%) [ +0.02% +0.00% +0.09% / +0.00% +0.11% +0.00%] index_select strided 16 : Elapsed 0.044 ms (4.433 ms / 100) 4.433 -> 4.438 ( +0.11%) [ +0.09% +0.00% +0.09% / +0.11% +0.20% +0.25%] index_select random : Elapsed 0.044 ms (4.437 ms / 100) 4.434 -> 4.441 ( +0.16%) [ +0.14% +0.00% +0.02% / +0.16% +0.18% +0.18%] index_select random_sorted : Elapsed 0.044 ms (4.440 ms / 100) B = [5, 40, 4, 16] (stride (16, 80, 3200, 1)) A = [5, 20, 4, 16] (stride (80, 1, 20, 400)) dim = 1 2.453 -> 2.463 ( +0.41%) [ +0.00% +0.16% +0.00% / +0.41% +0.65% +0.61%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.446 -> 2.456 ( +0.41%) [ +0.12% +0.00% +0.16% / +0.41% +0.78% +0.78%] index_copy_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.446 -> 2.458 ( +0.49%) [ +0.04% +0.00% +0.04% / +0.49% +0.90% +1.06%] index_add_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.04% +0.00% +0.21% / +0.66% +1.15% +1.27%] index_copy_ reverse : Elapsed 0.024 ms (2.440 ms / 100) 2.442 -> 2.455 ( +0.53%) [ +0.08% +0.08% +0.00% / +0.53% +1.15% +1.19%] index_add_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.439 -> 2.449 ( +0.41%) [ +0.00% +0.04% +0.04% / +0.41% +1.03% +1.07%] index_copy_ spread : Elapsed 0.024 ms (2.439 ms / 100) 2.445 -> 2.464 ( +0.78%) [ +0.33% +0.00% +0.37% / +0.78% +0.90% +0.90%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.442 -> 2.455 ( +0.53%) [ +0.12% +0.00% +0.20% / +0.53% +0.90% +0.78%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.450 -> 2.462 ( +0.49%) [ +0.20% +0.04% +0.00% / +0.49% +0.65% +0.73%] index_add_ strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.445 -> 2.458 ( +0.53%) [ +0.00% +0.04% +0.04% / +0.53% +0.65% +0.65%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.448 -> 2.457 ( +0.37%) [ +0.20% +0.16% +0.00% / +0.65% +0.37% +0.61%] index_add_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.451 ( +0.16%) [ +0.04% +0.12% +0.00% / +0.41% +0.41% +0.16%] index_copy_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.00% +0.04% +0.16% / +0.41% +0.33% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 2.447 -> 2.456 ( +0.37%) [ +0.00% +0.12% +0.12% / +0.45% +0.37% +0.65%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 4.501 -> 4.499 ( -0.04%) [ +0.13% +0.00% +0.09% / +0.04% -0.04% +0.13%] index_select const : Elapsed 0.045 ms (4.507 ms / 100) 4.508 -> 4.507 ( -0.02%) [ +0.04% +0.00% +0.07% / +0.07% +0.09% -0.02%] index_select wrap : Elapsed 0.045 ms (4.510 ms / 100) 4.508 -> 4.504 ( -0.09%) [ +0.04% +0.00% +0.04% / +0.13% +0.11% -0.09%] index_select linear : Elapsed 0.045 ms (4.510 ms / 100) 4.516 -> 4.510 ( -0.13%) [ +0.00% +0.11% +0.07% / +0.11% -0.04% -0.13%] index_select reverse : Elapsed 0.045 ms (4.516 ms / 100) 4.505 -> 4.506 ( +0.02%) [ +0.11% +0.00% +0.02% / +0.22% +0.02% +0.09%] index_select skip64 : Elapsed 0.045 ms (4.510 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.11% +0.09% +0.00% / +0.09% +0.04% -0.02%] index_select skip256 : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.509 ( +0.04%) [ +0.00% +0.16% +0.02% / +0.20% +0.04% +0.27%] index_select spread : Elapsed 0.045 ms (4.507 ms / 100) 4.505 -> 4.509 ( +0.09%) [ +0.20% +0.20% +0.00% / +0.18% +0.09% +0.22%] index_select strided 3 : Elapsed 0.045 ms (4.514 ms / 100) 4.511 -> 4.513 ( +0.04%) [ +0.09% +0.00% +0.07% / +0.04% +0.22% +0.09%] index_select strided 5 : Elapsed 0.045 ms (4.515 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.00% +0.20% +0.07% / +0.07% +0.18% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.503 ms / 100) 4.508 -> 4.507 ( -0.02%) [ +0.00% +0.07% +0.00% / +0.04% -0.02% +0.11%] index_select strided 8 : Elapsed 0.045 ms (4.508 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.00% +0.22% +0.20% / +0.20% +0.07% +0.07%] index_select strided 16 : Elapsed 0.045 ms (4.505 ms / 100) 4.510 -> 4.506 ( -0.09%) [ +0.22% +0.04% +0.00% / +0.02% -0.09% +0.02%] index_select random : Elapsed 0.045 ms (4.520 ms / 100) 4.513 -> 4.510 ( -0.07%) [ +0.13% +0.13% +0.00% / +0.07% -0.07% -0.02%] index_select random_sorted : Elapsed 0.045 ms (4.519 ms / 100) B = [5, 40, 4, 16] (stride (16, 80, 3200, 1)) A = [5, 20, 4, 16] (stride (20, 1, 100, 400)) dim = 1 2.452 -> 2.462 ( +0.41%) [ +0.04% +0.04% +0.00% / +0.41% +0.45% +0.49%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.00% +0.12% +0.16% / +0.45% +0.90% +0.90%] index_copy_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.455 -> 2.462 ( +0.29%) [ +0.00% +0.00% +0.00% / +0.29% +0.53% +0.41%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.00% +0.00% +0.16% / +0.53% +0.57% +0.61%] index_copy_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.451 -> 2.465 ( +0.57%) [ +0.00% +0.16% +0.00% / +0.61% +0.57% +0.61%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.446 -> 2.455 ( +0.37%) [ +0.12% +0.00% +0.04% / +0.37% +0.49% +0.61%] index_copy_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.449 -> 2.463 ( +0.57%) [ +0.00% +0.33% +0.08% / +0.61% +0.57% +0.65%] index_add_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.00% +0.04% +0.16% / +0.45% +0.61% +0.65%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.451 -> 2.462 ( +0.45%) [ +0.08% +0.00% +0.08% / +0.53% +0.49% +0.45%] index_add_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.446 -> 2.455 ( +0.37%) [ +0.00% +0.12% +0.00% / +0.37% +0.70% +0.65%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.450 -> 2.460 ( +0.41%) [ +0.04% +0.00% +0.00% / +0.41% +0.49% +0.73%] index_add_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.439 -> 2.452 ( +0.53%) [ +0.41% +0.00% +0.21% / +0.53% +0.98% +1.03%] index_copy_ perm : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.464 ( +0.57%) [ +0.00% +0.08% +0.00% / +0.57% +0.57% +0.78%] index_add_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 2.439 -> 2.452 ( +0.53%) [ +0.00% +0.29% +0.29% / +0.53% +0.82% +0.94%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 4.496 -> 4.496 ( +0.00%) [ +0.02% +0.00% +0.07% / +0.00% +0.29% +0.20%] index_select const : Elapsed 0.045 ms (4.497 ms / 100) 4.499 -> 4.505 ( +0.13%) [ +0.00% +0.00% +0.18% / +0.13% +0.33% +0.29%] index_select wrap : Elapsed 0.045 ms (4.499 ms / 100) 4.507 -> 4.503 ( -0.09%) [ +0.00% +0.07% +0.02% / -0.09% +0.18% +0.11%] index_select linear : Elapsed 0.045 ms (4.507 ms / 100) 4.502 -> 4.508 ( +0.13%) [ +0.11% +0.00% +0.24% / +0.13% +0.38% +0.20%] index_select reverse : Elapsed 0.045 ms (4.507 ms / 100) 4.489 -> 4.502 ( +0.29%) [ +0.18% +0.00% +0.22% / +0.29% +0.38% +0.42%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.502 -> 4.502 ( +0.00%) [ +0.04% +0.02% +0.00% / +0.00% +0.09% +0.02%] index_select skip256 : Elapsed 0.045 ms (4.504 ms / 100) 4.502 -> 4.504 ( +0.04%) [ +0.00% +0.13% +0.02% / +0.04% +0.27% +0.29%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.504 -> 4.503 ( -0.02%) [ +0.00% +0.07% +0.04% / -0.02% +0.22% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.504 ms / 100) 4.498 -> 4.503 ( +0.11%) [ +0.00% +0.20% +0.27% / +0.11% +0.36% +0.47%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.20% +0.13% +0.00% / +0.07% +0.27% +0.40%] index_select strided 7 : Elapsed 0.045 ms (4.510 ms / 100) 4.501 -> 4.490 ( -0.24%) [ +0.00% +0.04% +0.07% / -0.24% +0.36% +0.33%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.507 -> 4.497 ( -0.22%) [ +0.00% +0.04% +0.02% / -0.22% +0.27% +0.24%] index_select strided 16 : Elapsed 0.045 ms (4.507 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.07% +0.24% +0.00% / +0.07% +0.27% +0.33%] index_select random : Elapsed 0.045 ms (4.501 ms / 100) 4.499 -> 4.502 ( +0.07%) [ +0.13% +0.16% +0.00% / +0.07% +0.33% +0.40%] index_select random_sorted : Elapsed 0.045 ms (4.505 ms / 100) B = [5, 40, 4, 16] (stride (160, 4, 1, 800)) A = [5, 20, 4, 16] (stride (4, 20, 1, 400)) dim = 1 2.456 -> 2.467 ( +0.45%) [ +0.04% +0.00% +0.00% / +0.45% +0.81% +0.77%] index_add_ linear : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.465 ( +0.45%) [ +0.37% +0.16% +0.00% / +0.45% +0.98% +0.90%] index_copy_ linear : Elapsed 0.025 ms (2.463 ms / 100) 2.447 -> 2.461 ( +0.57%) [ +0.25% +0.04% +0.00% / +0.57% +1.19% +1.23%] index_add_ reverse : Elapsed 0.025 ms (2.453 ms / 100) 2.451 -> 2.467 ( +0.65%) [ +0.00% +0.04% +0.00% / +0.65% +1.02% +1.22%] index_copy_ reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.468 -> 2.481 ( +0.53%) [ +0.12% +0.16% +0.00% / +0.53% +0.89% +0.93%] index_add_ spread : Elapsed 0.025 ms (2.471 ms / 100) 2.478 -> 2.496 ( +0.73%) [ +0.00% +0.36% +0.08% / +0.73% +1.29% +1.33%] index_copy_ spread : Elapsed 0.025 ms (2.478 ms / 100) 2.470 -> 2.481 ( +0.45%) [ +0.04% +0.04% +0.00% / +0.45% +0.69% +0.65%] index_add_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.478 -> 2.488 ( +0.40%) [ +0.00% +0.04% +0.00% / +0.40% +0.81% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.472 -> 2.479 ( +0.28%) [ +0.00% +0.08% +0.12% / +0.28% +0.53% +0.61%] index_add_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.481 -> 2.493 ( +0.48%) [ +0.20% +0.00% +0.04% / +0.48% +0.77% +0.64%] index_copy_ strided 7 : Elapsed 0.025 ms (2.486 ms / 100) 2.469 -> 2.473 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.65% +0.16% +0.24%] index_add_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.473 -> 2.483 ( +0.40%) [ +0.00% +0.20% +0.04% / +0.73% +0.53% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.473 ms / 100) 2.469 -> 2.474 ( +0.20%) [ +0.00% +0.08% +0.04% / +0.65% +0.20% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.04% +0.00% +0.16% / +0.69% +0.20% +0.28%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.477 ms / 100) 4.497 -> 4.499 ( +0.04%) [ +0.02% +0.00% +0.04% / +0.04% +0.33% +0.11%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.522 -> 4.522 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.04% +0.00% +0.04%] index_select wrap : Elapsed 0.045 ms (4.522 ms / 100) 4.520 -> 4.521 ( +0.02%) [ +0.00% +0.13% +0.09% / +0.09% +0.02% +0.13%] index_select linear : Elapsed 0.045 ms (4.520 ms / 100) 4.520 -> 4.516 ( -0.09%) [ +0.11% +0.00% +0.07% / +0.13% -0.07% -0.09%] index_select reverse : Elapsed 0.045 ms (4.525 ms / 100) 4.500 -> 4.502 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.18% +0.07% +0.04%] index_select skip64 : Elapsed 0.045 ms (4.502 ms / 100) 4.498 -> 4.500 ( +0.04%) [ +0.22% +0.31% +0.00% / +0.04% +0.04% +0.18%] index_select skip256 : Elapsed 0.045 ms (4.508 ms / 100) 4.518 -> 4.520 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.07% +0.13% +0.04%] index_select spread : Elapsed 0.045 ms (4.518 ms / 100) 4.514 -> 4.513 ( -0.02%) [ +0.16% +0.07% +0.00% / -0.02% +0.18% +0.16%] index_select strided 3 : Elapsed 0.045 ms (4.521 ms / 100) 4.504 -> 4.509 ( +0.11%) [ +0.11% +0.00% +0.04% / +0.13% +0.11% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.509 ms / 100) 4.513 -> 4.518 ( +0.11%) [ +0.00% +0.09% +0.13% / +0.13% +0.11% +0.11%] index_select strided 7 : Elapsed 0.045 ms (4.513 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.00% +0.02% +0.04% / +0.20% +0.09% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.505 ms / 100) 4.501 -> 4.506 ( +0.11%) [ +0.00% +0.29% +0.16% / +0.22% +0.11% +0.16%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.521 -> 4.515 ( -0.13%) [ +0.09% +0.00% +0.00% / +0.07% -0.13% -0.11%] index_select random : Elapsed 0.045 ms (4.525 ms / 100) 4.517 -> 4.515 ( -0.04%) [ +0.11% +0.00% +0.00% / -0.04% +0.11% +0.07%] index_select random_sorted : Elapsed 0.045 ms (4.522 ms / 100) B = [5, 40, 4, 16] (stride (40, 1, 200, 800)) A = [5, 20, 4, 16] (stride (64, 320, 16, 1)) dim = 1 2.415 -> 2.428 ( +0.54%) [ +0.08% +0.17% +0.00% / +0.66% +0.54% +0.70%] index_add_ linear : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.420 ( +0.41%) [ +0.37% +0.08% +0.00% / +0.41% +0.54% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.419 ms / 100) 2.419 -> 2.426 ( +0.29%) [ +0.21% +0.00% +0.12% / +0.29% +0.45% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.424 ms / 100) 2.417 -> 2.424 ( +0.29%) [ +0.00% +0.00% +0.08% / +0.29% +0.46% +0.33%] index_copy_ reverse : Elapsed 0.024 ms (2.417 ms / 100) 2.434 -> 2.439 ( +0.21%) [ +0.00% +0.21% +0.00% / +0.29% +0.21% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.434 ms / 100) 2.433 -> 2.439 ( +0.25%) [ +0.00% +0.25% +0.00% / +0.25% +0.41% +0.41%] index_copy_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.432 -> 2.441 ( +0.37%) [ +0.16% +0.25% +0.00% / +0.37% +0.37% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.436 ms / 100) 2.429 -> 2.441 ( +0.49%) [ +0.00% +0.29% +0.08% / +0.62% +0.54% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.429 ms / 100) 2.432 -> 2.440 ( +0.33%) [ +0.08% +0.04% +0.00% / +0.45% +0.37% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.434 ms / 100) 2.428 -> 2.440 ( +0.49%) [ +0.21% +0.00% +0.21% / +0.49% +0.70% +0.66%] index_copy_ strided 7 : Elapsed 0.024 ms (2.433 ms / 100) 2.430 -> 2.439 ( +0.37%) [ +0.16% +0.12% +0.00% / +0.37% +0.58% +0.45%] index_add_ perm : Elapsed 0.024 ms (2.434 ms / 100) 2.431 -> 2.442 ( +0.45%) [ +0.00% +0.00% +0.00% / +0.49% +0.45% +0.53%] index_copy_ perm : Elapsed 0.024 ms (2.431 ms / 100) 2.429 -> 2.442 ( +0.54%) [ +0.00% +0.00% +0.16% / +0.70% +0.54% +0.66%] index_add_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 2.429 -> 2.438 ( +0.37%) [ +0.00% +0.12% +0.12% / +0.37% +0.86% +0.54%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.429 ms / 100) 4.434 -> 4.434 ( +0.00%) [ +0.14% +0.02% +0.00% / +0.00% +0.16% +0.20%] index_select const : Elapsed 0.044 ms (4.440 ms / 100) 4.440 -> 4.443 ( +0.07%) [ +0.00% +0.05% +0.16% / +0.07% +0.14% +0.16%] index_select wrap : Elapsed 0.044 ms (4.440 ms / 100) 4.438 -> 4.442 ( +0.09%) [ +0.18% +0.11% +0.00% / +0.09% +0.16% +0.25%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.437 -> 4.442 ( +0.11%) [ +0.18% +0.07% +0.00% / +0.11% +0.14% +0.18%] index_select reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.437 -> 4.434 ( -0.07%) [ +0.00% +0.09% +0.00% / +0.00% -0.07% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.437 ms / 100) 4.435 -> 4.433 ( -0.05%) [ +0.07% +0.00% +0.07% / +0.02% +0.20% -0.05%] index_select skip256 : Elapsed 0.044 ms (4.438 ms / 100) 4.434 -> 4.436 ( +0.05%) [ +0.18% +0.27% +0.00% / +0.05% +0.29% +0.18%] index_select spread : Elapsed 0.044 ms (4.442 ms / 100) 4.441 -> 4.440 ( -0.02%) [ +0.00% +0.14% +0.09% / -0.02% +0.16% +0.02%] index_select strided 3 : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.429 ( -0.16%) [ +0.25% +0.05% +0.00% / -0.02% -0.16% +0.29%] index_select strided 5 : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.444 ( +0.07%) [ +0.05% +0.14% +0.00% / +0.07% +0.16% +0.07%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.428 -> 4.434 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.23% +0.23% +0.14%] index_select strided 8 : Elapsed 0.044 ms (4.434 ms / 100) 4.435 -> 4.434 ( -0.02%) [ +0.14% +0.14% +0.00% / +0.05% -0.02% +0.23%] index_select strided 16 : Elapsed 0.044 ms (4.441 ms / 100) 4.437 -> 4.445 ( +0.18%) [ +0.00% +0.09% +0.14% / +0.36% +0.27% +0.18%] index_select random : Elapsed 0.044 ms (4.437 ms / 100) 4.441 -> 4.439 ( -0.05%) [ +0.00% +0.05% +0.07% / -0.05% +0.02% +0.00%] index_select random_sorted : Elapsed 0.044 ms (4.441 ms / 100) out_shape = [5, 20, 40, 16] in_shape = [5, 20, 4, 16] idx_dim = 2 B = [5, 20, 40, 16] (stride (12800, 640, 1, 40)) A = [5, 20, 4, 16] (stride (80, 4, 1, 400)) dim = 2 0.582 -> 0.582 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.17% +0.52%] index_add_ linear : Elapsed 0.006 ms (0.582 ms / 100) 0.596 -> 0.596 ( +0.00%) [ +0.00% +0.34% +0.50% / +0.00% +0.50% +0.17%] index_copy_ linear : Elapsed 0.006 ms (0.596 ms / 100) 0.579 -> 0.579 ( +0.00%) [ +0.00% +0.17% +0.17% / +0.00% +1.21% +1.38%] index_add_ reverse : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.590 ( +0.00%) [ +0.00% +0.34% +0.17% / +0.00% +1.36% +1.36%] index_copy_ reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.580 -> 0.582 ( +0.34%) [ +0.00% +0.00% +0.34% / +0.34% +1.03% +1.21%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.591 -> 0.592 ( +0.17%) [ +0.17% +0.00% +0.34% / +0.17% +1.52% +1.18%] index_copy_ spread : Elapsed 0.006 ms (0.592 ms / 100) 0.582 -> 0.581 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.17% +0.34% +0.52%] index_add_ strided 3 : Elapsed 0.006 ms (0.582 ms / 100) 0.595 -> 0.596 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.006 ms (0.595 ms / 100) 0.582 -> 0.580 ( -0.34%) [ +0.17% +0.00% +0.00% / -0.34% +0.17% +0.34%] index_add_ strided 7 : Elapsed 0.006 ms (0.583 ms / 100) 0.593 -> 0.592 ( -0.17%) [ +0.34% +0.34% +0.00% / -0.17% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.595 ms / 100) 0.581 -> 0.580 ( -0.17%) [ +1.20% +0.17% +0.00% / -0.17% +0.34% +0.34%] index_add_ perm : Elapsed 0.006 ms (0.588 ms / 100) 0.594 -> 0.594 ( +0.00%) [ +1.01% +0.17% +0.00% / +0.00% +2.19% +0.67%] index_copy_ perm : Elapsed 0.006 ms (0.600 ms / 100) 0.580 -> 0.581 ( +0.17%) [ +0.00% +0.34% +0.00% / +0.17% +0.52% +0.69%] index_add_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 0.592 -> 0.593 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.84% +1.18%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.593 ms / 100) 5.131 -> 4.968 ( -3.18%) [ +0.00% +0.14% +0.18% / -3.08% -3.18% -3.00%] index_select const : Elapsed 0.051 ms (5.131 ms / 100) 5.129 -> 4.974 ( -3.02%) [ +0.00% +0.02% +0.14% / -2.79% -2.94% -3.02%] index_select wrap : Elapsed 0.051 ms (5.129 ms / 100) 5.126 -> 4.968 ( -3.08%) [ +0.00% +0.33% +0.14% / -2.81% -3.08% -3.08%] index_select linear : Elapsed 0.051 ms (5.126 ms / 100) 5.122 -> 4.973 ( -2.91%) [ +0.00% +0.10% +0.20% / -2.91% -2.75% -2.83%] index_select reverse : Elapsed 0.051 ms (5.122 ms / 100) 5.121 -> 4.967 ( -3.01%) [ +0.23% +0.00% +0.21% / -3.01% -2.83% -2.48%] index_select skip64 : Elapsed 0.051 ms (5.133 ms / 100) 5.118 -> 4.961 ( -3.07%) [ +0.16% +0.00% +0.29% / -3.07% -2.50% -2.33%] index_select skip256 : Elapsed 0.051 ms (5.126 ms / 100) 5.122 -> 4.969 ( -2.99%) [ +0.00% +0.00% +0.10% / -2.99% -2.69% -2.87%] index_select spread : Elapsed 0.051 ms (5.122 ms / 100) 5.126 -> 4.962 ( -3.20%) [ +0.16% +0.00% +0.00% / -3.20% -2.87% -2.93%] index_select strided 3 : Elapsed 0.051 ms (5.134 ms / 100) 5.124 -> 4.973 ( -2.95%) [ +0.10% +0.00% +0.14% / -2.95% -2.58% -2.22%] index_select random : Elapsed 0.051 ms (5.129 ms / 100) 5.116 -> 4.964 ( -2.97%) [ +0.22% +0.00% +0.45% / -2.97% -2.64% -2.56%] index_select random_sorted : Elapsed 0.051 ms (5.127 ms / 100) B = [5, 20, 40, 16] (stride (12800, 16, 320, 1)) A = [5, 20, 4, 16] (stride (1, 320, 5, 20)) dim = 2 1.345 -> 1.344 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.59% +0.67%] index_add_ linear : Elapsed 0.013 ms (1.346 ms / 100) 1.304 -> 1.304 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.69% +1.07%] index_copy_ linear : Elapsed 0.013 ms (1.304 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.45% +0.59%] index_add_ reverse : Elapsed 0.013 ms (1.346 ms / 100) 1.305 -> 1.305 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.92% +0.92%] index_copy_ reverse : Elapsed 0.013 ms (1.307 ms / 100) 1.344 -> 1.345 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.60% +0.52%] index_add_ spread : Elapsed 0.013 ms (1.344 ms / 100) 1.307 -> 1.309 ( +0.15%) [ +0.38% +0.38% +0.00% / +0.15% +0.61% +0.92%] index_copy_ spread : Elapsed 0.013 ms (1.312 ms / 100) 1.355 -> 1.356 ( +0.07%) [ +0.00% +0.07% +0.15% / +0.07% +0.52% +0.59%] index_add_ strided 3 : Elapsed 0.014 ms (1.355 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.53% +0.61%] index_copy_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.37% +0.37%] index_add_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.00% +0.38% +0.38%] index_copy_ strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.15% +0.07% +0.00% / +0.00% +0.45% +0.74%] index_add_ perm : Elapsed 0.013 ms (1.346 ms / 100) 1.309 -> 1.316 ( +0.53%) [ +0.00% +0.08% +0.00% / +0.53% +0.61% +0.53%] index_copy_ perm : Elapsed 0.013 ms (1.309 ms / 100) 1.346 -> 1.346 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.45% +0.37%] index_add_ perm_sorted : Elapsed 0.013 ms (1.347 ms / 100) 1.305 -> 1.307 ( +0.15%) [ +0.15% +0.31% +0.00% / +0.15% +0.54% +0.69%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.307 ms / 100) 9.138 -> 9.118 ( -0.22%) [ +0.11% +0.18% +0.00% / +0.05% -0.22% -0.20%] index_select const : Elapsed 0.091 ms (9.148 ms / 100) 9.140 -> 9.129 ( -0.12%) [ +0.20% +0.00% +0.12% / +0.12% +0.02% -0.12%] index_select wrap : Elapsed 0.092 ms (9.158 ms / 100) 9.129 -> 9.130 ( +0.01%) [ +0.15% +0.38% +0.00% / +0.23% +0.01% +0.25%] index_select linear : Elapsed 0.091 ms (9.143 ms / 100) 9.148 -> 9.124 ( -0.26%) [ +0.14% +0.08% +0.00% / +0.19% -0.26% -0.25%] index_select reverse : Elapsed 0.092 ms (9.161 ms / 100) 9.139 -> 9.125 ( -0.15%) [ +0.03% +0.11% +0.00% / +0.01% +0.15% -0.15%] index_select skip64 : Elapsed 0.091 ms (9.142 ms / 100) 9.140 -> 9.154 ( +0.15%) [ +0.20% +0.02% +0.00% / +0.23% +0.27% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.158 ms / 100) 9.150 -> 9.125 ( -0.27%) [ +0.23% +0.31% +0.00% / +0.22% -0.11% -0.27%] index_select spread : Elapsed 0.092 ms (9.171 ms / 100) 9.155 -> 9.129 ( -0.28%) [ +0.05% +0.22% +0.00% / -0.10% -0.28% -0.09%] index_select strided 3 : Elapsed 0.092 ms (9.160 ms / 100) 9.134 -> 9.136 ( +0.02%) [ +0.00% +0.19% +0.20% / +0.02% +0.03% +0.14%] index_select random : Elapsed 0.091 ms (9.134 ms / 100) 9.143 -> 9.145 ( +0.02%) [ +0.00% +0.15% +0.00% / +0.02% +0.16% +0.10%] index_select random_sorted : Elapsed 0.091 ms (9.143 ms / 100) B = [5, 20, 40, 16] (stride (320, 16, 1600, 1)) A = [5, 20, 4, 16] (stride (1, 320, 80, 5)) dim = 2 1.229 -> 1.231 ( +0.16%) [ +0.24% +0.08% +0.00% / +0.16% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.190 -> 1.192 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.50% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.24% +0.08% +0.00% / +0.00% +0.57% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.59% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.25% +0.08% +0.00% / +0.00% +0.67% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.81% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.42% +0.00% / +0.00% +0.84% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.93% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.230 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.57% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.67% +0.76%] index_copy_ perm : Elapsed 0.012 ms (1.190 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.65% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.229 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.190 ms / 100) 8.667 -> 8.672 ( +0.06%) [ +0.00% +0.10% +0.18% / +0.38% +0.07% +0.06%] index_select const : Elapsed 0.087 ms (8.667 ms / 100) 8.698 -> 8.672 ( -0.30%) [ +0.00% +0.08% +0.13% / -0.09% -0.30% -0.16%] index_select wrap : Elapsed 0.087 ms (8.698 ms / 100) 8.675 -> 8.688 ( +0.15%) [ +0.00% +0.14% +0.12% / +0.30% +0.15% +0.21%] index_select linear : Elapsed 0.087 ms (8.675 ms / 100) 8.684 -> 8.681 ( -0.03%) [ +0.00% +0.21% +0.07% / +0.01% -0.02% -0.03%] index_select reverse : Elapsed 0.087 ms (8.684 ms / 100) 8.684 -> 8.674 ( -0.12%) [ +0.03% +0.00% +0.00% / +0.07% +0.03% -0.12%] index_select skip64 : Elapsed 0.087 ms (8.687 ms / 100) 8.674 -> 8.672 ( -0.02%) [ +0.00% +0.01% +0.14% / +0.33% +0.08% -0.02%] index_select skip256 : Elapsed 0.087 ms (8.674 ms / 100) 8.687 -> 8.688 ( +0.01%) [ +0.14% +0.02% +0.00% / +0.10% +0.10% +0.01%] index_select spread : Elapsed 0.087 ms (8.699 ms / 100) 8.698 -> 8.682 ( -0.18%) [ +0.00% +0.06% +0.05% / -0.08% -0.18% +0.14%] index_select strided 3 : Elapsed 0.087 ms (8.698 ms / 100) 8.688 -> 8.686 ( -0.02%) [ +0.00% +0.06% +0.26% / -0.02% +0.13% +0.06%] index_select random : Elapsed 0.087 ms (8.688 ms / 100) 8.682 -> 8.686 ( +0.05%) [ +0.00% +0.26% +0.09% / +0.15% +0.09% +0.05%] index_select random_sorted : Elapsed 0.087 ms (8.682 ms / 100) B = [5, 20, 40, 16] (stride (320, 1, 1600, 20)) A = [5, 20, 4, 16] (stride (64, 320, 16, 1)) dim = 2 1.309 -> 1.309 ( +0.00%) [ +0.31% +0.23% +0.00% / +0.00% +0.76% +0.69%] index_add_ linear : Elapsed 0.013 ms (1.313 ms / 100) 1.270 -> 1.271 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.71% +0.79%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.312 -> 1.310 ( -0.15%) [ +0.00% +0.08% +0.00% / -0.15% +0.46% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.312 ms / 100) 1.272 -> 1.271 ( -0.08%) [ +0.08% +0.00% +0.16% / -0.08% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.311 -> 1.311 ( +0.00%) [ +0.23% +0.15% +0.00% / +0.00% +0.61% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.314 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.271 ms / 100) 1.311 -> 1.310 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.46% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.311 ms / 100) 1.270 -> 1.271 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.63% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.271 ms / 100) 1.308 -> 1.310 ( +0.15%) [ +0.15% +0.23% +0.00% / +0.15% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.310 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.24% +0.08% +0.00% / +0.00% +0.71% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.313 -> 1.310 ( -0.23%) [ +0.00% +0.00% +0.23% / -0.23% +0.46% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.31% +0.08% / +0.00% +0.55% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.271 ms / 100) 1.311 -> 1.311 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.311 ms / 100) 1.270 -> 1.271 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.47% +0.55%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.114 -> 9.124 ( +0.11%) [ +0.31% +0.19% +0.00% / +0.11% +0.11% +0.12%] index_select const : Elapsed 0.091 ms (9.142 ms / 100) 9.162 -> 9.149 ( -0.14%) [ +0.00% +0.27% +0.02% / +0.05% -0.14% +0.05%] index_select wrap : Elapsed 0.092 ms (9.162 ms / 100) 9.136 -> 9.145 ( +0.10%) [ +0.00% +0.10% +0.19% / +0.14% +0.10% +0.35%] index_select linear : Elapsed 0.091 ms (9.136 ms / 100) 9.158 -> 9.156 ( -0.02%) [ +0.15% +0.00% +0.14% / +0.07% +0.20% -0.02%] index_select reverse : Elapsed 0.092 ms (9.172 ms / 100) 9.128 -> 9.124 ( -0.04%) [ +0.04% +0.00% +0.15% / -0.04% -0.03% +0.04%] index_select skip64 : Elapsed 0.091 ms (9.132 ms / 100) 9.125 -> 9.129 ( +0.04%) [ +0.27% +0.18% +0.00% / +0.19% +0.04% +0.09%] index_select skip256 : Elapsed 0.092 ms (9.150 ms / 100) 9.164 -> 9.144 ( -0.22%) [ +0.05% +0.00% +0.03% / +0.09% -0.22% -0.02%] index_select spread : Elapsed 0.092 ms (9.169 ms / 100) 9.150 -> 9.135 ( -0.16%) [ +0.00% +0.17% +0.28% / +0.17% -0.16% -0.09%] index_select strided 3 : Elapsed 0.091 ms (9.150 ms / 100) 9.163 -> 9.136 ( -0.29%) [ +0.00% +0.03% +0.05% / +0.14% -0.29% +0.12%] index_select random : Elapsed 0.092 ms (9.163 ms / 100) 9.161 -> 9.149 ( -0.13%) [ +0.00% +0.14% +0.01% / +0.14% +0.07% -0.13%] index_select random_sorted : Elapsed 0.092 ms (9.161 ms / 100) B = [5, 20, 40, 16] (stride (16, 80, 1600, 1)) A = [5, 20, 4, 16] (stride (1280, 64, 1, 4)) dim = 2 1.231 -> 1.231 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.50% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.193 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.57% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.231 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.59% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.76%] index_copy_ spread : Elapsed 0.012 ms (1.192 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.57% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.17% +0.17% / +0.08% +0.76% +0.84%] index_copy_ strided 3 : Elapsed 0.012 ms (1.191 ms / 100) 1.231 -> 1.230 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.49% +0.57%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.67% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.191 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.49% +0.49%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.67% +0.67%] index_copy_ perm : Elapsed 0.012 ms (1.191 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.16% +0.00% +0.08% / +0.00% +0.65% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.67% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.191 ms / 100) 8.724 -> 8.737 ( +0.15%) [ +0.03% +0.06% +0.00% / +0.15% +0.15% +0.21%] index_select const : Elapsed 0.087 ms (8.727 ms / 100) 8.718 -> 8.730 ( +0.14%) [ +0.08% +0.00% +0.05% / +0.15% +0.14% +0.28%] index_select wrap : Elapsed 0.087 ms (8.725 ms / 100) 8.727 -> 8.728 ( +0.01%) [ +0.00% +0.02% +0.00% / +0.01% +0.23% +0.05%] index_select linear : Elapsed 0.087 ms (8.727 ms / 100) 8.717 -> 8.722 ( +0.06%) [ +0.03% +0.00% +0.05% / +0.06% +0.13% +0.31%] index_select reverse : Elapsed 0.087 ms (8.720 ms / 100) 8.712 -> 8.733 ( +0.24%) [ +0.32% +0.23% +0.00% / +0.24% +0.53% +0.34%] index_select skip64 : Elapsed 0.087 ms (8.740 ms / 100) 8.717 -> 8.717 ( +0.00%) [ +0.07% +0.00% +0.01% / +0.00% +0.31% +0.31%] index_select skip256 : Elapsed 0.087 ms (8.723 ms / 100) 8.728 -> 8.729 ( +0.01%) [ +0.26% +0.00% +0.14% / +0.15% +0.01% +0.09%] index_select spread : Elapsed 0.088 ms (8.751 ms / 100) 8.720 -> 8.729 ( +0.10%) [ +0.00% +0.03% +0.17% / +0.10% +0.22% +0.19%] index_select strided 3 : Elapsed 0.087 ms (8.720 ms / 100) 8.721 -> 8.716 ( -0.06%) [ +0.08% +0.00% +0.14% / -0.06% +0.26% +0.24%] index_select random : Elapsed 0.087 ms (8.728 ms / 100) 8.721 -> 8.707 ( -0.16%) [ +0.03% +0.00% +0.14% / -0.16% +0.24% +0.19%] index_select random_sorted : Elapsed 0.087 ms (8.724 ms / 100) B = [5, 20, 40, 16] (stride (1, 5, 1600, 100)) A = [5, 20, 4, 16] (stride (80, 4, 1, 400)) dim = 2 1.262 -> 1.266 ( +0.32%) [ +0.08% +0.00% +0.16% / +0.32% +0.48% +0.40%] index_add_ linear : Elapsed 0.013 ms (1.263 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.33% +0.08% +0.08%] index_copy_ linear : Elapsed 0.012 ms (1.229 ms / 100) 1.263 -> 1.263 ( +0.00%) [ +0.24% +0.24% +0.00% / +0.00% +0.32% +0.32%] index_add_ reverse : Elapsed 0.013 ms (1.266 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.41% +0.41%] index_copy_ reverse : Elapsed 0.012 ms (1.225 ms / 100) 1.262 -> 1.261 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.32% +0.40%] index_add_ spread : Elapsed 0.013 ms (1.262 ms / 100) 1.221 -> 1.222 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.66% +0.57%] index_copy_ spread : Elapsed 0.012 ms (1.221 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.24% +0.16% +0.00% / +0.16% +0.39% +0.39%] index_add_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.239 -> 1.237 ( -0.16%) [ +0.08% +0.00% +0.08% / +0.16% -0.16% -0.08%] index_copy_ strided 3 : Elapsed 0.012 ms (1.240 ms / 100) 1.268 -> 1.269 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_add_ strided 7 : Elapsed 0.013 ms (1.270 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.49% +0.57%] index_copy_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.260 -> 1.261 ( +0.08%) [ +0.00% +0.24% +0.08% / +0.08% +0.48% +0.48%] index_add_ perm : Elapsed 0.013 ms (1.260 ms / 100) 1.222 -> 1.221 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.41% +0.33%] index_copy_ perm : Elapsed 0.012 ms (1.222 ms / 100) 1.263 -> 1.266 ( +0.24%) [ +0.16% +0.16% +0.00% / +0.24% +0.48% +0.32%] index_add_ perm_sorted : Elapsed 0.013 ms (1.265 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.90% +1.06%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) 8.772 -> 8.772 ( +0.00%) [ +0.00% +0.02% +0.11% / +0.00% +0.16% +0.22%] index_select const : Elapsed 0.088 ms (8.772 ms / 100) 8.773 -> 8.768 ( -0.06%) [ +0.00% +0.00% +0.01% / +0.26% -0.06% -0.03%] index_select wrap : Elapsed 0.088 ms (8.773 ms / 100) 8.767 -> 8.781 ( +0.16%) [ +0.00% +0.09% +0.25% / +0.18% +0.19% +0.16%] index_select linear : Elapsed 0.088 ms (8.767 ms / 100) 8.777 -> 8.770 ( -0.08%) [ +0.02% +0.10% +0.00% / -0.08% +0.23% -0.02%] index_select reverse : Elapsed 0.088 ms (8.779 ms / 100) 8.769 -> 8.784 ( +0.17%) [ +0.08% +0.03% +0.00% / +0.17% +0.26% +0.21%] index_select skip64 : Elapsed 0.088 ms (8.776 ms / 100) 8.764 -> 8.776 ( +0.14%) [ +0.11% +0.14% +0.00% / +0.14% +0.22% +0.50%] index_select skip256 : Elapsed 0.088 ms (8.774 ms / 100) 8.767 -> 8.767 ( +0.00%) [ +0.19% +0.00% +0.10% / +0.19% +0.03% +0.00%] index_select spread : Elapsed 0.088 ms (8.784 ms / 100) 8.766 -> 8.771 ( +0.06%) [ +0.11% +0.00% +0.22% / +0.06% +0.16% +0.15%] index_select strided 3 : Elapsed 0.088 ms (8.776 ms / 100) 8.769 -> 8.783 ( +0.16%) [ +0.23% +0.07% +0.00% / +0.18% +0.16% +0.26%] index_select random : Elapsed 0.088 ms (8.789 ms / 100) 8.763 -> 8.775 ( +0.14%) [ +0.01% +0.10% +0.00% / +0.25% +0.29% +0.14%] index_select random_sorted : Elapsed 0.088 ms (8.764 ms / 100) out_shape = [5, 20, 4, 40] in_shape = [5, 20, 4, 16] idx_dim = 3 B = [5, 20, 4, 40] (stride (3200, 160, 1, 4)) A = [5, 20, 4, 16] (stride (320, 16, 1600, 1)) dim = 3 1.411 -> 1.411 ( +0.00%) [ +0.00% +0.21% +0.28% / +0.00% +0.57% +0.64%] index_add_ linear : Elapsed 0.014 ms (1.411 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.00% +0.07% +0.59% / +0.07% +0.51% +0.59%] index_copy_ linear : Elapsed 0.014 ms (1.361 ms / 100) 1.411 -> 1.415 ( +0.28%) [ +0.00% +0.07% +0.21% / +0.28% +0.50% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.411 ms / 100) 1.361 -> 1.365 ( +0.29%) [ +0.00% +0.15% +0.07% / +0.29% +0.44% +0.37%] index_copy_ reverse : Elapsed 0.014 ms (1.361 ms / 100) 1.407 -> 1.411 ( +0.28%) [ +0.28% +0.28% +0.00% / +0.28% +0.64% +0.57%] index_add_ spread : Elapsed 0.014 ms (1.411 ms / 100) 1.358 -> 1.363 ( +0.37%) [ +0.29% +0.29% +0.00% / +0.37% +0.44% +0.44%] index_copy_ spread : Elapsed 0.014 ms (1.362 ms / 100) 1.411 -> 1.414 ( +0.21%) [ +0.07% +0.21% +0.00% / +0.21% +0.64% +0.28%] index_add_ strided 3 : Elapsed 0.014 ms (1.412 ms / 100) 1.359 -> 1.364 ( +0.37%) [ +0.00% +0.37% +0.22% / +0.37% +0.81% +0.37%] index_copy_ strided 3 : Elapsed 0.014 ms (1.359 ms / 100) 1.411 -> 1.410 ( -0.07%) [ +0.35% +0.14% +0.00% / -0.07% +0.28% +0.35%] index_add_ strided 7 : Elapsed 0.014 ms (1.416 ms / 100) 1.362 -> 1.362 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.15% +0.22%] index_copy_ strided 7 : Elapsed 0.014 ms (1.364 ms / 100) 1.413 -> 1.415 ( +0.14%) [ +0.28% +0.00% +0.21% / +0.21% +0.14% +0.28%] index_add_ perm : Elapsed 0.014 ms (1.417 ms / 100) 1.365 -> 1.364 ( -0.07%) [ +0.07% +0.00% +0.07% / +0.00% -0.07% +0.15%] index_copy_ perm : Elapsed 0.014 ms (1.366 ms / 100) 1.412 -> 1.417 ( +0.35%) [ +0.14% +0.64% +0.00% / +0.57% +0.50% +0.35%] index_add_ perm_sorted : Elapsed 0.014 ms (1.414 ms / 100) 1.364 -> 1.367 ( +0.22%) [ +0.00% +0.29% +0.00% / +0.22% +0.37% +0.22%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.364 ms / 100) 3.539 -> 3.533 ( -0.17%) [ +0.00% +0.06% +0.00% / +0.00% -0.17% -0.14%] index_select const : Elapsed 0.035 ms (3.539 ms / 100) 3.546 -> 3.539 ( -0.20%) [ +0.00% +0.11% +0.06% / -0.06% -0.20% -0.17%] index_select wrap : Elapsed 0.035 ms (3.546 ms / 100) 3.540 -> 3.539 ( -0.03%) [ +0.23% +0.00% +0.11% / -0.03% +0.48% +0.56%] index_select linear : Elapsed 0.035 ms (3.548 ms / 100) 3.546 -> 3.544 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.42% +0.28%] index_select reverse : Elapsed 0.035 ms (3.546 ms / 100) 3.540 -> 3.532 ( -0.23%) [ +0.11% +0.17% +0.00% / -0.06% -0.11% -0.23%] index_select skip64 : Elapsed 0.035 ms (3.544 ms / 100) 3.545 -> 3.534 ( -0.31%) [ +0.00% +0.03% +0.00% / -0.14% -0.31% -0.20%] index_select skip256 : Elapsed 0.035 ms (3.545 ms / 100) 3.549 -> 3.544 ( -0.14%) [ +0.00% +0.08% +0.11% / +0.08% -0.14% -0.03%] index_select spread : Elapsed 0.035 ms (3.549 ms / 100) 3.546 -> 3.544 ( -0.06%) [ +0.00% +0.00% +0.11% / +0.08% +0.03% -0.06%] index_select strided 3 : Elapsed 0.035 ms (3.546 ms / 100) 3.549 -> 3.543 ( -0.17%) [ +0.06% +0.06% +0.00% / +0.03% -0.17% -0.06%] index_select strided 5 : Elapsed 0.036 ms (3.551 ms / 100) 3.542 -> 3.541 ( -0.03%) [ +0.28% +0.00% +0.17% / +0.23% -0.03% -0.03%] index_select strided 7 : Elapsed 0.036 ms (3.552 ms / 100) 3.548 -> 3.538 ( -0.28%) [ +0.08% +0.08% +0.00% / -0.03% -0.28% -0.17%] index_select strided 8 : Elapsed 0.036 ms (3.551 ms / 100) 3.552 -> 3.541 ( -0.31%) [ +0.00% +0.03% +0.11% / +0.00% -0.31% -0.31%] index_select random : Elapsed 0.036 ms (3.552 ms / 100) 3.553 -> 3.533 ( -0.56%) [ +0.25% +0.00% +0.00% / -0.08% -0.56% -0.31%] index_select random_sorted : Elapsed 0.036 ms (3.562 ms / 100) B = [5, 20, 4, 40] (stride (3200, 40, 800, 1)) A = [5, 20, 4, 16] (stride (20, 1, 100, 400)) dim = 3 3.662 -> 3.664 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.66% +0.63%] index_add_ linear : Elapsed 0.037 ms (3.664 ms / 100) 3.532 -> 3.534 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.76% +0.68%] index_copy_ linear : Elapsed 0.035 ms (3.533 ms / 100) 3.663 -> 3.663 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.74% +0.74%] index_add_ reverse : Elapsed 0.037 ms (3.664 ms / 100) 3.536 -> 3.535 ( -0.03%) [ +0.03% +0.00% +0.06% / -0.03% +0.79% +0.74%] index_copy_ reverse : Elapsed 0.035 ms (3.537 ms / 100) 3.655 -> 3.656 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.85% +0.88%] index_add_ spread : Elapsed 0.037 ms (3.656 ms / 100) 3.533 -> 3.534 ( +0.03%) [ +0.00% +0.06% +0.06% / +0.03% +0.93% +0.91%] index_copy_ spread : Elapsed 0.035 ms (3.533 ms / 100) 3.662 -> 3.664 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.76% +0.79%] index_add_ strided 3 : Elapsed 0.037 ms (3.664 ms / 100) 3.531 -> 3.531 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.93% +0.91%] index_copy_ strided 3 : Elapsed 0.035 ms (3.531 ms / 100) 3.662 -> 3.662 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.037 ms (3.663 ms / 100) 3.536 -> 3.537 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.76% +0.85%] index_copy_ strided 7 : Elapsed 0.035 ms (3.536 ms / 100) 3.661 -> 3.665 ( +0.11%) [ +0.03% +0.00% +0.14% / +0.11% +0.63% +0.66%] index_add_ perm : Elapsed 0.037 ms (3.662 ms / 100) 3.530 -> 3.532 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.76% +0.76%] index_copy_ perm : Elapsed 0.035 ms (3.530 ms / 100) 3.664 -> 3.663 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.63% +0.66%] index_add_ perm_sorted : Elapsed 0.037 ms (3.664 ms / 100) 3.535 -> 3.536 ( +0.03%) [ +0.00% +0.08% +0.03% / +0.03% +0.65% +0.68%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.535 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.16% +0.24% +0.00% / +0.07% +0.02% +0.16%] index_select const : Elapsed 0.055 ms (5.489 ms / 100) 5.494 -> 5.497 ( +0.05%) [ +0.13% +0.00% +0.13% / +0.05% +0.13% +0.11%] index_select wrap : Elapsed 0.055 ms (5.501 ms / 100) 5.492 -> 5.493 ( +0.02%) [ +0.04% +0.00% +0.18% / +0.16% +0.02% +0.09%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.500 -> 5.489 ( -0.20%) [ +0.00% +0.02% +0.05% / +0.00% -0.20% -0.09%] index_select reverse : Elapsed 0.055 ms (5.500 ms / 100) 5.482 -> 5.478 ( -0.07%) [ +0.02% +0.00% +0.04% / -0.07% +0.11% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.484 -> 5.481 ( -0.05%) [ +0.00% +0.05% +0.04% / -0.05% +0.02% +0.11%] index_select skip256 : Elapsed 0.055 ms (5.484 ms / 100) 5.495 -> 5.491 ( -0.07%) [ +0.05% +0.00% +0.02% / +0.11% -0.07% -0.07%] index_select spread : Elapsed 0.055 ms (5.498 ms / 100) 5.495 -> 5.492 ( -0.05%) [ +0.09% +0.00% +0.09% / +0.15% -0.05% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.500 ms / 100) 5.496 -> 5.498 ( +0.04%) [ +0.16% +0.02% +0.00% / +0.11% +0.04% +0.07%] index_select strided 5 : Elapsed 0.055 ms (5.505 ms / 100) 5.496 -> 5.496 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.09% +0.00% +0.02%] index_select strided 7 : Elapsed 0.055 ms (5.498 ms / 100) 5.482 -> 5.488 ( +0.11%) [ +0.00% +0.04% +0.09% / +0.11% +0.24% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.491 -> 5.496 ( +0.09%) [ +0.00% +0.11% +0.16% / +0.09% +0.13% +0.11%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.494 -> 5.497 ( +0.05%) [ +0.00% +0.15% +0.13% / +0.07% +0.05% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.494 ms / 100) B = [5, 20, 4, 40] (stride (800, 1, 4000, 20)) A = [5, 20, 4, 16] (stride (1, 320, 80, 5)) dim = 3 4.104 -> 4.107 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.39% +0.34%] index_add_ linear : Elapsed 0.041 ms (4.106 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.43% +0.36%] index_copy_ linear : Elapsed 0.039 ms (3.931 ms / 100) 4.117 -> 4.118 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.51% +0.49%] index_add_ reverse : Elapsed 0.041 ms (4.118 ms / 100) 3.940 -> 3.939 ( -0.03%) [ +0.13% +0.00% +0.15% / -0.03% +0.48% +0.61%] index_copy_ reverse : Elapsed 0.039 ms (3.945 ms / 100) 4.099 -> 4.102 ( +0.07%) [ +0.00% +0.12% +0.07% / +0.07% +0.51% +0.54%] index_add_ spread : Elapsed 0.041 ms (4.099 ms / 100) 3.925 -> 3.929 ( +0.10%) [ +0.00% +0.03% +0.13% / +0.10% +0.61% +0.64%] index_copy_ spread : Elapsed 0.039 ms (3.925 ms / 100) 4.105 -> 4.105 ( +0.00%) [ +0.00% +0.05% +0.02% / +0.00% +0.39% +0.49%] index_add_ strided 3 : Elapsed 0.041 ms (4.105 ms / 100) 3.930 -> 3.923 ( -0.18%) [ +0.03% +0.00% +0.03% / -0.18% +0.33% +0.48%] index_copy_ strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 4.118 -> 4.116 ( -0.05%) [ +0.00% +0.07% +0.02% / -0.05% +0.66% +0.63%] index_add_ strided 7 : Elapsed 0.041 ms (4.118 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.03% +0.00% +0.10% / +0.03% +0.69% +0.69%] index_copy_ strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 4.103 -> 4.104 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.02% +0.46% +0.51%] index_add_ perm : Elapsed 0.041 ms (4.105 ms / 100) 3.927 -> 3.932 ( +0.13%) [ +0.18% +0.00% +0.15% / +0.13% +0.56% +0.66%] index_copy_ perm : Elapsed 0.039 ms (3.934 ms / 100) 4.102 -> 4.106 ( +0.10%) [ +0.10% +0.00% +0.12% / +0.10% +0.51% +0.49%] index_add_ perm_sorted : Elapsed 0.041 ms (4.106 ms / 100) 3.929 -> 3.932 ( +0.08%) [ +0.05% +0.00% +0.18% / +0.08% +0.59% +0.43%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) 5.478 -> 5.486 ( +0.15%) [ +0.16% +0.00% +0.09% / +0.18% +0.15% +0.15%] index_select const : Elapsed 0.055 ms (5.487 ms / 100) 5.487 -> 5.480 ( -0.13%) [ +0.09% +0.09% +0.00% / -0.13% +0.16% +0.07%] index_select wrap : Elapsed 0.055 ms (5.492 ms / 100) 5.486 -> 5.489 ( +0.05%) [ +0.00% +0.16% +0.09% / +0.05% +0.11% +0.22%] index_select linear : Elapsed 0.055 ms (5.486 ms / 100) 5.490 -> 5.492 ( +0.04%) [ +0.00% +0.11% +0.04% / +0.07% +0.07% +0.04%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.489 -> 5.485 ( -0.07%) [ +0.00% +0.13% +0.07% / -0.02% +0.00% -0.07%] index_select skip64 : Elapsed 0.055 ms (5.489 ms / 100) 5.485 -> 5.484 ( -0.02%) [ +0.00% +0.13% +0.13% / +0.00% -0.02% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.485 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.00% +0.15% +0.22% / +0.11% +0.11% +0.00%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.05% +0.00% +0.09% / +0.24% +0.00% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.482 -> 5.489 ( +0.13%) [ +0.16% +0.09% +0.00% / +0.22% +0.13% +0.22%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.486 -> 5.490 ( +0.07%) [ +0.15% +0.00% +0.04% / +0.07% +0.09% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.494 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.00% +0.09% +0.22% / +0.02% +0.26% +0.22%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.487 -> 5.487 ( +0.00%) [ +0.11% +0.00% +0.11% / +0.00% +0.07% +0.33%] index_select random : Elapsed 0.055 ms (5.493 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.00% +0.15% +0.11% / +0.07% +0.11% +0.11%] index_select random_sorted : Elapsed 0.055 ms (5.483 ms / 100) B = [5, 20, 4, 40] (stride (1, 5, 4000, 100)) A = [5, 20, 4, 16] (stride (64, 320, 16, 1)) dim = 3 4.253 -> 4.252 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.59% +0.61%] index_add_ linear : Elapsed 0.043 ms (4.254 ms / 100) 4.105 -> 4.105 ( +0.00%) [ +0.02% +0.00% +0.05% / +0.00% +0.66% +1.02%] index_copy_ linear : Elapsed 0.041 ms (4.106 ms / 100) 4.267 -> 4.267 ( +0.00%) [ +0.00% +0.12% +0.02% / +0.00% +0.68% +0.68%] index_add_ reverse : Elapsed 0.043 ms (4.267 ms / 100) 4.117 -> 4.120 ( +0.07%) [ +0.00% +0.00% +0.17% / +0.07% +0.63% +0.73%] index_copy_ reverse : Elapsed 0.041 ms (4.117 ms / 100) 4.265 -> 4.266 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.52% +0.59%] index_add_ spread : Elapsed 0.043 ms (4.265 ms / 100) 4.118 -> 4.121 ( +0.07%) [ +0.00% +0.07% +0.19% / +0.07% +0.61% +0.73%] index_copy_ spread : Elapsed 0.041 ms (4.118 ms / 100) 4.267 -> 4.268 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.61% +0.66%] index_add_ strided 3 : Elapsed 0.043 ms (4.267 ms / 100) 4.114 -> 4.117 ( +0.07%) [ +0.05% +0.00% +0.17% / +0.07% +0.61% +0.63%] index_copy_ strided 3 : Elapsed 0.041 ms (4.116 ms / 100) 4.266 -> 4.265 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.70% +0.75%] index_add_ strided 7 : Elapsed 0.043 ms (4.266 ms / 100) 4.117 -> 4.116 ( -0.02%) [ +0.02% +0.00% +0.10% / -0.02% +0.63% +0.68%] index_copy_ strided 7 : Elapsed 0.041 ms (4.118 ms / 100) 4.252 -> 4.251 ( -0.02%) [ +0.00% +0.02% +0.14% / -0.02% +0.61% +0.63%] index_add_ perm : Elapsed 0.043 ms (4.252 ms / 100) 4.105 -> 4.107 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.68% +0.83%] index_copy_ perm : Elapsed 0.041 ms (4.107 ms / 100) 4.265 -> 4.264 ( -0.02%) [ +0.00% +0.00% +0.05% / -0.02% +0.75% +0.82%] index_add_ perm_sorted : Elapsed 0.043 ms (4.265 ms / 100) 4.111 -> 4.109 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.78% +0.85%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.111 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.13% +0.11% +0.00% / +0.14% +0.09% +0.00%] index_select const : Elapsed 0.056 ms (5.570 ms / 100) 5.571 -> 5.567 ( -0.07%) [ +0.05% +0.00% +0.00% / +0.02% -0.07% -0.04%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.16% +0.09% +0.00% / +0.07% +0.02% +0.11%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.00% +0.13% +0.14% / +0.02% -0.04% +0.04%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.09% +0.23%] index_select skip64 : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.00% +0.05% +0.07% / -0.07% +0.04% +0.05%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.569 -> 5.565 ( -0.07%) [ +0.00% +0.00% +0.02% / +0.00% -0.07% +0.05%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.00% +0.27% +0.04% / +0.07% +0.09% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.13% +0.00% +0.13% / -0.05% +0.02% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.569 ( +0.11%) [ +0.07% +0.00% +0.05% / +0.16% +0.11% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.00% +0.02% +0.09% / +0.02% +0.18% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.571 -> 5.562 ( -0.16%) [ +0.13% +0.00% +0.02% / +0.07% -0.16% -0.11%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.13% +0.05% +0.00% / +0.00% -0.05% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [5, 20, 4, 40] (stride (80, 4, 1, 400)) A = [5, 20, 4, 16] (stride (1280, 4, 1, 80)) dim = 3 3.017 -> 3.018 ( +0.03%) [ +0.00% +0.03% +0.07% / +0.03% +0.70% +0.66%] index_add_ linear : Elapsed 0.030 ms (3.017 ms / 100) 2.894 -> 2.896 ( +0.07%) [ +0.03% +0.00% +0.03% / +0.07% +0.69% +0.73%] index_copy_ linear : Elapsed 0.029 ms (2.895 ms / 100) 3.026 -> 3.025 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.40% +0.46%] index_add_ reverse : Elapsed 0.030 ms (3.027 ms / 100) 2.895 -> 2.894 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.41% +0.45%] index_copy_ reverse : Elapsed 0.029 ms (2.895 ms / 100) 3.023 -> 3.024 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.46% +0.53%] index_add_ spread : Elapsed 0.030 ms (3.023 ms / 100) 2.896 -> 2.895 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.48% +0.48%] index_copy_ spread : Elapsed 0.029 ms (2.897 ms / 100) 3.013 -> 3.012 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.56% +0.50%] index_add_ strided 3 : Elapsed 0.030 ms (3.013 ms / 100) 2.886 -> 2.887 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.55% +0.62%] index_copy_ strided 3 : Elapsed 0.029 ms (2.887 ms / 100) 3.027 -> 3.026 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.43% +0.50%] index_add_ strided 7 : Elapsed 0.030 ms (3.027 ms / 100) 2.894 -> 2.893 ( -0.03%) [ +0.00% +0.10% +0.00% / -0.03% +0.48% +0.62%] index_copy_ strided 7 : Elapsed 0.029 ms (2.894 ms / 100) 3.021 -> 3.021 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.36% +0.33%] index_add_ perm : Elapsed 0.030 ms (3.021 ms / 100) 2.897 -> 2.897 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.38% +0.41%] index_copy_ perm : Elapsed 0.029 ms (2.897 ms / 100) 3.022 -> 3.022 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.33% +0.33%] index_add_ perm_sorted : Elapsed 0.030 ms (3.023 ms / 100) 2.897 -> 2.899 ( +0.07%) [ +0.03% +0.07% +0.00% / +0.07% +0.38% +0.45%] index_copy_ perm_sorted : Elapsed 0.029 ms (2.898 ms / 100) 5.294 -> 5.297 ( +0.06%) [ +0.06% +0.15% +0.00% / +0.06% +0.19% +0.15%] index_select const : Elapsed 0.053 ms (5.297 ms / 100) 5.304 -> 5.309 ( +0.09%) [ +0.08% +0.00% +0.08% / +0.09% +0.19% +0.21%] index_select wrap : Elapsed 0.053 ms (5.308 ms / 100) 5.302 -> 5.311 ( +0.17%) [ +0.08% +0.00% +0.21% / +0.25% +0.17% +0.19%] index_select linear : Elapsed 0.053 ms (5.306 ms / 100) 5.307 -> 5.305 ( -0.04%) [ +0.02% +0.00% +0.02% / -0.04% +0.09% -0.02%] index_select reverse : Elapsed 0.053 ms (5.308 ms / 100) 5.298 -> 5.297 ( -0.02%) [ +0.06% +0.00% +0.06% / +0.09% -0.02% +0.13%] index_select skip64 : Elapsed 0.053 ms (5.301 ms / 100) 5.305 -> 5.294 ( -0.21%) [ +0.08% +0.00% +0.00% / +0.09% -0.15% -0.21%] index_select skip256 : Elapsed 0.053 ms (5.309 ms / 100) 5.304 -> 5.301 ( -0.06%) [ +0.15% +0.13% +0.00% / +0.11% +0.25% -0.06%] index_select spread : Elapsed 0.053 ms (5.312 ms / 100) 5.306 -> 5.307 ( +0.02%) [ +0.00% +0.08% +0.04% / +0.02% +0.09% +0.02%] index_select strided 3 : Elapsed 0.053 ms (5.306 ms / 100) 5.298 -> 5.307 ( +0.17%) [ +0.00% +0.08% +0.06% / +0.17% +0.25% +0.19%] index_select strided 5 : Elapsed 0.053 ms (5.298 ms / 100) 5.304 -> 5.303 ( -0.02%) [ +0.00% +0.04% +0.04% / -0.02% +0.11% +0.04%] index_select strided 7 : Elapsed 0.053 ms (5.304 ms / 100) 5.299 -> 5.299 ( +0.00%) [ +0.11% +0.09% +0.00% / +0.00% +0.11% +0.06%] index_select strided 8 : Elapsed 0.053 ms (5.305 ms / 100) 5.305 -> 5.301 ( -0.08%) [ +0.02% +0.09% +0.00% / +0.06% -0.08% +0.02%] index_select random : Elapsed 0.053 ms (5.306 ms / 100) 5.301 -> 5.302 ( +0.02%) [ +0.00% +0.09% +0.15% / +0.09% +0.11% +0.02%] index_select random_sorted : Elapsed 0.053 ms (5.301 ms / 100) B = [5, 20, 4, 40] (stride (80, 1, 20, 400)) dim = 3 fill_cnt = 16 2.073 -> 2.074 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.24% +0.24%] index_fill_ const : Elapsed 0.021 ms (2.074 ms / 100) 2.072 -> 2.073 ( +0.05%) [ +0.10% +0.00% +0.00% / +0.05% +0.34% +0.34%] index_fill_ linear : Elapsed 0.021 ms (2.074 ms / 100) 2.070 -> 2.070 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.43% +0.48%] index_fill_ reverse : Elapsed 0.021 ms (2.071 ms / 100) 2.070 -> 2.069 ( -0.05%) [ +0.00% +0.00% +0.29% / -0.05% +0.43% +0.43%] index_fill_ skip64 : Elapsed 0.021 ms (2.070 ms / 100) 2.070 -> 2.070 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.48% +0.43%] index_fill_ skip256 : Elapsed 0.021 ms (2.070 ms / 100) 2.070 -> 2.069 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.48% +0.43%] index_fill_ spread : Elapsed 0.021 ms (2.070 ms / 100) 2.072 -> 2.072 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.19% +0.24%] index_fill_ strided 3 : Elapsed 0.021 ms (2.072 ms / 100) 2.073 -> 2.072 ( -0.05%) [ +0.10% +0.00% +0.05% / -0.05% +0.19% +0.19%] index_fill_ strided 5 : Elapsed 0.021 ms (2.075 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.05% +0.05% +0.00% / +0.10% +0.39% +0.34%] index_fill_ strided 7 : Elapsed 0.021 ms (2.072 ms / 100) 2.072 -> 2.071 ( -0.05%) [ +0.00% +0.00% +0.10% / -0.05% +0.34% +0.43%] index_fill_ strided 8 : Elapsed 0.021 ms (2.072 ms / 100) 2.074 -> 2.073 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.14% +0.10%] index_fill_ strided 16 : Elapsed 0.021 ms (2.074 ms / 100) 2.072 -> 2.072 ( +0.00%) [ +0.10% +0.05% +0.00% / +0.00% +0.24% +0.24%] index_fill_ random : Elapsed 0.021 ms (2.074 ms / 100) 2.074 -> 2.074 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.00% +0.10% +0.10%] index_fill_ random_sorted : Elapsed 0.021 ms (2.074 ms / 100) 2.074 -> 2.074 ( +0.00%) [ +0.00% +0.05% +0.05% / +0.00% +0.10% +0.10%] index_fill_ perm : Elapsed 0.021 ms (2.074 ms / 100) 2.072 -> 2.074 ( +0.10%) [ +0.00% +0.05% +0.10% / +0.10% +0.34% +0.34%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.072 ms / 100) B = [5, 20, 4, 40] (stride (20, 1, 100, 400)) A = [5, 20, 4, 16] (stride (1280, 1, 20, 80)) dim = 3 3.639 -> 3.640 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.74% +0.74%] index_add_ linear : Elapsed 0.036 ms (3.640 ms / 100) 3.508 -> 3.506 ( -0.06%) [ +0.00% +0.00% +0.11% / -0.06% +0.71% +0.74%] index_copy_ linear : Elapsed 0.035 ms (3.508 ms / 100) 3.627 -> 3.627 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.77% +0.83%] index_add_ reverse : Elapsed 0.036 ms (3.628 ms / 100) 3.500 -> 3.499 ( -0.03%) [ +0.00% +0.03% +0.20% / -0.03% +0.91% +0.89%] index_copy_ reverse : Elapsed 0.035 ms (3.500 ms / 100) 3.635 -> 3.635 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.77% +0.80%] index_add_ spread : Elapsed 0.036 ms (3.635 ms / 100) 3.507 -> 3.507 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.71% +0.74%] index_copy_ spread : Elapsed 0.035 ms (3.507 ms / 100) 3.635 -> 3.634 ( -0.03%) [ +0.00% +0.06% +0.03% / -0.03% +0.74% +0.72%] index_add_ strided 3 : Elapsed 0.036 ms (3.635 ms / 100) 3.502 -> 3.502 ( +0.00%) [ +0.03% +0.00% +0.17% / +0.00% +0.77% +0.86%] index_copy_ strided 3 : Elapsed 0.035 ms (3.503 ms / 100) 3.628 -> 3.629 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.85% +0.80%] index_add_ strided 7 : Elapsed 0.036 ms (3.628 ms / 100) 3.498 -> 3.501 ( +0.09%) [ +0.00% +0.03% +0.29% / +0.09% +0.97% +0.91%] index_copy_ strided 7 : Elapsed 0.035 ms (3.498 ms / 100) 3.638 -> 3.639 ( +0.03%) [ +0.08% +0.00% +0.11% / +0.03% +0.80% +0.82%] index_add_ perm : Elapsed 0.036 ms (3.641 ms / 100) 3.505 -> 3.505 ( +0.00%) [ +0.09% +0.00% +0.17% / +0.00% +0.86% +0.83%] index_copy_ perm : Elapsed 0.035 ms (3.508 ms / 100) 3.632 -> 3.631 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.83% +0.85%] index_add_ perm_sorted : Elapsed 0.036 ms (3.632 ms / 100) 3.498 -> 3.499 ( +0.03%) [ +0.00% +0.00% +0.14% / +0.03% +0.89% +0.89%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.498 ms / 100) 5.460 -> 5.465 ( +0.09%) [ +0.16% +0.00% +0.13% / +0.09% +0.11% +0.18%] index_select const : Elapsed 0.055 ms (5.469 ms / 100) 5.478 -> 5.475 ( -0.05%) [ +0.07% +0.00% +0.11% / -0.02% -0.04% -0.05%] index_select wrap : Elapsed 0.055 ms (5.482 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.04% +0.00% +0.18% / +0.05% +0.02% +0.00%] index_select linear : Elapsed 0.055 ms (5.478 ms / 100) 5.479 -> 5.472 ( -0.13%) [ +0.02% +0.04% +0.00% / -0.11% -0.09% -0.13%] index_select reverse : Elapsed 0.055 ms (5.480 ms / 100) 5.462 -> 5.466 ( +0.07%) [ +0.00% +0.11% +0.11% / +0.07% +0.18% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.462 ms / 100) 5.468 -> 5.462 ( -0.11%) [ +0.00% +0.05% +0.02% / -0.11% +0.02% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.468 ms / 100) 5.475 -> 5.471 ( -0.07%) [ +0.00% +0.05% +0.04% / +0.00% -0.07% +0.22%] index_select spread : Elapsed 0.055 ms (5.475 ms / 100) 5.476 -> 5.475 ( -0.02%) [ +0.00% +0.04% +0.11% / +0.07% -0.02% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.476 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.02% +0.09% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.480 ms / 100) 5.478 -> 5.475 ( -0.05%) [ +0.00% +0.04% +0.16% / -0.02% -0.02% -0.05%] index_select strided 7 : Elapsed 0.055 ms (5.478 ms / 100) 5.467 -> 5.471 ( +0.07%) [ +0.11% +0.05% +0.00% / +0.07% +0.11% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.473 ms / 100) 5.474 -> 5.471 ( -0.05%) [ +0.07% +0.11% +0.00% / +0.15% -0.05% +0.02%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.471 -> 5.468 ( -0.05%) [ +0.24% +0.13% +0.00% / -0.05% -0.04% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.484 ms / 100) out_shape = [40, 20, 16, 4] in_shape = [5, 20, 16, 4] idx_dim = 0 B = [40, 20, 16, 4] (stride (1280, 4, 80, 1)) A = [5, 20, 16, 4] (stride (1280, 1, 20, 320)) dim = 0 1.624 -> 1.624 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.37% +0.43%] index_add_ linear : Elapsed 0.016 ms (1.624 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.00% +0.19% +0.06% / +0.06% +0.51% +0.57%] index_copy_ linear : Elapsed 0.016 ms (1.573 ms / 100) 1.625 -> 1.625 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.55%] index_add_ reverse : Elapsed 0.016 ms (1.625 ms / 100) 1.571 -> 1.574 ( +0.19%) [ +0.00% +0.06% +0.38% / +0.19% +0.25% +0.25%] index_copy_ reverse : Elapsed 0.016 ms (1.571 ms / 100) 1.625 -> 1.624 ( -0.06%) [ +0.06% +0.00% +0.06% / -0.06% +0.49% +0.49%] index_add_ spread : Elapsed 0.016 ms (1.626 ms / 100) 1.572 -> 1.573 ( +0.06%) [ +0.00% +0.19% +0.25% / +0.06% +0.25% +0.19%] index_copy_ spread : Elapsed 0.016 ms (1.572 ms / 100) 1.625 -> 1.625 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.55% +0.62%] index_add_ strided 3 : Elapsed 0.016 ms (1.625 ms / 100) 1.573 -> 1.574 ( +0.06%) [ +0.00% +0.00% +0.19% / +0.06% +0.13% +0.06%] index_copy_ strided 3 : Elapsed 0.016 ms (1.573 ms / 100) 1.624 -> 1.625 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.016 ms (1.625 ms / 100) 1.570 -> 1.571 ( +0.06%) [ +0.00% +0.06% +0.45% / +0.06% +0.32% +0.45%] index_copy_ strided 7 : Elapsed 0.016 ms (1.570 ms / 100) 1.624 -> 1.624 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.49%] index_add_ perm : Elapsed 0.016 ms (1.624 ms / 100) 1.573 -> 1.575 ( +0.13%) [ +0.00% +0.06% +0.00% / +0.13% +0.64% +0.51%] index_copy_ perm : Elapsed 0.016 ms (1.573 ms / 100) 1.616 -> 1.616 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.50%] index_add_ perm_sorted : Elapsed 0.016 ms (1.616 ms / 100) 1.569 -> 1.570 ( +0.06%) [ +0.00% +0.25% +0.19% / +0.06% +0.19% +0.45%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.569 ms / 100) 8.537 -> 8.530 ( -0.08%) [ +0.00% +0.08% +0.28% / -0.08% +0.00% -0.05%] index_select const : Elapsed 0.085 ms (8.537 ms / 100) 8.524 -> 8.545 ( +0.25%) [ +0.00% +0.19% +0.22% / +0.25% +0.36% +0.28%] index_select wrap : Elapsed 0.085 ms (8.524 ms / 100) 8.534 -> 8.537 ( +0.04%) [ +0.00% +0.29% +0.09% / +0.04% +0.16% +0.12%] index_select linear : Elapsed 0.085 ms (8.534 ms / 100) 8.538 -> 8.534 ( -0.05%) [ +0.06% +0.00% +0.07% / +0.07% +0.21% -0.05%] index_select reverse : Elapsed 0.085 ms (8.543 ms / 100) 8.526 -> 8.528 ( +0.02%) [ +0.12% +0.00% +0.15% / +0.07% +0.07% +0.02%] index_select skip64 : Elapsed 0.085 ms (8.536 ms / 100) 8.525 -> 8.521 ( -0.05%) [ +0.32% +0.11% +0.00% / -0.05% +0.15% +0.12%] index_select skip256 : Elapsed 0.086 ms (8.552 ms / 100) 8.530 -> 8.535 ( +0.06%) [ +0.00% +0.00% +0.13% / +0.06% +0.13% +0.35%] index_select spread : Elapsed 0.085 ms (8.530 ms / 100) 8.531 -> 8.536 ( +0.06%) [ +0.12% +0.09% +0.00% / +0.13% +0.20% +0.06%] index_select strided 3 : Elapsed 0.085 ms (8.541 ms / 100) 8.520 -> 8.529 ( +0.11%) [ +0.33% +0.01% +0.00% / +0.27% +0.11% +0.36%] index_select random : Elapsed 0.085 ms (8.548 ms / 100) 8.524 -> 8.537 ( +0.15%) [ +0.00% +0.12% +0.05% / +0.15% +0.41% +0.19%] index_select random_sorted : Elapsed 0.085 ms (8.524 ms / 100) B = [40, 20, 16, 4] (stride (64, 2560, 4, 1)) A = [5, 20, 16, 4] (stride (1, 320, 5, 80)) dim = 0 1.423 -> 1.423 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.63% +0.70%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.07% +0.22% / +0.00% +0.43% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.380 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.56% +0.63%] index_add_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.58% +0.58%] index_copy_ reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.425 -> 1.423 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +0.49% +0.56%] index_add_ spread : Elapsed 0.014 ms (1.425 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.58% +0.43%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.00% +0.00% +0.42% / -0.07% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.22% +0.07% / +0.00% +0.58% +0.51%] index_copy_ strided 3 : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.98% +0.84%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +0.51% +0.58%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.00% +0.07% +0.21% / +0.00% +0.70% +0.70%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.378 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +0.58% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.70% +0.84%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.29% +0.07% / +0.00% +0.58% +0.51%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.188 -> 8.204 ( +0.20%) [ +0.15% +0.18% +0.00% / +0.34% +0.20% +0.46%] index_select const : Elapsed 0.082 ms (8.200 ms / 100) 8.193 -> 8.201 ( +0.10%) [ +0.00% +0.13% +0.04% / +0.23% +0.10% +0.16%] index_select wrap : Elapsed 0.082 ms (8.193 ms / 100) 8.178 -> 8.198 ( +0.24%) [ +0.23% +0.29% +0.00% / +0.24% +0.26% +0.43%] index_select linear : Elapsed 0.082 ms (8.197 ms / 100) 8.196 -> 8.197 ( +0.01%) [ +0.21% +0.00% +0.24% / +0.01% +0.07% +0.27%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.195 -> 8.203 ( +0.10%) [ +0.00% +0.01% +0.04% / +0.10% +0.35% +0.24%] index_select skip64 : Elapsed 0.082 ms (8.195 ms / 100) 8.192 -> 8.209 ( +0.21%) [ +0.00% +0.04% +0.31% / +0.21% +0.24% +0.28%] index_select skip256 : Elapsed 0.082 ms (8.192 ms / 100) 8.191 -> 8.200 ( +0.11%) [ +0.01% +0.00% +0.05% / +0.11% +0.35% +0.34%] index_select spread : Elapsed 0.082 ms (8.192 ms / 100) 8.180 -> 8.190 ( +0.12%) [ +0.00% +0.17% +0.20% / +0.12% +0.37% +0.44%] index_select strided 3 : Elapsed 0.082 ms (8.180 ms / 100) 8.191 -> 8.198 ( +0.09%) [ +0.13% +0.00% +0.17% / +0.09% +0.32% +0.22%] index_select random : Elapsed 0.082 ms (8.202 ms / 100) 8.191 -> 8.198 ( +0.09%) [ +0.00% +0.10% +0.31% / +0.09% +0.16% +0.40%] index_select random_sorted : Elapsed 0.082 ms (8.191 ms / 100) B = [40, 20, 16, 4] (stride (64, 2560, 1, 16)) A = [5, 20, 16, 4] (stride (64, 320, 4, 1)) dim = 0 1.421 -> 1.420 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.28% +0.35%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.51% +0.51%] index_copy_ linear : Elapsed 0.014 ms (1.374 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.35% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.51% +0.66%] index_copy_ reverse : Elapsed 0.014 ms (1.373 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.35% +0.35%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.15% +0.00% / +0.07% +0.51% +0.73%] index_copy_ spread : Elapsed 0.014 ms (1.373 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.35% +0.35%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.373 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.58% +0.66%] index_copy_ strided 3 : Elapsed 0.014 ms (1.373 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.63% +0.42%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.58% +0.66%] index_copy_ strided 7 : Elapsed 0.014 ms (1.374 ms / 100) 1.419 -> 1.421 ( +0.14%) [ +0.21% +0.07% +0.00% / +0.14% +0.49% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.372 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.66% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.373 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.35% +0.35%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.73% +0.58%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.373 ms / 100) 8.180 -> 8.185 ( +0.06%) [ +0.33% +0.20% +0.00% / +0.20% +0.13% +0.06%] index_select const : Elapsed 0.082 ms (8.207 ms / 100) 8.208 -> 8.201 ( -0.09%) [ +0.12% +0.00% +0.27% / -0.09% +0.35% +0.18%] index_select wrap : Elapsed 0.082 ms (8.218 ms / 100) 8.203 -> 8.198 ( -0.06%) [ +0.00% +0.29% +0.07% / +0.07% +0.23% -0.06%] index_select linear : Elapsed 0.082 ms (8.203 ms / 100) 8.198 -> 8.204 ( +0.07%) [ +0.00% +0.16% +0.15% / +0.07% +0.32% +0.15%] index_select reverse : Elapsed 0.082 ms (8.198 ms / 100) 8.185 -> 8.191 ( +0.07%) [ +0.00% +0.05% +0.04% / +0.07% +0.40% +0.35%] index_select skip64 : Elapsed 0.082 ms (8.185 ms / 100) 8.184 -> 8.186 ( +0.02%) [ +0.00% +0.07% +0.16% / +0.02% +0.51% +0.24%] index_select skip256 : Elapsed 0.082 ms (8.184 ms / 100) 8.201 -> 8.211 ( +0.12%) [ +0.00% +0.05% +0.07% / +0.12% +0.18% +0.17%] index_select spread : Elapsed 0.082 ms (8.201 ms / 100) 8.225 -> 8.210 ( -0.18%) [ +0.07% +0.00% +0.01% / -0.17% +0.23% -0.18%] index_select strided 3 : Elapsed 0.082 ms (8.231 ms / 100) 8.218 -> 8.222 ( +0.05%) [ +0.04% +0.00% +0.09% / +0.11% +0.05% +0.17%] index_select random : Elapsed 0.082 ms (8.221 ms / 100) 8.200 -> 8.204 ( +0.05%) [ +0.16% +0.00% +0.15% / +0.05% +0.27% +0.23%] index_select random_sorted : Elapsed 0.082 ms (8.213 ms / 100) B = [40, 20, 16, 4] (stride (1, 2560, 160, 40)) A = [5, 20, 16, 4] (stride (20, 1, 100, 1600)) dim = 0 1.431 -> 1.432 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.56% +0.63%] index_add_ linear : Elapsed 0.014 ms (1.432 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.00% +0.07% +0.15% / +0.07% +0.73% +0.80%] index_copy_ linear : Elapsed 0.014 ms (1.372 ms / 100) 1.410 -> 1.410 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.57% +0.57%] index_add_ reverse : Elapsed 0.014 ms (1.410 ms / 100) 1.362 -> 1.363 ( +0.07%) [ +0.15% +0.22% +0.00% / +0.07% +0.51% +0.51%] index_copy_ reverse : Elapsed 0.014 ms (1.364 ms / 100) 1.417 -> 1.417 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.56% +0.56%] index_add_ spread : Elapsed 0.014 ms (1.419 ms / 100) 1.364 -> 1.364 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.51%] index_copy_ spread : Elapsed 0.014 ms (1.364 ms / 100) 1.432 -> 1.432 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.432 ms / 100) 1.372 -> 1.374 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.73% +0.66%] index_copy_ strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.407 -> 1.407 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.57% +0.57%] index_add_ strided 7 : Elapsed 0.014 ms (1.407 ms / 100) 1.358 -> 1.359 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.014 ms (1.358 ms / 100) 1.406 -> 1.407 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.57% +0.71%] index_add_ perm : Elapsed 0.014 ms (1.406 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.81% +0.88%] index_copy_ perm : Elapsed 0.014 ms (1.357 ms / 100) 1.432 -> 1.432 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.56% +0.70%] index_add_ perm_sorted : Elapsed 0.014 ms (1.434 ms / 100) 1.372 -> 1.372 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.372 ms / 100) 7.936 -> 7.935 ( -0.01%) [ +0.11% +0.00% +0.26% / -0.01% +0.11% +0.13%] index_select const : Elapsed 0.079 ms (7.945 ms / 100) 7.956 -> 7.943 ( -0.16%) [ +0.08% +0.00% +0.00% / -0.16% +0.18% +0.18%] index_select wrap : Elapsed 0.080 ms (7.962 ms / 100) 7.943 -> 7.939 ( -0.05%) [ +0.16% +0.19% +0.00% / -0.05% +0.54% +0.19%] index_select linear : Elapsed 0.080 ms (7.956 ms / 100) 7.931 -> 7.955 ( +0.30%) [ +0.00% +0.13% +0.13% / +0.30% +0.66% +0.43%] index_select reverse : Elapsed 0.079 ms (7.931 ms / 100) 7.937 -> 7.946 ( +0.11%) [ +0.01% +0.05% +0.00% / +0.11% +0.26% +0.44%] index_select skip64 : Elapsed 0.079 ms (7.938 ms / 100) 7.934 -> 7.950 ( +0.20%) [ +0.08% +0.06% +0.00% / +0.20% +0.57% +0.30%] index_select skip256 : Elapsed 0.079 ms (7.940 ms / 100) 7.957 -> 7.943 ( -0.18%) [ +0.00% +0.10% +0.05% / -0.18% +0.11% +0.18%] index_select spread : Elapsed 0.080 ms (7.957 ms / 100) 7.934 -> 7.968 ( +0.43%) [ +0.00% +0.10% +0.09% / +0.43% +0.64% +0.43%] index_select strided 3 : Elapsed 0.079 ms (7.934 ms / 100) 7.947 -> 7.938 ( -0.11%) [ +0.00% +0.05% +0.20% / -0.11% +0.31% +0.44%] index_select random : Elapsed 0.079 ms (7.947 ms / 100) 7.950 -> 7.947 ( -0.04%) [ +0.05% +0.00% +0.14% / -0.04% +0.16% +0.23%] index_select random_sorted : Elapsed 0.080 ms (7.954 ms / 100) B = [40, 20, 16, 4] (stride (4, 160, 3200, 1)) dim = 0 fill_cnt = 5 0.893 -> 0.894 ( +0.11%) [ +0.22% +0.22% +0.00% / +0.11% +0.67% +0.78%] index_fill_ const : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.895 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.67% +0.67%] index_fill_ linear : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.11% +0.22% +0.00% / +0.00% +0.67% +0.67%] index_fill_ reverse : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.67% +0.67%] index_fill_ skip64 : Elapsed 0.009 ms (0.894 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.00% +0.11% +0.56% / +0.00% +0.56% +0.56%] index_fill_ skip256 : Elapsed 0.009 ms (0.894 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.67% +0.78%] index_fill_ spread : Elapsed 0.009 ms (0.894 ms / 100) 0.893 -> 0.893 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.90% +1.01%] index_fill_ strided 3 : Elapsed 0.009 ms (0.894 ms / 100) 0.893 -> 0.893 ( +0.00%) [ +0.22% +0.00% +0.00% / +0.00% +0.78% +0.67%] index_fill_ strided 5 : Elapsed 0.009 ms (0.895 ms / 100) 0.892 -> 0.895 ( +0.34%) [ +0.11% +0.22% +0.00% / +0.34% +0.90% +0.78%] index_fill_ strided 7 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.893 ( +0.00%) [ +0.00% +0.34% +0.00% / +0.00% +0.78% +0.78%] index_fill_ strided 8 : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.893 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +0.90% +0.90%] index_fill_ strided 16 : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.893 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.67% +0.78%] index_fill_ random : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.78% +0.67%] index_fill_ random_sorted : Elapsed 0.009 ms (0.893 ms / 100) 0.892 -> 0.893 ( +0.11%) [ +0.22% +0.11% +0.00% / +0.11% +0.90% +0.90%] index_fill_ perm : Elapsed 0.009 ms (0.894 ms / 100) 0.892 -> 0.894 ( +0.22%) [ +0.00% +0.11% +0.11% / +0.22% +0.90% +0.78%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.892 ms / 100) B = [40, 20, 16, 4] (stride (1, 40, 3200, 800)) A = [5, 20, 16, 4] (stride (4, 20, 400, 1)) dim = 0 1.435 -> 1.436 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.42% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.437 ms / 100) 1.387 -> 1.387 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.50% +0.43%] index_copy_ linear : Elapsed 0.014 ms (1.387 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.49% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.22% +0.22% +0.15%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.424 ( +0.14%) [ +0.00% +0.00% +0.07% / +0.14% +0.42% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.00% +0.15% / +0.15% +0.15% +0.22%] index_copy_ spread : Elapsed 0.014 ms (1.379 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.28% +0.07% +0.00% / +0.07% +0.56% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.425 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.29% +0.07% +0.00% / +0.00% +0.29% +0.36%] index_copy_ strided 3 : Elapsed 0.014 ms (1.383 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.22% +0.00% / +0.07% +0.36% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.377 ms / 100) 1.434 -> 1.434 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.63% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.436 ms / 100) 1.386 -> 1.387 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.58% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.388 ms / 100) 1.437 -> 1.437 ( +0.00%) [ +0.00% +0.00% +0.28% / +0.00% +0.49% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.437 ms / 100) 1.387 -> 1.388 ( +0.07%) [ +0.00% +0.00% +0.72% / +0.07% +0.58% +0.79%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.387 ms / 100) 8.259 -> 8.260 ( +0.01%) [ +0.18% +0.16% +0.00% / +0.10% +0.01% +0.17%] index_select const : Elapsed 0.083 ms (8.274 ms / 100) 8.273 -> 8.272 ( -0.01%) [ +0.00% +0.25% +0.05% / -0.01% +0.18% +0.16%] index_select wrap : Elapsed 0.083 ms (8.273 ms / 100) 8.273 -> 8.276 ( +0.04%) [ +0.05% +0.04% +0.00% / +0.04% +0.05% +0.10%] index_select linear : Elapsed 0.083 ms (8.277 ms / 100) 8.266 -> 8.268 ( +0.02%) [ +0.00% +0.38% +0.15% / +0.02% +0.33% +0.16%] index_select reverse : Elapsed 0.083 ms (8.266 ms / 100) 8.264 -> 8.258 ( -0.07%) [ +0.02% +0.07% +0.00% / -0.07% +0.21% +0.12%] index_select skip64 : Elapsed 0.083 ms (8.266 ms / 100) 8.268 -> 8.269 ( +0.01%) [ +0.36% +0.00% +0.12% / +0.01% +0.29% +0.04%] index_select skip256 : Elapsed 0.083 ms (8.298 ms / 100) 8.275 -> 8.284 ( +0.11%) [ +0.00% +0.12% +0.11% / +0.11% +0.51% +0.47%] index_select spread : Elapsed 0.083 ms (8.275 ms / 100) 8.276 -> 8.268 ( -0.10%) [ +0.01% +0.01% +0.00% / -0.10% +0.28% +0.16%] index_select strided 3 : Elapsed 0.083 ms (8.277 ms / 100) 8.267 -> 8.276 ( +0.11%) [ +0.02% +0.06% +0.00% / +0.11% +0.31% +0.46%] index_select random : Elapsed 0.083 ms (8.269 ms / 100) 8.284 -> 8.284 ( +0.00%) [ +0.00% +0.07% +0.24% / +0.00% +0.04% +0.01%] index_select random_sorted : Elapsed 0.083 ms (8.284 ms / 100) out_shape = [5, 40, 16, 4] in_shape = [5, 20, 16, 4] idx_dim = 1 B = [5, 40, 16, 4] (stride (1, 320, 20, 5)) A = [5, 20, 16, 4] (stride (4, 320, 20, 1)) dim = 1 2.387 -> 2.403 ( +0.67%) [ +0.08% +0.08% +0.00% / +0.67% +0.80% +0.84%] index_add_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.389 -> 2.400 ( +0.46%) [ +0.17% +0.00% +0.17% / +0.46% +0.67% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.388 -> 2.402 ( +0.59%) [ +0.00% +0.00% +0.13% / +0.63% +0.84% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.388 ms / 100) 2.392 -> 2.403 ( +0.46%) [ +0.00% +0.08% +0.00% / +0.46% +0.54% +0.46%] index_copy_ reverse : Elapsed 0.024 ms (2.392 ms / 100) 2.392 -> 2.400 ( +0.33%) [ +0.21% +0.00% +0.08% / +0.59% +0.67% +0.33%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.392 -> 2.403 ( +0.46%) [ +0.04% +0.08% +0.00% / +0.46% +0.54% +0.46%] index_copy_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.393 -> 2.401 ( +0.33%) [ +0.08% +0.04% +0.00% / +0.54% +0.33% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.395 ms / 100) 2.392 -> 2.403 ( +0.46%) [ +0.04% +0.00% +0.04% / +0.75% +0.59% +0.46%] index_copy_ strided 3 : Elapsed 0.024 ms (2.393 ms / 100) 2.390 -> 2.403 ( +0.54%) [ +0.25% +0.21% +0.00% / +0.67% +0.54% +0.71%] index_add_ strided 7 : Elapsed 0.024 ms (2.396 ms / 100) 2.393 -> 2.402 ( +0.38%) [ +0.13% +0.04% +0.00% / +0.38% +0.59% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.396 ms / 100) 2.392 -> 2.399 ( +0.29%) [ +0.08% +0.00% +0.04% / +0.29% +0.75% +0.54%] index_add_ perm : Elapsed 0.024 ms (2.394 ms / 100) 2.389 -> 2.405 ( +0.67%) [ +0.08% +0.13% +0.00% / +0.67% +0.92% +0.80%] index_copy_ perm : Elapsed 0.024 ms (2.391 ms / 100) 2.391 -> 2.406 ( +0.63%) [ +0.17% +0.00% +0.13% / +0.63% +0.71% +0.79%] index_add_ perm_sorted : Elapsed 0.024 ms (2.395 ms / 100) 2.391 -> 2.403 ( +0.50%) [ +0.13% +0.04% +0.00% / +0.50% +0.79% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 4.411 -> 4.418 ( +0.16%) [ +0.16% +0.05% +0.00% / +0.16% +0.18% +0.16%] index_select const : Elapsed 0.044 ms (4.418 ms / 100) 4.419 -> 4.417 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.14% -0.05%] index_select wrap : Elapsed 0.044 ms (4.425 ms / 100) 4.416 -> 4.421 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.25% +0.16%] index_select linear : Elapsed 0.044 ms (4.421 ms / 100) 4.421 -> 4.425 ( +0.09%) [ +0.07% +0.00% +0.07% / +0.14% +0.18% +0.09%] index_select reverse : Elapsed 0.044 ms (4.424 ms / 100) 4.408 -> 4.409 ( +0.02%) [ +0.07% +0.09% +0.00% / +0.25% +0.02% +0.09%] index_select skip64 : Elapsed 0.044 ms (4.411 ms / 100) 4.408 -> 4.411 ( +0.07%) [ +0.00% +0.11% +0.07% / +0.32% +0.16% +0.07%] index_select skip256 : Elapsed 0.044 ms (4.408 ms / 100) 4.418 -> 4.417 ( -0.02%) [ +0.00% +0.18% +0.18% / -0.02% +0.16% +0.18%] index_select spread : Elapsed 0.044 ms (4.418 ms / 100) 4.417 -> 4.424 ( +0.16%) [ +0.07% +0.09% +0.00% / +0.16% +0.29% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.420 ms / 100) 4.415 -> 4.412 ( -0.07%) [ +0.11% +0.00% +0.00% / +0.00% -0.07% -0.07%] index_select strided 5 : Elapsed 0.044 ms (4.420 ms / 100) 4.418 -> 4.412 ( -0.14%) [ +0.09% +0.07% +0.00% / -0.14% +0.16% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.422 ms / 100) 4.409 -> 4.409 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.25% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.409 ms / 100) 4.408 -> 4.411 ( +0.07%) [ +0.00% +0.18% +0.00% / +0.07% +0.39% +0.29%] index_select strided 16 : Elapsed 0.044 ms (4.408 ms / 100) 4.417 -> 4.416 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.05% +0.16%] index_select random : Elapsed 0.044 ms (4.417 ms / 100) 4.419 -> 4.422 ( +0.07%) [ +0.11% +0.02% +0.00% / +0.16% +0.07% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.424 ms / 100) B = [5, 40, 16, 4] (stride (160, 1, 800, 40)) A = [5, 20, 16, 4] (stride (1, 320, 20, 5)) dim = 1 2.399 -> 2.409 ( +0.42%) [ +0.17% +0.00% +0.17% / +0.42% +0.63% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.407 -> 2.419 ( +0.50%) [ +0.04% +0.00% +0.12% / +0.50% +0.54% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.395 -> 2.405 ( +0.42%) [ +0.04% +0.00% +0.13% / +0.42% +1.09% +0.79%] index_add_ reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.404 -> 2.416 ( +0.50%) [ +0.00% +0.29% +0.12% / +0.50% +0.92% +0.87%] index_copy_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.409 -> 2.420 ( +0.46%) [ +0.00% +0.08% +0.04% / +0.46% +0.79% +0.75%] index_add_ spread : Elapsed 0.024 ms (2.409 ms / 100) 2.422 -> 2.434 ( +0.50%) [ +0.08% +0.00% +0.91% / +0.50% +0.78% +0.87%] index_copy_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.418 -> 2.424 ( +0.25%) [ +0.00% +0.04% +0.29% / +0.33% +0.25% +0.33%] index_add_ strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.428 -> 2.441 ( +0.54%) [ +0.08% +0.00% +0.25% / +0.58% +0.58% +0.54%] index_copy_ strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.414 -> 2.426 ( +0.50%) [ +0.00% +0.17% +0.04% / +0.70% +0.54% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.426 -> 2.438 ( +0.49%) [ +0.00% +0.12% +0.16% / +0.49% +0.70% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.426 ms / 100) 2.415 -> 2.422 ( +0.29%) [ +0.08% +0.00% +0.12% / +0.62% +0.29% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.417 ms / 100) 2.429 -> 2.435 ( +0.25%) [ +0.16% +0.00% +0.12% / +0.58% +0.29% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.433 ms / 100) 2.417 -> 2.420 ( +0.12%) [ +0.00% +0.12% +0.04% / +0.46% +0.17% +0.12%] index_add_ perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.431 -> 2.438 ( +0.29%) [ +0.00% +0.04% +0.16% / +0.58% +0.29% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.431 ms / 100) 4.437 -> 4.436 ( -0.02%) [ +0.09% +0.00% +0.18% / -0.02% +0.00% +0.18%] index_select const : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.11% +0.00% +0.11% / +0.02% +0.14% +0.25%] index_select wrap : Elapsed 0.044 ms (4.441 ms / 100) 4.437 -> 4.438 ( +0.02%) [ +0.25% +0.00% +0.11% / +0.25% +0.23% +0.02%] index_select linear : Elapsed 0.044 ms (4.448 ms / 100) 4.442 -> 4.445 ( +0.07%) [ +0.05% +0.00% +0.16% / +0.11% +0.09% +0.07%] index_select reverse : Elapsed 0.044 ms (4.444 ms / 100) 4.433 -> 4.433 ( +0.00%) [ +0.11% +0.00% +0.18% / +0.16% +0.00% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.435 ( -0.11%) [ +0.02% +0.00% +0.00% / +0.00% -0.07% -0.11%] index_select skip256 : Elapsed 0.044 ms (4.441 ms / 100) 4.437 -> 4.442 ( +0.11%) [ +0.14% +0.00% +0.14% / +0.11% +0.34% +0.14%] index_select spread : Elapsed 0.044 ms (4.443 ms / 100) 4.437 -> 4.441 ( +0.09%) [ +0.02% +0.00% +0.09% / +0.09% +0.18% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.438 ms / 100) 4.429 -> 4.435 ( +0.14%) [ +0.00% +0.07% +0.16% / +0.14% +0.32% +0.27%] index_select strided 5 : Elapsed 0.044 ms (4.429 ms / 100) 4.437 -> 4.437 ( +0.00%) [ +0.00% +0.11% +0.18% / +0.00% +0.14% +0.07%] index_select strided 7 : Elapsed 0.044 ms (4.437 ms / 100) 4.437 -> 4.435 ( -0.05%) [ +0.00% +0.00% +0.09% / -0.05% +0.00% +0.02%] index_select strided 8 : Elapsed 0.044 ms (4.437 ms / 100) 4.438 -> 4.432 ( -0.14%) [ +0.09% +0.07% +0.00% / +0.11% -0.14% -0.07%] index_select strided 16 : Elapsed 0.044 ms (4.442 ms / 100) 4.440 -> 4.437 ( -0.07%) [ +0.00% +0.11% +0.11% / +0.02% -0.07% +0.09%] index_select random : Elapsed 0.044 ms (4.440 ms / 100) 4.442 -> 4.436 ( -0.14%) [ +0.05% +0.00% +0.05% / +0.07% -0.02% -0.14%] index_select random_sorted : Elapsed 0.044 ms (4.444 ms / 100) B = [5, 40, 16, 4] (stride (4, 20, 800, 1)) A = [5, 20, 16, 4] (stride (16, 320, 1, 80)) dim = 1 2.462 -> 2.475 ( +0.53%) [ +0.04% +0.00% +0.08% / +0.53% +0.57% +0.69%] index_add_ linear : Elapsed 0.025 ms (2.463 ms / 100) 2.446 -> 2.459 ( +0.53%) [ +0.16% +0.00% +0.08% / +0.53% +0.86% +0.74%] index_copy_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.467 -> 2.475 ( +0.32%) [ +0.00% +0.04% +0.16% / +0.32% +0.41% +0.41%] index_add_ reverse : Elapsed 0.025 ms (2.467 ms / 100) 2.448 -> 2.461 ( +0.53%) [ +0.04% +0.00% +0.04% / +0.53% +0.61% +0.65%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.473 -> 2.487 ( +0.57%) [ +0.12% +0.00% +0.04% / +0.73% +0.57% +0.57%] index_add_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.460 -> 2.475 ( +0.61%) [ +0.12% +0.20% +0.00% / +0.65% +0.61% +0.73%] index_copy_ spread : Elapsed 0.025 ms (2.463 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.12% +0.12% +0.00% / +0.40% +0.28% +0.49%] index_add_ strided 3 : Elapsed 0.025 ms (2.473 ms / 100) 2.456 -> 2.471 ( +0.61%) [ +0.20% +0.00% +0.16% / +0.61% +0.69% +0.69%] index_copy_ strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.468 -> 2.482 ( +0.57%) [ +0.08% +0.32% +0.00% / +0.73% +0.57% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.459 -> 2.470 ( +0.45%) [ +0.12% +0.12% +0.00% / +0.61% +0.45% +0.69%] index_copy_ strided 7 : Elapsed 0.025 ms (2.462 ms / 100) 2.470 -> 2.482 ( +0.49%) [ +0.00% +0.16% +0.04% / +0.49% +0.53% +0.65%] index_add_ perm : Elapsed 0.025 ms (2.470 ms / 100) 2.454 -> 2.468 ( +0.57%) [ +0.04% +0.33% +0.00% / +0.57% +0.98% +0.86%] index_copy_ perm : Elapsed 0.025 ms (2.455 ms / 100) 2.468 -> 2.479 ( +0.45%) [ +0.16% +0.28% +0.00% / +0.45% +0.81% +0.53%] index_add_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 2.454 -> 2.465 ( +0.45%) [ +0.20% +0.04% +0.00% / +0.45% +0.86% +0.90%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) 4.488 -> 4.493 ( +0.11%) [ +0.04% +0.00% +0.13% / +0.16% +0.11% +0.20%] index_select const : Elapsed 0.045 ms (4.490 ms / 100) 4.499 -> 4.498 ( -0.02%) [ +0.20% +0.04% +0.00% / -0.02% +0.11% +0.20%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.502 -> 4.504 ( +0.04%) [ +0.11% +0.04% +0.00% / +0.04% +0.07% +0.16%] index_select linear : Elapsed 0.045 ms (4.507 ms / 100) 4.493 -> 4.500 ( +0.16%) [ +0.00% +0.16% +0.22% / +0.16% +0.33% +0.29%] index_select reverse : Elapsed 0.045 ms (4.493 ms / 100) 4.492 -> 4.491 ( -0.02%) [ +0.00% +0.16% +0.07% / +0.11% +0.02% -0.02%] index_select skip64 : Elapsed 0.045 ms (4.492 ms / 100) 4.490 -> 4.488 ( -0.04%) [ +0.11% +0.09% +0.00% / +0.16% -0.04% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.495 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.04% +0.07%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.499 -> 4.501 ( +0.04%) [ +0.02% +0.07% +0.00% / +0.16% +0.38% +0.04%] index_select strided 3 : Elapsed 0.045 ms (4.500 ms / 100) 4.492 -> 4.493 ( +0.02%) [ +0.07% +0.13% +0.00% / +0.02% +0.11% +0.13%] index_select strided 5 : Elapsed 0.045 ms (4.495 ms / 100) 4.501 -> 4.496 ( -0.11%) [ +0.18% +0.00% +0.09% / -0.11% +0.16% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.509 ms / 100) 4.491 -> 4.490 ( -0.02%) [ +0.11% +0.00% +0.04% / -0.02% +0.22% +0.09%] index_select strided 8 : Elapsed 0.045 ms (4.496 ms / 100) 4.488 -> 4.492 ( +0.09%) [ +0.13% +0.18% +0.00% / +0.09% +0.25% +0.20%] index_select strided 16 : Elapsed 0.045 ms (4.494 ms / 100) 4.497 -> 4.497 ( +0.00%) [ +0.00% +0.16% +0.07% / +0.00% +0.07% +0.09%] index_select random : Elapsed 0.045 ms (4.497 ms / 100) 4.497 -> 4.495 ( -0.04%) [ +0.00% +0.18% +0.11% / -0.04% +0.20% +0.29%] index_select random_sorted : Elapsed 0.045 ms (4.497 ms / 100) B = [5, 40, 16, 4] (stride (4, 20, 800, 1)) A = [5, 20, 16, 4] (stride (20, 1, 100, 1600)) dim = 1 2.452 -> 2.466 ( +0.57%) [ +0.04% +0.16% +0.00% / +0.57% +0.69% +0.57%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.16% +0.00% +0.25% / +0.45% +0.82% +1.10%] index_copy_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.445 -> 2.456 ( +0.45%) [ +0.04% +0.00% +0.12% / +0.45% +1.15% +1.27%] index_add_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.439 -> 2.449 ( +0.41%) [ +0.00% +0.08% +0.12% / +0.41% +1.07% +1.19%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.454 -> 2.468 ( +0.57%) [ +0.16% +0.12% +0.00% / +0.57% +0.86% +0.90%] index_add_ spread : Elapsed 0.025 ms (2.458 ms / 100) 2.450 -> 2.468 ( +0.73%) [ +0.04% +0.00% +0.33% / +0.73% +1.31% +1.27%] index_copy_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.460 -> 2.469 ( +0.37%) [ +0.08% +0.00% +0.08% / +0.45% +0.37% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.462 ms / 100) 2.454 -> 2.468 ( +0.57%) [ +0.16% +0.00% +0.24% / +0.57% +0.77% +0.69%] index_copy_ strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.460 -> 2.473 ( +0.53%) [ +0.00% +0.08% +0.04% / +0.61% +0.53% +0.73%] index_add_ strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.455 -> 2.473 ( +0.73%) [ +0.12% +0.33% +0.00% / +0.73% +0.81% +1.02%] index_copy_ strided 7 : Elapsed 0.025 ms (2.458 ms / 100) 2.456 -> 2.461 ( +0.20%) [ +0.00% +0.24% +0.33% / +0.69% +0.41% +0.20%] index_add_ perm : Elapsed 0.025 ms (2.456 ms / 100) 2.456 -> 2.463 ( +0.29%) [ +0.04% +0.00% +0.08% / +0.57% +0.29% +0.37%] index_copy_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.461 -> 2.466 ( +0.20%) [ +0.00% +0.24% +0.08% / +0.57% +0.33% +0.20%] index_add_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.455 -> 2.463 ( +0.33%) [ +0.00% +0.41% +0.08% / +0.61% +0.33% +0.45%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.455 ms / 100) 4.501 -> 4.501 ( +0.00%) [ +0.00% +0.16% +0.07% / +0.00% +0.09% +0.18%] index_select const : Elapsed 0.045 ms (4.501 ms / 100) 4.504 -> 4.506 ( +0.04%) [ +0.13% +0.24% +0.00% / +0.18% +0.11% +0.04%] index_select wrap : Elapsed 0.045 ms (4.510 ms / 100) 4.509 -> 4.504 ( -0.11%) [ +0.11% +0.04% +0.00% / +0.18% -0.11% +0.20%] index_select linear : Elapsed 0.045 ms (4.514 ms / 100) 4.512 -> 4.506 ( -0.13%) [ +0.07% +0.00% +0.07% / -0.02% -0.13% -0.02%] index_select reverse : Elapsed 0.045 ms (4.515 ms / 100) 4.505 -> 4.501 ( -0.09%) [ +0.00% +0.04% +0.02% / -0.04% -0.07% -0.09%] index_select skip64 : Elapsed 0.045 ms (4.505 ms / 100) 4.496 -> 4.501 ( +0.11%) [ +0.27% +0.00% +0.42% / +0.11% +0.16% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.510 ( +0.07%) [ +0.00% +0.00% +0.04% / +0.07% +0.18% +0.09%] index_select spread : Elapsed 0.045 ms (4.507 ms / 100) 4.507 -> 4.509 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.07% +0.04% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.504 -> 4.508 ( +0.09%) [ +0.18% +0.00% +0.09% / +0.09% +0.20% +0.18%] index_select strided 5 : Elapsed 0.045 ms (4.512 ms / 100) 4.506 -> 4.507 ( +0.02%) [ +0.11% +0.00% +0.04% / +0.02% +0.07% +0.13%] index_select strided 7 : Elapsed 0.045 ms (4.511 ms / 100) 4.507 -> 4.505 ( -0.04%) [ +0.00% +0.00% +0.07% / +0.07% +0.04% -0.04%] index_select strided 8 : Elapsed 0.045 ms (4.507 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.18% +0.16% +0.00% / +0.13% +0.02% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.511 ms / 100) 4.510 -> 4.505 ( -0.11%) [ +0.07% +0.00% +0.00% / +0.00% +0.00% -0.11%] index_select random : Elapsed 0.045 ms (4.513 ms / 100) 4.510 -> 4.509 ( -0.02%) [ +0.00% +0.04% +0.13% / +0.00% -0.02% +0.02%] index_select random_sorted : Elapsed 0.045 ms (4.510 ms / 100) out_shape = [5, 20, 40, 4] in_shape = [5, 20, 16, 4] idx_dim = 2 B = [5, 20, 40, 4] (stride (3200, 1, 20, 800)) A = [5, 20, 16, 4] (stride (320, 16, 1, 1600)) dim = 2 3.885 -> 3.888 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.82% +0.72%] index_add_ linear : Elapsed 0.039 ms (3.888 ms / 100) 3.738 -> 3.748 ( +0.27%) [ +0.13% +0.00% +0.19% / +0.27% +0.70% +0.78%] index_copy_ linear : Elapsed 0.037 ms (3.743 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.05% +0.00% +0.05% / +0.03% +0.59% +0.59%] index_add_ reverse : Elapsed 0.039 ms (3.920 ms / 100) 3.776 -> 3.777 ( +0.03%) [ +0.00% +0.05% +0.11% / +0.03% +0.64% +0.61%] index_copy_ reverse : Elapsed 0.038 ms (3.776 ms / 100) 3.897 -> 3.894 ( -0.08%) [ +0.03% +0.05% +0.00% / -0.08% +0.46% +0.56%] index_add_ spread : Elapsed 0.039 ms (3.898 ms / 100) 3.744 -> 3.743 ( -0.03%) [ +0.00% +0.00% +0.08% / -0.03% +0.51% +0.56%] index_copy_ spread : Elapsed 0.037 ms (3.744 ms / 100) 3.897 -> 3.894 ( -0.08%) [ +0.21% +0.00% +0.13% / -0.08% +0.51% +0.51%] index_add_ strided 3 : Elapsed 0.039 ms (3.905 ms / 100) 3.747 -> 3.747 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.48% +0.43%] index_copy_ strided 3 : Elapsed 0.037 ms (3.748 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.51% +0.48%] index_add_ strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 3.780 -> 3.780 ( +0.00%) [ +0.03% +0.00% +0.13% / +0.00% +0.42% +0.42%] index_copy_ strided 7 : Elapsed 0.038 ms (3.781 ms / 100) 3.888 -> 3.884 ( -0.10%) [ +0.03% +0.00% +0.08% / -0.10% +0.57% +0.82%] index_add_ perm : Elapsed 0.039 ms (3.889 ms / 100) 3.744 -> 3.740 ( -0.11%) [ +0.03% +0.00% +0.11% / -0.11% +0.43% +0.61%] index_copy_ perm : Elapsed 0.037 ms (3.745 ms / 100) 3.889 -> 3.893 ( +0.10%) [ +0.03% +0.00% +0.08% / +0.10% +0.57% +0.75%] index_add_ perm_sorted : Elapsed 0.039 ms (3.890 ms / 100) 3.745 -> 3.747 ( +0.05%) [ +0.03% +0.00% +0.11% / +0.05% +0.35% +0.48%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.746 ms / 100) 5.480 -> 5.478 ( -0.04%) [ +0.00% +0.04% +0.04% / -0.04% +0.22% +0.05%] index_select const : Elapsed 0.055 ms (5.480 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.02% +0.00% +0.05% / +0.02% +0.05% +0.13%] index_select wrap : Elapsed 0.055 ms (5.486 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.15% +0.00% +0.18% / -0.05% +0.00% +0.00%] index_select linear : Elapsed 0.055 ms (5.489 ms / 100) 5.484 -> 5.486 ( +0.04%) [ +0.00% +0.04% +0.09% / +0.05% +0.04% +0.07%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.482 -> 5.483 ( +0.02%) [ +0.22% +0.00% +0.11% / +0.07% +0.07% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.494 ms / 100) 5.482 -> 5.479 ( -0.05%) [ +0.16% +0.00% +0.20% / +0.13% -0.05% -0.02%] index_select skip256 : Elapsed 0.055 ms (5.491 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.00% +0.11% +0.05% / +0.11% +0.07% +0.04%] index_select spread : Elapsed 0.055 ms (5.481 ms / 100) 5.481 -> 5.480 ( -0.02%) [ +0.00% +0.20% +0.09% / -0.02% +0.02% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.481 ms / 100) 5.488 -> 5.480 ( -0.15%) [ +0.07% +0.00% +0.02% / +0.00% -0.15% -0.07%] index_select strided 5 : Elapsed 0.055 ms (5.492 ms / 100) 5.482 -> 5.479 ( -0.05%) [ +0.00% +0.05% +0.18% / +0.02% +0.11% -0.05%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.00% +0.02% +0.05% / +0.05% +0.18% +0.15%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.09% +0.05%] index_select random : Elapsed 0.055 ms (5.482 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.00% +0.11% +0.11% / +0.02% -0.05% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [5, 20, 40, 4] (stride (160, 800, 4, 1)) A = [5, 20, 16, 4] (stride (1, 5, 100, 1600)) dim = 2 4.085 -> 4.081 ( -0.10%) [ +0.02% +0.00% +0.00% / -0.10% +0.71% +0.78%] index_add_ linear : Elapsed 0.041 ms (4.086 ms / 100) 3.956 -> 3.953 ( -0.08%) [ +0.00% +0.00% +0.10% / -0.08% +0.71% +0.76%] index_copy_ linear : Elapsed 0.040 ms (3.956 ms / 100) 4.080 -> 4.080 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.76% +0.86%] index_add_ reverse : Elapsed 0.041 ms (4.080 ms / 100) 3.940 -> 3.942 ( +0.05%) [ +0.03% +0.00% +0.15% / +0.05% +0.81% +0.74%] index_copy_ reverse : Elapsed 0.039 ms (3.941 ms / 100) 4.081 -> 4.083 ( +0.05%) [ +0.00% +0.05% +0.22% / +0.05% +0.76% +0.71%] index_add_ spread : Elapsed 0.041 ms (4.081 ms / 100) 3.943 -> 3.943 ( +0.00%) [ +0.05% +0.00% +0.20% / +0.00% +0.79% +0.79%] index_copy_ spread : Elapsed 0.039 ms (3.945 ms / 100) 4.081 -> 4.082 ( +0.02%) [ +0.07% +0.00% +0.02% / +0.02% +0.88% +0.96%] index_add_ strided 3 : Elapsed 0.041 ms (4.084 ms / 100) 3.943 -> 3.941 ( -0.05%) [ +0.03% +0.00% +0.10% / -0.05% +0.79% +0.74%] index_copy_ strided 3 : Elapsed 0.039 ms (3.944 ms / 100) 4.080 -> 4.081 ( +0.02%) [ +0.05% +0.00% +0.07% / +0.02% +0.78% +0.78%] index_add_ strided 7 : Elapsed 0.041 ms (4.082 ms / 100) 3.942 -> 3.942 ( +0.00%) [ +0.00% +0.00% +0.18% / +0.00% +0.74% +0.76%] index_copy_ strided 7 : Elapsed 0.039 ms (3.942 ms / 100) 4.086 -> 4.083 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.69% +0.64%] index_add_ perm : Elapsed 0.041 ms (4.086 ms / 100) 3.957 -> 3.955 ( -0.05%) [ +0.00% +0.00% +0.18% / -0.05% +0.68% +0.58%] index_copy_ perm : Elapsed 0.040 ms (3.957 ms / 100) 4.081 -> 4.085 ( +0.10%) [ +0.12% +0.00% +0.10% / +0.10% +0.91% +0.93%] index_add_ perm_sorted : Elapsed 0.041 ms (4.086 ms / 100) 3.944 -> 3.945 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.74% +0.76%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 5.555 -> 5.551 ( -0.07%) [ +0.00% +0.05% +0.16% / -0.04% -0.07% +0.00%] index_select const : Elapsed 0.056 ms (5.555 ms / 100) 5.556 -> 5.557 ( +0.02%) [ +0.14% +0.20% +0.00% / +0.34% +0.11% +0.02%] index_select wrap : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.558 ( -0.07%) [ +0.09% +0.07% +0.00% / +0.09% -0.07% +0.09%] index_select linear : Elapsed 0.056 ms (5.567 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.00% +0.16% +0.18% / +0.09% +0.07% +0.07%] index_select reverse : Elapsed 0.056 ms (5.558 ms / 100) 5.555 -> 5.560 ( +0.09%) [ +0.04% +0.00% +0.02% / +0.09% +0.13% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.554 -> 5.555 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.07% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.563 -> 5.555 ( -0.14%) [ +0.00% +0.00% +0.00% / +0.11% -0.11% -0.14%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.14% +0.04% +0.00% / -0.04% +0.05% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.571 ms / 100) 5.560 -> 5.559 ( -0.02%) [ +0.11% +0.13% +0.00% / +0.27% -0.02% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.566 ms / 100) 5.553 -> 5.562 ( +0.16%) [ +0.22% +0.00% +0.31% / +0.16% +0.20% +0.18%] index_select strided 7 : Elapsed 0.056 ms (5.565 ms / 100) 5.558 -> 5.555 ( -0.05%) [ +0.04% +0.00% +0.09% / -0.05% +0.05% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.562 -> 5.555 ( -0.13%) [ +0.04% +0.00% +0.02% / -0.05% -0.13% +0.02%] index_select random : Elapsed 0.056 ms (5.564 ms / 100) 5.565 -> 5.558 ( -0.13%) [ +0.07% +0.00% +0.04% / +0.09% +0.00% -0.13%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [5, 20, 40, 4] (stride (1, 800, 5, 200)) A = [5, 20, 16, 4] (stride (1280, 4, 80, 1)) dim = 2 3.928 -> 3.927 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.56% +0.59%] index_add_ linear : Elapsed 0.039 ms (3.930 ms / 100) 3.802 -> 3.802 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.53% +0.55%] index_copy_ linear : Elapsed 0.038 ms (3.804 ms / 100) 3.943 -> 3.942 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.51% +0.48%] index_add_ reverse : Elapsed 0.039 ms (3.943 ms / 100) 3.816 -> 3.816 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.52% +0.55%] index_copy_ reverse : Elapsed 0.038 ms (3.817 ms / 100) 3.929 -> 3.927 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.48% +0.48%] index_add_ spread : Elapsed 0.039 ms (3.929 ms / 100) 3.807 -> 3.806 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.50% +0.47%] index_copy_ spread : Elapsed 0.038 ms (3.807 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.53% +0.51%] index_add_ strided 3 : Elapsed 0.039 ms (3.932 ms / 100) 3.803 -> 3.804 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.50% +0.53%] index_copy_ strided 3 : Elapsed 0.038 ms (3.803 ms / 100) 3.943 -> 3.943 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.53% +0.51%] index_add_ strided 7 : Elapsed 0.039 ms (3.945 ms / 100) 3.818 -> 3.818 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.038 ms (3.818 ms / 100) 3.929 -> 3.928 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.48% +0.48%] index_add_ perm : Elapsed 0.039 ms (3.929 ms / 100) 3.803 -> 3.803 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.45% +0.42%] index_copy_ perm : Elapsed 0.038 ms (3.803 ms / 100) 3.930 -> 3.931 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.41% +0.43%] index_add_ perm_sorted : Elapsed 0.039 ms (3.930 ms / 100) 3.803 -> 3.804 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.803 ms / 100) 5.557 -> 5.555 ( -0.04%) [ +0.00% +0.05% +0.11% / -0.04% +0.23% +0.22%] index_select const : Elapsed 0.056 ms (5.557 ms / 100) 5.564 -> 5.569 ( +0.09%) [ +0.00% +0.13% +0.00% / +0.09% +0.14% +0.11%] index_select wrap : Elapsed 0.056 ms (5.564 ms / 100) 5.566 -> 5.568 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.04% +0.04% +0.05%] index_select linear : Elapsed 0.056 ms (5.571 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.07% +0.00% +0.22% / +0.09% +0.16% +0.05%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.560 -> 5.558 ( -0.04%) [ +0.00% +0.13% +0.22% / +0.07% -0.04% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.560 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.00% +0.09% +0.14% / +0.13% +0.02% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.11% +0.02% +0.00% / +0.02% +0.00% +0.00%] index_select spread : Elapsed 0.056 ms (5.575 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.04% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.02% +0.00% +0.00% / -0.04% +0.05% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.568 ( +0.04%) [ +0.09% +0.09% +0.00% / +0.04% +0.07% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.571 ms / 100) 5.562 -> 5.558 ( -0.07%) [ +0.02% +0.04% +0.00% / -0.07% -0.02% +0.00%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.564 -> 5.568 ( +0.07%) [ +0.04% +0.07% +0.00% / +0.09% +0.14% +0.07%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.565 -> 5.572 ( +0.13%) [ +0.09% +0.00% +0.02% / +0.20% +0.13% +0.20%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) B = [5, 20, 40, 4] (stride (20, 1, 400, 100)) A = [5, 20, 16, 4] (stride (20, 1, 400, 100)) dim = 2 1.334 -> 1.335 ( +0.07%) [ +0.30% +0.15% +0.00% / +0.07% +1.12% +0.97%] index_add_ linear : Elapsed 0.013 ms (1.338 ms / 100) 1.296 -> 1.297 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +1.08% +1.00%] index_copy_ linear : Elapsed 0.013 ms (1.297 ms / 100) 1.329 -> 1.329 ( +0.00%) [ +0.08% +0.23% +0.00% / +0.00% +0.98% +0.90%] index_add_ reverse : Elapsed 0.013 ms (1.330 ms / 100) 1.293 -> 1.294 ( +0.08%) [ +0.15% +0.23% +0.00% / +0.08% +1.08% +1.16%] index_copy_ reverse : Elapsed 0.013 ms (1.295 ms / 100) 1.330 -> 1.331 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.90% +0.98%] index_add_ spread : Elapsed 0.013 ms (1.331 ms / 100) 1.292 -> 1.292 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +1.32% +1.16%] index_copy_ spread : Elapsed 0.013 ms (1.292 ms / 100) 1.337 -> 1.336 ( -0.07%) [ +0.00% +0.00% +0.30% / -0.07% +0.90% +0.75%] index_add_ strided 3 : Elapsed 0.013 ms (1.337 ms / 100) 1.298 -> 1.297 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +1.39% +1.16%] index_copy_ strided 3 : Elapsed 0.013 ms (1.298 ms / 100) 1.330 -> 1.328 ( -0.15%) [ +0.08% +0.00% +0.00% / -0.15% +1.13% +0.83%] index_add_ strided 7 : Elapsed 0.013 ms (1.331 ms / 100) 1.295 -> 1.295 ( +0.00%) [ +0.08% +0.00% +0.15% / +0.00% +1.24% +1.16%] index_copy_ strided 7 : Elapsed 0.013 ms (1.296 ms / 100) 1.333 -> 1.335 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.90% +0.98%] index_add_ perm : Elapsed 0.013 ms (1.333 ms / 100) 1.296 -> 1.297 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +1.16% +1.23%] index_copy_ perm : Elapsed 0.013 ms (1.296 ms / 100) 1.336 -> 1.336 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +1.05% +0.82%] index_add_ perm_sorted : Elapsed 0.013 ms (1.336 ms / 100) 1.299 -> 1.299 ( +0.00%) [ +0.23% +0.15% +0.00% / +0.00% +1.39% +1.08%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.302 ms / 100) 3.525 -> 3.527 ( +0.06%) [ +0.26% +0.00% +0.17% / +0.11% +0.34% +0.06%] index_select const : Elapsed 0.035 ms (3.534 ms / 100) 3.541 -> 3.540 ( -0.03%) [ +0.17% +0.00% +0.11% / +0.06% -0.03% +0.20%] index_select wrap : Elapsed 0.035 ms (3.547 ms / 100) 3.535 -> 3.548 ( +0.37%) [ +0.00% +0.11% +0.17% / +0.37% +0.48% +0.76%] index_select linear : Elapsed 0.035 ms (3.535 ms / 100) 3.540 -> 3.550 ( +0.28%) [ +0.11% +0.08% +0.00% / +0.28% +0.31% +0.45%] index_select reverse : Elapsed 0.035 ms (3.544 ms / 100) 3.528 -> 3.526 ( -0.06%) [ +0.23% +0.11% +0.00% / +0.09% -0.06% +0.09%] index_select skip64 : Elapsed 0.035 ms (3.536 ms / 100) 3.530 -> 3.531 ( +0.03%) [ +0.00% +0.11% +0.08% / +0.03% +0.06% +0.14%] index_select skip256 : Elapsed 0.035 ms (3.530 ms / 100) 3.542 -> 3.542 ( +0.00%) [ +0.17% +0.00% +0.20% / +0.11% +0.00% +0.14%] index_select spread : Elapsed 0.035 ms (3.548 ms / 100) 3.556 -> 3.538 ( -0.51%) [ +0.00% +0.17% +0.08% / +0.25% -0.37% -0.51%] index_select strided 3 : Elapsed 0.036 ms (3.556 ms / 100) 3.562 -> 3.542 ( -0.56%) [ +0.08% +0.00% +0.08% / -0.20% -0.56% -0.56%] index_select strided 5 : Elapsed 0.036 ms (3.565 ms / 100) 3.537 -> 3.543 ( +0.17%) [ +0.11% +0.00% +0.00% / +0.17% +0.20% +0.25%] index_select strided 7 : Elapsed 0.035 ms (3.541 ms / 100) 3.527 -> 3.531 ( +0.11%) [ +0.00% +0.06% +0.14% / +0.28% +0.20% +0.11%] index_select strided 8 : Elapsed 0.035 ms (3.527 ms / 100) 3.531 -> 3.536 ( +0.14%) [ +0.25% +0.06% +0.00% / +0.14% +0.91% +0.85%] index_select random : Elapsed 0.035 ms (3.540 ms / 100) 3.534 -> 3.537 ( +0.08%) [ +0.00% +0.06% +0.08% / +0.08% +0.99% +0.91%] index_select random_sorted : Elapsed 0.035 ms (3.534 ms / 100) B = [5, 20, 40, 4] (stride (800, 40, 1, 4000)) A = [5, 20, 16, 4] (stride (80, 1, 400, 20)) dim = 2 3.632 -> 3.632 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.58%] index_add_ linear : Elapsed 0.036 ms (3.632 ms / 100) 3.516 -> 3.525 ( +0.26%) [ +0.20% +0.00% +0.11% / +0.26% +0.63% +0.65%] index_copy_ linear : Elapsed 0.035 ms (3.523 ms / 100) 3.643 -> 3.644 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.58% +0.58%] index_add_ reverse : Elapsed 0.036 ms (3.643 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.00% +0.06% +0.00% / +0.03% +0.57% +0.57%] index_copy_ reverse : Elapsed 0.035 ms (3.511 ms / 100) 3.635 -> 3.635 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.55% +0.55%] index_add_ spread : Elapsed 0.036 ms (3.637 ms / 100) 3.514 -> 3.520 ( +0.17%) [ +0.17% +0.11% +0.00% / +0.17% +0.54% +0.63%] index_copy_ spread : Elapsed 0.035 ms (3.520 ms / 100) 3.635 -> 3.635 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.50% +0.52%] index_add_ strided 3 : Elapsed 0.036 ms (3.636 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.06% +0.11% +0.00% / +0.03% +0.60% +0.60%] index_copy_ strided 3 : Elapsed 0.035 ms (3.503 ms / 100) 3.641 -> 3.641 ( +0.00%) [ +0.08% +0.03% +0.00% / +0.00% +0.63% +0.60%] index_add_ strided 7 : Elapsed 0.036 ms (3.644 ms / 100) 3.510 -> 3.510 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.74%] index_copy_ strided 7 : Elapsed 0.035 ms (3.510 ms / 100) 3.631 -> 3.631 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.55% +0.52%] index_add_ perm : Elapsed 0.036 ms (3.633 ms / 100) 3.516 -> 3.526 ( +0.28%) [ +0.23% +0.00% +0.14% / +0.28% +0.63% +0.74%] index_copy_ perm : Elapsed 0.035 ms (3.524 ms / 100) 3.631 -> 3.632 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.47% +0.50%] index_add_ perm_sorted : Elapsed 0.036 ms (3.632 ms / 100) 3.524 -> 3.524 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.37% +0.54%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.524 ms / 100) 5.473 -> 5.475 ( +0.04%) [ +0.02% +0.00% +0.00% / +0.09% +0.04% +0.11%] index_select const : Elapsed 0.055 ms (5.474 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.37% +0.29%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.00% +0.07% +0.13% / +0.07% +0.15% +0.33%] index_select linear : Elapsed 0.055 ms (5.473 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.00% +0.07% +0.00% / +0.04% +0.20% +0.04%] index_select reverse : Elapsed 0.055 ms (5.476 ms / 100) 5.475 -> 5.474 ( -0.02%) [ +0.05% +0.00% +0.11% / +0.05% +0.02% -0.02%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.472 -> 5.467 ( -0.09%) [ +0.07% +0.00% +0.15% / +0.15% -0.09% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.475 -> 5.478 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.09% +0.13%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.474 -> 5.480 ( +0.11%) [ +0.04% +0.00% +0.04% / +0.11% +0.13% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.476 ms / 100) 5.477 -> 5.477 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.09% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.480 ms / 100) 5.477 -> 5.477 ( +0.00%) [ +0.02% +0.00% +0.15% / +0.00% +0.05% +0.18%] index_select strided 7 : Elapsed 0.055 ms (5.478 ms / 100) 5.467 -> 5.469 ( +0.04%) [ +0.13% +0.04% +0.00% / +0.09% +0.24% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.474 ms / 100) 5.476 -> 5.475 ( -0.02%) [ +0.00% +0.04% +0.02% / -0.02% +0.09% +0.09%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.474 -> 5.476 ( +0.04%) [ +0.00% +0.16% +0.13% / +0.04% +0.04% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.474 ms / 100) B = [5, 20, 40, 4] (stride (800, 1, 20, 4000)) A = [5, 20, 16, 4] (stride (1280, 1, 20, 320)) dim = 2 4.023 -> 4.035 ( +0.30%) [ +0.02% +0.00% +0.00% / +0.30% +1.04% +0.92%] index_add_ linear : Elapsed 0.040 ms (4.024 ms / 100) 3.889 -> 3.898 ( +0.23%) [ +0.03% +0.03% +0.00% / +0.23% +0.90% +0.93%] index_copy_ linear : Elapsed 0.039 ms (3.890 ms / 100) 4.020 -> 4.019 ( -0.02%) [ +0.32% +0.05% +0.00% / -0.02% +1.04% +1.00%] index_add_ reverse : Elapsed 0.040 ms (4.033 ms / 100) 3.888 -> 3.888 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.87% +0.87%] index_copy_ reverse : Elapsed 0.039 ms (3.895 ms / 100) 4.012 -> 4.015 ( +0.07%) [ +0.00% +0.10% +0.05% / +0.07% +0.82% +0.82%] index_add_ spread : Elapsed 0.040 ms (4.012 ms / 100) 3.874 -> 3.877 ( +0.08%) [ +0.00% +0.10% +0.13% / +0.08% +1.24% +0.96%] index_copy_ spread : Elapsed 0.039 ms (3.874 ms / 100) 4.021 -> 4.022 ( +0.02%) [ +0.02% +0.10% +0.00% / +0.02% +0.85% +0.85%] index_add_ strided 3 : Elapsed 0.040 ms (4.022 ms / 100) 3.887 -> 3.887 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.90% +0.77%] index_copy_ strided 3 : Elapsed 0.039 ms (3.888 ms / 100) 4.023 -> 4.019 ( -0.10%) [ +0.02% +0.20% +0.00% / -0.10% +0.99% +0.99%] index_add_ strided 7 : Elapsed 0.040 ms (4.024 ms / 100) 3.891 -> 3.891 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.87% +0.82%] index_copy_ strided 7 : Elapsed 0.039 ms (3.893 ms / 100) 4.024 -> 4.024 ( +0.00%) [ +0.25% +0.00% +0.22% / +0.00% +0.89% +0.99%] index_add_ perm : Elapsed 0.040 ms (4.034 ms / 100) 3.890 -> 3.890 ( +0.00%) [ +0.10% +0.00% +0.03% / +0.00% +0.77% +0.90%] index_copy_ perm : Elapsed 0.039 ms (3.894 ms / 100) 4.025 -> 4.026 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.70% +0.77%] index_add_ perm_sorted : Elapsed 0.040 ms (4.026 ms / 100) 3.891 -> 3.891 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.69% +0.82%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.892 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.09% +0.20% +0.00% / +0.04% +0.13% +0.05%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.570 -> 5.570 ( +0.00%) [ +0.00% +0.11% +0.07% / +0.05% +0.00% +0.04%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.572 -> 5.573 ( +0.02%) [ +0.18% +0.00% +0.00% / +0.14% +0.02% +0.04%] index_select linear : Elapsed 0.056 ms (5.582 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.00% +0.22% +0.02% / +0.16% +0.11% -0.07%] index_select reverse : Elapsed 0.056 ms (5.570 ms / 100) 5.560 -> 5.553 ( -0.13%) [ +0.02% +0.00% +0.04% / -0.13% +0.05% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.550 -> 5.551 ( +0.02%) [ +0.29% +0.18% +0.00% / +0.02% +0.16% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.564 ( -0.09%) [ +0.07% +0.07% +0.00% / -0.09% +0.02% -0.04%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.571 -> 5.562 ( -0.16%) [ +0.11% +0.04% +0.00% / +0.09% -0.16% -0.07%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.567 ( -0.07%) [ +0.05% +0.00% +0.05% / +0.07% -0.07% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.565 -> 5.570 ( +0.09%) [ +0.05% +0.00% +0.20% / +0.09% +0.22% +0.18%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.559 -> 5.553 ( -0.11%) [ +0.11% +0.16% +0.00% / -0.11% +0.09% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.14% +0.00% +0.00% / +0.04% +0.04% +0.05%] index_select random : Elapsed 0.056 ms (5.579 ms / 100) 5.573 -> 5.567 ( -0.11%) [ +0.04% +0.00% +0.00% / +0.09% -0.09% -0.11%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) out_shape = [5, 20, 16, 40] in_shape = [5, 20, 16, 4] idx_dim = 3 B = [5, 20, 16, 40] (stride (40, 3200, 200, 1)) A = [5, 20, 16, 4] (stride (80, 1, 400, 20)) dim = 3 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.71% +0.63%] index_add_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.233 -> 1.235 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.65% +0.73%] index_copy_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.259 -> 1.259 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.00% +0.56% +0.56%] index_add_ reverse : Elapsed 0.013 ms (1.259 ms / 100) 1.217 -> 1.217 ( +0.00%) [ +0.00% +0.25% +0.16% / +0.00% +0.58% +1.15%] index_copy_ reverse : Elapsed 0.012 ms (1.217 ms / 100) 1.263 -> 1.263 ( +0.00%) [ +0.00% +0.00% +0.24% / +0.00% +0.48% +0.40%] index_add_ spread : Elapsed 0.013 ms (1.263 ms / 100) 1.224 -> 1.225 ( +0.08%) [ +0.16% +0.25% +0.00% / +0.08% +0.49% +0.49%] index_copy_ spread : Elapsed 0.012 ms (1.226 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +0.86%] index_add_ strided 3 : Elapsed 0.013 ms (1.278 ms / 100) 1.235 -> 1.235 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.65% +0.81%] index_copy_ strided 3 : Elapsed 0.012 ms (1.235 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.00% +0.24% +0.00% / +0.08% +0.47% +0.79%] index_add_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.230 -> 1.232 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.49% +0.65%] index_copy_ strided 7 : Elapsed 0.012 ms (1.230 ms / 100) 1.261 -> 1.262 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.48% +0.56%] index_add_ perm : Elapsed 0.013 ms (1.262 ms / 100) 1.222 -> 1.224 ( +0.16%) [ +0.00% +0.25% +0.16% / +0.16% +0.74% +0.57%] index_copy_ perm : Elapsed 0.012 ms (1.222 ms / 100) 1.262 -> 1.266 ( +0.32%) [ +0.08% +0.00% +0.16% / +0.32% +0.48% +0.40%] index_add_ perm_sorted : Elapsed 0.013 ms (1.263 ms / 100) 1.221 -> 1.224 ( +0.25%) [ +0.00% +0.25% +0.08% / +0.25% +0.57% +0.57%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.221 ms / 100) 8.776 -> 8.782 ( +0.07%) [ +0.00% +0.39% +0.28% / +0.18% +0.40% +0.07%] index_select const : Elapsed 0.088 ms (8.776 ms / 100) 8.789 -> 8.792 ( +0.03%) [ +0.00% +0.19% +0.18% / +0.07% +0.31% +0.03%] index_select wrap : Elapsed 0.088 ms (8.789 ms / 100) 8.795 -> 8.794 ( -0.01%) [ +0.00% +0.23% +0.26% / +0.17% +0.02% -0.01%] index_select linear : Elapsed 0.088 ms (8.795 ms / 100) 8.802 -> 8.814 ( +0.14%) [ +0.17% +0.18% +0.00% / +0.15% +0.14% +0.20%] index_select reverse : Elapsed 0.088 ms (8.817 ms / 100) 8.783 -> 8.785 ( +0.02%) [ +0.00% +0.14% +0.11% / +0.02% +0.20% +0.19%] index_select skip64 : Elapsed 0.088 ms (8.783 ms / 100) 8.783 -> 8.784 ( +0.01%) [ +0.22% +0.11% +0.00% / +0.16% +0.01% +0.05%] index_select skip256 : Elapsed 0.088 ms (8.802 ms / 100) 8.801 -> 8.811 ( +0.11%) [ +0.00% +0.14% +0.23% / +0.26% +0.18% +0.11%] index_select spread : Elapsed 0.088 ms (8.801 ms / 100) 8.797 -> 8.801 ( +0.05%) [ +0.32% +0.00% +0.05% / +0.05% +0.22% +0.41%] index_select strided 3 : Elapsed 0.088 ms (8.825 ms / 100) 8.800 -> 8.800 ( +0.00%) [ +0.06% +0.00% +0.14% / +0.00% +0.11% +0.12%] index_select random : Elapsed 0.088 ms (8.805 ms / 100) 8.816 -> 8.810 ( -0.07%) [ +0.00% +0.18% +0.06% / +0.05% -0.07% +0.09%] index_select random_sorted : Elapsed 0.088 ms (8.816 ms / 100) B = [5, 20, 16, 40] (stride (1, 5, 4000, 100)) A = [5, 20, 16, 4] (stride (320, 1, 20, 1600)) dim = 3 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.08% +0.08%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.277 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.30% +0.00% +0.23% / +0.30% +0.15% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.284 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.15% +0.00% +0.08% / +0.00% +0.30% +0.46%] index_add_ spread : Elapsed 0.013 ms (1.317 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.24% +0.00% / +0.16% +0.39% +0.78%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.15% +0.23% +0.00% / +0.30% +0.23% +0.30%] index_add_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.54% +0.62%] index_copy_ strided 3 : Elapsed 0.013 ms (1.287 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.15% +0.08%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.15% +0.00% +0.15% / +0.30% +0.23% +0.23%] index_add_ perm : Elapsed 0.013 ms (1.317 ms / 100) 1.282 -> 1.283 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.47% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.283 ms / 100) 1.315 -> 1.318 ( +0.23%) [ +0.23% +0.15% +0.00% / +0.23% +0.30% +0.30%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.280 ms / 100) 9.176 -> 9.180 ( +0.04%) [ +0.00% +0.09% +0.20% / +0.04% +0.16% +0.10%] index_select const : Elapsed 0.092 ms (9.176 ms / 100) 9.197 -> 9.197 ( +0.00%) [ +0.11% +0.04% +0.00% / +0.02% +0.00% +0.22%] index_select wrap : Elapsed 0.092 ms (9.207 ms / 100) 9.190 -> 9.189 ( -0.01%) [ +0.14% +0.00% +0.14% / -0.01% +0.23% +0.30%] index_select linear : Elapsed 0.092 ms (9.203 ms / 100) 9.184 -> 9.169 ( -0.16%) [ +0.07% +0.08% +0.00% / -0.16% +0.22% +0.39%] index_select reverse : Elapsed 0.092 ms (9.190 ms / 100) 9.170 -> 9.176 ( +0.07%) [ +0.00% +0.24% +0.01% / +0.07% +0.41% +0.12%] index_select skip64 : Elapsed 0.092 ms (9.170 ms / 100) 9.179 -> 9.174 ( -0.05%) [ +0.00% +0.40% +0.03% / -0.05% +0.08% +0.32%] index_select skip256 : Elapsed 0.092 ms (9.179 ms / 100) 9.199 -> 9.204 ( +0.05%) [ +0.00% +0.01% +0.09% / +0.05% +0.17% +0.09%] index_select spread : Elapsed 0.092 ms (9.199 ms / 100) 9.190 -> 9.200 ( +0.11%) [ +0.08% +0.00% +0.08% / +0.12% +0.11% +0.18%] index_select strided 3 : Elapsed 0.092 ms (9.197 ms / 100) 9.191 -> 9.207 ( +0.17%) [ +0.04% +0.00% +0.25% / +0.17% +0.32% +0.37%] index_select random : Elapsed 0.092 ms (9.195 ms / 100) 9.188 -> 9.193 ( +0.05%) [ +0.02% +0.00% +0.20% / +0.05% +0.38% +0.10%] index_select random_sorted : Elapsed 0.092 ms (9.190 ms / 100) B = [5, 20, 16, 40] (stride (1, 5, 4000, 100)) A = [5, 20, 16, 4] (stride (20, 1, 100, 1600)) dim = 3 1.251 -> 1.249 ( -0.16%) [ +0.00% +0.00% +0.08% / -0.16% +0.40% +0.64%] index_add_ linear : Elapsed 0.013 ms (1.251 ms / 100) 1.217 -> 1.220 ( +0.25%) [ +0.00% +0.33% +0.16% / +0.25% +0.90% +0.74%] index_copy_ linear : Elapsed 0.012 ms (1.217 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.65% +0.73%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.210 -> 1.210 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.66% +0.66%] index_copy_ reverse : Elapsed 0.012 ms (1.211 ms / 100) 1.257 -> 1.258 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.64% +0.64%] index_add_ spread : Elapsed 0.013 ms (1.257 ms / 100) 1.222 -> 1.224 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.74% +0.65%] index_copy_ spread : Elapsed 0.012 ms (1.222 ms / 100) 1.248 -> 1.249 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.80% +0.64%] index_add_ strided 3 : Elapsed 0.012 ms (1.249 ms / 100) 1.216 -> 1.217 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.74% +0.74%] index_copy_ strided 3 : Elapsed 0.012 ms (1.216 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.32% +0.00% +0.00% / +0.00% +0.57% +0.81%] index_add_ strided 7 : Elapsed 0.012 ms (1.236 ms / 100) 1.206 -> 1.206 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.66% +0.83%] index_copy_ strided 7 : Elapsed 0.012 ms (1.207 ms / 100) 1.255 -> 1.256 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.56% +0.88%] index_add_ perm : Elapsed 0.013 ms (1.255 ms / 100) 1.231 -> 1.230 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.08% -0.08% +0.24%] index_copy_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.57% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.233 ms / 100) 1.203 -> 1.203 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.75% +0.75%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.203 ms / 100) 8.740 -> 8.774 ( +0.39%) [ +0.32% +0.07% +0.00% / +0.39% +0.58% +0.39%] index_select const : Elapsed 0.088 ms (8.768 ms / 100) 8.775 -> 8.776 ( +0.01%) [ +0.14% +0.00% +0.09% / +0.01% +0.47% +0.32%] index_select wrap : Elapsed 0.088 ms (8.787 ms / 100) 8.779 -> 8.787 ( +0.09%) [ +0.21% +0.00% +0.15% / +0.09% +0.47% +0.26%] index_select linear : Elapsed 0.088 ms (8.797 ms / 100) 8.770 -> 8.794 ( +0.27%) [ +0.00% +0.14% +0.22% / +0.27% +0.29% +0.29%] index_select reverse : Elapsed 0.088 ms (8.770 ms / 100) 8.744 -> 8.756 ( +0.14%) [ +0.00% +0.03% +0.21% / +0.14% +0.26% +0.65%] index_select skip64 : Elapsed 0.087 ms (8.744 ms / 100) 8.745 -> 8.751 ( +0.07%) [ +0.31% +0.34% +0.00% / +0.07% +0.33% +0.70%] index_select skip256 : Elapsed 0.088 ms (8.772 ms / 100) 8.768 -> 8.783 ( +0.17%) [ +0.16% +0.09% +0.00% / +0.17% +0.56% +0.29%] index_select spread : Elapsed 0.088 ms (8.782 ms / 100) 8.785 -> 8.806 ( +0.24%) [ +0.00% +0.05% +0.05% / +0.24% +0.35% +0.26%] index_select strided 3 : Elapsed 0.088 ms (8.785 ms / 100) 8.780 -> 8.780 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.00% +0.35% +0.27%] index_select random : Elapsed 0.088 ms (8.784 ms / 100) 8.769 -> 8.765 ( -0.05%) [ +0.00% +0.10% +0.21% / -0.05% +0.38% +0.42%] index_select random_sorted : Elapsed 0.088 ms (8.769 ms / 100) B = [5, 20, 16, 40] (stride (1, 80, 5, 1600)) A = [5, 20, 16, 4] (stride (320, 16, 1, 1600)) dim = 3 1.066 -> 1.066 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.56%] index_add_ linear : Elapsed 0.011 ms (1.066 ms / 100) 1.029 -> 1.029 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.39% +0.78%] index_copy_ linear : Elapsed 0.010 ms (1.030 ms / 100) 1.066 -> 1.070 ( +0.38%) [ +0.00% +0.09% +0.00% / +0.38% +0.38% +0.38%] index_add_ reverse : Elapsed 0.011 ms (1.066 ms / 100) 1.028 -> 1.030 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.19% +0.29%] index_copy_ reverse : Elapsed 0.010 ms (1.030 ms / 100) 1.066 -> 1.066 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.38% +0.38%] index_add_ spread : Elapsed 0.011 ms (1.066 ms / 100) 1.029 -> 1.029 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.29% +0.19%] index_copy_ spread : Elapsed 0.010 ms (1.029 ms / 100) 1.065 -> 1.065 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.47% +0.66%] index_add_ strided 3 : Elapsed 0.011 ms (1.066 ms / 100) 1.028 -> 1.029 ( +0.10%) [ +0.00% +0.19% +0.10% / +0.10% +0.49% +0.39%] index_copy_ strided 3 : Elapsed 0.010 ms (1.028 ms / 100) 1.065 -> 1.066 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +0.47% +0.75%] index_add_ strided 7 : Elapsed 0.011 ms (1.067 ms / 100) 1.028 -> 1.028 ( +0.00%) [ +0.00% +0.00% +0.10% / +0.00% +0.39% +0.39%] index_copy_ strided 7 : Elapsed 0.010 ms (1.028 ms / 100) 1.066 -> 1.068 ( +0.19%) [ +0.09% +0.09% +0.00% / +0.19% +0.38% +0.38%] index_add_ perm : Elapsed 0.011 ms (1.067 ms / 100) 1.028 -> 1.029 ( +0.10%) [ +0.29% +0.19% +0.00% / +0.10% +0.29% +0.29%] index_copy_ perm : Elapsed 0.010 ms (1.031 ms / 100) 1.067 -> 1.067 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.28% +0.28%] index_add_ perm_sorted : Elapsed 0.011 ms (1.068 ms / 100) 1.029 -> 1.029 ( +0.00%) [ +0.00% +0.39% +0.00% / +0.00% +0.10% +0.10%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.029 ms / 100) 7.850 -> 7.858 ( +0.10%) [ +0.18% +0.19% +0.00% / +0.10% +0.24% +0.27%] index_select const : Elapsed 0.079 ms (7.864 ms / 100) 7.883 -> 7.878 ( -0.06%) [ +0.01% +0.01% +0.00% / -0.01% +0.22% -0.06%] index_select wrap : Elapsed 0.079 ms (7.884 ms / 100) 7.860 -> 7.882 ( +0.28%) [ +0.24% +0.24% +0.00% / +0.28% +0.37% +0.37%] index_select linear : Elapsed 0.079 ms (7.879 ms / 100) 7.887 -> 7.890 ( +0.04%) [ +0.00% +0.23% +0.03% / +0.05% +0.04% +0.22%] index_select reverse : Elapsed 0.079 ms (7.887 ms / 100) 7.847 -> 7.864 ( +0.22%) [ +0.08% +0.10% +0.00% / +0.22% +0.25% +0.25%] index_select skip64 : Elapsed 0.079 ms (7.853 ms / 100) 7.840 -> 7.852 ( +0.15%) [ +0.00% +0.15% +0.09% / +0.15% +0.47% +0.34%] index_select skip256 : Elapsed 0.078 ms (7.840 ms / 100) 7.865 -> 7.880 ( +0.19%) [ +0.38% +0.15% +0.00% / +0.19% +0.41% +0.27%] index_select spread : Elapsed 0.079 ms (7.895 ms / 100) 7.880 -> 7.889 ( +0.11%) [ +0.09% +0.00% +0.13% / +0.11% +0.11% +0.20%] index_select strided 3 : Elapsed 0.079 ms (7.887 ms / 100) 7.887 -> 7.884 ( -0.04%) [ +0.01% +0.00% +0.00% / -0.04% +0.20% +0.09%] index_select random : Elapsed 0.079 ms (7.888 ms / 100) 7.864 -> 7.865 ( +0.01%) [ +0.19% +0.00% +0.13% / +0.01% +0.36% +0.43%] index_select random_sorted : Elapsed 0.079 ms (7.879 ms / 100) out_shape = [40, 4, 5, 20] in_shape = [16, 4, 5, 20] idx_dim = 0 B = [40, 4, 5, 20] (stride (400, 5, 1, 20)) A = [16, 4, 5, 20] (stride (5, 1600, 1, 80)) dim = 0 4.001 -> 4.004 ( +0.07%) [ +0.00% +0.07% +0.10% / +0.07% +0.62% +0.60%] index_add_ linear : Elapsed 0.040 ms (4.001 ms / 100) 3.828 -> 3.829 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.68% +0.63%] index_copy_ linear : Elapsed 0.038 ms (3.830 ms / 100) 4.008 -> 4.019 ( +0.27%) [ +0.22% +0.00% +0.22% / +0.27% +0.75% +0.70%] index_add_ reverse : Elapsed 0.040 ms (4.017 ms / 100) 3.842 -> 3.851 ( +0.23%) [ +0.21% +0.00% +0.18% / +0.23% +0.73% +0.70%] index_copy_ reverse : Elapsed 0.039 ms (3.850 ms / 100) 4.020 -> 4.021 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.52% +0.45%] index_add_ spread : Elapsed 0.040 ms (4.021 ms / 100) 3.851 -> 3.853 ( +0.05%) [ +0.00% +0.08% +0.03% / +0.05% +0.52% +0.55%] index_copy_ spread : Elapsed 0.039 ms (3.851 ms / 100) 4.011 -> 4.014 ( +0.07%) [ +0.00% +0.00% +0.10% / +0.07% +0.52% +0.52%] index_add_ strided 3 : Elapsed 0.040 ms (4.011 ms / 100) 3.842 -> 3.844 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.49% +0.49%] index_copy_ strided 3 : Elapsed 0.038 ms (3.842 ms / 100) 4.011 -> 4.018 ( +0.17%) [ +0.20% +0.25% +0.00% / +0.17% +0.62% +0.55%] index_add_ strided 7 : Elapsed 0.040 ms (4.019 ms / 100) 3.845 -> 3.851 ( +0.16%) [ +0.00% +0.18% +0.05% / +0.16% +0.62% +0.52%] index_copy_ strided 7 : Elapsed 0.038 ms (3.845 ms / 100) 4.007 -> 4.003 ( -0.10%) [ +0.07% +0.07% +0.00% / -0.10% +0.42% +0.37%] index_add_ perm : Elapsed 0.040 ms (4.010 ms / 100) 3.832 -> 3.831 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.47% +0.44%] index_copy_ perm : Elapsed 0.038 ms (3.833 ms / 100) 4.004 -> 4.010 ( +0.15%) [ +0.10% +0.12% +0.00% / +0.15% +0.45% +0.37%] index_add_ perm_sorted : Elapsed 0.040 ms (4.008 ms / 100) 3.833 -> 3.837 ( +0.10%) [ +0.03% +0.00% +0.03% / +0.10% +0.47% +0.42%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.834 ms / 100) 5.475 -> 5.469 ( -0.11%) [ +0.02% +0.00% +0.09% / -0.11% +0.04% +0.04%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.466 -> 5.477 ( +0.20%) [ +0.13% +0.00% +0.22% / +0.20% +0.31% +0.33%] index_select wrap : Elapsed 0.055 ms (5.473 ms / 100) 5.477 -> 5.475 ( -0.04%) [ +0.00% +0.02% +0.11% / -0.04% +0.22% +0.27%] index_select linear : Elapsed 0.055 ms (5.477 ms / 100) 5.478 -> 5.475 ( -0.05%) [ +0.00% +0.04% +0.05% / -0.05% +0.22% +0.07%] index_select reverse : Elapsed 0.055 ms (5.478 ms / 100) 5.468 -> 5.466 ( -0.04%) [ +0.15% +0.15% +0.00% / +0.22% +0.20% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.467 ( -0.16%) [ +0.00% +0.11% +0.04% / +0.00% -0.16% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.04% +0.09% +0.00% / +0.02% +0.11% +0.02%] index_select spread : Elapsed 0.055 ms (5.482 ms / 100) 5.468 -> 5.476 ( +0.15%) [ +0.29% +0.00% +0.15% / +0.15% +0.20% +0.22%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.474 -> 5.480 ( +0.11%) [ +0.00% +0.02% +0.04% / +0.16% +0.22% +0.11%] index_select strided 5 : Elapsed 0.055 ms (5.474 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.07% +0.11% +0.00% / +0.05% +0.18% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.476 ms / 100) 5.475 -> 5.474 ( -0.02%) [ +0.11% +0.00% +0.02% / +0.02% +0.05% -0.02%] index_select strided 8 : Elapsed 0.055 ms (5.481 ms / 100) 5.478 -> 5.474 ( -0.07%) [ +0.00% +0.02% +0.13% / +0.05% -0.07% +0.02%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.00% +0.11% +0.07% / +0.05% +0.24% +0.05%] index_select random_sorted : Elapsed 0.055 ms (5.473 ms / 100) B = [40, 4, 5, 20] (stride (100, 4000, 1, 5)) A = [16, 4, 5, 20] (stride (100, 1600, 20, 1)) dim = 0 3.626 -> 3.632 ( +0.17%) [ +0.14% +0.14% +0.00% / +0.17% +0.88% +0.94%] index_add_ linear : Elapsed 0.036 ms (3.631 ms / 100) 3.502 -> 3.505 ( +0.09%) [ +0.03% +0.00% +0.00% / +0.09% +0.83% +0.83%] index_copy_ linear : Elapsed 0.035 ms (3.503 ms / 100) 3.630 -> 3.630 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.69% +0.69%] index_add_ reverse : Elapsed 0.036 ms (3.632 ms / 100) 3.496 -> 3.496 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.80% +0.97%] index_copy_ reverse : Elapsed 0.035 ms (3.497 ms / 100) 3.628 -> 3.627 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.69% +0.69%] index_add_ spread : Elapsed 0.036 ms (3.628 ms / 100) 3.505 -> 3.505 ( +0.00%) [ +0.00% +0.06% +0.00% / +0.00% +0.74% +0.68%] index_copy_ spread : Elapsed 0.035 ms (3.505 ms / 100) 3.624 -> 3.623 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.75% +0.72%] index_add_ strided 3 : Elapsed 0.036 ms (3.624 ms / 100) 3.494 -> 3.496 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +1.12% +0.72%] index_copy_ strided 3 : Elapsed 0.035 ms (3.495 ms / 100) 3.628 -> 3.628 ( +0.00%) [ +0.06% +0.08% +0.00% / +0.00% +0.80% +0.77%] index_add_ strided 7 : Elapsed 0.036 ms (3.630 ms / 100) 3.496 -> 3.495 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +1.00% +0.97%] index_copy_ strided 7 : Elapsed 0.035 ms (3.496 ms / 100) 3.631 -> 3.630 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.88% +0.91%] index_add_ perm : Elapsed 0.036 ms (3.631 ms / 100) 3.502 -> 3.503 ( +0.03%) [ +0.06% +0.00% +0.09% / +0.03% +0.86% +0.83%] index_copy_ perm : Elapsed 0.035 ms (3.504 ms / 100) 3.623 -> 3.624 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.80% +0.80%] index_add_ perm_sorted : Elapsed 0.036 ms (3.624 ms / 100) 3.492 -> 3.493 ( +0.03%) [ +0.03% +0.00% +0.09% / +0.03% +1.06% +1.09%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.493 ms / 100) 5.479 -> 5.480 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.13% +0.04% +0.02%] index_select const : Elapsed 0.055 ms (5.479 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.09% +0.05% +0.00% / +0.15% -0.11% +0.00%] index_select wrap : Elapsed 0.055 ms (5.495 ms / 100) 5.488 -> 5.483 ( -0.09%) [ +0.11% +0.18% +0.00% / +0.18% +0.04% -0.09%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.20% +0.00% +0.22% / +0.35% +0.11% +0.05%] index_select reverse : Elapsed 0.055 ms (5.493 ms / 100) 5.474 -> 5.480 ( +0.11%) [ +0.18% +0.16% +0.00% / +0.13% +0.11% +0.31%] index_select skip64 : Elapsed 0.055 ms (5.484 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.11% +0.00% +0.07% / +0.04% +0.15% +0.11%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.485 -> 5.492 ( +0.13%) [ +0.00% +0.05% +0.00% / +0.22% +0.13% +0.13%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.486 -> 5.488 ( +0.04%) [ +0.00% +0.11% +0.09% / +0.04% +0.09% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.486 ms / 100) 5.489 -> 5.488 ( -0.02%) [ +0.07% +0.00% +0.04% / +0.07% -0.02% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.493 ms / 100) 5.484 -> 5.487 ( +0.05%) [ +0.00% +0.16% +0.05% / +0.05% +0.13% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.484 ms / 100) 5.475 -> 5.479 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.24% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.478 ms / 100) 5.488 -> 5.484 ( -0.07%) [ +0.00% +0.15% +0.02% / -0.04% -0.04% -0.07%] index_select random : Elapsed 0.055 ms (5.488 ms / 100) 5.490 -> 5.486 ( -0.07%) [ +0.15% +0.00% +0.05% / +0.02% -0.07% -0.05%] index_select random_sorted : Elapsed 0.055 ms (5.498 ms / 100) B = [40, 4, 5, 20] (stride (5, 4000, 1, 200)) dim = 0 fill_cnt = 16 2.100 -> 2.098 ( -0.10%) [ +0.05% +0.05% +0.00% / -0.10% +0.76% +0.86%] index_fill_ const : Elapsed 0.021 ms (2.101 ms / 100) 2.096 -> 2.098 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.86% +0.86%] index_fill_ linear : Elapsed 0.021 ms (2.097 ms / 100) 2.092 -> 2.096 ( +0.19%) [ +0.24% +0.14% +0.00% / +0.19% +0.76% +0.86%] index_fill_ reverse : Elapsed 0.021 ms (2.097 ms / 100) 2.098 -> 2.101 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +1.14% +1.05%] index_fill_ skip64 : Elapsed 0.021 ms (2.101 ms / 100) 2.098 -> 2.101 ( +0.14%) [ +0.24% +0.00% +0.10% / +0.14% +0.95% +1.05%] index_fill_ skip256 : Elapsed 0.021 ms (2.103 ms / 100) 2.099 -> 2.097 ( -0.10%) [ +0.00% +0.05% +0.05% / -0.10% +0.95% +0.86%] index_fill_ spread : Elapsed 0.021 ms (2.099 ms / 100) 2.095 -> 2.097 ( +0.10%) [ +0.10% +0.00% +0.05% / +0.10% +0.91% +0.95%] index_fill_ strided 3 : Elapsed 0.021 ms (2.097 ms / 100) 2.097 -> 2.096 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.86% +0.95%] index_fill_ strided 5 : Elapsed 0.021 ms (2.098 ms / 100) 2.098 -> 2.097 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.71% +0.71%] index_fill_ strided 7 : Elapsed 0.021 ms (2.099 ms / 100) 2.096 -> 2.096 ( +0.00%) [ +0.10% +0.14% +0.00% / +0.00% +0.81% +0.81%] index_fill_ strided 8 : Elapsed 0.021 ms (2.098 ms / 100) 2.098 -> 2.099 ( +0.05%) [ +0.00% +0.10% +0.05% / +0.05% +0.81% +0.76%] index_fill_ strided 16 : Elapsed 0.021 ms (2.098 ms / 100) 2.096 -> 2.098 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.86% +0.76%] index_fill_ random : Elapsed 0.021 ms (2.097 ms / 100) 2.096 -> 2.100 ( +0.19%) [ +0.00% +0.10% +0.19% / +0.19% +0.95% +0.91%] index_fill_ random_sorted : Elapsed 0.021 ms (2.096 ms / 100) 2.100 -> 2.102 ( +0.10%) [ +0.00% +0.00% +0.05% / +0.10% +0.71% +0.67%] index_fill_ perm : Elapsed 0.021 ms (2.100 ms / 100) 2.100 -> 2.102 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.71% +0.76%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.102 ms / 100) B = [40, 4, 5, 20] (stride (20, 5, 1, 800)) A = [16, 4, 5, 20] (stride (20, 5, 1, 320)) dim = 0 1.425 -> 1.425 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.35% +0.35%] index_add_ linear : Elapsed 0.014 ms (1.426 ms / 100) 1.457 -> 1.457 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.14% +0.07% +0.00%] index_copy_ linear : Elapsed 0.015 ms (1.458 ms / 100) 1.424 -> 1.425 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.28% +0.21%] index_add_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.446 -> 1.448 ( +0.14%) [ +0.14% +0.28% +0.00% / +0.14% +0.41% +0.28%] index_copy_ reverse : Elapsed 0.014 ms (1.448 ms / 100) 1.428 -> 1.428 ( +0.00%) [ +0.07% +0.35% +0.00% / +0.00% +0.42% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.429 ms / 100) 1.457 -> 1.459 ( +0.14%) [ +0.00% +0.34% +0.00% / +0.14% +0.27% +0.21%] index_copy_ spread : Elapsed 0.015 ms (1.457 ms / 100) 1.428 -> 1.428 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.28% +0.35%] index_add_ strided 3 : Elapsed 0.014 ms (1.429 ms / 100) 1.450 -> 1.451 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.28% +0.28%] index_copy_ strided 3 : Elapsed 0.014 ms (1.450 ms / 100) 1.424 -> 1.422 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +0.35% +0.35%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.447 -> 1.446 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.41% +0.41%] index_copy_ strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.426 -> 1.426 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.21% +0.21%] index_add_ perm : Elapsed 0.014 ms (1.428 ms / 100) 1.456 -> 1.455 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.41% +0.48%] index_copy_ perm : Elapsed 0.015 ms (1.456 ms / 100) 1.427 -> 1.427 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.07% +0.00% +0.14%] index_add_ perm_sorted : Elapsed 0.014 ms (1.428 ms / 100) 1.459 -> 1.457 ( -0.14%) [ +0.00% +0.00% +0.07% / +0.07% -0.14% -0.14%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.459 ms / 100) 3.536 -> 3.534 ( -0.06%) [ +0.00% +0.14% +0.03% / +0.06% -0.06% +0.20%] index_select const : Elapsed 0.035 ms (3.536 ms / 100) 3.557 -> 3.555 ( -0.06%) [ +0.11% +0.00% +0.03% / -0.06% -0.06% -0.06%] index_select wrap : Elapsed 0.036 ms (3.561 ms / 100) 3.555 -> 3.559 ( +0.11%) [ +0.00% +0.00% +0.08% / +0.11% +0.51% +0.59%] index_select linear : Elapsed 0.036 ms (3.555 ms / 100) 3.553 -> 3.559 ( +0.17%) [ +0.14% +0.00% +0.11% / +0.17% +0.76% +0.68%] index_select reverse : Elapsed 0.036 ms (3.558 ms / 100) 3.538 -> 3.536 ( -0.06%) [ +0.08% +0.14% +0.00% / +0.11% -0.06% +0.25%] index_select skip64 : Elapsed 0.035 ms (3.541 ms / 100) 3.536 -> 3.535 ( -0.03%) [ +0.11% +0.14% +0.00% / +0.17% +0.20% -0.03%] index_select skip256 : Elapsed 0.035 ms (3.540 ms / 100) 3.560 -> 3.548 ( -0.34%) [ +0.17% +0.00% +0.06% / +0.20% -0.34% -0.34%] index_select spread : Elapsed 0.036 ms (3.566 ms / 100) 3.561 -> 3.556 ( -0.14%) [ +0.06% +0.08% +0.00% / +0.08% -0.03% -0.14%] index_select strided 3 : Elapsed 0.036 ms (3.563 ms / 100) 3.554 -> 3.557 ( +0.08%) [ +0.39% +0.03% +0.00% / +0.11% +0.08% +0.08%] index_select strided 5 : Elapsed 0.036 ms (3.568 ms / 100) 3.562 -> 3.554 ( -0.22%) [ +0.08% +0.03% +0.00% / +0.00% -0.11% -0.22%] index_select strided 7 : Elapsed 0.036 ms (3.565 ms / 100) 3.537 -> 3.540 ( +0.08%) [ +0.00% +0.11% +0.11% / +0.08% +0.20% +0.14%] index_select strided 8 : Elapsed 0.035 ms (3.537 ms / 100) 3.566 -> 3.546 ( -0.56%) [ +0.00% +0.08% +0.00% / -0.03% -0.56% -0.42%] index_select random : Elapsed 0.036 ms (3.566 ms / 100) 3.564 -> 3.547 ( -0.48%) [ +0.06% +0.00% +0.03% / -0.06% -0.48% -0.28%] index_select random_sorted : Elapsed 0.036 ms (3.566 ms / 100) B = [40, 4, 5, 20] (stride (4, 1, 160, 800)) A = [16, 4, 5, 20] (stride (4, 1, 64, 320)) dim = 0 4.443 -> 4.441 ( -0.05%) [ +0.07% +0.00% +0.00% / -0.05% +0.70% +0.70%] index_add_ linear : Elapsed 0.044 ms (4.446 ms / 100) 4.293 -> 4.295 ( +0.05%) [ +0.00% +0.09% +0.07% / +0.05% +0.72% +0.75%] index_copy_ linear : Elapsed 0.043 ms (4.293 ms / 100) 4.442 -> 4.443 ( +0.02%) [ +0.09% +0.05% +0.00% / +0.02% +0.56% +0.74%] index_add_ reverse : Elapsed 0.044 ms (4.446 ms / 100) 4.279 -> 4.281 ( +0.05%) [ +0.07% +0.09% +0.00% / +0.05% +0.79% +0.79%] index_copy_ reverse : Elapsed 0.043 ms (4.282 ms / 100) 4.431 -> 4.428 ( -0.07%) [ +0.09% +0.18% +0.00% / -0.07% +0.77% +0.81%] index_add_ spread : Elapsed 0.044 ms (4.435 ms / 100) 4.275 -> 4.274 ( -0.02%) [ +0.16% +0.14% +0.00% / -0.02% +0.91% +0.84%] index_copy_ spread : Elapsed 0.043 ms (4.282 ms / 100) 4.445 -> 4.448 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.07% +0.74% +0.83%] index_add_ strided 3 : Elapsed 0.044 ms (4.445 ms / 100) 4.280 -> 4.283 ( +0.07%) [ +0.00% +0.23% +0.07% / +0.07% +0.75% +0.79%] index_copy_ strided 3 : Elapsed 0.043 ms (4.280 ms / 100) 4.441 -> 4.444 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.74% +0.86%] index_add_ strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.77% +0.79%] index_copy_ strided 7 : Elapsed 0.043 ms (4.282 ms / 100) 4.443 -> 4.444 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.65% +0.65%] index_add_ perm : Elapsed 0.044 ms (4.443 ms / 100) 4.297 -> 4.297 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.61% +0.58%] index_copy_ perm : Elapsed 0.043 ms (4.297 ms / 100) 4.448 -> 4.452 ( +0.09%) [ +0.07% +0.00% +0.07% / +0.09% +0.70% +0.72%] index_add_ perm_sorted : Elapsed 0.045 ms (4.451 ms / 100) 4.285 -> 4.287 ( +0.05%) [ +0.02% +0.00% +0.09% / +0.05% +0.61% +0.65%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.286 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.07% +0.09% +0.00% / -0.05% +0.05% +0.20%] index_select const : Elapsed 0.056 ms (5.573 ms / 100) 5.579 -> 5.574 ( -0.09%) [ +0.20% +0.00% +0.00% / -0.02% -0.09% +0.00%] index_select wrap : Elapsed 0.056 ms (5.590 ms / 100) 5.585 -> 5.577 ( -0.14%) [ +0.00% +0.02% +0.07% / +0.00% -0.13% -0.14%] index_select linear : Elapsed 0.056 ms (5.585 ms / 100) 5.583 -> 5.580 ( -0.05%) [ +0.00% +0.13% +0.09% / +0.09% -0.05% +0.00%] index_select reverse : Elapsed 0.056 ms (5.583 ms / 100) 5.574 -> 5.572 ( -0.04%) [ +0.11% +0.00% +0.00% / -0.04% -0.02% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.580 ms / 100) 5.569 -> 5.568 ( -0.02%) [ +0.00% +0.11% +0.00% / +0.09% -0.02% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.577 -> 5.573 ( -0.07%) [ +0.00% +0.13% +0.07% / +0.20% -0.05% -0.07%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.574 -> 5.573 ( -0.02%) [ +0.00% +0.04% +0.02% / -0.02% +0.09% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.574 ms / 100) 5.580 -> 5.572 ( -0.14%) [ +0.00% +0.09% +0.09% / +0.13% -0.14% -0.11%] index_select strided 5 : Elapsed 0.056 ms (5.580 ms / 100) 5.581 -> 5.582 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.02% +0.07% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.585 ms / 100) 5.566 -> 5.570 ( +0.07%) [ +0.00% +0.04% +0.04% / +0.07% +0.32% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.582 -> 5.578 ( -0.07%) [ +0.00% +0.05% +0.02% / +0.00% -0.04% -0.07%] index_select random : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.570 ( -0.16%) [ +0.02% +0.00% +0.23% / +0.04% -0.05% -0.16%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) B = [40, 4, 5, 20] (stride (1, 40, 160, 800)) dim = 0 fill_cnt = 16 2.105 -> 2.104 ( -0.05%) [ +0.00% +0.05% +0.10% / -0.05% +0.95% +0.95%] index_fill_ const : Elapsed 0.021 ms (2.105 ms / 100) 2.103 -> 2.111 ( +0.38%) [ +0.29% +0.00% +0.52% / +0.38% +1.05% +1.09%] index_fill_ linear : Elapsed 0.021 ms (2.109 ms / 100) 2.116 -> 2.119 ( +0.14%) [ +0.00% +0.00% +0.05% / +0.14% +0.43% +0.61%] index_fill_ reverse : Elapsed 0.021 ms (2.116 ms / 100) 2.111 -> 2.108 ( -0.14%) [ +0.00% +0.14% +0.09% / -0.14% +0.43% +0.57%] index_fill_ skip64 : Elapsed 0.021 ms (2.111 ms / 100) 2.106 -> 2.110 ( +0.19%) [ +0.00% +0.14% +0.14% / +0.19% +1.00% +0.90%] index_fill_ skip256 : Elapsed 0.021 ms (2.106 ms / 100) 2.104 -> 2.113 ( +0.43%) [ +0.24% +0.00% +0.14% / +0.43% +1.00% +0.90%] index_fill_ spread : Elapsed 0.021 ms (2.109 ms / 100) 2.107 -> 2.109 ( +0.09%) [ +0.05% +0.00% +0.05% / +0.09% +0.85% +0.66%] index_fill_ strided 3 : Elapsed 0.021 ms (2.108 ms / 100) 2.103 -> 2.103 ( +0.00%) [ +0.10% +0.00% +0.14% / +0.00% +1.19% +0.76%] index_fill_ strided 5 : Elapsed 0.021 ms (2.105 ms / 100) 2.104 -> 2.107 ( +0.14%) [ +0.00% +0.29% +0.19% / +0.14% +0.57% +0.57%] index_fill_ strided 7 : Elapsed 0.021 ms (2.104 ms / 100) 2.103 -> 2.103 ( +0.00%) [ +0.33% +0.00% +0.05% / +0.00% +0.90% +0.81%] index_fill_ strided 8 : Elapsed 0.021 ms (2.110 ms / 100) 2.102 -> 2.105 ( +0.14%) [ +0.14% +0.00% +0.24% / +0.14% +0.95% +0.81%] index_fill_ strided 16 : Elapsed 0.021 ms (2.105 ms / 100) 2.101 -> 2.101 ( +0.00%) [ +0.00% +0.05% +0.19% / +0.00% +0.90% +0.90%] index_fill_ random : Elapsed 0.021 ms (2.101 ms / 100) 2.104 -> 2.112 ( +0.38%) [ +0.00% +0.14% +0.14% / +0.38% +0.62% +0.71%] index_fill_ random_sorted : Elapsed 0.021 ms (2.104 ms / 100) 2.109 -> 2.110 ( +0.05%) [ +0.00% +0.00% +0.19% / +0.05% +0.76% +0.57%] index_fill_ perm : Elapsed 0.021 ms (2.109 ms / 100) 2.114 -> 2.115 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.14% +0.19%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.114 ms / 100) out_shape = [16, 40, 5, 20] in_shape = [16, 4, 5, 20] idx_dim = 1 B = [16, 40, 5, 20] (stride (4000, 1, 800, 40)) A = [16, 4, 5, 20] (stride (400, 20, 80, 1)) dim = 1 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.57% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.57% +0.73%] index_add_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.59% +0.50%] index_copy_ reverse : Elapsed 0.012 ms (1.194 ms / 100) 1.243 -> 1.243 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.16% +0.08%] index_add_ spread : Elapsed 0.012 ms (1.243 ms / 100) 1.204 -> 1.203 ( -0.08%) [ +0.00% +0.00% +0.25% / -0.08% +0.25% +0.25%] index_copy_ spread : Elapsed 0.012 ms (1.204 ms / 100) 1.234 -> 1.235 ( +0.08%) [ +0.08% +0.00% +0.49% / +0.08% +0.65% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.235 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.00% +0.17% / +0.08% +0.67% +0.75%] index_copy_ strided 3 : Elapsed 0.012 ms (1.196 ms / 100) 1.236 -> 1.237 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.65% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.238 ms / 100) 1.198 -> 1.198 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.198 ms / 100) 1.239 -> 1.238 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.24% +0.40%] index_add_ perm : Elapsed 0.012 ms (1.239 ms / 100) 1.200 -> 1.200 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.50%] index_copy_ perm : Elapsed 0.012 ms (1.200 ms / 100) 1.238 -> 1.238 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.32% +0.40%] index_add_ perm_sorted : Elapsed 0.012 ms (1.239 ms / 100) 1.200 -> 1.200 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.00% +0.33% +0.33%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.201 ms / 100) 8.759 -> 8.762 ( +0.03%) [ +0.15% +0.00% +0.06% / +0.21% +0.03% +0.24%] index_select const : Elapsed 0.088 ms (8.772 ms / 100) 8.783 -> 8.775 ( -0.09%) [ +0.08% +0.00% +0.08% / -0.02% -0.02% -0.09%] index_select wrap : Elapsed 0.088 ms (8.790 ms / 100) 8.777 -> 8.774 ( -0.03%) [ +0.09% +0.00% +0.05% / -0.01% +0.27% -0.03%] index_select linear : Elapsed 0.088 ms (8.785 ms / 100) 8.794 -> 8.785 ( -0.10%) [ +0.19% +0.00% +0.03% / +0.07% +0.08% -0.10%] index_select reverse : Elapsed 0.088 ms (8.811 ms / 100) 8.754 -> 8.764 ( +0.11%) [ +0.10% +0.00% +0.11% / +0.14% +0.27% +0.11%] index_select skip64 : Elapsed 0.088 ms (8.763 ms / 100) 8.758 -> 8.755 ( -0.03%) [ +0.05% +0.00% +0.08% / +0.14% +0.13% -0.03%] index_select skip256 : Elapsed 0.088 ms (8.762 ms / 100) 8.784 -> 8.780 ( -0.05%) [ +0.08% +0.18% +0.00% / +0.15% -0.05% +0.15%] index_select spread : Elapsed 0.088 ms (8.791 ms / 100) 8.789 -> 8.774 ( -0.17%) [ +0.00% +0.06% +0.00% / -0.17% -0.05% -0.03%] index_select strided 3 : Elapsed 0.088 ms (8.789 ms / 100) 8.776 -> 8.779 ( +0.03%) [ +0.00% +0.33% +0.13% / +0.03% +0.19% +0.14%] index_select random : Elapsed 0.088 ms (8.776 ms / 100) 8.781 -> 8.780 ( -0.01%) [ +0.07% +0.00% +0.17% / +0.17% -0.01% -0.01%] index_select random_sorted : Elapsed 0.088 ms (8.787 ms / 100) B = [16, 40, 5, 20] (stride (4000, 5, 1, 200)) A = [16, 4, 5, 20] (stride (400, 100, 20, 1)) dim = 1 1.227 -> 1.226 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.49% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.67% +0.76%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.41% +0.41%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.190 -> 1.189 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.190 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.24% +0.16%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.34% / +0.00% +0.34% +0.34%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.49% +0.41%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.190 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.012 ms (1.190 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.33% +0.49%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.00% +0.08% +0.17% / +0.17% +0.42% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.191 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.33% +0.41%] index_add_ perm : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.193 ( +0.25%) [ +0.17% +0.17% +0.00% / +0.25% +0.50% +0.50%] index_copy_ perm : Elapsed 0.012 ms (1.192 ms / 100) 1.228 -> 1.229 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.49% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.191 ms / 100) 8.767 -> 8.765 ( -0.02%) [ +0.00% +0.14% +0.03% / -0.02% -0.01% +0.09%] index_select const : Elapsed 0.088 ms (8.767 ms / 100) 8.780 -> 8.780 ( +0.00%) [ +0.00% +0.30% +0.15% / +0.08% +0.27% +0.00%] index_select wrap : Elapsed 0.088 ms (8.780 ms / 100) 8.770 -> 8.778 ( +0.09%) [ +0.08% +0.00% +0.16% / +0.09% +0.17% +0.22%] index_select linear : Elapsed 0.088 ms (8.777 ms / 100) 8.777 -> 8.777 ( +0.00%) [ +0.15% +0.17% +0.00% / +0.00% +0.40% +0.49%] index_select reverse : Elapsed 0.088 ms (8.790 ms / 100) 8.758 -> 8.750 ( -0.09%) [ +0.03% +0.10% +0.00% / -0.09% +0.03% +0.21%] index_select skip64 : Elapsed 0.088 ms (8.761 ms / 100) 8.752 -> 8.765 ( +0.15%) [ +0.00% +0.23% +0.01% / +0.15% +0.31% +0.22%] index_select skip256 : Elapsed 0.088 ms (8.752 ms / 100) 8.777 -> 8.775 ( -0.02%) [ +0.00% +0.11% +0.07% / -0.02% +0.01% -0.01%] index_select spread : Elapsed 0.088 ms (8.777 ms / 100) 8.785 -> 8.780 ( -0.06%) [ +0.00% +0.10% +0.05% / +0.16% +0.14% -0.06%] index_select strided 3 : Elapsed 0.088 ms (8.785 ms / 100) 8.794 -> 8.791 ( -0.03%) [ +0.00% +0.10% +0.06% / -0.03% +0.15% +0.03%] index_select random : Elapsed 0.088 ms (8.794 ms / 100) 8.771 -> 8.779 ( +0.09%) [ +0.09% +0.11% +0.00% / +0.09% +0.18% +0.29%] index_select random_sorted : Elapsed 0.088 ms (8.779 ms / 100) B = [16, 40, 5, 20] (stride (4000, 1, 40, 200)) A = [16, 4, 5, 20] (stride (400, 20, 80, 1)) dim = 1 1.314 -> 1.318 ( +0.30%) [ +0.23% +0.00% +0.23% / +0.30% +0.38% +0.30%] index_add_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.47% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.314 -> 1.314 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.30% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.314 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.323 -> 1.325 ( +0.15%) [ +0.15% +0.00% +0.08% / +0.15% +0.23% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.325 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.16% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.285 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.38% +0.38%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.70% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.15% +0.08% +0.00% / +0.00% +0.61% +0.46%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.55% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.324 -> 1.327 ( +0.23%) [ +0.30% +0.00% +0.08% / +0.30% +0.23% +0.23%] index_add_ perm : Elapsed 0.013 ms (1.328 ms / 100) 1.285 -> 1.287 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.23% +0.23%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.324 -> 1.323 ( -0.08%) [ +0.00% +0.15% +0.08% / -0.08% +0.15% +0.15%] index_add_ perm_sorted : Elapsed 0.013 ms (1.324 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.16% +0.31% +0.00% / +0.16% +0.31% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.285 ms / 100) 9.223 -> 9.222 ( -0.01%) [ +0.00% +0.03% +0.21% / +0.08% -0.01% +0.16%] index_select const : Elapsed 0.092 ms (9.223 ms / 100) 9.249 -> 9.247 ( -0.02%) [ +0.08% +0.21% +0.00% / -0.02% +0.00% +0.09%] index_select wrap : Elapsed 0.093 ms (9.256 ms / 100) 9.224 -> 9.243 ( +0.21%) [ +0.00% +0.23% +0.15% / +0.21% +0.38% +0.42%] index_select linear : Elapsed 0.092 ms (9.224 ms / 100) 9.257 -> 9.261 ( +0.04%) [ +0.00% +0.08% +0.06% / +0.08% +0.06% +0.04%] index_select reverse : Elapsed 0.093 ms (9.257 ms / 100) 9.228 -> 9.222 ( -0.07%) [ +0.05% +0.00% +0.17% / -0.07% +0.26% +0.31%] index_select skip64 : Elapsed 0.092 ms (9.233 ms / 100) 9.223 -> 9.223 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.20% +0.00% +0.12%] index_select skip256 : Elapsed 0.092 ms (9.236 ms / 100) 9.243 -> 9.244 ( +0.01%) [ +0.25% +0.00% +0.09% / +0.01% +0.08% +0.13%] index_select spread : Elapsed 0.093 ms (9.266 ms / 100) 9.246 -> 9.239 ( -0.08%) [ +0.08% +0.00% +0.03% / -0.01% +0.05% -0.08%] index_select strided 3 : Elapsed 0.093 ms (9.253 ms / 100) 9.227 -> 9.245 ( +0.20%) [ +0.27% +0.00% +0.23% / +0.20% +0.27% +0.26%] index_select random : Elapsed 0.093 ms (9.252 ms / 100) 9.243 -> 9.250 ( +0.08%) [ +0.13% +0.08% +0.00% / +0.08% +0.23% +0.10%] index_select random_sorted : Elapsed 0.093 ms (9.255 ms / 100) B = [16, 40, 5, 20] (stride (5, 1600, 1, 80)) A = [16, 4, 5, 20] (stride (20, 1600, 320, 1)) dim = 1 1.308 -> 1.312 ( +0.31%) [ +0.15% +0.23% +0.00% / +0.31% +0.69% +0.84%] index_add_ linear : Elapsed 0.013 ms (1.310 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.63% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.310 -> 1.312 ( +0.15%) [ +0.08% +0.92% +0.00% / +0.15% +0.61% +0.69%] index_add_ reverse : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.55% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.272 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.31% +0.15% +0.00% / +0.08% +0.69% +0.76%] index_add_ spread : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.274 ( +0.24%) [ +0.08% +0.08% +0.00% / +0.24% +0.55% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.272 ms / 100) 1.310 -> 1.310 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.38% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +0.55% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.271 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.15% +0.23% +0.00% / +0.08% +0.61% +0.69%] index_add_ strided 7 : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.309 -> 1.312 ( +0.23%) [ +0.08% +0.31% +0.00% / +0.23% +0.76% +0.69%] index_add_ perm : Elapsed 0.013 ms (1.310 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.39% +0.31%] index_copy_ perm : Elapsed 0.013 ms (1.274 ms / 100) 1.310 -> 1.312 ( +0.15%) [ +0.00% +0.15% +0.46% / +0.15% +0.53% +0.69%] index_add_ perm_sorted : Elapsed 0.013 ms (1.310 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.126 -> 9.135 ( +0.10%) [ +0.01% +0.21% +0.00% / +0.20% +0.10% +0.22%] index_select const : Elapsed 0.091 ms (9.127 ms / 100) 9.166 -> 9.168 ( +0.02%) [ +0.17% +0.00% +0.22% / +0.11% +0.14% +0.02%] index_select wrap : Elapsed 0.092 ms (9.182 ms / 100) 9.171 -> 9.159 ( -0.13%) [ +0.04% +0.00% +0.13% / -0.13% +0.02% -0.13%] index_select linear : Elapsed 0.092 ms (9.175 ms / 100) 9.177 -> 9.166 ( -0.12%) [ +0.00% +0.03% +0.27% / -0.05% -0.12% +0.00%] index_select reverse : Elapsed 0.092 ms (9.177 ms / 100) 9.138 -> 9.132 ( -0.07%) [ +0.00% +0.11% +0.20% / -0.07% +0.08% +0.08%] index_select skip64 : Elapsed 0.091 ms (9.138 ms / 100) 9.140 -> 9.140 ( +0.00%) [ +0.09% +0.00% +0.02% / +0.00% +0.16% +0.00%] index_select skip256 : Elapsed 0.091 ms (9.148 ms / 100) 9.167 -> 9.165 ( -0.02%) [ +0.09% +0.00% +0.15% / +0.07% +0.07% -0.02%] index_select spread : Elapsed 0.092 ms (9.175 ms / 100) 9.176 -> 9.173 ( -0.03%) [ +0.00% +0.27% +0.28% / +0.03% +0.16% -0.03%] index_select strided 3 : Elapsed 0.092 ms (9.176 ms / 100) 9.170 -> 9.168 ( -0.02%) [ +0.10% +0.08% +0.00% / +0.24% -0.02% +0.24%] index_select random : Elapsed 0.092 ms (9.179 ms / 100) 9.172 -> 9.162 ( -0.11%) [ +0.09% +0.00% +0.12% / +0.16% -0.04% -0.11%] index_select random_sorted : Elapsed 0.092 ms (9.180 ms / 100) B = [16, 40, 5, 20] (stride (800, 1, 12800, 40)) A = [16, 4, 5, 20] (stride (4, 1, 64, 320)) dim = 1 1.410 -> 1.411 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.71% +0.64%] index_add_ linear : Elapsed 0.014 ms (1.410 ms / 100) 1.365 -> 1.365 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.51%] index_copy_ linear : Elapsed 0.014 ms (1.365 ms / 100) 1.408 -> 1.410 ( +0.14%) [ +0.28% +0.07% +0.00% / +0.14% +0.78% +0.64%] index_add_ reverse : Elapsed 0.014 ms (1.412 ms / 100) 1.364 -> 1.365 ( +0.07%) [ +0.15% +0.00% +0.00% / +0.07% +0.73% +0.59%] index_copy_ reverse : Elapsed 0.014 ms (1.366 ms / 100) 1.418 -> 1.421 ( +0.21%) [ +0.14% +0.00% +0.14% / +0.21% +0.28% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.420 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.44% +0.44%] index_copy_ spread : Elapsed 0.014 ms (1.376 ms / 100) 1.410 -> 1.412 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.14% +0.85% +0.85%] index_add_ strided 3 : Elapsed 0.014 ms (1.413 ms / 100) 1.367 -> 1.367 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.66% +0.95%] index_copy_ strided 3 : Elapsed 0.014 ms (1.367 ms / 100) 1.412 -> 1.413 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.64% +0.71%] index_add_ strided 7 : Elapsed 0.014 ms (1.413 ms / 100) 1.369 -> 1.372 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.80% +0.58%] index_copy_ strided 7 : Elapsed 0.014 ms (1.370 ms / 100) 1.414 -> 1.416 ( +0.14%) [ +0.21% +0.14% +0.00% / +0.14% +0.57% +0.71%] index_add_ perm : Elapsed 0.014 ms (1.417 ms / 100) 1.369 -> 1.372 ( +0.22%) [ +0.07% +0.15% +0.00% / +0.22% +0.58% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.370 ms / 100) 1.414 -> 1.415 ( +0.07%) [ +0.21% +0.21% +0.00% / +0.07% +0.57% +0.42%] index_add_ perm_sorted : Elapsed 0.014 ms (1.417 ms / 100) 1.371 -> 1.370 ( -0.07%) [ +0.00% +0.44% +0.07% / -0.07% +0.44% +0.66%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.371 ms / 100) 9.264 -> 9.278 ( +0.15%) [ +0.05% +0.10% +0.00% / +0.15% +0.15% +0.26%] index_select const : Elapsed 0.093 ms (9.269 ms / 100) 9.266 -> 9.269 ( +0.03%) [ +0.04% +0.00% +0.14% / +0.26% +0.03% +0.10%] index_select wrap : Elapsed 0.093 ms (9.270 ms / 100) 9.262 -> 9.265 ( +0.03%) [ +0.18% +0.33% +0.00% / +0.03% +0.17% +0.12%] index_select linear : Elapsed 0.093 ms (9.279 ms / 100) 9.283 -> 9.273 ( -0.11%) [ +0.00% +0.17% +0.00% / -0.04% -0.11% -0.10%] index_select reverse : Elapsed 0.093 ms (9.283 ms / 100) 9.258 -> 9.258 ( +0.00%) [ +0.24% +0.00% +0.17% / +0.00% +0.10% +0.36%] index_select skip64 : Elapsed 0.093 ms (9.280 ms / 100) 9.266 -> 9.272 ( +0.06%) [ +0.03% +0.00% +0.17% / +0.06% +0.17% +0.31%] index_select skip256 : Elapsed 0.093 ms (9.269 ms / 100) 9.264 -> 9.267 ( +0.03%) [ +0.17% +0.05% +0.00% / +0.03% +0.16% +0.29%] index_select spread : Elapsed 0.093 ms (9.280 ms / 100) 9.262 -> 9.275 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.16% +0.30%] index_select strided 3 : Elapsed 0.093 ms (9.275 ms / 100) 9.250 -> 9.266 ( +0.17%) [ +0.00% +0.26% +0.00% / +0.17% +0.48% +0.23%] index_select random : Elapsed 0.092 ms (9.250 ms / 100) 9.263 -> 9.264 ( +0.01%) [ +0.00% +0.01% +0.06% / +0.01% +0.05% +0.03%] index_select random_sorted : Elapsed 0.093 ms (9.263 ms / 100) B = [16, 40, 5, 20] (stride (40, 1, 12800, 640)) A = [16, 4, 5, 20] (stride (1, 80, 16, 320)) dim = 1 1.322 -> 1.320 ( -0.15%) [ +0.15% +0.00% +0.08% / -0.15% +0.68% +0.30%] index_add_ linear : Elapsed 0.013 ms (1.324 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.00% +0.16% / +0.08% +0.78% +0.39%] index_copy_ linear : Elapsed 0.013 ms (1.280 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.53% +0.61%] index_add_ reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.278 -> 1.280 ( +0.16%) [ +0.00% +0.39% +0.00% / +0.16% +0.55% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.331 -> 1.333 ( +0.15%) [ +0.38% +0.15% +0.00% / +0.15% +0.23% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.336 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.08% +0.08%] index_copy_ spread : Elapsed 0.013 ms (1.287 ms / 100) 1.323 -> 1.324 ( +0.08%) [ +0.00% +0.23% +0.08% / +0.08% +0.53% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.323 ms / 100) 1.281 -> 1.284 ( +0.23%) [ +0.16% +0.16% +0.00% / +0.23% +0.55% +0.70%] index_copy_ strided 3 : Elapsed 0.013 ms (1.283 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.00% +0.15% +0.08% / +0.08% +0.23% +0.45%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.276 -> 1.280 ( +0.31%) [ +0.00% +0.16% +0.00% / +0.31% +0.31% +0.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.328 -> 1.329 ( +0.08%) [ +0.00% +0.15% +0.08% / +0.15% +0.08% +0.30%] index_add_ perm : Elapsed 0.013 ms (1.328 ms / 100) 1.282 -> 1.286 ( +0.31%) [ +0.16% +0.16% +0.00% / +0.39% +0.31% +0.31%] index_copy_ perm : Elapsed 0.013 ms (1.284 ms / 100) 1.326 -> 1.331 ( +0.38%) [ +0.15% +0.23% +0.00% / +0.53% +0.38% +0.90%] index_add_ perm_sorted : Elapsed 0.013 ms (1.328 ms / 100) 1.283 -> 1.287 ( +0.31%) [ +0.00% +0.00% +0.00% / +0.31% +0.31% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) 8.794 -> 8.797 ( +0.03%) [ +0.18% +0.00% +0.18% / +0.10% +0.03% +0.17%] index_select const : Elapsed 0.088 ms (8.810 ms / 100) 8.814 -> 8.809 ( -0.06%) [ +0.00% +0.24% +0.03% / +0.24% +0.22% -0.06%] index_select wrap : Elapsed 0.088 ms (8.814 ms / 100) 8.810 -> 8.807 ( -0.03%) [ +0.00% +0.11% +0.31% / +0.07% -0.03% +0.15%] index_select linear : Elapsed 0.088 ms (8.810 ms / 100) 8.812 -> 8.813 ( +0.01%) [ +0.00% +0.06% +0.16% / +0.01% +0.03% +0.16%] index_select reverse : Elapsed 0.088 ms (8.812 ms / 100) 8.795 -> 8.800 ( +0.06%) [ +0.08% +0.27% +0.00% / +0.06% +0.09% +0.20%] index_select skip64 : Elapsed 0.088 ms (8.802 ms / 100) 8.795 -> 8.794 ( -0.01%) [ +0.00% +0.02% +0.07% / +0.03% +0.22% -0.01%] index_select skip256 : Elapsed 0.088 ms (8.795 ms / 100) 8.825 -> 8.810 ( -0.17%) [ +0.11% +0.06% +0.00% / +0.03% +0.26% -0.17%] index_select spread : Elapsed 0.088 ms (8.835 ms / 100) 8.817 -> 8.816 ( -0.01%) [ +0.00% +0.27% +0.05% / +0.17% -0.01% +0.26%] index_select strided 3 : Elapsed 0.088 ms (8.817 ms / 100) 8.810 -> 8.817 ( +0.08%) [ +0.00% +0.14% +0.20% / +0.17% +0.08% +0.18%] index_select random : Elapsed 0.088 ms (8.810 ms / 100) 8.811 -> 8.813 ( +0.02%) [ +0.00% +0.32% +0.17% / +0.02% +0.31% +0.31%] index_select random_sorted : Elapsed 0.088 ms (8.811 ms / 100) B = [16, 40, 5, 20] (stride (200, 5, 1, 3200)) A = [16, 4, 5, 20] (stride (20, 1600, 320, 1)) dim = 1 1.308 -> 1.310 ( +0.15%) [ +0.08% +0.31% +0.00% / +0.15% +0.69% +0.76%] index_add_ linear : Elapsed 0.013 ms (1.309 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.309 -> 1.310 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.76% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.309 ms / 100) 1.270 -> 1.271 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.270 ms / 100) 1.312 -> 1.313 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.46% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.314 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.47% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.309 -> 1.308 ( -0.08%) [ +0.00% +0.61% +0.00% / -0.08% +0.61% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.309 ms / 100) 1.270 -> 1.270 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.272 ms / 100) 1.311 -> 1.312 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.311 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.47% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.76% +0.92%] index_add_ perm : Elapsed 0.013 ms (1.310 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +0.71% +0.87%] index_copy_ perm : Elapsed 0.013 ms (1.271 ms / 100) 1.309 -> 1.309 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.69% +0.76%] index_add_ perm_sorted : Elapsed 0.013 ms (1.309 ms / 100) 1.270 -> 1.270 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.183 -> 9.184 ( +0.01%) [ +0.00% +0.21% +0.02% / +0.01% +0.25% +0.08%] index_select const : Elapsed 0.092 ms (9.183 ms / 100) 9.216 -> 9.216 ( +0.00%) [ +0.05% +0.04% +0.00% / +0.00% +0.41% +0.12%] index_select wrap : Elapsed 0.092 ms (9.221 ms / 100) 9.207 -> 9.205 ( -0.02%) [ +0.00% +0.09% +0.12% / -0.02% +0.17% +0.26%] index_select linear : Elapsed 0.092 ms (9.207 ms / 100) 9.209 -> 9.211 ( +0.02%) [ +0.29% +0.10% +0.00% / +0.04% +0.02% +0.05%] index_select reverse : Elapsed 0.092 ms (9.236 ms / 100) 9.174 -> 9.188 ( +0.15%) [ +0.19% +0.00% +0.14% / +0.17% +0.26% +0.15%] index_select skip64 : Elapsed 0.092 ms (9.191 ms / 100) 9.166 -> 9.189 ( +0.25%) [ +0.25% +0.24% +0.00% / +0.26% +0.25% +0.44%] index_select skip256 : Elapsed 0.092 ms (9.189 ms / 100) 9.207 -> 9.203 ( -0.04%) [ +0.17% +0.00% +0.17% / -0.04% +0.12% +0.37%] index_select spread : Elapsed 0.092 ms (9.223 ms / 100) 9.220 -> 9.217 ( -0.03%) [ +0.00% +0.11% +0.07% / -0.03% +0.09% +0.28%] index_select strided 3 : Elapsed 0.092 ms (9.220 ms / 100) 9.218 -> 9.219 ( +0.01%) [ +0.00% +0.08% +0.04% / +0.01% +0.08% +0.28%] index_select random : Elapsed 0.092 ms (9.218 ms / 100) 9.202 -> 9.204 ( +0.02%) [ +0.28% +0.00% +0.23% / +0.02% +0.15% +0.23%] index_select random_sorted : Elapsed 0.092 ms (9.228 ms / 100) B = [16, 40, 5, 20] (stride (5, 80, 1, 3200)) A = [16, 4, 5, 20] (stride (400, 20, 80, 1)) dim = 1 1.308 -> 1.310 ( +0.15%) [ +0.08% +0.08% +0.00% / +0.15% +0.84% +0.84%] index_add_ linear : Elapsed 0.013 ms (1.309 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.311 -> 1.311 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.38% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.312 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.47% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.310 -> 1.313 ( +0.23%) [ +0.08% +0.00% +0.31% / +0.23% +0.53% +0.69%] index_add_ spread : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.08% +0.71% +0.00% / +0.00% +0.55% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.272 ms / 100) 1.310 -> 1.310 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.53% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.310 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.272 ms / 100) 1.310 -> 1.308 ( -0.15%) [ +0.08% +0.00% +0.00% / -0.15% +0.53% +0.69%] index_add_ strided 7 : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.312 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.46% +0.53%] index_add_ perm : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.47% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.271 ms / 100) 1.312 -> 1.310 ( -0.15%) [ +0.00% +0.08% +0.30% / -0.15% +0.23% +0.61%] index_add_ perm_sorted : Elapsed 0.013 ms (1.312 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.24% +0.24% / +0.00% +0.55% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.271 ms / 100) 9.159 -> 9.146 ( -0.14%) [ +0.10% +0.00% +0.03% / -0.14% -0.09% -0.10%] index_select const : Elapsed 0.092 ms (9.168 ms / 100) 9.186 -> 9.185 ( -0.01%) [ +0.00% +0.00% +0.03% / +0.13% -0.01% +0.07%] index_select wrap : Elapsed 0.092 ms (9.186 ms / 100) 9.160 -> 9.159 ( -0.01%) [ +0.00% +0.19% +0.23% / -0.01% +0.21% +0.02%] index_select linear : Elapsed 0.092 ms (9.160 ms / 100) 9.179 -> 9.171 ( -0.09%) [ +0.04% +0.10% +0.00% / -0.09% +0.25% +0.02%] index_select reverse : Elapsed 0.092 ms (9.183 ms / 100) 9.162 -> 9.150 ( -0.13%) [ +0.13% +0.00% +0.02% / -0.13% +0.00% -0.01%] index_select skip64 : Elapsed 0.092 ms (9.174 ms / 100) 9.153 -> 9.159 ( +0.07%) [ +0.10% +0.13% +0.00% / +0.19% +0.20% +0.07%] index_select skip256 : Elapsed 0.092 ms (9.162 ms / 100) 9.186 -> 9.164 ( -0.24%) [ +0.00% +0.14% +0.01% / -0.10% -0.14% -0.24%] index_select spread : Elapsed 0.092 ms (9.186 ms / 100) 9.163 -> 9.173 ( +0.11%) [ +0.25% +0.29% +0.00% / +0.38% +0.22% +0.11%] index_select strided 3 : Elapsed 0.092 ms (9.186 ms / 100) 9.159 -> 9.166 ( +0.08%) [ +0.19% +0.21% +0.00% / +0.08% +0.53% +0.19%] index_select random : Elapsed 0.092 ms (9.176 ms / 100) 9.175 -> 9.165 ( -0.11%) [ +0.11% +0.21% +0.00% / -0.11% +0.01% -0.05%] index_select random_sorted : Elapsed 0.092 ms (9.185 ms / 100) out_shape = [16, 4, 40, 20] in_shape = [16, 4, 5, 20] idx_dim = 2 B = [16, 4, 40, 20] (stride (3200, 800, 1, 40)) A = [16, 4, 5, 20] (stride (100, 1600, 1, 5)) dim = 2 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.63% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.36% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.49% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.421 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.29% +0.29%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.49% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.384 ( +0.36%) [ +0.00% +0.15% +0.07% / +0.36% +0.44% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.384 ( +0.29%) [ +0.07% +0.00% +0.43% / +0.29% +0.36% +0.43%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.49% +0.56%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.36% +0.58%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.56% +0.56%] index_add_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.07% +0.29% / +0.14% +0.36% +0.43%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.431 -> 1.432 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.56% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.431 ms / 100) 1.386 -> 1.385 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.51% +0.43%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.387 ms / 100) 8.239 -> 8.270 ( +0.38%) [ +0.00% +0.24% +0.24% / +0.51% +0.38% +0.44%] index_select const : Elapsed 0.082 ms (8.239 ms / 100) 8.245 -> 8.257 ( +0.15%) [ +0.11% +0.00% +0.32% / +0.15% +0.23% +0.27%] index_select wrap : Elapsed 0.083 ms (8.254 ms / 100) 8.250 -> 8.249 ( -0.01%) [ +0.00% +0.32% +0.35% / -0.01% +0.55% +0.23%] index_select linear : Elapsed 0.082 ms (8.250 ms / 100) 8.252 -> 8.250 ( -0.02%) [ +0.13% +0.00% +0.12% / +0.28% -0.02% +0.12%] index_select reverse : Elapsed 0.083 ms (8.263 ms / 100) 8.247 -> 8.254 ( +0.08%) [ +0.00% +0.29% +0.24% / +0.08% +0.35% +0.19%] index_select skip64 : Elapsed 0.082 ms (8.247 ms / 100) 8.249 -> 8.257 ( +0.10%) [ +0.07% +0.16% +0.00% / +0.11% +0.12% +0.10%] index_select skip256 : Elapsed 0.083 ms (8.255 ms / 100) 8.247 -> 8.253 ( +0.07%) [ +0.15% +0.19% +0.00% / +0.07% +0.59% +0.17%] index_select spread : Elapsed 0.083 ms (8.259 ms / 100) 8.252 -> 8.250 ( -0.02%) [ +0.00% +0.00% +0.18% / -0.02% +0.48% +0.06%] index_select strided 3 : Elapsed 0.083 ms (8.252 ms / 100) 8.252 -> 8.263 ( +0.13%) [ +0.21% +0.06% +0.00% / +0.23% +0.32% +0.13%] index_select random : Elapsed 0.083 ms (8.269 ms / 100) 8.252 -> 8.250 ( -0.02%) [ +0.13% +0.00% +0.06% / -0.02% +0.11% +0.11%] index_select random_sorted : Elapsed 0.083 ms (8.263 ms / 100) B = [16, 4, 40, 20] (stride (800, 12800, 20, 1)) A = [16, 4, 5, 20] (stride (20, 5, 1, 320)) dim = 2 1.516 -> 1.516 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.66% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.517 ms / 100) 1.465 -> 1.464 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.55% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.465 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.72% +0.79%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.464 -> 1.464 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.75% +0.75%] index_copy_ reverse : Elapsed 0.015 ms (1.464 ms / 100) 1.522 -> 1.526 ( +0.26%) [ +0.07% +0.26% +0.00% / +0.26% +0.99% +0.92%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.469 -> 1.474 ( +0.34%) [ +0.14% +0.20% +0.00% / +0.34% +0.88% +0.95%] index_copy_ spread : Elapsed 0.015 ms (1.471 ms / 100) 1.516 -> 1.517 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.59% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.517 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.61% +0.61%] index_copy_ strided 3 : Elapsed 0.015 ms (1.466 ms / 100) 1.517 -> 1.517 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.517 ms / 100) 1.465 -> 1.465 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.465 ms / 100) 1.516 -> 1.515 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.73% +0.73%] index_add_ perm : Elapsed 0.015 ms (1.516 ms / 100) 1.464 -> 1.464 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.82% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.465 ms / 100) 1.515 -> 1.516 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.73% +0.79%] index_add_ perm_sorted : Elapsed 0.015 ms (1.516 ms / 100) 1.463 -> 1.464 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.75% +0.82%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.465 ms / 100) 8.220 -> 8.228 ( +0.10%) [ +0.00% +0.04% +0.07% / +0.16% +0.10% +0.47%] index_select const : Elapsed 0.082 ms (8.220 ms / 100) 8.223 -> 8.226 ( +0.04%) [ +0.18% +0.15% +0.00% / +0.04% +0.05% +0.15%] index_select wrap : Elapsed 0.082 ms (8.238 ms / 100) 8.218 -> 8.214 ( -0.05%) [ +0.07% +0.00% +0.06% / -0.05% +0.18% +0.15%] index_select linear : Elapsed 0.082 ms (8.224 ms / 100) 8.202 -> 8.227 ( +0.30%) [ +0.30% +0.00% +0.29% / +0.32% +0.30% +0.39%] index_select reverse : Elapsed 0.082 ms (8.227 ms / 100) 8.202 -> 8.215 ( +0.16%) [ +0.56% +0.49% +0.00% / +0.16% +0.28% +0.29%] index_select skip64 : Elapsed 0.082 ms (8.248 ms / 100) 8.230 -> 8.223 ( -0.09%) [ +0.13% +0.00% +0.00% / -0.09% +0.04% +0.04%] index_select skip256 : Elapsed 0.082 ms (8.241 ms / 100) 8.218 -> 8.215 ( -0.04%) [ +0.00% +0.21% +0.30% / +0.13% -0.04% +0.21%] index_select spread : Elapsed 0.082 ms (8.218 ms / 100) 8.212 -> 8.215 ( +0.04%) [ +0.54% +0.11% +0.00% / +0.16% +0.04% +0.34%] index_select strided 3 : Elapsed 0.083 ms (8.256 ms / 100) 8.224 -> 8.221 ( -0.04%) [ +0.09% +0.00% +0.24% / -0.04% +0.09% +0.18%] index_select random : Elapsed 0.082 ms (8.231 ms / 100) 8.218 -> 8.229 ( +0.13%) [ +0.07% +0.18% +0.00% / +0.13% +0.21% +0.16%] index_select random_sorted : Elapsed 0.082 ms (8.224 ms / 100) B = [16, 4, 40, 20] (stride (1, 12800, 16, 640)) A = [16, 4, 5, 20] (stride (100, 1600, 1, 5)) dim = 2 1.521 -> 1.522 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.53% +0.53%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.47% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.481 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.53% +0.46%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.015 ms (1.480 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.47% +0.41%] index_copy_ spread : Elapsed 0.015 ms (1.481 ms / 100) 1.521 -> 1.520 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.53% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.54% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.26% +0.07% +0.00% / +0.07% +0.66% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.524 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.41% +0.41%] index_copy_ perm : Elapsed 0.015 ms (1.481 ms / 100) 1.529 -> 1.530 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.65% +0.72%] index_add_ perm_sorted : Elapsed 0.015 ms (1.530 ms / 100) 1.489 -> 1.491 ( +0.13%) [ +0.07% +0.07% +0.00% / +0.13% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.490 ms / 100) 8.531 -> 8.532 ( +0.01%) [ +0.06% +0.00% +0.04% / +0.28% +0.01% +0.27%] index_select const : Elapsed 0.085 ms (8.536 ms / 100) 8.535 -> 8.531 ( -0.05%) [ +0.08% +0.04% +0.00% / +0.13% +0.13% -0.05%] index_select wrap : Elapsed 0.085 ms (8.542 ms / 100) 8.528 -> 8.540 ( +0.14%) [ +0.00% +0.41% +0.36% / +0.14% +0.25% +0.26%] index_select linear : Elapsed 0.085 ms (8.528 ms / 100) 8.527 -> 8.544 ( +0.20%) [ +0.43% +0.11% +0.00% / +0.21% +0.20% +0.41%] index_select reverse : Elapsed 0.086 ms (8.564 ms / 100) 8.540 -> 8.549 ( +0.11%) [ +0.00% +0.33% +0.07% / +0.27% +0.41% +0.11%] index_select skip64 : Elapsed 0.085 ms (8.540 ms / 100) 8.543 -> 8.543 ( +0.00%) [ +0.05% +0.12% +0.00% / +0.00% +0.21% +0.08%] index_select skip256 : Elapsed 0.085 ms (8.547 ms / 100) 8.546 -> 8.548 ( +0.02%) [ +0.02% +0.23% +0.00% / +0.09% +0.23% +0.02%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.537 -> 8.538 ( +0.01%) [ +0.04% +0.22% +0.00% / +0.09% +0.29% +0.01%] index_select strided 3 : Elapsed 0.085 ms (8.540 ms / 100) 8.546 -> 8.531 ( -0.18%) [ +0.00% +0.02% +0.02% / +0.34% -0.12% -0.18%] index_select random : Elapsed 0.085 ms (8.546 ms / 100) 8.521 -> 8.542 ( +0.25%) [ +0.28% +0.33% +0.00% / +0.67% +0.34% +0.25%] index_select random_sorted : Elapsed 0.085 ms (8.545 ms / 100) B = [16, 4, 40, 20] (stride (40, 640, 1, 2560)) A = [16, 4, 5, 20] (stride (20, 1600, 320, 1)) dim = 2 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.66% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.476 ( +0.20%) [ +0.00% +0.00% +0.07% / +0.20% +0.68% +0.75%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.523 ( +0.26%) [ +0.00% +0.00% +0.00% / +0.26% +0.59% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.474 -> 1.478 ( +0.27%) [ +0.00% +0.00% +0.00% / +0.27% +0.61% +0.75%] index_copy_ reverse : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.66% +0.66%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.477 ( +0.27%) [ +0.00% +0.20% +0.00% / +0.27% +0.68% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.517 -> 1.521 ( +0.26%) [ +0.13% +0.13% +0.00% / +0.26% +0.66% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.478 ( +0.34%) [ +0.00% +0.14% +0.07% / +0.34% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.517 -> 1.517 ( +0.00%) [ +0.13% +0.20% +0.00% / +0.00% +0.66% +0.79%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.477 ( +0.27%) [ +0.00% +0.00% +0.07% / +0.27% +0.68% +0.81%] index_copy_ strided 7 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.521 ( +0.20%) [ +0.00% +0.07% +0.00% / +0.20% +0.72% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.477 ( +0.27%) [ +0.00% +0.07% +0.07% / +0.27% +0.75% +0.81%] index_copy_ perm : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.476 ( +0.20%) [ +0.00% +0.07% +0.07% / +0.20% +0.68% +0.81%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) 8.553 -> 8.570 ( +0.20%) [ +0.13% +0.08% +0.00% / +0.42% +0.20% +0.32%] index_select const : Elapsed 0.086 ms (8.564 ms / 100) 8.590 -> 8.599 ( +0.10%) [ +0.00% +0.30% +0.12% / +0.10% +0.27% +0.27%] index_select wrap : Elapsed 0.086 ms (8.590 ms / 100) 8.581 -> 8.587 ( +0.07%) [ +0.41% +0.00% +0.10% / +0.07% +0.30% +0.16%] index_select linear : Elapsed 0.086 ms (8.616 ms / 100) 8.592 -> 8.595 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.19% +0.24%] index_select reverse : Elapsed 0.086 ms (8.596 ms / 100) 8.571 -> 8.564 ( -0.08%) [ +0.00% +0.01% +0.00% / -0.08% -0.08% +0.33%] index_select skip64 : Elapsed 0.086 ms (8.571 ms / 100) 8.572 -> 8.564 ( -0.09%) [ +0.34% +0.24% +0.00% / +0.10% +0.42% -0.09%] index_select skip256 : Elapsed 0.086 ms (8.601 ms / 100) 8.587 -> 8.597 ( +0.12%) [ +0.00% +0.13% +0.17% / +0.14% +0.12% +0.13%] index_select spread : Elapsed 0.086 ms (8.587 ms / 100) 8.594 -> 8.605 ( +0.13%) [ +0.00% +0.08% +0.05% / +0.13% +0.16% +0.13%] index_select strided 3 : Elapsed 0.086 ms (8.594 ms / 100) 8.572 -> 8.597 ( +0.29%) [ +0.00% +0.22% +0.15% / +0.29% +0.57% +0.45%] index_select random : Elapsed 0.086 ms (8.572 ms / 100) 8.583 -> 8.598 ( +0.17%) [ +0.00% +0.16% +0.24% / +0.27% +0.17% +0.22%] index_select random_sorted : Elapsed 0.086 ms (8.583 ms / 100) out_shape = [16, 4, 5, 40] in_shape = [16, 4, 5, 20] idx_dim = 3 B = [16, 4, 5, 40] (stride (800, 200, 40, 1)) A = [16, 4, 5, 20] (stride (20, 5, 1, 320)) dim = 3 1.507 -> 1.455 ( -3.45%) [ +0.07% +0.00% +0.20% / -3.45% -2.92% -3.12%] index_add_ linear : Elapsed 0.015 ms (1.508 ms / 100) 1.486 -> 1.454 ( -2.15%) [ +0.00% +0.27% +0.20% / -2.15% -1.88% -1.62%] index_copy_ linear : Elapsed 0.015 ms (1.486 ms / 100) 1.507 -> 1.456 ( -3.38%) [ +0.00% +0.20% +0.33% / -3.38% -3.25% -3.19%] index_add_ reverse : Elapsed 0.015 ms (1.507 ms / 100) 1.488 -> 1.451 ( -2.49%) [ +0.00% +0.40% +0.07% / -2.49% -1.75% -1.81%] index_copy_ reverse : Elapsed 0.015 ms (1.488 ms / 100) 1.522 -> 1.470 ( -3.42%) [ +0.00% +0.20% +0.13% / -3.42% -3.35% -3.42%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.506 -> 1.486 ( -1.33%) [ +0.00% +0.40% +0.20% / -1.33% -1.20% -1.20%] index_copy_ spread : Elapsed 0.015 ms (1.506 ms / 100) 1.524 -> 1.468 ( -3.67%) [ +0.20% +0.00% +0.00% / -3.67% -3.61% -3.67%] index_add_ strided 3 : Elapsed 0.015 ms (1.527 ms / 100) 1.505 -> 1.470 ( -2.33%) [ +0.13% +0.27% +0.00% / -2.33% -1.79% -1.79%] index_copy_ strided 3 : Elapsed 0.015 ms (1.507 ms / 100) 1.527 -> 1.465 ( -4.06%) [ +0.13% +0.00% +0.13% / -4.06% -3.80% -3.60%] index_add_ strided 7 : Elapsed 0.015 ms (1.529 ms / 100) 1.508 -> 1.471 ( -2.45%) [ +0.00% +0.07% +0.00% / -2.45% -1.19% -1.99%] index_copy_ strided 7 : Elapsed 0.015 ms (1.508 ms / 100) 1.528 -> 1.462 ( -4.32%) [ +0.00% +0.00% +0.07% / -4.32% -3.66% -3.60%] index_add_ perm : Elapsed 0.015 ms (1.528 ms / 100) 1.505 -> 1.467 ( -2.52%) [ +0.33% +0.00% +0.27% / -2.52% -1.86% -1.86%] index_copy_ perm : Elapsed 0.015 ms (1.510 ms / 100) 1.526 -> 1.465 ( -4.00%) [ +0.07% +0.26% +0.00% / -4.00% -3.54% -3.80%] index_add_ perm_sorted : Elapsed 0.015 ms (1.527 ms / 100) 1.504 -> 1.472 ( -2.13%) [ +0.00% +0.53% +0.20% / -2.13% -1.26% -1.46%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.504 ms / 100) 2.803 -> 2.796 ( -0.25%) [ +0.00% +0.25% +0.25% / -0.11% -0.25% -0.07%] index_select const : Elapsed 0.028 ms (2.803 ms / 100) 2.824 -> 2.809 ( -0.53%) [ +0.14% +0.00% +0.04% / +0.04% -0.53% -0.39%] index_select wrap : Elapsed 0.028 ms (2.828 ms / 100) 2.825 -> 2.807 ( -0.64%) [ +0.00% +0.07% +0.21% / -0.11% -0.46% -0.64%] index_select linear : Elapsed 0.028 ms (2.825 ms / 100) 2.819 -> 2.814 ( -0.18%) [ +0.18% +0.04% +0.00% / -0.18% -0.11% -0.11%] index_select reverse : Elapsed 0.028 ms (2.824 ms / 100) 2.800 -> 2.791 ( -0.32%) [ +0.18% +0.32% +0.00% / -0.32% +0.04% -0.18%] index_select skip64 : Elapsed 0.028 ms (2.805 ms / 100) 2.797 -> 2.785 ( -0.43%) [ +0.14% +0.07% +0.00% / -0.43% +0.14% +0.14%] index_select skip256 : Elapsed 0.028 ms (2.801 ms / 100) 2.824 -> 2.805 ( -0.67%) [ +0.14% +0.07% +0.00% / -0.35% -0.67% -0.35%] index_select spread : Elapsed 0.028 ms (2.828 ms / 100) 2.822 -> 2.808 ( -0.50%) [ +0.00% +0.00% +0.04% / +0.04% -0.18% -0.50%] index_select strided 3 : Elapsed 0.028 ms (2.822 ms / 100) 2.805 -> 2.789 ( -0.57%) [ +0.04% +0.00% +0.11% / -0.57% -0.07% +0.11%] index_select strided 5 : Elapsed 0.028 ms (2.806 ms / 100) 2.823 -> 2.816 ( -0.25%) [ +0.14% +0.14% +0.00% / -0.25% +0.00% -0.04%] index_select strided 7 : Elapsed 0.028 ms (2.827 ms / 100) 2.802 -> 2.791 ( -0.39%) [ +0.00% +0.07% +0.04% / -0.39% +0.25% +0.04%] index_select strided 8 : Elapsed 0.028 ms (2.802 ms / 100) 2.804 -> 2.789 ( -0.53%) [ +0.04% +0.00% +0.00% / -0.53% -0.04% -0.21%] index_select strided 16 : Elapsed 0.028 ms (2.805 ms / 100) 2.817 -> 2.812 ( -0.18%) [ +0.14% +0.14% +0.00% / -0.18% +0.25% +0.18%] index_select random : Elapsed 0.028 ms (2.821 ms / 100) 2.820 -> 2.804 ( -0.57%) [ +0.07% +0.18% +0.00% / -0.57% -0.04% -0.04%] index_select random_sorted : Elapsed 0.028 ms (2.822 ms / 100) B = [16, 4, 5, 40] (stride (800, 1, 160, 4)) A = [16, 4, 5, 20] (stride (20, 1, 4, 320)) dim = 3 2.453 -> 2.468 ( +0.61%) [ +0.00% +0.29% +0.12% / +0.61% +0.77% +0.77%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.443 -> 2.458 ( +0.61%) [ +0.00% +0.04% +0.20% / +0.61% +0.90% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.451 -> 2.466 ( +0.61%) [ +0.04% +0.00% +0.04% / +0.61% +1.10% +0.86%] index_add_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.437 -> 2.452 ( +0.62%) [ +0.08% +0.08% +0.00% / +0.62% +1.27% +1.15%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.470 -> 2.483 ( +0.53%) [ +0.12% +0.04% +0.00% / +0.53% +0.81% +0.97%] index_add_ spread : Elapsed 0.025 ms (2.473 ms / 100) 2.466 -> 2.485 ( +0.77%) [ +0.12% +0.12% +0.00% / +0.77% +1.34% +1.14%] index_copy_ spread : Elapsed 0.025 ms (2.469 ms / 100) 2.471 -> 2.484 ( +0.53%) [ +0.00% +0.12% +0.12% / +0.53% +0.73% +0.53%] index_add_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.469 -> 2.480 ( +0.45%) [ +0.04% +0.24% +0.00% / +0.45% +0.69% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.472 -> 2.483 ( +0.44%) [ +0.00% +0.12% +0.16% / +0.53% +0.61% +0.44%] index_add_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.473 -> 2.485 ( +0.49%) [ +0.00% +0.08% +0.00% / +0.49% +0.49% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.473 ms / 100) 2.468 -> 2.474 ( +0.24%) [ +0.16% +0.00% +0.12% / +0.53% +0.24% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.472 ms / 100) 2.462 -> 2.472 ( +0.41%) [ +0.20% +0.12% +0.00% / +0.57% +0.41% +0.41%] index_copy_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.471 -> 2.475 ( +0.16%) [ +0.00% +0.12% +0.00% / +0.53% +0.24% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.465 -> 2.471 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.89% +0.45% +0.24%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 4.493 -> 4.493 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.09% +0.04%] index_select const : Elapsed 0.045 ms (4.496 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.00% +0.11% +0.11% / -0.02% +0.18% +0.04%] index_select wrap : Elapsed 0.045 ms (4.503 ms / 100) 4.502 -> 4.503 ( +0.02%) [ +0.00% +0.13% +0.02% / +0.02% +0.04% +0.11%] index_select linear : Elapsed 0.045 ms (4.502 ms / 100) 4.505 -> 4.498 ( -0.16%) [ +0.18% +0.00% +0.16% / -0.02% -0.16% +0.00%] index_select reverse : Elapsed 0.045 ms (4.513 ms / 100) 4.490 -> 4.494 ( +0.09%) [ +0.11% +0.27% +0.00% / +0.18% +0.09% +0.22%] index_select skip64 : Elapsed 0.045 ms (4.495 ms / 100) 4.494 -> 4.491 ( -0.07%) [ +0.09% +0.00% +0.00% / -0.07% +0.07% -0.02%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.502 -> 4.500 ( -0.04%) [ +0.07% +0.11% +0.00% / +0.07% +0.22% -0.04%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.503 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.02% -0.04% +0.02%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.07% +0.00% +0.04% / +0.07% +0.09% +0.33%] index_select strided 5 : Elapsed 0.045 ms (4.495 ms / 100) 4.501 -> 4.505 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.09% +0.18% +0.13%] index_select strided 7 : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.496 ( -0.04%) [ +0.07% +0.00% +0.13% / +0.07% -0.04% -0.04%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.495 ( -0.11%) [ +0.11% +0.00% +0.09% / +0.16% -0.07% -0.11%] index_select strided 16 : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.502 ( -0.07%) [ +0.00% +0.00% +0.11% / +0.09% +0.00% -0.07%] index_select random : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.506 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.07% +0.13%] index_select random_sorted : Elapsed 0.045 ms (4.506 ms / 100) B = [16, 4, 5, 40] (stride (800, 1, 4, 20)) A = [16, 4, 5, 20] (stride (1, 320, 1280, 16)) dim = 3 2.450 -> 2.461 ( +0.45%) [ +0.00% +0.12% +0.20% / +0.45% +0.69% +0.78%] index_add_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.445 -> 2.459 ( +0.57%) [ +0.20% +0.08% +0.00% / +0.57% +1.15% +0.98%] index_copy_ linear : Elapsed 0.025 ms (2.450 ms / 100) 2.456 -> 2.464 ( +0.33%) [ +0.41% +0.00% +0.00% / +0.57% +0.53% +0.33%] index_add_ reverse : Elapsed 0.025 ms (2.466 ms / 100) 2.451 -> 2.464 ( +0.53%) [ +0.45% +0.00% +0.04% / +0.61% +0.57% +0.53%] index_copy_ reverse : Elapsed 0.025 ms (2.462 ms / 100) 2.459 -> 2.471 ( +0.49%) [ +0.16% +0.16% +0.00% / +0.57% +0.57% +0.49%] index_add_ spread : Elapsed 0.025 ms (2.463 ms / 100) 2.463 -> 2.472 ( +0.37%) [ +0.08% +0.12% +0.00% / +0.37% +0.49% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.458 -> 2.467 ( +0.37%) [ +0.04% +0.00% +0.08% / +0.45% +0.53% +0.37%] index_add_ strided 3 : Elapsed 0.025 ms (2.459 ms / 100) 2.458 -> 2.470 ( +0.49%) [ +0.00% +0.08% +0.08% / +0.57% +0.69% +0.49%] index_copy_ strided 3 : Elapsed 0.025 ms (2.458 ms / 100) 2.458 -> 2.470 ( +0.49%) [ +0.08% +0.04% +0.00% / +0.61% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.452 -> 2.472 ( +0.82%) [ +0.08% +0.00% +0.33% / +0.82% +0.86% +0.86%] index_copy_ strided 7 : Elapsed 0.025 ms (2.454 ms / 100) 2.457 -> 2.465 ( +0.33%) [ +0.08% +0.00% +0.12% / +0.33% +0.65% +0.77%] index_add_ perm : Elapsed 0.025 ms (2.459 ms / 100) 2.453 -> 2.470 ( +0.69%) [ +0.04% +0.00% +0.16% / +0.69% +0.90% +0.73%] index_copy_ perm : Elapsed 0.025 ms (2.454 ms / 100) 2.459 -> 2.473 ( +0.57%) [ +0.04% +0.08% +0.00% / +0.61% +0.69% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) 2.456 -> 2.467 ( +0.45%) [ +0.00% +0.12% +0.00% / +0.45% +0.69% +0.86%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) 4.491 -> 4.493 ( +0.04%) [ +0.07% +0.11% +0.00% / +0.04% +0.22% +0.11%] index_select const : Elapsed 0.045 ms (4.494 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.07% +0.16% +0.00% / +0.07% +0.27% +0.20%] index_select wrap : Elapsed 0.045 ms (4.504 ms / 100) 4.503 -> 4.503 ( +0.00%) [ +0.29% +0.11% +0.00% / +0.00% +0.24% +0.29%] index_select linear : Elapsed 0.045 ms (4.516 ms / 100) 4.502 -> 4.508 ( +0.13%) [ +0.02% +0.02% +0.00% / +0.13% +0.27% +0.22%] index_select reverse : Elapsed 0.045 ms (4.503 ms / 100) 4.492 -> 4.494 ( +0.04%) [ +0.00% +0.20% +0.02% / +0.07% +0.04% +0.09%] index_select skip64 : Elapsed 0.045 ms (4.492 ms / 100) 4.494 -> 4.498 ( +0.09%) [ +0.09% +0.16% +0.00% / +0.16% +0.09% +0.09%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.504 -> 4.511 ( +0.16%) [ +0.04% +0.07% +0.00% / +0.24% +0.16% +0.18%] index_select spread : Elapsed 0.045 ms (4.506 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.11% +0.22% +0.00% / +0.02% +0.27% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.505 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.22% +0.02% +0.00% / +0.04% +0.31% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.505 ms / 100) 4.502 -> 4.503 ( +0.02%) [ +0.02% +0.00% +0.18% / +0.02% +0.09% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.503 ms / 100) 4.496 -> 4.497 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.22% +0.20%] index_select strided 8 : Elapsed 0.045 ms (4.496 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.00% +0.16% +0.02% / +0.07% +0.11% +0.24%] index_select strided 16 : Elapsed 0.045 ms (4.493 ms / 100) 4.501 -> 4.503 ( +0.04%) [ +0.00% +0.07% +0.22% / +0.04% +0.24% +0.33%] index_select random : Elapsed 0.045 ms (4.501 ms / 100) 4.506 -> 4.506 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.13% +0.11%] index_select random_sorted : Elapsed 0.045 ms (4.506 ms / 100) B = [16, 4, 5, 40] (stride (1, 3200, 16, 80)) A = [16, 4, 5, 20] (stride (4, 1, 64, 320)) dim = 3 2.408 -> 2.422 ( +0.58%) [ +0.00% +0.12% +0.17% / +0.58% +0.58% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.407 -> 2.418 ( +0.46%) [ +0.08% +0.33% +0.00% / +0.83% +0.46% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.409 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.00% +0.17% +0.12% / +0.54% +0.92% +1.04%] index_add_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.00% +0.00% +0.04% / +0.50% +0.92% +0.87%] index_copy_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.401 -> 2.412 ( +0.46%) [ +0.00% +0.08% +0.00% / +0.46% +0.92% +0.83%] index_add_ spread : Elapsed 0.024 ms (2.401 ms / 100) 2.398 -> 2.408 ( +0.42%) [ +0.08% +0.00% +0.08% / +0.42% +1.17% +1.17%] index_copy_ spread : Elapsed 0.024 ms (2.400 ms / 100) 2.408 -> 2.422 ( +0.58%) [ +0.17% +0.04% +0.00% / +0.62% +0.58% +0.62%] index_add_ strided 3 : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.422 ( +0.67%) [ +0.00% +0.08% +0.08% / +0.96% +0.67% +0.71%] index_copy_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.407 -> 2.414 ( +0.29%) [ +0.08% +0.00% +0.00% / +0.58% +0.58% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.409 ms / 100) 2.403 -> 2.417 ( +0.58%) [ +0.17% +0.00% +0.21% / +0.75% +0.75% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.407 -> 2.416 ( +0.37%) [ +0.00% +0.12% +0.00% / +0.83% +0.37% +0.50%] index_add_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.405 -> 2.413 ( +0.33%) [ +0.00% +0.08% +0.17% / +0.87% +0.33% +0.50%] index_copy_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.410 -> 2.415 ( +0.21%) [ +0.00% +0.17% +0.12% / +0.54% +0.25% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.410 ms / 100) 2.409 -> 2.413 ( +0.17%) [ +0.00% +0.21% +0.04% / +0.50% +0.25% +0.17%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 4.431 -> 4.434 ( +0.07%) [ +0.16% +0.00% +0.00% / +0.07% +0.27% +0.14%] index_select const : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.00% +0.07% +0.11% / +0.11% +0.02% +0.11%] index_select wrap : Elapsed 0.044 ms (4.440 ms / 100) 4.442 -> 4.438 ( -0.09%) [ +0.07% +0.00% +0.05% / -0.09% +0.05% +0.11%] index_select linear : Elapsed 0.044 ms (4.445 ms / 100) 4.441 -> 4.447 ( +0.14%) [ +0.14% +0.02% +0.00% / +0.14% +0.18% +0.23%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.435 -> 4.435 ( +0.00%) [ +0.20% +0.18% +0.00% / +0.05% +0.00% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.444 ms / 100) 4.436 -> 4.435 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% -0.02% +0.02%] index_select skip256 : Elapsed 0.044 ms (4.436 ms / 100) 4.440 -> 4.444 ( +0.09%) [ +0.00% +0.20% +0.16% / +0.09% +0.18% +0.25%] index_select spread : Elapsed 0.044 ms (4.440 ms / 100) 4.442 -> 4.443 ( +0.02%) [ +0.14% +0.00% +0.05% / +0.02% +0.11% +0.20%] index_select strided 3 : Elapsed 0.044 ms (4.448 ms / 100) 4.432 -> 4.431 ( -0.02%) [ +0.05% +0.20% +0.00% / -0.02% +0.18% +0.05%] index_select strided 5 : Elapsed 0.044 ms (4.434 ms / 100) 4.443 -> 4.443 ( +0.00%) [ +0.00% +0.18% +0.05% / +0.00% +0.14% +0.02%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.430 -> 4.431 ( +0.02%) [ +0.14% +0.16% +0.00% / +0.20% +0.02% +0.14%] index_select strided 8 : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.05% +0.02% +0.16%] index_select strided 16 : Elapsed 0.044 ms (4.436 ms / 100) 4.445 -> 4.442 ( -0.07%) [ +0.00% +0.11% +0.11% / +0.04% -0.07% +0.09%] index_select random : Elapsed 0.044 ms (4.445 ms / 100) 4.440 -> 4.442 ( +0.05%) [ +0.09% +0.23% +0.00% / +0.29% +0.05% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.444 ms / 100) B = [16, 4, 5, 40] (stride (1, 16, 2560, 64)) A = [16, 4, 5, 20] (stride (1, 80, 16, 320)) dim = 3 2.397 -> 2.412 ( +0.63%) [ +0.13% +0.21% +0.00% / +0.63% +0.67% +0.63%] index_add_ linear : Elapsed 0.024 ms (2.400 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.00% +0.00% +0.08% / +0.42% +0.62% +0.46%] index_copy_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.409 ( +0.25%) [ +0.04% +0.08% +0.00% / +0.46% +0.50% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.17% +0.00% +0.17% / +1.08% +0.42% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.403 -> 2.409 ( +0.25%) [ +0.08% +0.00% +0.08% / +0.58% +0.25% +0.42%] index_add_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.407 -> 2.415 ( +0.33%) [ +0.04% +0.00% +0.08% / +0.54% +0.46% +0.33%] index_copy_ spread : Elapsed 0.024 ms (2.408 ms / 100) 2.402 -> 2.410 ( +0.33%) [ +0.08% +0.08% +0.00% / +0.54% +0.33% +0.42%] index_add_ strided 3 : Elapsed 0.024 ms (2.404 ms / 100) 2.407 -> 2.414 ( +0.29%) [ +0.00% +0.00% +0.04% / +0.46% +0.29% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.403 -> 2.410 ( +0.29%) [ +0.00% +0.00% +0.17% / +0.58% +0.29% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.17% +0.12% +0.00% / +0.75% +0.50% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.402 -> 2.410 ( +0.33%) [ +0.08% +0.17% +0.00% / +0.71% +0.42% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.17% +0.21% +0.00% / +0.46% +0.83% +0.62%] index_copy_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.401 -> 2.413 ( +0.50%) [ +0.00% +0.08% +0.04% / +0.62% +0.58% +0.50%] index_add_ perm_sorted : Elapsed 0.024 ms (2.401 ms / 100) 2.404 -> 2.415 ( +0.46%) [ +0.00% +0.17% +0.17% / +0.46% +0.67% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.404 ms / 100) 4.434 -> 4.434 ( +0.00%) [ +0.00% +0.02% +0.25% / +0.00% +0.14% +0.05%] index_select const : Elapsed 0.044 ms (4.434 ms / 100) 4.441 -> 4.441 ( +0.00%) [ +0.07% +0.00% +0.11% / +0.00% +0.11% +0.05%] index_select wrap : Elapsed 0.044 ms (4.444 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.07% +0.00% +0.02% / -0.07% +0.07% +0.02%] index_select linear : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.445 ( +0.09%) [ +0.14% +0.00% +0.00% / +0.32% +0.09% +0.09%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.09% +0.18% +0.00% / +0.07% +0.20% +0.11%] index_select skip64 : Elapsed 0.044 ms (4.440 ms / 100) 4.438 -> 4.432 ( -0.14%) [ +0.09% +0.00% +0.05% / +0.00% +0.00% -0.14%] index_select skip256 : Elapsed 0.044 ms (4.442 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.00% +0.11% +0.00% / +0.09% +0.18% +0.41%] index_select spread : Elapsed 0.044 ms (4.434 ms / 100) 4.439 -> 4.442 ( +0.07%) [ +0.20% +0.16% +0.00% / +0.07% +0.18% +0.20%] index_select strided 3 : Elapsed 0.044 ms (4.448 ms / 100) 4.439 -> 4.432 ( -0.16%) [ +0.02% +0.00% +0.11% / +0.23% -0.16% +0.11%] index_select strided 5 : Elapsed 0.044 ms (4.440 ms / 100) 4.434 -> 4.445 ( +0.25%) [ +0.09% +0.09% +0.00% / +0.27% +0.27% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.438 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.18% +0.18% +0.00% / +0.11% +0.05% +0.09%] index_select strided 8 : Elapsed 0.044 ms (4.441 ms / 100) 4.433 -> 4.432 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.20% +0.25%] index_select strided 16 : Elapsed 0.044 ms (4.433 ms / 100) 4.438 -> 4.443 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.11% +0.20%] index_select random : Elapsed 0.044 ms (4.440 ms / 100) 4.438 -> 4.437 ( -0.02%) [ +0.00% +0.00% +0.05% / +0.20% -0.02% +0.18%] index_select random_sorted : Elapsed 0.044 ms (4.438 ms / 100) B = [16, 4, 5, 40] (stride (5, 80, 1, 320)) A = [16, 4, 5, 20] (stride (4, 1, 1280, 64)) dim = 3 2.413 -> 2.423 ( +0.41%) [ +0.04% +0.08% +0.00% / +0.41% +0.79% +0.75%] index_add_ linear : Elapsed 0.024 ms (2.414 ms / 100) 2.408 -> 2.423 ( +0.62%) [ +0.00% +0.04% +0.17% / +0.62% +0.79% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.406 -> 2.419 ( +0.54%) [ +0.08% +0.00% +0.12% / +0.54% +1.25% +1.00%] index_add_ reverse : Elapsed 0.024 ms (2.408 ms / 100) 2.401 -> 2.415 ( +0.58%) [ +0.00% +0.17% +0.12% / +0.58% +1.17% +1.21%] index_copy_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.408 -> 2.423 ( +0.62%) [ +0.12% +0.17% +0.00% / +0.62% +1.08% +1.00%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.406 -> 2.419 ( +0.54%) [ +0.04% +0.08% +0.00% / +0.54% +1.00% +1.00%] index_copy_ spread : Elapsed 0.024 ms (2.407 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.00% +0.04% +0.21% / +0.58% +0.41% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.409 -> 2.421 ( +0.50%) [ +0.00% +0.00% +0.12% / +0.75% +0.54% +0.50%] index_copy_ strided 3 : Elapsed 0.024 ms (2.409 ms / 100) 2.415 -> 2.429 ( +0.58%) [ +0.00% +0.17% +0.04% / +0.58% +0.62% +0.58%] index_add_ strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.410 -> 2.423 ( +0.54%) [ +0.25% +0.08% +0.00% / +0.54% +0.71% +0.75%] index_copy_ strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.00% +0.04% +0.12% / +0.66% +0.17% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.421 ( +0.41%) [ +0.04% +0.25% +0.00% / +0.66% +0.41% +0.41%] index_copy_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.417 -> 2.422 ( +0.21%) [ +0.00% +0.12% +0.00% / +0.70% +0.33% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.417 ms / 100) 2.411 -> 2.419 ( +0.33%) [ +0.00% +0.21% +0.00% / +0.83% +0.46% +0.33%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.411 ms / 100) 4.429 -> 4.431 ( +0.05%) [ +0.09% +0.16% +0.00% / +0.05% +0.05% +0.16%] index_select const : Elapsed 0.044 ms (4.433 ms / 100) 4.440 -> 4.447 ( +0.16%) [ +0.20% +0.05% +0.00% / +0.16% +0.34% +0.16%] index_select wrap : Elapsed 0.044 ms (4.449 ms / 100) 4.446 -> 4.445 ( -0.02%) [ +0.16% +0.00% +0.02% / -0.02% +0.09% +0.18%] index_select linear : Elapsed 0.045 ms (4.453 ms / 100) 4.448 -> 4.445 ( -0.07%) [ +0.07% +0.02% +0.00% / +0.11% +0.11% -0.07%] index_select reverse : Elapsed 0.045 ms (4.451 ms / 100) 4.429 -> 4.426 ( -0.07%) [ +0.09% +0.16% +0.00% / +0.23% +0.02% -0.07%] index_select skip64 : Elapsed 0.044 ms (4.433 ms / 100) 4.427 -> 4.431 ( +0.09%) [ +0.00% +0.20% +0.00% / +0.09% +0.25% +0.23%] index_select skip256 : Elapsed 0.044 ms (4.427 ms / 100) 4.441 -> 4.448 ( +0.16%) [ +0.00% +0.23% +0.07% / +0.16% +0.32% +0.29%] index_select spread : Elapsed 0.044 ms (4.441 ms / 100) 4.438 -> 4.451 ( +0.29%) [ +0.00% +0.18% +0.16% / +0.29% +0.38% +0.36%] index_select strided 3 : Elapsed 0.044 ms (4.438 ms / 100) 4.439 -> 4.434 ( -0.11%) [ +0.00% +0.02% +0.00% / -0.11% +0.02% +0.02%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.00% +0.00% +0.02% / -0.07% +0.20% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.16% +0.00% +0.20%] index_select strided 8 : Elapsed 0.044 ms (4.441 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.00% +0.11% +0.02% / +0.09% +0.20% +0.14%] index_select strided 16 : Elapsed 0.044 ms (4.434 ms / 100) 4.449 -> 4.441 ( -0.18%) [ +0.00% +0.02% +0.16% / -0.04% +0.02% -0.18%] index_select random : Elapsed 0.044 ms (4.449 ms / 100) 4.447 -> 4.444 ( -0.07%) [ +0.09% +0.00% +0.02% / +0.20% +0.11% -0.07%] index_select random_sorted : Elapsed 0.045 ms (4.451 ms / 100) B = [16, 4, 5, 40] (stride (4, 1, 64, 320)) A = [16, 4, 5, 20] (stride (1, 1600, 320, 16)) dim = 3 1.521 -> 1.495 ( -1.71%) [ +0.33% +0.00% +0.20% / -1.71% -1.05% -0.85%] index_add_ linear : Elapsed 0.015 ms (1.526 ms / 100) 1.506 -> 1.473 ( -2.19%) [ +0.00% +0.20% +0.13% / -2.19% -1.39% -1.46%] index_copy_ linear : Elapsed 0.015 ms (1.506 ms / 100) 1.523 -> 1.490 ( -2.17%) [ +0.13% +0.20% +0.00% / -2.17% -1.25% -1.12%] index_add_ reverse : Elapsed 0.015 ms (1.525 ms / 100) 1.508 -> 1.474 ( -2.25%) [ +0.00% +0.20% +0.07% / -2.25% -1.72% -1.26%] index_copy_ reverse : Elapsed 0.015 ms (1.508 ms / 100) 1.523 -> 1.495 ( -1.84%) [ +0.00% +0.13% +0.00% / -1.77% -1.58% -1.84%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.503 -> 1.476 ( -1.80%) [ +0.00% +0.20% +0.27% / -1.80% -1.46% -1.46%] index_copy_ spread : Elapsed 0.015 ms (1.503 ms / 100) 1.523 -> 1.496 ( -1.77%) [ +0.13% +0.00% +0.33% / -1.77% -1.64% -1.64%] index_add_ strided 3 : Elapsed 0.015 ms (1.525 ms / 100) 1.508 -> 1.475 ( -2.19%) [ +0.40% +0.27% +0.00% / -2.19% -1.92% -1.92%] index_copy_ strided 3 : Elapsed 0.015 ms (1.514 ms / 100) 1.522 -> 1.496 ( -1.71%) [ +0.20% +0.00% +0.00% / -1.71% -1.18% -1.12%] index_add_ strided 7 : Elapsed 0.015 ms (1.525 ms / 100) 1.504 -> 1.472 ( -2.13%) [ +0.20% +0.00% +0.07% / -2.13% -1.13% -1.46%] index_copy_ strided 7 : Elapsed 0.015 ms (1.507 ms / 100) 1.523 -> 1.491 ( -2.10%) [ +0.00% +0.59% +0.07% / -2.10% -1.51% -1.77%] index_add_ perm : Elapsed 0.015 ms (1.523 ms / 100) 1.502 -> 1.470 ( -2.13%) [ +0.40% +0.00% +0.20% / -2.13% -1.60% -1.53%] index_copy_ perm : Elapsed 0.015 ms (1.508 ms / 100) 1.520 -> 1.492 ( -1.84%) [ +0.20% +0.00% +0.20% / -1.84% -1.25% -1.25%] index_add_ perm_sorted : Elapsed 0.015 ms (1.523 ms / 100) 1.502 -> 1.471 ( -2.06%) [ +0.00% +0.13% +0.13% / -2.06% -1.13% -1.07%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.502 ms / 100) 2.870 -> 2.871 ( +0.03%) [ +0.00% +0.07% +0.14% / +0.10% +0.10% +0.03%] index_select const : Elapsed 0.029 ms (2.870 ms / 100) 2.891 -> 2.886 ( -0.17%) [ +0.00% +0.52% +0.28% / +0.31% -0.17% +0.00%] index_select wrap : Elapsed 0.029 ms (2.891 ms / 100) 2.892 -> 2.883 ( -0.31%) [ +0.21% +0.03% +0.00% / +0.31% -0.17% -0.31%] index_select linear : Elapsed 0.029 ms (2.898 ms / 100) 2.894 -> 2.892 ( -0.07%) [ +0.28% +0.14% +0.00% / +0.07% -0.07% +0.03%] index_select reverse : Elapsed 0.029 ms (2.902 ms / 100) 2.872 -> 2.872 ( +0.00%) [ +0.07% +0.10% +0.00% / +0.03% +0.14% +0.00%] index_select skip64 : Elapsed 0.029 ms (2.874 ms / 100) 2.865 -> 2.871 ( +0.21%) [ +0.10% +0.00% +0.28% / +0.21% +0.31% +0.31%] index_select skip256 : Elapsed 0.029 ms (2.868 ms / 100) 2.897 -> 2.884 ( -0.45%) [ +0.03% +0.00% +0.17% / +0.03% -0.45% -0.28%] index_select spread : Elapsed 0.029 ms (2.898 ms / 100) 2.900 -> 2.890 ( -0.34%) [ +0.00% +0.17% +0.00% / +0.03% -0.34% -0.34%] index_select strided 3 : Elapsed 0.029 ms (2.900 ms / 100) 2.875 -> 2.877 ( +0.07%) [ +0.17% +0.00% +0.03% / +0.14% +0.07% +0.10%] index_select strided 5 : Elapsed 0.029 ms (2.880 ms / 100) 2.895 -> 2.893 ( -0.07%) [ +0.48% +0.10% +0.00% / +0.00% -0.07% +0.07%] index_select strided 7 : Elapsed 0.029 ms (2.909 ms / 100) 2.871 -> 2.872 ( +0.03%) [ +0.24% +0.00% +0.21% / +0.03% +0.24% +0.45%] index_select strided 8 : Elapsed 0.029 ms (2.878 ms / 100) 2.877 -> 2.876 ( -0.03%) [ +0.10% +0.00% +0.00% / -0.03% +0.17% +0.28%] index_select strided 16 : Elapsed 0.029 ms (2.880 ms / 100) 2.893 -> 2.898 ( +0.17%) [ +0.31% +0.00% +0.10% / +0.17% +0.17% +0.17%] index_select random : Elapsed 0.029 ms (2.902 ms / 100) 2.897 -> 2.896 ( -0.03%) [ +0.31% +0.00% +0.03% / +0.03% -0.03% -0.03%] index_select random_sorted : Elapsed 0.029 ms (2.906 ms / 100) out_shape = [40, 4, 20, 5] in_shape = [16, 4, 20, 5] idx_dim = 0 B = [40, 4, 20, 5] (stride (400, 20, 1, 80)) A = [16, 4, 20, 5] (stride (5, 80, 320, 1)) dim = 0 3.748 -> 3.751 ( +0.08%) [ +0.11% +0.05% +0.00% / +0.08% +0.72% +0.67%] index_add_ linear : Elapsed 0.038 ms (3.752 ms / 100) 3.610 -> 3.615 ( +0.14%) [ +0.17% +0.08% +0.00% / +0.14% +0.80% +0.66%] index_copy_ linear : Elapsed 0.036 ms (3.616 ms / 100) 3.751 -> 3.750 ( -0.03%) [ +0.11% +0.00% +0.05% / -0.03% +0.56% +0.51%] index_add_ reverse : Elapsed 0.038 ms (3.755 ms / 100) 3.607 -> 3.611 ( +0.11%) [ +0.25% +0.08% +0.00% / +0.11% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.036 ms (3.616 ms / 100) 3.736 -> 3.741 ( +0.13%) [ +0.16% +0.16% +0.00% / +0.13% +0.64% +0.54%] index_add_ spread : Elapsed 0.037 ms (3.742 ms / 100) 3.602 -> 3.607 ( +0.14%) [ +0.14% +0.17% +0.00% / +0.14% +0.47% +0.44%] index_copy_ spread : Elapsed 0.036 ms (3.607 ms / 100) 3.747 -> 3.746 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.53% +0.48%] index_add_ strided 3 : Elapsed 0.037 ms (3.749 ms / 100) 3.608 -> 3.611 ( +0.08%) [ +0.00% +0.11% +0.11% / +0.08% +0.55% +0.50%] index_copy_ strided 3 : Elapsed 0.036 ms (3.608 ms / 100) 3.752 -> 3.756 ( +0.11%) [ +0.03% +0.03% +0.00% / +0.11% +0.51% +0.48%] index_add_ strided 7 : Elapsed 0.038 ms (3.753 ms / 100) 3.610 -> 3.611 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.03% +0.50% +0.47%] index_copy_ strided 7 : Elapsed 0.036 ms (3.610 ms / 100) 3.753 -> 3.755 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.40% +0.40%] index_add_ perm : Elapsed 0.038 ms (3.756 ms / 100) 3.616 -> 3.619 ( +0.08%) [ +0.11% +0.14% +0.00% / +0.08% +0.50% +0.39%] index_copy_ perm : Elapsed 0.036 ms (3.620 ms / 100) 3.754 -> 3.757 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.37% +0.35%] index_add_ perm_sorted : Elapsed 0.038 ms (3.757 ms / 100) 3.613 -> 3.622 ( +0.25%) [ +0.19% +0.06% +0.00% / +0.25% +0.47% +0.50%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.620 ms / 100) 5.465 -> 5.474 ( +0.16%) [ +0.00% +0.05% +0.11% / +0.16% +0.31% +0.16%] index_select const : Elapsed 0.055 ms (5.465 ms / 100) 5.467 -> 5.465 ( -0.04%) [ +0.13% +0.00% +0.07% / -0.04% +0.24% +0.31%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.472 -> 5.476 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.22% +0.29%] index_select linear : Elapsed 0.055 ms (5.476 ms / 100) 5.482 -> 5.478 ( -0.07%) [ +0.04% +0.00% +0.04% / +0.09% -0.07% +0.07%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.469 -> 5.464 ( -0.09%) [ +0.22% +0.00% +0.09% / +0.07% -0.02% -0.09%] index_select skip64 : Elapsed 0.055 ms (5.481 ms / 100) 5.470 -> 5.466 ( -0.07%) [ +0.22% +0.05% +0.00% / +0.22% +0.04% -0.07%] index_select skip256 : Elapsed 0.055 ms (5.482 ms / 100) 5.470 -> 5.471 ( +0.02%) [ +0.00% +0.15% +0.00% / +0.02% +0.13% +0.26%] index_select spread : Elapsed 0.055 ms (5.470 ms / 100) 5.466 -> 5.477 ( +0.20%) [ +0.04% +0.00% +0.09% / +0.27% +0.20% +0.24%] index_select strided 3 : Elapsed 0.055 ms (5.468 ms / 100) 5.475 -> 5.473 ( -0.04%) [ +0.11% +0.00% +0.02% / +0.09% +0.18% -0.04%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.471 -> 5.478 ( +0.13%) [ +0.11% +0.00% +0.15% / +0.13% +0.16% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.477 ms / 100) 5.461 -> 5.471 ( +0.18%) [ +0.00% +0.16% +0.16% / +0.18% +0.31% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.461 ms / 100) 5.479 -> 5.481 ( +0.04%) [ +0.00% +0.02% +0.05% / +0.04% +0.04% +0.07%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.474 -> 5.482 ( +0.15%) [ +0.13% +0.04% +0.00% / +0.18% +0.15% +0.22%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [40, 4, 20, 5] (stride (20, 4000, 1, 800)) A = [16, 4, 20, 5] (stride (100, 1600, 1, 20)) dim = 0 4.006 -> 4.013 ( +0.17%) [ +0.00% +0.15% +0.00% / +0.17% +0.92% +0.72%] index_add_ linear : Elapsed 0.040 ms (4.006 ms / 100) 3.875 -> 3.881 ( +0.15%) [ +0.03% +0.10% +0.00% / +0.15% +1.19% +0.75%] index_copy_ linear : Elapsed 0.039 ms (3.876 ms / 100) 3.996 -> 4.010 ( +0.35%) [ +0.00% +0.18% +0.23% / +0.35% +1.03% +1.03%] index_add_ reverse : Elapsed 0.040 ms (3.996 ms / 100) 3.866 -> 3.878 ( +0.31%) [ +0.00% +0.21% +0.21% / +0.31% +1.29% +1.37%] index_copy_ reverse : Elapsed 0.039 ms (3.866 ms / 100) 3.991 -> 3.995 ( +0.10%) [ +0.00% +0.15% +0.10% / +0.10% +0.95% +0.93%] index_add_ spread : Elapsed 0.040 ms (3.991 ms / 100) 3.862 -> 3.866 ( +0.10%) [ +0.00% +0.05% +0.05% / +0.10% +1.14% +0.93%] index_copy_ spread : Elapsed 0.039 ms (3.862 ms / 100) 4.004 -> 4.002 ( -0.05%) [ +0.00% +0.02% +0.00% / -0.05% +0.77% +0.80%] index_add_ strided 3 : Elapsed 0.040 ms (4.004 ms / 100) 3.866 -> 3.865 ( -0.03%) [ +0.21% +0.03% +0.00% / -0.03% +1.47% +1.50%] index_copy_ strided 3 : Elapsed 0.039 ms (3.874 ms / 100) 4.000 -> 4.007 ( +0.17%) [ +0.00% +0.00% +0.10% / +0.17% +0.77% +0.60%] index_add_ strided 7 : Elapsed 0.040 ms (4.000 ms / 100) 3.867 -> 3.879 ( +0.31%) [ +0.00% +0.03% +0.13% / +0.31% +0.83% +0.75%] index_copy_ strided 7 : Elapsed 0.039 ms (3.867 ms / 100) 4.007 -> 4.005 ( -0.05%) [ +0.17% +0.05% +0.00% / -0.05% +0.95% +0.95%] index_add_ perm : Elapsed 0.040 ms (4.014 ms / 100) 3.875 -> 3.876 ( +0.03%) [ +0.08% +0.05% +0.00% / +0.03% +0.90% +0.77%] index_copy_ perm : Elapsed 0.039 ms (3.878 ms / 100) 3.998 -> 4.004 ( +0.15%) [ +0.15% +0.00% +0.05% / +0.15% +0.73% +0.88%] index_add_ perm_sorted : Elapsed 0.040 ms (4.004 ms / 100) 3.867 -> 3.879 ( +0.31%) [ +0.16% +0.00% +0.10% / +0.31% +0.72% +1.14%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.873 ms / 100) 5.561 -> 5.554 ( -0.13%) [ +0.07% +0.02% +0.00% / -0.04% -0.13% -0.02%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.573 -> 5.568 ( -0.09%) [ +0.04% +0.00% +0.05% / +0.00% -0.07% -0.09%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.568 -> 5.565 ( -0.05%) [ +0.02% +0.05% +0.00% / +0.00% -0.05% -0.05%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.05% +0.14% +0.00% / +0.07% -0.04% -0.07%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.04% +0.07% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.559 ms / 100) 5.556 -> 5.560 ( +0.07%) [ +0.09% +0.11% +0.00% / +0.14% +0.07% +0.07%] index_select skip256 : Elapsed 0.056 ms (5.561 ms / 100) 5.567 -> 5.559 ( -0.14%) [ +0.09% +0.00% +0.23% / +0.09% -0.05% -0.14%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.565 -> 5.560 ( -0.09%) [ +0.02% +0.04% +0.00% / +0.11% +0.05% -0.09%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.562 ( -0.14%) [ +0.07% +0.00% +0.02% / +0.04% -0.11% -0.14%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.04% +0.13% +0.00% / +0.20% +0.14% +0.07%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.559 -> 5.560 ( +0.02%) [ +0.05% +0.09% +0.00% / +0.02% +0.07% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.00% +0.23% +0.13% / +0.09% -0.02% +0.16%] index_select random : Elapsed 0.056 ms (5.561 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.25% +0.07% +0.00% / +0.14% +0.02% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.578 ms / 100) B = [40, 4, 20, 5] (stride (1, 4000, 40, 800)) A = [16, 4, 20, 5] (stride (400, 100, 1, 20)) dim = 0 3.986 -> 3.987 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.55% +0.65%] index_add_ linear : Elapsed 0.040 ms (3.988 ms / 100) 3.853 -> 3.863 ( +0.26%) [ +0.13% +0.08% +0.00% / +0.26% +0.57% +0.67%] index_copy_ linear : Elapsed 0.039 ms (3.858 ms / 100) 3.978 -> 3.989 ( +0.28%) [ +0.13% +0.05% +0.00% / +0.28% +0.80% +0.63%] index_add_ reverse : Elapsed 0.040 ms (3.983 ms / 100) 3.852 -> 3.861 ( +0.23%) [ +0.10% +0.00% +0.05% / +0.23% +0.83% +0.60%] index_copy_ reverse : Elapsed 0.039 ms (3.856 ms / 100) 3.975 -> 3.974 ( -0.03%) [ +0.13% +0.23% +0.00% / -0.03% +0.50% +0.43%] index_add_ spread : Elapsed 0.040 ms (3.980 ms / 100) 3.849 -> 3.848 ( -0.03%) [ +0.13% +0.21% +0.00% / -0.03% +0.57% +0.49%] index_copy_ spread : Elapsed 0.039 ms (3.854 ms / 100) 3.994 -> 3.991 ( -0.08%) [ +0.00% +0.10% +0.00% / -0.08% +0.48% +0.63%] index_add_ strided 3 : Elapsed 0.040 ms (3.994 ms / 100) 3.852 -> 3.859 ( +0.18%) [ +0.00% +0.16% +0.13% / +0.18% +0.60% +0.65%] index_copy_ strided 3 : Elapsed 0.039 ms (3.852 ms / 100) 3.981 -> 3.984 ( +0.08%) [ +0.00% +0.08% +0.20% / +0.08% +0.70% +0.68%] index_add_ strided 7 : Elapsed 0.040 ms (3.981 ms / 100) 3.859 -> 3.860 ( +0.03%) [ +0.03% +0.00% +0.13% / +0.03% +0.62% +0.65%] index_copy_ strided 7 : Elapsed 0.039 ms (3.860 ms / 100) 3.989 -> 3.990 ( +0.03%) [ +0.00% +0.03% +0.18% / +0.03% +0.40% +0.63%] index_add_ perm : Elapsed 0.040 ms (3.989 ms / 100) 3.856 -> 3.862 ( +0.16%) [ +0.00% +0.08% +0.05% / +0.16% +0.49% +0.57%] index_copy_ perm : Elapsed 0.039 ms (3.856 ms / 100) 3.990 -> 3.996 ( +0.15%) [ +0.00% +0.20% +0.00% / +0.15% +0.45% +0.53%] index_add_ perm_sorted : Elapsed 0.040 ms (3.990 ms / 100) 3.860 -> 3.867 ( +0.18%) [ +0.03% +0.03% +0.00% / +0.18% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.861 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.00% +0.00% +0.13% / +0.14% -0.02% +0.05%] index_select const : Elapsed 0.056 ms (5.562 ms / 100) 5.567 -> 5.569 ( +0.04%) [ +0.02% +0.00% +0.04% / +0.04% +0.09% +0.31%] index_select wrap : Elapsed 0.056 ms (5.568 ms / 100) 5.567 -> 5.569 ( +0.04%) [ +0.02% +0.00% +0.07% / +0.04% +0.11% +0.16%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.563 -> 5.562 ( -0.02%) [ +0.00% +0.11% +0.05% / +0.13% -0.02% +0.11%] index_select reverse : Elapsed 0.056 ms (5.563 ms / 100) 5.562 -> 5.565 ( +0.05%) [ +0.07% +0.11% +0.00% / +0.16% +0.05% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.559 ( -0.09%) [ +0.07% +0.04% +0.00% / +0.11% -0.05% -0.09%] index_select skip256 : Elapsed 0.056 ms (5.568 ms / 100) 5.568 -> 5.563 ( -0.09%) [ +0.00% +0.07% +0.14% / +0.02% -0.09% +0.07%] index_select spread : Elapsed 0.056 ms (5.568 ms / 100) 5.558 -> 5.569 ( +0.20%) [ +0.09% +0.00% +0.18% / +0.20% +0.23% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.563 ms / 100) 5.559 -> 5.570 ( +0.20%) [ +0.00% +0.09% +0.11% / +0.22% +0.22% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.559 ms / 100) 5.561 -> 5.567 ( +0.11%) [ +0.20% +0.00% +0.02% / +0.14% +0.11% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.00% +0.00% +0.18% / +0.02% +0.07% +0.02%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.07% +0.00% +0.04% / +0.07% +0.11% +0.05%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.02% +0.00% +0.13% / +0.20% +0.16% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.562 ms / 100) B = [40, 4, 20, 5] (stride (1, 4000, 40, 800)) A = [16, 4, 20, 5] (stride (20, 1600, 1, 320)) dim = 0 4.054 -> 4.055 ( +0.02%) [ +0.12% +0.02% +0.00% / +0.02% +0.79% +0.84%] index_add_ linear : Elapsed 0.041 ms (4.059 ms / 100) 3.923 -> 3.925 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.05% +0.84% +0.92%] index_copy_ linear : Elapsed 0.039 ms (3.926 ms / 100) 4.043 -> 4.044 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.79% +0.79%] index_add_ reverse : Elapsed 0.040 ms (4.044 ms / 100) 3.904 -> 3.904 ( +0.00%) [ +0.10% +0.20% +0.00% / +0.00% +0.90% +0.87%] index_copy_ reverse : Elapsed 0.039 ms (3.908 ms / 100) 4.052 -> 4.058 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.64% +0.69%] index_add_ spread : Elapsed 0.041 ms (4.052 ms / 100) 3.918 -> 3.934 ( +0.41%) [ +0.00% +0.10% +0.00% / +0.41% +0.56% +0.66%] index_copy_ spread : Elapsed 0.039 ms (3.918 ms / 100) 4.054 -> 4.067 ( +0.32%) [ +0.02% +0.05% +0.00% / +0.32% +0.67% +0.64%] index_add_ strided 3 : Elapsed 0.041 ms (4.055 ms / 100) 3.917 -> 3.935 ( +0.46%) [ +0.10% +0.08% +0.00% / +1.40% +0.46% +0.49%] index_copy_ strided 3 : Elapsed 0.039 ms (3.921 ms / 100) 4.041 -> 4.046 ( +0.12%) [ +0.05% +0.00% +0.10% / +0.12% +0.87% +0.79%] index_add_ strided 7 : Elapsed 0.040 ms (4.043 ms / 100) 3.902 -> 3.932 ( +0.77%) [ +0.00% +0.08% +0.33% / +0.77% +0.95% +0.90%] index_copy_ strided 7 : Elapsed 0.039 ms (3.902 ms / 100) 4.054 -> 4.058 ( +0.10%) [ +0.00% +0.12% +0.00% / +0.10% +0.76% +0.86%] index_add_ perm : Elapsed 0.041 ms (4.054 ms / 100) 3.923 -> 3.938 ( +0.38%) [ +0.10% +0.13% +0.00% / +0.38% +0.82% +0.89%] index_copy_ perm : Elapsed 0.039 ms (3.927 ms / 100) 4.051 -> 4.074 ( +0.57%) [ +0.02% +0.02% +0.00% / +0.57% +0.77% +0.81%] index_add_ perm_sorted : Elapsed 0.041 ms (4.052 ms / 100) 3.911 -> 3.938 ( +0.69%) [ +0.13% +0.15% +0.00% / +1.43% +0.69% +0.72%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.916 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.13% +0.11% +0.00% / +0.20% +0.04% +0.07%] index_select const : Elapsed 0.056 ms (5.570 ms / 100) 5.578 -> 5.566 ( -0.22%) [ +0.11% +0.13% +0.00% / +0.14% -0.11% -0.22%] index_select wrap : Elapsed 0.056 ms (5.584 ms / 100) 5.581 -> 5.575 ( -0.11%) [ +0.09% +0.00% +0.16% / +0.05% -0.09% -0.11%] index_select linear : Elapsed 0.056 ms (5.586 ms / 100) 5.574 -> 5.578 ( +0.07%) [ +0.00% +0.27% +0.09% / +0.20% +0.09% +0.07%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.562 -> 5.567 ( +0.09%) [ +0.05% +0.05% +0.00% / +0.16% +0.14% +0.09%] index_select skip64 : Elapsed 0.056 ms (5.565 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.02% +0.00% +0.09% / +0.05% +0.04% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.563 ms / 100) 5.577 -> 5.572 ( -0.09%) [ +0.09% +0.00% +0.09% / +0.47% -0.09% -0.04%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.575 -> 5.573 ( -0.04%) [ +0.07% +0.11% +0.00% / +0.25% +0.02% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.576 -> 5.572 ( -0.07%) [ +0.11% +0.13% +0.00% / +0.30% -0.07% -0.02%] index_select strided 5 : Elapsed 0.056 ms (5.582 ms / 100) 5.574 -> 5.579 ( +0.09%) [ +0.20% +0.00% +0.02% / +0.27% +0.13% +0.09%] index_select strided 7 : Elapsed 0.056 ms (5.585 ms / 100) 5.564 -> 5.569 ( +0.09%) [ +0.04% +0.14% +0.00% / +0.09% +0.16% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.581 -> 5.577 ( -0.07%) [ +0.00% +0.00% +0.05% / +0.13% +0.02% -0.07%] index_select random : Elapsed 0.056 ms (5.581 ms / 100) 5.580 -> 5.574 ( -0.11%) [ +0.07% +0.00% +0.02% / +0.16% -0.11% -0.09%] index_select random_sorted : Elapsed 0.056 ms (5.584 ms / 100) B = [40, 4, 20, 5] (stride (1, 200, 800, 40)) A = [16, 4, 20, 5] (stride (1, 320, 16, 1280)) dim = 0 3.944 -> 3.947 ( +0.08%) [ +0.00% +0.03% +0.15% / +0.08% +0.41% +0.48%] index_add_ linear : Elapsed 0.039 ms (3.944 ms / 100) 3.800 -> 3.805 ( +0.13%) [ +0.03% +0.00% +0.11% / +0.13% +0.45% +0.53%] index_copy_ linear : Elapsed 0.038 ms (3.801 ms / 100) 3.949 -> 3.952 ( +0.08%) [ +0.13% +0.00% +0.13% / +0.08% +0.63% +0.66%] index_add_ reverse : Elapsed 0.040 ms (3.954 ms / 100) 3.805 -> 3.821 ( +0.42%) [ +0.29% +0.00% +0.29% / +0.42% +0.68% +0.60%] index_copy_ reverse : Elapsed 0.038 ms (3.816 ms / 100) 3.929 -> 3.939 ( +0.25%) [ +0.05% +0.23% +0.00% / +0.25% +0.71% +0.51%] index_add_ spread : Elapsed 0.039 ms (3.931 ms / 100) 3.790 -> 3.807 ( +0.45%) [ +0.05% +0.11% +0.00% / +0.58% +0.58% +0.45%] index_copy_ spread : Elapsed 0.038 ms (3.792 ms / 100) 3.933 -> 3.943 ( +0.25%) [ +0.00% +0.23% +0.20% / +0.25% +0.61% +0.71%] index_add_ strided 3 : Elapsed 0.039 ms (3.933 ms / 100) 3.785 -> 3.800 ( +0.40%) [ +0.00% +0.45% +0.18% / +0.40% +0.85% +0.87%] index_copy_ strided 3 : Elapsed 0.038 ms (3.785 ms / 100) 3.953 -> 3.964 ( +0.28%) [ +0.03% +0.00% +0.00% / +0.28% +0.40% +0.48%] index_add_ strided 7 : Elapsed 0.040 ms (3.954 ms / 100) 3.812 -> 3.825 ( +0.34%) [ +0.05% +0.00% +0.00% / +0.34% +0.55% +0.50%] index_copy_ strided 7 : Elapsed 0.038 ms (3.814 ms / 100) 3.940 -> 3.943 ( +0.08%) [ +0.10% +0.00% +0.08% / +0.08% +0.63% +0.63%] index_add_ perm : Elapsed 0.039 ms (3.944 ms / 100) 3.798 -> 3.800 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.55% +0.55%] index_copy_ perm : Elapsed 0.038 ms (3.799 ms / 100) 3.944 -> 3.952 ( +0.20%) [ +0.00% +0.08% +0.00% / +0.20% +0.48% +0.43%] index_add_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 3.800 -> 3.811 ( +0.29%) [ +0.00% +0.13% +0.03% / +0.29% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.800 ms / 100) 5.486 -> 5.490 ( +0.07%) [ +0.00% +0.00% +0.04% / +0.11% +0.07% +0.18%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.00% +0.15% +0.07% / +0.00% +0.13% +0.29%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.488 -> 5.489 ( +0.02%) [ +0.22% +0.05% +0.00% / +0.02% +0.11% +0.07%] index_select linear : Elapsed 0.055 ms (5.500 ms / 100) 5.489 -> 5.492 ( +0.05%) [ +0.15% +0.02% +0.00% / +0.13% +0.05% +0.13%] index_select reverse : Elapsed 0.055 ms (5.497 ms / 100) 5.491 -> 5.485 ( -0.11%) [ +0.05% +0.05% +0.00% / +0.00% -0.07% -0.11%] index_select skip64 : Elapsed 0.055 ms (5.494 ms / 100) 5.488 -> 5.490 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.20% +0.04% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.488 ms / 100) 5.493 -> 5.493 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.05% +0.07%] index_select spread : Elapsed 0.055 ms (5.496 ms / 100) 5.489 -> 5.494 ( +0.09%) [ +0.00% +0.13% +0.05% / +0.09% +0.13% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.490 -> 5.493 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.05% +0.11%] index_select strided 5 : Elapsed 0.055 ms (5.493 ms / 100) 5.492 -> 5.490 ( -0.04%) [ +0.02% +0.02% +0.00% / -0.04% -0.02% +0.13%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.487 -> 5.489 ( +0.04%) [ +0.15% +0.00% +0.11% / +0.04% +0.07% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.495 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.00% +0.05% +0.05% / -0.02% +0.16% +0.05%] index_select random : Elapsed 0.055 ms (5.488 ms / 100) 5.488 -> 5.491 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.11% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.488 ms / 100) B = [40, 4, 20, 5] (stride (80, 20, 1, 3200)) A = [16, 4, 20, 5] (stride (400, 1, 4, 80)) dim = 0 3.746 -> 3.751 ( +0.13%) [ +0.03% +0.00% +0.03% / +0.13% +0.75% +0.85%] index_add_ linear : Elapsed 0.037 ms (3.747 ms / 100) 3.608 -> 3.618 ( +0.28%) [ +0.11% +0.14% +0.00% / +0.28% +0.78% +0.83%] index_copy_ linear : Elapsed 0.036 ms (3.612 ms / 100) 3.747 -> 3.749 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.91% +0.88%] index_add_ reverse : Elapsed 0.037 ms (3.749 ms / 100) 3.616 -> 3.628 ( +0.33%) [ +0.03% +0.00% +0.19% / +0.33% +1.00% +0.94%] index_copy_ reverse : Elapsed 0.036 ms (3.617 ms / 100) 3.756 -> 3.764 ( +0.21%) [ +0.00% +0.00% +0.03% / +0.21% +0.72% +0.69%] index_add_ spread : Elapsed 0.038 ms (3.756 ms / 100) 3.612 -> 3.638 ( +0.72%) [ +0.17% +0.00% +0.14% / +0.72% +0.80% +0.75%] index_copy_ spread : Elapsed 0.036 ms (3.618 ms / 100) 3.755 -> 3.762 ( +0.19%) [ +0.05% +0.16% +0.00% / +0.19% +0.77% +0.96%] index_add_ strided 3 : Elapsed 0.038 ms (3.757 ms / 100) 3.621 -> 3.638 ( +0.47%) [ +0.06% +0.06% +0.00% / +0.47% +0.86% +1.10%] index_copy_ strided 3 : Elapsed 0.036 ms (3.623 ms / 100) 3.745 -> 3.752 ( +0.19%) [ +0.00% +0.08% +0.03% / +0.19% +0.93% +0.96%] index_add_ strided 7 : Elapsed 0.037 ms (3.745 ms / 100) 3.615 -> 3.639 ( +0.66%) [ +0.14% +0.00% +0.19% / +0.66% +1.00% +1.05%] index_copy_ strided 7 : Elapsed 0.036 ms (3.620 ms / 100) 3.747 -> 3.751 ( +0.11%) [ +0.00% +0.00% +0.03% / +0.11% +0.69% +0.64%] index_add_ perm : Elapsed 0.037 ms (3.747 ms / 100) 3.609 -> 3.613 ( +0.11%) [ +0.00% +0.08% +0.03% / +0.11% +0.86% +0.86%] index_copy_ perm : Elapsed 0.036 ms (3.609 ms / 100) 3.762 -> 3.765 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.61% +0.77%] index_add_ perm_sorted : Elapsed 0.038 ms (3.765 ms / 100) 3.623 -> 3.636 ( +0.36%) [ +0.03% +0.06% +0.00% / +0.36% +0.77% +0.86%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.624 ms / 100) 5.468 -> 5.464 ( -0.07%) [ +0.15% +0.15% +0.00% / +0.00% +0.04% -0.07%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.471 -> 5.466 ( -0.09%) [ +0.09% +0.15% +0.00% / +0.02% -0.09% +0.11%] index_select wrap : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.473 ( -0.05%) [ +0.00% +0.04% +0.00% / -0.05% -0.04% +0.00%] index_select linear : Elapsed 0.055 ms (5.476 ms / 100) 5.472 -> 5.477 ( +0.09%) [ +0.00% +0.13% +0.04% / +0.09% +0.18% +0.11%] index_select reverse : Elapsed 0.055 ms (5.472 ms / 100) 5.467 -> 5.467 ( +0.00%) [ +0.04% +0.11% +0.00% / +0.00% +0.09% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.469 ms / 100) 5.465 -> 5.473 ( +0.15%) [ +0.00% +0.18% +0.09% / +0.15% +0.15% +0.24%] index_select skip256 : Elapsed 0.055 ms (5.465 ms / 100) 5.474 -> 5.475 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.11% +0.02% +0.04%] index_select spread : Elapsed 0.055 ms (5.474 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.02% +0.04% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.480 ms / 100) 5.472 -> 5.469 ( -0.05%) [ +0.00% +0.05% +0.16% / +0.02% +0.02% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.472 ms / 100) 5.469 -> 5.466 ( -0.05%) [ +0.00% +0.13% +0.16% / -0.05% +0.24% +0.26%] index_select strided 7 : Elapsed 0.055 ms (5.469 ms / 100) 5.466 -> 5.472 ( +0.11%) [ +0.05% +0.02% +0.00% / +0.11% +0.29% +0.26%] index_select strided 8 : Elapsed 0.055 ms (5.469 ms / 100) 5.476 -> 5.472 ( -0.07%) [ +0.09% +0.02% +0.00% / +0.05% -0.07% -0.05%] index_select random : Elapsed 0.055 ms (5.481 ms / 100) 5.475 -> 5.471 ( -0.07%) [ +0.00% +0.04% +0.02% / -0.07% +0.00% -0.04%] index_select random_sorted : Elapsed 0.055 ms (5.475 ms / 100) B = [40, 4, 20, 5] (stride (80, 1, 4, 3200)) A = [16, 4, 20, 5] (stride (20, 1, 320, 4)) dim = 0 4.107 -> 4.110 ( +0.07%) [ +0.05% +0.00% +0.00% / +0.07% +0.51% +0.49%] index_add_ linear : Elapsed 0.041 ms (4.109 ms / 100) 3.966 -> 3.971 ( +0.13%) [ +0.03% +0.03% +0.00% / +0.13% +0.58% +0.50%] index_copy_ linear : Elapsed 0.040 ms (3.967 ms / 100) 4.111 -> 4.112 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.41% +0.46%] index_add_ reverse : Elapsed 0.041 ms (4.112 ms / 100) 3.968 -> 3.971 ( +0.08%) [ +0.10% +0.15% +0.00% / +0.08% +0.60% +0.63%] index_copy_ reverse : Elapsed 0.040 ms (3.972 ms / 100) 4.111 -> 4.111 ( +0.00%) [ +0.00% +0.12% +0.00% / +0.00% +0.54% +0.51%] index_add_ spread : Elapsed 0.041 ms (4.111 ms / 100) 3.972 -> 3.980 ( +0.20%) [ +0.08% +0.15% +0.00% / +0.20% +0.58% +0.60%] index_copy_ spread : Elapsed 0.040 ms (3.975 ms / 100) 4.127 -> 4.122 ( -0.12%) [ +0.00% +0.07% +0.02% / -0.12% +0.48% +0.41%] index_add_ strided 3 : Elapsed 0.041 ms (4.127 ms / 100) 3.981 -> 3.983 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.43% +0.45%] index_copy_ strided 3 : Elapsed 0.040 ms (3.981 ms / 100) 4.108 -> 4.112 ( +0.10%) [ +0.00% +0.07% +0.05% / +0.10% +0.61% +0.63%] index_add_ strided 7 : Elapsed 0.041 ms (4.108 ms / 100) 3.971 -> 3.972 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.50% +0.53%] index_copy_ strided 7 : Elapsed 0.040 ms (3.971 ms / 100) 4.106 -> 4.108 ( +0.05%) [ +0.00% +0.12% +0.02% / +0.05% +0.58% +0.56%] index_add_ perm : Elapsed 0.041 ms (4.106 ms / 100) 3.968 -> 3.967 ( -0.03%) [ +0.00% +0.05% +0.03% / -0.03% +0.50% +0.50%] index_copy_ perm : Elapsed 0.040 ms (3.968 ms / 100) 4.109 -> 4.116 ( +0.17%) [ +0.07% +0.05% +0.00% / +0.17% +0.41% +0.46%] index_add_ perm_sorted : Elapsed 0.041 ms (4.112 ms / 100) 3.967 -> 3.977 ( +0.25%) [ +0.08% +0.00% +0.03% / +0.25% +0.43% +0.45%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.970 ms / 100) 5.557 -> 5.564 ( +0.13%) [ +0.14% +0.05% +0.00% / +0.13% +0.23% +0.32%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.562 -> 5.560 ( -0.04%) [ +0.09% +0.02% +0.00% / -0.04% +0.36% +0.27%] index_select wrap : Elapsed 0.056 ms (5.567 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.00% +0.11% +0.11% / +0.07% +0.32% +0.05%] index_select linear : Elapsed 0.056 ms (5.561 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.07% +0.00% +0.05% / +0.04% +0.11% +0.20%] index_select reverse : Elapsed 0.056 ms (5.572 ms / 100) 5.563 -> 5.561 ( -0.04%) [ +0.20% +0.00% +0.11% / +0.09% +0.00% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.574 ms / 100) 5.565 -> 5.557 ( -0.14%) [ +0.00% +0.11% +0.13% / +0.00% -0.07% -0.14%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.05% +0.02% / +0.07% -0.02% +0.04%] index_select spread : Elapsed 0.056 ms (5.566 ms / 100) 5.563 -> 5.559 ( -0.07%) [ +0.11% +0.00% +0.05% / +0.18% -0.07% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.569 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.00% +0.23% +0.18% / +0.27% +0.27% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.567 ( +0.11%) [ +0.13% +0.00% +0.05% / +0.11% +0.14% +0.18%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.09% +0.02% +0.00% / +0.16% +0.07% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.563 -> 5.561 ( -0.04%) [ +0.18% +0.16% +0.00% / +0.43% -0.04% +0.16%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.565 -> 5.564 ( -0.02%) [ +0.07% +0.00% +0.04% / +0.36% -0.02% +0.14%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [40, 4, 20, 5] (stride (20, 800, 1, 3200)) A = [16, 4, 20, 5] (stride (5, 80, 320, 1)) dim = 0 4.063 -> 4.072 ( +0.22%) [ +0.02% +0.00% +0.02% / +0.22% +0.71% +0.57%] index_add_ linear : Elapsed 0.041 ms (4.064 ms / 100) 3.927 -> 3.947 ( +0.51%) [ +0.08% +0.00% +0.03% / +0.51% +0.76% +0.69%] index_copy_ linear : Elapsed 0.039 ms (3.930 ms / 100) 4.048 -> 4.056 ( +0.20%) [ +0.15% +0.07% +0.00% / +0.20% +0.89% +0.84%] index_add_ reverse : Elapsed 0.041 ms (4.054 ms / 100) 3.917 -> 3.928 ( +0.28%) [ +0.13% +0.03% +0.00% / +0.28% +0.87% +1.15%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.047 -> 4.046 ( -0.02%) [ +0.10% +0.00% +0.05% / -0.02% +0.84% +0.84%] index_add_ spread : Elapsed 0.041 ms (4.051 ms / 100) 3.917 -> 3.933 ( +0.41%) [ +0.15% +0.18% +0.00% / +0.41% +0.92% +0.92%] index_copy_ spread : Elapsed 0.039 ms (3.923 ms / 100) 4.063 -> 4.076 ( +0.32%) [ +0.00% +0.07% +0.07% / +0.32% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.041 ms (4.063 ms / 100) 3.927 -> 3.950 ( +0.59%) [ +0.03% +0.05% +0.00% / +0.59% +0.71% +0.87%] index_copy_ strided 3 : Elapsed 0.039 ms (3.928 ms / 100) 4.049 -> 4.057 ( +0.20%) [ +0.10% +0.02% +0.00% / +0.20% +0.79% +0.91%] index_add_ strided 7 : Elapsed 0.041 ms (4.053 ms / 100) 3.917 -> 3.932 ( +0.38%) [ +0.00% +0.13% +0.05% / +0.38% +0.79% +0.94%] index_copy_ strided 7 : Elapsed 0.039 ms (3.917 ms / 100) 4.057 -> 4.060 ( +0.07%) [ +0.00% +0.15% +0.02% / +0.07% +0.71% +0.99%] index_add_ perm : Elapsed 0.041 ms (4.057 ms / 100) 3.925 -> 3.927 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.84% +0.94%] index_copy_ perm : Elapsed 0.039 ms (3.928 ms / 100) 4.059 -> 4.063 ( +0.10%) [ +0.00% +0.02% +0.05% / +0.10% +0.89% +0.89%] index_add_ perm_sorted : Elapsed 0.041 ms (4.059 ms / 100) 3.924 -> 3.928 ( +0.10%) [ +0.05% +0.00% +0.00% / +0.10% +0.94% +0.92%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.926 ms / 100) 5.563 -> 5.558 ( -0.09%) [ +0.13% +0.00% +0.05% / +0.14% -0.09% +0.00%] index_select const : Elapsed 0.056 ms (5.570 ms / 100) 5.574 -> 5.562 ( -0.22%) [ +0.05% +0.00% +0.13% / +0.05% -0.14% -0.22%] index_select wrap : Elapsed 0.056 ms (5.577 ms / 100) 5.575 -> 5.567 ( -0.14%) [ +0.07% +0.00% +0.07% / +0.16% -0.04% -0.14%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.574 -> 5.566 ( -0.14%) [ +0.00% +0.09% +0.05% / +0.05% +0.00% -0.14%] index_select reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.05% +0.02% +0.13%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.556 ( -0.05%) [ +0.00% +0.04% +0.14% / +0.13% -0.05% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.573 -> 5.560 ( -0.23%) [ +0.04% +0.00% +0.02% / +0.02% -0.20% -0.23%] index_select spread : Elapsed 0.056 ms (5.575 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.14% +0.09% +0.00% / +0.18% +0.02% +0.00%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.04% +0.00% +0.20% / +0.05% -0.04% +0.11%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.569 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.13% +0.14% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.557 -> 5.562 ( +0.09%) [ +0.25% +0.11% +0.00% / +0.09% +0.22% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.571 ms / 100) 5.574 -> 5.571 ( -0.05%) [ +0.02% +0.11% +0.00% / +0.09% -0.05% -0.05%] index_select random : Elapsed 0.056 ms (5.575 ms / 100) 5.571 -> 5.568 ( -0.05%) [ +0.16% +0.00% +0.00% / +0.09% -0.05% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.580 ms / 100) out_shape = [16, 40, 20, 5] in_shape = [16, 4, 20, 5] idx_dim = 1 B = [16, 40, 20, 5] (stride (100, 1600, 5, 1)) A = [16, 4, 20, 5] (stride (20, 1600, 1, 320)) dim = 1 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.41% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.59% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.49% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.231 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.17% +0.00% / +0.08% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.57% +0.65%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.191 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.16% +0.24% +0.00% / +0.08% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.231 ms / 100) 1.190 -> 1.189 ( -0.08%) [ +0.17% +0.08% +0.00% / -0.08% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.229 -> 1.234 ( +0.41%) [ +0.16% +0.08% +0.00% / +0.41% +0.65% +0.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.84% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.191 ms / 100) 1.230 -> 1.232 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.65% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.84% +0.67%] index_copy_ perm : Elapsed 0.012 ms (1.191 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.57% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.191 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.191 ms / 100) 8.678 -> 8.663 ( -0.17%) [ +0.06% +0.14% +0.00% / +0.17% +0.12% -0.17%] index_select const : Elapsed 0.087 ms (8.683 ms / 100) 8.713 -> 8.709 ( -0.05%) [ +0.11% +0.00% +0.03% / +0.15% -0.02% -0.05%] index_select wrap : Elapsed 0.087 ms (8.723 ms / 100) 8.693 -> 8.701 ( +0.09%) [ +0.13% +0.00% +0.06% / +0.09% +0.33% +0.18%] index_select linear : Elapsed 0.087 ms (8.704 ms / 100) 8.707 -> 8.687 ( -0.23%) [ +0.00% +0.01% +0.14% / +0.03% -0.23% -0.23%] index_select reverse : Elapsed 0.087 ms (8.707 ms / 100) 8.678 -> 8.677 ( -0.01%) [ +0.08% +0.12% +0.00% / +0.10% -0.01% +0.10%] index_select skip64 : Elapsed 0.087 ms (8.685 ms / 100) 8.672 -> 8.676 ( +0.05%) [ +0.13% +0.05% +0.00% / +0.05% +0.10% +0.06%] index_select skip256 : Elapsed 0.087 ms (8.683 ms / 100) 8.702 -> 8.701 ( -0.01%) [ +0.00% +0.01% +0.07% / +0.08% -0.01% +0.01%] index_select spread : Elapsed 0.087 ms (8.702 ms / 100) 8.701 -> 8.699 ( -0.02%) [ +0.03% +0.00% +0.09% / +0.01% -0.02% +0.20%] index_select strided 3 : Elapsed 0.087 ms (8.704 ms / 100) 8.710 -> 8.687 ( -0.26%) [ +0.00% +0.06% +0.01% / -0.26% -0.20% -0.18%] index_select random : Elapsed 0.087 ms (8.710 ms / 100) 8.701 -> 8.706 ( +0.06%) [ +0.08% +0.15% +0.00% / +0.06% +0.18% +0.10%] index_select random_sorted : Elapsed 0.087 ms (8.708 ms / 100) B = [16, 40, 20, 5] (stride (5, 80, 3200, 1)) A = [16, 4, 20, 5] (stride (20, 1, 320, 4)) dim = 1 1.335 -> 1.334 ( -0.07%) [ +0.07% +0.15% +0.00% / -0.07% +0.67% +0.60%] index_add_ linear : Elapsed 0.013 ms (1.336 ms / 100) 1.294 -> 1.295 ( +0.08%) [ +0.08% +0.15% +0.00% / +0.08% +0.70% +0.70%] index_copy_ linear : Elapsed 0.013 ms (1.295 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.00% +0.15% +0.07% / +0.07% +0.52% +0.22%] index_add_ reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.294 -> 1.300 ( +0.46%) [ +0.00% +0.15% +0.08% / +0.46% +0.54% +0.46%] index_copy_ reverse : Elapsed 0.013 ms (1.294 ms / 100) 1.334 -> 1.336 ( +0.15%) [ +0.30% +0.15% +0.00% / +0.15% +0.30% +0.37%] index_add_ spread : Elapsed 0.013 ms (1.338 ms / 100) 1.294 -> 1.295 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.39% +0.39%] index_copy_ spread : Elapsed 0.013 ms (1.294 ms / 100) 1.337 -> 1.344 ( +0.52%) [ +0.67% +0.22% +0.00% / +0.52% +0.60% +0.97%] index_add_ strided 3 : Elapsed 0.013 ms (1.346 ms / 100) 1.298 -> 1.304 ( +0.46%) [ +0.31% +0.31% +0.00% / +0.46% +0.69% +0.77%] index_copy_ strided 3 : Elapsed 0.013 ms (1.302 ms / 100) 1.338 -> 1.338 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.45% +0.45%] index_add_ strided 7 : Elapsed 0.013 ms (1.339 ms / 100) 1.293 -> 1.294 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.46% +0.39%] index_copy_ strided 7 : Elapsed 0.013 ms (1.293 ms / 100) 1.334 -> 1.335 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.37% +0.60%] index_add_ perm : Elapsed 0.013 ms (1.335 ms / 100) 1.294 -> 1.293 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.294 ms / 100) 1.337 -> 1.334 ( -0.22%) [ +0.07% +0.07% +0.00% / -0.22% +0.30% +0.15%] index_add_ perm_sorted : Elapsed 0.013 ms (1.338 ms / 100) 1.296 -> 1.295 ( -0.08%) [ +0.08% +0.00% +0.15% / -0.08% +0.46% +0.23%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.297 ms / 100) 9.170 -> 9.170 ( +0.00%) [ +0.00% +0.01% +0.11% / +0.10% +0.12% +0.00%] index_select const : Elapsed 0.092 ms (9.170 ms / 100) 9.166 -> 9.167 ( +0.01%) [ +0.09% +0.13% +0.00% / +0.01% +0.23% +0.24%] index_select wrap : Elapsed 0.092 ms (9.174 ms / 100) 9.169 -> 9.179 ( +0.11%) [ +0.00% +0.24% +0.09% / +0.21% +0.11% +0.47%] index_select linear : Elapsed 0.092 ms (9.169 ms / 100) 9.167 -> 9.160 ( -0.08%) [ +0.14% +0.11% +0.00% / +0.17% -0.08% +0.16%] index_select reverse : Elapsed 0.092 ms (9.180 ms / 100) 9.173 -> 9.173 ( +0.00%) [ +0.23% +0.00% +0.07% / +0.00% +0.35% +0.09%] index_select skip64 : Elapsed 0.092 ms (9.194 ms / 100) 9.176 -> 9.173 ( -0.03%) [ +0.00% +0.12% +0.29% / -0.03% +0.07% -0.03%] index_select skip256 : Elapsed 0.092 ms (9.176 ms / 100) 9.166 -> 9.173 ( +0.08%) [ +0.33% +0.29% +0.00% / +0.36% +0.08% +0.16%] index_select spread : Elapsed 0.092 ms (9.196 ms / 100) 9.167 -> 9.165 ( -0.02%) [ +0.00% +0.28% +0.15% / +0.24% -0.02% +0.22%] index_select strided 3 : Elapsed 0.092 ms (9.167 ms / 100) 9.173 -> 9.182 ( +0.10%) [ +0.00% +0.05% +0.19% / +0.10% +0.20% +0.36%] index_select random : Elapsed 0.092 ms (9.173 ms / 100) 9.173 -> 9.180 ( +0.08%) [ +0.00% +0.05% +0.04% / +0.28% +0.22% +0.08%] index_select random_sorted : Elapsed 0.092 ms (9.173 ms / 100) B = [16, 40, 20, 5] (stride (1, 16, 3200, 640)) A = [16, 4, 20, 5] (stride (20, 1600, 1, 320)) dim = 1 1.232 -> 1.233 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.57% +0.32%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.49% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.59% +0.50%] index_copy_ reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.41% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.67% +0.75%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.49% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.230 -> 1.236 ( +0.49%) [ +0.16% +0.16% +0.00% / +0.49% +0.65% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.191 -> 1.197 ( +0.50%) [ +0.08% +0.08% +0.00% / +0.50% +0.76% +0.92%] index_copy_ strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.57% +0.57%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.00% +0.17% / +0.17% +0.92% +0.84%] index_copy_ perm : Elapsed 0.012 ms (1.193 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.65% +0.65%] index_add_ perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.67% +0.59%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.192 ms / 100) 8.710 -> 8.706 ( -0.05%) [ +0.00% +0.02% +0.06% / -0.05% +0.11% +0.36%] index_select const : Elapsed 0.087 ms (8.710 ms / 100) 8.738 -> 8.756 ( +0.21%) [ +0.00% +0.23% +0.11% / +0.32% +0.31% +0.21%] index_select wrap : Elapsed 0.087 ms (8.738 ms / 100) 8.730 -> 8.732 ( +0.02%) [ +0.08% +0.00% +0.17% / +0.02% +0.41% +0.33%] index_select linear : Elapsed 0.087 ms (8.737 ms / 100) 8.737 -> 8.738 ( +0.01%) [ +0.00% +0.06% +0.10% / +0.01% +0.22% +0.15%] index_select reverse : Elapsed 0.087 ms (8.737 ms / 100) 8.711 -> 8.714 ( +0.03%) [ +0.00% +0.02% +0.07% / +0.03% +0.14% +0.42%] index_select skip64 : Elapsed 0.087 ms (8.711 ms / 100) 8.697 -> 8.708 ( +0.13%) [ +0.00% +0.23% +0.18% / +0.13% +0.37% +0.46%] index_select skip256 : Elapsed 0.087 ms (8.697 ms / 100) 8.737 -> 8.739 ( +0.02%) [ +0.00% +0.16% +0.16% / +0.02% +0.38% +0.27%] index_select spread : Elapsed 0.087 ms (8.737 ms / 100) 8.730 -> 8.748 ( +0.21%) [ +0.45% +0.06% +0.00% / +0.21% +0.36% +0.48%] index_select strided 3 : Elapsed 0.088 ms (8.769 ms / 100) 8.742 -> 8.737 ( -0.06%) [ +0.06% +0.00% +0.05% / -0.06% +0.19% +0.15%] index_select random : Elapsed 0.087 ms (8.747 ms / 100) 8.716 -> 8.743 ( +0.31%) [ +0.26% +0.00% +0.14% / +0.41% +0.52% +0.31%] index_select random_sorted : Elapsed 0.087 ms (8.739 ms / 100) B = [16, 40, 20, 5] (stride (40, 1, 640, 12800)) A = [16, 4, 20, 5] (stride (100, 1600, 1, 20)) dim = 1 1.315 -> 1.317 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.30% +0.30%] index_add_ linear : Elapsed 0.013 ms (1.315 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +0.63% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.315 -> 1.319 ( +0.30%) [ +0.38% +0.15% +0.00% / +0.38% +0.30% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.320 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.39% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.326 -> 1.328 ( +0.15%) [ +0.23% +0.08% +0.00% / +0.23% +0.23% +0.15%] index_add_ spread : Elapsed 0.013 ms (1.329 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.31% +0.00% / +0.08% +0.08% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.287 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.00% +0.15% +0.00% / +0.15% +0.38% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.23% +0.08% / +0.08% +0.47% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.00% +0.08% +0.15% / +0.15% +0.61% +0.38%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.281 -> 1.286 ( +0.39%) [ +0.08% +0.16% +0.00% / +0.39% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.30% +0.45%] index_add_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.31% +0.16% +0.00% / +0.00% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.45% +0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) 9.222 -> 9.233 ( +0.12%) [ +0.08% +0.00% +0.08% / +0.12% +0.33% +0.13%] index_select const : Elapsed 0.092 ms (9.229 ms / 100) 9.252 -> 9.242 ( -0.11%) [ +0.17% +0.17% +0.00% / +0.16% -0.01% -0.11%] index_select wrap : Elapsed 0.093 ms (9.268 ms / 100) 9.237 -> 9.237 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.01% +0.03% +0.00%] index_select linear : Elapsed 0.092 ms (9.243 ms / 100) 9.235 -> 9.238 ( +0.03%) [ +0.00% +0.06% +0.12% / +0.14% +0.22% +0.03%] index_select reverse : Elapsed 0.092 ms (9.235 ms / 100) 9.209 -> 9.213 ( +0.04%) [ +0.05% +0.11% +0.00% / +0.04% +0.49% +0.18%] index_select skip64 : Elapsed 0.092 ms (9.214 ms / 100) 9.214 -> 9.223 ( +0.10%) [ +0.14% +0.00% +0.02% / +0.10% +0.28% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.227 ms / 100) 9.237 -> 9.244 ( +0.08%) [ +0.00% +0.17% +0.15% / +0.45% +0.08% +0.16%] index_select spread : Elapsed 0.092 ms (9.237 ms / 100) 9.256 -> 9.254 ( -0.02%) [ +0.04% +0.00% +0.18% / +0.15% -0.02% +0.13%] index_select strided 3 : Elapsed 0.093 ms (9.260 ms / 100) 9.251 -> 9.255 ( +0.04%) [ +0.00% +0.13% +0.22% / +0.05% +0.04% +0.16%] index_select random : Elapsed 0.093 ms (9.251 ms / 100) 9.250 -> 9.257 ( +0.08%) [ +0.00% +0.12% +0.25% / +0.12% +0.08% +0.13%] index_select random_sorted : Elapsed 0.092 ms (9.250 ms / 100) B = [16, 40, 20, 5] (stride (40, 1, 640, 12800)) A = [16, 4, 20, 5] (stride (4, 1, 64, 1280)) dim = 1 1.411 -> 1.415 ( +0.28%) [ +0.07% +0.00% +0.21% / +0.28% +0.78% +0.78%] index_add_ linear : Elapsed 0.014 ms (1.412 ms / 100) 1.370 -> 1.374 ( +0.29%) [ +0.00% +0.07% +0.29% / +0.29% +0.88% +0.88%] index_copy_ linear : Elapsed 0.014 ms (1.370 ms / 100) 1.411 -> 1.415 ( +0.28%) [ +0.14% +0.00% +0.07% / +0.28% +0.50% +0.78%] index_add_ reverse : Elapsed 0.014 ms (1.413 ms / 100) 1.368 -> 1.371 ( +0.22%) [ +0.15% +0.07% +0.00% / +0.22% +0.80% +0.95%] index_copy_ reverse : Elapsed 0.014 ms (1.370 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.07% +0.14%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.378 -> 1.381 ( +0.22%) [ +0.73% +0.00% +0.07% / +0.22% +0.36% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.388 ms / 100) 1.415 -> 1.414 ( -0.07%) [ +0.00% +0.21% +0.00% / -0.07% +0.57% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.415 ms / 100) 1.372 -> 1.373 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.73% +0.80%] index_copy_ strided 3 : Elapsed 0.014 ms (1.373 ms / 100) 1.415 -> 1.413 ( -0.14%) [ +0.14% +0.21% +0.00% / -0.14% +0.57% +0.57%] index_add_ strided 7 : Elapsed 0.014 ms (1.417 ms / 100) 1.372 -> 1.370 ( -0.15%) [ +0.00% +0.15% +0.00% / -0.15% +0.73% +0.80%] index_copy_ strided 7 : Elapsed 0.014 ms (1.372 ms / 100) 1.414 -> 1.414 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.64% +0.57%] index_add_ perm : Elapsed 0.014 ms (1.415 ms / 100) 1.372 -> 1.374 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.80% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.373 ms / 100) 1.414 -> 1.415 ( +0.07%) [ +0.21% +0.00% +0.21% / +0.07% +0.64% +0.57%] index_add_ perm_sorted : Elapsed 0.014 ms (1.417 ms / 100) 1.370 -> 1.371 ( +0.07%) [ +0.29% +0.00% +0.15% / +0.07% +0.80% +0.66%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.374 ms / 100) 9.271 -> 9.270 ( -0.01%) [ +0.00% +0.14% +0.06% / -0.01% +0.08% +0.14%] index_select const : Elapsed 0.093 ms (9.271 ms / 100) 9.265 -> 9.282 ( +0.18%) [ +0.02% +0.00% +0.19% / +0.30% +0.18% +0.31%] index_select wrap : Elapsed 0.093 ms (9.267 ms / 100) 9.264 -> 9.269 ( +0.05%) [ +0.08% +0.10% +0.00% / +0.26% +0.10% +0.05%] index_select linear : Elapsed 0.093 ms (9.271 ms / 100) 9.269 -> 9.273 ( +0.04%) [ +0.02% +0.00% +0.06% / +0.04% +0.20% +0.05%] index_select reverse : Elapsed 0.093 ms (9.271 ms / 100) 9.266 -> 9.279 ( +0.14%) [ +0.01% +0.00% +0.11% / +0.14% +0.22% +0.22%] index_select skip64 : Elapsed 0.093 ms (9.267 ms / 100) 9.258 -> 9.275 ( +0.18%) [ +0.00% +0.09% +0.11% / +0.18% +0.49% +0.33%] index_select skip256 : Elapsed 0.093 ms (9.258 ms / 100) 9.268 -> 9.264 ( -0.04%) [ +0.13% +0.13% +0.00% / -0.04% +0.08% +0.23%] index_select spread : Elapsed 0.093 ms (9.280 ms / 100) 9.264 -> 9.267 ( +0.03%) [ +0.00% +0.09% +0.15% / +0.03% +0.17% +0.05%] index_select strided 3 : Elapsed 0.093 ms (9.264 ms / 100) 9.259 -> 9.263 ( +0.04%) [ +0.00% +0.16% +0.16% / +0.23% +0.18% +0.04%] index_select random : Elapsed 0.093 ms (9.259 ms / 100) 9.273 -> 9.279 ( +0.06%) [ +0.04% +0.00% +0.12% / +0.06% +0.20% +0.24%] index_select random_sorted : Elapsed 0.093 ms (9.277 ms / 100) out_shape = [16, 4, 40, 5] in_shape = [16, 4, 20, 5] idx_dim = 2 B = [16, 4, 40, 5] (stride (5, 80, 320, 1)) dim = 2 fill_cnt = 20 2.881 -> 2.842 ( -1.35%) [ +0.03% +0.10% +0.00% / -1.35% -1.35% -1.32%] index_fill_ const : Elapsed 0.029 ms (2.882 ms / 100) 2.882 -> 2.844 ( -1.32%) [ +0.38% +0.42% +0.00% / -1.32% -1.32% -1.08%] index_fill_ linear : Elapsed 0.029 ms (2.893 ms / 100) 2.886 -> 2.846 ( -1.39%) [ +0.28% +0.00% +0.10% / -1.25% -1.32% -1.39%] index_fill_ reverse : Elapsed 0.029 ms (2.894 ms / 100) 2.881 -> 2.839 ( -1.46%) [ +0.07% +0.14% +0.00% / -1.32% -1.46% -1.39%] index_fill_ skip64 : Elapsed 0.029 ms (2.883 ms / 100) 2.877 -> 2.840 ( -1.29%) [ +0.00% +0.17% +0.35% / -1.29% -1.29% -0.97%] index_fill_ skip256 : Elapsed 0.029 ms (2.877 ms / 100) 2.888 -> 2.843 ( -1.56%) [ +0.00% +0.03% +0.07% / -1.18% -1.39% -1.56%] index_fill_ spread : Elapsed 0.029 ms (2.888 ms / 100) 2.886 -> 2.843 ( -1.49%) [ +0.03% +0.00% +0.14% / -1.35% -1.49% -1.28%] index_fill_ strided 3 : Elapsed 0.029 ms (2.887 ms / 100) 2.882 -> 2.841 ( -1.42%) [ +0.10% +0.21% +0.00% / -1.18% -1.15% -1.42%] index_fill_ strided 5 : Elapsed 0.029 ms (2.885 ms / 100) 2.889 -> 2.846 ( -1.49%) [ +0.21% +0.10% +0.00% / -1.49% -1.35% -1.45%] index_fill_ strided 7 : Elapsed 0.029 ms (2.895 ms / 100) 2.888 -> 2.837 ( -1.77%) [ +0.00% +0.00% +0.00% / -1.66% -1.77% -1.56%] index_fill_ strided 8 : Elapsed 0.029 ms (2.888 ms / 100) 2.880 -> 2.836 ( -1.53%) [ +0.10% +0.00% +0.21% / -1.39% -1.35% -1.53%] index_fill_ strided 16 : Elapsed 0.029 ms (2.883 ms / 100) 2.887 -> 2.844 ( -1.49%) [ +0.03% +0.00% +0.03% / -1.42% -1.18% -1.49%] index_fill_ random : Elapsed 0.029 ms (2.888 ms / 100) 2.884 -> 2.844 ( -1.39%) [ +0.14% +0.00% +0.10% / -1.39% -1.25% -1.32%] index_fill_ random_sorted : Elapsed 0.029 ms (2.888 ms / 100) 2.888 -> 2.844 ( -1.52%) [ +0.03% +0.10% +0.00% / -1.45% -1.52% -1.42%] index_fill_ perm : Elapsed 0.029 ms (2.889 ms / 100) 2.887 -> 2.842 ( -1.56%) [ +0.14% +0.10% +0.00% / -1.21% -1.52% -1.56%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.891 ms / 100) B = [16, 4, 40, 5] (stride (5, 80, 320, 1)) A = [16, 4, 20, 5] (stride (400, 100, 5, 1)) dim = 2 2.406 -> 2.417 ( +0.46%) [ +0.00% +0.08% +0.12% / +0.46% +0.75% +0.71%] index_add_ linear : Elapsed 0.024 ms (2.406 ms / 100) 2.402 -> 2.417 ( +0.62%) [ +0.00% +0.04% +0.12% / +0.62% +0.92% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.408 -> 2.417 ( +0.37%) [ +0.04% +0.17% +0.00% / +0.50% +0.62% +0.37%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.406 -> 2.415 ( +0.37%) [ +0.12% +0.00% +0.08% / +0.37% +0.46% +0.50%] index_copy_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.413 -> 2.419 ( +0.25%) [ +0.00% +0.12% +0.00% / +0.66% +0.29% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.408 -> 2.418 ( +0.42%) [ +0.00% +0.17% +0.08% / +0.58% +0.42% +0.54%] index_copy_ spread : Elapsed 0.024 ms (2.408 ms / 100) 2.408 -> 2.416 ( +0.33%) [ +0.00% +0.17% +0.21% / +0.62% +0.33% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.04% +0.00% +0.04% / +0.71% +0.54% +0.42%] index_copy_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.412 -> 2.419 ( +0.29%) [ +0.08% +0.00% +0.00% / +0.46% +0.29% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.404 -> 2.418 ( +0.58%) [ +0.17% +0.04% +0.00% / +0.62% +0.58% +0.58%] index_copy_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.409 -> 2.423 ( +0.58%) [ +0.12% +0.00% +0.00% / +0.75% +0.62% +0.58%] index_add_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.418 ( +0.50%) [ +0.00% +0.17% +0.12% / +0.50% +0.71% +0.71%] index_copy_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.407 -> 2.425 ( +0.75%) [ +0.25% +0.17% +0.00% / +0.79% +0.79% +0.75%] index_add_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.08% +0.00% +0.08% / +0.42% +0.75% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) 4.427 -> 4.430 ( +0.07%) [ +0.20% +0.00% +0.07% / +0.07% +0.20% +0.09%] index_select const : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.443 ( +0.16%) [ +0.14% +0.00% +0.16% / +0.16% +0.20% +0.23%] index_select wrap : Elapsed 0.044 ms (4.442 ms / 100) 4.437 -> 4.442 ( +0.11%) [ +0.14% +0.07% +0.00% / +0.14% +0.11% +0.34%] index_select linear : Elapsed 0.044 ms (4.443 ms / 100) 4.437 -> 4.441 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.34% +0.23%] index_select reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.430 -> 4.431 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.11% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.433 ms / 100) 4.428 -> 4.429 ( +0.02%) [ +0.00% +0.14% +0.14% / +0.02% +0.20% +0.29%] index_select skip256 : Elapsed 0.044 ms (4.428 ms / 100) 4.434 -> 4.441 ( +0.16%) [ +0.07% +0.20% +0.00% / +0.16% +0.18% +0.18%] index_select spread : Elapsed 0.044 ms (4.437 ms / 100) 4.437 -> 4.445 ( +0.18%) [ +0.11% +0.16% +0.00% / +0.18% +0.27% +0.20%] index_select strided 3 : Elapsed 0.044 ms (4.442 ms / 100) 4.430 -> 4.427 ( -0.07%) [ +0.14% +0.09% +0.00% / -0.07% +0.18% +0.11%] index_select strided 5 : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.02% +0.20% +0.00% / +0.02% +0.32% +0.23%] index_select strided 7 : Elapsed 0.044 ms (4.437 ms / 100) 4.431 -> 4.438 ( +0.16%) [ +0.14% +0.00% +0.07% / +0.16% +0.16% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.437 ms / 100) 4.434 -> 4.435 ( +0.02%) [ +0.05% +0.11% +0.00% / +0.16% +0.18% +0.02%] index_select strided 16 : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.445 ( +0.20%) [ +0.16% +0.00% +0.07% / +0.23% +0.23% +0.20%] index_select random : Elapsed 0.044 ms (4.443 ms / 100) 4.438 -> 4.447 ( +0.20%) [ +0.05% +0.00% +0.14% / +0.20% +0.29% +0.27%] index_select random_sorted : Elapsed 0.044 ms (4.440 ms / 100) B = [16, 4, 40, 5] (stride (4, 1, 64, 2560)) A = [16, 4, 20, 5] (stride (20, 1, 320, 4)) dim = 2 2.400 -> 2.415 ( +0.63%) [ +0.21% +0.33% +0.00% / +0.63% +0.83% +0.83%] index_add_ linear : Elapsed 0.024 ms (2.405 ms / 100) 2.389 -> 2.403 ( +0.59%) [ +0.00% +0.08% +0.21% / +0.59% +0.96% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.395 -> 2.410 ( +0.63%) [ +0.00% +0.13% +0.08% / +0.63% +1.25% +1.25%] index_add_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.387 -> 2.401 ( +0.59%) [ +0.00% +0.29% +0.00% / +0.59% +1.01% +1.26%] index_copy_ reverse : Elapsed 0.024 ms (2.387 ms / 100) 2.395 -> 2.413 ( +0.75%) [ +0.00% +0.21% +0.13% / +0.75% +1.29% +1.04%] index_add_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.384 -> 2.402 ( +0.76%) [ +0.04% +0.17% +0.00% / +0.76% +1.30% +1.34%] index_copy_ spread : Elapsed 0.024 ms (2.385 ms / 100) 2.410 -> 2.416 ( +0.25%) [ +0.00% +0.17% +0.00% / +0.33% +0.46% +0.25%] index_add_ strided 3 : Elapsed 0.024 ms (2.410 ms / 100) 2.391 -> 2.408 ( +0.71%) [ +0.17% +0.04% +0.00% / +0.71% +0.79% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.395 ms / 100) 2.408 -> 2.415 ( +0.29%) [ +0.12% +0.17% +0.00% / +0.50% +0.54% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.392 -> 2.408 ( +0.67%) [ +0.13% +0.08% +0.00% / +0.67% +0.79% +0.71%] index_copy_ strided 7 : Elapsed 0.024 ms (2.395 ms / 100) 2.408 -> 2.413 ( +0.21%) [ +0.37% +0.17% +0.00% / +0.54% +0.21% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.417 ms / 100) 2.392 -> 2.404 ( +0.50%) [ +0.00% +0.25% +0.25% / +0.84% +0.67% +0.50%] index_copy_ perm : Elapsed 0.024 ms (2.392 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.00% +0.08% +0.08% / +0.67% +0.42% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.405 ms / 100) 2.393 -> 2.403 ( +0.42%) [ +0.00% +0.13% +0.04% / +0.63% +0.50% +0.42%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.393 ms / 100) 4.413 -> 4.417 ( +0.09%) [ +0.02% +0.00% +0.16% / +0.09% +0.27% +0.09%] index_select const : Elapsed 0.044 ms (4.414 ms / 100) 4.426 -> 4.420 ( -0.14%) [ +0.00% +0.05% +0.07% / +0.02% -0.14% -0.02%] index_select wrap : Elapsed 0.044 ms (4.426 ms / 100) 4.422 -> 4.422 ( +0.00%) [ +0.27% +0.18% +0.00% / +0.16% +0.00% +0.11%] index_select linear : Elapsed 0.044 ms (4.434 ms / 100) 4.429 -> 4.424 ( -0.11%) [ +0.16% +0.00% +0.02% / -0.11% -0.09% -0.05%] index_select reverse : Elapsed 0.044 ms (4.436 ms / 100) 4.419 -> 4.415 ( -0.09%) [ +0.00% +0.07% +0.11% / -0.09% -0.02% +0.02%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.419 -> 4.414 ( -0.11%) [ +0.00% +0.07% +0.00% / +0.05% -0.05% -0.11%] index_select skip256 : Elapsed 0.044 ms (4.419 ms / 100) 4.422 -> 4.432 ( +0.23%) [ +0.00% +0.11% +0.11% / +0.23% +0.38% +0.23%] index_select spread : Elapsed 0.044 ms (4.422 ms / 100) 4.424 -> 4.428 ( +0.09%) [ +0.14% +0.00% +0.09% / +0.11% +0.09% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.430 ms / 100) 4.415 -> 4.418 ( +0.07%) [ +0.02% +0.00% +0.07% / +0.11% +0.07% +0.16%] index_select strided 5 : Elapsed 0.044 ms (4.416 ms / 100) 4.423 -> 4.421 ( -0.05%) [ +0.00% +0.05% +0.20% / -0.05% -0.02% +0.07%] index_select strided 7 : Elapsed 0.044 ms (4.423 ms / 100) 4.419 -> 4.419 ( +0.00%) [ +0.09% +0.07% +0.00% / +0.05% +0.00% +0.09%] index_select strided 8 : Elapsed 0.044 ms (4.423 ms / 100) 4.423 -> 4.417 ( -0.14%) [ +0.02% +0.18% +0.00% / +0.14% -0.09% -0.14%] index_select strided 16 : Elapsed 0.044 ms (4.424 ms / 100) 4.431 -> 4.418 ( -0.29%) [ +0.00% +0.00% +0.09% / +0.02% -0.29% -0.05%] index_select random : Elapsed 0.044 ms (4.431 ms / 100) 4.424 -> 4.420 ( -0.09%) [ +0.11% +0.14% +0.00% / +0.02% -0.07% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.429 ms / 100) B = [16, 4, 40, 5] (stride (1, 16, 64, 2560)) A = [16, 4, 20, 5] (stride (400, 5, 20, 1)) dim = 2 2.450 -> 2.461 ( +0.45%) [ +0.20% +0.16% +0.00% / +0.45% +0.73% +0.69%] index_add_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.446 -> 2.459 ( +0.53%) [ +0.00% +0.00% +0.04% / +0.53% +0.65% +0.61%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.04% +0.00% +0.08% / +0.29% +0.49% +0.73%] index_add_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.08% +0.12% +0.00% / +0.53% +0.53% +0.57%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.454 -> 2.464 ( +0.41%) [ +0.12% +0.00% +0.08% / +0.41% +0.65% +0.57%] index_add_ spread : Elapsed 0.025 ms (2.457 ms / 100) 2.448 -> 2.455 ( +0.29%) [ +0.20% +0.20% +0.00% / +0.29% +0.69% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.455 -> 2.463 ( +0.33%) [ +0.00% +0.12% +0.00% / +0.33% +0.45% +0.45%] index_add_ strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.457 ( +0.41%) [ +0.08% +0.00% +0.12% / +0.41% +0.53% +0.69%] index_copy_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.456 -> 2.463 ( +0.29%) [ +0.20% +0.12% +0.00% / +0.45% +0.49% +0.29%] index_add_ strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.447 -> 2.455 ( +0.33%) [ +0.00% +0.08% +0.04% / +0.45% +0.57% +0.33%] index_copy_ strided 7 : Elapsed 0.024 ms (2.447 ms / 100) 2.451 -> 2.465 ( +0.57%) [ +0.16% +0.00% +0.08% / +0.57% +0.78% +0.86%] index_add_ perm : Elapsed 0.025 ms (2.455 ms / 100) 2.445 -> 2.456 ( +0.45%) [ +0.29% +0.08% +0.00% / +0.45% +0.94% +1.06%] index_copy_ perm : Elapsed 0.025 ms (2.452 ms / 100) 2.454 -> 2.466 ( +0.49%) [ +0.29% +0.00% +0.12% / +0.49% +0.53% +0.81%] index_add_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.447 -> 2.456 ( +0.37%) [ +0.00% +0.12% +0.12% / +0.37% +0.74% +0.61%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 4.495 -> 4.498 ( +0.07%) [ +0.07% +0.11% +0.00% / +0.24% +0.20% +0.07%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.504 -> 4.507 ( +0.07%) [ +0.11% +0.00% +0.16% / +0.07% +0.11% +0.11%] index_select wrap : Elapsed 0.045 ms (4.509 ms / 100) 4.503 -> 4.509 ( +0.13%) [ +0.18% +0.00% +0.13% / +0.13% +0.24% +0.24%] index_select linear : Elapsed 0.045 ms (4.511 ms / 100) 4.503 -> 4.500 ( -0.07%) [ +0.13% +0.16% +0.00% / -0.07% +0.20% +0.29%] index_select reverse : Elapsed 0.045 ms (4.509 ms / 100) 4.491 -> 4.496 ( +0.11%) [ +0.13% +0.22% +0.00% / +0.27% +0.16% +0.11%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.09% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.496 ms / 100) 4.503 -> 4.503 ( +0.00%) [ +0.11% +0.02% +0.00% / +0.00% +0.31% +0.16%] index_select spread : Elapsed 0.045 ms (4.508 ms / 100) 4.508 -> 4.505 ( -0.07%) [ +0.02% +0.00% +0.02% / +0.04% -0.07% +0.22%] index_select strided 3 : Elapsed 0.045 ms (4.509 ms / 100) 4.493 -> 4.499 ( +0.13%) [ +0.29% +0.00% +0.09% / +0.13% +0.22% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.506 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.24% +0.00% +0.04% / +0.02% +0.36% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.512 ms / 100) 4.495 -> 4.499 ( +0.09%) [ +0.13% +0.00% +0.00% / +0.09% +0.27% +0.11%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.495 -> 4.492 ( -0.07%) [ +0.11% +0.00% +0.00% / -0.07% +0.22% +0.29%] index_select strided 16 : Elapsed 0.045 ms (4.500 ms / 100) 4.506 -> 4.504 ( -0.04%) [ +0.00% +0.02% +0.02% / -0.04% +0.36% +0.09%] index_select random : Elapsed 0.045 ms (4.506 ms / 100) 4.504 -> 4.501 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.16% +0.16%] index_select random_sorted : Elapsed 0.045 ms (4.504 ms / 100) out_shape = [16, 4, 20, 40] in_shape = [16, 4, 20, 5] idx_dim = 3 B = [16, 4, 20, 40] (stride (800, 12800, 40, 1)) A = [16, 4, 20, 5] (stride (400, 100, 5, 1)) dim = 3 1.321 -> 1.321 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.47% +0.78%] index_copy_ linear : Elapsed 0.013 ms (1.279 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.45% +0.45%] index_add_ reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 1.321 -> 1.320 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.45% +0.45%] index_add_ spread : Elapsed 0.013 ms (1.322 ms / 100) 1.278 -> 1.277 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.47% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.278 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.61% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.321 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.320 -> 1.323 ( +0.23%) [ +0.23% +0.15% +0.00% / +0.23% +0.61% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.276 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.61% +0.83%] index_add_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.277 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.013 ms (1.320 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 7.935 -> 7.946 ( +0.14%) [ +0.14% +0.00% +0.05% / +0.14% +0.18% +0.34%] index_select const : Elapsed 0.079 ms (7.946 ms / 100) 7.932 -> 7.930 ( -0.03%) [ +0.04% +0.20% +0.00% / -0.03% +0.49% +0.33%] index_select wrap : Elapsed 0.079 ms (7.935 ms / 100) 7.933 -> 7.933 ( +0.00%) [ +0.11% +0.16% +0.00% / +0.00% +0.38% +0.26%] index_select linear : Elapsed 0.079 ms (7.942 ms / 100) 7.938 -> 7.950 ( +0.15%) [ +0.00% +0.10% +0.06% / +0.19% +0.16% +0.15%] index_select reverse : Elapsed 0.079 ms (7.938 ms / 100) 7.935 -> 7.931 ( -0.05%) [ +0.14% +0.05% +0.00% / -0.05% +0.32% +0.21%] index_select skip64 : Elapsed 0.079 ms (7.946 ms / 100) 7.923 -> 7.926 ( +0.04%) [ +0.35% +0.00% +0.11% / +0.04% +0.44% +0.21%] index_select skip256 : Elapsed 0.080 ms (7.951 ms / 100) 7.938 -> 7.936 ( -0.03%) [ +0.11% +0.00% +0.14% / -0.03% +0.18% +0.38%] index_select spread : Elapsed 0.079 ms (7.947 ms / 100) 7.922 -> 7.929 ( +0.09%) [ +0.01% +0.00% +0.27% / +0.09% +0.35% +0.35%] index_select strided 3 : Elapsed 0.079 ms (7.923 ms / 100) 7.931 -> 7.935 ( +0.05%) [ +0.20% +0.25% +0.00% / +0.05% +0.32% +0.25%] index_select random : Elapsed 0.079 ms (7.947 ms / 100) 7.936 -> 7.922 ( -0.18%) [ +0.08% +0.00% +0.08% / -0.18% +0.32% +0.10%] index_select random_sorted : Elapsed 0.079 ms (7.942 ms / 100) B = [16, 4, 20, 40] (stride (800, 12800, 1, 20)) A = [16, 4, 20, 5] (stride (100, 1600, 1, 20)) dim = 3 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.79% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.61% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.59% +0.66%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.68% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.66% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.34% +0.00% +0.00% / +0.00% +0.61% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.66% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.75% +0.68%] index_copy_ strided 7 : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.72% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.82% +0.68%] index_copy_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.516 -> 1.518 ( +0.13%) [ +0.13% +0.20% +0.00% / +0.13% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) 8.519 -> 8.511 ( -0.09%) [ +0.00% +0.22% +0.13% / -0.09% +0.59% +0.14%] index_select const : Elapsed 0.085 ms (8.519 ms / 100) 8.532 -> 8.537 ( +0.06%) [ +0.09% +0.14% +0.00% / +0.13% +0.06% +0.21%] index_select wrap : Elapsed 0.085 ms (8.540 ms / 100) 8.539 -> 8.532 ( -0.08%) [ +0.00% +0.05% +0.05% / -0.02% +0.14% -0.08%] index_select linear : Elapsed 0.085 ms (8.539 ms / 100) 8.535 -> 8.550 ( +0.18%) [ +0.00% +0.06% +0.01% / +0.40% +0.26% +0.18%] index_select reverse : Elapsed 0.085 ms (8.535 ms / 100) 8.515 -> 8.514 ( -0.01%) [ +0.13% +0.05% +0.00% / +0.07% +0.32% -0.01%] index_select skip64 : Elapsed 0.085 ms (8.526 ms / 100) 8.518 -> 8.513 ( -0.06%) [ +0.00% +0.05% +0.12% / -0.06% +0.13% +0.09%] index_select skip256 : Elapsed 0.085 ms (8.518 ms / 100) 8.523 -> 8.543 ( +0.23%) [ +0.28% +0.00% +0.16% / +0.35% +0.23% +0.35%] index_select spread : Elapsed 0.085 ms (8.547 ms / 100) 8.526 -> 8.530 ( +0.05%) [ +0.02% +0.04% +0.00% / +0.05% +0.38% +0.18%] index_select strided 3 : Elapsed 0.085 ms (8.528 ms / 100) 8.533 -> 8.534 ( +0.01%) [ +0.15% +0.15% +0.00% / +0.01% +0.05% +0.38%] index_select random : Elapsed 0.085 ms (8.546 ms / 100) 8.540 -> 8.547 ( +0.08%) [ +0.07% +0.00% +0.05% / +0.18% +0.56% +0.08%] index_select random_sorted : Elapsed 0.085 ms (8.546 ms / 100) B = [16, 4, 20, 40] (stride (800, 12800, 1, 20)) A = [16, 4, 20, 5] (stride (80, 20, 1, 1280)) dim = 3 1.318 -> 1.319 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.46% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.320 ms / 100) 1.275 -> 1.274 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.31% +0.39%] index_copy_ linear : Elapsed 0.013 ms (1.275 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.46% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.47% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.53% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.71% +0.86%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.68% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.79% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.276 ( +0.24%) [ +0.00% +0.08% +0.00% / +0.24% +0.55% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 7.843 -> 7.857 ( +0.18%) [ +0.20% +0.00% +0.17% / +0.18% +0.27% +0.20%] index_select const : Elapsed 0.079 ms (7.859 ms / 100) 7.876 -> 7.888 ( +0.15%) [ +0.03% +0.00% +0.13% / +0.15% +0.38% +0.18%] index_select wrap : Elapsed 0.079 ms (7.878 ms / 100) 7.871 -> 7.881 ( +0.13%) [ +0.00% +0.20% +0.22% / +0.13% +0.23% +0.25%] index_select linear : Elapsed 0.079 ms (7.871 ms / 100) 7.859 -> 7.867 ( +0.10%) [ +0.00% +0.10% +0.22% / +0.10% +0.25% +0.41%] index_select reverse : Elapsed 0.079 ms (7.859 ms / 100) 7.852 -> 7.859 ( +0.09%) [ +0.28% +0.00% +0.04% / +0.27% +0.09% +0.33%] index_select skip64 : Elapsed 0.079 ms (7.874 ms / 100) 7.851 -> 7.844 ( -0.09%) [ +0.10% +0.15% +0.00% / -0.09% +0.00% +0.31%] index_select skip256 : Elapsed 0.079 ms (7.859 ms / 100) 7.862 -> 7.874 ( +0.15%) [ +0.32% +0.00% +0.03% / +0.15% +0.42% +0.29%] index_select spread : Elapsed 0.079 ms (7.887 ms / 100) 7.875 -> 7.879 ( +0.05%) [ +0.00% +0.09% +0.05% / +0.29% +0.05% +0.17%] index_select strided 3 : Elapsed 0.079 ms (7.875 ms / 100) 7.878 -> 7.883 ( +0.06%) [ +0.05% +0.00% +0.06% / +0.06% +0.20% +0.29%] index_select random : Elapsed 0.079 ms (7.882 ms / 100) 7.866 -> 7.867 ( +0.01%) [ +0.00% +0.03% +0.15% / +0.01% +0.24% +0.28%] index_select random_sorted : Elapsed 0.079 ms (7.866 ms / 100) B = [16, 4, 20, 40] (stride (160, 40, 2560, 1)) A = [16, 4, 20, 5] (stride (1, 1600, 80, 16)) dim = 3 0.664 -> 0.665 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.30% +0.45%] index_add_ linear : Elapsed 0.007 ms (0.664 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.15% +0.45% +0.00% / +0.00% +0.89% +0.89%] index_copy_ linear : Elapsed 0.007 ms (0.675 ms / 100) 0.655 -> 0.655 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.61% +0.31%] index_add_ reverse : Elapsed 0.007 ms (0.655 ms / 100) 0.666 -> 0.666 ( +0.00%) [ +0.00% +0.30% +0.00% / +0.00% +0.60% +0.60%] index_copy_ reverse : Elapsed 0.007 ms (0.666 ms / 100) 0.666 -> 0.667 ( +0.15%) [ +0.00% +0.30% +0.15% / +0.15% +0.15% +0.15%] index_add_ spread : Elapsed 0.007 ms (0.666 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.00% +0.00% +0.30% / +0.00% +0.30% +0.45%] index_copy_ spread : Elapsed 0.007 ms (0.674 ms / 100) 0.663 -> 0.665 ( +0.30%) [ +0.00% +0.15% +0.45% / +0.30% +0.60% +0.75%] index_add_ strided 3 : Elapsed 0.007 ms (0.663 ms / 100) 0.674 -> 0.674 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +1.34% +2.08%] index_copy_ strided 3 : Elapsed 0.007 ms (0.675 ms / 100) 0.659 -> 0.658 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.76% +0.91%] index_add_ strided 7 : Elapsed 0.007 ms (0.659 ms / 100) 0.671 -> 0.672 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +0.89% +0.75%] index_copy_ strided 7 : Elapsed 0.007 ms (0.672 ms / 100) 0.659 -> 0.659 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.007 ms (0.659 ms / 100) 0.671 -> 0.675 ( +0.60%) [ +0.00% +0.15% +0.00% / +0.60% +0.75% +0.75%] index_copy_ perm : Elapsed 0.007 ms (0.671 ms / 100) 0.663 -> 0.665 ( +0.30%) [ +0.30% +0.45% +0.00% / +0.30% +0.60% +0.45%] index_add_ perm_sorted : Elapsed 0.007 ms (0.665 ms / 100) 0.673 -> 0.674 ( +0.15%) [ +0.15% +0.00% +0.15% / +0.15% +1.19% +1.04%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.674 ms / 100) 4.946 -> 4.814 ( -2.67%) [ +0.08% +0.00% +0.24% / -2.67% -2.57% -2.59%] index_select const : Elapsed 0.049 ms (4.950 ms / 100) 4.979 -> 4.845 ( -2.69%) [ +0.00% +0.02% +0.26% / -2.67% -2.69% -2.65%] index_select wrap : Elapsed 0.050 ms (4.979 ms / 100) 4.978 -> 4.838 ( -2.81%) [ +0.12% +0.04% +0.00% / -2.79% -2.75% -2.81%] index_select linear : Elapsed 0.050 ms (4.984 ms / 100) 4.955 -> 4.843 ( -2.26%) [ +0.26% +0.00% +0.20% / -2.26% -2.16% -2.06%] index_select reverse : Elapsed 0.050 ms (4.968 ms / 100) 4.958 -> 4.802 ( -3.15%) [ +0.14% +0.00% +0.08% / -3.15% -3.01% -2.78%] index_select skip64 : Elapsed 0.050 ms (4.965 ms / 100) 4.944 -> 4.803 ( -2.85%) [ +0.24% +0.10% +0.00% / -2.85% -2.37% -2.65%] index_select skip256 : Elapsed 0.050 ms (4.956 ms / 100) 4.964 -> 4.817 ( -2.96%) [ +0.28% +0.00% +0.12% / -2.96% -2.70% -2.56%] index_select spread : Elapsed 0.050 ms (4.978 ms / 100) 4.958 -> 4.839 ( -2.40%) [ +0.14% +0.00% +0.34% / -2.40% -2.00% -2.28%] index_select strided 3 : Elapsed 0.050 ms (4.965 ms / 100) 4.968 -> 4.844 ( -2.50%) [ +0.16% +0.00% +0.00% / -2.50% -2.42% -2.29%] index_select random : Elapsed 0.050 ms (4.976 ms / 100) 4.966 -> 4.834 ( -2.66%) [ +0.12% +0.08% +0.00% / -2.42% -2.48% -2.66%] index_select random_sorted : Elapsed 0.050 ms (4.972 ms / 100) B = [16, 4, 20, 40] (stride (40, 640, 2560, 1)) A = [16, 4, 20, 5] (stride (100, 1600, 5, 1)) dim = 3 1.522 -> 1.522 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.47% +0.54%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.53% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.66% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.54% +0.54%] index_copy_ spread : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.54% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.66% +1.25%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.480 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.54% +0.74%] index_copy_ strided 7 : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.481 ( +0.14%) [ +0.07% +0.20% +0.00% / +0.14% +0.54% +0.54%] index_copy_ perm : Elapsed 0.015 ms (1.480 ms / 100) 1.529 -> 1.530 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.78% +0.65%] index_add_ perm_sorted : Elapsed 0.015 ms (1.531 ms / 100) 1.489 -> 1.489 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.60%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.490 ms / 100) 8.578 -> 8.577 ( -0.01%) [ +0.40% +0.00% +0.00% / -0.01% +0.24% +0.37%] index_select const : Elapsed 0.086 ms (8.612 ms / 100) 8.568 -> 8.574 ( +0.07%) [ +0.00% +0.09% +0.06% / +0.07% +0.55% +0.20%] index_select wrap : Elapsed 0.086 ms (8.568 ms / 100) 8.582 -> 8.582 ( +0.00%) [ +0.09% +0.00% +0.02% / +0.09% +0.00% +0.17%] index_select linear : Elapsed 0.086 ms (8.590 ms / 100) 8.576 -> 8.579 ( +0.03%) [ +0.00% +0.03% +0.01% / +0.03% +0.06% +0.41%] index_select reverse : Elapsed 0.086 ms (8.576 ms / 100) 8.578 -> 8.574 ( -0.05%) [ +0.01% +0.12% +0.00% / -0.05% +0.16% +0.14%] index_select skip64 : Elapsed 0.086 ms (8.579 ms / 100) 8.590 -> 8.576 ( -0.16%) [ +0.06% +0.00% +0.01% / -0.16% +0.16% +0.07%] index_select skip256 : Elapsed 0.086 ms (8.595 ms / 100) 8.573 -> 8.573 ( +0.00%) [ +0.35% +0.00% +0.06% / +0.00% +0.20% +0.38%] index_select spread : Elapsed 0.086 ms (8.603 ms / 100) 8.573 -> 8.584 ( +0.13%) [ +0.22% +0.00% +0.20% / +0.15% +0.13% +0.28%] index_select strided 3 : Elapsed 0.086 ms (8.592 ms / 100) 8.572 -> 8.584 ( +0.14%) [ +0.38% +0.00% +0.01% / +0.19% +0.14% +0.19%] index_select random : Elapsed 0.086 ms (8.605 ms / 100) 8.576 -> 8.586 ( +0.12%) [ +0.34% +0.02% +0.00% / +0.12% +0.23% +0.34%] index_select random_sorted : Elapsed 0.086 ms (8.605 ms / 100) B = [16, 4, 20, 40] (stride (1, 640, 2560, 16)) A = [16, 4, 20, 5] (stride (1, 16, 64, 1280)) dim = 3 1.637 -> 1.643 ( +0.37%) [ +0.00% +0.98% +0.49% / +0.37% +1.10% +0.61%] index_add_ linear : Elapsed 0.016 ms (1.637 ms / 100) 1.584 -> 1.592 ( +0.51%) [ +0.00% +1.01% +0.51% / +0.51% +1.07% +0.69%] index_copy_ linear : Elapsed 0.016 ms (1.584 ms / 100) 1.623 -> 1.621 ( -0.12%) [ +0.06% +0.00% +0.00% / -0.12% +0.49% +0.49%] index_add_ reverse : Elapsed 0.016 ms (1.624 ms / 100) 1.574 -> 1.576 ( +0.13%) [ +0.19% +0.00% +0.13% / +0.13% +0.19% +0.44%] index_copy_ reverse : Elapsed 0.016 ms (1.577 ms / 100) 1.620 -> 1.621 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.68% +0.68%] index_add_ spread : Elapsed 0.016 ms (1.622 ms / 100) 1.570 -> 1.570 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.51% +0.57%] index_copy_ spread : Elapsed 0.016 ms (1.572 ms / 100) 1.636 -> 1.644 ( +0.49%) [ +1.10% +0.00% +0.12% / +0.49% +0.61% +1.77%] index_add_ strided 3 : Elapsed 0.017 ms (1.654 ms / 100) 1.585 -> 1.591 ( +0.38%) [ +0.95% +0.06% +0.00% / +0.38% +0.63% +1.83%] index_copy_ strided 3 : Elapsed 0.016 ms (1.600 ms / 100) 1.642 -> 1.650 ( +0.49%) [ +0.00% +0.43% +0.49% / +0.49% +0.61% +0.55%] index_add_ strided 7 : Elapsed 0.016 ms (1.642 ms / 100) 1.590 -> 1.597 ( +0.44%) [ +0.00% +0.69% +0.31% / +0.88% +0.44% +0.44%] index_copy_ strided 7 : Elapsed 0.016 ms (1.590 ms / 100) 1.649 -> 1.641 ( -0.49%) [ +0.06% +0.00% +0.00% / -0.49% +0.18% +0.73%] index_add_ perm : Elapsed 0.016 ms (1.650 ms / 100) 1.596 -> 1.591 ( -0.31%) [ +0.00% +0.13% +0.06% / -0.31% +0.25% +1.25%] index_copy_ perm : Elapsed 0.016 ms (1.596 ms / 100) 1.636 -> 1.637 ( +0.06%) [ +1.04% +0.06% +0.00% / +0.06% +1.16% +0.61%] index_add_ perm_sorted : Elapsed 0.017 ms (1.653 ms / 100) 1.582 -> 1.583 ( +0.06%) [ +1.01% +0.32% +0.00% / +0.06% +1.26% +0.70%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.598 ms / 100) 8.512 -> 8.520 ( +0.09%) [ +0.16% +0.14% +0.00% / +0.09% +0.22% +0.33%] index_select const : Elapsed 0.085 ms (8.526 ms / 100) 8.532 -> 8.533 ( +0.01%) [ +0.16% +0.26% +0.00% / +0.15% +0.01% +0.39%] index_select wrap : Elapsed 0.085 ms (8.546 ms / 100) 8.542 -> 8.535 ( -0.08%) [ +0.09% +0.19% +0.00% / +0.09% -0.08% +0.16%] index_select linear : Elapsed 0.086 ms (8.550 ms / 100) 8.532 -> 8.543 ( +0.13%) [ +0.02% +0.00% +0.06% / +0.13% +0.40% +0.42%] index_select reverse : Elapsed 0.085 ms (8.534 ms / 100) 8.521 -> 8.525 ( +0.05%) [ +0.01% +0.00% +0.01% / +0.05% +0.26% +0.12%] index_select skip64 : Elapsed 0.085 ms (8.522 ms / 100) 8.525 -> 8.522 ( -0.04%) [ +0.07% +0.00% +0.07% / -0.04% +0.22% +0.39%] index_select skip256 : Elapsed 0.085 ms (8.531 ms / 100) 8.525 -> 8.527 ( +0.02%) [ +0.36% +0.00% +0.20% / +0.14% +0.26% +0.02%] index_select spread : Elapsed 0.086 ms (8.556 ms / 100) 8.532 -> 8.530 ( -0.02%) [ +0.00% +0.16% +0.18% / -0.02% +0.20% +0.15%] index_select strided 3 : Elapsed 0.085 ms (8.532 ms / 100) 8.539 -> 8.531 ( -0.09%) [ +0.01% +0.14% +0.00% / -0.09% +0.30% +0.12%] index_select random : Elapsed 0.085 ms (8.540 ms / 100) 8.535 -> 8.544 ( +0.11%) [ +0.25% +0.00% +0.12% / +0.11% +0.15% +0.42%] index_select random_sorted : Elapsed 0.086 ms (8.556 ms / 100) B = [16, 4, 20, 40] (stride (4, 1, 2560, 64)) A = [16, 4, 20, 5] (stride (5, 1600, 80, 1)) dim = 3 0.671 -> 0.672 ( +0.15%) [ +0.00% +0.15% +0.45% / +0.15% +0.30% +0.15%] index_add_ linear : Elapsed 0.007 ms (0.671 ms / 100) 0.686 -> 0.688 ( +0.29%) [ +0.15% +0.15% +0.00% / +0.29% +0.44% +0.29%] index_copy_ linear : Elapsed 0.007 ms (0.687 ms / 100) 0.665 -> 0.665 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.15% +0.15%] index_add_ reverse : Elapsed 0.007 ms (0.665 ms / 100) 0.677 -> 0.676 ( -0.15%) [ +0.00% +0.00% +0.59% / -0.15% +0.15% +0.30%] index_copy_ reverse : Elapsed 0.007 ms (0.677 ms / 100) 0.670 -> 0.670 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.30% +0.30%] index_add_ spread : Elapsed 0.007 ms (0.670 ms / 100) 0.689 -> 0.690 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.73% +0.15%] index_copy_ spread : Elapsed 0.007 ms (0.690 ms / 100) 0.669 -> 0.670 ( +0.15%) [ +0.15% +0.30% +0.00% / +0.15% +0.60% +0.60%] index_add_ strided 3 : Elapsed 0.007 ms (0.670 ms / 100) 0.682 -> 0.683 ( +0.15%) [ +0.15% +0.29% +0.00% / +0.15% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.007 ms (0.683 ms / 100) 0.670 -> 0.671 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.30% +0.30%] index_add_ strided 7 : Elapsed 0.007 ms (0.671 ms / 100) 0.688 -> 0.691 ( +0.44%) [ +0.15% +0.73% +0.00% / +0.58% +0.73% +0.44%] index_copy_ strided 7 : Elapsed 0.007 ms (0.689 ms / 100) 0.668 -> 0.667 ( -0.15%) [ +0.00% +0.30% +0.00% / +0.00% -0.15% +0.15%] index_add_ perm : Elapsed 0.007 ms (0.668 ms / 100) 0.692 -> 0.692 ( +0.00%) [ +0.00% +0.43% +0.29% / +0.29% +0.14% +0.00%] index_copy_ perm : Elapsed 0.007 ms (0.692 ms / 100) 0.669 -> 0.670 ( +0.15%) [ +0.15% +0.00% +0.30% / +0.15% +0.60% +0.45%] index_add_ perm_sorted : Elapsed 0.007 ms (0.670 ms / 100) 0.689 -> 0.690 ( +0.15%) [ +0.44% +0.29% +0.00% / +0.15% +1.31% +0.73%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.692 ms / 100) 4.926 -> 4.919 ( -0.14%) [ +0.18% +0.00% +0.20% / -0.14% +0.06% +0.18%] index_select const : Elapsed 0.049 ms (4.935 ms / 100) 4.926 -> 4.915 ( -0.22%) [ +0.06% +0.00% +0.18% / -0.22% -0.12% +0.12%] index_select wrap : Elapsed 0.049 ms (4.929 ms / 100) 4.932 -> 4.913 ( -0.39%) [ +0.10% +0.00% +0.34% / -0.39% -0.24% -0.28%] index_select linear : Elapsed 0.049 ms (4.937 ms / 100) 4.921 -> 4.920 ( -0.02%) [ +0.00% +0.08% +0.06% / -0.02% +0.22% +0.28%] index_select reverse : Elapsed 0.049 ms (4.921 ms / 100) 4.926 -> 4.921 ( -0.10%) [ +0.00% +0.24% +0.08% / -0.06% -0.10% +0.10%] index_select skip64 : Elapsed 0.049 ms (4.926 ms / 100) 4.914 -> 4.911 ( -0.06%) [ +0.00% +0.06% +0.35% / -0.06% +0.59% +0.45%] index_select skip256 : Elapsed 0.049 ms (4.914 ms / 100) 4.916 -> 4.912 ( -0.08%) [ +0.10% +0.00% +0.59% / -0.08% +0.22% +0.45%] index_select spread : Elapsed 0.049 ms (4.921 ms / 100) 4.918 -> 4.919 ( +0.02%) [ +0.00% +0.08% +0.24% / +0.02% +0.12% +0.33%] index_select strided 3 : Elapsed 0.049 ms (4.918 ms / 100) 4.926 -> 4.927 ( +0.02%) [ +0.00% +0.26% +0.35% / +0.12% +0.02% +0.14%] index_select random : Elapsed 0.049 ms (4.926 ms / 100) 4.935 -> 4.923 ( -0.24%) [ +0.06% +0.00% +0.04% / -0.24% -0.22% +0.04%] index_select random_sorted : Elapsed 0.049 ms (4.938 ms / 100) B = [16, 4, 20, 40] (stride (4, 1, 2560, 64)) A = [16, 4, 20, 5] (stride (1, 320, 16, 1280)) dim = 3 0.647 -> 0.646 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.77% +1.70%] index_add_ linear : Elapsed 0.006 ms (0.647 ms / 100) 0.663 -> 0.664 ( +0.15%) [ +0.00% +0.30% +0.00% / +0.15% +0.75% +0.90%] index_copy_ linear : Elapsed 0.007 ms (0.663 ms / 100) 0.653 -> 0.652 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.15% +0.31%] index_add_ reverse : Elapsed 0.007 ms (0.653 ms / 100) 0.668 -> 0.668 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.60% +0.75%] index_copy_ reverse : Elapsed 0.007 ms (0.668 ms / 100) 0.648 -> 0.650 ( +0.31%) [ +0.15% +0.15% +0.00% / +0.31% +0.93% +0.93%] index_add_ spread : Elapsed 0.006 ms (0.649 ms / 100) 0.665 -> 0.666 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +0.90% +1.65%] index_copy_ spread : Elapsed 0.007 ms (0.665 ms / 100) 0.647 -> 0.648 ( +0.15%) [ +0.00% +0.15% +0.15% / +0.15% +1.08% +1.08%] index_add_ strided 3 : Elapsed 0.006 ms (0.647 ms / 100) 0.664 -> 0.665 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +1.20% +1.20%] index_copy_ strided 3 : Elapsed 0.007 ms (0.665 ms / 100) 0.652 -> 0.651 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.77% +0.77%] index_add_ strided 7 : Elapsed 0.007 ms (0.652 ms / 100) 0.670 -> 0.671 ( +0.15%) [ +0.00% +0.30% +0.15% / +0.15% +0.90% +1.04%] index_copy_ strided 7 : Elapsed 0.007 ms (0.670 ms / 100) 0.648 -> 0.649 ( +0.15%) [ +0.00% +0.31% +0.15% / +0.15% +0.93% +1.08%] index_add_ perm : Elapsed 0.006 ms (0.648 ms / 100) 0.669 -> 0.669 ( +0.00%) [ +0.30% +0.15% +0.00% / +0.00% +0.75% +0.75%] index_copy_ perm : Elapsed 0.007 ms (0.671 ms / 100) 0.651 -> 0.651 ( +0.00%) [ +0.15% +0.31% +0.00% / +0.00% +1.08% +1.08%] index_add_ perm_sorted : Elapsed 0.007 ms (0.652 ms / 100) 0.676 -> 0.679 ( +0.44%) [ +0.00% +0.30% +0.00% / +0.44% +1.33% +2.07%] index_copy_ perm_sorted : Elapsed 0.007 ms (0.676 ms / 100) 4.886 -> 4.893 ( +0.14%) [ +0.16% +0.00% +0.08% / +0.14% +0.33% +0.57%] index_select const : Elapsed 0.049 ms (4.894 ms / 100) 4.899 -> 4.909 ( +0.20%) [ +0.00% +0.29% +0.31% / +0.20% +0.33% +0.24%] index_select wrap : Elapsed 0.049 ms (4.899 ms / 100) 4.912 -> 4.916 ( +0.08%) [ +0.18% +0.18% +0.00% / +0.16% +0.08% +0.20%] index_select linear : Elapsed 0.049 ms (4.921 ms / 100) 4.907 -> 4.894 ( -0.26%) [ +0.16% +0.06% +0.00% / +0.20% -0.12% -0.26%] index_select reverse : Elapsed 0.049 ms (4.915 ms / 100) 4.901 -> 4.888 ( -0.27%) [ +0.16% +0.00% +0.02% / -0.27% -0.20% -0.27%] index_select skip64 : Elapsed 0.049 ms (4.909 ms / 100) 4.882 -> 4.889 ( +0.14%) [ +0.37% +0.06% +0.00% / +0.41% +0.14% +0.20%] index_select skip256 : Elapsed 0.049 ms (4.900 ms / 100) 4.887 -> 4.880 ( -0.14%) [ +0.39% +0.25% +0.00% / -0.14% +0.31% +0.55%] index_select spread : Elapsed 0.049 ms (4.906 ms / 100) 4.890 -> 4.898 ( +0.16%) [ +0.39% +0.00% +0.08% / +0.22% +0.16% +0.31%] index_select strided 3 : Elapsed 0.049 ms (4.909 ms / 100) 4.889 -> 4.894 ( +0.10%) [ +0.25% +0.00% +0.12% / +0.27% +0.22% +0.10%] index_select random : Elapsed 0.049 ms (4.901 ms / 100) 4.891 -> 4.891 ( +0.00%) [ +0.18% +0.00% +0.08% / +0.49% +0.00% +0.25%] index_select random_sorted : Elapsed 0.049 ms (4.900 ms / 100) B = [16, 4, 20, 40] (stride (20, 320, 1, 1280)) A = [16, 4, 20, 5] (stride (1, 80, 320, 16)) dim = 3 1.656 -> 1.656 ( +0.00%) [ +0.06% +1.03% +0.00% / +0.00% +0.54% +0.66%] index_add_ linear : Elapsed 0.017 ms (1.657 ms / 100) 1.600 -> 1.601 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.56% +1.00%] index_copy_ linear : Elapsed 0.016 ms (1.600 ms / 100) 1.650 -> 1.656 ( +0.36%) [ +0.00% +0.42% +0.30% / +0.36% +0.67% +0.67%] index_add_ reverse : Elapsed 0.017 ms (1.650 ms / 100) 1.594 -> 1.599 ( +0.31%) [ +0.00% +0.31% +0.19% / +0.31% +0.69% +0.75%] index_copy_ reverse : Elapsed 0.016 ms (1.594 ms / 100) 1.655 -> 1.657 ( +0.12%) [ +0.06% +0.00% +0.00% / +0.12% +0.42% +0.85%] index_add_ spread : Elapsed 0.017 ms (1.656 ms / 100) 1.600 -> 1.603 ( +0.19%) [ +0.00% +0.19% +0.00% / +0.19% +0.38% +0.88%] index_copy_ spread : Elapsed 0.016 ms (1.600 ms / 100) 1.656 -> 1.655 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.42% +0.54%] index_add_ strided 3 : Elapsed 0.017 ms (1.657 ms / 100) 1.599 -> 1.600 ( +0.06%) [ +0.06% +0.00% +0.06% / +0.06% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.016 ms (1.600 ms / 100) 1.650 -> 1.652 ( +0.12%) [ +0.36% +0.24% +0.00% / +0.12% +0.67% +0.73%] index_add_ strided 7 : Elapsed 0.017 ms (1.656 ms / 100) 1.595 -> 1.595 ( +0.00%) [ +0.19% +0.19% +0.00% / +0.00% +0.75% +0.69%] index_copy_ strided 7 : Elapsed 0.016 ms (1.598 ms / 100) 1.653 -> 1.657 ( +0.24%) [ +0.24% +0.00% +0.18% / +0.24% +0.54% +0.67%] index_add_ perm : Elapsed 0.017 ms (1.657 ms / 100) 1.597 -> 1.599 ( +0.13%) [ +0.19% +0.00% +0.06% / +0.13% +0.50% +0.81%] index_copy_ perm : Elapsed 0.016 ms (1.600 ms / 100) 1.654 -> 1.655 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.48% +0.42%] index_add_ perm_sorted : Elapsed 0.017 ms (1.656 ms / 100) 1.596 -> 1.597 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.56% +0.63%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.597 ms / 100) 8.513 -> 8.520 ( +0.08%) [ +0.21% +0.00% +0.33% / +0.08% +0.36% +0.45%] index_select const : Elapsed 0.085 ms (8.531 ms / 100) 8.536 -> 8.536 ( +0.00%) [ +0.00% +0.00% +0.18% / +0.00% +0.08% +0.13%] index_select wrap : Elapsed 0.085 ms (8.536 ms / 100) 8.533 -> 8.536 ( +0.04%) [ +0.19% +0.00% +0.11% / +0.04% +0.05% +0.34%] index_select linear : Elapsed 0.085 ms (8.549 ms / 100) 8.535 -> 8.536 ( +0.01%) [ +0.27% +0.11% +0.00% / +0.01% +0.34% +0.40%] index_select reverse : Elapsed 0.086 ms (8.558 ms / 100) 8.518 -> 8.513 ( -0.06%) [ +0.00% +0.12% +0.00% / -0.06% +0.33% +0.02%] index_select skip64 : Elapsed 0.085 ms (8.518 ms / 100) 8.511 -> 8.507 ( -0.05%) [ +0.07% +0.00% +0.14% / -0.05% +0.34% +0.58%] index_select skip256 : Elapsed 0.085 ms (8.517 ms / 100) 8.543 -> 8.543 ( +0.00%) [ +0.07% +0.00% +0.32% / +0.00% +0.22% +0.27%] index_select spread : Elapsed 0.085 ms (8.549 ms / 100) 8.530 -> 8.543 ( +0.15%) [ +0.06% +0.23% +0.00% / +0.15% +0.36% +0.22%] index_select strided 3 : Elapsed 0.085 ms (8.535 ms / 100) 8.543 -> 8.544 ( +0.01%) [ +0.02% +0.00% +0.19% / +0.11% +0.01% +0.01%] index_select random : Elapsed 0.085 ms (8.545 ms / 100) 8.550 -> 8.561 ( +0.13%) [ +0.01% +0.00% +0.02% / +0.19% +0.13% +0.53%] index_select random_sorted : Elapsed 0.086 ms (8.551 ms / 100) B = [16, 4, 20, 40] (stride (4, 1, 64, 1280)) A = [16, 4, 20, 5] (stride (400, 5, 20, 1)) dim = 3 1.459 -> 1.460 ( +0.07%) [ +0.27% +0.00% +0.21% / +0.07% +0.82% +0.69%] index_add_ linear : Elapsed 0.015 ms (1.463 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.63% +0.84%] index_copy_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.465 -> 1.468 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.82% +0.82%] index_add_ reverse : Elapsed 0.015 ms (1.466 ms / 100) 1.419 -> 1.419 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.49% +0.56%] index_copy_ reverse : Elapsed 0.014 ms (1.420 ms / 100) 1.465 -> 1.464 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.61% +0.61%] index_add_ spread : Elapsed 0.015 ms (1.465 ms / 100) 1.417 -> 1.417 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.49% +0.42%] index_copy_ spread : Elapsed 0.014 ms (1.417 ms / 100) 1.458 -> 1.463 ( +0.34%) [ +0.14% +0.14% +0.00% / +0.34% +0.82% +0.96%] index_add_ strided 3 : Elapsed 0.015 ms (1.460 ms / 100) 1.412 -> 1.411 ( -0.07%) [ +0.07% +0.14% +0.00% / -0.07% +0.57% +0.99%] index_copy_ strided 3 : Elapsed 0.014 ms (1.413 ms / 100) 1.461 -> 1.466 ( +0.34%) [ +0.00% +0.34% +0.34% / +0.34% +0.89% +0.89%] index_add_ strided 7 : Elapsed 0.015 ms (1.461 ms / 100) 1.411 -> 1.412 ( +0.07%) [ +0.00% +0.28% +0.21% / +0.07% +0.85% +0.78%] index_copy_ strided 7 : Elapsed 0.014 ms (1.411 ms / 100) 1.462 -> 1.464 ( +0.14%) [ +0.27% +0.00% +0.07% / +0.14% +0.68% +1.03%] index_add_ perm : Elapsed 0.015 ms (1.466 ms / 100) 1.414 -> 1.417 ( +0.21%) [ +0.28% +0.00% +0.00% / +0.21% +0.50% +0.92%] index_copy_ perm : Elapsed 0.014 ms (1.418 ms / 100) 1.459 -> 1.462 ( +0.21%) [ +0.00% +0.07% +0.21% / +0.21% +0.69% +0.82%] index_add_ perm_sorted : Elapsed 0.015 ms (1.459 ms / 100) 1.412 -> 1.413 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.64% +0.64%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.412 ms / 100) 8.198 -> 8.203 ( +0.06%) [ +0.05% +0.10% +0.00% / +0.07% +0.22% +0.06%] index_select const : Elapsed 0.082 ms (8.202 ms / 100) 8.191 -> 8.182 ( -0.11%) [ +0.16% +0.01% +0.00% / -0.11% +0.45% +0.55%] index_select wrap : Elapsed 0.082 ms (8.204 ms / 100) 8.192 -> 8.194 ( +0.02%) [ +0.23% +0.02% +0.00% / +0.02% +0.39% +0.37%] index_select linear : Elapsed 0.082 ms (8.211 ms / 100) 8.185 -> 8.199 ( +0.17%) [ +0.00% +0.15% +0.23% / +0.24% +0.17% +0.27%] index_select reverse : Elapsed 0.082 ms (8.185 ms / 100) 8.186 -> 8.205 ( +0.23%) [ +0.15% +0.00% +0.23% / +0.23% +0.59% +0.46%] index_select skip64 : Elapsed 0.082 ms (8.198 ms / 100) 8.193 -> 8.196 ( +0.04%) [ +0.01% +0.22% +0.00% / +0.04% +0.17% +0.11%] index_select skip256 : Elapsed 0.082 ms (8.194 ms / 100) 8.204 -> 8.194 ( -0.12%) [ +0.11% +0.00% +0.18% / -0.12% +0.04% +0.12%] index_select spread : Elapsed 0.082 ms (8.213 ms / 100) 8.202 -> 8.195 ( -0.09%) [ +0.11% +0.00% +0.07% / -0.09% +0.30% +0.04%] index_select strided 3 : Elapsed 0.082 ms (8.211 ms / 100) 8.198 -> 8.198 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.15% +0.29%] index_select random : Elapsed 0.082 ms (8.205 ms / 100) 8.189 -> 8.200 ( +0.13%) [ +0.06% +0.12% +0.00% / +0.13% +0.27% +0.21%] index_select random_sorted : Elapsed 0.082 ms (8.194 ms / 100) out_shape = [40, 5, 4, 20] in_shape = [16, 5, 4, 20] idx_dim = 0 B = [40, 5, 4, 20] (stride (20, 3200, 800, 1)) A = [16, 5, 4, 20] (stride (80, 1280, 1, 4)) dim = 0 3.637 -> 3.637 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.69%] index_add_ linear : Elapsed 0.036 ms (3.637 ms / 100) 3.509 -> 3.511 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.63% +0.74%] index_copy_ linear : Elapsed 0.035 ms (3.509 ms / 100) 3.639 -> 3.642 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.69% +0.71%] index_add_ reverse : Elapsed 0.036 ms (3.642 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.66% +0.66%] index_copy_ reverse : Elapsed 0.035 ms (3.513 ms / 100) 3.641 -> 3.644 ( +0.08%) [ +0.00% +0.05% +0.00% / +0.08% +0.58% +0.63%] index_add_ spread : Elapsed 0.036 ms (3.641 ms / 100) 3.515 -> 3.532 ( +0.48%) [ +0.00% +0.09% +0.09% / +2.53% +0.48% +0.65%] index_copy_ spread : Elapsed 0.035 ms (3.515 ms / 100) 3.634 -> 3.636 ( +0.06%) [ +0.03% +0.00% +0.03% / +0.06% +0.50% +0.61%] index_add_ strided 3 : Elapsed 0.036 ms (3.635 ms / 100) 3.504 -> 3.507 ( +0.09%) [ +0.03% +0.03% +0.00% / +0.09% +0.60% +0.68%] index_copy_ strided 3 : Elapsed 0.035 ms (3.505 ms / 100) 3.645 -> 3.643 ( -0.05%) [ +0.03% +0.00% +0.00% / -0.05% +0.49% +0.58%] index_add_ strided 7 : Elapsed 0.036 ms (3.646 ms / 100) 3.513 -> 3.513 ( +0.00%) [ +0.06% +0.00% +0.03% / +0.00% +0.51% +0.54%] index_copy_ strided 7 : Elapsed 0.035 ms (3.515 ms / 100) 3.639 -> 3.641 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.63% +0.58%] index_add_ perm : Elapsed 0.036 ms (3.640 ms / 100) 3.511 -> 3.512 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.60% +0.71%] index_copy_ perm : Elapsed 0.035 ms (3.511 ms / 100) 3.639 -> 3.641 ( +0.05%) [ +0.08% +0.00% +0.00% / +0.05% +0.41% +0.47%] index_add_ perm_sorted : Elapsed 0.036 ms (3.642 ms / 100) 3.511 -> 3.514 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.54% +0.66%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.514 ms / 100) 5.465 -> 5.465 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.13% +0.09%] index_select const : Elapsed 0.055 ms (5.468 ms / 100) 5.466 -> 5.465 ( -0.02%) [ +0.13% +0.00% +0.05% / -0.02% +0.29% +0.16%] index_select wrap : Elapsed 0.055 ms (5.473 ms / 100) 5.465 -> 5.475 ( +0.18%) [ +0.13% +0.02% +0.00% / +0.18% +0.24% +0.20%] index_select linear : Elapsed 0.055 ms (5.472 ms / 100) 5.466 -> 5.468 ( +0.04%) [ +0.20% +0.00% +0.20% / +0.16% +0.04% +0.20%] index_select reverse : Elapsed 0.055 ms (5.477 ms / 100) 5.462 -> 5.466 ( +0.07%) [ +0.13% +0.24% +0.00% / +0.20% +0.07% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.469 ms / 100) 5.466 -> 5.464 ( -0.04%) [ +0.07% +0.15% +0.00% / +0.13% -0.04% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.470 ms / 100) 5.468 -> 5.472 ( +0.07%) [ +0.07% +0.04% +0.00% / +0.07% +0.07% +0.16%] index_select spread : Elapsed 0.055 ms (5.472 ms / 100) 5.464 -> 5.464 ( +0.00%) [ +0.00% +0.13% +0.05% / +0.00% +0.09% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.464 ms / 100) 5.461 -> 5.465 ( +0.07%) [ +0.00% +0.13% +0.11% / +0.07% +0.33% +0.26%] index_select strided 5 : Elapsed 0.055 ms (5.461 ms / 100) 5.460 -> 5.470 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.24% +0.29%] index_select strided 7 : Elapsed 0.055 ms (5.470 ms / 100) 5.461 -> 5.473 ( +0.22%) [ +0.04% +0.05% +0.00% / +0.22% +0.27% +0.37%] index_select strided 8 : Elapsed 0.055 ms (5.463 ms / 100) 5.462 -> 5.467 ( +0.09%) [ +0.09% +0.04% +0.00% / +0.09% +0.29% +0.35%] index_select random : Elapsed 0.055 ms (5.467 ms / 100) 5.466 -> 5.469 ( +0.05%) [ +0.00% +0.04% +0.02% / +0.05% +0.13% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.466 ms / 100) B = [40, 5, 4, 20] (stride (1, 3200, 40, 160)) A = [16, 5, 4, 20] (stride (400, 1, 100, 5)) dim = 0 3.709 -> 3.709 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.89% +0.89%] index_add_ linear : Elapsed 0.037 ms (3.715 ms / 100) 3.581 -> 3.590 ( +0.25%) [ +0.25% +0.22% +0.00% / +0.25% +1.03% +1.03%] index_copy_ linear : Elapsed 0.036 ms (3.590 ms / 100) 3.699 -> 3.699 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.73% +0.68%] index_add_ reverse : Elapsed 0.037 ms (3.702 ms / 100) 3.567 -> 3.567 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.76% +0.87%] index_copy_ reverse : Elapsed 0.036 ms (3.572 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.76% +0.76%] index_add_ spread : Elapsed 0.037 ms (3.702 ms / 100) 3.579 -> 3.575 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% +0.87% +0.81%] index_copy_ spread : Elapsed 0.036 ms (3.579 ms / 100) 3.696 -> 3.697 ( +0.03%) [ +0.03% +0.14% +0.00% / +0.03% +0.87% +0.89%] index_add_ strided 3 : Elapsed 0.037 ms (3.697 ms / 100) 3.568 -> 3.569 ( +0.03%) [ +0.14% +0.14% +0.00% / +0.03% +0.90% +0.90%] index_copy_ strided 3 : Elapsed 0.036 ms (3.573 ms / 100) 3.700 -> 3.699 ( -0.03%) [ +0.08% +0.08% +0.00% / -0.03% +0.81% +0.73%] index_add_ strided 7 : Elapsed 0.037 ms (3.703 ms / 100) 3.570 -> 3.568 ( -0.06%) [ +0.03% +0.08% +0.00% / -0.06% +0.81% +0.78%] index_copy_ strided 7 : Elapsed 0.036 ms (3.571 ms / 100) 3.714 -> 3.709 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.59% +0.57%] index_add_ perm : Elapsed 0.037 ms (3.714 ms / 100) 3.588 -> 3.584 ( -0.11%) [ +0.00% +0.08% +0.08% / -0.11% +0.67% +0.75%] index_copy_ perm : Elapsed 0.036 ms (3.588 ms / 100) 3.695 -> 3.695 ( +0.00%) [ +0.00% +0.03% +0.14% / +0.00% +0.76% +0.92%] index_add_ perm_sorted : Elapsed 0.037 ms (3.695 ms / 100) 3.567 -> 3.571 ( +0.11%) [ +0.00% +0.14% +0.17% / +0.11% +0.84% +1.07%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.567 ms / 100) 5.490 -> 5.488 ( -0.04%) [ +0.13% +0.02% +0.00% / +0.05% +0.02% -0.04%] index_select const : Elapsed 0.055 ms (5.497 ms / 100) 5.489 -> 5.482 ( -0.13%) [ +0.18% +0.00% +0.04% / +0.09% -0.13% +0.05%] index_select wrap : Elapsed 0.055 ms (5.499 ms / 100) 5.488 -> 5.485 ( -0.05%) [ +0.04% +0.00% +0.00% / +0.04% -0.05% -0.05%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.490 -> 5.487 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.11% +0.05%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.478 ( -0.15%) [ +0.04% +0.04% +0.00% / -0.15% +0.11% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.488 ms / 100) 5.483 -> 5.488 ( +0.09%) [ +0.00% +0.11% +0.04% / +0.09% +0.15% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.483 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.07% +0.09% +0.00% / -0.04% -0.11% +0.05%] index_select spread : Elapsed 0.055 ms (5.494 ms / 100) 5.489 -> 5.487 ( -0.04%) [ +0.09% +0.04% +0.00% / -0.04% +0.02% +0.05%] index_select strided 3 : Elapsed 0.055 ms (5.494 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.00% +0.07% +0.04% / +0.00% +0.07% -0.09%] index_select strided 5 : Elapsed 0.055 ms (5.486 ms / 100) 5.488 -> 5.490 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.05% +0.11% +0.04%] index_select strided 7 : Elapsed 0.055 ms (5.488 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.00% +0.16% +0.04% / +0.00% +0.16% +0.29%] index_select strided 8 : Elapsed 0.055 ms (5.486 ms / 100) 5.486 -> 5.487 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.09% +0.13%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.00% +0.20% +0.02% / +0.00% +0.07% +0.15%] index_select random_sorted : Elapsed 0.055 ms (5.485 ms / 100) B = [40, 5, 4, 20] (stride (5, 1, 4000, 200)) A = [16, 5, 4, 20] (stride (100, 1, 1600, 5)) dim = 0 3.735 -> 3.742 ( +0.19%) [ +0.00% +0.00% +0.16% / +0.19% +0.62% +0.64%] index_add_ linear : Elapsed 0.037 ms (3.735 ms / 100) 3.608 -> 3.610 ( +0.06%) [ +0.03% +0.03% +0.00% / +0.06% +0.55% +0.61%] index_copy_ linear : Elapsed 0.036 ms (3.609 ms / 100) 3.737 -> 3.737 ( +0.00%) [ +0.08% +0.11% +0.00% / +0.00% +0.64% +0.70%] index_add_ reverse : Elapsed 0.037 ms (3.740 ms / 100) 3.602 -> 3.603 ( +0.03%) [ +0.08% +0.11% +0.00% / +0.03% +0.58% +0.69%] index_copy_ reverse : Elapsed 0.036 ms (3.605 ms / 100) 3.739 -> 3.742 ( +0.08%) [ +0.03% +0.08% +0.00% / +0.08% +0.56% +0.70%] index_add_ spread : Elapsed 0.037 ms (3.740 ms / 100) 3.608 -> 3.609 ( +0.03%) [ +0.11% +0.00% +0.03% / +0.03% +0.61% +0.75%] index_copy_ spread : Elapsed 0.036 ms (3.612 ms / 100) 3.744 -> 3.745 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.61% +0.69%] index_add_ strided 3 : Elapsed 0.037 ms (3.745 ms / 100) 3.602 -> 3.604 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.44% +0.61%] index_copy_ strided 3 : Elapsed 0.036 ms (3.604 ms / 100) 3.739 -> 3.740 ( +0.03%) [ +0.08% +0.16% +0.00% / +0.03% +0.48% +0.53%] index_add_ strided 7 : Elapsed 0.037 ms (3.742 ms / 100) 3.603 -> 3.608 ( +0.14%) [ +0.08% +0.08% +0.00% / +0.14% +0.56% +0.64%] index_copy_ strided 7 : Elapsed 0.036 ms (3.606 ms / 100) 3.737 -> 3.739 ( +0.05%) [ +0.03% +0.19% +0.00% / +0.05% +0.67% +0.56%] index_add_ perm : Elapsed 0.037 ms (3.738 ms / 100) 3.605 -> 3.609 ( +0.11%) [ +0.03% +0.19% +0.00% / +0.11% +0.61% +0.55%] index_copy_ perm : Elapsed 0.036 ms (3.606 ms / 100) 3.739 -> 3.742 ( +0.08%) [ +0.11% +0.00% +0.11% / +0.08% +0.53% +0.37%] index_add_ perm_sorted : Elapsed 0.037 ms (3.743 ms / 100) 3.611 -> 3.613 ( +0.06%) [ +0.00% +0.06% +0.00% / +0.06% +0.39% +0.36%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.611 ms / 100) 5.469 -> 5.472 ( +0.05%) [ +0.07% +0.16% +0.00% / +0.05% +0.13% +0.29%] index_select const : Elapsed 0.055 ms (5.473 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.20% +0.15%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.00% +0.04% +0.13% / +0.18% +0.05% +0.16%] index_select linear : Elapsed 0.055 ms (5.472 ms / 100) 5.471 -> 5.469 ( -0.04%) [ +0.05% +0.00% +0.13% / -0.04% +0.18% +0.13%] index_select reverse : Elapsed 0.055 ms (5.474 ms / 100) 5.475 -> 5.471 ( -0.07%) [ +0.05% +0.00% +0.11% / +0.07% -0.07% +0.00%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.470 ( -0.05%) [ +0.15% +0.00% +0.05% / +0.15% -0.05% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.481 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.20% +0.00% +0.15% / +0.02% +0.15% +0.15%] index_select spread : Elapsed 0.055 ms (5.484 ms / 100) 5.470 -> 5.478 ( +0.15%) [ +0.00% +0.09% +0.13% / +0.20% +0.15% +0.26%] index_select strided 3 : Elapsed 0.055 ms (5.470 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.09% +0.09% +0.05%] index_select strided 5 : Elapsed 0.055 ms (5.476 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.04% +0.00% +0.02% / +0.05% +0.15% +0.16%] index_select strided 7 : Elapsed 0.055 ms (5.475 ms / 100) 5.469 -> 5.470 ( +0.02%) [ +0.20% +0.13% +0.00% / +0.04% +0.13% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.480 ms / 100) 5.470 -> 5.478 ( +0.15%) [ +0.16% +0.07% +0.00% / +0.15% +0.16% +0.20%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.476 -> 5.468 ( -0.15%) [ +0.05% +0.16% +0.00% / +0.00% -0.15% +0.07%] index_select random_sorted : Elapsed 0.055 ms (5.479 ms / 100) B = [40, 5, 4, 20] (stride (20, 1, 5, 800)) A = [16, 5, 4, 20] (stride (1, 16, 80, 320)) dim = 0 4.426 -> 4.426 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.75% +0.81%] index_add_ linear : Elapsed 0.044 ms (4.432 ms / 100) 4.259 -> 4.262 ( +0.07%) [ +0.09% +0.00% +0.02% / +0.07% +0.75% +0.94%] index_copy_ linear : Elapsed 0.043 ms (4.263 ms / 100) 4.441 -> 4.440 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.52% +0.68%] index_add_ reverse : Elapsed 0.044 ms (4.444 ms / 100) 4.278 -> 4.279 ( +0.02%) [ +0.05% +0.00% +0.07% / +0.02% +0.49% +0.61%] index_copy_ reverse : Elapsed 0.043 ms (4.280 ms / 100) 4.433 -> 4.438 ( +0.11%) [ +0.16% +0.00% +0.07% / +0.11% +0.65% +0.70%] index_add_ spread : Elapsed 0.044 ms (4.440 ms / 100) 4.279 -> 4.282 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.51% +0.77%] index_copy_ spread : Elapsed 0.043 ms (4.283 ms / 100) 4.423 -> 4.434 ( +0.25%) [ +0.29% +0.27% +0.00% / +0.25% +0.88% +0.68%] index_add_ strided 3 : Elapsed 0.044 ms (4.436 ms / 100) 4.266 -> 4.277 ( +0.26%) [ +0.19% +0.16% +0.00% / +0.26% +0.84% +0.87%] index_copy_ strided 3 : Elapsed 0.043 ms (4.274 ms / 100) 4.432 -> 4.440 ( +0.18%) [ +0.23% +0.25% +0.00% / +0.18% +0.83% +0.70%] index_add_ strided 7 : Elapsed 0.044 ms (4.442 ms / 100) 4.272 -> 4.278 ( +0.14%) [ +0.16% +0.07% +0.00% / +0.14% +0.73% +0.73%] index_copy_ strided 7 : Elapsed 0.043 ms (4.279 ms / 100) 4.428 -> 4.427 ( -0.02%) [ +0.05% +0.07% +0.00% / -0.02% +0.68% +0.68%] index_add_ perm : Elapsed 0.044 ms (4.430 ms / 100) 4.261 -> 4.262 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.68% +0.75%] index_copy_ perm : Elapsed 0.043 ms (4.262 ms / 100) 4.424 -> 4.431 ( +0.16%) [ +0.20% +0.00% +0.05% / +0.16% +0.72% +0.59%] index_add_ perm_sorted : Elapsed 0.044 ms (4.433 ms / 100) 4.267 -> 4.277 ( +0.23%) [ +0.21% +0.26% +0.00% / +0.23% +0.82% +0.63%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.276 ms / 100) 5.575 -> 5.574 ( -0.02%) [ +0.00% +0.13% +0.05% / -0.02% +0.02% +0.07%] index_select const : Elapsed 0.056 ms (5.575 ms / 100) 5.579 -> 5.571 ( -0.14%) [ +0.14% +0.04% +0.00% / +0.09% -0.04% -0.14%] index_select wrap : Elapsed 0.056 ms (5.587 ms / 100) 5.583 -> 5.576 ( -0.13%) [ +0.00% +0.04% +0.07% / +0.09% -0.07% -0.13%] index_select linear : Elapsed 0.056 ms (5.583 ms / 100) 5.582 -> 5.578 ( -0.07%) [ +0.00% +0.04% +0.05% / +0.09% -0.07% +0.02%] index_select reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.02% +0.04% +0.00% / +0.02% +0.16% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.572 ms / 100) 5.574 -> 5.571 ( -0.05%) [ +0.00% +0.05% +0.04% / +0.00% -0.05% -0.04%] index_select skip256 : Elapsed 0.056 ms (5.574 ms / 100) 5.577 -> 5.577 ( +0.00%) [ +0.09% +0.00% +0.02% / +0.05% +0.02% +0.00%] index_select spread : Elapsed 0.056 ms (5.582 ms / 100) 5.579 -> 5.569 ( -0.18%) [ +0.02% +0.00% +0.05% / +0.07% -0.18% +0.00%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.580 -> 5.575 ( -0.09%) [ +0.05% +0.00% +0.11% / +0.00% -0.09% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.583 ms / 100) 5.574 -> 5.581 ( +0.13%) [ +0.04% +0.09% +0.00% / +0.22% +0.13% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.576 ms / 100) 5.578 -> 5.574 ( -0.07%) [ +0.04% +0.02% +0.00% / -0.07% +0.00% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.580 ms / 100) 5.572 -> 5.579 ( +0.13%) [ +0.00% +0.23% +0.20% / +0.23% +0.14% +0.13%] index_select random : Elapsed 0.056 ms (5.572 ms / 100) 5.572 -> 5.575 ( +0.05%) [ +0.13% +0.22% +0.00% / +0.16% +0.14% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.579 ms / 100) B = [40, 5, 4, 20] (stride (5, 1, 200, 800)) A = [16, 5, 4, 20] (stride (20, 4, 1, 320)) dim = 0 3.956 -> 3.959 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_add_ linear : Elapsed 0.040 ms (3.959 ms / 100) 3.804 -> 3.809 ( +0.13%) [ +0.11% +0.13% +0.00% / +0.13% +0.66% +0.63%] index_copy_ linear : Elapsed 0.038 ms (3.808 ms / 100) 3.943 -> 3.951 ( +0.20%) [ +0.15% +0.05% +0.00% / +0.20% +0.61% +0.81%] index_add_ reverse : Elapsed 0.039 ms (3.949 ms / 100) 3.803 -> 3.814 ( +0.29%) [ +0.21% +0.00% +0.03% / +0.29% +0.58% +0.82%] index_copy_ reverse : Elapsed 0.038 ms (3.811 ms / 100) 3.949 -> 3.948 ( -0.03%) [ +0.00% +0.10% +0.05% / -0.03% +0.48% +0.41%] index_add_ spread : Elapsed 0.039 ms (3.949 ms / 100) 3.804 -> 3.808 ( +0.11%) [ +0.00% +0.05% +0.03% / +0.11% +0.50% +0.47%] index_copy_ spread : Elapsed 0.038 ms (3.804 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.63% +0.56%] index_add_ strided 3 : Elapsed 0.039 ms (3.939 ms / 100) 3.793 -> 3.801 ( +0.21%) [ +0.00% +0.16% +0.16% / +0.21% +0.69% +0.61%] index_copy_ strided 3 : Elapsed 0.038 ms (3.793 ms / 100) 3.946 -> 3.954 ( +0.20%) [ +0.25% +0.18% +0.00% / +0.20% +0.68% +0.46%] index_add_ strided 7 : Elapsed 0.040 ms (3.956 ms / 100) 3.807 -> 3.826 ( +0.50%) [ +0.24% +0.21% +0.00% / +0.53% +0.66% +0.50%] index_copy_ strided 7 : Elapsed 0.038 ms (3.816 ms / 100) 3.959 -> 3.962 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.43% +0.43%] index_add_ perm : Elapsed 0.040 ms (3.960 ms / 100) 3.808 -> 3.817 ( +0.24%) [ +0.08% +0.05% +0.00% / +0.24% +0.47% +0.45%] index_copy_ perm : Elapsed 0.038 ms (3.811 ms / 100) 3.960 -> 3.961 ( +0.03%) [ +0.08% +0.00% +0.00% / +0.03% +0.40% +0.43%] index_add_ perm_sorted : Elapsed 0.040 ms (3.963 ms / 100) 3.810 -> 3.818 ( +0.21%) [ +0.03% +0.00% +0.03% / +0.21% +0.42% +0.45%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.811 ms / 100) 5.486 -> 5.488 ( +0.04%) [ +0.00% +0.07% +0.02% / +0.04% +0.09% +0.15%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.491 -> 5.498 ( +0.13%) [ +0.02% +0.00% +0.04% / +0.13% +0.16% +0.31%] index_select wrap : Elapsed 0.055 ms (5.492 ms / 100) 5.495 -> 5.494 ( -0.02%) [ +0.07% +0.11% +0.00% / -0.02% +0.24% +0.18%] index_select linear : Elapsed 0.055 ms (5.499 ms / 100) 5.495 -> 5.490 ( -0.09%) [ +0.05% +0.00% +0.07% / -0.09% +0.20% +0.18%] index_select reverse : Elapsed 0.055 ms (5.498 ms / 100) 5.493 -> 5.488 ( -0.09%) [ +0.02% +0.07% +0.00% / +0.04% -0.09% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.494 ms / 100) 5.487 -> 5.491 ( +0.07%) [ +0.00% +0.02% +0.07% / +0.15% +0.09% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.487 ms / 100) 5.502 -> 5.501 ( -0.02%) [ +0.04% +0.00% +0.04% / +0.07% -0.02% +0.05%] index_select spread : Elapsed 0.055 ms (5.504 ms / 100) 5.495 -> 5.493 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.04% +0.16% -0.04%] index_select strided 3 : Elapsed 0.055 ms (5.497 ms / 100) 5.492 -> 5.496 ( +0.07%) [ +0.09% +0.09% +0.00% / +0.07% +0.18% +0.22%] index_select strided 5 : Elapsed 0.055 ms (5.497 ms / 100) 5.496 -> 5.490 ( -0.11%) [ +0.00% +0.04% +0.02% / -0.11% +0.16% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.496 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.16% +0.04% +0.00% / -0.02% +0.04% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.497 ms / 100) 5.497 -> 5.498 ( +0.02%) [ +0.05% +0.09% +0.00% / +0.11% +0.05% +0.02%] index_select random : Elapsed 0.055 ms (5.500 ms / 100) 5.491 -> 5.495 ( +0.07%) [ +0.05% +0.22% +0.00% / +0.07% +0.16% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.494 ms / 100) out_shape = [16, 40, 4, 20] in_shape = [16, 5, 4, 20] idx_dim = 1 B = [16, 40, 4, 20] (stride (40, 1, 12800, 640)) A = [16, 5, 4, 20] (stride (4, 1280, 1, 64)) dim = 1 1.507 -> 1.508 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.66% +0.60%] index_add_ linear : Elapsed 0.015 ms (1.508 ms / 100) 1.456 -> 1.458 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +0.62% +0.55%] index_copy_ linear : Elapsed 0.015 ms (1.456 ms / 100) 1.494 -> 1.495 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.40% +0.40%] index_add_ reverse : Elapsed 0.015 ms (1.494 ms / 100) 1.445 -> 1.448 ( +0.21%) [ +0.00% +0.07% +0.07% / +0.21% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.014 ms (1.445 ms / 100) 1.498 -> 1.497 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.40% +0.40%] index_add_ spread : Elapsed 0.015 ms (1.499 ms / 100) 1.446 -> 1.447 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.55% +0.41%] index_copy_ spread : Elapsed 0.014 ms (1.447 ms / 100) 1.496 -> 1.496 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.67% +0.74%] index_add_ strided 3 : Elapsed 0.015 ms (1.497 ms / 100) 1.446 -> 1.446 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.55% +0.83%] index_copy_ strided 3 : Elapsed 0.014 ms (1.447 ms / 100) 1.493 -> 1.493 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.60% +0.60%] index_add_ strided 7 : Elapsed 0.015 ms (1.495 ms / 100) 1.445 -> 1.445 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.55% +0.55%] index_copy_ strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.508 -> 1.508 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.60%] index_add_ perm : Elapsed 0.015 ms (1.508 ms / 100) 1.456 -> 1.455 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.62% +0.62%] index_copy_ perm : Elapsed 0.015 ms (1.456 ms / 100) 1.508 -> 1.507 ( -0.07%) [ +0.13% +0.07% +0.00% / -0.07% +0.53% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.510 ms / 100) 1.457 -> 1.457 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.69% +0.55%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.457 ms / 100) 8.244 -> 8.242 ( -0.02%) [ +0.00% +0.15% +0.16% / -0.02% +0.40% +0.06%] index_select const : Elapsed 0.082 ms (8.244 ms / 100) 8.246 -> 8.269 ( +0.28%) [ +0.23% +0.00% +0.12% / +0.36% +0.28% +0.34%] index_select wrap : Elapsed 0.083 ms (8.265 ms / 100) 8.257 -> 8.260 ( +0.04%) [ +0.17% +0.05% +0.00% / +0.29% +0.04% +0.15%] index_select linear : Elapsed 0.083 ms (8.271 ms / 100) 8.249 -> 8.258 ( +0.11%) [ +0.08% +0.05% +0.00% / +0.11% +0.19% +0.39%] index_select reverse : Elapsed 0.083 ms (8.256 ms / 100) 8.242 -> 8.241 ( -0.01%) [ +0.05% +0.00% +0.23% / +0.23% -0.01% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.246 ms / 100) 8.225 -> 8.238 ( +0.16%) [ +0.12% +0.07% +0.00% / +0.16% +0.34% +0.41%] index_select skip256 : Elapsed 0.082 ms (8.235 ms / 100) 8.252 -> 8.251 ( -0.01%) [ +0.00% +0.00% +0.21% / +0.17% +0.24% -0.01%] index_select spread : Elapsed 0.083 ms (8.252 ms / 100) 8.252 -> 8.257 ( +0.06%) [ +0.00% +0.08% +0.13% / +0.23% +0.15% +0.06%] index_select strided 3 : Elapsed 0.083 ms (8.252 ms / 100) 8.249 -> 8.269 ( +0.24%) [ +0.13% +0.23% +0.00% / +0.35% +0.48% +0.24%] index_select random : Elapsed 0.083 ms (8.260 ms / 100) 8.250 -> 8.246 ( -0.05%) [ +0.22% +0.12% +0.00% / -0.05% +0.06% +0.27%] index_select random_sorted : Elapsed 0.083 ms (8.268 ms / 100) out_shape = [16, 5, 40, 20] in_shape = [16, 5, 4, 20] idx_dim = 2 B = [16, 5, 40, 20] (stride (4000, 800, 20, 1)) A = [16, 5, 4, 20] (stride (400, 4, 1, 20)) dim = 2 1.237 -> 1.244 ( +0.57%) [ +0.08% +0.08% +0.00% / +0.65% +0.57% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.238 ms / 100) 1.199 -> 1.205 ( +0.50%) [ +0.00% +0.00% +0.00% / +0.50% +0.58% +0.67%] index_copy_ linear : Elapsed 0.012 ms (1.199 ms / 100) 1.253 -> 1.259 ( +0.48%) [ +0.08% +0.00% +0.08% / +0.48% +0.64% +0.56%] index_add_ reverse : Elapsed 0.013 ms (1.254 ms / 100) 1.210 -> 1.216 ( +0.50%) [ +0.00% +0.17% +0.00% / +0.50% +0.58% +0.58%] index_copy_ reverse : Elapsed 0.012 ms (1.210 ms / 100) 1.241 -> 1.248 ( +0.56%) [ +0.00% +0.00% +0.08% / +0.64% +0.56% +0.56%] index_add_ spread : Elapsed 0.012 ms (1.241 ms / 100) 1.199 -> 1.206 ( +0.58%) [ +0.00% +0.00% +0.08% / +0.58% +0.83% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.199 ms / 100) 1.237 -> 1.243 ( +0.49%) [ +0.08% +0.00% +0.00% / +0.49% +0.65% +0.65%] index_add_ strided 3 : Elapsed 0.012 ms (1.238 ms / 100) 1.198 -> 1.206 ( +0.67%) [ +0.08% +0.00% +0.00% / +0.67% +0.67% +0.67%] index_copy_ strided 3 : Elapsed 0.012 ms (1.199 ms / 100) 1.257 -> 1.257 ( +0.00%) [ +0.08% +0.00% +0.64% / +0.32% +0.64% +0.00%] index_add_ strided 7 : Elapsed 0.013 ms (1.258 ms / 100) 1.215 -> 1.214 ( -0.08%) [ +0.00% +0.00% +0.33% / +0.33% +0.82% -0.08%] index_copy_ strided 7 : Elapsed 0.012 ms (1.215 ms / 100) 1.240 -> 1.239 ( -0.08%) [ +0.16% +0.00% +0.65% / -0.08% +0.65% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.242 ms / 100) 1.199 -> 1.200 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.83% +0.83%] index_copy_ perm : Elapsed 0.012 ms (1.199 ms / 100) 1.253 -> 1.252 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.56% +0.64%] index_add_ perm_sorted : Elapsed 0.013 ms (1.254 ms / 100) 1.210 -> 1.210 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.66% +0.58%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.210 ms / 100) 8.712 -> 8.706 ( -0.07%) [ +0.00% +0.18% +0.01% / -0.01% +0.13% -0.07%] index_select const : Elapsed 0.087 ms (8.712 ms / 100) 8.715 -> 8.713 ( -0.02%) [ +0.00% +0.11% +0.06% / +0.09% -0.02% +0.14%] index_select wrap : Elapsed 0.087 ms (8.715 ms / 100) 8.701 -> 8.695 ( -0.07%) [ +0.26% +0.00% +0.21% / +0.15% -0.07% +0.18%] index_select linear : Elapsed 0.087 ms (8.724 ms / 100) 8.707 -> 8.718 ( +0.13%) [ +0.00% +0.15% +0.13% / +0.17% +0.20% +0.13%] index_select reverse : Elapsed 0.087 ms (8.707 ms / 100) 8.709 -> 8.708 ( -0.01%) [ +0.01% +0.00% +0.24% / -0.01% +0.25% +0.00%] index_select skip64 : Elapsed 0.087 ms (8.710 ms / 100) 8.703 -> 8.709 ( +0.07%) [ +0.00% +0.08% +0.30% / +0.07% +0.25% +0.15%] index_select skip256 : Elapsed 0.087 ms (8.703 ms / 100) 8.705 -> 8.703 ( -0.02%) [ +0.02% +0.26% +0.00% / -0.02% +0.07% +0.31%] index_select spread : Elapsed 0.087 ms (8.707 ms / 100) 8.709 -> 8.693 ( -0.18%) [ +0.00% +0.06% +0.16% / -0.18% +0.25% +0.17%] index_select strided 3 : Elapsed 0.087 ms (8.709 ms / 100) 8.711 -> 8.708 ( -0.03%) [ +0.10% +0.00% +0.05% / +0.13% -0.03% +0.09%] index_select random : Elapsed 0.087 ms (8.720 ms / 100) 8.700 -> 8.710 ( +0.11%) [ +0.15% +0.00% +0.01% / +0.11% +0.23% +0.26%] index_select random_sorted : Elapsed 0.087 ms (8.713 ms / 100) B = [16, 5, 40, 20] (stride (4000, 800, 20, 1)) A = [16, 5, 4, 20] (stride (20, 1, 5, 320)) dim = 2 1.296 -> 1.297 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.46% +0.54%] index_add_ linear : Elapsed 0.013 ms (1.296 ms / 100) 1.251 -> 1.251 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.56% +0.56%] index_copy_ linear : Elapsed 0.013 ms (1.252 ms / 100) 1.296 -> 1.296 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.46% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.297 ms / 100) 1.252 -> 1.251 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.40% +0.32%] index_copy_ reverse : Elapsed 0.013 ms (1.252 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.54% +0.54%] index_add_ spread : Elapsed 0.013 ms (1.286 ms / 100) 1.242 -> 1.242 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.48% +0.32%] index_copy_ spread : Elapsed 0.012 ms (1.242 ms / 100) 1.286 -> 1.286 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.54% +0.47%] index_add_ strided 3 : Elapsed 0.013 ms (1.287 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.72% +0.40%] index_copy_ strided 3 : Elapsed 0.012 ms (1.243 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +1.01% +0.62%] index_add_ strided 7 : Elapsed 0.013 ms (1.289 ms / 100) 1.243 -> 1.242 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +1.37% +0.48%] index_copy_ strided 7 : Elapsed 0.012 ms (1.244 ms / 100) 1.286 -> 1.285 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.47% +0.47%] index_add_ perm : Elapsed 0.013 ms (1.287 ms / 100) 1.241 -> 1.243 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.40% +0.32%] index_copy_ perm : Elapsed 0.012 ms (1.243 ms / 100) 1.297 -> 1.296 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.54% +0.31%] index_add_ perm_sorted : Elapsed 0.013 ms (1.297 ms / 100) 1.252 -> 1.251 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.56% +0.32%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.252 ms / 100) 8.709 -> 8.723 ( +0.16%) [ +0.24% +0.26% +0.00% / +0.29% +0.16% +0.22%] index_select const : Elapsed 0.087 ms (8.730 ms / 100) 8.719 -> 8.718 ( -0.01%) [ +0.03% +0.17% +0.00% / -0.01% -0.01% +0.05%] index_select wrap : Elapsed 0.087 ms (8.722 ms / 100) 8.725 -> 8.729 ( +0.05%) [ +0.00% +0.14% +0.05% / +0.07% +0.05% +0.11%] index_select linear : Elapsed 0.087 ms (8.725 ms / 100) 8.719 -> 8.724 ( +0.06%) [ +0.08% +0.18% +0.00% / +0.28% +0.24% +0.06%] index_select reverse : Elapsed 0.087 ms (8.726 ms / 100) 8.722 -> 8.719 ( -0.03%) [ +0.06% +0.08% +0.00% / +0.11% +0.10% -0.03%] index_select skip64 : Elapsed 0.087 ms (8.727 ms / 100) 8.720 -> 8.721 ( +0.01%) [ +0.07% +0.00% +0.09% / +0.09% +0.06% +0.01%] index_select skip256 : Elapsed 0.087 ms (8.726 ms / 100) 8.724 -> 8.734 ( +0.11%) [ +0.00% +0.15% +0.00% / +0.11% +0.30% +0.32%] index_select spread : Elapsed 0.087 ms (8.724 ms / 100) 8.725 -> 8.722 ( -0.03%) [ +0.00% +0.02% +0.26% / +0.02% +0.14% -0.03%] index_select strided 3 : Elapsed 0.087 ms (8.725 ms / 100) 8.729 -> 8.725 ( -0.05%) [ +0.05% +0.00% +0.11% / -0.05% +0.24% -0.01%] index_select random : Elapsed 0.087 ms (8.733 ms / 100) 8.726 -> 8.733 ( +0.08%) [ +0.00% +0.00% +0.09% / +0.17% +0.17% +0.08%] index_select random_sorted : Elapsed 0.087 ms (8.726 ms / 100) B = [16, 5, 40, 20] (stride (4000, 1, 100, 5)) A = [16, 5, 4, 20] (stride (100, 20, 1600, 1)) dim = 2 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.49% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.51%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.226 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.65% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.84% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_add_ spread : Elapsed 0.012 ms (1.227 ms / 100) 1.187 -> 1.188 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.67% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.82% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.226 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.82% +0.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.226 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.84% +0.93%] index_copy_ strided 7 : Elapsed 0.012 ms (1.187 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.57% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.67%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.226 -> 1.227 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.65% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.693 -> 8.698 ( +0.06%) [ +0.29% +0.00% +0.45% / +0.06% +0.35% +0.54%] index_select const : Elapsed 0.087 ms (8.718 ms / 100) 8.742 -> 8.740 ( -0.02%) [ +0.19% +0.03% +0.00% / -0.02% +0.05% +0.23%] index_select wrap : Elapsed 0.088 ms (8.759 ms / 100) 8.719 -> 8.724 ( +0.06%) [ +0.23% +0.16% +0.00% / +0.06% +0.15% +0.21%] index_select linear : Elapsed 0.087 ms (8.739 ms / 100) 8.741 -> 8.740 ( -0.01%) [ +0.00% +0.25% +0.05% / -0.01% +0.25% +0.10%] index_select reverse : Elapsed 0.087 ms (8.741 ms / 100) 8.710 -> 8.713 ( +0.03%) [ +0.00% +0.05% +0.09% / +0.03% +0.15% +0.16%] index_select skip64 : Elapsed 0.087 ms (8.710 ms / 100) 8.689 -> 8.699 ( +0.12%) [ +0.38% +0.00% +0.28% / +0.12% +0.51% +0.43%] index_select skip256 : Elapsed 0.087 ms (8.722 ms / 100) 8.716 -> 8.728 ( +0.14%) [ +0.24% +0.00% +0.17% / +0.18% +0.14% +0.40%] index_select spread : Elapsed 0.087 ms (8.737 ms / 100) 8.737 -> 8.745 ( +0.09%) [ +0.23% +0.27% +0.00% / +0.09% +0.19% +0.23%] index_select strided 3 : Elapsed 0.088 ms (8.757 ms / 100) 8.727 -> 8.748 ( +0.24%) [ +0.00% +0.14% +0.05% / +0.24% +0.42% +0.50%] index_select random : Elapsed 0.087 ms (8.727 ms / 100) 8.727 -> 8.721 ( -0.07%) [ +0.01% +0.00% +0.08% / -0.07% +0.25% +0.16%] index_select random_sorted : Elapsed 0.087 ms (8.728 ms / 100) B = [16, 5, 40, 20] (stride (100, 20, 1600, 1)) A = [16, 5, 4, 20] (stride (20, 4, 1, 320)) dim = 2 0.583 -> 0.584 ( +0.17%) [ +0.17% +1.72% +0.00% / +0.17% +1.20% +1.03%] index_add_ linear : Elapsed 0.006 ms (0.584 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.00% +1.58% +0.18% / +0.18% +0.53% +0.70%] index_copy_ linear : Elapsed 0.006 ms (0.570 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +0.51% +0.68%] index_add_ reverse : Elapsed 0.006 ms (0.585 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.18% +0.88%] index_copy_ reverse : Elapsed 0.006 ms (0.571 ms / 100) 0.585 -> 0.585 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.17% +0.17%] index_add_ spread : Elapsed 0.006 ms (0.586 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.18% +0.35%] index_copy_ spread : Elapsed 0.006 ms (0.571 ms / 100) 0.586 -> 0.587 ( +0.17%) [ +1.71% +0.00% +0.00% / +0.17% +0.68% +0.34%] index_add_ strided 3 : Elapsed 0.006 ms (0.596 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.40% +0.00% +0.00% / +0.00% +0.88% +0.35%] index_copy_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.585 -> 0.584 ( -0.17%) [ +0.00% +0.17% +0.00% / -0.17% +0.51% +0.51%] index_add_ strided 7 : Elapsed 0.006 ms (0.585 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +0.18% +0.18%] index_copy_ strided 7 : Elapsed 0.006 ms (0.570 ms / 100) 0.587 -> 0.586 ( -0.17%) [ +0.00% +0.00% +0.00% / +0.00% -0.17% -0.17%] index_add_ perm : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.00% +0.00%] index_copy_ perm : Elapsed 0.006 ms (0.571 ms / 100) 0.587 -> 0.585 ( -0.34%) [ +0.00% +0.17% +0.00% / -0.17% -0.34% +0.00%] index_add_ perm_sorted : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.00%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.571 ms / 100) 5.015 -> 5.010 ( -0.10%) [ +0.08% +0.00% +0.00% / -0.10% +0.46% +0.58%] index_select const : Elapsed 0.050 ms (5.019 ms / 100) 5.012 -> 5.008 ( -0.08%) [ +0.04% +0.06% +0.00% / -0.08% +0.14% +0.18%] index_select wrap : Elapsed 0.050 ms (5.014 ms / 100) 5.015 -> 5.008 ( -0.14%) [ +0.00% +0.08% +0.06% / -0.14% +0.14% +0.10%] index_select linear : Elapsed 0.050 ms (5.015 ms / 100) 5.002 -> 5.009 ( +0.14%) [ +0.00% +0.04% +0.36% / +0.14% +0.32% +0.58%] index_select reverse : Elapsed 0.050 ms (5.002 ms / 100) 5.006 -> 5.012 ( +0.12%) [ +0.00% +0.08% +0.30% / +0.12% +0.44% +0.40%] index_select skip64 : Elapsed 0.050 ms (5.006 ms / 100) 5.005 -> 5.007 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.44% +0.44%] index_select skip256 : Elapsed 0.050 ms (5.015 ms / 100) 5.008 -> 5.011 ( +0.06%) [ +0.22% +0.14% +0.00% / +0.06% +0.40% +0.26%] index_select spread : Elapsed 0.050 ms (5.019 ms / 100) 5.008 -> 5.012 ( +0.08%) [ +0.04% +0.10% +0.00% / +0.08% +0.42% +0.34%] index_select strided 3 : Elapsed 0.050 ms (5.010 ms / 100) 5.009 -> 5.013 ( +0.08%) [ +0.00% +0.18% +0.22% / +0.08% +0.26% +0.38%] index_select random : Elapsed 0.050 ms (5.009 ms / 100) 5.008 -> 4.998 ( -0.20%) [ +0.26% +0.00% +0.08% / -0.20% +0.28% +0.42%] index_select random_sorted : Elapsed 0.050 ms (5.021 ms / 100) B = [16, 5, 40, 20] (stride (200, 40, 1, 3200)) dim = 2 fill_cnt = 4 0.462 -> 0.463 ( +0.22%) [ +0.00% +0.22% +0.00% / +0.22% +0.43% +0.65%] index_fill_ const : Elapsed 0.005 ms (0.462 ms / 100) 0.462 -> 0.464 ( +0.43%) [ +0.22% +0.22% +0.00% / +3.46% +0.65% +0.43%] index_fill_ linear : Elapsed 0.005 ms (0.463 ms / 100) 0.462 -> 0.465 ( +0.65%) [ +0.00% +0.22% +0.22% / +3.03% +0.65% +0.65%] index_fill_ reverse : Elapsed 0.005 ms (0.462 ms / 100) 0.461 -> 0.462 ( +0.22%) [ +0.00% +0.22% +0.00% / +0.22% +1.08% +0.87%] index_fill_ skip64 : Elapsed 0.005 ms (0.461 ms / 100) 0.461 -> 0.460 ( -0.22%) [ +0.00% +0.00% +0.00% / -0.22% +2.17% +0.87%] index_fill_ skip256 : Elapsed 0.005 ms (0.461 ms / 100) 0.463 -> 0.463 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.22% +1.30%] index_fill_ spread : Elapsed 0.005 ms (0.463 ms / 100) 0.464 -> 0.463 ( -0.22%) [ +7.11% +0.00% +1.08% / -0.22% +0.22% +12.50%] index_fill_ strided 3 : Elapsed 0.005 ms (0.497 ms / 100) 0.468 -> 0.464 ( -0.85%) [ +0.21% +0.00% +9.40% / -0.43% -0.85% -0.85%] index_fill_ strided 5 : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.464 ( -0.22%) [ +0.22% +0.43% +0.00% / +0.22% -0.22% -0.22%] index_fill_ strided 7 : Elapsed 0.005 ms (0.466 ms / 100) 0.460 -> 0.466 ( +1.30%) [ +0.22% +0.22% +0.00% / +3.48% +1.30% +1.30%] index_fill_ strided 8 : Elapsed 0.005 ms (0.461 ms / 100) 0.462 -> 0.461 ( -0.22%) [ +3.25% +0.00% +0.00% / -0.22% +0.87% +0.65%] index_fill_ strided 16 : Elapsed 0.005 ms (0.477 ms / 100) 0.462 -> 0.462 ( +0.00%) [ +0.00% +0.22% +0.22% / +0.00% +0.43% +0.65%] index_fill_ random : Elapsed 0.005 ms (0.462 ms / 100) 0.462 -> 0.462 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +2.38% +0.65%] index_fill_ random_sorted : Elapsed 0.005 ms (0.462 ms / 100) 0.465 -> 0.464 ( -0.22%) [ +0.00% +0.22% +0.00% / +0.22% -0.22% -0.22%] index_fill_ perm : Elapsed 0.005 ms (0.465 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +0.22% +0.22% +0.00% / +0.43% +5.59% -0.43%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.466 ms / 100) B = [16, 5, 40, 20] (stride (1, 16, 80, 3200)) A = [16, 5, 4, 20] (stride (400, 1, 5, 20)) dim = 2 1.319 -> 1.318 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.53% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.282 -> 1.285 ( +0.23%) [ +0.00% +0.16% +0.08% / +0.23% +0.55% +0.62%] index_copy_ linear : Elapsed 0.013 ms (1.282 ms / 100) 1.336 -> 1.337 ( +0.07%) [ +0.00% +0.15% +0.15% / +0.07% +0.37% +0.75%] index_add_ reverse : Elapsed 0.013 ms (1.336 ms / 100) 1.296 -> 1.297 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.54% +0.46%] index_copy_ reverse : Elapsed 0.013 ms (1.296 ms / 100) 1.322 -> 1.321 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.61% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.322 ms / 100) 1.284 -> 1.284 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.70% +0.70%] index_copy_ spread : Elapsed 0.013 ms (1.284 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.15% +0.23% +0.00% / +0.00% +0.68% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.281 -> 1.280 ( -0.08%) [ +0.55% +0.08% +0.00% / -0.08% +0.70% +0.78%] index_copy_ strided 3 : Elapsed 0.013 ms (1.288 ms / 100) 1.327 -> 1.332 ( +0.38%) [ +0.00% +0.00% +0.68% / +0.38% +0.60% +1.06%] index_add_ strided 7 : Elapsed 0.013 ms (1.327 ms / 100) 1.289 -> 1.293 ( +0.31%) [ +0.00% +0.00% +0.31% / +0.31% +0.62% +1.09%] index_copy_ strided 7 : Elapsed 0.013 ms (1.289 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.68%] index_add_ perm : Elapsed 0.013 ms (1.322 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.70% +0.70%] index_copy_ perm : Elapsed 0.013 ms (1.284 ms / 100) 1.334 -> 1.334 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.75% +0.60%] index_add_ perm_sorted : Elapsed 0.013 ms (1.335 ms / 100) 1.295 -> 1.294 ( -0.08%) [ +0.00% +0.15% +0.08% / -0.08% +0.69% +0.77%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.295 ms / 100) 9.190 -> 9.180 ( -0.11%) [ +0.00% +0.05% +0.03% / -0.11% -0.01% -0.04%] index_select const : Elapsed 0.092 ms (9.190 ms / 100) 9.186 -> 9.186 ( +0.00%) [ +0.00% +0.03% +0.12% / +0.00% +0.04% +0.00%] index_select wrap : Elapsed 0.092 ms (9.186 ms / 100) 9.193 -> 9.165 ( -0.30%) [ +0.17% +0.01% +0.00% / -0.15% +0.03% -0.30%] index_select linear : Elapsed 0.092 ms (9.209 ms / 100) 9.181 -> 9.174 ( -0.08%) [ +0.00% +0.29% +0.09% / +0.24% +0.05% -0.08%] index_select reverse : Elapsed 0.092 ms (9.181 ms / 100) 9.177 -> 9.186 ( +0.10%) [ +0.09% +0.00% +0.31% / +0.23% +0.10% +0.23%] index_select skip64 : Elapsed 0.092 ms (9.185 ms / 100) 9.183 -> 9.184 ( +0.01%) [ +0.02% +0.29% +0.00% / +0.01% +0.11% +0.10%] index_select skip256 : Elapsed 0.092 ms (9.185 ms / 100) 9.180 -> 9.187 ( +0.08%) [ +0.13% +0.04% +0.00% / +0.37% +0.08% +0.12%] index_select spread : Elapsed 0.092 ms (9.192 ms / 100) 9.191 -> 9.173 ( -0.20%) [ +0.02% +0.00% +0.17% / -0.20% -0.08% -0.17%] index_select strided 3 : Elapsed 0.092 ms (9.193 ms / 100) 9.175 -> 9.185 ( +0.11%) [ +0.37% +0.04% +0.00% / +0.21% +0.11% +0.40%] index_select random : Elapsed 0.092 ms (9.209 ms / 100) 9.180 -> 9.191 ( +0.12%) [ +0.02% +0.07% +0.00% / +0.25% +0.12% +0.37%] index_select random_sorted : Elapsed 0.092 ms (9.182 ms / 100) out_shape = [16, 5, 4, 40] in_shape = [16, 5, 4, 20] idx_dim = 3 B = [16, 5, 4, 40] (stride (800, 4, 1, 20)) A = [16, 5, 4, 20] (stride (400, 4, 1, 20)) dim = 3 1.538 -> 1.508 ( -1.95%) [ +0.07% +0.00% +0.13% / -1.95% -1.30% -1.30%] index_add_ linear : Elapsed 0.015 ms (1.539 ms / 100) 1.503 -> 1.468 ( -2.33%) [ +0.00% +0.13% +0.00% / -2.33% -1.73% -1.93%] index_copy_ linear : Elapsed 0.015 ms (1.503 ms / 100) 1.537 -> 1.508 ( -1.89%) [ +0.39% +0.07% +0.00% / -1.89% -1.63% -1.56%] index_add_ reverse : Elapsed 0.015 ms (1.543 ms / 100) 1.504 -> 1.469 ( -2.33%) [ +0.07% +0.00% +0.00% / -2.33% -1.86% -1.73%] index_copy_ reverse : Elapsed 0.015 ms (1.505 ms / 100) 1.537 -> 1.506 ( -2.02%) [ +0.00% +0.20% +0.20% / -2.02% -1.50% -1.56%] index_add_ spread : Elapsed 0.015 ms (1.537 ms / 100) 1.508 -> 1.475 ( -2.19%) [ +0.20% +0.00% +0.13% / -2.19% -1.59% -1.72%] index_copy_ spread : Elapsed 0.015 ms (1.511 ms / 100) 1.537 -> 1.511 ( -1.69%) [ +0.13% +0.13% +0.00% / -1.69% -1.24% -1.63%] index_add_ strided 3 : Elapsed 0.015 ms (1.539 ms / 100) 1.506 -> 1.472 ( -2.26%) [ +0.27% +0.13% +0.00% / -2.26% -1.99% -2.12%] index_copy_ strided 3 : Elapsed 0.015 ms (1.510 ms / 100) 1.537 -> 1.510 ( -1.76%) [ +0.00% +0.20% +0.13% / -1.76% -1.50% -1.43%] index_add_ strided 7 : Elapsed 0.015 ms (1.537 ms / 100) 1.508 -> 1.474 ( -2.25%) [ +0.00% +0.20% +0.07% / -2.25% -1.66% -1.79%] index_copy_ strided 7 : Elapsed 0.015 ms (1.508 ms / 100) 1.529 -> 1.502 ( -1.77%) [ +0.59% +0.00% +0.20% / -1.77% -0.39% -0.78%] index_add_ perm : Elapsed 0.015 ms (1.538 ms / 100) 1.503 -> 1.473 ( -2.00%) [ +0.00% +0.00% +0.13% / -2.00% -1.13% -1.46%] index_copy_ perm : Elapsed 0.015 ms (1.503 ms / 100) 1.534 -> 1.504 ( -1.96%) [ +0.00% +0.20% +0.20% / -1.96% -1.37% -0.98%] index_add_ perm_sorted : Elapsed 0.015 ms (1.534 ms / 100) 1.502 -> 1.472 ( -2.00%) [ +0.00% +0.47% +0.40% / -2.00% -1.46% -1.33%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.502 ms / 100) 2.869 -> 2.870 ( +0.03%) [ +0.56% +0.00% +0.03% / +0.10% +0.03% +0.21%] index_select const : Elapsed 0.029 ms (2.885 ms / 100) 2.897 -> 2.890 ( -0.24%) [ +0.07% +0.00% +0.07% / +0.07% -0.14% -0.24%] index_select wrap : Elapsed 0.029 ms (2.899 ms / 100) 2.896 -> 2.883 ( -0.45%) [ +0.00% +0.00% +0.00% / -0.14% -0.45% -0.31%] index_select linear : Elapsed 0.029 ms (2.896 ms / 100) 2.897 -> 2.894 ( -0.10%) [ +0.03% +0.00% +0.28% / +0.07% -0.10% +0.10%] index_select reverse : Elapsed 0.029 ms (2.898 ms / 100) 2.870 -> 2.875 ( +0.17%) [ +0.35% +0.21% +0.00% / +0.38% +0.35% +0.17%] index_select skip64 : Elapsed 0.029 ms (2.880 ms / 100) 2.864 -> 2.874 ( +0.35%) [ +0.14% +0.00% +0.17% / +0.35% +0.63% +0.38%] index_select skip256 : Elapsed 0.029 ms (2.868 ms / 100) 2.895 -> 2.881 ( -0.48%) [ +0.00% +0.00% +0.28% / +0.07% -0.38% -0.48%] index_select spread : Elapsed 0.029 ms (2.895 ms / 100) 2.896 -> 2.884 ( -0.41%) [ +0.21% +0.00% +0.03% / +0.21% -0.31% -0.41%] index_select strided 3 : Elapsed 0.029 ms (2.902 ms / 100) 2.878 -> 2.877 ( -0.03%) [ +0.14% +0.38% +0.00% / +0.14% +0.10% -0.03%] index_select strided 5 : Elapsed 0.029 ms (2.882 ms / 100) 2.891 -> 2.885 ( -0.21%) [ +0.24% +0.28% +0.00% / +0.21% -0.10% -0.21%] index_select strided 7 : Elapsed 0.029 ms (2.898 ms / 100) 2.875 -> 2.874 ( -0.03%) [ +0.00% +0.17% +0.10% / +0.14% -0.03% +0.28%] index_select strided 8 : Elapsed 0.029 ms (2.875 ms / 100) 2.877 -> 2.879 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.14% +0.28%] index_select strided 16 : Elapsed 0.029 ms (2.877 ms / 100) 2.895 -> 2.888 ( -0.24%) [ +0.21% +0.00% +0.21% / +0.10% -0.24% -0.17%] index_select random : Elapsed 0.029 ms (2.901 ms / 100) 2.891 -> 2.890 ( -0.03%) [ +0.00% +0.24% +0.10% / +0.14% +0.00% -0.03%] index_select random_sorted : Elapsed 0.029 ms (2.891 ms / 100) B = [16, 5, 4, 40] (stride (200, 1, 3200, 5)) A = [16, 5, 4, 20] (stride (20, 1280, 320, 1)) dim = 3 2.412 -> 2.426 ( +0.58%) [ +0.12% +0.04% +0.00% / +0.58% +0.83% +0.91%] index_add_ linear : Elapsed 0.024 ms (2.415 ms / 100) 2.410 -> 2.425 ( +0.62%) [ +0.00% +0.04% +0.00% / +0.62% +0.83% +0.87%] index_copy_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.406 -> 2.421 ( +0.62%) [ +0.17% +0.17% +0.00% / +0.62% +1.21% +1.21%] index_add_ reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.00% +0.04% +0.00% / +0.50% +1.21% +1.29%] index_copy_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.426 -> 2.442 ( +0.66%) [ +0.16% +0.00% +0.12% / +0.66% +1.07% +0.99%] index_add_ spread : Elapsed 0.024 ms (2.430 ms / 100) 2.430 -> 2.444 ( +0.58%) [ +0.00% +0.33% +0.25% / +0.58% +1.32% +1.32%] index_copy_ spread : Elapsed 0.024 ms (2.430 ms / 100) 2.428 -> 2.441 ( +0.54%) [ +0.45% +0.25% +0.00% / +0.86% +0.54% +0.66%] index_add_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.435 -> 2.452 ( +0.70%) [ +0.00% +0.16% +0.16% / +0.70% +0.82% +0.82%] index_copy_ strided 3 : Elapsed 0.024 ms (2.435 ms / 100) 2.428 -> 2.440 ( +0.49%) [ +0.04% +0.21% +0.00% / +0.62% +0.66% +0.49%] index_add_ strided 7 : Elapsed 0.024 ms (2.429 ms / 100) 2.437 -> 2.450 ( +0.53%) [ +0.00% +0.25% +0.04% / +0.53% +0.78% +0.70%] index_copy_ strided 7 : Elapsed 0.024 ms (2.437 ms / 100) 2.429 -> 2.432 ( +0.12%) [ +0.08% +0.16% +0.00% / +0.66% +0.37% +0.12%] index_add_ perm : Elapsed 0.024 ms (2.431 ms / 100) 2.435 -> 2.440 ( +0.21%) [ +0.12% +0.00% +0.16% / +0.49% +0.21% +0.33%] index_copy_ perm : Elapsed 0.024 ms (2.438 ms / 100) 2.426 -> 2.433 ( +0.29%) [ +0.25% +0.12% +0.00% / +0.78% +0.33% +0.29%] index_add_ perm_sorted : Elapsed 0.024 ms (2.432 ms / 100) 2.433 -> 2.442 ( +0.37%) [ +0.00% +0.04% +0.04% / +0.66% +0.37% +0.45%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.433 ms / 100) 4.437 -> 4.442 ( +0.11%) [ +0.05% +0.00% +0.05% / +0.16% +0.20% +0.11%] index_select const : Elapsed 0.044 ms (4.439 ms / 100) 4.448 -> 4.444 ( -0.09%) [ +0.09% +0.00% +0.02% / +0.02% +0.04% -0.09%] index_select wrap : Elapsed 0.045 ms (4.452 ms / 100) 4.446 -> 4.448 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.09% +0.04% +0.11%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.448 -> 4.448 ( +0.00%) [ +0.09% +0.27% +0.00% / +0.00% +0.22% +0.20%] index_select reverse : Elapsed 0.045 ms (4.452 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.16% +0.23% +0.00% / +0.32% +0.02% +0.02%] index_select skip64 : Elapsed 0.044 ms (4.447 ms / 100) 4.436 -> 4.442 ( +0.14%) [ +0.00% +0.20% +0.11% / +0.14% +0.16% +0.25%] index_select skip256 : Elapsed 0.044 ms (4.436 ms / 100) 4.442 -> 4.441 ( -0.02%) [ +0.00% +0.29% +0.05% / -0.02% +0.23% +0.25%] index_select spread : Elapsed 0.044 ms (4.442 ms / 100) 4.446 -> 4.446 ( +0.00%) [ +0.00% +0.09% +0.02% / +0.00% +0.16% +0.22%] index_select strided 3 : Elapsed 0.044 ms (4.446 ms / 100) 4.448 -> 4.450 ( +0.04%) [ +0.07% +0.20% +0.00% / +0.04% +0.16% +0.04%] index_select strided 5 : Elapsed 0.045 ms (4.451 ms / 100) 4.443 -> 4.450 ( +0.16%) [ +0.00% +0.09% +0.11% / +0.16% +0.29% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.445 -> 4.444 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.02% +0.07%] index_select strided 8 : Elapsed 0.044 ms (4.445 ms / 100) 4.448 -> 4.447 ( -0.02%) [ +0.04% +0.02% +0.00% / -0.02% +0.07% +0.04%] index_select strided 16 : Elapsed 0.044 ms (4.450 ms / 100) 4.444 -> 4.452 ( +0.18%) [ +0.07% +0.00% +0.25% / +0.23% +0.25% +0.18%] index_select random : Elapsed 0.044 ms (4.447 ms / 100) 4.448 -> 4.447 ( -0.02%) [ +0.18% +0.02% +0.00% / -0.02% +0.00% +0.09%] index_select random_sorted : Elapsed 0.045 ms (4.456 ms / 100) B = [16, 5, 4, 40] (stride (200, 1, 3200, 5)) A = [16, 5, 4, 20] (stride (1, 320, 1600, 16)) dim = 3 2.455 -> 2.466 ( +0.45%) [ +0.00% +0.08% +0.08% / +0.45% +0.73% +0.81%] index_add_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.467 ( +0.69%) [ +0.08% +0.08% +0.00% / +0.69% +1.02% +0.90%] index_copy_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.458 -> 2.468 ( +0.41%) [ +0.08% +0.20% +0.00% / +0.41% +0.69% +0.53%] index_add_ reverse : Elapsed 0.025 ms (2.460 ms / 100) 2.454 -> 2.464 ( +0.41%) [ +0.12% +0.00% +0.16% / +0.41% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.474 -> 2.485 ( +0.44%) [ +0.04% +0.00% +0.12% / +0.61% +0.44% +0.65%] index_add_ spread : Elapsed 0.025 ms (2.475 ms / 100) 2.483 -> 2.496 ( +0.52%) [ +0.08% +0.00% +0.12% / +0.52% +0.72% +0.72%] index_copy_ spread : Elapsed 0.025 ms (2.485 ms / 100) 2.471 -> 2.486 ( +0.61%) [ +0.20% +0.20% +0.00% / +0.61% +0.77% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.476 ms / 100) 2.478 -> 2.492 ( +0.56%) [ +0.00% +0.00% +0.16% / +0.56% +0.73% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.471 -> 2.483 ( +0.49%) [ +0.00% +0.24% +0.16% / +0.61% +0.65% +0.49%] index_add_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.476 -> 2.493 ( +0.69%) [ +0.04% +0.12% +0.00% / +0.81% +0.73% +0.69%] index_copy_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.465 -> 2.480 ( +0.61%) [ +0.12% +0.24% +0.00% / +0.61% +0.77% +0.61%] index_add_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.469 -> 2.480 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.45% +0.53% +0.85%] index_copy_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.465 -> 2.478 ( +0.53%) [ +0.24% +0.16% +0.00% / +0.53% +0.65% +0.85%] index_add_ perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.480 ( +0.32%) [ +0.00% +0.00% +0.04% / +0.32% +0.69% +0.73%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.472 ms / 100) 4.498 -> 4.495 ( -0.07%) [ +0.07% +0.00% +0.04% / -0.04% -0.07% +0.07%] index_select const : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.510 ( +0.27%) [ +0.22% +0.00% +0.11% / +0.27% +0.29% +0.40%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.510 ( +0.07%) [ +0.18% +0.00% +0.16% / +0.07% +0.20% +0.16%] index_select linear : Elapsed 0.045 ms (4.515 ms / 100) 4.511 -> 4.510 ( -0.02%) [ +0.04% +0.09% +0.00% / +0.18% +0.04% -0.02%] index_select reverse : Elapsed 0.045 ms (4.513 ms / 100) 4.497 -> 4.492 ( -0.11%) [ +0.13% +0.00% +0.02% / -0.11% +0.00% +0.13%] index_select skip64 : Elapsed 0.045 ms (4.503 ms / 100) 4.491 -> 4.497 ( +0.13%) [ +0.24% +0.00% +0.18% / +0.13% +0.31% +0.47%] index_select skip256 : Elapsed 0.045 ms (4.502 ms / 100) 4.504 -> 4.510 ( +0.13%) [ +0.33% +0.11% +0.00% / +0.13% +0.29% +0.24%] index_select spread : Elapsed 0.045 ms (4.519 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.02% +0.00% +0.16% / +0.02% +0.24% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.504 ms / 100) 4.496 -> 4.504 ( +0.18%) [ +0.00% +0.11% +0.02% / +0.22% +0.18% +0.18%] index_select strided 5 : Elapsed 0.045 ms (4.496 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.04% +0.00% +0.00% / +0.02% +0.18% +0.29%] index_select strided 7 : Elapsed 0.045 ms (4.505 ms / 100) 4.492 -> 4.497 ( +0.11%) [ +0.20% +0.18% +0.00% / +0.11% +0.47% +0.36%] index_select strided 8 : Elapsed 0.045 ms (4.501 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.07% +0.00% +0.04% / +0.02% +0.33% +0.31%] index_select strided 16 : Elapsed 0.045 ms (4.502 ms / 100) 4.501 -> 4.507 ( +0.13%) [ +0.20% +0.22% +0.00% / +0.13% +0.24% +0.13%] index_select random : Elapsed 0.045 ms (4.510 ms / 100) 4.499 -> 4.509 ( +0.22%) [ +0.20% +0.11% +0.00% / +0.22% +0.42% +0.29%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [16, 5, 4, 40] (stride (1, 640, 3200, 16)) A = [16, 5, 4, 20] (stride (400, 1, 100, 5)) dim = 3 2.451 -> 2.461 ( +0.41%) [ +0.12% +0.00% +0.16% / +0.41% +0.78% +0.78%] index_add_ linear : Elapsed 0.025 ms (2.454 ms / 100) 2.449 -> 2.458 ( +0.37%) [ +0.00% +0.08% +0.16% / +0.37% +0.82% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.445 -> 2.463 ( +0.74%) [ +0.20% +0.08% +0.00% / +0.74% +1.02% +1.47%] index_add_ reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.440 -> 2.454 ( +0.57%) [ +0.16% +0.25% +0.00% / +0.57% +1.11% +1.68%] index_copy_ reverse : Elapsed 0.024 ms (2.444 ms / 100) 2.445 -> 2.460 ( +0.61%) [ +0.00% +0.16% +0.08% / +0.61% +1.10% +1.35%] index_add_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.443 -> 2.462 ( +0.78%) [ +0.00% +0.00% +0.12% / +0.78% +1.11% +1.88%] index_copy_ spread : Elapsed 0.024 ms (2.443 ms / 100) 2.456 -> 2.466 ( +0.41%) [ +0.00% +0.12% +0.08% / +0.41% +0.57% +0.69%] index_add_ strided 3 : Elapsed 0.025 ms (2.456 ms / 100) 2.452 -> 2.464 ( +0.49%) [ +0.00% +0.12% +0.00% / +0.49% +0.69% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.455 -> 2.470 ( +0.61%) [ +0.04% +0.04% +0.00% / +0.61% +0.81% +0.61%] index_add_ strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.451 -> 2.464 ( +0.53%) [ +0.16% +0.24% +0.00% / +0.53% +0.90% +1.02%] index_copy_ strided 7 : Elapsed 0.025 ms (2.455 ms / 100) 2.459 -> 2.465 ( +0.24%) [ +0.00% +0.04% +0.00% / +0.41% +0.24% +0.33%] index_add_ perm : Elapsed 0.025 ms (2.459 ms / 100) 2.455 -> 2.461 ( +0.24%) [ +0.24% +0.00% +0.04% / +0.37% +0.24% +0.57%] index_copy_ perm : Elapsed 0.025 ms (2.461 ms / 100) 2.456 -> 2.463 ( +0.29%) [ +0.33% +0.08% +0.00% / +0.69% +0.29% +0.29%] index_add_ perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.454 -> 2.459 ( +0.20%) [ +0.24% +0.16% +0.00% / +0.49% +0.37% +0.20%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) 4.503 -> 4.506 ( +0.07%) [ +0.04% +0.11% +0.00% / +0.07% +0.07% +0.29%] index_select const : Elapsed 0.045 ms (4.505 ms / 100) 4.505 -> 4.504 ( -0.02%) [ +0.13% +0.00% +0.18% / +0.16% +0.07% -0.02%] index_select wrap : Elapsed 0.045 ms (4.511 ms / 100) 4.513 -> 4.509 ( -0.09%) [ +0.00% +0.02% +0.02% / +0.00% -0.09% +0.00%] index_select linear : Elapsed 0.045 ms (4.513 ms / 100) 4.511 -> 4.510 ( -0.02%) [ +0.00% +0.07% +0.16% / +0.29% -0.02% +0.04%] index_select reverse : Elapsed 0.045 ms (4.511 ms / 100) 4.500 -> 4.497 ( -0.07%) [ +0.04% +0.00% +0.07% / +0.00% -0.07% -0.02%] index_select skip64 : Elapsed 0.045 ms (4.502 ms / 100) 4.500 -> 4.498 ( -0.04%) [ +0.07% +0.00% +0.09% / +0.07% -0.04% +0.02%] index_select skip256 : Elapsed 0.045 ms (4.503 ms / 100) 4.510 -> 4.511 ( +0.02%) [ +0.00% +0.07% +0.09% / +0.02% +0.13% +0.04%] index_select spread : Elapsed 0.045 ms (4.510 ms / 100) 4.509 -> 4.511 ( +0.04%) [ +0.13% +0.16% +0.00% / +0.07% +0.04% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.515 ms / 100) 4.500 -> 4.499 ( -0.02%) [ +0.00% +0.02% +0.02% / +0.24% -0.02% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.500 ms / 100) 4.505 -> 4.505 ( +0.00%) [ +0.00% +0.13% +0.13% / +0.00% +0.33% +0.29%] index_select strided 7 : Elapsed 0.045 ms (4.505 ms / 100) 4.506 -> 4.503 ( -0.07%) [ +0.07% +0.00% +0.02% / +0.04% -0.02% -0.07%] index_select strided 8 : Elapsed 0.045 ms (4.509 ms / 100) 4.504 -> 4.506 ( +0.04%) [ +0.07% +0.00% +0.18% / +0.04% +0.07% +0.09%] index_select strided 16 : Elapsed 0.045 ms (4.507 ms / 100) 4.508 -> 4.514 ( +0.13%) [ +0.13% +0.09% +0.00% / +0.13% +0.18% +0.16%] index_select random : Elapsed 0.045 ms (4.514 ms / 100) 4.513 -> 4.507 ( -0.13%) [ +0.00% +0.16% +0.07% / +0.07% -0.13% +0.11%] index_select random_sorted : Elapsed 0.045 ms (4.513 ms / 100) B = [16, 5, 4, 40] (stride (4, 64, 1, 320)) A = [16, 5, 4, 20] (stride (400, 1, 100, 5)) dim = 3 2.445 -> 2.455 ( +0.41%) [ +0.04% +0.04% +0.00% / +0.41% +0.90% +0.61%] index_add_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.438 -> 2.454 ( +0.66%) [ +0.00% +0.37% +0.16% / +0.66% +1.07% +0.98%] index_copy_ linear : Elapsed 0.024 ms (2.438 ms / 100) 2.449 -> 2.457 ( +0.33%) [ +0.04% +0.00% +0.04% / +0.33% +0.45% +0.45%] index_add_ reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.443 -> 2.459 ( +0.65%) [ +0.00% +0.29% +0.16% / +0.65% +0.65% +0.70%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.448 -> 2.457 ( +0.37%) [ +0.12% +0.16% +0.00% / +0.37% +0.49% +0.57%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.458 ( +0.45%) [ +0.00% +0.12% +0.04% / +0.45% +0.69% +0.53%] index_copy_ spread : Elapsed 0.024 ms (2.447 ms / 100) 2.446 -> 2.456 ( +0.41%) [ +0.33% +0.16% +0.00% / +0.70% +0.53% +0.41%] index_add_ strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.443 -> 2.458 ( +0.61%) [ +0.08% +0.16% +0.00% / +0.65% +0.70% +0.61%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.449 -> 2.457 ( +0.33%) [ +0.00% +0.33% +0.04% / +0.61% +0.49% +0.33%] index_add_ strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.447 -> 2.458 ( +0.45%) [ +0.04% +0.12% +0.00% / +0.45% +0.45% +0.57%] index_copy_ strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.447 -> 2.458 ( +0.45%) [ +0.00% +0.12% +0.12% / +0.45% +0.74% +0.74%] index_add_ perm : Elapsed 0.024 ms (2.447 ms / 100) 2.444 -> 2.457 ( +0.53%) [ +0.00% +0.04% +0.08% / +0.53% +0.70% +0.70%] index_copy_ perm : Elapsed 0.024 ms (2.444 ms / 100) 2.448 -> 2.458 ( +0.41%) [ +0.04% +0.00% +0.08% / +0.41% +0.53% +0.49%] index_add_ perm_sorted : Elapsed 0.024 ms (2.449 ms / 100) 2.443 -> 2.458 ( +0.61%) [ +0.20% +0.00% +0.12% / +0.61% +0.78% +0.78%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.448 ms / 100) 4.488 -> 4.485 ( -0.07%) [ +0.18% +0.07% +0.00% / -0.07% +0.20% +0.29%] index_select const : Elapsed 0.045 ms (4.496 ms / 100) 4.495 -> 4.496 ( +0.02%) [ +0.20% +0.04% +0.00% / +0.02% +0.24% +0.36%] index_select wrap : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.498 ( -0.04%) [ +0.00% +0.04% +0.13% / -0.04% +0.29% +0.22%] index_select linear : Elapsed 0.045 ms (4.500 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.00% +0.04% +0.07% / +0.02% +0.07% +0.18%] index_select reverse : Elapsed 0.045 ms (4.501 ms / 100) 4.490 -> 4.491 ( +0.02%) [ +0.22% +0.00% +0.00% / +0.02% +0.18% +0.24%] index_select skip64 : Elapsed 0.045 ms (4.500 ms / 100) 4.493 -> 4.490 ( -0.07%) [ +0.00% +0.00% +0.02% / -0.07% +0.07% +0.09%] index_select skip256 : Elapsed 0.045 ms (4.493 ms / 100) 4.498 -> 4.502 ( +0.09%) [ +0.09% +0.02% +0.00% / +0.18% +0.27% +0.09%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.498 -> 4.499 ( +0.02%) [ +0.00% +0.11% +0.04% / +0.02% +0.02% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.498 ms / 100) 4.488 -> 4.492 ( +0.09%) [ +0.25% +0.22% +0.00% / +0.09% +0.25% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.499 ms / 100) 4.494 -> 4.498 ( +0.09%) [ +0.00% +0.13% +0.20% / +0.09% +0.20% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.494 ms / 100) 4.491 -> 4.497 ( +0.13%) [ +0.07% +0.09% +0.00% / +0.13% +0.20% +0.31%] index_select strided 8 : Elapsed 0.045 ms (4.494 ms / 100) 4.488 -> 4.500 ( +0.27%) [ +0.00% +0.09% +0.29% / +0.31% +0.27% +0.29%] index_select strided 16 : Elapsed 0.045 ms (4.488 ms / 100) 4.495 -> 4.504 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.22% +0.20% +0.36%] index_select random : Elapsed 0.045 ms (4.498 ms / 100) 4.498 -> 4.500 ( +0.04%) [ +0.00% +0.04% +0.00% / +0.04% +0.18% +0.09%] index_select random_sorted : Elapsed 0.045 ms (4.498 ms / 100) out_shape = [40, 5, 20, 4] in_shape = [16, 5, 20, 4] idx_dim = 0 B = [40, 5, 20, 4] (stride (400, 80, 1, 20)) A = [16, 5, 20, 4] (stride (80, 1280, 1, 20)) dim = 0 3.936 -> 3.937 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.69% +0.91%] index_add_ linear : Elapsed 0.039 ms (3.937 ms / 100) 3.807 -> 3.809 ( +0.05%) [ +0.00% +0.24% +0.05% / +0.05% +0.63% +0.81%] index_copy_ linear : Elapsed 0.038 ms (3.807 ms / 100) 3.938 -> 3.940 ( +0.05%) [ +0.00% +0.13% +0.08% / +0.05% +0.56% +0.53%] index_add_ reverse : Elapsed 0.039 ms (3.938 ms / 100) 3.812 -> 3.815 ( +0.08%) [ +0.00% +0.18% +0.16% / +0.08% +0.60% +0.58%] index_copy_ reverse : Elapsed 0.038 ms (3.812 ms / 100) 3.938 -> 3.938 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.56% +0.71%] index_add_ spread : Elapsed 0.039 ms (3.939 ms / 100) 3.810 -> 3.812 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.71% +0.84%] index_copy_ spread : Elapsed 0.038 ms (3.812 ms / 100) 3.933 -> 3.934 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.039 ms (3.934 ms / 100) 3.804 -> 3.806 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.58% +0.60%] index_copy_ strided 3 : Elapsed 0.038 ms (3.806 ms / 100) 3.937 -> 3.939 ( +0.05%) [ +0.00% +0.13% +0.13% / +0.05% +0.64% +0.71%] index_add_ strided 7 : Elapsed 0.039 ms (3.937 ms / 100) 3.813 -> 3.815 ( +0.05%) [ +0.00% +0.08% +0.10% / +0.05% +0.66% +0.63%] index_copy_ strided 7 : Elapsed 0.038 ms (3.813 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.51% +0.51%] index_add_ perm : Elapsed 0.039 ms (3.940 ms / 100) 3.810 -> 3.810 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.50% +0.55%] index_copy_ perm : Elapsed 0.038 ms (3.811 ms / 100) 3.940 -> 3.940 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.46% +0.89%] index_add_ perm_sorted : Elapsed 0.039 ms (3.941 ms / 100) 3.808 -> 3.811 ( +0.08%) [ +0.13% +0.11% +0.00% / +0.08% +0.50% +0.79%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.813 ms / 100) 5.556 -> 5.557 ( +0.02%) [ +0.13% +0.05% +0.00% / +0.04% +0.02% +0.27%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.551 -> 5.558 ( +0.13%) [ +0.11% +0.11% +0.00% / +0.13% +0.32% +0.85%] index_select wrap : Elapsed 0.056 ms (5.557 ms / 100) 5.556 -> 5.557 ( +0.02%) [ +0.00% +0.13% +0.16% / +0.02% +0.14% +0.25%] index_select linear : Elapsed 0.056 ms (5.556 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.00% +0.04% +0.02% / -0.05% +0.02% +0.02%] index_select reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.553 ( -0.11%) [ +0.00% +0.09% +0.02% / +0.20% -0.04% -0.11%] index_select skip64 : Elapsed 0.056 ms (5.559 ms / 100) 5.559 -> 5.557 ( -0.04%) [ +0.13% +0.18% +0.00% / +0.07% -0.04% -0.02%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.559 ( +0.00%) [ +0.27% +0.00% +0.00% / +0.00% +0.07% +0.23%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.553 -> 5.558 ( +0.09%) [ +0.00% +0.07% +0.09% / +0.09% +0.11% +0.16%] index_select strided 3 : Elapsed 0.056 ms (5.553 ms / 100) 5.554 -> 5.562 ( +0.14%) [ +0.13% +0.07% +0.00% / +0.16% +0.14% +0.20%] index_select strided 5 : Elapsed 0.056 ms (5.561 ms / 100) 5.556 -> 5.559 ( +0.05%) [ +0.05% +0.00% +0.14% / +0.05% +0.09% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.559 ms / 100) 5.548 -> 5.548 ( +0.00%) [ +0.18% +0.23% +0.00% / +0.00% +0.09% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.558 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.04% +0.00% +0.14% / +0.04% +0.13% +0.04%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.559 -> 5.562 ( +0.05%) [ +0.00% +0.09% +0.00% / +0.07% +0.11% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.559 ms / 100) B = [40, 5, 20, 4] (stride (400, 4, 20, 1)) A = [16, 5, 20, 4] (stride (1, 16, 80, 1600)) dim = 0 4.428 -> 4.431 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.63% +0.54%] index_add_ linear : Elapsed 0.044 ms (4.434 ms / 100) 4.258 -> 4.261 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.56% +0.56%] index_copy_ linear : Elapsed 0.043 ms (4.258 ms / 100) 4.407 -> 4.407 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.79% +0.70%] index_add_ reverse : Elapsed 0.044 ms (4.407 ms / 100) 4.245 -> 4.243 ( -0.05%) [ +0.00% +0.05% +0.00% / -0.05% +0.75% +0.73%] index_copy_ reverse : Elapsed 0.042 ms (4.245 ms / 100) 4.426 -> 4.434 ( +0.18%) [ +0.00% +0.25% +0.05% / +0.18% +0.68% +0.72%] index_add_ spread : Elapsed 0.044 ms (4.426 ms / 100) 4.258 -> 4.258 ( +0.00%) [ +0.00% +0.12% +0.07% / +0.00% +0.56% +0.73%] index_copy_ spread : Elapsed 0.043 ms (4.258 ms / 100) 4.433 -> 4.432 ( -0.02%) [ +0.00% +0.00% +0.20% / -0.02% +0.68% +0.38%] index_add_ strided 3 : Elapsed 0.044 ms (4.433 ms / 100) 4.266 -> 4.273 ( +0.16%) [ +0.00% +0.16% +0.21% / +0.16% +0.70% +0.59%] index_copy_ strided 3 : Elapsed 0.043 ms (4.266 ms / 100) 4.406 -> 4.411 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.89% +0.66%] index_add_ strided 7 : Elapsed 0.044 ms (4.406 ms / 100) 4.244 -> 4.248 ( +0.09%) [ +0.00% +0.07% +0.00% / +0.09% +0.90% +0.75%] index_copy_ strided 7 : Elapsed 0.042 ms (4.244 ms / 100) 4.429 -> 4.429 ( +0.00%) [ +0.05% +0.00% +0.07% / +0.00% +0.72% +0.70%] index_add_ perm : Elapsed 0.044 ms (4.431 ms / 100) 4.257 -> 4.261 ( +0.09%) [ +0.00% +0.02% +0.02% / +0.09% +0.80% +0.70%] index_copy_ perm : Elapsed 0.043 ms (4.257 ms / 100) 4.423 -> 4.432 ( +0.20%) [ +0.34% +0.00% +0.32% / +0.20% +0.88% +0.93%] index_add_ perm_sorted : Elapsed 0.044 ms (4.438 ms / 100) 4.267 -> 4.266 ( -0.02%) [ +0.14% +0.00% +0.14% / -0.02% +0.75% +0.75%] index_copy_ perm_sorted : Elapsed 0.043 ms (4.273 ms / 100) 5.567 -> 5.563 ( -0.07%) [ +0.02% +0.00% +0.07% / +0.05% -0.07% -0.07%] index_select const : Elapsed 0.056 ms (5.568 ms / 100) 5.571 -> 5.559 ( -0.22%) [ +0.18% +0.07% +0.00% / -0.09% -0.16% -0.22%] index_select wrap : Elapsed 0.056 ms (5.581 ms / 100) 5.570 -> 5.562 ( -0.14%) [ +0.14% +0.02% +0.00% / -0.02% -0.14% -0.14%] index_select linear : Elapsed 0.056 ms (5.578 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.00% +0.07% +0.11% / +0.07% -0.05% -0.13%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.09% +0.00% +0.14% / +0.04% +0.00% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.564 ( +0.09%) [ +0.00% +0.05% +0.22% / +0.09% +0.16% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.04% +0.00% +0.02% / +0.05% +0.02% +0.04%] index_select spread : Elapsed 0.056 ms (5.568 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.00% +0.14% +0.16% / +0.09% +0.02% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.00% +0.11% +0.02% / +0.04% +0.04% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.563 ms / 100) 5.567 -> 5.569 ( +0.04%) [ +0.00% +0.05% +0.07% / +0.04% +0.09% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.566 -> 5.561 ( -0.09%) [ +0.00% +0.11% +0.00% / -0.09% +0.11% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.00% +0.02% +0.07% / +0.02% +0.05% +0.11%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.570 ( +0.04%) [ +0.00% +0.05% +0.00% / +0.14% +0.04% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.568 ms / 100) B = [40, 5, 20, 4] (stride (400, 1, 20, 5)) A = [16, 5, 20, 4] (stride (80, 1280, 1, 20)) dim = 0 3.633 -> 3.633 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.55%] index_add_ linear : Elapsed 0.036 ms (3.633 ms / 100) 3.503 -> 3.505 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.71% +0.80%] index_copy_ linear : Elapsed 0.035 ms (3.505 ms / 100) 3.637 -> 3.638 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.63% +0.69%] index_add_ reverse : Elapsed 0.036 ms (3.640 ms / 100) 3.505 -> 3.509 ( +0.11%) [ +0.11% +0.17% +0.00% / +0.11% +0.77% +0.77%] index_copy_ reverse : Elapsed 0.035 ms (3.509 ms / 100) 3.636 -> 3.639 ( +0.08%) [ +0.00% +0.06% +0.03% / +0.08% +0.52% +0.58%] index_add_ spread : Elapsed 0.036 ms (3.636 ms / 100) 3.508 -> 3.510 ( +0.06%) [ +0.00% +0.06% +0.03% / +0.06% +0.71% +0.74%] index_copy_ spread : Elapsed 0.035 ms (3.508 ms / 100) 3.631 -> 3.631 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.52% +0.50%] index_add_ strided 3 : Elapsed 0.036 ms (3.631 ms / 100) 3.501 -> 3.502 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.69% +0.83%] index_copy_ strided 3 : Elapsed 0.035 ms (3.501 ms / 100) 3.640 -> 3.639 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.55% +0.58%] index_add_ strided 7 : Elapsed 0.036 ms (3.640 ms / 100) 3.508 -> 3.510 ( +0.06%) [ +0.00% +0.09% +0.17% / +0.06% +0.68% +0.66%] index_copy_ strided 7 : Elapsed 0.035 ms (3.508 ms / 100) 3.634 -> 3.635 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.50% +0.52%] index_add_ perm : Elapsed 0.036 ms (3.635 ms / 100) 3.505 -> 3.508 ( +0.09%) [ +0.00% +0.03% +0.03% / +0.09% +0.63% +0.71%] index_copy_ perm : Elapsed 0.035 ms (3.505 ms / 100) 3.636 -> 3.634 ( -0.06%) [ +0.00% +0.03% +0.00% / -0.06% +0.36% +0.36%] index_add_ perm_sorted : Elapsed 0.036 ms (3.636 ms / 100) 3.506 -> 3.506 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.60% +0.60%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.508 ms / 100) 5.464 -> 5.459 ( -0.09%) [ +0.05% +0.04% +0.00% / -0.09% +0.16% +0.16%] index_select const : Elapsed 0.055 ms (5.467 ms / 100) 5.471 -> 5.472 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.22% +0.29%] index_select wrap : Elapsed 0.055 ms (5.472 ms / 100) 5.465 -> 5.472 ( +0.13%) [ +0.18% +0.00% +0.05% / +0.20% +0.13% +0.24%] index_select linear : Elapsed 0.055 ms (5.475 ms / 100) 5.468 -> 5.470 ( +0.04%) [ +0.13% +0.05% +0.00% / +0.04% +0.13% +0.20%] index_select reverse : Elapsed 0.055 ms (5.475 ms / 100) 5.466 -> 5.462 ( -0.07%) [ +0.00% +0.09% +0.00% / -0.05% +0.09% -0.07%] index_select skip64 : Elapsed 0.055 ms (5.466 ms / 100) 5.463 -> 5.464 ( +0.02%) [ +0.15% +0.00% +0.13% / +0.09% +0.13% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.471 ms / 100) 5.470 -> 5.472 ( +0.04%) [ +0.00% +0.00% +0.11% / +0.04% +0.07% +0.15%] index_select spread : Elapsed 0.055 ms (5.470 ms / 100) 5.463 -> 5.471 ( +0.15%) [ +0.20% +0.05% +0.00% / +0.16% +0.15% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.474 ms / 100) 5.467 -> 5.467 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.35% +0.16%] index_select strided 5 : Elapsed 0.055 ms (5.468 ms / 100) 5.468 -> 5.469 ( +0.02%) [ +0.00% +0.18% +0.15% / +0.02% +0.18% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.468 ms / 100) 5.459 -> 5.459 ( +0.00%) [ +0.00% +0.22% +0.09% / +0.00% +0.37% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.459 ms / 100) 5.469 -> 5.467 ( -0.04%) [ +0.07% +0.13% +0.00% / +0.05% -0.04% +0.02%] index_select random : Elapsed 0.055 ms (5.473 ms / 100) 5.468 -> 5.471 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.20% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.472 ms / 100) B = [40, 5, 20, 4] (stride (400, 1, 5, 100)) A = [16, 5, 20, 4] (stride (400, 1, 5, 100)) dim = 0 4.041 -> 4.048 ( +0.17%) [ +0.02% +0.05% +0.00% / +0.17% +0.94% +0.99%] index_add_ linear : Elapsed 0.040 ms (4.042 ms / 100) 3.915 -> 3.922 ( +0.18%) [ +0.00% +0.15% +0.18% / +0.18% +0.87% +0.87%] index_copy_ linear : Elapsed 0.039 ms (3.915 ms / 100) 4.060 -> 4.060 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.64% +0.64%] index_add_ reverse : Elapsed 0.041 ms (4.060 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.59% +0.69%] index_copy_ reverse : Elapsed 0.039 ms (3.921 ms / 100) 4.049 -> 4.047 ( -0.05%) [ +0.10% +0.02% +0.00% / -0.05% +0.77% +0.79%] index_add_ spread : Elapsed 0.041 ms (4.053 ms / 100) 3.916 -> 3.924 ( +0.20%) [ +0.00% +0.08% +0.20% / +0.20% +0.72% +0.72%] index_copy_ spread : Elapsed 0.039 ms (3.916 ms / 100) 4.053 -> 4.054 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.59% +0.67%] index_add_ strided 3 : Elapsed 0.041 ms (4.055 ms / 100) 3.915 -> 3.921 ( +0.15%) [ +0.00% +0.05% +0.18% / +0.15% +0.74% +0.69%] index_copy_ strided 3 : Elapsed 0.039 ms (3.915 ms / 100) 4.055 -> 4.058 ( +0.07%) [ +0.10% +0.00% +0.07% / +0.07% +0.84% +0.81%] index_add_ strided 7 : Elapsed 0.041 ms (4.059 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.00% +0.03% +0.00% / +0.10% +0.74% +0.77%] index_copy_ strided 7 : Elapsed 0.039 ms (3.920 ms / 100) 4.043 -> 4.046 ( +0.07%) [ +0.15% +0.02% +0.00% / +0.07% +0.72% +0.69%] index_add_ perm : Elapsed 0.040 ms (4.049 ms / 100) 3.917 -> 3.923 ( +0.15%) [ +0.05% +0.00% +0.08% / +0.15% +0.71% +0.66%] index_copy_ perm : Elapsed 0.039 ms (3.919 ms / 100) 4.050 -> 4.049 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.77% +0.79%] index_add_ perm_sorted : Elapsed 0.041 ms (4.050 ms / 100) 3.917 -> 3.922 ( +0.13%) [ +0.13% +0.15% +0.00% / +0.13% +0.61% +0.59%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.922 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.07% +0.04% +0.00% / +0.14% -0.04% -0.04%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.18% +0.07% +0.00% / +0.11% -0.04% +0.02%] index_select wrap : Elapsed 0.056 ms (5.577 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.04% +0.00% +0.04% / -0.02% +0.04% +0.09%] index_select linear : Elapsed 0.056 ms (5.568 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.00% +0.18% +0.13% / +0.14% +0.05% +0.16%] index_select reverse : Elapsed 0.056 ms (5.563 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.00% +0.13% +0.00% / +0.13% +0.16% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.00% +0.04% +0.16% / +0.11% +0.16% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.569 -> 5.562 ( -0.13%) [ +0.07% +0.00% +0.04% / +0.02% -0.13% -0.09%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.558 ( -0.09%) [ +0.22% +0.14% +0.00% / +0.11% +0.09% -0.09%] index_select strided 3 : Elapsed 0.056 ms (5.575 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.14% +0.00% +0.02% / +0.09% -0.13% +0.02%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.565 -> 5.559 ( -0.11%) [ +0.02% +0.00% +0.00% / -0.11% +0.22% -0.02%] index_select strided 7 : Elapsed 0.056 ms (5.566 ms / 100) 5.557 -> 5.568 ( +0.20%) [ +0.00% +0.13% +0.13% / +0.20% +0.29% +0.25%] index_select strided 8 : Elapsed 0.056 ms (5.557 ms / 100) 5.568 -> 5.557 ( -0.20%) [ +0.00% +0.13% +0.05% / +0.09% -0.20% -0.18%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.569 -> 5.563 ( -0.11%) [ +0.00% +0.09% +0.09% / +0.04% -0.11% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [40, 5, 20, 4] (stride (5, 1, 800, 200)) A = [16, 5, 20, 4] (stride (400, 1, 5, 100)) dim = 0 3.750 -> 3.757 ( +0.19%) [ +0.00% +0.19% +0.16% / +0.19% +0.64% +0.64%] index_add_ linear : Elapsed 0.037 ms (3.750 ms / 100) 3.618 -> 3.619 ( +0.03%) [ +0.17% +0.06% +0.00% / +0.03% +0.83% +0.83%] index_copy_ linear : Elapsed 0.036 ms (3.624 ms / 100) 3.750 -> 3.751 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.48% +0.59%] index_add_ reverse : Elapsed 0.038 ms (3.750 ms / 100) 3.612 -> 3.613 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.53% +0.66%] index_copy_ reverse : Elapsed 0.036 ms (3.614 ms / 100) 3.749 -> 3.752 ( +0.08%) [ +0.08% +0.11% +0.00% / +0.08% +0.48% +0.61%] index_add_ spread : Elapsed 0.038 ms (3.752 ms / 100) 3.612 -> 3.617 ( +0.14%) [ +0.17% +0.11% +0.00% / +0.14% +0.53% +0.61%] index_copy_ spread : Elapsed 0.036 ms (3.618 ms / 100) 3.743 -> 3.744 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.43% +0.45%] index_add_ strided 3 : Elapsed 0.037 ms (3.744 ms / 100) 3.608 -> 3.611 ( +0.08%) [ +0.06% +0.08% +0.00% / +0.08% +0.42% +0.58%] index_copy_ strided 3 : Elapsed 0.036 ms (3.610 ms / 100) 3.750 -> 3.753 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.48% +0.51%] index_add_ strided 7 : Elapsed 0.038 ms (3.752 ms / 100) 3.614 -> 3.618 ( +0.11%) [ +0.06% +0.08% +0.00% / +0.11% +0.55% +0.55%] index_copy_ strided 7 : Elapsed 0.036 ms (3.616 ms / 100) 3.755 -> 3.757 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.53% +0.53%] index_add_ perm : Elapsed 0.038 ms (3.758 ms / 100) 3.629 -> 3.633 ( +0.11%) [ +0.00% +0.06% +0.03% / +0.11% +0.44% +0.50%] index_copy_ perm : Elapsed 0.036 ms (3.629 ms / 100) 3.760 -> 3.760 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.40% +0.32%] index_add_ perm_sorted : Elapsed 0.038 ms (3.761 ms / 100) 3.629 -> 3.631 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.25% +0.44%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.630 ms / 100) 5.473 -> 5.470 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.11% +0.20%] index_select const : Elapsed 0.055 ms (5.474 ms / 100) 5.471 -> 5.472 ( +0.02%) [ +0.22% +0.00% +0.02% / +0.02% +0.15% +0.24%] index_select wrap : Elapsed 0.055 ms (5.483 ms / 100) 5.475 -> 5.476 ( +0.02%) [ +0.13% +0.04% +0.00% / +0.02% +0.02% +0.20%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.469 -> 5.478 ( +0.16%) [ +0.00% +0.22% +0.22% / +0.16% +0.27% +0.18%] index_select reverse : Elapsed 0.055 ms (5.469 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.07% +0.00% +0.22% / +0.13% +0.07% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.477 ms / 100) 5.476 -> 5.474 ( -0.04%) [ +0.05% +0.00% +0.15% / +0.09% -0.04% -0.02%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.468 -> 5.473 ( +0.09%) [ +0.18% +0.20% +0.00% / +0.09% +0.11% +0.29%] index_select spread : Elapsed 0.055 ms (5.478 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.15% +0.24%] index_select strided 3 : Elapsed 0.055 ms (5.473 ms / 100) 5.475 -> 5.471 ( -0.07%) [ +0.05% +0.00% +0.00% / -0.07% +0.13% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.479 ( +0.11%) [ +0.00% +0.16% +0.09% / +0.11% +0.15% +0.22%] index_select strided 7 : Elapsed 0.055 ms (5.473 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.00% +0.15% +0.00% / +0.04% +0.05% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.472 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.00% +0.13% +0.11% / -0.04% -0.02% +0.00%] index_select random : Elapsed 0.055 ms (5.473 ms / 100) 5.473 -> 5.471 ( -0.04%) [ +0.07% +0.02% +0.00% / -0.04% +0.15% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.477 ms / 100) out_shape = [16, 40, 20, 4] in_shape = [16, 5, 20, 4] idx_dim = 1 B = [16, 40, 20, 4] (stride (3200, 80, 4, 1)) A = [16, 5, 20, 4] (stride (100, 20, 1, 1600)) dim = 1 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.49% +0.63%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.36% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.424 ( +0.21%) [ +0.14% +0.00% +0.00% / +0.21% +0.56% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.29% +0.07% +0.00% / +0.07% +0.29% +0.29%] index_copy_ reverse : Elapsed 0.014 ms (1.383 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.49% +0.63%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.36% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.36% +0.44%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.427 ( +0.35%) [ +0.00% +0.07% +0.00% / +0.35% +0.56% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.07% +0.00% +0.00% / +0.44% +0.29% +0.65%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.63% +0.56%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.44% +0.36%] index_copy_ perm : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.63% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.07% +0.22% +0.00% / +0.07% +0.36% +0.36%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.193 -> 8.189 ( -0.05%) [ +0.20% +0.00% +0.20% / -0.05% +0.29% +0.41%] index_select const : Elapsed 0.082 ms (8.209 ms / 100) 8.202 -> 8.225 ( +0.28%) [ +0.00% +0.04% +0.11% / +0.28% +0.40% +0.29%] index_select wrap : Elapsed 0.082 ms (8.202 ms / 100) 8.213 -> 8.218 ( +0.06%) [ +0.33% +0.00% +0.16% / +0.06% +0.19% +0.22%] index_select linear : Elapsed 0.082 ms (8.240 ms / 100) 8.216 -> 8.224 ( +0.10%) [ +0.00% +0.15% +0.24% / +0.10% +0.16% +0.32%] index_select reverse : Elapsed 0.082 ms (8.216 ms / 100) 8.197 -> 8.207 ( +0.12%) [ +0.01% +0.00% +0.06% / +0.16% +0.12% +0.28%] index_select skip64 : Elapsed 0.082 ms (8.198 ms / 100) 8.185 -> 8.196 ( +0.13%) [ +0.20% +0.38% +0.00% / +0.13% +0.48% +0.40%] index_select skip256 : Elapsed 0.082 ms (8.201 ms / 100) 8.218 -> 8.221 ( +0.04%) [ +0.00% +0.27% +0.10% / +0.04% +0.49% +0.17%] index_select spread : Elapsed 0.082 ms (8.218 ms / 100) 8.219 -> 8.213 ( -0.07%) [ +0.05% +0.00% +0.16% / -0.01% +0.16% -0.07%] index_select strided 3 : Elapsed 0.082 ms (8.223 ms / 100) 8.211 -> 8.223 ( +0.15%) [ +0.40% +0.11% +0.00% / +0.29% +0.15% +0.32%] index_select random : Elapsed 0.082 ms (8.244 ms / 100) 8.220 -> 8.226 ( +0.07%) [ +0.23% +0.10% +0.00% / +0.10% +0.28% +0.07%] index_select random_sorted : Elapsed 0.082 ms (8.239 ms / 100) B = [16, 40, 20, 4] (stride (3200, 1, 40, 800)) A = [16, 5, 20, 4] (stride (20, 4, 320, 1)) dim = 1 1.561 -> 1.559 ( -0.13%) [ +1.09% +0.00% +0.26% / -0.13% +0.77% +0.96%] index_add_ linear : Elapsed 0.016 ms (1.578 ms / 100) 1.512 -> 1.512 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.53%] index_copy_ linear : Elapsed 0.015 ms (1.513 ms / 100) 1.547 -> 1.550 ( +0.19%) [ +0.19% +0.00% +0.13% / +0.19% +1.94% +1.10%] index_add_ reverse : Elapsed 0.015 ms (1.550 ms / 100) 1.502 -> 1.502 ( +0.00%) [ +0.40% +0.00% +0.33% / +0.00% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.015 ms (1.508 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.46% +0.00% +0.13% / +0.00% +1.04% +0.91%] index_add_ spread : Elapsed 0.015 ms (1.541 ms / 100) 1.491 -> 1.488 ( -0.20%) [ +0.40% +0.00% +0.00% / -0.20% +0.94% +0.60%] index_copy_ spread : Elapsed 0.015 ms (1.497 ms / 100) 1.557 -> 1.577 ( +1.28%) [ +1.03% +0.13% +0.00% / +1.35% +1.28% +1.35%] index_add_ strided 3 : Elapsed 0.016 ms (1.573 ms / 100) 1.512 -> 1.512 ( +0.00%) [ +0.00% +0.00% +0.26% / +0.00% +0.60% +0.93%] index_copy_ strided 3 : Elapsed 0.015 ms (1.512 ms / 100) 1.543 -> 1.550 ( +0.45%) [ +0.00% +0.13% +0.26% / +0.45% +1.62% +1.43%] index_add_ strided 7 : Elapsed 0.015 ms (1.543 ms / 100) 1.494 -> 1.499 ( +0.33%) [ +0.00% +0.20% +0.27% / +0.33% +0.60% +1.14%] index_copy_ strided 7 : Elapsed 0.015 ms (1.494 ms / 100) 1.545 -> 1.548 ( +0.19%) [ +0.00% +0.19% +0.00% / +0.19% +0.84% +0.97%] index_add_ perm : Elapsed 0.015 ms (1.545 ms / 100) 1.498 -> 1.498 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.73% +0.93%] index_copy_ perm : Elapsed 0.015 ms (1.499 ms / 100) 1.566 -> 1.572 ( +0.38%) [ +0.57% +0.00% +0.77% / +0.38% +0.64% +0.89%] index_add_ perm_sorted : Elapsed 0.016 ms (1.575 ms / 100) 1.510 -> 1.511 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.79% +0.79%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.512 ms / 100) 8.574 -> 8.599 ( +0.29%) [ +0.19% +0.00% +0.21% / +0.29% +0.31% +0.40%] index_select const : Elapsed 0.086 ms (8.590 ms / 100) 8.591 -> 8.608 ( +0.20%) [ +0.00% +0.12% +0.07% / +0.35% +0.26% +0.20%] index_select wrap : Elapsed 0.086 ms (8.591 ms / 100) 8.596 -> 8.604 ( +0.09%) [ +0.07% +0.00% +0.13% / +0.17% +0.45% +0.09%] index_select linear : Elapsed 0.086 ms (8.602 ms / 100) 8.587 -> 8.587 ( +0.00%) [ +0.08% +0.00% +0.05% / +0.00% +0.36% +0.17%] index_select reverse : Elapsed 0.086 ms (8.594 ms / 100) 8.577 -> 8.590 ( +0.15%) [ +0.12% +0.00% +0.23% / +0.27% +0.15% +0.23%] index_select skip64 : Elapsed 0.086 ms (8.587 ms / 100) 8.587 -> 8.592 ( +0.06%) [ +0.21% +0.00% +0.33% / +0.08% +0.06% +0.36%] index_select skip256 : Elapsed 0.086 ms (8.605 ms / 100) 8.590 -> 8.600 ( +0.12%) [ +0.22% +0.00% +0.30% / +0.12% +0.23% +0.22%] index_select spread : Elapsed 0.086 ms (8.609 ms / 100) 8.582 -> 8.575 ( -0.08%) [ +0.17% +0.00% +0.21% / -0.08% +0.34% +0.33%] index_select strided 3 : Elapsed 0.086 ms (8.597 ms / 100) 8.600 -> 8.591 ( -0.10%) [ +0.00% +0.05% +0.07% / -0.10% +0.44% +0.13%] index_select random : Elapsed 0.086 ms (8.600 ms / 100) 8.605 -> 8.597 ( -0.09%) [ +0.06% +0.00% +0.01% / -0.09% +0.14% +0.60%] index_select random_sorted : Elapsed 0.086 ms (8.610 ms / 100) B = [16, 40, 20, 4] (stride (160, 4, 2560, 1)) A = [16, 5, 20, 4] (stride (4, 64, 320, 1)) dim = 1 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.53% +0.53%] index_add_ linear : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.47% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.53% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.41% +0.54%] index_copy_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.20% +0.13% +0.00% / +0.07% +0.53% +0.79%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +1.36%] index_copy_ spread : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.521 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.53% +0.86%] index_add_ strided 3 : Elapsed 0.015 ms (1.521 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.54% +1.02%] index_copy_ strided 3 : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.53% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.474 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.59% +0.53%] index_add_ perm : Elapsed 0.015 ms (1.520 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.54% +0.54%] index_copy_ perm : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.474 -> 1.477 ( +0.20%) [ +0.07% +0.07% +0.00% / +0.20% +0.54% +0.81%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.475 ms / 100) 8.534 -> 8.525 ( -0.11%) [ +0.12% +0.05% +0.00% / -0.11% +0.04% +0.45%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.549 -> 8.539 ( -0.12%) [ +0.05% +0.13% +0.00% / -0.12% -0.09% +0.13%] index_select wrap : Elapsed 0.086 ms (8.553 ms / 100) 8.551 -> 8.546 ( -0.06%) [ +0.00% +0.21% +0.23% / -0.06% +0.40% +0.27%] index_select linear : Elapsed 0.086 ms (8.551 ms / 100) 8.542 -> 8.546 ( +0.05%) [ +0.44% +0.04% +0.00% / +0.05% +0.35% +0.20%] index_select reverse : Elapsed 0.086 ms (8.580 ms / 100) 8.516 -> 8.526 ( +0.12%) [ +0.00% +0.14% +0.13% / +0.22% +0.12% +0.38%] index_select skip64 : Elapsed 0.085 ms (8.516 ms / 100) 8.516 -> 8.524 ( +0.09%) [ +0.09% +0.00% +0.32% / +0.25% +0.09% +0.18%] index_select skip256 : Elapsed 0.085 ms (8.524 ms / 100) 8.553 -> 8.568 ( +0.18%) [ +0.13% +0.13% +0.00% / +0.29% +0.25% +0.18%] index_select spread : Elapsed 0.086 ms (8.564 ms / 100) 8.552 -> 8.556 ( +0.05%) [ +0.16% +0.11% +0.00% / +0.05% +0.28% +0.29%] index_select strided 3 : Elapsed 0.086 ms (8.566 ms / 100) 8.572 -> 8.568 ( -0.05%) [ +0.00% +0.12% +0.00% / -0.05% -0.03% +0.01%] index_select random : Elapsed 0.086 ms (8.572 ms / 100) 8.555 -> 8.554 ( -0.01%) [ +0.09% +0.05% +0.00% / -0.01% +0.16% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.563 ms / 100) B = [16, 40, 20, 4] (stride (4, 64, 2560, 1)) A = [16, 5, 20, 4] (stride (4, 64, 320, 1)) dim = 1 1.518 -> 1.519 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.66% +0.72%] index_add_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.68% +0.95%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.72% +0.79%] index_add_ reverse : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.27% +0.00% / +0.07% +0.68% +0.95%] index_copy_ reverse : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.472 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.53% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.472 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.61% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.72% +0.79%] index_add_ perm : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.68% +1.02%] index_copy_ perm : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.79%] index_add_ perm_sorted : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.75% +0.82%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.472 ms / 100) 8.517 -> 8.540 ( +0.27%) [ +0.43% +0.00% +0.16% / +0.31% +0.27% +0.28%] index_select const : Elapsed 0.086 ms (8.554 ms / 100) 8.545 -> 8.542 ( -0.04%) [ +0.02% +0.21% +0.00% / -0.04% +0.02% +0.42%] index_select wrap : Elapsed 0.085 ms (8.547 ms / 100) 8.546 -> 8.551 ( +0.06%) [ +0.00% +0.07% +0.20% / +0.06% +0.35% +0.18%] index_select linear : Elapsed 0.085 ms (8.546 ms / 100) 8.536 -> 8.553 ( +0.20%) [ +0.21% +0.00% +0.08% / +0.26% +0.45% +0.20%] index_select reverse : Elapsed 0.086 ms (8.554 ms / 100) 8.527 -> 8.530 ( +0.04%) [ +0.12% +0.13% +0.00% / +0.11% +0.04% +0.19%] index_select skip64 : Elapsed 0.085 ms (8.537 ms / 100) 8.525 -> 8.529 ( +0.05%) [ +0.29% +0.00% +0.09% / +0.05% +0.09% +0.39%] index_select skip256 : Elapsed 0.086 ms (8.550 ms / 100) 8.536 -> 8.544 ( +0.09%) [ +0.15% +0.26% +0.00% / +0.18% +0.09% +0.28%] index_select spread : Elapsed 0.085 ms (8.549 ms / 100) 8.537 -> 8.543 ( +0.07%) [ +0.00% +0.13% +0.23% / +0.07% +0.33% +0.32%] index_select strided 3 : Elapsed 0.085 ms (8.537 ms / 100) 8.548 -> 8.548 ( +0.00%) [ +0.00% +0.20% +0.02% / +0.00% +0.28% +0.28%] index_select random : Elapsed 0.085 ms (8.548 ms / 100) 8.541 -> 8.550 ( +0.11%) [ +0.00% +0.06% +0.36% / +0.11% +0.36% +0.32%] index_select random_sorted : Elapsed 0.085 ms (8.541 ms / 100) B = [16, 40, 20, 4] (stride (800, 20, 1, 12800)) A = [16, 5, 20, 4] (stride (1, 1280, 16, 320)) dim = 1 1.576 -> 1.577 ( +0.06%) [ +0.06% +0.19% +0.00% / +0.06% +0.44% +0.63%] index_add_ linear : Elapsed 0.016 ms (1.577 ms / 100) 1.528 -> 1.530 ( +0.13%) [ +0.00% +0.07% +0.07% / +0.13% +0.46% +0.46%] index_copy_ linear : Elapsed 0.015 ms (1.528 ms / 100) 1.584 -> 1.583 ( -0.06%) [ +0.00% +0.19% +0.06% / -0.06% +0.44% +0.44%] index_add_ reverse : Elapsed 0.016 ms (1.584 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.52% +0.46%] index_copy_ reverse : Elapsed 0.015 ms (1.534 ms / 100) 1.580 -> 1.582 ( +0.13%) [ +0.06% +0.13% +0.00% / +0.13% +0.51% +0.44%] index_add_ spread : Elapsed 0.016 ms (1.581 ms / 100) 1.534 -> 1.536 ( +0.13%) [ +0.20% +0.13% +0.00% / +0.13% +0.59% +0.52%] index_copy_ spread : Elapsed 0.015 ms (1.537 ms / 100) 1.580 -> 1.579 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.57% +0.51%] index_add_ strided 3 : Elapsed 0.016 ms (1.581 ms / 100) 1.535 -> 1.534 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.52% +0.85%] index_copy_ strided 3 : Elapsed 0.015 ms (1.535 ms / 100) 1.584 -> 1.584 ( +0.00%) [ +0.19% +0.06% +0.00% / +0.00% +0.44% +0.57%] index_add_ strided 7 : Elapsed 0.016 ms (1.587 ms / 100) 1.533 -> 1.534 ( +0.07%) [ +0.13% +0.00% +0.07% / +0.07% +0.46% +0.72%] index_copy_ strided 7 : Elapsed 0.015 ms (1.535 ms / 100) 1.575 -> 1.577 ( +0.13%) [ +0.06% +0.13% +0.00% / +0.13% +0.57% +0.63%] index_add_ perm : Elapsed 0.016 ms (1.576 ms / 100) 1.528 -> 1.528 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.52% +0.46%] index_copy_ perm : Elapsed 0.015 ms (1.528 ms / 100) 1.581 -> 1.581 ( +0.00%) [ +0.19% +0.00% +0.06% / +0.00% +0.57% +0.63%] index_add_ perm_sorted : Elapsed 0.016 ms (1.584 ms / 100) 1.534 -> 1.534 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.65%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.534 ms / 100) 8.523 -> 8.510 ( -0.15%) [ +0.06% +0.00% +0.11% / -0.15% +0.05% +0.15%] index_select const : Elapsed 0.085 ms (8.528 ms / 100) 8.525 -> 8.527 ( +0.02%) [ +0.12% +0.00% +0.05% / +0.06% +0.20% +0.02%] index_select wrap : Elapsed 0.085 ms (8.535 ms / 100) 8.529 -> 8.541 ( +0.14%) [ +0.00% +0.16% +0.06% / +0.14% +0.23% +0.33%] index_select linear : Elapsed 0.085 ms (8.529 ms / 100) 8.525 -> 8.518 ( -0.08%) [ +0.41% +0.00% +0.28% / -0.08% +0.21% +0.35%] index_select reverse : Elapsed 0.086 ms (8.560 ms / 100) 8.506 -> 8.525 ( +0.22%) [ +0.28% +0.00% +0.14% / +0.29% +0.22% +0.63%] index_select skip64 : Elapsed 0.085 ms (8.530 ms / 100) 8.516 -> 8.519 ( +0.04%) [ +0.00% +0.01% +0.07% / +0.09% +0.20% +0.04%] index_select skip256 : Elapsed 0.085 ms (8.516 ms / 100) 8.528 -> 8.532 ( +0.05%) [ +0.00% +0.05% +0.12% / +0.05% +0.08% +0.40%] index_select spread : Elapsed 0.085 ms (8.528 ms / 100) 8.529 -> 8.523 ( -0.07%) [ +0.08% +0.01% +0.00% / -0.07% +0.32% +0.34%] index_select strided 3 : Elapsed 0.085 ms (8.536 ms / 100) 8.527 -> 8.533 ( +0.07%) [ +0.00% +0.04% +0.14% / +0.07% +0.20% +0.39%] index_select random : Elapsed 0.085 ms (8.527 ms / 100) 8.529 -> 8.516 ( -0.15%) [ +0.23% +0.02% +0.00% / -0.15% +0.16% +0.29%] index_select random_sorted : Elapsed 0.085 ms (8.549 ms / 100) out_shape = [16, 5, 40, 4] in_shape = [16, 5, 20, 4] idx_dim = 2 B = [16, 5, 40, 4] (stride (800, 160, 4, 1)) A = [16, 5, 20, 4] (stride (1, 1280, 16, 320)) dim = 2 2.398 -> 2.414 ( +0.67%) [ +0.50% +0.00% +0.13% / +0.67% +0.96% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.394 -> 2.409 ( +0.63%) [ +0.00% +0.04% +0.04% / +0.63% +0.96% +1.04%] index_copy_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.402 -> 2.415 ( +0.54%) [ +0.08% +0.00% +0.12% / +0.54% +0.58% +0.62%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.397 -> 2.412 ( +0.63%) [ +0.13% +0.00% +0.13% / +0.63% +0.96% +0.83%] index_copy_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.423 -> 2.434 ( +0.45%) [ +0.04% +0.04% +0.00% / +0.74% +0.58% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.429 -> 2.441 ( +0.49%) [ +0.37% +0.00% +0.00% / +0.49% +0.74% +1.11%] index_copy_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.420 -> 2.426 ( +0.25%) [ +0.04% +0.00% +0.08% / +0.25% +0.41% +0.45%] index_add_ strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.419 -> 2.434 ( +0.62%) [ +0.12% +0.00% +0.87% / +0.62% +0.99% +0.87%] index_copy_ strided 3 : Elapsed 0.024 ms (2.422 ms / 100) 2.419 -> 2.432 ( +0.54%) [ +0.33% +0.37% +0.00% / +0.54% +0.54% +0.74%] index_add_ strided 7 : Elapsed 0.024 ms (2.427 ms / 100) 2.427 -> 2.442 ( +0.62%) [ +0.21% +0.00% +0.08% / +0.62% +0.66% +0.70%] index_copy_ strided 7 : Elapsed 0.024 ms (2.432 ms / 100) 2.420 -> 2.432 ( +0.50%) [ +0.00% +0.08% +0.00% / +0.50% +0.70% +0.58%] index_add_ perm : Elapsed 0.024 ms (2.420 ms / 100) 2.422 -> 2.440 ( +0.74%) [ +0.17% +0.00% +0.17% / +0.74% +0.91% +0.99%] index_copy_ perm : Elapsed 0.024 ms (2.426 ms / 100) 2.420 -> 2.432 ( +0.50%) [ +0.00% +0.21% +0.04% / +0.54% +0.50% +0.66%] index_add_ perm_sorted : Elapsed 0.024 ms (2.420 ms / 100) 2.423 -> 2.437 ( +0.58%) [ +0.00% +0.08% +0.08% / +0.58% +0.83% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.423 ms / 100) 4.413 -> 4.412 ( -0.02%) [ +0.14% +0.00% +0.07% / +0.07% +0.07% -0.02%] index_select const : Elapsed 0.044 ms (4.419 ms / 100) 4.420 -> 4.427 ( +0.16%) [ +0.16% +0.00% +0.16% / +0.16% +0.25% +0.25%] index_select wrap : Elapsed 0.044 ms (4.427 ms / 100) 4.422 -> 4.428 ( +0.14%) [ +0.09% +0.00% +0.07% / +0.14% +0.32% +0.27%] index_select linear : Elapsed 0.044 ms (4.426 ms / 100) 4.425 -> 4.432 ( +0.16%) [ +0.05% +0.00% +0.07% / +0.16% +0.16% +0.27%] index_select reverse : Elapsed 0.044 ms (4.427 ms / 100) 4.415 -> 4.418 ( +0.07%) [ +0.16% +0.00% +0.02% / +0.07% +0.18% +0.14%] index_select skip64 : Elapsed 0.044 ms (4.422 ms / 100) 4.417 -> 4.412 ( -0.11%) [ +0.02% +0.00% +0.09% / -0.11% +0.11% -0.05%] index_select skip256 : Elapsed 0.044 ms (4.418 ms / 100) 4.423 -> 4.422 ( -0.02%) [ +0.25% +0.00% +0.05% / -0.02% +0.23% +0.16%] index_select spread : Elapsed 0.044 ms (4.434 ms / 100) 4.424 -> 4.431 ( +0.16%) [ +0.05% +0.00% +0.11% / +0.16% +0.38% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.426 ms / 100) 4.417 -> 4.419 ( +0.05%) [ +0.00% +0.02% +0.02% / +0.05% +0.18% +0.16%] index_select strided 5 : Elapsed 0.044 ms (4.417 ms / 100) 4.418 -> 4.427 ( +0.20%) [ +0.16% +0.00% +0.11% / +0.20% +0.45% +0.34%] index_select strided 7 : Elapsed 0.044 ms (4.425 ms / 100) 4.416 -> 4.415 ( -0.02%) [ +0.05% +0.05% +0.00% / -0.02% +0.16% +0.00%] index_select strided 8 : Elapsed 0.044 ms (4.418 ms / 100) 4.413 -> 4.410 ( -0.07%) [ +0.00% +0.07% +0.20% / -0.07% +0.14% +0.11%] index_select strided 16 : Elapsed 0.044 ms (4.413 ms / 100) 4.427 -> 4.429 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.07% +0.32% +0.05%] index_select random : Elapsed 0.044 ms (4.427 ms / 100) 4.425 -> 4.426 ( +0.02%) [ +0.16% +0.07% +0.00% / +0.02% +0.07% +0.23%] index_select random_sorted : Elapsed 0.044 ms (4.432 ms / 100) B = [16, 5, 40, 4] (stride (800, 4, 20, 1)) A = [16, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 2 2.410 -> 2.418 ( +0.33%) [ +0.00% +0.04% +0.04% / +0.33% +0.46% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.00% +0.17% +0.12% / +0.50% +0.62% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.416 ( +0.58%) [ +0.04% +0.00% +0.08% / +0.58% +0.87% +1.17%] index_add_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.395 -> 2.409 ( +0.58%) [ +0.17% +0.04% +0.00% / +0.58% +1.04% +1.25%] index_copy_ reverse : Elapsed 0.024 ms (2.399 ms / 100) 2.411 -> 2.422 ( +0.46%) [ +0.00% +0.04% +0.00% / +0.46% +0.83% +0.87%] index_add_ spread : Elapsed 0.024 ms (2.411 ms / 100) 2.408 -> 2.421 ( +0.54%) [ +0.00% +0.08% +0.00% / +0.54% +1.12% +0.96%] index_copy_ spread : Elapsed 0.024 ms (2.408 ms / 100) 2.414 -> 2.425 ( +0.46%) [ +0.00% +0.08% +0.25% / +0.46% +0.46% +0.54%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.423 ( +0.54%) [ +0.08% +0.00% +0.21% / +0.54% +0.66% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.412 ms / 100) 2.416 -> 2.426 ( +0.41%) [ +0.08% +0.00% +0.29% / +0.58% +0.46% +0.41%] index_add_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.413 -> 2.422 ( +0.37%) [ +0.00% +0.08% +0.04% / +0.37% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.413 ms / 100) 2.415 -> 2.418 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.66% +0.12% +0.17%] index_add_ perm : Elapsed 0.024 ms (2.418 ms / 100) 2.415 -> 2.419 ( +0.17%) [ +0.00% +0.00% +0.04% / +0.33% +0.17% +0.25%] index_copy_ perm : Elapsed 0.024 ms (2.415 ms / 100) 2.414 -> 2.416 ( +0.08%) [ +0.21% +0.00% +0.33% / +0.54% +0.08% +0.17%] index_add_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 2.412 -> 2.418 ( +0.25%) [ +0.00% +0.00% +0.04% / +0.66% +0.41% +0.25%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) 4.428 -> 4.429 ( +0.02%) [ +0.00% +0.18% +0.11% / +0.09% +0.02% +0.14%] index_select const : Elapsed 0.044 ms (4.428 ms / 100) 4.443 -> 4.438 ( -0.11%) [ +0.09% +0.00% +0.07% / -0.11% +0.07% +0.11%] index_select wrap : Elapsed 0.044 ms (4.447 ms / 100) 4.447 -> 4.441 ( -0.13%) [ +0.11% +0.00% +0.00% / -0.09% -0.13% -0.11%] index_select linear : Elapsed 0.045 ms (4.452 ms / 100) 4.438 -> 4.436 ( -0.05%) [ +0.16% +0.00% +0.29% / -0.05% +0.25% +0.14%] index_select reverse : Elapsed 0.044 ms (4.445 ms / 100) 4.436 -> 4.435 ( -0.02%) [ +0.11% +0.00% +0.07% / +0.16% +0.00% -0.02%] index_select skip64 : Elapsed 0.044 ms (4.441 ms / 100) 4.427 -> 4.431 ( +0.09%) [ +0.00% +0.18% +0.20% / +0.09% +0.25% +0.27%] index_select skip256 : Elapsed 0.044 ms (4.427 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.11% +0.14%] index_select spread : Elapsed 0.044 ms (4.443 ms / 100) 4.440 -> 4.439 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.14% +0.14%] index_select strided 3 : Elapsed 0.044 ms (4.440 ms / 100) 4.428 -> 4.437 ( +0.20%) [ +0.09% +0.16% +0.00% / +0.23% +0.36% +0.20%] index_select strided 5 : Elapsed 0.044 ms (4.432 ms / 100) 4.436 -> 4.442 ( +0.14%) [ +0.11% +0.00% +0.02% / +0.14% +0.23% +0.32%] index_select strided 7 : Elapsed 0.044 ms (4.441 ms / 100) 4.433 -> 4.437 ( +0.09%) [ +0.00% +0.05% +0.00% / +0.18% +0.09% +0.32%] index_select strided 8 : Elapsed 0.044 ms (4.433 ms / 100) 4.432 -> 4.437 ( +0.11%) [ +0.11% +0.20% +0.00% / +0.18% +0.11% +0.52%] index_select strided 16 : Elapsed 0.044 ms (4.437 ms / 100) 4.442 -> 4.443 ( +0.02%) [ +0.00% +0.09% +0.18% / +0.05% +0.02% +0.63%] index_select random : Elapsed 0.044 ms (4.442 ms / 100) 4.441 -> 4.443 ( +0.05%) [ +0.09% +0.09% +0.00% / +0.11% +0.09% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.445 ms / 100) B = [16, 5, 40, 4] (stride (160, 2560, 4, 1)) A = [16, 5, 20, 4] (stride (80, 1280, 1, 20)) dim = 2 2.449 -> 2.461 ( +0.49%) [ +0.24% +0.08% +0.00% / +0.49% +0.65% +0.82%] index_add_ linear : Elapsed 0.025 ms (2.455 ms / 100) 2.441 -> 2.452 ( +0.45%) [ +0.00% +0.16% +0.08% / +0.45% +0.74% +1.02%] index_copy_ linear : Elapsed 0.024 ms (2.441 ms / 100) 2.451 -> 2.463 ( +0.49%) [ +0.16% +0.20% +0.00% / +0.49% +0.65% +0.69%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.447 -> 2.455 ( +0.33%) [ +0.00% +0.04% +0.08% / +0.33% +0.61% +0.82%] index_copy_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.472 -> 2.483 ( +0.44%) [ +0.00% +0.24% +0.00% / +0.53% +0.44% +0.44%] index_add_ spread : Elapsed 0.025 ms (2.472 ms / 100) 2.475 -> 2.487 ( +0.48%) [ +0.00% +0.12% +0.00% / +0.65% +0.48% +0.89%] index_copy_ spread : Elapsed 0.025 ms (2.475 ms / 100) 2.469 -> 2.480 ( +0.45%) [ +0.04% +0.16% +0.00% / +0.45% +0.61% +0.49%] index_add_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.463 -> 2.483 ( +0.81%) [ +0.53% +0.12% +0.00% / +0.81% +1.62% +0.81%] index_copy_ strided 3 : Elapsed 0.025 ms (2.476 ms / 100) 2.472 -> 2.485 ( +0.53%) [ +0.20% +0.04% +0.00% / +0.69% +0.53% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.471 -> 2.483 ( +0.49%) [ +0.24% +0.00% +0.12% / +0.69% +0.49% +1.25%] index_copy_ strided 7 : Elapsed 0.025 ms (2.477 ms / 100) 2.466 -> 2.483 ( +0.69%) [ +0.00% +0.08% +0.08% / +0.81% +0.69% +1.01%] index_add_ perm : Elapsed 0.025 ms (2.466 ms / 100) 2.467 -> 2.479 ( +0.49%) [ +0.00% +0.04% +0.08% / +0.49% +0.57% +1.86%] index_copy_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.468 -> 2.479 ( +0.45%) [ +0.04% +0.16% +0.00% / +0.57% +0.45% +0.97%] index_add_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.469 -> 2.481 ( +0.49%) [ +0.04% +0.04% +0.00% / +0.49% +0.65% +1.94%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.470 ms / 100) 4.490 -> 4.491 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.16% +0.51%] index_select const : Elapsed 0.045 ms (4.491 ms / 100) 4.487 -> 4.499 ( +0.27%) [ +0.18% +0.00% +0.09% / +0.29% +0.27% +0.42%] index_select wrap : Elapsed 0.045 ms (4.495 ms / 100) 4.491 -> 4.500 ( +0.20%) [ +0.16% +0.00% +0.13% / +0.31% +0.20% +0.47%] index_select linear : Elapsed 0.045 ms (4.498 ms / 100) 4.496 -> 4.497 ( +0.02%) [ +0.00% +0.18% +0.00% / +0.02% +0.16% +0.53%] index_select reverse : Elapsed 0.045 ms (4.496 ms / 100) 4.490 -> 4.490 ( +0.00%) [ +0.00% +0.13% +0.09% / +0.00% +0.24% +0.27%] index_select skip64 : Elapsed 0.045 ms (4.490 ms / 100) 4.488 -> 4.492 ( +0.09%) [ +0.00% +0.25% +0.09% / +0.11% +0.09% +0.47%] index_select skip256 : Elapsed 0.045 ms (4.488 ms / 100) 4.493 -> 4.501 ( +0.18%) [ +0.00% +0.07% +0.07% / +0.18% +0.20% +0.56%] index_select spread : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.502 ( +0.20%) [ +0.13% +0.00% +0.04% / +0.22% +0.24% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.499 ms / 100) 4.494 -> 4.502 ( +0.18%) [ +0.02% +0.00% +0.07% / +0.18% +0.18% +0.53%] index_select strided 5 : Elapsed 0.045 ms (4.495 ms / 100) 4.492 -> 4.502 ( +0.22%) [ +0.22% +0.00% +0.20% / +0.22% +0.27% +0.49%] index_select strided 7 : Elapsed 0.045 ms (4.502 ms / 100) 4.495 -> 4.493 ( -0.04%) [ +0.00% +0.07% +0.00% / -0.04% +0.18% +0.42%] index_select strided 8 : Elapsed 0.045 ms (4.495 ms / 100) 4.495 -> 4.492 ( -0.07%) [ +0.07% +0.13% +0.00% / -0.07% +0.07% +0.42%] index_select strided 16 : Elapsed 0.045 ms (4.498 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.27% +0.00% +0.16% / +0.07% +0.27% +0.51%] index_select random : Elapsed 0.045 ms (4.504 ms / 100) 4.494 -> 4.497 ( +0.07%) [ +0.16% +0.00% +0.11% / +0.07% +0.22% +0.27%] index_select random_sorted : Elapsed 0.045 ms (4.501 ms / 100) B = [16, 5, 40, 4] (stride (4, 64, 320, 1)) A = [16, 5, 20, 4] (stride (400, 4, 20, 1)) dim = 2 2.452 -> 2.465 ( +0.53%) [ +0.16% +0.04% +0.00% / +0.53% +0.73% +0.90%] index_add_ linear : Elapsed 0.025 ms (2.456 ms / 100) 2.443 -> 2.455 ( +0.49%) [ +0.12% +0.08% +0.00% / +0.49% +0.86% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.444 -> 2.461 ( +0.70%) [ +0.04% +0.04% +0.00% / +0.70% +1.43% +1.47%] index_add_ reverse : Elapsed 0.024 ms (2.445 ms / 100) 2.437 -> 2.461 ( +0.98%) [ +0.00% +0.41% +0.08% / +0.98% +1.19% +1.11%] index_copy_ reverse : Elapsed 0.024 ms (2.437 ms / 100) 2.452 -> 2.468 ( +0.65%) [ +0.16% +0.04% +0.00% / +0.65% +0.90% +0.90%] index_add_ spread : Elapsed 0.025 ms (2.456 ms / 100) 2.438 -> 2.456 ( +0.74%) [ +0.00% +0.16% +0.08% / +0.74% +1.07% +1.23%] index_copy_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.460 -> 2.467 ( +0.28%) [ +0.00% +0.00% +0.00% / +0.53% +0.28% +0.57%] index_add_ strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.443 -> 2.457 ( +0.57%) [ +0.00% +0.20% +0.16% / +0.57% +0.74% +1.11%] index_copy_ strided 3 : Elapsed 0.024 ms (2.443 ms / 100) 2.461 -> 2.473 ( +0.49%) [ +0.24% +0.04% +0.00% / +0.49% +0.57% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.04% +0.25% +0.00% / +0.53% +0.53% +0.78%] index_copy_ strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.460 -> 2.468 ( +0.33%) [ +0.12% +0.00% +0.00% / +0.45% +0.33% +0.33%] index_add_ perm : Elapsed 0.025 ms (2.463 ms / 100) 2.447 -> 2.459 ( +0.49%) [ +0.00% +0.00% +0.08% / +0.94% +0.49% +0.57%] index_copy_ perm : Elapsed 0.024 ms (2.447 ms / 100) 2.459 -> 2.469 ( +0.41%) [ +0.08% +0.12% +0.00% / +0.57% +0.41% +0.45%] index_add_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.444 -> 2.459 ( +0.61%) [ +0.00% +0.12% +0.29% / +0.82% +0.61% +1.68%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.444 ms / 100) 4.491 -> 4.495 ( +0.09%) [ +0.04% +0.00% +0.18% / +0.09% +0.16% +0.31%] index_select const : Elapsed 0.045 ms (4.493 ms / 100) 4.499 -> 4.504 ( +0.11%) [ +0.18% +0.00% +0.11% / +0.16% +0.11% +0.13%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.506 -> 4.504 ( -0.04%) [ +0.07% +0.00% +0.13% / -0.02% -0.04% +0.20%] index_select linear : Elapsed 0.045 ms (4.509 ms / 100) 4.502 -> 4.505 ( +0.07%) [ +0.18% +0.00% +0.07% / +0.18% +0.07% +0.76%] index_select reverse : Elapsed 0.045 ms (4.510 ms / 100) 4.491 -> 4.491 ( +0.00%) [ +0.29% +0.00% +0.07% / +0.13% +0.00% +0.07%] index_select skip64 : Elapsed 0.045 ms (4.504 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.00% +0.00% +0.16% / +0.07% +0.11% +0.16%] index_select skip256 : Elapsed 0.045 ms (4.493 ms / 100) 4.503 -> 4.501 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.04% +0.69%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.499 -> 4.509 ( +0.22%) [ +0.20% +0.00% +0.13% / +0.24% +0.22% +0.24%] index_select strided 3 : Elapsed 0.045 ms (4.508 ms / 100) 4.493 -> 4.497 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.11% +0.24% +0.09%] index_select strided 5 : Elapsed 0.045 ms (4.497 ms / 100) 4.500 -> 4.504 ( +0.09%) [ +0.24% +0.00% +0.16% / +0.09% +0.20% +0.36%] index_select strided 7 : Elapsed 0.045 ms (4.511 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.00% +0.09% +0.04% / +0.07% +0.07% +0.16%] index_select strided 8 : Elapsed 0.045 ms (4.498 ms / 100) 4.495 -> 4.497 ( +0.04%) [ +0.02% +0.00% +0.04% / +0.04% +0.07% +0.22%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.507 -> 4.502 ( -0.11%) [ +0.02% +0.02% +0.00% / +0.00% -0.11% +0.11%] index_select random : Elapsed 0.045 ms (4.508 ms / 100) 4.502 -> 4.505 ( +0.07%) [ +0.13% +0.00% +0.09% / +0.20% +0.07% +0.13%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [16, 5, 40, 4] (stride (1, 64, 320, 16)) A = [16, 5, 20, 4] (stride (20, 1280, 1, 320)) dim = 2 2.442 -> 2.455 ( +0.53%) [ +0.20% +0.08% +0.00% / +0.53% +0.82% +0.94%] index_add_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.445 -> 2.454 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.37% +0.74% +0.78%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.460 ( +0.61%) [ +0.12% +0.00% +0.12% / +0.70% +0.61% +0.78%] index_add_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.458 ( +0.41%) [ +0.16% +0.00% +0.00% / +0.41% +0.57% +0.86%] index_copy_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.450 -> 2.455 ( +0.20%) [ +0.04% +0.00% +0.20% / +0.20% +0.61% +0.45%] index_add_ spread : Elapsed 0.025 ms (2.451 ms / 100) 2.445 -> 2.460 ( +0.61%) [ +0.29% +0.16% +0.00% / +0.61% +0.90% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.452 ms / 100) 2.444 -> 2.460 ( +0.65%) [ +0.08% +0.00% +0.20% / +0.98% +0.70% +0.65%] index_add_ strided 3 : Elapsed 0.024 ms (2.446 ms / 100) 2.445 -> 2.460 ( +0.61%) [ +0.00% +0.12% +0.08% / +0.94% +0.65% +0.61%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.450 -> 2.460 ( +0.41%) [ +0.00% +0.04% +0.08% / +0.41% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.024 ms (2.450 ms / 100) 2.447 -> 2.456 ( +0.37%) [ +0.16% +0.08% +0.00% / +0.37% +0.49% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.459 ( +0.45%) [ +0.00% +0.00% +0.00% / +0.45% +0.69% +0.53%] index_add_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.449 -> 2.458 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.37% +0.57% +0.69%] index_copy_ perm : Elapsed 0.024 ms (2.450 ms / 100) 2.442 -> 2.465 ( +0.94%) [ +0.00% +0.25% +0.29% / +0.94% +0.98% +1.02%] index_add_ perm_sorted : Elapsed 0.024 ms (2.442 ms / 100) 2.446 -> 2.465 ( +0.78%) [ +0.04% +0.08% +0.00% / +1.19% +0.90% +0.78%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 4.497 -> 4.499 ( +0.04%) [ +0.02% +0.00% +0.02% / +0.07% +0.16% +0.04%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.502 -> 4.503 ( +0.02%) [ +0.00% +0.11% +0.00% / +0.02% +0.29% +0.09%] index_select wrap : Elapsed 0.045 ms (4.502 ms / 100) 4.500 -> 4.508 ( +0.18%) [ +0.13% +0.09% +0.00% / +0.18% +0.24% +0.24%] index_select linear : Elapsed 0.045 ms (4.506 ms / 100) 4.502 -> 4.506 ( +0.09%) [ +0.13% +0.02% +0.00% / +0.09% +0.24% +0.22%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.498 -> 4.504 ( +0.13%) [ +0.13% +0.00% +0.07% / +0.13% +0.18% +0.13%] index_select skip64 : Elapsed 0.045 ms (4.504 ms / 100) 4.491 -> 4.496 ( +0.11%) [ +0.07% +0.00% +0.16% / +0.11% +0.31% +0.42%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.495 -> 4.501 ( +0.13%) [ +0.00% +0.22% +0.18% / +0.13% +0.33% +0.36%] index_select spread : Elapsed 0.045 ms (4.495 ms / 100) 4.502 -> 4.501 ( -0.02%) [ +0.00% +0.04% +0.00% / -0.02% +0.16% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.502 ms / 100) 4.502 -> 4.505 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.11% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.18% +0.00% +0.09% / +0.00% +0.24% +0.38%] index_select strided 7 : Elapsed 0.045 ms (4.506 ms / 100) 4.501 -> 4.501 ( +0.00%) [ +0.09% +0.04% +0.00% / +0.00% +0.22% +0.20%] index_select strided 8 : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.497 ( -0.13%) [ +0.24% +0.00% +0.09% / -0.13% +0.09% +0.16%] index_select strided 16 : Elapsed 0.045 ms (4.514 ms / 100) 4.498 -> 4.505 ( +0.16%) [ +0.09% +0.18% +0.00% / +0.16% +0.18% +0.18%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.500 -> 4.506 ( +0.13%) [ +0.00% +0.00% +0.18% / +0.13% +0.20% +0.29%] index_select random_sorted : Elapsed 0.045 ms (4.500 ms / 100) B = [16, 5, 40, 4] (stride (1, 640, 16, 3200)) A = [16, 5, 20, 4] (stride (20, 1, 320, 5)) dim = 2 2.458 -> 2.469 ( +0.45%) [ +0.08% +0.08% +0.00% / +0.45% +0.73% +0.81%] index_add_ linear : Elapsed 0.025 ms (2.460 ms / 100) 2.445 -> 2.459 ( +0.57%) [ +0.08% +0.00% +0.08% / +0.57% +0.98% +0.74%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.450 -> 2.469 ( +0.78%) [ +0.20% +0.12% +0.00% / +0.78% +1.18% +0.98%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.437 -> 2.452 ( +0.62%) [ +0.04% +0.04% +0.00% / +0.62% +1.07% +1.19%] index_copy_ reverse : Elapsed 0.024 ms (2.438 ms / 100) 2.453 -> 2.470 ( +0.69%) [ +0.16% +0.00% +0.04% / +0.69% +0.86% +1.22%] index_add_ spread : Elapsed 0.025 ms (2.457 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.08% +0.21% +0.00% / +0.66% +1.23% +1.89%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.461 -> 2.473 ( +0.49%) [ +0.00% +0.04% +0.12% / +0.49% +0.61% +0.81%] index_add_ strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.450 -> 2.464 ( +0.57%) [ +0.08% +0.00% +0.04% / +0.57% +0.69% +0.82%] index_copy_ strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.460 -> 2.474 ( +0.57%) [ +0.20% +0.00% +0.20% / +0.57% +0.65% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.448 -> 2.459 ( +0.45%) [ +0.08% +0.00% +0.04% / +0.45% +0.78% +0.90%] index_copy_ strided 7 : Elapsed 0.025 ms (2.450 ms / 100) 2.462 -> 2.468 ( +0.24%) [ +0.20% +0.20% +0.00% / +0.41% +0.24% +0.28%] index_add_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.449 -> 2.458 ( +0.37%) [ +0.08% +0.00% +0.08% / +0.65% +0.37% +0.49%] index_copy_ perm : Elapsed 0.025 ms (2.451 ms / 100) 2.464 -> 2.468 ( +0.16%) [ +0.00% +0.20% +0.04% / +0.49% +0.16% +0.49%] index_add_ perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.450 -> 2.459 ( +0.37%) [ +0.12% +0.16% +0.00% / +0.53% +0.37% +0.61%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 4.493 -> 4.494 ( +0.02%) [ +0.00% +0.04% +0.31% / +0.02% +0.11% +0.20%] index_select const : Elapsed 0.045 ms (4.493 ms / 100) 4.507 -> 4.507 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.02% +0.04% +0.00%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.507 -> 4.504 ( -0.07%) [ +0.02% +0.13% +0.00% / -0.04% +0.07% -0.07%] index_select linear : Elapsed 0.045 ms (4.508 ms / 100) 4.507 -> 4.503 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.04% +0.13%] index_select reverse : Elapsed 0.045 ms (4.511 ms / 100) 4.502 -> 4.491 ( -0.24%) [ +0.04% +0.00% +0.04% / -0.07% -0.24% -0.02%] index_select skip64 : Elapsed 0.045 ms (4.504 ms / 100) 4.493 -> 4.497 ( +0.09%) [ +0.16% +0.16% +0.00% / +0.22% +0.09% +0.42%] index_select skip256 : Elapsed 0.045 ms (4.500 ms / 100) 4.502 -> 4.509 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.29% +0.18%] index_select spread : Elapsed 0.045 ms (4.509 ms / 100) 4.507 -> 4.499 ( -0.18%) [ +0.00% +0.04% +0.11% / -0.18% +0.16% +0.04%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.495 -> 4.496 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.29% +0.24%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.503 -> 4.511 ( +0.18%) [ +0.02% +0.00% +0.13% / +0.18% +0.20% +0.24%] index_select strided 7 : Elapsed 0.045 ms (4.504 ms / 100) 4.499 -> 4.498 ( -0.02%) [ +0.00% +0.13% +0.13% / +0.13% -0.02% +0.00%] index_select strided 8 : Elapsed 0.045 ms (4.499 ms / 100) 4.503 -> 4.495 ( -0.18%) [ +0.07% +0.18% +0.00% / +0.07% +0.02% -0.18%] index_select strided 16 : Elapsed 0.045 ms (4.506 ms / 100) 4.511 -> 4.504 ( -0.16%) [ +0.00% +0.11% +0.11% / +0.00% -0.09% -0.16%] index_select random : Elapsed 0.045 ms (4.511 ms / 100) 4.507 -> 4.504 ( -0.07%) [ +0.00% +0.13% +0.09% / -0.02% -0.07% +0.04%] index_select random_sorted : Elapsed 0.045 ms (4.507 ms / 100) B = [16, 5, 40, 4] (stride (1, 640, 16, 3200)) A = [16, 5, 20, 4] (stride (4, 64, 320, 1)) dim = 2 2.460 -> 2.472 ( +0.49%) [ +0.00% +0.04% +0.08% / +0.49% +0.65% +0.65%] index_add_ linear : Elapsed 0.025 ms (2.460 ms / 100) 2.445 -> 2.461 ( +0.65%) [ +0.20% +0.00% +0.04% / +0.65% +0.98% +0.86%] index_copy_ linear : Elapsed 0.025 ms (2.450 ms / 100) 2.462 -> 2.474 ( +0.49%) [ +0.04% +0.00% +0.08% / +0.49% +0.61% +0.57%] index_add_ reverse : Elapsed 0.025 ms (2.463 ms / 100) 2.452 -> 2.462 ( +0.41%) [ +0.00% +0.08% +0.00% / +0.41% +0.49% +0.82%] index_copy_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.463 -> 2.473 ( +0.41%) [ +0.08% +0.04% +0.00% / +0.41% +0.45% +0.41%] index_add_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.453 -> 2.467 ( +0.57%) [ +0.12% +0.00% +0.16% / +0.57% +0.61% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.456 ms / 100) 2.464 -> 2.470 ( +0.24%) [ +0.00% +0.20% +0.00% / +0.28% +0.24% +0.41%] index_add_ strided 3 : Elapsed 0.025 ms (2.464 ms / 100) 2.451 -> 2.466 ( +0.61%) [ +0.00% +0.16% +0.08% / +0.61% +0.69% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.468 -> 2.474 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.24% +0.24% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.468 ms / 100) 2.452 -> 2.468 ( +0.65%) [ +0.00% +0.16% +0.12% / +0.65% +0.65% +1.14%] index_copy_ strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.465 -> 2.473 ( +0.32%) [ +0.04% +0.12% +0.00% / +0.32% +0.77% +0.49%] index_add_ perm : Elapsed 0.025 ms (2.466 ms / 100) 2.450 -> 2.462 ( +0.49%) [ +0.16% +0.12% +0.00% / +0.49% +0.78% +0.78%] index_copy_ perm : Elapsed 0.025 ms (2.454 ms / 100) 2.464 -> 2.473 ( +0.37%) [ +0.00% +0.12% +0.00% / +0.37% +0.49% +0.65%] index_add_ perm_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.453 -> 2.463 ( +0.41%) [ +0.00% +0.04% +0.04% / +0.41% +0.73% +0.69%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.00% +0.04% +0.16% / +0.00% +0.02% +0.07%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.16% +0.00% +0.18% / +0.02% +0.31% +0.13%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.04% +0.09% +0.00% / -0.02% +0.09% +0.33%] index_select linear : Elapsed 0.045 ms (4.505 ms / 100) 4.502 -> 4.506 ( +0.09%) [ +0.13% +0.00% +0.04% / +0.09% +0.27% +0.24%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.495 -> 4.495 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.02% +0.07%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.491 -> 4.500 ( +0.20%) [ +0.11% +0.29% +0.00% / +0.20% +0.22% +0.29%] index_select skip256 : Elapsed 0.045 ms (4.496 ms / 100) 4.506 -> 4.509 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.09% +0.07% +0.60%] index_select spread : Elapsed 0.045 ms (4.507 ms / 100) 4.502 -> 4.502 ( +0.00%) [ +0.11% +0.07% +0.00% / +0.00% +0.27% +0.16%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.501 -> 4.501 ( +0.00%) [ +0.04% +0.00% +0.02% / +0.16% +0.00% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.503 ms / 100) 4.500 -> 4.508 ( +0.18%) [ +0.18% +0.00% +0.02% / +0.27% +0.44% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.508 ms / 100) 4.497 -> 4.497 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.20% +0.16%] index_select strided 8 : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.493 ( -0.11%) [ +0.07% +0.16% +0.00% / -0.11% +0.13% +0.27%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.495 -> 4.508 ( +0.29%) [ +0.22% +0.00% +0.20% / +0.29% +0.56% +0.47%] index_select random : Elapsed 0.045 ms (4.505 ms / 100) 4.501 -> 4.508 ( +0.16%) [ +0.00% +0.16% +0.11% / +0.16% +0.20% +0.27%] index_select random_sorted : Elapsed 0.045 ms (4.501 ms / 100) B = [16, 5, 40, 4] (stride (5, 1, 80, 3200)) A = [16, 5, 20, 4] (stride (1, 64, 320, 16)) dim = 2 2.400 -> 2.413 ( +0.54%) [ +0.33% +0.13% +0.00% / +0.67% +0.54% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.408 ms / 100) 2.391 -> 2.404 ( +0.54%) [ +0.25% +0.13% +0.00% / +0.59% +0.54% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.394 -> 2.407 ( +0.54%) [ +0.00% +0.21% +0.17% / +0.54% +0.75% +1.04%] index_add_ reverse : Elapsed 0.024 ms (2.394 ms / 100) 2.383 -> 2.405 ( +0.92%) [ +0.13% +0.29% +0.00% / +0.92% +1.17% +1.72%] index_copy_ reverse : Elapsed 0.024 ms (2.386 ms / 100) 2.394 -> 2.412 ( +0.75%) [ +0.04% +0.13% +0.00% / +0.75% +0.92% +1.09%] index_add_ spread : Elapsed 0.024 ms (2.395 ms / 100) 2.386 -> 2.398 ( +0.50%) [ +0.00% +0.04% +0.00% / +0.50% +0.96% +1.17%] index_copy_ spread : Elapsed 0.024 ms (2.386 ms / 100) 2.400 -> 2.409 ( +0.37%) [ +0.13% +0.00% +0.04% / +0.63% +0.37% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.403 ms / 100) 2.394 -> 2.406 ( +0.50%) [ +0.00% +0.00% +0.04% / +0.50% +0.58% +0.63%] index_copy_ strided 3 : Elapsed 0.024 ms (2.394 ms / 100) 2.398 -> 2.410 ( +0.50%) [ +0.00% +0.13% +0.25% / +0.58% +0.71% +0.50%] index_add_ strided 7 : Elapsed 0.024 ms (2.398 ms / 100) 2.391 -> 2.401 ( +0.42%) [ +0.00% +0.00% +0.04% / +0.42% +0.54% +0.67%] index_copy_ strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.399 -> 2.406 ( +0.29%) [ +0.00% +0.17% +0.08% / +0.58% +0.29% +0.29%] index_add_ perm : Elapsed 0.024 ms (2.399 ms / 100) 2.388 -> 2.399 ( +0.46%) [ +0.21% +0.00% +0.13% / +0.67% +0.46% +0.80%] index_copy_ perm : Elapsed 0.024 ms (2.393 ms / 100) 2.400 -> 2.407 ( +0.29%) [ +0.08% +0.00% +0.04% / +0.54% +0.29% +0.50%] index_add_ perm_sorted : Elapsed 0.024 ms (2.402 ms / 100) 2.393 -> 2.399 ( +0.25%) [ +0.04% +0.00% +0.08% / +0.59% +0.25% +0.84%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 4.419 -> 4.421 ( +0.05%) [ +0.11% +0.07% +0.00% / +0.05% +0.29% +0.32%] index_select const : Elapsed 0.044 ms (4.424 ms / 100) 4.428 -> 4.425 ( -0.07%) [ +0.05% +0.09% +0.00% / -0.05% +0.00% -0.07%] index_select wrap : Elapsed 0.044 ms (4.430 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.00% +0.11% +0.09% / +0.07% -0.09% +0.18%] index_select linear : Elapsed 0.044 ms (4.426 ms / 100) 4.423 -> 4.424 ( +0.02%) [ +0.18% +0.07% +0.00% / +0.14% +0.09% +0.02%] index_select reverse : Elapsed 0.044 ms (4.431 ms / 100) 4.416 -> 4.411 ( -0.11%) [ +0.00% +0.16% +0.23% / +0.07% -0.11% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.416 ms / 100) 4.414 -> 4.415 ( +0.02%) [ +0.09% +0.11% +0.00% / +0.02% +0.16% +0.27%] index_select skip256 : Elapsed 0.044 ms (4.418 ms / 100) 4.419 -> 4.421 ( +0.05%) [ +0.00% +0.23% +0.20% / +0.05% +0.27% +0.29%] index_select spread : Elapsed 0.044 ms (4.419 ms / 100) 4.424 -> 4.429 ( +0.11%) [ +0.07% +0.05% +0.00% / +0.16% +0.16% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.427 ms / 100) 4.420 -> 4.411 ( -0.20%) [ +0.02% +0.00% +0.00% / -0.20% +0.09% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.421 ms / 100) 4.419 -> 4.422 ( +0.07%) [ +0.00% +0.23% +0.18% / +0.07% +0.27% +0.20%] index_select strided 7 : Elapsed 0.044 ms (4.419 ms / 100) 4.425 -> 4.417 ( -0.18%) [ +0.00% +0.02% +0.00% / +0.07% -0.05% -0.18%] index_select strided 8 : Elapsed 0.044 ms (4.425 ms / 100) 4.418 -> 4.416 ( -0.05%) [ +0.00% +0.14% +0.20% / +0.07% +0.00% -0.05%] index_select strided 16 : Elapsed 0.044 ms (4.418 ms / 100) 4.424 -> 4.427 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.29% +0.07% +0.25%] index_select random : Elapsed 0.044 ms (4.427 ms / 100) 4.423 -> 4.422 ( -0.02%) [ +0.00% +0.09% +0.00% / +0.02% -0.02% +0.16%] index_select random_sorted : Elapsed 0.044 ms (4.423 ms / 100) out_shape = [16, 5, 20, 40] in_shape = [16, 5, 20, 4] idx_dim = 3 B = [16, 5, 20, 40] (stride (4000, 20, 1, 100)) A = [16, 5, 20, 4] (stride (20, 320, 1, 1600)) dim = 3 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.41% +0.49%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.51% +0.51%] index_copy_ linear : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.41% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.67% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.59% +0.59%] index_copy_ spread : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.73% +0.65%] index_add_ strided 3 : Elapsed 0.012 ms (1.227 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_copy_ strided 3 : Elapsed 0.012 ms (1.187 ms / 100) 1.225 -> 1.227 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.73% +1.14%] index_add_ strided 7 : Elapsed 0.012 ms (1.226 ms / 100) 1.187 -> 1.188 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.76% +1.85%] index_copy_ strided 7 : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.229 ( +0.24%) [ +0.00% +0.08% +0.00% / +0.24% +0.65% +0.73%] index_add_ perm : Elapsed 0.012 ms (1.226 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.76% +0.76%] index_copy_ perm : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.57% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.227 ms / 100) 1.186 -> 1.188 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.17% +0.76% +0.93%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.188 ms / 100) 8.690 -> 8.699 ( +0.10%) [ +0.02% +0.00% +0.26% / +0.20% +0.10% +0.29%] index_select const : Elapsed 0.087 ms (8.692 ms / 100) 8.727 -> 8.735 ( +0.09%) [ +0.00% +0.14% +0.00% / +0.26% +0.09% +0.14%] index_select wrap : Elapsed 0.087 ms (8.727 ms / 100) 8.727 -> 8.717 ( -0.11%) [ +0.09% +0.08% +0.00% / -0.11% +0.08% -0.07%] index_select linear : Elapsed 0.087 ms (8.735 ms / 100) 8.734 -> 8.729 ( -0.06%) [ +0.00% +0.10% +0.06% / +0.19% -0.06% +0.25%] index_select reverse : Elapsed 0.087 ms (8.734 ms / 100) 8.701 -> 8.694 ( -0.08%) [ +0.09% +0.15% +0.00% / -0.08% +0.20% +0.00%] index_select skip64 : Elapsed 0.087 ms (8.709 ms / 100) 8.693 -> 8.700 ( +0.08%) [ +0.00% +0.12% +0.00% / +0.15% +0.21% +0.08%] index_select skip256 : Elapsed 0.087 ms (8.693 ms / 100) 8.733 -> 8.724 ( -0.10%) [ +0.08% +0.00% +0.21% / +0.02% -0.10% -0.01%] index_select spread : Elapsed 0.087 ms (8.740 ms / 100) 8.723 -> 8.725 ( +0.02%) [ +0.21% +0.45% +0.00% / +0.02% +0.07% +0.30%] index_select strided 3 : Elapsed 0.087 ms (8.741 ms / 100) 8.724 -> 8.728 ( +0.05%) [ +0.06% +0.14% +0.00% / +0.11% +0.19% +0.05%] index_select random : Elapsed 0.087 ms (8.729 ms / 100) 8.727 -> 8.733 ( +0.07%) [ +0.00% +0.18% +0.16% / +0.14% +0.11% +0.07%] index_select random_sorted : Elapsed 0.087 ms (8.727 ms / 100) B = [16, 5, 20, 40] (stride (800, 12800, 40, 1)) A = [16, 5, 20, 4] (stride (400, 80, 4, 1)) dim = 3 1.151 -> 1.153 ( +0.17%) [ +0.09% +0.17% +0.00% / +0.17% +0.61% +0.61%] index_add_ linear : Elapsed 0.012 ms (1.152 ms / 100) 1.117 -> 1.116 ( -0.09%) [ +0.00% +0.18% +0.09% / -0.09% +0.45% +0.36%] index_copy_ linear : Elapsed 0.011 ms (1.117 ms / 100) 1.151 -> 1.153 ( +0.17%) [ +0.17% +0.26% +0.00% / +0.17% +0.52% +0.52%] index_add_ reverse : Elapsed 0.012 ms (1.153 ms / 100) 1.117 -> 1.118 ( +0.09%) [ +0.00% +0.18% +0.00% / +0.09% +0.45% +0.36%] index_copy_ reverse : Elapsed 0.011 ms (1.117 ms / 100) 1.165 -> 1.163 ( -0.17%) [ +0.00% +0.17% +0.26% / +0.09% -0.17% +0.00%] index_add_ spread : Elapsed 0.012 ms (1.165 ms / 100) 1.129 -> 1.128 ( -0.09%) [ +0.35% +0.18% +0.00% / -0.09% +0.00% +0.00%] index_copy_ spread : Elapsed 0.011 ms (1.133 ms / 100) 1.155 -> 1.154 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.09% +0.52% +0.61%] index_add_ strided 3 : Elapsed 0.012 ms (1.156 ms / 100) 1.120 -> 1.121 ( +0.09%) [ +0.18% +0.18% +0.00% / +0.09% +0.62% +0.45%] index_copy_ strided 3 : Elapsed 0.011 ms (1.122 ms / 100) 1.159 -> 1.159 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.26% +0.35%] index_add_ strided 7 : Elapsed 0.012 ms (1.159 ms / 100) 1.123 -> 1.121 ( -0.18%) [ +0.09% +0.27% +0.00% / -0.18% +0.27% +0.18%] index_copy_ strided 7 : Elapsed 0.011 ms (1.124 ms / 100) 1.158 -> 1.161 ( +0.26%) [ +0.17% +0.35% +0.00% / +0.35% +0.26% +0.35%] index_add_ perm : Elapsed 0.012 ms (1.160 ms / 100) 1.125 -> 1.125 ( +0.00%) [ +0.18% +0.09% +0.00% / +0.00% +0.00% +0.09%] index_copy_ perm : Elapsed 0.011 ms (1.127 ms / 100) 1.160 -> 1.160 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.26%] index_add_ perm_sorted : Elapsed 0.012 ms (1.160 ms / 100) 1.125 -> 1.124 ( -0.09%) [ +0.00% +0.00% +0.00% / +0.18% -0.09% +0.09%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.125 ms / 100) 8.370 -> 8.374 ( +0.05%) [ +0.23% +0.11% +0.00% / +0.31% +0.36% +0.05%] index_select const : Elapsed 0.084 ms (8.389 ms / 100) 8.363 -> 8.373 ( +0.12%) [ +0.16% +0.37% +0.00% / +0.12% +0.50% +0.50%] index_select wrap : Elapsed 0.084 ms (8.376 ms / 100) 8.366 -> 8.373 ( +0.08%) [ +0.00% +0.19% +0.30% / +0.08% +0.16% +0.13%] index_select linear : Elapsed 0.084 ms (8.366 ms / 100) 8.374 -> 8.366 ( -0.10%) [ +0.05% +0.00% +0.02% / -0.10% +0.14% +0.14%] index_select reverse : Elapsed 0.084 ms (8.378 ms / 100) 8.366 -> 8.373 ( +0.08%) [ +0.16% +0.11% +0.00% / +0.08% +0.31% +0.33%] index_select skip64 : Elapsed 0.084 ms (8.379 ms / 100) 8.369 -> 8.376 ( +0.08%) [ +0.23% +0.00% +0.11% / +0.08% +0.19% +0.08%] index_select skip256 : Elapsed 0.084 ms (8.388 ms / 100) 8.370 -> 8.379 ( +0.11%) [ +0.16% +0.00% +0.12% / +0.11% +0.14% +0.20%] index_select spread : Elapsed 0.084 ms (8.383 ms / 100) 8.368 -> 8.372 ( +0.05%) [ +0.19% +0.00% +0.06% / +0.18% +0.05% +0.20%] index_select strided 3 : Elapsed 0.084 ms (8.384 ms / 100) 8.367 -> 8.381 ( +0.17%) [ +0.16% +0.01% +0.00% / +0.17% +0.48% +0.24%] index_select random : Elapsed 0.084 ms (8.380 ms / 100) 8.372 -> 8.388 ( +0.19%) [ +0.17% +0.01% +0.00% / +0.25% +0.19% +0.25%] index_select random_sorted : Elapsed 0.084 ms (8.386 ms / 100) B = [16, 5, 20, 40] (stride (800, 12800, 1, 20)) A = [16, 5, 20, 4] (stride (1, 1280, 64, 16)) dim = 3 1.324 -> 1.327 ( +0.23%) [ +0.08% +0.00% +0.00% / +0.23% +0.53% +0.45%] index_add_ linear : Elapsed 0.013 ms (1.325 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.39% +0.39%] index_copy_ linear : Elapsed 0.013 ms (1.280 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.68% +0.61%] index_add_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.86%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.318 -> 1.321 ( +0.23%) [ +0.15% +0.23% +0.00% / +0.23% +0.61% +0.83%] index_add_ spread : Elapsed 0.013 ms (1.320 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.63% +0.78%] index_copy_ spread : Elapsed 0.013 ms (1.276 ms / 100) 1.323 -> 1.323 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.323 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.78% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.280 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.320 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.272 ( -0.08%) [ +0.16% +0.00% +0.16% / -0.08% +0.94% +0.79%] index_copy_ perm : Elapsed 0.013 ms (1.275 ms / 100) 1.318 -> 1.321 ( +0.23%) [ +0.15% +0.30% +0.00% / +0.23% +0.68% +1.14%] index_add_ perm_sorted : Elapsed 0.013 ms (1.320 ms / 100) 1.275 -> 1.277 ( +0.16%) [ +0.08% +0.24% +0.00% / +0.16% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 8.720 -> 8.732 ( +0.14%) [ +0.38% +0.00% +0.21% / +0.14% +0.25% +0.14%] index_select const : Elapsed 0.088 ms (8.753 ms / 100) 8.733 -> 8.747 ( +0.16%) [ +0.29% +0.00% +0.09% / +0.16% +0.21% +0.19%] index_select wrap : Elapsed 0.088 ms (8.758 ms / 100) 8.746 -> 8.748 ( +0.02%) [ +0.06% +0.00% +0.03% / +0.18% +0.37% +0.02%] index_select linear : Elapsed 0.088 ms (8.751 ms / 100) 8.753 -> 8.742 ( -0.13%) [ +0.00% +0.01% +0.00% / -0.13% +0.31% -0.05%] index_select reverse : Elapsed 0.088 ms (8.753 ms / 100) 8.728 -> 8.729 ( +0.01%) [ +0.00% +0.10% +0.05% / +0.01% +0.07% +0.26%] index_select skip64 : Elapsed 0.087 ms (8.728 ms / 100) 8.725 -> 8.733 ( +0.09%) [ +0.00% +0.10% +0.14% / +0.09% +0.16% +0.61%] index_select skip256 : Elapsed 0.087 ms (8.725 ms / 100) 8.759 -> 8.752 ( -0.08%) [ +0.01% +0.00% +0.16% / -0.08% -0.08% +0.11%] index_select spread : Elapsed 0.088 ms (8.760 ms / 100) 8.734 -> 8.751 ( +0.19%) [ +0.00% +0.25% +0.21% / +0.23% +0.19% +0.54%] index_select strided 3 : Elapsed 0.087 ms (8.734 ms / 100) 8.751 -> 8.746 ( -0.06%) [ +0.00% +0.10% +0.05% / +0.26% -0.02% -0.06%] index_select random : Elapsed 0.088 ms (8.751 ms / 100) 8.736 -> 8.738 ( +0.02%) [ +0.00% +0.10% +0.15% / +0.02% +0.26% +0.09%] index_select random_sorted : Elapsed 0.087 ms (8.736 ms / 100) B = [16, 5, 20, 40] (stride (1, 12800, 640, 16)) A = [16, 5, 20, 4] (stride (80, 1280, 4, 1)) dim = 3 1.229 -> 1.230 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.49% +0.65%] index_add_ linear : Elapsed 0.012 ms (1.231 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.84%] index_copy_ linear : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.24% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.230 ( -0.08%) [ +0.08% +0.00% +0.16% / -0.08% +0.32% +0.41%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.34% +0.34%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.41% +0.49%] index_add_ strided 3 : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.00% +0.17% +0.08% / +0.17% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.012 ms (1.191 ms / 100) 1.230 -> 1.233 ( +0.24%) [ +0.33% +0.00% +0.08% / +0.24% +0.41% +0.49%] index_add_ strided 7 : Elapsed 0.012 ms (1.234 ms / 100) 1.191 -> 1.197 ( +0.50%) [ +0.25% +0.08% +0.00% / +0.84% +0.50% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.233 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.24% +0.41%] index_add_ perm : Elapsed 0.012 ms (1.231 ms / 100) 1.192 -> 1.197 ( +0.42%) [ +0.00% +0.08% +0.08% / +0.42% +0.42% +0.76%] index_copy_ perm : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.24% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.50% +0.76%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.194 ms / 100) 8.729 -> 8.727 ( -0.02%) [ +0.06% +0.09% +0.00% / +0.17% +0.21% -0.02%] index_select const : Elapsed 0.087 ms (8.734 ms / 100) 8.717 -> 8.728 ( +0.13%) [ +0.00% +0.14% +0.24% / +0.13% +0.34% +0.24%] index_select wrap : Elapsed 0.087 ms (8.717 ms / 100) 8.710 -> 8.739 ( +0.33%) [ +0.24% +0.02% +0.00% / +0.34% +0.37% +0.33%] index_select linear : Elapsed 0.087 ms (8.731 ms / 100) 8.719 -> 8.721 ( +0.02%) [ +0.00% +0.07% +0.00% / +0.02% +0.41% +0.41%] index_select reverse : Elapsed 0.087 ms (8.719 ms / 100) 8.729 -> 8.730 ( +0.01%) [ +0.03% +0.02% +0.00% / +0.01% +0.34% +0.53%] index_select skip64 : Elapsed 0.087 ms (8.732 ms / 100) 8.726 -> 8.740 ( +0.16%) [ +0.18% +0.00% +0.13% / +0.16% +0.41% +0.19%] index_select skip256 : Elapsed 0.087 ms (8.742 ms / 100) 8.720 -> 8.732 ( +0.14%) [ +0.00% +0.05% +0.13% / +0.14% +0.18% +0.22%] index_select spread : Elapsed 0.087 ms (8.720 ms / 100) 8.730 -> 8.729 ( -0.01%) [ +0.05% +0.02% +0.00% / -0.01% +0.03% +0.08%] index_select strided 3 : Elapsed 0.087 ms (8.734 ms / 100) 8.716 -> 8.741 ( +0.29%) [ +0.21% +0.11% +0.00% / +0.29% +0.34% +0.40%] index_select random : Elapsed 0.087 ms (8.734 ms / 100) 8.728 -> 8.715 ( -0.15%) [ +0.10% +0.11% +0.00% / -0.15% +0.15% +0.15%] index_select random_sorted : Elapsed 0.087 ms (8.737 ms / 100) B = [16, 5, 20, 40] (stride (200, 1, 3200, 5)) A = [16, 5, 20, 4] (stride (400, 20, 1, 100)) dim = 3 1.226 -> 1.227 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.49% +0.82%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.93%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.226 -> 1.227 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.65% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.59% +0.67%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.230 -> 1.231 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.50% +0.50%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.226 -> 1.227 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.65% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.41% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.193 ( +0.17%) [ +0.00% +0.42% +0.08% / +0.17% +0.67% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.191 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.41% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.08% +0.25% +0.00% / +0.00% +0.59% +0.59%] index_copy_ perm : Elapsed 0.012 ms (1.191 ms / 100) 1.228 -> 1.230 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +0.49% +0.49%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.191 -> 1.191 ( +0.00%) [ +0.17% +0.00% +0.08% / +0.00% +0.59% +0.50%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) 8.753 -> 8.752 ( -0.01%) [ +0.00% +0.14% +0.06% / -0.01% +0.27% +0.33%] index_select const : Elapsed 0.088 ms (8.753 ms / 100) 8.774 -> 8.792 ( +0.21%) [ +0.19% +0.01% +0.00% / +0.21% +0.27% +0.43%] index_select wrap : Elapsed 0.088 ms (8.791 ms / 100) 8.760 -> 8.777 ( +0.19%) [ +0.00% +0.23% +0.02% / +0.19% +0.37% +0.37%] index_select linear : Elapsed 0.088 ms (8.760 ms / 100) 8.772 -> 8.785 ( +0.15%) [ +0.27% +0.18% +0.00% / +0.15% +0.50% +0.49%] index_select reverse : Elapsed 0.088 ms (8.796 ms / 100) 8.758 -> 8.760 ( +0.02%) [ +0.05% +0.00% +0.15% / +0.08% +0.02% +0.19%] index_select skip64 : Elapsed 0.088 ms (8.762 ms / 100) 8.752 -> 8.752 ( +0.00%) [ +0.00% +0.01% +0.13% / +0.08% +0.00% +0.13%] index_select skip256 : Elapsed 0.088 ms (8.752 ms / 100) 8.765 -> 8.785 ( +0.23%) [ +0.09% +0.18% +0.00% / +0.23% +0.29% +0.23%] index_select spread : Elapsed 0.088 ms (8.773 ms / 100) 8.786 -> 8.782 ( -0.05%) [ +0.14% +0.00% +0.22% / -0.05% +0.18% +0.18%] index_select strided 3 : Elapsed 0.088 ms (8.798 ms / 100) 8.769 -> 8.784 ( +0.17%) [ +0.00% +0.11% +0.11% / +0.17% +0.32% +0.35%] index_select random : Elapsed 0.088 ms (8.769 ms / 100) 8.771 -> 8.776 ( +0.06%) [ +0.22% +0.10% +0.00% / +0.10% +0.06% +0.24%] index_select random_sorted : Elapsed 0.088 ms (8.790 ms / 100) B = [16, 5, 20, 40] (stride (200, 1, 3200, 5)) A = [16, 5, 20, 4] (stride (80, 1280, 1, 20)) dim = 3 1.309 -> 1.308 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.76% +0.76%] index_add_ linear : Elapsed 0.013 ms (1.310 ms / 100) 1.270 -> 1.270 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.55% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.311 -> 1.311 ( +0.00%) [ +0.00% +0.08% +0.15% / +0.00% +0.46% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.311 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.271 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.15% +0.00% +0.23% / +0.00% +0.38% +0.23%] index_add_ spread : Elapsed 0.013 ms (1.317 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.47% +0.31%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.311 -> 1.312 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.61% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.311 ms / 100) 1.270 -> 1.277 ( +0.55%) [ +0.08% +0.08% +0.00% / +0.55% +0.79% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.271 ms / 100) 1.314 -> 1.315 ( +0.08%) [ +0.30% +0.00% +0.15% / +0.08% +0.38% +0.30%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.39% +0.24%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.00% +0.15% +0.23% / +0.15% +0.46% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.312 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.00% +0.16% +0.16% / +0.16% +0.39% +0.31%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.314 ( -0.08%) [ +0.08% +0.23% +0.00% / -0.08% +0.30% +0.38%] index_add_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.08% +0.08% +0.00% / -0.08% +0.31% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) 9.197 -> 9.204 ( +0.08%) [ +0.00% +0.12% +0.10% / +0.08% +0.13% +0.21%] index_select const : Elapsed 0.092 ms (9.197 ms / 100) 9.213 -> 9.215 ( +0.02%) [ +0.17% +0.00% +0.11% / +0.13% +0.02% +0.07%] index_select wrap : Elapsed 0.092 ms (9.229 ms / 100) 9.206 -> 9.211 ( +0.05%) [ +0.09% +0.00% +0.05% / +0.05% +0.15% +0.18%] index_select linear : Elapsed 0.092 ms (9.214 ms / 100) 9.228 -> 9.220 ( -0.09%) [ +0.02% +0.11% +0.00% / +0.07% -0.09% +0.08%] index_select reverse : Elapsed 0.092 ms (9.230 ms / 100) 9.181 -> 9.200 ( +0.21%) [ +0.00% +0.05% +0.04% / +0.23% +0.40% +0.21%] index_select skip64 : Elapsed 0.092 ms (9.181 ms / 100) 9.189 -> 9.190 ( +0.01%) [ +0.14% +0.00% +0.15% / +0.01% +0.16% +0.13%] index_select skip256 : Elapsed 0.092 ms (9.202 ms / 100) 9.216 -> 9.210 ( -0.07%) [ +0.08% +0.08% +0.00% / +0.11% -0.07% +0.24%] index_select spread : Elapsed 0.092 ms (9.223 ms / 100) 9.221 -> 9.218 ( -0.03%) [ +0.07% +0.00% +0.11% / -0.03% +0.24% +0.25%] index_select strided 3 : Elapsed 0.092 ms (9.227 ms / 100) 9.213 -> 9.216 ( +0.03%) [ +0.20% +0.00% +0.20% / +0.03% +0.07% +0.14%] index_select random : Elapsed 0.092 ms (9.231 ms / 100) 9.207 -> 9.218 ( +0.12%) [ +0.15% +0.07% +0.00% / +0.12% +0.29% +0.41%] index_select random_sorted : Elapsed 0.092 ms (9.221 ms / 100) B = [16, 5, 20, 40] (stride (5, 1, 3200, 80)) A = [16, 5, 20, 4] (stride (400, 20, 1, 100)) dim = 3 0.574 -> 0.575 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.35% +0.17%] index_add_ linear : Elapsed 0.006 ms (0.574 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.34% +0.68% +0.17%] index_copy_ linear : Elapsed 0.006 ms (0.588 ms / 100) 0.566 -> 0.567 ( +0.18%) [ +0.18% +0.35% +0.00% / +0.18% +1.06% +1.06%] index_add_ reverse : Elapsed 0.006 ms (0.567 ms / 100) 0.577 -> 0.578 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.87% +0.87%] index_copy_ reverse : Elapsed 0.006 ms (0.578 ms / 100) 0.567 -> 0.567 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +1.06% +7.23%] index_add_ spread : Elapsed 0.006 ms (0.568 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.00% +0.34% +0.17% / +0.17% +0.68% +0.51%] index_copy_ spread : Elapsed 0.006 ms (0.584 ms / 100) 0.566 -> 0.567 ( +0.18%) [ +0.00% +0.18% +0.53% / +0.18% +1.06% +0.88%] index_add_ strided 3 : Elapsed 0.006 ms (0.566 ms / 100) 0.577 -> 0.578 ( +0.17%) [ +0.00% +0.00% +0.17% / +0.17% +0.69% +0.52%] index_copy_ strided 3 : Elapsed 0.006 ms (0.577 ms / 100) 0.569 -> 0.569 ( +0.00%) [ +0.00% +3.69% +0.35% / +0.00% +0.70% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.569 ms / 100) 0.581 -> 0.582 ( +0.17%) [ +0.00% +0.17% +0.17% / +0.17% +0.52% +0.34%] index_copy_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.566 -> 0.567 ( +0.18%) [ +0.35% +0.35% +0.00% / +0.18% +0.53% +0.53%] index_add_ perm : Elapsed 0.006 ms (0.568 ms / 100) 0.577 -> 0.579 ( +0.35%) [ +0.00% +0.00% +0.00% / +0.35% +0.69% +0.69%] index_copy_ perm : Elapsed 0.006 ms (0.577 ms / 100) 0.566 -> 0.567 ( +0.18%) [ +0.18% +0.00% +0.18% / +0.18% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.006 ms (0.567 ms / 100) 0.580 -> 0.580 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.34% +0.34%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 5.030 -> 5.024 ( -0.12%) [ +0.04% +0.10% +0.00% / -0.04% -0.12% -0.06%] index_select const : Elapsed 0.050 ms (5.032 ms / 100) 5.042 -> 5.046 ( +0.08%) [ +0.00% +0.18% +0.20% / +0.08% +0.08% +0.08%] index_select wrap : Elapsed 0.050 ms (5.042 ms / 100) 5.035 -> 5.033 ( -0.04%) [ +0.26% +0.16% +0.00% / -0.04% +0.02% +0.22%] index_select linear : Elapsed 0.050 ms (5.048 ms / 100) 5.044 -> 5.049 ( +0.10%) [ +0.20% +0.20% +0.00% / +0.28% +0.10% +0.10%] index_select reverse : Elapsed 0.051 ms (5.054 ms / 100) 5.025 -> 5.035 ( +0.20%) [ +0.24% +0.00% +0.26% / +0.20% +0.38% +0.36%] index_select skip64 : Elapsed 0.050 ms (5.037 ms / 100) 5.030 -> 5.027 ( -0.06%) [ +0.02% +0.06% +0.00% / -0.06% +0.14% +0.12%] index_select skip256 : Elapsed 0.050 ms (5.031 ms / 100) 5.049 -> 5.039 ( -0.20%) [ +0.00% +0.00% +0.06% / -0.20% +0.06% +0.04%] index_select spread : Elapsed 0.050 ms (5.049 ms / 100) 5.049 -> 5.042 ( -0.14%) [ +0.16% +0.00% +0.08% / -0.08% -0.14% +0.06%] index_select strided 3 : Elapsed 0.051 ms (5.057 ms / 100) 5.039 -> 5.033 ( -0.12%) [ +0.00% +0.04% +0.36% / +0.18% -0.12% +0.18%] index_select random : Elapsed 0.050 ms (5.039 ms / 100) 5.034 -> 5.038 ( +0.08%) [ +0.10% +0.22% +0.00% / +0.12% +0.08% +0.14%] index_select random_sorted : Elapsed 0.050 ms (5.039 ms / 100) B = [16, 5, 20, 40] (stride (100, 20, 1, 1600)) A = [16, 5, 20, 4] (stride (400, 80, 1, 20)) dim = 3 1.065 -> 1.066 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.66% +0.66%] index_add_ linear : Elapsed 0.011 ms (1.065 ms / 100) 1.026 -> 1.028 ( +0.19%) [ +0.10% +0.10% +0.00% / +0.19% +0.88% +0.78%] index_copy_ linear : Elapsed 0.010 ms (1.027 ms / 100) 1.065 -> 1.065 ( +0.00%) [ +0.19% +0.09% +0.00% / +0.00% +0.38% +0.47%] index_add_ reverse : Elapsed 0.011 ms (1.067 ms / 100) 1.028 -> 1.030 ( +0.19%) [ +0.00% +0.29% +0.00% / +0.19% +0.39% +0.29%] index_copy_ reverse : Elapsed 0.010 ms (1.028 ms / 100) 1.065 -> 1.065 ( +0.00%) [ +0.09% +0.19% +0.00% / +0.00% +0.47% +0.47%] index_add_ spread : Elapsed 0.011 ms (1.066 ms / 100) 1.028 -> 1.028 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.29% +0.19%] index_copy_ spread : Elapsed 0.010 ms (1.028 ms / 100) 1.065 -> 1.065 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.47% +0.66%] index_add_ strided 3 : Elapsed 0.011 ms (1.066 ms / 100) 1.027 -> 1.027 ( +0.00%) [ +0.19% +0.10% +0.00% / +0.00% +0.49% +0.88%] index_copy_ strided 3 : Elapsed 0.010 ms (1.029 ms / 100) 1.065 -> 1.066 ( +0.09%) [ +0.00% +0.00% +0.00% / +0.09% +0.38% +0.66%] index_add_ strided 7 : Elapsed 0.011 ms (1.065 ms / 100) 1.027 -> 1.028 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.58% +0.39%] index_copy_ strided 7 : Elapsed 0.010 ms (1.028 ms / 100) 1.066 -> 1.066 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.38% +0.38%] index_add_ perm : Elapsed 0.011 ms (1.067 ms / 100) 1.027 -> 1.029 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.39% +0.29%] index_copy_ perm : Elapsed 0.010 ms (1.029 ms / 100) 1.065 -> 1.065 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.75% +0.47%] index_add_ perm_sorted : Elapsed 0.011 ms (1.066 ms / 100) 1.028 -> 1.027 ( -0.10%) [ +0.00% +0.10% +0.00% / -0.10% +0.49% +0.29%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.028 ms / 100) 7.891 -> 7.900 ( +0.11%) [ +0.00% +0.18% +0.27% / +0.11% +0.13% +0.39%] index_select const : Elapsed 0.079 ms (7.891 ms / 100) 7.896 -> 7.895 ( -0.01%) [ +0.23% +0.00% +0.08% / -0.01% +0.09% +0.24%] index_select wrap : Elapsed 0.079 ms (7.914 ms / 100) 7.886 -> 7.915 ( +0.37%) [ +0.18% +0.06% +0.00% / +0.41% +0.37% +0.37%] index_select linear : Elapsed 0.079 ms (7.900 ms / 100) 7.902 -> 7.918 ( +0.20%) [ +0.20% +0.19% +0.00% / +0.20% +0.38% +0.22%] index_select reverse : Elapsed 0.079 ms (7.918 ms / 100) 7.887 -> 7.900 ( +0.16%) [ +0.00% +0.04% +0.09% / +0.16% +0.52% +0.16%] index_select skip64 : Elapsed 0.079 ms (7.887 ms / 100) 7.888 -> 7.892 ( +0.05%) [ +0.00% +0.10% +0.22% / +0.05% +0.19% +0.25%] index_select skip256 : Elapsed 0.079 ms (7.888 ms / 100) 7.905 -> 7.906 ( +0.01%) [ +0.00% +0.00% +0.16% / +0.05% +0.01% +0.13%] index_select spread : Elapsed 0.079 ms (7.905 ms / 100) 7.906 -> 7.899 ( -0.09%) [ +0.18% +0.29% +0.00% / -0.09% +0.13% -0.01%] index_select strided 3 : Elapsed 0.079 ms (7.920 ms / 100) 7.902 -> 7.908 ( +0.08%) [ +0.11% +0.05% +0.00% / +0.08% +0.08% +0.27%] index_select random : Elapsed 0.079 ms (7.911 ms / 100) 7.899 -> 7.909 ( +0.13%) [ +0.01% +0.10% +0.00% / +0.13% +0.38% +0.24%] index_select random_sorted : Elapsed 0.079 ms (7.900 ms / 100) B = [16, 5, 20, 40] (stride (100, 20, 1, 1600)) A = [16, 5, 20, 4] (stride (400, 4, 20, 1)) dim = 3 1.152 -> 1.152 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.43% +0.43%] index_add_ linear : Elapsed 0.012 ms (1.152 ms / 100) 1.113 -> 1.112 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.54% +0.45%] index_copy_ linear : Elapsed 0.011 ms (1.114 ms / 100) 1.158 -> 1.158 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.52%] index_add_ reverse : Elapsed 0.012 ms (1.158 ms / 100) 1.120 -> 1.119 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.54% +0.54%] index_copy_ reverse : Elapsed 0.011 ms (1.121 ms / 100) 1.152 -> 1.152 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.43% +0.52%] index_add_ spread : Elapsed 0.012 ms (1.152 ms / 100) 1.112 -> 1.113 ( +0.09%) [ +0.00% +0.18% +0.00% / +0.09% +0.63% +0.72%] index_copy_ spread : Elapsed 0.011 ms (1.112 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.17% +0.09% +0.00% / +0.00% +0.78% +0.87%] index_add_ strided 3 : Elapsed 0.012 ms (1.151 ms / 100) 1.111 -> 1.111 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.90% +0.81%] index_copy_ strided 3 : Elapsed 0.011 ms (1.112 ms / 100) 1.155 -> 1.156 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.09% +0.95% +0.87%] index_add_ strided 7 : Elapsed 0.012 ms (1.156 ms / 100) 1.117 -> 1.118 ( +0.09%) [ +0.18% +0.00% +0.00% / +0.09% +1.07% +0.90%] index_copy_ strided 7 : Elapsed 0.011 ms (1.119 ms / 100) 1.150 -> 1.150 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.78% +0.61%] index_add_ perm : Elapsed 0.012 ms (1.151 ms / 100) 1.112 -> 1.111 ( -0.09%) [ +0.00% +0.00% +0.00% / -0.09% +0.81% +0.63%] index_copy_ perm : Elapsed 0.011 ms (1.112 ms / 100) 1.157 -> 1.158 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.012 ms (1.158 ms / 100) 1.119 -> 1.119 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.71% +0.63%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.120 ms / 100) 8.286 -> 8.293 ( +0.08%) [ +0.12% +0.00% +0.12% / +0.08% +0.23% +0.25%] index_select const : Elapsed 0.083 ms (8.296 ms / 100) 8.283 -> 8.293 ( +0.12%) [ +0.19% +0.29% +0.00% / +0.12% +0.14% +0.23%] index_select wrap : Elapsed 0.083 ms (8.299 ms / 100) 8.280 -> 8.285 ( +0.06%) [ +0.04% +0.00% +0.07% / +0.06% +0.12% +0.29%] index_select linear : Elapsed 0.083 ms (8.283 ms / 100) 8.281 -> 8.282 ( +0.01%) [ +0.00% +0.04% +0.39% / +0.19% +0.01% +0.18%] index_select reverse : Elapsed 0.083 ms (8.281 ms / 100) 8.283 -> 8.290 ( +0.08%) [ +0.07% +0.07% +0.00% / +0.08% +0.16% +0.19%] index_select skip64 : Elapsed 0.083 ms (8.289 ms / 100) 8.285 -> 8.284 ( -0.01%) [ +0.24% +0.07% +0.00% / +0.02% +0.05% -0.01%] index_select skip256 : Elapsed 0.083 ms (8.305 ms / 100) 8.287 -> 8.292 ( +0.06%) [ +0.12% +0.16% +0.00% / +0.33% +0.06% +0.24%] index_select spread : Elapsed 0.083 ms (8.297 ms / 100) 8.287 -> 8.280 ( -0.08%) [ +0.04% +0.00% +0.01% / +0.19% -0.08% +0.14%] index_select strided 3 : Elapsed 0.083 ms (8.290 ms / 100) 8.286 -> 8.295 ( +0.11%) [ +0.14% +0.00% +0.07% / +0.11% +0.14% +0.13%] index_select random : Elapsed 0.083 ms (8.298 ms / 100) 8.282 -> 8.287 ( +0.06%) [ +0.14% +0.12% +0.00% / +0.10% +0.28% +0.06%] index_select random_sorted : Elapsed 0.083 ms (8.294 ms / 100) B = [16, 5, 20, 40] (stride (100, 1, 5, 1600)) A = [16, 5, 20, 4] (stride (20, 4, 320, 1)) dim = 3 1.303 -> 1.306 ( +0.23%) [ +0.00% +0.08% +0.00% / +0.23% +1.15% +0.77%] index_add_ linear : Elapsed 0.013 ms (1.303 ms / 100) 1.256 -> 1.256 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.80% +1.19%] index_copy_ linear : Elapsed 0.013 ms (1.256 ms / 100) 1.305 -> 1.305 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.84% +0.84%] index_add_ reverse : Elapsed 0.013 ms (1.306 ms / 100) 1.257 -> 1.258 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.64% +0.56%] index_copy_ reverse : Elapsed 0.013 ms (1.257 ms / 100) 1.291 -> 1.292 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.39% +0.39%] index_add_ spread : Elapsed 0.013 ms (1.292 ms / 100) 1.248 -> 1.253 ( +0.40%) [ +0.08% +0.16% +0.00% / +0.40% +0.56% +0.56%] index_copy_ spread : Elapsed 0.012 ms (1.249 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.47% +0.47%] index_add_ strided 3 : Elapsed 0.013 ms (1.286 ms / 100) 1.245 -> 1.247 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.64% +0.72%] index_copy_ strided 3 : Elapsed 0.012 ms (1.246 ms / 100) 1.285 -> 1.284 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.47% +0.47%] index_add_ strided 7 : Elapsed 0.013 ms (1.285 ms / 100) 1.245 -> 1.245 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.64% +0.48%] index_copy_ strided 7 : Elapsed 0.012 ms (1.246 ms / 100) 1.290 -> 1.291 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.54% +0.47%] index_add_ perm : Elapsed 0.013 ms (1.292 ms / 100) 1.248 -> 1.250 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.48% +0.40%] index_copy_ perm : Elapsed 0.012 ms (1.249 ms / 100) 1.305 -> 1.305 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.54% +0.54%] index_add_ perm_sorted : Elapsed 0.013 ms (1.306 ms / 100) 1.257 -> 1.259 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.56% +0.56%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.257 ms / 100) 8.730 -> 8.728 ( -0.02%) [ +0.00% +0.00% +0.27% / -0.02% -0.01% +0.01%] index_select const : Elapsed 0.087 ms (8.730 ms / 100) 8.738 -> 8.726 ( -0.14%) [ +0.11% +0.05% +0.00% / -0.14% +0.07% +0.05%] index_select wrap : Elapsed 0.087 ms (8.748 ms / 100) 8.729 -> 8.741 ( +0.14%) [ +0.00% +0.19% +0.05% / +0.14% +0.25% +0.22%] index_select linear : Elapsed 0.087 ms (8.729 ms / 100) 8.735 -> 8.729 ( -0.07%) [ +0.05% +0.10% +0.00% / +0.05% +0.05% -0.07%] index_select reverse : Elapsed 0.087 ms (8.739 ms / 100) 8.730 -> 8.730 ( +0.00%) [ +0.19% +0.14% +0.00% / +0.00% +0.21% +0.18%] index_select skip64 : Elapsed 0.087 ms (8.747 ms / 100) 8.731 -> 8.733 ( +0.02%) [ +0.00% +0.22% +0.03% / +0.02% +0.14% +0.15%] index_select skip256 : Elapsed 0.087 ms (8.731 ms / 100) 8.734 -> 8.731 ( -0.03%) [ +0.00% +0.05% +0.15% / -0.03% +0.14% +0.07%] index_select spread : Elapsed 0.087 ms (8.734 ms / 100) 8.745 -> 8.732 ( -0.15%) [ +0.00% +0.05% +0.07% / -0.15% -0.05% +0.03%] index_select strided 3 : Elapsed 0.087 ms (8.745 ms / 100) 8.728 -> 8.730 ( +0.02%) [ +0.13% +0.10% +0.00% / +0.02% +0.29% +0.06%] index_select random : Elapsed 0.087 ms (8.739 ms / 100) 8.740 -> 8.741 ( +0.01%) [ +0.00% +0.09% +0.03% / +0.21% +0.01% +0.03%] index_select random_sorted : Elapsed 0.087 ms (8.740 ms / 100) B = [16, 5, 20, 40] (stride (20, 320, 1, 1600)) A = [16, 5, 20, 4] (stride (20, 1280, 1, 320)) dim = 3 1.311 -> 1.310 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.53% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.311 ms / 100) 1.270 -> 1.269 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.63% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.270 ms / 100) 1.310 -> 1.312 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.53% +0.61%] index_add_ reverse : Elapsed 0.013 ms (1.310 ms / 100) 1.269 -> 1.271 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.71% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.269 ms / 100) 1.308 -> 1.313 ( +0.38%) [ +0.23% +0.31% +0.00% / +0.38% +0.69% +0.76%] index_add_ spread : Elapsed 0.013 ms (1.311 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_copy_ spread : Elapsed 0.013 ms (1.269 ms / 100) 1.308 -> 1.308 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.310 ms / 100) 1.268 -> 1.270 ( +0.16%) [ +0.00% +0.00% +0.08% / +0.16% +0.71% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.268 ms / 100) 1.307 -> 1.308 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.77% +0.92%] index_add_ strided 7 : Elapsed 0.013 ms (1.309 ms / 100) 1.268 -> 1.270 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.16% +0.71% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.268 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.84% +0.69%] index_add_ perm : Elapsed 0.013 ms (1.310 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.63% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.270 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.84% +0.76%] index_add_ perm_sorted : Elapsed 0.013 ms (1.310 ms / 100) 1.269 -> 1.270 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.270 ms / 100) 9.125 -> 9.112 ( -0.14%) [ +0.00% +0.28% +0.13% / -0.14% +0.32% +0.23%] index_select const : Elapsed 0.091 ms (9.125 ms / 100) 9.153 -> 9.165 ( +0.13%) [ +0.02% +0.05% +0.00% / +0.13% +0.26% +0.24%] index_select wrap : Elapsed 0.092 ms (9.155 ms / 100) 9.135 -> 9.157 ( +0.24%) [ +0.11% +0.16% +0.00% / +0.31% +0.24% +0.35%] index_select linear : Elapsed 0.091 ms (9.145 ms / 100) 9.162 -> 9.142 ( -0.22%) [ +0.09% +0.34% +0.00% / +0.01% -0.22% -0.01%] index_select reverse : Elapsed 0.092 ms (9.170 ms / 100) 9.117 -> 9.124 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.22% +0.33%] index_select skip64 : Elapsed 0.091 ms (9.131 ms / 100) 9.109 -> 9.096 ( -0.14%) [ +0.11% +0.15% +0.00% / -0.14% +0.31% +0.53%] index_select skip256 : Elapsed 0.091 ms (9.119 ms / 100) 9.144 -> 9.156 ( +0.13%) [ +0.00% +0.35% +0.16% / +0.13% +0.24% +0.25%] index_select spread : Elapsed 0.091 ms (9.144 ms / 100) 9.144 -> 9.159 ( +0.16%) [ +0.00% +0.16% +0.23% / +0.16% +0.30% +0.32%] index_select strided 3 : Elapsed 0.091 ms (9.144 ms / 100) 9.156 -> 9.164 ( +0.09%) [ +0.05% +0.12% +0.00% / +0.09% +0.15% +0.20%] index_select random : Elapsed 0.092 ms (9.161 ms / 100) 9.153 -> 9.144 ( -0.10%) [ +0.08% +0.00% +0.01% / -0.10% +0.35% +0.11%] index_select random_sorted : Elapsed 0.092 ms (9.160 ms / 100) out_shape = [40, 20, 4, 5] in_shape = [16, 20, 4, 5] idx_dim = 0 B = [40, 20, 4, 5] (stride (20, 800, 1, 4)) A = [16, 20, 4, 5] (stride (1, 320, 80, 16)) dim = 0 3.571 -> 3.571 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.36% +0.36%] index_add_ linear : Elapsed 0.036 ms (3.572 ms / 100) 3.424 -> 3.423 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.32% +0.41%] index_copy_ linear : Elapsed 0.034 ms (3.425 ms / 100) 3.565 -> 3.565 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.56% +0.87%] index_add_ reverse : Elapsed 0.036 ms (3.566 ms / 100) 3.433 -> 3.434 ( +0.03%) [ +0.03% +0.09% +0.00% / +0.03% +0.47% +0.90%] index_copy_ reverse : Elapsed 0.034 ms (3.434 ms / 100) 3.594 -> 3.594 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.17% +0.17%] index_add_ spread : Elapsed 0.036 ms (3.595 ms / 100) 3.435 -> 3.439 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.32% +0.35%] index_copy_ spread : Elapsed 0.034 ms (3.437 ms / 100) 3.587 -> 3.588 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.25% +0.39%] index_add_ strided 3 : Elapsed 0.036 ms (3.588 ms / 100) 3.442 -> 3.443 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.29% +0.76%] index_copy_ strided 3 : Elapsed 0.034 ms (3.443 ms / 100) 3.569 -> 3.569 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.53% +0.56%] index_add_ strided 7 : Elapsed 0.036 ms (3.571 ms / 100) 3.437 -> 3.436 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.49% +0.52%] index_copy_ strided 7 : Elapsed 0.034 ms (3.437 ms / 100) 3.574 -> 3.577 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.11% +0.14%] index_add_ perm : Elapsed 0.036 ms (3.575 ms / 100) 3.426 -> 3.425 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.09% +0.20%] index_copy_ perm : Elapsed 0.034 ms (3.426 ms / 100) 3.575 -> 3.575 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.08% +0.11%] index_add_ perm_sorted : Elapsed 0.036 ms (3.576 ms / 100) 3.426 -> 3.426 ( +0.00%) [ +0.09% +0.00% +0.03% / +0.00% +0.12% +0.15%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.429 ms / 100) 5.385 -> 5.388 ( +0.06%) [ +0.06% +0.02% +0.00% / +0.06% +0.22% +0.28%] index_select const : Elapsed 0.054 ms (5.388 ms / 100) 5.389 -> 5.390 ( +0.02%) [ +0.02% +0.09% +0.00% / +0.02% +0.26% +0.04%] index_select wrap : Elapsed 0.054 ms (5.390 ms / 100) 5.391 -> 5.391 ( +0.00%) [ +0.09% +0.00% +0.19% / +0.00% +0.02% +0.11%] index_select linear : Elapsed 0.054 ms (5.396 ms / 100) 5.393 -> 5.389 ( -0.07%) [ +0.00% +0.00% +0.06% / +0.04% -0.07% +0.11%] index_select reverse : Elapsed 0.054 ms (5.393 ms / 100) 5.389 -> 5.389 ( +0.00%) [ +0.11% +0.07% +0.00% / +0.06% +0.00% +0.17%] index_select skip64 : Elapsed 0.054 ms (5.395 ms / 100) 5.387 -> 5.388 ( +0.02%) [ +0.24% +0.15% +0.00% / +0.07% +0.02% +0.07%] index_select skip256 : Elapsed 0.054 ms (5.400 ms / 100) 5.396 -> 5.389 ( -0.13%) [ +0.00% +0.09% +0.00% / -0.04% -0.13% -0.04%] index_select spread : Elapsed 0.054 ms (5.396 ms / 100) 5.391 -> 5.389 ( -0.04%) [ +0.09% +0.11% +0.00% / -0.04% -0.02% +0.07%] index_select strided 3 : Elapsed 0.054 ms (5.396 ms / 100) 5.391 -> 5.390 ( -0.02%) [ +0.02% +0.06% +0.00% / -0.02% +0.11% +0.22%] index_select strided 5 : Elapsed 0.054 ms (5.392 ms / 100) 5.387 -> 5.393 ( +0.11%) [ +0.00% +0.19% +0.02% / +0.11% +0.19% +0.19%] index_select strided 7 : Elapsed 0.054 ms (5.387 ms / 100) 5.388 -> 5.386 ( -0.04%) [ +0.00% +0.07% +0.04% / -0.04% +0.09% +0.17%] index_select strided 8 : Elapsed 0.054 ms (5.388 ms / 100) 5.388 -> 5.392 ( +0.07%) [ +0.00% +0.11% +0.06% / +0.07% +0.07% +0.20%] index_select random : Elapsed 0.054 ms (5.388 ms / 100) 5.390 -> 5.391 ( +0.02%) [ +0.00% +0.09% +0.13% / +0.02% +0.02% +0.02%] index_select random_sorted : Elapsed 0.054 ms (5.390 ms / 100) B = [40, 20, 4, 5] (stride (100, 1, 4000, 20)) A = [16, 20, 4, 5] (stride (400, 1, 100, 20)) dim = 0 3.849 -> 3.850 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.68% +0.70%] index_add_ linear : Elapsed 0.038 ms (3.850 ms / 100) 3.715 -> 3.716 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.78% +0.83%] index_copy_ linear : Elapsed 0.037 ms (3.715 ms / 100) 3.837 -> 3.839 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.73% +0.78%] index_add_ reverse : Elapsed 0.038 ms (3.837 ms / 100) 3.694 -> 3.694 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.037 ms (3.694 ms / 100) 3.853 -> 3.853 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.62% +0.62%] index_add_ spread : Elapsed 0.039 ms (3.853 ms / 100) 3.704 -> 3.703 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.67% +0.70%] index_copy_ spread : Elapsed 0.037 ms (3.707 ms / 100) 3.850 -> 3.853 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.65% +0.68%] index_add_ strided 3 : Elapsed 0.039 ms (3.852 ms / 100) 3.705 -> 3.706 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.76% +0.76%] index_copy_ strided 3 : Elapsed 0.037 ms (3.705 ms / 100) 3.837 -> 3.836 ( -0.03%) [ +0.05% +0.00% +0.00% / -0.03% +0.73% +0.73%] index_add_ strided 7 : Elapsed 0.038 ms (3.839 ms / 100) 3.694 -> 3.695 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.87% +0.76%] index_copy_ strided 7 : Elapsed 0.037 ms (3.694 ms / 100) 3.848 -> 3.849 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.75% +0.75%] index_add_ perm : Elapsed 0.038 ms (3.849 ms / 100) 3.712 -> 3.712 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.81% +0.94%] index_copy_ perm : Elapsed 0.037 ms (3.712 ms / 100) 3.846 -> 3.847 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.86% +0.83%] index_add_ perm_sorted : Elapsed 0.038 ms (3.846 ms / 100) 3.703 -> 3.705 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.76% +0.78%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.705 ms / 100) 5.488 -> 5.484 ( -0.07%) [ +0.05% +0.09% +0.00% / -0.07% +0.05% +0.00%] index_select const : Elapsed 0.055 ms (5.491 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.00% +0.11% +0.15% / +0.04% -0.05% +0.02%] index_select wrap : Elapsed 0.055 ms (5.491 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.05% +0.00% +0.02% / -0.09% +0.07% +0.00%] index_select linear : Elapsed 0.055 ms (5.494 ms / 100) 5.495 -> 5.491 ( -0.07%) [ +0.00% +0.20% +0.00% / -0.07% +0.05% -0.04%] index_select reverse : Elapsed 0.055 ms (5.495 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.00% +0.02% +0.09% / +0.09% +0.11% +0.05%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.489 -> 5.482 ( -0.13%) [ +0.04% +0.05% +0.00% / -0.13% -0.09% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.491 ms / 100) 5.492 -> 5.491 ( -0.02%) [ +0.05% +0.02% +0.00% / +0.00% -0.02% +0.13%] index_select spread : Elapsed 0.055 ms (5.495 ms / 100) 5.491 -> 5.487 ( -0.07%) [ +0.00% +0.05% +0.15% / +0.02% -0.07% -0.07%] index_select strided 3 : Elapsed 0.055 ms (5.491 ms / 100) 5.490 -> 5.492 ( +0.04%) [ +0.09% +0.16% +0.00% / +0.11% +0.04% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.495 ms / 100) 5.496 -> 5.491 ( -0.09%) [ +0.09% +0.00% +0.02% / -0.09% -0.02% -0.05%] index_select strided 7 : Elapsed 0.055 ms (5.501 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.02% +0.00% +0.04% / +0.00% +0.02% +0.13%] index_select strided 8 : Elapsed 0.055 ms (5.487 ms / 100) 5.492 -> 5.488 ( -0.07%) [ +0.02% +0.04% +0.00% / -0.05% +0.05% -0.07%] index_select random : Elapsed 0.055 ms (5.493 ms / 100) 5.489 -> 5.487 ( -0.04%) [ +0.00% +0.11% +0.11% / +0.04% +0.02% -0.04%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) B = [40, 20, 4, 5] (stride (80, 4, 1, 3200)) A = [16, 20, 4, 5] (stride (400, 20, 1, 4)) dim = 0 3.630 -> 3.628 ( -0.06%) [ +0.00% +0.08% +0.00% / -0.06% +0.55% +0.50%] index_add_ linear : Elapsed 0.036 ms (3.630 ms / 100) 3.500 -> 3.503 ( +0.09%) [ +0.00% +0.06% +0.06% / +0.09% +0.66% +0.74%] index_copy_ linear : Elapsed 0.035 ms (3.500 ms / 100) 3.634 -> 3.635 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.77% +0.66%] index_add_ reverse : Elapsed 0.036 ms (3.635 ms / 100) 3.501 -> 3.504 ( +0.09%) [ +0.00% +0.00% +0.09% / +0.09% +0.89% +0.86%] index_copy_ reverse : Elapsed 0.035 ms (3.501 ms / 100) 3.635 -> 3.635 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.47% +0.50%] index_add_ spread : Elapsed 0.036 ms (3.635 ms / 100) 3.510 -> 3.510 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.60%] index_copy_ spread : Elapsed 0.035 ms (3.510 ms / 100) 3.632 -> 3.634 ( +0.06%) [ +0.03% +0.00% +0.00% / +0.06% +0.55% +0.52%] index_add_ strided 3 : Elapsed 0.036 ms (3.633 ms / 100) 3.502 -> 3.503 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.49% +0.51%] index_copy_ strided 3 : Elapsed 0.035 ms (3.502 ms / 100) 3.637 -> 3.638 ( +0.03%) [ +0.05% +0.00% +0.03% / +0.03% +0.52% +0.49%] index_add_ strided 7 : Elapsed 0.036 ms (3.639 ms / 100) 3.504 -> 3.504 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.54% +0.57%] index_copy_ strided 7 : Elapsed 0.035 ms (3.505 ms / 100) 3.630 -> 3.633 ( +0.08%) [ +0.03% +0.03% +0.00% / +0.08% +0.52% +0.55%] index_add_ perm : Elapsed 0.036 ms (3.631 ms / 100) 3.502 -> 3.507 ( +0.14%) [ +0.03% +0.00% +0.00% / +0.14% +0.51% +0.66%] index_copy_ perm : Elapsed 0.035 ms (3.503 ms / 100) 3.632 -> 3.631 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.39% +0.36%] index_add_ perm_sorted : Elapsed 0.036 ms (3.632 ms / 100) 3.503 -> 3.502 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.63% +0.43%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.505 ms / 100) 5.467 -> 5.466 ( -0.02%) [ +0.11% +0.00% +0.16% / -0.02% +0.13% +0.35%] index_select const : Elapsed 0.055 ms (5.473 ms / 100) 5.469 -> 5.471 ( +0.04%) [ +0.16% +0.00% +0.11% / +0.04% +0.15% +0.31%] index_select wrap : Elapsed 0.055 ms (5.478 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.27% +0.27%] index_select linear : Elapsed 0.055 ms (5.474 ms / 100) 5.468 -> 5.472 ( +0.07%) [ +0.13% +0.04% +0.00% / +0.07% +0.15% +0.18%] index_select reverse : Elapsed 0.055 ms (5.475 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.00% +0.11% +0.11% / +0.05% +0.05% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.472 ms / 100) 5.465 -> 5.468 ( +0.05%) [ +0.09% +0.15% +0.00% / +0.05% +0.11% +0.07%] index_select skip256 : Elapsed 0.055 ms (5.470 ms / 100) 5.470 -> 5.467 ( -0.05%) [ +0.05% +0.00% +0.07% / -0.05% +0.11% +0.16%] index_select spread : Elapsed 0.055 ms (5.473 ms / 100) 5.466 -> 5.469 ( +0.05%) [ +0.13% +0.00% +0.11% / +0.05% +0.22% +0.24%] index_select strided 3 : Elapsed 0.055 ms (5.473 ms / 100) 5.464 -> 5.475 ( +0.20%) [ +0.00% +0.15% +0.15% / +0.20% +0.31% +0.33%] index_select strided 5 : Elapsed 0.055 ms (5.464 ms / 100) 5.469 -> 5.472 ( +0.05%) [ +0.05% +0.00% +0.09% / +0.05% +0.20% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.472 ms / 100) 5.468 -> 5.469 ( +0.02%) [ +0.09% +0.00% +0.05% / +0.02% +0.02% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.473 ms / 100) 5.470 -> 5.470 ( +0.00%) [ +0.00% +0.07% +0.13% / +0.00% +0.11% +0.05%] index_select random : Elapsed 0.055 ms (5.470 ms / 100) 5.467 -> 5.467 ( +0.00%) [ +0.16% +0.02% +0.00% / +0.00% +0.16% +0.20%] index_select random_sorted : Elapsed 0.055 ms (5.476 ms / 100) out_shape = [16, 40, 4, 5] in_shape = [16, 20, 4, 5] idx_dim = 1 B = [16, 40, 4, 5] (stride (800, 20, 5, 1)) A = [16, 20, 4, 5] (stride (100, 5, 1600, 1)) dim = 1 2.402 -> 2.412 ( +0.42%) [ +0.04% +0.00% +0.08% / +0.42% +0.62% +0.50%] index_add_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.390 -> 2.405 ( +0.63%) [ +0.00% +0.33% +0.25% / +0.63% +0.96% +1.17%] index_copy_ linear : Elapsed 0.024 ms (2.390 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.00% +0.08% +0.08% / +0.46% +0.54% +0.42%] index_add_ reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.396 -> 2.407 ( +0.46%) [ +0.08% +0.25% +0.00% / +0.46% +0.63% +1.00%] index_copy_ reverse : Elapsed 0.024 ms (2.398 ms / 100) 2.407 -> 2.418 ( +0.46%) [ +0.25% +0.00% +0.12% / +0.54% +0.54% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.405 -> 2.416 ( +0.46%) [ +0.00% +0.12% +0.08% / +0.46% +0.71% +0.91%] index_copy_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.406 -> 2.411 ( +0.21%) [ +0.21% +0.29% +0.00% / +0.67% +0.42% +0.21%] index_add_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.401 -> 2.420 ( +0.79%) [ +0.00% +0.29% +0.17% / +0.79% +0.87% +0.79%] index_copy_ strided 3 : Elapsed 0.024 ms (2.401 ms / 100) 2.407 -> 2.416 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.62% +0.37% +0.58%] index_add_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.402 -> 2.420 ( +0.75%) [ +0.08% +0.08% +0.00% / +0.79% +0.92% +0.75%] index_copy_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.402 -> 2.414 ( +0.50%) [ +0.17% +0.12% +0.00% / +0.50% +0.71% +0.67%] index_add_ perm : Elapsed 0.024 ms (2.406 ms / 100) 2.399 -> 2.412 ( +0.54%) [ +0.00% +0.00% +0.00% / +0.54% +0.75% +1.00%] index_copy_ perm : Elapsed 0.024 ms (2.399 ms / 100) 2.403 -> 2.416 ( +0.54%) [ +0.00% +0.12% +0.00% / +0.54% +0.75% +0.96%] index_add_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 2.398 -> 2.414 ( +0.67%) [ +0.17% +0.00% +0.08% / +0.67% +0.92% +1.58%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.402 ms / 100) 4.412 -> 4.416 ( +0.09%) [ +0.14% +0.00% +0.07% / +0.23% +0.09% +0.59%] index_select const : Elapsed 0.044 ms (4.418 ms / 100) 4.420 -> 4.423 ( +0.07%) [ +0.32% +0.00% +0.05% / +0.07% +0.23% +0.48%] index_select wrap : Elapsed 0.044 ms (4.434 ms / 100) 4.423 -> 4.424 ( +0.02%) [ +0.18% +0.00% +0.25% / +0.02% +0.20% +0.05%] index_select linear : Elapsed 0.044 ms (4.431 ms / 100) 4.423 -> 4.421 ( -0.05%) [ +0.09% +0.23% +0.00% / -0.05% +0.07% +0.18%] index_select reverse : Elapsed 0.044 ms (4.427 ms / 100) 4.414 -> 4.415 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.07% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.414 ms / 100) 4.416 -> 4.410 ( -0.14%) [ +0.00% +0.00% +0.09% / +0.07% -0.14% +0.23%] index_select skip256 : Elapsed 0.044 ms (4.416 ms / 100) 4.425 -> 4.427 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +0.27% +0.27%] index_select spread : Elapsed 0.044 ms (4.427 ms / 100) 4.429 -> 4.427 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% -0.02% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.429 ms / 100) 4.411 -> 4.416 ( +0.11%) [ +0.20% +0.27% +0.00% / +0.11% +0.29% +0.48%] index_select strided 5 : Elapsed 0.044 ms (4.420 ms / 100) 4.421 -> 4.424 ( +0.07%) [ +0.00% +0.25% +0.14% / +0.07% +0.34% +0.29%] index_select strided 7 : Elapsed 0.044 ms (4.421 ms / 100) 4.418 -> 4.418 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.20% +0.09%] index_select strided 8 : Elapsed 0.044 ms (4.421 ms / 100) 4.414 -> 4.414 ( +0.00%) [ +0.18% +0.00% +0.02% / +0.00% +0.20% +0.34%] index_select strided 16 : Elapsed 0.044 ms (4.422 ms / 100) 4.423 -> 4.423 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.23% +0.29%] index_select random : Elapsed 0.044 ms (4.426 ms / 100) 4.424 -> 4.424 ( +0.00%) [ +0.16% +0.00% +0.07% / +0.00% +0.27% +0.20%] index_select random_sorted : Elapsed 0.044 ms (4.431 ms / 100) B = [16, 40, 4, 5] (stride (800, 5, 200, 1)) A = [16, 20, 4, 5] (stride (5, 80, 1600, 1)) dim = 1 2.462 -> 2.473 ( +0.45%) [ +0.12% +0.04% +0.00% / +0.45% +0.65% +0.69%] index_add_ linear : Elapsed 0.025 ms (2.465 ms / 100) 2.450 -> 2.462 ( +0.49%) [ +0.08% +0.24% +0.00% / +0.49% +0.82% +1.02%] index_copy_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.455 -> 2.472 ( +0.69%) [ +0.04% +0.00% +0.04% / +0.69% +0.90% +1.18%] index_add_ reverse : Elapsed 0.025 ms (2.456 ms / 100) 2.446 -> 2.457 ( +0.45%) [ +0.08% +0.00% +0.00% / +0.45% +1.06% +1.06%] index_copy_ reverse : Elapsed 0.024 ms (2.448 ms / 100) 2.471 -> 2.484 ( +0.53%) [ +0.00% +0.08% +0.04% / +0.53% +0.97% +0.97%] index_add_ spread : Elapsed 0.025 ms (2.471 ms / 100) 2.470 -> 2.484 ( +0.57%) [ +0.00% +0.08% +0.20% / +0.57% +1.21% +1.21%] index_copy_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.475 -> 2.488 ( +0.53%) [ +0.08% +0.00% +0.16% / +0.57% +0.53% +0.73%] index_add_ strided 3 : Elapsed 0.025 ms (2.477 ms / 100) 2.473 -> 2.492 ( +0.77%) [ +0.12% +0.12% +0.00% / +0.81% +0.77% +0.93%] index_copy_ strided 3 : Elapsed 0.025 ms (2.476 ms / 100) 2.476 -> 2.488 ( +0.48%) [ +0.00% +0.08% +0.04% / +0.48% +0.77% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.476 ms / 100) 2.473 -> 2.485 ( +0.49%) [ +0.00% +0.28% +0.04% / +0.49% +0.73% +0.97%] index_copy_ strided 7 : Elapsed 0.025 ms (2.473 ms / 100) 2.479 -> 2.478 ( -0.04%) [ +0.12% +0.00% +0.04% / +0.65% +0.24% -0.04%] index_add_ perm : Elapsed 0.025 ms (2.482 ms / 100) 2.471 -> 2.483 ( +0.49%) [ +0.16% +0.24% +0.00% / +0.65% +0.49% +0.61%] index_copy_ perm : Elapsed 0.025 ms (2.475 ms / 100) 2.479 -> 2.483 ( +0.16%) [ +0.00% +0.08% +0.00% / +0.61% +0.20% +0.16%] index_add_ perm_sorted : Elapsed 0.025 ms (2.479 ms / 100) 2.472 -> 2.487 ( +0.61%) [ +0.32% +0.00% +0.12% / +0.93% +0.61% +0.81%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.480 ms / 100) 4.489 -> 4.487 ( -0.04%) [ +0.07% +0.16% +0.00% / -0.04% +0.22% +0.20%] index_select const : Elapsed 0.045 ms (4.492 ms / 100) 4.511 -> 4.502 ( -0.20%) [ +0.09% +0.09% +0.00% / -0.20% -0.04% -0.09%] index_select wrap : Elapsed 0.045 ms (4.515 ms / 100) 4.504 -> 4.504 ( +0.00%) [ +0.13% +0.09% +0.00% / +0.00% +0.27% +0.38%] index_select linear : Elapsed 0.045 ms (4.510 ms / 100) 4.508 -> 4.502 ( -0.13%) [ +0.16% +0.00% +0.11% / +0.11% -0.13% +0.02%] index_select reverse : Elapsed 0.045 ms (4.515 ms / 100) 4.493 -> 4.493 ( +0.00%) [ +0.13% +0.00% +0.09% / +0.07% +0.00% +0.02%] index_select skip64 : Elapsed 0.045 ms (4.499 ms / 100) 4.490 -> 4.491 ( +0.02%) [ +0.04% +0.00% +0.27% / +0.02% +0.09% +0.18%] index_select skip256 : Elapsed 0.045 ms (4.492 ms / 100) 4.511 -> 4.509 ( -0.04%) [ +0.00% +0.04% +0.00% / +0.13% +0.04% -0.04%] index_select spread : Elapsed 0.045 ms (4.511 ms / 100) 4.502 -> 4.508 ( +0.13%) [ +0.00% +0.31% +0.16% / +0.13% +0.20% +0.33%] index_select strided 3 : Elapsed 0.045 ms (4.502 ms / 100) 4.491 -> 4.491 ( +0.00%) [ +0.00% +0.16% +0.13% / +0.00% +0.18% +0.20%] index_select strided 5 : Elapsed 0.045 ms (4.491 ms / 100) 4.503 -> 4.512 ( +0.20%) [ +0.00% +0.09% +0.04% / +0.27% +0.20% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.503 ms / 100) 4.495 -> 4.501 ( +0.13%) [ +0.02% +0.00% +0.11% / +0.20% +0.13% +0.20%] index_select strided 8 : Elapsed 0.045 ms (4.496 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.00% +0.04% +0.00% / +0.02% +0.02% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.509 -> 4.503 ( -0.13%) [ +0.00% +0.00% +0.16% / +0.02% -0.13% -0.02%] index_select random : Elapsed 0.045 ms (4.509 ms / 100) 4.513 -> 4.508 ( -0.11%) [ +0.04% +0.07% +0.00% / -0.11% -0.07% -0.11%] index_select random_sorted : Elapsed 0.045 ms (4.515 ms / 100) B = [16, 40, 4, 5] (stride (800, 1, 40, 160)) A = [16, 20, 4, 5] (stride (400, 4, 1, 80)) dim = 1 2.446 -> 2.457 ( +0.45%) [ +0.00% +0.00% +0.08% / +0.45% +0.74% +0.65%] index_add_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.451 -> 2.465 ( +0.57%) [ +0.08% +0.37% +0.00% / +0.57% +0.78% +0.98%] index_copy_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.460 ( +0.41%) [ +0.00% +0.20% +0.00% / +0.41% +0.49% +0.41%] index_add_ reverse : Elapsed 0.024 ms (2.450 ms / 100) 2.457 -> 2.469 ( +0.49%) [ +0.00% +0.00% +0.04% / +0.49% +0.49% +0.65%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.461 -> 2.469 ( +0.33%) [ +0.12% +0.24% +0.00% / +0.41% +0.33% +0.45%] index_add_ spread : Elapsed 0.025 ms (2.464 ms / 100) 2.472 -> 2.484 ( +0.49%) [ +0.00% +0.08% +0.24% / +0.49% +0.65% +0.81%] index_copy_ spread : Elapsed 0.025 ms (2.472 ms / 100) 2.462 -> 2.470 ( +0.32%) [ +0.20% +0.00% +0.12% / +0.45% +0.32% +0.32%] index_add_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.483 ( +0.57%) [ +0.00% +0.12% +0.20% / +0.65% +0.65% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.463 -> 2.469 ( +0.24%) [ +0.08% +0.04% +0.00% / +0.24% +0.37% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.478 ( +0.36%) [ +0.12% +0.00% +0.08% / +0.36% +0.61% +0.73%] index_copy_ strided 7 : Elapsed 0.025 ms (2.472 ms / 100) 2.461 -> 2.472 ( +0.45%) [ +0.00% +0.28% +0.08% / +0.53% +0.49% +0.45%] index_add_ perm : Elapsed 0.025 ms (2.461 ms / 100) 2.470 -> 2.481 ( +0.45%) [ +0.08% +0.00% +0.08% / +0.45% +0.77% +0.89%] index_copy_ perm : Elapsed 0.025 ms (2.472 ms / 100) 2.461 -> 2.471 ( +0.41%) [ +0.00% +0.08% +0.12% / +0.41% +0.45% +0.53%] index_add_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.471 -> 2.480 ( +0.36%) [ +0.00% +0.16% +0.04% / +0.36% +0.69% +0.65%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.471 ms / 100) 4.495 -> 4.493 ( -0.04%) [ +0.11% +0.00% +0.13% / -0.04% +0.22% +0.13%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.496 -> 4.503 ( +0.16%) [ +0.00% +0.20% +0.13% / +0.27% +0.16% +0.40%] index_select wrap : Elapsed 0.045 ms (4.496 ms / 100) 4.504 -> 4.509 ( +0.11%) [ +0.27% +0.00% +0.02% / +0.11% +0.18% +0.13%] index_select linear : Elapsed 0.045 ms (4.516 ms / 100) 4.509 -> 4.501 ( -0.18%) [ +0.00% +0.02% +0.07% / -0.18% +0.04% +0.11%] index_select reverse : Elapsed 0.045 ms (4.509 ms / 100) 4.494 -> 4.496 ( +0.04%) [ +0.16% +0.09% +0.00% / +0.04% +0.20% +0.18%] index_select skip64 : Elapsed 0.045 ms (4.501 ms / 100) 4.493 -> 4.495 ( +0.04%) [ +0.00% +0.11% +0.07% / +0.04% +0.22% +0.18%] index_select skip256 : Elapsed 0.045 ms (4.493 ms / 100) 4.502 -> 4.505 ( +0.07%) [ +0.00% +0.04% +0.11% / +0.07% +0.09% +0.24%] index_select spread : Elapsed 0.045 ms (4.502 ms / 100) 4.501 -> 4.507 ( +0.13%) [ +0.04% +0.13% +0.00% / +0.16% +0.18% +0.13%] index_select strided 3 : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.502 ( +0.09%) [ +0.00% +0.04% +0.04% / +0.09% +0.18% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.498 ms / 100) 4.505 -> 4.502 ( -0.07%) [ +0.00% +0.16% +0.02% / -0.07% +0.13% +0.11%] index_select strided 7 : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.501 ( -0.04%) [ +0.00% +0.02% +0.00% / -0.04% +0.00% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.503 ms / 100) 4.496 -> 4.493 ( -0.07%) [ +0.11% +0.11% +0.00% / -0.07% +0.27% +0.07%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.495 -> 4.503 ( +0.18%) [ +0.00% +0.04% +0.20% / +0.18% +0.18% +0.27%] index_select random : Elapsed 0.045 ms (4.495 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.00% +0.02% +0.07% / +0.07% +0.11% +0.13%] index_select random_sorted : Elapsed 0.045 ms (4.505 ms / 100) B = [16, 40, 4, 5] (stride (20, 320, 5, 1)) dim = 1 fill_cnt = 20 1.415 -> 1.397 ( -1.27%) [ +0.00% +0.28% +0.07% / -1.27% -1.06% -0.92%] index_fill_ const : Elapsed 0.014 ms (1.415 ms / 100) 1.420 -> 1.399 ( -1.48%) [ +0.07% +0.00% +0.00% / -1.06% -1.34% -1.48%] index_fill_ linear : Elapsed 0.014 ms (1.421 ms / 100) 1.419 -> 1.402 ( -1.20%) [ +0.00% +0.00% +0.00% / -1.13% -1.20% -0.92%] index_fill_ reverse : Elapsed 0.014 ms (1.419 ms / 100) 1.411 -> 1.395 ( -1.13%) [ +0.43% +0.07% +0.00% / -1.13% -0.92% -0.78%] index_fill_ skip64 : Elapsed 0.014 ms (1.417 ms / 100) 1.412 -> 1.394 ( -1.27%) [ +0.07% +0.07% +0.00% / -1.06% -0.78% -1.27%] index_fill_ skip256 : Elapsed 0.014 ms (1.413 ms / 100) 1.419 -> 1.400 ( -1.34%) [ +0.14% +0.00% +0.07% / -1.34% -0.99% -0.99%] index_fill_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.416 -> 1.402 ( -0.99%) [ +0.14% +0.14% +0.00% / -0.99% -0.71% -0.64%] index_fill_ strided 3 : Elapsed 0.014 ms (1.418 ms / 100) 1.413 -> 1.395 ( -1.27%) [ +0.21% +0.28% +0.00% / -1.06% -1.27% -1.20%] index_fill_ strided 5 : Elapsed 0.014 ms (1.416 ms / 100) 1.417 -> 1.404 ( -0.92%) [ +0.42% +0.00% +0.28% / -0.71% -0.92% -0.71%] index_fill_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.414 -> 1.399 ( -1.06%) [ +0.42% +0.35% +0.00% / -1.06% -0.99% -0.64%] index_fill_ strided 8 : Elapsed 0.014 ms (1.420 ms / 100) 1.417 -> 1.399 ( -1.27%) [ +0.00% +0.07% +0.00% / -1.20% -1.27% -0.99%] index_fill_ strided 16 : Elapsed 0.014 ms (1.417 ms / 100) 1.415 -> 1.397 ( -1.27%) [ +0.07% +0.14% +0.00% / -1.27% -0.78% -0.35%] index_fill_ random : Elapsed 0.014 ms (1.416 ms / 100) 1.413 -> 1.399 ( -0.99%) [ +0.35% +0.00% +0.21% / -0.99% -0.35% -0.14%] index_fill_ random_sorted : Elapsed 0.014 ms (1.418 ms / 100) 1.419 -> 1.404 ( -1.06%) [ +0.00% +0.14% +0.21% / -1.06% -0.63% -0.92%] index_fill_ perm : Elapsed 0.014 ms (1.419 ms / 100) 1.417 -> 1.401 ( -1.13%) [ +0.21% +0.35% +0.00% / -1.13% -0.85% -0.78%] index_fill_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) B = [16, 40, 4, 5] (stride (1, 320, 80, 16)) A = [16, 20, 4, 5] (stride (1, 80, 1600, 16)) dim = 1 2.388 -> 2.399 ( +0.46%) [ +0.25% +0.00% +0.04% / +0.46% +0.80% +0.71%] index_add_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.388 -> 2.404 ( +0.67%) [ +0.04% +0.00% +0.25% / +0.67% +0.71% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.384 -> 2.399 ( +0.63%) [ +0.13% +0.25% +0.00% / +0.63% +0.92% +1.30%] index_add_ reverse : Elapsed 0.024 ms (2.387 ms / 100) 2.380 -> 2.400 ( +0.84%) [ +0.00% +0.25% +0.13% / +0.84% +1.26% +1.22%] index_copy_ reverse : Elapsed 0.024 ms (2.380 ms / 100) 2.388 -> 2.398 ( +0.42%) [ +0.00% +0.08% +0.00% / +0.42% +0.80% +0.96%] index_add_ spread : Elapsed 0.024 ms (2.388 ms / 100) 2.385 -> 2.397 ( +0.50%) [ +0.04% +0.17% +0.00% / +0.50% +1.05% +1.05%] index_copy_ spread : Elapsed 0.024 ms (2.386 ms / 100) 2.392 -> 2.398 ( +0.25%) [ +0.00% +0.00% +0.21% / +0.42% +0.25% +0.67%] index_add_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.390 -> 2.405 ( +0.63%) [ +0.00% +0.13% +0.08% / +0.63% +0.79% +0.63%] index_copy_ strided 3 : Elapsed 0.024 ms (2.390 ms / 100) 2.391 -> 2.405 ( +0.59%) [ +0.00% +0.25% +0.21% / +0.59% +0.67% +0.63%] index_add_ strided 7 : Elapsed 0.024 ms (2.391 ms / 100) 2.392 -> 2.405 ( +0.54%) [ +0.04% +0.00% +0.08% / +0.63% +0.54% +0.84%] index_copy_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.396 -> 2.398 ( +0.08%) [ +0.00% +0.21% +0.08% / +0.42% +0.33% +0.08%] index_add_ perm : Elapsed 0.024 ms (2.396 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.50% +0.17% +0.17%] index_copy_ perm : Elapsed 0.024 ms (2.398 ms / 100) 2.392 -> 2.399 ( +0.29%) [ +0.17% +0.13% +0.00% / +0.54% +0.29% +0.38%] index_add_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 2.394 -> 2.398 ( +0.17%) [ +0.00% +0.13% +0.33% / +0.63% +0.17% +0.42%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 4.411 -> 4.415 ( +0.09%) [ +0.25% +0.00% +0.00% / +0.09% +0.23% +0.14%] index_select const : Elapsed 0.044 ms (4.422 ms / 100) 4.424 -> 4.427 ( +0.07%) [ +0.18% +0.00% +0.09% / +0.07% +0.16% +0.18%] index_select wrap : Elapsed 0.044 ms (4.432 ms / 100) 4.428 -> 4.425 ( -0.07%) [ +0.02% +0.05% +0.00% / +0.05% -0.07% -0.07%] index_select linear : Elapsed 0.044 ms (4.429 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.09% +0.07% +0.00% / +0.09% -0.09% +0.02%] index_select reverse : Elapsed 0.044 ms (4.430 ms / 100) 4.416 -> 4.415 ( -0.02%) [ +0.05% +0.05% +0.00% / +0.02% -0.02% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.418 ms / 100) 4.416 -> 4.413 ( -0.07%) [ +0.00% +0.02% +0.07% / +0.07% +0.14% -0.07%] index_select skip256 : Elapsed 0.044 ms (4.416 ms / 100) 4.422 -> 4.420 ( -0.05%) [ +0.34% +0.25% +0.00% / -0.05% +0.27% +0.29%] index_select spread : Elapsed 0.044 ms (4.437 ms / 100) 4.427 -> 4.429 ( +0.05%) [ +0.00% +0.09% +0.18% / +0.07% +0.05% +0.11%] index_select strided 3 : Elapsed 0.044 ms (4.427 ms / 100) 4.417 -> 4.419 ( +0.05%) [ +0.02% +0.00% +0.09% / +0.05% +0.09% +0.18%] index_select strided 5 : Elapsed 0.044 ms (4.418 ms / 100) 4.421 -> 4.426 ( +0.11%) [ +0.20% +0.11% +0.00% / +0.11% +0.41% +0.29%] index_select strided 7 : Elapsed 0.044 ms (4.430 ms / 100) 4.416 -> 4.420 ( +0.09%) [ +0.05% +0.32% +0.00% / +0.09% +0.14% +0.11%] index_select strided 8 : Elapsed 0.044 ms (4.418 ms / 100) 4.418 -> 4.413 ( -0.11%) [ +0.11% +0.05% +0.00% / -0.11% +0.16% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.423 ms / 100) 4.428 -> 4.426 ( -0.05%) [ +0.07% +0.09% +0.00% / +0.11% -0.05% +0.23%] index_select random : Elapsed 0.044 ms (4.431 ms / 100) 4.428 -> 4.424 ( -0.09%) [ +0.00% +0.02% +0.11% / +0.07% +0.11% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.428 ms / 100) B = [16, 40, 4, 5] (stride (200, 5, 3200, 1)) A = [16, 20, 4, 5] (stride (20, 320, 1, 4)) dim = 1 2.459 -> 2.470 ( +0.45%) [ +0.12% +0.00% +0.00% / +0.45% +0.61% +0.69%] index_add_ linear : Elapsed 0.025 ms (2.462 ms / 100) 2.443 -> 2.458 ( +0.61%) [ +0.25% +0.00% +0.12% / +0.61% +0.70% +0.98%] index_copy_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.460 -> 2.474 ( +0.57%) [ +0.20% +0.04% +0.00% / +0.65% +0.57% +0.65%] index_add_ reverse : Elapsed 0.025 ms (2.465 ms / 100) 2.446 -> 2.458 ( +0.49%) [ +0.00% +0.08% +0.12% / +0.49% +0.53% +0.65%] index_copy_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.480 -> 2.490 ( +0.40%) [ +0.16% +0.00% +0.08% / +0.52% +0.40% +0.48%] index_add_ spread : Elapsed 0.025 ms (2.484 ms / 100) 2.472 -> 2.488 ( +0.65%) [ +0.16% +0.24% +0.00% / +0.69% +0.65% +0.77%] index_copy_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.475 -> 2.485 ( +0.40%) [ +0.12% +0.00% +0.24% / +0.44% +0.40% +0.57%] index_add_ strided 3 : Elapsed 0.025 ms (2.478 ms / 100) 2.471 -> 2.487 ( +0.65%) [ +0.16% +0.00% +0.08% / +0.65% +0.69% +0.93%] index_copy_ strided 3 : Elapsed 0.025 ms (2.475 ms / 100) 2.477 -> 2.486 ( +0.36%) [ +0.08% +0.00% +0.00% / +0.52% +0.36% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.479 ms / 100) 2.471 -> 2.481 ( +0.40%) [ +0.00% +0.12% +0.12% / +0.40% +0.53% +0.57%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.474 -> 2.484 ( +0.40%) [ +0.12% +0.00% +0.16% / +0.44% +0.40% +0.44%] index_add_ perm : Elapsed 0.025 ms (2.477 ms / 100) 2.465 -> 2.480 ( +0.61%) [ +0.12% +0.00% +0.08% / +0.69% +0.61% +0.61%] index_copy_ perm : Elapsed 0.025 ms (2.468 ms / 100) 2.474 -> 2.482 ( +0.32%) [ +0.00% +0.08% +0.00% / +0.53% +0.32% +0.49%] index_add_ perm_sorted : Elapsed 0.025 ms (2.474 ms / 100) 2.467 -> 2.477 ( +0.41%) [ +0.08% +0.04% +0.00% / +0.41% +0.53% +0.77%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 4.490 -> 4.488 ( -0.04%) [ +0.24% +0.11% +0.00% / -0.04% +0.13% +0.02%] index_select const : Elapsed 0.045 ms (4.501 ms / 100) 4.490 -> 4.497 ( +0.16%) [ +0.33% +0.00% +0.07% / +0.16% +0.36% +0.33%] index_select wrap : Elapsed 0.045 ms (4.505 ms / 100) 4.495 -> 4.504 ( +0.20%) [ +0.13% +0.04% +0.00% / +0.24% +0.22% +0.20%] index_select linear : Elapsed 0.045 ms (4.501 ms / 100) 4.492 -> 4.501 ( +0.20%) [ +0.29% +0.04% +0.00% / +0.22% +0.20% +0.27%] index_select reverse : Elapsed 0.045 ms (4.505 ms / 100) 4.491 -> 4.486 ( -0.11%) [ +0.00% +0.07% +0.07% / -0.07% +0.09% -0.11%] index_select skip64 : Elapsed 0.045 ms (4.491 ms / 100) 4.481 -> 4.493 ( +0.27%) [ +0.18% +0.00% +0.27% / +0.27% +0.42% +0.40%] index_select skip256 : Elapsed 0.045 ms (4.489 ms / 100) 4.497 -> 4.498 ( +0.02%) [ +0.00% +0.04% +0.04% / +0.02% +0.24% +0.24%] index_select spread : Elapsed 0.045 ms (4.497 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.00% +0.11% +0.13% / +0.07% +0.22% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.498 ms / 100) 4.493 -> 4.493 ( +0.00%) [ +0.00% +0.07% +0.11% / +0.13% +0.02% +0.00%] index_select strided 5 : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.500 ( +0.16%) [ +0.09% +0.11% +0.00% / +0.16% +0.22% +0.38%] index_select strided 7 : Elapsed 0.045 ms (4.497 ms / 100) 4.492 -> 4.492 ( +0.00%) [ +0.09% +0.02% +0.00% / +0.02% +0.00% +0.20%] index_select strided 8 : Elapsed 0.045 ms (4.496 ms / 100) 4.488 -> 4.495 ( +0.16%) [ +0.18% +0.00% +0.20% / +0.22% +0.16% +0.20%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.496 -> 4.498 ( +0.04%) [ +0.07% +0.24% +0.00% / +0.04% +0.24% +0.09%] index_select random : Elapsed 0.045 ms (4.499 ms / 100) 4.498 -> 4.496 ( -0.04%) [ +0.04% +0.02% +0.00% / -0.04% +0.13% +0.02%] index_select random_sorted : Elapsed 0.045 ms (4.500 ms / 100) B = [16, 40, 4, 5] (stride (4, 64, 1, 2560)) A = [16, 20, 4, 5] (stride (20, 320, 1, 4)) dim = 1 2.456 -> 2.466 ( +0.41%) [ +0.00% +0.04% +0.00% / +0.41% +0.61% +0.57%] index_add_ linear : Elapsed 0.025 ms (2.456 ms / 100) 2.442 -> 2.456 ( +0.57%) [ +0.12% +0.08% +0.00% / +0.57% +0.74% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.445 ms / 100) 2.447 -> 2.455 ( +0.33%) [ +0.08% +0.00% +0.00% / +0.33% +1.02% +1.31%] index_add_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.432 -> 2.448 ( +0.66%) [ +0.45% +0.00% +0.16% / +0.66% +1.27% +1.36%] index_copy_ reverse : Elapsed 0.024 ms (2.443 ms / 100) 2.449 -> 2.462 ( +0.53%) [ +0.16% +0.00% +0.08% / +0.53% +1.31% +1.10%] index_add_ spread : Elapsed 0.025 ms (2.453 ms / 100) 2.435 -> 2.453 ( +0.74%) [ +0.04% +0.08% +0.00% / +0.74% +1.27% +1.23%] index_copy_ spread : Elapsed 0.024 ms (2.436 ms / 100) 2.456 -> 2.466 ( +0.41%) [ +0.16% +0.00% +0.16% / +0.57% +0.69% +0.41%] index_add_ strided 3 : Elapsed 0.025 ms (2.460 ms / 100) 2.443 -> 2.458 ( +0.61%) [ +0.04% +0.08% +0.00% / +0.61% +0.70% +0.74%] index_copy_ strided 3 : Elapsed 0.024 ms (2.444 ms / 100) 2.456 -> 2.466 ( +0.41%) [ +0.04% +0.00% +0.12% / +0.41% +0.57% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.457 ms / 100) 2.441 -> 2.457 ( +0.66%) [ +0.00% +0.16% +0.20% / +0.74% +0.66% +0.94%] index_copy_ strided 7 : Elapsed 0.024 ms (2.441 ms / 100) 2.453 -> 2.465 ( +0.49%) [ +0.00% +0.12% +0.08% / +0.69% +0.49% +0.69%] index_add_ perm : Elapsed 0.025 ms (2.453 ms / 100) 2.445 -> 2.455 ( +0.41%) [ +0.20% +0.08% +0.00% / +0.53% +0.41% +0.45%] index_copy_ perm : Elapsed 0.025 ms (2.450 ms / 100) 2.453 -> 2.459 ( +0.24%) [ +0.00% +0.20% +0.20% / +0.61% +0.53% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.453 ms / 100) 2.442 -> 2.451 ( +0.37%) [ +0.04% +0.00% +0.16% / +0.53% +0.53% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.443 ms / 100) 4.492 -> 4.494 ( +0.04%) [ +0.27% +0.00% +0.11% / +0.18% +0.04% +0.20%] index_select const : Elapsed 0.045 ms (4.504 ms / 100) 4.498 -> 4.499 ( +0.02%) [ +0.00% +0.16% +0.20% / +0.02% +0.04% +0.29%] index_select wrap : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.501 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.18% +0.27%] index_select linear : Elapsed 0.045 ms (4.501 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.18% +0.00% +0.02% / +0.00% +0.04% -0.02%] index_select reverse : Elapsed 0.045 ms (4.511 ms / 100) 4.497 -> 4.491 ( -0.13%) [ +0.00% +0.07% +0.07% / +0.00% -0.13% +0.07%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.489 -> 4.493 ( +0.09%) [ +0.11% +0.22% +0.00% / +0.27% +0.09% +0.13%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.499 -> 4.496 ( -0.07%) [ +0.09% +0.13% +0.00% / -0.07% +0.33% +0.13%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.500 -> 4.506 ( +0.13%) [ +0.00% +0.27% +0.09% / +0.13% +0.24% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.500 ms / 100) 4.497 -> 4.495 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.02% -0.04% +0.13%] index_select strided 5 : Elapsed 0.045 ms (4.499 ms / 100) 4.502 -> 4.504 ( +0.04%) [ +0.13% +0.00% +0.02% / +0.07% +0.04% +0.07%] index_select strided 7 : Elapsed 0.045 ms (4.508 ms / 100) 4.502 -> 4.487 ( -0.33%) [ +0.07% +0.09% +0.00% / -0.33% -0.11% +0.11%] index_select strided 8 : Elapsed 0.045 ms (4.505 ms / 100) 4.498 -> 4.500 ( +0.04%) [ +0.24% +0.07% +0.00% / +0.20% +0.04% +0.33%] index_select strided 16 : Elapsed 0.045 ms (4.509 ms / 100) 4.502 -> 4.498 ( -0.09%) [ +0.00% +0.13% +0.04% / +0.16% +0.07% -0.09%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.506 -> 4.501 ( -0.11%) [ +0.00% +0.02% +0.20% / +0.04% -0.11% -0.04%] index_select random_sorted : Elapsed 0.045 ms (4.506 ms / 100) out_shape = [16, 20, 40, 5] in_shape = [16, 20, 4, 5] idx_dim = 2 B = [16, 20, 40, 5] (stride (4000, 5, 100, 1)) A = [16, 20, 4, 5] (stride (400, 4, 1, 80)) dim = 2 1.313 -> 1.316 ( +0.23%) [ +0.23% +0.15% +0.00% / +0.23% +0.46% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.314 -> 1.317 ( +0.23%) [ +0.30% +0.08% +0.00% / +0.23% +0.30% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.79% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.23% +0.00% +0.23% / +0.15% +0.23% +0.30%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.79% +0.63%] index_copy_ spread : Elapsed 0.013 ms (1.274 ms / 100) 1.311 -> 1.312 ( +0.08%) [ +0.00% +0.31% +0.08% / +0.08% +0.61% +0.69%] index_add_ strided 3 : Elapsed 0.013 ms (1.311 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.94%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.38% +0.00% +0.15% / +0.38% +0.61% +0.69%] index_add_ strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.94% +1.49%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.314 -> 1.315 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.38% +0.46%] index_add_ perm : Elapsed 0.013 ms (1.315 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.94% +0.71%] index_copy_ perm : Elapsed 0.013 ms (1.274 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.23% +0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.315 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.274 ms / 100) 9.175 -> 9.179 ( +0.04%) [ +0.07% +0.00% +0.04% / +0.04% +0.15% +0.38%] index_select const : Elapsed 0.092 ms (9.181 ms / 100) 9.177 -> 9.167 ( -0.11%) [ +0.02% +0.04% +0.00% / -0.11% +0.26% +0.28%] index_select wrap : Elapsed 0.092 ms (9.179 ms / 100) 9.177 -> 9.179 ( +0.02%) [ +0.00% +0.19% +0.08% / +0.02% +0.22% +0.20%] index_select linear : Elapsed 0.092 ms (9.177 ms / 100) 9.169 -> 9.191 ( +0.24%) [ +0.00% +0.17% +0.20% / +0.24% +0.27% +0.46%] index_select reverse : Elapsed 0.092 ms (9.169 ms / 100) 9.174 -> 9.176 ( +0.02%) [ +0.21% +0.00% +0.01% / +0.04% +0.02% +0.35%] index_select skip64 : Elapsed 0.092 ms (9.193 ms / 100) 9.171 -> 9.186 ( +0.16%) [ +0.00% +0.17% +0.24% / +0.16% +0.24% +0.28%] index_select skip256 : Elapsed 0.092 ms (9.171 ms / 100) 9.169 -> 9.168 ( -0.01%) [ +0.00% +0.29% +0.05% / +0.27% -0.01% +0.27%] index_select spread : Elapsed 0.092 ms (9.169 ms / 100) 9.182 -> 9.190 ( +0.09%) [ +0.00% +0.14% +0.10% / +0.10% +0.23% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.182 ms / 100) 9.169 -> 9.179 ( +0.11%) [ +0.11% +0.14% +0.00% / +0.39% +0.11% +0.15%] index_select random : Elapsed 0.092 ms (9.179 ms / 100) 9.177 -> 9.167 ( -0.11%) [ +0.00% +0.00% +0.04% / +0.14% -0.11% +0.38%] index_select random_sorted : Elapsed 0.092 ms (9.177 ms / 100) B = [16, 20, 40, 5] (stride (4000, 5, 100, 1)) A = [16, 20, 4, 5] (stride (20, 1, 320, 1280)) dim = 2 1.231 -> 1.231 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.191 -> 1.192 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +0.67% +0.67%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.49% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.192 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +0.75%] index_copy_ reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.57% +0.65%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.00% +0.00% +0.08% / +0.17% +0.59% +1.34%] index_copy_ spread : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.65% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.08% +0.17% +0.00% / +0.00% +0.67% +0.59%] index_copy_ strided 3 : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.41% +0.49%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.00% +0.25% +0.00% / +0.17% +0.67% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.32% +0.32%] index_add_ perm : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.34%] index_copy_ perm : Elapsed 0.012 ms (1.193 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.32% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.234 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.192 ms / 100) 8.721 -> 8.727 ( +0.07%) [ +0.00% +0.17% +0.01% / +0.07% +0.10% +0.09%] index_select const : Elapsed 0.087 ms (8.721 ms / 100) 8.756 -> 8.749 ( -0.08%) [ +0.07% +0.03% +0.00% / +0.32% +0.07% -0.08%] index_select wrap : Elapsed 0.088 ms (8.762 ms / 100) 8.749 -> 8.750 ( +0.01%) [ +0.00% +0.15% +0.15% / +0.33% +0.01% +0.11%] index_select linear : Elapsed 0.087 ms (8.749 ms / 100) 8.744 -> 8.748 ( +0.05%) [ +0.15% +0.00% +0.02% / +0.05% +0.21% +0.23%] index_select reverse : Elapsed 0.088 ms (8.757 ms / 100) 8.702 -> 8.742 ( +0.46%) [ +0.00% +0.26% +0.16% / +0.49% +0.56% +0.46%] index_select skip64 : Elapsed 0.087 ms (8.702 ms / 100) 8.703 -> 8.711 ( +0.09%) [ +0.00% +0.15% +0.24% / +0.09% +0.23% +0.29%] index_select skip256 : Elapsed 0.087 ms (8.703 ms / 100) 8.744 -> 8.739 ( -0.06%) [ +0.00% +0.14% +0.29% / -0.06% +0.01% +0.15%] index_select spread : Elapsed 0.087 ms (8.744 ms / 100) 8.749 -> 8.751 ( +0.02%) [ +0.11% +0.00% +0.07% / +0.02% +0.16% +0.30%] index_select strided 3 : Elapsed 0.088 ms (8.759 ms / 100) 8.748 -> 8.757 ( +0.10%) [ +0.01% +0.00% +0.16% / +0.10% +0.10% +0.19%] index_select random : Elapsed 0.087 ms (8.749 ms / 100) 8.750 -> 8.769 ( +0.22%) [ +0.13% +0.30% +0.00% / +0.22% +0.22% +0.32%] index_select random_sorted : Elapsed 0.088 ms (8.761 ms / 100) B = [16, 20, 40, 5] (stride (4000, 1, 100, 20)) dim = 2 fill_cnt = 4 0.790 -> 0.790 ( +0.00%) [ +0.13% +0.25% +0.00% / +0.00% +0.51% +0.63%] index_fill_ const : Elapsed 0.008 ms (0.791 ms / 100) 0.791 -> 0.792 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.38% +0.88%] index_fill_ linear : Elapsed 0.008 ms (0.792 ms / 100) 0.791 -> 0.793 ( +0.25%) [ +0.00% +0.25% +0.13% / +0.25% +0.25% +2.40%] index_fill_ reverse : Elapsed 0.008 ms (0.791 ms / 100) 0.791 -> 0.793 ( +0.25%) [ +0.00% +0.00% +0.00% / +0.25% +0.38% +0.63%] index_fill_ skip64 : Elapsed 0.008 ms (0.791 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.25% +0.13% +0.00% / +0.00% +0.38% +0.38%] index_fill_ skip256 : Elapsed 0.008 ms (0.793 ms / 100) 0.792 -> 0.792 ( +0.00%) [ +0.63% +0.25% +0.00% / +0.00% +0.13% +0.13%] index_fill_ spread : Elapsed 0.008 ms (0.797 ms / 100) 0.790 -> 0.793 ( +0.38%) [ +0.76% +0.51% +0.00% / +0.38% +0.51% +0.76%] index_fill_ strided 3 : Elapsed 0.008 ms (0.796 ms / 100) 0.791 -> 0.792 ( +0.13%) [ +0.63% +0.38% +0.00% / +0.13% +0.38% +0.25%] index_fill_ strided 5 : Elapsed 0.008 ms (0.796 ms / 100) 0.793 -> 0.792 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.13% +0.00%] index_fill_ strided 7 : Elapsed 0.008 ms (0.793 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.38% +0.38%] index_fill_ strided 8 : Elapsed 0.008 ms (0.791 ms / 100) 0.793 -> 0.794 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.25% +0.13% +0.13%] index_fill_ strided 16 : Elapsed 0.008 ms (0.794 ms / 100) 0.792 -> 0.792 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.00% +0.13% +0.25%] index_fill_ random : Elapsed 0.008 ms (0.794 ms / 100) 0.791 -> 0.792 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.13% +0.63% +0.25%] index_fill_ random_sorted : Elapsed 0.008 ms (0.791 ms / 100) 0.792 -> 0.793 ( +0.13%) [ +0.13% +0.00% +0.38% / +0.63% +0.13% +0.25%] index_fill_ perm : Elapsed 0.008 ms (0.793 ms / 100) 0.792 -> 0.791 ( -0.13%) [ +0.00% +0.51% +0.00% / -0.13% +0.13% +0.13%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.792 ms / 100) B = [16, 20, 40, 5] (stride (200, 3200, 5, 1)) A = [16, 20, 4, 5] (stride (80, 4, 1, 1280)) dim = 2 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.65% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.195 ( +0.25%) [ +0.17% +0.08% +0.00% / +0.25% +0.84% +0.67%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.59% +0.75%] index_copy_ reverse : Elapsed 0.012 ms (1.193 ms / 100) 1.235 -> 1.234 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.57% +0.57%] index_add_ spread : Elapsed 0.012 ms (1.235 ms / 100) 1.196 -> 1.197 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.59% +0.50%] index_copy_ spread : Elapsed 0.012 ms (1.196 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.24% +0.00% +0.00% / +0.00% +0.73% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.234 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.08% +0.08% +0.00% / +0.17% +0.76% +0.84%] index_copy_ strided 3 : Elapsed 0.012 ms (1.193 ms / 100) 1.233 -> 1.234 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.65% +1.30%] index_add_ strided 7 : Elapsed 0.012 ms (1.235 ms / 100) 1.195 -> 1.198 ( +0.25%) [ +0.08% +0.08% +0.00% / +0.25% +0.59% +1.17%] index_copy_ strided 7 : Elapsed 0.012 ms (1.196 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.73% +0.57%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.50% +0.50%] index_copy_ perm : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.57% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) 1.195 -> 1.194 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.50% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.195 ms / 100) 8.785 -> 8.803 ( +0.20%) [ +0.03% +0.09% +0.00% / +0.22% +0.20% +0.35%] index_select const : Elapsed 0.088 ms (8.788 ms / 100) 8.789 -> 8.775 ( -0.16%) [ +0.06% +0.00% +0.02% / -0.16% +0.25% +0.07%] index_select wrap : Elapsed 0.088 ms (8.794 ms / 100) 8.783 -> 8.787 ( +0.05%) [ +0.03% +0.06% +0.00% / +0.05% +0.07% +0.20%] index_select linear : Elapsed 0.088 ms (8.786 ms / 100) 8.786 -> 8.779 ( -0.08%) [ +0.10% +0.11% +0.00% / -0.08% +0.30% +0.15%] index_select reverse : Elapsed 0.088 ms (8.795 ms / 100) 8.783 -> 8.769 ( -0.16%) [ +0.11% +0.16% +0.00% / -0.16% +0.40% +0.08%] index_select skip64 : Elapsed 0.088 ms (8.793 ms / 100) 8.769 -> 8.781 ( +0.14%) [ +0.25% +0.00% +0.00% / +0.14% +0.48% +0.48%] index_select skip256 : Elapsed 0.088 ms (8.791 ms / 100) 8.790 -> 8.790 ( +0.00%) [ +0.14% +0.00% +0.02% / +0.00% +0.27% +0.34%] index_select spread : Elapsed 0.088 ms (8.802 ms / 100) 8.780 -> 8.800 ( +0.23%) [ +0.34% +0.00% +0.18% / +0.23% +0.46% +0.33%] index_select strided 3 : Elapsed 0.088 ms (8.810 ms / 100) 8.770 -> 8.780 ( +0.11%) [ +0.23% +0.21% +0.00% / +0.11% +0.30% +0.47%] index_select random : Elapsed 0.088 ms (8.790 ms / 100) 8.770 -> 8.777 ( +0.08%) [ +0.17% +0.33% +0.00% / +0.08% +0.31% +0.62%] index_select random_sorted : Elapsed 0.088 ms (8.785 ms / 100) B = [16, 20, 40, 5] (stride (100, 1, 1600, 20)) A = [16, 20, 4, 5] (stride (20, 1, 1600, 320)) dim = 2 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +1.46%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.59% +1.76%] index_copy_ linear : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.32% +0.24% +0.00% / +0.08% +0.57% +1.38%] index_add_ reverse : Elapsed 0.012 ms (1.235 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +1.26%] index_copy_ reverse : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.41% +0.57%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.193 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +0.75%] index_copy_ spread : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.192 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.012 ms (1.192 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.08% +0.00% +0.16% / +0.08% +0.41% +0.57%] index_add_ strided 7 : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.194 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.59% +1.26%] index_copy_ strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.57%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.196 -> 1.195 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.33% +0.59%] index_copy_ perm : Elapsed 0.012 ms (1.196 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.49% +1.06%] index_add_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.42% +0.92%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.194 ms / 100) 8.704 -> 8.702 ( -0.02%) [ +0.00% +0.06% +0.17% / -0.02% +0.28% +0.16%] index_select const : Elapsed 0.087 ms (8.704 ms / 100) 8.722 -> 8.727 ( +0.06%) [ +0.19% +0.28% +0.00% / +0.06% +0.19% +0.30%] index_select wrap : Elapsed 0.087 ms (8.739 ms / 100) 8.734 -> 8.733 ( -0.01%) [ +0.00% +0.14% +0.06% / -0.01% +0.00% +0.40%] index_select linear : Elapsed 0.087 ms (8.734 ms / 100) 8.735 -> 8.720 ( -0.17%) [ +0.00% +0.06% +0.10% / -0.17% +0.01% +0.17%] index_select reverse : Elapsed 0.087 ms (8.735 ms / 100) 8.695 -> 8.714 ( +0.22%) [ +0.09% +0.13% +0.00% / +0.22% +0.37% +0.29%] index_select skip64 : Elapsed 0.087 ms (8.703 ms / 100) 8.684 -> 8.716 ( +0.37%) [ +0.00% +0.28% +0.28% / +0.46% +0.37% +0.43%] index_select skip256 : Elapsed 0.087 ms (8.684 ms / 100) 8.720 -> 8.736 ( +0.18%) [ +0.00% +0.38% +0.16% / +0.23% +0.34% +0.18%] index_select spread : Elapsed 0.087 ms (8.720 ms / 100) 8.725 -> 8.729 ( +0.05%) [ +0.11% +0.13% +0.00% / +0.15% +0.05% +0.28%] index_select strided 3 : Elapsed 0.087 ms (8.735 ms / 100) 8.741 -> 8.727 ( -0.16%) [ +0.05% +0.11% +0.00% / +0.17% -0.16% +0.09%] index_select random : Elapsed 0.087 ms (8.745 ms / 100) 8.725 -> 8.739 ( +0.16%) [ +0.24% +0.24% +0.00% / +0.16% +0.29% +0.18%] index_select random_sorted : Elapsed 0.087 ms (8.746 ms / 100) out_shape = [16, 20, 4, 40] in_shape = [16, 20, 4, 5] idx_dim = 3 B = [16, 20, 4, 40] (stride (3200, 40, 800, 1)) A = [16, 20, 4, 5] (stride (400, 1, 20, 80)) dim = 3 1.518 -> 1.520 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.59% +0.66%] index_add_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.41% +0.41%] index_copy_ linear : Elapsed 0.015 ms (1.476 ms / 100) 1.520 -> 1.519 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.46% +0.66%] index_add_ reverse : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.41% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.476 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.46% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +0.47% +0.41%] index_copy_ spread : Elapsed 0.015 ms (1.475 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.59% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.47% +0.75%] index_copy_ strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.66% +0.66%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.475 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.54% +0.00% / +0.00% +0.47% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.475 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.72% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.47% +0.41%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 8.556 -> 8.578 ( +0.26%) [ +0.00% +0.22% +0.05% / +0.26% +0.30% +0.29%] index_select const : Elapsed 0.086 ms (8.556 ms / 100) 8.566 -> 8.584 ( +0.21%) [ +0.19% +0.27% +0.00% / +0.21% +0.54% +0.32%] index_select wrap : Elapsed 0.086 ms (8.582 ms / 100) 8.579 -> 8.580 ( +0.01%) [ +0.16% +0.00% +0.16% / +0.01% +0.28% +0.12%] index_select linear : Elapsed 0.086 ms (8.593 ms / 100) 8.577 -> 8.585 ( +0.09%) [ +0.07% +0.15% +0.00% / +0.09% +0.14% +0.29%] index_select reverse : Elapsed 0.086 ms (8.583 ms / 100) 8.558 -> 8.551 ( -0.08%) [ +0.20% +0.16% +0.00% / +0.05% -0.08% +0.30%] index_select skip64 : Elapsed 0.086 ms (8.575 ms / 100) 8.557 -> 8.562 ( +0.06%) [ +0.00% +0.23% +0.12% / +0.06% +0.28% +0.18%] index_select skip256 : Elapsed 0.086 ms (8.557 ms / 100) 8.565 -> 8.574 ( +0.11%) [ +0.16% +0.30% +0.00% / +0.11% +0.36% +0.53%] index_select spread : Elapsed 0.086 ms (8.579 ms / 100) 8.579 -> 8.580 ( +0.01%) [ +0.05% +0.00% +0.26% / +0.07% +0.01% +0.14%] index_select strided 3 : Elapsed 0.086 ms (8.583 ms / 100) 8.586 -> 8.592 ( +0.07%) [ +0.09% +0.06% +0.00% / +0.07% +0.24% +0.23%] index_select random : Elapsed 0.086 ms (8.594 ms / 100) 8.584 -> 8.574 ( -0.12%) [ +0.10% +0.12% +0.00% / -0.12% +0.07% +0.14%] index_select random_sorted : Elapsed 0.086 ms (8.593 ms / 100) B = [16, 20, 4, 40] (stride (3200, 40, 800, 1)) A = [16, 20, 4, 5] (stride (1, 16, 320, 1280)) dim = 3 1.578 -> 1.579 ( +0.06%) [ +0.13% +0.13% +0.00% / +0.06% +0.51% +0.76%] index_add_ linear : Elapsed 0.016 ms (1.580 ms / 100) 1.532 -> 1.533 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.72% +0.85%] index_copy_ linear : Elapsed 0.015 ms (1.532 ms / 100) 1.581 -> 1.581 ( +0.00%) [ +0.06% +0.13% +0.00% / +0.00% +0.57% +0.70%] index_add_ reverse : Elapsed 0.016 ms (1.582 ms / 100) 1.537 -> 1.539 ( +0.13%) [ +0.00% +0.07% +0.13% / +0.13% +0.98% +0.78%] index_copy_ reverse : Elapsed 0.015 ms (1.537 ms / 100) 1.584 -> 1.583 ( -0.06%) [ +0.06% +0.06% +0.00% / -0.06% +0.63% +0.69%] index_add_ spread : Elapsed 0.016 ms (1.585 ms / 100) 1.537 -> 1.539 ( +0.13%) [ +0.20% +0.26% +0.00% / +0.13% +0.91% +0.85%] index_copy_ spread : Elapsed 0.015 ms (1.540 ms / 100) 1.576 -> 1.585 ( +0.57%) [ +0.19% +0.00% +0.00% / +0.57% +0.57% +0.89%] index_add_ strided 3 : Elapsed 0.016 ms (1.579 ms / 100) 1.532 -> 1.535 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.78% +0.85%] index_copy_ strided 3 : Elapsed 0.015 ms (1.532 ms / 100) 1.574 -> 1.573 ( -0.06%) [ +0.25% +0.00% +0.06% / -0.06% +0.51% +0.44%] index_add_ strided 7 : Elapsed 0.016 ms (1.578 ms / 100) 1.528 -> 1.529 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.79% +0.85%] index_copy_ strided 7 : Elapsed 0.015 ms (1.529 ms / 100) 1.573 -> 1.576 ( +0.19%) [ +0.32% +0.45% +0.00% / +0.19% +0.57% +0.64%] index_add_ perm : Elapsed 0.016 ms (1.578 ms / 100) 1.526 -> 1.528 ( +0.13%) [ +0.00% +0.20% +0.07% / +0.13% +0.72% +0.66%] index_copy_ perm : Elapsed 0.015 ms (1.526 ms / 100) 1.578 -> 1.580 ( +0.13%) [ +0.00% +0.13% +0.13% / +0.13% +0.63% +0.82%] index_add_ perm_sorted : Elapsed 0.016 ms (1.578 ms / 100) 1.530 -> 1.534 ( +0.26%) [ +0.00% +0.20% +0.07% / +0.26% +0.72% +1.11%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.530 ms / 100) 8.551 -> 8.574 ( +0.27%) [ +0.33% +0.13% +0.00% / +0.48% +0.27% +0.62%] index_select const : Elapsed 0.086 ms (8.579 ms / 100) 8.563 -> 8.570 ( +0.08%) [ +0.19% +0.00% +0.19% / +0.08% +0.25% +0.61%] index_select wrap : Elapsed 0.086 ms (8.579 ms / 100) 8.574 -> 8.584 ( +0.12%) [ +0.02% +0.03% +0.00% / +0.31% +0.12% +0.28%] index_select linear : Elapsed 0.086 ms (8.576 ms / 100) 8.571 -> 8.573 ( +0.02%) [ +0.07% +0.00% +0.27% / +0.02% +0.14% +0.14%] index_select reverse : Elapsed 0.086 ms (8.577 ms / 100) 8.556 -> 8.569 ( +0.15%) [ +0.00% +0.09% +0.30% / +0.15% +0.33% +0.28%] index_select skip64 : Elapsed 0.086 ms (8.556 ms / 100) 8.569 -> 8.563 ( -0.07%) [ +0.00% +0.05% +0.08% / +0.08% -0.07% +0.04%] index_select skip256 : Elapsed 0.086 ms (8.569 ms / 100) 8.588 -> 8.587 ( -0.01%) [ +0.02% +0.00% +0.14% / -0.01% +0.14% -0.01%] index_select spread : Elapsed 0.086 ms (8.590 ms / 100) 8.574 -> 8.571 ( -0.03%) [ +0.00% +0.10% +0.17% / -0.03% +0.24% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.574 ms / 100) 8.571 -> 8.575 ( +0.05%) [ +0.00% +0.16% +0.19% / +0.18% +0.05% +0.20%] index_select random : Elapsed 0.086 ms (8.571 ms / 100) 8.577 -> 8.577 ( +0.00%) [ +0.00% +0.13% +0.16% / +0.00% +0.31% +0.24%] index_select random_sorted : Elapsed 0.086 ms (8.577 ms / 100) B = [16, 20, 4, 40] (stride (4, 2560, 1, 64)) A = [16, 20, 4, 5] (stride (400, 5, 100, 1)) dim = 3 1.537 -> 1.539 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.46% +0.39%] index_add_ linear : Elapsed 0.015 ms (1.539 ms / 100) 1.489 -> 1.491 ( +0.13%) [ +0.20% +0.07% +0.00% / +0.13% +0.54% +0.34%] index_copy_ linear : Elapsed 0.015 ms (1.492 ms / 100) 1.546 -> 1.553 ( +0.45%) [ +0.00% +0.91% +0.00% / +0.45% +0.52% +0.58%] index_add_ reverse : Elapsed 0.015 ms (1.546 ms / 100) 1.498 -> 1.506 ( +0.53%) [ +0.13% +0.53% +0.00% / +0.53% +0.60% +0.87%] index_copy_ reverse : Elapsed 0.015 ms (1.500 ms / 100) 1.534 -> 1.539 ( +0.33%) [ +0.13% +0.00% +0.39% / +0.33% +0.91% +0.91%] index_add_ spread : Elapsed 0.015 ms (1.536 ms / 100) 1.490 -> 1.492 ( +0.13%) [ +0.00% +0.00% +0.27% / +0.13% +0.67% +0.60%] index_copy_ spread : Elapsed 0.015 ms (1.490 ms / 100) 1.534 -> 1.540 ( +0.39%) [ +0.13% +0.00% +0.07% / +0.39% +0.59% +0.91%] index_add_ strided 3 : Elapsed 0.015 ms (1.536 ms / 100) 1.489 -> 1.493 ( +0.27%) [ +0.07% +0.00% +0.07% / +0.27% +0.60% +0.74%] index_copy_ strided 3 : Elapsed 0.015 ms (1.490 ms / 100) 1.546 -> 1.547 ( +0.06%) [ +1.16% +0.00% +0.06% / +0.06% +1.23% +0.58%] index_add_ strided 7 : Elapsed 0.016 ms (1.564 ms / 100) 1.498 -> 1.501 ( +0.20%) [ +0.53% +0.00% +0.07% / +0.20% +1.00% +0.53%] index_copy_ strided 7 : Elapsed 0.015 ms (1.506 ms / 100) 1.534 -> 1.535 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.65% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.536 ms / 100) 1.487 -> 1.488 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.67% +0.74%] index_copy_ perm : Elapsed 0.015 ms (1.489 ms / 100) 1.555 -> 1.557 ( +0.13%) [ +0.32% +0.00% +0.26% / +0.13% +1.29% +1.41%] index_add_ perm_sorted : Elapsed 0.016 ms (1.560 ms / 100) 1.506 -> 1.506 ( +0.00%) [ +0.20% +0.07% +0.00% / +0.00% +0.60% +0.60%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.509 ms / 100) 8.535 -> 8.537 ( +0.02%) [ +0.12% +0.00% +0.25% / +0.02% +0.26% +0.27%] index_select const : Elapsed 0.085 ms (8.545 ms / 100) 8.528 -> 8.531 ( +0.04%) [ +0.14% +0.29% +0.00% / +0.14% +0.04% +0.40%] index_select wrap : Elapsed 0.085 ms (8.540 ms / 100) 8.527 -> 8.535 ( +0.09%) [ +0.30% +0.30% +0.00% / +0.21% +0.36% +0.09%] index_select linear : Elapsed 0.086 ms (8.553 ms / 100) 8.529 -> 8.539 ( +0.12%) [ +0.38% +0.00% +0.25% / +0.12% +0.18% +0.13%] index_select reverse : Elapsed 0.086 ms (8.561 ms / 100) 8.529 -> 8.526 ( -0.04%) [ +0.11% +0.06% +0.00% / +0.14% -0.04% +0.29%] index_select skip64 : Elapsed 0.085 ms (8.538 ms / 100) 8.538 -> 8.540 ( +0.02%) [ +0.06% +0.14% +0.00% / +0.14% +0.02% +0.08%] index_select skip256 : Elapsed 0.085 ms (8.543 ms / 100) 8.527 -> 8.535 ( +0.09%) [ +0.12% +0.12% +0.00% / +0.09% +0.45% +0.29%] index_select spread : Elapsed 0.085 ms (8.537 ms / 100) 8.530 -> 8.520 ( -0.12%) [ +0.05% +0.23% +0.00% / -0.12% +0.11% +0.19%] index_select strided 3 : Elapsed 0.085 ms (8.534 ms / 100) 8.535 -> 8.540 ( +0.06%) [ +0.00% +0.06% +0.01% / +0.06% +0.30% +0.22%] index_select random : Elapsed 0.085 ms (8.535 ms / 100) 8.531 -> 8.542 ( +0.13%) [ +0.00% +0.08% +0.00% / +0.38% +0.23% +0.13%] index_select random_sorted : Elapsed 0.085 ms (8.531 ms / 100) B = [16, 20, 4, 40] (stride (1, 640, 12800, 16)) A = [16, 20, 4, 5] (stride (400, 20, 5, 1)) dim = 3 1.320 -> 1.321 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.61% +0.83%] index_add_ linear : Elapsed 0.013 ms (1.322 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.78%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.61% +0.91%] index_add_ reverse : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.68% +0.76%] index_add_ spread : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.71%] index_copy_ spread : Elapsed 0.013 ms (1.276 ms / 100) 1.319 -> 1.320 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.83% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.23% +0.08% +0.00% / +0.00% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.71% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.320 -> 1.320 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.76% +0.76%] index_add_ perm : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.86%] index_copy_ perm : Elapsed 0.013 ms (1.276 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.76% +0.91%] index_add_ perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 7.878 -> 7.883 ( +0.06%) [ +0.13% +0.00% +0.30% / +0.06% +0.67% +0.41%] index_select const : Elapsed 0.079 ms (7.888 ms / 100) 7.898 -> 7.880 ( -0.23%) [ +0.00% +0.10% +0.19% / -0.23% +0.29% +0.23%] index_select wrap : Elapsed 0.079 ms (7.898 ms / 100) 7.879 -> 7.888 ( +0.11%) [ +0.41% +0.19% +0.00% / +0.11% +0.33% +0.41%] index_select linear : Elapsed 0.079 ms (7.911 ms / 100) 7.876 -> 7.892 ( +0.20%) [ +0.15% +0.00% +0.25% / +0.20% +0.43% +0.62%] index_select reverse : Elapsed 0.079 ms (7.888 ms / 100) 7.887 -> 7.889 ( +0.03%) [ +0.00% +0.03% +0.01% / +0.03% +0.22% +0.15%] index_select skip64 : Elapsed 0.079 ms (7.887 ms / 100) 7.892 -> 7.900 ( +0.10%) [ +0.00% +0.06% +0.01% / +0.10% +0.16% +0.20%] index_select skip256 : Elapsed 0.079 ms (7.892 ms / 100) 7.891 -> 7.888 ( -0.04%) [ +0.10% +0.10% +0.00% / -0.04% +0.09% +0.11%] index_select spread : Elapsed 0.079 ms (7.899 ms / 100) 7.876 -> 7.896 ( +0.25%) [ +0.00% +0.06% +0.51% / +0.27% +0.63% +0.25%] index_select strided 3 : Elapsed 0.079 ms (7.876 ms / 100) 7.891 -> 7.883 ( -0.10%) [ +0.00% +0.09% +0.05% / -0.10% +0.18% +0.30%] index_select random : Elapsed 0.079 ms (7.891 ms / 100) 7.878 -> 7.881 ( +0.04%) [ +0.08% +0.24% +0.00% / +0.04% +0.20% +0.44%] index_select random_sorted : Elapsed 0.079 ms (7.884 ms / 100) B = [16, 20, 4, 40] (stride (20, 1, 12800, 320)) A = [16, 20, 4, 5] (stride (1, 80, 1600, 16)) dim = 3 1.551 -> 1.562 ( +0.71%) [ +0.77% +0.00% +0.13% / +0.71% +1.23% +0.84%] index_add_ linear : Elapsed 0.016 ms (1.563 ms / 100) 1.488 -> 1.491 ( +0.20%) [ +0.27% +0.00% +0.27% / +0.20% +0.67% +0.60%] index_copy_ linear : Elapsed 0.015 ms (1.492 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.46% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.468 -> 1.471 ( +0.20%) [ +0.00% +0.14% +0.00% / +0.20% +0.34% +0.61%] index_copy_ reverse : Elapsed 0.015 ms (1.468 ms / 100) 1.537 -> 1.540 ( +0.20%) [ +0.00% +0.13% +0.39% / +0.20% +0.33% +0.98%] index_add_ spread : Elapsed 0.015 ms (1.537 ms / 100) 1.480 -> 1.482 ( +0.14%) [ +0.00% +0.00% +0.34% / +0.14% +0.27% +0.74%] index_copy_ spread : Elapsed 0.015 ms (1.480 ms / 100) 1.533 -> 1.545 ( +0.78%) [ +0.00% +0.39% +0.26% / +0.78% +0.91% +0.98%] index_add_ strided 3 : Elapsed 0.015 ms (1.533 ms / 100) 1.476 -> 1.486 ( +0.68%) [ +0.00% +0.47% +0.20% / +0.68% +1.02% +0.88%] index_copy_ strided 3 : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.53% +0.46%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.466 -> 1.466 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.55% +0.48%] index_copy_ strided 7 : Elapsed 0.015 ms (1.466 ms / 100) 1.555 -> 1.572 ( +1.09%) [ +0.00% +0.84% +0.32% / +1.48% +1.16% +1.09%] index_add_ perm : Elapsed 0.016 ms (1.555 ms / 100) 1.492 -> 1.499 ( +0.47%) [ +0.00% +0.47% +0.13% / +0.54% +0.67% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.492 ms / 100) 1.531 -> 1.540 ( +0.59%) [ +0.00% +0.26% +0.26% / +0.59% +0.85% +0.85%] index_add_ perm_sorted : Elapsed 0.015 ms (1.531 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.00% +0.27% +0.20% / +0.07% +0.81% +0.81%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 8.172 -> 8.183 ( +0.13%) [ +0.00% +0.09% +0.06% / +0.13% +0.28% +0.20%] index_select const : Elapsed 0.082 ms (8.172 ms / 100) 8.191 -> 8.198 ( +0.09%) [ +0.18% +0.00% +0.12% / +0.12% +0.09% +0.20%] index_select wrap : Elapsed 0.082 ms (8.206 ms / 100) 8.197 -> 8.194 ( -0.04%) [ +0.00% +0.09% +0.15% / +0.00% +0.28% -0.04%] index_select linear : Elapsed 0.082 ms (8.197 ms / 100) 8.200 -> 8.200 ( +0.00%) [ +0.04% +0.05% +0.00% / +0.00% +0.39% +0.00%] index_select reverse : Elapsed 0.082 ms (8.203 ms / 100) 8.183 -> 8.186 ( +0.04%) [ +0.23% +0.06% +0.00% / +0.04% +0.16% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.202 ms / 100) 8.178 -> 8.195 ( +0.21%) [ +0.12% +0.20% +0.00% / +0.34% +0.34% +0.21%] index_select skip256 : Elapsed 0.082 ms (8.188 ms / 100) 8.197 -> 8.193 ( -0.05%) [ +0.00% +0.18% +0.10% / -0.05% +0.22% +0.38%] index_select spread : Elapsed 0.082 ms (8.197 ms / 100) 8.189 -> 8.198 ( +0.11%) [ +0.00% +0.15% +0.07% / +0.13% +0.17% +0.11%] index_select strided 3 : Elapsed 0.082 ms (8.189 ms / 100) 8.184 -> 8.199 ( +0.18%) [ +0.32% +0.23% +0.00% / +0.34% +0.27% +0.18%] index_select random : Elapsed 0.082 ms (8.210 ms / 100) 8.192 -> 8.213 ( +0.26%) [ +0.13% +0.09% +0.00% / +0.26% +0.40% +0.28%] index_select random_sorted : Elapsed 0.082 ms (8.203 ms / 100) out_shape = [40, 20, 5, 4] in_shape = [16, 20, 5, 4] idx_dim = 0 B = [40, 20, 5, 4] (stride (400, 1, 20, 100)) A = [16, 20, 5, 4] (stride (1, 80, 16, 1600)) dim = 0 3.887 -> 3.887 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.62% +0.85%] index_add_ linear : Elapsed 0.039 ms (3.887 ms / 100) 3.739 -> 3.741 ( +0.05%) [ +0.08% +0.03% +0.00% / +0.05% +0.64% +0.67%] index_copy_ linear : Elapsed 0.037 ms (3.742 ms / 100) 3.917 -> 3.917 ( +0.00%) [ +0.03% +0.10% +0.00% / +0.00% +0.61% +0.56%] index_add_ reverse : Elapsed 0.039 ms (3.918 ms / 100) 3.779 -> 3.781 ( +0.05%) [ +0.21% +0.13% +0.00% / +0.05% +0.79% +0.71%] index_copy_ reverse : Elapsed 0.038 ms (3.787 ms / 100) 3.889 -> 3.888 ( -0.03%) [ +0.05% +0.03% +0.00% / -0.03% +0.57% +0.75%] index_add_ spread : Elapsed 0.039 ms (3.891 ms / 100) 3.749 -> 3.751 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.51% +0.48%] index_copy_ spread : Elapsed 0.037 ms (3.749 ms / 100) 3.894 -> 3.895 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.64% +0.72%] index_add_ strided 3 : Elapsed 0.039 ms (3.894 ms / 100) 3.751 -> 3.750 ( -0.03%) [ +0.08% +0.00% +0.03% / -0.03% +0.48% +0.43%] index_copy_ strided 3 : Elapsed 0.038 ms (3.754 ms / 100) 3.918 -> 3.922 ( +0.10%) [ +0.10% +0.13% +0.00% / +0.10% +0.59% +0.48%] index_add_ strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 3.785 -> 3.788 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.61% +0.50%] index_copy_ strided 7 : Elapsed 0.038 ms (3.788 ms / 100) 3.884 -> 3.887 ( +0.08%) [ +0.10% +0.15% +0.00% / +0.08% +0.75% +0.80%] index_add_ perm : Elapsed 0.039 ms (3.888 ms / 100) 3.737 -> 3.739 ( +0.05%) [ +0.08% +0.13% +0.00% / +0.05% +0.62% +0.56%] index_copy_ perm : Elapsed 0.037 ms (3.740 ms / 100) 3.888 -> 3.886 ( -0.05%) [ +0.15% +0.00% +0.03% / -0.05% +0.75% +0.69%] index_add_ perm_sorted : Elapsed 0.039 ms (3.894 ms / 100) 3.741 -> 3.738 ( -0.08%) [ +0.11% +0.05% +0.00% / -0.08% +0.51% +0.56%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.745 ms / 100) 5.488 -> 5.493 ( +0.09%) [ +0.07% +0.00% +0.16% / +0.09% +0.18% +0.27%] index_select const : Elapsed 0.055 ms (5.492 ms / 100) 5.492 -> 5.489 ( -0.05%) [ +0.11% +0.09% +0.00% / -0.05% +0.13% +0.33%] index_select wrap : Elapsed 0.055 ms (5.498 ms / 100) 5.494 -> 5.494 ( +0.00%) [ +0.05% +0.04% +0.00% / +0.00% +0.15% +0.16%] index_select linear : Elapsed 0.055 ms (5.497 ms / 100) 5.492 -> 5.494 ( +0.04%) [ +0.00% +0.07% +0.07% / +0.04% +0.05% +0.15%] index_select reverse : Elapsed 0.055 ms (5.492 ms / 100) 5.494 -> 5.492 ( -0.04%) [ +0.00% +0.09% +0.16% / +0.00% -0.04% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.494 ms / 100) 5.497 -> 5.492 ( -0.09%) [ +0.11% +0.00% +0.11% / +0.00% -0.05% -0.09%] index_select skip256 : Elapsed 0.055 ms (5.503 ms / 100) 5.492 -> 5.494 ( +0.04%) [ +0.09% +0.02% +0.00% / +0.11% +0.04% +0.18%] index_select spread : Elapsed 0.055 ms (5.497 ms / 100) 5.489 -> 5.489 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.02% +0.00% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.489 ms / 100) 5.488 -> 5.495 ( +0.13%) [ +0.05% +0.07% +0.00% / +0.13% +0.13% +0.18%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.488 -> 5.495 ( +0.13%) [ +0.07% +0.15% +0.00% / +0.20% +0.13% +0.26%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.487 -> 5.495 ( +0.15%) [ +0.09% +0.05% +0.00% / +0.15% +0.16% +0.18%] index_select strided 8 : Elapsed 0.055 ms (5.492 ms / 100) 5.493 -> 5.493 ( +0.00%) [ +0.13% +0.02% +0.00% / +0.00% +0.00% +0.11%] index_select random : Elapsed 0.055 ms (5.500 ms / 100) 5.490 -> 5.492 ( +0.04%) [ +0.13% +0.00% +0.04% / +0.11% +0.04% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.497 ms / 100) B = [40, 20, 5, 4] (stride (1, 800, 160, 40)) A = [16, 20, 5, 4] (stride (5, 320, 1, 80)) dim = 0 3.472 -> 3.472 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.75% +0.69%] index_add_ linear : Elapsed 0.035 ms (3.472 ms / 100) 3.336 -> 3.335 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.87% +0.87%] index_copy_ linear : Elapsed 0.033 ms (3.336 ms / 100) 3.481 -> 3.481 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.72% +0.78%] index_add_ reverse : Elapsed 0.035 ms (3.483 ms / 100) 3.345 -> 3.344 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.87% +0.81%] index_copy_ reverse : Elapsed 0.033 ms (3.345 ms / 100) 3.473 -> 3.471 ( -0.06%) [ +0.09% +0.20% +0.00% / -0.06% +0.81% +0.78%] index_add_ spread : Elapsed 0.035 ms (3.476 ms / 100) 3.336 -> 3.332 ( -0.12%) [ +0.00% +0.06% +0.00% / -0.12% +0.72% +0.69%] index_copy_ spread : Elapsed 0.033 ms (3.336 ms / 100) 3.475 -> 3.476 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.75% +0.89%] index_add_ strided 3 : Elapsed 0.035 ms (3.477 ms / 100) 3.333 -> 3.335 ( +0.06%) [ +0.09% +0.12% +0.00% / +0.06% +0.66% +0.75%] index_copy_ strided 3 : Elapsed 0.033 ms (3.336 ms / 100) 3.478 -> 3.478 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.81% +0.86%] index_add_ strided 7 : Elapsed 0.035 ms (3.479 ms / 100) 3.342 -> 3.343 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.93% +1.02%] index_copy_ strided 7 : Elapsed 0.033 ms (3.342 ms / 100) 3.469 -> 3.470 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.92% +0.98%] index_add_ perm : Elapsed 0.035 ms (3.472 ms / 100) 3.336 -> 3.335 ( -0.03%) [ +0.00% +0.09% +0.00% / -0.03% +0.87% +0.99%] index_copy_ perm : Elapsed 0.033 ms (3.336 ms / 100) 3.475 -> 3.476 ( +0.03%) [ +0.09% +0.03% +0.00% / +0.03% +0.83% +0.86%] index_add_ perm_sorted : Elapsed 0.035 ms (3.478 ms / 100) 3.335 -> 3.331 ( -0.12%) [ +0.06% +0.03% +0.00% / -0.12% +0.78% +0.72%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.337 ms / 100) 5.383 -> 5.382 ( -0.02%) [ +0.00% +0.07% +0.00% / +0.00% +0.07% -0.02%] index_select const : Elapsed 0.054 ms (5.383 ms / 100) 5.396 -> 5.388 ( -0.15%) [ +0.11% +0.00% +0.06% / +0.00% -0.06% -0.15%] index_select wrap : Elapsed 0.054 ms (5.402 ms / 100) 5.392 -> 5.384 ( -0.15%) [ +0.13% +0.02% +0.00% / +0.06% -0.02% -0.15%] index_select linear : Elapsed 0.054 ms (5.399 ms / 100) 5.397 -> 5.393 ( -0.07%) [ +0.02% +0.06% +0.00% / -0.07% +0.00% -0.06%] index_select reverse : Elapsed 0.054 ms (5.398 ms / 100) 5.383 -> 5.380 ( -0.06%) [ +0.06% +0.22% +0.00% / -0.06% +0.11% +0.00%] index_select skip64 : Elapsed 0.054 ms (5.386 ms / 100) 5.379 -> 5.376 ( -0.06%) [ +0.13% +0.07% +0.00% / -0.06% +0.11% +0.13%] index_select skip256 : Elapsed 0.054 ms (5.386 ms / 100) 5.394 -> 5.388 ( -0.11%) [ +0.00% +0.00% +0.04% / -0.11% -0.06% -0.11%] index_select spread : Elapsed 0.054 ms (5.394 ms / 100) 5.396 -> 5.388 ( -0.15%) [ +0.06% +0.06% +0.00% / +0.02% -0.15% -0.15%] index_select strided 3 : Elapsed 0.054 ms (5.399 ms / 100) 5.390 -> 5.388 ( -0.04%) [ +0.11% +0.00% +0.04% / +0.17% +0.00% -0.04%] index_select strided 5 : Elapsed 0.054 ms (5.396 ms / 100) 5.391 -> 5.392 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +0.17% +0.04%] index_select strided 7 : Elapsed 0.054 ms (5.395 ms / 100) 5.382 -> 5.382 ( +0.00%) [ +0.00% +0.09% +0.19% / +0.00% +0.13% +0.22%] index_select strided 8 : Elapsed 0.054 ms (5.382 ms / 100) 5.394 -> 5.393 ( -0.02%) [ +0.00% +0.07% +0.00% / +0.00% +0.00% -0.02%] index_select random : Elapsed 0.054 ms (5.394 ms / 100) 5.393 -> 5.388 ( -0.09%) [ +0.11% +0.00% +0.02% / -0.09% +0.02% +0.00%] index_select random_sorted : Elapsed 0.054 ms (5.399 ms / 100) B = [40, 20, 5, 4] (stride (1, 800, 160, 40)) A = [16, 20, 5, 4] (stride (20, 1, 320, 1600)) dim = 0 3.623 -> 3.623 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.41%] index_add_ linear : Elapsed 0.036 ms (3.623 ms / 100) 3.465 -> 3.466 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.35% +0.40%] index_copy_ linear : Elapsed 0.035 ms (3.465 ms / 100) 3.635 -> 3.638 ( +0.08%) [ +0.00% +0.08% +0.11% / +0.08% +0.52% +0.69%] index_add_ reverse : Elapsed 0.036 ms (3.635 ms / 100) 3.482 -> 3.486 ( +0.11%) [ +0.00% +0.09% +0.03% / +0.11% +0.43% +0.63%] index_copy_ reverse : Elapsed 0.035 ms (3.482 ms / 100) 3.637 -> 3.638 ( +0.03%) [ +0.08% +0.00% +0.03% / +0.03% +0.60% +0.69%] index_add_ spread : Elapsed 0.036 ms (3.640 ms / 100) 3.475 -> 3.474 ( -0.03%) [ +0.06% +0.06% +0.00% / -0.03% +0.52% +0.46%] index_copy_ spread : Elapsed 0.035 ms (3.477 ms / 100) 3.598 -> 3.597 ( -0.03%) [ +0.06% +0.00% +0.00% / -0.03% +0.50% +0.47%] index_add_ strided 3 : Elapsed 0.036 ms (3.600 ms / 100) 3.450 -> 3.454 ( +0.12%) [ +0.12% +0.09% +0.00% / +0.12% +0.52% +0.61%] index_copy_ strided 3 : Elapsed 0.035 ms (3.454 ms / 100) 3.638 -> 3.639 ( +0.03%) [ +0.00% +0.05% +0.11% / +0.03% +0.52% +0.41%] index_add_ strided 7 : Elapsed 0.036 ms (3.638 ms / 100) 3.484 -> 3.486 ( +0.06%) [ +0.00% +0.11% +0.03% / +0.06% +0.46% +0.37%] index_copy_ strided 7 : Elapsed 0.035 ms (3.484 ms / 100) 3.618 -> 3.618 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.64% +0.64%] index_add_ perm : Elapsed 0.036 ms (3.620 ms / 100) 3.459 -> 3.461 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.66% +0.64%] index_copy_ perm : Elapsed 0.035 ms (3.461 ms / 100) 3.622 -> 3.623 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.41% +0.47%] index_add_ perm_sorted : Elapsed 0.036 ms (3.623 ms / 100) 3.462 -> 3.463 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.43% +0.43%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.463 ms / 100) 5.384 -> 5.389 ( +0.09%) [ +0.00% +0.06% +0.09% / +0.09% +0.17% +0.26%] index_select const : Elapsed 0.054 ms (5.384 ms / 100) 5.395 -> 5.391 ( -0.07%) [ +0.02% +0.00% +0.06% / -0.07% +0.13% +0.06%] index_select wrap : Elapsed 0.054 ms (5.396 ms / 100) 5.392 -> 5.404 ( +0.22%) [ +0.00% +0.15% +0.04% / +0.24% +0.22% +0.22%] index_select linear : Elapsed 0.054 ms (5.392 ms / 100) 5.394 -> 5.399 ( +0.09%) [ +0.11% +0.09% +0.00% / +0.09% +0.15% +0.30%] index_select reverse : Elapsed 0.054 ms (5.400 ms / 100) 5.391 -> 5.392 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.04% +0.07% +0.02%] index_select skip64 : Elapsed 0.054 ms (5.391 ms / 100) 5.391 -> 5.384 ( -0.13%) [ +0.04% +0.00% +0.00% / +0.00% -0.13% -0.06%] index_select skip256 : Elapsed 0.054 ms (5.393 ms / 100) 5.392 -> 5.390 ( -0.04%) [ +0.22% +0.00% +0.13% / +0.15% +0.13% -0.04%] index_select spread : Elapsed 0.054 ms (5.404 ms / 100) 5.393 -> 5.394 ( +0.02%) [ +0.00% +0.04% +0.06% / +0.04% +0.02% +0.07%] index_select strided 3 : Elapsed 0.054 ms (5.393 ms / 100) 5.394 -> 5.393 ( -0.02%) [ +0.00% +0.07% +0.02% / -0.02% +0.07% +0.17%] index_select strided 5 : Elapsed 0.054 ms (5.394 ms / 100) 5.390 -> 5.394 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.17% +0.07% +0.11%] index_select strided 7 : Elapsed 0.054 ms (5.394 ms / 100) 5.387 -> 5.392 ( +0.09%) [ +0.09% +0.00% +0.02% / +0.09% +0.09% +0.17%] index_select strided 8 : Elapsed 0.054 ms (5.392 ms / 100) 5.393 -> 5.397 ( +0.07%) [ +0.06% +0.00% +0.13% / +0.09% +0.15% +0.07%] index_select random : Elapsed 0.054 ms (5.396 ms / 100) 5.394 -> 5.395 ( +0.02%) [ +0.04% +0.00% +0.17% / +0.02% +0.13% +0.11%] index_select random_sorted : Elapsed 0.054 ms (5.396 ms / 100) B = [40, 20, 5, 4] (stride (5, 800, 1, 200)) A = [16, 20, 5, 4] (stride (1, 64, 1280, 16)) dim = 0 4.243 -> 4.241 ( -0.05%) [ +0.02% +0.05% +0.00% / -0.05% +0.92% +0.92%] index_add_ linear : Elapsed 0.042 ms (4.244 ms / 100) 4.104 -> 4.104 ( +0.00%) [ +0.00% +0.05% +0.07% / +0.00% +1.00% +1.02%] index_copy_ linear : Elapsed 0.041 ms (4.104 ms / 100) 4.225 -> 4.224 ( -0.02%) [ +0.19% +0.19% +0.00% / -0.02% +0.71% +0.73%] index_add_ reverse : Elapsed 0.042 ms (4.233 ms / 100) 4.085 -> 4.086 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.59% +0.66%] index_copy_ reverse : Elapsed 0.041 ms (4.085 ms / 100) 4.228 -> 4.227 ( -0.02%) [ +0.07% +0.00% +0.05% / -0.02% +0.64% +0.64%] index_add_ spread : Elapsed 0.042 ms (4.231 ms / 100) 4.084 -> 4.084 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.56% +0.61%] index_copy_ spread : Elapsed 0.041 ms (4.087 ms / 100) 4.225 -> 4.227 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.76% +0.69%] index_add_ strided 3 : Elapsed 0.042 ms (4.225 ms / 100) 4.081 -> 4.084 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.61% +0.64%] index_copy_ strided 3 : Elapsed 0.041 ms (4.083 ms / 100) 4.223 -> 4.234 ( +0.26%) [ +0.00% +0.05% +0.21% / +0.26% +0.92% +0.95%] index_add_ strided 7 : Elapsed 0.042 ms (4.223 ms / 100) 4.084 -> 4.085 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.83% +0.81%] index_copy_ strided 7 : Elapsed 0.041 ms (4.085 ms / 100) 4.231 -> 4.242 ( +0.26%) [ +0.50% +0.24% +0.00% / +0.26% +1.11% +1.28%] index_add_ perm : Elapsed 0.043 ms (4.252 ms / 100) 4.090 -> 4.100 ( +0.24%) [ +0.73% +0.34% +0.00% / +0.24% +0.98% +1.25%] index_copy_ perm : Elapsed 0.041 ms (4.120 ms / 100) 4.223 -> 4.222 ( -0.02%) [ +0.07% +0.00% +0.02% / -0.02% +0.83% +0.76%] index_add_ perm_sorted : Elapsed 0.042 ms (4.226 ms / 100) 4.076 -> 4.079 ( +0.07%) [ +0.10% +0.10% +0.00% / +0.07% +0.91% +0.74%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.080 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.05% +0.00% +0.07% / +0.05% -0.11% -0.09%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.561 -> 5.556 ( -0.09%) [ +0.23% +0.18% +0.00% / +0.07% -0.09% -0.07%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.00% +0.11% +0.14% / +0.09% +0.09% -0.02%] index_select linear : Elapsed 0.056 ms (5.561 ms / 100) 5.567 -> 5.560 ( -0.13%) [ +0.09% +0.07% +0.00% / +0.05% -0.13% -0.07%] index_select reverse : Elapsed 0.056 ms (5.572 ms / 100) 5.558 -> 5.561 ( +0.05%) [ +0.18% +0.07% +0.00% / +0.05% +0.09% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.568 ms / 100) 5.554 -> 5.557 ( +0.05%) [ +0.09% +0.22% +0.00% / +0.05% +0.22% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.563 -> 5.556 ( -0.13%) [ +0.00% +0.04% +0.00% / +0.05% -0.13% -0.07%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.560 -> 5.557 ( -0.05%) [ +0.00% +0.14% +0.11% / +0.09% +0.09% -0.05%] index_select strided 3 : Elapsed 0.056 ms (5.560 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.11% +0.02% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.563 ms / 100) 5.562 -> 5.565 ( +0.05%) [ +0.11% +0.00% +0.02% / +0.05% +0.29% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.568 ms / 100) 5.559 -> 5.568 ( +0.16%) [ +0.29% +0.11% +0.00% / +0.16% +0.32% +0.47%] index_select strided 8 : Elapsed 0.056 ms (5.575 ms / 100) 5.561 -> 5.567 ( +0.11%) [ +0.16% +0.00% +0.11% / +0.23% +0.11% +0.27%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.565 -> 5.560 ( -0.09%) [ +0.16% +0.00% +0.02% / -0.09% -0.02% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [40, 20, 5, 4] (stride (1, 800, 40, 200)) A = [16, 20, 5, 4] (stride (80, 1, 1280, 20)) dim = 0 4.153 -> 4.158 ( +0.12%) [ +0.12% +0.02% +0.00% / +0.12% +0.58% +0.63%] index_add_ linear : Elapsed 0.042 ms (4.158 ms / 100) 4.010 -> 4.011 ( +0.02%) [ +0.05% +0.07% +0.00% / +0.02% +0.60% +0.77%] index_copy_ linear : Elapsed 0.040 ms (4.012 ms / 100) 4.177 -> 4.175 ( -0.05%) [ +0.00% +0.05% +0.07% / -0.05% +0.50% +0.55%] index_add_ reverse : Elapsed 0.042 ms (4.177 ms / 100) 4.034 -> 4.035 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.52% +0.57%] index_copy_ reverse : Elapsed 0.040 ms (4.035 ms / 100) 4.163 -> 4.162 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.53% +0.50%] index_add_ spread : Elapsed 0.042 ms (4.164 ms / 100) 4.031 -> 4.031 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.57%] index_copy_ spread : Elapsed 0.040 ms (4.031 ms / 100) 4.179 -> 4.178 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.45% +0.50%] index_add_ strided 3 : Elapsed 0.042 ms (4.179 ms / 100) 4.055 -> 4.039 ( -0.39%) [ +0.05% +0.02% +0.00% / -0.39% +0.47% +0.49%] index_copy_ strided 3 : Elapsed 0.041 ms (4.057 ms / 100) 4.180 -> 4.183 ( +0.07%) [ +0.12% +0.10% +0.00% / +0.07% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.042 ms (4.185 ms / 100) 4.036 -> 4.037 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.50% +0.57%] index_copy_ strided 7 : Elapsed 0.040 ms (4.038 ms / 100) 4.156 -> 4.157 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.58% +0.41%] index_add_ perm : Elapsed 0.042 ms (4.157 ms / 100) 4.009 -> 4.012 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.60% +0.52%] index_copy_ perm : Elapsed 0.040 ms (4.012 ms / 100) 4.156 -> 4.159 ( +0.07%) [ +0.12% +0.02% +0.00% / +0.07% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.042 ms (4.161 ms / 100) 4.011 -> 4.014 ( +0.07%) [ +0.05% +0.05% +0.00% / +0.07% +0.50% +0.47%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.013 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.00% +0.05% +0.04% / +0.05% +0.05% +0.09%] index_select const : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.573 ( +0.18%) [ +0.11% +0.16% +0.00% / +0.34% +0.18% +0.27%] index_select wrap : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.00% +0.07% +0.04% / +0.00% +0.25% +0.16%] index_select linear : Elapsed 0.056 ms (5.565 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.00% +0.14% +0.11% / +0.16% +0.13% +0.05%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.564 -> 5.559 ( -0.09%) [ +0.18% +0.04% +0.00% / +0.11% -0.09% -0.07%] index_select skip64 : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.561 ( -0.11%) [ +0.05% +0.20% +0.00% / -0.04% -0.05% -0.11%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.11% +0.00% +0.11% / +0.07% +0.11% -0.04%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.565 -> 5.568 ( +0.05%) [ +0.04% +0.00% +0.00% / +0.05% +0.14% +0.09%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.00% +0.11% +0.04% / -0.02% +0.23% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.567 ( +0.11%) [ +0.07% +0.00% +0.13% / +0.11% +0.27% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.565 ms / 100) 5.557 -> 5.563 ( +0.11%) [ +0.05% +0.00% +0.00% / +0.11% +0.16% +0.27%] index_select strided 8 : Elapsed 0.056 ms (5.560 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.00% +0.02% +0.04% / +0.07% +0.04% +0.13%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.567 ( -0.02%) [ +0.11% +0.13% +0.00% / -0.02% +0.04% +0.04%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) B = [40, 20, 5, 4] (stride (4, 160, 3200, 1)) A = [16, 20, 5, 4] (stride (400, 20, 4, 1)) dim = 0 3.319 -> 3.321 ( +0.06%) [ +0.18% +0.00% +0.09% / +0.06% +0.78% +0.90%] index_add_ linear : Elapsed 0.033 ms (3.325 ms / 100) 3.193 -> 3.193 ( +0.00%) [ +0.16% +0.09% +0.00% / +0.00% +0.78% +0.94%] index_copy_ linear : Elapsed 0.032 ms (3.198 ms / 100) 3.323 -> 3.324 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.72% +0.69%] index_add_ reverse : Elapsed 0.033 ms (3.324 ms / 100) 3.191 -> 3.192 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.72% +0.69%] index_copy_ reverse : Elapsed 0.032 ms (3.192 ms / 100) 3.324 -> 3.323 ( -0.03%) [ +0.00% +0.06% +0.00% / -0.03% +0.66% +0.69%] index_add_ spread : Elapsed 0.033 ms (3.324 ms / 100) 3.201 -> 3.200 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.72% +0.62%] index_copy_ spread : Elapsed 0.032 ms (3.202 ms / 100) 3.317 -> 3.318 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.63% +0.66%] index_add_ strided 3 : Elapsed 0.033 ms (3.317 ms / 100) 3.190 -> 3.191 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.66% +0.66%] index_copy_ strided 3 : Elapsed 0.032 ms (3.190 ms / 100) 3.322 -> 3.322 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.72% +0.78%] index_add_ strided 7 : Elapsed 0.033 ms (3.323 ms / 100) 3.190 -> 3.190 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.75% +0.85%] index_copy_ strided 7 : Elapsed 0.032 ms (3.190 ms / 100) 3.317 -> 3.320 ( +0.09%) [ +0.24% +0.09% +0.00% / +0.09% +1.03% +0.90%] index_add_ perm : Elapsed 0.033 ms (3.325 ms / 100) 3.194 -> 3.194 ( +0.00%) [ +0.13% +0.06% +0.00% / +0.00% +0.94% +0.72%] index_copy_ perm : Elapsed 0.032 ms (3.198 ms / 100) 3.313 -> 3.314 ( +0.03%) [ +0.06% +0.09% +0.00% / +0.03% +0.81% +0.81%] index_add_ perm_sorted : Elapsed 0.033 ms (3.315 ms / 100) 3.186 -> 3.186 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.82% +0.85%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.187 ms / 100) 5.386 -> 5.386 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.02% +0.00% +0.07%] index_select const : Elapsed 0.054 ms (5.390 ms / 100) 5.395 -> 5.389 ( -0.11%) [ +0.00% +0.17% +0.11% / +0.07% -0.11% -0.11%] index_select wrap : Elapsed 0.054 ms (5.395 ms / 100) 5.398 -> 5.391 ( -0.13%) [ +0.11% +0.06% +0.00% / -0.04% +0.00% -0.13%] index_select linear : Elapsed 0.054 ms (5.404 ms / 100) 5.397 -> 5.394 ( -0.06%) [ +0.15% +0.00% +0.07% / -0.02% -0.04% -0.06%] index_select reverse : Elapsed 0.054 ms (5.405 ms / 100) 5.382 -> 5.385 ( +0.06%) [ +0.13% +0.11% +0.00% / +0.20% +0.15% +0.06%] index_select skip64 : Elapsed 0.054 ms (5.389 ms / 100) 5.388 -> 5.385 ( -0.06%) [ +0.02% +0.02% +0.00% / -0.06% +0.02% +0.11%] index_select skip256 : Elapsed 0.054 ms (5.389 ms / 100) 5.397 -> 5.393 ( -0.07%) [ +0.00% +0.04% +0.13% / +0.11% -0.07% +0.02%] index_select spread : Elapsed 0.054 ms (5.397 ms / 100) 5.398 -> 5.393 ( -0.09%) [ +0.17% +0.00% +0.09% / +0.04% -0.06% -0.09%] index_select strided 3 : Elapsed 0.054 ms (5.407 ms / 100) 5.396 -> 5.394 ( -0.04%) [ +0.02% +0.15% +0.00% / +0.06% -0.04% -0.02%] index_select strided 5 : Elapsed 0.054 ms (5.397 ms / 100) 5.396 -> 5.394 ( -0.04%) [ +0.06% +0.00% +0.00% / -0.04% +0.09% +0.02%] index_select strided 7 : Elapsed 0.054 ms (5.399 ms / 100) 5.387 -> 5.385 ( -0.04%) [ +0.00% +0.04% +0.00% / -0.04% +0.13% -0.04%] index_select strided 8 : Elapsed 0.054 ms (5.387 ms / 100) 5.397 -> 5.395 ( -0.04%) [ +0.09% +0.07% +0.00% / +0.04% -0.04% +0.06%] index_select random : Elapsed 0.054 ms (5.402 ms / 100) 5.394 -> 5.386 ( -0.15%) [ +0.00% +0.06% +0.13% / -0.15% +0.02% +0.09%] index_select random_sorted : Elapsed 0.054 ms (5.394 ms / 100) B = [40, 20, 5, 4] (stride (5, 200, 1, 4000)) A = [16, 20, 5, 4] (stride (100, 5, 1, 1600)) dim = 0 3.663 -> 3.663 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.60% +0.63%] index_add_ linear : Elapsed 0.037 ms (3.663 ms / 100) 3.531 -> 3.533 ( +0.06%) [ +0.00% +0.08% +0.08% / +0.06% +0.65% +0.71%] index_copy_ linear : Elapsed 0.035 ms (3.531 ms / 100) 3.668 -> 3.668 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.68% +0.65%] index_add_ reverse : Elapsed 0.037 ms (3.669 ms / 100) 3.533 -> 3.535 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.71% +0.74%] index_copy_ reverse : Elapsed 0.035 ms (3.535 ms / 100) 3.672 -> 3.677 ( +0.14%) [ +0.05% +0.08% +0.00% / +0.14% +0.54% +0.52%] index_add_ spread : Elapsed 0.037 ms (3.674 ms / 100) 3.560 -> 3.565 ( +0.14%) [ +0.00% +0.06% +0.00% / +0.14% +0.39% +0.39%] index_copy_ spread : Elapsed 0.036 ms (3.560 ms / 100) 3.671 -> 3.676 ( +0.14%) [ +0.08% +0.00% +0.05% / +0.14% +0.52% +0.54%] index_add_ strided 3 : Elapsed 0.037 ms (3.674 ms / 100) 3.542 -> 3.546 ( +0.11%) [ +0.00% +0.03% +0.08% / +0.11% +0.48% +0.48%] index_copy_ strided 3 : Elapsed 0.035 ms (3.542 ms / 100) 3.672 -> 3.672 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.44% +0.44%] index_add_ strided 7 : Elapsed 0.037 ms (3.672 ms / 100) 3.539 -> 3.540 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.40% +0.45%] index_copy_ strided 7 : Elapsed 0.035 ms (3.539 ms / 100) 3.666 -> 3.663 ( -0.08%) [ +0.05% +0.00% +0.05% / -0.08% +0.44% +0.41%] index_add_ perm : Elapsed 0.037 ms (3.668 ms / 100) 3.536 -> 3.531 ( -0.14%) [ +0.06% +0.00% +0.03% / -0.14% +0.42% +0.45%] index_copy_ perm : Elapsed 0.035 ms (3.538 ms / 100) 3.666 -> 3.665 ( -0.03%) [ +0.08% +0.00% +0.08% / -0.03% +0.41% +0.49%] index_add_ perm_sorted : Elapsed 0.037 ms (3.669 ms / 100) 3.538 -> 3.538 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.37% +0.42%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.539 ms / 100) 5.483 -> 5.479 ( -0.07%) [ +0.05% +0.00% +0.05% / -0.07% +0.05% +0.07%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.04% +0.00% +0.02% / +0.00% +0.00% +0.18%] index_select wrap : Elapsed 0.055 ms (5.493 ms / 100) 5.491 -> 5.491 ( +0.00%) [ +0.04% +0.11% +0.00% / +0.00% +0.18% +0.22%] index_select linear : Elapsed 0.055 ms (5.493 ms / 100) 5.486 -> 5.488 ( +0.04%) [ +0.07% +0.13% +0.00% / +0.04% +0.09% +0.27%] index_select reverse : Elapsed 0.055 ms (5.490 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.04% +0.04% +0.13%] index_select skip64 : Elapsed 0.055 ms (5.481 ms / 100) 5.488 -> 5.482 ( -0.11%) [ +0.05% +0.07% +0.00% / -0.11% -0.09% -0.09%] index_select skip256 : Elapsed 0.055 ms (5.491 ms / 100) 5.485 -> 5.492 ( +0.13%) [ +0.00% +0.11% +0.09% / +0.16% +0.13% +0.24%] index_select spread : Elapsed 0.055 ms (5.485 ms / 100) 5.492 -> 5.490 ( -0.04%) [ +0.00% +0.02% +0.02% / -0.04% +0.20% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.492 ms / 100) 5.490 -> 5.489 ( -0.02%) [ +0.05% +0.00% +0.04% / -0.02% +0.09% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.493 ms / 100) 5.487 -> 5.492 ( +0.09%) [ +0.00% +0.05% +0.00% / +0.11% +0.09% +0.20%] index_select strided 7 : Elapsed 0.055 ms (5.487 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.02% +0.05% +0.00% / +0.04% +0.15% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.481 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.07% +0.07% +0.00% / -0.07% -0.04% -0.11%] index_select random : Elapsed 0.055 ms (5.494 ms / 100) 5.484 -> 5.489 ( +0.09%) [ +0.20% +0.00% +0.13% / +0.09% +0.18% +0.15%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [40, 20, 5, 4] (stride (20, 1, 800, 4000)) A = [16, 20, 5, 4] (stride (20, 1, 320, 1600)) dim = 0 4.280 -> 4.279 ( -0.02%) [ +0.00% +0.07% +0.00% / -0.02% +0.65% +0.65%] index_add_ linear : Elapsed 0.043 ms (4.280 ms / 100) 4.140 -> 4.141 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.65% +0.68%] index_copy_ linear : Elapsed 0.041 ms (4.142 ms / 100) 4.278 -> 4.279 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.94% +0.79%] index_add_ reverse : Elapsed 0.043 ms (4.278 ms / 100) 4.145 -> 4.147 ( +0.05%) [ +0.05% +0.00% +0.02% / +0.05% +1.16% +0.77%] index_copy_ reverse : Elapsed 0.041 ms (4.147 ms / 100) 4.267 -> 4.266 ( -0.02%) [ +0.00% +0.00% +0.00% / -0.02% +0.68% +0.75%] index_add_ spread : Elapsed 0.043 ms (4.267 ms / 100) 4.123 -> 4.125 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.87% +0.87%] index_copy_ spread : Elapsed 0.041 ms (4.125 ms / 100) 4.289 -> 4.291 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.05% +0.61% +0.56%] index_add_ strided 3 : Elapsed 0.043 ms (4.289 ms / 100) 4.129 -> 4.133 ( +0.10%) [ +0.07% +0.05% +0.00% / +0.10% +0.73% +0.73%] index_copy_ strided 3 : Elapsed 0.041 ms (4.132 ms / 100) 4.278 -> 4.280 ( +0.05%) [ +0.00% +0.05% +0.02% / +0.05% +0.79% +0.82%] index_add_ strided 7 : Elapsed 0.043 ms (4.278 ms / 100) 4.147 -> 4.147 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.77% +0.80%] index_copy_ strided 7 : Elapsed 0.041 ms (4.147 ms / 100) 4.281 -> 4.283 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.72% +0.72%] index_add_ perm : Elapsed 0.043 ms (4.283 ms / 100) 4.140 -> 4.141 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +0.77% +0.72%] index_copy_ perm : Elapsed 0.041 ms (4.142 ms / 100) 4.283 -> 4.284 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.79% +0.79%] index_add_ perm_sorted : Elapsed 0.043 ms (4.284 ms / 100) 4.127 -> 4.128 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.73% +0.82%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.127 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.11% +0.00% +0.14% / +0.13% -0.04% +0.07%] index_select const : Elapsed 0.056 ms (5.574 ms / 100) 5.584 -> 5.577 ( -0.13%) [ +0.00% +0.02% +0.04% / +0.02% +0.05% -0.13%] index_select wrap : Elapsed 0.056 ms (5.584 ms / 100) 5.579 -> 5.580 ( +0.02%) [ +0.22% +0.00% +0.09% / +0.02% +0.05% +0.04%] index_select linear : Elapsed 0.056 ms (5.591 ms / 100) 5.578 -> 5.581 ( +0.05%) [ +0.00% +0.05% +0.22% / +0.05% +0.11% +0.14%] index_select reverse : Elapsed 0.056 ms (5.578 ms / 100) 5.564 -> 5.570 ( +0.11%) [ +0.18% +0.00% +0.11% / +0.11% +0.18% +0.29%] index_select skip64 : Elapsed 0.056 ms (5.574 ms / 100) 5.563 -> 5.571 ( +0.14%) [ +0.04% +0.00% +0.04% / +0.14% +0.20% +0.29%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.578 -> 5.580 ( +0.04%) [ +0.00% +0.16% +0.11% / +0.11% +0.04% +0.07%] index_select spread : Elapsed 0.056 ms (5.578 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.00% +0.14% +0.04% / +0.09% -0.02% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.578 ms / 100) 5.574 -> 5.575 ( +0.02%) [ +0.00% +0.14% +0.05% / +0.11% +0.14% +0.02%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.573 -> 5.577 ( +0.07%) [ +0.13% +0.23% +0.00% / +0.09% +0.07% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.580 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.05% +0.00% +0.13% / +0.04% +0.07% +0.27%] index_select strided 8 : Elapsed 0.056 ms (5.572 ms / 100) 5.579 -> 5.579 ( +0.00%) [ +0.13% +0.16% +0.00% / +0.11% +0.00% +0.02%] index_select random : Elapsed 0.056 ms (5.586 ms / 100) 5.580 -> 5.573 ( -0.13%) [ +0.02% +0.09% +0.00% / -0.02% +0.02% -0.13%] index_select random_sorted : Elapsed 0.056 ms (5.581 ms / 100) B = [40, 20, 5, 4] (stride (1, 40, 800, 4000)) A = [16, 20, 5, 4] (stride (80, 4, 1280, 1)) dim = 0 4.003 -> 4.004 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.40% +0.45%] index_add_ linear : Elapsed 0.040 ms (4.003 ms / 100) 3.878 -> 3.879 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.39% +0.49%] index_copy_ linear : Elapsed 0.039 ms (3.878 ms / 100) 4.009 -> 4.010 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.57% +0.65%] index_add_ reverse : Elapsed 0.040 ms (4.010 ms / 100) 3.883 -> 3.888 ( +0.13%) [ +0.18% +0.13% +0.00% / +0.13% +0.72% +0.62%] index_copy_ reverse : Elapsed 0.039 ms (3.890 ms / 100) 4.019 -> 4.019 ( +0.00%) [ +0.10% +0.00% +0.10% / +0.00% +0.62% +0.52%] index_add_ spread : Elapsed 0.040 ms (4.023 ms / 100) 3.889 -> 3.890 ( +0.03%) [ +0.10% +0.00% +0.15% / +0.03% +0.67% +0.85%] index_copy_ spread : Elapsed 0.039 ms (3.893 ms / 100) 4.014 -> 4.013 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.50% +0.52%] index_add_ strided 3 : Elapsed 0.040 ms (4.014 ms / 100) 3.883 -> 3.881 ( -0.05%) [ +0.00% +0.15% +0.10% / -0.05% +0.57% +0.57%] index_copy_ strided 3 : Elapsed 0.039 ms (3.883 ms / 100) 4.013 -> 4.013 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.47% +0.45%] index_add_ strided 7 : Elapsed 0.040 ms (4.014 ms / 100) 3.888 -> 3.887 ( -0.03%) [ +0.05% +0.08% +0.00% / -0.03% +0.46% +0.44%] index_copy_ strided 7 : Elapsed 0.039 ms (3.890 ms / 100) 3.998 -> 4.002 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.63% +0.63%] index_add_ perm : Elapsed 0.040 ms (4.002 ms / 100) 3.874 -> 3.874 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.67% +0.77%] index_copy_ perm : Elapsed 0.039 ms (3.874 ms / 100) 4.002 -> 4.002 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.50% +0.45%] index_add_ perm_sorted : Elapsed 0.040 ms (4.003 ms / 100) 3.876 -> 3.877 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.52% +0.52%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.877 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.05% +0.00% +0.00% / +0.04% +0.27% +0.16%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.571 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.05% +0.20% +0.14%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.574 ( +0.04%) [ +0.11% +0.04% +0.00% / +0.04% +0.13% +0.25%] index_select linear : Elapsed 0.056 ms (5.578 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.05% +0.16% +0.16%] index_select reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.569 -> 5.558 ( -0.20%) [ +0.13% +0.07% +0.00% / -0.02% -0.02% -0.20%] index_select skip64 : Elapsed 0.056 ms (5.576 ms / 100) 5.565 -> 5.558 ( -0.13%) [ +0.00% +0.05% +0.00% / -0.05% -0.13% -0.13%] index_select skip256 : Elapsed 0.056 ms (5.565 ms / 100) 5.569 -> 5.570 ( +0.02%) [ +0.14% +0.00% +0.11% / +0.02% +0.11% +0.11%] index_select spread : Elapsed 0.056 ms (5.577 ms / 100) 5.567 -> 5.574 ( +0.13%) [ +0.00% +0.02% +0.20% / +0.13% +0.16% +0.13%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.18% +0.13%] index_select strided 5 : Elapsed 0.056 ms (5.576 ms / 100) 5.568 -> 5.565 ( -0.05%) [ +0.09% +0.00% +0.11% / -0.05% +0.14% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.561 -> 5.557 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.09% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.571 -> 5.572 ( +0.02%) [ +0.02% +0.11% +0.00% / +0.07% +0.02% +0.11%] index_select random : Elapsed 0.056 ms (5.572 ms / 100) 5.573 -> 5.573 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.04% +0.05% +0.00%] index_select random_sorted : Elapsed 0.056 ms (5.575 ms / 100) out_shape = [16, 40, 5, 4] in_shape = [16, 20, 5, 4] idx_dim = 1 B = [16, 40, 5, 4] (stride (800, 5, 1, 200)) A = [16, 20, 5, 4] (stride (400, 4, 80, 1)) dim = 1 2.447 -> 2.458 ( +0.45%) [ +0.25% +0.00% +0.04% / +0.45% +0.90% +0.86%] index_add_ linear : Elapsed 0.025 ms (2.453 ms / 100) 2.445 -> 2.457 ( +0.49%) [ +0.08% +0.20% +0.00% / +0.49% +0.94% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.447 ms / 100) 2.453 -> 2.461 ( +0.33%) [ +0.08% +0.20% +0.00% / +0.33% +0.73% +0.61%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.450 -> 2.458 ( +0.33%) [ +0.04% +0.00% +0.00% / +0.33% +0.78% +0.69%] index_copy_ reverse : Elapsed 0.025 ms (2.451 ms / 100) 2.471 -> 2.479 ( +0.32%) [ +0.00% +0.12% +0.00% / +0.45% +0.49% +0.32%] index_add_ spread : Elapsed 0.025 ms (2.471 ms / 100) 2.475 -> 2.486 ( +0.44%) [ +0.16% +0.00% +0.12% / +0.44% +0.81% +0.69%] index_copy_ spread : Elapsed 0.025 ms (2.479 ms / 100) 2.468 -> 2.472 ( +0.16%) [ +0.04% +0.04% +0.00% / +0.32% +0.49% +0.16%] index_add_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.486 ( +0.65%) [ +0.40% +0.16% +0.00% / +0.65% +0.73% +0.69%] index_copy_ strided 3 : Elapsed 0.025 ms (2.480 ms / 100) 2.467 -> 2.474 ( +0.28%) [ +0.12% +0.00% +0.16% / +0.41% +0.28% +0.36%] index_add_ strided 7 : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.484 ( +0.57%) [ +0.20% +0.16% +0.00% / +0.57% +0.65% +0.85%] index_copy_ strided 7 : Elapsed 0.025 ms (2.475 ms / 100) 2.461 -> 2.475 ( +0.57%) [ +0.04% +0.08% +0.00% / +0.61% +0.69% +0.57%] index_add_ perm : Elapsed 0.025 ms (2.462 ms / 100) 2.464 -> 2.475 ( +0.45%) [ +0.12% +0.04% +0.00% / +0.45% +0.65% +0.77%] index_copy_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.465 -> 2.475 ( +0.41%) [ +0.00% +0.00% +0.00% / +0.45% +0.41% +0.49%] index_add_ perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.466 -> 2.477 ( +0.45%) [ +0.00% +0.20% +0.08% / +0.57% +0.65% +0.45%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.466 ms / 100) 4.495 -> 4.492 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.11% +0.18%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.493 -> 4.500 ( +0.16%) [ +0.22% +0.13% +0.00% / +0.18% +0.33% +0.16%] index_select wrap : Elapsed 0.045 ms (4.503 ms / 100) 4.501 -> 4.500 ( -0.02%) [ +0.04% +0.02% +0.00% / -0.02% +0.07% +0.22%] index_select linear : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.22% +0.02% +0.00% / +0.07% +0.18% +0.31%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.494 -> 4.493 ( -0.02%) [ +0.09% +0.00% +0.09% / -0.02% +0.04% +0.27%] index_select skip64 : Elapsed 0.045 ms (4.498 ms / 100) 4.494 -> 4.493 ( -0.02%) [ +0.04% +0.00% +0.13% / +0.07% +0.13% -0.02%] index_select skip256 : Elapsed 0.045 ms (4.496 ms / 100) 4.503 -> 4.500 ( -0.07%) [ +0.09% +0.00% +0.02% / +0.07% -0.07% +0.07%] index_select spread : Elapsed 0.045 ms (4.507 ms / 100) 4.498 -> 4.502 ( +0.09%) [ +0.18% +0.00% +0.20% / +0.09% +0.38% +0.11%] index_select strided 3 : Elapsed 0.045 ms (4.506 ms / 100) 4.491 -> 4.499 ( +0.18%) [ +0.13% +0.07% +0.00% / +0.18% +0.20% +0.22%] index_select strided 5 : Elapsed 0.045 ms (4.497 ms / 100) 4.497 -> 4.505 ( +0.18%) [ +0.04% +0.04% +0.00% / +0.24% +0.18% +0.31%] index_select strided 7 : Elapsed 0.045 ms (4.499 ms / 100) 4.485 -> 4.497 ( +0.27%) [ +0.00% +0.29% +0.25% / +0.33% +0.27% +0.45%] index_select strided 8 : Elapsed 0.045 ms (4.485 ms / 100) 4.490 -> 4.492 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.38% +0.36%] index_select strided 16 : Elapsed 0.045 ms (4.490 ms / 100) 4.495 -> 4.502 ( +0.16%) [ +0.16% +0.09% +0.00% / +0.16% +0.24% +0.27%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.496 -> 4.504 ( +0.18%) [ +0.27% +0.02% +0.00% / +0.27% +0.18% +0.36%] index_select random_sorted : Elapsed 0.045 ms (4.508 ms / 100) B = [16, 40, 5, 4] (stride (800, 1, 40, 200)) A = [16, 20, 5, 4] (stride (20, 320, 4, 1)) dim = 1 2.411 -> 2.421 ( +0.41%) [ +0.17% +0.12% +0.00% / +0.41% +0.58% +0.66%] index_add_ linear : Elapsed 0.024 ms (2.415 ms / 100) 2.408 -> 2.422 ( +0.58%) [ +0.17% +0.00% +0.33% / +0.58% +0.66% +0.83%] index_copy_ linear : Elapsed 0.024 ms (2.412 ms / 100) 2.406 -> 2.419 ( +0.54%) [ +0.00% +0.04% +0.12% / +0.54% +0.87% +1.08%] index_add_ reverse : Elapsed 0.024 ms (2.406 ms / 100) 2.403 -> 2.417 ( +0.58%) [ +0.17% +0.00% +0.00% / +0.58% +0.92% +1.08%] index_copy_ reverse : Elapsed 0.024 ms (2.407 ms / 100) 2.416 -> 2.429 ( +0.54%) [ +0.04% +0.17% +0.00% / +0.54% +0.99% +1.08%] index_add_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.424 -> 2.439 ( +0.62%) [ +0.00% +0.12% +0.12% / +0.62% +0.99% +1.20%] index_copy_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.423 -> 2.433 ( +0.41%) [ +0.21% +0.00% +0.25% / +0.50% +0.54% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.428 ms / 100) 2.430 -> 2.439 ( +0.37%) [ +0.00% +0.00% +0.12% / +0.37% +0.78% +0.49%] index_copy_ strided 3 : Elapsed 0.024 ms (2.430 ms / 100) 2.427 -> 2.436 ( +0.37%) [ +0.08% +0.00% +0.12% / +0.37% +0.41% +0.49%] index_add_ strided 7 : Elapsed 0.024 ms (2.429 ms / 100) 2.425 -> 2.440 ( +0.62%) [ +0.21% +0.00% +0.12% / +0.62% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.024 ms (2.430 ms / 100) 2.428 -> 2.428 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.41% +0.12% +0.00%] index_add_ perm : Elapsed 0.024 ms (2.428 ms / 100) 2.433 -> 2.440 ( +0.29%) [ +0.21% +0.04% +0.00% / +0.45% +0.37% +0.29%] index_copy_ perm : Elapsed 0.024 ms (2.438 ms / 100) 2.427 -> 2.432 ( +0.21%) [ +0.04% +0.08% +0.00% / +0.62% +0.21% +0.33%] index_add_ perm_sorted : Elapsed 0.024 ms (2.428 ms / 100) 2.431 -> 2.438 ( +0.29%) [ +0.29% +0.00% +0.12% / +0.58% +0.33% +0.29%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.438 ms / 100) 4.432 -> 4.431 ( -0.02%) [ +0.00% +0.09% +0.02% / -0.02% +0.09% -0.02%] index_select const : Elapsed 0.044 ms (4.432 ms / 100) 4.441 -> 4.443 ( +0.05%) [ +0.09% +0.00% +0.00% / +0.05% +0.16% +0.07%] index_select wrap : Elapsed 0.044 ms (4.445 ms / 100) 4.443 -> 4.441 ( -0.05%) [ +0.07% +0.00% +0.02% / -0.05% -0.02% +0.11%] index_select linear : Elapsed 0.044 ms (4.446 ms / 100) 4.442 -> 4.440 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.00% +0.02%] index_select reverse : Elapsed 0.044 ms (4.442 ms / 100) 4.433 -> 4.433 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.14% +0.05% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.433 ms / 100) 4.434 -> 4.432 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.05% +0.00% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.437 ms / 100) 4.437 -> 4.441 ( +0.09%) [ +0.16% +0.00% +0.05% / +0.09% +0.20% +0.25%] index_select spread : Elapsed 0.044 ms (4.444 ms / 100) 4.444 -> 4.442 ( -0.05%) [ +0.00% +0.11% +0.02% / -0.05% -0.02% +0.02%] index_select strided 3 : Elapsed 0.044 ms (4.444 ms / 100) 4.435 -> 4.432 ( -0.07%) [ +0.00% +0.25% +0.11% / +0.05% -0.07% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.435 ms / 100) 4.438 -> 4.445 ( +0.16%) [ +0.00% +0.05% +0.14% / +0.18% +0.16% +0.16%] index_select strided 7 : Elapsed 0.044 ms (4.438 ms / 100) 4.437 -> 4.436 ( -0.02%) [ +0.09% +0.05% +0.00% / +0.02% -0.02% +0.00%] index_select strided 8 : Elapsed 0.044 ms (4.441 ms / 100) 4.434 -> 4.430 ( -0.09%) [ +0.00% +0.20% +0.14% / +0.20% -0.09% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.434 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.02% +0.11% +0.00% / +0.07% -0.07% +0.11%] index_select random : Elapsed 0.044 ms (4.445 ms / 100) 4.440 -> 4.441 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.05% +0.02% +0.02%] index_select random_sorted : Elapsed 0.044 ms (4.441 ms / 100) B = [16, 40, 5, 4] (stride (800, 1, 40, 200)) A = [16, 20, 5, 4] (stride (20, 320, 1, 5)) dim = 1 2.451 -> 2.465 ( +0.57%) [ +0.12% +0.24% +0.00% / +0.57% +0.65% +0.57%] index_add_ linear : Elapsed 0.025 ms (2.454 ms / 100) 2.454 -> 2.464 ( +0.41%) [ +0.12% +0.20% +0.00% / +0.41% +0.65% +0.49%] index_copy_ linear : Elapsed 0.025 ms (2.457 ms / 100) 2.460 -> 2.466 ( +0.24%) [ +0.04% +0.00% +0.04% / +0.24% +0.37% +0.37%] index_add_ reverse : Elapsed 0.025 ms (2.461 ms / 100) 2.457 -> 2.464 ( +0.28%) [ +0.04% +0.04% +0.00% / +0.28% +0.45% +0.53%] index_copy_ reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.470 -> 2.477 ( +0.28%) [ +0.00% +0.00% +0.08% / +0.45% +0.28% +0.49%] index_add_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.473 -> 2.486 ( +0.53%) [ +0.00% +0.12% +0.20% / +0.57% +0.53% +0.65%] index_copy_ spread : Elapsed 0.025 ms (2.473 ms / 100) 2.468 -> 2.475 ( +0.28%) [ +0.08% +0.04% +0.00% / +0.32% +0.32% +0.28%] index_add_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.471 -> 2.481 ( +0.40%) [ +0.04% +0.16% +0.00% / +0.49% +0.49% +0.40%] index_copy_ strided 3 : Elapsed 0.025 ms (2.472 ms / 100) 2.465 -> 2.475 ( +0.41%) [ +0.00% +0.41% +0.24% / +0.41% +0.45% +0.53%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.471 -> 2.479 ( +0.32%) [ +0.12% +0.00% +0.00% / +0.45% +0.45% +0.32%] index_copy_ strided 7 : Elapsed 0.025 ms (2.474 ms / 100) 2.470 -> 2.474 ( +0.16%) [ +0.00% +0.08% +0.04% / +0.16% +0.36% +0.45%] index_add_ perm : Elapsed 0.025 ms (2.470 ms / 100) 2.470 -> 2.482 ( +0.49%) [ +0.00% +0.16% +0.08% / +0.49% +0.65% +0.61%] index_copy_ perm : Elapsed 0.025 ms (2.470 ms / 100) 2.467 -> 2.476 ( +0.36%) [ +0.00% +0.08% +0.04% / +0.36% +0.45% +0.53%] index_add_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 2.468 -> 2.482 ( +0.57%) [ +0.00% +0.20% +0.24% / +0.57% +0.89% +0.89%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) 4.496 -> 4.495 ( -0.02%) [ +0.09% +0.00% +0.02% / -0.02% +0.09% +0.07%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.503 -> 4.502 ( -0.02%) [ +0.00% +0.02% +0.07% / -0.02% +0.16% +0.20%] index_select wrap : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.503 ( +0.11%) [ +0.22% +0.09% +0.00% / +0.11% +0.33% +0.20%] index_select linear : Elapsed 0.045 ms (4.508 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.00% +0.24% +0.33% / +0.31% +0.07% +0.09%] index_select reverse : Elapsed 0.045 ms (4.498 ms / 100) 4.497 -> 4.492 ( -0.11%) [ +0.20% +0.16% +0.00% / +0.00% -0.11% -0.02%] index_select skip64 : Elapsed 0.045 ms (4.506 ms / 100) 4.488 -> 4.494 ( +0.13%) [ +0.13% +0.00% +0.16% / +0.13% +0.18% +0.13%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.504 -> 4.498 ( -0.13%) [ +0.02% +0.02% +0.00% / -0.13% +0.02% +0.04%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.495 -> 4.505 ( +0.22%) [ +0.20% +0.27% +0.00% / +0.22% +0.29% +0.27%] index_select strided 3 : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.502 ( +0.04%) [ +0.16% +0.00% +0.04% / +0.04% +0.07% +0.27%] index_select strided 5 : Elapsed 0.045 ms (4.507 ms / 100) 4.499 -> 4.498 ( -0.02%) [ +0.04% +0.00% +0.00% / -0.02% +0.18% +0.09%] index_select strided 7 : Elapsed 0.045 ms (4.501 ms / 100) 4.494 -> 4.497 ( +0.07%) [ +0.13% +0.00% +0.07% / +0.16% +0.11% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.500 ms / 100) 4.492 -> 4.498 ( +0.13%) [ +0.09% +0.00% +0.11% / +0.13% +0.31% +0.24%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.505 -> 4.496 ( -0.20%) [ +0.04% +0.00% +0.02% / -0.20% +0.09% +0.04%] index_select random : Elapsed 0.045 ms (4.507 ms / 100) 4.503 -> 4.496 ( -0.16%) [ +0.02% +0.00% +0.00% / -0.16% +0.00% +0.07%] index_select random_sorted : Elapsed 0.045 ms (4.504 ms / 100) B = [16, 40, 5, 4] (stride (4, 320, 64, 1)) A = [16, 20, 5, 4] (stride (100, 5, 1, 1600)) dim = 1 2.443 -> 2.454 ( +0.45%) [ +0.00% +0.12% +0.08% / +0.45% +1.11% +1.02%] index_add_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.441 -> 2.458 ( +0.70%) [ +0.04% +0.12% +0.00% / +0.70% +0.98% +1.19%] index_copy_ linear : Elapsed 0.024 ms (2.442 ms / 100) 2.437 -> 2.451 ( +0.57%) [ +0.00% +0.04% +0.08% / +0.57% +1.31% +1.15%] index_add_ reverse : Elapsed 0.024 ms (2.437 ms / 100) 2.435 -> 2.453 ( +0.74%) [ +0.21% +0.00% +0.25% / +0.74% +1.52% +1.36%] index_copy_ reverse : Elapsed 0.024 ms (2.440 ms / 100) 2.441 -> 2.454 ( +0.53%) [ +0.16% +0.00% +0.04% / +0.53% +1.07% +1.07%] index_add_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.438 -> 2.451 ( +0.53%) [ +0.00% +0.16% +0.12% / +0.53% +1.19% +1.23%] index_copy_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.29% +0.00% +0.08% / +0.53% +0.69% +0.61%] index_add_ strided 3 : Elapsed 0.025 ms (2.454 ms / 100) 2.445 -> 2.455 ( +0.41%) [ +0.08% +0.00% +0.08% / +0.41% +0.74% +0.86%] index_copy_ strided 3 : Elapsed 0.024 ms (2.447 ms / 100) 2.452 -> 2.462 ( +0.41%) [ +0.00% +0.12% +0.04% / +0.49% +0.41% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.452 ms / 100) 2.449 -> 2.461 ( +0.49%) [ +0.00% +0.00% +0.12% / +0.49% +0.69% +0.61%] index_copy_ strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.448 -> 2.458 ( +0.41%) [ +0.00% +0.29% +0.12% / +0.61% +0.41% +0.57%] index_add_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.448 -> 2.460 ( +0.49%) [ +0.00% +0.00% +0.08% / +0.69% +0.61% +0.49%] index_copy_ perm : Elapsed 0.024 ms (2.448 ms / 100) 2.452 -> 2.458 ( +0.24%) [ +0.16% +0.24% +0.00% / +0.61% +0.29% +0.24%] index_add_ perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) 2.448 -> 2.459 ( +0.45%) [ +0.08% +0.16% +0.00% / +0.65% +0.53% +0.45%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.450 ms / 100) 4.494 -> 4.495 ( +0.02%) [ +0.11% +0.00% +0.07% / +0.09% +0.13% +0.02%] index_select const : Elapsed 0.045 ms (4.499 ms / 100) 4.501 -> 4.507 ( +0.13%) [ +0.33% +0.13% +0.00% / +0.27% +0.13% +0.20%] index_select wrap : Elapsed 0.045 ms (4.516 ms / 100) 4.509 -> 4.510 ( +0.02%) [ +0.11% +0.00% +0.04% / +0.02% +0.04% +0.04%] index_select linear : Elapsed 0.045 ms (4.514 ms / 100) 4.510 -> 4.502 ( -0.18%) [ +0.11% +0.18% +0.00% / -0.16% -0.18% +0.07%] index_select reverse : Elapsed 0.045 ms (4.515 ms / 100) 4.499 -> 4.494 ( -0.11%) [ +0.09% +0.11% +0.00% / +0.00% -0.04% -0.11%] index_select skip64 : Elapsed 0.045 ms (4.503 ms / 100) 4.495 -> 4.491 ( -0.09%) [ +0.11% +0.00% +0.09% / +0.09% -0.09% +0.09%] index_select skip256 : Elapsed 0.045 ms (4.500 ms / 100) 4.506 -> 4.504 ( -0.04%) [ +0.00% +0.04% +0.09% / -0.04% -0.02% +0.18%] index_select spread : Elapsed 0.045 ms (4.506 ms / 100) 4.504 -> 4.506 ( +0.04%) [ +0.09% +0.11% +0.00% / +0.22% +0.04% +0.09%] index_select strided 3 : Elapsed 0.045 ms (4.508 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.29% +0.00% +0.20% / +0.11% +0.07% +0.11%] index_select strided 5 : Elapsed 0.045 ms (4.511 ms / 100) 4.506 -> 4.506 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.04% +0.22%] index_select strided 7 : Elapsed 0.045 ms (4.507 ms / 100) 4.502 -> 4.500 ( -0.04%) [ +0.00% +0.07% +0.04% / +0.13% -0.04% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.502 ms / 100) 4.505 -> 4.498 ( -0.16%) [ +0.00% +0.04% +0.02% / +0.04% -0.16% +0.02%] index_select strided 16 : Elapsed 0.045 ms (4.505 ms / 100) 4.508 -> 4.508 ( +0.00%) [ +0.20% +0.00% +0.22% / +0.20% +0.04% +0.00%] index_select random : Elapsed 0.045 ms (4.517 ms / 100) 4.508 -> 4.504 ( -0.09%) [ +0.02% +0.00% +0.13% / -0.02% +0.02% -0.09%] index_select random_sorted : Elapsed 0.045 ms (4.509 ms / 100) B = [16, 40, 5, 4] (stride (40, 1, 2560, 640)) A = [16, 20, 5, 4] (stride (1, 80, 16, 1600)) dim = 1 2.397 -> 2.409 ( +0.50%) [ +0.00% +0.13% +0.08% / +0.50% +0.67% +0.67%] index_add_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.403 -> 2.415 ( +0.50%) [ +0.00% +0.12% +0.08% / +0.50% +0.71% +0.62%] index_copy_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.402 -> 2.408 ( +0.25%) [ +0.08% +0.00% +0.17% / +0.25% +0.37% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.403 -> 2.414 ( +0.46%) [ +0.00% +0.25% +0.08% / +0.46% +0.58% +0.67%] index_copy_ reverse : Elapsed 0.024 ms (2.403 ms / 100) 2.412 -> 2.423 ( +0.46%) [ +0.08% +0.12% +0.00% / +0.50% +0.50% +0.46%] index_add_ spread : Elapsed 0.024 ms (2.414 ms / 100) 2.425 -> 2.435 ( +0.41%) [ +0.16% +0.04% +0.00% / +0.49% +0.41% +0.49%] index_copy_ spread : Elapsed 0.024 ms (2.429 ms / 100) 2.413 -> 2.421 ( +0.33%) [ +0.08% +0.25% +0.00% / +0.54% +0.33% +0.37%] index_add_ strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.419 -> 2.433 ( +0.58%) [ +0.21% +0.08% +0.00% / +0.70% +0.79% +0.58%] index_copy_ strided 3 : Elapsed 0.024 ms (2.424 ms / 100) 2.413 -> 2.423 ( +0.41%) [ +0.17% +0.04% +0.00% / +0.46% +0.41% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.417 ms / 100) 2.421 -> 2.433 ( +0.50%) [ +0.08% +0.00% +0.12% / +0.50% +0.74% +0.50%] index_copy_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.412 -> 2.424 ( +0.50%) [ +0.00% +0.25% +0.04% / +0.66% +0.50% +0.75%] index_add_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.423 -> 2.431 ( +0.33%) [ +0.00% +0.04% +0.08% / +0.33% +0.66% +0.66%] index_copy_ perm : Elapsed 0.024 ms (2.423 ms / 100) 2.411 -> 2.420 ( +0.37%) [ +0.17% +0.12% +0.00% / +0.37% +0.75% +0.37%] index_add_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 2.419 -> 2.431 ( +0.50%) [ +0.00% +0.12% +0.04% / +0.50% +0.74% +0.74%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 4.422 -> 4.424 ( +0.05%) [ +0.02% +0.14% +0.00% / +0.09% +0.11% +0.05%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.419 -> 4.428 ( +0.20%) [ +0.34% +0.11% +0.00% / +0.20% +0.29% +0.27%] index_select wrap : Elapsed 0.044 ms (4.434 ms / 100) 4.425 -> 4.429 ( +0.09%) [ +0.00% +0.14% +0.00% / +0.14% +0.09% +0.23%] index_select linear : Elapsed 0.044 ms (4.425 ms / 100) 4.426 -> 4.435 ( +0.20%) [ +0.07% +0.11% +0.00% / +0.25% +0.20% +0.25%] index_select reverse : Elapsed 0.044 ms (4.429 ms / 100) 4.422 -> 4.412 ( -0.23%) [ +0.00% +0.02% +0.09% / +0.00% -0.23% +0.00%] index_select skip64 : Elapsed 0.044 ms (4.422 ms / 100) 4.421 -> 4.422 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.11% +0.16% +0.02%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.424 -> 4.430 ( +0.14%) [ +0.05% +0.00% +0.02% / +0.27% +0.14% +0.29%] index_select spread : Elapsed 0.044 ms (4.426 ms / 100) 4.425 -> 4.429 ( +0.09%) [ +0.11% +0.00% +0.11% / +0.11% +0.25% +0.09%] index_select strided 3 : Elapsed 0.044 ms (4.430 ms / 100) 4.423 -> 4.421 ( -0.05%) [ +0.00% +0.09% +0.02% / +0.07% +0.07% -0.05%] index_select strided 5 : Elapsed 0.044 ms (4.423 ms / 100) 4.423 -> 4.419 ( -0.09%) [ +0.29% +0.16% +0.00% / -0.09% +0.16% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.436 ms / 100) 4.422 -> 4.421 ( -0.02%) [ +0.05% +0.00% +0.05% / +0.02% -0.02% +0.05%] index_select strided 8 : Elapsed 0.044 ms (4.424 ms / 100) 4.418 -> 4.426 ( +0.18%) [ +0.07% +0.00% +0.14% / +0.18% +0.23% +0.20%] index_select strided 16 : Elapsed 0.044 ms (4.421 ms / 100) 4.422 -> 4.428 ( +0.14%) [ +0.23% +0.00% +0.14% / +0.18% +0.14% +0.32%] index_select random : Elapsed 0.044 ms (4.432 ms / 100) 4.421 -> 4.426 ( +0.11%) [ +0.00% +0.09% +0.18% / +0.11% +0.16% +0.20%] index_select random_sorted : Elapsed 0.044 ms (4.421 ms / 100) out_shape = [16, 20, 40, 4] in_shape = [16, 20, 5, 4] idx_dim = 2 B = [16, 20, 40, 4] (stride (3200, 160, 1, 40)) A = [16, 20, 5, 4] (stride (20, 1, 320, 1600)) dim = 2 0.629 -> 0.629 ( +0.00%) [ +0.32% +0.32% +0.00% / +0.00% +0.64% +0.16%] index_add_ linear : Elapsed 0.006 ms (0.631 ms / 100) 0.646 -> 0.648 ( +0.31%) [ +0.62% +0.46% +0.00% / +0.31% +0.46% +0.31%] index_copy_ linear : Elapsed 0.006 ms (0.650 ms / 100) 0.629 -> 0.630 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.64% +0.48%] index_add_ reverse : Elapsed 0.006 ms (0.630 ms / 100) 0.646 -> 0.647 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.31% +0.46%] index_copy_ reverse : Elapsed 0.006 ms (0.647 ms / 100) 0.630 -> 0.631 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.32% +0.48%] index_add_ spread : Elapsed 0.006 ms (0.631 ms / 100) 0.647 -> 0.647 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.15% +0.15%] index_copy_ spread : Elapsed 0.006 ms (0.647 ms / 100) 0.630 -> 0.630 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.32%] index_add_ strided 3 : Elapsed 0.006 ms (0.630 ms / 100) 0.647 -> 0.646 ( -0.15%) [ +0.00% +0.46% +0.15% / -0.15% +0.15% +0.15%] index_copy_ strided 3 : Elapsed 0.006 ms (0.647 ms / 100) 0.630 -> 0.629 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.48% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.645 -> 0.646 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.47% +0.62%] index_copy_ strided 7 : Elapsed 0.006 ms (0.645 ms / 100) 0.631 -> 0.632 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.32% +0.16%] index_add_ perm : Elapsed 0.006 ms (0.631 ms / 100) 0.648 -> 0.648 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.00% +0.00%] index_copy_ perm : Elapsed 0.006 ms (0.648 ms / 100) 0.630 -> 0.631 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.48% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.631 ms / 100) 0.647 -> 0.648 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.15% +0.15%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.648 ms / 100) 4.897 -> 4.797 ( -2.04%) [ +0.00% +0.25% +0.02% / -1.90% -1.98% -2.04%] index_select const : Elapsed 0.049 ms (4.897 ms / 100) 4.910 -> 4.830 ( -1.63%) [ +0.00% +0.02% +0.10% / -1.34% -1.63% -1.61%] index_select wrap : Elapsed 0.049 ms (4.910 ms / 100) 4.905 -> 4.820 ( -1.73%) [ +0.00% +0.14% +0.18% / -1.47% -1.73% -1.73%] index_select linear : Elapsed 0.049 ms (4.905 ms / 100) 4.892 -> 4.827 ( -1.33%) [ +0.00% +0.00% +0.00% / -1.00% -1.33% -1.25%] index_select reverse : Elapsed 0.049 ms (4.892 ms / 100) 4.897 -> 4.791 ( -2.16%) [ +0.04% +0.00% +0.18% / -1.88% -2.16% -2.02%] index_select skip64 : Elapsed 0.049 ms (4.899 ms / 100) 4.899 -> 4.802 ( -1.98%) [ +0.00% +0.22% +0.10% / -1.98% -1.90% -1.90%] index_select skip256 : Elapsed 0.049 ms (4.899 ms / 100) 4.920 -> 4.814 ( -2.15%) [ +0.02% +0.00% +0.00% / -2.15% -1.87% -1.85%] index_select spread : Elapsed 0.049 ms (4.921 ms / 100) 4.914 -> 4.833 ( -1.65%) [ +0.18% +0.04% +0.00% / -1.61% -1.65% -1.65%] index_select strided 3 : Elapsed 0.049 ms (4.923 ms / 100) 4.914 -> 4.833 ( -1.65%) [ +0.08% +0.08% +0.00% / -1.53% -1.65% -1.59%] index_select random : Elapsed 0.049 ms (4.918 ms / 100) 4.918 -> 4.826 ( -1.87%) [ +0.00% +0.08% +0.04% / -1.87% -1.75% -1.73%] index_select random_sorted : Elapsed 0.049 ms (4.918 ms / 100) B = [16, 20, 40, 4] (stride (40, 2560, 1, 640)) A = [16, 20, 5, 4] (stride (4, 64, 1280, 1)) dim = 2 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.07% +0.13% / +0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.68% +0.68%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.59% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.75% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.68% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.68% +0.75%] index_copy_ strided 3 : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.59% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.521 ms / 100) 1.474 -> 1.473 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.61% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.75% +0.68%] index_copy_ perm : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.68% +0.75%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) 8.564 -> 8.570 ( +0.07%) [ +0.30% +0.00% +0.00% / +0.14% +0.07% +0.13%] index_select const : Elapsed 0.086 ms (8.590 ms / 100) 8.578 -> 8.586 ( +0.09%) [ +0.00% +0.20% +0.17% / +0.17% +0.09% +0.28%] index_select wrap : Elapsed 0.086 ms (8.578 ms / 100) 8.576 -> 8.578 ( +0.02%) [ +0.23% +0.38% +0.00% / +0.02% +0.34% +0.21%] index_select linear : Elapsed 0.086 ms (8.596 ms / 100) 8.569 -> 8.579 ( +0.12%) [ +0.16% +0.00% +0.11% / +0.12% +0.21% +0.35%] index_select reverse : Elapsed 0.086 ms (8.583 ms / 100) 8.564 -> 8.568 ( +0.05%) [ +0.00% +0.16% +0.14% / +0.05% +0.43% +0.18%] index_select skip64 : Elapsed 0.086 ms (8.564 ms / 100) 8.577 -> 8.574 ( -0.03%) [ +0.10% +0.00% +0.17% / -0.02% +0.10% -0.03%] index_select skip256 : Elapsed 0.086 ms (8.586 ms / 100) 8.578 -> 8.594 ( +0.19%) [ +0.13% +0.00% +0.03% / +0.20% +0.23% +0.19%] index_select spread : Elapsed 0.086 ms (8.589 ms / 100) 8.578 -> 8.585 ( +0.08%) [ +0.00% +0.31% +0.33% / +0.08% +0.12% +0.24%] index_select strided 3 : Elapsed 0.086 ms (8.578 ms / 100) 8.584 -> 8.587 ( +0.03%) [ +0.09% +0.00% +0.21% / +0.03% +0.36% +0.30%] index_select random : Elapsed 0.086 ms (8.592 ms / 100) 8.582 -> 8.584 ( +0.02%) [ +0.06% +0.00% +0.14% / +0.02% +0.33% +0.47%] index_select random_sorted : Elapsed 0.086 ms (8.587 ms / 100) B = [16, 20, 40, 4] (stride (1, 64, 1280, 16)) A = [16, 20, 5, 4] (stride (400, 1, 80, 20)) dim = 2 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.39% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.34% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.477 ms / 100) 1.518 -> 1.520 ( +0.13%) [ +0.33% +0.20% +0.00% / +0.13% +0.66% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.475 -> 1.474 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.46% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.41% +0.54%] index_copy_ spread : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.46% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.41% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.41% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.53% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.41% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.476 ms / 100) 1.520 -> 1.519 ( -0.07%) [ +0.20% +0.00% +0.00% / -0.07% +0.53% +0.39%] index_add_ perm_sorted : Elapsed 0.015 ms (1.523 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.00% +0.07% / +0.00% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.477 ms / 100) 8.516 -> 8.511 ( -0.06%) [ +0.11% +0.00% +0.06% / -0.02% +0.23% -0.06%] index_select const : Elapsed 0.085 ms (8.525 ms / 100) 8.538 -> 8.552 ( +0.16%) [ +0.00% +0.23% +0.00% / +0.19% +0.16% +0.25%] index_select wrap : Elapsed 0.085 ms (8.538 ms / 100) 8.539 -> 8.543 ( +0.05%) [ +0.12% +0.00% +0.11% / +0.05% +0.21% +0.07%] index_select linear : Elapsed 0.085 ms (8.549 ms / 100) 8.521 -> 8.536 ( +0.18%) [ +0.00% +0.34% +0.15% / +0.45% +0.18% +0.34%] index_select reverse : Elapsed 0.085 ms (8.521 ms / 100) 8.496 -> 8.506 ( +0.12%) [ +0.00% +0.29% +0.40% / +0.12% +0.42% +0.18%] index_select skip64 : Elapsed 0.085 ms (8.496 ms / 100) 8.513 -> 8.512 ( -0.01%) [ +0.20% +0.09% +0.00% / -0.01% +0.12% +0.14%] index_select skip256 : Elapsed 0.085 ms (8.530 ms / 100) 8.531 -> 8.537 ( +0.07%) [ +0.42% +0.15% +0.00% / +0.07% +0.25% +0.18%] index_select spread : Elapsed 0.086 ms (8.567 ms / 100) 8.540 -> 8.540 ( +0.00%) [ +0.28% +0.27% +0.00% / +0.00% +0.00% +0.05%] index_select strided 3 : Elapsed 0.086 ms (8.564 ms / 100) 8.537 -> 8.532 ( -0.06%) [ +0.08% +0.13% +0.00% / -0.06% +0.28% +0.19%] index_select random : Elapsed 0.085 ms (8.544 ms / 100) 8.531 -> 8.533 ( +0.02%) [ +0.00% +0.36% +0.11% / +0.02% +0.35% +0.13%] index_select random_sorted : Elapsed 0.085 ms (8.531 ms / 100) B = [16, 20, 40, 4] (stride (20, 1, 1280, 320)) A = [16, 20, 5, 4] (stride (1, 320, 64, 16)) dim = 2 1.469 -> 1.470 ( +0.07%) [ +0.00% +0.14% +0.00% / +0.07% +0.68% +0.82%] index_add_ linear : Elapsed 0.015 ms (1.469 ms / 100) 1.417 -> 1.419 ( +0.14%) [ +0.21% +0.21% +0.00% / +0.14% +0.64% +0.64%] index_copy_ linear : Elapsed 0.014 ms (1.420 ms / 100) 1.475 -> 1.474 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.68% +0.61%] index_add_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.00% +0.28% +0.00% / +0.00% +0.70% +0.77%] index_copy_ reverse : Elapsed 0.014 ms (1.421 ms / 100) 1.474 -> 1.476 ( +0.14%) [ +0.00% +0.07% +0.14% / +0.14% +0.68% +0.75%] index_add_ spread : Elapsed 0.015 ms (1.474 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.70% +0.70%] index_copy_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.469 -> 1.471 ( +0.14%) [ +0.20% +0.07% +0.00% / +0.14% +0.68% +0.75%] index_add_ strided 3 : Elapsed 0.015 ms (1.472 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.14% +0.00% +0.14% / +0.14% +0.56% +0.49%] index_copy_ strided 3 : Elapsed 0.014 ms (1.420 ms / 100) 1.471 -> 1.470 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.015 ms (1.471 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.56% +0.49%] index_copy_ strided 7 : Elapsed 0.014 ms (1.419 ms / 100) 1.470 -> 1.469 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.75% +0.82%] index_add_ perm : Elapsed 0.015 ms (1.471 ms / 100) 1.419 -> 1.419 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.56% +0.56%] index_copy_ perm : Elapsed 0.014 ms (1.420 ms / 100) 1.469 -> 1.471 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.88% +0.82%] index_add_ perm_sorted : Elapsed 0.015 ms (1.471 ms / 100) 1.420 -> 1.421 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.42% +0.49%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) 8.169 -> 8.189 ( +0.24%) [ +0.00% +0.45% +0.27% / +0.24% +0.31% +0.27%] index_select const : Elapsed 0.082 ms (8.169 ms / 100) 8.188 -> 8.200 ( +0.15%) [ +0.00% +0.05% +0.06% / +0.15% +0.27% +0.43%] index_select wrap : Elapsed 0.082 ms (8.188 ms / 100) 8.191 -> 8.184 ( -0.09%) [ +0.04% +0.02% +0.00% / -0.09% +0.02% +0.00%] index_select linear : Elapsed 0.082 ms (8.194 ms / 100) 8.188 -> 8.178 ( -0.12%) [ +0.21% +0.00% +0.04% / -0.12% +0.37% +0.46%] index_select reverse : Elapsed 0.082 ms (8.205 ms / 100) 8.172 -> 8.179 ( +0.09%) [ +0.00% +0.16% +0.07% / +0.09% +0.29% +0.29%] index_select skip64 : Elapsed 0.082 ms (8.172 ms / 100) 8.183 -> 8.183 ( +0.00%) [ +0.43% +0.07% +0.00% / +0.00% +0.16% +0.29%] index_select skip256 : Elapsed 0.082 ms (8.218 ms / 100) 8.181 -> 8.197 ( +0.20%) [ +0.04% +0.00% +0.21% / +0.21% +0.21% +0.20%] index_select spread : Elapsed 0.082 ms (8.184 ms / 100) 8.187 -> 8.188 ( +0.01%) [ +0.07% +0.13% +0.00% / +0.01% +0.50% +0.13%] index_select strided 3 : Elapsed 0.082 ms (8.193 ms / 100) 8.177 -> 8.183 ( +0.07%) [ +0.28% +0.02% +0.00% / +0.07% +0.50% +0.40%] index_select random : Elapsed 0.082 ms (8.200 ms / 100) 8.187 -> 8.203 ( +0.20%) [ +0.11% +0.00% +0.32% / +0.20% +0.35% +0.48%] index_select random_sorted : Elapsed 0.082 ms (8.196 ms / 100) B = [16, 20, 40, 4] (stride (20, 1, 1280, 320)) A = [16, 20, 5, 4] (stride (1, 16, 1280, 320)) dim = 2 1.475 -> 1.475 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.34% +0.47%] index_add_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.00% +0.07% +0.49% / +0.14% +0.42% +0.49%] index_copy_ linear : Elapsed 0.014 ms (1.421 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.27% +0.27%] index_add_ reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.425 -> 1.426 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.42% +0.35%] index_copy_ reverse : Elapsed 0.014 ms (1.426 ms / 100) 1.481 -> 1.482 ( +0.07%) [ +0.20% +0.00% +0.07% / +0.07% +0.20% +0.20%] index_add_ spread : Elapsed 0.015 ms (1.484 ms / 100) 1.428 -> 1.428 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.28% +0.49%] index_copy_ spread : Elapsed 0.014 ms (1.428 ms / 100) 1.479 -> 1.482 ( +0.20%) [ +0.07% +0.00% +0.20% / +0.20% +0.34% +0.27%] index_add_ strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.426 -> 1.429 ( +0.21%) [ +0.00% +0.07% +0.07% / +0.21% +0.35% +0.42%] index_copy_ strided 3 : Elapsed 0.014 ms (1.426 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.47% +0.41%] index_add_ strided 7 : Elapsed 0.015 ms (1.478 ms / 100) 1.424 -> 1.425 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.49% +0.42%] index_copy_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.474 -> 1.473 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.47% +0.54%] index_add_ perm : Elapsed 0.015 ms (1.474 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.49% +0.42%] index_copy_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.27% +0.00% +0.20% / +0.20% +0.34% +0.41%] index_add_ perm_sorted : Elapsed 0.015 ms (1.481 ms / 100) 1.426 -> 1.428 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +0.35% +0.56%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.427 ms / 100) 8.169 -> 8.183 ( +0.17%) [ +0.20% +0.00% +0.11% / +0.17% +0.43% +0.38%] index_select const : Elapsed 0.082 ms (8.185 ms / 100) 8.196 -> 8.191 ( -0.06%) [ +0.00% +0.13% +0.12% / +0.15% +0.11% -0.06%] index_select wrap : Elapsed 0.082 ms (8.196 ms / 100) 8.187 -> 8.200 ( +0.16%) [ +0.00% +0.15% +0.20% / +0.16% +0.29% +0.31%] index_select linear : Elapsed 0.082 ms (8.187 ms / 100) 8.192 -> 8.187 ( -0.06%) [ +0.20% +0.02% +0.00% / -0.06% +0.37% +0.40%] index_select reverse : Elapsed 0.082 ms (8.208 ms / 100) 8.181 -> 8.177 ( -0.05%) [ +0.15% +0.22% +0.00% / -0.05% +0.21% +0.06%] index_select skip64 : Elapsed 0.082 ms (8.193 ms / 100) 8.180 -> 8.186 ( +0.07%) [ +0.00% +0.01% +0.13% / +0.16% +0.07% +0.48%] index_select skip256 : Elapsed 0.082 ms (8.180 ms / 100) 8.196 -> 8.194 ( -0.02%) [ +0.00% +0.02% +0.16% / -0.02% +0.16% +0.38%] index_select spread : Elapsed 0.082 ms (8.196 ms / 100) 8.197 -> 8.187 ( -0.12%) [ +0.16% +0.00% +0.04% / -0.12% +0.37% +0.30%] index_select strided 3 : Elapsed 0.082 ms (8.210 ms / 100) 8.190 -> 8.198 ( +0.10%) [ +0.11% +0.00% +0.12% / +0.18% +0.43% +0.10%] index_select random : Elapsed 0.082 ms (8.199 ms / 100) 8.190 -> 8.197 ( +0.09%) [ +0.06% +0.00% +0.21% / +0.09% +0.29% +0.09%] index_select random_sorted : Elapsed 0.082 ms (8.195 ms / 100) B = [16, 20, 40, 4] (stride (20, 1, 1280, 320)) A = [16, 20, 5, 4] (stride (1, 16, 320, 1600)) dim = 2 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.75% +0.61%] index_add_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.63% +0.63%] index_copy_ linear : Elapsed 0.014 ms (1.424 ms / 100) 1.482 -> 1.481 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.67% +0.74%] index_add_ reverse : Elapsed 0.015 ms (1.482 ms / 100) 1.430 -> 1.431 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.70% +0.63%] index_copy_ reverse : Elapsed 0.014 ms (1.432 ms / 100) 1.480 -> 1.477 ( -0.20%) [ +0.00% +0.14% +0.14% / -0.20% +0.54% +0.47%] index_add_ spread : Elapsed 0.015 ms (1.480 ms / 100) 1.426 -> 1.425 ( -0.07%) [ +0.00% +0.07% +0.14% / -0.07% +0.77% +0.70%] index_copy_ spread : Elapsed 0.014 ms (1.426 ms / 100) 1.475 -> 1.475 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.68% +0.68%] index_add_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.424 -> 1.422 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.63% +0.84%] index_copy_ strided 3 : Elapsed 0.014 ms (1.424 ms / 100) 1.472 -> 1.474 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.015 ms (1.472 ms / 100) 1.420 -> 1.422 ( +0.14%) [ +0.00% +0.07% +0.42% / +0.14% +0.77% +0.70%] index_copy_ strided 7 : Elapsed 0.014 ms (1.420 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.61% +0.68%] index_add_ perm : Elapsed 0.015 ms (1.474 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.77% +0.63%] index_copy_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.14% +0.00% +0.07% / +0.07% +0.75% +0.81%] index_add_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.77% +0.77%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 8.179 -> 8.184 ( +0.06%) [ +0.17% +0.02% +0.00% / +0.17% +0.06% +0.26%] index_select const : Elapsed 0.082 ms (8.193 ms / 100) 8.181 -> 8.197 ( +0.20%) [ +0.00% +0.10% +0.18% / +0.20% +0.38% +0.35%] index_select wrap : Elapsed 0.082 ms (8.181 ms / 100) 8.175 -> 8.175 ( +0.00%) [ +0.00% +0.27% +0.26% / +0.00% +0.48% +0.51%] index_select linear : Elapsed 0.082 ms (8.175 ms / 100) 8.182 -> 8.179 ( -0.04%) [ +0.00% +0.16% +0.09% / -0.04% +0.61% +0.23%] index_select reverse : Elapsed 0.082 ms (8.182 ms / 100) 8.170 -> 8.192 ( +0.27%) [ +0.18% +0.00% +0.27% / +0.27% +0.39% +0.35%] index_select skip64 : Elapsed 0.082 ms (8.185 ms / 100) 8.188 -> 8.178 ( -0.12%) [ +0.02% +0.00% +0.01% / -0.12% +0.11% +0.09%] index_select skip256 : Elapsed 0.082 ms (8.190 ms / 100) 8.192 -> 8.186 ( -0.07%) [ +0.01% +0.09% +0.00% / -0.07% +0.38% +0.17%] index_select spread : Elapsed 0.082 ms (8.193 ms / 100) 8.182 -> 8.181 ( -0.01%) [ +0.02% +0.27% +0.00% / -0.01% +0.20% +0.51%] index_select strided 3 : Elapsed 0.082 ms (8.184 ms / 100) 8.183 -> 8.189 ( +0.07%) [ +0.13% +0.00% +0.04% / +0.07% +0.38% +0.49%] index_select random : Elapsed 0.082 ms (8.194 ms / 100) 8.184 -> 8.183 ( -0.01%) [ +0.32% +0.01% +0.00% / -0.01% +0.44% +0.20%] index_select random_sorted : Elapsed 0.082 ms (8.210 ms / 100) B = [16, 20, 40, 4] (stride (800, 40, 1, 12800)) A = [16, 20, 5, 4] (stride (400, 1, 20, 100)) dim = 2 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.35% +0.35%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.22% +0.15%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.42% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.51% +0.15% +0.00% / +0.15% +0.36% +0.29%] index_copy_ reverse : Elapsed 0.014 ms (1.384 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.35% +0.42%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.36% +0.00% +0.22% / +0.15% +0.36% +0.44%] index_copy_ spread : Elapsed 0.014 ms (1.382 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.63% +0.42%] index_add_ strided 3 : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.29% +0.22%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.91% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.376 ( -0.22%) [ +0.15% +0.00% +0.15% / -0.22% +0.36% +0.51%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.49% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.29% +0.36%] index_copy_ perm : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.42% +0.42%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.376 ( -0.29%) [ +0.00% +0.22% +0.00% / -0.29% +0.22% +0.14%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.225 -> 8.242 ( +0.21%) [ +0.27% +0.21% +0.00% / +0.21% +0.52% +0.50%] index_select const : Elapsed 0.082 ms (8.247 ms / 100) 8.251 -> 8.263 ( +0.15%) [ +0.36% +0.00% +0.32% / +0.23% +0.15% +0.30%] index_select wrap : Elapsed 0.083 ms (8.281 ms / 100) 8.254 -> 8.255 ( +0.01%) [ +0.00% +0.13% +0.10% / +0.02% +0.01% +0.21%] index_select linear : Elapsed 0.083 ms (8.254 ms / 100) 8.253 -> 8.270 ( +0.21%) [ +0.21% +0.08% +0.00% / +0.29% +0.21% +0.28%] index_select reverse : Elapsed 0.083 ms (8.270 ms / 100) 8.239 -> 8.233 ( -0.07%) [ +0.13% +0.23% +0.00% / -0.07% +0.05% -0.04%] index_select skip64 : Elapsed 0.083 ms (8.250 ms / 100) 8.241 -> 8.233 ( -0.10%) [ +0.00% +0.24% +0.00% / -0.10% +0.30% +0.06%] index_select skip256 : Elapsed 0.082 ms (8.241 ms / 100) 8.245 -> 8.264 ( +0.23%) [ +0.16% +0.32% +0.00% / +0.23% +0.57% +0.32%] index_select spread : Elapsed 0.083 ms (8.258 ms / 100) 8.254 -> 8.261 ( +0.08%) [ +0.02% +0.00% +0.22% / +0.08% +0.12% +0.12%] index_select strided 3 : Elapsed 0.083 ms (8.256 ms / 100) 8.268 -> 8.254 ( -0.17%) [ +0.00% +0.18% +0.04% / +0.11% -0.17% -0.02%] index_select random : Elapsed 0.083 ms (8.268 ms / 100) 8.252 -> 8.271 ( +0.23%) [ +0.19% +0.00% +0.10% / +0.23% +0.27% +0.47%] index_select random_sorted : Elapsed 0.083 ms (8.268 ms / 100) B = [16, 20, 40, 4] (stride (1, 640, 16, 12800)) A = [16, 20, 5, 4] (stride (20, 1, 320, 1600)) dim = 2 1.421 -> 1.423 ( +0.14%) [ +0.00% +0.35% +0.07% / +0.14% +0.70% +0.70%] index_add_ linear : Elapsed 0.014 ms (1.421 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.29% +0.00% +0.00% / +0.07% +0.58% +0.65%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.28% +0.00% +0.07% / +0.07% +0.56% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.426 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.07% +0.00% +0.15% / +0.15% +0.44% +0.36%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.63% +0.98%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.377 ( -0.22%) [ +0.00% +0.22% +0.00% / -0.22% +0.29% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.70% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.379 ( -0.07%) [ +0.00% +0.14% +0.07% / -0.07% +0.29% +0.29%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.21% +0.00% +0.07% / +0.07% +0.70% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.70% +0.70%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.377 ( -0.07%) [ +0.00% +0.15% +0.29% / -0.07% +0.51% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.378 ms / 100) 1.423 -> 1.421 ( -0.14%) [ +0.00% +0.07% +0.00% / -0.14% +0.56% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.376 -> 1.379 ( +0.22%) [ +0.15% +0.29% +0.00% / +0.22% +0.65% +0.65%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.378 ms / 100) 8.217 -> 8.207 ( -0.12%) [ +0.15% +0.05% +0.00% / +0.00% -0.12% -0.09%] index_select const : Elapsed 0.082 ms (8.229 ms / 100) 8.242 -> 8.235 ( -0.08%) [ +0.00% +0.12% +0.08% / -0.08% +0.00% +0.15%] index_select wrap : Elapsed 0.082 ms (8.242 ms / 100) 8.241 -> 8.243 ( +0.02%) [ +0.10% +0.04% +0.00% / +0.02% +0.22% +0.04%] index_select linear : Elapsed 0.082 ms (8.249 ms / 100) 8.225 -> 8.236 ( +0.13%) [ +0.00% +0.21% +0.12% / +0.13% +0.34% +0.23%] index_select reverse : Elapsed 0.082 ms (8.225 ms / 100) 8.205 -> 8.214 ( +0.11%) [ +0.04% +0.11% +0.00% / +0.13% +0.17% +0.11%] index_select skip64 : Elapsed 0.082 ms (8.208 ms / 100) 8.195 -> 8.203 ( +0.10%) [ +0.00% +0.22% +0.21% / +0.10% +0.33% +0.20%] index_select skip256 : Elapsed 0.082 ms (8.195 ms / 100) 8.233 -> 8.227 ( -0.07%) [ +0.21% +0.24% +0.00% / +0.12% +0.09% -0.07%] index_select spread : Elapsed 0.083 ms (8.250 ms / 100) 8.229 -> 8.249 ( +0.24%) [ +0.15% +0.22% +0.00% / +0.24% +0.35% +0.39%] index_select strided 3 : Elapsed 0.082 ms (8.241 ms / 100) 8.236 -> 8.253 ( +0.21%) [ +0.12% +0.00% +0.15% / +0.35% +0.21% +0.23%] index_select random : Elapsed 0.082 ms (8.246 ms / 100) 8.246 -> 8.237 ( -0.11%) [ +0.07% +0.00% +0.02% / +0.00% +0.19% -0.11%] index_select random_sorted : Elapsed 0.083 ms (8.252 ms / 100) out_shape = [16, 20, 5, 40] in_shape = [16, 20, 5, 4] idx_dim = 3 B = [16, 20, 5, 40] (stride (4000, 40, 800, 1)) A = [16, 20, 5, 4] (stride (400, 1, 20, 100)) dim = 3 1.316 -> 1.315 ( -0.08%) [ +0.15% +0.15% +0.00% / -0.08% +0.38% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.316 -> 1.313 ( -0.23%) [ +0.08% +0.23% +0.00% / -0.23% +0.30% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.276 -> 1.276 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.327 -> 1.328 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.23% +0.08%] index_add_ spread : Elapsed 0.013 ms (1.327 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.16% +0.16% +0.08%] index_copy_ spread : Elapsed 0.013 ms (1.287 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.61% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.31% +0.08% / +0.08% +0.86% +0.78%] index_copy_ strided 3 : Elapsed 0.013 ms (1.277 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.30% +0.23% +0.00% / +0.08% +0.68% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.47% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.281 ms / 100) 1.324 -> 1.326 ( +0.15%) [ +0.23% +0.38% +0.00% / +0.15% +0.23% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.327 ms / 100) 1.286 -> 1.288 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.47% +0.16%] index_copy_ perm : Elapsed 0.013 ms (1.286 ms / 100) 1.326 -> 1.327 ( +0.08%) [ +0.15% +0.00% +0.08% / +0.23% +0.08% +0.23%] index_add_ perm_sorted : Elapsed 0.013 ms (1.328 ms / 100) 1.286 -> 1.288 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.23% +0.16%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.288 ms / 100) 9.206 -> 9.213 ( +0.08%) [ +0.00% +0.15% +0.16% / +0.08% +0.49% +0.28%] index_select const : Elapsed 0.092 ms (9.206 ms / 100) 9.238 -> 9.231 ( -0.08%) [ +0.17% +0.00% +0.04% / -0.08% +0.24% +0.04%] index_select wrap : Elapsed 0.093 ms (9.254 ms / 100) 9.221 -> 9.242 ( +0.23%) [ +0.00% +0.02% +0.05% / +0.23% +0.35% +0.34%] index_select linear : Elapsed 0.092 ms (9.221 ms / 100) 9.229 -> 9.238 ( +0.10%) [ +0.09% +0.04% +0.00% / +0.10% +0.15% +0.16%] index_select reverse : Elapsed 0.092 ms (9.237 ms / 100) 9.212 -> 9.223 ( +0.12%) [ +0.00% +0.08% +0.24% / +0.12% +0.22% +0.29%] index_select skip64 : Elapsed 0.092 ms (9.212 ms / 100) 9.217 -> 9.230 ( +0.14%) [ +0.17% +0.00% +0.08% / +0.18% +0.14% +0.29%] index_select skip256 : Elapsed 0.092 ms (9.233 ms / 100) 9.230 -> 9.251 ( +0.23%) [ +0.00% +0.10% +0.09% / +0.23% +0.29% +0.24%] index_select spread : Elapsed 0.092 ms (9.230 ms / 100) 9.241 -> 9.263 ( +0.24%) [ +0.00% +0.01% +0.15% / +0.29% +0.24% +0.28%] index_select strided 3 : Elapsed 0.092 ms (9.241 ms / 100) 9.228 -> 9.246 ( +0.20%) [ +0.20% +0.29% +0.00% / +0.28% +0.20% +0.55%] index_select random : Elapsed 0.092 ms (9.246 ms / 100) 9.242 -> 9.225 ( -0.18%) [ +0.00% +0.01% +0.11% / -0.18% +0.24% +0.29%] index_select random_sorted : Elapsed 0.092 ms (9.242 ms / 100) B = [16, 20, 5, 40] (stride (200, 3200, 1, 5)) A = [16, 20, 5, 4] (stride (4, 320, 64, 1)) dim = 3 1.320 -> 1.320 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.61% +0.68%] index_add_ linear : Elapsed 0.013 ms (1.322 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.00% +0.00% +0.16% / +0.16% +0.55% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.320 -> 1.318 ( -0.15%) [ +0.15% +0.00% +0.08% / -0.15% +0.45% +0.61%] index_add_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.39% +0.71%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.319 -> 1.317 ( -0.15%) [ +0.00% +0.08% +0.00% / -0.15% +0.38% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.47%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.322 -> 1.322 ( +0.00%) [ +0.15% +0.00% +0.76% / +0.00% +0.45% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.324 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.16% +0.00% +0.63% / +0.08% +0.63% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.325 -> 1.325 ( +0.00%) [ +0.30% +0.08% +0.00% / +0.00% +0.30% +0.45%] index_add_ strided 7 : Elapsed 0.013 ms (1.329 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.39% +0.39%] index_copy_ strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.30% +0.38%] index_add_ perm : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.24% +0.16% / +0.00% +0.47% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.274 ms / 100) 1.322 -> 1.320 ( -0.15%) [ +0.00% +0.00% +0.08% / -0.15% +0.30% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.322 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.08% +0.00% +0.16% / +0.16% +0.47% +1.10%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) 8.798 -> 8.798 ( +0.00%) [ +0.03% +0.09% +0.00% / +0.01% +0.14% +0.00%] index_select const : Elapsed 0.088 ms (8.801 ms / 100) 8.784 -> 8.787 ( +0.03%) [ +0.13% +0.24% +0.00% / +0.17% +0.03% +0.16%] index_select wrap : Elapsed 0.088 ms (8.795 ms / 100) 8.785 -> 8.781 ( -0.05%) [ +0.15% +0.00% +0.01% / +0.24% -0.05% +0.07%] index_select linear : Elapsed 0.088 ms (8.798 ms / 100) 8.790 -> 8.784 ( -0.07%) [ +0.18% +0.00% +0.11% / -0.07% +0.05% +0.03%] index_select reverse : Elapsed 0.088 ms (8.806 ms / 100) 8.795 -> 8.794 ( -0.01%) [ +0.00% +0.06% +0.07% / +0.00% -0.01% +0.02%] index_select skip64 : Elapsed 0.088 ms (8.795 ms / 100) 8.792 -> 8.797 ( +0.06%) [ +0.11% +0.00% +0.01% / +0.06% +0.27% +0.06%] index_select skip256 : Elapsed 0.088 ms (8.802 ms / 100) 8.789 -> 8.795 ( +0.07%) [ +0.00% +0.24% +0.13% / +0.07% +0.22% +0.26%] index_select spread : Elapsed 0.088 ms (8.789 ms / 100) 8.800 -> 8.779 ( -0.24%) [ +0.00% +0.02% +0.07% / +0.02% +0.16% -0.24%] index_select strided 3 : Elapsed 0.088 ms (8.800 ms / 100) 8.792 -> 8.794 ( +0.02%) [ +0.00% +0.14% +0.24% / +0.02% +0.26% +0.07%] index_select random : Elapsed 0.088 ms (8.792 ms / 100) 8.785 -> 8.789 ( +0.05%) [ +0.15% +0.00% +0.07% / +0.05% +0.18% +0.22%] index_select random_sorted : Elapsed 0.088 ms (8.798 ms / 100) B = [16, 20, 5, 40] (stride (20, 1, 12800, 320)) A = [16, 20, 5, 4] (stride (1, 320, 16, 80)) dim = 3 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.94%] index_add_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.73% +1.30%] index_copy_ linear : Elapsed 0.012 ms (1.230 ms / 100) 1.268 -> 1.269 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.71% +0.63%] index_add_ reverse : Elapsed 0.013 ms (1.268 ms / 100) 1.222 -> 1.223 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.65% +0.65%] index_copy_ reverse : Elapsed 0.012 ms (1.222 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.24% +0.00% +0.08% / +0.00% +0.47% +0.71%] index_add_ spread : Elapsed 0.013 ms (1.272 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.49% +0.00% +0.00% / +0.00% +0.57% +0.65%] index_copy_ spread : Elapsed 0.012 ms (1.235 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.78% +1.10%] index_add_ strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.229 -> 1.230 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.81% +1.06%] index_copy_ strided 3 : Elapsed 0.012 ms (1.229 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +1.57% +0.86%] index_add_ strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.232 -> 1.233 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.32% +0.41%] index_copy_ strided 7 : Elapsed 0.012 ms (1.233 ms / 100) 1.268 -> 1.269 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.63%] index_add_ perm : Elapsed 0.013 ms (1.269 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.57% +0.49%] index_copy_ perm : Elapsed 0.012 ms (1.229 ms / 100) 1.267 -> 1.268 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.79% +0.87%] index_add_ perm_sorted : Elapsed 0.013 ms (1.267 ms / 100) 1.223 -> 1.225 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +0.57% +0.49%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.223 ms / 100) 8.701 -> 8.703 ( +0.02%) [ +0.00% +0.05% +0.01% / +0.07% +0.02% +0.17%] index_select const : Elapsed 0.087 ms (8.701 ms / 100) 8.722 -> 8.714 ( -0.09%) [ +0.01% +0.19% +0.00% / -0.09% -0.01% -0.01%] index_select wrap : Elapsed 0.087 ms (8.723 ms / 100) 8.712 -> 8.702 ( -0.11%) [ +0.00% +0.09% +0.14% / +0.08% -0.11% +0.17%] index_select linear : Elapsed 0.087 ms (8.712 ms / 100) 8.708 -> 8.719 ( +0.13%) [ +0.31% +0.00% +0.11% / +0.13% +0.17% +0.13%] index_select reverse : Elapsed 0.087 ms (8.735 ms / 100) 8.703 -> 8.700 ( -0.03%) [ +0.00% +0.14% +0.11% / +0.28% -0.03% +0.20%] index_select skip64 : Elapsed 0.087 ms (8.703 ms / 100) 8.713 -> 8.708 ( -0.06%) [ +0.05% +0.00% +0.03% / +0.11% +0.16% -0.06%] index_select skip256 : Elapsed 0.087 ms (8.717 ms / 100) 8.729 -> 8.712 ( -0.19%) [ +0.01% +0.10% +0.00% / +0.22% +0.14% -0.19%] index_select spread : Elapsed 0.087 ms (8.730 ms / 100) 8.730 -> 8.717 ( -0.15%) [ +0.06% +0.00% +0.05% / -0.03% -0.15% -0.05%] index_select strided 3 : Elapsed 0.087 ms (8.735 ms / 100) 8.719 -> 8.731 ( +0.14%) [ +0.00% +0.06% +0.32% / +0.14% +0.22% +0.14%] index_select random : Elapsed 0.087 ms (8.719 ms / 100) 8.715 -> 8.716 ( +0.01%) [ +0.00% +0.05% +0.13% / +0.01% +0.14% +0.13%] index_select random_sorted : Elapsed 0.087 ms (8.715 ms / 100) B = [16, 20, 5, 40] (stride (100, 1, 20, 1600)) A = [16, 20, 5, 4] (stride (20, 1, 320, 1600)) dim = 3 1.232 -> 1.231 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.49% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.67%] index_copy_ linear : Elapsed 0.012 ms (1.193 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.49% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.194 -> 1.194 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.50% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.232 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.42% +0.42%] index_copy_ spread : Elapsed 0.012 ms (1.193 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.49% +0.49%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.57% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.25% +0.08% / +0.08% +0.50% +0.50%] index_copy_ strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.41% +0.49%] index_add_ perm : Elapsed 0.012 ms (1.233 ms / 100) 1.193 -> 1.194 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.50% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.194 ms / 100) 1.232 -> 1.231 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +1.79%] index_add_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) 1.193 -> 1.193 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.34% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) 8.703 -> 8.714 ( +0.13%) [ +0.00% +0.00% +0.03% / +0.13% +0.15% +0.40%] index_select const : Elapsed 0.087 ms (8.703 ms / 100) 8.726 -> 8.724 ( -0.02%) [ +0.00% +0.09% +0.05% / +0.11% -0.02% +0.31%] index_select wrap : Elapsed 0.087 ms (8.726 ms / 100) 8.732 -> 8.728 ( -0.05%) [ +0.02% +0.00% +0.15% / -0.05% -0.01% +0.03%] index_select linear : Elapsed 0.087 ms (8.734 ms / 100) 8.720 -> 8.729 ( +0.10%) [ +0.00% +0.18% +0.03% / +0.24% +0.11% +0.10%] index_select reverse : Elapsed 0.087 ms (8.720 ms / 100) 8.706 -> 8.718 ( +0.14%) [ +0.08% +0.00% +0.15% / +0.14% +0.33% +0.16%] index_select skip64 : Elapsed 0.087 ms (8.713 ms / 100) 8.705 -> 8.704 ( -0.01%) [ +0.00% +0.02% +0.05% / +0.16% -0.01% +0.13%] index_select skip256 : Elapsed 0.087 ms (8.705 ms / 100) 8.733 -> 8.726 ( -0.08%) [ +0.01% +0.00% +0.03% / +0.16% -0.08% +0.10%] index_select spread : Elapsed 0.087 ms (8.734 ms / 100) 8.730 -> 8.741 ( +0.13%) [ +0.09% +0.00% +0.13% / +0.27% +0.26% +0.13%] index_select strided 3 : Elapsed 0.087 ms (8.738 ms / 100) 8.729 -> 8.728 ( -0.01%) [ +0.00% +0.19% +0.21% / +0.21% +0.10% -0.01%] index_select random : Elapsed 0.087 ms (8.729 ms / 100) 8.715 -> 8.726 ( +0.13%) [ +0.00% +0.10% +0.05% / +0.39% +0.13% +0.33%] index_select random_sorted : Elapsed 0.087 ms (8.715 ms / 100) out_shape = [40, 4, 5, 16] in_shape = [20, 4, 5, 16] idx_dim = 0 B = [40, 4, 5, 16] (stride (320, 5, 1, 20)) A = [20, 4, 5, 16] (stride (20, 5, 1, 400)) dim = 0 1.532 -> 1.506 ( -1.70%) [ +0.00% +0.33% +0.26% / -1.70% -0.91% -1.04%] index_add_ linear : Elapsed 0.015 ms (1.532 ms / 100) 1.511 -> 1.478 ( -2.18%) [ +0.20% +0.00% +0.26% / -2.18% -1.59% -1.65%] index_copy_ linear : Elapsed 0.015 ms (1.514 ms / 100) 1.533 -> 1.506 ( -1.76%) [ +0.13% +0.00% +0.00% / -1.76% -1.17% -1.30%] index_add_ reverse : Elapsed 0.015 ms (1.535 ms / 100) 1.514 -> 1.477 ( -2.44%) [ +0.20% +0.20% +0.00% / -2.44% -1.78% -1.92%] index_copy_ reverse : Elapsed 0.015 ms (1.517 ms / 100) 1.528 -> 1.505 ( -1.51%) [ +0.33% +0.20% +0.00% / -1.51% -0.98% -1.11%] index_add_ spread : Elapsed 0.015 ms (1.533 ms / 100) 1.506 -> 1.472 ( -2.26%) [ +0.27% +0.00% +0.00% / -2.26% -1.33% -1.53%] index_copy_ spread : Elapsed 0.015 ms (1.510 ms / 100) 1.529 -> 1.504 ( -1.64%) [ +0.00% +0.20% +0.20% / -1.50% -1.64% -1.24%] index_add_ strided 3 : Elapsed 0.015 ms (1.529 ms / 100) 1.512 -> 1.481 ( -2.05%) [ +0.26% +0.40% +0.00% / -2.05% -1.85% -1.72%] index_copy_ strided 3 : Elapsed 0.015 ms (1.516 ms / 100) 1.528 -> 1.506 ( -1.44%) [ +0.00% +0.46% +0.13% / -1.44% -0.92% -1.05%] index_add_ strided 7 : Elapsed 0.015 ms (1.528 ms / 100) 1.510 -> 1.475 ( -2.32%) [ +0.00% +0.13% +0.20% / -2.32% -1.66% -1.72%] index_copy_ strided 7 : Elapsed 0.015 ms (1.510 ms / 100) 1.535 -> 1.507 ( -1.82%) [ +0.13% +0.00% +0.20% / -1.82% -1.76% -1.43%] index_add_ perm : Elapsed 0.015 ms (1.537 ms / 100) 1.512 -> 1.480 ( -2.12%) [ +0.00% +0.00% +0.00% / -2.12% -2.05% -1.72%] index_copy_ perm : Elapsed 0.015 ms (1.512 ms / 100) 1.529 -> 1.501 ( -1.83%) [ +0.07% +0.13% +0.00% / -1.83% -1.11% -1.05%] index_add_ perm_sorted : Elapsed 0.015 ms (1.530 ms / 100) 1.510 -> 1.475 ( -2.32%) [ +0.00% +0.07% +0.40% / -2.32% -1.85% -1.79%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.510 ms / 100) 2.876 -> 2.876 ( +0.00%) [ +0.07% +0.00% +0.31% / +0.07% +0.03% +0.00%] index_select const : Elapsed 0.029 ms (2.878 ms / 100) 2.906 -> 2.891 ( -0.52%) [ +0.00% +0.03% +0.03% / +0.00% -0.34% -0.52%] index_select wrap : Elapsed 0.029 ms (2.906 ms / 100) 2.900 -> 2.892 ( -0.28%) [ +0.00% +0.24% +0.14% / +0.28% -0.03% -0.28%] index_select linear : Elapsed 0.029 ms (2.900 ms / 100) 2.895 -> 2.897 ( +0.07%) [ +0.17% +0.14% +0.00% / +0.21% +0.31% +0.07%] index_select reverse : Elapsed 0.029 ms (2.900 ms / 100) 2.870 -> 2.876 ( +0.21%) [ +0.00% +0.07% +0.45% / +0.45% +0.21% +0.49%] index_select skip64 : Elapsed 0.029 ms (2.870 ms / 100) 2.868 -> 2.866 ( -0.07%) [ +0.07% +0.17% +0.00% / -0.07% +0.42% +0.63%] index_select skip256 : Elapsed 0.029 ms (2.870 ms / 100) 2.902 -> 2.898 ( -0.14%) [ +0.03% +0.07% +0.00% / +0.00% +0.03% -0.14%] index_select spread : Elapsed 0.029 ms (2.903 ms / 100) 2.904 -> 2.897 ( -0.24%) [ +0.00% +0.17% +0.10% / -0.10% -0.24% -0.21%] index_select strided 3 : Elapsed 0.029 ms (2.904 ms / 100) 2.883 -> 2.884 ( +0.03%) [ +0.00% +0.17% +0.00% / +0.03% +0.10% +0.69%] index_select strided 5 : Elapsed 0.029 ms (2.883 ms / 100) 2.902 -> 2.895 ( -0.24%) [ +0.03% +0.28% +0.00% / +0.10% -0.24% -0.07%] index_select strided 7 : Elapsed 0.029 ms (2.903 ms / 100) 2.876 -> 2.885 ( +0.31%) [ +0.10% +0.00% +0.21% / +0.31% +0.52% +0.52%] index_select strided 8 : Elapsed 0.029 ms (2.879 ms / 100) 2.881 -> 2.878 ( -0.10%) [ +0.00% +0.10% +0.07% / -0.10% +0.35% +0.31%] index_select strided 16 : Elapsed 0.029 ms (2.881 ms / 100) 2.900 -> 2.887 ( -0.45%) [ +0.10% +0.10% +0.00% / +0.10% -0.10% -0.45%] index_select random : Elapsed 0.029 ms (2.903 ms / 100) 2.898 -> 2.901 ( +0.10%) [ +0.10% +0.17% +0.00% / +0.10% +0.14% +0.17%] index_select random_sorted : Elapsed 0.029 ms (2.901 ms / 100) B = [40, 4, 5, 16] (stride (80, 3200, 16, 1)) A = [20, 4, 5, 16] (stride (1, 1600, 20, 100)) dim = 0 2.397 -> 2.406 ( +0.38%) [ +0.21% +0.08% +0.00% / +0.58% +0.58% +0.38%] index_add_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.386 -> 2.402 ( +0.67%) [ +0.00% +0.13% +0.17% / +0.67% +0.84% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.386 ms / 100) 2.391 -> 2.403 ( +0.50%) [ +0.00% +0.04% +0.08% / +0.50% +1.09% +1.00%] index_add_ reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.379 -> 2.398 ( +0.80%) [ +0.25% +0.21% +0.00% / +0.80% +1.22% +1.39%] index_copy_ reverse : Elapsed 0.024 ms (2.385 ms / 100) 2.386 -> 2.403 ( +0.71%) [ +0.08% +0.00% +0.08% / +0.71% +1.13% +1.13%] index_add_ spread : Elapsed 0.024 ms (2.388 ms / 100) 2.384 -> 2.397 ( +0.55%) [ +0.04% +0.08% +0.00% / +0.55% +1.05% +0.96%] index_copy_ spread : Elapsed 0.024 ms (2.385 ms / 100) 2.392 -> 2.405 ( +0.54%) [ +0.00% +0.17% +0.00% / +0.54% +0.59% +0.84%] index_add_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.392 -> 2.402 ( +0.42%) [ +0.00% +0.00% +0.00% / +0.46% +0.42% +0.84%] index_copy_ strided 3 : Elapsed 0.024 ms (2.392 ms / 100) 2.394 -> 2.407 ( +0.54%) [ +0.17% +0.00% +0.04% / +0.54% +0.71% +0.79%] index_add_ strided 7 : Elapsed 0.024 ms (2.398 ms / 100) 2.390 -> 2.398 ( +0.33%) [ +0.13% +0.13% +0.00% / +0.33% +0.50% +1.17%] index_copy_ strided 7 : Elapsed 0.024 ms (2.393 ms / 100) 2.395 -> 2.398 ( +0.13%) [ +0.00% +0.21% +0.21% / +0.75% +0.21% +0.13%] index_add_ perm : Elapsed 0.024 ms (2.395 ms / 100) 2.388 -> 2.399 ( +0.46%) [ +0.00% +0.17% +0.13% / +0.80% +0.46% +0.46%] index_copy_ perm : Elapsed 0.024 ms (2.388 ms / 100) 2.398 -> 2.403 ( +0.21%) [ +0.04% +0.00% +0.00% / +0.63% +0.21% +0.25%] index_add_ perm_sorted : Elapsed 0.024 ms (2.399 ms / 100) 2.392 -> 2.397 ( +0.21%) [ +0.08% +0.04% +0.00% / +0.63% +0.38% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.394 ms / 100) 4.420 -> 4.424 ( +0.09%) [ +0.07% +0.00% +0.09% / +0.09% +0.11% +0.34%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.427 -> 4.430 ( +0.07%) [ +0.34% +0.00% +0.02% / +0.07% +0.16% +0.25%] index_select wrap : Elapsed 0.044 ms (4.442 ms / 100) 4.428 -> 4.428 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.07% +0.00% +0.68%] index_select linear : Elapsed 0.044 ms (4.433 ms / 100) 4.436 -> 4.428 ( -0.18%) [ +0.00% +0.05% +0.05% / -0.07% -0.18% +0.00%] index_select reverse : Elapsed 0.044 ms (4.436 ms / 100) 4.417 -> 4.419 ( +0.05%) [ +0.27% +0.27% +0.00% / +0.05% +0.14% +0.18%] index_select skip64 : Elapsed 0.044 ms (4.429 ms / 100) 4.424 -> 4.421 ( -0.07%) [ +0.00% +0.14% +0.09% / -0.07% +0.05% +0.00%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.427 -> 4.428 ( +0.02%) [ +0.41% +0.05% +0.00% / +0.02% +0.25% +0.50%] index_select spread : Elapsed 0.044 ms (4.445 ms / 100) 4.426 -> 4.429 ( +0.07%) [ +0.05% +0.09% +0.00% / +0.07% +0.14% +0.27%] index_select strided 3 : Elapsed 0.044 ms (4.428 ms / 100) 4.426 -> 4.431 ( +0.11%) [ +0.14% +0.23% +0.00% / +0.11% +0.14% +0.25%] index_select strided 5 : Elapsed 0.044 ms (4.432 ms / 100) 4.422 -> 4.431 ( +0.20%) [ +0.27% +0.27% +0.00% / +0.20% +0.23% +0.41%] index_select strided 7 : Elapsed 0.044 ms (4.434 ms / 100) 4.427 -> 4.430 ( +0.07%) [ +0.05% +0.07% +0.00% / +0.09% +0.07% +0.41%] index_select strided 8 : Elapsed 0.044 ms (4.429 ms / 100) 4.429 -> 4.429 ( +0.00%) [ +0.00% +0.09% +0.16% / +0.09% +0.00% +0.05%] index_select strided 16 : Elapsed 0.044 ms (4.429 ms / 100) 4.433 -> 4.429 ( -0.09%) [ +0.07% +0.00% +0.00% / +0.05% -0.09% -0.05%] index_select random : Elapsed 0.044 ms (4.436 ms / 100) 4.438 -> 4.425 ( -0.29%) [ +0.00% +0.09% +0.02% / -0.09% -0.29% -0.25%] index_select random_sorted : Elapsed 0.044 ms (4.438 ms / 100) B = [40, 4, 5, 16] (stride (1, 40, 2560, 160)) A = [20, 4, 5, 16] (stride (4, 1, 1280, 80)) dim = 0 1.534 -> 1.483 ( -3.32%) [ +0.13% +0.07% +0.00% / -3.32% -2.61% -2.28%] index_add_ linear : Elapsed 0.015 ms (1.536 ms / 100) 1.523 -> 1.465 ( -3.81%) [ +0.00% +0.07% +0.07% / -3.81% -3.35% -2.63%] index_copy_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.538 -> 1.488 ( -3.25%) [ +0.26% +0.20% +0.00% / -3.25% -3.06% -2.73%] index_add_ reverse : Elapsed 0.015 ms (1.542 ms / 100) 1.521 -> 1.469 ( -3.42%) [ +0.00% +0.13% +0.07% / -2.76% -3.42% -3.22%] index_copy_ reverse : Elapsed 0.015 ms (1.521 ms / 100) 1.547 -> 1.498 ( -3.17%) [ +0.26% +0.26% +0.00% / -3.17% -2.59% -2.39%] index_add_ spread : Elapsed 0.016 ms (1.551 ms / 100) 1.539 -> 1.482 ( -3.70%) [ +0.13% +0.00% +0.13% / -3.70% -3.25% -2.66%] index_copy_ spread : Elapsed 0.015 ms (1.541 ms / 100) 1.553 -> 1.499 ( -3.48%) [ +0.19% +0.00% +0.19% / -3.48% -3.03% -2.70%] index_add_ strided 3 : Elapsed 0.016 ms (1.556 ms / 100) 1.538 -> 1.483 ( -3.58%) [ +0.13% +0.13% +0.00% / -3.58% -3.06% -2.34%] index_copy_ strided 3 : Elapsed 0.015 ms (1.540 ms / 100) 1.558 -> 1.500 ( -3.72%) [ +0.13% +0.00% +0.06% / -3.72% -3.27% -3.15%] index_add_ strided 7 : Elapsed 0.016 ms (1.560 ms / 100) 1.542 -> 1.485 ( -3.70%) [ +0.06% +0.00% +0.00% / -3.70% -3.37% -3.50%] index_copy_ strided 7 : Elapsed 0.015 ms (1.543 ms / 100) 1.555 -> 1.496 ( -3.79%) [ +0.06% +0.13% +0.00% / -3.79% -3.09% -3.22%] index_add_ perm : Elapsed 0.016 ms (1.556 ms / 100) 1.537 -> 1.482 ( -3.58%) [ +0.00% +0.20% +0.13% / -3.58% -2.93% -2.93%] index_copy_ perm : Elapsed 0.015 ms (1.537 ms / 100) 1.549 -> 1.496 ( -3.42%) [ +0.45% +0.19% +0.00% / -3.42% -2.78% -2.65%] index_add_ perm_sorted : Elapsed 0.016 ms (1.556 ms / 100) 1.538 -> 1.483 ( -3.58%) [ +0.20% +0.13% +0.00% / -3.58% -3.06% -3.12%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.541 ms / 100) 2.884 -> 2.874 ( -0.35%) [ +0.03% +0.00% +0.10% / -0.28% -0.35% -0.31%] index_select const : Elapsed 0.029 ms (2.885 ms / 100) 2.908 -> 2.877 ( -1.07%) [ +0.00% +0.00% +0.00% / -0.65% -1.07% -0.62%] index_select wrap : Elapsed 0.029 ms (2.908 ms / 100) 2.901 -> 2.882 ( -0.65%) [ +0.38% +0.00% +0.00% / -0.28% -0.41% -0.65%] index_select linear : Elapsed 0.029 ms (2.912 ms / 100) 2.903 -> 2.884 ( -0.65%) [ +0.17% +0.00% +0.00% / -0.45% -0.55% -0.65%] index_select reverse : Elapsed 0.029 ms (2.908 ms / 100) 2.882 -> 2.876 ( -0.21%) [ +0.07% +0.14% +0.00% / +0.00% -0.21% +0.24%] index_select skip64 : Elapsed 0.029 ms (2.884 ms / 100) 2.877 -> 2.867 ( -0.35%) [ +0.00% +0.03% +0.00% / -0.35% -0.03% +0.24%] index_select skip256 : Elapsed 0.029 ms (2.877 ms / 100) 2.902 -> 2.878 ( -0.83%) [ +0.28% +0.17% +0.00% / -0.41% -0.83% -0.72%] index_select spread : Elapsed 0.029 ms (2.910 ms / 100) 2.903 -> 2.882 ( -0.72%) [ +0.14% +0.03% +0.00% / -0.62% -0.72% -0.31%] index_select strided 3 : Elapsed 0.029 ms (2.907 ms / 100) 2.893 -> 2.879 ( -0.48%) [ +0.00% +0.21% +0.03% / -0.28% -0.14% -0.48%] index_select strided 5 : Elapsed 0.029 ms (2.893 ms / 100) 2.902 -> 2.888 ( -0.48%) [ +0.00% +0.07% +0.10% / -0.34% -0.48% -0.45%] index_select strided 7 : Elapsed 0.029 ms (2.902 ms / 100) 2.883 -> 2.876 ( -0.24%) [ +0.00% +0.31% +0.03% / -0.24% -0.03% +0.00%] index_select strided 8 : Elapsed 0.029 ms (2.883 ms / 100) 2.889 -> 2.875 ( -0.48%) [ +0.00% +0.21% +0.07% / -0.48% -0.07% +0.14%] index_select strided 16 : Elapsed 0.029 ms (2.889 ms / 100) 2.903 -> 2.888 ( -0.52%) [ +0.00% +0.14% +0.17% / -0.41% -0.52% -0.21%] index_select random : Elapsed 0.029 ms (2.903 ms / 100) 2.903 -> 2.883 ( -0.69%) [ +0.00% +0.17% +0.28% / -0.69% -0.38% -0.59%] index_select random_sorted : Elapsed 0.029 ms (2.903 ms / 100) B = [40, 4, 5, 16] (stride (20, 5, 1, 800)) A = [20, 4, 5, 16] (stride (5, 1600, 1, 100)) dim = 0 2.400 -> 2.409 ( +0.37%) [ +0.13% +0.08% +0.00% / +0.37% +0.58% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.397 -> 2.410 ( +0.54%) [ +0.17% +0.00% +0.13% / +0.54% +1.00% +0.96%] index_copy_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.393 -> 2.409 ( +0.67%) [ +0.13% +0.17% +0.00% / +0.67% +1.04% +1.17%] index_add_ reverse : Elapsed 0.024 ms (2.396 ms / 100) 2.392 -> 2.405 ( +0.54%) [ +0.04% +0.08% +0.00% / +0.54% +1.13% +1.09%] index_copy_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.398 -> 2.415 ( +0.71%) [ +0.17% +0.00% +0.08% / +0.71% +1.21% +1.13%] index_add_ spread : Elapsed 0.024 ms (2.402 ms / 100) 2.403 -> 2.422 ( +0.79%) [ +0.00% +0.21% +0.17% / +0.79% +1.29% +1.08%] index_copy_ spread : Elapsed 0.024 ms (2.403 ms / 100) 2.407 -> 2.418 ( +0.46%) [ +0.04% +0.00% +0.17% / +0.46% +0.54% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.408 -> 2.423 ( +0.62%) [ +0.00% +0.00% +0.29% / +0.66% +0.71% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.408 ms / 100) 2.403 -> 2.419 ( +0.67%) [ +0.00% +0.29% +0.00% / +0.67% +0.71% +0.67%] index_add_ strided 7 : Elapsed 0.024 ms (2.403 ms / 100) 2.410 -> 2.424 ( +0.58%) [ +0.08% +0.00% +0.08% / +0.58% +0.66% +0.79%] index_copy_ strided 7 : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.409 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.62% +0.08% +0.00%] index_add_ perm : Elapsed 0.024 ms (2.410 ms / 100) 2.407 -> 2.416 ( +0.37%) [ +0.00% +0.04% +0.04% / +0.62% +0.54% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.407 ms / 100) 2.408 -> 2.413 ( +0.21%) [ +0.25% +0.00% +0.17% / +0.71% +0.21% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.414 ms / 100) 2.409 -> 2.416 ( +0.29%) [ +0.17% +0.12% +0.00% / +0.58% +0.29% +0.42%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 4.423 -> 4.426 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.20% +0.07% +0.23%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.438 -> 4.431 ( -0.16%) [ +0.00% +0.05% +0.07% / -0.16% -0.11% +0.07%] index_select wrap : Elapsed 0.044 ms (4.438 ms / 100) 4.436 -> 4.435 ( -0.02%) [ +0.00% +0.09% +0.02% / +0.02% -0.02% +0.09%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.440 -> 4.434 ( -0.14%) [ +0.00% +0.27% +0.09% / +0.11% -0.09% -0.14%] index_select reverse : Elapsed 0.044 ms (4.440 ms / 100) 4.424 -> 4.425 ( +0.02%) [ +0.00% +0.16% +0.27% / +0.09% +0.20% +0.02%] index_select skip64 : Elapsed 0.044 ms (4.424 ms / 100) 4.428 -> 4.426 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% -0.05% -0.05%] index_select skip256 : Elapsed 0.044 ms (4.428 ms / 100) 4.435 -> 4.438 ( +0.07%) [ +0.00% +0.16% +0.05% / +0.11% +0.07% +0.18%] index_select spread : Elapsed 0.044 ms (4.435 ms / 100) 4.434 -> 4.436 ( +0.05%) [ +0.07% +0.00% +0.02% / +0.09% +0.09% +0.05%] index_select strided 3 : Elapsed 0.044 ms (4.437 ms / 100) 4.428 -> 4.428 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.00% +0.05% +0.18%] index_select strided 5 : Elapsed 0.044 ms (4.431 ms / 100) 4.433 -> 4.433 ( +0.00%) [ +0.00% +0.16% +0.05% / +0.14% +0.00% +0.32%] index_select strided 7 : Elapsed 0.044 ms (4.433 ms / 100) 4.432 -> 4.429 ( -0.07%) [ +0.14% +0.00% +0.09% / +0.00% +0.14% -0.07%] index_select strided 8 : Elapsed 0.044 ms (4.438 ms / 100) 4.430 -> 4.429 ( -0.02%) [ +0.14% +0.18% +0.00% / +0.05% -0.02% +0.23%] index_select strided 16 : Elapsed 0.044 ms (4.436 ms / 100) 4.440 -> 4.434 ( -0.14%) [ +0.02% +0.05% +0.00% / -0.14% +0.05% -0.07%] index_select random : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.436 ( +0.00%) [ +0.27% +0.00% +0.23% / +0.00% +0.07% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.448 ms / 100) B = [40, 4, 5, 16] (stride (5, 200, 1, 800)) A = [20, 4, 5, 16] (stride (1, 1600, 20, 100)) dim = 0 2.449 -> 2.465 ( +0.65%) [ +0.04% +0.00% +0.16% / +0.65% +0.69% +0.94%] index_add_ linear : Elapsed 0.024 ms (2.450 ms / 100) 2.446 -> 2.462 ( +0.65%) [ +0.00% +0.12% +0.04% / +0.65% +0.82% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.449 -> 2.465 ( +0.65%) [ +0.24% +0.00% +0.16% / +0.65% +0.78% +0.82%] index_add_ reverse : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.462 ( +0.53%) [ +0.00% +0.20% +0.08% / +0.53% +0.78% +0.73%] index_copy_ reverse : Elapsed 0.024 ms (2.449 ms / 100) 2.470 -> 2.481 ( +0.45%) [ +0.24% +0.16% +0.00% / +0.45% +0.49% +0.53%] index_add_ spread : Elapsed 0.025 ms (2.476 ms / 100) 2.480 -> 2.491 ( +0.44%) [ +0.00% +0.00% +0.04% / +0.44% +0.56% +0.52%] index_copy_ spread : Elapsed 0.025 ms (2.480 ms / 100) 2.468 -> 2.476 ( +0.32%) [ +0.12% +0.00% +0.12% / +0.32% +0.53% +0.45%] index_add_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.472 -> 2.485 ( +0.53%) [ +0.08% +0.00% +0.08% / +0.53% +0.73% +0.65%] index_copy_ strided 3 : Elapsed 0.025 ms (2.474 ms / 100) 2.466 -> 2.474 ( +0.32%) [ +0.00% +0.08% +0.12% / +0.49% +0.32% +0.65%] index_add_ strided 7 : Elapsed 0.025 ms (2.466 ms / 100) 2.470 -> 2.484 ( +0.57%) [ +0.04% +0.00% +0.12% / +0.57% +0.73% +0.77%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.462 -> 2.473 ( +0.45%) [ +0.12% +0.04% +0.00% / +0.45% +0.61% +0.57%] index_add_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.467 -> 2.478 ( +0.45%) [ +0.00% +0.08% +0.04% / +0.45% +0.65% +0.73%] index_copy_ perm : Elapsed 0.025 ms (2.467 ms / 100) 2.467 -> 2.478 ( +0.45%) [ +0.08% +0.28% +0.00% / +0.45% +0.49% +0.49%] index_add_ perm_sorted : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.479 ( +0.36%) [ +0.00% +0.12% +0.24% / +0.36% +0.69% +0.69%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.470 ms / 100) 4.493 -> 4.498 ( +0.11%) [ +0.18% +0.20% +0.00% / +0.27% +0.11% +0.24%] index_select const : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.498 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.22% +0.18%] index_select wrap : Elapsed 0.045 ms (4.498 ms / 100) 4.500 -> 4.499 ( -0.02%) [ +0.09% +0.00% +0.11% / -0.02% +0.11% +0.29%] index_select linear : Elapsed 0.045 ms (4.504 ms / 100) 4.505 -> 4.508 ( +0.07%) [ +0.20% +0.00% +0.00% / +0.07% +0.27% +0.18%] index_select reverse : Elapsed 0.045 ms (4.514 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.16% +0.18%] index_select skip64 : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.503 ( +0.11%) [ +0.11% +0.00% +0.04% / +0.11% +0.20% +0.22%] index_select skip256 : Elapsed 0.045 ms (4.503 ms / 100) 4.505 -> 4.507 ( +0.04%) [ +0.00% +0.04% +0.13% / +0.13% +0.04% +0.20%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.506 -> 4.504 ( -0.04%) [ +0.00% +0.00% +0.16% / -0.04% +0.11% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.506 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.29% +0.00% +0.04% / +0.07% +0.33% +0.27%] index_select strided 5 : Elapsed 0.045 ms (4.514 ms / 100) 4.501 -> 4.501 ( +0.00%) [ +0.07% +0.09% +0.00% / +0.07% +0.24% +0.00%] index_select strided 7 : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.503 ( +0.07%) [ +0.13% +0.00% +0.18% / +0.07% +0.33% +0.24%] index_select strided 8 : Elapsed 0.045 ms (4.506 ms / 100) 4.504 -> 4.504 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.04% +0.20%] index_select strided 16 : Elapsed 0.045 ms (4.504 ms / 100) 4.500 -> 4.503 ( +0.07%) [ +0.22% +0.07% +0.00% / +0.07% +0.20% +0.18%] index_select random : Elapsed 0.045 ms (4.510 ms / 100) 4.504 -> 4.500 ( -0.09%) [ +0.00% +0.11% +0.02% / -0.09% +0.22% +0.11%] index_select random_sorted : Elapsed 0.045 ms (4.504 ms / 100) out_shape = [20, 40, 5, 16] in_shape = [20, 4, 5, 16] idx_dim = 1 B = [20, 40, 5, 16] (stride (640, 16, 12800, 1)) A = [20, 4, 5, 16] (stride (1, 1600, 20, 100)) dim = 1 1.387 -> 1.388 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.58% +0.65%] index_add_ linear : Elapsed 0.014 ms (1.388 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.74% +0.52%] index_copy_ linear : Elapsed 0.013 ms (1.344 ms / 100) 1.388 -> 1.388 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.58% +0.65%] index_add_ reverse : Elapsed 0.014 ms (1.389 ms / 100) 1.344 -> 1.343 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.60% +0.74%] index_copy_ reverse : Elapsed 0.013 ms (1.344 ms / 100) 1.386 -> 1.386 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.65% +0.58%] index_add_ spread : Elapsed 0.014 ms (1.386 ms / 100) 1.341 -> 1.343 ( +0.15%) [ +0.15% +0.00% +0.30% / +0.15% +0.75% +0.67%] index_copy_ spread : Elapsed 0.013 ms (1.343 ms / 100) 1.387 -> 1.387 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.58% +0.65%] index_add_ strided 3 : Elapsed 0.014 ms (1.388 ms / 100) 1.343 -> 1.343 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.82% +0.74%] index_copy_ strided 3 : Elapsed 0.013 ms (1.343 ms / 100) 1.385 -> 1.386 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.79% +0.72%] index_add_ strided 7 : Elapsed 0.014 ms (1.386 ms / 100) 1.342 -> 1.343 ( +0.07%) [ +0.00% +0.22% +0.07% / +0.07% +0.67% +0.52%] index_copy_ strided 7 : Elapsed 0.013 ms (1.342 ms / 100) 1.386 -> 1.387 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.65% +0.65%] index_add_ perm : Elapsed 0.014 ms (1.387 ms / 100) 1.342 -> 1.345 ( +0.22%) [ +0.00% +0.07% +0.15% / +0.22% +0.75% +0.52%] index_copy_ perm : Elapsed 0.013 ms (1.342 ms / 100) 1.388 -> 1.389 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.58% +0.72%] index_add_ perm_sorted : Elapsed 0.014 ms (1.389 ms / 100) 1.343 -> 1.343 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.82% +0.82%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.343 ms / 100) 9.153 -> 9.166 ( +0.14%) [ +0.09% +0.09% +0.00% / +0.14% +0.22% +0.31%] index_select const : Elapsed 0.092 ms (9.161 ms / 100) 9.192 -> 9.176 ( -0.17%) [ +0.07% +0.05% +0.00% / -0.17% -0.03% -0.07%] index_select wrap : Elapsed 0.092 ms (9.198 ms / 100) 9.176 -> 9.177 ( +0.01%) [ +0.22% +0.01% +0.00% / +0.01% +0.23% +0.04%] index_select linear : Elapsed 0.092 ms (9.196 ms / 100) 9.183 -> 9.179 ( -0.04%) [ +0.00% +0.04% +0.03% / +0.01% -0.04% +0.02%] index_select reverse : Elapsed 0.092 ms (9.183 ms / 100) 9.156 -> 9.160 ( +0.04%) [ +0.12% +0.00% +0.09% / +0.04% +0.19% +0.39%] index_select skip64 : Elapsed 0.092 ms (9.167 ms / 100) 9.160 -> 9.158 ( -0.02%) [ +0.24% +0.01% +0.00% / -0.02% +0.13% +0.15%] index_select skip256 : Elapsed 0.092 ms (9.182 ms / 100) 9.179 -> 9.178 ( -0.01%) [ +0.00% +0.10% +0.00% / -0.01% +0.12% +0.17%] index_select spread : Elapsed 0.092 ms (9.179 ms / 100) 9.171 -> 9.175 ( +0.04%) [ +0.13% +0.00% +0.02% / +0.04% +0.21% +0.35%] index_select strided 3 : Elapsed 0.092 ms (9.183 ms / 100) 9.176 -> 9.186 ( +0.11%) [ +0.24% +0.00% +0.07% / +0.11% +0.16% +0.34%] index_select random : Elapsed 0.092 ms (9.198 ms / 100) 9.181 -> 9.197 ( +0.17%) [ +0.09% +0.28% +0.00% / +0.19% +0.17% +0.23%] index_select random_sorted : Elapsed 0.092 ms (9.189 ms / 100) B = [20, 40, 5, 16] (stride (1, 20, 12800, 800)) A = [20, 4, 5, 16] (stride (80, 1600, 16, 1)) dim = 1 0.566 -> 0.566 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.88% +1.06%] index_add_ linear : Elapsed 0.006 ms (0.566 ms / 100) 0.576 -> 0.576 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.87% +1.22%] index_copy_ linear : Elapsed 0.006 ms (0.577 ms / 100) 0.566 -> 0.566 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.18% +0.00% +0.18%] index_add_ reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.576 -> 0.577 ( +0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.17% +0.35%] index_copy_ reverse : Elapsed 0.006 ms (0.576 ms / 100) 0.567 -> 0.567 ( +0.00%) [ +0.00% +0.18% +0.00% / +0.00% +0.00% +0.18%] index_add_ spread : Elapsed 0.006 ms (0.567 ms / 100) 0.577 -> 0.576 ( -0.17%) [ +0.17% +0.17% +0.00% / -0.17% +0.35% +0.00%] index_copy_ spread : Elapsed 0.006 ms (0.578 ms / 100) 0.567 -> 0.567 ( +0.00%) [ +0.00% +0.00% +0.71% / +0.00% +0.53% +0.35%] index_add_ strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.577 -> 0.577 ( +0.00%) [ +0.17% +0.17% +0.00% / +0.00% +0.69% +0.35%] index_copy_ strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.567 -> 0.566 ( -0.18%) [ +0.00% +0.18% +0.00% / -0.18% +0.71% +0.53%] index_add_ strided 7 : Elapsed 0.006 ms (0.567 ms / 100) 0.577 -> 0.577 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.87% +0.69%] index_copy_ strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.569 -> 0.568 ( -0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.00% -0.18%] index_add_ perm : Elapsed 0.006 ms (0.570 ms / 100) 0.581 -> 0.580 ( -0.17%) [ +0.17% +0.00% +1.38% / +0.17% -0.17% -0.17%] index_copy_ perm : Elapsed 0.006 ms (0.582 ms / 100) 0.568 -> 0.567 ( -0.18%) [ +0.18% +0.18% +0.00% / +0.18% +0.00% -0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.569 ms / 100) 0.579 -> 0.579 ( +0.00%) [ +0.17% +0.35% +0.00% / +0.17% +0.00% +0.00%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 5.010 -> 5.021 ( +0.22%) [ +0.00% +0.16% +0.22% / +0.22% +0.54% +0.48%] index_select const : Elapsed 0.050 ms (5.010 ms / 100) 5.029 -> 5.028 ( -0.02%) [ +0.36% +0.00% +0.10% / -0.02% +0.30% +0.30%] index_select wrap : Elapsed 0.050 ms (5.047 ms / 100) 5.026 -> 5.013 ( -0.26%) [ +0.12% +0.00% +0.18% / -0.26% +0.02% +0.00%] index_select linear : Elapsed 0.050 ms (5.032 ms / 100) 5.018 -> 5.013 ( -0.10%) [ +0.00% +0.18% +0.04% / -0.10% +0.00% +0.00%] index_select reverse : Elapsed 0.050 ms (5.018 ms / 100) 5.003 -> 4.999 ( -0.08%) [ +0.32% +0.00% +0.06% / -0.08% +0.18% +0.30%] index_select skip64 : Elapsed 0.050 ms (5.019 ms / 100) 5.006 -> 5.004 ( -0.04%) [ +0.16% +0.00% +0.06% / -0.04% +0.36% +0.38%] index_select skip256 : Elapsed 0.050 ms (5.014 ms / 100) 5.011 -> 5.010 ( -0.02%) [ +0.14% +0.38% +0.00% / -0.02% +0.12% +0.36%] index_select spread : Elapsed 0.050 ms (5.018 ms / 100) 5.025 -> 5.020 ( -0.10%) [ +0.08% +0.22% +0.00% / -0.10% +0.24% +0.08%] index_select strided 3 : Elapsed 0.050 ms (5.029 ms / 100) 5.021 -> 5.017 ( -0.08%) [ +0.06% +0.14% +0.00% / -0.08% +0.36% +0.42%] index_select random : Elapsed 0.050 ms (5.024 ms / 100) 5.015 -> 5.000 ( -0.30%) [ +0.16% +0.00% +0.14% / -0.30% +0.20% +0.24%] index_select random_sorted : Elapsed 0.050 ms (5.023 ms / 100) out_shape = [20, 4, 40, 16] in_shape = [20, 4, 5, 16] idx_dim = 2 B = [20, 4, 40, 16] (stride (2560, 640, 1, 40)) A = [20, 4, 5, 16] (stride (1, 20, 80, 400)) dim = 2 1.508 -> 1.507 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.40% +0.40%] index_add_ linear : Elapsed 0.015 ms (1.508 ms / 100) 1.465 -> 1.462 ( -0.20%) [ +0.00% +0.07% +0.00% / -0.20% +0.34% +0.41%] index_copy_ linear : Elapsed 0.015 ms (1.465 ms / 100) 1.516 -> 1.515 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.46% +0.40%] index_add_ reverse : Elapsed 0.015 ms (1.517 ms / 100) 1.477 -> 1.476 ( -0.07%) [ +0.00% +0.07% +0.14% / -0.07% +0.41% +0.41%] index_copy_ reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.515 -> 1.515 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.40% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.516 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.55% +0.55%] index_copy_ spread : Elapsed 0.015 ms (1.465 ms / 100) 1.515 -> 1.516 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.516 ms / 100) 1.466 -> 1.467 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.55% +0.41%] index_copy_ strided 3 : Elapsed 0.015 ms (1.466 ms / 100) 1.515 -> 1.515 ( +0.00%) [ +0.07% +0.00% +0.20% / +0.00% +0.66% +0.46%] index_add_ strided 7 : Elapsed 0.015 ms (1.516 ms / 100) 1.477 -> 1.475 ( -0.14%) [ +0.00% +0.00% +0.61% / -0.14% +0.68% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.508 -> 1.507 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.46% +0.40%] index_add_ perm : Elapsed 0.015 ms (1.508 ms / 100) 1.463 -> 1.465 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.48% +0.55%] index_copy_ perm : Elapsed 0.015 ms (1.465 ms / 100) 1.517 -> 1.517 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.53% +0.46%] index_add_ perm_sorted : Elapsed 0.015 ms (1.517 ms / 100) 1.465 -> 1.466 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.68% +0.61%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.465 ms / 100) 8.239 -> 8.241 ( +0.02%) [ +0.00% +0.15% +0.18% / +0.02% +0.42% +0.22%] index_select const : Elapsed 0.082 ms (8.239 ms / 100) 8.265 -> 8.258 ( -0.08%) [ +0.05% +0.00% +0.05% / +0.02% -0.06% -0.08%] index_select wrap : Elapsed 0.083 ms (8.269 ms / 100) 8.259 -> 8.264 ( +0.06%) [ +0.25% +0.10% +0.00% / +0.33% +0.06% +0.15%] index_select linear : Elapsed 0.083 ms (8.280 ms / 100) 8.250 -> 8.252 ( +0.02%) [ +0.15% +0.21% +0.00% / +0.02% +0.30% +0.08%] index_select reverse : Elapsed 0.083 ms (8.262 ms / 100) 8.252 -> 8.251 ( -0.01%) [ +0.10% +0.00% +0.04% / +0.22% +0.25% -0.01%] index_select skip64 : Elapsed 0.083 ms (8.260 ms / 100) 8.250 -> 8.232 ( -0.22%) [ +0.04% +0.22% +0.00% / -0.22% +0.30% -0.01%] index_select skip256 : Elapsed 0.083 ms (8.253 ms / 100) 8.268 -> 8.265 ( -0.04%) [ +0.15% +0.00% +0.29% / -0.04% +0.15% +0.21%] index_select spread : Elapsed 0.083 ms (8.280 ms / 100) 8.251 -> 8.251 ( +0.00%) [ +0.24% +0.48% +0.00% / +0.00% +0.51% +0.08%] index_select strided 3 : Elapsed 0.083 ms (8.271 ms / 100) 8.259 -> 8.259 ( +0.00%) [ +0.21% +0.00% +0.15% / +0.15% +0.33% +0.00%] index_select random : Elapsed 0.083 ms (8.276 ms / 100) 8.265 -> 8.271 ( +0.07%) [ +0.00% +0.22% +0.05% / +0.07% +0.46% +0.39%] index_select random_sorted : Elapsed 0.083 ms (8.265 ms / 100) B = [20, 4, 40, 16] (stride (40, 12800, 1, 800)) A = [20, 4, 5, 16] (stride (20, 5, 1, 400)) dim = 2 1.491 -> 1.490 ( -0.07%) [ +0.00% +0.07% +0.13% / -0.07% +0.60% +0.47%] index_add_ linear : Elapsed 0.015 ms (1.491 ms / 100) 1.442 -> 1.441 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.55% +0.49%] index_copy_ linear : Elapsed 0.014 ms (1.442 ms / 100) 1.492 -> 1.492 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.74% +0.74%] index_add_ reverse : Elapsed 0.015 ms (1.492 ms / 100) 1.446 -> 1.445 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.69% +0.69%] index_copy_ reverse : Elapsed 0.014 ms (1.446 ms / 100) 1.493 -> 1.494 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.67% +0.74%] index_add_ spread : Elapsed 0.015 ms (1.494 ms / 100) 1.445 -> 1.447 ( +0.14%) [ +0.00% +0.14% +0.14% / +0.14% +0.55% +0.62%] index_copy_ spread : Elapsed 0.014 ms (1.445 ms / 100) 1.491 -> 1.491 ( +0.00%) [ +0.13% +0.00% +0.07% / +0.00% +0.74% +0.60%] index_add_ strided 3 : Elapsed 0.015 ms (1.493 ms / 100) 1.441 -> 1.443 ( +0.14%) [ +0.00% +0.07% +0.21% / +0.14% +0.62% +0.69%] index_copy_ strided 3 : Elapsed 0.014 ms (1.441 ms / 100) 1.497 -> 1.497 ( +0.00%) [ +0.00% +0.00% +0.33% / +0.00% +0.60% +0.73%] index_add_ strided 7 : Elapsed 0.015 ms (1.497 ms / 100) 1.447 -> 1.449 ( +0.14%) [ +0.00% +0.07% +0.41% / +0.14% +0.69% +0.83%] index_copy_ strided 7 : Elapsed 0.014 ms (1.447 ms / 100) 1.495 -> 1.495 ( +0.00%) [ +0.00% +0.00% +0.13% / +0.00% +0.87% +0.74%] index_add_ perm : Elapsed 0.015 ms (1.495 ms / 100) 1.446 -> 1.446 ( +0.00%) [ +0.07% +0.00% +0.21% / +0.00% +0.97% +0.83%] index_copy_ perm : Elapsed 0.014 ms (1.447 ms / 100) 1.489 -> 1.491 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.81% +0.74%] index_add_ perm_sorted : Elapsed 0.015 ms (1.491 ms / 100) 1.441 -> 1.442 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.69% +0.69%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.442 ms / 100) 8.282 -> 8.268 ( -0.17%) [ +0.00% +0.30% +0.27% / -0.17% +0.40% +0.22%] index_select const : Elapsed 0.083 ms (8.282 ms / 100) 8.279 -> 8.273 ( -0.07%) [ +0.06% +0.07% +0.00% / -0.07% +0.24% +0.19%] index_select wrap : Elapsed 0.083 ms (8.284 ms / 100) 8.282 -> 8.285 ( +0.04%) [ +0.11% +0.00% +0.04% / +0.11% +0.04% +0.13%] index_select linear : Elapsed 0.083 ms (8.291 ms / 100) 8.282 -> 8.286 ( +0.05%) [ +0.04% +0.33% +0.00% / +0.05% +0.35% +0.31%] index_select reverse : Elapsed 0.083 ms (8.285 ms / 100) 8.288 -> 8.292 ( +0.05%) [ +0.00% +0.02% +0.08% / +0.05% +0.16% +0.45%] index_select skip64 : Elapsed 0.083 ms (8.288 ms / 100) 8.281 -> 8.293 ( +0.14%) [ +0.37% +0.27% +0.00% / +0.14% +0.18% +0.25%] index_select skip256 : Elapsed 0.083 ms (8.312 ms / 100) 8.278 -> 8.288 ( +0.12%) [ +0.00% +0.25% +0.13% / +0.14% +0.12% +0.31%] index_select spread : Elapsed 0.083 ms (8.278 ms / 100) 8.279 -> 8.290 ( +0.13%) [ +0.08% +0.33% +0.00% / +0.13% +0.43% +0.31%] index_select strided 3 : Elapsed 0.083 ms (8.286 ms / 100) 8.284 -> 8.280 ( -0.05%) [ +0.00% +0.25% +0.37% / -0.05% +0.36% +0.14%] index_select random : Elapsed 0.083 ms (8.284 ms / 100) 8.279 -> 8.283 ( +0.05%) [ +0.11% +0.00% +0.01% / +0.05% +0.59% +0.40%] index_select random_sorted : Elapsed 0.083 ms (8.288 ms / 100) B = [20, 4, 40, 16] (stride (160, 1, 4, 3200)) dim = 2 fill_cnt = 5 0.915 -> 0.916 ( +0.11%) [ +0.00% +0.00% +0.00% / +0.11% +0.44% +0.44%] index_fill_ const : Elapsed 0.009 ms (0.915 ms / 100) 0.913 -> 0.912 ( -0.11%) [ +0.00% +0.11% +0.00% / -0.11% +0.66% +0.77%] index_fill_ linear : Elapsed 0.009 ms (0.913 ms / 100) 0.913 -> 0.912 ( -0.11%) [ +0.11% +0.11% +0.00% / -0.11% +0.66% +0.77%] index_fill_ reverse : Elapsed 0.009 ms (0.914 ms / 100) 0.912 -> 0.913 ( +0.11%) [ +0.22% +0.33% +0.00% / +0.11% +1.10% +0.77%] index_fill_ skip64 : Elapsed 0.009 ms (0.914 ms / 100) 0.912 -> 0.913 ( +0.11%) [ +0.22% +0.22% +0.00% / +0.11% +0.66% +0.77%] index_fill_ skip256 : Elapsed 0.009 ms (0.914 ms / 100) 0.912 -> 0.912 ( +0.00%) [ +0.11% +0.33% +0.00% / +0.00% +0.77% +0.77%] index_fill_ spread : Elapsed 0.009 ms (0.913 ms / 100) 0.911 -> 0.911 ( +0.00%) [ +0.11% +0.22% +0.00% / +0.00% +0.99% +1.10%] index_fill_ strided 3 : Elapsed 0.009 ms (0.912 ms / 100) 0.912 -> 0.912 ( +0.00%) [ +0.22% +0.11% +0.00% / +0.00% +0.77% +0.99%] index_fill_ strided 5 : Elapsed 0.009 ms (0.914 ms / 100) 0.912 -> 0.912 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.77% +0.88%] index_fill_ strided 7 : Elapsed 0.009 ms (0.913 ms / 100) 0.913 -> 0.914 ( +0.11%) [ +0.00% +0.22% +0.88% / +0.11% +0.99% +0.77%] index_fill_ strided 8 : Elapsed 0.009 ms (0.913 ms / 100) 0.913 -> 0.915 ( +0.22%) [ +0.11% +0.44% +0.00% / +0.22% +1.10% +0.88%] index_fill_ strided 16 : Elapsed 0.009 ms (0.914 ms / 100) 0.914 -> 0.915 ( +0.11%) [ +0.11% +0.00% +0.00% / +0.11% +0.66% +0.88%] index_fill_ random : Elapsed 0.009 ms (0.915 ms / 100) 0.914 -> 0.914 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.66% +0.77%] index_fill_ random_sorted : Elapsed 0.009 ms (0.914 ms / 100) 0.912 -> 0.913 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +0.77% +0.77%] index_fill_ perm : Elapsed 0.009 ms (0.913 ms / 100) 0.912 -> 0.912 ( +0.00%) [ +0.00% +0.22% +0.00% / +0.00% +0.77% +0.77%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.912 ms / 100) B = [20, 4, 40, 16] (stride (1, 20, 80, 3200)) A = [20, 4, 5, 16] (stride (320, 1, 64, 4)) dim = 2 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.46% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.47%] index_copy_ linear : Elapsed 0.015 ms (1.478 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.00% +0.53% / +0.00% +0.46% +0.46%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.00% +0.34% / +0.07% +0.54% +0.47%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.39% +0.39%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.477 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.41% +0.34%] index_copy_ spread : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.522 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.41% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.07% +0.13% +0.00% / +0.07% +0.53% +0.72%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.20% +0.00% +0.00% / +0.07% +0.54% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.53% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.47% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.478 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.53% +0.46%] index_add_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.54% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) 8.520 -> 8.523 ( +0.04%) [ +0.07% +0.23% +0.00% / +0.04% +0.33% +0.33%] index_select const : Elapsed 0.085 ms (8.526 ms / 100) 8.545 -> 8.529 ( -0.19%) [ +0.11% +0.01% +0.00% / -0.19% -0.04% +0.20%] index_select wrap : Elapsed 0.086 ms (8.554 ms / 100) 8.528 -> 8.532 ( +0.05%) [ +0.23% +0.00% +0.28% / +0.05% +0.36% +0.23%] index_select linear : Elapsed 0.085 ms (8.548 ms / 100) 8.519 -> 8.536 ( +0.20%) [ +0.00% +0.16% +0.11% / +0.20% +0.38% +0.38%] index_select reverse : Elapsed 0.085 ms (8.519 ms / 100) 8.521 -> 8.521 ( +0.00%) [ +0.12% +0.00% +0.16% / +0.00% +0.22% +0.35%] index_select skip64 : Elapsed 0.085 ms (8.531 ms / 100) 8.510 -> 8.520 ( +0.12%) [ +0.00% +0.01% +0.14% / +0.12% +0.38% +0.61%] index_select skip256 : Elapsed 0.085 ms (8.510 ms / 100) 8.540 -> 8.523 ( -0.20%) [ +0.00% +0.08% +0.12% / -0.20% +0.20% +0.08%] index_select spread : Elapsed 0.085 ms (8.540 ms / 100) 8.526 -> 8.543 ( +0.20%) [ +0.35% +0.00% +0.16% / +0.20% +0.41% +0.33%] index_select strided 3 : Elapsed 0.086 ms (8.556 ms / 100) 8.541 -> 8.552 ( +0.13%) [ +0.00% +0.21% +0.23% / +0.13% +0.19% +0.23%] index_select random : Elapsed 0.085 ms (8.541 ms / 100) 8.530 -> 8.528 ( -0.02%) [ +0.08% +0.14% +0.00% / -0.02% +0.40% +0.35%] index_select random_sorted : Elapsed 0.085 ms (8.537 ms / 100) out_shape = [20, 4, 5, 40] in_shape = [20, 4, 5, 16] idx_dim = 3 B = [20, 4, 5, 40] (stride (800, 1, 160, 4)) A = [20, 4, 5, 16] (stride (80, 1600, 1, 5)) dim = 3 4.064 -> 4.067 ( +0.07%) [ +0.02% +0.00% +0.00% / +0.07% +0.66% +0.66%] index_add_ linear : Elapsed 0.041 ms (4.065 ms / 100) 3.928 -> 3.930 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.64% +0.64%] index_copy_ linear : Elapsed 0.039 ms (3.928 ms / 100) 4.054 -> 4.055 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.64% +0.62%] index_add_ reverse : Elapsed 0.041 ms (4.055 ms / 100) 3.923 -> 3.922 ( -0.03%) [ +0.05% +0.00% +0.10% / -0.03% +0.51% +0.48%] index_copy_ reverse : Elapsed 0.039 ms (3.925 ms / 100) 4.055 -> 4.057 ( +0.05%) [ +0.12% +0.12% +0.00% / +0.05% +0.62% +0.64%] index_add_ spread : Elapsed 0.041 ms (4.060 ms / 100) 3.926 -> 3.924 ( -0.05%) [ +0.05% +0.05% +0.00% / -0.05% +0.51% +0.59%] index_copy_ spread : Elapsed 0.039 ms (3.928 ms / 100) 4.063 -> 4.063 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.54% +0.54%] index_add_ strided 3 : Elapsed 0.041 ms (4.064 ms / 100) 3.924 -> 3.926 ( +0.05%) [ +0.03% +0.00% +0.03% / +0.05% +0.61% +0.59%] index_copy_ strided 3 : Elapsed 0.039 ms (3.925 ms / 100) 4.054 -> 4.054 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.62% +0.59%] index_add_ strided 7 : Elapsed 0.041 ms (4.056 ms / 100) 3.920 -> 3.922 ( +0.05%) [ +0.10% +0.00% +0.10% / +0.05% +0.56% +0.51%] index_copy_ strided 7 : Elapsed 0.039 ms (3.924 ms / 100) 4.070 -> 4.069 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.49% +0.39%] index_add_ perm : Elapsed 0.041 ms (4.071 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.00% +0.03% +0.05% / +0.03% +0.51% +0.56%] index_copy_ perm : Elapsed 0.039 ms (3.931 ms / 100) 4.070 -> 4.071 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.42% +0.47%] index_add_ perm_sorted : Elapsed 0.041 ms (4.071 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.48% +0.48%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.931 ms / 100) 5.553 -> 5.560 ( +0.13%) [ +0.29% +0.05% +0.00% / +0.18% +0.32% +0.13%] index_select const : Elapsed 0.056 ms (5.569 ms / 100) 5.558 -> 5.562 ( +0.07%) [ +0.16% +0.16% +0.00% / +0.07% +0.40% +0.32%] index_select wrap : Elapsed 0.056 ms (5.567 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.00% +0.00% +0.14%] index_select linear : Elapsed 0.056 ms (5.570 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.23% +0.00% / -0.02% +0.14% +0.14%] index_select reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.561 -> 5.557 ( -0.07%) [ +0.00% +0.20% +0.05% / +0.00% -0.07% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.561 ms / 100) 5.558 -> 5.560 ( +0.04%) [ +0.00% +0.13% +0.00% / +0.07% +0.04% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.00% +0.05% +0.05% / -0.04% -0.02% -0.02%] index_select spread : Elapsed 0.056 ms (5.566 ms / 100) 5.560 -> 5.568 ( +0.14%) [ +0.02% +0.14% +0.00% / +0.23% +0.14% +0.16%] index_select strided 3 : Elapsed 0.056 ms (5.561 ms / 100) 5.564 -> 5.562 ( -0.04%) [ +0.00% +0.04% +0.09% / -0.04% +0.16% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.564 ms / 100) 5.564 -> 5.566 ( +0.04%) [ +0.00% +0.04% +0.02% / +0.04% +0.13% +0.13%] index_select strided 7 : Elapsed 0.056 ms (5.564 ms / 100) 5.555 -> 5.564 ( +0.16%) [ +0.13% +0.00% +0.05% / +0.16% +0.18% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.11% +0.05% / +0.02% +0.02% +0.05%] index_select random : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.04% +0.11% +0.00% / -0.05% +0.04% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.566 ms / 100) B = [20, 4, 5, 40] (stride (1, 4000, 800, 20)) A = [20, 4, 5, 16] (stride (320, 1, 64, 4)) dim = 3 3.849 -> 3.850 ( +0.03%) [ +0.05% +0.21% +0.00% / +0.03% +0.75% +0.81%] index_add_ linear : Elapsed 0.039 ms (3.851 ms / 100) 3.704 -> 3.704 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.76% +0.78%] index_copy_ linear : Elapsed 0.037 ms (3.704 ms / 100) 3.831 -> 3.837 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.84% +0.57%] index_add_ reverse : Elapsed 0.038 ms (3.831 ms / 100) 3.690 -> 3.697 ( +0.19%) [ +0.19% +0.00% +0.19% / +0.19% +0.79% +0.68%] index_copy_ reverse : Elapsed 0.037 ms (3.697 ms / 100) 3.825 -> 3.829 ( +0.10%) [ +0.13% +0.10% +0.00% / +0.10% +0.84% +0.84%] index_add_ spread : Elapsed 0.038 ms (3.830 ms / 100) 3.679 -> 3.683 ( +0.11%) [ +0.11% +0.14% +0.00% / +0.11% +0.98% +0.92%] index_copy_ spread : Elapsed 0.037 ms (3.683 ms / 100) 3.839 -> 3.838 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.038 ms (3.839 ms / 100) 3.692 -> 3.692 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.037 ms (3.693 ms / 100) 3.830 -> 3.833 ( +0.08%) [ +0.10% +0.00% +0.18% / +0.08% +0.60% +0.63%] index_add_ strided 7 : Elapsed 0.038 ms (3.834 ms / 100) 3.689 -> 3.690 ( +0.03%) [ +0.19% +0.00% +0.19% / +0.03% +0.65% +0.73%] index_copy_ strided 7 : Elapsed 0.037 ms (3.696 ms / 100) 3.850 -> 3.848 ( -0.05%) [ +0.13% +0.00% +0.03% / -0.05% +0.78% +0.83%] index_add_ perm : Elapsed 0.039 ms (3.855 ms / 100) 3.703 -> 3.702 ( -0.03%) [ +0.08% +0.14% +0.00% / -0.03% +0.81% +0.84%] index_copy_ perm : Elapsed 0.037 ms (3.706 ms / 100) 3.834 -> 3.834 ( +0.00%) [ +0.05% +0.08% +0.00% / +0.00% +0.83% +0.81%] index_add_ perm_sorted : Elapsed 0.038 ms (3.836 ms / 100) 3.689 -> 3.690 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.84% +0.87%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.689 ms / 100) 5.470 -> 5.475 ( +0.09%) [ +0.00% +0.16% +0.15% / +0.20% +0.22% +0.09%] index_select const : Elapsed 0.055 ms (5.470 ms / 100) 5.489 -> 5.476 ( -0.24%) [ +0.00% +0.09% +0.07% / +0.04% -0.11% -0.24%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.00% +0.02% +0.04% / +0.04% +0.00% -0.09%] index_select linear : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.479 ( -0.11%) [ +0.00% +0.04% +0.02% / +0.07% -0.11% +0.02%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.00% +0.04% +0.00% / +0.02% +0.04% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.477 ms / 100) 5.469 -> 5.476 ( +0.13%) [ +0.09% +0.00% +0.00% / +0.22% +0.13% +0.16%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.20% +0.29% +0.00% / +0.15% +0.02% +0.16%] index_select spread : Elapsed 0.055 ms (5.488 ms / 100) 5.484 -> 5.477 ( -0.13%) [ +0.00% +0.04% +0.05% / +0.04% -0.13% +0.04%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.483 -> 5.477 ( -0.11%) [ +0.00% +0.09% +0.04% / +0.18% -0.11% -0.11%] index_select strided 5 : Elapsed 0.055 ms (5.483 ms / 100) 5.482 -> 5.480 ( -0.04%) [ +0.15% +0.16% +0.00% / -0.04% +0.05% +0.00%] index_select strided 7 : Elapsed 0.055 ms (5.490 ms / 100) 5.476 -> 5.470 ( -0.11%) [ +0.00% +0.05% +0.04% / -0.11% +0.05% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.11% +0.00% +0.04% / +0.02% +0.04% +0.16%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.486 -> 5.479 ( -0.13%) [ +0.00% +0.04% +0.04% / +0.00% -0.13% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.486 ms / 100) B = [20, 4, 5, 40] (stride (160, 40, 3200, 1)) dim = 3 fill_cnt = 16 1.052 -> 1.053 ( +0.10%) [ +0.38% +0.19% +0.00% / +0.10% +1.43% +1.43%] index_fill_ const : Elapsed 0.011 ms (1.056 ms / 100) 1.052 -> 1.054 ( +0.19%) [ +0.10% +0.10% +0.00% / +0.19% +1.24% +1.52%] index_fill_ linear : Elapsed 0.011 ms (1.053 ms / 100) 1.053 -> 1.053 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +1.52% +1.52%] index_fill_ reverse : Elapsed 0.011 ms (1.054 ms / 100) 1.053 -> 1.054 ( +0.09%) [ +0.19% +0.19% +0.00% / +0.09% +1.33% +1.61%] index_fill_ skip64 : Elapsed 0.011 ms (1.055 ms / 100) 1.052 -> 1.052 ( +0.00%) [ +0.10% +0.19% +0.00% / +0.00% +1.43% +1.52%] index_fill_ skip256 : Elapsed 0.011 ms (1.053 ms / 100) 1.052 -> 1.053 ( +0.10%) [ +0.19% +0.29% +0.00% / +0.10% +1.71% +1.62%] index_fill_ spread : Elapsed 0.011 ms (1.054 ms / 100) 1.053 -> 1.054 ( +0.09%) [ +0.19% +0.00% +0.09% / +0.09% +1.52% +1.52%] index_fill_ strided 3 : Elapsed 0.011 ms (1.055 ms / 100) 1.053 -> 1.053 ( +0.00%) [ +0.19% +0.09% +0.00% / +0.00% +1.52% +1.61%] index_fill_ strided 5 : Elapsed 0.011 ms (1.055 ms / 100) 1.053 -> 1.054 ( +0.09%) [ +0.09% +0.28% +0.00% / +0.09% +1.14% +1.14%] index_fill_ strided 7 : Elapsed 0.011 ms (1.054 ms / 100) 1.054 -> 1.054 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +1.04% +1.14%] index_fill_ strided 8 : Elapsed 0.011 ms (1.054 ms / 100) 1.054 -> 1.055 ( +0.09%) [ +0.00% +0.09% +0.09% / +0.09% +1.14% +1.14%] index_fill_ strided 16 : Elapsed 0.011 ms (1.054 ms / 100) 1.054 -> 1.055 ( +0.09%) [ +0.09% +0.19% +0.00% / +0.09% +1.14% +1.14%] index_fill_ random : Elapsed 0.011 ms (1.055 ms / 100) 1.055 -> 1.055 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.95% +0.95%] index_fill_ random_sorted : Elapsed 0.011 ms (1.056 ms / 100) 1.054 -> 1.056 ( +0.19%) [ +0.28% +0.28% +0.00% / +0.19% +0.95% +1.04%] index_fill_ perm : Elapsed 0.011 ms (1.057 ms / 100) 1.054 -> 1.055 ( +0.09%) [ +0.19% +0.00% +0.19% / +0.09% +1.23% +1.23%] index_fill_ perm_sorted : Elapsed 0.011 ms (1.056 ms / 100) B = [20, 4, 5, 40] (stride (40, 800, 3200, 1)) A = [20, 4, 5, 16] (stride (80, 1600, 1, 5)) dim = 3 4.064 -> 4.064 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.00% +0.64% +0.66%] index_add_ linear : Elapsed 0.041 ms (4.067 ms / 100) 3.929 -> 3.931 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.69% +0.69%] index_copy_ linear : Elapsed 0.039 ms (3.929 ms / 100) 4.056 -> 4.054 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.57% +0.54%] index_add_ reverse : Elapsed 0.041 ms (4.056 ms / 100) 3.925 -> 3.930 ( +0.13%) [ +0.05% +0.00% +0.13% / +0.13% +0.46% +0.61%] index_copy_ reverse : Elapsed 0.039 ms (3.927 ms / 100) 4.057 -> 4.062 ( +0.12%) [ +0.15% +0.00% +0.07% / +0.12% +0.54% +0.47%] index_add_ spread : Elapsed 0.041 ms (4.063 ms / 100) 3.929 -> 3.932 ( +0.08%) [ +0.03% +0.00% +0.05% / +0.08% +0.53% +0.46%] index_copy_ spread : Elapsed 0.039 ms (3.930 ms / 100) 4.063 -> 4.063 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.00% +0.54% +0.54%] index_add_ strided 3 : Elapsed 0.041 ms (4.064 ms / 100) 3.926 -> 3.927 ( +0.03%) [ +0.10% +0.05% +0.00% / +0.03% +0.59% +0.64%] index_copy_ strided 3 : Elapsed 0.039 ms (3.930 ms / 100) 4.055 -> 4.053 ( -0.05%) [ +0.00% +0.00% +0.02% / -0.05% +0.62% +0.64%] index_add_ strided 7 : Elapsed 0.041 ms (4.055 ms / 100) 3.928 -> 3.929 ( +0.03%) [ +0.03% +0.08% +0.00% / +0.03% +0.53% +0.53%] index_copy_ strided 7 : Elapsed 0.039 ms (3.929 ms / 100) 4.068 -> 4.068 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.47% +0.47%] index_add_ perm : Elapsed 0.041 ms (4.070 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.51%] index_copy_ perm : Elapsed 0.039 ms (3.935 ms / 100) 4.069 -> 4.069 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.42% +0.42%] index_add_ perm_sorted : Elapsed 0.041 ms (4.069 ms / 100) 3.935 -> 3.934 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.48% +0.51%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.935 ms / 100) 5.557 -> 5.554 ( -0.05%) [ +0.16% +0.11% +0.00% / -0.05% +0.13% +0.18%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.18% +0.14% +0.00% / +0.02% +0.13% +0.23%] index_select wrap : Elapsed 0.056 ms (5.574 ms / 100) 5.570 -> 5.567 ( -0.05%) [ +0.00% +0.04% +0.07% / -0.05% +0.04% +0.16%] index_select linear : Elapsed 0.056 ms (5.570 ms / 100) 5.564 -> 5.572 ( +0.14%) [ +0.00% +0.27% +0.20% / +0.18% +0.14% +0.22%] index_select reverse : Elapsed 0.056 ms (5.564 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.00% +0.02% +0.05% / +0.07% -0.05% -0.05%] index_select skip64 : Elapsed 0.056 ms (5.566 ms / 100) 5.568 -> 5.563 ( -0.09%) [ +0.09% +0.00% +0.09% / -0.04% -0.09% +0.00%] index_select skip256 : Elapsed 0.056 ms (5.573 ms / 100) 5.565 -> 5.568 ( +0.05%) [ +0.11% +0.00% +0.09% / +0.07% +0.14% +0.05%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.11% +0.18% +0.00% / +0.02% +0.11% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.571 ms / 100) 5.568 -> 5.568 ( +0.00%) [ +0.00% +0.13% +0.09% / +0.00% +0.04% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.568 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.00% +0.05% +0.13% / -0.02% +0.20% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.556 -> 5.562 ( +0.11%) [ +0.09% +0.00% +0.14% / +0.13% +0.11% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.561 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.00% +0.04% +0.02% / +0.14% +0.07% +0.20%] index_select random : Elapsed 0.056 ms (5.565 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.09% +0.05% +0.00% / +0.07% +0.27% +0.13%] index_select random_sorted : Elapsed 0.056 ms (5.570 ms / 100) B = [20, 4, 5, 40] (stride (40, 800, 3200, 1)) A = [20, 4, 5, 16] (stride (1, 20, 1280, 80)) dim = 3 4.197 -> 4.198 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.67% +0.69%] index_add_ linear : Elapsed 0.042 ms (4.199 ms / 100) 4.064 -> 4.065 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.69% +0.71%] index_copy_ linear : Elapsed 0.041 ms (4.065 ms / 100) 4.178 -> 4.183 ( +0.12%) [ +0.05% +0.00% +0.02% / +0.12% +0.65% +0.65%] index_add_ reverse : Elapsed 0.042 ms (4.180 ms / 100) 4.050 -> 4.057 ( +0.17%) [ +0.00% +0.07% +0.05% / +0.17% +0.64% +0.72%] index_copy_ reverse : Elapsed 0.041 ms (4.050 ms / 100) 4.197 -> 4.198 ( +0.02%) [ +0.10% +0.00% +0.00% / +0.02% +0.67% +0.62%] index_add_ spread : Elapsed 0.042 ms (4.201 ms / 100) 4.064 -> 4.066 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.62% +0.64%] index_copy_ spread : Elapsed 0.041 ms (4.065 ms / 100) 4.194 -> 4.196 ( +0.05%) [ +0.02% +0.02% +0.00% / +0.05% +0.67% +0.69%] index_add_ strided 3 : Elapsed 0.042 ms (4.195 ms / 100) 4.043 -> 4.044 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.64% +0.64%] index_copy_ strided 3 : Elapsed 0.040 ms (4.043 ms / 100) 4.176 -> 4.177 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.72% +0.72%] index_add_ strided 7 : Elapsed 0.042 ms (4.178 ms / 100) 4.051 -> 4.052 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.72% +0.67%] index_copy_ strided 7 : Elapsed 0.041 ms (4.051 ms / 100) 4.196 -> 4.197 ( +0.02%) [ +0.10% +0.02% +0.00% / +0.02% +0.76% +0.81%] index_add_ perm : Elapsed 0.042 ms (4.200 ms / 100) 4.062 -> 4.063 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.76% +0.84%] index_copy_ perm : Elapsed 0.041 ms (4.063 ms / 100) 4.191 -> 4.192 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.86% +0.84%] index_add_ perm_sorted : Elapsed 0.042 ms (4.191 ms / 100) 4.039 -> 4.040 ( +0.02%) [ +0.00% +0.05% +0.05% / +0.02% +0.82% +0.84%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.039 ms / 100) 5.561 -> 5.566 ( +0.09%) [ +0.05% +0.16% +0.00% / +0.09% +0.14% +0.14%] index_select const : Elapsed 0.056 ms (5.564 ms / 100) 5.580 -> 5.567 ( -0.23%) [ +0.02% +0.00% +0.02% / -0.05% -0.23% -0.13%] index_select wrap : Elapsed 0.056 ms (5.581 ms / 100) 5.578 -> 5.577 ( -0.02%) [ +0.05% +0.13% +0.00% / +0.00% -0.02% +0.00%] index_select linear : Elapsed 0.056 ms (5.581 ms / 100) 5.582 -> 5.576 ( -0.11%) [ +0.00% +0.04% +0.02% / -0.09% -0.11% -0.09%] index_select reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.569 -> 5.561 ( -0.14%) [ +0.04% +0.00% +0.00% / -0.14% -0.09% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.556 -> 5.569 ( +0.23%) [ +0.02% +0.00% +0.29% / +0.23% +0.32% +0.23%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.574 -> 5.573 ( -0.02%) [ +0.00% +0.04% +0.04% / +0.05% +0.07% -0.02%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.577 -> 5.571 ( -0.11%) [ +0.00% +0.02% +0.04% / -0.11% -0.11% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.572 -> 5.568 ( -0.07%) [ +0.00% +0.04% +0.13% / +0.04% -0.07% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.570 -> 5.576 ( +0.11%) [ +0.05% +0.13% +0.00% / +0.11% +0.13% +0.18%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.14% +0.04%] index_select strided 8 : Elapsed 0.056 ms (5.562 ms / 100) 5.574 -> 5.572 ( -0.04%) [ +0.07% +0.00% +0.04% / +0.00% +0.04% -0.04%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.575 -> 5.573 ( -0.04%) [ +0.04% +0.00% +0.07% / +0.07% -0.04% -0.04%] index_select random_sorted : Elapsed 0.056 ms (5.577 ms / 100) out_shape = [40, 4, 16, 5] in_shape = [20, 4, 16, 5] idx_dim = 0 B = [40, 4, 16, 5] (stride (320, 80, 1, 16)) A = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) dim = 0 2.412 -> 2.420 ( +0.33%) [ +0.00% +0.00% +0.08% / +0.33% +0.66% +1.00%] index_add_ linear : Elapsed 0.024 ms (2.412 ms / 100) 2.408 -> 2.420 ( +0.50%) [ +0.17% +0.21% +0.00% / +0.50% +0.79% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.412 ms / 100) 2.412 -> 2.422 ( +0.41%) [ +0.04% +0.17% +0.00% / +0.41% +0.62% +0.62%] index_add_ reverse : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.418 ( +0.29%) [ +0.04% +0.00% +0.12% / +0.29% +0.58% +0.62%] index_copy_ reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.414 -> 2.425 ( +0.46%) [ +0.00% +0.21% +0.04% / +0.46% +0.58% +0.58%] index_add_ spread : Elapsed 0.024 ms (2.414 ms / 100) 2.413 -> 2.425 ( +0.50%) [ +0.00% +0.04% +0.17% / +0.50% +0.54% +0.54%] index_copy_ spread : Elapsed 0.024 ms (2.413 ms / 100) 2.412 -> 2.423 ( +0.46%) [ +0.08% +0.17% +0.00% / +0.58% +0.46% +0.46%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.409 -> 2.419 ( +0.42%) [ +0.08% +0.17% +0.00% / +0.46% +0.42% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.414 -> 2.426 ( +0.50%) [ +0.00% +0.12% +0.21% / +0.50% +0.58% +0.54%] index_add_ strided 7 : Elapsed 0.024 ms (2.414 ms / 100) 2.411 -> 2.424 ( +0.54%) [ +0.00% +0.25% +0.25% / +0.54% +0.66% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.411 ms / 100) 2.414 -> 2.427 ( +0.54%) [ +0.00% +0.21% +0.08% / +0.54% +0.58% +0.58%] index_add_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.422 ( +0.50%) [ +0.08% +0.00% +0.04% / +0.50% +0.66% +0.75%] index_copy_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.423 ( +0.58%) [ +0.00% +0.17% +0.12% / +0.58% +0.87% +0.87%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.409 -> 2.421 ( +0.50%) [ +0.12% +0.00% +0.00% / +0.50% +0.62% +1.00%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.412 ms / 100) 4.429 -> 4.425 ( -0.09%) [ +0.09% +0.00% +0.25% / -0.02% +0.07% -0.09%] index_select const : Elapsed 0.044 ms (4.433 ms / 100) 4.441 -> 4.447 ( +0.14%) [ +0.00% +0.02% +0.07% / +0.14% +0.18% +0.23%] index_select wrap : Elapsed 0.044 ms (4.441 ms / 100) 4.443 -> 4.445 ( +0.05%) [ +0.00% +0.11% +0.14% / +0.05% +0.18% +0.09%] index_select linear : Elapsed 0.044 ms (4.443 ms / 100) 4.445 -> 4.444 ( -0.02%) [ +0.16% +0.11% +0.00% / -0.02% +0.16% +0.16%] index_select reverse : Elapsed 0.045 ms (4.452 ms / 100) 4.433 -> 4.429 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.05% +0.16% -0.09%] index_select skip64 : Elapsed 0.044 ms (4.434 ms / 100) 4.428 -> 4.431 ( +0.07%) [ +0.16% +0.00% +0.18% / +0.07% +0.07% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.435 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.16% +0.00% +0.02% / -0.07% +0.16% +0.00%] index_select spread : Elapsed 0.045 ms (4.451 ms / 100) 4.441 -> 4.449 ( +0.18%) [ +0.11% +0.11% +0.00% / +0.18% +0.20% +0.25%] index_select strided 3 : Elapsed 0.044 ms (4.446 ms / 100) 4.431 -> 4.436 ( +0.11%) [ +0.09% +0.00% +0.20% / +0.11% +0.27% +0.23%] index_select strided 5 : Elapsed 0.044 ms (4.435 ms / 100) 4.443 -> 4.442 ( -0.02%) [ +0.00% +0.14% +0.02% / +0.05% -0.02% +0.41%] index_select strided 7 : Elapsed 0.044 ms (4.443 ms / 100) 4.437 -> 4.436 ( -0.02%) [ +0.05% +0.00% +0.02% / +0.11% -0.02% +0.16%] index_select strided 8 : Elapsed 0.044 ms (4.439 ms / 100) 4.431 -> 4.432 ( +0.02%) [ +0.23% +0.00% +0.07% / +0.02% +0.09% +0.29%] index_select strided 16 : Elapsed 0.044 ms (4.441 ms / 100) 4.445 -> 4.440 ( -0.11%) [ +0.07% +0.00% +0.04% / -0.11% +0.09% +0.11%] index_select random : Elapsed 0.044 ms (4.448 ms / 100) 4.443 -> 4.447 ( +0.09%) [ +0.07% +0.14% +0.00% / +0.09% +0.14% +0.18%] index_select random_sorted : Elapsed 0.044 ms (4.446 ms / 100) B = [40, 4, 16, 5] (stride (320, 16, 1, 64)) A = [20, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 0 2.388 -> 2.399 ( +0.46%) [ +0.04% +0.04% +0.00% / +0.46% +0.92% +0.96%] index_add_ linear : Elapsed 0.024 ms (2.389 ms / 100) 2.383 -> 2.399 ( +0.67%) [ +0.00% +0.46% +0.21% / +0.67% +1.13% +1.22%] index_copy_ linear : Elapsed 0.024 ms (2.383 ms / 100) 2.381 -> 2.396 ( +0.63%) [ +0.17% +0.21% +0.00% / +0.63% +1.30% +1.47%] index_add_ reverse : Elapsed 0.024 ms (2.385 ms / 100) 2.379 -> 2.397 ( +0.76%) [ +0.00% +0.13% +0.25% / +0.76% +1.26% +1.47%] index_copy_ reverse : Elapsed 0.024 ms (2.379 ms / 100) 2.384 -> 2.401 ( +0.71%) [ +0.17% +0.00% +0.08% / +0.71% +1.09% +0.96%] index_add_ spread : Elapsed 0.024 ms (2.388 ms / 100) 2.386 -> 2.401 ( +0.63%) [ +0.04% +0.00% +0.04% / +0.63% +1.05% +0.96%] index_copy_ spread : Elapsed 0.024 ms (2.387 ms / 100) 2.394 -> 2.406 ( +0.50%) [ +0.17% +0.13% +0.00% / +0.50% +0.58% +0.63%] index_add_ strided 3 : Elapsed 0.024 ms (2.398 ms / 100) 2.391 -> 2.405 ( +0.59%) [ +0.00% +0.17% +0.21% / +0.59% +0.79% +0.59%] index_copy_ strided 3 : Elapsed 0.024 ms (2.391 ms / 100) 2.394 -> 2.407 ( +0.54%) [ +0.08% +0.08% +0.00% / +0.54% +0.54% +0.63%] index_add_ strided 7 : Elapsed 0.024 ms (2.396 ms / 100) 2.391 -> 2.401 ( +0.42%) [ +0.04% +0.08% +0.00% / +0.42% +0.71% +0.84%] index_copy_ strided 7 : Elapsed 0.024 ms (2.392 ms / 100) 2.397 -> 2.403 ( +0.25%) [ +0.00% +0.13% +0.04% / +0.54% +0.25% +0.38%] index_add_ perm : Elapsed 0.024 ms (2.397 ms / 100) 2.394 -> 2.399 ( +0.21%) [ +0.04% +0.13% +0.00% / +0.54% +0.21% +0.42%] index_copy_ perm : Elapsed 0.024 ms (2.395 ms / 100) 2.393 -> 2.398 ( +0.21%) [ +0.13% +0.00% +0.17% / +0.79% +0.29% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.396 ms / 100) 2.394 -> 2.403 ( +0.38%) [ +0.04% +0.00% +0.00% / +0.54% +0.38% +0.54%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.395 ms / 100) 4.413 -> 4.412 ( -0.02%) [ +0.23% +0.14% +0.00% / -0.02% +0.05% +0.07%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.429 -> 4.421 ( -0.18%) [ +0.09% +0.00% +0.07% / -0.09% -0.18% -0.11%] index_select wrap : Elapsed 0.044 ms (4.433 ms / 100) 4.422 -> 4.425 ( +0.07%) [ +0.14% +0.00% +0.20% / +0.16% +0.07% +0.32%] index_select linear : Elapsed 0.044 ms (4.428 ms / 100) 4.425 -> 4.424 ( -0.02%) [ +0.18% +0.18% +0.00% / +0.05% +0.00% -0.02%] index_select reverse : Elapsed 0.044 ms (4.433 ms / 100) 4.419 -> 4.410 ( -0.20%) [ +0.00% +0.18% +0.07% / -0.07% -0.20% -0.14%] index_select skip64 : Elapsed 0.044 ms (4.419 ms / 100) 4.411 -> 4.412 ( +0.02%) [ +0.34% +0.00% +0.09% / +0.02% +0.07% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.426 ms / 100) 4.425 -> 4.430 ( +0.11%) [ +0.00% +0.25% +0.09% / +0.11% +0.14% +0.20%] index_select spread : Elapsed 0.044 ms (4.425 ms / 100) 4.427 -> 4.427 ( +0.00%) [ +0.00% +0.07% +0.05% / +0.14% +0.00% +0.16%] index_select strided 3 : Elapsed 0.044 ms (4.427 ms / 100) 4.412 -> 4.415 ( +0.07%) [ +0.07% +0.09% +0.00% / +0.09% +0.16% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.415 ms / 100) 4.422 -> 4.428 ( +0.14%) [ +0.34% +0.00% +0.11% / +0.14% +0.20% +0.16%] index_select strided 7 : Elapsed 0.044 ms (4.437 ms / 100) 4.419 -> 4.410 ( -0.20%) [ +0.14% +0.00% +0.07% / +0.05% +0.07% -0.20%] index_select strided 8 : Elapsed 0.044 ms (4.425 ms / 100) 4.419 -> 4.413 ( -0.14%) [ +0.16% +0.05% +0.00% / +0.09% -0.14% +0.00%] index_select strided 16 : Elapsed 0.044 ms (4.426 ms / 100) 4.426 -> 4.423 ( -0.07%) [ +0.05% +0.09% +0.00% / +0.11% -0.07% +0.14%] index_select random : Elapsed 0.044 ms (4.428 ms / 100) 4.424 -> 4.419 ( -0.11%) [ +0.23% +0.00% +0.14% / +0.07% +0.18% -0.11%] index_select random_sorted : Elapsed 0.044 ms (4.434 ms / 100) B = [40, 4, 16, 5] (stride (80, 3200, 1, 16)) A = [20, 4, 16, 5] (stride (1, 320, 20, 1280)) dim = 0 2.405 -> 2.417 ( +0.50%) [ +0.21% +0.21% +0.00% / +0.50% +0.54% +0.62%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.00% +0.04% +0.25% / +0.37% +0.50% +0.46%] index_copy_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.409 -> 2.415 ( +0.25%) [ +0.00% +0.00% +0.17% / +0.37% +0.25% +0.25%] index_add_ reverse : Elapsed 0.024 ms (2.409 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.04% +0.25% +0.00% / +0.58% +0.54% +0.37%] index_copy_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.405 -> 2.416 ( +0.46%) [ +0.00% +0.17% +0.04% / +0.50% +0.46% +0.50%] index_add_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.04% +0.08% +0.00% / +0.42% +0.62% +0.50%] index_copy_ spread : Elapsed 0.024 ms (2.405 ms / 100) 2.405 -> 2.415 ( +0.42%) [ +0.04% +0.04% +0.00% / +0.46% +0.42% +0.58%] index_add_ strided 3 : Elapsed 0.024 ms (2.406 ms / 100) 2.402 -> 2.416 ( +0.58%) [ +0.12% +0.00% +0.21% / +0.58% +0.58% +0.75%] index_copy_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.401 -> 2.412 ( +0.46%) [ +0.25% +0.25% +0.00% / +0.46% +0.83% +0.75%] index_add_ strided 7 : Elapsed 0.024 ms (2.407 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.33% +0.00% +0.29% / +0.50% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.408 ms / 100) 2.405 -> 2.416 ( +0.46%) [ +0.00% +0.08% +0.04% / +0.50% +0.46% +0.50%] index_add_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.403 -> 2.411 ( +0.33%) [ +0.00% +0.08% +0.17% / +0.33% +0.67% +0.58%] index_copy_ perm : Elapsed 0.024 ms (2.403 ms / 100) 2.404 -> 2.419 ( +0.62%) [ +0.12% +0.08% +0.00% / +0.62% +0.75% +0.67%] index_add_ perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) 2.403 -> 2.412 ( +0.37%) [ +0.00% +0.08% +0.00% / +0.37% +0.62% +0.58%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.403 ms / 100) 4.430 -> 4.432 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.23% +0.05% +0.20%] index_select const : Elapsed 0.044 ms (4.430 ms / 100) 4.433 -> 4.437 ( +0.09%) [ +0.14% +0.00% +0.16% / +0.09% +0.20% +0.16%] index_select wrap : Elapsed 0.044 ms (4.439 ms / 100) 4.443 -> 4.433 ( -0.23%) [ +0.14% +0.00% +0.02% / -0.23% -0.02% -0.05%] index_select linear : Elapsed 0.044 ms (4.449 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.11% +0.14% +0.00% / +0.09% +0.20% +0.14%] index_select reverse : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.431 ( -0.11%) [ +0.05% +0.00% +0.00% / -0.11% -0.05% -0.02%] index_select skip64 : Elapsed 0.044 ms (4.438 ms / 100) 4.429 -> 4.434 ( +0.11%) [ +0.00% +0.07% +0.00% / +0.11% +0.23% +0.18%] index_select skip256 : Elapsed 0.044 ms (4.429 ms / 100) 4.440 -> 4.439 ( -0.02%) [ +0.02% +0.07% +0.00% / +0.07% +0.14% -0.02%] index_select spread : Elapsed 0.044 ms (4.441 ms / 100) 4.438 -> 4.439 ( +0.02%) [ +0.00% +0.09% +0.02% / +0.02% +0.07% +0.02%] index_select strided 3 : Elapsed 0.044 ms (4.438 ms / 100) 4.437 -> 4.438 ( +0.02%) [ +0.00% +0.09% +0.02% / +0.02% +0.16% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.437 ms / 100) 4.436 -> 4.437 ( +0.02%) [ +0.18% +0.05% +0.00% / +0.02% +0.07% +0.18%] index_select strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.435 -> 4.438 ( +0.07%) [ +0.07% +0.00% +0.14% / +0.07% +0.20% +0.34%] index_select strided 8 : Elapsed 0.044 ms (4.438 ms / 100) 4.438 -> 4.439 ( +0.02%) [ +0.02% +0.23% +0.00% / +0.02% +0.02% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.439 ms / 100) 4.441 -> 4.438 ( -0.07%) [ +0.00% +0.11% +0.00% / +0.07% -0.07% +0.02%] index_select random : Elapsed 0.044 ms (4.441 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.00% +0.09% +0.02% / +0.05% +0.18% +0.09%] index_select random_sorted : Elapsed 0.044 ms (4.437 ms / 100) B = [40, 4, 16, 5] (stride (1, 3200, 200, 40)) A = [20, 4, 16, 5] (stride (1, 100, 400, 20)) dim = 0 2.337 -> 2.351 ( +0.60%) [ +0.00% +0.26% +0.13% / +0.60% +0.86% +0.86%] index_add_ linear : Elapsed 0.023 ms (2.337 ms / 100) 2.347 -> 2.359 ( +0.51%) [ +0.00% +0.04% +0.09% / +0.51% +0.68% +0.85%] index_copy_ linear : Elapsed 0.023 ms (2.347 ms / 100) 2.334 -> 2.350 ( +0.69%) [ +0.00% +0.26% +0.17% / +0.69% +0.99% +1.07%] index_add_ reverse : Elapsed 0.023 ms (2.334 ms / 100) 2.341 -> 2.353 ( +0.51%) [ +0.43% +0.17% +0.00% / +0.51% +1.20% +1.15%] index_copy_ reverse : Elapsed 0.024 ms (2.351 ms / 100) 2.347 -> 2.357 ( +0.43%) [ +0.13% +0.00% +0.04% / +0.43% +0.81% +0.77%] index_add_ spread : Elapsed 0.024 ms (2.350 ms / 100) 2.360 -> 2.375 ( +0.64%) [ +0.00% +0.00% +0.13% / +0.64% +1.06% +1.06%] index_copy_ spread : Elapsed 0.024 ms (2.360 ms / 100) 2.355 -> 2.366 ( +0.47%) [ +0.00% +0.17% +0.04% / +0.47% +0.55% +0.68%] index_add_ strided 3 : Elapsed 0.024 ms (2.355 ms / 100) 2.366 -> 2.378 ( +0.51%) [ +0.00% +0.38% +0.13% / +0.51% +0.59% +0.80%] index_copy_ strided 3 : Elapsed 0.024 ms (2.366 ms / 100) 2.356 -> 2.365 ( +0.38%) [ +0.13% +0.00% +0.21% / +0.38% +0.55% +0.51%] index_add_ strided 7 : Elapsed 0.024 ms (2.359 ms / 100) 2.369 -> 2.379 ( +0.42%) [ +0.00% +0.08% +0.04% / +0.42% +0.46% +0.51%] index_copy_ strided 7 : Elapsed 0.024 ms (2.369 ms / 100) 2.356 -> 2.363 ( +0.30%) [ +0.17% +0.17% +0.00% / +0.51% +0.30% +0.34%] index_add_ perm : Elapsed 0.024 ms (2.360 ms / 100) 2.367 -> 2.376 ( +0.38%) [ +0.08% +0.00% +0.55% / +0.51% +0.46% +0.38%] index_copy_ perm : Elapsed 0.024 ms (2.369 ms / 100) 2.355 -> 2.362 ( +0.30%) [ +0.08% +0.21% +0.00% / +0.47% +0.30% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.357 ms / 100) 2.367 -> 2.373 ( +0.25%) [ +0.00% +0.17% +0.21% / +0.55% +0.25% +0.38%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.367 ms / 100) 4.345 -> 4.350 ( +0.12%) [ +0.18% +0.25% +0.00% / +0.25% +0.12% +0.21%] index_select const : Elapsed 0.044 ms (4.353 ms / 100) 4.353 -> 4.352 ( -0.02%) [ +0.09% +0.05% +0.00% / +0.21% -0.02% +0.05%] index_select wrap : Elapsed 0.044 ms (4.357 ms / 100) 4.354 -> 4.359 ( +0.11%) [ +0.16% +0.00% +0.14% / +0.18% +0.11% +0.21%] index_select linear : Elapsed 0.044 ms (4.361 ms / 100) 4.359 -> 4.353 ( -0.14%) [ +0.02% +0.00% +0.07% / -0.07% -0.14% -0.07%] index_select reverse : Elapsed 0.044 ms (4.360 ms / 100) 4.348 -> 4.342 ( -0.14%) [ +0.00% +0.18% +0.16% / +0.09% +0.02% -0.14%] index_select skip64 : Elapsed 0.043 ms (4.348 ms / 100) 4.345 -> 4.345 ( +0.00%) [ +0.21% +0.00% +0.09% / +0.00% +0.16% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.354 ms / 100) 4.355 -> 4.356 ( +0.02%) [ +0.05% +0.05% +0.00% / +0.02% +0.23% +0.14%] index_select spread : Elapsed 0.044 ms (4.357 ms / 100) 4.352 -> 4.349 ( -0.07%) [ +0.11% +0.09% +0.00% / -0.07% +0.23% +0.05%] index_select strided 3 : Elapsed 0.044 ms (4.357 ms / 100) 4.354 -> 4.356 ( +0.05%) [ +0.00% +0.07% +0.00% / +0.14% +0.21% +0.05%] index_select strided 5 : Elapsed 0.044 ms (4.354 ms / 100) 4.355 -> 4.356 ( +0.02%) [ +0.11% +0.07% +0.00% / +0.02% +0.21% +0.05%] index_select strided 7 : Elapsed 0.044 ms (4.360 ms / 100) 4.358 -> 4.352 ( -0.14%) [ +0.11% +0.05% +0.00% / +0.00% +0.05% -0.14%] index_select strided 8 : Elapsed 0.044 ms (4.363 ms / 100) 4.353 -> 4.354 ( +0.02%) [ +0.00% +0.00% +0.11% / +0.11% +0.21% +0.02%] index_select strided 16 : Elapsed 0.044 ms (4.353 ms / 100) 4.357 -> 4.354 ( -0.07%) [ +0.00% +0.05% +0.21% / -0.05% -0.07% +0.00%] index_select random : Elapsed 0.044 ms (4.357 ms / 100) 4.360 -> 4.355 ( -0.11%) [ +0.00% +0.00% +0.18% / +0.02% -0.11% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.360 ms / 100) B = [40, 4, 16, 5] (stride (20, 1, 800, 4)) A = [20, 4, 16, 5] (stride (320, 16, 1, 64)) dim = 0 2.410 -> 2.422 ( +0.50%) [ +0.00% +0.00% +0.08% / +0.50% +0.54% +0.58%] index_add_ linear : Elapsed 0.024 ms (2.410 ms / 100) 2.405 -> 2.418 ( +0.54%) [ +0.00% +0.08% +0.00% / +0.54% +0.58% +0.67%] index_copy_ linear : Elapsed 0.024 ms (2.405 ms / 100) 2.412 -> 2.420 ( +0.33%) [ +0.00% +0.12% +0.04% / +0.54% +0.54% +0.33%] index_add_ reverse : Elapsed 0.024 ms (2.412 ms / 100) 2.409 -> 2.415 ( +0.25%) [ +0.04% +0.04% +0.00% / +0.54% +0.46% +0.25%] index_copy_ reverse : Elapsed 0.024 ms (2.410 ms / 100) 2.421 -> 2.427 ( +0.25%) [ +0.04% +0.04% +0.00% / +0.33% +0.25% +0.45%] index_add_ spread : Elapsed 0.024 ms (2.422 ms / 100) 2.420 -> 2.433 ( +0.54%) [ +0.00% +0.12% +0.17% / +0.70% +0.54% +0.54%] index_copy_ spread : Elapsed 0.024 ms (2.420 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.04% +0.12% +0.00% / +0.75% +0.41% +0.41%] index_add_ strided 3 : Elapsed 0.024 ms (2.415 ms / 100) 2.416 -> 2.427 ( +0.46%) [ +0.21% +0.00% +0.08% / +0.50% +0.50% +0.46%] index_copy_ strided 3 : Elapsed 0.024 ms (2.421 ms / 100) 2.417 -> 2.427 ( +0.41%) [ +0.00% +0.04% +0.00% / +0.41% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.024 ms (2.417 ms / 100) 2.414 -> 2.426 ( +0.50%) [ +0.21% +0.00% +0.12% / +0.58% +0.50% +0.66%] index_copy_ strided 7 : Elapsed 0.024 ms (2.419 ms / 100) 2.413 -> 2.425 ( +0.50%) [ +0.04% +0.00% +0.04% / +0.50% +0.66% +0.66%] index_add_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.409 -> 2.424 ( +0.62%) [ +0.12% +0.00% +0.25% / +0.83% +0.62% +0.95%] index_copy_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.418 -> 2.424 ( +0.25%) [ +0.00% +0.12% +0.04% / +0.62% +0.25% +0.54%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.417 -> 2.426 ( +0.37%) [ +0.04% +0.04% +0.00% / +0.37% +0.46% +0.41%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 4.427 -> 4.431 ( +0.09%) [ +0.00% +0.18% +0.25% / +0.18% +0.09% +0.18%] index_select const : Elapsed 0.044 ms (4.427 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.00% +0.09% +0.02% / +0.09% +0.09% +0.14%] index_select wrap : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.441 ( +0.11%) [ +0.00% +0.05% +0.25% / +0.11% +0.25% +0.27%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.436 -> 4.440 ( +0.09%) [ +0.00% +0.16% +0.16% / +0.11% +0.16% +0.09%] index_select reverse : Elapsed 0.044 ms (4.436 ms / 100) 4.429 -> 4.426 ( -0.07%) [ +0.23% +0.11% +0.00% / +0.16% -0.02% -0.07%] index_select skip64 : Elapsed 0.044 ms (4.439 ms / 100) 4.427 -> 4.430 ( +0.07%) [ +0.29% +0.00% +0.00% / +0.07% +0.16% +0.20%] index_select skip256 : Elapsed 0.044 ms (4.440 ms / 100) 4.434 -> 4.439 ( +0.11%) [ +0.11% +0.00% +0.18% / +0.11% +0.25% +0.25%] index_select spread : Elapsed 0.044 ms (4.439 ms / 100) 4.435 -> 4.441 ( +0.14%) [ +0.00% +0.02% +0.16% / +0.14% +0.16% +0.32%] index_select strided 3 : Elapsed 0.044 ms (4.435 ms / 100) 4.434 -> 4.432 ( -0.05%) [ +0.00% +0.11% +0.05% / -0.05% +0.16% +0.09%] index_select strided 5 : Elapsed 0.044 ms (4.434 ms / 100) 4.434 -> 4.440 ( +0.14%) [ +0.02% +0.00% +0.09% / +0.14% +0.27% +0.41%] index_select strided 7 : Elapsed 0.044 ms (4.435 ms / 100) 4.427 -> 4.431 ( +0.09%) [ +0.07% +0.20% +0.00% / +0.09% +0.20% +0.14%] index_select strided 8 : Elapsed 0.044 ms (4.430 ms / 100) 4.430 -> 4.432 ( +0.05%) [ +0.02% +0.07% +0.00% / +0.11% +0.05% +0.14%] index_select strided 16 : Elapsed 0.044 ms (4.431 ms / 100) 4.437 -> 4.439 ( +0.05%) [ +0.02% +0.00% +0.05% / +0.05% +0.09% +0.11%] index_select random : Elapsed 0.044 ms (4.438 ms / 100) 4.433 -> 4.436 ( +0.07%) [ +0.18% +0.11% +0.00% / +0.11% +0.07% +0.18%] index_select random_sorted : Elapsed 0.044 ms (4.441 ms / 100) B = [40, 4, 16, 5] (stride (1, 640, 40, 2560)) A = [20, 4, 16, 5] (stride (320, 16, 1, 64)) dim = 0 1.533 -> 1.491 ( -2.74%) [ +0.39% +0.07% +0.00% / -2.74% -2.41% -2.54%] index_add_ linear : Elapsed 0.015 ms (1.539 ms / 100) 1.514 -> 1.476 ( -2.51%) [ +0.26% +0.00% +0.00% / -2.51% -2.25% -2.44%] index_copy_ linear : Elapsed 0.015 ms (1.518 ms / 100) 1.541 -> 1.488 ( -3.44%) [ +0.13% +0.00% +0.00% / -3.44% -2.92% -2.14%] index_add_ reverse : Elapsed 0.015 ms (1.543 ms / 100) 1.511 -> 1.479 ( -2.12%) [ +0.07% +0.00% +0.33% / -1.99% -2.12% -0.93%] index_copy_ reverse : Elapsed 0.015 ms (1.512 ms / 100) 1.552 -> 1.504 ( -3.09%) [ +0.26% +0.00% +0.06% / -3.09% -2.90% -2.77%] index_add_ spread : Elapsed 0.016 ms (1.556 ms / 100) 1.526 -> 1.499 ( -1.77%) [ +0.07% +0.00% +0.00% / -1.77% -1.70% -1.44%] index_copy_ spread : Elapsed 0.015 ms (1.527 ms / 100) 1.555 -> 1.503 ( -3.34%) [ +0.26% +0.45% +0.00% / -3.22% -3.34% -3.02%] index_add_ strided 3 : Elapsed 0.016 ms (1.559 ms / 100) 1.528 -> 1.496 ( -2.09%) [ +0.07% +0.39% +0.00% / -2.09% -1.77% -1.70%] index_copy_ strided 3 : Elapsed 0.015 ms (1.529 ms / 100) 1.560 -> 1.504 ( -3.59%) [ +0.06% +0.13% +0.00% / -3.59% -3.46% -3.14%] index_add_ strided 7 : Elapsed 0.016 ms (1.561 ms / 100) 1.528 -> 1.495 ( -2.16%) [ +0.20% +0.20% +0.00% / -2.16% -1.96% -1.90%] index_copy_ strided 7 : Elapsed 0.015 ms (1.531 ms / 100) 1.555 -> 1.499 ( -3.60%) [ +0.26% +0.00% +0.13% / -3.60% -3.09% -3.15%] index_add_ perm : Elapsed 0.016 ms (1.559 ms / 100) 1.523 -> 1.488 ( -2.30%) [ +0.07% +0.26% +0.00% / -2.30% -1.90% -2.04%] index_copy_ perm : Elapsed 0.015 ms (1.524 ms / 100) 1.548 -> 1.498 ( -3.23%) [ +0.06% +0.00% +0.00% / -3.23% -2.71% -2.58%] index_add_ perm_sorted : Elapsed 0.015 ms (1.549 ms / 100) 1.519 -> 1.489 ( -1.97%) [ +0.13% +0.00% +0.33% / -1.97% -1.97% -1.51%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.521 ms / 100) 2.881 -> 2.868 ( -0.45%) [ +0.10% +0.00% +0.03% / -0.24% -0.45% -0.35%] index_select const : Elapsed 0.029 ms (2.884 ms / 100) 2.890 -> 2.879 ( -0.38%) [ +0.00% +0.35% +0.17% / -0.31% -0.38% -0.38%] index_select wrap : Elapsed 0.029 ms (2.890 ms / 100) 2.887 -> 2.877 ( -0.35%) [ +0.21% +0.31% +0.00% / -0.10% -0.24% -0.35%] index_select linear : Elapsed 0.029 ms (2.893 ms / 100) 2.887 -> 2.872 ( -0.52%) [ +0.07% +0.03% +0.00% / -0.21% -0.52% -0.17%] index_select reverse : Elapsed 0.029 ms (2.889 ms / 100) 2.882 -> 2.865 ( -0.59%) [ +0.14% +0.03% +0.00% / -0.59% -0.49% -0.35%] index_select skip64 : Elapsed 0.029 ms (2.886 ms / 100) 2.904 -> 2.868 ( -1.24%) [ +0.10% +0.00% +0.14% / -0.93% -1.17% -1.24%] index_select skip256 : Elapsed 0.029 ms (2.907 ms / 100) 2.888 -> 2.868 ( -0.69%) [ +0.24% +0.00% +0.03% / -0.07% -0.69% -0.59%] index_select spread : Elapsed 0.029 ms (2.895 ms / 100) 2.890 -> 2.870 ( -0.69%) [ +0.07% +0.17% +0.00% / -0.48% -0.69% -0.66%] index_select strided 3 : Elapsed 0.029 ms (2.892 ms / 100) 2.878 -> 2.870 ( -0.28%) [ +0.00% +0.17% +0.21% / +0.00% -0.28% -0.14%] index_select strided 5 : Elapsed 0.029 ms (2.878 ms / 100) 2.892 -> 2.873 ( -0.66%) [ +0.00% +0.07% +0.14% / -0.24% -0.66% -0.52%] index_select strided 7 : Elapsed 0.029 ms (2.892 ms / 100) 2.898 -> 2.871 ( -0.93%) [ +0.03% +0.17% +0.00% / -0.93% -0.79% -0.72%] index_select strided 8 : Elapsed 0.029 ms (2.899 ms / 100) 2.899 -> 2.868 ( -1.07%) [ +0.17% +0.00% +0.03% / -1.00% -1.07% -0.83%] index_select strided 16 : Elapsed 0.029 ms (2.904 ms / 100) 2.883 -> 2.866 ( -0.59%) [ +0.14% +0.07% +0.00% / -0.59% -0.42% -0.10%] index_select random : Elapsed 0.029 ms (2.887 ms / 100) 2.880 -> 2.877 ( -0.10%) [ +0.00% +0.21% +0.21% / -0.10% -0.10% -0.03%] index_select random_sorted : Elapsed 0.029 ms (2.880 ms / 100) B = [40, 4, 16, 5] (stride (1, 640, 40, 2560)) A = [20, 4, 16, 5] (stride (64, 1, 4, 1280)) dim = 0 2.391 -> 2.406 ( +0.63%) [ +0.08% +0.17% +0.00% / +0.63% +0.75% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.393 ms / 100) 2.400 -> 2.412 ( +0.50%) [ +0.08% +0.25% +0.00% / +0.50% +0.87% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.402 ms / 100) 2.396 -> 2.406 ( +0.42%) [ +0.04% +0.17% +0.00% / +0.42% +0.54% +0.58%] index_add_ reverse : Elapsed 0.024 ms (2.397 ms / 100) 2.402 -> 2.414 ( +0.50%) [ +0.12% +0.00% +0.17% / +0.50% +0.58% +0.67%] index_copy_ reverse : Elapsed 0.024 ms (2.405 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.00% +0.12% +0.04% / +0.29% +0.37% +0.41%] index_add_ spread : Elapsed 0.024 ms (2.410 ms / 100) 2.422 -> 2.433 ( +0.45%) [ +0.08% +0.12% +0.00% / +0.45% +0.45% +0.66%] index_copy_ spread : Elapsed 0.024 ms (2.424 ms / 100) 2.409 -> 2.419 ( +0.42%) [ +0.08% +0.00% +0.00% / +0.42% +0.50% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.411 ms / 100) 2.419 -> 2.434 ( +0.62%) [ +0.00% +0.00% +0.08% / +0.62% +0.79% +0.70%] index_copy_ strided 3 : Elapsed 0.024 ms (2.419 ms / 100) 2.410 -> 2.418 ( +0.33%) [ +0.21% +0.00% +0.00% / +0.58% +0.33% +0.58%] index_add_ strided 7 : Elapsed 0.024 ms (2.415 ms / 100) 2.422 -> 2.432 ( +0.41%) [ +0.04% +0.00% +0.08% / +0.41% +0.54% +1.07%] index_copy_ strided 7 : Elapsed 0.024 ms (2.423 ms / 100) 2.410 -> 2.422 ( +0.50%) [ +0.12% +0.00% +0.00% / +0.50% +0.71% +1.00%] index_add_ perm : Elapsed 0.024 ms (2.413 ms / 100) 2.417 -> 2.437 ( +0.83%) [ +0.21% +0.46% +0.00% / +0.91% +0.83% +1.94%] index_copy_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.408 -> 2.421 ( +0.54%) [ +0.04% +0.00% +0.04% / +0.54% +0.54% +0.87%] index_add_ perm_sorted : Elapsed 0.024 ms (2.409 ms / 100) 2.422 -> 2.432 ( +0.41%) [ +0.00% +0.00% +0.00% / +0.41% +0.66% +1.20%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.422 ms / 100) 4.421 -> 4.416 ( -0.11%) [ +0.14% +0.05% +0.00% / -0.11% -0.02% +0.18%] index_select const : Elapsed 0.044 ms (4.427 ms / 100) 4.429 -> 4.425 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.09% +0.02% +0.11%] index_select wrap : Elapsed 0.044 ms (4.430 ms / 100) 4.424 -> 4.430 ( +0.14%) [ +0.16% +0.20% +0.00% / +0.14% +0.29% +0.45%] index_select linear : Elapsed 0.044 ms (4.431 ms / 100) 4.422 -> 4.421 ( -0.02%) [ +0.20% +0.16% +0.00% / -0.02% +0.32% +0.43%] index_select reverse : Elapsed 0.044 ms (4.431 ms / 100) 4.417 -> 4.421 ( +0.09%) [ +0.23% +0.07% +0.00% / +0.18% +0.09% +0.09%] index_select skip64 : Elapsed 0.044 ms (4.427 ms / 100) 4.416 -> 4.419 ( +0.07%) [ +0.07% +0.02% +0.00% / +0.07% +0.16% +0.57%] index_select skip256 : Elapsed 0.044 ms (4.419 ms / 100) 4.428 -> 4.430 ( +0.05%) [ +0.00% +0.07% +0.05% / +0.05% +0.16% +0.11%] index_select spread : Elapsed 0.044 ms (4.428 ms / 100) 4.423 -> 4.426 ( +0.07%) [ +0.09% +0.00% +0.11% / +0.07% +0.20% +0.14%] index_select strided 3 : Elapsed 0.044 ms (4.427 ms / 100) 4.421 -> 4.424 ( +0.07%) [ +0.00% +0.09% +0.11% / +0.23% +0.16% +0.07%] index_select strided 5 : Elapsed 0.044 ms (4.421 ms / 100) 4.418 -> 4.425 ( +0.16%) [ +0.23% +0.18% +0.00% / +0.16% +0.32% +0.86%] index_select strided 7 : Elapsed 0.044 ms (4.428 ms / 100) 4.423 -> 4.419 ( -0.09%) [ +0.00% +0.07% +0.00% / -0.09% +0.00% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.423 ms / 100) 4.416 -> 4.424 ( +0.18%) [ +0.18% +0.16% +0.00% / +0.18% +0.20% +0.52%] index_select strided 16 : Elapsed 0.044 ms (4.424 ms / 100) 4.424 -> 4.427 ( +0.07%) [ +0.16% +0.05% +0.00% / +0.11% +0.07% +0.61%] index_select random : Elapsed 0.044 ms (4.431 ms / 100) 4.426 -> 4.423 ( -0.07%) [ +0.00% +0.00% +0.11% / -0.07% +0.05% +0.41%] index_select random_sorted : Elapsed 0.044 ms (4.426 ms / 100) out_shape = [20, 40, 16, 5] in_shape = [20, 4, 16, 5] idx_dim = 1 B = [20, 40, 16, 5] (stride (80, 1600, 5, 1)) A = [20, 4, 16, 5] (stride (5, 100, 400, 1)) dim = 1 1.230 -> 1.229 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.33% +2.52%] index_add_ linear : Elapsed 0.012 ms (1.230 ms / 100) 1.190 -> 1.190 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +1.01%] index_copy_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.57% +0.90%] index_add_ reverse : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.191 ( +0.17%) [ +0.00% +0.17% +0.00% / +0.17% +0.67% +1.35%] index_copy_ reverse : Elapsed 0.012 ms (1.189 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +1.46%] index_add_ spread : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.191 ( +0.17%) [ +0.00% +0.08% +0.00% / +0.17% +0.67% +1.51%] index_copy_ spread : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.65% +0.90%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.76% +0.93%] index_copy_ strided 3 : Elapsed 0.012 ms (1.188 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.65% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.188 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.67% +0.67%] index_copy_ strided 7 : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.73% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.57% +1.38%] index_add_ perm_sorted : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.76% +1.93%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.668 -> 8.684 ( +0.18%) [ +0.00% +0.35% +0.07% / +0.24% +0.18% +0.21%] index_select const : Elapsed 0.087 ms (8.668 ms / 100) 8.692 -> 8.711 ( +0.22%) [ +0.23% +0.08% +0.00% / +0.22% +0.36% +0.24%] index_select wrap : Elapsed 0.087 ms (8.712 ms / 100) 8.684 -> 8.705 ( +0.24%) [ +0.13% +0.12% +0.00% / +0.24% +0.51% +0.25%] index_select linear : Elapsed 0.087 ms (8.695 ms / 100) 8.694 -> 8.705 ( +0.13%) [ +0.03% +0.08% +0.00% / +0.13% +0.26% +0.18%] index_select reverse : Elapsed 0.087 ms (8.697 ms / 100) 8.677 -> 8.679 ( +0.02%) [ +0.12% +0.33% +0.00% / +0.09% +0.06% +0.02%] index_select skip64 : Elapsed 0.087 ms (8.687 ms / 100) 8.678 -> 8.682 ( +0.05%) [ +0.00% +0.18% +0.17% / +0.05% +0.08% +0.15%] index_select skip256 : Elapsed 0.087 ms (8.678 ms / 100) 8.707 -> 8.697 ( -0.11%) [ +0.07% +0.07% +0.00% / +0.10% -0.11% +0.14%] index_select spread : Elapsed 0.087 ms (8.713 ms / 100) 8.694 -> 8.702 ( +0.09%) [ +0.00% +0.02% +0.20% / +0.09% +0.21% +0.10%] index_select strided 3 : Elapsed 0.087 ms (8.694 ms / 100) 8.709 -> 8.708 ( -0.01%) [ +0.10% +0.00% +0.01% / +0.09% -0.01% +0.09%] index_select random : Elapsed 0.087 ms (8.718 ms / 100) 8.694 -> 8.708 ( +0.16%) [ +0.38% +0.00% +0.07% / +0.29% +0.29% +0.16%] index_select random_sorted : Elapsed 0.087 ms (8.727 ms / 100) B = [20, 40, 16, 5] (stride (5, 1600, 100, 1)) A = [20, 4, 16, 5] (stride (5, 100, 400, 1)) dim = 1 1.312 -> 1.311 ( -0.08%) [ +0.23% +0.15% +0.00% / -0.08% +0.46% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.315 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.71% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.313 -> 1.313 ( +0.00%) [ +0.38% +0.00% +0.38% / +0.00% +0.38% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.39% +0.39%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.316 ( +0.30%) [ +0.23% +0.00% +0.08% / +0.30% +0.53% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.47% +0.31%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.312 ( +0.00%) [ +0.08% +0.30% +0.00% / +0.00% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.318 ( +0.46%) [ +0.00% +0.15% +0.08% / +0.46% +0.46% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.312 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.47% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.314 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.46% +0.46%] index_add_ perm : Elapsed 0.013 ms (1.314 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.00% +0.16% +0.08% / +0.16% +0.39% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.316 ( +0.30%) [ +0.30% +0.15% +0.00% / +0.46% +0.46% +0.30%] index_add_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.39% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 9.141 -> 9.145 ( +0.04%) [ +0.00% +0.13% +0.13% / +0.04% +0.09% +0.26%] index_select const : Elapsed 0.091 ms (9.141 ms / 100) 9.177 -> 9.170 ( -0.08%) [ +0.00% +0.05% +0.07% / -0.02% -0.08% +0.14%] index_select wrap : Elapsed 0.092 ms (9.177 ms / 100) 9.154 -> 9.169 ( +0.16%) [ +0.00% +0.22% +0.12% / +0.25% +0.34% +0.16%] index_select linear : Elapsed 0.092 ms (9.154 ms / 100) 9.155 -> 9.152 ( -0.03%) [ +0.02% +0.00% +0.13% / -0.03% +0.52% +0.29%] index_select reverse : Elapsed 0.092 ms (9.157 ms / 100) 9.139 -> 9.150 ( +0.12%) [ +0.19% +0.00% +0.18% / +0.12% +0.36% +0.26%] index_select skip64 : Elapsed 0.092 ms (9.156 ms / 100) 9.142 -> 9.145 ( +0.03%) [ +0.34% +0.01% +0.00% / +0.03% +0.30% +0.19%] index_select skip256 : Elapsed 0.092 ms (9.173 ms / 100) 9.173 -> 9.180 ( +0.08%) [ +0.03% +0.26% +0.00% / +0.09% +0.21% +0.08%] index_select spread : Elapsed 0.092 ms (9.176 ms / 100) 9.177 -> 9.167 ( -0.11%) [ +0.00% +0.15% +0.02% / -0.11% +0.11% +0.04%] index_select strided 3 : Elapsed 0.092 ms (9.177 ms / 100) 9.168 -> 9.168 ( +0.00%) [ +0.08% +0.00% +0.01% / +0.04% +0.00% +0.23%] index_select random : Elapsed 0.092 ms (9.175 ms / 100) 9.177 -> 9.187 ( +0.11%) [ +0.00% +0.05% +0.14% / +0.11% +0.12% +0.22%] index_select random_sorted : Elapsed 0.092 ms (9.177 ms / 100) B = [20, 40, 16, 5] (stride (16, 1600, 1, 320)) A = [20, 4, 16, 5] (stride (20, 1, 400, 4)) dim = 1 1.315 -> 1.314 ( -0.08%) [ +0.23% +0.00% +0.08% / -0.08% +0.23% +0.68%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.275 ( +0.16%) [ +0.24% +0.16% +0.00% / +0.16% +0.71% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.61% +0.61%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.39% +0.00% / +0.16% +0.63% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.280 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.23% +0.23%] index_add_ spread : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.86% +1.10%] index_copy_ spread : Elapsed 0.013 ms (1.274 ms / 100) 1.312 -> 1.316 ( +0.30%) [ +0.30% +0.00% +0.15% / +0.30% +0.46% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.79% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.311 -> 1.315 ( +0.31%) [ +0.15% +0.31% +0.00% / +0.31% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.313 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.79% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.46% +0.23% +0.00% / +0.46% +0.38% +0.46%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.63% +0.55%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) 9.146 -> 9.177 ( +0.34%) [ +0.00% +0.09% +0.03% / +0.38% +0.34% +0.44%] index_select const : Elapsed 0.091 ms (9.146 ms / 100) 9.153 -> 9.158 ( +0.05%) [ +0.07% +0.00% +0.27% / +0.05% +0.12% +0.28%] index_select wrap : Elapsed 0.092 ms (9.159 ms / 100) 9.159 -> 9.165 ( +0.07%) [ +0.03% +0.00% +0.13% / +0.20% +0.07% +0.22%] index_select linear : Elapsed 0.092 ms (9.162 ms / 100) 9.160 -> 9.152 ( -0.09%) [ +0.12% +0.00% +0.19% / -0.09% +0.27% +0.20%] index_select reverse : Elapsed 0.092 ms (9.171 ms / 100) 9.154 -> 9.187 ( +0.36%) [ +0.17% +0.12% +0.00% / +0.36% +0.44% +0.43%] index_select skip64 : Elapsed 0.092 ms (9.170 ms / 100) 9.163 -> 9.166 ( +0.03%) [ +0.04% +0.00% +0.02% / +0.03% +0.11% +0.39%] index_select skip256 : Elapsed 0.092 ms (9.167 ms / 100) 9.153 -> 9.182 ( +0.32%) [ +0.23% +0.14% +0.00% / +0.32% +0.35% +0.37%] index_select spread : Elapsed 0.092 ms (9.174 ms / 100) 9.154 -> 9.166 ( +0.13%) [ +0.17% +0.11% +0.00% / +0.13% +0.54% +0.32%] index_select strided 3 : Elapsed 0.092 ms (9.170 ms / 100) 9.153 -> 9.169 ( +0.17%) [ +0.22% +0.00% +0.40% / +0.17% +0.38% +0.33%] index_select random : Elapsed 0.092 ms (9.173 ms / 100) 9.157 -> 9.167 ( +0.11%) [ +0.02% +0.00% +0.19% / +0.11% +0.39% +0.23%] index_select random_sorted : Elapsed 0.092 ms (9.159 ms / 100) B = [20, 40, 16, 5] (stride (1, 1600, 20, 320)) A = [20, 4, 16, 5] (stride (320, 80, 5, 1)) dim = 1 1.226 -> 1.226 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.57% +0.82%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.67% +0.84%] index_copy_ linear : Elapsed 0.012 ms (1.188 ms / 100) 1.228 -> 1.228 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.57% +0.41%] index_add_ reverse : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.17% +0.08% +0.00% / +0.00% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.227 -> 1.229 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.33% +0.41%] index_add_ spread : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.50% +0.42%] index_copy_ spread : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.226 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.49% +0.41%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.51%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.41% +0.57%] index_add_ perm : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.42% +0.34%] index_copy_ perm : Elapsed 0.012 ms (1.190 ms / 100) 1.227 -> 1.228 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.701 -> 8.714 ( +0.15%) [ +0.33% +0.18% +0.00% / +0.15% +0.23% +0.29%] index_select const : Elapsed 0.087 ms (8.730 ms / 100) 8.733 -> 8.751 ( +0.21%) [ +0.00% +0.06% +0.10% / +0.34% +0.21% +0.31%] index_select wrap : Elapsed 0.087 ms (8.733 ms / 100) 8.734 -> 8.740 ( +0.07%) [ +0.06% +0.03% +0.00% / +0.07% +0.25% +0.26%] index_select linear : Elapsed 0.087 ms (8.739 ms / 100) 8.725 -> 8.729 ( +0.05%) [ +0.13% +0.22% +0.00% / +0.05% +0.58% +0.31%] index_select reverse : Elapsed 0.087 ms (8.736 ms / 100) 8.719 -> 8.716 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.08% +0.24%] index_select skip64 : Elapsed 0.087 ms (8.719 ms / 100) 8.707 -> 8.703 ( -0.05%) [ +0.11% +0.00% +0.15% / -0.05% +0.07% +0.24%] index_select skip256 : Elapsed 0.087 ms (8.717 ms / 100) 8.741 -> 8.736 ( -0.06%) [ +0.00% +0.02% +0.16% / -0.06% +0.09% +0.11%] index_select spread : Elapsed 0.087 ms (8.741 ms / 100) 8.735 -> 8.740 ( +0.06%) [ +0.22% +0.00% +0.19% / +0.06% +0.21% +0.39%] index_select strided 3 : Elapsed 0.088 ms (8.754 ms / 100) 8.740 -> 8.739 ( -0.01%) [ +0.00% +0.27% +0.16% / -0.01% +0.27% +0.22%] index_select random : Elapsed 0.087 ms (8.740 ms / 100) 8.733 -> 8.724 ( -0.10%) [ +0.01% +0.03% +0.00% / -0.10% +0.18% +0.41%] index_select random_sorted : Elapsed 0.087 ms (8.734 ms / 100) B = [20, 40, 16, 5] (stride (1, 1600, 20, 320)) A = [20, 4, 16, 5] (stride (1, 1600, 100, 20)) dim = 1 1.276 -> 1.277 ( +0.08%) [ +0.24% +0.00% +0.00% / +0.08% +0.63% +0.71%] index_add_ linear : Elapsed 0.013 ms (1.279 ms / 100) 1.242 -> 1.243 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.40% +0.16% +0.08%] index_copy_ linear : Elapsed 0.012 ms (1.243 ms / 100) 1.261 -> 1.261 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +0.40% +0.79%] index_add_ reverse : Elapsed 0.013 ms (1.263 ms / 100) 1.222 -> 1.222 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.74% +0.57%] index_copy_ reverse : Elapsed 0.012 ms (1.222 ms / 100) 1.264 -> 1.264 ( +0.00%) [ +0.24% +0.24% +0.00% / +0.00% +0.47% +0.55%] index_add_ spread : Elapsed 0.013 ms (1.267 ms / 100) 1.231 -> 1.233 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.32% +0.32%] index_copy_ spread : Elapsed 0.012 ms (1.233 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.013 ms (1.276 ms / 100) 1.240 -> 1.242 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.40% +0.40%] index_copy_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.79% +0.87%] index_add_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.234 -> 1.234 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.65% +0.81%] index_copy_ strided 7 : Elapsed 0.012 ms (1.234 ms / 100) 1.264 -> 1.265 ( +0.08%) [ +0.24% +0.00% +0.16% / +0.08% +0.40% +0.55%] index_add_ perm : Elapsed 0.013 ms (1.267 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.73% +0.73%] index_copy_ perm : Elapsed 0.012 ms (1.226 ms / 100) 1.260 -> 1.259 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.48% +0.48%] index_add_ perm_sorted : Elapsed 0.013 ms (1.260 ms / 100) 1.221 -> 1.222 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.82% +0.74%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.221 ms / 100) 8.713 -> 8.724 ( +0.13%) [ +0.26% +0.00% +0.29% / +0.13% +0.47% +0.62%] index_select const : Elapsed 0.087 ms (8.736 ms / 100) 8.735 -> 8.751 ( +0.18%) [ +0.00% +0.24% +0.15% / +0.18% +0.56% +0.37%] index_select wrap : Elapsed 0.087 ms (8.735 ms / 100) 8.730 -> 8.746 ( +0.18%) [ +0.00% +0.14% +0.37% / +0.18% +0.70% +0.64%] index_select linear : Elapsed 0.087 ms (8.730 ms / 100) 8.739 -> 8.751 ( +0.14%) [ +0.00% +0.23% +0.19% / +0.17% +0.18% +0.14%] index_select reverse : Elapsed 0.087 ms (8.739 ms / 100) 8.720 -> 8.736 ( +0.18%) [ +0.00% +0.11% +0.03% / +0.18% +0.29% +0.26%] index_select skip64 : Elapsed 0.087 ms (8.720 ms / 100) 8.719 -> 8.740 ( +0.24%) [ +0.00% +0.02% +0.24% / +0.24% +0.33% +0.44%] index_select skip256 : Elapsed 0.087 ms (8.719 ms / 100) 8.740 -> 8.755 ( +0.17%) [ +0.31% +0.00% +0.17% / +0.24% +0.17% +0.43%] index_select spread : Elapsed 0.088 ms (8.767 ms / 100) 8.750 -> 8.742 ( -0.09%) [ +0.13% +0.02% +0.00% / -0.09% +0.26% +0.18%] index_select strided 3 : Elapsed 0.088 ms (8.761 ms / 100) 8.742 -> 8.753 ( +0.13%) [ +0.19% +0.16% +0.00% / +0.13% +0.23% +0.27%] index_select random : Elapsed 0.088 ms (8.759 ms / 100) 8.744 -> 8.753 ( +0.10%) [ +0.00% +0.07% +0.22% / +0.10% +0.37% +0.32%] index_select random_sorted : Elapsed 0.087 ms (8.744 ms / 100) B = [20, 40, 16, 5] (stride (1, 1600, 20, 320)) A = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) dim = 1 1.311 -> 1.318 ( +0.53%) [ +0.31% +0.31% +0.00% / +0.53% +0.53% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.315 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.71% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.274 ms / 100) 1.313 -> 1.317 ( +0.30%) [ +0.38% +0.00% +0.23% / +0.30% +0.38% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.275 ms / 100) 1.313 -> 1.318 ( +0.38%) [ +0.23% +0.15% +0.00% / +0.38% +0.38% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.316 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.47% +0.71%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.30% +0.00% +0.23% / +0.38% +0.38% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.316 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.78% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.316 -> 1.315 ( -0.08%) [ +0.15% +0.38% +0.00% / -0.08% +0.23% +0.15%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.24% +0.00% / +0.08% +0.63% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.23% +0.23%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.55% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.275 ms / 100) 1.316 -> 1.316 ( +0.00%) [ +0.23% +0.00% +0.15% / +0.00% +0.23% +0.08%] index_add_ perm_sorted : Elapsed 0.013 ms (1.319 ms / 100) 1.275 -> 1.275 ( +0.00%) [ +0.08% +0.24% +0.00% / +0.00% +0.47% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.276 ms / 100) 9.154 -> 9.163 ( +0.10%) [ +0.00% +0.08% +0.01% / +0.10% +0.13% +0.31%] index_select const : Elapsed 0.092 ms (9.154 ms / 100) 9.168 -> 9.179 ( +0.12%) [ +0.16% +0.00% +0.10% / +0.12% +0.13% +0.34%] index_select wrap : Elapsed 0.092 ms (9.183 ms / 100) 9.161 -> 9.153 ( -0.09%) [ +0.17% +0.21% +0.00% / +0.19% -0.09% +0.31%] index_select linear : Elapsed 0.092 ms (9.177 ms / 100) 9.156 -> 9.177 ( +0.23%) [ +0.00% +0.07% +0.16% / +0.25% +0.29% +0.23%] index_select reverse : Elapsed 0.092 ms (9.156 ms / 100) 9.148 -> 9.150 ( +0.02%) [ +0.15% +0.12% +0.00% / +0.02% +0.11% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.162 ms / 100) 9.145 -> 9.161 ( +0.17%) [ +0.00% +0.28% +0.30% / +0.21% +0.44% +0.17%] index_select skip256 : Elapsed 0.091 ms (9.145 ms / 100) 9.194 -> 9.188 ( -0.07%) [ +0.02% +0.05% +0.00% / +0.09% +0.14% -0.07%] index_select spread : Elapsed 0.092 ms (9.196 ms / 100) 9.182 -> 9.194 ( +0.13%) [ +0.05% +0.00% +0.10% / +0.24% +0.13% +0.37%] index_select strided 3 : Elapsed 0.092 ms (9.187 ms / 100) 9.177 -> 9.180 ( +0.03%) [ +0.00% +0.09% +0.13% / +0.04% +0.03% +0.10%] index_select random : Elapsed 0.092 ms (9.177 ms / 100) 9.189 -> 9.193 ( +0.04%) [ +0.09% +0.00% +0.05% / +0.04% +0.19% +0.25%] index_select random_sorted : Elapsed 0.092 ms (9.197 ms / 100) B = [20, 40, 16, 5] (stride (40, 1, 800, 12800)) A = [20, 4, 16, 5] (stride (320, 16, 1, 64)) dim = 1 1.317 -> 1.318 ( +0.08%) [ +0.15% +0.23% +0.00% / +0.08% +0.38% +0.30%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.55% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.38% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.278 -> 1.278 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.327 -> 1.329 ( +0.15%) [ +0.30% +0.00% +0.23% / +0.15% +0.30% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.331 ms / 100) 1.288 -> 1.287 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.31% +0.16%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.15% +0.08% +0.00% / +0.08% +0.76% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.320 ms / 100) 1.280 -> 1.282 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.78% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.321 -> 1.320 ( -0.08%) [ +0.15% +0.00% +0.08% / -0.08% +0.68% +0.61%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.23% +0.00% +0.16% / +0.16% +0.62% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.285 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.08% +0.00% +0.00% / +0.15% +0.91% +0.68%] index_add_ perm : Elapsed 0.013 ms (1.320 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.23% +0.08% +0.00% / +0.16% +0.55% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.284 ms / 100) 1.320 -> 1.318 ( -0.15%) [ +0.00% +0.00% +0.00% / -0.15% +0.76% +0.76%] index_add_ perm_sorted : Elapsed 0.013 ms (1.320 ms / 100) 1.280 -> 1.283 ( +0.23%) [ +0.31% +0.08% +0.00% / +0.23% +0.78% +0.70%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) 9.225 -> 9.234 ( +0.10%) [ +0.24% +0.00% +0.03% / +0.10% +0.25% +0.17%] index_select const : Elapsed 0.092 ms (9.247 ms / 100) 9.249 -> 9.259 ( +0.11%) [ +0.00% +0.01% +0.22% / +0.18% +0.31% +0.11%] index_select wrap : Elapsed 0.092 ms (9.249 ms / 100) 9.235 -> 9.236 ( +0.01%) [ +0.15% +0.00% +0.26% / +0.01% +0.27% +0.54%] index_select linear : Elapsed 0.092 ms (9.249 ms / 100) 9.233 -> 9.253 ( +0.22%) [ +0.10% +0.00% +0.05% / +0.22% +0.37% +0.28%] index_select reverse : Elapsed 0.092 ms (9.242 ms / 100) 9.231 -> 9.239 ( +0.09%) [ +0.00% +0.15% +0.08% / +0.09% +0.22% +0.39%] index_select skip64 : Elapsed 0.092 ms (9.231 ms / 100) 9.228 -> 9.231 ( +0.03%) [ +0.00% +0.27% +0.00% / +0.10% +0.20% +0.03%] index_select skip256 : Elapsed 0.092 ms (9.228 ms / 100) 9.251 -> 9.255 ( +0.04%) [ +0.22% +0.13% +0.00% / +0.04% +0.22% +0.29%] index_select spread : Elapsed 0.093 ms (9.271 ms / 100) 9.249 -> 9.254 ( +0.05%) [ +0.19% +0.44% +0.00% / +0.05% +0.15% +0.25%] index_select strided 3 : Elapsed 0.093 ms (9.267 ms / 100) 9.246 -> 9.273 ( +0.29%) [ +0.09% +0.00% +0.02% / +0.29% +0.47% +0.34%] index_select random : Elapsed 0.093 ms (9.254 ms / 100) 9.245 -> 9.273 ( +0.30%) [ +0.00% +0.28% +0.05% / +0.32% +0.34% +0.30%] index_select random_sorted : Elapsed 0.092 ms (9.245 ms / 100) B = [20, 40, 16, 5] (stride (1, 20, 800, 12800)) A = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) dim = 1 1.312 -> 1.314 ( +0.15%) [ +0.15% +0.38% +0.00% / +0.15% +0.46% +0.38%] index_add_ linear : Elapsed 0.013 ms (1.314 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.55% +0.78%] index_copy_ linear : Elapsed 0.013 ms (1.274 ms / 100) 1.314 -> 1.318 ( +0.30%) [ +0.23% +0.00% +0.08% / +0.30% +0.46% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.317 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.276 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.15% +0.00% +0.08% / +0.00% +0.30% +0.30%] index_add_ spread : Elapsed 0.013 ms (1.317 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.00% +0.08% +0.08% / +0.16% +0.55% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.274 ms / 100) 1.315 -> 1.317 ( +0.15%) [ +0.30% +0.23% +0.00% / +0.15% +0.15% +0.30%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.86% +0.71%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.15% +0.00% +0.00% / +0.08% +0.15% +0.23%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.63% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.08% +0.08% +0.00%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.276 ( +0.16%) [ +0.00% +0.24% +0.08% / +0.16% +0.39% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.274 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.08% +0.15%] index_add_ perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) 1.275 -> 1.276 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.47% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.275 ms / 100) 9.170 -> 9.173 ( +0.03%) [ +0.08% +0.00% +0.19% / +0.03% +0.15% +0.07%] index_select const : Elapsed 0.092 ms (9.177 ms / 100) 9.183 -> 9.191 ( +0.09%) [ +0.22% +0.00% +0.14% / +0.15% +0.11% +0.09%] index_select wrap : Elapsed 0.092 ms (9.203 ms / 100) 9.183 -> 9.175 ( -0.09%) [ +0.00% +0.15% +0.01% / +0.09% +0.09% -0.09%] index_select linear : Elapsed 0.092 ms (9.183 ms / 100) 9.167 -> 9.176 ( +0.10%) [ +0.10% +0.15% +0.00% / +0.10% +0.36% +0.15%] index_select reverse : Elapsed 0.092 ms (9.176 ms / 100) 9.172 -> 9.176 ( +0.04%) [ +0.00% +0.13% +0.03% / +0.04% +0.13% +0.12%] index_select skip64 : Elapsed 0.092 ms (9.172 ms / 100) 9.173 -> 9.160 ( -0.14%) [ +0.00% +0.07% +0.10% / -0.14% +0.08% +0.11%] index_select skip256 : Elapsed 0.092 ms (9.173 ms / 100) 9.193 -> 9.197 ( +0.04%) [ +0.11% +0.00% +0.10% / +0.04% +0.12% +0.05%] index_select spread : Elapsed 0.092 ms (9.203 ms / 100) 9.192 -> 9.193 ( +0.01%) [ +0.00% +0.05% +0.04% / +0.01% +0.16% +0.10%] index_select strided 3 : Elapsed 0.092 ms (9.192 ms / 100) 9.189 -> 9.192 ( +0.03%) [ +0.05% +0.00% +0.16% / +0.03% +0.03% +0.14%] index_select random : Elapsed 0.092 ms (9.194 ms / 100) 9.189 -> 9.190 ( +0.01%) [ +0.09% +0.27% +0.00% / +0.21% +0.01% +0.17%] index_select random_sorted : Elapsed 0.092 ms (9.197 ms / 100) out_shape = [20, 4, 40, 5] in_shape = [20, 4, 16, 5] idx_dim = 2 B = [20, 4, 40, 5] (stride (800, 200, 1, 40)) A = [20, 4, 16, 5] (stride (320, 1, 4, 64)) dim = 2 3.840 -> 3.832 ( -0.21%) [ +0.10% +0.03% +0.00% / -0.21% +0.34% +0.10%] index_add_ linear : Elapsed 0.038 ms (3.844 ms / 100) 3.697 -> 3.695 ( -0.05%) [ +0.11% +0.05% +0.00% / -0.05% +0.54% +0.43%] index_copy_ linear : Elapsed 0.037 ms (3.701 ms / 100) 3.846 -> 3.845 ( -0.03%) [ +0.13% +0.00% +0.10% / -0.03% +0.42% +0.42%] index_add_ reverse : Elapsed 0.039 ms (3.851 ms / 100) 3.703 -> 3.707 ( +0.11%) [ +0.14% +0.00% +0.03% / +0.11% +0.41% +0.46%] index_copy_ reverse : Elapsed 0.037 ms (3.708 ms / 100) 3.836 -> 3.838 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.50% +0.47%] index_add_ spread : Elapsed 0.038 ms (3.838 ms / 100) 3.700 -> 3.697 ( -0.08%) [ +0.14% +0.00% +0.03% / -0.08% +0.41% +0.57%] index_copy_ spread : Elapsed 0.037 ms (3.705 ms / 100) 3.816 -> 3.821 ( +0.13%) [ +0.00% +0.10% +0.05% / +0.13% +0.55% +0.58%] index_add_ strided 3 : Elapsed 0.038 ms (3.816 ms / 100) 3.685 -> 3.688 ( +0.08%) [ +0.00% +0.03% +0.05% / +0.08% +0.60% +0.46%] index_copy_ strided 3 : Elapsed 0.037 ms (3.685 ms / 100) 3.842 -> 3.844 ( +0.05%) [ +0.05% +0.13% +0.00% / +0.05% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.038 ms (3.844 ms / 100) 3.700 -> 3.704 ( +0.11%) [ +0.24% +0.19% +0.00% / +0.11% +0.81% +0.76%] index_copy_ strided 7 : Elapsed 0.037 ms (3.709 ms / 100) 3.826 -> 3.836 ( +0.26%) [ +0.00% +0.37% +0.18% / +0.26% +0.84% +0.81%] index_add_ perm : Elapsed 0.038 ms (3.826 ms / 100) 3.691 -> 3.695 ( +0.11%) [ +0.00% +0.24% +0.14% / +0.11% +0.65% +0.73%] index_copy_ perm : Elapsed 0.037 ms (3.691 ms / 100) 3.829 -> 3.832 ( +0.08%) [ +0.10% +0.00% +0.34% / +0.08% +0.52% +0.71%] index_add_ perm_sorted : Elapsed 0.038 ms (3.833 ms / 100) 3.693 -> 3.694 ( +0.03%) [ +0.03% +0.00% +0.19% / +0.03% +0.49% +0.62%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.694 ms / 100) 5.470 -> 5.475 ( +0.09%) [ +0.00% +0.09% +0.26% / +0.09% +0.09% +0.11%] index_select const : Elapsed 0.055 ms (5.470 ms / 100) 5.473 -> 5.472 ( -0.02%) [ +0.09% +0.00% +0.11% / -0.02% +0.33% +0.20%] index_select wrap : Elapsed 0.055 ms (5.478 ms / 100) 5.476 -> 5.482 ( +0.11%) [ +0.00% +0.05% +0.18% / +0.15% +0.18% +0.11%] index_select linear : Elapsed 0.055 ms (5.476 ms / 100) 5.477 -> 5.479 ( +0.04%) [ +0.09% +0.00% +0.07% / +0.09% +0.13% +0.04%] index_select reverse : Elapsed 0.055 ms (5.482 ms / 100) 5.477 -> 5.471 ( -0.11%) [ +0.04% +0.00% +0.09% / +0.00% -0.11% -0.09%] index_select skip64 : Elapsed 0.055 ms (5.479 ms / 100) 5.472 -> 5.470 ( -0.04%) [ +0.07% +0.20% +0.00% / -0.02% -0.04% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.480 -> 5.480 ( +0.00%) [ +0.00% +0.00% +0.05% / +0.02% +0.00% +0.07%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.13% +0.24%] index_select strided 3 : Elapsed 0.055 ms (5.474 ms / 100) 5.473 -> 5.478 ( +0.09%) [ +0.05% +0.02% +0.00% / +0.11% +0.22% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.476 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.16% +0.20% +0.00% / -0.02% +0.16% +0.26%] index_select strided 7 : Elapsed 0.055 ms (5.480 ms / 100) 5.466 -> 5.470 ( +0.07%) [ +0.24% +0.07% +0.00% / +0.07% +0.18% +0.31%] index_select strided 8 : Elapsed 0.055 ms (5.479 ms / 100) 5.476 -> 5.476 ( +0.00%) [ +0.11% +0.07% +0.00% / +0.00% +0.09% +0.00%] index_select random : Elapsed 0.055 ms (5.482 ms / 100) 5.471 -> 5.477 ( +0.11%) [ +0.18% +0.27% +0.00% / +0.11% +0.27% +0.18%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [20, 4, 40, 5] (stride (800, 1, 20, 4)) A = [20, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 2 3.938 -> 3.938 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.79% +0.76%] index_add_ linear : Elapsed 0.039 ms (3.938 ms / 100) 3.812 -> 3.813 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.81% +0.79%] index_copy_ linear : Elapsed 0.038 ms (3.814 ms / 100) 3.926 -> 3.925 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.74% +0.71%] index_add_ reverse : Elapsed 0.039 ms (3.926 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.79% +0.79%] index_copy_ reverse : Elapsed 0.038 ms (3.799 ms / 100) 3.931 -> 3.932 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.64% +0.66%] index_add_ spread : Elapsed 0.039 ms (3.932 ms / 100) 3.816 -> 3.820 ( +0.10%) [ +0.00% +0.03% +0.00% / +0.10% +0.58% +0.55%] index_copy_ spread : Elapsed 0.038 ms (3.816 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.66% +0.69%] index_add_ strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 3.804 -> 3.804 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.68% +0.68%] index_copy_ strided 3 : Elapsed 0.038 ms (3.804 ms / 100) 3.925 -> 3.926 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.76% +0.74%] index_add_ strided 7 : Elapsed 0.039 ms (3.926 ms / 100) 3.798 -> 3.804 ( +0.16%) [ +0.00% +0.00% +0.00% / +0.16% +0.82% +0.84%] index_copy_ strided 7 : Elapsed 0.038 ms (3.798 ms / 100) 3.938 -> 3.939 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.81% +0.79%] index_add_ perm : Elapsed 0.039 ms (3.938 ms / 100) 3.813 -> 3.814 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.79% +0.81%] index_copy_ perm : Elapsed 0.038 ms (3.813 ms / 100) 3.927 -> 3.927 ( +0.00%) [ +0.03% +0.00% +0.08% / +0.00% +0.87% +0.87%] index_add_ perm_sorted : Elapsed 0.039 ms (3.928 ms / 100) 3.802 -> 3.802 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.82% +0.79%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.802 ms / 100) 5.558 -> 5.554 ( -0.07%) [ +0.02% +0.09% +0.00% / +0.23% +0.02% -0.07%] index_select const : Elapsed 0.056 ms (5.559 ms / 100) 5.565 -> 5.563 ( -0.04%) [ +0.11% +0.31% +0.00% / +0.25% -0.04% -0.04%] index_select wrap : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.566 ( -0.11%) [ +0.04% +0.04% +0.00% / -0.02% -0.11% -0.04%] index_select linear : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.07% +0.18% +0.00% / +0.18% +0.02% -0.05%] index_select reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.552 -> 5.553 ( +0.02%) [ +0.00% +0.11% +0.18% / +0.02% +0.22% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.552 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.04% +0.00% +0.05% / +0.04% +0.18% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.569 -> 5.560 ( -0.16%) [ +0.07% +0.02% +0.00% / -0.04% -0.16% -0.04%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.00% +0.07% +0.00% / +0.05% +0.04% -0.11%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.18% +0.05% +0.00% / +0.16% +0.13% -0.05%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.14% +0.22% +0.04%] index_select strided 7 : Elapsed 0.056 ms (5.570 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.14% +0.13% +0.00% / +0.11% +0.29% +0.27%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.564 -> 5.559 ( -0.09%) [ +0.20% +0.00% +0.05% / +0.20% +0.14% -0.09%] index_select random : Elapsed 0.056 ms (5.575 ms / 100) 5.560 -> 5.569 ( +0.16%) [ +0.20% +0.20% +0.00% / +0.16% +0.16% +0.18%] index_select random_sorted : Elapsed 0.056 ms (5.571 ms / 100) B = [20, 4, 40, 5] (stride (800, 1, 20, 4)) A = [20, 4, 16, 5] (stride (4, 1, 400, 80)) dim = 2 3.694 -> 3.696 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.51% +0.49%] index_add_ linear : Elapsed 0.037 ms (3.695 ms / 100) 3.574 -> 3.574 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.31% +0.31%] index_copy_ linear : Elapsed 0.036 ms (3.576 ms / 100) 3.729 -> 3.731 ( +0.05%) [ +0.13% +0.13% +0.00% / +0.05% +0.46% +0.56%] index_add_ reverse : Elapsed 0.037 ms (3.734 ms / 100) 3.591 -> 3.591 ( +0.00%) [ +0.17% +0.25% +0.00% / +0.00% +0.42% +0.61%] index_copy_ reverse : Elapsed 0.036 ms (3.597 ms / 100) 3.727 -> 3.728 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.32% +0.35%] index_add_ spread : Elapsed 0.037 ms (3.728 ms / 100) 3.595 -> 3.597 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.39% +0.42%] index_copy_ spread : Elapsed 0.036 ms (3.597 ms / 100) 3.701 -> 3.704 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.68% +0.57%] index_add_ strided 3 : Elapsed 0.037 ms (3.703 ms / 100) 3.575 -> 3.576 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.48% +0.56%] index_copy_ strided 3 : Elapsed 0.036 ms (3.575 ms / 100) 3.729 -> 3.731 ( +0.05%) [ +0.05% +0.13% +0.00% / +0.05% +0.67% +0.70%] index_add_ strided 7 : Elapsed 0.037 ms (3.731 ms / 100) 3.589 -> 3.593 ( +0.11%) [ +0.11% +0.31% +0.00% / +0.11% +0.81% +0.78%] index_copy_ strided 7 : Elapsed 0.036 ms (3.593 ms / 100) 3.695 -> 3.697 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.51% +0.51%] index_add_ perm : Elapsed 0.037 ms (3.696 ms / 100) 3.573 -> 3.574 ( +0.03%) [ +0.00% +0.03% +0.03% / +0.03% +0.45% +0.45%] index_copy_ perm : Elapsed 0.036 ms (3.573 ms / 100) 3.697 -> 3.696 ( -0.03%) [ +0.00% +0.08% +0.14% / -0.03% +0.41% +0.41%] index_add_ perm_sorted : Elapsed 0.037 ms (3.697 ms / 100) 3.575 -> 3.577 ( +0.06%) [ +0.00% +0.06% +0.11% / +0.06% +0.22% +0.20%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.575 ms / 100) 5.478 -> 5.480 ( +0.04%) [ +0.05% +0.00% +0.11% / +0.04% +0.05% +0.05%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.481 -> 5.489 ( +0.15%) [ +0.00% +0.07% +0.09% / +0.15% +0.20% +0.16%] index_select wrap : Elapsed 0.055 ms (5.481 ms / 100) 5.482 -> 5.489 ( +0.13%) [ +0.00% +0.02% +0.15% / +0.16% +0.22% +0.13%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.15% +0.13% +0.00%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.478 -> 5.476 ( -0.04%) [ +0.00% +0.05% +0.07% / +0.00% -0.04% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.479 -> 5.475 ( -0.07%) [ +0.20% +0.07% +0.00% / +0.18% -0.07% -0.04%] index_select skip256 : Elapsed 0.055 ms (5.490 ms / 100) 5.486 -> 5.481 ( -0.09%) [ +0.00% +0.07% +0.15% / -0.09% +0.09% +0.05%] index_select spread : Elapsed 0.055 ms (5.486 ms / 100) 5.479 -> 5.483 ( +0.07%) [ +0.00% +0.04% +0.00% / +0.07% +0.20% +0.13%] index_select strided 3 : Elapsed 0.055 ms (5.479 ms / 100) 5.478 -> 5.482 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.16% +0.20%] index_select strided 5 : Elapsed 0.055 ms (5.485 ms / 100) 5.481 -> 5.487 ( +0.11%) [ +0.00% +0.09% +0.04% / +0.11% +0.15% +0.15%] index_select strided 7 : Elapsed 0.055 ms (5.481 ms / 100) 5.481 -> 5.487 ( +0.11%) [ +0.15% +0.05% +0.00% / +0.11% +0.24% +0.11%] index_select strided 8 : Elapsed 0.055 ms (5.489 ms / 100) 5.483 -> 5.477 ( -0.11%) [ +0.02% +0.00% +0.00% / +0.00% -0.11% +0.18%] index_select random : Elapsed 0.055 ms (5.484 ms / 100) 5.477 -> 5.486 ( +0.16%) [ +0.07% +0.00% +0.22% / +0.22% +0.26% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [20, 4, 40, 5] (stride (800, 40, 1, 160)) A = [20, 4, 16, 5] (stride (320, 80, 1, 16)) dim = 2 3.907 -> 3.908 ( +0.03%) [ +0.33% +0.00% +0.18% / +0.03% +0.23% +0.31%] index_add_ linear : Elapsed 0.039 ms (3.920 ms / 100) 3.763 -> 3.766 ( +0.08%) [ +0.05% +0.05% +0.00% / +0.08% +0.48% +0.45%] index_copy_ linear : Elapsed 0.038 ms (3.765 ms / 100) 3.882 -> 3.882 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.98% +1.00%] index_add_ reverse : Elapsed 0.039 ms (3.882 ms / 100) 3.738 -> 3.738 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.56% +0.51%] index_copy_ reverse : Elapsed 0.037 ms (3.738 ms / 100) 3.883 -> 3.882 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +1.00% +0.98%] index_add_ spread : Elapsed 0.039 ms (3.884 ms / 100) 3.750 -> 3.749 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.85% +0.77%] index_copy_ spread : Elapsed 0.038 ms (3.752 ms / 100) 3.889 -> 3.890 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.80% +0.82%] index_add_ strided 3 : Elapsed 0.039 ms (3.889 ms / 100) 3.750 -> 3.750 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.83% +0.83%] index_copy_ strided 3 : Elapsed 0.038 ms (3.751 ms / 100) 3.883 -> 3.883 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.95% +0.88%] index_add_ strided 7 : Elapsed 0.039 ms (3.886 ms / 100) 3.739 -> 3.740 ( +0.03%) [ +0.13% +0.00% +0.00% / +0.03% +0.53% +0.56%] index_copy_ strided 7 : Elapsed 0.037 ms (3.744 ms / 100) 3.913 -> 3.918 ( +0.13%) [ +0.20% +0.00% +0.20% / +0.15% +0.20% +0.13%] index_add_ perm : Elapsed 0.039 ms (3.921 ms / 100) 3.764 -> 3.763 ( -0.03%) [ +0.08% +0.08% +0.00% / -0.03% +0.45% +0.48%] index_copy_ perm : Elapsed 0.038 ms (3.767 ms / 100) 3.888 -> 3.889 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.93% +0.93%] index_add_ perm_sorted : Elapsed 0.039 ms (3.888 ms / 100) 3.746 -> 3.747 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +1.04% +1.04%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.748 ms / 100) 5.485 -> 5.485 ( +0.00%) [ +0.05% +0.13% +0.00% / +0.15% +0.07% +0.00%] index_select const : Elapsed 0.055 ms (5.488 ms / 100) 5.488 -> 5.485 ( -0.05%) [ +0.11% +0.00% +0.09% / -0.04% -0.05% -0.04%] index_select wrap : Elapsed 0.055 ms (5.494 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.09% +0.11% +0.00% / -0.04% +0.02% -0.04%] index_select linear : Elapsed 0.055 ms (5.491 ms / 100) 5.489 -> 5.485 ( -0.07%) [ +0.09% +0.04% +0.00% / +0.04% +0.05% -0.07%] index_select reverse : Elapsed 0.055 ms (5.494 ms / 100) 5.481 -> 5.482 ( +0.02%) [ +0.16% +0.15% +0.00% / +0.20% +0.11% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.490 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.00% +0.00% +0.04% / +0.04% +0.11% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.485 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.04% +0.00% +0.07% / -0.09% -0.02% +0.00%] index_select spread : Elapsed 0.055 ms (5.492 ms / 100) 5.486 -> 5.494 ( +0.15%) [ +0.15% +0.18% +0.00% / +0.16% +0.15% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.494 ms / 100) 5.488 -> 5.481 ( -0.13%) [ +0.00% +0.04% +0.07% / +0.00% -0.13% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.488 ms / 100) 5.485 -> 5.489 ( +0.07%) [ +0.13% +0.09% +0.00% / +0.07% +0.09% +0.07%] index_select strided 7 : Elapsed 0.055 ms (5.492 ms / 100) 5.488 -> 5.489 ( +0.02%) [ +0.00% +0.00% +0.02% / +0.02% +0.05% +0.02%] index_select strided 8 : Elapsed 0.055 ms (5.488 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.22% +0.00% +0.00% / +0.00% +0.00% -0.09%] index_select random : Elapsed 0.055 ms (5.503 ms / 100) 5.487 -> 5.486 ( -0.02%) [ +0.09% +0.00% +0.15% / +0.11% +0.13% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.492 ms / 100) B = [20, 4, 40, 5] (stride (1, 100, 400, 20)) A = [20, 4, 16, 5] (stride (320, 1, 4, 64)) dim = 2 4.152 -> 4.154 ( +0.05%) [ +0.00% +0.00% +0.12% / +0.05% +0.58% +0.43%] index_add_ linear : Elapsed 0.042 ms (4.152 ms / 100) 4.009 -> 4.009 ( +0.00%) [ +0.00% +0.02% +0.05% / +0.00% +0.50% +0.42%] index_copy_ linear : Elapsed 0.040 ms (4.009 ms / 100) 4.167 -> 4.167 ( +0.00%) [ +0.10% +0.02% +0.00% / +0.00% +0.38% +0.38%] index_add_ reverse : Elapsed 0.042 ms (4.171 ms / 100) 4.020 -> 4.021 ( +0.02%) [ +0.00% +0.02% +0.02% / +0.02% +0.37% +0.37%] index_copy_ reverse : Elapsed 0.040 ms (4.020 ms / 100) 4.154 -> 4.154 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.72% +0.58%] index_add_ spread : Elapsed 0.042 ms (4.157 ms / 100) 4.005 -> 4.006 ( +0.02%) [ +0.02% +0.00% +0.00% / +0.02% +0.50% +0.50%] index_copy_ spread : Elapsed 0.040 ms (4.006 ms / 100) 4.144 -> 4.146 ( +0.05%) [ +0.07% +0.10% +0.00% / +0.05% +0.46% +0.51%] index_add_ strided 3 : Elapsed 0.041 ms (4.147 ms / 100) 4.007 -> 4.011 ( +0.10%) [ +0.00% +0.17% +0.10% / +0.10% +0.50% +0.67%] index_copy_ strided 3 : Elapsed 0.040 ms (4.007 ms / 100) 4.169 -> 4.164 ( -0.12%) [ +0.07% +0.00% +0.02% / -0.12% +0.53% +0.43%] index_add_ strided 7 : Elapsed 0.042 ms (4.172 ms / 100) 4.020 -> 4.019 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.50% +0.57%] index_copy_ strided 7 : Elapsed 0.040 ms (4.022 ms / 100) 4.152 -> 4.157 ( +0.12%) [ +0.00% +0.17% +0.10% / +0.12% +0.67% +0.51%] index_add_ perm : Elapsed 0.042 ms (4.152 ms / 100) 4.007 -> 4.010 ( +0.07%) [ +0.00% +0.10% +0.05% / +0.07% +0.52% +0.55%] index_copy_ perm : Elapsed 0.040 ms (4.007 ms / 100) 4.150 -> 4.153 ( +0.07%) [ +0.19% +0.19% +0.00% / +0.07% +0.70% +0.72%] index_add_ perm_sorted : Elapsed 0.042 ms (4.158 ms / 100) 4.007 -> 4.008 ( +0.02%) [ +0.07% +0.07% +0.00% / +0.02% +0.52% +0.55%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.010 ms / 100) 5.557 -> 5.565 ( +0.14%) [ +0.00% +0.04% +0.02% / +0.14% +0.29% +0.23%] index_select const : Elapsed 0.056 ms (5.557 ms / 100) 5.560 -> 5.558 ( -0.04%) [ +0.18% +0.18% +0.00% / -0.04% +0.23% +0.29%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.563 -> 5.567 ( +0.07%) [ +0.00% +0.14% +0.09% / +0.09% +0.13% +0.07%] index_select linear : Elapsed 0.056 ms (5.563 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.05% +0.16%] index_select reverse : Elapsed 0.056 ms (5.562 ms / 100) 5.562 -> 5.553 ( -0.16%) [ +0.14% +0.13% +0.00% / +0.04% -0.02% -0.16%] index_select skip64 : Elapsed 0.056 ms (5.570 ms / 100) 5.563 -> 5.555 ( -0.14%) [ +0.07% +0.04% +0.00% / +0.04% -0.02% -0.14%] index_select skip256 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.559 ( -0.18%) [ +0.04% +0.07% +0.00% / +0.00% -0.18% +0.00%] index_select spread : Elapsed 0.056 ms (5.571 ms / 100) 5.560 -> 5.565 ( +0.09%) [ +0.09% +0.14% +0.00% / +0.09% +0.18% +0.20%] index_select strided 3 : Elapsed 0.056 ms (5.565 ms / 100) 5.563 -> 5.560 ( -0.05%) [ +0.07% +0.04% +0.00% / -0.05% +0.20% +0.14%] index_select strided 5 : Elapsed 0.056 ms (5.567 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.13% +0.04% +0.00% / +0.00% +0.18% +0.31%] index_select strided 7 : Elapsed 0.056 ms (5.570 ms / 100) 5.555 -> 5.554 ( -0.02%) [ +0.07% +0.05% +0.00% / +0.02% -0.02% +0.05%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.00% +0.04% +0.11% / +0.18% +0.11% -0.02%] index_select random : Elapsed 0.056 ms (5.562 ms / 100) 5.562 -> 5.563 ( +0.02%) [ +0.00% +0.18% +0.02% / +0.09% +0.18% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.562 ms / 100) B = [20, 4, 40, 5] (stride (1, 20, 400, 80)) A = [20, 4, 16, 5] (stride (1, 1600, 100, 20)) dim = 2 4.159 -> 4.161 ( +0.05%) [ +0.07% +0.07% +0.00% / +0.05% +0.79% +0.75%] index_add_ linear : Elapsed 0.042 ms (4.162 ms / 100) 4.018 -> 4.019 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.75% +0.75%] index_copy_ linear : Elapsed 0.040 ms (4.021 ms / 100) 4.173 -> 4.172 ( -0.02%) [ +0.00% +0.02% +0.00% / -0.02% +0.74% +0.72%] index_add_ reverse : Elapsed 0.042 ms (4.173 ms / 100) 4.023 -> 4.024 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.82% +0.77%] index_copy_ reverse : Elapsed 0.040 ms (4.023 ms / 100) 4.172 -> 4.171 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.84% +0.55%] index_add_ spread : Elapsed 0.042 ms (4.173 ms / 100) 4.022 -> 4.021 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.82% +0.57%] index_copy_ spread : Elapsed 0.040 ms (4.022 ms / 100) 4.172 -> 4.172 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.00% +0.67% +0.70%] index_add_ strided 3 : Elapsed 0.042 ms (4.175 ms / 100) 4.028 -> 4.028 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.77% +0.82%] index_copy_ strided 3 : Elapsed 0.040 ms (4.031 ms / 100) 4.174 -> 4.174 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.69% +0.67%] index_add_ strided 7 : Elapsed 0.042 ms (4.174 ms / 100) 4.024 -> 4.025 ( +0.02%) [ +0.00% +0.10% +0.02% / +0.02% +0.75% +0.72%] index_copy_ strided 7 : Elapsed 0.040 ms (4.024 ms / 100) 4.160 -> 4.160 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.77% +0.75%] index_add_ perm : Elapsed 0.042 ms (4.160 ms / 100) 4.017 -> 4.019 ( +0.05%) [ +0.05% +0.12% +0.00% / +0.05% +0.77% +0.85%] index_copy_ perm : Elapsed 0.040 ms (4.019 ms / 100) 4.168 -> 4.169 ( +0.02%) [ +0.07% +0.00% +0.07% / +0.02% +1.01% +0.82%] index_add_ perm_sorted : Elapsed 0.042 ms (4.171 ms / 100) 4.030 -> 4.028 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.69% +0.69%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.030 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.13% +0.00% +0.02% / +0.05% +0.09% +0.11%] index_select const : Elapsed 0.056 ms (5.571 ms / 100) 5.573 -> 5.566 ( -0.13%) [ +0.13% +0.11% +0.00% / +0.02% -0.02% -0.13%] index_select wrap : Elapsed 0.056 ms (5.580 ms / 100) 5.573 -> 5.568 ( -0.09%) [ +0.00% +0.05% +0.02% / +0.11% -0.09% -0.04%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.572 -> 5.573 ( +0.02%) [ +0.13% +0.00% +0.07% / +0.14% +0.02% +0.09%] index_select reverse : Elapsed 0.056 ms (5.579 ms / 100) 5.562 -> 5.562 ( +0.00%) [ +0.00% +0.05% +0.16% / +0.00% +0.41% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.560 -> 5.568 ( +0.14%) [ +0.07% +0.00% +0.16% / +0.14% +0.25% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.564 ms / 100) 5.573 -> 5.568 ( -0.09%) [ +0.31% +0.00% +0.07% / +0.02% -0.04% -0.09%] index_select spread : Elapsed 0.056 ms (5.590 ms / 100) 5.573 -> 5.573 ( +0.00%) [ +0.00% +0.02% +0.07% / +0.00% +0.00% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.573 ms / 100) 5.572 -> 5.564 ( -0.14%) [ +0.00% +0.13% +0.05% / +0.07% -0.14% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.572 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.09% +0.02% +0.00% / +0.00% -0.04% +0.05%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.567 -> 5.558 ( -0.16%) [ +0.00% +0.04% +0.05% / -0.16% +0.04% +0.00%] index_select strided 8 : Elapsed 0.056 ms (5.567 ms / 100) 5.572 -> 5.567 ( -0.09%) [ +0.14% +0.00% +0.05% / -0.09% -0.05% +0.05%] index_select random : Elapsed 0.056 ms (5.580 ms / 100) 5.573 -> 5.571 ( -0.04%) [ +0.05% +0.04% +0.00% / +0.11% -0.04% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.576 ms / 100) B = [20, 4, 40, 5] (stride (40, 800, 1, 3200)) A = [20, 4, 16, 5] (stride (80, 1600, 5, 1)) dim = 2 4.054 -> 4.062 ( +0.20%) [ +0.00% +0.05% +0.15% / +0.20% +0.79% +0.72%] index_add_ linear : Elapsed 0.041 ms (4.054 ms / 100) 3.927 -> 3.931 ( +0.10%) [ +0.00% +0.00% +0.08% / +0.10% +0.66% +0.71%] index_copy_ linear : Elapsed 0.039 ms (3.927 ms / 100) 4.057 -> 4.060 ( +0.07%) [ +0.15% +0.00% +0.02% / +0.07% +0.62% +0.54%] index_add_ reverse : Elapsed 0.041 ms (4.063 ms / 100) 3.921 -> 3.923 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.61% +0.51%] index_copy_ reverse : Elapsed 0.039 ms (3.922 ms / 100) 4.048 -> 4.054 ( +0.15%) [ +0.17% +0.20% +0.00% / +0.15% +0.79% +0.72%] index_add_ spread : Elapsed 0.041 ms (4.055 ms / 100) 3.919 -> 3.924 ( +0.13%) [ +0.10% +0.00% +0.03% / +0.13% +0.61% +0.66%] index_copy_ spread : Elapsed 0.039 ms (3.923 ms / 100) 4.061 -> 4.064 ( +0.07%) [ +0.05% +0.10% +0.00% / +0.07% +0.59% +0.49%] index_add_ strided 3 : Elapsed 0.041 ms (4.063 ms / 100) 3.920 -> 3.924 ( +0.10%) [ +0.00% +0.03% +0.13% / +0.10% +0.74% +0.66%] index_copy_ strided 3 : Elapsed 0.039 ms (3.920 ms / 100) 4.060 -> 4.063 ( +0.07%) [ +0.12% +0.15% +0.00% / +0.07% +0.52% +0.52%] index_add_ strided 7 : Elapsed 0.041 ms (4.065 ms / 100) 3.918 -> 3.919 ( +0.03%) [ +0.10% +0.13% +0.00% / +0.03% +0.56% +0.74%] index_copy_ strided 7 : Elapsed 0.039 ms (3.922 ms / 100) 4.058 -> 4.058 ( +0.00%) [ +0.15% +0.05% +0.00% / +0.00% +0.67% +0.67%] index_add_ perm : Elapsed 0.041 ms (4.064 ms / 100) 3.930 -> 3.932 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.59% +0.59%] index_copy_ perm : Elapsed 0.039 ms (3.933 ms / 100) 4.064 -> 4.062 ( -0.05%) [ +0.05% +0.02% +0.00% / -0.05% +0.37% +0.39%] index_add_ perm_sorted : Elapsed 0.041 ms (4.066 ms / 100) 3.932 -> 3.933 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.43% +0.46%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.932 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.00% +0.13% +0.05% / +0.07% +0.25% +0.07%] index_select const : Elapsed 0.056 ms (5.559 ms / 100) 5.567 -> 5.564 ( -0.05%) [ +0.00% +0.07% +0.05% / -0.05% +0.11% +0.22%] index_select wrap : Elapsed 0.056 ms (5.567 ms / 100) 5.572 -> 5.578 ( +0.11%) [ +0.09% +0.11% +0.00% / +0.11% +0.11% +0.16%] index_select linear : Elapsed 0.056 ms (5.577 ms / 100) 5.565 -> 5.569 ( +0.07%) [ +0.00% +0.25% +0.13% / +0.07% +0.18% +0.20%] index_select reverse : Elapsed 0.056 ms (5.565 ms / 100) 5.568 -> 5.559 ( -0.16%) [ +0.11% +0.00% +0.04% / +0.04% -0.16% +0.05%] index_select skip64 : Elapsed 0.056 ms (5.574 ms / 100) 5.568 -> 5.558 ( -0.18%) [ +0.00% +0.04% +0.14% / -0.11% -0.18% -0.05%] index_select skip256 : Elapsed 0.056 ms (5.568 ms / 100) 5.572 -> 5.565 ( -0.13%) [ +0.00% +0.04% +0.05% / +0.02% +0.02% -0.13%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.567 -> 5.571 ( +0.07%) [ +0.00% +0.02% +0.04% / +0.07% +0.14% +0.11%] index_select strided 3 : Elapsed 0.056 ms (5.567 ms / 100) 5.569 -> 5.561 ( -0.14%) [ +0.02% +0.00% +0.02% / -0.14% +0.18% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.566 -> 5.571 ( +0.09%) [ +0.09% +0.00% +0.09% / +0.16% +0.09% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.571 ms / 100) 5.563 -> 5.564 ( +0.02%) [ +0.04% +0.00% +0.04% / +0.02% +0.04% +0.11%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.565 -> 5.571 ( +0.11%) [ +0.09% +0.09% +0.00% / +0.20% +0.16% +0.11%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.568 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.02% +0.13% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.567 ms / 100) B = [20, 4, 40, 5] (stride (1, 800, 20, 3200)) A = [20, 4, 16, 5] (stride (1, 1600, 20, 320)) dim = 2 4.281 -> 4.282 ( +0.02%) [ +0.12% +0.00% +0.00% / +0.02% +0.79% +0.77%] index_add_ linear : Elapsed 0.043 ms (4.286 ms / 100) 4.138 -> 4.137 ( -0.02%) [ +0.07% +0.02% +0.00% / -0.02% +0.72% +0.72%] index_copy_ linear : Elapsed 0.041 ms (4.141 ms / 100) 4.286 -> 4.285 ( -0.02%) [ +0.00% +0.05% +0.00% / -0.02% +0.63% +0.68%] index_add_ reverse : Elapsed 0.043 ms (4.286 ms / 100) 4.136 -> 4.138 ( +0.05%) [ +0.02% +0.05% +0.00% / +0.05% +0.70% +0.80%] index_copy_ reverse : Elapsed 0.041 ms (4.137 ms / 100) 4.274 -> 4.274 ( +0.00%) [ +0.02% +0.00% +0.09% / +0.00% +0.94% +0.70%] index_add_ spread : Elapsed 0.043 ms (4.275 ms / 100) 4.117 -> 4.118 ( +0.02%) [ +0.00% +0.02% +0.05% / +0.02% +0.63% +0.56%] index_copy_ spread : Elapsed 0.041 ms (4.117 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.66% +0.68%] index_add_ strided 3 : Elapsed 0.043 ms (4.272 ms / 100) 4.118 -> 4.119 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.66% +0.73%] index_copy_ strided 3 : Elapsed 0.041 ms (4.119 ms / 100) 4.284 -> 4.284 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.75% +0.77%] index_add_ strided 7 : Elapsed 0.043 ms (4.287 ms / 100) 4.137 -> 4.136 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.80% +0.68%] index_copy_ strided 7 : Elapsed 0.041 ms (4.138 ms / 100) 4.281 -> 4.281 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.84% +0.84%] index_add_ perm : Elapsed 0.043 ms (4.288 ms / 100) 4.137 -> 4.137 ( +0.00%) [ +0.05% +0.07% +0.00% / +0.00% +0.80% +0.75%] index_copy_ perm : Elapsed 0.041 ms (4.139 ms / 100) 4.270 -> 4.271 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.80% +0.75%] index_add_ perm_sorted : Elapsed 0.043 ms (4.271 ms / 100) 4.117 -> 4.122 ( +0.12%) [ +0.07% +0.05% +0.00% / +0.12% +0.78% +0.75%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.120 ms / 100) 5.569 -> 5.567 ( -0.04%) [ +0.13% +0.11% +0.00% / +0.11% +0.02% -0.04%] index_select const : Elapsed 0.056 ms (5.576 ms / 100) 5.580 -> 5.578 ( -0.04%) [ +0.18% +0.04% +0.00% / +0.07% -0.02% -0.04%] index_select wrap : Elapsed 0.056 ms (5.590 ms / 100) 5.583 -> 5.574 ( -0.16%) [ +0.05% +0.04% +0.00% / +0.04% +0.00% -0.16%] index_select linear : Elapsed 0.056 ms (5.586 ms / 100) 5.582 -> 5.576 ( -0.11%) [ +0.00% +0.07% +0.04% / +0.18% -0.04% -0.11%] index_select reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.568 -> 5.566 ( -0.04%) [ +0.16% +0.02% +0.00% / +0.05% -0.02% -0.04%] index_select skip64 : Elapsed 0.056 ms (5.577 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.14% +0.05% +0.00% / +0.11% +0.07% +0.18%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.580 -> 5.575 ( -0.09%) [ +0.02% +0.00% +0.14% / -0.04% -0.04% -0.09%] index_select spread : Elapsed 0.056 ms (5.581 ms / 100) 5.579 -> 5.574 ( -0.09%) [ +0.00% +0.16% +0.07% / +0.04% -0.09% +0.04%] index_select strided 3 : Elapsed 0.056 ms (5.579 ms / 100) 5.574 -> 5.572 ( -0.04%) [ +0.00% +0.14% +0.20% / +0.04% +0.04% -0.04%] index_select strided 5 : Elapsed 0.056 ms (5.574 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.09% +0.13% +0.00% / -0.04% +0.14% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.583 ms / 100) 5.569 -> 5.574 ( +0.09%) [ +0.16% +0.14% +0.00% / +0.20% +0.25% +0.09%] index_select strided 8 : Elapsed 0.056 ms (5.578 ms / 100) 5.578 -> 5.576 ( -0.04%) [ +0.04% +0.00% +0.09% / +0.02% +0.00% -0.04%] index_select random : Elapsed 0.056 ms (5.580 ms / 100) 5.581 -> 5.576 ( -0.09%) [ +0.13% +0.00% +0.05% / +0.11% -0.09% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.588 ms / 100) B = [20, 4, 40, 5] (stride (4, 1, 80, 3200)) A = [20, 4, 16, 5] (stride (1, 320, 20, 1280)) dim = 2 3.942 -> 3.942 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.61% +0.58%] index_add_ linear : Elapsed 0.039 ms (3.943 ms / 100) 3.792 -> 3.792 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.61% +0.58%] index_copy_ linear : Elapsed 0.038 ms (3.793 ms / 100) 3.962 -> 3.961 ( -0.03%) [ +0.03% +0.10% +0.00% / -0.03% +0.58% +0.61%] index_add_ reverse : Elapsed 0.040 ms (3.963 ms / 100) 3.795 -> 3.793 ( -0.05%) [ +0.03% +0.03% +0.00% / -0.05% +0.55% +0.55%] index_copy_ reverse : Elapsed 0.038 ms (3.796 ms / 100) 3.957 -> 3.956 ( -0.03%) [ +0.08% +0.05% +0.00% / -0.03% +0.56% +0.53%] index_add_ spread : Elapsed 0.040 ms (3.960 ms / 100) 3.797 -> 3.797 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.61% +0.58%] index_copy_ spread : Elapsed 0.038 ms (3.797 ms / 100) 3.946 -> 3.952 ( +0.15%) [ +0.00% +0.18% +0.00% / +0.15% +0.53% +0.48%] index_add_ strided 3 : Elapsed 0.039 ms (3.946 ms / 100) 3.798 -> 3.804 ( +0.16%) [ +0.00% +0.21% +0.03% / +0.16% +0.53% +0.55%] index_copy_ strided 3 : Elapsed 0.038 ms (3.798 ms / 100) 3.965 -> 3.964 ( -0.03%) [ +0.05% +0.05% +0.00% / -0.03% +0.40% +0.35%] index_add_ strided 7 : Elapsed 0.040 ms (3.967 ms / 100) 3.798 -> 3.799 ( +0.03%) [ +0.05% +0.08% +0.00% / +0.03% +0.42% +0.34%] index_copy_ strided 7 : Elapsed 0.038 ms (3.800 ms / 100) 3.945 -> 3.946 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.48% +0.38%] index_add_ perm : Elapsed 0.039 ms (3.947 ms / 100) 3.793 -> 3.796 ( +0.08%) [ +0.03% +0.05% +0.00% / +0.08% +0.47% +0.47%] index_copy_ perm : Elapsed 0.038 ms (3.794 ms / 100) 3.949 -> 3.950 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.35% +0.35%] index_add_ perm_sorted : Elapsed 0.040 ms (3.950 ms / 100) 3.796 -> 3.796 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.42% +0.40%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.796 ms / 100) 5.471 -> 5.476 ( +0.09%) [ +0.13% +0.05% +0.00% / +0.09% +0.24% +0.18%] index_select const : Elapsed 0.055 ms (5.478 ms / 100) 5.474 -> 5.477 ( +0.05%) [ +0.00% +0.00% +0.09% / +0.05% +0.27% +0.13%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.476 -> 5.483 ( +0.13%) [ +0.00% +0.04% +0.13% / +0.13% +0.20% +0.15%] index_select linear : Elapsed 0.055 ms (5.476 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.00% +0.04% +0.02% / +0.09% +0.04% +0.02%] index_select reverse : Elapsed 0.055 ms (5.480 ms / 100) 5.476 -> 5.473 ( -0.05%) [ +0.04% +0.00% +0.02% / +0.13% +0.00% -0.05%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.478 -> 5.465 ( -0.24%) [ +0.02% +0.00% +0.04% / -0.24% -0.20% -0.16%] index_select skip256 : Elapsed 0.055 ms (5.479 ms / 100) 5.477 -> 5.480 ( +0.05%) [ +0.02% +0.00% +0.22% / +0.15% +0.05% +0.15%] index_select spread : Elapsed 0.055 ms (5.478 ms / 100) 5.474 -> 5.478 ( +0.07%) [ +0.11% +0.00% +0.09% / +0.09% +0.07% +0.09%] index_select strided 3 : Elapsed 0.055 ms (5.480 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.00% +0.04% +0.05% / +0.05% +0.07% +0.02%] index_select strided 5 : Elapsed 0.055 ms (5.477 ms / 100) 5.478 -> 5.478 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.07% +0.24%] index_select strided 7 : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.11% +0.05%] index_select strided 8 : Elapsed 0.055 ms (5.473 ms / 100) 5.477 -> 5.480 ( +0.05%) [ +0.00% +0.04% +0.00% / +0.24% +0.05% +0.11%] index_select random : Elapsed 0.055 ms (5.477 ms / 100) 5.474 -> 5.477 ( +0.05%) [ +0.27% +0.00% +0.11% / +0.09% +0.05% +0.29%] index_select random_sorted : Elapsed 0.055 ms (5.489 ms / 100) out_shape = [20, 4, 16, 40] in_shape = [20, 4, 16, 5] idx_dim = 3 B = [20, 4, 16, 40] (stride (2560, 640, 1, 16)) A = [20, 4, 16, 5] (stride (320, 1, 20, 4)) dim = 3 1.423 -> 1.423 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.49% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.29% +0.29%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.56% +0.63%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.07% +0.07% / +0.15% +0.44% +0.44%] index_copy_ reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.56% +0.56%] index_add_ spread : Elapsed 0.014 ms (1.424 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.51% +0.36%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.70% +0.56%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.36% +0.29%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.422 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.36% +0.43%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.63% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.07% +0.00% / +0.15% +0.44% +0.44%] index_copy_ perm : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.70% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.29% +0.36%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.209 -> 8.204 ( -0.06%) [ +0.01% +0.06% +0.00% / +0.26% -0.02% -0.06%] index_select const : Elapsed 0.082 ms (8.210 ms / 100) 8.207 -> 8.202 ( -0.06%) [ +0.00% +0.18% +0.32% / -0.04% -0.06% +0.04%] index_select wrap : Elapsed 0.082 ms (8.207 ms / 100) 8.201 -> 8.223 ( +0.27%) [ +0.00% +0.33% +0.12% / +0.55% +0.29% +0.27%] index_select linear : Elapsed 0.082 ms (8.201 ms / 100) 8.207 -> 8.209 ( +0.02%) [ +0.00% +0.06% +0.05% / +0.15% +0.30% +0.02%] index_select reverse : Elapsed 0.082 ms (8.207 ms / 100) 8.208 -> 8.204 ( -0.05%) [ +0.00% +0.21% +0.10% / +0.05% +0.02% -0.05%] index_select skip64 : Elapsed 0.082 ms (8.208 ms / 100) 8.199 -> 8.207 ( +0.10%) [ +0.21% +0.00% +0.29% / +0.40% +0.20% +0.10%] index_select skip256 : Elapsed 0.082 ms (8.216 ms / 100) 8.210 -> 8.199 ( -0.13%) [ +0.11% +0.44% +0.00% / -0.05% -0.13% +0.28%] index_select spread : Elapsed 0.082 ms (8.219 ms / 100) 8.204 -> 8.197 ( -0.09%) [ +0.22% +0.11% +0.00% / +0.33% -0.09% +0.13%] index_select strided 3 : Elapsed 0.082 ms (8.222 ms / 100) 8.218 -> 8.195 ( -0.28%) [ +0.07% +0.00% +0.09% / +0.23% -0.28% -0.19%] index_select random : Elapsed 0.082 ms (8.224 ms / 100) 8.218 -> 8.209 ( -0.11%) [ +0.00% +0.02% +0.02% / -0.11% +0.07% +0.23%] index_select random_sorted : Elapsed 0.082 ms (8.218 ms / 100) B = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) A = [20, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 3 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.59% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.54%] index_copy_ linear : Elapsed 0.015 ms (1.480 ms / 100) 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.66%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.68% +0.74%] index_copy_ reverse : Elapsed 0.015 ms (1.480 ms / 100) 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.59% +0.59%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.68% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.480 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.66% +0.72%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.14% +0.00% +0.00% / -0.07% +0.61% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.482 ms / 100) 1.523 -> 1.522 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.480 -> 1.479 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.61% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.523 -> 1.522 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.66% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.68% +0.81%] index_copy_ perm : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.72% +0.72%] index_add_ perm_sorted : Elapsed 0.015 ms (1.524 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.88% +0.68%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) 8.569 -> 8.583 ( +0.16%) [ +0.12% +0.06% +0.00% / +0.19% +0.26% +0.16%] index_select const : Elapsed 0.086 ms (8.579 ms / 100) 8.562 -> 8.574 ( +0.14%) [ +0.22% +0.21% +0.00% / +0.14% +0.37% +0.41%] index_select wrap : Elapsed 0.086 ms (8.581 ms / 100) 8.576 -> 8.587 ( +0.13%) [ +0.21% +0.12% +0.00% / +0.19% +0.13% +0.35%] index_select linear : Elapsed 0.086 ms (8.594 ms / 100) 8.584 -> 8.582 ( -0.02%) [ +0.01% +0.00% +0.06% / +0.07% -0.02% +0.24%] index_select reverse : Elapsed 0.086 ms (8.585 ms / 100) 8.573 -> 8.569 ( -0.05%) [ +0.12% +0.00% +0.00% / -0.05% +0.19% +0.31%] index_select skip64 : Elapsed 0.086 ms (8.583 ms / 100) 8.574 -> 8.579 ( +0.06%) [ +0.12% +0.00% +0.01% / +0.14% +0.06% +0.38%] index_select skip256 : Elapsed 0.086 ms (8.584 ms / 100) 8.565 -> 8.583 ( +0.21%) [ +0.14% +0.00% +0.18% / +0.25% +0.21% +0.36%] index_select spread : Elapsed 0.086 ms (8.577 ms / 100) 8.570 -> 8.573 ( +0.04%) [ +0.05% +0.01% +0.00% / +0.04% +0.29% +0.25%] index_select strided 3 : Elapsed 0.086 ms (8.574 ms / 100) 8.572 -> 8.576 ( +0.05%) [ +0.00% +0.14% +0.14% / +0.05% +0.30% +0.35%] index_select random : Elapsed 0.086 ms (8.572 ms / 100) 8.572 -> 8.578 ( +0.07%) [ +0.34% +0.17% +0.00% / +0.07% +0.44% +0.41%] index_select random_sorted : Elapsed 0.086 ms (8.601 ms / 100) B = [20, 4, 16, 40] (stride (640, 12800, 40, 1)) A = [20, 4, 16, 5] (stride (64, 1, 4, 1280)) dim = 3 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.39% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.482 ( +0.27%) [ +0.07% +0.00% +0.07% / +0.27% +0.61% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.39% +0.39%] index_add_ reverse : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.523 ( +0.13%) [ +0.20% +0.13% +0.00% / +0.13% +0.53% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.524 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.47% +0.41%] index_copy_ spread : Elapsed 0.015 ms (1.478 ms / 100) 1.522 -> 1.526 ( +0.26%) [ +0.07% +0.07% +0.00% / +0.26% +0.53% +0.46%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.54% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.480 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.53% +0.46%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.477 ( -0.14%) [ +0.07% +0.00% +0.00% / -0.14% +0.41% +0.41%] index_copy_ strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.53% +0.53%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.47% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.479 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.59% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.478 ( +0.07%) [ +0.14% +0.00% +0.00% / +0.07% +0.54% +0.47%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) 8.550 -> 8.562 ( +0.14%) [ +0.25% +0.00% +0.21% / +0.14% +0.18% +0.29%] index_select const : Elapsed 0.086 ms (8.571 ms / 100) 8.575 -> 8.590 ( +0.17%) [ +0.00% +0.20% +0.05% / +0.17% +0.20% +0.26%] index_select wrap : Elapsed 0.086 ms (8.575 ms / 100) 8.572 -> 8.572 ( +0.00%) [ +0.00% +0.34% +0.10% / +0.00% +0.26% +0.00%] index_select linear : Elapsed 0.086 ms (8.572 ms / 100) 8.570 -> 8.551 ( -0.22%) [ +0.05% +0.00% +0.07% / -0.22% +0.08% +0.15%] index_select reverse : Elapsed 0.086 ms (8.574 ms / 100) 8.567 -> 8.569 ( +0.02%) [ +0.01% +0.00% +0.18% / +0.02% +0.23% +0.22%] index_select skip64 : Elapsed 0.086 ms (8.568 ms / 100) 8.553 -> 8.558 ( +0.06%) [ +0.00% +0.14% +0.11% / +0.30% +0.06% +0.39%] index_select skip256 : Elapsed 0.086 ms (8.553 ms / 100) 8.564 -> 8.573 ( +0.11%) [ +0.00% +0.08% +0.13% / +0.11% +0.27% +0.28%] index_select spread : Elapsed 0.086 ms (8.564 ms / 100) 8.561 -> 8.577 ( +0.19%) [ +0.19% +0.57% +0.00% / +0.23% +0.46% +0.19%] index_select strided 3 : Elapsed 0.086 ms (8.577 ms / 100) 8.582 -> 8.580 ( -0.02%) [ +0.06% +0.00% +0.02% / +0.01% +0.23% -0.02%] index_select random : Elapsed 0.086 ms (8.587 ms / 100) 8.566 -> 8.575 ( +0.11%) [ +0.05% +0.11% +0.00% / +0.11% +0.20% +0.16%] index_select random_sorted : Elapsed 0.086 ms (8.570 ms / 100) B = [20, 4, 16, 40] (stride (160, 1, 3200, 4)) A = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) dim = 3 1.316 -> 1.317 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +0.76% +0.76%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.71% +0.71%] index_copy_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.61% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.61% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_add_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.71% +0.86%] index_copy_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.68% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.61%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.94% +0.71%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.76% +0.76%] index_add_ perm_sorted : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.79% +0.71%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 7.858 -> 7.862 ( +0.05%) [ +0.14% +0.00% +0.20% / +0.05% +0.32% +0.24%] index_select const : Elapsed 0.079 ms (7.869 ms / 100) 7.892 -> 7.890 ( -0.03%) [ +0.00% +0.13% +0.22% / -0.03% +0.29% +0.49%] index_select wrap : Elapsed 0.079 ms (7.892 ms / 100) 7.885 -> 7.886 ( +0.01%) [ +0.14% +0.08% +0.00% / +0.01% +0.38% +0.42%] index_select linear : Elapsed 0.079 ms (7.896 ms / 100) 7.870 -> 7.887 ( +0.22%) [ +0.19% +0.19% +0.00% / +0.22% +0.27% +0.48%] index_select reverse : Elapsed 0.079 ms (7.885 ms / 100) 7.866 -> 7.863 ( -0.04%) [ +0.00% +0.09% +0.04% / -0.04% +0.37% +0.25%] index_select skip64 : Elapsed 0.079 ms (7.866 ms / 100) 7.862 -> 7.859 ( -0.04%) [ +0.00% +0.14% +0.01% / -0.04% +0.10% +0.60%] index_select skip256 : Elapsed 0.079 ms (7.862 ms / 100) 7.885 -> 7.874 ( -0.14%) [ +0.03% +0.08% +0.00% / -0.14% -0.01% +0.14%] index_select spread : Elapsed 0.079 ms (7.887 ms / 100) 7.883 -> 7.895 ( +0.15%) [ +0.00% +0.03% +0.28% / +0.15% +0.32% +0.39%] index_select strided 3 : Elapsed 0.079 ms (7.883 ms / 100) 7.883 -> 7.876 ( -0.09%) [ +0.23% +0.00% +0.03% / -0.09% +0.48% +0.47%] index_select random : Elapsed 0.079 ms (7.901 ms / 100) 7.879 -> 7.894 ( +0.19%) [ +0.20% +0.14% +0.00% / +0.19% +0.29% +0.44%] index_select random_sorted : Elapsed 0.079 ms (7.895 ms / 100) B = [20, 4, 16, 40] (stride (1, 800, 3200, 20)) A = [20, 4, 16, 5] (stride (80, 1600, 5, 1)) dim = 3 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.46% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.34% +0.41%] index_copy_ linear : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.39% +0.39%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.53% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.54% +0.34%] index_copy_ spread : Elapsed 0.015 ms (1.480 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.478 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.47% +0.54%] index_copy_ strided 3 : Elapsed 0.015 ms (1.478 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.479 -> 1.477 ( -0.14%) [ +0.07% +0.07% +0.00% / -0.14% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.015 ms (1.480 ms / 100) 1.519 -> 1.521 ( +0.13%) [ +0.13% +0.07% +0.00% / +0.13% +0.66% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.521 ms / 100) 1.478 -> 1.479 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.54% +0.47%] index_copy_ perm : Elapsed 0.015 ms (1.478 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.66% +0.66%] index_add_ perm_sorted : Elapsed 0.015 ms (1.521 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.47% +0.41%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.480 ms / 100) 8.540 -> 8.531 ( -0.11%) [ +0.05% +0.19% +0.00% / -0.11% +0.15% -0.01%] index_select const : Elapsed 0.085 ms (8.544 ms / 100) 8.528 -> 8.540 ( +0.14%) [ +0.00% +0.21% +0.13% / +0.48% +0.27% +0.14%] index_select wrap : Elapsed 0.085 ms (8.528 ms / 100) 8.526 -> 8.540 ( +0.16%) [ +0.22% +0.34% +0.00% / +0.28% +0.30% +0.16%] index_select linear : Elapsed 0.085 ms (8.545 ms / 100) 8.525 -> 8.548 ( +0.27%) [ +0.19% +0.28% +0.00% / +0.27% +0.63% +0.33%] index_select reverse : Elapsed 0.085 ms (8.541 ms / 100) 8.537 -> 8.531 ( -0.07%) [ +0.05% +0.12% +0.00% / -0.07% +0.11% -0.02%] index_select skip64 : Elapsed 0.085 ms (8.541 ms / 100) 8.531 -> 8.537 ( +0.07%) [ +0.06% +0.04% +0.00% / +0.18% +0.11% +0.07%] index_select skip256 : Elapsed 0.085 ms (8.536 ms / 100) 8.530 -> 8.531 ( +0.01%) [ +0.09% +0.16% +0.00% / +0.01% +0.20% +0.06%] index_select spread : Elapsed 0.085 ms (8.538 ms / 100) 8.528 -> 8.543 ( +0.18%) [ +0.26% +0.00% +0.05% / +0.18% +0.27% +0.21%] index_select strided 3 : Elapsed 0.086 ms (8.550 ms / 100) 8.526 -> 8.537 ( +0.13%) [ +0.16% +0.00% +0.08% / +0.25% +0.19% +0.13%] index_select random : Elapsed 0.085 ms (8.540 ms / 100) 8.516 -> 8.544 ( +0.33%) [ +0.33% +0.00% +0.20% / +0.33% +0.40% +0.43%] index_select random_sorted : Elapsed 0.085 ms (8.544 ms / 100) B = [20, 4, 16, 40] (stride (4, 1, 3200, 80)) A = [20, 4, 16, 5] (stride (320, 5, 20, 1)) dim = 3 1.423 -> 1.423 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.70% +0.63%] index_add_ linear : Elapsed 0.014 ms (1.425 ms / 100) 1.382 -> 1.380 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.36% +0.36%] index_copy_ linear : Elapsed 0.014 ms (1.382 ms / 100) 1.424 -> 1.424 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.56% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.72% +0.72%] index_copy_ reverse : Elapsed 0.014 ms (1.380 ms / 100) 1.424 -> 1.423 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.63% +0.63%] index_add_ spread : Elapsed 0.014 ms (1.425 ms / 100) 1.385 -> 1.385 ( +0.00%) [ +0.14% +0.07% +0.00% / +0.00% +0.79% +0.79%] index_copy_ spread : Elapsed 0.014 ms (1.387 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.28% +0.07% +0.00% / +0.07% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.014 ms (1.427 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.58% +0.80%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.63% +0.70%] index_add_ strided 7 : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.72%] index_copy_ strided 7 : Elapsed 0.014 ms (1.380 ms / 100) 1.423 -> 1.424 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.77% +0.70%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.00% +0.14% / +0.00% +0.72% +0.72%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.77% +0.84%] index_add_ perm_sorted : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.196 -> 8.213 ( +0.21%) [ +0.07% +0.00% +0.15% / +0.21% +0.35% +0.35%] index_select const : Elapsed 0.082 ms (8.202 ms / 100) 8.197 -> 8.197 ( +0.00%) [ +0.02% +0.04% +0.00% / +0.00% +0.63% +0.24%] index_select wrap : Elapsed 0.082 ms (8.199 ms / 100) 8.197 -> 8.211 ( +0.17%) [ +0.17% +0.00% +0.18% / +0.17% +0.40% +0.20%] index_select linear : Elapsed 0.082 ms (8.211 ms / 100) 8.211 -> 8.190 ( -0.26%) [ +0.01% +0.00% +0.05% / -0.26% +0.35% +0.21%] index_select reverse : Elapsed 0.082 ms (8.212 ms / 100) 8.208 -> 8.209 ( +0.01%) [ +0.06% +0.13% +0.00% / +0.11% +0.06% +0.01%] index_select skip64 : Elapsed 0.082 ms (8.213 ms / 100) 8.202 -> 8.208 ( +0.07%) [ +0.00% +0.11% +0.26% / +0.07% +0.40% +0.26%] index_select skip256 : Elapsed 0.082 ms (8.202 ms / 100) 8.205 -> 8.201 ( -0.05%) [ +0.00% +0.15% +0.23% / -0.05% +0.22% +0.40%] index_select spread : Elapsed 0.082 ms (8.205 ms / 100) 8.195 -> 8.214 ( +0.23%) [ +0.04% +0.05% +0.00% / +0.23% +0.51% +0.44%] index_select strided 3 : Elapsed 0.082 ms (8.198 ms / 100) 8.202 -> 8.201 ( -0.01%) [ +0.32% +0.00% +0.28% / -0.01% +0.32% +0.26%] index_select random : Elapsed 0.082 ms (8.228 ms / 100) 8.195 -> 8.215 ( +0.24%) [ +0.05% +0.00% +0.26% / +0.24% +0.39% +0.44%] index_select random_sorted : Elapsed 0.082 ms (8.199 ms / 100) B = [20, 4, 16, 40] (stride (64, 16, 1, 1280)) A = [20, 4, 16, 5] (stride (1, 320, 20, 1280)) dim = 3 1.277 -> 1.278 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.31% +0.31%] index_add_ linear : Elapsed 0.013 ms (1.277 ms / 100) 1.225 -> 1.225 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.41% +0.41%] index_copy_ linear : Elapsed 0.012 ms (1.227 ms / 100) 1.285 -> 1.285 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.23% +0.31%] index_add_ reverse : Elapsed 0.013 ms (1.286 ms / 100) 1.231 -> 1.231 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.32% +0.32%] index_copy_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.280 -> 1.281 ( +0.08%) [ +0.31% +0.16% +0.00% / +0.08% +0.47% +0.55%] index_add_ spread : Elapsed 0.013 ms (1.284 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.41% +0.33%] index_copy_ spread : Elapsed 0.012 ms (1.229 ms / 100) 1.280 -> 1.278 ( -0.16%) [ +0.23% +0.00% +0.08% / -0.16% +0.47% +0.47%] index_add_ strided 3 : Elapsed 0.013 ms (1.283 ms / 100) 1.229 -> 1.229 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.57% +0.49%] index_copy_ strided 3 : Elapsed 0.012 ms (1.229 ms / 100) 1.283 -> 1.285 ( +0.16%) [ +0.23% +0.23% +0.00% / +0.16% +0.39% +0.39%] index_add_ strided 7 : Elapsed 0.013 ms (1.286 ms / 100) 1.230 -> 1.230 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.41% +0.49%] index_copy_ strided 7 : Elapsed 0.012 ms (1.230 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.71% +0.55%] index_add_ perm : Elapsed 0.013 ms (1.278 ms / 100) 1.224 -> 1.223 ( -0.08%) [ +0.00% +0.16% +0.00% / -0.08% +0.49% +0.41%] index_copy_ perm : Elapsed 0.012 ms (1.224 ms / 100) 1.281 -> 1.281 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.47% +0.94%] index_add_ perm_sorted : Elapsed 0.013 ms (1.281 ms / 100) 1.229 -> 1.228 ( -0.08%) [ +0.00% +0.08% +0.00% / -0.08% +0.49% +0.49%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.229 ms / 100) 7.570 -> 7.563 ( -0.09%) [ +0.05% +0.11% +0.00% / -0.07% -0.09% +0.11%] index_select const : Elapsed 0.076 ms (7.574 ms / 100) 7.580 -> 7.577 ( -0.04%) [ +0.00% +0.04% +0.05% / +0.00% +0.21% -0.04%] index_select wrap : Elapsed 0.076 ms (7.580 ms / 100) 7.563 -> 7.565 ( +0.03%) [ +0.34% +0.00% +0.01% / +0.03% +0.38% +0.32%] index_select linear : Elapsed 0.076 ms (7.589 ms / 100) 7.568 -> 7.576 ( +0.11%) [ +0.07% +0.12% +0.00% / +0.11% +0.22% +0.15%] index_select reverse : Elapsed 0.076 ms (7.573 ms / 100) 7.565 -> 7.574 ( +0.12%) [ +0.25% +0.20% +0.00% / +0.30% +0.12% +0.26%] index_select skip64 : Elapsed 0.076 ms (7.584 ms / 100) 7.562 -> 7.574 ( +0.16%) [ +0.11% +0.04% +0.00% / +0.16% +0.16% +0.28%] index_select skip256 : Elapsed 0.076 ms (7.570 ms / 100) 7.576 -> 7.570 ( -0.08%) [ +0.00% +0.01% +0.03% / -0.08% +0.26% +0.37%] index_select spread : Elapsed 0.076 ms (7.576 ms / 100) 7.573 -> 7.577 ( +0.05%) [ +0.09% +0.00% +0.09% / +0.05% +0.21% +0.24%] index_select strided 3 : Elapsed 0.076 ms (7.580 ms / 100) 7.572 -> 7.581 ( +0.12%) [ +0.00% +0.12% +0.17% / +0.12% +0.40% +0.25%] index_select random : Elapsed 0.076 ms (7.572 ms / 100) 7.574 -> 7.579 ( +0.07%) [ +0.22% +0.37% +0.00% / +0.09% +0.18% +0.07%] index_select random_sorted : Elapsed 0.076 ms (7.591 ms / 100) B = [20, 4, 16, 40] (stride (1, 20, 80, 1280)) A = [20, 4, 16, 5] (stride (64, 16, 1, 1280)) dim = 3 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.68% +0.68%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.63% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.317 -> 1.317 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.76% +0.68%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.71% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.68% +0.61%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.79% +0.63%] index_copy_ spread : Elapsed 0.013 ms (1.272 ms / 100) 1.317 -> 1.319 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.61% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.013 ms (1.273 ms / 100) 1.316 -> 1.318 ( +0.15%) [ +0.15% +0.08% +0.00% / +0.15% +0.76% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.23% +0.15% +0.00% / +0.08% +0.76% +0.84%] index_add_ perm : Elapsed 0.013 ms (1.319 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.79% +0.94%] index_copy_ perm : Elapsed 0.013 ms (1.272 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.68% +0.68%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.86% +0.79%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.272 ms / 100) 7.847 -> 7.848 ( +0.01%) [ +0.00% +0.18% +0.13% / +0.01% +0.22% +0.41%] index_select const : Elapsed 0.078 ms (7.847 ms / 100) 7.879 -> 7.882 ( +0.04%) [ +0.00% +0.04% +0.39% / +0.13% +0.43% +0.04%] index_select wrap : Elapsed 0.079 ms (7.879 ms / 100) 7.869 -> 7.872 ( +0.04%) [ +0.22% +0.00% +0.03% / +0.04% +0.25% +0.25%] index_select linear : Elapsed 0.079 ms (7.886 ms / 100) 7.879 -> 7.879 ( +0.00%) [ +0.01% +0.00% +0.04% / +0.00% +0.25% +0.04%] index_select reverse : Elapsed 0.079 ms (7.880 ms / 100) 7.848 -> 7.848 ( +0.00%) [ +0.00% +0.00% +0.25% / +0.00% +0.51% +0.32%] index_select skip64 : Elapsed 0.078 ms (7.848 ms / 100) 7.859 -> 7.854 ( -0.06%) [ +0.00% +0.00% +0.15% / -0.06% +0.23% +0.25%] index_select skip256 : Elapsed 0.079 ms (7.859 ms / 100) 7.868 -> 7.869 ( +0.01%) [ +0.00% +0.13% +0.01% / +0.01% +0.22% +0.31%] index_select spread : Elapsed 0.079 ms (7.868 ms / 100) 7.871 -> 7.878 ( +0.09%) [ +0.11% +0.00% +0.23% / +0.09% +0.37% +0.20%] index_select strided 3 : Elapsed 0.079 ms (7.880 ms / 100) 7.884 -> 7.872 ( -0.15%) [ +0.15% +0.06% +0.00% / -0.15% +0.27% +0.39%] index_select random : Elapsed 0.079 ms (7.896 ms / 100) 7.863 -> 7.863 ( +0.00%) [ +0.04% +0.03% +0.00% / +0.00% +0.48% +0.33%] index_select random_sorted : Elapsed 0.079 ms (7.866 ms / 100) out_shape = [40, 5, 4, 16] in_shape = [20, 5, 4, 16] idx_dim = 0 B = [40, 5, 4, 16] (stride (320, 16, 80, 1)) A = [20, 5, 4, 16] (stride (64, 1280, 1, 4)) dim = 0 2.437 -> 2.449 ( +0.49%) [ +0.08% +0.16% +0.00% / +0.49% +0.78% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.439 ms / 100) 2.435 -> 2.448 ( +0.53%) [ +0.33% +0.00% +0.16% / +0.53% +0.62% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.442 -> 2.452 ( +0.41%) [ +0.00% +0.20% +0.00% / +0.41% +0.61% +0.53%] index_add_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.438 -> 2.450 ( +0.49%) [ +0.04% +0.00% +0.12% / +0.57% +0.49% +0.49%] index_copy_ reverse : Elapsed 0.024 ms (2.439 ms / 100) 2.447 -> 2.450 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.49% +0.41% +0.12%] index_add_ spread : Elapsed 0.024 ms (2.448 ms / 100) 2.439 -> 2.455 ( +0.66%) [ +0.08% +0.00% +0.21% / +0.66% +0.66% +0.66%] index_copy_ spread : Elapsed 0.024 ms (2.441 ms / 100) 2.445 -> 2.456 ( +0.45%) [ +0.12% +0.12% +0.00% / +0.61% +0.45% +0.53%] index_add_ strided 3 : Elapsed 0.024 ms (2.448 ms / 100) 2.438 -> 2.449 ( +0.45%) [ +0.00% +0.37% +0.04% / +0.45% +0.62% +0.74%] index_copy_ strided 3 : Elapsed 0.024 ms (2.438 ms / 100) 2.446 -> 2.453 ( +0.29%) [ +0.12% +0.04% +0.00% / +0.57% +0.41% +0.29%] index_add_ strided 7 : Elapsed 0.024 ms (2.449 ms / 100) 2.441 -> 2.451 ( +0.41%) [ +0.16% +0.00% +0.12% / +0.45% +0.41% +0.57%] index_copy_ strided 7 : Elapsed 0.024 ms (2.445 ms / 100) 2.446 -> 2.454 ( +0.33%) [ +0.00% +0.04% +0.00% / +0.33% +0.45% +0.45%] index_add_ perm : Elapsed 0.024 ms (2.446 ms / 100) 2.438 -> 2.452 ( +0.57%) [ +0.04% +0.00% +0.04% / +0.57% +0.86% +0.70%] index_copy_ perm : Elapsed 0.024 ms (2.439 ms / 100) 2.441 -> 2.449 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.33% +0.53% +0.70%] index_add_ perm_sorted : Elapsed 0.024 ms (2.441 ms / 100) 2.439 -> 2.452 ( +0.53%) [ +0.04% +0.00% +0.00% / +0.53% +0.78% +0.66%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.440 ms / 100) 4.495 -> 4.487 ( -0.18%) [ +0.00% +0.02% +0.07% / -0.18% +0.22% +0.07%] index_select const : Elapsed 0.045 ms (4.495 ms / 100) 4.496 -> 4.490 ( -0.13%) [ +0.24% +0.00% +0.09% / -0.13% +0.13% +0.02%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.495 -> 4.494 ( -0.02%) [ +0.07% +0.00% +0.00% / -0.02% +0.24% +0.27%] index_select linear : Elapsed 0.045 ms (4.498 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.00% +0.11% +0.11% / +0.02% +0.02% +0.09%] index_select reverse : Elapsed 0.045 ms (4.499 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.02% +0.00% +0.02% / +0.16% +0.09% +0.07%] index_select skip64 : Elapsed 0.045 ms (4.494 ms / 100) 4.489 -> 4.495 ( +0.13%) [ +0.25% +0.20% +0.00% / +0.13% +0.18% +0.31%] index_select skip256 : Elapsed 0.045 ms (4.500 ms / 100) 4.494 -> 4.500 ( +0.13%) [ +0.22% +0.00% +0.02% / +0.13% +0.27% +0.16%] index_select spread : Elapsed 0.045 ms (4.504 ms / 100) 4.496 -> 4.494 ( -0.04%) [ +0.00% +0.02% +0.11% / -0.04% +0.09% +0.29%] index_select strided 3 : Elapsed 0.045 ms (4.496 ms / 100) 4.489 -> 4.496 ( +0.16%) [ +0.00% +0.27% +0.09% / +0.25% +0.38% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.489 ms / 100) 4.496 -> 4.497 ( +0.02%) [ +0.07% +0.18% +0.00% / +0.02% +0.24% +0.09%] index_select strided 7 : Elapsed 0.045 ms (4.499 ms / 100) 4.489 -> 4.494 ( +0.11%) [ +0.02% +0.00% +0.07% / +0.11% +0.31% +0.22%] index_select strided 8 : Elapsed 0.045 ms (4.490 ms / 100) 4.494 -> 4.494 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +0.13% +0.07%] index_select strided 16 : Elapsed 0.045 ms (4.499 ms / 100) 4.492 -> 4.498 ( +0.13%) [ +0.22% +0.13% +0.00% / +0.16% +0.24% +0.13%] index_select random : Elapsed 0.045 ms (4.502 ms / 100) 4.494 -> 4.501 ( +0.16%) [ +0.04% +0.16% +0.00% / +0.16% +0.18% +0.24%] index_select random_sorted : Elapsed 0.045 ms (4.496 ms / 100) B = [40, 5, 4, 16] (stride (320, 1, 5, 20)) A = [20, 5, 4, 16] (stride (1, 20, 100, 400)) dim = 0 2.444 -> 2.454 ( +0.41%) [ +0.16% +0.04% +0.00% / +0.41% +0.94% +0.90%] index_add_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.447 -> 2.460 ( +0.53%) [ +0.08% +0.04% +0.00% / +0.53% +0.69% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.440 -> 2.451 ( +0.45%) [ +0.00% +0.16% +0.04% / +0.45% +1.19% +1.23%] index_add_ reverse : Elapsed 0.024 ms (2.440 ms / 100) 2.437 -> 2.449 ( +0.49%) [ +0.21% +0.00% +0.29% / +0.49% +1.11% +1.27%] index_copy_ reverse : Elapsed 0.024 ms (2.442 ms / 100) 2.443 -> 2.454 ( +0.45%) [ +0.08% +0.00% +0.04% / +0.45% +1.02% +0.94%] index_add_ spread : Elapsed 0.024 ms (2.445 ms / 100) 2.438 -> 2.454 ( +0.66%) [ +0.00% +0.16% +0.16% / +0.66% +1.23% +1.35%] index_copy_ spread : Elapsed 0.024 ms (2.438 ms / 100) 2.449 -> 2.456 ( +0.29%) [ +0.08% +0.00% +0.04% / +0.29% +0.53% +0.49%] index_add_ strided 3 : Elapsed 0.025 ms (2.451 ms / 100) 2.447 -> 2.461 ( +0.57%) [ +0.20% +0.08% +0.00% / +0.57% +0.69% +0.57%] index_copy_ strided 3 : Elapsed 0.025 ms (2.452 ms / 100) 2.448 -> 2.463 ( +0.61%) [ +0.00% +0.08% +0.08% / +0.61% +0.78% +0.78%] index_add_ strided 7 : Elapsed 0.024 ms (2.448 ms / 100) 2.446 -> 2.458 ( +0.49%) [ +0.20% +0.00% +0.16% / +0.49% +0.65% +0.78%] index_copy_ strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.455 -> 2.462 ( +0.29%) [ +0.16% +0.04% +0.00% / +0.57% +0.29% +0.37%] index_add_ perm : Elapsed 0.025 ms (2.459 ms / 100) 2.447 -> 2.461 ( +0.57%) [ +0.20% +0.00% +0.25% / +0.74% +0.57% +0.57%] index_copy_ perm : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.465 ( +0.49%) [ +0.20% +0.16% +0.00% / +0.49% +0.49% +0.53%] index_add_ perm_sorted : Elapsed 0.025 ms (2.458 ms / 100) 2.450 -> 2.461 ( +0.45%) [ +0.00% +0.04% +0.00% / +0.61% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.450 ms / 100) 4.507 -> 4.507 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.02% +0.00% +0.09%] index_select const : Elapsed 0.045 ms (4.514 ms / 100) 4.511 -> 4.505 ( -0.13%) [ +0.09% +0.18% +0.00% / +0.13% -0.11% -0.13%] index_select wrap : Elapsed 0.045 ms (4.515 ms / 100) 4.510 -> 4.513 ( +0.07%) [ +0.22% +0.02% +0.00% / +0.11% +0.07% +0.07%] index_select linear : Elapsed 0.045 ms (4.520 ms / 100) 4.518 -> 4.515 ( -0.07%) [ +0.18% +0.00% +0.02% / -0.07% -0.02% -0.04%] index_select reverse : Elapsed 0.045 ms (4.526 ms / 100) 4.508 -> 4.503 ( -0.11%) [ +0.00% +0.09% +0.00% / +0.00% +0.07% -0.11%] index_select skip64 : Elapsed 0.045 ms (4.508 ms / 100) 4.503 -> 4.504 ( +0.02%) [ +0.13% +0.09% +0.00% / +0.16% +0.02% +0.27%] index_select skip256 : Elapsed 0.045 ms (4.509 ms / 100) 4.515 -> 4.512 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.04% +0.18%] index_select spread : Elapsed 0.045 ms (4.516 ms / 100) 4.510 -> 4.504 ( -0.13%) [ +0.18% +0.11% +0.00% / -0.13% +0.24% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.518 ms / 100) 4.510 -> 4.509 ( -0.02%) [ +0.22% +0.00% +0.04% / -0.02% +0.33% +0.31%] index_select strided 5 : Elapsed 0.045 ms (4.520 ms / 100) 4.514 -> 4.510 ( -0.09%) [ +0.09% +0.00% +0.00% / -0.09% +0.02% +0.02%] index_select strided 7 : Elapsed 0.045 ms (4.518 ms / 100) 4.510 -> 4.512 ( +0.04%) [ +0.11% +0.00% +0.07% / +0.04% +0.04% +0.07%] index_select strided 8 : Elapsed 0.045 ms (4.515 ms / 100) 4.513 -> 4.511 ( -0.04%) [ +0.02% +0.13% +0.00% / -0.04% +0.00% +0.04%] index_select strided 16 : Elapsed 0.045 ms (4.514 ms / 100) 4.516 -> 4.508 ( -0.18%) [ +0.02% +0.00% +0.00% / +0.11% +0.00% -0.18%] index_select random : Elapsed 0.045 ms (4.517 ms / 100) 4.518 -> 4.512 ( -0.13%) [ +0.00% +0.04% +0.11% / +0.02% -0.04% -0.13%] index_select random_sorted : Elapsed 0.045 ms (4.518 ms / 100) B = [40, 5, 4, 16] (stride (80, 16, 3200, 1)) A = [20, 5, 4, 16] (stride (320, 16, 80, 1)) dim = 0 2.460 -> 2.468 ( +0.33%) [ +0.00% +0.00% +0.08% / +0.49% +0.45% +0.33%] index_add_ linear : Elapsed 0.025 ms (2.460 ms / 100) 2.444 -> 2.453 ( +0.37%) [ +0.08% +0.25% +0.00% / +0.37% +0.78% +0.70%] index_copy_ linear : Elapsed 0.024 ms (2.446 ms / 100) 2.459 -> 2.469 ( +0.41%) [ +0.20% +0.12% +0.00% / +0.53% +0.49% +0.41%] index_add_ reverse : Elapsed 0.025 ms (2.464 ms / 100) 2.442 -> 2.456 ( +0.57%) [ +0.41% +0.00% +0.25% / +0.78% +0.57% +0.70%] index_copy_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.462 -> 2.470 ( +0.32%) [ +0.12% +0.12% +0.00% / +0.41% +0.32% +0.37%] index_add_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.444 -> 2.457 ( +0.53%) [ +0.00% +0.16% +0.12% / +0.74% +0.53% +0.53%] index_copy_ spread : Elapsed 0.024 ms (2.444 ms / 100) 2.461 -> 2.469 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.49% +0.53% +0.33%] index_add_ strided 3 : Elapsed 0.025 ms (2.461 ms / 100) 2.444 -> 2.457 ( +0.53%) [ +0.04% +0.00% +0.00% / +0.57% +0.65% +0.53%] index_copy_ strided 3 : Elapsed 0.024 ms (2.445 ms / 100) 2.457 -> 2.467 ( +0.41%) [ +0.12% +0.00% +0.08% / +0.41% +0.61% +0.49%] index_add_ strided 7 : Elapsed 0.025 ms (2.460 ms / 100) 2.444 -> 2.455 ( +0.45%) [ +0.08% +0.00% +0.00% / +0.53% +0.74% +0.45%] index_copy_ strided 7 : Elapsed 0.024 ms (2.446 ms / 100) 2.457 -> 2.470 ( +0.53%) [ +0.00% +0.08% +0.08% / +0.53% +0.53% +0.73%] index_add_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.442 -> 2.454 ( +0.49%) [ +0.00% +0.08% +0.12% / +0.49% +0.66% +0.78%] index_copy_ perm : Elapsed 0.024 ms (2.442 ms / 100) 2.457 -> 2.468 ( +0.45%) [ +0.08% +0.00% +0.04% / +0.45% +0.65% +0.57%] index_add_ perm_sorted : Elapsed 0.025 ms (2.459 ms / 100) 2.445 -> 2.454 ( +0.37%) [ +0.08% +0.00% +0.04% / +0.37% +0.78% +0.61%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.447 ms / 100) 4.493 -> 4.492 ( -0.02%) [ +0.00% +0.00% +0.09% / +0.04% +0.00% -0.02%] index_select const : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.499 ( +0.13%) [ +0.00% +0.24% +0.13% / +0.13% +0.20% +0.22%] index_select wrap : Elapsed 0.045 ms (4.493 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.00% +0.11% +0.20% / +0.09% +0.18% +0.27%] index_select linear : Elapsed 0.045 ms (4.497 ms / 100) 4.496 -> 4.501 ( +0.11%) [ +0.16% +0.16% +0.00% / +0.20% +0.11% +0.16%] index_select reverse : Elapsed 0.045 ms (4.503 ms / 100) 4.489 -> 4.490 ( +0.02%) [ +0.13% +0.18% +0.00% / +0.02% +0.18% +0.11%] index_select skip64 : Elapsed 0.045 ms (4.495 ms / 100) 4.489 -> 4.490 ( +0.02%) [ +0.11% +0.00% +0.22% / +0.02% +0.13% +0.07%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.13% +0.00% +0.09% / +0.11% +0.09% +0.16%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.02% +0.07% +0.00% / -0.09% -0.02% +0.18%] index_select strided 3 : Elapsed 0.045 ms (4.499 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.02% +0.00% +0.00% / +0.13% +0.13% -0.09%] index_select strided 5 : Elapsed 0.045 ms (4.499 ms / 100) 4.491 -> 4.496 ( +0.11%) [ +0.27% +0.22% +0.00% / +0.11% +0.38% +0.51%] index_select strided 7 : Elapsed 0.045 ms (4.503 ms / 100) 4.493 -> 4.496 ( +0.07%) [ +0.16% +0.07% +0.00% / +0.07% +0.18% +0.22%] index_select strided 8 : Elapsed 0.045 ms (4.500 ms / 100) 4.490 -> 4.495 ( +0.11%) [ +0.00% +0.09% +0.04% / +0.18% +0.22% +0.11%] index_select strided 16 : Elapsed 0.045 ms (4.490 ms / 100) 4.496 -> 4.492 ( -0.09%) [ +0.11% +0.00% +0.11% / -0.09% +0.27% +0.18%] index_select random : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.499 ( -0.02%) [ +0.07% +0.00% +0.04% / -0.02% +0.20% +0.13%] index_select random_sorted : Elapsed 0.045 ms (4.503 ms / 100) B = [40, 5, 4, 16] (stride (5, 1, 3200, 200)) A = [20, 5, 4, 16] (stride (4, 1280, 1, 80)) dim = 0 2.395 -> 2.404 ( +0.38%) [ +0.04% +0.00% +0.04% / +0.38% +0.84% +0.88%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.398 -> 2.411 ( +0.54%) [ +0.00% +0.00% +0.17% / +0.54% +0.83% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.391 -> 2.404 ( +0.54%) [ +0.00% +0.13% +0.04% / +0.54% +1.13% +1.05%] index_add_ reverse : Elapsed 0.024 ms (2.391 ms / 100) 2.392 -> 2.406 ( +0.59%) [ +0.13% +0.13% +0.00% / +0.59% +1.30% +1.21%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.404 -> 2.420 ( +0.67%) [ +0.42% +0.33% +0.00% / +0.67% +1.04% +1.12%] index_add_ spread : Elapsed 0.024 ms (2.414 ms / 100) 2.421 -> 2.434 ( +0.54%) [ +0.21% +0.12% +0.00% / +0.54% +1.16% +1.20%] index_copy_ spread : Elapsed 0.024 ms (2.426 ms / 100) 2.410 -> 2.425 ( +0.62%) [ +0.17% +0.00% +0.12% / +0.62% +0.79% +0.75%] index_add_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.424 -> 2.431 ( +0.29%) [ +0.08% +0.17% +0.00% / +0.29% +0.66% +0.54%] index_copy_ strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.414 -> 2.419 ( +0.21%) [ +0.12% +0.00% +0.00% / +0.21% +0.50% +0.25%] index_add_ strided 7 : Elapsed 0.024 ms (2.417 ms / 100) 2.420 -> 2.433 ( +0.54%) [ +0.00% +0.17% +0.12% / +0.54% +0.58% +0.74%] index_copy_ strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.406 -> 2.414 ( +0.33%) [ +0.21% +0.00% +0.08% / +0.67% +0.37% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.411 ms / 100) 2.420 -> 2.426 ( +0.25%) [ +0.08% +0.00% +0.12% / +0.62% +0.25% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.422 ms / 100) 2.408 -> 2.417 ( +0.37%) [ +0.42% +0.00% +0.37% / +0.62% +0.37% +0.46%] index_add_ perm_sorted : Elapsed 0.024 ms (2.418 ms / 100) 2.420 -> 2.431 ( +0.45%) [ +0.25% +0.00% +0.17% / +0.58% +0.45% +0.45%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.426 ms / 100) 4.421 -> 4.420 ( -0.02%) [ +0.43% +0.00% +0.18% / +0.14% -0.02% +0.14%] index_select const : Elapsed 0.044 ms (4.440 ms / 100) 4.435 -> 4.431 ( -0.09%) [ +0.00% +0.02% +0.00% / -0.09% -0.07% +0.09%] index_select wrap : Elapsed 0.044 ms (4.435 ms / 100) 4.431 -> 4.434 ( +0.07%) [ +0.00% +0.11% +0.05% / +0.18% +0.07% +0.16%] index_select linear : Elapsed 0.044 ms (4.431 ms / 100) 4.437 -> 4.432 ( -0.11%) [ +0.16% +0.16% +0.00% / +0.00% -0.02% -0.11%] index_select reverse : Elapsed 0.044 ms (4.444 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.00% +0.11% +0.05% / +0.00% -0.09% +0.09%] index_select skip64 : Elapsed 0.044 ms (4.426 ms / 100) 4.424 -> 4.417 ( -0.16%) [ +0.00% +0.07% +0.09% / -0.16% +0.11% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.430 -> 4.435 ( +0.11%) [ +0.27% +0.14% +0.00% / +0.11% +0.16% +0.11%] index_select spread : Elapsed 0.044 ms (4.442 ms / 100) 4.433 -> 4.434 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.27% +0.02% +0.18%] index_select strided 3 : Elapsed 0.044 ms (4.433 ms / 100) 4.425 -> 4.424 ( -0.02%) [ +0.18% +0.00% +0.02% / -0.02% +0.14% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.433 ms / 100) 4.430 -> 4.433 ( +0.07%) [ +0.20% +0.00% +0.07% / +0.09% +0.07% +0.16%] index_select strided 7 : Elapsed 0.044 ms (4.439 ms / 100) 4.422 -> 4.430 ( +0.18%) [ +0.20% +0.00% +0.11% / +0.27% +0.23% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.431 ms / 100) 4.425 -> 4.424 ( -0.02%) [ +0.11% +0.05% +0.00% / -0.02% +0.05% +0.05%] index_select strided 16 : Elapsed 0.044 ms (4.430 ms / 100) 4.431 -> 4.434 ( +0.07%) [ +0.09% +0.20% +0.00% / +0.07% +0.07% +0.09%] index_select random : Elapsed 0.044 ms (4.435 ms / 100) 4.434 -> 4.433 ( -0.02%) [ +0.09% +0.00% +0.14% / -0.02% +0.07% -0.02%] index_select random_sorted : Elapsed 0.044 ms (4.438 ms / 100) B = [40, 5, 4, 16] (stride (1, 40, 200, 800)) A = [20, 5, 4, 16] (stride (80, 1, 1600, 5)) dim = 0 2.448 -> 2.457 ( +0.37%) [ +0.04% +0.00% +0.00% / +0.49% +0.37% +0.41%] index_add_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.448 -> 2.461 ( +0.53%) [ +0.00% +0.08% +0.12% / +0.53% +0.74% +0.86%] index_copy_ linear : Elapsed 0.024 ms (2.448 ms / 100) 2.447 -> 2.459 ( +0.49%) [ +0.00% +0.33% +0.33% / +0.53% +0.53% +0.49%] index_add_ reverse : Elapsed 0.024 ms (2.447 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.20% +0.00% +0.12% / +0.45% +0.29% +0.57%] index_copy_ reverse : Elapsed 0.025 ms (2.458 ms / 100) 2.465 -> 2.470 ( +0.20%) [ +0.00% +0.04% +0.00% / +0.24% +0.20% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.481 ( +0.49%) [ +0.12% +0.20% +0.00% / +0.49% +0.57% +0.61%] index_copy_ spread : Elapsed 0.025 ms (2.472 ms / 100) 2.464 -> 2.470 ( +0.24%) [ +0.04% +0.28% +0.00% / +0.24% +0.32% +0.37%] index_add_ strided 3 : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.481 ( +0.49%) [ +0.04% +0.08% +0.00% / +0.49% +0.57% +0.61%] index_copy_ strided 3 : Elapsed 0.025 ms (2.470 ms / 100) 2.462 -> 2.470 ( +0.32%) [ +0.12% +0.16% +0.00% / +0.41% +0.41% +0.32%] index_add_ strided 7 : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.479 ( +0.41%) [ +0.08% +0.16% +0.00% / +0.49% +0.41% +0.61%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.465 -> 2.471 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.24% +0.37% +0.32%] index_add_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.469 -> 2.477 ( +0.32%) [ +0.00% +0.04% +0.04% / +0.32% +0.49% +0.53%] index_copy_ perm : Elapsed 0.025 ms (2.469 ms / 100) 2.461 -> 2.472 ( +0.45%) [ +0.00% +0.24% +0.04% / +0.57% +0.49% +0.45%] index_add_ perm_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.467 -> 2.477 ( +0.41%) [ +0.00% +0.28% +0.12% / +0.41% +0.65% +0.73%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 4.494 -> 4.492 ( -0.04%) [ +0.04% +0.00% +0.09% / -0.04% +0.00% +0.04%] index_select const : Elapsed 0.045 ms (4.496 ms / 100) 4.500 -> 4.497 ( -0.07%) [ +0.16% +0.02% +0.00% / -0.07% +0.13% -0.07%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.13% +0.00% +0.00% / -0.09% +0.58% +0.20%] index_select linear : Elapsed 0.045 ms (4.504 ms / 100) 4.496 -> 4.497 ( +0.02%) [ +0.13% +0.00% +0.04% / +0.02% +0.27% +0.29%] index_select reverse : Elapsed 0.045 ms (4.502 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.16% +0.09% +0.00% / -0.09% +0.11% +0.00%] index_select skip64 : Elapsed 0.045 ms (4.505 ms / 100) 4.493 -> 4.497 ( +0.09%) [ +0.09% +0.02% +0.00% / +0.09% +0.20% +0.09%] index_select skip256 : Elapsed 0.045 ms (4.497 ms / 100) 4.500 -> 4.494 ( -0.13%) [ +0.07% +0.00% +0.02% / -0.13% +0.13% +0.13%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.498 -> 4.501 ( +0.07%) [ +0.00% +0.04% +0.11% / +0.18% +0.16% +0.07%] index_select strided 3 : Elapsed 0.045 ms (4.498 ms / 100) 4.493 -> 4.495 ( +0.04%) [ +0.07% +0.00% +0.13% / +0.09% +0.16% +0.04%] index_select strided 5 : Elapsed 0.045 ms (4.496 ms / 100) 4.496 -> 4.503 ( +0.16%) [ +0.22% +0.00% +0.09% / +0.16% +0.22% +0.18%] index_select strided 7 : Elapsed 0.045 ms (4.506 ms / 100) 4.496 -> 4.501 ( +0.11%) [ +0.07% +0.00% +0.18% / +0.11% +0.13% +0.16%] index_select strided 8 : Elapsed 0.045 ms (4.499 ms / 100) 4.490 -> 4.498 ( +0.18%) [ +0.09% +0.18% +0.00% / +0.18% +0.42% +0.40%] index_select strided 16 : Elapsed 0.045 ms (4.494 ms / 100) 4.495 -> 4.499 ( +0.09%) [ +0.22% +0.00% +0.11% / +0.09% +0.20% +0.29%] index_select random : Elapsed 0.045 ms (4.505 ms / 100) 4.497 -> 4.501 ( +0.09%) [ +0.11% +0.00% +0.13% / +0.09% +0.24% +0.09%] index_select random_sorted : Elapsed 0.045 ms (4.502 ms / 100) out_shape = [20, 40, 4, 16] in_shape = [20, 5, 4, 16] idx_dim = 1 B = [20, 40, 4, 16] (stride (2560, 16, 640, 1)) A = [20, 5, 4, 16] (stride (64, 1280, 16, 1)) dim = 1 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.21% +0.28%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.44% +0.66%] index_copy_ linear : Elapsed 0.014 ms (1.376 ms / 100) 1.419 -> 1.421 ( +0.14%) [ +0.28% +0.21% +0.00% / +0.14% +0.35% +0.35%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.374 -> 1.376 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.51% +0.36%] index_copy_ reverse : Elapsed 0.014 ms (1.374 ms / 100) 1.420 -> 1.420 ( +0.00%) [ +0.14% +0.00% +0.14% / +0.00% +0.28% +0.28%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.66% +0.51%] index_copy_ spread : Elapsed 0.014 ms (1.374 ms / 100) 1.421 -> 1.420 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.35% +0.21%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.07% +0.00% / +0.15% +0.87% +0.66%] index_copy_ strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.28% +0.42%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.58% +0.58%] index_copy_ strided 7 : Elapsed 0.014 ms (1.374 ms / 100) 1.419 -> 1.419 ( +0.00%) [ +0.07% +0.21% +0.00% / +0.00% +0.42% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.420 ms / 100) 1.374 -> 1.373 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.66% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.374 ms / 100) 1.418 -> 1.420 ( +0.14%) [ +0.21% +0.35% +0.00% / +0.14% +0.49% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.421 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.51% +0.51%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.373 ms / 100) 8.193 -> 8.185 ( -0.10%) [ +0.00% +0.35% +0.17% / -0.10% +0.15% +0.00%] index_select const : Elapsed 0.082 ms (8.193 ms / 100) 8.235 -> 8.231 ( -0.05%) [ +0.00% +0.13% +0.01% / +0.17% +0.15% -0.05%] index_select wrap : Elapsed 0.082 ms (8.235 ms / 100) 8.235 -> 8.215 ( -0.24%) [ +0.00% +0.19% +0.10% / +0.11% -0.01% -0.24%] index_select linear : Elapsed 0.082 ms (8.235 ms / 100) 8.224 -> 8.224 ( +0.00%) [ +0.06% +0.00% +0.07% / +0.00% +0.13% +0.17%] index_select reverse : Elapsed 0.082 ms (8.229 ms / 100) 8.209 -> 8.205 ( -0.05%) [ +0.02% +0.00% +0.07% / +0.18% -0.05% +0.16%] index_select skip64 : Elapsed 0.082 ms (8.211 ms / 100) 8.205 -> 8.204 ( -0.01%) [ +0.04% +0.00% +0.09% / -0.01% +0.07% +0.28%] index_select skip256 : Elapsed 0.082 ms (8.208 ms / 100) 8.211 -> 8.217 ( +0.07%) [ +0.00% +0.28% +0.09% / +0.07% +0.30% +0.26%] index_select spread : Elapsed 0.082 ms (8.211 ms / 100) 8.234 -> 8.221 ( -0.16%) [ +0.00% +0.06% +0.27% / -0.16% -0.06% +0.00%] index_select strided 3 : Elapsed 0.082 ms (8.234 ms / 100) 8.238 -> 8.219 ( -0.23%) [ +0.00% +0.02% +0.00% / -0.11% -0.22% -0.23%] index_select random : Elapsed 0.082 ms (8.238 ms / 100) 8.205 -> 8.219 ( +0.17%) [ +0.00% +0.32% +0.39% / +0.26% +0.45% +0.17%] index_select random_sorted : Elapsed 0.082 ms (8.205 ms / 100) B = [20, 40, 4, 16] (stride (2560, 16, 640, 1)) A = [20, 5, 4, 16] (stride (4, 1280, 1, 80)) dim = 1 1.575 -> 1.577 ( +0.13%) [ +0.32% +0.19% +0.00% / +0.13% +0.51% +0.57%] index_add_ linear : Elapsed 0.016 ms (1.580 ms / 100) 1.522 -> 1.523 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.07% +0.66% +0.72%] index_copy_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.585 -> 1.586 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.63% +0.63%] index_add_ reverse : Elapsed 0.016 ms (1.586 ms / 100) 1.527 -> 1.528 ( +0.07%) [ +0.00% +0.07% +0.13% / +0.07% +0.85% +0.65%] index_copy_ reverse : Elapsed 0.015 ms (1.527 ms / 100) 1.588 -> 1.589 ( +0.06%) [ +0.13% +0.13% +0.00% / +0.06% +0.63% +0.63%] index_add_ spread : Elapsed 0.016 ms (1.590 ms / 100) 1.536 -> 1.537 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.78% +0.72%] index_copy_ spread : Elapsed 0.015 ms (1.536 ms / 100) 1.574 -> 1.577 ( +0.19%) [ +0.06% +0.32% +0.00% / +0.19% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.016 ms (1.575 ms / 100) 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.66% +0.72%] index_copy_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.581 -> 1.581 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.016 ms (1.582 ms / 100) 1.530 -> 1.530 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.85%] index_copy_ strided 7 : Elapsed 0.015 ms (1.530 ms / 100) 1.582 -> 1.581 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.57% +0.70%] index_add_ perm : Elapsed 0.016 ms (1.582 ms / 100) 1.529 -> 1.529 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +0.85% +0.78%] index_copy_ perm : Elapsed 0.015 ms (1.529 ms / 100) 1.575 -> 1.575 ( +0.00%) [ +0.00% +0.25% +0.06% / +0.00% +0.63% +0.63%] index_add_ perm_sorted : Elapsed 0.016 ms (1.575 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +1.05% +0.79%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 8.534 -> 8.530 ( -0.05%) [ +0.00% +0.05% +0.05% / -0.05% +0.33% +0.16%] index_select const : Elapsed 0.085 ms (8.534 ms / 100) 8.549 -> 8.543 ( -0.07%) [ +0.20% +0.00% +0.09% / -0.07% -0.04% +0.18%] index_select wrap : Elapsed 0.086 ms (8.566 ms / 100) 8.556 -> 8.561 ( +0.06%) [ +0.26% +0.00% +0.02% / +0.36% +0.11% +0.06%] index_select linear : Elapsed 0.086 ms (8.578 ms / 100) 8.546 -> 8.536 ( -0.12%) [ +0.14% +0.04% +0.00% / -0.09% -0.08% -0.12%] index_select reverse : Elapsed 0.086 ms (8.558 ms / 100) 8.538 -> 8.536 ( -0.02%) [ +0.21% +0.13% +0.00% / +0.05% -0.02% +0.00%] index_select skip64 : Elapsed 0.086 ms (8.556 ms / 100) 8.541 -> 8.528 ( -0.15%) [ +0.00% +0.14% +0.02% / +0.00% +0.00% -0.15%] index_select skip256 : Elapsed 0.085 ms (8.541 ms / 100) 8.558 -> 8.548 ( -0.12%) [ +0.19% +0.00% +0.18% / -0.12% +0.02% +0.19%] index_select spread : Elapsed 0.086 ms (8.574 ms / 100) 8.548 -> 8.542 ( -0.07%) [ +0.00% +0.01% +0.15% / -0.06% -0.02% -0.07%] index_select strided 3 : Elapsed 0.085 ms (8.548 ms / 100) 8.551 -> 8.549 ( -0.02%) [ +0.23% +0.00% +0.11% / +0.29% -0.02% +0.00%] index_select random : Elapsed 0.086 ms (8.571 ms / 100) 8.548 -> 8.547 ( -0.01%) [ +0.00% +0.19% +0.00% / +0.16% +0.09% -0.01%] index_select random_sorted : Elapsed 0.085 ms (8.548 ms / 100) B = [20, 40, 4, 16] (stride (64, 1280, 16, 1)) A = [20, 5, 4, 16] (stride (320, 4, 1, 20)) dim = 1 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.63% +0.56%] index_add_ linear : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.14% +0.07% / +0.07% +0.29% +0.29%] index_copy_ linear : Elapsed 0.014 ms (1.380 ms / 100) 1.423 -> 1.421 ( -0.14%) [ +0.07% +0.00% +0.07% / -0.14% +0.56% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.424 ms / 100) 1.381 -> 1.380 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.29% +0.22%] index_copy_ reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.63% +0.56%] index_add_ spread : Elapsed 0.014 ms (1.423 ms / 100) 1.381 -> 1.379 ( -0.14%) [ +0.07% +0.07% +0.00% / -0.14% +0.22% +0.14%] index_copy_ spread : Elapsed 0.014 ms (1.382 ms / 100) 1.423 -> 1.422 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.49% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.29% +0.58%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.63% +0.56%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.07% +0.15% / +0.15% +0.44% +0.44%] index_copy_ strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.56% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.00% +0.22% +0.15% / +0.15% +0.51% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.379 ms / 100) 1.423 -> 1.422 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.56% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.381 -> 1.379 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.29% +0.22%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) 8.192 -> 8.205 ( +0.16%) [ +0.34% +0.11% +0.00% / +0.26% +0.20% +0.16%] index_select const : Elapsed 0.082 ms (8.220 ms / 100) 8.192 -> 8.204 ( +0.15%) [ +0.27% +0.00% +0.10% / +0.34% +0.20% +0.15%] index_select wrap : Elapsed 0.082 ms (8.214 ms / 100) 8.192 -> 8.199 ( +0.09%) [ +0.00% +0.10% +0.34% / +0.10% +0.13% +0.09%] index_select linear : Elapsed 0.082 ms (8.192 ms / 100) 8.200 -> 8.205 ( +0.06%) [ +0.00% +0.00% +0.33% / +0.11% +0.10% +0.06%] index_select reverse : Elapsed 0.082 ms (8.200 ms / 100) 8.196 -> 8.209 ( +0.16%) [ +0.31% +0.16% +0.00% / +0.20% +0.28% +0.16%] index_select skip64 : Elapsed 0.082 ms (8.221 ms / 100) 8.191 -> 8.192 ( +0.01%) [ +0.00% +0.16% +0.05% / +0.01% +0.27% +0.16%] index_select skip256 : Elapsed 0.082 ms (8.191 ms / 100) 8.192 -> 8.212 ( +0.24%) [ +0.00% +0.17% +0.20% / +0.29% +0.24% +0.37%] index_select spread : Elapsed 0.082 ms (8.192 ms / 100) 8.211 -> 8.200 ( -0.13%) [ +0.26% +0.10% +0.00% / +0.16% +0.10% -0.13%] index_select strided 3 : Elapsed 0.082 ms (8.232 ms / 100) 8.196 -> 8.202 ( +0.07%) [ +0.00% +0.10% +0.56% / +0.20% +0.17% +0.07%] index_select random : Elapsed 0.082 ms (8.196 ms / 100) 8.206 -> 8.203 ( -0.04%) [ +0.18% +0.00% +0.02% / -0.04% +0.32% +0.05%] index_select random_sorted : Elapsed 0.082 ms (8.221 ms / 100) B = [20, 40, 4, 16] (stride (640, 1, 12800, 40)) A = [20, 5, 4, 16] (stride (1, 20, 100, 400)) dim = 1 1.653 -> 1.654 ( +0.06%) [ +0.00% +0.06% +0.06% / +0.06% +0.67% +0.73%] index_add_ linear : Elapsed 0.017 ms (1.653 ms / 100) 1.599 -> 1.603 ( +0.25%) [ +0.06% +0.06% +0.00% / +0.25% +0.63% +0.69%] index_copy_ linear : Elapsed 0.016 ms (1.600 ms / 100) 1.646 -> 1.646 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.67% +0.55%] index_add_ reverse : Elapsed 0.016 ms (1.647 ms / 100) 1.597 -> 1.598 ( +0.06%) [ +0.00% +0.00% +0.38% / +0.06% +0.56% +0.50%] index_copy_ reverse : Elapsed 0.016 ms (1.597 ms / 100) 1.661 -> 1.661 ( +0.00%) [ +0.06% +0.00% +0.36% / +0.00% +0.66% +0.60%] index_add_ spread : Elapsed 0.017 ms (1.662 ms / 100) 1.602 -> 1.604 ( +0.12%) [ +0.06% +0.00% +0.19% / +0.12% +0.62% +0.44%] index_copy_ spread : Elapsed 0.016 ms (1.603 ms / 100) 1.653 -> 1.654 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.67% +0.85%] index_add_ strided 3 : Elapsed 0.017 ms (1.655 ms / 100) 1.603 -> 1.602 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.50% +0.56%] index_copy_ strided 3 : Elapsed 0.016 ms (1.603 ms / 100) 1.654 -> 1.654 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.73% +0.67%] index_add_ strided 7 : Elapsed 0.017 ms (1.656 ms / 100) 1.599 -> 1.600 ( +0.06%) [ +0.19% +0.00% +0.13% / +0.06% +0.88% +0.56%] index_copy_ strided 7 : Elapsed 0.016 ms (1.602 ms / 100) 1.653 -> 1.654 ( +0.06%) [ +0.12% +0.00% +0.18% / +0.06% +0.79% +0.67%] index_add_ perm : Elapsed 0.017 ms (1.655 ms / 100) 1.601 -> 1.599 ( -0.12%) [ +0.00% +0.00% +0.31% / -0.12% +0.56% +0.50%] index_copy_ perm : Elapsed 0.016 ms (1.601 ms / 100) 1.653 -> 1.655 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.67% +0.67%] index_add_ perm_sorted : Elapsed 0.017 ms (1.654 ms / 100) 1.601 -> 1.603 ( +0.12%) [ +0.00% +0.00% +0.06% / +0.12% +0.62% +0.62%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.601 ms / 100) 8.578 -> 8.572 ( -0.07%) [ +0.24% +0.05% +0.00% / -0.07% +0.06% +0.17%] index_select const : Elapsed 0.086 ms (8.599 ms / 100) 8.590 -> 8.582 ( -0.09%) [ +0.00% +0.08% +0.05% / -0.09% +0.09% +0.19%] index_select wrap : Elapsed 0.086 ms (8.590 ms / 100) 8.581 -> 8.609 ( +0.33%) [ +0.00% +0.14% +0.21% / +0.33% +0.42% +0.38%] index_select linear : Elapsed 0.086 ms (8.581 ms / 100) 8.583 -> 8.569 ( -0.16%) [ +0.01% +0.16% +0.00% / -0.16% +0.27% +0.16%] index_select reverse : Elapsed 0.086 ms (8.584 ms / 100) 8.561 -> 8.576 ( +0.18%) [ +0.00% +0.30% +0.34% / +0.18% +0.32% +0.46%] index_select skip64 : Elapsed 0.086 ms (8.561 ms / 100) 8.579 -> 8.583 ( +0.05%) [ +0.00% +0.13% +0.09% / +0.13% +0.05% +0.24%] index_select skip256 : Elapsed 0.086 ms (8.579 ms / 100) 8.592 -> 8.601 ( +0.10%) [ +0.00% +0.19% +0.12% / +0.12% +0.22% +0.10%] index_select spread : Elapsed 0.086 ms (8.592 ms / 100) 8.586 -> 8.589 ( +0.03%) [ +0.00% +0.05% +0.10% / +0.03% +0.42% +0.29%] index_select strided 3 : Elapsed 0.086 ms (8.586 ms / 100) 8.586 -> 8.589 ( +0.03%) [ +0.01% +0.00% +0.06% / +0.03% +0.16% +0.44%] index_select random : Elapsed 0.086 ms (8.587 ms / 100) 8.589 -> 8.608 ( +0.22%) [ +0.06% +0.00% +0.02% / +0.23% +0.29% +0.22%] index_select random_sorted : Elapsed 0.086 ms (8.594 ms / 100) B = [20, 40, 4, 16] (stride (1, 20, 12800, 800)) A = [20, 5, 4, 16] (stride (80, 1, 1600, 5)) dim = 1 1.421 -> 1.420 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.49% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.381 ( +0.15%) [ +0.15% +0.00% +0.07% / +0.15% +0.29% +0.29%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.426 ( +0.35%) [ +0.14% +0.00% +0.00% / +0.35% +0.49% +0.49%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.381 -> 1.381 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.14% +0.22%] index_copy_ reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.42% +0.49%] index_add_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.380 -> 1.378 ( -0.14%) [ +0.00% +0.07% +0.00% / -0.14% +0.29% +0.22%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.423 ( +0.14%) [ +0.14% +0.07% +0.00% / +0.14% +0.49% +0.56%] index_add_ strided 3 : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.380 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.29% +0.36%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.56% +0.70%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.379 ( +0.15%) [ +0.29% +0.00% +0.36% / +0.15% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.49% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.00% +0.22% / +0.07% +0.29% +0.29%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.42% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.15% +0.00% +0.22% / +0.07% +0.36% +0.65%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) 8.208 -> 8.219 ( +0.13%) [ +0.04% +0.00% +0.12% / +0.37% +0.13% +0.23%] index_select const : Elapsed 0.082 ms (8.211 ms / 100) 8.196 -> 8.216 ( +0.24%) [ +0.11% +0.00% +0.23% / +0.24% +0.43% +0.31%] index_select wrap : Elapsed 0.082 ms (8.205 ms / 100) 8.225 -> 8.210 ( -0.18%) [ +0.00% +0.10% +0.01% / -0.15% -0.18% -0.11%] index_select linear : Elapsed 0.082 ms (8.225 ms / 100) 8.212 -> 8.206 ( -0.07%) [ +0.00% +0.02% +0.17% / -0.07% -0.02% +0.19%] index_select reverse : Elapsed 0.082 ms (8.212 ms / 100) 8.206 -> 8.223 ( +0.21%) [ +0.00% +0.02% +0.10% / +0.38% +0.24% +0.21%] index_select skip64 : Elapsed 0.082 ms (8.206 ms / 100) 8.201 -> 8.206 ( +0.06%) [ +0.00% +0.15% +0.18% / +0.06% +0.28% +0.20%] index_select skip256 : Elapsed 0.082 ms (8.201 ms / 100) 8.201 -> 8.204 ( +0.04%) [ +0.20% +0.00% +0.23% / +0.04% +0.33% +0.38%] index_select spread : Elapsed 0.082 ms (8.217 ms / 100) 8.208 -> 8.214 ( +0.07%) [ +0.01% +0.11% +0.00% / +0.16% +0.07% +0.41%] index_select strided 3 : Elapsed 0.082 ms (8.209 ms / 100) 8.218 -> 8.220 ( +0.02%) [ +0.00% +0.02% +0.24% / +0.19% +0.06% +0.02%] index_select random : Elapsed 0.082 ms (8.218 ms / 100) 8.207 -> 8.214 ( +0.09%) [ +0.04% +0.00% +0.38% / +0.10% +0.09% +0.28%] index_select random_sorted : Elapsed 0.082 ms (8.210 ms / 100) out_shape = [20, 5, 40, 16] in_shape = [20, 5, 4, 16] idx_dim = 2 B = [20, 5, 40, 16] (stride (3200, 640, 16, 1)) A = [20, 5, 4, 16] (stride (320, 16, 80, 1)) dim = 2 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.24% +0.00% / +0.00% +0.33% +0.41%] index_add_ linear : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.51% +0.51%] index_copy_ linear : Elapsed 0.012 ms (1.188 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.73% +0.00% +0.16% / +0.00% +0.57% +0.57%] index_add_ reverse : Elapsed 0.012 ms (1.235 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.51% +0.51%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.227 -> 1.226 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.49% +0.49%] index_add_ spread : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.187 ( -0.08%) [ +0.00% +0.00% +0.76% / -0.08% +0.59% +0.67%] index_copy_ spread : Elapsed 0.012 ms (1.188 ms / 100) 1.225 -> 1.225 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.82% +0.73%] index_add_ strided 3 : Elapsed 0.012 ms (1.226 ms / 100) 1.186 -> 1.187 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +1.18% +0.84%] index_copy_ strided 3 : Elapsed 0.012 ms (1.186 ms / 100) 1.225 -> 1.225 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.73% +0.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.225 ms / 100) 1.186 -> 1.187 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +1.01% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.187 ms / 100) 1.225 -> 1.226 ( +0.08%) [ +0.00% +0.16% +0.08% / +0.08% +0.73% +0.65%] index_add_ perm : Elapsed 0.012 ms (1.225 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.59%] index_copy_ perm : Elapsed 0.012 ms (1.187 ms / 100) 1.226 -> 1.226 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.57% +0.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.08% +0.00% +0.17% / +0.00% +0.76% +0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.188 ms / 100) 8.686 -> 8.693 ( +0.08%) [ +0.00% +0.22% +0.05% / +0.32% +0.10% +0.08%] index_select const : Elapsed 0.087 ms (8.686 ms / 100) 8.713 -> 8.720 ( +0.08%) [ +0.00% +0.05% +0.31% / +0.08% +0.25% +0.11%] index_select wrap : Elapsed 0.087 ms (8.713 ms / 100) 8.698 -> 8.706 ( +0.09%) [ +0.03% +0.15% +0.00% / +0.09% +0.26% +0.23%] index_select linear : Elapsed 0.087 ms (8.701 ms / 100) 8.708 -> 8.702 ( -0.07%) [ +0.08% +0.05% +0.00% / +0.02% -0.02% -0.07%] index_select reverse : Elapsed 0.087 ms (8.715 ms / 100) 8.686 -> 8.690 ( +0.05%) [ +0.25% +0.00% +0.15% / +0.23% +0.08% +0.05%] index_select skip64 : Elapsed 0.087 ms (8.708 ms / 100) 8.686 -> 8.681 ( -0.06%) [ +0.00% +0.10% +0.02% / +0.15% -0.06% +0.20%] index_select skip256 : Elapsed 0.087 ms (8.686 ms / 100) 8.708 -> 8.698 ( -0.11%) [ +0.01% +0.11% +0.00% / +0.09% +0.20% -0.11%] index_select spread : Elapsed 0.087 ms (8.709 ms / 100) 8.713 -> 8.713 ( +0.00%) [ +0.09% +0.00% +0.06% / +0.00% +0.20% +0.17%] index_select strided 3 : Elapsed 0.087 ms (8.721 ms / 100) 8.720 -> 8.711 ( -0.10%) [ +0.05% +0.00% +0.00% / +0.16% -0.10% -0.03%] index_select random : Elapsed 0.087 ms (8.724 ms / 100) 8.705 -> 8.716 ( +0.13%) [ +0.00% +0.24% +0.05% / +0.13% +0.15% +0.15%] index_select random_sorted : Elapsed 0.087 ms (8.705 ms / 100) B = [20, 5, 40, 16] (stride (3200, 1, 80, 5)) A = [20, 5, 4, 16] (stride (320, 64, 16, 1)) dim = 2 1.227 -> 1.227 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.57% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.228 ms / 100) 1.187 -> 1.189 ( +0.17%) [ +0.00% +0.08% +0.17% / +0.17% +0.67% +0.76%] index_copy_ linear : Elapsed 0.012 ms (1.187 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +0.41%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.59% +0.51%] index_copy_ reverse : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +0.41%] index_add_ spread : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.51% +0.51%] index_copy_ spread : Elapsed 0.012 ms (1.188 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.33% +0.81%] index_add_ strided 3 : Elapsed 0.012 ms (1.228 ms / 100) 1.189 -> 1.188 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.42% +1.35%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.227 -> 1.227 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.41% +1.39%] index_add_ strided 7 : Elapsed 0.012 ms (1.227 ms / 100) 1.188 -> 1.188 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.59% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.188 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.41% +0.41%] index_add_ perm : Elapsed 0.012 ms (1.228 ms / 100) 1.188 -> 1.189 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.59% +0.67%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.228 -> 1.227 ( -0.08%) [ +0.08% +0.00% +0.08% / -0.08% +0.41% +0.41%] index_add_ perm_sorted : Elapsed 0.012 ms (1.229 ms / 100) 1.189 -> 1.189 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.700 -> 8.687 ( -0.15%) [ +0.26% +0.00% +0.13% / +0.29% -0.15% -0.11%] index_select const : Elapsed 0.087 ms (8.723 ms / 100) 8.726 -> 8.709 ( -0.19%) [ +0.16% +0.00% +0.10% / +0.07% +0.14% -0.19%] index_select wrap : Elapsed 0.087 ms (8.740 ms / 100) 8.706 -> 8.697 ( -0.10%) [ +0.02% +0.11% +0.00% / +0.09% -0.02% -0.10%] index_select linear : Elapsed 0.087 ms (8.708 ms / 100) 8.715 -> 8.719 ( +0.05%) [ +0.17% +0.17% +0.00% / +0.10% +0.15% +0.05%] index_select reverse : Elapsed 0.087 ms (8.730 ms / 100) 8.701 -> 8.693 ( -0.09%) [ +0.15% +0.00% +0.13% / +0.15% +0.00% -0.09%] index_select skip64 : Elapsed 0.087 ms (8.714 ms / 100) 8.706 -> 8.691 ( -0.17%) [ +0.11% +0.00% +0.06% / +0.05% -0.07% -0.17%] index_select skip256 : Elapsed 0.087 ms (8.716 ms / 100) 8.713 -> 8.713 ( +0.00%) [ +0.00% +0.34% +0.26% / +0.33% +0.00% +0.07%] index_select spread : Elapsed 0.087 ms (8.713 ms / 100) 8.724 -> 8.719 ( -0.06%) [ +0.11% +0.06% +0.00% / -0.02% +0.02% -0.06%] index_select strided 3 : Elapsed 0.087 ms (8.734 ms / 100) 8.722 -> 8.723 ( +0.01%) [ +0.03% +0.09% +0.00% / +0.06% +0.17% +0.01%] index_select random : Elapsed 0.087 ms (8.725 ms / 100) 8.710 -> 8.716 ( +0.07%) [ +0.24% +0.00% +0.31% / +0.20% +0.14% +0.07%] index_select random_sorted : Elapsed 0.087 ms (8.731 ms / 100) B = [20, 5, 40, 16] (stride (640, 12800, 16, 1)) A = [20, 5, 4, 16] (stride (16, 320, 1600, 1)) dim = 2 1.308 -> 1.309 ( +0.08%) [ +0.00% +0.23% +0.00% / +0.08% +0.76% +0.69%] index_add_ linear : Elapsed 0.013 ms (1.308 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.63% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.309 -> 1.309 ( +0.00%) [ +0.15% +0.00% +0.08% / +0.00% +0.69% +0.69%] index_add_ reverse : Elapsed 0.013 ms (1.311 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.63% +0.08% +0.00% / +0.00% +0.87% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.23% +0.00% +0.00% / +0.08% +0.76% +0.76%] index_add_ spread : Elapsed 0.013 ms (1.311 ms / 100) 1.270 -> 1.269 ( -0.08%) [ +0.71% +0.00% +0.00% / -0.08% +0.55% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.279 ms / 100) 1.307 -> 1.306 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.77% +0.92%] index_add_ strided 3 : Elapsed 0.013 ms (1.307 ms / 100) 1.268 -> 1.268 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.79% +0.79%] index_copy_ strided 3 : Elapsed 0.013 ms (1.269 ms / 100) 1.307 -> 1.307 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.84% +0.84%] index_add_ strided 7 : Elapsed 0.013 ms (1.307 ms / 100) 1.268 -> 1.270 ( +0.16%) [ +0.08% +0.00% +0.08% / +0.16% +0.71% +0.79%] index_copy_ strided 7 : Elapsed 0.013 ms (1.269 ms / 100) 1.307 -> 1.310 ( +0.23%) [ +0.08% +0.38% +0.00% / +0.23% +0.69% +0.92%] index_add_ perm : Elapsed 0.013 ms (1.308 ms / 100) 1.269 -> 1.269 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.63% +0.71%] index_copy_ perm : Elapsed 0.013 ms (1.271 ms / 100) 1.307 -> 1.308 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.92% +0.84%] index_add_ perm_sorted : Elapsed 0.013 ms (1.308 ms / 100) 1.268 -> 1.269 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.71% +0.79%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.269 ms / 100) 9.126 -> 9.146 ( +0.22%) [ +0.19% +0.02% +0.00% / +0.22% +0.27% +0.31%] index_select const : Elapsed 0.091 ms (9.143 ms / 100) 9.161 -> 9.183 ( +0.24%) [ +0.10% +0.21% +0.00% / +0.24% +0.41% +0.26%] index_select wrap : Elapsed 0.092 ms (9.170 ms / 100) 9.154 -> 9.158 ( +0.04%) [ +0.07% +0.00% +0.00% / +0.13% +0.46% +0.04%] index_select linear : Elapsed 0.092 ms (9.160 ms / 100) 9.182 -> 9.174 ( -0.09%) [ +0.07% +0.00% +0.02% / -0.07% +0.09% -0.09%] index_select reverse : Elapsed 0.092 ms (9.188 ms / 100) 9.137 -> 9.129 ( -0.09%) [ +0.15% +0.09% +0.00% / -0.09% +0.14% +0.07%] index_select skip64 : Elapsed 0.092 ms (9.151 ms / 100) 9.132 -> 9.128 ( -0.04%) [ +0.11% +0.02% +0.00% / -0.04% +0.13% +0.19%] index_select skip256 : Elapsed 0.091 ms (9.142 ms / 100) 9.163 -> 9.185 ( +0.24%) [ +0.00% +0.28% +0.02% / +0.27% +0.39% +0.24%] index_select spread : Elapsed 0.092 ms (9.163 ms / 100) 9.177 -> 9.177 ( +0.00%) [ +0.00% +0.04% +0.04% / +0.00% +0.20% +0.09%] index_select strided 3 : Elapsed 0.092 ms (9.177 ms / 100) 9.177 -> 9.187 ( +0.11%) [ +0.01% +0.00% +0.25% / +0.11% +0.17% +0.15%] index_select random : Elapsed 0.092 ms (9.178 ms / 100) 9.153 -> 9.151 ( -0.02%) [ +0.12% +0.22% +0.00% / -0.02% +0.16% +0.12%] index_select random_sorted : Elapsed 0.092 ms (9.164 ms / 100) out_shape = [20, 5, 4, 40] in_shape = [20, 5, 4, 16] idx_dim = 3 B = [20, 5, 4, 40] (stride (800, 1, 5, 20)) A = [20, 5, 4, 16] (stride (80, 16, 1600, 1)) dim = 3 3.889 -> 3.891 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.85% +0.85%] index_add_ linear : Elapsed 0.039 ms (3.891 ms / 100) 3.737 -> 3.738 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.75% +0.83%] index_copy_ linear : Elapsed 0.037 ms (3.738 ms / 100) 3.922 -> 3.922 ( +0.00%) [ +0.05% +0.00% +0.03% / +0.00% +0.51% +0.46%] index_add_ reverse : Elapsed 0.039 ms (3.924 ms / 100) 3.771 -> 3.776 ( +0.13%) [ +0.16% +0.16% +0.00% / +0.13% +0.66% +0.72%] index_copy_ reverse : Elapsed 0.038 ms (3.777 ms / 100) 3.884 -> 3.886 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.90% +0.88%] index_add_ spread : Elapsed 0.039 ms (3.886 ms / 100) 3.740 -> 3.741 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.56% +0.56%] index_copy_ spread : Elapsed 0.037 ms (3.742 ms / 100) 3.892 -> 3.895 ( +0.08%) [ +0.10% +0.00% +0.13% / +0.08% +0.75% +0.67%] index_add_ strided 3 : Elapsed 0.039 ms (3.896 ms / 100) 3.743 -> 3.748 ( +0.13%) [ +0.13% +0.03% +0.00% / +0.13% +0.69% +0.69%] index_copy_ strided 3 : Elapsed 0.037 ms (3.748 ms / 100) 3.919 -> 3.920 ( +0.03%) [ +0.00% +0.10% +0.13% / +0.03% +0.64% +0.61%] index_add_ strided 7 : Elapsed 0.039 ms (3.919 ms / 100) 3.774 -> 3.776 ( +0.05%) [ +0.03% +0.08% +0.00% / +0.05% +0.61% +0.66%] index_copy_ strided 7 : Elapsed 0.038 ms (3.775 ms / 100) 3.886 -> 3.891 ( +0.13%) [ +0.18% +0.00% +0.03% / +0.13% +0.77% +0.85%] index_add_ perm : Elapsed 0.039 ms (3.893 ms / 100) 3.736 -> 3.737 ( +0.03%) [ +0.19% +0.16% +0.00% / +0.03% +0.54% +0.78%] index_copy_ perm : Elapsed 0.037 ms (3.743 ms / 100) 3.881 -> 3.889 ( +0.21%) [ +0.28% +0.26% +0.00% / +0.21% +1.00% +0.70%] index_add_ perm_sorted : Elapsed 0.039 ms (3.892 ms / 100) 3.736 -> 3.738 ( +0.05%) [ +0.08% +0.08% +0.00% / +0.05% +0.59% +0.56%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.739 ms / 100) 5.475 -> 5.475 ( +0.00%) [ +0.05% +0.00% +0.02% / +0.00% +0.15% +0.22%] index_select const : Elapsed 0.055 ms (5.478 ms / 100) 5.481 -> 5.478 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.05% +0.09% +0.09%] index_select wrap : Elapsed 0.055 ms (5.485 ms / 100) 5.479 -> 5.479 ( +0.00%) [ +0.15% +0.18% +0.00% / +0.00% +0.00% +0.11%] index_select linear : Elapsed 0.055 ms (5.487 ms / 100) 5.478 -> 5.483 ( +0.09%) [ +0.13% +0.05% +0.00% / +0.13% +0.09% +0.24%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.479 -> 5.475 ( -0.07%) [ +0.07% +0.09% +0.00% / -0.07% +0.11% -0.04%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.480 -> 5.483 ( +0.05%) [ +0.02% +0.07% +0.00% / +0.16% +0.15% +0.05%] index_select skip256 : Elapsed 0.055 ms (5.481 ms / 100) 5.477 -> 5.487 ( +0.18%) [ +0.00% +0.16% +0.15% / +0.18% +0.27% +0.22%] index_select spread : Elapsed 0.055 ms (5.477 ms / 100) 5.477 -> 5.484 ( +0.13%) [ +0.00% +0.24% +0.07% / +0.20% +0.13% +0.18%] index_select strided 3 : Elapsed 0.055 ms (5.477 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.02% +0.13% +0.00% / -0.07% +0.15% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.481 ms / 100) 5.475 -> 5.480 ( +0.09%) [ +0.07% +0.00% +0.18% / +0.13% +0.09% +0.22%] index_select strided 7 : Elapsed 0.055 ms (5.479 ms / 100) 5.479 -> 5.477 ( -0.04%) [ +0.15% +0.00% +0.07% / -0.02% -0.04% +0.22%] index_select strided 8 : Elapsed 0.055 ms (5.487 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.02% +0.00% +0.09% / -0.02% +0.11% +0.13%] index_select random : Elapsed 0.055 ms (5.481 ms / 100) 5.485 -> 5.475 ( -0.18%) [ +0.00% +0.00% +0.00% / -0.18% +0.09% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.485 ms / 100) B = [20, 5, 4, 40] (stride (40, 800, 4000, 1)) A = [20, 5, 4, 16] (stride (20, 1, 5, 400)) dim = 3 3.941 -> 3.943 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.76% +0.76%] index_add_ linear : Elapsed 0.039 ms (3.943 ms / 100) 3.815 -> 3.817 ( +0.05%) [ +0.05% +0.03% +0.00% / +0.05% +0.81% +0.79%] index_copy_ linear : Elapsed 0.038 ms (3.817 ms / 100) 3.933 -> 3.933 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.71% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.933 ms / 100) 3.803 -> 3.803 ( +0.00%) [ +0.00% +0.05% +0.03% / +0.00% +0.76% +0.74%] index_copy_ reverse : Elapsed 0.038 ms (3.803 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.69% +0.64%] index_add_ spread : Elapsed 0.039 ms (3.935 ms / 100) 3.809 -> 3.809 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.76% +0.74%] index_copy_ spread : Elapsed 0.038 ms (3.810 ms / 100) 3.937 -> 3.937 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.58% +0.61%] index_add_ strided 3 : Elapsed 0.039 ms (3.938 ms / 100) 3.812 -> 3.812 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.58% +0.63%] index_copy_ strided 3 : Elapsed 0.038 ms (3.812 ms / 100) 3.931 -> 3.931 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.79% +0.76%] index_add_ strided 7 : Elapsed 0.039 ms (3.932 ms / 100) 3.802 -> 3.803 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.79% +0.79%] index_copy_ strided 7 : Elapsed 0.038 ms (3.803 ms / 100) 3.941 -> 3.942 ( +0.03%) [ +0.08% +0.03% +0.00% / +0.03% +0.79% +0.79%] index_add_ perm : Elapsed 0.039 ms (3.944 ms / 100) 3.816 -> 3.816 ( +0.00%) [ +0.03% +0.00% +0.00% / +0.00% +0.79% +0.73%] index_copy_ perm : Elapsed 0.038 ms (3.817 ms / 100) 3.932 -> 3.932 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.81% +0.81%] index_add_ perm_sorted : Elapsed 0.039 ms (3.932 ms / 100) 3.806 -> 3.807 ( +0.03%) [ +0.16% +0.00% +0.05% / +0.03% +0.81% +0.81%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.812 ms / 100) 5.561 -> 5.565 ( +0.07%) [ +0.00% +0.05% +0.07% / +0.07% +0.11% +0.07%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.574 -> 5.566 ( -0.14%) [ +0.02% +0.09% +0.00% / -0.09% -0.07% -0.14%] index_select wrap : Elapsed 0.056 ms (5.575 ms / 100) 5.570 -> 5.572 ( +0.04%) [ +0.16% +0.13% +0.00% / +0.05% +0.04% +0.13%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.564 -> 5.569 ( +0.09%) [ +0.07% +0.00% +0.16% / +0.13% +0.09% +0.09%] index_select reverse : Elapsed 0.056 ms (5.568 ms / 100) 5.557 -> 5.559 ( +0.04%) [ +0.11% +0.09% +0.00% / +0.04% +0.04% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.563 ms / 100) 5.559 -> 5.555 ( -0.07%) [ +0.00% +0.02% +0.07% / -0.07% +0.22% +0.11%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.569 -> 5.569 ( +0.00%) [ +0.09% +0.00% +0.07% / +0.11% +0.00% +0.05%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.569 -> 5.571 ( +0.04%) [ +0.07% +0.00% +0.09% / +0.11% +0.04% +0.04%] index_select strided 3 : Elapsed 0.056 ms (5.573 ms / 100) 5.568 -> 5.560 ( -0.14%) [ +0.02% +0.14% +0.00% / +0.25% +0.13% -0.14%] index_select strided 5 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.574 ( +0.14%) [ +0.11% +0.11% +0.00% / +0.14% +0.16% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.572 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.02% +0.07% +0.00% / -0.02% +0.05% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.571 -> 5.563 ( -0.14%) [ +0.13% +0.09% +0.00% / +0.13% -0.14% -0.02%] index_select random : Elapsed 0.056 ms (5.578 ms / 100) 5.569 -> 5.565 ( -0.07%) [ +0.00% +0.00% +0.02% / +0.02% +0.14% -0.07%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 4000, 100)) A = [20, 5, 4, 16] (stride (4, 1280, 1, 80)) dim = 3 3.693 -> 3.697 ( +0.11%) [ +0.05% +0.08% +0.00% / +0.11% +0.51% +0.65%] index_add_ linear : Elapsed 0.037 ms (3.695 ms / 100) 3.561 -> 3.562 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.45% +0.56%] index_copy_ linear : Elapsed 0.036 ms (3.562 ms / 100) 3.711 -> 3.707 ( -0.11%) [ +0.03% +0.05% +0.00% / -0.11% +0.57% +0.54%] index_add_ reverse : Elapsed 0.037 ms (3.712 ms / 100) 3.577 -> 3.576 ( -0.03%) [ +0.08% +0.06% +0.00% / -0.03% +0.62% +0.59%] index_copy_ reverse : Elapsed 0.036 ms (3.580 ms / 100) 3.713 -> 3.716 ( +0.08%) [ +0.11% +0.00% +0.08% / +0.08% +0.59% +0.57%] index_add_ spread : Elapsed 0.037 ms (3.717 ms / 100) 3.576 -> 3.578 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +0.59% +0.64%] index_copy_ spread : Elapsed 0.036 ms (3.582 ms / 100) 3.706 -> 3.706 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.51% +0.49%] index_add_ strided 3 : Elapsed 0.037 ms (3.707 ms / 100) 3.567 -> 3.567 ( +0.00%) [ +0.06% +0.00% +0.06% / +0.00% +0.62% +0.59%] index_copy_ strided 3 : Elapsed 0.036 ms (3.569 ms / 100) 3.713 -> 3.715 ( +0.05%) [ +0.00% +0.05% +0.05% / +0.05% +0.48% +0.48%] index_add_ strided 7 : Elapsed 0.037 ms (3.713 ms / 100) 3.579 -> 3.582 ( +0.08%) [ +0.06% +0.06% +0.00% / +0.08% +1.09% +0.47%] index_copy_ strided 7 : Elapsed 0.036 ms (3.581 ms / 100) 3.696 -> 3.695 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.57% +0.49%] index_add_ perm : Elapsed 0.037 ms (3.697 ms / 100) 3.561 -> 3.560 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.56% +0.62%] index_copy_ perm : Elapsed 0.036 ms (3.561 ms / 100) 3.700 -> 3.698 ( -0.05%) [ +0.05% +0.03% +0.00% / -0.05% +0.24% +0.32%] index_add_ perm_sorted : Elapsed 0.037 ms (3.702 ms / 100) 3.564 -> 3.562 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.34% +0.34%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.564 ms / 100) 5.463 -> 5.468 ( +0.09%) [ +0.16% +0.16% +0.00% / +0.09% +0.33% +0.20%] index_select const : Elapsed 0.055 ms (5.472 ms / 100) 5.472 -> 5.474 ( +0.04%) [ +0.04% +0.02% +0.00% / +0.04% +0.29% +0.22%] index_select wrap : Elapsed 0.055 ms (5.474 ms / 100) 5.475 -> 5.479 ( +0.07%) [ +0.04% +0.00% +0.20% / +0.07% +0.16% +0.15%] index_select linear : Elapsed 0.055 ms (5.477 ms / 100) 5.472 -> 5.478 ( +0.11%) [ +0.07% +0.26% +0.00% / +0.11% +0.42% +0.38%] index_select reverse : Elapsed 0.055 ms (5.476 ms / 100) 5.468 -> 5.466 ( -0.04%) [ +0.11% +0.07% +0.00% / -0.04% +0.15% +0.09%] index_select skip64 : Elapsed 0.055 ms (5.474 ms / 100) 5.476 -> 5.472 ( -0.07%) [ +0.09% +0.02% +0.00% / +0.04% -0.07% -0.07%] index_select skip256 : Elapsed 0.055 ms (5.481 ms / 100) 5.479 -> 5.480 ( +0.02%) [ +0.00% +0.04% +0.09% / +0.02% +0.09% +0.09%] index_select spread : Elapsed 0.055 ms (5.479 ms / 100) 5.466 -> 5.472 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +0.22% +0.31%] index_select strided 3 : Elapsed 0.055 ms (5.472 ms / 100) 5.471 -> 5.464 ( -0.13%) [ +0.20% +0.00% +0.05% / -0.13% +0.31% +0.18%] index_select strided 5 : Elapsed 0.055 ms (5.482 ms / 100) 5.473 -> 5.464 ( -0.16%) [ +0.00% +0.09% +0.00% / -0.16% +0.09% +0.44%] index_select strided 7 : Elapsed 0.055 ms (5.473 ms / 100) 5.464 -> 5.465 ( +0.02%) [ +0.09% +0.26% +0.00% / +0.02% +0.15% +0.26%] index_select strided 8 : Elapsed 0.055 ms (5.469 ms / 100) 5.471 -> 5.473 ( +0.04%) [ +0.15% +0.00% +0.15% / +0.04% +0.09% +0.15%] index_select random : Elapsed 0.055 ms (5.479 ms / 100) 5.472 -> 5.475 ( +0.05%) [ +0.13% +0.00% +0.04% / +0.11% +0.05% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.479 ms / 100) B = [20, 5, 4, 40] (stride (5, 1, 4000, 100)) A = [20, 5, 4, 16] (stride (1, 1280, 20, 80)) dim = 3 3.881 -> 3.882 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.93% +1.11%] index_add_ linear : Elapsed 0.039 ms (3.882 ms / 100) 3.716 -> 3.717 ( +0.03%) [ +0.05% +0.03% +0.00% / +0.03% +0.73% +1.08%] index_copy_ linear : Elapsed 0.037 ms (3.718 ms / 100) 3.884 -> 3.884 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +1.03% +0.93%] index_add_ reverse : Elapsed 0.039 ms (3.884 ms / 100) 3.726 -> 3.728 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.72% +0.81%] index_copy_ reverse : Elapsed 0.037 ms (3.727 ms / 100) 3.856 -> 3.855 ( -0.03%) [ +0.10% +0.10% +0.00% / -0.03% +0.78% +0.88%] index_add_ spread : Elapsed 0.039 ms (3.860 ms / 100) 3.700 -> 3.703 ( +0.08%) [ +0.00% +0.05% +0.05% / +0.08% +0.78% +1.22%] index_copy_ spread : Elapsed 0.037 ms (3.700 ms / 100) 3.865 -> 3.864 ( -0.03%) [ +0.00% +0.03% +0.05% / -0.03% +0.70% +0.75%] index_add_ strided 3 : Elapsed 0.039 ms (3.865 ms / 100) 3.709 -> 3.709 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.70% +0.81%] index_copy_ strided 3 : Elapsed 0.037 ms (3.710 ms / 100) 3.883 -> 3.885 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.88% +1.06%] index_add_ strided 7 : Elapsed 0.039 ms (3.883 ms / 100) 3.727 -> 3.726 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.80% +0.97%] index_copy_ strided 7 : Elapsed 0.037 ms (3.727 ms / 100) 3.881 -> 3.881 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.80% +0.93%] index_add_ perm : Elapsed 0.039 ms (3.881 ms / 100) 3.716 -> 3.715 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.73% +1.05%] index_copy_ perm : Elapsed 0.037 ms (3.716 ms / 100) 3.863 -> 3.863 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.91% +0.83%] index_add_ perm_sorted : Elapsed 0.039 ms (3.864 ms / 100) 3.709 -> 3.708 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.92% +1.02%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.709 ms / 100) 5.468 -> 5.466 ( -0.04%) [ +0.00% +0.02% +0.05% / +0.13% -0.04% +0.26%] index_select const : Elapsed 0.055 ms (5.468 ms / 100) 5.477 -> 5.471 ( -0.11%) [ +0.04% +0.11% +0.00% / +0.15% -0.04% -0.11%] index_select wrap : Elapsed 0.055 ms (5.479 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.00% +0.20% +0.05% / +0.18% +0.07% +0.26%] index_select linear : Elapsed 0.055 ms (5.473 ms / 100) 5.479 -> 5.474 ( -0.09%) [ +0.13% +0.13% +0.00% / +0.02% -0.09% +0.04%] index_select reverse : Elapsed 0.055 ms (5.486 ms / 100) 5.466 -> 5.465 ( -0.02%) [ +0.09% +0.00% +0.07% / -0.02% +0.26% +0.24%] index_select skip64 : Elapsed 0.055 ms (5.471 ms / 100) 5.465 -> 5.458 ( -0.13%) [ +0.02% +0.00% +0.07% / -0.13% +0.27% +0.26%] index_select skip256 : Elapsed 0.055 ms (5.466 ms / 100) 5.476 -> 5.475 ( -0.02%) [ +0.02% +0.09% +0.00% / +0.13% -0.02% +0.04%] index_select spread : Elapsed 0.055 ms (5.477 ms / 100) 5.474 -> 5.474 ( +0.00%) [ +0.05% +0.09% +0.00% / +0.13% +0.00% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.477 ms / 100) 5.473 -> 5.476 ( +0.05%) [ +0.00% +0.02% +0.00% / +0.07% +0.05% +0.29%] index_select strided 5 : Elapsed 0.055 ms (5.473 ms / 100) 5.474 -> 5.475 ( +0.02%) [ +0.13% +0.02% +0.00% / +0.02% +0.15% +0.11%] index_select strided 7 : Elapsed 0.055 ms (5.481 ms / 100) 5.462 -> 5.469 ( +0.13%) [ +0.00% +0.07% +0.09% / +0.13% +0.35% +0.53%] index_select strided 8 : Elapsed 0.055 ms (5.462 ms / 100) 5.481 -> 5.476 ( -0.09%) [ +0.05% +0.00% +0.00% / +0.07% -0.09% -0.02%] index_select random : Elapsed 0.055 ms (5.484 ms / 100) 5.475 -> 5.468 ( -0.13%) [ +0.00% +0.02% +0.11% / +0.11% -0.13% +0.09%] index_select random_sorted : Elapsed 0.055 ms (5.475 ms / 100) B = [20, 5, 4, 40] (stride (4, 80, 1, 400)) A = [20, 5, 4, 16] (stride (320, 64, 16, 1)) dim = 3 3.575 -> 3.576 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.36% +0.45%] index_add_ linear : Elapsed 0.036 ms (3.576 ms / 100) 3.421 -> 3.425 ( +0.12%) [ +0.06% +0.06% +0.00% / +0.12% +0.41% +1.05%] index_copy_ linear : Elapsed 0.034 ms (3.423 ms / 100) 3.578 -> 3.579 ( +0.03%) [ +0.00% +0.00% +0.03% / +0.03% +0.53% +0.50%] index_add_ reverse : Elapsed 0.036 ms (3.578 ms / 100) 3.439 -> 3.439 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.47% +0.44%] index_copy_ reverse : Elapsed 0.034 ms (3.440 ms / 100) 3.594 -> 3.594 ( +0.00%) [ +0.03% +0.06% +0.00% / +0.00% +0.14% +0.33%] index_add_ spread : Elapsed 0.036 ms (3.595 ms / 100) 3.435 -> 3.436 ( +0.03%) [ +0.06% +0.00% +0.09% / +0.03% +0.52% +1.34%] index_copy_ spread : Elapsed 0.034 ms (3.437 ms / 100) 3.593 -> 3.595 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.17% +0.17%] index_add_ strided 3 : Elapsed 0.036 ms (3.593 ms / 100) 3.444 -> 3.444 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.23% +0.15%] index_copy_ strided 3 : Elapsed 0.034 ms (3.446 ms / 100) 3.583 -> 3.583 ( +0.00%) [ +0.06% +0.03% +0.00% / +0.00% +0.56% +0.61%] index_add_ strided 7 : Elapsed 0.036 ms (3.585 ms / 100) 3.443 -> 3.443 ( +0.00%) [ +0.03% +0.00% +0.03% / +0.00% +0.49% +0.70%] index_copy_ strided 7 : Elapsed 0.034 ms (3.444 ms / 100) 3.579 -> 3.579 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.08% +0.11%] index_add_ perm : Elapsed 0.036 ms (3.579 ms / 100) 3.425 -> 3.425 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.12% +0.18%] index_copy_ perm : Elapsed 0.034 ms (3.425 ms / 100) 3.581 -> 3.580 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.03% +0.14%] index_add_ perm_sorted : Elapsed 0.036 ms (3.581 ms / 100) 3.425 -> 3.425 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.09% +0.23%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.425 ms / 100) 5.385 -> 5.389 ( +0.07%) [ +0.17% +0.00% +0.11% / +0.07% +0.15% +0.13%] index_select const : Elapsed 0.054 ms (5.394 ms / 100) 5.385 -> 5.393 ( +0.15%) [ +0.00% +0.15% +0.22% / +0.15% +0.26% +0.26%] index_select wrap : Elapsed 0.054 ms (5.385 ms / 100) 5.389 -> 5.394 ( +0.09%) [ +0.13% +0.11% +0.00% / +0.09% +0.15% +0.17%] index_select linear : Elapsed 0.054 ms (5.396 ms / 100) 5.391 -> 5.390 ( -0.02%) [ +0.04% +0.00% +0.07% / +0.13% +0.11% -0.02%] index_select reverse : Elapsed 0.054 ms (5.393 ms / 100) 5.389 -> 5.384 ( -0.09%) [ +0.17% +0.00% +0.06% / +0.11% -0.09% +0.00%] index_select skip64 : Elapsed 0.054 ms (5.398 ms / 100) 5.391 -> 5.387 ( -0.07%) [ +0.07% +0.15% +0.00% / +0.17% -0.07% +0.06%] index_select skip256 : Elapsed 0.054 ms (5.395 ms / 100) 5.393 -> 5.385 ( -0.15%) [ +0.06% +0.00% +0.02% / +0.02% +0.04% -0.15%] index_select spread : Elapsed 0.054 ms (5.396 ms / 100) 5.388 -> 5.386 ( -0.04%) [ +0.07% +0.00% +0.06% / +0.07% -0.04% +0.24%] index_select strided 3 : Elapsed 0.054 ms (5.392 ms / 100) 5.388 -> 5.387 ( -0.02%) [ +0.00% +0.00% +0.11% / +0.06% +0.13% -0.02%] index_select strided 5 : Elapsed 0.054 ms (5.388 ms / 100) 5.390 -> 5.393 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.13% +0.11%] index_select strided 7 : Elapsed 0.054 ms (5.390 ms / 100) 5.383 -> 5.388 ( +0.09%) [ +0.24% +0.02% +0.00% / +0.09% +0.13% +0.30%] index_select strided 8 : Elapsed 0.054 ms (5.396 ms / 100) 5.391 -> 5.387 ( -0.07%) [ +0.00% +0.09% +0.06% / +0.00% -0.07% +0.00%] index_select random : Elapsed 0.054 ms (5.391 ms / 100) 5.390 -> 5.385 ( -0.09%) [ +0.07% +0.00% +0.07% / +0.04% -0.09% +0.13%] index_select random_sorted : Elapsed 0.054 ms (5.394 ms / 100) B = [20, 5, 4, 40] (stride (4, 80, 1, 400)) A = [20, 5, 4, 16] (stride (1, 80, 20, 400)) dim = 3 3.856 -> 3.855 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.67% +0.70%] index_add_ linear : Elapsed 0.039 ms (3.856 ms / 100) 3.706 -> 3.707 ( +0.03%) [ +0.00% +0.03% +0.00% / +0.03% +0.73% +0.86%] index_copy_ linear : Elapsed 0.037 ms (3.706 ms / 100) 3.831 -> 3.831 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.76% +0.76%] index_add_ reverse : Elapsed 0.038 ms (3.832 ms / 100) 3.690 -> 3.693 ( +0.08%) [ +0.03% +0.00% +0.03% / +0.08% +0.73% +0.70%] index_copy_ reverse : Elapsed 0.037 ms (3.691 ms / 100) 3.853 -> 3.855 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.67% +0.67%] index_add_ spread : Elapsed 0.039 ms (3.854 ms / 100) 3.699 -> 3.700 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.73% +0.84%] index_copy_ spread : Elapsed 0.037 ms (3.699 ms / 100) 3.846 -> 3.846 ( +0.00%) [ +0.00% +0.03% +0.00% / +0.00% +0.70% +0.68%] index_add_ strided 3 : Elapsed 0.038 ms (3.846 ms / 100) 3.700 -> 3.700 ( +0.00%) [ +0.03% +0.05% +0.00% / +0.00% +0.76% +0.70%] index_copy_ strided 3 : Elapsed 0.037 ms (3.701 ms / 100) 3.830 -> 3.832 ( +0.05%) [ +0.00% +0.03% +0.05% / +0.05% +0.78% +0.78%] index_add_ strided 7 : Elapsed 0.038 ms (3.830 ms / 100) 3.689 -> 3.691 ( +0.05%) [ +0.03% +0.00% +0.00% / +0.05% +0.84% +0.84%] index_copy_ strided 7 : Elapsed 0.037 ms (3.690 ms / 100) 3.855 -> 3.855 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.78% +0.75%] index_add_ perm : Elapsed 0.039 ms (3.855 ms / 100) 3.706 -> 3.705 ( -0.03%) [ +0.05% +0.00% +0.05% / -0.03% +0.78% +0.76%] index_copy_ perm : Elapsed 0.037 ms (3.708 ms / 100) 3.846 -> 3.846 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.73%] index_add_ perm_sorted : Elapsed 0.038 ms (3.846 ms / 100) 3.697 -> 3.698 ( +0.03%) [ +0.05% +0.00% +0.08% / +0.03% +0.78% +0.89%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.699 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.00% +0.18% +0.16% / +0.11% +0.04% +0.13%] index_select const : Elapsed 0.055 ms (5.476 ms / 100) 5.486 -> 5.479 ( -0.13%) [ +0.09% +0.00% +0.04% / +0.11% -0.05% -0.13%] index_select wrap : Elapsed 0.055 ms (5.491 ms / 100) 5.483 -> 5.487 ( +0.07%) [ +0.07% +0.20% +0.00% / +0.15% +0.07% +0.16%] index_select linear : Elapsed 0.055 ms (5.487 ms / 100) 5.482 -> 5.483 ( +0.02%) [ +0.05% +0.00% +0.02% / +0.16% +0.07% +0.02%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.476 -> 5.482 ( +0.11%) [ +0.18% +0.00% +0.05% / +0.16% +0.11% +0.18%] index_select skip64 : Elapsed 0.055 ms (5.486 ms / 100) 5.473 -> 5.475 ( +0.04%) [ +0.24% +0.07% +0.00% / +0.04% +0.16% +0.27%] index_select skip256 : Elapsed 0.055 ms (5.486 ms / 100) 5.482 -> 5.484 ( +0.04%) [ +0.22% +0.00% +0.04% / +0.04% +0.09% +0.09%] index_select spread : Elapsed 0.055 ms (5.494 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.22% +0.00% +0.15% / +0.09% +0.04% +0.26%] index_select strided 3 : Elapsed 0.055 ms (5.492 ms / 100) 5.482 -> 5.484 ( +0.04%) [ +0.05% +0.00% +0.16% / +0.13% +0.13% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.485 ms / 100) 5.483 -> 5.485 ( +0.04%) [ +0.04% +0.11% +0.00% / +0.04% +0.07% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.485 ms / 100) 5.477 -> 5.480 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.15% +0.26%] index_select strided 8 : Elapsed 0.055 ms (5.477 ms / 100) 5.478 -> 5.483 ( +0.09%) [ +0.00% +0.04% +0.07% / +0.09% +0.18% +0.11%] index_select random : Elapsed 0.055 ms (5.478 ms / 100) 5.477 -> 5.479 ( +0.04%) [ +0.00% +0.11% +0.04% / +0.16% +0.09% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.477 ms / 100) B = [20, 5, 4, 40] (stride (1, 80, 20, 400)) A = [20, 5, 4, 16] (stride (320, 64, 16, 1)) dim = 3 3.252 -> 3.254 ( +0.06%) [ +0.06% +0.03% +0.00% / +0.06% +0.40% +0.37%] index_add_ linear : Elapsed 0.033 ms (3.254 ms / 100) 3.100 -> 3.100 ( +0.00%) [ +0.13% +0.03% +0.00% / +0.00% +0.32% +0.65%] index_copy_ linear : Elapsed 0.031 ms (3.104 ms / 100) 3.267 -> 3.268 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.49% +0.46%] index_add_ reverse : Elapsed 0.033 ms (3.267 ms / 100) 3.120 -> 3.134 ( +0.45%) [ +0.10% +0.00% +0.38% / +0.54% +0.61% +0.45%] index_copy_ reverse : Elapsed 0.031 ms (3.123 ms / 100) 3.266 -> 3.264 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +0.55% +0.55%] index_add_ spread : Elapsed 0.033 ms (3.266 ms / 100) 3.117 -> 3.126 ( +0.29%) [ +0.22% +0.00% +0.19% / +0.29% +0.61% +0.77%] index_copy_ spread : Elapsed 0.031 ms (3.124 ms / 100) 3.276 -> 3.276 ( +0.00%) [ +0.00% +0.03% +0.03% / +0.00% +0.15% +0.12%] index_add_ strided 3 : Elapsed 0.033 ms (3.276 ms / 100) 3.129 -> 3.129 ( +0.00%) [ +0.22% +0.00% +0.06% / +0.29% +0.00% +0.26%] index_copy_ strided 3 : Elapsed 0.031 ms (3.136 ms / 100) 3.270 -> 3.272 ( +0.06%) [ +0.00% +0.03% +0.06% / +0.06% +0.55% +0.52%] index_add_ strided 7 : Elapsed 0.033 ms (3.270 ms / 100) 3.129 -> 3.133 ( +0.13%) [ +0.29% +0.00% +0.22% / +0.13% +0.32% +0.22%] index_copy_ strided 7 : Elapsed 0.031 ms (3.138 ms / 100) 3.255 -> 3.254 ( -0.03%) [ +0.00% +0.00% +0.03% / -0.03% +0.09% +0.12%] index_add_ perm : Elapsed 0.033 ms (3.255 ms / 100) 3.102 -> 3.103 ( +0.03%) [ +0.06% +0.00% +0.06% / +0.03% +0.06% +0.10%] index_copy_ perm : Elapsed 0.031 ms (3.104 ms / 100) 3.256 -> 3.255 ( -0.03%) [ +0.03% +0.00% +0.06% / -0.03% +0.03% +0.06%] index_add_ perm_sorted : Elapsed 0.033 ms (3.257 ms / 100) 3.104 -> 3.104 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.10% +0.19%] index_copy_ perm_sorted : Elapsed 0.031 ms (3.105 ms / 100) 5.298 -> 5.298 ( +0.00%) [ +0.25% +0.09% +0.00% / +0.00% +0.26% +0.25%] index_select const : Elapsed 0.053 ms (5.311 ms / 100) 5.298 -> 5.302 ( +0.08%) [ +0.00% +0.19% +0.25% / +0.08% +0.15% +0.15%] index_select wrap : Elapsed 0.053 ms (5.298 ms / 100) 5.303 -> 5.303 ( +0.00%) [ +0.13% +0.00% +0.11% / +0.04% +0.00% +0.04%] index_select linear : Elapsed 0.053 ms (5.310 ms / 100) 5.305 -> 5.305 ( +0.00%) [ +0.08% +0.19% +0.00% / +0.00% +0.11% +0.06%] index_select reverse : Elapsed 0.053 ms (5.309 ms / 100) 5.306 -> 5.300 ( -0.11%) [ +0.09% +0.00% +0.00% / +0.00% -0.11% -0.08%] index_select skip64 : Elapsed 0.053 ms (5.311 ms / 100) 5.302 -> 5.298 ( -0.08%) [ +0.08% +0.08% +0.00% / +0.06% -0.08% -0.04%] index_select skip256 : Elapsed 0.053 ms (5.306 ms / 100) 5.305 -> 5.304 ( -0.02%) [ +0.02% +0.00% +0.08% / +0.15% -0.02% +0.06%] index_select spread : Elapsed 0.053 ms (5.306 ms / 100) 5.303 -> 5.305 ( +0.04%) [ +0.08% +0.00% +0.06% / +0.21% +0.17% +0.04%] index_select strided 3 : Elapsed 0.053 ms (5.307 ms / 100) 5.299 -> 5.301 ( +0.04%) [ +0.08% +0.00% +0.09% / +0.09% +0.13% +0.04%] index_select strided 5 : Elapsed 0.053 ms (5.303 ms / 100) 5.302 -> 5.301 ( -0.02%) [ +0.00% +0.17% +0.11% / -0.02% +0.15% +0.11%] index_select strided 7 : Elapsed 0.053 ms (5.302 ms / 100) 5.304 -> 5.305 ( +0.02%) [ +0.00% +0.00% +0.09% / +0.02% +0.02% +0.08%] index_select strided 8 : Elapsed 0.053 ms (5.304 ms / 100) 5.306 -> 5.305 ( -0.02%) [ +0.04% +0.09% +0.00% / +0.00% -0.02% +0.00%] index_select random : Elapsed 0.053 ms (5.308 ms / 100) 5.298 -> 5.303 ( +0.09%) [ +0.00% +0.19% +0.25% / +0.23% +0.09% +0.15%] index_select random_sorted : Elapsed 0.053 ms (5.298 ms / 100) out_shape = [40, 5, 16, 4] in_shape = [20, 5, 16, 4] idx_dim = 0 B = [40, 5, 16, 4] (stride (320, 64, 4, 1)) A = [20, 5, 16, 4] (stride (320, 1, 20, 5)) dim = 0 2.297 -> 2.308 ( +0.48%) [ +0.17% +0.00% +0.22% / +0.48% +0.70% +0.87%] index_add_ linear : Elapsed 0.023 ms (2.301 ms / 100) 2.298 -> 2.311 ( +0.57%) [ +0.04% +0.13% +0.00% / +0.57% +0.57% +1.22%] index_copy_ linear : Elapsed 0.023 ms (2.299 ms / 100) 2.300 -> 2.311 ( +0.48%) [ +0.00% +0.04% +0.17% / +0.48% +0.57% +1.13%] index_add_ reverse : Elapsed 0.023 ms (2.300 ms / 100) 2.297 -> 2.314 ( +0.74%) [ +0.00% +0.22% +0.17% / +0.74% +0.74% +3.53%] index_copy_ reverse : Elapsed 0.023 ms (2.297 ms / 100) 2.303 -> 2.312 ( +0.39%) [ +0.04% +0.00% +0.04% / +0.39% +0.48% +1.13%] index_add_ spread : Elapsed 0.023 ms (2.304 ms / 100) 2.299 -> 2.311 ( +0.52%) [ +0.00% +0.00% +0.00% / +0.65% +0.52% +2.04%] index_copy_ spread : Elapsed 0.023 ms (2.299 ms / 100) 2.302 -> 2.311 ( +0.39%) [ +0.00% +0.09% +0.26% / +0.52% +0.39% +0.39%] index_add_ strided 3 : Elapsed 0.023 ms (2.302 ms / 100) 2.298 -> 2.310 ( +0.52%) [ +0.09% +0.26% +0.00% / +0.52% +0.61% +0.61%] index_copy_ strided 3 : Elapsed 0.023 ms (2.300 ms / 100) 2.305 -> 2.311 ( +0.26%) [ +0.17% +0.22% +0.00% / +0.48% +0.26% +0.30%] index_add_ strided 7 : Elapsed 0.023 ms (2.309 ms / 100) 2.299 -> 2.310 ( +0.48%) [ +0.04% +0.00% +0.04% / +0.61% +0.48% +0.52%] index_copy_ strided 7 : Elapsed 0.023 ms (2.300 ms / 100) 2.297 -> 2.315 ( +0.78%) [ +0.22% +0.17% +0.00% / +0.78% +0.78% +1.18%] index_add_ perm : Elapsed 0.023 ms (2.302 ms / 100) 2.299 -> 2.311 ( +0.52%) [ +0.04% +0.13% +0.00% / +0.52% +0.70% +1.22%] index_copy_ perm : Elapsed 0.023 ms (2.300 ms / 100) 2.298 -> 2.312 ( +0.61%) [ +0.22% +0.00% +0.13% / +0.65% +0.61% +0.91%] index_add_ perm_sorted : Elapsed 0.023 ms (2.303 ms / 100) 2.293 -> 2.308 ( +0.65%) [ +0.00% +0.09% +0.31% / +0.65% +0.83% +1.00%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.293 ms / 100) 4.271 -> 4.273 ( +0.05%) [ +0.00% +0.00% +0.02% / +0.09% +0.05% +0.12%] index_select const : Elapsed 0.043 ms (4.271 ms / 100) 4.273 -> 4.270 ( -0.07%) [ +0.09% +0.00% +0.00% / -0.07% +0.07% +0.14%] index_select wrap : Elapsed 0.043 ms (4.277 ms / 100) 4.272 -> 4.274 ( +0.05%) [ +0.16% +0.00% +0.07% / +0.23% +0.19% +0.05%] index_select linear : Elapsed 0.043 ms (4.279 ms / 100) 4.272 -> 4.276 ( +0.09%) [ +0.09% +0.00% +0.23% / +0.12% +0.16% +0.09%] index_select reverse : Elapsed 0.043 ms (4.276 ms / 100) 4.268 -> 4.268 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.12% +0.02% +0.00%] index_select skip64 : Elapsed 0.043 ms (4.271 ms / 100) 4.268 -> 4.274 ( +0.14%) [ +0.00% +0.05% +0.00% / +0.19% +0.14% +0.30%] index_select skip256 : Elapsed 0.043 ms (4.268 ms / 100) 4.277 -> 4.274 ( -0.07%) [ +0.00% +0.14% +0.02% / -0.07% +0.19% -0.07%] index_select spread : Elapsed 0.043 ms (4.277 ms / 100) 4.271 -> 4.277 ( +0.14%) [ +0.26% +0.12% +0.00% / +0.14% +0.19% +0.28%] index_select strided 3 : Elapsed 0.043 ms (4.282 ms / 100) 4.273 -> 4.262 ( -0.26%) [ +0.02% +0.00% +0.12% / +0.14% -0.14% -0.26%] index_select strided 5 : Elapsed 0.043 ms (4.274 ms / 100) 4.274 -> 4.281 ( +0.16%) [ +0.12% +0.00% +0.19% / +0.19% +0.16% +0.19%] index_select strided 7 : Elapsed 0.043 ms (4.279 ms / 100) 4.267 -> 4.263 ( -0.09%) [ +0.19% +0.00% +0.07% / -0.02% +0.02% -0.09%] index_select strided 8 : Elapsed 0.043 ms (4.275 ms / 100) 4.267 -> 4.270 ( +0.07%) [ +0.00% +0.23% +0.14% / +0.07% +0.16% +0.14%] index_select strided 16 : Elapsed 0.043 ms (4.267 ms / 100) 4.276 -> 4.268 ( -0.19%) [ +0.00% +0.21% +0.28% / +0.02% -0.19% +0.02%] index_select random : Elapsed 0.043 ms (4.276 ms / 100) 4.275 -> 4.275 ( +0.00%) [ +0.14% +0.00% +0.05% / +0.09% +0.02% +0.00%] index_select random_sorted : Elapsed 0.043 ms (4.281 ms / 100) B = [40, 5, 16, 4] (stride (64, 2560, 1, 16)) A = [20, 5, 16, 4] (stride (1, 320, 20, 1600)) dim = 0 2.398 -> 2.417 ( +0.79%) [ +0.13% +0.13% +0.00% / +0.79% +0.83% +0.83%] index_add_ linear : Elapsed 0.024 ms (2.401 ms / 100) 2.397 -> 2.407 ( +0.42%) [ +0.00% +0.25% +0.13% / +0.42% +0.79% +0.75%] index_copy_ linear : Elapsed 0.024 ms (2.397 ms / 100) 2.393 -> 2.401 ( +0.33%) [ +0.00% +0.00% +0.00% / +0.33% +1.09% +1.46%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.392 -> 2.406 ( +0.59%) [ +0.13% +0.00% +0.42% / +0.59% +1.17% +1.84%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.397 -> 2.405 ( +0.33%) [ +0.00% +0.13% +0.08% / +0.33% +0.88% +1.00%] index_add_ spread : Elapsed 0.024 ms (2.397 ms / 100) 2.393 -> 2.406 ( +0.54%) [ +0.00% +0.08% +0.21% / +0.54% +1.17% +1.09%] index_copy_ spread : Elapsed 0.024 ms (2.393 ms / 100) 2.404 -> 2.416 ( +0.50%) [ +0.04% +0.08% +0.00% / +0.54% +0.58% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.397 -> 2.409 ( +0.50%) [ +0.13% +0.00% +0.13% / +0.50% +0.63% +0.75%] index_copy_ strided 3 : Elapsed 0.024 ms (2.400 ms / 100) 2.403 -> 2.416 ( +0.54%) [ +0.04% +0.00% +0.12% / +0.71% +0.54% +0.71%] index_add_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.399 -> 2.414 ( +0.63%) [ +0.00% +0.08% +0.29% / +0.63% +0.71% +1.17%] index_copy_ strided 7 : Elapsed 0.024 ms (2.399 ms / 100) 2.405 -> 2.412 ( +0.29%) [ +0.00% +0.12% +0.04% / +0.58% +0.29% +0.37%] index_add_ perm : Elapsed 0.024 ms (2.405 ms / 100) 2.404 -> 2.409 ( +0.21%) [ +0.00% +0.04% +0.00% / +0.33% +0.21% +0.37%] index_copy_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.404 -> 2.407 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.58% +0.12% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.399 -> 2.409 ( +0.42%) [ +0.25% +0.04% +0.00% / +0.54% +0.63% +0.42%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.405 ms / 100) 4.435 -> 4.432 ( -0.07%) [ +0.09% +0.14% +0.00% / +0.09% -0.07% +0.20%] index_select const : Elapsed 0.044 ms (4.439 ms / 100) 4.438 -> 4.444 ( +0.14%) [ +0.00% +0.02% +0.27% / +0.18% +0.14% +0.23%] index_select wrap : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.440 ( +0.00%) [ +0.02% +0.00% +0.11% / +0.00% +0.09% +0.16%] index_select linear : Elapsed 0.044 ms (4.441 ms / 100) 4.447 -> 4.438 ( -0.20%) [ +0.00% +0.07% +0.02% / -0.02% -0.20% +0.02%] index_select reverse : Elapsed 0.044 ms (4.447 ms / 100) 4.435 -> 4.440 ( +0.11%) [ +0.00% +0.05% +0.02% / +0.14% +0.11% +0.11%] index_select skip64 : Elapsed 0.044 ms (4.435 ms / 100) 4.433 -> 4.435 ( +0.05%) [ +0.00% +0.09% +0.16% / +0.05% +0.20% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.433 ms / 100) 4.438 -> 4.444 ( +0.14%) [ +0.00% +0.07% +0.11% / +0.14% +0.18% +0.27%] index_select spread : Elapsed 0.044 ms (4.438 ms / 100) 4.440 -> 4.439 ( -0.02%) [ +0.00% +0.05% +0.11% / -0.02% +0.07% +0.38%] index_select strided 3 : Elapsed 0.044 ms (4.440 ms / 100) 4.439 -> 4.441 ( +0.05%) [ +0.00% +0.11% +0.27% / +0.05% +0.34% +0.27%] index_select strided 5 : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.439 ( +0.07%) [ +0.00% +0.07% +0.14% / +0.07% +0.32% +0.47%] index_select strided 7 : Elapsed 0.044 ms (4.436 ms / 100) 4.437 -> 4.442 ( +0.11%) [ +0.25% +0.00% +0.16% / +0.11% +0.11% +0.32%] index_select strided 8 : Elapsed 0.044 ms (4.448 ms / 100) 4.435 -> 4.442 ( +0.16%) [ +0.20% +0.18% +0.00% / +0.16% +0.18% +0.43%] index_select strided 16 : Elapsed 0.044 ms (4.444 ms / 100) 4.442 -> 4.443 ( +0.02%) [ +0.20% +0.00% +0.05% / +0.09% +0.02% +0.02%] index_select random : Elapsed 0.045 ms (4.451 ms / 100) 4.444 -> 4.441 ( -0.07%) [ +0.00% +0.05% +0.18% / +0.23% -0.07% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.444 ms / 100) B = [40, 5, 16, 4] (stride (4, 2560, 160, 1)) A = [20, 5, 16, 4] (stride (1, 20, 100, 1600)) dim = 0 2.398 -> 2.410 ( +0.50%) [ +0.04% +0.13% +0.00% / +0.50% +0.71% +1.38%] index_add_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.391 -> 2.405 ( +0.59%) [ +0.29% +0.13% +0.00% / +0.59% +0.92% +2.22%] index_copy_ linear : Elapsed 0.024 ms (2.398 ms / 100) 2.402 -> 2.411 ( +0.37%) [ +0.00% +0.29% +0.00% / +0.50% +0.37% +0.50%] index_add_ reverse : Elapsed 0.024 ms (2.402 ms / 100) 2.393 -> 2.405 ( +0.50%) [ +0.08% +0.25% +0.00% / +0.50% +0.71% +0.92%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.418 -> 2.428 ( +0.41%) [ +0.12% +0.17% +0.00% / +0.79% +0.41% +0.91%] index_add_ spread : Elapsed 0.024 ms (2.421 ms / 100) 2.426 -> 2.433 ( +0.29%) [ +0.00% +0.12% +0.08% / +0.29% +0.74% +0.91%] index_copy_ spread : Elapsed 0.024 ms (2.426 ms / 100) 2.418 -> 2.427 ( +0.37%) [ +0.00% +0.25% +0.08% / +0.41% +0.37% +0.87%] index_add_ strided 3 : Elapsed 0.024 ms (2.418 ms / 100) 2.419 -> 2.429 ( +0.41%) [ +0.17% +0.08% +0.00% / +0.41% +0.62% +2.94%] index_copy_ strided 3 : Elapsed 0.024 ms (2.423 ms / 100) 2.419 -> 2.431 ( +0.50%) [ +0.04% +0.08% +0.00% / +0.50% +0.54% +0.62%] index_add_ strided 7 : Elapsed 0.024 ms (2.420 ms / 100) 2.425 -> 2.437 ( +0.49%) [ +0.00% +0.16% +0.00% / +0.49% +0.49% +0.78%] index_copy_ strided 7 : Elapsed 0.024 ms (2.425 ms / 100) 2.413 -> 2.427 ( +0.58%) [ +0.00% +0.04% +0.08% / +0.66% +0.58% +0.66%] index_add_ perm : Elapsed 0.024 ms (2.413 ms / 100) 2.413 -> 2.426 ( +0.54%) [ +0.04% +0.04% +0.00% / +0.54% +0.79% +1.12%] index_copy_ perm : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.424 ( +0.58%) [ +0.12% +0.25% +0.00% / +0.79% +0.58% +1.00%] index_add_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.411 -> 2.423 ( +0.50%) [ +0.17% +0.00% +0.08% / +0.50% +0.83% +2.53%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.415 ms / 100) 4.415 -> 4.422 ( +0.16%) [ +0.16% +0.02% +0.00% / +0.16% +0.25% +0.41%] index_select const : Elapsed 0.044 ms (4.422 ms / 100) 4.419 -> 4.425 ( +0.14%) [ +0.27% +0.00% +0.05% / +0.20% +0.14% +0.20%] index_select wrap : Elapsed 0.044 ms (4.431 ms / 100) 4.425 -> 4.424 ( -0.02%) [ +0.00% +0.05% +0.02% / -0.02% +0.11% +0.41%] index_select linear : Elapsed 0.044 ms (4.425 ms / 100) 4.427 -> 4.420 ( -0.16%) [ +0.07% +0.02% +0.00% / -0.16% +0.25% +0.29%] index_select reverse : Elapsed 0.044 ms (4.430 ms / 100) 4.413 -> 4.421 ( +0.18%) [ +0.00% +0.05% +0.07% / +0.20% +0.25% +0.18%] index_select skip64 : Elapsed 0.044 ms (4.413 ms / 100) 4.421 -> 4.422 ( +0.02%) [ +0.07% +0.00% +0.09% / +0.02% +0.05% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.424 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.00% +0.00% +0.05% / -0.09% -0.02% +0.23%] index_select spread : Elapsed 0.044 ms (4.426 ms / 100) 4.420 -> 4.423 ( +0.07%) [ +0.18% +0.00% +0.07% / +0.20% +0.16% +0.07%] index_select strided 3 : Elapsed 0.044 ms (4.428 ms / 100) 4.420 -> 4.427 ( +0.16%) [ +0.05% +0.00% +0.23% / +0.16% +0.16% +0.27%] index_select strided 5 : Elapsed 0.044 ms (4.422 ms / 100) 4.421 -> 4.424 ( +0.07%) [ +0.00% +0.07% +0.02% / +0.07% +0.14% +0.34%] index_select strided 7 : Elapsed 0.044 ms (4.421 ms / 100) 4.422 -> 4.424 ( +0.05%) [ +0.00% +0.11% +0.11% / +0.05% +0.23% +0.05%] index_select strided 8 : Elapsed 0.044 ms (4.422 ms / 100) 4.416 -> 4.414 ( -0.05%) [ +0.05% +0.11% +0.00% / -0.05% +0.23% +0.34%] index_select strided 16 : Elapsed 0.044 ms (4.418 ms / 100) 4.418 -> 4.420 ( +0.05%) [ +0.14% +0.11% +0.00% / +0.05% +0.16% +0.45%] index_select random : Elapsed 0.044 ms (4.424 ms / 100) 4.419 -> 4.430 ( +0.25%) [ +0.18% +0.00% +0.14% / +0.25% +0.29% +0.54%] index_select random_sorted : Elapsed 0.044 ms (4.427 ms / 100) B = [40, 5, 16, 4] (stride (5, 1, 800, 200)) A = [20, 5, 16, 4] (stride (80, 16, 1, 1600)) dim = 0 1.539 -> 1.507 ( -2.08%) [ +0.00% +0.13% +0.00% / -2.08% -1.17% -0.97%] index_add_ linear : Elapsed 0.015 ms (1.539 ms / 100) 1.503 -> 1.469 ( -2.26%) [ +0.07% +0.07% +0.00% / -2.26% -1.26% -1.20%] index_copy_ linear : Elapsed 0.015 ms (1.504 ms / 100) 1.540 -> 1.514 ( -1.69%) [ +0.32% +0.00% +0.13% / -1.69% -1.43% -0.97%] index_add_ reverse : Elapsed 0.015 ms (1.545 ms / 100) 1.503 -> 1.473 ( -2.00%) [ +0.00% +0.20% +0.33% / -2.00% -1.46% -1.06%] index_copy_ reverse : Elapsed 0.015 ms (1.503 ms / 100) 1.556 -> 1.527 ( -1.86%) [ +0.00% +0.06% +0.00% / -1.86% -1.61% -0.96%] index_add_ spread : Elapsed 0.016 ms (1.556 ms / 100) 1.526 -> 1.494 ( -2.10%) [ +0.20% +0.00% +0.07% / -2.10% -1.31% -0.07%] index_copy_ spread : Elapsed 0.015 ms (1.529 ms / 100) 1.561 -> 1.526 ( -2.24%) [ +0.00% +0.00% +0.06% / -2.24% -1.73% -1.35%] index_add_ strided 3 : Elapsed 0.016 ms (1.561 ms / 100) 1.526 -> 1.496 ( -1.97%) [ +0.00% +0.00% +0.07% / -1.97% -0.92% -0.07%] index_copy_ strided 3 : Elapsed 0.015 ms (1.526 ms / 100) 1.563 -> 1.534 ( -1.86%) [ +0.13% +0.19% +0.00% / -1.86% -1.79% -1.60%] index_add_ strided 7 : Elapsed 0.016 ms (1.565 ms / 100) 1.529 -> 1.500 ( -1.90%) [ +0.00% +0.13% +0.33% / -1.90% -1.24% -1.05%] index_copy_ strided 7 : Elapsed 0.015 ms (1.529 ms / 100) 1.549 -> 1.521 ( -1.81%) [ +0.00% +0.32% +0.13% / -1.81% -1.23% -1.23%] index_add_ perm : Elapsed 0.015 ms (1.549 ms / 100) 1.520 -> 1.489 ( -2.04%) [ +0.00% +0.20% +0.00% / -2.04% -1.25% -0.53%] index_copy_ perm : Elapsed 0.015 ms (1.520 ms / 100) 1.548 -> 1.522 ( -1.68%) [ +0.32% +0.06% +0.00% / -1.68% -1.29% -1.36%] index_add_ perm_sorted : Elapsed 0.016 ms (1.553 ms / 100) 1.516 -> 1.484 ( -2.11%) [ +0.00% +0.13% +0.33% / -2.11% -1.25% -0.86%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.516 ms / 100) 2.879 -> 2.885 ( +0.21%) [ +0.17% +0.24% +0.00% / +0.21% +0.24% +0.45%] index_select const : Elapsed 0.029 ms (2.884 ms / 100) 2.893 -> 2.898 ( +0.17%) [ +0.17% +0.00% +0.03% / +0.17% +0.59% +0.97%] index_select wrap : Elapsed 0.029 ms (2.898 ms / 100) 2.893 -> 2.896 ( +0.10%) [ +0.28% +0.10% +0.00% / +0.10% +0.62% +0.66%] index_select linear : Elapsed 0.029 ms (2.901 ms / 100) 2.887 -> 2.895 ( +0.28%) [ +0.24% +0.00% +0.28% / +0.28% +0.28% +0.38%] index_select reverse : Elapsed 0.029 ms (2.894 ms / 100) 2.880 -> 2.879 ( -0.03%) [ +0.00% +0.21% +0.17% / -0.03% +0.24% +0.45%] index_select skip64 : Elapsed 0.029 ms (2.880 ms / 100) 2.902 -> 2.887 ( -0.52%) [ +0.21% +0.07% +0.00% / -0.07% -0.52% -0.17%] index_select skip256 : Elapsed 0.029 ms (2.908 ms / 100) 2.890 -> 2.884 ( -0.21%) [ +0.00% +0.07% +0.00% / -0.21% -0.07% +0.31%] index_select spread : Elapsed 0.029 ms (2.890 ms / 100) 2.890 -> 2.892 ( +0.07%) [ +0.07% +0.17% +0.00% / +0.10% +0.07% +0.10%] index_select strided 3 : Elapsed 0.029 ms (2.892 ms / 100) 2.883 -> 2.882 ( -0.03%) [ +0.00% +0.00% +0.07% / -0.03% +0.21% +0.38%] index_select strided 5 : Elapsed 0.029 ms (2.883 ms / 100) 2.891 -> 2.884 ( -0.24%) [ +0.24% +0.00% +0.14% / +0.03% -0.24% +0.10%] index_select strided 7 : Elapsed 0.029 ms (2.898 ms / 100) 2.903 -> 2.891 ( -0.41%) [ +0.00% +0.17% +0.17% / +0.17% -0.41% -0.31%] index_select strided 8 : Elapsed 0.029 ms (2.903 ms / 100) 2.899 -> 2.889 ( -0.34%) [ +0.10% +0.31% +0.00% / +0.38% -0.34% -0.28%] index_select strided 16 : Elapsed 0.029 ms (2.902 ms / 100) 2.890 -> 2.892 ( +0.07%) [ +0.07% +0.00% +0.31% / +0.07% +0.45% +0.28%] index_select random : Elapsed 0.029 ms (2.892 ms / 100) 2.884 -> 2.886 ( +0.07%) [ +0.00% +0.14% +0.45% / +0.07% +0.24% +0.28%] index_select random_sorted : Elapsed 0.029 ms (2.884 ms / 100) out_shape = [20, 40, 16, 4] in_shape = [20, 5, 16, 4] idx_dim = 1 B = [20, 40, 16, 4] (stride (2560, 16, 1, 640)) A = [20, 5, 16, 4] (stride (64, 1280, 1, 16)) dim = 1 1.522 -> 1.522 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.39% +0.99%] index_add_ linear : Elapsed 0.015 ms (1.524 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.27% +1.15%] index_copy_ linear : Elapsed 0.015 ms (1.478 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.39% +0.53%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.34% +0.47%] index_copy_ reverse : Elapsed 0.015 ms (1.478 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.39% +0.46%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.47% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.477 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.07% +0.00% +0.13% / -0.07% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.476 -> 1.477 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.47% +0.61%] index_copy_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.46% +0.85%] index_add_ strided 7 : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.47% +1.56%] index_copy_ strided 7 : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.46% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.475 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.47% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.476 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.39% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.47% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 8.519 -> 8.531 ( +0.14%) [ +0.21% +0.28% +0.00% / +0.20% +0.15% +0.14%] index_select const : Elapsed 0.085 ms (8.537 ms / 100) 8.541 -> 8.569 ( +0.33%) [ +0.32% +0.00% +0.35% / +0.33% +0.42% +0.34%] index_select wrap : Elapsed 0.086 ms (8.568 ms / 100) 8.536 -> 8.550 ( +0.16%) [ +0.00% +0.25% +0.06% / +0.21% +0.16% +0.45%] index_select linear : Elapsed 0.085 ms (8.536 ms / 100) 8.544 -> 8.541 ( -0.04%) [ +0.11% +0.00% +0.01% / +0.19% -0.04% +0.20%] index_select reverse : Elapsed 0.086 ms (8.553 ms / 100) 8.523 -> 8.534 ( +0.13%) [ +0.02% +0.00% +0.16% / +0.15% +0.13% +0.41%] index_select skip64 : Elapsed 0.085 ms (8.525 ms / 100) 8.520 -> 8.527 ( +0.08%) [ +0.00% +0.32% +0.12% / +0.08% +0.08% +0.33%] index_select skip256 : Elapsed 0.085 ms (8.520 ms / 100) 8.541 -> 8.545 ( +0.05%) [ +0.08% +0.00% +0.05% / +0.30% +0.05% +0.21%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.541 -> 8.537 ( -0.05%) [ +0.00% +0.15% +0.14% / -0.05% +0.25% +0.20%] index_select strided 3 : Elapsed 0.085 ms (8.541 ms / 100) 8.544 -> 8.546 ( +0.02%) [ +0.00% +0.12% +0.02% / +0.02% +0.02% +0.06%] index_select random : Elapsed 0.085 ms (8.544 ms / 100) 8.550 -> 8.545 ( -0.06%) [ +0.00% +0.16% +0.00% / -0.06% -0.01% +0.12%] index_select random_sorted : Elapsed 0.085 ms (8.550 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) A = [20, 5, 16, 4] (stride (320, 16, 1, 80)) dim = 1 1.518 -> 1.519 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.72% +0.79%] index_add_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.475 -> 1.477 ( +0.14%) [ +0.41% +0.14% +0.00% / +0.14% +0.61% +0.75%] index_copy_ linear : Elapsed 0.015 ms (1.481 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.59% +0.72%] index_add_ reverse : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.480 ( +0.20%) [ +0.00% +0.00% +0.00% / +0.20% +0.47% +0.61%] index_copy_ reverse : Elapsed 0.015 ms (1.477 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.59% +1.25%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.480 ( +0.27%) [ +0.14% +0.07% +0.00% / +0.27% +0.75% +2.51%] index_copy_ spread : Elapsed 0.015 ms (1.478 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.79% +1.65%] index_add_ strided 3 : Elapsed 0.015 ms (1.521 ms / 100) 1.477 -> 1.479 ( +0.14%) [ +0.00% +0.07% +0.00% / +0.14% +0.54% +1.96%] index_copy_ strided 3 : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.520 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.66% +1.05%] index_add_ strided 7 : Elapsed 0.015 ms (1.521 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.61% +0.81%] index_copy_ strided 7 : Elapsed 0.015 ms (1.477 ms / 100) 1.520 -> 1.521 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.79% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.88% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.477 ms / 100) 1.521 -> 1.520 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.59% +0.85%] index_add_ perm_sorted : Elapsed 0.015 ms (1.521 ms / 100) 1.476 -> 1.476 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.61% +1.29%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.476 ms / 100) 8.561 -> 8.572 ( +0.13%) [ +0.08% +0.40% +0.00% / +0.13% +0.44% +0.47%] index_select const : Elapsed 0.086 ms (8.568 ms / 100) 8.585 -> 8.578 ( -0.08%) [ +0.00% +0.08% +0.07% / -0.08% +0.10% +0.30%] index_select wrap : Elapsed 0.086 ms (8.585 ms / 100) 8.587 -> 8.571 ( -0.19%) [ +0.00% +0.03% +0.21% / -0.19% +0.47% +0.08%] index_select linear : Elapsed 0.086 ms (8.587 ms / 100) 8.588 -> 8.591 ( +0.03%) [ +0.00% +0.00% +0.02% / +0.03% +0.22% +0.27%] index_select reverse : Elapsed 0.086 ms (8.588 ms / 100) 8.567 -> 8.568 ( +0.01%) [ +0.00% +0.01% +0.06% / +0.01% +0.47% +0.23%] index_select skip64 : Elapsed 0.086 ms (8.567 ms / 100) 8.567 -> 8.572 ( +0.06%) [ +0.02% +0.00% +0.04% / +0.06% +0.40% +0.14%] index_select skip256 : Elapsed 0.086 ms (8.569 ms / 100) 8.584 -> 8.575 ( -0.10%) [ +0.22% +0.00% +0.08% / -0.10% +0.29% +0.28%] index_select spread : Elapsed 0.086 ms (8.603 ms / 100) 8.575 -> 8.588 ( +0.15%) [ +0.17% +0.00% +0.13% / +0.15% +0.42% +0.45%] index_select strided 3 : Elapsed 0.086 ms (8.590 ms / 100) 8.588 -> 8.593 ( +0.06%) [ +0.03% +0.00% +0.06% / +0.06% +0.14% +0.52%] index_select random : Elapsed 0.086 ms (8.591 ms / 100) 8.575 -> 8.606 ( +0.36%) [ +0.00% +0.23% +0.16% / +0.36% +0.50% +0.42%] index_select random_sorted : Elapsed 0.086 ms (8.575 ms / 100) B = [20, 40, 16, 4] (stride (2560, 1, 40, 640)) A = [20, 5, 16, 4] (stride (64, 1280, 1, 16)) dim = 1 1.523 -> 1.523 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.39% +0.59%] index_add_ linear : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.34% +0.95%] index_copy_ linear : Elapsed 0.015 ms (1.480 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.39% +0.99%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.479 ( +0.00%) [ +0.00% +0.14% +0.00% / +0.00% +0.27% +1.08%] index_copy_ reverse : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.522 ( +0.00%) [ +0.07% +0.20% +0.00% / +0.00% +0.33% +0.39%] index_add_ spread : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.477 ( -0.14%) [ +0.00% +0.07% +0.07% / -0.14% +0.34% +0.34%] index_copy_ spread : Elapsed 0.015 ms (1.479 ms / 100) 1.522 -> 1.521 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.46% +0.53%] index_add_ strided 3 : Elapsed 0.015 ms (1.523 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.41% +0.47%] index_copy_ strided 3 : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.522 ( +0.07%) [ +0.13% +0.13% +0.00% / +0.07% +0.53% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.523 ms / 100) 1.479 -> 1.478 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.41% +0.54%] index_copy_ strided 7 : Elapsed 0.015 ms (1.479 ms / 100) 1.521 -> 1.521 ( +0.00%) [ +0.07% +0.13% +0.00% / +0.00% +0.53% +1.05%] index_add_ perm : Elapsed 0.015 ms (1.522 ms / 100) 1.478 -> 1.477 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.41% +1.29%] index_copy_ perm : Elapsed 0.015 ms (1.478 ms / 100) 1.520 -> 1.522 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.59% +0.72%] index_add_ perm_sorted : Elapsed 0.015 ms (1.522 ms / 100) 1.477 -> 1.477 ( +0.00%) [ +0.14% +0.14% +0.00% / +0.00% +0.47% +1.02%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.479 ms / 100) 8.546 -> 8.548 ( +0.02%) [ +0.12% +0.15% +0.00% / +0.02% +0.73% +0.23%] index_select const : Elapsed 0.086 ms (8.556 ms / 100) 8.576 -> 8.577 ( +0.01%) [ +0.00% +0.28% +0.23% / +0.06% +0.01% +0.86%] index_select wrap : Elapsed 0.086 ms (8.576 ms / 100) 8.579 -> 8.577 ( -0.02%) [ +0.00% +0.03% +0.02% / -0.02% +0.00% +0.23%] index_select linear : Elapsed 0.086 ms (8.579 ms / 100) 8.567 -> 8.590 ( +0.27%) [ +0.00% +0.18% +0.16% / +0.27% +0.33% +0.40%] index_select reverse : Elapsed 0.086 ms (8.567 ms / 100) 8.560 -> 8.567 ( +0.08%) [ +0.02% +0.02% +0.00% / +0.18% +0.11% +0.08%] index_select skip64 : Elapsed 0.086 ms (8.562 ms / 100) 8.559 -> 8.569 ( +0.12%) [ +0.00% +0.39% +0.02% / +0.13% +0.12% +0.33%] index_select skip256 : Elapsed 0.086 ms (8.559 ms / 100) 8.550 -> 8.583 ( +0.39%) [ +0.34% +0.25% +0.00% / +0.41% +0.39% +0.56%] index_select spread : Elapsed 0.086 ms (8.579 ms / 100) 8.576 -> 8.589 ( +0.15%) [ +0.00% +0.12% +0.42% / +0.26% +0.15% +0.52%] index_select strided 3 : Elapsed 0.086 ms (8.576 ms / 100) 8.575 -> 8.580 ( +0.06%) [ +0.09% +0.00% +0.24% / +0.06% +0.41% +0.34%] index_select random : Elapsed 0.086 ms (8.583 ms / 100) 8.556 -> 8.569 ( +0.15%) [ +0.36% +0.08% +0.00% / +0.15% +0.37% +0.54%] index_select random_sorted : Elapsed 0.086 ms (8.587 ms / 100) B = [20, 40, 16, 4] (stride (64, 1280, 4, 1)) A = [20, 5, 16, 4] (stride (1, 1280, 20, 320)) dim = 1 1.497 -> 1.499 ( +0.13%) [ +0.00% +0.27% +0.00% / +0.13% +0.73% +0.87%] index_add_ linear : Elapsed 0.015 ms (1.497 ms / 100) 1.445 -> 1.446 ( +0.07%) [ +0.00% +0.14% +0.14% / +0.07% +0.62% +0.83%] index_copy_ linear : Elapsed 0.014 ms (1.445 ms / 100) 1.497 -> 1.499 ( +0.13%) [ +0.07% +0.13% +0.00% / +0.13% +0.73% +0.80%] index_add_ reverse : Elapsed 0.015 ms (1.498 ms / 100) 1.445 -> 1.447 ( +0.14%) [ +0.00% +0.14% +0.07% / +0.14% +0.62% +0.62%] index_copy_ reverse : Elapsed 0.014 ms (1.445 ms / 100) 1.497 -> 1.498 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.80% +0.73%] index_add_ spread : Elapsed 0.015 ms (1.498 ms / 100) 1.447 -> 1.447 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.97%] index_copy_ spread : Elapsed 0.014 ms (1.447 ms / 100) 1.497 -> 1.497 ( +0.00%) [ +0.00% +0.13% +0.07% / +0.00% +0.73% +0.87%] index_add_ strided 3 : Elapsed 0.015 ms (1.497 ms / 100) 1.446 -> 1.444 ( -0.14%) [ +0.00% +0.00% +0.07% / -0.14% +0.48% +1.11%] index_copy_ strided 3 : Elapsed 0.014 ms (1.446 ms / 100) 1.510 -> 1.511 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.60% +0.66%] index_add_ strided 7 : Elapsed 0.015 ms (1.511 ms / 100) 1.456 -> 1.456 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.69% +0.82%] index_copy_ strided 7 : Elapsed 0.015 ms (1.456 ms / 100) 1.509 -> 1.510 ( +0.07%) [ +0.07% +0.40% +0.00% / +0.07% +0.66% +0.73%] index_add_ perm : Elapsed 0.015 ms (1.510 ms / 100) 1.456 -> 1.456 ( +0.00%) [ +0.27% +0.07% +0.00% / +0.00% +0.69% +0.69%] index_copy_ perm : Elapsed 0.015 ms (1.460 ms / 100) 1.496 -> 1.496 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.80% +1.00%] index_add_ perm_sorted : Elapsed 0.015 ms (1.497 ms / 100) 1.445 -> 1.444 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.62% +0.83%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.445 ms / 100) 8.185 -> 8.198 ( +0.16%) [ +0.01% +0.01% +0.00% / +0.28% +0.18% +0.16%] index_select const : Elapsed 0.082 ms (8.186 ms / 100) 8.188 -> 8.197 ( +0.11%) [ +0.00% +0.01% +0.06% / +0.11% +0.42% +0.62%] index_select wrap : Elapsed 0.082 ms (8.188 ms / 100) 8.187 -> 8.194 ( +0.09%) [ +0.27% +0.00% +0.12% / +0.09% +0.20% +0.33%] index_select linear : Elapsed 0.082 ms (8.209 ms / 100) 8.178 -> 8.196 ( +0.22%) [ +0.07% +0.00% +0.24% / +0.22% +0.54% +0.45%] index_select reverse : Elapsed 0.082 ms (8.184 ms / 100) 8.184 -> 8.173 ( -0.13%) [ +0.06% +0.00% +0.18% / -0.13% +0.06% +0.65%] index_select skip64 : Elapsed 0.082 ms (8.189 ms / 100) 8.192 -> 8.198 ( +0.07%) [ +0.00% +0.07% +0.06% / +0.09% +0.16% +0.07%] index_select skip256 : Elapsed 0.082 ms (8.192 ms / 100) 8.197 -> 8.190 ( -0.09%) [ +0.15% +0.00% +0.28% / -0.09% +0.50% +0.56%] index_select spread : Elapsed 0.082 ms (8.209 ms / 100) 8.182 -> 8.193 ( +0.13%) [ +0.00% +0.27% +0.15% / +0.13% +0.28% +0.50%] index_select strided 3 : Elapsed 0.082 ms (8.182 ms / 100) 8.185 -> 8.182 ( -0.04%) [ +0.00% +0.31% +0.02% / -0.04% +0.37% +0.53%] index_select random : Elapsed 0.082 ms (8.185 ms / 100) 8.185 -> 8.206 ( +0.26%) [ +0.15% +0.06% +0.00% / +0.26% +0.59% +0.61%] index_select random_sorted : Elapsed 0.082 ms (8.197 ms / 100) B = [20, 40, 16, 4] (stride (160, 4, 3200, 1)) A = [20, 5, 16, 4] (stride (1, 80, 400, 20)) dim = 1 1.623 -> 1.622 ( -0.06%) [ +0.00% +0.00% +0.00% / -0.06% +0.37% +0.99%] index_add_ linear : Elapsed 0.016 ms (1.623 ms / 100) 1.573 -> 1.571 ( -0.13%) [ +0.06% +0.00% +0.13% / -0.13% +0.32% +0.45%] index_copy_ linear : Elapsed 0.016 ms (1.574 ms / 100) 1.616 -> 1.618 ( +0.12%) [ +0.19% +0.00% +0.00% / +0.12% +0.50% +0.62%] index_add_ reverse : Elapsed 0.016 ms (1.619 ms / 100) 1.572 -> 1.574 ( +0.13%) [ +0.38% +0.00% +0.06% / +0.19% +0.13% +0.32%] index_copy_ reverse : Elapsed 0.016 ms (1.578 ms / 100) 1.617 -> 1.617 ( +0.00%) [ +0.12% +0.06% +0.00% / +0.00% +0.49% +0.62%] index_add_ spread : Elapsed 0.016 ms (1.619 ms / 100) 1.571 -> 1.572 ( +0.06%) [ +0.45% +0.00% +0.32% / +0.06% +0.13% +0.70%] index_copy_ spread : Elapsed 0.016 ms (1.578 ms / 100) 1.615 -> 1.616 ( +0.06%) [ +0.12% +0.12% +0.00% / +0.06% +0.62% +0.68%] index_add_ strided 3 : Elapsed 0.016 ms (1.617 ms / 100) 1.573 -> 1.570 ( -0.19%) [ +0.00% +0.19% +0.00% / -0.19% +0.19% +0.19%] index_copy_ strided 3 : Elapsed 0.016 ms (1.573 ms / 100) 1.616 -> 1.616 ( +0.00%) [ +0.06% +0.00% +0.12% / +0.00% +0.56% +0.50%] index_add_ strided 7 : Elapsed 0.016 ms (1.617 ms / 100) 1.576 -> 1.570 ( -0.38%) [ +0.00% +0.00% +0.00% / -0.38% -0.06% +0.06%] index_copy_ strided 7 : Elapsed 0.016 ms (1.576 ms / 100) 1.622 -> 1.622 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.62% +0.55%] index_add_ perm : Elapsed 0.016 ms (1.623 ms / 100) 1.571 -> 1.576 ( +0.32%) [ +0.00% +0.19% +0.32% / +0.32% +0.57% +0.57%] index_copy_ perm : Elapsed 0.016 ms (1.571 ms / 100) 1.609 -> 1.610 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.56% +0.75%] index_add_ perm_sorted : Elapsed 0.016 ms (1.611 ms / 100) 1.571 -> 1.570 ( -0.06%) [ +0.00% +0.19% +0.25% / +0.06% -0.06% +0.38%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.571 ms / 100) 8.524 -> 8.527 ( +0.04%) [ +0.11% +0.00% +0.11% / +0.08% +0.04% +0.11%] index_select const : Elapsed 0.085 ms (8.533 ms / 100) 8.535 -> 8.542 ( +0.08%) [ +0.12% +0.00% +0.05% / +0.08% +0.11% +0.23%] index_select wrap : Elapsed 0.085 ms (8.545 ms / 100) 8.541 -> 8.537 ( -0.05%) [ +0.00% +0.32% +0.19% / +0.01% -0.05% +0.12%] index_select linear : Elapsed 0.085 ms (8.541 ms / 100) 8.517 -> 8.546 ( +0.34%) [ +0.00% +0.16% +0.20% / +0.34% +0.40% +0.66%] index_select reverse : Elapsed 0.085 ms (8.517 ms / 100) 8.521 -> 8.519 ( -0.02%) [ +0.07% +0.00% +0.11% / -0.02% +0.21% +0.58%] index_select skip64 : Elapsed 0.085 ms (8.527 ms / 100) 8.524 -> 8.522 ( -0.02%) [ +0.11% +0.00% +0.22% / -0.02% +0.08% +0.35%] index_select skip256 : Elapsed 0.085 ms (8.533 ms / 100) 8.554 -> 8.543 ( -0.13%) [ +0.00% +0.12% +0.13% / -0.13% +0.40% +0.57%] index_select spread : Elapsed 0.086 ms (8.554 ms / 100) 8.535 -> 8.530 ( -0.06%) [ +0.01% +0.25% +0.00% / -0.06% -0.01% +0.35%] index_select strided 3 : Elapsed 0.085 ms (8.536 ms / 100) 8.534 -> 8.558 ( +0.28%) [ +0.00% +0.05% +0.30% / +0.32% +0.28% +0.43%] index_select random : Elapsed 0.085 ms (8.534 ms / 100) 8.543 -> 8.557 ( +0.16%) [ +0.01% +0.00% +0.13% / +0.16% +0.19% +0.46%] index_select random_sorted : Elapsed 0.085 ms (8.544 ms / 100) B = [20, 40, 16, 4] (stride (40, 1, 3200, 800)) A = [20, 5, 16, 4] (stride (320, 64, 1, 16)) dim = 1 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.63% +1.06%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.380 ( +0.22%) [ +0.15% +0.29% +0.00% / +0.22% +0.58% +1.31%] index_copy_ linear : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.63% +0.70%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.381 -> 1.378 ( -0.22%) [ +0.00% +0.00% +0.00% / -0.22% +0.29% +0.43%] index_copy_ reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.77% +0.70%] index_add_ spread : Elapsed 0.014 ms (1.424 ms / 100) 1.382 -> 1.378 ( -0.29%) [ +0.00% +0.00% +0.00% / -0.29% +0.43% +0.29%] index_copy_ spread : Elapsed 0.014 ms (1.382 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.380 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.51% +0.58%] index_copy_ strided 3 : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.63% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.383 ( +0.29%) [ +0.00% +0.22% +0.15% / +0.29% +0.36% +0.65%] index_copy_ strided 7 : Elapsed 0.014 ms (1.379 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.70% +1.20%] index_add_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.15% +0.00% +0.15% / +0.22% +0.58% +1.74%] index_copy_ perm : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.77% +0.84%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.378 -> 1.382 ( +0.29%) [ +0.22% +0.15% +0.00% / +0.29% +0.58% +0.87%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.381 ms / 100) 8.235 -> 8.238 ( +0.04%) [ +0.12% +0.05% +0.00% / +0.10% +0.28% +0.04%] index_select const : Elapsed 0.082 ms (8.245 ms / 100) 8.257 -> 8.260 ( +0.04%) [ +0.00% +0.13% +0.08% / +0.04% +0.25% +0.65%] index_select wrap : Elapsed 0.083 ms (8.257 ms / 100) 8.248 -> 8.258 ( +0.12%) [ +0.22% +0.00% +0.05% / +0.12% +0.33% +0.35%] index_select linear : Elapsed 0.083 ms (8.266 ms / 100) 8.257 -> 8.252 ( -0.06%) [ +0.01% +0.05% +0.00% / -0.06% +0.22% +0.40%] index_select reverse : Elapsed 0.083 ms (8.258 ms / 100) 8.230 -> 8.236 ( +0.07%) [ +0.21% +0.00% +0.11% / +0.07% +0.18% +0.44%] index_select skip64 : Elapsed 0.082 ms (8.247 ms / 100) 8.236 -> 8.237 ( +0.01%) [ +0.15% +0.00% +0.05% / +0.08% +0.01% +0.17%] index_select skip256 : Elapsed 0.082 ms (8.248 ms / 100) 8.242 -> 8.247 ( +0.06%) [ +0.13% +0.02% +0.00% / +0.17% +0.17% +0.06%] index_select spread : Elapsed 0.083 ms (8.253 ms / 100) 8.248 -> 8.271 ( +0.28%) [ +0.00% +0.16% +0.12% / +0.33% +0.48% +0.28%] index_select strided 3 : Elapsed 0.082 ms (8.248 ms / 100) 8.245 -> 8.268 ( +0.28%) [ +0.15% +0.04% +0.00% / +0.28% +0.38% +0.50%] index_select random : Elapsed 0.083 ms (8.257 ms / 100) 8.240 -> 8.257 ( +0.21%) [ +0.00% +0.22% +0.11% / +0.21% +0.56% +0.58%] index_select random_sorted : Elapsed 0.082 ms (8.240 ms / 100) B = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) A = [20, 5, 16, 4] (stride (320, 4, 20, 1)) dim = 1 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +1.71% +0.53%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.475 -> 1.476 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.47% +0.54%] index_copy_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.53% +0.72%] index_add_ reverse : Elapsed 0.015 ms (1.518 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.54% +0.88%] index_copy_ reverse : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.59% +0.66%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.48% +0.61%] index_copy_ spread : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.59% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.518 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.61%] index_copy_ strided 3 : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.475 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.54% +0.75%] index_copy_ strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.72%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.54% +0.75%] index_copy_ perm : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.99% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.88% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) 8.563 -> 8.581 ( +0.21%) [ +0.02% +0.00% +0.16% / +0.23% +0.35% +0.21%] index_select const : Elapsed 0.086 ms (8.565 ms / 100) 8.574 -> 8.592 ( +0.21%) [ +0.14% +0.00% +0.03% / +0.26% +0.27% +0.21%] index_select wrap : Elapsed 0.086 ms (8.586 ms / 100) 8.561 -> 8.573 ( +0.14%) [ +0.29% +0.00% +0.34% / +0.14% +0.46% +0.62%] index_select linear : Elapsed 0.086 ms (8.586 ms / 100) 8.564 -> 8.580 ( +0.19%) [ +0.16% +0.00% +0.28% / +0.27% +0.41% +0.19%] index_select reverse : Elapsed 0.086 ms (8.578 ms / 100) 8.570 -> 8.600 ( +0.35%) [ +0.16% +0.08% +0.00% / +0.44% +0.35% +0.47%] index_select skip64 : Elapsed 0.086 ms (8.584 ms / 100) 8.558 -> 8.580 ( +0.26%) [ +0.23% +0.00% +0.18% / +0.37% +0.49% +0.26%] index_select skip256 : Elapsed 0.086 ms (8.578 ms / 100) 8.580 -> 8.572 ( -0.09%) [ +0.00% +0.02% +0.00% / -0.09% +0.22% +0.24%] index_select spread : Elapsed 0.086 ms (8.580 ms / 100) 8.565 -> 8.590 ( +0.29%) [ +0.35% +0.00% +0.14% / +0.29% +0.54% +0.47%] index_select strided 3 : Elapsed 0.086 ms (8.595 ms / 100) 8.577 -> 8.582 ( +0.06%) [ +0.08% +0.00% +0.12% / +0.08% +0.06% +0.43%] index_select random : Elapsed 0.086 ms (8.584 ms / 100) 8.583 -> 8.577 ( -0.07%) [ +0.01% +0.00% +0.03% / -0.07% +0.31% +0.07%] index_select random_sorted : Elapsed 0.086 ms (8.584 ms / 100) B = [20, 40, 16, 4] (stride (640, 1, 40, 12800)) A = [20, 5, 16, 4] (stride (5, 1, 100, 1600)) dim = 1 1.662 -> 1.664 ( +0.12%) [ +0.18% +0.12% +0.00% / +0.12% +0.84% +0.72%] index_add_ linear : Elapsed 0.017 ms (1.665 ms / 100) 1.611 -> 1.610 ( -0.06%) [ +0.00% +0.00% +0.06% / -0.06% +1.18% +0.56%] index_copy_ linear : Elapsed 0.016 ms (1.611 ms / 100) 1.664 -> 1.665 ( +0.06%) [ +0.06% +0.12% +0.00% / +0.06% +0.60% +1.20%] index_add_ reverse : Elapsed 0.017 ms (1.665 ms / 100) 1.613 -> 1.613 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.62% +0.87%] index_copy_ reverse : Elapsed 0.016 ms (1.614 ms / 100) 1.664 -> 1.665 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.54% +0.60%] index_add_ spread : Elapsed 0.017 ms (1.666 ms / 100) 1.612 -> 1.614 ( +0.12%) [ +0.00% +0.25% +0.12% / +0.12% +0.56% +0.62%] index_copy_ spread : Elapsed 0.016 ms (1.612 ms / 100) 1.665 -> 1.663 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.48% +0.54%] index_add_ strided 3 : Elapsed 0.017 ms (1.665 ms / 100) 1.611 -> 1.612 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.62% +0.93%] index_copy_ strided 3 : Elapsed 0.016 ms (1.612 ms / 100) 1.663 -> 1.664 ( +0.06%) [ +0.06% +0.00% +0.00% / +0.06% +0.72% +0.66%] index_add_ strided 7 : Elapsed 0.017 ms (1.664 ms / 100) 1.612 -> 1.612 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.87% +0.62%] index_copy_ strided 7 : Elapsed 0.016 ms (1.612 ms / 100) 1.662 -> 1.663 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.72% +0.72%] index_add_ perm : Elapsed 0.017 ms (1.664 ms / 100) 1.610 -> 1.612 ( +0.12%) [ +0.12% +0.06% +0.00% / +0.12% +0.68% +0.87%] index_copy_ perm : Elapsed 0.016 ms (1.612 ms / 100) 1.665 -> 1.664 ( -0.06%) [ +0.00% +0.06% +0.00% / -0.06% +0.60% +0.60%] index_add_ perm_sorted : Elapsed 0.017 ms (1.665 ms / 100) 1.610 -> 1.611 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.06% +0.75% +1.30%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.611 ms / 100) 8.576 -> 8.595 ( +0.22%) [ +0.05% +0.16% +0.00% / +0.33% +0.22% +0.55%] index_select const : Elapsed 0.086 ms (8.580 ms / 100) 8.587 -> 8.592 ( +0.06%) [ +0.20% +0.00% +0.06% / +0.06% +0.43% +0.33%] index_select wrap : Elapsed 0.086 ms (8.604 ms / 100) 8.584 -> 8.581 ( -0.03%) [ +0.22% +0.00% +0.01% / -0.03% +0.40% +0.49%] index_select linear : Elapsed 0.086 ms (8.603 ms / 100) 8.593 -> 8.576 ( -0.20%) [ +0.00% +0.02% +0.01% / -0.20% +0.22% +0.20%] index_select reverse : Elapsed 0.086 ms (8.593 ms / 100) 8.584 -> 8.586 ( +0.02%) [ +0.00% +0.13% +0.24% / +0.02% +0.03% +0.38%] index_select skip64 : Elapsed 0.086 ms (8.584 ms / 100) 8.589 -> 8.584 ( -0.06%) [ +0.16% +0.00% +0.02% / +0.07% +0.26% -0.06%] index_select skip256 : Elapsed 0.086 ms (8.603 ms / 100) 8.583 -> 8.583 ( +0.00%) [ +0.00% +0.02% +0.08% / +0.00% +0.33% +0.13%] index_select spread : Elapsed 0.086 ms (8.583 ms / 100) 8.584 -> 8.585 ( +0.01%) [ +0.19% +0.00% +0.05% / +0.01% +0.24% +0.22%] index_select strided 3 : Elapsed 0.086 ms (8.600 ms / 100) 8.588 -> 8.586 ( -0.02%) [ +0.14% +0.00% +0.21% / -0.02% +0.08% +0.50%] index_select random : Elapsed 0.086 ms (8.600 ms / 100) 8.581 -> 8.580 ( -0.01%) [ +0.06% +0.00% +0.22% / -0.01% +0.35% +0.42%] index_select random_sorted : Elapsed 0.086 ms (8.586 ms / 100) B = [20, 40, 16, 4] (stride (1, 20, 800, 12800)) A = [20, 5, 16, 4] (stride (64, 1280, 4, 1)) dim = 1 1.420 -> 1.421 ( +0.07%) [ +0.14% +0.14% +0.00% / +0.07% +0.42% +0.42%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.07% +0.22% +0.00% / +0.00% +0.66% +0.58%] index_copy_ linear : Elapsed 0.014 ms (1.375 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.21% +0.28%] index_add_ reverse : Elapsed 0.014 ms (1.422 ms / 100) 1.375 -> 1.376 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.44% +0.80%] index_copy_ reverse : Elapsed 0.014 ms (1.375 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.14% +0.00% +0.14% / +0.07% +0.28% +0.42%] index_add_ spread : Elapsed 0.014 ms (1.421 ms / 100) 1.373 -> 1.375 ( +0.15%) [ +0.07% +0.22% +0.00% / +0.15% +0.66% +0.80%] index_copy_ spread : Elapsed 0.014 ms (1.374 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.21% +0.00% +0.14% / +0.07% +0.42% +0.49%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.58% +0.51%] index_copy_ strided 3 : Elapsed 0.014 ms (1.374 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.00% +0.14% +0.21% / +0.07% +0.42% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.419 ms / 100) 1.373 -> 1.374 ( +0.07%) [ +0.00% +0.15% +0.15% / +0.07% +0.51% +0.58%] index_copy_ strided 7 : Elapsed 0.014 ms (1.373 ms / 100) 1.419 -> 1.420 ( +0.07%) [ +0.21% +0.14% +0.00% / +0.07% +0.42% +0.49%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.374 -> 1.374 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.58% +0.51%] index_copy_ perm : Elapsed 0.014 ms (1.374 ms / 100) 1.420 -> 1.420 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.35% +0.56%] index_add_ perm_sorted : Elapsed 0.014 ms (1.420 ms / 100) 1.374 -> 1.375 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.58% +0.51%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.375 ms / 100) 8.185 -> 8.205 ( +0.24%) [ +0.38% +0.13% +0.00% / +0.24% +0.38% +0.50%] index_select const : Elapsed 0.082 ms (8.216 ms / 100) 8.224 -> 8.239 ( +0.18%) [ +0.24% +0.05% +0.00% / +0.40% +0.18% +0.22%] index_select wrap : Elapsed 0.082 ms (8.244 ms / 100) 8.217 -> 8.219 ( +0.02%) [ +0.00% +0.35% +0.18% / +0.02% +0.09% +0.21%] index_select linear : Elapsed 0.082 ms (8.217 ms / 100) 8.213 -> 8.223 ( +0.12%) [ +0.00% +0.34% +0.11% / +0.13% +0.12% +0.17%] index_select reverse : Elapsed 0.082 ms (8.213 ms / 100) 8.209 -> 8.203 ( -0.07%) [ +0.13% +0.00% +0.04% / +0.02% +0.02% -0.07%] index_select skip64 : Elapsed 0.082 ms (8.220 ms / 100) 8.199 -> 8.201 ( +0.02%) [ +0.05% +0.21% +0.00% / +0.06% +0.02% +0.21%] index_select skip256 : Elapsed 0.082 ms (8.203 ms / 100) 8.210 -> 8.221 ( +0.13%) [ +0.00% +0.18% +0.26% / +0.13% +0.40% +0.19%] index_select spread : Elapsed 0.082 ms (8.210 ms / 100) 8.226 -> 8.233 ( +0.09%) [ +0.13% +0.07% +0.00% / +0.22% +0.09% +0.16%] index_select strided 3 : Elapsed 0.082 ms (8.237 ms / 100) 8.235 -> 8.222 ( -0.16%) [ +0.00% +0.00% +0.01% / -0.09% -0.16% +0.07%] index_select random : Elapsed 0.082 ms (8.235 ms / 100) 8.212 -> 8.222 ( +0.12%) [ +0.00% +0.02% +0.09% / +0.12% +0.13% +0.30%] index_select random_sorted : Elapsed 0.082 ms (8.212 ms / 100) out_shape = [20, 5, 40, 4] in_shape = [20, 5, 16, 4] idx_dim = 2 B = [20, 5, 40, 4] (stride (800, 160, 1, 40)) A = [20, 5, 16, 4] (stride (80, 16, 1, 1600)) dim = 2 1.410 -> 1.412 ( +0.14%) [ +0.14% +0.21% +0.00% / +0.14% +0.57% +0.43%] index_add_ linear : Elapsed 0.014 ms (1.412 ms / 100) 1.431 -> 1.432 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.42% +0.42%] index_copy_ linear : Elapsed 0.014 ms (1.431 ms / 100) 1.410 -> 1.412 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.78% +0.92%] index_add_ reverse : Elapsed 0.014 ms (1.412 ms / 100) 1.426 -> 1.425 ( -0.07%) [ +0.00% +0.07% +0.07% / -0.07% +0.63% +1.47%] index_copy_ reverse : Elapsed 0.014 ms (1.426 ms / 100) 1.410 -> 1.410 ( +0.00%) [ +0.21% +0.21% +0.00% / +0.00% +1.06% +0.35%] index_add_ spread : Elapsed 0.014 ms (1.413 ms / 100) 1.426 -> 1.426 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.35% +0.56%] index_copy_ spread : Elapsed 0.014 ms (1.426 ms / 100) 1.408 -> 1.414 ( +0.43%) [ +0.43% +0.36% +0.00% / +0.43% +0.43% +0.78%] index_add_ strided 3 : Elapsed 0.014 ms (1.414 ms / 100) 1.430 -> 1.433 ( +0.21%) [ +0.35% +0.28% +0.00% / +0.21% +0.35% +0.42%] index_copy_ strided 3 : Elapsed 0.014 ms (1.435 ms / 100) 1.412 -> 1.412 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.64% +0.35%] index_add_ strided 7 : Elapsed 0.014 ms (1.414 ms / 100) 1.427 -> 1.428 ( +0.07%) [ +0.00% +0.21% +0.00% / +0.07% +0.56% +0.35%] index_copy_ strided 7 : Elapsed 0.014 ms (1.427 ms / 100) 1.412 -> 1.410 ( -0.14%) [ +0.14% +0.07% +0.00% / -0.14% +0.21% +0.28%] index_add_ perm : Elapsed 0.014 ms (1.414 ms / 100) 1.435 -> 1.433 ( -0.14%) [ +0.07% +0.00% +0.07% / -0.14% +0.07% +0.28%] index_copy_ perm : Elapsed 0.014 ms (1.436 ms / 100) 1.412 -> 1.412 ( +0.00%) [ +0.28% +0.28% +0.00% / +0.42% +0.00% +2.12%] index_add_ perm_sorted : Elapsed 0.014 ms (1.416 ms / 100) 1.433 -> 1.434 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.21% +0.07% +0.56%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.434 ms / 100) 3.551 -> 3.531 ( -0.56%) [ +0.17% +0.00% +0.03% / -0.56% -0.51% -0.54%] index_select const : Elapsed 0.036 ms (3.557 ms / 100) 3.557 -> 3.536 ( -0.59%) [ +0.06% +0.00% +0.20% / -0.42% -0.59% -0.51%] index_select wrap : Elapsed 0.036 ms (3.559 ms / 100) 3.558 -> 3.533 ( -0.70%) [ +0.00% +0.00% +0.03% / -0.45% -0.70% -0.53%] index_select linear : Elapsed 0.036 ms (3.558 ms / 100) 3.557 -> 3.534 ( -0.65%) [ +0.00% +0.00% +0.11% / -0.53% -0.65% -0.42%] index_select reverse : Elapsed 0.036 ms (3.557 ms / 100) 3.556 -> 3.524 ( -0.90%) [ +0.00% +0.20% +0.28% / -0.42% -0.90% -0.76%] index_select skip64 : Elapsed 0.036 ms (3.556 ms / 100) 3.553 -> 3.522 ( -0.87%) [ +0.00% +0.14% +0.14% / -0.17% -0.87% -0.28%] index_select skip256 : Elapsed 0.036 ms (3.553 ms / 100) 3.562 -> 3.530 ( -0.90%) [ +0.03% +0.00% +0.06% / -0.59% -0.90% -0.56%] index_select spread : Elapsed 0.036 ms (3.563 ms / 100) 3.555 -> 3.532 ( -0.65%) [ +0.00% +0.14% +0.31% / -0.48% -0.65% -0.56%] index_select strided 3 : Elapsed 0.036 ms (3.555 ms / 100) 3.557 -> 3.525 ( -0.90%) [ +0.08% +0.14% +0.00% / -0.53% -0.90% -0.59%] index_select strided 5 : Elapsed 0.036 ms (3.560 ms / 100) 3.559 -> 3.530 ( -0.81%) [ +0.08% +0.14% +0.00% / -0.81% -0.65% -0.79%] index_select strided 7 : Elapsed 0.036 ms (3.562 ms / 100) 3.562 -> 3.527 ( -0.98%) [ +0.11% +0.08% +0.00% / -0.73% -0.98% -0.73%] index_select strided 8 : Elapsed 0.036 ms (3.566 ms / 100) 3.566 -> 3.534 ( -0.90%) [ +0.08% +0.00% +0.03% / -0.84% -0.90% -0.76%] index_select random : Elapsed 0.036 ms (3.569 ms / 100) 3.568 -> 3.535 ( -0.92%) [ +0.08% +0.00% +0.03% / -0.73% -0.92% -0.78%] index_select random_sorted : Elapsed 0.036 ms (3.571 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 20, 5)) A = [20, 5, 16, 4] (stride (5, 1, 400, 100)) dim = 2 3.660 -> 3.665 ( +0.14%) [ +0.05% +0.08% +0.00% / +0.14% +0.66% +0.68%] index_add_ linear : Elapsed 0.037 ms (3.662 ms / 100) 3.532 -> 3.531 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.71% +0.93%] index_copy_ linear : Elapsed 0.035 ms (3.532 ms / 100) 3.668 -> 3.670 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.79% +0.90%] index_add_ reverse : Elapsed 0.037 ms (3.669 ms / 100) 3.541 -> 3.540 ( -0.03%) [ +0.03% +0.03% +0.00% / -0.03% +0.82% +1.19%] index_copy_ reverse : Elapsed 0.035 ms (3.542 ms / 100) 3.673 -> 3.675 ( +0.05%) [ +0.08% +0.00% +0.03% / +0.05% +0.93% +1.36%] index_add_ spread : Elapsed 0.037 ms (3.676 ms / 100) 3.547 -> 3.548 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.87% +1.89%] index_copy_ spread : Elapsed 0.035 ms (3.548 ms / 100) 3.664 -> 3.665 ( +0.03%) [ +0.03% +0.00% +0.00% / +0.03% +0.96% +0.98%] index_add_ strided 3 : Elapsed 0.037 ms (3.665 ms / 100) 3.534 -> 3.535 ( +0.03%) [ +0.03% +0.00% +0.03% / +0.03% +0.85% +0.99%] index_copy_ strided 3 : Elapsed 0.035 ms (3.535 ms / 100) 3.668 -> 3.669 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.79% +0.90%] index_add_ strided 7 : Elapsed 0.037 ms (3.669 ms / 100) 3.541 -> 3.541 ( +0.00%) [ +0.17% +0.06% +0.00% / +0.00% +0.73% +0.96%] index_copy_ strided 7 : Elapsed 0.035 ms (3.547 ms / 100) 3.660 -> 3.659 ( -0.03%) [ +0.08% +0.00% +0.00% / -0.03% +0.77% +0.82%] index_add_ perm : Elapsed 0.037 ms (3.663 ms / 100) 3.530 -> 3.531 ( +0.03%) [ +0.06% +0.00% +0.06% / +0.03% +1.02% +1.10%] index_copy_ perm : Elapsed 0.035 ms (3.532 ms / 100) 3.671 -> 3.670 ( -0.03%) [ +0.03% +0.00% +0.03% / -0.03% +0.63% +0.71%] index_add_ perm_sorted : Elapsed 0.037 ms (3.672 ms / 100) 3.535 -> 3.538 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.76% +1.16%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.535 ms / 100) 5.481 -> 5.481 ( +0.00%) [ +0.09% +0.11% +0.00% / +0.02% +0.00% +0.04%] index_select const : Elapsed 0.055 ms (5.486 ms / 100) 5.494 -> 5.488 ( -0.11%) [ +0.02% +0.00% +0.07% / +0.02% -0.11% +0.11%] index_select wrap : Elapsed 0.055 ms (5.495 ms / 100) 5.486 -> 5.486 ( +0.00%) [ +0.07% +0.00% +0.11% / +0.16% +0.00% +0.15%] index_select linear : Elapsed 0.055 ms (5.490 ms / 100) 5.495 -> 5.484 ( -0.20%) [ +0.05% +0.00% +0.04% / -0.11% -0.20% +0.11%] index_select reverse : Elapsed 0.055 ms (5.498 ms / 100) 5.477 -> 5.478 ( +0.02%) [ +0.22% +0.00% +0.09% / +0.20% +0.02% +0.20%] index_select skip64 : Elapsed 0.055 ms (5.489 ms / 100) 5.476 -> 5.481 ( +0.09%) [ +0.00% +0.09% +0.02% / +0.09% +0.20% +0.15%] index_select skip256 : Elapsed 0.055 ms (5.476 ms / 100) 5.477 -> 5.490 ( +0.24%) [ +0.27% +0.33% +0.00% / +0.33% +0.24% +0.35%] index_select spread : Elapsed 0.055 ms (5.492 ms / 100) 5.485 -> 5.486 ( +0.02%) [ +0.00% +0.15% +0.11% / +0.15% +0.02% +0.02%] index_select strided 3 : Elapsed 0.055 ms (5.485 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.00% +0.09% +0.05% / -0.09% +0.00% -0.05%] index_select strided 5 : Elapsed 0.055 ms (5.490 ms / 100) 5.483 -> 5.486 ( +0.05%) [ +0.22% +0.20% +0.00% / +0.05% +0.18% +0.27%] index_select strided 7 : Elapsed 0.055 ms (5.495 ms / 100) 5.476 -> 5.479 ( +0.05%) [ +0.09% +0.02% +0.00% / +0.07% +0.05% +0.40%] index_select strided 8 : Elapsed 0.055 ms (5.481 ms / 100) 5.486 -> 5.485 ( -0.02%) [ +0.00% +0.05% +0.20% / +0.15% -0.02% +0.07%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.485 -> 5.483 ( -0.04%) [ +0.15% +0.00% +0.24% / -0.04% +0.22% +0.15%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) B = [20, 5, 40, 4] (stride (800, 1, 20, 5)) A = [20, 5, 16, 4] (stride (1, 20, 400, 100)) dim = 2 4.147 -> 4.147 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.70% +0.70%] index_add_ linear : Elapsed 0.041 ms (4.149 ms / 100) 4.004 -> 4.004 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.67% +0.77%] index_copy_ linear : Elapsed 0.040 ms (4.004 ms / 100) 4.150 -> 4.150 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.60% +0.77%] index_add_ reverse : Elapsed 0.042 ms (4.152 ms / 100) 4.006 -> 4.006 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.47% +0.97%] index_copy_ reverse : Elapsed 0.040 ms (4.007 ms / 100) 4.137 -> 4.142 ( +0.12%) [ +0.07% +0.02% +0.00% / +0.12% +0.53% +0.51%] index_add_ spread : Elapsed 0.041 ms (4.140 ms / 100) 3.996 -> 3.997 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.55% +0.68%] index_copy_ spread : Elapsed 0.040 ms (3.998 ms / 100) 4.139 -> 4.138 ( -0.02%) [ +0.10% +0.05% +0.00% / -0.02% +0.48% +0.68%] index_add_ strided 3 : Elapsed 0.041 ms (4.143 ms / 100) 4.000 -> 4.003 ( +0.08%) [ +0.08% +0.05% +0.00% / +0.08% +0.50% +0.75%] index_copy_ strided 3 : Elapsed 0.040 ms (4.003 ms / 100) 4.154 -> 4.154 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.46% +0.48%] index_add_ strided 7 : Elapsed 0.042 ms (4.154 ms / 100) 4.006 -> 4.006 ( +0.00%) [ +0.00% +0.02% +0.00% / +0.00% +0.55% +0.80%] index_copy_ strided 7 : Elapsed 0.040 ms (4.006 ms / 100) 4.150 -> 4.150 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.46% +0.70%] index_add_ perm : Elapsed 0.042 ms (4.151 ms / 100) 4.008 -> 4.007 ( -0.02%) [ +0.00% +0.02% +0.02% / -0.02% +0.42% +0.55%] index_copy_ perm : Elapsed 0.040 ms (4.008 ms / 100) 4.151 -> 4.151 ( +0.00%) [ +0.02% +0.00% +0.00% / +0.00% +0.48% +0.51%] index_add_ perm_sorted : Elapsed 0.042 ms (4.152 ms / 100) 4.007 -> 4.009 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.55% +0.60%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.009 ms / 100) 5.552 -> 5.557 ( +0.09%) [ +0.02% +0.00% +0.13% / +0.09% +0.22% +0.23%] index_select const : Elapsed 0.056 ms (5.553 ms / 100) 5.556 -> 5.556 ( +0.00%) [ +0.00% +0.04% +0.07% / +0.00% +0.25% +0.23%] index_select wrap : Elapsed 0.056 ms (5.556 ms / 100) 5.554 -> 5.558 ( +0.07%) [ +0.16% +0.00% +0.05% / +0.07% +0.20% +0.27%] index_select linear : Elapsed 0.056 ms (5.563 ms / 100) 5.553 -> 5.555 ( +0.04%) [ +0.07% +0.00% +0.11% / +0.04% +0.18% +0.25%] index_select reverse : Elapsed 0.056 ms (5.557 ms / 100) 5.555 -> 5.563 ( +0.14%) [ +0.00% +0.11% +0.14% / +0.20% +0.14% +0.20%] index_select skip64 : Elapsed 0.056 ms (5.555 ms / 100) 5.560 -> 5.560 ( +0.00%) [ +0.11% +0.00% +0.04% / +0.02% +0.00% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.566 ms / 100) 5.562 -> 5.557 ( -0.09%) [ +0.02% +0.07% +0.00% / -0.09% -0.05% +0.04%] index_select spread : Elapsed 0.056 ms (5.563 ms / 100) 5.553 -> 5.550 ( -0.05%) [ +0.00% +0.04% +0.13% / -0.05% +0.05% +0.31%] index_select strided 3 : Elapsed 0.056 ms (5.553 ms / 100) 5.556 -> 5.556 ( +0.00%) [ +0.09% +0.00% +0.02% / +0.00% +0.14% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.561 ms / 100) 5.549 -> 5.552 ( +0.05%) [ +0.16% +0.22% +0.00% / +0.05% +0.43% +0.50%] index_select strided 7 : Elapsed 0.056 ms (5.558 ms / 100) 5.553 -> 5.559 ( +0.11%) [ +0.00% +0.07% +0.09% / +0.11% +0.11% +0.20%] index_select strided 8 : Elapsed 0.056 ms (5.553 ms / 100) 5.558 -> 5.557 ( -0.02%) [ +0.00% +0.02% +0.16% / +0.07% -0.02% +0.02%] index_select random : Elapsed 0.056 ms (5.558 ms / 100) 5.551 -> 5.559 ( +0.14%) [ +0.22% +0.25% +0.00% / +0.14% +0.16% +0.16%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [20, 5, 40, 4] (stride (4, 3200, 80, 1)) A = [20, 5, 16, 4] (stride (320, 1, 5, 80)) dim = 2 4.128 -> 4.138 ( +0.24%) [ +0.27% +0.19% +0.00% / +0.24% +0.87% +1.16%] index_add_ linear : Elapsed 0.041 ms (4.139 ms / 100) 3.980 -> 3.990 ( +0.25%) [ +0.30% +0.30% +0.00% / +0.25% +0.95% +1.23%] index_copy_ linear : Elapsed 0.040 ms (3.992 ms / 100) 4.117 -> 4.117 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +0.85% +0.87%] index_add_ reverse : Elapsed 0.041 ms (4.123 ms / 100) 3.972 -> 3.969 ( -0.08%) [ +0.23% +0.00% +0.13% / -0.08% +1.03% +1.01%] index_copy_ reverse : Elapsed 0.040 ms (3.981 ms / 100) 4.117 -> 4.120 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.07% +0.68% +0.97%] index_add_ spread : Elapsed 0.041 ms (4.118 ms / 100) 3.973 -> 3.967 ( -0.15%) [ +0.08% +0.03% +0.00% / -0.15% +0.60% +0.98%] index_copy_ spread : Elapsed 0.040 ms (3.976 ms / 100) 4.131 -> 4.134 ( +0.07%) [ +0.12% +0.00% +0.27% / +0.07% +0.82% +0.68%] index_add_ strided 3 : Elapsed 0.041 ms (4.136 ms / 100) 3.987 -> 3.990 ( +0.08%) [ +0.10% +0.00% +0.38% / +0.08% +0.80% +0.88%] index_copy_ strided 3 : Elapsed 0.040 ms (3.991 ms / 100) 4.119 -> 4.119 ( +0.00%) [ +0.15% +0.24% +0.00% / +0.00% +0.97% +0.97%] index_add_ strided 7 : Elapsed 0.041 ms (4.125 ms / 100) 3.970 -> 3.974 ( +0.10%) [ +0.25% +0.25% +0.00% / +0.10% +1.03% +1.11%] index_copy_ strided 7 : Elapsed 0.040 ms (3.980 ms / 100) 4.130 -> 4.140 ( +0.24%) [ +0.00% +0.19% +0.17% / +0.24% +0.80% +1.02%] index_add_ perm : Elapsed 0.041 ms (4.130 ms / 100) 3.981 -> 3.992 ( +0.28%) [ +0.00% +0.25% +0.10% / +0.28% +0.93% +1.28%] index_copy_ perm : Elapsed 0.040 ms (3.981 ms / 100) 4.127 -> 4.131 ( +0.10%) [ +0.17% +0.15% +0.00% / +0.10% +0.92% +0.94%] index_add_ perm_sorted : Elapsed 0.041 ms (4.134 ms / 100) 3.985 -> 3.988 ( +0.08%) [ +0.10% +0.03% +0.00% / +0.08% +0.95% +0.88%] index_copy_ perm_sorted : Elapsed 0.040 ms (3.989 ms / 100) 5.557 -> 5.554 ( -0.05%) [ +0.00% +0.11% +0.00% / -0.02% -0.05% +0.11%] index_select const : Elapsed 0.056 ms (5.557 ms / 100) 5.567 -> 5.558 ( -0.16%) [ +0.00% +0.05% +0.16% / +0.02% -0.16% +0.14%] index_select wrap : Elapsed 0.056 ms (5.567 ms / 100) 5.565 -> 5.558 ( -0.13%) [ +0.11% +0.00% +0.05% / +0.14% -0.13% +0.11%] index_select linear : Elapsed 0.056 ms (5.571 ms / 100) 5.563 -> 5.558 ( -0.09%) [ +0.00% +0.09% +0.13% / +0.13% -0.09% +0.18%] index_select reverse : Elapsed 0.056 ms (5.563 ms / 100) 5.552 -> 5.559 ( +0.13%) [ +0.00% +0.07% +0.07% / +0.13% +0.20% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.552 ms / 100) 5.551 -> 5.557 ( +0.11%) [ +0.00% +0.09% +0.05% / +0.16% +0.11% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.551 ms / 100) 5.560 -> 5.553 ( -0.13%) [ +0.16% +0.27% +0.00% / +0.18% -0.13% +0.02%] index_select spread : Elapsed 0.056 ms (5.569 ms / 100) 5.568 -> 5.560 ( -0.14%) [ +0.00% +0.04% +0.02% / +0.00% -0.04% -0.14%] index_select strided 3 : Elapsed 0.056 ms (5.568 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.07% +0.02% +0.00% / +0.05% +0.11% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.567 ms / 100) 5.562 -> 5.566 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.07% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.562 ms / 100) 5.554 -> 5.553 ( -0.02%) [ +0.00% +0.13% +0.11% / +0.00% -0.02% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.554 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.00% +0.07% +0.07% / -0.04% -0.05% +0.11%] index_select random : Elapsed 0.056 ms (5.564 ms / 100) 5.563 -> 5.560 ( -0.05%) [ +0.00% +0.04% +0.00% / +0.07% -0.05% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [20, 5, 40, 4] (stride (5, 1, 400, 100)) A = [20, 5, 16, 4] (stride (320, 16, 1, 80)) dim = 2 3.942 -> 3.942 ( +0.00%) [ +0.05% +0.00% +0.18% / +0.00% +0.38% +0.28%] index_add_ linear : Elapsed 0.039 ms (3.944 ms / 100) 3.787 -> 3.788 ( +0.03%) [ +0.00% +0.03% +0.13% / +0.03% +0.29% +0.32%] index_copy_ linear : Elapsed 0.038 ms (3.787 ms / 100) 3.961 -> 3.965 ( +0.10%) [ +0.15% +0.15% +0.00% / +0.10% +0.66% +0.83%] index_add_ reverse : Elapsed 0.040 ms (3.967 ms / 100) 3.795 -> 3.797 ( +0.05%) [ +0.13% +0.16% +0.00% / +0.05% +0.61% +0.74%] index_copy_ reverse : Elapsed 0.038 ms (3.800 ms / 100) 3.941 -> 3.938 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.38% +0.46%] index_add_ spread : Elapsed 0.039 ms (3.941 ms / 100) 3.784 -> 3.785 ( +0.03%) [ +0.13% +0.00% +0.08% / +0.03% +0.71% +0.79%] index_copy_ spread : Elapsed 0.038 ms (3.789 ms / 100) 3.942 -> 3.943 ( +0.03%) [ +0.13% +0.03% +0.00% / +0.03% +0.61% +0.79%] index_add_ strided 3 : Elapsed 0.039 ms (3.947 ms / 100) 3.786 -> 3.785 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.53% +0.61%] index_copy_ strided 3 : Elapsed 0.038 ms (3.789 ms / 100) 3.963 -> 3.966 ( +0.08%) [ +0.13% +0.00% +0.05% / +0.08% +0.56% +0.53%] index_add_ strided 7 : Elapsed 0.040 ms (3.968 ms / 100) 3.798 -> 3.803 ( +0.13%) [ +0.05% +0.00% +0.05% / +0.13% +0.53% +0.53%] index_copy_ strided 7 : Elapsed 0.038 ms (3.800 ms / 100) 3.938 -> 3.946 ( +0.20%) [ +0.05% +0.00% +0.00% / +0.20% +0.51% +0.63%] index_add_ perm : Elapsed 0.039 ms (3.940 ms / 100) 3.782 -> 3.790 ( +0.21%) [ +0.03% +0.05% +0.00% / +0.21% +0.56% +0.56%] index_copy_ perm : Elapsed 0.038 ms (3.783 ms / 100) 3.943 -> 3.942 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.25% +0.28%] index_add_ perm_sorted : Elapsed 0.039 ms (3.944 ms / 100) 3.788 -> 3.788 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.26% +0.29%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.788 ms / 100) 5.465 -> 5.471 ( +0.11%) [ +0.00% +0.00% +0.13% / +0.11% +0.20% +0.26%] index_select const : Elapsed 0.055 ms (5.465 ms / 100) 5.466 -> 5.473 ( +0.13%) [ +0.00% +0.05% +0.00% / +0.13% +0.16% +0.26%] index_select wrap : Elapsed 0.055 ms (5.466 ms / 100) 5.465 -> 5.478 ( +0.24%) [ +0.00% +0.18% +0.05% / +0.27% +0.29% +0.24%] index_select linear : Elapsed 0.055 ms (5.465 ms / 100) 5.467 -> 5.471 ( +0.07%) [ +0.00% +0.02% +0.07% / +0.07% +0.24% +0.13%] index_select reverse : Elapsed 0.055 ms (5.467 ms / 100) 5.469 -> 5.468 ( -0.02%) [ +0.02% +0.00% +0.00% / -0.02% +0.00% +0.02%] index_select skip64 : Elapsed 0.055 ms (5.470 ms / 100) 5.470 -> 5.469 ( -0.02%) [ +0.15% +0.00% +0.00% / +0.05% -0.02% +0.02%] index_select skip256 : Elapsed 0.055 ms (5.478 ms / 100) 5.471 -> 5.470 ( -0.02%) [ +0.16% +0.00% +0.04% / -0.02% -0.02% +0.00%] index_select spread : Elapsed 0.055 ms (5.480 ms / 100) 5.464 -> 5.472 ( +0.15%) [ +0.09% +0.16% +0.00% / +0.15% +0.22% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.469 ms / 100) 5.464 -> 5.467 ( +0.05%) [ +0.00% +0.07% +0.20% / +0.05% +0.24% +0.31%] index_select strided 5 : Elapsed 0.055 ms (5.464 ms / 100) 5.468 -> 5.465 ( -0.05%) [ +0.07% +0.15% +0.00% / -0.05% +0.04% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.472 ms / 100) 5.466 -> 5.457 ( -0.16%) [ +0.00% +0.00% +0.15% / -0.16% +0.15% +0.09%] index_select strided 8 : Elapsed 0.055 ms (5.466 ms / 100) 5.470 -> 5.466 ( -0.07%) [ +0.11% +0.00% +0.15% / -0.07% +0.02% +0.31%] index_select random : Elapsed 0.055 ms (5.476 ms / 100) 5.464 -> 5.470 ( +0.11%) [ +0.00% +0.20% +0.20% / +0.11% +0.11% +0.11%] index_select random_sorted : Elapsed 0.055 ms (5.464 ms / 100) B = [20, 5, 40, 4] (stride (1, 800, 20, 4000)) A = [20, 5, 16, 4] (stride (4, 1280, 80, 1)) dim = 2 4.023 -> 4.022 ( -0.02%) [ +0.10% +0.10% +0.00% / -0.02% +0.82% +0.72%] index_add_ linear : Elapsed 0.040 ms (4.027 ms / 100) 3.891 -> 3.893 ( +0.05%) [ +0.31% +0.00% +0.00% / +0.05% +0.80% +1.13%] index_copy_ linear : Elapsed 0.039 ms (3.903 ms / 100) 4.003 -> 4.009 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.85% +0.87%] index_add_ reverse : Elapsed 0.040 ms (4.012 ms / 100) 3.877 -> 3.879 ( +0.05%) [ +0.03% +0.03% +0.00% / +0.05% +0.85% +0.77%] index_copy_ reverse : Elapsed 0.039 ms (3.878 ms / 100) 4.003 -> 4.005 ( +0.05%) [ +0.15% +0.15% +0.00% / +0.05% +0.85% +0.95%] index_add_ spread : Elapsed 0.040 ms (4.009 ms / 100) 3.883 -> 3.891 ( +0.21%) [ +0.08% +0.10% +0.00% / +0.21% +0.88% +1.21%] index_copy_ spread : Elapsed 0.039 ms (3.886 ms / 100) 4.002 -> 4.008 ( +0.15%) [ +0.12% +0.00% +0.00% / +0.15% +0.70% +0.87%] index_add_ strided 3 : Elapsed 0.040 ms (4.007 ms / 100) 3.868 -> 3.872 ( +0.10%) [ +0.18% +0.00% +0.00% / +0.10% +0.83% +1.37%] index_copy_ strided 3 : Elapsed 0.039 ms (3.875 ms / 100) 4.009 -> 4.009 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.77% +0.65%] index_add_ strided 7 : Elapsed 0.040 ms (4.009 ms / 100) 3.876 -> 3.879 ( +0.08%) [ +0.00% +0.10% +0.05% / +0.08% +1.03% +1.19%] index_copy_ strided 7 : Elapsed 0.039 ms (3.876 ms / 100) 4.023 -> 4.024 ( +0.02%) [ +0.02% +0.12% +0.00% / +0.02% +0.67% +0.70%] index_add_ perm : Elapsed 0.040 ms (4.024 ms / 100) 3.891 -> 3.892 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.87% +0.75%] index_copy_ perm : Elapsed 0.039 ms (3.891 ms / 100) 4.004 -> 4.002 ( -0.05%) [ +0.00% +0.00% +0.05% / -0.05% +0.72% +0.92%] index_add_ perm_sorted : Elapsed 0.040 ms (4.004 ms / 100) 3.866 -> 3.871 ( +0.13%) [ +0.00% +0.10% +0.13% / +0.13% +0.80% +1.66%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.866 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.00% +0.04% +0.09% / +0.05% +0.13% +0.31%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.579 -> 5.579 ( +0.00%) [ +0.22% +0.00% +0.07% / +0.14% +0.00% +0.27%] index_select wrap : Elapsed 0.056 ms (5.591 ms / 100) 5.579 -> 5.581 ( +0.04%) [ +0.00% +0.11% +0.11% / +0.04% +0.05% +0.05%] index_select linear : Elapsed 0.056 ms (5.579 ms / 100) 5.585 -> 5.577 ( -0.14%) [ +0.14% +0.02% +0.00% / +0.05% -0.05% -0.14%] index_select reverse : Elapsed 0.056 ms (5.593 ms / 100) 5.562 -> 5.568 ( +0.11%) [ +0.09% +0.11% +0.00% / +0.11% +0.20% +0.27%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.16% +0.00% +0.22% / +0.09% +0.07% +0.25%] index_select skip256 : Elapsed 0.056 ms (5.569 ms / 100) 5.579 -> 5.576 ( -0.05%) [ +0.09% +0.00% +0.04% / -0.04% -0.05% -0.05%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.576 -> 5.572 ( -0.07%) [ +0.07% +0.00% +0.14% / +0.05% -0.07% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.580 ms / 100) 5.574 -> 5.574 ( +0.00%) [ +0.16% +0.22% +0.00% / +0.09% +0.14% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.583 ms / 100) 5.577 -> 5.575 ( -0.04%) [ +0.14% +0.00% +0.11% / -0.04% +0.18% +0.11%] index_select strided 7 : Elapsed 0.056 ms (5.585 ms / 100) 5.562 -> 5.559 ( -0.05%) [ +0.05% +0.00% +0.09% / -0.05% +0.20% +0.32%] index_select strided 8 : Elapsed 0.056 ms (5.565 ms / 100) 5.580 -> 5.574 ( -0.11%) [ +0.07% +0.00% +0.14% / +0.04% -0.09% -0.11%] index_select random : Elapsed 0.056 ms (5.584 ms / 100) 5.579 -> 5.575 ( -0.07%) [ +0.16% +0.00% +0.02% / -0.07% +0.05% +0.05%] index_select random_sorted : Elapsed 0.056 ms (5.588 ms / 100) out_shape = [20, 5, 16, 40] in_shape = [20, 5, 16, 4] idx_dim = 3 B = [20, 5, 16, 40] (stride (3200, 1, 5, 80)) A = [20, 5, 16, 4] (stride (64, 1280, 4, 1)) dim = 3 1.311 -> 1.315 ( +0.31%) [ +0.08% +0.31% +0.00% / +0.31% +0.61% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.312 ms / 100) 1.273 -> 1.272 ( -0.08%) [ +0.00% +0.00% +0.08% / -0.08% +0.55% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.316 ( +0.30%) [ +0.23% +0.46% +0.00% / +0.30% +0.38% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.08% +0.16% +0.00% / +0.00% +0.71% +0.71%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.313 -> 1.316 ( +0.23%) [ +0.23% +0.00% +0.00% / +0.23% +0.38% +0.46%] index_add_ spread : Elapsed 0.013 ms (1.316 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.71% +0.63%] index_copy_ spread : Elapsed 0.013 ms (1.272 ms / 100) 1.311 -> 1.315 ( +0.31%) [ +0.15% +0.31% +0.00% / +0.31% +0.76% +1.22%] index_add_ strided 3 : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.79% +1.26%] index_copy_ strided 3 : Elapsed 0.013 ms (1.271 ms / 100) 1.309 -> 1.316 ( +0.53%) [ +0.08% +0.31% +0.00% / +0.53% +0.69% +0.69%] index_add_ strided 7 : Elapsed 0.013 ms (1.310 ms / 100) 1.272 -> 1.271 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.71% +0.86%] index_copy_ strided 7 : Elapsed 0.013 ms (1.273 ms / 100) 1.311 -> 1.316 ( +0.38%) [ +0.31% +0.00% +0.31% / +0.38% +0.84% +0.69%] index_add_ perm : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.271 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.79% +0.79%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.314 -> 1.312 ( -0.15%) [ +0.30% +0.00% +0.30% / -0.15% +0.30% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.318 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.71% +0.79%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.272 ms / 100) 9.146 -> 9.139 ( -0.08%) [ +0.10% +0.21% +0.00% / -0.08% -0.03% +0.21%] index_select const : Elapsed 0.092 ms (9.155 ms / 100) 9.142 -> 9.130 ( -0.13%) [ +0.00% +0.13% +0.08% / +0.14% -0.07% -0.13%] index_select wrap : Elapsed 0.091 ms (9.142 ms / 100) 9.122 -> 9.137 ( +0.16%) [ +0.43% +0.33% +0.00% / +0.30% +0.54% +0.16%] index_select linear : Elapsed 0.092 ms (9.161 ms / 100) 9.143 -> 9.153 ( +0.11%) [ +0.13% +0.03% +0.00% / +0.19% +0.11% +0.15%] index_select reverse : Elapsed 0.092 ms (9.155 ms / 100) 9.142 -> 9.127 ( -0.16%) [ +0.00% +0.19% +0.22% / +0.07% -0.16% -0.01%] index_select skip64 : Elapsed 0.091 ms (9.142 ms / 100) 9.138 -> 9.142 ( +0.04%) [ +0.20% +0.00% +0.42% / +0.08% +0.04% +0.44%] index_select skip256 : Elapsed 0.092 ms (9.156 ms / 100) 9.142 -> 9.134 ( -0.09%) [ +0.00% +0.13% +0.15% / +0.18% +0.04% -0.09%] index_select spread : Elapsed 0.091 ms (9.142 ms / 100) 9.139 -> 9.135 ( -0.04%) [ +0.04% +0.18% +0.00% / +0.31% -0.04% +0.56%] index_select strided 3 : Elapsed 0.091 ms (9.143 ms / 100) 9.143 -> 9.133 ( -0.11%) [ +0.24% +0.00% +0.05% / +0.01% -0.03% -0.11%] index_select random : Elapsed 0.092 ms (9.165 ms / 100) 9.154 -> 9.137 ( -0.19%) [ +0.01% +0.07% +0.00% / -0.03% -0.16% -0.19%] index_select random_sorted : Elapsed 0.092 ms (9.155 ms / 100) B = [20, 5, 16, 40] (stride (40, 12800, 800, 1)) A = [20, 5, 16, 4] (stride (1, 20, 400, 100)) dim = 3 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.00% +0.15% / +0.15% +0.61% +1.37%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.55% +1.73%] index_copy_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.30% +0.23% +0.00% / +0.15% +0.61% +1.52%] index_add_ reverse : Elapsed 0.013 ms (1.322 ms / 100) 1.272 -> 1.273 ( +0.08%) [ +0.08% +0.16% +0.00% / +0.08% +0.47% +1.42%] index_copy_ reverse : Elapsed 0.013 ms (1.273 ms / 100) 1.325 -> 1.328 ( +0.23%) [ +0.00% +0.38% +0.15% / +0.23% +0.23% +0.30%] index_add_ spread : Elapsed 0.013 ms (1.325 ms / 100) 1.279 -> 1.280 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.31% +0.55%] index_copy_ spread : Elapsed 0.013 ms (1.281 ms / 100) 1.316 -> 1.317 ( +0.08%) [ +0.23% +0.23% +0.00% / +0.08% +0.76% +0.46%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.79% +0.55%] index_copy_ strided 3 : Elapsed 0.013 ms (1.272 ms / 100) 1.322 -> 1.320 ( -0.15%) [ +0.08% +0.00% +0.08% / -0.15% +0.38% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.323 ms / 100) 1.276 -> 1.278 ( +0.16%) [ +0.16% +0.00% +0.08% / +0.16% +0.63% +0.86%] index_copy_ strided 7 : Elapsed 0.013 ms (1.278 ms / 100) 1.320 -> 1.321 ( +0.08%) [ +0.45% +0.00% +0.00% / +0.08% +0.83% +1.44%] index_add_ perm : Elapsed 0.013 ms (1.326 ms / 100) 1.276 -> 1.275 ( -0.08%) [ +0.16% +0.08% +0.00% / -0.08% +0.71% +0.86%] index_copy_ perm : Elapsed 0.013 ms (1.278 ms / 100) 1.322 -> 1.326 ( +0.30%) [ +0.30% +0.00% +0.00% / +0.30% +0.45% +0.61%] index_add_ perm_sorted : Elapsed 0.013 ms (1.326 ms / 100) 1.276 -> 1.277 ( +0.08%) [ +0.24% +0.00% +0.24% / +0.08% +1.18% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.279 ms / 100) 8.790 -> 8.786 ( -0.05%) [ +0.14% +0.00% +0.05% / +0.18% +0.11% -0.05%] index_select const : Elapsed 0.088 ms (8.802 ms / 100) 8.808 -> 8.814 ( +0.07%) [ +0.00% +0.10% +0.11% / +0.09% +0.07% +0.12%] index_select wrap : Elapsed 0.088 ms (8.808 ms / 100) 8.800 -> 8.791 ( -0.10%) [ +0.16% +0.00% +0.19% / -0.10% +0.14% +0.05%] index_select linear : Elapsed 0.088 ms (8.814 ms / 100) 8.801 -> 8.798 ( -0.03%) [ +0.00% +0.06% +0.28% / +0.31% +0.11% -0.03%] index_select reverse : Elapsed 0.088 ms (8.801 ms / 100) 8.791 -> 8.793 ( +0.02%) [ +0.10% +0.14% +0.00% / +0.02% +0.08% +0.17%] index_select skip64 : Elapsed 0.088 ms (8.800 ms / 100) 8.797 -> 8.786 ( -0.13%) [ +0.09% +0.16% +0.00% / +0.00% +0.08% -0.13%] index_select skip256 : Elapsed 0.088 ms (8.805 ms / 100) 8.811 -> 8.818 ( +0.08%) [ +0.32% +0.00% +0.06% / +0.08% +0.10% +0.20%] index_select spread : Elapsed 0.088 ms (8.839 ms / 100) 8.810 -> 8.809 ( -0.01%) [ +0.03% +0.00% +0.07% / -0.01% +0.06% +0.08%] index_select strided 3 : Elapsed 0.088 ms (8.813 ms / 100) 8.809 -> 8.803 ( -0.07%) [ +0.23% +0.00% +0.09% / +0.03% -0.07% +0.00%] index_select random : Elapsed 0.088 ms (8.829 ms / 100) 8.811 -> 8.818 ( +0.08%) [ +0.12% +0.05% +0.00% / +0.14% +0.30% +0.08%] index_select random_sorted : Elapsed 0.088 ms (8.822 ms / 100) B = [20, 5, 16, 40] (stride (16, 320, 1, 1600)) A = [20, 5, 16, 4] (stride (4, 80, 400, 1)) dim = 3 1.405 -> 1.405 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.50% +0.64%] index_add_ linear : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.51% +0.51%] index_copy_ linear : Elapsed 0.014 ms (1.360 ms / 100) 1.407 -> 1.407 ( +0.00%) [ +0.14% +0.00% +0.00% / +0.00% +0.57% +0.57%] index_add_ reverse : Elapsed 0.014 ms (1.409 ms / 100) 1.358 -> 1.358 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.59%] index_copy_ reverse : Elapsed 0.014 ms (1.358 ms / 100) 1.406 -> 1.405 ( -0.07%) [ +0.07% +0.14% +0.00% / -0.07% +0.57% +0.78%] index_add_ spread : Elapsed 0.014 ms (1.407 ms / 100) 1.357 -> 1.356 ( -0.07%) [ +0.00% +0.07% +0.00% / -0.07% +0.66% +1.03%] index_copy_ spread : Elapsed 0.014 ms (1.357 ms / 100) 1.406 -> 1.405 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.50% +0.85%] index_add_ strided 3 : Elapsed 0.014 ms (1.406 ms / 100) 1.358 -> 1.358 ( +0.00%) [ +0.07% +0.15% +0.00% / +0.00% +0.81% +1.10%] index_copy_ strided 3 : Elapsed 0.014 ms (1.359 ms / 100) 1.402 -> 1.403 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.64% +0.71%] index_add_ strided 7 : Elapsed 0.014 ms (1.402 ms / 100) 1.354 -> 1.353 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.74% +0.74%] index_copy_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.405 -> 1.407 ( +0.14%) [ +0.28% +0.07% +0.00% / +0.14% +0.85% +0.78%] index_add_ perm : Elapsed 0.014 ms (1.409 ms / 100) 1.355 -> 1.357 ( +0.15%) [ +0.22% +0.15% +0.00% / +0.15% +0.74% +0.66%] index_copy_ perm : Elapsed 0.014 ms (1.358 ms / 100) 1.406 -> 1.406 ( +0.00%) [ +0.00% +0.14% +0.07% / +0.00% +0.78% +0.78%] index_add_ perm_sorted : Elapsed 0.014 ms (1.406 ms / 100) 1.357 -> 1.359 ( +0.15%) [ +0.00% +0.07% +0.00% / +0.15% +0.52% +0.59%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.357 ms / 100) 9.155 -> 9.167 ( +0.13%) [ +0.07% +0.11% +0.00% / +0.13% +0.28% +0.15%] index_select const : Elapsed 0.092 ms (9.161 ms / 100) 9.153 -> 9.166 ( +0.14%) [ +0.21% +0.21% +0.00% / +0.23% +0.14% +0.51%] index_select wrap : Elapsed 0.092 ms (9.172 ms / 100) 9.168 -> 9.159 ( -0.10%) [ +0.03% +0.04% +0.00% / -0.10% +0.32% +0.26%] index_select linear : Elapsed 0.092 ms (9.171 ms / 100) 9.155 -> 9.152 ( -0.03%) [ +0.11% +0.00% +0.09% / +0.21% -0.03% +0.42%] index_select reverse : Elapsed 0.092 ms (9.165 ms / 100) 9.147 -> 9.165 ( +0.20%) [ +0.26% +0.00% +0.23% / +0.20% +0.28% +0.24%] index_select skip64 : Elapsed 0.092 ms (9.171 ms / 100) 9.163 -> 9.159 ( -0.04%) [ +0.00% +0.04% +0.03% / +0.08% -0.01% -0.04%] index_select skip256 : Elapsed 0.092 ms (9.163 ms / 100) 9.157 -> 9.159 ( +0.02%) [ +0.10% +0.13% +0.00% / +0.10% +0.02% +0.07%] index_select spread : Elapsed 0.092 ms (9.166 ms / 100) 9.152 -> 9.156 ( +0.04%) [ +0.01% +0.21% +0.00% / +0.04% +0.24% +0.26%] index_select strided 3 : Elapsed 0.092 ms (9.153 ms / 100) 9.152 -> 9.157 ( +0.05%) [ +0.03% +0.00% +0.02% / +0.05% +0.21% +0.21%] index_select random : Elapsed 0.092 ms (9.155 ms / 100) 9.165 -> 9.152 ( -0.14%) [ +0.07% +0.00% +0.05% / -0.14% +0.09% +0.02%] index_select random_sorted : Elapsed 0.092 ms (9.171 ms / 100) B = [20, 5, 16, 40] (stride (1, 320, 20, 1600)) A = [20, 5, 16, 4] (stride (20, 1, 400, 5)) dim = 3 1.282 -> 1.284 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.62% +1.64%] index_add_ linear : Elapsed 0.013 ms (1.282 ms / 100) 1.240 -> 1.243 ( +0.24%) [ +0.08% +0.00% +0.16% / +0.24% +0.56% +1.05%] index_copy_ linear : Elapsed 0.012 ms (1.241 ms / 100) 1.282 -> 1.284 ( +0.16%) [ +0.08% +0.31% +0.00% / +0.16% +0.31% +0.70%] index_add_ reverse : Elapsed 0.013 ms (1.283 ms / 100) 1.240 -> 1.242 ( +0.16%) [ +0.00% +0.24% +0.00% / +0.16% +0.40% +0.40%] index_copy_ reverse : Elapsed 0.012 ms (1.240 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.55% +0.63%] index_add_ spread : Elapsed 0.013 ms (1.280 ms / 100) 1.233 -> 1.237 ( +0.32%) [ +0.08% +0.24% +0.00% / +0.32% +0.73% +0.65%] index_copy_ spread : Elapsed 0.012 ms (1.234 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.55% +0.63%] index_add_ strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.235 -> 1.237 ( +0.16%) [ +0.16% +0.08% +0.00% / +0.16% +0.49% +0.49%] index_copy_ strided 3 : Elapsed 0.012 ms (1.237 ms / 100) 1.278 -> 1.279 ( +0.08%) [ +0.16% +0.16% +0.00% / +0.08% +0.39% +0.70%] index_add_ strided 7 : Elapsed 0.013 ms (1.280 ms / 100) 1.235 -> 1.235 ( +0.00%) [ +0.00% +0.08% +0.24% / +0.00% +0.65% +1.30%] index_copy_ strided 7 : Elapsed 0.012 ms (1.235 ms / 100) 1.279 -> 1.278 ( -0.08%) [ +0.00% +0.08% +0.08% / -0.08% +0.55% +0.94%] index_add_ perm : Elapsed 0.013 ms (1.279 ms / 100) 1.233 -> 1.235 ( +0.16%) [ +0.00% +0.08% +0.16% / +0.16% +0.73% +0.57%] index_copy_ perm : Elapsed 0.012 ms (1.233 ms / 100) 1.283 -> 1.287 ( +0.31%) [ +0.00% +0.16% +0.23% / +0.31% +0.39% +0.39%] index_add_ perm_sorted : Elapsed 0.013 ms (1.283 ms / 100) 1.239 -> 1.240 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.32% +0.40%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.240 ms / 100) 8.713 -> 8.714 ( +0.01%) [ +0.11% +0.17% +0.00% / +0.01% +0.14% +0.52%] index_select const : Elapsed 0.087 ms (8.723 ms / 100) 8.714 -> 8.717 ( +0.03%) [ +0.00% +0.10% +0.29% / +0.03% +0.18% +0.24%] index_select wrap : Elapsed 0.087 ms (8.714 ms / 100) 8.724 -> 8.723 ( -0.01%) [ +0.06% +0.07% +0.00% / +0.07% -0.01% +0.44%] index_select linear : Elapsed 0.087 ms (8.729 ms / 100) 8.721 -> 8.732 ( +0.13%) [ +0.13% +0.00% +0.10% / +0.13% +0.36% +0.28%] index_select reverse : Elapsed 0.087 ms (8.732 ms / 100) 8.719 -> 8.740 ( +0.24%) [ +0.07% +0.00% +0.14% / +0.29% +0.24% +0.46%] index_select skip64 : Elapsed 0.087 ms (8.725 ms / 100) 8.720 -> 8.730 ( +0.11%) [ +0.15% +0.07% +0.00% / +0.22% +0.11% +0.29%] index_select skip256 : Elapsed 0.087 ms (8.733 ms / 100) 8.729 -> 8.731 ( +0.02%) [ +0.00% +0.01% +0.27% / +0.02% +0.08% +0.37%] index_select spread : Elapsed 0.087 ms (8.729 ms / 100) 8.721 -> 8.720 ( -0.01%) [ +0.02% +0.01% +0.00% / +0.23% -0.01% +0.33%] index_select strided 3 : Elapsed 0.087 ms (8.723 ms / 100) 8.729 -> 8.728 ( -0.01%) [ +0.02% +0.00% +0.00% / +0.00% +0.11% -0.01%] index_select random : Elapsed 0.087 ms (8.731 ms / 100) 8.726 -> 8.723 ( -0.03%) [ +0.00% +0.08% +0.11% / -0.03% +0.32% +0.24%] index_select random_sorted : Elapsed 0.087 ms (8.726 ms / 100) B = [20, 5, 16, 40] (stride (1, 20, 100, 1600)) A = [20, 5, 16, 4] (stride (80, 1, 5, 1600)) dim = 3 1.312 -> 1.318 ( +0.46%) [ +0.08% +0.15% +0.00% / +0.46% +0.53% +0.46%] index_add_ linear : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.24% +0.00% +0.08% / +0.16% +0.63% +0.79%] index_copy_ linear : Elapsed 0.013 ms (1.274 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.30% +0.00% +0.08% / +0.23% +0.53% +0.53%] index_add_ reverse : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.08% +0.00% +0.08% / +0.00% +0.63% +0.71%] index_copy_ reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.313 -> 1.311 ( -0.15%) [ +0.00% +0.15% +0.91% / -0.15% +0.46% +0.38%] index_add_ spread : Elapsed 0.013 ms (1.313 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.16% +0.08% +0.00% / +0.00% +0.71% +0.78%] index_copy_ spread : Elapsed 0.013 ms (1.276 ms / 100) 1.312 -> 1.309 ( -0.23%) [ +0.00% +0.08% +0.15% / -0.23% +0.38% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.312 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.94% +0.79%] index_copy_ strided 3 : Elapsed 0.013 ms (1.271 ms / 100) 1.311 -> 1.311 ( +0.00%) [ +0.23% +0.00% +0.08% / +0.00% +0.61% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.314 ms / 100) 1.271 -> 1.271 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +1.02% +0.79%] index_copy_ strided 7 : Elapsed 0.013 ms (1.271 ms / 100) 1.313 -> 1.315 ( +0.15%) [ +0.00% +0.08% +0.30% / +0.15% +0.38% +0.30%] index_add_ perm : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.272 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.79% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.271 ms / 100) 1.313 -> 1.313 ( +0.00%) [ +0.23% +0.00% +0.15% / +0.00% +0.38% +0.46%] index_add_ perm_sorted : Elapsed 0.013 ms (1.316 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.71% +0.79%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 9.125 -> 9.129 ( +0.04%) [ +0.13% +0.00% +0.14% / +0.04% +0.35% +0.39%] index_select const : Elapsed 0.091 ms (9.137 ms / 100) 9.148 -> 9.150 ( +0.02%) [ +0.28% +0.00% +0.08% / +0.02% +0.38% +0.23%] index_select wrap : Elapsed 0.092 ms (9.174 ms / 100) 9.141 -> 9.146 ( +0.05%) [ +0.34% +0.14% +0.00% / +0.05% +0.18% +0.48%] index_select linear : Elapsed 0.092 ms (9.172 ms / 100) 9.141 -> 9.144 ( +0.03%) [ +0.16% +0.00% +0.09% / +0.09% +0.03% +0.04%] index_select reverse : Elapsed 0.092 ms (9.156 ms / 100) 9.113 -> 9.128 ( +0.16%) [ +0.30% +0.00% +0.24% / +0.16% +0.30% +0.40%] index_select skip64 : Elapsed 0.091 ms (9.140 ms / 100) 9.128 -> 9.129 ( +0.01%) [ +0.23% +0.00% +0.05% / +0.01% +0.16% +0.25%] index_select skip256 : Elapsed 0.091 ms (9.149 ms / 100) 9.152 -> 9.150 ( -0.02%) [ +0.02% +0.00% +0.26% / -0.02% +0.28% +0.09%] index_select spread : Elapsed 0.092 ms (9.154 ms / 100) 9.156 -> 9.156 ( +0.00%) [ +0.09% +0.00% +0.12% / +0.00% +0.02% +0.07%] index_select strided 3 : Elapsed 0.092 ms (9.164 ms / 100) 9.163 -> 9.149 ( -0.15%) [ +0.05% +0.00% +0.04% / -0.15% +0.00% -0.07%] index_select random : Elapsed 0.092 ms (9.168 ms / 100) 9.146 -> 9.138 ( -0.09%) [ +0.07% +0.09% +0.00% / -0.09% +0.13% +0.27%] index_select random_sorted : Elapsed 0.092 ms (9.152 ms / 100) out_shape = [40, 16, 4, 5] in_shape = [20, 16, 4, 5] idx_dim = 0 B = [40, 16, 4, 5] (stride (1, 200, 3200, 40)) A = [20, 16, 4, 5] (stride (320, 1, 16, 64)) dim = 0 2.442 -> 2.454 ( +0.49%) [ +0.00% +0.12% +0.04% / +0.49% +0.61% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.442 ms / 100) 2.451 -> 2.463 ( +0.49%) [ +0.00% +0.08% +0.08% / +0.49% +0.65% +0.90%] index_copy_ linear : Elapsed 0.025 ms (2.451 ms / 100) 2.448 -> 2.455 ( +0.29%) [ +0.16% +0.00% +0.08% / +0.29% +0.49% +0.41%] index_add_ reverse : Elapsed 0.025 ms (2.452 ms / 100) 2.453 -> 2.465 ( +0.49%) [ +0.16% +0.08% +0.00% / +0.49% +0.57% +0.53%] index_copy_ reverse : Elapsed 0.025 ms (2.457 ms / 100) 2.460 -> 2.466 ( +0.24%) [ +0.00% +0.20% +0.16% / +0.53% +0.45% +0.24%] index_add_ spread : Elapsed 0.025 ms (2.460 ms / 100) 2.470 -> 2.482 ( +0.49%) [ +0.00% +0.12% +0.08% / +0.49% +0.53% +0.49%] index_copy_ spread : Elapsed 0.025 ms (2.470 ms / 100) 2.459 -> 2.467 ( +0.33%) [ +0.33% +0.00% +0.08% / +0.33% +0.49% +0.53%] index_add_ strided 3 : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.480 ( +0.45%) [ +0.00% +0.00% +0.04% / +0.45% +0.49% +0.77%] index_copy_ strided 3 : Elapsed 0.025 ms (2.469 ms / 100) 2.461 -> 2.467 ( +0.24%) [ +0.00% +0.00% +0.04% / +0.37% +0.24% +0.37%] index_add_ strided 7 : Elapsed 0.025 ms (2.461 ms / 100) 2.470 -> 2.476 ( +0.24%) [ +0.20% +0.04% +0.00% / +0.45% +0.45% +0.24%] index_copy_ strided 7 : Elapsed 0.025 ms (2.475 ms / 100) 2.456 -> 2.469 ( +0.53%) [ +0.20% +0.08% +0.00% / +0.61% +0.65% +0.53%] index_add_ perm : Elapsed 0.025 ms (2.461 ms / 100) 2.471 -> 2.474 ( +0.12%) [ +0.00% +0.00% +0.00% / +0.12% +0.73% +0.40%] index_copy_ perm : Elapsed 0.025 ms (2.471 ms / 100) 2.461 -> 2.470 ( +0.37%) [ +0.16% +0.04% +0.00% / +0.37% +0.49% +0.41%] index_add_ perm_sorted : Elapsed 0.025 ms (2.465 ms / 100) 2.468 -> 2.479 ( +0.45%) [ +0.00% +0.08% +0.08% / +0.53% +0.45% +0.81%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) 4.500 -> 4.497 ( -0.07%) [ +0.00% +0.13% +0.02% / +0.00% +0.02% -0.07%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.502 -> 4.493 ( -0.20%) [ +0.07% +0.00% +0.04% / -0.20% +0.07% +0.27%] index_select wrap : Elapsed 0.045 ms (4.505 ms / 100) 4.504 -> 4.500 ( -0.09%) [ +0.02% +0.00% +0.02% / -0.09% -0.02% +0.13%] index_select linear : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.498 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.11% -0.04% +0.00%] index_select reverse : Elapsed 0.045 ms (4.503 ms / 100) 4.497 -> 4.493 ( -0.09%) [ +0.00% +0.00% +0.11% / -0.09% +0.11% +0.04%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.499 -> 4.502 ( +0.07%) [ +0.04% +0.04% +0.00% / +0.09% +0.07% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.498 ( -0.04%) [ +0.11% +0.00% +0.07% / -0.04% +0.11% +0.11%] index_select spread : Elapsed 0.045 ms (4.505 ms / 100) 4.503 -> 4.495 ( -0.18%) [ +0.02% +0.07% +0.00% / -0.18% +0.07% +0.02%] index_select strided 3 : Elapsed 0.045 ms (4.504 ms / 100) 4.502 -> 4.501 ( -0.02%) [ +0.00% +0.11% +0.02% / -0.02% +0.00% +0.11%] index_select strided 5 : Elapsed 0.045 ms (4.502 ms / 100) 4.499 -> 4.500 ( +0.02%) [ +0.00% +0.09% +0.13% / +0.04% +0.02% +0.27%] index_select strided 7 : Elapsed 0.045 ms (4.499 ms / 100) 4.496 -> 4.493 ( -0.07%) [ +0.04% +0.11% +0.00% / -0.07% +0.22% +0.16%] index_select strided 8 : Elapsed 0.045 ms (4.498 ms / 100) 4.489 -> 4.495 ( +0.13%) [ +0.27% +0.00% +0.20% / +0.31% +0.31% +0.13%] index_select strided 16 : Elapsed 0.045 ms (4.501 ms / 100) 4.502 -> 4.497 ( -0.11%) [ +0.02% +0.00% +0.04% / -0.11% +0.04% +0.29%] index_select random : Elapsed 0.045 ms (4.503 ms / 100) 4.499 -> 4.503 ( +0.09%) [ +0.16% +0.09% +0.00% / +0.09% +0.20% +0.22%] index_select random_sorted : Elapsed 0.045 ms (4.506 ms / 100) B = [40, 16, 4, 5] (stride (1, 40, 3200, 640)) A = [20, 16, 4, 5] (stride (4, 400, 1, 80)) dim = 0 2.396 -> 2.407 ( +0.46%) [ +0.00% +0.29% +0.04% / +0.46% +0.83% +0.83%] index_add_ linear : Elapsed 0.024 ms (2.396 ms / 100) 2.402 -> 2.412 ( +0.42%) [ +0.08% +0.00% +0.00% / +0.42% +0.87% +0.79%] index_copy_ linear : Elapsed 0.024 ms (2.404 ms / 100) 2.394 -> 2.408 ( +0.58%) [ +0.25% +0.00% +0.00% / +0.58% +0.88% +0.88%] index_add_ reverse : Elapsed 0.024 ms (2.400 ms / 100) 2.395 -> 2.410 ( +0.63%) [ +0.00% +0.13% +0.13% / +0.63% +1.29% +1.25%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.407 -> 2.420 ( +0.54%) [ +0.21% +0.00% +0.08% / +0.54% +0.87% +1.16%] index_add_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.417 -> 2.428 ( +0.46%) [ +0.00% +0.08% +0.00% / +0.46% +1.03% +0.95%] index_copy_ spread : Elapsed 0.024 ms (2.417 ms / 100) 2.410 -> 2.423 ( +0.54%) [ +0.00% +0.17% +0.17% / +0.54% +0.66% +0.62%] index_add_ strided 3 : Elapsed 0.024 ms (2.410 ms / 100) 2.424 -> 2.434 ( +0.41%) [ +0.08% +0.04% +0.00% / +0.41% +0.62% +0.66%] index_copy_ strided 3 : Elapsed 0.024 ms (2.426 ms / 100) 2.414 -> 2.424 ( +0.41%) [ +0.08% +0.00% +0.00% / +0.41% +0.54% +0.70%] index_add_ strided 7 : Elapsed 0.024 ms (2.416 ms / 100) 2.421 -> 2.434 ( +0.54%) [ +0.04% +0.17% +0.00% / +0.54% +0.66% +0.99%] index_copy_ strided 7 : Elapsed 0.024 ms (2.422 ms / 100) 2.416 -> 2.421 ( +0.21%) [ +0.12% +0.04% +0.00% / +0.54% +0.21% +0.25%] index_add_ perm : Elapsed 0.024 ms (2.419 ms / 100) 2.422 -> 2.428 ( +0.25%) [ +0.17% +0.00% +0.12% / +0.62% +0.25% +0.41%] index_copy_ perm : Elapsed 0.024 ms (2.426 ms / 100) 2.413 -> 2.418 ( +0.21%) [ +0.00% +0.04% +0.04% / +0.50% +0.25% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.413 ms / 100) 2.424 -> 2.433 ( +0.37%) [ +0.00% +0.12% +0.08% / +0.54% +0.41% +0.37%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) 4.421 -> 4.421 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.18% +0.11% +0.00%] index_select const : Elapsed 0.044 ms (4.421 ms / 100) 4.431 -> 4.429 ( -0.05%) [ +0.16% +0.09% +0.00% / +0.09% -0.05% +0.09%] index_select wrap : Elapsed 0.044 ms (4.438 ms / 100) 4.436 -> 4.432 ( -0.09%) [ +0.00% +0.02% +0.02% / -0.07% +0.00% -0.09%] index_select linear : Elapsed 0.044 ms (4.436 ms / 100) 4.438 -> 4.427 ( -0.25%) [ +0.00% +0.09% +0.00% / +0.07% -0.16% -0.25%] index_select reverse : Elapsed 0.044 ms (4.438 ms / 100) 4.423 -> 4.420 ( -0.07%) [ +0.16% +0.00% +0.16% / +0.18% -0.07% +0.05%] index_select skip64 : Elapsed 0.044 ms (4.430 ms / 100) 4.420 -> 4.420 ( +0.00%) [ +0.00% +0.02% +0.23% / +0.00% +0.05% +0.14%] index_select skip256 : Elapsed 0.044 ms (4.420 ms / 100) 4.428 -> 4.432 ( +0.09%) [ +0.11% +0.00% +0.11% / +0.09% +0.11% +0.36%] index_select spread : Elapsed 0.044 ms (4.433 ms / 100) 4.433 -> 4.436 ( +0.07%) [ +0.09% +0.00% +0.14% / +0.09% +0.11% +0.07%] index_select strided 3 : Elapsed 0.044 ms (4.437 ms / 100) 4.423 -> 4.424 ( +0.02%) [ +0.00% +0.02% +0.16% / +0.02% +0.18% +0.11%] index_select strided 5 : Elapsed 0.044 ms (4.423 ms / 100) 4.434 -> 4.431 ( -0.07%) [ +0.00% +0.05% +0.00% / +0.02% -0.07% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.434 ms / 100) 4.427 -> 4.424 ( -0.07%) [ +0.00% +0.18% +0.11% / +0.07% -0.07% -0.02%] index_select strided 8 : Elapsed 0.044 ms (4.427 ms / 100) 4.424 -> 4.422 ( -0.05%) [ +0.02% +0.14% +0.00% / -0.05% +0.16% +0.16%] index_select strided 16 : Elapsed 0.044 ms (4.425 ms / 100) 4.428 -> 4.428 ( +0.00%) [ +0.09% +0.34% +0.00% / +0.27% +0.20% +0.00%] index_select random : Elapsed 0.044 ms (4.432 ms / 100) 4.435 -> 4.430 ( -0.11%) [ +0.00% +0.14% +0.05% / -0.02% -0.11% +0.05%] index_select random_sorted : Elapsed 0.044 ms (4.435 ms / 100) B = [40, 16, 4, 5] (stride (4, 160, 1, 2560)) A = [20, 16, 4, 5] (stride (1, 400, 20, 80)) dim = 0 2.451 -> 2.462 ( +0.45%) [ +0.04% +0.20% +0.00% / +0.45% +0.65% +0.57%] index_add_ linear : Elapsed 0.025 ms (2.452 ms / 100) 2.444 -> 2.452 ( +0.33%) [ +0.00% +0.37% +0.08% / +0.33% +0.70% +0.94%] index_copy_ linear : Elapsed 0.024 ms (2.444 ms / 100) 2.451 -> 2.462 ( +0.45%) [ +0.12% +0.16% +0.00% / +0.45% +0.49% +0.49%] index_add_ reverse : Elapsed 0.025 ms (2.454 ms / 100) 2.446 -> 2.455 ( +0.37%) [ +0.00% +0.16% +0.08% / +0.37% +0.65% +0.57%] index_copy_ reverse : Elapsed 0.024 ms (2.446 ms / 100) 2.473 -> 2.481 ( +0.32%) [ +0.00% +0.08% +0.16% / +0.40% +0.32% +0.53%] index_add_ spread : Elapsed 0.025 ms (2.473 ms / 100) 2.474 -> 2.486 ( +0.49%) [ +0.12% +0.20% +0.00% / +0.49% +0.65% +0.69%] index_copy_ spread : Elapsed 0.025 ms (2.477 ms / 100) 2.467 -> 2.478 ( +0.45%) [ +0.04% +0.00% +0.12% / +0.49% +0.45% +0.49%] index_add_ strided 3 : Elapsed 0.025 ms (2.468 ms / 100) 2.470 -> 2.479 ( +0.36%) [ +0.04% +0.00% +0.08% / +0.49% +0.36% +0.73%] index_copy_ strided 3 : Elapsed 0.025 ms (2.471 ms / 100) 2.466 -> 2.476 ( +0.41%) [ +0.04% +0.00% +0.36% / +0.41% +0.57% +0.57%] index_add_ strided 7 : Elapsed 0.025 ms (2.467 ms / 100) 2.469 -> 2.484 ( +0.61%) [ +0.08% +0.12% +0.00% / +0.61% +0.61% +0.65%] index_copy_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.463 -> 2.476 ( +0.53%) [ +0.08% +0.32% +0.00% / +0.53% +0.69% +0.57%] index_add_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.465 -> 2.472 ( +0.28%) [ +0.00% +0.00% +0.20% / +0.28% +0.81% +0.77%] index_copy_ perm : Elapsed 0.025 ms (2.465 ms / 100) 2.467 -> 2.477 ( +0.41%) [ +0.04% +0.00% +0.08% / +0.45% +0.45% +0.41%] index_add_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) 2.466 -> 2.475 ( +0.36%) [ +0.04% +0.04% +0.00% / +0.36% +0.53% +0.65%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.467 ms / 100) 4.492 -> 4.495 ( +0.07%) [ +0.13% +0.22% +0.00% / +0.24% +0.07% +0.20%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.498 -> 4.496 ( -0.04%) [ +0.20% +0.00% +0.20% / +0.20% -0.04% +0.31%] index_select wrap : Elapsed 0.045 ms (4.507 ms / 100) 4.498 -> 4.503 ( +0.11%) [ +0.04% +0.00% +0.00% / +0.11% +0.20% +0.22%] index_select linear : Elapsed 0.045 ms (4.500 ms / 100) 4.501 -> 4.500 ( -0.02%) [ +0.00% +0.02% +0.00% / +0.09% +0.20% -0.02%] index_select reverse : Elapsed 0.045 ms (4.501 ms / 100) 4.490 -> 4.495 ( +0.11%) [ +0.07% +0.16% +0.00% / +0.20% +0.16% +0.11%] index_select skip64 : Elapsed 0.045 ms (4.493 ms / 100) 4.493 -> 4.497 ( +0.09%) [ +0.02% +0.18% +0.00% / +0.09% +0.22% +0.11%] index_select skip256 : Elapsed 0.045 ms (4.494 ms / 100) 4.501 -> 4.502 ( +0.02%) [ +0.00% +0.04% +0.09% / +0.18% +0.02% +0.02%] index_select spread : Elapsed 0.045 ms (4.501 ms / 100) 4.498 -> 4.503 ( +0.11%) [ +0.04% +0.00% +0.00% / +0.13% +0.11% +0.13%] index_select strided 3 : Elapsed 0.045 ms (4.500 ms / 100) 4.494 -> 4.498 ( +0.09%) [ +0.24% +0.24% +0.00% / +0.09% +0.33% +0.36%] index_select strided 5 : Elapsed 0.045 ms (4.505 ms / 100) 4.490 -> 4.492 ( +0.04%) [ +0.20% +0.07% +0.00% / +0.04% +0.40% +0.38%] index_select strided 7 : Elapsed 0.045 ms (4.499 ms / 100) 4.496 -> 4.498 ( +0.04%) [ +0.02% +0.00% +0.09% / +0.04% +0.42% +0.18%] index_select strided 8 : Elapsed 0.045 ms (4.497 ms / 100) 4.496 -> 4.496 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.16% +0.47%] index_select strided 16 : Elapsed 0.045 ms (4.496 ms / 100) 4.495 -> 4.499 ( +0.09%) [ +0.11% +0.00% +0.16% / +0.09% +0.11% +0.20%] index_select random : Elapsed 0.045 ms (4.500 ms / 100) 4.500 -> 4.506 ( +0.13%) [ +0.07% +0.04% +0.00% / +0.13% +0.22% +0.24%] index_select random_sorted : Elapsed 0.045 ms (4.503 ms / 100) out_shape = [20, 40, 4, 5] in_shape = [20, 16, 4, 5] idx_dim = 1 B = [20, 40, 4, 5] (stride (800, 20, 1, 4)) A = [20, 16, 4, 5] (stride (16, 1, 320, 1280)) dim = 1 4.357 -> 4.357 ( +0.00%) [ +0.11% +0.14% +0.00% / +0.00% +0.44% +0.62%] index_add_ linear : Elapsed 0.044 ms (4.362 ms / 100) 4.191 -> 4.195 ( +0.10%) [ +0.24% +0.19% +0.00% / +0.10% +0.64% +0.72%] index_copy_ linear : Elapsed 0.042 ms (4.201 ms / 100) 4.354 -> 4.363 ( +0.21%) [ +0.00% +0.02% +0.02% / +0.21% +0.53% +0.60%] index_add_ reverse : Elapsed 0.044 ms (4.354 ms / 100) 4.183 -> 4.194 ( +0.26%) [ +0.14% +0.12% +0.00% / +0.26% +0.72% +0.53%] index_copy_ reverse : Elapsed 0.042 ms (4.189 ms / 100) 4.358 -> 4.361 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.57% +0.57%] index_add_ spread : Elapsed 0.044 ms (4.361 ms / 100) 4.200 -> 4.201 ( +0.02%) [ +0.00% +0.07% +0.02% / +0.02% +0.57% +0.95%] index_copy_ spread : Elapsed 0.042 ms (4.200 ms / 100) 4.374 -> 4.375 ( +0.02%) [ +0.00% +0.02% +0.18% / +0.02% +0.59% +0.48%] index_add_ strided 3 : Elapsed 0.044 ms (4.374 ms / 100) 4.220 -> 4.220 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.64% +0.45%] index_copy_ strided 3 : Elapsed 0.042 ms (4.220 ms / 100) 4.356 -> 4.360 ( +0.09%) [ +0.09% +0.00% +0.02% / +0.09% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.044 ms (4.360 ms / 100) 4.184 -> 4.183 ( -0.02%) [ +0.00% +0.12% +0.19% / -0.02% +0.57% +0.62%] index_copy_ strided 7 : Elapsed 0.042 ms (4.184 ms / 100) 4.364 -> 4.373 ( +0.21%) [ +0.07% +0.00% +0.00% / +0.21% +0.30% +0.34%] index_add_ perm : Elapsed 0.044 ms (4.367 ms / 100) 4.202 -> 4.203 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.33% +0.31%] index_copy_ perm : Elapsed 0.042 ms (4.202 ms / 100) 4.365 -> 4.364 ( -0.02%) [ +0.02% +0.05% +0.00% / -0.02% +0.23% +0.37%] index_add_ perm_sorted : Elapsed 0.044 ms (4.366 ms / 100) 4.198 -> 4.204 ( +0.14%) [ +0.12% +0.05% +0.00% / +0.14% +0.45% +0.55%] index_copy_ perm_sorted : Elapsed 0.042 ms (4.203 ms / 100) 5.552 -> 5.553 ( +0.02%) [ +0.14% +0.25% +0.00% / +0.02% +0.25% +0.38%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.00% +0.11% +0.00% / +0.02% +0.11% +0.32%] index_select wrap : Elapsed 0.056 ms (5.557 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.02% +0.00% +0.02% / +0.09% +0.13% +0.04%] index_select linear : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.564 ( +0.04%) [ +0.13% +0.00% +0.07% / +0.04% +0.22% +0.09%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.559 -> 5.556 ( -0.05%) [ +0.09% +0.18% +0.00% / +0.16% -0.05% +0.07%] index_select skip64 : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.556 ( -0.11%) [ +0.00% +0.09% +0.11% / -0.11% -0.09% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.557 -> 5.560 ( +0.05%) [ +0.00% +0.14% +0.13% / +0.22% +0.20% +0.05%] index_select spread : Elapsed 0.056 ms (5.557 ms / 100) 5.562 -> 5.561 ( -0.02%) [ +0.00% +0.04% +0.04% / +0.02% -0.02% +0.07%] index_select strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.05% +0.04% +0.00% / +0.11% +0.13% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.562 ms / 100) 5.557 -> 5.564 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.13% +0.14% +0.22%] index_select strided 7 : Elapsed 0.056 ms (5.559 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.02% +0.18%] index_select strided 8 : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.560 ( +0.02%) [ +0.02% +0.00% +0.13% / +0.07% +0.02% +0.16%] index_select random : Elapsed 0.056 ms (5.560 ms / 100) 5.567 -> 5.562 ( -0.09%) [ +0.04% +0.04% +0.00% / -0.09% +0.00% +0.02%] index_select random_sorted : Elapsed 0.056 ms (5.569 ms / 100) B = [20, 40, 4, 5] (stride (1, 100, 4000, 20)) A = [20, 16, 4, 5] (stride (80, 5, 1600, 1)) dim = 1 4.045 -> 4.046 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.87% +0.94%] index_add_ linear : Elapsed 0.040 ms (4.046 ms / 100) 3.920 -> 3.917 ( -0.08%) [ +0.05% +0.00% +0.03% / -0.08% +0.87% +0.97%] index_copy_ linear : Elapsed 0.039 ms (3.922 ms / 100) 4.048 -> 4.048 ( +0.00%) [ +0.05% +0.10% +0.00% / +0.00% +0.79% +0.89%] index_add_ reverse : Elapsed 0.040 ms (4.050 ms / 100) 3.918 -> 3.922 ( +0.10%) [ +0.13% +0.08% +0.00% / +0.10% +0.82% +0.94%] index_copy_ reverse : Elapsed 0.039 ms (3.923 ms / 100) 4.038 -> 4.042 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.77% +0.79%] index_add_ spread : Elapsed 0.040 ms (4.042 ms / 100) 3.919 -> 3.917 ( -0.05%) [ +0.08% +0.00% +0.05% / -0.05% +0.54% +0.56%] index_copy_ spread : Elapsed 0.039 ms (3.922 ms / 100) 4.059 -> 4.057 ( -0.05%) [ +0.17% +0.00% +0.17% / -0.05% +0.76% +0.96%] index_add_ strided 3 : Elapsed 0.041 ms (4.066 ms / 100) 3.923 -> 3.923 ( +0.00%) [ +0.05% +0.00% +0.05% / +0.00% +0.74% +0.87%] index_copy_ strided 3 : Elapsed 0.039 ms (3.925 ms / 100) 4.049 -> 4.051 ( +0.05%) [ +0.12% +0.02% +0.00% / +0.05% +0.89% +0.79%] index_add_ strided 7 : Elapsed 0.041 ms (4.054 ms / 100) 3.920 -> 3.921 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.82% +0.79%] index_copy_ strided 7 : Elapsed 0.039 ms (3.920 ms / 100) 4.045 -> 4.050 ( +0.12%) [ +0.12% +0.10% +0.00% / +0.12% +0.94% +0.74%] index_add_ perm : Elapsed 0.040 ms (4.050 ms / 100) 3.920 -> 3.920 ( +0.00%) [ +0.03% +0.00% +0.05% / +0.00% +0.87% +0.84%] index_copy_ perm : Elapsed 0.039 ms (3.921 ms / 100) 4.056 -> 4.057 ( +0.02%) [ +0.00% +0.20% +0.20% / +0.02% +0.94% +0.96%] index_add_ perm_sorted : Elapsed 0.041 ms (4.056 ms / 100) 3.920 -> 3.918 ( -0.05%) [ +0.00% +0.05% +0.10% / -0.05% +0.82% +0.77%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.920 ms / 100) 5.561 -> 5.560 ( -0.02%) [ +0.00% +0.20% +0.07% / +0.11% -0.02% +0.18%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.578 -> 5.568 ( -0.18%) [ +0.00% +0.02% +0.00% / +0.04% -0.13% -0.18%] index_select wrap : Elapsed 0.056 ms (5.578 ms / 100) 5.571 -> 5.573 ( +0.04%) [ +0.09% +0.18% +0.00% / +0.11% +0.07% +0.04%] index_select linear : Elapsed 0.056 ms (5.576 ms / 100) 5.574 -> 5.573 ( -0.02%) [ +0.20% +0.02% +0.00% / +0.04% +0.04% -0.02%] index_select reverse : Elapsed 0.056 ms (5.585 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.14% +0.00% +0.11% / +0.07% +0.09% +0.11%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.562 -> 5.565 ( +0.05%) [ +0.00% +0.07% +0.02% / +0.05% +0.14% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.572 -> 5.570 ( -0.04%) [ +0.04% +0.00% +0.05% / +0.00% -0.04% +0.07%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.572 -> 5.567 ( -0.09%) [ +0.00% +0.02% +0.02% / +0.02% -0.02% -0.09%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.574 -> 5.568 ( -0.11%) [ +0.27% +0.00% +0.05% / -0.11% -0.02% +0.04%] index_select strided 5 : Elapsed 0.056 ms (5.589 ms / 100) 5.569 -> 5.577 ( +0.14%) [ +0.09% +0.07% +0.00% / +0.14% +0.14% +0.32%] index_select strided 7 : Elapsed 0.056 ms (5.574 ms / 100) 5.556 -> 5.559 ( +0.05%) [ +0.20% +0.11% +0.00% / +0.05% +0.14% +0.31%] index_select strided 8 : Elapsed 0.056 ms (5.567 ms / 100) 5.571 -> 5.571 ( +0.00%) [ +0.00% +0.02% +0.04% / +0.04% +0.00% +0.07%] index_select random : Elapsed 0.056 ms (5.571 ms / 100) 5.572 -> 5.569 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.04% +0.00% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.576 ms / 100) B = [20, 40, 4, 5] (stride (40, 1, 4000, 800)) A = [20, 16, 4, 5] (stride (5, 100, 1600, 1)) dim = 1 3.673 -> 3.675 ( +0.05%) [ +0.11% +0.00% +0.08% / +0.05% +0.63% +0.60%] index_add_ linear : Elapsed 0.037 ms (3.677 ms / 100) 3.546 -> 3.551 ( +0.14%) [ +0.20% +0.00% +0.06% / +0.14% +0.71% +0.68%] index_copy_ linear : Elapsed 0.036 ms (3.553 ms / 100) 3.681 -> 3.686 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.52% +0.54%] index_add_ reverse : Elapsed 0.037 ms (3.681 ms / 100) 3.552 -> 3.556 ( +0.11%) [ +0.03% +0.03% +0.00% / +0.11% +0.42% +0.53%] index_copy_ reverse : Elapsed 0.036 ms (3.553 ms / 100) 3.684 -> 3.689 ( +0.14%) [ +0.00% +0.16% +0.16% / +0.14% +0.62% +0.46%] index_add_ spread : Elapsed 0.037 ms (3.684 ms / 100) 3.569 -> 3.570 ( +0.03%) [ +0.00% +0.08% +0.00% / +0.03% +0.45% +0.39%] index_copy_ spread : Elapsed 0.036 ms (3.569 ms / 100) 3.684 -> 3.678 ( -0.16%) [ +0.03% +0.00% +0.00% / -0.16% +0.35% +0.49%] index_add_ strided 3 : Elapsed 0.037 ms (3.685 ms / 100) 3.553 -> 3.548 ( -0.14%) [ +0.03% +0.03% +0.00% / -0.14% +0.34% +0.42%] index_copy_ strided 3 : Elapsed 0.036 ms (3.554 ms / 100) 3.680 -> 3.680 ( +0.00%) [ +0.03% +0.16% +0.00% / +0.00% +0.57% +0.71%] index_add_ strided 7 : Elapsed 0.037 ms (3.681 ms / 100) 3.551 -> 3.550 ( -0.03%) [ +0.03% +0.08% +0.00% / -0.03% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.036 ms (3.552 ms / 100) 3.674 -> 3.676 ( +0.05%) [ +0.00% +0.03% +0.03% / +0.05% +0.60% +0.63%] index_add_ perm : Elapsed 0.037 ms (3.674 ms / 100) 3.551 -> 3.551 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.59%] index_copy_ perm : Elapsed 0.036 ms (3.551 ms / 100) 3.675 -> 3.675 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.44% +0.46%] index_add_ perm_sorted : Elapsed 0.037 ms (3.676 ms / 100) 3.551 -> 3.552 ( +0.03%) [ +0.08% +0.06% +0.00% / +0.03% +0.37% +0.39%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.554 ms / 100) 5.467 -> 5.471 ( +0.07%) [ +0.11% +0.05% +0.00% / +0.07% +0.16% +0.26%] index_select const : Elapsed 0.055 ms (5.473 ms / 100) 5.480 -> 5.482 ( +0.04%) [ +0.00% +0.07% +0.04% / +0.04% +0.09% +0.26%] index_select wrap : Elapsed 0.055 ms (5.480 ms / 100) 5.476 -> 5.483 ( +0.13%) [ +0.15% +0.00% +0.05% / +0.13% +0.27% +0.20%] index_select linear : Elapsed 0.055 ms (5.484 ms / 100) 5.478 -> 5.485 ( +0.13%) [ +0.04% +0.04% +0.00% / +0.26% +0.22% +0.13%] index_select reverse : Elapsed 0.055 ms (5.480 ms / 100) 5.471 -> 5.471 ( +0.00%) [ +0.11% +0.00% +0.16% / +0.07% +0.00% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.477 ms / 100) 5.480 -> 5.475 ( -0.09%) [ +0.04% +0.07% +0.00% / +0.02% -0.09% -0.09%] index_select skip256 : Elapsed 0.055 ms (5.482 ms / 100) 5.476 -> 5.486 ( +0.18%) [ +0.00% +0.16% +0.11% / +0.18% +0.20% +0.27%] index_select spread : Elapsed 0.055 ms (5.476 ms / 100) 5.478 -> 5.482 ( +0.07%) [ +0.00% +0.16% +0.04% / +0.13% +0.13% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.478 ms / 100) 5.473 -> 5.481 ( +0.15%) [ +0.16% +0.09% +0.00% / +0.16% +0.33% +0.15%] index_select strided 5 : Elapsed 0.055 ms (5.482 ms / 100) 5.479 -> 5.479 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.26% +0.38%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.468 -> 5.467 ( -0.02%) [ +0.15% +0.00% +0.02% / -0.02% +0.11% +0.24%] index_select strided 8 : Elapsed 0.055 ms (5.476 ms / 100) 5.476 -> 5.484 ( +0.15%) [ +0.15% +0.15% +0.00% / +0.15% +0.22% +0.29%] index_select random : Elapsed 0.055 ms (5.484 ms / 100) 5.476 -> 5.478 ( +0.04%) [ +0.00% +0.05% +0.13% / +0.04% +0.18% +0.24%] index_select random_sorted : Elapsed 0.055 ms (5.476 ms / 100) B = [20, 40, 4, 5] (stride (40, 1, 4000, 800)) A = [20, 16, 4, 5] (stride (64, 1, 16, 1280)) dim = 1 3.940 -> 3.950 ( +0.25%) [ +0.05% +0.00% +0.18% / +0.25% +0.89% +1.09%] index_add_ linear : Elapsed 0.039 ms (3.942 ms / 100) 3.780 -> 3.793 ( +0.34%) [ +0.00% +0.00% +0.29% / +0.34% +0.90% +0.93%] index_copy_ linear : Elapsed 0.038 ms (3.780 ms / 100) 3.912 -> 3.919 ( +0.18%) [ +0.36% +0.00% +0.15% / +0.18% +1.00% +1.30%] index_add_ reverse : Elapsed 0.039 ms (3.926 ms / 100) 3.764 -> 3.772 ( +0.21%) [ +0.40% +0.00% +0.27% / +0.21% +1.14% +1.12%] index_copy_ reverse : Elapsed 0.038 ms (3.779 ms / 100) 3.918 -> 3.924 ( +0.15%) [ +0.10% +0.43% +0.00% / +0.15% +0.66% +0.56%] index_add_ spread : Elapsed 0.039 ms (3.922 ms / 100) 3.776 -> 3.784 ( +0.21%) [ +0.13% +0.42% +0.00% / +0.21% +0.66% +0.61%] index_copy_ spread : Elapsed 0.038 ms (3.781 ms / 100) 3.926 -> 3.934 ( +0.20%) [ +0.13% +0.00% +0.18% / +0.20% +0.64% +0.79%] index_add_ strided 3 : Elapsed 0.039 ms (3.931 ms / 100) 3.775 -> 3.785 ( +0.26%) [ +0.21% +0.00% +0.16% / +0.26% +0.64% +0.79%] index_copy_ strided 3 : Elapsed 0.038 ms (3.783 ms / 100) 3.920 -> 3.925 ( +0.13%) [ +0.13% +0.00% +0.33% / +0.13% +0.56% +0.41%] index_add_ strided 7 : Elapsed 0.039 ms (3.925 ms / 100) 3.767 -> 3.767 ( +0.00%) [ +0.03% +0.00% +0.29% / +0.00% +0.88% +0.72%] index_copy_ strided 7 : Elapsed 0.038 ms (3.768 ms / 100) 3.929 -> 3.940 ( +0.28%) [ +0.05% +0.00% +0.48% / +0.28% +1.22% +1.25%] index_add_ perm : Elapsed 0.039 ms (3.931 ms / 100) 3.772 -> 3.786 ( +0.37%) [ +0.00% +0.00% +0.53% / +0.37% +1.09% +1.22%] index_copy_ perm : Elapsed 0.038 ms (3.772 ms / 100) 3.922 -> 3.933 ( +0.28%) [ +0.03% +0.28% +0.00% / +0.28% +0.79% +0.92%] index_add_ perm_sorted : Elapsed 0.039 ms (3.923 ms / 100) 3.772 -> 3.782 ( +0.27%) [ +0.05% +0.24% +0.00% / +0.27% +0.82% +0.90%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.774 ms / 100) 5.478 -> 5.479 ( +0.02%) [ +0.09% +0.00% +0.20% / +0.13% +0.05% +0.02%] index_select const : Elapsed 0.055 ms (5.483 ms / 100) 5.480 -> 5.481 ( +0.02%) [ +0.16% +0.05% +0.00% / +0.13% +0.13% +0.02%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.04% +0.00% +0.15% / +0.11% -0.02% +0.07%] index_select linear : Elapsed 0.055 ms (5.482 ms / 100) 5.478 -> 5.480 ( +0.04%) [ +0.13% +0.13% +0.00% / +0.05% +0.04% +0.15%] index_select reverse : Elapsed 0.055 ms (5.485 ms / 100) 5.483 -> 5.473 ( -0.18%) [ +0.00% +0.04% +0.04% / -0.18% +0.02% +0.11%] index_select skip64 : Elapsed 0.055 ms (5.483 ms / 100) 5.474 -> 5.475 ( +0.02%) [ +0.00% +0.13% +0.04% / +0.02% +0.16% +0.15%] index_select skip256 : Elapsed 0.055 ms (5.474 ms / 100) 5.480 -> 5.484 ( +0.07%) [ +0.05% +0.00% +0.07% / +0.07% +0.16% +0.07%] index_select spread : Elapsed 0.055 ms (5.483 ms / 100) 5.484 -> 5.479 ( -0.09%) [ +0.00% +0.13% +0.04% / +0.05% -0.09% -0.05%] index_select strided 3 : Elapsed 0.055 ms (5.484 ms / 100) 5.479 -> 5.479 ( +0.00%) [ +0.22% +0.11% +0.00% / +0.11% +0.00% +0.00%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.00% +0.11% +0.07% / +0.07% +0.05% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.478 -> 5.478 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.15% +0.13%] index_select strided 8 : Elapsed 0.055 ms (5.480 ms / 100) 5.480 -> 5.479 ( -0.02%) [ +0.07% +0.11% +0.00% / +0.15% -0.02% +0.18%] index_select random : Elapsed 0.055 ms (5.484 ms / 100) 5.481 -> 5.483 ( +0.04%) [ +0.00% +0.09% +0.02% / +0.07% +0.04% +0.11%] index_select random_sorted : Elapsed 0.055 ms (5.481 ms / 100) B = [20, 40, 4, 5] (stride (1, 20, 800, 3200)) A = [20, 16, 4, 5] (stride (1, 400, 20, 80)) dim = 1 4.162 -> 4.162 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.60% +0.62%] index_add_ linear : Elapsed 0.042 ms (4.164 ms / 100) 4.011 -> 4.012 ( +0.02%) [ +0.07% +0.02% +0.00% / +0.02% +0.70% +0.72%] index_copy_ linear : Elapsed 0.040 ms (4.014 ms / 100) 4.139 -> 4.139 ( +0.00%) [ +0.00% +0.02% +0.02% / +0.00% +0.60% +0.60%] index_add_ reverse : Elapsed 0.041 ms (4.139 ms / 100) 4.011 -> 4.013 ( +0.05%) [ +0.05% +0.00% +0.00% / +0.05% +0.65% +0.62%] index_copy_ reverse : Elapsed 0.040 ms (4.013 ms / 100) 4.141 -> 4.139 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.46% +0.43%] index_add_ spread : Elapsed 0.041 ms (4.141 ms / 100) 4.009 -> 4.009 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.52% +0.52%] index_copy_ spread : Elapsed 0.040 ms (4.009 ms / 100) 4.147 -> 4.148 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.51% +0.60%] index_add_ strided 3 : Elapsed 0.041 ms (4.147 ms / 100) 4.006 -> 4.006 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.50% +0.65%] index_copy_ strided 3 : Elapsed 0.040 ms (4.006 ms / 100) 4.141 -> 4.140 ( -0.02%) [ +0.02% +0.02% +0.00% / -0.02% +0.56% +0.46%] index_add_ strided 7 : Elapsed 0.041 ms (4.142 ms / 100) 4.014 -> 4.016 ( +0.05%) [ +0.02% +0.07% +0.00% / +0.05% +0.50% +0.52%] index_copy_ strided 7 : Elapsed 0.040 ms (4.015 ms / 100) 4.164 -> 4.164 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.48% +0.50%] index_add_ perm : Elapsed 0.042 ms (4.165 ms / 100) 4.014 -> 4.015 ( +0.02%) [ +0.02% +0.02% +0.00% / +0.02% +0.50% +0.55%] index_copy_ perm : Elapsed 0.040 ms (4.015 ms / 100) 4.165 -> 4.167 ( +0.05%) [ +0.10% +0.05% +0.00% / +0.05% +0.46% +0.48%] index_add_ perm_sorted : Elapsed 0.042 ms (4.169 ms / 100) 4.016 -> 4.014 ( -0.05%) [ +0.02% +0.00% +0.00% / -0.05% +0.52% +0.55%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.017 ms / 100) 5.566 -> 5.565 ( -0.02%) [ +0.00% +0.07% +0.00% / +0.07% +0.13% -0.02%] index_select const : Elapsed 0.056 ms (5.566 ms / 100) 5.568 -> 5.563 ( -0.09%) [ +0.04% +0.04% +0.00% / -0.09% +0.23% +0.09%] index_select wrap : Elapsed 0.056 ms (5.570 ms / 100) 5.564 -> 5.571 ( +0.13%) [ +0.16% +0.16% +0.00% / +0.13% +0.16% +0.16%] index_select linear : Elapsed 0.056 ms (5.573 ms / 100) 5.566 -> 5.567 ( +0.02%) [ +0.00% +0.05% +0.04% / +0.02% +0.23% +0.18%] index_select reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.570 -> 5.566 ( -0.07%) [ +0.02% +0.05% +0.00% / +0.04% -0.07% -0.05%] index_select skip64 : Elapsed 0.056 ms (5.571 ms / 100) 5.569 -> 5.566 ( -0.05%) [ +0.02% +0.09% +0.00% / -0.05% +0.00% -0.02%] index_select skip256 : Elapsed 0.056 ms (5.570 ms / 100) 5.564 -> 5.567 ( +0.05%) [ +0.14% +0.16% +0.00% / +0.18% +0.05% +0.22%] index_select spread : Elapsed 0.056 ms (5.572 ms / 100) 5.563 -> 5.566 ( +0.05%) [ +0.05% +0.00% +0.13% / +0.05% +0.27% +0.23%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.00% +0.04% +0.09% / +0.02% +0.09% +0.25%] index_select strided 5 : Elapsed 0.056 ms (5.564 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.11% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.564 ( -0.04%) [ +0.09% +0.00% +0.05% / -0.04% +0.11% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.571 ms / 100) 5.570 -> 5.564 ( -0.11%) [ +0.00% +0.02% +0.00% / +0.07% -0.11% +0.02%] index_select random : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.566 ( -0.02%) [ +0.00% +0.04% +0.11% / +0.09% +0.09% -0.02%] index_select random_sorted : Elapsed 0.056 ms (5.567 ms / 100) out_shape = [20, 16, 40, 5] in_shape = [20, 16, 4, 5] idx_dim = 2 B = [20, 16, 40, 5] (stride (3200, 5, 80, 1)) dim = 2 fill_cnt = 4 0.790 -> 0.793 ( +0.38%) [ +0.13% +0.63% +0.00% / +0.38% +0.51% +0.76%] index_fill_ const : Elapsed 0.008 ms (0.791 ms / 100) 0.792 -> 0.794 ( +0.25%) [ +0.00% +0.00% +0.38% / +0.38% +0.25% +0.25%] index_fill_ linear : Elapsed 0.008 ms (0.792 ms / 100) 0.792 -> 0.791 ( -0.13%) [ +0.38% +0.00% +0.00% / -0.13% +0.25% +0.13%] index_fill_ reverse : Elapsed 0.008 ms (0.795 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.00% +0.00% +0.38% / +0.00% +0.38% +0.25%] index_fill_ skip64 : Elapsed 0.008 ms (0.791 ms / 100) 0.792 -> 0.791 ( -0.13%) [ +0.00% +0.38% +0.25% / -0.13% +0.25% +0.38%] index_fill_ skip256 : Elapsed 0.008 ms (0.792 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.38% +0.38% +0.00% / +0.00% +0.38% +0.25%] index_fill_ spread : Elapsed 0.008 ms (0.794 ms / 100) 0.791 -> 0.792 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.25% +0.38%] index_fill_ strided 3 : Elapsed 0.008 ms (0.792 ms / 100) 0.791 -> 0.793 ( +0.25%) [ +0.25% +0.13% +0.00% / +0.25% +0.38% +0.38%] index_fill_ strided 5 : Elapsed 0.008 ms (0.793 ms / 100) 0.792 -> 0.793 ( +0.13%) [ +0.38% +0.00% +0.25% / +0.25% +0.13% +0.13%] index_fill_ strided 7 : Elapsed 0.008 ms (0.795 ms / 100) 0.792 -> 0.792 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.25% +0.25%] index_fill_ strided 8 : Elapsed 0.008 ms (0.792 ms / 100) 0.793 -> 0.790 ( -0.38%) [ +0.00% +0.38% +0.13% / -0.38% +0.25% +0.25%] index_fill_ strided 16 : Elapsed 0.008 ms (0.793 ms / 100) 0.791 -> 0.791 ( +0.00%) [ +0.00% +0.13% +0.51% / +0.00% +0.38% +0.38%] index_fill_ random : Elapsed 0.008 ms (0.791 ms / 100) 0.794 -> 0.791 ( -0.38%) [ +0.00% +0.13% +0.13% / -0.38% -0.13% +0.00%] index_fill_ random_sorted : Elapsed 0.008 ms (0.794 ms / 100) 0.792 -> 0.792 ( +0.00%) [ +0.00% +0.13% +0.00% / +0.00% +0.13% +0.25%] index_fill_ perm : Elapsed 0.008 ms (0.792 ms / 100) 0.791 -> 0.792 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.25% +0.88%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.792 ms / 100) B = [20, 16, 40, 5] (stride (3200, 1, 80, 16)) A = [20, 16, 4, 5] (stride (320, 1, 80, 16)) dim = 2 1.313 -> 1.316 ( +0.23%) [ +0.30% +0.00% +0.30% / +0.23% +0.38% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.317 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.273 ms / 100) 1.315 -> 1.314 ( -0.08%) [ +0.00% +0.23% +0.15% / -0.08% +0.23% +0.23%] index_add_ reverse : Elapsed 0.013 ms (1.315 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.71% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.315 -> 1.314 ( -0.08%) [ +0.23% +0.00% +0.23% / -0.08% +0.15% +0.30%] index_add_ spread : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.274 ( +0.08%) [ +0.00% +0.08% +0.00% / +0.08% +0.47% +0.71%] index_copy_ spread : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.08% +0.00% +0.08% / +0.23% +0.46% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.313 ms / 100) 1.271 -> 1.273 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +1.02% +0.79%] index_copy_ strided 3 : Elapsed 0.013 ms (1.272 ms / 100) 1.312 -> 1.317 ( +0.38%) [ +0.00% +0.23% +0.15% / +0.38% +0.46% +0.46%] index_add_ strided 7 : Elapsed 0.013 ms (1.312 ms / 100) 1.272 -> 1.272 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.71% +0.71%] index_copy_ strided 7 : Elapsed 0.013 ms (1.272 ms / 100) 1.310 -> 1.310 ( +0.00%) [ +0.38% +0.15% +0.00% / +0.00% +0.53% +0.69%] index_add_ perm : Elapsed 0.013 ms (1.315 ms / 100) 1.272 -> 1.274 ( +0.16%) [ +0.08% +0.16% +0.00% / +0.16% +0.71% +0.63%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.312 -> 1.315 ( +0.23%) [ +0.15% +0.15% +0.00% / +0.23% +0.38% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.314 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.31% +0.00% +0.00% / +0.00% +0.71% +0.47%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.277 ms / 100) 9.129 -> 9.110 ( -0.21%) [ +0.20% +0.00% +0.07% / -0.08% -0.15% -0.21%] index_select const : Elapsed 0.091 ms (9.147 ms / 100) 9.160 -> 9.145 ( -0.16%) [ +0.19% +0.16% +0.00% / -0.01% -0.16% +0.07%] index_select wrap : Elapsed 0.092 ms (9.177 ms / 100) 9.143 -> 9.146 ( +0.03%) [ +0.07% +0.10% +0.00% / +0.03% +0.14% +0.13%] index_select linear : Elapsed 0.091 ms (9.149 ms / 100) 9.140 -> 9.133 ( -0.08%) [ +0.33% +0.00% +0.28% / +0.12% -0.08% -0.02%] index_select reverse : Elapsed 0.092 ms (9.170 ms / 100) 9.130 -> 9.134 ( +0.04%) [ +0.37% +0.11% +0.00% / +0.08% +0.05% +0.04%] index_select skip64 : Elapsed 0.092 ms (9.164 ms / 100) 9.122 -> 9.120 ( -0.02%) [ +0.00% +0.04% +0.16% / +0.25% +0.26% -0.02%] index_select skip256 : Elapsed 0.091 ms (9.122 ms / 100) 9.161 -> 9.148 ( -0.14%) [ +0.05% +0.00% +0.07% / -0.14% +0.28% +0.00%] index_select spread : Elapsed 0.092 ms (9.166 ms / 100) 9.150 -> 9.153 ( +0.03%) [ +0.20% +0.00% +0.08% / +0.03% +0.09% +0.07%] index_select strided 3 : Elapsed 0.092 ms (9.168 ms / 100) 9.162 -> 9.140 ( -0.24%) [ +0.16% +0.02% +0.00% / +0.04% -0.24% -0.01%] index_select random : Elapsed 0.092 ms (9.177 ms / 100) 9.147 -> 9.145 ( -0.02%) [ +0.08% +0.04% +0.00% / -0.02% +0.07% +0.20%] index_select random_sorted : Elapsed 0.092 ms (9.154 ms / 100) B = [20, 16, 40, 5] (stride (200, 4000, 1, 40)) A = [20, 16, 4, 5] (stride (64, 1, 16, 1280)) dim = 2 1.316 -> 1.317 ( +0.08%) [ +0.23% +0.08% +0.00% / +0.08% +0.46% +0.61%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.63% +0.63%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.23% +0.30%] index_add_ reverse : Elapsed 0.013 ms (1.319 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.70% +0.55%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.329 -> 1.331 ( +0.15%) [ +0.00% +0.23% +0.00% / +0.15% +0.60% +0.15%] index_add_ spread : Elapsed 0.013 ms (1.329 ms / 100) 1.290 -> 1.290 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.08% +0.16%] index_copy_ spread : Elapsed 0.013 ms (1.292 ms / 100) 1.319 -> 1.319 ( +0.00%) [ +0.00% +0.15% +0.00% / +0.00% +0.68% +0.61%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.281 -> 1.283 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.16% +0.70% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.282 ms / 100) 1.321 -> 1.322 ( +0.08%) [ +0.00% +0.15% +0.23% / +0.08% +0.45% +0.68%] index_add_ strided 7 : Elapsed 0.013 ms (1.321 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.55% +0.62%] index_copy_ strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.325 -> 1.321 ( -0.30%) [ +0.00% +0.00% +0.00% / -0.30% +0.30% +0.45%] index_add_ perm : Elapsed 0.013 ms (1.325 ms / 100) 1.284 -> 1.286 ( +0.16%) [ +0.08% +0.00% +0.23% / +0.16% +0.47% +0.39%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.324 -> 1.322 ( -0.15%) [ +0.15% +0.45% +0.00% / -0.15% +0.23% +0.45%] index_add_ perm_sorted : Elapsed 0.013 ms (1.326 ms / 100) 1.286 -> 1.285 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.31% +0.39%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.286 ms / 100) 9.242 -> 9.241 ( -0.01%) [ +0.00% +0.05% +0.01% / +0.13% +0.15% -0.01%] index_select const : Elapsed 0.092 ms (9.242 ms / 100) 9.265 -> 9.261 ( -0.04%) [ +0.12% +0.00% +0.08% / -0.04% +0.00% +0.02%] index_select wrap : Elapsed 0.093 ms (9.276 ms / 100) 9.255 -> 9.260 ( +0.05%) [ +0.15% +0.12% +0.00% / +0.18% +0.10% +0.05%] index_select linear : Elapsed 0.093 ms (9.269 ms / 100) 9.251 -> 9.254 ( +0.03%) [ +0.02% +0.19% +0.00% / +0.19% +0.03% +0.18%] index_select reverse : Elapsed 0.093 ms (9.253 ms / 100) 9.225 -> 9.256 ( +0.34%) [ +0.00% +0.34% +0.18% / +0.35% +0.34% +0.51%] index_select skip64 : Elapsed 0.092 ms (9.225 ms / 100) 9.237 -> 9.247 ( +0.11%) [ +0.00% +0.05% +0.14% / +0.11% +0.16% +0.19%] index_select skip256 : Elapsed 0.092 ms (9.237 ms / 100) 9.278 -> 9.255 ( -0.25%) [ +0.01% +0.00% +0.24% / -0.03% -0.12% -0.25%] index_select spread : Elapsed 0.093 ms (9.279 ms / 100) 9.259 -> 9.266 ( +0.08%) [ +0.13% +0.00% +0.15% / +0.32% +0.08% +0.12%] index_select strided 3 : Elapsed 0.093 ms (9.271 ms / 100) 9.262 -> 9.258 ( -0.04%) [ +0.24% +0.09% +0.00% / +0.22% -0.04% +0.05%] index_select random : Elapsed 0.093 ms (9.284 ms / 100) 9.267 -> 9.275 ( +0.09%) [ +0.05% +0.00% +0.16% / +0.09% +0.22% +0.11%] index_select random_sorted : Elapsed 0.093 ms (9.272 ms / 100) B = [20, 16, 40, 5] (stride (5, 4000, 100, 1)) A = [20, 16, 4, 5] (stride (1, 400, 20, 80)) dim = 2 1.406 -> 1.408 ( +0.14%) [ +0.14% +0.14% +0.00% / +0.14% +0.64% +0.57%] index_add_ linear : Elapsed 0.014 ms (1.408 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.15% +0.00% +0.07% / +0.07% +0.59% +0.59%] index_copy_ linear : Elapsed 0.014 ms (1.363 ms / 100) 1.409 -> 1.410 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.50% +0.64%] index_add_ reverse : Elapsed 0.014 ms (1.410 ms / 100) 1.364 -> 1.363 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.66% +0.95%] index_copy_ reverse : Elapsed 0.014 ms (1.364 ms / 100) 1.405 -> 1.406 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.64% +0.78%] index_add_ spread : Elapsed 0.014 ms (1.407 ms / 100) 1.360 -> 1.361 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.59% +0.88%] index_copy_ spread : Elapsed 0.014 ms (1.360 ms / 100) 1.405 -> 1.405 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.71% +0.85%] index_add_ strided 3 : Elapsed 0.014 ms (1.406 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.07% +0.81% +0.59%] index_copy_ strided 3 : Elapsed 0.014 ms (1.361 ms / 100) 1.407 -> 1.408 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.64% +0.71%] index_add_ strided 7 : Elapsed 0.014 ms (1.407 ms / 100) 1.361 -> 1.361 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.66% +0.59%] index_copy_ strided 7 : Elapsed 0.014 ms (1.362 ms / 100) 1.405 -> 1.407 ( +0.14%) [ +0.07% +0.07% +0.00% / +0.14% +0.71% +0.71%] index_add_ perm : Elapsed 0.014 ms (1.406 ms / 100) 1.359 -> 1.361 ( +0.15%) [ +0.07% +0.15% +0.00% / +0.15% +0.66% +0.66%] index_copy_ perm : Elapsed 0.014 ms (1.360 ms / 100) 1.408 -> 1.407 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.64% +1.07%] index_add_ perm_sorted : Elapsed 0.014 ms (1.408 ms / 100) 1.361 -> 1.362 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.59% +0.81%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.362 ms / 100) 9.188 -> 9.202 ( +0.15%) [ +0.07% +0.00% +0.04% / +0.15% +0.28% +0.47%] index_select const : Elapsed 0.092 ms (9.194 ms / 100) 9.201 -> 9.210 ( +0.10%) [ +0.20% +0.18% +0.00% / +0.25% +0.21% +0.10%] index_select wrap : Elapsed 0.092 ms (9.219 ms / 100) 9.192 -> 9.208 ( +0.17%) [ +0.29% +0.10% +0.00% / +0.38% +0.17% +0.24%] index_select linear : Elapsed 0.092 ms (9.219 ms / 100) 9.207 -> 9.217 ( +0.11%) [ +0.00% +0.09% +0.07% / +0.11% +0.29% +0.22%] index_select reverse : Elapsed 0.092 ms (9.207 ms / 100) 9.174 -> 9.187 ( +0.14%) [ +0.08% +0.00% +0.20% / +0.14% +0.57% +0.28%] index_select skip64 : Elapsed 0.092 ms (9.181 ms / 100) 9.180 -> 9.186 ( +0.07%) [ +0.11% +0.00% +0.24% / +0.07% +0.49% +0.41%] index_select skip256 : Elapsed 0.092 ms (9.190 ms / 100) 9.208 -> 9.198 ( -0.11%) [ +0.27% +0.14% +0.00% / -0.11% +0.22% +0.25%] index_select spread : Elapsed 0.092 ms (9.233 ms / 100) 9.202 -> 9.191 ( -0.12%) [ +0.00% +0.12% +0.24% / -0.12% +0.38% +0.16%] index_select strided 3 : Elapsed 0.092 ms (9.202 ms / 100) 9.187 -> 9.211 ( +0.26%) [ +0.24% +0.38% +0.00% / +0.26% +0.34% +0.39%] index_select random : Elapsed 0.092 ms (9.209 ms / 100) 9.204 -> 9.220 ( +0.17%) [ +0.18% +0.17% +0.00% / +0.17% +0.33% +0.26%] index_select random_sorted : Elapsed 0.092 ms (9.221 ms / 100) B = [20, 16, 40, 5] (stride (5, 4000, 100, 1)) A = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) dim = 2 1.405 -> 1.411 ( +0.43%) [ +0.00% +0.00% +0.00% / +0.64% +0.43% +0.50%] index_add_ linear : Elapsed 0.014 ms (1.405 ms / 100) 1.360 -> 1.359 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.37% +0.29%] index_copy_ linear : Elapsed 0.014 ms (1.361 ms / 100) 1.405 -> 1.405 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.43% +0.50%] index_add_ reverse : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.37% +0.59%] index_copy_ reverse : Elapsed 0.014 ms (1.360 ms / 100) 1.404 -> 1.405 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.43% +0.43%] index_add_ spread : Elapsed 0.014 ms (1.405 ms / 100) 1.360 -> 1.359 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.29% +0.29%] index_copy_ spread : Elapsed 0.014 ms (1.360 ms / 100) 1.402 -> 1.405 ( +0.21%) [ +0.07% +0.21% +0.00% / +0.21% +0.43% +0.64%] index_add_ strided 3 : Elapsed 0.014 ms (1.403 ms / 100) 1.356 -> 1.357 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.96%] index_copy_ strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.405 -> 1.405 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.43% +0.50%] index_add_ strided 7 : Elapsed 0.014 ms (1.405 ms / 100) 1.359 -> 1.360 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.44% +0.52%] index_copy_ strided 7 : Elapsed 0.014 ms (1.361 ms / 100) 1.405 -> 1.405 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.36% +0.36%] index_add_ perm : Elapsed 0.014 ms (1.405 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.37%] index_copy_ perm : Elapsed 0.014 ms (1.360 ms / 100) 1.405 -> 1.404 ( -0.07%) [ +0.07% +0.00% +0.07% / -0.07% +0.36% +0.50%] index_add_ perm_sorted : Elapsed 0.014 ms (1.406 ms / 100) 1.360 -> 1.360 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.29% +0.37%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.360 ms / 100) 9.186 -> 9.190 ( +0.04%) [ +0.14% +0.15% +0.00% / +0.04% +0.29% +0.25%] index_select const : Elapsed 0.092 ms (9.199 ms / 100) 9.196 -> 9.202 ( +0.07%) [ +0.00% +0.17% +0.17% / +0.07% +0.29% +0.34%] index_select wrap : Elapsed 0.092 ms (9.196 ms / 100) 9.185 -> 9.193 ( +0.09%) [ +0.10% +0.00% +0.33% / +0.09% +0.39% +0.28%] index_select linear : Elapsed 0.092 ms (9.194 ms / 100) 9.191 -> 9.187 ( -0.04%) [ +0.33% +0.00% +0.09% / -0.04% +0.12% +0.20%] index_select reverse : Elapsed 0.092 ms (9.221 ms / 100) 9.200 -> 9.183 ( -0.18%) [ +0.04% +0.10% +0.00% / -0.18% +0.23% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.204 ms / 100) 9.198 -> 9.209 ( +0.12%) [ +0.00% +0.08% +0.05% / +0.12% +0.22% +0.34%] index_select skip256 : Elapsed 0.092 ms (9.198 ms / 100) 9.183 -> 9.203 ( +0.22%) [ +0.00% +0.25% +0.21% / +0.22% +0.45% +0.28%] index_select spread : Elapsed 0.092 ms (9.183 ms / 100) 9.193 -> 9.205 ( +0.13%) [ +0.20% +0.00% +0.20% / +0.13% +0.23% +0.30%] index_select strided 3 : Elapsed 0.092 ms (9.211 ms / 100) 9.192 -> 9.213 ( +0.23%) [ +0.00% +0.02% +0.08% / +0.23% +0.41% +0.35%] index_select random : Elapsed 0.092 ms (9.192 ms / 100) 9.188 -> 9.207 ( +0.21%) [ +0.00% +0.13% +0.10% / +0.21% +0.63% +0.36%] index_select random_sorted : Elapsed 0.092 ms (9.188 ms / 100) B = [20, 16, 40, 5] (stride (1, 4000, 20, 800)) A = [20, 16, 4, 5] (stride (1, 100, 1600, 20)) dim = 2 1.357 -> 1.356 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.44% +0.74%] index_add_ linear : Elapsed 0.014 ms (1.357 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.53% +1.14%] index_copy_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.344 -> 1.344 ( +0.00%) [ +0.07% +0.00% +0.00% / +0.00% +0.67% +0.74%] index_add_ reverse : Elapsed 0.013 ms (1.345 ms / 100) 1.307 -> 1.309 ( +0.15%) [ +0.23% +0.00% +0.23% / +0.15% +0.69% +1.22%] index_copy_ reverse : Elapsed 0.013 ms (1.310 ms / 100) 1.344 -> 1.346 ( +0.15%) [ +0.07% +0.00% +0.07% / +0.15% +0.52% +0.97%] index_add_ spread : Elapsed 0.013 ms (1.345 ms / 100) 1.310 -> 1.308 ( -0.15%) [ +0.00% +0.23% +0.00% / -0.15% +0.53% +1.07%] index_copy_ spread : Elapsed 0.013 ms (1.310 ms / 100) 1.355 -> 1.355 ( +0.00%) [ +0.00% +0.00% +0.22% / +0.00% +0.81% +0.89%] index_add_ strided 3 : Elapsed 0.014 ms (1.355 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.76% +0.91%] index_copy_ strided 3 : Elapsed 0.013 ms (1.318 ms / 100) 1.354 -> 1.355 ( +0.07%) [ +0.07% +0.00% +0.15% / +0.07% +0.59% +0.81%] index_add_ strided 7 : Elapsed 0.014 ms (1.355 ms / 100) 1.315 -> 1.315 ( +0.00%) [ +0.15% +0.08% +0.00% / +0.00% +0.76% +0.84%] index_copy_ strided 7 : Elapsed 0.013 ms (1.317 ms / 100) 1.343 -> 1.343 ( +0.00%) [ +0.15% +0.15% +0.00% / +0.00% +0.67% +1.34%] index_add_ perm : Elapsed 0.013 ms (1.345 ms / 100) 1.308 -> 1.315 ( +0.54%) [ +0.31% +0.00% +0.23% / +0.54% +0.76% +1.38%] index_copy_ perm : Elapsed 0.013 ms (1.312 ms / 100) 1.345 -> 1.345 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.67% +1.04%] index_add_ perm_sorted : Elapsed 0.013 ms (1.345 ms / 100) 1.308 -> 1.309 ( +0.08%) [ +0.15% +0.00% +0.15% / +0.08% +0.61% +1.83%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.310 ms / 100) 9.152 -> 9.160 ( +0.09%) [ +0.14% +0.07% +0.00% / +0.09% +0.32% +0.31%] index_select const : Elapsed 0.092 ms (9.165 ms / 100) 9.166 -> 9.180 ( +0.15%) [ +0.20% +0.22% +0.00% / +0.15% +0.17% +0.15%] index_select wrap : Elapsed 0.092 ms (9.184 ms / 100) 9.172 -> 9.164 ( -0.09%) [ +0.07% +0.21% +0.00% / +0.08% -0.09% +0.59%] index_select linear : Elapsed 0.092 ms (9.178 ms / 100) 9.167 -> 9.169 ( +0.02%) [ +0.25% +0.08% +0.00% / +0.02% +0.33% +0.22%] index_select reverse : Elapsed 0.092 ms (9.190 ms / 100) 9.150 -> 9.152 ( +0.02%) [ +0.19% +0.00% +0.12% / +0.02% +0.45% +0.25%] index_select skip64 : Elapsed 0.092 ms (9.167 ms / 100) 9.151 -> 9.158 ( +0.08%) [ +0.12% +0.00% +0.03% / +0.08% +0.30% +0.27%] index_select skip256 : Elapsed 0.092 ms (9.162 ms / 100) 9.170 -> 9.179 ( +0.10%) [ +0.14% +0.00% +0.10% / +0.10% +0.23% +0.16%] index_select spread : Elapsed 0.092 ms (9.183 ms / 100) 9.165 -> 9.172 ( +0.08%) [ +0.27% +0.00% +0.19% / +0.08% +0.15% +0.32%] index_select strided 3 : Elapsed 0.092 ms (9.190 ms / 100) 9.179 -> 9.174 ( -0.05%) [ +0.00% +0.14% +0.04% / -0.05% -0.05% +0.21%] index_select random : Elapsed 0.092 ms (9.179 ms / 100) 9.170 -> 9.183 ( +0.14%) [ +0.00% +0.14% +0.09% / +0.14% +0.27% +0.26%] index_select random_sorted : Elapsed 0.092 ms (9.170 ms / 100) B = [20, 16, 40, 5] (stride (16, 1, 1600, 320)) A = [20, 16, 4, 5] (stride (20, 400, 1, 4)) dim = 2 1.234 -> 1.236 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.73% +0.65%] index_add_ linear : Elapsed 0.012 ms (1.236 ms / 100) 1.196 -> 1.196 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.75% +0.50%] index_copy_ linear : Elapsed 0.012 ms (1.196 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.40% +0.40%] index_add_ reverse : Elapsed 0.012 ms (1.236 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.50% +0.50%] index_copy_ reverse : Elapsed 0.012 ms (1.197 ms / 100) 1.234 -> 1.235 ( +0.08%) [ +0.32% +0.08% +0.00% / +0.08% +0.49% +0.65%] index_add_ spread : Elapsed 0.012 ms (1.238 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.17% +0.08% +0.00% / +0.08% +0.50% +0.75%] index_copy_ spread : Elapsed 0.012 ms (1.197 ms / 100) 1.241 -> 1.241 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +1.29% +1.61%] index_add_ strided 3 : Elapsed 0.012 ms (1.241 ms / 100) 1.201 -> 1.203 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.58% +0.83%] index_copy_ strided 3 : Elapsed 0.012 ms (1.201 ms / 100) 1.231 -> 1.232 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.32% +0.57%] index_add_ strided 7 : Elapsed 0.012 ms (1.233 ms / 100) 1.192 -> 1.193 ( +0.08%) [ +0.00% +0.08% +0.08% / +0.08% +0.59% +0.59%] index_copy_ strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.235 -> 1.235 ( +0.00%) [ +0.08% +0.00% +0.00% / +0.00% +0.40% +0.73%] index_add_ perm : Elapsed 0.012 ms (1.236 ms / 100) 1.195 -> 1.196 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.08% +0.42% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.196 ms / 100) 1.235 -> 1.236 ( +0.08%) [ +0.16% +0.00% +0.00% / +0.08% +0.40% +0.49%] index_add_ perm_sorted : Elapsed 0.012 ms (1.237 ms / 100) 1.197 -> 1.197 ( +0.00%) [ +0.00% +0.08% +0.00% / +0.00% +0.42% +0.42%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.197 ms / 100) 8.713 -> 8.714 ( +0.01%) [ +0.03% +0.00% +0.01% / +0.10% +0.01% +0.01%] index_select const : Elapsed 0.087 ms (8.716 ms / 100) 8.698 -> 8.721 ( +0.26%) [ +0.06% +0.00% +0.24% / +0.26% +0.62% +0.38%] index_select wrap : Elapsed 0.087 ms (8.703 ms / 100) 8.700 -> 8.701 ( +0.01%) [ +0.00% +0.11% +0.22% / +0.01% +0.23% +0.28%] index_select linear : Elapsed 0.087 ms (8.700 ms / 100) 8.710 -> 8.708 ( -0.02%) [ +0.02% +0.00% +0.17% / -0.02% +0.20% +0.31%] index_select reverse : Elapsed 0.087 ms (8.712 ms / 100) 8.701 -> 8.714 ( +0.15%) [ +0.00% +0.08% +0.30% / +0.15% +0.18% +0.31%] index_select skip64 : Elapsed 0.087 ms (8.701 ms / 100) 8.714 -> 8.717 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.13% +0.03% +0.06%] index_select skip256 : Elapsed 0.087 ms (8.714 ms / 100) 8.717 -> 8.710 ( -0.08%) [ +0.25% +0.00% +0.09% / +0.22% -0.08% -0.03%] index_select spread : Elapsed 0.087 ms (8.739 ms / 100) 8.716 -> 8.720 ( +0.05%) [ +0.00% +0.03% +0.07% / +0.05% +0.05% +0.29%] index_select strided 3 : Elapsed 0.087 ms (8.716 ms / 100) 8.709 -> 8.715 ( +0.07%) [ +0.08% +0.00% +0.21% / +0.09% +0.31% +0.07%] index_select random : Elapsed 0.087 ms (8.716 ms / 100) 8.710 -> 8.716 ( +0.07%) [ +0.05% +0.00% +0.10% / +0.07% +0.22% +0.28%] index_select random_sorted : Elapsed 0.087 ms (8.714 ms / 100) B = [20, 16, 40, 5] (stride (40, 800, 1, 12800)) A = [20, 16, 4, 5] (stride (320, 1, 16, 64)) dim = 2 1.315 -> 1.318 ( +0.23%) [ +0.46% +0.30% +0.00% / +0.23% +0.46% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.321 ms / 100) 1.279 -> 1.279 ( +0.00%) [ +0.16% +0.16% +0.00% / +0.00% +0.47% +0.55%] index_copy_ linear : Elapsed 0.013 ms (1.281 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.46% +0.68%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.86% +1.02%] index_copy_ reverse : Elapsed 0.013 ms (1.278 ms / 100) 1.328 -> 1.330 ( +0.15%) [ +0.00% +0.38% +0.08% / +0.15% +0.38% +0.53%] index_add_ spread : Elapsed 0.013 ms (1.328 ms / 100) 1.290 -> 1.287 ( -0.23%) [ +0.00% +0.08% +0.00% / -0.23% +0.08% +0.23%] index_copy_ spread : Elapsed 0.013 ms (1.290 ms / 100) 1.319 -> 1.318 ( -0.08%) [ +0.23% +0.08% +0.00% / -0.08% +0.68% +0.76%] index_add_ strided 3 : Elapsed 0.013 ms (1.322 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.16% +0.00% +0.08% / +0.08% +0.78% +0.70%] index_copy_ strided 3 : Elapsed 0.013 ms (1.283 ms / 100) 1.321 -> 1.319 ( -0.15%) [ +0.08% +0.08% +0.00% / -0.15% +0.61% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.322 ms / 100) 1.284 -> 1.288 ( +0.31%) [ +0.00% +0.08% +0.00% / +0.31% +0.62% +0.55%] index_copy_ strided 7 : Elapsed 0.013 ms (1.284 ms / 100) 1.320 -> 1.324 ( +0.30%) [ +0.23% +0.00% +0.23% / +0.30% +0.68% +1.21%] index_add_ perm : Elapsed 0.013 ms (1.323 ms / 100) 1.285 -> 1.288 ( +0.23%) [ +0.00% +0.00% +0.00% / +0.23% +0.39% +0.70%] index_copy_ perm : Elapsed 0.013 ms (1.285 ms / 100) 1.322 -> 1.321 ( -0.08%) [ +0.15% +0.00% +0.08% / -0.08% +0.53% +0.91%] index_add_ perm_sorted : Elapsed 0.013 ms (1.324 ms / 100) 1.284 -> 1.285 ( +0.08%) [ +0.00% +0.00% +0.08% / +0.08% +0.62% +0.78%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.284 ms / 100) 9.240 -> 9.243 ( +0.03%) [ +0.06% +0.00% +0.04% / +0.03% +0.04% +0.13%] index_select const : Elapsed 0.092 ms (9.246 ms / 100) 9.257 -> 9.241 ( -0.17%) [ +0.02% +0.06% +0.00% / -0.17% +0.15% +0.11%] index_select wrap : Elapsed 0.093 ms (9.259 ms / 100) 9.246 -> 9.242 ( -0.04%) [ +0.00% +0.03% +0.00% / -0.04% +0.48% +0.10%] index_select linear : Elapsed 0.092 ms (9.246 ms / 100) 9.249 -> 9.257 ( +0.09%) [ +0.00% +0.01% +0.00% / +0.14% +0.12% +0.09%] index_select reverse : Elapsed 0.092 ms (9.249 ms / 100) 9.230 -> 9.231 ( +0.01%) [ +0.03% +0.05% +0.00% / +0.01% +0.13% +0.22%] index_select skip64 : Elapsed 0.092 ms (9.233 ms / 100) 9.218 -> 9.238 ( +0.22%) [ +0.00% +0.17% +0.40% / +0.37% +0.22% +0.22%] index_select skip256 : Elapsed 0.092 ms (9.218 ms / 100) 9.247 -> 9.268 ( +0.23%) [ +0.24% +0.19% +0.00% / +0.23% +0.41% +0.56%] index_select spread : Elapsed 0.093 ms (9.269 ms / 100) 9.242 -> 9.263 ( +0.23%) [ +0.30% +0.09% +0.00% / +0.23% +0.45% +0.61%] index_select strided 3 : Elapsed 0.093 ms (9.270 ms / 100) 9.258 -> 9.259 ( +0.01%) [ +0.03% +0.02% +0.00% / +0.04% +0.17% +0.01%] index_select random : Elapsed 0.093 ms (9.261 ms / 100) 9.243 -> 9.253 ( +0.11%) [ +0.19% +0.00% +0.12% / +0.12% +0.11% +0.57%] index_select random_sorted : Elapsed 0.093 ms (9.261 ms / 100) B = [20, 16, 40, 5] (stride (1, 800, 20, 12800)) dim = 2 fill_cnt = 4 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.13% +0.00% / +0.00% +0.50% +0.63%] index_fill_ const : Elapsed 0.008 ms (0.801 ms / 100) 0.799 -> 0.800 ( +0.13%) [ +0.13% +0.38% +0.00% / +0.13% +0.75% +0.75%] index_fill_ linear : Elapsed 0.008 ms (0.800 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.25% +0.00% / +0.00% +0.63% +0.63%] index_fill_ reverse : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.13% +0.00% +0.00% / +0.13% +0.63% +0.75%] index_fill_ skip64 : Elapsed 0.008 ms (0.801 ms / 100) 0.799 -> 0.800 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.88% +1.00%] index_fill_ skip256 : Elapsed 0.008 ms (0.800 ms / 100) 0.800 -> 0.799 ( -0.13%) [ +0.13% +0.13% +0.00% / -0.13% +0.63% +0.63%] index_fill_ spread : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.13% +0.13% +0.00% / +0.13% +0.63% +1.13%] index_fill_ strided 3 : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.63% +0.88%] index_fill_ strided 5 : Elapsed 0.008 ms (0.801 ms / 100) 0.799 -> 0.799 ( +0.00%) [ +0.25% +0.25% +0.00% / +0.00% +0.75% +0.75%] index_fill_ strided 7 : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.63% +0.50%] index_fill_ strided 8 : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.25% +0.00% / +0.00% +0.63% +0.63%] index_fill_ strided 16 : Elapsed 0.008 ms (0.801 ms / 100) 0.802 -> 0.801 ( -0.12%) [ +0.00% +0.00% +0.00% / -0.12% +0.25% +0.37%] index_fill_ random : Elapsed 0.008 ms (0.802 ms / 100) 0.802 -> 0.804 ( +0.25%) [ +0.00% +0.12% +0.12% / +0.25% +0.50% +0.37%] index_fill_ random_sorted : Elapsed 0.008 ms (0.802 ms / 100) 0.800 -> 0.802 ( +0.25%) [ +0.13% +0.13% +0.00% / +0.25% +0.63% +0.50%] index_fill_ perm : Elapsed 0.008 ms (0.801 ms / 100) 0.800 -> 0.800 ( +0.00%) [ +0.13% +0.00% +0.00% / +0.00% +0.50% +0.50%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.801 ms / 100) out_shape = [20, 16, 4, 40] in_shape = [20, 16, 4, 5] idx_dim = 3 B = [20, 16, 4, 40] (stride (2560, 160, 40, 1)) A = [20, 16, 4, 5] (stride (320, 4, 1, 64)) dim = 3 1.220 -> 1.221 ( +0.08%) [ +0.16% +0.08% +0.00% / +0.08% +0.49% +0.41%] index_add_ linear : Elapsed 0.012 ms (1.222 ms / 100) 1.180 -> 1.182 ( +0.17%) [ +0.25% +0.42% +0.00% / +0.25% +0.34% +0.17%] index_copy_ linear : Elapsed 0.012 ms (1.183 ms / 100) 1.220 -> 1.221 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.33% +0.49%] index_add_ reverse : Elapsed 0.012 ms (1.221 ms / 100) 1.183 -> 1.179 ( -0.34%) [ +0.17% +0.08% +0.00% / -0.34% +0.08% +0.17%] index_copy_ reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.221 -> 1.221 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.33% +0.33%] index_add_ spread : Elapsed 0.012 ms (1.221 ms / 100) 1.177 -> 1.183 ( +0.51%) [ +0.68% +0.00% +0.42% / +0.51% +0.76% +0.76%] index_copy_ spread : Elapsed 0.012 ms (1.185 ms / 100) 1.220 -> 1.220 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.57% +0.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.220 ms / 100) 1.178 -> 1.182 ( +0.34%) [ +0.34% +0.34% +0.00% / +0.34% +0.51% +0.76%] index_copy_ strided 3 : Elapsed 0.012 ms (1.182 ms / 100) 1.221 -> 1.221 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.41% +0.41%] index_add_ strided 7 : Elapsed 0.012 ms (1.221 ms / 100) 1.180 -> 1.184 ( +0.34%) [ +0.42% +0.00% +0.25% / +0.51% +0.42% +0.34%] index_copy_ strided 7 : Elapsed 0.012 ms (1.185 ms / 100) 1.219 -> 1.219 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.74% +0.57%] index_add_ perm : Elapsed 0.012 ms (1.219 ms / 100) 1.177 -> 1.183 ( +0.51%) [ +0.51% +0.00% +0.51% / +0.51% +1.02% +0.76%] index_copy_ perm : Elapsed 0.012 ms (1.183 ms / 100) 1.219 -> 1.220 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.57% +0.66%] index_add_ perm_sorted : Elapsed 0.012 ms (1.220 ms / 100) 1.176 -> 1.185 ( +0.77%) [ +0.68% +0.26% +0.00% / +0.77% +0.94% +1.36%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.184 ms / 100) 7.601 -> 7.609 ( +0.11%) [ +0.17% +0.05% +0.00% / +0.11% +0.16% +0.32%] index_select const : Elapsed 0.076 ms (7.614 ms / 100) 7.627 -> 7.624 ( -0.04%) [ +0.05% +0.08% +0.00% / -0.04% +0.09% +0.07%] index_select wrap : Elapsed 0.076 ms (7.631 ms / 100) 7.623 -> 7.631 ( +0.10%) [ +0.05% +0.00% +0.24% / +0.10% +0.29% +0.20%] index_select linear : Elapsed 0.076 ms (7.627 ms / 100) 7.619 -> 7.621 ( +0.03%) [ +0.13% +0.00% +0.14% / +0.03% +0.18% +0.32%] index_select reverse : Elapsed 0.076 ms (7.629 ms / 100) 7.599 -> 7.607 ( +0.11%) [ +0.11% +0.00% +0.28% / +0.11% +0.53% +0.16%] index_select skip64 : Elapsed 0.076 ms (7.607 ms / 100) 7.598 -> 7.598 ( +0.00%) [ +0.12% +0.00% +0.12% / +0.00% +0.09% +0.28%] index_select skip256 : Elapsed 0.076 ms (7.607 ms / 100) 7.616 -> 7.613 ( -0.04%) [ +0.18% +0.11% +0.00% / -0.04% -0.03% +0.49%] index_select spread : Elapsed 0.076 ms (7.630 ms / 100) 7.626 -> 7.623 ( -0.04%) [ +0.13% +0.00% +0.04% / -0.04% +0.04% +0.18%] index_select strided 3 : Elapsed 0.076 ms (7.636 ms / 100) 7.624 -> 7.617 ( -0.09%) [ +0.18% +0.16% +0.00% / -0.09% +0.14% +0.35%] index_select random : Elapsed 0.076 ms (7.638 ms / 100) 7.623 -> 7.619 ( -0.05%) [ +0.12% +0.16% +0.00% / +0.18% -0.05% +0.03%] index_select random_sorted : Elapsed 0.076 ms (7.632 ms / 100) B = [20, 16, 4, 40] (stride (2560, 1, 640, 16)) dim = 3 fill_cnt = 5 0.895 -> 0.896 ( +0.11%) [ +0.34% +0.11% +0.00% / +0.11% +0.34% +0.34%] index_fill_ const : Elapsed 0.009 ms (0.898 ms / 100) 0.895 -> 0.895 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.45% +0.45%] index_fill_ linear : Elapsed 0.009 ms (0.896 ms / 100) 0.895 -> 0.895 ( +0.00%) [ +0.11% +0.11% +0.00% / +0.00% +0.45% +0.56%] index_fill_ reverse : Elapsed 0.009 ms (0.896 ms / 100) 0.895 -> 0.895 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.56% +0.67%] index_fill_ skip64 : Elapsed 0.009 ms (0.895 ms / 100) 0.895 -> 0.896 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.67% +0.56%] index_fill_ skip256 : Elapsed 0.009 ms (0.896 ms / 100) 0.895 -> 0.895 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.45% +0.45%] index_fill_ spread : Elapsed 0.009 ms (0.895 ms / 100) 0.895 -> 0.895 ( +0.00%) [ +0.00% +0.00% +0.11% / +0.00% +0.45% +0.56%] index_fill_ strided 3 : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.896 ( +0.22%) [ +0.22% +0.22% +0.00% / +0.22% +0.67% +0.56%] index_fill_ strided 5 : Elapsed 0.009 ms (0.896 ms / 100) 0.895 -> 0.896 ( +0.11%) [ +0.11% +0.11% +0.00% / +0.11% +0.56% +0.45%] index_fill_ strided 7 : Elapsed 0.009 ms (0.896 ms / 100) 0.894 -> 0.895 ( +0.11%) [ +0.11% +0.22% +0.00% / +0.11% +0.67% +0.67%] index_fill_ strided 8 : Elapsed 0.009 ms (0.895 ms / 100) 0.894 -> 0.896 ( +0.22%) [ +0.11% +0.22% +0.00% / +0.22% +0.56% +0.67%] index_fill_ strided 16 : Elapsed 0.009 ms (0.895 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.78% +0.67%] index_fill_ random : Elapsed 0.009 ms (0.893 ms / 100) 0.893 -> 0.894 ( +0.11%) [ +0.00% +0.11% +0.00% / +0.11% +0.67% +0.78%] index_fill_ random_sorted : Elapsed 0.009 ms (0.893 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.00% +0.11% +0.00% / +0.00% +0.56% +0.78%] index_fill_ perm : Elapsed 0.009 ms (0.894 ms / 100) 0.894 -> 0.894 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.67% +0.67%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.894 ms / 100) B = [20, 16, 4, 40] (stride (2560, 1, 640, 16)) A = [20, 16, 4, 5] (stride (1, 20, 1600, 320)) dim = 3 1.622 -> 1.622 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.68% +0.62%] index_add_ linear : Elapsed 0.016 ms (1.624 ms / 100) 1.575 -> 1.570 ( -0.32%) [ +0.00% +0.13% +0.13% / -0.32% +0.32% +0.25%] index_copy_ linear : Elapsed 0.016 ms (1.575 ms / 100) 1.622 -> 1.622 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.74%] index_add_ reverse : Elapsed 0.016 ms (1.622 ms / 100) 1.570 -> 1.574 ( +0.25%) [ +0.06% +0.00% +0.32% / +0.25% +0.45% +0.64%] index_copy_ reverse : Elapsed 0.016 ms (1.571 ms / 100) 1.614 -> 1.615 ( +0.06%) [ +0.12% +0.06% +0.00% / +0.06% +0.62% +0.62%] index_add_ spread : Elapsed 0.016 ms (1.616 ms / 100) 1.570 -> 1.573 ( +0.19%) [ +0.25% +0.32% +0.00% / +0.19% +0.19% +0.38%] index_copy_ spread : Elapsed 0.016 ms (1.574 ms / 100) 1.623 -> 1.623 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.68%] index_add_ strided 3 : Elapsed 0.016 ms (1.623 ms / 100) 1.572 -> 1.570 ( -0.13%) [ +0.00% +0.25% +0.06% / -0.13% +0.51% +0.45%] index_copy_ strided 3 : Elapsed 0.016 ms (1.572 ms / 100) 1.627 -> 1.627 ( +0.00%) [ +0.06% +0.12% +0.00% / +0.00% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.016 ms (1.628 ms / 100) 1.576 -> 1.576 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.82% +0.70%] index_copy_ strided 7 : Elapsed 0.016 ms (1.576 ms / 100) 1.627 -> 1.627 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.74% +0.80%] index_add_ perm : Elapsed 0.016 ms (1.628 ms / 100) 1.575 -> 1.577 ( +0.13%) [ +0.00% +0.00% +0.06% / +0.13% +0.76% +0.83%] index_copy_ perm : Elapsed 0.016 ms (1.575 ms / 100) 1.622 -> 1.622 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.68% +0.99%] index_add_ perm_sorted : Elapsed 0.016 ms (1.623 ms / 100) 1.573 -> 1.572 ( -0.06%) [ +0.25% +0.19% +0.00% / -0.06% +0.57% +0.76%] index_copy_ perm_sorted : Elapsed 0.016 ms (1.577 ms / 100) 8.519 -> 8.524 ( +0.06%) [ +0.23% +0.00% +0.02% / +0.16% +0.40% +0.06%] index_select const : Elapsed 0.085 ms (8.539 ms / 100) 8.535 -> 8.535 ( +0.00%) [ +0.26% +0.00% +0.18% / +0.00% +0.16% +0.25%] index_select wrap : Elapsed 0.086 ms (8.557 ms / 100) 8.550 -> 8.526 ( -0.28%) [ +0.14% +0.01% +0.00% / +0.01% -0.06% -0.28%] index_select linear : Elapsed 0.086 ms (8.562 ms / 100) 8.524 -> 8.537 ( +0.15%) [ +0.00% +0.08% +0.00% / +0.15% +0.46% +0.31%] index_select reverse : Elapsed 0.085 ms (8.524 ms / 100) 8.526 -> 8.524 ( -0.02%) [ +0.08% +0.00% +0.09% / +0.00% +0.04% -0.02%] index_select skip64 : Elapsed 0.085 ms (8.533 ms / 100) 8.526 -> 8.523 ( -0.04%) [ +0.28% +0.00% +0.22% / -0.04% +0.08% +0.05%] index_select skip256 : Elapsed 0.086 ms (8.550 ms / 100) 8.544 -> 8.536 ( -0.09%) [ +0.09% +0.00% +0.16% / +0.00% -0.09% -0.05%] index_select spread : Elapsed 0.086 ms (8.552 ms / 100) 8.538 -> 8.540 ( +0.02%) [ +0.18% +0.08% +0.00% / +0.04% +0.02% +0.11%] index_select strided 3 : Elapsed 0.086 ms (8.553 ms / 100) 8.533 -> 8.527 ( -0.07%) [ +0.13% +0.00% +0.13% / -0.05% +0.30% -0.07%] index_select random : Elapsed 0.085 ms (8.544 ms / 100) 8.530 -> 8.525 ( -0.06%) [ +0.00% +0.13% +0.04% / -0.06% +0.04% +0.14%] index_select random_sorted : Elapsed 0.085 ms (8.530 ms / 100) B = [20, 16, 4, 40] (stride (1, 3200, 800, 20)) dim = 3 fill_cnt = 5 0.509 -> 0.509 ( +0.00%) [ +0.00% +0.00% +0.20% / +0.00% +0.59% +0.98%] index_fill_ const : Elapsed 0.005 ms (0.509 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +0.20% +0.20% +0.00% / +0.20% +1.17% +1.37%] index_fill_ linear : Elapsed 0.005 ms (0.513 ms / 100) 0.509 -> 0.509 ( +0.00%) [ +0.20% +0.20% +0.00% / +0.00% +1.18% +1.18%] index_fill_ reverse : Elapsed 0.005 ms (0.510 ms / 100) 0.510 -> 0.515 ( +0.98%) [ +0.39% +1.37% +0.00% / +1.37% +0.98% +2.16%] index_fill_ skip64 : Elapsed 0.005 ms (0.512 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +0.39% +0.97% +0.00% / +0.00% +0.00% +0.39%] index_fill_ skip256 : Elapsed 0.005 ms (0.515 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +1.17% +0.58%] index_fill_ spread : Elapsed 0.005 ms (0.514 ms / 100) 0.509 -> 0.511 ( +0.39%) [ +0.39% +0.39% +0.00% / +0.39% +1.18% +1.18%] index_fill_ strided 3 : Elapsed 0.005 ms (0.511 ms / 100) 0.510 -> 0.509 ( -0.20%) [ +0.00% +0.00% +0.00% / -0.20% +0.98% +0.78%] index_fill_ strided 5 : Elapsed 0.005 ms (0.510 ms / 100) 0.509 -> 0.510 ( +0.20%) [ +0.20% +0.00% +0.00% / +0.20% +1.18% +1.57%] index_fill_ strided 7 : Elapsed 0.005 ms (0.510 ms / 100) 0.510 -> 0.509 ( -0.20%) [ +0.00% +0.00% +0.00% / -0.20% +1.76% +2.55%] index_fill_ strided 8 : Elapsed 0.005 ms (0.510 ms / 100) 0.509 -> 0.510 ( +0.20%) [ +0.00% +0.39% +0.20% / +0.20% +1.18% +1.38%] index_fill_ strided 16 : Elapsed 0.005 ms (0.509 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +0.00% +0.59% +0.00% / +0.20% +0.39% +0.39%] index_fill_ random : Elapsed 0.005 ms (0.512 ms / 100) 0.510 -> 0.510 ( +0.00%) [ +0.20% +0.20% +0.00% / +0.00% +0.59% +0.59%] index_fill_ random_sorted : Elapsed 0.005 ms (0.511 ms / 100) 0.509 -> 0.509 ( +0.00%) [ +0.39% +0.00% +0.20% / +0.00% +1.18% +1.18%] index_fill_ perm : Elapsed 0.005 ms (0.511 ms / 100) 0.508 -> 0.510 ( +0.39%) [ +0.00% +0.59% +0.20% / +0.39% +1.18% +1.18%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.508 ms / 100) B = [20, 16, 4, 40] (stride (4, 3200, 1, 80)) A = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) dim = 3 1.520 -> 1.519 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.46% +0.46%] index_add_ linear : Elapsed 0.015 ms (1.520 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.41% +0.41%] index_copy_ linear : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.520 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.53% +0.59%] index_add_ reverse : Elapsed 0.015 ms (1.520 ms / 100) 1.474 -> 1.475 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.41% +0.61%] index_copy_ reverse : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.53% +0.53%] index_add_ spread : Elapsed 0.015 ms (1.521 ms / 100) 1.474 -> 1.474 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.41% +0.41%] index_copy_ spread : Elapsed 0.015 ms (1.475 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.53% +0.66%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.48% +0.61%] index_copy_ strided 3 : Elapsed 0.015 ms (1.474 ms / 100) 1.519 -> 1.519 ( +0.00%) [ +0.07% +0.00% +0.13% / +0.00% +0.53% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.54% +0.61%] index_copy_ strided 7 : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.13% +0.07% +0.00% / +0.00% +0.59% +0.59%] index_add_ perm : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.48% +0.61%] index_copy_ perm : Elapsed 0.015 ms (1.474 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.66% +0.59%] index_add_ perm_sorted : Elapsed 0.015 ms (1.520 ms / 100) 1.473 -> 1.474 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.54% +0.54%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.474 ms / 100) 8.520 -> 8.514 ( -0.07%) [ +0.34% +0.08% +0.00% / -0.07% +0.07% +0.11%] index_select const : Elapsed 0.085 ms (8.549 ms / 100) 8.534 -> 8.538 ( +0.05%) [ +0.05% +0.36% +0.00% / +0.18% +0.23% +0.05%] index_select wrap : Elapsed 0.085 ms (8.538 ms / 100) 8.544 -> 8.545 ( +0.01%) [ +0.00% +0.00% +0.07% / +0.01% +0.04% +0.14%] index_select linear : Elapsed 0.085 ms (8.544 ms / 100) 8.527 -> 8.519 ( -0.09%) [ +0.04% +0.00% +0.27% / -0.09% +0.33% +0.42%] index_select reverse : Elapsed 0.085 ms (8.530 ms / 100) 8.512 -> 8.528 ( +0.19%) [ +0.20% +0.00% +0.20% / +0.19% +0.23% +0.32%] index_select skip64 : Elapsed 0.085 ms (8.529 ms / 100) 8.516 -> 8.520 ( +0.05%) [ +0.15% +0.13% +0.00% / +0.16% +0.35% +0.05%] index_select skip256 : Elapsed 0.085 ms (8.529 ms / 100) 8.521 -> 8.523 ( +0.02%) [ +0.34% +0.26% +0.00% / +0.02% +0.36% +0.42%] index_select spread : Elapsed 0.085 ms (8.550 ms / 100) 8.537 -> 8.529 ( -0.09%) [ +0.06% +0.00% +0.04% / -0.09% +0.46% +0.39%] index_select strided 3 : Elapsed 0.085 ms (8.542 ms / 100) 8.535 -> 8.526 ( -0.11%) [ +0.00% +0.18% +0.16% / -0.11% +0.11% +0.37%] index_select random : Elapsed 0.085 ms (8.535 ms / 100) 8.533 -> 8.538 ( +0.06%) [ +0.05% +0.00% +0.06% / +0.07% +0.06% +0.30%] index_select random_sorted : Elapsed 0.085 ms (8.537 ms / 100) B = [20, 16, 4, 40] (stride (64, 1, 16, 1280)) A = [20, 16, 4, 5] (stride (4, 80, 1, 1280)) dim = 3 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.72%] index_add_ linear : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.61% +0.61%] index_copy_ linear : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.72% +0.72%] index_add_ reverse : Elapsed 0.015 ms (1.518 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.61% +0.68%] index_copy_ reverse : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.518 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.72% +0.72%] index_add_ spread : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.68% +0.68%] index_copy_ spread : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.72% +0.59%] index_add_ strided 3 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.472 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.00% +0.82% +0.61%] index_copy_ strided 3 : Elapsed 0.015 ms (1.472 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.59% +0.59%] index_add_ strided 7 : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.68% +0.68%] index_copy_ strided 7 : Elapsed 0.015 ms (1.472 ms / 100) 1.519 -> 1.518 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.72% +0.66%] index_add_ perm : Elapsed 0.015 ms (1.519 ms / 100) 1.473 -> 1.473 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.75% +0.61%] index_copy_ perm : Elapsed 0.015 ms (1.473 ms / 100) 1.518 -> 1.519 ( +0.07%) [ +0.07% +0.00% +0.00% / +0.07% +0.72% +0.79%] index_add_ perm_sorted : Elapsed 0.015 ms (1.519 ms / 100) 1.472 -> 1.473 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.75% +1.22%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.473 ms / 100) 8.504 -> 8.516 ( +0.14%) [ +0.07% +0.00% +0.32% / +0.22% +0.33% +0.14%] index_select const : Elapsed 0.085 ms (8.510 ms / 100) 8.527 -> 8.527 ( +0.00%) [ +0.05% +0.00% +0.27% / +0.00% +0.25% +0.14%] index_select wrap : Elapsed 0.085 ms (8.531 ms / 100) 8.535 -> 8.534 ( -0.01%) [ +0.00% +0.22% +0.05% / +0.11% +0.37% -0.01%] index_select linear : Elapsed 0.085 ms (8.535 ms / 100) 8.538 -> 8.529 ( -0.11%) [ +0.00% +0.00% +0.06% / +0.28% +0.25% -0.11%] index_select reverse : Elapsed 0.085 ms (8.538 ms / 100) 8.513 -> 8.517 ( +0.05%) [ +0.11% +0.00% +0.06% / +0.12% +0.05% +0.23%] index_select skip64 : Elapsed 0.085 ms (8.522 ms / 100) 8.524 -> 8.507 ( -0.20%) [ +0.02% +0.14% +0.00% / +0.04% -0.20% +0.05%] index_select skip256 : Elapsed 0.085 ms (8.526 ms / 100) 8.531 -> 8.539 ( +0.09%) [ +0.01% +0.13% +0.00% / +0.25% +0.09% +0.27%] index_select spread : Elapsed 0.085 ms (8.532 ms / 100) 8.525 -> 8.526 ( +0.01%) [ +0.29% +0.00% +0.07% / +0.27% +0.39% +0.01%] index_select strided 3 : Elapsed 0.085 ms (8.550 ms / 100) 8.535 -> 8.525 ( -0.12%) [ +0.02% +0.07% +0.00% / -0.12% +0.05% -0.06%] index_select random : Elapsed 0.085 ms (8.537 ms / 100) 8.528 -> 8.547 ( +0.22%) [ +0.21% +0.00% +0.06% / +0.22% +0.33% +0.28%] index_select random_sorted : Elapsed 0.085 ms (8.546 ms / 100) B = [20, 16, 4, 40] (stride (4, 80, 1, 1280)) A = [20, 16, 4, 5] (stride (64, 4, 1, 1280)) dim = 3 1.318 -> 1.320 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.15% +0.53% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.275 ( +0.08%) [ +0.00% +0.16% +0.00% / +0.08% +0.39% +0.47%] index_copy_ linear : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.320 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.15% +0.46% +0.46%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.00% +0.15% / +0.00% +0.46% +0.46%] index_add_ spread : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.47% +0.39%] index_copy_ spread : Elapsed 0.013 ms (1.275 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.15% +0.00% / +0.00% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.273 ( -0.08%) [ +0.00% +0.55% +0.00% / -0.08% +0.55% +0.47%] index_copy_ strided 3 : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.08% +0.00% / +0.08% +0.53% +0.53%] index_add_ strided 7 : Elapsed 0.013 ms (1.319 ms / 100) 1.274 -> 1.274 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.55% +0.47%] index_copy_ strided 7 : Elapsed 0.013 ms (1.274 ms / 100) 1.318 -> 1.317 ( -0.08%) [ +0.00% +0.00% +0.00% / -0.08% +0.53% +0.53%] index_add_ perm : Elapsed 0.013 ms (1.318 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.16% +0.08% / +0.00% +0.71% +0.55%] index_copy_ perm : Elapsed 0.013 ms (1.273 ms / 100) 1.318 -> 1.318 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.013 ms (1.319 ms / 100) 1.273 -> 1.273 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.55% +0.63%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.273 ms / 100) 7.849 -> 7.855 ( +0.08%) [ +0.47% +0.00% +0.03% / +0.08% +0.46% +0.28%] index_select const : Elapsed 0.079 ms (7.886 ms / 100) 7.884 -> 7.890 ( +0.08%) [ +0.01% +0.00% +0.08% / +0.08% +0.28% +0.19%] index_select wrap : Elapsed 0.079 ms (7.885 ms / 100) 7.862 -> 7.885 ( +0.29%) [ +0.14% +0.00% +0.29% / +0.29% +0.46% +0.33%] index_select linear : Elapsed 0.079 ms (7.873 ms / 100) 7.876 -> 7.872 ( -0.05%) [ +0.11% +0.24% +0.00% / -0.05% +0.15% +0.25%] index_select reverse : Elapsed 0.079 ms (7.885 ms / 100) 7.850 -> 7.857 ( +0.09%) [ +0.00% +0.25% +0.17% / +0.24% +0.11% +0.09%] index_select skip64 : Elapsed 0.078 ms (7.850 ms / 100) 7.844 -> 7.864 ( +0.25%) [ +0.11% +0.00% +0.15% / +0.25% +0.36% +0.34%] index_select skip256 : Elapsed 0.079 ms (7.853 ms / 100) 7.859 -> 7.872 ( +0.17%) [ +0.25% +0.00% +0.18% / +0.17% +0.48% +0.25%] index_select spread : Elapsed 0.079 ms (7.879 ms / 100) 7.875 -> 7.866 ( -0.11%) [ +0.00% +0.09% +0.03% / -0.11% +0.22% +0.19%] index_select strided 3 : Elapsed 0.079 ms (7.875 ms / 100) 7.881 -> 7.892 ( +0.14%) [ +0.20% +0.18% +0.00% / +0.14% +0.33% +0.24%] index_select random : Elapsed 0.079 ms (7.897 ms / 100) 7.859 -> 7.878 ( +0.24%) [ +0.00% +0.19% +0.20% / +0.42% +0.24% +0.43%] index_select random_sorted : Elapsed 0.079 ms (7.859 ms / 100) B = [20, 16, 4, 40] (stride (16, 1, 320, 1280)) A = [20, 16, 4, 5] (stride (16, 1, 320, 1280)) dim = 3 0.633 -> 0.633 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.95% +0.95%] index_add_ linear : Elapsed 0.006 ms (0.633 ms / 100) 0.623 -> 0.623 ( +0.00%) [ +0.00% +0.16% +0.00% / +0.00% +0.64% +0.64%] index_copy_ linear : Elapsed 0.006 ms (0.623 ms / 100) 0.629 -> 0.629 ( +0.00%) [ +0.48% +0.16% +0.00% / +0.00% +0.64% +0.79%] index_add_ reverse : Elapsed 0.006 ms (0.632 ms / 100) 0.620 -> 0.621 ( +0.16%) [ +0.00% +0.48% +0.16% / +0.16% +0.65% +0.48%] index_copy_ reverse : Elapsed 0.006 ms (0.620 ms / 100) 0.630 -> 0.631 ( +0.16%) [ +0.00% +0.16% +0.32% / +0.16% +0.32% +0.63%] index_add_ spread : Elapsed 0.006 ms (0.630 ms / 100) 0.621 -> 0.621 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.64%] index_copy_ spread : Elapsed 0.006 ms (0.621 ms / 100) 0.628 -> 0.629 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.80% +0.80%] index_add_ strided 3 : Elapsed 0.006 ms (0.629 ms / 100) 0.620 -> 0.620 ( +0.00%) [ +0.00% +3.71% +0.16% / +0.00% +0.65% +0.65%] index_copy_ strided 3 : Elapsed 0.006 ms (0.620 ms / 100) 0.629 -> 0.630 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.79% +0.79%] index_add_ strided 7 : Elapsed 0.006 ms (0.629 ms / 100) 0.620 -> 0.621 ( +0.16%) [ +0.00% +0.32% +0.00% / +0.16% +0.48% +0.81%] index_copy_ strided 7 : Elapsed 0.006 ms (0.620 ms / 100) 0.629 -> 0.629 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.64%] index_add_ perm : Elapsed 0.006 ms (0.629 ms / 100) 0.620 -> 0.619 ( -0.16%) [ +0.00% +0.16% +0.00% / -0.16% +0.65% +0.97%] index_copy_ perm : Elapsed 0.006 ms (0.620 ms / 100) 0.629 -> 0.629 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.64% +0.79%] index_add_ perm_sorted : Elapsed 0.006 ms (0.629 ms / 100) 0.621 -> 0.621 ( +0.00%) [ +0.00% +0.00% +0.32% / +0.00% +0.48% +0.48%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.621 ms / 100) 4.812 -> 4.815 ( +0.06%) [ +0.04% +0.15% +0.00% / +0.06% +0.23% +0.48%] index_select const : Elapsed 0.048 ms (4.814 ms / 100) 4.854 -> 4.858 ( +0.08%) [ +0.00% +0.04% +0.16% / +0.10% +0.08% +0.21%] index_select wrap : Elapsed 0.049 ms (4.854 ms / 100) 4.843 -> 4.846 ( +0.06%) [ +0.31% +0.14% +0.00% / +0.10% +0.31% +0.06%] index_select linear : Elapsed 0.049 ms (4.858 ms / 100) 4.846 -> 4.832 ( -0.29%) [ +0.10% +0.00% +0.04% / -0.10% -0.25% -0.29%] index_select reverse : Elapsed 0.049 ms (4.851 ms / 100) 4.819 -> 4.821 ( +0.04%) [ +0.25% +0.00% +0.19% / +0.39% +0.04% +0.12%] index_select skip64 : Elapsed 0.048 ms (4.831 ms / 100) 4.804 -> 4.816 ( +0.25%) [ +0.00% +0.19% +0.25% / +0.25% +0.42% +0.48%] index_select skip256 : Elapsed 0.048 ms (4.804 ms / 100) 4.837 -> 4.838 ( +0.02%) [ +0.02% +0.00% +0.27% / +0.02% +0.21% +0.27%] index_select spread : Elapsed 0.048 ms (4.838 ms / 100) 4.847 -> 4.851 ( +0.08%) [ +0.17% +0.10% +0.00% / +0.17% +0.08% +0.29%] index_select strided 3 : Elapsed 0.049 ms (4.855 ms / 100) 4.850 -> 4.848 ( -0.04%) [ +0.00% +0.10% +0.12% / +0.00% +0.06% -0.04%] index_select random : Elapsed 0.049 ms (4.850 ms / 100) 4.836 -> 4.840 ( +0.08%) [ +0.37% +0.00% +0.31% / +0.23% +0.08% +0.56%] index_select random_sorted : Elapsed 0.049 ms (4.854 ms / 100) out_shape = [40, 16, 5, 4] in_shape = [20, 16, 5, 4] idx_dim = 0 B = [40, 16, 5, 4] (stride (320, 20, 4, 1)) A = [20, 16, 5, 4] (stride (1, 20, 1280, 320)) dim = 0 2.304 -> 2.315 ( +0.48%) [ +0.26% +0.13% +0.00% / +0.48% +0.91% +0.78%] index_add_ linear : Elapsed 0.023 ms (2.310 ms / 100) 2.300 -> 2.317 ( +0.74%) [ +0.09% +0.00% +0.09% / +0.74% +0.78% +0.96%] index_copy_ linear : Elapsed 0.023 ms (2.302 ms / 100) 2.306 -> 2.316 ( +0.43%) [ +0.17% +0.00% +0.13% / +0.43% +0.65% +0.43%] index_add_ reverse : Elapsed 0.023 ms (2.310 ms / 100) 2.304 -> 2.316 ( +0.52%) [ +0.09% +0.00% +0.00% / +0.52% +0.69% +0.74%] index_copy_ reverse : Elapsed 0.023 ms (2.306 ms / 100) 2.306 -> 2.317 ( +0.48%) [ +0.09% +0.00% +0.09% / +0.48% +0.69% +0.65%] index_add_ spread : Elapsed 0.023 ms (2.308 ms / 100) 2.306 -> 2.321 ( +0.65%) [ +0.13% +0.00% +0.00% / +0.65% +0.69% +0.74%] index_copy_ spread : Elapsed 0.023 ms (2.309 ms / 100) 2.306 -> 2.318 ( +0.52%) [ +0.13% +0.22% +0.00% / +0.52% +0.69% +0.69%] index_add_ strided 3 : Elapsed 0.023 ms (2.309 ms / 100) 2.301 -> 2.315 ( +0.61%) [ +0.22% +0.00% +0.00% / +0.87% +0.61% +0.78%] index_copy_ strided 3 : Elapsed 0.023 ms (2.306 ms / 100) 2.308 -> 2.321 ( +0.56%) [ +0.00% +0.09% +0.04% / +0.56% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.023 ms (2.308 ms / 100) 2.302 -> 2.316 ( +0.61%) [ +0.00% +0.09% +0.17% / +0.61% +0.65% +0.65%] index_copy_ strided 7 : Elapsed 0.023 ms (2.302 ms / 100) 2.305 -> 2.319 ( +0.61%) [ +0.00% +0.17% +0.17% / +0.61% +0.78% +0.74%] index_add_ perm : Elapsed 0.023 ms (2.305 ms / 100) 2.300 -> 2.317 ( +0.74%) [ +0.00% +0.26% +0.04% / +0.74% +0.87% +0.96%] index_copy_ perm : Elapsed 0.023 ms (2.300 ms / 100) 2.306 -> 2.320 ( +0.61%) [ +0.04% +0.00% +0.00% / +0.65% +0.61% +0.61%] index_add_ perm_sorted : Elapsed 0.023 ms (2.307 ms / 100) 2.302 -> 2.316 ( +0.61%) [ +0.22% +0.30% +0.00% / +0.61% +1.04% +0.74%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.307 ms / 100) 4.279 -> 4.273 ( -0.14%) [ +0.05% +0.09% +0.00% / +0.07% +0.07% -0.14%] index_select const : Elapsed 0.043 ms (4.281 ms / 100) 4.284 -> 4.281 ( -0.07%) [ +0.02% +0.14% +0.00% / -0.07% -0.05% +0.07%] index_select wrap : Elapsed 0.043 ms (4.285 ms / 100) 4.280 -> 4.284 ( +0.09%) [ +0.05% +0.02% +0.00% / +0.09% +0.16% +0.28%] index_select linear : Elapsed 0.043 ms (4.282 ms / 100) 4.285 -> 4.285 ( +0.00%) [ +0.07% +0.00% +0.05% / +0.05% +0.26% +0.00%] index_select reverse : Elapsed 0.043 ms (4.288 ms / 100) 4.278 -> 4.277 ( -0.02%) [ +0.16% +0.07% +0.00% / -0.02% +0.05% +0.12%] index_select skip64 : Elapsed 0.043 ms (4.285 ms / 100) 4.280 -> 4.279 ( -0.02%) [ +0.09% +0.00% +0.05% / -0.02% +0.05% +0.00%] index_select skip256 : Elapsed 0.043 ms (4.284 ms / 100) 4.285 -> 4.282 ( -0.07%) [ +0.02% +0.09% +0.00% / -0.07% +0.09% +0.00%] index_select spread : Elapsed 0.043 ms (4.286 ms / 100) 4.280 -> 4.287 ( +0.16%) [ +0.23% +0.07% +0.00% / +0.19% +0.30% +0.16%] index_select strided 3 : Elapsed 0.043 ms (4.290 ms / 100) 4.283 -> 4.281 ( -0.05%) [ +0.16% +0.05% +0.00% / -0.05% +0.09% +0.21%] index_select strided 5 : Elapsed 0.043 ms (4.290 ms / 100) 4.279 -> 4.287 ( +0.19%) [ +0.12% +0.00% +0.12% / +0.19% +0.19% +0.28%] index_select strided 7 : Elapsed 0.043 ms (4.284 ms / 100) 4.283 -> 4.281 ( -0.05%) [ +0.12% +0.16% +0.00% / -0.05% +0.21% +0.16%] index_select strided 8 : Elapsed 0.043 ms (4.288 ms / 100) 4.278 -> 4.282 ( +0.09%) [ +0.00% +0.14% +0.16% / +0.09% +0.26% +0.40%] index_select strided 16 : Elapsed 0.043 ms (4.278 ms / 100) 4.277 -> 4.288 ( +0.26%) [ +0.23% +0.00% +0.07% / +0.30% +0.42% +0.26%] index_select random : Elapsed 0.043 ms (4.287 ms / 100) 4.282 -> 4.281 ( -0.02%) [ +0.02% +0.12% +0.00% / +0.00% -0.02% +0.23%] index_select random_sorted : Elapsed 0.043 ms (4.283 ms / 100) B = [40, 16, 5, 4] (stride (320, 20, 1, 5)) A = [20, 16, 5, 4] (stride (4, 400, 80, 1)) dim = 0 2.400 -> 2.412 ( +0.50%) [ +0.00% +0.13% +0.04% / +0.50% +0.79% +0.79%] index_add_ linear : Elapsed 0.024 ms (2.400 ms / 100) 2.400 -> 2.408 ( +0.33%) [ +0.13% +0.00% +0.00% / +0.33% +0.75% +0.71%] index_copy_ linear : Elapsed 0.024 ms (2.403 ms / 100) 2.392 -> 2.413 ( +0.88%) [ +0.13% +0.00% +0.13% / +0.88% +1.13% +1.30%] index_add_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.395 -> 2.406 ( +0.46%) [ +0.00% +0.08% +0.08% / +0.46% +1.09% +1.25%] index_copy_ reverse : Elapsed 0.024 ms (2.395 ms / 100) 2.398 -> 2.414 ( +0.67%) [ +0.00% +0.13% +0.08% / +0.67% +1.00% +1.13%] index_add_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.397 -> 2.411 ( +0.58%) [ +0.04% +0.00% +0.08% / +0.58% +1.04% +0.96%] index_copy_ spread : Elapsed 0.024 ms (2.398 ms / 100) 2.407 -> 2.415 ( +0.33%) [ +0.00% +0.08% +0.08% / +0.46% +0.33% +0.50%] index_add_ strided 3 : Elapsed 0.024 ms (2.407 ms / 100) 2.403 -> 2.413 ( +0.42%) [ +0.08% +0.00% +0.08% / +0.42% +0.71% +0.58%] index_copy_ strided 3 : Elapsed 0.024 ms (2.405 ms / 100) 2.404 -> 2.420 ( +0.67%) [ +0.00% +0.12% +0.21% / +0.67% +0.71% +0.67%] index_add_ strided 7 : Elapsed 0.024 ms (2.404 ms / 100) 2.402 -> 2.414 ( +0.50%) [ +0.17% +0.00% +0.12% / +0.50% +0.67% +0.71%] index_copy_ strided 7 : Elapsed 0.024 ms (2.406 ms / 100) 2.408 -> 2.412 ( +0.17%) [ +0.04% +0.17% +0.00% / +0.46% +0.17% +0.33%] index_add_ perm : Elapsed 0.024 ms (2.409 ms / 100) 2.404 -> 2.410 ( +0.25%) [ +0.00% +0.21% +0.08% / +0.75% +0.25% +0.46%] index_copy_ perm : Elapsed 0.024 ms (2.404 ms / 100) 2.404 -> 2.414 ( +0.42%) [ +0.12% +0.00% +0.00% / +0.71% +0.46% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.407 ms / 100) 2.406 -> 2.408 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.50% +0.42% +0.08%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.408 ms / 100) 4.428 -> 4.433 ( +0.11%) [ +0.00% +0.27% +0.23% / +0.11% +0.32% +0.27%] index_select const : Elapsed 0.044 ms (4.428 ms / 100) 4.439 -> 4.443 ( +0.09%) [ +0.25% +0.00% +0.00% / +0.16% +0.09% +0.16%] index_select wrap : Elapsed 0.044 ms (4.450 ms / 100) 4.447 -> 4.442 ( -0.11%) [ +0.00% +0.02% +0.11% / -0.11% -0.07% -0.02%] index_select linear : Elapsed 0.044 ms (4.447 ms / 100) 4.441 -> 4.442 ( +0.02%) [ +0.00% +0.11% +0.20% / +0.02% +0.05% +0.02%] index_select reverse : Elapsed 0.044 ms (4.441 ms / 100) 4.426 -> 4.430 ( +0.09%) [ +0.00% +0.20% +0.27% / +0.18% +0.09% +0.25%] index_select skip64 : Elapsed 0.044 ms (4.426 ms / 100) 4.432 -> 4.432 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.14% +0.09% +0.00%] index_select skip256 : Elapsed 0.044 ms (4.435 ms / 100) 4.439 -> 4.442 ( +0.07%) [ +0.11% +0.09% +0.00% / +0.07% +0.29% +0.16%] index_select spread : Elapsed 0.044 ms (4.444 ms / 100) 4.433 -> 4.445 ( +0.27%) [ +0.00% +0.27% +0.23% / +0.27% +0.41% +0.32%] index_select strided 3 : Elapsed 0.044 ms (4.433 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.16% +0.16% +0.00% / +0.09% +0.11% +0.29%] index_select strided 5 : Elapsed 0.044 ms (4.441 ms / 100) 4.436 -> 4.446 ( +0.23%) [ +0.07% +0.09% +0.00% / +0.23% +0.29% +0.25%] index_select strided 7 : Elapsed 0.044 ms (4.439 ms / 100) 4.434 -> 4.438 ( +0.09%) [ +0.07% +0.00% +0.02% / +0.09% +0.25% +0.20%] index_select strided 8 : Elapsed 0.044 ms (4.437 ms / 100) 4.434 -> 4.433 ( -0.02%) [ +0.05% +0.00% +0.02% / -0.02% +0.14% +0.14%] index_select strided 16 : Elapsed 0.044 ms (4.436 ms / 100) 4.446 -> 4.441 ( -0.11%) [ +0.00% +0.02% +0.02% / +0.09% -0.11% -0.09%] index_select random : Elapsed 0.044 ms (4.446 ms / 100) 4.446 -> 4.441 ( -0.11%) [ +0.04% +0.09% +0.00% / -0.04% -0.11% -0.02%] index_select random_sorted : Elapsed 0.044 ms (4.448 ms / 100) B = [40, 16, 5, 4] (stride (320, 5, 1, 80)) A = [20, 16, 5, 4] (stride (4, 400, 80, 1)) dim = 0 1.523 -> 1.495 ( -1.84%) [ +0.13% +0.20% +0.00% / -1.84% -0.85% -0.85%] index_add_ linear : Elapsed 0.015 ms (1.525 ms / 100) 1.501 -> 1.470 ( -2.07%) [ +0.20% +0.53% +0.00% / -2.07% -1.60% -1.47%] index_copy_ linear : Elapsed 0.015 ms (1.504 ms / 100) 1.523 -> 1.493 ( -1.97%) [ +0.00% +0.20% +0.13% / -1.97% -0.85% -0.92%] index_add_ reverse : Elapsed 0.015 ms (1.523 ms / 100) 1.502 -> 1.468 ( -2.26%) [ +0.00% +0.13% +0.13% / -2.26% -1.60% -1.46%] index_copy_ reverse : Elapsed 0.015 ms (1.502 ms / 100) 1.520 -> 1.493 ( -1.78%) [ +0.13% +0.26% +0.00% / -1.78% -1.45% -1.38%] index_add_ spread : Elapsed 0.015 ms (1.522 ms / 100) 1.499 -> 1.467 ( -2.13%) [ +0.00% +0.07% +0.00% / -2.13% -1.53% -1.67%] index_copy_ spread : Elapsed 0.015 ms (1.499 ms / 100) 1.528 -> 1.498 ( -1.96%) [ +0.13% +0.07% +0.00% / -1.77% -1.70% -1.96%] index_add_ strided 3 : Elapsed 0.015 ms (1.530 ms / 100) 1.502 -> 1.470 ( -2.13%) [ +0.00% +0.07% +0.00% / -2.13% -1.93% -2.13%] index_copy_ strided 3 : Elapsed 0.015 ms (1.502 ms / 100) 1.523 -> 1.492 ( -2.04%) [ +0.07% +0.07% +0.00% / -2.04% -1.12% -1.25%] index_add_ strided 7 : Elapsed 0.015 ms (1.524 ms / 100) 1.498 -> 1.467 ( -2.07%) [ +0.20% +0.07% +0.00% / -2.07% -1.60% -1.40%] index_copy_ strided 7 : Elapsed 0.015 ms (1.501 ms / 100) 1.525 -> 1.493 ( -2.10%) [ +0.13% +0.07% +0.00% / -2.10% -1.51% -1.31%] index_add_ perm : Elapsed 0.015 ms (1.527 ms / 100) 1.501 -> 1.469 ( -2.13%) [ +0.00% +0.13% +0.07% / -2.13% -1.47% -1.67%] index_copy_ perm : Elapsed 0.015 ms (1.501 ms / 100) 1.523 -> 1.490 ( -2.17%) [ +0.13% +0.00% +0.00% / -2.17% -1.71% -1.64%] index_add_ perm_sorted : Elapsed 0.015 ms (1.525 ms / 100) 1.494 -> 1.465 ( -1.94%) [ +0.07% +0.00% +0.27% / -1.94% -1.34% -1.14%] index_copy_ perm_sorted : Elapsed 0.015 ms (1.495 ms / 100) 2.876 -> 2.872 ( -0.14%) [ +0.17% +0.03% +0.00% / -0.14% +0.24% +0.24%] index_select const : Elapsed 0.029 ms (2.881 ms / 100) 2.898 -> 2.887 ( -0.38%) [ +0.14% +0.00% +0.07% / +0.03% -0.38% -0.38%] index_select wrap : Elapsed 0.029 ms (2.902 ms / 100) 2.894 -> 2.889 ( -0.17%) [ +0.24% +0.00% +0.14% / +0.24% -0.17% -0.03%] index_select linear : Elapsed 0.029 ms (2.901 ms / 100) 2.896 -> 2.896 ( +0.00%) [ +0.03% +0.00% +0.21% / +0.00% +0.31% +0.03%] index_select reverse : Elapsed 0.029 ms (2.897 ms / 100) 2.875 -> 2.875 ( +0.00%) [ +0.00% +0.03% +0.21% / +0.00% +0.10% +0.24%] index_select skip64 : Elapsed 0.029 ms (2.875 ms / 100) 2.869 -> 2.870 ( +0.03%) [ +0.28% +0.07% +0.00% / +0.03% +0.42% +0.31%] index_select skip256 : Elapsed 0.029 ms (2.877 ms / 100) 2.895 -> 2.891 ( -0.14%) [ +0.10% +0.00% +0.03% / +0.10% -0.14% -0.10%] index_select spread : Elapsed 0.029 ms (2.898 ms / 100) 2.903 -> 2.887 ( -0.55%) [ +0.00% +0.10% +0.03% / +0.03% -0.38% -0.55%] index_select strided 3 : Elapsed 0.029 ms (2.903 ms / 100) 2.883 -> 2.882 ( -0.03%) [ +0.14% +0.00% +0.03% / -0.03% +0.35% +0.10%] index_select strided 5 : Elapsed 0.029 ms (2.887 ms / 100) 2.898 -> 2.899 ( +0.03%) [ +0.03% +0.07% +0.00% / +0.07% +0.03% +0.14%] index_select strided 7 : Elapsed 0.029 ms (2.899 ms / 100) 2.878 -> 2.877 ( -0.03%) [ +0.00% +0.14% +0.14% / -0.03% +0.45% +0.45%] index_select strided 8 : Elapsed 0.029 ms (2.878 ms / 100) 2.877 -> 2.883 ( +0.21%) [ +0.00% +0.24% +0.17% / +0.21% +0.31% +0.35%] index_select strided 16 : Elapsed 0.029 ms (2.877 ms / 100) 2.896 -> 2.893 ( -0.10%) [ +0.07% +0.00% +0.17% / +0.17% +0.03% -0.10%] index_select random : Elapsed 0.029 ms (2.898 ms / 100) 2.896 -> 2.894 ( -0.07%) [ +0.21% +0.00% +0.14% / -0.03% +0.07% -0.07%] index_select random_sorted : Elapsed 0.029 ms (2.902 ms / 100) B = [40, 16, 5, 4] (stride (20, 800, 1, 5)) A = [20, 16, 5, 4] (stride (80, 1, 16, 1600)) dim = 0 2.445 -> 2.456 ( +0.45%) [ +0.16% +0.00% +0.16% / +0.45% +0.82% +0.82%] index_add_ linear : Elapsed 0.024 ms (2.449 ms / 100) 2.443 -> 2.453 ( +0.41%) [ +0.00% +0.08% +0.12% / +0.41% +0.65% +0.82%] index_copy_ linear : Elapsed 0.024 ms (2.443 ms / 100) 2.438 -> 2.453 ( +0.62%) [ +0.00% +0.00% +0.21% / +0.62% +1.11% +1.27%] index_add_ reverse : Elapsed 0.024 ms (2.438 ms / 100) 2.433 -> 2.450 ( +0.70%) [ +0.00% +0.37% +0.29% / +0.70% +1.23% +1.23%] index_copy_ reverse : Elapsed 0.024 ms (2.433 ms / 100) 2.445 -> 2.463 ( +0.74%) [ +0.12% +0.08% +0.00% / +0.74% +1.10% +1.02%] index_add_ spread : Elapsed 0.024 ms (2.448 ms / 100) 2.446 -> 2.460 ( +0.57%) [ +0.12% +0.00% +0.16% / +0.57% +1.14% +1.14%] index_copy_ spread : Elapsed 0.024 ms (2.449 ms / 100) 2.450 -> 2.464 ( +0.57%) [ +0.12% +0.29% +0.00% / +0.61% +0.65% +0.57%] index_add_ strided 3 : Elapsed 0.025 ms (2.453 ms / 100) 2.450 -> 2.465 ( +0.61%) [ +0.20% +0.16% +0.00% / +0.61% +0.65% +0.78%] index_copy_ strided 3 : Elapsed 0.025 ms (2.455 ms / 100) 2.451 -> 2.462 ( +0.45%) [ +0.00% +0.12% +0.00% / +0.45% +0.61% +0.61%] index_add_ strided 7 : Elapsed 0.025 ms (2.451 ms / 100) 2.450 -> 2.465 ( +0.61%) [ +0.12% +0.12% +0.00% / +0.61% +0.78% +0.73%] index_copy_ strided 7 : Elapsed 0.025 ms (2.453 ms / 100) 2.453 -> 2.458 ( +0.20%) [ +0.16% +0.08% +0.00% / +0.49% +0.20% +0.37%] index_add_ perm : Elapsed 0.025 ms (2.457 ms / 100) 2.454 -> 2.452 ( -0.08%) [ +0.16% +0.16% +0.00% / +0.45% -0.08% +0.12%] index_copy_ perm : Elapsed 0.025 ms (2.458 ms / 100) 2.453 -> 2.460 ( +0.29%) [ +0.12% +0.24% +0.00% / +0.57% +0.29% +0.33%] index_add_ perm_sorted : Elapsed 0.025 ms (2.456 ms / 100) 2.450 -> 2.458 ( +0.33%) [ +0.00% +0.20% +0.16% / +0.73% +0.37% +0.33%] index_copy_ perm_sorted : Elapsed 0.025 ms (2.450 ms / 100) 4.495 -> 4.490 ( -0.11%) [ +0.11% +0.00% +0.00% / -0.11% +0.13% +0.22%] index_select const : Elapsed 0.045 ms (4.500 ms / 100) 4.500 -> 4.504 ( +0.09%) [ +0.18% +0.00% +0.18% / +0.18% +0.18% +0.09%] index_select wrap : Elapsed 0.045 ms (4.508 ms / 100) 4.505 -> 4.504 ( -0.02%) [ +0.00% +0.13% +0.02% / -0.02% +0.11% +0.02%] index_select linear : Elapsed 0.045 ms (4.505 ms / 100) 4.504 -> 4.510 ( +0.13%) [ +0.09% +0.18% +0.00% / +0.22% +0.20% +0.13%] index_select reverse : Elapsed 0.045 ms (4.508 ms / 100) 4.490 -> 4.495 ( +0.11%) [ +0.00% +0.20% +0.04% / +0.11% +0.16% +0.20%] index_select skip64 : Elapsed 0.045 ms (4.490 ms / 100) 4.491 -> 4.493 ( +0.04%) [ +0.18% +0.00% +0.04% / +0.11% +0.24% +0.04%] index_select skip256 : Elapsed 0.045 ms (4.499 ms / 100) 4.499 -> 4.508 ( +0.20%) [ +0.09% +0.00% +0.13% / +0.20% +0.20% +0.22%] index_select spread : Elapsed 0.045 ms (4.503 ms / 100) 4.507 -> 4.506 ( -0.02%) [ +0.00% +0.07% +0.07% / +0.00% +0.02% -0.02%] index_select strided 3 : Elapsed 0.045 ms (4.507 ms / 100) 4.490 -> 4.494 ( +0.09%) [ +0.16% +0.04% +0.00% / +0.18% +0.29% +0.09%] index_select strided 5 : Elapsed 0.045 ms (4.497 ms / 100) 4.502 -> 4.501 ( -0.02%) [ +0.00% +0.04% +0.00% / -0.02% +0.13% +0.13%] index_select strided 7 : Elapsed 0.045 ms (4.502 ms / 100) 4.498 -> 4.494 ( -0.09%) [ +0.04% +0.13% +0.00% / +0.02% +0.07% -0.09%] index_select strided 8 : Elapsed 0.045 ms (4.500 ms / 100) 4.494 -> 4.495 ( +0.02%) [ +0.22% +0.09% +0.00% / +0.09% +0.02% +0.20%] index_select strided 16 : Elapsed 0.045 ms (4.504 ms / 100) 4.505 -> 4.501 ( -0.09%) [ +0.04% +0.04% +0.00% / +0.00% -0.09% +0.00%] index_select random : Elapsed 0.045 ms (4.507 ms / 100) 4.509 -> 4.501 ( -0.18%) [ +0.02% +0.02% +0.00% / -0.13% -0.18% -0.07%] index_select random_sorted : Elapsed 0.045 ms (4.510 ms / 100) B = [40, 16, 5, 4] (stride (1, 800, 160, 40)) A = [20, 16, 5, 4] (stride (64, 1, 1280, 16)) dim = 0 2.343 -> 2.347 ( +0.17%) [ +0.04% +0.04% +0.00% / +0.17% +0.43% +0.64%] index_add_ linear : Elapsed 0.023 ms (2.344 ms / 100) 2.348 -> 2.358 ( +0.43%) [ +0.26% +0.13% +0.00% / +0.43% +0.68% +0.64%] index_copy_ linear : Elapsed 0.024 ms (2.354 ms / 100) 2.344 -> 2.355 ( +0.47%) [ +0.04% +0.21% +0.00% / +0.47% +0.47% +0.64%] index_add_ reverse : Elapsed 0.023 ms (2.345 ms / 100) 2.352 -> 2.363 ( +0.47%) [ +0.00% +0.17% +0.17% / +0.47% +0.47% +0.47%] index_copy_ reverse : Elapsed 0.024 ms (2.352 ms / 100) 2.353 -> 2.361 ( +0.34%) [ +0.13% +0.04% +0.00% / +0.47% +0.38% +0.34%] index_add_ spread : Elapsed 0.024 ms (2.356 ms / 100) 2.367 -> 2.377 ( +0.42%) [ +0.08% +0.13% +0.00% / +0.51% +0.55% +0.42%] index_copy_ spread : Elapsed 0.024 ms (2.369 ms / 100) 2.354 -> 2.363 ( +0.38%) [ +0.04% +0.25% +0.00% / +0.47% +0.55% +0.38%] index_add_ strided 3 : Elapsed 0.024 ms (2.355 ms / 100) 2.366 -> 2.378 ( +0.51%) [ +0.04% +0.00% +0.00% / +0.51% +0.59% +0.51%] index_copy_ strided 3 : Elapsed 0.024 ms (2.367 ms / 100) 2.352 -> 2.363 ( +0.47%) [ +0.00% +0.38% +0.26% / +0.47% +0.47% +0.51%] index_add_ strided 7 : Elapsed 0.024 ms (2.352 ms / 100) 2.365 -> 2.379 ( +0.59%) [ +0.00% +0.00% +0.13% / +0.59% +0.76% +0.72%] index_copy_ strided 7 : Elapsed 0.024 ms (2.365 ms / 100) 2.356 -> 2.363 ( +0.30%) [ +0.00% +0.13% +0.08% / +0.30% +0.51% +0.42%] index_add_ perm : Elapsed 0.024 ms (2.356 ms / 100) 2.366 -> 2.377 ( +0.46%) [ +0.13% +0.13% +0.00% / +0.51% +0.46% +0.85%] index_copy_ perm : Elapsed 0.024 ms (2.369 ms / 100) 2.354 -> 2.359 ( +0.21%) [ +0.00% +0.00% +0.08% / +0.21% +0.47% +0.42%] index_add_ perm_sorted : Elapsed 0.024 ms (2.354 ms / 100) 2.368 -> 2.374 ( +0.25%) [ +0.04% +0.00% +0.00% / +0.25% +0.68% +0.72%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.369 ms / 100) 4.335 -> 4.337 ( +0.05%) [ +0.23% +0.39% +0.00% / +0.05% +0.23% +0.23%] index_select const : Elapsed 0.043 ms (4.345 ms / 100) 4.350 -> 4.346 ( -0.09%) [ +0.18% +0.00% +0.09% / -0.07% -0.09% +0.05%] index_select wrap : Elapsed 0.044 ms (4.358 ms / 100) 4.341 -> 4.344 ( +0.07%) [ +0.30% +0.07% +0.00% / +0.07% +0.23% +0.18%] index_select linear : Elapsed 0.044 ms (4.354 ms / 100) 4.348 -> 4.351 ( +0.07%) [ +0.11% +0.02% +0.00% / +0.11% +0.25% +0.07%] index_select reverse : Elapsed 0.044 ms (4.353 ms / 100) 4.339 -> 4.339 ( +0.00%) [ +0.23% +0.18% +0.00% / +0.00% +0.02% +0.05%] index_select skip64 : Elapsed 0.043 ms (4.349 ms / 100) 4.344 -> 4.342 ( -0.05%) [ +0.05% +0.00% +0.00% / +0.07% -0.05% +0.05%] index_select skip256 : Elapsed 0.043 ms (4.346 ms / 100) 4.349 -> 4.348 ( -0.02%) [ +0.14% +0.05% +0.00% / -0.02% +0.02% +0.09%] index_select spread : Elapsed 0.044 ms (4.355 ms / 100) 4.342 -> 4.348 ( +0.14%) [ +0.30% +0.00% +0.14% / +0.18% +0.14% +0.21%] index_select strided 3 : Elapsed 0.044 ms (4.355 ms / 100) 4.339 -> 4.344 ( +0.12%) [ +0.00% +0.07% +0.12% / +0.12% +0.12% +0.14%] index_select strided 5 : Elapsed 0.043 ms (4.339 ms / 100) 4.343 -> 4.346 ( +0.07%) [ +0.12% +0.12% +0.00% / +0.28% +0.12% +0.07%] index_select strided 7 : Elapsed 0.043 ms (4.348 ms / 100) 4.345 -> 4.347 ( +0.05%) [ +0.05% +0.02% +0.00% / +0.07% +0.09% +0.05%] index_select strided 8 : Elapsed 0.043 ms (4.347 ms / 100) 4.341 -> 4.344 ( +0.07%) [ +0.07% +0.09% +0.00% / +0.07% +0.12% +0.16%] index_select strided 16 : Elapsed 0.043 ms (4.344 ms / 100) 4.346 -> 4.346 ( +0.00%) [ +0.16% +0.02% +0.00% / +0.00% +0.18% +0.25%] index_select random : Elapsed 0.044 ms (4.353 ms / 100) 4.341 -> 4.347 ( +0.14%) [ +0.12% +0.00% +0.05% / +0.14% +0.25% +0.32%] index_select random_sorted : Elapsed 0.043 ms (4.346 ms / 100) B = [40, 16, 5, 4] (stride (5, 800, 1, 200)) A = [20, 16, 5, 4] (stride (320, 5, 1, 80)) dim = 0 2.422 -> 2.434 ( +0.50%) [ +0.12% +0.00% +0.00% / +0.50% +0.62% +0.78%] index_add_ linear : Elapsed 0.024 ms (2.425 ms / 100) 2.406 -> 2.423 ( +0.71%) [ +0.29% +0.00% +0.04% / +0.71% +1.00% +1.08%] index_copy_ linear : Elapsed 0.024 ms (2.413 ms / 100) 2.415 -> 2.429 ( +0.58%) [ +0.00% +0.12% +0.00% / +0.58% +1.16% +0.95%] index_add_ reverse : Elapsed 0.024 ms (2.415 ms / 100) 2.403 -> 2.416 ( +0.54%) [ +0.04% +0.00% +0.12% / +0.54% +1.12% +1.37%] index_copy_ reverse : Elapsed 0.024 ms (2.404 ms / 100) 2.433 -> 2.444 ( +0.45%) [ +0.00% +0.12% +0.08% / +0.45% +1.03% +0.95%] index_add_ spread : Elapsed 0.024 ms (2.433 ms / 100) 2.431 -> 2.446 ( +0.62%) [ +0.00% +0.16% +0.29% / +0.62% +1.15% +1.11%] index_copy_ spread : Elapsed 0.024 ms (2.431 ms / 100) 2.439 -> 2.448 ( +0.37%) [ +0.00% +0.12% +0.00% / +0.37% +0.37% +0.57%] index_add_ strided 3 : Elapsed 0.024 ms (2.439 ms / 100) 2.434 -> 2.444 ( +0.41%) [ +0.00% +0.08% +0.16% / +0.41% +0.66% +0.62%] index_copy_ strided 3 : Elapsed 0.024 ms (2.434 ms / 100) 2.436 -> 2.447 ( +0.45%) [ +0.00% +0.29% +0.25% / +0.57% +0.45% +0.53%] index_add_ strided 7 : Elapsed 0.024 ms (2.436 ms / 100) 2.434 -> 2.443 ( +0.37%) [ +0.04% +0.00% +0.12% / +0.37% +0.74% +0.74%] index_copy_ strided 7 : Elapsed 0.024 ms (2.435 ms / 100) 2.432 -> 2.436 ( +0.16%) [ +0.08% +0.08% +0.00% / +0.49% +0.29% +0.16%] index_add_ perm : Elapsed 0.024 ms (2.434 ms / 100) 2.429 -> 2.434 ( +0.21%) [ +0.12% +0.08% +0.00% / +0.49% +0.21% +0.21%] index_copy_ perm : Elapsed 0.024 ms (2.432 ms / 100) 2.436 -> 2.441 ( +0.21%) [ +0.12% +0.12% +0.00% / +0.66% +0.29% +0.21%] index_add_ perm_sorted : Elapsed 0.024 ms (2.439 ms / 100) 2.431 -> 2.435 ( +0.16%) [ +0.12% +0.21% +0.00% / +0.62% +0.16% +0.53%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.434 ms / 100) 4.429 -> 4.433 ( +0.09%) [ +0.11% +0.11% +0.00% / +0.09% +0.18% +0.16%] index_select const : Elapsed 0.044 ms (4.434 ms / 100) 4.445 -> 4.443 ( -0.04%) [ +0.04% +0.00% +0.00% / +0.02% +0.13% -0.04%] index_select wrap : Elapsed 0.044 ms (4.447 ms / 100) 4.445 -> 4.446 ( +0.02%) [ +0.00% +0.13% +0.02% / +0.02% +0.13% +0.02%] index_select linear : Elapsed 0.044 ms (4.445 ms / 100) 4.445 -> 4.442 ( -0.07%) [ +0.16% +0.13% +0.00% / +0.02% -0.07% -0.04%] index_select reverse : Elapsed 0.045 ms (4.452 ms / 100) 4.431 -> 4.429 ( -0.05%) [ +0.16% +0.00% +0.32% / +0.14% +0.02% -0.05%] index_select skip64 : Elapsed 0.044 ms (4.438 ms / 100) 4.429 -> 4.429 ( +0.00%) [ +0.02% +0.18% +0.00% / +0.00% +0.20% +0.16%] index_select skip256 : Elapsed 0.044 ms (4.430 ms / 100) 4.443 -> 4.445 ( +0.05%) [ +0.09% +0.00% +0.02% / +0.05% +0.25% +0.23%] index_select spread : Elapsed 0.044 ms (4.447 ms / 100) 4.444 -> 4.445 ( +0.02%) [ +0.11% +0.00% +0.11% / +0.02% +0.14% +0.32%] index_select strided 3 : Elapsed 0.044 ms (4.449 ms / 100) 4.428 -> 4.435 ( +0.16%) [ +0.16% +0.00% +0.29% / +0.16% +0.29% +0.29%] index_select strided 5 : Elapsed 0.044 ms (4.435 ms / 100) 4.442 -> 4.440 ( -0.05%) [ +0.05% +0.16% +0.00% / -0.05% +0.38% +0.09%] index_select strided 7 : Elapsed 0.044 ms (4.444 ms / 100) 4.438 -> 4.428 ( -0.23%) [ +0.02% +0.02% +0.00% / -0.05% -0.23% -0.11%] index_select strided 8 : Elapsed 0.044 ms (4.439 ms / 100) 4.436 -> 4.427 ( -0.20%) [ +0.14% +0.00% +0.11% / +0.05% -0.20% +0.07%] index_select strided 16 : Elapsed 0.044 ms (4.442 ms / 100) 4.445 -> 4.446 ( +0.02%) [ +0.09% +0.02% +0.00% / +0.02% +0.18% +0.02%] index_select random : Elapsed 0.044 ms (4.449 ms / 100) 4.443 -> 4.443 ( +0.00%) [ +0.16% +0.11% +0.00% / +0.02% +0.05% +0.00%] index_select random_sorted : Elapsed 0.045 ms (4.450 ms / 100) B = [40, 16, 5, 4] (stride (1, 200, 40, 3200)) A = [20, 16, 5, 4] (stride (1, 400, 20, 100)) dim = 0 2.389 -> 2.403 ( +0.59%) [ +0.21% +0.21% +0.00% / +0.59% +0.71% +0.88%] index_add_ linear : Elapsed 0.024 ms (2.394 ms / 100) 2.398 -> 2.408 ( +0.42%) [ +0.04% +0.21% +0.00% / +0.42% +0.79% +0.88%] index_copy_ linear : Elapsed 0.024 ms (2.399 ms / 100) 2.393 -> 2.404 ( +0.46%) [ +0.00% +0.21% +0.08% / +0.59% +0.46% +0.59%] index_add_ reverse : Elapsed 0.024 ms (2.393 ms / 100) 2.401 -> 2.414 ( +0.54%) [ +0.00% +0.12% +0.17% / +0.54% +0.54% +0.54%] index_copy_ reverse : Elapsed 0.024 ms (2.401 ms / 100) 2.408 -> 2.414 ( +0.25%) [ +0.17% +0.04% +0.00% / +0.33% +0.25% +0.25%] index_add_ spread : Elapsed 0.024 ms (2.412 ms / 100) 2.421 -> 2.430 ( +0.37%) [ +0.08% +0.00% +0.00% / +0.37% +0.66% +0.45%] index_copy_ spread : Elapsed 0.024 ms (2.423 ms / 100) 2.410 -> 2.417 ( +0.29%) [ +0.00% +0.00% +0.17% / +0.37% +0.33% +0.29%] index_add_ strided 3 : Elapsed 0.024 ms (2.410 ms / 100) 2.414 -> 2.427 ( +0.54%) [ +0.00% +0.17% +0.21% / +0.54% +0.66% +0.95%] index_copy_ strided 3 : Elapsed 0.024 ms (2.414 ms / 100) 2.410 -> 2.418 ( +0.33%) [ +0.00% +0.00% +0.08% / +0.50% +0.33% +0.46%] index_add_ strided 7 : Elapsed 0.024 ms (2.410 ms / 100) 2.418 -> 2.430 ( +0.50%) [ +0.00% +0.12% +0.00% / +0.50% +0.79% +0.54%] index_copy_ strided 7 : Elapsed 0.024 ms (2.418 ms / 100) 2.407 -> 2.417 ( +0.42%) [ +0.21% +0.12% +0.00% / +0.42% +0.62% +0.75%] index_add_ perm : Elapsed 0.024 ms (2.412 ms / 100) 2.418 -> 2.430 ( +0.50%) [ +0.00% +0.04% +0.17% / +0.54% +0.50% +0.62%] index_copy_ perm : Elapsed 0.024 ms (2.418 ms / 100) 2.404 -> 2.419 ( +0.62%) [ +0.08% +0.12% +0.00% / +0.62% +0.75% +0.62%] index_add_ perm_sorted : Elapsed 0.024 ms (2.406 ms / 100) 2.419 -> 2.431 ( +0.50%) [ +0.00% +0.04% +0.08% / +0.50% +0.62% +0.87%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.419 ms / 100) 4.413 -> 4.424 ( +0.25%) [ +0.23% +0.00% +0.02% / +0.32% +0.29% +0.25%] index_select const : Elapsed 0.044 ms (4.423 ms / 100) 4.426 -> 4.423 ( -0.07%) [ +0.02% +0.00% +0.05% / +0.00% -0.02% -0.07%] index_select wrap : Elapsed 0.044 ms (4.427 ms / 100) 4.427 -> 4.418 ( -0.20%) [ +0.09% +0.00% +0.20% / -0.20% +0.09% +0.05%] index_select linear : Elapsed 0.044 ms (4.431 ms / 100) 4.423 -> 4.429 ( +0.14%) [ +0.05% +0.11% +0.00% / +0.14% +0.25% +0.27%] index_select reverse : Elapsed 0.044 ms (4.425 ms / 100) 4.414 -> 4.419 ( +0.11%) [ +0.18% +0.00% +0.29% / +0.11% +0.18% +0.16%] index_select skip64 : Elapsed 0.044 ms (4.422 ms / 100) 4.416 -> 4.411 ( -0.11%) [ +0.07% +0.09% +0.00% / -0.11% +0.11% +0.20%] index_select skip256 : Elapsed 0.044 ms (4.419 ms / 100) 4.425 -> 4.423 ( -0.05%) [ +0.09% +0.02% +0.00% / -0.05% +0.02% +0.18%] index_select spread : Elapsed 0.044 ms (4.429 ms / 100) 4.421 -> 4.430 ( +0.20%) [ +0.11% +0.23% +0.00% / +0.32% +0.20% +0.34%] index_select strided 3 : Elapsed 0.044 ms (4.426 ms / 100) 4.420 -> 4.424 ( +0.09%) [ +0.18% +0.00% +0.14% / +0.09% +0.32% +0.29%] index_select strided 5 : Elapsed 0.044 ms (4.428 ms / 100) 4.425 -> 4.416 ( -0.20%) [ +0.16% +0.00% +0.07% / -0.20% +0.00% +0.11%] index_select strided 7 : Elapsed 0.044 ms (4.432 ms / 100) 4.418 -> 4.423 ( +0.11%) [ +0.11% +0.00% +0.18% / +0.11% +0.32% +0.18%] index_select strided 8 : Elapsed 0.044 ms (4.423 ms / 100) 4.424 -> 4.424 ( +0.00%) [ +0.02% +0.00% +0.09% / +0.00% +0.34% +0.23%] index_select strided 16 : Elapsed 0.044 ms (4.425 ms / 100) 4.423 -> 4.421 ( -0.05%) [ +0.11% +0.00% +0.00% / -0.05% +0.20% +0.02%] index_select random : Elapsed 0.044 ms (4.428 ms / 100) 4.426 -> 4.422 ( -0.09%) [ +0.02% +0.11% +0.00% / -0.02% +0.23% -0.09%] index_select random_sorted : Elapsed 0.044 ms (4.427 ms / 100) B = [40, 16, 5, 4] (stride (16, 1, 640, 3200)) A = [20, 16, 5, 4] (stride (320, 20, 4, 1)) dim = 0 2.373 -> 2.388 ( +0.63%) [ +0.17% +0.13% +0.00% / +0.63% +0.67% +0.88%] index_add_ linear : Elapsed 0.024 ms (2.377 ms / 100) 2.361 -> 2.371 ( +0.42%) [ +0.00% +0.68% +0.04% / +0.42% +0.89% +0.68%] index_copy_ linear : Elapsed 0.024 ms (2.361 ms / 100) 2.368 -> 2.382 ( +0.59%) [ +0.00% +0.42% +0.04% / +0.59% +1.22% +0.97%] index_add_ reverse : Elapsed 0.024 ms (2.368 ms / 100) 2.356 -> 2.367 ( +0.47%) [ +0.00% +0.13% +0.17% / +0.47% +0.98% +1.06%] index_copy_ reverse : Elapsed 0.024 ms (2.356 ms / 100) 2.367 -> 2.381 ( +0.59%) [ +0.08% +0.00% +0.13% / +0.59% +0.97% +1.01%] index_add_ spread : Elapsed 0.024 ms (2.369 ms / 100) 2.358 -> 2.368 ( +0.42%) [ +0.08% +0.04% +0.00% / +0.42% +0.98% +1.10%] index_copy_ spread : Elapsed 0.024 ms (2.360 ms / 100) 2.375 -> 2.389 ( +0.59%) [ +0.00% +0.21% +0.00% / +0.59% +0.63% +0.59%] index_add_ strided 3 : Elapsed 0.024 ms (2.375 ms / 100) 2.364 -> 2.377 ( +0.55%) [ +0.00% +0.17% +0.00% / +0.55% +0.55% +0.72%] index_copy_ strided 3 : Elapsed 0.024 ms (2.364 ms / 100) 2.372 -> 2.391 ( +0.80%) [ +0.25% +0.21% +0.00% / +0.80% +0.80% +0.84%] index_add_ strided 7 : Elapsed 0.024 ms (2.378 ms / 100) 2.361 -> 2.375 ( +0.59%) [ +0.00% +0.17% +0.00% / +0.80% +0.59% +0.72%] index_copy_ strided 7 : Elapsed 0.024 ms (2.361 ms / 100) 2.374 -> 2.386 ( +0.51%) [ +0.25% +0.25% +0.00% / +0.63% +0.51% +0.72%] index_add_ perm : Elapsed 0.024 ms (2.380 ms / 100) 2.366 -> 2.372 ( +0.25%) [ +0.00% +0.08% +0.00% / +0.59% +0.25% +0.34%] index_copy_ perm : Elapsed 0.024 ms (2.366 ms / 100) 2.377 -> 2.383 ( +0.25%) [ +0.21% +0.13% +0.00% / +0.67% +0.25% +0.34%] index_add_ perm_sorted : Elapsed 0.024 ms (2.382 ms / 100) 2.368 -> 2.373 ( +0.21%) [ +0.00% +0.00% +0.04% / +0.55% +0.21% +0.21%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.368 ms / 100) 4.358 -> 4.360 ( +0.05%) [ +0.09% +0.00% +0.16% / +0.16% +0.05% +0.21%] index_select const : Elapsed 0.044 ms (4.362 ms / 100) 4.366 -> 4.371 ( +0.11%) [ +0.00% +0.05% +0.11% / +0.14% +0.11% +0.25%] index_select wrap : Elapsed 0.044 ms (4.366 ms / 100) 4.369 -> 4.364 ( -0.11%) [ +0.00% +0.00% +0.11% / +0.00% +0.09% -0.11%] index_select linear : Elapsed 0.044 ms (4.369 ms / 100) 4.369 -> 4.369 ( +0.00%) [ +0.00% +0.09% +0.11% / +0.00% +0.00% +0.11%] index_select reverse : Elapsed 0.044 ms (4.369 ms / 100) 4.365 -> 4.358 ( -0.16%) [ +0.00% +0.05% +0.05% / +0.02% -0.05% -0.16%] index_select skip64 : Elapsed 0.044 ms (4.365 ms / 100) 4.360 -> 4.363 ( +0.07%) [ +0.02% +0.02% +0.00% / +0.07% +0.14% +0.09%] index_select skip256 : Elapsed 0.044 ms (4.361 ms / 100) 4.369 -> 4.369 ( +0.00%) [ +0.02% +0.05% +0.00% / +0.05% +0.02% +0.00%] index_select spread : Elapsed 0.044 ms (4.370 ms / 100) 4.367 -> 4.370 ( +0.07%) [ +0.16% +0.11% +0.00% / +0.18% +0.07% +0.32%] index_select strided 3 : Elapsed 0.044 ms (4.374 ms / 100) 4.361 -> 4.364 ( +0.07%) [ +0.00% +0.05% +0.11% / +0.07% +0.07% +0.14%] index_select strided 5 : Elapsed 0.044 ms (4.361 ms / 100) 4.366 -> 4.366 ( +0.00%) [ +0.02% +0.07% +0.00% / +0.00% +0.14% +0.16%] index_select strided 7 : Elapsed 0.044 ms (4.367 ms / 100) 4.361 -> 4.360 ( -0.02%) [ +0.00% +0.14% +0.02% / +0.05% -0.02% +0.05%] index_select strided 8 : Elapsed 0.044 ms (4.361 ms / 100) 4.363 -> 4.360 ( -0.07%) [ +0.00% +0.16% +0.11% / +0.07% +0.18% -0.07%] index_select strided 16 : Elapsed 0.044 ms (4.363 ms / 100) 4.367 -> 4.364 ( -0.07%) [ +0.14% +0.14% +0.00% / +0.16% +0.05% -0.07%] index_select random : Elapsed 0.044 ms (4.373 ms / 100) 4.372 -> 4.370 ( -0.05%) [ +0.02% +0.00% +0.05% / +0.05% -0.05% +0.00%] index_select random_sorted : Elapsed 0.044 ms (4.373 ms / 100) out_shape = [20, 40, 5, 4] in_shape = [20, 16, 5, 4] idx_dim = 1 B = [20, 40, 5, 4] (stride (800, 20, 4, 1)) A = [20, 16, 5, 4] (stride (5, 400, 1, 100)) dim = 1 3.665 -> 3.667 ( +0.05%) [ +0.00% +0.00% +0.05% / +0.05% +0.55% +0.41%] index_add_ linear : Elapsed 0.037 ms (3.665 ms / 100) 3.541 -> 3.541 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.54% +0.56%] index_copy_ linear : Elapsed 0.035 ms (3.541 ms / 100) 3.672 -> 3.674 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.54% +0.52%] index_add_ reverse : Elapsed 0.037 ms (3.672 ms / 100) 3.536 -> 3.537 ( +0.03%) [ +0.03% +0.06% +0.00% / +0.03% +0.54% +0.54%] index_copy_ reverse : Elapsed 0.035 ms (3.537 ms / 100) 3.670 -> 3.670 ( +0.00%) [ +0.05% +0.11% +0.00% / +0.00% +0.57% +0.60%] index_add_ spread : Elapsed 0.037 ms (3.672 ms / 100) 3.541 -> 3.543 ( +0.06%) [ +0.00% +0.00% +0.00% / +0.06% +0.51% +0.56%] index_copy_ spread : Elapsed 0.035 ms (3.541 ms / 100) 3.664 -> 3.672 ( +0.22%) [ +0.14% +0.16% +0.00% / +0.22% +0.68% +0.60%] index_add_ strided 3 : Elapsed 0.037 ms (3.669 ms / 100) 3.531 -> 3.537 ( +0.17%) [ +0.14% +0.17% +0.00% / +0.17% +0.71% +0.48%] index_copy_ strided 3 : Elapsed 0.035 ms (3.536 ms / 100) 3.672 -> 3.673 ( +0.03%) [ +0.08% +0.08% +0.00% / +0.03% +0.54% +0.49%] index_add_ strided 7 : Elapsed 0.037 ms (3.675 ms / 100) 3.536 -> 3.538 ( +0.06%) [ +0.08% +0.03% +0.00% / +0.06% +0.57% +0.54%] index_copy_ strided 7 : Elapsed 0.035 ms (3.539 ms / 100) 3.663 -> 3.662 ( -0.03%) [ +0.03% +0.05% +0.00% / -0.03% +0.63% +0.66%] index_add_ perm : Elapsed 0.037 ms (3.664 ms / 100) 3.542 -> 3.544 ( +0.06%) [ +0.00% +0.03% +0.00% / +0.06% +0.54% +0.54%] index_copy_ perm : Elapsed 0.035 ms (3.542 ms / 100) 3.665 -> 3.664 ( -0.03%) [ +0.00% +0.08% +0.05% / -0.03% +0.44% +0.44%] index_add_ perm_sorted : Elapsed 0.037 ms (3.665 ms / 100) 3.543 -> 3.545 ( +0.06%) [ +0.00% +0.03% +0.03% / +0.06% +0.48% +0.37%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.543 ms / 100) 5.460 -> 5.466 ( +0.11%) [ +0.04% +0.00% +0.05% / +0.11% +0.16% +0.37%] index_select const : Elapsed 0.055 ms (5.462 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.00% +0.05% +0.00% / +0.02% +0.33% +0.16%] index_select wrap : Elapsed 0.055 ms (5.473 ms / 100) 5.474 -> 5.473 ( -0.02%) [ +0.11% +0.00% +0.16% / -0.02% +0.16% +0.16%] index_select linear : Elapsed 0.055 ms (5.480 ms / 100) 5.473 -> 5.472 ( -0.02%) [ +0.09% +0.00% +0.13% / -0.02% +0.15% +0.15%] index_select reverse : Elapsed 0.055 ms (5.478 ms / 100) 5.468 -> 5.465 ( -0.05%) [ +0.00% +0.05% +0.04% / +0.18% +0.05% -0.05%] index_select skip64 : Elapsed 0.055 ms (5.468 ms / 100) 5.463 -> 5.465 ( +0.04%) [ +0.05% +0.00% +0.15% / +0.15% +0.04% +0.20%] index_select skip256 : Elapsed 0.055 ms (5.466 ms / 100) 5.468 -> 5.472 ( +0.07%) [ +0.11% +0.00% +0.13% / +0.09% +0.16% +0.07%] index_select spread : Elapsed 0.055 ms (5.474 ms / 100) 5.468 -> 5.470 ( +0.04%) [ +0.00% +0.09% +0.40% / +0.04% +0.07% +0.20%] index_select strided 3 : Elapsed 0.055 ms (5.468 ms / 100) 5.465 -> 5.468 ( +0.05%) [ +0.00% +0.16% +0.07% / +0.05% +0.26% +0.09%] index_select strided 5 : Elapsed 0.055 ms (5.465 ms / 100) 5.462 -> 5.473 ( +0.20%) [ +0.09% +0.07% +0.00% / +0.20% +0.20% +0.29%] index_select strided 7 : Elapsed 0.055 ms (5.467 ms / 100) 5.464 -> 5.466 ( +0.04%) [ +0.11% +0.11% +0.00% / +0.05% +0.07% +0.04%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.468 -> 5.467 ( -0.02%) [ +0.00% +0.09% +0.05% / -0.02% +0.07% +0.05%] index_select random : Elapsed 0.055 ms (5.468 ms / 100) 5.472 -> 5.472 ( +0.00%) [ +0.00% +0.04% +0.05% / +0.02% +0.00% +0.16%] index_select random_sorted : Elapsed 0.055 ms (5.472 ms / 100) B = [20, 40, 5, 4] (stride (800, 4, 160, 1)) A = [20, 16, 5, 4] (stride (1, 400, 20, 100)) dim = 1 4.120 -> 4.120 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.68% +0.73%] index_add_ linear : Elapsed 0.041 ms (4.122 ms / 100) 3.977 -> 3.975 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.68% +0.65%] index_copy_ linear : Elapsed 0.040 ms (3.977 ms / 100) 4.147 -> 4.148 ( +0.02%) [ +0.00% +0.07% +0.05% / +0.02% +0.77% +0.82%] index_add_ reverse : Elapsed 0.041 ms (4.147 ms / 100) 4.006 -> 4.007 ( +0.02%) [ +0.00% +0.10% +0.07% / +0.02% +0.75% +0.77%] index_copy_ reverse : Elapsed 0.040 ms (4.006 ms / 100) 4.129 -> 4.132 ( +0.07%) [ +0.07% +0.07% +0.00% / +0.07% +0.78% +0.78%] index_add_ spread : Elapsed 0.041 ms (4.132 ms / 100) 3.991 -> 3.990 ( -0.03%) [ +0.03% +0.08% +0.00% / -0.03% +0.75% +0.78%] index_copy_ spread : Elapsed 0.040 ms (3.992 ms / 100) 4.148 -> 4.148 ( +0.00%) [ +0.10% +0.00% +0.14% / +0.00% +0.87% +0.94%] index_add_ strided 3 : Elapsed 0.042 ms (4.152 ms / 100) 4.003 -> 4.002 ( -0.02%) [ +0.00% +0.00% +0.05% / -0.02% +0.80% +0.85%] index_copy_ strided 3 : Elapsed 0.040 ms (4.003 ms / 100) 4.147 -> 4.149 ( +0.05%) [ +0.10% +0.12% +0.00% / +0.05% +0.92% +0.77%] index_add_ strided 7 : Elapsed 0.042 ms (4.151 ms / 100) 4.006 -> 4.009 ( +0.07%) [ +0.07% +0.15% +0.00% / +0.07% +0.77% +0.77%] index_copy_ strided 7 : Elapsed 0.040 ms (4.009 ms / 100) 4.119 -> 4.120 ( +0.02%) [ +0.00% +0.02% +0.00% / +0.02% +0.78% +0.75%] index_add_ perm : Elapsed 0.041 ms (4.119 ms / 100) 3.975 -> 3.977 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.05% +0.73% +0.73%] index_copy_ perm : Elapsed 0.040 ms (3.975 ms / 100) 4.152 -> 4.156 ( +0.10%) [ +0.14% +0.02% +0.00% / +0.10% +0.79% +0.77%] index_add_ perm_sorted : Elapsed 0.042 ms (4.158 ms / 100) 4.005 -> 4.008 ( +0.07%) [ +0.10% +0.02% +0.00% / +0.07% +0.80% +0.72%] index_copy_ perm_sorted : Elapsed 0.040 ms (4.009 ms / 100) 5.556 -> 5.553 ( -0.05%) [ +0.04% +0.07% +0.00% / -0.05% -0.02% -0.05%] index_select const : Elapsed 0.056 ms (5.558 ms / 100) 5.564 -> 5.550 ( -0.25%) [ +0.00% +0.02% +0.00% / +0.00% -0.14% -0.25%] index_select wrap : Elapsed 0.056 ms (5.564 ms / 100) 5.563 -> 5.555 ( -0.14%) [ +0.02% +0.00% +0.02% / +0.14% -0.14% -0.04%] index_select linear : Elapsed 0.056 ms (5.564 ms / 100) 5.560 -> 5.564 ( +0.07%) [ +0.00% +0.13% +0.00% / +0.18% +0.20% +0.07%] index_select reverse : Elapsed 0.056 ms (5.560 ms / 100) 5.552 -> 5.550 ( -0.04%) [ +0.02% +0.00% +0.09% / +0.27% -0.04% +0.02%] index_select skip64 : Elapsed 0.056 ms (5.553 ms / 100) 5.549 -> 5.557 ( +0.14%) [ +0.11% +0.00% +0.04% / +0.14% +0.14% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.555 ms / 100) 5.559 -> 5.546 ( -0.23%) [ +0.14% +0.07% +0.00% / +0.07% -0.23% +0.04%] index_select spread : Elapsed 0.056 ms (5.567 ms / 100) 5.560 -> 5.556 ( -0.07%) [ +0.11% +0.04% +0.00% / +0.05% -0.07% -0.04%] index_select strided 3 : Elapsed 0.056 ms (5.566 ms / 100) 5.557 -> 5.557 ( +0.00%) [ +0.04% +0.00% +0.20% / +0.11% +0.04% +0.00%] index_select strided 5 : Elapsed 0.056 ms (5.559 ms / 100) 5.556 -> 5.558 ( +0.04%) [ +0.18% +0.16% +0.00% / +0.04% +0.22% +0.23%] index_select strided 7 : Elapsed 0.056 ms (5.566 ms / 100) 5.557 -> 5.561 ( +0.07%) [ +0.11% +0.00% +0.02% / +0.09% +0.07% +0.27%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.553 -> 5.560 ( +0.13%) [ +0.11% +0.16% +0.00% / +0.31% +0.13% +0.18%] index_select random : Elapsed 0.056 ms (5.559 ms / 100) 5.560 -> 5.555 ( -0.09%) [ +0.00% +0.00% +0.02% / +0.11% +0.04% -0.09%] index_select random_sorted : Elapsed 0.056 ms (5.560 ms / 100) B = [20, 40, 5, 4] (stride (800, 5, 1, 200)) A = [20, 16, 5, 4] (stride (16, 1, 1280, 320)) dim = 1 4.013 -> 4.023 ( +0.25%) [ +0.02% +0.17% +0.00% / +0.25% +0.65% +0.47%] index_add_ linear : Elapsed 0.040 ms (4.014 ms / 100) 3.849 -> 3.858 ( +0.23%) [ +0.00% +0.16% +0.00% / +0.23% +0.75% +0.65%] index_copy_ linear : Elapsed 0.038 ms (3.849 ms / 100) 4.029 -> 4.029 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.40% +0.60%] index_add_ reverse : Elapsed 0.040 ms (4.029 ms / 100) 3.864 -> 3.864 ( +0.00%) [ +0.00% +0.08% +0.08% / +0.00% +0.44% +0.52%] index_copy_ reverse : Elapsed 0.039 ms (3.864 ms / 100) 4.021 -> 4.020 ( -0.02%) [ +0.00% +0.05% +0.12% / -0.02% +0.42% +0.52%] index_add_ spread : Elapsed 0.040 ms (4.021 ms / 100) 3.866 -> 3.864 ( -0.05%) [ +0.00% +0.08% +0.05% / -0.05% +0.44% +0.62%] index_copy_ spread : Elapsed 0.039 ms (3.866 ms / 100) 4.026 -> 4.029 ( +0.07%) [ +0.00% +0.37% +0.05% / +0.07% +0.47% +0.57%] index_add_ strided 3 : Elapsed 0.040 ms (4.026 ms / 100) 3.860 -> 3.862 ( +0.05%) [ +0.00% +0.36% +0.00% / +0.05% +0.60% +0.57%] index_copy_ strided 3 : Elapsed 0.039 ms (3.860 ms / 100) 4.029 -> 4.025 ( -0.10%) [ +0.00% +0.15% +0.05% / -0.10% +0.60% +0.65%] index_add_ strided 7 : Elapsed 0.040 ms (4.029 ms / 100) 3.867 -> 3.867 ( +0.00%) [ +0.00% +0.10% +0.05% / +0.00% +0.52% +0.67%] index_copy_ strided 7 : Elapsed 0.039 ms (3.867 ms / 100) 4.019 -> 4.020 ( +0.02%) [ +0.00% +0.17% +0.12% / +0.02% +0.45% +0.35%] index_add_ perm : Elapsed 0.040 ms (4.019 ms / 100) 3.852 -> 3.858 ( +0.16%) [ +0.00% +0.21% +0.21% / +0.16% +0.42% +0.42%] index_copy_ perm : Elapsed 0.039 ms (3.852 ms / 100) 4.019 -> 4.014 ( -0.12%) [ +0.22% +0.07% +0.00% / -0.12% +0.45% +0.30%] index_add_ perm_sorted : Elapsed 0.040 ms (4.028 ms / 100) 3.856 -> 3.851 ( -0.13%) [ +0.13% +0.10% +0.00% / -0.13% +0.54% +0.34%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.861 ms / 100) 5.482 -> 5.487 ( +0.09%) [ +0.00% +0.09% +0.11% / +0.16% +0.09% +0.11%] index_select const : Elapsed 0.055 ms (5.482 ms / 100) 5.487 -> 5.496 ( +0.16%) [ +0.04% +0.07% +0.00% / +0.16% +0.20% +0.16%] index_select wrap : Elapsed 0.055 ms (5.489 ms / 100) 5.489 -> 5.493 ( +0.07%) [ +0.05% +0.02% +0.00% / +0.07% +0.16% +0.13%] index_select linear : Elapsed 0.055 ms (5.492 ms / 100) 5.492 -> 5.488 ( -0.07%) [ +0.02% +0.00% +0.00% / -0.07% +0.05% +0.02%] index_select reverse : Elapsed 0.055 ms (5.493 ms / 100) 5.486 -> 5.487 ( +0.02%) [ +0.22% +0.13% +0.00% / +0.18% +0.02% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.498 ms / 100) 5.490 -> 5.485 ( -0.09%) [ +0.00% +0.07% +0.05% / +0.02% -0.09% -0.07%] index_select skip256 : Elapsed 0.055 ms (5.490 ms / 100) 5.488 -> 5.487 ( -0.02%) [ +0.04% +0.04% +0.00% / +0.05% -0.02% +0.02%] index_select spread : Elapsed 0.055 ms (5.490 ms / 100) 5.483 -> 5.482 ( -0.02%) [ +0.00% +0.20% +0.11% / -0.02% +0.15% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.483 ms / 100) 5.487 -> 5.491 ( +0.07%) [ +0.18% +0.00% +0.02% / +0.13% +0.07% +0.15%] index_select strided 5 : Elapsed 0.055 ms (5.497 ms / 100) 5.486 -> 5.491 ( +0.09%) [ +0.00% +0.09% +0.04% / +0.09% +0.24% +0.09%] index_select strided 7 : Elapsed 0.055 ms (5.486 ms / 100) 5.486 -> 5.483 ( -0.05%) [ +0.05% +0.00% +0.00% / -0.05% +0.16% +0.16%] index_select strided 8 : Elapsed 0.055 ms (5.489 ms / 100) 5.490 -> 5.486 ( -0.07%) [ +0.02% +0.04% +0.00% / -0.07% +0.02% -0.02%] index_select random : Elapsed 0.055 ms (5.491 ms / 100) 5.486 -> 5.484 ( -0.04%) [ +0.16% +0.22% +0.00% / -0.04% +0.09% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.495 ms / 100) B = [20, 40, 5, 4] (stride (4, 400, 80, 1)) dim = 1 fill_cnt = 16 2.071 -> 2.072 ( +0.05%) [ +0.05% +0.10% +0.00% / +0.05% +0.39% +0.34%] index_fill_ const : Elapsed 0.021 ms (2.072 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.05% +0.10% +0.00% / +0.10% +0.39% +0.34%] index_fill_ linear : Elapsed 0.021 ms (2.072 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.48% +0.48%] index_fill_ reverse : Elapsed 0.021 ms (2.070 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.48% +0.53%] index_fill_ skip64 : Elapsed 0.021 ms (2.070 ms / 100) 2.069 -> 2.069 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.48% +0.53%] index_fill_ skip256 : Elapsed 0.021 ms (2.069 ms / 100) 2.070 -> 2.069 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.43% +0.43%] index_fill_ spread : Elapsed 0.021 ms (2.070 ms / 100) 2.071 -> 2.071 ( +0.00%) [ +0.05% +0.00% +0.00% / +0.00% +0.77% +0.24%] index_fill_ strided 3 : Elapsed 0.021 ms (2.072 ms / 100) 2.070 -> 2.073 ( +0.14%) [ +0.05% +0.10% +0.00% / +0.14% +0.39% +0.29%] index_fill_ strided 5 : Elapsed 0.021 ms (2.071 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.05% +0.05% +0.00% / +0.05% +0.43% +0.43%] index_fill_ strided 7 : Elapsed 0.021 ms (2.071 ms / 100) 2.070 -> 2.070 ( +0.00%) [ +0.10% +0.00% +0.00% / +0.00% +0.43% +0.39%] index_fill_ strided 8 : Elapsed 0.021 ms (2.072 ms / 100) 2.073 -> 2.072 ( -0.05%) [ +0.14% +0.00% +0.05% / -0.05% +0.29% +0.14%] index_fill_ strided 16 : Elapsed 0.021 ms (2.076 ms / 100) 2.071 -> 2.073 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.29% +0.24%] index_fill_ random : Elapsed 0.021 ms (2.073 ms / 100) 2.074 -> 2.073 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.05% +0.10%] index_fill_ random_sorted : Elapsed 0.021 ms (2.074 ms / 100) 2.074 -> 2.074 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.05% +0.05%] index_fill_ perm : Elapsed 0.021 ms (2.074 ms / 100) 2.070 -> 2.071 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.39% +0.39%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.070 ms / 100) B = [20, 40, 5, 4] (stride (4, 400, 80, 1)) A = [20, 16, 5, 4] (stride (5, 400, 1, 100)) dim = 1 3.961 -> 3.968 ( +0.18%) [ +0.00% +0.13% +0.05% / +0.18% +0.76% +0.76%] index_add_ linear : Elapsed 0.040 ms (3.961 ms / 100) 3.836 -> 3.837 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.73% +0.65%] index_copy_ linear : Elapsed 0.038 ms (3.836 ms / 100) 3.968 -> 3.971 ( +0.08%) [ +0.05% +0.08% +0.00% / +0.08% +0.78% +0.83%] index_add_ reverse : Elapsed 0.040 ms (3.970 ms / 100) 3.850 -> 3.850 ( +0.00%) [ +0.03% +0.03% +0.00% / +0.00% +0.83% +0.88%] index_copy_ reverse : Elapsed 0.039 ms (3.851 ms / 100) 3.972 -> 3.973 ( +0.03%) [ +0.15% +0.08% +0.00% / +0.03% +0.98% +0.81%] index_add_ spread : Elapsed 0.040 ms (3.978 ms / 100) 3.845 -> 3.853 ( +0.21%) [ +0.18% +0.05% +0.00% / +0.21% +0.96% +0.86%] index_copy_ spread : Elapsed 0.039 ms (3.852 ms / 100) 3.973 -> 3.970 ( -0.08%) [ +0.05% +0.00% +0.00% / -0.08% +0.65% +0.76%] index_add_ strided 3 : Elapsed 0.040 ms (3.975 ms / 100) 3.841 -> 3.839 ( -0.05%) [ +0.10% +0.10% +0.00% / -0.05% +0.76% +0.78%] index_copy_ strided 3 : Elapsed 0.038 ms (3.845 ms / 100) 3.970 -> 3.972 ( +0.05%) [ +0.00% +0.03% +0.00% / +0.05% +0.73% +0.76%] index_add_ strided 7 : Elapsed 0.040 ms (3.970 ms / 100) 3.852 -> 3.851 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.75% +0.78%] index_copy_ strided 7 : Elapsed 0.039 ms (3.852 ms / 100) 3.962 -> 3.963 ( +0.03%) [ +0.08% +0.00% +0.05% / +0.03% +0.81% +0.88%] index_add_ perm : Elapsed 0.040 ms (3.965 ms / 100) 3.836 -> 3.834 ( -0.05%) [ +0.00% +0.00% +0.03% / -0.05% +0.76% +0.83%] index_copy_ perm : Elapsed 0.038 ms (3.836 ms / 100) 3.971 -> 3.971 ( +0.00%) [ +0.08% +0.05% +0.00% / +0.00% +0.81% +0.71%] index_add_ perm_sorted : Elapsed 0.040 ms (3.974 ms / 100) 3.842 -> 3.841 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.78% +0.68%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.843 ms / 100) 5.554 -> 5.552 ( -0.04%) [ +0.00% +0.16% +0.09% / +0.07% +0.04% -0.04%] index_select const : Elapsed 0.056 ms (5.554 ms / 100) 5.566 -> 5.562 ( -0.07%) [ +0.04% +0.04% +0.00% / +0.13% -0.02% -0.07%] index_select wrap : Elapsed 0.056 ms (5.568 ms / 100) 5.561 -> 5.561 ( +0.00%) [ +0.14% +0.16% +0.00% / +0.02% +0.00% +0.22%] index_select linear : Elapsed 0.056 ms (5.569 ms / 100) 5.571 -> 5.563 ( -0.14%) [ +0.00% +0.05% +0.00% / -0.07% -0.14% +0.00%] index_select reverse : Elapsed 0.056 ms (5.571 ms / 100) 5.549 -> 5.553 ( +0.07%) [ +0.14% +0.09% +0.00% / +0.14% +0.07% +0.54%] index_select skip64 : Elapsed 0.056 ms (5.557 ms / 100) 5.550 -> 5.557 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.25% +0.29%] index_select skip256 : Elapsed 0.056 ms (5.557 ms / 100) 5.564 -> 5.561 ( -0.05%) [ +0.16% +0.04% +0.00% / +0.09% -0.05% +0.09%] index_select spread : Elapsed 0.056 ms (5.573 ms / 100) 5.563 -> 5.563 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.02% +0.00% +0.16%] index_select strided 3 : Elapsed 0.056 ms (5.565 ms / 100) 5.564 -> 5.564 ( +0.00%) [ +0.11% +0.20% +0.00% / +0.00% +0.00% +0.07%] index_select strided 5 : Elapsed 0.056 ms (5.570 ms / 100) 5.567 -> 5.565 ( -0.04%) [ +0.11% +0.04% +0.00% / +0.02% +0.09% -0.04%] index_select strided 7 : Elapsed 0.056 ms (5.573 ms / 100) 5.553 -> 5.552 ( -0.02%) [ +0.14% +0.04% +0.00% / -0.02% +0.22% +0.14%] index_select strided 8 : Elapsed 0.056 ms (5.561 ms / 100) 5.565 -> 5.565 ( +0.00%) [ +0.04% +0.09% +0.00% / +0.00% +0.02% +0.04%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.565 -> 5.561 ( -0.07%) [ +0.13% +0.07% +0.00% / -0.07% +0.02% -0.05%] index_select random_sorted : Elapsed 0.056 ms (5.572 ms / 100) B = [20, 40, 5, 4] (stride (1, 400, 80, 20)) A = [20, 16, 5, 4] (stride (1, 80, 1280, 20)) dim = 1 3.839 -> 3.839 ( +0.00%) [ +0.00% +0.08% +0.03% / +0.00% +0.57% +0.55%] index_add_ linear : Elapsed 0.038 ms (3.839 ms / 100) 3.694 -> 3.694 ( +0.00%) [ +0.00% +0.14% +0.14% / +0.00% +0.54% +0.60%] index_copy_ linear : Elapsed 0.037 ms (3.694 ms / 100) 3.866 -> 3.867 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.65% +0.67%] index_add_ reverse : Elapsed 0.039 ms (3.868 ms / 100) 3.720 -> 3.719 ( -0.03%) [ +0.08% +0.00% +0.00% / -0.03% +0.62% +0.65%] index_copy_ reverse : Elapsed 0.037 ms (3.723 ms / 100) 3.846 -> 3.847 ( +0.03%) [ +0.05% +0.00% +0.00% / +0.03% +0.55% +0.47%] index_add_ spread : Elapsed 0.038 ms (3.848 ms / 100) 3.703 -> 3.702 ( -0.03%) [ +0.08% +0.03% +0.00% / -0.03% +0.57% +0.49%] index_copy_ spread : Elapsed 0.037 ms (3.706 ms / 100) 3.859 -> 3.860 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.52% +0.52%] index_add_ strided 3 : Elapsed 0.039 ms (3.860 ms / 100) 3.710 -> 3.711 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.49% +0.46%] index_copy_ strided 3 : Elapsed 0.037 ms (3.711 ms / 100) 3.871 -> 3.872 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.49% +0.46%] index_add_ strided 7 : Elapsed 0.039 ms (3.873 ms / 100) 3.725 -> 3.727 ( +0.05%) [ +0.05% +0.00% +0.05% / +0.05% +0.38% +0.40%] index_copy_ strided 7 : Elapsed 0.037 ms (3.727 ms / 100) 3.839 -> 3.841 ( +0.05%) [ +0.05% +0.08% +0.00% / +0.05% +0.57% +0.52%] index_add_ perm : Elapsed 0.038 ms (3.841 ms / 100) 3.693 -> 3.693 ( +0.00%) [ +0.00% +0.14% +0.16% / +0.00% +0.65% +0.62%] index_copy_ perm : Elapsed 0.037 ms (3.693 ms / 100) 3.841 -> 3.843 ( +0.05%) [ +0.03% +0.05% +0.00% / +0.05% +0.42% +0.49%] index_add_ perm_sorted : Elapsed 0.038 ms (3.842 ms / 100) 3.695 -> 3.696 ( +0.03%) [ +0.00% +0.14% +0.11% / +0.03% +0.43% +0.46%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.695 ms / 100) 5.472 -> 5.466 ( -0.11%) [ +0.00% +0.09% +0.02% / -0.11% +0.09% +0.11%] index_select const : Elapsed 0.055 ms (5.472 ms / 100) 5.469 -> 5.467 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.31% +0.24%] index_select wrap : Elapsed 0.055 ms (5.471 ms / 100) 5.474 -> 5.466 ( -0.15%) [ +0.00% +0.04% +0.11% / -0.15% +0.16% +0.15%] index_select linear : Elapsed 0.055 ms (5.474 ms / 100) 5.473 -> 5.474 ( +0.02%) [ +0.07% +0.00% +0.04% / +0.02% +0.15% +0.02%] index_select reverse : Elapsed 0.055 ms (5.477 ms / 100) 5.470 -> 5.474 ( +0.07%) [ +0.15% +0.22% +0.00% / +0.13% +0.07% +0.15%] index_select skip64 : Elapsed 0.055 ms (5.478 ms / 100) 5.474 -> 5.471 ( -0.05%) [ +0.13% +0.00% +0.13% / -0.05% +0.02% -0.02%] index_select skip256 : Elapsed 0.055 ms (5.481 ms / 100) 5.475 -> 5.478 ( +0.05%) [ +0.11% +0.00% +0.04% / +0.05% +0.09% +0.11%] index_select spread : Elapsed 0.055 ms (5.481 ms / 100) 5.473 -> 5.468 ( -0.09%) [ +0.00% +0.07% +0.00% / -0.09% +0.15% +0.15%] index_select strided 3 : Elapsed 0.055 ms (5.473 ms / 100) 5.470 -> 5.471 ( +0.02%) [ +0.04% +0.16% +0.00% / +0.02% +0.18% +0.20%] index_select strided 5 : Elapsed 0.055 ms (5.472 ms / 100) 5.476 -> 5.469 ( -0.13%) [ +0.00% +0.04% +0.00% / -0.13% +0.16% -0.02%] index_select strided 7 : Elapsed 0.055 ms (5.476 ms / 100) 5.467 -> 5.473 ( +0.11%) [ +0.05% +0.18% +0.00% / +0.11% +0.27% +0.16%] index_select strided 8 : Elapsed 0.055 ms (5.470 ms / 100) 5.480 -> 5.476 ( -0.07%) [ +0.11% +0.02% +0.00% / +0.02% -0.07% -0.07%] index_select random : Elapsed 0.055 ms (5.486 ms / 100) 5.473 -> 5.464 ( -0.16%) [ +0.09% +0.00% +0.00% / -0.16% +0.13% +0.04%] index_select random_sorted : Elapsed 0.055 ms (5.478 ms / 100) B = [20, 40, 5, 4] (stride (200, 5, 1, 4000)) A = [20, 16, 5, 4] (stride (4, 80, 1280, 1)) dim = 1 4.019 -> 4.019 ( +0.00%) [ +0.00% +0.17% +0.00% / +0.00% +0.85% +0.72%] index_add_ linear : Elapsed 0.040 ms (4.019 ms / 100) 3.880 -> 3.884 ( +0.10%) [ +0.00% +0.28% +0.08% / +0.10% +0.98% +0.88%] index_copy_ linear : Elapsed 0.039 ms (3.880 ms / 100) 4.001 -> 4.012 ( +0.27%) [ +0.15% +0.30% +0.00% / +0.27% +0.90% +0.85%] index_add_ reverse : Elapsed 0.040 ms (4.007 ms / 100) 3.871 -> 3.880 ( +0.23%) [ +0.21% +0.21% +0.00% / +0.23% +1.34% +0.80%] index_copy_ reverse : Elapsed 0.039 ms (3.879 ms / 100) 4.000 -> 4.000 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.00% +0.77% +0.75%] index_add_ spread : Elapsed 0.040 ms (4.003 ms / 100) 3.878 -> 3.883 ( +0.13%) [ +0.00% +0.10% +0.05% / +0.13% +1.24% +0.98%] index_copy_ spread : Elapsed 0.039 ms (3.878 ms / 100) 4.003 -> 4.000 ( -0.07%) [ +0.00% +0.00% +0.00% / -0.07% +0.77% +0.75%] index_add_ strided 3 : Elapsed 0.040 ms (4.003 ms / 100) 3.866 -> 3.864 ( -0.05%) [ +0.00% +0.00% +0.00% / -0.05% +0.85% +0.80%] index_copy_ strided 3 : Elapsed 0.039 ms (3.866 ms / 100) 4.001 -> 4.004 ( +0.07%) [ +0.00% +0.25% +0.15% / +0.07% +0.95% +0.97%] index_add_ strided 7 : Elapsed 0.040 ms (4.001 ms / 100) 3.871 -> 3.880 ( +0.23%) [ +0.00% +0.28% +0.23% / +0.23% +1.27% +1.34%] index_copy_ strided 7 : Elapsed 0.039 ms (3.871 ms / 100) 4.019 -> 4.027 ( +0.20%) [ +0.10% +0.25% +0.00% / +0.20% +0.77% +0.80%] index_add_ perm : Elapsed 0.040 ms (4.023 ms / 100) 3.881 -> 3.891 ( +0.26%) [ +0.21% +0.31% +0.00% / +0.26% +0.95% +0.98%] index_copy_ perm : Elapsed 0.039 ms (3.889 ms / 100) 3.999 -> 3.999 ( +0.00%) [ +0.00% +0.15% +0.03% / +0.00% +0.73% +0.73%] index_add_ perm_sorted : Elapsed 0.040 ms (3.999 ms / 100) 3.864 -> 3.863 ( -0.03%) [ +0.00% +0.13% +0.00% / -0.03% +0.80% +0.83%] index_copy_ perm_sorted : Elapsed 0.039 ms (3.864 ms / 100) 5.553 -> 5.559 ( +0.11%) [ +0.14% +0.14% +0.00% / +0.11% +0.31% +0.18%] index_select const : Elapsed 0.056 ms (5.561 ms / 100) 5.580 -> 5.567 ( -0.23%) [ +0.14% +0.11% +0.00% / -0.02% -0.23% -0.20%] index_select wrap : Elapsed 0.056 ms (5.588 ms / 100) 5.572 -> 5.572 ( +0.00%) [ +0.00% +0.11% +0.02% / +0.00% +0.11% +0.18%] index_select linear : Elapsed 0.056 ms (5.572 ms / 100) 5.577 -> 5.569 ( -0.14%) [ +0.09% +0.14% +0.00% / +0.02% -0.04% -0.14%] index_select reverse : Elapsed 0.056 ms (5.582 ms / 100) 5.554 -> 5.558 ( +0.07%) [ +0.00% +0.05% +0.04% / +0.07% +0.14% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.554 ms / 100) 5.556 -> 5.556 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.11% +0.20%] index_select skip256 : Elapsed 0.056 ms (5.558 ms / 100) 5.574 -> 5.564 ( -0.18%) [ +0.02% +0.00% +0.00% / +0.00% -0.18% -0.14%] index_select spread : Elapsed 0.056 ms (5.575 ms / 100) 5.574 -> 5.570 ( -0.07%) [ +0.05% +0.05% +0.00% / -0.07% +0.09% -0.02%] index_select strided 3 : Elapsed 0.056 ms (5.577 ms / 100) 5.571 -> 5.563 ( -0.14%) [ +0.11% +0.13% +0.00% / +0.22% -0.02% -0.14%] index_select strided 5 : Elapsed 0.056 ms (5.577 ms / 100) 5.567 -> 5.578 ( +0.20%) [ +0.00% +0.13% +0.02% / +0.23% +0.25% +0.20%] index_select strided 7 : Elapsed 0.056 ms (5.567 ms / 100) 5.559 -> 5.556 ( -0.05%) [ +0.00% +0.09% +0.05% / -0.05% +0.14% +0.23%] index_select strided 8 : Elapsed 0.056 ms (5.559 ms / 100) 5.570 -> 5.572 ( +0.04%) [ +0.05% +0.00% +0.13% / +0.14% +0.22% +0.04%] index_select random : Elapsed 0.056 ms (5.573 ms / 100) 5.571 -> 5.574 ( +0.05%) [ +0.04% +0.23% +0.00% / +0.09% +0.05% +0.07%] index_select random_sorted : Elapsed 0.056 ms (5.573 ms / 100) B = [20, 40, 5, 4] (stride (200, 1, 40, 4000)) A = [20, 16, 5, 4] (stride (20, 400, 1, 5)) dim = 1 3.935 -> 3.935 ( +0.00%) [ +0.05% +0.05% +0.00% / +0.00% +0.71% +0.79%] index_add_ linear : Elapsed 0.039 ms (3.937 ms / 100) 3.821 -> 3.818 ( -0.08%) [ +0.03% +0.00% +0.00% / -0.08% +0.55% +0.47%] index_copy_ linear : Elapsed 0.038 ms (3.822 ms / 100) 3.937 -> 3.941 ( +0.10%) [ +0.10% +0.03% +0.00% / +0.10% +0.71% +0.74%] index_add_ reverse : Elapsed 0.039 ms (3.941 ms / 100) 3.812 -> 3.820 ( +0.21%) [ +0.10% +0.00% +0.00% / +0.21% +0.71% +0.66%] index_copy_ reverse : Elapsed 0.038 ms (3.816 ms / 100) 3.935 -> 3.936 ( +0.03%) [ +0.03% +0.03% +0.00% / +0.03% +0.61% +0.56%] index_add_ spread : Elapsed 0.039 ms (3.936 ms / 100) 3.815 -> 3.814 ( -0.03%) [ +0.00% +0.03% +0.03% / -0.03% +0.50% +0.55%] index_copy_ spread : Elapsed 0.038 ms (3.815 ms / 100) 3.935 -> 3.935 ( +0.00%) [ +0.03% +0.10% +0.00% / +0.00% +0.56% +0.51%] index_add_ strided 3 : Elapsed 0.039 ms (3.936 ms / 100) 3.810 -> 3.810 ( +0.00%) [ +0.05% +0.03% +0.00% / +0.00% +0.50% +0.50%] index_copy_ strided 3 : Elapsed 0.038 ms (3.812 ms / 100) 3.941 -> 3.940 ( -0.03%) [ +0.08% +0.00% +0.05% / -0.03% +0.58% +0.53%] index_add_ strided 7 : Elapsed 0.039 ms (3.944 ms / 100) 3.815 -> 3.811 ( -0.10%) [ +0.10% +0.00% +0.05% / -0.10% +0.63% +0.58%] index_copy_ strided 7 : Elapsed 0.038 ms (3.819 ms / 100) 3.939 -> 3.938 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.46% +0.43%] index_add_ perm : Elapsed 0.039 ms (3.939 ms / 100) 3.821 -> 3.824 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.08% +0.47% +0.42%] index_copy_ perm : Elapsed 0.038 ms (3.821 ms / 100) 3.939 -> 3.940 ( +0.03%) [ +0.05% +0.05% +0.00% / +0.03% +0.46% +0.46%] index_add_ perm_sorted : Elapsed 0.039 ms (3.941 ms / 100) 3.822 -> 3.824 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.44% +0.34%] index_copy_ perm_sorted : Elapsed 0.038 ms (3.822 ms / 100) 5.560 -> 5.566 ( +0.11%) [ +0.00% +0.04% +0.07% / +0.13% +0.11% +0.27%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.559 -> 5.563 ( +0.07%) [ +0.00% +0.22% +0.14% / +0.07% +0.27% +0.27%] index_select wrap : Elapsed 0.056 ms (5.559 ms / 100) 5.564 -> 5.565 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.18% +0.20%] index_select linear : Elapsed 0.056 ms (5.567 ms / 100) 5.567 -> 5.573 ( +0.11%) [ +0.04% +0.00% +0.09% / +0.20% +0.11% +0.14%] index_select reverse : Elapsed 0.056 ms (5.569 ms / 100) 5.565 -> 5.556 ( -0.16%) [ +0.04% +0.00% +0.04% / +0.07% -0.16% -0.13%] index_select skip64 : Elapsed 0.056 ms (5.567 ms / 100) 5.562 -> 5.558 ( -0.07%) [ +0.00% +0.18% +0.09% / +0.05% -0.07% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.562 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.23% +0.00% +0.04% / +0.16% +0.05% +0.18%] index_select spread : Elapsed 0.056 ms (5.574 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.16% +0.00% +0.18% / -0.11% +0.11% +0.14%] index_select strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.565 -> 5.567 ( +0.04%) [ +0.00% +0.00% +0.05% / +0.04% +0.13% +0.05%] index_select strided 5 : Elapsed 0.056 ms (5.565 ms / 100) 5.560 -> 5.569 ( +0.16%) [ +0.07% +0.00% +0.07% / +0.16% +0.22% +0.16%] index_select strided 7 : Elapsed 0.056 ms (5.564 ms / 100) 5.562 -> 5.558 ( -0.07%) [ +0.02% +0.05% +0.00% / -0.07% +0.31% +0.13%] index_select strided 8 : Elapsed 0.056 ms (5.563 ms / 100) 5.566 -> 5.563 ( -0.05%) [ +0.05% +0.05% +0.00% / +0.09% -0.05% -0.04%] index_select random : Elapsed 0.056 ms (5.569 ms / 100) 5.563 -> 5.565 ( +0.04%) [ +0.00% +0.02% +0.09% / +0.13% +0.04% +0.11%] index_select random_sorted : Elapsed 0.056 ms (5.563 ms / 100) B = [20, 40, 5, 4] (stride (5, 100, 1, 4000)) A = [20, 16, 5, 4] (stride (320, 1, 16, 80)) dim = 1 4.271 -> 4.276 ( +0.12%) [ +0.07% +0.09% +0.00% / +0.12% +0.75% +0.98%] index_add_ linear : Elapsed 0.043 ms (4.274 ms / 100) 4.119 -> 4.123 ( +0.10%) [ +0.15% +0.07% +0.00% / +0.10% +0.68% +0.83%] index_copy_ linear : Elapsed 0.041 ms (4.125 ms / 100) 4.270 -> 4.276 ( +0.14%) [ +0.16% +0.14% +0.00% / +0.14% +0.80% +0.77%] index_add_ reverse : Elapsed 0.043 ms (4.277 ms / 100) 4.121 -> 4.123 ( +0.05%) [ +0.00% +0.05% +0.00% / +0.05% +0.68% +0.73%] index_copy_ reverse : Elapsed 0.041 ms (4.121 ms / 100) 4.261 -> 4.273 ( +0.28%) [ +0.00% +0.45% +0.40% / +0.28% +1.20% +1.20%] index_add_ spread : Elapsed 0.043 ms (4.261 ms / 100) 4.113 -> 4.122 ( +0.22%) [ +0.00% +0.34% +0.32% / +0.22% +1.09% +1.05%] index_copy_ spread : Elapsed 0.041 ms (4.113 ms / 100) 4.266 -> 4.268 ( +0.05%) [ +0.21% +0.00% +0.07% / +0.05% +0.82% +0.75%] index_add_ strided 3 : Elapsed 0.043 ms (4.275 ms / 100) 4.112 -> 4.117 ( +0.12%) [ +0.15% +0.00% +0.17% / +0.12% +0.75% +0.80%] index_copy_ strided 3 : Elapsed 0.041 ms (4.118 ms / 100) 4.265 -> 4.274 ( +0.21%) [ +0.26% +0.00% +0.23% / +0.21% +1.13% +0.91%] index_add_ strided 7 : Elapsed 0.043 ms (4.276 ms / 100) 4.116 -> 4.122 ( +0.15%) [ +0.12% +0.00% +0.15% / +0.15% +0.95% +0.87%] index_copy_ strided 7 : Elapsed 0.041 ms (4.121 ms / 100) 4.271 -> 4.274 ( +0.07%) [ +0.00% +0.12% +0.12% / +0.07% +0.91% +0.96%] index_add_ perm : Elapsed 0.043 ms (4.271 ms / 100) 4.119 -> 4.123 ( +0.10%) [ +0.02% +0.12% +0.00% / +0.10% +0.70% +0.78%] index_copy_ perm : Elapsed 0.041 ms (4.120 ms / 100) 4.265 -> 4.275 ( +0.23%) [ +0.02% +0.12% +0.00% / +0.23% +0.61% +1.01%] index_add_ perm_sorted : Elapsed 0.043 ms (4.266 ms / 100) 4.115 -> 4.118 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.68% +0.80%] index_copy_ perm_sorted : Elapsed 0.041 ms (4.118 ms / 100) 5.560 -> 5.556 ( -0.07%) [ +0.00% +0.05% +0.07% / -0.07% +0.05% +0.00%] index_select const : Elapsed 0.056 ms (5.560 ms / 100) 5.559 -> 5.558 ( -0.02%) [ +0.07% +0.09% +0.00% / +0.07% -0.02% +0.18%] index_select wrap : Elapsed 0.056 ms (5.563 ms / 100) 5.563 -> 5.557 ( -0.11%) [ +0.00% +0.00% +0.00% / -0.04% -0.11% +0.00%] index_select linear : Elapsed 0.056 ms (5.563 ms / 100) 5.560 -> 5.563 ( +0.05%) [ +0.11% +0.05% +0.00% / +0.07% +0.05% +0.07%] index_select reverse : Elapsed 0.056 ms (5.566 ms / 100) 5.559 -> 5.561 ( +0.04%) [ +0.05% +0.02% +0.00% / +0.07% +0.07% +0.04%] index_select skip64 : Elapsed 0.056 ms (5.562 ms / 100) 5.551 -> 5.558 ( +0.13%) [ +0.14% +0.04% +0.00% / +0.18% +0.13% +0.13%] index_select skip256 : Elapsed 0.056 ms (5.559 ms / 100) 5.562 -> 5.558 ( -0.07%) [ +0.11% +0.00% +0.11% / -0.07% +0.00% +0.04%] index_select spread : Elapsed 0.056 ms (5.568 ms / 100) 5.560 -> 5.561 ( +0.02%) [ +0.04% +0.00% +0.25% / +0.16% +0.02% +0.05%] index_select strided 3 : Elapsed 0.056 ms (5.562 ms / 100) 5.563 -> 5.554 ( -0.16%) [ +0.14% +0.09% +0.00% / +0.04% -0.16% -0.07%] index_select strided 5 : Elapsed 0.056 ms (5.571 ms / 100) 5.561 -> 5.564 ( +0.05%) [ +0.27% +0.16% +0.00% / +0.05% +0.16% +0.14%] index_select strided 7 : Elapsed 0.056 ms (5.576 ms / 100) 5.565 -> 5.566 ( +0.02%) [ +0.07% +0.00% +0.00% / +0.07% +0.02% +0.07%] index_select strided 8 : Elapsed 0.056 ms (5.569 ms / 100) 5.566 -> 5.560 ( -0.11%) [ +0.02% +0.00% +0.07% / -0.11% -0.11% -0.02%] index_select random : Elapsed 0.056 ms (5.567 ms / 100) 5.557 -> 5.566 ( +0.16%) [ +0.00% +0.09% +0.31% / +0.16% +0.18% +0.20%] index_select random_sorted : Elapsed 0.056 ms (5.557 ms / 100) B = [20, 40, 5, 4] (stride (1, 100, 20, 4000)) A = [20, 16, 5, 4] (stride (320, 1, 64, 16)) dim = 1 3.886 -> 3.886 ( +0.00%) [ +0.05% +0.08% +0.00% / +0.00% +0.77% +0.90%] index_add_ linear : Elapsed 0.039 ms (3.888 ms / 100) 3.742 -> 3.744 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.53% +0.56%] index_copy_ linear : Elapsed 0.037 ms (3.742 ms / 100) 3.905 -> 3.918 ( +0.33%) [ +0.33% +0.00% +0.33% / +0.33% +0.38% +0.38%] index_add_ reverse : Elapsed 0.039 ms (3.918 ms / 100) 3.763 -> 3.763 ( +0.00%) [ +0.00% +0.11% +0.03% / +0.00% +0.50% +0.50%] index_copy_ reverse : Elapsed 0.038 ms (3.763 ms / 100) 3.886 -> 3.887 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.03% +0.67% +0.87%] index_add_ spread : Elapsed 0.039 ms (3.886 ms / 100) 3.756 -> 3.757 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.45% +0.48%] index_copy_ spread : Elapsed 0.038 ms (3.756 ms / 100) 3.909 -> 3.908 ( -0.03%) [ +0.33% +0.00% +0.33% / -0.03% +0.36% +0.31%] index_add_ strided 3 : Elapsed 0.039 ms (3.922 ms / 100) 3.761 -> 3.764 ( +0.08%) [ +0.08% +0.11% +0.00% / +0.08% +0.27% +0.24%] index_copy_ strided 3 : Elapsed 0.038 ms (3.764 ms / 100) 3.917 -> 3.918 ( +0.03%) [ +0.03% +0.15% +0.00% / +0.03% +0.20% +0.23%] index_add_ strided 7 : Elapsed 0.039 ms (3.918 ms / 100) 3.767 -> 3.767 ( +0.00%) [ +0.00% +0.05% +0.00% / +0.00% +0.53% +0.56%] index_copy_ strided 7 : Elapsed 0.038 ms (3.767 ms / 100) 3.891 -> 3.891 ( +0.00%) [ +0.00% +0.10% +0.00% / +0.00% +0.51% +0.64%] index_add_ perm : Elapsed 0.039 ms (3.891 ms / 100) 3.749 -> 3.748 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.56% +0.48%] index_copy_ perm : Elapsed 0.037 ms (3.749 ms / 100) 3.892 -> 3.900 ( +0.21%) [ +0.05% +0.08% +0.00% / +0.21% +0.72% +0.64%] index_add_ perm_sorted : Elapsed 0.039 ms (3.894 ms / 100) 3.749 -> 3.748 ( -0.03%) [ +0.00% +0.00% +0.00% / -0.03% +0.51% +0.48%] index_copy_ perm_sorted : Elapsed 0.037 ms (3.749 ms / 100) 5.482 -> 5.485 ( +0.05%) [ +0.05% +0.15% +0.00% / +0.05% +0.18% +0.20%] index_select const : Elapsed 0.055 ms (5.485 ms / 100) 5.484 -> 5.488 ( +0.07%) [ +0.22% +0.04% +0.00% / +0.07% +0.20% +0.18%] index_select wrap : Elapsed 0.055 ms (5.496 ms / 100) 5.482 -> 5.491 ( +0.16%) [ +0.11% +0.20% +0.00% / +0.16% +0.24% +0.20%] index_select linear : Elapsed 0.055 ms (5.488 ms / 100) 5.485 -> 5.487 ( +0.04%) [ +0.11% +0.00% +0.00% / +0.16% +0.11% +0.04%] index_select reverse : Elapsed 0.055 ms (5.491 ms / 100) 5.492 -> 5.486 ( -0.11%) [ +0.09% +0.04% +0.00% / +0.05% -0.07% -0.11%] index_select skip64 : Elapsed 0.055 ms (5.497 ms / 100) 5.489 -> 5.486 ( -0.05%) [ +0.07% +0.00% +0.09% / -0.04% -0.05% +0.04%] index_select skip256 : Elapsed 0.055 ms (5.493 ms / 100) 5.490 -> 5.484 ( -0.11%) [ +0.05% +0.00% +0.13% / -0.11% -0.07% -0.05%] index_select spread : Elapsed 0.055 ms (5.493 ms / 100) 5.491 -> 5.486 ( -0.09%) [ +0.05% +0.00% +0.04% / -0.09% -0.09% +0.11%] index_select strided 3 : Elapsed 0.055 ms (5.494 ms / 100) 5.491 -> 5.481 ( -0.18%) [ +0.00% +0.02% +0.07% / -0.18% -0.02% -0.13%] index_select strided 5 : Elapsed 0.055 ms (5.491 ms / 100) 5.489 -> 5.492 ( +0.05%) [ +0.07% +0.00% +0.07% / +0.13% +0.15% +0.05%] index_select strided 7 : Elapsed 0.055 ms (5.493 ms / 100) 5.483 -> 5.491 ( +0.15%) [ +0.11% +0.11% +0.00% / +0.24% +0.15% +0.31%] index_select strided 8 : Elapsed 0.055 ms (5.489 ms / 100) 5.487 -> 5.490 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.07% +0.15% +0.05%] index_select random : Elapsed 0.055 ms (5.490 ms / 100) 5.487 -> 5.487 ( +0.00%) [ +0.11% +0.00% +0.05% / +0.00% +0.07% +0.13%] index_select random_sorted : Elapsed 0.055 ms (5.493 ms / 100) out_shape = [20, 16, 40, 4] in_shape = [20, 16, 5, 4] idx_dim = 2 B = [20, 16, 40, 4] (stride (1, 3200, 20, 800)) A = [20, 16, 5, 4] (stride (16, 1, 320, 1600)) dim = 2 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.07% / +0.00% +0.49% +0.42%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.07% +0.00% +0.43% / +0.07% +0.29% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.00% +0.00% / -0.07% +0.56% +0.42%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.382 ( +0.22%) [ +0.00% +0.15% +0.51% / +0.22% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.014 ms (1.379 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.70% / +0.00% +0.49% +0.42%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.00% +0.00% +0.72% / +0.14% +0.22% +0.22%] index_copy_ spread : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.56% / +0.00% +0.56% +0.42%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.07% +0.00% +0.36% / +0.07% +0.36% +0.29%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.07% +0.14% / +0.00% +0.49% +0.56%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.15% +0.00% +0.07% / +0.00% +0.44% +0.51%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.423 -> 1.423 ( +0.00%) [ +0.07% +0.00% +0.49% / +0.00% +0.42% +0.42%] index_add_ perm : Elapsed 0.014 ms (1.424 ms / 100) 1.380 -> 1.381 ( +0.07%) [ +0.00% +0.00% +0.00% / +0.07% +0.29% +0.22%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.21% +0.00% / +0.07% +0.56% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.22% +0.07% +0.00% / +0.00% +0.29% +0.29%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.382 ms / 100) 8.221 -> 8.213 ( -0.10%) [ +0.00% +0.01% +0.00% / -0.10% +0.17% +0.07%] index_select const : Elapsed 0.082 ms (8.221 ms / 100) 8.232 -> 8.247 ( +0.18%) [ +0.00% +0.27% +0.23% / +0.18% +0.23% +0.30%] index_select wrap : Elapsed 0.082 ms (8.232 ms / 100) 8.246 -> 8.242 ( -0.05%) [ +0.00% +0.21% +0.21% / +0.04% -0.05% -0.04%] index_select linear : Elapsed 0.082 ms (8.246 ms / 100) 8.240 -> 8.244 ( +0.05%) [ +0.00% +0.23% +0.06% / +0.05% +0.08% +0.13%] index_select reverse : Elapsed 0.082 ms (8.240 ms / 100) 8.224 -> 8.211 ( -0.16%) [ +0.00% +0.04% +0.04% / -0.16% -0.09% +0.12%] index_select skip64 : Elapsed 0.082 ms (8.224 ms / 100) 8.209 -> 8.209 ( +0.00%) [ +0.00% +0.06% +0.18% / +0.00% +0.16% +0.16%] index_select skip256 : Elapsed 0.082 ms (8.209 ms / 100) 8.239 -> 8.246 ( +0.08%) [ +0.00% +0.00% +0.16% / +0.08% +0.22% +0.19%] index_select spread : Elapsed 0.082 ms (8.239 ms / 100) 8.245 -> 8.255 ( +0.12%) [ +0.08% +0.00% +0.12% / +0.12% +0.21% +0.13%] index_select strided 3 : Elapsed 0.083 ms (8.252 ms / 100) 8.246 -> 8.240 ( -0.07%) [ +0.00% +0.21% +0.04% / -0.07% +0.01% +0.11%] index_select random : Elapsed 0.082 ms (8.246 ms / 100) 8.220 -> 8.249 ( +0.35%) [ +0.00% +0.18% +0.32% / +0.36% +0.35% +0.61%] index_select random_sorted : Elapsed 0.082 ms (8.220 ms / 100) B = [20, 16, 40, 4] (stride (64, 4, 1280, 1)) A = [20, 16, 5, 4] (stride (80, 5, 1, 1600)) dim = 2 0.631 -> 0.632 ( +0.16%) [ +0.00% +0.32% +13.47% / +0.16% +0.79% +0.79%] index_add_ linear : Elapsed 0.006 ms (0.631 ms / 100) 0.614 -> 0.614 ( +0.00%) [ +0.00% +0.16% +0.65% / +0.00% +0.81% +0.98%] index_copy_ linear : Elapsed 0.006 ms (0.614 ms / 100) 0.632 -> 0.631 ( -0.16%) [ +0.16% +0.00% +0.00% / -0.16% +0.47% +1.58%] index_add_ reverse : Elapsed 0.006 ms (0.633 ms / 100) 0.614 -> 0.615 ( +0.16%) [ +0.49% +0.16% +0.00% / +0.16% +0.65% +0.98%] index_copy_ reverse : Elapsed 0.006 ms (0.617 ms / 100) 0.632 -> 0.633 ( +0.16%) [ +0.16% +0.00% +0.00% / +0.16% +1.11% +0.79%] index_add_ spread : Elapsed 0.006 ms (0.633 ms / 100) 0.614 -> 0.615 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.16% +0.65% +0.65%] index_copy_ spread : Elapsed 0.006 ms (0.615 ms / 100) 0.632 -> 0.632 ( +0.00%) [ +0.00% +0.00% +0.16% / +0.00% +0.95% +0.79%] index_add_ strided 3 : Elapsed 0.006 ms (0.632 ms / 100) 0.614 -> 0.615 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +1.30% +0.98%] index_copy_ strided 3 : Elapsed 0.006 ms (0.614 ms / 100) 0.632 -> 0.631 ( -0.16%) [ +0.00% +0.00% +0.00% / -0.16% +0.95% +0.79%] index_add_ strided 7 : Elapsed 0.006 ms (0.632 ms / 100) 0.614 -> 0.614 ( +0.00%) [ +0.16% +0.00% +0.00% / +0.00% +1.30% +0.98%] index_copy_ strided 7 : Elapsed 0.006 ms (0.615 ms / 100) 0.631 -> 0.633 ( +0.32%) [ +0.48% +0.00% +0.16% / +0.32% +1.43% +1.11%] index_add_ perm : Elapsed 0.006 ms (0.634 ms / 100) 0.614 -> 0.614 ( +0.00%) [ +0.81% +0.16% +0.00% / +0.00% +0.81% +0.98%] index_copy_ perm : Elapsed 0.006 ms (0.619 ms / 100) 0.630 -> 0.632 ( +0.32%) [ +0.32% +0.48% +0.00% / +0.32% +1.11% +1.43%] index_add_ perm_sorted : Elapsed 0.006 ms (0.632 ms / 100) 0.614 -> 0.615 ( +0.16%) [ +0.00% +0.16% +0.00% / +0.16% +0.81% +0.81%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.614 ms / 100) 4.831 -> 4.831 ( +0.00%) [ +0.19% +0.04% +0.00% / +0.00% +0.33% +0.29%] index_select const : Elapsed 0.048 ms (4.840 ms / 100) 4.839 -> 4.829 ( -0.21%) [ +0.02% +0.00% +0.00% / +0.06% -0.21% -0.19%] index_select wrap : Elapsed 0.048 ms (4.840 ms / 100) 4.838 -> 4.839 ( +0.02%) [ +0.02% +0.00% +0.04% / +0.12% +0.02% +0.04%] index_select linear : Elapsed 0.048 ms (4.839 ms / 100) 4.842 -> 4.835 ( -0.14%) [ +0.04% +0.08% +0.00% / +0.14% -0.02% -0.14%] index_select reverse : Elapsed 0.048 ms (4.844 ms / 100) 4.841 -> 4.829 ( -0.25%) [ +0.10% +0.00% +0.10% / +0.31% -0.19% -0.25%] index_select skip64 : Elapsed 0.048 ms (4.846 ms / 100) 4.822 -> 4.833 ( +0.23%) [ +0.31% +0.29% +0.00% / +0.29% +0.23% +0.29%] index_select skip256 : Elapsed 0.048 ms (4.837 ms / 100) 4.827 -> 4.832 ( +0.10%) [ +0.00% +0.12% +0.33% / +0.10% +0.21% +0.29%] index_select spread : Elapsed 0.048 ms (4.827 ms / 100) 4.827 -> 4.822 ( -0.10%) [ +0.08% +0.00% +0.23% / -0.10% +0.25% +0.21%] index_select strided 3 : Elapsed 0.048 ms (4.831 ms / 100) 4.833 -> 4.831 ( -0.04%) [ +0.00% +0.14% +0.12% / -0.04% -0.04% +0.14%] index_select random : Elapsed 0.048 ms (4.833 ms / 100) 4.835 -> 4.836 ( +0.02%) [ +0.29% +0.00% +0.06% / +0.02% +0.08% +0.19%] index_select random_sorted : Elapsed 0.048 ms (4.849 ms / 100) B = [20, 16, 40, 4] (stride (64, 1, 1280, 16)) A = [20, 16, 5, 4] (stride (320, 20, 1, 5)) dim = 2 1.421 -> 1.421 ( +0.00%) [ +0.07% +0.00% +0.07% / +0.00% +0.56% +0.49%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.381 -> 1.378 ( -0.22%) [ +0.00% +0.07% +0.00% / -0.22% +0.14% +0.22%] index_copy_ linear : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.421 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.49% +0.56%] index_add_ reverse : Elapsed 0.014 ms (1.421 ms / 100) 1.380 -> 1.382 ( +0.14%) [ +0.07% +0.00% +0.14% / +0.14% +0.22% +0.14%] index_copy_ reverse : Elapsed 0.014 ms (1.381 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.28% +0.00% / +0.00% +0.35% +0.35%] index_add_ spread : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.378 ( +0.07%) [ +0.00% +0.29% +0.07% / +0.07% +0.29% +0.44%] index_copy_ spread : Elapsed 0.014 ms (1.377 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.00% +0.00% +0.14% / +0.07% +0.63% +0.56%] index_add_ strided 3 : Elapsed 0.014 ms (1.421 ms / 100) 1.379 -> 1.380 ( +0.07%) [ +0.07% +0.00% +0.07% / +0.07% +0.36% +0.36%] index_copy_ strided 3 : Elapsed 0.014 ms (1.380 ms / 100) 1.422 -> 1.420 ( -0.14%) [ +0.00% +0.00% +0.00% / -0.14% +0.49% +0.49%] index_add_ strided 7 : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.15% +0.00% +0.00% / +0.00% +0.29% +0.36%] index_copy_ strided 7 : Elapsed 0.014 ms (1.381 ms / 100) 1.421 -> 1.420 ( -0.07%) [ +0.00% +0.00% +0.14% / -0.07% +0.56% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.421 ms / 100) 1.379 -> 1.377 ( -0.15%) [ +0.07% +0.00% +0.07% / -0.15% +0.29% +0.22%] index_copy_ perm : Elapsed 0.014 ms (1.380 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.07% +0.14% +0.00% / +0.07% +0.56% +0.49%] index_add_ perm_sorted : Elapsed 0.014 ms (1.422 ms / 100) 1.379 -> 1.379 ( +0.00%) [ +0.00% +0.36% +0.00% / +0.00% +0.29% +0.22%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.379 ms / 100) 8.209 -> 8.219 ( +0.12%) [ +0.04% +0.00% +0.01% / +0.12% +0.23% +0.19%] index_select const : Elapsed 0.082 ms (8.212 ms / 100) 8.209 -> 8.208 ( -0.01%) [ +0.00% +0.24% +0.12% / -0.01% +0.17% +0.19%] index_select wrap : Elapsed 0.082 ms (8.209 ms / 100) 8.203 -> 8.206 ( +0.04%) [ +0.20% +0.00% +0.02% / +0.04% +0.24% +0.49%] index_select linear : Elapsed 0.082 ms (8.219 ms / 100) 8.205 -> 8.213 ( +0.10%) [ +0.02% +0.17% +0.00% / +0.24% +0.10% +0.29%] index_select reverse : Elapsed 0.082 ms (8.207 ms / 100) 8.211 -> 8.214 ( +0.04%) [ +0.00% +0.40% +0.06% / +0.04% +0.07% +0.04%] index_select skip64 : Elapsed 0.082 ms (8.211 ms / 100) 8.201 -> 8.223 ( +0.27%) [ +0.09% +0.22% +0.00% / +0.27% +0.30% +0.41%] index_select skip256 : Elapsed 0.082 ms (8.208 ms / 100) 8.211 -> 8.214 ( +0.04%) [ +0.11% +0.13% +0.00% / +0.34% +0.16% +0.04%] index_select spread : Elapsed 0.082 ms (8.220 ms / 100) 8.215 -> 8.215 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.04% +0.15% +0.00%] index_select strided 3 : Elapsed 0.082 ms (8.215 ms / 100) 8.198 -> 8.204 ( +0.07%) [ +0.39% +0.32% +0.00% / +0.07% +0.30% +0.29%] index_select random : Elapsed 0.082 ms (8.230 ms / 100) 8.205 -> 8.203 ( -0.02%) [ +0.22% +0.16% +0.00% / +0.06% -0.02% +0.21%] index_select random_sorted : Elapsed 0.082 ms (8.223 ms / 100) B = [20, 16, 40, 4] (stride (4, 80, 1280, 1)) A = [20, 16, 5, 4] (stride (16, 1, 1280, 320)) dim = 2 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.21% +0.00% / +0.00% +0.63% +0.63%] index_add_ linear : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.381 ( +0.29%) [ +0.07% +0.15% +0.00% / +0.29% +0.65% +0.51%] index_copy_ linear : Elapsed 0.014 ms (1.378 ms / 100) 1.422 -> 1.421 ( -0.07%) [ +0.07% +0.07% +0.00% / -0.07% +0.63% +0.63%] index_add_ reverse : Elapsed 0.014 ms (1.423 ms / 100) 1.378 -> 1.378 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.51% +0.36%] index_copy_ reverse : Elapsed 0.014 ms (1.378 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.14% +0.28% +0.00% / +0.07% +0.63% +0.70%] index_add_ spread : Elapsed 0.014 ms (1.424 ms / 100) 1.377 -> 1.381 ( +0.29%) [ +0.07% +0.22% +0.00% / +0.29% +0.65% +0.51%] index_copy_ spread : Elapsed 0.014 ms (1.378 ms / 100) 1.422 -> 1.423 ( +0.07%) [ +0.00% +0.00% +0.07% / +0.07% +0.63% +0.63%] index_add_ strided 3 : Elapsed 0.014 ms (1.422 ms / 100) 1.377 -> 1.380 ( +0.22%) [ +0.07% +0.00% +0.07% / +0.22% +0.51% +0.51%] index_copy_ strided 3 : Elapsed 0.014 ms (1.378 ms / 100) 1.421 -> 1.422 ( +0.07%) [ +0.14% +0.07% +0.00% / +0.07% +0.70% +0.63%] index_add_ strided 7 : Elapsed 0.014 ms (1.423 ms / 100) 1.378 -> 1.379 ( +0.07%) [ +0.44% +0.00% +0.22% / +0.07% +0.36% +0.36%] index_copy_ strided 7 : Elapsed 0.014 ms (1.384 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.70% +0.63%] index_add_ perm : Elapsed 0.014 ms (1.422 ms / 100) 1.376 -> 1.376 ( +0.00%) [ +0.07% +0.00% +0.22% / +0.00% +0.65% +0.58%] index_copy_ perm : Elapsed 0.014 ms (1.377 ms / 100) 1.422 -> 1.422 ( +0.00%) [ +0.07% +0.14% +0.00% / +0.00% +0.63% +0.77%] index_add_ perm_sorted : Elapsed 0.014 ms (1.423 ms / 100) 1.380 -> 1.377 ( -0.22%) [ +0.00% +0.07% +0.14% / -0.22% +0.36% +0.29%] index_copy_ perm_sorted : Elapsed 0.014 ms (1.380 ms / 100) 8.198 -> 8.193 ( -0.06%) [ +0.00% +0.07% +0.07% / -0.06% +0.04% +0.20%] index_select const : Elapsed 0.082 ms (8.198 ms / 100) 8.224 -> 8.224 ( +0.00%) [ +0.18% +0.23% +0.00% / +0.00% +0.19% +0.07%] index_select wrap : Elapsed 0.082 ms (8.239 ms / 100) 8.235 -> 8.220 ( -0.18%) [ +0.04% +0.00% +0.18% / -0.01% -0.18% +0.24%] index_select linear : Elapsed 0.082 ms (8.238 ms / 100) 8.237 -> 8.219 ( -0.22%) [ +0.08% +0.18% +0.00% / -0.22% +0.18% -0.07%] index_select reverse : Elapsed 0.082 ms (8.244 ms / 100) 8.191 -> 8.204 ( +0.16%) [ +0.07% +0.22% +0.00% / +0.16% +0.42% +0.39%] index_select skip64 : Elapsed 0.082 ms (8.197 ms / 100) 8.194 -> 8.199 ( +0.06%) [ +0.09% +0.28% +0.00% / +0.06% +0.34% +0.48%] index_select skip256 : Elapsed 0.082 ms (8.201 ms / 100) 8.213 -> 8.229 ( +0.19%) [ +0.27% +0.15% +0.00% / +0.19% +0.19% +0.44%] index_select spread : Elapsed 0.082 ms (8.235 ms / 100) 8.222 -> 8.227 ( +0.06%) [ +0.27% +0.06% +0.00% / +0.12% +0.18% +0.06%] index_select strided 3 : Elapsed 0.082 ms (8.244 ms / 100) 8.232 -> 8.230 ( -0.02%) [ +0.23% +0.00% +0.09% / +0.04% +0.11% -0.02%] index_select random : Elapsed 0.083 ms (8.251 ms / 100) 8.216 -> 8.243 ( +0.33%) [ +0.00% +0.26% +0.05% / +0.33% +0.52% +0.44%] index_select random_sorted : Elapsed 0.082 ms (8.216 ms / 100) out_shape = [20, 16, 5, 40] in_shape = [20, 16, 5, 4] idx_dim = 3 B = [20, 16, 5, 40] (stride (1, 4000, 20, 100)) dim = 3 fill_cnt = 4 0.802 -> 0.803 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.50% +0.62%] index_fill_ const : Elapsed 0.008 ms (0.802 ms / 100) 0.812 -> 0.811 ( -0.12%) [ +0.12% +0.12% +0.00% / -0.12% +0.25% +0.37%] index_fill_ linear : Elapsed 0.008 ms (0.813 ms / 100) 0.807 -> 0.807 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.50% +0.37%] index_fill_ reverse : Elapsed 0.008 ms (0.808 ms / 100) 0.802 -> 0.803 ( +0.12%) [ +0.12% +0.62% +0.00% / +0.12% +0.50% +0.50%] index_fill_ skip64 : Elapsed 0.008 ms (0.803 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.25% +0.50%] index_fill_ skip256 : Elapsed 0.008 ms (0.803 ms / 100) 0.799 -> 0.800 ( +0.13%) [ +0.13% +0.25% +0.00% / +0.13% +0.50% +0.50%] index_fill_ spread : Elapsed 0.008 ms (0.800 ms / 100) 0.812 -> 0.812 ( +0.00%) [ +0.12% +0.12% +0.00% / +0.00% +0.49% +0.49%] index_fill_ strided 3 : Elapsed 0.008 ms (0.813 ms / 100) 0.795 -> 0.794 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.13% +0.25% +0.13%] index_fill_ strided 5 : Elapsed 0.008 ms (0.795 ms / 100) 0.800 -> 0.801 ( +0.13%) [ +0.25% +0.25% +0.00% / +0.13% +1.00% +0.38%] index_fill_ strided 7 : Elapsed 0.008 ms (0.802 ms / 100) 0.803 -> 0.803 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.25% +0.25%] index_fill_ strided 8 : Elapsed 0.008 ms (0.803 ms / 100) 0.796 -> 0.796 ( +0.00%) [ +0.13% +0.00% +0.25% / +0.00% +0.25% +0.38%] index_fill_ strided 16 : Elapsed 0.008 ms (0.797 ms / 100) 0.808 -> 0.810 ( +0.25%) [ +0.25% +0.12% +0.00% / +0.25% +0.62% +0.50%] index_fill_ random : Elapsed 0.008 ms (0.810 ms / 100) 0.809 -> 0.809 ( +0.00%) [ +0.00% +0.12% +0.12% / +0.00% +0.25% +0.25%] index_fill_ random_sorted : Elapsed 0.008 ms (0.809 ms / 100) 0.808 -> 0.808 ( +0.00%) [ +0.12% +0.00% +0.00% / +0.00% +0.37% +0.37%] index_fill_ perm : Elapsed 0.008 ms (0.809 ms / 100) 0.798 -> 0.797 ( -0.13%) [ +0.00% +0.00% +0.00% / -0.13% +0.25% +0.13%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.798 ms / 100) B = [20, 16, 5, 40] (stride (40, 800, 12800, 1)) A = [20, 16, 5, 4] (stride (80, 1, 16, 1600)) dim = 3 1.314 -> 1.318 ( +0.30%) [ +0.30% +0.23% +0.00% / +0.30% +0.61% +0.53%] index_add_ linear : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.278 ( +0.08%) [ +0.08% +0.23% +0.00% / +0.08% +0.55% +0.86%] index_copy_ linear : Elapsed 0.013 ms (1.278 ms / 100) 1.317 -> 1.318 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.30% +0.38%] index_add_ reverse : Elapsed 0.013 ms (1.318 ms / 100) 1.277 -> 1.277 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.63% +0.63%] index_copy_ reverse : Elapsed 0.013 ms (1.277 ms / 100) 1.328 -> 1.327 ( -0.08%) [ +0.15% +0.00% +0.15% / -0.08% +0.60% +0.45%] index_add_ spread : Elapsed 0.013 ms (1.330 ms / 100) 1.287 -> 1.288 ( +0.08%) [ +0.08% +0.00% +0.08% / +0.08% +0.78% +0.39%] index_copy_ spread : Elapsed 0.013 ms (1.288 ms / 100) 1.318 -> 1.319 ( +0.08%) [ +0.08% +0.00% +0.00% / +0.08% +0.61% +1.06%] index_add_ strided 3 : Elapsed 0.013 ms (1.319 ms / 100) 1.279 -> 1.281 ( +0.16%) [ +0.00% +0.08% +0.23% / +0.16% +0.78% +0.86%] index_copy_ strided 3 : Elapsed 0.013 ms (1.279 ms / 100) 1.319 -> 1.321 ( +0.15%) [ +0.08% +0.30% +0.00% / +0.15% +0.68% +0.76%] index_add_ strided 7 : Elapsed 0.013 ms (1.320 ms / 100) 1.282 -> 1.282 ( +0.00%) [ +0.00% +0.16% +0.16% / +0.00% +0.55% +0.70%] index_copy_ strided 7 : Elapsed 0.013 ms (1.282 ms / 100) 1.321 -> 1.321 ( +0.00%) [ +0.30% +0.08% +0.00% / +0.00% +0.61% +0.68%] index_add_ perm : Elapsed 0.013 ms (1.325 ms / 100) 1.281 -> 1.282 ( +0.08%) [ +0.08% +0.47% +0.00% / +0.08% +0.78% +0.70%] index_copy_ perm : Elapsed 0.013 ms (1.282 ms / 100) 1.319 -> 1.322 ( +0.23%) [ +0.15% +0.76% +0.00% / +0.23% +0.83% +0.83%] index_add_ perm_sorted : Elapsed 0.013 ms (1.321 ms / 100) 1.283 -> 1.284 ( +0.08%) [ +0.23% +0.31% +0.00% / +0.08% +0.62% +0.62%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.286 ms / 100) 9.218 -> 9.210 ( -0.09%) [ +0.00% +0.08% +0.09% / -0.09% +0.05% +0.13%] index_select const : Elapsed 0.092 ms (9.218 ms / 100) 9.235 -> 9.240 ( +0.05%) [ +0.06% +0.00% +0.13% / +0.05% +0.17% +0.43%] index_select wrap : Elapsed 0.092 ms (9.241 ms / 100) 9.218 -> 9.233 ( +0.16%) [ +0.14% +0.31% +0.00% / +0.16% +0.18% +0.42%] index_select linear : Elapsed 0.092 ms (9.231 ms / 100) 9.223 -> 9.222 ( -0.01%) [ +0.04% +0.00% +0.08% / -0.01% +0.43% +0.37%] index_select reverse : Elapsed 0.092 ms (9.227 ms / 100) 9.218 -> 9.218 ( +0.00%) [ +0.00% +0.07% +0.02% / +0.00% +0.08% +0.17%] index_select skip64 : Elapsed 0.092 ms (9.218 ms / 100) 9.207 -> 9.218 ( +0.12%) [ +0.14% +0.23% +0.00% / +0.12% +0.29% +0.27%] index_select skip256 : Elapsed 0.092 ms (9.220 ms / 100) 9.232 -> 9.239 ( +0.08%) [ +0.13% +0.00% +0.14% / +0.08% +0.13% +0.24%] index_select spread : Elapsed 0.092 ms (9.244 ms / 100) 9.249 -> 9.234 ( -0.16%) [ +0.00% +0.12% +0.13% / -0.16% +0.16% +0.15%] index_select strided 3 : Elapsed 0.092 ms (9.249 ms / 100) 9.238 -> 9.236 ( -0.02%) [ +0.35% +0.00% +0.10% / -0.02% +0.24% +0.16%] index_select random : Elapsed 0.093 ms (9.270 ms / 100) 9.222 -> 9.240 ( +0.20%) [ +0.13% +0.20% +0.00% / +0.20% +0.30% +0.39%] index_select random_sorted : Elapsed 0.092 ms (9.234 ms / 100) B = [20, 16, 5, 40] (stride (1, 800, 12800, 20)) A = [20, 16, 5, 4] (stride (320, 20, 4, 1)) dim = 3 1.148 -> 1.148 ( +0.00%) [ +0.17% +0.26% +0.00% / +0.00% +0.52% +0.70%] index_add_ linear : Elapsed 0.011 ms (1.150 ms / 100) 1.111 -> 1.112 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.72% +0.81%] index_copy_ linear : Elapsed 0.011 ms (1.111 ms / 100) 1.149 -> 1.151 ( +0.17%) [ +0.09% +0.09% +0.00% / +0.17% +0.44% +0.44%] index_add_ reverse : Elapsed 0.012 ms (1.150 ms / 100) 1.113 -> 1.112 ( -0.09%) [ +0.18% +0.00% +0.00% / -0.09% +0.36% +0.36%] index_copy_ reverse : Elapsed 0.011 ms (1.115 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.35% +0.35%] index_add_ spread : Elapsed 0.012 ms (1.151 ms / 100) 1.112 -> 1.113 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.54% +0.54%] index_copy_ spread : Elapsed 0.011 ms (1.113 ms / 100) 1.148 -> 1.148 ( +0.00%) [ +0.09% +0.17% +0.00% / +0.00% +0.61% +0.61%] index_add_ strided 3 : Elapsed 0.011 ms (1.149 ms / 100) 1.112 -> 1.113 ( +0.09%) [ +0.00% +0.09% +0.00% / +0.09% +0.99% +0.54%] index_copy_ strided 3 : Elapsed 0.011 ms (1.112 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.00% +0.09% +0.09% / +0.00% +0.44% +0.44%] index_add_ strided 7 : Elapsed 0.011 ms (1.149 ms / 100) 1.112 -> 1.112 ( +0.00%) [ +0.00% +0.09% +0.00% / +0.00% +0.54% +0.54%] index_copy_ strided 7 : Elapsed 0.011 ms (1.112 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.17% +0.09% +0.00% / +0.00% +0.35% +0.44%] index_add_ perm : Elapsed 0.012 ms (1.151 ms / 100) 1.112 -> 1.115 ( +0.27%) [ +0.18% +0.09% +0.00% / +0.27% +0.36% +0.54%] index_copy_ perm : Elapsed 0.011 ms (1.114 ms / 100) 1.150 -> 1.149 ( -0.09%) [ +0.26% +0.00% +0.00% / -0.09% +0.26% +0.35%] index_add_ perm_sorted : Elapsed 0.012 ms (1.153 ms / 100) 1.113 -> 1.113 ( +0.00%) [ +0.09% +0.09% +0.00% / +0.00% +0.36% +0.45%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.114 ms / 100) 8.303 -> 8.312 ( +0.11%) [ +0.04% +0.00% +0.19% / +0.11% +0.41% +0.52%] index_select const : Elapsed 0.083 ms (8.306 ms / 100) 8.297 -> 8.299 ( +0.02%) [ +0.00% +0.02% +0.08% / +0.02% +0.48% +0.53%] index_select wrap : Elapsed 0.083 ms (8.297 ms / 100) 8.299 -> 8.306 ( +0.08%) [ +0.00% +0.28% +0.16% / +0.08% +0.16% +0.49%] index_select linear : Elapsed 0.083 ms (8.299 ms / 100) 8.302 -> 8.296 ( -0.07%) [ +0.00% +0.06% +0.11% / -0.07% +0.23% +0.30%] index_select reverse : Elapsed 0.083 ms (8.302 ms / 100) 8.300 -> 8.307 ( +0.08%) [ +0.00% +0.00% +0.14% / +0.08% +0.31% +0.43%] index_select skip64 : Elapsed 0.083 ms (8.300 ms / 100) 8.302 -> 8.311 ( +0.11%) [ +0.00% +0.01% +0.11% / +0.11% +0.60% +0.65%] index_select skip256 : Elapsed 0.083 ms (8.302 ms / 100) 8.304 -> 8.293 ( -0.13%) [ +0.29% +0.00% +0.00% / -0.13% +0.28% +0.28%] index_select spread : Elapsed 0.083 ms (8.328 ms / 100) 8.301 -> 8.301 ( +0.00%) [ +0.00% +0.23% +0.00% / +0.00% +0.49% +0.24%] index_select strided 3 : Elapsed 0.083 ms (8.301 ms / 100) 8.286 -> 8.299 ( +0.16%) [ +0.00% +0.27% +0.30% / +0.16% +0.41% +0.54%] index_select random : Elapsed 0.083 ms (8.286 ms / 100) 8.297 -> 8.298 ( +0.01%) [ +0.23% +0.05% +0.00% / +0.01% +0.43% +0.52%] index_select random_sorted : Elapsed 0.083 ms (8.316 ms / 100) ==================== rep_count = 100 dimensions = [15, 50, 150, 250] out_shape = [15, 150, 250] in_shape = [50, 150, 250] idx_dim = 0 B = [15, 150, 250] (stride (37500, 1, 150)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 0 10.700 -> 10.707 ( +0.07%) [ +0.00% +0.02% +0.02% / +0.07% +0.12% +0.08%] index_select const : Elapsed 0.107 ms (10.700 ms / 100) 11.576 -> 11.599 ( +0.20%) [ +0.00% +0.14% +0.43% / +0.20% +0.27% +0.23%] index_select wrap : Elapsed 0.116 ms (11.576 ms / 100) 11.575 -> 11.594 ( +0.16%) [ +0.00% +0.06% +0.37% / +0.16% +0.54% +0.17%] index_select linear : Elapsed 0.116 ms (11.575 ms / 100) 11.628 -> 11.596 ( -0.28%) [ +0.04% +0.00% +0.11% / -0.13% -0.10% -0.28%] index_select reverse : Elapsed 0.116 ms (11.633 ms / 100) 10.655 -> 10.660 ( +0.05%) [ +0.04% +0.06% +0.00% / +0.05% +0.24% +0.21%] index_select skip64 : Elapsed 0.107 ms (10.659 ms / 100) 10.694 -> 10.708 ( +0.13%) [ +0.00% +0.03% +0.03% / +0.13% +0.14% +0.35%] index_select skip256 : Elapsed 0.107 ms (10.694 ms / 100) 11.650 -> 11.601 ( -0.42%) [ +0.07% +0.00% +0.08% / -0.08% -0.27% -0.42%] index_select spread : Elapsed 0.117 ms (11.658 ms / 100) 11.542 -> 11.549 ( +0.06%) [ +0.16% +0.42% +0.00% / +0.06% +0.94% +0.70%] index_select strided 3 : Elapsed 0.116 ms (11.560 ms / 100) 11.588 -> 11.627 ( +0.34%) [ +0.42% +0.00% +0.29% / +0.34% +0.72% +0.79%] index_select strided 5 : Elapsed 0.116 ms (11.637 ms / 100) 11.575 -> 11.587 ( +0.10%) [ +0.00% +0.35% +0.13% / +0.10% +0.35% +0.37%] index_select strided 7 : Elapsed 0.116 ms (11.575 ms / 100) 11.594 -> 11.634 ( +0.35%) [ +0.03% +0.00% +0.06% / +0.42% +0.35% +0.47%] index_select strided 8 : Elapsed 0.116 ms (11.598 ms / 100) 11.595 -> 11.613 ( +0.16%) [ +0.13% +0.00% +0.33% / +0.22% +0.16% +0.53%] index_select strided 16 : Elapsed 0.116 ms (11.610 ms / 100) 11.449 -> 11.457 ( +0.07%) [ +0.11% +0.00% +0.24% / +0.07% +0.59% +0.58%] index_select random : Elapsed 0.115 ms (11.462 ms / 100) 11.223 -> 11.231 ( +0.07%) [ +0.00% +0.14% +0.15% / +0.07% +0.30% +0.14%] index_select random_sorted : Elapsed 0.112 ms (11.223 ms / 100) 11.628 -> 11.576 ( -0.45%) [ +0.00% +0.01% +0.01% / -0.03% -0.42% -0.45%] index_select perm : Elapsed 0.116 ms (11.628 ms / 100) 11.611 -> 11.614 ( +0.03%) [ +0.18% +0.00% +0.03% / +0.32% +0.03% +0.09%] index_select perm_sorted : Elapsed 0.116 ms (11.632 ms / 100) B = [15, 150, 250] (stride (250, 3750, 1)) A = [50, 150, 250] (stride (37500, 250, 1)) dim = 0 15.177 -> 15.150 ( -0.18%) [ +0.00% +0.07% +0.13% / +0.14% -0.05% -0.18%] index_select const : Elapsed 0.152 ms (15.177 ms / 100) 15.889 -> 15.877 ( -0.08%) [ +0.00% +0.01% +0.01% / -0.08% +0.26% +0.31%] index_select wrap : Elapsed 0.159 ms (15.889 ms / 100) 15.880 -> 15.869 ( -0.07%) [ +0.00% +0.11% +0.03% / -0.07% +0.40% +0.40%] index_select linear : Elapsed 0.159 ms (15.880 ms / 100) 15.885 -> 15.868 ( -0.11%) [ +0.11% +0.00% +0.17% / -0.11% +0.16% +0.12%] index_select reverse : Elapsed 0.159 ms (15.903 ms / 100) 15.188 -> 15.179 ( -0.06%) [ +0.14% +0.07% +0.00% / +0.11% -0.06% -0.01%] index_select skip64 : Elapsed 0.152 ms (15.209 ms / 100) 15.206 -> 15.179 ( -0.18%) [ +0.00% +0.14% +0.10% / -0.05% -0.18% -0.09%] index_select skip256 : Elapsed 0.152 ms (15.206 ms / 100) 15.895 -> 15.893 ( -0.01%) [ +0.03% +0.06% +0.00% / -0.01% +0.16% +0.20%] index_select spread : Elapsed 0.159 ms (15.899 ms / 100) 15.892 -> 15.914 ( +0.14%) [ +0.00% +0.14% +0.18% / +0.14% +0.30% +0.24%] index_select strided 3 : Elapsed 0.159 ms (15.892 ms / 100) 15.909 -> 15.908 ( -0.01%) [ +0.06% +0.00% +0.00% / -0.01% +0.01% +0.04%] index_select strided 5 : Elapsed 0.159 ms (15.918 ms / 100) 15.888 -> 15.887 ( -0.01%) [ +0.06% +0.00% +0.10% / -0.01% +0.26% +0.13%] index_select strided 7 : Elapsed 0.159 ms (15.897 ms / 100) 15.923 -> 15.921 ( -0.01%) [ +0.09% +0.00% +0.04% / -0.01% +0.20% +0.31%] index_select strided 8 : Elapsed 0.159 ms (15.937 ms / 100) 15.891 -> 15.896 ( +0.03%) [ +0.03% +0.00% +0.04% / +0.03% +0.20% +0.18%] index_select strided 16 : Elapsed 0.159 ms (15.895 ms / 100) 15.862 -> 15.854 ( -0.05%) [ +0.01% +0.01% +0.00% / -0.05% +0.11% -0.02%] index_select random : Elapsed 0.159 ms (15.863 ms / 100) 15.759 -> 15.772 ( +0.08%) [ +0.01% +0.00% +0.20% / +0.08% +0.34% +0.18%] index_select random_sorted : Elapsed 0.158 ms (15.761 ms / 100) 15.913 -> 15.908 ( -0.03%) [ +0.07% +0.00% +0.08% / +0.07% -0.03% +0.20%] index_select perm : Elapsed 0.159 ms (15.924 ms / 100) 15.895 -> 15.922 ( +0.17%) [ +0.09% +0.00% +0.11% / +0.17% +0.22% +0.26%] index_select perm_sorted : Elapsed 0.159 ms (15.909 ms / 100) B = [15, 150, 250] (stride (250, 3750, 1)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 0 8.426 -> 8.383 ( -0.51%) [ +0.24% +0.13% +0.00% / +0.15% -0.51% -0.32%] index_select const : Elapsed 0.084 ms (8.446 ms / 100) 9.891 -> 9.798 ( -0.94%) [ +0.11% +0.00% +0.17% / +0.08% -0.94% -0.77%] index_select wrap : Elapsed 0.099 ms (9.902 ms / 100) 9.876 -> 9.809 ( -0.68%) [ +0.24% +0.37% +0.00% / +0.18% -0.68% -0.56%] index_select linear : Elapsed 0.099 ms (9.900 ms / 100) 9.847 -> 9.765 ( -0.83%) [ +0.02% +0.21% +0.00% / +0.13% -0.83% -0.77%] index_select reverse : Elapsed 0.098 ms (9.849 ms / 100) 8.387 -> 8.290 ( -1.16%) [ +0.20% +0.00% +0.27% / +0.39% -1.16% -0.99%] index_select skip64 : Elapsed 0.084 ms (8.404 ms / 100) 8.483 -> 8.329 ( -1.82%) [ +0.12% +0.13% +0.00% / -0.02% -1.82% -1.66%] index_select skip256 : Elapsed 0.085 ms (8.493 ms / 100) 9.935 -> 9.793 ( -1.43%) [ +0.03% +0.21% +0.00% / -0.19% -1.43% -1.36%] index_select spread : Elapsed 0.099 ms (9.938 ms / 100) 9.824 -> 9.832 ( +0.08%) [ +0.00% +0.10% +0.66% / +0.08% +0.35% +0.09%] index_select strided 3 : Elapsed 0.098 ms (9.824 ms / 100) 9.846 -> 9.856 ( +0.10%) [ +0.24% +0.00% +0.24% / +0.12% +0.13% +0.10%] index_select strided 5 : Elapsed 0.099 ms (9.870 ms / 100) 9.835 -> 9.829 ( -0.06%) [ +0.42% +0.00% +0.52% / +0.32% -0.06% +0.09%] index_select strided 7 : Elapsed 0.099 ms (9.876 ms / 100) 9.900 -> 9.827 ( -0.74%) [ +0.00% +0.06% +0.20% / +0.08% -0.74% -0.66%] index_select strided 8 : Elapsed 0.099 ms (9.900 ms / 100) 9.943 -> 9.834 ( -1.10%) [ +0.18% +0.00% +0.30% / +0.21% -1.08% -1.10%] index_select strided 16 : Elapsed 0.100 ms (9.961 ms / 100) 9.869 -> 9.830 ( -0.40%) [ +0.00% +0.07% +0.37% / +0.12% -0.40% +0.00%] index_select random : Elapsed 0.099 ms (9.869 ms / 100) 9.797 -> 9.780 ( -0.17%) [ +0.20% +0.00% +0.19% / +0.16% -0.10% -0.17%] index_select random_sorted : Elapsed 0.098 ms (9.817 ms / 100) 9.886 -> 9.831 ( -0.56%) [ +0.11% +0.00% +0.14% / -0.05% -0.56% -0.34%] index_select perm : Elapsed 0.099 ms (9.897 ms / 100) 9.923 -> 9.801 ( -1.23%) [ +0.00% +0.16% +0.44% / +0.17% -1.23% -1.14%] index_select perm_sorted : Elapsed 0.099 ms (9.923 ms / 100) B = [15, 150, 250] (stride (250, 3750, 1)) A = [50, 150, 250] (stride (250, 12500, 1)) dim = 0 8.739 -> 8.762 ( +0.26%) [ +0.41% +0.00% +0.98% / +0.26% +0.37% +0.58%] index_select const : Elapsed 0.088 ms (8.775 ms / 100) 9.677 -> 9.672 ( -0.05%) [ +0.00% +0.03% +0.06% / -0.04% -0.01% -0.05%] index_select wrap : Elapsed 0.097 ms (9.677 ms / 100) 9.677 -> 9.680 ( +0.03%) [ +0.00% +0.04% +0.19% / +0.11% +0.18% +0.03%] index_select linear : Elapsed 0.097 ms (9.677 ms / 100) 9.766 -> 9.568 ( -2.03%) [ +0.00% +0.07% +0.34% / +0.13% -1.97% -2.03%] index_select reverse : Elapsed 0.098 ms (9.766 ms / 100) 8.730 -> 8.791 ( +0.70%) [ +0.48% +0.32% +0.00% / +0.70% +0.93% +0.86%] index_select skip64 : Elapsed 0.088 ms (8.772 ms / 100) 8.795 -> 8.783 ( -0.14%) [ +0.18% +0.05% +0.00% / -0.14% +0.11% +0.08%] index_select skip256 : Elapsed 0.088 ms (8.811 ms / 100) 9.557 -> 9.420 ( -1.43%) [ +0.00% +0.10% +0.10% / -0.16% -1.43% -1.18%] index_select spread : Elapsed 0.096 ms (9.557 ms / 100) 9.629 -> 9.454 ( -1.82%) [ +0.22% +0.10% +0.00% / +0.00% -1.66% -1.82%] index_select strided 3 : Elapsed 0.096 ms (9.650 ms / 100) 9.583 -> 9.400 ( -1.91%) [ +0.20% +0.00% +0.16% / +0.27% -1.89% -1.91%] index_select strided 5 : Elapsed 0.096 ms (9.602 ms / 100) 9.486 -> 9.432 ( -0.57%) [ +0.00% +0.17% +0.11% / +0.22% -0.57% -0.56%] index_select strided 7 : Elapsed 0.095 ms (9.486 ms / 100) 9.521 -> 9.466 ( -0.58%) [ +0.26% +0.51% +0.00% / +0.39% -0.58% -0.48%] index_select strided 8 : Elapsed 0.095 ms (9.546 ms / 100) 9.914 -> 9.750 ( -1.65%) [ +0.00% +0.44% +0.14% / +0.41% -1.28% -1.65%] index_select strided 16 : Elapsed 0.099 ms (9.914 ms / 100) 9.552 -> 9.531 ( -0.22%) [ +0.44% +0.24% +0.00% / +0.48% -0.04% -0.22%] index_select random : Elapsed 0.096 ms (9.594 ms / 100) 9.445 -> 9.335 ( -1.16%) [ +0.30% +0.00% +0.43% / +0.06% -1.13% -1.16%] index_select random_sorted : Elapsed 0.095 ms (9.473 ms / 100) 9.665 -> 9.586 ( -0.82%) [ +0.27% +0.00% +0.29% / -0.08% -0.80% -0.82%] index_select perm : Elapsed 0.097 ms (9.691 ms / 100) 9.593 -> 9.577 ( -0.17%) [ +0.10% +0.00% +0.03% / -0.04% +0.23% -0.17%] index_select perm_sorted : Elapsed 0.096 ms (9.603 ms / 100) B = [15, 150, 250] (stride (150, 1, 2250)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 0 10.945 -> 10.952 ( +0.06%) [ +0.00% +0.10% +0.16% / +0.06% +0.13% +0.26%] index_select const : Elapsed 0.109 ms (10.945 ms / 100) 11.776 -> 11.753 ( -0.20%) [ +0.25% +0.07% +0.00% / -0.20% +1.03% +1.05%] index_select wrap : Elapsed 0.118 ms (11.805 ms / 100) 11.724 -> 11.748 ( +0.20%) [ +0.28% +0.00% +0.51% / +0.20% +1.63% +1.56%] index_select linear : Elapsed 0.118 ms (11.757 ms / 100) 11.828 -> 11.837 ( +0.08%) [ +0.00% +0.12% +0.08% / +0.08% +0.94% +0.93%] index_select reverse : Elapsed 0.118 ms (11.828 ms / 100) 10.917 -> 10.920 ( +0.03%) [ +0.00% +0.02% +0.03% / +0.03% +0.65% +0.61%] index_select skip64 : Elapsed 0.109 ms (10.917 ms / 100) 10.950 -> 10.962 ( +0.11%) [ +0.00% +0.12% +0.22% / +0.11% +0.45% +0.43%] index_select skip256 : Elapsed 0.110 ms (10.950 ms / 100) 11.799 -> 11.853 ( +0.46%) [ +0.07% +0.19% +0.00% / +0.46% +0.74% +0.47%] index_select spread : Elapsed 0.118 ms (11.807 ms / 100) 11.811 -> 11.791 ( -0.17%) [ +0.01% +0.00% +0.00% / -0.17% +1.05% +0.74%] index_select strided 3 : Elapsed 0.118 ms (11.812 ms / 100) 11.812 -> 11.863 ( +0.43%) [ +0.23% +0.00% +0.16% / +0.43% +1.37% +1.32%] index_select strided 5 : Elapsed 0.118 ms (11.839 ms / 100) 11.743 -> 11.814 ( +0.60%) [ +0.00% +0.05% +0.51% / +0.60% +1.89% +1.69%] index_select strided 7 : Elapsed 0.117 ms (11.743 ms / 100) 11.795 -> 11.812 ( +0.14%) [ +0.00% +0.19% +0.22% / +0.14% +0.75% +0.84%] index_select strided 8 : Elapsed 0.118 ms (11.795 ms / 100) 11.801 -> 11.819 ( +0.15%) [ +0.19% +0.14% +0.00% / +0.15% +1.15% +0.92%] index_select strided 16 : Elapsed 0.118 ms (11.823 ms / 100) 11.801 -> 11.829 ( +0.24%) [ +0.11% +0.48% +0.00% / +0.24% +0.97% +0.98%] index_select random : Elapsed 0.118 ms (11.814 ms / 100) 11.691 -> 11.662 ( -0.25%) [ +0.00% +0.02% +0.03% / -0.25% +0.53% +0.62%] index_select random_sorted : Elapsed 0.117 ms (11.691 ms / 100) 11.775 -> 11.812 ( +0.31%) [ +0.00% +0.20% +0.12% / +0.31% +1.10% +0.76%] index_select perm : Elapsed 0.118 ms (11.775 ms / 100) 11.803 -> 11.838 ( +0.30%) [ +0.15% +0.00% +0.20% / +0.30% +0.91% +0.68%] index_select perm_sorted : Elapsed 0.118 ms (11.821 ms / 100) out_shape = [50, 15, 250] in_shape = [50, 150, 250] idx_dim = 1 B = [50, 15, 250] (stride (3750, 250, 1)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 1 10.273 -> 10.190 ( -0.81%) [ +0.04% +0.00% +1.23% / +1.77% -0.81% -0.65%] index_select const : Elapsed 0.103 ms (10.277 ms / 100) 12.166 -> 11.967 ( -1.64%) [ +0.00% +0.07% +0.46% / +0.62% -1.64% -1.00%] index_select wrap : Elapsed 0.122 ms (12.166 ms / 100) 12.073 -> 11.958 ( -0.95%) [ +0.85% +0.00% +0.48% / +1.12% -0.75% -0.95%] index_select linear : Elapsed 0.122 ms (12.176 ms / 100) 11.801 -> 11.908 ( +0.91%) [ +0.00% +0.12% +0.39% / +0.91% +1.30% +1.19%] index_select reverse : Elapsed 0.118 ms (11.801 ms / 100) 10.092 -> 10.213 ( +1.20%) [ +0.00% +2.79% +2.09% / +1.20% +1.64% +1.50%] index_select skip64 : Elapsed 0.101 ms (10.092 ms / 100) 10.265 -> 10.056 ( -2.04%) [ +1.38% +0.03% +0.00% / -0.12% -2.04% -0.44%] index_select skip256 : Elapsed 0.104 ms (10.407 ms / 100) 14.077 -> 13.965 ( -0.80%) [ +0.00% +0.37% +0.42% / +0.13% -0.80% -0.53%] index_select spread : Elapsed 0.141 ms (14.077 ms / 100) 13.064 -> 12.831 ( -1.78%) [ +0.00% +0.04% +0.11% / +0.37% -1.17% -1.78%] index_select strided 3 : Elapsed 0.131 ms (13.064 ms / 100) 13.622 -> 13.401 ( -1.62%) [ +0.00% +0.00% +0.11% / +0.02% -1.62% -1.56%] index_select strided 5 : Elapsed 0.136 ms (13.622 ms / 100) 13.873 -> 13.730 ( -1.03%) [ +0.17% +0.00% +0.14% / +0.18% -1.03% -0.98%] index_select strided 7 : Elapsed 0.139 ms (13.896 ms / 100) 14.071 -> 13.891 ( -1.28%) [ +0.01% +0.08% +0.00% / +0.13% -1.28% -1.15%] index_select strided 8 : Elapsed 0.141 ms (14.073 ms / 100) 14.164 -> 14.110 ( -0.38%) [ +0.00% +0.28% +0.01% / -0.06% -0.30% -0.38%] index_select strided 16 : Elapsed 0.142 ms (14.164 ms / 100) 14.305 -> 14.163 ( -0.99%) [ +0.06% +0.01% +0.00% / -0.22% -0.79% -0.99%] index_select strided 64 : Elapsed 0.143 ms (14.313 ms / 100) 14.275 -> 14.190 ( -0.60%) [ +0.00% +0.74% +0.47% / +0.39% -0.60% -0.31%] index_select strided 100 : Elapsed 0.143 ms (14.275 ms / 100) 14.289 -> 14.020 ( -1.88%) [ +0.05% +0.00% +0.06% / +0.15% -1.78% -1.88%] index_select random : Elapsed 0.143 ms (14.296 ms / 100) 13.794 -> 13.682 ( -0.81%) [ +0.00% +0.23% +0.45% / +0.22% -0.72% -0.81%] index_select random_sorted : Elapsed 0.138 ms (13.794 ms / 100) 14.241 -> 14.101 ( -0.98%) [ +0.24% +0.00% +0.18% / +0.25% -0.96% -0.98%] index_select perm : Elapsed 0.143 ms (14.275 ms / 100) 13.762 -> 13.632 ( -0.94%) [ +0.25% +0.00% +0.41% / +0.23% -0.59% -0.94%] index_select perm_sorted : Elapsed 0.138 ms (13.796 ms / 100) B = [50, 15, 250] (stride (3750, 1, 15)) A = [50, 150, 250] (stride (250, 12500, 1)) dim = 1 4.458 -> 4.463 ( +0.11%) [ +0.00% +0.22% +0.22% / +0.11% +0.29% +0.22%] index_select const : Elapsed 0.045 ms (4.458 ms / 100) 4.844 -> 4.840 ( -0.08%) [ +0.00% +0.12% +0.02% / -0.08% +0.27% +0.12%] index_select wrap : Elapsed 0.048 ms (4.844 ms / 100) 4.838 -> 4.838 ( +0.00%) [ +0.14% +0.19% +0.00% / +0.00% +0.48% +0.48%] index_select linear : Elapsed 0.048 ms (4.845 ms / 100) 4.857 -> 4.858 ( +0.02%) [ +0.00% +0.06% +0.00% / +0.02% +0.08% +0.25%] index_select reverse : Elapsed 0.049 ms (4.857 ms / 100) 4.458 -> 4.467 ( +0.20%) [ +0.00% +0.27% +0.11% / +0.20% +0.40% +0.34%] index_select skip64 : Elapsed 0.045 ms (4.458 ms / 100) 4.472 -> 4.470 ( -0.04%) [ +0.00% +0.07% +0.04% / +0.13% +0.07% -0.04%] index_select skip256 : Elapsed 0.045 ms (4.472 ms / 100) 4.854 -> 4.855 ( +0.02%) [ +0.02% +0.00% +0.10% / +0.02% +0.27% +0.06%] index_select spread : Elapsed 0.049 ms (4.855 ms / 100) 4.867 -> 4.867 ( +0.00%) [ +0.00% +0.25% +0.04% / +0.02% +0.00% +0.21%] index_select strided 3 : Elapsed 0.049 ms (4.867 ms / 100) 4.866 -> 4.871 ( +0.10%) [ +0.04% +0.14% +0.00% / +0.10% +0.29% +0.31%] index_select strided 5 : Elapsed 0.049 ms (4.868 ms / 100) 4.871 -> 4.868 ( -0.06%) [ +0.02% +0.00% +0.06% / +0.02% +0.00% -0.06%] index_select strided 7 : Elapsed 0.049 ms (4.872 ms / 100) 4.794 -> 4.799 ( +0.10%) [ +0.08% +0.23% +0.00% / +0.15% +0.10% +0.27%] index_select strided 8 : Elapsed 0.048 ms (4.798 ms / 100) 4.830 -> 4.827 ( -0.06%) [ +0.00% +0.48% +0.14% / -0.06% +0.46% +0.43%] index_select strided 16 : Elapsed 0.048 ms (4.830 ms / 100) 4.855 -> 4.854 ( -0.02%) [ +0.00% +0.43% +0.16% / +0.27% +0.12% -0.02%] index_select strided 64 : Elapsed 0.049 ms (4.855 ms / 100) 4.613 -> 4.612 ( -0.02%) [ +0.50% +0.04% +0.00% / +0.09% -0.02% +0.00%] index_select strided 100 : Elapsed 0.046 ms (4.636 ms / 100) 4.865 -> 4.862 ( -0.06%) [ +0.23% +0.00% +0.25% / +0.27% -0.06% +0.06%] index_select random : Elapsed 0.049 ms (4.876 ms / 100) 4.849 -> 4.855 ( +0.12%) [ +0.29% +0.10% +0.00% / +0.12% +0.31% +0.19%] index_select random_sorted : Elapsed 0.049 ms (4.863 ms / 100) 4.870 -> 4.863 ( -0.14%) [ +0.00% +0.08% +0.12% / -0.02% -0.02% -0.14%] index_select perm : Elapsed 0.049 ms (4.870 ms / 100) 4.835 -> 4.838 ( +0.06%) [ +0.04% +0.00% +0.00% / +0.12% +0.23% +0.06%] index_select perm_sorted : Elapsed 0.048 ms (4.837 ms / 100) B = [50, 15, 250] (stride (3750, 1, 15)) A = [50, 150, 250] (stride (1, 12500, 50)) dim = 1 5.179 -> 5.117 ( -1.20%) [ +0.27% +0.17% +0.00% / +0.02% -1.20% -1.06%] index_select const : Elapsed 0.052 ms (5.193 ms / 100) 5.423 -> 5.391 ( -0.59%) [ +0.26% +0.00% +0.31% / +0.13% -0.59% -0.50%] index_select wrap : Elapsed 0.054 ms (5.437 ms / 100) 5.428 -> 5.394 ( -0.63%) [ +0.09% +0.06% +0.00% / +0.17% -0.63% -0.59%] index_select linear : Elapsed 0.054 ms (5.433 ms / 100) 5.433 -> 5.409 ( -0.44%) [ +0.00% +0.09% +0.07% / +0.17% -0.35% -0.44%] index_select reverse : Elapsed 0.054 ms (5.433 ms / 100) 5.179 -> 5.151 ( -0.54%) [ +0.06% +0.00% +0.14% / -0.10% -0.50% -0.54%] index_select skip64 : Elapsed 0.052 ms (5.182 ms / 100) 5.304 -> 5.118 ( -3.51%) [ +0.23% +0.00% +0.04% / +0.08% -3.51% -3.36%] index_select skip256 : Elapsed 0.053 ms (5.316 ms / 100) 5.506 -> 5.407 ( -1.80%) [ +0.00% +0.25% +0.15% / +0.18% -1.80% -1.76%] index_select spread : Elapsed 0.055 ms (5.506 ms / 100) 5.419 -> 5.418 ( -0.02%) [ +0.04% +0.00% +0.26% / +0.20% -0.02% +0.06%] index_select strided 3 : Elapsed 0.054 ms (5.421 ms / 100) 5.438 -> 5.411 ( -0.50%) [ +0.00% +0.09% +0.18% / +0.13% -0.33% -0.50%] index_select strided 5 : Elapsed 0.054 ms (5.438 ms / 100) 5.497 -> 5.461 ( -0.65%) [ +0.00% +0.18% +0.31% / +0.00% -0.62% -0.65%] index_select strided 7 : Elapsed 0.055 ms (5.497 ms / 100) 5.438 -> 5.461 ( +0.42%) [ +0.37% +0.00% +0.17% / +0.42% +1.18% +1.10%] index_select strided 8 : Elapsed 0.055 ms (5.458 ms / 100) 5.462 -> 5.469 ( +0.13%) [ +0.00% +0.29% +0.29% / +0.13% +0.51% +0.64%] index_select strided 16 : Elapsed 0.055 ms (5.462 ms / 100) 5.432 -> 5.440 ( +0.15%) [ +0.28% +0.00% +0.15% / +0.15% +0.55% +0.35%] index_select strided 64 : Elapsed 0.054 ms (5.447 ms / 100) 5.390 -> 5.328 ( -1.15%) [ +0.00% +0.02% +0.04% / +0.13% -0.96% -1.15%] index_select strided 100 : Elapsed 0.054 ms (5.390 ms / 100) 5.437 -> 5.434 ( -0.06%) [ +0.02% +0.00% +0.09% / +0.07% +0.13% -0.06%] index_select random : Elapsed 0.054 ms (5.438 ms / 100) 5.411 -> 5.402 ( -0.17%) [ +0.09% +0.00% +0.07% / +0.33% -0.17% -0.07%] index_select random_sorted : Elapsed 0.054 ms (5.416 ms / 100) 5.479 -> 5.447 ( -0.58%) [ +0.00% +0.07% +0.05% / +0.18% -0.51% -0.58%] index_select perm : Elapsed 0.055 ms (5.479 ms / 100) 5.444 -> 5.424 ( -0.37%) [ +0.00% +0.11% +0.20% / +0.02% -0.37% -0.31%] index_select perm_sorted : Elapsed 0.054 ms (5.444 ms / 100) B = [50, 15, 250] (stride (250, 12500, 1)) A = [50, 150, 250] (stride (37500, 250, 1)) dim = 1 4.078 -> 4.073 ( -0.12%) [ +0.00% +0.29% +0.39% / +0.10% -0.12% -0.02%] index_select const : Elapsed 0.041 ms (4.078 ms / 100) 4.701 -> 4.694 ( -0.15%) [ +0.06% +0.00% +0.09% / +0.09% -0.15% -0.13%] index_select wrap : Elapsed 0.047 ms (4.704 ms / 100) 4.704 -> 4.694 ( -0.21%) [ +0.06% +0.00% +0.11% / -0.17% -0.21% -0.06%] index_select linear : Elapsed 0.047 ms (4.707 ms / 100) 4.692 -> 4.695 ( +0.06%) [ +0.00% +0.28% +0.09% / +0.06% +0.06% +0.06%] index_select reverse : Elapsed 0.047 ms (4.692 ms / 100) 4.081 -> 4.073 ( -0.20%) [ +0.22% +0.00% +0.07% / +0.22% -0.20% -0.05%] index_select skip64 : Elapsed 0.041 ms (4.090 ms / 100) 4.081 -> 4.079 ( -0.05%) [ +0.00% +0.00% +0.27% / +0.10% -0.05% +0.07%] index_select skip256 : Elapsed 0.041 ms (4.081 ms / 100) 4.709 -> 4.699 ( -0.21%) [ +0.17% +0.00% +0.06% / +0.06% -0.19% -0.21%] index_select spread : Elapsed 0.047 ms (4.717 ms / 100) 4.722 -> 4.700 ( -0.47%) [ +0.02% +0.17% +0.00% / +0.08% -0.47% -0.40%] index_select strided 3 : Elapsed 0.047 ms (4.723 ms / 100) 4.712 -> 4.701 ( -0.23%) [ +0.08% +0.17% +0.00% / +0.13% -0.23% -0.19%] index_select strided 5 : Elapsed 0.047 ms (4.716 ms / 100) 4.700 -> 4.691 ( -0.19%) [ +0.00% +0.30% +0.15% / -0.02% -0.02% -0.19%] index_select strided 7 : Elapsed 0.047 ms (4.700 ms / 100) 4.717 -> 4.724 ( +0.15%) [ +0.02% +0.13% +0.00% / +0.15% +0.38% +0.47%] index_select strided 8 : Elapsed 0.047 ms (4.718 ms / 100) 4.777 -> 4.742 ( -0.73%) [ +0.02% +0.10% +0.00% / +0.19% -0.65% -0.73%] index_select strided 16 : Elapsed 0.048 ms (4.778 ms / 100) 4.721 -> 4.715 ( -0.13%) [ +0.02% +0.02% +0.00% / -0.02% -0.13% -0.11%] index_select strided 64 : Elapsed 0.047 ms (4.722 ms / 100) 4.200 -> 4.173 ( -0.64%) [ +0.12% +0.00% +0.10% / -0.10% -0.64% -0.45%] index_select strided 100 : Elapsed 0.042 ms (4.205 ms / 100) 4.658 -> 4.660 ( +0.04%) [ +0.09% +0.00% +0.19% / +0.04% +0.71% +0.67%] index_select random : Elapsed 0.047 ms (4.662 ms / 100) 4.670 -> 4.675 ( +0.11%) [ +0.02% +0.15% +0.00% / +0.11% +0.47% +0.36%] index_select random_sorted : Elapsed 0.047 ms (4.671 ms / 100) 4.691 -> 4.693 ( +0.04%) [ +0.06% +0.11% +0.00% / +0.04% +0.45% +0.30%] index_select perm : Elapsed 0.047 ms (4.694 ms / 100) 4.692 -> 4.689 ( -0.06%) [ +0.15% +0.00% +0.04% / -0.06% +0.70% +0.64%] index_select perm_sorted : Elapsed 0.047 ms (4.699 ms / 100) B = [50, 15, 250] (stride (1, 12500, 50)) A = [50, 150, 250] (stride (250, 12500, 1)) dim = 1 4.498 -> 4.504 ( +0.13%) [ +0.00% +0.36% +0.16% / +0.29% +0.24% +0.13%] index_select const : Elapsed 0.045 ms (4.498 ms / 100) 4.869 -> 4.858 ( -0.23%) [ +0.00% +0.02% +0.14% / -0.23% +0.14% +0.06%] index_select wrap : Elapsed 0.049 ms (4.869 ms / 100) 4.863 -> 4.861 ( -0.04%) [ +0.06% +0.00% +0.02% / -0.04% +0.04% +0.19%] index_select linear : Elapsed 0.049 ms (4.866 ms / 100) 4.870 -> 4.878 ( +0.16%) [ +0.10% +0.00% +0.18% / +0.21% +0.16% +0.16%] index_select reverse : Elapsed 0.049 ms (4.875 ms / 100) 4.499 -> 4.499 ( +0.00%) [ +0.02% +0.00% +0.16% / +0.00% +0.27% +0.36%] index_select skip64 : Elapsed 0.045 ms (4.500 ms / 100) 4.504 -> 4.502 ( -0.04%) [ +0.13% +0.16% +0.00% / -0.04% -0.02% -0.02%] index_select skip256 : Elapsed 0.045 ms (4.510 ms / 100) 4.878 -> 4.872 ( -0.12%) [ +0.21% +0.00% +0.00% / +0.02% +0.04% -0.12%] index_select spread : Elapsed 0.049 ms (4.888 ms / 100) 4.873 -> 4.870 ( -0.06%) [ +0.00% +0.08% +0.00% / +0.04% +0.33% -0.06%] index_select strided 3 : Elapsed 0.049 ms (4.873 ms / 100) 4.881 -> 4.885 ( +0.08%) [ +0.04% +0.00% +0.06% / +0.16% +0.08% +0.18%] index_select strided 5 : Elapsed 0.049 ms (4.883 ms / 100) 4.876 -> 4.873 ( -0.06%) [ +0.10% +0.02% +0.00% / -0.06% +0.21% +0.21%] index_select strided 7 : Elapsed 0.049 ms (4.881 ms / 100) 4.853 -> 4.842 ( -0.23%) [ +0.10% +0.21% +0.00% / +0.06% -0.23% -0.16%] index_select strided 8 : Elapsed 0.049 ms (4.858 ms / 100) 4.872 -> 4.875 ( +0.06%) [ +0.25% +0.00% +0.06% / +0.08% +0.35% +0.06%] index_select strided 16 : Elapsed 0.049 ms (4.884 ms / 100) 4.872 -> 4.870 ( -0.04%) [ +0.12% +0.00% +0.16% / -0.04% -0.04% +0.12%] index_select strided 64 : Elapsed 0.049 ms (4.878 ms / 100) 4.574 -> 4.576 ( +0.04%) [ +0.04% +0.04% +0.00% / +0.17% +0.04% +0.04%] index_select strided 100 : Elapsed 0.046 ms (4.576 ms / 100) 4.827 -> 4.826 ( -0.02%) [ +0.19% +0.00% +0.04% / -0.02% +0.15% +0.21%] index_select random : Elapsed 0.048 ms (4.836 ms / 100) 4.840 -> 4.838 ( -0.04%) [ +0.21% +0.00% +0.14% / -0.04% +0.17% +0.06%] index_select random_sorted : Elapsed 0.048 ms (4.850 ms / 100) 4.874 -> 4.880 ( +0.12%) [ +0.25% +0.00% +0.06% / +0.14% +0.12% +0.18%] index_select perm : Elapsed 0.049 ms (4.886 ms / 100) 4.878 -> 4.873 ( -0.10%) [ +0.27% +0.00% +0.14% / +0.10% -0.10% +0.00%] index_select perm_sorted : Elapsed 0.049 ms (4.891 ms / 100) B = [50, 15, 250] (stride (1, 12500, 50)) A = [50, 150, 250] (stride (1, 50, 7500)) dim = 1 5.118 -> 5.116 ( -0.04%) [ +0.16% +0.00% +0.06% / +0.35% +0.08% -0.04%] index_select const : Elapsed 0.051 ms (5.126 ms / 100) 5.383 -> 5.384 ( +0.02%) [ +0.11% +0.00% +0.13% / +0.02% +0.48% +0.32%] index_select wrap : Elapsed 0.054 ms (5.389 ms / 100) 5.374 -> 5.391 ( +0.32%) [ +0.17% +0.09% +0.00% / +0.32% +0.54% +0.54%] index_select linear : Elapsed 0.054 ms (5.383 ms / 100) 5.425 -> 5.405 ( -0.37%) [ +0.02% +0.06% +0.00% / +0.18% -0.31% -0.37%] index_select reverse : Elapsed 0.054 ms (5.426 ms / 100) 5.144 -> 5.150 ( +0.12%) [ +0.10% +0.17% +0.00% / +0.12% +0.84% +0.86%] index_select skip64 : Elapsed 0.051 ms (5.149 ms / 100) 5.206 -> 5.159 ( -0.90%) [ +0.00% +0.04% +0.15% / +0.19% -0.83% -0.90%] index_select skip256 : Elapsed 0.052 ms (5.206 ms / 100) 5.471 -> 5.441 ( -0.55%) [ +0.00% +0.04% +0.05% / -0.07% -0.48% -0.55%] index_select spread : Elapsed 0.055 ms (5.471 ms / 100) 5.465 -> 5.418 ( -0.86%) [ +0.00% +0.02% +0.11% / -0.07% -0.71% -0.86%] index_select strided 3 : Elapsed 0.055 ms (5.465 ms / 100) 5.435 -> 5.404 ( -0.57%) [ +0.04% +0.13% +0.00% / -0.09% -0.57% -0.55%] index_select strided 5 : Elapsed 0.054 ms (5.437 ms / 100) 5.414 -> 5.424 ( +0.18%) [ +0.20% +0.28% +0.00% / +0.44% +0.18% +0.52%] index_select strided 7 : Elapsed 0.054 ms (5.425 ms / 100) 5.482 -> 5.442 ( -0.73%) [ +0.00% +0.20% +0.00% / +0.02% -0.58% -0.73%] index_select strided 8 : Elapsed 0.055 ms (5.482 ms / 100) 5.457 -> 5.446 ( -0.20%) [ +0.00% +0.02% +0.00% / -0.07% -0.20% -0.13%] index_select strided 16 : Elapsed 0.055 ms (5.457 ms / 100) 5.506 -> 5.425 ( -1.47%) [ +0.20% +0.27% +0.00% / +0.24% -1.22% -1.47%] index_select strided 64 : Elapsed 0.055 ms (5.517 ms / 100) 5.176 -> 5.177 ( +0.02%) [ +0.17% +0.04% +0.00% / +0.02% +0.46% +0.43%] index_select strided 100 : Elapsed 0.052 ms (5.185 ms / 100) 5.414 -> 5.402 ( -0.22%) [ +0.09% +0.04% +0.00% / +0.06% -0.09% -0.22%] index_select random : Elapsed 0.054 ms (5.419 ms / 100) 5.446 -> 5.402 ( -0.81%) [ +0.24% +0.15% +0.00% / +0.15% -0.81% -0.64%] index_select random_sorted : Elapsed 0.055 ms (5.459 ms / 100) 5.419 -> 5.418 ( -0.02%) [ +0.07% +0.00% +0.28% / +0.00% -0.02% +0.02%] index_select perm : Elapsed 0.054 ms (5.423 ms / 100) 5.422 -> 5.408 ( -0.26%) [ +0.11% +0.00% +0.02% / -0.26% +0.52% +0.42%] index_select perm_sorted : Elapsed 0.054 ms (5.428 ms / 100) B = [50, 15, 250] (stride (15, 1, 750)) A = [50, 150, 250] (stride (37500, 250, 1)) dim = 1 4.711 -> 4.685 ( -0.55%) [ +0.04% +0.00% +0.08% / +0.13% -0.28% -0.55%] index_select const : Elapsed 0.047 ms (4.713 ms / 100) 5.013 -> 4.990 ( -0.46%) [ +0.14% +0.00% +0.16% / +0.06% -0.46% -0.36%] index_select wrap : Elapsed 0.050 ms (5.020 ms / 100) 5.023 -> 4.989 ( -0.68%) [ +0.00% +0.08% +0.10% / +0.16% -0.64% -0.68%] index_select linear : Elapsed 0.050 ms (5.023 ms / 100) 5.003 -> 5.003 ( +0.00%) [ +0.10% +0.04% +0.00% / +0.06% +0.04% +0.00%] index_select reverse : Elapsed 0.050 ms (5.008 ms / 100) 4.710 -> 4.705 ( -0.11%) [ +0.00% +0.02% +0.15% / -0.06% -0.11% -0.02%] index_select skip64 : Elapsed 0.047 ms (4.710 ms / 100) 4.725 -> 4.693 ( -0.68%) [ +0.00% +0.02% +0.11% / -0.13% -0.68% -0.59%] index_select skip256 : Elapsed 0.047 ms (4.725 ms / 100) 5.004 -> 4.994 ( -0.20%) [ +0.10% +0.00% +0.04% / +0.24% -0.20% +0.14%] index_select spread : Elapsed 0.050 ms (5.009 ms / 100) 5.014 -> 5.005 ( -0.18%) [ +0.08% +0.08% +0.00% / +0.06% -0.18% -0.08%] index_select strided 3 : Elapsed 0.050 ms (5.018 ms / 100) 5.011 -> 5.011 ( +0.00%) [ +0.14% +0.00% +0.12% / +0.10% +0.20% +0.00%] index_select strided 5 : Elapsed 0.050 ms (5.018 ms / 100) 5.002 -> 5.007 ( +0.10%) [ +0.00% +0.14% +0.02% / +0.10% +0.10% +0.28%] index_select strided 7 : Elapsed 0.050 ms (5.002 ms / 100) 5.010 -> 5.002 ( -0.16%) [ +0.06% +0.00% +0.08% / +0.18% -0.08% -0.16%] index_select strided 8 : Elapsed 0.050 ms (5.013 ms / 100) 5.039 -> 5.041 ( +0.04%) [ +0.04% +0.00% +0.10% / +0.06% +0.04% +0.04%] index_select strided 16 : Elapsed 0.050 ms (5.041 ms / 100) 5.029 -> 5.004 ( -0.50%) [ +0.04% +0.00% +0.04% / -0.06% -0.38% -0.50%] index_select strided 64 : Elapsed 0.050 ms (5.031 ms / 100) 4.853 -> 4.804 ( -1.01%) [ +0.02% +0.08% +0.00% / +0.00% -1.01% -0.87%] index_select strided 100 : Elapsed 0.049 ms (4.854 ms / 100) 5.008 -> 5.006 ( -0.04%) [ +0.08% +0.12% +0.00% / -0.04% +0.02% +0.10%] index_select random : Elapsed 0.050 ms (5.012 ms / 100) 5.008 -> 5.007 ( -0.02%) [ +0.12% +0.00% +0.12% / -0.02% +0.18% +0.14%] index_select random_sorted : Elapsed 0.050 ms (5.014 ms / 100) 5.016 -> 5.014 ( -0.04%) [ +0.18% +0.10% +0.00% / +0.12% +0.00% -0.04%] index_select perm : Elapsed 0.050 ms (5.025 ms / 100) 5.026 -> 5.029 ( +0.06%) [ +0.00% +0.16% +0.16% / +0.38% +0.06% +0.24%] index_select perm_sorted : Elapsed 0.050 ms (5.026 ms / 100) B = [50, 15, 250] (stride (15, 1, 750)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 1 19.089 -> 19.088 ( -0.01%) [ +0.00% +0.05% +0.21% / -0.01% +0.68% +0.70%] index_select const : Elapsed 0.191 ms (19.089 ms / 100) 19.133 -> 19.085 ( -0.25%) [ +0.04% +0.18% +0.00% / -0.25% +0.79% +0.60%] index_select wrap : Elapsed 0.191 ms (19.140 ms / 100) 19.092 -> 19.139 ( +0.25%) [ +0.15% +0.00% +0.03% / +0.25% +0.95% +1.01%] index_select linear : Elapsed 0.191 ms (19.121 ms / 100) 19.070 -> 19.120 ( +0.26%) [ +0.30% +0.42% +0.00% / +0.26% +0.96% +0.53%] index_select reverse : Elapsed 0.191 ms (19.128 ms / 100) 19.091 -> 19.115 ( +0.13%) [ +0.07% +0.00% +0.01% / +0.13% +0.78% +0.87%] index_select skip64 : Elapsed 0.191 ms (19.104 ms / 100) 19.123 -> 19.136 ( +0.07%) [ +0.15% +0.14% +0.00% / +0.07% +0.72% +0.71%] index_select skip256 : Elapsed 0.192 ms (19.151 ms / 100) 19.776 -> 19.554 ( -1.12%) [ +0.00% +0.23% +0.16% / -0.01% -1.12% -0.98%] index_select spread : Elapsed 0.198 ms (19.776 ms / 100) 19.418 -> 19.250 ( -0.87%) [ +0.00% +0.26% +0.16% / +0.02% -0.87% -0.61%] index_select strided 3 : Elapsed 0.194 ms (19.418 ms / 100) 19.560 -> 19.327 ( -1.19%) [ +0.20% +0.34% +0.00% / +0.25% -1.01% -1.19%] index_select strided 5 : Elapsed 0.196 ms (19.599 ms / 100) 19.622 -> 19.457 ( -0.84%) [ +0.35% +0.15% +0.00% / +0.23% -0.75% -0.84%] index_select strided 7 : Elapsed 0.197 ms (19.690 ms / 100) 19.618 -> 19.538 ( -0.41%) [ +0.00% +0.17% +0.36% / +0.08% -0.36% -0.41%] index_select strided 8 : Elapsed 0.196 ms (19.618 ms / 100) 19.671 -> 19.632 ( -0.20%) [ +0.08% +0.07% +0.00% / +0.25% -0.11% -0.20%] index_select strided 16 : Elapsed 0.197 ms (19.687 ms / 100) 19.565 -> 19.552 ( -0.07%) [ +0.00% +0.39% +0.24% / +0.24% +0.03% -0.07%] index_select strided 64 : Elapsed 0.196 ms (19.565 ms / 100) 19.607 -> 19.631 ( +0.12%) [ +0.34% +0.11% +0.00% / +0.12% +0.75% +0.70%] index_select strided 100 : Elapsed 0.197 ms (19.673 ms / 100) 19.589 -> 19.607 ( +0.09%) [ +0.00% +0.29% +0.37% / +0.46% +0.13% +0.09%] index_select random : Elapsed 0.196 ms (19.589 ms / 100) 19.641 -> 19.477 ( -0.83%) [ +0.00% +0.23% +0.19% / +0.25% -0.73% -0.83%] index_select random_sorted : Elapsed 0.196 ms (19.641 ms / 100) 19.571 -> 19.593 ( +0.11%) [ +0.14% +0.00% +0.22% / +0.18% +0.11% +0.37%] index_select perm : Elapsed 0.196 ms (19.598 ms / 100) 19.546 -> 19.514 ( -0.16%) [ +0.00% +0.27% +0.48% / +0.34% -0.16% -0.04%] index_select perm_sorted : Elapsed 0.195 ms (19.546 ms / 100) B = [50, 15, 250] (stride (1, 50, 750)) A = [50, 150, 250] (stride (37500, 1, 150)) dim = 1 13.240 -> 12.686 ( -4.18%) [ +0.25% +0.52% +0.00% / +0.04% -4.18% -4.18%] index_select const : Elapsed 0.133 ms (13.273 ms / 100) 13.763 -> 13.245 ( -3.76%) [ +0.13% +0.38% +0.00% / +0.10% -3.52% -3.76%] index_select wrap : Elapsed 0.138 ms (13.781 ms / 100) 13.745 -> 13.276 ( -3.41%) [ +0.00% +0.23% +0.36% / +0.76% -3.32% -3.41%] index_select linear : Elapsed 0.137 ms (13.745 ms / 100) 13.609 -> 13.268 ( -2.51%) [ +0.00% +0.93% +0.63% / +0.77% -2.40% -2.51%] index_select reverse : Elapsed 0.136 ms (13.609 ms / 100) good 13.253 -> 12.582 ( -5.06%) [ +0.66% +0.00% +0.63% / +0.66% -5.06% -3.50%] index_select skip64 : Elapsed 0.133 ms (13.341 ms / 100) 13.313 -> 12.669 ( -4.84%) [ +0.44% +0.00% +0.58% / +0.47% -4.84% -4.50%] index_select skip256 : Elapsed 0.134 ms (13.372 ms / 100) 14.846 -> 14.709 ( -0.92%) [ +0.23% +0.29% +0.00% / +0.23% -0.67% -0.92%] index_select spread : Elapsed 0.149 ms (14.880 ms / 100) 14.070 -> 13.533 ( -3.82%) [ +0.00% +0.00% +0.10% / +0.06% -3.82% -3.62%] index_select strided 3 : Elapsed 0.141 ms (14.070 ms / 100) 14.466 -> 13.913 ( -3.82%) [ +0.50% +0.00% +0.13% / +0.14% -3.66% -3.82%] index_select strided 5 : Elapsed 0.145 ms (14.538 ms / 100) 14.710 -> 14.303 ( -2.77%) [ +0.58% +0.12% +0.00% / -0.01% -2.77% -2.36%] index_select strided 7 : Elapsed 0.148 ms (14.796 ms / 100) 14.847 -> 14.519 ( -2.21%) [ +0.43% +0.34% +0.00% / +0.24% -2.21% -2.00%] index_select strided 8 : Elapsed 0.149 ms (14.911 ms / 100) 14.935 -> 14.789 ( -0.98%) [ +0.16% +0.01% +0.00% / -0.29% -0.98% -0.94%] index_select strided 16 : Elapsed 0.150 ms (14.959 ms / 100) 15.060 -> 14.787 ( -1.81%) [ +0.00% +0.44% +0.15% / +0.11% -1.64% -1.81%] index_select strided 64 : Elapsed 0.151 ms (15.060 ms / 100) 15.040 -> 14.900 ( -0.93%) [ +0.11% +0.00% +0.10% / +0.15% -0.93% -0.70%] index_select strided 100 : Elapsed 0.151 ms (15.056 ms / 100) 14.890 -> 14.742 ( -0.99%) [ +0.34% +0.00% +0.09% / +0.01% -0.99% -0.96%] index_select random : Elapsed 0.149 ms (14.940 ms / 100) 14.410 -> 14.411 ( +0.01%) [ +0.10% +0.03% +0.00% / +0.09% +0.01% +0.10%] index_select random_sorted : Elapsed 0.144 ms (14.424 ms / 100) 14.859 -> 14.609 ( -1.68%) [ +0.73% +0.00% +0.43% / -0.02% -1.68% -1.46%] index_select perm : Elapsed 0.150 ms (14.967 ms / 100) 14.511 -> 14.152 ( -2.47%) [ +0.00% +0.14% +0.28% / -0.06% -2.25% -2.47%] index_select perm_sorted : Elapsed 0.145 ms (14.511 ms / 100) out_shape = [50, 150, 15] in_shape = [50, 150, 250] idx_dim = 2 B = [50, 150, 15] (stride (15, 750, 1)) A = [50, 150, 250] (stride (1, 12500, 50)) dim = 2 3.350 -> 3.355 ( +0.15%) [ +0.12% +0.00% +0.15% / +0.15% +2.51% +2.69%] index_select const : Elapsed 0.034 ms (3.354 ms / 100) 3.579 -> 3.545 ( -0.95%) [ +0.22% +0.08% +0.00% / +0.06% -0.95% -0.92%] index_select wrap : Elapsed 0.036 ms (3.587 ms / 100) 3.593 -> 3.550 ( -1.20%) [ +0.19% +0.00% +0.17% / +0.28% -1.20% -1.14%] index_select linear : Elapsed 0.036 ms (3.600 ms / 100) 3.618 -> 3.556 ( -1.71%) [ +0.17% +0.00% +0.36% / +0.33% -1.71% -1.52%] index_select reverse : Elapsed 0.036 ms (3.624 ms / 100) 3.428 -> 3.418 ( -0.29%) [ +0.00% +0.06% +0.06% / +0.03% -0.29% -0.09%] index_select skip64 : Elapsed 0.034 ms (3.428 ms / 100) 3.427 -> 3.418 ( -0.26%) [ +0.18% +0.09% +0.00% / +0.00% -0.12% -0.26%] index_select skip256 : Elapsed 0.034 ms (3.433 ms / 100) 3.583 -> 3.590 ( +0.20%) [ +0.00% +0.11% +0.22% / +0.20% +1.14% +1.17%] index_select spread : Elapsed 0.036 ms (3.583 ms / 100) 3.577 -> 3.584 ( +0.20%) [ +0.08% +0.00% +0.08% / +0.20% +1.54% +1.45%] index_select strided 3 : Elapsed 0.036 ms (3.580 ms / 100) 3.595 -> 3.594 ( -0.03%) [ +0.19% +0.00% +0.06% / -0.03% +0.86% +0.83%] index_select strided 5 : Elapsed 0.036 ms (3.602 ms / 100) 3.588 -> 3.589 ( +0.03%) [ +0.14% +0.33% +0.00% / +0.03% +0.75% +0.98%] index_select strided 7 : Elapsed 0.036 ms (3.593 ms / 100) 3.589 -> 3.603 ( +0.39%) [ +0.25% +0.00% +0.08% / +0.39% +1.20% +1.50%] index_select strided 8 : Elapsed 0.036 ms (3.598 ms / 100) 3.550 -> 3.563 ( +0.37%) [ +0.31% +0.37% +0.00% / +0.37% +2.11% +2.23%] index_select strided 16 : Elapsed 0.036 ms (3.561 ms / 100) 3.547 -> 3.547 ( +0.00%) [ +0.00% +0.17% +0.08% / +0.00% +1.69% +1.89%] index_select strided 64 : Elapsed 0.035 ms (3.547 ms / 100) 3.473 -> 3.471 ( -0.06%) [ +0.20% +0.06% +0.00% / +0.12% -0.06% -0.06%] index_select strided 100 : Elapsed 0.035 ms (3.480 ms / 100) 3.537 -> 3.536 ( -0.03%) [ +0.00% +0.17% +0.03% / -0.03% +3.48% +3.36%] index_select random : Elapsed 0.035 ms (3.537 ms / 100) 3.563 -> 3.571 ( +0.22%) [ +0.20% +0.00% +0.39% / +0.22% +3.09% +3.00%] index_select random_sorted : Elapsed 0.036 ms (3.570 ms / 100) 3.567 -> 3.568 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +1.12% +1.18%] index_select perm : Elapsed 0.036 ms (3.569 ms / 100) 3.593 -> 3.597 ( +0.11%) [ +0.14% +0.25% +0.00% / +0.31% +0.11% +0.28%] index_select perm_sorted : Elapsed 0.036 ms (3.598 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 150, 250] (stride (1, 12500, 50)) dim = 2 5.614 -> 5.611 ( -0.05%) [ +0.02% +0.02% +0.00% / -0.05% +0.27% +0.34%] index_select const : Elapsed 0.056 ms (5.615 ms / 100) 5.876 -> 5.879 ( +0.05%) [ +0.03% +0.00% +0.12% / +0.05% +1.04% +1.09%] index_select wrap : Elapsed 0.059 ms (5.878 ms / 100) 5.876 -> 5.884 ( +0.14%) [ +0.12% +0.00% +0.12% / +0.14% +1.02% +0.90%] index_select linear : Elapsed 0.059 ms (5.883 ms / 100) 5.878 -> 5.877 ( -0.02%) [ +0.00% +0.00% +0.09% / -0.02% +0.78% +0.90%] index_select reverse : Elapsed 0.059 ms (5.878 ms / 100) 5.610 -> 5.611 ( +0.02%) [ +0.02% +0.00% +0.07% / +0.02% +0.39% +0.36%] index_select skip64 : Elapsed 0.056 ms (5.611 ms / 100) 5.608 -> 5.612 ( +0.07%) [ +0.12% +0.02% +0.00% / +0.07% +0.45% +0.46%] index_select skip256 : Elapsed 0.056 ms (5.615 ms / 100) 5.931 -> 5.922 ( -0.15%) [ +0.10% +0.00% +0.05% / -0.15% +0.05% +0.24%] index_select spread : Elapsed 0.059 ms (5.937 ms / 100) 5.937 -> 5.937 ( +0.00%) [ +0.03% +0.02% +0.00% / +0.12% +0.12% +0.00%] index_select strided 3 : Elapsed 0.059 ms (5.939 ms / 100) 5.916 -> 5.923 ( +0.12%) [ +0.17% +0.00% +0.02% / +0.12% +0.49% +0.64%] index_select strided 5 : Elapsed 0.059 ms (5.926 ms / 100) 5.922 -> 5.912 ( -0.17%) [ +0.00% +0.00% +0.08% / -0.17% +0.56% +0.64%] index_select strided 7 : Elapsed 0.059 ms (5.922 ms / 100) 5.919 -> 5.939 ( +0.34%) [ +0.10% +0.19% +0.00% / +0.34% +0.59% +0.61%] index_select strided 8 : Elapsed 0.059 ms (5.925 ms / 100) 5.927 -> 5.934 ( +0.12%) [ +0.08% +0.08% +0.00% / +0.12% +0.57% +0.74%] index_select strided 16 : Elapsed 0.059 ms (5.932 ms / 100) 5.909 -> 5.922 ( +0.22%) [ +0.17% +0.10% +0.00% / +0.22% +0.63% +0.76%] index_select strided 64 : Elapsed 0.059 ms (5.919 ms / 100) 5.673 -> 5.681 ( +0.14%) [ +0.07% +0.18% +0.00% / +0.14% +0.41% +0.51%] index_select strided 100 : Elapsed 0.057 ms (5.677 ms / 100) 5.901 -> 5.899 ( -0.03%) [ +0.10% +0.03% +0.00% / -0.03% +0.37% +0.41%] index_select random : Elapsed 0.059 ms (5.907 ms / 100) 5.901 -> 5.906 ( +0.08%) [ +0.00% +0.12% +0.03% / +0.08% +0.39% +0.20%] index_select random_sorted : Elapsed 0.059 ms (5.901 ms / 100) 5.912 -> 5.919 ( +0.12%) [ +0.02% +0.00% +0.10% / +0.12% +0.12% +0.17%] index_select perm : Elapsed 0.059 ms (5.913 ms / 100) 5.922 -> 5.926 ( +0.07%) [ +0.00% +0.02% +0.08% / +0.07% +0.20% +0.15%] index_select perm_sorted : Elapsed 0.059 ms (5.922 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 150, 250] (stride (150, 1, 7500)) dim = 2 2.541 -> 2.546 ( +0.20%) [ +0.04% +0.12% +0.00% / +0.20% +0.51% +0.55%] index_select const : Elapsed 0.025 ms (2.542 ms / 100) 3.101 -> 3.108 ( +0.23%) [ +0.00% +0.19% +0.10% / +0.23% +0.45% +0.48%] index_select wrap : Elapsed 0.031 ms (3.101 ms / 100) 3.098 -> 3.102 ( +0.13%) [ +0.00% +0.16% +0.10% / +0.13% +0.48% +0.45%] index_select linear : Elapsed 0.031 ms (3.098 ms / 100) 3.104 -> 3.109 ( +0.16%) [ +0.06% +0.00% +0.19% / +0.16% +0.55% +0.42%] index_select reverse : Elapsed 0.031 ms (3.106 ms / 100) 2.551 -> 2.548 ( -0.12%) [ +0.08% +0.00% +0.00% / -0.12% +0.08% -0.08%] index_select skip64 : Elapsed 0.026 ms (2.553 ms / 100) 2.550 -> 2.546 ( -0.16%) [ +0.12% +0.00% +0.00% / -0.16% +0.16% +0.00%] index_select skip256 : Elapsed 0.026 ms (2.553 ms / 100) 3.113 -> 3.118 ( +0.16%) [ +0.00% +0.10% +0.00% / +0.16% +0.48% +0.71%] index_select spread : Elapsed 0.031 ms (3.113 ms / 100) 3.115 -> 3.111 ( -0.13%) [ +0.26% +0.16% +0.00% / -0.13% +0.32% +0.61%] index_select strided 3 : Elapsed 0.031 ms (3.123 ms / 100) 3.104 -> 3.106 ( +0.06%) [ +0.13% +0.16% +0.00% / +0.06% +0.71% +0.71%] index_select strided 5 : Elapsed 0.031 ms (3.108 ms / 100) 3.114 -> 3.122 ( +0.26%) [ +0.29% +0.45% +0.00% / +0.29% +0.39% +0.26%] index_select strided 7 : Elapsed 0.031 ms (3.123 ms / 100) 3.098 -> 3.098 ( +0.00%) [ +0.06% +0.00% +0.00% / +0.00% +0.10% +0.19%] index_select strided 8 : Elapsed 0.031 ms (3.100 ms / 100) 3.090 -> 3.092 ( +0.06%) [ +0.26% +0.00% +0.16% / +0.06% +0.49% +0.49%] index_select strided 16 : Elapsed 0.031 ms (3.098 ms / 100) 3.108 -> 3.113 ( +0.16%) [ +0.00% +0.03% +0.19% / +0.16% +0.51% +0.42%] index_select strided 64 : Elapsed 0.031 ms (3.108 ms / 100) 2.723 -> 2.724 ( +0.04%) [ +0.15% +0.22% +0.00% / +0.04% +0.73% +0.70%] index_select strided 100 : Elapsed 0.027 ms (2.727 ms / 100) 3.119 -> 3.117 ( -0.06%) [ +0.03% +0.13% +0.00% / -0.06% +0.77% +0.74%] index_select random : Elapsed 0.031 ms (3.120 ms / 100) 3.121 -> 3.116 ( -0.16%) [ +0.00% +0.06% +0.06% / -0.16% +0.67% +0.54%] index_select random_sorted : Elapsed 0.031 ms (3.121 ms / 100) 3.112 -> 3.113 ( +0.03%) [ +0.13% +0.03% +0.00% / +0.03% +0.67% +0.80%] index_select perm : Elapsed 0.031 ms (3.116 ms / 100) 3.107 -> 3.113 ( +0.19%) [ +0.13% +0.10% +0.00% / +0.19% +0.39% +0.68%] index_select perm_sorted : Elapsed 0.031 ms (3.111 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 150, 250] (stride (1, 50, 7500)) dim = 2 5.581 -> 5.590 ( +0.16%) [ +0.00% +0.09% +0.00% / +0.16% +0.48% +0.50%] index_select const : Elapsed 0.056 ms (5.581 ms / 100) 5.702 -> 5.713 ( +0.19%) [ +0.14% +0.11% +0.00% / +0.19% +0.56% +0.46%] index_select wrap : Elapsed 0.057 ms (5.710 ms / 100) 5.708 -> 5.709 ( +0.02%) [ +0.04% +0.00% +0.02% / +0.02% +0.40% +0.35%] index_select linear : Elapsed 0.057 ms (5.710 ms / 100) 5.708 -> 5.709 ( +0.02%) [ +0.00% +0.02% +0.07% / +0.02% +0.33% +0.46%] index_select reverse : Elapsed 0.057 ms (5.708 ms / 100) 5.584 -> 5.584 ( +0.00%) [ +0.07% +0.02% +0.00% / +0.00% +0.48% +0.50%] index_select skip64 : Elapsed 0.056 ms (5.588 ms / 100) 5.583 -> 5.589 ( +0.11%) [ +0.00% +0.04% +0.04% / +0.11% +0.50% +0.50%] index_select skip256 : Elapsed 0.056 ms (5.583 ms / 100) 5.700 -> 5.703 ( +0.05%) [ +0.05% +0.00% +0.04% / +0.05% +0.39% +0.44%] index_select spread : Elapsed 0.057 ms (5.703 ms / 100) 5.712 -> 5.709 ( -0.05%) [ +0.04% +0.02% +0.00% / -0.05% +0.14% +0.09%] index_select strided 3 : Elapsed 0.057 ms (5.714 ms / 100) 5.705 -> 5.704 ( -0.02%) [ +0.02% +0.00% +0.02% / -0.02% +0.33% +0.35%] index_select strided 5 : Elapsed 0.057 ms (5.706 ms / 100) 5.706 -> 5.715 ( +0.16%) [ +0.25% +0.05% +0.00% / +0.16% +0.40% +0.56%] index_select strided 7 : Elapsed 0.057 ms (5.720 ms / 100) 5.693 -> 5.701 ( +0.14%) [ +0.05% +0.07% +0.00% / +0.14% +0.72% +0.65%] index_select strided 8 : Elapsed 0.057 ms (5.696 ms / 100) 5.703 -> 5.706 ( +0.05%) [ +0.00% +0.07% +0.09% / +0.05% +0.47% +0.42%] index_select strided 16 : Elapsed 0.057 ms (5.703 ms / 100) 5.711 -> 5.712 ( +0.02%) [ +0.09% +0.04% +0.00% / +0.02% +0.23% +0.18%] index_select strided 64 : Elapsed 0.057 ms (5.716 ms / 100) 5.596 -> 5.597 ( +0.02%) [ +0.05% +0.02% +0.00% / +0.02% +0.41% +0.43%] index_select strided 100 : Elapsed 0.056 ms (5.599 ms / 100) 5.690 -> 5.693 ( +0.05%) [ +0.11% +0.12% +0.00% / +0.05% +0.44% +0.47%] index_select random : Elapsed 0.057 ms (5.696 ms / 100) 5.687 -> 5.691 ( +0.07%) [ +0.02% +0.00% +0.09% / +0.07% +0.51% +0.60%] index_select random_sorted : Elapsed 0.057 ms (5.688 ms / 100) 5.708 -> 5.709 ( +0.02%) [ +0.04% +0.02% +0.00% / +0.02% +0.21% +0.26%] index_select perm : Elapsed 0.057 ms (5.710 ms / 100) 5.709 -> 5.709 ( +0.00%) [ +0.05% +0.16% +0.00% / +0.00% +0.16% +0.18%] index_select perm_sorted : Elapsed 0.057 ms (5.712 ms / 100) B = [50, 150, 15] (stride (1, 50, 7500)) A = [50, 150, 250] (stride (37500, 250, 1)) dim = 2 7.482 -> 7.485 ( +0.04%) [ +0.01% +0.12% +0.00% / +0.04% +0.27% +0.25%] index_select const : Elapsed 0.075 ms (7.483 ms / 100) 7.617 -> 7.624 ( +0.09%) [ +0.00% +0.05% +0.08% / +0.09% +2.55% +2.49%] index_select wrap : Elapsed 0.076 ms (7.617 ms / 100) 7.611 -> 7.619 ( +0.11%) [ +0.08% +0.09% +0.00% / +0.11% +2.63% +2.77%] index_select linear : Elapsed 0.076 ms (7.617 ms / 100) 7.610 -> 7.634 ( +0.32%) [ +0.05% +0.00% +0.17% / +0.32% +2.79% +2.83%] index_select reverse : Elapsed 0.076 ms (7.614 ms / 100) 7.484 -> 7.493 ( +0.12%) [ +0.04% +0.12% +0.00% / +0.12% +0.17% +0.25%] index_select skip64 : Elapsed 0.075 ms (7.487 ms / 100) 7.488 -> 7.478 ( -0.13%) [ +0.00% +0.03% +0.07% / -0.13% +0.09% +0.17%] index_select skip256 : Elapsed 0.075 ms (7.488 ms / 100) 10.192 -> 10.168 ( -0.24%) [ +0.00% +0.04% +0.00% / +0.12% -0.24% -0.22%] index_select spread : Elapsed 0.102 ms (10.192 ms / 100) 8.289 -> 8.292 ( +0.04%) [ +0.02% +0.11% +0.00% / +0.04% +0.53% +0.55%] index_select strided 3 : Elapsed 0.083 ms (8.291 ms / 100) 9.023 -> 9.027 ( +0.04%) [ +0.06% +0.00% +0.06% / +0.06% +0.04% +0.14%] index_select strided 5 : Elapsed 0.090 ms (9.028 ms / 100) 9.772 -> 9.799 ( +0.28%) [ +0.00% +0.11% +0.17% / +0.28% +0.32% +0.41%] index_select strided 7 : Elapsed 0.098 ms (9.772 ms / 100) 10.152 -> 10.146 ( -0.06%) [ +0.06% +0.02% +0.00% / -0.06% +0.04% +0.28%] index_select strided 8 : Elapsed 0.102 ms (10.158 ms / 100) 10.166 -> 10.164 ( -0.02%) [ +0.25% +0.00% +0.21% / +0.24% -0.02% +0.15%] index_select strided 16 : Elapsed 0.102 ms (10.191 ms / 100) 10.293 -> 10.241 ( -0.51%) [ +0.00% +0.06% +0.00% / +0.16% -0.51% -0.44%] index_select strided 64 : Elapsed 0.103 ms (10.293 ms / 100) 10.303 -> 10.254 ( -0.48%) [ +0.02% +0.00% +0.18% / +0.20% -0.21% -0.48%] index_select strided 100 : Elapsed 0.103 ms (10.305 ms / 100) 10.219 -> 10.189 ( -0.29%) [ +0.00% +0.08% +0.03% / +0.30% -0.29% -0.13%] index_select random : Elapsed 0.102 ms (10.219 ms / 100) 9.333 -> 9.322 ( -0.12%) [ +0.10% +0.00% +0.14% / +0.08% -0.12% -0.10%] index_select random_sorted : Elapsed 0.093 ms (9.342 ms / 100) 9.999 -> 9.977 ( -0.22%) [ +0.00% +0.00% +0.26% / +0.19% -0.19% -0.22%] index_select perm : Elapsed 0.100 ms (9.999 ms / 100) 9.774 -> 9.709 ( -0.67%) [ +0.00% +0.18% +0.01% / +0.10% -0.63% -0.67%] index_select perm_sorted : Elapsed 0.098 ms (9.774 ms / 100) B = [50, 150, 15] (stride (1, 50, 7500)) A = [50, 150, 250] (stride (250, 12500, 1)) dim = 2 4.329 -> 4.330 ( +0.02%) [ +0.00% +0.35% +0.16% / +0.02% +5.13% +5.24%] index_select const : Elapsed 0.043 ms (4.329 ms / 100) 4.920 -> 4.943 ( +0.47%) [ +0.16% +0.00% +0.47% / +0.47% +2.15% +2.13%] index_select wrap : Elapsed 0.049 ms (4.928 ms / 100) 4.929 -> 4.929 ( +0.00%) [ +0.10% +0.12% +0.00% / +0.00% +1.68% +1.74%] index_select linear : Elapsed 0.049 ms (4.934 ms / 100) 4.897 -> 4.910 ( +0.27%) [ +0.35% +0.00% +0.29% / +0.27% +2.04% +1.90%] index_select reverse : Elapsed 0.049 ms (4.914 ms / 100) 4.343 -> 4.358 ( +0.35%) [ +0.00% +0.07% +0.25% / +0.35% +4.95% +4.72%] index_select skip64 : Elapsed 0.043 ms (4.343 ms / 100) 4.339 -> 4.344 ( +0.12%) [ +0.00% +0.21% +0.25% / +0.12% +4.96% +5.07%] index_select skip256 : Elapsed 0.043 ms (4.339 ms / 100) 9.742 -> 9.748 ( +0.06%) [ +0.03% +0.00% +0.32% / +0.26% +0.20% +0.06%] index_select spread : Elapsed 0.097 ms (9.745 ms / 100) 6.472 -> 6.382 ( -1.39%) [ +0.00% +0.12% +0.05% / +0.08% -1.39% -1.28%] index_select strided 3 : Elapsed 0.065 ms (6.472 ms / 100) 7.756 -> 7.763 ( +0.09%) [ +0.04% +0.00% +0.13% / +0.09% +0.26% +0.22%] index_select strided 5 : Elapsed 0.078 ms (7.759 ms / 100) 9.089 -> 9.091 ( +0.02%) [ +0.00% +0.06% +0.29% / +0.21% +0.10% +0.02%] index_select strided 7 : Elapsed 0.091 ms (9.089 ms / 100) 9.745 -> 9.755 ( +0.10%) [ +0.11% +0.00% +0.15% / +0.10% +0.19% +0.14%] index_select strided 8 : Elapsed 0.098 ms (9.756 ms / 100) 9.737 -> 9.743 ( +0.06%) [ +0.11% +0.00% +0.06% / +0.31% +0.21% +0.06%] index_select strided 16 : Elapsed 0.097 ms (9.748 ms / 100) 9.803 -> 9.753 ( -0.51%) [ +0.00% +0.06% +0.16% / +0.12% -0.51% -0.44%] index_select strided 64 : Elapsed 0.098 ms (9.803 ms / 100) 9.826 -> 9.766 ( -0.61%) [ +0.26% +0.00% +0.27% / +0.08% -0.61% -0.57%] index_select strided 100 : Elapsed 0.099 ms (9.852 ms / 100) 9.648 -> 9.604 ( -0.46%) [ +0.00% +0.09% +0.15% / +0.19% -0.46% -0.23%] index_select random : Elapsed 0.096 ms (9.648 ms / 100) 9.248 -> 9.243 ( -0.05%) [ +0.22% +0.00% +0.24% / +0.06% -0.05% +0.00%] index_select random_sorted : Elapsed 0.093 ms (9.268 ms / 100) 9.272 -> 9.269 ( -0.03%) [ +0.01% +0.00% +0.32% / +0.20% -0.03% +0.01%] index_select perm : Elapsed 0.093 ms (9.273 ms / 100) 8.429 -> 8.426 ( -0.04%) [ +0.00% +0.07% +0.38% / +0.39% -0.04% +0.00%] index_select perm_sorted : Elapsed 0.084 ms (8.429 ms / 100) out_shape = [15, 250, 150] in_shape = [50, 250, 150] idx_dim = 0 B = [15, 250, 150] (stride (37500, 150, 1)) dim = 0 fill_cnt = 50 7.314 -> 7.322 ( +0.11%) [ +0.21% +0.07% +0.00% / +0.11% +0.45% +0.57%] index_fill_ const : Elapsed 0.073 ms (7.329 ms / 100) 7.433 -> 7.434 ( +0.01%) [ +0.13% +0.12% +0.00% / +0.01% +0.35% +0.54%] index_fill_ linear : Elapsed 0.074 ms (7.443 ms / 100) 7.429 -> 7.421 ( -0.11%) [ +0.09% +0.01% +0.00% / -0.11% +0.34% +0.32%] index_fill_ reverse : Elapsed 0.074 ms (7.436 ms / 100) 7.323 -> 7.320 ( -0.04%) [ +0.07% +0.00% +0.04% / -0.04% +0.46% +0.42%] index_fill_ skip64 : Elapsed 0.073 ms (7.328 ms / 100) 7.317 -> 7.313 ( -0.05%) [ +0.07% +0.10% +0.00% / -0.05% +0.46% +0.42%] index_fill_ skip256 : Elapsed 0.073 ms (7.322 ms / 100) 7.406 -> 7.396 ( -0.14%) [ +0.15% +0.01% +0.00% / -0.14% +0.39% +0.46%] index_fill_ spread : Elapsed 0.074 ms (7.417 ms / 100) 7.368 -> 7.355 ( -0.18%) [ +0.19% +0.00% +0.03% / -0.18% +0.37% +0.30%] index_fill_ strided 3 : Elapsed 0.074 ms (7.382 ms / 100) 7.357 -> 7.349 ( -0.11%) [ +0.00% +0.04% +0.00% / -0.11% +0.23% +0.15%] index_fill_ strided 5 : Elapsed 0.074 ms (7.357 ms / 100) 7.567 -> 7.571 ( +0.05%) [ +0.19% +0.05% +0.00% / +0.05% +0.41% +0.45%] index_fill_ strided 7 : Elapsed 0.076 ms (7.581 ms / 100) 7.573 -> 7.581 ( +0.11%) [ +0.25% +0.21% +0.00% / +0.11% +0.38% +0.46%] index_fill_ strided 8 : Elapsed 0.076 ms (7.592 ms / 100) 7.552 -> 7.555 ( +0.04%) [ +0.00% +0.00% +0.00% / +0.04% +0.25% +0.25%] index_fill_ random : Elapsed 0.076 ms (7.552 ms / 100) 7.392 -> 7.394 ( +0.03%) [ +0.07% +0.03% +0.00% / +0.03% +0.50% +0.51%] index_fill_ random_sorted : Elapsed 0.074 ms (7.397 ms / 100) B = [15, 250, 150] (stride (150, 2250, 1)) A = [50, 250, 150] (stride (37500, 1, 250)) dim = 0 8.435 -> 8.392 ( -0.51%) [ +0.08% +0.00% +0.34% / +0.12% -0.51% -0.40%] index_select const : Elapsed 0.084 ms (8.442 ms / 100) 9.805 -> 9.767 ( -0.39%) [ +0.00% +0.20% +0.13% / +0.24% -0.39% +0.04%] index_select wrap : Elapsed 0.098 ms (9.805 ms / 100) 9.809 -> 9.793 ( -0.16%) [ +0.00% +0.00% +0.15% / +0.21% -0.14% -0.16%] index_select linear : Elapsed 0.098 ms (9.809 ms / 100) 9.809 -> 9.760 ( -0.50%) [ +0.05% +0.21% +0.00% / +0.07% -0.50% -0.29%] index_select reverse : Elapsed 0.098 ms (9.814 ms / 100) 8.390 -> 8.357 ( -0.39%) [ +0.11% +0.02% +0.00% / +0.24% -0.36% -0.39%] index_select skip64 : Elapsed 0.084 ms (8.399 ms / 100) 8.494 -> 8.368 ( -1.48%) [ +0.34% +0.00% +0.12% / +0.04% -1.15% -1.48%] index_select skip256 : Elapsed 0.085 ms (8.523 ms / 100) 9.872 -> 9.763 ( -1.10%) [ +0.00% +0.12% +0.00% / +0.10% -0.88% -1.10%] index_select spread : Elapsed 0.099 ms (9.872 ms / 100) 9.807 -> 9.816 ( +0.09%) [ +0.28% +0.07% +0.00% / +0.09% +0.39% +0.13%] index_select strided 3 : Elapsed 0.098 ms (9.834 ms / 100) 9.796 -> 9.802 ( +0.06%) [ +0.00% +0.11% +0.03% / +0.06% +0.94% +0.87%] index_select strided 5 : Elapsed 0.098 ms (9.796 ms / 100) 9.822 -> 9.822 ( +0.00%) [ +0.00% +0.18% +0.03% / +0.20% +0.00% +0.15%] index_select strided 7 : Elapsed 0.098 ms (9.822 ms / 100) 9.824 -> 9.822 ( -0.02%) [ +0.00% +0.27% +0.58% / +0.20% +0.06% -0.02%] index_select strided 8 : Elapsed 0.098 ms (9.824 ms / 100) 9.891 -> 9.820 ( -0.72%) [ +0.04% +0.06% +0.00% / +0.01% -0.70% -0.72%] index_select strided 16 : Elapsed 0.099 ms (9.895 ms / 100) 9.719 -> 9.708 ( -0.11%) [ +0.00% +0.11% +0.37% / +0.14% -0.11% +0.00%] index_select random : Elapsed 0.097 ms (9.719 ms / 100) 9.512 -> 9.513 ( +0.01%) [ +0.00% +0.15% +0.18% / +0.11% +0.02% +0.01%] index_select random_sorted : Elapsed 0.095 ms (9.512 ms / 100) 9.814 -> 9.771 ( -0.44%) [ +0.09% +0.00% +0.39% / +0.29% -0.28% -0.44%] index_select perm : Elapsed 0.098 ms (9.823 ms / 100) 9.859 -> 9.782 ( -0.78%) [ +0.11% +0.12% +0.00% / +0.15% -0.78% -0.48%] index_select perm_sorted : Elapsed 0.099 ms (9.870 ms / 100) B = [15, 250, 150] (stride (1, 15, 3750)) A = [50, 250, 150] (stride (150, 7500, 1)) dim = 0 41.893 -> 41.702 ( -0.46%) [ +0.14% +0.00% +0.07% / +0.10% -0.45% -0.46%] index_select const : Elapsed 0.420 ms (41.951 ms / 100) 41.740 -> 41.622 ( -0.28%) [ +0.00% +0.10% +0.05% / +0.03% -0.20% -0.28%] index_select wrap : Elapsed 0.417 ms (41.740 ms / 100) 41.752 -> 41.705 ( -0.11%) [ +0.00% +0.06% +0.06% / -0.06% -0.11% -0.10%] index_select linear : Elapsed 0.418 ms (41.752 ms / 100) 41.784 -> 41.756 ( -0.07%) [ +0.11% +0.06% +0.00% / +0.04% -0.07% +0.01%] index_select reverse : Elapsed 0.418 ms (41.829 ms / 100) 41.871 -> 41.728 ( -0.34%) [ +0.00% +0.04% +0.02% / -0.01% -0.34% -0.34%] index_select skip64 : Elapsed 0.419 ms (41.871 ms / 100) 41.832 -> 41.702 ( -0.31%) [ +0.00% +0.13% +0.14% / +0.08% -0.31% -0.23%] index_select skip256 : Elapsed 0.418 ms (41.832 ms / 100) 41.792 -> 41.764 ( -0.07%) [ +0.00% +0.14% +0.19% / +0.16% -0.04% -0.07%] index_select spread : Elapsed 0.418 ms (41.792 ms / 100) 41.788 -> 41.721 ( -0.16%) [ +0.00% +0.03% +0.11% / +0.05% -0.08% -0.16%] index_select strided 3 : Elapsed 0.418 ms (41.788 ms / 100) 41.795 -> 41.699 ( -0.23%) [ +0.15% +0.12% +0.00% / +0.14% -0.14% -0.23%] index_select strided 5 : Elapsed 0.419 ms (41.858 ms / 100) 41.834 -> 41.834 ( +0.00%) [ +0.00% +0.17% +0.06% / +0.07% +0.11% +0.00%] index_select strided 7 : Elapsed 0.418 ms (41.834 ms / 100) 41.783 -> 41.673 ( -0.26%) [ +0.16% +0.10% +0.00% / +0.15% -0.26% -0.23%] index_select strided 8 : Elapsed 0.419 ms (41.851 ms / 100) 41.861 -> 41.617 ( -0.58%) [ +0.03% +0.05% +0.00% / +0.09% -0.48% -0.58%] index_select strided 16 : Elapsed 0.419 ms (41.875 ms / 100) 41.854 -> 41.702 ( -0.36%) [ +0.00% +0.01% +0.03% / -0.04% -0.31% -0.36%] index_select random : Elapsed 0.419 ms (41.854 ms / 100) 41.755 -> 41.708 ( -0.11%) [ +0.00% +0.00% +0.12% / +0.04% -0.11% -0.11%] index_select random_sorted : Elapsed 0.418 ms (41.755 ms / 100) 41.928 -> 41.811 ( -0.28%) [ +0.10% +0.00% +0.15% / +0.16% -0.24% -0.28%] index_select perm : Elapsed 0.420 ms (41.972 ms / 100) 41.867 -> 41.792 ( -0.18%) [ +0.14% +0.00% +0.13% / +0.05% -0.04% -0.18%] index_select perm_sorted : Elapsed 0.419 ms (41.927 ms / 100) out_shape = [50, 15, 150] in_shape = [50, 250, 150] idx_dim = 1 B = [50, 15, 150] (stride (1, 7500, 50)) A = [50, 250, 150] (stride (37500, 1, 250)) dim = 1 4.430 -> 4.362 ( -1.53%) [ +0.00% +0.11% +0.09% / +0.14% -1.53% -1.51%] index_select const : Elapsed 0.044 ms (4.430 ms / 100) 4.753 -> 4.760 ( +0.15%) [ +0.08% +0.21% +0.00% / +0.15% +2.99% +3.03%] index_select wrap : Elapsed 0.048 ms (4.757 ms / 100) 4.747 -> 4.757 ( +0.21%) [ +0.00% +0.23% +0.08% / +0.21% +3.08% +3.18%] index_select linear : Elapsed 0.047 ms (4.747 ms / 100) 4.731 -> 4.758 ( +0.57%) [ +0.15% +0.15% +0.00% / +0.57% +2.96% +2.77%] index_select reverse : Elapsed 0.047 ms (4.738 ms / 100) 4.441 -> 4.357 ( -1.89%) [ +0.11% +0.23% +0.00% / +0.18% -1.89% -1.85%] index_select skip64 : Elapsed 0.044 ms (4.446 ms / 100) 4.432 -> 4.364 ( -1.53%) [ +0.14% +0.00% +0.52% / +0.41% -1.53% -1.53%] index_select skip256 : Elapsed 0.044 ms (4.438 ms / 100) 7.999 -> 8.020 ( +0.26%) [ +0.25% +0.00% +0.46% / +0.29% +0.26% +0.59%] index_select spread : Elapsed 0.080 ms (8.019 ms / 100) 5.774 -> 5.781 ( +0.12%) [ +0.00% +0.03% +0.23% / +0.12% +0.29% +0.36%] index_select strided 3 : Elapsed 0.058 ms (5.774 ms / 100) 6.725 -> 6.727 ( +0.03%) [ +0.10% +0.00% +0.19% / +0.25% +0.22% +0.03%] index_select strided 5 : Elapsed 0.067 ms (6.732 ms / 100) 7.589 -> 7.603 ( +0.18%) [ +0.00% +0.11% +0.24% / +0.18% +0.51% +0.69%] index_select strided 7 : Elapsed 0.076 ms (7.589 ms / 100) 7.975 -> 7.990 ( +0.19%) [ +0.24% +0.00% +0.16% / +0.34% +0.19% +0.44%] index_select strided 8 : Elapsed 0.080 ms (7.994 ms / 100) 7.989 -> 8.009 ( +0.25%) [ +0.09% +0.00% +0.08% / +0.25% +0.61% +0.25%] index_select strided 16 : Elapsed 0.080 ms (7.996 ms / 100) 8.156 -> 8.152 ( -0.05%) [ +0.25% +0.00% +0.33% / +0.32% +0.02% -0.05%] index_select strided 64 : Elapsed 0.082 ms (8.176 ms / 100) 8.186 -> 8.193 ( +0.09%) [ +0.13% +0.00% +0.42% / +0.09% +0.10% +0.09%] index_select strided 100 : Elapsed 0.082 ms (8.197 ms / 100) 7.920 -> 7.864 ( -0.71%) [ +0.00% +0.53% +0.38% / +0.39% -0.64% -0.71%] index_select random : Elapsed 0.079 ms (7.920 ms / 100) 7.669 -> 7.624 ( -0.59%) [ +0.00% +0.78% +0.29% / +0.23% -0.43% -0.59%] index_select random_sorted : Elapsed 0.077 ms (7.669 ms / 100) 8.136 -> 8.138 ( +0.02%) [ +0.00% +0.43% +0.34% / +0.02% +0.49% +0.54%] index_select perm : Elapsed 0.081 ms (8.136 ms / 100) 7.287 -> 7.326 ( +0.54%) [ +0.03% +0.00% +0.59% / +0.54% +0.77% +0.97%] index_select perm_sorted : Elapsed 0.073 ms (7.289 ms / 100) B = [50, 15, 150] (stride (15, 1, 750)) A = [50, 250, 150] (stride (37500, 1, 250)) dim = 1 5.102 -> 5.116 ( +0.27%) [ +0.00% +0.29% +0.08% / +0.27% +4.61% +4.80%] index_select const : Elapsed 0.051 ms (5.102 ms / 100) 5.844 -> 5.872 ( +0.48%) [ +0.14% +0.00% +0.48% / +0.48% +1.51% +1.68%] index_select wrap : Elapsed 0.059 ms (5.852 ms / 100) 5.837 -> 5.853 ( +0.27%) [ +0.00% +0.10% +0.41% / +0.27% +1.35% +1.51%] index_select linear : Elapsed 0.058 ms (5.837 ms / 100) 5.806 -> 5.825 ( +0.33%) [ +0.00% +0.29% +0.14% / +0.33% +2.33% +2.19%] index_select reverse : Elapsed 0.058 ms (5.806 ms / 100) 5.121 -> 5.133 ( +0.23%) [ +0.02% +0.00% +0.43% / +0.23% +4.51% +4.24%] index_select skip64 : Elapsed 0.051 ms (5.122 ms / 100) 5.116 -> 5.135 ( +0.37%) [ +0.12% +0.00% +0.37% / +0.37% +3.93% +4.32%] index_select skip256 : Elapsed 0.051 ms (5.122 ms / 100) 9.166 -> 9.023 ( -1.56%) [ +0.00% +0.01% +0.37% / +0.21% -1.56% -1.55%] index_select spread : Elapsed 0.092 ms (9.166 ms / 100) 7.041 -> 6.956 ( -1.21%) [ +0.16% +0.00% +0.26% / +0.00% -1.21% -1.01%] index_select strided 3 : Elapsed 0.071 ms (7.052 ms / 100) 7.965 -> 7.817 ( -1.86%) [ +0.00% +0.03% +0.21% / +0.31% -1.75% -1.86%] index_select strided 5 : Elapsed 0.080 ms (7.965 ms / 100) 8.757 -> 8.559 ( -2.26%) [ +0.15% +0.00% +0.17% / +0.27% -2.24% -2.26%] index_select strided 7 : Elapsed 0.088 ms (8.770 ms / 100) 9.067 -> 8.885 ( -2.01%) [ +0.00% +0.04% +0.44% / +0.29% -1.97% -2.01%] index_select strided 8 : Elapsed 0.091 ms (9.067 ms / 100) 9.162 -> 9.021 ( -1.54%) [ +0.26% +0.00% +0.26% / +0.36% -1.54% -1.53%] index_select strided 16 : Elapsed 0.092 ms (9.186 ms / 100) 9.315 -> 9.251 ( -0.69%) [ +0.00% +0.27% +0.27% / +0.38% -0.69% -0.39%] index_select strided 64 : Elapsed 0.093 ms (9.315 ms / 100) 9.363 -> 9.279 ( -0.90%) [ +0.11% +0.00% +0.13% / +0.31% -0.73% -0.90%] index_select strided 100 : Elapsed 0.094 ms (9.373 ms / 100) 9.112 -> 9.022 ( -0.99%) [ +0.00% +0.10% +0.29% / +0.16% -0.99% -0.77%] index_select random : Elapsed 0.091 ms (9.112 ms / 100) 8.254 -> 8.095 ( -1.93%) [ +0.00% +0.15% +0.35% / +0.45% -1.93% -1.81%] index_select random_sorted : Elapsed 0.083 ms (8.254 ms / 100) 9.314 -> 9.241 ( -0.78%) [ +0.00% +0.20% +0.28% / +0.44% -0.78% -0.41%] index_select perm : Elapsed 0.093 ms (9.314 ms / 100) 8.605 -> 8.516 ( -1.03%) [ +0.00% +0.01% +0.05% / +0.16% -1.01% -1.03%] index_select perm_sorted : Elapsed 0.086 ms (8.605 ms / 100) B = [50, 15, 150] (stride (1, 50, 750)) A = [50, 250, 150] (stride (250, 1, 12500)) dim = 1 4.668 -> 4.544 ( -2.66%) [ +0.00% +0.19% +0.26% / +0.19% -2.59% -2.66%] index_select const : Elapsed 0.047 ms (4.668 ms / 100) 5.124 -> 5.132 ( +0.16%) [ +0.00% +0.12% +0.27% / +0.16% +2.13% +2.01%] index_select wrap : Elapsed 0.051 ms (5.124 ms / 100) 5.118 -> 5.133 ( +0.29%) [ +0.00% +0.37% +0.25% / +0.29% +2.11% +2.36%] index_select linear : Elapsed 0.051 ms (5.118 ms / 100) 5.110 -> 5.117 ( +0.14%) [ +0.00% +0.25% +0.41% / +0.14% +1.43% +1.47%] index_select reverse : Elapsed 0.051 ms (5.110 ms / 100) 4.686 -> 4.525 ( -3.44%) [ +0.00% +0.02% +0.15% / +0.04% -3.44% -3.24%] index_select skip64 : Elapsed 0.047 ms (4.686 ms / 100) 4.686 -> 4.536 ( -3.20%) [ +0.00% +0.02% +0.15% / +0.17% -3.20% -3.18%] index_select skip256 : Elapsed 0.047 ms (4.686 ms / 100) 9.853 -> 9.860 ( +0.07%) [ +0.01% +0.00% +0.12% / +0.07% +0.18% +0.14%] index_select spread : Elapsed 0.099 ms (9.854 ms / 100) 6.518 -> 6.495 ( -0.35%) [ +0.00% +0.23% +0.20% / -0.02% -0.35% -0.17%] index_select strided 3 : Elapsed 0.065 ms (6.518 ms / 100) 7.898 -> 7.859 ( -0.49%) [ +0.03% +0.00% +0.00% / -0.10% -0.49% -0.35%] index_select strided 5 : Elapsed 0.079 ms (7.900 ms / 100) 9.197 -> 9.207 ( +0.11%) [ +0.03% +0.00% +0.00% / +0.11% +0.12% +0.27%] index_select strided 7 : Elapsed 0.092 ms (9.200 ms / 100) 9.849 -> 9.855 ( +0.06%) [ +0.08% +0.00% +0.13% / +0.11% +0.07% +0.06%] index_select strided 8 : Elapsed 0.099 ms (9.857 ms / 100) 9.817 -> 9.845 ( +0.29%) [ +0.00% +0.13% +0.15% / +0.29% +0.38% +0.50%] index_select strided 16 : Elapsed 0.098 ms (9.817 ms / 100) 9.880 -> 9.889 ( +0.09%) [ +0.00% +0.12% +0.29% / +0.09% +0.32% +0.40%] index_select strided 64 : Elapsed 0.099 ms (9.880 ms / 100) 9.901 -> 9.912 ( +0.11%) [ +0.00% +0.22% +0.18% / +0.11% +0.88% +0.65%] index_select strided 100 : Elapsed 0.099 ms (9.901 ms / 100) 9.558 -> 9.574 ( +0.17%) [ +0.08% +0.00% +0.16% / +0.17% +0.71% +0.71%] index_select random : Elapsed 0.096 ms (9.566 ms / 100) 8.693 -> 8.714 ( +0.24%) [ +0.21% +0.17% +0.00% / +0.24% +0.52% +0.53%] index_select random_sorted : Elapsed 0.087 ms (8.711 ms / 100) 9.773 -> 9.779 ( +0.06%) [ +0.00% +0.02% +0.23% / +0.06% +0.60% +0.65%] index_select perm : Elapsed 0.098 ms (9.773 ms / 100) 8.394 -> 8.407 ( +0.15%) [ +0.10% +0.00% +0.07% / +0.15% +0.52% +0.61%] index_select perm_sorted : Elapsed 0.084 ms (8.402 ms / 100) out_shape = [50, 250, 15] in_shape = [50, 250, 150] idx_dim = 2 B = [50, 250, 15] (stride (3750, 15, 1)) A = [50, 250, 150] (stride (37500, 150, 1)) dim = 2 16.868 -> 16.753 ( -0.68%) [ +0.19% +0.00% +0.83% / +0.81% -0.20% -0.68%] index_select const : Elapsed 0.169 ms (16.900 ms / 100) 17.222 -> 16.973 ( -1.45%) [ +0.00% +0.18% +0.39% / +0.05% -1.45% -1.21%] index_select wrap : Elapsed 0.172 ms (17.222 ms / 100) 17.225 -> 17.002 ( -1.29%) [ +0.00% +0.09% +0.15% / +0.22% -1.29% -1.10%] index_select linear : Elapsed 0.172 ms (17.225 ms / 100) 17.106 -> 16.885 ( -1.29%) [ +0.00% +0.46% +0.63% / +0.44% -1.29% -0.44%] index_select reverse : Elapsed 0.171 ms (17.106 ms / 100) 16.652 -> 16.720 ( +0.41%) [ +0.00% +0.74% +2.49% / +1.60% +0.47% +0.41%] index_select skip64 : Elapsed 0.167 ms (16.652 ms / 100) 16.744 -> 16.765 ( +0.13%) [ +1.82% +0.00% +0.59% / +0.95% +0.13% +0.55%] index_select skip256 : Elapsed 0.170 ms (17.049 ms / 100) 17.739 -> 17.647 ( -0.52%) [ +0.17% +0.17% +0.00% / +0.11% -0.52% -0.51%] index_select spread : Elapsed 0.178 ms (17.770 ms / 100) 17.440 -> 17.231 ( -1.20%) [ +0.00% +0.13% +0.13% / +0.09% -1.20% -1.18%] index_select strided 3 : Elapsed 0.174 ms (17.440 ms / 100) 17.612 -> 17.405 ( -1.18%) [ +0.15% +0.05% +0.00% / -0.03% -1.18% -1.06%] index_select strided 5 : Elapsed 0.176 ms (17.639 ms / 100) 17.700 -> 17.570 ( -0.73%) [ +0.04% +0.00% +0.07% / +0.02% -0.68% -0.73%] index_select strided 7 : Elapsed 0.177 ms (17.707 ms / 100) 17.722 -> 17.629 ( -0.52%) [ +0.07% +0.10% +0.00% / +0.11% -0.47% -0.52%] index_select strided 8 : Elapsed 0.177 ms (17.734 ms / 100) 17.785 -> 17.705 ( -0.45%) [ +0.00% +0.08% +0.08% / +0.06% -0.45% -0.42%] index_select strided 16 : Elapsed 0.178 ms (17.785 ms / 100) 17.820 -> 17.778 ( -0.24%) [ +0.03% +0.02% +0.00% / -0.06% -0.22% -0.24%] index_select strided 64 : Elapsed 0.178 ms (17.825 ms / 100) 17.802 -> 17.735 ( -0.38%) [ +0.01% +0.02% +0.00% / +0.03% -0.38% -0.10%] index_select strided 100 : Elapsed 0.178 ms (17.803 ms / 100) 17.763 -> 17.753 ( -0.06%) [ +0.11% +0.02% +0.00% / +0.05% +0.09% -0.06%] index_select random : Elapsed 0.178 ms (17.783 ms / 100) 17.591 -> 17.525 ( -0.38%) [ +0.00% +0.14% +0.14% / +0.13% -0.38% -0.16%] index_select random_sorted : Elapsed 0.176 ms (17.591 ms / 100) 17.804 -> 17.771 ( -0.19%) [ +0.02% +0.00% +0.04% / +0.10% -0.19% -0.08%] index_select perm : Elapsed 0.178 ms (17.807 ms / 100) 17.605 -> 17.541 ( -0.36%) [ +0.18% +0.20% +0.00% / +0.15% -0.36% -0.24%] index_select perm_sorted : Elapsed 0.176 ms (17.637 ms / 100) B = [50, 250, 15] (stride (3750, 1, 250)) dim = 2 fill_cnt = 150 8.723 -> 8.716 ( -0.08%) [ +0.21% +0.33% +0.00% / -0.08% +0.56% +0.56%] index_fill_ const : Elapsed 0.087 ms (8.741 ms / 100) 8.783 -> 8.786 ( +0.03%) [ +0.23% +0.35% +0.00% / +0.03% +0.55% +0.54%] index_fill_ linear : Elapsed 0.088 ms (8.803 ms / 100) 8.780 -> 8.770 ( -0.11%) [ +0.17% +0.23% +0.00% / -0.11% +0.25% +0.25%] index_fill_ reverse : Elapsed 0.088 ms (8.795 ms / 100) 8.737 -> 8.725 ( -0.14%) [ +0.16% +0.16% +0.00% / -0.14% +0.46% +0.46%] index_fill_ skip64 : Elapsed 0.088 ms (8.751 ms / 100) 8.728 -> 8.717 ( -0.13%) [ +0.23% +0.30% +0.00% / -0.13% +0.58% +0.57%] index_fill_ skip256 : Elapsed 0.087 ms (8.748 ms / 100) 8.767 -> 8.752 ( -0.17%) [ +0.15% +0.17% +0.00% / -0.17% +0.43% +0.46%] index_fill_ spread : Elapsed 0.088 ms (8.780 ms / 100) 8.777 -> 8.749 ( -0.32%) [ +0.11% +0.07% +0.00% / -0.32% +0.19% +0.40%] index_fill_ strided 3 : Elapsed 0.088 ms (8.787 ms / 100) 8.773 -> 8.730 ( -0.49%) [ +0.03% +0.03% +0.00% / -0.49% +0.01% +0.36%] index_fill_ strided 5 : Elapsed 0.088 ms (8.776 ms / 100) 8.809 -> 8.784 ( -0.28%) [ +0.01% +0.12% +0.00% / -0.28% +0.12% +0.26%] index_fill_ strided 7 : Elapsed 0.088 ms (8.810 ms / 100) 8.803 -> 8.775 ( -0.32%) [ +0.03% +0.03% +0.00% / -0.32% +0.33% +0.34%] index_fill_ strided 8 : Elapsed 0.088 ms (8.806 ms / 100) 8.794 -> 8.780 ( -0.16%) [ +0.13% +0.08% +0.00% / -0.16% +0.41% +0.30%] index_fill_ random : Elapsed 0.088 ms (8.805 ms / 100) 8.775 -> 8.774 ( -0.01%) [ +0.08% +0.00% +0.01% / -0.01% +0.28% +0.30%] index_fill_ random_sorted : Elapsed 0.088 ms (8.782 ms / 100) B = [50, 250, 15] (stride (3750, 1, 250)) A = [50, 250, 150] (stride (1, 50, 12500)) dim = 2 4.399 -> 4.327 ( -1.64%) [ +0.07% +0.20% +0.00% / +0.18% -1.64% -1.05%] index_select const : Elapsed 0.044 ms (4.402 ms / 100) 4.599 -> 4.608 ( +0.20%) [ +0.00% +0.13% +0.07% / +0.20% +0.48% +0.76%] index_select wrap : Elapsed 0.046 ms (4.599 ms / 100) 4.596 -> 4.611 ( +0.33%) [ +0.20% +0.00% +0.07% / +0.33% +0.83% +0.57%] index_select linear : Elapsed 0.046 ms (4.605 ms / 100) 4.600 -> 4.603 ( +0.07%) [ +0.00% +0.02% +0.04% / +0.07% +0.50% +0.74%] index_select reverse : Elapsed 0.046 ms (4.600 ms / 100) 4.447 -> 4.451 ( +0.09%) [ +0.13% +0.00% +0.16% / +0.09% +0.65% +1.03%] index_select skip64 : Elapsed 0.045 ms (4.453 ms / 100) 4.513 -> 4.377 ( -3.01%) [ +0.27% +0.02% +0.00% / +0.02% -3.01% -2.84%] index_select skip256 : Elapsed 0.045 ms (4.525 ms / 100) 4.630 -> 4.616 ( -0.30%) [ +0.09% +0.02% +0.00% / +0.11% -0.19% -0.30%] index_select spread : Elapsed 0.046 ms (4.634 ms / 100) 4.610 -> 4.603 ( -0.15%) [ +0.24% +0.26% +0.00% / +0.22% -0.15% +0.00%] index_select strided 3 : Elapsed 0.046 ms (4.621 ms / 100) 4.603 -> 4.609 ( +0.13%) [ +0.24% +0.00% +0.13% / +0.13% +0.22% +0.41%] index_select strided 5 : Elapsed 0.046 ms (4.614 ms / 100) 4.611 -> 4.620 ( +0.20%) [ +0.28% +0.41% +0.00% / +0.20% +0.41% +0.35%] index_select strided 7 : Elapsed 0.046 ms (4.624 ms / 100) 4.611 -> 4.612 ( +0.02%) [ +0.13% +0.15% +0.00% / +0.02% +0.35% +0.28%] index_select strided 8 : Elapsed 0.046 ms (4.617 ms / 100) 4.601 -> 4.605 ( +0.09%) [ +0.17% +0.00% +0.04% / +0.09% +0.96% +0.78%] index_select strided 16 : Elapsed 0.046 ms (4.609 ms / 100) 4.625 -> 4.630 ( +0.11%) [ +0.00% +0.19% +0.02% / +0.11% +0.74% +0.67%] index_select strided 64 : Elapsed 0.046 ms (4.625 ms / 100) 4.454 -> 4.462 ( +0.18%) [ +0.22% +0.20% +0.00% / +0.18% +1.28% +1.17%] index_select strided 100 : Elapsed 0.045 ms (4.464 ms / 100) 4.624 -> 4.611 ( -0.28%) [ +0.06% +0.02% +0.00% / +0.04% -0.28% -0.06%] index_select random : Elapsed 0.046 ms (4.627 ms / 100) 4.636 -> 4.590 ( -0.99%) [ +0.00% +0.11% +0.06% / +0.09% -0.99% -0.93%] index_select random_sorted : Elapsed 0.046 ms (4.636 ms / 100) 4.607 -> 4.614 ( +0.15%) [ +0.11% +0.07% +0.00% / +0.15% +0.30% +0.33%] index_select perm : Elapsed 0.046 ms (4.612 ms / 100) 4.589 -> 4.586 ( -0.07%) [ +0.24% +0.15% +0.00% / -0.07% +0.59% +0.72%] index_select perm_sorted : Elapsed 0.046 ms (4.600 ms / 100) out_shape = [15, 50, 250] in_shape = [150, 50, 250] idx_dim = 0 B = [15, 50, 250] (stride (1, 3750, 15)) A = [150, 50, 250] (stride (12500, 250, 1)) dim = 0 4.196 -> 4.194 ( -0.05%) [ +0.00% +0.07% +0.07% / +0.02% +0.12% -0.05%] index_select const : Elapsed 0.042 ms (4.196 ms / 100) 4.599 -> 4.593 ( -0.13%) [ +0.09% +0.00% +0.02% / -0.07% +0.20% -0.13%] index_select wrap : Elapsed 0.046 ms (4.603 ms / 100) 4.593 -> 4.588 ( -0.11%) [ +0.00% +0.15% +0.15% / -0.11% +0.30% +0.17%] index_select linear : Elapsed 0.046 ms (4.593 ms / 100) 4.589 -> 4.589 ( +0.00%) [ +0.09% +0.00% +0.26% / +0.00% +0.20% +0.41%] index_select reverse : Elapsed 0.046 ms (4.593 ms / 100) 4.182 -> 4.184 ( +0.05%) [ +0.05% +0.00% +0.24% / +0.05% +0.29% +0.57%] index_select skip64 : Elapsed 0.042 ms (4.184 ms / 100) 4.184 -> 4.172 ( -0.29%) [ +0.00% +0.26% +0.02% / +0.02% -0.22% -0.29%] index_select skip256 : Elapsed 0.042 ms (4.184 ms / 100) 4.607 -> 4.610 ( +0.07%) [ +0.17% +0.09% +0.00% / +0.07% +0.15% +0.15%] index_select spread : Elapsed 0.046 ms (4.615 ms / 100) 4.614 -> 4.615 ( +0.02%) [ +0.00% +0.09% +0.04% / +0.04% +0.04% +0.02%] index_select strided 3 : Elapsed 0.046 ms (4.614 ms / 100) 4.634 -> 4.635 ( +0.02%) [ +0.17% +0.06% +0.00% / +0.22% +0.02% +0.35%] index_select strided 5 : Elapsed 0.046 ms (4.642 ms / 100) 4.602 -> 4.610 ( +0.17%) [ +0.00% +0.22% +0.22% / +0.17% +0.20% +0.22%] index_select strided 7 : Elapsed 0.046 ms (4.602 ms / 100) 4.573 -> 4.576 ( +0.07%) [ +0.00% +0.09% +0.26% / +0.11% +0.20% +0.07%] index_select strided 8 : Elapsed 0.046 ms (4.573 ms / 100) 4.597 -> 4.600 ( +0.07%) [ +0.17% +0.09% +0.00% / +0.11% +0.07% +0.07%] index_select strided 16 : Elapsed 0.046 ms (4.605 ms / 100) 4.595 -> 4.606 ( +0.24%) [ +0.00% +0.07% +0.26% / +0.24% +0.39% +0.35%] index_select strided 64 : Elapsed 0.046 ms (4.595 ms / 100) 4.294 -> 4.301 ( +0.16%) [ +0.23% +0.00% +0.21% / +0.21% +0.23% +0.16%] index_select strided 100 : Elapsed 0.043 ms (4.304 ms / 100) 4.589 -> 4.589 ( +0.00%) [ +0.11% +0.00% +0.17% / +0.00% +0.22% +0.28%] index_select random : Elapsed 0.046 ms (4.594 ms / 100) 4.579 -> 4.581 ( +0.04%) [ +0.00% +0.11% +0.09% / +0.04% +0.15% +0.15%] index_select random_sorted : Elapsed 0.046 ms (4.579 ms / 100) 4.611 -> 4.614 ( +0.07%) [ +0.26% +0.11% +0.00% / +0.07% +0.11% +0.50%] index_select perm : Elapsed 0.046 ms (4.623 ms / 100) 4.616 -> 4.617 ( +0.02%) [ +0.00% +0.17% +0.13% / +0.02% +0.19% +0.17%] index_select perm_sorted : Elapsed 0.046 ms (4.616 ms / 100) B = [15, 50, 250] (stride (1, 15, 750)) A = [150, 50, 250] (stride (12500, 1, 50)) dim = 0 5.316 -> 5.299 ( -0.32%) [ +0.00% +0.00% +0.08% / -0.02% -0.32% -0.32%] index_select const : Elapsed 0.053 ms (5.316 ms / 100) 5.472 -> 5.471 ( -0.02%) [ +0.00% +0.11% +0.11% / +0.22% +0.00% -0.02%] index_select wrap : Elapsed 0.055 ms (5.472 ms / 100) 5.474 -> 5.473 ( -0.02%) [ +0.02% +0.09% +0.00% / +0.11% +0.05% -0.02%] index_select linear : Elapsed 0.055 ms (5.475 ms / 100) 5.507 -> 5.495 ( -0.22%) [ +0.00% +0.04% +0.09% / +0.07% -0.22% -0.20%] index_select reverse : Elapsed 0.055 ms (5.507 ms / 100) 5.401 -> 5.386 ( -0.28%) [ +0.00% +0.15% +0.15% / +0.37% -0.26% -0.28%] index_select skip64 : Elapsed 0.054 ms (5.401 ms / 100) 5.457 -> 5.332 ( -2.29%) [ +0.22% +0.00% +0.13% / +0.15% -2.16% -2.29%] index_select skip256 : Elapsed 0.055 ms (5.469 ms / 100) 5.584 -> 5.492 ( -1.65%) [ +0.00% +0.18% +0.14% / +0.00% -1.63% -1.65%] index_select spread : Elapsed 0.056 ms (5.584 ms / 100) 5.539 -> 5.481 ( -1.05%) [ +0.00% +0.05% +0.11% / -0.02% -1.05% -0.94%] index_select strided 3 : Elapsed 0.055 ms (5.539 ms / 100) 5.497 -> 5.497 ( +0.00%) [ +0.00% +0.18% +0.33% / +0.05% +0.00% +0.04%] index_select strided 5 : Elapsed 0.055 ms (5.497 ms / 100) 5.482 -> 5.475 ( -0.13%) [ +0.00% +0.05% +0.11% / +0.11% +0.15% -0.13%] index_select strided 7 : Elapsed 0.055 ms (5.482 ms / 100) 5.541 -> 5.509 ( -0.58%) [ +0.09% +0.00% +0.07% / +0.14% -0.56% -0.58%] index_select strided 8 : Elapsed 0.055 ms (5.546 ms / 100) 5.506 -> 5.512 ( +0.11%) [ +0.07% +0.04% +0.00% / +0.11% +1.74% +1.67%] index_select strided 16 : Elapsed 0.055 ms (5.510 ms / 100) 5.543 -> 5.545 ( +0.04%) [ +0.07% +0.09% +0.00% / +0.04% +0.81% +0.69%] index_select strided 64 : Elapsed 0.055 ms (5.547 ms / 100) 5.450 -> 5.459 ( +0.17%) [ +0.15% +0.00% +0.17% / +0.17% +0.72% +0.84%] index_select strided 100 : Elapsed 0.055 ms (5.458 ms / 100) 5.534 -> 5.534 ( +0.00%) [ +0.11% +0.04% +0.00% / +0.07% +0.00% +0.07%] index_select random : Elapsed 0.055 ms (5.540 ms / 100) 5.474 -> 5.473 ( -0.02%) [ +0.07% +0.00% +0.04% / +0.04% +0.00% -0.02%] index_select random_sorted : Elapsed 0.055 ms (5.478 ms / 100) 5.511 -> 5.510 ( -0.02%) [ +0.00% +0.09% +0.20% / +0.25% -0.02% +0.09%] index_select perm : Elapsed 0.055 ms (5.511 ms / 100) 5.526 -> 5.534 ( +0.14%) [ +0.14% +0.00% +0.07% / +0.31% +0.14% +0.22%] index_select perm_sorted : Elapsed 0.055 ms (5.534 ms / 100) B = [15, 50, 250] (stride (1, 15, 750)) A = [150, 50, 250] (stride (50, 1, 7500)) dim = 0 5.501 -> 5.476 ( -0.45%) [ +0.00% +0.15% +0.20% / +0.11% -0.45% -0.27%] index_select const : Elapsed 0.055 ms (5.501 ms / 100) 6.084 -> 5.979 ( -1.73%) [ +0.23% +0.00% +0.33% / +0.02% -1.55% -1.73%] index_select wrap : Elapsed 0.061 ms (6.098 ms / 100) 6.091 -> 5.973 ( -1.94%) [ +0.03% +0.00% +0.02% / +0.07% -1.94% -1.84%] index_select linear : Elapsed 0.061 ms (6.093 ms / 100) 6.145 -> 6.034 ( -1.81%) [ +0.00% +0.07% +0.05% / +0.02% -1.77% -1.81%] index_select reverse : Elapsed 0.061 ms (6.145 ms / 100) 5.499 -> 5.479 ( -0.36%) [ +0.00% +0.16% +0.11% / +0.27% -0.27% -0.36%] index_select skip64 : Elapsed 0.055 ms (5.499 ms / 100) 5.661 -> 5.666 ( +0.09%) [ +0.34% +0.00% +0.44% / +0.09% +0.28% +0.32%] index_select skip256 : Elapsed 0.057 ms (5.680 ms / 100) 6.291 -> 6.114 ( -2.81%) [ +0.29% +0.00% +0.38% / +0.33% -2.81% -2.73%] index_select spread : Elapsed 0.063 ms (6.309 ms / 100) 6.178 -> 6.064 ( -1.85%) [ +0.18% +0.00% +0.26% / +0.21% -1.70% -1.85%] index_select strided 3 : Elapsed 0.062 ms (6.189 ms / 100) 6.172 -> 6.051 ( -1.96%) [ +0.29% +0.00% +0.28% / +0.31% -1.96% -1.83%] index_select strided 5 : Elapsed 0.062 ms (6.190 ms / 100) 6.219 -> 6.089 ( -2.09%) [ +0.29% +0.00% +0.55% / +0.50% -2.07% -2.09%] index_select strided 7 : Elapsed 0.062 ms (6.237 ms / 100) 6.299 -> 6.098 ( -3.19%) [ +0.02% +0.00% +0.00% / +0.03% -3.00% -3.19%] index_select strided 8 : Elapsed 0.063 ms (6.300 ms / 100) 6.261 -> 6.153 ( -1.72%) [ +0.00% +0.13% +0.05% / +0.22% -1.72% -1.65%] index_select strided 16 : Elapsed 0.063 ms (6.261 ms / 100) 6.239 -> 6.098 ( -2.26%) [ +0.11% +0.00% +0.22% / +0.27% -2.18% -2.26%] index_select strided 64 : Elapsed 0.062 ms (6.246 ms / 100) 5.799 -> 5.800 ( +0.02%) [ +0.00% +0.26% +0.16% / +0.02% +1.09% +1.03%] index_select strided 100 : Elapsed 0.058 ms (5.799 ms / 100) 6.197 -> 6.098 ( -1.60%) [ +0.00% +0.10% +0.31% / +0.32% -1.60% -1.60%] index_select random : Elapsed 0.062 ms (6.197 ms / 100) 6.191 -> 6.073 ( -1.91%) [ +0.00% +0.15% +0.40% / +0.26% -1.62% -1.91%] index_select random_sorted : Elapsed 0.062 ms (6.191 ms / 100) 6.237 -> 6.075 ( -2.60%) [ +0.00% +0.13% +0.40% / +0.55% -2.60% -2.47%] index_select perm : Elapsed 0.062 ms (6.237 ms / 100) 6.127 -> 6.030 ( -1.58%) [ +0.08% +0.00% +0.28% / +0.47% -1.55% -1.58%] index_select perm_sorted : Elapsed 0.061 ms (6.132 ms / 100) out_shape = [150, 15, 250] in_shape = [150, 50, 250] idx_dim = 1 B = [150, 15, 250] (stride (3750, 250, 1)) A = [150, 50, 250] (stride (12500, 1, 50)) dim = 1 25.047 -> 25.092 ( +0.18%) [ +0.43% +0.00% +0.16% / +0.22% +0.22% +0.18%] index_select const : Elapsed 0.252 ms (25.154 ms / 100) 25.251 -> 25.245 ( -0.02%) [ +0.11% +0.16% +0.00% / +0.21% -0.02% +0.08%] index_select wrap : Elapsed 0.253 ms (25.280 ms / 100) 25.227 -> 25.186 ( -0.16%) [ +0.01% +0.20% +0.00% / +0.30% +0.26% -0.16%] index_select linear : Elapsed 0.252 ms (25.229 ms / 100) 25.187 -> 25.199 ( +0.05%) [ +0.00% +0.41% +0.15% / +0.45% +0.05% +0.26%] index_select reverse : Elapsed 0.252 ms (25.187 ms / 100) 25.090 -> 25.024 ( -0.26%) [ +0.31% +0.00% +0.09% / -0.07% -0.26% -0.05%] index_select skip64 : Elapsed 0.252 ms (25.167 ms / 100) 25.281 -> 25.043 ( -0.94%) [ +0.08% +0.00% +0.11% / +0.14% -0.78% -0.94%] index_select skip256 : Elapsed 0.253 ms (25.302 ms / 100) 25.408 -> 25.259 ( -0.59%) [ +0.45% +0.00% +0.26% / +0.17% -0.37% -0.59%] index_select spread : Elapsed 0.255 ms (25.522 ms / 100) 25.260 -> 25.221 ( -0.15%) [ +0.19% +0.00% +0.13% / -0.15% +0.44% +0.39%] index_select strided 3 : Elapsed 0.253 ms (25.307 ms / 100) 25.232 -> 25.204 ( -0.11%) [ +0.00% +0.10% +0.06% / -0.02% -0.11% -0.10%] index_select strided 5 : Elapsed 0.252 ms (25.232 ms / 100) 25.249 -> 25.234 ( -0.06%) [ +0.21% +0.23% +0.00% / +0.17% -0.06% +0.17%] index_select strided 7 : Elapsed 0.253 ms (25.302 ms / 100) 25.202 -> 25.258 ( +0.22%) [ +0.35% +0.02% +0.00% / +0.22% +0.44% +0.57%] index_select strided 8 : Elapsed 0.253 ms (25.289 ms / 100) 25.226 -> 25.166 ( -0.24%) [ +0.22% +0.09% +0.00% / -0.13% -0.24% -0.04%] index_select strided 16 : Elapsed 0.253 ms (25.282 ms / 100) 25.264 -> 25.138 ( -0.50%) [ +0.45% +0.17% +0.00% / +0.32% -0.50% +0.05%] index_select random : Elapsed 0.254 ms (25.377 ms / 100) 25.167 -> 25.167 ( +0.00%) [ +0.42% +0.00% +0.18% / +0.00% +0.66% +0.35%] index_select random_sorted : Elapsed 0.253 ms (25.272 ms / 100) 25.180 -> 25.158 ( -0.09%) [ +0.00% +0.02% +0.27% / -0.00% +0.05% -0.09%] index_select perm : Elapsed 0.252 ms (25.180 ms / 100) 25.351 -> 25.156 ( -0.77%) [ +0.21% +0.00% +0.16% / +0.14% -0.52% -0.77%] index_select perm_sorted : Elapsed 0.254 ms (25.403 ms / 100) B = [150, 15, 250] (stride (3750, 250, 1)) A = [150, 50, 250] (stride (1, 37500, 150)) dim = 1 8.565 -> 8.462 ( -1.20%) [ +0.07% +0.06% +0.00% / +0.13% -0.91% -1.20%] index_select const : Elapsed 0.086 ms (8.571 ms / 100) 9.994 -> 9.898 ( -0.96%) [ +0.02% +0.13% +0.00% / +0.03% -0.96% -0.84%] index_select wrap : Elapsed 0.100 ms (9.996 ms / 100) 9.989 -> 9.895 ( -0.94%) [ +0.07% +0.00% +0.14% / -0.01% -0.92% -0.94%] index_select linear : Elapsed 0.100 ms (9.996 ms / 100) 9.994 -> 9.906 ( -0.88%) [ +0.00% +0.12% +0.10% / +0.13% -0.88% -0.71%] index_select reverse : Elapsed 0.100 ms (9.994 ms / 100) 8.528 -> 8.398 ( -1.52%) [ +0.00% +0.08% +0.23% / +0.35% -1.52% -1.49%] index_select skip64 : Elapsed 0.085 ms (8.528 ms / 100) 8.597 -> 8.470 ( -1.48%) [ +0.16% +0.00% +0.24% / -0.23% -1.23% -1.48%] index_select skip256 : Elapsed 0.086 ms (8.611 ms / 100) 10.064 -> 9.942 ( -1.21%) [ +0.04% +0.02% +0.00% / -0.05% -1.21% -1.08%] index_select spread : Elapsed 0.101 ms (10.068 ms / 100) 10.058 -> 9.953 ( -1.04%) [ +0.09% +0.02% +0.00% / -0.03% -0.98% -1.04%] index_select strided 3 : Elapsed 0.101 ms (10.067 ms / 100) 10.089 -> 9.982 ( -1.06%) [ +0.06% +0.07% +0.00% / +0.13% -1.06% -1.05%] index_select strided 5 : Elapsed 0.101 ms (10.095 ms / 100) 9.998 -> 9.908 ( -0.90%) [ +0.00% +0.06% +0.04% / -0.03% -0.90% -0.88%] index_select strided 7 : Elapsed 0.100 ms (9.998 ms / 100) 10.032 -> 9.974 ( -0.58%) [ +0.31% +0.00% +0.23% / +0.30% -0.58% -0.41%] index_select strided 8 : Elapsed 0.101 ms (10.063 ms / 100) 10.107 -> 9.902 ( -2.03%) [ +0.00% +0.14% +0.14% / -0.22% -1.91% -2.03%] index_select strided 16 : Elapsed 0.101 ms (10.107 ms / 100) 10.052 -> 9.995 ( -0.57%) [ +0.00% +0.07% +0.11% / +0.14% -0.57% -0.57%] index_select random : Elapsed 0.101 ms (10.052 ms / 100) 9.812 -> 9.753 ( -0.60%) [ +0.42% +0.38% +0.00% / +0.66% -0.56% -0.60%] index_select random_sorted : Elapsed 0.099 ms (9.853 ms / 100) 10.077 -> 9.901 ( -1.75%) [ +0.13% +0.00% +0.17% / +0.18% -1.67% -1.75%] index_select perm : Elapsed 0.101 ms (10.090 ms / 100) 10.108 -> 9.907 ( -1.99%) [ +0.03% +0.00% +0.17% / +0.05% -1.99% -1.74%] index_select perm_sorted : Elapsed 0.101 ms (10.111 ms / 100) B = [150, 15, 250] (stride (250, 37500, 1)) dim = 1 fill_cnt = 50 8.735 -> 8.738 ( +0.03%) [ +0.29% +0.11% +0.00% / +0.03% +0.48% +0.37%] index_fill_ const : Elapsed 0.088 ms (8.760 ms / 100) 8.873 -> 8.868 ( -0.06%) [ +0.24% +0.00% +0.12% / -0.06% +0.25% +0.29%] index_fill_ linear : Elapsed 0.089 ms (8.894 ms / 100) 8.874 -> 8.856 ( -0.20%) [ +0.05% +0.00% +0.09% / -0.20% +0.17% +0.26%] index_fill_ reverse : Elapsed 0.089 ms (8.878 ms / 100) 8.748 -> 8.730 ( -0.21%) [ +0.27% +0.00% +0.05% / -0.21% +0.33% +0.29%] index_fill_ skip64 : Elapsed 0.088 ms (8.772 ms / 100) 8.759 -> 8.736 ( -0.26%) [ +0.05% +0.00% +0.10% / -0.26% +0.15% +0.06%] index_fill_ skip256 : Elapsed 0.088 ms (8.763 ms / 100) 8.856 -> 8.847 ( -0.10%) [ +0.28% +0.11% +0.00% / -0.10% +0.64% +0.35%] index_fill_ spread : Elapsed 0.089 ms (8.881 ms / 100) 8.780 -> 8.780 ( +0.00%) [ +0.22% +0.19% +0.00% / +0.00% +0.32% +0.27%] index_fill_ strided 3 : Elapsed 0.088 ms (8.799 ms / 100) 8.778 -> 8.762 ( -0.18%) [ +0.02% +0.02% +0.00% / -0.18% +0.16% +0.18%] index_fill_ strided 5 : Elapsed 0.088 ms (8.780 ms / 100) 9.025 -> 9.012 ( -0.14%) [ +0.12% +0.10% +0.00% / -0.14% +0.27% +0.27%] index_fill_ strided 7 : Elapsed 0.090 ms (9.036 ms / 100) 9.002 -> 9.004 ( +0.02%) [ +0.12% +0.03% +0.00% / +0.02% +0.50% +0.47%] index_fill_ strided 8 : Elapsed 0.090 ms (9.013 ms / 100) 8.963 -> 8.945 ( -0.20%) [ +0.00% +0.03% +0.11% / -0.20% +0.38% +0.45%] index_fill_ random : Elapsed 0.090 ms (8.963 ms / 100) 8.840 -> 8.833 ( -0.08%) [ +0.00% +0.08% +0.14% / -0.08% +0.36% +0.40%] index_fill_ random_sorted : Elapsed 0.088 ms (8.840 ms / 100) B = [150, 15, 250] (stride (250, 37500, 1)) A = [150, 50, 250] (stride (12500, 1, 50)) dim = 1 24.150 -> 24.085 ( -0.27%) [ +0.18% +0.00% +0.02% / -0.17% -0.27% -0.26%] index_select const : Elapsed 0.242 ms (24.194 ms / 100) 24.291 -> 24.225 ( -0.27%) [ +0.00% +0.07% +0.07% / -0.27% +0.21% +0.05%] index_select wrap : Elapsed 0.243 ms (24.291 ms / 100) 24.230 -> 24.278 ( +0.20%) [ +0.07% +0.00% +0.04% / +0.20% +0.72% +0.43%] index_select linear : Elapsed 0.242 ms (24.246 ms / 100) 24.282 -> 24.253 ( -0.12%) [ +0.16% +0.16% +0.00% / -0.12% +0.29% -0.01%] index_select reverse : Elapsed 0.243 ms (24.322 ms / 100) 24.100 -> 24.109 ( +0.04%) [ +0.00% +0.17% +0.13% / +0.23% +0.04% +0.06%] index_select skip64 : Elapsed 0.241 ms (24.100 ms / 100) 24.292 -> 24.112 ( -0.74%) [ +0.08% +0.00% +0.32% / +0.05% -0.74% -0.53%] index_select skip256 : Elapsed 0.243 ms (24.312 ms / 100) 24.405 -> 24.311 ( -0.39%) [ +0.16% +0.00% +0.36% / +0.03% -0.39% -0.39%] index_select spread : Elapsed 0.244 ms (24.445 ms / 100) 24.290 -> 24.290 ( +0.00%) [ +0.00% +0.04% +0.01% / +0.14% +0.09% +0.00%] index_select strided 3 : Elapsed 0.243 ms (24.290 ms / 100) 24.220 -> 24.240 ( +0.08%) [ +0.02% +0.00% +0.02% / +0.08% +0.48% +0.21%] index_select strided 5 : Elapsed 0.242 ms (24.226 ms / 100) 24.192 -> 24.246 ( +0.22%) [ +0.36% +0.00% +0.27% / +0.22% +0.76% +0.31%] index_select strided 7 : Elapsed 0.243 ms (24.278 ms / 100) 24.213 -> 24.200 ( -0.05%) [ +0.00% +0.34% +0.43% / -0.05% +0.40% +0.56%] index_select strided 8 : Elapsed 0.242 ms (24.213 ms / 100) 24.204 -> 24.198 ( -0.02%) [ +0.23% +0.55% +0.00% / +0.21% +0.40% -0.02%] index_select strided 16 : Elapsed 0.243 ms (24.259 ms / 100) 24.277 -> 24.350 ( +0.30%) [ +0.28% +0.00% +0.07% / +0.43% +0.35% +0.30%] index_select random : Elapsed 0.243 ms (24.344 ms / 100) 24.213 -> 24.316 ( +0.43%) [ +0.03% +0.22% +0.00% / +0.43% +0.73% +0.57%] index_select random_sorted : Elapsed 0.242 ms (24.221 ms / 100) 24.228 -> 24.218 ( -0.04%) [ +0.00% +0.07% +0.04% / -0.04% +0.28% +0.03%] index_select perm : Elapsed 0.242 ms (24.228 ms / 100) 24.346 -> 24.280 ( -0.27%) [ +0.05% +0.04% +0.00% / +0.20% +0.00% -0.27%] index_select perm_sorted : Elapsed 0.244 ms (24.357 ms / 100) B = [150, 15, 250] (stride (1, 150, 2250)) A = [150, 50, 250] (stride (12500, 250, 1)) dim = 1 10.406 -> 10.408 ( +0.02%) [ +0.00% +1.39% +0.39% / +0.02% +1.34% +2.34%] index_select const : Elapsed 0.104 ms (10.406 ms / 100) 11.079 -> 11.186 ( +0.97%) [ +0.60% +0.00% +0.18% / +0.97% +1.49% +2.10%] index_select wrap : Elapsed 0.111 ms (11.146 ms / 100) 11.130 -> 11.135 ( +0.04%) [ +0.00% +0.05% +0.18% / +0.04% +2.05% +1.87%] index_select linear : Elapsed 0.111 ms (11.130 ms / 100) 11.436 -> 11.170 ( -2.33%) [ +0.84% +0.00% +0.45% / +0.40% -2.33% -1.85%] index_select reverse : Elapsed 0.115 ms (11.532 ms / 100) 10.366 -> 10.434 ( +0.66%) [ +0.00% +1.26% +1.71% / +0.66% +2.86% +2.55%] index_select skip64 : Elapsed 0.104 ms (10.366 ms / 100) 10.389 -> 10.386 ( -0.03%) [ +0.00% +1.12% +0.74% / -0.03% +2.75% +2.32%] index_select skip256 : Elapsed 0.104 ms (10.389 ms / 100) 11.079 -> 11.045 ( -0.31%) [ +0.29% +0.06% +0.00% / -0.31% +0.61% +0.48%] index_select spread : Elapsed 0.111 ms (11.111 ms / 100) 11.194 -> 11.098 ( -0.86%) [ +0.22% +0.00% +0.54% / -0.15% -0.69% -0.86%] index_select strided 3 : Elapsed 0.112 ms (11.219 ms / 100) 11.021 -> 11.013 ( -0.07%) [ +0.27% +0.23% +0.00% / +0.24% -0.04% -0.07%] index_select strided 5 : Elapsed 0.111 ms (11.051 ms / 100) 11.052 -> 10.973 ( -0.71%) [ +0.00% +0.53% +0.79% / +0.46% +0.01% -0.71%] index_select strided 7 : Elapsed 0.111 ms (11.052 ms / 100) 10.953 -> 10.965 ( +0.11%) [ +0.31% +0.41% +0.00% / +0.11% +1.75% +1.63%] index_select strided 8 : Elapsed 0.110 ms (10.987 ms / 100) 11.486 -> 11.464 ( -0.19%) [ +0.00% +0.46% +0.07% / +0.84% -0.19% +0.14%] index_select strided 16 : Elapsed 0.115 ms (11.486 ms / 100) 10.928 -> 10.979 ( +0.47%) [ +0.51% +0.52% +0.00% / +0.47% +3.12% +3.72%] index_select random : Elapsed 0.110 ms (10.984 ms / 100) 11.013 -> 11.035 ( +0.20%) [ +0.62% +0.00% +0.25% / +0.20% +1.16% +0.90%] index_select random_sorted : Elapsed 0.111 ms (11.081 ms / 100) 11.386 -> 10.984 ( -3.53%) [ +0.68% +0.37% +0.00% / +0.32% -3.28% -3.53%] index_select perm : Elapsed 0.115 ms (11.463 ms / 100) 11.080 -> 11.079 ( -0.01%) [ +0.00% +0.31% +0.51% / +0.47% -0.01% +0.06%] index_select perm_sorted : Elapsed 0.111 ms (11.080 ms / 100) B = [150, 15, 250] (stride (1, 150, 2250)) A = [150, 50, 250] (stride (250, 37500, 1)) dim = 1 9.501 -> 9.505 ( +0.04%) [ +0.26% +0.05% +0.00% / +0.04% +0.24% +0.12%] index_select const : Elapsed 0.095 ms (9.526 ms / 100) 10.425 -> 10.405 ( -0.19%) [ +0.20% +0.00% +0.07% / -0.19% +1.09% +0.60%] index_select wrap : Elapsed 0.104 ms (10.446 ms / 100) 10.415 -> 10.437 ( +0.21%) [ +0.00% +0.19% +0.02% / +0.21% +1.15% +1.39%] index_select linear : Elapsed 0.104 ms (10.415 ms / 100) 10.529 -> 10.583 ( +0.51%) [ +0.27% +0.00% +0.08% / +0.51% +1.01% +0.72%] index_select reverse : Elapsed 0.106 ms (10.557 ms / 100) 9.467 -> 9.484 ( +0.18%) [ +0.06% +0.14% +0.00% / +0.18% +0.76% +0.78%] index_select skip64 : Elapsed 0.095 ms (9.473 ms / 100) 9.491 -> 9.502 ( +0.12%) [ +0.00% +0.20% +0.26% / +0.12% +0.21% +0.15%] index_select skip256 : Elapsed 0.095 ms (9.491 ms / 100) 10.375 -> 10.383 ( +0.08%) [ +0.00% +0.09% +0.19% / +0.08% +1.15% +2.00%] index_select spread : Elapsed 0.104 ms (10.375 ms / 100) 10.386 -> 10.403 ( +0.16%) [ +0.00% +0.39% +0.64% / +0.16% +0.87% +0.73%] index_select strided 3 : Elapsed 0.104 ms (10.386 ms / 100) 10.279 -> 10.334 ( +0.54%) [ +0.00% +0.76% +0.84% / +0.54% +1.49% +2.09%] index_select strided 5 : Elapsed 0.103 ms (10.279 ms / 100) 10.560 -> 10.558 ( -0.02%) [ +0.00% +0.02% +0.37% / -0.02% +0.68% +0.98%] index_select strided 7 : Elapsed 0.106 ms (10.560 ms / 100) 10.138 -> 10.175 ( +0.36%) [ +0.00% +0.24% +0.12% / +0.36% +0.95% +0.57%] index_select strided 8 : Elapsed 0.101 ms (10.138 ms / 100) 10.271 -> 10.311 ( +0.39%) [ +0.00% +0.20% +0.07% / +0.39% +0.65% +0.50%] index_select strided 16 : Elapsed 0.103 ms (10.271 ms / 100) 10.343 -> 10.283 ( -0.58%) [ +0.00% +0.23% +0.57% / +0.21% -0.58% -0.08%] index_select random : Elapsed 0.103 ms (10.343 ms / 100) 10.318 -> 10.275 ( -0.42%) [ +0.00% +0.39% +0.40% / +0.75% -0.42% +0.00%] index_select random_sorted : Elapsed 0.103 ms (10.318 ms / 100) 10.398 -> 10.437 ( +0.38%) [ +0.00% +0.25% +0.58% / +0.38% +1.44% +1.02%] index_select perm : Elapsed 0.104 ms (10.398 ms / 100) 10.346 -> 10.341 ( -0.05%) [ +0.00% +0.29% +0.14% / -0.05% +0.46% +0.23%] index_select perm_sorted : Elapsed 0.103 ms (10.346 ms / 100) B = [150, 15, 250] (stride (1, 150, 2250)) A = [150, 50, 250] (stride (50, 1, 7500)) dim = 1 50.791 -> 50.577 ( -0.42%) [ +0.00% +0.15% +0.26% / +0.16% -0.42% -0.27%] index_select const : Elapsed 0.508 ms (50.791 ms / 100) 50.853 -> 50.657 ( -0.39%) [ +0.00% +0.00% +0.07% / +0.09% -0.39% -0.34%] index_select wrap : Elapsed 0.509 ms (50.855 ms / 100) 50.800 -> 50.702 ( -0.19%) [ +0.13% +0.06% +0.00% / +0.03% -0.19% -0.03%] index_select linear : Elapsed 0.509 ms (50.868 ms / 100) 50.679 -> 50.720 ( +0.08%) [ +0.30% +0.28% +0.00% / +0.49% +0.08% +0.20%] index_select reverse : Elapsed 0.508 ms (50.830 ms / 100) 50.793 -> 50.647 ( -0.29%) [ +0.21% +0.00% +0.13% / +0.69% -0.29% -0.17%] index_select skip64 : Elapsed 0.509 ms (50.901 ms / 100) 50.595 -> 50.601 ( +0.01%) [ +0.00% +0.44% +0.25% / +0.25% +0.01% +0.25%] index_select skip256 : Elapsed 0.506 ms (50.595 ms / 100) 50.711 -> 50.622 ( -0.18%) [ +0.21% +0.00% +0.63% / +0.16% -0.15% -0.18%] index_select spread : Elapsed 0.508 ms (50.820 ms / 100) 50.745 -> 50.670 ( -0.15%) [ +0.00% +0.11% +0.33% / -0.15% +0.03% -0.10%] index_select strided 3 : Elapsed 0.507 ms (50.745 ms / 100) 50.726 -> 50.455 ( -0.53%) [ +0.47% +0.00% +0.20% / +0.25% -0.53% -0.04%] index_select strided 5 : Elapsed 0.510 ms (50.962 ms / 100) 50.901 -> 50.612 ( -0.57%) [ +0.05% +0.07% +0.00% / +0.27% -0.57% -0.45%] index_select strided 7 : Elapsed 0.509 ms (50.926 ms / 100) 50.858 -> 50.605 ( -0.50%) [ +0.02% +0.17% +0.00% / +0.13% -0.47% -0.50%] index_select strided 8 : Elapsed 0.509 ms (50.870 ms / 100) 50.748 -> 50.548 ( -0.39%) [ +0.28% +0.00% +0.14% / -0.03% -0.39% -0.30%] index_select strided 16 : Elapsed 0.509 ms (50.889 ms / 100) 50.894 -> 50.498 ( -0.78%) [ +0.16% +0.00% +0.28% / +0.40% -0.78% -0.78%] index_select random : Elapsed 0.510 ms (50.976 ms / 100) 50.749 -> 50.598 ( -0.30%) [ +0.24% +0.00% +0.16% / +0.45% -0.30% +0.00%] index_select random_sorted : Elapsed 0.509 ms (50.871 ms / 100) 50.802 -> 50.600 ( -0.40%) [ +0.56% +0.00% +0.22% / +0.10% -0.23% -0.40%] index_select perm : Elapsed 0.511 ms (51.084 ms / 100) 50.824 -> 50.577 ( -0.49%) [ +0.09% +0.15% +0.00% / +0.21% -0.27% -0.49%] index_select perm_sorted : Elapsed 0.509 ms (50.870 ms / 100) out_shape = [150, 50, 15] in_shape = [150, 50, 250] idx_dim = 2 B = [150, 50, 15] (stride (750, 15, 1)) A = [150, 50, 250] (stride (50, 1, 7500)) dim = 2 2.752 -> 2.756 ( +0.15%) [ +0.00% +0.04% +0.00% / +0.15% +0.51% +0.76%] index_select const : Elapsed 0.028 ms (2.752 ms / 100) 3.145 -> 3.142 ( -0.10%) [ +0.25% +0.10% +0.00% / -0.10% +1.02% +0.16%] index_select wrap : Elapsed 0.032 ms (3.153 ms / 100) 3.141 -> 3.140 ( -0.03%) [ +0.00% +0.06% +0.06% / -0.03% +0.29% +0.22%] index_select linear : Elapsed 0.031 ms (3.141 ms / 100) 3.139 -> 3.145 ( +0.19%) [ +0.00% +0.29% +0.19% / +0.19% +0.38% +0.51%] index_select reverse : Elapsed 0.031 ms (3.139 ms / 100) 2.752 -> 2.755 ( +0.11%) [ +0.29% +0.04% +0.00% / +0.22% +0.18% +0.11%] index_select skip64 : Elapsed 0.028 ms (2.760 ms / 100) 2.750 -> 2.755 ( +0.18%) [ +0.07% +0.33% +0.00% / +0.40% +0.22% +0.18%] index_select skip256 : Elapsed 0.028 ms (2.752 ms / 100) 3.145 -> 3.150 ( +0.16%) [ +0.00% +0.22% +0.06% / +0.16% +0.38% +0.38%] index_select spread : Elapsed 0.031 ms (3.145 ms / 100) 3.146 -> 3.145 ( -0.03%) [ +0.00% +0.10% +0.06% / -0.03% +0.22% +0.41%] index_select strided 3 : Elapsed 0.031 ms (3.146 ms / 100) 3.145 -> 3.144 ( -0.03%) [ +0.00% +0.00% +0.19% / -0.03% +0.38% +0.29%] index_select strided 5 : Elapsed 0.031 ms (3.145 ms / 100) 3.146 -> 3.147 ( +0.03%) [ +0.16% +0.00% +0.22% / +0.03% +0.16% +0.25%] index_select strided 7 : Elapsed 0.032 ms (3.151 ms / 100) 3.138 -> 3.143 ( +0.16%) [ +0.16% +0.00% +0.10% / +0.16% +0.54% +0.41%] index_select strided 8 : Elapsed 0.031 ms (3.143 ms / 100) 3.134 -> 3.134 ( +0.00%) [ +0.13% +0.00% +0.03% / +0.00% +0.54% +0.45%] index_select strided 16 : Elapsed 0.031 ms (3.138 ms / 100) 3.137 -> 3.136 ( -0.03%) [ +0.10% +0.00% +0.29% / -0.03% +0.45% +0.51%] index_select strided 64 : Elapsed 0.031 ms (3.140 ms / 100) 2.890 -> 2.897 ( +0.24%) [ +0.10% +0.00% +0.17% / +0.24% +0.73% +0.73%] index_select strided 100 : Elapsed 0.029 ms (2.893 ms / 100) 3.109 -> 3.117 ( +0.26%) [ +0.00% +0.32% +0.06% / +0.26% +0.48% +0.32%] index_select random : Elapsed 0.031 ms (3.109 ms / 100) 3.087 -> 3.096 ( +0.29%) [ +0.26% +0.19% +0.00% / +0.29% +1.07% +0.71%] index_select random_sorted : Elapsed 0.031 ms (3.095 ms / 100) 3.150 -> 3.156 ( +0.19%) [ +0.00% +0.16% +0.00% / +0.19% +0.29% +0.35%] index_select perm : Elapsed 0.031 ms (3.150 ms / 100) 3.157 -> 3.158 ( +0.03%) [ +0.32% +0.00% +0.13% / +0.03% +0.32% +0.54%] index_select perm_sorted : Elapsed 0.032 ms (3.167 ms / 100) B = [150, 50, 15] (stride (50, 1, 7500)) dim = 2 fill_cnt = 250 7.312 -> 7.305 ( -0.10%) [ +0.08% +0.08% +0.00% / -0.10% +0.40% +0.40%] index_fill_ const : Elapsed 0.073 ms (7.318 ms / 100) 7.355 -> 7.355 ( +0.00%) [ +0.11% +0.00% +0.03% / +0.00% +0.18% +0.30%] index_fill_ linear : Elapsed 0.074 ms (7.363 ms / 100) 7.343 -> 7.349 ( +0.08%) [ +0.26% +0.00% +0.16% / +0.08% +0.49% +0.49%] index_fill_ reverse : Elapsed 0.074 ms (7.362 ms / 100) 7.310 -> 7.310 ( +0.00%) [ +0.00% +0.03% +0.01% / +0.00% +0.53% +0.44%] index_fill_ skip64 : Elapsed 0.073 ms (7.310 ms / 100) 7.307 -> 7.323 ( +0.22%) [ +0.15% +0.11% +0.00% / +0.22% +0.47% +0.38%] index_fill_ skip256 : Elapsed 0.073 ms (7.318 ms / 100) 7.336 -> 7.340 ( +0.05%) [ +0.08% +0.00% +0.10% / +0.05% +0.64% +0.60%] index_fill_ spread : Elapsed 0.073 ms (7.342 ms / 100) 7.339 -> 7.332 ( -0.10%) [ +0.10% +0.00% +0.04% / -0.10% +0.34% +0.40%] index_fill_ strided 3 : Elapsed 0.073 ms (7.346 ms / 100) 7.341 -> 7.336 ( -0.07%) [ +0.00% +0.12% +0.08% / -0.07% +0.11% +0.11%] index_fill_ strided 5 : Elapsed 0.073 ms (7.341 ms / 100) 7.352 -> 7.357 ( +0.07%) [ +0.10% +0.12% +0.00% / +0.07% +0.53% +0.54%] index_fill_ strided 7 : Elapsed 0.074 ms (7.359 ms / 100) 7.354 -> 7.361 ( +0.10%) [ +0.15% +0.03% +0.00% / +0.10% +0.41% +0.38%] index_fill_ strided 8 : Elapsed 0.074 ms (7.365 ms / 100) 7.354 -> 7.355 ( +0.01%) [ +0.00% +0.11% +0.00% / +0.01% +0.37% +0.44%] index_fill_ random : Elapsed 0.074 ms (7.354 ms / 100) 7.332 -> 7.337 ( +0.07%) [ +0.16% +0.19% +0.00% / +0.07% +0.56% +0.61%] index_fill_ random_sorted : Elapsed 0.073 ms (7.344 ms / 100) out_shape = [15, 250, 50] in_shape = [150, 250, 50] idx_dim = 0 B = [15, 250, 50] (stride (12500, 50, 1)) A = [150, 250, 50] (stride (250, 1, 37500)) dim = 0 7.555 -> 7.540 ( -0.20%) [ +0.00% +0.05% +0.05% / -0.04% -0.20% -0.11%] index_select const : Elapsed 0.076 ms (7.555 ms / 100) 8.023 -> 7.999 ( -0.30%) [ +0.14% +0.00% +0.11% / +0.12% -0.24% -0.30%] index_select wrap : Elapsed 0.080 ms (8.034 ms / 100) 8.032 -> 7.996 ( -0.45%) [ +0.14% +0.00% +0.06% / -0.04% -0.24% -0.45%] index_select linear : Elapsed 0.080 ms (8.043 ms / 100) 8.023 -> 8.000 ( -0.29%) [ +0.05% +0.00% +0.21% / +0.06% -0.16% -0.29%] index_select reverse : Elapsed 0.080 ms (8.027 ms / 100) 7.540 -> 7.543 ( +0.04%) [ +0.11% +0.00% +0.07% / +0.04% +0.20% +0.16%] index_select skip64 : Elapsed 0.075 ms (7.548 ms / 100) 7.549 -> 7.547 ( -0.03%) [ +0.00% +0.01% +0.04% / -0.03% +0.04% +0.01%] index_select skip256 : Elapsed 0.075 ms (7.549 ms / 100) 7.998 -> 7.996 ( -0.03%) [ +0.06% +0.06% +0.00% / -0.03% +0.31% +0.28%] index_select spread : Elapsed 0.080 ms (8.003 ms / 100) 8.008 -> 7.987 ( -0.26%) [ +0.06% +0.07% +0.00% / +0.22% -0.15% -0.26%] index_select strided 3 : Elapsed 0.080 ms (8.013 ms / 100) 8.023 -> 8.027 ( +0.05%) [ +0.00% +0.11% +0.09% / +0.07% +0.07% +0.05%] index_select strided 5 : Elapsed 0.080 ms (8.023 ms / 100) 7.966 -> 7.965 ( -0.01%) [ +0.09% +0.00% +0.00% / -0.01% +0.74% +0.89%] index_select strided 7 : Elapsed 0.080 ms (7.973 ms / 100) 7.958 -> 7.964 ( +0.08%) [ +0.06% +0.03% +0.00% / +0.08% +0.83% +0.90%] index_select strided 8 : Elapsed 0.080 ms (7.963 ms / 100) 8.008 -> 8.003 ( -0.06%) [ +0.10% +0.00% +0.19% / +0.16% -0.06% -0.01%] index_select strided 16 : Elapsed 0.080 ms (8.016 ms / 100) 7.993 -> 8.000 ( +0.09%) [ +0.05% +0.00% +0.13% / +0.09% +0.31% +0.31%] index_select strided 64 : Elapsed 0.080 ms (7.997 ms / 100) 7.733 -> 7.690 ( -0.56%) [ +0.04% +0.01% +0.00% / +0.03% -0.43% -0.56%] index_select strided 100 : Elapsed 0.077 ms (7.736 ms / 100) 7.997 -> 7.944 ( -0.66%) [ +0.08% +0.05% +0.00% / +0.03% -0.66% -0.58%] index_select random : Elapsed 0.080 ms (8.003 ms / 100) 7.969 -> 7.980 ( +0.14%) [ +0.00% +0.16% +0.19% / +0.14% +0.14% +0.18%] index_select random_sorted : Elapsed 0.080 ms (7.969 ms / 100) 8.047 -> 8.010 ( -0.46%) [ +0.24% +0.00% +0.09% / +0.09% -0.46% -0.30%] index_select perm : Elapsed 0.081 ms (8.066 ms / 100) 8.045 -> 8.000 ( -0.56%) [ +0.19% +0.00% +0.17% / +0.14% -0.56% -0.52%] index_select perm_sorted : Elapsed 0.081 ms (8.060 ms / 100) B = [15, 250, 50] (stride (250, 1, 3750)) A = [150, 250, 50] (stride (12500, 50, 1)) dim = 0 7.422 -> 7.425 ( +0.04%) [ +0.00% +0.09% +0.00% / +0.04% +0.44% +0.34%] index_select const : Elapsed 0.074 ms (7.422 ms / 100) 7.977 -> 7.994 ( +0.21%) [ +0.11% +0.00% +0.19% / +0.28% +0.23% +0.21%] index_select wrap : Elapsed 0.080 ms (7.986 ms / 100) 7.979 -> 7.996 ( +0.21%) [ +0.19% +0.00% +0.14% / +0.21% +0.36% +0.24%] index_select linear : Elapsed 0.080 ms (7.994 ms / 100) 7.987 -> 7.988 ( +0.01%) [ +0.18% +0.16% +0.00% / +0.01% +0.26% +0.35%] index_select reverse : Elapsed 0.080 ms (8.001 ms / 100) 7.418 -> 7.422 ( +0.05%) [ +0.08% +0.05% +0.00% / +0.05% +0.53% +0.40%] index_select skip64 : Elapsed 0.074 ms (7.424 ms / 100) 7.415 -> 7.406 ( -0.12%) [ +0.09% +0.09% +0.00% / -0.12% +0.46% +0.47%] index_select skip256 : Elapsed 0.074 ms (7.422 ms / 100) 7.999 -> 8.001 ( +0.03%) [ +0.04% +0.03% +0.00% / +0.03% +0.11% +0.10%] index_select spread : Elapsed 0.080 ms (8.002 ms / 100) 8.002 -> 7.986 ( -0.20%) [ +0.06% +0.00% +0.09% / -0.05% -0.20% -0.04%] index_select strided 3 : Elapsed 0.080 ms (8.007 ms / 100) 8.001 -> 8.007 ( +0.07%) [ +0.09% +0.00% +0.20% / +0.11% +0.07% +0.25%] index_select strided 5 : Elapsed 0.080 ms (8.008 ms / 100) 7.996 -> 8.004 ( +0.10%) [ +0.13% +0.09% +0.00% / +0.10% +0.10% +0.15%] index_select strided 7 : Elapsed 0.080 ms (8.006 ms / 100) 7.956 -> 7.959 ( +0.04%) [ +0.09% +0.00% +0.15% / +0.04% +0.31% +0.18%] index_select strided 8 : Elapsed 0.080 ms (7.963 ms / 100) 8.002 -> 7.997 ( -0.06%) [ +0.02% +0.00% +0.06% / -0.06% +0.12% +0.21%] index_select strided 16 : Elapsed 0.080 ms (8.004 ms / 100) 8.009 -> 8.009 ( +0.00%) [ +0.01% +0.00% +0.10% / +0.06% +0.05% +0.00%] index_select strided 64 : Elapsed 0.080 ms (8.010 ms / 100) 7.537 -> 7.536 ( -0.01%) [ +0.20% +0.00% +0.13% / -0.01% +0.61% +0.65%] index_select strided 100 : Elapsed 0.076 ms (7.552 ms / 100) 7.964 -> 7.963 ( -0.01%) [ +0.14% +0.00% +0.03% / -0.01% +0.10% -0.01%] index_select random : Elapsed 0.080 ms (7.975 ms / 100) 7.942 -> 7.951 ( +0.11%) [ +0.16% +0.24% +0.00% / +0.11% +0.33% +0.33%] index_select random_sorted : Elapsed 0.080 ms (7.955 ms / 100) 8.011 -> 8.000 ( -0.14%) [ +0.14% +0.00% +0.01% / +0.00% -0.14% -0.01%] index_select perm : Elapsed 0.080 ms (8.022 ms / 100) 7.988 -> 7.990 ( +0.03%) [ +0.11% +0.00% +0.25% / +0.18% +0.03% +0.09%] index_select perm_sorted : Elapsed 0.080 ms (7.997 ms / 100) B = [15, 250, 50] (stride (1, 15, 3750)) dim = 0 fill_cnt = 150 GOOD 16.883 -> 10.897 (-35.46%) [ +0.03% +0.00% +0.02% / -34.54% -35.38% -35.46%] index_fill_ const : Elapsed 0.169 ms (16.888 ms / 100) GOOD 16.859 -> 11.816 (-29.91%) [ +0.07% +0.07% +0.00% / -29.44% -29.88% -29.91%] index_fill_ linear : Elapsed 0.169 ms (16.871 ms / 100) Good 16.866 -> 14.440 (-14.38%) [ +0.02% +0.02% +0.00% / -14.38% -13.22% -13.15%] index_fill_ reverse : Elapsed 0.169 ms (16.870 ms / 100) GOOD 16.912 -> 11.771 (-30.40%) [ +0.00% +0.10% +0.17% / -29.18% -30.39% -30.40%] index_fill_ skip64 : Elapsed 0.169 ms (16.912 ms / 100) GOOD 16.907 -> 11.146 (-34.07%) [ +0.11% +0.05% +0.00% / -34.07% -32.87% -32.87%] index_fill_ skip256 : Elapsed 0.169 ms (16.926 ms / 100) GOOD 16.976 -> 11.861 (-30.13%) [ +0.03% +0.00% +0.04% / -30.13% -29.85% -29.85%] index_fill_ spread : Elapsed 0.170 ms (16.981 ms / 100) GOOD 17.333 -> 11.691 (-32.55%) [ +0.00% +0.09% +0.03% / -32.55% -32.46% -32.48%] index_fill_ strided 3 : Elapsed 0.173 ms (17.333 ms / 100) GOOD 17.249 -> 11.938 (-30.79%) [ +0.03% +0.02% +0.00% / -30.79% -30.11% -30.06%] index_fill_ strided 5 : Elapsed 0.173 ms (17.254 ms / 100) GOOD 16.967 -> 11.204 (-33.97%) [ +0.00% +0.04% +0.08% / -33.97% -32.78% -32.70%] index_fill_ strided 7 : Elapsed 0.170 ms (16.967 ms / 100) GOOD 16.949 -> 11.655 (-31.23%) [ +0.05% +0.03% +0.00% / -31.23% -30.43% -30.43%] index_fill_ strided 8 : Elapsed 0.170 ms (16.957 ms / 100) GOOD 16.946 -> 11.191 (-33.96%) [ +0.12% +0.04% +0.00% / -33.96% -33.62% -33.61%] index_fill_ random : Elapsed 0.170 ms (16.967 ms / 100) GOOD 16.832 -> 11.585 (-31.17%) [ +0.03% +0.00% +0.07% / -31.17% -30.82% -30.81%] index_fill_ random_sorted : Elapsed 0.168 ms (16.837 ms / 100) out_shape = [150, 15, 50] in_shape = [150, 250, 50] idx_dim = 1 B = [150, 15, 50] (stride (750, 50, 1)) A = [150, 250, 50] (stride (50, 7500, 1)) dim = 1 2.711 -> 2.708 ( -0.11%) [ +0.00% +0.07% +0.07% / -0.11% +0.33% +0.52%] index_select const : Elapsed 0.027 ms (2.711 ms / 100) 3.127 -> 3.128 ( +0.03%) [ +0.00% +0.00% +0.00% / +0.03% +0.29% +0.38%] index_select wrap : Elapsed 0.031 ms (3.127 ms / 100) 3.121 -> 3.124 ( +0.10%) [ +0.00% +0.13% +0.19% / +0.10% +0.42% +0.51%] index_select linear : Elapsed 0.031 ms (3.121 ms / 100) 3.123 -> 3.127 ( +0.13%) [ +0.00% +0.16% +0.03% / +0.13% +0.54% +0.48%] index_select reverse : Elapsed 0.031 ms (3.123 ms / 100) 2.709 -> 2.716 ( +0.26%) [ +0.04% +0.11% +0.00% / +0.26% +0.52% +0.48%] index_select skip64 : Elapsed 0.027 ms (2.710 ms / 100) 2.711 -> 2.711 ( +0.00%) [ +0.07% +0.11% +0.00% / +0.00% +0.44% +0.22%] index_select skip256 : Elapsed 0.027 ms (2.713 ms / 100) 3.134 -> 3.128 ( -0.19%) [ +0.00% +0.03% +0.29% / -0.19% +0.45% +0.64%] index_select spread : Elapsed 0.031 ms (3.134 ms / 100) 3.131 -> 3.130 ( -0.03%) [ +0.32% +0.19% +0.00% / -0.03% +0.61% +0.51%] index_select strided 3 : Elapsed 0.031 ms (3.141 ms / 100) 3.127 -> 3.131 ( +0.13%) [ +0.00% +0.13% +0.00% / +0.13% +0.74% +0.67%] index_select strided 5 : Elapsed 0.031 ms (3.127 ms / 100) 3.144 -> 3.145 ( +0.03%) [ +0.19% +0.13% +0.00% / +0.10% +0.32% +0.03%] index_select strided 7 : Elapsed 0.031 ms (3.150 ms / 100) 3.109 -> 3.114 ( +0.16%) [ +0.03% +0.00% +0.03% / +0.16% +0.51% +0.42%] index_select strided 8 : Elapsed 0.031 ms (3.110 ms / 100) 3.105 -> 3.113 ( +0.26%) [ +0.19% +0.00% +0.19% / +0.26% +0.64% +0.58%] index_select strided 16 : Elapsed 0.031 ms (3.111 ms / 100) 3.130 -> 3.138 ( +0.26%) [ +0.10% +0.00% +0.19% / +0.26% +0.51% +0.45%] index_select strided 64 : Elapsed 0.031 ms (3.133 ms / 100) 2.828 -> 2.827 ( -0.04%) [ +0.11% +0.07% +0.00% / -0.04% +0.85% +0.88%] index_select strided 100 : Elapsed 0.028 ms (2.831 ms / 100) 3.122 -> 3.131 ( +0.29%) [ +0.19% +0.00% +0.26% / +0.29% +0.38% +0.35%] index_select random : Elapsed 0.031 ms (3.128 ms / 100) 3.121 -> 3.124 ( +0.10%) [ +0.00% +0.16% +0.29% / +0.10% +0.45% +0.38%] index_select random_sorted : Elapsed 0.031 ms (3.121 ms / 100) 3.140 -> 3.146 ( +0.19%) [ +0.25% +0.45% +0.00% / +0.29% +0.22% +0.19%] index_select perm : Elapsed 0.031 ms (3.148 ms / 100) 3.125 -> 3.129 ( +0.13%) [ +0.13% +0.10% +0.00% / +0.13% +0.61% +0.74%] index_select perm_sorted : Elapsed 0.031 ms (3.129 ms / 100) B = [150, 15, 50] (stride (750, 1, 15)) A = [150, 250, 50] (stride (12500, 50, 1)) dim = 1 2.897 -> 2.900 ( +0.10%) [ +0.28% +0.00% +0.10% / +0.10% +0.41% +0.31%] index_select const : Elapsed 0.029 ms (2.905 ms / 100) 3.203 -> 3.203 ( +0.00%) [ +0.09% +0.00% +0.16% / +0.00% +0.75% +0.62%] index_select wrap : Elapsed 0.032 ms (3.206 ms / 100) 3.203 -> 3.202 ( -0.03%) [ +0.00% +0.19% +0.19% / -0.03% +0.72% +0.72%] index_select linear : Elapsed 0.032 ms (3.203 ms / 100) 3.222 -> 3.225 ( +0.09%) [ +0.25% +0.12% +0.00% / +0.09% +0.37% +0.37%] index_select reverse : Elapsed 0.032 ms (3.230 ms / 100) 2.903 -> 2.900 ( -0.10%) [ +0.07% +0.03% +0.00% / +0.17% +0.14% -0.10%] index_select skip64 : Elapsed 0.029 ms (2.905 ms / 100) 2.902 -> 2.900 ( -0.07%) [ +0.21% +0.00% +0.14% / +0.34% +0.14% -0.07%] index_select skip256 : Elapsed 0.029 ms (2.908 ms / 100) 3.232 -> 3.230 ( -0.06%) [ +0.09% +0.09% +0.00% / +0.09% -0.03% -0.06%] index_select spread : Elapsed 0.032 ms (3.235 ms / 100) 3.278 -> 3.252 ( -0.79%) [ +0.00% +0.06% +0.00% / +0.06% -0.76% -0.79%] index_select strided 3 : Elapsed 0.033 ms (3.278 ms / 100) 3.246 -> 3.242 ( -0.12%) [ +0.00% +0.00% +0.12% / +0.00% -0.12% +0.03%] index_select strided 5 : Elapsed 0.032 ms (3.246 ms / 100) 3.237 -> 3.237 ( +0.00%) [ +0.22% +0.12% +0.00% / +0.22% +0.00% +0.22%] index_select strided 7 : Elapsed 0.032 ms (3.244 ms / 100) 3.243 -> 3.237 ( -0.19%) [ +0.03% +0.06% +0.00% / -0.19% -0.06% -0.06%] index_select strided 8 : Elapsed 0.032 ms (3.244 ms / 100) 3.235 -> 3.234 ( -0.03%) [ +0.00% +0.12% +0.19% / +0.34% -0.03% +0.06%] index_select strided 16 : Elapsed 0.032 ms (3.235 ms / 100) 3.226 -> 3.233 ( +0.22%) [ +0.00% +0.19% +0.34% / +0.22% +0.34% +0.40%] index_select strided 64 : Elapsed 0.032 ms (3.226 ms / 100) 3.015 -> 3.015 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.17% +0.03% +0.00%] index_select strided 100 : Elapsed 0.030 ms (3.015 ms / 100) 3.241 -> 3.243 ( +0.06%) [ +0.28% +0.15% +0.00% / +0.25% +0.19% +0.06%] index_select random : Elapsed 0.032 ms (3.250 ms / 100) 3.229 -> 3.235 ( +0.19%) [ +0.28% +0.09% +0.00% / +0.19% +0.22% +0.22%] index_select random_sorted : Elapsed 0.032 ms (3.238 ms / 100) 3.225 -> 3.225 ( +0.00%) [ +0.09% +0.00% +0.06% / +0.00% +1.49% +1.27%] index_select perm : Elapsed 0.032 ms (3.228 ms / 100) 3.247 -> 3.249 ( +0.06%) [ +0.12% +0.03% +0.00% / +0.06% +0.28% +0.31%] index_select perm_sorted : Elapsed 0.033 ms (3.251 ms / 100) B = [150, 15, 50] (stride (750, 1, 15)) A = [150, 250, 50] (stride (1, 7500, 150)) dim = 1 3.084 -> 3.084 ( +0.00%) [ +0.00% +0.23% +0.03% / +0.00% +0.88% +1.04%] index_select const : Elapsed 0.031 ms (3.084 ms / 100) 3.249 -> 3.252 ( +0.09%) [ +0.00% +0.18% +0.12% / +0.09% +0.37% +0.62%] index_select wrap : Elapsed 0.032 ms (3.249 ms / 100) 3.255 -> 3.256 ( +0.03%) [ +0.12% +0.06% +0.00% / +0.03% +0.25% +0.09%] index_select linear : Elapsed 0.033 ms (3.259 ms / 100) 3.265 -> 3.268 ( +0.09%) [ +0.06% +0.00% +0.03% / +0.12% +0.15% +0.09%] index_select reverse : Elapsed 0.033 ms (3.267 ms / 100) 3.100 -> 3.098 ( -0.06%) [ +0.00% +0.13% +0.06% / -0.06% +0.23% +0.23%] index_select skip64 : Elapsed 0.031 ms (3.100 ms / 100) 3.097 -> 3.099 ( +0.06%) [ +0.00% +0.19% +0.13% / +0.13% +0.06% +0.26%] index_select skip256 : Elapsed 0.031 ms (3.097 ms / 100) 3.254 -> 3.259 ( +0.15%) [ +0.00% +0.12% +0.22% / +0.15% +0.28% +0.43%] index_select spread : Elapsed 0.033 ms (3.254 ms / 100) 3.263 -> 3.264 ( +0.03%) [ +0.37% +0.00% +0.21% / +0.09% +0.03% +0.12%] index_select strided 3 : Elapsed 0.033 ms (3.275 ms / 100) 3.248 -> 3.247 ( -0.03%) [ +0.00% +0.12% +0.06% / -0.03% +0.58% +0.92%] index_select strided 5 : Elapsed 0.032 ms (3.248 ms / 100) 3.259 -> 3.261 ( +0.06%) [ +0.34% +0.21% +0.00% / +0.06% +0.37% +0.28%] index_select strided 7 : Elapsed 0.033 ms (3.270 ms / 100) 3.263 -> 3.264 ( +0.03%) [ +0.03% +0.12% +0.00% / +0.03% +0.40% +0.46%] index_select strided 8 : Elapsed 0.033 ms (3.264 ms / 100) 3.256 -> 3.255 ( -0.03%) [ +0.03% +0.09% +0.00% / -0.03% +0.37% +0.37%] index_select strided 16 : Elapsed 0.033 ms (3.257 ms / 100) 3.257 -> 3.260 ( +0.09%) [ +0.18% +0.09% +0.00% / +0.09% +0.12% +0.61%] index_select strided 64 : Elapsed 0.033 ms (3.263 ms / 100) 3.179 -> 3.177 ( -0.06%) [ +0.00% +0.16% +0.09% / -0.06% -0.03% -0.03%] index_select strided 100 : Elapsed 0.032 ms (3.179 ms / 100) 3.273 -> 3.267 ( -0.18%) [ +0.24% +0.18% +0.00% / +0.03% +0.03% -0.18%] index_select random : Elapsed 0.033 ms (3.281 ms / 100) 3.271 -> 3.270 ( -0.03%) [ +0.00% +0.00% +0.28% / +0.18% +0.12% -0.03%] index_select random_sorted : Elapsed 0.033 ms (3.271 ms / 100) 3.269 -> 3.268 ( -0.03%) [ +0.09% +0.00% +0.34% / -0.03% +0.06% +0.21%] index_select perm : Elapsed 0.033 ms (3.272 ms / 100) 3.264 -> 3.268 ( +0.12%) [ +0.00% +0.12% +0.03% / +0.12% +0.31% +0.25%] index_select perm_sorted : Elapsed 0.033 ms (3.264 ms / 100) B = [150, 15, 50] (stride (750, 1, 15)) A = [150, 250, 50] (stride (1, 150, 37500)) dim = 1 3.176 -> 3.180 ( +0.13%) [ +0.31% +0.00% +0.09% / +0.13% +3.40% +3.40%] index_select const : Elapsed 0.032 ms (3.186 ms / 100) 3.418 -> 3.399 ( -0.56%) [ +0.15% +0.00% +0.00% / +0.03% -0.56% -0.32%] index_select wrap : Elapsed 0.034 ms (3.423 ms / 100) 3.426 -> 3.407 ( -0.55%) [ +0.03% +0.12% +0.00% / +0.03% -0.55% -0.41%] index_select linear : Elapsed 0.034 ms (3.427 ms / 100) 3.428 -> 3.429 ( +0.03%) [ +0.09% +0.23% +0.00% / +0.15% +0.15% +0.03%] index_select reverse : Elapsed 0.034 ms (3.431 ms / 100) 3.162 -> 3.164 ( +0.06%) [ +0.00% +0.13% +0.03% / +0.06% +4.02% +3.92%] index_select skip64 : Elapsed 0.032 ms (3.162 ms / 100) 3.165 -> 3.163 ( -0.06%) [ +0.16% +0.06% +0.00% / -0.06% +3.57% +3.76%] index_select skip256 : Elapsed 0.032 ms (3.170 ms / 100) 3.464 -> 3.432 ( -0.92%) [ +0.00% +0.23% +0.14% / +0.00% -0.92% -0.69%] index_select spread : Elapsed 0.035 ms (3.464 ms / 100) 3.456 -> 3.437 ( -0.55%) [ +0.20% +0.00% +0.17% / +0.17% -0.32% -0.55%] index_select strided 3 : Elapsed 0.035 ms (3.463 ms / 100) 3.513 -> 3.481 ( -0.91%) [ +0.09% +0.23% +0.00% / +0.17% -0.91% -0.88%] index_select strided 5 : Elapsed 0.035 ms (3.516 ms / 100) 3.477 -> 3.478 ( +0.03%) [ +0.00% +0.09% +0.00% / +0.03% +0.09% +0.14%] index_select strided 7 : Elapsed 0.035 ms (3.477 ms / 100) 3.456 -> 3.458 ( +0.06%) [ +0.00% +0.00% +0.06% / +0.06% +0.29% +0.29%] index_select strided 8 : Elapsed 0.035 ms (3.456 ms / 100) 3.468 -> 3.459 ( -0.26%) [ +0.06% +0.17% +0.00% / +0.26% -0.17% -0.26%] index_select strided 16 : Elapsed 0.035 ms (3.470 ms / 100) 3.466 -> 3.449 ( -0.49%) [ +0.00% +0.23% +0.09% / +0.06% -0.49% -0.40%] index_select strided 64 : Elapsed 0.035 ms (3.466 ms / 100) 3.358 -> 3.288 ( -2.08%) [ +0.00% +0.09% +0.15% / +0.09% -2.08% -2.05%] index_select strided 100 : Elapsed 0.034 ms (3.358 ms / 100) 3.512 -> 3.473 ( -1.11%) [ +0.09% +0.11% +0.00% / +0.23% -0.91% -1.11%] index_select random : Elapsed 0.035 ms (3.515 ms / 100) 3.557 -> 3.448 ( -3.06%) [ +0.08% +0.14% +0.00% / -0.03% -3.06% -2.95%] index_select random_sorted : Elapsed 0.036 ms (3.560 ms / 100) 3.505 -> 3.429 ( -2.17%) [ +0.00% +0.00% +0.03% / +0.09% -2.17% -2.00%] index_select perm : Elapsed 0.035 ms (3.505 ms / 100) 3.529 -> 3.437 ( -2.61%) [ +0.00% +0.06% +0.06% / +0.06% -2.35% -2.61%] index_select perm_sorted : Elapsed 0.035 ms (3.529 ms / 100) B = [150, 15, 50] (stride (1, 150, 2250)) A = [150, 250, 50] (stride (1, 7500, 150)) dim = 1 3.235 -> 3.242 ( +0.22%) [ +0.12% +0.00% +0.06% / +0.22% +1.39% +1.45%] index_select const : Elapsed 0.032 ms (3.239 ms / 100) 3.413 -> 3.424 ( +0.32%) [ +0.35% +0.00% +1.38% / +0.32% +0.97% +1.14%] index_select wrap : Elapsed 0.034 ms (3.425 ms / 100) 3.422 -> 3.422 ( +0.00%) [ +0.00% +0.00% +0.06% / +0.00% +0.41% +0.58%] index_select linear : Elapsed 0.034 ms (3.422 ms / 100) 3.426 -> 3.426 ( +0.00%) [ +0.38% +0.00% +0.18% / +0.00% +0.47% +0.38%] index_select reverse : Elapsed 0.034 ms (3.439 ms / 100) 3.247 -> 3.253 ( +0.18%) [ +0.09% +0.00% +0.15% / +0.18% +1.11% +0.99%] index_select skip64 : Elapsed 0.033 ms (3.250 ms / 100) 3.250 -> 3.254 ( +0.12%) [ +0.00% +0.03% +0.18% / +0.12% +0.98% +0.89%] index_select skip256 : Elapsed 0.032 ms (3.250 ms / 100) 3.415 -> 3.419 ( +0.12%) [ +0.03% +0.00% +0.15% / +0.12% +0.41% +0.50%] index_select spread : Elapsed 0.034 ms (3.416 ms / 100) 3.420 -> 3.416 ( -0.12%) [ +0.06% +0.00% +0.09% / +0.23% -0.12% -0.12%] index_select strided 3 : Elapsed 0.034 ms (3.422 ms / 100) 3.406 -> 3.406 ( +0.00%) [ +0.18% +0.06% +0.00% / +0.00% +0.76% +0.56%] index_select strided 5 : Elapsed 0.034 ms (3.412 ms / 100) 3.407 -> 3.416 ( +0.26%) [ +0.50% +0.06% +0.00% / +0.26% +0.59% +0.70%] index_select strided 7 : Elapsed 0.034 ms (3.424 ms / 100) 3.418 -> 3.420 ( +0.06%) [ +0.26% +0.15% +0.00% / +0.06% +0.44% +0.70%] index_select strided 8 : Elapsed 0.034 ms (3.427 ms / 100) 3.415 -> 3.422 ( +0.20%) [ +0.29% +0.09% +0.00% / +0.23% +0.20% +0.50%] index_select strided 16 : Elapsed 0.034 ms (3.425 ms / 100) 3.418 -> 3.422 ( +0.12%) [ +0.26% +0.23% +0.00% / +0.23% +0.12% +0.12%] index_select strided 64 : Elapsed 0.034 ms (3.427 ms / 100) 3.315 -> 3.313 ( -0.06%) [ +0.09% +0.12% +0.00% / -0.06% +0.30% +0.30%] index_select strided 100 : Elapsed 0.033 ms (3.318 ms / 100) 3.414 -> 3.436 ( +0.64%) [ +0.21% +0.03% +0.00% / +1.82% +0.64% +0.76%] index_select random : Elapsed 0.034 ms (3.421 ms / 100) 3.419 -> 3.421 ( +0.06%) [ +0.00% +0.09% +0.23% / +0.06% +0.15% +0.18%] index_select random_sorted : Elapsed 0.034 ms (3.419 ms / 100) 3.417 -> 3.416 ( -0.03%) [ +0.06% +0.00% +0.18% / -0.03% +0.26% +0.09%] index_select perm : Elapsed 0.034 ms (3.419 ms / 100) 3.411 -> 3.414 ( +0.09%) [ +0.03% +0.12% +0.00% / +0.29% +0.21% +0.09%] index_select perm_sorted : Elapsed 0.034 ms (3.412 ms / 100) out_shape = [150, 250, 15] in_shape = [150, 250, 50] idx_dim = 2 B = [150, 250, 15] (stride (3750, 15, 1)) A = [150, 250, 50] (stride (250, 1, 37500)) dim = 2 24.654 -> 24.690 ( +0.15%) [ +0.00% +0.33% +0.61% / +0.15% +1.28% +1.42%] index_select const : Elapsed 0.247 ms (24.654 ms / 100) 26.336 -> 26.414 ( +0.30%) [ +0.08% +0.00% +0.11% / +0.30% +1.03% +1.11%] index_select wrap : Elapsed 0.264 ms (26.356 ms / 100) 26.417 -> 26.447 ( +0.11%) [ +0.00% +0.08% +0.17% / +0.11% +0.77% +0.97%] index_select linear : Elapsed 0.264 ms (26.417 ms / 100) 26.463 -> 26.489 ( +0.10%) [ +0.20% +0.00% +0.17% / +0.10% +1.07% +0.83%] index_select reverse : Elapsed 0.265 ms (26.515 ms / 100) 24.718 -> 24.771 ( +0.21%) [ +0.06% +0.00% +0.00% / +0.21% +1.14% +1.08%] index_select skip64 : Elapsed 0.247 ms (24.733 ms / 100) 24.780 -> 24.740 ( -0.16%) [ +0.00% +0.04% +0.36% / -0.16% +0.75% +0.61%] index_select skip256 : Elapsed 0.248 ms (24.780 ms / 100) 26.335 -> 26.385 ( +0.19%) [ +0.00% +0.14% +0.20% / +0.19% +1.05% +0.73%] index_select spread : Elapsed 0.263 ms (26.335 ms / 100) 26.447 -> 26.427 ( -0.08%) [ +0.00% +0.03% +0.06% / -0.08% +0.73% +0.73%] index_select strided 3 : Elapsed 0.264 ms (26.447 ms / 100) 26.298 -> 26.301 ( +0.01%) [ +0.00% +0.00% +0.14% / +0.01% +0.93% +0.83%] index_select strided 5 : Elapsed 0.263 ms (26.298 ms / 100) 26.410 -> 26.515 ( +0.40%) [ +0.30% +0.00% +0.28% / +0.40% +1.30% +1.15%] index_select strided 7 : Elapsed 0.265 ms (26.490 ms / 100) 26.006 -> 26.080 ( +0.28%) [ +0.00% +0.25% +0.32% / +0.28% +1.15% +0.95%] index_select strided 8 : Elapsed 0.260 ms (26.006 ms / 100) 26.055 -> 26.047 ( -0.03%) [ +0.00% +0.03% +0.06% / -0.03% +0.82% +1.10%] index_select strided 16 : Elapsed 0.261 ms (26.055 ms / 100) 26.371 -> 26.410 ( +0.15%) [ +0.00% +0.06% +0.10% / +0.15% +0.84% +0.96%] index_select random : Elapsed 0.264 ms (26.371 ms / 100) 26.263 -> 26.290 ( +0.10%) [ +0.22% +0.00% +0.14% / +0.10% +1.16% +1.12%] index_select random_sorted : Elapsed 0.263 ms (26.320 ms / 100) 26.340 -> 26.375 ( +0.13%) [ +0.00% +0.12% +0.04% / +0.13% +0.89% +0.79%] index_select perm : Elapsed 0.263 ms (26.340 ms / 100) 26.272 -> 26.275 ( +0.01%) [ +0.00% +0.05% +0.10% / +0.01% +0.91% +0.91%] index_select perm_sorted : Elapsed 0.263 ms (26.272 ms / 100) B = [150, 250, 15] (stride (3750, 1, 250)) A = [150, 250, 50] (stride (50, 7500, 1)) dim = 2 49.829 -> 49.774 ( -0.11%) [ +0.02% +0.00% +0.00% / -0.07% -0.11% +0.05%] index_select const : Elapsed 0.498 ms (49.840 ms / 100) 49.877 -> 49.816 ( -0.12%) [ +0.00% +0.00% +0.07% / +0.09% -0.05% -0.12%] index_select wrap : Elapsed 0.499 ms (49.877 ms / 100) 49.880 -> 49.773 ( -0.21%) [ +0.03% +0.02% +0.00% / +0.04% -0.11% -0.21%] index_select linear : Elapsed 0.499 ms (49.895 ms / 100) 49.861 -> 49.850 ( -0.02%) [ +0.00% +0.09% +0.13% / -0.02% +0.09% +0.01%] index_select reverse : Elapsed 0.499 ms (49.861 ms / 100) 49.814 -> 49.785 ( -0.06%) [ +0.09% +0.07% +0.00% / +0.11% -0.06% +0.05%] index_select skip64 : Elapsed 0.499 ms (49.860 ms / 100) 49.834 -> 49.790 ( -0.09%) [ +0.00% +0.21% +0.17% / +0.05% -0.05% -0.09%] index_select skip256 : Elapsed 0.498 ms (49.834 ms / 100) 49.971 -> 49.783 ( -0.38%) [ +0.05% +0.09% +0.00% / +0.08% -0.25% -0.38%] index_select spread : Elapsed 0.500 ms (49.996 ms / 100) 49.864 -> 49.826 ( -0.08%) [ +0.09% +0.00% +0.07% / +0.03% -0.02% -0.08%] index_select strided 3 : Elapsed 0.499 ms (49.911 ms / 100) 49.822 -> 49.749 ( -0.15%) [ +0.15% +0.01% +0.00% / +0.05% -0.15% -0.03%] index_select strided 5 : Elapsed 0.499 ms (49.897 ms / 100) 49.993 -> 49.858 ( -0.27%) [ +0.09% +0.06% +0.00% / +0.01% -0.26% -0.27%] index_select strided 7 : Elapsed 0.500 ms (50.036 ms / 100) 49.922 -> 49.833 ( -0.18%) [ +0.00% +0.18% +0.16% / +0.14% -0.16% -0.18%] index_select strided 8 : Elapsed 0.499 ms (49.922 ms / 100) 49.984 -> 49.853 ( -0.26%) [ +0.00% +0.05% +0.06% / +0.03% -0.26% -0.26%] index_select strided 16 : Elapsed 0.500 ms (49.984 ms / 100) 49.935 -> 49.799 ( -0.27%) [ +0.04% +0.23% +0.00% / +0.11% -0.27% -0.20%] index_select random : Elapsed 0.500 ms (49.953 ms / 100) 49.780 -> 49.810 ( +0.06%) [ +0.06% +0.14% +0.00% / +0.06% +0.06% +0.17%] index_select random_sorted : Elapsed 0.498 ms (49.808 ms / 100) 49.908 -> 49.876 ( -0.06%) [ +0.00% +0.09% +0.02% / +0.10% -0.05% -0.06%] index_select perm : Elapsed 0.499 ms (49.908 ms / 100) 49.834 -> 49.843 ( +0.02%) [ +0.14% +0.23% +0.00% / +0.09% +0.02% +0.10%] index_select perm_sorted : Elapsed 0.499 ms (49.903 ms / 100) out_shape = [15, 50, 150] in_shape = [250, 50, 150] idx_dim = 0 B = [15, 50, 150] (stride (150, 2250, 1)) A = [250, 50, 150] (stride (1, 37500, 250)) dim = 0 7.298 -> 7.302 ( +0.05%) [ +0.01% +0.01% +0.00% / +0.05% +0.75% +0.70%] index_select const : Elapsed 0.073 ms (7.299 ms / 100) 7.413 -> 7.421 ( +0.11%) [ +0.08% +0.00% +0.13% / +0.11% +2.74% +2.82%] index_select wrap : Elapsed 0.074 ms (7.419 ms / 100) 7.418 -> 7.415 ( -0.04%) [ +0.03% +0.03% +0.00% / -0.04% +2.82% +2.80%] index_select linear : Elapsed 0.074 ms (7.420 ms / 100) 7.433 -> 7.449 ( +0.22%) [ +0.00% +0.04% +0.13% / +0.22% +2.58% +2.61%] index_select reverse : Elapsed 0.074 ms (7.433 ms / 100) 7.298 -> 7.311 ( +0.18%) [ +0.00% +0.04% +0.12% / +0.18% +0.63% +0.69%] index_select skip64 : Elapsed 0.073 ms (7.298 ms / 100) 7.292 -> 7.304 ( +0.16%) [ +0.00% +0.12% +0.10% / +0.16% +0.71% +0.63%] index_select skip256 : Elapsed 0.073 ms (7.292 ms / 100) 10.133 -> 10.143 ( +0.10%) [ +0.00% +0.00% +0.12% / +0.25% +0.10% +0.27%] index_select spread : Elapsed 0.101 ms (10.133 ms / 100) 8.114 -> 8.132 ( +0.22%) [ +0.22% +0.00% +0.16% / +0.22% +1.05% +0.99%] index_select strided 3 : Elapsed 0.081 ms (8.132 ms / 100) 8.927 -> 8.919 ( -0.09%) [ +0.01% +0.20% +0.00% / -0.07% -0.09% +0.25%] index_select strided 5 : Elapsed 0.089 ms (8.928 ms / 100) 9.717 -> 9.723 ( +0.06%) [ +0.04% +0.00% +0.04% / +0.06% +0.25% +0.38%] index_select strided 7 : Elapsed 0.097 ms (9.721 ms / 100) 10.068 -> 10.071 ( +0.03%) [ +0.00% +0.16% +0.12% / +0.03% +0.58% +0.59%] index_select strided 8 : Elapsed 0.101 ms (10.068 ms / 100) 10.126 -> 10.133 ( +0.07%) [ +0.05% +0.00% +0.03% / +0.07% +0.38% +0.17%] index_select strided 16 : Elapsed 0.101 ms (10.131 ms / 100) 10.283 -> 10.287 ( +0.04%) [ +0.00% +0.06% +0.02% / +0.18% +0.04% +0.04%] index_select strided 64 : Elapsed 0.103 ms (10.283 ms / 100) 10.296 -> 10.288 ( -0.08%) [ +0.00% +0.02% +0.07% / -0.05% -0.08% -0.01%] index_select strided 100 : Elapsed 0.103 ms (10.296 ms / 100) 10.251 -> 10.254 ( +0.03%) [ +0.03% +0.11% +0.00% / +0.03% +0.08% +0.14%] index_select random : Elapsed 0.103 ms (10.254 ms / 100) 9.611 -> 9.595 ( -0.17%) [ +0.05% +0.00% +0.15% / +0.23% -0.17% -0.15%] index_select random_sorted : Elapsed 0.096 ms (9.616 ms / 100) 10.112 -> 10.134 ( +0.22%) [ +0.00% +0.04% +0.23% / +0.26% +0.34% +0.22%] index_select perm : Elapsed 0.101 ms (10.112 ms / 100) 9.644 -> 9.651 ( +0.07%) [ +0.00% +0.02% +0.18% / +0.07% +0.20% +0.31%] index_select perm_sorted : Elapsed 0.096 ms (9.644 ms / 100) B = [15, 50, 150] (stride (1, 2250, 15)) A = [250, 50, 150] (stride (1, 250, 12500)) dim = 0 9.324 -> 9.365 ( +0.44%) [ +0.03% +0.00% +0.54% / +0.44% +4.30% +4.38%] index_select const : Elapsed 0.093 ms (9.327 ms / 100) 9.948 -> 10.053 ( +1.06%) [ +0.18% +0.00% +0.47% / +1.06% +2.31% +2.06%] index_select wrap : Elapsed 0.100 ms (9.966 ms / 100) 9.967 -> 9.994 ( +0.27%) [ +0.00% +0.08% +0.22% / +0.27% +1.86% +1.97%] index_select linear : Elapsed 0.100 ms (9.967 ms / 100) 9.943 -> 9.983 ( +0.40%) [ +0.00% +0.11% +0.42% / +0.40% +2.08% +2.02%] index_select reverse : Elapsed 0.099 ms (9.943 ms / 100) 9.331 -> 9.373 ( +0.45%) [ +0.20% +0.00% +0.28% / +0.45% +4.14% +4.24%] index_select skip64 : Elapsed 0.094 ms (9.350 ms / 100) 9.325 -> 9.372 ( +0.50%) [ +0.10% +0.00% +0.47% / +0.50% +4.09% +4.36%] index_select skip256 : Elapsed 0.093 ms (9.334 ms / 100) 14.355 -> 14.139 ( -1.50%) [ +0.03% +0.00% +0.17% / +0.06% -1.27% -1.50%] index_select spread : Elapsed 0.144 ms (14.359 ms / 100) 11.281 -> 11.160 ( -1.07%) [ +0.00% +0.06% +0.33% / +0.28% -1.00% -1.07%] index_select strided 3 : Elapsed 0.113 ms (11.281 ms / 100) 12.446 -> 12.355 ( -0.73%) [ +0.11% +0.00% +0.26% / +0.27% -0.71% -0.73%] index_select strided 5 : Elapsed 0.125 ms (12.460 ms / 100) 13.626 -> 13.474 ( -1.12%) [ +0.12% +0.00% +0.16% / +0.26% -1.05% -1.12%] index_select strided 7 : Elapsed 0.136 ms (13.642 ms / 100) 14.176 -> 14.038 ( -0.97%) [ +0.06% +0.00% +0.35% / +0.18% -0.90% -0.97%] index_select strided 8 : Elapsed 0.142 ms (14.185 ms / 100) 14.354 -> 14.178 ( -1.23%) [ +0.02% +0.00% +0.10% / +0.24% -1.22% -1.23%] index_select strided 16 : Elapsed 0.144 ms (14.357 ms / 100) 14.476 -> 14.345 ( -0.90%) [ +0.00% +0.03% +0.19% / +0.27% -0.90% -0.85%] index_select strided 64 : Elapsed 0.145 ms (14.476 ms / 100) 14.500 -> 14.357 ( -0.99%) [ +0.00% +0.02% +0.17% / +0.17% -0.91% -0.99%] index_select strided 100 : Elapsed 0.145 ms (14.500 ms / 100) 14.117 -> 13.967 ( -1.06%) [ +0.06% +0.00% +0.50% / +0.19% -1.05% -1.06%] index_select random : Elapsed 0.141 ms (14.126 ms / 100) 13.475 -> 13.338 ( -1.02%) [ +0.00% +0.10% +0.27% / +0.22% -0.95% -1.02%] index_select random_sorted : Elapsed 0.135 ms (13.475 ms / 100) 14.348 -> 14.201 ( -1.02%) [ +0.01% +0.00% +0.07% / +0.10% -1.02% -0.85%] index_select perm : Elapsed 0.143 ms (14.350 ms / 100) 13.556 -> 13.433 ( -0.91%) [ +0.00% +0.05% +0.32% / +0.21% -0.86% -0.91%] index_select perm_sorted : Elapsed 0.136 ms (13.556 ms / 100) B = [15, 50, 150] (stride (50, 1, 750)) A = [250, 50, 150] (stride (7500, 1, 50)) dim = 0 3.125 -> 3.131 ( +0.19%) [ +0.16% +0.10% +0.00% / +0.19% +1.15% +1.22%] index_select const : Elapsed 0.031 ms (3.130 ms / 100) 3.319 -> 3.325 ( +0.18%) [ +0.00% +0.24% +0.18% / +0.18% +0.87% +0.84%] index_select wrap : Elapsed 0.033 ms (3.319 ms / 100) 3.332 -> 3.339 ( +0.21%) [ +0.27% +0.27% +0.00% / +0.21% +0.66% +0.60%] index_select linear : Elapsed 0.033 ms (3.341 ms / 100) 3.345 -> 3.339 ( -0.18%) [ +0.15% +0.00% +0.03% / -0.06% +0.09% -0.18%] index_select reverse : Elapsed 0.033 ms (3.350 ms / 100) 3.153 -> 3.154 ( +0.03%) [ +0.00% +0.03% +0.32% / +0.57% +0.03% +0.03%] index_select skip64 : Elapsed 0.032 ms (3.153 ms / 100) 3.155 -> 3.154 ( -0.03%) [ +0.00% +0.00% +0.03% / +0.22% +0.06% -0.03%] index_select skip256 : Elapsed 0.032 ms (3.155 ms / 100) 3.328 -> 3.329 ( +0.03%) [ +0.15% +0.00% +0.00% / +0.03% +0.57% +0.66%] index_select spread : Elapsed 0.033 ms (3.333 ms / 100) 3.343 -> 3.327 ( -0.48%) [ +0.00% +0.03% +0.39% / +0.36% -0.48% -0.24%] index_select strided 3 : Elapsed 0.033 ms (3.343 ms / 100) 3.326 -> 3.326 ( +0.00%) [ +0.15% +0.00% +0.09% / +0.00% +0.51% +0.33%] index_select strided 5 : Elapsed 0.033 ms (3.331 ms / 100) 3.333 -> 3.333 ( +0.00%) [ +0.15% +0.00% +0.24% / +0.06% +0.06% +0.00%] index_select strided 7 : Elapsed 0.033 ms (3.338 ms / 100) 3.317 -> 3.321 ( +0.12%) [ +0.00% +0.30% +0.09% / +0.12% +0.93% +0.75%] index_select strided 8 : Elapsed 0.033 ms (3.317 ms / 100) 3.321 -> 3.322 ( +0.03%) [ +0.00% +0.33% +0.18% / +0.03% +0.69% +0.57%] index_select strided 16 : Elapsed 0.033 ms (3.321 ms / 100) 3.331 -> 3.326 ( -0.15%) [ +0.00% +0.21% +0.03% / +0.27% -0.15% -0.06%] index_select strided 64 : Elapsed 0.033 ms (3.331 ms / 100) 3.222 -> 3.210 ( -0.37%) [ +0.06% +0.00% +0.06% / +0.12% -0.34% -0.37%] index_select strided 100 : Elapsed 0.032 ms (3.224 ms / 100) 3.331 -> 3.331 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.51% +0.45%] index_select random : Elapsed 0.033 ms (3.334 ms / 100) 3.338 -> 3.335 ( -0.09%) [ +0.06% +0.09% +0.00% / -0.09% +0.69% +0.69%] index_select random_sorted : Elapsed 0.033 ms (3.340 ms / 100) 3.331 -> 3.328 ( -0.09%) [ +0.03% +0.09% +0.00% / -0.09% +0.27% +0.39%] index_select perm : Elapsed 0.033 ms (3.332 ms / 100) 3.338 -> 3.338 ( +0.00%) [ +0.21% +0.00% +0.06% / +0.03% +0.15% +0.00%] index_select perm_sorted : Elapsed 0.033 ms (3.345 ms / 100) B = [15, 50, 150] (stride (1, 15, 750)) A = [250, 50, 150] (stride (1, 250, 12500)) dim = 0 5.250 -> 5.275 ( +0.48%) [ +0.00% +0.00% +0.30% / +0.48% +5.70% +5.35%] index_select const : Elapsed 0.052 ms (5.250 ms / 100) 6.175 -> 6.184 ( +0.15%) [ +0.02% +0.00% +0.26% / +0.15% +1.04% +0.96%] index_select wrap : Elapsed 0.062 ms (6.176 ms / 100) 6.177 -> 6.190 ( +0.21%) [ +0.00% +0.18% +0.40% / +0.21% +0.76% +1.07%] index_select linear : Elapsed 0.062 ms (6.177 ms / 100) 6.109 -> 6.097 ( -0.20%) [ +0.08% +0.00% +0.34% / -0.20% +2.41% +2.44%] index_select reverse : Elapsed 0.061 ms (6.114 ms / 100) 5.264 -> 5.282 ( +0.34%) [ +0.00% +0.25% +0.36% / +0.34% +5.02% +5.02%] index_select skip64 : Elapsed 0.053 ms (5.264 ms / 100) 5.269 -> 5.275 ( +0.11%) [ +0.11% +0.00% +0.17% / +0.11% +4.95% +5.11%] index_select skip256 : Elapsed 0.053 ms (5.275 ms / 100) 10.869 -> 10.728 ( -1.30%) [ +0.06% +0.00% +0.26% / +0.23% -1.26% -1.30%] index_select spread : Elapsed 0.109 ms (10.875 ms / 100) 7.760 -> 7.641 ( -1.53%) [ +0.00% +0.04% +0.15% / +0.18% -1.53% -1.46%] index_select strided 3 : Elapsed 0.078 ms (7.760 ms / 100) 9.082 -> 8.932 ( -1.65%) [ +0.00% +0.06% +0.11% / +0.01% -1.52% -1.65%] index_select strided 5 : Elapsed 0.091 ms (9.082 ms / 100) 10.252 -> 10.080 ( -1.68%) [ +0.00% +0.09% +0.14% / +0.13% -1.68% -1.60%] index_select strided 7 : Elapsed 0.103 ms (10.252 ms / 100) 10.808 -> 10.638 ( -1.57%) [ +0.00% +0.06% +0.18% / +0.18% -1.57% -1.52%] index_select strided 8 : Elapsed 0.108 ms (10.808 ms / 100) 10.887 -> 10.729 ( -1.45%) [ +0.00% +0.09% +0.11% / +0.06% -1.45% -1.31%] index_select strided 16 : Elapsed 0.109 ms (10.887 ms / 100) 10.936 -> 10.810 ( -1.15%) [ +0.00% +0.00% +0.04% / +0.08% -1.15% -0.99%] index_select strided 64 : Elapsed 0.109 ms (10.936 ms / 100) 10.966 -> 10.810 ( -1.42%) [ +0.00% +0.11% +0.32% / +0.22% -1.40% -1.42%] index_select strided 100 : Elapsed 0.110 ms (10.966 ms / 100) 10.608 -> 10.478 ( -1.23%) [ +0.00% +0.02% +0.38% / +0.08% -1.18% -1.23%] index_select random : Elapsed 0.106 ms (10.608 ms / 100) 9.586 -> 9.452 ( -1.40%) [ +0.04% +0.00% +0.09% / +0.49% -1.40% -1.26%] index_select random_sorted : Elapsed 0.096 ms (9.590 ms / 100) 10.697 -> 10.615 ( -0.77%) [ +0.00% +0.00% +0.18% / +0.29% -0.67% -0.77%] index_select perm : Elapsed 0.107 ms (10.697 ms / 100) 9.522 -> 9.383 ( -1.46%) [ +0.02% +0.00% +0.40% / +0.27% -1.46% -1.33%] index_select perm_sorted : Elapsed 0.095 ms (9.524 ms / 100) out_shape = [250, 15, 150] in_shape = [250, 50, 150] idx_dim = 1 B = [250, 15, 150] (stride (2250, 150, 1)) dim = 1 fill_cnt = 50 8.759 -> 8.768 ( +0.10%) [ +0.18% +0.06% +0.00% / +0.10% +0.31% +0.43%] index_fill_ const : Elapsed 0.088 ms (8.775 ms / 100) 8.897 -> 8.878 ( -0.21%) [ +0.02% +0.00% +0.03% / -0.21% +0.22% +0.18%] index_fill_ linear : Elapsed 0.089 ms (8.899 ms / 100) 8.881 -> 8.879 ( -0.02%) [ +0.26% +0.00% +0.01% / -0.02% +0.25% +0.30%] index_fill_ reverse : Elapsed 0.089 ms (8.904 ms / 100) 8.767 -> 8.765 ( -0.02%) [ +0.27% +0.02% +0.00% / -0.02% +0.48% +0.23%] index_fill_ skip64 : Elapsed 0.088 ms (8.791 ms / 100) 8.754 -> 8.773 ( +0.22%) [ +0.31% +0.00% +0.17% / +0.22% +0.32% +0.37%] index_fill_ skip256 : Elapsed 0.088 ms (8.781 ms / 100) 8.878 -> 8.868 ( -0.11%) [ +0.21% +0.11% +0.00% / -0.11% +0.23% +0.41%] index_fill_ spread : Elapsed 0.089 ms (8.897 ms / 100) 8.828 -> 8.823 ( -0.06%) [ +0.27% +0.03% +0.00% / -0.06% +0.37% +0.43%] index_fill_ strided 3 : Elapsed 0.089 ms (8.852 ms / 100) 8.804 -> 8.794 ( -0.11%) [ +0.25% +0.00% +0.06% / -0.11% +0.16% +0.11%] index_fill_ strided 5 : Elapsed 0.088 ms (8.826 ms / 100) 9.040 -> 9.046 ( +0.07%) [ +0.25% +0.00% +0.42% / +0.07% +0.31% +0.33%] index_fill_ strided 7 : Elapsed 0.091 ms (9.063 ms / 100) 9.049 -> 9.043 ( -0.07%) [ +0.06% +0.00% +0.10% / -0.07% +0.18% +0.28%] index_fill_ strided 8 : Elapsed 0.091 ms (9.054 ms / 100) 8.962 -> 8.969 ( +0.08%) [ +0.22% +0.00% +0.22% / +0.08% +0.48% +0.64%] index_fill_ random : Elapsed 0.090 ms (8.982 ms / 100) 8.880 -> 8.875 ( -0.06%) [ +0.03% +0.00% +0.09% / -0.06% +0.21% +0.34%] index_fill_ random_sorted : Elapsed 0.089 ms (8.883 ms / 100) B = [250, 15, 150] (stride (2250, 150, 1)) A = [250, 50, 150] (stride (50, 1, 12500)) dim = 1 49.656 -> 49.724 ( +0.14%) [ +0.00% +0.07% +0.04% / +0.14% +0.22% +0.42%] index_select const : Elapsed 0.497 ms (49.656 ms / 100) 49.712 -> 49.706 ( -0.01%) [ +0.09% +0.11% +0.00% / -0.01% +0.13% +0.10%] index_select wrap : Elapsed 0.498 ms (49.758 ms / 100) 49.687 -> 49.727 ( +0.08%) [ +0.00% +0.06% +0.03% / +0.08% +0.27% +0.18%] index_select linear : Elapsed 0.497 ms (49.687 ms / 100) 49.700 -> 49.681 ( -0.04%) [ +0.00% +0.07% +0.02% / -0.04% +0.09% +0.12%] index_select reverse : Elapsed 0.497 ms (49.700 ms / 100) 49.641 -> 49.665 ( +0.05%) [ +0.19% +0.02% +0.00% / +0.05% +0.34% +0.44%] index_select skip64 : Elapsed 0.497 ms (49.737 ms / 100) 49.685 -> 49.691 ( +0.01%) [ +0.00% +0.09% +0.08% / +0.16% +0.35% +0.01%] index_select skip256 : Elapsed 0.497 ms (49.685 ms / 100) 49.803 -> 49.773 ( -0.06%) [ +0.01% +0.00% +0.08% / -0.06% -0.01% +0.08%] index_select spread : Elapsed 0.498 ms (49.806 ms / 100) 49.723 -> 49.723 ( +0.00%) [ +0.00% +0.13% +0.04% / +0.18% +0.00% +0.22%] index_select strided 3 : Elapsed 0.497 ms (49.723 ms / 100) 49.621 -> 49.703 ( +0.17%) [ +0.21% +0.00% +0.10% / +0.17% +0.27% +0.36%] index_select strided 5 : Elapsed 0.497 ms (49.727 ms / 100) 49.756 -> 49.800 ( +0.09%) [ +0.00% +0.09% +0.03% / +0.09% +0.21% +0.17%] index_select strided 7 : Elapsed 0.498 ms (49.756 ms / 100) 49.669 -> 49.697 ( +0.06%) [ +0.00% +0.24% +0.31% / +0.06% +0.39% +0.23%] index_select strided 8 : Elapsed 0.497 ms (49.669 ms / 100) 49.755 -> 49.759 ( +0.01%) [ +0.00% +0.00% +0.07% / +0.01% +0.13% +0.19%] index_select strided 16 : Elapsed 0.498 ms (49.755 ms / 100) 49.673 -> 49.659 ( -0.03%) [ +0.06% +0.00% +0.14% / -0.03% +0.14% +0.25%] index_select random : Elapsed 0.497 ms (49.702 ms / 100) 49.671 -> 49.677 ( +0.01%) [ +0.00% +0.13% +0.05% / +0.01% +0.49% +0.37%] index_select random_sorted : Elapsed 0.497 ms (49.671 ms / 100) 49.648 -> 49.679 ( +0.06%) [ +0.13% +0.17% +0.00% / +0.06% +0.28% +0.16%] index_select perm : Elapsed 0.497 ms (49.713 ms / 100) 49.682 -> 49.715 ( +0.07%) [ +0.15% +0.00% +0.15% / +0.07% +0.20% +0.44%] index_select perm_sorted : Elapsed 0.498 ms (49.755 ms / 100) B = [250, 15, 150] (stride (1, 37500, 250)) A = [250, 50, 150] (stride (1, 250, 12500)) dim = 1 11.133 -> 11.142 ( +0.08%) [ +0.08% +0.00% +0.05% / +0.08% +2.41% +2.36%] index_select const : Elapsed 0.111 ms (11.142 ms / 100) 13.351 -> 13.300 ( -0.38%) [ +0.08% +0.00% +0.43% / +0.15% -0.38% -0.25%] index_select wrap : Elapsed 0.134 ms (13.362 ms / 100) 13.360 -> 13.325 ( -0.26%) [ +0.13% +0.00% +0.13% / +0.37% -0.26% -0.21%] index_select linear : Elapsed 0.134 ms (13.378 ms / 100) 13.474 -> 13.392 ( -0.61%) [ +0.04% +0.00% +0.20% / +0.24% -0.61% -0.46%] index_select reverse : Elapsed 0.135 ms (13.479 ms / 100) 11.114 -> 11.120 ( +0.05%) [ +0.12% +0.00% +0.29% / +0.05% +2.59% +2.53%] index_select skip64 : Elapsed 0.111 ms (11.127 ms / 100) 11.203 -> 11.242 ( +0.35%) [ +0.17% +0.27% +0.00% / +0.35% +2.02% +2.04%] index_select skip256 : Elapsed 0.112 ms (11.222 ms / 100) 13.119 -> 13.146 ( +0.21%) [ +0.00% +0.11% +0.11% / +0.36% +0.21% +0.25%] index_select spread : Elapsed 0.131 ms (13.119 ms / 100) 13.305 -> 13.325 ( +0.15%) [ +0.06% +0.00% +0.23% / +0.33% +0.20% +0.15%] index_select strided 3 : Elapsed 0.133 ms (13.313 ms / 100) 13.095 -> 12.988 ( -0.82%) [ +0.17% +0.00% +0.32% / +0.15% -0.64% -0.82%] index_select strided 5 : Elapsed 0.131 ms (13.117 ms / 100) 13.049 -> 12.937 ( -0.86%) [ +0.05% +0.03% +0.00% / +0.49% -0.69% -0.86%] index_select strided 7 : Elapsed 0.131 ms (13.056 ms / 100) 13.051 -> 12.864 ( -1.43%) [ +0.00% +0.20% +0.11% / +0.27% -1.43% -1.39%] index_select strided 8 : Elapsed 0.131 ms (13.051 ms / 100) 13.535 -> 13.527 ( -0.06%) [ +0.16% +0.00% +0.20% / +0.39% +0.05% -0.06%] index_select strided 16 : Elapsed 0.136 ms (13.556 ms / 100) 13.034 -> 13.064 ( +0.23%) [ +0.00% +0.23% +0.05% / +0.23% +0.68% +0.73%] index_select random : Elapsed 0.130 ms (13.034 ms / 100) 12.837 -> 12.866 ( +0.23%) [ +0.00% +0.14% +0.18% / +0.23% +0.42% +0.65%] index_select random_sorted : Elapsed 0.128 ms (12.837 ms / 100) 13.181 -> 13.111 ( -0.53%) [ +0.00% +0.02% +0.08% / -0.09% -0.53% -0.33%] index_select perm : Elapsed 0.132 ms (13.181 ms / 100) 13.102 -> 13.157 ( +0.42%) [ +0.00% +0.17% +0.32% / +0.42% +0.87% +1.44%] index_select perm_sorted : Elapsed 0.131 ms (13.102 ms / 100) B = [250, 15, 150] (stride (15, 1, 3750)) A = [250, 50, 150] (stride (7500, 1, 50)) dim = 1 61.464 -> 61.301 ( -0.27%) [ +0.08% +0.23% +0.00% / +0.31% -0.11% -0.27%] index_select const : Elapsed 0.615 ms (61.511 ms / 100) 61.541 -> 61.543 ( +0.00%) [ +0.21% +0.00% +0.13% / +0.27% +0.10% +0.00%] index_select wrap : Elapsed 0.617 ms (61.672 ms / 100) 61.540 -> 61.489 ( -0.08%) [ +0.25% +0.00% +0.11% / +0.19% +0.10% -0.08%] index_select linear : Elapsed 0.617 ms (61.692 ms / 100) 61.642 -> 61.455 ( -0.30%) [ +0.18% +0.17% +0.00% / -0.08% -0.30% -0.21%] index_select reverse : Elapsed 0.618 ms (61.756 ms / 100) 61.525 -> 61.418 ( -0.17%) [ +0.14% +0.19% +0.00% / +0.04% -0.12% -0.17%] index_select skip64 : Elapsed 0.616 ms (61.610 ms / 100) 61.465 -> 61.426 ( -0.06%) [ +0.00% +0.00% +0.15% / +0.16% -0.06% +0.11%] index_select skip256 : Elapsed 0.615 ms (61.465 ms / 100) 61.605 -> 61.575 ( -0.05%) [ +0.16% +0.00% +0.01% / +0.02% -0.00% -0.05%] index_select spread : Elapsed 0.617 ms (61.706 ms / 100) 61.555 -> 61.601 ( +0.07%) [ +0.06% +0.00% +0.06% / +0.32% +0.07% +0.15%] index_select strided 3 : Elapsed 0.616 ms (61.592 ms / 100) 61.619 -> 61.509 ( -0.18%) [ +0.14% +0.00% +0.09% / +0.10% -0.11% -0.18%] index_select strided 5 : Elapsed 0.617 ms (61.707 ms / 100) 61.636 -> 61.552 ( -0.14%) [ +0.13% +0.00% +0.04% / +0.03% -0.03% -0.14%] index_select strided 7 : Elapsed 0.617 ms (61.717 ms / 100) 61.596 -> 61.504 ( -0.15%) [ +0.19% +0.15% +0.00% / +0.26% -0.11% -0.15%] index_select strided 8 : Elapsed 0.617 ms (61.713 ms / 100) 61.665 -> 61.374 ( -0.47%) [ +0.17% +0.01% +0.00% / +0.07% -0.45% -0.47%] index_select strided 16 : Elapsed 0.618 ms (61.767 ms / 100) 61.747 -> 61.499 ( -0.40%) [ +0.06% +0.00% +0.13% / -0.08% -0.40% -0.32%] index_select random : Elapsed 0.618 ms (61.786 ms / 100) 61.600 -> 61.583 ( -0.03%) [ +0.05% +0.00% +0.10% / -0.03% -0.03% -0.02%] index_select random_sorted : Elapsed 0.616 ms (61.629 ms / 100) 61.552 -> 61.489 ( -0.10%) [ +0.32% +0.10% +0.00% / -0.01% -0.01% -0.10%] index_select perm : Elapsed 0.617 ms (61.746 ms / 100) 61.689 -> 61.551 ( -0.22%) [ +0.02% +0.00% +0.07% / -0.03% -0.22% -0.11%] index_select perm_sorted : Elapsed 0.617 ms (61.703 ms / 100) out_shape = [250, 50, 15] in_shape = [250, 50, 150] idx_dim = 2 B = [250, 50, 15] (stride (750, 15, 1)) A = [250, 50, 150] (stride (7500, 1, 50)) dim = 2 8.141 -> 8.137 ( -0.05%) [ +0.01% +0.00% +0.01% / -0.05% +0.04% +0.04%] index_select const : Elapsed 0.081 ms (8.142 ms / 100) 8.589 -> 8.558 ( -0.36%) [ +0.03% +0.00% +0.05% / -0.03% -0.36% -0.29%] index_select wrap : Elapsed 0.086 ms (8.592 ms / 100) 8.617 -> 8.590 ( -0.31%) [ +0.00% +0.05% +0.17% / -0.02% -0.20% -0.31%] index_select linear : Elapsed 0.086 ms (8.617 ms / 100) 8.608 -> 8.588 ( -0.23%) [ +0.05% +0.00% +0.03% / +0.08% -0.23% -0.15%] index_select reverse : Elapsed 0.086 ms (8.612 ms / 100) 8.137 -> 8.153 ( +0.20%) [ +0.26% +0.17% +0.00% / +0.20% +0.47% +0.52%] index_select skip64 : Elapsed 0.082 ms (8.158 ms / 100) 8.152 -> 8.152 ( +0.00%) [ +0.00% +0.04% +0.06% / +0.00% +0.21% +0.28%] index_select skip256 : Elapsed 0.082 ms (8.152 ms / 100) 8.656 -> 8.629 ( -0.31%) [ +0.00% +0.07% +0.01% / +0.13% -0.31% -0.24%] index_select spread : Elapsed 0.087 ms (8.656 ms / 100) 8.712 -> 8.664 ( -0.55%) [ +0.02% +0.01% +0.00% / +0.09% -0.55% -0.54%] index_select strided 3 : Elapsed 0.087 ms (8.714 ms / 100) 8.677 -> 8.648 ( -0.33%) [ +0.01% +0.00% +0.07% / +0.12% -0.33% -0.30%] index_select strided 5 : Elapsed 0.087 ms (8.678 ms / 100) 8.649 -> 8.621 ( -0.32%) [ +0.03% +0.00% +0.17% / +0.07% -0.30% -0.32%] index_select strided 7 : Elapsed 0.087 ms (8.652 ms / 100) 8.673 -> 8.659 ( -0.16%) [ +0.00% +0.01% +0.12% / +0.09% -0.16% -0.15%] index_select strided 8 : Elapsed 0.087 ms (8.673 ms / 100) 8.661 -> 8.645 ( -0.18%) [ +0.00% +0.08% +0.24% / +0.17% -0.18% -0.01%] index_select strided 16 : Elapsed 0.087 ms (8.661 ms / 100) 8.702 -> 8.672 ( -0.34%) [ +0.06% +0.00% +0.10% / +0.10% -0.34% -0.34%] index_select strided 64 : Elapsed 0.087 ms (8.707 ms / 100) 8.514 -> 8.521 ( +0.08%) [ +0.00% +0.07% +0.09% / +0.08% +0.41% +0.31%] index_select strided 100 : Elapsed 0.085 ms (8.514 ms / 100) 8.662 -> 8.619 ( -0.50%) [ +0.15% +0.00% +0.03% / +0.09% -0.50% -0.43%] index_select random : Elapsed 0.087 ms (8.675 ms / 100) 8.658 -> 8.618 ( -0.46%) [ +0.00% +0.07% +0.17% / +0.23% -0.46% -0.40%] index_select random_sorted : Elapsed 0.087 ms (8.658 ms / 100) 8.677 -> 8.643 ( -0.39%) [ +0.00% +0.14% +0.18% / +0.25% -0.39% -0.30%] index_select perm : Elapsed 0.087 ms (8.677 ms / 100) 8.704 -> 8.652 ( -0.60%) [ +0.17% +0.00% +0.17% / +0.22% -0.60% -0.55%] index_select perm_sorted : Elapsed 0.087 ms (8.719 ms / 100) B = [250, 50, 15] (stride (750, 1, 50)) A = [250, 50, 150] (stride (50, 1, 12500)) dim = 2 7.261 -> 7.272 ( +0.15%) [ +0.15% +0.00% +0.10% / +0.15% +0.43% +0.36%] index_select const : Elapsed 0.073 ms (7.272 ms / 100) 7.876 -> 7.871 ( -0.06%) [ +0.09% +0.01% +0.00% / -0.06% +0.25% +0.28%] index_select wrap : Elapsed 0.079 ms (7.883 ms / 100) 7.873 -> 7.878 ( +0.06%) [ +0.14% +0.04% +0.00% / +0.06% +0.51% +0.51%] index_select linear : Elapsed 0.079 ms (7.884 ms / 100) 7.866 -> 7.866 ( +0.00%) [ +0.06% +0.29% +0.00% / +0.00% +0.37% +0.51%] index_select reverse : Elapsed 0.079 ms (7.871 ms / 100) 7.257 -> 7.264 ( +0.10%) [ +0.06% +0.00% +0.04% / +0.10% +0.45% +0.43%] index_select skip64 : Elapsed 0.073 ms (7.261 ms / 100) 7.263 -> 7.266 ( +0.04%) [ +0.03% +0.00% +0.17% / +0.04% +0.36% +0.29%] index_select skip256 : Elapsed 0.073 ms (7.265 ms / 100) 7.898 -> 7.906 ( +0.10%) [ +0.08% +0.00% +0.03% / +0.10% +0.29% +0.38%] index_select spread : Elapsed 0.079 ms (7.904 ms / 100) 7.900 -> 7.900 ( +0.00%) [ +0.00% +0.06% +0.06% / +0.00% +0.24% +0.19%] index_select strided 3 : Elapsed 0.079 ms (7.900 ms / 100) 7.892 -> 7.899 ( +0.09%) [ +0.00% +0.10% +0.18% / +0.09% +0.39% +0.30%] index_select strided 5 : Elapsed 0.079 ms (7.892 ms / 100) 7.896 -> 7.900 ( +0.05%) [ +0.04% +0.00% +0.06% / +0.05% +0.38% +0.20%] index_select strided 7 : Elapsed 0.079 ms (7.899 ms / 100) 7.860 -> 7.863 ( +0.04%) [ +0.11% +0.14% +0.00% / +0.04% +0.27% +0.37%] index_select strided 8 : Elapsed 0.079 ms (7.869 ms / 100) 7.902 -> 7.908 ( +0.08%) [ +0.15% +0.00% +0.04% / +0.08% +0.40% +0.44%] index_select strided 16 : Elapsed 0.079 ms (7.914 ms / 100) 7.901 -> 7.910 ( +0.11%) [ +0.09% +0.11% +0.00% / +0.11% +0.38% +0.44%] index_select strided 64 : Elapsed 0.079 ms (7.908 ms / 100) 7.388 -> 7.389 ( +0.01%) [ +0.04% +0.04% +0.00% / +0.01% +0.39% +0.31%] index_select strided 100 : Elapsed 0.074 ms (7.391 ms / 100) 7.862 -> 7.865 ( +0.04%) [ +0.18% +0.17% +0.00% / +0.04% +0.41% +0.34%] index_select random : Elapsed 0.079 ms (7.876 ms / 100) 7.864 -> 7.868 ( +0.05%) [ +0.00% +0.11% +0.11% / +0.05% +0.36% +0.27%] index_select random_sorted : Elapsed 0.079 ms (7.864 ms / 100) 7.901 -> 7.908 ( +0.09%) [ +0.09% +0.13% +0.00% / +0.09% +0.18% +0.18%] index_select perm : Elapsed 0.079 ms (7.908 ms / 100) 7.892 -> 7.895 ( +0.04%) [ +0.10% +0.04% +0.00% / +0.04% +0.29% +0.29%] index_select perm_sorted : Elapsed 0.079 ms (7.900 ms / 100) B = [250, 50, 15] (stride (15, 3750, 1)) A = [250, 50, 150] (stride (7500, 150, 1)) dim = 2 23.741 -> 23.652 ( -0.37%) [ +0.03% +0.00% +0.09% / +0.07% -0.37% -0.34%] index_select const : Elapsed 0.237 ms (23.747 ms / 100) 23.821 -> 23.717 ( -0.44%) [ +0.00% +0.03% +0.10% / +0.15% -0.43% -0.44%] index_select wrap : Elapsed 0.238 ms (23.821 ms / 100) 23.830 -> 23.707 ( -0.52%) [ +0.01% +0.00% +0.05% / +0.03% -0.45% -0.52%] index_select linear : Elapsed 0.238 ms (23.832 ms / 100) 23.794 -> 23.684 ( -0.46%) [ +0.02% +0.00% +0.05% / -0.04% -0.46% -0.42%] index_select reverse : Elapsed 0.238 ms (23.798 ms / 100) 23.737 -> 23.648 ( -0.37%) [ +0.00% +0.06% +0.09% / +0.09% -0.35% -0.37%] index_select skip64 : Elapsed 0.237 ms (23.737 ms / 100) 23.747 -> 23.633 ( -0.48%) [ +0.00% +0.05% +0.08% / +0.02% -0.48% -0.41%] index_select skip256 : Elapsed 0.237 ms (23.747 ms / 100) 23.937 -> 23.851 ( -0.36%) [ +0.05% +0.01% +0.00% / +0.08% -0.28% -0.36%] index_select spread : Elapsed 0.240 ms (23.950 ms / 100) 23.877 -> 23.743 ( -0.56%) [ +0.08% +0.00% +0.08% / -0.00% -0.56% -0.55%] index_select strided 3 : Elapsed 0.239 ms (23.896 ms / 100) 23.913 -> 23.798 ( -0.48%) [ +0.11% +0.00% +0.05% / +0.17% -0.48% -0.41%] index_select strided 5 : Elapsed 0.239 ms (23.939 ms / 100) 23.932 -> 23.833 ( -0.41%) [ +0.02% +0.00% +0.00% / +0.00% -0.41% -0.35%] index_select strided 7 : Elapsed 0.239 ms (23.937 ms / 100) 23.906 -> 23.839 ( -0.28%) [ +0.00% +0.11% +0.06% / +0.06% -0.25% -0.28%] index_select strided 8 : Elapsed 0.239 ms (23.906 ms / 100) 23.942 -> 23.871 ( -0.30%) [ +0.00% +0.00% +0.06% / +0.00% -0.30% -0.29%] index_select strided 16 : Elapsed 0.239 ms (23.943 ms / 100) 23.926 -> 23.843 ( -0.35%) [ +0.03% +0.00% +0.05% / +0.06% -0.24% -0.35%] index_select strided 64 : Elapsed 0.239 ms (23.933 ms / 100) 23.924 -> 23.845 ( -0.33%) [ +0.05% +0.01% +0.00% / +0.02% -0.33% -0.25%] index_select strided 100 : Elapsed 0.239 ms (23.935 ms / 100) 23.873 -> 23.855 ( -0.08%) [ +0.04% +0.00% +0.12% / +0.11% -0.05% -0.08%] index_select random : Elapsed 0.239 ms (23.882 ms / 100) 23.895 -> 23.894 ( -0.00%) [ +0.02% +0.00% +0.11% / +0.09% -0.00% +0.03%] index_select random_sorted : Elapsed 0.239 ms (23.900 ms / 100) 23.933 -> 23.845 ( -0.37%) [ +0.03% +0.01% +0.00% / -0.03% -0.37% -0.29%] index_select perm : Elapsed 0.239 ms (23.939 ms / 100) 23.904 -> 23.829 ( -0.31%) [ +0.00% +0.02% +0.07% / -0.04% -0.31% -0.28%] index_select perm_sorted : Elapsed 0.239 ms (23.904 ms / 100) B = [250, 50, 15] (stride (15, 3750, 1)) A = [250, 50, 150] (stride (150, 37500, 1)) dim = 2 23.910 -> 23.949 ( +0.16%) [ +0.00% +0.16% +0.12% / +0.16% +1.74% +1.60%] index_select const : Elapsed 0.239 ms (23.910 ms / 100) 23.876 -> 23.865 ( -0.05%) [ +0.00% +0.04% +0.08% / -0.05% +1.52% +1.48%] index_select wrap : Elapsed 0.239 ms (23.876 ms / 100) 23.878 -> 23.887 ( +0.04%) [ +0.17% +0.00% +0.13% / +0.04% +1.49% +1.61%] index_select linear : Elapsed 0.239 ms (23.919 ms / 100) 23.727 -> 23.735 ( +0.03%) [ +0.00% +0.10% +0.02% / +0.03% +2.47% +2.51%] index_select reverse : Elapsed 0.237 ms (23.727 ms / 100) 23.896 -> 23.898 ( +0.01%) [ +0.10% +0.00% +0.04% / +0.01% +1.77% +1.77%] index_select skip64 : Elapsed 0.239 ms (23.919 ms / 100) 24.038 -> 24.072 ( +0.14%) [ +0.16% +0.01% +0.00% / +0.14% +1.14% +1.31%] index_select skip256 : Elapsed 0.241 ms (24.077 ms / 100) 24.915 -> 24.824 ( -0.37%) [ +0.18% +0.00% +0.04% / +0.05% -0.37% -0.33%] index_select spread : Elapsed 0.250 ms (24.961 ms / 100) 24.256 -> 24.275 ( +0.08%) [ +0.00% +0.11% +0.09% / +0.08% +0.26% +0.24%] index_select strided 3 : Elapsed 0.243 ms (24.256 ms / 100) 24.545 -> 24.370 ( -0.71%) [ +0.00% +0.02% +0.00% / +0.00% -0.64% -0.71%] index_select strided 5 : Elapsed 0.245 ms (24.545 ms / 100) 24.763 -> 24.619 ( -0.58%) [ +0.00% +0.04% +0.02% / +0.06% -0.58% -0.49%] index_select strided 7 : Elapsed 0.248 ms (24.763 ms / 100) 24.808 -> 24.765 ( -0.17%) [ +0.19% +0.07% +0.00% / +0.06% -0.17% -0.12%] index_select strided 8 : Elapsed 0.249 ms (24.855 ms / 100) 24.846 -> 24.828 ( -0.07%) [ +0.00% +0.01% +0.04% / +0.03% -0.01% -0.07%] index_select strided 16 : Elapsed 0.248 ms (24.846 ms / 100) 24.794 -> 24.817 ( +0.09%) [ +0.00% +0.02% +0.00% / +0.09% +0.38% +0.39%] index_select strided 64 : Elapsed 0.248 ms (24.795 ms / 100) 24.827 -> 24.857 ( +0.12%) [ +0.00% +0.12% +0.05% / +0.12% +0.20% +0.24%] index_select strided 100 : Elapsed 0.248 ms (24.827 ms / 100) 24.724 -> 24.741 ( +0.07%) [ +0.00% +0.08% +0.16% / +0.17% +0.07% +0.09%] index_select random : Elapsed 0.247 ms (24.724 ms / 100) 24.676 -> 24.675 ( -0.00%) [ +0.04% +0.00% +0.08% / +0.06% -0.00% +0.07%] index_select random_sorted : Elapsed 0.247 ms (24.685 ms / 100) 24.784 -> 24.775 ( -0.04%) [ +0.10% +0.08% +0.00% / +0.09% -0.04% -0.02%] index_select perm : Elapsed 0.248 ms (24.809 ms / 100) 24.583 -> 24.613 ( +0.12%) [ +0.11% +0.00% +0.20% / +0.12% +0.21% +0.20%] index_select perm_sorted : Elapsed 0.246 ms (24.609 ms / 100) B = [250, 50, 15] (stride (15, 3750, 1)) A = [250, 50, 150] (stride (1, 37500, 250)) dim = 2 5.533 -> 5.535 ( +0.04%) [ +0.11% +0.00% +0.09% / +0.04% +0.80% +0.67%] index_select const : Elapsed 0.055 ms (5.539 ms / 100) 5.863 -> 5.820 ( -0.73%) [ +0.10% +0.09% +0.00% / +0.10% -0.53% -0.73%] index_select wrap : Elapsed 0.059 ms (5.869 ms / 100) 5.863 -> 5.831 ( -0.55%) [ +0.00% +0.05% +0.20% / +0.09% -0.41% -0.55%] index_select linear : Elapsed 0.059 ms (5.863 ms / 100) 5.893 -> 5.846 ( -0.80%) [ +0.02% +0.00% +0.15% / +0.31% -0.80% -0.71%] index_select reverse : Elapsed 0.059 ms (5.894 ms / 100) 5.470 -> 5.465 ( -0.09%) [ +0.00% +0.02% +0.11% / -0.09% +2.05% +1.99%] index_select skip64 : Elapsed 0.055 ms (5.470 ms / 100) 5.470 -> 5.471 ( +0.02%) [ +0.05% +0.00% +0.04% / +0.02% +1.99% +1.81%] index_select skip256 : Elapsed 0.055 ms (5.473 ms / 100) 5.906 -> 5.807 ( -1.68%) [ +0.30% +0.24% +0.00% / +0.30% -1.68% -1.64%] index_select spread : Elapsed 0.059 ms (5.924 ms / 100) 5.875 -> 5.874 ( -0.02%) [ +0.22% +0.00% +0.20% / +0.34% +0.17% -0.02%] index_select strided 3 : Elapsed 0.059 ms (5.888 ms / 100) 5.887 -> 5.853 ( -0.58%) [ +0.02% +0.07% +0.00% / +0.00% -0.36% -0.58%] index_select strided 5 : Elapsed 0.059 ms (5.888 ms / 100) 5.813 -> 5.820 ( +0.12%) [ +0.00% +0.05% +0.03% / +0.12% +0.57% +0.31%] index_select strided 7 : Elapsed 0.058 ms (5.813 ms / 100) 5.810 -> 5.830 ( +0.34%) [ +0.43% +0.00% +0.50% / +0.40% +0.34% +0.36%] index_select strided 8 : Elapsed 0.058 ms (5.835 ms / 100) 6.004 -> 5.917 ( -1.45%) [ +0.00% +0.10% +0.25% / +0.20% -1.32% -1.45%] index_select strided 16 : Elapsed 0.060 ms (6.004 ms / 100) 5.989 -> 5.926 ( -1.05%) [ +0.00% +0.10% +0.03% / -0.08% -0.90% -1.05%] index_select strided 64 : Elapsed 0.060 ms (5.989 ms / 100) 5.742 -> 5.675 ( -1.17%) [ +0.00% +0.30% +0.16% / -0.05% -1.11% -1.17%] index_select strided 100 : Elapsed 0.057 ms (5.742 ms / 100) 5.992 -> 5.900 ( -1.54%) [ +0.00% +0.15% +0.10% / +0.22% -1.42% -1.54%] index_select random : Elapsed 0.060 ms (5.992 ms / 100) 5.874 -> 5.815 ( -1.00%) [ +0.05% +0.17% +0.00% / +0.19% -0.73% -1.00%] index_select random_sorted : Elapsed 0.059 ms (5.877 ms / 100) 5.883 -> 5.894 ( +0.19%) [ +0.08% +0.00% +0.15% / +0.19% +0.95% +0.85%] index_select perm : Elapsed 0.059 ms (5.888 ms / 100) 5.913 -> 5.924 ( +0.19%) [ +0.27% +0.15% +0.00% / +0.19% +0.61% +0.52%] index_select perm_sorted : Elapsed 0.059 ms (5.929 ms / 100) B = [250, 50, 15] (stride (50, 1, 12500)) A = [250, 50, 150] (stride (150, 37500, 1)) dim = 2 20.072 -> 20.078 ( +0.03%) [ +0.01% +0.04% +0.00% / +0.03% +0.79% +0.92%] index_select const : Elapsed 0.201 ms (20.074 ms / 100) 20.480 -> 20.371 ( -0.53%) [ +0.00% +0.06% +0.10% / +0.16% -0.45% -0.53%] index_select wrap : Elapsed 0.205 ms (20.480 ms / 100) 20.486 -> 20.395 ( -0.44%) [ +0.02% +0.00% +0.01% / +0.06% -0.44% -0.44%] index_select linear : Elapsed 0.205 ms (20.490 ms / 100) 20.396 -> 20.328 ( -0.33%) [ +0.06% +0.00% +0.04% / +0.10% -0.25% -0.33%] index_select reverse : Elapsed 0.204 ms (20.408 ms / 100) 20.066 -> 20.088 ( +0.11%) [ +0.00% +0.08% +0.04% / +0.11% +0.83% +0.86%] index_select skip64 : Elapsed 0.201 ms (20.066 ms / 100) 20.076 -> 20.082 ( +0.03%) [ +0.01% +0.01% +0.00% / +0.03% +0.87% +0.78%] index_select skip256 : Elapsed 0.201 ms (20.078 ms / 100) 22.471 -> 22.391 ( -0.36%) [ +0.07% +0.00% +0.18% / +0.16% -0.36% -0.20%] index_select spread : Elapsed 0.225 ms (22.486 ms / 100) 21.177 -> 20.853 ( -1.53%) [ +0.00% +0.07% +0.07% / +0.08% -1.53% -1.47%] index_select strided 3 : Elapsed 0.212 ms (21.177 ms / 100) 21.715 -> 21.386 ( -1.52%) [ +0.00% +0.04% +0.14% / +0.13% -1.52% -1.49%] index_select strided 5 : Elapsed 0.217 ms (21.715 ms / 100) 22.270 -> 22.086 ( -0.83%) [ +0.00% +0.03% +0.13% / +0.16% -0.83% -0.80%] index_select strided 7 : Elapsed 0.223 ms (22.270 ms / 100) 22.498 -> 22.373 ( -0.56%) [ +0.07% +0.00% +0.11% / +0.16% -0.56% -0.53%] index_select strided 8 : Elapsed 0.225 ms (22.514 ms / 100) 22.523 -> 22.426 ( -0.43%) [ +0.00% +0.00% +0.01% / +0.05% -0.43% -0.39%] index_select strided 16 : Elapsed 0.225 ms (22.524 ms / 100) 22.501 -> 22.472 ( -0.13%) [ +0.00% +0.02% +0.00% / +0.03% +0.00% -0.13%] index_select strided 64 : Elapsed 0.225 ms (22.502 ms / 100) 22.574 -> 22.533 ( -0.18%) [ +0.00% +0.02% +0.08% / +0.06% -0.18% -0.16%] index_select strided 100 : Elapsed 0.226 ms (22.574 ms / 100) 22.272 -> 22.269 ( -0.01%) [ +0.05% +0.00% +0.04% / +0.06% -0.01% -0.00%] index_select random : Elapsed 0.223 ms (22.283 ms / 100) 21.731 -> 21.688 ( -0.20%) [ +0.11% +0.00% +0.03% / +0.04% -0.16% -0.20%] index_select random_sorted : Elapsed 0.218 ms (21.754 ms / 100) 22.403 -> 22.375 ( -0.12%) [ +0.08% +0.00% +0.19% / +0.18% -0.12% -0.08%] index_select perm : Elapsed 0.224 ms (22.420 ms / 100) 21.906 -> 21.890 ( -0.07%) [ +0.00% +0.00% +0.07% / +0.15% -0.07% -0.03%] index_select perm_sorted : Elapsed 0.219 ms (21.906 ms / 100) out_shape = [15, 150, 50] in_shape = [250, 150, 50] idx_dim = 0 B = [15, 150, 50] (stride (7500, 50, 1)) A = [250, 150, 50] (stride (1, 12500, 250)) dim = 0 3.678 -> 3.601 ( -2.09%) [ +0.33% +0.00% +0.24% / +0.24% -1.82% -2.09%] index_select const : Elapsed 0.037 ms (3.690 ms / 100) 4.044 -> 4.056 ( +0.30%) [ +0.20% +0.00% +0.35% / +0.30% +1.88% +1.81%] index_select wrap : Elapsed 0.041 ms (4.052 ms / 100) 4.059 -> 4.060 ( +0.02%) [ +0.05% +0.00% +0.00% / +0.02% +1.45% +1.36%] index_select linear : Elapsed 0.041 ms (4.061 ms / 100) 4.053 -> 4.057 ( +0.10%) [ +0.30% +0.00% +0.05% / +0.10% +1.58% +1.38%] index_select reverse : Elapsed 0.041 ms (4.065 ms / 100) 3.692 -> 3.602 ( -2.44%) [ +0.08% +0.00% +0.00% / +0.08% -2.44% -2.41%] index_select skip64 : Elapsed 0.037 ms (3.695 ms / 100) 3.693 -> 3.599 ( -2.55%) [ +0.05% +0.00% +0.22% / +0.19% -2.55% -2.44%] index_select skip256 : Elapsed 0.037 ms (3.695 ms / 100) 7.620 -> 7.627 ( +0.09%) [ +0.00% +0.16% +0.16% / +0.09% +0.68% +0.43%] index_select spread : Elapsed 0.076 ms (7.620 ms / 100) 5.206 -> 5.212 ( +0.12%) [ +0.19% +0.00% +0.12% / +0.12% +0.58% +0.65%] index_select strided 3 : Elapsed 0.052 ms (5.216 ms / 100) 6.288 -> 6.290 ( +0.03%) [ +0.03% +0.05% +0.00% / +0.03% +0.76% +0.75%] index_select strided 5 : Elapsed 0.063 ms (6.290 ms / 100) 7.240 -> 7.247 ( +0.10%) [ +0.10% +0.08% +0.00% / +0.10% +0.80% +0.90%] index_select strided 7 : Elapsed 0.072 ms (7.247 ms / 100) 7.648 -> 7.654 ( +0.08%) [ +0.08% +0.00% +0.05% / +0.08% +0.71% +0.65%] index_select strided 8 : Elapsed 0.077 ms (7.654 ms / 100) 7.621 -> 7.627 ( +0.08%) [ +0.12% +0.00% +0.08% / +0.08% +0.29% +0.54%] index_select strided 16 : Elapsed 0.076 ms (7.630 ms / 100) 7.718 -> 7.709 ( -0.12%) [ +0.00% +0.19% +0.05% / +0.04% -0.12% -0.08%] index_select strided 64 : Elapsed 0.077 ms (7.718 ms / 100) 7.720 -> 7.727 ( +0.09%) [ +0.00% +0.03% +0.25% / +0.16% +0.10% +0.09%] index_select strided 100 : Elapsed 0.077 ms (7.720 ms / 100) 7.271 -> 7.265 ( -0.08%) [ +0.00% +0.06% +0.18% / -0.08% +0.15% +0.23%] index_select random : Elapsed 0.073 ms (7.271 ms / 100) 6.458 -> 6.460 ( +0.03%) [ +0.12% +0.00% +0.08% / +0.03% +0.53% +0.57%] index_select random_sorted : Elapsed 0.065 ms (6.466 ms / 100) 7.689 -> 7.703 ( +0.18%) [ +0.21% +0.00% +0.13% / +0.18% +0.31% +0.21%] index_select perm : Elapsed 0.077 ms (7.705 ms / 100) 7.267 -> 7.267 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.00% +0.41% +0.48%] index_select perm_sorted : Elapsed 0.073 ms (7.267 ms / 100) B = [15, 150, 50] (stride (7500, 1, 150)) A = [250, 150, 50] (stride (7500, 1, 150)) dim = 0 3.146 -> 3.145 ( -0.03%) [ +0.13% +0.03% +0.00% / -0.03% +0.60% +0.51%] index_select const : Elapsed 0.031 ms (3.150 ms / 100) 3.362 -> 3.357 ( -0.15%) [ +0.03% +0.12% +0.00% / -0.15% +0.62% +0.27%] index_select wrap : Elapsed 0.034 ms (3.363 ms / 100) 3.373 -> 3.373 ( +0.00%) [ +0.12% +0.24% +0.00% / +0.00% +0.15% +0.30%] index_select linear : Elapsed 0.034 ms (3.377 ms / 100) 3.356 -> 3.360 ( +0.12%) [ +0.00% +0.06% +0.09% / +0.12% +0.30% +0.27%] index_select reverse : Elapsed 0.034 ms (3.356 ms / 100) 3.218 -> 3.162 ( -1.74%) [ +0.03% +0.00% +0.28% / +0.16% -1.74% -1.68%] index_select skip64 : Elapsed 0.032 ms (3.219 ms / 100) 3.219 -> 3.163 ( -1.74%) [ +0.03% +0.00% +0.09% / -0.03% -1.74% -1.65%] index_select skip256 : Elapsed 0.032 ms (3.220 ms / 100) 3.367 -> 3.365 ( -0.06%) [ +0.00% +0.15% +0.03% / +0.09% +0.27% -0.06%] index_select spread : Elapsed 0.034 ms (3.367 ms / 100) 3.357 -> 3.365 ( +0.24%) [ +0.00% +0.33% +0.18% / +0.24% +0.48% +0.48%] index_select strided 3 : Elapsed 0.034 ms (3.357 ms / 100) 3.363 -> 3.360 ( -0.09%) [ +0.06% +0.03% +0.00% / -0.09% +0.54% +0.56%] index_select strided 5 : Elapsed 0.034 ms (3.365 ms / 100) 3.361 -> 3.369 ( +0.24%) [ +0.15% +0.00% +0.03% / +0.24% +0.62% +0.60%] index_select strided 7 : Elapsed 0.034 ms (3.366 ms / 100) 3.350 -> 3.366 ( +0.48%) [ +0.00% +0.18% +0.15% / +0.48% +0.75% +0.90%] index_select strided 8 : Elapsed 0.034 ms (3.350 ms / 100) 3.363 -> 3.371 ( +0.24%) [ +0.00% +0.21% +0.24% / +0.24% +0.42% +0.56%] index_select strided 16 : Elapsed 0.034 ms (3.363 ms / 100) 3.361 -> 3.352 ( -0.27%) [ +0.12% +0.12% +0.00% / +0.12% -0.27% -0.24%] index_select strided 64 : Elapsed 0.034 ms (3.365 ms / 100) 3.235 -> 3.223 ( -0.37%) [ +0.00% +0.46% +0.34% / +0.40% -0.37% -0.31%] index_select strided 100 : Elapsed 0.032 ms (3.235 ms / 100) 3.338 -> 3.343 ( +0.15%) [ +0.00% +0.21% +0.12% / +0.27% +0.45% +0.15%] index_select random : Elapsed 0.033 ms (3.338 ms / 100) 3.322 -> 3.330 ( +0.24%) [ +0.18% +0.21% +0.00% / +0.24% +0.66% +0.96%] index_select random_sorted : Elapsed 0.033 ms (3.328 ms / 100) 3.357 -> 3.356 ( -0.03%) [ +0.36% +0.06% +0.00% / +0.27% -0.03% +0.12%] index_select perm : Elapsed 0.034 ms (3.369 ms / 100) 3.360 -> 3.357 ( -0.09%) [ +0.15% +0.00% +0.21% / +0.21% +0.06% -0.09%] index_select perm_sorted : Elapsed 0.034 ms (3.365 ms / 100) B = [15, 150, 50] (stride (7500, 1, 150)) A = [250, 150, 50] (stride (50, 12500, 1)) dim = 0 2.937 -> 2.942 ( +0.17%) [ +0.14% +0.17% +0.00% / +0.54% +0.37% +0.17%] index_select const : Elapsed 0.029 ms (2.941 ms / 100) 3.307 -> 3.291 ( -0.48%) [ +0.12% +0.24% +0.00% / +0.15% -0.48% -0.48%] index_select wrap : Elapsed 0.033 ms (3.311 ms / 100) 3.303 -> 3.287 ( -0.48%) [ +0.45% +0.27% +0.00% / +0.12% -0.48% -0.42%] index_select linear : Elapsed 0.033 ms (3.318 ms / 100) 3.319 -> 3.286 ( -0.99%) [ +0.00% +0.03% +0.03% / +0.12% -0.78% -0.99%] index_select reverse : Elapsed 0.033 ms (3.319 ms / 100) 2.944 -> 2.942 ( -0.07%) [ +0.00% +0.03% +0.14% / +0.03% -0.07% +0.00%] index_select skip64 : Elapsed 0.029 ms (2.944 ms / 100) 2.943 -> 2.943 ( +0.00%) [ +0.00% +0.20% +0.00% / +0.27% +0.10% +0.00%] index_select skip256 : Elapsed 0.029 ms (2.943 ms / 100) 3.313 -> 3.316 ( +0.09%) [ +0.21% +0.03% +0.00% / +0.09% +0.30% +0.57%] index_select spread : Elapsed 0.033 ms (3.320 ms / 100) 3.321 -> 3.321 ( +0.00%) [ +0.15% +0.00% +0.09% / +0.00% +0.87% +0.96%] index_select strided 3 : Elapsed 0.033 ms (3.326 ms / 100) 3.312 -> 3.313 ( +0.03%) [ +0.00% +0.21% +0.00% / +0.03% +0.75% +0.60%] index_select strided 5 : Elapsed 0.033 ms (3.312 ms / 100) 3.317 -> 3.325 ( +0.24%) [ +0.15% +0.24% +0.00% / +0.24% +0.36% +0.48%] index_select strided 7 : Elapsed 0.033 ms (3.322 ms / 100) 3.323 -> 3.323 ( +0.00%) [ +0.12% +0.00% +0.15% / +0.00% +0.39% +0.18%] index_select strided 8 : Elapsed 0.033 ms (3.327 ms / 100) 3.312 -> 3.321 ( +0.27%) [ +0.03% +0.09% +0.00% / +0.36% +0.27% +0.27%] index_select strided 16 : Elapsed 0.033 ms (3.313 ms / 100) 3.302 -> 3.304 ( +0.06%) [ +0.00% +0.12% +0.03% / +0.06% +0.55% +0.55%] index_select strided 64 : Elapsed 0.033 ms (3.302 ms / 100) 3.036 -> 3.038 ( +0.07%) [ +0.07% +0.00% +0.13% / +0.07% +0.16% +0.23%] index_select strided 100 : Elapsed 0.030 ms (3.038 ms / 100) 3.314 -> 3.313 ( -0.03%) [ +0.00% +0.03% +0.12% / -0.03% +0.54% +0.39%] index_select random : Elapsed 0.033 ms (3.314 ms / 100) 3.322 -> 3.316 ( -0.18%) [ +0.12% +0.09% +0.00% / +0.21% -0.06% -0.18%] index_select random_sorted : Elapsed 0.033 ms (3.326 ms / 100) 3.303 -> 3.301 ( -0.06%) [ +0.15% +0.00% +0.12% / -0.06% +1.30% +1.27%] index_select perm : Elapsed 0.033 ms (3.308 ms / 100) 3.303 -> 3.299 ( -0.12%) [ +0.00% +0.09% +0.06% / -0.12% +1.12% +1.27%] index_select perm_sorted : Elapsed 0.033 ms (3.303 ms / 100) B = [15, 150, 50] (stride (50, 750, 1)) A = [250, 150, 50] (stride (7500, 50, 1)) dim = 0 5.176 -> 5.172 ( -0.08%) [ +0.06% +0.04% +0.00% / -0.08% +0.39% +0.25%] index_select const : Elapsed 0.052 ms (5.179 ms / 100) 5.546 -> 5.547 ( +0.02%) [ +0.14% +0.09% +0.00% / +0.02% +0.50% +0.36%] index_select wrap : Elapsed 0.056 ms (5.554 ms / 100) 5.540 -> 5.549 ( +0.16%) [ +0.13% +0.25% +0.00% / +0.16% +0.65% +0.60%] index_select linear : Elapsed 0.055 ms (5.547 ms / 100) 5.542 -> 5.542 ( +0.00%) [ +0.13% +0.20% +0.00% / +0.00% +0.58% +0.43%] index_select reverse : Elapsed 0.055 ms (5.549 ms / 100) 5.194 -> 5.187 ( -0.13%) [ +0.00% +0.12% +0.00% / -0.13% +0.29% +0.33%] index_select skip64 : Elapsed 0.052 ms (5.194 ms / 100) 5.173 -> 5.173 ( +0.00%) [ +0.12% +0.10% +0.00% / +0.00% +0.35% +0.37%] index_select skip256 : Elapsed 0.052 ms (5.179 ms / 100) 5.538 -> 5.540 ( +0.04%) [ +0.04% +0.00% +0.04% / +0.04% +0.14% +0.23%] index_select spread : Elapsed 0.055 ms (5.540 ms / 100) 5.545 -> 5.548 ( +0.05%) [ +0.05% +0.31% +0.00% / +0.05% +0.45% +0.61%] index_select strided 3 : Elapsed 0.055 ms (5.548 ms / 100) 5.548 -> 5.543 ( -0.09%) [ +0.00% +0.00% +0.02% / -0.09% +0.29% +0.25%] index_select strided 5 : Elapsed 0.055 ms (5.548 ms / 100) 5.564 -> 5.557 ( -0.13%) [ +0.00% +0.14% +0.02% / -0.13% +0.27% +0.54%] index_select strided 7 : Elapsed 0.056 ms (5.564 ms / 100) 5.482 -> 5.500 ( +0.33%) [ +0.15% +0.00% +0.24% / +0.33% +0.60% +0.57%] index_select strided 8 : Elapsed 0.055 ms (5.490 ms / 100) 5.471 -> 5.468 ( -0.05%) [ +0.00% +0.07% +0.07% / -0.05% +0.05% +0.44%] index_select strided 16 : Elapsed 0.055 ms (5.471 ms / 100) 5.560 -> 5.554 ( -0.11%) [ +0.07% +0.05% +0.00% / -0.11% +0.40% +0.58%] index_select strided 64 : Elapsed 0.056 ms (5.564 ms / 100) 5.287 -> 5.290 ( +0.06%) [ +0.06% +0.08% +0.00% / +0.06% +0.47% +0.44%] index_select strided 100 : Elapsed 0.053 ms (5.290 ms / 100) 5.531 -> 5.541 ( +0.18%) [ +0.09% +0.25% +0.00% / +0.18% +0.49% +0.34%] index_select random : Elapsed 0.055 ms (5.536 ms / 100) 5.534 -> 5.536 ( +0.04%) [ +0.04% +0.18% +0.00% / +0.04% +0.36% +0.47%] index_select random_sorted : Elapsed 0.055 ms (5.536 ms / 100) 5.568 -> 5.573 ( +0.09%) [ +0.00% +0.02% +0.02% / +0.09% +0.27% +0.36%] index_select perm : Elapsed 0.056 ms (5.568 ms / 100) 5.557 -> 5.556 ( -0.02%) [ +0.05% +0.00% +0.00% / -0.02% +0.34% +0.25%] index_select perm_sorted : Elapsed 0.056 ms (5.560 ms / 100) B = [15, 150, 50] (stride (50, 750, 1)) A = [250, 150, 50] (stride (7500, 1, 150)) dim = 0 2.817 -> 2.817 ( +0.00%) [ +0.07% +0.00% +0.18% / +0.00% +1.06% +0.89%] index_select const : Elapsed 0.028 ms (2.819 ms / 100) 3.061 -> 3.061 ( +0.00%) [ +0.00% +0.16% +0.03% / +0.00% +0.95% +0.98%] index_select wrap : Elapsed 0.031 ms (3.061 ms / 100) 3.060 -> 3.064 ( +0.13%) [ +0.10% +0.00% +0.07% / +0.13% +0.78% +0.75%] index_select linear : Elapsed 0.031 ms (3.063 ms / 100) 3.064 -> 3.068 ( +0.13%) [ +0.26% +0.00% +0.03% / +0.13% +0.65% +0.42%] index_select reverse : Elapsed 0.031 ms (3.072 ms / 100) 2.831 -> 2.832 ( +0.04%) [ +0.18% +0.00% +0.07% / +0.04% +0.35% +0.42%] index_select skip64 : Elapsed 0.028 ms (2.836 ms / 100) 2.832 -> 2.834 ( +0.07%) [ +0.00% +0.11% +0.00% / +0.07% +0.25% +0.28%] index_select skip256 : Elapsed 0.028 ms (2.832 ms / 100) 3.067 -> 3.069 ( +0.07%) [ +0.00% +0.29% +0.00% / +0.07% +0.46% +0.68%] index_select spread : Elapsed 0.031 ms (3.067 ms / 100) 3.070 -> 3.073 ( +0.10%) [ +0.10% +0.10% +0.00% / +0.10% +0.46% +0.36%] index_select strided 3 : Elapsed 0.031 ms (3.073 ms / 100) 3.057 -> 3.062 ( +0.16%) [ +0.03% +0.23% +0.00% / +0.16% +1.14% +1.01%] index_select strided 5 : Elapsed 0.031 ms (3.058 ms / 100) 3.066 -> 3.072 ( +0.20%) [ +0.36% +0.20% +0.00% / +0.20% +0.46% +0.55%] index_select strided 7 : Elapsed 0.031 ms (3.077 ms / 100) 3.067 -> 3.073 ( +0.20%) [ +0.07% +0.26% +0.00% / +0.20% +0.88% +0.75%] index_select strided 8 : Elapsed 0.031 ms (3.069 ms / 100) 3.073 -> 3.075 ( +0.07%) [ +0.00% +0.20% +0.10% / +0.07% +0.39% +0.39%] index_select strided 16 : Elapsed 0.031 ms (3.073 ms / 100) 3.067 -> 3.074 ( +0.23%) [ +0.00% +0.36% +0.20% / +0.23% +0.39% +0.39%] index_select strided 64 : Elapsed 0.031 ms (3.067 ms / 100) 2.910 -> 2.904 ( -0.21%) [ +0.17% +0.00% +0.07% / +0.10% -0.21% -0.14%] index_select strided 100 : Elapsed 0.029 ms (2.915 ms / 100) 3.053 -> 3.054 ( +0.03%) [ +0.00% +0.23% +0.13% / +0.03% +0.13% +0.26%] index_select random : Elapsed 0.031 ms (3.053 ms / 100) 3.047 -> 3.056 ( +0.30%) [ +0.23% +0.20% +0.00% / +0.30% +0.33% +0.46%] index_select random_sorted : Elapsed 0.031 ms (3.054 ms / 100) 3.076 -> 3.075 ( -0.03%) [ +0.33% +0.33% +0.00% / -0.03% +0.33% +0.23%] index_select perm : Elapsed 0.031 ms (3.086 ms / 100) 3.078 -> 3.078 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.42% +0.23%] index_select perm_sorted : Elapsed 0.031 ms (3.079 ms / 100) B = [15, 150, 50] (stride (1, 750, 15)) A = [250, 150, 50] (stride (7500, 1, 150)) dim = 0 5.752 -> 5.760 ( +0.14%) [ +0.02% +0.12% +0.00% / +0.14% +0.40% +0.47%] index_select const : Elapsed 0.058 ms (5.753 ms / 100) 5.888 -> 5.888 ( +0.00%) [ +0.00% +0.00% +0.03% / +0.00% +0.15% +0.32%] index_select wrap : Elapsed 0.059 ms (5.888 ms / 100) 5.888 -> 5.890 ( +0.03%) [ +0.00% +0.07% +0.05% / +0.03% +0.25% +0.24%] index_select linear : Elapsed 0.059 ms (5.888 ms / 100) 5.888 -> 5.889 ( +0.02%) [ +0.00% +0.00% +0.00% / +0.02% +0.27% +0.36%] index_select reverse : Elapsed 0.059 ms (5.888 ms / 100) 5.752 -> 5.756 ( +0.07%) [ +0.16% +0.16% +0.00% / +0.07% +0.52% +0.38%] index_select skip64 : Elapsed 0.058 ms (5.761 ms / 100) 5.752 -> 5.752 ( +0.00%) [ +0.14% +0.21% +0.00% / +0.00% +0.57% +0.45%] index_select skip256 : Elapsed 0.058 ms (5.760 ms / 100) 5.886 -> 5.889 ( +0.05%) [ +0.14% +0.00% +0.05% / +0.05% +0.12% +0.22%] index_select spread : Elapsed 0.059 ms (5.894 ms / 100) 5.887 -> 5.893 ( +0.10%) [ +0.05% +0.03% +0.00% / +0.10% +0.17% +0.20%] index_select strided 3 : Elapsed 0.059 ms (5.890 ms / 100) 5.889 -> 5.890 ( +0.02%) [ +0.10% +0.03% +0.00% / +0.02% +0.24% +0.22%] index_select strided 5 : Elapsed 0.059 ms (5.895 ms / 100) 5.894 -> 5.896 ( +0.03%) [ +0.03% +0.07% +0.00% / +0.03% +0.24% +0.25%] index_select strided 7 : Elapsed 0.059 ms (5.896 ms / 100) 5.883 -> 5.887 ( +0.07%) [ +0.02% +0.03% +0.00% / +0.07% +0.51% +0.42%] index_select strided 8 : Elapsed 0.059 ms (5.884 ms / 100) 5.890 -> 5.896 ( +0.10%) [ +0.00% +0.08% +0.07% / +0.10% +0.22% +0.37%] index_select strided 16 : Elapsed 0.059 ms (5.890 ms / 100) 5.886 -> 5.886 ( +0.00%) [ +0.07% +0.17% +0.00% / +0.00% +0.20% +0.22%] index_select strided 64 : Elapsed 0.059 ms (5.890 ms / 100) 5.832 -> 5.810 ( -0.38%) [ +0.05% +0.05% +0.00% / -0.02% -0.29% -0.38%] index_select strided 100 : Elapsed 0.058 ms (5.835 ms / 100) 5.877 -> 5.881 ( +0.07%) [ +0.14% +0.05% +0.00% / +0.07% +0.12% +0.10%] index_select random : Elapsed 0.059 ms (5.885 ms / 100) 5.885 -> 5.890 ( +0.08%) [ +0.03% +0.17% +0.00% / +0.08% +0.20% +0.29%] index_select random_sorted : Elapsed 0.059 ms (5.887 ms / 100) 5.878 -> 5.881 ( +0.05%) [ +0.00% +0.05% +0.14% / +0.05% +0.27% +0.43%] index_select perm : Elapsed 0.059 ms (5.878 ms / 100) 5.883 -> 5.885 ( +0.03%) [ +0.03% +0.10% +0.00% / +0.03% +0.25% +0.27%] index_select perm_sorted : Elapsed 0.059 ms (5.885 ms / 100) B = [15, 150, 50] (stride (150, 1, 2250)) A = [250, 150, 50] (stride (50, 12500, 1)) dim = 0 2.964 -> 2.962 ( -0.07%) [ +0.03% +0.03% +0.00% / -0.07% -0.07% -0.07%] index_select const : Elapsed 0.030 ms (2.965 ms / 100) 3.313 -> 3.296 ( -0.51%) [ +0.18% +0.12% +0.00% / +0.06% -0.36% -0.51%] index_select wrap : Elapsed 0.033 ms (3.319 ms / 100) 3.312 -> 3.294 ( -0.54%) [ +0.06% +0.03% +0.00% / +0.06% -0.42% -0.54%] index_select linear : Elapsed 0.033 ms (3.314 ms / 100) 3.324 -> 3.299 ( -0.75%) [ +0.12% +0.03% +0.00% / +0.00% -0.57% -0.75%] index_select reverse : Elapsed 0.033 ms (3.328 ms / 100) 2.959 -> 2.956 ( -0.10%) [ +0.27% +0.27% +0.00% / +0.17% +0.00% -0.10%] index_select skip64 : Elapsed 0.030 ms (2.967 ms / 100) 2.959 -> 2.957 ( -0.07%) [ +0.03% +0.27% +0.00% / +0.37% -0.07% +0.03%] index_select skip256 : Elapsed 0.030 ms (2.960 ms / 100) 3.316 -> 3.317 ( +0.03%) [ +0.06% +0.00% +0.12% / +0.03% +0.72% +0.66%] index_select spread : Elapsed 0.033 ms (3.318 ms / 100) 3.325 -> 3.323 ( -0.06%) [ +0.21% +0.00% +0.15% / -0.06% +1.08% +1.26%] index_select strided 3 : Elapsed 0.033 ms (3.332 ms / 100) 3.315 -> 3.316 ( +0.03%) [ +0.06% +0.36% +0.00% / +0.03% +0.90% +1.06%] index_select strided 5 : Elapsed 0.033 ms (3.317 ms / 100) 3.327 -> 3.328 ( +0.03%) [ +0.24% +0.12% +0.00% / +0.03% +0.48% +0.63%] index_select strided 7 : Elapsed 0.033 ms (3.335 ms / 100) 3.327 -> 3.325 ( -0.06%) [ +0.09% +0.06% +0.00% / -0.06% +0.33% +0.33%] index_select strided 8 : Elapsed 0.033 ms (3.330 ms / 100) 3.319 -> 3.324 ( +0.15%) [ +0.33% +0.00% +0.30% / +0.15% +0.33% +0.21%] index_select strided 16 : Elapsed 0.033 ms (3.330 ms / 100) 3.308 -> 3.309 ( +0.03%) [ +0.03% +0.09% +0.00% / +0.03% +0.54% +0.54%] index_select strided 64 : Elapsed 0.033 ms (3.309 ms / 100) 3.047 -> 3.049 ( +0.07%) [ +0.13% +0.00% +0.13% / +0.07% +0.30% +0.23%] index_select strided 100 : Elapsed 0.031 ms (3.051 ms / 100) 3.289 -> 3.290 ( +0.03%) [ +0.00% +0.15% +0.00% / +0.03% +0.36% +0.46%] index_select random : Elapsed 0.033 ms (3.289 ms / 100) 3.297 -> 3.297 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.09% +0.24% +0.00%] index_select random_sorted : Elapsed 0.033 ms (3.297 ms / 100) 3.311 -> 3.312 ( +0.03%) [ +0.00% +0.06% +0.06% / +0.03% +0.57% +0.85%] index_select perm : Elapsed 0.033 ms (3.311 ms / 100) 3.317 -> 3.323 ( +0.18%) [ +0.21% +0.21% +0.00% / +0.18% +0.78% +0.75%] index_select perm_sorted : Elapsed 0.033 ms (3.324 ms / 100) out_shape = [250, 15, 50] in_shape = [250, 150, 50] idx_dim = 1 B = [250, 15, 50] (stride (750, 50, 1)) A = [250, 150, 50] (stride (7500, 1, 150)) dim = 1 10.705 -> 10.392 ( -2.92%) [ +0.21% +0.16% +0.00% / -1.26% -2.77% -2.92%] index_select const : Elapsed 0.107 ms (10.728 ms / 100) 12.278 -> 12.323 ( +0.37%) [ +0.84% +1.09% +0.00% / +0.37% +1.34% +1.23%] index_select wrap : Elapsed 0.124 ms (12.381 ms / 100) 12.252 -> 12.265 ( +0.11%) [ +0.00% +1.39% +0.40% / +0.11% +0.11% +1.52%] index_select linear : Elapsed 0.123 ms (12.252 ms / 100) 11.710 -> 11.842 ( +1.13%) [ +0.00% +0.72% +0.38% / +1.13% +4.85% +4.69%] index_select reverse : Elapsed 0.117 ms (11.710 ms / 100) 10.579 -> 10.385 ( -1.83%) [ +0.14% +0.88% +0.00% / +1.39% -0.26% -1.83%] index_select skip64 : Elapsed 0.106 ms (10.594 ms / 100) 10.777 -> 10.549 ( -2.12%) [ +0.66% +0.00% +0.43% / +0.16% -1.35% -2.12%] index_select skip256 : Elapsed 0.108 ms (10.848 ms / 100) 14.439 -> 14.284 ( -1.07%) [ +0.00% +0.11% +0.11% / -0.06% -1.07% -0.59%] index_select spread : Elapsed 0.144 ms (14.439 ms / 100) 13.128 -> 13.038 ( -0.69%) [ +0.28% +0.24% +0.00% / +0.48% -0.69% -0.31%] index_select strided 3 : Elapsed 0.132 ms (13.165 ms / 100) 13.803 -> 13.584 ( -1.59%) [ +0.30% +0.06% +0.00% / +0.30% -1.59% -1.46%] index_select strided 5 : Elapsed 0.138 ms (13.844 ms / 100) 14.206 -> 14.037 ( -1.19%) [ +0.08% +0.12% +0.00% / +0.01% -0.82% -1.19%] index_select strided 7 : Elapsed 0.142 ms (14.218 ms / 100) 14.395 -> 14.323 ( -0.50%) [ +0.35% +0.01% +0.00% / -0.15% -0.50% -0.17%] index_select strided 8 : Elapsed 0.144 ms (14.446 ms / 100) 14.469 -> 14.506 ( +0.26%) [ +0.16% +0.00% +0.20% / +0.26% +0.43% +0.50%] index_select strided 16 : Elapsed 0.145 ms (14.492 ms / 100) 14.643 -> 14.578 ( -0.44%) [ +0.00% +0.57% +0.27% / -0.12% -0.44% -0.08%] index_select strided 64 : Elapsed 0.146 ms (14.643 ms / 100) 14.649 -> 14.654 ( +0.03%) [ +0.38% +0.03% +0.00% / +0.27% +0.22% +0.03%] index_select strided 100 : Elapsed 0.147 ms (14.705 ms / 100) 14.551 -> 14.542 ( -0.06%) [ +0.30% +0.54% +0.00% / -0.06% +1.07% +1.07%] index_select random : Elapsed 0.146 ms (14.594 ms / 100) 13.611 -> 13.583 ( -0.21%) [ +0.16% +0.00% +0.29% / -0.21% +0.49% +0.43%] index_select random_sorted : Elapsed 0.136 ms (13.633 ms / 100) 14.334 -> 14.230 ( -0.73%) [ +0.36% +0.19% +0.00% / +0.28% -0.63% -0.73%] index_select perm : Elapsed 0.144 ms (14.385 ms / 100) 13.832 -> 13.738 ( -0.68%) [ +0.00% +0.04% +0.31% / -0.12% -0.30% -0.68%] index_select perm_sorted : Elapsed 0.138 ms (13.832 ms / 100) B = [250, 15, 50] (stride (750, 1, 15)) dim = 1 fill_cnt = 150 GOOD 14.498 -> 10.875 (-24.99%) [ +0.07% +0.00% +0.04% / -23.89% -24.99% -24.99%] index_fill_ const : Elapsed 0.145 ms (14.508 ms / 100) Good 14.494 -> 11.761 (-18.86%) [ +0.03% +0.00% +0.03% / -18.26% -18.86% -18.85%] index_fill_ linear : Elapsed 0.145 ms (14.499 ms / 100) 14.488 -> 14.415 ( -0.50%) [ +0.13% +0.00% +0.03% / -0.50% +0.84% +0.85%] index_fill_ reverse : Elapsed 0.145 ms (14.507 ms / 100) Good 14.562 -> 11.741 (-19.37%) [ +0.19% +0.07% +0.00% / -17.84% -19.28% -19.37%] index_fill_ skip64 : Elapsed 0.146 ms (14.590 ms / 100) GOOD 14.510 -> 11.124 (-23.34%) [ +0.01% +0.00% +0.03% / -23.34% -21.70% -21.92%] index_fill_ skip256 : Elapsed 0.145 ms (14.512 ms / 100) Good 14.595 -> 11.799 (-19.16%) [ +0.02% +0.06% +0.00% / -19.16% -18.75% -18.84%] index_fill_ spread : Elapsed 0.146 ms (14.598 ms / 100) GOOD 14.935 -> 11.611 (-22.26%) [ +0.03% +0.02% +0.00% / -22.22% -22.26% -22.22%] index_fill_ strided 3 : Elapsed 0.149 ms (14.939 ms / 100) GOOD 14.833 -> 11.862 (-20.03%) [ +0.16% +0.07% +0.00% / -20.03% -19.23% -19.25%] index_fill_ strided 5 : Elapsed 0.149 ms (14.857 ms / 100) GOOD 14.582 -> 11.121 (-23.73%) [ +0.23% +0.00% +0.01% / -23.73% -22.27% -22.34%] index_fill_ strided 7 : Elapsed 0.146 ms (14.615 ms / 100) GOOD 14.586 -> 11.569 (-20.68%) [ +0.00% +0.08% +0.01% / -20.68% -19.80% -19.77%] index_fill_ strided 8 : Elapsed 0.146 ms (14.586 ms / 100) GOOD 14.597 -> 11.117 (-23.84%) [ +0.04% +0.00% +0.08% / -23.84% -23.55% -23.24%] index_fill_ random : Elapsed 0.146 ms (14.603 ms / 100) GOOD 14.503 -> 11.547 (-20.38%) [ +0.02% +0.00% +0.19% / -20.38% -20.02% -20.00%] index_fill_ random_sorted : Elapsed 0.145 ms (14.506 ms / 100) B = [250, 15, 50] (stride (750, 1, 15)) A = [250, 150, 50] (stride (150, 1, 37500)) dim = 1 24.052 -> 24.089 ( +0.15%) [ +0.06% +0.07% +0.00% / +0.15% +2.07% +2.19%] index_select const : Elapsed 0.241 ms (24.066 ms / 100) 24.079 -> 24.084 ( +0.02%) [ +0.10% +0.00% +0.10% / +0.02% +2.16% +2.28%] index_select wrap : Elapsed 0.241 ms (24.104 ms / 100) 24.077 -> 24.128 ( +0.21%) [ +0.03% +0.07% +0.00% / +0.21% +2.18% +2.15%] index_select linear : Elapsed 0.241 ms (24.085 ms / 100) 23.894 -> 23.904 ( +0.04%) [ +0.15% +0.03% +0.00% / +0.04% +2.90% +3.10%] index_select reverse : Elapsed 0.239 ms (23.929 ms / 100) 24.042 -> 24.052 ( +0.04%) [ +0.00% +0.01% +0.01% / +0.04% +2.11% +2.18%] index_select skip64 : Elapsed 0.240 ms (24.042 ms / 100) 24.157 -> 24.202 ( +0.19%) [ +0.21% +0.00% +0.32% / +0.19% +1.84% +1.73%] index_select skip256 : Elapsed 0.242 ms (24.208 ms / 100) 25.487 -> 25.385 ( -0.40%) [ +0.03% +0.00% +0.02% / +0.06% -0.35% -0.40%] index_select spread : Elapsed 0.255 ms (25.495 ms / 100) 24.553 -> 24.592 ( +0.16%) [ +0.11% +0.00% +0.10% / +0.16% +0.61% +0.58%] index_select strided 3 : Elapsed 0.246 ms (24.580 ms / 100) 24.958 -> 24.839 ( -0.48%) [ +0.01% +0.00% +0.06% / +0.06% -0.48% -0.40%] index_select strided 5 : Elapsed 0.250 ms (24.960 ms / 100) 25.266 -> 25.105 ( -0.64%) [ +0.00% +0.02% +0.05% / +0.09% -0.64% -0.54%] index_select strided 7 : Elapsed 0.253 ms (25.266 ms / 100) 25.342 -> 25.276 ( -0.26%) [ +0.00% +0.08% +0.06% / +0.09% -0.23% -0.26%] index_select strided 8 : Elapsed 0.253 ms (25.342 ms / 100) 25.356 -> 25.371 ( +0.06%) [ +0.05% +0.00% +0.00% / +0.06% +0.21% +0.18%] index_select strided 16 : Elapsed 0.254 ms (25.368 ms / 100) 25.318 -> 25.366 ( +0.19%) [ +0.04% +0.08% +0.00% / +0.19% +0.57% +0.55%] index_select strided 64 : Elapsed 0.253 ms (25.328 ms / 100) 25.378 -> 25.373 ( -0.02%) [ +0.04% +0.00% +0.06% / -0.02% +0.30% +0.20%] index_select strided 100 : Elapsed 0.254 ms (25.388 ms / 100) 25.160 -> 25.208 ( +0.19%) [ +0.00% +0.16% +0.02% / +0.19% +0.29% +0.37%] index_select random : Elapsed 0.252 ms (25.160 ms / 100) 25.103 -> 25.111 ( +0.03%) [ +0.06% +0.03% +0.00% / +0.03% +0.37% +0.30%] index_select random_sorted : Elapsed 0.251 ms (25.117 ms / 100) 25.276 -> 25.240 ( -0.14%) [ +0.00% +0.06% +0.05% / +0.08% -0.12% -0.14%] index_select perm : Elapsed 0.253 ms (25.276 ms / 100) 25.204 -> 25.220 ( +0.06%) [ +0.04% +0.00% +0.06% / +0.06% +0.32% +0.41%] index_select perm_sorted : Elapsed 0.252 ms (25.213 ms / 100) B = [250, 15, 50] (stride (50, 12500, 1)) A = [250, 150, 50] (stride (7500, 1, 150)) dim = 1 9.802 -> 9.930 ( +1.31%) [ +0.71% +1.12% +0.00% / +1.31% +4.23% +4.49%] index_select const : Elapsed 0.099 ms (9.872 ms / 100) 11.644 -> 11.747 ( +0.88%) [ +0.75% +0.00% +0.19% / +0.88% +1.10% +1.88%] index_select wrap : Elapsed 0.117 ms (11.731 ms / 100) 11.703 -> 11.711 ( +0.07%) [ +0.59% +0.00% +0.42% / +0.07% +0.62% +0.80%] index_select linear : Elapsed 0.118 ms (11.772 ms / 100) 11.058 -> 11.217 ( +1.44%) [ +1.10% +0.00% +1.10% / +1.44% +5.73% +6.34%] index_select reverse : Elapsed 0.112 ms (11.180 ms / 100) 9.825 -> 9.838 ( +0.13%) [ +0.11% +0.71% +0.00% / +0.13% +3.39% +3.80%] index_select skip64 : Elapsed 0.098 ms (9.836 ms / 100) 9.827 -> 9.942 ( +1.17%) [ +1.88% +0.03% +0.00% / +1.17% +3.59% +2.86%] index_select skip256 : Elapsed 0.100 ms (10.012 ms / 100) 13.859 -> 13.615 ( -1.76%) [ +0.00% +0.40% +0.07% / +0.16% -1.76% -1.76%] index_select spread : Elapsed 0.139 ms (13.859 ms / 100) 12.637 -> 12.526 ( -0.88%) [ +0.09% +0.00% +0.21% / +0.42% -0.64% -0.88%] index_select strided 3 : Elapsed 0.126 ms (12.648 ms / 100) 13.240 -> 12.998 ( -1.83%) [ +0.00% +0.23% +0.14% / +0.16% -1.66% -1.83%] index_select strided 5 : Elapsed 0.132 ms (13.240 ms / 100) 13.624 -> 13.368 ( -1.88%) [ +0.19% +0.04% +0.00% / +0.19% -1.88% -1.79%] index_select strided 7 : Elapsed 0.137 ms (13.650 ms / 100) 13.810 -> 13.592 ( -1.58%) [ +0.18% +0.00% +0.09% / -0.12% -1.50% -1.58%] index_select strided 8 : Elapsed 0.138 ms (13.835 ms / 100) 13.930 -> 13.804 ( -0.90%) [ +0.00% +0.18% +0.24% / +0.04% -0.74% -0.90%] index_select strided 16 : Elapsed 0.139 ms (13.930 ms / 100) 14.040 -> 13.931 ( -0.78%) [ +0.50% +0.31% +0.00% / +0.50% -0.73% -0.78%] index_select strided 64 : Elapsed 0.141 ms (14.110 ms / 100) 14.078 -> 13.987 ( -0.65%) [ +0.20% +0.25% +0.00% / -0.05% -0.65% -0.62%] index_select strided 100 : Elapsed 0.141 ms (14.106 ms / 100) 14.094 -> 13.943 ( -1.07%) [ +0.23% +0.17% +0.00% / +0.04% -1.07% -1.01%] index_select random : Elapsed 0.141 ms (14.126 ms / 100) 13.113 -> 13.073 ( -0.31%) [ +0.17% +0.03% +0.00% / +0.40% -0.31% +0.14%] index_select random_sorted : Elapsed 0.131 ms (13.135 ms / 100) 13.975 -> 13.828 ( -1.05%) [ +0.49% +0.77% +0.00% / +0.47% -1.05% -0.77%] index_select perm : Elapsed 0.140 ms (14.043 ms / 100) 13.322 -> 13.131 ( -1.43%) [ +0.46% +0.00% +0.51% / +0.32% -1.43% -1.34%] index_select perm_sorted : Elapsed 0.134 ms (13.383 ms / 100) B = [250, 15, 50] (stride (1, 12500, 250)) A = [250, 150, 50] (stride (7500, 50, 1)) dim = 1 4.543 -> 4.535 ( -0.18%) [ +0.04% +0.31% +0.00% / -0.02% -0.18% -0.13%] index_select const : Elapsed 0.045 ms (4.545 ms / 100) 4.934 -> 4.934 ( +0.00%) [ +0.12% +0.10% +0.00% / +0.00% +0.51% +0.34%] index_select wrap : Elapsed 0.049 ms (4.940 ms / 100) 4.931 -> 4.943 ( +0.24%) [ +0.34% +0.30% +0.00% / +0.24% +0.69% +0.73%] index_select linear : Elapsed 0.049 ms (4.948 ms / 100) 4.944 -> 4.939 ( -0.10%) [ +0.00% +0.02% +0.00% / -0.10% +0.18% +0.24%] index_select reverse : Elapsed 0.049 ms (4.944 ms / 100) 4.554 -> 4.549 ( -0.11%) [ +0.09% +0.00% +0.07% / -0.11% +0.33% +0.24%] index_select skip64 : Elapsed 0.046 ms (4.558 ms / 100) 4.576 -> 4.549 ( -0.59%) [ +0.00% +0.00% +0.07% / -0.02% -0.46% -0.59%] index_select skip256 : Elapsed 0.046 ms (4.576 ms / 100) 4.957 -> 4.958 ( +0.02%) [ +0.00% +0.08% +0.04% / +0.02% +0.26% +0.14%] index_select spread : Elapsed 0.050 ms (4.957 ms / 100) 4.963 -> 4.964 ( +0.02%) [ +0.22% +0.00% +0.10% / +0.02% +0.18% +0.12%] index_select strided 3 : Elapsed 0.050 ms (4.974 ms / 100) 4.958 -> 4.954 ( -0.08%) [ +0.10% +0.00% +0.02% / -0.02% -0.02% -0.08%] index_select strided 5 : Elapsed 0.050 ms (4.963 ms / 100) 4.951 -> 4.965 ( +0.28%) [ +0.30% +0.16% +0.00% / +0.38% +0.32% +0.28%] index_select strided 7 : Elapsed 0.050 ms (4.966 ms / 100) 4.964 -> 4.958 ( -0.12%) [ +0.20% +0.16% +0.00% / +0.08% -0.12% +0.04%] index_select strided 8 : Elapsed 0.050 ms (4.974 ms / 100) 4.949 -> 4.956 ( +0.14%) [ +0.18% +0.08% +0.00% / +0.14% +0.20% +0.30%] index_select strided 16 : Elapsed 0.050 ms (4.958 ms / 100) 4.968 -> 4.960 ( -0.16%) [ +0.06% +0.00% +0.00% / +0.04% -0.16% -0.14%] index_select strided 64 : Elapsed 0.050 ms (4.971 ms / 100) 4.617 -> 4.611 ( -0.13%) [ +0.00% +0.00% +0.02% / -0.13% +0.43% +0.43%] index_select strided 100 : Elapsed 0.046 ms (4.617 ms / 100) 4.946 -> 4.946 ( +0.00%) [ +0.00% +0.26% +0.20% / +0.00% +0.12% +0.20%] index_select random : Elapsed 0.049 ms (4.946 ms / 100) 4.961 -> 4.966 ( +0.10%) [ +0.10% +0.04% +0.00% / +0.20% +0.10% +0.24%] index_select random_sorted : Elapsed 0.050 ms (4.966 ms / 100) 4.970 -> 4.961 ( -0.18%) [ +0.00% +0.10% +0.14% / +0.32% -0.14% -0.18%] index_select perm : Elapsed 0.050 ms (4.970 ms / 100) 4.974 -> 4.949 ( -0.50%) [ +0.08% +0.00% +0.22% / +0.14% -0.44% -0.50%] index_select perm_sorted : Elapsed 0.050 ms (4.978 ms / 100) B = [250, 15, 50] (stride (1, 12500, 250)) A = [250, 150, 50] (stride (150, 1, 37500)) dim = 1 Good 15.909 -> 14.110 (-11.31%) [ +0.30% +0.40% +0.00% / +0.01% -10.63% -11.31%] index_select const : Elapsed 0.160 ms (15.956 ms / 100) 16.607 -> 15.805 ( -4.83%) [ +0.07% +0.05% +0.00% / -0.05% -4.83% -4.67%] index_select wrap : Elapsed 0.166 ms (16.618 ms / 100) 16.601 -> 15.789 ( -4.89%) [ +0.08% +0.09% +0.00% / +0.12% -4.89% -4.54%] index_select linear : Elapsed 0.166 ms (16.615 ms / 100) good 16.484 -> 15.642 ( -5.11%) [ +0.00% +0.18% +0.21% / +0.10% -5.11% -4.91%] index_select reverse : Elapsed 0.165 ms (16.484 ms / 100) Good 15.915 -> 14.181 (-10.90%) [ +0.45% +0.00% +0.03% / +0.08% -10.84% -10.90%] index_select skip64 : Elapsed 0.160 ms (15.986 ms / 100) Good 16.102 -> 14.173 (-11.98%) [ +0.00% +0.14% +0.06% / +0.07% -11.98% -11.92%] index_select skip256 : Elapsed 0.161 ms (16.102 ms / 100) 19.064 -> 18.965 ( -0.52%) [ +0.00% +0.22% +0.08% / +0.08% -0.52% -0.36%] index_select spread : Elapsed 0.191 ms (19.064 ms / 100) 17.276 -> 16.799 ( -2.76%) [ +0.03% +0.15% +0.00% / -0.01% -2.76% -2.63%] index_select strided 3 : Elapsed 0.173 ms (17.282 ms / 100) 18.035 -> 17.605 ( -2.38%) [ +0.00% +0.25% +0.10% / +0.19% -2.38% -1.93%] index_select strided 5 : Elapsed 0.180 ms (18.035 ms / 100) 18.707 -> 18.520 ( -1.00%) [ +0.12% +0.00% +0.10% / +0.10% -1.00% -0.79%] index_select strided 7 : Elapsed 0.187 ms (18.730 ms / 100) 19.027 -> 18.914 ( -0.59%) [ +0.05% +0.01% +0.00% / -0.01% -0.59% -0.40%] index_select strided 8 : Elapsed 0.190 ms (19.037 ms / 100) 19.104 -> 18.970 ( -0.70%) [ +0.07% +0.00% +0.03% / -0.01% -0.69% -0.70%] index_select strided 16 : Elapsed 0.191 ms (19.117 ms / 100) 19.186 -> 19.105 ( -0.42%) [ +0.04% +0.00% +0.15% / -0.04% -0.34% -0.42%] index_select strided 64 : Elapsed 0.192 ms (19.194 ms / 100) 19.120 -> 19.104 ( -0.08%) [ +0.28% +0.06% +0.00% / -0.04% -0.08% -0.08%] index_select strided 100 : Elapsed 0.192 ms (19.173 ms / 100) 19.125 -> 19.052 ( -0.38%) [ +0.01% +0.00% +0.02% / +0.00% -0.26% -0.38%] index_select random : Elapsed 0.191 ms (19.127 ms / 100) 18.234 -> 18.100 ( -0.73%) [ +0.08% +0.00% +0.09% / +0.14% -0.73% -0.63%] index_select random_sorted : Elapsed 0.182 ms (18.248 ms / 100) 18.911 -> 18.752 ( -0.84%) [ +0.00% +0.05% +0.17% / +0.14% -0.84% -0.82%] index_select perm : Elapsed 0.189 ms (18.911 ms / 100) 18.256 -> 18.157 ( -0.54%) [ +0.00% +0.13% +0.16% / +0.18% -0.54% -0.47%] index_select perm_sorted : Elapsed 0.183 ms (18.256 ms / 100) B = [250, 15, 50] (stride (15, 1, 3750)) A = [250, 150, 50] (stride (150, 1, 37500)) dim = 1 23.883 -> 23.935 ( +0.22%) [ +0.00% +0.03% +0.09% / +0.22% +1.84% +1.79%] index_select const : Elapsed 0.239 ms (23.883 ms / 100) 23.844 -> 23.863 ( +0.08%) [ +0.03% +0.00% +0.15% / +0.08% +1.68% +1.71%] index_select wrap : Elapsed 0.239 ms (23.851 ms / 100) 23.849 -> 23.840 ( -0.04%) [ +0.00% +0.08% +0.16% / -0.04% +1.63% +1.58%] index_select linear : Elapsed 0.238 ms (23.849 ms / 100) 23.712 -> 23.742 ( +0.13%) [ +0.00% +0.08% +0.08% / +0.13% +2.62% +2.62%] index_select reverse : Elapsed 0.237 ms (23.712 ms / 100) 23.859 -> 23.910 ( +0.21%) [ +0.14% +0.00% +0.10% / +0.21% +2.15% +1.90%] index_select skip64 : Elapsed 0.239 ms (23.892 ms / 100) 24.016 -> 24.049 ( +0.14%) [ +0.00% +0.10% +0.16% / +0.14% +1.29% +1.31%] index_select skip256 : Elapsed 0.240 ms (24.016 ms / 100) 24.897 -> 24.840 ( -0.23%) [ +0.04% +0.00% +0.10% / +0.06% -0.23% -0.18%] index_select spread : Elapsed 0.249 ms (24.907 ms / 100) 24.256 -> 24.258 ( +0.01%) [ +0.00% +0.00% +0.02% / +0.01% +0.23% +0.29%] index_select strided 3 : Elapsed 0.243 ms (24.257 ms / 100) 24.527 -> 24.385 ( -0.58%) [ +0.00% +0.07% +0.13% / +0.02% -0.58% -0.51%] index_select strided 5 : Elapsed 0.245 ms (24.527 ms / 100) 24.761 -> 24.613 ( -0.60%) [ +0.04% +0.01% +0.00% / +0.11% -0.52% -0.60%] index_select strided 7 : Elapsed 0.248 ms (24.770 ms / 100) 24.799 -> 24.736 ( -0.25%) [ +0.09% +0.00% +0.10% / +0.12% -0.22% -0.25%] index_select strided 8 : Elapsed 0.248 ms (24.822 ms / 100) 24.814 -> 24.817 ( +0.01%) [ +0.00% +0.07% +0.12% / +0.10% +0.04% +0.01%] index_select strided 16 : Elapsed 0.248 ms (24.814 ms / 100) 24.801 -> 24.782 ( -0.08%) [ +0.00% +0.00% +0.03% / -0.08% +0.29% +0.23%] index_select strided 64 : Elapsed 0.248 ms (24.801 ms / 100) 24.827 -> 24.865 ( +0.15%) [ +0.00% +0.04% +0.16% / +0.15% +0.21% +0.19%] index_select strided 100 : Elapsed 0.248 ms (24.827 ms / 100) 24.799 -> 24.718 ( -0.33%) [ +0.07% +0.00% +0.04% / -0.02% -0.31% -0.33%] index_select random : Elapsed 0.248 ms (24.816 ms / 100) 24.729 -> 24.563 ( -0.67%) [ +0.00% +0.00% +0.12% / -0.02% -0.59% -0.67%] index_select random_sorted : Elapsed 0.247 ms (24.730 ms / 100) 24.841 -> 24.737 ( -0.42%) [ +0.00% +0.01% +0.14% / +0.04% -0.41% -0.42%] index_select perm : Elapsed 0.248 ms (24.841 ms / 100) 24.653 -> 24.618 ( -0.14%) [ +0.09% +0.00% +0.09% / +0.21% -0.14% -0.14%] index_select perm_sorted : Elapsed 0.247 ms (24.675 ms / 100) out_shape = [250, 150, 15] in_shape = [250, 150, 50] idx_dim = 2 B = [250, 150, 15] (stride (150, 1, 37500)) A = [250, 150, 50] (stride (50, 12500, 1)) dim = 2 61.019 -> 61.081 ( +0.10%) [ +0.04% +0.00% +0.05% / +0.10% +1.13% +1.14%] index_select const : Elapsed 0.610 ms (61.044 ms / 100) 61.095 -> 61.126 ( +0.05%) [ +0.00% +0.10% +0.09% / +0.05% +0.91% +0.91%] index_select wrap : Elapsed 0.611 ms (61.095 ms / 100) 61.118 -> 61.129 ( +0.02%) [ +0.00% +0.03% +0.14% / +0.02% +0.96% +0.83%] index_select linear : Elapsed 0.611 ms (61.118 ms / 100) 61.099 -> 61.048 ( -0.08%) [ +0.04% +0.03% +0.00% / -0.08% +0.92% +0.95%] index_select reverse : Elapsed 0.611 ms (61.122 ms / 100) 61.073 -> 61.048 ( -0.04%) [ +0.05% +0.00% +0.00% / -0.04% +1.09% +1.18%] index_select skip64 : Elapsed 0.611 ms (61.104 ms / 100) 61.059 -> 61.044 ( -0.02%) [ +0.00% +0.01% +0.01% / -0.02% +1.05% +1.13%] index_select skip256 : Elapsed 0.611 ms (61.059 ms / 100) 61.224 -> 61.240 ( +0.03%) [ +0.00% +0.11% +0.07% / +0.03% +0.82% +0.74%] index_select spread : Elapsed 0.612 ms (61.224 ms / 100) 61.209 -> 61.244 ( +0.06%) [ +0.04% +0.00% +0.18% / +0.06% +0.62% +0.73%] index_select strided 3 : Elapsed 0.612 ms (61.232 ms / 100) 61.126 -> 61.104 ( -0.04%) [ +0.00% +0.08% +0.06% / -0.04% +0.74% +0.87%] index_select strided 5 : Elapsed 0.611 ms (61.126 ms / 100) 61.228 -> 61.357 ( +0.21%) [ +0.10% +0.00% +0.03% / +0.21% +0.79% +0.98%] index_select strided 7 : Elapsed 0.613 ms (61.291 ms / 100) 61.191 -> 61.221 ( +0.05%) [ +0.05% +0.00% +0.15% / +0.05% +0.93% +0.88%] index_select strided 8 : Elapsed 0.612 ms (61.222 ms / 100) 61.274 -> 61.327 ( +0.09%) [ +0.01% +0.11% +0.00% / +0.09% +0.66% +0.64%] index_select strided 16 : Elapsed 0.613 ms (61.283 ms / 100) 60.762 -> 60.734 ( -0.05%) [ +0.00% +0.06% +0.12% / -0.05% +0.88% +0.98%] index_select random : Elapsed 0.608 ms (60.762 ms / 100) 61.226 -> 61.312 ( +0.14%) [ +0.00% +0.02% +0.00% / +0.14% +0.75% +0.77%] index_select random_sorted : Elapsed 0.612 ms (61.227 ms / 100) 61.149 -> 61.161 ( +0.02%) [ +0.00% +0.00% +0.04% / +0.02% +0.74% +0.80%] index_select perm : Elapsed 0.611 ms (61.149 ms / 100) 61.248 -> 61.260 ( +0.02%) [ +0.00% +0.00% +0.04% / +0.02% +0.78% +0.64%] index_select perm_sorted : Elapsed 0.612 ms (61.249 ms / 100) out_shape = [50, 150, 250] in_shape = [15, 150, 250] idx_dim = 0 B = [50, 150, 250] (stride (250, 12500, 1)) A = [15, 150, 250] (stride (37500, 250, 1)) dim = 0 16.877 -> 16.884 ( +0.04%) [ +0.00% +0.00% +0.09% / +0.04% +0.20% +0.16%] index_add_ linear : Elapsed 0.169 ms (16.877 ms / 100) 16.015 -> 16.016 ( +0.01%) [ +0.00% +0.10% +0.17% / +0.01% +0.19% +0.19%] index_copy_ linear : Elapsed 0.160 ms (16.015 ms / 100) 16.718 -> 16.739 ( +0.13%) [ +0.00% +0.02% +0.14% / +0.13% +1.61% +1.57%] index_add_ reverse : Elapsed 0.167 ms (16.718 ms / 100) 15.950 -> 15.967 ( +0.11%) [ +0.06% +0.00% +0.08% / +0.11% +0.80% +0.77%] index_copy_ reverse : Elapsed 0.160 ms (15.960 ms / 100) 16.821 -> 16.813 ( -0.05%) [ +0.10% +0.00% +0.12% / -0.05% +0.48% +0.41%] index_add_ spread : Elapsed 0.168 ms (16.838 ms / 100) 16.048 -> 16.053 ( +0.03%) [ +0.07% +0.00% +0.00% / +0.03% +0.24% +0.36%] index_copy_ spread : Elapsed 0.161 ms (16.060 ms / 100) 16.839 -> 16.866 ( +0.16%) [ +0.27% +0.00% +0.07% / +0.16% +0.29% +0.31%] index_add_ strided 3 : Elapsed 0.169 ms (16.884 ms / 100) 16.062 -> 16.085 ( +0.14%) [ +0.00% +0.09% +0.07% / +0.14% +0.32% +0.22%] index_copy_ strided 3 : Elapsed 0.161 ms (16.062 ms / 100) 16.789 -> 16.803 ( +0.08%) [ +0.08% +0.00% +0.05% / +0.08% +0.12% +0.10%] index_add_ strided 7 : Elapsed 0.168 ms (16.802 ms / 100) 16.062 -> 16.055 ( -0.04%) [ +0.00% +0.14% +0.04% / -0.04% -0.02% +0.03%] index_copy_ strided 7 : Elapsed 0.161 ms (16.062 ms / 100) 16.757 -> 16.787 ( +0.18%) [ +0.00% +0.04% +0.13% / +0.18% +1.59% +1.60%] index_add_ perm : Elapsed 0.168 ms (16.757 ms / 100) 16.004 -> 16.027 ( +0.14%) [ +0.01% +0.11% +0.00% / +0.14% +1.12% +1.01%] index_copy_ perm : Elapsed 0.160 ms (16.005 ms / 100) 16.765 -> 16.782 ( +0.10%) [ +0.00% +0.08% +0.16% / +0.10% +1.12% +1.04%] index_add_ perm_sorted : Elapsed 0.168 ms (16.765 ms / 100) 15.987 -> 16.012 ( +0.16%) [ +0.06% +0.00% +0.16% / +0.16% +0.64% +0.63%] index_copy_ perm_sorted : Elapsed 0.160 ms (15.997 ms / 100) 47.372 -> 47.309 ( -0.13%) [ +0.08% +0.00% +0.03% / -0.13% +1.29% +1.41%] index_select const : Elapsed 0.474 ms (47.410 ms / 100) 50.855 -> 50.959 ( +0.20%) [ +0.11% +0.09% +0.00% / +0.20% +1.61% +1.78%] index_select wrap : Elapsed 0.509 ms (50.909 ms / 100) 47.568 -> 47.631 ( +0.13%) [ +0.00% +0.08% +0.17% / +0.13% +1.45% +1.43%] index_select linear : Elapsed 0.476 ms (47.568 ms / 100) 49.117 -> 49.115 ( -0.00%) [ +0.00% +0.08% +0.16% / -0.00% +1.70% +1.53%] index_select reverse : Elapsed 0.491 ms (49.117 ms / 100) 47.361 -> 47.438 ( +0.16%) [ +0.00% +0.10% +0.11% / +0.16% +1.14% +1.13%] index_select skip64 : Elapsed 0.474 ms (47.361 ms / 100) 47.332 -> 47.409 ( +0.16%) [ +0.06% +0.02% +0.00% / +0.16% +1.30% +1.37%] index_select skip256 : Elapsed 0.474 ms (47.359 ms / 100) 48.981 -> 49.146 ( +0.34%) [ +0.00% +0.22% +0.19% / +0.34% +1.74% +1.54%] index_select spread : Elapsed 0.490 ms (48.981 ms / 100) 51.165 -> 51.177 ( +0.02%) [ +0.00% +0.06% +0.03% / +0.02% +1.08% +1.30%] index_select strided 3 : Elapsed 0.512 ms (51.165 ms / 100) 50.096 -> 50.093 ( -0.01%) [ +0.00% +0.05% +0.14% / -0.01% +1.63% +1.77%] index_select strided 5 : Elapsed 0.501 ms (50.096 ms / 100) 51.196 -> 51.291 ( +0.19%) [ +0.00% +0.11% +0.24% / +0.19% +1.67% +1.67%] index_select strided 7 : Elapsed 0.512 ms (51.196 ms / 100) 51.219 -> 51.327 ( +0.21%) [ +0.14% +0.01% +0.00% / +0.21% +1.67% +1.63%] index_select strided 8 : Elapsed 0.513 ms (51.292 ms / 100) 50.728 -> 50.781 ( +0.10%) [ +0.00% +0.04% +0.03% / +0.10% +1.20% +1.17%] index_select random : Elapsed 0.507 ms (50.728 ms / 100) 49.070 -> 49.179 ( +0.22%) [ +0.07% +0.00% +0.04% / +0.22% +1.19% +1.20%] index_select random_sorted : Elapsed 0.491 ms (49.104 ms / 100) B = [50, 150, 250] (stride (250, 12500, 1)) A = [15, 150, 250] (stride (1, 15, 2250)) dim = 0 33.167 -> 33.246 ( +0.24%) [ +0.13% +0.00% +0.17% / +0.24% +1.63% +1.55%] index_add_ linear : Elapsed 0.332 ms (33.211 ms / 100) 29.502 -> 29.564 ( +0.21%) [ +0.13% +0.00% +0.14% / +0.21% +1.32% +1.29%] index_copy_ linear : Elapsed 0.295 ms (29.541 ms / 100) 32.693 -> 32.853 ( +0.49%) [ +0.31% +0.00% +0.18% / +0.49% +3.31% +3.42%] index_add_ reverse : Elapsed 0.328 ms (32.795 ms / 100) 29.146 -> 29.282 ( +0.47%) [ +0.40% +0.00% +0.32% / +0.47% +2.24% +2.50%] index_copy_ reverse : Elapsed 0.293 ms (29.264 ms / 100) 32.332 -> 32.353 ( +0.06%) [ +0.33% +0.00% +0.12% / +0.06% +1.49% +1.02%] index_add_ spread : Elapsed 0.324 ms (32.438 ms / 100) 29.208 -> 29.213 ( +0.02%) [ +0.14% +0.00% +0.06% / +0.09% +0.69% +0.02%] index_copy_ spread : Elapsed 0.292 ms (29.249 ms / 100) 32.539 -> 32.539 ( +0.00%) [ +0.12% +0.22% +0.00% / +0.00% +0.85% +1.01%] index_add_ strided 3 : Elapsed 0.326 ms (32.579 ms / 100) 29.130 -> 29.274 ( +0.49%) [ +0.00% +0.18% +0.25% / +0.49% +1.52% +1.53%] index_copy_ strided 3 : Elapsed 0.291 ms (29.130 ms / 100) 32.411 -> 32.154 ( -0.79%) [ +0.15% +0.00% +0.16% / +0.15% -0.56% -0.79%] index_add_ strided 7 : Elapsed 0.325 ms (32.461 ms / 100) 29.074 -> 29.148 ( +0.25%) [ +0.16% +0.00% +0.30% / +0.43% +0.55% +0.25%] index_copy_ strided 7 : Elapsed 0.291 ms (29.120 ms / 100) 32.227 -> 32.251 ( +0.07%) [ +0.17% +0.00% +0.47% / +0.07% +1.57% +1.54%] index_add_ perm : Elapsed 0.323 ms (32.282 ms / 100) 29.152 -> 29.051 ( -0.35%) [ +0.03% +0.00% +0.30% / -0.35% +1.00% +0.59%] index_copy_ perm : Elapsed 0.292 ms (29.162 ms / 100) 32.211 -> 32.269 ( +0.18%) [ +0.24% +0.02% +0.00% / +0.18% +2.12% +2.10%] index_add_ perm_sorted : Elapsed 0.323 ms (32.288 ms / 100) 29.136 -> 29.176 ( +0.14%) [ +0.23% +0.00% +0.09% / +0.14% +1.19% +1.15%] index_copy_ perm_sorted : Elapsed 0.292 ms (29.203 ms / 100) 110.001 -> 110.770 ( +0.70%) [ +0.31% +0.06% +0.00% / +0.70% +1.36% +1.68%] index_select const : Elapsed 1.103 ms (110.339 ms / 100) 113.381 -> 113.451 ( +0.06%) [ +0.31% +0.17% +0.00% / +0.06% +0.59% +0.58%] index_select wrap : Elapsed 1.137 ms (113.728 ms / 100) 110.546 -> 110.323 ( -0.20%) [ +0.09% +0.00% +0.27% / -0.20% +0.64% +0.65%] index_select linear : Elapsed 1.106 ms (110.640 ms / 100) 111.299 -> 111.666 ( +0.33%) [ +0.65% +0.66% +0.00% / +0.36% +0.33% +0.37%] index_select reverse : Elapsed 1.120 ms (112.019 ms / 100) 110.232 -> 110.505 ( +0.25%) [ +0.00% +0.41% +0.12% / +0.30% +0.25% +0.37%] index_select skip64 : Elapsed 1.102 ms (110.232 ms / 100) 110.856 -> 110.610 ( -0.22%) [ +0.50% +0.34% +0.00% / +0.28% -0.22% +0.04%] index_select skip256 : Elapsed 1.114 ms (111.413 ms / 100) 110.866 -> 110.808 ( -0.05%) [ +0.00% +0.10% +0.14% / -0.05% +0.92% +0.67%] index_select spread : Elapsed 1.109 ms (110.866 ms / 100) 117.842 -> 117.998 ( +0.13%) [ +0.15% +0.15% +0.00% / +0.13% +1.08% +0.87%] index_select strided 3 : Elapsed 1.180 ms (118.018 ms / 100) 119.790 -> 119.731 ( -0.05%) [ +0.00% +0.06% +0.00% / -0.05% +0.57% +0.40%] index_select strided 5 : Elapsed 1.198 ms (119.790 ms / 100) 121.199 -> 121.103 ( -0.08%) [ +0.05% +0.00% +0.10% / -0.08% +0.04% +0.27%] index_select strided 7 : Elapsed 1.213 ms (121.260 ms / 100) 121.075 -> 121.250 ( +0.14%) [ +0.00% +0.20% +0.46% / +0.18% +0.17% +0.14%] index_select strided 8 : Elapsed 1.211 ms (121.075 ms / 100) 118.093 -> 118.484 ( +0.33%) [ +0.00% +0.07% +0.23% / +0.33% +0.45% +0.43%] index_select random : Elapsed 1.181 ms (118.093 ms / 100) 110.692 -> 111.154 ( +0.42%) [ +0.21% +0.00% +0.43% / +0.42% +0.46% +0.83%] index_select random_sorted : Elapsed 1.109 ms (110.919 ms / 100) B = [50, 150, 250] (stride (1, 50, 7500)) dim = 0 fill_cnt = 15 38.857 -> 38.905 ( +0.12%) [ +0.00% +0.09% +0.18% / +0.12% +0.43% +0.44%] index_fill_ const : Elapsed 0.389 ms (38.857 ms / 100) 38.986 -> 39.012 ( +0.07%) [ +0.00% +0.03% +0.05% / +0.07% +0.40% +0.40%] index_fill_ linear : Elapsed 0.390 ms (38.986 ms / 100) 38.712 -> 38.761 ( +0.13%) [ +0.00% +0.00% +0.08% / +0.13% +0.42% +0.38%] index_fill_ reverse : Elapsed 0.387 ms (38.713 ms / 100) 38.882 -> 38.903 ( +0.05%) [ +0.00% +0.01% +0.08% / +0.05% +0.39% +0.35%] index_fill_ skip64 : Elapsed 0.389 ms (38.882 ms / 100) 39.009 -> 39.045 ( +0.09%) [ +0.04% +0.00% +0.07% / +0.09% +0.37% +0.35%] index_fill_ skip256 : Elapsed 0.390 ms (39.023 ms / 100) 39.039 -> 39.056 ( +0.04%) [ +0.04% +0.00% +0.03% / +0.04% +0.14% +0.17%] index_fill_ spread : Elapsed 0.391 ms (39.053 ms / 100) 38.925 -> 38.954 ( +0.07%) [ +0.06% +0.00% +0.12% / +0.07% +0.12% +0.16%] index_fill_ strided 3 : Elapsed 0.389 ms (38.949 ms / 100) 38.625 -> 38.679 ( +0.14%) [ +0.06% +0.00% +0.22% / +0.21% +0.14% +0.16%] index_fill_ strided 5 : Elapsed 0.386 ms (38.647 ms / 100) 39.005 -> 39.014 ( +0.02%) [ +0.02% +0.00% +0.02% / +0.02% +0.09% +0.11%] index_fill_ strided 7 : Elapsed 0.390 ms (39.013 ms / 100) 38.728 -> 38.759 ( +0.08%) [ +0.00% +0.05% +0.08% / +0.08% +0.20% +0.22%] index_fill_ strided 8 : Elapsed 0.387 ms (38.728 ms / 100) 38.843 -> 38.869 ( +0.07%) [ +0.00% +0.00% +0.06% / +0.07% +0.20% +0.22%] index_fill_ strided 16 : Elapsed 0.388 ms (38.843 ms / 100) 38.689 -> 38.695 ( +0.02%) [ +0.00% +0.01% +0.00% / +0.02% +0.25% +0.26%] index_fill_ random : Elapsed 0.387 ms (38.689 ms / 100) 38.702 -> 38.780 ( +0.20%) [ +0.00% +0.00% +0.22% / +0.21% +0.20% +0.22%] index_fill_ random_sorted : Elapsed 0.387 ms (38.703 ms / 100) 38.808 -> 38.837 ( +0.07%) [ +0.02% +0.00% +0.10% / +0.12% +0.07% +0.10%] index_fill_ perm : Elapsed 0.388 ms (38.814 ms / 100) 38.985 -> 39.046 ( +0.16%) [ +0.04% +0.00% +0.04% / +0.16% +0.21% +0.24%] index_fill_ perm_sorted : Elapsed 0.390 ms (38.999 ms / 100) out_shape = [15, 50, 250] in_shape = [15, 150, 250] idx_dim = 1 B = [15, 50, 250] (stride (12500, 250, 1)) A = [15, 150, 250] (stride (150, 1, 2250)) dim = 1 7.157 -> 7.087 ( -0.98%) [ +0.07% +0.00% +0.15% / +0.08% -0.89% -0.98%] index_select const : Elapsed 0.072 ms (7.162 ms / 100) 8.845 -> 8.636 ( -2.36%) [ +0.00% +0.09% +0.08% / +0.11% -2.36% -2.11%] index_select wrap : Elapsed 0.088 ms (8.845 ms / 100) 8.842 -> 8.652 ( -2.15%) [ +0.01% +0.11% +0.00% / +0.14% -2.07% -2.15%] index_select linear : Elapsed 0.088 ms (8.843 ms / 100) 8.841 -> 8.679 ( -1.83%) [ +0.00% +0.07% +0.27% / +0.09% -1.65% -1.83%] index_select reverse : Elapsed 0.088 ms (8.841 ms / 100) 7.169 -> 7.059 ( -1.53%) [ +0.08% +0.00% +0.27% / +0.35% -1.53% -1.30%] index_select skip64 : Elapsed 0.072 ms (7.175 ms / 100) 7.150 -> 7.080 ( -0.98%) [ +0.08% +0.00% +0.13% / +0.45% -0.98% -0.81%] index_select skip256 : Elapsed 0.072 ms (7.156 ms / 100) 10.986 -> 10.962 ( -0.22%) [ +0.10% +0.00% +0.23% / +0.18% -0.22% -0.18%] index_select spread : Elapsed 0.110 ms (10.997 ms / 100) 10.957 -> 10.970 ( +0.12%) [ +0.46% +0.00% +0.30% / +0.37% +0.12% +0.17%] index_select strided 3 : Elapsed 0.110 ms (11.007 ms / 100) 13.084 -> 12.984 ( -0.76%) [ +0.27% +0.14% +0.00% / +0.12% -0.35% -0.76%] index_select strided 5 : Elapsed 0.131 ms (13.119 ms / 100) 14.810 -> 14.709 ( -0.68%) [ +0.00% +0.23% +0.22% / +0.19% -0.34% -0.68%] index_select strided 7 : Elapsed 0.148 ms (14.810 ms / 100) 15.462 -> 15.417 ( -0.29%) [ +0.34% +0.06% +0.00% / +0.12% -0.29% +0.02%] index_select strided 8 : Elapsed 0.155 ms (15.515 ms / 100) 15.625 -> 15.543 ( -0.52%) [ +0.01% +0.00% +0.22% / +0.18% -0.43% -0.52%] index_select strided 16 : Elapsed 0.156 ms (15.626 ms / 100) 14.866 -> 14.806 ( -0.40%) [ +0.05% +0.07% +0.00% / +0.15% -0.37% -0.40%] index_select strided 64 : Elapsed 0.149 ms (14.874 ms / 100) 11.312 -> 11.277 ( -0.31%) [ +0.11% +0.07% +0.00% / +0.10% -0.31% -0.15%] index_select strided 100 : Elapsed 0.113 ms (11.324 ms / 100) 14.902 -> 14.878 ( -0.16%) [ +0.13% +0.00% +0.14% / +0.01% -0.15% -0.16%] index_select random : Elapsed 0.149 ms (14.922 ms / 100) 10.855 -> 10.795 ( -0.55%) [ +0.00% +0.18% +0.05% / +0.21% -0.43% -0.55%] index_select random_sorted : Elapsed 0.109 ms (10.855 ms / 100) 14.921 -> 14.869 ( -0.35%) [ +0.01% +0.02% +0.00% / -0.05% -0.35% -0.27%] index_select perm : Elapsed 0.149 ms (14.923 ms / 100) 10.900 -> 10.880 ( -0.18%) [ +0.00% +0.13% +0.01% / -0.18% -0.07% +0.10%] index_select perm_sorted : Elapsed 0.109 ms (10.900 ms / 100) B = [15, 50, 250] (stride (250, 3750, 1)) A = [15, 150, 250] (stride (37500, 1, 150)) dim = 1 6.920 -> 6.934 ( +0.20%) [ +0.00% +0.16% +0.36% / +0.20% +1.18% +1.01%] index_select const : Elapsed 0.069 ms (6.920 ms / 100) 8.295 -> 8.309 ( +0.17%) [ +0.07% +0.00% +0.06% / +0.17% +1.56% +1.66%] index_select wrap : Elapsed 0.083 ms (8.301 ms / 100) 8.293 -> 8.294 ( +0.01%) [ +0.07% +0.10% +0.00% / +0.01% +1.65% +1.76%] index_select linear : Elapsed 0.083 ms (8.299 ms / 100) 8.271 -> 8.284 ( +0.16%) [ +0.00% +0.04% +0.10% / +0.16% +1.80% +1.90%] index_select reverse : Elapsed 0.083 ms (8.271 ms / 100) 6.928 -> 6.936 ( +0.12%) [ +0.17% +0.00% +0.23% / +0.12% +0.71% +0.68%] index_select skip64 : Elapsed 0.069 ms (6.940 ms / 100) 6.916 -> 6.912 ( -0.06%) [ +0.00% +0.06% +0.16% / -0.06% +1.01% +0.82%] index_select skip256 : Elapsed 0.069 ms (6.916 ms / 100) 10.117 -> 10.076 ( -0.41%) [ +0.13% +0.04% +0.00% / -0.09% -0.11% -0.41%] index_select spread : Elapsed 0.101 ms (10.130 ms / 100) 10.102 -> 10.082 ( -0.20%) [ +0.13% +0.27% +0.00% / +0.23% +0.02% -0.20%] index_select strided 3 : Elapsed 0.101 ms (10.115 ms / 100) 11.554 -> 11.569 ( +0.13%) [ +0.35% +0.00% +0.42% / +0.32% +0.48% +0.13%] index_select strided 5 : Elapsed 0.116 ms (11.595 ms / 100) 13.000 -> 13.025 ( +0.19%) [ +0.15% +0.00% +0.27% / +0.19% +0.46% +0.44%] index_select strided 7 : Elapsed 0.130 ms (13.019 ms / 100) 13.344 -> 13.363 ( +0.14%) [ +0.00% +0.04% +0.30% / +0.14% +0.51% +0.34%] index_select strided 8 : Elapsed 0.133 ms (13.344 ms / 100) 13.540 -> 13.515 ( -0.18%) [ +0.00% +0.47% +0.27% / +0.41% -0.18% +0.24%] index_select strided 16 : Elapsed 0.135 ms (13.540 ms / 100) 13.032 -> 12.994 ( -0.29%) [ +0.00% +0.05% +0.02% / +0.35% -0.29% -0.01%] index_select strided 64 : Elapsed 0.130 ms (13.032 ms / 100) 10.225 -> 10.226 ( +0.01%) [ +0.03% +0.00% +0.00% / +0.01% +0.24% +0.45%] index_select strided 100 : Elapsed 0.102 ms (10.228 ms / 100) 12.903 -> 12.866 ( -0.29%) [ +0.07% +0.00% +0.04% / +0.12% -0.29% -0.12%] index_select random : Elapsed 0.129 ms (12.912 ms / 100) 9.823 -> 9.799 ( -0.24%) [ +0.26% +0.00% +0.06% / -0.01% -0.24% -0.21%] index_select random_sorted : Elapsed 0.098 ms (9.849 ms / 100) 13.050 -> 13.075 ( +0.19%) [ +0.00% +0.07% +0.41% / +0.29% +0.19% +0.32%] index_select perm : Elapsed 0.130 ms (13.050 ms / 100) 9.927 -> 9.930 ( +0.03%) [ +0.04% +0.00% +0.21% / +0.03% +0.07% +0.33%] index_select perm_sorted : Elapsed 0.099 ms (9.931 ms / 100) B = [15, 50, 250] (stride (1, 3750, 15)) dim = 1 fill_cnt = 150 6.045 -> 6.033 ( -0.20%) [ +0.03% +0.00% +0.13% / -0.12% -0.20% -0.07%] index_fill_ const : Elapsed 0.060 ms (6.047 ms / 100) 6.033 -> 6.020 ( -0.22%) [ +0.10% +0.00% +0.18% / -0.08% -0.12% -0.22%] index_fill_ linear : Elapsed 0.060 ms (6.039 ms / 100) 6.058 -> 6.045 ( -0.21%) [ +0.00% +0.03% +0.07% / -0.21% +0.05% -0.08%] index_fill_ reverse : Elapsed 0.061 ms (6.058 ms / 100) 6.059 -> 6.049 ( -0.17%) [ +0.41% +0.15% +0.00% / -0.02% -0.17% -0.10%] index_fill_ skip64 : Elapsed 0.061 ms (6.084 ms / 100) 6.049 -> 6.041 ( -0.13%) [ +0.08% +0.00% +0.28% / -0.08% -0.05% -0.13%] index_fill_ skip256 : Elapsed 0.061 ms (6.054 ms / 100) 5.934 -> 5.931 ( -0.05%) [ +0.02% +0.00% +0.03% / +0.08% -0.05% -0.03%] index_fill_ spread : Elapsed 0.059 ms (5.935 ms / 100) 5.947 -> 5.936 ( -0.18%) [ +0.05% +0.34% +0.00% / -0.18% +0.08% +0.02%] index_fill_ strided 3 : Elapsed 0.059 ms (5.950 ms / 100) 6.005 -> 5.987 ( -0.30%) [ +0.00% +0.22% +0.28% / -0.30% -0.23% -0.05%] index_fill_ strided 5 : Elapsed 0.060 ms (6.005 ms / 100) 5.919 -> 5.915 ( -0.07%) [ +0.15% +0.02% +0.00% / -0.07% +0.12% +0.25%] index_fill_ strided 7 : Elapsed 0.059 ms (5.928 ms / 100) 5.906 -> 5.899 ( -0.12%) [ +0.00% +0.17% +0.10% / +0.12% +0.08% -0.12%] index_fill_ strided 8 : Elapsed 0.059 ms (5.906 ms / 100) 5.955 -> 5.945 ( -0.17%) [ +0.00% +0.18% +0.13% / -0.10% +0.05% -0.17%] index_fill_ strided 16 : Elapsed 0.060 ms (5.955 ms / 100) 5.979 -> 5.989 ( +0.17%) [ +0.05% +0.15% +0.00% / +0.17% +0.22% +0.20%] index_fill_ random : Elapsed 0.060 ms (5.982 ms / 100) 5.903 -> 5.887 ( -0.27%) [ +0.17% +0.05% +0.00% / -0.27% +0.03% +0.03%] index_fill_ random_sorted : Elapsed 0.059 ms (5.913 ms / 100) B = [15, 50, 250] (stride (50, 1, 750)) A = [15, 150, 250] (stride (37500, 1, 150)) dim = 1 GOOD 8.784 -> 6.343 (-27.79%) [ +0.24% +0.00% +0.11% / -27.79% -27.63% -27.56%] index_select const : Elapsed 0.088 ms (8.805 ms / 100) GOOD 10.381 -> 6.643 (-36.01%) [ +0.09% +0.06% +0.00% / -35.48% -35.89% -36.01%] index_select wrap : Elapsed 0.104 ms (10.390 ms / 100) GOOD 10.350 -> 6.643 (-35.82%) [ +0.30% +0.00% +0.21% / -35.18% -35.69% -35.82%] index_select linear : Elapsed 0.104 ms (10.381 ms / 100) GOOD 10.362 -> 6.706 (-35.28%) [ +0.27% +0.00% +0.21% / -35.00% -35.25% -35.28%] index_select reverse : Elapsed 0.104 ms (10.390 ms / 100) GOOD 8.782 -> 6.356 (-27.62%) [ +0.05% +0.00% +0.08% / -27.14% -27.62% -27.54%] index_select skip64 : Elapsed 0.088 ms (8.786 ms / 100) GOOD 8.762 -> 6.326 (-27.80%) [ +0.03% +0.02% +0.00% / -27.80% -27.72% -27.59%] index_select skip256 : Elapsed 0.088 ms (8.765 ms / 100) GOOD 12.270 -> 7.313 (-40.40%) [ +0.03% +0.18% +0.00% / -40.29% -40.14% -40.40%] index_select spread : Elapsed 0.123 ms (12.274 ms / 100) GOOD 12.223 -> 7.331 (-40.02%) [ +0.29% +0.39% +0.00% / -39.96% -40.02% -39.93%] index_select strided 3 : Elapsed 0.123 ms (12.259 ms / 100) GOOD 14.046 -> 7.582 (-46.02%) [ +0.13% +0.19% +0.00% / -46.01% -45.84% -46.02%] index_select strided 5 : Elapsed 0.141 ms (14.064 ms / 100) GOOD 15.119 -> 7.629 (-49.54%) [ +0.00% +0.15% +0.12% / -49.46% -49.47% -49.54%] index_select strided 7 : Elapsed 0.151 ms (15.119 ms / 100) BEST 15.409 -> 7.624 (-50.52%) [ +0.04% +0.01% +0.00% / -50.23% -50.52% -50.46%] index_select strided 8 : Elapsed 0.154 ms (15.415 ms / 100) BEST 15.481 -> 7.713 (-50.18%) [ +0.12% +0.00% +0.08% / -49.85% -50.02% -50.18%] index_select strided 16 : Elapsed 0.155 ms (15.499 ms / 100) GOOD 15.155 -> 7.644 (-49.56%) [ +0.09% +0.06% +0.00% / -48.86% -49.56% -49.51%] index_select strided 64 : Elapsed 0.152 ms (15.169 ms / 100) GOOD 12.852 -> 6.713 (-47.77%) [ +0.16% +0.22% +0.00% / -47.60% -47.61% -47.77%] index_select strided 100 : Elapsed 0.129 ms (12.873 ms / 100) GOOD 15.067 -> 7.717 (-48.78%) [ +0.00% +0.09% +0.09% / -48.53% -48.78% -48.66%] index_select random : Elapsed 0.151 ms (15.067 ms / 100) GOOD 12.120 -> 7.365 (-39.23%) [ +0.26% +0.00% +0.24% / -39.08% -39.08% -39.23%] index_select random_sorted : Elapsed 0.122 ms (12.151 ms / 100) GOOD 15.234 -> 7.702 (-49.44%) [ +0.07% +0.00% +0.07% / -49.11% -49.36% -49.44%] index_select perm : Elapsed 0.152 ms (15.244 ms / 100) GOOD 12.054 -> 7.353 (-39.00%) [ +0.00% +0.07% +0.02% / -38.60% -38.94% -39.00%] index_select perm_sorted : Elapsed 0.121 ms (12.054 ms / 100) B = [15, 50, 250] (stride (50, 1, 750)) A = [15, 150, 250] (stride (1, 3750, 15)) dim = 1 Good 6.881 -> 6.183 (-10.14%) [ +0.15% +0.00% +0.13% / -10.01% -10.14% -9.94%] index_select const : Elapsed 0.069 ms (6.891 ms / 100) bad 7.890 -> 8.590 ( +8.87%) [ +0.39% +0.06% +0.00% / +9.68% +8.87% +9.06%] index_select wrap : Elapsed 0.079 ms (7.921 ms / 100) bad 7.885 -> 8.591 ( +8.95%) [ +0.36% +0.27% +0.00% / +9.68% +9.03% +8.95%] index_select linear : Elapsed 0.079 ms (7.913 ms / 100) bad 7.910 -> 8.621 ( +8.99%) [ +0.14% +0.00% +0.13% / +9.22% +8.99% +9.12%] index_select reverse : Elapsed 0.079 ms (7.921 ms / 100) good 6.896 -> 6.226 ( -9.72%) [ +0.13% +0.09% +0.00% / -8.82% -9.54% -9.72%] index_select skip64 : Elapsed 0.069 ms (6.905 ms / 100) good 6.863 -> 6.183 ( -9.91%) [ +0.35% +0.06% +0.00% / -9.49% -9.91% -9.73%] index_select skip256 : Elapsed 0.069 ms (6.887 ms / 100) Bad 7.925 -> 9.070 (+14.45%) [ +0.09% +0.00% +0.14% / +14.80% +14.66% +14.45%] index_select spread : Elapsed 0.079 ms (7.932 ms / 100) Bad 7.910 -> 9.079 (+14.78%) [ +0.00% +0.18% +0.32% / +15.15% +14.78% +14.82%] index_select strided 3 : Elapsed 0.079 ms (7.910 ms / 100) good 8.019 -> 7.446 ( -7.15%) [ +0.02% +0.00% +0.15% / -6.12% -7.13% -7.15%] index_select strided 5 : Elapsed 0.080 ms (8.021 ms / 100) Bad 8.031 -> 9.126 (+13.63%) [ +0.00% +0.17% +0.16% / +13.63% +14.59% +14.46%] index_select strided 7 : Elapsed 0.080 ms (8.031 ms / 100) Bad 7.917 -> 8.929 (+12.78%) [ +0.19% +0.05% +0.00% / +12.78% +16.36% +16.43%] index_select strided 8 : Elapsed 0.079 ms (7.932 ms / 100) Bad 7.942 -> 9.094 (+14.51%) [ +0.26% +0.00% +0.28% / +14.51% +14.54% +14.62%] index_select strided 16 : Elapsed 0.080 ms (7.963 ms / 100) Bad 7.945 -> 9.038 (+13.76%) [ +0.26% +0.19% +0.00% / +14.66% +13.76% +13.81%] index_select strided 64 : Elapsed 0.080 ms (7.966 ms / 100) good 6.923 -> 6.313 ( -8.81%) [ +0.27% +0.00% +0.01% / -8.81% -8.06% -7.96%] index_select strided 100 : Elapsed 0.069 ms (6.942 ms / 100) 7.947 -> 8.188 ( +3.03%) [ +0.48% +0.00% +0.13% / +6.37% +3.36% +3.03%] index_select random : Elapsed 0.080 ms (7.985 ms / 100) bad 7.805 -> 8.209 ( +5.18%) [ +0.03% +0.08% +0.00% / +8.03% +5.18% +5.24%] index_select random_sorted : Elapsed 0.078 ms (7.807 ms / 100) Bad 8.005 -> 8.855 (+10.62%) [ +0.10% +0.00% +0.01% / +15.80% +10.82% +10.62%] index_select perm : Elapsed 0.080 ms (8.013 ms / 100) Bad 7.967 -> 8.873 (+11.37%) [ +0.40% +0.14% +0.00% / +16.33% +11.37% +11.42%] index_select perm_sorted : Elapsed 0.080 ms (7.999 ms / 100) out_shape = [15, 150, 50] in_shape = [15, 150, 250] idx_dim = 2 B = [15, 150, 50] (stride (7500, 1, 150)) A = [15, 150, 250] (stride (37500, 250, 1)) dim = 2 9.327 -> 9.307 ( -0.21%) [ +0.00% +0.09% +0.05% / +0.00% -0.21% +0.02%] index_select const : Elapsed 0.093 ms (9.327 ms / 100) 10.204 -> 10.149 ( -0.54%) [ +0.00% +0.01% +0.00% / +0.12% -0.25% -0.54%] index_select wrap : Elapsed 0.102 ms (10.204 ms / 100) 10.198 -> 10.166 ( -0.31%) [ +0.00% +0.19% +0.05% / +0.05% -0.19% -0.31%] index_select linear : Elapsed 0.102 ms (10.198 ms / 100) 10.170 -> 10.158 ( -0.12%) [ +0.10% +0.06% +0.00% / +0.06% -0.06% -0.12%] index_select reverse : Elapsed 0.102 ms (10.180 ms / 100) 9.317 -> 9.307 ( -0.11%) [ +0.00% +0.10% +0.03% / +0.06% +0.04% -0.11%] index_select skip64 : Elapsed 0.093 ms (9.317 ms / 100) 9.323 -> 9.323 ( +0.00%) [ +0.04% +0.13% +0.00% / +0.00% +0.02% +0.14%] index_select skip256 : Elapsed 0.093 ms (9.327 ms / 100) 13.005 -> 13.041 ( +0.28%) [ +0.40% +0.18% +0.00% / +0.28% +0.40% +0.71%] index_select spread : Elapsed 0.131 ms (13.057 ms / 100) 11.699 -> 11.710 ( +0.09%) [ +0.27% +0.00% +0.32% / +0.18% +0.09% +0.09%] index_select strided 3 : Elapsed 0.117 ms (11.731 ms / 100) 13.017 -> 12.961 ( -0.43%) [ +0.12% +0.00% +0.38% / +0.10% -0.43% +0.44%] index_select strided 5 : Elapsed 0.130 ms (13.032 ms / 100) 13.939 -> 13.936 ( -0.02%) [ +0.00% +0.31% +0.19% / -0.02% +0.25% +0.37%] index_select strided 7 : Elapsed 0.139 ms (13.939 ms / 100) 14.280 -> 14.322 ( +0.29%) [ +0.13% +0.17% +0.00% / +0.35% +0.29% +0.57%] index_select strided 8 : Elapsed 0.143 ms (14.299 ms / 100) 14.665 -> 14.705 ( +0.27%) [ +0.10% +0.00% +0.29% / +0.27% +0.27% +0.33%] index_select strided 16 : Elapsed 0.147 ms (14.680 ms / 100) 14.284 -> 14.321 ( +0.26%) [ +0.00% +0.29% +0.18% / +0.27% +0.43% +0.26%] index_select strided 64 : Elapsed 0.143 ms (14.284 ms / 100) 13.015 -> 13.045 ( +0.23%) [ +0.00% +0.26% +0.18% / +0.23% +0.55% +0.27%] index_select strided 100 : Elapsed 0.130 ms (13.015 ms / 100) 14.380 -> 14.383 ( +0.02%) [ +0.07% +0.06% +0.00% / +0.02% +0.35% +0.33%] index_select random : Elapsed 0.144 ms (14.390 ms / 100) 12.604 -> 12.633 ( +0.23%) [ +0.00% +0.00% +0.01% / +0.53% +0.23% +0.29%] index_select random_sorted : Elapsed 0.126 ms (12.604 ms / 100) 14.224 -> 14.198 ( -0.18%) [ +0.08% +0.03% +0.00% / -0.18% +0.57% +0.42%] index_select perm : Elapsed 0.142 ms (14.235 ms / 100) 12.586 -> 12.581 ( -0.04%) [ +0.13% +0.23% +0.00% / +0.20% +0.10% -0.04%] index_select perm_sorted : Elapsed 0.126 ms (12.602 ms / 100) B = [15, 150, 50] (stride (1, 750, 15)) A = [15, 150, 250] (stride (1, 15, 2250)) dim = 2 5.565 -> 5.577 ( +0.22%) [ +0.00% +0.20% +0.00% / +0.32% +0.22% +0.22%] index_select const : Elapsed 0.056 ms (5.565 ms / 100) 5.947 -> 5.941 ( -0.10%) [ +0.13% +0.00% +0.17% / +0.12% +0.20% -0.10%] index_select wrap : Elapsed 0.060 ms (5.955 ms / 100) 5.956 -> 5.950 ( -0.10%) [ +0.39% +0.20% +0.00% / +0.34% +0.13% -0.10%] index_select linear : Elapsed 0.060 ms (5.979 ms / 100) 5.924 -> 5.941 ( +0.29%) [ +0.22% +0.00% +0.35% / +0.29% +0.86% +0.47%] index_select reverse : Elapsed 0.059 ms (5.937 ms / 100) 5.568 -> 5.569 ( +0.02%) [ +0.00% +0.00% +0.13% / +0.02% +0.40% +0.34%] index_select skip64 : Elapsed 0.056 ms (5.568 ms / 100) 5.564 -> 5.572 ( +0.14%) [ +0.18% +0.00% +0.14% / +0.14% +0.31% +0.45%] index_select skip256 : Elapsed 0.056 ms (5.574 ms / 100) 5.951 -> 5.952 ( +0.02%) [ +0.27% +0.00% +0.24% / +0.02% +1.23% +1.28%] index_select spread : Elapsed 0.060 ms (5.967 ms / 100) 5.934 -> 5.949 ( +0.25%) [ +0.03% +0.19% +0.00% / +0.25% +1.79% +1.65%] index_select strided 3 : Elapsed 0.059 ms (5.936 ms / 100) 5.958 -> 5.949 ( -0.15%) [ +0.10% +0.00% +0.03% / -0.15% +1.17% +1.39%] index_select strided 5 : Elapsed 0.060 ms (5.964 ms / 100) 5.920 -> 5.947 ( +0.46%) [ +0.19% +0.00% +0.41% / +0.46% +0.66% +0.71%] index_select strided 7 : Elapsed 0.059 ms (5.931 ms / 100) 5.969 -> 5.931 ( -0.64%) [ +0.10% +0.17% +0.00% / -0.03% -0.64% -0.57%] index_select strided 8 : Elapsed 0.060 ms (5.975 ms / 100) 5.950 -> 5.948 ( -0.03%) [ +0.00% +0.25% +0.30% / +0.44% -0.03% +0.07%] index_select strided 16 : Elapsed 0.059 ms (5.950 ms / 100) 5.975 -> 5.942 ( -0.55%) [ +0.25% +0.00% +0.23% / +0.22% -0.54% -0.55%] index_select strided 64 : Elapsed 0.060 ms (5.990 ms / 100) 5.618 -> 5.628 ( +0.18%) [ +0.18% +0.02% +0.00% / +0.18% +0.21% +0.20%] index_select strided 100 : Elapsed 0.056 ms (5.628 ms / 100) 5.957 -> 5.936 ( -0.35%) [ +0.00% +0.17% +0.35% / +0.07% -0.27% -0.35%] index_select random : Elapsed 0.060 ms (5.957 ms / 100) 5.919 -> 5.894 ( -0.42%) [ +0.54% +0.00% +0.25% / +0.39% -0.42% -0.20%] index_select random_sorted : Elapsed 0.060 ms (5.951 ms / 100) 5.929 -> 5.948 ( +0.32%) [ +0.00% +0.13% +0.19% / +0.32% +0.49% +0.54%] index_select perm : Elapsed 0.059 ms (5.929 ms / 100) 5.932 -> 5.930 ( -0.03%) [ +0.00% +0.17% +0.07% / -0.03% +0.61% +0.56%] index_select perm_sorted : Elapsed 0.059 ms (5.932 ms / 100) B = [15, 150, 50] (stride (150, 1, 2250)) A = [15, 150, 250] (stride (250, 3750, 1)) dim = 2 9.348 -> 9.312 ( -0.39%) [ +0.00% +0.06% +0.10% / +0.04% -0.39% -0.31%] index_select const : Elapsed 0.093 ms (9.348 ms / 100) 10.341 -> 10.309 ( -0.31%) [ +0.16% +0.00% +0.23% / +0.16% -0.31% -0.22%] index_select wrap : Elapsed 0.104 ms (10.358 ms / 100) 10.354 -> 10.308 ( -0.44%) [ +0.00% +0.00% +0.10% / -0.01% -0.25% -0.44%] index_select linear : Elapsed 0.104 ms (10.354 ms / 100) 10.349 -> 10.319 ( -0.29%) [ +0.03% +0.00% +0.02% / +0.04% -0.25% -0.29%] index_select reverse : Elapsed 0.104 ms (10.352 ms / 100) 9.350 -> 9.310 ( -0.43%) [ +0.19% +0.11% +0.00% / -0.03% -0.18% -0.43%] index_select skip64 : Elapsed 0.094 ms (9.368 ms / 100) 9.351 -> 9.310 ( -0.44%) [ +0.00% +0.01% +0.01% / -0.06% -0.44% -0.44%] index_select skip256 : Elapsed 0.094 ms (9.351 ms / 100) 13.472 -> 13.443 ( -0.22%) [ +0.19% +0.08% +0.00% / +0.07% -0.07% -0.22%] index_select spread : Elapsed 0.135 ms (13.497 ms / 100) 11.998 -> 12.003 ( +0.04%) [ +0.25% +0.00% +0.46% / +0.04% +0.16% +0.38%] index_select strided 3 : Elapsed 0.120 ms (12.028 ms / 100) 13.465 -> 13.461 ( -0.03%) [ +0.16% +0.00% +0.45% / +0.25% -0.03% +0.12%] index_select strided 5 : Elapsed 0.135 ms (13.487 ms / 100) 14.577 -> 14.593 ( +0.11%) [ +0.62% +0.00% +0.14% / +0.11% +0.45% +0.27%] index_select strided 7 : Elapsed 0.147 ms (14.667 ms / 100) 15.008 -> 15.038 ( +0.20%) [ +0.70% +0.35% +0.00% / +0.20% +0.42% +0.23%] index_select strided 8 : Elapsed 0.151 ms (15.113 ms / 100) 15.445 -> 15.452 ( +0.05%) [ +0.05% +0.00% +0.32% / +0.21% +0.17% +0.05%] index_select strided 16 : Elapsed 0.155 ms (15.453 ms / 100) 14.957 -> 14.948 ( -0.06%) [ +0.00% +0.29% +0.15% / -0.06% +0.24% +0.11%] index_select strided 64 : Elapsed 0.150 ms (14.957 ms / 100) 13.284 -> 13.272 ( -0.09%) [ +0.11% +0.00% +0.11% / -0.09% +0.07% +0.08%] index_select strided 100 : Elapsed 0.133 ms (13.298 ms / 100) 14.841 -> 14.842 ( +0.01%) [ +0.00% +0.36% +0.07% / +0.01% +0.18% +0.34%] index_select random : Elapsed 0.148 ms (14.841 ms / 100) 12.945 -> 12.906 ( -0.30%) [ +0.00% +0.31% +0.03% / +0.21% +0.22% -0.30%] index_select random_sorted : Elapsed 0.129 ms (12.945 ms / 100) 14.979 -> 15.013 ( +0.23%) [ +0.17% +0.36% +0.00% / +0.23% +0.60% +0.33%] index_select perm : Elapsed 0.150 ms (15.005 ms / 100) 12.999 -> 12.967 ( -0.25%) [ +0.00% +0.31% +0.34% / -0.25% +0.35% +0.44%] index_select perm_sorted : Elapsed 0.130 ms (12.999 ms / 100) out_shape = [50, 250, 150] in_shape = [15, 250, 150] idx_dim = 0 B = [50, 250, 150] (stride (37500, 150, 1)) A = [15, 250, 150] (stride (1, 15, 3750)) dim = 0 46.629 -> 46.683 ( +0.12%) [ +0.21% +0.00% +0.27% / +0.12% +0.73% +0.92%] index_add_ linear : Elapsed 0.467 ms (46.728 ms / 100) 45.713 -> 45.817 ( +0.23%) [ +0.34% +0.00% +0.09% / +0.23% +0.61% +0.79%] index_copy_ linear : Elapsed 0.459 ms (45.869 ms / 100) 46.801 -> 46.877 ( +0.16%) [ +0.32% +0.00% +0.13% / +0.16% +1.41% +1.50%] index_add_ reverse : Elapsed 0.469 ms (46.949 ms / 100) 45.880 -> 45.920 ( +0.09%) [ +0.38% +0.10% +0.00% / +0.09% +1.00% +1.08%] index_copy_ reverse : Elapsed 0.461 ms (46.055 ms / 100) 46.957 -> 46.987 ( +0.06%) [ +0.18% +0.13% +0.00% / +0.06% +0.85% +0.89%] index_add_ spread : Elapsed 0.470 ms (47.040 ms / 100) 46.006 -> 45.996 ( -0.02%) [ +0.24% +0.30% +0.00% / -0.02% +0.76% +0.64%] index_copy_ spread : Elapsed 0.461 ms (46.115 ms / 100) 46.887 -> 46.867 ( -0.04%) [ +0.00% +0.12% +0.18% / -0.04% +2.06% +2.27%] index_add_ strided 3 : Elapsed 0.469 ms (46.887 ms / 100) 46.028 -> 46.016 ( -0.03%) [ +0.00% +0.14% +0.09% / -0.03% +1.26% +1.21%] index_copy_ strided 3 : Elapsed 0.460 ms (46.028 ms / 100) 46.869 -> 46.906 ( +0.08%) [ +0.26% +0.00% +0.04% / +0.08% +0.42% +0.82%] index_add_ strided 7 : Elapsed 0.470 ms (46.990 ms / 100) 45.939 -> 45.973 ( +0.07%) [ +0.27% +0.00% +0.23% / +0.07% +0.64% +0.55%] index_copy_ strided 7 : Elapsed 0.461 ms (46.062 ms / 100) 46.718 -> 46.820 ( +0.22%) [ +0.02% +0.28% +0.00% / +0.22% +1.42% +1.68%] index_add_ perm : Elapsed 0.467 ms (46.726 ms / 100) 45.913 -> 45.967 ( +0.12%) [ +0.00% +0.02% +0.26% / +0.12% +0.78% +0.85%] index_copy_ perm : Elapsed 0.459 ms (45.913 ms / 100) 46.737 -> 46.774 ( +0.08%) [ +0.00% +0.02% +0.26% / +0.08% +1.50% +1.02%] index_add_ perm_sorted : Elapsed 0.467 ms (46.737 ms / 100) 45.828 -> 45.931 ( +0.22%) [ +0.28% +0.00% +0.26% / +0.22% +1.12% +0.77%] index_copy_ perm_sorted : Elapsed 0.460 ms (45.957 ms / 100) 150.987 -> 152.143 ( +0.77%) [ +0.00% +0.57% +0.31% / +0.77% +1.72% +1.26%] index_select const : Elapsed 1.510 ms (150.987 ms / 100) 155.050 -> 155.644 ( +0.38%) [ +0.00% +0.17% +0.48% / +0.38% +1.21% +1.21%] index_select wrap : Elapsed 1.550 ms (155.050 ms / 100) 151.775 -> 152.141 ( +0.24%) [ +0.13% +0.27% +0.00% / +0.24% +1.44% +1.36%] index_select linear : Elapsed 1.520 ms (151.979 ms / 100) 153.013 -> 153.650 ( +0.42%) [ +0.44% +0.33% +0.00% / +0.42% +1.44% +1.40%] index_select reverse : Elapsed 1.537 ms (153.693 ms / 100) 151.163 -> 151.795 ( +0.42%) [ +0.01% +0.00% +0.63% / +0.42% +1.36% +1.46%] index_select skip64 : Elapsed 1.512 ms (151.180 ms / 100) 151.551 -> 151.869 ( +0.21%) [ +0.00% +0.33% +0.08% / +0.21% +1.40% +1.44%] index_select skip256 : Elapsed 1.516 ms (151.551 ms / 100) 152.695 -> 152.666 ( -0.02%) [ +0.00% +0.12% +0.24% / -0.02% +1.10% +1.30%] index_select spread : Elapsed 1.527 ms (152.695 ms / 100) 158.885 -> 158.403 ( -0.30%) [ +0.00% +0.08% +0.00% / -0.30% +0.79% +0.90%] index_select strided 3 : Elapsed 1.589 ms (158.885 ms / 100) 159.100 -> 159.029 ( -0.04%) [ +0.00% +0.10% +0.00% / -0.04% +0.75% +0.87%] index_select strided 5 : Elapsed 1.591 ms (159.100 ms / 100) 160.054 -> 160.223 ( +0.11%) [ +0.00% +0.15% +0.13% / +0.11% +0.71% +0.70%] index_select strided 7 : Elapsed 1.601 ms (160.054 ms / 100) 160.019 -> 159.932 ( -0.05%) [ +0.00% +0.09% +0.12% / -0.05% +0.75% +0.65%] index_select strided 8 : Elapsed 1.600 ms (160.019 ms / 100) 157.373 -> 157.873 ( +0.32%) [ +0.39% +0.00% +0.46% / +0.32% +1.14% +1.12%] index_select random : Elapsed 1.580 ms (157.983 ms / 100) 152.689 -> 153.016 ( +0.21%) [ +0.41% +0.02% +0.00% / +0.21% +0.64% +1.44%] index_select random_sorted : Elapsed 1.533 ms (153.321 ms / 100) B = [50, 250, 150] (stride (37500, 1, 250)) dim = 0 fill_cnt = 15 6.433 -> 6.442 ( +0.14%) [ +0.00% +0.11% +0.03% / +0.14% +0.37% +0.19%] index_fill_ const : Elapsed 0.064 ms (6.433 ms / 100) 6.499 -> 6.509 ( +0.15%) [ +0.02% +0.00% +0.28% / +0.34% +0.15% +0.18%] index_fill_ linear : Elapsed 0.065 ms (6.500 ms / 100) 6.514 -> 6.516 ( +0.03%) [ +0.11% +0.00% +0.35% / +0.09% +0.03% +0.06%] index_fill_ reverse : Elapsed 0.065 ms (6.521 ms / 100) 6.447 -> 6.458 ( +0.17%) [ +0.22% +0.02% +0.00% / +0.17% +2.75% +2.36%] index_fill_ skip64 : Elapsed 0.065 ms (6.461 ms / 100) 6.454 -> 6.456 ( +0.03%) [ +0.00% +0.02% +0.02% / +0.03% +2.45% +2.43%] index_fill_ skip256 : Elapsed 0.065 ms (6.454 ms / 100) 6.503 -> 6.506 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.05% +0.06% +0.20%] index_fill_ spread : Elapsed 0.065 ms (6.503 ms / 100) 6.502 -> 6.510 ( +0.12%) [ +0.00% +0.22% +0.29% / +0.15% +0.18% +0.12%] index_fill_ strided 3 : Elapsed 0.065 ms (6.502 ms / 100) 6.528 -> 6.508 ( -0.31%) [ +0.21% +0.00% +0.12% / +0.06% -0.31% -0.21%] index_fill_ strided 5 : Elapsed 0.065 ms (6.542 ms / 100) 6.499 -> 6.492 ( -0.11%) [ +0.17% +0.34% +0.00% / +0.18% -0.09% -0.11%] index_fill_ strided 7 : Elapsed 0.065 ms (6.510 ms / 100) 6.495 -> 6.499 ( +0.06%) [ +0.00% +0.26% +0.08% / +0.06% +0.11% +0.11%] index_fill_ strided 8 : Elapsed 0.065 ms (6.495 ms / 100) 6.479 -> 6.478 ( -0.02%) [ +0.00% +0.05% +0.09% / -0.02% +0.14% +0.28%] index_fill_ strided 16 : Elapsed 0.065 ms (6.479 ms / 100) 6.487 -> 6.480 ( -0.11%) [ +0.08% +0.00% +0.08% / +0.03% -0.11% -0.09%] index_fill_ random : Elapsed 0.065 ms (6.492 ms / 100) 6.475 -> 6.479 ( +0.06%) [ +0.15% +0.14% +0.00% / +0.11% +0.06% +0.09%] index_fill_ random_sorted : Elapsed 0.065 ms (6.485 ms / 100) 6.489 -> 6.497 ( +0.12%) [ +0.29% +0.00% +0.03% / +0.12% +0.28% +0.26%] index_fill_ perm : Elapsed 0.065 ms (6.508 ms / 100) 6.484 -> 6.493 ( +0.14%) [ +0.23% +0.26% +0.00% / +0.17% +0.32% +0.14%] index_fill_ perm_sorted : Elapsed 0.065 ms (6.499 ms / 100) B = [50, 250, 150] (stride (250, 1, 12500)) A = [15, 250, 150] (stride (150, 2250, 1)) dim = 0 17.133 -> 17.169 ( +0.21%) [ +0.20% +0.19% +0.00% / +0.21% +5.31% +5.22%] index_add_ linear : Elapsed 0.172 ms (17.168 ms / 100) 11.565 -> 11.614 ( +0.42%) [ +0.59% +0.37% +0.00% / +0.42% +1.33% +1.66%] index_copy_ linear : Elapsed 0.116 ms (11.633 ms / 100) 17.072 -> 17.196 ( +0.73%) [ +0.00% +0.68% +0.20% / +0.73% +5.00% +5.59%] index_add_ reverse : Elapsed 0.171 ms (17.072 ms / 100) 11.394 -> 11.350 ( -0.39%) [ +0.07% +0.15% +0.00% / -0.39% +2.09% +1.58%] index_copy_ reverse : Elapsed 0.114 ms (11.402 ms / 100) 16.456 -> 16.516 ( +0.36%) [ +0.00% +0.41% +0.24% / +0.36% +4.81% +4.90%] index_add_ spread : Elapsed 0.165 ms (16.456 ms / 100) 11.068 -> 11.128 ( +0.54%) [ +0.00% +0.05% +0.68% / +0.54% +4.35% +4.20%] index_copy_ spread : Elapsed 0.111 ms (11.068 ms / 100) 16.819 -> 16.809 ( -0.06%) [ +0.00% +0.33% +0.04% / -0.06% +2.22% +2.30%] index_add_ strided 3 : Elapsed 0.168 ms (16.819 ms / 100) 10.982 -> 10.976 ( -0.05%) [ +0.00% +0.20% +0.39% / -0.05% +3.31% +3.75%] index_copy_ strided 3 : Elapsed 0.110 ms (10.982 ms / 100) 16.115 -> 16.116 ( +0.01%) [ +0.00% +0.04% +0.24% / +0.01% +2.64% +2.20%] index_add_ strided 7 : Elapsed 0.161 ms (16.115 ms / 100) 11.575 -> 11.358 ( -1.87%) [ +0.14% +0.00% +0.16% / +0.29% -1.87% -1.15%] index_copy_ strided 7 : Elapsed 0.116 ms (11.591 ms / 100) 17.078 -> 17.078 ( +0.00%) [ +0.04% +0.36% +0.00% / +0.00% +0.81% +0.48%] index_add_ perm : Elapsed 0.171 ms (17.084 ms / 100) 11.892 -> 11.513 ( -3.19%) [ +1.23% +0.99% +0.00% / +0.70% -3.17% -3.19%] index_copy_ perm : Elapsed 0.120 ms (12.038 ms / 100) 16.396 -> 16.393 ( -0.02%) [ +0.27% +0.00% +0.01% / -0.02% +4.98% +5.07%] index_add_ perm_sorted : Elapsed 0.164 ms (16.440 ms / 100) 11.219 -> 11.241 ( +0.20%) [ +0.44% +0.00% +0.40% / +0.20% +3.20% +2.79%] index_copy_ perm_sorted : Elapsed 0.113 ms (11.268 ms / 100) 66.436 -> 65.649 ( -1.18%) [ +0.32% +0.00% +0.86% / -1.18% +8.19% +7.70%] index_select const : Elapsed 0.667 ms (66.650 ms / 100) 64.774 -> 67.638 ( +4.42%) [ +0.00% +2.90% +1.54% / +4.42% +16.09% +13.87%] index_select wrap : Elapsed 0.648 ms (64.774 ms / 100) 63.129 -> 63.272 ( +0.23%) [ +1.31% +0.74% +0.00% / +0.23% +15.52% +17.10%] index_select linear : Elapsed 0.640 ms (63.953 ms / 100) 59.721 -> 58.918 ( -1.34%) [ +0.00% +0.83% +0.58% / -1.34% +25.01% +26.12%] index_select reverse : Elapsed 0.597 ms (59.721 ms / 100) 63.157 -> 63.358 ( +0.32%) [ +0.87% +1.00% +0.00% / +0.32% +10.77% +11.46%] index_select skip64 : Elapsed 0.637 ms (63.704 ms / 100) 60.942 -> 61.661 ( +1.18%) [ +1.79% +2.26% +0.00% / +1.18% +16.25% +17.65%] index_select skip256 : Elapsed 0.620 ms (62.034 ms / 100) 63.253 -> 61.626 ( -2.57%) [ +1.27% +1.93% +0.00% / -0.83% -2.57% +0.41%] index_select spread : Elapsed 0.641 ms (64.054 ms / 100) 69.110 -> 68.534 ( -0.83%) [ +0.00% +0.10% +0.01% / -0.83% +10.75% +10.10%] index_select strided 3 : Elapsed 0.691 ms (69.110 ms / 100) 71.494 -> 71.995 ( +0.70%) [ +0.97% +1.77% +0.00% / +0.70% +7.72% +10.71%] index_select strided 5 : Elapsed 0.722 ms (72.185 ms / 100) 67.883 -> 69.459 ( +2.32%) [ +0.00% +0.76% +1.17% / +2.32% +11.47% +11.77%] index_select strided 7 : Elapsed 0.679 ms (67.883 ms / 100) 67.241 -> 67.340 ( +0.15%) [ +0.00% +1.67% +0.93% / +0.15% +16.00% +15.06%] index_select strided 8 : Elapsed 0.672 ms (67.241 ms / 100) 70.117 -> 70.037 ( -0.11%) [ +1.23% +0.00% +2.20% / -0.11% +7.10% +7.34%] index_select random : Elapsed 0.710 ms (70.981 ms / 100) 67.788 -> 67.728 ( -0.09%) [ +0.61% +2.36% +0.00% / -0.09% +2.75% +5.09%] index_select random_sorted : Elapsed 0.682 ms (68.203 ms / 100) B = [50, 250, 150] (stride (1, 50, 12500)) A = [15, 250, 150] (stride (37500, 1, 250)) dim = 0 98.422 -> 98.539 ( +0.12%) [ +0.00% +0.03% +0.08% / +0.12% +0.54% +0.48%] index_add_ linear : Elapsed 0.984 ms (98.422 ms / 100) 60.036 -> 60.028 ( -0.01%) [ +0.01% +0.16% +0.00% / -0.01% +0.89% +0.78%] index_copy_ linear : Elapsed 0.600 ms (60.044 ms / 100) 98.238 -> 98.166 ( -0.07%) [ +0.00% +0.11% +0.01% / -0.07% +0.54% +0.64%] index_add_ reverse : Elapsed 0.982 ms (98.238 ms / 100) 59.743 -> 59.814 ( +0.12%) [ +0.00% +0.23% +0.16% / +0.12% +1.02% +0.90%] index_copy_ reverse : Elapsed 0.597 ms (59.743 ms / 100) 98.306 -> 98.320 ( +0.01%) [ +0.02% +0.00% +0.01% / +0.01% +0.38% +0.48%] index_add_ spread : Elapsed 0.983 ms (98.327 ms / 100) 59.891 -> 59.925 ( +0.06%) [ +0.00% +0.00% +0.08% / +0.06% +0.55% +0.44%] index_copy_ spread : Elapsed 0.599 ms (59.891 ms / 100) 98.361 -> 98.339 ( -0.02%) [ +0.01% +0.00% +0.02% / -0.02% +0.42% +0.28%] index_add_ strided 3 : Elapsed 0.984 ms (98.375 ms / 100) 60.024 -> 60.050 ( +0.04%) [ +0.00% +0.26% +0.07% / +0.04% +0.29% +0.39%] index_copy_ strided 3 : Elapsed 0.600 ms (60.024 ms / 100) 98.202 -> 98.239 ( +0.04%) [ +0.09% +0.05% +0.00% / +0.04% +0.34% +0.34%] index_add_ strided 7 : Elapsed 0.983 ms (98.287 ms / 100) 59.835 -> 59.793 ( -0.07%) [ +0.36% +0.09% +0.00% / -0.07% +0.24% +0.14%] index_copy_ strided 7 : Elapsed 0.600 ms (60.049 ms / 100) 97.973 -> 97.992 ( +0.02%) [ +0.08% +0.00% +0.03% / +0.02% +0.50% +0.55%] index_add_ perm : Elapsed 0.981 ms (98.055 ms / 100) 59.598 -> 59.599 ( +0.00%) [ +0.00% +0.13% +0.19% / +0.00% +0.57% +0.70%] index_copy_ perm : Elapsed 0.596 ms (59.598 ms / 100) 98.370 -> 98.379 ( +0.01%) [ +0.11% +0.01% +0.00% / +0.01% +0.42% +0.33%] index_add_ perm_sorted : Elapsed 0.985 ms (98.480 ms / 100) 59.848 -> 60.090 ( +0.40%) [ +0.35% +0.00% +0.26% / +0.40% +0.57% +0.54%] index_copy_ perm_sorted : Elapsed 0.601 ms (60.057 ms / 100) BEST 184.067 -> 24.399 (-86.74%) [ +0.00% +0.00% +0.01% / -86.74% -85.73% -85.70%] index_select const : Elapsed 1.841 ms (184.072 ms / 100) BEST 228.833 -> 38.818 (-83.04%) [ +0.22% +0.00% +0.15% / -83.04% -81.96% -82.09%] index_select wrap : Elapsed 2.293 ms (229.341 ms / 100) BEST 189.601 -> 37.842 (-80.04%) [ +0.05% +0.00% +0.07% / -80.04% -78.93% -79.06%] index_select linear : Elapsed 1.897 ms (189.699 ms / 100) BEST 203.720 -> 38.040 (-81.33%) [ +0.00% +0.00% +0.08% / -81.33% -80.53% -80.50%] index_select reverse : Elapsed 2.037 ms (203.724 ms / 100) BEST 184.198 -> 23.794 (-87.08%) [ +0.02% +0.05% +0.00% / -87.08% -86.26% -86.21%] index_select skip64 : Elapsed 1.842 ms (184.231 ms / 100) BEST 184.227 -> 24.519 (-86.69%) [ +0.00% +0.06% +0.03% / -86.69% -85.93% -85.91%] index_select skip256 : Elapsed 1.842 ms (184.227 ms / 100) BEST 198.664 -> 37.652 (-81.05%) [ +0.08% +0.00% +0.06% / -81.05% -79.89% -79.88%] index_select spread : Elapsed 1.988 ms (198.817 ms / 100) BEST 232.218 -> 25.981 (-88.81%) [ +0.00% +0.15% +0.05% / -88.81% -88.10% -88.10%] index_select strided 3 : Elapsed 2.322 ms (232.218 ms / 100) BEST 217.448 -> 24.693 (-88.64%) [ +0.00% +0.01% +0.01% / -88.64% -87.53% -87.55%] index_select strided 5 : Elapsed 2.174 ms (217.448 ms / 100) BEST 230.326 -> 38.758 (-83.17%) [ +0.00% +0.00% +0.15% / -83.17% -82.30% -82.14%] index_select strided 7 : Elapsed 2.303 ms (230.329 ms / 100) BEST 230.096 -> 38.895 (-83.10%) [ +0.13% +0.16% +0.00% / -83.10% -82.08% -82.14%] index_select strided 8 : Elapsed 2.304 ms (230.391 ms / 100) BEST 224.388 -> 38.566 (-82.81%) [ +0.12% +0.00% +0.06% / -82.81% -81.67% -81.69%] index_select random : Elapsed 2.247 ms (224.668 ms / 100) BEST 198.658 -> 37.776 (-80.98%) [ +0.00% +0.12% +0.12% / -80.98% -79.94% -79.98%] index_select random_sorted : Elapsed 1.987 ms (198.658 ms / 100) out_shape = [15, 50, 150] in_shape = [15, 250, 150] idx_dim = 1 B = [15, 50, 150] (stride (50, 1, 750)) dim = 1 fill_cnt = 250 GOOD 6.534 -> 4.245 (-35.03%) [ +0.17% +0.00% +0.18% / -34.51% -35.03% -34.94%] index_fill_ const : Elapsed 0.065 ms (6.545 ms / 100) GOOD 6.598 -> 4.305 (-34.75%) [ +0.08% +0.14% +0.00% / -34.75% -34.62% -34.59%] index_fill_ linear : Elapsed 0.066 ms (6.603 ms / 100) GOOD 6.589 -> 4.347 (-34.03%) [ +0.00% +0.06% +0.11% / -34.01% -34.03% -34.00%] index_fill_ reverse : Elapsed 0.066 ms (6.589 ms / 100) GOOD 6.545 -> 4.279 (-34.62%) [ +0.00% +0.21% +0.14% / -34.62% -34.39% -34.38%] index_fill_ skip64 : Elapsed 0.065 ms (6.545 ms / 100) GOOD 6.546 -> 4.270 (-34.77%) [ +0.00% +0.12% +0.09% / -34.77% -34.54% -34.52%] index_fill_ skip256 : Elapsed 0.065 ms (6.546 ms / 100) GOOD 6.532 -> 4.295 (-34.25%) [ +0.00% +0.06% +0.05% / -34.25% -34.03% -33.88%] index_fill_ spread : Elapsed 0.065 ms (6.532 ms / 100) GOOD 6.639 -> 4.493 (-32.32%) [ +0.15% +0.05% +0.00% / -32.32% -31.89% -31.78%] index_fill_ strided 3 : Elapsed 0.066 ms (6.649 ms / 100) GOOD 6.844 -> 4.684 (-31.56%) [ +0.10% +0.00% +0.09% / -31.52% -31.56% -31.55%] index_fill_ strided 5 : Elapsed 0.069 ms (6.851 ms / 100) GOOD 6.627 -> 4.527 (-31.69%) [ +0.12% +0.27% +0.00% / -31.43% -31.61% -31.69%] index_fill_ strided 7 : Elapsed 0.066 ms (6.635 ms / 100) GOOD 6.784 -> 4.873 (-28.17%) [ +0.10% +0.16% +0.00% / -28.17% -28.05% -27.92%] index_fill_ strided 8 : Elapsed 0.068 ms (6.791 ms / 100) GOOD 6.791 -> 4.863 (-28.39%) [ +0.00% +0.19% +0.16% / -28.33% -28.39% -28.39%] index_fill_ strided 16 : Elapsed 0.068 ms (6.791 ms / 100) GOOD 6.640 -> 4.612 (-30.54%) [ +0.12% +0.14% +0.00% / -30.54% -30.32% -30.42%] index_fill_ random : Elapsed 0.066 ms (6.648 ms / 100) GOOD 6.575 -> 4.313 (-34.40%) [ +0.03% +0.00% +0.06% / -34.40% -34.07% -34.13%] index_fill_ random_sorted : Elapsed 0.066 ms (6.577 ms / 100) B = [15, 50, 150] (stride (50, 1, 750)) A = [15, 250, 150] (stride (37500, 1, 250)) dim = 1 good 6.181 -> 5.591 ( -9.55%) [ +0.00% +0.06% +0.06% / -9.55% -9.34% -9.38%] index_select const : Elapsed 0.062 ms (6.181 ms / 100) Good 6.951 -> 5.818 (-16.30%) [ +0.20% +0.00% +0.26% / -16.18% -16.29% -16.30%] index_select wrap : Elapsed 0.070 ms (6.965 ms / 100) Good 6.964 -> 5.821 (-16.41%) [ +0.00% +0.24% +0.33% / -16.41% -16.18% -16.20%] index_select linear : Elapsed 0.070 ms (6.964 ms / 100) Good 6.965 -> 5.833 (-16.25%) [ +0.06% +0.00% +0.03% / -16.25% -15.99% -16.12%] index_select reverse : Elapsed 0.070 ms (6.969 ms / 100) good 6.166 -> 5.583 ( -9.46%) [ +0.02% +0.00% +0.37% / -9.46% -9.05% -9.20%] index_select skip64 : Elapsed 0.062 ms (6.167 ms / 100) good 6.158 -> 5.580 ( -9.39%) [ +0.15% +0.00% +0.03% / -9.39% -9.19% -9.00%] index_select skip256 : Elapsed 0.062 ms (6.167 ms / 100) GOOD 9.576 -> 6.759 (-29.42%) [ +0.02% +0.00% +0.05% / -29.42% -29.04% -29.23%] index_select spread : Elapsed 0.096 ms (9.578 ms / 100) GOOD 8.323 -> 6.348 (-23.73%) [ +0.07% +0.01% +0.00% / -23.67% -23.73% -23.53%] index_select strided 3 : Elapsed 0.083 ms (8.329 ms / 100) GOOD 9.552 -> 6.764 (-29.19%) [ +0.45% +0.00% +0.15% / -29.08% -29.09% -29.19%] index_select strided 5 : Elapsed 0.096 ms (9.595 ms / 100) GOOD 10.424 -> 6.914 (-33.67%) [ +0.24% +0.00% +0.15% / -33.67% -33.62% -33.58%] index_select strided 7 : Elapsed 0.104 ms (10.449 ms / 100) GOOD 10.803 -> 6.938 (-35.78%) [ +0.15% +0.01% +0.00% / -35.78% -35.78% -35.74%] index_select strided 8 : Elapsed 0.108 ms (10.819 ms / 100) GOOD 10.951 -> 7.077 (-35.38%) [ +0.11% +0.00% +0.09% / -35.21% -35.27% -35.38%] index_select strided 16 : Elapsed 0.110 ms (10.963 ms / 100) GOOD 10.643 -> 7.017 (-34.07%) [ +0.00% +0.10% +0.26% / -33.98% -34.07% -33.94%] index_select strided 64 : Elapsed 0.106 ms (10.643 ms / 100) GOOD 9.227 -> 6.046 (-34.47%) [ +0.08% +0.00% +0.17% / -34.47% -34.38% -34.36%] index_select strided 100 : Elapsed 0.092 ms (9.234 ms / 100) GOOD 10.516 -> 6.878 (-34.59%) [ +0.16% +0.00% +0.11% / -34.54% -34.59% -34.53%] index_select random : Elapsed 0.105 ms (10.533 ms / 100) GOOD 8.929 -> 6.654 (-25.48%) [ +0.00% +0.30% +0.18% / -25.48% -25.21% -25.21%] index_select random_sorted : Elapsed 0.089 ms (8.929 ms / 100) GOOD 10.647 -> 7.012 (-34.14%) [ +0.00% +0.02% +0.15% / -34.01% -34.14% -34.05%] index_select perm : Elapsed 0.106 ms (10.647 ms / 100) GOOD 9.256 -> 6.740 (-27.18%) [ +0.21% +0.50% +0.00% / -27.18% -27.12% -27.01%] index_select perm_sorted : Elapsed 0.093 ms (9.275 ms / 100) out_shape = [15, 250, 50] in_shape = [15, 250, 150] idx_dim = 2 B = [15, 250, 50] (stride (50, 750, 1)) dim = 2 fill_cnt = 150 GOOD 6.617 -> 4.742 (-28.34%) [ +0.00% +0.18% +0.11% / -27.76% -28.34% -28.20%] index_fill_ const : Elapsed 0.066 ms (6.617 ms / 100) GOOD 6.752 -> 5.085 (-24.69%) [ +0.22% +0.00% +0.31% / -24.13% -24.69% -24.59%] index_fill_ linear : Elapsed 0.068 ms (6.767 ms / 100) Good 6.731 -> 5.826 (-13.45%) [ +0.00% +0.01% +0.13% / -13.45% -12.69% -12.69%] index_fill_ reverse : Elapsed 0.067 ms (6.731 ms / 100) GOOD 6.675 -> 4.983 (-25.35%) [ +0.13% +0.10% +0.00% / -24.25% -25.35% -25.33%] index_fill_ skip64 : Elapsed 0.067 ms (6.684 ms / 100) GOOD 6.639 -> 4.817 (-27.44%) [ +0.12% +0.00% +0.11% / -27.44% -26.51% -26.56%] index_fill_ skip256 : Elapsed 0.066 ms (6.647 ms / 100) GOOD 6.738 -> 5.116 (-24.07%) [ +0.12% +0.00% +0.33% / -24.07% -24.00% -23.78%] index_fill_ spread : Elapsed 0.067 ms (6.746 ms / 100) GOOD 6.789 -> 5.109 (-24.75%) [ +0.37% +0.00% +0.13% / -24.63% -24.75% -24.75%] index_fill_ strided 3 : Elapsed 0.068 ms (6.814 ms / 100) GOOD 7.113 -> 5.459 (-23.25%) [ +0.04% +0.13% +0.00% / -23.25% -22.65% -22.63%] index_fill_ strided 5 : Elapsed 0.071 ms (7.116 ms / 100) GOOD 6.819 -> 5.068 (-25.68%) [ +0.21% +0.00% +0.13% / -25.68% -24.64% -24.62%] index_fill_ strided 7 : Elapsed 0.068 ms (6.833 ms / 100) GOOD 7.129 -> 5.552 (-22.12%) [ +0.17% +0.13% +0.00% / -22.12% -21.42% -21.45%] index_fill_ strided 8 : Elapsed 0.071 ms (7.141 ms / 100) GOOD 7.122 -> 5.426 (-23.81%) [ +0.15% +0.08% +0.00% / -23.81% -23.57% -23.45%] index_fill_ strided 16 : Elapsed 0.071 ms (7.133 ms / 100) GOOD 6.861 -> 5.277 (-23.09%) [ +0.00% +0.06% +0.04% / -23.09% -22.85% -22.82%] index_fill_ random : Elapsed 0.069 ms (6.861 ms / 100) GOOD 6.810 -> 4.968 (-27.05%) [ +0.00% +0.03% +0.12% / -26.96% -26.99% -27.05%] index_fill_ random_sorted : Elapsed 0.068 ms (6.810 ms / 100) B = [15, 250, 50] (stride (1, 750, 15)) A = [15, 250, 150] (stride (250, 1, 3750)) dim = 2 10.502 -> 10.523 ( +0.20%) [ +0.06% +0.00% +0.20% / +0.20% +0.54% +0.52%] index_select const : Elapsed 0.105 ms (10.508 ms / 100) 10.968 -> 10.974 ( +0.05%) [ +0.17% +0.05% +0.00% / +0.05% +0.21% +0.55%] index_select wrap : Elapsed 0.110 ms (10.987 ms / 100) 10.962 -> 10.986 ( +0.22%) [ +0.00% +0.08% +0.13% / +0.22% +0.37% +0.40%] index_select linear : Elapsed 0.110 ms (10.962 ms / 100) 11.055 -> 11.069 ( +0.13%) [ +0.14% +0.00% +0.30% / +0.13% +0.43% +0.49%] index_select reverse : Elapsed 0.111 ms (11.070 ms / 100) 10.511 -> 10.509 ( -0.02%) [ +0.01% +0.02% +0.00% / -0.02% +0.39% +0.44%] index_select skip64 : Elapsed 0.105 ms (10.512 ms / 100) 10.502 -> 10.521 ( +0.18%) [ +0.00% +0.15% +0.12% / +0.18% +0.52% +0.67%] index_select skip256 : Elapsed 0.105 ms (10.502 ms / 100) 11.110 -> 11.117 ( +0.06%) [ +0.00% +0.01% +0.05% / +0.06% +0.25% +0.21%] index_select spread : Elapsed 0.111 ms (11.110 ms / 100) 11.102 -> 11.118 ( +0.14%) [ +0.00% +0.16% +0.27% / +0.40% +0.14% +0.38%] index_select strided 3 : Elapsed 0.111 ms (11.102 ms / 100) 11.045 -> 11.075 ( +0.27%) [ +0.00% +0.00% +0.08% / +0.27% +0.53% +0.81%] index_select strided 5 : Elapsed 0.110 ms (11.045 ms / 100) 11.078 -> 11.130 ( +0.47%) [ +0.20% +0.00% +0.14% / +0.47% +0.64% +0.68%] index_select strided 7 : Elapsed 0.111 ms (11.100 ms / 100) 11.082 -> 11.129 ( +0.42%) [ +0.15% +0.00% +0.32% / +0.42% +0.94% +0.78%] index_select strided 8 : Elapsed 0.111 ms (11.099 ms / 100) 11.096 -> 11.097 ( +0.01%) [ +0.00% +0.12% +0.06% / +0.01% +0.57% +0.43%] index_select strided 16 : Elapsed 0.111 ms (11.096 ms / 100) 11.071 -> 11.079 ( +0.07%) [ +0.06% +0.00% +0.14% / +0.07% +0.95% +1.04%] index_select strided 64 : Elapsed 0.111 ms (11.078 ms / 100) 10.646 -> 10.657 ( +0.10%) [ +0.11% +0.00% +0.20% / +0.10% +0.19% +0.31%] index_select strided 100 : Elapsed 0.107 ms (10.658 ms / 100) 11.106 -> 11.090 ( -0.14%) [ +0.00% +0.18% +0.12% / +0.14% -0.14% +0.01%] index_select random : Elapsed 0.111 ms (11.106 ms / 100) 11.024 -> 11.014 ( -0.09%) [ +0.13% +0.17% +0.00% / -0.09% +0.11% +0.14%] index_select random_sorted : Elapsed 0.110 ms (11.038 ms / 100) 11.072 -> 11.096 ( +0.22%) [ +0.15% +0.00% +0.16% / +0.22% +0.34% +0.65%] index_select perm : Elapsed 0.111 ms (11.089 ms / 100) 11.083 -> 11.077 ( -0.05%) [ +0.00% +0.14% +0.11% / -0.05% +0.55% +0.35%] index_select perm_sorted : Elapsed 0.111 ms (11.083 ms / 100) B = [15, 250, 50] (stride (1, 15, 3750)) A = [15, 250, 150] (stride (37500, 1, 250)) dim = 2 6.103 -> 6.084 ( -0.31%) [ +0.11% +0.00% +0.21% / -0.29% -0.31% -0.25%] index_select const : Elapsed 0.061 ms (6.110 ms / 100) 6.552 -> 6.473 ( -1.21%) [ +0.32% +0.06% +0.00% / -0.09% -1.21% -0.78%] index_select wrap : Elapsed 0.066 ms (6.573 ms / 100) 6.556 -> 6.490 ( -1.01%) [ +0.24% +0.23% +0.00% / +0.02% -0.88% -1.01%] index_select linear : Elapsed 0.066 ms (6.572 ms / 100) 6.564 -> 6.577 ( +0.20%) [ +0.40% +0.20% +0.00% / +0.21% +0.34% +0.20%] index_select reverse : Elapsed 0.066 ms (6.590 ms / 100) 6.105 -> 6.063 ( -0.69%) [ +0.00% +0.02% +0.02% / -0.15% -0.69% -0.61%] index_select skip64 : Elapsed 0.061 ms (6.105 ms / 100) 6.066 -> 6.054 ( -0.20%) [ +0.07% +0.23% +0.00% / +0.10% -0.20% -0.13%] index_select skip256 : Elapsed 0.061 ms (6.070 ms / 100) 6.546 -> 6.550 ( +0.06%) [ +0.50% +0.00% +0.14% / +0.06% +0.55% +0.67%] index_select spread : Elapsed 0.066 ms (6.579 ms / 100) 6.540 -> 6.551 ( +0.17%) [ +0.00% +0.26% +0.35% / +0.17% +0.60% +0.64%] index_select strided 3 : Elapsed 0.065 ms (6.540 ms / 100) 6.486 -> 6.487 ( +0.02%) [ +0.03% +0.00% +0.06% / +0.02% +1.17% +1.42%] index_select strided 5 : Elapsed 0.065 ms (6.488 ms / 100) 6.623 -> 6.539 ( -1.27%) [ +0.00% +0.57% +0.06% / +0.57% -1.27% -0.85%] index_select strided 7 : Elapsed 0.066 ms (6.623 ms / 100) 6.477 -> 6.469 ( -0.12%) [ +0.19% +0.39% +0.00% / -0.12% +0.51% +0.45%] index_select strided 8 : Elapsed 0.065 ms (6.489 ms / 100) 6.560 -> 6.510 ( -0.76%) [ +0.05% +0.00% +0.05% / +0.09% -0.67% -0.76%] index_select strided 16 : Elapsed 0.066 ms (6.563 ms / 100) 6.539 -> 6.549 ( +0.15%) [ +0.46% +0.00% +0.05% / +0.15% +0.24% +0.18%] index_select strided 64 : Elapsed 0.066 ms (6.569 ms / 100) 6.112 -> 6.106 ( -0.10%) [ +0.00% +0.08% +0.03% / -0.10% +1.19% +1.16%] index_select strided 100 : Elapsed 0.061 ms (6.112 ms / 100) 6.518 -> 6.505 ( -0.20%) [ +0.00% +0.54% +0.32% / +0.52% +0.17% -0.20%] index_select random : Elapsed 0.065 ms (6.518 ms / 100) 6.476 -> 6.439 ( -0.57%) [ +0.11% +0.00% +0.14% / +0.28% -0.37% -0.57%] index_select random_sorted : Elapsed 0.065 ms (6.483 ms / 100) 6.632 -> 6.571 ( -0.92%) [ +0.29% +0.11% +0.00% / -0.05% -0.92% -0.56%] index_select perm : Elapsed 0.067 ms (6.651 ms / 100) 6.546 -> 6.544 ( -0.03%) [ +0.29% +0.40% +0.00% / +0.64% +0.08% -0.03%] index_select perm_sorted : Elapsed 0.066 ms (6.565 ms / 100) out_shape = [50, 15, 250] in_shape = [150, 15, 250] idx_dim = 0 B = [50, 15, 250] (stride (3750, 250, 1)) A = [150, 15, 250] (stride (1, 37500, 150)) dim = 0 5.975 -> 5.955 ( -0.33%) [ +0.35% +0.00% +0.57% / +0.22% -0.33% -0.32%] index_select const : Elapsed 0.060 ms (5.996 ms / 100) 7.085 -> 7.058 ( -0.38%) [ +0.00% +0.08% +0.11% / -0.01% -0.32% -0.38%] index_select wrap : Elapsed 0.071 ms (7.085 ms / 100) 7.069 -> 7.056 ( -0.18%) [ +0.25% +0.30% +0.00% / +0.24% -0.18% -0.17%] index_select linear : Elapsed 0.071 ms (7.087 ms / 100) 7.094 -> 7.060 ( -0.48%) [ +0.18% +0.00% +0.25% / -0.14% -0.18% -0.48%] index_select reverse : Elapsed 0.071 ms (7.107 ms / 100) 6.007 -> 5.959 ( -0.80%) [ +0.00% +0.25% +0.28% / -0.07% -0.77% -0.80%] index_select skip64 : Elapsed 0.060 ms (6.007 ms / 100) 5.978 -> 5.944 ( -0.57%) [ +0.08% +0.00% +0.03% / +0.03% -0.47% -0.57%] index_select skip256 : Elapsed 0.060 ms (5.983 ms / 100) 8.643 -> 8.633 ( -0.12%) [ +0.00% +0.16% +0.24% / -0.03% -0.12% +0.07%] index_select spread : Elapsed 0.086 ms (8.643 ms / 100) 8.641 -> 8.626 ( -0.17%) [ +0.01% +0.00% +0.30% / +0.01% -0.03% -0.17%] index_select strided 3 : Elapsed 0.086 ms (8.642 ms / 100) 9.902 -> 9.912 ( +0.10%) [ +0.35% +0.10% +0.00% / +0.10% +0.19% +0.24%] index_select strided 5 : Elapsed 0.099 ms (9.937 ms / 100) 11.002 -> 10.994 ( -0.07%) [ +0.55% +0.43% +0.00% / -0.07% +0.48% +0.37%] index_select strided 7 : Elapsed 0.111 ms (11.062 ms / 100) 11.494 -> 11.506 ( +0.10%) [ +0.22% +0.00% +0.22% / +0.31% +0.30% +0.10%] index_select strided 8 : Elapsed 0.115 ms (11.519 ms / 100) 11.913 -> 11.847 ( -0.55%) [ +0.08% +0.00% +0.10% / -0.06% -0.55% -0.48%] index_select strided 16 : Elapsed 0.119 ms (11.923 ms / 100) 11.176 -> 11.184 ( +0.07%) [ +0.01% +0.36% +0.00% / +0.38% +0.07% +0.23%] index_select strided 64 : Elapsed 0.112 ms (11.177 ms / 100) 8.693 -> 8.687 ( -0.07%) [ +0.03% +0.09% +0.00% / -0.07% +0.48% +0.46%] index_select strided 100 : Elapsed 0.087 ms (8.696 ms / 100) 10.875 -> 10.857 ( -0.17%) [ +0.06% +0.17% +0.00% / +0.00% -0.01% -0.17%] index_select random : Elapsed 0.109 ms (10.882 ms / 100) 8.548 -> 8.484 ( -0.75%) [ +0.12% +0.00% +0.13% / -0.04% -0.53% -0.75%] index_select random_sorted : Elapsed 0.086 ms (8.558 ms / 100) 11.267 -> 11.281 ( +0.12%) [ +0.24% +0.00% +0.40% / +0.12% +0.65% +0.89%] index_select perm : Elapsed 0.113 ms (11.294 ms / 100) 8.518 -> 8.518 ( +0.00%) [ +0.14% +0.33% +0.00% / +0.25% +0.15% +0.00%] index_select perm_sorted : Elapsed 0.085 ms (8.530 ms / 100) B = [50, 15, 250] (stride (3750, 1, 15)) A = [150, 15, 250] (stride (250, 37500, 1)) dim = 0 6.103 -> 6.076 ( -0.44%) [ +0.21% +0.00% +0.05% / -0.20% -0.44% -0.10%] index_select const : Elapsed 0.061 ms (6.116 ms / 100) 6.535 -> 6.470 ( -0.99%) [ +0.28% +0.00% +0.32% / +0.29% -0.99% -0.72%] index_select wrap : Elapsed 0.066 ms (6.553 ms / 100) 6.547 -> 6.473 ( -1.13%) [ +0.12% +0.12% +0.00% / +0.18% -0.98% -1.13%] index_select linear : Elapsed 0.066 ms (6.555 ms / 100) 6.551 -> 6.555 ( +0.06%) [ +0.37% +0.00% +0.40% / +0.21% +0.06% +0.32%] index_select reverse : Elapsed 0.066 ms (6.575 ms / 100) 6.107 -> 6.058 ( -0.80%) [ +0.13% +0.10% +0.00% / +0.07% -0.54% -0.80%] index_select skip64 : Elapsed 0.061 ms (6.115 ms / 100) 6.068 -> 6.042 ( -0.43%) [ +0.08% +0.00% +0.02% / -0.18% -0.36% -0.43%] index_select skip256 : Elapsed 0.061 ms (6.073 ms / 100) 6.554 -> 6.553 ( -0.02%) [ +0.00% +0.05% +0.08% / -0.02% +0.41% +0.38%] index_select spread : Elapsed 0.066 ms (6.554 ms / 100) 6.544 -> 6.550 ( +0.09%) [ +0.28% +0.00% +0.09% / +0.09% +0.52% +0.87%] index_select strided 3 : Elapsed 0.066 ms (6.562 ms / 100) 6.478 -> 6.478 ( +0.00%) [ +0.02% +0.00% +0.23% / +0.00% +1.20% +1.64%] index_select strided 5 : Elapsed 0.065 ms (6.479 ms / 100) 6.638 -> 6.552 ( -1.30%) [ +0.05% +0.00% +0.08% / -0.21% -1.30% -1.28%] index_select strided 7 : Elapsed 0.066 ms (6.641 ms / 100) 6.487 -> 6.472 ( -0.23%) [ +0.00% +0.03% +0.12% / -0.23% +0.15% +0.25%] index_select strided 8 : Elapsed 0.065 ms (6.487 ms / 100) 6.542 -> 6.504 ( -0.58%) [ +0.05% +0.15% +0.00% / +0.12% -0.58% -0.31%] index_select strided 16 : Elapsed 0.065 ms (6.545 ms / 100) 6.538 -> 6.529 ( -0.14%) [ +0.44% +0.00% +0.23% / -0.14% +0.34% +0.63%] index_select strided 64 : Elapsed 0.066 ms (6.567 ms / 100) 6.107 -> 6.114 ( +0.11%) [ +0.21% +0.00% +0.08% / +0.11% +0.88% +1.05%] index_select strided 100 : Elapsed 0.061 ms (6.120 ms / 100) 6.516 -> 6.534 ( +0.28%) [ +0.08% +0.00% +0.35% / +0.28% +0.52% +0.61%] index_select random : Elapsed 0.065 ms (6.521 ms / 100) 6.449 -> 6.456 ( +0.11%) [ +0.00% +0.14% +0.22% / +0.11% +0.79% +0.73%] index_select random_sorted : Elapsed 0.064 ms (6.449 ms / 100) 6.543 -> 6.532 ( -0.17%) [ +0.00% +0.43% +0.52% / +0.18% +0.08% -0.17%] index_select perm : Elapsed 0.065 ms (6.543 ms / 100) 6.573 -> 6.487 ( -1.31%) [ +0.03% +0.00% +0.00% / +0.40% -1.23% -1.31%] index_select perm_sorted : Elapsed 0.066 ms (6.575 ms / 100) B = [50, 15, 250] (stride (1, 12500, 50)) A = [150, 15, 250] (stride (3750, 250, 1)) dim = 0 good 6.054 -> 5.677 ( -6.23%) [ +0.17% +0.00% +0.05% / -6.23% -6.14% -6.23%] index_select const : Elapsed 0.061 ms (6.064 ms / 100) 6.030 -> 6.292 ( +4.34%) [ +0.05% +0.00% +0.30% / +4.86% +4.34% +4.46%] index_select wrap : Elapsed 0.060 ms (6.033 ms / 100) 6.033 -> 6.292 ( +4.29%) [ +0.00% +0.03% +0.13% / +4.77% +4.29% +4.41%] index_select linear : Elapsed 0.060 ms (6.033 ms / 100) 6.117 -> 6.294 ( +2.89%) [ +0.15% +0.00% +0.13% / +4.33% +2.93% +2.89%] index_select reverse : Elapsed 0.061 ms (6.126 ms / 100) good 6.037 -> 5.714 ( -5.35%) [ +0.13% +0.07% +0.00% / -5.35% -5.35% -5.33%] index_select skip64 : Elapsed 0.060 ms (6.045 ms / 100) good 6.027 -> 5.672 ( -5.89%) [ +0.00% +0.03% +0.13% / -5.08% -5.89% -5.84%] index_select skip256 : Elapsed 0.060 ms (6.027 ms / 100) 6.097 -> 6.299 ( +3.31%) [ +0.03% +0.10% +0.00% / +4.35% +3.31% +3.31%] index_select spread : Elapsed 0.061 ms (6.099 ms / 100) 6.094 -> 6.266 ( +2.82%) [ +0.08% +0.00% +0.31% / +3.48% +2.82% +2.94%] index_select strided 3 : Elapsed 0.061 ms (6.099 ms / 100) 6.090 -> 6.174 ( +1.38%) [ +0.00% +0.15% +0.00% / +1.54% +1.46% +1.38%] index_select strided 5 : Elapsed 0.061 ms (6.090 ms / 100) 6.095 -> 6.340 ( +4.02%) [ +0.20% +0.07% +0.00% / +4.02% +4.18% +4.13%] index_select strided 7 : Elapsed 0.061 ms (6.107 ms / 100) 6.102 -> 6.277 ( +2.87%) [ +0.10% +0.16% +0.00% / +2.87% +4.24% +4.28%] index_select strided 8 : Elapsed 0.061 ms (6.108 ms / 100) 6.098 -> 6.316 ( +3.57%) [ +0.00% +0.07% +0.13% / +3.57% +4.17% +4.02%] index_select strided 16 : Elapsed 0.061 ms (6.098 ms / 100) 6.091 -> 6.313 ( +3.64%) [ +0.07% +0.11% +0.00% / +3.64% +4.37% +4.35%] index_select strided 64 : Elapsed 0.061 ms (6.095 ms / 100) 6.068 -> 5.785 ( -4.66%) [ +0.00% +0.23% +0.00% / -4.66% -4.27% -4.24%] index_select strided 100 : Elapsed 0.061 ms (6.068 ms / 100) 6.108 -> 6.328 ( +3.60%) [ +0.07% +0.02% +0.00% / +3.60% +3.77% +3.60%] index_select random : Elapsed 0.061 ms (6.112 ms / 100) 6.112 -> 6.226 ( +1.87%) [ +0.31% +0.21% +0.00% / +4.19% +1.87% +1.87%] index_select random_sorted : Elapsed 0.061 ms (6.131 ms / 100) 6.133 -> 6.328 ( +3.18%) [ +0.08% +0.15% +0.00% / +3.73% +3.18% +3.29%] index_select perm : Elapsed 0.061 ms (6.138 ms / 100) 6.099 -> 6.304 ( +3.36%) [ +0.00% +0.07% +0.23% / +3.46% +3.51% +3.36%] index_select perm_sorted : Elapsed 0.061 ms (6.099 ms / 100) B = [50, 15, 250] (stride (1, 50, 750)) A = [150, 15, 250] (stride (1, 37500, 150)) dim = 0 14.677 -> 14.713 ( +0.25%) [ +0.14% +0.00% +0.20% / +0.25% +1.23% +1.10%] index_select const : Elapsed 0.147 ms (14.697 ms / 100) 16.386 -> 16.411 ( +0.15%) [ +0.03% +0.09% +0.00% / +0.15% +2.23% +2.23%] index_select wrap : Elapsed 0.164 ms (16.391 ms / 100) 16.406 -> 16.418 ( +0.07%) [ +0.00% +0.05% +0.07% / +0.07% +2.11% +1.99%] index_select linear : Elapsed 0.164 ms (16.406 ms / 100) 16.365 -> 16.368 ( +0.02%) [ +0.00% +0.10% +0.13% / +0.02% +2.35% +2.38%] index_select reverse : Elapsed 0.164 ms (16.365 ms / 100) 14.703 -> 14.707 ( +0.03%) [ +0.00% +0.10% +0.00% / +0.03% +0.99% +1.01%] index_select skip64 : Elapsed 0.147 ms (14.703 ms / 100) 14.685 -> 14.709 ( +0.16%) [ +0.14% +0.08% +0.00% / +0.16% +1.04% +1.13%] index_select skip256 : Elapsed 0.147 ms (14.705 ms / 100) 18.737 -> 18.758 ( +0.11%) [ +0.00% +0.27% +0.22% / +0.33% +0.61% +0.11%] index_select spread : Elapsed 0.187 ms (18.737 ms / 100) 18.747 -> 18.780 ( +0.18%) [ +0.21% +0.00% +0.04% / +0.18% +0.37% +0.22%] index_select strided 3 : Elapsed 0.188 ms (18.786 ms / 100) 20.187 -> 20.168 ( -0.09%) [ +0.00% +0.06% +0.32% / -0.09% +0.42% +0.49%] index_select strided 5 : Elapsed 0.202 ms (20.187 ms / 100) 20.989 -> 21.011 ( +0.10%) [ +0.00% +0.17% +0.01% / +0.10% +0.31% +0.12%] index_select strided 7 : Elapsed 0.210 ms (20.989 ms / 100) 21.139 -> 21.231 ( +0.44%) [ +0.21% +0.00% +0.09% / +0.44% +0.52% +0.49%] index_select strided 8 : Elapsed 0.212 ms (21.184 ms / 100) 21.428 -> 21.423 ( -0.02%) [ +0.08% +0.11% +0.00% / -0.02% +0.12% +0.18%] index_select strided 16 : Elapsed 0.214 ms (21.446 ms / 100) 21.265 -> 21.316 ( +0.24%) [ +0.00% +0.17% +0.14% / +0.24% +0.34% +0.29%] index_select strided 64 : Elapsed 0.213 ms (21.265 ms / 100) 19.611 -> 19.636 ( +0.13%) [ +0.04% +0.03% +0.00% / +0.13% +0.18% +0.13%] index_select strided 100 : Elapsed 0.196 ms (19.619 ms / 100) 21.075 -> 21.090 ( +0.07%) [ +0.00% +0.09% +0.08% / +0.07% +0.28% +0.19%] index_select random : Elapsed 0.211 ms (21.075 ms / 100) 18.299 -> 18.342 ( +0.23%) [ +0.00% +0.21% +0.23% / +0.23% +0.47% +0.62%] index_select random_sorted : Elapsed 0.183 ms (18.299 ms / 100) 21.129 -> 21.161 ( +0.15%) [ +0.00% +0.27% +0.39% / +0.15% +0.41% +0.49%] index_select perm : Elapsed 0.211 ms (21.129 ms / 100) 18.471 -> 18.500 ( +0.16%) [ +0.04% +0.00% +0.12% / +0.16% +0.58% +0.51%] index_select perm_sorted : Elapsed 0.185 ms (18.479 ms / 100) B = [50, 15, 250] (stride (1, 50, 750)) A = [150, 15, 250] (stride (1, 150, 2250)) dim = 0 GOOD 8.901 -> 6.358 (-28.57%) [ +0.00% +0.19% +0.25% / -28.47% -28.26% -28.57%] index_select const : Elapsed 0.089 ms (8.901 ms / 100) GOOD 10.692 -> 6.708 (-37.26%) [ +0.22% +0.00% +0.37% / -36.78% -37.26% -37.19%] index_select wrap : Elapsed 0.107 ms (10.715 ms / 100) GOOD 10.733 -> 6.703 (-37.55%) [ +0.08% +0.00% +0.07% / -37.15% -37.55% -37.48%] index_select linear : Elapsed 0.107 ms (10.742 ms / 100) GOOD 10.710 -> 6.730 (-37.16%) [ +0.18% +0.15% +0.00% / -36.76% -37.15% -37.16%] index_select reverse : Elapsed 0.107 ms (10.729 ms / 100) GOOD 8.906 -> 6.365 (-28.53%) [ +0.17% +0.06% +0.00% / -27.82% -28.45% -28.53%] index_select skip64 : Elapsed 0.089 ms (8.921 ms / 100) GOOD 8.882 -> 6.343 (-28.59%) [ +0.14% +0.27% +0.00% / -28.33% -28.59% -28.57%] index_select skip256 : Elapsed 0.089 ms (8.894 ms / 100) GOOD 13.105 -> 7.435 (-43.27%) [ +0.00% +0.43% +0.20% / -43.27% -42.62% -42.74%] index_select spread : Elapsed 0.131 ms (13.105 ms / 100) GOOD 13.076 -> 7.458 (-42.96%) [ +0.00% +0.61% +1.02% / -42.96% -42.46% -42.52%] index_select strided 3 : Elapsed 0.131 ms (13.076 ms / 100) BEST 15.396 -> 7.569 (-50.84%) [ +0.18% +0.00% +0.22% / -50.84% -50.15% -50.15%] index_select strided 5 : Elapsed 0.154 ms (15.423 ms / 100) BEST 16.764 -> 7.643 (-54.41%) [ +0.00% +0.05% +0.04% / -54.41% -53.97% -53.89%] index_select strided 7 : Elapsed 0.168 ms (16.764 ms / 100) BEST 17.293 -> 7.635 (-55.85%) [ +0.02% +0.01% +0.00% / -55.85% -55.47% -55.28%] index_select strided 8 : Elapsed 0.173 ms (17.297 ms / 100) BEST 17.378 -> 7.797 (-55.13%) [ +0.19% +0.00% +0.10% / -55.13% -55.01% -55.09%] index_select strided 16 : Elapsed 0.174 ms (17.411 ms / 100) BEST 16.895 -> 7.749 (-54.13%) [ +0.01% +0.02% +0.00% / -54.08% -54.13% -54.02%] index_select strided 64 : Elapsed 0.169 ms (16.897 ms / 100) BEST 14.067 -> 6.730 (-52.16%) [ +0.45% +0.00% +0.18% / -52.16% -51.85% -51.65%] index_select strided 100 : Elapsed 0.141 ms (14.130 ms / 100) BEST 16.754 -> 7.718 (-53.93%) [ +0.00% +0.02% +0.01% / -53.93% -53.58% -53.58%] index_select random : Elapsed 0.168 ms (16.754 ms / 100) GOOD 13.046 -> 7.456 (-42.85%) [ +0.00% +0.21% +0.14% / -42.85% -42.68% -42.67%] index_select random_sorted : Elapsed 0.130 ms (13.046 ms / 100) BEST 16.761 -> 7.711 (-53.99%) [ +0.00% +0.00% +0.07% / -53.99% -53.70% -53.71%] index_select perm : Elapsed 0.168 ms (16.761 ms / 100) GOOD 13.057 -> 7.451 (-42.93%) [ +0.04% +0.00% +0.09% / -42.93% -42.65% -42.64%] index_select perm_sorted : Elapsed 0.131 ms (13.062 ms / 100) out_shape = [150, 50, 250] in_shape = [150, 15, 250] idx_dim = 1 B = [150, 50, 250] (stride (12500, 250, 1)) A = [150, 15, 250] (stride (1, 150, 2250)) dim = 1 12.846 -> 12.880 ( +0.26%) [ +0.06% +0.00% +0.13% / +0.26% +2.05% +1.94%] index_add_ linear : Elapsed 0.129 ms (12.854 ms / 100) 11.536 -> 11.541 ( +0.04%) [ +0.07% +0.00% +0.32% / +0.04% +0.35% +0.42%] index_copy_ linear : Elapsed 0.115 ms (11.544 ms / 100) 12.788 -> 12.815 ( +0.21%) [ +0.21% +0.00% +0.31% / +0.21% +3.07% +3.14%] index_add_ reverse : Elapsed 0.128 ms (12.815 ms / 100) 11.493 -> 11.522 ( +0.25%) [ +0.00% +0.03% +0.15% / +0.25% +0.77% +0.84%] index_copy_ reverse : Elapsed 0.115 ms (11.493 ms / 100) 12.745 -> 12.770 ( +0.20%) [ +0.00% +0.00% +0.12% / +0.20% +2.24% +2.00%] index_add_ spread : Elapsed 0.127 ms (12.745 ms / 100) 11.559 -> 11.585 ( +0.22%) [ +0.00% +0.20% +0.05% / +0.22% +0.74% +0.88%] index_copy_ spread : Elapsed 0.116 ms (11.559 ms / 100) 12.798 -> 12.807 ( +0.07%) [ +0.00% +0.16% +0.20% / +0.07% +1.62% +1.43%] index_add_ strided 3 : Elapsed 0.128 ms (12.798 ms / 100) 11.558 -> 11.580 ( +0.19%) [ +0.00% +0.05% +0.24% / +0.19% +0.88% +0.96%] index_copy_ strided 3 : Elapsed 0.116 ms (11.558 ms / 100) 12.678 -> 12.698 ( +0.16%) [ +0.02% +0.00% +0.00% / +0.16% +1.70% +1.79%] index_add_ strided 7 : Elapsed 0.127 ms (12.681 ms / 100) 11.539 -> 11.527 ( -0.10%) [ +0.15% +0.00% +0.22% / +0.30% -0.10% -0.01%] index_copy_ strided 7 : Elapsed 0.116 ms (11.556 ms / 100) 12.883 -> 12.860 ( -0.18%) [ +0.02% +0.05% +0.00% / -0.18% +1.16% +1.04%] index_add_ perm : Elapsed 0.129 ms (12.885 ms / 100) 11.629 -> 11.620 ( -0.08%) [ +0.19% +0.00% +0.12% / +0.11% +0.21% -0.08%] index_copy_ perm : Elapsed 0.117 ms (11.651 ms / 100) 12.837 -> 12.833 ( -0.03%) [ +0.25% +0.06% +0.00% / -0.03% +1.25% +1.19%] index_add_ perm_sorted : Elapsed 0.129 ms (12.869 ms / 100) 11.565 -> 11.593 ( +0.24%) [ +0.10% +0.00% +0.08% / +0.24% +0.34% +0.24%] index_copy_ perm_sorted : Elapsed 0.116 ms (11.577 ms / 100) 23.703 -> 23.798 ( +0.40%) [ +0.15% +0.10% +0.00% / +0.40% +4.68% +4.80%] index_select const : Elapsed 0.237 ms (23.739 ms / 100) 34.595 -> 34.835 ( +0.69%) [ +1.08% +0.79% +0.00% / +0.69% +9.89% +11.42%] index_select wrap : Elapsed 0.350 ms (34.969 ms / 100) 25.286 -> 25.437 ( +0.60%) [ +0.13% +0.28% +0.00% / +0.60% +4.31% +4.69%] index_select linear : Elapsed 0.253 ms (25.320 ms / 100) 38.970 -> 39.467 ( +1.28%) [ +1.44% +0.00% +2.62% / +1.28% +12.83% +13.27%] index_select reverse : Elapsed 0.395 ms (39.532 ms / 100) 23.792 -> 23.936 ( +0.61%) [ +0.41% +0.40% +0.00% / +0.61% +4.20% +4.11%] index_select skip64 : Elapsed 0.239 ms (23.889 ms / 100) 23.736 -> 23.782 ( +0.19%) [ +0.00% +0.52% +0.29% / +0.19% +4.42% +4.10%] index_select skip256 : Elapsed 0.237 ms (23.736 ms / 100) 31.407 -> 31.654 ( +0.79%) [ +0.23% +0.00% +0.31% / +0.79% +3.68% +3.09%] index_select spread : Elapsed 0.315 ms (31.478 ms / 100) 36.904 -> 37.064 ( +0.43%) [ +0.50% +0.00% +0.22% / +0.43% +1.53% +1.29%] index_select strided 3 : Elapsed 0.371 ms (37.090 ms / 100) 32.929 -> 32.938 ( +0.03%) [ +0.00% +0.03% +0.23% / +0.03% +5.04% +5.37%] index_select strided 5 : Elapsed 0.329 ms (32.929 ms / 100) 41.135 -> 41.312 ( +0.43%) [ +0.00% +0.64% +0.53% / +0.43% +3.14% +2.84%] index_select strided 7 : Elapsed 0.411 ms (41.135 ms / 100) 37.469 -> 38.084 ( +1.64%) [ +0.00% +0.50% +0.54% / +1.64% +10.13% +10.40%] index_select strided 8 : Elapsed 0.375 ms (37.469 ms / 100) 40.344 -> 40.710 ( +0.91%) [ +0.00% +0.98% +1.45% / +0.91% +3.63% +4.30%] index_select random : Elapsed 0.403 ms (40.344 ms / 100) 32.007 -> 31.808 ( -0.62%) [ +0.50% +0.00% +0.05% / -0.62% +2.74% +2.09%] index_select random_sorted : Elapsed 0.322 ms (32.167 ms / 100) B = [150, 50, 250] (stride (12500, 1, 50)) A = [150, 15, 250] (stride (3750, 1, 15)) dim = 1 79.785 -> 79.799 ( +0.02%) [ +0.10% +0.00% +0.10% / +0.05% +0.02% +0.06%] index_add_ linear : Elapsed 0.799 ms (79.862 ms / 100) 61.563 -> 61.682 ( +0.19%) [ +0.14% +0.05% +0.00% / +0.19% +0.26% +0.28%] index_copy_ linear : Elapsed 0.617 ms (61.650 ms / 100) 79.496 -> 79.729 ( +0.29%) [ +0.26% +0.00% +0.07% / +0.29% +0.80% +0.35%] index_add_ reverse : Elapsed 0.797 ms (79.699 ms / 100) 61.569 -> 61.625 ( +0.09%) [ +0.03% +0.00% +0.20% / +0.09% +0.31% +0.40%] index_copy_ reverse : Elapsed 0.616 ms (61.589 ms / 100) 79.565 -> 79.705 ( +0.18%) [ +0.00% +0.15% +0.24% / +0.18% +0.34% +0.30%] index_add_ spread : Elapsed 0.796 ms (79.565 ms / 100) 61.600 -> 61.679 ( +0.13%) [ +0.00% +0.17% +0.11% / +0.13% +0.21% +0.19%] index_copy_ spread : Elapsed 0.616 ms (61.600 ms / 100) 79.613 -> 79.656 ( +0.05%) [ +0.00% +0.12% +0.10% / +0.05% +0.43% +0.18%] index_add_ strided 3 : Elapsed 0.796 ms (79.613 ms / 100) 61.605 -> 61.676 ( +0.12%) [ +0.00% +0.10% +0.16% / +0.12% +0.24% +0.29%] index_copy_ strided 3 : Elapsed 0.616 ms (61.605 ms / 100) 79.507 -> 79.373 ( -0.17%) [ +0.12% +0.03% +0.00% / +0.10% -0.17% +0.06%] index_add_ strided 7 : Elapsed 0.796 ms (79.602 ms / 100) 61.580 -> 61.594 ( +0.02%) [ +0.06% +0.20% +0.00% / +0.08% +0.02% +0.25%] index_copy_ strided 7 : Elapsed 0.616 ms (61.620 ms / 100) 79.463 -> 79.555 ( +0.12%) [ +0.08% +0.00% +0.21% / +0.37% +0.51% +0.12%] index_add_ perm : Elapsed 0.795 ms (79.526 ms / 100) 61.673 -> 61.626 ( -0.08%) [ +0.00% +0.06% +0.07% / -0.07% -0.08% +0.00%] index_copy_ perm : Elapsed 0.617 ms (61.673 ms / 100) 79.561 -> 79.437 ( -0.16%) [ +0.27% +0.03% +0.00% / -0.16% +0.47% +0.62%] index_add_ perm_sorted : Elapsed 0.798 ms (79.779 ms / 100) 61.594 -> 61.646 ( +0.08%) [ +0.00% +0.13% +0.08% / +0.08% +0.27% +0.53%] index_copy_ perm_sorted : Elapsed 0.616 ms (61.594 ms / 100) BEST 196.186 -> 23.606 (-87.97%) [ +0.00% +0.15% +0.17% / -87.97% -87.80% -87.80%] index_select const : Elapsed 1.962 ms (196.186 ms / 100) BEST 198.011 -> 22.499 (-88.64%) [ +0.22% +0.00% +0.00% / -88.57% -88.64% -88.64%] index_select wrap : Elapsed 1.985 ms (198.456 ms / 100) BEST 196.721 -> 22.308 (-88.66%) [ +0.05% +0.00% +0.06% / -88.57% -88.65% -88.66%] index_select linear : Elapsed 1.968 ms (196.812 ms / 100) BEST 196.935 -> 22.582 (-88.53%) [ +0.06% +0.00% +0.11% / -88.32% -88.53% -88.53%] index_select reverse : Elapsed 1.971 ms (197.052 ms / 100) BEST 196.462 -> 22.196 (-88.70%) [ +0.00% +0.01% +0.01% / -88.46% -88.70% -88.70%] index_select skip64 : Elapsed 1.965 ms (196.462 ms / 100) BEST 196.693 -> 23.358 (-88.12%) [ +0.08% +0.00% +0.25% / -87.95% -88.12% -88.12%] index_select skip256 : Elapsed 1.968 ms (196.850 ms / 100) BEST 196.916 -> 23.338 (-88.15%) [ +0.12% +0.00% +0.24% / -88.12% -88.15% -88.14%] index_select spread : Elapsed 1.972 ms (197.157 ms / 100) BEST 198.637 -> 23.437 (-88.20%) [ +0.04% +0.04% +0.00% / -88.15% -88.20% -88.19%] index_select strided 3 : Elapsed 1.987 ms (198.713 ms / 100) BEST 198.398 -> 22.747 (-88.53%) [ +0.17% +0.18% +0.00% / -88.44% -88.52% -88.53%] index_select strided 5 : Elapsed 1.987 ms (198.736 ms / 100) BEST 199.036 -> 22.654 (-88.62%) [ +0.00% +0.07% +0.03% / -88.52% -88.62% -88.62%] index_select strided 7 : Elapsed 1.990 ms (199.036 ms / 100) BEST 199.195 -> 22.168 (-88.87%) [ +0.06% +0.00% +0.09% / -88.78% -88.87% -88.87%] index_select strided 8 : Elapsed 1.993 ms (199.315 ms / 100) BEST 198.530 -> 22.139 (-88.85%) [ +0.11% +0.09% +0.00% / -88.80% -88.85% -88.85%] index_select random : Elapsed 1.988 ms (198.757 ms / 100) BEST 196.980 -> 22.201 (-88.73%) [ +0.00% +0.08% +0.04% / -88.73% -88.71% -88.71%] index_select random_sorted : Elapsed 1.970 ms (196.980 ms / 100) B = [150, 50, 250] (stride (250, 37500, 1)) A = [150, 15, 250] (stride (250, 37500, 1)) dim = 1 10.028 -> 10.035 ( +0.07%) [ +0.02% +0.00% +0.03% / +0.07% +0.36% +0.38%] index_add_ linear : Elapsed 0.100 ms (10.030 ms / 100) 9.231 -> 9.239 ( +0.09%) [ +0.18% +0.00% +0.16% / +0.09% +0.42% +0.51%] index_copy_ linear : Elapsed 0.092 ms (9.248 ms / 100) 9.921 -> 9.956 ( +0.35%) [ +0.28% +0.00% +0.52% / +0.35% +1.18% +1.49%] index_add_ reverse : Elapsed 0.099 ms (9.949 ms / 100) 9.159 -> 9.196 ( +0.40%) [ +0.00% +0.12% +0.47% / +0.40% +0.91% +0.86%] index_copy_ reverse : Elapsed 0.092 ms (9.159 ms / 100) 10.005 -> 10.025 ( +0.20%) [ +0.31% +0.28% +0.00% / +0.20% +0.94% +0.58%] index_add_ spread : Elapsed 0.100 ms (10.036 ms / 100) 9.225 -> 9.245 ( +0.22%) [ +0.27% +0.00% +0.30% / +0.22% +0.36% +0.48%] index_copy_ spread : Elapsed 0.093 ms (9.250 ms / 100) 10.094 -> 10.123 ( +0.29%) [ +0.10% +0.00% +0.07% / +0.29% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.101 ms (10.104 ms / 100) 9.290 -> 9.284 ( -0.06%) [ +0.04% +0.00% +0.01% / +0.47% +0.38% -0.06%] index_copy_ strided 3 : Elapsed 0.093 ms (9.294 ms / 100) 10.149 -> 10.082 ( -0.66%) [ +0.13% +0.45% +0.00% / -0.06% -0.66% -0.48%] index_add_ strided 7 : Elapsed 0.102 ms (10.162 ms / 100) 9.305 -> 9.254 ( -0.55%) [ +0.29% +0.55% +0.00% / +0.15% -0.55% -0.48%] index_copy_ strided 7 : Elapsed 0.093 ms (9.332 ms / 100) 10.125 -> 10.134 ( +0.09%) [ +0.33% +0.39% +0.00% / +0.31% +0.25% +0.09%] index_add_ perm : Elapsed 0.102 ms (10.158 ms / 100) 9.326 -> 9.289 ( -0.40%) [ +0.39% +0.28% +0.00% / +0.33% -0.40% -0.15%] index_copy_ perm : Elapsed 0.094 ms (9.362 ms / 100) 10.118 -> 10.068 ( -0.49%) [ +0.00% +0.09% +0.02% / -0.14% -0.34% -0.49%] index_add_ perm_sorted : Elapsed 0.101 ms (10.118 ms / 100) 9.313 -> 9.247 ( -0.71%) [ +0.00% +0.06% +0.30% / -0.04% -0.32% -0.71%] index_copy_ perm_sorted : Elapsed 0.093 ms (9.313 ms / 100) 16.931 -> 16.955 ( +0.14%) [ +0.01% +0.00% +0.27% / +0.19% +0.14% +0.27%] index_select const : Elapsed 0.169 ms (16.932 ms / 100) 18.989 -> 18.989 ( +0.00%) [ +0.00% +0.07% +0.16% / +0.00% +0.39% +0.38%] index_select wrap : Elapsed 0.190 ms (18.989 ms / 100) 17.060 -> 17.119 ( +0.35%) [ +0.00% +0.30% +0.33% / +0.35% +0.67% +0.73%] index_select linear : Elapsed 0.171 ms (17.060 ms / 100) 18.004 -> 18.023 ( +0.11%) [ +0.13% +0.12% +0.00% / +0.11% +0.68% +0.59%] index_select reverse : Elapsed 0.180 ms (18.028 ms / 100) 16.906 -> 16.933 ( +0.16%) [ +0.02% +0.02% +0.00% / +0.22% +0.16% +0.17%] index_select skip64 : Elapsed 0.169 ms (16.910 ms / 100) 16.901 -> 16.929 ( +0.17%) [ +0.15% +0.00% +0.28% / +0.27% +0.28% +0.17%] index_select skip256 : Elapsed 0.169 ms (16.927 ms / 100) 17.624 -> 17.639 ( +0.09%) [ +0.00% +0.09% +0.16% / +0.09% +0.37% +0.22%] index_select spread : Elapsed 0.176 ms (17.624 ms / 100) 19.149 -> 19.045 ( -0.54%) [ +0.00% +0.06% +0.21% / -0.02% -0.54% -0.43%] index_select strided 3 : Elapsed 0.191 ms (19.149 ms / 100) 18.168 -> 18.165 ( -0.02%) [ +0.10% +0.00% +0.07% / -0.02% +1.43% +1.61%] index_select strided 5 : Elapsed 0.182 ms (18.186 ms / 100) 19.303 -> 19.308 ( +0.03%) [ +0.17% +0.01% +0.00% / +0.03% +0.32% +0.30%] index_select strided 7 : Elapsed 0.193 ms (19.336 ms / 100) 19.330 -> 19.332 ( +0.01%) [ +0.01% +0.00% +0.11% / +0.01% +0.08% +0.17%] index_select strided 8 : Elapsed 0.193 ms (19.331 ms / 100) 18.773 -> 18.772 ( -0.01%) [ +0.03% +0.02% +0.00% / -0.01% +0.54% +0.67%] index_select random : Elapsed 0.188 ms (18.779 ms / 100) 17.587 -> 17.608 ( +0.12%) [ +0.15% +0.00% +0.07% / +0.12% +0.23% +0.32%] index_select random_sorted : Elapsed 0.176 ms (17.614 ms / 100) B = [150, 50, 250] (stride (50, 1, 7500)) A = [150, 15, 250] (stride (3750, 250, 1)) dim = 1 95.880 -> 95.940 ( +0.06%) [ +0.00% +0.05% +0.02% / +0.06% +0.39% +0.41%] index_add_ linear : Elapsed 0.959 ms (95.880 ms / 100) 57.924 -> 57.956 ( +0.06%) [ +0.00% +0.02% +0.02% / +0.06% +0.62% +0.59%] index_copy_ linear : Elapsed 0.579 ms (57.924 ms / 100) 95.701 -> 95.710 ( +0.01%) [ +0.00% +0.02% +0.01% / +0.01% +0.43% +0.49%] index_add_ reverse : Elapsed 0.957 ms (95.701 ms / 100) 57.818 -> 57.853 ( +0.06%) [ +0.00% +0.06% +0.09% / +0.06% +0.66% +0.69%] index_copy_ reverse : Elapsed 0.578 ms (57.818 ms / 100) 95.923 -> 95.925 ( +0.00%) [ +0.06% +0.06% +0.00% / +0.00% +0.29% +0.26%] index_add_ spread : Elapsed 0.960 ms (95.979 ms / 100) 57.889 -> 57.924 ( +0.06%) [ +0.05% +0.00% +0.08% / +0.06% +0.53% +0.60%] index_copy_ spread : Elapsed 0.579 ms (57.916 ms / 100) 95.966 -> 96.011 ( +0.05%) [ +0.01% +0.00% +0.03% / +0.05% +0.23% +0.20%] index_add_ strided 3 : Elapsed 0.960 ms (95.974 ms / 100) 57.874 -> 57.929 ( +0.10%) [ +0.00% +0.08% +0.06% / +0.10% +0.61% +0.65%] index_copy_ strided 3 : Elapsed 0.579 ms (57.874 ms / 100) 95.921 -> 95.957 ( +0.04%) [ +0.00% +0.04% +0.04% / +0.04% +0.17% +0.19%] index_add_ strided 7 : Elapsed 0.959 ms (95.921 ms / 100) 57.851 -> 57.881 ( +0.05%) [ +0.01% +0.01% +0.00% / +0.05% +0.52% +0.54%] index_copy_ strided 7 : Elapsed 0.579 ms (57.855 ms / 100) 94.883 -> 94.912 ( +0.03%) [ +0.00% +0.03% +0.08% / +0.03% +0.13% +0.18%] index_add_ perm : Elapsed 0.949 ms (94.883 ms / 100) 57.234 -> 57.208 ( -0.05%) [ +0.01% +0.00% +0.02% / -0.05% +0.49% +0.58%] index_copy_ perm : Elapsed 0.572 ms (57.240 ms / 100) 95.848 -> 95.844 ( -0.00%) [ +0.00% +0.03% +0.07% / -0.00% +0.22% +0.25%] index_add_ perm_sorted : Elapsed 0.958 ms (95.848 ms / 100) 57.898 -> 57.886 ( -0.02%) [ +0.07% +0.00% +0.12% / -0.02% +0.56% +0.65%] index_copy_ perm_sorted : Elapsed 0.579 ms (57.940 ms / 100) BEST 182.404 -> 23.993 (-86.85%) [ +0.11% +0.00% +0.06% / -86.85% -86.66% -86.66%] index_select const : Elapsed 1.826 ms (182.609 ms / 100) BEST 182.810 -> 25.741 (-85.92%) [ +0.14% +0.04% +0.00% / -85.92% -85.90% -85.91%] index_select wrap : Elapsed 1.831 ms (183.057 ms / 100) BEST 182.039 -> 24.675 (-86.45%) [ +0.00% +0.12% +0.06% / -86.45% -86.43% -86.42%] index_select linear : Elapsed 1.820 ms (182.039 ms / 100) BEST 184.072 -> 24.934 (-86.45%) [ +0.09% +0.00% +0.03% / -86.19% -86.44% -86.45%] index_select reverse : Elapsed 1.842 ms (184.239 ms / 100) BEST 182.238 -> 22.659 (-87.57%) [ +0.00% +0.09% +0.05% / -87.25% -87.56% -87.57%] index_select skip64 : Elapsed 1.822 ms (182.238 ms / 100) BEST 183.389 -> 23.738 (-87.06%) [ +0.01% +0.07% +0.00% / -86.87% -87.05% -87.06%] index_select skip256 : Elapsed 1.834 ms (183.401 ms / 100) BEST 181.707 -> 25.735 (-85.84%) [ +0.00% +0.01% +0.01% / -85.80% -85.84% -85.81%] index_select spread : Elapsed 1.817 ms (181.707 ms / 100) BEST 183.143 -> 24.674 (-86.53%) [ +0.00% +0.09% +0.06% / -86.48% -86.53% -86.53%] index_select strided 3 : Elapsed 1.831 ms (183.143 ms / 100) BEST 183.589 -> 23.655 (-87.12%) [ +0.00% +0.13% +0.12% / -87.12% -87.05% -87.06%] index_select strided 5 : Elapsed 1.836 ms (183.589 ms / 100) BEST 183.725 -> 25.990 (-85.85%) [ +0.01% +0.00% +0.08% / -85.85% -85.68% -85.65%] index_select strided 7 : Elapsed 1.837 ms (183.736 ms / 100) BEST 183.238 -> 25.419 (-86.13%) [ +0.00% +0.09% +0.21% / -86.01% -86.13% -86.13%] index_select strided 8 : Elapsed 1.832 ms (183.238 ms / 100) BEST 182.517 -> 24.811 (-86.41%) [ +0.04% +0.00% +0.09% / -86.30% -86.41% -86.40%] index_select random : Elapsed 1.826 ms (182.584 ms / 100) BEST 181.680 -> 24.251 (-86.65%) [ +0.11% +0.17% +0.00% / -86.65% -86.63% -86.63%] index_select random_sorted : Elapsed 1.819 ms (181.871 ms / 100) B = [150, 50, 250] (stride (1, 150, 7500)) A = [150, 15, 250] (stride (250, 37500, 1)) dim = 1 16.005 -> 15.403 ( -3.76%) [ +0.00% +0.17% +0.09% / -0.21% -3.76% -3.64%] index_add_ linear : Elapsed 0.160 ms (16.005 ms / 100) 10.637 -> 10.588 ( -0.46%) [ +0.45% +0.02% +0.00% / +0.83% -0.46% -0.13%] index_copy_ linear : Elapsed 0.107 ms (10.685 ms / 100) 16.048 -> 15.265 ( -4.88%) [ +0.01% +0.00% +0.29% / -0.47% -4.88% -4.85%] index_add_ reverse : Elapsed 0.161 ms (16.050 ms / 100) 10.902 -> 10.561 ( -3.13%) [ +0.01% +0.00% +0.21% / -0.36% -2.79% -3.13%] index_copy_ reverse : Elapsed 0.109 ms (10.903 ms / 100) 16.653 -> 15.996 ( -3.95%) [ +0.40% +0.00% +0.45% / +0.40% -3.47% -3.95%] index_add_ spread : Elapsed 0.167 ms (16.719 ms / 100) 11.405 -> 11.373 ( -0.28%) [ +0.34% +0.55% +0.00% / -0.28% +1.46% +1.27%] index_copy_ spread : Elapsed 0.114 ms (11.444 ms / 100) 16.510 -> 16.252 ( -1.56%) [ +0.00% +0.08% +0.28% / -0.01% -1.28% -1.56%] index_add_ strided 3 : Elapsed 0.165 ms (16.510 ms / 100) 11.697 -> 11.454 ( -2.08%) [ +0.27% +0.00% +0.00% / +0.22% -2.08% -1.63%] index_copy_ strided 3 : Elapsed 0.117 ms (11.729 ms / 100) good 17.589 -> 16.449 ( -6.48%) [ +0.18% +0.15% +0.00% / +0.03% -6.42% -6.48%] index_add_ strided 7 : Elapsed 0.176 ms (17.620 ms / 100) 12.278 -> 11.890 ( -3.16%) [ +0.02% +0.00% +0.39% / +0.20% -3.16% -2.63%] index_copy_ strided 7 : Elapsed 0.123 ms (12.281 ms / 100) 17.287 -> 16.704 ( -3.37%) [ +0.49% +0.00% +0.30% / +0.23% -3.37% -3.01%] index_add_ perm : Elapsed 0.174 ms (17.372 ms / 100) 11.504 -> 11.545 ( +0.36%) [ +0.00% +0.50% +0.56% / +0.36% +4.96% +4.61%] index_copy_ perm : Elapsed 0.115 ms (11.504 ms / 100) 16.850 -> 16.169 ( -4.04%) [ +0.00% +0.43% +0.33% / +0.50% -4.04% -3.84%] index_add_ perm_sorted : Elapsed 0.168 ms (16.850 ms / 100) 11.487 -> 11.440 ( -0.41%) [ +0.00% +0.18% +0.36% / +0.11% -0.41% -0.17%] index_copy_ perm_sorted : Elapsed 0.115 ms (11.487 ms / 100) 41.130 -> 41.425 ( +0.72%) [ +3.66% +0.00% +2.66% / +0.72% +7.76% +9.25%] index_select const : Elapsed 0.426 ms (42.635 ms / 100) 39.743 -> 40.196 ( +1.14%) [ +0.00% +1.98% +0.89% / +1.14% +3.96% +2.90%] index_select wrap : Elapsed 0.397 ms (39.743 ms / 100) 34.815 -> 35.397 ( +1.67%) [ +3.32% +1.29% +0.00% / +1.67% +7.13% +8.33%] index_select linear : Elapsed 0.360 ms (35.971 ms / 100) 44.734 -> 45.622 ( +1.99%) [ +1.68% +0.25% +0.00% / +1.99% +2.52% +3.94%] index_select reverse : Elapsed 0.455 ms (45.487 ms / 100) 41.728 -> 40.627 ( -2.64%) [ +0.94% +0.00% +0.83% / -2.64% +1.63% -2.11%] index_select skip64 : Elapsed 0.421 ms (42.122 ms / 100) 41.113 -> 41.898 ( +1.91%) [ +1.06% +2.16% +0.00% / +1.91% +4.41% +3.48%] index_select skip256 : Elapsed 0.415 ms (41.549 ms / 100) 39.462 -> 39.263 ( -0.50%) [ +0.00% +1.21% +1.87% / +1.47% -0.50% +0.39%] index_select spread : Elapsed 0.395 ms (39.462 ms / 100) 48.685 -> 46.519 ( -4.45%) [ +0.10% +1.26% +0.00% / +0.82% -4.45% -4.08%] index_select strided 3 : Elapsed 0.487 ms (48.734 ms / 100) 45.559 -> 45.626 ( +0.15%) [ +0.67% +0.06% +0.00% / +0.15% +8.23% +7.59%] index_select strided 5 : Elapsed 0.459 ms (45.865 ms / 100) 45.734 -> 47.158 ( +3.11%) [ +0.85% +3.06% +0.00% / +3.11% +3.86% +4.27%] index_select strided 7 : Elapsed 0.461 ms (46.122 ms / 100) 47.217 -> 48.216 ( +2.12%) [ +0.69% +0.00% +0.20% / +2.12% +6.57% +3.13%] index_select strided 8 : Elapsed 0.475 ms (47.541 ms / 100) 43.286 -> 43.353 ( +0.15%) [ +0.14% +0.92% +0.00% / +0.15% +13.49% +10.56%] index_select random : Elapsed 0.433 ms (43.345 ms / 100) 40.739 -> 41.753 ( +2.49%) [ +2.57% +2.27% +0.00% / +2.49% +6.44% +5.71%] index_select random_sorted : Elapsed 0.418 ms (41.788 ms / 100) B = [150, 50, 250] (stride (1, 150, 7500)) A = [150, 15, 250] (stride (1, 150, 2250)) dim = 1 18.759 -> 18.352 ( -2.17%) [ +0.68% +0.00% +0.45% / +0.67% -2.00% -2.17%] index_add_ linear : Elapsed 0.189 ms (18.887 ms / 100) 13.523 -> 13.393 ( -0.96%) [ +0.05% +0.00% +0.27% / +0.10% -0.96% -0.70%] index_copy_ linear : Elapsed 0.135 ms (13.530 ms / 100) 18.778 -> 18.249 ( -2.82%) [ +0.10% +0.00% +0.31% / +0.40% -2.82% -2.14%] index_add_ reverse : Elapsed 0.188 ms (18.796 ms / 100) 13.581 -> 13.437 ( -1.06%) [ +0.04% +0.00% +0.04% / +0.01% -0.97% -1.06%] index_copy_ reverse : Elapsed 0.136 ms (13.587 ms / 100) 19.498 -> 18.794 ( -3.61%) [ +0.54% +0.00% +0.74% / +0.10% -3.36% -3.61%] index_add_ spread : Elapsed 0.196 ms (19.604 ms / 100) 13.642 -> 13.614 ( -0.21%) [ +0.05% +0.00% +0.20% / +0.06% -0.21% -0.19%] index_copy_ spread : Elapsed 0.136 ms (13.649 ms / 100) 19.438 -> 18.962 ( -2.45%) [ +0.27% +0.20% +0.00% / -0.04% -2.45% -2.09%] index_add_ strided 3 : Elapsed 0.195 ms (19.491 ms / 100) 13.767 -> 13.689 ( -0.57%) [ +0.00% +0.23% +0.24% / -0.04% -0.40% -0.57%] index_copy_ strided 3 : Elapsed 0.138 ms (13.767 ms / 100) 19.944 -> 18.995 ( -4.76%) [ +0.11% +0.47% +0.00% / +0.28% -4.70% -4.76%] index_add_ strided 7 : Elapsed 0.200 ms (19.966 ms / 100) 13.888 -> 13.556 ( -2.39%) [ +0.00% +0.06% +0.12% / -0.07% -2.35% -2.39%] index_copy_ strided 7 : Elapsed 0.139 ms (13.888 ms / 100) 19.796 -> 19.427 ( -1.86%) [ +0.73% +0.00% +0.72% / +0.49% -1.86% -1.58%] index_add_ perm : Elapsed 0.199 ms (19.941 ms / 100) 13.882 -> 13.702 ( -1.30%) [ +0.19% +0.00% +0.13% / +0.08% -1.28% -1.30%] index_copy_ perm : Elapsed 0.139 ms (13.908 ms / 100) 18.967 -> 18.604 ( -1.91%) [ +0.28% +0.00% +0.54% / +0.31% -1.91% -1.54%] index_add_ perm_sorted : Elapsed 0.190 ms (19.021 ms / 100) 13.771 -> 13.662 ( -0.79%) [ +0.00% +0.01% +0.06% / +0.11% -0.78% -0.79%] index_copy_ perm_sorted : Elapsed 0.138 ms (13.771 ms / 100) 45.806 -> 46.209 ( +0.88%) [ +0.52% +0.49% +0.00% / +0.88% +5.14% +5.01%] index_select const : Elapsed 0.460 ms (46.043 ms / 100) 46.212 -> 44.010 ( -4.76%) [ +4.57% +1.72% +0.00% / +5.70% -4.76% -3.92%] index_select wrap : Elapsed 0.483 ms (48.322 ms / 100) 34.850 -> 35.115 ( +0.76%) [ +0.12% +0.00% +1.59% / +0.76% +10.63% +9.60%] index_select linear : Elapsed 0.349 ms (34.891 ms / 100) 55.170 -> 54.722 ( -0.81%) [ +0.00% +1.00% +0.35% / -0.81% +9.14% +10.84%] index_select reverse : Elapsed 0.552 ms (55.170 ms / 100) 45.249 -> 45.336 ( +0.19%) [ +0.53% +0.04% +0.00% / +0.19% +5.88% +5.86%] index_select skip64 : Elapsed 0.455 ms (45.489 ms / 100) 44.825 -> 45.206 ( +0.85%) [ +0.86% +1.12% +0.00% / +0.85% +7.57% +8.11%] index_select skip256 : Elapsed 0.452 ms (45.212 ms / 100) Good 47.464 -> 41.641 (-12.27%) [ +0.70% +1.13% +0.00% / -1.31% -12.27% -11.33%] index_select spread : Elapsed 0.478 ms (47.798 ms / 100) 59.109 -> 59.967 ( +1.45%) [ +0.59% +1.55% +0.00% / +1.45% +1.68% +1.51%] index_select strided 3 : Elapsed 0.595 ms (59.460 ms / 100) Good 61.336 -> 51.345 (-16.29%) [ +0.00% +1.64% +1.76% / +1.40% -16.29% -14.16%] index_select strided 5 : Elapsed 0.613 ms (61.336 ms / 100) Good 55.908 -> 50.200 (-10.21%) [ +4.38% +0.00% +2.11% / -0.04% -10.21% -8.79%] index_select strided 7 : Elapsed 0.584 ms (58.356 ms / 100) good 54.604 -> 50.632 ( -7.27%) [ +0.37% +0.85% +0.00% / -0.64% -7.27% -6.18%] index_select strided 8 : Elapsed 0.548 ms (54.807 ms / 100) 61.766 -> 59.324 ( -3.95%) [ +2.48% +0.00% +0.68% / +2.17% -3.95% -3.21%] index_select random : Elapsed 0.633 ms (63.296 ms / 100) 42.542 -> 42.969 ( +1.00%) [ +0.00% +2.05% +2.17% / +1.00% +4.09% +1.15%] index_select random_sorted : Elapsed 0.425 ms (42.542 ms / 100) out_shape = [150, 15, 50] in_shape = [150, 15, 250] idx_dim = 2 B = [150, 15, 50] (stride (750, 50, 1)) A = [150, 15, 250] (stride (1, 37500, 150)) dim = 2 9.190 -> 9.191 ( +0.01%) [ +0.14% +0.00% +0.02% / +0.01% +0.66% +0.78%] index_select const : Elapsed 0.092 ms (9.203 ms / 100) 10.017 -> 10.002 ( -0.15%) [ +0.04% +0.00% +0.14% / +0.00% -0.15% +0.05%] index_select wrap : Elapsed 0.100 ms (10.021 ms / 100) 10.030 -> 10.002 ( -0.28%) [ +0.04% +0.00% +0.21% / -0.08% -0.28% +0.02%] index_select linear : Elapsed 0.100 ms (10.034 ms / 100) 10.058 -> 10.017 ( -0.41%) [ +0.06% +0.00% +0.14% / +0.23% -0.41% -0.06%] index_select reverse : Elapsed 0.101 ms (10.064 ms / 100) 9.201 -> 9.204 ( +0.03%) [ +0.10% +0.03% +0.00% / +0.03% +0.77% +0.55%] index_select skip64 : Elapsed 0.092 ms (9.210 ms / 100) 9.198 -> 9.203 ( +0.05%) [ +0.09% +0.00% +0.30% / +0.05% +0.78% +0.77%] index_select skip256 : Elapsed 0.092 ms (9.206 ms / 100) 10.160 -> 10.139 ( -0.21%) [ +0.12% +0.00% +0.16% / +0.04% +0.09% -0.21%] index_select spread : Elapsed 0.102 ms (10.172 ms / 100) 10.197 -> 10.154 ( -0.42%) [ +0.00% +0.01% +0.14% / +0.04% -0.27% -0.42%] index_select strided 3 : Elapsed 0.102 ms (10.197 ms / 100) 10.158 -> 10.130 ( -0.28%) [ +0.01% +0.00% +0.01% / +0.01% -0.01% -0.28%] index_select strided 5 : Elapsed 0.102 ms (10.159 ms / 100) 10.103 -> 10.101 ( -0.02%) [ +0.00% +0.07% +0.32% / -0.02% +0.17% +0.23%] index_select strided 7 : Elapsed 0.101 ms (10.103 ms / 100) 10.106 -> 10.093 ( -0.13%) [ +0.00% +0.13% +0.09% / +0.24% -0.12% -0.13%] index_select strided 8 : Elapsed 0.101 ms (10.106 ms / 100) 10.134 -> 10.126 ( -0.08%) [ +0.16% +0.01% +0.00% / -0.08% +0.31% +0.40%] index_select strided 16 : Elapsed 0.101 ms (10.150 ms / 100) 10.101 -> 10.112 ( +0.11%) [ +0.00% +0.03% +0.14% / +0.11% +0.88% +0.96%] index_select strided 64 : Elapsed 0.101 ms (10.101 ms / 100) 9.323 -> 9.328 ( +0.05%) [ +0.13% +0.00% +0.08% / +0.05% +0.42% +0.78%] index_select strided 100 : Elapsed 0.093 ms (9.335 ms / 100) 10.296 -> 10.245 ( -0.50%) [ +0.00% +0.16% +0.15% / +0.17% -0.50% -0.47%] index_select random : Elapsed 0.103 ms (10.296 ms / 100) 10.187 -> 10.139 ( -0.47%) [ +0.00% +0.07% +0.06% / -0.04% -0.47% -0.41%] index_select random_sorted : Elapsed 0.102 ms (10.187 ms / 100) 10.190 -> 10.135 ( -0.54%) [ +0.00% +0.08% +0.21% / +0.14% -0.48% -0.54%] index_select perm : Elapsed 0.102 ms (10.190 ms / 100) 10.094 -> 10.101 ( +0.07%) [ +0.00% +0.00% +0.09% / +0.07% +0.60% +0.46%] index_select perm_sorted : Elapsed 0.101 ms (10.094 ms / 100) B = [150, 15, 50] (stride (750, 1, 15)) A = [150, 15, 250] (stride (3750, 250, 1)) dim = 2 9.325 -> 9.326 ( +0.01%) [ +0.04% +0.00% +0.10% / +0.01% +0.84% +0.61%] index_select const : Elapsed 0.093 ms (9.329 ms / 100) 10.220 -> 10.230 ( +0.10%) [ +0.07% +0.33% +0.00% / +0.10% +0.27% +0.36%] index_select wrap : Elapsed 0.102 ms (10.227 ms / 100) 10.231 -> 10.218 ( -0.13%) [ +0.01% +0.00% +0.07% / -0.13% +0.43% +0.51%] index_select linear : Elapsed 0.102 ms (10.232 ms / 100) 10.241 -> 10.247 ( +0.06%) [ +0.02% +0.10% +0.00% / +0.06% +0.66% +0.32%] index_select reverse : Elapsed 0.102 ms (10.243 ms / 100) 9.331 -> 9.335 ( +0.04%) [ +0.02% +0.02% +0.00% / +0.04% +0.62% +0.89%] index_select skip64 : Elapsed 0.093 ms (9.333 ms / 100) 9.329 -> 9.335 ( +0.06%) [ +0.29% +0.06% +0.00% / +0.06% +0.62% +0.83%] index_select skip256 : Elapsed 0.094 ms (9.356 ms / 100) 13.136 -> 13.078 ( -0.44%) [ +0.21% +0.00% +0.13% / +0.21% -0.44% -0.34%] index_select spread : Elapsed 0.132 ms (13.163 ms / 100) 11.793 -> 11.724 ( -0.59%) [ +0.03% +0.00% +0.24% / -0.23% -0.03% -0.59%] index_select strided 3 : Elapsed 0.118 ms (11.796 ms / 100) 13.098 -> 13.059 ( -0.30%) [ +0.18% +0.79% +0.00% / +0.43% -0.30% +0.16%] index_select strided 5 : Elapsed 0.131 ms (13.122 ms / 100) 14.026 -> 14.053 ( +0.19%) [ +0.00% +0.41% +0.55% / +0.36% +0.19% +0.28%] index_select strided 7 : Elapsed 0.140 ms (14.026 ms / 100) 14.424 -> 14.347 ( -0.53%) [ +0.00% +0.48% +0.19% / -0.23% -0.36% -0.53%] index_select strided 8 : Elapsed 0.144 ms (14.424 ms / 100) 14.757 -> 14.761 ( +0.03%) [ +0.47% +0.00% +0.16% / +0.25% +0.22% +0.03%] index_select strided 16 : Elapsed 0.148 ms (14.826 ms / 100) 14.424 -> 14.395 ( -0.20%) [ +0.05% +0.10% +0.00% / +0.14% -0.20% -0.09%] index_select strided 64 : Elapsed 0.144 ms (14.431 ms / 100) 13.159 -> 13.141 ( -0.14%) [ +0.00% +0.02% +0.08% / -0.14% +0.11% -0.02%] index_select strided 100 : Elapsed 0.132 ms (13.159 ms / 100) 14.274 -> 14.262 ( -0.08%) [ +0.00% +0.13% +0.04% / -0.01% -0.08% -0.08%] index_select random : Elapsed 0.143 ms (14.274 ms / 100) 12.576 -> 12.499 ( -0.61%) [ +0.05% +0.22% +0.00% / -0.08% -0.29% -0.61%] index_select random_sorted : Elapsed 0.126 ms (12.582 ms / 100) 14.295 -> 14.228 ( -0.47%) [ +0.16% +0.00% +0.06% / +0.17% -0.14% -0.47%] index_select perm : Elapsed 0.143 ms (14.318 ms / 100) 12.721 -> 12.638 ( -0.65%) [ +0.02% +0.13% +0.00% / +0.05% -0.65% -0.31%] index_select perm_sorted : Elapsed 0.127 ms (12.723 ms / 100) B = [150, 15, 50] (stride (50, 7500, 1)) A = [150, 15, 250] (stride (1, 37500, 150)) dim = 2 good 5.911 -> 5.399 ( -8.66%) [ +0.08% +0.00% +0.08% / -8.66% -8.44% -8.46%] index_select const : Elapsed 0.059 ms (5.916 ms / 100) 6.124 -> 6.063 ( -1.00%) [ +0.00% +0.07% +0.03% / -0.65% -1.00% -0.73%] index_select wrap : Elapsed 0.061 ms (6.124 ms / 100) 6.131 -> 6.063 ( -1.11%) [ +0.28% +0.26% +0.00% / -1.11% -0.78% -0.55%] index_select linear : Elapsed 0.061 ms (6.148 ms / 100) 6.158 -> 6.068 ( -1.46%) [ +0.00% +0.05% +0.15% / -1.46% -1.17% -0.96%] index_select reverse : Elapsed 0.062 ms (6.158 ms / 100) good 5.848 -> 5.405 ( -7.58%) [ +0.17% +0.00% +0.15% / -7.58% -7.51% -7.49%] index_select skip64 : Elapsed 0.059 ms (5.858 ms / 100) good 5.859 -> 5.406 ( -7.73%) [ +0.00% +0.05% +0.12% / -7.68% -7.73% -7.71%] index_select skip256 : Elapsed 0.059 ms (5.859 ms / 100) 6.240 -> 6.253 ( +0.21%) [ +0.32% +0.26% +0.00% / +1.44% +0.21% +0.43%] index_select spread : Elapsed 0.063 ms (6.260 ms / 100) 6.257 -> 6.138 ( -1.90%) [ +0.00% +0.08% +0.02% / -1.04% -1.82% -1.90%] index_select strided 3 : Elapsed 0.063 ms (6.257 ms / 100) 6.245 -> 6.273 ( +0.45%) [ +0.13% +0.00% +0.00% / +1.28% +0.45% +0.56%] index_select strided 5 : Elapsed 0.063 ms (6.253 ms / 100) 6.219 -> 6.189 ( -0.48%) [ +0.37% +0.00% +0.47% / -0.45% -0.42% -0.48%] index_select strided 7 : Elapsed 0.062 ms (6.242 ms / 100) 6.205 -> 6.226 ( +0.34%) [ +0.16% +0.06% +0.00% / +0.34% +1.00% +0.87%] index_select strided 8 : Elapsed 0.062 ms (6.215 ms / 100) 6.214 -> 6.229 ( +0.24%) [ +0.26% +0.26% +0.00% / +0.24% +0.93% +0.82%] index_select strided 16 : Elapsed 0.062 ms (6.230 ms / 100) 6.196 -> 6.232 ( +0.58%) [ +0.00% +0.16% +0.15% / +0.58% +1.28% +1.18%] index_select strided 64 : Elapsed 0.062 ms (6.196 ms / 100) good 5.784 -> 5.480 ( -5.26%) [ +0.40% +0.14% +0.00% / -4.88% -5.17% -5.26%] index_select strided 100 : Elapsed 0.058 ms (5.807 ms / 100) 6.243 -> 6.193 ( -0.80%) [ +0.00% +0.22% +0.10% / -0.51% -0.50% -0.80%] index_select random : Elapsed 0.062 ms (6.243 ms / 100) 6.151 -> 6.182 ( +0.50%) [ +0.10% +0.00% +0.21% / +0.73% +0.50% +0.75%] index_select random_sorted : Elapsed 0.062 ms (6.157 ms / 100) 6.296 -> 6.250 ( -0.73%) [ +0.06% +0.00% +0.10% / -0.51% -0.73% -0.59%] index_select perm : Elapsed 0.063 ms (6.300 ms / 100) 6.209 -> 6.240 ( +0.50%) [ +0.00% +0.26% +0.03% / +0.71% +0.50% +0.52%] index_select perm_sorted : Elapsed 0.062 ms (6.209 ms / 100) B = [150, 15, 50] (stride (15, 1, 2250)) A = [150, 15, 250] (stride (3750, 250, 1)) dim = 2 5.182 -> 5.185 ( +0.06%) [ +0.15% +0.10% +0.00% / +0.06% +0.31% +0.52%] index_select const : Elapsed 0.052 ms (5.190 ms / 100) 5.827 -> 5.824 ( -0.05%) [ +0.41% +0.00% +0.21% / -0.05% +0.36% +0.27%] index_select wrap : Elapsed 0.059 ms (5.851 ms / 100) 5.827 -> 5.821 ( -0.10%) [ +0.05% +0.00% +0.22% / -0.10% +0.26% +0.36%] index_select linear : Elapsed 0.058 ms (5.830 ms / 100) 5.834 -> 5.837 ( +0.05%) [ +0.00% +0.21% +0.03% / +0.21% +0.09% +0.05%] index_select reverse : Elapsed 0.058 ms (5.834 ms / 100) 5.192 -> 5.188 ( -0.08%) [ +0.15% +0.00% +0.31% / +0.23% -0.08% +0.00%] index_select skip64 : Elapsed 0.052 ms (5.200 ms / 100) 5.195 -> 5.183 ( -0.23%) [ +0.13% +0.00% +0.31% / +0.17% -0.15% -0.23%] index_select skip256 : Elapsed 0.052 ms (5.202 ms / 100) 7.769 -> 7.762 ( -0.09%) [ +0.00% +0.37% +0.12% / +0.26% +0.17% -0.09%] index_select spread : Elapsed 0.078 ms (7.769 ms / 100) 6.860 -> 6.878 ( +0.26%) [ +0.76% +0.00% +0.60% / +0.26% +0.73% +0.61%] index_select strided 3 : Elapsed 0.069 ms (6.912 ms / 100) 7.776 -> 7.749 ( -0.35%) [ +0.01% +0.08% +0.00% / -0.35% +0.01% +0.04%] index_select strided 5 : Elapsed 0.078 ms (7.777 ms / 100) 8.577 -> 8.585 ( +0.09%) [ +0.00% +0.22% +0.13% / +0.09% +0.13% +0.33%] index_select strided 7 : Elapsed 0.086 ms (8.577 ms / 100) 8.986 -> 8.940 ( -0.51%) [ +0.17% +0.16% +0.00% / +0.06% -0.51% -0.26%] index_select strided 8 : Elapsed 0.090 ms (9.001 ms / 100) 9.282 -> 9.285 ( +0.03%) [ +0.22% +0.17% +0.00% / +0.62% +0.04% +0.03%] index_select strided 16 : Elapsed 0.093 ms (9.302 ms / 100) 8.882 -> 8.845 ( -0.42%) [ +0.01% +0.00% +0.34% / +0.15% -0.42% -0.25%] index_select strided 64 : Elapsed 0.089 ms (8.883 ms / 100) 7.348 -> 7.326 ( -0.30%) [ +0.01% +0.00% +0.24% / +0.10% -0.30% +0.05%] index_select strided 100 : Elapsed 0.073 ms (7.349 ms / 100) 8.717 -> 8.690 ( -0.31%) [ +0.24% +0.00% +0.08% / +0.25% -0.31% -0.21%] index_select random : Elapsed 0.087 ms (8.738 ms / 100) 7.465 -> 7.454 ( -0.15%) [ +0.08% +0.00% +0.11% / +0.16% -0.15% +0.04%] index_select random_sorted : Elapsed 0.075 ms (7.471 ms / 100) 8.836 -> 8.846 ( +0.11%) [ +0.00% +0.26% +0.20% / +0.18% +0.11% +0.37%] index_select perm : Elapsed 0.088 ms (8.836 ms / 100) 7.582 -> 7.603 ( +0.28%) [ +0.91% +0.00% +0.26% / +0.29% +0.42% +0.28%] index_select perm_sorted : Elapsed 0.077 ms (7.651 ms / 100) out_shape = [50, 250, 15] in_shape = [150, 250, 15] idx_dim = 0 B = [50, 250, 15] (stride (15, 750, 1)) A = [150, 250, 15] (stride (3750, 15, 1)) dim = 0 10.343 -> 10.338 ( -0.05%) [ +0.06% +0.13% +0.00% / -0.05% +0.36% +0.24%] index_select const : Elapsed 0.103 ms (10.349 ms / 100) 10.821 -> 10.833 ( +0.11%) [ +0.00% +0.04% +0.03% / +0.11% +0.38% +0.43%] index_select wrap : Elapsed 0.108 ms (10.821 ms / 100) 10.816 -> 10.832 ( +0.15%) [ +0.00% +0.10% +0.05% / +0.16% +0.15% +0.35%] index_select linear : Elapsed 0.108 ms (10.816 ms / 100) 10.911 -> 10.906 ( -0.05%) [ +0.00% +0.08% +0.03% / -0.05% +0.51% +0.66%] index_select reverse : Elapsed 0.109 ms (10.911 ms / 100) 10.338 -> 10.346 ( +0.08%) [ +0.00% +0.14% +0.22% / +0.08% +0.29% +0.45%] index_select skip64 : Elapsed 0.103 ms (10.338 ms / 100) 10.326 -> 10.341 ( +0.15%) [ +0.00% +0.29% +0.19% / +0.15% +0.65% +0.61%] index_select skip256 : Elapsed 0.103 ms (10.326 ms / 100) 10.957 -> 10.952 ( -0.05%) [ +0.00% +0.09% +0.09% / -0.05% +0.19% +0.14%] index_select spread : Elapsed 0.110 ms (10.957 ms / 100) 10.964 -> 10.975 ( +0.10%) [ +0.05% +0.04% +0.00% / +0.15% +0.22% +0.10%] index_select strided 3 : Elapsed 0.110 ms (10.969 ms / 100) 10.878 -> 10.907 ( +0.27%) [ +0.05% +0.00% +0.22% / +0.27% +0.73% +0.71%] index_select strided 5 : Elapsed 0.109 ms (10.883 ms / 100) 10.956 -> 10.975 ( +0.17%) [ +0.06% +0.00% +0.10% / +0.17% +0.39% +0.33%] index_select strided 7 : Elapsed 0.110 ms (10.963 ms / 100) 10.947 -> 10.951 ( +0.04%) [ +0.10% +0.06% +0.00% / +0.04% +0.77% +0.66%] index_select strided 8 : Elapsed 0.110 ms (10.958 ms / 100) 10.944 -> 10.951 ( +0.06%) [ +0.11% +0.07% +0.00% / +0.06% +0.47% +0.60%] index_select strided 16 : Elapsed 0.110 ms (10.956 ms / 100) 10.925 -> 10.937 ( +0.11%) [ +0.16% +0.00% +0.03% / +0.11% +0.95% +0.99%] index_select strided 64 : Elapsed 0.109 ms (10.943 ms / 100) 10.481 -> 10.476 ( -0.05%) [ +0.05% +0.00% +0.04% / -0.05% +0.20% +0.08%] index_select strided 100 : Elapsed 0.105 ms (10.486 ms / 100) 10.939 -> 10.954 ( +0.14%) [ +0.11% +0.05% +0.00% / +0.14% +0.26% +0.27%] index_select random : Elapsed 0.110 ms (10.951 ms / 100) 10.858 -> 10.870 ( +0.11%) [ +0.00% +0.18% +0.10% / +0.11% +0.33% +0.36%] index_select random_sorted : Elapsed 0.109 ms (10.858 ms / 100) 10.973 -> 10.968 ( -0.05%) [ +0.02% +0.09% +0.00% / -0.05% +0.07% +0.13%] index_select perm : Elapsed 0.110 ms (10.975 ms / 100) 10.941 -> 10.944 ( +0.03%) [ +0.06% +0.00% +0.15% / +0.03% +0.08% +0.27%] index_select perm_sorted : Elapsed 0.109 ms (10.948 ms / 100) B = [50, 250, 15] (stride (1, 50, 12500)) A = [150, 250, 15] (stride (1, 150, 37500)) dim = 0 GOOD 8.686 -> 6.330 (-27.12%) [ +0.45% +0.00% +0.32% / -27.12% -27.02% -26.86%] index_select const : Elapsed 0.087 ms (8.725 ms / 100) GOOD 10.279 -> 6.660 (-35.21%) [ +0.13% +0.01% +0.00% / -35.21% -35.04% -34.83%] index_select wrap : Elapsed 0.103 ms (10.292 ms / 100) GOOD 10.291 -> 6.647 (-35.41%) [ +0.06% +0.00% +0.02% / -35.41% -34.99% -35.04%] index_select linear : Elapsed 0.103 ms (10.297 ms / 100) GOOD 10.267 -> 6.638 (-35.35%) [ +0.30% +0.09% +0.00% / -35.35% -34.74% -34.46%] index_select reverse : Elapsed 0.103 ms (10.298 ms / 100) GOOD 8.692 -> 6.359 (-26.84%) [ +0.00% +0.10% +0.21% / -26.54% -26.61% -26.84%] index_select skip64 : Elapsed 0.087 ms (8.692 ms / 100) GOOD 8.692 -> 6.322 (-27.27%) [ +0.21% +0.08% +0.00% / -27.27% -26.79% -27.14%] index_select skip256 : Elapsed 0.087 ms (8.710 ms / 100) GOOD 12.743 -> 7.380 (-42.09%) [ +0.09% +0.00% +0.33% / -42.09% -41.67% -41.78%] index_select spread : Elapsed 0.128 ms (12.755 ms / 100) GOOD 12.754 -> 7.371 (-42.21%) [ +0.37% +0.12% +0.00% / -42.21% -41.80% -41.70%] index_select strided 3 : Elapsed 0.128 ms (12.801 ms / 100) GOOD 14.671 -> 7.493 (-48.93%) [ +0.18% +0.12% +0.00% / -48.93% -48.33% -48.29%] index_select strided 5 : Elapsed 0.147 ms (14.697 ms / 100) BEST 15.959 -> 7.575 (-52.53%) [ +0.12% +0.08% +0.00% / -52.53% -52.30% -52.13%] index_select strided 7 : Elapsed 0.160 ms (15.978 ms / 100) BEST 16.342 -> 7.589 (-53.56%) [ +0.27% +0.00% +0.21% / -53.56% -53.25% -53.24%] index_select strided 8 : Elapsed 0.164 ms (16.386 ms / 100) BEST 16.476 -> 7.693 (-53.31%) [ +0.11% +0.00% +0.30% / -53.31% -53.19% -53.12%] index_select strided 16 : Elapsed 0.165 ms (16.494 ms / 100) BEST 16.116 -> 7.663 (-52.45%) [ +0.00% +0.23% +0.01% / -52.45% -52.32% -52.41%] index_select strided 64 : Elapsed 0.161 ms (16.116 ms / 100) BEST 13.511 -> 6.700 (-50.41%) [ +0.29% +0.24% +0.00% / -50.41% -50.17% -50.28%] index_select strided 100 : Elapsed 0.136 ms (13.550 ms / 100) BEST 16.142 -> 7.655 (-52.58%) [ +0.00% +0.37% +0.16% / -52.58% -52.32% -52.37%] index_select random : Elapsed 0.161 ms (16.142 ms / 100) GOOD 12.489 -> 7.346 (-41.18%) [ +0.23% +0.10% +0.00% / -41.11% -41.18% -41.10%] index_select random_sorted : Elapsed 0.125 ms (12.518 ms / 100) BEST 15.883 -> 7.610 (-52.09%) [ +0.10% +0.11% +0.00% / -52.09% -51.80% -51.82%] index_select perm : Elapsed 0.159 ms (15.899 ms / 100) GOOD 12.431 -> 7.315 (-41.16%) [ +0.10% +0.01% +0.00% / -41.16% -40.83% -40.94%] index_select perm_sorted : Elapsed 0.124 ms (12.443 ms / 100) out_shape = [150, 50, 15] in_shape = [150, 250, 15] idx_dim = 1 B = [150, 50, 15] (stride (750, 15, 1)) A = [150, 250, 15] (stride (250, 1, 37500)) dim = 1 5.885 -> 5.897 ( +0.20%) [ +0.36% +0.22% +0.00% / +0.20% +0.27% +0.63%] index_select const : Elapsed 0.059 ms (5.906 ms / 100) 6.704 -> 6.723 ( +0.28%) [ +0.28% +0.30% +0.00% / +0.60% +0.31% +0.28%] index_select wrap : Elapsed 0.067 ms (6.723 ms / 100) 6.718 -> 6.721 ( +0.04%) [ +0.24% +0.00% +0.10% / +0.04% +0.16% +0.10%] index_select linear : Elapsed 0.067 ms (6.734 ms / 100) 6.727 -> 6.707 ( -0.30%) [ +0.00% +0.06% +0.01% / -0.15% -0.30% -0.15%] index_select reverse : Elapsed 0.067 ms (6.727 ms / 100) 5.882 -> 5.899 ( +0.29%) [ +0.46% +0.00% +0.54% / +0.29% +0.70% +0.58%] index_select skip64 : Elapsed 0.059 ms (5.909 ms / 100) 5.890 -> 5.907 ( +0.29%) [ +0.00% +0.12% +0.31% / +0.29% +0.49% +0.48%] index_select skip256 : Elapsed 0.059 ms (5.890 ms / 100) 9.319 -> 9.359 ( +0.43%) [ +0.44% +0.41% +0.00% / +0.85% +0.55% +0.43%] index_select spread : Elapsed 0.094 ms (9.360 ms / 100) 8.073 -> 8.067 ( -0.07%) [ +0.25% +0.21% +0.00% / +0.01% -0.07% +0.02%] index_select strided 3 : Elapsed 0.081 ms (8.093 ms / 100) 9.343 -> 9.358 ( +0.16%) [ +0.03% +0.00% +0.12% / +0.18% +0.16% +0.22%] index_select strided 5 : Elapsed 0.093 ms (9.346 ms / 100) 10.379 -> 10.350 ( -0.28%) [ +0.07% +0.17% +0.00% / -0.07% +0.04% -0.28%] index_select strided 7 : Elapsed 0.104 ms (10.386 ms / 100) 10.833 -> 10.754 ( -0.73%) [ +0.01% +0.15% +0.00% / -0.39% -0.68% -0.73%] index_select strided 8 : Elapsed 0.108 ms (10.834 ms / 100) 11.018 -> 10.951 ( -0.61%) [ +0.05% +0.25% +0.00% / -0.01% -0.61% -0.58%] index_select strided 16 : Elapsed 0.110 ms (11.023 ms / 100) 10.596 -> 10.591 ( -0.05%) [ +0.00% +0.28% +0.34% / -0.05% +0.03% -0.04%] index_select strided 64 : Elapsed 0.106 ms (10.596 ms / 100) 8.726 -> 8.700 ( -0.30%) [ +0.00% +0.02% +0.13% / +0.05% -0.30% -0.14%] index_select strided 100 : Elapsed 0.087 ms (8.726 ms / 100) 10.180 -> 10.176 ( -0.04%) [ +0.21% +0.13% +0.00% / -0.04% +0.10% +0.15%] index_select random : Elapsed 0.102 ms (10.201 ms / 100) 8.633 -> 8.623 ( -0.12%) [ +0.00% +0.05% +0.05% / +0.08% -0.12% -0.10%] index_select random_sorted : Elapsed 0.086 ms (8.633 ms / 100) 10.462 -> 10.419 ( -0.41%) [ +0.01% +0.09% +0.00% / -0.14% -0.36% -0.41%] index_select perm : Elapsed 0.105 ms (10.463 ms / 100) 8.908 -> 8.884 ( -0.27%) [ +0.00% +0.00% +0.33% / -0.10% -0.27% -0.20%] index_select perm_sorted : Elapsed 0.089 ms (8.908 ms / 100) B = [150, 50, 15] (stride (1, 2250, 150)) A = [150, 250, 15] (stride (3750, 1, 250)) dim = 1 5.926 -> 5.947 ( +0.35%) [ +0.34% +0.00% +0.39% / +0.35% +0.47% +0.56%] index_select const : Elapsed 0.059 ms (5.946 ms / 100) 6.553 -> 6.551 ( -0.03%) [ +0.00% +0.08% +0.05% / -0.03% +0.49% +0.37%] index_select wrap : Elapsed 0.066 ms (6.553 ms / 100) 6.562 -> 6.567 ( +0.08%) [ +0.00% +0.23% +0.30% / +0.08% +0.15% +0.53%] index_select linear : Elapsed 0.066 ms (6.562 ms / 100) 6.575 -> 6.572 ( -0.05%) [ +0.00% +0.05% +0.06% / -0.05% +0.09% +0.06%] index_select reverse : Elapsed 0.066 ms (6.575 ms / 100) 5.927 -> 5.947 ( +0.34%) [ +0.20% +0.02% +0.00% / +0.34% +0.47% +0.76%] index_select skip64 : Elapsed 0.059 ms (5.939 ms / 100) 5.929 -> 5.919 ( -0.17%) [ +0.08% +0.24% +0.00% / -0.17% +0.49% +0.73%] index_select skip256 : Elapsed 0.059 ms (5.934 ms / 100) 8.849 -> 8.860 ( +0.12%) [ +0.27% +0.38% +0.00% / +0.25% +0.12% +0.34%] index_select spread : Elapsed 0.089 ms (8.873 ms / 100) 7.768 -> 7.761 ( -0.09%) [ +0.14% +0.00% +0.18% / +0.14% -0.09% -0.06%] index_select strided 3 : Elapsed 0.078 ms (7.779 ms / 100) 8.841 -> 8.842 ( +0.01%) [ +0.28% +0.29% +0.00% / +0.28% +0.01% +0.07%] index_select strided 5 : Elapsed 0.089 ms (8.866 ms / 100) 9.769 -> 9.778 ( +0.09%) [ +0.20% +0.06% +0.00% / +0.09% +0.27% +0.23%] index_select strided 7 : Elapsed 0.098 ms (9.789 ms / 100) 10.154 -> 10.144 ( -0.10%) [ +0.00% +0.44% +0.21% / +0.17% +0.09% -0.10%] index_select strided 8 : Elapsed 0.102 ms (10.154 ms / 100) 10.344 -> 10.332 ( -0.12%) [ +0.28% +0.25% +0.00% / -0.12% -0.05% -0.11%] index_select strided 16 : Elapsed 0.104 ms (10.373 ms / 100) 9.977 -> 9.957 ( -0.20%) [ +0.13% +0.03% +0.00% / +0.25% -0.10% -0.20%] index_select strided 64 : Elapsed 0.100 ms (9.990 ms / 100) 8.407 -> 8.390 ( -0.20%) [ +0.06% +0.04% +0.00% / -0.20% -0.17% -0.07%] index_select strided 100 : Elapsed 0.084 ms (8.412 ms / 100) 9.821 -> 9.797 ( -0.24%) [ +0.00% +0.07% +0.09% / -0.05% -0.24% -0.10%] index_select random : Elapsed 0.098 ms (9.821 ms / 100) 8.490 -> 8.497 ( +0.08%) [ +0.13% +0.00% +0.14% / +0.08% +0.16% +0.20%] index_select random_sorted : Elapsed 0.085 ms (8.501 ms / 100) 9.785 -> 9.769 ( -0.16%) [ +0.21% +0.08% +0.00% / +0.12% -0.05% -0.16%] index_select perm : Elapsed 0.098 ms (9.806 ms / 100) 8.408 -> 8.390 ( -0.21%) [ +0.15% +0.04% +0.00% / -0.04% -0.11% -0.21%] index_select perm_sorted : Elapsed 0.084 ms (8.421 ms / 100) B = [150, 50, 15] (stride (1, 2250, 150)) A = [150, 250, 15] (stride (1, 150, 37500)) dim = 1 5.738 -> 5.752 ( +0.24%) [ +0.00% +0.14% +0.09% / +0.24% +0.52% +0.42%] index_select const : Elapsed 0.057 ms (5.738 ms / 100) 5.892 -> 5.897 ( +0.08%) [ +0.00% +0.17% +0.08% / +0.08% +0.78% +0.70%] index_select wrap : Elapsed 0.059 ms (5.892 ms / 100) 5.904 -> 5.917 ( +0.22%) [ +0.00% +0.34% +0.19% / +0.22% +0.49% +0.76%] index_select linear : Elapsed 0.059 ms (5.904 ms / 100) 5.922 -> 5.918 ( -0.07%) [ +0.00% +0.10% +0.20% / -0.02% +0.10% -0.07%] index_select reverse : Elapsed 0.059 ms (5.922 ms / 100) 5.697 -> 5.698 ( +0.02%) [ +0.11% +0.02% +0.00% / +0.02% +0.97% +0.95%] index_select skip64 : Elapsed 0.057 ms (5.703 ms / 100) 5.697 -> 5.697 ( +0.00%) [ +0.00% +0.16% +0.11% / +0.00% +1.07% +0.86%] index_select skip256 : Elapsed 0.057 ms (5.697 ms / 100) 5.996 -> 5.985 ( -0.18%) [ +0.22% +0.00% +0.28% / -0.18% -0.07% -0.05%] index_select spread : Elapsed 0.060 ms (6.009 ms / 100) 6.007 -> 5.953 ( -0.90%) [ +0.25% +0.02% +0.00% / +0.22% -0.77% -0.90%] index_select strided 3 : Elapsed 0.060 ms (6.022 ms / 100) 5.997 -> 5.981 ( -0.27%) [ +0.10% +0.10% +0.00% / +0.12% -0.08% -0.27%] index_select strided 5 : Elapsed 0.060 ms (6.003 ms / 100) 5.967 -> 5.975 ( +0.13%) [ +0.00% +0.03% +0.12% / +0.13% +0.65% +0.45%] index_select strided 7 : Elapsed 0.060 ms (5.967 ms / 100) 5.967 -> 5.972 ( +0.08%) [ +0.00% +0.50% +0.13% / +0.08% +0.27% +0.20%] index_select strided 8 : Elapsed 0.060 ms (5.967 ms / 100) 5.977 -> 5.996 ( +0.32%) [ +0.33% +0.00% +0.25% / +0.32% +0.47% +0.38%] index_select strided 16 : Elapsed 0.060 ms (5.997 ms / 100) 5.967 -> 5.969 ( +0.03%) [ +0.08% +0.00% +0.25% / +0.03% +0.82% +0.85%] index_select strided 64 : Elapsed 0.060 ms (5.972 ms / 100) 5.610 -> 5.608 ( -0.04%) [ +0.00% +0.09% +0.04% / -0.04% +0.59% +0.14%] index_select strided 100 : Elapsed 0.056 ms (5.610 ms / 100) 6.055 -> 6.044 ( -0.18%) [ +0.00% +0.51% +0.43% / +0.45% -0.18% -0.13%] index_select random : Elapsed 0.061 ms (6.055 ms / 100) 5.974 -> 5.978 ( +0.07%) [ +0.44% +0.00% +0.23% / +0.07% +0.89% +0.57%] index_select random_sorted : Elapsed 0.060 ms (6.000 ms / 100) 6.066 -> 6.026 ( -0.66%) [ +0.13% +0.00% +0.10% / -0.12% -0.41% -0.66%] index_select perm : Elapsed 0.061 ms (6.074 ms / 100) 5.975 -> 5.970 ( -0.08%) [ +0.05% +0.00% +0.15% / -0.08% +0.99% +0.95%] index_select perm_sorted : Elapsed 0.060 ms (5.978 ms / 100) out_shape = [150, 250, 50] in_shape = [150, 250, 15] idx_dim = 2 B = [150, 250, 50] (stride (50, 7500, 1)) A = [150, 250, 15] (stride (250, 1, 37500)) dim = 2 102.290 -> 102.286 ( -0.00%) [ +0.00% +0.01% +0.01% / -0.00% +0.32% +0.27%] index_add_ linear : Elapsed 1.023 ms (102.290 ms / 100) 65.165 -> 65.151 ( -0.02%) [ +0.00% +0.02% +0.03% / -0.02% +0.44% +0.42%] index_copy_ linear : Elapsed 0.652 ms (65.165 ms / 100) 102.202 -> 102.270 ( +0.07%) [ +0.00% +0.05% +0.06% / +0.07% +0.33% +0.28%] index_add_ reverse : Elapsed 1.022 ms (102.202 ms / 100) 65.078 -> 65.080 ( +0.00%) [ +0.04% +0.11% +0.00% / +0.00% +0.41% +0.36%] index_copy_ reverse : Elapsed 0.651 ms (65.107 ms / 100) 102.317 -> 102.330 ( +0.01%) [ +0.00% +0.06% +0.01% / +0.01% +0.08% +0.07%] index_add_ spread : Elapsed 1.023 ms (102.317 ms / 100) 65.155 -> 65.163 ( +0.01%) [ +0.00% +0.07% +0.05% / +0.01% +0.27% +0.21%] index_copy_ spread : Elapsed 0.652 ms (65.155 ms / 100) 102.308 -> 102.303 ( -0.00%) [ +0.00% +0.03% +0.05% / -0.00% +0.10% +0.13%] index_add_ strided 3 : Elapsed 1.023 ms (102.308 ms / 100) 65.155 -> 65.162 ( +0.01%) [ +0.02% +0.05% +0.00% / +0.01% +0.33% +0.27%] index_copy_ strided 3 : Elapsed 0.652 ms (65.169 ms / 100) 102.196 -> 102.214 ( +0.02%) [ +0.04% +0.06% +0.00% / +0.02% +0.09% +0.09%] index_add_ strided 7 : Elapsed 1.022 ms (102.232 ms / 100) 65.015 -> 65.041 ( +0.04%) [ +0.07% +0.00% +0.02% / +0.04% +0.24% +0.24%] index_copy_ strided 7 : Elapsed 0.651 ms (65.063 ms / 100) 101.396 -> 101.450 ( +0.05%) [ +0.05% +0.00% +0.08% / +0.13% +0.05% +0.11%] index_add_ perm : Elapsed 1.014 ms (101.447 ms / 100) 64.452 -> 64.451 ( -0.00%) [ +0.00% +0.01% +0.07% / -0.00% +0.25% +0.30%] index_copy_ perm : Elapsed 0.645 ms (64.452 ms / 100) 102.234 -> 102.271 ( +0.04%) [ +0.04% +0.01% +0.00% / +0.04% +0.18% +0.23%] index_add_ perm_sorted : Elapsed 1.023 ms (102.279 ms / 100) 65.152 -> 65.149 ( -0.00%) [ +0.01% +0.01% +0.00% / -0.00% +0.33% +0.29%] index_copy_ perm_sorted : Elapsed 0.652 ms (65.156 ms / 100) 215.610 -> 216.414 ( +0.37%) [ +0.48% +0.30% +0.00% / +0.37% +0.55% +0.37%] index_select const : Elapsed 2.166 ms (216.639 ms / 100) 216.989 -> 217.465 ( +0.22%) [ +0.45% +0.00% +0.04% / +0.22% +0.34% +0.24%] index_select wrap : Elapsed 2.180 ms (217.963 ms / 100) 215.335 -> 215.280 ( -0.03%) [ +0.20% +0.23% +0.00% / +0.46% -0.03% +0.18%] index_select linear : Elapsed 2.158 ms (215.761 ms / 100) 217.124 -> 216.546 ( -0.27%) [ +0.09% +0.13% +0.00% / +0.06% -0.19% -0.27%] index_select reverse : Elapsed 2.173 ms (217.314 ms / 100) 216.147 -> 216.246 ( +0.05%) [ +0.31% +0.00% +0.26% / +0.20% +0.20% +0.05%] index_select skip64 : Elapsed 2.168 ms (216.808 ms / 100) 216.453 -> 215.955 ( -0.23%) [ +0.00% +0.02% +0.09% / +0.15% -0.23% -0.03%] index_select skip256 : Elapsed 2.165 ms (216.453 ms / 100) 216.445 -> 216.836 ( +0.18%) [ +0.18% +0.00% +0.17% / +0.18% +0.25% +0.19%] index_select spread : Elapsed 2.168 ms (216.830 ms / 100) 218.004 -> 217.143 ( -0.39%) [ +0.08% +0.04% +0.00% / +0.08% -0.39% -0.09%] index_select strided 3 : Elapsed 2.182 ms (218.175 ms / 100) 217.089 -> 217.716 ( +0.29%) [ +0.00% +0.13% +0.37% / +0.29% +0.74% +0.97%] index_select strided 5 : Elapsed 2.171 ms (217.089 ms / 100) 217.443 -> 217.482 ( +0.02%) [ +0.18% +0.04% +0.00% / +0.02% +0.85% +0.79%] index_select strided 7 : Elapsed 2.178 ms (217.839 ms / 100) 217.203 -> 217.486 ( +0.13%) [ +0.27% +0.00% +0.20% / +0.23% +0.13% +0.28%] index_select strided 8 : Elapsed 2.178 ms (217.795 ms / 100) 216.754 -> 216.680 ( -0.03%) [ +0.00% +0.09% +0.49% / -0.03% +0.58% +0.29%] index_select random : Elapsed 2.168 ms (216.754 ms / 100) 216.463 -> 216.780 ( +0.15%) [ +0.12% +0.09% +0.00% / +0.15% +0.33% +0.23%] index_select random_sorted : Elapsed 2.167 ms (216.721 ms / 100) B = [150, 250, 50] (stride (1, 7500, 150)) A = [150, 250, 15] (stride (1, 150, 37500)) dim = 2 17.394 -> 16.816 ( -3.32%) [ +0.18% +0.00% +0.41% / +0.22% -3.23% -3.32%] index_add_ linear : Elapsed 0.174 ms (17.425 ms / 100) 12.198 -> 12.095 ( -0.84%) [ +0.27% +0.00% +0.41% / +0.25% -0.84% -0.76%] index_copy_ linear : Elapsed 0.122 ms (12.231 ms / 100) good 17.446 -> 16.564 ( -5.06%) [ +1.14% +0.00% +1.52% / +0.61% -5.06% -4.95%] index_add_ reverse : Elapsed 0.176 ms (17.645 ms / 100) 12.281 -> 12.094 ( -1.52%) [ +0.13% +0.00% +0.33% / +0.26% -1.36% -1.52%] index_copy_ reverse : Elapsed 0.123 ms (12.297 ms / 100) good 18.095 -> 17.104 ( -5.48%) [ +0.45% +0.18% +0.00% / -0.56% -5.48% -5.23%] index_add_ spread : Elapsed 0.182 ms (18.176 ms / 100) 12.360 -> 12.306 ( -0.44%) [ +0.18% +0.00% +0.11% / +0.32% -0.32% -0.44%] index_copy_ spread : Elapsed 0.124 ms (12.382 ms / 100) 17.806 -> 17.216 ( -3.31%) [ +0.00% +0.77% +0.08% / +0.35% -3.31% -3.01%] index_add_ strided 3 : Elapsed 0.178 ms (17.806 ms / 100) 12.464 -> 12.324 ( -1.12%) [ +0.00% +0.22% +0.05% / +0.08% -0.97% -1.12%] index_copy_ strided 3 : Elapsed 0.125 ms (12.464 ms / 100) good 18.473 -> 17.214 ( -6.82%) [ +1.10% +0.00% +0.83% / +1.01% -6.19% -6.82%] index_add_ strided 7 : Elapsed 0.187 ms (18.676 ms / 100) 12.560 -> 12.360 ( -1.59%) [ +0.00% +0.09% +0.06% / -0.02% -1.54% -1.59%] index_copy_ strided 7 : Elapsed 0.126 ms (12.560 ms / 100) 18.030 -> 17.602 ( -2.37%) [ +0.84% +0.00% +0.34% / +1.16% -2.33% -2.37%] index_add_ perm : Elapsed 0.182 ms (18.181 ms / 100) 12.435 -> 12.330 ( -0.84%) [ +0.00% +0.15% +0.27% / +0.10% -0.55% -0.84%] index_copy_ perm : Elapsed 0.124 ms (12.435 ms / 100) 17.800 -> 16.997 ( -4.51%) [ +0.30% +0.61% +0.00% / +0.77% -4.51% -3.98%] index_add_ perm_sorted : Elapsed 0.179 ms (17.854 ms / 100) 12.396 -> 12.228 ( -1.36%) [ +0.00% +0.20% +0.05% / -0.02% -1.36% -1.22%] index_copy_ perm_sorted : Elapsed 0.124 ms (12.396 ms / 100) 43.505 -> 44.081 ( +1.32%) [ +0.89% +0.37% +0.00% / +1.32% +9.73% +8.74%] index_select const : Elapsed 0.439 ms (43.892 ms / 100) 62.465 -> 61.571 ( -1.43%) [ +0.00% +0.59% +0.36% / -1.29% -0.64% -1.43%] index_select wrap : Elapsed 0.625 ms (62.465 ms / 100) 40.457 -> 41.067 ( +1.51%) [ +2.36% +0.00% +3.01% / +1.51% +10.88% +11.12%] index_select linear : Elapsed 0.414 ms (41.410 ms / 100) 54.553 -> 55.516 ( +1.77%) [ +1.91% +1.37% +0.00% / +1.77% +7.31% +7.15%] index_select reverse : Elapsed 0.556 ms (55.593 ms / 100) 42.880 -> 43.123 ( +0.57%) [ +1.53% +0.01% +0.00% / +0.57% +5.99% +7.87%] index_select skip64 : Elapsed 0.435 ms (43.538 ms / 100) 43.296 -> 43.049 ( -0.57%) [ +0.24% +0.00% +0.07% / -0.57% +7.28% +7.44%] index_select skip256 : Elapsed 0.434 ms (43.400 ms / 100) 46.862 -> 46.998 ( +0.29%) [ +1.18% +0.00% +0.74% / +0.29% +3.24% +3.12%] index_select spread : Elapsed 0.474 ms (47.417 ms / 100) 61.464 -> 60.565 ( -1.46%) [ +1.14% +0.37% +0.00% / +0.80% -1.46% -0.60%] index_select strided 3 : Elapsed 0.622 ms (62.163 ms / 100) 46.302 -> 46.225 ( -0.17%) [ +1.05% +0.00% +1.72% / -0.17% +16.39% +18.11%] index_select strided 5 : Elapsed 0.468 ms (46.786 ms / 100) 58.163 -> 57.795 ( -0.63%) [ +0.92% +0.00% +1.29% / -0.63% +2.84% +1.48%] index_select strided 7 : Elapsed 0.587 ms (58.701 ms / 100) good 62.065 -> 57.897 ( -6.72%) [ +0.00% +1.60% +1.02% / +1.53% -6.72% -4.31%] index_select strided 8 : Elapsed 0.621 ms (62.065 ms / 100) 59.598 -> 59.366 ( -0.39%) [ +0.00% +0.12% +0.29% / -0.39% +4.30% +1.36%] index_select random : Elapsed 0.596 ms (59.598 ms / 100) 48.940 -> 48.456 ( -0.99%) [ +0.15% +0.00% +0.75% / -0.35% -0.42% -0.99%] index_select random_sorted : Elapsed 0.490 ms (49.015 ms / 100) B = [150, 250, 50] (stride (250, 1, 37500)) A = [150, 250, 15] (stride (1, 2250, 150)) dim = 2 18.801 -> 18.819 ( +0.10%) [ +0.00% +0.15% +0.09% / +0.14% +0.17% +0.10%] index_add_ linear : Elapsed 0.188 ms (18.801 ms / 100) 18.233 -> 18.217 ( -0.09%) [ +0.01% +0.00% +0.03% / -0.09% +0.30% -0.06%] index_copy_ linear : Elapsed 0.182 ms (18.234 ms / 100) 18.824 -> 18.813 ( -0.06%) [ +0.10% +0.00% +0.12% / +0.18% -0.06% -0.04%] index_add_ reverse : Elapsed 0.188 ms (18.842 ms / 100) 18.227 -> 18.238 ( +0.06%) [ +0.05% +0.00% +0.20% / +0.39% +0.09% +0.06%] index_copy_ reverse : Elapsed 0.182 ms (18.236 ms / 100) 18.920 -> 18.863 ( -0.30%) [ +0.00% +0.11% +0.32% / +0.11% -0.13% -0.30%] index_add_ spread : Elapsed 0.189 ms (18.920 ms / 100) 18.307 -> 18.312 ( +0.03%) [ +0.00% +0.07% +0.25% / +0.23% +0.03% +0.17%] index_copy_ spread : Elapsed 0.183 ms (18.307 ms / 100) 18.841 -> 18.886 ( +0.24%) [ +0.00% +0.19% +0.14% / +0.24% +0.63% +0.53%] index_add_ strided 3 : Elapsed 0.188 ms (18.841 ms / 100) 18.250 -> 18.281 ( +0.17%) [ +0.01% +0.00% +0.17% / +0.17% +0.43% +0.45%] index_copy_ strided 3 : Elapsed 0.183 ms (18.251 ms / 100) 18.903 -> 18.858 ( -0.24%) [ +0.00% +0.19% +0.21% / +0.25% -0.24% -0.19%] index_add_ strided 7 : Elapsed 0.189 ms (18.903 ms / 100) 18.343 -> 18.306 ( -0.20%) [ +0.12% +0.00% +0.14% / +0.23% -0.10% -0.20%] index_copy_ strided 7 : Elapsed 0.184 ms (18.365 ms / 100) 18.852 -> 18.887 ( +0.19%) [ +0.00% +0.09% +0.05% / +0.19% +0.30% +0.36%] index_add_ perm : Elapsed 0.189 ms (18.852 ms / 100) 18.224 -> 18.263 ( +0.21%) [ +0.00% +0.23% +0.12% / +0.21% +0.46% +0.68%] index_copy_ perm : Elapsed 0.182 ms (18.224 ms / 100) 18.840 -> 18.843 ( +0.02%) [ +0.00% +0.12% +0.00% / +0.11% +0.10% +0.02%] index_add_ perm_sorted : Elapsed 0.188 ms (18.840 ms / 100) 18.255 -> 18.269 ( +0.08%) [ +0.00% +0.01% +0.08% / +0.08% +0.21% +0.20%] index_copy_ perm_sorted : Elapsed 0.183 ms (18.255 ms / 100) 46.073 -> 46.119 ( +0.10%) [ +0.00% +0.01% +0.05% / +0.10% +1.51% +1.43%] index_select const : Elapsed 0.461 ms (46.073 ms / 100) 70.469 -> 70.618 ( +0.21%) [ +1.26% +0.35% +0.00% / +0.84% +0.21% +0.51%] index_select wrap : Elapsed 0.714 ms (71.355 ms / 100) 50.366 -> 49.976 ( -0.77%) [ +0.04% +0.00% +0.03% / +0.08% -0.62% -0.77%] index_select linear : Elapsed 0.504 ms (50.387 ms / 100) 63.117 -> 63.213 ( +0.15%) [ +0.16% +0.20% +0.00% / +0.15% +1.61% +1.61%] index_select reverse : Elapsed 0.632 ms (63.221 ms / 100) 46.085 -> 46.158 ( +0.16%) [ +0.00% +0.00% +0.08% / +0.16% +1.43% +1.39%] index_select skip64 : Elapsed 0.461 ms (46.085 ms / 100) 46.058 -> 46.120 ( +0.13%) [ +0.04% +0.00% +0.12% / +0.13% +1.49% +1.44%] index_select skip256 : Elapsed 0.461 ms (46.076 ms / 100) 54.757 -> 54.278 ( -0.87%) [ +0.00% +0.02% +0.10% / +0.07% -0.74% -0.87%] index_select spread : Elapsed 0.548 ms (54.757 ms / 100) 70.695 -> 70.463 ( -0.33%) [ +0.08% +0.00% +0.16% / -0.33% +1.71% +1.61%] index_select strided 3 : Elapsed 0.708 ms (70.750 ms / 100) 63.439 -> 63.628 ( +0.30%) [ +0.10% +0.00% +0.17% / +0.30% +1.85% +1.89%] index_select strided 5 : Elapsed 0.635 ms (63.501 ms / 100) 74.118 -> 74.662 ( +0.73%) [ +0.69% +0.00% +0.52% / +0.73% +4.03% +3.93%] index_select strided 7 : Elapsed 0.746 ms (74.629 ms / 100) 71.367 -> 71.580 ( +0.30%) [ +0.42% +0.00% +0.17% / +0.30% +5.75% +5.81%] index_select strided 8 : Elapsed 0.717 ms (71.665 ms / 100) 73.231 -> 72.268 ( -1.32%) [ +0.73% +1.23% +0.00% / +0.27% -1.14% -1.32%] index_select random : Elapsed 0.738 ms (73.767 ms / 100) 53.358 -> 52.862 ( -0.93%) [ +0.00% +0.06% +0.10% / -0.03% -0.90% -0.93%] index_select random_sorted : Elapsed 0.534 ms (53.358 ms / 100) B = [150, 250, 50] (stride (1, 150, 37500)) dim = 2 fill_cnt = 15 6.447 -> 6.457 ( +0.16%) [ +0.05% +0.00% +0.00% / +0.16% +0.60% +0.45%] index_fill_ const : Elapsed 0.064 ms (6.450 ms / 100) 6.526 -> 6.523 ( -0.05%) [ +0.08% +0.00% +0.11% / +0.08% -0.05% -0.02%] index_fill_ linear : Elapsed 0.065 ms (6.531 ms / 100) 6.524 -> 6.529 ( +0.08%) [ +0.02% +0.00% +0.25% / +0.23% +0.18% +0.08%] index_fill_ reverse : Elapsed 0.065 ms (6.525 ms / 100) 6.424 -> 6.419 ( -0.08%) [ +0.03% +0.05% +0.00% / -0.08% +2.97% +2.94%] index_fill_ skip64 : Elapsed 0.064 ms (6.426 ms / 100) 6.427 -> 6.426 ( -0.02%) [ +0.02% +0.00% +0.09% / -0.02% +2.91% +2.77%] index_fill_ skip256 : Elapsed 0.064 ms (6.428 ms / 100) 6.523 -> 6.525 ( +0.03%) [ +0.00% +0.00% +0.09% / +0.03% +0.35% +0.06%] index_fill_ spread : Elapsed 0.065 ms (6.523 ms / 100) 6.517 -> 6.517 ( +0.00%) [ +0.09% +0.06% +0.00% / +0.00% +0.49% +0.25%] index_fill_ strided 3 : Elapsed 0.065 ms (6.523 ms / 100) 6.542 -> 6.508 ( -0.52%) [ +0.03% +0.06% +0.00% / +0.03% -0.52% -0.44%] index_fill_ strided 5 : Elapsed 0.065 ms (6.544 ms / 100) 6.530 -> 6.492 ( -0.58%) [ +0.18% +0.05% +0.00% / -0.03% -0.58% -0.52%] index_fill_ strided 7 : Elapsed 0.065 ms (6.542 ms / 100) 6.520 -> 6.519 ( -0.02%) [ +0.00% +0.03% +0.02% / -0.02% +0.32% +0.00%] index_fill_ strided 8 : Elapsed 0.065 ms (6.520 ms / 100) 6.505 -> 6.511 ( +0.09%) [ +0.00% +0.03% +0.20% / +0.09% +0.20% +0.14%] index_fill_ strided 16 : Elapsed 0.065 ms (6.505 ms / 100) 6.532 -> 6.519 ( -0.20%) [ +0.12% +0.00% +0.00% / -0.20% +0.61% +0.35%] index_fill_ random : Elapsed 0.065 ms (6.540 ms / 100) 6.533 -> 6.547 ( +0.21%) [ +0.37% +0.28% +0.00% / +0.21% +0.44% +0.49%] index_fill_ random_sorted : Elapsed 0.066 ms (6.557 ms / 100) 6.554 -> 6.521 ( -0.50%) [ +0.03% +0.00% +0.05% / -0.31% -0.50% -0.20%] index_fill_ perm : Elapsed 0.066 ms (6.556 ms / 100) 6.521 -> 6.526 ( +0.08%) [ +0.00% +0.15% +0.15% / +0.37% +0.46% +0.08%] index_fill_ perm_sorted : Elapsed 0.065 ms (6.521 ms / 100) out_shape = [50, 15, 150] in_shape = [250, 15, 150] idx_dim = 0 B = [50, 15, 150] (stride (2250, 1, 15)) A = [250, 15, 150] (stride (2250, 150, 1)) dim = 0 8.777 -> 8.776 ( -0.01%) [ +0.00% +0.03% +0.08% / -0.01% +0.19% +0.16%] index_select const : Elapsed 0.088 ms (8.777 ms / 100) 9.085 -> 9.091 ( +0.07%) [ +0.10% +0.00% +0.09% / +0.17% +0.18% +0.07%] index_select wrap : Elapsed 0.091 ms (9.094 ms / 100) 9.088 -> 9.087 ( -0.01%) [ +0.21% +0.12% +0.00% / +0.28% +0.07% -0.01%] index_select linear : Elapsed 0.091 ms (9.107 ms / 100) 9.117 -> 9.132 ( +0.16%) [ +0.12% +0.00% +0.16% / +0.16% +0.33% +0.30%] index_select reverse : Elapsed 0.091 ms (9.128 ms / 100) 8.784 -> 8.766 ( -0.20%) [ +0.00% +0.01% +0.11% / +0.05% +0.17% -0.20%] index_select skip64 : Elapsed 0.088 ms (8.784 ms / 100) 8.773 -> 8.762 ( -0.13%) [ +0.25% +0.09% +0.00% / -0.13% +0.34% +0.22%] index_select skip256 : Elapsed 0.088 ms (8.795 ms / 100) 9.159 -> 9.134 ( -0.27%) [ +0.10% +0.14% +0.00% / +0.00% -0.26% -0.27%] index_select spread : Elapsed 0.092 ms (9.168 ms / 100) 9.138 -> 9.130 ( -0.09%) [ +0.00% +0.18% +0.18% / +0.12% -0.09% -0.03%] index_select strided 3 : Elapsed 0.091 ms (9.138 ms / 100) 9.160 -> 9.152 ( -0.09%) [ +0.00% +0.14% +0.10% / -0.01% -0.09% -0.07%] index_select strided 5 : Elapsed 0.092 ms (9.160 ms / 100) 9.154 -> 9.164 ( +0.11%) [ +0.04% +0.00% +0.01% / +0.11% +0.33% +0.21%] index_select strided 7 : Elapsed 0.092 ms (9.158 ms / 100) 9.111 -> 9.106 ( -0.05%) [ +0.08% +0.00% +0.10% / -0.05% +0.72% +0.56%] index_select strided 8 : Elapsed 0.091 ms (9.118 ms / 100) 9.136 -> 9.139 ( +0.03%) [ +0.00% +0.07% +0.08% / +0.03% +0.61% +0.47%] index_select strided 16 : Elapsed 0.091 ms (9.136 ms / 100) 9.133 -> 9.143 ( +0.11%) [ +0.00% +0.15% +0.02% / +0.11% +0.47% +0.19%] index_select strided 64 : Elapsed 0.091 ms (9.133 ms / 100) 8.843 -> 8.857 ( +0.16%) [ +0.00% +0.23% +0.40% / +0.16% +0.53% +0.51%] index_select strided 100 : Elapsed 0.088 ms (8.843 ms / 100) 9.158 -> 9.161 ( +0.03%) [ +0.39% +0.11% +0.00% / +0.03% +0.26% +0.12%] index_select random : Elapsed 0.092 ms (9.194 ms / 100) 9.117 -> 9.136 ( +0.21%) [ +0.27% +0.00% +0.27% / +0.21% +0.34% +0.49%] index_select random_sorted : Elapsed 0.091 ms (9.142 ms / 100) 9.151 -> 9.153 ( +0.02%) [ +0.15% +0.00% +0.07% / +0.02% +0.07% +0.23%] index_select perm : Elapsed 0.092 ms (9.165 ms / 100) 9.142 -> 9.137 ( -0.05%) [ +0.10% +0.00% +0.16% / -0.05% +0.21% +0.02%] index_select perm_sorted : Elapsed 0.092 ms (9.151 ms / 100) B = [50, 15, 150] (stride (1, 7500, 50)) dim = 0 fill_cnt = 250 GOOD 6.432 -> 3.920 (-39.05%) [ +0.05% +0.00% +0.12% / -38.60% -39.02% -39.05%] index_fill_ const : Elapsed 0.064 ms (6.435 ms / 100) GOOD 6.489 -> 3.969 (-38.83%) [ +0.23% +0.00% +0.14% / -38.80% -38.83% -38.82%] index_fill_ linear : Elapsed 0.065 ms (6.504 ms / 100) GOOD 6.466 -> 4.014 (-37.92%) [ +0.00% +0.15% +0.00% / -37.83% -37.92% -37.91%] index_fill_ reverse : Elapsed 0.065 ms (6.466 ms / 100) GOOD 6.428 -> 3.963 (-38.35%) [ +0.03% +0.12% +0.00% / -38.35% -38.32% -38.30%] index_fill_ skip64 : Elapsed 0.064 ms (6.430 ms / 100) GOOD 6.423 -> 3.944 (-38.60%) [ +0.00% +0.25% +0.23% / -38.60% -38.55% -38.53%] index_fill_ skip256 : Elapsed 0.064 ms (6.423 ms / 100) GOOD 6.422 -> 3.957 (-38.38%) [ +0.00% +0.11% +0.12% / -38.38% -38.27% -38.37%] index_fill_ spread : Elapsed 0.064 ms (6.422 ms / 100) GOOD 6.543 -> 4.216 (-35.56%) [ +0.06% +0.00% +0.09% / -35.56% -35.18% -35.14%] index_fill_ strided 3 : Elapsed 0.065 ms (6.547 ms / 100) GOOD 6.734 -> 4.366 (-35.16%) [ +0.00% +0.16% +0.18% / -35.16% -34.73% -34.72%] index_fill_ strided 5 : Elapsed 0.067 ms (6.734 ms / 100) GOOD 6.528 -> 4.308 (-34.01%) [ +0.00% +0.03% +0.03% / -33.92% -33.62% -34.01%] index_fill_ strided 7 : Elapsed 0.065 ms (6.528 ms / 100) GOOD 6.695 -> 4.577 (-31.64%) [ +0.06% +0.12% +0.00% / -31.64% -31.43% -31.59%] index_fill_ strided 8 : Elapsed 0.067 ms (6.699 ms / 100) GOOD 6.665 -> 4.522 (-32.15%) [ +0.00% +0.23% +0.15% / -32.15% -31.94% -31.96%] index_fill_ strided 16 : Elapsed 0.067 ms (6.665 ms / 100) GOOD 6.510 -> 4.277 (-34.30%) [ +0.02% +0.05% +0.00% / -34.30% -34.29% -34.24%] index_fill_ random : Elapsed 0.065 ms (6.511 ms / 100) GOOD 6.444 -> 3.987 (-38.13%) [ +0.02% +0.00% +0.08% / -38.13% -37.86% -37.80%] index_fill_ random_sorted : Elapsed 0.064 ms (6.445 ms / 100) out_shape = [250, 50, 150] in_shape = [250, 15, 150] idx_dim = 1 B = [250, 50, 150] (stride (7500, 150, 1)) dim = 1 fill_cnt = 15 3.707 -> 3.705 ( -0.05%) [ +0.05% +0.00% +0.05% / -0.05% +0.24% +0.46%] index_fill_ const : Elapsed 0.037 ms (3.709 ms / 100) 3.758 -> 3.776 ( +0.48%) [ +0.40% +0.77% +0.00% / +0.48% +0.64% +0.72%] index_fill_ linear : Elapsed 0.038 ms (3.773 ms / 100) 3.761 -> 3.766 ( +0.13%) [ +0.11% +0.00% +0.11% / +0.13% +0.43% +0.40%] index_fill_ reverse : Elapsed 0.038 ms (3.765 ms / 100) 3.696 -> 3.710 ( +0.38%) [ +0.30% +0.00% +0.30% / +0.38% +0.65% +0.54%] index_fill_ skip64 : Elapsed 0.037 ms (3.707 ms / 100) 3.706 -> 3.707 ( +0.03%) [ +0.00% +0.08% +0.16% / +0.03% +0.84% +0.30%] index_fill_ skip256 : Elapsed 0.037 ms (3.706 ms / 100) 3.809 -> 3.808 ( -0.03%) [ +0.16% +0.00% +0.11% / -0.03% +0.53% +0.50%] index_fill_ spread : Elapsed 0.038 ms (3.815 ms / 100) 3.793 -> 3.796 ( +0.08%) [ +0.26% +0.29% +0.00% / +0.08% +0.76% +0.71%] index_fill_ strided 3 : Elapsed 0.038 ms (3.803 ms / 100) 3.807 -> 3.798 ( -0.24%) [ +0.21% +0.03% +0.00% / +0.18% -0.05% -0.24%] index_fill_ strided 5 : Elapsed 0.038 ms (3.815 ms / 100) 3.803 -> 3.797 ( -0.16%) [ +0.16% +0.00% +0.05% / -0.11% +0.03% -0.16%] index_fill_ strided 7 : Elapsed 0.038 ms (3.809 ms / 100) 3.817 -> 3.803 ( -0.37%) [ +0.05% +0.21% +0.00% / +0.29% +0.05% -0.37%] index_fill_ strided 8 : Elapsed 0.038 ms (3.819 ms / 100) 3.819 -> 3.819 ( +0.00%) [ +0.13% +0.10% +0.00% / +0.00% +0.31% +0.03%] index_fill_ strided 16 : Elapsed 0.038 ms (3.824 ms / 100) 3.777 -> 3.788 ( +0.29%) [ +0.21% +0.34% +0.00% / +0.29% +0.87% +0.74%] index_fill_ random : Elapsed 0.038 ms (3.785 ms / 100) 3.788 -> 3.792 ( +0.11%) [ +0.34% +0.26% +0.00% / +0.11% +0.53% +0.61%] index_fill_ random_sorted : Elapsed 0.038 ms (3.801 ms / 100) 3.801 -> 3.792 ( -0.24%) [ +0.00% +0.16% +0.29% / +0.45% +0.03% -0.24%] index_fill_ perm : Elapsed 0.038 ms (3.801 ms / 100) 3.807 -> 3.803 ( -0.11%) [ +0.00% +0.45% +0.32% / +0.24% +0.21% -0.11%] index_fill_ perm_sorted : Elapsed 0.038 ms (3.807 ms / 100) B = [250, 50, 150] (stride (7500, 150, 1)) A = [250, 15, 150] (stride (1, 37500, 250)) dim = 1 11.110 -> 10.923 ( -1.68%) [ +0.23% +0.00% +0.17% / +0.33% -1.68% -1.48%] index_add_ linear : Elapsed 0.111 ms (11.136 ms / 100) 10.004 -> 9.888 ( -1.16%) [ +0.30% +0.00% +0.16% / +0.16% -0.94% -1.16%] index_copy_ linear : Elapsed 0.100 ms (10.034 ms / 100) 11.153 -> 10.869 ( -2.55%) [ +0.00% +0.09% +0.05% / +0.15% -2.55% -2.41%] index_add_ reverse : Elapsed 0.112 ms (11.153 ms / 100) 10.106 -> 9.875 ( -2.29%) [ +0.07% +0.17% +0.00% / +0.11% -2.29% -2.19%] index_copy_ reverse : Elapsed 0.101 ms (10.113 ms / 100) 11.224 -> 11.189 ( -0.31%) [ +0.00% +0.19% +0.22% / +0.12% -0.31% -0.14%] index_add_ spread : Elapsed 0.112 ms (11.224 ms / 100) 10.253 -> 10.233 ( -0.20%) [ +0.00% +0.11% +0.17% / +0.16% -0.20% -0.11%] index_copy_ spread : Elapsed 0.103 ms (10.253 ms / 100) 11.272 -> 11.150 ( -1.08%) [ +0.08% +0.00% +0.23% / +0.17% -0.96% -1.08%] index_add_ strided 3 : Elapsed 0.113 ms (11.281 ms / 100) 10.276 -> 10.180 ( -0.93%) [ +0.11% +0.00% +0.13% / +0.22% -0.93% -0.92%] index_copy_ strided 3 : Elapsed 0.103 ms (10.287 ms / 100) 11.554 -> 11.091 ( -4.01%) [ +0.00% +0.03% +0.04% / +0.07% -3.87% -4.01%] index_add_ strided 7 : Elapsed 0.116 ms (11.554 ms / 100) 10.521 -> 10.149 ( -3.54%) [ +0.00% +0.24% +0.17% / +0.29% -3.54% -3.46%] index_copy_ strided 7 : Elapsed 0.105 ms (10.521 ms / 100) 11.235 -> 11.195 ( -0.36%) [ +0.10% +0.00% +0.21% / +0.14% -0.36% -0.08%] index_add_ perm : Elapsed 0.112 ms (11.246 ms / 100) 10.228 -> 10.239 ( +0.11%) [ +0.00% +0.01% +0.23% / +0.11% +0.49% +0.23%] index_copy_ perm : Elapsed 0.102 ms (10.228 ms / 100) 11.221 -> 11.101 ( -1.07%) [ +0.00% +0.02% +0.13% / -0.01% -1.07% -0.91%] index_add_ perm_sorted : Elapsed 0.112 ms (11.221 ms / 100) 10.180 -> 10.127 ( -0.52%) [ +0.00% +0.03% +0.09% / -0.07% -0.51% -0.52%] index_copy_ perm_sorted : Elapsed 0.102 ms (10.180 ms / 100) 23.662 -> 23.075 ( -2.48%) [ +0.29% +0.00% +0.11% / +0.27% -2.48% -2.23%] index_select const : Elapsed 0.237 ms (23.731 ms / 100) 38.817 -> 38.729 ( -0.23%) [ +0.50% +0.00% +0.08% / -0.23% +2.79% +1.52%] index_select wrap : Elapsed 0.390 ms (39.012 ms / 100) 24.204 -> 24.153 ( -0.21%) [ +0.40% +0.00% +0.07% / +0.32% -0.10% -0.21%] index_select linear : Elapsed 0.243 ms (24.302 ms / 100) 40.501 -> 40.525 ( +0.06%) [ +0.00% +0.60% +0.90% / +0.06% +1.02% +1.06%] index_select reverse : Elapsed 0.405 ms (40.501 ms / 100) 23.894 -> 23.362 ( -2.23%) [ +0.36% +1.20% +0.00% / +0.77% -1.74% -2.23%] index_select skip64 : Elapsed 0.240 ms (23.980 ms / 100) 23.880 -> 23.083 ( -3.34%) [ +0.58% +0.03% +0.00% / +0.57% -3.34% -3.12%] index_select skip256 : Elapsed 0.240 ms (24.019 ms / 100) 29.328 -> 28.302 ( -3.50%) [ +0.00% +0.01% +0.55% / +0.15% -3.23% -3.50%] index_select spread : Elapsed 0.293 ms (29.328 ms / 100) 34.842 -> 34.577 ( -0.76%) [ +0.42% +0.25% +0.00% / +0.51% -0.71% -0.76%] index_select strided 3 : Elapsed 0.350 ms (34.988 ms / 100) 28.475 -> 28.634 ( +0.56%) [ +0.42% +0.67% +0.00% / +0.56% +3.02% +3.23%] index_select strided 5 : Elapsed 0.286 ms (28.594 ms / 100) 36.470 -> 36.655 ( +0.51%) [ +0.00% +2.29% +0.43% / +0.51% +2.67% +4.15%] index_select strided 7 : Elapsed 0.365 ms (36.470 ms / 100) 36.877 -> 37.373 ( +1.35%) [ +1.09% +1.12% +0.00% / +1.35% +4.40% +4.39%] index_select strided 8 : Elapsed 0.373 ms (37.279 ms / 100) good 37.172 -> 34.977 ( -5.90%) [ +0.10% +1.21% +0.00% / -0.22% -5.90% -5.31%] index_select random : Elapsed 0.372 ms (37.211 ms / 100) 29.304 -> 28.063 ( -4.23%) [ +0.22% +0.00% +0.89% / +0.85% -4.23% -3.86%] index_select random_sorted : Elapsed 0.294 ms (29.369 ms / 100) B = [250, 50, 150] (stride (7500, 1, 50)) A = [250, 15, 150] (stride (2250, 1, 15)) dim = 1 79.600 -> 79.784 ( +0.23%) [ +0.00% +0.13% +0.38% / +0.23% +0.26% +0.30%] index_add_ linear : Elapsed 0.796 ms (79.600 ms / 100) 61.646 -> 61.638 ( -0.01%) [ +0.00% +0.00% +0.18% / -0.01% +0.12% +0.15%] index_copy_ linear : Elapsed 0.616 ms (61.646 ms / 100) 79.637 -> 79.694 ( +0.07%) [ +0.14% +0.00% +0.09% / +0.07% +0.39% +0.10%] index_add_ reverse : Elapsed 0.797 ms (79.750 ms / 100) 61.601 -> 61.642 ( +0.07%) [ +0.17% +0.17% +0.00% / +0.07% +0.38% +0.16%] index_copy_ reverse : Elapsed 0.617 ms (61.704 ms / 100) 79.748 -> 79.666 ( -0.10%) [ +0.00% +0.06% +0.04% / -0.10% +0.18% +0.14%] index_add_ spread : Elapsed 0.797 ms (79.748 ms / 100) 61.620 -> 61.662 ( +0.07%) [ +0.00% +0.21% +0.07% / +0.17% +0.07% +0.19%] index_copy_ spread : Elapsed 0.616 ms (61.620 ms / 100) 79.510 -> 79.647 ( +0.17%) [ +0.39% +0.22% +0.00% / +0.17% +0.33% +0.52%] index_add_ strided 3 : Elapsed 0.798 ms (79.823 ms / 100) 61.686 -> 61.675 ( -0.02%) [ +0.01% +0.05% +0.00% / -0.02% +0.17% +0.08%] index_copy_ strided 3 : Elapsed 0.617 ms (61.694 ms / 100) 79.455 -> 79.547 ( +0.12%) [ +0.25% +0.00% +0.04% / +0.12% +0.13% +0.51%] index_add_ strided 7 : Elapsed 0.797 ms (79.653 ms / 100) 61.577 -> 61.601 ( +0.04%) [ +0.10% +0.00% +0.04% / +0.04% +0.19% +0.19%] index_copy_ strided 7 : Elapsed 0.616 ms (61.641 ms / 100) 79.617 -> 79.438 ( -0.22%) [ +0.59% +0.02% +0.00% / +0.17% +0.27% -0.22%] index_add_ perm : Elapsed 0.801 ms (80.084 ms / 100) 61.614 -> 61.728 ( +0.19%) [ +0.07% +0.00% +0.14% / +0.19% +0.24% +0.19%] index_copy_ perm : Elapsed 0.617 ms (61.656 ms / 100) 79.599 -> 79.777 ( +0.22%) [ +0.00% +0.23% +0.17% / +0.36% +0.22% +0.23%] index_add_ perm_sorted : Elapsed 0.796 ms (79.599 ms / 100) 61.544 -> 61.592 ( +0.08%) [ +0.00% +0.04% +0.01% / +0.08% +0.42% +0.39%] index_copy_ perm_sorted : Elapsed 0.615 ms (61.544 ms / 100) BEST 196.261 -> 23.575 (-87.99%) [ +0.19% +0.00% +0.07% / -87.99% -87.80% -87.81%] index_select const : Elapsed 1.966 ms (196.640 ms / 100) BEST 198.000 -> 22.498 (-88.64%) [ +0.05% +0.01% +0.00% / -88.57% -88.63% -88.64%] index_select wrap : Elapsed 1.981 ms (198.092 ms / 100) BEST 196.598 -> 22.309 (-88.65%) [ +0.09% +0.00% +0.02% / -88.56% -88.65% -88.64%] index_select linear : Elapsed 1.968 ms (196.781 ms / 100) BEST 197.101 -> 22.574 (-88.55%) [ +0.00% +0.22% +0.01% / -88.34% -88.54% -88.55%] index_select reverse : Elapsed 1.971 ms (197.101 ms / 100) BEST 196.433 -> 22.195 (-88.70%) [ +0.21% +0.00% +0.06% / -88.46% -88.69% -88.70%] index_select skip64 : Elapsed 1.968 ms (196.842 ms / 100) BEST 196.850 -> 23.333 (-88.15%) [ +0.13% +0.00% +0.02% / -87.97% -88.14% -88.15%] index_select skip256 : Elapsed 1.971 ms (197.109 ms / 100) BEST 196.867 -> 23.328 (-88.15%) [ +0.18% +0.00% +0.14% / -88.12% -88.15% -88.15%] index_select spread : Elapsed 1.972 ms (197.223 ms / 100) BEST 198.361 -> 23.462 (-88.17%) [ +0.28% +0.04% +0.00% / -88.13% -88.17% -88.17%] index_select strided 3 : Elapsed 1.989 ms (198.924 ms / 100) BEST 198.663 -> 22.762 (-88.54%) [ +0.00% +0.00% +0.14% / -88.47% -88.54% -88.54%] index_select strided 5 : Elapsed 1.987 ms (198.663 ms / 100) BEST 199.008 -> 22.639 (-88.62%) [ +0.00% +0.02% +0.05% / -88.52% -88.62% -88.62%] index_select strided 7 : Elapsed 1.990 ms (199.008 ms / 100) BEST 198.930 -> 22.182 (-88.85%) [ +0.00% +0.23% +0.25% / -88.76% -88.85% -88.85%] index_select strided 8 : Elapsed 1.989 ms (198.930 ms / 100) BEST 198.326 -> 22.156 (-88.83%) [ +0.00% +0.20% +0.03% / -88.80% -88.83% -88.83%] index_select random : Elapsed 1.983 ms (198.326 ms / 100) BEST 196.808 -> 22.197 (-88.72%) [ +0.00% +0.12% +0.11% / -88.72% -88.69% -88.69%] index_select random_sorted : Elapsed 1.968 ms (196.808 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) A = [250, 15, 150] (stride (15, 1, 3750)) dim = 1 32.022 -> 32.125 ( +0.32%) [ +0.54% +0.00% +0.02% / +0.32% +0.81% +0.87%] index_add_ linear : Elapsed 0.322 ms (32.196 ms / 100) 29.594 -> 29.850 ( +0.87%) [ +0.82% +0.00% +0.66% / +0.95% +0.92% +0.87%] index_copy_ linear : Elapsed 0.298 ms (29.838 ms / 100) 32.122 -> 32.078 ( -0.14%) [ +0.44% +0.55% +0.00% / -0.14% +1.34% +1.27%] index_add_ reverse : Elapsed 0.323 ms (32.264 ms / 100) 29.754 -> 29.710 ( -0.15%) [ +0.11% +0.29% +0.00% / -0.15% +1.01% +1.34%] index_copy_ reverse : Elapsed 0.298 ms (29.787 ms / 100) 31.974 -> 32.088 ( +0.36%) [ +0.29% +0.00% +0.31% / +0.36% +1.31% +1.32%] index_add_ spread : Elapsed 0.321 ms (32.067 ms / 100) 29.591 -> 29.779 ( +0.64%) [ +0.26% +0.00% +0.19% / +0.64% +1.53% +1.36%] index_copy_ spread : Elapsed 0.297 ms (29.669 ms / 100) 32.142 -> 32.207 ( +0.20%) [ +0.16% +0.00% +0.44% / +0.20% +1.46% +1.61%] index_add_ strided 3 : Elapsed 0.322 ms (32.194 ms / 100) 29.802 -> 29.858 ( +0.19%) [ +0.45% +0.14% +0.00% / +0.19% +1.05% +1.31%] index_copy_ strided 3 : Elapsed 0.299 ms (29.937 ms / 100) 32.088 -> 32.185 ( +0.30%) [ +0.63% +0.27% +0.00% / +0.30% +0.99% +1.28%] index_add_ strided 7 : Elapsed 0.323 ms (32.289 ms / 100) 29.643 -> 29.684 ( +0.14%) [ +0.91% +0.00% +0.05% / +0.14% +1.20% +1.30%] index_copy_ strided 7 : Elapsed 0.299 ms (29.912 ms / 100) 32.270 -> 32.226 ( -0.14%) [ +0.00% +0.11% +0.19% / -0.14% +0.44% +0.65%] index_add_ perm : Elapsed 0.323 ms (32.270 ms / 100) 29.969 -> 29.957 ( -0.04%) [ +0.00% +0.25% +0.30% / -0.04% +0.20% +0.53%] index_copy_ perm : Elapsed 0.300 ms (29.969 ms / 100) 32.228 -> 32.300 ( +0.22%) [ +0.94% +0.00% +0.47% / +0.22% +0.27% +0.32%] index_add_ perm_sorted : Elapsed 0.325 ms (32.530 ms / 100) 30.045 -> 29.980 ( -0.22%) [ +0.04% +0.00% +0.14% / -0.17% -0.22% -0.13%] index_copy_ perm_sorted : Elapsed 0.301 ms (30.056 ms / 100) 103.106 -> 103.483 ( +0.37%) [ +0.18% +0.57% +0.00% / +0.37% +0.43% +0.53%] index_select const : Elapsed 1.033 ms (103.291 ms / 100) 106.240 -> 106.246 ( +0.01%) [ +0.02% +0.14% +0.00% / +0.06% +0.01% +0.16%] index_select wrap : Elapsed 1.063 ms (106.262 ms / 100) 103.355 -> 103.024 ( -0.32%) [ +0.64% +0.10% +0.00% / +0.49% +0.27% -0.32%] index_select linear : Elapsed 1.040 ms (104.017 ms / 100) 104.330 -> 104.052 ( -0.27%) [ +0.00% +0.30% +0.36% / -0.27% +0.28% -0.12%] index_select reverse : Elapsed 1.043 ms (104.330 ms / 100) 103.189 -> 102.856 ( -0.32%) [ +0.00% +0.37% +0.05% / +0.19% -0.13% -0.32%] index_select skip64 : Elapsed 1.032 ms (103.189 ms / 100) 103.750 -> 102.831 ( -0.89%) [ +0.00% +0.29% +0.03% / +0.03% -0.89% -0.64%] index_select skip256 : Elapsed 1.037 ms (103.750 ms / 100) 104.117 -> 103.471 ( -0.62%) [ +0.15% +0.00% +0.02% / +0.09% -0.18% -0.62%] index_select spread : Elapsed 1.043 ms (104.272 ms / 100) 110.092 -> 110.315 ( +0.20%) [ +0.16% +0.00% +0.27% / +0.20% +0.48% +0.29%] index_select strided 3 : Elapsed 1.103 ms (110.268 ms / 100) 111.624 -> 112.033 ( +0.37%) [ +0.00% +0.32% +0.28% / +0.37% +0.38% +0.76%] index_select strided 5 : Elapsed 1.116 ms (111.624 ms / 100) 113.733 -> 113.587 ( -0.13%) [ +0.17% +0.00% +0.15% / +0.26% +0.09% -0.13%] index_select strided 7 : Elapsed 1.139 ms (113.921 ms / 100) 113.433 -> 113.336 ( -0.09%) [ +0.12% +0.00% +0.18% / +0.13% -0.09% -0.09%] index_select strided 8 : Elapsed 1.136 ms (113.571 ms / 100) 109.475 -> 108.930 ( -0.50%) [ +0.00% +0.16% +0.05% / -0.02% -0.50% -0.42%] index_select random : Elapsed 1.095 ms (109.475 ms / 100) 104.049 -> 103.734 ( -0.30%) [ +0.00% +0.28% +0.03% / -0.11% -0.03% -0.30%] index_select random_sorted : Elapsed 1.040 ms (104.049 ms / 100) B = [250, 50, 150] (stride (1, 37500, 250)) A = [250, 15, 150] (stride (1, 37500, 250)) dim = 1 13.030 -> 13.092 ( +0.48%) [ +0.00% +0.18% +0.29% / +0.48% +0.82% +0.49%] index_add_ linear : Elapsed 0.130 ms (13.030 ms / 100) 11.739 -> 11.755 ( +0.14%) [ +0.12% +0.03% +0.00% / +0.21% +0.14% +0.14%] index_copy_ linear : Elapsed 0.118 ms (11.753 ms / 100) 13.084 -> 13.060 ( -0.18%) [ +0.00% +0.01% +0.22% / +0.02% -0.05% -0.18%] index_add_ reverse : Elapsed 0.131 ms (13.084 ms / 100) 11.700 -> 11.687 ( -0.11%) [ +0.00% +0.06% +0.22% / +0.01% +0.13% -0.11%] index_copy_ reverse : Elapsed 0.117 ms (11.700 ms / 100) 13.121 -> 13.111 ( -0.08%) [ +0.03% +0.11% +0.00% / -0.08% +0.30% +0.45%] index_add_ spread : Elapsed 0.131 ms (13.125 ms / 100) 11.744 -> 11.694 ( -0.43%) [ +0.21% +0.00% +0.06% / +0.01% -0.43% -0.25%] index_copy_ spread : Elapsed 0.118 ms (11.769 ms / 100) 13.173 -> 13.168 ( -0.04%) [ +0.00% +0.37% +0.14% / -0.04% +0.80% +1.02%] index_add_ strided 3 : Elapsed 0.132 ms (13.173 ms / 100) 11.714 -> 11.737 ( +0.20%) [ +0.12% +0.00% +0.26% / +0.52% +0.20% +0.38%] index_copy_ strided 3 : Elapsed 0.117 ms (11.728 ms / 100) 13.088 -> 13.021 ( -0.51%) [ +0.00% +0.11% +0.08% / -0.04% -0.51% -0.49%] index_add_ strided 7 : Elapsed 0.131 ms (13.088 ms / 100) 11.712 -> 11.699 ( -0.11%) [ +0.19% +0.00% +0.10% / -0.11% +0.12% -0.02%] index_copy_ strided 7 : Elapsed 0.117 ms (11.734 ms / 100) 13.158 -> 13.198 ( +0.30%) [ +0.48% +0.59% +0.00% / +0.52% +0.30% +0.44%] index_add_ perm : Elapsed 0.132 ms (13.221 ms / 100) 11.772 -> 11.716 ( -0.48%) [ +0.08% +0.00% +0.13% / +0.14% -0.48% -0.32%] index_copy_ perm : Elapsed 0.118 ms (11.782 ms / 100) 13.137 -> 13.123 ( -0.11%) [ +0.00% +0.18% +0.54% / -0.11% +0.56% +0.81%] index_add_ perm_sorted : Elapsed 0.131 ms (13.137 ms / 100) 11.795 -> 11.731 ( -0.54%) [ +0.00% +0.19% +0.18% / +0.06% -0.53% -0.54%] index_copy_ perm_sorted : Elapsed 0.118 ms (11.795 ms / 100) 44.753 -> 45.817 ( +2.38%) [ +1.21% +0.00% +0.76% / +2.38% +13.04% +12.37%] index_select const : Elapsed 0.453 ms (45.296 ms / 100) 59.844 -> 61.577 ( +2.90%) [ +0.64% +0.50% +0.00% / +2.90% +3.19% +2.91%] index_select wrap : Elapsed 0.602 ms (60.226 ms / 100) 43.681 -> 44.270 ( +1.35%) [ +3.69% +0.42% +0.00% / +1.35% +2.18% +2.75%] index_select linear : Elapsed 0.453 ms (45.292 ms / 100) 57.351 -> 57.438 ( +0.15%) [ +0.00% +2.52% +2.32% / +2.20% +1.73% +0.15%] index_select reverse : Elapsed 0.574 ms (57.351 ms / 100) 43.588 -> 44.166 ( +1.33%) [ +1.00% +0.99% +0.00% / +1.33% +8.12% +7.67%] index_select skip64 : Elapsed 0.440 ms (44.026 ms / 100) 47.288 -> 47.316 ( +0.06%) [ +0.00% +0.13% +0.64% / +0.06% +4.53% +4.57%] index_select skip256 : Elapsed 0.473 ms (47.288 ms / 100) good 45.017 -> 41.929 ( -6.86%) [ +0.57% +0.40% +0.00% / -0.24% -6.86% -6.38%] index_select spread : Elapsed 0.453 ms (45.274 ms / 100) 57.399 -> 57.226 ( -0.30%) [ +0.61% +0.00% +0.05% / -0.30% +2.34% +4.04%] index_select strided 3 : Elapsed 0.577 ms (57.750 ms / 100) 42.365 -> 42.504 ( +0.33%) [ +0.65% +0.00% +0.31% / +0.33% +22.81% +21.05%] index_select strided 5 : Elapsed 0.426 ms (42.642 ms / 100) 55.712 -> 56.217 ( +0.91%) [ +0.00% +3.37% +1.79% / +0.91% +2.66% +1.81%] index_select strided 7 : Elapsed 0.557 ms (55.712 ms / 100) 56.475 -> 55.106 ( -2.42%) [ +0.16% +0.00% +0.02% / -1.38% -1.13% -2.42%] index_select strided 8 : Elapsed 0.566 ms (56.564 ms / 100) 62.409 -> 61.441 ( -1.55%) [ +2.04% +0.00% +0.44% / -1.05% -0.22% -1.55%] index_select random : Elapsed 0.637 ms (63.682 ms / 100) 45.878 -> 46.516 ( +1.39%) [ +0.00% +0.60% +0.71% / +1.39% +2.90% +4.25%] index_select random_sorted : Elapsed 0.459 ms (45.878 ms / 100) B = [250, 50, 150] (stride (50, 1, 12500)) A = [250, 15, 150] (stride (1, 37500, 250)) dim = 1 99.490 -> 99.613 ( +0.12%) [ +0.16% +0.00% +0.07% / +0.12% +0.76% +0.58%] index_add_ linear : Elapsed 0.996 ms (99.650 ms / 100) 61.346 -> 61.323 ( -0.04%) [ +0.24% +0.15% +0.00% / -0.04% +0.87% +0.94%] index_copy_ linear : Elapsed 0.615 ms (61.493 ms / 100) 99.419 -> 99.513 ( +0.09%) [ +0.09% +0.00% +0.11% / +0.09% +0.61% +0.56%] index_add_ reverse : Elapsed 0.995 ms (99.507 ms / 100) 61.238 -> 61.311 ( +0.12%) [ +0.07% +0.00% +0.04% / +0.12% +0.79% +0.66%] index_copy_ reverse : Elapsed 0.613 ms (61.279 ms / 100) 99.416 -> 99.562 ( +0.15%) [ +0.00% +0.04% +0.04% / +0.15% +0.48% +0.56%] index_add_ spread : Elapsed 0.994 ms (99.416 ms / 100) 61.232 -> 61.326 ( +0.15%) [ +0.16% +0.05% +0.00% / +0.15% +0.51% +0.42%] index_copy_ spread : Elapsed 0.613 ms (61.333 ms / 100) 99.413 -> 99.386 ( -0.03%) [ +0.00% +0.10% +0.11% / -0.03% +0.52% +0.50%] index_add_ strided 3 : Elapsed 0.994 ms (99.413 ms / 100) 61.383 -> 61.339 ( -0.07%) [ +0.00% +0.18% +0.23% / -0.07% +0.34% +0.36%] index_copy_ strided 3 : Elapsed 0.614 ms (61.383 ms / 100) 99.331 -> 99.330 ( -0.00%) [ +0.17% +0.00% +0.11% / -0.00% +0.32% +0.45%] index_add_ strided 7 : Elapsed 0.995 ms (99.496 ms / 100) 61.363 -> 61.207 ( -0.25%) [ +0.18% +0.00% +0.18% / -0.25% +0.02% +0.00%] index_copy_ strided 7 : Elapsed 0.615 ms (61.474 ms / 100) 98.982 -> 99.028 ( +0.05%) [ +0.06% +0.00% +0.09% / +0.05% +0.64% +0.51%] index_add_ perm : Elapsed 0.990 ms (99.038 ms / 100) 60.962 -> 60.966 ( +0.01%) [ +0.04% +0.00% +0.21% / +0.01% +0.61% +0.52%] index_copy_ perm : Elapsed 0.610 ms (60.987 ms / 100) 99.315 -> 99.509 ( +0.20%) [ +0.07% +0.00% +0.07% / +0.20% +0.52% +0.60%] index_add_ perm_sorted : Elapsed 0.994 ms (99.380 ms / 100) 61.126 -> 61.259 ( +0.22%) [ +0.14% +0.00% +0.17% / +0.22% +0.54% +0.61%] index_copy_ perm_sorted : Elapsed 0.612 ms (61.209 ms / 100) BEST 184.074 -> 24.373 (-86.76%) [ +0.06% +0.00% +0.04% / -86.76% -85.68% -85.72%] index_select const : Elapsed 1.842 ms (184.178 ms / 100) BEST 229.246 -> 38.737 (-83.10%) [ +0.06% +0.04% +0.00% / -83.10% -82.07% -82.02%] index_select wrap : Elapsed 2.294 ms (229.392 ms / 100) BEST 189.417 -> 37.849 (-80.02%) [ +0.00% +0.15% +0.15% / -80.02% -78.85% -78.94%] index_select linear : Elapsed 1.894 ms (189.417 ms / 100) BEST 203.385 -> 38.057 (-81.29%) [ +0.00% +0.19% +0.24% / -81.29% -80.50% -80.49%] index_select reverse : Elapsed 2.034 ms (203.385 ms / 100) BEST 184.017 -> 23.796 (-87.07%) [ +0.05% +0.02% +0.00% / -87.07% -86.19% -86.19%] index_select skip64 : Elapsed 1.841 ms (184.104 ms / 100) BEST 184.260 -> 24.568 (-86.67%) [ +0.07% +0.15% +0.00% / -86.67% -85.93% -85.97%] index_select skip256 : Elapsed 1.844 ms (184.395 ms / 100) BEST 198.607 -> 37.590 (-81.07%) [ +0.09% +0.11% +0.00% / -81.07% -79.93% -79.88%] index_select spread : Elapsed 1.988 ms (198.789 ms / 100) BEST 232.323 -> 26.078 (-88.78%) [ +0.08% +0.03% +0.00% / -88.78% -88.11% -88.15%] index_select strided 3 : Elapsed 2.325 ms (232.502 ms / 100) BEST 217.393 -> 24.625 (-88.67%) [ +0.07% +0.11% +0.00% / -88.67% -87.53% -87.55%] index_select strided 5 : Elapsed 2.175 ms (217.543 ms / 100) BEST 230.145 -> 38.801 (-83.14%) [ +0.10% +0.00% +0.15% / -83.14% -82.19% -82.20%] index_select strided 7 : Elapsed 2.304 ms (230.365 ms / 100) BEST 230.215 -> 38.630 (-83.22%) [ +0.26% +0.01% +0.00% / -83.22% -82.15% -82.05%] index_select strided 8 : Elapsed 2.308 ms (230.822 ms / 100) BEST 220.760 -> 38.372 (-82.62%) [ +0.00% +0.12% +0.20% / -82.62% -81.60% -81.56%] index_select random : Elapsed 2.208 ms (220.760 ms / 100) BEST 197.808 -> 37.764 (-80.91%) [ +0.07% +0.00% +0.16% / -80.91% -79.77% -79.84%] index_select random_sorted : Elapsed 1.979 ms (197.949 ms / 100) B = [250, 50, 150] (stride (1, 250, 12500)) A = [250, 15, 150] (stride (2250, 150, 1)) dim = 1 17.520 -> 17.519 ( -0.01%) [ +0.26% +0.02% +0.00% / -0.01% +5.35% +5.08%] index_add_ linear : Elapsed 0.176 ms (17.565 ms / 100) 12.065 -> 12.207 ( +1.18%) [ +0.56% +0.00% +0.81% / +1.35% +1.18% +2.32%] index_copy_ linear : Elapsed 0.121 ms (12.133 ms / 100) 17.538 -> 17.521 ( -0.10%) [ +0.05% +0.00% +0.14% / -0.10% +5.19% +5.34%] index_add_ reverse : Elapsed 0.175 ms (17.547 ms / 100) 12.065 -> 12.015 ( -0.41%) [ +0.08% +0.32% +0.00% / -0.41% +0.99% -0.15%] index_copy_ reverse : Elapsed 0.121 ms (12.075 ms / 100) 16.829 -> 16.928 ( +0.59%) [ +0.29% +0.08% +0.00% / +0.59% +4.76% +4.69%] index_add_ spread : Elapsed 0.169 ms (16.877 ms / 100) 11.527 -> 11.516 ( -0.10%) [ +0.10% +0.00% +0.06% / -0.10% +4.15% +4.76%] index_copy_ spread : Elapsed 0.115 ms (11.538 ms / 100) 17.173 -> 17.217 ( +0.26%) [ +0.25% +0.00% +0.36% / +0.26% +2.11% +2.07%] index_add_ strided 3 : Elapsed 0.172 ms (17.216 ms / 100) 11.342 -> 11.392 ( +0.44%) [ +0.27% +0.00% +0.98% / +0.44% +4.20% +4.34%] index_copy_ strided 3 : Elapsed 0.114 ms (11.373 ms / 100) 16.489 -> 16.555 ( +0.40%) [ +0.00% +0.10% +0.52% / +0.40% +1.93% +2.37%] index_add_ strided 7 : Elapsed 0.165 ms (16.489 ms / 100) 12.109 -> 12.017 ( -0.76%) [ +0.79% +0.00% +0.36% / +0.69% -0.55% -0.76%] index_copy_ strided 7 : Elapsed 0.122 ms (12.205 ms / 100) 16.472 -> 16.536 ( +0.39%) [ +0.07% +0.10% +0.00% / +0.39% +6.12% +6.34%] index_add_ perm : Elapsed 0.165 ms (16.483 ms / 100) 11.642 -> 11.733 ( +0.78%) [ +0.10% +0.81% +0.00% / +0.78% +4.66% +4.98%] index_copy_ perm : Elapsed 0.117 ms (11.654 ms / 100) 17.193 -> 17.331 ( +0.80%) [ +0.04% +0.00% +0.18% / +0.80% +5.96% +6.28%] index_add_ perm_sorted : Elapsed 0.172 ms (17.200 ms / 100) 12.276 -> 11.700 ( -4.69%) [ +0.78% +0.00% +0.21% / +0.96% -4.69% -4.19%] index_copy_ perm_sorted : Elapsed 0.124 ms (12.372 ms / 100) 65.585 -> 66.140 ( +0.85%) [ +0.00% +2.59% +1.13% / +0.85% +8.81% +12.09%] index_select const : Elapsed 0.656 ms (65.585 ms / 100) 65.571 -> 65.061 ( -0.78%) [ +0.00% +0.60% +1.05% / -0.78% +15.16% +15.60%] index_select wrap : Elapsed 0.656 ms (65.571 ms / 100) 61.690 -> 63.352 ( +2.69%) [ +4.18% +2.39% +0.00% / +2.69% +15.29% +20.47%] index_select linear : Elapsed 0.643 ms (64.270 ms / 100) 59.538 -> 60.039 ( +0.84%) [ +0.00% +1.42% +0.86% / +0.84% +23.72% +22.84%] index_select reverse : Elapsed 0.595 ms (59.538 ms / 100) 63.007 -> 62.293 ( -1.13%) [ +0.00% +2.08% +2.76% / -1.13% +11.35% +11.10%] index_select skip64 : Elapsed 0.630 ms (63.007 ms / 100) 60.920 -> 62.366 ( +2.37%) [ +3.97% +0.00% +1.19% / +2.37% +16.11% +18.55%] index_select skip256 : Elapsed 0.633 ms (63.339 ms / 100) 63.299 -> 62.352 ( -1.50%) [ +0.00% +0.63% +0.46% / +0.79% +1.18% -1.50%] index_select spread : Elapsed 0.633 ms (63.299 ms / 100) 68.709 -> 68.137 ( -0.83%) [ +1.54% +0.00% +0.59% / -0.83% +8.13% +8.04%] index_select strided 3 : Elapsed 0.698 ms (69.767 ms / 100) 71.678 -> 72.567 ( +1.24%) [ +2.63% +0.56% +0.00% / +1.24% +9.84% +8.41%] index_select strided 5 : Elapsed 0.736 ms (73.560 ms / 100) 69.308 -> 67.791 ( -2.19%) [ +0.15% +0.00% +1.80% / -2.19% +9.06% +9.11%] index_select strided 7 : Elapsed 0.694 ms (69.411 ms / 100) 67.774 -> 67.555 ( -0.32%) [ +1.07% +0.00% +0.84% / -0.32% +12.36% +14.61%] index_select strided 8 : Elapsed 0.685 ms (68.496 ms / 100) 64.633 -> 65.814 ( +1.83%) [ +1.58% +3.72% +0.00% / +1.83% +17.52% +16.61%] index_select random : Elapsed 0.657 ms (65.653 ms / 100) 65.967 -> 67.218 ( +1.90%) [ +0.00% +1.90% +1.47% / +1.90% +5.04% +4.16%] index_select random_sorted : Elapsed 0.660 ms (65.967 ms / 100) out_shape = [250, 15, 50] in_shape = [250, 15, 150] idx_dim = 2 B = [250, 15, 50] (stride (50, 12500, 1)) A = [250, 15, 150] (stride (15, 1, 3750)) dim = 2 11.875 -> 11.893 ( +0.15%) [ +0.17% +0.10% +0.00% / +0.15% +0.72% +0.85%] index_select const : Elapsed 0.119 ms (11.895 ms / 100) 12.548 -> 12.564 ( +0.13%) [ +0.09% +0.00% +0.29% / +0.13% +0.46% +0.37%] index_select wrap : Elapsed 0.126 ms (12.559 ms / 100) 12.548 -> 12.573 ( +0.20%) [ +0.21% +0.00% +0.18% / +0.20% +0.35% +0.29%] index_select linear : Elapsed 0.126 ms (12.574 ms / 100) 12.719 -> 12.738 ( +0.15%) [ +0.00% +0.06% +0.23% / +0.15% +0.39% +0.21%] index_select reverse : Elapsed 0.127 ms (12.719 ms / 100) 11.889 -> 11.914 ( +0.21%) [ +0.01% +0.03% +0.00% / +0.21% +0.75% +0.41%] index_select skip64 : Elapsed 0.119 ms (11.890 ms / 100) 11.876 -> 11.886 ( +0.08%) [ +0.00% +0.24% +0.06% / +0.08% +0.69% +0.68%] index_select skip256 : Elapsed 0.119 ms (11.876 ms / 100) 12.722 -> 12.724 ( +0.02%) [ +0.12% +0.09% +0.00% / +0.02% +0.86% +1.02%] index_select spread : Elapsed 0.127 ms (12.737 ms / 100) 12.723 -> 12.734 ( +0.09%) [ +0.00% +0.00% +0.18% / +0.09% +0.91% +1.03%] index_select strided 3 : Elapsed 0.127 ms (12.723 ms / 100) 12.679 -> 12.705 ( +0.21%) [ +0.17% +0.18% +0.00% / +0.21% +1.03% +1.06%] index_select strided 5 : Elapsed 0.127 ms (12.701 ms / 100) 12.755 -> 12.760 ( +0.04%) [ +0.04% +0.09% +0.00% / +0.04% +0.39% +0.40%] index_select strided 7 : Elapsed 0.128 ms (12.760 ms / 100) 12.799 -> 12.800 ( +0.01%) [ +0.14% +0.00% +0.02% / +0.06% +0.01% +0.16%] index_select strided 8 : Elapsed 0.128 ms (12.817 ms / 100) 12.741 -> 12.739 ( -0.02%) [ +0.00% +0.14% +0.23% / -0.02% +0.42% +0.57%] index_select strided 16 : Elapsed 0.127 ms (12.741 ms / 100) 12.741 -> 12.753 ( +0.09%) [ +0.00% +0.03% +0.02% / +0.09% +0.66% +0.88%] index_select strided 64 : Elapsed 0.127 ms (12.741 ms / 100) 12.120 -> 12.120 ( +0.00%) [ +0.09% +0.01% +0.00% / +0.00% +0.46% +0.44%] index_select strided 100 : Elapsed 0.121 ms (12.131 ms / 100) 12.790 -> 12.697 ( -0.73%) [ +0.00% +0.08% +0.08% / +0.06% -0.66% -0.73%] index_select random : Elapsed 0.128 ms (12.790 ms / 100) 12.672 -> 12.655 ( -0.13%) [ +0.05% +0.00% +0.06% / +0.06% -0.13% -0.13%] index_select random_sorted : Elapsed 0.127 ms (12.678 ms / 100) 12.707 -> 12.741 ( +0.27%) [ +0.20% +0.00% +0.20% / +0.27% +1.35% +1.29%] index_select perm : Elapsed 0.127 ms (12.732 ms / 100) 12.707 -> 12.686 ( -0.17%) [ +0.03% +0.00% +0.05% / -0.17% +0.88% +0.75%] index_select perm_sorted : Elapsed 0.127 ms (12.711 ms / 100) B = [250, 15, 50] (stride (1, 250, 3750)) A = [250, 15, 150] (stride (2250, 1, 15)) dim = 2 6.340 -> 6.349 ( +0.14%) [ +0.25% +0.46% +0.00% / +0.24% +0.14% +0.36%] index_select const : Elapsed 0.064 ms (6.356 ms / 100) 6.885 -> 6.884 ( -0.01%) [ +0.31% +0.17% +0.00% / +0.25% -0.01% +0.20%] index_select wrap : Elapsed 0.069 ms (6.906 ms / 100) 6.877 -> 6.869 ( -0.12%) [ +0.36% +0.00% +0.26% / -0.12% +0.03% -0.10%] index_select linear : Elapsed 0.069 ms (6.902 ms / 100) 6.899 -> 6.857 ( -0.61%) [ +0.00% +0.01% +0.12% / +0.17% -0.61% -0.41%] index_select reverse : Elapsed 0.069 ms (6.899 ms / 100) 6.349 -> 6.352 ( +0.05%) [ +0.22% +0.25% +0.00% / +0.46% +0.05% +0.09%] index_select skip64 : Elapsed 0.064 ms (6.363 ms / 100) 6.298 -> 6.310 ( +0.19%) [ +0.10% +0.00% +0.22% / +0.25% +0.19% +0.27%] index_select skip256 : Elapsed 0.063 ms (6.304 ms / 100) 7.235 -> 7.241 ( +0.08%) [ +0.06% +0.29% +0.00% / +0.08% +0.17% +0.26%] index_select spread : Elapsed 0.072 ms (7.239 ms / 100) 7.235 -> 7.244 ( +0.12%) [ +0.28% +0.00% +0.01% / +0.12% +0.14% +0.25%] index_select strided 3 : Elapsed 0.073 ms (7.255 ms / 100) 7.298 -> 7.296 ( -0.03%) [ +0.05% +0.01% +0.00% / +0.19% -0.03% +0.42%] index_select strided 5 : Elapsed 0.073 ms (7.302 ms / 100) 7.358 -> 7.363 ( +0.07%) [ +0.18% +0.00% +0.04% / +0.08% +0.07% +0.29%] index_select strided 7 : Elapsed 0.074 ms (7.371 ms / 100) 7.390 -> 7.347 ( -0.58%) [ +0.01% +0.04% +0.00% / -0.03% -0.58% -0.50%] index_select strided 8 : Elapsed 0.074 ms (7.391 ms / 100) 7.401 -> 7.411 ( +0.14%) [ +0.24% +0.00% +0.32% / +0.18% +0.14% +0.20%] index_select strided 16 : Elapsed 0.074 ms (7.419 ms / 100) 7.342 -> 7.357 ( +0.20%) [ +0.18% +0.08% +0.00% / +0.20% +0.52% +0.49%] index_select strided 64 : Elapsed 0.074 ms (7.355 ms / 100) 6.417 -> 6.437 ( +0.31%) [ +0.16% +0.00% +0.05% / +0.31% +1.42% +1.25%] index_select strided 100 : Elapsed 0.064 ms (6.427 ms / 100) 7.339 -> 7.347 ( +0.11%) [ +0.00% +0.01% +0.19% / +0.11% +0.42% +0.15%] index_select random : Elapsed 0.073 ms (7.339 ms / 100) 7.100 -> 7.112 ( +0.17%) [ +0.06% +0.25% +0.00% / +0.30% +0.46% +0.17%] index_select random_sorted : Elapsed 0.071 ms (7.104 ms / 100) 7.378 -> 7.344 ( -0.46%) [ +0.00% +0.30% +0.38% / +0.14% -0.43% -0.46%] index_select perm : Elapsed 0.074 ms (7.378 ms / 100) 7.147 -> 7.115 ( -0.45%) [ +0.03% +0.00% +0.18% / +0.08% -0.28% -0.45%] index_select perm_sorted : Elapsed 0.071 ms (7.149 ms / 100) B = [250, 15, 50] (stride (1, 250, 3750)) A = [250, 15, 150] (stride (1, 250, 3750)) dim = 2 6.401 -> 6.414 ( +0.20%) [ +0.11% +0.00% +0.16% / +0.27% +0.36% +0.20%] index_select const : Elapsed 0.064 ms (6.408 ms / 100) 6.755 -> 6.747 ( -0.12%) [ +0.00% +0.34% +0.15% / +0.30% -0.12% +0.19%] index_select wrap : Elapsed 0.068 ms (6.755 ms / 100) 6.766 -> 6.738 ( -0.41%) [ +0.25% +0.22% +0.00% / +0.09% -0.41% +0.00%] index_select linear : Elapsed 0.068 ms (6.783 ms / 100) 6.757 -> 6.764 ( +0.10%) [ +0.31% +0.00% +0.25% / +0.10% +0.19% +0.15%] index_select reverse : Elapsed 0.068 ms (6.778 ms / 100) 6.403 -> 6.405 ( +0.03%) [ +0.12% +0.23% +0.00% / +0.03% +0.20% +0.22%] index_select skip64 : Elapsed 0.064 ms (6.411 ms / 100) 6.333 -> 6.332 ( -0.02%) [ +0.19% +0.00% +0.03% / -0.02% +0.77% +0.87%] index_select skip256 : Elapsed 0.063 ms (6.345 ms / 100) 6.866 -> 6.775 ( -1.33%) [ +0.07% +0.28% +0.00% / +0.36% -1.33% -1.30%] index_select spread : Elapsed 0.069 ms (6.871 ms / 100) 6.883 -> 6.787 ( -1.39%) [ +0.09% +0.15% +0.00% / +0.17% -1.31% -1.39%] index_select strided 3 : Elapsed 0.069 ms (6.889 ms / 100) 6.769 -> 6.769 ( +0.00%) [ +0.10% +0.22% +0.00% / +0.00% +0.66% +0.84%] index_select strided 5 : Elapsed 0.068 ms (6.776 ms / 100) 6.857 -> 6.859 ( +0.03%) [ +0.07% +0.00% +0.12% / +0.25% +0.03% +0.22%] index_select strided 7 : Elapsed 0.069 ms (6.862 ms / 100) 6.814 -> 6.818 ( +0.06%) [ +0.15% +0.00% +0.16% / +0.06% +0.41% +0.29%] index_select strided 8 : Elapsed 0.068 ms (6.824 ms / 100) 6.781 -> 6.779 ( -0.03%) [ +0.24% +0.00% +0.18% / +0.18% +0.52% -0.03%] index_select strided 16 : Elapsed 0.068 ms (6.797 ms / 100) 6.830 -> 6.829 ( -0.01%) [ +0.32% +0.00% +0.07% / +0.28% +0.00% -0.01%] index_select strided 64 : Elapsed 0.069 ms (6.852 ms / 100) 6.428 -> 6.437 ( +0.14%) [ +0.00% +0.25% +0.26% / +0.14% +0.48% +0.54%] index_select strided 100 : Elapsed 0.064 ms (6.428 ms / 100) 6.742 -> 6.755 ( +0.19%) [ +0.00% +0.55% +0.64% / +0.19% +0.59% +0.56%] index_select random : Elapsed 0.067 ms (6.742 ms / 100) 6.684 -> 6.694 ( +0.15%) [ +0.00% +0.10% +0.18% / +0.15% +0.76% +0.45%] index_select random_sorted : Elapsed 0.067 ms (6.684 ms / 100) 6.804 -> 6.811 ( +0.10%) [ +0.00% +0.01% +0.07% / +0.10% +0.56% +0.26%] index_select perm : Elapsed 0.068 ms (6.804 ms / 100) 6.760 -> 6.768 ( +0.12%) [ +0.19% +0.00% +0.36% / +0.12% +1.14% +1.21%] index_select perm_sorted : Elapsed 0.068 ms (6.773 ms / 100) out_shape = [50, 150, 15] in_shape = [250, 150, 15] idx_dim = 0 B = [50, 150, 15] (stride (2250, 15, 1)) A = [250, 150, 15] (stride (2250, 1, 150)) dim = 0 8.770 -> 8.769 ( -0.01%) [ +0.00% +0.05% +0.15% / -0.01% +0.24% +0.19%] index_select const : Elapsed 0.088 ms (8.770 ms / 100) 9.200 -> 9.208 ( +0.09%) [ +0.30% +0.00% +0.15% / +0.09% +0.17% +0.22%] index_select wrap : Elapsed 0.092 ms (9.228 ms / 100) 9.209 -> 9.215 ( +0.07%) [ +0.07% +0.04% +0.00% / +0.27% +0.46% +0.07%] index_select linear : Elapsed 0.092 ms (9.215 ms / 100) 9.191 -> 9.192 ( +0.01%) [ +0.17% +0.00% +0.14% / +0.01% +0.34% +0.34%] index_select reverse : Elapsed 0.092 ms (9.207 ms / 100) 8.772 -> 8.767 ( -0.06%) [ +0.00% +0.08% +0.05% / -0.06% +0.22% +0.24%] index_select skip64 : Elapsed 0.088 ms (8.772 ms / 100) 8.757 -> 8.789 ( +0.37%) [ +0.00% +0.13% +0.40% / +0.37% +0.39% +0.67%] index_select skip256 : Elapsed 0.088 ms (8.757 ms / 100) 9.280 -> 9.264 ( -0.17%) [ +0.20% +0.00% +0.15% / +0.22% -0.17% -0.17%] index_select spread : Elapsed 0.093 ms (9.299 ms / 100) 9.278 -> 9.280 ( +0.02%) [ +0.00% +0.26% +0.10% / +0.09% +0.29% +0.02%] index_select strided 3 : Elapsed 0.093 ms (9.278 ms / 100) 9.291 -> 9.262 ( -0.31%) [ +0.00% +0.04% +0.12% / +0.14% -0.31% -0.30%] index_select strided 5 : Elapsed 0.093 ms (9.291 ms / 100) 9.304 -> 9.319 ( +0.16%) [ +0.18% +0.00% +0.34% / +0.16% +0.31% +0.51%] index_select strided 7 : Elapsed 0.093 ms (9.321 ms / 100) 9.217 -> 9.210 ( -0.08%) [ +0.10% +0.00% +0.21% / -0.08% +1.11% +0.87%] index_select strided 8 : Elapsed 0.092 ms (9.226 ms / 100) 9.290 -> 9.288 ( -0.02%) [ +0.00% +0.08% +0.02% / -0.02% +0.82% +0.78%] index_select strided 16 : Elapsed 0.093 ms (9.290 ms / 100) 9.266 -> 9.258 ( -0.09%) [ +0.14% +0.00% +0.10% / -0.09% +0.43% +0.47%] index_select strided 64 : Elapsed 0.093 ms (9.279 ms / 100) 8.843 -> 8.845 ( +0.02%) [ +0.03% +0.09% +0.00% / +0.11% +0.19% +0.02%] index_select strided 100 : Elapsed 0.088 ms (8.846 ms / 100) 9.248 -> 9.234 ( -0.15%) [ +0.01% +0.00% +0.03% / -0.15% +0.70% +0.72%] index_select random : Elapsed 0.092 ms (9.249 ms / 100) 9.199 -> 9.202 ( +0.03%) [ +0.13% +0.25% +0.00% / +0.03% +0.45% +0.60%] index_select random_sorted : Elapsed 0.092 ms (9.211 ms / 100) 9.300 -> 9.275 ( -0.27%) [ +0.00% +0.09% +0.00% / +0.04% -0.27% -0.17%] index_select perm : Elapsed 0.093 ms (9.300 ms / 100) 9.296 -> 9.267 ( -0.31%) [ +0.00% +0.27% +0.30% / +0.01% -0.06% -0.31%] index_select perm_sorted : Elapsed 0.093 ms (9.296 ms / 100) B = [50, 150, 15] (stride (2250, 1, 150)) A = [250, 150, 15] (stride (2250, 1, 150)) dim = 0 5.531 -> 5.539 ( +0.14%) [ +0.20% +0.00% +0.25% / +0.14% +0.47% +0.78%] index_select const : Elapsed 0.055 ms (5.542 ms / 100) 5.858 -> 5.857 ( -0.02%) [ +0.27% +0.00% +0.07% / +0.09% -0.02% +0.03%] index_select wrap : Elapsed 0.059 ms (5.874 ms / 100) 5.873 -> 5.871 ( -0.03%) [ +0.02% +0.12% +0.00% / +0.07% -0.03% -0.03%] index_select linear : Elapsed 0.059 ms (5.874 ms / 100) 5.876 -> 5.874 ( -0.03%) [ +0.03% +0.05% +0.00% / +0.26% +0.07% -0.03%] index_select reverse : Elapsed 0.059 ms (5.878 ms / 100) 5.541 -> 5.538 ( -0.05%) [ +0.31% +0.11% +0.00% / -0.05% +0.32% +0.16%] index_select skip64 : Elapsed 0.056 ms (5.558 ms / 100) 5.548 -> 5.530 ( -0.32%) [ +0.07% +0.00% +0.04% / -0.32% +0.11% +0.27%] index_select skip256 : Elapsed 0.056 ms (5.552 ms / 100) 5.897 -> 5.894 ( -0.05%) [ +0.00% +0.08% +0.25% / -0.05% +0.42% +0.44%] index_select spread : Elapsed 0.059 ms (5.897 ms / 100) 5.863 -> 5.867 ( +0.07%) [ +0.00% +0.02% +0.12% / +0.07% +1.07% +1.06%] index_select strided 3 : Elapsed 0.059 ms (5.863 ms / 100) 5.883 -> 5.898 ( +0.25%) [ +0.25% +0.22% +0.00% / +0.25% +0.66% +0.66%] index_select strided 5 : Elapsed 0.059 ms (5.898 ms / 100) 5.877 -> 5.872 ( -0.09%) [ +0.09% +0.07% +0.00% / +0.14% +0.19% -0.09%] index_select strided 7 : Elapsed 0.059 ms (5.882 ms / 100) 5.907 -> 5.867 ( -0.68%) [ +0.07% +0.00% +0.05% / +0.15% -0.61% -0.68%] index_select strided 8 : Elapsed 0.059 ms (5.911 ms / 100) 5.883 -> 5.847 ( -0.61%) [ +0.00% +0.15% +0.19% / +0.08% -0.53% -0.61%] index_select strided 16 : Elapsed 0.059 ms (5.883 ms / 100) 5.909 -> 5.877 ( -0.54%) [ +0.00% +0.19% +0.03% / -0.08% -0.54% -0.37%] index_select strided 64 : Elapsed 0.059 ms (5.909 ms / 100) 5.580 -> 5.586 ( +0.11%) [ +0.00% +0.13% +0.13% / +0.30% +0.11% +0.13%] index_select strided 100 : Elapsed 0.056 ms (5.580 ms / 100) 5.876 -> 5.877 ( +0.02%) [ +0.00% +0.12% +0.03% / +0.02% +0.05% +0.09%] index_select random : Elapsed 0.059 ms (5.876 ms / 100) 5.819 -> 5.828 ( +0.15%) [ +0.00% +0.00% +0.40% / +0.15% +0.19% +0.22%] index_select random_sorted : Elapsed 0.058 ms (5.819 ms / 100) 5.907 -> 5.894 ( -0.22%) [ +0.00% +0.03% +0.14% / -0.22% +0.03% -0.03%] index_select perm : Elapsed 0.059 ms (5.907 ms / 100) 5.883 -> 5.881 ( -0.03%) [ +0.25% +0.00% +0.15% / +0.12% +0.41% -0.03%] index_select perm_sorted : Elapsed 0.059 ms (5.898 ms / 100) B = [50, 150, 15] (stride (15, 750, 1)) A = [250, 150, 15] (stride (150, 1, 37500)) dim = 0 5.506 -> 5.514 ( +0.15%) [ +0.00% +0.07% +0.29% / +0.15% +0.49% +0.42%] index_select const : Elapsed 0.055 ms (5.506 ms / 100) 5.918 -> 5.930 ( +0.20%) [ +0.14% +0.44% +0.00% / +0.32% +0.20% +0.41%] index_select wrap : Elapsed 0.059 ms (5.926 ms / 100) 5.920 -> 5.923 ( +0.05%) [ +0.32% +0.00% +0.17% / +0.05% +0.86% +0.41%] index_select linear : Elapsed 0.059 ms (5.939 ms / 100) 5.929 -> 5.935 ( +0.10%) [ +0.00% +0.15% +0.24% / +0.10% +0.27% +0.44%] index_select reverse : Elapsed 0.059 ms (5.929 ms / 100) 5.479 -> 5.474 ( -0.09%) [ +0.18% +0.00% +0.13% / -0.09% +0.93% +1.06%] index_select skip64 : Elapsed 0.055 ms (5.489 ms / 100) 5.479 -> 5.483 ( +0.07%) [ +0.37% +0.00% +0.16% / +0.07% +1.15% +1.06%] index_select skip256 : Elapsed 0.055 ms (5.499 ms / 100) 6.036 -> 6.023 ( -0.22%) [ +0.20% +0.00% +0.25% / +0.13% -0.22% -0.15%] index_select spread : Elapsed 0.060 ms (6.048 ms / 100) 6.038 -> 5.972 ( -1.09%) [ +0.31% +0.00% +0.35% / -0.28% -1.09% -1.09%] index_select strided 3 : Elapsed 0.061 ms (6.057 ms / 100) 6.035 -> 6.019 ( -0.27%) [ +0.31% +0.00% +0.28% / +0.35% -0.07% -0.27%] index_select strided 5 : Elapsed 0.061 ms (6.054 ms / 100) 6.017 -> 6.018 ( +0.02%) [ +0.05% +0.00% +0.03% / +0.02% +0.12% +0.03%] index_select strided 7 : Elapsed 0.060 ms (6.020 ms / 100) 6.008 -> 6.017 ( +0.15%) [ +0.00% +0.05% +0.27% / +0.15% +0.30% +0.17%] index_select strided 8 : Elapsed 0.060 ms (6.008 ms / 100) 6.033 -> 6.031 ( -0.03%) [ +0.33% +0.00% +0.13% / -0.03% +0.50% +0.36%] index_select strided 16 : Elapsed 0.061 ms (6.053 ms / 100) 5.999 -> 6.002 ( +0.05%) [ +0.32% +0.00% +0.03% / +0.05% +0.95% +1.18%] index_select strided 64 : Elapsed 0.060 ms (6.018 ms / 100) 5.524 -> 5.516 ( -0.14%) [ +0.11% +0.45% +0.00% / -0.14% +0.04% +0.05%] index_select strided 100 : Elapsed 0.055 ms (5.530 ms / 100) 5.921 -> 5.937 ( +0.27%) [ +0.51% +0.00% +0.49% / +0.27% +2.52% +2.38%] index_select random : Elapsed 0.060 ms (5.951 ms / 100) 5.915 -> 5.929 ( +0.24%) [ +0.30% +0.30% +0.00% / +0.24% +1.42% +1.25%] index_select random_sorted : Elapsed 0.059 ms (5.933 ms / 100) 6.007 -> 6.016 ( +0.15%) [ +0.12% +0.00% +0.10% / +0.15% +0.42% +0.22%] index_select perm : Elapsed 0.060 ms (6.014 ms / 100) 6.007 -> 5.981 ( -0.43%) [ +0.18% +0.10% +0.00% / +0.07% -0.27% -0.43%] index_select perm_sorted : Elapsed 0.060 ms (6.018 ms / 100) B = [50, 150, 15] (stride (1, 750, 50)) A = [250, 150, 15] (stride (15, 3750, 1)) dim = 0 9.223 -> 9.234 ( +0.12%) [ +0.04% +0.23% +0.00% / +0.12% +0.66% +0.50%] index_select const : Elapsed 0.092 ms (9.227 ms / 100) 9.947 -> 9.952 ( +0.05%) [ +0.00% +0.13% +0.26% / +0.05% +0.64% +0.32%] index_select wrap : Elapsed 0.099 ms (9.947 ms / 100) 9.965 -> 9.966 ( +0.01%) [ +0.00% +0.09% +0.11% / +0.01% +0.23% +0.11%] index_select linear : Elapsed 0.100 ms (9.965 ms / 100) 9.944 -> 9.960 ( +0.16%) [ +0.00% +0.04% +0.02% / +0.16% +0.51% +0.55%] index_select reverse : Elapsed 0.099 ms (9.944 ms / 100) 9.223 -> 9.236 ( +0.14%) [ +0.13% +0.23% +0.00% / +0.14% +0.51% +0.46%] index_select skip64 : Elapsed 0.092 ms (9.235 ms / 100) 9.227 -> 9.232 ( +0.05%) [ +0.11% +0.00% +0.14% / +0.05% +0.52% +0.60%] index_select skip256 : Elapsed 0.092 ms (9.237 ms / 100) 10.384 -> 10.375 ( -0.09%) [ +0.00% +0.04% +0.10% / -0.09% +0.16% +0.23%] index_select spread : Elapsed 0.104 ms (10.384 ms / 100) 10.337 -> 10.340 ( +0.03%) [ +0.16% +0.00% +0.07% / +0.03% +0.41% +0.39%] index_select strided 3 : Elapsed 0.104 ms (10.354 ms / 100) 10.358 -> 10.376 ( +0.17%) [ +0.34% +0.00% +0.14% / +0.17% +0.45% +0.51%] index_select strided 5 : Elapsed 0.104 ms (10.393 ms / 100) 10.368 -> 10.378 ( +0.10%) [ +0.16% +0.00% +0.12% / +0.10% +1.03% +0.89%] index_select strided 7 : Elapsed 0.104 ms (10.385 ms / 100) 10.393 -> 10.416 ( +0.22%) [ +0.10% +0.00% +0.09% / +0.22% +0.46% +0.64%] index_select strided 8 : Elapsed 0.104 ms (10.403 ms / 100) 10.416 -> 10.425 ( +0.09%) [ +0.00% +0.05% +0.01% / +0.09% +0.51% +0.59%] index_select strided 16 : Elapsed 0.104 ms (10.416 ms / 100) 10.425 -> 10.441 ( +0.15%) [ +0.07% +0.08% +0.00% / +0.15% +0.80% +0.59%] index_select strided 64 : Elapsed 0.104 ms (10.432 ms / 100) 9.543 -> 9.561 ( +0.19%) [ +0.17% +0.10% +0.00% / +0.19% +0.80% +0.66%] index_select strided 100 : Elapsed 0.096 ms (9.559 ms / 100) 10.334 -> 10.330 ( -0.04%) [ +0.00% +0.05% +0.02% / -0.04% +0.70% +0.59%] index_select random : Elapsed 0.103 ms (10.334 ms / 100) 10.166 -> 10.175 ( +0.09%) [ +0.06% +0.00% +0.04% / +0.09% +0.75% +0.68%] index_select random_sorted : Elapsed 0.102 ms (10.172 ms / 100) 10.393 -> 10.403 ( +0.10%) [ +0.00% +0.13% +0.04% / +0.10% +0.66% +0.37%] index_select perm : Elapsed 0.104 ms (10.393 ms / 100) 10.279 -> 10.287 ( +0.08%) [ +0.00% +0.03% +0.21% / +0.08% +0.56% +0.60%] index_select perm_sorted : Elapsed 0.103 ms (10.279 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) dim = 0 fill_cnt = 250 10.590 -> 10.586 ( -0.04%) [ +0.09% +0.10% +0.00% / +0.07% -0.04% +0.02%] index_fill_ const : Elapsed 0.106 ms (10.600 ms / 100) 9.691 -> 9.675 ( -0.17%) [ +0.00% +0.18% +0.10% / -0.08% -0.09% -0.17%] index_fill_ linear : Elapsed 0.097 ms (9.691 ms / 100) 9.731 -> 9.727 ( -0.04%) [ +0.12% +0.00% +0.18% / -0.04% +0.03% +0.04%] index_fill_ reverse : Elapsed 0.097 ms (9.743 ms / 100) 10.748 -> 10.724 ( -0.22%) [ +0.04% +0.05% +0.00% / +0.00% -0.12% -0.22%] index_fill_ skip64 : Elapsed 0.108 ms (10.752 ms / 100) 10.757 -> 10.737 ( -0.19%) [ +0.01% +0.00% +0.03% / -0.09% -0.15% -0.19%] index_fill_ skip256 : Elapsed 0.108 ms (10.758 ms / 100) 7.484 -> 7.478 ( -0.08%) [ +0.03% +0.00% +0.16% / -0.08% +0.47% +0.32%] index_fill_ spread : Elapsed 0.075 ms (7.486 ms / 100) 6.620 -> 6.612 ( -0.12%) [ +0.33% +0.02% +0.00% / -0.12% +0.17% -0.02%] index_fill_ strided 3 : Elapsed 0.066 ms (6.642 ms / 100) 6.614 -> 6.604 ( -0.15%) [ +0.18% +0.17% +0.00% / -0.15% +0.38% +0.09%] index_fill_ strided 5 : Elapsed 0.066 ms (6.626 ms / 100) 7.159 -> 7.151 ( -0.11%) [ +0.00% +0.28% +0.35% / -0.11% +0.25% +0.36%] index_fill_ strided 7 : Elapsed 0.072 ms (7.159 ms / 100) 6.548 -> 6.540 ( -0.12%) [ +0.31% +0.00% +0.21% / +0.00% +0.09% -0.12%] index_fill_ strided 8 : Elapsed 0.066 ms (6.568 ms / 100) 6.527 -> 6.506 ( -0.32%) [ +0.00% +0.00% +0.00% / -0.28% -0.18% -0.32%] index_fill_ strided 16 : Elapsed 0.065 ms (6.527 ms / 100) 6.861 -> 6.858 ( -0.04%) [ +0.00% +0.03% +0.01% / +0.03% -0.04% +0.10%] index_fill_ random : Elapsed 0.069 ms (6.861 ms / 100) 7.565 -> 7.575 ( +0.13%) [ +0.16% +0.00% +0.05% / +0.15% +0.13% +0.69%] index_fill_ random_sorted : Elapsed 0.076 ms (7.577 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [250, 150, 15] (stride (1, 250, 37500)) dim = 0 5.991 -> 5.974 ( -0.28%) [ +0.30% +0.00% +0.02% / -0.28% -0.08% -0.02%] index_select const : Elapsed 0.060 ms (6.009 ms / 100) 6.727 -> 6.721 ( -0.09%) [ +0.36% +0.00% +0.21% / +0.21% +0.00% -0.09%] index_select wrap : Elapsed 0.068 ms (6.751 ms / 100) 6.741 -> 6.723 ( -0.27%) [ +0.30% +0.00% +0.09% / +0.15% -0.27% +0.01%] index_select linear : Elapsed 0.068 ms (6.761 ms / 100) 6.741 -> 6.721 ( -0.30%) [ +0.33% +0.00% +0.22% / -0.07% -0.27% -0.30%] index_select reverse : Elapsed 0.068 ms (6.763 ms / 100) 5.991 -> 5.993 ( +0.03%) [ +0.30% +0.00% +0.03% / +0.17% +0.07% +0.03%] index_select skip64 : Elapsed 0.060 ms (6.009 ms / 100) 5.991 -> 5.991 ( +0.00%) [ +0.13% +0.12% +0.00% / +0.00% +0.03% +0.25%] index_select skip256 : Elapsed 0.060 ms (5.999 ms / 100) 9.202 -> 9.199 ( -0.03%) [ +0.11% +0.00% +0.51% / -0.03% +0.39% +0.32%] index_select spread : Elapsed 0.092 ms (9.212 ms / 100) 8.051 -> 8.031 ( -0.25%) [ +0.11% +0.00% +0.01% / -0.06% -0.21% -0.25%] index_select strided 3 : Elapsed 0.081 ms (8.060 ms / 100) 9.188 -> 9.204 ( +0.17%) [ +0.00% +0.23% +0.12% / +0.17% +0.26% +0.79%] index_select strided 5 : Elapsed 0.092 ms (9.188 ms / 100) 10.170 -> 10.156 ( -0.14%) [ +0.25% +0.39% +0.00% / -0.14% +0.52% +0.31%] index_select strided 7 : Elapsed 0.102 ms (10.195 ms / 100) 10.613 -> 10.591 ( -0.21%) [ +0.00% +0.11% +0.24% / +0.28% -0.21% -0.14%] index_select strided 8 : Elapsed 0.106 ms (10.613 ms / 100) 10.813 -> 10.783 ( -0.28%) [ +0.00% +0.20% +0.16% / +0.32% -0.28% -0.27%] index_select strided 16 : Elapsed 0.108 ms (10.813 ms / 100) 10.423 -> 10.408 ( -0.14%) [ +0.01% +0.17% +0.00% / +0.04% -0.14% -0.14%] index_select strided 64 : Elapsed 0.104 ms (10.424 ms / 100) 8.665 -> 8.645 ( -0.23%) [ +0.01% +0.02% +0.00% / -0.20% -0.23% -0.21%] index_select strided 100 : Elapsed 0.087 ms (8.666 ms / 100) 10.267 -> 10.266 ( -0.01%) [ +0.18% +0.00% +0.00% / +0.06% -0.01% +0.02%] index_select random : Elapsed 0.103 ms (10.285 ms / 100) 8.698 -> 8.717 ( +0.22%) [ +0.32% +0.07% +0.00% / +0.22% +0.36% +0.39%] index_select random_sorted : Elapsed 0.087 ms (8.726 ms / 100) 10.394 -> 10.375 ( -0.18%) [ +0.00% +0.03% +0.00% / +0.10% -0.18% -0.05%] index_select perm : Elapsed 0.104 ms (10.394 ms / 100) 8.924 -> 8.919 ( -0.06%) [ +0.04% +0.21% +0.00% / -0.06% +0.24% +0.09%] index_select perm_sorted : Elapsed 0.089 ms (8.928 ms / 100) B = [50, 150, 15] (stride (1, 50, 7500)) A = [250, 150, 15] (stride (2250, 15, 1)) dim = 0 9.130 -> 9.136 ( +0.07%) [ +0.00% +0.05% +0.15% / +0.07% +0.50% +0.68%] index_select const : Elapsed 0.091 ms (9.130 ms / 100) 9.508 -> 9.531 ( +0.24%) [ +0.00% +0.12% +0.15% / +0.24% +0.43% +0.56%] index_select wrap : Elapsed 0.095 ms (9.508 ms / 100) 9.508 -> 9.516 ( +0.08%) [ +0.05% +0.22% +0.00% / +0.08% +0.60% +0.58%] index_select linear : Elapsed 0.095 ms (9.513 ms / 100) 9.573 -> 9.604 ( +0.32%) [ +0.16% +0.19% +0.00% / +0.32% +0.38% +0.51%] index_select reverse : Elapsed 0.096 ms (9.588 ms / 100) 9.120 -> 9.128 ( +0.09%) [ +0.09% +0.00% +0.14% / +0.09% +0.76% +0.44%] index_select skip64 : Elapsed 0.091 ms (9.128 ms / 100) 9.128 -> 9.132 ( +0.04%) [ +0.02% +0.00% +0.11% / +0.04% +0.83% +0.45%] index_select skip256 : Elapsed 0.091 ms (9.130 ms / 100) 9.588 -> 9.594 ( +0.06%) [ +0.18% +0.00% +0.05% / +0.06% +0.32% +0.42%] index_select spread : Elapsed 0.096 ms (9.605 ms / 100) 9.598 -> 9.614 ( +0.17%) [ +0.04% +0.00% +0.08% / +0.17% +0.64% +0.59%] index_select strided 3 : Elapsed 0.096 ms (9.602 ms / 100) 9.587 -> 9.607 ( +0.21%) [ +0.03% +0.00% +0.13% / +0.21% +0.25% +0.40%] index_select strided 5 : Elapsed 0.096 ms (9.590 ms / 100) 9.592 -> 9.584 ( -0.08%) [ +0.05% +0.04% +0.00% / -0.08% +1.17% +0.93%] index_select strided 7 : Elapsed 0.096 ms (9.597 ms / 100) 9.620 -> 9.623 ( +0.03%) [ +0.15% +0.00% +0.14% / +0.03% +0.49% +0.44%] index_select strided 8 : Elapsed 0.096 ms (9.634 ms / 100) 9.585 -> 9.597 ( +0.13%) [ +0.00% +0.09% +0.23% / +0.13% +0.49% +0.69%] index_select strided 16 : Elapsed 0.096 ms (9.585 ms / 100) 9.575 -> 9.586 ( +0.11%) [ +0.00% +0.24% +0.13% / +0.11% +0.75% +0.66%] index_select strided 64 : Elapsed 0.096 ms (9.575 ms / 100) 9.248 -> 9.263 ( +0.16%) [ +0.04% +0.10% +0.00% / +0.16% +0.48% +0.28%] index_select strided 100 : Elapsed 0.093 ms (9.252 ms / 100) 9.593 -> 9.621 ( +0.29%) [ +0.00% +0.11% +0.06% / +0.33% +0.49% +0.29%] index_select random : Elapsed 0.096 ms (9.593 ms / 100) 9.600 -> 9.611 ( +0.11%) [ +0.00% +0.07% +0.07% / +0.21% +0.39% +0.11%] index_select random_sorted : Elapsed 0.096 ms (9.600 ms / 100) 9.595 -> 9.596 ( +0.01%) [ +0.03% +0.00% +0.01% / +0.01% +0.46% +0.29%] index_select perm : Elapsed 0.096 ms (9.598 ms / 100) 9.592 -> 9.602 ( +0.10%) [ +0.00% +0.15% +0.07% / +0.10% +0.65% +0.49%] index_select perm_sorted : Elapsed 0.096 ms (9.592 ms / 100) out_shape = [250, 50, 15] in_shape = [250, 150, 15] idx_dim = 1 B = [250, 50, 15] (stride (1, 3750, 250)) A = [250, 150, 15] (stride (2250, 15, 1)) dim = 1 6.330 -> 6.287 ( -0.68%) [ +0.41% +0.00% +0.16% / +0.13% -0.60% -0.68%] index_select const : Elapsed 0.064 ms (6.356 ms / 100) 6.966 -> 6.906 ( -0.86%) [ +0.23% +0.24% +0.00% / +0.16% -0.63% -0.86%] index_select wrap : Elapsed 0.070 ms (6.982 ms / 100) 6.955 -> 6.891 ( -0.92%) [ +0.00% +0.27% +0.36% / +0.33% -0.55% -0.92%] index_select linear : Elapsed 0.070 ms (6.955 ms / 100) 6.951 -> 6.928 ( -0.33%) [ +0.27% +0.00% +0.07% / +0.10% -0.33% -0.26%] index_select reverse : Elapsed 0.070 ms (6.970 ms / 100) 6.359 -> 6.280 ( -1.24%) [ +0.11% +0.00% +0.11% / +0.05% -1.02% -1.24%] index_select skip64 : Elapsed 0.064 ms (6.366 ms / 100) 6.295 -> 6.274 ( -0.33%) [ +0.19% +0.00% +0.00% / +0.19% -0.33% -0.19%] index_select skip256 : Elapsed 0.063 ms (6.307 ms / 100) 7.243 -> 7.257 ( +0.19%) [ +0.12% +0.17% +0.00% / +0.19% +0.21% +0.32%] index_select spread : Elapsed 0.073 ms (7.252 ms / 100) 7.244 -> 7.240 ( -0.06%) [ +0.00% +0.17% +0.04% / -0.06% +0.14% +0.11%] index_select strided 3 : Elapsed 0.072 ms (7.244 ms / 100) 7.316 -> 7.281 ( -0.48%) [ +0.00% +0.33% +0.03% / +0.01% -0.48% -0.27%] index_select strided 5 : Elapsed 0.073 ms (7.316 ms / 100) 7.358 -> 7.355 ( -0.04%) [ +0.10% +0.07% +0.00% / +0.10% +0.07% -0.04%] index_select strided 7 : Elapsed 0.074 ms (7.365 ms / 100) 7.401 -> 7.353 ( -0.65%) [ +0.00% +0.07% +0.19% / +0.16% -0.65% -0.43%] index_select strided 8 : Elapsed 0.074 ms (7.401 ms / 100) 7.409 -> 7.413 ( +0.05%) [ +0.05% +0.00% +0.13% / +0.24% +0.05% +0.34%] index_select strided 16 : Elapsed 0.074 ms (7.413 ms / 100) 7.361 -> 7.344 ( -0.23%) [ +0.07% +0.00% +0.12% / +0.07% -0.23% -0.11%] index_select strided 64 : Elapsed 0.074 ms (7.366 ms / 100) 6.467 -> 6.426 ( -0.63%) [ +0.25% +0.19% +0.00% / +0.31% -0.32% -0.63%] index_select strided 100 : Elapsed 0.065 ms (6.483 ms / 100) 7.307 -> 7.313 ( +0.08%) [ +0.26% +0.00% +0.08% / +0.08% +0.11% +0.27%] index_select random : Elapsed 0.073 ms (7.326 ms / 100) 7.093 -> 7.056 ( -0.52%) [ +0.03% +0.01% +0.00% / +0.24% -0.39% -0.52%] index_select random_sorted : Elapsed 0.071 ms (7.095 ms / 100) 7.369 -> 7.352 ( -0.23%) [ +0.01% +0.03% +0.00% / -0.14% -0.23% -0.07%] index_select perm : Elapsed 0.074 ms (7.370 ms / 100) 7.166 -> 7.149 ( -0.24%) [ +0.04% +0.04% +0.00% / +0.11% -0.24% -0.18%] index_select perm_sorted : Elapsed 0.072 ms (7.169 ms / 100) B = [250, 50, 15] (stride (1, 250, 12500)) A = [250, 150, 15] (stride (2250, 15, 1)) dim = 1 6.410 -> 6.416 ( +0.09%) [ +0.11% +0.16% +0.00% / +0.09% +0.51% +0.62%] index_select const : Elapsed 0.064 ms (6.417 ms / 100) 6.918 -> 6.909 ( -0.13%) [ +0.00% +0.13% +0.16% / +0.10% -0.09% -0.13%] index_select wrap : Elapsed 0.069 ms (6.918 ms / 100) 6.919 -> 6.904 ( -0.22%) [ +0.09% +0.00% +0.17% / +0.19% -0.22% +0.04%] index_select linear : Elapsed 0.069 ms (6.925 ms / 100) 6.914 -> 6.878 ( -0.52%) [ +0.27% +0.00% +0.29% / +0.12% -0.27% -0.52%] index_select reverse : Elapsed 0.069 ms (6.933 ms / 100) 6.444 -> 6.448 ( +0.06%) [ +0.03% +0.19% +0.00% / +0.06% +0.14% +0.14%] index_select skip64 : Elapsed 0.064 ms (6.446 ms / 100) 6.410 -> 6.419 ( +0.14%) [ +0.12% +0.00% +0.14% / +0.19% +0.42% +0.14%] index_select skip256 : Elapsed 0.064 ms (6.418 ms / 100) 7.237 -> 7.240 ( +0.04%) [ +0.21% +0.06% +0.00% / +0.04% +0.35% +0.83%] index_select spread : Elapsed 0.073 ms (7.252 ms / 100) 7.226 -> 7.224 ( -0.03%) [ +0.00% +0.40% +0.10% / -0.03% +0.47% +0.60%] index_select strided 3 : Elapsed 0.072 ms (7.226 ms / 100) 7.314 -> 7.293 ( -0.29%) [ +0.14% +0.00% +0.01% / -0.29% -0.07% +0.22%] index_select strided 5 : Elapsed 0.073 ms (7.324 ms / 100) 7.368 -> 7.374 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.23% +0.23%] index_select strided 7 : Elapsed 0.074 ms (7.377 ms / 100) 7.403 -> 7.377 ( -0.35%) [ +0.08% +0.00% +0.16% / +0.24% -0.35% -0.31%] index_select strided 8 : Elapsed 0.074 ms (7.409 ms / 100) 7.440 -> 7.422 ( -0.24%) [ +0.00% +0.13% +0.11% / +0.09% +0.09% -0.24%] index_select strided 16 : Elapsed 0.074 ms (7.440 ms / 100) 7.360 -> 7.374 ( +0.19%) [ +0.00% +0.10% +0.34% / +0.19% +0.56% +0.52%] index_select strided 64 : Elapsed 0.074 ms (7.360 ms / 100) 6.505 -> 6.500 ( -0.08%) [ +0.35% +0.00% +0.18% / -0.08% +0.78% +0.86%] index_select strided 100 : Elapsed 0.065 ms (6.528 ms / 100) 7.345 -> 7.341 ( -0.05%) [ +0.00% +0.12% +0.11% / -0.05% +0.10% +0.27%] index_select random : Elapsed 0.073 ms (7.345 ms / 100) 7.080 -> 7.067 ( -0.18%) [ +0.17% +0.00% +0.11% / +0.10% -0.03% -0.18%] index_select random_sorted : Elapsed 0.071 ms (7.092 ms / 100) 7.385 -> 7.346 ( -0.53%) [ +0.09% +0.07% +0.00% / +0.14% -0.53% -0.39%] index_select perm : Elapsed 0.074 ms (7.392 ms / 100) 7.152 -> 7.151 ( -0.01%) [ +0.32% +0.00% +0.06% / +0.08% +0.15% -0.01%] index_select perm_sorted : Elapsed 0.072 ms (7.175 ms / 100) out_shape = [250, 150, 50] in_shape = [250, 150, 15] idx_dim = 2 B = [250, 150, 50] (stride (7500, 50, 1)) A = [250, 150, 15] (stride (150, 1, 37500)) dim = 2 69.076 -> 69.121 ( +0.07%) [ +0.00% +0.16% +0.33% / +0.14% +0.08% +0.07%] index_add_ linear : Elapsed 0.691 ms (69.076 ms / 100) 46.675 -> 46.702 ( +0.06%) [ +0.00% +0.24% +0.03% / +0.06% +0.32% +0.26%] index_copy_ linear : Elapsed 0.467 ms (46.675 ms / 100) 68.839 -> 69.004 ( +0.24%) [ +0.05% +0.00% +0.00% / +0.24% +0.39% +0.46%] index_add_ reverse : Elapsed 0.689 ms (68.876 ms / 100) 46.563 -> 46.524 ( -0.08%) [ +0.04% +0.00% +0.08% / -0.08% +0.48% +0.39%] index_copy_ reverse : Elapsed 0.466 ms (46.581 ms / 100) 68.933 -> 68.974 ( +0.06%) [ +0.07% +0.13% +0.00% / +0.11% +0.06% +0.39%] index_add_ spread : Elapsed 0.690 ms (68.981 ms / 100) 46.638 -> 46.656 ( +0.04%) [ +0.00% +0.16% +0.10% / +0.04% +0.32% +0.27%] index_copy_ spread : Elapsed 0.466 ms (46.638 ms / 100) 68.963 -> 68.918 ( -0.07%) [ +0.00% +0.07% +0.17% / -0.07% +0.51% +0.40%] index_add_ strided 3 : Elapsed 0.690 ms (68.963 ms / 100) 46.588 -> 46.626 ( +0.08%) [ +0.00% +0.16% +0.22% / +0.08% +0.77% +0.56%] index_copy_ strided 3 : Elapsed 0.466 ms (46.588 ms / 100) 68.880 -> 68.909 ( +0.04%) [ +0.26% +0.24% +0.00% / +0.04% +0.34% +0.32%] index_add_ strided 7 : Elapsed 0.691 ms (69.056 ms / 100) 46.657 -> 46.612 ( -0.10%) [ +0.00% +0.02% +0.11% / -0.10% +0.39% +0.07%] index_copy_ strided 7 : Elapsed 0.467 ms (46.657 ms / 100) 68.901 -> 69.123 ( +0.32%) [ +0.00% +0.10% +0.06% / +0.32% +0.58% +0.57%] index_add_ perm : Elapsed 0.689 ms (68.901 ms / 100) 46.653 -> 46.692 ( +0.08%) [ +0.14% +0.06% +0.00% / +0.08% +0.27% +0.39%] index_copy_ perm : Elapsed 0.467 ms (46.717 ms / 100) 68.885 -> 69.081 ( +0.28%) [ +0.00% +0.55% +0.18% / +0.49% +0.28% +0.37%] index_add_ perm_sorted : Elapsed 0.689 ms (68.885 ms / 100) 46.676 -> 46.686 ( +0.02%) [ +0.03% +0.00% +0.06% / +0.02% +0.09% +0.27%] index_copy_ perm_sorted : Elapsed 0.467 ms (46.690 ms / 100) BEST 130.996 -> 20.786 (-84.13%) [ +0.30% +0.22% +0.00% / -84.12% -84.13% -84.13%] index_select const : Elapsed 1.314 ms (131.389 ms / 100) BEST 133.756 -> 23.317 (-82.57%) [ +0.36% +0.00% +0.20% / -82.28% -82.56% -82.57%] index_select wrap : Elapsed 1.342 ms (134.234 ms / 100) BEST 129.739 -> 22.182 (-82.90%) [ +0.11% +0.07% +0.00% / -82.65% -82.90% -82.89%] index_select linear : Elapsed 1.299 ms (129.880 ms / 100) BEST 133.465 -> 22.581 (-83.08%) [ +0.08% +0.00% +0.13% / -82.95% -83.08% -83.08%] index_select reverse : Elapsed 1.336 ms (133.575 ms / 100) BEST 130.840 -> 20.228 (-84.54%) [ +0.05% +0.27% +0.00% / -84.49% -84.53% -84.54%] index_select skip64 : Elapsed 1.309 ms (130.906 ms / 100) BEST 131.385 -> 20.648 (-84.28%) [ +0.00% +0.11% +0.49% / -84.20% -84.28% -84.28%] index_select skip256 : Elapsed 1.314 ms (131.385 ms / 100) BEST 133.202 -> 22.689 (-82.97%) [ +0.09% +0.13% +0.00% / -82.79% -82.95% -82.97%] index_select spread : Elapsed 1.333 ms (133.325 ms / 100) BEST 135.806 -> 21.744 (-83.99%) [ +0.00% +0.06% +0.18% / -83.76% -83.99% -83.99%] index_select strided 3 : Elapsed 1.358 ms (135.806 ms / 100) BEST 135.285 -> 21.165 (-84.36%) [ +0.05% +0.19% +0.00% / -84.28% -84.34% -84.36%] index_select strided 5 : Elapsed 1.354 ms (135.350 ms / 100) BEST 135.842 -> 23.435 (-82.75%) [ +0.00% +0.17% +0.22% / -82.60% -82.74% -82.75%] index_select strided 7 : Elapsed 1.358 ms (135.842 ms / 100) BEST 135.876 -> 23.413 (-82.77%) [ +0.00% +0.13% +0.23% / -82.77% -82.70% -82.71%] index_select strided 8 : Elapsed 1.359 ms (135.876 ms / 100) BEST 135.494 -> 22.982 (-83.04%) [ +0.05% +0.19% +0.00% / -83.04% -82.94% -82.94%] index_select random : Elapsed 1.356 ms (135.560 ms / 100) BEST 132.902 -> 22.303 (-83.22%) [ +0.16% +0.00% +0.14% / -83.08% -83.22% -83.22%] index_select random_sorted : Elapsed 1.331 ms (133.110 ms / 100) B = [250, 150, 50] (stride (1, 250, 37500)) A = [250, 150, 15] (stride (2250, 1, 150)) dim = 2 13.443 -> 13.423 ( -0.15%) [ +0.42% +0.00% +0.25% / -0.03% -0.15% +0.21%] index_add_ linear : Elapsed 0.135 ms (13.499 ms / 100) 10.879 -> 10.800 ( -0.73%) [ +0.25% +0.63% +0.00% / +0.20% -0.73% -0.66%] index_copy_ linear : Elapsed 0.109 ms (10.906 ms / 100) 13.401 -> 13.339 ( -0.46%) [ +0.00% +0.24% +0.08% / +0.30% -0.17% -0.46%] index_add_ reverse : Elapsed 0.134 ms (13.401 ms / 100) 10.857 -> 10.747 ( -1.01%) [ +0.38% +1.02% +0.00% / +0.05% -0.95% -1.01%] index_copy_ reverse : Elapsed 0.109 ms (10.898 ms / 100) 13.418 -> 13.476 ( +0.43%) [ +0.00% +0.13% +0.22% / +0.43% +0.62% +1.35%] index_add_ spread : Elapsed 0.134 ms (13.418 ms / 100) 10.822 -> 10.728 ( -0.87%) [ +0.00% +0.11% +0.55% / -0.87% -0.84% -0.66%] index_copy_ spread : Elapsed 0.108 ms (10.822 ms / 100) 13.597 -> 13.574 ( -0.17%) [ +0.00% +0.32% +0.30% / -0.17% +0.71% +0.79%] index_add_ strided 3 : Elapsed 0.136 ms (13.597 ms / 100) 10.870 -> 10.873 ( +0.03%) [ +0.00% +0.30% +0.40% / +0.03% +0.76% +0.89%] index_copy_ strided 3 : Elapsed 0.109 ms (10.870 ms / 100) 13.479 -> 13.404 ( -0.56%) [ +0.38% +0.67% +0.00% / +0.34% -0.56% -0.44%] index_add_ strided 7 : Elapsed 0.135 ms (13.530 ms / 100) 10.955 -> 10.836 ( -1.09%) [ +0.16% +0.00% +0.34% / -0.17% -0.12% -1.09%] index_copy_ strided 7 : Elapsed 0.110 ms (10.973 ms / 100) 13.331 -> 13.367 ( +0.27%) [ +0.00% +0.29% +0.18% / +0.27% +1.31% +1.56%] index_add_ perm : Elapsed 0.133 ms (13.331 ms / 100) 10.844 -> 10.755 ( -0.82%) [ +0.00% +0.22% +0.05% / -0.30% -0.53% -0.82%] index_copy_ perm : Elapsed 0.108 ms (10.844 ms / 100) 13.401 -> 13.401 ( +0.00%) [ +0.00% +0.16% +0.01% / +0.00% +0.51% +0.19%] index_add_ perm_sorted : Elapsed 0.134 ms (13.401 ms / 100) 10.928 -> 10.752 ( -1.61%) [ +0.00% +0.08% +0.02% / -0.50% -1.61% -1.24%] index_copy_ perm_sorted : Elapsed 0.109 ms (10.928 ms / 100) 54.882 -> 54.059 ( -1.50%) [ +0.00% +0.15% +0.32% / -1.50% +0.47% +0.25%] index_select const : Elapsed 0.549 ms (54.882 ms / 100) 56.658 -> 55.245 ( -2.49%) [ +0.00% +1.56% +0.31% / +0.41% -2.49% -1.18%] index_select wrap : Elapsed 0.567 ms (56.658 ms / 100) 57.353 -> 57.363 ( +0.02%) [ +1.33% +2.24% +0.00% / +0.02% +10.67% +11.23%] index_select linear : Elapsed 0.581 ms (58.116 ms / 100) 51.382 -> 52.949 ( +3.05%) [ +2.40% +5.50% +0.00% / +3.05% +11.18% +10.83%] index_select reverse : Elapsed 0.526 ms (52.616 ms / 100) 53.468 -> 53.790 ( +0.60%) [ +0.00% +0.94% +1.24% / +1.19% +2.49% +0.60%] index_select skip64 : Elapsed 0.535 ms (53.468 ms / 100) 54.003 -> 54.213 ( +0.39%) [ +0.00% +1.49% +2.83% / +2.92% +1.19% +0.39%] index_select skip256 : Elapsed 0.540 ms (54.003 ms / 100) 53.665 -> 52.642 ( -1.91%) [ +0.02% +0.00% +0.81% / -0.36% -1.91% -0.44%] index_select spread : Elapsed 0.537 ms (53.677 ms / 100) 56.173 -> 56.107 ( -0.12%) [ +0.20% +0.70% +0.00% / -0.12% +10.49% +9.94%] index_select strided 3 : Elapsed 0.563 ms (56.288 ms / 100) 59.961 -> 58.754 ( -2.01%) [ +3.10% +2.74% +0.00% / +2.10% -2.01% -1.17%] index_select strided 5 : Elapsed 0.618 ms (61.821 ms / 100) 56.633 -> 56.645 ( +0.02%) [ +0.20% +0.00% +0.45% / +0.90% +0.76% +0.02%] index_select strided 7 : Elapsed 0.567 ms (56.746 ms / 100) 56.052 -> 56.146 ( +0.17%) [ +1.18% +2.26% +0.00% / +0.17% +2.73% +1.35%] index_select strided 8 : Elapsed 0.567 ms (56.713 ms / 100) 56.364 -> 56.561 ( +0.35%) [ +0.00% +2.03% +1.02% / +1.96% +0.35% +1.68%] index_select random : Elapsed 0.564 ms (56.364 ms / 100) 56.731 -> 55.958 ( -1.36%) [ +0.00% +0.56% +0.16% / -0.22% -1.36% +0.47%] index_select random_sorted : Elapsed 0.567 ms (56.731 ms / 100) out_shape = [150, 50, 250] in_shape = [15, 50, 250] idx_dim = 0 B = [150, 50, 250] (stride (12500, 250, 1)) dim = 0 fill_cnt = 15 2.062 -> 2.066 ( +0.19%) [ +0.19% +0.05% +0.00% / +0.19% +0.48% +1.26%] index_fill_ const : Elapsed 0.021 ms (2.066 ms / 100) 2.089 -> 2.092 ( +0.14%) [ +0.00% +0.14% +0.00% / +0.14% +0.57% +0.72%] index_fill_ linear : Elapsed 0.021 ms (2.089 ms / 100) 2.101 -> 2.110 ( +0.43%) [ +0.14% +0.67% +0.00% / +0.43% +0.86% +0.76%] index_fill_ reverse : Elapsed 0.021 ms (2.104 ms / 100) 2.063 -> 2.065 ( +0.10%) [ +0.19% +0.00% +0.00% / +0.10% +0.78% +0.48%] index_fill_ skip64 : Elapsed 0.021 ms (2.067 ms / 100) 2.062 -> 2.064 ( +0.10%) [ +0.53% +0.00% +0.00% / +0.10% +0.73% +0.82%] index_fill_ skip256 : Elapsed 0.021 ms (2.073 ms / 100) 2.093 -> 2.098 ( +0.24%) [ +0.48% +0.29% +0.00% / +0.24% +0.43% +0.48%] index_fill_ spread : Elapsed 0.021 ms (2.103 ms / 100) 2.087 -> 2.087 ( +0.00%) [ +0.05% +1.68% +0.00% / +0.00% +0.57% +0.67%] index_fill_ strided 3 : Elapsed 0.021 ms (2.088 ms / 100) 2.091 -> 2.098 ( +0.33%) [ +0.14% +0.29% +0.00% / +0.33% +0.43% +0.48%] index_fill_ strided 5 : Elapsed 0.021 ms (2.094 ms / 100) 2.091 -> 2.097 ( +0.29%) [ +0.00% +0.14% +0.43% / +0.29% +0.38% +0.38%] index_fill_ strided 7 : Elapsed 0.021 ms (2.091 ms / 100) 2.125 -> 2.121 ( -0.19%) [ +0.00% +0.05% +0.19% / +0.05% -0.19% -0.14%] index_fill_ strided 8 : Elapsed 0.021 ms (2.125 ms / 100) 2.096 -> 2.098 ( +0.10%) [ +0.00% +0.24% +0.19% / +0.10% +0.48% +0.86%] index_fill_ strided 16 : Elapsed 0.021 ms (2.096 ms / 100) 2.093 -> 2.098 ( +0.24%) [ +0.10% +0.38% +0.00% / +0.24% +0.43% +0.81%] index_fill_ strided 64 : Elapsed 0.021 ms (2.095 ms / 100) 2.070 -> 2.072 ( +0.10%) [ +0.00% +0.14% +0.34% / +0.10% +0.87% +0.63%] index_fill_ strided 100 : Elapsed 0.021 ms (2.070 ms / 100) 2.098 -> 2.100 ( +0.10%) [ +0.05% +0.24% +0.00% / +0.10% +0.24% +0.24%] index_fill_ random : Elapsed 0.021 ms (2.099 ms / 100) 2.096 -> 2.097 ( +0.05%) [ +0.19% +0.29% +0.00% / +0.05% +0.14% +0.48%] index_fill_ random_sorted : Elapsed 0.021 ms (2.100 ms / 100) 2.096 -> 2.099 ( +0.14%) [ +0.00% +0.14% +0.05% / +0.14% +0.38% +0.62%] index_fill_ perm : Elapsed 0.021 ms (2.096 ms / 100) 2.100 -> 2.100 ( +0.00%) [ +0.05% +0.24% +0.00% / +0.00% +0.43% +0.33%] index_fill_ perm_sorted : Elapsed 0.021 ms (2.101 ms / 100) B = [150, 50, 250] (stride (50, 1, 7500)) dim = 0 fill_cnt = 15 3.197 -> 3.200 ( +0.09%) [ +0.00% +0.00% +0.06% / +0.09% +10.73% +10.73%] index_fill_ const : Elapsed 0.032 ms (3.197 ms / 100) 3.084 -> 3.083 ( -0.03%) [ +0.00% +0.16% +0.03% / +0.10% +0.45% -0.03%] index_fill_ linear : Elapsed 0.031 ms (3.084 ms / 100) 3.070 -> 3.068 ( -0.07%) [ +0.36% +0.26% +0.00% / +0.33% -0.07% -0.03%] index_fill_ reverse : Elapsed 0.031 ms (3.081 ms / 100) 3.196 -> 3.197 ( +0.03%) [ +0.00% +0.13% +0.09% / +0.03% +10.79% +10.76%] index_fill_ skip64 : Elapsed 0.032 ms (3.196 ms / 100) 3.194 -> 3.199 ( +0.16%) [ +0.19% +0.00% +0.19% / +0.16% +10.83% +10.86%] index_fill_ skip256 : Elapsed 0.032 ms (3.200 ms / 100) 3.042 -> 3.041 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +4.11% +4.11%] index_fill_ spread : Elapsed 0.030 ms (3.043 ms / 100) 3.018 -> 3.019 ( +0.03%) [ +0.07% +0.13% +0.00% / +0.03% +1.76% +1.59%] index_fill_ strided 3 : Elapsed 0.030 ms (3.020 ms / 100) 3.011 -> 3.015 ( +0.13%) [ +0.13% +0.10% +0.00% / +0.13% +1.49% +1.30%] index_fill_ strided 5 : Elapsed 0.030 ms (3.015 ms / 100) 3.096 -> 3.103 ( +0.23%) [ +0.10% +0.00% +0.13% / +0.23% +1.45% +1.39%] index_fill_ strided 7 : Elapsed 0.031 ms (3.099 ms / 100) 3.170 -> 3.041 ( -4.07%) [ +0.00% +0.06% +0.00% / +0.19% -3.85% -4.07%] index_fill_ strided 8 : Elapsed 0.032 ms (3.170 ms / 100) 3.066 -> 3.080 ( +0.46%) [ +0.00% +0.36% +0.55% / +0.46% +1.17% +0.98%] index_fill_ strided 16 : Elapsed 0.031 ms (3.066 ms / 100) 3.065 -> 3.042 ( -0.75%) [ +0.00% +0.10% +0.03% / +0.26% -0.72% -0.75%] index_fill_ strided 64 : Elapsed 0.031 ms (3.065 ms / 100) 3.070 -> 3.073 ( +0.10%) [ +0.20% +0.10% +0.00% / +0.10% +7.04% +7.04%] index_fill_ strided 100 : Elapsed 0.031 ms (3.076 ms / 100) 3.021 -> 3.027 ( +0.20%) [ +0.33% +0.26% +0.00% / +0.20% +1.13% +1.06%] index_fill_ random : Elapsed 0.030 ms (3.031 ms / 100) 3.038 -> 3.005 ( -1.09%) [ +0.36% +0.16% +0.00% / +0.10% -1.05% -1.09%] index_fill_ random_sorted : Elapsed 0.030 ms (3.049 ms / 100) 3.011 -> 3.011 ( +0.00%) [ +0.13% +0.00% +0.27% / +0.00% +1.39% +1.13%] index_fill_ perm : Elapsed 0.030 ms (3.015 ms / 100) 3.021 -> 3.021 ( +0.00%) [ +0.00% +0.07% +0.07% / +0.00% +0.99% +0.83%] index_fill_ perm_sorted : Elapsed 0.030 ms (3.021 ms / 100) out_shape = [15, 150, 250] in_shape = [15, 50, 250] idx_dim = 1 B = [15, 150, 250] (stride (250, 3750, 1)) dim = 1 fill_cnt = 50 2.938 -> 2.931 ( -0.24%) [ +0.03% +0.10% +0.00% / -0.24% +0.07% +0.14%] index_fill_ const : Elapsed 0.029 ms (2.939 ms / 100) 3.045 -> 3.030 ( -0.49%) [ +0.00% +0.07% +0.13% / -0.49% -0.20% -0.33%] index_fill_ linear : Elapsed 0.030 ms (3.045 ms / 100) 3.027 -> 3.009 ( -0.59%) [ +0.10% +0.00% +0.03% / -0.59% -0.40% -0.43%] index_fill_ reverse : Elapsed 0.030 ms (3.030 ms / 100) 2.950 -> 2.928 ( -0.75%) [ +0.24% +0.00% +0.00% / -0.75% -0.54% -0.64%] index_fill_ skip64 : Elapsed 0.030 ms (2.957 ms / 100) 2.953 -> 2.932 ( -0.71%) [ +0.07% +0.10% +0.00% / -0.68% -0.61% -0.71%] index_fill_ skip256 : Elapsed 0.030 ms (2.955 ms / 100) 3.021 -> 3.003 ( -0.60%) [ +0.56% +0.00% +0.33% / -0.60% -0.03% -0.26%] index_fill_ spread : Elapsed 0.030 ms (3.038 ms / 100) 3.021 -> 3.005 ( -0.53%) [ +0.10% +0.33% +0.00% / -0.53% -0.10% +0.53%] index_fill_ strided 3 : Elapsed 0.030 ms (3.024 ms / 100) 2.998 -> 2.986 ( -0.40%) [ +0.13% +0.30% +0.00% / -0.20% -0.40% -0.27%] index_fill_ strided 5 : Elapsed 0.030 ms (3.002 ms / 100) 3.029 -> 3.006 ( -0.76%) [ +0.53% +0.00% +0.03% / -0.76% -0.43% -0.46%] index_fill_ strided 7 : Elapsed 0.030 ms (3.045 ms / 100) 3.021 -> 3.005 ( -0.53%) [ +0.30% +0.46% +0.00% / -0.53% -0.10% -0.23%] index_fill_ strided 8 : Elapsed 0.030 ms (3.030 ms / 100) 3.013 -> 3.012 ( -0.03%) [ +0.00% +0.33% +0.40% / +0.20% -0.03% +0.17%] index_fill_ strided 16 : Elapsed 0.030 ms (3.013 ms / 100) 3.025 -> 3.010 ( -0.50%) [ +0.03% +0.00% +0.07% / -0.50% -0.03% -0.30%] index_fill_ strided 64 : Elapsed 0.030 ms (3.026 ms / 100) 2.955 -> 2.935 ( -0.68%) [ +0.14% +0.00% +0.03% / -0.68% -0.20% -0.51%] index_fill_ strided 100 : Elapsed 0.030 ms (2.959 ms / 100) 3.026 -> 3.007 ( -0.63%) [ +0.00% +0.43% +0.20% / -0.36% -0.63% -0.63%] index_fill_ random : Elapsed 0.030 ms (3.026 ms / 100) 3.020 -> 3.003 ( -0.56%) [ +0.36% +0.17% +0.00% / -0.56% -0.53% -0.53%] index_fill_ random_sorted : Elapsed 0.030 ms (3.031 ms / 100) 3.029 -> 3.009 ( -0.66%) [ +0.26% +0.00% +0.10% / -0.43% -0.63% -0.66%] index_fill_ perm : Elapsed 0.030 ms (3.037 ms / 100) 3.030 -> 3.006 ( -0.79%) [ +0.00% +0.50% +0.13% / -0.76% -0.69% -0.79%] index_fill_ perm_sorted : Elapsed 0.030 ms (3.030 ms / 100) B = [15, 150, 250] (stride (1, 3750, 15)) A = [15, 50, 250] (stride (1, 3750, 15)) dim = 1 7.305 -> 7.275 ( -0.41%) [ +0.33% +0.27% +0.00% / -0.30% -0.41% -0.15%] index_add_ linear : Elapsed 0.073 ms (7.329 ms / 100) 6.956 -> 6.922 ( -0.49%) [ +0.04% +0.09% +0.00% / -0.35% -0.35% -0.49%] index_copy_ linear : Elapsed 0.070 ms (6.959 ms / 100) 7.297 -> 7.190 ( -1.47%) [ +0.26% +0.05% +0.00% / +0.07% -1.47% -1.40%] index_add_ reverse : Elapsed 0.073 ms (7.316 ms / 100) 6.973 -> 6.860 ( -1.62%) [ +0.19% +0.22% +0.00% / -0.26% -1.22% -1.62%] index_copy_ reverse : Elapsed 0.070 ms (6.986 ms / 100) 7.346 -> 7.209 ( -1.86%) [ +0.00% +0.31% +0.07% / +0.12% -1.86% -1.77%] index_add_ spread : Elapsed 0.073 ms (7.346 ms / 100) 7.083 -> 6.925 ( -2.23%) [ +0.00% +0.17% +0.14% / -0.17% -2.23% -2.09%] index_copy_ spread : Elapsed 0.071 ms (7.083 ms / 100) 7.426 -> 7.377 ( -0.66%) [ +0.00% +0.03% +0.20% / +0.40% -0.62% -0.66%] index_add_ strided 7 : Elapsed 0.074 ms (7.426 ms / 100) 7.069 -> 7.036 ( -0.47%) [ +0.00% +0.21% +0.01% / -0.06% +0.13% -0.47%] index_copy_ strided 7 : Elapsed 0.071 ms (7.069 ms / 100) 7.416 -> 7.192 ( -3.02%) [ +0.12% +0.00% +0.01% / +0.08% -3.02% -3.02%] index_add_ perm : Elapsed 0.074 ms (7.425 ms / 100) 7.022 -> 6.824 ( -2.82%) [ +0.37% +0.95% +0.00% / +0.00% -2.08% -2.82%] index_copy_ perm : Elapsed 0.070 ms (7.048 ms / 100) 7.293 -> 7.225 ( -0.93%) [ +0.29% +0.00% +0.40% / -0.01% -0.92% -0.93%] index_add_ perm_sorted : Elapsed 0.073 ms (7.314 ms / 100) 6.982 -> 6.892 ( -1.29%) [ +0.00% +0.27% +0.06% / -1.29% -1.26% -1.05%] index_copy_ perm_sorted : Elapsed 0.070 ms (6.982 ms / 100) 9.908 -> 9.876 ( -0.32%) [ +0.00% +0.53% +0.99% / -0.32% +0.55% +0.09%] index_select const : Elapsed 0.099 ms (9.908 ms / 100) 15.086 -> 15.142 ( +0.37%) [ +1.95% +0.00% +0.34% / +0.37% +3.81% +4.60%] index_select wrap : Elapsed 0.154 ms (15.380 ms / 100) 10.951 -> 10.936 ( -0.14%) [ +0.05% +0.00% +0.02% / -0.14% +1.66% +1.25%] index_select linear : Elapsed 0.110 ms (10.957 ms / 100) 11.386 -> 11.200 ( -1.63%) [ +1.42% +0.00% +0.14% / -1.36% -0.34% -1.63%] index_select reverse : Elapsed 0.115 ms (11.548 ms / 100) 9.895 -> 9.845 ( -0.51%) [ +0.00% +0.98% +0.86% / +0.85% -0.34% -0.51%] index_select skip64 : Elapsed 0.099 ms (9.895 ms / 100) 9.965 -> 9.872 ( -0.93%) [ +0.23% +0.01% +0.00% / +0.42% -0.93% -0.82%] index_select skip256 : Elapsed 0.100 ms (9.988 ms / 100) 10.610 -> 10.497 ( -1.07%) [ +0.74% +0.41% +0.00% / +0.62% -0.61% -1.07%] index_select spread : Elapsed 0.107 ms (10.688 ms / 100) 15.073 -> 15.398 ( +2.16%) [ +2.08% +0.66% +0.00% / +2.16% +7.06% +6.68%] index_select strided 3 : Elapsed 0.154 ms (15.387 ms / 100) Good 12.367 -> 10.754 (-13.04%) [ +0.06% +0.00% +0.55% / +1.86% -11.64% -13.04%] index_select strided 5 : Elapsed 0.124 ms (12.374 ms / 100) 14.760 -> 14.796 ( +0.24%) [ +0.28% +2.36% +0.00% / +0.24% +3.92% +4.56%] index_select strided 7 : Elapsed 0.148 ms (14.801 ms / 100) 13.230 -> 13.173 ( -0.43%) [ +1.12% +1.21% +0.00% / -0.43% +8.30% +6.95%] index_select strided 8 : Elapsed 0.134 ms (13.378 ms / 100) 13.304 -> 13.229 ( -0.56%) [ +0.23% +0.56% +0.00% / -0.56% +6.71% +6.48%] index_select strided 16 : Elapsed 0.133 ms (13.335 ms / 100) 13.340 -> 13.376 ( +0.27%) [ +0.53% +2.18% +0.00% / +0.27% +4.36% +4.42%] index_select random : Elapsed 0.134 ms (13.411 ms / 100) 10.538 -> 10.512 ( -0.25%) [ +1.31% +1.56% +0.00% / -0.25% +0.74% -0.01%] index_select random_sorted : Elapsed 0.107 ms (10.676 ms / 100) out_shape = [15, 50, 150] in_shape = [15, 50, 250] idx_dim = 2 B = [15, 50, 150] (stride (7500, 1, 50)) A = [15, 50, 250] (stride (12500, 1, 50)) dim = 2 5.364 -> 5.370 ( +0.11%) [ +0.07% +0.00% +0.41% / +0.11% +0.35% +0.26%] index_select const : Elapsed 0.054 ms (5.368 ms / 100) 5.767 -> 5.747 ( -0.35%) [ +0.16% +0.00% +0.24% / +0.14% -0.14% -0.35%] index_select wrap : Elapsed 0.058 ms (5.776 ms / 100) 5.741 -> 5.742 ( +0.02%) [ +0.17% +0.00% +0.30% / +0.17% +0.16% +0.02%] index_select linear : Elapsed 0.058 ms (5.751 ms / 100) 5.738 -> 5.736 ( -0.03%) [ +0.17% +0.23% +0.00% / -0.03% +0.73% +0.94%] index_select reverse : Elapsed 0.057 ms (5.748 ms / 100) 5.377 -> 5.380 ( +0.06%) [ +0.11% +0.00% +0.02% / +0.09% +0.22% +0.06%] index_select skip64 : Elapsed 0.054 ms (5.383 ms / 100) 5.358 -> 5.365 ( +0.13%) [ +0.13% +0.00% +0.45% / +0.13% +0.50% +0.54%] index_select skip256 : Elapsed 0.054 ms (5.365 ms / 100) 5.768 -> 5.778 ( +0.17%) [ +0.00% +0.14% +0.12% / +0.23% +0.24% +0.17%] index_select spread : Elapsed 0.058 ms (5.768 ms / 100) 5.783 -> 5.794 ( +0.19%) [ +0.16% +0.00% +0.09% / +0.19% +0.66% +0.33%] index_select strided 3 : Elapsed 0.058 ms (5.792 ms / 100) 5.678 -> 5.683 ( +0.09%) [ +0.02% +0.02% +0.00% / +0.09% +0.65% +0.93%] index_select strided 5 : Elapsed 0.057 ms (5.679 ms / 100) 5.778 -> 5.788 ( +0.17%) [ +0.29% +0.00% +0.26% / +0.35% +0.17% +0.17%] index_select strided 7 : Elapsed 0.058 ms (5.795 ms / 100) 5.762 -> 5.772 ( +0.17%) [ +0.30% +0.00% +0.24% / +0.17% +0.83% +0.73%] index_select strided 8 : Elapsed 0.058 ms (5.779 ms / 100) 5.786 -> 5.773 ( -0.22%) [ +0.02% +0.00% +0.12% / +0.00% -0.07% -0.22%] index_select strided 16 : Elapsed 0.058 ms (5.787 ms / 100) 5.774 -> 5.785 ( +0.19%) [ +0.07% +0.00% +0.09% / +0.52% +0.31% +0.19%] index_select strided 64 : Elapsed 0.058 ms (5.778 ms / 100) 5.383 -> 5.387 ( +0.07%) [ +0.04% +0.06% +0.00% / +0.07% +0.82% +0.78%] index_select strided 100 : Elapsed 0.054 ms (5.385 ms / 100) 5.728 -> 5.723 ( -0.09%) [ +0.00% +0.16% +0.16% / -0.09% +0.33% +0.37%] index_select random : Elapsed 0.057 ms (5.728 ms / 100) 5.699 -> 5.706 ( +0.12%) [ +0.16% +0.00% +0.14% / +0.12% +0.16% +0.25%] index_select random_sorted : Elapsed 0.057 ms (5.708 ms / 100) 5.739 -> 5.737 ( -0.03%) [ +0.35% +0.00% +0.26% / -0.03% +1.03% +1.17%] index_select perm : Elapsed 0.058 ms (5.759 ms / 100) 5.741 -> 5.735 ( -0.10%) [ +0.00% +0.02% +0.03% / -0.10% +0.09% +0.16%] index_select perm_sorted : Elapsed 0.057 ms (5.741 ms / 100) B = [15, 50, 150] (stride (7500, 1, 50)) A = [15, 50, 250] (stride (50, 1, 750)) dim = 2 8.711 -> 8.713 ( +0.02%) [ +0.00% +0.08% +0.01% / +0.02% +0.29% +0.11%] index_select const : Elapsed 0.087 ms (8.711 ms / 100) 9.032 -> 9.046 ( +0.16%) [ +0.00% +0.02% +0.14% / +0.16% +0.58% +0.32%] index_select wrap : Elapsed 0.090 ms (9.032 ms / 100) 9.036 -> 9.033 ( -0.03%) [ +0.12% +0.00% +0.03% / -0.03% +0.35% +0.38%] index_select linear : Elapsed 0.090 ms (9.047 ms / 100) 9.090 -> 9.093 ( +0.03%) [ +0.24% +0.00% +0.01% / +0.03% +0.25% +0.17%] index_select reverse : Elapsed 0.091 ms (9.112 ms / 100) 8.720 -> 8.718 ( -0.02%) [ +0.06% +0.07% +0.00% / -0.02% +0.28% +0.05%] index_select skip64 : Elapsed 0.087 ms (8.725 ms / 100) 8.698 -> 8.710 ( +0.14%) [ +0.00% +0.11% +0.09% / +0.14% +0.31% +0.23%] index_select skip256 : Elapsed 0.087 ms (8.698 ms / 100) 9.085 -> 9.104 ( +0.21%) [ +0.03% +0.08% +0.00% / +0.21% +0.22% +0.26%] index_select spread : Elapsed 0.091 ms (9.088 ms / 100) 9.125 -> 9.100 ( -0.27%) [ +0.01% +0.00% +0.09% / -0.11% -0.03% -0.27%] index_select strided 3 : Elapsed 0.091 ms (9.126 ms / 100) 9.040 -> 8.997 ( -0.48%) [ +0.09% +0.17% +0.00% / +0.10% -0.25% -0.48%] index_select strided 5 : Elapsed 0.090 ms (9.048 ms / 100) 9.116 -> 9.122 ( +0.07%) [ +0.29% +0.02% +0.00% / +0.26% +0.12% +0.07%] index_select strided 7 : Elapsed 0.091 ms (9.142 ms / 100) 9.094 -> 9.089 ( -0.05%) [ +0.00% +0.14% +0.00% / -0.05% +0.31% +0.32%] index_select strided 8 : Elapsed 0.091 ms (9.094 ms / 100) 9.156 -> 9.153 ( -0.03%) [ +0.00% +0.13% +0.01% / -0.03% +0.39% +0.37%] index_select strided 16 : Elapsed 0.092 ms (9.156 ms / 100) 9.082 -> 9.084 ( +0.02%) [ +0.00% +0.22% +0.14% / +0.02% +0.88% +0.62%] index_select strided 64 : Elapsed 0.091 ms (9.082 ms / 100) 8.726 -> 8.745 ( +0.22%) [ +0.00% +0.07% +0.15% / +0.22% +0.33% +0.34%] index_select strided 100 : Elapsed 0.087 ms (8.726 ms / 100) 9.065 -> 9.087 ( +0.24%) [ +0.00% +0.15% +0.14% / +0.24% +0.34% +0.32%] index_select random : Elapsed 0.091 ms (9.065 ms / 100) 9.028 -> 9.025 ( -0.03%) [ +0.21% +0.21% +0.00% / -0.03% +0.27% +0.12%] index_select random_sorted : Elapsed 0.090 ms (9.047 ms / 100) 9.107 -> 9.116 ( +0.10%) [ +0.00% +0.16% +0.12% / +0.10% +0.40% +0.44%] index_select perm : Elapsed 0.091 ms (9.107 ms / 100) 9.097 -> 9.104 ( +0.08%) [ +0.02% +0.00% +0.02% / +0.08% +0.27% +0.29%] index_select perm_sorted : Elapsed 0.091 ms (9.099 ms / 100) B = [15, 50, 150] (stride (150, 2250, 1)) A = [15, 50, 250] (stride (1, 15, 750)) dim = 2 good 5.755 -> 5.360 ( -6.86%) [ +0.02% +0.02% +0.00% / -6.86% -6.38% -6.64%] index_select const : Elapsed 0.058 ms (5.756 ms / 100) 5.998 -> 6.230 ( +3.87%) [ +0.00% +0.07% +0.02% / +4.12% +3.88% +3.87%] index_select wrap : Elapsed 0.060 ms (5.998 ms / 100) 5.965 -> 6.224 ( +4.34%) [ +0.00% +0.17% +0.12% / +4.54% +4.34% +4.39%] index_select linear : Elapsed 0.060 ms (5.965 ms / 100) 5.970 -> 6.215 ( +4.10%) [ +0.03% +0.00% +0.10% / +4.49% +5.13% +4.10%] index_select reverse : Elapsed 0.060 ms (5.972 ms / 100) good 5.711 -> 5.359 ( -6.16%) [ +0.35% +0.00% +0.28% / -6.09% -6.16% -5.90%] index_select skip64 : Elapsed 0.057 ms (5.731 ms / 100) good 5.757 -> 5.360 ( -6.90%) [ +0.00% +0.00% +0.05% / -6.90% -6.77% -6.65%] index_select skip256 : Elapsed 0.058 ms (5.757 ms / 100) bad 5.955 -> 6.294 ( +5.69%) [ +0.34% +0.45% +0.00% / +5.69% +6.18% +5.83%] index_select spread : Elapsed 0.060 ms (5.975 ms / 100) 5.942 -> 6.238 ( +4.98%) [ +0.24% +0.13% +0.00% / +4.98% +5.99% +6.01%] index_select strided 3 : Elapsed 0.060 ms (5.956 ms / 100) 5.793 -> 5.671 ( -2.11%) [ +0.00% +0.02% +0.35% / -2.11% -0.64% -0.60%] index_select strided 5 : Elapsed 0.058 ms (5.793 ms / 100) bad 5.984 -> 6.302 ( +5.31%) [ +0.00% +0.02% +0.22% / +5.51% +5.31% +5.58%] index_select strided 7 : Elapsed 0.060 ms (5.984 ms / 100) 5.971 -> 6.137 ( +2.78%) [ +0.10% +0.00% +0.23% / +2.78% +2.98% +3.17%] index_select strided 8 : Elapsed 0.060 ms (5.977 ms / 100) 5.966 -> 6.125 ( +2.67%) [ +0.12% +0.00% +0.10% / +2.67% +2.98% +3.02%] index_select strided 16 : Elapsed 0.060 ms (5.973 ms / 100) 5.974 -> 6.139 ( +2.76%) [ +0.10% +0.00% +0.20% / +2.85% +2.76% +2.95%] index_select strided 64 : Elapsed 0.060 ms (5.980 ms / 100) good 5.755 -> 5.399 ( -6.19%) [ +0.02% +0.07% +0.00% / -5.80% -6.10% -6.19%] index_select strided 100 : Elapsed 0.058 ms (5.756 ms / 100) 5.892 -> 6.047 ( +2.63%) [ +0.25% +0.37% +0.00% / +2.63% +3.05% +2.92%] index_select random : Elapsed 0.059 ms (5.907 ms / 100) 5.892 -> 6.060 ( +2.85%) [ +0.10% +0.08% +0.00% / +2.85% +3.34% +3.16%] index_select random_sorted : Elapsed 0.059 ms (5.898 ms / 100) 5.999 -> 6.267 ( +4.47%) [ +0.07% +0.00% +0.00% / +5.37% +4.47% +4.58%] index_select perm : Elapsed 0.060 ms (6.003 ms / 100) 5.979 -> 6.259 ( +4.68%) [ +0.05% +0.00% +0.25% / +5.70% +4.85% +4.68%] index_select perm_sorted : Elapsed 0.060 ms (5.982 ms / 100) B = [15, 50, 150] (stride (1, 2250, 15)) A = [15, 50, 250] (stride (250, 3750, 1)) dim = 2 5.808 -> 5.808 ( +0.00%) [ +0.09% +0.00% +0.00% / +0.00% +0.31% +0.14%] index_select const : Elapsed 0.058 ms (5.813 ms / 100) 6.260 -> 6.263 ( +0.05%) [ +0.10% +0.10% +0.00% / +0.13% +0.22% +0.05%] index_select wrap : Elapsed 0.063 ms (6.266 ms / 100) 6.251 -> 6.246 ( -0.08%) [ +0.00% +0.22% +0.06% / -0.08% -0.02% +0.30%] index_select linear : Elapsed 0.063 ms (6.251 ms / 100) 6.238 -> 6.251 ( +0.21%) [ +0.24% +0.24% +0.00% / +0.29% +0.35% +0.21%] index_select reverse : Elapsed 0.063 ms (6.253 ms / 100) 5.872 -> 5.873 ( +0.02%) [ +0.00% +0.12% +0.07% / +0.02% +0.27% +0.53%] index_select skip64 : Elapsed 0.059 ms (5.872 ms / 100) 5.797 -> 5.794 ( -0.05%) [ +0.09% +0.10% +0.00% / -0.02% +0.12% -0.05%] index_select skip256 : Elapsed 0.058 ms (5.802 ms / 100) 6.545 -> 6.547 ( +0.03%) [ +0.00% +0.06% +0.03% / +0.08% +0.03% +0.34%] index_select spread : Elapsed 0.065 ms (6.545 ms / 100) 6.950 -> 6.945 ( -0.07%) [ +0.00% +0.16% +0.04% / +0.03% +0.17% -0.07%] index_select strided 3 : Elapsed 0.069 ms (6.950 ms / 100) 7.187 -> 7.189 ( +0.03%) [ +0.11% +0.17% +0.00% / +0.03% +0.63% +0.45%] index_select strided 5 : Elapsed 0.072 ms (7.195 ms / 100) 7.183 -> 7.172 ( -0.15%) [ +0.26% +0.24% +0.00% / -0.03% -0.15% +0.13%] index_select strided 7 : Elapsed 0.072 ms (7.202 ms / 100) 7.196 -> 7.205 ( +0.13%) [ +0.17% +0.14% +0.00% / +0.13% +0.38% +0.18%] index_select strided 8 : Elapsed 0.072 ms (7.208 ms / 100) 7.218 -> 7.212 ( -0.08%) [ +0.18% +0.14% +0.00% / +0.21% +0.18% -0.08%] index_select strided 16 : Elapsed 0.072 ms (7.231 ms / 100) 7.214 -> 7.200 ( -0.19%) [ +0.00% +0.12% +0.08% / +0.22% -0.19% +0.06%] index_select strided 64 : Elapsed 0.072 ms (7.214 ms / 100) 6.303 -> 6.286 ( -0.27%) [ +0.22% +0.00% +0.32% / -0.13% +0.00% -0.27%] index_select strided 100 : Elapsed 0.063 ms (6.317 ms / 100) 7.155 -> 7.132 ( -0.32%) [ +0.06% +0.13% +0.00% / -0.15% -0.32% -0.18%] index_select random : Elapsed 0.072 ms (7.159 ms / 100) 6.511 -> 6.514 ( +0.05%) [ +0.55% +0.12% +0.00% / +0.12% +0.35% +0.05%] index_select random_sorted : Elapsed 0.065 ms (6.547 ms / 100) 7.195 -> 7.179 ( -0.22%) [ +0.00% +0.14% +0.29% / +0.08% -0.22% -0.18%] index_select perm : Elapsed 0.072 ms (7.195 ms / 100) 6.545 -> 6.517 ( -0.43%) [ +0.00% +0.02% +0.09% / -0.43% -0.15% -0.12%] index_select perm_sorted : Elapsed 0.065 ms (6.545 ms / 100) B = [15, 50, 150] (stride (50, 1, 750)) A = [15, 50, 250] (stride (12500, 1, 50)) dim = 2 8.736 -> 8.725 ( -0.13%) [ +0.06% +0.00% +0.06% / -0.13% +0.37% +0.52%] index_select const : Elapsed 0.087 ms (8.741 ms / 100) 9.224 -> 9.225 ( +0.01%) [ +0.03% +0.05% +0.00% / +0.01% +0.30% +0.22%] index_select wrap : Elapsed 0.092 ms (9.227 ms / 100) 9.228 -> 9.228 ( +0.00%) [ +0.11% +0.00% +0.08% / +0.00% +0.26% +0.13%] index_select linear : Elapsed 0.092 ms (9.238 ms / 100) 9.223 -> 9.215 ( -0.09%) [ +0.00% +0.16% +0.13% / -0.09% +0.33% +0.30%] index_select reverse : Elapsed 0.092 ms (9.223 ms / 100) 8.738 -> 8.759 ( +0.24%) [ +0.00% +0.22% +0.16% / +0.24% +0.37% +0.33%] index_select skip64 : Elapsed 0.087 ms (8.738 ms / 100) 8.722 -> 8.731 ( +0.10%) [ +0.14% +0.05% +0.00% / +0.10% +0.53% +0.31%] index_select skip256 : Elapsed 0.087 ms (8.734 ms / 100) 9.272 -> 9.284 ( +0.13%) [ +0.04% +0.00% +0.27% / +0.13% +0.52% +0.39%] index_select spread : Elapsed 0.093 ms (9.276 ms / 100) 9.327 -> 9.316 ( -0.12%) [ +0.00% +0.04% +0.01% / -0.12% +0.16% +0.04%] index_select strided 3 : Elapsed 0.093 ms (9.327 ms / 100) 9.156 -> 9.146 ( -0.11%) [ +0.08% +0.00% +0.08% / -0.11% +0.24% +0.40%] index_select strided 5 : Elapsed 0.092 ms (9.163 ms / 100) 9.330 -> 9.340 ( +0.11%) [ +0.06% +0.00% +0.03% / +0.14% +0.13% +0.11%] index_select strided 7 : Elapsed 0.093 ms (9.336 ms / 100) 9.301 -> 9.309 ( +0.09%) [ +0.06% +0.00% +0.33% / +0.09% +0.41% +0.43%] index_select strided 8 : Elapsed 0.093 ms (9.307 ms / 100) 9.307 -> 9.307 ( +0.00%) [ +0.00% +0.00% +0.08% / +0.20% +0.42% +0.00%] index_select strided 16 : Elapsed 0.093 ms (9.307 ms / 100) 9.297 -> 9.303 ( +0.06%) [ +0.23% +0.00% +0.04% / +0.25% +0.13% +0.06%] index_select strided 64 : Elapsed 0.093 ms (9.318 ms / 100) 8.767 -> 8.769 ( +0.02%) [ +0.19% +0.02% +0.00% / +0.02% +0.37% +0.42%] index_select strided 100 : Elapsed 0.088 ms (8.784 ms / 100) 9.208 -> 9.217 ( +0.10%) [ +0.29% +0.13% +0.00% / +0.10% +0.29% +0.37%] index_select random : Elapsed 0.092 ms (9.235 ms / 100) 9.136 -> 9.124 ( -0.13%) [ +0.09% +0.10% +0.00% / +0.08% -0.13% +0.21%] index_select random_sorted : Elapsed 0.091 ms (9.144 ms / 100) 9.280 -> 9.283 ( +0.03%) [ +0.04% +0.11% +0.00% / +0.03% +0.55% +0.51%] index_select perm : Elapsed 0.093 ms (9.284 ms / 100) 9.247 -> 9.242 ( -0.05%) [ +0.08% +0.00% +0.34% / -0.05% +0.36% +0.40%] index_select perm_sorted : Elapsed 0.093 ms (9.254 ms / 100) B = [15, 50, 150] (stride (1, 15, 750)) A = [15, 50, 250] (stride (1, 3750, 15)) dim = 2 5.583 -> 5.583 ( +0.00%) [ +0.04% +0.00% +0.21% / +0.00% +0.90% +0.56%] index_select const : Elapsed 0.056 ms (5.585 ms / 100) 5.946 -> 5.939 ( -0.12%) [ +0.07% +0.00% +0.15% / -0.07% -0.12% +0.24%] index_select wrap : Elapsed 0.060 ms (5.950 ms / 100) 5.928 -> 5.937 ( +0.15%) [ +0.07% +0.19% +0.00% / +0.15% +0.44% +0.46%] index_select linear : Elapsed 0.059 ms (5.932 ms / 100) 5.943 -> 5.938 ( -0.08%) [ +0.03% +0.00% +0.02% / -0.08% +0.29% +0.30%] index_select reverse : Elapsed 0.059 ms (5.945 ms / 100) 5.589 -> 5.594 ( +0.09%) [ +0.09% +0.00% +0.00% / +0.09% +0.16% +0.21%] index_select skip64 : Elapsed 0.056 ms (5.594 ms / 100) 5.618 -> 5.617 ( -0.02%) [ +0.00% +0.09% +0.05% / -0.02% +0.04% +0.04%] index_select skip256 : Elapsed 0.056 ms (5.618 ms / 100) 6.102 -> 6.109 ( +0.11%) [ +0.00% +0.31% +0.41% / +0.11% +0.72% +0.72%] index_select spread : Elapsed 0.061 ms (6.102 ms / 100) 6.405 -> 6.407 ( +0.03%) [ +0.37% +0.00% +0.31% / +0.45% +0.06% +0.03%] index_select strided 3 : Elapsed 0.064 ms (6.429 ms / 100) 6.183 -> 6.193 ( +0.16%) [ +0.11% +0.00% +0.13% / +0.16% +1.08% +0.81%] index_select strided 5 : Elapsed 0.062 ms (6.190 ms / 100) 6.456 -> 6.469 ( +0.20%) [ +0.14% +0.43% +0.00% / +0.34% +0.39% +0.20%] index_select strided 7 : Elapsed 0.065 ms (6.465 ms / 100) 6.471 -> 6.467 ( -0.06%) [ +0.11% +0.00% +0.12% / -0.06% +0.22% +0.34%] index_select strided 8 : Elapsed 0.065 ms (6.478 ms / 100) 6.528 -> 6.494 ( -0.52%) [ +0.02% +0.00% +0.20% / -0.02% -0.15% -0.52%] index_select strided 16 : Elapsed 0.065 ms (6.529 ms / 100) 6.517 -> 6.511 ( -0.09%) [ +0.17% +0.00% +0.17% / +0.02% +0.02% -0.09%] index_select strided 64 : Elapsed 0.065 ms (6.528 ms / 100) 5.585 -> 5.600 ( +0.27%) [ +0.18% +0.04% +0.00% / +0.27% +0.59% +0.70%] index_select strided 100 : Elapsed 0.056 ms (5.595 ms / 100) 6.232 -> 6.225 ( -0.11%) [ +0.00% +0.03% +0.08% / +0.02% +0.42% -0.11%] index_select random : Elapsed 0.062 ms (6.232 ms / 100) 6.009 -> 6.012 ( +0.05%) [ +0.00% +0.15% +0.07% / +0.05% +0.15% +0.12%] index_select random_sorted : Elapsed 0.060 ms (6.009 ms / 100) 6.361 -> 6.373 ( +0.19%) [ +0.55% +0.00% +0.24% / +0.19% +0.50% +0.68%] index_select perm : Elapsed 0.064 ms (6.396 ms / 100) 6.061 -> 6.074 ( +0.21%) [ +0.00% +0.03% +0.21% / +0.21% +0.43% +0.28%] index_select perm_sorted : Elapsed 0.061 ms (6.061 ms / 100) out_shape = [150, 250, 50] in_shape = [15, 250, 50] idx_dim = 0 B = [150, 250, 50] (stride (1, 7500, 150)) A = [15, 250, 50] (stride (1, 750, 15)) dim = 0 26.952 -> 27.033 ( +0.30%) [ +0.33% +0.02% +0.00% / +0.30% +2.14% +2.07%] index_add_ linear : Elapsed 0.270 ms (27.042 ms / 100) 20.334 -> 20.354 ( +0.10%) [ +0.06% +0.00% +0.05% / +0.10% +1.00% +1.08%] index_copy_ linear : Elapsed 0.203 ms (20.346 ms / 100) 26.905 -> 26.933 ( +0.10%) [ +0.00% +0.31% +0.19% / +0.10% +2.45% +2.48%] index_add_ reverse : Elapsed 0.269 ms (26.905 ms / 100) 20.290 -> 20.277 ( -0.06%) [ +0.00% +0.02% +0.09% / -0.06% +1.15% +1.27%] index_copy_ reverse : Elapsed 0.203 ms (20.290 ms / 100) 30.721 -> 30.725 ( +0.01%) [ +0.00% +0.17% +0.13% / +0.01% +0.35% +0.23%] index_add_ spread : Elapsed 0.307 ms (30.721 ms / 100) 23.216 -> 23.241 ( +0.11%) [ +0.12% +0.00% +0.05% / +0.11% +0.28% +0.21%] index_copy_ spread : Elapsed 0.232 ms (23.245 ms / 100) 30.204 -> 30.215 ( +0.04%) [ +0.14% +0.11% +0.00% / +0.04% +0.27% +0.16%] index_add_ strided 7 : Elapsed 0.302 ms (30.246 ms / 100) 22.862 -> 22.814 ( -0.21%) [ +0.00% +0.06% +0.03% / +0.00% -0.03% -0.21%] index_copy_ strided 7 : Elapsed 0.229 ms (22.862 ms / 100) 31.509 -> 31.483 ( -0.08%) [ +0.10% +0.00% +0.07% / +0.03% -0.08% -0.01%] index_add_ perm : Elapsed 0.315 ms (31.539 ms / 100) 23.712 -> 23.658 ( -0.23%) [ +0.00% +0.09% +0.07% / +0.14% -0.17% -0.23%] index_copy_ perm : Elapsed 0.237 ms (23.712 ms / 100) 30.002 -> 29.894 ( -0.36%) [ +0.00% +0.02% +0.13% / +0.23% -0.36% -0.28%] index_add_ perm_sorted : Elapsed 0.300 ms (30.002 ms / 100) 22.582 -> 22.455 ( -0.56%) [ +0.03% +0.00% +0.01% / +0.09% -0.40% -0.56%] index_copy_ perm_sorted : Elapsed 0.226 ms (22.588 ms / 100) BEST 131.106 -> 14.639 (-88.83%) [ +0.07% +0.01% +0.00% / -88.83% -88.66% -88.66%] index_select const : Elapsed 1.312 ms (131.196 ms / 100) BEST 135.726 -> 18.564 (-86.32%) [ +0.00% +0.23% +0.06% / -86.14% -86.32% -86.32%] index_select wrap : Elapsed 1.357 ms (135.726 ms / 100) BEST 131.327 -> 16.071 (-87.76%) [ +0.03% +0.00% +0.12% / -87.69% -87.76% -87.75%] index_select linear : Elapsed 1.314 ms (131.360 ms / 100) BEST 131.454 -> 15.560 (-88.16%) [ +0.20% +0.00% +0.05% / -88.16% -88.16% -88.16%] index_select reverse : Elapsed 1.317 ms (131.712 ms / 100) BEST 130.539 -> 15.959 (-87.77%) [ +0.34% +0.00% +0.29% / -87.77% -87.77% -87.76%] index_select skip64 : Elapsed 1.310 ms (130.987 ms / 100) BEST 130.955 -> 15.401 (-88.24%) [ +0.09% +0.00% +0.06% / -88.24% -88.22% -88.23%] index_select skip256 : Elapsed 1.311 ms (131.076 ms / 100) BEST 131.689 -> 15.596 (-88.16%) [ +0.41% +0.00% +0.28% / -88.16% -88.10% -88.10%] index_select spread : Elapsed 1.322 ms (132.232 ms / 100) BEST 136.793 -> 15.177 (-88.91%) [ +0.00% +0.20% +0.39% / -88.91% -88.86% -88.85%] index_select strided 3 : Elapsed 1.368 ms (136.793 ms / 100) BEST 136.636 -> 15.858 (-88.39%) [ +0.08% +0.00% +0.16% / -88.39% -88.32% -88.32%] index_select strided 5 : Elapsed 1.368 ms (136.751 ms / 100) BEST 137.192 -> 15.209 (-88.91%) [ +0.08% +0.00% +0.08% / -88.89% -88.91% -88.91%] index_select strided 7 : Elapsed 1.373 ms (137.299 ms / 100) BEST 136.934 -> 15.889 (-88.40%) [ +0.00% +0.26% +0.28% / -88.20% -88.40% -88.40%] index_select strided 8 : Elapsed 1.369 ms (136.934 ms / 100) BEST 136.446 -> 15.356 (-88.75%) [ +0.16% +0.00% +0.23% / -88.75% -88.57% -88.57%] index_select random : Elapsed 1.367 ms (136.669 ms / 100) BEST 131.938 -> 15.656 (-88.13%) [ +0.15% +0.12% +0.00% / -88.00% -88.13% -88.13%] index_select random_sorted : Elapsed 1.321 ms (132.132 ms / 100) B = [150, 250, 50] (stride (250, 1, 37500)) dim = 0 fill_cnt = 15 3.566 -> 3.562 ( -0.11%) [ +0.14% +0.03% +0.00% / -0.11% +6.90% +6.81%] index_fill_ const : Elapsed 0.036 ms (3.571 ms / 100) 3.210 -> 3.208 ( -0.06%) [ +0.00% +0.03% +0.12% / -0.06% +3.08% +2.96%] index_fill_ linear : Elapsed 0.032 ms (3.210 ms / 100) 3.151 -> 3.150 ( -0.03%) [ +0.00% +0.25% +0.06% / -0.03% +4.70% +4.54%] index_fill_ reverse : Elapsed 0.032 ms (3.151 ms / 100) 3.566 -> 3.568 ( +0.06%) [ +0.00% +0.31% +0.14% / +0.06% +6.79% +6.73%] index_fill_ skip64 : Elapsed 0.036 ms (3.566 ms / 100) 3.564 -> 3.569 ( +0.14%) [ +0.00% +0.06% +0.06% / +0.14% +7.41% +7.27%] index_fill_ skip256 : Elapsed 0.036 ms (3.564 ms / 100) 3.154 -> 3.153 ( -0.03%) [ +0.03% +0.00% +0.13% / -0.03% +0.98% +0.82%] index_fill_ spread : Elapsed 0.032 ms (3.155 ms / 100) 3.136 -> 3.144 ( +0.26%) [ +0.00% +0.03% +0.10% / +0.26% +2.52% +2.77%] index_fill_ strided 3 : Elapsed 0.031 ms (3.136 ms / 100) 3.232 -> 3.166 ( -2.04%) [ +0.22% +0.00% +0.12% / -0.12% -1.95% -2.04%] index_fill_ strided 5 : Elapsed 0.032 ms (3.239 ms / 100) 3.078 -> 3.007 ( -2.31%) [ +0.19% +0.36% +0.00% / +0.42% -2.31% -1.85%] index_fill_ strided 7 : Elapsed 0.031 ms (3.084 ms / 100) 3.110 -> 3.092 ( -0.58%) [ +0.19% +0.16% +0.00% / +0.10% -0.51% -0.58%] index_fill_ strided 8 : Elapsed 0.031 ms (3.116 ms / 100) 3.444 -> 3.448 ( +0.12%) [ +0.15% +0.12% +0.00% / +0.12% +0.64% +0.81%] index_fill_ strided 16 : Elapsed 0.034 ms (3.449 ms / 100) 3.223 -> 3.219 ( -0.12%) [ +0.00% +0.47% +0.03% / -0.12% +3.20% +3.04%] index_fill_ strided 64 : Elapsed 0.032 ms (3.223 ms / 100) 3.361 -> 3.363 ( +0.06%) [ +0.00% +0.12% +0.15% / +0.06% +5.56% +5.36%] index_fill_ strided 100 : Elapsed 0.034 ms (3.361 ms / 100) 3.124 -> 3.129 ( +0.16%) [ +0.22% +0.00% +0.26% / +0.16% +2.59% +2.75%] index_fill_ random : Elapsed 0.031 ms (3.131 ms / 100) 3.218 -> 3.206 ( -0.37%) [ +0.16% +0.09% +0.00% / +0.12% -0.37% -0.37%] index_fill_ random_sorted : Elapsed 0.032 ms (3.223 ms / 100) 3.199 -> 3.206 ( +0.22%) [ +0.16% +0.19% +0.00% / +0.22% +1.31% +1.38%] index_fill_ perm : Elapsed 0.032 ms (3.204 ms / 100) 3.237 -> 3.184 ( -1.64%) [ +0.15% +0.06% +0.00% / +0.34% -1.51% -1.64%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.242 ms / 100) B = [150, 250, 50] (stride (250, 1, 37500)) A = [15, 250, 50] (stride (12500, 50, 1)) dim = 0 8.667 -> 8.592 ( -0.87%) [ +0.06% +0.06% +0.00% / +0.12% -0.87% -0.80%] index_add_ linear : Elapsed 0.087 ms (8.672 ms / 100) 8.033 -> 8.035 ( +0.02%) [ +0.15% +0.07% +0.00% / +0.09% +0.02% +0.06%] index_copy_ linear : Elapsed 0.080 ms (8.045 ms / 100) 8.650 -> 8.575 ( -0.87%) [ +0.10% +0.00% +0.25% / +0.06% -0.87% -0.64%] index_add_ reverse : Elapsed 0.087 ms (8.659 ms / 100) 8.008 -> 8.003 ( -0.06%) [ +0.00% +0.06% +0.01% / -0.06% +0.27% +0.40%] index_copy_ reverse : Elapsed 0.080 ms (8.008 ms / 100) 8.560 -> 8.543 ( -0.20%) [ +0.00% +0.18% +0.35% / +0.01% -0.09% -0.20%] index_add_ spread : Elapsed 0.086 ms (8.560 ms / 100) 8.041 -> 8.046 ( +0.06%) [ +0.15% +0.00% +0.09% / +0.06% +0.20% +0.30%] index_copy_ spread : Elapsed 0.081 ms (8.053 ms / 100) 8.510 -> 8.499 ( -0.13%) [ +0.21% +0.32% +0.00% / -0.13% +0.49% +0.73%] index_add_ strided 7 : Elapsed 0.085 ms (8.528 ms / 100) 8.015 -> 8.025 ( +0.12%) [ +0.00% +0.12% +0.00% / +0.12% +0.82% +0.82%] index_copy_ strided 7 : Elapsed 0.080 ms (8.015 ms / 100) 8.538 -> 8.554 ( +0.19%) [ +0.00% +0.35% +0.08% / +0.33% +0.19% +0.54%] index_add_ perm : Elapsed 0.085 ms (8.538 ms / 100) 8.033 -> 8.043 ( +0.12%) [ +0.11% +0.00% +0.21% / +0.12% +0.19% +0.25%] index_copy_ perm : Elapsed 0.080 ms (8.042 ms / 100) 8.554 -> 8.566 ( +0.14%) [ +0.00% +0.16% +0.15% / +0.14% +0.35% +0.54%] index_add_ perm_sorted : Elapsed 0.086 ms (8.554 ms / 100) 8.039 -> 8.049 ( +0.12%) [ +0.00% +0.01% +0.09% / +0.12% +0.25% +0.24%] index_copy_ perm_sorted : Elapsed 0.080 ms (8.039 ms / 100) 80.012 -> 79.937 ( -0.09%) [ +0.53% +0.00% +0.68% / -0.09% +2.48% +2.24%] index_select const : Elapsed 0.804 ms (80.437 ms / 100) 84.594 -> 84.301 ( -0.35%) [ +0.24% +0.42% +0.00% / -0.35% +6.96% +6.63%] index_select wrap : Elapsed 0.848 ms (84.799 ms / 100) 74.769 -> 75.114 ( +0.46%) [ +0.00% +0.01% +0.02% / +0.46% +12.36% +11.45%] index_select linear : Elapsed 0.748 ms (74.769 ms / 100) 77.814 -> 78.361 ( +0.70%) [ +0.34% +0.85% +0.00% / +0.70% +14.16% +14.28%] index_select reverse : Elapsed 0.781 ms (78.080 ms / 100) 80.204 -> 80.926 ( +0.90%) [ +0.59% +0.95% +0.00% / +0.90% +2.03% +2.71%] index_select skip64 : Elapsed 0.807 ms (80.676 ms / 100) 79.548 -> 79.811 ( +0.33%) [ +0.17% +0.00% +0.84% / +0.33% +2.49% +2.73%] index_select skip256 : Elapsed 0.797 ms (79.684 ms / 100) 78.018 -> 78.365 ( +0.44%) [ +0.84% +0.12% +0.00% / +0.44% +6.97% +7.04%] index_select spread : Elapsed 0.787 ms (78.675 ms / 100) 84.451 -> 85.346 ( +1.06%) [ +0.76% +0.00% +0.39% / +1.06% +3.75% +2.70%] index_select strided 3 : Elapsed 0.851 ms (85.090 ms / 100) 84.850 -> 84.896 ( +0.05%) [ +0.82% +0.72% +0.00% / +0.05% +9.79% +9.50%] index_select strided 5 : Elapsed 0.855 ms (85.542 ms / 100) 82.288 -> 82.715 ( +0.52%) [ +0.41% +0.96% +0.00% / +0.52% +10.49% +11.05%] index_select strided 7 : Elapsed 0.826 ms (82.629 ms / 100) 84.484 -> 84.261 ( -0.26%) [ +0.36% +0.70% +0.00% / -0.26% +9.95% +9.92%] index_select strided 8 : Elapsed 0.848 ms (84.786 ms / 100) 81.646 -> 81.404 ( -0.30%) [ +0.00% +0.73% +0.30% / -0.30% +9.47% +8.63%] index_select random : Elapsed 0.816 ms (81.646 ms / 100) 76.933 -> 76.826 ( -0.14%) [ +0.60% +0.00% +0.18% / -0.14% +9.34% +9.67%] index_select random_sorted : Elapsed 0.774 ms (77.395 ms / 100) B = [150, 250, 50] (stride (250, 1, 37500)) A = [15, 250, 50] (stride (12500, 1, 250)) dim = 0 6.236 -> 6.165 ( -1.14%) [ +0.05% +0.93% +0.00% / +0.03% -1.14% -1.12%] index_add_ linear : Elapsed 0.062 ms (6.239 ms / 100) 5.512 -> 5.513 ( +0.02%) [ +0.20% +0.29% +0.00% / +0.02% +0.25% +0.11%] index_copy_ linear : Elapsed 0.055 ms (5.523 ms / 100) 6.236 -> 6.127 ( -1.75%) [ +0.24% +0.08% +0.00% / +0.13% -1.75% -1.60%] index_add_ reverse : Elapsed 0.063 ms (6.251 ms / 100) 5.487 -> 5.506 ( +0.35%) [ +0.24% +0.16% +0.00% / +0.35% +0.73% +0.73%] index_copy_ reverse : Elapsed 0.055 ms (5.500 ms / 100) 6.095 -> 5.987 ( -1.77%) [ +0.00% +0.08% +0.15% / +0.03% -1.72% -1.77%] index_add_ spread : Elapsed 0.061 ms (6.095 ms / 100) 5.429 -> 5.417 ( -0.22%) [ +0.15% +0.00% +0.00% / +0.02% -0.20% -0.22%] index_copy_ spread : Elapsed 0.054 ms (5.437 ms / 100) 6.015 -> 5.893 ( -2.03%) [ +0.18% +0.28% +0.00% / +0.15% -1.95% -2.03%] index_add_ strided 7 : Elapsed 0.060 ms (6.026 ms / 100) 5.398 -> 5.366 ( -0.59%) [ +0.22% +0.00% +0.22% / +0.30% -0.07% -0.59%] index_copy_ strided 7 : Elapsed 0.054 ms (5.410 ms / 100) good 6.481 -> 6.119 ( -5.59%) [ +0.12% +0.14% +0.00% / -0.03% -5.55% -5.59%] index_add_ perm : Elapsed 0.065 ms (6.489 ms / 100) 5.560 -> 5.490 ( -1.26%) [ +0.13% +0.16% +0.00% / +0.13% -1.21% -1.26%] index_copy_ perm : Elapsed 0.056 ms (5.567 ms / 100) good 6.443 -> 6.115 ( -5.09%) [ +0.00% +0.05% +0.11% / +0.03% -4.94% -5.09%] index_add_ perm_sorted : Elapsed 0.064 ms (6.443 ms / 100) 5.595 -> 5.472 ( -2.20%) [ +0.00% +0.04% +0.20% / +0.00% -1.95% -2.20%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.595 ms / 100) 33.511 -> 34.023 ( +1.53%) [ +0.00% +1.95% +1.21% / +1.53% +13.32% +12.96%] index_select const : Elapsed 0.335 ms (33.511 ms / 100) 46.881 -> 46.374 ( -1.08%) [ +0.35% +0.52% +0.00% / -1.08% +6.25% +6.36%] index_select wrap : Elapsed 0.470 ms (47.044 ms / 100) 33.327 -> 33.390 ( +0.19%) [ +0.11% +0.67% +0.00% / +0.19% +9.06% +9.41%] index_select linear : Elapsed 0.334 ms (33.365 ms / 100) 35.964 -> 36.765 ( +2.23%) [ +0.00% +1.56% +2.97% / +2.23% +13.87% +12.85%] index_select reverse : Elapsed 0.360 ms (35.964 ms / 100) 33.305 -> 33.240 ( -0.20%) [ +1.22% +0.00% +0.54% / -0.20% +15.82% +17.29%] index_select skip64 : Elapsed 0.337 ms (33.710 ms / 100) 33.198 -> 33.537 ( +1.02%) [ +0.36% +0.00% +1.56% / +1.02% +12.12% +13.03%] index_select skip256 : Elapsed 0.333 ms (33.316 ms / 100) 31.842 -> 32.135 ( +0.92%) [ +1.11% +1.13% +0.00% / +0.92% +4.89% +5.02%] index_select spread : Elapsed 0.322 ms (32.194 ms / 100) 43.079 -> 43.367 ( +0.67%) [ +0.00% +0.01% +0.91% / +0.67% +8.68% +9.09%] index_select strided 3 : Elapsed 0.431 ms (43.079 ms / 100) 39.330 -> 39.604 ( +0.70%) [ +0.00% +0.50% +1.16% / +0.70% +5.39% +4.53%] index_select strided 5 : Elapsed 0.393 ms (39.330 ms / 100) 44.760 -> 44.602 ( -0.35%) [ +0.37% +0.00% +0.67% / -0.35% +8.89% +10.22%] index_select strided 7 : Elapsed 0.449 ms (44.926 ms / 100) 45.722 -> 45.797 ( +0.16%) [ +0.00% +1.97% +1.70% / +0.16% +10.10% +9.73%] index_select strided 8 : Elapsed 0.457 ms (45.722 ms / 100) 42.211 -> 41.805 ( -0.96%) [ +0.86% +0.00% +1.56% / -0.96% +10.05% +11.16%] index_select random : Elapsed 0.426 ms (42.572 ms / 100) 31.583 -> 31.828 ( +0.78%) [ +0.47% +0.00% +0.70% / +0.78% +4.27% +4.04%] index_select random_sorted : Elapsed 0.317 ms (31.733 ms / 100) out_shape = [15, 150, 50] in_shape = [15, 250, 50] idx_dim = 1 B = [15, 150, 50] (stride (7500, 50, 1)) A = [15, 250, 50] (stride (12500, 1, 250)) dim = 1 5.516 -> 5.521 ( +0.09%) [ +0.00% +0.07% +0.00% / +0.20% +0.31% +0.09%] index_select const : Elapsed 0.055 ms (5.516 ms / 100) 6.107 -> 6.096 ( -0.18%) [ +0.03% +0.00% +0.23% / +0.23% -0.18% -0.03%] index_select wrap : Elapsed 0.061 ms (6.109 ms / 100) 6.089 -> 6.073 ( -0.26%) [ +0.15% +0.26% +0.00% / -0.26% +0.39% +0.53%] index_select linear : Elapsed 0.061 ms (6.098 ms / 100) 6.098 -> 6.095 ( -0.05%) [ +0.11% +0.00% +0.03% / +0.20% -0.05% +0.11%] index_select reverse : Elapsed 0.061 ms (6.105 ms / 100) 5.705 -> 5.720 ( +0.26%) [ +0.28% +0.37% +0.00% / +0.26% +0.60% +0.56%] index_select skip64 : Elapsed 0.057 ms (5.721 ms / 100) 5.513 -> 5.516 ( +0.05%) [ +0.05% +0.00% +0.07% / +0.05% +0.53% +0.38%] index_select skip256 : Elapsed 0.055 ms (5.516 ms / 100) 6.408 -> 6.413 ( +0.08%) [ +0.05% +0.00% +0.23% / +0.14% +0.08% +0.25%] index_select spread : Elapsed 0.064 ms (6.411 ms / 100) 6.801 -> 6.813 ( +0.18%) [ +0.22% +0.00% +0.75% / +0.18% +0.21% +0.32%] index_select strided 3 : Elapsed 0.068 ms (6.816 ms / 100) 7.025 -> 7.021 ( -0.06%) [ +0.01% +0.00% +0.26% / +0.30% -0.06% -0.04%] index_select strided 5 : Elapsed 0.070 ms (7.026 ms / 100) 7.019 -> 7.000 ( -0.27%) [ +0.00% +0.14% +0.07% / -0.01% -0.27% -0.16%] index_select strided 7 : Elapsed 0.070 ms (7.019 ms / 100) 7.009 -> 7.010 ( +0.01%) [ +0.04% +0.00% +0.04% / +0.07% +0.01% +0.11%] index_select strided 8 : Elapsed 0.070 ms (7.012 ms / 100) 7.035 -> 7.028 ( -0.10%) [ +0.00% +0.11% +0.07% / +0.16% -0.10% +0.03%] index_select strided 16 : Elapsed 0.070 ms (7.035 ms / 100) 7.037 -> 7.034 ( -0.04%) [ +0.21% +0.03% +0.00% / +0.03% -0.04% +0.26%] index_select strided 64 : Elapsed 0.071 ms (7.052 ms / 100) 6.128 -> 6.100 ( -0.46%) [ +0.08% +0.00% +0.11% / -0.15% -0.46% -0.41%] index_select strided 100 : Elapsed 0.061 ms (6.133 ms / 100) 6.990 -> 6.969 ( -0.30%) [ +0.07% +0.00% +0.04% / -0.30% -0.03% -0.13%] index_select random : Elapsed 0.070 ms (6.995 ms / 100) 6.404 -> 6.412 ( +0.12%) [ +0.00% +0.34% +0.45% / +0.39% +0.12% +0.33%] index_select random_sorted : Elapsed 0.064 ms (6.404 ms / 100) 6.998 -> 6.996 ( -0.03%) [ +0.00% +0.17% +0.30% / +0.10% -0.03% +0.01%] index_select perm : Elapsed 0.070 ms (6.998 ms / 100) 6.408 -> 6.401 ( -0.11%) [ +0.00% +0.22% +0.27% / +0.12% -0.11% +0.02%] index_select perm_sorted : Elapsed 0.064 ms (6.408 ms / 100) B = [15, 150, 50] (stride (50, 750, 1)) dim = 1 fill_cnt = 250 2.940 -> 2.921 ( -0.65%) [ +0.10% +0.07% +0.00% / -0.65% -0.54% -0.51%] index_fill_ const : Elapsed 0.029 ms (2.943 ms / 100) 3.000 -> 2.979 ( -0.70%) [ +0.00% +0.13% +0.00% / -0.40% -0.63% -0.70%] index_fill_ linear : Elapsed 0.030 ms (3.000 ms / 100) 2.971 -> 2.963 ( -0.27%) [ +0.30% +0.00% +0.30% / -0.27% -0.10% +0.07%] index_fill_ reverse : Elapsed 0.030 ms (2.980 ms / 100) 2.932 -> 2.927 ( -0.17%) [ +0.31% +0.41% +0.00% / -0.17% +0.27% +0.27%] index_fill_ skip64 : Elapsed 0.029 ms (2.941 ms / 100) 2.933 -> 2.920 ( -0.44%) [ +0.44% +0.34% +0.00% / -0.44% -0.34% -0.20%] index_fill_ skip256 : Elapsed 0.029 ms (2.946 ms / 100) 2.991 -> 2.977 ( -0.47%) [ +0.20% +0.57% +0.00% / -0.47% -0.03% -0.07%] index_fill_ spread : Elapsed 0.030 ms (2.997 ms / 100) 2.980 -> 2.954 ( -0.87%) [ +0.07% +0.23% +0.00% / -0.87% -0.50% -0.70%] index_fill_ strided 3 : Elapsed 0.030 ms (2.982 ms / 100) 2.974 -> 2.952 ( -0.74%) [ +0.34% +0.34% +0.00% / -0.74% -0.57% -0.67%] index_fill_ strided 5 : Elapsed 0.030 ms (2.984 ms / 100) 3.001 -> 2.981 ( -0.67%) [ +0.00% +0.07% +0.30% / -0.67% -0.43% -0.37%] index_fill_ strided 7 : Elapsed 0.030 ms (3.001 ms / 100) 2.982 -> 2.964 ( -0.60%) [ +0.23% +0.17% +0.00% / -0.10% -0.40% -0.60%] index_fill_ strided 8 : Elapsed 0.030 ms (2.989 ms / 100) 2.982 -> 2.973 ( -0.30%) [ +0.00% +0.20% +0.27% / -0.30% -0.23% -0.07%] index_fill_ strided 16 : Elapsed 0.030 ms (2.982 ms / 100) 2.976 -> 2.976 ( +0.00%) [ +0.37% +0.34% +0.00% / +0.07% +0.24% +0.00%] index_fill_ strided 64 : Elapsed 0.030 ms (2.987 ms / 100) 2.943 -> 2.923 ( -0.68%) [ +0.27% +0.00% +0.07% / -0.37% -0.58% -0.68%] index_fill_ strided 100 : Elapsed 0.030 ms (2.951 ms / 100) 2.990 -> 2.970 ( -0.67%) [ +0.03% +0.00% +0.13% / -0.67% -0.13% -0.17%] index_fill_ random : Elapsed 0.030 ms (2.991 ms / 100) 2.983 -> 2.964 ( -0.64%) [ +0.03% +0.00% +0.03% / -0.64% -0.20% -0.20%] index_fill_ random_sorted : Elapsed 0.030 ms (2.984 ms / 100) B = [15, 150, 50] (stride (50, 750, 1)) A = [15, 250, 50] (stride (1, 15, 3750)) dim = 1 5.391 -> 5.395 ( +0.07%) [ +0.00% +0.09% +0.32% / +0.07% +0.65% +0.74%] index_select const : Elapsed 0.054 ms (5.391 ms / 100) 5.876 -> 5.892 ( +0.27%) [ +0.07% +0.14% +0.00% / +0.27% +0.49% +0.37%] index_select wrap : Elapsed 0.059 ms (5.880 ms / 100) 5.863 -> 5.872 ( +0.15%) [ +0.10% +0.00% +0.02% / +0.15% +0.43% +0.63%] index_select linear : Elapsed 0.059 ms (5.869 ms / 100) 5.871 -> 5.881 ( +0.17%) [ +0.00% +0.07% +0.17% / +0.20% +0.17% +0.36%] index_select reverse : Elapsed 0.059 ms (5.871 ms / 100) 5.418 -> 5.423 ( +0.09%) [ +0.20% +0.00% +0.22% / +0.09% +0.52% +0.54%] index_select skip64 : Elapsed 0.054 ms (5.429 ms / 100) 5.411 -> 5.415 ( +0.07%) [ +0.20% +0.00% +0.06% / +0.07% +0.30% +0.20%] index_select skip256 : Elapsed 0.054 ms (5.422 ms / 100) 6.032 -> 6.047 ( +0.25%) [ +0.18% +0.00% +0.12% / +0.25% +0.75% +0.61%] index_select spread : Elapsed 0.060 ms (6.043 ms / 100) 6.351 -> 6.350 ( -0.02%) [ +0.19% +0.00% +0.41% / +0.24% -0.02% +0.13%] index_select strided 3 : Elapsed 0.064 ms (6.363 ms / 100) 6.097 -> 6.106 ( +0.15%) [ +0.10% +0.08% +0.00% / +0.15% +0.62% +0.75%] index_select strided 5 : Elapsed 0.061 ms (6.103 ms / 100) 6.400 -> 6.402 ( +0.03%) [ +0.28% +0.25% +0.00% / +0.28% +0.34% +0.03%] index_select strided 7 : Elapsed 0.064 ms (6.418 ms / 100) 6.405 -> 6.412 ( +0.11%) [ +0.00% +0.23% +0.02% / +0.11% +0.23% +0.20%] index_select strided 8 : Elapsed 0.064 ms (6.405 ms / 100) 6.440 -> 6.447 ( +0.11%) [ +0.09% +0.00% +0.33% / +0.16% +0.11% +0.12%] index_select strided 16 : Elapsed 0.064 ms (6.446 ms / 100) 6.454 -> 6.436 ( -0.28%) [ +0.00% +0.09% +0.14% / -0.12% +0.17% -0.28%] index_select strided 64 : Elapsed 0.065 ms (6.454 ms / 100) 5.424 -> 5.424 ( +0.00%) [ +0.29% +0.00% +0.11% / +0.00% +0.44% +0.66%] index_select strided 100 : Elapsed 0.054 ms (5.440 ms / 100) 6.238 -> 6.220 ( -0.29%) [ +0.00% +0.08% +0.03% / -0.18% -0.06% -0.29%] index_select random : Elapsed 0.062 ms (6.238 ms / 100) 5.927 -> 5.932 ( +0.08%) [ +0.27% +0.00% +0.05% / +0.08% +0.13% +0.17%] index_select random_sorted : Elapsed 0.059 ms (5.943 ms / 100) 6.313 -> 6.310 ( -0.05%) [ +0.02% +0.16% +0.00% / -0.05% +0.19% +0.14%] index_select perm : Elapsed 0.063 ms (6.314 ms / 100) 5.970 -> 5.978 ( +0.13%) [ +0.22% +0.00% +0.22% / +0.13% +0.85% +0.92%] index_select perm_sorted : Elapsed 0.060 ms (5.983 ms / 100) B = [15, 150, 50] (stride (1, 750, 15)) A = [15, 250, 50] (stride (1, 15, 3750)) dim = 1 5.604 -> 5.605 ( +0.02%) [ +0.00% +0.16% +0.21% / +0.02% +1.07% +0.61%] index_select const : Elapsed 0.056 ms (5.604 ms / 100) 5.853 -> 5.852 ( -0.02%) [ +0.34% +0.00% +0.32% / -0.02% +1.04% +0.94%] index_select wrap : Elapsed 0.059 ms (5.873 ms / 100) 5.863 -> 5.857 ( -0.10%) [ +0.15% +0.00% +0.00% / -0.10% +0.68% +0.78%] index_select linear : Elapsed 0.059 ms (5.872 ms / 100) 5.890 -> 5.884 ( -0.10%) [ +0.07% +0.07% +0.00% / -0.10% +0.53% +0.10%] index_select reverse : Elapsed 0.059 ms (5.894 ms / 100) 5.563 -> 5.571 ( +0.14%) [ +0.09% +0.00% +0.22% / +0.14% +0.54% +0.32%] index_select skip64 : Elapsed 0.056 ms (5.568 ms / 100) 5.597 -> 5.607 ( +0.18%) [ +0.07% +0.29% +0.00% / +0.18% +0.80% +0.89%] index_select skip256 : Elapsed 0.056 ms (5.601 ms / 100) 6.102 -> 6.092 ( -0.16%) [ +0.18% +0.15% +0.00% / -0.16% +0.64% +0.52%] index_select spread : Elapsed 0.061 ms (6.113 ms / 100) 6.392 -> 6.392 ( +0.00%) [ +0.30% +0.58% +0.00% / +0.00% +0.99% +0.74%] index_select strided 3 : Elapsed 0.064 ms (6.411 ms / 100) 6.184 -> 6.199 ( +0.24%) [ +0.00% +0.06% +0.05% / +0.24% +0.71% +0.63%] index_select strided 5 : Elapsed 0.062 ms (6.184 ms / 100) 6.457 -> 6.455 ( -0.03%) [ +0.09% +0.03% +0.00% / +0.00% +0.17% -0.03%] index_select strided 7 : Elapsed 0.065 ms (6.463 ms / 100) 6.477 -> 6.458 ( -0.29%) [ +0.03% +0.00% +0.14% / -0.29% -0.09% -0.05%] index_select strided 8 : Elapsed 0.065 ms (6.479 ms / 100) 6.487 -> 6.503 ( +0.25%) [ +0.00% +0.06% +0.17% / +0.25% +0.57% +0.69%] index_select strided 16 : Elapsed 0.065 ms (6.487 ms / 100) 6.523 -> 6.498 ( -0.38%) [ +0.00% +0.17% +0.12% / -0.20% -0.38% -0.08%] index_select strided 64 : Elapsed 0.065 ms (6.523 ms / 100) 5.584 -> 5.583 ( -0.02%) [ +0.13% +0.00% +0.20% / -0.02% +0.45% +0.43%] index_select strided 100 : Elapsed 0.056 ms (5.591 ms / 100) 6.296 -> 6.290 ( -0.10%) [ +0.00% +0.10% +0.10% / -0.10% +0.08% +0.05%] index_select random : Elapsed 0.063 ms (6.296 ms / 100) 5.990 -> 5.996 ( +0.10%) [ +0.28% +0.37% +0.00% / +0.10% +0.78% +0.83%] index_select random_sorted : Elapsed 0.060 ms (6.007 ms / 100) 6.362 -> 6.376 ( +0.22%) [ +0.19% +0.00% +0.03% / +0.22% +0.31% +0.30%] index_select perm : Elapsed 0.064 ms (6.374 ms / 100) 6.041 -> 6.036 ( -0.08%) [ +0.08% +0.00% +0.00% / -0.08% +0.48% +0.30%] index_select perm_sorted : Elapsed 0.060 ms (6.046 ms / 100) out_shape = [15, 250, 150] in_shape = [15, 250, 50] idx_dim = 2 B = [15, 250, 150] (stride (37500, 150, 1)) A = [15, 250, 50] (stride (1, 750, 15)) dim = 2 16.237 -> 15.682 ( -3.42%) [ +0.00% +0.01% +0.08% / -0.86% -3.20% -3.42%] index_add_ linear : Elapsed 0.162 ms (16.237 ms / 100) 14.506 -> 14.137 ( -2.54%) [ +0.12% +0.10% +0.00% / -0.77% -2.30% -2.54%] index_copy_ linear : Elapsed 0.145 ms (14.524 ms / 100) 16.274 -> 15.779 ( -3.04%) [ +0.26% +0.28% +0.00% / -0.88% -3.04% -2.96%] index_add_ reverse : Elapsed 0.163 ms (16.317 ms / 100) 14.555 -> 14.239 ( -2.17%) [ +0.04% +0.06% +0.00% / -0.81% -2.12% -2.17%] index_copy_ reverse : Elapsed 0.146 ms (14.561 ms / 100) 19.246 -> 19.031 ( -1.12%) [ +0.00% +0.09% +0.02% / -1.12% -0.63% -0.54%] index_add_ spread : Elapsed 0.192 ms (19.246 ms / 100) 17.084 -> 16.893 ( -1.12%) [ +0.08% +0.00% +0.05% / -1.12% -0.49% -0.26%] index_copy_ spread : Elapsed 0.171 ms (17.097 ms / 100) 23.102 -> 23.027 ( -0.32%) [ +0.00% +0.26% +0.06% / -0.32% -0.23% -0.04%] index_add_ strided 7 : Elapsed 0.231 ms (23.102 ms / 100) 21.121 -> 20.828 ( -1.39%) [ +0.07% +0.23% +0.00% / -0.67% -1.31% -1.39%] index_copy_ strided 7 : Elapsed 0.211 ms (21.136 ms / 100) 24.552 -> 24.603 ( +0.21%) [ +0.02% +0.02% +0.00% / +0.23% +0.37% +0.21%] index_add_ perm : Elapsed 0.246 ms (24.557 ms / 100) 22.760 -> 22.778 ( +0.08%) [ +0.00% +0.00% +0.02% / +0.33% +0.40% +0.08%] index_copy_ perm : Elapsed 0.228 ms (22.760 ms / 100) 19.158 -> 18.908 ( -1.30%) [ +0.15% +0.00% +0.13% / -1.21% -1.22% -1.30%] index_add_ perm_sorted : Elapsed 0.192 ms (19.186 ms / 100) 17.035 -> 16.843 ( -1.13%) [ +0.15% +0.08% +0.00% / -0.99% -0.79% -1.13%] index_copy_ perm_sorted : Elapsed 0.171 ms (17.060 ms / 100) 25.647 -> 25.361 ( -1.12%) [ +0.00% +0.27% +0.07% / +0.21% -0.95% -1.12%] index_select const : Elapsed 0.256 ms (25.647 ms / 100) 32.484 -> 32.538 ( +0.17%) [ +0.00% +0.66% +0.31% / +0.38% +0.50% +0.17%] index_select wrap : Elapsed 0.325 ms (32.484 ms / 100) 27.013 -> 26.974 ( -0.14%) [ +0.13% +0.00% +0.12% / -0.14% +0.20% +0.28%] index_select linear : Elapsed 0.270 ms (27.047 ms / 100) 28.970 -> 28.841 ( -0.45%) [ +0.25% +0.00% +0.14% / +0.05% -0.37% -0.45%] index_select reverse : Elapsed 0.290 ms (29.041 ms / 100) 25.645 -> 25.377 ( -1.05%) [ +0.04% +0.00% +0.28% / +0.27% -0.94% -1.05%] index_select skip64 : Elapsed 0.257 ms (25.656 ms / 100) 25.667 -> 25.377 ( -1.13%) [ +0.25% +0.11% +0.00% / +0.18% -1.13% -0.99%] index_select skip256 : Elapsed 0.257 ms (25.732 ms / 100) 28.096 -> 27.952 ( -0.51%) [ +0.00% +0.23% +0.05% / +0.22% -0.26% -0.51%] index_select spread : Elapsed 0.281 ms (28.096 ms / 100) 40.418 -> 40.456 ( +0.09%) [ +0.44% +0.35% +0.00% / +0.28% +0.15% +0.09%] index_select strided 3 : Elapsed 0.406 ms (40.594 ms / 100) 35.080 -> 34.494 ( -1.67%) [ +0.63% +0.56% +0.00% / +0.57% -1.67% -1.38%] index_select strided 5 : Elapsed 0.353 ms (35.302 ms / 100) 36.110 -> 36.083 ( -0.07%) [ +0.23% +0.31% +0.00% / -0.07% +1.24% +0.60%] index_select strided 7 : Elapsed 0.362 ms (36.194 ms / 100) 39.209 -> 39.304 ( +0.24%) [ +0.20% +0.27% +0.00% / +0.24% +0.71% +0.55%] index_select strided 8 : Elapsed 0.393 ms (39.288 ms / 100) 38.779 -> 38.925 ( +0.38%) [ +0.19% +0.00% +0.12% / +0.38% +1.21% +1.54%] index_select strided 16 : Elapsed 0.389 ms (38.853 ms / 100) 36.436 -> 36.378 ( -0.16%) [ +0.23% +0.00% +0.17% / +0.00% +0.13% -0.16%] index_select random : Elapsed 0.365 ms (36.519 ms / 100) 28.090 -> 28.066 ( -0.09%) [ +0.14% +0.00% +0.11% / -0.01% -0.06% -0.09%] index_select random_sorted : Elapsed 0.281 ms (28.128 ms / 100) B = [15, 250, 150] (stride (37500, 1, 250)) A = [15, 250, 50] (stride (250, 1, 3750)) dim = 2 10.941 -> 10.962 ( +0.19%) [ +0.02% +0.00% +0.03% / +0.19% +0.55% +0.58%] index_add_ linear : Elapsed 0.109 ms (10.943 ms / 100) 10.676 -> 10.684 ( +0.07%) [ +0.04% +0.00% +0.04% / +0.07% +0.46% +0.37%] index_copy_ linear : Elapsed 0.107 ms (10.680 ms / 100) 10.953 -> 10.945 ( -0.07%) [ +0.09% +0.10% +0.00% / -0.07% +0.34% +0.33%] index_add_ reverse : Elapsed 0.110 ms (10.963 ms / 100) 10.701 -> 10.692 ( -0.08%) [ +0.07% +0.00% +0.09% / -0.08% +0.17% +0.13%] index_copy_ reverse : Elapsed 0.107 ms (10.708 ms / 100) 10.998 -> 10.991 ( -0.06%) [ +0.05% +0.00% +0.05% / +0.06% -0.06% -0.03%] index_add_ spread : Elapsed 0.110 ms (11.004 ms / 100) 10.755 -> 10.748 ( -0.07%) [ +0.00% +0.01% +0.07% / +0.04% +0.15% -0.07%] index_copy_ spread : Elapsed 0.108 ms (10.755 ms / 100) 10.992 -> 10.985 ( -0.06%) [ +0.00% +0.02% +0.04% / -0.06% +0.85% +0.76%] index_add_ strided 7 : Elapsed 0.110 ms (10.992 ms / 100) 10.726 -> 10.721 ( -0.05%) [ +0.00% +0.07% +0.10% / -0.05% +0.81% +0.48%] index_copy_ strided 7 : Elapsed 0.107 ms (10.726 ms / 100) 11.076 -> 11.052 ( -0.22%) [ +0.00% +0.14% +0.05% / -0.11% -0.14% -0.22%] index_add_ perm : Elapsed 0.111 ms (11.076 ms / 100) 10.773 -> 10.764 ( -0.08%) [ +0.00% +0.10% +0.06% / -0.08% +0.17% -0.01%] index_copy_ perm : Elapsed 0.108 ms (10.773 ms / 100) 11.074 -> 11.010 ( -0.58%) [ +0.18% +0.00% +0.08% / -0.15% -0.58% -0.58%] index_add_ perm_sorted : Elapsed 0.111 ms (11.094 ms / 100) 10.783 -> 10.754 ( -0.27%) [ +0.08% +0.00% +0.23% / -0.03% -0.06% -0.27%] index_copy_ perm_sorted : Elapsed 0.108 ms (10.792 ms / 100) 17.304 -> 17.267 ( -0.21%) [ +0.00% +0.05% +0.07% / -0.06% -0.09% -0.21%] index_select const : Elapsed 0.173 ms (17.304 ms / 100) 18.984 -> 18.996 ( +0.06%) [ +0.08% +0.00% +0.11% / +0.06% +0.56% +0.63%] index_select wrap : Elapsed 0.190 ms (19.000 ms / 100) 17.629 -> 17.638 ( +0.05%) [ +0.00% +0.09% +0.02% / +0.05% +0.73% +0.66%] index_select linear : Elapsed 0.176 ms (17.629 ms / 100) 18.002 -> 18.007 ( +0.03%) [ +0.04% +0.00% +0.01% / +0.03% +0.66% +0.62%] index_select reverse : Elapsed 0.180 ms (18.010 ms / 100) 17.278 -> 17.279 ( +0.01%) [ +0.20% +0.00% +0.10% / +0.14% +0.05% +0.01%] index_select skip64 : Elapsed 0.173 ms (17.313 ms / 100) 17.280 -> 17.284 ( +0.02%) [ +0.13% +0.00% +0.08% / +0.13% +0.10% +0.02%] index_select skip256 : Elapsed 0.173 ms (17.302 ms / 100) 18.100 -> 18.126 ( +0.14%) [ +0.07% +0.00% +0.07% / +0.14% +0.66% +0.58%] index_select spread : Elapsed 0.181 ms (18.112 ms / 100) 19.044 -> 19.056 ( +0.06%) [ +0.00% +0.02% +0.04% / +0.06% +0.79% +0.80%] index_select strided 3 : Elapsed 0.190 ms (19.044 ms / 100) 18.291 -> 18.273 ( -0.10%) [ +0.00% +0.22% +0.08% / -0.03% +0.14% -0.10%] index_select strided 5 : Elapsed 0.183 ms (18.291 ms / 100) 19.061 -> 19.079 ( +0.09%) [ +0.16% +0.00% +0.08% / +0.09% +0.44% +0.50%] index_select strided 7 : Elapsed 0.191 ms (19.092 ms / 100) 18.930 -> 18.935 ( +0.03%) [ +0.00% +0.16% +0.02% / +0.03% +0.13% +0.06%] index_select strided 8 : Elapsed 0.189 ms (18.930 ms / 100) 18.938 -> 18.905 ( -0.17%) [ +0.00% +0.12% +0.24% / +0.27% -0.11% -0.17%] index_select strided 16 : Elapsed 0.189 ms (18.938 ms / 100) 18.812 -> 18.762 ( -0.27%) [ +0.01% +0.00% +0.02% / -0.04% -0.27% -0.23%] index_select random : Elapsed 0.188 ms (18.814 ms / 100) 18.020 -> 18.027 ( +0.04%) [ +0.00% +0.01% +0.13% / +0.04% +0.25% +0.26%] index_select random_sorted : Elapsed 0.180 ms (18.020 ms / 100) B = [15, 250, 150] (stride (250, 1, 3750)) A = [15, 250, 50] (stride (1, 750, 15)) dim = 2 11.850 -> 11.801 ( -0.41%) [ +0.15% +0.02% +0.00% / -0.41% +0.67% +0.62%] index_add_ linear : Elapsed 0.119 ms (11.868 ms / 100) 11.657 -> 11.608 ( -0.42%) [ +0.08% +0.06% +0.00% / -0.42% +0.49% +0.44%] index_copy_ linear : Elapsed 0.117 ms (11.666 ms / 100) 11.951 -> 11.898 ( -0.44%) [ +0.23% +0.02% +0.00% / -0.44% +0.62% +0.33%] index_add_ reverse : Elapsed 0.120 ms (11.979 ms / 100) 11.708 -> 11.678 ( -0.26%) [ +0.15% +0.00% +0.08% / -0.26% +0.56% +0.27%] index_copy_ reverse : Elapsed 0.117 ms (11.726 ms / 100) 12.045 -> 12.016 ( -0.24%) [ +0.07% +0.00% +0.25% / -0.24% -0.11% -0.08%] index_add_ spread : Elapsed 0.121 ms (12.053 ms / 100) 11.779 -> 11.746 ( -0.28%) [ +0.00% +0.15% +0.07% / -0.15% -0.06% -0.28%] index_copy_ spread : Elapsed 0.118 ms (11.779 ms / 100) 12.044 -> 12.036 ( -0.07%) [ +0.08% +0.03% +0.00% / -0.07% +0.34% +0.29%] index_add_ strided 7 : Elapsed 0.121 ms (12.054 ms / 100) 11.772 -> 11.744 ( -0.24%) [ +0.00% +0.13% +0.03% / -0.24% +0.34% +0.21%] index_copy_ strided 7 : Elapsed 0.118 ms (11.772 ms / 100) 12.055 -> 12.039 ( -0.13%) [ +0.07% +0.00% +0.20% / -0.13% -0.05% +0.07%] index_add_ perm : Elapsed 0.121 ms (12.063 ms / 100) 11.750 -> 11.738 ( -0.10%) [ +0.20% +0.00% +0.35% / -0.10% +0.31% +0.49%] index_copy_ perm : Elapsed 0.118 ms (11.773 ms / 100) 12.021 -> 11.980 ( -0.34%) [ +0.15% +0.00% +0.13% / -0.34% -0.14% -0.20%] index_add_ perm_sorted : Elapsed 0.120 ms (12.039 ms / 100) 11.757 -> 11.722 ( -0.30%) [ +0.01% +0.04% +0.00% / -0.30% -0.02% -0.14%] index_copy_ perm_sorted : Elapsed 0.118 ms (11.758 ms / 100) 17.514 -> 17.523 ( +0.05%) [ +0.10% +0.00% +0.12% / +0.05% +0.15% +0.19%] index_select const : Elapsed 0.175 ms (17.531 ms / 100) 21.957 -> 21.980 ( +0.10%) [ +0.04% +0.16% +0.00% / +0.10% +1.78% +1.76%] index_select wrap : Elapsed 0.220 ms (21.966 ms / 100) 18.816 -> 18.841 ( +0.13%) [ +0.00% +0.06% +0.06% / +0.13% +0.44% +0.33%] index_select linear : Elapsed 0.188 ms (18.816 ms / 100) 20.152 -> 20.185 ( +0.16%) [ +0.02% +0.08% +0.00% / +0.16% +0.53% +0.24%] index_select reverse : Elapsed 0.202 ms (20.157 ms / 100) 17.529 -> 17.534 ( +0.03%) [ +0.07% +0.05% +0.00% / +0.04% +0.03% +0.09%] index_select skip64 : Elapsed 0.175 ms (17.542 ms / 100) 17.505 -> 17.519 ( +0.08%) [ +0.09% +0.00% +0.04% / +0.08% +0.13% +0.19%] index_select skip256 : Elapsed 0.175 ms (17.520 ms / 100) 19.133 -> 19.157 ( +0.13%) [ +0.03% +0.00% +0.18% / +0.13% +0.58% +0.43%] index_select spread : Elapsed 0.191 ms (19.139 ms / 100) 26.629 -> 26.668 ( +0.15%) [ +0.12% +0.07% +0.00% / +0.15% +0.33% +0.51%] index_select strided 3 : Elapsed 0.267 ms (26.660 ms / 100) 21.804 -> 21.777 ( -0.12%) [ +0.00% +0.03% +0.06% / +0.05% -0.12% +0.10%] index_select strided 5 : Elapsed 0.218 ms (21.804 ms / 100) 24.011 -> 24.001 ( -0.04%) [ +0.08% +0.14% +0.00% / -0.04% +0.53% +0.53%] index_select strided 7 : Elapsed 0.240 ms (24.031 ms / 100) 25.640 -> 25.565 ( -0.29%) [ +0.17% +0.00% +0.01% / -0.11% +0.00% -0.29%] index_select strided 8 : Elapsed 0.257 ms (25.683 ms / 100) 25.349 -> 25.382 ( +0.13%) [ +0.00% +0.31% +0.53% / +0.13% +1.25% +1.15%] index_select strided 16 : Elapsed 0.253 ms (25.349 ms / 100) 23.732 -> 23.745 ( +0.05%) [ +0.00% +0.21% +0.39% / +0.05% +1.05% +1.07%] index_select random : Elapsed 0.237 ms (23.732 ms / 100) 19.103 -> 19.129 ( +0.14%) [ +0.06% +0.00% +0.06% / +0.14% +0.58% +0.62%] index_select random_sorted : Elapsed 0.191 ms (19.115 ms / 100) B = [15, 250, 150] (stride (1, 15, 3750)) dim = 2 fill_cnt = 50 3.525 -> 3.496 ( -0.82%) [ +0.00% +0.17% +0.00% / -0.82% -0.43% -0.57%] index_fill_ const : Elapsed 0.035 ms (3.525 ms / 100) 3.513 -> 3.497 ( -0.46%) [ +0.00% +0.17% +0.06% / -0.46% -0.23% -0.31%] index_fill_ linear : Elapsed 0.035 ms (3.513 ms / 100) 3.511 -> 3.495 ( -0.46%) [ +0.14% +0.00% +0.06% / -0.46% -0.11% -0.17%] index_fill_ reverse : Elapsed 0.035 ms (3.516 ms / 100) 3.438 -> 3.413 ( -0.73%) [ +0.00% +0.20% +0.17% / -0.73% +1.80% +1.89%] index_fill_ skip64 : Elapsed 0.034 ms (3.438 ms / 100) 3.440 -> 3.412 ( -0.81%) [ +0.00% +0.32% +0.06% / -0.81% +1.80% +1.72%] index_fill_ skip256 : Elapsed 0.034 ms (3.440 ms / 100) 3.528 -> 3.492 ( -1.02%) [ +0.00% +0.14% +0.09% / -0.45% -0.71% -1.02%] index_fill_ spread : Elapsed 0.035 ms (3.528 ms / 100) 3.529 -> 3.493 ( -1.02%) [ +0.09% +0.26% +0.00% / -0.43% -0.71% -1.02%] index_fill_ strided 3 : Elapsed 0.035 ms (3.532 ms / 100) 3.471 -> 3.455 ( -0.46%) [ +0.00% +0.55% +0.26% / -0.46% +0.35% +0.37%] index_fill_ strided 5 : Elapsed 0.035 ms (3.471 ms / 100) 3.502 -> 3.494 ( -0.23%) [ +0.17% +0.37% +0.00% / -0.23% +0.57% +0.31%] index_fill_ strided 7 : Elapsed 0.035 ms (3.508 ms / 100) 3.509 -> 3.496 ( -0.37%) [ +0.00% +0.20% +0.23% / -0.37% +1.14% +0.94%] index_fill_ strided 8 : Elapsed 0.035 ms (3.509 ms / 100) 3.514 -> 3.500 ( -0.40%) [ +0.20% +0.00% +0.14% / -0.40% +0.46% +0.40%] index_fill_ strided 16 : Elapsed 0.035 ms (3.521 ms / 100) 3.511 -> 3.500 ( -0.31%) [ +0.11% +0.00% +0.09% / -0.31% +0.51% +0.40%] index_fill_ strided 64 : Elapsed 0.035 ms (3.515 ms / 100) 3.401 -> 3.380 ( -0.62%) [ +0.50% +0.18% +0.00% / -0.62% +3.20% +3.35%] index_fill_ strided 100 : Elapsed 0.034 ms (3.418 ms / 100) 3.530 -> 3.519 ( -0.31%) [ +0.11% +0.00% +0.11% / -0.31% +0.06% -0.08%] index_fill_ random : Elapsed 0.035 ms (3.534 ms / 100) 3.498 -> 3.485 ( -0.37%) [ +0.49% +0.00% +0.26% / -0.37% +0.97% +0.86%] index_fill_ random_sorted : Elapsed 0.035 ms (3.515 ms / 100) 3.484 -> 3.482 ( -0.06%) [ +0.00% +0.00% +0.29% / -0.06% +1.69% +1.38%] index_fill_ perm : Elapsed 0.035 ms (3.484 ms / 100) 3.514 -> 3.517 ( +0.09%) [ +0.14% +0.00% +0.09% / +0.09% +0.83% +0.60%] index_fill_ perm_sorted : Elapsed 0.035 ms (3.519 ms / 100) out_shape = [150, 15, 250] in_shape = [50, 15, 250] idx_dim = 0 B = [150, 15, 250] (stride (3750, 1, 15)) A = [50, 15, 250] (stride (250, 12500, 1)) dim = 0 7.063 -> 7.006 ( -0.81%) [ +0.00% +0.21% +0.14% / -0.24% -0.81% -0.81%] index_add_ linear : Elapsed 0.071 ms (7.063 ms / 100) 6.566 -> 6.517 ( -0.75%) [ +0.23% +0.00% +0.64% / -0.08% -0.75% -0.70%] index_copy_ linear : Elapsed 0.066 ms (6.581 ms / 100) 7.040 -> 6.931 ( -1.55%) [ +0.13% +0.01% +0.00% / -0.17% -1.55% -1.52%] index_add_ reverse : Elapsed 0.070 ms (7.049 ms / 100) 6.579 -> 6.492 ( -1.32%) [ +0.53% +0.00% +0.35% / -0.44% -1.19% -1.32%] index_copy_ reverse : Elapsed 0.066 ms (6.614 ms / 100) 7.126 -> 6.969 ( -2.20%) [ +0.00% +0.24% +0.22% / +0.17% -2.20% -1.94%] index_add_ spread : Elapsed 0.071 ms (7.126 ms / 100) 6.670 -> 6.536 ( -2.01%) [ +0.00% +0.09% +0.54% / +0.04% -1.86% -2.01%] index_copy_ spread : Elapsed 0.067 ms (6.670 ms / 100) 7.189 -> 7.073 ( -1.61%) [ +0.24% +0.38% +0.00% / +0.04% -1.61% -1.49%] index_add_ strided 7 : Elapsed 0.072 ms (7.206 ms / 100) 6.680 -> 6.602 ( -1.17%) [ +0.30% +0.34% +0.00% / -0.31% -1.17% -1.15%] index_copy_ strided 7 : Elapsed 0.067 ms (6.700 ms / 100) 7.172 -> 7.122 ( -0.70%) [ +0.00% +0.17% +0.22% / -0.29% -0.70% -0.63%] index_add_ perm : Elapsed 0.072 ms (7.172 ms / 100) 6.671 -> 6.586 ( -1.27%) [ +0.00% +0.13% +0.48% / -0.36% -1.06% -1.27%] index_copy_ perm : Elapsed 0.067 ms (6.671 ms / 100) 7.139 -> 7.059 ( -1.12%) [ +0.10% +0.32% +0.00% / -0.17% -1.12% -0.90%] index_add_ perm_sorted : Elapsed 0.071 ms (7.146 ms / 100) 6.664 -> 6.597 ( -1.01%) [ +0.00% +0.11% +0.06% / -0.21% -0.84% -1.01%] index_copy_ perm_sorted : Elapsed 0.067 ms (6.664 ms / 100) 9.268 -> 9.257 ( -0.12%) [ +0.00% +0.31% +0.74% / -0.12% +1.40% +1.36%] index_select const : Elapsed 0.093 ms (9.268 ms / 100) 11.439 -> 11.439 ( +0.00%) [ +0.73% +0.00% +0.94% / +0.00% +0.78% +2.83%] index_select wrap : Elapsed 0.115 ms (11.523 ms / 100) 10.018 -> 9.919 ( -0.99%) [ +0.09% +0.48% +0.00% / +0.30% -0.99% -0.51%] index_select linear : Elapsed 0.100 ms (10.027 ms / 100) 9.749 -> 9.732 ( -0.17%) [ +0.21% +0.00% +0.08% / -0.17% +0.47% +0.36%] index_select reverse : Elapsed 0.098 ms (9.769 ms / 100) 9.346 -> 9.366 ( +0.21%) [ +0.00% +0.41% +0.10% / +0.21% +0.70% +1.23%] index_select skip64 : Elapsed 0.093 ms (9.346 ms / 100) 9.343 -> 9.328 ( -0.16%) [ +0.15% +0.39% +0.00% / -0.02% -0.02% -0.16%] index_select skip256 : Elapsed 0.094 ms (9.357 ms / 100) 9.874 -> 9.914 ( +0.41%) [ +0.32% +0.00% +0.21% / +0.41% +2.79% +2.26%] index_select spread : Elapsed 0.099 ms (9.906 ms / 100) 11.389 -> 11.442 ( +0.47%) [ +0.17% +0.00% +0.19% / +0.69% +0.47% +0.57%] index_select strided 3 : Elapsed 0.114 ms (11.408 ms / 100) 9.998 -> 10.012 ( +0.14%) [ +0.22% +0.00% +0.36% / +0.14% +12.18% +12.09%] index_select strided 5 : Elapsed 0.100 ms (10.020 ms / 100) 11.389 -> 11.381 ( -0.07%) [ +0.00% +0.03% +0.80% / -0.07% +1.91% +2.09%] index_select strided 7 : Elapsed 0.114 ms (11.389 ms / 100) 11.364 -> 11.260 ( -0.92%) [ +1.42% +0.00% +0.96% / +0.14% -0.82% -0.92%] index_select strided 8 : Elapsed 0.115 ms (11.525 ms / 100) 11.239 -> 11.121 ( -1.05%) [ +1.86% +0.00% +0.91% / +0.01% -1.05% -1.05%] index_select strided 16 : Elapsed 0.114 ms (11.448 ms / 100) 10.745 -> 10.700 ( -0.42%) [ +0.87% +0.74% +0.00% / -0.42% +2.58% +4.52%] index_select random : Elapsed 0.108 ms (10.838 ms / 100) 9.826 -> 9.848 ( +0.22%) [ +0.05% +0.41% +0.00% / +0.22% +1.07% +0.67%] index_select random_sorted : Elapsed 0.098 ms (9.831 ms / 100) B = [150, 15, 250] (stride (3750, 1, 15)) A = [50, 15, 250] (stride (1, 50, 750)) dim = 0 8.736 -> 8.733 ( -0.03%) [ +0.17% +0.14% +0.00% / -0.02% -0.03% +0.11%] index_add_ linear : Elapsed 0.088 ms (8.751 ms / 100) 8.267 -> 8.267 ( +0.00%) [ +0.00% +0.11% +0.34% / +0.00% +0.00% +0.18%] index_copy_ linear : Elapsed 0.083 ms (8.267 ms / 100) 8.756 -> 8.718 ( -0.43%) [ +0.00% +0.05% +0.27% / -0.26% -0.43% -0.35%] index_add_ reverse : Elapsed 0.088 ms (8.756 ms / 100) 8.266 -> 8.263 ( -0.04%) [ +0.00% +0.21% +0.36% / +0.29% +0.22% -0.04%] index_copy_ reverse : Elapsed 0.083 ms (8.266 ms / 100) 8.830 -> 8.798 ( -0.36%) [ +0.12% +0.00% +0.06% / -0.36% -0.14% +0.18%] index_add_ spread : Elapsed 0.088 ms (8.841 ms / 100) 8.326 -> 8.324 ( -0.02%) [ +0.20% +0.00% +0.07% / -0.02% +0.28% +0.77%] index_copy_ spread : Elapsed 0.083 ms (8.343 ms / 100) 8.879 -> 8.844 ( -0.39%) [ +0.20% +0.24% +0.00% / -0.39% +0.11% +0.16%] index_add_ strided 7 : Elapsed 0.089 ms (8.897 ms / 100) 8.358 -> 8.365 ( +0.08%) [ +0.00% +0.57% +0.53% / +0.08% +0.08% +0.17%] index_copy_ strided 7 : Elapsed 0.084 ms (8.358 ms / 100) 8.882 -> 8.767 ( -1.29%) [ +0.41% +0.00% +0.11% / +0.19% -1.29% -1.25%] index_add_ perm : Elapsed 0.089 ms (8.918 ms / 100) 8.307 -> 8.307 ( +0.00%) [ +0.36% +0.33% +0.00% / +0.11% +0.04% +0.00%] index_copy_ perm : Elapsed 0.083 ms (8.337 ms / 100) 8.843 -> 8.755 ( -1.00%) [ +0.00% +0.03% +0.27% / +0.00% -1.00% -0.89%] index_add_ perm_sorted : Elapsed 0.088 ms (8.843 ms / 100) 8.256 -> 8.266 ( +0.12%) [ +0.17% +0.00% +0.22% / +0.12% +0.74% +0.84%] index_copy_ perm_sorted : Elapsed 0.083 ms (8.270 ms / 100) good 15.749 -> 14.698 ( -6.67%) [ +0.50% +0.74% +0.00% / -0.28% -6.10% -6.67%] index_select const : Elapsed 0.158 ms (15.828 ms / 100) 17.261 -> 17.374 ( +0.65%) [ +0.00% +1.72% +0.93% / +0.65% +0.99% +1.43%] index_select wrap : Elapsed 0.173 ms (17.261 ms / 100) 15.349 -> 14.708 ( -4.18%) [ +0.87% +0.20% +0.00% / +0.31% -4.18% -3.27%] index_select linear : Elapsed 0.155 ms (15.482 ms / 100) good 17.278 -> 16.158 ( -6.48%) [ +0.75% +0.54% +0.00% / +0.61% -5.59% -6.48%] index_select reverse : Elapsed 0.174 ms (17.407 ms / 100) good 15.696 -> 14.741 ( -6.08%) [ +0.24% +0.00% +0.80% / +0.61% -5.42% -6.08%] index_select skip64 : Elapsed 0.157 ms (15.734 ms / 100) good 15.659 -> 14.706 ( -6.09%) [ +0.20% +0.64% +0.00% / +0.43% -6.01% -6.09%] index_select skip256 : Elapsed 0.157 ms (15.690 ms / 100) 15.484 -> 15.543 ( +0.38%) [ +0.73% +0.00% +0.35% / +0.38% +0.91% +1.35%] index_select spread : Elapsed 0.156 ms (15.597 ms / 100) 19.251 -> 19.088 ( -0.85%) [ +0.00% +0.22% +0.63% / -0.39% -0.85% -0.12%] index_select strided 3 : Elapsed 0.193 ms (19.251 ms / 100) 19.723 -> 19.788 ( +0.33%) [ +0.05% +0.06% +0.00% / +0.44% +0.65% +0.33%] index_select strided 5 : Elapsed 0.197 ms (19.732 ms / 100) 19.737 -> 19.799 ( +0.31%) [ +0.53% +0.13% +0.00% / +0.41% +1.11% +0.31%] index_select strided 7 : Elapsed 0.198 ms (19.841 ms / 100) 19.930 -> 19.934 ( +0.02%) [ +0.03% +0.15% +0.00% / +0.02% +1.10% +1.38%] index_select strided 8 : Elapsed 0.199 ms (19.936 ms / 100) 19.589 -> 19.580 ( -0.05%) [ +0.42% +0.78% +0.00% / +0.29% -0.05% +0.32%] index_select strided 16 : Elapsed 0.197 ms (19.672 ms / 100) 19.619 -> 19.696 ( +0.39%) [ +0.75% +0.31% +0.00% / +0.75% +0.39% +1.46%] index_select random : Elapsed 0.198 ms (19.767 ms / 100) 15.478 -> 15.614 ( +0.88%) [ +0.02% +0.05% +0.00% / +1.10% +1.45% +0.88%] index_select random_sorted : Elapsed 0.155 ms (15.481 ms / 100) B = [150, 15, 250] (stride (250, 37500, 1)) A = [50, 15, 250] (stride (250, 12500, 1)) dim = 0 6.805 -> 6.730 ( -1.10%) [ +0.19% +0.00% +0.13% / -0.47% -1.10% -1.03%] index_add_ linear : Elapsed 0.068 ms (6.818 ms / 100) 6.455 -> 6.383 ( -1.12%) [ +0.26% +0.00% +0.03% / -0.31% -0.99% -1.12%] index_copy_ linear : Elapsed 0.065 ms (6.472 ms / 100) 6.835 -> 6.725 ( -1.61%) [ +0.37% +0.00% +0.37% / -0.50% -1.61% -1.29%] index_add_ reverse : Elapsed 0.069 ms (6.860 ms / 100) 6.477 -> 6.388 ( -1.37%) [ +0.80% +0.00% +0.59% / -0.22% -1.37% -1.36%] index_copy_ reverse : Elapsed 0.065 ms (6.529 ms / 100) 6.878 -> 6.771 ( -1.56%) [ +0.44% +0.32% +0.00% / -0.70% -1.42% -1.56%] index_add_ spread : Elapsed 0.069 ms (6.908 ms / 100) 6.535 -> 6.427 ( -1.65%) [ +0.35% +0.32% +0.00% / -0.26% -1.65% -1.33%] index_copy_ spread : Elapsed 0.066 ms (6.558 ms / 100) 6.915 -> 6.783 ( -1.91%) [ +0.29% +0.26% +0.00% / -0.54% -1.91% -1.84%] index_add_ strided 7 : Elapsed 0.069 ms (6.935 ms / 100) 6.549 -> 6.430 ( -1.82%) [ +0.00% +0.52% +0.32% / +0.03% -1.82% -1.73%] index_copy_ strided 7 : Elapsed 0.065 ms (6.549 ms / 100) 6.949 -> 6.796 ( -2.20%) [ +0.00% +0.46% +0.42% / -0.55% -2.20% -1.89%] index_add_ perm : Elapsed 0.069 ms (6.949 ms / 100) 6.580 -> 6.433 ( -2.23%) [ +0.00% +0.47% +0.00% / -0.47% -2.23% -2.16%] index_copy_ perm : Elapsed 0.066 ms (6.580 ms / 100) 6.864 -> 6.736 ( -1.86%) [ +0.50% +0.29% +0.00% / -0.20% -1.86% -1.56%] index_add_ perm_sorted : Elapsed 0.069 ms (6.898 ms / 100) 6.531 -> 6.411 ( -1.84%) [ +0.05% +0.00% +0.09% / -0.11% -1.78% -1.84%] index_copy_ perm_sorted : Elapsed 0.065 ms (6.534 ms / 100) 8.362 -> 8.363 ( +0.01%) [ +0.18% +0.05% +0.00% / +0.01% +0.69% +0.49%] index_select const : Elapsed 0.084 ms (8.377 ms / 100) 9.644 -> 9.658 ( +0.15%) [ +0.00% +0.11% +0.08% / +0.15% +0.72% +0.69%] index_select wrap : Elapsed 0.096 ms (9.644 ms / 100) 8.906 -> 8.862 ( -0.49%) [ +0.11% +0.00% +0.08% / +0.04% -0.49% -0.45%] index_select linear : Elapsed 0.089 ms (8.916 ms / 100) 8.789 -> 8.808 ( +0.22%) [ +0.00% +0.06% +0.26% / +0.22% +0.61% +0.84%] index_select reverse : Elapsed 0.088 ms (8.789 ms / 100) 8.346 -> 8.370 ( +0.29%) [ +0.17% +0.00% +0.10% / +0.29% +0.54% +0.55%] index_select skip64 : Elapsed 0.084 ms (8.360 ms / 100) 8.355 -> 8.369 ( +0.17%) [ +0.13% +0.04% +0.00% / +0.17% +0.59% +0.55%] index_select skip256 : Elapsed 0.084 ms (8.366 ms / 100) 8.959 -> 8.971 ( +0.13%) [ +0.00% +0.15% +0.13% / +0.13% +0.28% +0.41%] index_select spread : Elapsed 0.090 ms (8.959 ms / 100) 9.633 -> 9.604 ( -0.30%) [ +0.30% +0.00% +0.29% / -0.30% +0.89% +0.65%] index_select strided 3 : Elapsed 0.097 ms (9.662 ms / 100) 8.785 -> 8.841 ( +0.64%) [ +0.40% +0.00% +0.48% / +0.64% +4.42% +4.08%] index_select strided 5 : Elapsed 0.088 ms (8.820 ms / 100) 9.627 -> 9.655 ( +0.29%) [ +0.45% +0.00% +0.92% / +0.29% +1.54% +0.98%] index_select strided 7 : Elapsed 0.097 ms (9.670 ms / 100) 9.449 -> 9.453 ( +0.04%) [ +0.11% +0.06% +0.00% / +0.04% +0.66% +0.48%] index_select strided 8 : Elapsed 0.095 ms (9.459 ms / 100) 9.391 -> 9.417 ( +0.28%) [ +0.07% +0.00% +0.30% / +0.28% +0.68% +0.50%] index_select strided 16 : Elapsed 0.094 ms (9.398 ms / 100) 9.391 -> 9.383 ( -0.09%) [ +0.00% +0.22% +0.46% / -0.09% +1.48% +1.68%] index_select random : Elapsed 0.094 ms (9.391 ms / 100) 8.940 -> 8.914 ( -0.29%) [ +0.00% +0.07% +0.44% / +0.30% -0.09% -0.29%] index_select random_sorted : Elapsed 0.089 ms (8.940 ms / 100) B = [150, 15, 250] (stride (250, 37500, 1)) A = [50, 15, 250] (stride (1, 12500, 50)) dim = 0 13.484 -> 13.520 ( +0.27%) [ +0.00% +0.16% +0.18% / +0.27% +0.67% +0.62%] index_add_ linear : Elapsed 0.135 ms (13.484 ms / 100) 13.083 -> 13.122 ( +0.30%) [ +0.00% +0.24% +0.63% / +0.30% +0.67% +0.63%] index_copy_ linear : Elapsed 0.131 ms (13.083 ms / 100) 13.521 -> 13.492 ( -0.21%) [ +0.30% +0.00% +0.20% / -0.21% +0.33% +0.18%] index_add_ reverse : Elapsed 0.136 ms (13.561 ms / 100) 13.142 -> 13.075 ( -0.51%) [ +0.12% +0.00% +0.29% / -0.51% +0.02% +0.06%] index_copy_ reverse : Elapsed 0.132 ms (13.158 ms / 100) 13.649 -> 13.601 ( -0.35%) [ +0.00% +0.06% +0.19% / -0.07% -0.23% -0.35%] index_add_ spread : Elapsed 0.136 ms (13.649 ms / 100) 13.261 -> 13.215 ( -0.35%) [ +0.19% +0.00% +0.17% / -0.26% -0.32% -0.35%] index_copy_ spread : Elapsed 0.133 ms (13.286 ms / 100) 13.563 -> 13.525 ( -0.28%) [ +0.10% +0.08% +0.00% / -0.28% +1.26% +1.19%] index_add_ strided 7 : Elapsed 0.136 ms (13.577 ms / 100) 13.199 -> 13.146 ( -0.40%) [ +0.00% +0.13% +0.13% / -0.40% +0.86% +0.70%] index_copy_ strided 7 : Elapsed 0.132 ms (13.199 ms / 100) 13.764 -> 13.579 ( -1.34%) [ +0.00% +0.10% +0.02% / -0.32% -1.23% -1.34%] index_add_ perm : Elapsed 0.138 ms (13.764 ms / 100) 13.345 -> 13.196 ( -1.12%) [ +0.22% +0.19% +0.00% / -0.16% -1.06% -1.12%] index_copy_ perm : Elapsed 0.134 ms (13.375 ms / 100) 13.667 -> 13.586 ( -0.59%) [ +0.13% +0.09% +0.00% / +0.06% -0.59% -0.50%] index_add_ perm_sorted : Elapsed 0.137 ms (13.685 ms / 100) 13.308 -> 13.192 ( -0.87%) [ +0.08% +0.11% +0.00% / -0.22% -0.80% -0.87%] index_copy_ perm_sorted : Elapsed 0.133 ms (13.319 ms / 100) 22.797 -> 22.755 ( -0.18%) [ +0.06% +0.00% +0.07% / +0.01% -0.16% -0.18%] index_select const : Elapsed 0.228 ms (22.811 ms / 100) 27.438 -> 27.497 ( +0.22%) [ +0.00% +0.07% +0.09% / +0.31% +0.22% +0.24%] index_select wrap : Elapsed 0.274 ms (27.438 ms / 100) 23.917 -> 23.932 ( +0.06%) [ +0.00% +0.15% +0.09% / +0.06% +0.23% +0.28%] index_select linear : Elapsed 0.239 ms (23.917 ms / 100) 24.480 -> 24.513 ( +0.13%) [ +0.00% +0.15% +0.29% / +0.21% +0.13% +0.16%] index_select reverse : Elapsed 0.245 ms (24.480 ms / 100) 22.791 -> 22.768 ( -0.10%) [ +0.08% +0.26% +0.00% / +0.06% -0.10% -0.05%] index_select skip64 : Elapsed 0.228 ms (22.809 ms / 100) 22.762 -> 22.730 ( -0.14%) [ +0.12% +0.20% +0.00% / +0.25% -0.14% +0.07%] index_select skip256 : Elapsed 0.228 ms (22.790 ms / 100) 24.763 -> 24.779 ( +0.06%) [ +0.06% +0.04% +0.00% / +0.06% +0.89% +0.96%] index_select spread : Elapsed 0.248 ms (24.779 ms / 100) 29.241 -> 29.261 ( +0.07%) [ +0.00% +0.16% +0.15% / +0.22% +0.07% +0.27%] index_select strided 3 : Elapsed 0.292 ms (29.241 ms / 100) 29.336 -> 29.345 ( +0.03%) [ +0.07% +0.01% +0.00% / +0.03% +0.48% +0.37%] index_select strided 5 : Elapsed 0.294 ms (29.356 ms / 100) 29.188 -> 29.232 ( +0.15%) [ +0.17% +0.00% +0.18% / +0.15% +0.24% +0.52%] index_select strided 7 : Elapsed 0.292 ms (29.238 ms / 100) 29.290 -> 29.314 ( +0.08%) [ +0.02% +0.18% +0.00% / +0.08% +0.25% +0.20%] index_select strided 8 : Elapsed 0.293 ms (29.295 ms / 100) 29.401 -> 29.387 ( -0.05%) [ +0.00% +0.02% +0.11% / -0.05% -0.03% +0.23%] index_select strided 16 : Elapsed 0.294 ms (29.401 ms / 100) 29.179 -> 29.181 ( +0.01%) [ +0.00% +0.08% +0.13% / +0.01% +0.17% +0.06%] index_select random : Elapsed 0.292 ms (29.179 ms / 100) 24.598 -> 24.632 ( +0.14%) [ +0.07% +0.00% +0.11% / +0.14% +0.78% +0.60%] index_select random_sorted : Elapsed 0.246 ms (24.615 ms / 100) B = [150, 15, 250] (stride (1, 37500, 150)) A = [50, 15, 250] (stride (3750, 1, 15)) dim = 0 15.256 -> 14.783 ( -3.10%) [ +0.20% +0.24% +0.00% / -0.39% -3.05% -3.10%] index_add_ linear : Elapsed 0.153 ms (15.286 ms / 100) 13.574 -> 13.211 ( -2.67%) [ +0.11% +0.22% +0.00% / -0.57% -2.45% -2.67%] index_copy_ linear : Elapsed 0.136 ms (13.589 ms / 100) 15.287 -> 14.776 ( -3.34%) [ +0.05% +0.00% +0.05% / -0.48% -3.34% -3.16%] index_add_ reverse : Elapsed 0.153 ms (15.294 ms / 100) 13.584 -> 13.227 ( -2.63%) [ +0.04% +0.09% +0.00% / -0.63% -2.61% -2.63%] index_copy_ reverse : Elapsed 0.136 ms (13.589 ms / 100) 18.348 -> 18.158 ( -1.04%) [ +0.15% +0.00% +0.04% / -0.51% -1.04% -0.78%] index_add_ spread : Elapsed 0.184 ms (18.375 ms / 100) 16.255 -> 16.102 ( -0.94%) [ +0.22% +0.26% +0.00% / -0.26% -0.83% -0.94%] index_copy_ spread : Elapsed 0.163 ms (16.291 ms / 100) 22.117 -> 22.016 ( -0.46%) [ +0.00% +0.00% +0.24% / -0.13% -0.05% -0.46%] index_add_ strided 7 : Elapsed 0.221 ms (22.117 ms / 100) 20.342 -> 20.019 ( -1.59%) [ +0.22% +0.00% +0.35% / -0.57% -1.35% -1.59%] index_copy_ strided 7 : Elapsed 0.204 ms (20.387 ms / 100) 23.897 -> 23.798 ( -0.41%) [ +0.00% +0.05% +0.13% / +0.06% -0.28% -0.41%] index_add_ perm : Elapsed 0.239 ms (23.897 ms / 100) 21.927 -> 21.820 ( -0.49%) [ +0.05% +0.00% +0.03% / -0.11% -0.46% -0.49%] index_copy_ perm : Elapsed 0.219 ms (21.937 ms / 100) 18.164 -> 17.998 ( -0.91%) [ +0.06% +0.03% +0.00% / -0.50% -0.83% -0.91%] index_add_ perm_sorted : Elapsed 0.182 ms (18.175 ms / 100) 16.168 -> 15.942 ( -1.40%) [ +0.19% +0.00% +0.13% / -0.91% -1.40% -1.33%] index_copy_ perm_sorted : Elapsed 0.162 ms (16.198 ms / 100) 25.592 -> 25.138 ( -1.77%) [ +0.00% +0.16% +0.17% / +0.17% -1.73% -1.77%] index_select const : Elapsed 0.256 ms (25.592 ms / 100) 30.342 -> 30.039 ( -1.00%) [ +0.00% +0.30% +0.42% / -0.02% -0.64% -1.00%] index_select wrap : Elapsed 0.303 ms (30.342 ms / 100) 26.369 -> 26.100 ( -1.02%) [ +0.39% +0.20% +0.00% / -0.02% -0.94% -1.02%] index_select linear : Elapsed 0.265 ms (26.472 ms / 100) 27.667 -> 27.236 ( -1.56%) [ +0.21% +0.00% +0.23% / +0.02% -1.28% -1.56%] index_select reverse : Elapsed 0.277 ms (27.724 ms / 100) 25.587 -> 25.112 ( -1.86%) [ +0.07% +0.03% +0.00% / +0.09% -1.63% -1.86%] index_select skip64 : Elapsed 0.256 ms (25.604 ms / 100) 25.560 -> 25.097 ( -1.81%) [ +0.00% +0.22% +0.22% / +0.16% -1.67% -1.81%] index_select skip256 : Elapsed 0.256 ms (25.560 ms / 100) 27.025 -> 26.856 ( -0.63%) [ +0.15% +0.17% +0.00% / +0.19% -0.53% -0.63%] index_select spread : Elapsed 0.271 ms (27.066 ms / 100) 30.390 -> 30.355 ( -0.12%) [ +0.02% +0.00% +0.10% / -0.04% +0.03% -0.12%] index_select strided 3 : Elapsed 0.304 ms (30.395 ms / 100) 27.947 -> 27.339 ( -2.18%) [ +0.08% +0.00% +0.04% / +0.00% -2.05% -2.18%] index_select strided 5 : Elapsed 0.280 ms (27.968 ms / 100) 30.117 -> 30.197 ( +0.27%) [ +0.00% +0.40% +0.61% / +0.27% +0.63% +0.81%] index_select strided 7 : Elapsed 0.301 ms (30.117 ms / 100) 29.874 -> 29.820 ( -0.18%) [ +0.26% +0.00% +0.26% / -0.12% -0.16% -0.18%] index_select strided 8 : Elapsed 0.300 ms (29.953 ms / 100) 29.881 -> 29.794 ( -0.29%) [ +0.00% +0.02% +0.00% / +0.11% +0.11% -0.29%] index_select strided 16 : Elapsed 0.299 ms (29.881 ms / 100) 29.438 -> 29.272 ( -0.56%) [ +0.00% +0.30% +0.05% / -0.03% -0.52% -0.56%] index_select random : Elapsed 0.294 ms (29.438 ms / 100) 26.866 -> 26.761 ( -0.39%) [ +0.00% +0.04% +0.04% / -0.16% -0.39% -0.39%] index_select random_sorted : Elapsed 0.269 ms (26.866 ms / 100) B = [150, 15, 250] (stride (1, 37500, 150)) A = [50, 15, 250] (stride (1, 50, 750)) dim = 0 18.925 -> 18.455 ( -2.48%) [ +0.18% +0.00% +0.14% / -0.41% -2.43% -2.48%] index_add_ linear : Elapsed 0.190 ms (18.959 ms / 100) 16.986 -> 16.679 ( -1.81%) [ +0.03% +0.00% +0.04% / -0.55% -1.65% -1.81%] index_copy_ linear : Elapsed 0.170 ms (16.991 ms / 100) 18.909 -> 18.500 ( -2.16%) [ +0.00% +0.15% +0.15% / -0.53% -2.16% -2.15%] index_add_ reverse : Elapsed 0.189 ms (18.909 ms / 100) 16.941 -> 16.706 ( -1.39%) [ +0.00% +0.14% +0.09% / -0.63% -1.33% -1.39%] index_copy_ reverse : Elapsed 0.169 ms (16.941 ms / 100) 21.657 -> 21.502 ( -0.72%) [ +0.39% +0.00% +0.11% / -0.72% +0.19% +0.06%] index_add_ spread : Elapsed 0.217 ms (21.742 ms / 100) 19.270 -> 19.118 ( -0.79%) [ +0.20% +0.00% +0.06% / -0.79% +0.50% +0.43%] index_copy_ spread : Elapsed 0.193 ms (19.309 ms / 100) 24.961 -> 24.867 ( -0.38%) [ +0.16% +0.00% +0.00% / -0.11% -0.38% -0.16%] index_add_ strided 7 : Elapsed 0.250 ms (25.001 ms / 100) 22.995 -> 22.754 ( -1.05%) [ +0.12% +0.01% +0.00% / -0.73% -1.05% -1.02%] index_copy_ strided 7 : Elapsed 0.230 ms (23.022 ms / 100) 26.229 -> 26.186 ( -0.16%) [ +0.05% +0.12% +0.00% / -0.16% +0.19% +0.20%] index_add_ perm : Elapsed 0.262 ms (26.242 ms / 100) 25.191 -> 25.086 ( -0.42%) [ +0.00% +0.06% +0.17% / -0.14% -0.10% -0.42%] index_copy_ perm : Elapsed 0.252 ms (25.191 ms / 100) 21.287 -> 21.181 ( -0.50%) [ +0.03% +0.20% +0.00% / -0.50% +0.14% +0.04%] index_add_ perm_sorted : Elapsed 0.213 ms (21.294 ms / 100) 18.988 -> 18.848 ( -0.74%) [ +0.10% +0.37% +0.00% / -0.74% +0.67% +0.80%] index_copy_ perm_sorted : Elapsed 0.190 ms (19.007 ms / 100) 34.616 -> 34.647 ( +0.09%) [ +0.00% +0.11% +0.10% / +0.09% +2.38% +2.36%] index_select const : Elapsed 0.346 ms (34.616 ms / 100) 41.037 -> 41.143 ( +0.26%) [ +0.16% +0.00% +0.48% / +0.26% +1.86% +1.95%] index_select wrap : Elapsed 0.411 ms (41.104 ms / 100) 35.871 -> 35.923 ( +0.14%) [ +0.31% +0.28% +0.00% / +0.14% +2.03% +2.22%] index_select linear : Elapsed 0.360 ms (35.983 ms / 100) 37.221 -> 37.200 ( -0.06%) [ +0.30% +0.12% +0.00% / -0.06% +4.00% +3.99%] index_select reverse : Elapsed 0.373 ms (37.334 ms / 100) 34.570 -> 34.639 ( +0.20%) [ +0.22% +0.00% +0.14% / +0.20% +2.26% +2.40%] index_select skip64 : Elapsed 0.346 ms (34.645 ms / 100) 34.642 -> 34.733 ( +0.26%) [ +0.19% +0.24% +0.00% / +0.26% +2.01% +2.20%] index_select skip256 : Elapsed 0.347 ms (34.709 ms / 100) 37.358 -> 37.478 ( +0.32%) [ +0.04% +0.09% +0.00% / +0.32% +3.01% +3.10%] index_select spread : Elapsed 0.374 ms (37.374 ms / 100) 44.182 -> 44.192 ( +0.02%) [ +0.00% +0.18% +0.12% / +0.02% +1.72% +1.67%] index_select strided 3 : Elapsed 0.442 ms (44.182 ms / 100) 44.513 -> 44.470 ( -0.10%) [ +0.15% +0.00% +0.26% / -0.10% +2.04% +2.06%] index_select strided 5 : Elapsed 0.446 ms (44.580 ms / 100) 44.668 -> 44.668 ( +0.00%) [ +0.02% +0.27% +0.00% / +0.00% +1.83% +1.89%] index_select strided 7 : Elapsed 0.447 ms (44.679 ms / 100) 44.694 -> 44.730 ( +0.08%) [ +0.00% +0.04% +0.26% / +0.08% +2.15% +1.98%] index_select strided 8 : Elapsed 0.447 ms (44.694 ms / 100) 44.422 -> 44.511 ( +0.20%) [ +0.09% +0.00% +0.20% / +0.20% +2.04% +1.95%] index_select strided 16 : Elapsed 0.445 ms (44.463 ms / 100) 44.199 -> 44.208 ( +0.02%) [ +0.22% +0.18% +0.00% / +0.02% +2.16% +2.07%] index_select random : Elapsed 0.443 ms (44.298 ms / 100) 37.567 -> 37.659 ( +0.24%) [ +0.00% +0.11% +0.18% / +0.24% +2.03% +2.32%] index_select random_sorted : Elapsed 0.376 ms (37.567 ms / 100) B = [150, 15, 250] (stride (15, 1, 2250)) dim = 0 fill_cnt = 50 4.164 -> 4.158 ( -0.14%) [ +0.00% +0.22% +0.19% / -0.14% +0.70% +0.89%] index_fill_ const : Elapsed 0.042 ms (4.164 ms / 100) 3.924 -> 3.907 ( -0.43%) [ +0.00% +0.23% +0.05% / -0.43% -0.20% -0.20%] index_fill_ linear : Elapsed 0.039 ms (3.924 ms / 100) 3.872 -> 3.864 ( -0.21%) [ +0.03% +0.05% +0.00% / -0.21% +0.85% +1.24%] index_fill_ reverse : Elapsed 0.039 ms (3.873 ms / 100) 4.093 -> 4.078 ( -0.37%) [ +0.20% +0.00% +0.00% / -0.37% +2.54% +2.44%] index_fill_ skip64 : Elapsed 0.041 ms (4.101 ms / 100) 4.089 -> 4.082 ( -0.17%) [ +0.17% +0.20% +0.00% / -0.17% +2.52% +2.57%] index_fill_ skip256 : Elapsed 0.041 ms (4.096 ms / 100) 3.822 -> 3.798 ( -0.63%) [ +0.29% +0.29% +0.00% / -0.05% -0.29% -0.63%] index_fill_ spread : Elapsed 0.038 ms (3.833 ms / 100) 3.830 -> 3.795 ( -0.91%) [ +0.10% +0.31% +0.00% / -0.44% -0.76% -0.91%] index_fill_ strided 3 : Elapsed 0.038 ms (3.834 ms / 100) 3.841 -> 3.813 ( -0.73%) [ +0.10% +0.00% +0.00% / -0.23% -0.73% -0.57%] index_fill_ strided 5 : Elapsed 0.038 ms (3.845 ms / 100) 3.840 -> 3.822 ( -0.47%) [ +0.05% +0.03% +0.00% / -0.31% -0.42% -0.47%] index_fill_ strided 7 : Elapsed 0.038 ms (3.842 ms / 100) 3.918 -> 3.888 ( -0.77%) [ +0.00% +0.10% +0.03% / -0.77% -0.74% -0.48%] index_fill_ strided 8 : Elapsed 0.039 ms (3.918 ms / 100) 3.847 -> 3.813 ( -0.88%) [ +0.00% +0.68% +0.18% / -0.78% -0.70% -0.88%] index_fill_ strided 16 : Elapsed 0.038 ms (3.847 ms / 100) 3.934 -> 3.921 ( -0.33%) [ +0.48% +0.00% +0.15% / -0.33% +0.28% +0.18%] index_fill_ strided 64 : Elapsed 0.040 ms (3.953 ms / 100) 3.772 -> 3.758 ( -0.37%) [ +0.16% +0.03% +0.00% / -0.37% +1.22% +1.38%] index_fill_ strided 100 : Elapsed 0.038 ms (3.778 ms / 100) 3.871 -> 3.820 ( -1.32%) [ +0.13% +0.39% +0.00% / -0.31% -1.29% -1.32%] index_fill_ random : Elapsed 0.039 ms (3.876 ms / 100) 3.868 -> 3.803 ( -1.68%) [ +0.36% +0.00% +0.18% / -0.18% -1.68% -1.65%] index_fill_ random_sorted : Elapsed 0.039 ms (3.882 ms / 100) 3.846 -> 3.832 ( -0.36%) [ +0.26% +0.49% +0.00% / -0.36% +0.29% +0.36%] index_fill_ perm : Elapsed 0.039 ms (3.856 ms / 100) 3.875 -> 3.826 ( -1.26%) [ +0.34% +0.28% +0.00% / -0.05% -1.03% -1.26%] index_fill_ perm_sorted : Elapsed 0.039 ms (3.888 ms / 100) B = [150, 15, 250] (stride (15, 1, 2250)) A = [50, 15, 250] (stride (15, 1, 750)) dim = 0 8.789 -> 8.650 ( -1.58%) [ +0.00% +0.03% +0.01% / -0.09% -1.58% -1.41%] index_add_ linear : Elapsed 0.088 ms (8.789 ms / 100) 7.404 -> 7.308 ( -1.30%) [ +0.00% +0.27% +0.14% / -0.31% -1.30% -1.11%] index_copy_ linear : Elapsed 0.074 ms (7.404 ms / 100) 8.838 -> 8.705 ( -1.50%) [ +0.00% +0.27% +0.42% / -0.08% -1.37% -1.50%] index_add_ reverse : Elapsed 0.088 ms (8.838 ms / 100) 7.396 -> 7.341 ( -0.74%) [ +0.00% +0.22% +0.05% / -0.16% -0.68% -0.74%] index_copy_ reverse : Elapsed 0.074 ms (7.396 ms / 100) 10.034 -> 10.107 ( +0.73%) [ +0.00% +1.31% +1.25% / +0.73% +0.86% +0.77%] index_add_ spread : Elapsed 0.100 ms (10.034 ms / 100) 8.660 -> 8.607 ( -0.61%) [ +0.09% +0.28% +0.00% / -0.61% +0.23% +0.30%] index_copy_ spread : Elapsed 0.087 ms (8.668 ms / 100) 10.488 -> 10.425 ( -0.60%) [ +0.00% +0.10% +0.47% / -0.10% -0.51% -0.60%] index_add_ strided 7 : Elapsed 0.105 ms (10.488 ms / 100) 8.992 -> 8.917 ( -0.83%) [ +0.31% +0.52% +0.00% / -0.68% -0.58% -0.83%] index_copy_ strided 7 : Elapsed 0.090 ms (9.020 ms / 100) 10.336 -> 10.341 ( +0.05%) [ +0.33% +0.11% +0.00% / +0.05% +3.63% +3.64%] index_add_ perm : Elapsed 0.104 ms (10.370 ms / 100) 8.541 -> 8.457 ( -0.98%) [ +0.30% +0.00% +0.23% / -0.98% +0.37% +0.12%] index_copy_ perm : Elapsed 0.086 ms (8.567 ms / 100) 9.516 -> 9.541 ( +0.26%) [ +0.64% +0.41% +0.00% / +0.26% +1.29% +1.49%] index_add_ perm_sorted : Elapsed 0.096 ms (9.577 ms / 100) 8.131 -> 8.072 ( -0.73%) [ +0.49% +0.26% +0.00% / -0.73% +0.39% +0.69%] index_copy_ perm_sorted : Elapsed 0.082 ms (8.171 ms / 100) 11.347 -> 11.363 ( +0.14%) [ +0.07% +0.16% +0.00% / +0.14% +0.66% +0.70%] index_select const : Elapsed 0.114 ms (11.355 ms / 100) 13.189 -> 13.200 ( +0.08%) [ +1.19% +0.00% +0.60% / +0.08% +0.99% +0.79%] index_select wrap : Elapsed 0.133 ms (13.346 ms / 100) 12.045 -> 12.006 ( -0.32%) [ +0.00% +0.06% +0.05% / +0.02% -0.24% -0.32%] index_select linear : Elapsed 0.120 ms (12.045 ms / 100) 12.267 -> 12.299 ( +0.26%) [ +0.00% +0.12% +0.27% / +0.26% +0.54% +0.51%] index_select reverse : Elapsed 0.123 ms (12.267 ms / 100) 11.378 -> 11.380 ( +0.02%) [ +0.05% +0.18% +0.00% / +0.02% +0.03% +0.03%] index_select skip64 : Elapsed 0.114 ms (11.384 ms / 100) 11.320 -> 11.324 ( +0.04%) [ +0.10% +0.00% +0.04% / +0.04% +0.09% +0.14%] index_select skip256 : Elapsed 0.113 ms (11.331 ms / 100) 11.532 -> 11.566 ( +0.29%) [ +0.24% +0.00% +0.33% / +0.29% +0.69% +0.58%] index_select spread : Elapsed 0.116 ms (11.560 ms / 100) 17.490 -> 17.338 ( -0.87%) [ +0.27% +0.01% +0.00% / +0.25% -0.87% -0.47%] index_select strided 3 : Elapsed 0.175 ms (17.538 ms / 100) 14.486 -> 14.541 ( +0.38%) [ +0.83% +0.00% +0.23% / +0.38% +2.34% +2.44%] index_select strided 5 : Elapsed 0.146 ms (14.606 ms / 100) 15.102 -> 15.155 ( +0.35%) [ +0.00% +0.60% +0.46% / +0.35% +2.21% +1.75%] index_select strided 7 : Elapsed 0.151 ms (15.102 ms / 100) 16.571 -> 16.648 ( +0.46%) [ +1.14% +1.30% +0.00% / +0.60% +0.46% +1.62%] index_select strided 8 : Elapsed 0.168 ms (16.760 ms / 100) 16.337 -> 16.237 ( -0.61%) [ +0.10% +0.50% +0.00% / -0.61% +0.93% +0.96%] index_select strided 16 : Elapsed 0.164 ms (16.353 ms / 100) 15.496 -> 15.628 ( +0.85%) [ +0.15% +0.00% +0.51% / +0.85% +3.15% +3.30%] index_select random : Elapsed 0.155 ms (15.519 ms / 100) 11.596 -> 11.620 ( +0.21%) [ +0.00% +0.20% +0.07% / +0.21% +0.76% +0.58%] index_select random_sorted : Elapsed 0.116 ms (11.596 ms / 100) B = [150, 15, 250] (stride (1, 150, 2250)) A = [50, 15, 250] (stride (1, 12500, 50)) dim = 0 18.568 -> 18.064 ( -2.71%) [ +0.00% +0.21% +0.04% / -0.46% -2.71% -2.52%] index_add_ linear : Elapsed 0.186 ms (18.568 ms / 100) 16.045 -> 15.868 ( -1.10%) [ +0.01% +0.00% +0.12% / -0.41% -1.10% -0.84%] index_copy_ linear : Elapsed 0.160 ms (16.047 ms / 100) 18.521 -> 18.025 ( -2.68%) [ +0.22% +0.28% +0.00% / -0.45% -2.47% -2.68%] index_add_ reverse : Elapsed 0.186 ms (18.561 ms / 100) 16.034 -> 15.833 ( -1.25%) [ +0.22% +0.00% +0.07% / -0.48% -1.18% -1.25%] index_copy_ reverse : Elapsed 0.161 ms (16.069 ms / 100) 22.114 -> 21.903 ( -0.95%) [ +0.05% +0.00% +0.12% / -0.95% -0.78% -0.95%] index_add_ spread : Elapsed 0.221 ms (22.126 ms / 100) 18.265 -> 18.177 ( -0.48%) [ +0.00% +0.18% +0.09% / -0.48% +0.73% +0.64%] index_copy_ spread : Elapsed 0.183 ms (18.265 ms / 100) 26.591 -> 26.315 ( -1.04%) [ +0.27% +0.20% +0.00% / -0.70% -0.87% -1.04%] index_add_ strided 7 : Elapsed 0.267 ms (26.663 ms / 100) 22.507 -> 22.095 ( -1.83%) [ +0.41% +0.00% +0.23% / -1.21% -1.70% -1.83%] index_copy_ strided 7 : Elapsed 0.226 ms (22.600 ms / 100) 27.358 -> 27.189 ( -0.62%) [ +0.00% +0.07% +0.04% / -0.33% -0.62% -0.49%] index_add_ perm : Elapsed 0.274 ms (27.358 ms / 100) 24.045 -> 23.858 ( -0.78%) [ +0.00% +0.00% +0.11% / -0.42% -0.71% -0.78%] index_copy_ perm : Elapsed 0.240 ms (24.045 ms / 100) 21.861 -> 21.664 ( -0.90%) [ +0.24% +0.00% +0.06% / -0.72% -0.90% -0.87%] index_add_ perm_sorted : Elapsed 0.219 ms (21.914 ms / 100) 18.152 -> 18.036 ( -0.64%) [ +0.13% +0.00% +0.08% / -0.64% +0.20% +0.19%] index_copy_ perm_sorted : Elapsed 0.182 ms (18.176 ms / 100) 32.924 -> 32.981 ( +0.17%) [ +0.34% +0.00% +0.40% / +0.17% +1.21% +0.97%] index_select const : Elapsed 0.330 ms (33.036 ms / 100) 37.052 -> 37.063 ( +0.03%) [ +0.00% +0.03% +0.17% / +0.03% +3.78% +3.77%] index_select wrap : Elapsed 0.371 ms (37.052 ms / 100) 33.735 -> 33.871 ( +0.40%) [ +0.00% +0.14% +0.25% / +0.40% +2.98% +3.26%] index_select linear : Elapsed 0.337 ms (33.735 ms / 100) 34.585 -> 34.577 ( -0.02%) [ +0.00% +0.10% +0.19% / -0.02% +2.35% +2.76%] index_select reverse : Elapsed 0.346 ms (34.585 ms / 100) 32.908 -> 32.992 ( +0.26%) [ +0.00% +0.23% +0.15% / +0.26% +0.98% +1.00%] index_select skip64 : Elapsed 0.329 ms (32.908 ms / 100) 33.017 -> 32.974 ( -0.13%) [ +0.27% +0.00% +0.07% / -0.13% +0.97% +0.89%] index_select skip256 : Elapsed 0.331 ms (33.105 ms / 100) 34.713 -> 34.750 ( +0.11%) [ +0.12% +0.42% +0.00% / +0.11% +3.17% +3.39%] index_select spread : Elapsed 0.348 ms (34.753 ms / 100) 39.491 -> 39.566 ( +0.19%) [ +0.14% +0.07% +0.00% / +0.19% +2.69% +2.82%] index_select strided 3 : Elapsed 0.395 ms (39.545 ms / 100) 39.836 -> 39.848 ( +0.03%) [ +0.00% +0.14% +0.05% / +0.03% +2.58% +2.54%] index_select strided 5 : Elapsed 0.398 ms (39.836 ms / 100) 39.686 -> 39.688 ( +0.01%) [ +0.10% +0.22% +0.00% / +0.01% +2.70% +2.84%] index_select strided 7 : Elapsed 0.397 ms (39.726 ms / 100) 39.961 -> 39.946 ( -0.04%) [ +0.10% +0.07% +0.00% / -0.04% +2.65% +2.56%] index_select strided 8 : Elapsed 0.400 ms (40.001 ms / 100) 39.798 -> 39.820 ( +0.06%) [ +0.08% +0.04% +0.00% / +0.06% +2.84% +2.75%] index_select strided 16 : Elapsed 0.398 ms (39.831 ms / 100) 39.420 -> 39.433 ( +0.03%) [ +0.00% +0.07% +0.02% / +0.03% +2.85% +2.65%] index_select random : Elapsed 0.394 ms (39.420 ms / 100) 34.741 -> 34.804 ( +0.18%) [ +0.24% +0.16% +0.00% / +0.18% +2.87% +2.93%] index_select random_sorted : Elapsed 0.348 ms (34.824 ms / 100) out_shape = [50, 150, 250] in_shape = [50, 15, 250] idx_dim = 1 B = [50, 150, 250] (stride (37500, 250, 1)) A = [50, 15, 250] (stride (250, 12500, 1)) dim = 1 4.879 -> 4.853 ( -0.53%) [ +0.12% +0.10% +0.00% / +0.12% -0.49% -0.53%] index_add_ linear : Elapsed 0.049 ms (4.885 ms / 100) 4.694 -> 4.674 ( -0.43%) [ +0.19% +0.02% +0.00% / +0.15% -0.17% -0.43%] index_copy_ linear : Elapsed 0.047 ms (4.703 ms / 100) 4.875 -> 4.838 ( -0.76%) [ +0.02% +0.00% +0.21% / +0.08% -0.35% -0.76%] index_add_ reverse : Elapsed 0.049 ms (4.876 ms / 100) 4.683 -> 4.673 ( -0.21%) [ +0.00% +0.11% +0.00% / +0.21% -0.04% -0.21%] index_copy_ reverse : Elapsed 0.047 ms (4.683 ms / 100) 4.875 -> 4.869 ( -0.12%) [ +0.02% +0.00% +0.04% / -0.12% +0.10% +0.04%] index_add_ spread : Elapsed 0.049 ms (4.876 ms / 100) 4.703 -> 4.712 ( +0.19%) [ +0.00% +0.21% +0.11% / +0.19% +0.49% +0.34%] index_copy_ spread : Elapsed 0.047 ms (4.703 ms / 100) 4.883 -> 4.870 ( -0.27%) [ +0.08% +0.00% +0.18% / +0.00% -0.16% -0.27%] index_add_ strided 7 : Elapsed 0.049 ms (4.887 ms / 100) 4.719 -> 4.713 ( -0.13%) [ +0.23% +0.13% +0.00% / +0.06% -0.11% -0.13%] index_copy_ strided 7 : Elapsed 0.047 ms (4.730 ms / 100) 4.894 -> 4.875 ( -0.39%) [ +0.00% +0.12% +0.08% / +0.12% -0.35% -0.39%] index_add_ perm : Elapsed 0.049 ms (4.894 ms / 100) 4.719 -> 4.713 ( -0.13%) [ +0.30% +0.25% +0.00% / +0.19% -0.08% -0.13%] index_copy_ perm : Elapsed 0.047 ms (4.733 ms / 100) 4.875 -> 4.875 ( +0.00%) [ +0.18% +0.29% +0.00% / +0.00% +0.21% +0.10%] index_add_ perm_sorted : Elapsed 0.049 ms (4.884 ms / 100) 4.707 -> 4.711 ( +0.08%) [ +0.15% +0.15% +0.00% / +0.08% +0.30% +0.21%] index_copy_ perm_sorted : Elapsed 0.047 ms (4.714 ms / 100) 17.654 -> 17.663 ( +0.05%) [ +0.00% +0.42% +0.34% / +0.05% +3.82% +4.04%] index_select const : Elapsed 0.177 ms (17.654 ms / 100) 21.698 -> 21.210 ( -2.25%) [ +0.00% +0.11% +0.00% / +0.14% -2.25% -2.06%] index_select wrap : Elapsed 0.217 ms (21.698 ms / 100) 18.052 -> 17.666 ( -2.14%) [ +0.00% +0.17% +0.15% / +0.14% -2.01% -2.14%] index_select linear : Elapsed 0.181 ms (18.052 ms / 100) 18.510 -> 18.325 ( -1.00%) [ +0.00% +0.10% +0.02% / -0.15% -0.96% -1.00%] index_select reverse : Elapsed 0.185 ms (18.510 ms / 100) 17.506 -> 17.507 ( +0.01%) [ +0.02% +0.07% +0.00% / +0.01% +3.75% +3.87%] index_select skip64 : Elapsed 0.175 ms (17.509 ms / 100) 17.557 -> 17.579 ( +0.13%) [ +0.06% +0.28% +0.00% / +0.13% +3.30% +3.71%] index_select skip256 : Elapsed 0.176 ms (17.568 ms / 100) 18.749 -> 18.696 ( -0.28%) [ +0.00% +0.10% +0.42% / +0.20% -0.28% -0.23%] index_select spread : Elapsed 0.187 ms (18.749 ms / 100) 20.314 -> 20.027 ( -1.41%) [ +0.00% +0.08% +0.07% / +0.12% -0.96% -1.41%] index_select strided 3 : Elapsed 0.203 ms (20.314 ms / 100) 19.776 -> 19.232 ( -2.75%) [ +0.00% +0.83% +0.48% / +0.66% -2.75% -2.34%] index_select strided 5 : Elapsed 0.198 ms (19.776 ms / 100) 21.968 -> 21.639 ( -1.50%) [ +0.00% +0.34% +0.58% / +0.25% -1.28% -1.50%] index_select strided 7 : Elapsed 0.220 ms (21.968 ms / 100) 21.939 -> 21.625 ( -1.43%) [ +0.17% +0.77% +0.00% / +0.09% -1.43% -1.37%] index_select strided 8 : Elapsed 0.220 ms (21.976 ms / 100) 21.354 -> 21.115 ( -1.12%) [ +0.35% +0.06% +0.00% / +0.07% -1.12% -1.09%] index_select random : Elapsed 0.214 ms (21.428 ms / 100) 18.531 -> 18.538 ( +0.04%) [ +0.29% +0.00% +0.25% / +0.04% +0.36% +0.71%] index_select random_sorted : Elapsed 0.186 ms (18.584 ms / 100) B = [50, 150, 250] (stride (37500, 1, 150)) A = [50, 15, 250] (stride (15, 1, 750)) dim = 1 35.544 -> 35.488 ( -0.16%) [ +0.00% +0.15% +0.09% / +0.15% -0.16% -0.05%] index_add_ linear : Elapsed 0.355 ms (35.544 ms / 100) 28.212 -> 27.973 ( -0.85%) [ +0.06% +0.00% +0.00% / -0.02% -0.85% -0.77%] index_copy_ linear : Elapsed 0.282 ms (28.229 ms / 100) 35.505 -> 35.560 ( +0.15%) [ +0.17% +0.00% +0.02% / +0.15% +0.21% +0.37%] index_add_ reverse : Elapsed 0.356 ms (35.564 ms / 100) 28.169 -> 28.078 ( -0.32%) [ +0.00% +0.09% +0.11% / +0.26% -0.22% -0.32%] index_copy_ reverse : Elapsed 0.282 ms (28.169 ms / 100) 37.955 -> 37.942 ( -0.03%) [ +0.00% +0.01% +0.14% / +0.04% -0.03% +0.01%] index_add_ spread : Elapsed 0.380 ms (37.955 ms / 100) 30.639 -> 30.654 ( +0.05%) [ +0.01% +0.12% +0.00% / +0.08% +0.06% +0.05%] index_copy_ spread : Elapsed 0.306 ms (30.642 ms / 100) 37.613 -> 37.425 ( -0.50%) [ +0.00% +0.02% +0.17% / +0.09% -0.50% -0.39%] index_add_ strided 7 : Elapsed 0.376 ms (37.613 ms / 100) 30.342 -> 30.194 ( -0.49%) [ +0.04% +0.00% +0.14% / +0.12% -0.49% -0.40%] index_copy_ strided 7 : Elapsed 0.304 ms (30.354 ms / 100) 38.216 -> 38.108 ( -0.28%) [ +0.00% +0.11% +0.02% / +0.16% -0.20% -0.28%] index_add_ perm : Elapsed 0.382 ms (38.216 ms / 100) 30.865 -> 30.723 ( -0.46%) [ +0.06% +0.02% +0.00% / -0.00% -0.46% -0.42%] index_copy_ perm : Elapsed 0.309 ms (30.882 ms / 100) 37.129 -> 36.837 ( -0.79%) [ +0.01% +0.02% +0.00% / -0.11% -0.79% +1.47%] index_add_ perm_sorted : Elapsed 0.371 ms (37.134 ms / 100) 29.852 -> 29.554 ( -1.00%) [ +0.00% +0.10% +0.11% / +0.12% -0.97% -1.00%] index_copy_ perm_sorted : Elapsed 0.299 ms (29.852 ms / 100) BEST 175.982 -> 18.793 (-89.32%) [ +0.00% +0.15% +0.03% / -89.14% -89.30% -89.32%] index_select const : Elapsed 1.760 ms (175.982 ms / 100) BEST 182.431 -> 22.320 (-87.77%) [ +0.30% +0.16% +0.00% / -87.77% -87.66% -87.67%] index_select wrap : Elapsed 1.830 ms (182.985 ms / 100) BEST 175.883 -> 18.604 (-89.42%) [ +0.29% +0.51% +0.00% / -89.01% -89.42% -89.40%] index_select linear : Elapsed 1.764 ms (176.389 ms / 100) BEST 176.173 -> 17.982 (-89.79%) [ +0.07% +0.29% +0.00% / -89.69% -89.79% -89.79%] index_select reverse : Elapsed 1.763 ms (176.291 ms / 100) BEST 175.541 -> 18.618 (-89.39%) [ +0.30% +0.31% +0.00% / -89.10% -89.39% -89.39%] index_select skip64 : Elapsed 1.761 ms (176.074 ms / 100) BEST 176.153 -> 17.563 (-90.03%) [ +0.09% +0.00% +0.11% / -89.91% -90.02% -90.03%] index_select skip256 : Elapsed 1.763 ms (176.315 ms / 100) BEST 176.550 -> 19.144 (-89.16%) [ +0.00% +0.09% +0.03% / -89.16% -88.73% -88.73%] index_select spread : Elapsed 1.765 ms (176.550 ms / 100) BEST 185.504 -> 18.424 (-90.07%) [ +0.37% +0.18% +0.00% / -89.83% -90.06% -90.07%] index_select strided 3 : Elapsed 1.862 ms (186.186 ms / 100) BEST 185.080 -> 19.349 (-89.55%) [ +0.12% +0.00% +0.04% / -89.55% -89.28% -89.28%] index_select strided 5 : Elapsed 1.853 ms (185.302 ms / 100) BEST 185.623 -> 18.251 (-90.17%) [ +0.07% +0.12% +0.00% / -89.86% -90.17% -90.16%] index_select strided 7 : Elapsed 1.857 ms (185.749 ms / 100) BEST 185.634 -> 19.511 (-89.49%) [ +0.21% +0.00% +0.11% / -89.49% -89.31% -89.31%] index_select strided 8 : Elapsed 1.860 ms (186.030 ms / 100) BEST 184.554 -> 17.894 (-90.30%) [ +0.05% +0.13% +0.00% / -90.12% -90.30% -90.30%] index_select random : Elapsed 1.847 ms (184.653 ms / 100) BEST 176.317 -> 19.450 (-88.97%) [ +0.18% +0.19% +0.00% / -88.97% -88.80% -88.79%] index_select random_sorted : Elapsed 1.766 ms (176.632 ms / 100) out_shape = [50, 15, 150] in_shape = [50, 15, 250] idx_dim = 2 B = [50, 15, 150] (stride (2250, 150, 1)) A = [50, 15, 250] (stride (1, 12500, 50)) dim = 2 8.999 -> 9.020 ( +0.23%) [ +0.00% +0.02% +0.00% / +0.27% +0.33% +0.23%] index_select const : Elapsed 0.090 ms (8.999 ms / 100) 9.667 -> 9.695 ( +0.29%) [ +0.11% +0.00% +0.04% / +0.33% +0.29% +0.60%] index_select wrap : Elapsed 0.097 ms (9.678 ms / 100) 9.669 -> 9.670 ( +0.01%) [ +0.07% +0.00% +0.10% / +0.01% +0.22% +0.44%] index_select linear : Elapsed 0.097 ms (9.676 ms / 100) 9.655 -> 9.676 ( +0.22%) [ +0.00% +0.24% +0.15% / +0.22% +0.87% +0.92%] index_select reverse : Elapsed 0.097 ms (9.655 ms / 100) 9.024 -> 9.032 ( +0.09%) [ +0.00% +0.01% +0.03% / +0.09% +0.11% +0.23%] index_select skip64 : Elapsed 0.090 ms (9.024 ms / 100) 8.999 -> 9.008 ( +0.10%) [ +0.18% +0.00% +0.21% / +0.10% +0.46% +0.20%] index_select skip256 : Elapsed 0.090 ms (9.015 ms / 100) 9.867 -> 9.889 ( +0.22%) [ +0.06% +0.02% +0.00% / +0.22% +0.49% +0.40%] index_select spread : Elapsed 0.099 ms (9.873 ms / 100) 9.989 -> 9.964 ( -0.25%) [ +0.07% +0.00% +0.11% / +0.00% -0.25% -0.18%] index_select strided 3 : Elapsed 0.100 ms (9.996 ms / 100) 9.673 -> 9.688 ( +0.16%) [ +0.07% +0.14% +0.00% / +0.16% +0.33% +0.36%] index_select strided 5 : Elapsed 0.097 ms (9.680 ms / 100) 10.067 -> 10.073 ( +0.06%) [ +0.15% +0.00% +0.06% / +0.15% +0.08% +0.06%] index_select strided 7 : Elapsed 0.101 ms (10.082 ms / 100) 9.991 -> 10.010 ( +0.19%) [ +0.00% +0.01% +0.01% / +0.19% +0.50% +0.55%] index_select strided 8 : Elapsed 0.100 ms (9.991 ms / 100) 9.979 -> 10.006 ( +0.27%) [ +0.15% +0.00% +0.15% / +0.27% +0.81% +0.82%] index_select strided 16 : Elapsed 0.100 ms (9.994 ms / 100) 10.016 -> 9.998 ( -0.18%) [ +0.14% +0.06% +0.00% / +0.00% -0.18% +0.37%] index_select strided 64 : Elapsed 0.100 ms (10.030 ms / 100) 9.046 -> 9.062 ( +0.18%) [ +0.00% +0.09% +0.13% / +0.18% +0.42% +0.31%] index_select strided 100 : Elapsed 0.090 ms (9.046 ms / 100) 9.804 -> 9.812 ( +0.08%) [ +0.00% +0.12% +0.03% / +0.08% +0.31% +0.60%] index_select random : Elapsed 0.098 ms (9.804 ms / 100) 9.616 -> 9.640 ( +0.25%) [ +0.22% +0.18% +0.00% / +0.25% +0.26% +0.55%] index_select random_sorted : Elapsed 0.096 ms (9.637 ms / 100) 9.926 -> 9.925 ( -0.01%) [ +0.11% +0.00% +0.05% / -0.01% +0.79% +0.65%] index_select perm : Elapsed 0.099 ms (9.937 ms / 100) 9.810 -> 9.835 ( +0.25%) [ +0.10% +0.02% +0.00% / +0.25% +0.40% +0.69%] index_select perm_sorted : Elapsed 0.098 ms (9.820 ms / 100) B = [50, 15, 150] (stride (1, 7500, 50)) A = [50, 15, 250] (stride (3750, 250, 1)) dim = 2 8.974 -> 8.997 ( +0.26%) [ +0.14% +0.00% +0.13% / +0.26% +0.30% +0.35%] index_select const : Elapsed 0.090 ms (8.987 ms / 100) 9.678 -> 9.700 ( +0.23%) [ +0.25% +0.14% +0.00% / +0.24% +0.37% +0.23%] index_select wrap : Elapsed 0.097 ms (9.702 ms / 100) 9.695 -> 9.697 ( +0.02%) [ +0.21% +0.00% +0.42% / +0.15% +0.02% +0.08%] index_select linear : Elapsed 0.097 ms (9.715 ms / 100) 9.683 -> 9.688 ( +0.05%) [ +0.08% +0.00% +0.06% / +0.29% +0.11% +0.05%] index_select reverse : Elapsed 0.097 ms (9.691 ms / 100) 9.140 -> 9.152 ( +0.13%) [ +0.01% +0.30% +0.00% / +0.13% +0.16% +0.23%] index_select skip64 : Elapsed 0.091 ms (9.141 ms / 100) 8.978 -> 8.986 ( +0.09%) [ +0.04% +0.00% +0.07% / +0.09% +0.27% +0.23%] index_select skip256 : Elapsed 0.090 ms (8.982 ms / 100) 10.121 -> 10.137 ( +0.16%) [ +0.00% +0.19% +0.09% / +0.24% +0.16% +0.17%] index_select spread : Elapsed 0.101 ms (10.121 ms / 100) 10.764 -> 10.777 ( +0.12%) [ +0.00% +0.20% +0.29% / +0.12% +0.16% +0.19%] index_select strided 3 : Elapsed 0.108 ms (10.764 ms / 100) 11.328 -> 11.373 ( +0.40%) [ +0.09% +0.00% +0.16% / +0.40% +0.40% +0.47%] index_select strided 5 : Elapsed 0.113 ms (11.338 ms / 100) 11.333 -> 11.346 ( +0.11%) [ +0.19% +0.19% +0.00% / +0.13% +0.11% +0.19%] index_select strided 7 : Elapsed 0.114 ms (11.355 ms / 100) 11.338 -> 11.365 ( +0.24%) [ +0.13% +0.00% +0.40% / +0.24% +0.43% +0.24%] index_select strided 8 : Elapsed 0.114 ms (11.353 ms / 100) 11.398 -> 11.396 ( -0.02%) [ +0.16% +0.04% +0.00% / -0.02% +0.15% +0.14%] index_select strided 16 : Elapsed 0.114 ms (11.416 ms / 100) 11.431 -> 11.453 ( +0.19%) [ +0.25% +0.36% +0.00% / +0.43% +0.23% +0.19%] index_select strided 64 : Elapsed 0.115 ms (11.460 ms / 100) 9.854 -> 9.856 ( +0.02%) [ +0.27% +0.00% +0.13% / +0.17% +0.45% +0.02%] index_select strided 100 : Elapsed 0.099 ms (9.881 ms / 100) 11.323 -> 11.331 ( +0.07%) [ +0.09% +0.00% +0.08% / +0.15% +0.17% +0.07%] index_select random : Elapsed 0.113 ms (11.333 ms / 100) 10.070 -> 10.055 ( -0.15%) [ +0.15% +0.00% +0.09% / -0.15% +0.09% +0.22%] index_select random_sorted : Elapsed 0.101 ms (10.085 ms / 100) 11.336 -> 11.361 ( +0.22%) [ +0.00% +0.04% +0.11% / +0.28% +0.22% +0.34%] index_select perm : Elapsed 0.113 ms (11.336 ms / 100) 10.136 -> 10.143 ( +0.07%) [ +0.00% +0.13% +0.19% / +0.07% +0.14% +0.15%] index_select perm_sorted : Elapsed 0.101 ms (10.136 ms / 100) B = [50, 15, 150] (stride (1, 7500, 50)) A = [50, 15, 250] (stride (3750, 1, 15)) dim = 2 5.514 -> 5.519 ( +0.09%) [ +0.42% +0.00% +0.27% / +0.09% +0.33% +0.15%] index_select const : Elapsed 0.055 ms (5.537 ms / 100) 5.847 -> 5.837 ( -0.17%) [ +0.00% +0.46% +0.02% / +0.26% -0.17% +0.19%] index_select wrap : Elapsed 0.058 ms (5.847 ms / 100) 5.829 -> 5.832 ( +0.05%) [ +0.12% +0.00% +0.05% / +0.12% +0.05% +0.17%] index_select linear : Elapsed 0.058 ms (5.836 ms / 100) 5.851 -> 5.846 ( -0.09%) [ +0.34% +0.17% +0.00% / +0.14% -0.09% +0.02%] index_select reverse : Elapsed 0.059 ms (5.871 ms / 100) 5.524 -> 5.520 ( -0.07%) [ +0.00% +0.11% +0.00% / -0.05% -0.05% -0.07%] index_select skip64 : Elapsed 0.055 ms (5.524 ms / 100) 5.500 -> 5.512 ( +0.22%) [ +0.31% +0.00% +0.40% / +0.33% +0.45% +0.22%] index_select skip256 : Elapsed 0.055 ms (5.517 ms / 100) 5.947 -> 5.956 ( +0.15%) [ +0.00% +0.22% +0.18% / +0.15% +0.79% +1.03%] index_select spread : Elapsed 0.059 ms (5.947 ms / 100) 6.065 -> 6.069 ( +0.07%) [ +0.12% +0.00% +0.23% / +0.07% +0.61% +0.40%] index_select strided 3 : Elapsed 0.061 ms (6.072 ms / 100) 5.997 -> 5.996 ( -0.02%) [ +0.15% +0.22% +0.00% / -0.02% +0.68% +0.72%] index_select strided 5 : Elapsed 0.060 ms (6.006 ms / 100) 6.083 -> 6.089 ( +0.10%) [ +0.16% +0.15% +0.00% / +0.10% +0.36% +0.39%] index_select strided 7 : Elapsed 0.061 ms (6.093 ms / 100) 6.079 -> 6.093 ( +0.23%) [ +0.00% +0.26% +0.30% / +0.23% +0.95% +0.72%] index_select strided 8 : Elapsed 0.061 ms (6.079 ms / 100) 6.099 -> 6.107 ( +0.13%) [ +0.00% +0.08% +0.20% / +0.13% +0.41% +0.41%] index_select strided 16 : Elapsed 0.061 ms (6.099 ms / 100) 6.100 -> 6.109 ( +0.15%) [ +0.28% +0.00% +0.10% / +0.15% +0.31% +0.43%] index_select strided 64 : Elapsed 0.061 ms (6.117 ms / 100) 5.554 -> 5.536 ( -0.32%) [ +0.02% +0.00% +0.27% / -0.32% -0.11% +0.11%] index_select strided 100 : Elapsed 0.056 ms (5.555 ms / 100) 5.978 -> 5.980 ( +0.03%) [ +0.25% +0.10% +0.00% / +0.03% +0.55% +0.85%] index_select random : Elapsed 0.060 ms (5.993 ms / 100) 5.838 -> 5.852 ( +0.24%) [ +0.00% +0.03% +0.22% / +0.24% +0.86% +0.67%] index_select random_sorted : Elapsed 0.058 ms (5.838 ms / 100) 6.042 -> 6.053 ( +0.18%) [ +0.18% +0.23% +0.00% / +0.18% +0.55% +0.76%] index_select perm : Elapsed 0.061 ms (6.053 ms / 100) 5.897 -> 5.907 ( +0.17%) [ +0.17% +0.47% +0.00% / +0.17% +1.15% +1.09%] index_select perm_sorted : Elapsed 0.059 ms (5.907 ms / 100) out_shape = [150, 250, 15] in_shape = [50, 250, 15] idx_dim = 0 B = [150, 250, 15] (stride (3750, 15, 1)) A = [50, 250, 15] (stride (250, 1, 12500)) dim = 0 11.496 -> 11.519 ( +0.20%) [ +0.32% +0.28% +0.00% / +0.21% +0.32% +0.20%] index_add_ linear : Elapsed 0.115 ms (11.533 ms / 100) 11.305 -> 11.338 ( +0.29%) [ +0.33% +0.07% +0.00% / +0.51% +0.54% +0.29%] index_copy_ linear : Elapsed 0.113 ms (11.342 ms / 100) 11.579 -> 11.550 ( -0.25%) [ +0.00% +0.01% +0.05% / +0.31% -0.25% -0.14%] index_add_ reverse : Elapsed 0.116 ms (11.579 ms / 100) 11.364 -> 11.376 ( +0.11%) [ +0.13% +0.00% +0.20% / +0.37% +0.11% +0.21%] index_copy_ reverse : Elapsed 0.114 ms (11.379 ms / 100) 11.634 -> 11.604 ( -0.26%) [ +0.15% +0.00% +0.15% / +0.42% -0.10% -0.26%] index_add_ spread : Elapsed 0.117 ms (11.652 ms / 100) 11.420 -> 11.362 ( -0.51%) [ +0.11% +0.02% +0.00% / +0.17% -0.42% -0.51%] index_copy_ spread : Elapsed 0.114 ms (11.432 ms / 100) 11.649 -> 11.667 ( +0.15%) [ +0.00% +0.07% +0.27% / +0.15% +0.28% +0.20%] index_add_ strided 7 : Elapsed 0.116 ms (11.649 ms / 100) 11.411 -> 11.435 ( +0.21%) [ +0.00% +0.02% +0.20% / +0.32% +0.36% +0.21%] index_copy_ strided 7 : Elapsed 0.114 ms (11.411 ms / 100) 11.573 -> 11.612 ( +0.34%) [ +0.00% +0.06% +0.33% / +0.82% +0.47% +0.34%] index_add_ perm : Elapsed 0.116 ms (11.573 ms / 100) 11.343 -> 11.400 ( +0.50%) [ +0.11% +0.00% +0.41% / +0.69% +0.52% +0.50%] index_copy_ perm : Elapsed 0.114 ms (11.356 ms / 100) 11.589 -> 11.607 ( +0.16%) [ +0.00% +0.18% +0.99% / +0.46% +0.22% +0.16%] index_add_ perm_sorted : Elapsed 0.116 ms (11.589 ms / 100) 11.367 -> 11.383 ( +0.14%) [ +0.00% +0.10% +0.26% / +0.60% +0.14% +0.17%] index_copy_ perm_sorted : Elapsed 0.114 ms (11.367 ms / 100) 17.311 -> 17.382 ( +0.41%) [ +0.10% +0.00% +0.18% / +0.56% +0.47% +0.41%] index_select const : Elapsed 0.173 ms (17.329 ms / 100) 20.805 -> 20.859 ( +0.26%) [ +0.00% +0.02% +0.11% / +0.26% +0.80% +0.91%] index_select wrap : Elapsed 0.208 ms (20.805 ms / 100) 18.400 -> 18.444 ( +0.24%) [ +0.00% +0.04% +0.21% / +0.52% +0.24% +0.24%] index_select linear : Elapsed 0.184 ms (18.400 ms / 100) 19.175 -> 19.258 ( +0.43%) [ +0.00% +0.20% +0.25% / +0.43% +0.92% +0.84%] index_select reverse : Elapsed 0.192 ms (19.175 ms / 100) 17.310 -> 17.360 ( +0.29%) [ +0.00% +0.02% +0.13% / +0.44% +0.29% +0.45%] index_select skip64 : Elapsed 0.173 ms (17.310 ms / 100) 17.317 -> 17.373 ( +0.32%) [ +0.00% +0.03% +0.06% / +0.47% +0.40% +0.32%] index_select skip256 : Elapsed 0.173 ms (17.317 ms / 100) 18.800 -> 18.853 ( +0.28%) [ +0.10% +0.00% +0.26% / +0.28% +0.52% +0.49%] index_select spread : Elapsed 0.188 ms (18.818 ms / 100) 21.065 -> 21.156 ( +0.43%) [ +0.14% +0.00% +0.28% / +0.43% +1.47% +1.43%] index_select strided 3 : Elapsed 0.211 ms (21.094 ms / 100) 18.593 -> 18.635 ( +0.23%) [ +0.06% +0.00% +0.02% / +0.23% +1.43% +1.51%] index_select strided 5 : Elapsed 0.186 ms (18.604 ms / 100) 21.070 -> 21.106 ( +0.17%) [ +0.00% +0.15% +0.07% / +0.17% +0.73% +0.76%] index_select strided 7 : Elapsed 0.211 ms (21.070 ms / 100) 20.354 -> 20.428 ( +0.36%) [ +0.00% +0.24% +0.16% / +0.36% +0.60% +0.69%] index_select strided 8 : Elapsed 0.204 ms (20.354 ms / 100) 20.692 -> 20.755 ( +0.30%) [ +0.00% +0.06% +0.18% / +0.30% +1.54% +1.58%] index_select strided 16 : Elapsed 0.207 ms (20.692 ms / 100) 20.141 -> 20.209 ( +0.34%) [ +0.14% +0.00% +0.23% / +0.34% +1.63% +1.64%] index_select random : Elapsed 0.202 ms (20.169 ms / 100) 18.745 -> 18.794 ( +0.26%) [ +0.10% +0.00% +0.06% / +0.26% +0.56% +0.49%] index_select random_sorted : Elapsed 0.188 ms (18.764 ms / 100) B = [150, 250, 15] (stride (3750, 15, 1)) A = [50, 250, 15] (stride (1, 50, 12500)) dim = 0 13.515 -> 13.475 ( -0.30%) [ +0.00% +0.04% +0.13% / -0.30% -0.03% -0.15%] index_add_ linear : Elapsed 0.135 ms (13.515 ms / 100) 13.283 -> 13.239 ( -0.33%) [ +0.00% +0.00% +0.08% / -0.33% -0.05% -0.30%] index_copy_ linear : Elapsed 0.133 ms (13.283 ms / 100) 13.664 -> 13.641 ( -0.17%) [ +0.13% +0.02% +0.00% / -0.17% +0.20% -0.16%] index_add_ reverse : Elapsed 0.137 ms (13.682 ms / 100) 13.386 -> 13.369 ( -0.13%) [ +0.43% +0.00% +0.02% / -0.13% -0.04% -0.12%] index_copy_ reverse : Elapsed 0.134 ms (13.443 ms / 100) 13.776 -> 13.745 ( -0.23%) [ +0.33% +0.05% +0.00% / -0.23% +0.41% +0.35%] index_add_ spread : Elapsed 0.138 ms (13.821 ms / 100) 13.474 -> 13.442 ( -0.24%) [ +0.19% +0.00% +0.04% / -0.24% +0.24% +0.09%] index_copy_ spread : Elapsed 0.135 ms (13.499 ms / 100) 13.823 -> 13.798 ( -0.18%) [ +0.08% +0.07% +0.00% / +0.08% -0.18% -0.08%] index_add_ strided 7 : Elapsed 0.138 ms (13.834 ms / 100) 13.503 -> 13.489 ( -0.10%) [ +0.04% +0.00% +0.18% / +0.05% -0.08% -0.10%] index_copy_ strided 7 : Elapsed 0.135 ms (13.509 ms / 100) 13.857 -> 13.751 ( -0.76%) [ +0.00% +0.14% +0.14% / -0.03% -0.76% -0.70%] index_add_ perm : Elapsed 0.139 ms (13.857 ms / 100) 13.546 -> 13.436 ( -0.81%) [ +0.01% +0.00% +0.06% / -0.15% -0.81% -0.77%] index_copy_ perm : Elapsed 0.135 ms (13.548 ms / 100) 13.733 -> 13.705 ( -0.20%) [ +0.12% +0.08% +0.00% / -0.07% -0.20% +0.01%] index_add_ perm_sorted : Elapsed 0.137 ms (13.749 ms / 100) 13.440 -> 13.410 ( -0.22%) [ +0.12% +0.00% +0.13% / -0.19% -0.22% -0.08%] index_copy_ perm_sorted : Elapsed 0.135 ms (13.456 ms / 100) 22.109 -> 21.976 ( -0.60%) [ +0.07% +0.00% +0.17% / +0.14% -0.60% -0.57%] index_select const : Elapsed 0.221 ms (22.125 ms / 100) 26.944 -> 27.022 ( +0.29%) [ +0.12% +0.00% +0.12% / +0.29% +0.58% +0.54%] index_select wrap : Elapsed 0.270 ms (26.977 ms / 100) 23.413 -> 23.378 ( -0.15%) [ +0.00% +0.04% +0.08% / +0.06% -0.15% -0.01%] index_select linear : Elapsed 0.234 ms (23.413 ms / 100) 24.096 -> 24.087 ( -0.04%) [ +0.03% +0.00% +0.20% / +0.22% +0.15% -0.04%] index_select reverse : Elapsed 0.241 ms (24.103 ms / 100) 22.106 -> 21.980 ( -0.57%) [ +0.19% +0.00% +0.17% / +0.24% -0.56% -0.57%] index_select skip64 : Elapsed 0.221 ms (22.148 ms / 100) 22.135 -> 21.968 ( -0.75%) [ +0.00% +0.02% +0.06% / +0.10% -0.74% -0.75%] index_select skip256 : Elapsed 0.221 ms (22.135 ms / 100) 24.132 -> 24.162 ( +0.12%) [ +0.06% +0.00% +0.19% / +0.12% +0.56% +0.47%] index_select spread : Elapsed 0.241 ms (24.146 ms / 100) 29.066 -> 29.039 ( -0.09%) [ +0.00% +0.10% +0.31% / +0.24% -0.07% -0.09%] index_select strided 3 : Elapsed 0.291 ms (29.066 ms / 100) 29.098 -> 29.092 ( -0.02%) [ +0.07% +0.00% +0.04% / +0.22% +0.01% -0.02%] index_select strided 5 : Elapsed 0.291 ms (29.118 ms / 100) 29.032 -> 28.981 ( -0.18%) [ +0.11% +0.00% +0.05% / +0.21% -0.12% -0.18%] index_select strided 7 : Elapsed 0.291 ms (29.065 ms / 100) 28.926 -> 28.948 ( +0.08%) [ +0.09% +0.00% +0.09% / +0.13% +0.10% +0.08%] index_select strided 8 : Elapsed 0.290 ms (28.953 ms / 100) 29.162 -> 29.083 ( -0.27%) [ +0.13% +0.00% +0.05% / +0.05% -0.25% -0.27%] index_select strided 16 : Elapsed 0.292 ms (29.200 ms / 100) 28.940 -> 28.948 ( +0.03%) [ +0.00% +0.16% +0.15% / +0.18% +0.03% +0.05%] index_select random : Elapsed 0.289 ms (28.940 ms / 100) 24.077 -> 24.113 ( +0.15%) [ +0.15% +0.00% +0.00% / +0.15% +0.21% +0.27%] index_select random_sorted : Elapsed 0.241 ms (24.114 ms / 100) B = [150, 250, 15] (stride (3750, 1, 250)) A = [50, 250, 15] (stride (3750, 1, 250)) dim = 0 7.390 -> 7.389 ( -0.01%) [ +0.05% +0.12% +0.00% / -0.01% +0.24% +0.14%] index_add_ linear : Elapsed 0.074 ms (7.394 ms / 100) 6.815 -> 6.779 ( -0.53%) [ +0.22% +0.00% +0.09% / -0.13% -0.50% -0.53%] index_copy_ linear : Elapsed 0.068 ms (6.830 ms / 100) 7.338 -> 7.333 ( -0.07%) [ +0.44% +0.00% +0.31% / +0.48% -0.07% +0.18%] index_add_ reverse : Elapsed 0.074 ms (7.370 ms / 100) 6.803 -> 6.743 ( -0.88%) [ +0.22% +0.00% +0.29% / -0.32% -0.63% -0.88%] index_copy_ reverse : Elapsed 0.068 ms (6.818 ms / 100) 7.610 -> 7.458 ( -2.00%) [ +0.28% +0.00% +0.17% / +0.30% -1.91% -2.00%] index_add_ spread : Elapsed 0.076 ms (7.631 ms / 100) 6.877 -> 6.762 ( -1.67%) [ +0.10% +0.49% +0.00% / -0.19% -1.67% -1.60%] index_copy_ spread : Elapsed 0.069 ms (6.884 ms / 100) 7.665 -> 7.622 ( -0.56%) [ +0.00% +0.33% +0.05% / +0.46% -0.56% -0.42%] index_add_ strided 7 : Elapsed 0.077 ms (7.665 ms / 100) 6.916 -> 6.818 ( -1.42%) [ +0.00% +0.19% +0.01% / -0.10% -1.42% -0.88%] index_copy_ strided 7 : Elapsed 0.069 ms (6.916 ms / 100) 7.600 -> 7.634 ( +0.45%) [ +0.43% +0.00% +0.12% / +0.45% +0.95% +0.86%] index_add_ perm : Elapsed 0.076 ms (7.633 ms / 100) 6.807 -> 6.772 ( -0.51%) [ +0.00% +0.19% +0.41% / -0.51% -0.15% -0.09%] index_copy_ perm : Elapsed 0.068 ms (6.807 ms / 100) 7.465 -> 7.464 ( -0.01%) [ +0.00% +0.17% +0.31% / -0.01% +1.03% +1.26%] index_add_ perm_sorted : Elapsed 0.075 ms (7.465 ms / 100) 6.787 -> 6.781 ( -0.09%) [ +0.35% +0.52% +0.00% / -0.09% +0.38% +0.04%] index_copy_ perm_sorted : Elapsed 0.068 ms (6.811 ms / 100) 10.323 -> 10.332 ( +0.09%) [ +0.03% +0.00% +0.15% / +0.09% +0.60% +0.65%] index_select const : Elapsed 0.103 ms (10.326 ms / 100) 11.680 -> 11.764 ( +0.72%) [ +0.15% +0.00% +0.18% / +0.72% +2.28% +2.63%] index_select wrap : Elapsed 0.117 ms (11.698 ms / 100) 10.936 -> 10.940 ( +0.04%) [ +0.00% +0.05% +0.02% / +0.04% +0.09% +0.10%] index_select linear : Elapsed 0.109 ms (10.936 ms / 100) 10.855 -> 10.772 ( -0.76%) [ +0.41% +0.00% +0.10% / +0.06% -0.71% -0.76%] index_select reverse : Elapsed 0.109 ms (10.899 ms / 100) 10.226 -> 10.241 ( +0.15%) [ +0.19% +0.04% +0.00% / +0.16% +0.15% +0.15%] index_select skip64 : Elapsed 0.102 ms (10.245 ms / 100) 10.399 -> 10.370 ( -0.28%) [ +0.09% +0.00% +0.01% / +0.12% -0.26% -0.28%] index_select skip256 : Elapsed 0.104 ms (10.408 ms / 100) 10.341 -> 10.347 ( +0.06%) [ +0.05% +0.00% +0.16% / +0.06% +0.44% +0.25%] index_select spread : Elapsed 0.103 ms (10.346 ms / 100) 11.903 -> 11.911 ( +0.07%) [ +0.00% +0.05% +0.03% / +0.07% +1.55% +1.17%] index_select strided 3 : Elapsed 0.119 ms (11.903 ms / 100) 10.646 -> 10.571 ( -0.70%) [ +0.17% +0.00% +0.16% / +0.15% -0.70% -0.64%] index_select strided 5 : Elapsed 0.107 ms (10.664 ms / 100) 11.594 -> 11.569 ( -0.22%) [ +0.50% +0.00% +0.15% / -0.22% +1.99% +2.53%] index_select strided 7 : Elapsed 0.117 ms (11.652 ms / 100) 11.275 -> 11.198 ( -0.68%) [ +0.00% +0.04% +0.00% / -0.68% +1.61% +2.10%] index_select strided 8 : Elapsed 0.113 ms (11.275 ms / 100) 11.382 -> 11.311 ( -0.62%) [ +0.42% +0.33% +0.00% / -0.62% +1.63% +1.76%] index_select strided 16 : Elapsed 0.114 ms (11.430 ms / 100) 10.858 -> 10.864 ( +0.06%) [ +0.29% +0.17% +0.00% / +0.06% +4.12% +3.72%] index_select random : Elapsed 0.109 ms (10.889 ms / 100) 10.362 -> 10.380 ( +0.17%) [ +0.00% +0.03% +0.04% / +0.17% +0.24% +0.23%] index_select random_sorted : Elapsed 0.104 ms (10.362 ms / 100) B = [150, 250, 15] (stride (1, 2250, 150)) A = [50, 250, 15] (stride (3750, 1, 250)) dim = 0 15.385 -> 14.945 ( -2.86%) [ +0.10% +0.19% +0.00% / -0.23% -2.58% -2.86%] index_add_ linear : Elapsed 0.154 ms (15.400 ms / 100) 13.717 -> 13.432 ( -2.08%) [ +0.16% +0.00% +0.09% / -0.13% -2.06% -2.08%] index_copy_ linear : Elapsed 0.137 ms (13.739 ms / 100) 15.463 -> 15.023 ( -2.85%) [ +0.28% +0.00% +0.12% / -0.25% -2.82% -2.85%] index_add_ reverse : Elapsed 0.155 ms (15.506 ms / 100) 13.772 -> 13.470 ( -2.19%) [ +0.36% +0.00% +0.22% / -0.17% -2.19% -2.19%] index_copy_ reverse : Elapsed 0.138 ms (13.822 ms / 100) 18.556 -> 18.397 ( -0.86%) [ +0.00% +0.18% +0.00% / -0.57% -0.86% -0.67%] index_add_ spread : Elapsed 0.186 ms (18.556 ms / 100) 16.470 -> 16.315 ( -0.94%) [ +0.26% +0.05% +0.00% / -0.65% -0.86% -0.94%] index_copy_ spread : Elapsed 0.165 ms (16.512 ms / 100) 22.350 -> 22.250 ( -0.45%) [ +0.09% +0.00% +0.16% / +0.11% -0.27% -0.45%] index_add_ strided 7 : Elapsed 0.224 ms (22.371 ms / 100) 20.564 -> 20.218 ( -1.68%) [ +0.00% +0.01% +0.35% / -0.72% -1.47% -1.68%] index_copy_ strided 7 : Elapsed 0.206 ms (20.564 ms / 100) 23.849 -> 23.821 ( -0.12%) [ +0.15% +0.04% +0.00% / -0.12% +0.03% -0.03%] index_add_ perm : Elapsed 0.239 ms (23.884 ms / 100) 22.028 -> 22.021 ( -0.03%) [ +0.05% +0.08% +0.00% / -0.03% +0.09% +0.04%] index_copy_ perm : Elapsed 0.220 ms (22.040 ms / 100) 18.181 -> 18.169 ( -0.07%) [ +0.43% +0.44% +0.00% / +0.05% -0.07% +0.08%] index_add_ perm_sorted : Elapsed 0.183 ms (18.260 ms / 100) 16.237 -> 16.090 ( -0.91%) [ +0.25% +0.09% +0.00% / -0.73% -0.91% -0.53%] index_copy_ perm_sorted : Elapsed 0.163 ms (16.277 ms / 100) 25.486 -> 25.180 ( -1.20%) [ +0.00% +0.13% +0.26% / +0.33% -1.15% -1.20%] index_select const : Elapsed 0.255 ms (25.486 ms / 100) 30.607 -> 30.475 ( -0.43%) [ +0.00% +0.11% +0.27% / -0.43% +0.38% +0.34%] index_select wrap : Elapsed 0.306 ms (30.607 ms / 100) 26.192 -> 26.151 ( -0.16%) [ +0.00% +0.13% +0.17% / +0.19% -0.08% -0.16%] index_select linear : Elapsed 0.262 ms (26.192 ms / 100) 28.178 -> 27.820 ( -1.27%) [ +0.00% +0.11% +0.05% / -0.10% -1.27% -1.22%] index_select reverse : Elapsed 0.282 ms (28.178 ms / 100) 25.518 -> 25.131 ( -1.52%) [ +0.07% +0.00% +0.10% / -0.09% -1.36% -1.52%] index_select skip64 : Elapsed 0.255 ms (25.535 ms / 100) 25.591 -> 25.173 ( -1.63%) [ +0.21% +0.00% +0.21% / -0.13% -1.59% -1.63%] index_select skip256 : Elapsed 0.256 ms (25.646 ms / 100) 26.765 -> 26.600 ( -0.62%) [ +0.32% +0.09% +0.00% / +0.10% -0.55% -0.62%] index_select spread : Elapsed 0.269 ms (26.851 ms / 100) 31.148 -> 31.045 ( -0.33%) [ +0.00% +0.13% +0.06% / -0.33% +0.86% +1.31%] index_select strided 3 : Elapsed 0.311 ms (31.148 ms / 100) 27.429 -> 27.450 ( +0.08%) [ +0.00% +0.00% +0.01% / +0.08% +3.74% +3.52%] index_select strided 5 : Elapsed 0.274 ms (27.429 ms / 100) 30.514 -> 30.447 ( -0.22%) [ +0.12% +0.20% +0.00% / -0.22% +3.13% +2.89%] index_select strided 7 : Elapsed 0.306 ms (30.551 ms / 100) 31.309 -> 30.551 ( -2.42%) [ +0.29% +0.00% +0.22% / +0.70% -2.22% -2.42%] index_select strided 8 : Elapsed 0.314 ms (31.401 ms / 100) 30.820 -> 30.391 ( -1.39%) [ +0.02% +0.31% +0.00% / +0.26% -1.39% -1.20%] index_select strided 16 : Elapsed 0.308 ms (30.825 ms / 100) 30.590 -> 29.882 ( -2.31%) [ +0.03% +0.00% +0.01% / -0.12% -2.31% -1.98%] index_select random : Elapsed 0.306 ms (30.598 ms / 100) 26.823 -> 26.739 ( -0.31%) [ +0.00% +0.16% +0.07% / -0.07% -0.12% -0.31%] index_select random_sorted : Elapsed 0.268 ms (26.823 ms / 100) B = [150, 250, 15] (stride (250, 1, 37500)) A = [50, 250, 15] (stride (3750, 1, 250)) dim = 0 7.858 -> 7.883 ( +0.32%) [ +0.04% +0.25% +0.00% / +0.32% +1.78% +1.60%] index_add_ linear : Elapsed 0.079 ms (7.861 ms / 100) 6.877 -> 6.854 ( -0.33%) [ +0.00% +0.42% +0.29% / -0.33% +0.23% +0.36%] index_copy_ linear : Elapsed 0.069 ms (6.877 ms / 100) 7.870 -> 7.908 ( +0.48%) [ +0.05% +0.52% +0.00% / +0.48% +2.15% +2.27%] index_add_ reverse : Elapsed 0.079 ms (7.874 ms / 100) 6.867 -> 6.842 ( -0.36%) [ +0.00% +0.33% +0.09% / -0.36% -0.07% +0.23%] index_copy_ reverse : Elapsed 0.069 ms (6.867 ms / 100) 7.963 -> 7.882 ( -1.02%) [ +0.05% +0.23% +0.00% / +0.29% -1.02% -0.79%] index_add_ spread : Elapsed 0.080 ms (7.967 ms / 100) 6.993 -> 6.917 ( -1.09%) [ +0.01% +0.34% +0.00% / -0.49% -0.94% -1.09%] index_copy_ spread : Elapsed 0.070 ms (6.994 ms / 100) 8.077 -> 8.122 ( +0.56%) [ +0.00% +0.21% +0.40% / +0.56% +2.74% +2.48%] index_add_ strided 7 : Elapsed 0.081 ms (8.077 ms / 100) 6.990 -> 6.965 ( -0.36%) [ +0.06% +0.31% +0.00% / +0.03% -0.09% -0.36%] index_copy_ strided 7 : Elapsed 0.070 ms (6.994 ms / 100) 8.257 -> 8.188 ( -0.84%) [ +0.00% +0.04% +0.10% / +0.08% -0.84% -0.82%] index_add_ perm : Elapsed 0.083 ms (8.257 ms / 100) 7.003 -> 6.971 ( -0.46%) [ +0.26% +0.00% +0.27% / -0.13% -0.46% -0.30%] index_copy_ perm : Elapsed 0.070 ms (7.021 ms / 100) 8.109 -> 8.176 ( +0.83%) [ +0.16% +0.00% +0.05% / +0.83% +1.50% +1.09%] index_add_ perm_sorted : Elapsed 0.081 ms (8.122 ms / 100) 6.929 -> 6.893 ( -0.52%) [ +0.25% +0.00% +0.25% / -0.45% -0.35% -0.52%] index_copy_ perm_sorted : Elapsed 0.069 ms (6.946 ms / 100) 10.567 -> 10.575 ( +0.08%) [ +0.19% +0.00% +0.07% / +0.08% +0.98% +0.95%] index_select const : Elapsed 0.106 ms (10.587 ms / 100) 11.676 -> 11.652 ( -0.21%) [ +0.27% +0.00% +0.48% / -0.21% +1.09% +1.70%] index_select wrap : Elapsed 0.117 ms (11.707 ms / 100) 11.037 -> 11.050 ( +0.12%) [ +0.06% +0.05% +0.00% / +0.12% +0.42% +0.45%] index_select linear : Elapsed 0.110 ms (11.044 ms / 100) 11.197 -> 11.211 ( +0.13%) [ +0.00% +0.54% +0.34% / +0.13% +0.76% +1.11%] index_select reverse : Elapsed 0.112 ms (11.197 ms / 100) 10.498 -> 10.508 ( +0.10%) [ +0.04% +0.00% +0.05% / +0.10% +0.82% +0.95%] index_select skip64 : Elapsed 0.105 ms (10.502 ms / 100) 10.619 -> 10.634 ( +0.14%) [ +0.00% +0.13% +0.10% / +0.14% +0.74% +0.70%] index_select skip256 : Elapsed 0.106 ms (10.619 ms / 100) 10.708 -> 10.739 ( +0.29%) [ +0.15% +0.00% +0.13% / +0.29% +1.05% +1.03%] index_select spread : Elapsed 0.107 ms (10.724 ms / 100) 11.767 -> 11.826 ( +0.50%) [ +0.34% +0.37% +0.00% / +0.50% +1.58% +1.69%] index_select strided 3 : Elapsed 0.118 ms (11.807 ms / 100) 10.842 -> 10.860 ( +0.17%) [ +0.00% +0.04% +0.10% / +0.17% +0.37% +0.45%] index_select strided 5 : Elapsed 0.108 ms (10.842 ms / 100) 11.646 -> 11.622 ( -0.21%) [ +0.07% +0.00% +0.11% / -0.21% +1.55% +1.81%] index_select strided 7 : Elapsed 0.117 ms (11.654 ms / 100) 11.467 -> 11.467 ( +0.00%) [ +0.00% +0.05% +0.08% / +0.00% +2.34% +1.85%] index_select strided 8 : Elapsed 0.115 ms (11.467 ms / 100) 11.491 -> 11.490 ( -0.01%) [ +0.48% +0.33% +0.00% / -0.01% +2.77% +2.17%] index_select strided 16 : Elapsed 0.115 ms (11.546 ms / 100) 11.238 -> 11.271 ( +0.29%) [ +0.10% +0.34% +0.00% / +0.29% +1.58% +1.79%] index_select random : Elapsed 0.112 ms (11.249 ms / 100) 10.698 -> 10.711 ( +0.12%) [ +0.11% +0.05% +0.00% / +0.12% +0.70% +0.70%] index_select random_sorted : Elapsed 0.107 ms (10.710 ms / 100) B = [150, 250, 15] (stride (1, 150, 37500)) A = [50, 250, 15] (stride (1, 750, 50)) dim = 0 18.357 -> 17.911 ( -2.43%) [ +0.07% +0.16% +0.00% / -0.61% -2.34% -2.43%] index_add_ linear : Elapsed 0.184 ms (18.369 ms / 100) 16.054 -> 15.896 ( -0.98%) [ +0.02% +0.01% +0.00% / -0.62% -0.97% -0.98%] index_copy_ linear : Elapsed 0.161 ms (16.058 ms / 100) 18.320 -> 17.899 ( -2.30%) [ +0.23% +0.29% +0.00% / -0.52% -2.24% -2.30%] index_add_ reverse : Elapsed 0.184 ms (18.362 ms / 100) 16.033 -> 15.849 ( -1.15%) [ +0.21% +0.00% +0.04% / -0.48% -1.12% -1.15%] index_copy_ reverse : Elapsed 0.161 ms (16.066 ms / 100) 21.474 -> 21.318 ( -0.73%) [ +0.15% +0.00% +0.03% / -0.73% -0.63% -0.69%] index_add_ spread : Elapsed 0.215 ms (21.506 ms / 100) 18.260 -> 18.152 ( -0.59%) [ +0.00% +0.07% +0.24% / -0.59% +0.86% +0.66%] index_copy_ spread : Elapsed 0.183 ms (18.260 ms / 100) 25.327 -> 25.161 ( -0.66%) [ +0.13% +0.00% +0.12% / -0.38% -0.56% -0.66%] index_add_ strided 7 : Elapsed 0.254 ms (25.359 ms / 100) 22.440 -> 22.022 ( -1.86%) [ +0.00% +0.25% +0.06% / -1.06% -1.86% -1.79%] index_copy_ strided 7 : Elapsed 0.224 ms (22.440 ms / 100) 26.478 -> 26.343 ( -0.51%) [ +0.08% +0.06% +0.00% / -0.17% -0.48% -0.51%] index_add_ perm : Elapsed 0.265 ms (26.500 ms / 100) 24.344 -> 24.081 ( -1.08%) [ +0.07% +0.01% +0.00% / -0.50% -1.06% -1.08%] index_copy_ perm : Elapsed 0.244 ms (24.361 ms / 100) 21.248 -> 21.119 ( -0.61%) [ +0.00% +0.04% +0.05% / -0.61% -0.48% -0.60%] index_add_ perm_sorted : Elapsed 0.212 ms (21.248 ms / 100) 18.194 -> 18.073 ( -0.67%) [ +0.08% +0.14% +0.00% / -0.67% +0.24% +0.37%] index_copy_ perm_sorted : Elapsed 0.182 ms (18.209 ms / 100) 32.993 -> 32.998 ( +0.02%) [ +0.11% +0.00% +0.05% / +0.02% +0.85% +0.88%] index_select const : Elapsed 0.330 ms (33.028 ms / 100) 37.070 -> 37.090 ( +0.05%) [ +0.21% +0.00% +0.04% / +0.05% +3.74% +3.61%] index_select wrap : Elapsed 0.371 ms (37.147 ms / 100) 33.723 -> 33.843 ( +0.36%) [ +0.17% +0.20% +0.00% / +0.36% +3.11% +3.09%] index_select linear : Elapsed 0.338 ms (33.780 ms / 100) 34.602 -> 34.608 ( +0.02%) [ +0.10% +0.00% +0.00% / +0.02% +2.37% +2.55%] index_select reverse : Elapsed 0.346 ms (34.638 ms / 100) 32.975 -> 32.953 ( -0.07%) [ +0.02% +0.00% +0.03% / -0.07% +0.79% +0.86%] index_select skip64 : Elapsed 0.330 ms (32.982 ms / 100) 32.985 -> 33.011 ( +0.08%) [ +0.24% +0.03% +0.00% / +0.08% +1.22% +0.92%] index_select skip256 : Elapsed 0.331 ms (33.063 ms / 100) 34.711 -> 34.752 ( +0.12%) [ +0.00% +0.13% +0.16% / +0.12% +3.17% +2.99%] index_select spread : Elapsed 0.347 ms (34.711 ms / 100) 39.475 -> 39.465 ( -0.03%) [ +0.18% +0.17% +0.00% / -0.03% +2.87% +2.66%] index_select strided 3 : Elapsed 0.395 ms (39.547 ms / 100) 39.810 -> 39.820 ( +0.03%) [ +0.03% +0.00% +0.07% / +0.03% +2.51% +2.70%] index_select strided 5 : Elapsed 0.398 ms (39.822 ms / 100) 39.661 -> 39.619 ( -0.11%) [ +0.00% +0.25% +0.08% / -0.11% +2.81% +2.79%] index_select strided 7 : Elapsed 0.397 ms (39.661 ms / 100) 39.893 -> 39.908 ( +0.04%) [ +0.11% +0.00% +0.00% / +0.04% +2.72% +2.74%] index_select strided 8 : Elapsed 0.399 ms (39.937 ms / 100) 39.759 -> 39.761 ( +0.01%) [ +0.00% +0.13% +0.17% / +0.01% +2.84% +2.93%] index_select strided 16 : Elapsed 0.398 ms (39.759 ms / 100) 39.343 -> 39.417 ( +0.19%) [ +0.15% +0.14% +0.00% / +0.19% +3.07% +3.06%] index_select random : Elapsed 0.394 ms (39.401 ms / 100) 34.816 -> 34.854 ( +0.11%) [ +0.14% +0.00% +0.01% / +0.11% +2.46% +2.45%] index_select random_sorted : Elapsed 0.349 ms (34.866 ms / 100) out_shape = [50, 150, 15] in_shape = [50, 250, 15] idx_dim = 1 B = [50, 150, 15] (stride (2250, 15, 1)) dim = 1 fill_cnt = 250 2.959 -> 2.945 ( -0.47%) [ +0.17% +0.17% +0.00% / -0.41% -0.44% -0.47%] index_fill_ const : Elapsed 0.030 ms (2.964 ms / 100) 3.042 -> 3.011 ( -1.02%) [ +0.43% +0.00% +0.10% / -0.30% -1.02% -0.69%] index_fill_ linear : Elapsed 0.031 ms (3.055 ms / 100) 2.990 -> 2.976 ( -0.47%) [ +0.07% +0.10% +0.00% / +0.07% -0.23% -0.47%] index_fill_ reverse : Elapsed 0.030 ms (2.992 ms / 100) 2.953 -> 2.944 ( -0.30%) [ +0.00% +0.37% +0.17% / -0.10% -0.30% +0.00%] index_fill_ skip64 : Elapsed 0.030 ms (2.953 ms / 100) 2.954 -> 2.938 ( -0.54%) [ +0.03% +0.00% +0.07% / +0.10% -0.41% -0.54%] index_fill_ skip256 : Elapsed 0.030 ms (2.955 ms / 100) 3.023 -> 3.010 ( -0.43%) [ +0.00% +0.20% +0.36% / -0.43% -0.40% -0.36%] index_fill_ spread : Elapsed 0.030 ms (3.023 ms / 100) 3.038 -> 3.025 ( -0.43%) [ +0.00% +0.13% +0.03% / -0.13% -0.43% -0.20%] index_fill_ strided 3 : Elapsed 0.030 ms (3.038 ms / 100) 3.031 -> 2.999 ( -1.06%) [ +0.16% +0.00% +0.30% / -0.36% -0.96% -1.06%] index_fill_ strided 5 : Elapsed 0.030 ms (3.036 ms / 100) 3.036 -> 3.028 ( -0.26%) [ +0.36% +0.00% +0.00% / -0.23% -0.16% -0.26%] index_fill_ strided 7 : Elapsed 0.030 ms (3.047 ms / 100) 3.110 -> 3.097 ( -0.42%) [ +0.29% +0.42% +0.00% / +0.13% -0.39% -0.42%] index_fill_ strided 8 : Elapsed 0.031 ms (3.119 ms / 100) 3.113 -> 3.110 ( -0.10%) [ +0.00% +0.67% +0.42% / +0.03% -0.10% -0.06%] index_fill_ strided 16 : Elapsed 0.031 ms (3.113 ms / 100) 3.104 -> 3.109 ( +0.16%) [ +0.19% +0.00% +0.13% / +0.32% +0.48% +0.16%] index_fill_ strided 64 : Elapsed 0.031 ms (3.110 ms / 100) 2.957 -> 2.942 ( -0.51%) [ +0.37% +0.10% +0.00% / +0.03% -0.44% -0.51%] index_fill_ strided 100 : Elapsed 0.030 ms (2.968 ms / 100) 3.055 -> 3.049 ( -0.20%) [ +0.00% +0.36% +0.03% / -0.20% +0.20% +0.23%] index_fill_ random : Elapsed 0.031 ms (3.055 ms / 100) 3.034 -> 3.021 ( -0.43%) [ +0.30% +0.00% +0.26% / -0.20% -0.36% -0.43%] index_fill_ random_sorted : Elapsed 0.030 ms (3.043 ms / 100) B = [50, 150, 15] (stride (15, 750, 1)) dim = 1 fill_cnt = 250 2.939 -> 2.925 ( -0.48%) [ +0.07% +0.00% +0.20% / -0.34% -0.48% -0.44%] index_fill_ const : Elapsed 0.029 ms (2.941 ms / 100) 2.998 -> 2.982 ( -0.53%) [ +0.00% +0.03% +0.10% / -0.10% -0.50% -0.53%] index_fill_ linear : Elapsed 0.030 ms (2.998 ms / 100) 2.975 -> 2.965 ( -0.34%) [ +0.10% +0.00% +0.10% / +0.10% -0.24% -0.34%] index_fill_ reverse : Elapsed 0.030 ms (2.978 ms / 100) 2.937 -> 2.937 ( +0.00%) [ +0.14% +0.00% +0.03% / +0.00% +0.10% +0.00%] index_fill_ skip64 : Elapsed 0.029 ms (2.941 ms / 100) 2.940 -> 2.925 ( -0.51%) [ +0.14% +0.14% +0.00% / -0.07% -0.48% -0.51%] index_fill_ skip256 : Elapsed 0.029 ms (2.944 ms / 100) 2.997 -> 2.984 ( -0.43%) [ +0.00% +0.07% +0.03% / -0.37% -0.13% -0.43%] index_fill_ spread : Elapsed 0.030 ms (2.997 ms / 100) 2.983 -> 2.962 ( -0.70%) [ +0.00% +0.10% +0.20% / -0.64% -0.70% -0.54%] index_fill_ strided 3 : Elapsed 0.030 ms (2.983 ms / 100) 2.977 -> 2.951 ( -0.87%) [ +0.00% +0.10% +0.17% / -0.54% -0.74% -0.87%] index_fill_ strided 5 : Elapsed 0.030 ms (2.977 ms / 100) 3.001 -> 2.989 ( -0.40%) [ +0.00% +0.30% +0.07% / -0.30% -0.40% -0.40%] index_fill_ strided 7 : Elapsed 0.030 ms (3.001 ms / 100) 2.987 -> 2.967 ( -0.67%) [ +0.57% +0.00% +0.17% / -0.40% -0.40% -0.67%] index_fill_ strided 8 : Elapsed 0.030 ms (3.004 ms / 100) 2.985 -> 2.971 ( -0.47%) [ +0.03% +0.27% +0.00% / -0.10% -0.47% -0.40%] index_fill_ strided 16 : Elapsed 0.030 ms (2.986 ms / 100) 2.989 -> 2.979 ( -0.33%) [ +0.00% +0.13% +0.07% / -0.33% -0.13% -0.07%] index_fill_ strided 64 : Elapsed 0.030 ms (2.989 ms / 100) 2.946 -> 2.925 ( -0.71%) [ +0.14% +0.00% +0.10% / -0.20% -0.61% -0.71%] index_fill_ strided 100 : Elapsed 0.030 ms (2.950 ms / 100) 2.986 -> 2.977 ( -0.30%) [ +0.00% +0.44% +0.07% / -0.30% -0.10% +0.13%] index_fill_ random : Elapsed 0.030 ms (2.986 ms / 100) 2.983 -> 2.975 ( -0.27%) [ +0.03% +0.03% +0.00% / -0.27% -0.10% -0.10%] index_fill_ random_sorted : Elapsed 0.030 ms (2.984 ms / 100) B = [50, 150, 15] (stride (15, 750, 1)) A = [50, 250, 15] (stride (15, 750, 1)) dim = 1 5.298 -> 5.309 ( +0.21%) [ +0.08% +0.13% +0.00% / +0.21% +0.49% +0.62%] index_select const : Elapsed 0.053 ms (5.302 ms / 100) 5.521 -> 5.506 ( -0.27%) [ +0.00% +0.04% +0.11% / +0.11% -0.27% -0.22%] index_select wrap : Elapsed 0.055 ms (5.521 ms / 100) 5.505 -> 5.509 ( +0.07%) [ +0.13% +0.04% +0.00% / +0.07% +0.11% +0.25%] index_select linear : Elapsed 0.055 ms (5.512 ms / 100) 5.610 -> 5.608 ( -0.04%) [ +0.07% +0.00% +0.11% / -0.04% +0.34% +0.36%] index_select reverse : Elapsed 0.056 ms (5.614 ms / 100) 5.305 -> 5.310 ( +0.09%) [ +0.00% +0.11% +0.13% / +0.09% +0.36% +0.30%] index_select skip64 : Elapsed 0.053 ms (5.305 ms / 100) 5.292 -> 5.296 ( +0.08%) [ +0.13% +0.00% +0.40% / +0.08% +0.70% +0.87%] index_select skip256 : Elapsed 0.053 ms (5.299 ms / 100) 5.612 -> 5.602 ( -0.18%) [ +0.11% +0.00% +0.00% / -0.18% +0.75% +0.37%] index_select spread : Elapsed 0.056 ms (5.618 ms / 100) 5.588 -> 5.586 ( -0.04%) [ +0.00% +0.05% +0.02% / -0.04% +0.89% +1.06%] index_select strided 3 : Elapsed 0.056 ms (5.588 ms / 100) 5.450 -> 5.468 ( +0.33%) [ +0.18% +0.00% +0.18% / +0.33% +2.50% +2.28%] index_select strided 5 : Elapsed 0.055 ms (5.460 ms / 100) 5.596 -> 5.608 ( +0.21%) [ +0.00% +0.16% +0.27% / +0.21% +0.57% +0.80%] index_select strided 7 : Elapsed 0.056 ms (5.596 ms / 100) 5.607 -> 5.600 ( -0.12%) [ +0.12% +0.04% +0.00% / -0.12% +0.96% +0.89%] index_select strided 8 : Elapsed 0.056 ms (5.614 ms / 100) 5.593 -> 5.620 ( +0.48%) [ +0.18% +0.00% +0.25% / +0.48% +0.95% +1.14%] index_select strided 16 : Elapsed 0.056 ms (5.603 ms / 100) 5.603 -> 5.605 ( +0.04%) [ +0.00% +0.11% +0.18% / +0.04% +0.12% +0.37%] index_select strided 64 : Elapsed 0.056 ms (5.603 ms / 100) 5.319 -> 5.319 ( +0.00%) [ +0.41% +0.23% +0.00% / +0.11% +0.00% +0.06%] index_select strided 100 : Elapsed 0.053 ms (5.341 ms / 100) 5.566 -> 5.571 ( +0.09%) [ +0.04% +0.00% +0.07% / +0.09% +0.49% +0.47%] index_select random : Elapsed 0.056 ms (5.568 ms / 100) 5.560 -> 5.559 ( -0.02%) [ +0.25% +0.09% +0.00% / -0.02% +0.04% +0.09%] index_select random_sorted : Elapsed 0.056 ms (5.574 ms / 100) 5.614 -> 5.614 ( +0.00%) [ +0.07% +0.00% +0.02% / +0.00% +0.52% +0.61%] index_select perm : Elapsed 0.056 ms (5.618 ms / 100) 5.630 -> 5.636 ( +0.11%) [ +0.09% +0.00% +0.27% / +0.21% +0.11% +0.23%] index_select perm_sorted : Elapsed 0.056 ms (5.635 ms / 100) B = [50, 150, 15] (stride (15, 750, 1)) A = [50, 250, 15] (stride (1, 50, 12500)) dim = 1 5.466 -> 5.463 ( -0.05%) [ +0.22% +0.00% +0.22% / -0.05% +0.07% +0.18%] index_select const : Elapsed 0.055 ms (5.478 ms / 100) 5.844 -> 5.827 ( -0.29%) [ +0.00% +0.21% +0.17% / -0.29% +0.38% +0.14%] index_select wrap : Elapsed 0.058 ms (5.844 ms / 100) 5.843 -> 5.832 ( -0.19%) [ +0.26% +0.00% +0.14% / -0.19% +0.29% +0.07%] index_select linear : Elapsed 0.059 ms (5.858 ms / 100) 5.839 -> 5.849 ( +0.17%) [ +0.00% +0.50% +0.09% / +0.17% +0.39% +0.36%] index_select reverse : Elapsed 0.058 ms (5.839 ms / 100) 5.387 -> 5.393 ( +0.11%) [ +0.22% +0.07% +0.00% / +0.20% +0.46% +0.11%] index_select skip64 : Elapsed 0.054 ms (5.399 ms / 100) 5.445 -> 5.438 ( -0.13%) [ +0.00% +0.18% +0.15% / +0.28% -0.13% -0.13%] index_select skip256 : Elapsed 0.054 ms (5.445 ms / 100) 5.948 -> 5.955 ( +0.12%) [ +0.08% +0.00% +0.15% / +0.24% +0.12% +0.15%] index_select spread : Elapsed 0.060 ms (5.953 ms / 100) 6.075 -> 6.041 ( -0.56%) [ +0.23% +0.02% +0.00% / +0.18% -0.48% -0.56%] index_select strided 3 : Elapsed 0.061 ms (6.089 ms / 100) 5.765 -> 5.757 ( -0.14%) [ +0.12% +0.00% +0.12% / -0.14% +0.83% +0.73%] index_select strided 5 : Elapsed 0.058 ms (5.772 ms / 100) 6.070 -> 6.075 ( +0.08%) [ +0.03% +0.00% +0.35% / +0.18% +0.12% +0.08%] index_select strided 7 : Elapsed 0.061 ms (6.072 ms / 100) 6.059 -> 6.057 ( -0.03%) [ +0.08% +0.07% +0.00% / +0.13% -0.03% +0.07%] index_select strided 8 : Elapsed 0.061 ms (6.064 ms / 100) 6.052 -> 6.044 ( -0.13%) [ +0.33% +0.00% +0.30% / +0.15% +0.07% -0.13%] index_select strided 16 : Elapsed 0.061 ms (6.072 ms / 100) 6.055 -> 6.068 ( +0.21%) [ +0.50% +0.03% +0.00% / +0.21% +0.30% +0.31%] index_select strided 64 : Elapsed 0.061 ms (6.085 ms / 100) 5.400 -> 5.398 ( -0.04%) [ +0.06% +0.00% +0.06% / +0.07% -0.04% +0.00%] index_select strided 100 : Elapsed 0.054 ms (5.403 ms / 100) 5.937 -> 5.941 ( +0.07%) [ +0.03% +0.03% +0.00% / +0.07% +0.12% +0.10%] index_select random : Elapsed 0.059 ms (5.939 ms / 100) 5.803 -> 5.805 ( +0.03%) [ +0.48% +0.00% +0.31% / +0.03% +0.45% +0.33%] index_select random_sorted : Elapsed 0.058 ms (5.831 ms / 100) 6.024 -> 6.007 ( -0.28%) [ +0.18% +0.00% +0.28% / +0.12% -0.03% -0.28%] index_select perm : Elapsed 0.060 ms (6.035 ms / 100) 5.928 -> 5.909 ( -0.32%) [ +0.17% +0.13% +0.00% / -0.32% +0.08% +0.02%] index_select perm_sorted : Elapsed 0.059 ms (5.938 ms / 100) B = [50, 150, 15] (stride (1, 750, 50)) A = [50, 250, 15] (stride (15, 750, 1)) dim = 1 5.424 -> 5.434 ( +0.18%) [ +0.39% +0.00% +0.04% / +0.18% +0.42% +0.46%] index_select const : Elapsed 0.054 ms (5.445 ms / 100) 5.583 -> 5.586 ( +0.05%) [ +0.32% +0.00% +0.20% / +0.05% +0.50% +0.21%] index_select wrap : Elapsed 0.056 ms (5.601 ms / 100) 5.572 -> 5.565 ( -0.13%) [ +0.11% +0.00% +0.23% / -0.13% +0.47% +0.11%] index_select linear : Elapsed 0.056 ms (5.578 ms / 100) 5.688 -> 5.699 ( +0.19%) [ +0.09% +0.00% +0.00% / +0.19% +0.51% +0.35%] index_select reverse : Elapsed 0.057 ms (5.693 ms / 100) 5.422 -> 5.431 ( +0.17%) [ +0.28% +0.00% +0.00% / +0.17% +0.26% +0.37%] index_select skip64 : Elapsed 0.054 ms (5.437 ms / 100) 5.429 -> 5.441 ( +0.22%) [ +0.00% +0.07% +0.18% / +0.22% +0.33% +0.31%] index_select skip256 : Elapsed 0.054 ms (5.429 ms / 100) 5.664 -> 5.686 ( +0.39%) [ +0.00% +0.11% +0.41% / +0.39% +0.78% +1.09%] index_select spread : Elapsed 0.057 ms (5.664 ms / 100) 5.648 -> 5.668 ( +0.35%) [ +0.34% +0.00% +0.25% / +0.35% +1.12% +1.10%] index_select strided 3 : Elapsed 0.057 ms (5.667 ms / 100) 5.548 -> 5.576 ( +0.50%) [ +0.22% +0.00% +0.20% / +0.50% +2.69% +2.69%] index_select strided 5 : Elapsed 0.056 ms (5.560 ms / 100) 5.663 -> 5.664 ( +0.02%) [ +0.00% +0.26% +0.23% / +0.02% +0.97% +1.04%] index_select strided 7 : Elapsed 0.057 ms (5.663 ms / 100) 5.666 -> 5.666 ( +0.00%) [ +0.04% +0.00% +0.21% / +0.00% +1.62% +1.61%] index_select strided 8 : Elapsed 0.057 ms (5.668 ms / 100) 5.679 -> 5.686 ( +0.12%) [ +0.00% +0.07% +0.19% / +0.12% +0.92% +0.53%] index_select strided 16 : Elapsed 0.057 ms (5.679 ms / 100) 5.688 -> 5.689 ( +0.02%) [ +0.00% +0.33% +0.19% / +0.02% +0.16% +0.33%] index_select strided 64 : Elapsed 0.057 ms (5.688 ms / 100) 5.459 -> 5.445 ( -0.26%) [ +0.13% +0.05% +0.00% / -0.15% -0.26% -0.15%] index_select strided 100 : Elapsed 0.055 ms (5.466 ms / 100) 5.666 -> 5.664 ( -0.04%) [ +0.00% +0.32% +0.04% / -0.04% +0.21% +0.07%] index_select random : Elapsed 0.057 ms (5.666 ms / 100) 5.620 -> 5.635 ( +0.27%) [ +0.16% +0.25% +0.00% / +0.59% +0.27% +0.41%] index_select random_sorted : Elapsed 0.056 ms (5.629 ms / 100) 5.720 -> 5.700 ( -0.35%) [ +0.00% +0.00% +0.05% / -0.35% -0.17% -0.23%] index_select perm : Elapsed 0.057 ms (5.720 ms / 100) 5.675 -> 5.679 ( +0.07%) [ +0.14% +0.00% +0.21% / +0.07% +0.41% +0.39%] index_select perm_sorted : Elapsed 0.057 ms (5.683 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 250, 15] (stride (3750, 1, 250)) dim = 1 good 5.788 -> 5.430 ( -6.19%) [ +0.14% +0.07% +0.00% / -5.86% -6.13% -6.19%] index_select const : Elapsed 0.058 ms (5.796 ms / 100) good 6.303 -> 5.708 ( -9.44%) [ +0.00% +0.41% +0.30% / -9.08% -9.34% -9.44%] index_select wrap : Elapsed 0.063 ms (6.303 ms / 100) good 6.296 -> 5.708 ( -9.34%) [ +0.11% +0.00% +0.08% / -9.05% -9.34% -9.24%] index_select linear : Elapsed 0.063 ms (6.303 ms / 100) good 6.277 -> 5.728 ( -8.75%) [ +0.16% +0.00% +0.27% / -8.75% -8.65% -8.65%] index_select reverse : Elapsed 0.063 ms (6.287 ms / 100) good 5.902 -> 5.528 ( -6.34%) [ +0.22% +0.00% +0.29% / -6.32% -6.34% -6.32%] index_select skip64 : Elapsed 0.059 ms (5.915 ms / 100) good 5.781 -> 5.430 ( -6.07%) [ +0.14% +0.00% +0.22% / -5.86% -5.86% -6.07%] index_select skip256 : Elapsed 0.058 ms (5.789 ms / 100) Good 6.570 -> 5.841 (-11.10%) [ +0.43% +0.35% +0.00% / -10.78% -11.10% -11.07%] index_select spread : Elapsed 0.066 ms (6.598 ms / 100) Good 7.026 -> 5.910 (-15.88%) [ +0.61% +0.00% +0.23% / -15.56% -15.88% -15.71%] index_select strided 3 : Elapsed 0.071 ms (7.069 ms / 100) Good 7.318 -> 5.932 (-18.94%) [ +0.11% +0.00% +0.01% / -18.94% -18.50% -18.69%] index_select strided 5 : Elapsed 0.073 ms (7.326 ms / 100) Good 7.313 -> 5.957 (-18.54%) [ +0.00% +0.14% +0.14% / -18.26% -18.54% -18.41%] index_select strided 7 : Elapsed 0.073 ms (7.313 ms / 100) Good 7.312 -> 5.968 (-18.38%) [ +0.23% +0.00% +0.33% / -18.24% -18.27% -18.38%] index_select strided 8 : Elapsed 0.073 ms (7.329 ms / 100) Good 7.350 -> 5.977 (-18.68%) [ +0.00% +0.14% +0.33% / -18.20% -18.68% -18.60%] index_select strided 16 : Elapsed 0.074 ms (7.350 ms / 100) Good 7.333 -> 5.983 (-18.41%) [ +0.14% +0.00% +0.04% / -18.36% -18.41% -18.33%] index_select strided 64 : Elapsed 0.073 ms (7.343 ms / 100) Good 6.370 -> 5.638 (-11.49%) [ +0.36% +0.00% +0.25% / -11.41% -11.41% -11.49%] index_select strided 100 : Elapsed 0.064 ms (6.393 ms / 100) Good 7.282 -> 6.009 (-17.48%) [ +0.34% +0.00% +0.23% / -17.23% -17.29% -17.48%] index_select random : Elapsed 0.073 ms (7.307 ms / 100) Good 6.562 -> 5.840 (-11.00%) [ +0.27% +0.15% +0.00% / -11.00% -10.99% -10.74%] index_select random_sorted : Elapsed 0.066 ms (6.580 ms / 100) Good 7.293 -> 6.006 (-17.65%) [ +0.25% +0.19% +0.00% / -17.56% -17.65% -17.59%] index_select perm : Elapsed 0.073 ms (7.311 ms / 100) Good 6.573 -> 5.856 (-10.91%) [ +0.37% +0.00% +0.35% / -10.85% -10.83% -10.91%] index_select perm_sorted : Elapsed 0.066 ms (6.597 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 250, 15] (stride (15, 750, 1)) dim = 1 5.517 -> 5.355 ( -2.94%) [ +0.09% +0.00% +0.09% / -2.61% -2.68% -2.94%] index_select const : Elapsed 0.055 ms (5.522 ms / 100) 5.674 -> 5.933 ( +4.56%) [ +0.33% +0.00% +0.32% / +4.56% +4.97% +5.11%] index_select wrap : Elapsed 0.057 ms (5.693 ms / 100) 5.660 -> 5.929 ( +4.75%) [ +0.02% +0.34% +0.00% / +4.75% +5.39% +5.09%] index_select linear : Elapsed 0.057 ms (5.661 ms / 100) 5.803 -> 5.919 ( +2.00%) [ +0.00% +0.00% +0.26% / +2.00% +2.60% +2.67%] index_select reverse : Elapsed 0.058 ms (5.803 ms / 100) 5.512 -> 5.355 ( -2.85%) [ +0.33% +0.00% +0.15% / -2.70% -2.85% -2.76%] index_select skip64 : Elapsed 0.055 ms (5.530 ms / 100) 5.515 -> 5.357 ( -2.86%) [ +0.13% +0.00% +0.11% / -2.61% -2.86% -2.72%] index_select skip256 : Elapsed 0.055 ms (5.522 ms / 100) 5.791 -> 5.887 ( +1.66%) [ +0.10% +0.00% +0.26% / +1.66% +2.95% +2.85%] index_select spread : Elapsed 0.058 ms (5.797 ms / 100) 5.779 -> 5.889 ( +1.90%) [ +0.03% +0.05% +0.00% / +1.90% +3.36% +3.34%] index_select strided 3 : Elapsed 0.058 ms (5.781 ms / 100) 5.663 -> 5.566 ( -1.71%) [ +0.18% +0.05% +0.00% / -1.71% -0.04% +0.07%] index_select strided 5 : Elapsed 0.057 ms (5.673 ms / 100) 5.780 -> 5.928 ( +2.56%) [ +0.36% +0.14% +0.00% / +2.56% +3.20% +3.24%] index_select strided 7 : Elapsed 0.058 ms (5.801 ms / 100) 5.780 -> 5.815 ( +0.61%) [ +0.00% +0.28% +0.10% / +0.61% +1.78% +2.02%] index_select strided 8 : Elapsed 0.058 ms (5.780 ms / 100) 5.785 -> 5.822 ( +0.64%) [ +0.09% +0.10% +0.00% / +0.64% +1.45% +1.43%] index_select strided 16 : Elapsed 0.058 ms (5.790 ms / 100) 5.781 -> 5.810 ( +0.50%) [ +0.19% +0.00% +0.00% / +0.50% +1.59% +1.40%] index_select strided 64 : Elapsed 0.058 ms (5.792 ms / 100) 5.539 -> 5.384 ( -2.80%) [ +0.09% +0.13% +0.00% / -2.65% -2.80% -2.73%] index_select strided 100 : Elapsed 0.055 ms (5.544 ms / 100) 5.717 -> 5.742 ( +0.44%) [ +0.16% +0.00% +0.26% / +0.44% +1.80% +1.64%] index_select random : Elapsed 0.057 ms (5.726 ms / 100) 5.712 -> 5.730 ( +0.32%) [ +0.11% +0.00% +0.00% / +0.32% +1.84% +1.80%] index_select random_sorted : Elapsed 0.057 ms (5.718 ms / 100) 5.809 -> 5.910 ( +1.74%) [ +0.02% +0.03% +0.00% / +1.74% +2.36% +2.41%] index_select perm : Elapsed 0.058 ms (5.810 ms / 100) 5.772 -> 5.892 ( +2.08%) [ +0.35% +0.21% +0.00% / +2.08% +2.79% +2.70%] index_select perm_sorted : Elapsed 0.058 ms (5.792 ms / 100) B = [50, 150, 15] (stride (150, 1, 7500)) A = [50, 250, 15] (stride (250, 1, 12500)) dim = 1 good 5.803 -> 5.435 ( -6.34%) [ +0.10% +0.03% +0.00% / -6.07% -6.34% -6.22%] index_select const : Elapsed 0.058 ms (5.809 ms / 100) good 6.355 -> 5.728 ( -9.87%) [ +0.00% +0.25% +0.41% / -9.85% -9.79% -9.87%] index_select wrap : Elapsed 0.064 ms (6.355 ms / 100) Good 6.353 -> 5.715 (-10.04%) [ +0.30% +0.00% +0.02% / -10.04% -9.85% -9.55%] index_select linear : Elapsed 0.064 ms (6.372 ms / 100) good 6.338 -> 5.711 ( -9.89%) [ +0.00% +0.05% +0.00% / -9.89% -9.62% -9.77%] index_select reverse : Elapsed 0.063 ms (6.338 ms / 100) good 5.925 -> 5.526 ( -6.73%) [ +0.15% +0.00% +0.22% / -6.58% -6.73% -6.60%] index_select skip64 : Elapsed 0.059 ms (5.934 ms / 100) good 5.786 -> 5.441 ( -5.96%) [ +0.03% +0.00% +0.02% / -5.84% -5.93% -5.96%] index_select skip256 : Elapsed 0.058 ms (5.788 ms / 100) Good 6.660 -> 5.847 (-12.21%) [ +0.06% +0.00% +0.06% / -12.21% -11.76% -11.61%] index_select spread : Elapsed 0.067 ms (6.664 ms / 100) Good 7.072 -> 5.913 (-16.39%) [ +0.00% +0.45% +0.07% / -16.39% -16.15% -16.04%] index_select strided 3 : Elapsed 0.071 ms (7.072 ms / 100) Good 7.373 -> 5.949 (-19.31%) [ +0.00% +0.16% +0.20% / -19.31% -18.84% -18.87%] index_select strided 5 : Elapsed 0.074 ms (7.373 ms / 100) Good 7.355 -> 5.968 (-18.86%) [ +0.34% +0.20% +0.00% / -18.86% -18.71% -18.49%] index_select strided 7 : Elapsed 0.074 ms (7.380 ms / 100) Good 7.360 -> 5.971 (-18.87%) [ +0.14% +0.41% +0.00% / -18.87% -18.42% -18.44%] index_select strided 8 : Elapsed 0.074 ms (7.370 ms / 100) Good 7.391 -> 6.004 (-18.77%) [ +0.00% +0.28% +0.28% / -18.68% -18.63% -18.77%] index_select strided 16 : Elapsed 0.074 ms (7.391 ms / 100) Good 7.396 -> 5.968 (-19.31%) [ +0.32% +0.00% +0.16% / -19.31% -18.79% -18.70%] index_select strided 64 : Elapsed 0.074 ms (7.420 ms / 100) Good 6.397 -> 5.646 (-11.74%) [ +0.16% +0.00% +0.25% / -11.74% -11.61% -11.66%] index_select strided 100 : Elapsed 0.064 ms (6.407 ms / 100) Good 7.315 -> 6.016 (-17.76%) [ +0.00% +0.25% +0.44% / -17.76% -17.33% -17.50%] index_select random : Elapsed 0.073 ms (7.315 ms / 100) Good 6.646 -> 5.848 (-12.01%) [ +0.30% +0.21% +0.00% / -12.01% -11.38% -11.54%] index_select random_sorted : Elapsed 0.067 ms (6.666 ms / 100) Good 7.327 -> 5.993 (-18.21%) [ +0.29% +0.52% +0.00% / -18.21% -17.77% -17.66%] index_select perm : Elapsed 0.073 ms (7.348 ms / 100) Good 6.644 -> 5.848 (-11.98%) [ +0.00% +0.21% +0.17% / -11.98% -11.62% -11.38%] index_select perm_sorted : Elapsed 0.066 ms (6.644 ms / 100) out_shape = [50, 250, 150] in_shape = [50, 250, 15] idx_dim = 2 B = [50, 250, 150] (stride (37500, 1, 250)) A = [50, 250, 15] (stride (3750, 15, 1)) dim = 2 9.633 -> 9.621 ( -0.12%) [ +0.06% +0.00% +0.24% / +0.37% -0.11% -0.12%] index_add_ linear : Elapsed 0.096 ms (9.639 ms / 100) 9.233 -> 9.213 ( -0.22%) [ +0.12% +0.00% +0.17% / +0.39% -0.09% -0.22%] index_copy_ linear : Elapsed 0.092 ms (9.244 ms / 100) 9.602 -> 9.608 ( +0.06%) [ +0.00% +0.17% +0.19% / +0.42% +0.09% +0.06%] index_add_ reverse : Elapsed 0.096 ms (9.602 ms / 100) 9.222 -> 9.214 ( -0.09%) [ +0.00% +0.09% +0.22% / +0.40% -0.04% -0.09%] index_copy_ reverse : Elapsed 0.092 ms (9.222 ms / 100) 9.636 -> 9.603 ( -0.34%) [ +0.00% +0.02% +0.05% / +0.20% -0.34% -0.28%] index_add_ spread : Elapsed 0.096 ms (9.636 ms / 100) 9.267 -> 9.236 ( -0.33%) [ +0.00% +0.12% +0.12% / +0.40% -0.33% -0.33%] index_copy_ spread : Elapsed 0.093 ms (9.267 ms / 100) 9.634 -> 9.626 ( -0.08%) [ +0.00% +0.04% +0.10% / +0.49% -0.03% -0.08%] index_add_ strided 7 : Elapsed 0.096 ms (9.634 ms / 100) 9.265 -> 9.269 ( +0.04%) [ +0.08% +0.00% +0.01% / +0.56% +0.15% +0.04%] index_copy_ strided 7 : Elapsed 0.093 ms (9.272 ms / 100) 9.662 -> 9.631 ( -0.32%) [ +0.00% +0.04% +0.26% / +0.49% -0.18% -0.32%] index_add_ perm : Elapsed 0.097 ms (9.662 ms / 100) 9.291 -> 9.263 ( -0.30%) [ +0.00% +0.02% +0.10% / +0.52% -0.17% -0.30%] index_copy_ perm : Elapsed 0.093 ms (9.291 ms / 100) 9.665 -> 9.599 ( -0.68%) [ +0.07% +0.00% +0.17% / +0.50% -0.64% -0.68%] index_add_ perm_sorted : Elapsed 0.097 ms (9.672 ms / 100) 9.284 -> 9.243 ( -0.44%) [ +0.04% +0.00% +0.23% / +0.56% -0.41% -0.44%] index_copy_ perm_sorted : Elapsed 0.093 ms (9.288 ms / 100) 72.476 -> 72.296 ( -0.25%) [ +0.25% +0.00% +0.17% / +0.34% -0.25% +0.21%] index_select const : Elapsed 0.727 ms (72.657 ms / 100) 75.725 -> 75.671 ( -0.07%) [ +0.07% +0.31% +0.00% / +0.33% +0.02% -0.07%] index_select wrap : Elapsed 0.758 ms (75.777 ms / 100) 72.870 -> 72.800 ( -0.10%) [ +0.00% +0.00% +0.19% / +0.36% +0.28% -0.10%] index_select linear : Elapsed 0.729 ms (72.872 ms / 100) 73.117 -> 72.897 ( -0.30%) [ +0.00% +0.11% +0.03% / +0.25% -0.22% -0.30%] index_select reverse : Elapsed 0.731 ms (73.117 ms / 100) 72.510 -> 72.392 ( -0.16%) [ +0.07% +0.00% +0.00% / -0.07% +0.13% -0.16%] index_select skip64 : Elapsed 0.726 ms (72.560 ms / 100) 72.372 -> 72.323 ( -0.07%) [ +0.00% +0.34% +0.10% / +0.42% +0.50% -0.07%] index_select skip256 : Elapsed 0.724 ms (72.372 ms / 100) 74.023 -> 74.003 ( -0.03%) [ +0.00% +0.11% +0.27% / +0.09% -0.03% +0.18%] index_select spread : Elapsed 0.740 ms (74.023 ms / 100) 75.736 -> 75.683 ( -0.07%) [ +0.00% +0.10% +0.02% / +0.27% -0.03% -0.07%] index_select strided 3 : Elapsed 0.757 ms (75.736 ms / 100) 75.162 -> 75.047 ( -0.15%) [ +0.00% +0.34% +0.40% / +0.02% -0.10% -0.15%] index_select strided 5 : Elapsed 0.752 ms (75.162 ms / 100) 75.889 -> 75.440 ( -0.59%) [ +0.00% +0.26% +0.02% / +0.11% -0.49% -0.59%] index_select strided 7 : Elapsed 0.759 ms (75.889 ms / 100) 75.601 -> 75.645 ( +0.06%) [ +0.42% +0.46% +0.00% / +0.45% +0.27% +0.06%] index_select strided 8 : Elapsed 0.759 ms (75.922 ms / 100) 75.562 -> 75.441 ( -0.16%) [ +0.00% +0.24% +0.47% / +0.08% -0.16% +0.31%] index_select random : Elapsed 0.756 ms (75.562 ms / 100) 73.953 -> 74.036 ( +0.11%) [ +0.19% +0.00% +0.13% / +0.48% +0.20% +0.11%] index_select random_sorted : Elapsed 0.741 ms (74.090 ms / 100) B = [50, 250, 150] (stride (1, 50, 12500)) A = [50, 250, 15] (stride (250, 1, 12500)) dim = 2 8.134 -> 8.150 ( +0.20%) [ +0.09% +0.20% +0.00% / +0.52% +0.33% +0.20%] index_add_ linear : Elapsed 0.081 ms (8.141 ms / 100) 7.949 -> 7.972 ( +0.29%) [ +0.15% +0.00% +0.01% / +0.45% +0.36% +0.29%] index_copy_ linear : Elapsed 0.080 ms (7.961 ms / 100) 8.138 -> 8.156 ( +0.22%) [ +0.10% +0.00% +0.06% / +0.22% +0.34% +0.32%] index_add_ reverse : Elapsed 0.081 ms (8.146 ms / 100) 7.954 -> 7.970 ( +0.20%) [ +0.00% +0.14% +0.03% / +0.20% +0.24% +0.29%] index_copy_ reverse : Elapsed 0.080 ms (7.954 ms / 100) 8.147 -> 8.159 ( +0.15%) [ +0.00% +0.16% +0.00% / +0.42% +0.15% +0.18%] index_add_ spread : Elapsed 0.081 ms (8.147 ms / 100) 7.951 -> 7.984 ( +0.42%) [ +0.00% +0.06% +0.16% / +0.57% +0.42% +0.54%] index_copy_ spread : Elapsed 0.080 ms (7.951 ms / 100) 8.172 -> 8.169 ( -0.04%) [ +0.09% +0.05% +0.00% / +0.35% +0.07% -0.04%] index_add_ strided 7 : Elapsed 0.082 ms (8.179 ms / 100) 7.984 -> 7.986 ( +0.03%) [ +0.00% +0.06% +0.01% / +0.44% +0.11% +0.03%] index_copy_ strided 7 : Elapsed 0.080 ms (7.984 ms / 100) 8.127 -> 8.157 ( +0.37%) [ +0.00% +0.12% +0.06% / +0.41% +0.37% +0.62%] index_add_ perm : Elapsed 0.081 ms (8.127 ms / 100) 7.943 -> 7.984 ( +0.52%) [ +0.16% +0.26% +0.00% / +0.63% +0.59% +0.52%] index_copy_ perm : Elapsed 0.080 ms (7.956 ms / 100) 8.138 -> 8.173 ( +0.43%) [ +0.05% +0.00% +0.10% / +0.50% +0.43% +0.43%] index_add_ perm_sorted : Elapsed 0.081 ms (8.142 ms / 100) 7.970 -> 7.990 ( +0.25%) [ +0.00% +0.05% +0.04% / +0.34% +0.29% +0.25%] index_copy_ perm_sorted : Elapsed 0.080 ms (7.970 ms / 100) good 80.031 -> 75.526 ( -5.63%) [ +0.00% +0.50% +0.58% / -0.55% -5.63% -4.81%] index_select const : Elapsed 0.800 ms (80.031 ms / 100) 83.083 -> 82.499 ( -0.70%) [ +0.01% +0.42% +0.00% / -0.70% +0.70% +1.94%] index_select wrap : Elapsed 0.831 ms (83.091 ms / 100) 75.914 -> 75.894 ( -0.03%) [ +0.66% +1.09% +0.00% / -0.03% +1.39% +0.16%] index_select linear : Elapsed 0.764 ms (76.415 ms / 100) 78.684 -> 78.067 ( -0.78%) [ +1.02% +0.59% +0.00% / -0.78% +2.05% +2.10%] index_select reverse : Elapsed 0.795 ms (79.489 ms / 100) 79.540 -> 75.811 ( -4.69%) [ +0.00% +0.12% +0.71% / -0.18% -4.62% -4.69%] index_select skip64 : Elapsed 0.795 ms (79.540 ms / 100) 79.006 -> 75.705 ( -4.18%) [ +0.62% +0.89% +0.00% / -0.25% -3.67% -4.18%] index_select skip256 : Elapsed 0.795 ms (79.493 ms / 100) 77.616 -> 76.997 ( -0.80%) [ +1.05% +0.00% +0.28% / -0.16% -0.80% -0.07%] index_select spread : Elapsed 0.784 ms (78.431 ms / 100) 83.597 -> 80.253 ( -4.00%) [ +0.80% +1.01% +0.00% / +0.07% -4.00% -3.12%] index_select strided 3 : Elapsed 0.843 ms (84.269 ms / 100) 82.107 -> 82.601 ( +0.60%) [ +1.02% +1.27% +0.00% / +0.60% +5.44% +5.54%] index_select strided 5 : Elapsed 0.829 ms (82.946 ms / 100) 82.584 -> 82.123 ( -0.56%) [ +0.00% +0.76% +0.27% / -0.56% +2.12% +1.78%] index_select strided 7 : Elapsed 0.826 ms (82.584 ms / 100) 82.697 -> 82.301 ( -0.48%) [ +0.64% +0.00% +0.09% / -0.48% +1.49% +2.25%] index_select strided 8 : Elapsed 0.832 ms (83.225 ms / 100) 82.774 -> 82.506 ( -0.32%) [ +0.00% +0.80% +0.17% / -0.32% +1.31% +1.22%] index_select random : Elapsed 0.828 ms (82.774 ms / 100) 76.720 -> 76.619 ( -0.13%) [ +0.00% +0.77% +1.10% / -0.13% +1.49% +2.24%] index_select random_sorted : Elapsed 0.767 ms (76.720 ms / 100) out_shape = [150, 15, 50] in_shape = [250, 15, 50] idx_dim = 0 B = [150, 15, 50] (stride (750, 50, 1)) A = [250, 15, 50] (stride (50, 12500, 1)) dim = 0 8.736 -> 8.739 ( +0.03%) [ +0.01% +0.06% +0.00% / +0.09% +0.27% +0.03%] index_select const : Elapsed 0.087 ms (8.737 ms / 100) 9.224 -> 9.235 ( +0.12%) [ +0.04% +0.00% +0.17% / +0.21% +0.12% +0.15%] index_select wrap : Elapsed 0.092 ms (9.228 ms / 100) 9.240 -> 9.235 ( -0.05%) [ +0.03% +0.00% +0.06% / +0.14% -0.05% +0.13%] index_select linear : Elapsed 0.092 ms (9.243 ms / 100) 9.225 -> 9.244 ( +0.21%) [ +0.12% +0.07% +0.00% / +0.27% +0.21% +0.37%] index_select reverse : Elapsed 0.092 ms (9.236 ms / 100) 8.742 -> 8.755 ( +0.15%) [ +0.00% +0.17% +0.14% / +0.18% +0.15% +0.27%] index_select skip64 : Elapsed 0.087 ms (8.742 ms / 100) 8.738 -> 8.749 ( +0.13%) [ +0.00% +0.01% +0.05% / +0.21% +0.27% +0.13%] index_select skip256 : Elapsed 0.087 ms (8.738 ms / 100) 9.278 -> 9.310 ( +0.34%) [ +0.09% +0.00% +0.13% / +0.40% +0.34% +0.38%] index_select spread : Elapsed 0.093 ms (9.286 ms / 100) 9.317 -> 9.324 ( +0.08%) [ +0.00% +0.02% +0.12% / +0.08% +0.21% +0.16%] index_select strided 3 : Elapsed 0.093 ms (9.317 ms / 100) 9.150 -> 9.156 ( +0.07%) [ +0.02% +0.07% +0.00% / +0.35% +0.07% +0.15%] index_select strided 5 : Elapsed 0.092 ms (9.152 ms / 100) 9.340 -> 9.324 ( -0.17%) [ +0.01% +0.06% +0.00% / -0.04% -0.17% -0.04%] index_select strided 7 : Elapsed 0.093 ms (9.341 ms / 100) 9.302 -> 9.325 ( +0.25%) [ +0.00% +0.24% +0.14% / +0.25% +0.34% +0.32%] index_select strided 8 : Elapsed 0.093 ms (9.302 ms / 100) 9.310 -> 9.300 ( -0.11%) [ +0.12% +0.10% +0.00% / +0.14% +0.20% -0.11%] index_select strided 16 : Elapsed 0.093 ms (9.321 ms / 100) 9.312 -> 9.308 ( -0.04%) [ +0.01% +0.15% +0.00% / -0.01% +0.20% -0.04%] index_select strided 64 : Elapsed 0.093 ms (9.313 ms / 100) 8.753 -> 8.788 ( +0.40%) [ +0.01% +0.00% +0.09% / +0.40% +0.51% +0.56%] index_select strided 100 : Elapsed 0.088 ms (8.754 ms / 100) 9.212 -> 9.220 ( +0.09%) [ +0.11% +0.18% +0.00% / +0.09% +0.34% +0.45%] index_select random : Elapsed 0.092 ms (9.222 ms / 100) 9.161 -> 9.130 ( -0.34%) [ +0.17% +0.00% +0.10% / +0.12% -0.34% -0.01%] index_select random_sorted : Elapsed 0.092 ms (9.177 ms / 100) 9.293 -> 9.302 ( +0.10%) [ +0.10% +0.00% +0.10% / +0.10% +0.31% +0.33%] index_select perm : Elapsed 0.093 ms (9.302 ms / 100) 9.270 -> 9.287 ( +0.18%) [ +0.00% +0.09% +0.05% / +0.18% +0.26% +0.23%] index_select perm_sorted : Elapsed 0.093 ms (9.270 ms / 100) out_shape = [250, 150, 50] in_shape = [250, 15, 50] idx_dim = 1 B = [250, 150, 50] (stride (7500, 1, 150)) A = [250, 15, 50] (stride (50, 12500, 1)) dim = 1 18.092 -> 17.779 ( -1.73%) [ +0.06% +0.00% +0.01% / +0.17% -1.73% -1.40%] index_add_ linear : Elapsed 0.181 ms (18.102 ms / 100) 13.491 -> 13.513 ( +0.16%) [ +0.06% +0.00% +0.12% / +0.16% +0.64% +0.59%] index_copy_ linear : Elapsed 0.135 ms (13.499 ms / 100) 17.797 -> 17.806 ( +0.05%) [ +0.00% +0.06% +0.21% / +0.05% +3.65% +3.56%] index_add_ reverse : Elapsed 0.178 ms (17.797 ms / 100) 13.398 -> 13.438 ( +0.30%) [ +0.13% +0.00% +0.10% / +0.30% +3.10% +3.08%] index_copy_ reverse : Elapsed 0.134 ms (13.416 ms / 100) 25.903 -> 25.909 ( +0.02%) [ +0.00% +0.22% +0.16% / +0.02% +1.95% +2.02%] index_add_ spread : Elapsed 0.259 ms (25.903 ms / 100) 17.425 -> 17.459 ( +0.20%) [ +0.04% +0.00% +0.17% / +0.20% +0.60% +0.70%] index_copy_ spread : Elapsed 0.174 ms (17.432 ms / 100) 24.841 -> 24.916 ( +0.30%) [ +0.00% +0.02% +0.16% / +0.30% +2.05% +2.10%] index_add_ strided 7 : Elapsed 0.248 ms (24.841 ms / 100) 16.918 -> 16.949 ( +0.18%) [ +0.01% +0.00% +0.05% / +0.18% +0.53% +0.56%] index_copy_ strided 7 : Elapsed 0.169 ms (16.919 ms / 100) 27.343 -> 27.318 ( -0.09%) [ +0.00% +0.07% +0.07% / +0.15% +0.05% -0.09%] index_add_ perm : Elapsed 0.273 ms (27.343 ms / 100) 18.132 -> 18.143 ( +0.06%) [ +0.00% +0.06% +0.17% / +0.06% +0.07% +0.06%] index_copy_ perm : Elapsed 0.181 ms (18.132 ms / 100) 24.501 -> 23.906 ( -2.43%) [ +0.19% +0.00% +0.18% / +0.16% -2.11% -2.43%] index_add_ perm_sorted : Elapsed 0.245 ms (24.547 ms / 100) 16.582 -> 16.346 ( -1.42%) [ +0.10% +0.22% +0.00% / +0.22% -1.28% -1.42%] index_copy_ perm_sorted : Elapsed 0.166 ms (16.599 ms / 100) BEST 121.918 -> 17.146 (-85.94%) [ +0.27% +0.30% +0.00% / -85.94% -85.87% -85.88%] index_select const : Elapsed 1.223 ms (122.252 ms / 100) BEST 127.908 -> 23.408 (-81.70%) [ +0.31% +0.24% +0.00% / -81.70% -81.62% -81.61%] index_select wrap : Elapsed 1.283 ms (128.307 ms / 100) BEST 122.525 -> 19.725 (-83.90%) [ +0.20% +0.00% +0.22% / -83.30% -83.88% -83.90%] index_select linear : Elapsed 1.228 ms (122.767 ms / 100) BEST 123.736 -> 19.255 (-84.44%) [ +0.14% +0.09% +0.00% / -84.44% -84.00% -84.02%] index_select reverse : Elapsed 1.239 ms (123.909 ms / 100) BEST 121.569 -> 19.167 (-84.23%) [ +0.11% +0.46% +0.00% / -84.23% -83.88% -83.88%] index_select skip64 : Elapsed 1.217 ms (121.705 ms / 100) BEST 121.989 -> 18.740 (-84.64%) [ +0.32% +0.12% +0.00% / -84.44% -84.64% -84.64%] index_select skip256 : Elapsed 1.224 ms (122.381 ms / 100) BEST 124.739 -> 19.067 (-84.71%) [ +0.35% +0.08% +0.00% / -84.71% -84.18% -84.22%] index_select spread : Elapsed 1.252 ms (125.174 ms / 100) BEST 127.408 -> 18.246 (-85.68%) [ +0.04% +0.00% +0.14% / -85.68% -85.45% -85.45%] index_select strided 3 : Elapsed 1.275 ms (127.457 ms / 100) BEST 126.034 -> 18.730 (-85.14%) [ +0.26% +0.13% +0.00% / -85.14% -84.45% -84.47%] index_select strided 5 : Elapsed 1.264 ms (126.357 ms / 100) BEST 129.328 -> 20.892 (-83.85%) [ +0.24% +0.00% +0.14% / -83.81% -83.85% -83.83%] index_select strided 7 : Elapsed 1.296 ms (129.642 ms / 100) BEST 128.914 -> 21.514 (-83.31%) [ +0.29% +0.26% +0.00% / -83.02% -83.30% -83.31%] index_select strided 8 : Elapsed 1.293 ms (129.293 ms / 100) BEST 128.520 -> 20.481 (-84.06%) [ +0.27% +0.22% +0.00% / -84.06% -83.75% -83.74%] index_select random : Elapsed 1.289 ms (128.870 ms / 100) BEST 124.478 -> 19.235 (-84.55%) [ +0.47% +0.34% +0.00% / -84.12% -84.54% -84.55%] index_select random_sorted : Elapsed 1.251 ms (125.060 ms / 100) B = [250, 150, 50] (stride (7500, 1, 150)) A = [250, 15, 50] (stride (1, 250, 3750)) dim = 1 26.814 -> 26.811 ( -0.01%) [ +0.04% +0.00% +0.12% / -0.01% +0.66% +0.76%] index_add_ linear : Elapsed 0.268 ms (26.824 ms / 100) 19.334 -> 19.310 ( -0.12%) [ +0.00% +0.00% +0.34% / +0.09% +0.01% -0.12%] index_copy_ linear : Elapsed 0.193 ms (19.334 ms / 100) 26.752 -> 26.795 ( +0.16%) [ +0.13% +0.00% +0.12% / +0.16% +0.54% +0.52%] index_add_ reverse : Elapsed 0.268 ms (26.786 ms / 100) 19.226 -> 19.250 ( +0.12%) [ +0.00% +0.19% +0.21% / +0.12% +1.10% +0.97%] index_copy_ reverse : Elapsed 0.192 ms (19.226 ms / 100) 30.765 -> 30.668 ( -0.32%) [ +0.00% +0.01% +0.00% / -0.01% -0.32% -0.24%] index_add_ spread : Elapsed 0.308 ms (30.765 ms / 100) 22.187 -> 22.177 ( -0.05%) [ +0.00% +0.03% +0.09% / +0.08% -0.05% +0.14%] index_copy_ spread : Elapsed 0.222 ms (22.187 ms / 100) 30.242 -> 30.032 ( -0.69%) [ +0.01% +0.00% +0.12% / +0.06% -0.69% -0.63%] index_add_ strided 7 : Elapsed 0.302 ms (30.246 ms / 100) 21.875 -> 21.719 ( -0.71%) [ +0.07% +0.12% +0.00% / +0.16% -0.71% -0.68%] index_copy_ strided 7 : Elapsed 0.219 ms (21.890 ms / 100) 31.610 -> 31.495 ( -0.36%) [ +0.00% +0.09% +0.04% / +0.13% -0.35% -0.36%] index_add_ perm : Elapsed 0.316 ms (31.610 ms / 100) 22.768 -> 22.717 ( -0.22%) [ +0.00% +0.08% +0.12% / +0.13% -0.17% -0.22%] index_copy_ perm : Elapsed 0.228 ms (22.768 ms / 100) 29.782 -> 29.461 ( -1.08%) [ +0.03% +0.00% +0.03% / +0.02% -1.08% -0.99%] index_add_ perm_sorted : Elapsed 0.298 ms (29.792 ms / 100) 21.583 -> 21.332 ( -1.16%) [ +0.02% +0.03% +0.00% / +0.19% -1.16% -1.16%] index_copy_ perm_sorted : Elapsed 0.216 ms (21.588 ms / 100) BEST 120.813 -> 18.765 (-84.47%) [ +0.02% +0.00% +0.11% / -84.13% -84.47% -84.45%] index_select const : Elapsed 1.208 ms (120.832 ms / 100) BEST 154.625 -> 25.208 (-83.70%) [ +0.00% +0.19% +0.00% / -83.70% -83.63% -83.62%] index_select wrap : Elapsed 1.546 ms (154.625 ms / 100) BEST 122.650 -> 19.602 (-84.02%) [ +0.33% +0.00% +0.08% / -83.78% -84.02% -84.01%] index_select linear : Elapsed 1.230 ms (123.049 ms / 100) BEST 128.429 -> 19.618 (-84.72%) [ +0.41% +0.00% +0.00% / -84.64% -84.71% -84.72%] index_select reverse : Elapsed 1.290 ms (128.954 ms / 100) BEST 120.743 -> 18.551 (-84.64%) [ +0.17% +0.00% +0.07% / -84.20% -84.64% -84.63%] index_select skip64 : Elapsed 1.210 ms (120.953 ms / 100) BEST 120.773 -> 17.250 (-85.72%) [ +0.66% +0.42% +0.00% / -85.69% -85.71% -85.72%] index_select skip256 : Elapsed 1.216 ms (121.575 ms / 100) BEST 124.864 -> 19.843 (-84.11%) [ +0.13% +0.00% +0.11% / -84.11% -83.45% -83.46%] index_select spread : Elapsed 1.250 ms (125.023 ms / 100) BEST 143.097 -> 19.121 (-86.64%) [ +0.02% +0.00% +0.09% / -86.33% -86.63% -86.64%] index_select strided 3 : Elapsed 1.431 ms (143.130 ms / 100) BEST 127.252 -> 19.793 (-84.45%) [ +0.00% +0.27% +0.20% / -84.45% -83.99% -84.00%] index_select strided 5 : Elapsed 1.273 ms (127.252 ms / 100) BEST 158.976 -> 21.001 (-86.79%) [ +0.67% +0.97% +0.00% / -86.36% -86.79% -86.77%] index_select strided 7 : Elapsed 1.600 ms (160.041 ms / 100) BEST 159.208 -> 22.383 (-85.94%) [ +0.15% +0.24% +0.00% / -85.94% -85.80% -85.82%] index_select strided 8 : Elapsed 1.595 ms (159.452 ms / 100) BEST 146.372 -> 19.970 (-86.36%) [ +0.40% +0.05% +0.00% / -86.09% -86.34% -86.36%] index_select random : Elapsed 1.470 ms (146.955 ms / 100) BEST 124.510 -> 20.099 (-83.86%) [ +0.00% +0.09% +0.25% / -83.86% -83.74% -83.74%] index_select random_sorted : Elapsed 1.245 ms (124.510 ms / 100) B = [250, 150, 50] (stride (50, 12500, 1)) dim = 1 fill_cnt = 15 2.210 -> 2.208 ( -0.09%) [ +0.05% +0.32% +0.00% / +0.45% +0.00% -0.09%] index_fill_ const : Elapsed 0.022 ms (2.211 ms / 100) 2.232 -> 2.231 ( -0.04%) [ +0.00% +0.18% +0.09% / +0.54% +0.04% -0.04%] index_fill_ linear : Elapsed 0.022 ms (2.232 ms / 100) 2.249 -> 2.254 ( +0.22%) [ +0.09% +0.09% +0.00% / +0.58% +0.44% +0.22%] index_fill_ reverse : Elapsed 0.023 ms (2.251 ms / 100) 2.208 -> 2.205 ( -0.14%) [ +0.23% +0.14% +0.00% / +0.59% -0.05% -0.14%] index_fill_ skip64 : Elapsed 0.022 ms (2.213 ms / 100) 2.207 -> 2.207 ( +0.00%) [ +0.18% +0.09% +0.00% / +0.54% +0.00% +0.00%] index_fill_ skip256 : Elapsed 0.022 ms (2.211 ms / 100) 2.241 -> 2.233 ( -0.36%) [ +0.00% +0.00% +0.04% / +0.36% -0.36% -0.22%] index_fill_ spread : Elapsed 0.022 ms (2.241 ms / 100) 2.232 -> 2.230 ( -0.09%) [ +0.00% +0.27% +0.09% / +0.40% +0.09% -0.09%] index_fill_ strided 3 : Elapsed 0.022 ms (2.232 ms / 100) 2.227 -> 2.239 ( +0.54%) [ +0.00% +0.13% +0.04% / +0.58% +0.54% +0.67%] index_fill_ strided 5 : Elapsed 0.022 ms (2.227 ms / 100) 2.229 -> 2.241 ( +0.54%) [ +0.22% +0.00% +0.27% / +0.54% +0.72% +0.76%] index_fill_ strided 7 : Elapsed 0.022 ms (2.234 ms / 100) 2.256 -> 2.266 ( +0.44%) [ +0.00% +0.09% +0.09% / +0.49% +0.44% +0.53%] index_fill_ strided 8 : Elapsed 0.023 ms (2.256 ms / 100) 2.233 -> 2.238 ( +0.22%) [ +0.00% +0.04% +0.27% / +0.22% +0.76% +0.72%] index_fill_ strided 16 : Elapsed 0.022 ms (2.233 ms / 100) 2.230 -> 2.236 ( +0.27%) [ +0.00% +0.00% +0.04% / +0.27% +0.90% +0.94%] index_fill_ strided 64 : Elapsed 0.022 ms (2.230 ms / 100) 2.203 -> 2.209 ( +0.27%) [ +0.00% +0.00% +0.14% / +0.27% +0.54% +0.59%] index_fill_ strided 100 : Elapsed 0.022 ms (2.203 ms / 100) 2.229 -> 2.238 ( +0.40%) [ +0.00% +0.09% +0.36% / +0.40% +0.99% +0.81%] index_fill_ random : Elapsed 0.022 ms (2.229 ms / 100) 2.223 -> 2.230 ( +0.31%) [ +0.00% +0.13% +0.04% / +0.31% +1.08% +1.03%] index_fill_ random_sorted : Elapsed 0.022 ms (2.223 ms / 100) 2.230 -> 2.236 ( +0.27%) [ +0.09% +0.13% +0.00% / +0.27% +0.27% +0.27%] index_fill_ perm : Elapsed 0.022 ms (2.232 ms / 100) 2.233 -> 2.242 ( +0.40%) [ +0.00% +0.04% +0.09% / +0.49% +0.49% +0.40%] index_fill_ perm_sorted : Elapsed 0.022 ms (2.233 ms / 100) B = [250, 150, 50] (stride (50, 12500, 1)) A = [250, 15, 50] (stride (50, 12500, 1)) dim = 1 4.855 -> 4.867 ( +0.25%) [ +0.16% +0.16% +0.00% / +0.29% +0.25% +0.29%] index_add_ linear : Elapsed 0.049 ms (4.863 ms / 100) 4.670 -> 4.674 ( +0.09%) [ +0.09% +0.00% +0.11% / +0.28% +0.19% +0.09%] index_copy_ linear : Elapsed 0.047 ms (4.674 ms / 100) 4.853 -> 4.862 ( +0.19%) [ +0.06% +0.02% +0.00% / +0.19% +0.33% +0.19%] index_add_ reverse : Elapsed 0.049 ms (4.856 ms / 100) 4.673 -> 4.674 ( +0.02%) [ +0.00% +0.06% +0.11% / +0.02% +0.19% +0.17%] index_copy_ reverse : Elapsed 0.047 ms (4.673 ms / 100) 4.839 -> 4.857 ( +0.37%) [ +0.27% +0.19% +0.00% / +0.48% +0.52% +0.37%] index_add_ spread : Elapsed 0.049 ms (4.852 ms / 100) 4.668 -> 4.680 ( +0.26%) [ +0.11% +0.00% +0.06% / +0.26% +0.45% +0.54%] index_copy_ spread : Elapsed 0.047 ms (4.673 ms / 100) 4.860 -> 4.866 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.33% +0.12% +0.14%] index_add_ strided 7 : Elapsed 0.049 ms (4.866 ms / 100) 4.680 -> 4.685 ( +0.11%) [ +0.21% +0.00% +0.06% / +0.24% +0.21% +0.11%] index_copy_ strided 7 : Elapsed 0.047 ms (4.690 ms / 100) 4.844 -> 4.869 ( +0.52%) [ +0.10% +0.12% +0.00% / +0.52% +0.58% +0.72%] index_add_ perm : Elapsed 0.048 ms (4.849 ms / 100) 4.677 -> 4.685 ( +0.17%) [ +0.00% +0.06% +0.02% / +0.17% +0.34% +0.28%] index_copy_ perm : Elapsed 0.047 ms (4.677 ms / 100) 4.855 -> 4.862 ( +0.14%) [ +0.25% +0.37% +0.00% / +0.49% +0.14% +0.21%] index_add_ perm_sorted : Elapsed 0.049 ms (4.867 ms / 100) 4.686 -> 4.683 ( -0.06%) [ +0.02% +0.00% +0.02% / +0.17% -0.06% -0.06%] index_copy_ perm_sorted : Elapsed 0.047 ms (4.687 ms / 100) 16.392 -> 16.520 ( +0.78%) [ +0.09% +0.00% +0.27% / +0.78% +1.08% +1.06%] index_select const : Elapsed 0.164 ms (16.407 ms / 100) 18.842 -> 18.906 ( +0.34%) [ +0.00% +0.15% +0.19% / +0.34% +0.82% +0.74%] index_select wrap : Elapsed 0.188 ms (18.842 ms / 100) 16.583 -> 16.587 ( +0.02%) [ +0.15% +0.00% +0.34% / +0.52% +0.04% +0.02%] index_select linear : Elapsed 0.166 ms (16.608 ms / 100) 16.837 -> 16.883 ( +0.27%) [ +0.10% +0.00% +0.34% / +0.67% +0.27% +0.38%] index_select reverse : Elapsed 0.169 ms (16.853 ms / 100) 16.376 -> 16.464 ( +0.54%) [ +0.00% +0.04% +0.16% / +0.54% +0.86% +0.78%] index_select skip64 : Elapsed 0.164 ms (16.376 ms / 100) 16.408 -> 16.493 ( +0.52%) [ +0.00% +0.02% +0.01% / +0.52% +0.70% +0.58%] index_select skip256 : Elapsed 0.164 ms (16.408 ms / 100) 16.958 -> 17.040 ( +0.48%) [ +0.05% +0.18% +0.00% / +0.48% +0.55% +0.50%] index_select spread : Elapsed 0.170 ms (16.967 ms / 100) 17.522 -> 17.603 ( +0.46%) [ +0.15% +0.16% +0.00% / +0.66% +0.46% +0.50%] index_select strided 3 : Elapsed 0.175 ms (17.548 ms / 100) 17.142 -> 17.113 ( -0.17%) [ +0.00% +0.02% +0.38% / +0.46% -0.17% -0.17%] index_select strided 5 : Elapsed 0.171 ms (17.142 ms / 100) 19.059 -> 19.128 ( +0.36%) [ +0.00% +0.01% +0.27% / +0.36% +0.56% +0.45%] index_select strided 7 : Elapsed 0.191 ms (19.059 ms / 100) 19.034 -> 19.082 ( +0.25%) [ +0.00% +0.12% +0.21% / +0.25% +0.71% +0.73%] index_select strided 8 : Elapsed 0.190 ms (19.034 ms / 100) 18.440 -> 18.522 ( +0.44%) [ +0.23% +0.00% +0.06% / +0.44% +0.48% +0.51%] index_select random : Elapsed 0.185 ms (18.482 ms / 100) 16.906 -> 16.969 ( +0.37%) [ +0.24% +0.00% +0.12% / +0.60% +0.45% +0.37%] index_select random_sorted : Elapsed 0.169 ms (16.946 ms / 100) B = [250, 150, 50] (stride (1, 12500, 250)) A = [250, 15, 50] (stride (50, 12500, 1)) dim = 1 5.070 -> 5.056 ( -0.28%) [ +0.00% +0.26% +0.20% / +0.34% -0.28% -0.16%] index_add_ linear : Elapsed 0.051 ms (5.070 ms / 100) 4.874 -> 4.881 ( +0.14%) [ +0.00% +0.00% +0.02% / +0.18% +0.16% +0.14%] index_copy_ linear : Elapsed 0.049 ms (4.874 ms / 100) 5.055 -> 5.063 ( +0.16%) [ +0.00% +0.04% +0.04% / +0.16% +2.31% +0.44%] index_add_ reverse : Elapsed 0.051 ms (5.055 ms / 100) 4.857 -> 4.878 ( +0.43%) [ +0.00% +0.10% +0.39% / +0.43% +0.49% +0.49%] index_copy_ reverse : Elapsed 0.049 ms (4.857 ms / 100) 5.130 -> 5.124 ( -0.12%) [ +0.00% +0.21% +0.21% / +0.21% -0.12% +0.02%] index_add_ spread : Elapsed 0.051 ms (5.130 ms / 100) 4.879 -> 4.883 ( +0.08%) [ +0.00% +0.06% +0.10% / +0.33% +0.25% +0.08%] index_copy_ spread : Elapsed 0.049 ms (4.879 ms / 100) 5.098 -> 5.087 ( -0.22%) [ +0.00% +0.12% +0.06% / +0.14% -0.10% -0.22%] index_add_ strided 7 : Elapsed 0.051 ms (5.098 ms / 100) 4.874 -> 4.874 ( +0.00%) [ +0.10% +0.00% +0.35% / +0.41% +0.39% +0.00%] index_copy_ strided 7 : Elapsed 0.049 ms (4.879 ms / 100) 5.110 -> 5.109 ( -0.02%) [ +0.20% +0.31% +0.00% / +0.06% +0.22% -0.02%] index_add_ perm : Elapsed 0.051 ms (5.120 ms / 100) 4.874 -> 4.879 ( +0.10%) [ +0.06% +0.06% +0.00% / +0.16% +0.84% +0.10%] index_copy_ perm : Elapsed 0.049 ms (4.877 ms / 100) 5.053 -> 5.062 ( +0.18%) [ +0.08% +0.06% +0.00% / +0.18% +0.97% +0.79%] index_add_ perm_sorted : Elapsed 0.051 ms (5.057 ms / 100) 4.871 -> 4.886 ( +0.31%) [ +0.00% +0.10% +0.06% / +0.31% +0.66% +0.31%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.871 ms / 100) Good 31.787 -> 28.273 (-11.05%) [ +2.14% +2.29% +0.00% / -0.06% -11.05% -9.89%] index_select const : Elapsed 0.325 ms (32.467 ms / 100) 44.708 -> 43.279 ( -3.20%) [ +1.81% +1.18% +0.00% / +1.11% -3.20% -0.88%] index_select wrap : Elapsed 0.455 ms (45.515 ms / 100) Good 31.988 -> 28.784 (-10.02%) [ +0.00% +3.87% +0.68% / -2.25% -10.02% -8.84%] index_select linear : Elapsed 0.320 ms (31.988 ms / 100) Good 37.297 -> 33.078 (-11.31%) [ +1.25% +0.00% +0.64% / -0.25% -10.56% -11.31%] index_select reverse : Elapsed 0.378 ms (37.762 ms / 100) good 30.516 -> 28.051 ( -8.08%) [ +0.00% +0.84% +0.10% / -1.76% -8.08% -5.84%] index_select skip64 : Elapsed 0.305 ms (30.516 ms / 100) Good 37.542 -> 33.676 (-10.30%) [ +1.93% +0.86% +0.00% / +0.14% -8.74% -10.30%] index_select skip256 : Elapsed 0.383 ms (38.268 ms / 100) 34.646 -> 34.301 ( -1.00%) [ +0.94% +1.31% +0.00% / -1.00% +6.39% +6.05%] index_select spread : Elapsed 0.350 ms (34.973 ms / 100) good 48.836 -> 45.596 ( -6.63%) [ +1.19% +0.77% +0.00% / +0.03% -6.63% -6.25%] index_select strided 3 : Elapsed 0.494 ms (49.417 ms / 100) 39.324 -> 39.190 ( -0.34%) [ +2.09% +0.00% +1.85% / -0.34% +16.12% +16.44%] index_select strided 5 : Elapsed 0.401 ms (40.145 ms / 100) 49.078 -> 48.240 ( -1.71%) [ +1.13% +0.50% +0.00% / -1.71% +1.15% +1.74%] index_select strided 7 : Elapsed 0.496 ms (49.631 ms / 100) 47.170 -> 47.281 ( +0.24%) [ +1.31% +0.00% +0.38% / +0.24% +4.13% +4.65%] index_select strided 8 : Elapsed 0.478 ms (47.788 ms / 100) 43.281 -> 43.321 ( +0.09%) [ +1.04% +0.00% +0.20% / +0.09% +2.44% +3.41%] index_select random : Elapsed 0.437 ms (43.730 ms / 100) 34.477 -> 35.532 ( +3.06%) [ +0.00% +0.66% +2.07% / +3.06% +5.27% +5.33%] index_select random_sorted : Elapsed 0.345 ms (34.477 ms / 100) B = [250, 150, 50] (stride (150, 1, 37500)) A = [250, 15, 50] (stride (15, 1, 3750)) dim = 1 38.451 -> 38.459 ( +0.02%) [ +0.07% +0.03% +0.00% / +0.02% +0.60% +0.53%] index_add_ linear : Elapsed 0.385 ms (38.476 ms / 100) 28.107 -> 28.104 ( -0.01%) [ +0.13% +0.00% +0.30% / -0.01% +0.24% +0.31%] index_copy_ linear : Elapsed 0.281 ms (28.143 ms / 100) 38.549 -> 38.590 ( +0.11%) [ +0.04% +0.00% +0.08% / +0.11% +0.43% +0.56%] index_add_ reverse : Elapsed 0.386 ms (38.563 ms / 100) 28.126 -> 28.176 ( +0.18%) [ +0.06% +0.00% +0.06% / +0.18% +0.54% +0.61%] index_copy_ reverse : Elapsed 0.281 ms (28.144 ms / 100) 42.040 -> 41.855 ( -0.44%) [ +0.02% +0.00% +0.00% / -0.06% -0.44% -0.38%] index_add_ spread : Elapsed 0.420 ms (42.048 ms / 100) 30.872 -> 30.834 ( -0.12%) [ +0.12% +0.00% +0.14% / +0.06% -0.12% -0.04%] index_copy_ spread : Elapsed 0.309 ms (30.908 ms / 100) 41.635 -> 41.281 ( -0.85%) [ +0.01% +0.01% +0.00% / +0.00% -0.83% -0.85%] index_add_ strided 7 : Elapsed 0.416 ms (41.638 ms / 100) 30.582 -> 30.354 ( -0.75%) [ +0.09% +0.00% +0.11% / +0.10% -0.70% -0.75%] index_copy_ strided 7 : Elapsed 0.306 ms (30.609 ms / 100) 42.553 -> 42.557 ( +0.01%) [ +0.00% +0.00% +0.03% / +0.04% +0.03% +0.01%] index_add_ perm : Elapsed 0.426 ms (42.553 ms / 100) 31.280 -> 31.286 ( +0.02%) [ +0.03% +0.00% +0.08% / +0.02% +0.26% +0.18%] index_copy_ perm : Elapsed 0.313 ms (31.290 ms / 100) 41.148 -> 41.121 ( -0.07%) [ +0.00% +0.05% +0.05% / +0.04% +0.00% -0.07%] index_add_ perm_sorted : Elapsed 0.411 ms (41.148 ms / 100) 30.293 -> 30.242 ( -0.17%) [ +0.09% +0.00% +0.10% / +0.10% -0.10% -0.17%] index_copy_ perm_sorted : Elapsed 0.303 ms (30.320 ms / 100) BEST 175.882 -> 19.731 (-88.78%) [ +0.00% +0.19% +0.02% / -88.55% -88.78% -88.73%] index_select const : Elapsed 1.759 ms (175.882 ms / 100) BEST 183.238 -> 21.003 (-88.54%) [ +0.01% +0.04% +0.00% / -88.51% -88.54% -88.54%] index_select wrap : Elapsed 1.833 ms (183.252 ms / 100) BEST 176.185 -> 20.453 (-88.39%) [ +0.00% +0.17% +0.07% / -87.76% -88.38% -88.39%] index_select linear : Elapsed 1.762 ms (176.185 ms / 100) BEST 176.647 -> 20.613 (-88.33%) [ +0.53% +0.08% +0.00% / -88.26% -88.33% -88.32%] index_select reverse : Elapsed 1.776 ms (177.589 ms / 100) BEST 175.711 -> 20.435 (-88.37%) [ +0.05% +0.00% +0.02% / -88.37% -88.37% -88.36%] index_select skip64 : Elapsed 1.758 ms (175.792 ms / 100) BEST 175.489 -> 20.025 (-88.59%) [ +0.19% +0.00% +0.22% / -88.32% -88.59% -88.55%] index_select skip256 : Elapsed 1.758 ms (175.822 ms / 100) BEST 176.666 -> 20.151 (-88.59%) [ +0.00% +0.00% +0.01% / -88.56% -88.58% -88.59%] index_select spread : Elapsed 1.767 ms (176.669 ms / 100) BEST 186.635 -> 20.109 (-89.23%) [ +0.18% +0.00% +0.21% / -88.94% -89.23% -89.21%] index_select strided 3 : Elapsed 1.870 ms (186.973 ms / 100) BEST 185.438 -> 20.239 (-89.09%) [ +0.29% +0.24% +0.00% / -89.00% -89.08% -89.09%] index_select strided 5 : Elapsed 1.860 ms (185.982 ms / 100) BEST 186.627 -> 20.233 (-89.16%) [ +0.00% +0.22% +0.17% / -88.71% -89.16% -89.12%] index_select strided 7 : Elapsed 1.866 ms (186.627 ms / 100) BEST 186.572 -> 20.048 (-89.25%) [ +0.19% +0.15% +0.00% / -88.98% -89.25% -89.23%] index_select strided 8 : Elapsed 1.869 ms (186.920 ms / 100) BEST 185.793 -> 20.084 (-89.19%) [ +0.00% +0.16% +0.15% / -88.97% -89.19% -89.17%] index_select random : Elapsed 1.858 ms (185.793 ms / 100) BEST 176.430 -> 19.979 (-88.68%) [ +0.10% +0.19% +0.00% / -88.40% -88.68% -88.68%] index_select random_sorted : Elapsed 1.766 ms (176.602 ms / 100) out_shape = [250, 15, 150] in_shape = [250, 15, 50] idx_dim = 2 B = [250, 15, 150] (stride (2250, 150, 1)) A = [250, 15, 50] (stride (15, 1, 3750)) dim = 2 Good 7.531 -> 6.508 (-13.58%) [ +0.00% +0.31% +0.24% / -13.58% -13.31% -13.29%] index_add_ linear : Elapsed 0.075 ms (7.531 ms / 100) 6.176 -> 6.407 ( +3.74%) [ +0.00% +0.21% +0.24% / +3.74% +3.89% +3.85%] index_copy_ linear : Elapsed 0.062 ms (6.176 ms / 100) Good 7.499 -> 6.512 (-13.16%) [ +0.52% +0.00% +0.12% / -13.16% -12.86% -12.92%] index_add_ reverse : Elapsed 0.075 ms (7.538 ms / 100) 6.154 -> 6.387 ( +3.79%) [ +0.26% +0.16% +0.00% / +3.79% +4.29% +4.35%] index_copy_ reverse : Elapsed 0.062 ms (6.170 ms / 100) GOOD 9.847 -> 7.467 (-24.17%) [ +0.00% +0.24% +0.20% / -24.17% -23.82% -23.89%] index_add_ spread : Elapsed 0.098 ms (9.847 ms / 100) good 8.283 -> 7.611 ( -8.11%) [ +0.07% +0.00% +0.13% / -8.11% -7.73% -7.57%] index_copy_ spread : Elapsed 0.083 ms (8.289 ms / 100) GOOD 15.186 -> 7.621 (-49.82%) [ +0.43% +0.82% +0.00% / -49.82% -49.59% -49.59%] index_add_ strided 7 : Elapsed 0.153 ms (15.251 ms / 100) GOOD 12.782 -> 7.650 (-40.15%) [ +0.00% +0.10% +0.21% / -40.15% -39.76% -39.70%] index_copy_ strided 7 : Elapsed 0.128 ms (12.782 ms / 100) BEST 15.662 -> 7.704 (-50.81%) [ +0.14% +0.07% +0.00% / -50.81% -50.61% -50.63%] index_add_ perm : Elapsed 0.157 ms (15.684 ms / 100) GOOD 13.309 -> 7.574 (-43.09%) [ +0.00% +0.24% +0.09% / -43.09% -42.75% -42.77%] index_copy_ perm : Elapsed 0.133 ms (13.309 ms / 100) GOOD 9.817 -> 7.440 (-24.21%) [ +0.73% +0.41% +0.00% / -24.21% -23.97% -24.06%] index_add_ perm_sorted : Elapsed 0.099 ms (9.889 ms / 100) good 8.315 -> 7.547 ( -9.24%) [ +0.05% +0.00% +0.12% / -9.24% -8.79% -8.79%] index_copy_ perm_sorted : Elapsed 0.083 ms (8.319 ms / 100) GOOD 9.281 -> 7.380 (-20.48%) [ +0.02% +0.00% +0.00% / -20.48% -19.61% -19.48%] index_select const : Elapsed 0.093 ms (9.283 ms / 100) bad 9.478 -> 10.271 ( +8.37%) [ +0.06% +0.00% +0.02% / +8.93% +8.39% +8.37%] index_select wrap : Elapsed 0.095 ms (9.484 ms / 100) good 9.315 -> 8.546 ( -8.26%) [ +0.15% +0.00% +0.11% / -7.71% -8.26% -8.19%] index_select linear : Elapsed 0.093 ms (9.329 ms / 100) Good 9.423 -> 8.296 (-11.96%) [ +0.08% +0.00% +0.03% / -11.96% -10.48% -10.45%] index_select reverse : Elapsed 0.094 ms (9.431 ms / 100) Good 9.326 -> 7.680 (-17.65%) [ +0.01% +0.01% +0.00% / -17.65% -17.45% -17.54%] index_select skip64 : Elapsed 0.093 ms (9.327 ms / 100) Good 9.281 -> 7.544 (-18.72%) [ +0.16% +0.05% +0.00% / -18.64% -18.72% -18.69%] index_select skip256 : Elapsed 0.093 ms (9.296 ms / 100) good 9.295 -> 8.420 ( -9.41%) [ +0.01% +0.00% +0.10% / -9.41% -8.59% -8.54%] index_select spread : Elapsed 0.093 ms (9.296 ms / 100) 9.597 -> 9.343 ( -2.65%) [ +0.06% +0.00% +0.01% / -2.65% -1.94% -1.97%] index_select strided 3 : Elapsed 0.096 ms (9.603 ms / 100) Good 9.372 -> 8.361 (-10.79%) [ +0.19% +0.00% +0.07% / -10.79% -10.42% -10.56%] index_select strided 5 : Elapsed 0.094 ms (9.390 ms / 100) 9.545 -> 9.337 ( -2.18%) [ +0.10% +0.05% +0.00% / -2.18% -1.30% -1.33%] index_select strided 7 : Elapsed 0.096 ms (9.555 ms / 100) 9.478 -> 9.162 ( -3.33%) [ +0.04% +0.00% +0.00% / -2.45% -3.01% -3.33%] index_select strided 8 : Elapsed 0.095 ms (9.482 ms / 100) good 9.462 -> 8.971 ( -5.19%) [ +0.00% +0.01% +0.02% / -5.19% -3.03% -3.05%] index_select strided 16 : Elapsed 0.095 ms (9.462 ms / 100) 9.493 -> 9.142 ( -3.70%) [ +0.00% +0.00% +0.02% / -2.89% -3.55% -3.70%] index_select random : Elapsed 0.095 ms (9.493 ms / 100) Good 9.308 -> 8.112 (-12.85%) [ +0.08% +0.00% +0.17% / -12.85% -11.77% -11.79%] index_select random_sorted : Elapsed 0.093 ms (9.315 ms / 100) B = [250, 15, 150] (stride (2250, 1, 15)) A = [250, 15, 50] (stride (1, 250, 3750)) dim = 2 7.467 -> 7.317 ( -2.01%) [ +0.21% +0.00% +0.15% / +0.20% -1.55% -2.01%] index_add_ linear : Elapsed 0.075 ms (7.483 ms / 100) 6.938 -> 6.778 ( -2.31%) [ +0.82% +0.00% +0.58% / +0.19% -1.86% -2.31%] index_copy_ linear : Elapsed 0.070 ms (6.995 ms / 100) 7.438 -> 7.354 ( -1.13%) [ +0.69% +0.04% +0.00% / +0.59% -1.10% -1.13%] index_add_ reverse : Elapsed 0.075 ms (7.489 ms / 100) 6.894 -> 6.849 ( -0.65%) [ +0.57% +0.00% +0.33% / +0.01% -0.20% -0.65%] index_copy_ reverse : Elapsed 0.069 ms (6.933 ms / 100) 8.434 -> 8.426 ( -0.09%) [ +0.00% +0.41% +0.14% / +0.09% +0.21% -0.09%] index_add_ spread : Elapsed 0.084 ms (8.434 ms / 100) 8.109 -> 8.075 ( -0.42%) [ +0.00% +0.04% +0.01% / -0.42% +0.35% +0.35%] index_copy_ spread : Elapsed 0.081 ms (8.109 ms / 100) 8.784 -> 8.727 ( -0.65%) [ +0.79% +0.00% +0.68% / +0.16% -0.28% -0.65%] index_add_ strided 7 : Elapsed 0.089 ms (8.853 ms / 100) 8.401 -> 8.338 ( -0.75%) [ +0.20% +0.00% +0.01% / -0.75% -0.56% -0.57%] index_copy_ strided 7 : Elapsed 0.084 ms (8.418 ms / 100) 8.712 -> 8.548 ( -1.88%) [ +0.31% +0.46% +0.00% / -0.13% -1.81% -1.88%] index_add_ perm : Elapsed 0.087 ms (8.739 ms / 100) 8.222 -> 8.136 ( -1.05%) [ +0.17% +0.39% +0.00% / -0.35% -1.05% -0.96%] index_copy_ perm : Elapsed 0.082 ms (8.236 ms / 100) 8.168 -> 8.117 ( -0.62%) [ +0.09% +0.05% +0.00% / +0.16% -0.62% -0.60%] index_add_ perm_sorted : Elapsed 0.082 ms (8.175 ms / 100) 7.806 -> 7.789 ( -0.22%) [ +0.33% +0.00% +0.05% / -0.01% -0.12% -0.22%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.832 ms / 100) 9.749 -> 9.575 ( -1.78%) [ +0.21% +0.80% +0.00% / +0.53% -1.78% -1.74%] index_select const : Elapsed 0.098 ms (9.769 ms / 100) 12.327 -> 12.253 ( -0.60%) [ +0.05% +0.11% +0.00% / -0.51% -0.60% -0.06%] index_select wrap : Elapsed 0.123 ms (12.333 ms / 100) 10.000 -> 10.057 ( +0.57%) [ +0.76% +0.00% +0.52% / +0.77% +0.57% +1.20%] index_select linear : Elapsed 0.101 ms (10.076 ms / 100) 11.295 -> 11.349 ( +0.48%) [ +0.35% +0.00% +0.27% / +0.48% +0.50% +1.00%] index_select reverse : Elapsed 0.113 ms (11.334 ms / 100) 9.687 -> 9.559 ( -1.32%) [ +0.73% +1.03% +0.00% / +0.83% -1.32% -0.93%] index_select skip64 : Elapsed 0.098 ms (9.758 ms / 100) 9.723 -> 9.498 ( -2.31%) [ +0.01% +0.24% +0.00% / -0.07% -2.31% -1.27%] index_select skip256 : Elapsed 0.097 ms (9.724 ms / 100) 9.938 -> 9.948 ( +0.10%) [ +0.00% +0.24% +0.22% / +0.10% +0.83% +0.75%] index_select spread : Elapsed 0.099 ms (9.938 ms / 100) 12.220 -> 12.286 ( +0.54%) [ +0.38% +0.00% +0.65% / +0.54% +0.90% +1.05%] index_select strided 3 : Elapsed 0.123 ms (12.266 ms / 100) 10.504 -> 10.446 ( -0.55%) [ +0.00% +0.10% +0.09% / -0.55% +0.58% +0.42%] index_select strided 5 : Elapsed 0.105 ms (10.504 ms / 100) 12.217 -> 12.077 ( -1.15%) [ +0.68% +0.00% +0.01% / -0.20% -1.08% -1.15%] index_select strided 7 : Elapsed 0.123 ms (12.300 ms / 100) 11.706 -> 11.526 ( -1.54%) [ +0.00% +0.08% +0.09% / +0.06% -1.43% -1.54%] index_select strided 8 : Elapsed 0.117 ms (11.706 ms / 100) 11.511 -> 11.521 ( +0.09%) [ +0.00% +0.13% +0.18% / +0.33% +0.09% +0.80%] index_select strided 16 : Elapsed 0.115 ms (11.511 ms / 100) 11.458 -> 11.498 ( +0.35%) [ +0.38% +0.00% +0.22% / +0.35% +0.80% +1.03%] index_select random : Elapsed 0.115 ms (11.502 ms / 100) 9.996 -> 9.979 ( -0.17%) [ +0.86% +0.00% +0.37% / -0.10% -0.17% +0.80%] index_select random_sorted : Elapsed 0.101 ms (10.082 ms / 100) out_shape = [150, 50, 15] in_shape = [250, 50, 15] idx_dim = 0 B = [150, 50, 15] (stride (750, 15, 1)) A = [250, 50, 15] (stride (750, 15, 1)) dim = 0 4.863 -> 4.873 ( +0.21%) [ +0.00% +0.21% +0.21% / +0.21% +0.58% +0.76%] index_select const : Elapsed 0.049 ms (4.863 ms / 100) 5.068 -> 5.070 ( +0.04%) [ +0.00% +0.12% +0.10% / +0.04% +0.24% +0.04%] index_select wrap : Elapsed 0.051 ms (5.068 ms / 100) 5.071 -> 5.070 ( -0.02%) [ +0.12% +0.00% +0.24% / +0.02% +0.22% -0.02%] index_select linear : Elapsed 0.051 ms (5.077 ms / 100) 5.099 -> 5.100 ( +0.02%) [ +0.00% +0.24% +0.12% / +0.08% +0.18% +0.02%] index_select reverse : Elapsed 0.051 ms (5.099 ms / 100) 4.867 -> 4.872 ( +0.10%) [ +0.23% +0.00% +0.27% / +0.16% +0.12% +0.10%] index_select skip64 : Elapsed 0.049 ms (4.878 ms / 100) 4.858 -> 4.867 ( +0.19%) [ +0.27% +0.00% +0.27% / +0.19% +0.47% +0.37%] index_select skip256 : Elapsed 0.049 ms (4.871 ms / 100) 5.097 -> 5.109 ( +0.24%) [ +0.00% +0.10% +0.14% / +0.24% +0.67% +0.59%] index_select spread : Elapsed 0.051 ms (5.097 ms / 100) 5.092 -> 5.104 ( +0.24%) [ +0.35% +0.00% +0.33% / +0.24% +0.45% +0.57%] index_select strided 3 : Elapsed 0.051 ms (5.110 ms / 100) 4.979 -> 4.974 ( -0.10%) [ +0.18% +0.04% +0.00% / +0.08% -0.10% +0.20%] index_select strided 5 : Elapsed 0.050 ms (4.988 ms / 100) 5.134 -> 5.105 ( -0.56%) [ +0.00% +0.23% +0.16% / +0.18% -0.37% -0.56%] index_select strided 7 : Elapsed 0.051 ms (5.134 ms / 100) 5.077 -> 5.090 ( +0.26%) [ +0.37% +0.00% +0.45% / +0.51% +0.26% +0.28%] index_select strided 8 : Elapsed 0.051 ms (5.096 ms / 100) 5.080 -> 5.067 ( -0.26%) [ +0.10% +0.00% +0.06% / -0.26% +0.35% +0.06%] index_select strided 16 : Elapsed 0.051 ms (5.085 ms / 100) 5.106 -> 5.103 ( -0.06%) [ +0.20% +0.04% +0.00% / -0.06% +0.14% +0.06%] index_select strided 64 : Elapsed 0.051 ms (5.116 ms / 100) 4.864 -> 4.868 ( +0.08%) [ +0.41% +0.21% +0.00% / +0.31% +0.08% +0.14%] index_select strided 100 : Elapsed 0.049 ms (4.884 ms / 100) 5.084 -> 5.059 ( -0.49%) [ +0.00% +0.00% +0.08% / -0.04% -0.49% -0.39%] index_select random : Elapsed 0.051 ms (5.084 ms / 100) 5.051 -> 5.042 ( -0.18%) [ +0.20% +0.00% +0.22% / +0.02% -0.12% -0.18%] index_select random_sorted : Elapsed 0.051 ms (5.061 ms / 100) 5.110 -> 5.112 ( +0.04%) [ +0.27% +0.00% +0.29% / +0.04% +0.27% +0.22%] index_select perm : Elapsed 0.051 ms (5.124 ms / 100) 5.117 -> 5.086 ( -0.61%) [ +0.27% +0.00% +0.31% / +0.27% -0.43% -0.61%] index_select perm_sorted : Elapsed 0.051 ms (5.131 ms / 100) B = [150, 50, 15] (stride (750, 1, 50)) A = [250, 50, 15] (stride (15, 3750, 1)) dim = 0 5.474 -> 5.486 ( +0.22%) [ +0.29% +0.15% +0.00% / +0.22% +0.60% +0.22%] index_select const : Elapsed 0.055 ms (5.490 ms / 100) 5.904 -> 5.921 ( +0.29%) [ +0.34% +0.14% +0.00% / +0.29% +0.61% +0.76%] index_select wrap : Elapsed 0.059 ms (5.924 ms / 100) 5.884 -> 5.886 ( +0.03%) [ +0.14% +0.00% +0.02% / +0.03% +1.10% +0.92%] index_select linear : Elapsed 0.059 ms (5.892 ms / 100) 5.902 -> 5.900 ( -0.03%) [ +0.00% +0.05% +0.10% / -0.03% +0.32% +0.46%] index_select reverse : Elapsed 0.059 ms (5.902 ms / 100) 5.489 -> 5.506 ( +0.31%) [ +0.04% +0.04% +0.00% / +0.33% +0.60% +0.31%] index_select skip64 : Elapsed 0.055 ms (5.491 ms / 100) 5.473 -> 5.477 ( +0.07%) [ +0.00% +0.26% +0.31% / +0.07% +0.46% +0.46%] index_select skip256 : Elapsed 0.055 ms (5.473 ms / 100) 5.962 -> 5.972 ( +0.17%) [ +0.00% +0.08% +0.08% / +0.17% +0.55% +0.39%] index_select spread : Elapsed 0.060 ms (5.962 ms / 100) 6.087 -> 6.097 ( +0.16%) [ +0.11% +0.05% +0.00% / +0.18% +0.21% +0.16%] index_select strided 3 : Elapsed 0.061 ms (6.094 ms / 100) 5.981 -> 5.997 ( +0.27%) [ +0.25% +0.00% +0.12% / +0.27% +1.20% +1.10%] index_select strided 5 : Elapsed 0.060 ms (5.996 ms / 100) 6.076 -> 6.094 ( +0.30%) [ +0.00% +0.16% +0.36% / +0.30% +0.31% +0.44%] index_select strided 7 : Elapsed 0.061 ms (6.076 ms / 100) 6.091 -> 6.097 ( +0.10%) [ +0.00% +0.10% +0.34% / +0.10% +0.72% +0.72%] index_select strided 8 : Elapsed 0.061 ms (6.091 ms / 100) 6.087 -> 6.094 ( +0.11%) [ +0.39% +0.00% +0.20% / +0.11% +0.13% +0.18%] index_select strided 16 : Elapsed 0.061 ms (6.111 ms / 100) 6.094 -> 6.091 ( -0.05%) [ +0.23% +0.00% +0.11% / -0.05% +0.34% +0.30%] index_select strided 64 : Elapsed 0.061 ms (6.108 ms / 100) 5.520 -> 5.520 ( +0.00%) [ +0.04% +0.00% +0.27% / +0.00% +0.60% +0.82%] index_select strided 100 : Elapsed 0.055 ms (5.522 ms / 100) 5.988 -> 5.995 ( +0.12%) [ +0.00% +0.00% +0.08% / +0.30% +0.12% +0.17%] index_select random : Elapsed 0.060 ms (5.988 ms / 100) 5.874 -> 5.873 ( -0.02%) [ +0.10% +0.00% +0.14% / -0.02% +0.53% +0.29%] index_select random_sorted : Elapsed 0.059 ms (5.880 ms / 100) 6.033 -> 6.022 ( -0.18%) [ +0.10% +0.10% +0.00% / -0.18% +0.58% +0.41%] index_select perm : Elapsed 0.060 ms (6.039 ms / 100) 5.916 -> 5.929 ( +0.22%) [ +0.22% +0.00% +0.30% / +0.22% +0.68% +0.74%] index_select perm_sorted : Elapsed 0.059 ms (5.929 ms / 100) B = [150, 50, 15] (stride (1, 2250, 150)) A = [250, 50, 15] (stride (750, 1, 50)) dim = 0 8.995 -> 9.017 ( +0.24%) [ +0.00% +0.13% +0.24% / +0.28% +0.24% +0.38%] index_select const : Elapsed 0.090 ms (8.995 ms / 100) 9.439 -> 9.440 ( +0.01%) [ +0.01% +0.18% +0.00% / +0.20% +0.01% +0.13%] index_select wrap : Elapsed 0.094 ms (9.440 ms / 100) 9.436 -> 9.425 ( -0.12%) [ +0.17% +0.39% +0.00% / +0.07% +0.00% -0.12%] index_select linear : Elapsed 0.095 ms (9.452 ms / 100) 9.411 -> 9.423 ( +0.13%) [ +0.00% +0.11% +0.18% / +0.13% +0.53% +0.20%] index_select reverse : Elapsed 0.094 ms (9.411 ms / 100) 8.993 -> 9.017 ( +0.27%) [ +0.02% +0.08% +0.00% / +0.27% +0.46% +0.48%] index_select skip64 : Elapsed 0.090 ms (8.995 ms / 100) 9.000 -> 9.002 ( +0.02%) [ +0.04% +0.00% +0.16% / +0.10% +0.02% +0.27%] index_select skip256 : Elapsed 0.090 ms (9.004 ms / 100) 9.406 -> 9.427 ( +0.22%) [ +0.17% +0.00% +0.23% / +0.22% +0.90% +0.86%] index_select spread : Elapsed 0.094 ms (9.422 ms / 100) 9.436 -> 9.449 ( +0.14%) [ +0.42% +0.00% +0.23% / +0.14% +0.69% +0.85%] index_select strided 3 : Elapsed 0.095 ms (9.476 ms / 100) 9.297 -> 9.324 ( +0.29%) [ +0.00% +0.08% +0.05% / +0.29% +1.05% +0.95%] index_select strided 5 : Elapsed 0.093 ms (9.297 ms / 100) 9.495 -> 9.504 ( +0.09%) [ +0.00% +0.15% +0.13% / +0.09% +0.18% +0.13%] index_select strided 7 : Elapsed 0.095 ms (9.495 ms / 100) 9.469 -> 9.468 ( -0.01%) [ +0.00% +0.06% +0.17% / -0.01% +0.50% +0.54%] index_select strided 8 : Elapsed 0.095 ms (9.469 ms / 100) 9.537 -> 9.554 ( +0.18%) [ +0.15% +0.05% +0.00% / +0.28% +0.43% +0.18%] index_select strided 16 : Elapsed 0.096 ms (9.551 ms / 100) 9.468 -> 9.459 ( -0.10%) [ +0.00% +0.05% +0.23% / -0.10% +0.26% +0.13%] index_select strided 64 : Elapsed 0.095 ms (9.468 ms / 100) 9.048 -> 9.034 ( -0.15%) [ +0.03% +0.17% +0.00% / +0.10% -0.13% -0.15%] index_select strided 100 : Elapsed 0.091 ms (9.051 ms / 100) 9.410 -> 9.440 ( +0.32%) [ +0.22% +0.00% +0.20% / +0.32% +0.82% +0.88%] index_select random : Elapsed 0.094 ms (9.431 ms / 100) 9.404 -> 9.388 ( -0.17%) [ +0.05% +0.00% +0.05% / +0.13% -0.14% -0.17%] index_select random_sorted : Elapsed 0.094 ms (9.409 ms / 100) 9.494 -> 9.491 ( -0.03%) [ +0.00% +0.01% +0.04% / -0.03% +0.23% +0.39%] index_select perm : Elapsed 0.095 ms (9.494 ms / 100) 9.425 -> 9.439 ( +0.15%) [ +0.08% +0.00% +0.08% / +0.25% +0.15% +0.15%] index_select perm_sorted : Elapsed 0.094 ms (9.433 ms / 100) B = [150, 50, 15] (stride (1, 2250, 150)) A = [250, 50, 15] (stride (15, 3750, 1)) dim = 0 8.986 -> 9.004 ( +0.20%) [ +0.00% +0.01% +0.04% / +0.20% +0.30% +0.21%] index_select const : Elapsed 0.090 ms (8.986 ms / 100) 9.630 -> 9.642 ( +0.12%) [ +0.15% +0.05% +0.00% / +0.12% +0.46% +0.62%] index_select wrap : Elapsed 0.096 ms (9.644 ms / 100) 9.629 -> 9.659 ( +0.31%) [ +0.00% +0.07% +0.24% / +0.31% +0.82% +0.54%] index_select linear : Elapsed 0.096 ms (9.629 ms / 100) 9.604 -> 9.622 ( +0.19%) [ +0.00% +0.11% +0.14% / +0.19% +0.91% +1.04%] index_select reverse : Elapsed 0.096 ms (9.604 ms / 100) 8.996 -> 9.015 ( +0.21%) [ +0.23% +0.26% +0.00% / +0.21% +0.44% +0.31%] index_select skip64 : Elapsed 0.090 ms (9.017 ms / 100) 8.980 -> 8.980 ( +0.00%) [ +0.20% +0.00% +0.20% / +0.00% +0.37% +0.28%] index_select skip256 : Elapsed 0.090 ms (8.998 ms / 100) 9.795 -> 9.808 ( +0.13%) [ +0.00% +0.08% +0.07% / +0.13% +0.52% +0.51%] index_select spread : Elapsed 0.098 ms (9.795 ms / 100) 9.997 -> 9.988 ( -0.09%) [ +0.08% +0.00% +0.09% / +0.12% +0.02% -0.09%] index_select strided 3 : Elapsed 0.100 ms (10.005 ms / 100) 9.819 -> 9.855 ( +0.37%) [ +0.00% +0.08% +0.26% / +0.37% +0.70% +0.59%] index_select strided 5 : Elapsed 0.098 ms (9.819 ms / 100) 10.019 -> 10.020 ( +0.01%) [ +0.00% +0.02% +0.11% / +0.10% +0.01% +0.01%] index_select strided 7 : Elapsed 0.100 ms (10.019 ms / 100) 10.011 -> 10.029 ( +0.18%) [ +0.00% +0.14% +0.09% / +0.18% +0.26% +0.24%] index_select strided 8 : Elapsed 0.100 ms (10.011 ms / 100) 10.030 -> 10.013 ( -0.17%) [ +0.00% +0.12% +0.15% / +0.05% +0.10% -0.17%] index_select strided 16 : Elapsed 0.100 ms (10.030 ms / 100) 10.027 -> 10.029 ( +0.02%) [ +0.05% +0.00% +0.04% / +0.24% +0.09% +0.02%] index_select strided 64 : Elapsed 0.100 ms (10.032 ms / 100) 9.055 -> 9.062 ( +0.08%) [ +0.14% +0.08% +0.00% / +0.08% +0.22% +0.36%] index_select strided 100 : Elapsed 0.091 ms (9.068 ms / 100) 9.841 -> 9.869 ( +0.28%) [ +0.00% +0.03% +0.16% / +0.28% +0.47% +0.35%] index_select random : Elapsed 0.098 ms (9.841 ms / 100) 9.573 -> 9.567 ( -0.06%) [ +0.00% +0.11% +0.05% / -0.06% +0.44% +0.45%] index_select random_sorted : Elapsed 0.096 ms (9.573 ms / 100) 9.964 -> 9.986 ( +0.22%) [ +0.23% +0.22% +0.00% / +0.22% +0.52% +0.37%] index_select perm : Elapsed 0.100 ms (9.987 ms / 100) 9.732 -> 9.758 ( +0.27%) [ +0.00% +0.22% +0.42% / +0.27% +0.32% +0.34%] index_select perm_sorted : Elapsed 0.097 ms (9.732 ms / 100) B = [150, 50, 15] (stride (50, 1, 7500)) A = [250, 50, 15] (stride (750, 1, 50)) dim = 0 5.648 -> 5.655 ( +0.12%) [ +0.00% +0.19% +0.04% / +0.12% +0.28% +0.21%] index_select const : Elapsed 0.056 ms (5.648 ms / 100) 5.884 -> 5.877 ( -0.12%) [ +0.22% +0.00% +0.25% / +0.14% -0.12% -0.03%] index_select wrap : Elapsed 0.059 ms (5.897 ms / 100) 5.863 -> 5.876 ( +0.22%) [ +0.00% +0.26% +0.38% / +0.31% +0.31% +0.22%] index_select linear : Elapsed 0.059 ms (5.863 ms / 100) 5.864 -> 5.863 ( -0.02%) [ +0.07% +0.00% +0.10% / -0.02% +0.41% +0.29%] index_select reverse : Elapsed 0.059 ms (5.868 ms / 100) 5.635 -> 5.626 ( -0.16%) [ +0.00% +0.14% +0.12% / +0.02% -0.05% -0.16%] index_select skip64 : Elapsed 0.056 ms (5.635 ms / 100) 5.651 -> 5.661 ( +0.18%) [ +0.07% +0.05% +0.00% / +0.18% +0.39% +0.39%] index_select skip256 : Elapsed 0.057 ms (5.655 ms / 100) 5.858 -> 5.858 ( +0.00%) [ +0.27% +0.00% +0.22% / +0.00% +0.60% +0.39%] index_select spread : Elapsed 0.059 ms (5.874 ms / 100) 5.838 -> 5.829 ( -0.15%) [ +0.00% +0.02% +0.05% / -0.15% +1.03% +0.87%] index_select strided 3 : Elapsed 0.058 ms (5.838 ms / 100) 5.709 -> 5.713 ( +0.07%) [ +0.19% +0.00% +0.09% / +0.07% +2.19% +1.86%] index_select strided 5 : Elapsed 0.057 ms (5.720 ms / 100) 5.869 -> 5.879 ( +0.17%) [ +0.00% +0.27% +0.20% / +0.17% +0.70% +0.61%] index_select strided 7 : Elapsed 0.059 ms (5.869 ms / 100) 5.849 -> 5.844 ( -0.09%) [ +0.00% +0.17% +0.43% / -0.09% +1.06% +0.99%] index_select strided 8 : Elapsed 0.058 ms (5.849 ms / 100) 5.861 -> 5.850 ( -0.19%) [ +0.00% +0.05% +0.03% / -0.19% +0.89% +0.73%] index_select strided 16 : Elapsed 0.059 ms (5.861 ms / 100) 5.861 -> 5.844 ( -0.29%) [ +0.19% +0.00% +0.38% / +0.46% +0.03% -0.29%] index_select strided 64 : Elapsed 0.059 ms (5.872 ms / 100) 5.642 -> 5.627 ( -0.27%) [ +0.11% +0.16% +0.00% / +0.07% -0.09% -0.27%] index_select strided 100 : Elapsed 0.056 ms (5.648 ms / 100) 5.822 -> 5.833 ( +0.19%) [ +0.15% +0.00% +0.02% / +0.19% +0.29% +0.26%] index_select random : Elapsed 0.058 ms (5.831 ms / 100) 5.757 -> 5.778 ( +0.36%) [ +0.23% +0.00% +0.28% / +0.36% +0.96% +0.85%] index_select random_sorted : Elapsed 0.058 ms (5.770 ms / 100) 5.906 -> 5.859 ( -0.80%) [ +0.02% +0.08% +0.00% / +0.30% -0.69% -0.80%] index_select perm : Elapsed 0.059 ms (5.907 ms / 100) 5.890 -> 5.833 ( -0.97%) [ +0.00% +0.14% +0.10% / +0.10% -0.97% -0.54%] index_select perm_sorted : Elapsed 0.059 ms (5.890 ms / 100) B = [150, 50, 15] (stride (50, 1, 7500)) A = [250, 50, 15] (stride (15, 3750, 1)) dim = 0 5.510 -> 5.513 ( +0.05%) [ +0.27% +0.25% +0.00% / +0.13% +0.05% +0.18%] index_select const : Elapsed 0.055 ms (5.525 ms / 100) 5.935 -> 5.935 ( +0.00%) [ +0.02% +0.27% +0.00% / +0.00% +0.45% +0.19%] index_select wrap : Elapsed 0.059 ms (5.936 ms / 100) 5.899 -> 5.926 ( +0.46%) [ +0.08% +0.00% +0.25% / +0.46% +0.68% +0.86%] index_select linear : Elapsed 0.059 ms (5.904 ms / 100) 5.908 -> 5.920 ( +0.20%) [ +0.00% +0.25% +0.54% / +0.37% +0.24% +0.20%] index_select reverse : Elapsed 0.059 ms (5.908 ms / 100) 5.528 -> 5.518 ( -0.18%) [ +0.11% +0.04% +0.00% / -0.18% +0.20% -0.11%] index_select skip64 : Elapsed 0.055 ms (5.534 ms / 100) 5.497 -> 5.502 ( +0.09%) [ +0.20% +0.13% +0.00% / +0.15% +0.16% +0.09%] index_select skip256 : Elapsed 0.055 ms (5.508 ms / 100) 5.980 -> 5.983 ( +0.05%) [ +0.25% +0.02% +0.00% / +0.05% +0.10% +0.33%] index_select spread : Elapsed 0.060 ms (5.995 ms / 100) 6.117 -> 6.095 ( -0.36%) [ +0.13% +0.00% +0.10% / +0.07% -0.36% -0.23%] index_select strided 3 : Elapsed 0.061 ms (6.125 ms / 100) 5.996 -> 6.016 ( +0.33%) [ +0.17% +0.00% +0.22% / +0.33% +0.77% +0.83%] index_select strided 5 : Elapsed 0.060 ms (6.006 ms / 100) 6.099 -> 6.100 ( +0.02%) [ +0.20% +0.00% +0.08% / +0.25% +0.20% +0.02%] index_select strided 7 : Elapsed 0.061 ms (6.111 ms / 100) 6.106 -> 6.117 ( +0.18%) [ +0.00% +0.11% +0.16% / +0.18% +0.34% +0.33%] index_select strided 8 : Elapsed 0.061 ms (6.106 ms / 100) 6.111 -> 6.107 ( -0.07%) [ +0.08% +0.08% +0.00% / +0.11% +0.03% -0.07%] index_select strided 16 : Elapsed 0.061 ms (6.116 ms / 100) 6.104 -> 6.106 ( +0.03%) [ +0.25% +0.00% +0.13% / +0.03% +0.08% +0.31%] index_select strided 64 : Elapsed 0.061 ms (6.119 ms / 100) 5.544 -> 5.551 ( +0.13%) [ +0.00% +0.20% +0.25% / +0.13% +0.41% +0.38%] index_select strided 100 : Elapsed 0.055 ms (5.544 ms / 100) 5.989 -> 5.994 ( +0.08%) [ +0.02% +0.15% +0.00% / +0.08% +0.60% +0.47%] index_select random : Elapsed 0.060 ms (5.990 ms / 100) 5.861 -> 5.870 ( +0.15%) [ +0.14% +0.00% +0.22% / +0.15% +0.20% +0.36%] index_select random_sorted : Elapsed 0.059 ms (5.869 ms / 100) 6.040 -> 6.047 ( +0.12%) [ +0.00% +0.25% +0.08% / +0.12% +0.63% +0.48%] index_select perm : Elapsed 0.060 ms (6.040 ms / 100) 5.933 -> 5.957 ( +0.40%) [ +0.20% +0.00% +0.32% / +0.40% +0.59% +0.69%] index_select perm_sorted : Elapsed 0.059 ms (5.945 ms / 100) out_shape = [250, 150, 15] in_shape = [250, 50, 15] idx_dim = 1 B = [250, 150, 15] (stride (2250, 15, 1)) dim = 1 fill_cnt = 50 2.961 -> 2.961 ( +0.00%) [ +0.24% +0.00% +0.17% / +0.00% +0.27% +0.17%] index_fill_ const : Elapsed 0.030 ms (2.968 ms / 100) 3.066 -> 3.037 ( -0.95%) [ +0.20% +0.13% +0.00% / -0.65% -0.95% -0.88%] index_fill_ linear : Elapsed 0.031 ms (3.072 ms / 100) 3.061 -> 3.042 ( -0.62%) [ +0.20% +0.00% +0.16% / -0.26% -0.42% -0.62%] index_fill_ reverse : Elapsed 0.031 ms (3.067 ms / 100) 2.969 -> 2.965 ( -0.13%) [ +0.17% +0.00% +0.00% / -0.13% +1.11% +1.11%] index_fill_ skip64 : Elapsed 0.030 ms (2.974 ms / 100) 2.970 -> 2.961 ( -0.30%) [ +0.24% +0.03% +0.00% / -0.30% +1.08% +0.94%] index_fill_ skip256 : Elapsed 0.030 ms (2.977 ms / 100) 3.459 -> 3.442 ( -0.49%) [ +0.00% +0.75% +0.46% / -0.49% +0.78% +0.75%] index_fill_ spread : Elapsed 0.035 ms (3.459 ms / 100) 3.468 -> 3.452 ( -0.46%) [ +0.17% +0.00% +0.14% / -0.46% -0.37% -0.37%] index_fill_ strided 3 : Elapsed 0.035 ms (3.474 ms / 100) 3.418 -> 3.371 ( -1.38%) [ +0.06% +0.06% +0.00% / -0.79% -1.38% -1.20%] index_fill_ strided 5 : Elapsed 0.034 ms (3.420 ms / 100) 3.627 -> 3.582 ( -1.24%) [ +0.28% +0.17% +0.00% / -0.99% -1.24% -1.13%] index_fill_ strided 7 : Elapsed 0.036 ms (3.637 ms / 100) 3.504 -> 3.476 ( -0.80%) [ +0.40% +0.17% +0.00% / -0.80% -0.26% -0.57%] index_fill_ strided 8 : Elapsed 0.035 ms (3.518 ms / 100) 3.592 -> 3.567 ( -0.70%) [ +0.86% +0.00% +0.45% / -0.47% -0.70% -0.61%] index_fill_ strided 16 : Elapsed 0.036 ms (3.623 ms / 100) 3.426 -> 3.389 ( -1.08%) [ +0.26% +0.00% +0.12% / -1.02% -1.08% -0.88%] index_fill_ strided 64 : Elapsed 0.034 ms (3.435 ms / 100) 3.000 -> 2.985 ( -0.50%) [ +0.13% +0.00% +0.03% / -0.50% -0.13% -0.30%] index_fill_ strided 100 : Elapsed 0.030 ms (3.004 ms / 100) 3.240 -> 3.205 ( -1.08%) [ +0.25% +0.00% +0.43% / -1.05% -0.80% -1.08%] index_fill_ random : Elapsed 0.032 ms (3.248 ms / 100) 3.200 -> 3.175 ( -0.78%) [ +0.00% +0.66% +0.19% / -0.78% -0.78% -0.41%] index_fill_ random_sorted : Elapsed 0.032 ms (3.200 ms / 100) 3.352 -> 3.325 ( -0.81%) [ +0.30% +0.00% +0.81% / -0.36% -0.81% -0.36%] index_fill_ perm : Elapsed 0.034 ms (3.362 ms / 100) 3.271 -> 3.231 ( -1.22%) [ +0.00% +0.37% +0.43% / -1.19% -1.22% -0.98%] index_fill_ perm_sorted : Elapsed 0.033 ms (3.271 ms / 100) B = [250, 150, 15] (stride (2250, 15, 1)) A = [250, 50, 15] (stride (1, 3750, 250)) dim = 1 7.430 -> 7.340 ( -1.21%) [ +0.11% +0.61% +0.00% / +0.46% -1.21% -0.97%] index_add_ linear : Elapsed 0.074 ms (7.438 ms / 100) 6.924 -> 6.790 ( -1.94%) [ +0.00% +0.58% +0.27% / -0.10% -1.79% -1.94%] index_copy_ linear : Elapsed 0.069 ms (6.924 ms / 100) 7.427 -> 7.342 ( -1.14%) [ +0.53% +0.65% +0.00% / +0.30% -0.96% -1.14%] index_add_ reverse : Elapsed 0.075 ms (7.466 ms / 100) 6.893 -> 6.859 ( -0.49%) [ +0.51% +0.35% +0.00% / -0.04% -0.49% -0.45%] index_copy_ reverse : Elapsed 0.069 ms (6.928 ms / 100) 8.435 -> 8.408 ( -0.32%) [ +0.12% +0.00% +0.49% / -0.18% -0.32% -0.05%] index_add_ spread : Elapsed 0.084 ms (8.445 ms / 100) 8.134 -> 8.083 ( -0.63%) [ +0.00% +0.06% +0.26% / -0.63% -0.07% -0.22%] index_copy_ spread : Elapsed 0.081 ms (8.134 ms / 100) 8.804 -> 8.745 ( -0.67%) [ +0.00% +0.30% +0.30% / +0.00% -0.67% -0.52%] index_add_ strided 7 : Elapsed 0.088 ms (8.804 ms / 100) 8.396 -> 8.322 ( -0.88%) [ +0.55% +0.00% +0.19% / -0.88% -0.25% -0.38%] index_copy_ strided 7 : Elapsed 0.084 ms (8.442 ms / 100) 8.601 -> 8.547 ( -0.63%) [ +0.73% +0.00% +0.36% / -0.12% -0.63% -0.33%] index_add_ perm : Elapsed 0.087 ms (8.664 ms / 100) 8.137 -> 8.081 ( -0.69%) [ +0.31% +0.00% +0.01% / -0.58% -0.69% -0.64%] index_copy_ perm : Elapsed 0.082 ms (8.162 ms / 100) 8.125 -> 8.120 ( -0.06%) [ +0.43% +0.02% +0.00% / +0.01% +0.10% -0.06%] index_add_ perm_sorted : Elapsed 0.082 ms (8.160 ms / 100) 7.770 -> 7.743 ( -0.35%) [ +0.60% +0.00% +0.24% / -0.35% +0.48% +0.66%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.817 ms / 100) 9.793 -> 9.590 ( -2.07%) [ +0.00% +0.04% +0.22% / +0.07% -2.07% -1.88%] index_select const : Elapsed 0.098 ms (9.793 ms / 100) 12.197 -> 12.220 ( +0.19%) [ +0.00% +0.29% +0.70% / +0.19% +1.39% +1.27%] index_select wrap : Elapsed 0.122 ms (12.197 ms / 100) 10.126 -> 10.097 ( -0.29%) [ +0.00% +0.14% +0.25% / +0.00% -0.29% -0.12%] index_select linear : Elapsed 0.101 ms (10.126 ms / 100) 11.395 -> 11.110 ( -2.50%) [ +1.25% +0.00% +0.42% / +0.34% -2.33% -2.50%] index_select reverse : Elapsed 0.115 ms (11.538 ms / 100) 9.815 -> 9.574 ( -2.46%) [ +0.46% +0.30% +0.00% / -0.44% -2.26% -2.46%] index_select skip64 : Elapsed 0.099 ms (9.860 ms / 100) 9.749 -> 9.594 ( -1.59%) [ +0.94% +0.00% +0.64% / +0.66% -1.59% -1.28%] index_select skip256 : Elapsed 0.098 ms (9.841 ms / 100) 9.984 -> 9.928 ( -0.56%) [ +0.68% +0.06% +0.00% / -0.22% -0.56% -0.51%] index_select spread : Elapsed 0.101 ms (10.052 ms / 100) 12.179 -> 12.185 ( +0.05%) [ +0.00% +0.36% +0.25% / +0.05% +1.54% +1.54%] index_select strided 3 : Elapsed 0.122 ms (12.179 ms / 100) 10.630 -> 10.439 ( -1.80%) [ +0.38% +0.27% +0.00% / +0.09% -1.57% -1.80%] index_select strided 5 : Elapsed 0.107 ms (10.670 ms / 100) 11.981 -> 12.013 ( +0.27%) [ +0.39% +0.61% +0.00% / +0.27% +1.33% +2.28%] index_select strided 7 : Elapsed 0.120 ms (12.028 ms / 100) 11.575 -> 11.616 ( +0.35%) [ +0.47% +0.04% +0.00% / +0.35% +2.05% +1.75%] index_select strided 8 : Elapsed 0.116 ms (11.629 ms / 100) 11.578 -> 11.613 ( +0.30%) [ +0.03% +0.00% +0.04% / +0.30% +0.74% +0.96%] index_select strided 16 : Elapsed 0.116 ms (11.581 ms / 100) 11.463 -> 11.479 ( +0.14%) [ +0.33% +0.31% +0.00% / +0.14% +1.12% +0.93%] index_select random : Elapsed 0.115 ms (11.501 ms / 100) 10.026 -> 10.016 ( -0.10%) [ +0.92% +0.00% +0.41% / +0.58% +0.14% -0.10%] index_select random_sorted : Elapsed 0.101 ms (10.118 ms / 100) B = [250, 150, 15] (stride (2250, 1, 150)) A = [250, 50, 15] (stride (750, 15, 1)) dim = 1 GOOD 10.374 -> 7.273 (-29.89%) [ +0.50% +0.29% +0.00% / -28.76% -29.53% -29.89%] index_add_ linear : Elapsed 0.104 ms (10.426 ms / 100) Good 8.382 -> 7.097 (-15.33%) [ +0.51% +0.00% +0.21% / -14.05% -15.29% -15.33%] index_copy_ linear : Elapsed 0.084 ms (8.425 ms / 100) GOOD 10.423 -> 7.273 (-30.22%) [ +0.12% +0.43% +0.00% / -29.08% -29.91% -30.22%] index_add_ reverse : Elapsed 0.104 ms (10.436 ms / 100) Good 8.406 -> 7.115 (-15.36%) [ +0.19% +0.04% +0.00% / -14.32% -15.36% -15.29%] index_copy_ reverse : Elapsed 0.084 ms (8.422 ms / 100) GOOD 13.371 -> 8.662 (-35.22%) [ +0.00% +0.17% +0.14% / -34.91% -35.22% -35.17%] index_add_ spread : Elapsed 0.134 ms (13.371 ms / 100) GOOD 11.330 -> 8.804 (-22.29%) [ +0.00% +0.29% +0.30% / -21.50% -22.29% -22.20%] index_copy_ spread : Elapsed 0.113 ms (11.330 ms / 100) BEST 18.332 -> 8.745 (-52.30%) [ +0.00% +0.96% +0.21% / -52.03% -52.30% -52.29%] index_add_ strided 7 : Elapsed 0.183 ms (18.332 ms / 100) GOOD 16.833 -> 8.975 (-46.68%) [ +0.12% +0.31% +0.00% / -46.13% -46.68% -46.58%] index_copy_ strided 7 : Elapsed 0.169 ms (16.854 ms / 100) BEST 19.755 -> 8.720 (-55.86%) [ +0.19% +0.17% +0.00% / -55.54% -55.73% -55.86%] index_add_ perm : Elapsed 0.198 ms (19.792 ms / 100) GOOD 17.453 -> 8.819 (-49.47%) [ +0.16% +0.18% +0.00% / -49.09% -49.47% -49.47%] index_copy_ perm : Elapsed 0.175 ms (17.481 ms / 100) GOOD 13.029 -> 8.609 (-33.92%) [ +0.35% +0.38% +0.00% / -33.52% -33.92% -33.78%] index_add_ perm_sorted : Elapsed 0.131 ms (13.074 ms / 100) GOOD 11.219 -> 8.659 (-22.82%) [ +0.00% +0.53% +0.03% / -22.18% -22.82% -22.74%] index_copy_ perm_sorted : Elapsed 0.112 ms (11.219 ms / 100) GOOD 14.066 -> 8.729 (-37.94%) [ +0.25% +0.06% +0.00% / -37.33% -37.86% -37.94%] index_select const : Elapsed 0.141 ms (14.101 ms / 100) GOOD 17.695 -> 10.577 (-40.23%) [ +0.21% +0.20% +0.00% / -40.23% -40.01% -40.01%] index_select wrap : Elapsed 0.177 ms (17.732 ms / 100) GOOD 15.184 -> 9.146 (-39.77%) [ +0.23% +0.53% +0.00% / -39.03% -39.70% -39.77%] index_select linear : Elapsed 0.152 ms (15.219 ms / 100) GOOD 15.401 -> 8.981 (-41.69%) [ +0.57% +0.10% +0.00% / -41.69% -41.47% -41.49%] index_select reverse : Elapsed 0.155 ms (15.489 ms / 100) GOOD 14.014 -> 8.656 (-38.23%) [ +0.51% +0.56% +0.00% / -36.89% -38.23% -38.21%] index_select skip64 : Elapsed 0.141 ms (14.085 ms / 100) GOOD 13.953 -> 8.436 (-39.54%) [ +0.62% +0.52% +0.00% / -39.45% -39.47% -39.54%] index_select skip256 : Elapsed 0.140 ms (14.039 ms / 100) GOOD 15.342 -> 9.327 (-39.21%) [ +0.20% +0.17% +0.00% / -39.21% -37.58% -37.70%] index_select spread : Elapsed 0.154 ms (15.373 ms / 100) GOOD 18.173 -> 10.186 (-43.95%) [ +0.45% +0.00% +0.82% / -43.32% -43.95% -43.94%] index_select strided 3 : Elapsed 0.183 ms (18.254 ms / 100) GOOD 17.226 -> 9.363 (-45.65%) [ +0.41% +0.00% +0.61% / -45.65% -44.60% -44.55%] index_select strided 5 : Elapsed 0.173 ms (17.296 ms / 100) GOOD 18.048 -> 9.830 (-45.53%) [ +0.00% +0.31% +0.23% / -44.79% -45.53% -45.45%] index_select strided 7 : Elapsed 0.180 ms (18.048 ms / 100) GOOD 18.146 -> 10.087 (-44.41%) [ +0.29% +0.03% +0.00% / -44.41% -44.18% -44.10%] index_select strided 8 : Elapsed 0.182 ms (18.199 ms / 100) GOOD 18.156 -> 9.638 (-46.92%) [ +0.15% +0.08% +0.00% / -46.43% -46.92% -46.88%] index_select strided 16 : Elapsed 0.182 ms (18.184 ms / 100) GOOD 17.800 -> 9.948 (-44.11%) [ +0.54% +0.30% +0.00% / -44.11% -43.72% -43.78%] index_select random : Elapsed 0.179 ms (17.897 ms / 100) GOOD 15.411 -> 9.248 (-39.99%) [ +0.29% +0.00% +0.16% / -39.86% -39.93% -39.99%] index_select random_sorted : Elapsed 0.155 ms (15.455 ms / 100) B = [250, 150, 15] (stride (15, 3750, 1)) A = [250, 50, 15] (stride (750, 1, 50)) dim = 1 8.066 -> 8.024 ( -0.52%) [ +0.41% +0.00% +0.25% / -0.52% +0.29% +0.09%] index_add_ linear : Elapsed 0.081 ms (8.099 ms / 100) 7.741 -> 7.710 ( -0.40%) [ +0.26% +0.03% +0.00% / -0.40% +0.13% +0.10%] index_copy_ linear : Elapsed 0.078 ms (7.761 ms / 100) 8.169 -> 8.119 ( -0.61%) [ +0.02% +0.12% +0.00% / -0.61% -0.01% +0.01%] index_add_ reverse : Elapsed 0.082 ms (8.171 ms / 100) 7.804 -> 7.771 ( -0.42%) [ +0.00% +0.00% +0.38% / -0.42% +0.15% -0.21%] index_copy_ reverse : Elapsed 0.078 ms (7.804 ms / 100) 8.239 -> 8.194 ( -0.55%) [ +0.08% +0.00% +0.06% / -0.55% -0.11% -0.28%] index_add_ spread : Elapsed 0.082 ms (8.246 ms / 100) 7.831 -> 7.800 ( -0.40%) [ +0.13% +0.00% +0.03% / -0.40% -0.06% -0.13%] index_copy_ spread : Elapsed 0.078 ms (7.841 ms / 100) 8.193 -> 8.193 ( +0.00%) [ +0.00% +0.71% +0.83% / +0.00% +1.07% +1.24%] index_add_ strided 7 : Elapsed 0.082 ms (8.193 ms / 100) 7.833 -> 7.782 ( -0.65%) [ +0.00% +0.03% +0.23% / -0.65% +0.71% +0.84%] index_copy_ strided 7 : Elapsed 0.078 ms (7.833 ms / 100) 8.232 -> 8.202 ( -0.36%) [ +0.00% +0.13% +0.19% / -0.36% +0.01% -0.06%] index_add_ perm : Elapsed 0.082 ms (8.232 ms / 100) 7.841 -> 7.801 ( -0.51%) [ +0.03% +0.00% +0.05% / -0.51% -0.05% -0.13%] index_copy_ perm : Elapsed 0.078 ms (7.843 ms / 100) 8.193 -> 8.154 ( -0.48%) [ +0.00% +0.22% +0.44% / -0.48% +0.02% +0.15%] index_add_ perm_sorted : Elapsed 0.082 ms (8.193 ms / 100) 7.819 -> 7.785 ( -0.43%) [ +0.00% +0.09% +0.03% / -0.43% +0.01% +0.03%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.819 ms / 100) 10.796 -> 10.808 ( +0.11%) [ +0.00% +0.15% +0.14% / +0.11% +0.36% +0.30%] index_select const : Elapsed 0.108 ms (10.796 ms / 100) 13.607 -> 13.646 ( +0.29%) [ +0.35% +0.00% +0.31% / +0.29% +1.94% +1.85%] index_select wrap : Elapsed 0.137 ms (13.655 ms / 100) 11.681 -> 11.724 ( +0.37%) [ +0.19% +0.00% +0.21% / +0.37% +0.79% +0.63%] index_select linear : Elapsed 0.117 ms (11.703 ms / 100) 11.982 -> 11.997 ( +0.13%) [ +0.13% +0.00% +0.12% / +0.13% +1.19% +1.12%] index_select reverse : Elapsed 0.120 ms (11.998 ms / 100) 10.796 -> 10.810 ( +0.13%) [ +0.00% +0.00% +0.16% / +0.13% +0.26% +0.28%] index_select skip64 : Elapsed 0.108 ms (10.796 ms / 100) 10.785 -> 10.823 ( +0.35%) [ +0.10% +0.07% +0.00% / +0.36% +0.51% +0.35%] index_select skip256 : Elapsed 0.108 ms (10.796 ms / 100) 12.461 -> 12.456 ( -0.04%) [ +0.00% +0.08% +0.06% / -0.04% +1.75% +1.72%] index_select spread : Elapsed 0.125 ms (12.461 ms / 100) 14.024 -> 14.098 ( +0.53%) [ +0.50% +0.00% +0.54% / +0.53% +2.69% +2.67%] index_select strided 3 : Elapsed 0.141 ms (14.094 ms / 100) 14.006 -> 14.024 ( +0.13%) [ +0.10% +0.00% +0.06% / +0.13% +2.36% +2.11%] index_select strided 5 : Elapsed 0.140 ms (14.020 ms / 100) 14.063 -> 14.102 ( +0.28%) [ +0.38% +0.00% +0.42% / +0.28% +1.96% +1.93%] index_select strided 7 : Elapsed 0.141 ms (14.116 ms / 100) 14.034 -> 14.071 ( +0.26%) [ +0.00% +0.01% +0.19% / +0.26% +2.23% +2.16%] index_select strided 8 : Elapsed 0.140 ms (14.034 ms / 100) 14.074 -> 14.095 ( +0.15%) [ +0.00% +0.22% +0.03% / +0.15% +2.03% +1.92%] index_select strided 16 : Elapsed 0.141 ms (14.074 ms / 100) 13.986 -> 14.056 ( +0.50%) [ +0.39% +0.00% +0.31% / +0.50% +2.27% +2.37%] index_select random : Elapsed 0.140 ms (14.041 ms / 100) 12.426 -> 12.412 ( -0.11%) [ +0.10% +0.00% +0.10% / -0.11% +1.53% +1.56%] index_select random_sorted : Elapsed 0.124 ms (12.439 ms / 100) B = [250, 150, 15] (stride (150, 1, 37500)) A = [250, 50, 15] (stride (50, 1, 12500)) dim = 1 GOOD 12.888 -> 7.366 (-42.85%) [ +0.00% +0.81% +0.31% / -42.33% -42.77% -42.85%] index_add_ linear : Elapsed 0.129 ms (12.888 ms / 100) GOOD 10.072 -> 6.977 (-30.73%) [ +0.00% +0.51% +0.26% / -30.16% -30.50% -30.73%] index_copy_ linear : Elapsed 0.101 ms (10.072 ms / 100) GOOD 12.954 -> 7.360 (-43.18%) [ +0.02% +0.26% +0.00% / -42.61% -43.18% -43.14%] index_add_ reverse : Elapsed 0.130 ms (12.956 ms / 100) GOOD 10.085 -> 6.989 (-30.70%) [ +0.00% +0.22% +0.06% / -30.39% -30.67% -30.70%] index_copy_ reverse : Elapsed 0.101 ms (10.085 ms / 100) GOOD 16.182 -> 8.742 (-45.98%) [ +0.09% +0.09% +0.00% / -45.98% -45.77% -45.77%] index_add_ spread : Elapsed 0.162 ms (16.197 ms / 100) GOOD 12.619 -> 8.786 (-30.37%) [ +0.10% +0.00% +0.22% / -30.37% -30.26% -30.29%] index_copy_ spread : Elapsed 0.126 ms (12.632 ms / 100) BEST 20.380 -> 8.829 (-56.68%) [ +0.34% +0.00% +0.23% / -56.67% -56.68% -56.65%] index_add_ strided 7 : Elapsed 0.205 ms (20.450 ms / 100) BEST 18.845 -> 8.889 (-52.83%) [ +0.16% +0.31% +0.00% / -52.62% -52.66% -52.83%] index_copy_ strided 7 : Elapsed 0.189 ms (18.876 ms / 100) BEST 22.742 -> 8.931 (-60.73%) [ +0.32% +0.05% +0.00% / -60.73% -60.55% -60.59%] index_add_ perm : Elapsed 0.228 ms (22.814 ms / 100) BEST 20.321 -> 8.892 (-56.24%) [ +0.20% +0.00% +0.12% / -56.17% -56.17% -56.24%] index_copy_ perm : Elapsed 0.204 ms (20.361 ms / 100) GOOD 15.888 -> 8.728 (-45.07%) [ +0.04% +0.75% +0.00% / -45.07% -44.91% -44.86%] index_add_ perm_sorted : Elapsed 0.159 ms (15.894 ms / 100) GOOD 12.587 -> 8.753 (-30.46%) [ +0.00% +0.06% +0.06% / -30.25% -30.36% -30.46%] index_copy_ perm_sorted : Elapsed 0.126 ms (12.587 ms / 100) BEST 19.535 -> 8.941 (-54.23%) [ +0.05% +0.11% +0.00% / -53.88% -54.08% -54.23%] index_select const : Elapsed 0.195 ms (19.545 ms / 100) BEST 22.521 -> 9.902 (-56.03%) [ +0.87% +0.48% +0.00% / -56.03% -55.87% -55.90%] index_select wrap : Elapsed 0.227 ms (22.716 ms / 100) BEST 20.051 -> 9.279 (-53.72%) [ +0.00% +0.19% +0.11% / -53.56% -53.72% -53.67%] index_select linear : Elapsed 0.201 ms (20.051 ms / 100) BEST 21.017 -> 9.282 (-55.84%) [ +0.00% +0.27% +0.42% / -55.78% -55.84% -55.77%] index_select reverse : Elapsed 0.210 ms (21.017 ms / 100) BEST 19.443 -> 8.918 (-54.13%) [ +0.58% +0.35% +0.00% / -53.60% -54.07% -54.13%] index_select skip64 : Elapsed 0.196 ms (19.555 ms / 100) BEST 19.413 -> 8.857 (-54.38%) [ +0.07% +0.09% +0.00% / -53.98% -54.38% -54.36%] index_select skip256 : Elapsed 0.194 ms (19.426 ms / 100) BEST 20.936 -> 9.407 (-55.07%) [ +0.22% +0.36% +0.00% / -55.07% -54.41% -54.29%] index_select spread : Elapsed 0.210 ms (20.982 ms / 100) BEST 24.501 -> 9.238 (-62.30%) [ +0.11% +0.05% +0.00% / -62.18% -62.30% -62.30%] index_select strided 3 : Elapsed 0.245 ms (24.527 ms / 100) BEST 24.816 -> 9.314 (-62.47%) [ +0.33% +0.03% +0.00% / -62.47% -61.85% -61.92%] index_select strided 5 : Elapsed 0.249 ms (24.897 ms / 100) BEST 24.884 -> 9.223 (-62.94%) [ +0.07% +0.14% +0.00% / -62.72% -62.89% -62.94%] index_select strided 7 : Elapsed 0.249 ms (24.902 ms / 100) BEST 24.950 -> 9.351 (-62.52%) [ +0.00% +0.04% +0.26% / -62.46% -62.52% -62.49%] index_select strided 8 : Elapsed 0.250 ms (24.950 ms / 100) BEST 24.684 -> 9.217 (-62.66%) [ +0.07% +0.00% +0.00% / -62.38% -62.54% -62.66%] index_select strided 16 : Elapsed 0.247 ms (24.702 ms / 100) BEST 24.626 -> 9.378 (-61.92%) [ +0.21% +0.00% +0.07% / -61.80% -61.92% -61.89%] index_select random : Elapsed 0.247 ms (24.678 ms / 100) BEST 20.901 -> 9.414 (-54.96%) [ +0.44% +0.00% +0.14% / -54.96% -54.91% -54.86%] index_select random_sorted : Elapsed 0.210 ms (20.994 ms / 100) out_shape = [250, 50, 150] in_shape = [250, 50, 15] idx_dim = 2 B = [250, 50, 150] (stride (7500, 150, 1)) dim = 2 fill_cnt = 15 8.638 -> 8.659 ( +0.24%) [ +0.31% +0.10% +0.00% / +0.24% +3.06% +2.92%] index_fill_ const : Elapsed 0.087 ms (8.665 ms / 100) 9.078 -> 9.058 ( -0.22%) [ +0.08% +0.00% +0.08% / -0.22% +3.35% +3.50%] index_fill_ linear : Elapsed 0.091 ms (9.085 ms / 100) 9.222 -> 9.245 ( +0.25%) [ +0.22% +0.47% +0.00% / +0.25% +3.08% +2.86%] index_fill_ reverse : Elapsed 0.092 ms (9.242 ms / 100) 8.631 -> 8.666 ( +0.41%) [ +0.19% +0.00% +0.02% / +0.41% +5.04% +5.11%] index_fill_ skip64 : Elapsed 0.086 ms (8.647 ms / 100) 8.600 -> 8.610 ( +0.12%) [ +0.02% +0.07% +0.00% / +0.12% +5.70% +5.59%] index_fill_ skip256 : Elapsed 0.086 ms (8.602 ms / 100) 12.759 -> 12.770 ( +0.09%) [ +0.43% +0.00% +0.07% / +0.09% +0.69% +0.49%] index_fill_ spread : Elapsed 0.128 ms (12.814 ms / 100) 10.079 -> 10.093 ( +0.14%) [ +0.03% +0.00% +0.08% / +0.14% +4.03% +4.00%] index_fill_ strided 3 : Elapsed 0.101 ms (10.082 ms / 100) 11.179 -> 11.195 ( +0.14%) [ +0.00% +0.07% +0.14% / +0.14% +1.63% +1.61%] index_fill_ strided 5 : Elapsed 0.112 ms (11.179 ms / 100) 12.244 -> 12.279 ( +0.29%) [ +0.17% +0.00% +0.09% / +0.29% +1.64% +1.32%] index_fill_ strided 7 : Elapsed 0.123 ms (12.265 ms / 100) 12.761 -> 12.771 ( +0.08%) [ +0.00% +0.10% +0.05% / +0.08% +0.54% +0.53%] index_fill_ strided 8 : Elapsed 0.128 ms (12.761 ms / 100) 12.985 -> 12.987 ( +0.02%) [ +0.02% +0.05% +0.00% / +0.02% +0.26% +0.25%] index_fill_ strided 16 : Elapsed 0.130 ms (12.988 ms / 100) 13.460 -> 13.453 ( -0.05%) [ +0.00% +0.04% +0.11% / -0.05% +0.12% +0.11%] index_fill_ strided 64 : Elapsed 0.135 ms (13.460 ms / 100) 13.427 -> 13.439 ( +0.09%) [ +0.09% +0.00% +0.10% / +0.09% +0.27% +0.20%] index_fill_ strided 100 : Elapsed 0.134 ms (13.439 ms / 100) 13.359 -> 13.347 ( -0.09%) [ +0.00% +0.00% +0.05% / +0.04% -0.09% -0.03%] index_fill_ random : Elapsed 0.134 ms (13.359 ms / 100) 11.657 -> 11.672 ( +0.13%) [ +0.00% +0.01% +0.04% / +0.13% +0.54% +0.49%] index_fill_ random_sorted : Elapsed 0.117 ms (11.657 ms / 100) 12.689 -> 12.690 ( +0.01%) [ +0.12% +0.13% +0.00% / +0.02% +0.03% +0.01%] index_fill_ perm : Elapsed 0.127 ms (12.704 ms / 100) 11.650 -> 11.649 ( -0.01%) [ +0.00% +0.10% +0.10% / -0.01% +0.35% +0.39%] index_fill_ perm_sorted : Elapsed 0.116 ms (11.650 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) A = [250, 50, 15] (stride (15, 3750, 1)) dim = 2 38.431 -> 38.465 ( +0.09%) [ +0.13% +0.07% +0.00% / +0.09% +0.56% +0.54%] index_add_ linear : Elapsed 0.385 ms (38.482 ms / 100) 28.117 -> 28.115 ( -0.01%) [ +0.09% +0.00% +0.13% / -0.01% +0.13% +0.18%] index_copy_ linear : Elapsed 0.281 ms (28.141 ms / 100) 38.576 -> 38.508 ( -0.18%) [ +0.00% +0.01% +0.01% / -0.18% +0.28% +0.40%] index_add_ reverse : Elapsed 0.386 ms (38.576 ms / 100) 28.176 -> 28.141 ( -0.12%) [ +0.00% +0.03% +0.04% / -0.12% +0.31% +0.26%] index_copy_ reverse : Elapsed 0.282 ms (28.176 ms / 100) 42.011 -> 41.867 ( -0.34%) [ +0.00% +0.08% +0.07% / +0.06% -0.29% -0.34%] index_add_ spread : Elapsed 0.420 ms (42.011 ms / 100) 30.864 -> 30.833 ( -0.10%) [ +0.04% +0.00% +0.01% / +0.07% -0.06% -0.10%] index_copy_ spread : Elapsed 0.309 ms (30.876 ms / 100) 41.653 -> 41.267 ( -0.93%) [ +0.01% +0.00% +0.06% / -0.02% -0.90% -0.93%] index_add_ strided 7 : Elapsed 0.417 ms (41.659 ms / 100) 30.585 -> 30.371 ( -0.70%) [ +0.04% +0.00% +0.12% / +0.12% -0.70% -0.67%] index_copy_ strided 7 : Elapsed 0.306 ms (30.596 ms / 100) 42.403 -> 42.371 ( -0.08%) [ +0.00% +0.09% +0.04% / +0.02% -0.06% -0.08%] index_add_ perm : Elapsed 0.424 ms (42.403 ms / 100) 31.167 -> 31.179 ( +0.04%) [ +0.10% +0.00% +0.06% / +0.10% +0.04% +0.10%] index_copy_ perm : Elapsed 0.312 ms (31.197 ms / 100) 41.253 -> 41.222 ( -0.08%) [ +0.00% +0.07% +0.06% / +0.04% -0.08% -0.06%] index_add_ perm_sorted : Elapsed 0.413 ms (41.253 ms / 100) 30.364 -> 30.290 ( -0.24%) [ +0.07% +0.00% +0.01% / +0.07% -0.24% -0.15%] index_copy_ perm_sorted : Elapsed 0.304 ms (30.385 ms / 100) BEST 176.013 -> 19.688 (-88.81%) [ +0.15% +0.00% +0.33% / -88.56% -88.81% -88.78%] index_select const : Elapsed 1.763 ms (176.279 ms / 100) BEST 182.727 -> 21.010 (-88.50%) [ +0.39% +0.18% +0.00% / -88.48% -88.50% -88.50%] index_select wrap : Elapsed 1.834 ms (183.446 ms / 100) BEST 176.119 -> 20.476 (-88.37%) [ +0.19% +0.06% +0.00% / -87.77% -88.37% -88.37%] index_select linear : Elapsed 1.764 ms (176.445 ms / 100) BEST 176.524 -> 20.616 (-88.32%) [ +0.14% +0.15% +0.00% / -88.22% -88.28% -88.32%] index_select reverse : Elapsed 1.768 ms (176.764 ms / 100) BEST 175.295 -> 20.417 (-88.35%) [ +0.16% +0.00% +0.61% / -88.31% -88.35% -88.32%] index_select skip64 : Elapsed 1.756 ms (175.577 ms / 100) BEST 175.631 -> 20.028 (-88.60%) [ +0.10% +0.26% +0.00% / -88.32% -88.60% -88.54%] index_select skip256 : Elapsed 1.758 ms (175.813 ms / 100) BEST 176.763 -> 20.122 (-88.62%) [ +0.10% +0.07% +0.00% / -88.54% -88.60% -88.62%] index_select spread : Elapsed 1.769 ms (176.947 ms / 100) BEST 186.944 -> 20.151 (-89.22%) [ +0.00% +0.04% +0.01% / -88.95% -89.20% -89.22%] index_select strided 3 : Elapsed 1.869 ms (186.944 ms / 100) BEST 185.970 -> 20.226 (-89.12%) [ +0.00% +0.13% +0.07% / -89.10% -89.09% -89.12%] index_select strided 5 : Elapsed 1.860 ms (185.970 ms / 100) BEST 186.728 -> 20.240 (-89.16%) [ +0.00% +0.23% +0.09% / -88.77% -89.15% -89.16%] index_select strided 7 : Elapsed 1.867 ms (186.728 ms / 100) BEST 186.612 -> 20.089 (-89.23%) [ +0.20% +0.12% +0.00% / -88.95% -89.21% -89.23%] index_select strided 8 : Elapsed 1.870 ms (186.994 ms / 100) BEST 185.692 -> 20.103 (-89.17%) [ +0.04% +0.27% +0.00% / -88.99% -89.17% -89.17%] index_select random : Elapsed 1.858 ms (185.758 ms / 100) BEST 176.707 -> 19.964 (-88.70%) [ +0.00% +0.07% +0.09% / -88.41% -88.70% -88.70%] index_select random_sorted : Elapsed 1.767 ms (176.707 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) A = [250, 50, 15] (stride (1, 250, 12500)) dim = 2 28.893 -> 28.908 ( +0.05%) [ +0.00% +0.05% +0.03% / +0.05% +0.80% +0.63%] index_add_ linear : Elapsed 0.289 ms (28.893 ms / 100) 19.049 -> 19.022 ( -0.14%) [ +0.15% +0.07% +0.00% / +0.13% -0.14% -0.10%] index_copy_ linear : Elapsed 0.191 ms (19.078 ms / 100) 28.933 -> 28.920 ( -0.04%) [ +0.04% +0.03% +0.00% / -0.04% +1.07% +1.16%] index_add_ reverse : Elapsed 0.289 ms (28.946 ms / 100) 19.018 -> 19.050 ( +0.17%) [ +0.01% +0.10% +0.00% / +0.17% +0.23% +0.28%] index_copy_ reverse : Elapsed 0.190 ms (19.020 ms / 100) 34.736 -> 34.645 ( -0.26%) [ +0.00% +0.09% +0.02% / +0.15% -0.20% -0.26%] index_add_ spread : Elapsed 0.347 ms (34.736 ms / 100) 22.033 -> 22.058 ( +0.11%) [ +0.00% +0.05% +0.16% / +0.11% +0.32% +0.12%] index_copy_ spread : Elapsed 0.220 ms (22.033 ms / 100) 34.074 -> 33.665 ( -1.20%) [ +0.00% +0.02% +0.11% / +0.09% -1.14% -1.20%] index_add_ strided 7 : Elapsed 0.341 ms (34.074 ms / 100) 21.701 -> 21.484 ( -1.00%) [ +0.00% +0.03% +0.29% / +0.24% -0.79% -1.00%] index_copy_ strided 7 : Elapsed 0.217 ms (21.701 ms / 100) 35.378 -> 35.200 ( -0.50%) [ +0.05% +0.05% +0.00% / +0.11% -0.50% -0.42%] index_add_ perm : Elapsed 0.354 ms (35.397 ms / 100) 22.507 -> 22.524 ( +0.08%) [ +0.00% +0.08% +0.14% / +0.22% +0.28% +0.08%] index_copy_ perm : Elapsed 0.225 ms (22.507 ms / 100) 33.107 -> 32.798 ( -0.93%) [ +0.00% +0.02% +0.12% / +0.03% -0.89% -0.93%] index_add_ perm_sorted : Elapsed 0.331 ms (33.107 ms / 100) 21.264 -> 21.125 ( -0.65%) [ +0.10% +0.00% +0.19% / +0.10% -0.55% -0.65%] index_copy_ perm_sorted : Elapsed 0.213 ms (21.286 ms / 100) BEST 120.331 -> 19.382 (-83.89%) [ +0.25% +0.34% +0.00% / -83.47% -83.89% -83.85%] index_select const : Elapsed 1.206 ms (120.635 ms / 100) BEST 152.601 -> 25.492 (-83.29%) [ +0.94% +0.64% +0.00% / -83.29% -83.23% -83.23%] index_select wrap : Elapsed 1.540 ms (154.028 ms / 100) BEST 122.560 -> 22.878 (-81.33%) [ +0.00% +0.05% +0.20% / -80.68% -81.33% -81.29%] index_select linear : Elapsed 1.226 ms (122.560 ms / 100) BEST 126.767 -> 22.966 (-81.88%) [ +0.09% +0.09% +0.00% / -81.20% -81.84% -81.88%] index_select reverse : Elapsed 1.269 ms (126.877 ms / 100) BEST 120.540 -> 18.983 (-84.25%) [ +0.00% +0.61% +0.32% / -83.51% -84.25% -84.22%] index_select skip64 : Elapsed 1.205 ms (120.540 ms / 100) BEST 120.886 -> 18.973 (-84.31%) [ +0.00% +0.07% +0.09% / -83.38% -84.31% -84.21%] index_select skip256 : Elapsed 1.209 ms (120.886 ms / 100) BEST 123.922 -> 23.335 (-81.17%) [ +0.02% +0.00% +0.06% / -80.92% -81.17% -81.15%] index_select spread : Elapsed 1.239 ms (123.947 ms / 100) BEST 139.244 -> 20.076 (-85.58%) [ +0.24% +0.20% +0.00% / -84.97% -85.58% -85.53%] index_select strided 3 : Elapsed 1.396 ms (139.573 ms / 100) BEST 126.350 -> 20.581 (-83.71%) [ +0.07% +0.14% +0.00% / -83.71% -83.53% -83.52%] index_select strided 5 : Elapsed 1.264 ms (126.436 ms / 100) BEST 153.993 -> 22.402 (-85.45%) [ +0.00% +0.22% +0.31% / -85.14% -85.45% -85.44%] index_select strided 7 : Elapsed 1.540 ms (153.993 ms / 100) BEST 153.496 -> 23.266 (-84.84%) [ +0.96% +1.09% +0.00% / -84.84% -84.75% -84.74%] index_select strided 8 : Elapsed 1.550 ms (154.963 ms / 100) BEST 145.010 -> 22.712 (-84.34%) [ +0.89% +0.52% +0.00% / -84.01% -84.34% -84.27%] index_select random : Elapsed 1.463 ms (146.296 ms / 100) BEST 123.587 -> 22.789 (-81.56%) [ +0.24% +0.14% +0.00% / -80.99% -81.54% -81.56%] index_select random_sorted : Elapsed 1.239 ms (123.883 ms / 100) B = [250, 50, 150] (stride (1, 37500, 250)) A = [250, 50, 15] (stride (50, 1, 12500)) dim = 2 8.630 -> 8.592 ( -0.44%) [ +0.17% +0.00% +0.01% / +0.31% -0.38% -0.44%] index_add_ linear : Elapsed 0.086 ms (8.645 ms / 100) 8.032 -> 8.032 ( +0.00%) [ +0.07% +0.05% +0.00% / +0.32% +0.04% +0.00%] index_copy_ linear : Elapsed 0.080 ms (8.038 ms / 100) 8.601 -> 8.564 ( -0.43%) [ +0.00% +0.23% +0.21% / +0.40% -0.35% -0.43%] index_add_ reverse : Elapsed 0.086 ms (8.601 ms / 100) 7.994 -> 8.018 ( +0.30%) [ +0.14% +0.05% +0.00% / +0.46% +0.30% +0.45%] index_copy_ reverse : Elapsed 0.080 ms (8.005 ms / 100) 8.566 -> 8.518 ( -0.56%) [ +0.16% +0.05% +0.00% / +0.33% -0.44% -0.56%] index_add_ spread : Elapsed 0.086 ms (8.580 ms / 100) 8.043 -> 8.032 ( -0.14%) [ +0.00% +0.05% +0.05% / +0.58% +0.12% -0.14%] index_copy_ spread : Elapsed 0.080 ms (8.043 ms / 100) 8.469 -> 8.501 ( +0.38%) [ +0.05% +0.11% +0.00% / +0.38% +1.05% +0.80%] index_add_ strided 7 : Elapsed 0.085 ms (8.473 ms / 100) 8.014 -> 8.049 ( +0.44%) [ +0.02% +0.00% +0.00% / +0.44% +0.64% +0.61%] index_copy_ strided 7 : Elapsed 0.080 ms (8.016 ms / 100) 8.666 -> 8.523 ( -1.65%) [ +0.00% +0.01% +0.01% / +0.09% -1.62% -1.65%] index_add_ perm : Elapsed 0.087 ms (8.666 ms / 100) 8.032 -> 8.040 ( +0.10%) [ +0.06% +0.04% +0.00% / +0.42% +0.35% +0.10%] index_copy_ perm : Elapsed 0.080 ms (8.037 ms / 100) 8.712 -> 8.506 ( -2.36%) [ +0.00% +0.00% +0.06% / +0.46% -2.25% -2.36%] index_add_ perm_sorted : Elapsed 0.087 ms (8.712 ms / 100) 8.029 -> 8.049 ( +0.25%) [ +0.00% +0.14% +0.30% / +0.54% +0.45% +0.25%] index_copy_ perm_sorted : Elapsed 0.080 ms (8.029 ms / 100) 77.473 -> 76.824 ( -0.84%) [ +0.31% +0.00% +0.06% / -0.84% +10.95% +10.78%] index_select const : Elapsed 0.777 ms (77.710 ms / 100) 83.999 -> 82.796 ( -1.43%) [ +0.61% +0.20% +0.00% / -1.43% +7.63% +7.74%] index_select wrap : Elapsed 0.845 ms (84.512 ms / 100) 75.753 -> 75.026 ( -0.96%) [ +1.60% +0.38% +0.00% / -0.96% +7.17% +7.27%] index_select linear : Elapsed 0.770 ms (76.967 ms / 100) 78.188 -> 77.688 ( -0.64%) [ +0.00% +1.30% +0.34% / -0.64% +9.75% +9.74%] index_select reverse : Elapsed 0.782 ms (78.188 ms / 100) 77.811 -> 76.827 ( -1.26%) [ +0.05% +0.00% +0.23% / -1.26% +11.41% +11.24%] index_select skip64 : Elapsed 0.779 ms (77.852 ms / 100) 77.278 -> 77.330 ( +0.07%) [ +0.75% +0.77% +0.00% / +0.07% +10.48% +10.71%] index_select skip256 : Elapsed 0.779 ms (77.855 ms / 100) 78.236 -> 77.699 ( -0.69%) [ +0.00% +0.65% +0.04% / -0.69% +9.01% +9.06%] index_select spread : Elapsed 0.782 ms (78.236 ms / 100) 82.331 -> 82.400 ( +0.08%) [ +0.74% +0.03% +0.00% / +0.08% +11.71% +12.00%] index_select strided 3 : Elapsed 0.829 ms (82.941 ms / 100) 81.703 -> 81.075 ( -0.77%) [ +0.74% +0.00% +0.40% / -0.77% +12.15% +11.88%] index_select strided 5 : Elapsed 0.823 ms (82.305 ms / 100) 84.188 -> 83.313 ( -1.04%) [ +0.31% +0.53% +0.00% / -1.04% +9.47% +9.12%] index_select strided 7 : Elapsed 0.844 ms (84.445 ms / 100) 84.055 -> 83.447 ( -0.72%) [ +0.03% +0.00% +0.22% / -0.72% +11.05% +10.38%] index_select strided 8 : Elapsed 0.841 ms (84.080 ms / 100) 80.719 -> 80.657 ( -0.08%) [ +1.19% +0.76% +0.00% / -0.08% +13.33% +13.94%] index_select random : Elapsed 0.817 ms (81.683 ms / 100) 77.870 -> 77.064 ( -1.04%) [ +0.86% +0.22% +0.00% / -1.04% +9.23% +8.78%] index_select random_sorted : Elapsed 0.785 ms (78.542 ms / 100) B = [250, 50, 150] (stride (1, 250, 12500)) A = [250, 50, 15] (stride (750, 1, 50)) dim = 2 5.180 -> 5.157 ( -0.44%) [ +0.00% +0.21% +0.06% / +0.14% -0.44% -0.14%] index_add_ linear : Elapsed 0.052 ms (5.180 ms / 100) 4.922 -> 4.929 ( +0.14%) [ +0.00% +0.16% +0.02% / +0.20% +0.14% +0.22%] index_copy_ linear : Elapsed 0.049 ms (4.922 ms / 100) 5.167 -> 5.168 ( +0.02%) [ +0.00% +0.27% +0.00% / +0.31% +0.02% +0.04%] index_add_ reverse : Elapsed 0.052 ms (5.167 ms / 100) 4.915 -> 4.932 ( +0.35%) [ +0.24% +0.04% +0.00% / +0.49% +0.35% +0.43%] index_copy_ reverse : Elapsed 0.049 ms (4.927 ms / 100) 5.247 -> 5.227 ( -0.38%) [ +0.17% +0.00% +0.34% / +0.04% -0.38% -0.30%] index_add_ spread : Elapsed 0.053 ms (5.256 ms / 100) 4.924 -> 4.934 ( +0.20%) [ +0.18% +0.12% +0.00% / +0.26% +0.35% +0.20%] index_copy_ spread : Elapsed 0.049 ms (4.933 ms / 100) 5.215 -> 5.200 ( -0.29%) [ +0.23% +0.00% +0.02% / +0.17% -0.29% -0.13%] index_add_ strided 7 : Elapsed 0.052 ms (5.227 ms / 100) 4.939 -> 4.939 ( +0.00%) [ +0.08% +0.00% +0.04% / +0.22% +0.00% +0.00%] index_copy_ strided 7 : Elapsed 0.049 ms (4.943 ms / 100) 5.247 -> 5.200 ( -0.90%) [ +0.08% +0.00% +0.06% / +0.15% -0.88% -0.90%] index_add_ perm : Elapsed 0.053 ms (5.251 ms / 100) 4.925 -> 4.930 ( +0.10%) [ +0.14% +0.00% +0.08% / +0.10% +0.26% +0.37%] index_copy_ perm : Elapsed 0.049 ms (4.932 ms / 100) 5.249 -> 5.216 ( -0.63%) [ +0.02% +0.06% +0.00% / +0.02% -0.44% -0.63%] index_add_ perm_sorted : Elapsed 0.052 ms (5.250 ms / 100) 4.931 -> 4.934 ( +0.06%) [ +0.00% +0.20% +0.12% / +0.28% +0.26% +0.06%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.931 ms / 100) 37.968 -> 37.944 ( -0.06%) [ +0.40% +0.00% +0.92% / -0.06% +3.11% +4.13%] index_select const : Elapsed 0.381 ms (38.121 ms / 100) 56.912 -> 55.741 ( -2.06%) [ +1.36% +0.00% +0.60% / +0.46% -2.06% -1.63%] index_select wrap : Elapsed 0.577 ms (57.686 ms / 100) 43.439 -> 43.472 ( +0.08%) [ +0.00% +1.10% +1.54% / +0.08% +5.92% +7.02%] index_select linear : Elapsed 0.434 ms (43.439 ms / 100) 40.777 -> 40.536 ( -0.59%) [ +1.69% +0.00% +0.95% / -0.59% +5.39% +7.37%] index_select reverse : Elapsed 0.415 ms (41.467 ms / 100) 36.736 -> 36.999 ( +0.72%) [ +1.71% +0.00% +1.19% / +0.72% +6.94% +7.28%] index_select skip64 : Elapsed 0.374 ms (37.364 ms / 100) 42.948 -> 42.900 ( -0.11%) [ +1.22% +0.78% +0.00% / -0.11% +4.15% +3.11%] index_select skip256 : Elapsed 0.435 ms (43.472 ms / 100) 43.791 -> 43.047 ( -1.70%) [ +2.00% +0.00% +0.14% / -1.70% +2.21% +3.18%] index_select spread : Elapsed 0.447 ms (44.669 ms / 100) good 57.517 -> 54.462 ( -5.31%) [ +0.00% +0.71% +0.16% / -0.67% -5.31% -4.55%] index_select strided 3 : Elapsed 0.575 ms (57.517 ms / 100) 50.689 -> 51.049 ( +0.71%) [ +1.09% +1.26% +0.00% / +0.71% +5.06% +4.71%] index_select strided 5 : Elapsed 0.512 ms (51.243 ms / 100) 56.798 -> 56.145 ( -1.15%) [ +0.72% +0.38% +0.00% / -0.38% -0.74% -1.15%] index_select strided 7 : Elapsed 0.572 ms (57.207 ms / 100) 57.494 -> 55.834 ( -2.89%) [ +0.34% +0.40% +0.00% / -0.16% -1.25% -2.89%] index_select strided 8 : Elapsed 0.577 ms (57.688 ms / 100) 54.541 -> 54.109 ( -0.79%) [ +0.76% +0.04% +0.00% / -0.48% +0.57% -0.79%] index_select random : Elapsed 0.550 ms (54.956 ms / 100) 44.259 -> 43.252 ( -2.28%) [ +0.38% +0.00% +1.55% / -0.77% -1.83% -2.28%] index_select random_sorted : Elapsed 0.444 ms (44.425 ms / 100) out_shape = [250, 50, 150] in_shape = [15, 50, 150] idx_dim = 0 B = [250, 50, 150] (stride (7500, 1, 50)) A = [15, 50, 150] (stride (1, 15, 750)) dim = 0 3.521 -> 3.535 ( +0.40%) [ +0.17% +0.37% +0.00% / +0.40% +0.91% +0.74%] index_add_ linear : Elapsed 0.035 ms (3.527 ms / 100) 3.476 -> 3.489 ( +0.37%) [ +0.00% +0.00% +0.03% / +0.37% +0.55% +0.40%] index_copy_ linear : Elapsed 0.035 ms (3.476 ms / 100) 3.529 -> 3.542 ( +0.37%) [ +0.00% +0.03% +0.23% / +0.37% +0.45% +0.60%] index_add_ reverse : Elapsed 0.035 ms (3.529 ms / 100) 3.476 -> 3.489 ( +0.37%) [ +0.17% +0.00% +0.12% / +0.37% +0.49% +0.52%] index_copy_ reverse : Elapsed 0.035 ms (3.482 ms / 100) 3.541 -> 3.546 ( +0.14%) [ +0.14% +0.00% +0.08% / +0.14% +0.79% +0.62%] index_add_ spread : Elapsed 0.035 ms (3.546 ms / 100) 3.495 -> 3.500 ( +0.14%) [ +0.00% +0.40% +0.09% / +0.29% +0.14% +0.43%] index_copy_ spread : Elapsed 0.035 ms (3.495 ms / 100) 3.531 -> 3.539 ( +0.23%) [ +0.14% +0.28% +0.00% / +0.23% +1.10% +1.13%] index_add_ strided 3 : Elapsed 0.035 ms (3.536 ms / 100) 3.474 -> 3.483 ( +0.26%) [ +0.20% +0.06% +0.00% / +0.26% +0.98% +0.98%] index_copy_ strided 3 : Elapsed 0.035 ms (3.481 ms / 100) 3.575 -> 3.557 ( -0.50%) [ +0.25% +0.31% +0.00% / +0.28% -0.50% -0.50%] index_add_ strided 7 : Elapsed 0.036 ms (3.584 ms / 100) 3.520 -> 3.494 ( -0.74%) [ +0.31% +0.00% +0.06% / +0.26% -0.54% -0.74%] index_copy_ strided 7 : Elapsed 0.035 ms (3.531 ms / 100) 3.567 -> 3.549 ( -0.50%) [ +0.00% +0.03% +0.08% / +0.36% -0.50% -0.25%] index_add_ perm : Elapsed 0.036 ms (3.567 ms / 100) 3.494 -> 3.484 ( -0.29%) [ +0.17% +0.00% +0.20% / +0.52% +0.03% -0.29%] index_copy_ perm : Elapsed 0.035 ms (3.500 ms / 100) 3.559 -> 3.553 ( -0.17%) [ +0.08% +0.14% +0.00% / +0.11% -0.14% -0.17%] index_add_ perm_sorted : Elapsed 0.036 ms (3.562 ms / 100) 3.490 -> 3.484 ( -0.17%) [ +0.00% +0.11% +0.29% / +0.43% -0.14% -0.17%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.490 ms / 100) 69.635 -> 68.565 ( -1.54%) [ +0.19% +0.22% +0.00% / +0.09% -0.18% -1.54%] index_select const : Elapsed 0.698 ms (69.765 ms / 100) 67.820 -> 67.783 ( -0.05%) [ +0.00% +0.55% +0.30% / +0.18% +0.32% -0.05%] index_select wrap : Elapsed 0.678 ms (67.820 ms / 100) 68.303 -> 68.101 ( -0.30%) [ +0.00% +0.17% +0.05% / -0.30% -0.28% -0.26%] index_select linear : Elapsed 0.683 ms (68.303 ms / 100) 69.448 -> 69.528 ( +0.12%) [ +0.49% +0.00% +0.91% / +0.16% +0.35% +0.12%] index_select reverse : Elapsed 0.698 ms (69.788 ms / 100) 69.539 -> 69.014 ( -0.75%) [ +0.28% +0.44% +0.00% / -0.14% -0.62% -0.75%] index_select skip64 : Elapsed 0.697 ms (69.731 ms / 100) 69.196 -> 68.750 ( -0.64%) [ +1.46% +1.08% +0.00% / -0.03% -0.64% -0.06%] index_select skip256 : Elapsed 0.702 ms (70.209 ms / 100) 68.001 -> 67.116 ( -1.30%) [ +0.32% +0.71% +0.00% / -1.13% -1.30% -0.99%] index_select spread : Elapsed 0.682 ms (68.219 ms / 100) 68.961 -> 69.164 ( +0.29%) [ +0.00% +1.12% +0.64% / +0.29% +0.56% +0.53%] index_select strided 3 : Elapsed 0.690 ms (68.961 ms / 100) 69.192 -> 69.061 ( -0.19%) [ +0.00% +1.25% +1.11% / -0.19% +2.33% +0.94%] index_select strided 5 : Elapsed 0.692 ms (69.192 ms / 100) 69.669 -> 69.635 ( -0.05%) [ +0.28% +0.00% +0.08% / -0.05% +1.62% +0.28%] index_select strided 7 : Elapsed 0.699 ms (69.865 ms / 100) 69.524 -> 69.460 ( -0.09%) [ +1.07% +0.00% +0.33% / -0.09% +1.50% +0.89%] index_select strided 8 : Elapsed 0.703 ms (70.268 ms / 100) 69.434 -> 69.525 ( +0.13%) [ +0.00% +0.38% +0.20% / +0.19% +0.29% +0.13%] index_select random : Elapsed 0.694 ms (69.434 ms / 100) 67.850 -> 67.108 ( -1.09%) [ +0.68% +0.00% +0.56% / -0.64% -1.07% -1.09%] index_select random_sorted : Elapsed 0.683 ms (68.309 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) dim = 0 fill_cnt = 15 1.436 -> 1.430 ( -0.42%) [ +0.91% +0.42% +0.00% / +1.11% -0.42% -0.14%] index_fill_ const : Elapsed 0.014 ms (1.449 ms / 100) 1.574 -> 1.567 ( -0.44%) [ +0.00% +0.32% +0.19% / +0.64% -0.25% -0.44%] index_fill_ linear : Elapsed 0.016 ms (1.574 ms / 100) 1.559 -> 1.572 ( +0.83%) [ +0.19% +0.32% +0.00% / +0.96% +0.90% +0.83%] index_fill_ reverse : Elapsed 0.016 ms (1.562 ms / 100) 1.437 -> 1.434 ( -0.21%) [ +0.00% +0.42% +0.00% / +0.49% +0.35% -0.21%] index_fill_ skip64 : Elapsed 0.014 ms (1.437 ms / 100) 1.432 -> 1.438 ( +0.42%) [ +0.00% +0.14% +0.00% / +0.42% +0.42% +0.70%] index_fill_ skip256 : Elapsed 0.014 ms (1.432 ms / 100) 1.568 -> 1.580 ( +0.77%) [ +0.06% +0.00% +0.00% / +0.77% +1.40% +0.96%] index_fill_ spread : Elapsed 0.016 ms (1.569 ms / 100) 1.568 -> 1.575 ( +0.45%) [ +0.32% +0.38% +0.00% / +0.77% +0.83% +0.45%] index_fill_ strided 3 : Elapsed 0.016 ms (1.573 ms / 100) 1.566 -> 1.575 ( +0.57%) [ +0.19% +0.26% +0.00% / +0.57% +0.96% +0.96%] index_fill_ strided 5 : Elapsed 0.016 ms (1.569 ms / 100) 1.574 -> 1.567 ( -0.44%) [ +0.06% +1.40% +0.00% / +0.38% -0.44% -0.19%] index_fill_ strided 7 : Elapsed 0.016 ms (1.575 ms / 100) 1.568 -> 1.576 ( +0.51%) [ +0.13% +0.13% +0.00% / +0.51% +0.77% +0.57%] index_fill_ strided 8 : Elapsed 0.016 ms (1.570 ms / 100) 1.574 -> 1.572 ( -0.13%) [ +0.25% +0.00% +0.06% / +0.70% +0.25% -0.13%] index_fill_ strided 16 : Elapsed 0.016 ms (1.578 ms / 100) 1.565 -> 1.566 ( +0.06%) [ +0.38% +0.70% +0.00% / +0.45% +0.64% +0.06%] index_fill_ strided 64 : Elapsed 0.016 ms (1.571 ms / 100) 1.494 -> 1.496 ( +0.13%) [ +0.00% +0.07% +0.54% / +0.74% +0.33% +0.13%] index_fill_ strided 100 : Elapsed 0.015 ms (1.494 ms / 100) 1.564 -> 1.573 ( +0.58%) [ +0.00% +0.00% +0.13% / +0.58% +1.02% +1.15%] index_fill_ random : Elapsed 0.016 ms (1.564 ms / 100) 1.554 -> 1.569 ( +0.97%) [ +0.58% +0.26% +0.00% / +0.97% +1.42% +1.22%] index_fill_ random_sorted : Elapsed 0.016 ms (1.563 ms / 100) 1.564 -> 1.573 ( +0.58%) [ +0.19% +0.00% +0.13% / +0.58% +1.73% +1.28%] index_fill_ perm : Elapsed 0.016 ms (1.567 ms / 100) 1.568 -> 1.574 ( +0.38%) [ +0.00% +0.32% +0.13% / +0.38% +1.28% +0.89%] index_fill_ perm_sorted : Elapsed 0.016 ms (1.568 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) A = [15, 50, 150] (stride (7500, 150, 1)) dim = 0 5.741 -> 5.772 ( +0.54%) [ +0.17% +0.00% +0.19% / +0.70% +0.57% +0.54%] index_add_ linear : Elapsed 0.058 ms (5.751 ms / 100) 5.556 -> 5.573 ( +0.31%) [ +0.13% +0.00% +0.04% / +0.59% +0.31% +0.47%] index_copy_ linear : Elapsed 0.056 ms (5.563 ms / 100) 5.758 -> 5.777 ( +0.33%) [ +0.00% +0.12% +0.02% / +0.50% +0.33% +0.45%] index_add_ reverse : Elapsed 0.058 ms (5.758 ms / 100) 5.561 -> 5.579 ( +0.32%) [ +0.00% +0.02% +0.11% / +0.52% +0.32% +0.36%] index_copy_ reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.766 -> 5.769 ( +0.05%) [ +0.00% +0.00% +0.00% / +0.52% +0.16% +0.05%] index_add_ spread : Elapsed 0.058 ms (5.766 ms / 100) 5.583 -> 5.588 ( +0.09%) [ +0.04% +0.07% +0.00% / +0.50% +0.09% +0.29%] index_copy_ spread : Elapsed 0.056 ms (5.585 ms / 100) 5.755 -> 5.767 ( +0.21%) [ +0.00% +0.10% +0.00% / +0.61% +0.43% +0.21%] index_add_ strided 3 : Elapsed 0.058 ms (5.755 ms / 100) 5.572 -> 5.598 ( +0.47%) [ +0.00% +0.09% +0.13% / +0.59% +0.47% +0.48%] index_copy_ strided 3 : Elapsed 0.056 ms (5.572 ms / 100) 5.764 -> 5.782 ( +0.31%) [ +0.16% +0.16% +0.00% / +0.78% +0.31% +0.33%] index_add_ strided 7 : Elapsed 0.058 ms (5.773 ms / 100) 5.578 -> 5.595 ( +0.30%) [ +0.25% +0.30% +0.00% / +0.75% +0.30% +0.57%] index_copy_ strided 7 : Elapsed 0.056 ms (5.592 ms / 100) 5.769 -> 5.771 ( +0.03%) [ +0.00% +0.05% +0.00% / +0.47% +0.03% +0.19%] index_add_ perm : Elapsed 0.058 ms (5.769 ms / 100) 5.589 -> 5.599 ( +0.18%) [ +0.05% +0.09% +0.00% / +0.63% +0.20% +0.18%] index_copy_ perm : Elapsed 0.056 ms (5.592 ms / 100) 5.765 -> 5.786 ( +0.36%) [ +0.00% +0.07% +0.19% / +0.64% +0.36% +0.40%] index_add_ perm_sorted : Elapsed 0.058 ms (5.765 ms / 100) 5.591 -> 5.596 ( +0.09%) [ +0.02% +0.00% +0.00% / +0.43% +0.09% +0.16%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.592 ms / 100) 44.162 -> 44.230 ( +0.15%) [ +0.00% +0.05% +0.01% / +0.42% +0.20% +0.15%] index_select const : Elapsed 0.442 ms (44.162 ms / 100) 49.858 -> 49.959 ( +0.20%) [ +0.06% +0.00% +0.20% / +0.38% +0.27% +0.20%] index_select wrap : Elapsed 0.499 ms (49.888 ms / 100) 44.053 -> 44.296 ( +0.55%) [ +0.16% +0.00% +0.10% / +0.55% +1.40% +1.38%] index_select linear : Elapsed 0.441 ms (44.124 ms / 100) 44.444 -> 44.624 ( +0.41%) [ +0.11% +0.00% +0.13% / +0.41% +1.42% +1.51%] index_select reverse : Elapsed 0.445 ms (44.493 ms / 100) 44.127 -> 44.191 ( +0.15%) [ +0.09% +0.06% +0.00% / +0.47% +0.15% +0.24%] index_select skip64 : Elapsed 0.442 ms (44.165 ms / 100) 44.195 -> 44.242 ( +0.11%) [ +0.08% +0.00% +0.08% / +0.55% +0.14% +0.11%] index_select skip256 : Elapsed 0.442 ms (44.232 ms / 100) 45.484 -> 45.312 ( -0.38%) [ +0.00% +0.05% +0.11% / +0.30% -0.38% -0.21%] index_select spread : Elapsed 0.455 ms (45.484 ms / 100) 47.817 -> 47.703 ( -0.24%) [ +0.05% +0.00% +0.01% / +0.16% -0.24% -0.18%] index_select strided 3 : Elapsed 0.478 ms (47.842 ms / 100) 46.390 -> 45.496 ( -1.93%) [ +0.07% +0.00% +0.16% / +0.39% -1.92% -1.93%] index_select strided 5 : Elapsed 0.464 ms (46.421 ms / 100) 49.871 -> 50.016 ( +0.29%) [ +0.18% +0.00% +0.18% / +0.29% +0.45% +0.31%] index_select strided 7 : Elapsed 0.500 ms (49.959 ms / 100) 49.905 -> 50.098 ( +0.39%) [ +0.18% +0.00% +0.15% / +0.39% +0.45% +0.60%] index_select strided 8 : Elapsed 0.500 ms (49.997 ms / 100) 49.089 -> 49.201 ( +0.23%) [ +0.00% +0.00% +0.19% / +0.23% +0.47% +0.37%] index_select random : Elapsed 0.491 ms (49.091 ms / 100) 45.449 -> 45.360 ( -0.20%) [ +0.09% +0.00% +0.22% / +0.41% -0.20% +0.02%] index_select random_sorted : Elapsed 0.455 ms (45.491 ms / 100) B = [250, 50, 150] (stride (150, 37500, 1)) A = [15, 50, 150] (stride (7500, 1, 50)) dim = 0 3.266 -> 3.263 ( -0.09%) [ +0.00% +0.49% +0.52% / +0.43% +0.12% -0.09%] index_add_ linear : Elapsed 0.033 ms (3.266 ms / 100) 3.037 -> 3.039 ( +0.07%) [ +0.00% +0.10% +0.10% / +0.23% +0.10% +0.07%] index_copy_ linear : Elapsed 0.030 ms (3.037 ms / 100) 3.253 -> 3.278 ( +0.77%) [ +0.00% +0.28% +0.15% / +1.44% +0.77% +1.32%] index_add_ reverse : Elapsed 0.033 ms (3.253 ms / 100) 3.035 -> 3.037 ( +0.07%) [ +0.10% +0.26% +0.00% / +0.23% +0.07% +0.40%] index_copy_ reverse : Elapsed 0.030 ms (3.038 ms / 100) 3.244 -> 3.260 ( +0.49%) [ +0.03% +0.00% +0.83% / +0.49% +1.70% +0.92%] index_add_ spread : Elapsed 0.032 ms (3.245 ms / 100) 3.039 -> 3.046 ( +0.23%) [ +0.00% +0.10% +0.26% / +0.26% +0.23% +0.69%] index_copy_ spread : Elapsed 0.030 ms (3.039 ms / 100) 3.252 -> 3.262 ( +0.31%) [ +0.00% +0.40% +0.77% / +2.34% +0.31% +0.83%] index_add_ strided 3 : Elapsed 0.033 ms (3.252 ms / 100) 3.039 -> 3.038 ( -0.03%) [ +0.20% +0.00% +0.00% / +0.33% -0.03% +0.36%] index_copy_ strided 3 : Elapsed 0.030 ms (3.045 ms / 100) 3.241 -> 3.246 ( +0.15%) [ +0.25% +0.00% +0.00% / +0.49% +0.15% +0.62%] index_add_ strided 7 : Elapsed 0.032 ms (3.249 ms / 100) 3.044 -> 3.042 ( -0.07%) [ +0.00% +0.07% +0.07% / +0.39% -0.07% -0.03%] index_copy_ strided 7 : Elapsed 0.030 ms (3.044 ms / 100) 3.250 -> 3.250 ( +0.00%) [ +0.00% +0.34% +0.92% / +1.08% +0.25% +0.00%] index_add_ perm : Elapsed 0.032 ms (3.250 ms / 100) 3.042 -> 3.043 ( +0.03%) [ +0.20% +0.30% +0.00% / +0.46% +0.07% +0.03%] index_copy_ perm : Elapsed 0.030 ms (3.048 ms / 100) 3.252 -> 3.255 ( +0.09%) [ +0.00% +0.06% +0.03% / +0.31% +0.09% +0.58%] index_add_ perm_sorted : Elapsed 0.033 ms (3.252 ms / 100) 3.041 -> 3.040 ( -0.03%) [ +0.20% +0.00% +0.03% / +0.36% +0.23% -0.03%] index_copy_ perm_sorted : Elapsed 0.030 ms (3.047 ms / 100) 20.834 -> 20.657 ( -0.85%) [ +0.00% +0.25% +0.02% / +0.10% -0.85% -0.78%] index_select const : Elapsed 0.208 ms (20.834 ms / 100) 28.004 -> 27.716 ( -1.03%) [ +0.29% +0.22% +0.00% / +0.11% -1.03% -1.01%] index_select wrap : Elapsed 0.281 ms (28.085 ms / 100) 21.436 -> 21.078 ( -1.67%) [ +0.07% +0.22% +0.00% / +0.35% -1.67% -1.61%] index_select linear : Elapsed 0.215 ms (21.451 ms / 100) 21.366 -> 21.441 ( +0.35%) [ +0.06% +0.01% +0.00% / +0.35% +1.61% +1.69%] index_select reverse : Elapsed 0.214 ms (21.378 ms / 100) 21.218 -> 20.667 ( -2.60%) [ +0.21% +0.00% +0.06% / +0.12% -2.56% -2.60%] index_select skip64 : Elapsed 0.213 ms (21.262 ms / 100) 20.721 -> 20.731 ( +0.05%) [ +0.20% +0.00% +0.07% / +0.05% +1.94% +2.08%] index_select skip256 : Elapsed 0.208 ms (20.762 ms / 100) 21.016 -> 21.002 ( -0.07%) [ +0.45% +0.00% +0.15% / +0.69% -0.07% +0.35%] index_select spread : Elapsed 0.211 ms (21.111 ms / 100) 22.668 -> 22.780 ( +0.49%) [ +0.36% +0.00% +0.00% / +0.49% +0.50% +1.65%] index_select strided 3 : Elapsed 0.227 ms (22.749 ms / 100) 21.816 -> 21.404 ( -1.89%) [ +0.22% +0.24% +0.00% / +0.14% -1.89% -1.84%] index_select strided 5 : Elapsed 0.219 ms (21.864 ms / 100) 27.817 -> 27.469 ( -1.25%) [ +0.28% +0.02% +0.00% / +0.20% -1.25% -0.93%] index_select strided 7 : Elapsed 0.279 ms (27.894 ms / 100) 27.726 -> 27.431 ( -1.06%) [ +0.22% +0.22% +0.00% / +0.51% -1.06% -0.68%] index_select strided 8 : Elapsed 0.278 ms (27.786 ms / 100) 26.710 -> 26.490 ( -0.82%) [ +0.00% +0.34% +0.65% / -0.01% -0.65% -0.82%] index_select random : Elapsed 0.267 ms (26.710 ms / 100) 21.506 -> 21.055 ( -2.10%) [ +0.34% +0.47% +0.00% / +0.17% -1.93% -2.10%] index_select random_sorted : Elapsed 0.216 ms (21.580 ms / 100) B = [250, 50, 150] (stride (1, 250, 12500)) A = [15, 50, 150] (stride (1, 2250, 15)) dim = 0 9.926 -> 9.964 ( +0.38%) [ +0.05% +0.00% +0.12% / +0.38% +2.54% +4.17%] index_add_ linear : Elapsed 0.099 ms (9.931 ms / 100) 8.326 -> 8.360 ( +0.41%) [ +0.05% +0.00% +0.06% / +0.41% +1.74% +1.73%] index_copy_ linear : Elapsed 0.083 ms (8.330 ms / 100) 9.881 -> 9.924 ( +0.44%) [ +0.07% +0.00% +0.22% / +0.44% +3.20% +3.11%] index_add_ reverse : Elapsed 0.099 ms (9.888 ms / 100) 8.304 -> 8.333 ( +0.35%) [ +0.04% +0.00% +0.01% / +0.35% +2.12% +2.07%] index_copy_ reverse : Elapsed 0.083 ms (8.307 ms / 100) 20.056 -> 20.052 ( -0.02%) [ +0.00% +0.06% +0.16% / +0.12% -0.02% -0.01%] index_add_ spread : Elapsed 0.201 ms (20.056 ms / 100) 13.418 -> 13.379 ( -0.29%) [ +0.00% +0.00% +0.41% / +0.57% -0.29% -0.28%] index_copy_ spread : Elapsed 0.134 ms (13.418 ms / 100) 11.728 -> 11.717 ( -0.09%) [ +0.02% +0.00% +0.14% / +0.11% -0.09% -0.07%] index_add_ strided 3 : Elapsed 0.117 ms (11.730 ms / 100) 8.744 -> 8.785 ( +0.47%) [ +0.08% +0.00% +0.17% / +0.47% +0.62% +0.78%] index_copy_ strided 3 : Elapsed 0.088 ms (8.751 ms / 100) 16.796 -> 16.809 ( +0.08%) [ +0.01% +0.00% +0.14% / +0.15% +0.23% +0.08%] index_add_ strided 7 : Elapsed 0.168 ms (16.797 ms / 100) 11.428 -> 11.423 ( -0.04%) [ +0.14% +0.00% +0.23% / +0.51% -0.04% +0.10%] index_copy_ strided 7 : Elapsed 0.114 ms (11.444 ms / 100) 21.965 -> 22.010 ( +0.20%) [ +0.00% +0.02% +0.10% / +0.20% +0.30% +0.26%] index_add_ perm : Elapsed 0.220 ms (21.965 ms / 100) 14.776 -> 14.817 ( +0.28%) [ +0.00% +0.04% +0.39% / +0.28% +0.29% +0.30%] index_copy_ perm : Elapsed 0.148 ms (14.776 ms / 100) 18.743 -> 18.793 ( +0.27%) [ +0.00% +0.20% +0.02% / +0.27% +0.52% +0.36%] index_add_ perm_sorted : Elapsed 0.187 ms (18.743 ms / 100) 12.716 -> 12.696 ( -0.16%) [ +0.00% +0.07% +0.09% / +0.31% -0.16% -0.08%] index_copy_ perm_sorted : Elapsed 0.127 ms (12.716 ms / 100) 136.188 -> 135.224 ( -0.71%) [ +0.00% +0.45% +0.27% / -0.06% -0.63% -0.71%] index_select const : Elapsed 1.362 ms (136.188 ms / 100) 139.016 -> 137.638 ( -0.99%) [ +0.58% +0.46% +0.00% / -0.05% -0.44% -0.99%] index_select wrap : Elapsed 1.398 ms (139.824 ms / 100) 136.153 -> 134.984 ( -0.86%) [ +0.28% +0.43% +0.00% / +0.17% -0.86% -0.86%] index_select linear : Elapsed 1.365 ms (136.539 ms / 100) 136.303 -> 134.882 ( -1.04%) [ +0.00% +0.11% +0.23% / +0.10% -0.56% -1.04%] index_select reverse : Elapsed 1.363 ms (136.303 ms / 100) 136.177 -> 134.699 ( -1.09%) [ +0.24% +0.06% +0.00% / -0.11% -0.99% -1.09%] index_select skip64 : Elapsed 1.365 ms (136.501 ms / 100) 135.152 -> 134.624 ( -0.39%) [ +0.27% +0.00% +0.15% / +0.26% -0.39% -0.29%] index_select skip256 : Elapsed 1.355 ms (135.512 ms / 100) 136.609 -> 135.128 ( -1.08%) [ +0.00% +0.29% +0.09% / -0.25% -0.92% -1.08%] index_select spread : Elapsed 1.366 ms (136.609 ms / 100) 139.979 -> 138.487 ( -1.07%) [ +0.03% +0.20% +0.00% / -0.09% -1.07% -0.59%] index_select strided 3 : Elapsed 1.400 ms (140.017 ms / 100) 139.229 -> 138.552 ( -0.49%) [ +0.38% +0.00% +0.50% / +0.60% -0.27% -0.49%] index_select strided 5 : Elapsed 1.398 ms (139.756 ms / 100) 139.330 -> 138.179 ( -0.83%) [ +0.64% +0.00% +0.57% / +0.30% -0.76% -0.83%] index_select strided 7 : Elapsed 1.402 ms (140.219 ms / 100) 139.370 -> 139.234 ( -0.10%) [ +0.71% +0.31% +0.00% / +0.64% -0.06% -0.10%] index_select strided 8 : Elapsed 1.404 ms (140.366 ms / 100) 139.658 -> 138.384 ( -0.91%) [ +0.32% +0.00% +0.41% / +0.12% -0.63% -0.91%] index_select random : Elapsed 1.401 ms (140.101 ms / 100) 136.215 -> 135.071 ( -0.84%) [ +0.42% +0.40% +0.00% / +0.55% -0.54% -0.84%] index_select random_sorted : Elapsed 1.368 ms (136.787 ms / 100) out_shape = [15, 250, 150] in_shape = [15, 50, 150] idx_dim = 1 B = [15, 250, 150] (stride (37500, 1, 250)) A = [15, 50, 150] (stride (7500, 150, 1)) dim = 1 Good 6.998 -> 6.186 (-11.60%) [ +0.61% +0.00% +0.47% / -11.22% -11.53% -11.60%] index_add_ linear : Elapsed 0.070 ms (7.041 ms / 100) 6.204 -> 6.033 ( -2.76%) [ +0.16% +0.05% +0.00% / -2.76% -2.45% -2.56%] index_copy_ linear : Elapsed 0.062 ms (6.214 ms / 100) Good 7.013 -> 6.188 (-11.76%) [ +0.10% +0.00% +0.44% / -11.56% -11.72% -11.76%] index_add_ reverse : Elapsed 0.070 ms (7.020 ms / 100) 6.205 -> 6.034 ( -2.76%) [ +0.23% +0.00% +0.06% / -2.76% -2.51% -2.58%] index_copy_ reverse : Elapsed 0.062 ms (6.219 ms / 100) GOOD 10.438 -> 7.936 (-23.97%) [ +0.29% +0.00% +0.39% / -23.97% -23.87% -23.94%] index_add_ spread : Elapsed 0.105 ms (10.468 ms / 100) Good 9.011 -> 8.034 (-10.84%) [ +0.00% +0.18% +0.20% / -10.79% -10.72% -10.84%] index_copy_ spread : Elapsed 0.090 ms (9.011 ms / 100) Good 8.887 -> 7.170 (-19.32%) [ +0.00% +0.37% +0.60% / -19.24% -19.32% -19.22%] index_add_ strided 3 : Elapsed 0.089 ms (8.887 ms / 100) good 7.754 -> 7.223 ( -6.85%) [ +0.00% +0.35% +0.52% / -6.85% -6.85% -6.60%] index_copy_ strided 3 : Elapsed 0.078 ms (7.754 ms / 100) GOOD 11.676 -> 7.985 (-31.61%) [ +0.00% +1.15% +0.92% / -31.53% -31.61% -31.59%] index_add_ strided 7 : Elapsed 0.117 ms (11.676 ms / 100) GOOD 10.248 -> 8.080 (-21.16%) [ +0.00% +0.33% +0.41% / -21.00% -21.15% -21.16%] index_copy_ strided 7 : Elapsed 0.102 ms (10.248 ms / 100) GOOD 13.568 -> 7.964 (-41.30%) [ +0.13% +0.29% +0.00% / -41.16% -41.20% -41.30%] index_add_ perm : Elapsed 0.136 ms (13.586 ms / 100) GOOD 11.505 -> 7.885 (-31.46%) [ +0.09% +0.00% +0.08% / -31.46% -31.41% -31.32%] index_copy_ perm : Elapsed 0.115 ms (11.515 ms / 100) GOOD 10.255 -> 7.832 (-23.63%) [ +0.07% +0.00% +0.20% / -23.56% -23.55% -23.63%] index_add_ perm_sorted : Elapsed 0.103 ms (10.262 ms / 100) Good 8.936 -> 7.831 (-12.37%) [ +0.00% +0.07% +0.12% / -12.33% -12.37% -12.29%] index_copy_ perm_sorted : Elapsed 0.089 ms (8.936 ms / 100) GOOD 11.550 -> 8.337 (-27.82%) [ +0.00% +1.08% +0.83% / -26.97% -27.63% -27.82%] index_select const : Elapsed 0.115 ms (11.550 ms / 100) GOOD 13.946 -> 9.718 (-30.32%) [ +0.00% +0.22% +0.03% / -29.54% -30.18% -30.32%] index_select wrap : Elapsed 0.139 ms (13.946 ms / 100) GOOD 11.940 -> 8.836 (-26.00%) [ +0.72% +0.00% +0.28% / -26.00% -25.28% -25.38%] index_select linear : Elapsed 0.120 ms (12.026 ms / 100) GOOD 12.105 -> 8.837 (-27.00%) [ +0.50% +0.74% +0.00% / -27.00% -26.18% -26.20%] index_select reverse : Elapsed 0.122 ms (12.166 ms / 100) GOOD 11.644 -> 8.312 (-28.62%) [ +0.62% +0.00% +0.24% / -28.22% -28.30% -28.62%] index_select skip64 : Elapsed 0.117 ms (11.716 ms / 100) GOOD 11.677 -> 8.342 (-28.56%) [ +1.24% +0.66% +0.00% / -28.56% -27.74% -27.57%] index_select skip256 : Elapsed 0.118 ms (11.822 ms / 100) GOOD 11.986 -> 8.930 (-25.50%) [ +0.73% +0.00% +1.68% / -25.50% -25.48% -25.35%] index_select spread : Elapsed 0.121 ms (12.073 ms / 100) GOOD 13.958 -> 9.519 (-31.80%) [ +0.08% +0.00% +0.36% / -31.80% -30.33% -30.36%] index_select strided 3 : Elapsed 0.140 ms (13.969 ms / 100) GOOD 12.763 -> 8.546 (-33.04%) [ +0.78% +0.34% +0.00% / -32.30% -33.04% -32.93%] index_select strided 5 : Elapsed 0.129 ms (12.862 ms / 100) GOOD 13.618 -> 9.710 (-28.70%) [ +0.76% +0.00% +0.41% / -28.14% -28.63% -28.70%] index_select strided 7 : Elapsed 0.137 ms (13.722 ms / 100) GOOD 13.579 -> 9.390 (-30.85%) [ +0.82% +0.74% +0.00% / -30.85% -30.29% -30.36%] index_select strided 8 : Elapsed 0.137 ms (13.690 ms / 100) GOOD 13.775 -> 9.135 (-33.68%) [ +0.36% +0.00% +0.44% / -33.68% -32.03% -32.02%] index_select strided 16 : Elapsed 0.138 ms (13.825 ms / 100) GOOD 13.650 -> 9.110 (-33.26%) [ +0.30% +0.00% +0.29% / -32.43% -33.26% -33.10%] index_select random : Elapsed 0.137 ms (13.691 ms / 100) GOOD 12.210 -> 8.821 (-27.76%) [ +0.56% +0.66% +0.00% / -27.76% -26.36% -26.45%] index_select random_sorted : Elapsed 0.123 ms (12.278 ms / 100) B = [15, 250, 150] (stride (1, 2250, 15)) A = [15, 50, 150] (stride (7500, 1, 50)) dim = 1 6.518 -> 6.480 ( -0.58%) [ +0.05% +0.00% +0.02% / -0.58% +0.06% -0.37%] index_add_ linear : Elapsed 0.065 ms (6.521 ms / 100) 6.245 -> 6.215 ( -0.48%) [ +0.00% +0.14% +0.18% / -0.48% -0.08% -0.26%] index_copy_ linear : Elapsed 0.062 ms (6.245 ms / 100) 6.528 -> 6.486 ( -0.64%) [ +0.37% +0.00% +0.11% / -0.46% -0.51% -0.64%] index_add_ reverse : Elapsed 0.066 ms (6.552 ms / 100) 6.231 -> 6.222 ( -0.14%) [ +0.42% +0.00% +0.30% / +0.06% -0.02% -0.14%] index_copy_ reverse : Elapsed 0.063 ms (6.257 ms / 100) 6.541 -> 6.499 ( -0.64%) [ +0.00% +0.32% +0.21% / -0.18% -0.55% -0.64%] index_add_ spread : Elapsed 0.065 ms (6.541 ms / 100) 6.272 -> 6.226 ( -0.73%) [ +0.00% +0.13% +0.37% / -0.37% -0.72% -0.73%] index_copy_ spread : Elapsed 0.063 ms (6.272 ms / 100) 6.537 -> 6.511 ( -0.40%) [ +0.00% +0.24% +0.02% / -0.40% -0.18% -0.31%] index_add_ strided 3 : Elapsed 0.065 ms (6.537 ms / 100) 6.267 -> 6.234 ( -0.53%) [ +0.00% +0.45% +0.19% / -0.53% -0.30% -0.43%] index_copy_ strided 3 : Elapsed 0.063 ms (6.267 ms / 100) 6.546 -> 6.513 ( -0.50%) [ +0.00% +0.21% +0.12% / -0.50% -0.46% -0.38%] index_add_ strided 7 : Elapsed 0.065 ms (6.546 ms / 100) 6.265 -> 6.217 ( -0.77%) [ +0.13% +0.00% +0.69% / -0.46% -0.77% -0.72%] index_copy_ strided 7 : Elapsed 0.063 ms (6.273 ms / 100) 6.619 -> 6.512 ( -1.62%) [ +0.18% +0.33% +0.00% / -0.47% -1.62% -1.41%] index_add_ perm : Elapsed 0.066 ms (6.631 ms / 100) 6.297 -> 6.227 ( -1.11%) [ +0.06% +0.00% +0.19% / -0.79% -1.02% -1.11%] index_copy_ perm : Elapsed 0.063 ms (6.301 ms / 100) 6.623 -> 6.539 ( -1.27%) [ +0.35% +0.00% +0.15% / -0.08% -1.27% -1.04%] index_add_ perm_sorted : Elapsed 0.066 ms (6.646 ms / 100) 6.288 -> 6.239 ( -0.78%) [ +0.75% +0.00% +0.32% / -0.33% -0.72% -0.78%] index_copy_ perm_sorted : Elapsed 0.063 ms (6.335 ms / 100) 12.712 -> 12.237 ( -3.74%) [ +2.44% +1.71% +0.00% / +1.52% -3.74% -2.79%] index_select const : Elapsed 0.130 ms (13.022 ms / 100) 14.084 -> 14.063 ( -0.15%) [ +2.25% +1.27% +0.00% / +0.21% +0.97% -0.15%] index_select wrap : Elapsed 0.144 ms (14.401 ms / 100) 12.553 -> 12.166 ( -3.08%) [ +1.43% +0.00% +0.33% / +0.00% -3.08% -2.21%] index_select linear : Elapsed 0.127 ms (12.733 ms / 100) 13.356 -> 12.727 ( -4.71%) [ +0.00% +1.23% +1.55% / +0.16% -4.71% -4.52%] index_select reverse : Elapsed 0.134 ms (13.356 ms / 100) good 12.898 -> 12.226 ( -5.21%) [ +0.00% +0.83% +0.24% / +0.21% -4.78% -5.21%] index_select skip64 : Elapsed 0.129 ms (12.898 ms / 100) 12.635 -> 12.173 ( -3.66%) [ +0.58% +0.14% +0.00% / +0.12% -3.66% -3.00%] index_select skip256 : Elapsed 0.127 ms (12.708 ms / 100) 12.888 -> 12.722 ( -1.29%) [ +0.85% +0.85% +0.00% / +0.21% -1.29% -1.23%] index_select spread : Elapsed 0.130 ms (12.997 ms / 100) 14.922 -> 14.905 ( -0.11%) [ +0.12% +1.13% +0.00% / +0.56% +0.84% -0.11%] index_select strided 3 : Elapsed 0.149 ms (14.940 ms / 100) 15.058 -> 14.954 ( -0.69%) [ +0.68% +0.51% +0.00% / +1.27% -0.69% +0.92%] index_select strided 5 : Elapsed 0.152 ms (15.160 ms / 100) 15.048 -> 14.854 ( -1.29%) [ +0.79% +2.01% +0.00% / +0.18% -0.72% -1.29%] index_select strided 7 : Elapsed 0.152 ms (15.167 ms / 100) 15.222 -> 14.865 ( -2.35%) [ +0.00% +0.78% +1.20% / +0.07% -2.35% -1.05%] index_select strided 8 : Elapsed 0.152 ms (15.222 ms / 100) 14.865 -> 14.899 ( +0.23%) [ +0.00% +0.32% +0.96% / +0.63% +0.23% +1.13%] index_select strided 16 : Elapsed 0.149 ms (14.865 ms / 100) 15.018 -> 14.810 ( -1.39%) [ +0.95% +0.00% +0.17% / +0.93% -0.92% -1.39%] index_select random : Elapsed 0.152 ms (15.160 ms / 100) 12.814 -> 12.793 ( -0.16%) [ +0.83% +0.30% +0.00% / +0.43% -0.16% +0.48%] index_select random_sorted : Elapsed 0.129 ms (12.920 ms / 100) B = [15, 250, 150] (stride (250, 1, 3750)) A = [15, 50, 150] (stride (150, 2250, 1)) dim = 1 good 6.910 -> 6.265 ( -9.33%) [ +0.17% +0.26% +0.00% / -8.84% -9.03% -9.33%] index_add_ linear : Elapsed 0.069 ms (6.922 ms / 100) 5.975 -> 6.076 ( +1.69%) [ +0.00% +0.10% +0.17% / +2.14% +1.81% +1.69%] index_copy_ linear : Elapsed 0.060 ms (5.975 ms / 100) good 6.908 -> 6.262 ( -9.35%) [ +0.00% +0.32% +0.28% / -8.64% -9.13% -9.35%] index_add_ reverse : Elapsed 0.069 ms (6.908 ms / 100) 5.966 -> 6.070 ( +1.74%) [ +0.00% +0.67% +0.13% / +2.21% +1.86% +1.74%] index_copy_ reverse : Elapsed 0.060 ms (5.966 ms / 100) GOOD 10.253 -> 7.987 (-22.10%) [ +0.00% +0.08% +0.18% / -21.07% -21.96% -22.10%] index_add_ spread : Elapsed 0.103 ms (10.253 ms / 100) good 8.717 -> 8.109 ( -6.97%) [ +0.14% +0.46% +0.00% / -6.47% -6.93% -6.97%] index_copy_ spread : Elapsed 0.087 ms (8.729 ms / 100) Good 8.785 -> 7.160 (-18.50%) [ +0.24% +0.00% +0.28% / -17.30% -18.50% -18.50%] index_add_ strided 3 : Elapsed 0.088 ms (8.806 ms / 100) 7.525 -> 7.245 ( -3.72%) [ +0.17% +0.00% +0.13% / -3.08% -3.53% -3.72%] index_copy_ strided 3 : Elapsed 0.075 ms (7.538 ms / 100) GOOD 11.836 -> 8.017 (-32.27%) [ +0.82% +0.00% +0.17% / -31.45% -32.27% -32.07%] index_add_ strided 7 : Elapsed 0.119 ms (11.933 ms / 100) Good 10.033 -> 8.121 (-19.06%) [ +1.27% +0.00% +0.57% / -18.50% -19.06% -18.99%] index_copy_ strided 7 : Elapsed 0.102 ms (10.160 ms / 100) GOOD 14.191 -> 7.933 (-44.10%) [ +0.15% +0.00% +0.14% / -43.34% -43.97% -44.10%] index_add_ perm : Elapsed 0.142 ms (14.212 ms / 100) GOOD 11.544 -> 7.878 (-31.76%) [ +0.17% +0.00% +0.09% / -31.09% -31.57% -31.76%] index_copy_ perm : Elapsed 0.116 ms (11.564 ms / 100) GOOD 9.851 -> 7.815 (-20.67%) [ +0.68% +0.37% +0.00% / -19.95% -20.49% -20.67%] index_add_ perm_sorted : Elapsed 0.099 ms (9.918 ms / 100) good 8.390 -> 7.865 ( -6.26%) [ +0.00% +0.19% +0.01% / -6.05% -6.25% -6.26%] index_copy_ perm_sorted : Elapsed 0.084 ms (8.390 ms / 100) GOOD 11.472 -> 8.492 (-25.98%) [ +0.00% +0.37% +0.73% / -25.05% -25.65% -25.98%] index_select const : Elapsed 0.115 ms (11.472 ms / 100) GOOD 13.208 -> 9.946 (-24.70%) [ +0.40% +0.42% +0.00% / -24.69% -24.70% -24.57%] index_select wrap : Elapsed 0.133 ms (13.261 ms / 100) Good 11.452 -> 9.288 (-18.90%) [ +0.41% +0.00% +0.52% / -17.79% -18.90% -18.79%] index_select linear : Elapsed 0.115 ms (11.499 ms / 100) GOOD 11.966 -> 9.261 (-22.61%) [ +0.00% +0.14% +0.64% / -21.79% -22.50% -22.61%] index_select reverse : Elapsed 0.120 ms (11.966 ms / 100) GOOD 11.166 -> 8.504 (-23.84%) [ +0.42% +0.00% +0.16% / -21.89% -23.84% -23.77%] index_select skip64 : Elapsed 0.112 ms (11.213 ms / 100) GOOD 11.381 -> 8.556 (-24.82%) [ +1.53% +0.00% +0.18% / -24.54% -24.72% -24.82%] index_select skip256 : Elapsed 0.116 ms (11.555 ms / 100) GOOD 11.985 -> 9.203 (-23.21%) [ +0.40% +0.26% +0.00% / -22.31% -23.21% -23.15%] index_select spread : Elapsed 0.120 ms (12.033 ms / 100) GOOD 13.595 -> 9.810 (-27.84%) [ +0.00% +0.04% +0.29% / -27.84% -26.31% -26.26%] index_select strided 3 : Elapsed 0.136 ms (13.595 ms / 100) GOOD 12.254 -> 8.764 (-28.48%) [ +1.40% +0.00% +0.34% / -27.71% -28.31% -28.48%] index_select strided 5 : Elapsed 0.124 ms (12.426 ms / 100) GOOD 13.488 -> 9.791 (-27.41%) [ +0.00% +0.43% +1.00% / -27.41% -25.56% -25.67%] index_select strided 7 : Elapsed 0.135 ms (13.488 ms / 100) GOOD 12.937 -> 9.490 (-26.64%) [ +0.00% +0.12% +0.81% / -25.14% -26.64% -26.64%] index_select strided 8 : Elapsed 0.129 ms (12.937 ms / 100) GOOD 13.049 -> 9.472 (-27.41%) [ +0.00% +0.26% +0.51% / -27.41% -26.21% -26.21%] index_select strided 16 : Elapsed 0.130 ms (13.049 ms / 100) GOOD 13.258 -> 9.488 (-28.44%) [ +0.80% +1.42% +0.00% / -27.92% -28.33% -28.44%] index_select random : Elapsed 0.134 ms (13.364 ms / 100) GOOD 12.134 -> 9.204 (-24.15%) [ +0.26% +0.00% +0.23% / -23.27% -24.00% -24.15%] index_select random_sorted : Elapsed 0.122 ms (12.166 ms / 100) B = [15, 250, 150] (stride (1, 15, 3750)) dim = 1 fill_cnt = 50 3.215 -> 3.176 ( -1.21%) [ +0.00% +0.16% +0.12% / -0.25% -1.15% -1.21%] index_fill_ const : Elapsed 0.032 ms (3.215 ms / 100) 3.233 -> 3.186 ( -1.45%) [ +0.09% +0.00% +0.34% / +0.12% -1.33% -1.45%] index_fill_ linear : Elapsed 0.032 ms (3.236 ms / 100) 3.191 -> 3.179 ( -0.38%) [ +0.25% +0.28% +0.00% / -0.16% -0.38% -0.22%] index_fill_ reverse : Elapsed 0.032 ms (3.199 ms / 100) 3.262 -> 3.232 ( -0.92%) [ +0.00% +0.09% +0.31% / -0.28% -0.92% -0.64%] index_fill_ skip64 : Elapsed 0.033 ms (3.262 ms / 100) 3.208 -> 3.179 ( -0.90%) [ +0.34% +0.22% +0.00% / +0.00% -0.50% -0.90%] index_fill_ skip256 : Elapsed 0.032 ms (3.219 ms / 100) 3.197 -> 3.186 ( -0.34%) [ +0.00% +0.00% +0.13% / -0.03% +0.13% -0.34%] index_fill_ spread : Elapsed 0.032 ms (3.197 ms / 100) 3.204 -> 3.185 ( -0.59%) [ +0.19% +0.22% +0.00% / -0.12% -0.59% -0.41%] index_fill_ strided 3 : Elapsed 0.032 ms (3.210 ms / 100) 3.210 -> 3.175 ( -1.09%) [ +0.16% +0.31% +0.00% / +0.03% -1.09% -1.06%] index_fill_ strided 5 : Elapsed 0.032 ms (3.215 ms / 100) 3.252 -> 3.233 ( -0.58%) [ +0.34% +0.03% +0.00% / -0.34% -0.58% -0.09%] index_fill_ strided 7 : Elapsed 0.033 ms (3.263 ms / 100) 3.237 -> 3.228 ( -0.28%) [ +0.43% +0.19% +0.00% / -0.28% -0.03% +0.06%] index_fill_ strided 8 : Elapsed 0.033 ms (3.251 ms / 100) 3.252 -> 3.218 ( -1.05%) [ +0.22% +0.00% +0.00% / -0.77% -0.65% -1.05%] index_fill_ strided 16 : Elapsed 0.033 ms (3.259 ms / 100) 3.246 -> 3.202 ( -1.36%) [ +0.00% +0.00% +0.18% / -0.74% -1.17% -1.36%] index_fill_ strided 64 : Elapsed 0.032 ms (3.246 ms / 100) 3.151 -> 3.130 ( -0.67%) [ +0.06% +0.06% +0.00% / -0.67% +0.22% -0.32%] index_fill_ strided 100 : Elapsed 0.032 ms (3.153 ms / 100) 3.252 -> 3.229 ( -0.71%) [ +0.58% +0.03% +0.00% / -0.46% -0.62% -0.71%] index_fill_ random : Elapsed 0.033 ms (3.271 ms / 100) 3.204 -> 3.206 ( +0.06%) [ +0.28% +0.31% +0.00% / +0.06% +0.53% +0.66%] index_fill_ random_sorted : Elapsed 0.032 ms (3.213 ms / 100) 3.246 -> 3.232 ( -0.43%) [ +0.09% +0.43% +0.00% / -0.34% -0.43% -0.43%] index_fill_ perm : Elapsed 0.032 ms (3.249 ms / 100) 3.215 -> 3.200 ( -0.47%) [ +0.16% +0.00% +0.09% / -0.47% -0.22% -0.19%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.220 ms / 100) out_shape = [15, 50, 250] in_shape = [15, 50, 150] idx_dim = 2 B = [15, 50, 250] (stride (12500, 250, 1)) A = [15, 50, 150] (stride (50, 1, 750)) dim = 2 good 5.829 -> 5.495 ( -5.73%) [ +0.15% +0.09% +0.00% / -5.73% -5.54% -5.28%] index_add_ linear : Elapsed 0.058 ms (5.838 ms / 100) 5.269 -> 5.403 ( +2.54%) [ +0.04% +0.38% +0.00% / +2.54% +2.70% +3.02%] index_copy_ linear : Elapsed 0.053 ms (5.271 ms / 100) good 5.826 -> 5.513 ( -5.37%) [ +0.00% +0.26% +0.05% / -5.37% -4.82% -5.36%] index_add_ reverse : Elapsed 0.058 ms (5.826 ms / 100) 5.260 -> 5.391 ( +2.49%) [ +0.00% +0.25% +0.13% / +2.83% +2.78% +2.49%] index_copy_ reverse : Elapsed 0.053 ms (5.260 ms / 100) good 6.155 -> 5.674 ( -7.81%) [ +0.31% +0.00% +0.08% / -7.81% -7.28% -7.23%] index_add_ spread : Elapsed 0.062 ms (6.174 ms / 100) 5.632 -> 5.774 ( +2.52%) [ +0.55% +0.00% +0.21% / +2.52% +3.53% +3.04%] index_copy_ spread : Elapsed 0.057 ms (5.663 ms / 100) good 6.035 -> 5.702 ( -5.52%) [ +0.00% +0.20% +0.15% / -5.52% -5.20% -5.15%] index_add_ strided 3 : Elapsed 0.060 ms (6.035 ms / 100) 5.714 -> 5.776 ( +1.09%) [ +0.00% +0.18% +0.16% / +1.09% +1.54% +1.66%] index_copy_ strided 3 : Elapsed 0.057 ms (5.714 ms / 100) 6.017 -> 5.844 ( -2.88%) [ +0.00% +0.13% +0.00% / -2.59% -2.88% -2.73%] index_add_ strided 7 : Elapsed 0.060 ms (6.017 ms / 100) 5.794 -> 5.815 ( +0.36%) [ +0.00% +0.17% +0.09% / +0.43% +0.38% +0.36%] index_copy_ strided 7 : Elapsed 0.058 ms (5.794 ms / 100) 6.094 -> 5.923 ( -2.81%) [ +0.38% +0.46% +0.00% / -2.81% -2.59% -2.69%] index_add_ perm : Elapsed 0.061 ms (6.117 ms / 100) 5.816 -> 5.848 ( +0.55%) [ +0.26% +0.21% +0.00% / +0.55% +0.93% +0.95%] index_copy_ perm : Elapsed 0.058 ms (5.831 ms / 100) good 6.156 -> 5.668 ( -7.93%) [ +0.00% +0.28% +0.18% / -7.62% -7.63% -7.93%] index_add_ perm_sorted : Elapsed 0.062 ms (6.156 ms / 100) 5.639 -> 5.774 ( +2.39%) [ +0.00% +0.16% +0.18% / +2.98% +2.62% +2.39%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.639 ms / 100) Good 5.847 -> 5.231 (-10.54%) [ +0.00% +0.00% +0.07% / -10.54% -10.09% -10.02%] index_select const : Elapsed 0.058 ms (5.847 ms / 100) 5.889 -> 5.798 ( -1.55%) [ +0.00% +0.07% +0.03% / -0.98% -1.32% -1.55%] index_select wrap : Elapsed 0.059 ms (5.889 ms / 100) 5.869 -> 5.691 ( -3.03%) [ +0.12% +0.27% +0.00% / -3.03% -2.83% -3.02%] index_select linear : Elapsed 0.059 ms (5.876 ms / 100) 5.875 -> 5.694 ( -3.08%) [ +0.00% +0.19% +0.29% / -3.08% -2.93% -2.89%] index_select reverse : Elapsed 0.059 ms (5.875 ms / 100) Good 5.848 -> 5.232 (-10.53%) [ +0.00% +0.10% +0.17% / -10.53% -10.14% -10.31%] index_select skip64 : Elapsed 0.058 ms (5.848 ms / 100) Good 5.840 -> 5.229 (-10.46%) [ +0.09% +0.12% +0.00% / -10.19% -10.46% -10.22%] index_select skip256 : Elapsed 0.058 ms (5.845 ms / 100) 5.852 -> 5.690 ( -2.77%) [ +0.05% +0.03% +0.00% / -2.77% -2.48% -2.39%] index_select spread : Elapsed 0.059 ms (5.855 ms / 100) 5.902 -> 5.930 ( +0.47%) [ +0.19% +0.07% +0.00% / +0.83% +0.59% +0.47%] index_select strided 3 : Elapsed 0.059 ms (5.913 ms / 100) 5.908 -> 5.864 ( -0.74%) [ +0.27% +0.19% +0.00% / -0.52% -0.59% -0.74%] index_select strided 5 : Elapsed 0.059 ms (5.924 ms / 100) 5.894 -> 5.782 ( -1.90%) [ +0.00% +0.15% +0.15% / -1.75% -1.82% -1.90%] index_select strided 7 : Elapsed 0.059 ms (5.894 ms / 100) 5.917 -> 5.741 ( -2.97%) [ +0.05% +0.00% +0.00% / -2.97% -2.75% -2.82%] index_select strided 8 : Elapsed 0.059 ms (5.920 ms / 100) 5.906 -> 5.710 ( -3.32%) [ +0.02% +0.00% +0.10% / -3.30% -3.27% -3.32%] index_select strided 16 : Elapsed 0.059 ms (5.907 ms / 100) 5.872 -> 5.653 ( -3.73%) [ +0.14% +0.00% +0.15% / -3.73% -2.38% -2.35%] index_select strided 64 : Elapsed 0.059 ms (5.880 ms / 100) Good 5.853 -> 5.239 (-10.49%) [ +0.05% +0.00% +0.12% / -9.69% -10.40% -10.49%] index_select strided 100 : Elapsed 0.059 ms (5.856 ms / 100) 5.882 -> 5.690 ( -3.26%) [ +0.09% +0.00% +0.14% / -2.77% -3.26% -3.21%] index_select random : Elapsed 0.059 ms (5.887 ms / 100) 5.851 -> 5.591 ( -4.44%) [ +0.14% +0.00% +0.07% / -4.39% -4.38% -4.44%] index_select random_sorted : Elapsed 0.059 ms (5.859 ms / 100) B = [15, 50, 250] (stride (250, 3750, 1)) A = [15, 50, 150] (stride (7500, 150, 1)) dim = 2 10.514 -> 10.421 ( -0.88%) [ +0.00% +0.16% +0.04% / -0.68% -0.88% -0.83%] index_add_ linear : Elapsed 0.105 ms (10.514 ms / 100) 9.874 -> 9.821 ( -0.54%) [ +0.04% +0.09% +0.00% / -0.18% -0.36% -0.54%] index_copy_ linear : Elapsed 0.099 ms (9.878 ms / 100) 10.545 -> 10.403 ( -1.35%) [ +0.08% +0.25% +0.00% / -0.85% -1.28% -1.35%] index_add_ reverse : Elapsed 0.106 ms (10.553 ms / 100) 9.894 -> 9.794 ( -1.01%) [ +0.10% +0.36% +0.00% / -0.54% -0.80% -1.01%] index_copy_ reverse : Elapsed 0.099 ms (9.904 ms / 100) 11.142 -> 11.007 ( -1.21%) [ +0.18% +0.18% +0.00% / -1.21% -0.99% -1.18%] index_add_ spread : Elapsed 0.112 ms (11.162 ms / 100) 10.463 -> 10.385 ( -0.75%) [ +0.17% +0.00% +0.22% / -0.50% -0.60% -0.75%] index_copy_ spread : Elapsed 0.105 ms (10.481 ms / 100) 12.037 -> 11.885 ( -1.26%) [ +0.25% +0.00% +0.25% / -1.00% -1.07% -1.26%] index_add_ strided 3 : Elapsed 0.121 ms (12.067 ms / 100) 11.330 -> 11.217 ( -1.00%) [ +0.10% +0.00% +0.08% / -0.61% -0.79% -1.00%] index_copy_ strided 3 : Elapsed 0.113 ms (11.341 ms / 100) 12.533 -> 12.501 ( -0.26%) [ +0.00% +0.24% +0.03% / +0.06% -0.26% -0.02%] index_add_ strided 7 : Elapsed 0.125 ms (12.533 ms / 100) 11.851 -> 11.809 ( -0.35%) [ +0.06% +0.00% +0.03% / +0.00% -0.35% -0.20%] index_copy_ strided 7 : Elapsed 0.119 ms (11.858 ms / 100) 12.539 -> 12.509 ( -0.24%) [ +0.02% +0.00% +0.01% / -0.10% -0.24% -0.17%] index_add_ perm : Elapsed 0.125 ms (12.542 ms / 100) 11.750 -> 11.730 ( -0.17%) [ +0.13% +0.00% +0.10% / +0.02% +0.24% -0.17%] index_copy_ perm : Elapsed 0.118 ms (11.765 ms / 100) 11.118 -> 10.980 ( -1.24%) [ +0.31% +0.22% +0.00% / -0.97% -1.06% -1.24%] index_add_ perm_sorted : Elapsed 0.112 ms (11.153 ms / 100) 10.436 -> 10.317 ( -1.14%) [ +0.18% +0.20% +0.00% / -0.57% -0.73% -1.14%] index_copy_ perm_sorted : Elapsed 0.105 ms (10.455 ms / 100) 10.931 -> 10.964 ( +0.30%) [ +0.00% +0.21% +0.40% / +0.30% +0.48% +0.33%] index_select const : Elapsed 0.109 ms (10.931 ms / 100) 12.139 -> 12.081 ( -0.48%) [ +0.27% +0.00% +0.05% / +0.10% -0.26% -0.48%] index_select wrap : Elapsed 0.122 ms (12.172 ms / 100) 11.668 -> 11.685 ( +0.15%) [ +0.00% +0.05% +0.04% / +0.19% +0.19% +0.15%] index_select linear : Elapsed 0.117 ms (11.668 ms / 100) 11.770 -> 11.774 ( +0.03%) [ +0.13% +0.10% +0.00% / +0.34% +0.07% +0.03%] index_select reverse : Elapsed 0.118 ms (11.785 ms / 100) 11.171 -> 11.193 ( +0.20%) [ +0.00% +0.07% +0.02% / +0.29% +0.21% +0.20%] index_select skip64 : Elapsed 0.112 ms (11.171 ms / 100) 10.943 -> 10.967 ( +0.22%) [ +0.08% +0.00% +0.01% / +0.22% +0.46% +0.36%] index_select skip256 : Elapsed 0.110 ms (10.952 ms / 100) 11.767 -> 11.719 ( -0.41%) [ +0.09% +0.18% +0.00% / +0.38% -0.41% -0.36%] index_select spread : Elapsed 0.118 ms (11.778 ms / 100) 13.035 -> 13.028 ( -0.05%) [ +0.00% +0.03% +0.06% / +0.15% +0.03% -0.05%] index_select strided 3 : Elapsed 0.130 ms (13.035 ms / 100) 12.840 -> 12.856 ( +0.12%) [ +0.00% +0.00% +0.24% / +0.21% +0.23% +0.12%] index_select strided 5 : Elapsed 0.128 ms (12.840 ms / 100) 12.845 -> 12.854 ( +0.07%) [ +0.00% +0.19% +0.23% / +0.22% +0.13% +0.07%] index_select strided 7 : Elapsed 0.128 ms (12.845 ms / 100) 12.845 -> 12.840 ( -0.04%) [ +0.19% +0.01% +0.00% / +0.01% +0.30% -0.04%] index_select strided 8 : Elapsed 0.129 ms (12.869 ms / 100) 12.796 -> 12.784 ( -0.09%) [ +0.05% +0.09% +0.00% / +0.13% -0.09% +0.09%] index_select strided 16 : Elapsed 0.128 ms (12.803 ms / 100) 12.843 -> 12.855 ( +0.09%) [ +0.17% +0.02% +0.00% / +0.20% +0.18% +0.09%] index_select strided 64 : Elapsed 0.129 ms (12.865 ms / 100) 11.695 -> 11.678 ( -0.15%) [ +0.00% +0.15% +0.03% / +0.29% -0.15% -0.09%] index_select strided 100 : Elapsed 0.117 ms (11.695 ms / 100) 12.890 -> 12.881 ( -0.07%) [ +0.19% +0.12% +0.00% / +0.13% -0.07% -0.02%] index_select random : Elapsed 0.129 ms (12.915 ms / 100) 11.724 -> 11.719 ( -0.04%) [ +0.13% +0.00% +0.00% / +0.19% -0.04% +0.13%] index_select random_sorted : Elapsed 0.117 ms (11.739 ms / 100) B = [15, 50, 250] (stride (50, 1, 750)) A = [15, 50, 150] (stride (1, 2250, 15)) dim = 2 9.443 -> 9.392 ( -0.54%) [ +0.00% +0.01% +0.11% / -0.10% -0.54% -0.08%] index_add_ linear : Elapsed 0.094 ms (9.443 ms / 100) 9.328 -> 9.314 ( -0.15%) [ +0.03% +0.00% +0.13% / +0.06% -0.15% +0.09%] index_copy_ linear : Elapsed 0.093 ms (9.331 ms / 100) 9.451 -> 9.440 ( -0.12%) [ +0.00% +0.08% +0.02% / -0.12% -0.08% +0.04%] index_add_ reverse : Elapsed 0.095 ms (9.451 ms / 100) 9.326 -> 9.323 ( -0.03%) [ +0.00% +0.25% +0.04% / -0.03% +0.00% +0.08%] index_copy_ reverse : Elapsed 0.093 ms (9.326 ms / 100) 9.441 -> 9.433 ( -0.08%) [ +0.00% +0.01% +0.13% / -0.08% +0.59% +0.85%] index_add_ spread : Elapsed 0.094 ms (9.441 ms / 100) 9.330 -> 9.320 ( -0.11%) [ +0.01% +0.00% +0.06% / -0.11% +0.32% +0.42%] index_copy_ spread : Elapsed 0.093 ms (9.331 ms / 100) 9.454 -> 9.444 ( -0.11%) [ +0.04% +0.00% +0.13% / -0.11% +0.03% -0.01%] index_add_ strided 3 : Elapsed 0.095 ms (9.458 ms / 100) 9.324 -> 9.331 ( +0.08%) [ +0.23% +0.00% +0.08% / +0.08% +0.17% +0.19%] index_copy_ strided 3 : Elapsed 0.093 ms (9.345 ms / 100) 9.474 -> 9.475 ( +0.01%) [ +0.16% +0.00% +0.36% / +0.33% +0.06% +0.01%] index_add_ strided 7 : Elapsed 0.095 ms (9.489 ms / 100) 9.353 -> 9.331 ( -0.24%) [ +0.14% +0.00% +0.10% / -0.02% -0.24% +0.00%] index_copy_ strided 7 : Elapsed 0.094 ms (9.366 ms / 100) 9.516 -> 9.447 ( -0.73%) [ +0.00% +0.06% +0.15% / -0.04% -0.36% -0.73%] index_add_ perm : Elapsed 0.095 ms (9.516 ms / 100) 9.368 -> 9.300 ( -0.73%) [ +0.17% +0.00% +0.23% / +0.13% -0.28% -0.73%] index_copy_ perm : Elapsed 0.094 ms (9.384 ms / 100) 9.451 -> 9.441 ( -0.11%) [ +0.10% +0.00% +0.05% / +0.04% -0.05% -0.11%] index_add_ perm_sorted : Elapsed 0.095 ms (9.460 ms / 100) 9.345 -> 9.316 ( -0.31%) [ +0.03% +0.00% +0.12% / +0.00% -0.09% -0.31%] index_copy_ perm_sorted : Elapsed 0.093 ms (9.348 ms / 100) 10.271 -> 10.307 ( +0.35%) [ +0.00% +0.20% +0.26% / +0.35% +0.47% +0.35%] index_select const : Elapsed 0.103 ms (10.271 ms / 100) 11.185 -> 11.126 ( -0.53%) [ +0.09% +0.00% +0.08% / +0.45% -0.17% -0.53%] index_select wrap : Elapsed 0.112 ms (11.195 ms / 100) 10.884 -> 10.897 ( +0.12%) [ +0.00% +0.08% +0.10% / +0.12% +0.40% +0.21%] index_select linear : Elapsed 0.109 ms (10.884 ms / 100) 10.898 -> 10.940 ( +0.39%) [ +0.03% +0.00% +0.03% / +0.39% +0.55% +0.78%] index_select reverse : Elapsed 0.109 ms (10.901 ms / 100) 10.290 -> 10.316 ( +0.25%) [ +0.00% +0.20% +0.09% / +0.25% +0.53% +0.55%] index_select skip64 : Elapsed 0.103 ms (10.290 ms / 100) 10.288 -> 10.298 ( +0.10%) [ +0.13% +0.02% +0.00% / +0.11% +0.29% +0.10%] index_select skip256 : Elapsed 0.103 ms (10.301 ms / 100) 10.878 -> 10.901 ( +0.21%) [ +0.07% +0.00% +0.09% / +0.21% +0.26% +0.45%] index_select spread : Elapsed 0.109 ms (10.886 ms / 100) 11.768 -> 11.778 ( +0.08%) [ +0.00% +0.06% +0.03% / +0.08% +0.21% +0.17%] index_select strided 3 : Elapsed 0.118 ms (11.768 ms / 100) 11.126 -> 11.112 ( -0.13%) [ +0.03% +0.01% +0.00% / -0.07% -0.13% -0.07%] index_select strided 5 : Elapsed 0.111 ms (11.129 ms / 100) 12.004 -> 12.020 ( +0.13%) [ +0.00% +0.00% +0.02% / +0.13% +0.28% +0.81%] index_select strided 7 : Elapsed 0.120 ms (12.004 ms / 100) 11.916 -> 11.941 ( +0.21%) [ +0.00% +0.04% +0.17% / +0.21% +0.23% +0.24%] index_select strided 8 : Elapsed 0.119 ms (11.916 ms / 100) 11.931 -> 11.932 ( +0.01%) [ +0.00% +0.07% +0.03% / +0.08% +0.19% +0.01%] index_select strided 16 : Elapsed 0.119 ms (11.931 ms / 100) 11.874 -> 11.843 ( -0.26%) [ +0.03% +0.00% +0.01% / +0.14% +0.15% -0.26%] index_select strided 64 : Elapsed 0.119 ms (11.877 ms / 100) 10.331 -> 10.353 ( +0.21%) [ +0.02% +0.00% +0.03% / +0.21% +0.22% +0.29%] index_select strided 100 : Elapsed 0.103 ms (10.333 ms / 100) 11.636 -> 11.629 ( -0.06%) [ +0.00% +0.04% +0.15% / +0.09% -0.06% -0.01%] index_select random : Elapsed 0.116 ms (11.636 ms / 100) 10.870 -> 10.875 ( +0.05%) [ +0.00% +0.08% +0.05% / +0.19% +0.14% +0.05%] index_select random_sorted : Elapsed 0.109 ms (10.870 ms / 100) out_shape = [250, 150, 50] in_shape = [15, 150, 50] idx_dim = 0 B = [250, 150, 50] (stride (7500, 50, 1)) A = [15, 150, 50] (stride (7500, 50, 1)) dim = 0 3.341 -> 3.334 ( -0.21%) [ +0.00% +0.30% +0.00% / +0.27% +0.21% -0.21%] index_add_ linear : Elapsed 0.033 ms (3.341 ms / 100) 3.093 -> 3.106 ( +0.42%) [ +0.13% +0.00% +0.29% / +0.42% +0.48% +0.61%] index_copy_ linear : Elapsed 0.031 ms (3.097 ms / 100) 3.340 -> 3.341 ( +0.03%) [ +0.00% +0.09% +0.09% / +0.03% +0.54% +0.39%] index_add_ reverse : Elapsed 0.033 ms (3.340 ms / 100) 3.093 -> 3.105 ( +0.39%) [ +0.00% +0.06% +0.06% / +0.42% +0.61% +0.39%] index_copy_ reverse : Elapsed 0.031 ms (3.093 ms / 100) 3.339 -> 3.338 ( -0.03%) [ +0.24% +0.18% +0.00% / -0.03% +0.54% +0.30%] index_add_ spread : Elapsed 0.033 ms (3.347 ms / 100) 3.098 -> 3.106 ( +0.26%) [ +0.16% +0.10% +0.00% / +0.26% +0.48% +0.61%] index_copy_ spread : Elapsed 0.031 ms (3.103 ms / 100) 3.332 -> 3.347 ( +0.45%) [ +0.33% +0.00% +0.09% / +0.54% +0.45% +0.57%] index_add_ strided 3 : Elapsed 0.033 ms (3.343 ms / 100) 3.093 -> 3.102 ( +0.29%) [ +0.00% +0.13% +0.00% / +0.29% +0.32% +0.45%] index_copy_ strided 3 : Elapsed 0.031 ms (3.093 ms / 100) 3.341 -> 3.345 ( +0.12%) [ +0.00% +0.24% +0.57% / +0.12% +0.27% +0.48%] index_add_ strided 7 : Elapsed 0.033 ms (3.341 ms / 100) 3.102 -> 3.109 ( +0.23%) [ +0.00% +0.13% +0.32% / +0.45% +0.23% +0.35%] index_copy_ strided 7 : Elapsed 0.031 ms (3.102 ms / 100) 3.341 -> 3.345 ( +0.12%) [ +0.12% +0.00% +0.09% / +0.39% +0.18% +0.12%] index_add_ perm : Elapsed 0.033 ms (3.345 ms / 100) 3.098 -> 3.104 ( +0.19%) [ +0.03% +0.03% +0.00% / +0.26% +0.19% +0.45%] index_copy_ perm : Elapsed 0.031 ms (3.099 ms / 100) 3.329 -> 3.344 ( +0.45%) [ +0.36% +0.33% +0.00% / +0.45% +0.75% +0.75%] index_add_ perm_sorted : Elapsed 0.033 ms (3.341 ms / 100) 3.090 -> 3.102 ( +0.39%) [ +0.19% +0.19% +0.00% / +0.39% +0.71% +0.61%] index_copy_ perm_sorted : Elapsed 0.031 ms (3.096 ms / 100) 12.722 -> 12.792 ( +0.55%) [ +0.13% +0.00% +0.05% / +0.55% +0.83% +0.84%] index_select const : Elapsed 0.127 ms (12.739 ms / 100) 14.812 -> 14.896 ( +0.57%) [ +0.09% +0.00% +0.06% / +0.63% +0.63% +0.57%] index_select wrap : Elapsed 0.148 ms (14.825 ms / 100) 12.969 -> 12.995 ( +0.20%) [ +0.16% +0.00% +0.06% / +0.49% +0.20% +0.24%] index_select linear : Elapsed 0.130 ms (12.990 ms / 100) 13.193 -> 13.262 ( +0.52%) [ +0.00% +0.01% +0.07% / +0.61% +0.53% +0.52%] index_select reverse : Elapsed 0.132 ms (13.193 ms / 100) 12.733 -> 12.787 ( +0.42%) [ +0.02% +0.12% +0.00% / +0.42% +0.68% +0.60%] index_select skip64 : Elapsed 0.127 ms (12.736 ms / 100) 12.723 -> 12.797 ( +0.58%) [ +0.00% +0.04% +0.05% / +0.58% +1.01% +0.75%] index_select skip256 : Elapsed 0.127 ms (12.723 ms / 100) 13.031 -> 13.047 ( +0.12%) [ +0.00% +0.18% +0.12% / +0.57% +0.16% +0.12%] index_select spread : Elapsed 0.130 ms (13.031 ms / 100) 13.535 -> 13.539 ( +0.03%) [ +0.07% +0.02% +0.00% / +0.35% +0.03% +0.07%] index_select strided 3 : Elapsed 0.135 ms (13.545 ms / 100) 13.012 -> 13.073 ( +0.47%) [ +0.08% +0.17% +0.00% / +0.47% +0.51% +0.59%] index_select strided 5 : Elapsed 0.130 ms (13.023 ms / 100) 14.854 -> 14.905 ( +0.34%) [ +0.10% +0.00% +0.03% / +0.34% +0.70% +0.60%] index_select strided 7 : Elapsed 0.149 ms (14.869 ms / 100) 14.838 -> 14.876 ( +0.26%) [ +0.24% +0.03% +0.00% / +0.26% +0.86% +0.57%] index_select strided 8 : Elapsed 0.149 ms (14.873 ms / 100) 14.404 -> 14.468 ( +0.44%) [ +0.03% +0.00% +0.03% / +0.44% +0.72% +0.62%] index_select random : Elapsed 0.144 ms (14.409 ms / 100) 13.025 -> 13.043 ( +0.14%) [ +0.08% +0.00% +0.11% / +0.48% +0.14% +0.31%] index_select random_sorted : Elapsed 0.130 ms (13.035 ms / 100) B = [250, 150, 50] (stride (7500, 50, 1)) A = [15, 150, 50] (stride (1, 15, 2250)) dim = 0 6.246 -> 6.230 ( -0.26%) [ +0.18% +0.18% +0.00% / +0.61% -0.16% -0.26%] index_add_ linear : Elapsed 0.063 ms (6.257 ms / 100) 6.004 -> 6.002 ( -0.03%) [ +0.17% +0.00% +0.12% / +0.57% -0.03% -0.03%] index_copy_ linear : Elapsed 0.060 ms (6.014 ms / 100) 6.249 -> 6.229 ( -0.32%) [ +0.14% +0.19% +0.00% / +0.45% -0.32% -0.13%] index_add_ reverse : Elapsed 0.063 ms (6.258 ms / 100) 5.996 -> 6.001 ( +0.08%) [ +0.23% +0.12% +0.00% / +0.65% +0.17% +0.08%] index_copy_ reverse : Elapsed 0.060 ms (6.010 ms / 100) 6.246 -> 6.240 ( -0.10%) [ +0.00% +0.24% +0.10% / +0.70% -0.10% +0.00%] index_add_ spread : Elapsed 0.062 ms (6.246 ms / 100) 6.007 -> 6.006 ( -0.02%) [ +0.05% +0.00% +0.02% / +0.53% +0.00% -0.02%] index_copy_ spread : Elapsed 0.060 ms (6.010 ms / 100) 6.246 -> 6.240 ( -0.10%) [ +0.16% +0.00% +0.19% / +0.51% -0.10% +0.50%] index_add_ strided 3 : Elapsed 0.063 ms (6.256 ms / 100) 6.002 -> 6.001 ( -0.02%) [ +0.00% +0.02% +0.20% / +0.42% +0.07% -0.02%] index_copy_ strided 3 : Elapsed 0.060 ms (6.002 ms / 100) 6.289 -> 6.236 ( -0.84%) [ +0.00% +0.14% +0.22% / +0.65% -0.70% -0.84%] index_add_ strided 7 : Elapsed 0.063 ms (6.289 ms / 100) 6.046 -> 6.009 ( -0.61%) [ +0.03% +0.18% +0.00% / +0.61% -0.58% -0.61%] index_copy_ strided 7 : Elapsed 0.060 ms (6.048 ms / 100) 6.223 -> 6.244 ( +0.34%) [ +0.00% +0.13% +0.16% / +0.53% +0.34% +0.35%] index_add_ perm : Elapsed 0.062 ms (6.223 ms / 100) 5.979 -> 6.005 ( +0.43%) [ +0.00% +0.08% +0.02% / +0.70% +0.50% +0.43%] index_copy_ perm : Elapsed 0.060 ms (5.979 ms / 100) 6.222 -> 6.242 ( +0.32%) [ +0.27% +0.26% +0.00% / +0.63% +0.40% +0.32%] index_add_ perm_sorted : Elapsed 0.062 ms (6.239 ms / 100) 5.983 -> 6.007 ( +0.40%) [ +0.02% +0.00% +0.07% / +0.55% +0.52% +0.40%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.984 ms / 100) 56.922 -> 57.014 ( +0.16%) [ +0.00% +0.02% +0.10% / +0.16% +0.64% +0.63%] index_select const : Elapsed 0.569 ms (56.922 ms / 100) 58.121 -> 58.341 ( +0.38%) [ +0.11% +0.00% +0.15% / +0.38% +0.86% +0.79%] index_select wrap : Elapsed 0.582 ms (58.187 ms / 100) 57.181 -> 57.366 ( +0.32%) [ +0.00% +0.08% +0.11% / +0.32% +0.66% +0.67%] index_select linear : Elapsed 0.572 ms (57.181 ms / 100) 57.376 -> 57.513 ( +0.24%) [ +0.08% +0.00% +0.23% / +0.24% +0.73% +0.76%] index_select reverse : Elapsed 0.574 ms (57.420 ms / 100) 56.863 -> 56.987 ( +0.22%) [ +0.12% +0.00% +0.27% / +0.22% +0.99% +0.92%] index_select skip64 : Elapsed 0.569 ms (56.934 ms / 100) 56.941 -> 57.052 ( +0.19%) [ +0.11% +0.00% +0.15% / +0.19% +0.62% +0.58%] index_select skip256 : Elapsed 0.570 ms (57.001 ms / 100) 57.703 -> 57.763 ( +0.10%) [ +0.02% +0.06% +0.00% / +0.10% +0.48% +0.51%] index_select spread : Elapsed 0.577 ms (57.715 ms / 100) 58.031 -> 58.154 ( +0.21%) [ +0.00% +0.08% +0.15% / +0.21% +0.69% +0.75%] index_select strided 3 : Elapsed 0.580 ms (58.031 ms / 100) 58.035 -> 58.143 ( +0.19%) [ +0.01% +0.00% +0.28% / +0.19% +0.81% +0.82%] index_select strided 5 : Elapsed 0.580 ms (58.041 ms / 100) 58.007 -> 58.131 ( +0.21%) [ +0.00% +0.05% +0.08% / +0.21% +0.84% +0.81%] index_select strided 7 : Elapsed 0.580 ms (58.007 ms / 100) 58.236 -> 58.396 ( +0.27%) [ +0.05% +0.00% +0.11% / +0.27% +0.30% +0.43%] index_select strided 8 : Elapsed 0.583 ms (58.266 ms / 100) 58.043 -> 58.173 ( +0.22%) [ +0.00% +0.00% +0.04% / +0.22% +0.76% +0.69%] index_select random : Elapsed 0.580 ms (58.043 ms / 100) 57.708 -> 57.803 ( +0.16%) [ +0.04% +0.00% +0.20% / +0.16% +0.55% +0.60%] index_select random_sorted : Elapsed 0.577 ms (57.730 ms / 100) B = [250, 150, 50] (stride (50, 12500, 1)) A = [15, 150, 50] (stride (7500, 50, 1)) dim = 0 5.771 -> 5.779 ( +0.14%) [ +0.16% +0.10% +0.00% / +0.69% +0.47% +0.14%] index_add_ linear : Elapsed 0.058 ms (5.780 ms / 100) 5.576 -> 5.587 ( +0.20%) [ +0.16% +0.18% +0.00% / +0.68% +0.20% +0.34%] index_copy_ linear : Elapsed 0.056 ms (5.585 ms / 100) 5.774 -> 5.779 ( +0.09%) [ +0.00% +0.00% +0.02% / +0.48% +0.26% +0.09%] index_add_ reverse : Elapsed 0.058 ms (5.774 ms / 100) 5.563 -> 5.581 ( +0.32%) [ +0.20% +0.00% +0.00% / +0.63% +0.45% +0.32%] index_copy_ reverse : Elapsed 0.056 ms (5.574 ms / 100) 5.826 -> 5.843 ( +0.29%) [ +0.07% +0.00% +0.15% / +0.48% +0.60% +0.29%] index_add_ spread : Elapsed 0.058 ms (5.830 ms / 100) 5.647 -> 5.662 ( +0.27%) [ +0.00% +0.14% +0.05% / +0.27% +0.50% +0.37%] index_copy_ spread : Elapsed 0.056 ms (5.647 ms / 100) 5.782 -> 5.809 ( +0.47%) [ +0.12% +0.00% +0.09% / +0.47% +1.05% +1.02%] index_add_ strided 3 : Elapsed 0.058 ms (5.789 ms / 100) 5.616 -> 5.645 ( +0.52%) [ +0.00% +0.23% +0.05% / +0.52% +1.10% +1.12%] index_copy_ strided 3 : Elapsed 0.056 ms (5.616 ms / 100) 5.838 -> 5.873 ( +0.60%) [ +0.00% +0.10% +0.21% / +0.60% +0.75% +0.72%] index_add_ strided 7 : Elapsed 0.058 ms (5.838 ms / 100) 5.662 -> 5.700 ( +0.67%) [ +0.14% +0.00% +0.07% / +0.67% +0.92% +0.72%] index_copy_ strided 7 : Elapsed 0.057 ms (5.670 ms / 100) 5.805 -> 5.837 ( +0.55%) [ +0.12% +0.07% +0.00% / +0.55% +0.91% +0.83%] index_add_ perm : Elapsed 0.058 ms (5.812 ms / 100) 5.636 -> 5.665 ( +0.51%) [ +0.11% +0.00% +0.12% / +0.64% +0.51% +0.67%] index_copy_ perm : Elapsed 0.056 ms (5.642 ms / 100) 5.812 -> 5.835 ( +0.40%) [ +0.00% +0.07% +0.05% / +0.46% +0.40% +0.52%] index_add_ perm_sorted : Elapsed 0.058 ms (5.812 ms / 100) 5.635 -> 5.662 ( +0.48%) [ +0.07% +0.11% +0.00% / +0.48% +0.73% +0.64%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.639 ms / 100) 45.907 -> 45.897 ( -0.02%) [ +0.09% +0.00% +0.03% / +0.48% -0.02% +0.17%] index_select const : Elapsed 0.459 ms (45.949 ms / 100) 52.268 -> 52.407 ( +0.27%) [ +0.00% +0.14% +0.06% / +0.27% +2.30% +1.92%] index_select wrap : Elapsed 0.523 ms (52.268 ms / 100) 46.142 -> 46.345 ( +0.44%) [ +0.00% +0.09% +0.15% / +0.44% +1.21% +1.06%] index_select linear : Elapsed 0.461 ms (46.142 ms / 100) 46.547 -> 46.760 ( +0.46%) [ +0.00% +0.08% +0.06% / +0.46% +1.13% +1.33%] index_select reverse : Elapsed 0.465 ms (46.547 ms / 100) 45.905 -> 45.891 ( -0.03%) [ +0.00% +0.05% +0.22% / +0.51% -0.03% +0.28%] index_select skip64 : Elapsed 0.459 ms (45.905 ms / 100) 45.856 -> 45.860 ( +0.01%) [ +0.05% +0.00% +0.20% / +0.58% +0.27% +0.01%] index_select skip256 : Elapsed 0.459 ms (45.881 ms / 100) 47.300 -> 47.451 ( +0.32%) [ +0.07% +0.00% +0.14% / +0.32% +2.33% +2.22%] index_select spread : Elapsed 0.473 ms (47.333 ms / 100) 49.742 -> 49.916 ( +0.35%) [ +0.00% +0.11% +0.02% / +0.35% +0.66% +1.05%] index_select strided 3 : Elapsed 0.497 ms (49.742 ms / 100) 48.131 -> 47.850 ( -0.58%) [ +0.24% +0.00% +0.17% / +0.50% -0.55% -0.58%] index_select strided 5 : Elapsed 0.482 ms (48.248 ms / 100) 52.345 -> 52.456 ( +0.21%) [ +0.10% +0.00% +0.35% / +0.21% +1.87% +1.40%] index_select strided 7 : Elapsed 0.524 ms (52.399 ms / 100) 52.353 -> 52.482 ( +0.25%) [ +0.10% +0.00% +0.12% / +0.25% +1.72% +2.19%] index_select strided 8 : Elapsed 0.524 ms (52.405 ms / 100) 51.150 -> 51.426 ( +0.54%) [ +0.00% +0.20% +0.18% / +0.54% +2.53% +2.34%] index_select random : Elapsed 0.512 ms (51.150 ms / 100) 47.234 -> 47.433 ( +0.42%) [ +0.11% +0.00% +0.15% / +0.42% +2.38% +2.40%] index_select random_sorted : Elapsed 0.473 ms (47.287 ms / 100) B = [250, 150, 50] (stride (1, 12500, 250)) A = [15, 150, 50] (stride (1, 750, 15)) dim = 0 6.289 -> 6.285 ( -0.06%) [ +0.00% +0.17% +0.14% / -0.06% +5.25% +5.23%] index_add_ linear : Elapsed 0.063 ms (6.289 ms / 100) 4.800 -> 4.806 ( +0.13%) [ +0.02% +0.04% +0.00% / +0.13% +4.46% +4.44%] index_copy_ linear : Elapsed 0.048 ms (4.801 ms / 100) 6.320 -> 6.328 ( +0.13%) [ +0.47% +0.00% +0.21% / +0.13% +6.11% +6.46%] index_add_ reverse : Elapsed 0.064 ms (6.350 ms / 100) 4.836 -> 4.836 ( +0.00%) [ +0.00% +0.00% +0.04% / +0.00% +4.82% +4.43%] index_copy_ reverse : Elapsed 0.048 ms (4.836 ms / 100) 16.538 -> 16.536 ( -0.01%) [ +0.08% +0.00% +0.02% / +0.11% -0.01% +0.04%] index_add_ spread : Elapsed 0.166 ms (16.551 ms / 100) 10.477 -> 10.418 ( -0.56%) [ +0.00% +0.02% +0.29% / +0.22% -0.56% -0.39%] index_copy_ spread : Elapsed 0.105 ms (10.477 ms / 100) 8.423 -> 8.430 ( +0.08%) [ +0.08% +0.00% +0.14% / +0.08% +0.27% +0.09%] index_add_ strided 3 : Elapsed 0.084 ms (8.430 ms / 100) 5.948 -> 5.863 ( -1.43%) [ +0.00% +0.17% +0.18% / +0.10% -1.29% -1.43%] index_copy_ strided 3 : Elapsed 0.059 ms (5.948 ms / 100) 13.157 -> 13.167 ( +0.08%) [ +0.00% +0.16% +0.09% / +0.08% +0.99% +1.09%] index_add_ strided 7 : Elapsed 0.132 ms (13.157 ms / 100) 8.656 -> 8.591 ( -0.75%) [ +0.09% +0.00% +0.24% / +0.23% -0.75% -0.70%] index_copy_ strided 7 : Elapsed 0.087 ms (8.664 ms / 100) 18.663 -> 18.663 ( +0.00%) [ +0.01% +0.00% +0.03% / +0.00% +0.17% +0.20%] index_add_ perm : Elapsed 0.187 ms (18.665 ms / 100) 11.914 -> 11.923 ( +0.08%) [ +0.01% +0.00% +0.08% / +0.08% +0.30% +0.31%] index_copy_ perm : Elapsed 0.119 ms (11.915 ms / 100) 14.239 -> 14.270 ( +0.22%) [ +0.00% +0.05% +0.19% / +0.22% +1.21% +1.29%] index_add_ perm_sorted : Elapsed 0.142 ms (14.239 ms / 100) 9.300 -> 9.304 ( +0.04%) [ +0.00% +0.05% +0.20% / +0.04% +0.43% +0.26%] index_copy_ perm_sorted : Elapsed 0.093 ms (9.300 ms / 100) BEST 74.893 -> 13.417 (-82.09%) [ +0.72% +0.14% +0.00% / -82.07% -82.09% -82.08%] index_select const : Elapsed 0.754 ms (75.432 ms / 100) BEST 78.763 -> 13.504 (-82.85%) [ +0.06% +0.63% +0.00% / -82.80% -82.84% -82.85%] index_select wrap : Elapsed 0.788 ms (78.809 ms / 100) BEST 75.360 -> 13.793 (-81.70%) [ +1.89% +0.78% +0.00% / -81.70% -81.65% -81.65%] index_select linear : Elapsed 0.768 ms (76.788 ms / 100) BEST 75.413 -> 13.738 (-81.78%) [ +0.69% +0.00% +0.23% / -81.78% -81.70% -81.72%] index_select reverse : Elapsed 0.759 ms (75.933 ms / 100) BEST 75.002 -> 13.449 (-82.07%) [ +0.32% +0.43% +0.00% / -82.07% -82.05% -82.06%] index_select skip64 : Elapsed 0.752 ms (75.242 ms / 100) BEST 75.450 -> 13.447 (-82.18%) [ +0.40% +1.03% +0.00% / -82.15% -82.18% -82.16%] index_select skip256 : Elapsed 0.758 ms (75.753 ms / 100) BEST 75.686 -> 13.857 (-81.69%) [ +0.00% +0.41% +0.53% / -81.69% -81.67% -81.69%] index_select spread : Elapsed 0.757 ms (75.686 ms / 100) BEST 78.511 -> 13.508 (-82.79%) [ +0.47% +0.14% +0.00% / -82.79% -82.78% -82.79%] index_select strided 3 : Elapsed 0.789 ms (78.879 ms / 100) BEST 78.632 -> 13.515 (-82.81%) [ +0.34% +0.51% +0.00% / -82.78% -82.81% -82.81%] index_select strided 5 : Elapsed 0.789 ms (78.900 ms / 100) BEST 78.548 -> 13.552 (-82.75%) [ +0.33% +0.77% +0.00% / -82.75% -82.72% -82.72%] index_select strided 7 : Elapsed 0.788 ms (78.809 ms / 100) BEST 78.924 -> 13.559 (-82.82%) [ +0.60% +0.57% +0.00% / -82.82% -82.79% -82.78%] index_select strided 8 : Elapsed 0.794 ms (79.401 ms / 100) BEST 78.893 -> 13.552 (-82.82%) [ +0.43% +0.34% +0.00% / -82.82% -82.82% -82.81%] index_select random : Elapsed 0.792 ms (79.232 ms / 100) BEST 75.597 -> 13.862 (-81.66%) [ +0.41% +0.36% +0.00% / -81.64% -81.66% -81.66%] index_select random_sorted : Elapsed 0.759 ms (75.905 ms / 100) B = [250, 150, 50] (stride (150, 1, 37500)) A = [15, 150, 50] (stride (1, 15, 2250)) dim = 0 4.155 -> 4.141 ( -0.34%) [ +0.12% +0.00% +0.10% / -0.05% -0.34% -0.14%] index_add_ linear : Elapsed 0.042 ms (4.160 ms / 100) 3.704 -> 3.709 ( +0.13%) [ +0.00% +0.16% +0.13% / +0.13% +1.46% +1.54%] index_copy_ linear : Elapsed 0.037 ms (3.704 ms / 100) 4.143 -> 4.132 ( -0.27%) [ +0.07% +0.07% +0.00% / +0.31% -0.12% -0.27%] index_add_ reverse : Elapsed 0.041 ms (4.146 ms / 100) 3.713 -> 3.722 ( +0.24%) [ +0.11% +0.00% +0.27% / +0.24% +0.67% +0.67%] index_copy_ reverse : Elapsed 0.037 ms (3.717 ms / 100) 3.952 -> 3.953 ( +0.03%) [ +0.18% +0.10% +0.00% / +0.03% +1.75% +1.80%] index_add_ spread : Elapsed 0.040 ms (3.959 ms / 100) 3.587 -> 3.596 ( +0.25%) [ +0.00% +0.06% +0.00% / +0.25% +1.28% +1.20%] index_copy_ spread : Elapsed 0.036 ms (3.587 ms / 100) 4.041 -> 4.040 ( -0.02%) [ +0.12% +0.00% +0.00% / -0.02% +2.43% +2.50%] index_add_ strided 3 : Elapsed 0.040 ms (4.046 ms / 100) 3.619 -> 3.622 ( +0.08%) [ +0.03% +0.00% +0.08% / +0.08% +2.18% +2.13%] index_copy_ strided 3 : Elapsed 0.036 ms (3.620 ms / 100) 3.944 -> 3.951 ( +0.18%) [ +0.00% +0.10% +0.05% / +0.20% +0.43% +0.18%] index_add_ strided 7 : Elapsed 0.039 ms (3.944 ms / 100) 3.616 -> 3.614 ( -0.06%) [ +0.06% +0.39% +0.00% / +0.17% -0.06% -0.06%] index_copy_ strided 7 : Elapsed 0.036 ms (3.618 ms / 100) 4.109 -> 4.112 ( +0.07%) [ +0.19% +0.00% +0.17% / +0.07% +1.39% +1.27%] index_add_ perm : Elapsed 0.041 ms (4.117 ms / 100) 3.639 -> 3.644 ( +0.14%) [ +0.00% +0.08% +0.05% / +0.14% +1.26% +1.26%] index_copy_ perm : Elapsed 0.036 ms (3.639 ms / 100) 4.045 -> 4.005 ( -0.99%) [ +0.00% +0.12% +0.00% / -0.10% -0.96% -0.99%] index_add_ perm_sorted : Elapsed 0.040 ms (4.045 ms / 100) 3.631 -> 3.620 ( -0.30%) [ +0.00% +0.11% +0.00% / +0.25% -0.06% -0.30%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.631 ms / 100) 69.540 -> 69.079 ( -0.66%) [ +0.00% +0.24% +0.13% / -0.66% +1.68% +1.83%] index_select const : Elapsed 0.695 ms (69.540 ms / 100) 68.071 -> 67.659 ( -0.61%) [ +0.00% +0.67% +0.43% / -0.61% +1.85% +1.43%] index_select wrap : Elapsed 0.681 ms (68.071 ms / 100) 67.763 -> 67.719 ( -0.06%) [ +0.00% +0.70% +0.33% / -0.06% +2.32% +3.15%] index_select linear : Elapsed 0.678 ms (67.763 ms / 100) 69.500 -> 69.132 ( -0.53%) [ +0.07% +0.02% +0.00% / -0.53% +1.70% +1.57%] index_select reverse : Elapsed 0.696 ms (69.552 ms / 100) 69.244 -> 69.078 ( -0.24%) [ +0.00% +0.09% +0.44% / -0.24% +1.68% +1.87%] index_select skip64 : Elapsed 0.692 ms (69.244 ms / 100) 68.937 -> 68.730 ( -0.30%) [ +0.00% +0.39% +0.15% / -0.30% +3.14% +2.54%] index_select skip256 : Elapsed 0.689 ms (68.937 ms / 100) 66.630 -> 66.650 ( +0.03%) [ +0.21% +0.00% +1.03% / +0.03% +2.99% +2.73%] index_select spread : Elapsed 0.668 ms (66.773 ms / 100) 69.511 -> 70.097 ( +0.84%) [ +0.67% +1.08% +0.00% / +0.84% +2.96% +3.64%] index_select strided 3 : Elapsed 0.700 ms (69.975 ms / 100) 69.592 -> 69.617 ( +0.04%) [ +0.00% +0.36% +0.05% / +0.04% +2.34% +3.03%] index_select strided 5 : Elapsed 0.696 ms (69.592 ms / 100) 69.827 -> 69.877 ( +0.07%) [ +0.14% +0.67% +0.00% / +0.07% +1.60% +2.72%] index_select strided 7 : Elapsed 0.699 ms (69.927 ms / 100) 69.637 -> 69.766 ( +0.19%) [ +0.00% +0.65% +0.69% / +0.19% +1.58% +2.38%] index_select strided 8 : Elapsed 0.696 ms (69.637 ms / 100) 68.406 -> 69.074 ( +0.98%) [ +1.11% +1.00% +0.00% / +0.98% +3.17% +1.92%] index_select random : Elapsed 0.692 ms (69.163 ms / 100) 66.809 -> 66.590 ( -0.33%) [ +0.68% +0.00% +0.51% / -0.33% +1.00% +1.37%] index_select random_sorted : Elapsed 0.673 ms (67.266 ms / 100) B = [250, 150, 50] (stride (1, 250, 37500)) A = [15, 150, 50] (stride (150, 1, 2250)) dim = 0 6.938 -> 6.951 ( +0.19%) [ +0.14% +0.26% +0.00% / +0.19% +4.90% +5.02%] index_add_ linear : Elapsed 0.069 ms (6.948 ms / 100) 4.913 -> 4.914 ( +0.02%) [ +0.04% +0.00% +0.00% / +0.02% +3.54% +3.91%] index_copy_ linear : Elapsed 0.049 ms (4.915 ms / 100) 7.040 -> 7.070 ( +0.43%) [ +0.00% +0.18% +0.28% / +0.43% +5.80% +5.41%] index_add_ reverse : Elapsed 0.070 ms (7.040 ms / 100) 4.939 -> 4.945 ( +0.12%) [ +0.32% +0.16% +0.00% / +0.12% +5.14% +5.00%] index_copy_ reverse : Elapsed 0.050 ms (4.955 ms / 100) 17.687 -> 17.579 ( -0.61%) [ +0.00% +0.01% +0.13% / +0.11% -0.57% -0.61%] index_add_ spread : Elapsed 0.177 ms (17.687 ms / 100) 10.374 -> 10.293 ( -0.78%) [ +0.00% +0.00% +0.41% / +0.43% -0.53% -0.78%] index_copy_ spread : Elapsed 0.104 ms (10.374 ms / 100) 9.357 -> 9.261 ( -1.03%) [ +0.00% +0.16% +0.02% / -0.01% -0.87% -1.03%] index_add_ strided 3 : Elapsed 0.094 ms (9.357 ms / 100) 6.057 -> 5.966 ( -1.50%) [ +0.20% +0.00% +0.25% / +0.00% -1.50% -1.35%] index_copy_ strided 3 : Elapsed 0.061 ms (6.069 ms / 100) 14.625 -> 14.540 ( -0.58%) [ +0.00% +0.03% +0.04% / +0.09% -0.58% -0.50%] index_add_ strided 7 : Elapsed 0.146 ms (14.625 ms / 100) 8.794 -> 8.686 ( -1.23%) [ +0.00% +0.07% +0.31% / +0.17% -1.23% -1.14%] index_copy_ strided 7 : Elapsed 0.088 ms (8.794 ms / 100) 19.629 -> 19.547 ( -0.42%) [ +0.02% +0.00% +0.10% / +0.14% -0.42% -0.27%] index_add_ perm : Elapsed 0.196 ms (19.632 ms / 100) 11.762 -> 11.717 ( -0.38%) [ +0.08% +0.00% +0.41% / +0.46% -0.38% -0.25%] index_copy_ perm : Elapsed 0.118 ms (11.771 ms / 100) 15.485 -> 15.377 ( -0.70%) [ +0.00% +0.01% +0.12% / +0.06% -0.70% -0.65%] index_add_ perm_sorted : Elapsed 0.155 ms (15.485 ms / 100) 9.441 -> 9.366 ( -0.79%) [ +0.08% +0.00% +0.65% / +0.69% -0.79% -0.68%] index_copy_ perm_sorted : Elapsed 0.094 ms (9.449 ms / 100) BEST 78.205 -> 18.990 (-75.72%) [ +0.04% +0.00% +0.03% / -75.06% -75.72% -75.64%] index_select const : Elapsed 0.782 ms (78.238 ms / 100) BEST 101.194 -> 21.871 (-78.39%) [ +0.50% +0.00% +0.21% / -78.33% -78.39% -78.30%] index_select wrap : Elapsed 1.017 ms (101.702 ms / 100) BEST 78.201 -> 21.886 (-72.01%) [ +0.52% +0.73% +0.00% / -71.37% -72.01% -71.93%] index_select linear : Elapsed 0.786 ms (78.604 ms / 100) BEST 80.409 -> 22.313 (-72.25%) [ +0.43% +0.18% +0.00% / -72.22% -72.21% -72.25%] index_select reverse : Elapsed 0.808 ms (80.754 ms / 100) BEST 78.178 -> 19.168 (-75.48%) [ +0.30% +0.00% +0.06% / -75.40% -75.48% -75.43%] index_select skip64 : Elapsed 0.784 ms (78.415 ms / 100) BEST 78.174 -> 18.983 (-75.72%) [ +0.00% +0.18% +0.09% / -74.24% -75.65% -75.72%] index_select skip256 : Elapsed 0.782 ms (78.174 ms / 100) BEST 80.735 -> 21.769 (-73.04%) [ +0.28% +0.17% +0.00% / -72.82% -73.04% -72.89%] index_select spread : Elapsed 0.810 ms (80.962 ms / 100) BEST 88.220 -> 20.480 (-76.79%) [ +0.00% +0.44% +0.17% / -76.79% -76.68% -76.73%] index_select strided 3 : Elapsed 0.882 ms (88.220 ms / 100) BEST 83.164 -> 19.524 (-76.52%) [ +0.73% +0.30% +0.00% / -76.01% -76.38% -76.52%] index_select strided 5 : Elapsed 0.838 ms (83.775 ms / 100) BEST 100.914 -> 22.026 (-78.17%) [ +0.00% +0.45% +0.65% / -78.17% -78.09% -78.07%] index_select strided 7 : Elapsed 1.009 ms (100.914 ms / 100) BEST 100.688 -> 21.779 (-78.37%) [ +0.00% +0.40% +0.25% / -77.91% -78.35% -78.37%] index_select strided 8 : Elapsed 1.007 ms (100.688 ms / 100) BEST 94.072 -> 21.706 (-76.93%) [ +0.05% +0.00% +0.05% / -76.76% -76.86% -76.93%] index_select random : Elapsed 0.941 ms (94.120 ms / 100) BEST 80.826 -> 21.912 (-72.89%) [ +0.77% +0.00% +0.64% / -72.89% -72.83% -72.84%] index_select random_sorted : Elapsed 0.814 ms (81.448 ms / 100) out_shape = [15, 250, 50] in_shape = [15, 150, 50] idx_dim = 1 B = [15, 250, 50] (stride (12500, 50, 1)) A = [15, 150, 50] (stride (7500, 50, 1)) dim = 1 6.064 -> 6.013 ( -0.84%) [ +0.25% +0.00% +0.15% / -0.71% -0.81% -0.84%] index_add_ linear : Elapsed 0.061 ms (6.079 ms / 100) 5.774 -> 5.730 ( -0.76%) [ +0.00% +0.05% +0.10% / -0.68% -0.73% -0.76%] index_copy_ linear : Elapsed 0.058 ms (5.774 ms / 100) 6.073 -> 6.026 ( -0.77%) [ +0.25% +0.02% +0.00% / -0.77% -0.26% -0.43%] index_add_ reverse : Elapsed 0.061 ms (6.088 ms / 100) 5.804 -> 5.755 ( -0.84%) [ +0.14% +0.00% +0.02% / -0.84% -0.72% -0.55%] index_copy_ reverse : Elapsed 0.058 ms (5.812 ms / 100) 6.154 -> 6.112 ( -0.68%) [ +0.00% +0.02% +0.18% / -0.68% +0.02% -0.21%] index_add_ spread : Elapsed 0.062 ms (6.154 ms / 100) 5.889 -> 5.864 ( -0.42%) [ +0.00% +0.32% +0.63% / -0.42% -0.20% -0.14%] index_copy_ spread : Elapsed 0.059 ms (5.889 ms / 100) 6.205 -> 6.152 ( -0.85%) [ +0.10% +0.00% +0.45% / -0.85% -0.27% -0.68%] index_add_ strided 3 : Elapsed 0.062 ms (6.211 ms / 100) 5.923 -> 5.876 ( -0.79%) [ +0.39% +0.00% +0.39% / -0.79% -0.62% -0.64%] index_copy_ strided 3 : Elapsed 0.059 ms (5.946 ms / 100) 6.256 -> 6.164 ( -1.47%) [ +0.11% +0.00% +0.11% / -1.13% -1.41% -1.47%] index_add_ strided 7 : Elapsed 0.063 ms (6.263 ms / 100) 5.958 -> 5.897 ( -1.02%) [ +0.54% +0.00% +0.25% / -0.76% -1.02% -0.94%] index_copy_ strided 7 : Elapsed 0.060 ms (5.990 ms / 100) 6.173 -> 6.128 ( -0.73%) [ +0.19% +0.11% +0.00% / -0.73% -0.29% -0.28%] index_add_ perm : Elapsed 0.062 ms (6.185 ms / 100) 5.905 -> 5.854 ( -0.86%) [ +0.03% +0.22% +0.00% / -0.86% -0.66% -0.61%] index_copy_ perm : Elapsed 0.059 ms (5.907 ms / 100) 6.120 -> 6.091 ( -0.47%) [ +0.02% +0.00% +0.08% / -0.47% -0.36% -0.11%] index_add_ perm_sorted : Elapsed 0.061 ms (6.121 ms / 100) 5.854 -> 5.820 ( -0.58%) [ +0.00% +0.09% +0.44% / -0.58% -0.58% -0.53%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.854 ms / 100) 5.907 -> 5.905 ( -0.03%) [ +0.00% +0.20% +0.02% / +0.29% +0.10% -0.03%] index_select const : Elapsed 0.059 ms (5.907 ms / 100) 6.472 -> 6.493 ( +0.32%) [ +0.00% +0.53% +0.31% / +0.32% +1.17% +1.11%] index_select wrap : Elapsed 0.065 ms (6.472 ms / 100) 6.342 -> 6.340 ( -0.03%) [ +0.13% +0.05% +0.00% / -0.03% +0.52% +0.14%] index_select linear : Elapsed 0.063 ms (6.350 ms / 100) 6.342 -> 6.348 ( +0.09%) [ +0.00% +0.05% +0.33% / +0.09% +0.36% +0.16%] index_select reverse : Elapsed 0.063 ms (6.342 ms / 100) 5.943 -> 5.962 ( +0.32%) [ +0.13% +0.00% +0.30% / +0.32% +0.47% +0.37%] index_select skip64 : Elapsed 0.060 ms (5.951 ms / 100) 5.905 -> 5.914 ( +0.15%) [ +0.24% +0.29% +0.00% / +0.15% +0.83% +0.78%] index_select skip256 : Elapsed 0.059 ms (5.919 ms / 100) 6.387 -> 6.391 ( +0.06%) [ +0.22% +0.00% +0.03% / +0.39% +0.06% +0.20%] index_select spread : Elapsed 0.064 ms (6.401 ms / 100) 6.362 -> 6.344 ( -0.28%) [ +0.00% +0.14% +0.03% / -0.28% +0.69% +0.82%] index_select strided 3 : Elapsed 0.064 ms (6.362 ms / 100) 6.217 -> 6.233 ( +0.26%) [ +0.00% +0.42% +0.43% / +0.26% +1.13% +0.80%] index_select strided 5 : Elapsed 0.062 ms (6.217 ms / 100) 6.529 -> 6.552 ( +0.35%) [ +0.25% +0.20% +0.00% / +0.35% +0.63% +0.83%] index_select strided 7 : Elapsed 0.065 ms (6.545 ms / 100) 6.493 -> 6.475 ( -0.28%) [ +0.14% +0.00% +0.06% / -0.22% -0.28% -0.17%] index_select strided 8 : Elapsed 0.065 ms (6.502 ms / 100) 6.473 -> 6.486 ( +0.20%) [ +0.23% +0.00% +0.12% / +0.20% +0.76% +0.79%] index_select strided 16 : Elapsed 0.065 ms (6.488 ms / 100) 6.432 -> 6.455 ( +0.36%) [ +0.79% +0.00% +0.25% / +0.36% +1.49% +1.32%] index_select strided 64 : Elapsed 0.065 ms (6.483 ms / 100) 5.943 -> 5.963 ( +0.34%) [ +0.37% +0.00% +0.19% / +0.34% +0.98% +0.96%] index_select strided 100 : Elapsed 0.060 ms (5.965 ms / 100) 6.500 -> 6.480 ( -0.31%) [ +0.00% +0.06% +0.00% / -0.09% -0.20% -0.31%] index_select random : Elapsed 0.065 ms (6.500 ms / 100) 6.326 -> 6.307 ( -0.30%) [ +0.14% +0.13% +0.00% / +0.02% -0.30% -0.22%] index_select random_sorted : Elapsed 0.063 ms (6.335 ms / 100) B = [15, 250, 50] (stride (12500, 1, 250)) dim = 1 fill_cnt = 150 Good 3.095 -> 2.698 (-12.83%) [ +0.16% +0.42% +0.00% / -12.83% -12.76% -12.70%] index_fill_ const : Elapsed 0.031 ms (3.100 ms / 100) Good 3.137 -> 2.768 (-11.76%) [ +0.06% +0.00% +0.06% / -11.60% -11.76% -11.76%] index_fill_ linear : Elapsed 0.031 ms (3.139 ms / 100) Good 3.154 -> 2.780 (-11.86%) [ +0.00% +0.19% +0.03% / -11.35% -11.86% -11.60%] index_fill_ reverse : Elapsed 0.032 ms (3.154 ms / 100) Good 3.069 -> 2.698 (-12.09%) [ +0.00% +0.33% +0.23% / -11.80% -11.89% -12.09%] index_fill_ skip64 : Elapsed 0.031 ms (3.069 ms / 100) Good 3.115 -> 2.719 (-12.71%) [ +0.39% +0.32% +0.00% / -12.71% -12.33% -12.52%] index_fill_ skip256 : Elapsed 0.031 ms (3.127 ms / 100) Good 3.479 -> 3.104 (-10.78%) [ +0.06% +0.00% +0.17% / -10.38% -10.46% -10.78%] index_fill_ spread : Elapsed 0.035 ms (3.481 ms / 100) good 3.503 -> 3.168 ( -9.56%) [ +0.09% +0.06% +0.00% / -9.56% -9.33% -9.36%] index_fill_ strided 3 : Elapsed 0.035 ms (3.506 ms / 100) good 3.546 -> 3.204 ( -9.64%) [ +0.00% +0.03% +0.08% / -9.64% -9.08% -9.17%] index_fill_ strided 5 : Elapsed 0.035 ms (3.546 ms / 100) good 3.591 -> 3.311 ( -7.80%) [ +0.08% +0.25% +0.00% / -7.80% -7.44% -7.60%] index_fill_ strided 7 : Elapsed 0.036 ms (3.594 ms / 100) good 3.622 -> 3.365 ( -7.10%) [ +0.06% +0.00% +0.19% / -6.65% -7.10% -7.01%] index_fill_ strided 8 : Elapsed 0.036 ms (3.624 ms / 100) good 3.621 -> 3.436 ( -5.11%) [ +0.50% +0.00% +0.08% / -5.11% -4.92% -4.83%] index_fill_ strided 16 : Elapsed 0.036 ms (3.639 ms / 100) 3.629 -> 3.459 ( -4.68%) [ +0.47% +0.00% +0.30% / -4.27% -4.68% -4.44%] index_fill_ strided 64 : Elapsed 0.036 ms (3.646 ms / 100) good 3.183 -> 2.922 ( -8.20%) [ +0.03% +0.00% +0.22% / -8.17% -8.11% -8.20%] index_fill_ strided 100 : Elapsed 0.032 ms (3.184 ms / 100) good 3.613 -> 3.383 ( -6.37%) [ +0.00% +0.06% +0.14% / -6.37% -6.37% -6.31%] index_fill_ random : Elapsed 0.036 ms (3.613 ms / 100) Good 3.463 -> 3.109 (-10.22%) [ +0.00% +0.26% +0.17% / -10.22% -9.96% -9.99%] index_fill_ random_sorted : Elapsed 0.035 ms (3.463 ms / 100) good 3.616 -> 3.392 ( -6.19%) [ +0.00% +0.19% +0.00% / -6.19% -6.14% -6.06%] index_fill_ perm : Elapsed 0.036 ms (3.616 ms / 100) Good 3.452 -> 3.105 (-10.05%) [ +0.00% +0.32% +0.06% / -9.70% -10.05% -10.02%] index_fill_ perm_sorted : Elapsed 0.035 ms (3.452 ms / 100) B = [15, 250, 50] (stride (12500, 1, 250)) A = [15, 150, 50] (stride (1, 15, 2250)) dim = 1 6.631 -> 6.489 ( -2.14%) [ +0.65% +0.00% +0.39% / -2.13% -2.14% -1.95%] index_add_ linear : Elapsed 0.067 ms (6.674 ms / 100) 6.095 -> 6.304 ( +3.43%) [ +0.36% +0.00% +0.05% / +3.43% +3.82% +3.76%] index_copy_ linear : Elapsed 0.061 ms (6.117 ms / 100) 6.660 -> 6.506 ( -2.31%) [ +0.32% +0.00% +0.39% / -2.15% -2.16% -2.31%] index_add_ reverse : Elapsed 0.067 ms (6.681 ms / 100) 6.121 -> 6.305 ( +3.01%) [ +0.00% +0.15% +0.18% / +3.28% +3.25% +3.01%] index_copy_ reverse : Elapsed 0.061 ms (6.121 ms / 100) 7.063 -> 6.725 ( -4.79%) [ +0.00% +0.14% +0.16% / -4.79% -3.98% -3.92%] index_add_ spread : Elapsed 0.071 ms (7.063 ms / 100) 6.559 -> 6.734 ( +2.67%) [ +0.38% +0.00% +0.69% / +2.67% +3.31% +3.54%] index_copy_ spread : Elapsed 0.066 ms (6.584 ms / 100) Good 7.592 -> 6.832 (-10.01%) [ +0.18% +0.46% +0.00% / -9.79% -10.01% -9.97%] index_add_ strided 3 : Elapsed 0.076 ms (7.606 ms / 100) 7.082 -> 6.830 ( -3.56%) [ +0.14% +0.06% +0.00% / -3.56% -3.33% -3.32%] index_copy_ strided 3 : Elapsed 0.071 ms (7.092 ms / 100) Good 7.788 -> 6.865 (-11.85%) [ +0.00% +0.13% +0.48% / -11.85% -11.66% -11.83%] index_add_ strided 7 : Elapsed 0.078 ms (7.788 ms / 100) 7.177 -> 6.849 ( -4.57%) [ +0.10% +0.00% +0.22% / -4.57% -3.90% -4.11%] index_copy_ strided 7 : Elapsed 0.072 ms (7.184 ms / 100) Good 7.869 -> 6.889 (-12.45%) [ +0.01% +0.13% +0.00% / -12.21% -12.45% -12.25%] index_add_ perm : Elapsed 0.079 ms (7.870 ms / 100) 7.140 -> 6.859 ( -3.94%) [ +0.00% +0.15% +0.06% / -3.94% -3.63% -3.57%] index_copy_ perm : Elapsed 0.071 ms (7.140 ms / 100) 7.078 -> 6.763 ( -4.45%) [ +0.42% +0.14% +0.00% / -4.45% -3.93% -4.28%] index_add_ perm_sorted : Elapsed 0.071 ms (7.108 ms / 100) 6.614 -> 6.768 ( +2.33%) [ +0.30% +0.00% +0.41% / +2.33% +2.93% +2.60%] index_copy_ perm_sorted : Elapsed 0.066 ms (6.634 ms / 100) Good 6.990 -> 5.912 (-15.42%) [ +0.09% +0.00% +0.04% / -15.09% -15.19% -15.42%] index_select const : Elapsed 0.070 ms (6.996 ms / 100) 7.092 -> 6.959 ( -1.88%) [ +0.07% +0.00% +0.08% / -1.11% -1.88% -1.82%] index_select wrap : Elapsed 0.071 ms (7.097 ms / 100) 7.081 -> 6.937 ( -2.03%) [ +0.16% +0.14% +0.00% / -0.75% -2.03% -1.96%] index_select linear : Elapsed 0.071 ms (7.092 ms / 100) 6.874 -> 6.972 ( +1.43%) [ +0.09% +0.19% +0.00% / +1.48% +1.47% +1.43%] index_select reverse : Elapsed 0.069 ms (6.880 ms / 100) Good 6.823 -> 5.950 (-12.79%) [ +0.00% +0.22% +0.15% / -12.33% -12.28% -12.79%] index_select skip64 : Elapsed 0.068 ms (6.823 ms / 100) Good 6.974 -> 5.905 (-15.33%) [ +0.11% +0.13% +0.00% / -15.23% -15.17% -15.33%] index_select skip256 : Elapsed 0.070 ms (6.982 ms / 100) 6.849 -> 6.949 ( +1.46%) [ +0.23% +0.06% +0.00% / +2.73% +1.46% +1.46%] index_select spread : Elapsed 0.069 ms (6.865 ms / 100) Good 7.515 -> 6.699 (-10.86%) [ +0.20% +0.00% +0.20% / -10.86% -10.23% -10.30%] index_select strided 3 : Elapsed 0.075 ms (7.530 ms / 100) good 7.049 -> 6.376 ( -9.55%) [ +0.14% +0.00% +0.10% / -9.46% -9.55% -9.35%] index_select strided 5 : Elapsed 0.071 ms (7.059 ms / 100) good 7.681 -> 6.990 ( -9.00%) [ +0.21% +0.12% +0.00% / -7.84% -8.76% -9.00%] index_select strided 7 : Elapsed 0.077 ms (7.697 ms / 100) good 7.627 -> 6.929 ( -9.15%) [ +0.04% +0.00% +0.28% / -9.06% -9.15% -9.06%] index_select strided 8 : Elapsed 0.076 ms (7.630 ms / 100) good 7.621 -> 6.963 ( -8.63%) [ +0.09% +0.00% +0.28% / -8.21% -8.63% -8.38%] index_select strided 16 : Elapsed 0.076 ms (7.628 ms / 100) good 7.570 -> 6.961 ( -8.04%) [ +0.00% +0.00% +0.22% / -7.71% -8.01% -8.04%] index_select strided 64 : Elapsed 0.076 ms (7.570 ms / 100) Good 6.646 -> 5.959 (-10.34%) [ +0.03% +0.03% +0.00% / -10.34% -10.17% -10.11%] index_select strided 100 : Elapsed 0.066 ms (6.648 ms / 100) good 7.425 -> 6.986 ( -5.91%) [ +0.36% +0.05% +0.00% / -4.74% -5.54% -5.91%] index_select random : Elapsed 0.075 ms (7.452 ms / 100) 6.835 -> 6.939 ( +1.52%) [ +0.22% +0.01% +0.00% / +1.68% +1.61% +1.52%] index_select random_sorted : Elapsed 0.069 ms (6.850 ms / 100) B = [15, 250, 50] (stride (1, 750, 15)) A = [15, 150, 50] (stride (7500, 50, 1)) dim = 1 6.079 -> 6.042 ( -0.61%) [ +0.33% +0.08% +0.00% / -0.61% +0.61% +0.46%] index_add_ linear : Elapsed 0.061 ms (6.099 ms / 100) 5.776 -> 5.765 ( -0.19%) [ +0.55% +0.52% +0.00% / -0.19% +0.50% +0.59%] index_copy_ linear : Elapsed 0.058 ms (5.808 ms / 100) 6.095 -> 6.051 ( -0.72%) [ +0.20% +0.00% +0.10% / -0.72% -0.48% -0.28%] index_add_ reverse : Elapsed 0.061 ms (6.107 ms / 100) 5.803 -> 5.762 ( -0.71%) [ +0.00% +0.28% +0.22% / -0.71% -0.31% -0.41%] index_copy_ reverse : Elapsed 0.058 ms (5.803 ms / 100) 6.072 -> 6.044 ( -0.46%) [ +0.56% +0.23% +0.00% / -0.46% +1.20% +1.19%] index_add_ spread : Elapsed 0.061 ms (6.106 ms / 100) 5.793 -> 5.758 ( -0.60%) [ +0.45% +0.59% +0.00% / -0.60% +0.50% +0.45%] index_copy_ spread : Elapsed 0.058 ms (5.819 ms / 100) 6.050 -> 6.020 ( -0.50%) [ +0.12% +0.00% +0.25% / -0.50% +1.06% +0.99%] index_add_ strided 3 : Elapsed 0.061 ms (6.057 ms / 100) 5.770 -> 5.733 ( -0.64%) [ +0.00% +0.21% +0.29% / -0.64% -0.17% +0.19%] index_copy_ strided 3 : Elapsed 0.058 ms (5.770 ms / 100) 6.093 -> 6.074 ( -0.31%) [ +0.00% +0.25% +0.38% / -0.31% +0.13% +0.28%] index_add_ strided 7 : Elapsed 0.061 ms (6.093 ms / 100) 5.795 -> 5.776 ( -0.33%) [ +0.17% +0.28% +0.00% / -0.33% +0.09% +0.17%] index_copy_ strided 7 : Elapsed 0.058 ms (5.805 ms / 100) 6.149 -> 6.072 ( -1.25%) [ +0.23% +0.00% +0.02% / -0.07% -1.25% -1.24%] index_add_ perm : Elapsed 0.062 ms (6.163 ms / 100) 5.817 -> 5.779 ( -0.65%) [ +0.31% +0.00% +0.41% / -0.53% -0.40% -0.65%] index_copy_ perm : Elapsed 0.058 ms (5.835 ms / 100) 6.078 -> 6.056 ( -0.36%) [ +0.16% +0.31% +0.00% / -0.36% -0.18% -0.08%] index_add_ perm_sorted : Elapsed 0.061 ms (6.088 ms / 100) 5.811 -> 5.765 ( -0.79%) [ +0.22% +0.02% +0.00% / -0.40% -0.79% -0.67%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.824 ms / 100) 6.084 -> 6.084 ( +0.00%) [ +0.16% +0.00% +0.13% / +0.00% +1.17% +0.92%] index_select const : Elapsed 0.061 ms (6.094 ms / 100) 6.546 -> 6.567 ( +0.32%) [ +0.03% +0.00% +0.34% / +0.32% +1.27% +1.25%] index_select wrap : Elapsed 0.065 ms (6.548 ms / 100) 6.406 -> 6.423 ( +0.27%) [ +0.11% +0.55% +0.00% / +0.27% +0.48% +0.55%] index_select linear : Elapsed 0.064 ms (6.413 ms / 100) 6.408 -> 6.432 ( +0.37%) [ +0.28% +0.39% +0.00% / +0.37% +0.98% +0.78%] index_select reverse : Elapsed 0.064 ms (6.426 ms / 100) 6.055 -> 6.072 ( +0.28%) [ +0.38% +0.00% +0.38% / +0.28% +1.11% +1.01%] index_select skip64 : Elapsed 0.061 ms (6.078 ms / 100) 6.067 -> 6.074 ( +0.12%) [ +0.12% +0.00% +0.07% / +0.12% +0.12% +0.26%] index_select skip256 : Elapsed 0.061 ms (6.074 ms / 100) 6.446 -> 6.440 ( -0.09%) [ +0.08% +0.00% +0.05% / -0.09% +0.06% +0.39%] index_select spread : Elapsed 0.065 ms (6.451 ms / 100) 6.522 -> 6.524 ( +0.03%) [ +0.20% +0.00% +0.15% / +0.03% +0.46% +0.43%] index_select strided 3 : Elapsed 0.065 ms (6.535 ms / 100) 6.362 -> 6.392 ( +0.47%) [ +0.28% +0.00% +0.47% / +0.47% +0.50% +0.85%] index_select strided 5 : Elapsed 0.064 ms (6.380 ms / 100) 6.651 -> 6.619 ( -0.48%) [ +0.44% +0.00% +0.08% / -0.48% +0.66% +0.48%] index_select strided 7 : Elapsed 0.067 ms (6.680 ms / 100) 6.548 -> 6.583 ( +0.53%) [ +0.27% +0.00% +0.89% / +0.53% +1.27% +1.16%] index_select strided 8 : Elapsed 0.066 ms (6.566 ms / 100) 6.591 -> 6.613 ( +0.33%) [ +0.41% +0.32% +0.00% / +0.33% +0.65% +0.70%] index_select strided 16 : Elapsed 0.066 ms (6.618 ms / 100) 6.581 -> 6.602 ( +0.32%) [ +0.18% +0.12% +0.00% / +0.32% +0.64% +0.62%] index_select strided 64 : Elapsed 0.066 ms (6.593 ms / 100) 6.086 -> 6.098 ( +0.20%) [ +0.12% +0.00% +0.25% / +0.38% +0.20% +0.38%] index_select strided 100 : Elapsed 0.061 ms (6.093 ms / 100) 6.472 -> 6.483 ( +0.17%) [ +0.34% +0.45% +0.00% / +0.17% +1.76% +1.81%] index_select random : Elapsed 0.065 ms (6.494 ms / 100) 6.340 -> 6.349 ( +0.14%) [ +0.14% +0.00% +0.03% / +0.30% +0.16% +0.14%] index_select random_sorted : Elapsed 0.063 ms (6.349 ms / 100) B = [15, 250, 50] (stride (1, 750, 15)) A = [15, 150, 50] (stride (7500, 1, 150)) dim = 1 6.346 -> 6.319 ( -0.43%) [ +0.08% +0.06% +0.00% / -0.39% -0.43% -0.08%] index_add_ linear : Elapsed 0.064 ms (6.351 ms / 100) 6.087 -> 6.065 ( -0.36%) [ +0.16% +0.16% +0.00% / -0.36% -0.23% -0.18%] index_copy_ linear : Elapsed 0.061 ms (6.097 ms / 100) 6.351 -> 6.292 ( -0.93%) [ +0.41% +0.00% +0.44% / -0.33% -0.85% -0.93%] index_add_ reverse : Elapsed 0.064 ms (6.377 ms / 100) 6.111 -> 6.049 ( -1.01%) [ +0.08% +0.00% +0.26% / -0.69% -1.01% -0.92%] index_copy_ reverse : Elapsed 0.061 ms (6.116 ms / 100) 6.358 -> 6.312 ( -0.72%) [ +0.16% +0.00% +0.06% / -0.72% +0.19% +0.30%] index_add_ spread : Elapsed 0.064 ms (6.368 ms / 100) 6.088 -> 6.083 ( -0.08%) [ +0.00% +0.10% +0.36% / -0.08% +0.23% +0.00%] index_copy_ spread : Elapsed 0.061 ms (6.088 ms / 100) 6.341 -> 6.308 ( -0.52%) [ +0.17% +0.33% +0.00% / -0.52% +0.85% +0.49%] index_add_ strided 3 : Elapsed 0.064 ms (6.352 ms / 100) 6.109 -> 6.088 ( -0.34%) [ +0.21% +0.34% +0.00% / -0.34% -0.07% -0.03%] index_copy_ strided 3 : Elapsed 0.061 ms (6.122 ms / 100) 6.377 -> 6.353 ( -0.38%) [ +0.34% +0.00% +0.22% / +0.00% -0.31% -0.38%] index_add_ strided 7 : Elapsed 0.064 ms (6.399 ms / 100) 6.105 -> 6.094 ( -0.18%) [ +0.46% +0.00% +0.13% / -0.18% +0.21% -0.11%] index_copy_ strided 7 : Elapsed 0.061 ms (6.133 ms / 100) 6.495 -> 6.371 ( -1.91%) [ +0.18% +0.34% +0.00% / -0.32% -1.91% -1.88%] index_add_ perm : Elapsed 0.065 ms (6.507 ms / 100) 6.150 -> 6.067 ( -1.35%) [ +0.07% +0.00% +0.00% / +0.02% -1.35% -1.33%] index_copy_ perm : Elapsed 0.062 ms (6.154 ms / 100) 6.370 -> 6.309 ( -0.96%) [ +0.08% +0.00% +0.03% / -0.42% -0.93% -0.96%] index_add_ perm_sorted : Elapsed 0.064 ms (6.375 ms / 100) 6.112 -> 6.062 ( -0.82%) [ +0.47% +0.00% +0.18% / -0.31% -0.82% -0.70%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.141 ms / 100) 6.418 -> 6.433 ( +0.23%) [ +0.19% +0.00% +0.16% / +0.23% +1.12% +0.93%] index_select const : Elapsed 0.064 ms (6.430 ms / 100) 7.073 -> 7.037 ( -0.51%) [ +0.00% +0.16% +0.10% / +0.30% -0.42% -0.51%] index_select wrap : Elapsed 0.071 ms (7.073 ms / 100) 6.950 -> 6.908 ( -0.60%) [ +0.04% +0.30% +0.00% / +0.30% -0.60% -0.49%] index_select linear : Elapsed 0.070 ms (6.953 ms / 100) 6.876 -> 6.810 ( -0.96%) [ +0.15% +0.00% +0.07% / +0.00% -0.96% -0.60%] index_select reverse : Elapsed 0.069 ms (6.886 ms / 100) 6.577 -> 6.600 ( +0.35%) [ +0.00% +0.12% +0.32% / +0.59% +0.47% +0.35%] index_select skip64 : Elapsed 0.066 ms (6.577 ms / 100) 6.410 -> 6.417 ( +0.11%) [ +0.20% +0.00% +0.42% / +0.11% +0.64% +0.67%] index_select skip256 : Elapsed 0.064 ms (6.423 ms / 100) 6.861 -> 6.813 ( -0.70%) [ +0.00% +0.03% +0.03% / +0.19% -0.32% -0.70%] index_select spread : Elapsed 0.069 ms (6.861 ms / 100) 7.339 -> 7.305 ( -0.46%) [ +0.00% +0.01% +0.16% / +0.19% -0.35% -0.46%] index_select strided 3 : Elapsed 0.073 ms (7.339 ms / 100) 7.331 -> 7.280 ( -0.70%) [ +0.11% +0.00% +0.34% / -0.10% -0.70% -0.41%] index_select strided 5 : Elapsed 0.073 ms (7.339 ms / 100) 7.346 -> 7.314 ( -0.44%) [ +0.18% +0.10% +0.00% / +0.07% -0.44% -0.44%] index_select strided 7 : Elapsed 0.074 ms (7.359 ms / 100) 7.330 -> 7.310 ( -0.27%) [ +0.00% +0.08% +0.22% / +0.08% -0.27% -0.03%] index_select strided 8 : Elapsed 0.073 ms (7.330 ms / 100) 7.318 -> 7.304 ( -0.19%) [ +0.46% +0.18% +0.00% / +0.10% -0.19% +0.04%] index_select strided 16 : Elapsed 0.074 ms (7.352 ms / 100) 7.314 -> 7.278 ( -0.49%) [ +0.07% +0.00% +0.11% / +0.00% -0.34% -0.49%] index_select strided 64 : Elapsed 0.073 ms (7.319 ms / 100) 6.732 -> 6.722 ( -0.15%) [ +0.16% +0.00% +0.06% / -0.15% +0.03% -0.03%] index_select strided 100 : Elapsed 0.067 ms (6.743 ms / 100) 7.318 -> 7.293 ( -0.34%) [ +0.04% +0.12% +0.00% / +0.10% -0.34% -0.27%] index_select random : Elapsed 0.073 ms (7.321 ms / 100) 6.833 -> 6.845 ( +0.18%) [ +0.13% +0.47% +0.00% / +0.51% +0.18% +0.60%] index_select random_sorted : Elapsed 0.068 ms (6.842 ms / 100) B = [15, 250, 50] (stride (1, 750, 15)) A = [15, 150, 50] (stride (150, 1, 2250)) dim = 1 6.378 -> 6.348 ( -0.47%) [ +0.25% +0.00% +0.22% / -0.47% +0.55% +0.36%] index_add_ linear : Elapsed 0.064 ms (6.394 ms / 100) 6.122 -> 6.107 ( -0.25%) [ +0.20% +0.00% +0.38% / -0.25% +0.25% +0.25%] index_copy_ linear : Elapsed 0.061 ms (6.134 ms / 100) 6.390 -> 6.354 ( -0.56%) [ +0.11% +0.00% +0.16% / -0.56% -0.52% -0.53%] index_add_ reverse : Elapsed 0.064 ms (6.397 ms / 100) 6.139 -> 6.102 ( -0.60%) [ +0.02% +0.07% +0.00% / -0.54% -0.41% -0.60%] index_copy_ reverse : Elapsed 0.061 ms (6.140 ms / 100) 6.413 -> 6.360 ( -0.83%) [ +0.06% +0.06% +0.00% / -0.83% +0.48% +0.48%] index_add_ spread : Elapsed 0.064 ms (6.417 ms / 100) 6.147 -> 6.115 ( -0.52%) [ +0.00% +0.23% +0.42% / -0.52% +0.07% -0.03%] index_copy_ spread : Elapsed 0.061 ms (6.147 ms / 100) 6.374 -> 6.357 ( -0.27%) [ +0.44% +0.00% +0.28% / -0.27% +1.13% +1.26%] index_add_ strided 3 : Elapsed 0.064 ms (6.402 ms / 100) 6.120 -> 6.105 ( -0.25%) [ +0.00% +0.08% +0.38% / -0.25% +0.46% +0.42%] index_copy_ strided 3 : Elapsed 0.061 ms (6.120 ms / 100) 6.432 -> 6.405 ( -0.42%) [ +0.06% +0.22% +0.00% / -0.42% -0.28% -0.09%] index_add_ strided 7 : Elapsed 0.064 ms (6.436 ms / 100) 6.151 -> 6.127 ( -0.39%) [ +0.00% +0.21% +0.02% / -0.39% -0.03% +0.07%] index_copy_ strided 7 : Elapsed 0.062 ms (6.151 ms / 100) 6.433 -> 6.382 ( -0.79%) [ +0.00% +0.20% +0.25% / -0.79% -0.54% -0.51%] index_add_ perm : Elapsed 0.064 ms (6.433 ms / 100) 6.139 -> 6.117 ( -0.36%) [ +0.00% +0.20% +0.21% / -0.36% -0.23% -0.18%] index_copy_ perm : Elapsed 0.061 ms (6.139 ms / 100) 6.371 -> 6.349 ( -0.35%) [ +0.17% +0.02% +0.00% / -0.31% -0.35% -0.31%] index_add_ perm_sorted : Elapsed 0.064 ms (6.382 ms / 100) 6.133 -> 6.106 ( -0.44%) [ +0.00% +0.21% +0.07% / -0.44% -0.33% -0.39%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.133 ms / 100) 6.491 -> 6.494 ( +0.05%) [ +0.00% +0.08% +0.15% / +0.05% +0.28% +0.59%] index_select const : Elapsed 0.065 ms (6.491 ms / 100) 7.123 -> 7.124 ( +0.01%) [ +0.06% +0.00% +0.03% / +0.06% +0.01% +0.37%] index_select wrap : Elapsed 0.071 ms (7.127 ms / 100) 6.955 -> 6.971 ( +0.23%) [ +0.20% +0.17% +0.00% / +0.32% +0.23% +0.30%] index_select linear : Elapsed 0.070 ms (6.969 ms / 100) 6.920 -> 6.916 ( -0.06%) [ +0.19% +0.00% +0.07% / -0.06% +0.53% +0.35%] index_select reverse : Elapsed 0.069 ms (6.933 ms / 100) 6.608 -> 6.615 ( +0.11%) [ +0.00% +0.09% +0.00% / +0.11% +0.67% +0.62%] index_select skip64 : Elapsed 0.066 ms (6.608 ms / 100) 6.469 -> 6.453 ( -0.25%) [ +0.73% +0.00% +0.12% / +0.20% -0.25% -0.03%] index_select skip256 : Elapsed 0.065 ms (6.516 ms / 100) 6.927 -> 6.933 ( +0.09%) [ +0.25% +0.00% +0.46% / +0.09% +0.45% +0.61%] index_select spread : Elapsed 0.069 ms (6.944 ms / 100) 7.367 -> 7.374 ( +0.10%) [ +0.16% +0.11% +0.00% / +0.10% +0.46% +0.46%] index_select strided 3 : Elapsed 0.074 ms (7.379 ms / 100) 7.382 -> 7.389 ( +0.09%) [ +0.20% +0.05% +0.00% / +0.31% +0.09% +0.15%] index_select strided 5 : Elapsed 0.074 ms (7.397 ms / 100) 7.375 -> 7.363 ( -0.16%) [ +0.14% +0.00% +0.20% / -0.16% +0.31% +0.34%] index_select strided 7 : Elapsed 0.074 ms (7.385 ms / 100) 7.355 -> 7.352 ( -0.04%) [ +0.07% +0.00% +0.30% / -0.04% +0.46% +0.33%] index_select strided 8 : Elapsed 0.074 ms (7.360 ms / 100) 7.361 -> 7.357 ( -0.05%) [ +0.11% +0.00% +0.26% / -0.05% +0.24% +0.31%] index_select strided 16 : Elapsed 0.074 ms (7.369 ms / 100) 7.333 -> 7.345 ( +0.16%) [ +0.00% +0.10% +0.26% / +0.19% +0.35% +0.16%] index_select strided 64 : Elapsed 0.073 ms (7.333 ms / 100) 6.768 -> 6.769 ( +0.01%) [ +0.28% +0.00% +0.22% / +0.01% +0.38% +0.04%] index_select strided 100 : Elapsed 0.068 ms (6.787 ms / 100) 7.382 -> 7.373 ( -0.12%) [ +0.03% +0.00% +0.14% / -0.12% -0.05% +0.18%] index_select random : Elapsed 0.074 ms (7.384 ms / 100) 6.916 -> 6.942 ( +0.38%) [ +0.30% +0.32% +0.00% / +0.38% +0.59% +0.61%] index_select random_sorted : Elapsed 0.069 ms (6.937 ms / 100) B = [15, 250, 50] (stride (250, 1, 3750)) dim = 1 fill_cnt = 150 Good 3.109 -> 2.709 (-12.87%) [ +0.00% +0.16% +0.39% / -12.87% -12.54% -12.42%] index_fill_ const : Elapsed 0.031 ms (3.109 ms / 100) Good 3.145 -> 2.769 (-11.96%) [ +0.00% +0.25% +0.19% / -11.86% -11.96% -11.76%] index_fill_ linear : Elapsed 0.031 ms (3.145 ms / 100) Good 3.128 -> 2.787 (-10.90%) [ +0.00% +0.19% +0.06% / -10.90% -9.94% -10.13%] index_fill_ reverse : Elapsed 0.031 ms (3.128 ms / 100) Good 3.066 -> 2.686 (-12.39%) [ +0.46% +0.39% +0.00% / -12.39% -12.00% -11.87%] index_fill_ skip64 : Elapsed 0.031 ms (3.080 ms / 100) Good 3.094 -> 2.724 (-11.96%) [ +0.16% +0.00% +0.23% / -11.60% -11.96% -11.93%] index_fill_ skip256 : Elapsed 0.031 ms (3.099 ms / 100) good 3.471 -> 3.127 ( -9.91%) [ +0.09% +0.37% +0.00% / -9.85% -9.88% -9.91%] index_fill_ spread : Elapsed 0.035 ms (3.474 ms / 100) good 3.512 -> 3.176 ( -9.57%) [ +0.37% +0.00% +0.37% / -9.57% -9.28% -9.23%] index_fill_ strided 3 : Elapsed 0.035 ms (3.525 ms / 100) good 3.551 -> 3.229 ( -9.07%) [ +0.31% +0.00% +0.14% / -9.07% -8.90% -8.59%] index_fill_ strided 5 : Elapsed 0.036 ms (3.562 ms / 100) good 3.611 -> 3.324 ( -7.95%) [ +0.11% +0.22% +0.00% / -7.95% -7.86% -7.89%] index_fill_ strided 7 : Elapsed 0.036 ms (3.615 ms / 100) good 3.625 -> 3.376 ( -6.87%) [ +0.00% +0.17% +0.11% / -6.87% -6.37% -6.37%] index_fill_ strided 8 : Elapsed 0.036 ms (3.625 ms / 100) good 3.632 -> 3.433 ( -5.48%) [ +0.28% +0.00% +0.14% / -5.48% -5.40% -5.07%] index_fill_ strided 16 : Elapsed 0.036 ms (3.642 ms / 100) 3.612 -> 3.475 ( -3.79%) [ +0.00% +0.00% +0.14% / -3.79% -3.52% -3.49%] index_fill_ strided 64 : Elapsed 0.036 ms (3.612 ms / 100) good 3.181 -> 2.926 ( -8.02%) [ +0.09% +0.00% +0.41% / -7.98% -8.02% -7.95%] index_fill_ strided 100 : Elapsed 0.032 ms (3.184 ms / 100) good 3.616 -> 3.404 ( -5.86%) [ +0.30% +0.00% +0.06% / -5.86% -5.12% -4.84%] index_fill_ random : Elapsed 0.036 ms (3.627 ms / 100) good 3.461 -> 3.134 ( -9.45%) [ +0.06% +0.43% +0.00% / -9.45% -8.99% -8.96%] index_fill_ random_sorted : Elapsed 0.035 ms (3.463 ms / 100) good 3.628 -> 3.405 ( -6.15%) [ +0.06% +0.28% +0.00% / -6.15% -5.62% -5.68%] index_fill_ perm : Elapsed 0.036 ms (3.630 ms / 100) good 3.470 -> 3.133 ( -9.71%) [ +0.00% +0.14% +0.06% / -9.71% -9.71% -9.65%] index_fill_ perm_sorted : Elapsed 0.035 ms (3.470 ms / 100) out_shape = [15, 150, 250] in_shape = [15, 150, 50] idx_dim = 2 B = [15, 150, 250] (stride (37500, 1, 150)) dim = 2 fill_cnt = 50 2.661 -> 2.657 ( -0.15%) [ +0.08% +0.00% +0.04% / -0.15% +0.04% -0.04%] index_fill_ const : Elapsed 0.027 ms (2.663 ms / 100) 2.732 -> 2.712 ( -0.73%) [ +0.00% +0.44% +0.04% / -0.70% -0.70% -0.73%] index_fill_ linear : Elapsed 0.027 ms (2.732 ms / 100) 2.735 -> 2.710 ( -0.91%) [ +0.00% +0.18% +0.26% / -0.88% -0.91% -0.80%] index_fill_ reverse : Elapsed 0.027 ms (2.735 ms / 100) 2.665 -> 2.652 ( -0.49%) [ +0.00% +0.15% +0.11% / -0.49% -0.38% +0.00%] index_fill_ skip64 : Elapsed 0.027 ms (2.665 ms / 100) 2.666 -> 2.652 ( -0.53%) [ +0.00% +0.15% +0.11% / -0.26% -0.53% -0.19%] index_fill_ skip256 : Elapsed 0.027 ms (2.666 ms / 100) 2.748 -> 2.721 ( -0.98%) [ +0.00% +0.11% +0.18% / -0.84% -0.98% -0.87%] index_fill_ spread : Elapsed 0.027 ms (2.748 ms / 100) 2.741 -> 2.723 ( -0.66%) [ +0.15% +0.47% +0.00% / -0.66% -0.55% -0.55%] index_fill_ strided 3 : Elapsed 0.027 ms (2.745 ms / 100) 2.743 -> 2.724 ( -0.69%) [ +0.00% +0.36% +0.62% / -0.47% -0.51% -0.69%] index_fill_ strided 5 : Elapsed 0.027 ms (2.743 ms / 100) 2.743 -> 2.727 ( -0.58%) [ +0.00% +0.33% +0.80% / -0.58% -0.18% -0.58%] index_fill_ strided 7 : Elapsed 0.027 ms (2.743 ms / 100) 2.738 -> 2.721 ( -0.62%) [ +0.15% +0.00% +0.15% / -0.47% -0.37% -0.62%] index_fill_ strided 8 : Elapsed 0.027 ms (2.742 ms / 100) 2.733 -> 2.718 ( -0.55%) [ +0.00% +0.73% +0.18% / -0.55% -0.15% -0.22%] index_fill_ strided 16 : Elapsed 0.027 ms (2.733 ms / 100) 2.738 -> 2.722 ( -0.58%) [ +0.00% +0.00% +0.11% / -0.58% -0.11% +0.04%] index_fill_ strided 64 : Elapsed 0.027 ms (2.738 ms / 100) 2.679 -> 2.662 ( -0.63%) [ +0.15% +0.75% +0.00% / -0.63% -0.04% -0.37%] index_fill_ strided 100 : Elapsed 0.027 ms (2.683 ms / 100) 2.742 -> 2.716 ( -0.95%) [ +0.00% +0.15% +0.44% / -0.55% -0.95% -0.95%] index_fill_ random : Elapsed 0.027 ms (2.742 ms / 100) 2.743 -> 2.707 ( -1.31%) [ +0.04% +0.00% +0.33% / -0.51% -1.31% -1.13%] index_fill_ random_sorted : Elapsed 0.027 ms (2.744 ms / 100) 2.739 -> 2.723 ( -0.58%) [ +0.40% +0.15% +0.00% / -0.22% -0.58% -0.58%] index_fill_ perm : Elapsed 0.027 ms (2.750 ms / 100) 2.741 -> 2.722 ( -0.69%) [ +0.11% +0.04% +0.00% / -0.29% -0.69% -0.69%] index_fill_ perm_sorted : Elapsed 0.027 ms (2.744 ms / 100) B = [15, 150, 250] (stride (1, 3750, 15)) dim = 2 fill_cnt = 50 3.205 -> 3.180 ( -0.78%) [ +0.12% +0.00% +0.25% / +0.22% -0.53% -0.78%] index_fill_ const : Elapsed 0.032 ms (3.209 ms / 100) 3.219 -> 3.181 ( -1.18%) [ +0.12% +0.16% +0.00% / +0.00% -0.78% -1.18%] index_fill_ linear : Elapsed 0.032 ms (3.223 ms / 100) 3.242 -> 3.182 ( -1.85%) [ +0.25% +0.09% +0.00% / -0.15% -1.85% -1.60%] index_fill_ reverse : Elapsed 0.032 ms (3.250 ms / 100) 3.210 -> 3.204 ( -0.19%) [ +0.40% +0.00% +0.16% / -0.19% +0.12% +0.09%] index_fill_ skip64 : Elapsed 0.032 ms (3.223 ms / 100) 3.211 -> 3.201 ( -0.31%) [ +0.12% +0.00% +0.22% / -0.31% -0.12% +0.00%] index_fill_ skip256 : Elapsed 0.032 ms (3.215 ms / 100) 3.225 -> 3.176 ( -1.52%) [ +0.00% +0.31% +0.22% / -0.62% -1.52% -1.33%] index_fill_ spread : Elapsed 0.032 ms (3.225 ms / 100) 3.201 -> 3.181 ( -0.62%) [ +0.00% +0.37% +0.28% / -0.19% -0.56% -0.62%] index_fill_ strided 3 : Elapsed 0.032 ms (3.201 ms / 100) 3.201 -> 3.177 ( -0.75%) [ +0.31% +0.00% +0.12% / +0.06% -0.66% -0.75%] index_fill_ strided 5 : Elapsed 0.032 ms (3.211 ms / 100) 3.239 -> 3.220 ( -0.59%) [ +0.25% +0.00% +0.62% / -0.31% -0.52% -0.59%] index_fill_ strided 7 : Elapsed 0.032 ms (3.247 ms / 100) 3.235 -> 3.227 ( -0.25%) [ +0.53% +0.00% +0.62% / -0.25% +0.15% +0.40%] index_fill_ strided 8 : Elapsed 0.033 ms (3.252 ms / 100) 3.232 -> 3.212 ( -0.62%) [ +0.65% +0.00% +0.15% / -0.46% -0.62% -0.22%] index_fill_ strided 16 : Elapsed 0.033 ms (3.253 ms / 100) 3.224 -> 3.205 ( -0.59%) [ +0.00% +0.03% +0.03% / -0.53% -0.56% -0.59%] index_fill_ strided 64 : Elapsed 0.032 ms (3.224 ms / 100) 3.152 -> 3.118 ( -1.08%) [ +0.13% +0.00% +0.19% / -0.48% -1.08% -0.95%] index_fill_ strided 100 : Elapsed 0.032 ms (3.156 ms / 100) 3.240 -> 3.224 ( -0.49%) [ +0.77% +0.09% +0.00% / -0.46% -0.46% -0.49%] index_fill_ random : Elapsed 0.033 ms (3.265 ms / 100) 3.245 -> 3.208 ( -1.14%) [ +0.15% +0.31% +0.00% / -0.46% -1.14% -0.77%] index_fill_ random_sorted : Elapsed 0.032 ms (3.250 ms / 100) 3.248 -> 3.222 ( -0.80%) [ +0.00% +0.15% +0.55% / -0.80% -0.55% -0.49%] index_fill_ perm : Elapsed 0.032 ms (3.248 ms / 100) 3.238 -> 3.218 ( -0.62%) [ +0.00% +0.25% +0.06% / -0.46% -0.62% -0.28%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.238 ms / 100) B = [15, 150, 250] (stride (1, 3750, 15)) A = [15, 150, 50] (stride (150, 1, 2250)) dim = 2 9.701 -> 9.582 ( -1.23%) [ +0.15% +0.00% +0.20% / -0.15% -1.07% -1.23%] index_add_ linear : Elapsed 0.097 ms (9.716 ms / 100) 9.170 -> 9.134 ( -0.39%) [ +0.03% +0.00% +0.08% / -0.02% -0.39% -0.33%] index_copy_ linear : Elapsed 0.092 ms (9.173 ms / 100) 9.693 -> 9.595 ( -1.01%) [ +0.08% +0.00% +0.06% / -0.25% -1.01% -1.00%] index_add_ reverse : Elapsed 0.097 ms (9.701 ms / 100) 9.162 -> 9.149 ( -0.14%) [ +0.10% +0.01% +0.00% / +0.05% -0.09% -0.14%] index_copy_ reverse : Elapsed 0.092 ms (9.171 ms / 100) 10.949 -> 10.848 ( -0.92%) [ +0.00% +0.40% +0.12% / -0.17% -0.92% -0.79%] index_add_ spread : Elapsed 0.109 ms (10.949 ms / 100) 10.095 -> 10.058 ( -0.37%) [ +0.00% +0.05% +0.16% / +0.03% -0.30% -0.37%] index_copy_ spread : Elapsed 0.101 ms (10.095 ms / 100) 10.809 -> 10.742 ( -0.62%) [ +0.18% +0.00% +0.19% / +0.13% -0.62% -0.62%] index_add_ strided 3 : Elapsed 0.108 ms (10.828 ms / 100) 10.032 -> 10.033 ( +0.01%) [ +0.15% +0.01% +0.00% / +0.11% +0.01% +0.01%] index_copy_ strided 3 : Elapsed 0.100 ms (10.047 ms / 100) 10.999 -> 10.955 ( -0.40%) [ +0.17% +0.00% +0.06% / +0.03% -0.40% -0.26%] index_add_ strided 7 : Elapsed 0.110 ms (11.018 ms / 100) 10.063 -> 10.070 ( +0.07%) [ +0.20% +0.00% +0.14% / +0.08% +0.07% +0.15%] index_copy_ strided 7 : Elapsed 0.101 ms (10.083 ms / 100) 11.121 -> 11.019 ( -0.92%) [ +0.00% +0.13% +0.17% / -0.24% -0.89% -0.92%] index_add_ perm : Elapsed 0.111 ms (11.121 ms / 100) 9.983 -> 9.953 ( -0.30%) [ +0.18% +0.13% +0.00% / +0.12% -0.15% -0.30%] index_copy_ perm : Elapsed 0.100 ms (10.001 ms / 100) 10.588 -> 10.516 ( -0.68%) [ +0.00% +0.40% +0.15% / +0.04% -0.59% -0.68%] index_add_ perm_sorted : Elapsed 0.106 ms (10.588 ms / 100) 9.787 -> 9.787 ( +0.00%) [ +0.00% +0.27% +0.03% / +0.00% +0.07% +0.09%] index_copy_ perm_sorted : Elapsed 0.098 ms (9.787 ms / 100) 19.396 -> 19.293 ( -0.53%) [ +0.00% +0.62% +0.31% / +0.14% -0.53% -0.12%] index_select const : Elapsed 0.194 ms (19.396 ms / 100) 20.929 -> 20.956 ( +0.13%) [ +0.00% +0.16% +0.39% / +0.13% +2.05% +1.78%] index_select wrap : Elapsed 0.209 ms (20.929 ms / 100) 19.423 -> 19.404 ( -0.10%) [ +0.00% +0.06% +0.03% / -0.10% -0.05% -0.03%] index_select linear : Elapsed 0.194 ms (19.423 ms / 100) 19.881 -> 19.925 ( +0.22%) [ +0.34% +0.00% +0.50% / +0.31% +0.22% +0.51%] index_select reverse : Elapsed 0.199 ms (19.948 ms / 100) 19.476 -> 19.272 ( -1.05%) [ +0.33% +0.00% +0.17% / +0.20% -0.62% -1.05%] index_select skip64 : Elapsed 0.195 ms (19.541 ms / 100) 19.425 -> 19.302 ( -0.63%) [ +0.40% +0.12% +0.00% / +0.54% -0.60% -0.63%] index_select skip256 : Elapsed 0.195 ms (19.502 ms / 100) 19.454 -> 19.479 ( +0.13%) [ +0.00% +0.26% +0.38% / +0.13% +1.14% +1.14%] index_select spread : Elapsed 0.195 ms (19.454 ms / 100) 21.179 -> 21.187 ( +0.04%) [ +0.00% +0.14% +0.11% / +0.04% +0.52% +0.86%] index_select strided 3 : Elapsed 0.212 ms (21.179 ms / 100) 20.110 -> 20.113 ( +0.01%) [ +0.00% +0.13% +0.49% / +0.01% +1.56% +1.94%] index_select strided 5 : Elapsed 0.201 ms (20.110 ms / 100) 21.131 -> 21.168 ( +0.18%) [ +0.00% +0.23% +0.00% / +0.18% +1.38% +1.12%] index_select strided 7 : Elapsed 0.211 ms (21.132 ms / 100) 21.033 -> 20.814 ( -1.04%) [ +0.51% +0.00% +0.62% / +0.64% -1.04% -0.94%] index_select strided 8 : Elapsed 0.211 ms (21.141 ms / 100) 21.114 -> 20.828 ( -1.35%) [ +0.27% +0.00% +0.09% / -0.06% -1.35% -1.08%] index_select strided 16 : Elapsed 0.212 ms (21.170 ms / 100) 20.744 -> 20.814 ( +0.34%) [ +0.33% +0.35% +0.00% / +0.34% +1.05% +0.93%] index_select random : Elapsed 0.208 ms (20.812 ms / 100) 19.446 -> 19.450 ( +0.02%) [ +0.00% +0.21% +0.22% / +0.02% +0.34% +0.48%] index_select random_sorted : Elapsed 0.194 ms (19.446 ms / 100) B = [15, 150, 250] (stride (1, 15, 2250)) A = [15, 150, 50] (stride (50, 750, 1)) dim = 2 6.593 -> 6.557 ( -0.55%) [ +0.00% +0.00% +0.18% / -0.55% +0.20% +0.06%] index_add_ linear : Elapsed 0.066 ms (6.593 ms / 100) 6.320 -> 6.288 ( -0.51%) [ +0.00% +0.25% +0.00% / -0.51% +0.27% -0.02%] index_copy_ linear : Elapsed 0.063 ms (6.320 ms / 100) 6.585 -> 6.574 ( -0.17%) [ +0.00% +0.24% +0.00% / -0.17% +0.11% -0.03%] index_add_ reverse : Elapsed 0.066 ms (6.585 ms / 100) 6.326 -> 6.297 ( -0.46%) [ +0.02% +0.02% +0.00% / -0.25% -0.03% -0.46%] index_copy_ reverse : Elapsed 0.063 ms (6.327 ms / 100) 6.633 -> 6.588 ( -0.68%) [ +0.30% +0.29% +0.00% / -0.24% -0.68% -0.35%] index_add_ spread : Elapsed 0.067 ms (6.653 ms / 100) 6.359 -> 6.322 ( -0.58%) [ +0.05% +0.36% +0.00% / -0.31% -0.58% -0.49%] index_copy_ spread : Elapsed 0.064 ms (6.362 ms / 100) 6.629 -> 6.606 ( -0.35%) [ +0.00% +0.02% +0.35% / -0.35% -0.09% +0.09%] index_add_ strided 3 : Elapsed 0.066 ms (6.629 ms / 100) 6.354 -> 6.323 ( -0.49%) [ +0.00% +0.08% +0.30% / -0.49% -0.14% -0.09%] index_copy_ strided 3 : Elapsed 0.064 ms (6.354 ms / 100) 6.667 -> 6.631 ( -0.54%) [ +0.00% +0.43% +0.06% / -0.34% -0.48% -0.54%] index_add_ strided 7 : Elapsed 0.067 ms (6.667 ms / 100) 6.361 -> 6.313 ( -0.75%) [ +0.27% +0.16% +0.00% / -0.39% -0.75% -0.69%] index_copy_ strided 7 : Elapsed 0.064 ms (6.378 ms / 100) 6.680 -> 6.634 ( -0.69%) [ +0.01% +0.00% +0.27% / -0.69% -0.12% -0.33%] index_add_ perm : Elapsed 0.067 ms (6.681 ms / 100) 6.346 -> 6.326 ( -0.32%) [ +0.00% +0.39% +0.63% / -0.32% +0.09% +0.20%] index_copy_ perm : Elapsed 0.063 ms (6.346 ms / 100) 6.676 -> 6.646 ( -0.45%) [ +0.18% +0.04% +0.00% / -0.45% -0.22% -0.39%] index_add_ perm_sorted : Elapsed 0.067 ms (6.688 ms / 100) 6.361 -> 6.342 ( -0.30%) [ +0.24% +0.36% +0.00% / -0.30% +0.06% +0.08%] index_copy_ perm_sorted : Elapsed 0.064 ms (6.376 ms / 100) 12.684 -> 12.150 ( -4.21%) [ +0.17% +0.00% +0.58% / +0.05% -4.21% -3.19%] index_select const : Elapsed 0.127 ms (12.705 ms / 100) 13.988 -> 13.830 ( -1.13%) [ +0.07% +0.65% +0.00% / +0.18% -1.13% -1.04%] index_select wrap : Elapsed 0.140 ms (13.998 ms / 100) 12.363 -> 11.902 ( -3.73%) [ +1.02% +0.00% +0.33% / -0.06% -3.73% -3.36%] index_select linear : Elapsed 0.125 ms (12.489 ms / 100) 13.145 -> 12.654 ( -3.74%) [ +0.91% +0.00% +0.92% / +0.09% -3.48% -3.74%] index_select reverse : Elapsed 0.133 ms (13.265 ms / 100) 12.693 -> 12.064 ( -4.96%) [ +0.07% +0.00% +1.91% / +0.98% -4.96% -3.74%] index_select skip64 : Elapsed 0.127 ms (12.702 ms / 100) 12.489 -> 12.200 ( -2.31%) [ +0.10% +0.53% +0.00% / +0.53% -2.31% -2.19%] index_select skip256 : Elapsed 0.125 ms (12.501 ms / 100) 12.858 -> 12.686 ( -1.34%) [ +0.17% +0.00% +0.53% / -0.18% -1.34% -0.82%] index_select spread : Elapsed 0.129 ms (12.880 ms / 100) 14.802 -> 14.658 ( -0.97%) [ +1.20% +0.00% +0.36% / +0.48% -0.97% +0.11%] index_select strided 3 : Elapsed 0.150 ms (14.979 ms / 100) 14.996 -> 15.056 ( +0.40%) [ +0.79% +0.58% +0.00% / +0.77% +0.69% +0.40%] index_select strided 5 : Elapsed 0.151 ms (15.114 ms / 100) 15.106 -> 14.722 ( -2.54%) [ +0.91% +1.28% +0.00% / +0.75% -2.54% -0.55%] index_select strided 7 : Elapsed 0.152 ms (15.244 ms / 100) 15.255 -> 14.940 ( -2.06%) [ +0.00% +0.54% +0.39% / +0.01% -2.06% -1.44%] index_select strided 8 : Elapsed 0.153 ms (15.255 ms / 100) 14.875 -> 14.852 ( -0.15%) [ +0.63% +0.00% +0.43% / +0.89% +0.37% -0.15%] index_select strided 16 : Elapsed 0.150 ms (14.969 ms / 100) 14.965 -> 14.680 ( -1.90%) [ +0.00% +0.04% +0.03% / -0.05% -1.44% -1.90%] index_select random : Elapsed 0.150 ms (14.965 ms / 100) 12.879 -> 12.761 ( -0.92%) [ +0.29% +0.51% +0.00% / +0.26% -0.92% -0.53%] index_select random_sorted : Elapsed 0.129 ms (12.916 ms / 100) B = [15, 150, 250] (stride (1, 15, 2250)) A = [15, 150, 50] (stride (150, 1, 2250)) dim = 2 9.232 -> 9.206 ( -0.28%) [ +0.12% +0.00% +0.17% / +0.10% -0.22% -0.28%] index_add_ linear : Elapsed 0.092 ms (9.243 ms / 100) 9.105 -> 9.076 ( -0.32%) [ +0.03% +0.12% +0.00% / +0.04% -0.32% -0.20%] index_copy_ linear : Elapsed 0.091 ms (9.108 ms / 100) 9.220 -> 9.206 ( -0.15%) [ +0.00% +0.17% +0.04% / -0.07% -0.15% +0.22%] index_add_ reverse : Elapsed 0.092 ms (9.220 ms / 100) 9.092 -> 9.082 ( -0.11%) [ +0.08% +0.01% +0.00% / +0.18% -0.11% +0.20%] index_copy_ reverse : Elapsed 0.091 ms (9.099 ms / 100) 9.253 -> 9.230 ( -0.25%) [ +0.00% +0.08% +0.04% / -0.25% -0.16% +0.23%] index_add_ spread : Elapsed 0.093 ms (9.253 ms / 100) 9.100 -> 9.102 ( +0.02%) [ +0.07% +0.00% +0.29% / +0.02% +0.08% +0.09%] index_copy_ spread : Elapsed 0.091 ms (9.106 ms / 100) 9.254 -> 9.230 ( -0.26%) [ +0.01% +0.15% +0.00% / +0.02% -0.03% -0.26%] index_add_ strided 3 : Elapsed 0.093 ms (9.255 ms / 100) 9.114 -> 9.105 ( -0.10%) [ +0.12% +0.07% +0.00% / -0.10% +0.01% -0.10%] index_copy_ strided 3 : Elapsed 0.091 ms (9.125 ms / 100) 9.260 -> 9.260 ( +0.00%) [ +0.00% +0.27% +0.12% / +0.00% +0.11% +0.40%] index_add_ strided 7 : Elapsed 0.093 ms (9.260 ms / 100) 9.120 -> 9.111 ( -0.10%) [ +0.00% +0.11% +0.05% / -0.10% +0.01% +0.22%] index_copy_ strided 7 : Elapsed 0.091 ms (9.120 ms / 100) 9.289 -> 9.282 ( -0.08%) [ +0.00% +0.09% +0.05% / +0.08% -0.08% -0.02%] index_add_ perm : Elapsed 0.093 ms (9.289 ms / 100) 9.130 -> 9.124 ( -0.07%) [ +0.00% +0.00% +0.07% / -0.07% +0.02% -0.02%] index_copy_ perm : Elapsed 0.091 ms (9.130 ms / 100) 9.309 -> 9.253 ( -0.60%) [ +0.19% +0.00% +0.25% / +0.34% -0.60% -0.41%] index_add_ perm_sorted : Elapsed 0.093 ms (9.327 ms / 100) 9.142 -> 9.111 ( -0.34%) [ +0.09% +0.00% +0.09% / +0.11% -0.34% -0.31%] index_copy_ perm_sorted : Elapsed 0.092 ms (9.150 ms / 100) 18.902 -> 18.857 ( -0.24%) [ +0.00% +0.10% +0.29% / +0.11% -0.24% -0.20%] index_select const : Elapsed 0.189 ms (18.902 ms / 100) 20.504 -> 20.386 ( -0.58%) [ +0.00% +0.51% +0.22% / +0.20% -0.05% -0.58%] index_select wrap : Elapsed 0.205 ms (20.504 ms / 100) 18.942 -> 18.857 ( -0.45%) [ +0.49% +0.00% +0.48% / +0.39% -0.35% -0.45%] index_select linear : Elapsed 0.190 ms (19.034 ms / 100) 19.536 -> 19.462 ( -0.38%) [ +0.24% +0.01% +0.00% / +0.03% -0.31% -0.38%] index_select reverse : Elapsed 0.196 ms (19.582 ms / 100) 18.819 -> 18.873 ( +0.29%) [ +0.00% +0.21% +0.47% / +0.29% +0.79% +0.60%] index_select skip64 : Elapsed 0.188 ms (18.819 ms / 100) 18.892 -> 18.792 ( -0.53%) [ +0.09% +0.20% +0.00% / +0.24% -0.53% -0.12%] index_select skip256 : Elapsed 0.189 ms (18.909 ms / 100) 18.939 -> 18.904 ( -0.18%) [ +0.00% +0.03% +0.39% / -0.06% -0.18% -0.17%] index_select spread : Elapsed 0.189 ms (18.939 ms / 100) 20.518 -> 20.483 ( -0.17%) [ +0.42% +0.00% +0.33% / +0.22% -0.17% -0.02%] index_select strided 3 : Elapsed 0.206 ms (20.605 ms / 100) 19.755 -> 19.713 ( -0.21%) [ +0.06% +0.00% +0.13% / +0.19% -0.11% -0.21%] index_select strided 5 : Elapsed 0.198 ms (19.767 ms / 100) 20.552 -> 20.555 ( +0.01%) [ +0.35% +0.00% +0.29% / +0.23% +0.01% +0.23%] index_select strided 7 : Elapsed 0.206 ms (20.624 ms / 100) 20.010 -> 20.037 ( +0.13%) [ +0.00% +0.00% +0.01% / +0.13% +3.00% +2.85%] index_select strided 8 : Elapsed 0.200 ms (20.010 ms / 100) 19.991 -> 20.076 ( +0.43%) [ +0.13% +0.00% +0.26% / +0.43% +2.64% +2.90%] index_select strided 16 : Elapsed 0.200 ms (20.017 ms / 100) 20.241 -> 20.163 ( -0.39%) [ +0.00% +0.49% +0.23% / -0.13% -0.39% -0.30%] index_select random : Elapsed 0.202 ms (20.241 ms / 100) 18.957 -> 18.886 ( -0.37%) [ +0.21% +0.20% +0.00% / +0.30% -0.37% -0.21%] index_select random_sorted : Elapsed 0.190 ms (18.997 ms / 100) B = [15, 150, 250] (stride (1, 15, 2250)) A = [15, 150, 50] (stride (1, 15, 2250)) dim = 2 6.158 -> 6.128 ( -0.49%) [ +0.24% +0.00% +0.15% / -0.49% -0.18% -0.32%] index_add_ linear : Elapsed 0.062 ms (6.173 ms / 100) 5.932 -> 5.901 ( -0.52%) [ +0.47% +0.00% +0.19% / -0.52% -0.49% -0.32%] index_copy_ linear : Elapsed 0.060 ms (5.960 ms / 100) 6.133 -> 6.126 ( -0.11%) [ +0.62% +0.16% +0.00% / -0.11% +0.10% -0.03%] index_add_ reverse : Elapsed 0.062 ms (6.171 ms / 100) 5.933 -> 5.883 ( -0.84%) [ +0.12% +0.00% +0.02% / -0.84% -0.32% -0.64%] index_copy_ reverse : Elapsed 0.059 ms (5.940 ms / 100) 6.148 -> 6.129 ( -0.31%) [ +0.00% +0.00% +0.63% / -0.11% -0.21% -0.31%] index_add_ spread : Elapsed 0.061 ms (6.148 ms / 100) 5.937 -> 5.907 ( -0.51%) [ +0.07% +0.00% +0.59% / -0.45% -0.20% -0.51%] index_copy_ spread : Elapsed 0.059 ms (5.941 ms / 100) 6.178 -> 6.113 ( -1.05%) [ +0.06% +0.00% +0.06% / -0.44% -1.02% -1.05%] index_add_ strided 3 : Elapsed 0.062 ms (6.182 ms / 100) 5.956 -> 5.890 ( -1.11%) [ +0.05% +0.00% +0.18% / -0.74% -1.11% -1.01%] index_copy_ strided 3 : Elapsed 0.060 ms (5.959 ms / 100) 6.160 -> 6.123 ( -0.60%) [ +0.16% +0.05% +0.00% / -0.52% -0.58% -0.60%] index_add_ strided 7 : Elapsed 0.062 ms (6.170 ms / 100) 5.938 -> 5.891 ( -0.79%) [ +0.35% +0.02% +0.00% / -0.39% -0.79% -0.71%] index_copy_ strided 7 : Elapsed 0.060 ms (5.959 ms / 100) 6.181 -> 6.181 ( +0.00%) [ +0.32% +0.00% +0.60% / +0.00% +1.16% +1.00%] index_add_ perm : Elapsed 0.062 ms (6.201 ms / 100) 5.914 -> 5.878 ( -0.61%) [ +0.57% +0.00% +0.73% / -0.61% +1.39% +1.47%] index_copy_ perm : Elapsed 0.059 ms (5.948 ms / 100) 6.226 -> 6.226 ( +0.00%) [ +0.19% +0.00% +0.24% / +0.00% +0.19% +0.40%] index_add_ perm_sorted : Elapsed 0.062 ms (6.238 ms / 100) 5.939 -> 5.904 ( -0.59%) [ +0.62% +0.25% +0.00% / -0.59% +1.25% +1.38%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.976 ms / 100) 9.877 -> 9.885 ( +0.08%) [ +0.57% +0.42% +0.00% / +0.08% +1.09% +0.74%] index_select const : Elapsed 0.099 ms (9.933 ms / 100) 14.099 -> 13.993 ( -0.75%) [ +0.08% +0.01% +0.00% / -0.75% +0.06% +1.86%] index_select wrap : Elapsed 0.141 ms (14.110 ms / 100) 10.387 -> 10.400 ( +0.13%) [ +0.00% +0.32% +0.13% / +0.13% +0.69% +1.18%] index_select linear : Elapsed 0.104 ms (10.387 ms / 100) 10.565 -> 10.613 ( +0.45%) [ +0.20% +0.53% +0.00% / +0.45% +1.56% +1.42%] index_select reverse : Elapsed 0.106 ms (10.586 ms / 100) 9.997 -> 10.015 ( +0.18%) [ +0.10% +0.00% +0.73% / +0.18% +0.88% +0.98%] index_select skip64 : Elapsed 0.100 ms (10.007 ms / 100) 9.817 -> 9.844 ( +0.28%) [ +0.00% +0.03% +0.22% / +0.28% +1.09% +1.49%] index_select skip256 : Elapsed 0.098 ms (9.817 ms / 100) 9.839 -> 9.886 ( +0.48%) [ +0.02% +0.00% +0.15% / +0.48% +0.79% +0.95%] index_select spread : Elapsed 0.098 ms (9.841 ms / 100) 14.297 -> 14.437 ( +0.98%) [ +1.34% +1.30% +0.00% / +1.37% +1.55% +0.98%] index_select strided 3 : Elapsed 0.145 ms (14.488 ms / 100) 10.451 -> 10.458 ( +0.07%) [ +1.54% +0.00% +1.98% / +0.07% +2.17% +1.69%] index_select strided 5 : Elapsed 0.106 ms (10.612 ms / 100) 13.722 -> 13.789 ( +0.49%) [ +0.20% +0.00% +0.17% / +0.49% +3.29% +2.72%] index_select strided 7 : Elapsed 0.138 ms (13.750 ms / 100) 12.084 -> 12.033 ( -0.42%) [ +0.75% +0.00% +0.38% / -0.42% -0.12% -0.12%] index_select strided 8 : Elapsed 0.122 ms (12.175 ms / 100) 11.699 -> 12.014 ( +2.69%) [ +2.61% +1.28% +0.00% / +2.93% +3.44% +2.69%] index_select strided 16 : Elapsed 0.120 ms (12.004 ms / 100) 12.194 -> 12.383 ( +1.55%) [ +0.89% +0.00% +0.78% / +1.55% +5.21% +5.96%] index_select random : Elapsed 0.123 ms (12.303 ms / 100) 10.082 -> 9.978 ( -1.03%) [ +0.00% +0.28% +0.27% / -0.33% -0.87% -1.03%] index_select random_sorted : Elapsed 0.101 ms (10.082 ms / 100) out_shape = [250, 15, 150] in_shape = [50, 15, 150] idx_dim = 0 B = [250, 15, 150] (stride (2250, 150, 1)) A = [50, 15, 150] (stride (2250, 150, 1)) dim = 0 5.229 -> 5.209 ( -0.38%) [ +0.00% +0.06% +0.13% / -0.08% -0.31% -0.38%] index_add_ linear : Elapsed 0.052 ms (5.229 ms / 100) 5.082 -> 5.061 ( -0.41%) [ +0.24% +0.30% +0.00% / -0.18% -0.41% -0.22%] index_copy_ linear : Elapsed 0.051 ms (5.094 ms / 100) 5.245 -> 5.226 ( -0.36%) [ +0.00% +0.04% +0.02% / -0.13% -0.11% -0.36%] index_add_ reverse : Elapsed 0.052 ms (5.245 ms / 100) 5.090 -> 5.064 ( -0.51%) [ +0.06% +0.00% +0.08% / -0.29% -0.29% -0.51%] index_copy_ reverse : Elapsed 0.051 ms (5.093 ms / 100) 5.247 -> 5.223 ( -0.46%) [ +0.00% +0.21% +0.15% / -0.36% -0.46% -0.15%] index_add_ spread : Elapsed 0.052 ms (5.247 ms / 100) 5.087 -> 5.065 ( -0.43%) [ +0.00% +0.10% +0.16% / -0.43% -0.14% -0.26%] index_copy_ spread : Elapsed 0.051 ms (5.087 ms / 100) 5.255 -> 5.240 ( -0.29%) [ +0.34% +0.00% +0.00% / -0.29% +0.00% -0.10%] index_add_ strided 3 : Elapsed 0.053 ms (5.273 ms / 100) 5.096 -> 5.066 ( -0.59%) [ +0.22% +0.10% +0.00% / -0.35% -0.59% -0.45%] index_copy_ strided 3 : Elapsed 0.051 ms (5.107 ms / 100) 5.278 -> 5.225 ( -1.00%) [ +0.15% +0.15% +0.00% / -0.25% -0.93% -1.00%] index_add_ strided 7 : Elapsed 0.053 ms (5.286 ms / 100) 5.102 -> 5.056 ( -0.90%) [ +0.00% +0.02% +0.04% / -0.18% -0.90% -0.90%] index_copy_ strided 7 : Elapsed 0.051 ms (5.102 ms / 100) 5.249 -> 5.225 ( -0.46%) [ +0.27% +0.02% +0.00% / -0.46% +0.32% +0.30%] index_add_ perm : Elapsed 0.053 ms (5.263 ms / 100) 5.079 -> 5.056 ( -0.45%) [ +0.00% +0.16% +0.24% / -0.45% -0.06% +0.22%] index_copy_ perm : Elapsed 0.051 ms (5.079 ms / 100) 5.245 -> 5.223 ( -0.42%) [ +0.00% +0.21% +0.06% / -0.42% +0.08% +0.34%] index_add_ perm_sorted : Elapsed 0.052 ms (5.245 ms / 100) 5.078 -> 5.069 ( -0.18%) [ +0.00% +0.12% +0.12% / -0.18% +0.00% +0.28%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.078 ms / 100) 6.890 -> 6.912 ( +0.32%) [ +0.00% +0.17% +0.04% / +0.32% +0.33% +0.52%] index_select const : Elapsed 0.069 ms (6.890 ms / 100) 7.603 -> 7.608 ( +0.07%) [ +0.17% +0.00% +0.13% / +0.26% +0.07% +0.22%] index_select wrap : Elapsed 0.076 ms (7.616 ms / 100) 7.084 -> 7.078 ( -0.08%) [ +0.00% +0.01% +0.10% / +0.16% -0.08% +0.16%] index_select linear : Elapsed 0.071 ms (7.084 ms / 100) 7.178 -> 7.191 ( +0.18%) [ +0.13% +0.00% +0.15% / +0.18% +0.20% +0.29%] index_select reverse : Elapsed 0.072 ms (7.187 ms / 100) 6.892 -> 6.902 ( +0.15%) [ +0.00% +0.20% +0.03% / +0.15% +0.35% +0.32%] index_select skip64 : Elapsed 0.069 ms (6.892 ms / 100) 6.890 -> 6.911 ( +0.30%) [ +0.23% +0.00% +0.00% / +0.49% +0.48% +0.30%] index_select skip256 : Elapsed 0.069 ms (6.906 ms / 100) 7.215 -> 7.213 ( -0.03%) [ +0.00% +0.11% +0.03% / +0.18% -0.03% +0.08%] index_select spread : Elapsed 0.072 ms (7.215 ms / 100) 7.635 -> 7.631 ( -0.05%) [ +0.07% +0.03% +0.00% / +0.30% -0.05% +0.03%] index_select strided 3 : Elapsed 0.076 ms (7.640 ms / 100) 7.010 -> 7.039 ( +0.41%) [ +0.17% +0.11% +0.00% / +0.41% +0.90% +0.96%] index_select strided 5 : Elapsed 0.070 ms (7.022 ms / 100) 7.634 -> 7.637 ( +0.04%) [ +0.03% +0.00% +0.10% / +0.24% +0.35% +0.04%] index_select strided 7 : Elapsed 0.076 ms (7.636 ms / 100) 7.317 -> 7.327 ( +0.14%) [ +0.16% +0.00% +0.00% / +0.19% +0.14% +0.31%] index_select strided 8 : Elapsed 0.073 ms (7.329 ms / 100) 7.295 -> 7.327 ( +0.44%) [ +0.26% +0.00% +0.11% / +0.59% +0.55% +0.44%] index_select strided 16 : Elapsed 0.073 ms (7.314 ms / 100) 7.450 -> 7.439 ( -0.15%) [ +0.00% +0.00% +0.13% / +0.07% -0.15% -0.12%] index_select random : Elapsed 0.075 ms (7.450 ms / 100) 7.176 -> 7.192 ( +0.22%) [ +0.01% +0.00% +0.04% / +0.22% +0.35% +0.33%] index_select random_sorted : Elapsed 0.072 ms (7.177 ms / 100) B = [250, 15, 150] (stride (1, 37500, 250)) A = [50, 15, 150] (stride (2250, 1, 15)) dim = 0 10.658 -> 10.577 ( -0.76%) [ +0.08% +0.00% +0.07% / -0.23% -0.65% -0.76%] index_add_ linear : Elapsed 0.107 ms (10.666 ms / 100) 9.873 -> 9.822 ( -0.52%) [ +0.10% +0.01% +0.00% / -0.02% -0.47% -0.52%] index_copy_ linear : Elapsed 0.099 ms (9.883 ms / 100) 10.661 -> 10.580 ( -0.76%) [ +0.14% +0.00% +0.01% / -0.01% -0.76% -0.63%] index_add_ reverse : Elapsed 0.107 ms (10.676 ms / 100) 9.869 -> 9.802 ( -0.68%) [ +0.18% +0.17% +0.00% / +0.35% -0.68% -0.43%] index_copy_ reverse : Elapsed 0.099 ms (9.887 ms / 100) 14.469 -> 14.472 ( +0.02%) [ +0.65% +0.00% +0.07% / +0.20% +0.02% +0.06%] index_add_ spread : Elapsed 0.146 ms (14.563 ms / 100) 12.788 -> 12.741 ( -0.37%) [ +0.12% +0.00% +0.19% / -0.30% -0.22% -0.37%] index_copy_ spread : Elapsed 0.128 ms (12.803 ms / 100) 12.788 -> 12.681 ( -0.84%) [ +0.23% +0.00% +0.20% / -0.38% -0.84% -0.51%] index_add_ strided 3 : Elapsed 0.128 ms (12.817 ms / 100) 11.463 -> 11.400 ( -0.55%) [ +0.09% +0.00% +0.10% / -0.38% -0.36% -0.55%] index_copy_ strided 3 : Elapsed 0.115 ms (11.473 ms / 100) 15.664 -> 15.631 ( -0.21%) [ +0.21% +0.04% +0.00% / -0.21% -0.17% -0.19%] index_add_ strided 7 : Elapsed 0.157 ms (15.697 ms / 100) 13.843 -> 13.802 ( -0.30%) [ +0.00% +0.04% +0.01% / -0.30% +0.04% -0.18%] index_copy_ strided 7 : Elapsed 0.138 ms (13.843 ms / 100) 17.304 -> 17.365 ( +0.35%) [ +0.00% +0.14% +0.09% / +0.38% +0.35% +0.36%] index_add_ perm : Elapsed 0.173 ms (17.304 ms / 100) 15.347 -> 15.356 ( +0.06%) [ +0.07% +0.03% +0.00% / +0.14% +0.08% +0.06%] index_copy_ perm : Elapsed 0.154 ms (15.357 ms / 100) 13.939 -> 13.885 ( -0.39%) [ +0.00% +0.32% +0.32% / -0.20% -0.39% -0.21%] index_add_ perm_sorted : Elapsed 0.139 ms (13.939 ms / 100) 12.399 -> 12.335 ( -0.52%) [ +0.15% +0.00% +0.07% / -0.52% -0.40% -0.24%] index_copy_ perm_sorted : Elapsed 0.124 ms (12.417 ms / 100) 22.193 -> 22.197 ( +0.02%) [ +0.11% +0.05% +0.00% / +0.13% +0.27% +0.02%] index_select const : Elapsed 0.222 ms (22.217 ms / 100) 26.302 -> 25.972 ( -1.25%) [ +0.00% +0.06% +0.09% / -0.27% -1.25% -1.02%] index_select wrap : Elapsed 0.263 ms (26.302 ms / 100) 22.489 -> 22.469 ( -0.09%) [ +0.00% +0.06% +0.06% / -0.09% +0.84% +0.72%] index_select linear : Elapsed 0.225 ms (22.489 ms / 100) 23.138 -> 23.133 ( -0.02%) [ +0.00% +0.26% +0.09% / -0.02% +1.34% +1.47%] index_select reverse : Elapsed 0.231 ms (23.138 ms / 100) 22.193 -> 22.209 ( +0.07%) [ +0.10% +0.00% +0.09% / +0.07% +0.33% +0.30%] index_select skip64 : Elapsed 0.222 ms (22.216 ms / 100) 22.213 -> 22.239 ( +0.12%) [ +0.25% +0.01% +0.00% / +0.23% +0.18% +0.12%] index_select skip256 : Elapsed 0.223 ms (22.268 ms / 100) 22.925 -> 22.910 ( -0.07%) [ +0.30% +0.03% +0.00% / +0.03% +0.11% -0.07%] index_select spread : Elapsed 0.230 ms (22.993 ms / 100) 26.408 -> 26.095 ( -1.19%) [ +0.00% +0.23% +0.33% / +0.30% -1.19% -1.17%] index_select strided 3 : Elapsed 0.264 ms (26.408 ms / 100) 23.184 -> 23.194 ( +0.04%) [ +0.00% +0.10% +0.10% / +0.04% +0.64% +0.78%] index_select strided 5 : Elapsed 0.232 ms (23.184 ms / 100) 26.091 -> 26.075 ( -0.06%) [ +0.00% +0.32% +0.23% / +0.34% +0.01% -0.06%] index_select strided 7 : Elapsed 0.261 ms (26.091 ms / 100) 25.683 -> 25.479 ( -0.79%) [ +0.08% +0.09% +0.00% / +0.21% -0.76% -0.79%] index_select strided 8 : Elapsed 0.257 ms (25.704 ms / 100) 25.585 -> 25.393 ( -0.75%) [ +0.00% +0.02% +0.06% / +0.14% -0.52% -0.75%] index_select strided 16 : Elapsed 0.256 ms (25.585 ms / 100) 25.154 -> 25.159 ( +0.02%) [ +0.00% +0.35% +0.00% / +0.02% +0.35% +0.60%] index_select random : Elapsed 0.252 ms (25.154 ms / 100) 22.985 -> 22.875 ( -0.48%) [ +0.31% +0.00% +0.45% / +0.31% -0.48% -0.27%] index_select random_sorted : Elapsed 0.231 ms (23.056 ms / 100) B = [250, 15, 150] (stride (1, 250, 3750)) A = [50, 15, 150] (stride (2250, 1, 15)) dim = 0 good 7.163 -> 6.726 ( -6.10%) [ +0.00% +0.50% +0.47% / -5.78% -6.10% -6.03%] index_add_ linear : Elapsed 0.072 ms (7.163 ms / 100) 6.357 -> 6.488 ( +2.06%) [ +0.00% +0.46% +0.11% / +2.52% +2.06% +2.30%] index_copy_ linear : Elapsed 0.064 ms (6.357 ms / 100) good 7.158 -> 6.718 ( -6.15%) [ +0.49% +0.00% +0.34% / -5.45% -6.15% -5.74%] index_add_ reverse : Elapsed 0.072 ms (7.193 ms / 100) 6.331 -> 6.475 ( +2.27%) [ +0.81% +0.00% +0.62% / +2.83% +2.27% +2.48%] index_copy_ reverse : Elapsed 0.064 ms (6.382 ms / 100) GOOD 10.712 -> 8.554 (-20.15%) [ +0.00% +0.22% +0.21% / -19.44% -20.08% -20.15%] index_add_ spread : Elapsed 0.107 ms (10.712 ms / 100) 9.075 -> 8.705 ( -4.08%) [ +0.00% +0.40% +0.17% / -3.50% -3.88% -4.08%] index_copy_ spread : Elapsed 0.091 ms (9.075 ms / 100) Good 9.167 -> 7.671 (-16.32%) [ +0.19% +0.26% +0.00% / -15.45% -16.23% -16.32%] index_add_ strided 3 : Elapsed 0.092 ms (9.184 ms / 100) 7.869 -> 7.819 ( -0.64%) [ +0.30% +0.41% +0.00% / -0.25% -0.56% -0.64%] index_copy_ strided 3 : Elapsed 0.079 ms (7.893 ms / 100) GOOD 12.005 -> 8.572 (-28.60%) [ +0.69% +0.00% +0.46% / -27.80% -28.60% -28.48%] index_add_ strided 7 : Elapsed 0.121 ms (12.088 ms / 100) Good 10.359 -> 8.757 (-15.46%) [ +0.02% +0.00% +0.20% / -15.01% -15.46% -15.39%] index_copy_ strided 7 : Elapsed 0.104 ms (10.361 ms / 100) GOOD 14.252 -> 8.610 (-39.59%) [ +0.21% +0.08% +0.00% / -39.01% -39.57% -39.59%] index_add_ perm : Elapsed 0.143 ms (14.282 ms / 100) GOOD 11.859 -> 8.611 (-27.39%) [ +0.00% +0.08% +0.07% / -26.93% -27.35% -27.39%] index_copy_ perm : Elapsed 0.119 ms (11.859 ms / 100) Good 10.455 -> 8.465 (-19.03%) [ +0.00% +0.22% +0.35% / -18.57% -19.03% -18.89%] index_add_ perm_sorted : Elapsed 0.105 ms (10.455 ms / 100) 8.898 -> 8.574 ( -3.64%) [ +0.00% +0.13% +0.03% / -3.38% -3.64% -3.64%] index_copy_ perm_sorted : Elapsed 0.089 ms (8.898 ms / 100) GOOD 11.843 -> 8.590 (-27.47%) [ +0.44% +0.00% +1.25% / -27.46% -27.47% -27.38%] index_select const : Elapsed 0.119 ms (11.895 ms / 100) GOOD 16.704 -> 11.668 (-30.15%) [ +0.00% +1.22% +0.80% / -29.29% -30.11% -30.15%] index_select wrap : Elapsed 0.167 ms (16.704 ms / 100) good 12.052 -> 11.248 ( -6.67%) [ +1.10% +0.00% +0.98% / -5.37% -6.65% -6.67%] index_select linear : Elapsed 0.122 ms (12.184 ms / 100) Good 12.505 -> 11.224 (-10.24%) [ +0.77% +0.51% +0.00% / -9.28% -10.22% -10.24%] index_select reverse : Elapsed 0.126 ms (12.601 ms / 100) GOOD 11.652 -> 8.575 (-26.41%) [ +0.27% +0.00% +0.10% / -25.18% -26.41% -26.25%] index_select skip64 : Elapsed 0.117 ms (11.683 ms / 100) GOOD 11.782 -> 8.600 (-27.01%) [ +0.01% +0.00% +0.26% / -27.01% -26.78% -26.68%] index_select skip256 : Elapsed 0.118 ms (11.783 ms / 100) Good 12.797 -> 11.318 (-11.56%) [ +0.59% +1.42% +0.00% / -10.53% -11.56% -11.46%] index_select spread : Elapsed 0.129 ms (12.872 ms / 100) GOOD 16.824 -> 11.642 (-30.80%) [ +0.00% +0.67% +1.17% / -30.12% -30.80% -30.77%] index_select strided 3 : Elapsed 0.168 ms (16.824 ms / 100) GOOD 13.241 -> 9.184 (-30.64%) [ +0.32% +1.23% +0.00% / -30.64% -30.62% -30.53%] index_select strided 5 : Elapsed 0.133 ms (13.284 ms / 100) GOOD 16.747 -> 11.642 (-30.48%) [ +0.00% +0.82% +0.10% / -29.60% -30.37% -30.48%] index_select strided 7 : Elapsed 0.167 ms (16.747 ms / 100) GOOD 14.328 -> 10.095 (-29.54%) [ +0.00% +0.57% +0.40% / -29.54% -29.39% -29.39%] index_select strided 8 : Elapsed 0.143 ms (14.328 ms / 100) GOOD 14.410 -> 10.028 (-30.41%) [ +0.00% +1.41% +0.29% / -30.41% -29.14% -28.86%] index_select strided 16 : Elapsed 0.144 ms (14.410 ms / 100) GOOD 15.245 -> 11.633 (-23.69%) [ +0.03% +0.00% +0.21% / -22.96% -23.69% -23.67%] index_select random : Elapsed 0.152 ms (15.250 ms / 100) Good 12.945 -> 11.344 (-12.37%) [ +0.67% +0.15% +0.00% / -11.60% -12.37% -12.29%] index_select random_sorted : Elapsed 0.130 ms (13.032 ms / 100) out_shape = [50, 250, 150] in_shape = [50, 15, 150] idx_dim = 1 B = [50, 250, 150] (stride (37500, 150, 1)) A = [50, 15, 150] (stride (1, 7500, 50)) dim = 1 3.260 -> 3.277 ( +0.52%) [ +0.25% +0.21% +0.00% / +0.58% +0.52% +1.66%] index_add_ linear : Elapsed 0.033 ms (3.268 ms / 100) 3.035 -> 3.037 ( +0.07%) [ +0.03% +0.00% +0.00% / +0.07% +0.16% +0.33%] index_copy_ linear : Elapsed 0.030 ms (3.036 ms / 100) 3.258 -> 3.265 ( +0.21%) [ +0.00% +1.04% +0.49% / +0.21% +1.69% +1.23%] index_add_ reverse : Elapsed 0.033 ms (3.258 ms / 100) 3.033 -> 3.036 ( +0.10%) [ +0.07% +0.26% +0.00% / +0.20% +0.36% +0.10%] index_copy_ reverse : Elapsed 0.030 ms (3.035 ms / 100) 3.244 -> 3.259 ( +0.46%) [ +0.12% +0.06% +0.00% / +0.46% +1.94% +1.57%] index_add_ spread : Elapsed 0.032 ms (3.248 ms / 100) 3.041 -> 3.052 ( +0.36%) [ +0.00% +0.33% +0.30% / +0.46% +0.36% +0.43%] index_copy_ spread : Elapsed 0.030 ms (3.041 ms / 100) 3.250 -> 3.269 ( +0.58%) [ +0.40% +0.37% +0.00% / +0.92% +0.58% +1.85%] index_add_ strided 3 : Elapsed 0.033 ms (3.263 ms / 100) 3.042 -> 3.050 ( +0.26%) [ +0.13% +0.00% +0.10% / +0.36% +0.39% +0.26%] index_copy_ strided 3 : Elapsed 0.030 ms (3.046 ms / 100) 3.260 -> 3.258 ( -0.06%) [ +0.03% +0.40% +0.00% / -0.06% +0.18% +0.46%] index_add_ strided 7 : Elapsed 0.033 ms (3.261 ms / 100) 3.052 -> 3.040 ( -0.39%) [ +0.00% +0.10% +0.03% / +0.00% -0.39% -0.29%] index_copy_ strided 7 : Elapsed 0.031 ms (3.052 ms / 100) 3.269 -> 3.269 ( +0.00%) [ +0.09% +0.00% +0.31% / +0.43% +0.00% +1.87%] index_add_ perm : Elapsed 0.033 ms (3.272 ms / 100) 3.051 -> 3.048 ( -0.10%) [ +0.00% +0.10% +0.07% / +0.33% -0.10% +0.03%] index_copy_ perm : Elapsed 0.031 ms (3.051 ms / 100) 3.249 -> 3.257 ( +0.25%) [ +0.25% +0.55% +0.00% / +0.25% +0.28% +0.83%] index_add_ perm_sorted : Elapsed 0.033 ms (3.257 ms / 100) 3.045 -> 3.047 ( +0.07%) [ +0.13% +0.07% +0.00% / +0.07% +0.13% +0.16%] index_copy_ perm_sorted : Elapsed 0.030 ms (3.049 ms / 100) 20.943 -> 20.794 ( -0.71%) [ +0.00% +0.08% +0.05% / +0.06% -0.69% -0.71%] index_select const : Elapsed 0.209 ms (20.943 ms / 100) 27.388 -> 27.513 ( +0.46%) [ +0.00% +0.74% +0.30% / +0.46% +3.16% +3.34%] index_select wrap : Elapsed 0.274 ms (27.388 ms / 100) 21.327 -> 21.309 ( -0.08%) [ +0.07% +0.00% +0.07% / -0.08% +0.11% +0.16%] index_select linear : Elapsed 0.213 ms (21.342 ms / 100) 21.512 -> 21.481 ( -0.14%) [ +0.00% +0.08% +0.01% / +0.32% -0.14% +0.01%] index_select reverse : Elapsed 0.215 ms (21.512 ms / 100) 20.589 -> 20.660 ( +0.34%) [ +0.15% +0.04% +0.00% / +0.41% +0.65% +0.34%] index_select skip64 : Elapsed 0.206 ms (20.620 ms / 100) 20.730 -> 20.737 ( +0.03%) [ +0.01% +0.00% +0.06% / +0.03% +1.99% +2.15%] index_select skip256 : Elapsed 0.207 ms (20.733 ms / 100) 21.638 -> 21.300 ( -1.56%) [ +0.77% +0.48% +0.00% / +0.01% -1.56% -1.56%] index_select spread : Elapsed 0.218 ms (21.805 ms / 100) 22.424 -> 22.389 ( -0.16%) [ +0.23% +0.00% +0.00% / -0.16% +3.71% +3.57%] index_select strided 3 : Elapsed 0.225 ms (22.475 ms / 100) 21.532 -> 21.281 ( -1.17%) [ +0.55% +0.23% +0.00% / -0.32% -1.17% -1.06%] index_select strided 5 : Elapsed 0.217 ms (21.651 ms / 100) 27.209 -> 27.301 ( +0.34%) [ +0.25% +0.00% +0.07% / +0.34% +3.32% +3.22%] index_select strided 7 : Elapsed 0.273 ms (27.277 ms / 100) 27.360 -> 27.333 ( -0.10%) [ +0.07% +0.12% +0.00% / -0.10% +2.54% +2.21%] index_select strided 8 : Elapsed 0.274 ms (27.379 ms / 100) 26.131 -> 26.165 ( +0.13%) [ +0.00% +0.07% +0.07% / +0.13% +2.61% +2.62%] index_select random : Elapsed 0.261 ms (26.131 ms / 100) 21.420 -> 21.193 ( -1.06%) [ +0.00% +0.14% +0.50% / +0.50% -0.87% -1.06%] index_select random_sorted : Elapsed 0.214 ms (21.420 ms / 100) B = [50, 250, 150] (stride (37500, 150, 1)) A = [50, 15, 150] (stride (1, 50, 750)) dim = 1 3.398 -> 3.408 ( +0.29%) [ +0.00% +0.18% +0.12% / +0.32% +0.53% +0.29%] index_add_ linear : Elapsed 0.034 ms (3.398 ms / 100) 3.135 -> 3.142 ( +0.22%) [ +0.00% +0.13% +0.10% / +0.26% +0.22% +0.41%] index_copy_ linear : Elapsed 0.031 ms (3.135 ms / 100) 3.389 -> 3.403 ( +0.41%) [ +0.12% +0.56% +0.00% / +0.47% +0.41% +0.68%] index_add_ reverse : Elapsed 0.034 ms (3.393 ms / 100) 3.136 -> 3.139 ( +0.10%) [ +0.22% +0.13% +0.00% / +0.29% +0.10% +0.10%] index_copy_ reverse : Elapsed 0.031 ms (3.143 ms / 100) 3.393 -> 3.402 ( +0.27%) [ +0.47% +0.29% +0.00% / +0.27% +0.56% +0.77%] index_add_ spread : Elapsed 0.034 ms (3.409 ms / 100) 3.157 -> 3.160 ( +0.10%) [ +0.00% +0.29% +0.22% / +0.25% +0.10% +0.25%] index_copy_ spread : Elapsed 0.032 ms (3.157 ms / 100) 3.391 -> 3.404 ( +0.38%) [ +0.32% +0.00% +0.32% / +0.38% +0.88% +0.88%] index_add_ strided 3 : Elapsed 0.034 ms (3.402 ms / 100) 3.152 -> 3.161 ( +0.29%) [ +0.10% +0.00% +0.03% / +0.29% +0.29% +0.41%] index_copy_ strided 3 : Elapsed 0.032 ms (3.155 ms / 100) 3.381 -> 3.396 ( +0.44%) [ +0.50% +0.27% +0.00% / +0.44% +0.80% +1.12%] index_add_ strided 7 : Elapsed 0.034 ms (3.398 ms / 100) 3.151 -> 3.158 ( +0.22%) [ +0.19% +0.16% +0.00% / +0.41% +0.22% +0.32%] index_copy_ strided 7 : Elapsed 0.032 ms (3.157 ms / 100) 3.395 -> 3.404 ( +0.27%) [ +0.29% +0.00% +0.24% / +0.27% +0.82% +0.59%] index_add_ perm : Elapsed 0.034 ms (3.405 ms / 100) 3.160 -> 3.164 ( +0.13%) [ +0.00% +0.03% +0.16% / +0.41% +0.22% +0.13%] index_copy_ perm : Elapsed 0.032 ms (3.160 ms / 100) 3.382 -> 3.394 ( +0.35%) [ +0.50% +0.00% +0.21% / +0.35% +0.71% +1.27%] index_add_ perm_sorted : Elapsed 0.034 ms (3.399 ms / 100) 3.158 -> 3.155 ( -0.09%) [ +0.00% +0.13% +0.03% / +0.35% +0.03% -0.09%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.158 ms / 100) 20.740 -> 20.953 ( +1.03%) [ +0.41% +0.00% +0.47% / +1.03% +4.08% +3.79%] index_select const : Elapsed 0.208 ms (20.824 ms / 100) 28.309 -> 28.470 ( +0.57%) [ +0.22% +0.24% +0.00% / +0.57% +1.75% +1.52%] index_select wrap : Elapsed 0.284 ms (28.370 ms / 100) 21.585 -> 21.020 ( -2.62%) [ +0.00% +0.04% +0.36% / +0.28% -2.39% -2.62%] index_select linear : Elapsed 0.216 ms (21.585 ms / 100) 22.085 -> 22.031 ( -0.24%) [ +0.00% +0.04% +0.07% / -0.24% +1.46% +1.68%] index_select reverse : Elapsed 0.221 ms (22.085 ms / 100) 21.130 -> 21.162 ( +0.15%) [ +0.10% +0.00% +0.17% / +0.15% +5.13% +5.04%] index_select skip64 : Elapsed 0.212 ms (21.151 ms / 100) 21.155 -> 21.221 ( +0.31%) [ +0.41% +0.27% +0.00% / +0.31% +1.54% +1.73%] index_select skip256 : Elapsed 0.212 ms (21.242 ms / 100) 21.827 -> 21.880 ( +0.24%) [ +0.00% +0.28% +0.35% / +0.24% +0.51% +0.35%] index_select spread : Elapsed 0.218 ms (21.827 ms / 100) 24.068 -> 23.994 ( -0.31%) [ +0.00% +0.44% +0.46% / -0.31% +3.59% +2.90%] index_select strided 3 : Elapsed 0.241 ms (24.068 ms / 100) 22.889 -> 23.018 ( +0.56%) [ +0.00% +0.48% +0.48% / +0.69% +0.76% +0.56%] index_select strided 5 : Elapsed 0.229 ms (22.889 ms / 100) 28.451 -> 28.516 ( +0.23%) [ +0.00% +0.20% +0.02% / +0.23% +2.10% +2.08%] index_select strided 7 : Elapsed 0.285 ms (28.451 ms / 100) 28.433 -> 28.534 ( +0.36%) [ +0.00% +0.01% +0.10% / +0.36% +1.23% +1.48%] index_select strided 8 : Elapsed 0.284 ms (28.433 ms / 100) 27.644 -> 27.896 ( +0.91%) [ +0.00% +0.41% +0.52% / +0.91% +2.69% +2.90%] index_select random : Elapsed 0.276 ms (27.644 ms / 100) 21.930 -> 22.020 ( +0.41%) [ +0.00% +0.43% +0.86% / +0.41% +1.73% +2.08%] index_select random_sorted : Elapsed 0.219 ms (21.930 ms / 100) B = [50, 250, 150] (stride (37500, 1, 250)) A = [50, 15, 150] (stride (2250, 1, 15)) dim = 1 6.848 -> 6.863 ( +0.22%) [ +0.34% +0.04% +0.00% / +0.22% +5.71% +5.97%] index_add_ linear : Elapsed 0.069 ms (6.871 ms / 100) 5.340 -> 5.346 ( +0.11%) [ +0.00% +0.13% +0.13% / +0.11% +6.24% +6.42%] index_copy_ linear : Elapsed 0.053 ms (5.340 ms / 100) 6.916 -> 6.938 ( +0.32%) [ +0.00% +0.20% +0.19% / +0.32% +7.17% +7.19%] index_add_ reverse : Elapsed 0.069 ms (6.916 ms / 100) 5.397 -> 5.399 ( +0.04%) [ +0.13% +0.00% +0.17% / +0.04% +6.17% +6.13%] index_copy_ reverse : Elapsed 0.054 ms (5.404 ms / 100) 17.208 -> 17.241 ( +0.19%) [ +0.15% +0.00% +0.05% / +0.19% +0.35% +0.27%] index_add_ spread : Elapsed 0.172 ms (17.234 ms / 100) 11.351 -> 11.364 ( +0.11%) [ +0.11% +0.14% +0.00% / +0.33% +0.11% +0.17%] index_copy_ spread : Elapsed 0.114 ms (11.364 ms / 100) 9.021 -> 9.019 ( -0.02%) [ +0.08% +0.00% +0.10% / -0.02% +0.80% +0.67%] index_add_ strided 3 : Elapsed 0.090 ms (9.028 ms / 100) 6.536 -> 6.561 ( +0.38%) [ +0.00% +0.23% +0.29% / +0.38% +1.06% +1.12%] index_copy_ strided 3 : Elapsed 0.065 ms (6.536 ms / 100) 13.829 -> 13.832 ( +0.02%) [ +0.20% +0.00% +0.12% / +0.02% +0.77% +0.72%] index_add_ strided 7 : Elapsed 0.139 ms (13.856 ms / 100) 9.425 -> 9.438 ( +0.14%) [ +0.10% +0.00% +0.21% / +0.14% +0.25% +0.18%] index_copy_ strided 7 : Elapsed 0.094 ms (9.434 ms / 100) 19.087 -> 19.063 ( -0.13%) [ +0.11% +0.00% +0.23% / +0.15% -0.02% -0.13%] index_add_ perm : Elapsed 0.191 ms (19.108 ms / 100) 12.745 -> 12.727 ( -0.14%) [ +0.11% +0.00% +0.13% / +0.23% -0.14% -0.12%] index_copy_ perm : Elapsed 0.128 ms (12.759 ms / 100) 15.290 -> 15.277 ( -0.09%) [ +0.21% +0.09% +0.00% / +0.00% -0.09% -0.05%] index_add_ perm_sorted : Elapsed 0.153 ms (15.322 ms / 100) 10.339 -> 10.348 ( +0.09%) [ +0.27% +0.00% +0.12% / +0.37% +0.10% +0.09%] index_copy_ perm_sorted : Elapsed 0.104 ms (10.367 ms / 100) BEST 107.574 -> 17.171 (-84.04%) [ +0.15% +0.13% +0.00% / -83.99% -84.03% -84.04%] index_select const : Elapsed 1.077 ms (107.732 ms / 100) BEST 111.302 -> 17.337 (-84.42%) [ +0.42% +0.00% +0.10% / -84.41% -84.42% -84.42%] index_select wrap : Elapsed 1.118 ms (111.771 ms / 100) BEST 107.131 -> 17.386 (-83.77%) [ +0.35% +0.00% +0.17% / -83.71% -83.77% -83.76%] index_select linear : Elapsed 1.075 ms (107.502 ms / 100) BEST 107.455 -> 17.373 (-83.83%) [ +0.38% +0.44% +0.00% / -83.83% -83.73% -83.72%] index_select reverse : Elapsed 1.079 ms (107.860 ms / 100) BEST 107.006 -> 17.173 (-83.95%) [ +0.17% +0.00% +0.28% / -83.59% -83.94% -83.95%] index_select skip64 : Elapsed 1.072 ms (107.188 ms / 100) BEST 107.148 -> 17.176 (-83.97%) [ +0.25% +0.82% +0.00% / -83.97% -83.82% -83.83%] index_select skip256 : Elapsed 1.074 ms (107.418 ms / 100) BEST 108.124 -> 17.246 (-84.05%) [ +0.03% +0.21% +0.00% / -84.05% -84.01% -84.02%] index_select spread : Elapsed 1.082 ms (108.161 ms / 100) BEST 112.035 -> 17.321 (-84.54%) [ +0.44% +0.18% +0.00% / -84.54% -84.51% -84.53%] index_select strided 3 : Elapsed 1.125 ms (112.528 ms / 100) BEST 111.781 -> 17.301 (-84.52%) [ +0.18% +0.20% +0.00% / -84.52% -84.50% -84.51%] index_select strided 5 : Elapsed 1.120 ms (111.986 ms / 100) BEST 111.934 -> 17.320 (-84.53%) [ +0.45% +0.00% +0.30% / -84.53% -84.51% -84.53%] index_select strided 7 : Elapsed 1.124 ms (112.436 ms / 100) BEST 111.922 -> 17.279 (-84.56%) [ +0.00% +0.18% +0.02% / -84.25% -84.56% -84.54%] index_select strided 8 : Elapsed 1.119 ms (111.922 ms / 100) BEST 111.266 -> 17.368 (-84.39%) [ +0.00% +0.34% +0.77% / -84.39% -84.24% -84.21%] index_select random : Elapsed 1.113 ms (111.266 ms / 100) BEST 108.040 -> 17.304 (-83.98%) [ +0.00% +0.22% +0.05% / -83.94% -83.97% -83.98%] index_select random_sorted : Elapsed 1.080 ms (108.040 ms / 100) B = [50, 250, 150] (stride (150, 7500, 1)) A = [50, 15, 150] (stride (2250, 150, 1)) dim = 1 3.313 -> 3.316 ( +0.09%) [ +0.36% +0.21% +0.00% / +0.33% +0.36% +0.09%] index_add_ linear : Elapsed 0.033 ms (3.325 ms / 100) 3.141 -> 3.148 ( +0.22%) [ +0.25% +0.22% +0.00% / +0.29% +0.22% +0.22%] index_copy_ linear : Elapsed 0.031 ms (3.149 ms / 100) 3.322 -> 3.328 ( +0.18%) [ +0.03% +0.00% +0.03% / +0.21% +0.24% +0.18%] index_add_ reverse : Elapsed 0.033 ms (3.323 ms / 100) 3.144 -> 3.142 ( -0.06%) [ +0.38% +0.00% +0.10% / +0.19% +0.13% -0.06%] index_copy_ reverse : Elapsed 0.032 ms (3.156 ms / 100) 3.310 -> 3.322 ( +0.36%) [ +0.60% +0.27% +0.00% / +0.36% +0.66% +0.45%] index_add_ spread : Elapsed 0.033 ms (3.330 ms / 100) 3.154 -> 3.162 ( +0.25%) [ +0.38% +0.00% +0.03% / +0.38% +0.29% +0.25%] index_copy_ spread : Elapsed 0.032 ms (3.166 ms / 100) 3.309 -> 3.317 ( +0.24%) [ +0.60% +0.00% +0.12% / +0.66% +0.24% +0.33%] index_add_ strided 3 : Elapsed 0.033 ms (3.329 ms / 100) 3.146 -> 3.150 ( +0.13%) [ +0.10% +0.03% +0.00% / +0.29% +0.13% +0.22%] index_copy_ strided 3 : Elapsed 0.031 ms (3.149 ms / 100) 3.316 -> 3.317 ( +0.03%) [ +0.30% +0.12% +0.00% / +0.42% +0.12% +0.03%] index_add_ strided 7 : Elapsed 0.033 ms (3.326 ms / 100) 3.151 -> 3.147 ( -0.13%) [ +0.25% +0.03% +0.00% / +0.29% -0.10% -0.13%] index_copy_ strided 7 : Elapsed 0.032 ms (3.159 ms / 100) 3.311 -> 3.321 ( +0.30%) [ +0.27% +0.00% +0.15% / +0.30% +0.36% +0.63%] index_add_ perm : Elapsed 0.033 ms (3.320 ms / 100) 3.142 -> 3.154 ( +0.38%) [ +0.29% +0.00% +0.13% / +0.38% +0.48% +0.45%] index_copy_ perm : Elapsed 0.032 ms (3.151 ms / 100) 3.308 -> 3.320 ( +0.36%) [ +0.39% +0.12% +0.00% / +0.36% +0.57% +0.48%] index_add_ perm_sorted : Elapsed 0.033 ms (3.321 ms / 100) 3.144 -> 3.139 ( -0.16%) [ +0.35% +0.13% +0.00% / +0.19% -0.16% +0.10%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.155 ms / 100) 16.627 -> 16.536 ( -0.55%) [ +0.23% +0.00% +0.09% / +0.35% -0.40% -0.55%] index_select const : Elapsed 0.167 ms (16.665 ms / 100) 18.692 -> 18.749 ( +0.30%) [ +0.35% +0.00% +0.02% / +0.30% +1.56% +1.69%] index_select wrap : Elapsed 0.188 ms (18.757 ms / 100) 16.710 -> 16.743 ( +0.20%) [ +0.35% +0.00% +0.19% / +0.58% +0.21% +0.20%] index_select linear : Elapsed 0.168 ms (16.768 ms / 100) 16.689 -> 16.783 ( +0.56%) [ +0.50% +0.00% +0.21% / +0.56% +0.70% +0.59%] index_select reverse : Elapsed 0.168 ms (16.772 ms / 100) 16.615 -> 16.559 ( -0.34%) [ +0.48% +0.00% +0.05% / +0.42% -0.33% -0.34%] index_select skip64 : Elapsed 0.167 ms (16.694 ms / 100) 16.668 -> 16.572 ( -0.58%) [ +0.23% +0.00% +0.13% / +0.46% -0.58% -0.44%] index_select skip256 : Elapsed 0.167 ms (16.707 ms / 100) 16.863 -> 16.958 ( +0.56%) [ +0.54% +0.00% +0.24% / +0.56% +0.89% +0.89%] index_select spread : Elapsed 0.170 ms (16.954 ms / 100) 17.751 -> 17.800 ( +0.28%) [ +0.26% +0.00% +0.16% / +0.28% +3.84% +3.86%] index_select strided 3 : Elapsed 0.178 ms (17.798 ms / 100) 17.290 -> 17.086 ( -1.18%) [ +0.43% +0.11% +0.00% / +0.46% -1.18% -1.15%] index_select strided 5 : Elapsed 0.174 ms (17.364 ms / 100) 18.730 -> 18.809 ( +0.42%) [ +0.24% +0.00% +0.07% / +0.42% +1.43% +1.27%] index_select strided 7 : Elapsed 0.188 ms (18.775 ms / 100) 18.718 -> 18.753 ( +0.19%) [ +0.32% +0.00% +0.02% / +0.19% +1.69% +1.55%] index_select strided 8 : Elapsed 0.188 ms (18.778 ms / 100) 18.465 -> 18.522 ( +0.31%) [ +0.30% +0.18% +0.00% / +0.31% +0.78% +0.59%] index_select random : Elapsed 0.185 ms (18.520 ms / 100) 16.883 -> 16.973 ( +0.53%) [ +0.54% +0.00% +0.17% / +0.53% +1.16% +1.15%] index_select random_sorted : Elapsed 0.170 ms (16.974 ms / 100) B = [50, 250, 150] (stride (1, 7500, 50)) A = [50, 15, 150] (stride (15, 1, 750)) dim = 1 3.530 -> 3.537 ( +0.20%) [ +0.48% +0.31% +0.00% / +0.20% +0.59% +0.59%] index_add_ linear : Elapsed 0.035 ms (3.547 ms / 100) 3.482 -> 3.489 ( +0.20%) [ +0.17% +0.11% +0.00% / +0.20% +0.37% +0.43%] index_copy_ linear : Elapsed 0.035 ms (3.488 ms / 100) 3.535 -> 3.547 ( +0.34%) [ +0.28% +0.23% +0.00% / +0.34% +0.37% +0.34%] index_add_ reverse : Elapsed 0.035 ms (3.545 ms / 100) 3.485 -> 3.489 ( +0.11%) [ +0.55% +0.00% +0.03% / +0.11% +0.40% +0.55%] index_copy_ reverse : Elapsed 0.035 ms (3.504 ms / 100) 3.522 -> 3.532 ( +0.28%) [ +0.54% +0.11% +0.00% / +0.28% +1.28% +1.05%] index_add_ spread : Elapsed 0.035 ms (3.541 ms / 100) 3.479 -> 3.494 ( +0.43%) [ +0.09% +0.06% +0.00% / +0.43% +0.78% +0.89%] index_copy_ spread : Elapsed 0.035 ms (3.482 ms / 100) 3.527 -> 3.551 ( +0.68%) [ +0.34% +0.40% +0.00% / +0.68% +0.88% +0.74%] index_add_ strided 3 : Elapsed 0.035 ms (3.539 ms / 100) 3.482 -> 3.495 ( +0.37%) [ +0.23% +0.00% +0.23% / +0.40% +0.37% +0.69%] index_copy_ strided 3 : Elapsed 0.035 ms (3.490 ms / 100) 3.552 -> 3.554 ( +0.06%) [ +0.48% +0.14% +0.00% / +0.34% +0.06% +0.06%] index_add_ strided 7 : Elapsed 0.036 ms (3.569 ms / 100) 3.506 -> 3.490 ( -0.46%) [ +0.17% +0.00% +0.06% / +0.14% -0.46% -0.37%] index_copy_ strided 7 : Elapsed 0.035 ms (3.512 ms / 100) 3.567 -> 3.552 ( -0.42%) [ +0.08% +0.00% +0.17% / +0.20% -0.34% -0.42%] index_add_ perm : Elapsed 0.036 ms (3.570 ms / 100) 3.511 -> 3.495 ( -0.46%) [ +0.20% +0.23% +0.00% / +0.17% -0.46% -0.43%] index_copy_ perm : Elapsed 0.035 ms (3.518 ms / 100) 3.541 -> 3.545 ( +0.11%) [ +0.28% +0.17% +0.00% / +0.37% +0.23% +0.11%] index_add_ perm_sorted : Elapsed 0.036 ms (3.551 ms / 100) 3.482 -> 3.485 ( +0.09%) [ +0.34% +0.00% +0.17% / +0.32% +0.20% +0.09%] index_copy_ perm_sorted : Elapsed 0.035 ms (3.494 ms / 100) 71.939 -> 70.555 ( -1.92%) [ +0.00% +0.12% +0.48% / +0.14% -0.92% -1.92%] index_select const : Elapsed 0.719 ms (71.939 ms / 100) 71.791 -> 70.224 ( -2.18%) [ +0.00% +0.77% +1.36% / +0.61% -2.18% -2.04%] index_select wrap : Elapsed 0.718 ms (71.791 ms / 100) 70.511 -> 70.129 ( -0.54%) [ +0.00% +0.64% +0.26% / +0.27% -0.43% -0.54%] index_select linear : Elapsed 0.705 ms (70.511 ms / 100) 72.410 -> 70.730 ( -2.32%) [ +0.40% +0.00% +0.55% / +0.10% -2.32% -1.12%] index_select reverse : Elapsed 0.727 ms (72.703 ms / 100) 71.342 -> 71.041 ( -0.42%) [ +0.00% +1.86% +1.41% / +1.22% -0.33% -0.42%] index_select skip64 : Elapsed 0.713 ms (71.342 ms / 100) 71.914 -> 70.773 ( -1.59%) [ +0.41% +0.00% +0.50% / +0.93% -1.05% -1.59%] index_select skip256 : Elapsed 0.722 ms (72.207 ms / 100) 70.486 -> 68.884 ( -2.27%) [ +0.26% +1.24% +0.00% / +0.70% -2.27% -1.97%] index_select spread : Elapsed 0.707 ms (70.669 ms / 100) 72.800 -> 71.551 ( -1.72%) [ +0.00% +1.08% +0.68% / +1.38% -1.72% -1.57%] index_select strided 3 : Elapsed 0.728 ms (72.800 ms / 100) 73.294 -> 71.385 ( -2.60%) [ +0.50% +0.95% +0.00% / +0.24% -2.60% -2.19%] index_select strided 5 : Elapsed 0.737 ms (73.664 ms / 100) 72.538 -> 71.065 ( -2.03%) [ +0.00% +1.78% +1.19% / +1.70% -1.41% -2.03%] index_select strided 7 : Elapsed 0.725 ms (72.538 ms / 100) 73.097 -> 71.296 ( -2.46%) [ +0.90% +0.46% +0.00% / -0.70% -2.46% -2.23%] index_select strided 8 : Elapsed 0.738 ms (73.756 ms / 100) 72.685 -> 71.189 ( -2.06%) [ +0.66% +0.00% +0.59% / +0.18% -2.01% -2.06%] index_select random : Elapsed 0.732 ms (73.162 ms / 100) 69.551 -> 68.424 ( -1.62%) [ +0.40% +0.00% +0.21% / +0.59% -1.17% -1.62%] index_select random_sorted : Elapsed 0.698 ms (69.828 ms / 100) B = [50, 250, 150] (stride (250, 1, 12500)) dim = 1 fill_cnt = 15 good 2.464 -> 2.335 ( -5.24%) [ +0.00% +0.32% +0.04% / +0.37% -4.95% -5.24%] index_fill_ const : Elapsed 0.025 ms (2.464 ms / 100) 2.577 -> 2.590 ( +0.50%) [ +0.27% +0.27% +0.00% / +0.50% +3.34% +3.26%] index_fill_ linear : Elapsed 0.026 ms (2.584 ms / 100) 2.532 -> 2.549 ( +0.67%) [ +0.36% +0.39% +0.00% / +0.67% +5.29% +5.41%] index_fill_ reverse : Elapsed 0.025 ms (2.541 ms / 100) 2.404 -> 2.330 ( -3.08%) [ +0.50% +0.00% +0.46% / +0.42% -3.08% -3.08%] index_fill_ skip64 : Elapsed 0.024 ms (2.416 ms / 100) 2.409 -> 2.328 ( -3.36%) [ +0.50% +0.50% +0.00% / +0.37% -3.36% -3.20%] index_fill_ skip256 : Elapsed 0.024 ms (2.421 ms / 100) 6.867 -> 6.868 ( +0.01%) [ +0.15% +0.00% +0.15% / +0.13% +0.01% +0.01%] index_fill_ spread : Elapsed 0.069 ms (6.877 ms / 100) 3.470 -> 3.476 ( +0.17%) [ +0.03% +0.00% +0.29% / +0.17% +0.61% +0.17%] index_fill_ strided 3 : Elapsed 0.035 ms (3.471 ms / 100) 4.538 -> 4.540 ( +0.04%) [ +0.00% +0.09% +0.09% / +0.04% +0.46% +0.64%] index_fill_ strided 5 : Elapsed 0.045 ms (4.538 ms / 100) 5.820 -> 5.823 ( +0.05%) [ +0.07% +0.03% +0.00% / +0.10% +0.05% +0.19%] index_fill_ strided 7 : Elapsed 0.058 ms (5.824 ms / 100) 6.428 -> 6.430 ( +0.03%) [ +0.00% +0.11% +0.02% / +0.19% +0.12% +0.03%] index_fill_ strided 8 : Elapsed 0.064 ms (6.428 ms / 100) 6.815 -> 6.823 ( +0.12%) [ +0.06% +0.00% +0.03% / +0.12% +0.15% +0.13%] index_fill_ strided 16 : Elapsed 0.068 ms (6.819 ms / 100) 8.281 -> 8.280 ( -0.01%) [ +0.00% +0.06% +0.06% / +0.10% -0.01% +0.01%] index_fill_ strided 64 : Elapsed 0.083 ms (8.281 ms / 100) 8.294 -> 8.304 ( +0.12%) [ +0.00% +0.07% +0.14% / +0.28% +0.12% +0.13%] index_fill_ strided 100 : Elapsed 0.083 ms (8.294 ms / 100) 7.190 -> 7.117 ( -1.02%) [ +0.04% +0.00% +0.07% / +0.15% -0.92% -1.02%] index_fill_ random : Elapsed 0.072 ms (7.193 ms / 100) 5.752 -> 5.660 ( -1.60%) [ +0.07% +0.00% +0.00% / +0.14% -1.58% -1.60%] index_fill_ random_sorted : Elapsed 0.058 ms (5.756 ms / 100) 7.941 -> 7.942 ( +0.01%) [ +0.05% +0.00% +0.00% / +0.04% +0.01% +0.18%] index_fill_ perm : Elapsed 0.079 ms (7.945 ms / 100) 6.247 -> 6.258 ( +0.18%) [ +0.05% +0.00% +0.10% / +0.18% +0.26% +0.35%] index_fill_ perm_sorted : Elapsed 0.063 ms (6.250 ms / 100) B = [50, 250, 150] (stride (250, 1, 12500)) A = [50, 15, 150] (stride (1, 50, 750)) dim = 1 7.156 -> 7.164 ( +0.11%) [ +0.11% +0.35% +0.00% / +0.11% +4.49% +5.00%] index_add_ linear : Elapsed 0.072 ms (7.164 ms / 100) 5.045 -> 5.078 ( +0.65%) [ +0.30% +0.50% +0.00% / +0.65% +4.04% +4.00%] index_copy_ linear : Elapsed 0.051 ms (5.060 ms / 100) 7.234 -> 7.234 ( +0.00%) [ +0.15% +0.00% +0.15% / +0.00% +5.34% +5.32%] index_add_ reverse : Elapsed 0.072 ms (7.245 ms / 100) 5.078 -> 5.086 ( +0.16%) [ +0.30% +0.00% +0.14% / +0.16% +4.65% +4.59%] index_copy_ reverse : Elapsed 0.051 ms (5.093 ms / 100) 17.857 -> 17.781 ( -0.43%) [ +0.08% +0.00% +0.17% / +0.20% -0.33% -0.43%] index_add_ spread : Elapsed 0.179 ms (17.871 ms / 100) 10.599 -> 10.523 ( -0.72%) [ +0.16% +0.00% +0.48% / +0.49% -0.54% -0.72%] index_copy_ spread : Elapsed 0.106 ms (10.616 ms / 100) 9.553 -> 9.402 ( -1.58%) [ +0.07% +0.00% +0.14% / +0.14% -1.58% -1.43%] index_add_ strided 3 : Elapsed 0.096 ms (9.560 ms / 100) 6.293 -> 6.114 ( -2.84%) [ +0.02% +0.03% +0.00% / -0.13% -2.84% -2.75%] index_copy_ strided 3 : Elapsed 0.063 ms (6.294 ms / 100) 14.797 -> 14.763 ( -0.23%) [ +0.07% +0.00% +0.08% / +0.08% -0.22% -0.23%] index_add_ strided 7 : Elapsed 0.148 ms (14.808 ms / 100) 8.987 -> 8.878 ( -1.21%) [ +0.14% +0.00% +0.13% / +0.04% -1.15% -1.21%] index_copy_ strided 7 : Elapsed 0.090 ms (9.000 ms / 100) 18.834 -> 18.858 ( +0.13%) [ +0.00% +0.04% +0.24% / +0.19% +0.20% +0.13%] index_add_ perm : Elapsed 0.188 ms (18.834 ms / 100) 11.463 -> 11.436 ( -0.24%) [ +0.16% +0.00% +0.39% / +0.48% -0.24% -0.17%] index_copy_ perm : Elapsed 0.115 ms (11.481 ms / 100) 16.653 -> 16.441 ( -1.27%) [ +0.00% +0.01% +0.14% / +0.10% -1.27% -1.19%] index_add_ perm_sorted : Elapsed 0.167 ms (16.653 ms / 100) 10.018 -> 9.871 ( -1.47%) [ +0.00% +0.18% +0.26% / +0.18% -1.47% -1.38%] index_copy_ perm_sorted : Elapsed 0.100 ms (10.018 ms / 100) BEST 80.828 -> 20.567 (-74.55%) [ +0.32% +0.36% +0.00% / -74.55% -73.56% -73.60%] index_select const : Elapsed 0.811 ms (81.088 ms / 100) BEST 102.217 -> 26.779 (-73.80%) [ +0.65% +0.20% +0.00% / -73.80% -72.84% -72.83%] index_select wrap : Elapsed 1.029 ms (102.879 ms / 100) BEST 80.565 -> 23.881 (-70.36%) [ +0.02% +0.01% +0.00% / -70.36% -69.13% -69.25%] index_select linear : Elapsed 0.806 ms (80.584 ms / 100) BEST 82.284 -> 23.841 (-71.03%) [ +0.00% +0.42% +0.38% / -71.03% -69.65% -69.59%] index_select reverse : Elapsed 0.823 ms (82.284 ms / 100) BEST 80.228 -> 20.374 (-74.60%) [ +0.00% +0.74% +0.38% / -74.60% -73.43% -73.42%] index_select skip64 : Elapsed 0.802 ms (80.228 ms / 100) BEST 80.278 -> 20.888 (-73.98%) [ +0.92% +0.19% +0.00% / -73.98% -73.36% -73.45%] index_select skip256 : Elapsed 0.810 ms (81.017 ms / 100) BEST 80.952 -> 24.702 (-69.49%) [ +0.48% +0.53% +0.00% / -69.49% -68.40% -68.48%] index_select spread : Elapsed 0.813 ms (81.337 ms / 100) BEST 89.444 -> 22.403 (-74.95%) [ +0.05% +0.56% +0.00% / -74.95% -73.34% -73.48%] index_select strided 3 : Elapsed 0.895 ms (89.492 ms / 100) BEST 84.722 -> 21.770 (-74.30%) [ +0.46% +0.28% +0.00% / -74.30% -73.34% -73.27%] index_select strided 5 : Elapsed 0.851 ms (85.114 ms / 100) BEST 103.395 -> 26.836 (-74.05%) [ +0.22% +0.14% +0.00% / -74.05% -73.19% -73.17%] index_select strided 7 : Elapsed 1.036 ms (103.620 ms / 100) BEST 103.150 -> 27.044 (-73.78%) [ +0.00% +0.30% +0.12% / -73.78% -73.10% -73.05%] index_select strided 8 : Elapsed 1.032 ms (103.150 ms / 100) BEST 97.182 -> 26.791 (-72.43%) [ +0.36% +0.69% +0.00% / -72.43% -71.32% -71.44%] index_select random : Elapsed 0.975 ms (97.531 ms / 100) BEST 80.257 -> 24.561 (-69.40%) [ +0.53% +0.61% +0.00% / -69.40% -67.94% -67.95%] index_select random_sorted : Elapsed 0.807 ms (80.680 ms / 100) B = [50, 250, 150] (stride (1, 50, 12500)) A = [50, 15, 150] (stride (1, 50, 750)) dim = 1 4.336 -> 4.223 ( -2.61%) [ +0.18% +0.35% +0.00% / +0.12% -2.61% -2.31%] index_add_ linear : Elapsed 0.043 ms (4.344 ms / 100) 3.635 -> 3.587 ( -1.32%) [ +0.17% +0.00% +0.22% / +0.41% -1.32% -1.16%] index_copy_ linear : Elapsed 0.036 ms (3.641 ms / 100) 4.332 -> 4.280 ( -1.20%) [ +0.32% +0.00% +0.02% / -0.05% -1.20% -0.81%] index_add_ reverse : Elapsed 0.043 ms (4.346 ms / 100) 3.625 -> 3.628 ( +0.08%) [ +0.06% +0.25% +0.00% / +0.08% +0.30% +0.41%] index_copy_ reverse : Elapsed 0.036 ms (3.627 ms / 100) 4.297 -> 4.311 ( +0.33%) [ +0.00% +0.37% +0.02% / +0.33% +6.82% +6.63%] index_add_ spread : Elapsed 0.043 ms (4.297 ms / 100) 3.598 -> 3.610 ( +0.33%) [ +0.22% +0.00% +0.00% / +0.33% +2.53% +2.70%] index_copy_ spread : Elapsed 0.036 ms (3.606 ms / 100) 4.277 -> 4.287 ( +0.23%) [ +0.00% +0.16% +0.02% / +0.23% +5.70% +5.85%] index_add_ strided 3 : Elapsed 0.043 ms (4.277 ms / 100) 3.616 -> 3.622 ( +0.17%) [ +0.17% +0.17% +0.00% / +0.17% +1.88% +2.18%] index_copy_ strided 3 : Elapsed 0.036 ms (3.622 ms / 100) 4.386 -> 4.406 ( +0.46%) [ +0.00% +0.18% +0.02% / +0.46% +3.72% +3.83%] index_add_ strided 7 : Elapsed 0.044 ms (4.386 ms / 100) 3.630 -> 3.643 ( +0.36%) [ +0.28% +0.00% +0.19% / +0.36% +1.38% +1.49%] index_copy_ strided 7 : Elapsed 0.036 ms (3.640 ms / 100) 4.300 -> 4.302 ( +0.05%) [ +0.40% +0.00% +0.00% / +0.05% +2.65% +2.84%] index_add_ perm : Elapsed 0.043 ms (4.317 ms / 100) 3.609 -> 3.619 ( +0.28%) [ +0.14% +0.14% +0.00% / +0.28% +2.11% +1.33%] index_copy_ perm : Elapsed 0.036 ms (3.614 ms / 100) 4.317 -> 4.341 ( +0.56%) [ +0.58% +0.00% +0.30% / +0.56% +3.80% +3.66%] index_add_ perm_sorted : Elapsed 0.043 ms (4.342 ms / 100) 3.619 -> 3.634 ( +0.41%) [ +0.22% +0.06% +0.00% / +0.41% +1.11% +1.19%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.627 ms / 100) 37.745 -> 38.383 ( +1.69%) [ +0.14% +0.00% +4.12% / +1.69% +8.39% +6.05%] index_select const : Elapsed 0.378 ms (37.798 ms / 100) 56.513 -> 57.358 ( +1.50%) [ +0.69% +0.00% +1.80% / +1.50% +16.76% +16.26%] index_select wrap : Elapsed 0.569 ms (56.905 ms / 100) 35.635 -> 35.399 ( -0.66%) [ +0.00% +0.61% +2.28% / -0.66% +13.43% +13.14%] index_select linear : Elapsed 0.356 ms (35.635 ms / 100) 38.025 -> 38.546 ( +1.37%) [ +1.30% +2.04% +0.00% / +1.37% +16.45% +15.68%] index_select reverse : Elapsed 0.385 ms (38.518 ms / 100) 39.096 -> 38.454 ( -1.64%) [ +0.00% +3.41% +1.71% / -1.64% +4.42% +5.94%] index_select skip64 : Elapsed 0.391 ms (39.096 ms / 100) 38.780 -> 38.585 ( -0.50%) [ +1.62% +0.69% +0.00% / -0.50% +9.70% +7.00%] index_select skip256 : Elapsed 0.394 ms (39.410 ms / 100) 35.389 -> 36.373 ( +2.78%) [ +1.40% +0.00% +2.26% / +2.78% +13.72% +11.69%] index_select spread : Elapsed 0.359 ms (35.883 ms / 100) 61.131 -> 60.353 ( -1.27%) [ +0.34% +0.00% +1.12% / -1.27% +4.47% +4.42%] index_select strided 3 : Elapsed 0.613 ms (61.338 ms / 100) 52.017 -> 51.565 ( -0.87%) [ +0.16% +1.96% +0.00% / -0.87% +4.90% +3.05%] index_select strided 5 : Elapsed 0.521 ms (52.100 ms / 100) 61.566 -> 64.160 ( +4.21%) [ +0.63% +0.00% +1.01% / +4.21% +14.37% +15.11%] index_select strided 7 : Elapsed 0.620 ms (61.955 ms / 100) 58.084 -> 58.367 ( +0.49%) [ +0.00% +2.27% +2.61% / +0.49% +18.44% +16.06%] index_select strided 8 : Elapsed 0.581 ms (58.084 ms / 100) 61.098 -> 61.060 ( -0.06%) [ +0.00% +0.36% +0.47% / -0.06% +9.32% +6.43%] index_select random : Elapsed 0.611 ms (61.098 ms / 100) 34.076 -> 34.240 ( +0.48%) [ +0.44% +0.00% +0.58% / +0.48% +16.79% +15.41%] index_select random_sorted : Elapsed 0.342 ms (34.227 ms / 100) out_shape = [50, 15, 250] in_shape = [50, 15, 150] idx_dim = 2 B = [50, 15, 250] (stride (1, 50, 750)) A = [50, 15, 150] (stride (15, 1, 750)) dim = 2 9.243 -> 9.218 ( -0.27%) [ +0.03% +0.00% +0.00% / +0.03% -0.17% -0.27%] index_add_ linear : Elapsed 0.092 ms (9.246 ms / 100) 9.081 -> 9.100 ( +0.21%) [ +0.31% +0.00% +0.18% / +0.22% +0.21% +0.21%] index_copy_ linear : Elapsed 0.091 ms (9.109 ms / 100) 9.210 -> 9.219 ( +0.10%) [ +0.08% +0.02% +0.00% / +0.18% +0.10% +0.21%] index_add_ reverse : Elapsed 0.092 ms (9.217 ms / 100) 9.076 -> 9.081 ( +0.06%) [ +0.20% +0.06% +0.00% / +0.07% +0.06% +0.23%] index_copy_ reverse : Elapsed 0.091 ms (9.094 ms / 100) 9.219 -> 9.222 ( +0.03%) [ +0.17% +0.02% +0.00% / +0.03% +0.46% +0.85%] index_add_ spread : Elapsed 0.092 ms (9.235 ms / 100) 9.068 -> 9.077 ( +0.10%) [ +0.04% +0.15% +0.00% / +0.10% +0.45% +0.67%] index_copy_ spread : Elapsed 0.091 ms (9.072 ms / 100) 9.232 -> 9.227 ( -0.05%) [ +0.19% +0.00% +0.12% / -0.05% +0.67% +0.97%] index_add_ strided 3 : Elapsed 0.093 ms (9.250 ms / 100) 9.078 -> 9.086 ( +0.09%) [ +0.11% +0.00% +0.14% / +0.09% +0.47% +0.58%] index_copy_ strided 3 : Elapsed 0.091 ms (9.088 ms / 100) 9.312 -> 9.311 ( -0.01%) [ +0.03% +0.06% +0.00% / -0.01% +0.09% +0.04%] index_add_ strided 7 : Elapsed 0.093 ms (9.315 ms / 100) 9.119 -> 9.127 ( +0.09%) [ +0.03% +0.16% +0.00% / +0.09% +0.26% +0.14%] index_copy_ strided 7 : Elapsed 0.091 ms (9.122 ms / 100) 9.283 -> 9.292 ( +0.10%) [ +0.20% +0.04% +0.00% / +0.10% +0.42% +0.33%] index_add_ perm : Elapsed 0.093 ms (9.302 ms / 100) 9.087 -> 9.105 ( +0.20%) [ +0.40% +0.00% +0.07% / +0.20% +0.50% +0.36%] index_copy_ perm : Elapsed 0.091 ms (9.123 ms / 100) 9.195 -> 9.210 ( +0.16%) [ +0.25% +0.00% +0.08% / +0.16% +0.88% +0.61%] index_add_ perm_sorted : Elapsed 0.092 ms (9.218 ms / 100) 9.065 -> 9.062 ( -0.03%) [ +0.18% +0.00% +0.01% / -0.03% +0.53% +0.58%] index_copy_ perm_sorted : Elapsed 0.091 ms (9.081 ms / 100) 10.320 -> 10.345 ( +0.24%) [ +0.26% +0.16% +0.00% / +0.29% +0.24% +0.29%] index_select const : Elapsed 0.103 ms (10.347 ms / 100) 10.780 -> 10.826 ( +0.43%) [ +0.42% +0.43% +0.00% / +0.43% +0.58% +0.50%] index_select wrap : Elapsed 0.108 ms (10.825 ms / 100) 10.599 -> 10.607 ( +0.08%) [ +0.12% +0.00% +0.05% / +0.20% +0.10% +0.08%] index_select linear : Elapsed 0.106 ms (10.612 ms / 100) 10.700 -> 10.729 ( +0.27%) [ +0.25% +0.11% +0.00% / +0.35% +0.27% +0.30%] index_select reverse : Elapsed 0.107 ms (10.727 ms / 100) 10.320 -> 10.363 ( +0.42%) [ +0.14% +0.03% +0.00% / +0.44% +0.42% +0.47%] index_select skip64 : Elapsed 0.103 ms (10.334 ms / 100) 10.333 -> 10.347 ( +0.14%) [ +0.12% +0.05% +0.00% / +0.15% +0.14% +0.20%] index_select skip256 : Elapsed 0.103 ms (10.345 ms / 100) 10.657 -> 10.668 ( +0.10%) [ +0.03% +0.06% +0.00% / +0.38% +0.32% +0.10%] index_select spread : Elapsed 0.107 ms (10.660 ms / 100) 10.788 -> 10.722 ( -0.61%) [ +0.30% +0.00% +0.12% / +0.33% -0.61% -0.60%] index_select strided 3 : Elapsed 0.108 ms (10.820 ms / 100) 10.545 -> 10.567 ( +0.21%) [ +0.18% +0.00% +0.18% / +0.21% +1.35% +1.45%] index_select strided 5 : Elapsed 0.106 ms (10.564 ms / 100) 10.898 -> 10.914 ( +0.15%) [ +0.13% +0.01% +0.00% / +0.22% +0.38% +0.15%] index_select strided 7 : Elapsed 0.109 ms (10.912 ms / 100) 10.801 -> 10.769 ( -0.30%) [ +0.06% +0.00% +0.16% / +0.07% -0.27% -0.30%] index_select strided 8 : Elapsed 0.108 ms (10.808 ms / 100) 10.866 -> 10.778 ( -0.81%) [ +0.09% +0.02% +0.00% / +0.24% -0.81% -0.66%] index_select strided 16 : Elapsed 0.109 ms (10.876 ms / 100) 10.815 -> 10.769 ( -0.43%) [ +0.27% +0.00% +0.26% / +0.10% -0.43% -0.41%] index_select strided 64 : Elapsed 0.108 ms (10.844 ms / 100) 10.357 -> 10.357 ( +0.00%) [ +0.24% +0.05% +0.00% / +0.19% +0.00% +0.05%] index_select strided 100 : Elapsed 0.104 ms (10.382 ms / 100) 10.780 -> 10.797 ( +0.16%) [ +0.06% +0.19% +0.00% / +0.16% +0.60% +0.66%] index_select random : Elapsed 0.108 ms (10.787 ms / 100) 10.582 -> 10.608 ( +0.25%) [ +0.20% +0.00% +0.00% / +0.25% +0.31% +0.35%] index_select random_sorted : Elapsed 0.106 ms (10.603 ms / 100) out_shape = [250, 150, 15] in_shape = [50, 150, 15] idx_dim = 0 B = [250, 150, 15] (stride (2250, 15, 1)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 0 5.212 -> 5.210 ( -0.04%) [ +0.52% +0.31% +0.00% / +0.04% +0.25% -0.04%] index_add_ linear : Elapsed 0.052 ms (5.239 ms / 100) 5.078 -> 5.069 ( -0.18%) [ +0.28% +0.26% +0.00% / -0.18% +0.06% -0.14%] index_copy_ linear : Elapsed 0.051 ms (5.092 ms / 100) 5.248 -> 5.228 ( -0.38%) [ +0.00% +0.38% +0.13% / -0.29% -0.21% -0.38%] index_add_ reverse : Elapsed 0.052 ms (5.248 ms / 100) 5.084 -> 5.071 ( -0.26%) [ +0.00% +0.43% +0.33% / -0.20% -0.26% -0.26%] index_copy_ reverse : Elapsed 0.051 ms (5.084 ms / 100) 5.240 -> 5.225 ( -0.29%) [ +0.00% +0.59% +0.06% / -0.29% -0.13% -0.04%] index_add_ spread : Elapsed 0.052 ms (5.240 ms / 100) 5.076 -> 5.078 ( +0.04%) [ +0.00% +0.61% +0.35% / +0.08% +0.10% +0.04%] index_copy_ spread : Elapsed 0.051 ms (5.076 ms / 100) 5.256 -> 5.234 ( -0.42%) [ +0.02% +0.00% +0.06% / -0.42% -0.21% -0.02%] index_add_ strided 3 : Elapsed 0.053 ms (5.257 ms / 100) 5.083 -> 5.066 ( -0.33%) [ +0.00% +0.12% +0.18% / -0.33% +0.00% -0.06%] index_copy_ strided 3 : Elapsed 0.051 ms (5.083 ms / 100) 5.286 -> 5.215 ( -1.34%) [ +0.21% +0.36% +0.00% / -0.38% -1.02% -1.34%] index_add_ strided 7 : Elapsed 0.053 ms (5.297 ms / 100) 5.103 -> 5.054 ( -0.96%) [ +0.00% +0.45% +0.16% / -0.20% -0.69% -0.96%] index_copy_ strided 7 : Elapsed 0.051 ms (5.103 ms / 100) 5.253 -> 5.249 ( -0.08%) [ +0.10% +0.00% +0.06% / -0.08% +0.27% +0.51%] index_add_ perm : Elapsed 0.053 ms (5.258 ms / 100) 5.086 -> 5.069 ( -0.33%) [ +0.16% +0.00% +0.14% / -0.28% -0.33% -0.14%] index_copy_ perm : Elapsed 0.051 ms (5.094 ms / 100) 5.247 -> 5.234 ( -0.25%) [ +0.00% +0.29% +0.15% / -0.25% +0.53% +0.59%] index_add_ perm_sorted : Elapsed 0.052 ms (5.247 ms / 100) 5.078 -> 5.047 ( -0.61%) [ +0.00% +0.16% +0.16% / -0.61% +0.08% +0.14%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.078 ms / 100) 6.889 -> 6.905 ( +0.23%) [ +0.32% +0.25% +0.00% / +0.45% +0.23% +0.46%] index_select const : Elapsed 0.069 ms (6.911 ms / 100) 7.613 -> 7.611 ( -0.03%) [ +0.11% +0.00% +0.01% / +0.33% -0.03% +0.21%] index_select wrap : Elapsed 0.076 ms (7.621 ms / 100) 7.081 -> 7.088 ( +0.10%) [ +0.55% +0.00% +0.17% / +0.35% +0.13% +0.10%] index_select linear : Elapsed 0.071 ms (7.120 ms / 100) 7.189 -> 7.194 ( +0.07%) [ +0.00% +0.45% +0.00% / +0.11% +0.07% +0.17%] index_select reverse : Elapsed 0.072 ms (7.189 ms / 100) 6.899 -> 6.911 ( +0.17%) [ +0.26% +0.00% +0.10% / +0.17% +0.23% +0.23%] index_select skip64 : Elapsed 0.069 ms (6.917 ms / 100) 6.898 -> 6.898 ( +0.00%) [ +0.25% +0.07% +0.00% / +0.25% +0.00% +0.26%] index_select skip256 : Elapsed 0.069 ms (6.915 ms / 100) 7.217 -> 7.216 ( -0.01%) [ +0.24% +0.00% +0.04% / +0.17% -0.01% +0.06%] index_select spread : Elapsed 0.072 ms (7.234 ms / 100) 7.636 -> 7.635 ( -0.01%) [ +0.07% +0.03% +0.00% / +0.43% +0.07% -0.01%] index_select strided 3 : Elapsed 0.076 ms (7.641 ms / 100) 7.021 -> 7.046 ( +0.36%) [ +0.28% +0.11% +0.00% / +0.36% +0.68% +0.66%] index_select strided 5 : Elapsed 0.070 ms (7.041 ms / 100) 7.633 -> 7.642 ( +0.12%) [ +0.08% +0.04% +0.00% / +0.12% +0.17% +0.12%] index_select strided 7 : Elapsed 0.076 ms (7.639 ms / 100) 7.320 -> 7.319 ( -0.01%) [ +0.07% +0.00% +0.03% / -0.01% +0.30% +0.20%] index_select strided 8 : Elapsed 0.073 ms (7.325 ms / 100) 7.309 -> 7.314 ( +0.07%) [ +0.11% +0.00% +0.00% / +0.07% +0.44% +0.47%] index_select strided 16 : Elapsed 0.073 ms (7.317 ms / 100) 7.435 -> 7.444 ( +0.12%) [ +0.11% +0.00% +0.08% / +0.12% +0.40% +0.54%] index_select random : Elapsed 0.074 ms (7.443 ms / 100) 7.195 -> 7.218 ( +0.32%) [ +0.42% +0.00% +0.14% / +0.32% +0.44% +0.43%] index_select random_sorted : Elapsed 0.072 ms (7.225 ms / 100) B = [250, 150, 15] (stride (2250, 15, 1)) A = [50, 150, 15] (stride (1, 50, 7500)) dim = 0 10.070 -> 10.031 ( -0.39%) [ +0.10% +0.06% +0.00% / -0.39% -0.27% -0.39%] index_add_ linear : Elapsed 0.101 ms (10.080 ms / 100) 9.924 -> 9.907 ( -0.17%) [ +0.00% +0.07% +0.00% / +0.01% -0.17% -0.05%] index_copy_ linear : Elapsed 0.099 ms (9.924 ms / 100) 10.098 -> 10.065 ( -0.33%) [ +0.10% +0.10% +0.00% / -0.29% -0.23% -0.33%] index_add_ reverse : Elapsed 0.101 ms (10.108 ms / 100) 9.953 -> 9.923 ( -0.30%) [ +0.02% +0.00% +0.02% / -0.30% -0.08% -0.22%] index_copy_ reverse : Elapsed 0.100 ms (9.955 ms / 100) 10.124 -> 10.106 ( -0.18%) [ +0.30% +0.14% +0.00% / -0.08% -0.18% -0.12%] index_add_ spread : Elapsed 0.102 ms (10.154 ms / 100) 9.957 -> 9.940 ( -0.17%) [ +0.00% +0.07% +0.01% / -0.17% -0.09% -0.15%] index_copy_ spread : Elapsed 0.100 ms (9.957 ms / 100) 10.140 -> 10.100 ( -0.39%) [ +0.00% +0.21% +0.28% / -0.15% -0.39% -0.35%] index_add_ strided 3 : Elapsed 0.101 ms (10.140 ms / 100) 9.962 -> 9.936 ( -0.26%) [ +0.00% +0.11% +0.16% / -0.13% -0.26% -0.26%] index_copy_ strided 3 : Elapsed 0.100 ms (9.962 ms / 100) 10.193 -> 10.162 ( -0.30%) [ +0.00% +0.02% +0.09% / -0.30% -0.08% +0.05%] index_add_ strided 7 : Elapsed 0.102 ms (10.193 ms / 100) 9.999 -> 9.973 ( -0.26%) [ +0.16% +0.00% +0.06% / -0.16% -0.26% -0.26%] index_copy_ strided 7 : Elapsed 0.100 ms (10.015 ms / 100) 10.253 -> 10.146 ( -1.04%) [ +0.11% +0.02% +0.00% / -0.41% -1.04% -1.01%] index_add_ perm : Elapsed 0.103 ms (10.264 ms / 100) 10.059 -> 9.963 ( -0.95%) [ +0.09% +0.16% +0.00% / -0.20% -0.95% -0.88%] index_copy_ perm : Elapsed 0.101 ms (10.068 ms / 100) 10.190 -> 10.090 ( -0.98%) [ +0.36% +0.00% +0.02% / +0.09% -0.91% -0.98%] index_add_ perm_sorted : Elapsed 0.102 ms (10.227 ms / 100) 10.004 -> 9.912 ( -0.92%) [ +0.32% +0.00% +0.18% / +0.02% -0.57% -0.92%] index_copy_ perm_sorted : Elapsed 0.100 ms (10.036 ms / 100) 19.666 -> 19.725 ( +0.30%) [ +0.23% +0.00% +0.08% / +0.30% +0.30% +0.39%] index_select const : Elapsed 0.197 ms (19.711 ms / 100) 22.395 -> 22.460 ( +0.29%) [ +0.06% +0.00% +0.07% / +0.29% +0.51% +0.47%] index_select wrap : Elapsed 0.224 ms (22.408 ms / 100) 20.334 -> 20.354 ( +0.10%) [ +0.21% +0.00% +0.04% / +0.31% +0.10% +0.15%] index_select linear : Elapsed 0.204 ms (20.377 ms / 100) 20.487 -> 20.501 ( +0.07%) [ +0.09% +0.00% +0.14% / +0.33% +0.25% +0.07%] index_select reverse : Elapsed 0.205 ms (20.506 ms / 100) 19.647 -> 19.719 ( +0.37%) [ +0.17% +0.00% +0.11% / +0.37% +0.46% +0.44%] index_select skip64 : Elapsed 0.197 ms (19.680 ms / 100) 19.684 -> 19.731 ( +0.24%) [ +0.23% +0.00% +0.17% / +0.27% +0.27% +0.24%] index_select skip256 : Elapsed 0.197 ms (19.730 ms / 100) 20.823 -> 20.840 ( +0.08%) [ +0.11% +0.00% +0.07% / +0.08% +0.55% +0.61%] index_select spread : Elapsed 0.208 ms (20.846 ms / 100) 21.949 -> 21.991 ( +0.19%) [ +0.14% +0.00% +0.03% / +0.19% +0.48% +0.58%] index_select strided 3 : Elapsed 0.220 ms (21.980 ms / 100) 21.919 -> 21.958 ( +0.18%) [ +0.03% +0.00% +0.09% / +0.18% +0.55% +0.46%] index_select strided 5 : Elapsed 0.219 ms (21.926 ms / 100) 21.878 -> 21.942 ( +0.29%) [ +0.20% +0.08% +0.00% / +0.29% +0.75% +0.58%] index_select strided 7 : Elapsed 0.219 ms (21.921 ms / 100) 21.905 -> 21.934 ( +0.13%) [ +0.12% +0.00% +0.01% / +0.21% +0.13% +0.16%] index_select strided 8 : Elapsed 0.219 ms (21.931 ms / 100) 21.930 -> 21.947 ( +0.08%) [ +0.08% +0.00% +0.07% / +0.08% +0.42% +0.47%] index_select strided 16 : Elapsed 0.219 ms (21.948 ms / 100) 21.977 -> 22.025 ( +0.22%) [ +0.01% +0.00% +0.09% / +0.22% +0.62% +0.59%] index_select random : Elapsed 0.220 ms (21.979 ms / 100) 20.816 -> 20.878 ( +0.30%) [ +0.18% +0.07% +0.00% / +0.30% +0.50% +0.53%] index_select random_sorted : Elapsed 0.209 ms (20.853 ms / 100) B = [250, 150, 15] (stride (2250, 1, 150)) dim = 0 fill_cnt = 50 3.483 -> 3.480 ( -0.09%) [ +0.14% +0.00% +0.06% / +0.09% -0.09% +0.23%] index_fill_ const : Elapsed 0.035 ms (3.488 ms / 100) 3.100 -> 3.091 ( -0.29%) [ +0.16% +0.58% +0.00% / +0.39% +0.00% -0.29%] index_fill_ linear : Elapsed 0.031 ms (3.105 ms / 100) 3.114 -> 3.108 ( -0.19%) [ +0.00% +0.00% +0.29% / +0.06% -0.13% -0.19%] index_fill_ reverse : Elapsed 0.031 ms (3.114 ms / 100) 3.483 -> 3.478 ( -0.14%) [ +0.00% +0.23% +0.03% / +0.06% -0.14% +0.03%] index_fill_ skip64 : Elapsed 0.035 ms (3.483 ms / 100) 3.479 -> 3.478 ( -0.03%) [ +0.14% +0.14% +0.00% / +0.14% -0.03% +0.06%] index_fill_ skip256 : Elapsed 0.035 ms (3.484 ms / 100) 3.142 -> 3.126 ( -0.51%) [ +0.41% +0.32% +0.00% / +0.03% -0.51% -0.38%] index_fill_ spread : Elapsed 0.032 ms (3.155 ms / 100) 3.135 -> 3.124 ( -0.35%) [ +0.06% +0.22% +0.00% / -0.35% -0.03% -0.26%] index_fill_ strided 3 : Elapsed 0.031 ms (3.137 ms / 100) 3.142 -> 3.114 ( -0.89%) [ +0.03% +0.13% +0.00% / -0.03% -0.80% -0.89%] index_fill_ strided 5 : Elapsed 0.031 ms (3.143 ms / 100) 3.150 -> 3.143 ( -0.22%) [ +0.00% +0.13% +0.13% / -0.13% +0.00% -0.22%] index_fill_ strided 7 : Elapsed 0.032 ms (3.150 ms / 100) 3.142 -> 3.137 ( -0.16%) [ +0.03% +0.13% +0.00% / -0.16% +0.32% +0.38%] index_fill_ strided 8 : Elapsed 0.031 ms (3.143 ms / 100) 3.161 -> 3.139 ( -0.70%) [ +0.44% +0.03% +0.00% / -0.70% +0.54% +0.51%] index_fill_ strided 16 : Elapsed 0.032 ms (3.175 ms / 100) 3.125 -> 3.100 ( -0.80%) [ +0.00% +0.06% +0.03% / -0.19% -0.80% -0.61%] index_fill_ strided 64 : Elapsed 0.031 ms (3.125 ms / 100) 3.169 -> 3.158 ( -0.35%) [ +0.09% +0.00% +0.32% / -0.35% +0.09% -0.28%] index_fill_ strided 100 : Elapsed 0.032 ms (3.172 ms / 100) 3.175 -> 3.110 ( -2.05%) [ +0.00% +0.28% +0.03% / -0.28% -2.05% -1.73%] index_fill_ random : Elapsed 0.032 ms (3.175 ms / 100) 3.141 -> 3.122 ( -0.60%) [ +0.16% +0.00% +0.00% / -0.41% -0.60% -0.45%] index_fill_ random_sorted : Elapsed 0.031 ms (3.146 ms / 100) 3.142 -> 3.138 ( -0.13%) [ +0.00% +0.06% +0.00% / -0.13% +0.16% +0.19%] index_fill_ perm : Elapsed 0.031 ms (3.142 ms / 100) 3.146 -> 3.125 ( -0.67%) [ +0.00% +0.35% +0.13% / -0.22% -0.38% -0.67%] index_fill_ perm_sorted : Elapsed 0.031 ms (3.146 ms / 100) B = [250, 150, 15] (stride (1, 3750, 250)) A = [50, 150, 15] (stride (1, 750, 50)) dim = 0 GOOD 6.810 -> 5.398 (-20.73%) [ +0.07% +0.00% +0.25% / -20.73% -20.37% -20.43%] index_add_ linear : Elapsed 0.068 ms (6.815 ms / 100) Good 5.974 -> 5.227 (-12.50%) [ +0.08% +0.35% +0.00% / -12.50% -12.05% -12.37%] index_copy_ linear : Elapsed 0.060 ms (5.979 ms / 100) GOOD 6.803 -> 5.409 (-20.49%) [ +0.25% +0.41% +0.00% / -20.49% -20.14% -20.06%] index_add_ reverse : Elapsed 0.068 ms (6.820 ms / 100) Good 5.962 -> 5.231 (-12.26%) [ +0.00% +0.05% +0.05% / -12.26% -12.06% -11.71%] index_copy_ reverse : Elapsed 0.060 ms (5.962 ms / 100) GOOD 10.000 -> 6.777 (-32.23%) [ +0.32% +0.00% +0.26% / -32.15% -32.13% -32.23%] index_add_ spread : Elapsed 0.100 ms (10.032 ms / 100) GOOD 8.809 -> 6.856 (-22.17%) [ +0.16% +0.26% +0.00% / -21.94% -22.10% -22.17%] index_copy_ spread : Elapsed 0.088 ms (8.823 ms / 100) GOOD 8.521 -> 6.100 (-28.41%) [ +0.00% +0.01% +0.27% / -28.25% -28.41% -28.24%] index_add_ strided 3 : Elapsed 0.085 ms (8.521 ms / 100) Good 7.511 -> 6.166 (-17.91%) [ +0.37% +0.05% +0.00% / -17.91% -17.65% -17.67%] index_copy_ strided 3 : Elapsed 0.075 ms (7.539 ms / 100) GOOD 11.521 -> 6.843 (-40.60%) [ +0.63% +0.78% +0.00% / -40.46% -40.51% -40.60%] index_add_ strided 7 : Elapsed 0.116 ms (11.594 ms / 100) GOOD 10.076 -> 6.919 (-31.33%) [ +0.52% +0.41% +0.00% / -31.13% -31.33% -31.29%] index_copy_ strided 7 : Elapsed 0.101 ms (10.128 ms / 100) BEST 13.828 -> 6.744 (-51.23%) [ +0.00% +0.48% +0.38% / -51.14% -51.23% -51.23%] index_add_ perm : Elapsed 0.138 ms (13.828 ms / 100) GOOD 11.791 -> 6.572 (-44.26%) [ +0.00% +0.06% +0.05% / -43.98% -44.26% -44.14%] index_copy_ perm : Elapsed 0.118 ms (11.791 ms / 100) GOOD 9.558 -> 6.573 (-31.23%) [ +0.12% +0.23% +0.00% / -31.23% -31.08% -31.17%] index_add_ perm_sorted : Elapsed 0.096 ms (9.569 ms / 100) GOOD 8.442 -> 6.541 (-22.52%) [ +0.00% +0.39% +0.02% / -22.36% -22.39% -22.52%] index_copy_ perm_sorted : Elapsed 0.084 ms (8.442 ms / 100) GOOD 10.568 -> 7.071 (-33.09%) [ +0.16% +0.05% +0.00% / -33.09% -33.04% -33.09%] index_select const : Elapsed 0.106 ms (10.585 ms / 100) GOOD 10.968 -> 7.307 (-33.38%) [ +0.16% +0.00% +0.30% / -33.38% -33.21% -33.29%] index_select wrap : Elapsed 0.110 ms (10.986 ms / 100) GOOD 10.895 -> 7.378 (-32.28%) [ +0.00% +0.06% +0.06% / -32.23% -32.23% -32.28%] index_select linear : Elapsed 0.109 ms (10.895 ms / 100) GOOD 10.671 -> 7.370 (-30.93%) [ +0.12% +0.00% +0.08% / -30.93% -30.78% -30.78%] index_select reverse : Elapsed 0.107 ms (10.684 ms / 100) GOOD 10.628 -> 7.084 (-33.35%) [ +0.08% +0.07% +0.00% / -33.32% -33.32% -33.35%] index_select skip64 : Elapsed 0.106 ms (10.636 ms / 100) GOOD 10.664 -> 7.061 (-33.79%) [ +0.01% +0.11% +0.00% / -33.79% -33.30% -33.57%] index_select skip256 : Elapsed 0.107 ms (10.665 ms / 100) GOOD 10.633 -> 7.532 (-29.16%) [ +0.14% +0.00% +0.20% / -29.06% -29.02% -29.16%] index_select spread : Elapsed 0.106 ms (10.648 ms / 100) GOOD 10.965 -> 7.234 (-34.03%) [ +0.00% +0.07% +0.00% / -34.03% -33.91% -33.93%] index_select strided 3 : Elapsed 0.110 ms (10.965 ms / 100) GOOD 11.036 -> 7.202 (-34.74%) [ +0.25% +0.10% +0.00% / -34.62% -34.74% -34.71%] index_select strided 5 : Elapsed 0.111 ms (11.064 ms / 100) GOOD 10.991 -> 7.230 (-34.22%) [ +0.22% +0.27% +0.00% / -34.21% -34.13% -34.22%] index_select strided 7 : Elapsed 0.110 ms (11.015 ms / 100) GOOD 11.054 -> 7.239 (-34.51%) [ +0.20% +0.02% +0.00% / -34.06% -34.51% -34.41%] index_select strided 8 : Elapsed 0.111 ms (11.076 ms / 100) GOOD 11.061 -> 7.235 (-34.59%) [ +0.00% +0.06% +0.08% / -34.59% -33.93% -33.88%] index_select strided 16 : Elapsed 0.111 ms (11.061 ms / 100) GOOD 10.998 -> 7.220 (-34.35%) [ +0.00% +0.15% +0.08% / -33.96% -34.14% -34.35%] index_select random : Elapsed 0.110 ms (10.998 ms / 100) GOOD 10.644 -> 7.523 (-29.32%) [ +0.12% +0.08% +0.00% / -29.32% -29.02% -29.04%] index_select random_sorted : Elapsed 0.107 ms (10.657 ms / 100) B = [250, 150, 15] (stride (150, 1, 37500)) A = [50, 150, 15] (stride (1, 750, 50)) dim = 0 10.482 -> 10.399 ( -0.79%) [ +0.07% +0.18% +0.00% / +0.07% -0.63% -0.79%] index_add_ linear : Elapsed 0.105 ms (10.489 ms / 100) 9.903 -> 9.894 ( -0.09%) [ +0.33% +0.05% +0.00% / -0.09% +0.12% +0.00%] index_copy_ linear : Elapsed 0.099 ms (9.936 ms / 100) 10.397 -> 10.364 ( -0.32%) [ +0.00% +0.10% +0.07% / -0.32% +0.31% +0.22%] index_add_ reverse : Elapsed 0.104 ms (10.397 ms / 100) 9.889 -> 9.885 ( -0.04%) [ +0.27% +0.16% +0.00% / -0.04% +0.07% -0.02%] index_copy_ reverse : Elapsed 0.099 ms (9.916 ms / 100) 10.538 -> 10.507 ( -0.29%) [ +0.06% +0.00% +0.05% / -0.06% -0.28% -0.29%] index_add_ spread : Elapsed 0.105 ms (10.544 ms / 100) 9.972 -> 9.959 ( -0.13%) [ +0.00% +0.24% +0.22% / -0.11% -0.06% -0.13%] index_copy_ spread : Elapsed 0.100 ms (9.972 ms / 100) 10.592 -> 10.451 ( -1.33%) [ +0.18% +0.34% +0.00% / +0.22% -1.33% -1.15%] index_add_ strided 3 : Elapsed 0.106 ms (10.611 ms / 100) 10.030 -> 9.932 ( -0.98%) [ +0.07% +0.07% +0.00% / -0.18% -0.92% -0.98%] index_copy_ strided 3 : Elapsed 0.100 ms (10.037 ms / 100) 10.512 -> 10.479 ( -0.31%) [ +0.00% +0.03% +0.08% / -0.31% -0.09% +0.28%] index_add_ strided 7 : Elapsed 0.105 ms (10.512 ms / 100) 9.984 -> 9.970 ( -0.14%) [ +0.26% +0.00% +0.12% / -0.14% +0.03% +0.27%] index_copy_ strided 7 : Elapsed 0.100 ms (10.010 ms / 100) 10.518 -> 10.506 ( -0.11%) [ +0.14% +0.00% +0.12% / -0.07% -0.10% -0.11%] index_add_ perm : Elapsed 0.105 ms (10.533 ms / 100) 9.980 -> 9.941 ( -0.39%) [ +0.10% +0.00% +0.08% / -0.35% -0.39% -0.22%] index_copy_ perm : Elapsed 0.100 ms (9.990 ms / 100) 10.450 -> 10.390 ( -0.57%) [ +0.23% +0.27% +0.00% / +0.14% -0.39% -0.57%] index_add_ perm_sorted : Elapsed 0.105 ms (10.474 ms / 100) 9.994 -> 9.930 ( -0.64%) [ +0.10% +0.09% +0.00% / -0.14% -0.58% -0.64%] index_copy_ perm_sorted : Elapsed 0.100 ms (10.004 ms / 100) 21.806 -> 21.833 ( +0.12%) [ +0.00% +0.23% +0.13% / +0.12% +0.67% +0.69%] index_select const : Elapsed 0.218 ms (21.806 ms / 100) 24.643 -> 24.663 ( +0.08%) [ +0.00% +0.37% +0.10% / +0.08% +1.60% +1.66%] index_select wrap : Elapsed 0.246 ms (24.643 ms / 100) 22.219 -> 22.235 ( +0.07%) [ +0.00% +0.17% +0.05% / +0.07% +0.59% +0.57%] index_select linear : Elapsed 0.222 ms (22.219 ms / 100) 22.547 -> 22.542 ( -0.02%) [ +0.07% +0.00% +0.25% / -0.02% +0.86% +0.83%] index_select reverse : Elapsed 0.226 ms (22.563 ms / 100) 21.789 -> 21.873 ( +0.39%) [ +0.49% +0.00% +0.63% / +0.39% +0.48% +0.61%] index_select skip64 : Elapsed 0.219 ms (21.895 ms / 100) 21.863 -> 21.873 ( +0.05%) [ +0.09% +0.17% +0.00% / +0.05% +0.22% +0.64%] index_select skip256 : Elapsed 0.219 ms (21.883 ms / 100) 22.703 -> 22.731 ( +0.12%) [ +0.02% +0.00% +0.11% / +0.12% +0.52% +0.52%] index_select spread : Elapsed 0.227 ms (22.708 ms / 100) 24.813 -> 24.934 ( +0.49%) [ +0.11% +0.19% +0.00% / +0.49% +2.03% +2.04%] index_select strided 3 : Elapsed 0.248 ms (24.840 ms / 100) 25.119 -> 25.061 ( -0.23%) [ +0.00% +0.10% +0.15% / -0.23% +1.73% +1.67%] index_select strided 5 : Elapsed 0.251 ms (25.119 ms / 100) 24.979 -> 24.956 ( -0.09%) [ +0.03% +0.36% +0.00% / -0.09% +1.81% +1.89%] index_select strided 7 : Elapsed 0.250 ms (24.986 ms / 100) 25.034 -> 25.044 ( +0.04%) [ +0.06% +0.13% +0.00% / +0.04% +1.61% +1.69%] index_select strided 8 : Elapsed 0.250 ms (25.049 ms / 100) 24.913 -> 25.003 ( +0.36%) [ +0.00% +0.17% +0.10% / +0.36% +1.92% +2.01%] index_select strided 16 : Elapsed 0.249 ms (24.913 ms / 100) 25.071 -> 25.076 ( +0.02%) [ +0.00% +0.08% +0.04% / +0.02% +1.18% +1.29%] index_select random : Elapsed 0.251 ms (25.071 ms / 100) 22.763 -> 22.719 ( -0.19%) [ +0.05% +0.07% +0.00% / -0.19% +0.33% +0.42%] index_select random_sorted : Elapsed 0.228 ms (22.774 ms / 100) B = [250, 150, 15] (stride (1, 250, 37500)) A = [50, 150, 15] (stride (1, 750, 50)) dim = 0 11.705 -> 11.595 ( -0.94%) [ +0.14% +0.00% +0.06% / -0.38% -0.94% -0.94%] index_add_ linear : Elapsed 0.117 ms (11.721 ms / 100) 10.685 -> 10.586 ( -0.93%) [ +0.21% +0.00% +0.13% / -0.26% -0.93% -0.83%] index_copy_ linear : Elapsed 0.107 ms (10.707 ms / 100) 11.713 -> 11.592 ( -1.03%) [ +0.04% +0.06% +0.00% / -0.69% -0.93% -1.03%] index_add_ reverse : Elapsed 0.117 ms (11.718 ms / 100) 10.688 -> 10.593 ( -0.89%) [ +0.18% +0.09% +0.00% / -0.61% -0.83% -0.89%] index_copy_ reverse : Elapsed 0.107 ms (10.707 ms / 100) 15.770 -> 15.676 ( -0.60%) [ +0.00% +0.03% +0.41% / -0.60% -0.13% -0.25%] index_add_ spread : Elapsed 0.158 ms (15.770 ms / 100) 13.392 -> 13.324 ( -0.51%) [ +0.00% +0.13% +0.03% / -0.51% -0.07% -0.32%] index_copy_ spread : Elapsed 0.134 ms (13.392 ms / 100) 14.030 -> 13.919 ( -0.79%) [ +0.00% +0.37% +0.10% / -0.79% -0.49% -0.37%] index_add_ strided 3 : Elapsed 0.140 ms (14.030 ms / 100) 12.212 -> 12.131 ( -0.66%) [ +0.02% +0.25% +0.00% / -0.66% -0.25% -0.16%] index_copy_ strided 3 : Elapsed 0.122 ms (12.215 ms / 100) 17.079 -> 17.071 ( -0.05%) [ +0.00% +0.23% +0.11% / +0.11% +0.05% -0.05%] index_add_ strided 7 : Elapsed 0.171 ms (17.079 ms / 100) 14.392 -> 14.327 ( -0.45%) [ +0.10% +0.08% +0.00% / -0.45% -0.10% -0.19%] index_copy_ strided 7 : Elapsed 0.144 ms (14.407 ms / 100) 17.812 -> 17.753 ( -0.33%) [ +0.08% +0.11% +0.00% / -0.33% -0.25% -0.26%] index_add_ perm : Elapsed 0.178 ms (17.826 ms / 100) 15.765 -> 15.691 ( -0.47%) [ +0.14% +0.00% +0.02% / -0.47% -0.34% -0.40%] index_copy_ perm : Elapsed 0.158 ms (15.787 ms / 100) 15.020 -> 14.912 ( -0.72%) [ +0.23% +0.00% +0.19% / -0.72% -0.47% -0.35%] index_add_ perm_sorted : Elapsed 0.151 ms (15.054 ms / 100) 12.821 -> 12.755 ( -0.51%) [ +0.34% +0.15% +0.00% / -0.51% -0.02% +0.24%] index_copy_ perm_sorted : Elapsed 0.129 ms (12.864 ms / 100) 26.525 -> 26.340 ( -0.70%) [ +0.17% +0.18% +0.00% / +0.36% -0.70% -0.56%] index_select const : Elapsed 0.266 ms (26.570 ms / 100) 30.414 -> 30.205 ( -0.69%) [ +0.09% +0.00% +0.35% / +0.17% -0.50% -0.69%] index_select wrap : Elapsed 0.304 ms (30.440 ms / 100) 27.121 -> 26.897 ( -0.83%) [ +0.00% +0.35% +0.08% / -0.19% -0.83% -0.72%] index_select linear : Elapsed 0.271 ms (27.121 ms / 100) 27.621 -> 27.268 ( -1.28%) [ +0.25% +0.00% +0.04% / +0.04% -1.28% -1.07%] index_select reverse : Elapsed 0.277 ms (27.691 ms / 100) 26.526 -> 26.317 ( -0.79%) [ +0.30% +0.00% +0.36% / +0.29% -0.56% -0.79%] index_select skip64 : Elapsed 0.266 ms (26.605 ms / 100) 26.552 -> 26.337 ( -0.81%) [ +0.00% +0.08% +0.08% / +0.20% -0.75% -0.81%] index_select skip256 : Elapsed 0.266 ms (26.552 ms / 100) 27.723 -> 27.580 ( -0.52%) [ +0.00% +0.14% +0.28% / +0.25% -0.52% -0.30%] index_select spread : Elapsed 0.277 ms (27.723 ms / 100) 30.916 -> 30.845 ( -0.23%) [ +0.00% +0.09% +0.21% / +0.23% -0.11% -0.23%] index_select strided 3 : Elapsed 0.309 ms (30.916 ms / 100) 31.082 -> 31.030 ( -0.17%) [ +0.14% +0.27% +0.00% / -0.17% +0.06% +0.08%] index_select strided 5 : Elapsed 0.311 ms (31.127 ms / 100) 31.008 -> 30.914 ( -0.30%) [ +0.00% +0.28% +0.30% / +0.48% -0.30% -0.30%] index_select strided 7 : Elapsed 0.310 ms (31.008 ms / 100) 30.937 -> 30.947 ( +0.03%) [ +0.00% +0.21% +0.00% / +0.03% +0.13% +0.17%] index_select strided 8 : Elapsed 0.309 ms (30.938 ms / 100) 30.986 -> 30.905 ( -0.26%) [ +0.00% +0.15% +0.03% / -0.11% -0.26% -0.18%] index_select strided 16 : Elapsed 0.310 ms (30.986 ms / 100) 30.882 -> 30.808 ( -0.24%) [ +0.50% +0.00% +0.21% / -0.06% -0.24% +0.03%] index_select random : Elapsed 0.310 ms (31.036 ms / 100) 27.598 -> 27.592 ( -0.02%) [ +0.01% +0.00% +0.22% / +0.39% +0.21% -0.02%] index_select random_sorted : Elapsed 0.276 ms (27.601 ms / 100) out_shape = [50, 250, 15] in_shape = [50, 150, 15] idx_dim = 1 B = [50, 250, 15] (stride (3750, 15, 1)) A = [50, 150, 15] (stride (15, 750, 1)) dim = 1 5.848 -> 5.810 ( -0.65%) [ +0.00% +0.12% +0.15% / -0.65% +0.41% +0.12%] index_add_ linear : Elapsed 0.058 ms (5.848 ms / 100) 5.568 -> 5.522 ( -0.83%) [ +0.00% +0.23% +0.09% / -0.83% +0.31% -0.14%] index_copy_ linear : Elapsed 0.056 ms (5.568 ms / 100) 5.882 -> 5.832 ( -0.85%) [ +0.19% +0.00% +0.29% / -0.32% -0.68% -0.85%] index_add_ reverse : Elapsed 0.059 ms (5.893 ms / 100) 5.600 -> 5.559 ( -0.73%) [ +0.20% +0.00% +0.23% / -0.73% -0.70% -0.61%] index_copy_ reverse : Elapsed 0.056 ms (5.611 ms / 100) 6.097 -> 6.044 ( -0.87%) [ +0.00% +0.30% +0.07% / -0.87% +0.15% +0.43%] index_add_ spread : Elapsed 0.061 ms (6.097 ms / 100) 5.893 -> 5.840 ( -0.90%) [ +0.00% +0.46% +0.08% / -0.90% -0.15% -0.10%] index_copy_ spread : Elapsed 0.059 ms (5.893 ms / 100) 6.340 -> 6.269 ( -1.12%) [ +0.17% +0.00% +0.25% / -1.12% -0.58% -0.57%] index_add_ strided 3 : Elapsed 0.064 ms (6.351 ms / 100) 6.030 -> 5.969 ( -1.01%) [ +0.23% +0.00% +0.20% / -1.01% -0.40% -0.61%] index_copy_ strided 3 : Elapsed 0.060 ms (6.044 ms / 100) 6.338 -> 6.284 ( -0.85%) [ +0.00% +0.22% +0.60% / -0.85% -0.43% -0.33%] index_add_ strided 7 : Elapsed 0.063 ms (6.338 ms / 100) 6.041 -> 5.993 ( -0.79%) [ +0.00% +0.13% +0.43% / -0.79% -0.40% -0.43%] index_copy_ strided 7 : Elapsed 0.060 ms (6.041 ms / 100) 6.280 -> 6.195 ( -1.35%) [ +0.08% +0.00% +0.22% / -1.35% -1.08% -0.96%] index_add_ perm : Elapsed 0.063 ms (6.285 ms / 100) 5.927 -> 5.855 ( -1.21%) [ +0.25% +0.00% +0.49% / -1.21% -0.66% -0.73%] index_copy_ perm : Elapsed 0.059 ms (5.942 ms / 100) 6.055 -> 6.035 ( -0.33%) [ +0.20% +0.53% +0.00% / -0.33% +0.31% +0.08%] index_add_ perm_sorted : Elapsed 0.061 ms (6.067 ms / 100) 5.816 -> 5.770 ( -0.79%) [ +0.00% +0.46% +0.07% / -0.79% -0.38% +0.03%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.816 ms / 100) 5.935 -> 5.931 ( -0.07%) [ +0.02% +0.00% +0.03% / -0.07% +0.72% +0.67%] index_select const : Elapsed 0.059 ms (5.936 ms / 100) 6.228 -> 6.236 ( +0.13%) [ +0.03% +0.00% +0.16% / +0.13% +2.38% +2.15%] index_select wrap : Elapsed 0.062 ms (6.230 ms / 100) 6.100 -> 6.115 ( +0.25%) [ +0.41% +0.00% +0.07% / +0.25% +1.39% +1.70%] index_select linear : Elapsed 0.061 ms (6.125 ms / 100) 6.262 -> 6.254 ( -0.13%) [ +0.24% +0.00% +0.21% / -0.13% +1.82% +1.07%] index_select reverse : Elapsed 0.063 ms (6.277 ms / 100) 5.930 -> 5.964 ( +0.57%) [ +0.00% +0.17% +0.47% / +0.57% +1.53% +1.52%] index_select skip64 : Elapsed 0.059 ms (5.930 ms / 100) 5.943 -> 5.953 ( +0.17%) [ +0.07% +0.24% +0.00% / +0.17% +1.68% +1.78%] index_select skip256 : Elapsed 0.059 ms (5.947 ms / 100) 6.287 -> 6.309 ( +0.35%) [ +0.29% +0.37% +0.00% / +0.35% +1.53% +1.30%] index_select spread : Elapsed 0.063 ms (6.305 ms / 100) 6.308 -> 6.264 ( -0.70%) [ +0.00% +0.48% +0.11% / +0.44% -0.36% -0.70%] index_select strided 3 : Elapsed 0.063 ms (6.308 ms / 100) 6.072 -> 6.105 ( +0.54%) [ +0.26% +0.49% +0.00% / +0.54% +2.22% +2.19%] index_select strided 5 : Elapsed 0.061 ms (6.088 ms / 100) 6.406 -> 6.403 ( -0.05%) [ +0.08% +0.05% +0.00% / -0.05% +1.92% +1.28%] index_select strided 7 : Elapsed 0.064 ms (6.411 ms / 100) 6.366 -> 6.388 ( +0.35%) [ +0.39% +0.00% +0.38% / +0.55% +0.50% +0.35%] index_select strided 8 : Elapsed 0.064 ms (6.391 ms / 100) 6.375 -> 6.359 ( -0.25%) [ +0.00% +0.24% +0.08% / -0.25% +0.13% +0.05%] index_select strided 16 : Elapsed 0.064 ms (6.375 ms / 100) 6.372 -> 6.377 ( +0.08%) [ +0.00% +0.17% +0.36% / +0.08% +0.33% +0.16%] index_select strided 64 : Elapsed 0.064 ms (6.372 ms / 100) 5.953 -> 5.972 ( +0.32%) [ +0.50% +0.00% +0.49% / +0.32% +1.93% +2.23%] index_select strided 100 : Elapsed 0.060 ms (5.983 ms / 100) 6.370 -> 6.367 ( -0.05%) [ +0.00% +0.14% +0.09% / -0.05% +1.74% +1.49%] index_select random : Elapsed 0.064 ms (6.370 ms / 100) 6.203 -> 6.206 ( +0.05%) [ +0.11% +0.00% +0.02% / +0.05% +2.32% +2.05%] index_select random_sorted : Elapsed 0.062 ms (6.210 ms / 100) B = [50, 250, 15] (stride (3750, 15, 1)) A = [50, 150, 15] (stride (150, 1, 7500)) dim = 1 6.323 -> 6.278 ( -0.71%) [ +0.08% +0.13% +0.00% / -0.71% -0.36% -0.28%] index_add_ linear : Elapsed 0.063 ms (6.328 ms / 100) 6.096 -> 6.056 ( -0.66%) [ +0.03% +0.03% +0.00% / -0.66% -0.18% -0.13%] index_copy_ linear : Elapsed 0.061 ms (6.098 ms / 100) 6.337 -> 6.285 ( -0.82%) [ +0.00% +0.79% +0.03% / -0.62% -0.60% -0.82%] index_add_ reverse : Elapsed 0.063 ms (6.337 ms / 100) 6.109 -> 6.063 ( -0.75%) [ +0.08% +0.74% +0.00% / -0.44% -0.51% -0.75%] index_copy_ reverse : Elapsed 0.061 ms (6.114 ms / 100) 6.577 -> 6.564 ( -0.20%) [ +0.00% +0.09% +0.44% / -0.20% +0.03% +0.32%] index_add_ spread : Elapsed 0.066 ms (6.577 ms / 100) 6.456 -> 6.417 ( -0.60%) [ +0.02% +0.15% +0.00% / -0.59% -0.60% -0.42%] index_copy_ spread : Elapsed 0.065 ms (6.457 ms / 100) 6.856 -> 6.859 ( +0.04%) [ +0.25% +0.00% +0.55% / +0.04% +0.51% +0.50%] index_add_ strided 3 : Elapsed 0.069 ms (6.873 ms / 100) 6.639 -> 6.620 ( -0.29%) [ +0.00% +0.27% +0.45% / -0.14% -0.17% -0.29%] index_copy_ strided 3 : Elapsed 0.066 ms (6.639 ms / 100) 6.927 -> 6.879 ( -0.69%) [ +0.00% +0.00% +0.26% / -0.61% -0.69% -0.59%] index_add_ strided 7 : Elapsed 0.069 ms (6.927 ms / 100) 6.626 -> 6.627 ( +0.02%) [ +0.00% +0.32% +0.47% / +0.15% +0.02% +0.03%] index_copy_ strided 7 : Elapsed 0.066 ms (6.626 ms / 100) 6.830 -> 6.780 ( -0.73%) [ +0.44% +0.00% +0.38% / -0.31% -0.72% -0.73%] index_add_ perm : Elapsed 0.069 ms (6.860 ms / 100) 6.533 -> 6.477 ( -0.86%) [ +0.32% +0.35% +0.00% / -0.31% -0.69% -0.86%] index_copy_ perm : Elapsed 0.066 ms (6.554 ms / 100) 6.559 -> 6.518 ( -0.63%) [ +0.50% +0.08% +0.00% / -0.29% -0.37% -0.63%] index_add_ perm_sorted : Elapsed 0.066 ms (6.592 ms / 100) 6.387 -> 6.344 ( -0.67%) [ +0.00% +0.08% +0.09% / -0.53% -0.67% -0.42%] index_copy_ perm_sorted : Elapsed 0.064 ms (6.387 ms / 100) 6.237 -> 6.236 ( -0.02%) [ +0.30% +0.00% +0.06% / -0.02% +1.07% +1.25%] index_select const : Elapsed 0.063 ms (6.256 ms / 100) 7.007 -> 7.017 ( +0.14%) [ +0.11% +0.00% +0.23% / +0.14% +0.90% +0.70%] index_select wrap : Elapsed 0.070 ms (7.015 ms / 100) 6.810 -> 6.789 ( -0.31%) [ +0.00% +0.01% +0.06% / +0.18% +0.01% -0.31%] index_select linear : Elapsed 0.068 ms (6.810 ms / 100) 6.851 -> 6.852 ( +0.01%) [ +0.03% +0.18% +0.00% / +0.01% +0.34% +0.48%] index_select reverse : Elapsed 0.069 ms (6.853 ms / 100) 6.449 -> 6.455 ( +0.09%) [ +0.00% +0.06% +0.08% / +0.09% +0.65% +0.39%] index_select skip64 : Elapsed 0.064 ms (6.449 ms / 100) 6.222 -> 6.236 ( +0.23%) [ +0.26% +0.00% +0.14% / +0.23% +0.79% +0.90%] index_select skip256 : Elapsed 0.062 ms (6.238 ms / 100) 6.843 -> 6.833 ( -0.15%) [ +0.07% +0.00% +0.38% / +0.13% +0.34% -0.15%] index_select spread : Elapsed 0.068 ms (6.848 ms / 100) 7.254 -> 7.259 ( +0.07%) [ +0.00% +0.07% +0.00% / +0.07% +0.10% +0.23%] index_select strided 3 : Elapsed 0.073 ms (7.254 ms / 100) 7.231 -> 7.239 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.32% +0.24% +0.11%] index_select strided 5 : Elapsed 0.072 ms (7.231 ms / 100) 7.226 -> 7.227 ( +0.01%) [ +0.15% +0.00% +0.21% / +0.11% +0.01% +0.33%] index_select strided 7 : Elapsed 0.072 ms (7.237 ms / 100) 7.218 -> 7.223 ( +0.07%) [ +0.00% +0.10% +0.14% / +0.07% +0.39% +0.65%] index_select strided 8 : Elapsed 0.072 ms (7.218 ms / 100) 7.227 -> 7.226 ( -0.01%) [ +0.29% +0.06% +0.00% / +0.04% -0.01% +0.12%] index_select strided 16 : Elapsed 0.072 ms (7.248 ms / 100) 7.203 -> 7.214 ( +0.15%) [ +0.32% +0.17% +0.00% / +0.28% +0.28% +0.15%] index_select strided 64 : Elapsed 0.072 ms (7.226 ms / 100) 6.606 -> 6.623 ( +0.26%) [ +0.20% +0.14% +0.00% / +0.26% +0.76% +0.38%] index_select strided 100 : Elapsed 0.066 ms (6.619 ms / 100) 7.256 -> 7.257 ( +0.01%) [ +0.00% +0.14% +0.04% / +0.25% +0.06% +0.01%] index_select random : Elapsed 0.073 ms (7.256 ms / 100) 6.860 -> 6.859 ( -0.01%) [ +0.10% +0.20% +0.00% / -0.01% +0.15% +0.09%] index_select random_sorted : Elapsed 0.069 ms (6.867 ms / 100) B = [50, 250, 15] (stride (3750, 1, 250)) A = [50, 150, 15] (stride (2250, 1, 150)) dim = 1 Good 6.841 -> 5.885 (-13.97%) [ +0.00% +0.15% +0.45% / -13.97% -13.13% -13.07%] index_add_ linear : Elapsed 0.068 ms (6.841 ms / 100) good 6.256 -> 5.668 ( -9.40%) [ +0.00% +0.32% +0.14% / -9.40% -8.60% -8.79%] index_copy_ linear : Elapsed 0.063 ms (6.256 ms / 100) Good 6.877 -> 5.917 (-13.96%) [ +0.00% +0.42% +0.44% / -13.96% -13.87% -13.86%] index_add_ reverse : Elapsed 0.069 ms (6.877 ms / 100) good 6.310 -> 5.687 ( -9.87%) [ +0.08% +0.22% +0.00% / -9.78% -9.87% -9.54%] index_copy_ reverse : Elapsed 0.063 ms (6.315 ms / 100) Good 7.297 -> 6.137 (-15.90%) [ +0.37% +0.00% +0.40% / -15.90% -14.92% -14.90%] index_add_ spread : Elapsed 0.073 ms (7.324 ms / 100) Good 6.815 -> 6.086 (-10.70%) [ +0.10% +0.00% +0.19% / -10.70% -9.74% -9.80%] index_copy_ spread : Elapsed 0.068 ms (6.822 ms / 100) GOOD 7.895 -> 6.205 (-21.41%) [ +0.03% +0.28% +0.00% / -21.41% -21.05% -21.14%] index_add_ strided 3 : Elapsed 0.079 ms (7.897 ms / 100) Good 7.272 -> 6.130 (-15.70%) [ +0.43% +0.00% +0.36% / -15.70% -15.04% -15.40%] index_copy_ strided 3 : Elapsed 0.073 ms (7.303 ms / 100) GOOD 8.156 -> 6.218 (-23.76%) [ +0.39% +0.02% +0.00% / -23.76% -23.61% -23.63%] index_add_ strided 7 : Elapsed 0.082 ms (8.188 ms / 100) Good 7.429 -> 6.171 (-16.93%) [ +0.13% +0.00% +0.07% / -16.93% -16.88% -16.93%] index_copy_ strided 7 : Elapsed 0.074 ms (7.439 ms / 100) GOOD 8.196 -> 6.271 (-23.49%) [ +0.00% +0.00% +0.06% / -23.47% -23.49% -23.28%] index_add_ perm : Elapsed 0.082 ms (8.196 ms / 100) Good 7.342 -> 6.174 (-15.91%) [ +0.14% +0.00% +0.33% / -15.91% -15.65% -15.76%] index_copy_ perm : Elapsed 0.074 ms (7.352 ms / 100) Good 7.319 -> 6.140 (-16.11%) [ +0.00% +0.29% +0.10% / -16.11% -15.52% -15.62%] index_add_ perm_sorted : Elapsed 0.073 ms (7.319 ms / 100) Good 6.818 -> 6.090 (-10.68%) [ +0.00% +0.40% +0.12% / -10.68% -10.38% -9.97%] index_copy_ perm_sorted : Elapsed 0.068 ms (6.818 ms / 100) Good 6.696 -> 6.014 (-10.19%) [ +0.01% +0.07% +0.00% / -10.07% -10.16% -10.19%] index_select const : Elapsed 0.067 ms (6.697 ms / 100) Good 7.360 -> 6.243 (-15.18%) [ +0.27% +0.24% +0.00% / -15.18% -14.71% -14.52%] index_select wrap : Elapsed 0.074 ms (7.380 ms / 100) Good 7.184 -> 6.205 (-13.63%) [ +0.00% +0.32% +0.03% / -13.49% -13.63% -13.28%] index_select linear : Elapsed 0.072 ms (7.184 ms / 100) Good 7.126 -> 6.227 (-12.62%) [ +0.10% +0.00% +0.18% / -12.41% -12.50% -12.62%] index_select reverse : Elapsed 0.071 ms (7.133 ms / 100) Good 6.806 -> 6.061 (-10.95%) [ +0.03% +0.12% +0.00% / -10.78% -10.95% -10.65%] index_select skip64 : Elapsed 0.068 ms (6.808 ms / 100) Good 6.662 -> 5.969 (-10.40%) [ +0.38% +0.36% +0.00% / -9.95% -10.40% -10.39%] index_select skip256 : Elapsed 0.067 ms (6.687 ms / 100) Good 7.130 -> 6.239 (-12.50%) [ +0.01% +0.04% +0.00% / -12.19% -12.37% -12.50%] index_select spread : Elapsed 0.071 ms (7.131 ms / 100) Good 7.690 -> 6.265 (-18.53%) [ +0.21% +0.00% +0.18% / -18.53% -18.39% -18.06%] index_select strided 3 : Elapsed 0.077 ms (7.706 ms / 100) Good 7.709 -> 6.271 (-18.65%) [ +0.03% +0.00% +0.04% / -18.36% -18.65% -18.52%] index_select strided 5 : Elapsed 0.077 ms (7.711 ms / 100) Good 7.680 -> 6.266 (-18.41%) [ +0.00% +0.17% +0.07% / -18.41% -17.89% -18.07%] index_select strided 7 : Elapsed 0.077 ms (7.680 ms / 100) Good 7.663 -> 6.279 (-18.06%) [ +0.26% +0.00% +0.20% / -18.06% -17.90% -17.90%] index_select strided 8 : Elapsed 0.077 ms (7.683 ms / 100) Good 7.663 -> 6.281 (-18.03%) [ +0.00% +0.16% +0.18% / -18.00% -17.98% -18.03%] index_select strided 16 : Elapsed 0.077 ms (7.663 ms / 100) Good 7.656 -> 6.259 (-18.25%) [ +0.14% +0.00% +0.14% / -17.83% -17.97% -18.25%] index_select strided 64 : Elapsed 0.077 ms (7.667 ms / 100) Good 7.015 -> 6.104 (-12.99%) [ +0.00% +0.10% +0.16% / -12.60% -12.92% -12.99%] index_select strided 100 : Elapsed 0.070 ms (7.015 ms / 100) Good 7.677 -> 6.254 (-18.54%) [ +0.07% +0.14% +0.00% / -18.54% -17.64% -17.88%] index_select random : Elapsed 0.077 ms (7.682 ms / 100) Good 7.091 -> 6.235 (-12.07%) [ +0.00% +0.28% +0.07% / -11.73% -12.07% -11.87%] index_select random_sorted : Elapsed 0.071 ms (7.091 ms / 100) B = [50, 250, 15] (stride (15, 750, 1)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 1 6.031 -> 6.010 ( -0.35%) [ +0.36% +0.00% +0.13% / -0.35% -0.02% +0.02%] index_add_ linear : Elapsed 0.061 ms (6.053 ms / 100) 5.823 -> 5.800 ( -0.39%) [ +0.39% +0.00% +0.03% / -0.39% +0.43% +0.07%] index_copy_ linear : Elapsed 0.058 ms (5.846 ms / 100) 6.077 -> 6.047 ( -0.49%) [ +0.33% +0.00% +0.44% / -0.26% -0.49% -0.02%] index_add_ reverse : Elapsed 0.061 ms (6.097 ms / 100) 5.859 -> 5.817 ( -0.72%) [ +0.34% +0.00% +0.49% / -0.26% -0.72% -0.41%] index_copy_ reverse : Elapsed 0.059 ms (5.879 ms / 100) 6.032 -> 6.022 ( -0.17%) [ +0.23% +0.41% +0.00% / -0.17% +0.83% +0.83%] index_add_ spread : Elapsed 0.060 ms (6.046 ms / 100) 5.814 -> 5.797 ( -0.29%) [ +0.22% +0.67% +0.00% / -0.29% +0.64% +0.96%] index_copy_ spread : Elapsed 0.058 ms (5.827 ms / 100) 6.063 -> 6.024 ( -0.64%) [ +0.00% +0.51% +0.16% / -0.64% +0.26% +0.23%] index_add_ strided 3 : Elapsed 0.061 ms (6.063 ms / 100) 5.841 -> 5.813 ( -0.48%) [ +0.00% +0.14% +0.34% / -0.48% +0.27% +0.33%] index_copy_ strided 3 : Elapsed 0.058 ms (5.841 ms / 100) 6.060 -> 6.032 ( -0.46%) [ +0.05% +0.15% +0.00% / -0.46% +0.38% +0.38%] index_add_ strided 7 : Elapsed 0.061 ms (6.063 ms / 100) 5.843 -> 5.818 ( -0.43%) [ +0.07% +0.02% +0.00% / -0.43% +0.29% +0.27%] index_copy_ strided 7 : Elapsed 0.058 ms (5.847 ms / 100) 6.080 -> 6.072 ( -0.13%) [ +0.39% +0.41% +0.00% / -0.13% +0.35% +0.56%] index_add_ perm : Elapsed 0.061 ms (6.104 ms / 100) 5.844 -> 5.816 ( -0.48%) [ +0.14% +0.19% +0.00% / -0.48% +0.44% +0.50%] index_copy_ perm : Elapsed 0.059 ms (5.852 ms / 100) 6.086 -> 6.055 ( -0.51%) [ +0.02% +0.00% +0.03% / -0.51% +1.08% +1.53%] index_add_ perm_sorted : Elapsed 0.061 ms (6.087 ms / 100) 5.846 -> 5.824 ( -0.38%) [ +0.00% +0.14% +0.07% / -0.38% +1.20% +1.08%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.846 ms / 100) 5.921 -> 5.929 ( +0.14%) [ +0.24% +0.05% +0.00% / +0.14% +0.66% +0.76%] index_select const : Elapsed 0.059 ms (5.935 ms / 100) 6.550 -> 6.592 ( +0.64%) [ +0.79% +0.00% +0.35% / +0.90% +0.85% +0.64%] index_select wrap : Elapsed 0.066 ms (6.602 ms / 100) 6.400 -> 6.391 ( -0.14%) [ +0.06% +0.02% +0.00% / +0.08% -0.06% -0.14%] index_select linear : Elapsed 0.064 ms (6.404 ms / 100) 6.421 -> 6.411 ( -0.16%) [ +0.20% +0.00% +0.17% / +0.17% -0.16% -0.11%] index_select reverse : Elapsed 0.064 ms (6.434 ms / 100) 5.957 -> 5.941 ( -0.27%) [ +0.13% +0.00% +0.34% / +0.22% -0.15% -0.27%] index_select skip64 : Elapsed 0.060 ms (5.965 ms / 100) 5.916 -> 5.904 ( -0.20%) [ +0.12% +0.42% +0.00% / +0.20% +0.07% -0.20%] index_select skip256 : Elapsed 0.059 ms (5.923 ms / 100) 6.410 -> 6.391 ( -0.30%) [ +0.22% +0.00% +0.12% / -0.08% -0.30% -0.14%] index_select spread : Elapsed 0.064 ms (6.424 ms / 100) 6.575 -> 6.598 ( +0.35%) [ +0.46% +0.15% +0.00% / +0.35% +0.62% +0.71%] index_select strided 3 : Elapsed 0.066 ms (6.605 ms / 100) 6.391 -> 6.386 ( -0.08%) [ +0.20% +0.02% +0.00% / -0.08% +0.30% +0.13%] index_select strided 5 : Elapsed 0.064 ms (6.404 ms / 100) 6.749 -> 6.710 ( -0.58%) [ +0.00% +0.12% +0.09% / +0.15% -0.33% -0.58%] index_select strided 7 : Elapsed 0.067 ms (6.749 ms / 100) 6.656 -> 6.656 ( +0.00%) [ +0.18% +0.26% +0.00% / +0.00% +0.60% +0.57%] index_select strided 8 : Elapsed 0.067 ms (6.668 ms / 100) 6.657 -> 6.664 ( +0.11%) [ +0.00% +0.14% +0.12% / +0.14% +0.15% +0.11%] index_select strided 16 : Elapsed 0.067 ms (6.657 ms / 100) 6.663 -> 6.645 ( -0.27%) [ +0.00% +0.09% +0.15% / -0.27% +0.09% +0.14%] index_select strided 64 : Elapsed 0.067 ms (6.663 ms / 100) 5.939 -> 5.936 ( -0.05%) [ +0.40% +0.00% +0.20% / -0.05% +0.45% +0.61%] index_select strided 100 : Elapsed 0.060 ms (5.963 ms / 100) 6.631 -> 6.609 ( -0.33%) [ +0.00% +0.03% +0.09% / +0.12% -0.26% -0.33%] index_select random : Elapsed 0.066 ms (6.631 ms / 100) 6.332 -> 6.338 ( +0.09%) [ +0.17% +0.00% +0.09% / +0.09% +0.66% +0.74%] index_select random_sorted : Elapsed 0.063 ms (6.343 ms / 100) B = [50, 250, 15] (stride (1, 750, 50)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 1 6.198 -> 6.171 ( -0.44%) [ +0.00% +0.06% +0.29% / -0.44% +0.10% +0.18%] index_add_ linear : Elapsed 0.062 ms (6.198 ms / 100) 5.890 -> 5.854 ( -0.61%) [ +0.12% +0.00% +0.36% / -0.61% -0.22% -0.20%] index_copy_ linear : Elapsed 0.059 ms (5.897 ms / 100) 6.214 -> 6.171 ( -0.69%) [ +0.19% +0.35% +0.00% / -0.43% -0.69% -0.39%] index_add_ reverse : Elapsed 0.062 ms (6.226 ms / 100) 5.888 -> 5.852 ( -0.61%) [ +0.10% +0.00% +0.03% / -0.44% -0.61% -0.44%] index_copy_ reverse : Elapsed 0.059 ms (5.894 ms / 100) 6.218 -> 6.183 ( -0.56%) [ +0.14% +0.00% +0.18% / -0.56% +0.66% +0.71%] index_add_ spread : Elapsed 0.062 ms (6.227 ms / 100) 5.886 -> 5.865 ( -0.36%) [ +0.31% +0.02% +0.00% / -0.36% +0.15% +0.34%] index_copy_ spread : Elapsed 0.059 ms (5.904 ms / 100) 6.189 -> 6.196 ( +0.11%) [ +0.13% +0.00% +0.13% / +0.11% +0.78% +0.84%] index_add_ strided 3 : Elapsed 0.062 ms (6.197 ms / 100) 5.852 -> 5.833 ( -0.32%) [ +0.00% +0.07% +0.09% / -0.32% +0.39% +0.07%] index_copy_ strided 3 : Elapsed 0.059 ms (5.852 ms / 100) 6.274 -> 6.252 ( -0.35%) [ +0.13% +0.00% +0.11% / -0.35% -0.29% -0.26%] index_add_ strided 7 : Elapsed 0.063 ms (6.282 ms / 100) 5.905 -> 5.869 ( -0.61%) [ +0.00% +0.07% +0.07% / -0.61% -0.17% -0.41%] index_copy_ strided 7 : Elapsed 0.059 ms (5.905 ms / 100) 6.405 -> 6.253 ( -2.37%) [ +0.05% +0.08% +0.00% / -0.59% -2.08% -2.37%] index_add_ perm : Elapsed 0.064 ms (6.408 ms / 100) 5.937 -> 5.864 ( -1.23%) [ +0.00% +0.45% +0.22% / -0.32% -1.23% -1.23%] index_copy_ perm : Elapsed 0.059 ms (5.937 ms / 100) 6.332 -> 6.168 ( -2.59%) [ +0.00% +0.27% +0.38% / -0.32% -2.59% -2.50%] index_add_ perm_sorted : Elapsed 0.063 ms (6.332 ms / 100) 5.917 -> 5.853 ( -1.08%) [ +0.00% +0.54% +0.44% / -0.52% -1.08% -0.93%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.917 ms / 100) 6.274 -> 6.278 ( +0.06%) [ +0.14% +0.14% +0.00% / +0.06% +0.49% +0.29%] index_select const : Elapsed 0.063 ms (6.283 ms / 100) 6.736 -> 6.700 ( -0.53%) [ +0.00% +0.31% +0.10% / +0.25% -0.53% -0.36%] index_select wrap : Elapsed 0.067 ms (6.736 ms / 100) 6.591 -> 6.597 ( +0.09%) [ +0.64% +0.00% +0.03% / +0.64% +0.12% +0.09%] index_select linear : Elapsed 0.066 ms (6.633 ms / 100) 6.543 -> 6.537 ( -0.09%) [ +0.34% +0.08% +0.00% / -0.09% +1.16% +1.13%] index_select reverse : Elapsed 0.066 ms (6.565 ms / 100) 6.221 -> 6.233 ( +0.19%) [ +0.10% +0.02% +0.00% / +0.19% +0.21% +0.34%] index_select skip64 : Elapsed 0.062 ms (6.227 ms / 100) 6.232 -> 6.231 ( -0.02%) [ +0.08% +0.00% +0.11% / +0.19% +0.08% -0.02%] index_select skip256 : Elapsed 0.062 ms (6.237 ms / 100) 6.523 -> 6.540 ( +0.26%) [ +0.08% +0.00% +0.09% / +0.34% +0.40% +0.26%] index_select spread : Elapsed 0.065 ms (6.528 ms / 100) 6.863 -> 6.874 ( +0.16%) [ +0.26% +0.00% +0.26% / +0.16% +0.60% +0.85%] index_select strided 3 : Elapsed 0.069 ms (6.881 ms / 100) 6.695 -> 6.705 ( +0.15%) [ +0.00% +0.30% +0.31% / +0.15% +0.63% +0.25%] index_select strided 5 : Elapsed 0.067 ms (6.695 ms / 100) 6.963 -> 6.984 ( +0.30%) [ +0.00% +0.20% +0.06% / +0.30% +0.79% +0.75%] index_select strided 7 : Elapsed 0.070 ms (6.963 ms / 100) 6.899 -> 6.911 ( +0.17%) [ +0.39% +0.00% +0.36% / +0.17% +0.42% +0.41%] index_select strided 8 : Elapsed 0.069 ms (6.926 ms / 100) 6.930 -> 6.922 ( -0.12%) [ +0.00% +0.12% +0.03% / -0.12% -0.04% -0.01%] index_select strided 16 : Elapsed 0.069 ms (6.930 ms / 100) 6.883 -> 6.898 ( +0.22%) [ +0.23% +0.22% +0.00% / +0.22% +0.49% +0.67%] index_select strided 64 : Elapsed 0.069 ms (6.899 ms / 100) 6.260 -> 6.260 ( +0.00%) [ +0.11% +0.00% +0.18% / +0.16% +0.13% +0.00%] index_select strided 100 : Elapsed 0.063 ms (6.267 ms / 100) 6.814 -> 6.796 ( -0.26%) [ +0.07% +0.13% +0.00% / -0.26% +0.78% +0.81%] index_select random : Elapsed 0.068 ms (6.819 ms / 100) 6.477 -> 6.483 ( +0.09%) [ +0.12% +0.00% +0.00% / +0.09% +0.40% +0.83%] index_select random_sorted : Elapsed 0.065 ms (6.485 ms / 100) B = [50, 250, 15] (stride (250, 1, 12500)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 1 good 6.720 -> 6.082 ( -9.49%) [ +0.15% +0.00% +0.01% / -9.08% -9.49% -9.45%] index_add_ linear : Elapsed 0.067 ms (6.730 ms / 100) 6.038 -> 5.908 ( -2.15%) [ +0.18% +0.00% +0.03% / -1.85% -2.07% -2.15%] index_copy_ linear : Elapsed 0.060 ms (6.049 ms / 100) Good 6.770 -> 6.089 (-10.06%) [ +0.00% +0.15% +0.06% / -9.94% -10.06% -9.99%] index_add_ reverse : Elapsed 0.068 ms (6.770 ms / 100) 6.076 -> 5.905 ( -2.81%) [ +0.21% +0.79% +0.00% / -2.50% -2.50% -2.81%] index_copy_ reverse : Elapsed 0.061 ms (6.089 ms / 100) Good 7.140 -> 6.335 (-11.27%) [ +0.00% +0.38% +0.14% / -11.27% -10.99% -11.04%] index_add_ spread : Elapsed 0.071 ms (7.140 ms / 100) 6.555 -> 6.337 ( -3.33%) [ +0.29% +0.00% +0.66% / -3.33% -3.13% -3.20%] index_copy_ spread : Elapsed 0.066 ms (6.574 ms / 100) Good 7.650 -> 6.390 (-16.47%) [ +0.20% +0.00% +0.42% / -16.34% -16.47% -16.38%] index_add_ strided 3 : Elapsed 0.077 ms (7.665 ms / 100) good 7.019 -> 6.377 ( -9.15%) [ +0.27% +0.00% +0.13% / -8.98% -8.90% -9.15%] index_copy_ strided 3 : Elapsed 0.070 ms (7.038 ms / 100) Good 7.708 -> 6.415 (-16.77%) [ +0.51% +0.65% +0.00% / -16.77% -16.42% -16.20%] index_add_ strided 7 : Elapsed 0.077 ms (7.747 ms / 100) good 7.054 -> 6.387 ( -9.46%) [ +0.13% +0.18% +0.00% / -9.46% -8.79% -8.58%] index_copy_ strided 7 : Elapsed 0.071 ms (7.063 ms / 100) Good 7.823 -> 6.486 (-17.09%) [ +0.13% +0.13% +0.00% / -17.03% -17.07% -17.09%] index_add_ perm : Elapsed 0.078 ms (7.833 ms / 100) good 7.018 -> 6.389 ( -8.96%) [ +0.00% +0.26% +0.10% / -8.96% -8.85% -8.52%] index_copy_ perm : Elapsed 0.070 ms (7.018 ms / 100) Good 7.160 -> 6.350 (-11.31%) [ +0.17% +0.00% +0.11% / -11.28% -11.31% -11.16%] index_add_ perm_sorted : Elapsed 0.072 ms (7.172 ms / 100) 6.549 -> 6.336 ( -3.25%) [ +0.99% +0.00% +0.35% / -3.01% -3.25% -3.19%] index_copy_ perm_sorted : Elapsed 0.066 ms (6.614 ms / 100) good 6.355 -> 5.967 ( -6.11%) [ +0.27% +0.03% +0.00% / -5.95% -5.92% -6.11%] index_select const : Elapsed 0.064 ms (6.372 ms / 100) good 7.003 -> 6.416 ( -8.38%) [ +0.00% +0.24% +0.27% / -8.27% -8.35% -8.38%] index_select wrap : Elapsed 0.070 ms (7.003 ms / 100) 6.759 -> 6.425 ( -4.94%) [ +0.12% +0.00% +0.07% / -4.14% -4.93% -4.94%] index_select linear : Elapsed 0.068 ms (6.767 ms / 100) good 6.778 -> 6.436 ( -5.05%) [ +0.32% +0.06% +0.00% / -4.84% -4.82% -5.05%] index_select reverse : Elapsed 0.068 ms (6.800 ms / 100) good 6.383 -> 5.991 ( -6.14%) [ +0.00% +0.11% +0.11% / -6.06% -6.14% -6.00%] index_select skip64 : Elapsed 0.064 ms (6.383 ms / 100) good 6.341 -> 5.956 ( -6.07%) [ +0.24% +0.00% +0.35% / -5.95% -6.07% -5.90%] index_select skip256 : Elapsed 0.064 ms (6.356 ms / 100) 6.752 -> 6.420 ( -4.92%) [ +0.00% +0.13% +0.10% / -4.10% -4.92% -4.65%] index_select spread : Elapsed 0.068 ms (6.752 ms / 100) Good 7.123 -> 6.315 (-11.34%) [ +0.01% +0.22% +0.00% / -11.34% -10.21% -9.98%] index_select strided 3 : Elapsed 0.071 ms (7.124 ms / 100) Good 6.913 -> 6.216 (-10.08%) [ +0.00% +0.20% +0.09% / -10.08% -9.72% -9.65%] index_select strided 5 : Elapsed 0.069 ms (6.913 ms / 100) Good 7.270 -> 6.390 (-12.10%) [ +0.23% +0.07% +0.00% / -11.25% -12.04% -12.10%] index_select strided 7 : Elapsed 0.073 ms (7.287 ms / 100) Good 7.194 -> 6.391 (-11.16%) [ +0.00% +0.11% +0.15% / -11.16% -10.45% -10.58%] index_select strided 8 : Elapsed 0.072 ms (7.194 ms / 100) Good 7.200 -> 6.352 (-11.78%) [ +0.12% +0.08% +0.00% / -11.78% -11.43% -11.32%] index_select strided 16 : Elapsed 0.072 ms (7.209 ms / 100) Good 7.170 -> 6.355 (-11.37%) [ +0.18% +0.00% +0.03% / -11.13% -11.14% -11.37%] index_select strided 64 : Elapsed 0.072 ms (7.183 ms / 100) good 6.371 -> 5.997 ( -5.87%) [ +0.09% +0.00% +0.28% / -5.60% -5.68% -5.87%] index_select strided 100 : Elapsed 0.064 ms (6.377 ms / 100) good 7.096 -> 6.391 ( -9.94%) [ +0.59% +0.04% +0.00% / -9.12% -9.94% -9.85%] index_select random : Elapsed 0.071 ms (7.138 ms / 100) 6.691 -> 6.401 ( -4.33%) [ +0.00% +0.10% +0.03% / -4.33% -3.83% -3.84%] index_select random_sorted : Elapsed 0.067 ms (6.691 ms / 100) B = [50, 250, 15] (stride (1, 50, 12500)) A = [50, 150, 15] (stride (1, 50, 7500)) dim = 1 6.599 -> 6.592 ( -0.11%) [ +0.06% +0.00% +0.00% / -0.11% +0.39% +0.11%] index_add_ linear : Elapsed 0.066 ms (6.603 ms / 100) 5.947 -> 5.933 ( -0.24%) [ +0.00% +0.10% +0.27% / -0.24% +0.99% +0.91%] index_copy_ linear : Elapsed 0.059 ms (5.947 ms / 100) 6.599 -> 6.603 ( +0.06%) [ +0.00% +0.08% +0.03% / +0.06% +0.44% +0.35%] index_add_ reverse : Elapsed 0.066 ms (6.599 ms / 100) 5.964 -> 5.949 ( -0.25%) [ +0.00% +0.44% +0.39% / -0.25% +0.27% +0.10%] index_copy_ reverse : Elapsed 0.060 ms (5.964 ms / 100) 6.673 -> 6.663 ( -0.15%) [ +0.00% +0.03% +0.34% / -0.15% +0.61% +0.69%] index_add_ spread : Elapsed 0.067 ms (6.673 ms / 100) 6.074 -> 6.045 ( -0.48%) [ +0.15% +0.00% +0.61% / -0.48% +0.26% +0.36%] index_copy_ spread : Elapsed 0.061 ms (6.083 ms / 100) 6.694 -> 6.693 ( -0.01%) [ +0.45% +0.00% +0.33% / -0.01% +0.25% +0.43%] index_add_ strided 3 : Elapsed 0.067 ms (6.724 ms / 100) 6.113 -> 6.089 ( -0.39%) [ +0.21% +0.00% +0.18% / -0.39% -0.25% -0.28%] index_copy_ strided 3 : Elapsed 0.061 ms (6.126 ms / 100) 6.775 -> 6.779 ( +0.06%) [ +0.31% +0.00% +0.07% / +0.06% +0.22% +0.61%] index_add_ strided 7 : Elapsed 0.068 ms (6.796 ms / 100) 6.170 -> 6.131 ( -0.63%) [ +0.41% +0.19% +0.00% / -0.50% -0.47% -0.63%] index_copy_ strided 7 : Elapsed 0.062 ms (6.195 ms / 100) 6.772 -> 6.696 ( -1.12%) [ +0.00% +0.21% +0.31% / +0.03% -0.95% -1.12%] index_add_ perm : Elapsed 0.068 ms (6.772 ms / 100) 6.102 -> 6.060 ( -0.69%) [ +0.00% +0.52% +0.28% / -0.69% -0.25% -0.59%] index_copy_ perm : Elapsed 0.061 ms (6.102 ms / 100) 6.644 -> 6.607 ( -0.56%) [ +0.17% +0.00% +0.21% / -0.05% -0.56% -0.33%] index_add_ perm_sorted : Elapsed 0.067 ms (6.655 ms / 100) 6.055 -> 6.016 ( -0.64%) [ +0.00% +0.00% +0.13% / -0.64% -0.50% -0.61%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.055 ms / 100) 6.607 -> 6.615 ( +0.12%) [ +0.06% +0.00% +0.02% / +0.12% +0.33% +0.35%] index_select const : Elapsed 0.066 ms (6.611 ms / 100) 6.861 -> 6.861 ( +0.00%) [ +0.13% +0.00% +0.13% / +0.00% +0.52% +0.76%] index_select wrap : Elapsed 0.069 ms (6.870 ms / 100) 6.859 -> 6.865 ( +0.09%) [ +0.07% +0.00% +0.06% / +0.09% +0.50% +0.74%] index_select linear : Elapsed 0.069 ms (6.864 ms / 100) 6.643 -> 6.662 ( +0.29%) [ +0.06% +0.18% +0.00% / +0.29% +0.71% +0.60%] index_select reverse : Elapsed 0.066 ms (6.647 ms / 100) 6.639 -> 6.636 ( -0.05%) [ +0.02% +0.09% +0.00% / -0.05% +0.27% +0.30%] index_select skip64 : Elapsed 0.066 ms (6.640 ms / 100) 6.603 -> 6.619 ( +0.24%) [ +0.00% +0.18% +0.18% / +0.24% +0.24% +0.36%] index_select skip256 : Elapsed 0.066 ms (6.603 ms / 100) 6.717 -> 6.717 ( +0.00%) [ +0.18% +0.00% +0.25% / +0.00% +0.34% +0.24%] index_select spread : Elapsed 0.067 ms (6.729 ms / 100) 6.857 -> 6.858 ( +0.01%) [ +0.00% +0.16% +0.18% / +0.01% +0.04% +0.06%] index_select strided 3 : Elapsed 0.069 ms (6.857 ms / 100) 6.702 -> 6.699 ( -0.04%) [ +0.22% +0.00% +0.03% / +0.16% +0.03% -0.04%] index_select strided 5 : Elapsed 0.067 ms (6.717 ms / 100) 7.062 -> 7.068 ( +0.08%) [ +0.00% +0.07% +0.13% / +0.08% +0.34% +0.28%] index_select strided 7 : Elapsed 0.071 ms (7.062 ms / 100) 6.967 -> 6.972 ( +0.07%) [ +0.22% +0.27% +0.00% / +0.23% +0.30% +0.07%] index_select strided 8 : Elapsed 0.070 ms (6.982 ms / 100) 6.989 -> 6.984 ( -0.07%) [ +0.04% +0.31% +0.00% / +0.00% +0.06% -0.07%] index_select strided 16 : Elapsed 0.070 ms (6.992 ms / 100) 6.953 -> 6.951 ( -0.03%) [ +0.04% +0.00% +0.12% / +0.03% +0.00% -0.03%] index_select strided 64 : Elapsed 0.070 ms (6.956 ms / 100) 6.636 -> 6.625 ( -0.17%) [ +0.11% +0.06% +0.00% / +0.14% -0.17% -0.17%] index_select strided 100 : Elapsed 0.066 ms (6.643 ms / 100) 6.844 -> 6.840 ( -0.06%) [ +0.00% +0.15% +0.03% / -0.06% +1.43% +1.33%] index_select random : Elapsed 0.068 ms (6.844 ms / 100) 6.693 -> 6.692 ( -0.01%) [ +0.00% +0.09% +0.10% / -0.01% +0.15% +0.33%] index_select random_sorted : Elapsed 0.067 ms (6.693 ms / 100) out_shape = [50, 150, 250] in_shape = [50, 150, 15] idx_dim = 2 B = [50, 150, 250] (stride (37500, 250, 1)) A = [50, 150, 15] (stride (1, 750, 50)) dim = 2 9.370 -> 9.391 ( +0.22%) [ +0.34% +0.03% +0.00% / +0.22% +4.90% +4.77%] index_add_ linear : Elapsed 0.094 ms (9.402 ms / 100) 7.937 -> 7.958 ( +0.26%) [ +0.28% +0.31% +0.00% / +0.26% +1.89% +1.83%] index_copy_ linear : Elapsed 0.080 ms (7.959 ms / 100) 9.403 -> 9.425 ( +0.23%) [ +0.29% +0.10% +0.00% / +0.23% +5.34% +5.31%] index_add_ reverse : Elapsed 0.094 ms (9.430 ms / 100) 7.889 -> 7.915 ( +0.33%) [ +0.48% +0.08% +0.00% / +0.33% +2.81% +2.42%] index_copy_ reverse : Elapsed 0.079 ms (7.927 ms / 100) 18.425 -> 18.424 ( -0.01%) [ +0.00% +0.17% +0.26% / +0.36% +0.17% -0.01%] index_add_ spread : Elapsed 0.184 ms (18.425 ms / 100) 12.954 -> 12.873 ( -0.63%) [ +0.06% +0.07% +0.00% / +0.20% -0.56% -0.63%] index_copy_ spread : Elapsed 0.130 ms (12.962 ms / 100) 11.129 -> 11.142 ( +0.12%) [ +0.22% +0.00% +0.00% / +0.16% +0.12% +0.15%] index_add_ strided 3 : Elapsed 0.112 ms (11.153 ms / 100) 8.399 -> 8.429 ( +0.36%) [ +0.43% +0.12% +0.00% / +0.36% +0.74% +0.75%] index_copy_ strided 3 : Elapsed 0.084 ms (8.435 ms / 100) 15.498 -> 15.486 ( -0.08%) [ +0.18% +0.08% +0.00% / +0.09% -0.08% +0.09%] index_add_ strided 7 : Elapsed 0.155 ms (15.526 ms / 100) 11.249 -> 11.052 ( -1.75%) [ +0.18% +0.05% +0.00% / -0.04% -1.64% -1.75%] index_copy_ strided 7 : Elapsed 0.113 ms (11.269 ms / 100) 20.208 -> 20.278 ( +0.35%) [ +0.18% +0.00% +0.20% / +0.35% +0.35% +0.40%] index_add_ perm : Elapsed 0.202 ms (20.245 ms / 100) 14.148 -> 14.131 ( -0.12%) [ +0.16% +0.00% +0.07% / +0.05% +0.11% -0.12%] index_copy_ perm : Elapsed 0.142 ms (14.171 ms / 100) 17.312 -> 17.336 ( +0.14%) [ +0.00% +0.11% +0.25% / +0.14% +0.58% +0.78%] index_add_ perm_sorted : Elapsed 0.173 ms (17.312 ms / 100) 12.355 -> 12.282 ( -0.59%) [ +0.00% +0.16% +0.00% / +0.03% -0.56% -0.59%] index_copy_ perm_sorted : Elapsed 0.124 ms (12.355 ms / 100) 117.948 -> 117.745 ( -0.17%) [ +0.24% +0.00% +0.33% / -0.17% +0.57% +0.66%] index_select const : Elapsed 1.182 ms (118.230 ms / 100) 140.346 -> 140.356 ( +0.01%) [ +0.28% +0.36% +0.00% / +0.01% +3.02% +3.14%] index_select wrap : Elapsed 1.407 ms (140.739 ms / 100) 116.753 -> 117.339 ( +0.50%) [ +0.13% +0.20% +0.00% / +0.50% +1.11% +1.34%] index_select linear : Elapsed 1.169 ms (116.902 ms / 100) 119.856 -> 121.221 ( +1.14%) [ +0.81% +1.07% +0.00% / +1.14% +1.53% +1.97%] index_select reverse : Elapsed 1.208 ms (120.828 ms / 100) 117.666 -> 117.952 ( +0.24%) [ +0.69% +0.47% +0.00% / +0.24% +0.87% +0.54%] index_select skip64 : Elapsed 1.185 ms (118.481 ms / 100) 118.172 -> 118.016 ( -0.13%) [ +0.00% +0.00% +0.13% / -0.06% -0.13% -0.05%] index_select skip256 : Elapsed 1.182 ms (118.172 ms / 100) 117.701 -> 118.116 ( +0.35%) [ +0.36% +0.69% +0.00% / +0.35% +0.68% +0.62%] index_select spread : Elapsed 1.181 ms (118.128 ms / 100) 127.566 -> 127.581 ( +0.01%) [ +0.31% +0.45% +0.00% / +0.01% +1.14% +1.51%] index_select strided 3 : Elapsed 1.280 ms (127.964 ms / 100) 121.419 -> 121.312 ( -0.09%) [ +0.42% +0.00% +0.25% / -0.09% +1.84% +0.84%] index_select strided 5 : Elapsed 1.219 ms (121.929 ms / 100) 140.790 -> 141.286 ( +0.35%) [ +0.22% +0.00% +0.17% / +0.35% +3.09% +2.84%] index_select strided 7 : Elapsed 1.411 ms (141.103 ms / 100) 140.888 -> 140.969 ( +0.06%) [ +0.49% +0.26% +0.00% / +0.06% +2.98% +2.19%] index_select strided 8 : Elapsed 1.416 ms (141.583 ms / 100) 136.354 -> 137.114 ( +0.56%) [ +0.00% +0.66% +0.46% / +0.56% +2.62% +3.10%] index_select random : Elapsed 1.364 ms (136.354 ms / 100) 118.132 -> 118.268 ( +0.12%) [ +0.00% +0.02% +0.56% / +0.12% +0.33% +0.41%] index_select random_sorted : Elapsed 1.181 ms (118.132 ms / 100) B = [50, 150, 250] (stride (37500, 1, 150)) A = [50, 150, 15] (stride (15, 750, 1)) dim = 2 3.468 -> 3.484 ( +0.46%) [ +0.32% +0.00% +0.12% / +0.46% +0.61% +0.81%] index_add_ linear : Elapsed 0.035 ms (3.479 ms / 100) 3.278 -> 3.288 ( +0.31%) [ +0.31% +0.21% +0.00% / +0.31% +0.58% +0.52%] index_copy_ linear : Elapsed 0.033 ms (3.288 ms / 100) 3.468 -> 3.484 ( +0.46%) [ +0.52% +0.17% +0.00% / +0.46% +0.92% +1.15%] index_add_ reverse : Elapsed 0.035 ms (3.486 ms / 100) 3.273 -> 3.286 ( +0.40%) [ +0.46% +0.12% +0.00% / +0.40% +0.86% +0.79%] index_copy_ reverse : Elapsed 0.033 ms (3.288 ms / 100) 3.473 -> 3.484 ( +0.32%) [ +0.20% +0.03% +0.00% / +0.43% +0.32% +0.75%] index_add_ spread : Elapsed 0.035 ms (3.480 ms / 100) 3.302 -> 3.318 ( +0.48%) [ +0.30% +0.39% +0.00% / +0.51% +0.73% +0.48%] index_copy_ spread : Elapsed 0.033 ms (3.312 ms / 100) 3.467 -> 3.476 ( +0.26%) [ +0.32% +0.00% +0.09% / +0.26% +0.98% +0.95%] index_add_ strided 3 : Elapsed 0.035 ms (3.478 ms / 100) 3.302 -> 3.316 ( +0.42%) [ +0.12% +0.00% +0.18% / +0.42% +0.70% +0.82%] index_copy_ strided 3 : Elapsed 0.033 ms (3.306 ms / 100) 3.466 -> 3.479 ( +0.38%) [ +0.29% +0.29% +0.00% / +0.38% +0.55% +0.61%] index_add_ strided 7 : Elapsed 0.035 ms (3.476 ms / 100) 3.304 -> 3.313 ( +0.27%) [ +0.36% +0.06% +0.00% / +0.27% +0.27% +0.48%] index_copy_ strided 7 : Elapsed 0.033 ms (3.316 ms / 100) 3.458 -> 3.471 ( +0.38%) [ +0.06% +0.00% +0.03% / +0.38% +1.50% +1.47%] index_add_ perm : Elapsed 0.035 ms (3.460 ms / 100) 3.285 -> 3.306 ( +0.64%) [ +0.64% +0.00% +0.40% / +0.64% +1.58% +1.61%] index_copy_ perm : Elapsed 0.033 ms (3.306 ms / 100) 3.478 -> 3.481 ( +0.09%) [ +0.26% +0.06% +0.00% / +0.09% +0.58% +0.63%] index_add_ perm_sorted : Elapsed 0.035 ms (3.487 ms / 100) 3.296 -> 3.313 ( +0.52%) [ +0.24% +0.00% +0.12% / +0.52% +0.73% +0.67%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.304 ms / 100) 28.654 -> 28.756 ( +0.36%) [ +0.00% +0.14% +0.28% / +0.36% +0.37% +0.36%] index_select const : Elapsed 0.287 ms (28.654 ms / 100) 29.176 -> 29.129 ( -0.16%) [ +0.08% +0.00% +0.01% / -0.16% +0.07% -0.08%] index_select wrap : Elapsed 0.292 ms (29.198 ms / 100) 28.725 -> 28.794 ( +0.24%) [ +0.26% +0.34% +0.00% / +0.45% +0.24% +0.27%] index_select linear : Elapsed 0.288 ms (28.800 ms / 100) 29.078 -> 29.070 ( -0.03%) [ +0.27% +0.00% +0.24% / -0.03% +0.05% +0.17%] index_select reverse : Elapsed 0.292 ms (29.157 ms / 100) 28.671 -> 28.702 ( +0.11%) [ +0.21% +0.00% +0.32% / +0.11% +0.39% +0.36%] index_select skip64 : Elapsed 0.287 ms (28.732 ms / 100) 28.656 -> 28.669 ( +0.05%) [ +0.00% +0.00% +0.06% / +0.05% +0.28% +0.26%] index_select skip256 : Elapsed 0.287 ms (28.656 ms / 100) 29.071 -> 29.082 ( +0.04%) [ +0.00% +0.13% +0.12% / +0.04% +0.58% +0.39%] index_select spread : Elapsed 0.291 ms (29.071 ms / 100) 29.095 -> 29.137 ( +0.14%) [ +0.00% +0.37% +0.26% / +0.14% +0.38% +0.27%] index_select strided 3 : Elapsed 0.291 ms (29.095 ms / 100) 29.111 -> 29.234 ( +0.42%) [ +0.20% +0.00% +0.34% / +0.52% +0.42% +0.53%] index_select strided 5 : Elapsed 0.292 ms (29.169 ms / 100) 29.184 -> 29.136 ( -0.16%) [ +0.06% +0.03% +0.00% / -0.16% +0.24% +0.25%] index_select strided 7 : Elapsed 0.292 ms (29.201 ms / 100) 29.219 -> 29.162 ( -0.20%) [ +0.11% +0.00% +0.12% / +0.26% -0.20% +0.01%] index_select strided 8 : Elapsed 0.293 ms (29.252 ms / 100) 29.099 -> 29.080 ( -0.07%) [ +0.44% +0.03% +0.00% / -0.07% +0.73% +0.32%] index_select random : Elapsed 0.292 ms (29.227 ms / 100) 29.073 -> 29.113 ( +0.14%) [ +0.02% +0.06% +0.00% / +0.14% +0.41% +0.40%] index_select random_sorted : Elapsed 0.291 ms (29.079 ms / 100) B = [50, 150, 250] (stride (1, 12500, 50)) A = [50, 150, 15] (stride (1, 750, 50)) dim = 2 4.337 -> 4.219 ( -2.72%) [ +0.09% +0.16% +0.00% / +0.37% -2.72% -2.44%] index_add_ linear : Elapsed 0.043 ms (4.341 ms / 100) 3.635 -> 3.589 ( -1.27%) [ +0.19% +0.00% +0.03% / +0.58% -1.27% -1.27%] index_copy_ linear : Elapsed 0.036 ms (3.642 ms / 100) 4.327 -> 4.300 ( -0.62%) [ +0.00% +0.07% +0.21% / +0.16% -0.51% -0.62%] index_add_ reverse : Elapsed 0.043 ms (4.327 ms / 100) 3.627 -> 3.628 ( +0.03%) [ +0.25% +0.00% +0.08% / +0.03% +0.58% +0.39%] index_copy_ reverse : Elapsed 0.036 ms (3.636 ms / 100) 4.304 -> 4.316 ( +0.28%) [ +0.23% +0.33% +0.00% / +0.28% +6.48% +6.55%] index_add_ spread : Elapsed 0.043 ms (4.314 ms / 100) 3.601 -> 3.603 ( +0.06%) [ +0.22% +0.00% +0.03% / +0.06% +2.53% +2.47%] index_copy_ spread : Elapsed 0.036 ms (3.609 ms / 100) 4.269 -> 4.281 ( +0.28%) [ +0.14% +0.33% +0.00% / +0.28% +5.97% +5.88%] index_add_ strided 3 : Elapsed 0.043 ms (4.275 ms / 100) 3.608 -> 3.614 ( +0.17%) [ +0.19% +0.17% +0.00% / +0.17% +2.13% +2.41%] index_copy_ strided 3 : Elapsed 0.036 ms (3.615 ms / 100) 4.383 -> 4.407 ( +0.55%) [ +0.00% +0.23% +0.05% / +0.55% +3.95% +3.76%] index_add_ strided 7 : Elapsed 0.044 ms (4.383 ms / 100) 3.638 -> 3.644 ( +0.16%) [ +0.16% +0.00% +0.05% / +0.16% +1.32% +1.15%] index_copy_ strided 7 : Elapsed 0.036 ms (3.644 ms / 100) 4.340 -> 4.350 ( +0.23%) [ +0.32% +0.32% +0.00% / +0.23% +0.71% +0.94%] index_add_ perm : Elapsed 0.044 ms (4.354 ms / 100) 3.625 -> 3.629 ( +0.11%) [ +0.33% +0.36% +0.00% / +0.11% +0.74% +1.05%] index_copy_ perm : Elapsed 0.036 ms (3.637 ms / 100) 4.339 -> 4.345 ( +0.14%) [ +0.00% +0.09% +0.05% / +0.14% +2.17% +1.82%] index_add_ perm_sorted : Elapsed 0.043 ms (4.339 ms / 100) 3.618 -> 3.615 ( -0.08%) [ +0.47% +0.00% +0.11% / -0.08% +1.58% +1.88%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.635 ms / 100) 38.260 -> 37.327 ( -2.44%) [ +0.73% +0.25% +0.00% / -2.44% +5.34% +4.26%] index_select const : Elapsed 0.385 ms (38.540 ms / 100) 56.673 -> 57.789 ( +1.97%) [ +0.70% +0.00% +0.82% / +1.97% +15.38% +15.18%] index_select wrap : Elapsed 0.571 ms (57.067 ms / 100) 35.555 -> 35.940 ( +1.08%) [ +0.00% +0.50% +1.14% / +1.08% +14.99% +12.92%] index_select linear : Elapsed 0.356 ms (35.555 ms / 100) 37.896 -> 38.653 ( +2.00%) [ +0.00% +1.94% +2.84% / +2.00% +17.85% +16.95%] index_select reverse : Elapsed 0.379 ms (37.896 ms / 100) 39.139 -> 40.235 ( +2.80%) [ +1.53% +2.00% +0.00% / +2.80% +3.05% +3.91%] index_select skip64 : Elapsed 0.397 ms (39.739 ms / 100) 38.993 -> 39.171 ( +0.46%) [ +0.00% +1.29% +2.07% / +0.46% +8.39% +9.02%] index_select skip256 : Elapsed 0.390 ms (38.993 ms / 100) 35.443 -> 35.839 ( +1.12%) [ +0.00% +1.02% +2.13% / +1.12% +12.34% +12.54%] index_select spread : Elapsed 0.354 ms (35.443 ms / 100) 60.798 -> 61.759 ( +1.58%) [ +0.00% +0.84% +0.78% / +1.58% +3.57% +5.88%] index_select strided 3 : Elapsed 0.608 ms (60.798 ms / 100) 52.218 -> 52.154 ( -0.12%) [ +1.98% +0.00% +1.42% / -0.12% +3.48% +4.17%] index_select strided 5 : Elapsed 0.533 ms (53.254 ms / 100) 62.385 -> 62.590 ( +0.33%) [ +2.42% +0.00% +2.13% / +0.33% +13.15% +15.04%] index_select strided 7 : Elapsed 0.639 ms (63.895 ms / 100) 58.177 -> 57.945 ( -0.40%) [ +0.81% +0.43% +0.00% / -0.40% +16.97% +17.07%] index_select strided 8 : Elapsed 0.586 ms (58.648 ms / 100) 61.708 -> 61.097 ( -0.99%) [ +0.78% +0.00% +2.44% / -0.99% +8.04% +9.00%] index_select random : Elapsed 0.622 ms (62.189 ms / 100) 34.135 -> 34.289 ( +0.45%) [ +1.35% +0.43% +0.00% / +0.45% +11.42% +11.11%] index_select random_sorted : Elapsed 0.346 ms (34.596 ms / 100) B = [50, 150, 250] (stride (150, 1, 7500)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 2 3.254 -> 3.250 ( -0.12%) [ +0.06% +0.00% +0.25% / +0.15% -0.12% -0.09%] index_add_ linear : Elapsed 0.033 ms (3.256 ms / 100) 3.059 -> 3.053 ( -0.20%) [ +0.39% +0.16% +0.00% / +0.49% -0.20% -0.16%] index_copy_ linear : Elapsed 0.031 ms (3.071 ms / 100) 3.257 -> 3.255 ( -0.06%) [ +0.15% +0.28% +0.00% / +0.34% +0.18% -0.06%] index_add_ reverse : Elapsed 0.033 ms (3.262 ms / 100) 3.061 -> 3.062 ( +0.03%) [ +0.52% +0.56% +0.00% / +0.52% +0.23% +0.03%] index_copy_ reverse : Elapsed 0.031 ms (3.077 ms / 100) 3.248 -> 3.256 ( +0.25%) [ +0.46% +0.12% +0.00% / +0.46% +0.31% +0.25%] index_add_ spread : Elapsed 0.033 ms (3.263 ms / 100) 3.066 -> 3.059 ( -0.23%) [ +0.36% +0.20% +0.00% / +0.36% -0.16% -0.23%] index_copy_ spread : Elapsed 0.031 ms (3.077 ms / 100) 3.250 -> 3.251 ( +0.03%) [ +0.22% +0.25% +0.00% / +0.43% +0.03% +0.31%] index_add_ strided 3 : Elapsed 0.033 ms (3.257 ms / 100) 3.060 -> 3.053 ( -0.23%) [ +0.36% +0.00% +0.13% / +0.49% -0.23% -0.16%] index_copy_ strided 3 : Elapsed 0.031 ms (3.071 ms / 100) 3.239 -> 3.249 ( +0.31%) [ +0.22% +0.22% +0.00% / +0.31% +0.43% +0.65%] index_add_ strided 7 : Elapsed 0.032 ms (3.246 ms / 100) 3.057 -> 3.051 ( -0.20%) [ +0.29% +0.00% +0.07% / +0.23% -0.20% -0.13%] index_copy_ strided 7 : Elapsed 0.031 ms (3.066 ms / 100) 3.256 -> 3.251 ( -0.15%) [ +0.43% +1.54% +0.00% / +0.49% +0.03% -0.15%] index_add_ perm : Elapsed 0.033 ms (3.270 ms / 100) 3.061 -> 3.052 ( -0.29%) [ +0.29% +0.07% +0.00% / +0.39% -0.29% +0.00%] index_copy_ perm : Elapsed 0.031 ms (3.070 ms / 100) 3.253 -> 3.255 ( +0.06%) [ +0.40% +0.25% +0.00% / +0.40% +0.09% +0.06%] index_add_ perm_sorted : Elapsed 0.033 ms (3.266 ms / 100) 3.070 -> 3.054 ( -0.52%) [ +0.23% +0.00% +0.00% / +0.26% -0.52% -0.42%] index_copy_ perm_sorted : Elapsed 0.031 ms (3.077 ms / 100) 16.051 -> 15.949 ( -0.64%) [ +0.21% +0.00% +0.06% / +0.16% -0.60% -0.64%] index_select const : Elapsed 0.161 ms (16.085 ms / 100) 16.083 -> 16.050 ( -0.21%) [ +0.19% +0.00% +0.02% / +0.11% -0.21% +0.17%] index_select wrap : Elapsed 0.161 ms (16.114 ms / 100) 16.149 -> 16.047 ( -0.63%) [ +0.12% +0.00% +0.16% / +0.19% -0.59% -0.63%] index_select linear : Elapsed 0.162 ms (16.169 ms / 100) 15.868 -> 15.902 ( +0.21%) [ +0.16% +0.00% +0.03% / +0.21% +1.62% +1.68%] index_select reverse : Elapsed 0.159 ms (15.894 ms / 100) 15.916 -> 15.968 ( +0.33%) [ +0.19% +0.00% +0.12% / +0.33% +1.01% +0.94%] index_select skip64 : Elapsed 0.159 ms (15.946 ms / 100) 15.998 -> 15.969 ( -0.18%) [ +0.26% +0.21% +0.00% / +0.24% +0.03% -0.18%] index_select skip256 : Elapsed 0.160 ms (16.039 ms / 100) 16.188 -> 16.094 ( -0.58%) [ +0.15% +0.04% +0.00% / +0.14% -0.57% -0.58%] index_select spread : Elapsed 0.162 ms (16.213 ms / 100) 16.002 -> 16.041 ( +0.24%) [ +0.17% +0.09% +0.00% / +0.24% +0.44% +0.48%] index_select strided 3 : Elapsed 0.160 ms (16.029 ms / 100) 16.047 -> 16.075 ( +0.17%) [ +0.20% +0.00% +0.17% / +0.17% +1.22% +1.22%] index_select strided 5 : Elapsed 0.161 ms (16.079 ms / 100) 16.186 -> 16.209 ( +0.14%) [ +0.10% +0.00% +0.17% / +0.14% +0.23% +0.24%] index_select strided 7 : Elapsed 0.162 ms (16.202 ms / 100) 16.171 -> 16.144 ( -0.17%) [ +0.19% +0.00% +0.06% / +0.10% -0.17% -0.11%] index_select strided 8 : Elapsed 0.162 ms (16.201 ms / 100) 16.043 -> 16.084 ( +0.26%) [ +0.36% +0.00% +0.19% / +0.26% +0.34% +0.49%] index_select random : Elapsed 0.161 ms (16.101 ms / 100) 15.997 -> 16.019 ( +0.14%) [ +0.17% +0.03% +0.00% / +0.14% +0.85% +0.99%] index_select random_sorted : Elapsed 0.160 ms (16.024 ms / 100) B = [50, 150, 250] (stride (1, 50, 7500)) A = [50, 150, 15] (stride (2250, 15, 1)) dim = 2 6.070 -> 6.080 ( +0.16%) [ +0.43% +0.00% +0.00% / +0.40% +0.38% +0.16%] index_add_ linear : Elapsed 0.061 ms (6.096 ms / 100) 5.889 -> 5.910 ( +0.36%) [ +0.73% +0.22% +0.00% / +0.58% +0.36% +0.42%] index_copy_ linear : Elapsed 0.059 ms (5.932 ms / 100) 6.055 -> 6.092 ( +0.61%) [ +0.59% +0.00% +0.12% / +0.61% +0.64% +0.61%] index_add_ reverse : Elapsed 0.061 ms (6.091 ms / 100) 5.883 -> 5.909 ( +0.44%) [ +0.70% +0.08% +0.00% / +0.56% +0.44% +0.48%] index_copy_ reverse : Elapsed 0.059 ms (5.924 ms / 100) 6.053 -> 6.081 ( +0.46%) [ +0.59% +0.03% +0.00% / +0.46% +0.68% +0.93%] index_add_ spread : Elapsed 0.061 ms (6.089 ms / 100) 5.882 -> 5.911 ( +0.49%) [ +0.41% +0.00% +0.00% / +0.49% +0.56% +0.87%] index_copy_ spread : Elapsed 0.059 ms (5.906 ms / 100) 6.067 -> 6.096 ( +0.48%) [ +0.68% +0.03% +0.00% / +0.48% +0.64% +0.58%] index_add_ strided 3 : Elapsed 0.061 ms (6.108 ms / 100) 5.898 -> 5.928 ( +0.51%) [ +0.61% +0.08% +0.00% / +0.51% +0.66% +0.61%] index_copy_ strided 3 : Elapsed 0.059 ms (5.934 ms / 100) 6.079 -> 6.107 ( +0.46%) [ +0.48% +0.15% +0.00% / +0.46% +0.54% +0.76%] index_add_ strided 7 : Elapsed 0.061 ms (6.108 ms / 100) 5.905 -> 5.937 ( +0.54%) [ +0.64% +0.12% +0.00% / +0.54% +0.58% +0.63%] index_copy_ strided 7 : Elapsed 0.059 ms (5.943 ms / 100) 6.067 -> 6.094 ( +0.45%) [ +0.51% +0.07% +0.00% / +0.49% +0.56% +0.45%] index_add_ perm : Elapsed 0.061 ms (6.098 ms / 100) 5.894 -> 5.915 ( +0.36%) [ +0.61% +0.03% +0.00% / +0.56% +0.46% +0.36%] index_copy_ perm : Elapsed 0.059 ms (5.930 ms / 100) 6.075 -> 6.103 ( +0.46%) [ +0.64% +0.00% +0.03% / +0.58% +0.54% +0.46%] index_add_ perm_sorted : Elapsed 0.061 ms (6.114 ms / 100) 5.900 -> 5.926 ( +0.44%) [ +0.73% +0.00% +0.14% / +0.63% +0.44% +0.51%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.943 ms / 100) 99.414 -> 99.471 ( +0.06%) [ +0.32% +0.73% +0.00% / +0.06% +2.10% +1.55%] index_select const : Elapsed 0.997 ms (99.734 ms / 100) 103.154 -> 103.727 ( +0.56%) [ +0.00% +0.57% +0.29% / +0.56% +1.05% +1.02%] index_select wrap : Elapsed 1.032 ms (103.154 ms / 100) 99.167 -> 99.537 ( +0.37%) [ +0.00% +0.73% +0.97% / +0.37% +2.07% +1.83%] index_select linear : Elapsed 0.992 ms (99.167 ms / 100) 100.094 -> 99.780 ( -0.31%) [ +0.05% +0.56% +0.00% / -0.31% +1.05% +1.20%] index_select reverse : Elapsed 1.001 ms (100.147 ms / 100) 99.672 -> 99.469 ( -0.20%) [ +0.00% +1.03% +0.11% / -0.20% +1.20% +1.47%] index_select skip64 : Elapsed 0.997 ms (99.672 ms / 100) 99.751 -> 99.630 ( -0.12%) [ +0.09% +0.00% +0.15% / -0.12% +1.30% +1.23%] index_select skip256 : Elapsed 0.998 ms (99.836 ms / 100) 99.906 -> 99.865 ( -0.04%) [ +0.00% +0.52% +0.47% / -0.04% +2.01% +1.77%] index_select spread : Elapsed 0.999 ms (99.906 ms / 100) 103.944 -> 103.862 ( -0.08%) [ +0.00% +0.31% +0.20% / -0.08% +0.84% +1.27%] index_select strided 3 : Elapsed 1.039 ms (103.944 ms / 100) 103.231 -> 103.842 ( +0.59%) [ +0.06% +0.05% +0.00% / +0.59% +1.68% +2.54%] index_select strided 5 : Elapsed 1.033 ms (103.289 ms / 100) 103.011 -> 103.895 ( +0.86%) [ +0.75% +0.57% +0.00% / +0.86% +1.93% +2.33%] index_select strided 7 : Elapsed 1.038 ms (103.785 ms / 100) 102.760 -> 102.252 ( -0.49%) [ +0.00% +0.76% +0.87% / -0.49% +2.23% +1.96%] index_select strided 8 : Elapsed 1.028 ms (102.760 ms / 100) 103.332 -> 103.128 ( -0.20%) [ +0.23% +0.00% +0.39% / -0.20% +1.13% +1.18%] index_select random : Elapsed 1.036 ms (103.574 ms / 100) 99.370 -> 99.988 ( +0.62%) [ +0.00% +0.69% +0.69% / +0.62% +2.44% +2.29%] index_select random_sorted : Elapsed 0.994 ms (99.370 ms / 100) out_shape = [250, 15, 50] in_shape = [150, 15, 50] idx_dim = 0 B = [250, 15, 50] (stride (750, 50, 1)) A = [150, 15, 50] (stride (15, 1, 2250)) dim = 0 9.439 -> 9.421 ( -0.19%) [ +0.15% +0.04% +0.00% / -0.19% -0.03% +0.19%] index_add_ linear : Elapsed 0.095 ms (9.453 ms / 100) 9.349 -> 9.328 ( -0.22%) [ +0.12% +0.00% +0.11% / -0.22% -0.13% +0.02%] index_copy_ linear : Elapsed 0.094 ms (9.360 ms / 100) 9.477 -> 9.442 ( -0.37%) [ +0.00% +0.03% +0.06% / -0.37% +0.06% +0.11%] index_add_ reverse : Elapsed 0.095 ms (9.477 ms / 100) 9.346 -> 9.338 ( -0.09%) [ +0.11% +0.25% +0.00% / -0.09% +0.18% +0.29%] index_copy_ reverse : Elapsed 0.094 ms (9.356 ms / 100) 9.459 -> 9.457 ( -0.02%) [ +0.14% +0.00% +0.10% / -0.02% +0.86% +1.13%] index_add_ spread : Elapsed 0.095 ms (9.472 ms / 100) 9.348 -> 9.357 ( +0.10%) [ +0.00% +0.03% +0.05% / +0.10% +0.70% +0.77%] index_copy_ spread : Elapsed 0.093 ms (9.348 ms / 100) 9.502 -> 9.478 ( -0.25%) [ +0.08% +0.00% +0.19% / -0.25% +0.14% +0.16%] index_add_ strided 3 : Elapsed 0.095 ms (9.510 ms / 100) 9.370 -> 9.350 ( -0.21%) [ +0.02% +0.02% +0.00% / -0.21% +0.22% +0.10%] index_copy_ strided 3 : Elapsed 0.094 ms (9.372 ms / 100) 9.495 -> 9.500 ( +0.05%) [ +0.13% +0.08% +0.00% / +0.05% +0.49% +0.40%] index_add_ strided 7 : Elapsed 0.095 ms (9.507 ms / 100) 9.372 -> 9.375 ( +0.03%) [ +0.01% +0.01% +0.00% / +0.03% +0.36% +0.27%] index_copy_ strided 7 : Elapsed 0.094 ms (9.373 ms / 100) 9.533 -> 9.501 ( -0.34%) [ +0.15% +0.00% +0.10% / -0.34% -0.25% -0.15%] index_add_ perm : Elapsed 0.095 ms (9.547 ms / 100) 9.365 -> 9.359 ( -0.06%) [ +0.16% +0.35% +0.00% / +0.02% -0.06% +0.12%] index_copy_ perm : Elapsed 0.094 ms (9.380 ms / 100) 9.487 -> 9.476 ( -0.12%) [ +0.07% +0.00% +0.15% / -0.12% +0.00% +0.11%] index_add_ perm_sorted : Elapsed 0.095 ms (9.494 ms / 100) 9.364 -> 9.353 ( -0.12%) [ +0.03% +0.00% +0.34% / -0.12% +0.15% +0.10%] index_copy_ perm_sorted : Elapsed 0.094 ms (9.367 ms / 100) 10.300 -> 10.310 ( +0.10%) [ +0.22% +0.00% +0.13% / +0.10% +0.32% +0.17%] index_select const : Elapsed 0.103 ms (10.323 ms / 100) 11.184 -> 11.209 ( +0.22%) [ +0.04% +0.00% +0.04% / +0.22% +0.36% +0.41%] index_select wrap : Elapsed 0.112 ms (11.188 ms / 100) 10.906 -> 10.927 ( +0.19%) [ +0.17% +0.00% +0.09% / +0.19% +0.39% +0.20%] index_select linear : Elapsed 0.109 ms (10.924 ms / 100) 10.950 -> 10.973 ( +0.21%) [ +0.16% +0.11% +0.00% / +0.21% +0.89% +0.76%] index_select reverse : Elapsed 0.110 ms (10.967 ms / 100) 10.342 -> 10.353 ( +0.11%) [ +0.10% +0.06% +0.00% / +0.11% +0.13% +0.18%] index_select skip64 : Elapsed 0.104 ms (10.352 ms / 100) 10.299 -> 10.320 ( +0.20%) [ +0.15% +0.09% +0.00% / +0.20% +0.32% +0.46%] index_select skip256 : Elapsed 0.103 ms (10.314 ms / 100) 10.916 -> 10.936 ( +0.18%) [ +0.06% +0.00% +0.27% / +0.18% +0.43% +0.53%] index_select spread : Elapsed 0.109 ms (10.923 ms / 100) 11.801 -> 11.818 ( +0.14%) [ +0.27% +0.00% +0.10% / +0.14% +1.03% +0.97%] index_select strided 3 : Elapsed 0.118 ms (11.833 ms / 100) 11.115 -> 11.151 ( +0.32%) [ +0.22% +0.00% +0.13% / +0.32% +0.61% +0.70%] index_select strided 5 : Elapsed 0.111 ms (11.140 ms / 100) 12.024 -> 12.058 ( +0.28%) [ +0.15% +0.00% +0.22% / +0.28% +1.56% +1.48%] index_select strided 7 : Elapsed 0.120 ms (12.042 ms / 100) 11.948 -> 11.958 ( +0.08%) [ +0.00% +0.08% +0.16% / +0.08% +1.29% +1.31%] index_select strided 8 : Elapsed 0.119 ms (11.948 ms / 100) 11.915 -> 11.940 ( +0.21%) [ +0.19% +0.00% +0.14% / +0.21% +1.50% +1.50%] index_select strided 16 : Elapsed 0.119 ms (11.938 ms / 100) 11.865 -> 11.892 ( +0.23%) [ +0.04% +0.02% +0.00% / +0.23% +1.47% +1.37%] index_select strided 64 : Elapsed 0.119 ms (11.870 ms / 100) 10.357 -> 10.373 ( +0.15%) [ +0.34% +0.00% +0.03% / +0.27% +0.58% +0.15%] index_select strided 100 : Elapsed 0.104 ms (10.392 ms / 100) 11.633 -> 11.659 ( +0.22%) [ +0.09% +0.00% +0.26% / +0.22% +1.05% +1.04%] index_select random : Elapsed 0.116 ms (11.644 ms / 100) 10.918 -> 10.907 ( -0.10%) [ +0.25% +0.00% +0.03% / +0.04% -0.10% +0.19%] index_select random_sorted : Elapsed 0.109 ms (10.945 ms / 100) B = [250, 15, 50] (stride (1, 12500, 250)) A = [150, 15, 50] (stride (1, 7500, 150)) dim = 0 Good 6.205 -> 5.278 (-14.94%) [ +0.00% +0.60% +0.27% / -14.73% -14.94% -14.86%] index_add_ linear : Elapsed 0.062 ms (6.205 ms / 100) good 5.642 -> 5.120 ( -9.25%) [ +0.00% +0.34% +0.28% / -9.09% -9.25% -9.18%] index_copy_ linear : Elapsed 0.056 ms (5.642 ms / 100) Good 6.206 -> 5.278 (-14.95%) [ +0.00% +0.39% +0.23% / -14.95% -14.60% -14.68%] index_add_ reverse : Elapsed 0.062 ms (6.206 ms / 100) good 5.664 -> 5.107 ( -9.83%) [ +0.00% +0.09% +0.21% / -9.83% -9.43% -9.39%] index_copy_ reverse : Elapsed 0.057 ms (5.664 ms / 100) Good 6.546 -> 5.485 (-16.21%) [ +1.13% +0.00% +0.11% / -16.18% -16.21% -16.16%] index_add_ spread : Elapsed 0.066 ms (6.620 ms / 100) good 6.075 -> 5.483 ( -9.74%) [ +0.10% +0.00% +0.46% / -9.58% -9.61% -9.74%] index_copy_ spread : Elapsed 0.061 ms (6.081 ms / 100) Good 6.763 -> 5.491 (-18.81%) [ +0.13% +0.00% +0.28% / -18.81% -18.50% -18.41%] index_add_ strided 3 : Elapsed 0.068 ms (6.772 ms / 100) Good 6.323 -> 5.463 (-13.60%) [ +0.00% +0.16% +0.28% / -13.60% -13.16% -13.16%] index_copy_ strided 3 : Elapsed 0.063 ms (6.323 ms / 100) Good 6.782 -> 5.584 (-17.66%) [ +0.00% +0.53% +0.31% / -17.66% -17.21% -17.35%] index_add_ strided 7 : Elapsed 0.068 ms (6.782 ms / 100) Good 6.406 -> 5.497 (-14.19%) [ +0.00% +0.44% +0.11% / -14.19% -13.89% -13.96%] index_copy_ strided 7 : Elapsed 0.064 ms (6.406 ms / 100) Good 6.940 -> 5.650 (-18.59%) [ +0.00% +0.09% +0.12% / -18.59% -18.31% -18.36%] index_add_ perm : Elapsed 0.069 ms (6.940 ms / 100) Good 6.380 -> 5.522 (-13.45%) [ +0.00% +0.99% +0.47% / -13.45% -13.42% -13.26%] index_copy_ perm : Elapsed 0.064 ms (6.380 ms / 100) Good 6.578 -> 5.477 (-16.74%) [ +0.59% +0.41% +0.00% / -16.74% -16.65% -16.72%] index_add_ perm_sorted : Elapsed 0.066 ms (6.617 ms / 100) good 6.055 -> 5.468 ( -9.69%) [ +0.26% +0.50% +0.00% / -9.63% -9.63% -9.69%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.071 ms / 100) Good 6.316 -> 5.253 (-16.83%) [ +0.00% +0.06% +0.16% / -16.83% -16.47% -16.55%] index_select const : Elapsed 0.063 ms (6.316 ms / 100) Good 6.458 -> 5.476 (-15.21%) [ +0.02% +0.00% +0.34% / -14.82% -15.21% -14.93%] index_select wrap : Elapsed 0.065 ms (6.459 ms / 100) Good 6.503 -> 5.519 (-15.13%) [ +0.05% +0.00% +0.06% / -15.13% -15.01% -15.01%] index_select linear : Elapsed 0.065 ms (6.506 ms / 100) Good 6.287 -> 5.503 (-12.47%) [ +0.27% +0.00% +0.25% / -12.31% -12.47% -12.12%] index_select reverse : Elapsed 0.063 ms (6.304 ms / 100) Good 6.337 -> 5.337 (-15.78%) [ +0.25% +0.00% +0.16% / -15.56% -15.78% -15.56%] index_select skip64 : Elapsed 0.064 ms (6.353 ms / 100) Good 6.346 -> 5.242 (-17.40%) [ +0.06% +0.03% +0.00% / -17.22% -17.40% -17.24%] index_select skip256 : Elapsed 0.064 ms (6.350 ms / 100) Good 6.412 -> 5.521 (-13.90%) [ +0.02% +0.00% +0.20% / -13.90% -13.57% -13.65%] index_select spread : Elapsed 0.064 ms (6.413 ms / 100) Good 6.680 -> 5.427 (-18.76%) [ +0.19% +0.16% +0.00% / -18.76% -18.76% -18.64%] index_select strided 3 : Elapsed 0.067 ms (6.693 ms / 100) Good 6.648 -> 5.442 (-18.14%) [ +0.00% +0.33% +0.36% / -18.14% -18.11% -18.13%] index_select strided 5 : Elapsed 0.066 ms (6.648 ms / 100) Good 6.664 -> 5.434 (-18.46%) [ +0.09% +0.00% +0.12% / -18.38% -18.34% -18.46%] index_select strided 7 : Elapsed 0.067 ms (6.670 ms / 100) Good 6.664 -> 5.421 (-18.65%) [ +0.24% +0.08% +0.00% / -18.65% -18.44% -18.53%] index_select strided 8 : Elapsed 0.067 ms (6.680 ms / 100) Good 6.692 -> 5.425 (-18.93%) [ +0.12% +0.00% +0.22% / -18.72% -18.78% -18.93%] index_select strided 16 : Elapsed 0.067 ms (6.700 ms / 100) Good 6.675 -> 5.451 (-18.34%) [ +0.12% +0.00% +0.34% / -18.19% -18.20% -18.34%] index_select strided 64 : Elapsed 0.067 ms (6.683 ms / 100) Good 6.392 -> 5.300 (-17.08%) [ +0.33% +0.17% +0.00% / -17.08% -17.04% -17.07%] index_select strided 100 : Elapsed 0.064 ms (6.413 ms / 100) Good 6.674 -> 5.463 (-18.15%) [ +0.00% +0.13% +0.01% / -18.15% -18.10% -18.15%] index_select random : Elapsed 0.067 ms (6.674 ms / 100) Good 6.439 -> 5.526 (-14.18%) [ +0.00% +0.14% +0.19% / -14.18% -14.13% -14.16%] index_select random_sorted : Elapsed 0.064 ms (6.439 ms / 100) B = [250, 15, 50] (stride (15, 1, 3750)) A = [150, 15, 50] (stride (750, 1, 15)) dim = 0 6.244 -> 6.228 ( -0.26%) [ +0.00% +0.50% +0.50% / -0.26% +0.11% -0.13%] index_add_ linear : Elapsed 0.062 ms (6.244 ms / 100) 5.898 -> 5.856 ( -0.71%) [ +0.00% +0.12% +0.53% / -0.71% +0.59% +0.56%] index_copy_ linear : Elapsed 0.059 ms (5.898 ms / 100) 6.286 -> 6.220 ( -1.05%) [ +0.00% +0.38% +0.14% / -0.45% -0.94% -1.05%] index_add_ reverse : Elapsed 0.063 ms (6.286 ms / 100) 5.961 -> 5.898 ( -1.06%) [ +0.29% +0.47% +0.00% / -0.50% -1.06% -0.97%] index_copy_ reverse : Elapsed 0.060 ms (5.978 ms / 100) 6.578 -> 6.567 ( -0.17%) [ +0.15% +0.15% +0.00% / -0.17% +0.09% +0.00%] index_add_ spread : Elapsed 0.066 ms (6.588 ms / 100) 6.309 -> 6.274 ( -0.55%) [ +0.00% +0.27% +0.33% / -0.55% -0.08% +0.00%] index_copy_ spread : Elapsed 0.063 ms (6.309 ms / 100) 6.867 -> 6.845 ( -0.32%) [ +0.44% +0.84% +0.00% / -0.32% +0.31% +0.38%] index_add_ strided 3 : Elapsed 0.069 ms (6.897 ms / 100) 6.528 -> 6.502 ( -0.40%) [ +0.40% +0.29% +0.00% / -0.40% +0.05% -0.18%] index_copy_ strided 3 : Elapsed 0.066 ms (6.554 ms / 100) 6.985 -> 6.962 ( -0.33%) [ +0.06% +0.00% +0.23% / -0.33% +0.16% +0.20%] index_add_ strided 7 : Elapsed 0.070 ms (6.989 ms / 100) 6.529 -> 6.497 ( -0.49%) [ +0.49% +0.52% +0.00% / -0.49% -0.20% -0.09%] index_copy_ strided 7 : Elapsed 0.066 ms (6.561 ms / 100) 6.873 -> 6.829 ( -0.64%) [ +0.00% +0.13% +0.51% / -0.28% -0.38% -0.64%] index_add_ perm : Elapsed 0.069 ms (6.873 ms / 100) 6.364 -> 6.348 ( -0.25%) [ +0.13% +0.00% +0.16% / -0.25% -0.03% +0.08%] index_copy_ perm : Elapsed 0.064 ms (6.372 ms / 100) 6.516 -> 6.503 ( -0.20%) [ +0.46% +0.35% +0.00% / -0.20% +0.03% +0.03%] index_add_ perm_sorted : Elapsed 0.065 ms (6.546 ms / 100) 6.217 -> 6.187 ( -0.48%) [ +0.58% +0.00% +0.55% / -0.48% -0.05% +0.02%] index_copy_ perm_sorted : Elapsed 0.063 ms (6.253 ms / 100) 6.743 -> 6.743 ( +0.00%) [ +0.15% +0.00% +0.09% / +0.00% +0.13% +0.10%] index_select const : Elapsed 0.068 ms (6.753 ms / 100) 6.845 -> 6.846 ( +0.01%) [ +0.00% +0.01% +0.03% / +0.01% +0.61% +0.67%] index_select wrap : Elapsed 0.068 ms (6.845 ms / 100) 6.914 -> 6.926 ( +0.17%) [ +0.00% +0.13% +0.29% / +0.20% +0.35% +0.17%] index_select linear : Elapsed 0.069 ms (6.914 ms / 100) 6.562 -> 6.617 ( +0.84%) [ +0.66% +0.00% +0.29% / +0.84% +1.54% +0.85%] index_select reverse : Elapsed 0.066 ms (6.605 ms / 100) 6.628 -> 6.633 ( +0.08%) [ +0.17% +0.18% +0.00% / +0.08% +0.69% +0.97%] index_select skip64 : Elapsed 0.066 ms (6.639 ms / 100) 6.816 -> 6.805 ( -0.16%) [ +0.10% +0.09% +0.00% / +0.09% -0.16% -0.03%] index_select skip256 : Elapsed 0.068 ms (6.823 ms / 100) 6.637 -> 6.632 ( -0.08%) [ +0.24% +0.12% +0.00% / -0.08% +0.60% +0.98%] index_select spread : Elapsed 0.067 ms (6.653 ms / 100) 6.691 -> 6.668 ( -0.34%) [ +0.00% +0.12% +0.04% / -0.01% -0.16% -0.34%] index_select strided 3 : Elapsed 0.067 ms (6.691 ms / 100) 6.580 -> 6.602 ( +0.33%) [ +0.29% +0.00% +0.15% / +0.46% +0.33% +0.44%] index_select strided 5 : Elapsed 0.066 ms (6.599 ms / 100) 6.898 -> 6.901 ( +0.04%) [ +0.00% +0.39% +0.00% / +0.04% +0.87% +1.00%] index_select strided 7 : Elapsed 0.069 ms (6.898 ms / 100) 6.730 -> 6.742 ( +0.18%) [ +0.55% +0.00% +0.33% / +0.18% +0.80% +0.62%] index_select strided 8 : Elapsed 0.068 ms (6.767 ms / 100) 6.742 -> 6.760 ( +0.27%) [ +0.25% +0.24% +0.00% / +0.27% +0.43% +0.53%] index_select strided 16 : Elapsed 0.068 ms (6.759 ms / 100) 6.738 -> 6.738 ( +0.00%) [ +0.00% +0.03% +0.04% / +0.00% +0.82% +0.88%] index_select strided 64 : Elapsed 0.067 ms (6.738 ms / 100) 6.515 -> 6.513 ( -0.03%) [ +0.00% +0.03% +0.23% / -0.03% +0.17% +0.23%] index_select strided 100 : Elapsed 0.065 ms (6.515 ms / 100) 6.705 -> 6.713 ( +0.12%) [ +0.00% +0.04% +0.01% / +0.12% +1.36% +1.80%] index_select random : Elapsed 0.067 ms (6.705 ms / 100) 6.629 -> 6.631 ( +0.03%) [ +0.00% +0.15% +0.09% / +0.03% +0.81% +0.69%] index_select random_sorted : Elapsed 0.066 ms (6.629 ms / 100) out_shape = [150, 250, 50] in_shape = [150, 15, 50] idx_dim = 1 B = [150, 250, 50] (stride (12500, 50, 1)) dim = 1 fill_cnt = 15 1.443 -> 1.443 ( +0.00%) [ +0.62% +2.56% +0.00% / +1.18% +0.21% +0.00%] index_fill_ const : Elapsed 0.015 ms (1.452 ms / 100) 1.570 -> 1.580 ( +0.64%) [ +0.57% +0.38% +0.00% / +0.89% +0.83% +0.64%] index_fill_ linear : Elapsed 0.016 ms (1.579 ms / 100) 1.574 -> 1.572 ( -0.13%) [ +0.44% +0.00% +0.06% / +0.51% -0.13% -0.06%] index_fill_ reverse : Elapsed 0.016 ms (1.581 ms / 100) 1.439 -> 1.446 ( +0.49%) [ +0.49% +0.00% +0.14% / +0.49% +0.69% +2.22%] index_fill_ skip64 : Elapsed 0.014 ms (1.446 ms / 100) 1.439 -> 1.448 ( +0.63%) [ +0.90% +0.35% +0.00% / +0.63% +0.97% +0.76%] index_fill_ skip256 : Elapsed 0.015 ms (1.452 ms / 100) 1.594 -> 1.603 ( +0.56%) [ +0.63% +0.00% +0.19% / +0.56% +1.25% +1.19%] index_fill_ spread : Elapsed 0.016 ms (1.604 ms / 100) 1.592 -> 1.599 ( +0.44%) [ +0.63% +0.00% +0.06% / +0.44% +0.63% +0.44%] index_fill_ strided 3 : Elapsed 0.016 ms (1.602 ms / 100) 1.614 -> 1.619 ( +0.31%) [ +6.01% +0.31% +0.00% / +0.31% +0.50% +0.56%] index_fill_ strided 5 : Elapsed 0.017 ms (1.711 ms / 100) 1.623 -> 1.628 ( +0.31%) [ +0.43% +0.00% +0.00% / +0.49% +0.55% +0.31%] index_fill_ strided 7 : Elapsed 0.016 ms (1.630 ms / 100) 1.621 -> 1.619 ( -0.12%) [ +0.12% +0.00% +1.11% / -0.12% -0.06% +0.06%] index_fill_ strided 8 : Elapsed 0.016 ms (1.623 ms / 100) 1.611 -> 1.616 ( +0.31%) [ +0.50% +0.37% +0.00% / +0.31% +0.81% +0.43%] index_fill_ strided 16 : Elapsed 0.016 ms (1.619 ms / 100) 1.623 -> 1.627 ( +0.25%) [ +0.62% +0.18% +0.00% / +0.92% +0.55% +0.25%] index_fill_ strided 64 : Elapsed 0.016 ms (1.633 ms / 100) 1.530 -> 1.531 ( +0.07%) [ +0.46% +0.00% +0.13% / +0.46% +0.13% +0.07%] index_fill_ strided 100 : Elapsed 0.015 ms (1.537 ms / 100) 1.603 -> 1.607 ( +0.25%) [ +0.19% +0.00% +0.19% / +0.25% +0.37% +0.81%] index_fill_ random : Elapsed 0.016 ms (1.606 ms / 100) 1.602 -> 1.610 ( +0.50%) [ +0.56% +0.31% +0.00% / +0.50% +0.62% +0.56%] index_fill_ random_sorted : Elapsed 0.016 ms (1.611 ms / 100) 1.590 -> 1.603 ( +0.82%) [ +0.50% +0.06% +0.00% / +0.82% +1.95% +1.76%] index_fill_ perm : Elapsed 0.016 ms (1.598 ms / 100) 1.590 -> 1.597 ( +0.44%) [ +0.44% +0.00% +0.00% / +0.44% +0.69% +0.94%] index_fill_ perm_sorted : Elapsed 0.016 ms (1.597 ms / 100) B = [150, 250, 50] (stride (12500, 50, 1)) A = [150, 15, 50] (stride (15, 1, 2250)) dim = 1 3.546 -> 3.547 ( +0.03%) [ +0.28% +0.11% +0.00% / +0.37% +0.03% +0.14%] index_add_ linear : Elapsed 0.036 ms (3.556 ms / 100) 3.305 -> 3.318 ( +0.39%) [ +0.54% +0.06% +0.00% / +0.39% +0.42% +0.42%] index_copy_ linear : Elapsed 0.033 ms (3.323 ms / 100) 3.560 -> 3.535 ( -0.70%) [ +0.51% +0.14% +0.00% / +0.37% -0.62% -0.70%] index_add_ reverse : Elapsed 0.036 ms (3.578 ms / 100) 3.314 -> 3.315 ( +0.03%) [ +0.33% +0.12% +0.00% / +0.30% +0.39% +0.03%] index_copy_ reverse : Elapsed 0.033 ms (3.325 ms / 100) 3.615 -> 3.631 ( +0.44%) [ +0.36% +0.03% +0.00% / +0.44% +0.94% +1.11%] index_add_ spread : Elapsed 0.036 ms (3.628 ms / 100) 3.398 -> 3.404 ( +0.18%) [ +0.26% +0.06% +0.00% / +0.18% +0.62% +0.94%] index_copy_ spread : Elapsed 0.034 ms (3.407 ms / 100) 3.585 -> 3.602 ( +0.47%) [ +0.64% +0.11% +0.00% / +0.47% +3.57% +1.73%] index_add_ strided 3 : Elapsed 0.036 ms (3.608 ms / 100) 3.375 -> 3.387 ( +0.36%) [ +0.18% +0.09% +0.00% / +0.36% +1.07% +0.92%] index_copy_ strided 3 : Elapsed 0.034 ms (3.381 ms / 100) 3.664 -> 3.663 ( -0.03%) [ +0.14% +0.00% +0.00% / -0.03% +0.85% +0.93%] index_add_ strided 7 : Elapsed 0.037 ms (3.669 ms / 100) 3.417 -> 3.425 ( +0.23%) [ +0.50% +0.09% +0.00% / +0.23% +0.61% +0.76%] index_copy_ strided 7 : Elapsed 0.034 ms (3.434 ms / 100) 3.595 -> 3.612 ( +0.47%) [ +0.47% +0.00% +0.08% / +0.47% +1.11% +1.14%] index_add_ perm : Elapsed 0.036 ms (3.612 ms / 100) 3.387 -> 3.402 ( +0.44%) [ +0.41% +0.00% +0.12% / +0.44% +0.44% +0.77%] index_copy_ perm : Elapsed 0.034 ms (3.401 ms / 100) 3.603 -> 3.606 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.17% +0.08% +0.19%] index_add_ perm_sorted : Elapsed 0.036 ms (3.609 ms / 100) 3.384 -> 3.388 ( +0.12%) [ +0.33% +0.00% +0.00% / +0.38% +0.21% +0.12%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.395 ms / 100) 32.951 -> 33.052 ( +0.31%) [ +0.01% +0.00% +0.27% / +0.31% +4.99% +4.53%] index_select const : Elapsed 0.330 ms (32.955 ms / 100) 33.413 -> 33.428 ( +0.04%) [ +0.28% +0.35% +0.00% / +0.04% +5.41% +5.49%] index_select wrap : Elapsed 0.335 ms (33.506 ms / 100) 32.909 -> 33.063 ( +0.47%) [ +0.34% +0.00% +0.34% / +0.47% +5.39% +5.03%] index_select linear : Elapsed 0.330 ms (33.020 ms / 100) 33.242 -> 33.428 ( +0.56%) [ +0.00% +0.35% +0.16% / +0.56% +4.34% +5.03%] index_select reverse : Elapsed 0.332 ms (33.242 ms / 100) 32.976 -> 32.899 ( -0.23%) [ +0.14% +0.00% +0.26% / -0.23% +5.19% +4.83%] index_select skip64 : Elapsed 0.330 ms (33.021 ms / 100) 32.884 -> 32.935 ( +0.16%) [ +0.33% +0.21% +0.00% / +0.16% +5.50% +5.43%] index_select skip256 : Elapsed 0.330 ms (32.991 ms / 100) 33.328 -> 33.460 ( +0.40%) [ +0.49% +0.00% +0.36% / +0.40% +5.92% +6.25%] index_select spread : Elapsed 0.335 ms (33.490 ms / 100) 33.468 -> 33.611 ( +0.43%) [ +0.04% +0.16% +0.00% / +0.43% +5.49% +5.44%] index_select strided 3 : Elapsed 0.335 ms (33.482 ms / 100) 33.534 -> 33.638 ( +0.31%) [ +0.20% +0.00% +0.21% / +0.31% +5.46% +5.40%] index_select strided 5 : Elapsed 0.336 ms (33.601 ms / 100) 33.538 -> 33.674 ( +0.41%) [ +0.25% +0.00% +0.26% / +0.41% +5.02% +5.84%] index_select strided 7 : Elapsed 0.336 ms (33.623 ms / 100) 33.389 -> 33.499 ( +0.33%) [ +0.00% +0.51% +0.63% / +0.33% +5.62% +6.12%] index_select strided 8 : Elapsed 0.334 ms (33.389 ms / 100) 33.429 -> 33.623 ( +0.58%) [ +0.34% +0.25% +0.00% / +0.58% +5.86% +6.17%] index_select random : Elapsed 0.335 ms (33.543 ms / 100) 33.214 -> 33.402 ( +0.57%) [ +0.86% +0.00% +0.62% / +0.57% +6.01% +5.77%] index_select random_sorted : Elapsed 0.335 ms (33.501 ms / 100) B = [150, 250, 50] (stride (12500, 50, 1)) A = [150, 15, 50] (stride (1, 150, 2250)) dim = 1 3.407 -> 3.398 ( -0.26%) [ +0.21% +0.00% +0.38% / +0.06% -0.21% -0.26%] index_add_ linear : Elapsed 0.034 ms (3.414 ms / 100) 3.157 -> 3.153 ( -0.13%) [ +0.41% +0.00% +0.16% / +0.29% +0.16% -0.13%] index_copy_ linear : Elapsed 0.032 ms (3.170 ms / 100) 3.402 -> 3.407 ( +0.15%) [ +0.50% +0.15% +0.00% / +0.29% +0.15% +0.15%] index_add_ reverse : Elapsed 0.034 ms (3.419 ms / 100) 3.162 -> 3.157 ( -0.16%) [ +0.28% +0.00% +0.19% / +0.25% -0.09% -0.16%] index_copy_ reverse : Elapsed 0.032 ms (3.171 ms / 100) 3.424 -> 3.445 ( +0.61%) [ +0.96% +0.55% +0.00% / +0.61% +1.02% +1.14%] index_add_ spread : Elapsed 0.035 ms (3.457 ms / 100) 3.218 -> 3.231 ( +0.40%) [ +0.34% +0.19% +0.00% / +0.40% +0.56% +0.59%] index_copy_ spread : Elapsed 0.032 ms (3.229 ms / 100) 3.406 -> 3.403 ( -0.09%) [ +0.21% +0.68% +0.00% / -0.09% +0.97% +0.94%] index_add_ strided 3 : Elapsed 0.034 ms (3.413 ms / 100) 3.207 -> 3.219 ( +0.37%) [ +0.31% +0.03% +0.00% / +0.37% +0.56% +0.84%] index_copy_ strided 3 : Elapsed 0.032 ms (3.217 ms / 100) 3.441 -> 3.465 ( +0.70%) [ +0.64% +0.26% +0.00% / +0.87% +0.70% +0.81%] index_add_ strided 7 : Elapsed 0.035 ms (3.463 ms / 100) 3.226 -> 3.241 ( +0.46%) [ +0.40% +0.00% +0.00% / +0.50% +0.65% +0.46%] index_copy_ strided 7 : Elapsed 0.032 ms (3.239 ms / 100) 3.436 -> 3.451 ( +0.44%) [ +0.49% +0.35% +0.00% / +0.44% +0.96% +0.76%] index_add_ perm : Elapsed 0.035 ms (3.453 ms / 100) 3.222 -> 3.226 ( +0.12%) [ +0.28% +0.16% +0.00% / +0.12% +0.74% +0.37%] index_copy_ perm : Elapsed 0.032 ms (3.231 ms / 100) 3.431 -> 3.447 ( +0.47%) [ +0.38% +0.87% +0.00% / +0.50% +0.64% +0.47%] index_add_ perm_sorted : Elapsed 0.034 ms (3.444 ms / 100) 3.219 -> 3.228 ( +0.28%) [ +0.16% +0.03% +0.00% / +0.28% +0.40% +0.47%] index_copy_ perm_sorted : Elapsed 0.032 ms (3.224 ms / 100) 24.110 -> 24.269 ( +0.66%) [ +0.10% +0.95% +0.00% / +0.66% +4.75% +4.11%] index_select const : Elapsed 0.241 ms (24.134 ms / 100) 31.142 -> 31.001 ( -0.45%) [ +0.00% +0.09% +0.09% / -0.45% +6.71% +6.99%] index_select wrap : Elapsed 0.311 ms (31.142 ms / 100) 23.129 -> 23.347 ( +0.94%) [ +0.00% +0.13% +1.12% / +0.94% +9.30% +8.40%] index_select linear : Elapsed 0.231 ms (23.129 ms / 100) 25.867 -> 26.013 ( +0.56%) [ +0.14% +0.00% +0.72% / +0.56% +5.13% +5.26%] index_select reverse : Elapsed 0.259 ms (25.904 ms / 100) 24.458 -> 24.485 ( +0.11%) [ +0.00% +0.10% +0.99% / +0.11% +2.68% +1.83%] index_select skip64 : Elapsed 0.245 ms (24.458 ms / 100) 24.287 -> 24.061 ( -0.93%) [ +0.28% +0.01% +0.00% / -0.93% +3.25% +2.25%] index_select skip256 : Elapsed 0.244 ms (24.356 ms / 100) 25.244 -> 25.419 ( +0.69%) [ +0.00% +0.20% +0.48% / +0.69% +6.52% +6.19%] index_select spread : Elapsed 0.252 ms (25.244 ms / 100) 27.289 -> 27.354 ( +0.24%) [ +0.00% +0.14% +0.26% / +0.24% +4.87% +4.32%] index_select strided 3 : Elapsed 0.273 ms (27.289 ms / 100) 26.577 -> 26.675 ( +0.37%) [ +0.18% +0.84% +0.00% / +0.37% +2.42% +2.15%] index_select strided 5 : Elapsed 0.266 ms (26.625 ms / 100) 31.678 -> 31.858 ( +0.57%) [ +0.21% +0.00% +0.22% / +0.57% +6.92% +6.88%] index_select strided 7 : Elapsed 0.317 ms (31.746 ms / 100) 31.587 -> 31.681 ( +0.30%) [ +0.16% +0.00% +0.62% / +0.30% +6.79% +6.40%] index_select strided 8 : Elapsed 0.316 ms (31.636 ms / 100) 30.381 -> 30.389 ( +0.03%) [ +0.42% +0.17% +0.00% / +0.03% +8.17% +7.39%] index_select random : Elapsed 0.305 ms (30.508 ms / 100) 25.890 -> 26.010 ( +0.46%) [ +0.00% +0.28% +0.33% / +0.46% +6.99% +6.87%] index_select random_sorted : Elapsed 0.259 ms (25.890 ms / 100) B = [150, 250, 50] (stride (1, 7500, 150)) A = [150, 15, 50] (stride (1, 7500, 150)) dim = 1 3.487 -> 3.480 ( -0.20%) [ +0.17% +0.14% +0.00% / +0.06% -0.20% +0.54%] index_add_ linear : Elapsed 0.035 ms (3.493 ms / 100) 3.370 -> 3.379 ( +0.27%) [ +0.27% +0.03% +0.00% / +0.27% +0.59% +0.30%] index_copy_ linear : Elapsed 0.034 ms (3.379 ms / 100) 3.471 -> 3.486 ( +0.43%) [ +0.43% +0.06% +0.00% / +0.43% +0.86% +1.15%] index_add_ reverse : Elapsed 0.035 ms (3.486 ms / 100) 3.371 -> 3.375 ( +0.12%) [ +0.33% +0.00% +0.15% / +0.27% +0.42% +0.12%] index_copy_ reverse : Elapsed 0.034 ms (3.382 ms / 100) 3.471 -> 3.483 ( +0.35%) [ +0.58% +0.00% +0.00% / +0.35% +0.43% +0.69%] index_add_ spread : Elapsed 0.035 ms (3.491 ms / 100) 3.367 -> 3.370 ( +0.09%) [ +0.30% +0.03% +0.00% / +0.09% +0.39% +0.42%] index_copy_ spread : Elapsed 0.034 ms (3.377 ms / 100) 3.476 -> 3.477 ( +0.03%) [ +0.46% +0.00% +0.35% / +0.32% +0.03% +0.17%] index_add_ strided 3 : Elapsed 0.035 ms (3.492 ms / 100) 3.360 -> 3.373 ( +0.39%) [ +0.27% +0.09% +0.00% / +0.39% +0.68% +0.71%] index_copy_ strided 3 : Elapsed 0.034 ms (3.369 ms / 100) 3.479 -> 3.479 ( +0.00%) [ +0.66% +0.06% +0.00% / +0.37% +0.03% +0.00%] index_add_ strided 7 : Elapsed 0.035 ms (3.502 ms / 100) 3.361 -> 3.367 ( +0.18%) [ +0.48% +0.00% +0.09% / +0.30% +0.18% +0.36%] index_copy_ strided 7 : Elapsed 0.034 ms (3.377 ms / 100) 3.503 -> 3.496 ( -0.20%) [ +0.09% +0.00% +0.06% / +0.49% -0.11% -0.20%] index_add_ perm : Elapsed 0.035 ms (3.506 ms / 100) 3.364 -> 3.376 ( +0.36%) [ +0.39% +0.00% +0.09% / +0.54% +0.36% +0.56%] index_copy_ perm : Elapsed 0.034 ms (3.377 ms / 100) 3.471 -> 3.489 ( +0.52%) [ +0.69% +0.26% +0.00% / +0.52% +0.55% +0.75%] index_add_ perm_sorted : Elapsed 0.035 ms (3.495 ms / 100) 3.369 -> 3.376 ( +0.21%) [ +0.18% +0.06% +0.00% / +0.21% +0.21% +0.27%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.375 ms / 100) good 35.288 -> 33.487 ( -5.10%) [ +0.00% +0.43% +0.48% / -1.02% -4.66% -5.10%] index_select const : Elapsed 0.353 ms (35.288 ms / 100) 51.340 -> 49.447 ( -3.69%) [ +1.04% +0.00% +0.33% / -2.33% -3.46% -3.69%] index_select wrap : Elapsed 0.519 ms (51.876 ms / 100) good 30.915 -> 29.214 ( -5.50%) [ +0.63% +0.54% +0.00% / -1.47% -5.03% -5.50%] index_select linear : Elapsed 0.311 ms (31.110 ms / 100) 31.442 -> 31.407 ( -0.11%) [ +1.32% +0.00% +0.51% / -0.11% +8.66% +8.54%] index_select reverse : Elapsed 0.319 ms (31.856 ms / 100) good 32.563 -> 30.384 ( -6.69%) [ +0.00% +1.32% +0.80% / -0.68% -5.98% -6.69%] index_select skip64 : Elapsed 0.326 ms (32.563 ms / 100) 30.075 -> 29.884 ( -0.64%) [ +0.00% +0.09% +0.14% / -0.64% +8.94% +9.04%] index_select skip256 : Elapsed 0.301 ms (30.075 ms / 100) 28.445 -> 28.230 ( -0.76%) [ +0.00% +0.92% +0.97% / -0.76% +2.74% +4.07%] index_select spread : Elapsed 0.284 ms (28.445 ms / 100) 42.079 -> 41.787 ( -0.69%) [ +0.23% +2.07% +0.00% / -0.69% +4.99% +3.74%] index_select strided 3 : Elapsed 0.422 ms (42.176 ms / 100) good 37.397 -> 35.345 ( -5.49%) [ +0.00% +1.41% +0.63% / +0.26% -5.49% -4.44%] index_select strided 5 : Elapsed 0.374 ms (37.397 ms / 100) 47.749 -> 47.491 ( -0.54%) [ +0.00% +1.52% +0.61% / -0.54% +0.20% +1.02%] index_select strided 7 : Elapsed 0.477 ms (47.749 ms / 100) 45.242 -> 45.576 ( +0.74%) [ +0.72% +0.00% +1.65% / +1.46% +1.23% +0.74%] index_select strided 8 : Elapsed 0.456 ms (45.569 ms / 100) 41.653 -> 41.816 ( +0.39%) [ +0.73% +0.00% +0.69% / +0.39% +6.02% +6.89%] index_select random : Elapsed 0.420 ms (41.957 ms / 100) 29.292 -> 28.162 ( -3.86%) [ +0.74% +1.35% +0.00% / +0.27% -3.15% -3.86%] index_select random_sorted : Elapsed 0.295 ms (29.510 ms / 100) B = [150, 250, 50] (stride (250, 1, 37500)) A = [150, 15, 50] (stride (750, 1, 15)) dim = 1 7.159 -> 7.162 ( +0.04%) [ +0.06% +0.00% +0.14% / +0.04% +4.82% +4.78%] index_add_ linear : Elapsed 0.072 ms (7.163 ms / 100) 5.401 -> 5.416 ( +0.28%) [ +0.24% +0.00% +0.43% / +0.28% +4.28% +4.24%] index_copy_ linear : Elapsed 0.054 ms (5.414 ms / 100) 7.271 -> 7.263 ( -0.11%) [ +0.22% +0.00% +0.11% / -0.11% +5.17% +5.30%] index_add_ reverse : Elapsed 0.073 ms (7.287 ms / 100) 5.459 -> 5.446 ( -0.24%) [ +0.00% +0.04% +0.33% / -0.24% +4.30% +4.23%] index_copy_ reverse : Elapsed 0.055 ms (5.459 ms / 100) 18.373 -> 18.304 ( -0.38%) [ +0.05% +0.00% +0.11% / +0.19% -0.21% -0.38%] index_add_ spread : Elapsed 0.184 ms (18.382 ms / 100) 11.391 -> 11.335 ( -0.49%) [ +0.00% +0.12% +0.25% / +0.34% -0.42% -0.49%] index_copy_ spread : Elapsed 0.114 ms (11.391 ms / 100) 9.569 -> 9.542 ( -0.28%) [ +0.21% +0.00% +0.06% / +0.03% -0.28% -0.22%] index_add_ strided 3 : Elapsed 0.096 ms (9.589 ms / 100) 6.600 -> 6.528 ( -1.09%) [ +0.35% +0.00% +0.00% / +0.00% -1.09% -0.67%] index_copy_ strided 3 : Elapsed 0.066 ms (6.623 ms / 100) 14.967 -> 14.976 ( +0.06%) [ +0.13% +0.09% +0.00% / +0.06% +0.12% +0.15%] index_add_ strided 7 : Elapsed 0.150 ms (14.987 ms / 100) 9.473 -> 9.432 ( -0.43%) [ +0.00% +0.12% +0.13% / +0.22% -0.43% -0.24%] index_copy_ strided 7 : Elapsed 0.095 ms (9.473 ms / 100) 18.692 -> 18.642 ( -0.27%) [ +0.07% +0.00% +0.14% / +0.01% -0.27% -0.22%] index_add_ perm : Elapsed 0.187 ms (18.706 ms / 100) 11.913 -> 11.890 ( -0.19%) [ +0.00% +0.00% +0.21% / +0.12% -0.19% -0.13%] index_copy_ perm : Elapsed 0.119 ms (11.913 ms / 100) 15.038 -> 14.751 ( -1.91%) [ +0.00% +0.11% +0.00% / +0.05% -1.90% -1.91%] index_add_ perm_sorted : Elapsed 0.150 ms (15.038 ms / 100) 9.694 -> 9.535 ( -1.64%) [ +0.00% +0.03% +0.26% / +0.07% -1.64% -1.59%] index_copy_ perm_sorted : Elapsed 0.097 ms (9.694 ms / 100) BEST 107.725 -> 19.073 (-82.29%) [ +0.48% +0.00% +0.14% / -81.93% -82.28% -82.29%] index_select const : Elapsed 1.082 ms (108.242 ms / 100) BEST 111.880 -> 19.252 (-82.79%) [ +0.00% +0.01% +0.44% / -82.51% -82.79% -82.79%] index_select wrap : Elapsed 1.119 ms (111.880 ms / 100) BEST 107.626 -> 19.167 (-82.19%) [ +0.25% +0.07% +0.00% / -81.61% -82.19% -82.14%] index_select linear : Elapsed 1.079 ms (107.890 ms / 100) BEST 107.791 -> 19.742 (-81.68%) [ +0.00% +0.22% +0.03% / -81.68% -81.57% -81.56%] index_select reverse : Elapsed 1.078 ms (107.791 ms / 100) BEST 107.668 -> 19.218 (-82.15%) [ +0.36% +0.18% +0.00% / -81.98% -82.15% -82.12%] index_select skip64 : Elapsed 1.081 ms (108.056 ms / 100) BEST 107.324 -> 19.149 (-82.16%) [ +0.00% +0.26% +0.02% / -81.38% -82.16% -82.15%] index_select skip256 : Elapsed 1.073 ms (107.324 ms / 100) BEST 108.484 -> 18.912 (-82.57%) [ +0.00% +0.21% +0.14% / -82.28% -82.57% -82.55%] index_select spread : Elapsed 1.085 ms (108.484 ms / 100) BEST 111.970 -> 19.277 (-82.78%) [ +0.03% +0.00% +0.27% / -82.42% -82.78% -82.78%] index_select strided 3 : Elapsed 1.120 ms (112.004 ms / 100) BEST 111.853 -> 19.268 (-82.77%) [ +0.00% +0.06% +0.14% / -82.55% -82.77% -82.77%] index_select strided 5 : Elapsed 1.119 ms (111.853 ms / 100) BEST 112.430 -> 19.498 (-82.66%) [ +0.00% +0.04% +0.11% / -82.57% -82.66% -82.61%] index_select strided 7 : Elapsed 1.124 ms (112.430 ms / 100) BEST 112.002 -> 19.238 (-82.82%) [ +0.37% +0.00% +0.31% / -82.33% -82.80% -82.82%] index_select strided 8 : Elapsed 1.124 ms (112.419 ms / 100) BEST 112.316 -> 19.149 (-82.95%) [ +0.10% +0.00% +0.27% / -82.56% -82.95% -82.87%] index_select random : Elapsed 1.124 ms (112.432 ms / 100) BEST 108.215 -> 19.063 (-82.38%) [ +0.18% +0.00% +0.39% / -82.26% -82.38% -82.35%] index_select random_sorted : Elapsed 1.084 ms (108.408 ms / 100) B = [150, 250, 50] (stride (1, 150, 37500)) dim = 1 fill_cnt = 15 2.323 -> 2.330 ( +0.30%) [ +0.43% +0.13% +0.00% / +0.30% +2.76% +2.84%] index_fill_ const : Elapsed 0.023 ms (2.333 ms / 100) 2.063 -> 2.040 ( -1.11%) [ +0.39% +0.00% +0.05% / +0.44% -1.11% -0.87%] index_fill_ linear : Elapsed 0.021 ms (2.071 ms / 100) 2.009 -> 2.008 ( -0.05%) [ +0.25% +0.00% +0.20% / -0.05% +4.18% +4.28%] index_fill_ reverse : Elapsed 0.020 ms (2.014 ms / 100) 2.314 -> 2.322 ( +0.35%) [ +0.13% +0.00% +0.09% / +0.35% +8.51% +8.69%] index_fill_ skip64 : Elapsed 0.023 ms (2.317 ms / 100) 2.326 -> 2.332 ( +0.26%) [ +0.09% +0.00% +0.52% / +0.26% +7.95% +7.70%] index_fill_ skip256 : Elapsed 0.023 ms (2.328 ms / 100) 1.890 -> 1.895 ( +0.26%) [ +0.42% +0.00% +0.42% / +0.26% +2.33% +2.12%] index_fill_ spread : Elapsed 0.019 ms (1.898 ms / 100) 1.986 -> 1.993 ( +0.35%) [ +0.40% +0.00% +0.35% / +0.35% +1.96% +1.76%] index_fill_ strided 3 : Elapsed 0.020 ms (1.994 ms / 100) 1.943 -> 1.916 ( -1.39%) [ +0.82% +0.00% +0.15% / +0.62% -1.39% -1.34%] index_fill_ strided 5 : Elapsed 0.020 ms (1.959 ms / 100) 1.893 -> 1.891 ( -0.11%) [ +0.48% +0.11% +0.00% / +0.42% +0.16% -0.11%] index_fill_ strided 7 : Elapsed 0.019 ms (1.902 ms / 100) 1.947 -> 1.935 ( -0.62%) [ +0.67% +0.00% +0.41% / +0.36% -0.62% -0.21%] index_fill_ strided 8 : Elapsed 0.020 ms (1.960 ms / 100) 1.921 -> 1.885 ( -1.87%) [ +0.52% +0.10% +0.00% / +0.00% -1.61% -1.87%] index_fill_ strided 16 : Elapsed 0.019 ms (1.931 ms / 100) 1.936 -> 1.902 ( -1.76%) [ +0.00% +0.00% +0.21% / -0.05% -1.03% -1.76%] index_fill_ strided 64 : Elapsed 0.019 ms (1.936 ms / 100) 1.838 -> 1.818 ( -1.09%) [ +0.38% +0.05% +0.00% / +0.16% -1.09% -0.87%] index_fill_ strided 100 : Elapsed 0.018 ms (1.845 ms / 100) 1.929 -> 1.930 ( +0.05%) [ +0.47% +0.05% +0.00% / +0.05% +4.04% +3.89%] index_fill_ random : Elapsed 0.019 ms (1.938 ms / 100) 1.935 -> 1.944 ( +0.47%) [ +0.10% +0.41% +0.00% / +0.47% +1.19% +1.50%] index_fill_ random_sorted : Elapsed 0.019 ms (1.937 ms / 100) 1.992 -> 1.991 ( -0.05%) [ +0.15% +0.10% +0.00% / -0.05% +1.26% +1.31%] index_fill_ perm : Elapsed 0.020 ms (1.995 ms / 100) 2.019 -> 2.029 ( +0.50%) [ +0.54% +0.15% +0.00% / +0.50% +0.54% +0.50%] index_fill_ perm_sorted : Elapsed 0.020 ms (2.030 ms / 100) out_shape = [150, 15, 250] in_shape = [150, 15, 50] idx_dim = 2 B = [150, 15, 250] (stride (1, 37500, 150)) A = [150, 15, 50] (stride (1, 7500, 150)) dim = 2 6.525 -> 6.531 ( +0.09%) [ +0.23% +0.12% +0.00% / +0.09% +1.52% +1.59%] index_add_ linear : Elapsed 0.065 ms (6.540 ms / 100) 5.950 -> 5.909 ( -0.69%) [ +0.13% +0.20% +0.00% / -0.69% -0.02% -0.20%] index_copy_ linear : Elapsed 0.060 ms (5.958 ms / 100) 6.544 -> 6.543 ( -0.02%) [ +0.41% +0.00% +0.17% / -0.02% +1.27% +0.90%] index_add_ reverse : Elapsed 0.066 ms (6.571 ms / 100) 5.964 -> 5.938 ( -0.44%) [ +0.10% +0.30% +0.00% / -0.44% -0.07% -0.15%] index_copy_ reverse : Elapsed 0.060 ms (5.970 ms / 100) 6.663 -> 6.616 ( -0.71%) [ +0.00% +0.03% +0.51% / -0.09% -0.71% -0.30%] index_add_ spread : Elapsed 0.067 ms (6.663 ms / 100) 6.035 -> 5.960 ( -1.24%) [ +0.65% +0.00% +0.81% / -0.05% -1.24% -1.16%] index_copy_ spread : Elapsed 0.061 ms (6.074 ms / 100) 6.721 -> 6.557 ( -2.44%) [ +0.00% +0.03% +0.54% / -0.30% -2.31% -2.44%] index_add_ strided 3 : Elapsed 0.067 ms (6.721 ms / 100) 6.077 -> 5.960 ( -1.93%) [ +0.00% +0.13% +0.77% / -0.46% -1.93% -1.93%] index_copy_ strided 3 : Elapsed 0.061 ms (6.077 ms / 100) 6.591 -> 6.617 ( +0.39%) [ +0.49% +0.00% +0.56% / +0.39% +1.55% +1.49%] index_add_ strided 7 : Elapsed 0.066 ms (6.623 ms / 100) 6.035 -> 6.006 ( -0.48%) [ +0.08% +0.00% +0.41% / -0.48% +0.13% -0.10%] index_copy_ strided 7 : Elapsed 0.060 ms (6.040 ms / 100) 6.779 -> 6.700 ( -1.17%) [ +0.00% +0.55% +0.63% / +0.32% -1.17% -0.97%] index_add_ perm : Elapsed 0.068 ms (6.779 ms / 100) 6.091 -> 5.986 ( -1.72%) [ +0.00% +0.03% +0.30% / -1.05% -1.67% -1.72%] index_copy_ perm : Elapsed 0.061 ms (6.091 ms / 100) 6.668 -> 6.623 ( -0.67%) [ +0.00% +0.13% +0.36% / -0.28% -0.67% -0.51%] index_add_ perm_sorted : Elapsed 0.067 ms (6.668 ms / 100) 6.036 -> 5.970 ( -1.09%) [ +0.30% +0.00% +0.00% / -0.89% -0.98% -1.09%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.054 ms / 100) 10.918 -> 10.933 ( +0.14%) [ +0.05% +0.00% +0.09% / +0.14% +0.76% +0.67%] index_select const : Elapsed 0.109 ms (10.924 ms / 100) 11.550 -> 11.608 ( +0.50%) [ +0.75% +0.35% +0.00% / +0.50% +1.98% +1.88%] index_select wrap : Elapsed 0.116 ms (11.637 ms / 100) 11.165 -> 11.163 ( -0.02%) [ +0.03% +0.00% +0.01% / +0.07% -0.02% +0.12%] index_select linear : Elapsed 0.112 ms (11.168 ms / 100) 10.972 -> 10.983 ( +0.10%) [ +0.00% +0.00% +0.09% / +0.10% +1.76% +1.80%] index_select reverse : Elapsed 0.110 ms (10.972 ms / 100) 10.971 -> 10.988 ( +0.15%) [ +0.04% +0.05% +0.00% / +0.15% +0.42% +0.57%] index_select skip64 : Elapsed 0.110 ms (10.975 ms / 100) 10.892 -> 10.900 ( +0.07%) [ +0.06% +0.00% +0.17% / +0.07% +1.07% +1.13%] index_select skip256 : Elapsed 0.109 ms (10.898 ms / 100) 10.774 -> 10.795 ( +0.19%) [ +0.19% +0.10% +0.00% / +0.19% +0.62% +0.75%] index_select spread : Elapsed 0.108 ms (10.795 ms / 100) 11.664 -> 11.729 ( +0.56%) [ +0.00% +0.44% +0.60% / +0.56% +3.29% +2.89%] index_select strided 3 : Elapsed 0.117 ms (11.664 ms / 100) 10.690 -> 10.707 ( +0.16%) [ +0.06% +0.00% +0.04% / +0.16% +0.65% +0.68%] index_select strided 5 : Elapsed 0.107 ms (10.696 ms / 100) 11.636 -> 11.620 ( -0.14%) [ +0.21% +0.09% +0.00% / -0.14% +2.70% +2.60%] index_select strided 7 : Elapsed 0.117 ms (11.661 ms / 100) 11.215 -> 11.227 ( +0.11%) [ +0.00% +0.03% +0.16% / +0.11% +2.27% +2.20%] index_select strided 8 : Elapsed 0.112 ms (11.215 ms / 100) 11.151 -> 11.133 ( -0.16%) [ +0.02% +0.00% +0.22% / -0.16% +2.94% +2.42%] index_select strided 16 : Elapsed 0.112 ms (11.153 ms / 100) 11.135 -> 11.159 ( +0.22%) [ +0.00% +0.17% +0.06% / +0.22% +1.61% +1.95%] index_select random : Elapsed 0.111 ms (11.135 ms / 100) 10.720 -> 10.741 ( +0.20%) [ +0.04% +0.00% +0.07% / +0.20% +1.33% +1.39%] index_select random_sorted : Elapsed 0.107 ms (10.724 ms / 100) B = [150, 15, 250] (stride (15, 1, 2250)) A = [150, 15, 50] (stride (15, 1, 2250)) dim = 2 5.209 -> 5.204 ( -0.10%) [ +0.00% +0.31% +0.27% / -0.10% +0.36% +0.23%] index_add_ linear : Elapsed 0.052 ms (5.209 ms / 100) 5.070 -> 5.075 ( +0.10%) [ +0.00% +0.43% +0.20% / +0.10% +0.39% +0.49%] index_copy_ linear : Elapsed 0.051 ms (5.070 ms / 100) 5.240 -> 5.233 ( -0.13%) [ +0.17% +0.00% +0.57% / +0.11% +0.00% -0.13%] index_add_ reverse : Elapsed 0.052 ms (5.249 ms / 100) 5.085 -> 5.059 ( -0.51%) [ +0.00% +0.00% +0.28% / -0.10% -0.35% -0.51%] index_copy_ reverse : Elapsed 0.051 ms (5.085 ms / 100) 5.250 -> 5.227 ( -0.44%) [ +0.00% +0.13% +0.00% / -0.44% -0.32% -0.27%] index_add_ spread : Elapsed 0.052 ms (5.250 ms / 100) 5.087 -> 5.081 ( -0.12%) [ +0.00% +0.31% +0.06% / -0.12% +0.06% -0.08%] index_copy_ spread : Elapsed 0.051 ms (5.087 ms / 100) 5.258 -> 5.243 ( -0.29%) [ +0.00% +0.04% +0.04% / -0.27% -0.27% -0.29%] index_add_ strided 3 : Elapsed 0.053 ms (5.258 ms / 100) 5.090 -> 5.061 ( -0.57%) [ +0.08% +0.04% +0.00% / -0.43% -0.24% -0.57%] index_copy_ strided 3 : Elapsed 0.051 ms (5.094 ms / 100) 5.277 -> 5.226 ( -0.97%) [ +0.00% +0.19% +0.19% / -0.04% -0.91% -0.97%] index_add_ strided 7 : Elapsed 0.053 ms (5.277 ms / 100) 5.097 -> 5.060 ( -0.73%) [ +0.00% +0.08% +0.20% / +0.08% -0.61% -0.73%] index_copy_ strided 7 : Elapsed 0.051 ms (5.097 ms / 100) 5.269 -> 5.241 ( -0.53%) [ +0.00% +0.08% +0.93% / -0.02% -0.42% -0.53%] index_add_ perm : Elapsed 0.053 ms (5.269 ms / 100) 5.095 -> 5.062 ( -0.65%) [ +0.06% +0.00% +0.49% / +0.02% -0.65% -0.63%] index_copy_ perm : Elapsed 0.051 ms (5.098 ms / 100) 5.260 -> 5.243 ( -0.32%) [ +0.13% +0.00% +0.02% / -0.32% -0.10% -0.30%] index_add_ perm_sorted : Elapsed 0.053 ms (5.267 ms / 100) 5.085 -> 5.077 ( -0.16%) [ +0.20% +0.16% +0.00% / -0.16% -0.06% -0.02%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.095 ms / 100) 6.875 -> 6.906 ( +0.45%) [ +0.36% +0.00% +0.23% / +0.45% +0.65% +0.57%] index_select const : Elapsed 0.069 ms (6.900 ms / 100) 7.610 -> 7.626 ( +0.21%) [ +0.16% +0.00% +0.25% / +0.33% +0.21% +0.42%] index_select wrap : Elapsed 0.076 ms (7.622 ms / 100) 7.061 -> 7.087 ( +0.37%) [ +0.31% +0.00% +0.17% / +0.37% +0.44% +0.64%] index_select linear : Elapsed 0.071 ms (7.083 ms / 100) 7.179 -> 7.194 ( +0.21%) [ +0.49% +0.00% +0.08% / +0.22% +0.28% +0.21%] index_select reverse : Elapsed 0.072 ms (7.214 ms / 100) 6.886 -> 6.902 ( +0.23%) [ +0.49% +0.00% +0.09% / +0.38% +0.35% +0.23%] index_select skip64 : Elapsed 0.069 ms (6.920 ms / 100) 6.884 -> 6.893 ( +0.13%) [ +0.22% +0.06% +0.00% / +0.13% +0.62% +0.70%] index_select skip256 : Elapsed 0.069 ms (6.899 ms / 100) 7.194 -> 7.209 ( +0.21%) [ +0.28% +0.00% +0.14% / +0.21% +0.29% +0.44%] index_select spread : Elapsed 0.072 ms (7.214 ms / 100) 7.638 -> 7.634 ( -0.05%) [ +0.21% +0.00% +0.00% / +0.27% +0.05% -0.05%] index_select strided 3 : Elapsed 0.077 ms (7.654 ms / 100) 7.010 -> 7.028 ( +0.26%) [ +0.43% +0.16% +0.00% / +0.26% +0.47% +0.37%] index_select strided 5 : Elapsed 0.070 ms (7.040 ms / 100) 7.625 -> 7.641 ( +0.21%) [ +0.10% +0.01% +0.00% / +0.26% +0.21% +0.22%] index_select strided 7 : Elapsed 0.076 ms (7.633 ms / 100) 7.313 -> 7.282 ( -0.42%) [ +0.15% +0.00% +0.26% / +0.31% -0.36% -0.42%] index_select strided 8 : Elapsed 0.073 ms (7.324 ms / 100) 7.318 -> 7.299 ( -0.26%) [ +0.14% +0.10% +0.00% / +0.03% -0.26% -0.26%] index_select strided 16 : Elapsed 0.073 ms (7.328 ms / 100) 7.457 -> 7.459 ( +0.03%) [ +0.19% +0.00% +0.09% / +0.03% +0.34% +0.34%] index_select random : Elapsed 0.075 ms (7.471 ms / 100) 7.194 -> 7.207 ( +0.18%) [ +0.21% +0.03% +0.00% / +0.18% +0.35% +0.40%] index_select random_sorted : Elapsed 0.072 ms (7.209 ms / 100) B = [150, 15, 250] (stride (1, 150, 2250)) A = [150, 15, 50] (stride (1, 7500, 150)) dim = 2 6.233 -> 6.207 ( -0.42%) [ +0.00% +0.14% +0.00% / -0.42% +0.69% +0.59%] index_add_ linear : Elapsed 0.062 ms (6.233 ms / 100) 5.924 -> 5.895 ( -0.49%) [ +0.19% +0.12% +0.00% / -0.49% +0.20% -0.05%] index_copy_ linear : Elapsed 0.059 ms (5.935 ms / 100) 6.262 -> 6.251 ( -0.18%) [ +0.32% +0.21% +0.00% / -0.18% -0.05% +0.14%] index_add_ reverse : Elapsed 0.063 ms (6.282 ms / 100) 5.928 -> 5.895 ( -0.56%) [ +0.24% +0.30% +0.00% / -0.56% +0.08% +0.25%] index_copy_ reverse : Elapsed 0.059 ms (5.942 ms / 100) 6.326 -> 6.354 ( +0.44%) [ +0.00% +0.25% +0.36% / +0.44% +0.82% +1.20%] index_add_ spread : Elapsed 0.063 ms (6.326 ms / 100) 5.938 -> 5.889 ( -0.83%) [ +0.03% +0.20% +0.00% / -0.83% +0.25% +0.29%] index_copy_ spread : Elapsed 0.059 ms (5.940 ms / 100) 6.410 -> 6.313 ( -1.51%) [ +0.08% +0.02% +0.00% / -0.51% -1.51% -1.42%] index_add_ strided 3 : Elapsed 0.064 ms (6.415 ms / 100) 5.980 -> 5.944 ( -0.60%) [ +0.45% +0.23% +0.00% / -0.54% -0.60% -0.48%] index_copy_ strided 3 : Elapsed 0.060 ms (6.007 ms / 100) 6.392 -> 6.370 ( -0.34%) [ +0.16% +0.00% +0.28% / -0.34% +0.13% +0.17%] index_add_ strided 7 : Elapsed 0.064 ms (6.402 ms / 100) 5.955 -> 5.916 ( -0.65%) [ +0.07% +0.00% +0.32% / -0.65% -0.57% -0.13%] index_copy_ strided 7 : Elapsed 0.060 ms (5.959 ms / 100) 6.430 -> 6.411 ( -0.30%) [ +0.00% +0.26% +0.09% / -0.30% +0.76% +0.70%] index_add_ perm : Elapsed 0.064 ms (6.430 ms / 100) 5.986 -> 5.945 ( -0.68%) [ +0.12% +0.17% +0.00% / -0.68% -0.38% -0.32%] index_copy_ perm : Elapsed 0.060 ms (5.993 ms / 100) 6.376 -> 6.289 ( -1.36%) [ +0.38% +0.08% +0.00% / -0.17% -1.14% -1.36%] index_add_ perm_sorted : Elapsed 0.064 ms (6.400 ms / 100) 5.957 -> 5.916 ( -0.69%) [ +0.42% +0.00% +0.35% / -0.30% -0.22% -0.69%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.982 ms / 100) 10.757 -> 10.781 ( +0.22%) [ +0.00% +0.14% +0.10% / +0.22% +0.58% +0.58%] index_select const : Elapsed 0.108 ms (10.757 ms / 100) 11.735 -> 11.708 ( -0.23%) [ +0.18% +0.00% +0.18% / -0.23% +1.82% +1.91%] index_select wrap : Elapsed 0.118 ms (11.756 ms / 100) 10.929 -> 10.911 ( -0.16%) [ +0.05% +0.02% +0.00% / +0.00% -0.16% -0.12%] index_select linear : Elapsed 0.109 ms (10.934 ms / 100) 10.633 -> 10.644 ( +0.10%) [ +0.00% +0.06% +0.00% / +0.10% +0.38% +0.44%] index_select reverse : Elapsed 0.106 ms (10.633 ms / 100) 10.753 -> 10.754 ( +0.01%) [ +0.18% +0.00% +0.10% / +0.01% +0.60% +0.60%] index_select skip64 : Elapsed 0.108 ms (10.772 ms / 100) 11.041 -> 11.052 ( +0.10%) [ +0.05% +0.00% +0.03% / +0.10% +0.33% +0.28%] index_select skip256 : Elapsed 0.110 ms (11.047 ms / 100) 10.497 -> 10.470 ( -0.26%) [ +0.20% +0.00% +0.24% / +0.06% -0.26% -0.16%] index_select spread : Elapsed 0.105 ms (10.518 ms / 100) 12.058 -> 12.034 ( -0.20%) [ +0.00% +0.04% +0.25% / +0.10% -0.20% -0.02%] index_select strided 3 : Elapsed 0.121 ms (12.058 ms / 100) 10.519 -> 10.523 ( +0.04%) [ +0.01% +0.06% +0.00% / +0.04% +0.30% +0.31%] index_select strided 5 : Elapsed 0.105 ms (10.520 ms / 100) 11.659 -> 11.654 ( -0.04%) [ +0.00% +0.55% +0.54% / -0.04% +1.80% +1.44%] index_select strided 7 : Elapsed 0.117 ms (11.659 ms / 100) 11.283 -> 11.154 ( -1.14%) [ +0.00% +0.04% +0.10% / +0.12% -1.14% -1.09%] index_select strided 8 : Elapsed 0.113 ms (11.283 ms / 100) 11.241 -> 11.053 ( -1.67%) [ +0.00% +0.12% +0.24% / -0.28% -1.62% -1.67%] index_select strided 16 : Elapsed 0.112 ms (11.241 ms / 100) 11.117 -> 11.151 ( +0.31%) [ +0.31% +0.00% +0.22% / +0.31% +0.72% +1.16%] index_select random : Elapsed 0.112 ms (11.152 ms / 100) 10.449 -> 10.464 ( +0.14%) [ +0.08% +0.00% +0.12% / +0.14% +0.42% +0.40%] index_select random_sorted : Elapsed 0.105 ms (10.457 ms / 100) out_shape = [250, 50, 15] in_shape = [150, 50, 15] idx_dim = 0 B = [250, 50, 15] (stride (750, 15, 1)) A = [150, 50, 15] (stride (15, 2250, 1)) dim = 0 9.473 -> 9.464 ( -0.10%) [ +0.11% +0.03% +0.00% / +0.03% -0.10% +0.11%] index_add_ linear : Elapsed 0.095 ms (9.483 ms / 100) 9.351 -> 9.346 ( -0.05%) [ +0.04% +0.01% +0.00% / +0.03% -0.05% +0.07%] index_copy_ linear : Elapsed 0.094 ms (9.355 ms / 100) 9.509 -> 9.500 ( -0.09%) [ +0.00% +0.12% +0.08% / +0.13% -0.01% -0.09%] index_add_ reverse : Elapsed 0.095 ms (9.509 ms / 100) 9.373 -> 9.362 ( -0.12%) [ +0.01% +0.07% +0.00% / +0.22% -0.12% -0.06%] index_copy_ reverse : Elapsed 0.094 ms (9.374 ms / 100) 9.470 -> 9.474 ( +0.04%) [ +0.08% +0.00% +0.20% / +0.04% +0.91% +0.97%] index_add_ spread : Elapsed 0.095 ms (9.478 ms / 100) 9.336 -> 9.326 ( -0.11%) [ +0.05% +0.00% +0.16% / -0.11% +0.62% +0.82%] index_copy_ spread : Elapsed 0.093 ms (9.341 ms / 100) 9.522 -> 9.524 ( +0.02%) [ +0.14% +0.04% +0.00% / +0.03% +0.02% +0.21%] index_add_ strided 3 : Elapsed 0.095 ms (9.535 ms / 100) 9.371 -> 9.366 ( -0.05%) [ +0.00% +0.06% +0.06% / -0.05% +0.04% +0.10%] index_copy_ strided 3 : Elapsed 0.094 ms (9.371 ms / 100) 9.534 -> 9.550 ( +0.17%) [ +0.01% +0.00% +0.15% / +0.17% +0.22% +0.33%] index_add_ strided 7 : Elapsed 0.095 ms (9.535 ms / 100) 9.386 -> 9.377 ( -0.10%) [ +0.04% +0.14% +0.00% / -0.10% +0.01% +0.31%] index_copy_ strided 7 : Elapsed 0.094 ms (9.390 ms / 100) 9.581 -> 9.588 ( +0.07%) [ +0.04% +0.00% +0.08% / +0.14% +0.21% +0.07%] index_add_ perm : Elapsed 0.096 ms (9.585 ms / 100) 9.399 -> 9.392 ( -0.07%) [ +0.13% +0.00% +0.19% / +0.17% -0.07% +0.18%] index_copy_ perm : Elapsed 0.094 ms (9.411 ms / 100) 9.530 -> 9.520 ( -0.10%) [ +0.00% +0.05% +0.12% / -0.09% -0.10% -0.05%] index_add_ perm_sorted : Elapsed 0.095 ms (9.530 ms / 100) 9.385 -> 9.359 ( -0.28%) [ +0.00% +0.05% +0.15% / -0.05% -0.28% -0.07%] index_copy_ perm_sorted : Elapsed 0.094 ms (9.385 ms / 100) 10.213 -> 10.228 ( +0.15%) [ +0.27% +0.14% +0.00% / +0.15% +0.31% +0.29%] index_select const : Elapsed 0.102 ms (10.241 ms / 100) 11.121 -> 11.122 ( +0.01%) [ +0.02% +0.00% +0.09% / +0.01% +0.16% +0.22%] index_select wrap : Elapsed 0.111 ms (11.123 ms / 100) 10.803 -> 10.817 ( +0.13%) [ +0.31% +0.13% +0.00% / +0.30% +0.14% +0.13%] index_select linear : Elapsed 0.108 ms (10.837 ms / 100) 10.815 -> 10.847 ( +0.30%) [ +0.15% +0.00% +0.06% / +0.30% +0.69% +0.64%] index_select reverse : Elapsed 0.108 ms (10.831 ms / 100) 10.237 -> 10.268 ( +0.30%) [ +0.25% +0.16% +0.00% / +0.30% +0.59% +0.48%] index_select skip64 : Elapsed 0.103 ms (10.263 ms / 100) 10.217 -> 10.233 ( +0.16%) [ +0.17% +0.00% +0.00% / +0.16% +0.20% +0.39%] index_select skip256 : Elapsed 0.102 ms (10.234 ms / 100) 10.803 -> 10.837 ( +0.31%) [ +0.21% +0.17% +0.00% / +0.31% +0.59% +0.44%] index_select spread : Elapsed 0.108 ms (10.826 ms / 100) 11.324 -> 11.349 ( +0.22%) [ +0.07% +0.00% +0.00% / +0.22% +0.50% +0.49%] index_select strided 3 : Elapsed 0.113 ms (11.332 ms / 100) 11.006 -> 11.060 ( +0.49%) [ +0.23% +0.00% +0.17% / +0.49% +0.65% +0.64%] index_select strided 5 : Elapsed 0.110 ms (11.031 ms / 100) 11.508 -> 11.530 ( +0.19%) [ +0.17% +0.00% +0.03% / +0.19% +0.77% +0.56%] index_select strided 7 : Elapsed 0.115 ms (11.527 ms / 100) 11.396 -> 11.407 ( +0.10%) [ +0.04% +0.00% +0.12% / +0.10% +0.51% +0.53%] index_select strided 8 : Elapsed 0.114 ms (11.400 ms / 100) 11.417 -> 11.445 ( +0.25%) [ +0.00% +0.23% +0.07% / +0.25% +0.46% +0.25%] index_select strided 16 : Elapsed 0.114 ms (11.417 ms / 100) 11.331 -> 11.359 ( +0.25%) [ +0.07% +0.00% +0.03% / +0.25% +0.96% +0.92%] index_select strided 64 : Elapsed 0.113 ms (11.339 ms / 100) 10.278 -> 10.278 ( +0.00%) [ +0.18% +0.17% +0.00% / +0.35% +0.11% +0.00%] index_select strided 100 : Elapsed 0.103 ms (10.296 ms / 100) 11.250 -> 11.256 ( +0.05%) [ +0.01% +0.00% +0.12% / +0.05% +0.67% +0.61%] index_select random : Elapsed 0.113 ms (11.251 ms / 100) 10.792 -> 10.782 ( -0.09%) [ +0.00% +0.09% +0.02% / -0.09% +0.06% +0.13%] index_select random_sorted : Elapsed 0.108 ms (10.792 ms / 100) B = [250, 50, 15] (stride (750, 15, 1)) A = [150, 50, 15] (stride (50, 1, 7500)) dim = 0 9.417 -> 9.402 ( -0.16%) [ +0.12% +0.00% +0.28% / +0.38% -0.01% -0.16%] index_add_ linear : Elapsed 0.094 ms (9.428 ms / 100) 9.329 -> 9.310 ( -0.20%) [ +0.03% +0.12% +0.00% / +0.18% -0.20% -0.09%] index_copy_ linear : Elapsed 0.093 ms (9.332 ms / 100) 9.471 -> 9.448 ( -0.24%) [ +0.26% +0.00% +0.16% / +0.00% -0.24% -0.01%] index_add_ reverse : Elapsed 0.095 ms (9.496 ms / 100) 9.328 -> 9.339 ( +0.12%) [ +0.36% +0.32% +0.00% / +0.20% +0.12% +0.17%] index_copy_ reverse : Elapsed 0.094 ms (9.362 ms / 100) 9.438 -> 9.470 ( +0.34%) [ +0.20% +0.00% +0.22% / +0.34% +0.72% +0.48%] index_add_ spread : Elapsed 0.095 ms (9.457 ms / 100) 9.313 -> 9.328 ( +0.16%) [ +0.19% +0.02% +0.00% / +0.16% +0.77% +0.35%] index_copy_ spread : Elapsed 0.093 ms (9.331 ms / 100) 9.464 -> 9.481 ( +0.18%) [ +0.13% +0.00% +0.03% / +0.24% +0.18% +0.44%] index_add_ strided 3 : Elapsed 0.095 ms (9.476 ms / 100) 9.330 -> 9.351 ( +0.23%) [ +0.06% +0.20% +0.00% / +0.27% +0.23% +0.34%] index_copy_ strided 3 : Elapsed 0.093 ms (9.336 ms / 100) 9.505 -> 9.502 ( -0.03%) [ +0.09% +0.02% +0.00% / +0.37% -0.03% +0.21%] index_add_ strided 7 : Elapsed 0.095 ms (9.514 ms / 100) 9.367 -> 9.369 ( +0.02%) [ +0.11% +0.00% +0.12% / +0.31% +0.02% +0.06%] index_copy_ strided 7 : Elapsed 0.094 ms (9.377 ms / 100) 9.521 -> 9.533 ( +0.13%) [ +0.24% +0.00% +0.14% / +0.22% +0.20% +0.13%] index_add_ perm : Elapsed 0.095 ms (9.544 ms / 100) 9.388 -> 9.369 ( -0.20%) [ +0.00% +0.05% +0.04% / +0.16% -0.20% -0.05%] index_copy_ perm : Elapsed 0.094 ms (9.388 ms / 100) 9.457 -> 9.455 ( -0.02%) [ +0.00% +0.03% +0.13% / +0.40% -0.01% -0.02%] index_add_ perm_sorted : Elapsed 0.095 ms (9.457 ms / 100) 9.349 -> 9.333 ( -0.17%) [ +0.15% +0.00% +0.09% / +0.31% -0.17% -0.11%] index_copy_ perm_sorted : Elapsed 0.094 ms (9.363 ms / 100) 10.238 -> 10.272 ( +0.33%) [ +0.19% +0.00% +0.05% / +0.33% +0.38% +0.40%] index_select const : Elapsed 0.103 ms (10.257 ms / 100) 11.192 -> 11.155 ( -0.33%) [ +0.26% +0.06% +0.00% / +0.07% -0.18% -0.33%] index_select wrap : Elapsed 0.112 ms (11.221 ms / 100) 10.869 -> 10.880 ( +0.10%) [ +0.06% +0.06% +0.00% / +0.13% +0.10% +0.23%] index_select linear : Elapsed 0.109 ms (10.875 ms / 100) 10.919 -> 10.919 ( +0.00%) [ +0.00% +0.04% +0.05% / +0.27% +0.00% +0.16%] index_select reverse : Elapsed 0.109 ms (10.919 ms / 100) 10.261 -> 10.280 ( +0.19%) [ +0.04% +0.00% +0.03% / +0.19% +0.28% +0.43%] index_select skip64 : Elapsed 0.103 ms (10.265 ms / 100) 10.242 -> 10.258 ( +0.16%) [ +0.05% +0.00% +0.12% / +0.16% +0.44% +0.32%] index_select skip256 : Elapsed 0.102 ms (10.247 ms / 100) 10.859 -> 10.827 ( -0.29%) [ +0.15% +0.05% +0.00% / +0.28% -0.15% -0.29%] index_select spread : Elapsed 0.109 ms (10.875 ms / 100) 11.144 -> 11.113 ( -0.28%) [ +0.12% +0.19% +0.00% / +0.17% -0.28% -0.20%] index_select strided 3 : Elapsed 0.112 ms (11.157 ms / 100) 10.676 -> 10.698 ( +0.21%) [ +0.21% +0.00% +0.09% / +0.21% +0.51% +0.51%] index_select strided 5 : Elapsed 0.107 ms (10.698 ms / 100) 11.565 -> 11.532 ( -0.29%) [ +0.18% +0.00% +0.08% / +0.19% -0.19% -0.29%] index_select strided 7 : Elapsed 0.116 ms (11.586 ms / 100) 11.432 -> 11.388 ( -0.38%) [ +0.27% +0.00% +0.10% / +0.08% -0.38% -0.13%] index_select strided 8 : Elapsed 0.115 ms (11.463 ms / 100) 11.411 -> 11.414 ( +0.03%) [ +0.28% +0.00% +0.02% / +0.05% +0.04% +0.03%] index_select strided 16 : Elapsed 0.114 ms (11.443 ms / 100) 11.335 -> 11.339 ( +0.04%) [ +0.00% +0.05% +0.11% / +0.26% +0.04% +0.06%] index_select strided 64 : Elapsed 0.113 ms (11.335 ms / 100) 10.271 -> 10.290 ( +0.18%) [ +0.13% +0.07% +0.00% / +0.18% +0.39% +0.32%] index_select strided 100 : Elapsed 0.103 ms (10.284 ms / 100) 11.286 -> 11.214 ( -0.64%) [ +0.19% +0.13% +0.00% / +0.25% -0.64% -0.49%] index_select random : Elapsed 0.113 ms (11.308 ms / 100) 10.773 -> 10.785 ( +0.11%) [ +0.24% +0.00% +0.08% / +0.19% +0.11% +0.16%] index_select random_sorted : Elapsed 0.108 ms (10.799 ms / 100) B = [250, 50, 15] (stride (750, 15, 1)) A = [150, 50, 15] (stride (1, 150, 7500)) dim = 0 9.720 -> 9.717 ( -0.03%) [ +0.00% +0.04% +0.02% / -0.03% +0.35% +0.23%] index_add_ linear : Elapsed 0.097 ms (9.720 ms / 100) 9.609 -> 9.601 ( -0.08%) [ +0.12% +0.00% +0.09% / -0.08% +0.27% +0.07%] index_copy_ linear : Elapsed 0.096 ms (9.621 ms / 100) 9.750 -> 9.717 ( -0.34%) [ +0.16% +0.00% +0.24% / -0.34% -0.08% +0.01%] index_add_ reverse : Elapsed 0.098 ms (9.766 ms / 100) 9.614 -> 9.591 ( -0.24%) [ +0.00% +0.01% +0.02% / -0.24% +0.03% +0.07%] index_copy_ reverse : Elapsed 0.096 ms (9.614 ms / 100) 9.739 -> 9.709 ( -0.31%) [ +0.01% +0.00% +0.05% / -0.31% +0.80% +0.97%] index_add_ spread : Elapsed 0.097 ms (9.740 ms / 100) 9.619 -> 9.594 ( -0.26%) [ +0.00% +0.02% +0.03% / -0.26% +0.48% +0.48%] index_copy_ spread : Elapsed 0.096 ms (9.619 ms / 100) 9.752 -> 9.733 ( -0.19%) [ +0.06% +0.00% +0.00% / -0.19% +0.33% +0.68%] index_add_ strided 3 : Elapsed 0.098 ms (9.758 ms / 100) 9.605 -> 9.599 ( -0.06%) [ +0.21% +0.00% +0.01% / -0.06% +0.36% +0.70%] index_copy_ strided 3 : Elapsed 0.096 ms (9.625 ms / 100) 9.800 -> 9.795 ( -0.05%) [ +0.12% +0.21% +0.00% / -0.05% -0.01% +0.01%] index_add_ strided 7 : Elapsed 0.098 ms (9.812 ms / 100) 9.638 -> 9.634 ( -0.04%) [ +0.15% +0.00% +0.27% / -0.04% +0.17% +0.22%] index_copy_ strided 7 : Elapsed 0.097 ms (9.652 ms / 100) 9.857 -> 9.843 ( -0.14%) [ +0.39% +0.00% +0.08% / +0.02% -0.14% +0.09%] index_add_ perm : Elapsed 0.099 ms (9.895 ms / 100) 9.704 -> 9.669 ( -0.36%) [ +0.18% +0.11% +0.00% / -0.24% -0.36% -0.19%] index_copy_ perm : Elapsed 0.097 ms (9.721 ms / 100) 9.780 -> 9.738 ( -0.43%) [ +0.00% +0.21% +0.02% / -0.43% +0.03% +0.08%] index_add_ perm_sorted : Elapsed 0.098 ms (9.780 ms / 100) 9.638 -> 9.597 ( -0.43%) [ +0.01% +0.07% +0.00% / -0.43% +0.00% -0.21%] index_copy_ perm_sorted : Elapsed 0.096 ms (9.639 ms / 100) 10.517 -> 10.533 ( +0.15%) [ +0.17% +0.00% +0.03% / +0.15% +0.24% +0.27%] index_select const : Elapsed 0.105 ms (10.535 ms / 100) 11.595 -> 11.602 ( +0.06%) [ +0.09% +0.09% +0.00% / +0.06% +0.21% +0.41%] index_select wrap : Elapsed 0.116 ms (11.606 ms / 100) 11.250 -> 11.266 ( +0.14%) [ +0.00% +0.05% +0.04% / +0.14% +0.39% +0.42%] index_select linear : Elapsed 0.113 ms (11.250 ms / 100) 11.310 -> 11.319 ( +0.08%) [ +0.16% +0.00% +0.19% / +0.09% +0.08% +0.21%] index_select reverse : Elapsed 0.113 ms (11.328 ms / 100) 10.685 -> 10.710 ( +0.23%) [ +0.19% +0.00% +0.08% / +0.23% +0.44% +0.39%] index_select skip64 : Elapsed 0.107 ms (10.705 ms / 100) 10.517 -> 10.532 ( +0.14%) [ +0.28% +0.00% +0.14% / +0.29% +0.22% +0.14%] index_select skip256 : Elapsed 0.105 ms (10.546 ms / 100) 11.273 -> 11.264 ( -0.08%) [ +0.01% +0.00% +0.04% / -0.08% +0.16% +0.36%] index_select spread : Elapsed 0.113 ms (11.274 ms / 100) 12.239 -> 12.228 ( -0.09%) [ +0.08% +0.00% +0.01% / +0.07% -0.08% -0.09%] index_select strided 3 : Elapsed 0.122 ms (12.249 ms / 100) 11.968 -> 12.002 ( +0.28%) [ +0.08% +0.03% +0.00% / +0.28% +0.31% +0.30%] index_select strided 5 : Elapsed 0.120 ms (11.977 ms / 100) 11.955 -> 11.970 ( +0.13%) [ +0.08% +0.00% +0.08% / +0.19% +0.13% +0.13%] index_select strided 7 : Elapsed 0.120 ms (11.965 ms / 100) 11.956 -> 11.965 ( +0.08%) [ +0.00% +0.11% +0.17% / +0.17% +0.30% +0.08%] index_select strided 8 : Elapsed 0.120 ms (11.956 ms / 100) 11.911 -> 11.922 ( +0.09%) [ +0.14% +0.00% +0.12% / +0.09% +0.16% +0.24%] index_select strided 16 : Elapsed 0.119 ms (11.928 ms / 100) 12.018 -> 12.014 ( -0.03%) [ +0.03% +0.04% +0.00% / +0.16% +0.20% -0.03%] index_select strided 64 : Elapsed 0.120 ms (12.022 ms / 100) 11.097 -> 11.078 ( -0.17%) [ +0.12% +0.00% +0.03% / +0.14% -0.10% -0.17%] index_select strided 100 : Elapsed 0.111 ms (11.110 ms / 100) 12.049 -> 12.077 ( +0.23%) [ +0.17% +0.07% +0.00% / +0.23% +0.25% +0.34%] index_select random : Elapsed 0.121 ms (12.069 ms / 100) 11.245 -> 11.280 ( +0.31%) [ +0.12% +0.00% +0.13% / +0.31% +0.33% +0.43%] index_select random_sorted : Elapsed 0.113 ms (11.258 ms / 100) B = [250, 50, 15] (stride (750, 1, 50)) A = [150, 50, 15] (stride (750, 15, 1)) dim = 0 9.233 -> 9.210 ( -0.25%) [ +0.17% +0.00% +0.05% / +0.04% -0.18% -0.25%] index_add_ linear : Elapsed 0.092 ms (9.249 ms / 100) 9.087 -> 9.073 ( -0.15%) [ +0.00% +0.14% +0.28% / +0.04% -0.15% -0.02%] index_copy_ linear : Elapsed 0.091 ms (9.087 ms / 100) 9.227 -> 9.220 ( -0.08%) [ +0.21% +0.00% +0.10% / -0.08% +0.23% +0.13%] index_add_ reverse : Elapsed 0.092 ms (9.246 ms / 100) 9.092 -> 9.089 ( -0.03%) [ +0.10% +0.00% +0.14% / -0.03% +0.24% +0.25%] index_copy_ reverse : Elapsed 0.091 ms (9.101 ms / 100) 9.223 -> 9.226 ( +0.03%) [ +0.03% +0.00% +0.18% / +0.03% +0.95% +0.66%] index_add_ spread : Elapsed 0.092 ms (9.226 ms / 100) 9.075 -> 9.078 ( +0.03%) [ +0.12% +0.00% +0.11% / +0.03% +0.89% +0.66%] index_copy_ spread : Elapsed 0.091 ms (9.086 ms / 100) 9.220 -> 9.222 ( +0.02%) [ +0.27% +0.00% +0.17% / +0.02% +1.00% +0.94%] index_add_ strided 3 : Elapsed 0.092 ms (9.245 ms / 100) 9.081 -> 9.085 ( +0.04%) [ +0.00% +0.10% +0.11% / +0.04% +0.48% +0.57%] index_copy_ strided 3 : Elapsed 0.091 ms (9.081 ms / 100) 9.288 -> 9.294 ( +0.06%) [ +0.00% +0.15% +0.08% / +0.06% +0.23% +0.28%] index_add_ strided 7 : Elapsed 0.093 ms (9.288 ms / 100) 9.111 -> 9.122 ( +0.12%) [ +0.04% +0.15% +0.00% / +0.12% +0.46% +0.26%] index_copy_ strided 7 : Elapsed 0.091 ms (9.115 ms / 100) 9.337 -> 9.303 ( -0.36%) [ +0.15% +0.00% +0.06% / +0.37% -0.36% -0.15%] index_add_ perm : Elapsed 0.094 ms (9.351 ms / 100) 9.117 -> 9.124 ( +0.08%) [ +0.32% +0.00% +0.26% / +0.37% +0.08% +0.23%] index_copy_ perm : Elapsed 0.091 ms (9.146 ms / 100) 9.251 -> 9.236 ( -0.16%) [ +0.12% +0.12% +0.00% / -0.16% +0.62% +0.72%] index_add_ perm_sorted : Elapsed 0.093 ms (9.262 ms / 100) 9.101 -> 9.087 ( -0.15%) [ +0.10% +0.00% +0.08% / -0.15% +0.43% +0.49%] index_copy_ perm_sorted : Elapsed 0.091 ms (9.110 ms / 100) 10.330 -> 10.314 ( -0.15%) [ +0.15% +0.00% +0.03% / +0.11% -0.15% +0.00%] index_select const : Elapsed 0.103 ms (10.345 ms / 100) 10.799 -> 10.819 ( +0.19%) [ +0.24% +0.12% +0.00% / +0.19% +0.33% +0.27%] index_select wrap : Elapsed 0.108 ms (10.825 ms / 100) 10.588 -> 10.601 ( +0.12%) [ +0.22% +0.15% +0.00% / +0.35% +0.12% +0.14%] index_select linear : Elapsed 0.106 ms (10.611 ms / 100) 10.698 -> 10.711 ( +0.12%) [ +0.00% +0.03% +0.05% / +0.17% +0.16% +0.12%] index_select reverse : Elapsed 0.107 ms (10.698 ms / 100) 10.330 -> 10.338 ( +0.08%) [ +0.24% +0.00% +0.27% / +0.39% +0.08% +0.12%] index_select skip64 : Elapsed 0.104 ms (10.355 ms / 100) 10.327 -> 10.323 ( -0.04%) [ +0.10% +0.00% +0.07% / +0.30% +0.01% -0.04%] index_select skip256 : Elapsed 0.103 ms (10.337 ms / 100) 10.681 -> 10.675 ( -0.06%) [ +0.38% +0.00% +0.07% / +0.21% -0.04% -0.06%] index_select spread : Elapsed 0.107 ms (10.722 ms / 100) 10.671 -> 10.672 ( +0.01%) [ +0.23% +0.00% +0.05% / +0.01% +0.79% +0.73%] index_select strided 3 : Elapsed 0.107 ms (10.696 ms / 100) 10.652 -> 10.613 ( -0.37%) [ +0.16% +0.12% +0.00% / +0.18% -0.33% -0.37%] index_select strided 5 : Elapsed 0.107 ms (10.669 ms / 100) 10.905 -> 10.901 ( -0.04%) [ +0.30% +0.02% +0.00% / +0.13% -0.04% +0.01%] index_select strided 7 : Elapsed 0.109 ms (10.938 ms / 100) 10.814 -> 10.775 ( -0.36%) [ +0.22% +0.08% +0.00% / +0.21% -0.36% -0.31%] index_select strided 8 : Elapsed 0.108 ms (10.838 ms / 100) 10.827 -> 10.785 ( -0.39%) [ +0.34% +0.00% +0.05% / +0.15% -0.39% -0.38%] index_select strided 16 : Elapsed 0.109 ms (10.864 ms / 100) 10.802 -> 10.761 ( -0.38%) [ +0.06% +0.00% +0.14% / +0.16% -0.38% -0.15%] index_select strided 64 : Elapsed 0.108 ms (10.808 ms / 100) 10.342 -> 10.348 ( +0.06%) [ +0.07% +0.00% +0.10% / +0.08% +0.14% +0.06%] index_select strided 100 : Elapsed 0.103 ms (10.349 ms / 100) 10.780 -> 10.779 ( -0.01%) [ +0.04% +0.00% +0.07% / -0.01% +0.07% +0.04%] index_select random : Elapsed 0.108 ms (10.784 ms / 100) 10.575 -> 10.604 ( +0.27%) [ +0.17% +0.00% +0.21% / +0.43% +0.40% +0.27%] index_select random_sorted : Elapsed 0.106 ms (10.593 ms / 100) B = [250, 50, 15] (stride (50, 1, 12500)) A = [150, 50, 15] (stride (15, 2250, 1)) dim = 0 6.547 -> 6.500 ( -0.72%) [ +0.08% +0.00% +0.09% / -0.17% -0.64% -0.72%] index_add_ linear : Elapsed 0.066 ms (6.552 ms / 100) 5.897 -> 5.887 ( -0.17%) [ +0.00% +0.14% +0.20% / -0.17% +0.46% +0.36%] index_copy_ linear : Elapsed 0.059 ms (5.897 ms / 100) 6.543 -> 6.529 ( -0.21%) [ +0.12% +0.00% +0.43% / +0.06% -0.21% +0.00%] index_add_ reverse : Elapsed 0.066 ms (6.551 ms / 100) 5.924 -> 5.876 ( -0.81%) [ +0.15% +0.00% +0.29% / -0.81% -0.44% -0.32%] index_copy_ reverse : Elapsed 0.059 ms (5.933 ms / 100) 6.612 -> 6.623 ( +0.17%) [ +0.12% +0.00% +0.27% / +0.17% +0.44% +0.70%] index_add_ spread : Elapsed 0.066 ms (6.620 ms / 100) 6.023 -> 5.994 ( -0.48%) [ +0.37% +0.25% +0.00% / -0.48% +0.56% +0.38%] index_copy_ spread : Elapsed 0.060 ms (6.045 ms / 100) 6.696 -> 6.680 ( -0.24%) [ +0.24% +0.37% +0.00% / -0.04% -0.24% -0.03%] index_add_ strided 3 : Elapsed 0.067 ms (6.712 ms / 100) 6.078 -> 6.055 ( -0.38%) [ +0.00% +0.35% +0.05% / -0.18% -0.25% -0.38%] index_copy_ strided 3 : Elapsed 0.061 ms (6.078 ms / 100) 6.753 -> 6.735 ( -0.27%) [ +0.09% +0.00% +0.31% / -0.27% -0.10% -0.06%] index_add_ strided 7 : Elapsed 0.068 ms (6.759 ms / 100) 6.116 -> 6.085 ( -0.51%) [ +0.39% +0.00% +0.69% / -0.51% -0.36% -0.36%] index_copy_ strided 7 : Elapsed 0.061 ms (6.140 ms / 100) 6.626 -> 6.631 ( +0.08%) [ +0.00% +0.17% +0.47% / +0.08% +1.19% +0.83%] index_add_ perm : Elapsed 0.066 ms (6.626 ms / 100) 6.004 -> 5.969 ( -0.58%) [ +0.37% +0.00% +0.35% / -0.58% +0.27% +0.12%] index_copy_ perm : Elapsed 0.060 ms (6.026 ms / 100) 6.544 -> 6.546 ( +0.03%) [ +0.89% +0.00% +1.04% / +0.66% +0.23% +0.03%] index_add_ perm_sorted : Elapsed 0.066 ms (6.602 ms / 100) 5.964 -> 5.941 ( -0.39%) [ +0.40% +0.00% +0.79% / -0.39% +0.02% +0.27%] index_copy_ perm_sorted : Elapsed 0.060 ms (5.988 ms / 100) 6.402 -> 6.403 ( +0.02%) [ +0.09% +0.00% +0.06% / +0.02% +0.39% +0.34%] index_select const : Elapsed 0.064 ms (6.408 ms / 100) 6.791 -> 6.745 ( -0.68%) [ +0.12% +0.12% +0.00% / -0.01% -0.68% -0.52%] index_select wrap : Elapsed 0.068 ms (6.799 ms / 100) 6.686 -> 6.682 ( -0.06%) [ +0.25% +0.00% +0.12% / -0.03% -0.06% +0.04%] index_select linear : Elapsed 0.067 ms (6.703 ms / 100) 6.523 -> 6.544 ( +0.32%) [ +0.77% +0.00% +0.18% / +0.32% +1.26% +1.36%] index_select reverse : Elapsed 0.066 ms (6.573 ms / 100) 6.389 -> 6.387 ( -0.03%) [ +0.00% +0.06% +0.16% / -0.03% +0.19% +0.16%] index_select skip64 : Elapsed 0.064 ms (6.389 ms / 100) 6.392 -> 6.374 ( -0.28%) [ +0.14% +0.08% +0.00% / +0.03% +0.03% -0.28%] index_select skip256 : Elapsed 0.064 ms (6.401 ms / 100) 6.581 -> 6.581 ( +0.00%) [ +0.61% +0.00% +0.20% / +0.38% +0.00% +0.20%] index_select spread : Elapsed 0.066 ms (6.621 ms / 100) 6.927 -> 6.946 ( +0.27%) [ +0.36% +0.33% +0.00% / +0.27% +0.48% +0.40%] index_select strided 3 : Elapsed 0.070 ms (6.952 ms / 100) 6.775 -> 6.733 ( -0.62%) [ +0.03% +0.00% +0.09% / +0.31% -0.38% -0.62%] index_select strided 5 : Elapsed 0.068 ms (6.777 ms / 100) 7.010 -> 7.018 ( +0.11%) [ +0.14% +0.00% +0.04% / +0.11% +0.44% +0.34%] index_select strided 7 : Elapsed 0.070 ms (7.020 ms / 100) 6.955 -> 6.963 ( +0.12%) [ +0.24% +0.16% +0.00% / +0.12% +0.40% +0.27%] index_select strided 8 : Elapsed 0.070 ms (6.972 ms / 100) 6.980 -> 6.958 ( -0.32%) [ +0.07% +0.00% +0.16% / -0.24% -0.32% +0.14%] index_select strided 16 : Elapsed 0.070 ms (6.985 ms / 100) 6.943 -> 6.930 ( -0.19%) [ +0.00% +0.16% +0.06% / -0.19% +0.35% -0.16%] index_select strided 64 : Elapsed 0.069 ms (6.943 ms / 100) 6.402 -> 6.400 ( -0.03%) [ +0.05% +0.06% +0.00% / -0.03% +0.11% +0.11%] index_select strided 100 : Elapsed 0.064 ms (6.405 ms / 100) 6.863 -> 6.864 ( +0.01%) [ +0.12% +0.13% +0.00% / +0.01% +0.83% +0.98%] index_select random : Elapsed 0.069 ms (6.871 ms / 100) 6.571 -> 6.553 ( -0.27%) [ +0.29% +0.00% +0.24% / +0.49% +0.02% -0.27%] index_select random_sorted : Elapsed 0.066 ms (6.590 ms / 100) B = [250, 50, 15] (stride (50, 1, 12500)) A = [150, 50, 15] (stride (50, 1, 7500)) dim = 0 6.641 -> 6.583 ( -0.87%) [ +0.00% +0.09% +0.14% / -0.20% -0.87% -0.72%] index_add_ linear : Elapsed 0.066 ms (6.641 ms / 100) 5.978 -> 5.944 ( -0.57%) [ +0.00% +0.49% +0.13% / -0.57% -0.33% -0.27%] index_copy_ linear : Elapsed 0.060 ms (5.978 ms / 100) 6.624 -> 6.606 ( -0.27%) [ +0.03% +0.23% +0.00% / -0.27% -0.14% -0.23%] index_add_ reverse : Elapsed 0.066 ms (6.626 ms / 100) 6.005 -> 5.939 ( -1.10%) [ +0.00% +0.33% +0.03% / -0.68% -1.10% -1.02%] index_copy_ reverse : Elapsed 0.060 ms (6.005 ms / 100) 6.670 -> 6.663 ( -0.10%) [ +0.06% +0.00% +0.18% / -0.10% +0.34% -0.10%] index_add_ spread : Elapsed 0.067 ms (6.674 ms / 100) 6.088 -> 6.076 ( -0.20%) [ +0.28% +0.00% +0.03% / -0.10% -0.20% +0.05%] index_copy_ spread : Elapsed 0.061 ms (6.105 ms / 100) 6.782 -> 6.716 ( -0.97%) [ +0.16% +0.07% +0.00% / -0.24% -0.87% -0.97%] index_add_ strided 3 : Elapsed 0.068 ms (6.793 ms / 100) 6.161 -> 6.104 ( -0.93%) [ +0.19% +0.28% +0.00% / -0.52% -0.93% -0.68%] index_copy_ strided 3 : Elapsed 0.062 ms (6.173 ms / 100) 6.761 -> 6.725 ( -0.53%) [ +0.16% +0.33% +0.00% / -0.53% +0.25% +0.04%] index_add_ strided 7 : Elapsed 0.068 ms (6.772 ms / 100) 6.179 -> 6.114 ( -1.05%) [ +0.06% +0.28% +0.00% / -0.61% -1.05% -0.79%] index_copy_ strided 7 : Elapsed 0.062 ms (6.183 ms / 100) 6.711 -> 6.609 ( -1.52%) [ +0.00% +0.33% +0.09% / -0.13% -1.52% -1.43%] index_add_ perm : Elapsed 0.067 ms (6.711 ms / 100) 6.080 -> 6.002 ( -1.28%) [ +0.18% +0.67% +0.00% / -0.69% -1.28% -1.12%] index_copy_ perm : Elapsed 0.061 ms (6.091 ms / 100) 6.621 -> 6.570 ( -0.77%) [ +0.11% +0.05% +0.00% / +0.11% -0.77% -0.76%] index_add_ perm_sorted : Elapsed 0.066 ms (6.628 ms / 100) 6.015 -> 5.982 ( -0.55%) [ +0.03% +0.71% +0.00% / -0.55% -0.15% -0.17%] index_copy_ perm_sorted : Elapsed 0.060 ms (6.017 ms / 100) 6.951 -> 6.949 ( -0.03%) [ +0.01% +0.04% +0.00% / -0.03% +0.65% +0.47%] index_select const : Elapsed 0.070 ms (6.952 ms / 100) 6.908 -> 6.860 ( -0.69%) [ +0.00% +0.09% +0.06% / -0.06% -0.69% -0.58%] index_select wrap : Elapsed 0.069 ms (6.908 ms / 100) 6.922 -> 6.915 ( -0.10%) [ +0.06% +0.01% +0.00% / +0.13% -0.09% -0.10%] index_select linear : Elapsed 0.069 ms (6.926 ms / 100) 6.691 -> 6.673 ( -0.27%) [ +0.18% +0.00% +0.27% / +0.18% -0.27% -0.06%] index_select reverse : Elapsed 0.067 ms (6.703 ms / 100) 6.627 -> 6.631 ( +0.06%) [ +0.17% +0.00% +0.09% / +0.17% +0.06% +0.36%] index_select skip64 : Elapsed 0.066 ms (6.638 ms / 100) 6.881 -> 6.889 ( +0.12%) [ +0.00% +0.09% +0.20% / +0.12% +0.36% +0.38%] index_select skip256 : Elapsed 0.069 ms (6.881 ms / 100) 6.769 -> 6.755 ( -0.21%) [ +0.27% +0.00% +0.01% / +0.03% -0.09% -0.21%] index_select spread : Elapsed 0.068 ms (6.787 ms / 100) 6.834 -> 6.848 ( +0.20%) [ +0.15% +0.22% +0.00% / +0.20% +0.23% +0.44%] index_select strided 3 : Elapsed 0.068 ms (6.844 ms / 100) 6.698 -> 6.706 ( +0.12%) [ +0.00% +0.19% +0.13% / +0.12% +0.39% +0.31%] index_select strided 5 : Elapsed 0.067 ms (6.698 ms / 100) 7.043 -> 7.055 ( +0.17%) [ +0.00% +0.03% +0.11% / +0.17% +0.24% +0.30%] index_select strided 7 : Elapsed 0.070 ms (7.043 ms / 100) 6.985 -> 6.988 ( +0.04%) [ +0.26% +0.00% +0.14% / +0.04% +0.42% +0.50%] index_select strided 8 : Elapsed 0.070 ms (7.003 ms / 100) 6.963 -> 6.948 ( -0.22%) [ +0.07% +0.00% +0.07% / -0.22% +0.07% +0.23%] index_select strided 16 : Elapsed 0.070 ms (6.968 ms / 100) 6.934 -> 6.932 ( -0.03%) [ +0.26% +0.01% +0.00% / -0.03% +0.59% +0.69%] index_select strided 64 : Elapsed 0.070 ms (6.952 ms / 100) 6.647 -> 6.640 ( -0.11%) [ +0.09% +0.08% +0.00% / -0.05% -0.11% -0.02%] index_select strided 100 : Elapsed 0.067 ms (6.653 ms / 100) 6.916 -> 6.891 ( -0.36%) [ +0.06% +0.13% +0.00% / +0.14% -0.32% -0.36%] index_select random : Elapsed 0.069 ms (6.920 ms / 100) 6.712 -> 6.701 ( -0.16%) [ +0.00% +0.01% +0.01% / +0.03% -0.16% -0.12%] index_select random_sorted : Elapsed 0.067 ms (6.712 ms / 100) out_shape = [150, 250, 15] in_shape = [150, 50, 15] idx_dim = 1 B = [150, 250, 15] (stride (3750, 15, 1)) A = [150, 50, 15] (stride (50, 1, 7500)) dim = 1 6.666 -> 6.582 ( -1.26%) [ +0.00% +0.51% +1.02% / +0.11% -1.22% -1.26%] index_add_ linear : Elapsed 0.067 ms (6.666 ms / 100) 6.324 -> 6.248 ( -1.20%) [ +0.00% +0.30% +0.46% / -0.57% -1.00% -1.20%] index_copy_ linear : Elapsed 0.063 ms (6.324 ms / 100) 6.683 -> 6.562 ( -1.81%) [ +0.31% +0.00% +0.33% / -0.58% -1.77% -1.81%] index_add_ reverse : Elapsed 0.067 ms (6.704 ms / 100) 6.326 -> 6.240 ( -1.36%) [ +0.30% +0.00% +0.52% / -0.43% -1.14% -1.36%] index_copy_ reverse : Elapsed 0.063 ms (6.345 ms / 100) 7.394 -> 7.308 ( -1.16%) [ +0.05% +0.00% +0.38% / -0.55% -0.96% -1.16%] index_add_ spread : Elapsed 0.074 ms (7.398 ms / 100) 7.091 -> 6.995 ( -1.35%) [ +0.18% +0.00% +0.30% / -0.63% -1.35% -1.23%] index_copy_ spread : Elapsed 0.071 ms (7.104 ms / 100) 7.240 -> 7.188 ( -0.72%) [ +0.48% +0.36% +0.00% / +0.11% -0.47% -0.72%] index_add_ strided 3 : Elapsed 0.073 ms (7.275 ms / 100) 7.013 -> 6.939 ( -1.06%) [ +0.23% +0.00% +0.11% / -0.56% -1.01% -1.06%] index_copy_ strided 3 : Elapsed 0.070 ms (7.029 ms / 100) 7.432 -> 7.356 ( -1.02%) [ +0.30% +0.00% +0.54% / -0.38% -1.02% -0.96%] index_add_ strided 7 : Elapsed 0.075 ms (7.454 ms / 100) 7.110 -> 7.033 ( -1.08%) [ +0.25% +0.00% +0.39% / -0.90% -1.04% -1.08%] index_copy_ strided 7 : Elapsed 0.071 ms (7.128 ms / 100) 7.465 -> 7.408 ( -0.76%) [ +0.00% +0.01% +0.16% / -0.76% -0.15% -0.03%] index_add_ perm : Elapsed 0.075 ms (7.465 ms / 100) 7.065 -> 7.014 ( -0.72%) [ +0.00% +0.14% +0.17% / -0.72% -0.17% -0.08%] index_copy_ perm : Elapsed 0.071 ms (7.065 ms / 100) 7.236 -> 7.165 ( -0.98%) [ +0.00% +0.22% +0.01% / -0.55% -0.98% -0.80%] index_add_ perm_sorted : Elapsed 0.072 ms (7.236 ms / 100) 6.933 -> 6.885 ( -0.69%) [ +0.00% +0.25% +0.25% / -0.50% -0.61% -0.69%] index_copy_ perm_sorted : Elapsed 0.069 ms (6.933 ms / 100) 11.268 -> 11.199 ( -0.61%) [ +0.28% +0.00% +0.15% / +0.27% -0.61% -0.48%] index_select const : Elapsed 0.113 ms (11.299 ms / 100) 12.383 -> 12.421 ( +0.31%) [ +0.40% +0.00% +0.29% / +0.31% +1.55% +1.46%] index_select wrap : Elapsed 0.124 ms (12.433 ms / 100) 11.248 -> 11.279 ( +0.28%) [ +0.00% +0.13% +0.10% / +0.28% +0.42% +0.32%] index_select linear : Elapsed 0.112 ms (11.248 ms / 100) 11.751 -> 11.770 ( +0.16%) [ +0.67% +0.00% +0.82% / +0.16% +0.61% +1.01%] index_select reverse : Elapsed 0.118 ms (11.830 ms / 100) 11.252 -> 11.197 ( -0.49%) [ +0.00% +0.02% +0.12% / +0.07% -0.34% -0.49%] index_select skip64 : Elapsed 0.113 ms (11.252 ms / 100) 11.271 -> 11.207 ( -0.57%) [ +0.17% +0.00% +0.19% / -0.53% -0.57% -0.35%] index_select skip256 : Elapsed 0.113 ms (11.290 ms / 100) 11.810 -> 11.783 ( -0.23%) [ +0.32% +0.00% +0.02% / -0.23% +0.35% +0.33%] index_select spread : Elapsed 0.118 ms (11.848 ms / 100) 12.592 -> 12.597 ( +0.04%) [ +0.07% +0.22% +0.00% / +0.04% +0.44% +0.64%] index_select strided 3 : Elapsed 0.126 ms (12.601 ms / 100) 12.651 -> 12.714 ( +0.50%) [ +0.55% +0.00% +0.47% / +0.50% +1.22% +0.79%] index_select strided 5 : Elapsed 0.127 ms (12.721 ms / 100) 12.633 -> 12.655 ( +0.17%) [ +0.00% +0.41% +0.31% / +0.17% +0.91% +1.12%] index_select strided 7 : Elapsed 0.126 ms (12.633 ms / 100) 12.620 -> 12.633 ( +0.10%) [ +0.30% +0.00% +0.33% / +0.10% +0.98% +1.38%] index_select strided 8 : Elapsed 0.127 ms (12.658 ms / 100) 12.586 -> 12.653 ( +0.53%) [ +0.00% +0.16% +0.55% / +0.53% +1.13% +1.18%] index_select strided 16 : Elapsed 0.126 ms (12.586 ms / 100) 12.638 -> 12.616 ( -0.17%) [ +0.12% +0.00% +0.10% / -0.17% +0.65% +0.75%] index_select random : Elapsed 0.127 ms (12.653 ms / 100) 11.772 -> 11.778 ( +0.05%) [ +0.03% +0.13% +0.00% / +0.05% +0.65% +0.88%] index_select random_sorted : Elapsed 0.118 ms (11.776 ms / 100) B = [150, 250, 15] (stride (15, 2250, 1)) A = [150, 50, 15] (stride (1, 2250, 150)) dim = 1 5.982 -> 5.957 ( -0.42%) [ +0.10% +0.15% +0.00% / -0.28% -0.42% -0.38%] index_add_ linear : Elapsed 0.060 ms (5.988 ms / 100) 5.812 -> 5.791 ( -0.36%) [ +0.38% +0.15% +0.00% / -0.36% -0.34% -0.15%] index_copy_ linear : Elapsed 0.058 ms (5.834 ms / 100) 6.009 -> 5.982 ( -0.45%) [ +0.33% +0.00% +0.38% / -0.45% -0.30% -0.23%] index_add_ reverse : Elapsed 0.060 ms (6.029 ms / 100) 5.838 -> 5.794 ( -0.75%) [ +0.00% +0.05% +0.51% / -0.41% -0.75% -0.57%] index_copy_ reverse : Elapsed 0.058 ms (5.838 ms / 100) 6.048 -> 5.986 ( -1.03%) [ +0.00% +0.05% +0.20% / -0.43% -1.03% -0.68%] index_add_ spread : Elapsed 0.060 ms (6.048 ms / 100) 5.872 -> 5.810 ( -1.06%) [ +0.00% +0.22% +0.22% / -0.54% -0.95% -1.06%] index_copy_ spread : Elapsed 0.059 ms (5.872 ms / 100) 6.031 -> 5.969 ( -1.03%) [ +0.00% +0.12% +0.58% / -0.53% -1.03% -0.75%] index_add_ strided 3 : Elapsed 0.060 ms (6.031 ms / 100) 5.845 -> 5.802 ( -0.74%) [ +0.12% +0.00% +0.31% / -0.17% -0.74% -0.65%] index_copy_ strided 3 : Elapsed 0.059 ms (5.852 ms / 100) 6.042 -> 5.980 ( -1.03%) [ +0.00% +0.28% +0.15% / -0.53% -1.03% -0.89%] index_add_ strided 7 : Elapsed 0.060 ms (6.042 ms / 100) 5.847 -> 5.790 ( -0.97%) [ +0.00% +0.62% +0.46% / -0.38% -0.86% -0.97%] index_copy_ strided 7 : Elapsed 0.058 ms (5.847 ms / 100) 6.004 -> 5.967 ( -0.62%) [ +0.00% +0.03% +0.10% / -0.62% +0.03% -0.20%] index_add_ perm : Elapsed 0.060 ms (6.004 ms / 100) 5.813 -> 5.791 ( -0.38%) [ +0.33% +0.29% +0.00% / -0.38% -0.33% +0.03%] index_copy_ perm : Elapsed 0.058 ms (5.832 ms / 100) 6.028 -> 5.973 ( -0.91%) [ +0.00% +0.55% +0.56% / -0.27% -0.91% -0.83%] index_add_ perm_sorted : Elapsed 0.060 ms (6.028 ms / 100) 5.836 -> 5.791 ( -0.77%) [ +0.09% +0.36% +0.00% / -0.34% -0.55% -0.77%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.841 ms / 100) 8.514 -> 8.527 ( +0.15%) [ +0.13% +0.00% +0.01% / +0.15% +0.73% +0.79%] index_select const : Elapsed 0.085 ms (8.525 ms / 100) 10.191 -> 10.224 ( +0.32%) [ +0.35% +0.00% +0.37% / +0.32% +1.90% +1.87%] index_select wrap : Elapsed 0.102 ms (10.227 ms / 100) 9.088 -> 9.076 ( -0.13%) [ +0.23% +0.26% +0.00% / -0.13% +2.30% +2.32%] index_select linear : Elapsed 0.091 ms (9.109 ms / 100) 9.105 -> 9.114 ( +0.10%) [ +0.24% +0.01% +0.00% / +0.10% +2.39% +2.54%] index_select reverse : Elapsed 0.091 ms (9.127 ms / 100) 8.636 -> 8.636 ( +0.00%) [ +0.00% +0.00% +0.09% / +0.00% +1.39% +1.48%] index_select skip64 : Elapsed 0.086 ms (8.636 ms / 100) 8.544 -> 8.567 ( +0.27%) [ +0.14% +0.00% +0.14% / +0.27% +0.36% +0.33%] index_select skip256 : Elapsed 0.086 ms (8.556 ms / 100) 8.812 -> 8.802 ( -0.11%) [ +0.12% +0.00% +0.03% / +0.09% -0.11% +0.26%] index_select spread : Elapsed 0.088 ms (8.823 ms / 100) 10.242 -> 10.241 ( -0.01%) [ +0.17% +0.00% +0.26% / -0.01% +1.41% +1.46%] index_select strided 3 : Elapsed 0.103 ms (10.259 ms / 100) 8.733 -> 8.746 ( +0.15%) [ +0.37% +0.00% +0.11% / +0.15% +0.66% +0.81%] index_select strided 5 : Elapsed 0.088 ms (8.765 ms / 100) 10.078 -> 10.110 ( +0.32%) [ +0.04% +0.00% +0.59% / +0.32% +3.03% +3.24%] index_select strided 7 : Elapsed 0.101 ms (10.082 ms / 100) 9.393 -> 9.402 ( +0.10%) [ +0.17% +0.00% +0.00% / +0.10% +0.87% +0.98%] index_select strided 8 : Elapsed 0.094 ms (9.409 ms / 100) 9.351 -> 9.375 ( +0.26%) [ +0.15% +0.07% +0.00% / +0.26% +1.20% +1.41%] index_select strided 16 : Elapsed 0.094 ms (9.365 ms / 100) 9.692 -> 9.726 ( +0.35%) [ +0.30% +0.00% +0.45% / +0.35% +1.91% +1.88%] index_select random : Elapsed 0.097 ms (9.721 ms / 100) 8.854 -> 8.839 ( -0.17%) [ +0.16% +0.11% +0.00% / +0.28% -0.17% -0.12%] index_select random_sorted : Elapsed 0.089 ms (8.868 ms / 100) B = [150, 250, 15] (stride (1, 150, 37500)) dim = 1 fill_cnt = 50 4.845 -> 4.827 ( -0.37%) [ +0.06% +0.00% +0.00% / -0.37% +6.56% +6.42%] index_fill_ const : Elapsed 0.048 ms (4.848 ms / 100) 3.210 -> 3.209 ( -0.03%) [ +0.00% +0.03% +0.09% / -0.03% +1.00% +0.62%] index_fill_ linear : Elapsed 0.032 ms (3.210 ms / 100) 3.230 -> 3.223 ( -0.22%) [ +0.19% +0.00% +0.19% / -0.22% -0.12% +0.03%] index_fill_ reverse : Elapsed 0.032 ms (3.236 ms / 100) 4.849 -> 4.825 ( -0.49%) [ +0.00% +0.06% +0.00% / -0.49% +7.59% +7.67%] index_fill_ skip64 : Elapsed 0.048 ms (4.849 ms / 100) 4.843 -> 4.831 ( -0.25%) [ +0.02% +0.08% +0.00% / -0.25% +7.95% +7.70%] index_fill_ skip256 : Elapsed 0.048 ms (4.844 ms / 100) 3.169 -> 3.160 ( -0.28%) [ +0.25% +0.00% +0.09% / -0.28% +0.19% +0.16%] index_fill_ spread : Elapsed 0.032 ms (3.177 ms / 100) 3.170 -> 3.154 ( -0.50%) [ +0.13% +0.09% +0.00% / -0.50% -0.09% +0.50%] index_fill_ strided 3 : Elapsed 0.032 ms (3.174 ms / 100) 3.181 -> 3.175 ( -0.19%) [ +0.06% +0.00% +0.16% / -0.13% -0.19% -0.19%] index_fill_ strided 5 : Elapsed 0.032 ms (3.183 ms / 100) 3.209 -> 3.199 ( -0.31%) [ +0.03% +0.00% +0.19% / -0.16% -0.09% -0.31%] index_fill_ strided 7 : Elapsed 0.032 ms (3.210 ms / 100) 3.185 -> 3.161 ( -0.75%) [ +0.13% +0.31% +0.00% / -0.50% -0.75% -0.66%] index_fill_ strided 8 : Elapsed 0.032 ms (3.189 ms / 100) 3.150 -> 3.152 ( +0.06%) [ +0.10% +0.10% +0.00% / +0.06% +0.13% +0.32%] index_fill_ strided 16 : Elapsed 0.032 ms (3.153 ms / 100) 3.155 -> 3.151 ( -0.13%) [ +0.00% +0.32% +0.16% / -0.13% +0.32% +0.54%] index_fill_ strided 64 : Elapsed 0.032 ms (3.155 ms / 100) 3.394 -> 3.391 ( -0.09%) [ +0.27% +0.32% +0.00% / -0.09% +5.98% +5.89%] index_fill_ strided 100 : Elapsed 0.034 ms (3.403 ms / 100) 3.232 -> 3.208 ( -0.74%) [ +0.15% +0.00% +0.00% / -0.74% -0.12% +0.00%] index_fill_ random : Elapsed 0.032 ms (3.237 ms / 100) 3.217 -> 3.186 ( -0.96%) [ +0.12% +0.00% +0.03% / -0.50% -0.96% -0.78%] index_fill_ random_sorted : Elapsed 0.032 ms (3.221 ms / 100) 3.321 -> 3.266 ( -1.66%) [ +0.12% +0.00% +0.27% / -0.60% -1.66% -1.26%] index_fill_ perm : Elapsed 0.033 ms (3.325 ms / 100) 3.202 -> 3.212 ( +0.31%) [ +0.41% +0.00% +0.53% / +0.47% +0.50% +0.31%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.215 ms / 100) out_shape = [150, 50, 250] in_shape = [150, 50, 15] idx_dim = 2 B = [150, 50, 250] (stride (12500, 250, 1)) dim = 2 fill_cnt = 15 2.358 -> 2.282 ( -3.22%) [ +0.17% +0.00% +0.04% / +0.34% -3.22% -2.54%] index_fill_ const : Elapsed 0.024 ms (2.362 ms / 100) 2.530 -> 2.532 ( +0.08%) [ +0.28% +0.08% +0.00% / +0.08% +2.29% +2.45%] index_fill_ linear : Elapsed 0.025 ms (2.537 ms / 100) 2.504 -> 2.517 ( +0.52%) [ +0.32% +0.20% +0.00% / +0.52% +6.11% +6.39%] index_fill_ reverse : Elapsed 0.025 ms (2.512 ms / 100) 2.354 -> 2.303 ( -2.17%) [ +0.42% +0.13% +0.00% / +0.21% -1.83% -2.17%] index_fill_ skip64 : Elapsed 0.024 ms (2.364 ms / 100) 2.360 -> 2.301 ( -2.50%) [ +0.17% +0.08% +0.00% / +0.47% -2.42% -2.50%] index_fill_ skip256 : Elapsed 0.024 ms (2.364 ms / 100) 6.793 -> 6.801 ( +0.12%) [ +0.16% +0.00% +0.22% / +0.12% +0.28% +0.29%] index_fill_ spread : Elapsed 0.068 ms (6.804 ms / 100) 3.430 -> 3.412 ( -0.52%) [ +0.00% +0.03% +0.06% / +0.00% -0.52% -0.52%] index_fill_ strided 3 : Elapsed 0.034 ms (3.430 ms / 100) 4.462 -> 4.457 ( -0.11%) [ +0.07% +0.13% +0.00% / -0.11% +1.23% +1.14%] index_fill_ strided 5 : Elapsed 0.045 ms (4.465 ms / 100) 5.747 -> 5.732 ( -0.26%) [ +0.00% +0.00% +0.09% / +0.00% -0.09% -0.26%] index_fill_ strided 7 : Elapsed 0.057 ms (5.747 ms / 100) 6.394 -> 6.376 ( -0.28%) [ +0.05% +0.00% +0.06% / +0.13% +0.08% -0.28%] index_fill_ strided 8 : Elapsed 0.064 ms (6.397 ms / 100) 6.732 -> 6.745 ( +0.19%) [ +0.07% +0.00% +0.06% / +0.19% +0.30% +0.34%] index_fill_ strided 16 : Elapsed 0.067 ms (6.737 ms / 100) 8.203 -> 8.207 ( +0.05%) [ +0.00% +0.06% +0.11% / +0.06% +0.20% +0.05%] index_fill_ strided 64 : Elapsed 0.082 ms (8.203 ms / 100) 8.224 -> 8.221 ( -0.04%) [ +0.02% +0.00% +0.06% / -0.04% +0.11% +0.05%] index_fill_ strided 100 : Elapsed 0.082 ms (8.226 ms / 100) 7.734 -> 7.685 ( -0.63%) [ +0.00% +0.03% +0.04% / +0.01% -0.63% -0.63%] index_fill_ random : Elapsed 0.077 ms (7.734 ms / 100) 6.236 -> 6.240 ( +0.06%) [ +0.11% +0.11% +0.00% / +0.06% +0.58% +0.55%] index_fill_ random_sorted : Elapsed 0.062 ms (6.243 ms / 100) 7.245 -> 7.210 ( -0.48%) [ +0.03% +0.00% +0.10% / +0.08% -0.48% -0.35%] index_fill_ perm : Elapsed 0.072 ms (7.247 ms / 100) 5.769 -> 5.687 ( -1.42%) [ +0.16% +0.19% +0.00% / +0.09% -1.33% -1.42%] index_fill_ perm_sorted : Elapsed 0.058 ms (5.778 ms / 100) B = [150, 50, 250] (stride (250, 37500, 1)) A = [150, 50, 15] (stride (1, 2250, 150)) dim = 2 6.901 -> 6.914 ( +0.19%) [ +0.39% +0.00% +0.41% / +0.19% +5.97% +5.75%] index_add_ linear : Elapsed 0.069 ms (6.928 ms / 100) 4.928 -> 4.939 ( +0.22%) [ +0.26% +0.00% +0.51% / +0.22% +4.65% +4.55%] index_copy_ linear : Elapsed 0.049 ms (4.941 ms / 100) 7.062 -> 7.087 ( +0.35%) [ +0.28% +0.00% +0.40% / +0.35% +5.40% +5.44%] index_add_ reverse : Elapsed 0.071 ms (7.082 ms / 100) 4.971 -> 4.987 ( +0.32%) [ +0.00% +0.04% +0.28% / +0.32% +5.15% +4.71%] index_copy_ reverse : Elapsed 0.050 ms (4.971 ms / 100) 17.723 -> 17.639 ( -0.47%) [ +0.00% +0.04% +0.12% / +0.07% -0.47% -0.39%] index_add_ spread : Elapsed 0.177 ms (17.723 ms / 100) 10.428 -> 10.395 ( -0.32%) [ +0.00% +0.07% +0.38% / +0.36% -0.32% -0.23%] index_copy_ spread : Elapsed 0.104 ms (10.428 ms / 100) 9.331 -> 9.291 ( -0.43%) [ +0.02% +0.02% +0.00% / -0.01% -0.43% -0.27%] index_add_ strided 3 : Elapsed 0.093 ms (9.333 ms / 100) 6.037 -> 6.020 ( -0.28%) [ +0.22% +0.00% +0.08% / +0.17% -0.22% -0.28%] index_copy_ strided 3 : Elapsed 0.060 ms (6.050 ms / 100) 14.724 -> 14.604 ( -0.81%) [ +0.03% +0.04% +0.00% / +0.16% -0.70% -0.81%] index_add_ strided 7 : Elapsed 0.147 ms (14.729 ms / 100) 8.829 -> 8.759 ( -0.79%) [ +0.16% +0.00% +0.34% / +0.20% -0.71% -0.79%] index_copy_ strided 7 : Elapsed 0.088 ms (8.843 ms / 100) 18.220 -> 18.166 ( -0.30%) [ +0.00% +0.01% +0.23% / +0.21% -0.30% -0.21%] index_add_ perm : Elapsed 0.182 ms (18.220 ms / 100) 11.117 -> 11.161 ( +0.40%) [ +0.00% +0.04% +0.76% / +0.40% +0.44% +0.60%] index_copy_ perm : Elapsed 0.111 ms (11.117 ms / 100) 13.935 -> 13.963 ( +0.20%) [ +0.04% +0.00% +0.11% / +0.20% +1.40% +1.26%] index_add_ perm_sorted : Elapsed 0.139 ms (13.941 ms / 100) 8.482 -> 8.519 ( +0.44%) [ +0.00% +0.26% +0.48% / +0.44% +1.91% +1.89%] index_copy_ perm_sorted : Elapsed 0.085 ms (8.482 ms / 100) BEST 78.981 -> 18.889 (-76.08%) [ +0.00% +0.29% +0.12% / -75.32% -76.08% -76.02%] index_select const : Elapsed 0.790 ms (78.981 ms / 100) BEST 102.134 -> 21.945 (-78.51%) [ +0.00% +0.23% +0.04% / -78.39% -78.51% -78.47%] index_select wrap : Elapsed 1.021 ms (102.134 ms / 100) BEST 78.964 -> 21.998 (-72.14%) [ +0.14% +0.00% +0.48% / -71.79% -72.14% -72.05%] index_select linear : Elapsed 0.791 ms (79.078 ms / 100) BEST 81.991 -> 22.186 (-72.94%) [ +0.89% +0.49% +0.00% / -72.90% -72.94% -72.89%] index_select reverse : Elapsed 0.827 ms (82.717 ms / 100) BEST 79.568 -> 18.947 (-76.19%) [ +0.00% +0.01% +0.05% / -75.74% -76.19% -76.17%] index_select skip64 : Elapsed 0.796 ms (79.568 ms / 100) BEST 79.367 -> 19.228 (-75.77%) [ +0.00% +0.22% +0.21% / -75.68% -75.77% -75.77%] index_select skip256 : Elapsed 0.794 ms (79.367 ms / 100) BEST 81.248 -> 22.116 (-72.78%) [ +0.08% +0.09% +0.00% / -72.35% -72.76% -72.78%] index_select spread : Elapsed 0.813 ms (81.311 ms / 100) BEST 86.815 -> 20.259 (-76.66%) [ +0.00% +0.37% +0.24% / -76.32% -76.60% -76.66%] index_select strided 3 : Elapsed 0.868 ms (86.815 ms / 100) BEST 83.239 -> 19.468 (-76.61%) [ +0.19% +0.00% +0.08% / -76.17% -76.48% -76.61%] index_select strided 5 : Elapsed 0.834 ms (83.394 ms / 100) BEST 100.759 -> 21.977 (-78.19%) [ +0.41% +0.06% +0.00% / -78.12% -78.15% -78.19%] index_select strided 7 : Elapsed 1.012 ms (101.173 ms / 100) BEST 100.686 -> 21.935 (-78.21%) [ +0.00% +0.27% +0.25% / -78.21% -78.15% -78.15%] index_select strided 8 : Elapsed 1.007 ms (100.686 ms / 100) BEST 94.754 -> 21.996 (-76.79%) [ +0.31% +0.41% +0.00% / -76.73% -76.76% -76.79%] index_select random : Elapsed 0.950 ms (95.043 ms / 100) BEST 80.969 -> 21.936 (-72.91%) [ +0.09% +0.77% +0.00% / -72.65% -72.91% -72.84%] index_select random_sorted : Elapsed 0.810 ms (81.041 ms / 100) B = [150, 50, 250] (stride (1, 37500, 150)) A = [150, 50, 15] (stride (15, 2250, 1)) dim = 2 4.137 -> 4.149 ( +0.29%) [ +0.22% +0.27% +0.00% / +0.29% +0.63% +0.77%] index_add_ linear : Elapsed 0.041 ms (4.146 ms / 100) 3.699 -> 3.718 ( +0.51%) [ +0.32% +0.22% +0.00% / +0.51% +2.14% +2.27%] index_copy_ linear : Elapsed 0.037 ms (3.711 ms / 100) 4.117 -> 4.125 ( +0.19%) [ +0.12% +0.00% +0.19% / +0.19% +0.58% +0.75%] index_add_ reverse : Elapsed 0.041 ms (4.122 ms / 100) 3.716 -> 3.727 ( +0.30%) [ +0.24% +0.00% +0.08% / +0.30% +0.51% +0.65%] index_copy_ reverse : Elapsed 0.037 ms (3.725 ms / 100) 3.969 -> 3.980 ( +0.28%) [ +0.05% +0.13% +0.00% / +0.28% +1.71% +1.69%] index_add_ spread : Elapsed 0.040 ms (3.971 ms / 100) 3.601 -> 3.603 ( +0.06%) [ +0.08% +0.00% +0.00% / +0.06% +0.39% +0.47%] index_copy_ spread : Elapsed 0.036 ms (3.604 ms / 100) 4.015 -> 4.028 ( +0.32%) [ +0.30% +0.35% +0.00% / +0.32% +2.69% +2.89%] index_add_ strided 3 : Elapsed 0.040 ms (4.027 ms / 100) 3.629 -> 3.634 ( +0.14%) [ +0.19% +0.25% +0.00% / +0.14% +1.57% +1.79%] index_copy_ strided 3 : Elapsed 0.036 ms (3.636 ms / 100) 3.936 -> 3.948 ( +0.30%) [ +0.33% +0.28% +0.00% / +0.30% +1.24% +1.42%] index_add_ strided 7 : Elapsed 0.039 ms (3.949 ms / 100) 3.610 -> 3.623 ( +0.36%) [ +0.42% +0.00% +0.08% / +0.36% +0.97% +1.08%] index_copy_ strided 7 : Elapsed 0.036 ms (3.625 ms / 100) 4.129 -> 4.138 ( +0.22%) [ +0.22% +0.12% +0.00% / +0.22% +0.22% +0.36%] index_add_ perm : Elapsed 0.041 ms (4.138 ms / 100) 3.637 -> 3.650 ( +0.36%) [ +0.27% +0.00% +0.05% / +0.36% +1.15% +1.13%] index_copy_ perm : Elapsed 0.036 ms (3.647 ms / 100) 4.048 -> 4.045 ( -0.07%) [ +0.22% +0.00% +0.00% / -0.07% +0.27% +0.54%] index_add_ perm_sorted : Elapsed 0.041 ms (4.057 ms / 100) 3.613 -> 3.617 ( +0.11%) [ +0.11% +0.00% +0.08% / +0.11% +1.30% +1.13%] index_copy_ perm_sorted : Elapsed 0.036 ms (3.617 ms / 100) 66.079 -> 66.486 ( +0.62%) [ +0.96% +0.00% +0.80% / +0.62% +7.03% +6.89%] index_select const : Elapsed 0.667 ms (66.712 ms / 100) 66.152 -> 66.282 ( +0.20%) [ +0.42% +0.00% +0.01% / +0.20% +6.72% +6.39%] index_select wrap : Elapsed 0.664 ms (66.428 ms / 100) 65.496 -> 65.061 ( -0.66%) [ +0.00% +0.30% +0.00% / -0.66% +6.70% +7.49%] index_select linear : Elapsed 0.655 ms (65.496 ms / 100) 66.668 -> 67.240 ( +0.86%) [ +0.00% +0.35% +0.36% / +0.86% +5.36% +6.00%] index_select reverse : Elapsed 0.667 ms (66.668 ms / 100) 66.101 -> 66.540 ( +0.66%) [ +0.16% +1.20% +0.00% / +0.66% +7.52% +7.02%] index_select skip64 : Elapsed 0.662 ms (66.210 ms / 100) 65.608 -> 65.447 ( -0.25%) [ +0.45% +0.00% +0.83% / -0.25% +8.90% +7.44%] index_select skip256 : Elapsed 0.659 ms (65.903 ms / 100) 64.917 -> 65.113 ( +0.30%) [ +0.00% +1.34% +0.99% / +0.30% +6.95% +6.80%] index_select spread : Elapsed 0.649 ms (64.917 ms / 100) 66.607 -> 67.040 ( +0.65%) [ +0.15% +0.90% +0.00% / +0.65% +8.02% +8.59%] index_select strided 3 : Elapsed 0.667 ms (66.707 ms / 100) 67.453 -> 66.963 ( -0.73%) [ +0.46% +0.00% +0.26% / -0.73% +7.37% +7.45%] index_select strided 5 : Elapsed 0.678 ms (67.762 ms / 100) 67.541 -> 67.373 ( -0.25%) [ +0.00% +0.08% +0.30% / -0.25% +7.08% +7.54%] index_select strided 7 : Elapsed 0.675 ms (67.541 ms / 100) 67.257 -> 67.595 ( +0.50%) [ +0.42% +0.00% +0.79% / +0.50% +6.59% +7.15%] index_select strided 8 : Elapsed 0.675 ms (67.542 ms / 100) 67.436 -> 67.524 ( +0.13%) [ +0.01% +0.00% +0.27% / +0.13% +7.06% +7.35%] index_select random : Elapsed 0.674 ms (67.441 ms / 100) 65.738 -> 65.477 ( -0.40%) [ +0.09% +0.09% +0.00% / -0.40% +5.77% +5.79%] index_select random_sorted : Elapsed 0.658 ms (65.797 ms / 100) B = [150, 50, 250] (stride (1, 150, 7500)) A = [150, 50, 15] (stride (750, 15, 1)) dim = 2 6.071 -> 6.085 ( +0.23%) [ +0.61% +0.08% +0.00% / +0.40% +0.40% +0.23%] index_add_ linear : Elapsed 0.061 ms (6.108 ms / 100) 5.890 -> 5.905 ( +0.25%) [ +0.34% +0.08% +0.00% / +0.54% +0.25% +0.29%] index_copy_ linear : Elapsed 0.059 ms (5.910 ms / 100) 6.062 -> 6.096 ( +0.56%) [ +0.48% +0.13% +0.00% / +0.56% +0.64% +0.66%] index_add_ reverse : Elapsed 0.061 ms (6.091 ms / 100) 5.880 -> 5.913 ( +0.56%) [ +0.46% +0.00% +0.09% / +0.66% +0.66% +0.56%] index_copy_ reverse : Elapsed 0.059 ms (5.907 ms / 100) 6.061 -> 6.095 ( +0.56%) [ +0.41% +0.00% +0.05% / +0.56% +0.56% +0.71%] index_add_ spread : Elapsed 0.061 ms (6.086 ms / 100) 5.877 -> 5.912 ( +0.60%) [ +0.49% +0.00% +0.02% / +0.60% +0.90% +0.87%] index_copy_ spread : Elapsed 0.059 ms (5.906 ms / 100) 6.077 -> 6.113 ( +0.59%) [ +0.44% +0.13% +0.00% / +0.59% +0.66% +0.64%] index_add_ strided 3 : Elapsed 0.061 ms (6.104 ms / 100) 5.890 -> 5.924 ( +0.58%) [ +0.63% +0.15% +0.00% / +0.68% +0.58% +0.75%] index_copy_ strided 3 : Elapsed 0.059 ms (5.927 ms / 100) 6.079 -> 6.110 ( +0.51%) [ +0.59% +0.15% +0.00% / +0.51% +0.97% +0.82%] index_add_ strided 7 : Elapsed 0.061 ms (6.115 ms / 100) 5.897 -> 5.928 ( +0.53%) [ +0.44% +0.03% +0.00% / +0.53% +0.66% +0.73%] index_copy_ strided 7 : Elapsed 0.059 ms (5.923 ms / 100) 6.076 -> 6.094 ( +0.30%) [ +0.28% +0.13% +0.00% / +0.43% +0.58% +0.30%] index_add_ perm : Elapsed 0.061 ms (6.093 ms / 100) 5.881 -> 5.916 ( +0.60%) [ +0.51% +0.00% +0.15% / +0.60% +0.66% +1.92%] index_copy_ perm : Elapsed 0.059 ms (5.911 ms / 100) 6.068 -> 6.095 ( +0.44%) [ +0.46% +0.00% +0.08% / +0.44% +0.82% +0.81%] index_add_ perm_sorted : Elapsed 0.061 ms (6.096 ms / 100) 5.889 -> 5.927 ( +0.65%) [ +0.70% +0.02% +0.00% / +0.65% +0.66% +0.82%] index_copy_ perm_sorted : Elapsed 0.059 ms (5.930 ms / 100) 98.325 -> 98.531 ( +0.21%) [ +0.09% +0.00% +0.36% / +0.21% +0.91% +0.99%] index_select const : Elapsed 0.984 ms (98.411 ms / 100) 102.062 -> 102.016 ( -0.05%) [ +0.00% +0.24% +0.15% / -0.05% +0.43% +0.43%] index_select wrap : Elapsed 1.021 ms (102.062 ms / 100) 98.101 -> 98.444 ( +0.35%) [ +0.00% +0.45% +0.56% / +0.35% +1.24% +1.31%] index_select linear : Elapsed 0.981 ms (98.101 ms / 100) 98.515 -> 98.572 ( +0.06%) [ +0.46% +0.51% +0.00% / +0.06% +1.05% +0.67%] index_select reverse : Elapsed 0.990 ms (98.966 ms / 100) 97.947 -> 98.076 ( +0.13%) [ +0.00% +0.38% +0.65% / +0.13% +1.64% +1.51%] index_select skip64 : Elapsed 0.979 ms (97.947 ms / 100) 98.522 -> 98.339 ( -0.19%) [ +0.46% +0.00% +0.08% / -0.19% +0.96% +1.26%] index_select skip256 : Elapsed 0.990 ms (98.975 ms / 100) 98.701 -> 98.980 ( +0.28%) [ +0.00% +0.52% +0.53% / +0.28% +1.44% +1.38%] index_select spread : Elapsed 0.987 ms (98.701 ms / 100) 102.369 -> 102.591 ( +0.22%) [ +0.00% +0.62% +0.08% / +0.22% +0.53% +0.70%] index_select strided 3 : Elapsed 1.024 ms (102.369 ms / 100) 102.156 -> 101.913 ( -0.24%) [ +0.00% +0.48% +0.38% / -0.24% +0.97% +1.21%] index_select strided 5 : Elapsed 1.022 ms (102.156 ms / 100) 102.333 -> 101.846 ( -0.48%) [ +0.00% +0.52% +0.08% / -0.48% +0.60% +1.16%] index_select strided 7 : Elapsed 1.023 ms (102.333 ms / 100) 101.703 -> 101.848 ( +0.14%) [ +0.03% +0.94% +0.00% / +0.14% +1.75% +1.62%] index_select strided 8 : Elapsed 1.017 ms (101.731 ms / 100) 101.904 -> 101.902 ( -0.00%) [ +0.25% +0.04% +0.00% / -0.00% +0.74% +1.05%] index_select random : Elapsed 1.022 ms (102.159 ms / 100) 98.177 -> 98.330 ( +0.16%) [ +0.00% +0.40% +0.90% / +0.16% +1.73% +1.65%] index_select random_sorted : Elapsed 0.982 ms (98.177 ms / 100) ==================== rep_count = 100 dimensions = [1, 5, 200, 500] out_shape = [1, 200, 500] in_shape = [5, 200, 500] idx_dim = 0 B = [1, 200, 500] (stride (100000, 500, 1)) A = [5, 200, 500] (stride (1, 2500, 5)) dim = 0 5.323 -> 5.325 ( +0.04%) [ +0.09% +0.00% +0.00% / +0.04% +0.19% +0.24%] index_select const : Elapsed 0.053 ms (5.328 ms / 100) 5.318 -> 5.316 ( -0.04%) [ +0.39% +0.00% +0.21% / -0.04% +0.30% +0.36%] index_select wrap : Elapsed 0.053 ms (5.339 ms / 100) 5.322 -> 5.328 ( +0.11%) [ +0.08% +0.00% +0.00% / +0.11% +0.28% +0.21%] index_select linear : Elapsed 0.053 ms (5.326 ms / 100) 5.309 -> 5.332 ( +0.43%) [ +0.40% +0.17% +0.00% / +0.43% +0.62% +0.43%] index_select reverse : Elapsed 0.053 ms (5.330 ms / 100) 5.318 -> 5.327 ( +0.17%) [ +0.06% +0.00% +0.08% / +0.38% +0.17% +0.26%] index_select skip64 : Elapsed 0.053 ms (5.321 ms / 100) 5.322 -> 5.330 ( +0.15%) [ +0.00% +0.00% +0.00% / +0.24% +0.36% +0.15%] index_select skip256 : Elapsed 0.053 ms (5.322 ms / 100) 5.303 -> 5.318 ( +0.28%) [ +0.40% +0.00% +0.23% / +0.28% +0.55% +0.53%] index_select spread : Elapsed 0.053 ms (5.324 ms / 100) 5.322 -> 5.321 ( -0.02%) [ +0.06% +0.00% +0.09% / -0.02% +0.26% +0.11%] index_select strided 3 : Elapsed 0.053 ms (5.325 ms / 100) 5.315 -> 5.315 ( +0.00%) [ +0.13% +0.02% +0.00% / +0.00% +0.28% +0.26%] index_select random : Elapsed 0.053 ms (5.322 ms / 100) 5.310 -> 5.319 ( +0.17%) [ +0.00% +0.17% +0.02% / +0.17% +0.30% +0.21%] index_select random_sorted : Elapsed 0.053 ms (5.310 ms / 100) 5.316 -> 5.322 ( +0.11%) [ +0.13% +0.15% +0.00% / +0.11% +0.40% +0.17%] index_select perm : Elapsed 0.053 ms (5.323 ms / 100) 5.322 -> 5.324 ( +0.04%) [ +0.13% +0.09% +0.00% / +0.04% +0.30% +0.26%] index_select perm_sorted : Elapsed 0.053 ms (5.329 ms / 100) B = [1, 200, 500] (stride (500, 500, 1)) A = [5, 200, 500] (stride (1, 2500, 5)) dim = 0 5.329 -> 5.321 ( -0.15%) [ +0.21% +0.00% +0.02% / +0.02% -0.15% +0.15%] index_select const : Elapsed 0.053 ms (5.340 ms / 100) 5.321 -> 5.320 ( -0.02%) [ +0.19% +0.26% +0.00% / +0.00% -0.02% +0.19%] index_select wrap : Elapsed 0.053 ms (5.331 ms / 100) 5.322 -> 5.335 ( +0.24%) [ +0.15% +0.02% +0.00% / +0.24% +0.32% +0.24%] index_select linear : Elapsed 0.053 ms (5.330 ms / 100) 5.319 -> 5.325 ( +0.11%) [ +0.15% +0.00% +0.24% / +0.11% +0.19% +0.36%] index_select reverse : Elapsed 0.053 ms (5.327 ms / 100) 5.317 -> 5.334 ( +0.32%) [ +0.23% +0.09% +0.00% / +0.36% +0.38% +0.32%] index_select skip64 : Elapsed 0.053 ms (5.329 ms / 100) 5.317 -> 5.324 ( +0.13%) [ +0.11% +0.00% +0.11% / +0.13% +0.38% +0.24%] index_select skip256 : Elapsed 0.053 ms (5.323 ms / 100) 5.319 -> 5.325 ( +0.11%) [ +0.23% +0.00% +0.02% / +0.11% +0.19% +0.24%] index_select spread : Elapsed 0.053 ms (5.331 ms / 100) 5.319 -> 5.327 ( +0.15%) [ +0.00% +0.15% +0.19% / +0.15% +0.15% +0.23%] index_select strided 3 : Elapsed 0.053 ms (5.319 ms / 100) 5.310 -> 5.314 ( +0.08%) [ +0.00% +0.06% +0.17% / +0.08% +0.28% +0.28%] index_select random : Elapsed 0.053 ms (5.310 ms / 100) 5.314 -> 5.317 ( +0.06%) [ +0.08% +0.00% +0.08% / +0.19% +0.06% +0.30%] index_select random_sorted : Elapsed 0.053 ms (5.318 ms / 100) 5.308 -> 5.318 ( +0.19%) [ +0.15% +0.00% +0.30% / +0.28% +0.19% +0.30%] index_select perm : Elapsed 0.053 ms (5.316 ms / 100) 5.313 -> 5.311 ( -0.04%) [ +0.28% +0.21% +0.00% / +0.38% -0.04% +0.19%] index_select perm_sorted : Elapsed 0.053 ms (5.328 ms / 100) B = [1, 200, 500] (stride (1, 1, 200)) A = [5, 200, 500] (stride (200, 1, 1000)) dim = 0 5.460 -> 5.462 ( +0.04%) [ +0.24% +0.18% +0.00% / +0.04% +0.05% +0.11%] index_select const : Elapsed 0.055 ms (5.473 ms / 100) 5.455 -> 5.454 ( -0.02%) [ +0.16% +0.00% +0.20% / +0.31% +0.00% -0.02%] index_select wrap : Elapsed 0.055 ms (5.464 ms / 100) 5.455 -> 5.459 ( +0.07%) [ +0.42% +0.00% +0.24% / +0.22% +0.07% +0.20%] index_select linear : Elapsed 0.055 ms (5.478 ms / 100) 5.463 -> 5.458 ( -0.09%) [ +0.00% +0.04% +0.00% / -0.09% -0.02% +0.13%] index_select reverse : Elapsed 0.055 ms (5.463 ms / 100) 5.455 -> 5.459 ( +0.07%) [ +0.13% +0.00% +0.04% / +0.15% +0.07% +0.07%] index_select skip64 : Elapsed 0.055 ms (5.462 ms / 100) 5.454 -> 5.454 ( +0.00%) [ +0.00% +0.22% +0.04% / +0.07% +0.29% +0.00%] index_select skip256 : Elapsed 0.055 ms (5.454 ms / 100) 5.450 -> 5.454 ( +0.07%) [ +0.22% +0.20% +0.00% / +0.26% +0.15% +0.07%] index_select spread : Elapsed 0.055 ms (5.462 ms / 100) 5.452 -> 5.457 ( +0.09%) [ +0.00% +0.04% +0.17% / +0.18% +0.09% +0.18%] index_select strided 3 : Elapsed 0.055 ms (5.452 ms / 100) 5.449 -> 5.460 ( +0.20%) [ +0.11% +0.04% +0.00% / +0.20% +0.33% +0.57%] index_select random : Elapsed 0.055 ms (5.455 ms / 100) 5.450 -> 5.458 ( +0.15%) [ +0.04% +0.02% +0.00% / +0.15% +0.40% +0.28%] index_select random_sorted : Elapsed 0.055 ms (5.452 ms / 100) 5.448 -> 5.449 ( +0.02%) [ +0.09% +0.00% +0.00% / +0.20% +0.20% +0.02%] index_select perm : Elapsed 0.055 ms (5.453 ms / 100) 5.454 -> 5.458 ( +0.07%) [ +0.00% +0.07% +0.07% / +0.15% +0.11% +0.07%] index_select perm_sorted : Elapsed 0.055 ms (5.454 ms / 100) out_shape = [5, 1, 500] in_shape = [5, 200, 500] idx_dim = 1 B = [5, 1, 500] (stride (500, 1, 1)) A = [5, 200, 500] (stride (1, 5, 1000)) dim = 1 0.516 -> 0.514 ( -0.39%) [ +2.91% +0.00% +1.94% / +3.88% +0.78% -0.39%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.521 -> 0.516 ( -0.96%) [ +3.84% +0.00% +0.77% / +14.20% +1.92% -0.96%] index_select wrap : Elapsed 0.005 ms (0.541 ms / 100) 0.519 -> 0.513 ( -1.16%) [ +1.54% +0.58% +0.00% / +2.31% -0.39% -1.16%] index_select linear : Elapsed 0.005 ms (0.527 ms / 100) 0.520 -> 0.514 ( -1.15%) [ +2.12% +0.19% +0.00% / +1.73% -1.15% -0.77%] index_select reverse : Elapsed 0.005 ms (0.531 ms / 100) 0.518 -> 0.513 ( -0.97%) [ +5.41% +7.53% +0.00% / +2.32% -0.97% +0.00%] index_select skip64 : Elapsed 0.005 ms (0.546 ms / 100) 0.530 -> 0.511 ( -3.58%) [ +0.00% +3.77% +3.58% / +0.38% -3.58% -2.64%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.509 ( -1.36%) [ +3.29% +0.00% +10.08% / +2.71% -1.36% +6.98%] index_select spread : Elapsed 0.005 ms (0.533 ms / 100) 0.520 -> 0.518 ( -0.38%) [ +0.77% +6.15% +0.00% / +2.50% +11.35% -0.38%] index_select strided 3 : Elapsed 0.005 ms (0.524 ms / 100) 0.523 -> 0.508 ( -2.87%) [ +1.34% +4.97% +0.00% / +7.84% -2.87% +0.96%] index_select strided 5 : Elapsed 0.005 ms (0.530 ms / 100) 0.520 -> 0.511 ( -1.73%) [ +4.04% +0.00% +0.00% / +3.46% -1.73% -0.96%] index_select strided 7 : Elapsed 0.005 ms (0.541 ms / 100) 0.521 -> 0.515 ( -1.15%) [ +0.77% +0.00% +1.34% / +13.82% -1.15% +0.19%] index_select strided 8 : Elapsed 0.005 ms (0.525 ms / 100) 0.520 -> 0.512 ( -1.54%) [ +3.08% +0.00% +0.77% / +3.85% -1.54% -0.38%] index_select strided 16 : Elapsed 0.005 ms (0.536 ms / 100) 0.520 -> 0.515 ( -0.96%) [ +1.54% +10.00% +0.00% / +2.31% -0.96% -0.58%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.516 -> 0.509 ( -1.36%) [ +9.69% +0.00% +0.78% / +3.10% -1.36% -0.58%] index_select strided 100 : Elapsed 0.006 ms (0.566 ms / 100) 0.510 -> 0.512 ( +0.39%) [ +4.51% +0.00% +7.25% / +4.12% +0.39% +0.39%] index_select random : Elapsed 0.005 ms (0.533 ms / 100) 0.513 -> 0.511 ( -0.39%) [ +3.90% +0.00% +0.19% / +3.51% -0.39% +8.97%] index_select random_sorted : Elapsed 0.005 ms (0.533 ms / 100) 0.511 -> 0.516 ( +0.98%) [ +3.91% +14.87% +0.00% / +4.11% +12.52% +0.98%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.510 ( -0.78%) [ +2.92% +0.00% +1.17% / +2.53% -0.78% +0.00%] index_select perm_sorted : Elapsed 0.005 ms (0.529 ms / 100) B = [5, 1, 500] (stride (500, 2500, 1)) dim = 1 fill_cnt = 200 8.889 -> 9.010 ( +1.36%) [ +1.18% +0.04% +0.00% / +1.36% +1.42% +1.44%] index_fill_ const : Elapsed 0.090 ms (8.994 ms / 100) 8.880 -> 9.002 ( +1.37%) [ +1.24% +0.06% +0.00% / +1.40% +1.42% +1.37%] index_fill_ linear : Elapsed 0.090 ms (8.990 ms / 100) 8.879 -> 9.001 ( +1.37%) [ +1.16% +0.07% +0.00% / +1.37% +1.40% +1.40%] index_fill_ reverse : Elapsed 0.090 ms (8.982 ms / 100) 8.898 -> 9.007 ( +1.22%) [ +1.20% +0.08% +0.00% / +1.35% +1.24% +1.22%] index_fill_ skip64 : Elapsed 0.090 ms (9.005 ms / 100) 8.903 -> 9.009 ( +1.19%) [ +1.22% +0.07% +0.00% / +1.36% +1.19% +1.19%] index_fill_ skip256 : Elapsed 0.090 ms (9.012 ms / 100) 8.889 -> 9.008 ( +1.34%) [ +1.19% +0.04% +0.00% / +1.34% +1.47% +1.50%] index_fill_ spread : Elapsed 0.090 ms (8.995 ms / 100) 8.892 -> 9.012 ( +1.35%) [ +1.24% +0.04% +0.00% / +1.35% +1.35% +1.37%] index_fill_ random : Elapsed 0.090 ms (9.002 ms / 100) 8.897 -> 9.007 ( +1.24%) [ +1.18% +0.10% +0.00% / +1.38% +1.24% +1.24%] index_fill_ random_sorted : Elapsed 0.090 ms (9.002 ms / 100) B = [5, 1, 500] (stride (500, 2500, 1)) A = [5, 200, 500] (stride (200, 1, 1000)) dim = 1 0.516 -> 0.514 ( -0.39%) [ +5.62% +2.71% +0.00% / +3.29% -0.39% -0.19%] index_select const : Elapsed 0.005 ms (0.545 ms / 100) 0.518 -> 0.513 ( -0.97%) [ +2.12% +0.00% +6.95% / +2.51% -0.97% +8.11%] index_select wrap : Elapsed 0.005 ms (0.529 ms / 100) 0.529 -> 0.510 ( -3.59%) [ +0.00% +0.00% +5.29% / +0.38% -3.59% +9.83%] index_select linear : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.525 ( +1.55%) [ +3.48% +0.00% +0.97% / +2.90% +12.19% +1.55%] index_select reverse : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +2.72% +0.00% +0.19% / +8.16% -0.39% +1.94%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +2.93% +0.00% +2.54% / +3.71% -0.39% +1.95%] index_select skip256 : Elapsed 0.005 ms (0.527 ms / 100) 0.513 -> 0.509 ( -0.78%) [ +4.48% +0.00% +0.78% / +4.29% -0.78% +2.14%] index_select spread : Elapsed 0.005 ms (0.536 ms / 100) 0.516 -> 0.513 ( -0.58%) [ +2.91% +0.19% +0.00% / +4.07% -0.58% +1.36%] index_select strided 3 : Elapsed 0.005 ms (0.531 ms / 100) 0.512 -> 0.506 ( -1.17%) [ +3.71% +0.00% +1.95% / +7.23% -1.17% +4.10%] index_select strided 5 : Elapsed 0.005 ms (0.531 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.91% +0.00% +1.76% / +4.69% +0.78% +0.39%] index_select strided 7 : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.505 ( -1.75%) [ +6.23% +0.00% +0.97% / +3.11% -1.75% +0.58%] index_select strided 8 : Elapsed 0.005 ms (0.546 ms / 100) 0.515 -> 0.509 ( -1.17%) [ +2.91% +0.00% +0.58% / +3.11% -1.17% +2.14%] index_select strided 16 : Elapsed 0.005 ms (0.530 ms / 100) 0.513 -> 0.512 ( -0.19%) [ +4.68% +0.00% +0.00% / +4.09% -0.19% +0.97%] index_select strided 64 : Elapsed 0.005 ms (0.537 ms / 100) 0.514 -> 0.520 ( +1.17%) [ +2.72% +0.00% +0.19% / +3.50% +1.95% +1.17%] index_select strided 100 : Elapsed 0.005 ms (0.528 ms / 100) 0.518 -> 0.512 ( -1.16%) [ +4.44% +0.00% +5.79% / +2.90% -1.16% +2.51%] index_select random : Elapsed 0.005 ms (0.541 ms / 100) 0.512 -> 0.515 ( +0.59%) [ +3.91% +0.00% +8.20% / +6.45% +0.59% +16.60%] index_select random_sorted : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.517 ( +0.39%) [ +3.11% +0.97% +0.00% / +2.91% +6.41% +0.39%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.510 ( -0.78%) [ +3.31% +0.00% +0.39% / +3.11% -0.78% +1.17%] index_select perm_sorted : Elapsed 0.005 ms (0.531 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) A = [5, 200, 500] (stride (500, 2500, 1)) dim = 1 0.510 -> 0.512 ( +0.39%) [ +6.47% +0.00% +2.35% / +4.71% +0.39% +1.18%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.518 -> 0.528 ( +1.93%) [ +2.51% +0.00% +0.58% / +2.90% +3.47% +1.93%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.511 -> 0.511 ( +0.00%) [+15.66% +0.00% +1.37% / +4.70% +0.00% +0.20%] index_select linear : Elapsed 0.006 ms (0.591 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +3.89% +0.00% +1.75% / +3.31% +0.19% +0.19%] index_select reverse : Elapsed 0.005 ms (0.534 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +3.51% +0.00% +0.58% / +2.14% +0.00% +1.56%] index_select skip64 : Elapsed 0.005 ms (0.531 ms / 100) 0.510 -> 0.512 ( +0.39%) [ +4.90% +0.00% +1.37% / +4.51% +1.18% +0.39%] index_select skip256 : Elapsed 0.005 ms (0.535 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +2.73% +0.98% +0.00% / +2.73% +1.37% -0.39%] index_select spread : Elapsed 0.005 ms (0.526 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +2.92% +0.00% +7.60% / +3.51% +0.58% +0.19%] index_select strided 3 : Elapsed 0.005 ms (0.528 ms / 100) 0.508 -> 0.521 ( +2.56%) [+11.81% +0.00% +8.46% / +4.33% +2.56% +13.58%] index_select strided 5 : Elapsed 0.006 ms (0.568 ms / 100) 0.505 -> 0.526 ( +4.16%) [ +5.54% +0.00% +2.57% / +4.75% +8.51% +4.16%] index_select strided 7 : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.512 ( -0.39%) [ +4.09% +0.00% +1.36% / +2.53% -0.39% +0.00%] index_select strided 8 : Elapsed 0.005 ms (0.535 ms / 100) 0.512 -> 0.517 ( +0.98%) [ +3.52% +0.00% +1.37% / +4.49% +0.98% +1.56%] index_select strided 16 : Elapsed 0.005 ms (0.530 ms / 100) 0.511 -> 0.512 ( +0.20%) [ +3.33% +0.00% +0.59% / +3.52% +0.20% +4.70%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +4.67% +0.00% +0.19% / +6.03% -0.19% +0.58%] index_select strided 100 : Elapsed 0.005 ms (0.538 ms / 100) 0.508 -> 0.516 ( +1.57%) [ +4.13% +1.77% +0.00% / +4.53% +1.57% +1.77%] index_select random : Elapsed 0.005 ms (0.529 ms / 100) 0.518 -> 0.511 ( -1.35%) [ +2.70% +0.00% +3.67% / +2.90% -1.35% +0.19%] index_select random_sorted : Elapsed 0.005 ms (0.532 ms / 100) 0.519 -> 0.511 ( -1.54%) [ +2.31% +0.00% +0.00% / +2.31% -1.54% -1.35%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.521 -> 0.517 ( -0.77%) [ +4.41% +1.34% +0.00% / +2.30% -0.77% +0.77%] index_select perm_sorted : Elapsed 0.005 ms (0.544 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) A = [5, 200, 500] (stride (1, 5, 1000)) dim = 1 0.513 -> 0.515 ( +0.39%) [ +3.90% +0.00% +9.36% / +3.90% +0.39% +7.99%] index_select const : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.514 ( +0.00%) [ +3.11% +0.00% +0.58% / +2.33% +6.81% +0.00%] index_select wrap : Elapsed 0.005 ms (0.530 ms / 100) 0.511 -> 0.515 ( +0.78%) [ +2.94% +0.00% +1.76% / +4.11% +0.78% +2.35%] index_select linear : Elapsed 0.005 ms (0.526 ms / 100) 0.518 -> 0.511 ( -1.35%) [ +2.32% +0.00% +0.39% / +3.28% -1.35% -0.39%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.518 -> 0.515 ( -0.58%) [ +2.12% +0.00% +0.00% / +11.58% -0.58% +1.74%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.515 -> 0.520 ( +0.97%) [ +2.52% +1.75% +0.00% / +3.30% +0.97% +2.72%] index_select skip256 : Elapsed 0.005 ms (0.528 ms / 100) 0.512 -> 0.511 ( -0.20%) [ +3.32% +0.00% +1.37% / +5.86% -0.20% +0.59%] index_select spread : Elapsed 0.005 ms (0.529 ms / 100) 0.518 -> 0.519 ( +0.19%) [ +2.51% +0.00% +0.58% / +2.70% +0.19% +1.35%] index_select strided 3 : Elapsed 0.005 ms (0.531 ms / 100) 0.511 -> 0.515 ( +0.78%) [ +7.24% +0.00% +1.96% / +4.11% +0.78% +0.78%] index_select strided 5 : Elapsed 0.005 ms (0.548 ms / 100) 0.508 -> 0.512 ( +0.79%) [ +4.53% +0.00% +15.16% / +4.92% +0.79% +15.94%] index_select strided 7 : Elapsed 0.005 ms (0.531 ms / 100) 0.522 -> 0.516 ( -1.15%) [ +2.49% +13.60% +0.00% / +1.72% +4.41% -1.15%] index_select strided 8 : Elapsed 0.005 ms (0.535 ms / 100) 0.507 -> 0.511 ( +0.79%) [ +5.13% +0.00% +3.16% / +3.94% +0.79% +2.56%] index_select strided 16 : Elapsed 0.005 ms (0.533 ms / 100) 0.513 -> 0.510 ( -0.58%) [ +4.29% +0.00% +1.36% / +3.31% -0.58% +1.36%] index_select strided 64 : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +3.50% +0.00% +0.00% / +6.60% -0.39% +0.58%] index_select strided 100 : Elapsed 0.005 ms (0.533 ms / 100) 0.518 -> 0.506 ( -2.32%) [ +2.70% +0.00% +0.77% / +2.90% -2.32% +0.00%] index_select random : Elapsed 0.005 ms (0.532 ms / 100) 0.510 -> 0.511 ( +0.20%) [ +3.53% +0.00% +1.57% / +4.31% +0.20% +1.96%] index_select random_sorted : Elapsed 0.005 ms (0.528 ms / 100) 0.512 -> 0.512 ( +0.00%) [ +5.86% +0.00% +2.54% / +3.91% +0.00% +0.39%] index_select perm : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.514 ( -0.58%) [ +2.51% +0.00% +0.77% / +2.71% -0.58% +2.71%] index_select perm_sorted : Elapsed 0.005 ms (0.530 ms / 100) B = [5, 1, 500] (stride (1, 5, 5)) dim = 1 fill_cnt = 200 8.895 -> 9.014 ( +1.34%) [ +1.21% +0.03% +0.00% / +1.34% +1.41% +1.42%] index_fill_ const : Elapsed 0.090 ms (9.003 ms / 100) 8.881 -> 9.003 ( +1.37%) [ +1.24% +0.07% +0.00% / +1.40% +1.41% +1.37%] index_fill_ linear : Elapsed 0.090 ms (8.991 ms / 100) 8.878 -> 9.003 ( +1.41%) [ +1.18% +0.06% +0.00% / +1.41% +1.43% +1.41%] index_fill_ reverse : Elapsed 0.090 ms (8.983 ms / 100) 8.897 -> 9.009 ( +1.26%) [ +1.25% +0.10% +0.00% / +1.36% +1.27% +1.26%] index_fill_ skip64 : Elapsed 0.090 ms (9.008 ms / 100) 8.912 -> 9.015 ( +1.16%) [ +1.26% +0.08% +0.00% / +1.35% +1.18% +1.16%] index_fill_ skip256 : Elapsed 0.090 ms (9.024 ms / 100) 8.925 -> 9.039 ( +1.28%) [ +1.12% +0.00% +0.00% / +1.32% +1.28% +1.28%] index_fill_ spread : Elapsed 0.090 ms (9.025 ms / 100) 8.930 -> 9.043 ( +1.27%) [ +1.19% +0.06% +0.00% / +1.27% +1.37% +1.35%] index_fill_ random : Elapsed 0.090 ms (9.036 ms / 100) 8.949 -> 9.072 ( +1.37%) [ +1.20% +0.09% +0.00% / +1.45% +1.41% +1.37%] index_fill_ random_sorted : Elapsed 0.091 ms (9.056 ms / 100) out_shape = [5, 200, 1] in_shape = [5, 200, 500] idx_dim = 2 B = [5, 200, 1] (stride (1, 5, 1)) A = [5, 200, 500] (stride (1, 2500, 5)) dim = 2 0.518 -> 0.513 ( -0.97%) [ +3.28% +0.00% +1.16% / +2.90% -0.97% -0.77%] index_select const : Elapsed 0.005 ms (0.535 ms / 100) 0.511 -> 0.512 ( +0.20%) [ +2.74% +0.00% +2.15% / +3.72% +0.20% +0.78%] index_select wrap : Elapsed 0.005 ms (0.525 ms / 100) 0.516 -> 0.513 ( -0.58%) [ +9.50% +0.78% +0.00% / +2.71% +0.19% -0.58%] index_select linear : Elapsed 0.006 ms (0.565 ms / 100) 0.519 -> 0.514 ( -0.96%) [ +9.44% +0.77% +0.00% / +6.17% -0.96% -0.19%] index_select reverse : Elapsed 0.006 ms (0.568 ms / 100) 0.522 -> 0.512 ( -1.92%) [ +1.34% +0.00% +0.38% / +2.11% -1.92% +2.68%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.509 -> 0.515 ( +1.18%) [+11.59% +0.00% +9.63% / +4.52% +1.18% +16.31%] index_select skip256 : Elapsed 0.006 ms (0.568 ms / 100) 0.531 -> 0.518 ( -2.45%) [ +6.78% +0.00% +3.58% / -0.56% +0.75% -2.45%] index_select spread : Elapsed 0.006 ms (0.567 ms / 100) 0.510 -> 0.520 ( +1.96%) [+16.08% +0.00% +1.37% / +4.12% +15.10% +1.96%] index_select strided 3 : Elapsed 0.006 ms (0.592 ms / 100) 0.513 -> 0.513 ( +0.00%) [+12.87% +0.00% +1.17% / +1.95% +0.00% +1.75%] index_select strided 5 : Elapsed 0.006 ms (0.579 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +8.37% +0.00% +2.14% / +2.72% -0.58% +0.00%] index_select strided 7 : Elapsed 0.006 ms (0.557 ms / 100) 0.513 -> 0.516 ( +0.58%) [+16.57% +0.00% +2.34% / +9.94% +0.58% +0.78%] index_select strided 8 : Elapsed 0.006 ms (0.598 ms / 100) 0.521 -> 0.518 ( -0.58%) [ +1.92% +2.88% +0.00% / +0.77% -0.58% -0.19%] index_select strided 16 : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.511 ( -0.78%) [+10.49% +0.00% +1.55% / +3.88% -0.78% +0.19%] index_select strided 64 : Elapsed 0.006 ms (0.569 ms / 100) 0.511 -> 0.510 ( -0.20%) [+10.76% +0.00% +1.96% / +2.94% -0.20% +0.78%] index_select strided 100 : Elapsed 0.006 ms (0.566 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +3.91% +0.00% +0.98% / +2.93% +0.20% +0.59%] index_select strided 255 : Elapsed 0.005 ms (0.532 ms / 100) 0.512 -> 0.515 ( +0.59%) [+10.55% +0.00% +1.76% / +4.10% +0.59% +0.59%] index_select strided 256 : Elapsed 0.006 ms (0.566 ms / 100) 0.508 -> 0.511 ( +0.59%) [ +4.13% +0.00% +1.77% / +4.33% +0.59% +2.76%] index_select strided 257 : Elapsed 0.005 ms (0.529 ms / 100) 0.513 -> 0.521 ( +1.56%) [ +3.51% +0.00% +8.38% / +4.09% +1.56% +11.50%] index_select random : Elapsed 0.005 ms (0.531 ms / 100) 0.513 -> 0.515 ( +0.39%) [+10.14% +0.00% +18.32% / +4.48% +0.39% +1.36%] index_select random_sorted : Elapsed 0.006 ms (0.565 ms / 100) 0.509 -> 0.519 ( +1.96%) [ +3.54% +0.00% +1.38% / +4.52% +5.89% +1.96%] index_select perm : Elapsed 0.005 ms (0.527 ms / 100) 0.518 -> 0.512 ( -1.16%) [ +2.90% +0.19% +0.00% / +1.74% -1.16% -0.97%] index_select perm_sorted : Elapsed 0.005 ms (0.533 ms / 100) B = [5, 200, 1] (stride (1, 5, 1000)) A = [5, 200, 500] (stride (100000, 500, 1)) dim = 2 0.563 -> 0.567 ( +0.71%) [ +7.28% +0.00% +0.00% / +0.89% +1.07% +0.71%] index_select const : Elapsed 0.006 ms (0.604 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +7.99% +0.00% +0.00% / +5.15% +0.89% +0.89%] index_select wrap : Elapsed 0.006 ms (0.608 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +6.41% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_select linear : Elapsed 0.006 ms (0.598 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +0.89% +0.18% +0.00% / +1.25% +1.07% +0.89%] index_select reverse : Elapsed 0.006 ms (0.567 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.89% +0.89% +0.71%] index_select skip64 : Elapsed 0.006 ms (0.568 ms / 100) 0.562 -> 0.568 ( +1.07%) [ +0.89% +0.36% +0.00% / +1.07% +1.07% +1.07%] index_select skip256 : Elapsed 0.006 ms (0.567 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +0.89% +0.71%] index_select spread : Elapsed 0.006 ms (0.568 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +1.07% +0.18% +0.00% / +1.07% +0.89% +1.07%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.71% +0.00% +0.00% / +0.89% +0.89% +1.07%] index_select strided 5 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +1.07% +0.18% +0.00% / +1.07% +1.07% +0.89%] index_select strided 7 : Elapsed 0.006 ms (0.568 ms / 100) 0.562 -> 0.568 ( +1.07%) [ +0.89% +0.00% +3.74% / +1.25% +1.07% +1.07%] index_select strided 8 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.568 ( +1.07%) [ +1.07% +0.00% +0.36% / +1.25% +6.05% +1.07%] index_select strided 16 : Elapsed 0.006 ms (0.568 ms / 100) 0.562 -> 0.568 ( +1.07%) [ +0.89% +0.00% +0.18% / +1.07% +1.07% +1.07%] index_select strided 64 : Elapsed 0.006 ms (0.567 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +1.07% +0.18% +0.00% / +0.89% +0.89% +0.71%] index_select strided 100 : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.89% +0.89% +0.71%] index_select strided 255 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +1.07% +0.00% +0.18% / +0.89% +0.71% +0.89%] index_select strided 256 : Elapsed 0.006 ms (0.569 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +1.07% +0.00% +0.18% / +1.25% +1.07% +0.89%] index_select strided 257 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_select random : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.89% +0.89% +0.71%] index_select random_sorted : Elapsed 0.006 ms (0.567 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.18% / +1.07% +0.89% +0.71%] index_select perm : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.566 ( +0.53%) [ +0.89% +0.00% +0.18% / +0.89% +0.89% +0.53%] index_select perm_sorted : Elapsed 0.006 ms (0.568 ms / 100) out_shape = [1, 500, 200] in_shape = [5, 500, 200] idx_dim = 0 B = [1, 500, 200] (stride (100000, 200, 1)) A = [5, 500, 200] (stride (1, 5, 2500)) dim = 0 9.503 -> 9.511 ( +0.08%) [ +0.36% +0.15% +0.00% / +0.08% +0.69% +1.00%] index_select const : Elapsed 0.095 ms (9.537 ms / 100) 9.496 -> 9.496 ( +0.00%) [ +0.28% +0.32% +0.00% / +0.00% +0.90% +0.46%] index_select wrap : Elapsed 0.095 ms (9.523 ms / 100) 9.500 -> 9.494 ( -0.06%) [ +0.00% +0.14% +0.35% / -0.06% +0.77% +0.73%] index_select linear : Elapsed 0.095 ms (9.500 ms / 100) 9.473 -> 9.561 ( +0.93%) [ +0.21% +0.34% +0.00% / +0.93% +1.12% +0.97%] index_select reverse : Elapsed 0.095 ms (9.493 ms / 100) 9.438 -> 9.483 ( +0.48%) [ +0.00% +0.99% +0.79% / +0.48% +1.09% +1.10%] index_select skip64 : Elapsed 0.094 ms (9.438 ms / 100) 9.483 -> 9.542 ( +0.62%) [ +0.73% +0.37% +0.00% / +0.62% +0.71% +0.97%] index_select skip256 : Elapsed 0.096 ms (9.552 ms / 100) 9.499 -> 9.489 ( -0.11%) [ +0.71% +0.07% +0.00% / -0.11% +0.23% +1.07%] index_select spread : Elapsed 0.096 ms (9.566 ms / 100) 9.491 -> 9.457 ( -0.36%) [ +0.23% +0.16% +0.00% / -0.36% +0.79% +0.54%] index_select strided 3 : Elapsed 0.095 ms (9.513 ms / 100) 9.470 -> 9.532 ( +0.65%) [ +0.00% +0.38% +1.05% / +0.65% +0.96% +0.84%] index_select random : Elapsed 0.095 ms (9.470 ms / 100) 9.474 -> 9.451 ( -0.24%) [ +0.00% +0.12% +0.30% / -0.24% +1.15% +0.39%] index_select random_sorted : Elapsed 0.095 ms (9.474 ms / 100) 9.464 -> 9.529 ( +0.69%) [ +0.21% +0.03% +0.00% / +0.69% +1.76% +0.88%] index_select perm : Elapsed 0.095 ms (9.484 ms / 100) 9.473 -> 9.494 ( +0.22%) [ +0.46% +0.21% +0.00% / +0.22% +1.13% +1.02%] index_select perm_sorted : Elapsed 0.095 ms (9.517 ms / 100) B = [1, 500, 200] (stride (100000, 1, 500)) A = [5, 500, 200] (stride (1, 1000, 5)) dim = 0 8.207 -> 8.225 ( +0.22%) [ +0.07% +0.00% +0.09% / +0.22% +0.39% +0.71%] index_select const : Elapsed 0.082 ms (8.213 ms / 100) 8.219 -> 8.240 ( +0.26%) [ +0.00% +0.11% +0.16% / +0.47% +0.35% +0.26%] index_select wrap : Elapsed 0.082 ms (8.219 ms / 100) 8.223 -> 8.234 ( +0.13%) [ +0.09% +0.10% +0.00% / +0.13% +0.83% +0.34%] index_select linear : Elapsed 0.082 ms (8.230 ms / 100) 8.206 -> 8.227 ( +0.26%) [ +0.43% +0.21% +0.00% / +0.26% +0.46% +0.43%] index_select reverse : Elapsed 0.082 ms (8.241 ms / 100) 8.212 -> 8.220 ( +0.10%) [ +0.18% +0.07% +0.00% / +0.10% +0.28% +0.21%] index_select skip64 : Elapsed 0.082 ms (8.227 ms / 100) 8.222 -> 8.231 ( +0.11%) [ +0.00% +0.11% +0.10% / +0.24% +0.11% +0.28%] index_select skip256 : Elapsed 0.082 ms (8.222 ms / 100) 8.214 -> 8.222 ( +0.10%) [ +0.26% +0.30% +0.00% / +0.10% +0.38% +0.28%] index_select spread : Elapsed 0.082 ms (8.235 ms / 100) 8.213 -> 8.233 ( +0.24%) [ +0.13% +0.18% +0.00% / +0.38% +0.24% +0.27%] index_select strided 3 : Elapsed 0.082 ms (8.224 ms / 100) 8.211 -> 8.223 ( +0.15%) [ +0.16% +0.18% +0.00% / +0.45% +0.55% +0.15%] index_select random : Elapsed 0.082 ms (8.224 ms / 100) 8.209 -> 8.221 ( +0.15%) [ +0.13% +0.00% +0.10% / +0.15% +0.39% +0.55%] index_select random_sorted : Elapsed 0.082 ms (8.220 ms / 100) 8.220 -> 8.244 ( +0.29%) [ +0.00% +0.02% +0.15% / +0.29% +0.49% +0.30%] index_select perm : Elapsed 0.082 ms (8.220 ms / 100) 8.223 -> 8.226 ( +0.04%) [ +0.12% +0.15% +0.00% / +0.04% +0.39% +0.29%] index_select perm_sorted : Elapsed 0.082 ms (8.233 ms / 100) B = [1, 500, 200] (stride (200, 200, 1)) A = [5, 500, 200] (stride (1, 1000, 5)) dim = 0 5.320 -> 5.324 ( +0.08%) [ +0.08% +0.04% +0.00% / +0.08% +0.45% +0.30%] index_select const : Elapsed 0.053 ms (5.324 ms / 100) 5.316 -> 5.321 ( +0.09%) [ +0.43% +0.15% +0.00% / +0.28% +0.09% +0.09%] index_select wrap : Elapsed 0.053 ms (5.339 ms / 100) 5.327 -> 5.325 ( -0.04%) [ +0.04% +0.00% +0.00% / -0.04% +0.02% +0.08%] index_select linear : Elapsed 0.053 ms (5.329 ms / 100) 5.320 -> 5.332 ( +0.23%) [ +0.00% +0.04% +0.02% / +0.23% +0.30% +0.26%] index_select reverse : Elapsed 0.053 ms (5.320 ms / 100) 5.313 -> 5.323 ( +0.19%) [ +0.45% +0.34% +0.00% / +0.19% +0.36% +0.34%] index_select skip64 : Elapsed 0.053 ms (5.337 ms / 100) 5.315 -> 5.325 ( +0.19%) [ +0.15% +0.00% +0.08% / +0.19% +0.45% +0.36%] index_select skip256 : Elapsed 0.053 ms (5.323 ms / 100) 5.317 -> 5.326 ( +0.17%) [ +0.13% +0.19% +0.00% / +0.17% +0.38% +0.19%] index_select spread : Elapsed 0.053 ms (5.324 ms / 100) 5.316 -> 5.317 ( +0.02%) [ +0.15% +0.09% +0.00% / +0.02% +0.43% +0.55%] index_select strided 3 : Elapsed 0.053 ms (5.324 ms / 100) 5.314 -> 5.324 ( +0.19%) [ +0.24% +0.00% +0.04% / +0.21% +0.19% +0.34%] index_select random : Elapsed 0.053 ms (5.327 ms / 100) 5.315 -> 5.327 ( +0.23%) [ +0.13% +0.23% +0.00% / +0.23% +0.36% +0.38%] index_select random_sorted : Elapsed 0.053 ms (5.322 ms / 100) 5.310 -> 5.317 ( +0.13%) [ +0.06% +0.00% +0.13% / +0.15% +0.13% +0.36%] index_select perm : Elapsed 0.053 ms (5.313 ms / 100) 5.318 -> 5.316 ( -0.04%) [ +0.41% +0.17% +0.00% / +0.23% -0.04% +0.09%] index_select perm_sorted : Elapsed 0.053 ms (5.340 ms / 100) B = [1, 500, 200] (stride (500, 1, 500)) dim = 0 fill_cnt = 5 5.783 -> 5.785 ( +0.03%) [ +0.00% +0.12% +0.14% / +0.29% +0.03% +0.07%] index_fill_ const : Elapsed 0.058 ms (5.783 ms / 100) 5.764 -> 5.767 ( +0.05%) [ +0.03% +0.00% +0.02% / +0.14% +0.05% +0.09%] index_fill_ linear : Elapsed 0.058 ms (5.766 ms / 100) 5.762 -> 5.764 ( +0.03%) [ +0.03% +0.09% +0.00% / +0.17% +0.14% +0.03%] index_fill_ reverse : Elapsed 0.058 ms (5.764 ms / 100) 5.762 -> 5.755 ( -0.12%) [ +0.17% +0.00% +0.05% / +0.02% -0.12% -0.02%] index_fill_ skip64 : Elapsed 0.058 ms (5.772 ms / 100) 5.774 -> 5.769 ( -0.09%) [ +0.10% +0.00% +0.05% / -0.07% -0.05% -0.09%] index_fill_ skip256 : Elapsed 0.058 ms (5.780 ms / 100) 5.768 -> 5.770 ( +0.03%) [ +0.02% +0.16% +0.00% / +0.33% +0.03% +0.17%] index_fill_ spread : Elapsed 0.058 ms (5.769 ms / 100) 5.767 -> 5.771 ( +0.07%) [ +0.36% +0.09% +0.00% / +0.14% +0.26% +0.07%] index_fill_ random : Elapsed 0.058 ms (5.788 ms / 100) 5.775 -> 5.774 ( -0.02%) [ +0.03% +0.00% +0.03% / +0.28% +0.07% -0.02%] index_fill_ random_sorted : Elapsed 0.058 ms (5.777 ms / 100) B = [1, 500, 200] (stride (500, 1, 500)) A = [5, 500, 200] (stride (100000, 200, 1)) dim = 0 7.796 -> 7.801 ( +0.06%) [ +0.04% +0.09% +0.00% / +0.06% +0.21% +0.26%] index_select const : Elapsed 0.078 ms (7.799 ms / 100) 7.796 -> 7.807 ( +0.14%) [ +0.10% +0.06% +0.00% / +0.14% +0.18% +0.45%] index_select wrap : Elapsed 0.078 ms (7.804 ms / 100) 7.805 -> 7.809 ( +0.05%) [ +0.00% +0.04% +0.00% / +0.05% +0.08% +0.23%] index_select linear : Elapsed 0.078 ms (7.805 ms / 100) 7.795 -> 7.801 ( +0.08%) [ +0.14% +0.13% +0.00% / +0.10% +0.08% +0.36%] index_select reverse : Elapsed 0.078 ms (7.806 ms / 100) 7.797 -> 7.807 ( +0.13%) [ +0.15% +0.00% +0.17% / +0.13% +0.18% +0.51%] index_select skip64 : Elapsed 0.078 ms (7.809 ms / 100) 7.796 -> 7.802 ( +0.08%) [ +0.13% +0.13% +0.00% / +0.08% +0.24% +0.28%] index_select skip256 : Elapsed 0.078 ms (7.806 ms / 100) 7.799 -> 7.794 ( -0.06%) [ +0.15% +0.00% +0.10% / -0.06% +0.28% +0.22%] index_select spread : Elapsed 0.078 ms (7.811 ms / 100) 7.797 -> 7.808 ( +0.14%) [ +0.31% +0.12% +0.00% / +0.15% +0.26% +0.14%] index_select strided 3 : Elapsed 0.078 ms (7.821 ms / 100) 7.797 -> 7.803 ( +0.08%) [ +0.26% +0.00% +0.01% / +0.08% +0.29% +0.14%] index_select random : Elapsed 0.078 ms (7.817 ms / 100) 7.795 -> 7.823 ( +0.36%) [ +0.22% +0.00% +0.08% / +0.36% +0.36% +0.36%] index_select random_sorted : Elapsed 0.078 ms (7.812 ms / 100) 7.807 -> 7.806 ( -0.01%) [ +0.00% +0.06% +0.26% / -0.01% +0.12% +0.15%] index_select perm : Elapsed 0.078 ms (7.807 ms / 100) 7.799 -> 7.809 ( +0.13%) [ +0.05% +0.06% +0.00% / +0.13% +0.33% +0.18%] index_select perm_sorted : Elapsed 0.078 ms (7.803 ms / 100) B = [1, 500, 200] (stride (500, 1, 500)) A = [5, 500, 200] (stride (1, 1000, 5)) dim = 0 8.220 -> 8.228 ( +0.10%) [ +0.33% +0.00% +0.05% / +0.10% +0.21% +0.34%] index_select const : Elapsed 0.082 ms (8.247 ms / 100) 8.212 -> 8.231 ( +0.23%) [ +0.15% +0.27% +0.00% / +0.35% +0.30% +0.23%] index_select wrap : Elapsed 0.082 ms (8.224 ms / 100) 8.219 -> 8.230 ( +0.13%) [ +0.35% +0.22% +0.00% / +0.13% +0.30% +0.47%] index_select linear : Elapsed 0.082 ms (8.248 ms / 100) 8.220 -> 8.224 ( +0.05%) [ +0.00% +0.49% +0.21% / +0.30% +0.05% +0.19%] index_select reverse : Elapsed 0.082 ms (8.220 ms / 100) 8.224 -> 8.226 ( +0.02%) [ +0.09% +0.28% +0.00% / +0.02% +0.12% +0.15%] index_select skip64 : Elapsed 0.082 ms (8.231 ms / 100) 8.234 -> 8.217 ( -0.21%) [ +0.00% +0.06% +0.12% / -0.21% +0.02% +0.32%] index_select skip256 : Elapsed 0.082 ms (8.234 ms / 100) 8.225 -> 8.233 ( +0.10%) [ +0.21% +0.11% +0.00% / +0.10% +0.17% +0.12%] index_select spread : Elapsed 0.082 ms (8.242 ms / 100) 8.219 -> 8.223 ( +0.05%) [ +0.26% +0.00% +0.41% / +0.05% +0.17% +0.15%] index_select strided 3 : Elapsed 0.082 ms (8.240 ms / 100) 8.230 -> 8.216 ( -0.17%) [ +0.12% +0.01% +0.00% / -0.17% -0.05% +0.01%] index_select random : Elapsed 0.082 ms (8.240 ms / 100) 8.225 -> 8.220 ( -0.06%) [ +0.00% +0.07% +0.09% / +0.10% -0.06% -0.05%] index_select random_sorted : Elapsed 0.082 ms (8.225 ms / 100) 8.229 -> 8.225 ( -0.05%) [ +0.00% +0.04% +0.11% / -0.05% +0.24% +0.09%] index_select perm : Elapsed 0.082 ms (8.229 ms / 100) 8.218 -> 8.214 ( -0.05%) [ +0.10% +0.00% +0.28% / +0.33% -0.05% +0.33%] index_select perm_sorted : Elapsed 0.082 ms (8.226 ms / 100) B = [1, 500, 200] (stride (1, 1, 500)) A = [5, 500, 200] (stride (500, 1, 2500)) dim = 0 5.481 -> 5.468 ( -0.24%) [ +0.00% +0.26% +0.27% / +0.22% -0.02% -0.24%] index_select const : Elapsed 0.055 ms (5.481 ms / 100) 5.485 -> 5.462 ( -0.42%) [ +0.13% +0.11% +0.00% / -0.13% -0.42% -0.18%] index_select wrap : Elapsed 0.055 ms (5.492 ms / 100) 5.481 -> 5.461 ( -0.36%) [ +0.20% +0.00% +0.05% / +0.26% -0.22% -0.36%] index_select linear : Elapsed 0.055 ms (5.492 ms / 100) 5.479 -> 5.461 ( -0.33%) [ +0.09% +0.00% +0.13% / +0.13% -0.33% +0.00%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.468 -> 5.462 ( -0.11%) [ +0.40% +0.00% +0.04% / +0.24% +0.11% -0.11%] index_select skip64 : Elapsed 0.055 ms (5.490 ms / 100) 5.481 -> 5.462 ( -0.35%) [ +0.15% +0.00% +0.18% / +0.13% -0.26% -0.35%] index_select skip256 : Elapsed 0.055 ms (5.489 ms / 100) 5.488 -> 5.468 ( -0.36%) [ +0.04% +0.00% +0.11% / -0.02% -0.36% -0.33%] index_select spread : Elapsed 0.055 ms (5.490 ms / 100) 5.473 -> 5.464 ( -0.16%) [ +0.22% +0.00% +0.44% / +0.20% -0.16% -0.13%] index_select strided 3 : Elapsed 0.055 ms (5.485 ms / 100) 5.465 -> 5.466 ( +0.02%) [ +0.31% +0.00% +0.16% / +0.11% +0.02% +0.20%] index_select random : Elapsed 0.055 ms (5.482 ms / 100) 5.459 -> 5.465 ( +0.11%) [ +0.13% +0.09% +0.00% / +0.11% +0.38% +0.31%] index_select random_sorted : Elapsed 0.055 ms (5.466 ms / 100) 5.445 -> 5.442 ( -0.06%) [ +0.31% +0.00% +0.28% / +0.02% +0.11% -0.06%] index_select perm : Elapsed 0.055 ms (5.462 ms / 100) 5.458 -> 5.439 ( -0.35%) [ +0.02% +0.22% +0.00% / +0.11% -0.07% -0.35%] index_select perm_sorted : Elapsed 0.055 ms (5.459 ms / 100) out_shape = [5, 1, 200] in_shape = [5, 500, 200] idx_dim = 1 B = [5, 1, 200] (stride (200, 200, 1)) dim = 1 fill_cnt = 500 21.702 -> 22.019 ( +1.46%) [ +1.16% +0.00% +0.00% / +1.46% +1.53% +1.53%] index_fill_ const : Elapsed 0.220 ms (21.953 ms / 100) 21.712 -> 22.030 ( +1.46%) [ +1.17% +0.00% +0.00% / +1.46% +1.50% +1.50%] index_fill_ linear : Elapsed 0.220 ms (21.965 ms / 100) 21.710 -> 22.020 ( +1.43%) [ +1.13% +0.01% +0.00% / +1.43% +1.46% +1.46%] index_fill_ reverse : Elapsed 0.220 ms (21.955 ms / 100) 21.749 -> 22.039 ( +1.33%) [ +1.14% +0.00% +0.00% / +1.44% +1.33% +1.33%] index_fill_ skip64 : Elapsed 0.220 ms (21.996 ms / 100) 21.725 -> 22.041 ( +1.45%) [ +1.17% +0.00% +0.00% / +1.45% +1.56% +1.56%] index_fill_ skip256 : Elapsed 0.220 ms (21.979 ms / 100) 21.741 -> 22.043 ( +1.39%) [ +1.15% +0.00% +0.00% / +1.46% +1.39% +1.39%] index_fill_ spread : Elapsed 0.220 ms (21.992 ms / 100) 21.698 -> 22.018 ( +1.47%) [ +1.15% +0.00% +0.00% / +1.47% +1.57% +1.56%] index_fill_ random : Elapsed 0.219 ms (21.948 ms / 100) 21.710 -> 22.026 ( +1.46%) [ +1.17% +0.00% +0.00% / +1.46% +1.54% +1.54%] index_fill_ random_sorted : Elapsed 0.220 ms (21.963 ms / 100) B = [5, 1, 200] (stride (200, 1000, 1)) dim = 1 fill_cnt = 500 21.701 -> 22.018 ( +1.46%) [ +1.17% +0.01% +0.00% / +1.46% +1.54% +1.53%] index_fill_ const : Elapsed 0.220 ms (21.955 ms / 100) 21.713 -> 22.030 ( +1.46%) [ +1.16% +0.00% +0.00% / +1.46% +1.50% +1.51%] index_fill_ linear : Elapsed 0.220 ms (21.965 ms / 100) 21.706 -> 22.021 ( +1.45%) [ +1.15% +0.00% +0.02% / +1.45% +1.48% +1.48%] index_fill_ reverse : Elapsed 0.220 ms (21.955 ms / 100) 21.747 -> 22.038 ( +1.34%) [ +1.14% +0.00% +0.00% / +1.45% +1.34% +1.34%] index_fill_ skip64 : Elapsed 0.220 ms (21.996 ms / 100) 21.724 -> 22.041 ( +1.46%) [ +1.16% +0.00% +0.00% / +1.46% +1.56% +1.56%] index_fill_ skip256 : Elapsed 0.220 ms (21.977 ms / 100) 21.740 -> 22.042 ( +1.39%) [ +1.16% +0.00% +0.00% / +1.46% +1.40% +1.39%] index_fill_ spread : Elapsed 0.220 ms (21.993 ms / 100) 21.697 -> 22.017 ( +1.47%) [ +1.17% +0.00% +0.00% / +1.47% +1.56% +1.57%] index_fill_ random : Elapsed 0.219 ms (21.950 ms / 100) 21.711 -> 22.026 ( +1.45%) [ +1.16% +0.00% +0.00% / +1.45% +1.53% +1.53%] index_fill_ random_sorted : Elapsed 0.220 ms (21.962 ms / 100) B = [5, 1, 200] (stride (200, 1000, 1)) A = [5, 500, 200] (stride (100000, 200, 1)) dim = 1 0.516 -> 0.517 ( +0.19%) [ +3.88% +0.00% +7.75% / +3.88% +0.19% +1.16%] index_select const : Elapsed 0.005 ms (0.536 ms / 100) 0.520 -> 0.512 ( -1.54%) [ +2.12% +0.00% +11.92% / +2.50% -1.54% -0.58%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.517 -> 0.509 ( -1.55%) [ +2.32% +0.00% +0.58% / +6.58% -1.55% +1.35%] index_select linear : Elapsed 0.005 ms (0.529 ms / 100) 0.516 -> 0.516 ( +0.00%) [ +1.74% +3.29% +0.00% / +2.91% +0.39% +0.00%] index_select reverse : Elapsed 0.005 ms (0.525 ms / 100) 0.519 -> 0.513 ( -1.16%) [ +0.39% +0.00% +1.35% / +8.86% -1.16% +0.00%] index_select skip64 : Elapsed 0.005 ms (0.521 ms / 100) 0.513 -> 0.511 ( -0.39%) [ +2.92% +0.00% +0.78% / +15.01% -0.39% +3.12%] index_select skip256 : Elapsed 0.005 ms (0.528 ms / 100) 0.518 -> 0.513 ( -0.97%) [ +1.16% +0.00% +1.93% / +2.32% -0.97% +12.36%] index_select spread : Elapsed 0.005 ms (0.524 ms / 100) 0.515 -> 0.509 ( -1.17%) [ +2.14% +0.00% +1.36% / +3.11% -1.17% +1.36%] index_select strided 3 : Elapsed 0.005 ms (0.526 ms / 100) 0.529 -> 0.515 ( -2.65%) [ +0.00% +0.95% +1.70% / +0.00% +3.21% -2.65%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.510 -> 0.520 ( +1.96%) [ +2.55% +0.00% +1.96% / +3.73% +10.59% +1.96%] index_select strided 7 : Elapsed 0.005 ms (0.523 ms / 100) 0.520 -> 0.511 ( -1.73%) [ +1.15% +0.00% +0.58% / +0.96% -0.58% -1.73%] index_select strided 8 : Elapsed 0.005 ms (0.526 ms / 100) 0.514 -> 0.510 ( -0.78%) [ +2.53% +0.00% +0.97% / +3.31% -0.39% -0.78%] index_select strided 16 : Elapsed 0.005 ms (0.527 ms / 100) 0.516 -> 0.511 ( -0.97%) [ +1.74% +0.00% +7.17% / +9.88% -0.39% -0.97%] index_select strided 64 : Elapsed 0.005 ms (0.525 ms / 100) 0.513 -> 0.515 ( +0.39%) [ +1.95% +0.00% +16.37% / +3.51% +1.17% +0.39%] index_select strided 100 : Elapsed 0.005 ms (0.523 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +2.14% +0.00% +0.58% / +4.87% +0.19% +0.39%] index_select strided 255 : Elapsed 0.005 ms (0.524 ms / 100) 0.520 -> 0.515 ( -0.96%) [ +1.54% +6.15% +0.00% / +1.73% -0.96% -0.96%] index_select strided 256 : Elapsed 0.005 ms (0.528 ms / 100) 0.523 -> 0.510 ( -2.49%) [ +0.76% +3.06% +0.00% / +2.49% -2.49% -1.72%] index_select strided 257 : Elapsed 0.005 ms (0.527 ms / 100) 0.518 -> 0.510 ( -1.54%) [ +1.74% +0.00% +0.19% / +1.93% -1.54% -1.16%] index_select random : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.511 ( -1.16%) [ +4.84% +0.19% +0.00% / +1.55% -0.58% -1.16%] index_select random_sorted : Elapsed 0.005 ms (0.542 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +3.52% +1.37% +0.00% / +2.15% +0.59% -0.39%] index_select perm : Elapsed 0.005 ms (0.530 ms / 100) 0.513 -> 0.518 ( +0.97%) [ +3.31% +1.75% +0.00% / +2.34% +0.97% +7.60%] index_select perm_sorted : Elapsed 0.005 ms (0.530 ms / 100) B = [5, 1, 200] (stride (200, 1000, 1)) A = [5, 500, 200] (stride (1, 5, 2500)) dim = 1 0.512 -> 0.512 ( +0.00%) [ +3.91% +0.00% +1.37% / +3.52% +6.84% +0.00%] index_select const : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.509 ( -0.97%) [ +3.50% +0.00% +3.11% / +7.00% +0.19% -0.97%] index_select wrap : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +2.71% +0.00% +2.91% / +6.59% -1.16% -1.16%] index_select linear : Elapsed 0.005 ms (0.530 ms / 100) 0.522 -> 0.505 ( -3.26%) [ +3.07% +3.26% +0.00% / +2.30% -2.68% -3.26%] index_select reverse : Elapsed 0.005 ms (0.538 ms / 100) 0.517 -> 0.513 ( -0.77%) [+15.47% +0.00% +0.77% / +4.06% -0.39% -0.77%] index_select skip64 : Elapsed 0.006 ms (0.597 ms / 100) 0.517 -> 0.511 ( -1.16%) [+11.22% +0.00% +10.83% / +3.48% +1.35% -1.16%] index_select skip256 : Elapsed 0.006 ms (0.575 ms / 100) 0.523 -> 0.510 ( -2.49%) [+11.28% +0.38% +0.00% / +0.96% -1.53% -2.49%] index_select spread : Elapsed 0.006 ms (0.582 ms / 100) 0.517 -> 0.525 ( +1.55%) [+10.64% +0.00% +1.16% / +2.51% +1.55% +2.13%] index_select strided 3 : Elapsed 0.006 ms (0.572 ms / 100) 0.511 -> 0.511 ( +0.00%) [+16.24% +0.00% +2.15% / +3.91% +0.59% +0.00%] index_select strided 5 : Elapsed 0.006 ms (0.594 ms / 100) 0.511 -> 0.511 ( +0.00%) [ +2.74% +0.00% +8.81% / +4.11% +6.46% +0.00%] index_select strided 7 : Elapsed 0.005 ms (0.525 ms / 100) 0.512 -> 0.517 ( +0.98%) [+10.35% +0.00% +3.52% / +3.71% +0.98% +1.17%] index_select strided 8 : Elapsed 0.006 ms (0.565 ms / 100) 0.519 -> 0.518 ( -0.19%) [ +0.58% +0.00% +7.90% / +9.63% -0.19% +0.39%] index_select strided 16 : Elapsed 0.005 ms (0.522 ms / 100) 0.518 -> 0.509 ( -1.74%) [ +6.95% +0.00% +0.77% / +2.32% -0.19% -1.74%] index_select strided 64 : Elapsed 0.006 ms (0.554 ms / 100) 0.515 -> 0.511 ( -0.78%) [ +7.96% +0.00% +8.16% / +3.11% -0.78% +0.19%] index_select strided 100 : Elapsed 0.006 ms (0.556 ms / 100) 0.518 -> 0.514 ( -0.77%) [ +6.37% +0.39% +0.00% / +1.93% -0.77% -0.77%] index_select strided 255 : Elapsed 0.006 ms (0.551 ms / 100) 0.515 -> 0.513 ( -0.39%) [+12.62% +0.00% +0.58% / +2.52% -0.39% +0.00%] index_select strided 256 : Elapsed 0.006 ms (0.580 ms / 100) 0.520 -> 0.510 ( -1.92%) [ +4.23% +2.88% +0.00% / +1.73% -1.92% -0.38%] index_select strided 257 : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.504 ( -3.26%) [+13.63% +0.19% +0.00% / +4.03% -3.26% +5.37%] index_select random : Elapsed 0.006 ms (0.592 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +3.70% +0.00% +2.14% / +3.70% +5.65% +0.00%] index_select random_sorted : Elapsed 0.005 ms (0.532 ms / 100) 0.518 -> 0.513 ( -0.97%) [ +1.74% +0.00% +0.58% / +2.51% -0.97% -0.77%] index_select perm : Elapsed 0.005 ms (0.527 ms / 100) 0.514 -> 0.516 ( +0.39%) [ +4.86% +0.00% +10.12% / +9.73% +0.39% +1.17%] index_select perm_sorted : Elapsed 0.005 ms (0.539 ms / 100) B = [5, 1, 200] (stride (1, 1000, 5)) A = [5, 500, 200] (stride (1, 1000, 5)) dim = 1 0.520 -> 0.515 ( -0.96%) [ +3.27% +0.00% +0.38% / +1.92% -0.96% +12.88%] index_select const : Elapsed 0.005 ms (0.537 ms / 100) 0.515 -> 0.515 ( +0.00%) [ +3.11% +0.00% +0.58% / +3.11% +0.00% +0.78%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.512 ( -0.58%) [ +2.91% +0.00% +0.58% / +2.52% -0.58% -0.19%] index_select linear : Elapsed 0.005 ms (0.530 ms / 100) 0.511 -> 0.514 ( +0.59%) [ +9.98% +0.00% +2.15% / +4.31% +0.59% +0.59%] index_select reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.517 -> 0.514 ( -0.58%) [ +7.74% +0.58% +0.00% / +3.29% +0.39% -0.58%] index_select skip64 : Elapsed 0.006 ms (0.557 ms / 100) 0.508 -> 0.516 ( +1.57%) [ +4.33% +0.00% +1.97% / +4.33% +1.57% +2.95%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.32% +0.00% +1.95% / +3.32% +0.39% +7.03%] index_select spread : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.513 ( -0.77%) [ +2.90% +0.39% +0.00% / +2.32% -0.77% +0.19%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.518 -> 0.515 ( -0.58%) [ +2.12% +0.00% +0.77% / +2.70% +4.44% -0.58%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.518 -> 0.514 ( -0.77%) [ +2.70% +0.00% +0.19% / +2.32% -0.77% +1.16%] index_select strided 7 : Elapsed 0.005 ms (0.532 ms / 100) 0.524 -> 0.511 ( -2.48%) [ +0.95% +0.00% +0.00% / +1.34% +0.19% -2.48%] index_select strided 8 : Elapsed 0.005 ms (0.529 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +3.13% +0.00% +13.87% / +4.49% +0.39% -0.39%] index_select strided 16 : Elapsed 0.005 ms (0.528 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +2.52% +0.00% +0.97% / +2.52% -0.97% -0.97%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.518 -> 0.511 ( -1.35%) [ +2.90% +0.39% +0.00% / +3.28% -1.35% -1.35%] index_select strided 100 : Elapsed 0.005 ms (0.533 ms / 100) 0.513 -> 0.509 ( -0.78%) [ +2.92% +0.39% +0.00% / +4.68% -0.39% -0.78%] index_select strided 255 : Elapsed 0.005 ms (0.528 ms / 100) 0.512 -> 0.512 ( +0.00%) [ +3.91% +0.00% +3.52% / +3.71% +0.00% +0.00%] index_select strided 256 : Elapsed 0.005 ms (0.532 ms / 100) 0.511 -> 0.510 ( -0.20%) [ +5.09% +0.20% +0.00% / +5.09% -0.20% +0.20%] index_select strided 257 : Elapsed 0.005 ms (0.537 ms / 100) 0.510 -> 0.509 ( -0.20%) [ +2.94% +0.00% +1.37% / +4.71% +0.78% -0.20%] index_select random : Elapsed 0.005 ms (0.525 ms / 100) 0.511 -> 0.509 ( -0.39%) [ +2.94% +0.00% +3.91% / +4.11% -0.39% +11.35%] index_select random_sorted : Elapsed 0.005 ms (0.526 ms / 100) 0.519 -> 0.511 ( -1.54%) [ +5.01% +2.50% +0.00% / +3.28% -1.54% -0.77%] index_select perm : Elapsed 0.005 ms (0.545 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +4.10% +0.00% +0.98% / +4.30% +5.08% +0.20%] index_select perm_sorted : Elapsed 0.005 ms (0.533 ms / 100) out_shape = [5, 500, 1] in_shape = [5, 500, 200] idx_dim = 2 B = [5, 500, 1] (stride (500, 1, 1)) dim = 2 fill_cnt = 200 7.992 -> 8.119 ( +1.59%) [ +1.18% +0.01% +0.00% / +1.59% +1.68% +1.68%] index_fill_ const : Elapsed 0.081 ms (8.086 ms / 100) 7.989 -> 8.127 ( +1.73%) [ +1.13% +0.00% +0.00% / +1.74% +1.76% +1.73%] index_fill_ linear : Elapsed 0.081 ms (8.079 ms / 100) 7.986 -> 8.132 ( +1.83%) [ +1.11% +0.04% +0.00% / +1.83% +1.84% +1.84%] index_fill_ reverse : Elapsed 0.081 ms (8.075 ms / 100) 8.002 -> 8.110 ( +1.35%) [ +1.17% +0.04% +0.00% / +1.36% +1.35% +1.35%] index_fill_ skip64 : Elapsed 0.081 ms (8.096 ms / 100) 8.009 -> 8.125 ( +1.45%) [ +1.21% +0.07% +0.00% / +1.59% +1.45% +1.45%] index_fill_ skip256 : Elapsed 0.081 ms (8.106 ms / 100) 7.991 -> 8.100 ( +1.36%) [ +1.19% +0.04% +0.00% / +1.36% +1.46% +1.49%] index_fill_ spread : Elapsed 0.081 ms (8.086 ms / 100) 7.997 -> 8.122 ( +1.56%) [ +1.20% +0.03% +0.00% / +1.68% +1.58% +1.56%] index_fill_ random : Elapsed 0.081 ms (8.093 ms / 100) 8.000 -> 8.097 ( +1.21%) [ +1.24% +0.08% +0.00% / +1.23% +1.21% +1.21%] index_fill_ random_sorted : Elapsed 0.081 ms (8.099 ms / 100) B = [5, 500, 1] (stride (500, 1, 500)) A = [5, 500, 200] (stride (1, 5, 2500)) dim = 2 0.821 -> 0.826 ( +0.61%) [ +0.24% +0.12% +0.00% / +0.61% +0.73% +0.61%] index_select const : Elapsed 0.008 ms (0.823 ms / 100) 0.824 -> 0.822 ( -0.24%) [ +0.12% +0.12% +0.00% / -0.12% -0.24% +0.12%] index_select wrap : Elapsed 0.008 ms (0.825 ms / 100) 0.822 -> 0.824 ( +0.24%) [ +0.00% +0.49% +0.36% / +0.24% +0.49% +0.36%] index_select linear : Elapsed 0.008 ms (0.822 ms / 100) 0.820 -> 0.825 ( +0.61%) [ +0.61% +0.00% +0.12% / +0.73% +0.73% +0.61%] index_select reverse : Elapsed 0.008 ms (0.825 ms / 100) 0.822 -> 0.823 ( +0.12%) [ +0.36% +0.12% +0.00% / +0.49% +0.24% +0.12%] index_select skip64 : Elapsed 0.008 ms (0.825 ms / 100) 0.823 -> 0.823 ( +0.00%) [ +0.49% +0.00% +0.12% / +0.49% +0.00% +0.00%] index_select skip256 : Elapsed 0.008 ms (0.827 ms / 100) 0.821 -> 0.824 ( +0.37%) [ +0.73% +0.00% +0.24% / +0.37% +0.37% +0.85%] index_select spread : Elapsed 0.008 ms (0.827 ms / 100) 0.822 -> 0.820 ( -0.24%) [ +0.36% +0.12% +0.00% / +0.73% +0.24% -0.24%] index_select strided 3 : Elapsed 0.008 ms (0.825 ms / 100) 0.819 -> 0.820 ( +0.12%) [ +0.61% +0.00% +0.49% / +1.34% +0.49% +0.12%] index_select strided 5 : Elapsed 0.008 ms (0.824 ms / 100) 0.821 -> 0.824 ( +0.37%) [ +0.61% +0.00% +0.24% / +0.61% +0.37% +0.85%] index_select strided 7 : Elapsed 0.008 ms (0.826 ms / 100) 0.825 -> 0.823 ( -0.24%) [ +0.00% +0.00% +0.00% / -0.24% +0.12% +0.36%] index_select strided 8 : Elapsed 0.008 ms (0.825 ms / 100) 0.824 -> 0.822 ( -0.24%) [ +0.00% +0.24% +0.00% / -0.24% +0.24% +0.12%] index_select strided 16 : Elapsed 0.008 ms (0.824 ms / 100) 0.822 -> 0.824 ( +0.24%) [ +0.61% +0.24% +0.00% / +0.24% +1.22% +1.58%] index_select strided 64 : Elapsed 0.008 ms (0.827 ms / 100) 0.821 -> 0.826 ( +0.61%) [ +0.37% +0.24% +0.00% / +0.85% +0.97% +0.61%] index_select strided 100 : Elapsed 0.008 ms (0.824 ms / 100) 0.820 -> 0.823 ( +0.37%) [ +0.24% +0.00% +0.00% / +0.37% +0.85% +0.98%] index_select random : Elapsed 0.008 ms (0.822 ms / 100) 0.819 -> 0.820 ( +0.12%) [ +0.00% +0.12% +0.24% / +0.12% +0.85% +1.47%] index_select random_sorted : Elapsed 0.008 ms (0.819 ms / 100) 0.817 -> 0.821 ( +0.49%) [ +0.73% +0.00% +0.12% / +0.49% +2.57% +2.69%] index_select perm : Elapsed 0.008 ms (0.823 ms / 100) 0.816 -> 0.817 ( +0.12%) [ +0.61% +0.00% +0.00% / +0.12% +0.98% +1.59%] index_select perm_sorted : Elapsed 0.008 ms (0.821 ms / 100) B = [5, 500, 1] (stride (1, 5, 1)) A = [5, 500, 200] (stride (1, 5, 2500)) dim = 2 0.519 -> 0.520 ( +0.19%) [ +2.31% +0.00% +1.54% / +2.70% +0.19% +0.19%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +1.36% +0.00% +1.55% / +2.91% -0.39% +7.96%] index_select wrap : Elapsed 0.005 ms (0.522 ms / 100) 0.515 -> 0.516 ( +0.19%) [ +2.33% +0.00% +0.58% / +3.11% +0.39% +0.19%] index_select linear : Elapsed 0.005 ms (0.527 ms / 100) 0.521 -> 0.521 ( +0.00%) [ +4.22% +3.07% +0.00% / +2.11% +12.09% +0.00%] index_select reverse : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.514 ( -1.53%) [ +1.53% +0.00% +0.19% / +1.92% +4.79% -1.53%] index_select skip64 : Elapsed 0.005 ms (0.530 ms / 100) 0.527 -> 0.522 ( -0.95%) [ +0.00% +1.90% +6.26% / +0.57% -0.95% -0.57%] index_select skip256 : Elapsed 0.005 ms (0.527 ms / 100) 0.520 -> 0.517 ( -0.58%) [ +0.77% +0.00% +0.96% / +2.31% +0.38% -0.58%] index_select spread : Elapsed 0.005 ms (0.524 ms / 100) 0.524 -> 0.516 ( -1.53%) [ +0.57% +0.76% +0.00% / +1.34% -1.34% -1.53%] index_select strided 3 : Elapsed 0.005 ms (0.527 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +1.54% +0.58% +0.00% / +2.11% +0.00% -0.19%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.518 -> 0.525 ( +1.35%) [ +2.70% +0.00% +0.97% / +2.51% +1.93% +1.35%] index_select strided 7 : Elapsed 0.005 ms (0.532 ms / 100) 0.525 -> 0.516 ( -1.71%) [ +7.05% +0.57% +0.00% / +1.14% -1.71% -1.14%] index_select strided 8 : Elapsed 0.006 ms (0.562 ms / 100) 0.518 -> 0.520 ( +0.39%) [ +4.83% +0.00% +0.00% / +2.12% +2.51% +0.39%] index_select strided 16 : Elapsed 0.005 ms (0.543 ms / 100) 0.516 -> 0.515 ( -0.19%) [ +3.88% +0.00% +1.36% / +4.46% -0.19% +0.97%] index_select strided 64 : Elapsed 0.005 ms (0.536 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +4.65% +0.78% +0.00% / +10.85% -1.16% +0.58%] index_select strided 100 : Elapsed 0.005 ms (0.540 ms / 100) 0.514 -> 0.510 ( -0.78%) [+13.42% +0.00% +11.67% / +2.92% -0.78% +2.14%] index_select random : Elapsed 0.006 ms (0.583 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +2.92% +0.00% +1.95% / +2.73% +0.00% +1.75%] index_select random_sorted : Elapsed 0.005 ms (0.528 ms / 100) 0.511 -> 0.512 ( +0.20%) [ +4.50% +0.00% +2.15% / +3.91% +0.20% +11.35%] index_select perm : Elapsed 0.005 ms (0.534 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +7.23% +0.00% +1.76% / +3.32% -0.39% +13.67%] index_select perm_sorted : Elapsed 0.005 ms (0.549 ms / 100) B = [5, 500, 1] (stride (1, 5, 5)) A = [5, 500, 200] (stride (500, 1, 2500)) dim = 2 0.827 -> 0.832 ( +0.60%) [ +0.48% +0.00% +0.36% / +0.60% +0.85% +0.85%] index_select const : Elapsed 0.008 ms (0.831 ms / 100) 0.828 -> 0.831 ( +0.36%) [ +0.48% +0.00% +0.12% / +0.36% +0.97% +0.48%] index_select wrap : Elapsed 0.008 ms (0.832 ms / 100) 0.827 -> 0.830 ( +0.36%) [ +0.24% +0.48% +0.00% / +0.97% +0.60% +0.36%] index_select linear : Elapsed 0.008 ms (0.829 ms / 100) 0.829 -> 0.830 ( +0.12%) [ +0.84% +0.00% +0.00% / +0.24% +0.12% +0.12%] index_select reverse : Elapsed 0.008 ms (0.836 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.36% +0.12% +0.00% / +0.36% +0.72% +0.24%] index_select skip64 : Elapsed 0.008 ms (0.832 ms / 100) 0.829 -> 0.832 ( +0.36%) [ +0.00% +0.00% +0.00% / +0.48% +0.60% +0.36%] index_select skip256 : Elapsed 0.008 ms (0.829 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.00% +0.36% +0.00% / +0.36% +0.36% +0.24%] index_select spread : Elapsed 0.008 ms (0.829 ms / 100) 0.828 -> 0.829 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.12% +0.24%] index_select strided 3 : Elapsed 0.008 ms (0.829 ms / 100) 0.826 -> 0.829 ( +0.36%) [ +0.12% +0.61% +0.00% / +0.61% +0.73% +0.36%] index_select strided 5 : Elapsed 0.008 ms (0.827 ms / 100) 0.828 -> 0.831 ( +0.36%) [ +0.00% +0.00% +0.24% / +0.36% +0.60% +25.60%] index_select strided 7 : Elapsed 0.008 ms (0.828 ms / 100) 0.827 -> 0.831 ( +0.48%) [ +0.36% +0.36% +0.00% / +0.48% +0.60% +10.40%] index_select strided 8 : Elapsed 0.008 ms (0.830 ms / 100) 0.828 -> 0.833 ( +0.60%) [ +0.24% +0.48% +0.00% / +0.60% +1.09% +11.71%] index_select strided 16 : Elapsed 0.008 ms (0.830 ms / 100) 0.827 -> 0.834 ( +0.85%) [ +0.36% +0.00% +0.36% / +0.85% +1.09% +10.28%] index_select strided 64 : Elapsed 0.008 ms (0.830 ms / 100) 0.828 -> 0.831 ( +0.36%) [ +0.48% +0.12% +0.00% / +0.36% +0.48% +11.11%] index_select strided 100 : Elapsed 0.008 ms (0.832 ms / 100) 0.814 -> 0.817 ( +0.37%) [ +0.86% +0.00% +0.00% / +0.37% +1.35% +12.65%] index_select random : Elapsed 0.008 ms (0.821 ms / 100) 0.813 -> 0.818 ( +0.62%) [ +0.74% +0.62% +0.00% / +0.62% +1.60% +11.93%] index_select random_sorted : Elapsed 0.008 ms (0.819 ms / 100) 0.818 -> 0.828 ( +1.22%) [ +1.47% +0.00% +1.47% / +1.22% +1.83% +11.86%] index_select perm : Elapsed 0.008 ms (0.830 ms / 100) 0.821 -> 0.828 ( +0.85%) [ +0.73% +0.00% +0.61% / +1.46% +0.85% +11.94%] index_select perm_sorted : Elapsed 0.008 ms (0.827 ms / 100) B = [5, 500, 1] (stride (500, 1, 2500)) dim = 2 fill_cnt = 200 7.992 -> 8.099 ( +1.34%) [ +1.16% +0.04% +0.00% / +1.34% +1.43% +1.51%] index_fill_ const : Elapsed 0.081 ms (8.085 ms / 100) 7.985 -> 8.093 ( +1.35%) [ +1.19% +0.06% +0.00% / +1.35% +1.35% +1.35%] index_fill_ linear : Elapsed 0.081 ms (8.080 ms / 100) 7.987 -> 8.090 ( +1.29%) [ +1.11% +0.00% +0.00% / +1.29% +1.40% +1.40%] index_fill_ reverse : Elapsed 0.081 ms (8.076 ms / 100) 8.003 -> 8.101 ( +1.22%) [ +1.19% +0.04% +0.00% / +1.36% +1.22% +1.22%] index_fill_ skip64 : Elapsed 0.081 ms (8.098 ms / 100) 8.011 -> 8.100 ( +1.11%) [ +1.21% +0.05% +0.00% / +1.35% +1.11% +1.12%] index_fill_ skip256 : Elapsed 0.081 ms (8.108 ms / 100) 7.991 -> 8.100 ( +1.36%) [ +1.18% +0.05% +0.00% / +1.36% +1.51% +1.50%] index_fill_ spread : Elapsed 0.081 ms (8.085 ms / 100) 7.997 -> 8.104 ( +1.34%) [ +1.18% +0.03% +0.00% / +1.34% +1.44% +1.40%] index_fill_ random : Elapsed 0.081 ms (8.091 ms / 100) 8.001 -> 8.101 ( +1.25%) [ +1.19% +0.06% +0.00% / +1.37% +1.25% +1.25%] index_fill_ random_sorted : Elapsed 0.081 ms (8.096 ms / 100) B = [5, 500, 1] (stride (500, 1, 2500)) A = [5, 500, 200] (stride (500, 1, 2500)) dim = 2 0.520 -> 0.515 ( -0.96%) [ +2.69% +1.15% +0.00% / +1.15% -0.96% +7.69%] index_select const : Elapsed 0.005 ms (0.534 ms / 100) 0.514 -> 0.519 ( +0.97%) [ +3.11% +0.00% +0.39% / +6.42% +1.56% +0.97%] index_select wrap : Elapsed 0.005 ms (0.530 ms / 100) 0.517 -> 0.514 ( -0.58%) [ +6.96% +2.13% +0.00% / +14.70% -0.58% +0.97%] index_select linear : Elapsed 0.006 ms (0.553 ms / 100) 0.519 -> 0.515 ( -0.77%) [ +2.70% +0.58% +0.00% / +2.50% -0.77% -0.19%] index_select reverse : Elapsed 0.005 ms (0.533 ms / 100) 0.513 -> 0.522 ( +1.75%) [ +3.51% +4.87% +0.00% / +6.24% +6.82% +1.75%] index_select skip64 : Elapsed 0.005 ms (0.531 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +1.71% +0.00% +1.14% / +12.55% +11.22% -1.14%] index_select skip256 : Elapsed 0.005 ms (0.535 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +3.51% +2.53% +0.00% / +4.68% +0.00% +1.56%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +3.50% +0.97% +0.00% / +6.80% -0.39% +1.94%] index_select strided 3 : Elapsed 0.005 ms (0.533 ms / 100) 0.512 -> 0.515 ( +0.59%) [ +3.71% +0.59% +0.00% / +14.06% +0.59% +3.71%] index_select strided 5 : Elapsed 0.005 ms (0.531 ms / 100) 0.516 -> 0.512 ( -0.78%) [+10.47% +0.00% +7.17% / +3.10% -0.78% +2.13%] index_select strided 7 : Elapsed 0.006 ms (0.570 ms / 100) 0.522 -> 0.515 ( -1.34%) [ +2.11% +0.00% +9.20% / +3.64% -1.34% -0.96%] index_select strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.532 -> 0.509 ( -4.32%) [ +0.00% +0.56% +3.76% / -0.19% -4.32% -0.38%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.510 ( -0.78%) [ +3.11% +0.78% +0.00% / +4.28% -0.78% +0.78%] index_select strided 64 : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +3.30% +0.78% +0.00% / +6.99% -0.97% +7.57%] index_select strided 100 : Elapsed 0.005 ms (0.532 ms / 100) 0.520 -> 0.508 ( -2.31%) [ +1.73% +2.31% +0.00% / +6.92% -2.31% +0.19%] index_select random : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.512 ( -0.97%) [ +3.09% +0.00% +1.93% / +3.29% -0.97% -0.19%] index_select random_sorted : Elapsed 0.005 ms (0.533 ms / 100) 0.518 -> 0.510 ( -1.54%) [ +2.70% +0.77% +0.00% / +4.83% -1.54% +2.12%] index_select perm : Elapsed 0.005 ms (0.532 ms / 100) 0.518 -> 0.524 ( +1.16%) [ +1.74% +0.39% +0.00% / +2.90% +4.63% +1.16%] index_select perm_sorted : Elapsed 0.005 ms (0.527 ms / 100) B = [5, 500, 1] (stride (1, 5, 2500)) A = [5, 500, 200] (stride (1, 1000, 5)) dim = 2 0.515 -> 0.513 ( -0.39%) [ +3.30% +0.00% +3.88% / +4.27% -0.39% +0.58%] index_select const : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +2.52% +0.00% +0.58% / +10.10% +0.78% -0.97%] index_select wrap : Elapsed 0.005 ms (0.528 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +4.27% +0.00% +1.75% / +3.88% -0.97% -0.39%] index_select linear : Elapsed 0.005 ms (0.537 ms / 100) 0.518 -> 0.511 ( -1.35%) [ +1.16% +0.00% +3.67% / +2.70% -1.35% -0.97%] index_select reverse : Elapsed 0.005 ms (0.524 ms / 100) 0.520 -> 0.516 ( -0.77%) [ +0.58% +0.00% +4.23% / +2.31% +2.12% -0.77%] index_select skip64 : Elapsed 0.005 ms (0.523 ms / 100) 0.517 -> 0.515 ( -0.39%) [ +1.55% +0.00% +0.00% / +2.71% +10.83% -0.39%] index_select skip256 : Elapsed 0.005 ms (0.525 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +2.92% +0.00% +1.95% / +4.47% +0.19% +1.17%] index_select spread : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.518 ( +0.19%) [ +2.13% +1.35% +0.00% / +3.09% +0.19% +13.93%] index_select strided 3 : Elapsed 0.005 ms (0.528 ms / 100) 0.520 -> 0.520 ( +0.00%) [ +1.35% +0.00% +1.92% / +2.12% +4.81% +0.00%] index_select strided 5 : Elapsed 0.005 ms (0.527 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +4.69% +0.00% +1.95% / +5.47% +0.39% +0.39%] index_select strided 7 : Elapsed 0.005 ms (0.536 ms / 100) 0.517 -> 0.517 ( +0.00%) [+17.99% +1.55% +0.00% / +1.74% +0.00% +0.00%] index_select strided 8 : Elapsed 0.006 ms (0.610 ms / 100) 0.517 -> 0.521 ( +0.77%) [ +2.90% +0.00% +1.16% / +8.12% +5.03% +0.77%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.517 -> 0.513 ( -0.77%) [ +2.51% +3.68% +0.00% / +4.26% -0.77% +0.19%] index_select strided 64 : Elapsed 0.005 ms (0.530 ms / 100) 0.518 -> 0.520 ( +0.39%) [ +1.74% +0.00% +0.00% / +2.51% +6.37% +0.39%] index_select strided 100 : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.518 ( +0.19%) [ +2.51% +0.00% +3.29% / +2.13% +5.03% +0.19%] index_select random : Elapsed 0.005 ms (0.530 ms / 100) 0.523 -> 0.522 ( -0.19%) [ +1.72% +0.00% +6.69% / +1.34% +9.56% -0.19%] index_select random_sorted : Elapsed 0.005 ms (0.532 ms / 100) 0.510 -> 0.520 ( +1.96%) [ +3.73% +0.00% +4.51% / +3.33% +7.06% +1.96%] index_select perm : Elapsed 0.005 ms (0.529 ms / 100) 0.519 -> 0.516 ( -0.58%) [ +1.54% +0.39% +0.00% / +2.12% -0.58% +0.19%] index_select perm_sorted : Elapsed 0.005 ms (0.527 ms / 100) out_shape = [1, 5, 500] in_shape = [200, 5, 500] idx_dim = 0 B = [1, 5, 500] (stride (2500, 500, 1)) dim = 0 fill_cnt = 200 7.990 -> 8.098 ( +1.35%) [ +1.21% +0.05% +0.00% / +1.35% +1.48% +1.45%] index_fill_ const : Elapsed 0.081 ms (8.087 ms / 100) 7.988 -> 8.093 ( +1.31%) [ +1.15% +0.00% +0.01% / +1.31% +1.31% +1.31%] index_fill_ linear : Elapsed 0.081 ms (8.080 ms / 100) 7.985 -> 8.093 ( +1.35%) [ +1.14% +0.04% +0.00% / +1.35% +1.43% +1.45%] index_fill_ reverse : Elapsed 0.081 ms (8.076 ms / 100) 8.004 -> 8.101 ( +1.21%) [ +1.14% +0.02% +0.00% / +1.34% +1.21% +1.21%] index_fill_ skip64 : Elapsed 0.081 ms (8.095 ms / 100) 8.010 -> 8.100 ( +1.12%) [ +1.20% +0.05% +0.00% / +1.35% +1.12% +1.14%] index_fill_ skip256 : Elapsed 0.081 ms (8.106 ms / 100) 7.992 -> 8.098 ( +1.33%) [ +1.18% +0.05% +0.00% / +1.33% +1.48% +1.49%] index_fill_ spread : Elapsed 0.081 ms (8.086 ms / 100) 7.996 -> 8.106 ( +1.38%) [ +1.18% +0.05% +0.00% / +1.38% +1.41% +1.43%] index_fill_ random : Elapsed 0.081 ms (8.090 ms / 100) 8.001 -> 8.101 ( +1.25%) [ +1.17% +0.06% +0.00% / +1.39% +1.25% +1.25%] index_fill_ random_sorted : Elapsed 0.081 ms (8.095 ms / 100) B = [1, 5, 500] (stride (2500, 500, 1)) A = [200, 5, 500] (stride (2500, 1, 5)) dim = 0 0.821 -> 0.823 ( +0.24%) [ +0.97% +0.00% +0.61% / +0.61% +0.24% +0.37%] index_select const : Elapsed 0.008 ms (0.829 ms / 100) 0.824 -> 0.826 ( +0.24%) [ +0.24% +0.24% +0.00% / +0.49% +0.36% +0.24%] index_select wrap : Elapsed 0.008 ms (0.826 ms / 100) 0.823 -> 0.822 ( -0.12%) [ +0.73% +0.24% +0.00% / +0.73% +0.49% -0.12%] index_select linear : Elapsed 0.008 ms (0.829 ms / 100) 0.825 -> 0.824 ( -0.12%) [ +0.00% +0.12% +0.00% / -0.12% +0.00% -0.12%] index_select reverse : Elapsed 0.008 ms (0.825 ms / 100) 0.822 -> 0.821 ( -0.12%) [ +0.85% +0.00% +0.24% / +0.61% +0.24% -0.12%] index_select skip64 : Elapsed 0.008 ms (0.829 ms / 100) 0.823 -> 0.822 ( -0.12%) [ +0.00% +0.61% +0.00% / +0.61% +0.49% -0.12%] index_select skip256 : Elapsed 0.008 ms (0.823 ms / 100) 0.824 -> 0.825 ( +0.12%) [ +0.24% +0.24% +0.00% / +0.24% +0.24% +0.12%] index_select spread : Elapsed 0.008 ms (0.826 ms / 100) 0.823 -> 0.825 ( +0.24%) [ +0.36% +0.12% +0.00% / +0.24% +0.36% +0.24%] index_select strided 3 : Elapsed 0.008 ms (0.826 ms / 100) 0.825 -> 0.825 ( +0.00%) [ +0.00% +0.48% +0.00% / +0.24% +0.00% +0.00%] index_select strided 5 : Elapsed 0.008 ms (0.825 ms / 100) 0.823 -> 0.824 ( +0.12%) [ +0.49% +0.24% +0.00% / +0.12% +0.73% +0.73%] index_select strided 7 : Elapsed 0.008 ms (0.827 ms / 100) 0.824 -> 0.826 ( +0.24%) [ +0.00% +0.00% +0.00% / +0.24% +0.73% +0.73%] index_select strided 8 : Elapsed 0.008 ms (0.824 ms / 100) 0.823 -> 0.827 ( +0.49%) [ +0.00% +0.12% +0.12% / +0.49% +0.85% +0.97%] index_select strided 16 : Elapsed 0.008 ms (0.823 ms / 100) 0.824 -> 0.826 ( +0.24%) [ +0.49% +0.00% +0.12% / +0.24% +0.97% +0.97%] index_select strided 64 : Elapsed 0.008 ms (0.828 ms / 100) 0.823 -> 0.827 ( +0.49%) [ +0.12% +0.24% +0.00% / +0.49% +0.85% +0.85%] index_select strided 100 : Elapsed 0.008 ms (0.824 ms / 100) 0.817 -> 0.825 ( +0.98%) [ +0.86% +0.37% +0.00% / +0.98% +1.10% +1.71%] index_select random : Elapsed 0.008 ms (0.824 ms / 100) 0.818 -> 0.824 ( +0.73%) [ +0.86% +0.00% +1.10% / +0.73% +1.59% +1.34%] index_select random_sorted : Elapsed 0.008 ms (0.825 ms / 100) 0.821 -> 0.821 ( +0.00%) [ +0.37% +0.00% +0.00% / +0.00% +2.07% +1.83%] index_select perm : Elapsed 0.008 ms (0.824 ms / 100) 0.819 -> 0.826 ( +0.85%) [ +0.37% +0.00% +0.24% / +0.85% +0.85% +0.98%] index_select perm_sorted : Elapsed 0.008 ms (0.822 ms / 100) B = [1, 5, 500] (stride (2500, 1, 5)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 0 0.530 -> 0.511 ( -3.58%) [ +0.75% +1.51% +0.00% / +0.57% -3.58% +5.28%] index_select const : Elapsed 0.005 ms (0.534 ms / 100) 0.510 -> 0.513 ( +0.59%) [ +3.92% +0.00% +1.37% / +4.31% +0.59% +1.76%] index_select wrap : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.521 ( +1.17%) [ +2.52% +0.00% +2.14% / +3.30% +1.17% +1.94%] index_select linear : Elapsed 0.005 ms (0.528 ms / 100) 0.517 -> 0.524 ( +1.35%) [ +3.09% +0.00% +0.77% / +2.32% +4.84% +1.35%] index_select reverse : Elapsed 0.005 ms (0.533 ms / 100) 0.516 -> 0.512 ( -0.78%) [ +2.91% +0.00% +1.74% / +2.91% -0.78% +2.91%] index_select skip64 : Elapsed 0.005 ms (0.531 ms / 100) 0.520 -> 0.515 ( -0.96%) [ +2.12% +2.31% +0.00% / +1.92% -0.96% -0.19%] index_select skip256 : Elapsed 0.005 ms (0.531 ms / 100) 0.509 -> 0.514 ( +0.98%) [ +5.70% +0.00% +2.36% / +4.13% +0.98% +2.95%] index_select spread : Elapsed 0.005 ms (0.538 ms / 100) 0.517 -> 0.514 ( -0.58%) [ +2.90% +0.00% +6.38% / +1.55% -0.58% +0.58%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.512 ( -0.78%) [ +2.91% +0.00% +16.28% / +1.94% -0.78% +0.58%] index_select strided 5 : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.516 ( +0.19%) [ +2.91% +0.00% +1.75% / +2.72% +0.19% +0.78%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.510 -> 0.515 ( +0.98%) [ +4.12% +0.00% +1.96% / +4.12% +0.98% +6.08%] index_select strided 8 : Elapsed 0.005 ms (0.531 ms / 100) 0.510 -> 0.511 ( +0.20%) [ +4.31% +0.00% +2.16% / +3.53% +0.20% +1.57%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.518 ( +0.58%) [ +1.75% +0.00% +4.08% / +10.49% +6.41% +0.58%] index_select strided 64 : Elapsed 0.005 ms (0.524 ms / 100) 0.513 -> 0.513 ( +0.00%) [ +3.90% +0.00% +1.17% / +1.95% +0.00% +2.73%] index_select strided 100 : Elapsed 0.005 ms (0.533 ms / 100) 0.509 -> 0.519 ( +1.96%) [ +4.13% +0.00% +3.14% / +3.73% +6.48% +1.96%] index_select random : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.509 ( -1.36%) [ +2.13% +0.00% +1.55% / +2.91% -1.36% +0.78%] index_select random_sorted : Elapsed 0.005 ms (0.527 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.52% +0.00% +5.08% / +2.93% +0.39% +0.59%] index_select perm : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.516 ( +0.78%) [ +3.91% +0.00% +1.37% / +3.52% +0.78% +1.56%] index_select perm_sorted : Elapsed 0.005 ms (0.532 ms / 100) B = [1, 5, 500] (stride (5, 1, 5)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 0 0.513 -> 0.515 ( +0.39%) [ +3.51% +0.00% +9.16% / +3.12% +0.39% +2.92%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.514 ( +0.00%) [ +5.84% +0.00% +1.36% / +3.50% +0.00% +3.89%] index_select wrap : Elapsed 0.005 ms (0.544 ms / 100) 0.521 -> 0.516 ( -0.96%) [ +1.15% +1.34% +0.00% / +1.54% -0.96% +0.77%] index_select linear : Elapsed 0.005 ms (0.527 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +3.12% +0.00% +3.90% / +3.90% +0.19% +8.58%] index_select reverse : Elapsed 0.005 ms (0.529 ms / 100) 0.509 -> 0.518 ( +1.77%) [ +3.93% +0.00% +1.57% / +4.72% +1.77% +3.54%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.511 -> 0.514 ( +0.59%) [ +3.72% +0.00% +1.57% / +6.46% +0.59% +1.57%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.513 -> 0.524 ( +2.14%) [ +3.90% +0.00% +1.36% / +4.09% +3.31% +2.14%] index_select spread : Elapsed 0.005 ms (0.533 ms / 100) 0.513 -> 0.516 ( +0.58%) [ +3.70% +0.00% +8.38% / +3.31% +6.63% +0.58%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.524 -> 0.512 ( -2.29%) [ +1.72% +20.23% +0.00% / +8.40% -2.29% +0.57%] index_select strided 5 : Elapsed 0.005 ms (0.533 ms / 100) good 0.567 -> 0.512 ( -9.70%) [ +0.00% +6.00% +0.53% / -4.23% -9.70% -8.64%] index_select strided 7 : Elapsed 0.006 ms (0.567 ms / 100) 0.520 -> 0.507 ( -2.50%) [+12.50% +0.00% +0.00% / +1.92% -2.50% -0.58%] index_select strided 8 : Elapsed 0.006 ms (0.585 ms / 100) 0.513 -> 0.509 ( -0.78%) [ +3.70% +0.00% +9.36% / +3.70% -0.78% +1.56%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.513 ( -0.58%) [+15.31% +0.00% +0.78% / +2.33% -0.58% +0.78%] index_select strided 64 : Elapsed 0.006 ms (0.595 ms / 100) 0.514 -> 0.510 ( -0.78%) [ +3.89% +0.00% +1.17% / +3.31% -0.78% +2.72%] index_select strided 100 : Elapsed 0.005 ms (0.534 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +4.47% +0.00% +11.87% / +2.72% -0.19% +4.09%] index_select random : Elapsed 0.005 ms (0.537 ms / 100) 0.524 -> 0.510 ( -2.67%) [ +1.91% +0.00% +0.00% / +2.86% -2.67% +13.36%] index_select random_sorted : Elapsed 0.005 ms (0.534 ms / 100) 0.515 -> 0.509 ( -1.17%) [ +3.30% +0.00% +4.27% / +3.11% -1.17% +2.52%] index_select perm : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +6.41% +0.00% +2.14% / +5.63% -0.97% +0.97%] index_select perm_sorted : Elapsed 0.005 ms (0.548 ms / 100) B = [1, 5, 500] (stride (5, 1, 5)) A = [200, 5, 500] (stride (5, 1, 1000)) dim = 0 0.524 -> 0.523 ( -0.19%) [ +0.00% +9.16% +0.95% / +7.25% +7.06% -0.19%] index_select const : Elapsed 0.005 ms (0.524 ms / 100) 0.517 -> 0.527 ( +1.93%) [ +2.51% +0.00% +2.90% / +9.28% +13.15% +1.93%] index_select wrap : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.52% +0.00% +1.76% / +3.71% +0.39% +2.73%] index_select linear : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.515 ( +0.59%) [ +3.91% +0.00% +2.34% / +3.52% +0.59% +3.91%] index_select reverse : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +3.50% +0.00% +5.45% / +3.31% -0.19% +1.17%] index_select skip64 : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.512 ( -0.19%) [ +2.73% +0.00% +8.38% / +3.70% -0.19% +2.53%] index_select skip256 : Elapsed 0.005 ms (0.527 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +1.54% +5.57% +0.00% / +1.92% -0.19% +0.96%] index_select spread : Elapsed 0.005 ms (0.529 ms / 100) 0.519 -> 0.512 ( -1.35%) [ +3.28% +0.00% +2.50% / +2.70% -1.35% +1.35%] index_select strided 3 : Elapsed 0.005 ms (0.536 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +3.70% +0.00% +0.97% / +3.89% -0.19% +4.86%] index_select strided 5 : Elapsed 0.005 ms (0.533 ms / 100) 0.517 -> 0.523 ( +1.16%) [ +2.51% +0.00% +1.16% / +3.68% +1.16% +10.44%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.520 -> 0.513 ( -1.35%) [ +2.12% +0.00% +3.08% / +2.50% -1.35% +0.00%] index_select strided 8 : Elapsed 0.005 ms (0.531 ms / 100) 0.517 -> 0.511 ( -1.16%) [ +2.13% +0.00% +0.58% / +2.71% -1.16% +2.32%] index_select strided 16 : Elapsed 0.005 ms (0.528 ms / 100) 0.517 -> 0.515 ( -0.39%) [ +2.71% +1.16% +0.00% / +2.51% -0.39% +0.58%] index_select strided 64 : Elapsed 0.005 ms (0.531 ms / 100) 0.520 -> 0.520 ( +0.00%) [ +1.73% +0.00% +5.19% / +2.12% +4.04% +0.00%] index_select strided 100 : Elapsed 0.005 ms (0.529 ms / 100) 0.516 -> 0.509 ( -1.36%) [ +2.33% +0.00% +10.47% / +3.49% -1.36% +2.71%] index_select random : Elapsed 0.005 ms (0.528 ms / 100) 0.526 -> 0.510 ( -3.04%) [ +0.00% +2.66% +4.75% / +1.14% -3.04% +0.38%] index_select random_sorted : Elapsed 0.005 ms (0.526 ms / 100) 0.524 -> 0.515 ( -1.72%) [ +0.00% +4.39% +10.69% / +1.72% -1.72% +4.01%] index_select perm : Elapsed 0.005 ms (0.524 ms / 100) 0.520 -> 0.510 ( -1.92%) [ +2.31% +0.00% +6.73% / +2.50% -1.92% +0.00%] index_select perm_sorted : Elapsed 0.005 ms (0.532 ms / 100) B = [1, 5, 500] (stride (1, 1, 5)) dim = 0 fill_cnt = 200 8.893 -> 9.026 ( +1.50%) [ +1.26% +0.04% +0.00% / +1.50% +1.56% +1.53%] index_fill_ const : Elapsed 0.090 ms (9.005 ms / 100) 8.881 -> 9.025 ( +1.62%) [ +1.24% +0.05% +0.00% / +1.67% +1.62% +1.62%] index_fill_ linear : Elapsed 0.090 ms (8.991 ms / 100) 8.881 -> 9.030 ( +1.68%) [ +1.17% +0.00% +0.00% / +1.77% +1.68% +1.68%] index_fill_ reverse : Elapsed 0.090 ms (8.985 ms / 100) 8.898 -> 9.021 ( +1.38%) [ +1.19% +0.04% +0.00% / +1.38% +1.40% +1.39%] index_fill_ skip64 : Elapsed 0.090 ms (9.004 ms / 100) 8.919 -> 9.031 ( +1.26%) [ +1.14% +0.00% +0.10% / +1.44% +1.26% +1.26%] index_fill_ skip256 : Elapsed 0.090 ms (9.021 ms / 100) 8.918 -> 9.005 ( +0.98%) [ +1.23% +0.13% +0.00% / +0.98% +1.00% +0.99%] index_fill_ spread : Elapsed 0.090 ms (9.028 ms / 100) 8.933 -> 9.013 ( +0.90%) [ +1.11% +0.06% +0.00% / +0.90% +1.03% +1.04%] index_fill_ random : Elapsed 0.090 ms (9.032 ms / 100) 8.946 -> 9.008 ( +0.69%) [ +1.18% +0.06% +0.00% / +0.72% +0.70% +0.69%] index_fill_ random_sorted : Elapsed 0.091 ms (9.052 ms / 100) B = [1, 5, 500] (stride (1, 1, 5)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 0 0.524 -> 0.528 ( +0.76%) [ +1.91% +11.26% +0.00% / +1.91% +8.97% +0.76%] index_select const : Elapsed 0.005 ms (0.534 ms / 100) 0.517 -> 0.513 ( -0.77%) [ +3.68% +0.00% +2.32% / +3.87% -0.77% +1.16%] index_select wrap : Elapsed 0.005 ms (0.536 ms / 100) 0.513 -> 0.519 ( +1.17%) [+15.40% +0.00% +2.34% / +4.87% +1.17% +1.17%] index_select linear : Elapsed 0.006 ms (0.592 ms / 100) 0.518 -> 0.514 ( -0.77%) [ +4.05% +0.58% +0.00% / +2.90% -0.77% +0.00%] index_select reverse : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.518 ( -1.15%) [ +1.91% +0.00% +5.53% / +1.72% -1.15% -0.38%] index_select skip64 : Elapsed 0.005 ms (0.534 ms / 100) 0.519 -> 0.520 ( +0.19%) [ +2.50% +0.00% +0.19% / +2.50% +1.35% +0.19%] index_select skip256 : Elapsed 0.005 ms (0.532 ms / 100) 0.520 -> 0.515 ( -0.96%) [ +4.81% +0.19% +0.00% / +2.69% -0.96% +1.15%] index_select spread : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.510 ( -2.30%) [ +1.34% +0.00% +0.77% / +1.72% -2.30% +7.28%] index_select strided 3 : Elapsed 0.005 ms (0.529 ms / 100) 0.524 -> 0.510 ( -2.67%) [ +1.53% +0.57% +0.00% / +1.15% -2.67% +0.38%] index_select strided 5 : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.511 ( -0.78%) [ +3.88% +0.00% +2.52% / +3.50% -0.78% +1.94%] index_select strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.512 -> 0.510 ( -0.39%) [ +4.10% +0.00% +2.15% / +2.93% -0.39% +2.73%] index_select strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.511 -> 0.524 ( +2.54%) [ +4.11% +0.00% +1.76% / +3.72% +7.05% +2.54%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.510 -> 0.513 ( +0.59%) [ +4.90% +0.00% +3.14% / +7.25% +0.59% +3.33%] index_select strided 64 : Elapsed 0.005 ms (0.535 ms / 100) 0.510 -> 0.516 ( +1.18%) [ +4.12% +0.00% +2.16% / +3.33% +1.18% +2.75%] index_select strided 100 : Elapsed 0.005 ms (0.531 ms / 100) 0.520 -> 0.508 ( -2.31%) [ +3.08% +0.00% +0.58% / +2.12% -2.31% +0.96%] index_select random : Elapsed 0.005 ms (0.536 ms / 100) 0.523 -> 0.511 ( -2.29%) [ +2.10% +0.00% +1.53% / +1.53% -2.29% +0.76%] index_select random_sorted : Elapsed 0.005 ms (0.534 ms / 100) 0.517 -> 0.508 ( -1.74%) [ +3.09% +0.00% +13.54% / +2.51% -1.74% +2.13%] index_select perm : Elapsed 0.005 ms (0.533 ms / 100) 0.522 -> 0.507 ( -2.87%) [ +2.30% +2.11% +0.00% / +2.49% -2.87% +2.87%] index_select perm_sorted : Elapsed 0.005 ms (0.534 ms / 100) out_shape = [200, 1, 500] in_shape = [200, 5, 500] idx_dim = 1 B = [200, 1, 500] (stride (500, 1, 1)) dim = 1 fill_cnt = 5 3.222 -> 3.223 ( +0.03%) [ +0.00% +0.12% +0.28% / +0.34% +0.16% +0.03%] index_fill_ const : Elapsed 0.032 ms (3.222 ms / 100) 3.216 -> 3.218 ( +0.06%) [ +0.53% +0.22% +0.00% / +0.28% +0.12% +0.06%] index_fill_ linear : Elapsed 0.032 ms (3.233 ms / 100) 3.228 -> 3.225 ( -0.09%) [ +0.43% +0.06% +0.00% / +0.40% -0.03% -0.09%] index_fill_ reverse : Elapsed 0.032 ms (3.242 ms / 100) 3.232 -> 3.219 ( -0.40%) [ +0.12% +0.00% +0.34% / +0.25% -0.40% -0.09%] index_fill_ skip64 : Elapsed 0.032 ms (3.236 ms / 100) 3.228 -> 3.219 ( -0.28%) [ +0.19% +0.00% +0.37% / +0.03% -0.28% -0.28%] index_fill_ skip256 : Elapsed 0.032 ms (3.234 ms / 100) 3.219 -> 3.216 ( -0.09%) [ +0.78% +0.25% +0.00% / +0.56% -0.09% -0.03%] index_fill_ spread : Elapsed 0.032 ms (3.244 ms / 100) 3.217 -> 3.218 ( +0.03%) [ +0.75% +0.37% +0.00% / +0.34% +0.44% +0.03%] index_fill_ random : Elapsed 0.032 ms (3.241 ms / 100) 3.222 -> 3.222 ( +0.00%) [ +0.62% +0.00% +0.16% / +0.40% +0.03% +0.00%] index_fill_ random_sorted : Elapsed 0.032 ms (3.242 ms / 100) B = [200, 1, 500] (stride (1, 1, 200)) A = [200, 5, 500] (stride (2500, 1, 5)) dim = 1 5.536 -> 5.547 ( +0.20%) [ +0.14% +0.43% +0.00% / +0.20% +0.27% +0.36%] index_select const : Elapsed 0.055 ms (5.544 ms / 100) 5.537 -> 5.541 ( +0.07%) [ +0.05% +0.34% +0.00% / +0.25% +0.29% +0.07%] index_select wrap : Elapsed 0.055 ms (5.540 ms / 100) 5.543 -> 5.545 ( +0.04%) [ +0.20% +0.00% +0.20% / +0.04% +0.09% +0.09%] index_select linear : Elapsed 0.056 ms (5.554 ms / 100) 5.542 -> 5.551 ( +0.16%) [ +0.00% +0.04% +0.13% / +0.29% +0.16% +0.31%] index_select reverse : Elapsed 0.055 ms (5.542 ms / 100) 5.535 -> 5.543 ( +0.14%) [ +0.14% +0.25% +0.00% / +0.14% +0.18% +0.16%] index_select skip64 : Elapsed 0.055 ms (5.543 ms / 100) 5.541 -> 5.549 ( +0.14%) [ +0.23% +0.22% +0.00% / +0.25% +0.18% +0.14%] index_select skip256 : Elapsed 0.056 ms (5.554 ms / 100) 5.543 -> 5.545 ( +0.04%) [ +0.29% +0.00% +0.11% / +0.43% +0.16% +0.04%] index_select spread : Elapsed 0.056 ms (5.559 ms / 100) 5.538 -> 5.547 ( +0.16%) [ +0.29% +0.00% +0.02% / +0.22% +0.16% +0.29%] index_select strided 3 : Elapsed 0.056 ms (5.554 ms / 100) 5.533 -> 5.537 ( +0.07%) [ +0.11% +0.25% +0.00% / +0.22% +0.11% +0.07%] index_select random : Elapsed 0.055 ms (5.539 ms / 100) 5.524 -> 5.529 ( +0.09%) [ +0.16% +0.25% +0.00% / +0.09% +0.29% +0.42%] index_select random_sorted : Elapsed 0.055 ms (5.533 ms / 100) 5.532 -> 5.534 ( +0.04%) [ +0.16% +0.00% +0.05% / +0.04% +0.23% +0.29%] index_select perm : Elapsed 0.055 ms (5.541 ms / 100) 5.544 -> 5.549 ( +0.09%) [ +0.04% +0.00% +0.05% / +0.11% +0.18% +0.09%] index_select perm_sorted : Elapsed 0.055 ms (5.546 ms / 100) B = [200, 1, 500] (stride (1, 1, 200)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 1 5.155 -> 5.162 ( +0.14%) [ +0.17% +0.14% +0.00% / +0.14% +0.29% +0.43%] index_select const : Elapsed 0.052 ms (5.164 ms / 100) 5.158 -> 5.159 ( +0.02%) [ +0.12% +0.08% +0.00% / +0.08% +0.31% +0.02%] index_select wrap : Elapsed 0.052 ms (5.164 ms / 100) 5.155 -> 5.168 ( +0.25%) [ +0.00% +0.27% +0.17% / +0.43% +0.27% +0.25%] index_select linear : Elapsed 0.052 ms (5.155 ms / 100) 5.157 -> 5.167 ( +0.19%) [ +0.16% +0.00% +0.23% / +0.37% +0.27% +0.19%] index_select reverse : Elapsed 0.052 ms (5.165 ms / 100) 5.164 -> 5.164 ( +0.00%) [ +0.02% +0.02% +0.00% / +0.00% +0.33% +0.19%] index_select skip64 : Elapsed 0.052 ms (5.165 ms / 100) 5.165 -> 5.164 ( -0.02%) [ +0.00% +0.04% +0.06% / +0.06% -0.02% +0.02%] index_select skip256 : Elapsed 0.052 ms (5.165 ms / 100) 5.156 -> 5.153 ( -0.06%) [ +0.12% +0.08% +0.00% / +0.45% +0.33% -0.06%] index_select spread : Elapsed 0.052 ms (5.162 ms / 100) 5.148 -> 5.161 ( +0.25%) [ +0.17% +0.00% +0.12% / +0.25% +0.43% +0.33%] index_select strided 3 : Elapsed 0.052 ms (5.157 ms / 100) 5.160 -> 5.161 ( +0.02%) [ +0.00% +0.10% +0.02% / +0.02% +0.33% +0.21%] index_select random : Elapsed 0.052 ms (5.160 ms / 100) 5.158 -> 5.167 ( +0.17%) [ +0.04% +0.00% +0.17% / +0.19% +0.33% +0.17%] index_select random_sorted : Elapsed 0.052 ms (5.160 ms / 100) 5.157 -> 5.156 ( -0.02%) [ +0.16% +0.02% +0.00% / -0.02% +0.31% +0.23%] index_select perm : Elapsed 0.052 ms (5.165 ms / 100) 5.157 -> 5.169 ( +0.23%) [ +0.17% +0.00% +0.10% / +0.25% +0.23% +0.31%] index_select perm_sorted : Elapsed 0.052 ms (5.166 ms / 100) out_shape = [200, 5, 1] in_shape = [200, 5, 500] idx_dim = 2 B = [200, 5, 1] (stride (5, 1, 1)) A = [200, 5, 500] (stride (2500, 500, 1)) dim = 2 0.518 -> 0.513 ( -0.97%) [ +3.47% +1.54% +0.00% / +2.90% -0.97% +2.32%] index_select const : Elapsed 0.005 ms (0.536 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +3.49% +0.00% +0.58% / +3.49% -1.16% +1.36%] index_select wrap : Elapsed 0.005 ms (0.534 ms / 100) 0.514 -> 0.509 ( -0.97%) [ +3.89% +0.00% +8.37% / +3.70% -0.97% +1.36%] index_select linear : Elapsed 0.005 ms (0.534 ms / 100) 0.518 -> 0.522 ( +0.77%) [ +2.51% +0.00% +0.97% / +2.32% +0.77% +2.32%] index_select reverse : Elapsed 0.005 ms (0.531 ms / 100) 0.522 -> 0.516 ( -1.15%) [ +1.34% +2.11% +0.00% / +2.30% -1.15% +0.00%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.511 -> 0.515 ( +0.78%) [ +3.72% +0.00% +1.37% / +6.26% +0.78% +3.13%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +3.50% +0.00% +0.00% / +15.18% -0.58% +9.92%] index_select spread : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +3.51% +0.78% +0.00% / +4.48% +0.19% +15.20%] index_select strided 3 : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.514 ( +0.00%) [ +5.84% +0.97% +0.00% / +9.14% +0.00% +1.75%] index_select strided 5 : Elapsed 0.005 ms (0.544 ms / 100) 0.513 -> 0.509 ( -0.78%) [ +2.53% +0.97% +0.00% / +3.51% -0.78% +3.51%] index_select strided 7 : Elapsed 0.005 ms (0.526 ms / 100) 0.512 -> 0.522 ( +1.95%) [ +3.52% +4.69% +0.00% / +8.79% +1.95% +3.32%] index_select strided 8 : Elapsed 0.005 ms (0.530 ms / 100) 0.514 -> 0.521 ( +1.36%) [ +2.33% +0.00% +0.39% / +6.03% +1.56% +1.36%] index_select strided 16 : Elapsed 0.005 ms (0.526 ms / 100) 0.513 -> 0.523 ( +1.95%) [ +2.73% +0.78% +0.00% / +11.50% +11.70% +1.95%] index_select strided 64 : Elapsed 0.005 ms (0.527 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +2.71% +0.00% +0.19% / +3.29% -1.16% +0.58%] index_select strided 100 : Elapsed 0.005 ms (0.530 ms / 100) 0.521 -> 0.515 ( -1.15%) [ +2.30% +0.77% +0.00% / +1.92% -1.15% -0.19%] index_select strided 255 : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.508 ( -1.17%) [+10.89% +0.78% +0.00% / +2.92% -1.17% +0.78%] index_select strided 256 : Elapsed 0.006 ms (0.570 ms / 100) good 0.541 -> 0.508 ( -6.10%) [+12.20% +0.00% +1.66% / -1.29% -6.10% -4.81%] index_select strided 257 : Elapsed 0.006 ms (0.607 ms / 100) 0.516 -> 0.511 ( -0.97%) [ +3.10% +1.55% +0.00% / +2.91% -0.97% +1.16%] index_select random : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.511 ( -0.78%) [ +3.50% +0.00% +0.58% / +2.91% -0.78% +0.97%] index_select random_sorted : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.518 ( +0.78%) [ +2.53% +0.00% +0.19% / +3.11% +0.78% +1.36%] index_select perm : Elapsed 0.005 ms (0.527 ms / 100) 0.514 -> 0.518 ( +0.78%) [ +2.72% +0.78% +0.00% / +3.11% +0.78% +14.98%] index_select perm_sorted : Elapsed 0.005 ms (0.528 ms / 100) B = [200, 5, 1] (stride (5, 1, 1)) A = [200, 5, 500] (stride (1, 100000, 200)) dim = 2 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +0.89% +0.89%] index_select const : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.89% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select wrap : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.53% +0.00% +0.18% / +0.71% +1.07% +0.53%] index_select linear : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.53% +0.18% +0.00% / +0.53% +1.07% +0.53%] index_select reverse : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.00% +0.18% / +3.39% +1.07% +0.71%] index_select skip64 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +0.89% +1.07%] index_select skip256 : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.53% +0.00% +0.18% / +0.71% +0.71% +0.53%] index_select spread : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +1.07% +0.00% +0.18% / +0.89% +0.89% +0.71%] index_select strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +1.78% +0.00% +0.00% / +0.71% +0.71% +0.71%] index_select strided 5 : Elapsed 0.006 ms (0.572 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +0.71% +0.00% +0.18% / +0.71% +3.02% +0.71%] index_select strided 7 : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.71% +0.36% +0.00% / +0.53% +0.53% +0.53%] index_select strided 8 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +1.60% +0.89%] index_select strided 16 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.89% +0.36% +0.00% / +2.32% +0.89% +1.25%] index_select strided 64 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.89% +0.18% +0.00% / +0.89% +0.71% +0.53%] index_select strided 100 : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.564 ( +0.36%) [ +0.71% +0.00% +0.00% / +0.71% +0.89% +0.36%] index_select strided 255 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +1.07% +0.00% +0.18% / +0.89% +4.63% +0.53%] index_select strided 256 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.71% +0.18% +0.00% / +0.71% +0.71% +0.53%] index_select strided 257 : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.564 ( +0.36%) [ +0.53% +0.00% +0.00% / +1.07% +5.16% +0.36%] index_select random : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.71% +0.00% +0.00% / +0.71% +0.53% +0.53%] index_select random_sorted : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +1.78% +0.71% +0.00% / +0.53% +0.71% +0.53%] index_select perm : Elapsed 0.006 ms (0.572 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.71% +0.18% +0.00% / +0.71% +1.25% +0.53%] index_select perm_sorted : Elapsed 0.006 ms (0.566 ms / 100) B = [200, 5, 1] (stride (5, 1, 5)) dim = 2 fill_cnt = 500 19.469 -> 19.757 ( +1.48%) [ +1.15% +0.00% +0.01% / +1.48% +1.53% +1.54%] index_fill_ const : Elapsed 0.197 ms (19.693 ms / 100) 19.485 -> 19.775 ( +1.49%) [ +1.15% +0.06% +0.00% / +1.49% +1.53% +1.53%] index_fill_ linear : Elapsed 0.197 ms (19.709 ms / 100) 19.473 -> 19.758 ( +1.46%) [ +1.13% +0.01% +0.00% / +1.46% +1.50% +1.50%] index_fill_ reverse : Elapsed 0.197 ms (19.694 ms / 100) 19.514 -> 19.784 ( +1.38%) [ +1.15% +0.01% +0.00% / +1.48% +1.38% +1.38%] index_fill_ skip64 : Elapsed 0.197 ms (19.738 ms / 100) 19.495 -> 19.784 ( +1.48%) [ +1.14% +0.01% +0.00% / +1.48% +1.58% +1.57%] index_fill_ skip256 : Elapsed 0.197 ms (19.718 ms / 100) 19.511 -> 19.785 ( +1.40%) [ +1.14% +0.00% +0.00% / +1.46% +1.40% +1.41%] index_fill_ spread : Elapsed 0.197 ms (19.734 ms / 100) 19.464 -> 19.756 ( +1.50%) [ +1.16% +0.01% +0.00% / +1.50% +1.58% +1.59%] index_fill_ random : Elapsed 0.197 ms (19.690 ms / 100) 19.482 -> 19.770 ( +1.48%) [ +1.14% +0.00% +0.00% / +1.48% +1.56% +1.56%] index_fill_ random_sorted : Elapsed 0.197 ms (19.705 ms / 100) B = [200, 5, 1] (stride (5, 1, 5)) A = [200, 5, 500] (stride (1, 200, 1000)) dim = 2 0.560 -> 0.564 ( +0.71%) [ +1.25% +0.36% +0.00% / +0.71% +0.89% +0.89%] index_select const : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.71% +0.00% +0.18% / +0.71% +1.25% +0.71%] index_select wrap : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.71% +0.00% +0.18% / +0.71% +6.60% +0.71%] index_select linear : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.565 ( +0.89%) [ +0.89% +0.36% +0.00% / +0.89% +0.89% +1.07%] index_select reverse : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +0.89% +0.71%] index_select skip64 : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.53% +0.00% +0.18% / +0.53% +0.53% +0.53%] index_select skip256 : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.53% +0.00% +0.00% / +0.53% +0.71% +0.53%] index_select spread : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +0.71% +0.71%] index_select strided 3 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +4.81% +5.70% +0.71%] index_select strided 5 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.00% / +0.53% +0.71% +0.71%] index_select strided 7 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.89% +0.71% +0.71%] index_select strided 8 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.18% / +0.53% +0.89% +0.71%] index_select strided 16 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.36% +0.00% / +0.71% +0.71% +0.71%] index_select strided 64 : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.564 ( +0.36%) [ +0.53% +0.00% +4.09% / +0.53% +0.36% +0.36%] index_select strided 100 : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +0.53% +0.00% +0.00% / +0.71% +0.71% +4.45%] index_select strided 255 : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.563 ( +0.18%) [ +0.71% +0.18% +0.00% / +0.89% +0.53% +0.18%] index_select strided 256 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +1.07% +0.36% +0.00% / +0.71% +0.71% +0.71%] index_select strided 257 : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.00% / +0.71% +0.53% +0.53%] index_select random : Elapsed 0.006 ms (0.565 ms / 100) 0.562 -> 0.564 ( +0.36%) [ +0.71% +0.18% +0.00% / +5.52% +6.41% +0.36%] index_select random_sorted : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +5.17% +0.00% +0.18% / +0.89% +0.53% +0.71%] index_select perm : Elapsed 0.006 ms (0.590 ms / 100) 0.562 -> 0.565 ( +0.53%) [ +0.71% +0.00% +0.18% / +0.71% +0.53% +0.53%] index_select perm_sorted : Elapsed 0.006 ms (0.566 ms / 100) B = [200, 5, 1] (stride (1, 200, 1)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 2 0.521 -> 0.512 ( -1.73%) [ +2.11% +2.50% +0.00% / +1.54% -1.73% +0.19%] index_select const : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.516 ( +0.58%) [ +6.24% +0.00% +9.16% / +3.90% +0.58% +1.36%] index_select wrap : Elapsed 0.005 ms (0.545 ms / 100) 0.514 -> 0.521 ( +1.36%) [ +4.86% +0.00% +15.37% / +2.72% +4.67% +1.36%] index_select linear : Elapsed 0.005 ms (0.539 ms / 100) 0.511 -> 0.521 ( +1.96%) [+16.44% +0.00% +1.96% / +2.74% +1.96% +8.02%] index_select reverse : Elapsed 0.006 ms (0.595 ms / 100) 0.508 -> 0.519 ( +2.17%) [ +4.92% +0.00% +3.74% / +3.35% +2.17% +2.56%] index_select skip64 : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +3.11% +0.00% +4.67% / +4.86% +0.19% +1.95%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.512 ( -0.58%) [ +3.11% +0.00% +0.00% / +10.10% -0.58% +1.17%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.517 -> 0.530 ( +2.51%) [ +2.90% +0.00% +0.19% / +2.51% +3.09% +2.90%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +2.32% +0.00% +0.00% / +13.15% +1.55% +0.97%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.524 -> 0.513 ( -2.10%) [ +2.10% +2.48% +0.00% / +1.53% -2.10% -0.19%] index_select strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.514 -> 0.519 ( +0.97%) [ +2.53% +0.00% +1.56% / +3.50% +2.14% +0.97%] index_select strided 8 : Elapsed 0.005 ms (0.527 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +2.93% +0.00% +2.15% / +3.91% +0.20% +2.34%] index_select strided 16 : Elapsed 0.005 ms (0.527 ms / 100) 0.527 -> 0.513 ( -2.66%) [ +0.76% +0.00% +4.55% / +1.33% -2.66% -1.90%] index_select strided 64 : Elapsed 0.005 ms (0.531 ms / 100) 0.516 -> 0.514 ( -0.39%) [ +1.74% +0.19% +0.00% / +4.84% -0.39% +0.39%] index_select strided 100 : Elapsed 0.005 ms (0.525 ms / 100) 0.517 -> 0.516 ( -0.19%) [ +2.13% +0.00% +0.39% / +3.09% -0.19% +7.54%] index_select strided 255 : Elapsed 0.005 ms (0.528 ms / 100) 0.512 -> 0.524 ( +2.34%) [+10.74% +0.00% +1.37% / +4.30% +2.34% +3.32%] index_select strided 256 : Elapsed 0.006 ms (0.567 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +4.09% +0.00% +1.75% / +3.70% -0.58% +0.78%] index_select strided 257 : Elapsed 0.005 ms (0.535 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +3.50% +0.00% +2.33% / +12.84% -0.58% +0.78%] index_select random : Elapsed 0.005 ms (0.532 ms / 100) 0.521 -> 0.521 ( +0.00%) [ +1.34% +1.92% +0.00% / +14.40% +0.58% +0.00%] index_select random_sorted : Elapsed 0.005 ms (0.528 ms / 100) 0.519 -> 0.524 ( +0.96%) [ +1.93% +0.19% +0.00% / +7.71% +5.78% +0.96%] index_select perm : Elapsed 0.005 ms (0.529 ms / 100) 0.525 -> 0.511 ( -2.67%) [ +0.95% +5.52% +0.00% / +1.52% -2.67% -0.95%] index_select perm_sorted : Elapsed 0.005 ms (0.530 ms / 100) B = [200, 5, 1] (stride (1, 200, 200)) A = [200, 5, 500] (stride (500, 100000, 1)) dim = 2 0.517 -> 0.513 ( -0.77%) [ +2.13% +0.00% +0.58% / +2.51% -0.77% +1.55%] index_select const : Elapsed 0.005 ms (0.528 ms / 100) 0.515 -> 0.510 ( -0.97%) [ +3.11% +0.00% +2.14% / +2.91% -0.97% +1.94%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.518 -> 0.515 ( -0.58%) [ +2.70% +0.00% +8.11% / +5.98% -0.58% +0.77%] index_select linear : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +3.11% +0.00% +0.97% / +3.70% -0.58% +2.92%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.517 -> 0.512 ( -0.97%) [ +1.93% +0.00% +1.35% / +3.09% -0.97% +9.67%] index_select skip64 : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.511 ( -1.16%) [ +1.93% +14.51% +0.00% / +2.71% -1.16% +2.51%] index_select skip256 : Elapsed 0.005 ms (0.527 ms / 100) 0.519 -> 0.512 ( -1.35%) [ +3.28% +0.00% +2.50% / +2.50% -1.35% +1.16%] index_select spread : Elapsed 0.005 ms (0.536 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +1.55% +1.36% +0.00% / +3.10% -1.16% +0.97%] index_select strided 3 : Elapsed 0.005 ms (0.524 ms / 100) 0.516 -> 0.518 ( +0.39%) [ +9.50% +20.54% +0.00% / +9.69% +0.39% +0.78%] index_select strided 5 : Elapsed 0.006 ms (0.565 ms / 100) 0.513 -> 0.518 ( +0.97%) [ +9.94% +0.00% +0.97% / +3.70% +0.97% +1.95%] index_select strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.518 -> 0.521 ( +0.58%) [ +2.70% +0.00% +0.97% / +3.09% +0.58% +0.97%] index_select strided 8 : Elapsed 0.005 ms (0.532 ms / 100) 0.520 -> 0.511 ( -1.73%) [+13.46% +11.35% +0.00% / +2.88% -1.73% +0.19%] index_select strided 16 : Elapsed 0.006 ms (0.590 ms / 100) 0.526 -> 0.515 ( -2.09%) [ +8.94% +0.00% +0.57% / +0.95% -2.09% +0.00%] index_select strided 64 : Elapsed 0.006 ms (0.573 ms / 100) 0.508 -> 0.514 ( +1.18%) [ +4.33% +0.00% +3.74% / +4.53% +1.18% +2.56%] index_select strided 100 : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.511 ( -0.20%) [ +8.20% +0.00% +7.62% / +3.52% -0.20% +4.88%] index_select strided 255 : Elapsed 0.006 ms (0.554 ms / 100) 0.513 -> 0.510 ( -0.58%) [ +3.70% +0.00% +1.95% / +3.31% -0.58% +12.09%] index_select strided 256 : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.508 ( -0.97%) [ +2.73% +0.00% +1.17% / +6.04% -0.97% +2.34%] index_select strided 257 : Elapsed 0.005 ms (0.527 ms / 100) 0.515 -> 0.514 ( -0.19%) [ +3.11% +0.00% +1.17% / +3.11% -0.19% +0.19%] index_select random : Elapsed 0.005 ms (0.531 ms / 100) 0.518 -> 0.515 ( -0.58%) [ +1.54% +0.00% +0.58% / +5.02% -0.58% +1.35%] index_select random_sorted : Elapsed 0.005 ms (0.526 ms / 100) 0.515 -> 0.509 ( -1.17%) [+18.83% +0.00% +0.78% / +10.49% -1.17% +1.75%] index_select perm : Elapsed 0.006 ms (0.612 ms / 100) 0.512 -> 0.519 ( +1.37%) [ +3.71% +0.00% +1.56% / +13.67% +14.26% +1.37%] index_select perm_sorted : Elapsed 0.005 ms (0.531 ms / 100) B = [200, 5, 1] (stride (1, 200, 200)) A = [200, 5, 500] (stride (1, 100000, 200)) dim = 2 0.511 -> 0.515 ( +0.78%) [ +4.11% +0.00% +2.35% / +9.98% +0.78% +2.54%] index_select const : Elapsed 0.005 ms (0.532 ms / 100) 0.518 -> 0.517 ( -0.19%) [ +0.77% +0.00% +0.97% / +3.28% +0.00% -0.19%] index_select wrap : Elapsed 0.005 ms (0.522 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +1.75% +0.00% +2.53% / +3.50% +0.19% +3.89%] index_select linear : Elapsed 0.005 ms (0.523 ms / 100) 0.512 -> 0.518 ( +1.17%) [ +2.34% +0.00% +1.76% / +9.57% +1.17% +1.17%] index_select reverse : Elapsed 0.005 ms (0.524 ms / 100) 0.512 -> 0.513 ( +0.20%) [ +2.73% +0.00% +9.57% / +3.52% +0.20% +0.59%] index_select skip64 : Elapsed 0.005 ms (0.526 ms / 100) 0.507 -> 0.509 ( +0.39%) [ +3.75% +0.00% +3.35% / +4.93% +0.39% +9.47%] index_select skip256 : Elapsed 0.005 ms (0.526 ms / 100) 0.509 -> 0.518 ( +1.77%) [ +2.95% +0.00% +2.55% / +9.82% +1.77% +10.02%] index_select spread : Elapsed 0.005 ms (0.524 ms / 100) 0.519 -> 0.519 ( +0.00%) [ +1.73% +0.00% +0.58% / +2.50% +0.00% +1.16%] index_select strided 3 : Elapsed 0.005 ms (0.528 ms / 100) 0.518 -> 0.529 ( +2.12%) [ +1.93% +0.00% +3.09% / +3.28% +2.51% +2.12%] index_select strided 5 : Elapsed 0.005 ms (0.528 ms / 100) 0.512 -> 0.520 ( +1.56%) [ +2.54% +0.00% +16.21% / +9.18% +1.56% +1.95%] index_select strided 7 : Elapsed 0.005 ms (0.525 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +3.11% +0.00% +0.00% / +3.88% -0.39% +0.97%] index_select strided 8 : Elapsed 0.005 ms (0.531 ms / 100) 0.512 -> 0.531 ( +3.71%) [+13.09% +0.00% +2.54% / +3.91% +6.84% +3.71%] index_select strided 16 : Elapsed 0.006 ms (0.579 ms / 100) 0.511 -> 0.524 ( +2.54%) [ +2.94% +0.00% +4.50% / +9.39% +12.33% +2.54%] index_select strided 64 : Elapsed 0.005 ms (0.526 ms / 100) 0.511 -> 0.507 ( -0.78%) [ +3.91% +0.00% +0.98% / +4.11% -0.78% +2.35%] index_select strided 100 : Elapsed 0.005 ms (0.531 ms / 100) 0.510 -> 0.509 ( -0.20%) [ +3.92% +0.00% +3.33% / +6.27% -0.20% +2.55%] index_select strided 255 : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.513 ( -0.58%) [ +1.94% +0.00% +0.58% / +8.33% -0.58% +2.13%] index_select strided 256 : Elapsed 0.005 ms (0.526 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +3.50% +0.00% +3.50% / +3.31% -0.58% +2.92%] index_select strided 257 : Elapsed 0.005 ms (0.532 ms / 100) 0.512 -> 0.512 ( +0.00%) [ +3.71% +0.00% +11.52% / +4.69% +0.00% +2.15%] index_select random : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.521 ( +1.36%) [ +6.23% +0.00% +1.36% / +3.70% +1.36% +9.34%] index_select random_sorted : Elapsed 0.005 ms (0.546 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +5.26% +0.00% +1.36% / +3.51% +0.19% +2.73%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.510 ( -1.92%) [ +1.15% +0.00% +0.77% / +3.65% -1.92% +1.73%] index_select perm_sorted : Elapsed 0.005 ms (0.526 ms / 100) out_shape = [1, 500, 5] in_shape = [200, 500, 5] idx_dim = 0 B = [1, 500, 5] (stride (1, 5, 1)) A = [200, 500, 5] (stride (2500, 1, 500)) dim = 0 0.832 -> 0.830 ( -0.24%) [ +0.84% +0.00% +0.12% / +0.24% -0.24% +0.24%] index_select const : Elapsed 0.008 ms (0.839 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.48% +0.00% +0.48% / +0.36% +0.24% +0.24%] index_select wrap : Elapsed 0.008 ms (0.833 ms / 100) 0.831 -> 0.830 ( -0.12%) [ +0.36% +0.24% +0.00% / +0.36% +0.12% -0.12%] index_select linear : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.834 ( +0.24%) [ +0.60% +0.24% +0.00% / +0.24% +0.24% +0.24%] index_select reverse : Elapsed 0.008 ms (0.837 ms / 100) 0.834 -> 0.827 ( -0.84%) [ +0.12% +0.00% +0.00% / +0.00% -0.60% -0.84%] index_select skip64 : Elapsed 0.008 ms (0.835 ms / 100) 0.829 -> 0.830 ( +0.12%) [ +0.60% +0.48% +0.00% / +0.97% +0.36% +0.12%] index_select skip256 : Elapsed 0.008 ms (0.834 ms / 100) 0.833 -> 0.827 ( -0.72%) [ +0.00% +0.00% +0.12% / +0.12% -0.72% -0.12%] index_select spread : Elapsed 0.008 ms (0.833 ms / 100) 0.832 -> 0.829 ( -0.36%) [ +0.48% +0.12% +0.00% / +0.48% +0.00% -0.36%] index_select strided 3 : Elapsed 0.008 ms (0.836 ms / 100) 0.831 -> 0.828 ( -0.36%) [ +0.24% +0.12% +0.00% / +0.48% -0.36% -0.36%] index_select strided 5 : Elapsed 0.008 ms (0.833 ms / 100) 0.830 -> 0.833 ( +0.36%) [ +0.72% +0.00% +0.12% / +0.72% +0.48% +0.36%] index_select strided 7 : Elapsed 0.008 ms (0.836 ms / 100) 0.834 -> 0.833 ( -0.12%) [ +0.48% +0.00% +0.00% / -0.12% +0.00% +0.00%] index_select strided 8 : Elapsed 0.008 ms (0.838 ms / 100) 0.832 -> 0.831 ( -0.12%) [ +0.48% +0.36% +0.00% / +0.48% +0.48% -0.12%] index_select strided 16 : Elapsed 0.008 ms (0.836 ms / 100) 0.831 -> 0.835 ( +0.48%) [ +0.24% +0.00% +0.12% / +0.48% +0.96% +0.84%] index_select strided 64 : Elapsed 0.008 ms (0.833 ms / 100) 0.832 -> 0.833 ( +0.12%) [ +0.36% +0.12% +0.00% / +0.12% +0.36% +0.96%] index_select strided 100 : Elapsed 0.008 ms (0.835 ms / 100) 0.823 -> 0.825 ( +0.24%) [ +0.00% +0.36% +0.00% / +0.24% +0.61% +0.97%] index_select random : Elapsed 0.008 ms (0.823 ms / 100) 0.819 -> 0.828 ( +1.10%) [ +0.85% +0.00% +0.85% / +1.10% +1.59% +1.47%] index_select random_sorted : Elapsed 0.008 ms (0.826 ms / 100) 0.819 -> 0.820 ( +0.12%) [ +2.81% +0.00% +0.12% / +0.12% +2.69% +2.20%] index_select perm : Elapsed 0.008 ms (0.842 ms / 100) 0.818 -> 0.823 ( +0.61%) [ +0.37% +0.37% +0.00% / +0.61% +1.10% +0.98%] index_select perm_sorted : Elapsed 0.008 ms (0.821 ms / 100) B = [1, 500, 5] (stride (1, 5, 1)) A = [200, 500, 5] (stride (500, 1, 100000)) dim = 0 0.833 -> 0.824 ( -1.08%) [ +0.00% +0.12% +0.00% / +0.24% -1.08% -0.84%] index_select const : Elapsed 0.008 ms (0.833 ms / 100) 0.833 -> 0.825 ( -0.96%) [ +0.12% +0.00% +0.12% / +0.12% -0.96% -0.60%] index_select wrap : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.830 ( -0.24%) [ +0.24% +0.00% +0.00% / +0.00% -0.24% +0.00%] index_select linear : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.830 ( -0.24%) [ +0.48% +0.00% +0.12% / +0.60% +0.12% -0.24%] index_select reverse : Elapsed 0.008 ms (0.836 ms / 100) 0.832 -> 0.829 ( -0.36%) [ +0.36% +0.24% +0.00% / +0.24% -0.36% -0.36%] index_select skip64 : Elapsed 0.008 ms (0.835 ms / 100) 0.830 -> 0.826 ( -0.48%) [ +0.72% +0.48% +0.00% / +0.48% -0.48% -0.12%] index_select skip256 : Elapsed 0.008 ms (0.836 ms / 100) 0.832 -> 0.821 ( -1.32%) [ +0.24% +0.36% +0.00% / +0.48% -0.48% -1.32%] index_select spread : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.825 ( -0.84%) [ +0.24% +0.12% +0.00% / +0.72% -0.48% -0.84%] index_select strided 3 : Elapsed 0.008 ms (0.834 ms / 100) 0.831 -> 0.823 ( -0.96%) [ +0.84% +0.00% +0.36% / +0.48% -0.60% -0.96%] index_select strided 5 : Elapsed 0.008 ms (0.838 ms / 100) 0.829 -> 0.828 ( -0.12%) [ +0.84% +0.00% +0.48% / +1.09% +0.24% -0.12%] index_select strided 7 : Elapsed 0.008 ms (0.836 ms / 100) 0.831 -> 0.827 ( -0.48%) [ +0.36% +0.00% +0.12% / +0.36% -0.24% -0.48%] index_select strided 8 : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.826 ( -0.72%) [ +0.36% +0.12% +0.00% / +0.60% -0.12% -0.72%] index_select strided 16 : Elapsed 0.008 ms (0.835 ms / 100) 0.828 -> 0.831 ( +0.36%) [ +0.85% +0.48% +0.00% / +1.45% +0.72% +0.36%] index_select strided 64 : Elapsed 0.008 ms (0.835 ms / 100) 0.830 -> 0.829 ( -0.12%) [ +0.72% +0.00% +0.12% / +0.24% +0.36% -0.12%] index_select strided 100 : Elapsed 0.008 ms (0.836 ms / 100) 0.826 -> 0.827 ( +0.12%) [ +0.36% +0.00% +0.12% / +0.36% +0.12% +0.36%] index_select random : Elapsed 0.008 ms (0.829 ms / 100) 0.827 -> 0.829 ( +0.24%) [ +0.36% +0.00% +0.00% / +0.48% +0.24% +0.24%] index_select random_sorted : Elapsed 0.008 ms (0.830 ms / 100) 0.823 -> 0.827 ( +0.49%) [ +0.61% +0.00% +0.24% / +0.49% +2.19% +2.55%] index_select perm : Elapsed 0.008 ms (0.828 ms / 100) 0.819 -> 0.829 ( +1.22%) [ +1.22% +0.00% +1.10% / +1.22% +1.47% +1.34%] index_select perm_sorted : Elapsed 0.008 ms (0.829 ms / 100) out_shape = [200, 1, 5] in_shape = [200, 500, 5] idx_dim = 1 B = [200, 1, 5] (stride (5, 1000, 1)) A = [200, 500, 5] (stride (500, 1, 100000)) dim = 1 0.511 -> 0.515 ( +0.78%) [ +2.94% +0.00% +1.37% / +4.11% +0.78% +9.39%] index_select const : Elapsed 0.005 ms (0.526 ms / 100) 0.512 -> 0.509 ( -0.59%) [ +3.71% +0.00% +2.54% / +2.54% -0.59% +2.93%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.517 -> 0.517 ( +0.00%) [ +2.32% +0.39% +0.00% / +3.09% +0.00% +0.77%] index_select linear : Elapsed 0.005 ms (0.529 ms / 100) 0.510 -> 0.516 ( +1.18%) [ +3.92% +0.00% +2.16% / +4.51% +1.18% +2.75%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.512 ( -0.78%) [ +2.52% +0.00% +2.52% / +3.49% -0.78% +1.74%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +1.73% +2.69% +0.00% / +9.40% +0.19% +2.30%] index_select skip256 : Elapsed 0.005 ms (0.530 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +3.89% +0.00% +1.36% / +3.11% +0.19% +0.58%] index_select spread : Elapsed 0.005 ms (0.534 ms / 100) 0.513 -> 0.517 ( +0.78%) [ +3.31% +0.00% +1.95% / +7.02% +5.26% +0.78%] index_select strided 3 : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.508 ( -1.36%) [ +2.14% +0.00% +1.75% / +3.30% -1.36% +1.94%] index_select strided 5 : Elapsed 0.005 ms (0.526 ms / 100) 0.513 -> 0.514 ( +0.19%) [ +2.73% +0.00% +1.95% / +3.51% +0.19% +1.56%] index_select strided 7 : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.516 ( -0.19%) [ +1.93% +0.00% +0.19% / +4.06% -0.19% +0.39%] index_select strided 8 : Elapsed 0.005 ms (0.527 ms / 100) 0.522 -> 0.513 ( -1.72%) [ +1.72% +2.68% +0.00% / +1.72% -1.72% +0.57%] index_select strided 16 : Elapsed 0.005 ms (0.531 ms / 100) 0.512 -> 0.523 ( +2.15%) [ +3.91% +0.00% +0.98% / +3.91% +2.15% +2.15%] index_select strided 64 : Elapsed 0.005 ms (0.532 ms / 100) 0.508 -> 0.511 ( +0.59%) [ +3.54% +0.00% +13.19% / +4.13% +0.59% +10.24%] index_select strided 100 : Elapsed 0.005 ms (0.526 ms / 100) 0.509 -> 0.511 ( +0.39%) [ +4.32% +0.00% +0.79% / +4.52% +0.39% +12.18%] index_select strided 255 : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +6.23% +2.53% +0.00% / +3.50% -0.19% +1.95%] index_select strided 256 : Elapsed 0.005 ms (0.546 ms / 100) 0.513 -> 0.506 ( -1.36%) [ +4.68% +0.00% +0.97% / +3.70% -1.36% +3.12%] index_select strided 257 : Elapsed 0.005 ms (0.537 ms / 100) 0.514 -> 0.518 ( +0.78%) [ +2.92% +0.00% +1.56% / +3.11% +3.31% +0.78%] index_select random : Elapsed 0.005 ms (0.529 ms / 100) 0.520 -> 0.514 ( -1.15%) [ +0.38% +2.50% +0.00% / +2.50% -1.15% +0.77%] index_select random_sorted : Elapsed 0.005 ms (0.522 ms / 100) 0.516 -> 0.507 ( -1.74%) [ +2.71% +0.00% +0.39% / +5.81% -1.74% +2.13%] index_select perm : Elapsed 0.005 ms (0.530 ms / 100) 0.523 -> 0.519 ( -0.76%) [ +1.15% +13.38% +0.00% / +3.06% +4.02% -0.76%] index_select perm_sorted : Elapsed 0.005 ms (0.529 ms / 100) B = [200, 1, 5] (stride (1, 1000, 200)) A = [200, 500, 5] (stride (2500, 1, 500)) dim = 1 0.518 -> 0.507 ( -2.12%) [ +1.74% +7.53% +0.00% / +2.32% -2.12% +1.74%] index_select const : Elapsed 0.005 ms (0.527 ms / 100) 0.512 -> 0.506 ( -1.17%) [ +4.10% +0.00% +1.37% / +3.52% -1.17% +0.39%] index_select wrap : Elapsed 0.005 ms (0.533 ms / 100) 0.514 -> 0.516 ( +0.39%) [ +4.47% +1.75% +0.00% / +2.53% +0.58% +0.39%] index_select linear : Elapsed 0.005 ms (0.537 ms / 100) 0.509 -> 0.508 ( -0.20%) [ +5.70% +0.00% +1.18% / +4.13% -0.20% +1.18%] index_select reverse : Elapsed 0.005 ms (0.538 ms / 100) 0.515 -> 0.509 ( -1.17%) [ +3.30% +0.00% +7.38% / +2.72% -1.17% +5.83%] index_select skip64 : Elapsed 0.005 ms (0.532 ms / 100) 0.522 -> 0.512 ( -1.92%) [ +4.21% +0.00% +0.00% / +1.72% -1.92% -1.53%] index_select skip256 : Elapsed 0.005 ms (0.544 ms / 100) 0.507 -> 0.511 ( +0.79%) [ +3.75% +0.00% +2.56% / +4.73% +0.79% +2.56%] index_select spread : Elapsed 0.005 ms (0.526 ms / 100) 0.515 -> 0.508 ( -1.36%) [ +2.72% +0.00% +0.97% / +14.76% -1.36% +0.19%] index_select strided 3 : Elapsed 0.005 ms (0.529 ms / 100) 0.509 -> 0.506 ( -0.59%) [ +3.54% +0.00% +2.36% / +13.56% -0.59% +1.96%] index_select strided 5 : Elapsed 0.005 ms (0.527 ms / 100) 0.510 -> 0.513 ( +0.59%) [ +3.92% +0.00% +1.76% / +12.35% +0.59% +1.57%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.519 -> 0.519 ( +0.00%) [ +2.12% +0.96% +0.00% / +2.12% +12.72% +0.00%] index_select strided 8 : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.509 ( -1.36%) [ +3.68% +0.00% +0.19% / +3.10% -1.36% +1.55%] index_select strided 16 : Elapsed 0.005 ms (0.535 ms / 100) 0.521 -> 0.508 ( -2.50%) [ +1.73% +2.11% +0.00% / +2.50% -2.50% -0.38%] index_select strided 64 : Elapsed 0.005 ms (0.530 ms / 100) 0.520 -> 0.515 ( -0.96%) [ +1.73% +0.00% +0.77% / +2.31% +3.85% -0.96%] index_select strided 100 : Elapsed 0.005 ms (0.529 ms / 100) 0.515 -> 0.518 ( +0.58%) [ +2.52% +0.00% +0.97% / +4.85% +0.78% +0.58%] index_select strided 255 : Elapsed 0.005 ms (0.528 ms / 100) 0.515 -> 0.512 ( -0.58%) [ +2.14% +0.00% +9.51% / +3.11% -0.58% +8.54%] index_select strided 256 : Elapsed 0.005 ms (0.526 ms / 100) 0.520 -> 0.517 ( -0.58%) [ +2.50% +1.35% +0.00% / +2.50% -0.58% +0.38%] index_select strided 257 : Elapsed 0.005 ms (0.533 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.13% +0.00% +2.93% / +4.49% +0.39% +1.17%] index_select random : Elapsed 0.005 ms (0.528 ms / 100) 0.513 -> 0.519 ( +1.17%) [ +3.31% +0.00% +16.37% / +4.09% +2.53% +1.17%] index_select random_sorted : Elapsed 0.005 ms (0.530 ms / 100) 0.506 -> 0.512 ( +1.19%) [ +5.93% +0.00% +2.96% / +12.65% +1.19% +3.16%] index_select perm : Elapsed 0.005 ms (0.536 ms / 100) 0.515 -> 0.515 ( +0.00%) [ +3.11% +0.00% +8.16% / +3.88% +0.00% +0.39%] index_select perm_sorted : Elapsed 0.005 ms (0.531 ms / 100) B = [200, 1, 5] (stride (1, 1000, 200)) A = [200, 500, 5] (stride (5, 1000, 1)) dim = 1 0.524 -> 0.526 ( +0.38%) [ +1.34% +0.57% +0.00% / +14.31% +3.63% +0.38%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.522 -> 0.509 ( -2.49%) [ +1.72% +0.00% +7.09% / +16.09% -2.49% +0.00%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.516 -> 0.513 ( -0.58%) [ +1.94% +0.00% +4.65% / +2.91% -0.58% +1.16%] index_select linear : Elapsed 0.005 ms (0.526 ms / 100) 0.521 -> 0.512 ( -1.73%) [ +1.34% +2.30% +0.00% / +2.69% -1.73% -0.58%] index_select reverse : Elapsed 0.005 ms (0.528 ms / 100) 0.516 -> 0.510 ( -1.16%) [ +2.91% +0.00% +8.53% / +3.29% -1.16% +5.43%] index_select skip64 : Elapsed 0.005 ms (0.531 ms / 100) 0.520 -> 0.506 ( -2.69%) [ +2.50% +2.12% +0.00% / +1.92% -2.69% +5.77%] index_select skip256 : Elapsed 0.005 ms (0.533 ms / 100) 0.516 -> 0.512 ( -0.78%) [ +2.91% +0.00% +6.01% / +3.49% -0.78% +0.78%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.527 -> 0.509 ( -3.42%) [ +0.76% +0.00% +9.87% / +6.83% -3.42% -1.14%] index_select strided 3 : Elapsed 0.005 ms (0.531 ms / 100) 0.513 -> 0.505 ( -1.56%) [ +6.63% +0.00% +1.56% / +6.04% -1.56% +0.39%] index_select strided 5 : Elapsed 0.005 ms (0.547 ms / 100) 0.515 -> 0.502 ( -2.52%) [ +2.91% +0.00% +2.33% / +8.74% -2.52% +0.58%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.517 -> 0.520 ( +0.58%) [ +1.55% +0.00% +0.97% / +2.13% +4.06% +0.58%] index_select strided 8 : Elapsed 0.005 ms (0.525 ms / 100) 0.515 -> 0.509 ( -1.17%) [ +3.11% +0.00% +1.75% / +3.11% -1.17% -0.58%] index_select strided 16 : Elapsed 0.005 ms (0.531 ms / 100) 0.519 -> 0.515 ( -0.77%) [ +1.73% +1.16% +0.00% / +0.77% +4.24% -0.77%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +5.06% +0.00% +0.39% / +2.33% +0.19% +1.75%] index_select strided 100 : Elapsed 0.005 ms (0.540 ms / 100) 0.513 -> 0.510 ( -0.58%) [ +4.09% +0.00% +1.56% / +2.92% -0.58% +1.17%] index_select strided 255 : Elapsed 0.005 ms (0.534 ms / 100) 0.512 -> 0.515 ( +0.59%) [ +3.13% +0.00% +10.55% / +3.71% +0.59% +7.42%] index_select strided 256 : Elapsed 0.005 ms (0.528 ms / 100) 0.517 -> 0.514 ( -0.58%) [ +2.32% +0.00% +1.55% / +3.87% -0.39% -0.58%] index_select strided 257 : Elapsed 0.005 ms (0.529 ms / 100) 0.514 -> 0.513 ( -0.19%) [ +3.50% +0.00% +1.56% / +2.92% +2.92% -0.19%] index_select random : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +6.42% +1.75% +0.00% / +3.31% -0.58% +1.95%] index_select random_sorted : Elapsed 0.005 ms (0.547 ms / 100) 0.516 -> 0.512 ( -0.78%) [ +3.10% +0.00% +0.39% / +6.01% -0.78% +0.39%] index_select perm : Elapsed 0.005 ms (0.532 ms / 100) 0.511 -> 0.510 ( -0.20%) [ +4.31% +0.00% +1.76% / +13.50% -0.20% +1.76%] index_select perm_sorted : Elapsed 0.005 ms (0.533 ms / 100) out_shape = [200, 500, 1] in_shape = [200, 500, 5] idx_dim = 2 B = [200, 500, 1] (stride (500, 1, 1)) A = [200, 500, 5] (stride (2500, 5, 1)) dim = 2 5.325 -> 5.331 ( +0.11%) [ +0.00% +0.02% +0.02% / +0.11% +0.17% +0.17%] index_select const : Elapsed 0.053 ms (5.325 ms / 100) 5.320 -> 5.327 ( +0.13%) [ +0.19% +0.26% +0.00% / +0.24% +0.24% +0.13%] index_select wrap : Elapsed 0.053 ms (5.330 ms / 100) 5.321 -> 5.322 ( +0.02%) [ +0.23% +0.06% +0.00% / +0.13% +0.09% +0.02%] index_select linear : Elapsed 0.053 ms (5.333 ms / 100) 5.322 -> 5.317 ( -0.09%) [ +0.09% +0.00% +0.02% / -0.09% +0.30% +0.19%] index_select reverse : Elapsed 0.053 ms (5.327 ms / 100) 5.318 -> 5.323 ( +0.09%) [ +0.36% +0.00% +0.11% / +0.09% +0.32% +0.15%] index_select skip64 : Elapsed 0.053 ms (5.337 ms / 100) 5.319 -> 5.314 ( -0.09%) [ +0.13% +0.00% +0.04% / -0.09% +0.34% +0.39%] index_select skip256 : Elapsed 0.053 ms (5.326 ms / 100) 5.318 -> 5.318 ( +0.00%) [ +0.34% +0.00% +0.13% / +0.00% +0.21% +0.09%] index_select spread : Elapsed 0.053 ms (5.336 ms / 100) 5.328 -> 5.322 ( -0.11%) [ +0.02% +0.00% +0.08% / -0.11% +0.11% +0.04%] index_select strided 3 : Elapsed 0.053 ms (5.329 ms / 100) 5.318 -> 5.323 ( +0.09%) [ +0.15% +0.02% +0.00% / +0.09% +0.26% +0.11%] index_select random : Elapsed 0.053 ms (5.326 ms / 100) 5.311 -> 5.310 ( -0.02%) [ +0.21% +0.15% +0.00% / -0.02% +0.36% +0.56%] index_select random_sorted : Elapsed 0.053 ms (5.322 ms / 100) 5.305 -> 5.320 ( +0.28%) [ +0.28% +0.13% +0.00% / +0.30% +0.55% +0.28%] index_select perm : Elapsed 0.053 ms (5.320 ms / 100) 5.317 -> 5.319 ( +0.04%) [ +0.32% +0.04% +0.00% / +0.06% +0.38% +0.04%] index_select perm_sorted : Elapsed 0.053 ms (5.334 ms / 100) B = [200, 500, 1] (stride (500, 1, 1)) A = [200, 500, 5] (stride (500, 1, 100000)) dim = 2 4.706 -> 4.711 ( +0.11%) [ +0.00% +0.13% +0.25% / +0.32% +0.11% +0.15%] index_select const : Elapsed 0.047 ms (4.706 ms / 100) 4.699 -> 4.696 ( -0.06%) [ +0.02% +0.15% +0.00% / +0.11% -0.06% +0.21%] index_select wrap : Elapsed 0.047 ms (4.700 ms / 100) 4.699 -> 4.693 ( -0.13%) [ +0.45% +0.00% +0.21% / +0.40% -0.13% +0.26%] index_select linear : Elapsed 0.047 ms (4.720 ms / 100) 4.706 -> 4.707 ( +0.02%) [ +0.02% +0.23% +0.00% / +0.11% +0.11% +0.02%] index_select reverse : Elapsed 0.047 ms (4.707 ms / 100) 4.702 -> 4.709 ( +0.15%) [ +0.13% +0.00% +0.15% / +0.15% +0.26% +0.36%] index_select skip64 : Elapsed 0.047 ms (4.708 ms / 100) 4.698 -> 4.702 ( +0.09%) [ +0.45% +0.00% +0.23% / +0.09% +0.30% +0.55%] index_select skip256 : Elapsed 0.047 ms (4.719 ms / 100) 4.705 -> 4.708 ( +0.06%) [ +0.15% +0.19% +0.00% / +0.19% +0.06% +0.06%] index_select spread : Elapsed 0.047 ms (4.712 ms / 100) 4.705 -> 4.699 ( -0.13%) [ +0.04% +0.09% +0.00% / -0.02% -0.13% +0.09%] index_select strided 3 : Elapsed 0.047 ms (4.707 ms / 100) 4.704 -> 4.714 ( +0.21%) [ +0.45% +0.15% +0.00% / +0.21% +0.32% +0.34%] index_select random : Elapsed 0.047 ms (4.725 ms / 100) 4.710 -> 4.715 ( +0.11%) [ +0.11% +0.00% +0.28% / +0.11% +0.19% +0.34%] index_select random_sorted : Elapsed 0.047 ms (4.715 ms / 100) 4.705 -> 4.707 ( +0.04%) [ +0.17% +0.00% +0.11% / +0.04% +0.06% +0.09%] index_select perm : Elapsed 0.047 ms (4.713 ms / 100) 4.711 -> 4.707 ( -0.08%) [ +0.04% +0.02% +0.00% / +0.15% -0.08% +0.06%] index_select perm_sorted : Elapsed 0.047 ms (4.713 ms / 100) B = [200, 500, 1] (stride (1, 200, 1)) A = [200, 500, 5] (stride (1, 1000, 200)) dim = 2 5.504 -> 5.514 ( +0.18%) [ +0.00% +0.24% +0.13% / +0.27% +0.18% +0.20%] index_select const : Elapsed 0.055 ms (5.504 ms / 100) 5.507 -> 5.507 ( +0.00%) [ +0.20% +0.18% +0.00% / +0.07% +0.13% +0.00%] index_select wrap : Elapsed 0.055 ms (5.518 ms / 100) 5.506 -> 5.502 ( -0.07%) [ +0.00% +0.36% +0.11% / +0.04% -0.07% -0.02%] index_select linear : Elapsed 0.055 ms (5.506 ms / 100) 5.503 -> 5.506 ( +0.05%) [ +0.00% +0.31% +0.15% / +0.05% +0.25% +0.35%] index_select reverse : Elapsed 0.055 ms (5.503 ms / 100) 5.514 -> 5.498 ( -0.29%) [ +0.07% +0.00% +0.00% / -0.22% -0.07% -0.29%] index_select skip64 : Elapsed 0.055 ms (5.518 ms / 100) 5.506 -> 5.514 ( +0.15%) [ +0.02% +0.11% +0.00% / +0.22% +0.20% +0.15%] index_select skip256 : Elapsed 0.055 ms (5.507 ms / 100) 5.511 -> 5.507 ( -0.07%) [ +0.02% +0.00% +0.13% / +0.16% +0.02% -0.07%] index_select spread : Elapsed 0.055 ms (5.512 ms / 100) 5.505 -> 5.509 ( +0.07%) [ +0.18% +0.00% +0.22% / +0.13% +0.11% +0.07%] index_select strided 3 : Elapsed 0.055 ms (5.515 ms / 100) 5.485 -> 5.495 ( +0.18%) [ +0.60% +0.00% +0.46% / +0.46% +0.36% +0.18%] index_select random : Elapsed 0.055 ms (5.518 ms / 100) 5.486 -> 5.496 ( +0.18%) [ +0.04% +0.15% +0.00% / +0.18% +0.33% +0.35%] index_select random_sorted : Elapsed 0.055 ms (5.488 ms / 100) 5.497 -> 5.501 ( +0.07%) [ +0.07% +0.00% +0.20% / +0.07% +0.33% +0.18%] index_select perm : Elapsed 0.055 ms (5.501 ms / 100) 5.510 -> 5.514 ( +0.07%) [ +0.00% +0.40% +0.07% / +0.07% +0.18% +0.31%] index_select perm_sorted : Elapsed 0.055 ms (5.510 ms / 100) B = [200, 500, 1] (stride (1, 200, 200)) A = [200, 500, 5] (stride (5, 1000, 1)) dim = 2 6.046 -> 6.040 ( -0.10%) [ +0.05% +0.22% +0.00% / +0.20% -0.02% -0.10%] index_select const : Elapsed 0.060 ms (6.049 ms / 100) 6.051 -> 6.039 ( -0.20%) [ +0.00% +0.17% +0.15% / +0.10% -0.20% +0.36%] index_select wrap : Elapsed 0.061 ms (6.051 ms / 100) 6.054 -> 6.026 ( -0.46%) [ +0.18% +0.00% +0.00% / +0.03% -0.07% -0.46%] index_select linear : Elapsed 0.061 ms (6.065 ms / 100) 6.044 -> 6.045 ( +0.02%) [ +0.43% +0.31% +0.00% / +0.20% +0.02% +0.17%] index_select reverse : Elapsed 0.061 ms (6.070 ms / 100) 6.039 -> 6.048 ( +0.15%) [ +0.00% +0.23% +0.31% / +0.15% +0.38% +0.33%] index_select skip64 : Elapsed 0.060 ms (6.039 ms / 100) 6.040 -> 6.034 ( -0.10%) [ +0.15% +0.43% +0.00% / +0.35% +0.56% -0.10%] index_select skip256 : Elapsed 0.060 ms (6.049 ms / 100) 6.045 -> 6.037 ( -0.13%) [ +0.28% +0.00% +0.00% / -0.13% +0.33% +0.05%] index_select spread : Elapsed 0.061 ms (6.062 ms / 100) 6.025 -> 6.051 ( +0.43%) [ +0.40% +0.37% +0.00% / +0.48% +0.65% +0.43%] index_select strided 3 : Elapsed 0.060 ms (6.049 ms / 100) 6.053 -> 6.036 ( -0.28%) [ +0.12% +0.15% +0.00% / -0.07% -0.28% -0.20%] index_select random : Elapsed 0.061 ms (6.060 ms / 100) 6.035 -> 6.040 ( +0.08%) [ +0.45% +0.12% +0.00% / +0.41% +0.23% +0.08%] index_select random_sorted : Elapsed 0.061 ms (6.062 ms / 100) 6.034 -> 6.030 ( -0.07%) [ +0.51% +0.25% +0.00% / +0.40% +0.22% -0.07%] index_select perm : Elapsed 0.061 ms (6.065 ms / 100) 6.045 -> 6.031 ( -0.23%) [ +0.26% +0.00% +0.17% / +0.41% -0.23% -0.18%] index_select perm_sorted : Elapsed 0.061 ms (6.061 ms / 100) B = [200, 500, 1] (stride (500, 1, 100000)) dim = 2 fill_cnt = 5 2.865 -> 2.860 ( -0.17%) [ +0.38% +0.14% +0.00% / +0.03% +0.07% -0.17%] index_fill_ const : Elapsed 0.029 ms (2.876 ms / 100) 2.863 -> 2.850 ( -0.45%) [ +0.17% +0.14% +0.00% / +0.03% -0.45% -0.42%] index_fill_ linear : Elapsed 0.029 ms (2.868 ms / 100) 2.867 -> 2.853 ( -0.49%) [ +0.21% +0.00% +0.07% / +0.17% -0.49% -0.45%] index_fill_ reverse : Elapsed 0.029 ms (2.873 ms / 100) 2.867 -> 2.858 ( -0.31%) [ +0.49% +0.00% +0.03% / +0.07% -0.31% -0.31%] index_fill_ skip64 : Elapsed 0.029 ms (2.881 ms / 100) 2.874 -> 2.856 ( -0.63%) [ +0.00% +0.14% +0.07% / +0.24% -0.63% -0.49%] index_fill_ skip256 : Elapsed 0.029 ms (2.874 ms / 100) 2.862 -> 2.860 ( -0.07%) [ +0.35% +0.00% +0.10% / +0.28% -0.07% +0.03%] index_fill_ spread : Elapsed 0.029 ms (2.872 ms / 100) 2.857 -> 2.852 ( -0.18%) [ +0.00% +0.07% +0.28% / +0.32% -0.18% +0.07%] index_fill_ random : Elapsed 0.029 ms (2.857 ms / 100) 2.859 -> 2.856 ( -0.10%) [ +0.56% +0.00% +0.31% / +0.35% -0.03% -0.10%] index_fill_ random_sorted : Elapsed 0.029 ms (2.875 ms / 100) B = [200, 500, 1] (stride (500, 1, 100000)) A = [200, 500, 5] (stride (2500, 5, 1)) dim = 2 5.328 -> 5.329 ( +0.02%) [ +0.08% +0.02% +0.00% / +0.02% +0.34% +0.17%] index_select const : Elapsed 0.053 ms (5.332 ms / 100) 5.321 -> 5.322 ( +0.02%) [ +0.21% +0.08% +0.00% / +0.02% +0.28% +0.15%] index_select wrap : Elapsed 0.053 ms (5.332 ms / 100) 5.328 -> 5.331 ( +0.06%) [ +0.06% +0.06% +0.00% / +0.08% +0.24% +0.06%] index_select linear : Elapsed 0.053 ms (5.331 ms / 100) 5.328 -> 5.326 ( -0.04%) [ +0.09% +0.00% +0.02% / -0.04% +0.13% +0.24%] index_select reverse : Elapsed 0.053 ms (5.333 ms / 100) 5.320 -> 5.329 ( +0.17%) [ +0.08% +0.19% +0.00% / +0.17% +0.43% +0.36%] index_select skip64 : Elapsed 0.053 ms (5.324 ms / 100) 5.320 -> 5.331 ( +0.21%) [ +0.13% +0.04% +0.00% / +0.21% +0.23% +0.36%] index_select skip256 : Elapsed 0.053 ms (5.327 ms / 100) 5.312 -> 5.324 ( +0.23%) [ +0.26% +0.13% +0.00% / +0.23% +0.47% +0.49%] index_select spread : Elapsed 0.053 ms (5.326 ms / 100) 5.322 -> 5.321 ( -0.02%) [ +0.08% +0.00% +0.02% / +0.21% -0.02% +0.19%] index_select strided 3 : Elapsed 0.053 ms (5.326 ms / 100) 5.316 -> 5.323 ( +0.13%) [ +0.00% +0.11% +0.06% / +0.13% +0.24% +0.13%] index_select random : Elapsed 0.053 ms (5.316 ms / 100) 5.310 -> 5.314 ( +0.08%) [ +0.36% +0.06% +0.00% / +0.08% +0.40% +0.23%] index_select random_sorted : Elapsed 0.053 ms (5.329 ms / 100) 5.320 -> 5.324 ( +0.08%) [ +0.08% +0.17% +0.00% / +0.21% +0.39% +0.08%] index_select perm : Elapsed 0.053 ms (5.324 ms / 100) 5.324 -> 5.330 ( +0.11%) [ +0.13% +0.00% +0.09% / +0.11% +0.26% +0.11%] index_select perm_sorted : Elapsed 0.053 ms (5.331 ms / 100) out_shape = [1, 5, 200] in_shape = [500, 5, 200] idx_dim = 0 B = [1, 5, 200] (stride (1000, 1, 5)) A = [500, 5, 200] (stride (200, 100000, 1)) dim = 0 0.518 -> 0.522 ( +0.77%) [ +6.18% +0.00% +14.48% / +3.09% +1.54% +0.77%] index_select const : Elapsed 0.005 ms (0.550 ms / 100) 0.515 -> 0.518 ( +0.58%) [ +2.14% +0.00% +1.36% / +3.50% +1.17% +0.58%] index_select wrap : Elapsed 0.005 ms (0.526 ms / 100) 0.510 -> 0.517 ( +1.37%) [ +4.90% +0.00% +1.18% / +4.31% +1.37% +1.57%] index_select linear : Elapsed 0.005 ms (0.535 ms / 100) 0.510 -> 0.520 ( +1.96%) [ +3.92% +0.00% +2.35% / +4.12% +4.12% +1.96%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.514 -> 0.511 ( -0.58%) [ +4.47% +0.00% +4.09% / +14.20% -0.58% +3.89%] index_select skip64 : Elapsed 0.005 ms (0.537 ms / 100) 0.514 -> 0.522 ( +1.56%) [ +2.92% +0.00% +2.33% / +3.50% +1.56% +3.31%] index_select skip256 : Elapsed 0.005 ms (0.529 ms / 100) 0.509 -> 0.520 ( +2.16%) [ +4.32% +0.00% +5.30% / +3.73% +8.64% +2.16%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +3.50% +7.20% +0.00% / +3.50% +0.19% +2.14%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.517 -> 0.512 ( -0.97%) [ +2.32% +0.00% +1.74% / +2.32% -0.97% +0.97%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.507 ( -1.93%) [ +2.51% +0.00% +0.19% / +1.93% -1.93% +1.55%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.523 -> 0.506 ( -3.25%) [ +1.34% +0.00% +2.10% / +0.76% -3.25% +6.31%] index_select strided 8 : Elapsed 0.005 ms (0.530 ms / 100) 0.513 -> 0.505 ( -1.56%) [ +2.34% +0.00% +10.72% / +3.70% -1.56% +0.97%] index_select strided 16 : Elapsed 0.005 ms (0.525 ms / 100) 0.513 -> 0.510 ( -0.58%) [ +5.65% +0.00% +1.75% / +3.51% -0.58% +1.95%] index_select strided 64 : Elapsed 0.005 ms (0.542 ms / 100) 0.518 -> 0.507 ( -2.12%) [ +1.54% +0.00% +7.72% / +3.28% -2.12% +1.35%] index_select strided 100 : Elapsed 0.005 ms (0.526 ms / 100) 0.508 -> 0.517 ( +1.77%) [ +6.69% +0.00% +3.15% / +5.91% +3.15% +1.77%] index_select strided 255 : Elapsed 0.005 ms (0.542 ms / 100) 0.520 -> 0.504 ( -3.08%) [ +1.35% +1.35% +0.00% / +1.73% -3.08% +0.19%] index_select strided 256 : Elapsed 0.005 ms (0.527 ms / 100) 0.513 -> 0.510 ( -0.58%) [ +3.70% +0.00% +2.53% / +5.46% -0.58% +0.78%] index_select strided 257 : Elapsed 0.005 ms (0.532 ms / 100) 0.511 -> 0.518 ( +1.37%) [ +3.52% +0.00% +2.54% / +4.11% +5.87% +1.37%] index_select random : Elapsed 0.005 ms (0.529 ms / 100) 0.511 -> 0.508 ( -0.59%) [ +4.11% +0.00% +1.96% / +4.31% -0.59% +0.98%] index_select random_sorted : Elapsed 0.005 ms (0.532 ms / 100) 0.511 -> 0.508 ( -0.59%) [ +3.91% +0.00% +1.76% / +4.89% -0.59% +1.17%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.510 -> 0.512 ( +0.39%) [ +5.88% +0.00% +2.35% / +4.31% +0.39% +1.96%] index_select perm_sorted : Elapsed 0.005 ms (0.540 ms / 100) B = [1, 5, 200] (stride (200, 200, 1)) A = [500, 5, 200] (stride (1, 500, 2500)) dim = 0 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.18% / +0.89% +0.89% +1.07%] index_select const : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +1.60% / +0.71% +0.89% +0.89%] index_select wrap : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_select linear : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.71% +0.89% +1.07%] index_select reverse : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_select skip64 : Elapsed 0.006 ms (0.568 ms / 100) 0.562 -> 0.568 ( +1.07%) [ +1.07% +0.36% +0.00% / +7.83% +1.07% +1.25%] index_select skip256 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.00% / +0.89% +1.24% +0.89%] index_select spread : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +7.82% +0.00% / +0.89% +0.89% +0.89%] index_select strided 5 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_select strided 7 : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.00% / +0.89% +1.07% +1.24%] index_select strided 8 : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +7.46% / +0.89% +1.07% +1.07%] index_select strided 16 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.18% / +0.89% +1.07% +0.89%] index_select strided 64 : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.89% +1.07% +0.71%] index_select strided 100 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +6.39% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select strided 255 : Elapsed 0.006 ms (0.599 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +1.78% +0.00% +0.18% / +1.24% +0.89% +0.71%] index_select strided 256 : Elapsed 0.006 ms (0.573 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +1.07% +0.18% +0.00% / +0.71% +0.89% +0.89%] index_select strided 257 : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.00% / +2.31% +1.07% +0.71%] index_select random : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_select random_sorted : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +0.89% +0.89%] index_select perm : Elapsed 0.006 ms (0.568 ms / 100) 0.564 -> 0.568 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.89% +0.71% +0.71%] index_select perm_sorted : Elapsed 0.006 ms (0.568 ms / 100) B = [1, 5, 200] (stride (5, 1, 5)) A = [500, 5, 200] (stride (1000, 1, 5)) dim = 0 0.516 -> 0.515 ( -0.19%) [ +2.71% +0.00% +7.75% / +2.91% -0.19% +2.13%] index_select const : Elapsed 0.005 ms (0.530 ms / 100) 0.510 -> 0.513 ( +0.59%) [ +3.33% +0.00% +1.37% / +3.73% +0.59% +2.35%] index_select wrap : Elapsed 0.005 ms (0.527 ms / 100) 0.510 -> 0.514 ( +0.78%) [ +3.73% +0.00% +2.94% / +4.31% +0.78% +3.92%] index_select linear : Elapsed 0.005 ms (0.529 ms / 100) 0.520 -> 0.509 ( -2.12%) [ +1.54% +0.19% +0.00% / +1.73% -2.12% +1.35%] index_select reverse : Elapsed 0.005 ms (0.528 ms / 100) 0.511 -> 0.508 ( -0.59%) [ +4.11% +0.00% +2.94% / +10.57% -0.59% +2.15%] index_select skip64 : Elapsed 0.005 ms (0.532 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +3.28% +2.50% +0.00% / +2.50% +4.24% +0.77%] index_select skip256 : Elapsed 0.005 ms (0.536 ms / 100) 0.518 -> 0.532 ( +2.70%) [ +2.70% +0.00% +0.19% / +2.90% +4.63% +2.70%] index_select spread : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.511 ( -0.97%) [ +1.74% +0.00% +3.68% / +3.29% -0.97% +2.52%] index_select strided 3 : Elapsed 0.005 ms (0.525 ms / 100) 0.512 -> 0.514 ( +0.39%) [ +3.32% +0.00% +1.37% / +3.91% +0.39% +2.15%] index_select strided 5 : Elapsed 0.005 ms (0.529 ms / 100) 0.527 -> 0.515 ( -2.28%) [ +0.57% +0.00% +0.57% / -0.19% -2.28% -0.95%] index_select strided 7 : Elapsed 0.005 ms (0.530 ms / 100) 0.517 -> 0.524 ( +1.35%) [ +2.51% +0.00% +0.00% / +8.90% +1.35% +9.67%] index_select strided 8 : Elapsed 0.005 ms (0.530 ms / 100) 0.528 -> 0.514 ( -2.65%) [ +0.38% +0.00% +6.44% / -0.38% -2.65% +0.00%] index_select strided 16 : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.514 ( -0.19%) [ +3.30% +0.00% +3.30% / +3.11% -0.19% +1.55%] index_select strided 64 : Elapsed 0.005 ms (0.532 ms / 100) 0.521 -> 0.514 ( -1.34%) [ +2.88% +0.38% +0.00% / +6.72% -1.34% +3.26%] index_select strided 100 : Elapsed 0.005 ms (0.536 ms / 100) 0.518 -> 0.525 ( +1.35%) [ +2.51% +0.00% +0.00% / +10.04% +1.35% +1.35%] index_select strided 255 : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.512 ( -0.39%) [ +5.64% +0.00% +0.78% / +11.48% -0.39% +2.33%] index_select strided 256 : Elapsed 0.005 ms (0.543 ms / 100) 0.515 -> 0.519 ( +0.78%) [ +2.72% +0.00% +0.97% / +3.11% +0.78% +1.55%] index_select strided 257 : Elapsed 0.005 ms (0.529 ms / 100) 0.530 -> 0.522 ( -1.51%) [ +0.19% +0.38% +0.00% / +0.94% +2.64% -1.51%] index_select random : Elapsed 0.005 ms (0.531 ms / 100) 0.516 -> 0.515 ( -0.19%) [ +2.13% +0.00% +0.78% / +3.68% -0.19% +1.55%] index_select random_sorted : Elapsed 0.005 ms (0.527 ms / 100) 0.519 -> 0.514 ( -0.96%) [ +2.31% +0.39% +0.00% / +4.62% -0.96% +0.77%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.514 -> 0.517 ( +0.58%) [ +3.31% +0.00% +1.56% / +4.09% +0.58% +1.75%] index_select perm_sorted : Elapsed 0.005 ms (0.531 ms / 100) B = [1, 5, 200] (stride (1, 1, 5)) dim = 0 fill_cnt = 500 21.694 -> 22.041 ( +1.60%) [ +1.17% +0.01% +0.00% / +1.60% +1.64% +1.64%] index_fill_ const : Elapsed 0.219 ms (21.947 ms / 100) 21.703 -> 22.062 ( +1.65%) [ +1.17% +0.01% +0.00% / +1.65% +1.69% +1.69%] index_fill_ linear : Elapsed 0.220 ms (21.956 ms / 100) 21.701 -> 22.093 ( +1.81%) [ +1.15% +0.00% +0.00% / +1.83% +1.81% +1.81%] index_fill_ reverse : Elapsed 0.220 ms (21.951 ms / 100) 21.744 -> 22.100 ( +1.64%) [ +1.16% +0.03% +0.00% / +1.66% +1.64% +1.64%] index_fill_ skip64 : Elapsed 0.220 ms (21.996 ms / 100) 21.713 -> 22.068 ( +1.63%) [ +1.17% +0.02% +0.00% / +1.63% +1.68% +1.68%] index_fill_ skip256 : Elapsed 0.220 ms (21.968 ms / 100) 21.737 -> 22.086 ( +1.61%) [ +1.16% +0.01% +0.00% / +1.61% +1.64% +1.62%] index_fill_ spread : Elapsed 0.220 ms (21.989 ms / 100) 21.695 -> 22.081 ( +1.78%) [ +1.15% +0.00% +0.00% / +1.78% +1.84% +1.84%] index_fill_ random : Elapsed 0.219 ms (21.945 ms / 100) 21.700 -> 22.072 ( +1.71%) [ +1.17% +0.01% +0.00% / +1.71% +1.80% +1.78%] index_fill_ random_sorted : Elapsed 0.220 ms (21.954 ms / 100) out_shape = [500, 1, 200] in_shape = [500, 5, 200] idx_dim = 1 B = [500, 1, 200] (stride (200, 100000, 1)) A = [500, 5, 200] (stride (1, 500, 2500)) dim = 1 5.065 -> 5.080 ( +0.30%) [ +0.34% +0.00% +0.16% / +0.30% +0.81% +0.93%] index_select const : Elapsed 0.051 ms (5.082 ms / 100) 5.070 -> 5.075 ( +0.10%) [ +0.26% +0.00% +0.16% / +0.10% +0.91% +0.99%] index_select wrap : Elapsed 0.051 ms (5.083 ms / 100) 5.068 -> 5.083 ( +0.30%) [ +0.20% +0.00% +0.16% / +0.30% +0.69% +0.83%] index_select linear : Elapsed 0.051 ms (5.078 ms / 100) 5.074 -> 5.071 ( -0.06%) [ +0.00% +0.20% +0.08% / -0.06% +0.65% +0.57%] index_select reverse : Elapsed 0.051 ms (5.074 ms / 100) 5.075 -> 5.074 ( -0.02%) [ +0.10% +0.00% +0.00% / -0.02% +0.77% +0.81%] index_select skip64 : Elapsed 0.051 ms (5.080 ms / 100) 5.079 -> 5.064 ( -0.30%) [ +0.02% +0.04% +0.00% / -0.30% +0.71% +0.63%] index_select skip256 : Elapsed 0.051 ms (5.080 ms / 100) 5.073 -> 5.084 ( +0.22%) [ +0.18% +0.10% +0.00% / +0.22% +0.67% +0.61%] index_select spread : Elapsed 0.051 ms (5.082 ms / 100) 5.075 -> 5.070 ( -0.10%) [ +0.20% +0.04% +0.00% / -0.10% +0.69% +0.61%] index_select strided 3 : Elapsed 0.051 ms (5.085 ms / 100) 5.070 -> 5.069 ( -0.02%) [ +0.06% +0.00% +0.12% / -0.02% +0.87% +0.45%] index_select random : Elapsed 0.051 ms (5.073 ms / 100) 5.064 -> 5.074 ( +0.20%) [ +0.00% +0.26% +0.04% / +0.20% +1.03% +1.01%] index_select random_sorted : Elapsed 0.051 ms (5.064 ms / 100) 5.080 -> 5.069 ( -0.22%) [ +0.28% +0.37% +0.00% / +0.33% +0.10% -0.22%] index_select perm : Elapsed 0.051 ms (5.094 ms / 100) 5.087 -> 5.068 ( -0.37%) [ +0.37% +0.00% +0.10% / +0.31% -0.37% -0.31%] index_select perm_sorted : Elapsed 0.051 ms (5.106 ms / 100) B = [500, 1, 200] (stride (1, 100000, 500)) A = [500, 5, 200] (stride (1000, 200, 1)) dim = 1 5.153 -> 5.172 ( +0.37%) [ +0.27% +0.00% +0.00% / +0.39% +0.62% +0.37%] index_select const : Elapsed 0.052 ms (5.167 ms / 100) 5.162 -> 5.169 ( +0.14%) [ +0.00% +0.02% +0.04% / +0.14% +0.21% +0.17%] index_select wrap : Elapsed 0.052 ms (5.162 ms / 100) 5.157 -> 5.165 ( +0.16%) [ +0.16% +0.16% +0.00% / +0.31% +0.16% +0.43%] index_select linear : Elapsed 0.052 ms (5.165 ms / 100) 5.167 -> 5.167 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.15%] index_select reverse : Elapsed 0.052 ms (5.167 ms / 100) 5.157 -> 5.170 ( +0.25%) [ +0.33% +0.00% +0.02% / +0.33% +0.25% +0.39%] index_select skip64 : Elapsed 0.052 ms (5.174 ms / 100) 5.157 -> 5.160 ( +0.06%) [ +0.00% +0.19% +0.12% / +0.06% +0.16% +0.43%] index_select skip256 : Elapsed 0.052 ms (5.157 ms / 100) 5.155 -> 5.167 ( +0.23%) [ +0.35% +0.27% +0.00% / +0.31% +0.23% +0.25%] index_select spread : Elapsed 0.052 ms (5.173 ms / 100) 5.159 -> 5.160 ( +0.02%) [ +0.25% +0.06% +0.00% / +0.02% +0.17% +0.16%] index_select strided 3 : Elapsed 0.052 ms (5.172 ms / 100) 5.166 -> 5.166 ( +0.00%) [ +0.21% +0.00% +0.00% / +0.00% +0.50% +0.29%] index_select random : Elapsed 0.052 ms (5.177 ms / 100) 5.163 -> 5.175 ( +0.23%) [ +0.12% +0.00% +0.02% / +0.23% +0.41% +0.45%] index_select random_sorted : Elapsed 0.052 ms (5.169 ms / 100) 5.159 -> 5.159 ( +0.00%) [ +0.00% +0.00% +0.27% / +0.62% +0.08% +0.00%] index_select perm : Elapsed 0.052 ms (5.159 ms / 100) 5.159 -> 5.161 ( +0.04%) [ +0.37% +0.23% +0.00% / +0.16% +0.25% +0.04%] index_select perm_sorted : Elapsed 0.052 ms (5.178 ms / 100) B = [500, 1, 200] (stride (1, 1, 500)) A = [500, 5, 200] (stride (5, 1, 2500)) dim = 1 6.180 -> 6.173 ( -0.11%) [ +0.00% +0.28% +0.10% / +0.21% -0.11% +0.03%] index_select const : Elapsed 0.062 ms (6.180 ms / 100) 6.189 -> 6.168 ( -0.34%) [ +0.13% +0.13% +0.00% / +0.10% -0.34% -0.21%] index_select wrap : Elapsed 0.062 ms (6.197 ms / 100) 6.165 -> 6.182 ( +0.28%) [ +0.83% +0.00% +0.47% / +0.39% +0.28% +0.50%] index_select linear : Elapsed 0.062 ms (6.216 ms / 100) 6.176 -> 6.180 ( +0.06%) [ +0.21% +0.03% +0.00% / +0.10% +0.06% +0.16%] index_select reverse : Elapsed 0.062 ms (6.189 ms / 100) 6.159 -> 6.177 ( +0.29%) [ +0.26% +0.00% +0.42% / +0.37% +0.29% +0.42%] index_select skip64 : Elapsed 0.062 ms (6.175 ms / 100) 6.168 -> 6.176 ( +0.13%) [ +0.55% +0.00% +0.21% / +0.24% +0.13% +0.19%] index_select skip256 : Elapsed 0.062 ms (6.202 ms / 100) 6.181 -> 6.184 ( +0.05%) [ +0.16% +0.00% +0.03% / +0.23% +0.05% +0.15%] index_select spread : Elapsed 0.062 ms (6.191 ms / 100) 6.179 -> 6.168 ( -0.18%) [ +0.24% +0.40% +0.00% / +0.00% -0.18% +0.02%] index_select strided 3 : Elapsed 0.062 ms (6.194 ms / 100) 6.172 -> 6.182 ( +0.16%) [ +0.00% +0.11% +0.52% / +0.41% +0.16% +0.32%] index_select random : Elapsed 0.062 ms (6.172 ms / 100) 6.173 -> 6.176 ( +0.05%) [ +0.39% +0.00% +0.10% / +0.05% +0.49% +0.18%] index_select random_sorted : Elapsed 0.062 ms (6.197 ms / 100) 6.172 -> 6.171 ( -0.02%) [ +0.24% +0.00% +0.03% / -0.02% +0.49% +0.63%] index_select perm : Elapsed 0.062 ms (6.187 ms / 100) 6.172 -> 6.173 ( +0.02%) [ +0.26% +0.00% +0.15% / +0.26% +0.02% +0.31%] index_select perm_sorted : Elapsed 0.062 ms (6.188 ms / 100) B = [500, 1, 200] (stride (1, 500, 500)) A = [500, 5, 200] (stride (1000, 200, 1)) dim = 1 5.165 -> 5.162 ( -0.06%) [ +0.08% +0.06% +0.00% / +0.08% -0.06% +0.19%] index_select const : Elapsed 0.052 ms (5.169 ms / 100) 5.160 -> 5.166 ( +0.12%) [ +0.02% +0.25% +0.00% / +0.12% +0.19% +0.12%] index_select wrap : Elapsed 0.052 ms (5.161 ms / 100) 5.160 -> 5.169 ( +0.17%) [ +0.16% +0.08% +0.00% / +0.47% +0.31% +0.17%] index_select linear : Elapsed 0.052 ms (5.168 ms / 100) 5.165 -> 5.168 ( +0.06%) [ +0.14% +0.08% +0.00% / +0.14% +0.15% +0.06%] index_select reverse : Elapsed 0.052 ms (5.172 ms / 100) 5.155 -> 5.156 ( +0.02%) [ +0.25% +0.21% +0.00% / +0.33% +0.02% +0.27%] index_select skip64 : Elapsed 0.052 ms (5.168 ms / 100) 5.158 -> 5.168 ( +0.19%) [ +0.08% +0.10% +0.00% / +0.19% +0.29% +0.19%] index_select skip256 : Elapsed 0.052 ms (5.162 ms / 100) 5.168 -> 5.165 ( -0.06%) [ +0.06% +0.00% +0.02% / -0.06% -0.04% +0.02%] index_select spread : Elapsed 0.052 ms (5.171 ms / 100) 5.156 -> 5.158 ( +0.04%) [ +0.00% +0.04% +0.33% / +0.04% +0.48% +0.60%] index_select strided 3 : Elapsed 0.052 ms (5.156 ms / 100) 5.158 -> 5.154 ( -0.08%) [ +0.19% +0.39% +0.00% / -0.08% +0.29% +0.10%] index_select random : Elapsed 0.052 ms (5.168 ms / 100) 5.159 -> 5.167 ( +0.16%) [ +0.29% +0.43% +0.00% / +0.19% +0.25% +0.16%] index_select random_sorted : Elapsed 0.052 ms (5.174 ms / 100) 5.185 -> 5.187 ( +0.04%) [ +0.12% +0.00% +0.23% / +0.04% +0.23% +0.12%] index_select perm : Elapsed 0.052 ms (5.191 ms / 100) 5.191 -> 5.189 ( -0.04%) [ +0.23% +0.00% +0.10% / +0.06% -0.04% +0.21%] index_select perm_sorted : Elapsed 0.052 ms (5.203 ms / 100) out_shape = [500, 5, 1] in_shape = [500, 5, 200] idx_dim = 2 B = [500, 5, 1] (stride (5, 1, 1)) A = [500, 5, 200] (stride (200, 100000, 1)) dim = 2 0.881 -> 0.873 ( -0.91%) [ +0.11% +0.00% +0.23% / +0.23% -0.68% -0.91%] index_select const : Elapsed 0.009 ms (0.882 ms / 100) 0.885 -> 0.882 ( -0.34%) [ +0.56% +0.00% +0.00% / +0.23% -0.34% -0.34%] index_select wrap : Elapsed 0.009 ms (0.890 ms / 100) 0.868 -> 0.874 ( +0.69%) [ +0.46% +0.00% +0.35% / +0.69% +0.69% +0.92%] index_select linear : Elapsed 0.009 ms (0.872 ms / 100) 0.879 -> 0.879 ( +0.00%) [ +0.11% +0.00% +0.68% / +1.48% +0.34% +0.00%] index_select reverse : Elapsed 0.009 ms (0.880 ms / 100) 0.868 -> 0.873 ( +0.58%) [ +0.35% +0.12% +0.00% / +0.58% +1.73% +1.27%] index_select skip64 : Elapsed 0.009 ms (0.871 ms / 100) 0.867 -> 0.870 ( +0.35%) [ +0.58% +0.23% +0.00% / +0.35% +1.50% +1.85%] index_select skip256 : Elapsed 0.009 ms (0.872 ms / 100) 0.867 -> 0.871 ( +0.46%) [ +0.23% +0.00% +0.23% / +0.46% +1.50% +1.96%] index_select spread : Elapsed 0.009 ms (0.869 ms / 100) 0.878 -> 0.872 ( -0.68%) [ +0.57% +0.00% +0.11% / +0.34% -0.68% +0.23%] index_select strided 3 : Elapsed 0.009 ms (0.883 ms / 100) 0.877 -> 0.881 ( +0.46%) [ +0.34% +0.23% +0.00% / +0.46% +0.68% +0.57%] index_select strided 5 : Elapsed 0.009 ms (0.880 ms / 100) 0.877 -> 0.881 ( +0.46%) [ +0.00% +0.00% +0.46% / +0.46% +0.91% +0.57%] index_select strided 7 : Elapsed 0.009 ms (0.877 ms / 100) 0.878 -> 0.880 ( +0.23%) [ +0.00% +0.11% +0.23% / +0.23% +0.57% +0.57%] index_select strided 8 : Elapsed 0.009 ms (0.878 ms / 100) 0.869 -> 0.870 ( +0.12%) [ +0.12% +0.00% +0.46% / +0.12% +1.73% +1.50%] index_select strided 16 : Elapsed 0.009 ms (0.870 ms / 100) 0.868 -> 0.873 ( +0.58%) [ +0.92% +0.00% +0.46% / +0.58% +1.61% +1.84%] index_select strided 64 : Elapsed 0.009 ms (0.876 ms / 100) 0.871 -> 0.872 ( +0.11%) [ +0.00% +0.00% +0.11% / +0.11% +1.61% +1.72%] index_select strided 100 : Elapsed 0.009 ms (0.871 ms / 100) 0.863 -> 0.868 ( +0.58%) [ +0.23% +0.00% +0.23% / +0.58% +1.39% +1.51%] index_select random : Elapsed 0.009 ms (0.865 ms / 100) 0.860 -> 0.863 ( +0.35%) [ +1.05% +0.35% +0.00% / +0.35% +2.33% +2.21%] index_select random_sorted : Elapsed 0.009 ms (0.869 ms / 100) 0.881 -> 0.880 ( -0.11%) [ +0.34% +0.11% +0.00% / +0.23% -0.11% +0.34%] index_select perm : Elapsed 0.009 ms (0.884 ms / 100) 0.879 -> 0.875 ( -0.46%) [ +0.34% +0.00% +0.46% / +0.57% -0.46% -0.46%] index_select perm_sorted : Elapsed 0.009 ms (0.882 ms / 100) B = [500, 5, 1] (stride (1, 500, 500)) A = [500, 5, 200] (stride (200, 100000, 1)) dim = 2 0.523 -> 0.521 ( -0.38%) [ +1.53% +0.00% +6.88% / +3.25% -0.38% +1.34%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.527 -> 0.521 ( -1.14%) [ +0.00% +0.76% +29.41% / +0.57% -1.14% +0.00%] index_select wrap : Elapsed 0.005 ms (0.527 ms / 100) 0.507 -> 0.520 ( +2.56%) [ +3.75% +0.00% +12.03% / +4.34% +2.56% +3.35%] index_select linear : Elapsed 0.005 ms (0.526 ms / 100) 0.510 -> 0.515 ( +0.98%) [ +3.33% +0.00% +7.06% / +17.84% +0.98% +3.92%] index_select reverse : Elapsed 0.005 ms (0.527 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +5.39% +0.00% +0.96% / +2.89% +1.54% +0.77%] index_select skip64 : Elapsed 0.005 ms (0.547 ms / 100) 0.511 -> 0.524 ( +2.54%) [ +2.35% +0.00% +3.13% / +3.52% +14.09% +2.54%] index_select skip256 : Elapsed 0.005 ms (0.523 ms / 100) 0.516 -> 0.519 ( +0.58%) [ +2.91% +0.00% +1.94% / +3.68% +0.58% +2.33%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.519 -> 0.518 ( -0.19%) [ +2.12% +0.00% +1.35% / +2.70% -0.19% +1.16%] index_select strided 3 : Elapsed 0.005 ms (0.530 ms / 100) 0.521 -> 0.517 ( -0.77%) [ +4.22% +0.19% +0.00% / +5.18% -0.77% +8.64%] index_select strided 5 : Elapsed 0.005 ms (0.543 ms / 100) 0.525 -> 0.521 ( -0.76%) [ +0.00% +13.71% +0.00% / +1.52% -0.76% +0.00%] index_select strided 7 : Elapsed 0.005 ms (0.525 ms / 100) 0.518 -> 0.518 ( +0.00%) [ +2.90% +0.00% +11.20% / +2.90% +0.00% +0.39%] index_select strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.515 -> 0.517 ( +0.39%) [ +3.30% +0.00% +13.40% / +3.88% +0.39% +2.33%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.518 ( +0.58%) [ +3.11% +0.00% +13.01% / +3.69% +0.58% +0.97%] index_select strided 64 : Elapsed 0.005 ms (0.531 ms / 100) 0.511 -> 0.520 ( +1.76%) [ +5.68% +0.00% +9.39% / +4.70% +1.76% +1.96%] index_select strided 100 : Elapsed 0.005 ms (0.540 ms / 100) 0.511 -> 0.518 ( +1.37%) [ +6.65% +6.26% +0.00% / +20.16% +1.37% +2.35%] index_select random : Elapsed 0.005 ms (0.545 ms / 100) 0.516 -> 0.521 ( +0.97%) [ +1.74% +16.47% +0.00% / +3.88% +2.71% +0.97%] index_select random_sorted : Elapsed 0.005 ms (0.525 ms / 100) 0.516 -> 0.525 ( +1.74%) [ +2.91% +1.74% +0.00% / +4.65% +11.43% +1.74%] index_select perm : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.520 ( +0.97%) [ +2.91% +0.00% +0.58% / +5.05% +2.52% +0.97%] index_select perm_sorted : Elapsed 0.005 ms (0.530 ms / 100) B = [500, 5, 1] (stride (5, 1, 2500)) A = [500, 5, 200] (stride (1, 500, 2500)) dim = 2 0.830 -> 0.829 ( -0.12%) [ +0.48% +0.48% +0.00% / +0.60% +0.00% -0.12%] index_select const : Elapsed 0.008 ms (0.834 ms / 100) 0.829 -> 0.825 ( -0.48%) [ +0.60% +0.36% +0.00% / +0.60% +0.00% -0.48%] index_select wrap : Elapsed 0.008 ms (0.834 ms / 100) 0.830 -> 0.827 ( -0.36%) [ +0.48% +0.12% +0.00% / +0.72% -0.24% -0.36%] index_select linear : Elapsed 0.008 ms (0.834 ms / 100) 0.832 -> 0.828 ( -0.48%) [ +0.36% +0.00% +0.00% / +0.24% +0.00% -0.48%] index_select reverse : Elapsed 0.008 ms (0.835 ms / 100) 0.831 -> 0.827 ( -0.48%) [ +0.36% +0.00% +0.00% / +0.24% -0.48% -0.48%] index_select skip64 : Elapsed 0.008 ms (0.834 ms / 100) 0.831 -> 0.829 ( -0.24%) [ +0.36% +0.00% +0.12% / +0.12% -0.24% -0.12%] index_select skip256 : Elapsed 0.008 ms (0.834 ms / 100) 0.831 -> 0.827 ( -0.48%) [ +0.60% +0.00% +0.24% / +0.36% -0.36% -0.48%] index_select spread : Elapsed 0.008 ms (0.836 ms / 100) 0.831 -> 0.825 ( -0.72%) [ +0.12% +0.12% +0.00% / +0.00% -0.48% -0.72%] index_select strided 3 : Elapsed 0.008 ms (0.832 ms / 100) 0.830 -> 0.829 ( -0.12%) [ +0.48% +0.00% +0.12% / +0.36% -0.12% -0.12%] index_select strided 5 : Elapsed 0.008 ms (0.834 ms / 100) 0.829 -> 0.831 ( +0.24%) [ +0.24% +0.00% +0.24% / +0.48% +0.24% +0.48%] index_select strided 7 : Elapsed 0.008 ms (0.831 ms / 100) 0.828 -> 0.832 ( +0.48%) [ +0.85% +0.00% +0.60% / +0.72% +0.48% +0.48%] index_select strided 8 : Elapsed 0.008 ms (0.835 ms / 100) 0.830 -> 0.827 ( -0.36%) [ +0.36% +0.00% +0.36% / +0.48% -0.36% +0.12%] index_select strided 16 : Elapsed 0.008 ms (0.833 ms / 100) 0.831 -> 0.832 ( +0.12%) [ +0.12% +0.00% +0.00% / +0.12% +0.96% +0.72%] index_select strided 64 : Elapsed 0.008 ms (0.832 ms / 100) 0.830 -> 0.834 ( +0.48%) [ +0.48% +0.00% +0.12% / +0.48% +0.60% +0.48%] index_select strided 100 : Elapsed 0.008 ms (0.834 ms / 100) 0.817 -> 0.822 ( +0.61%) [ +0.37% +0.00% +0.49% / +0.61% +1.35% +1.59%] index_select random : Elapsed 0.008 ms (0.820 ms / 100) 0.815 -> 0.824 ( +1.10%) [ +0.86% +0.00% +0.37% / +1.10% +2.21% +1.72%] index_select random_sorted : Elapsed 0.008 ms (0.822 ms / 100) 0.821 -> 0.826 ( +0.61%) [ +0.61% +0.61% +0.00% / +0.61% +1.34% +1.58%] index_select perm : Elapsed 0.008 ms (0.826 ms / 100) 0.821 -> 0.826 ( +0.61%) [ +0.49% +0.37% +0.00% / +0.73% +0.73% +0.61%] index_select perm_sorted : Elapsed 0.008 ms (0.825 ms / 100) out_shape = [1, 200, 5] in_shape = [500, 200, 5] idx_dim = 0 B = [1, 200, 5] (stride (5, 5, 1)) A = [500, 200, 5] (stride (1000, 5, 1)) dim = 0 0.514 -> 0.516 ( +0.39%) [ +4.09% +0.97% +0.00% / +3.31% +0.39% +9.92%] index_select const : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.512 ( -0.58%) [ +3.69% +0.00% +7.18% / +3.30% -0.58% +15.73%] index_select wrap : Elapsed 0.005 ms (0.534 ms / 100) 0.513 -> 0.515 ( +0.39%) [ +4.48% +0.00% +8.77% / +3.70% +0.39% +1.56%] index_select linear : Elapsed 0.005 ms (0.536 ms / 100) 0.521 -> 0.514 ( -1.34%) [ +2.88% +0.00% +2.69% / +2.11% -1.34% +0.77%] index_select reverse : Elapsed 0.005 ms (0.536 ms / 100) 0.516 -> 0.521 ( +0.97%) [+10.27% +0.00% +53.10% / +3.10% +0.97% +2.13%] index_select skip64 : Elapsed 0.006 ms (0.569 ms / 100) 0.530 -> 0.519 ( -2.08%) [+11.32% +0.00% +3.77% / -0.75% -2.08% -0.94%] index_select skip256 : Elapsed 0.006 ms (0.590 ms / 100) 0.525 -> 0.520 ( -0.95%) [ +1.14% +0.00% +4.95% / +1.33% -0.95% -0.19%] index_select spread : Elapsed 0.005 ms (0.531 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +2.11% +0.00% +4.80% / +1.15% -0.19% +1.92%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.533 -> 0.520 ( -2.44%) [ +0.00% +0.00% +11.26% / +10.51% -2.44% -1.50%] index_select strided 5 : Elapsed 0.005 ms (0.533 ms / 100) 0.508 -> 0.523 ( +2.95%) [ +4.72% +2.76% +0.00% / +14.76% +2.95% +5.91%] index_select strided 7 : Elapsed 0.005 ms (0.532 ms / 100) 0.514 -> 0.522 ( +1.56%) [ +7.20% +0.39% +0.00% / +2.92% +1.56% +1.75%] index_select strided 8 : Elapsed 0.006 ms (0.551 ms / 100) 0.515 -> 0.525 ( +1.94%) [+10.29% +0.00% +9.13% / +2.14% +6.99% +1.94%] index_select strided 16 : Elapsed 0.006 ms (0.568 ms / 100) 0.517 -> 0.518 ( +0.19%) [ +3.48% +0.19% +0.00% / +3.09% +0.19% +0.58%] index_select strided 64 : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.521 ( +1.17%) [ +2.91% +0.00% +5.24% / +2.33% +1.17% +1.94%] index_select strided 100 : Elapsed 0.005 ms (0.530 ms / 100) 0.519 -> 0.519 ( +0.00%) [ +2.89% +0.00% +8.29% / +5.39% +0.00% +8.09%] index_select strided 255 : Elapsed 0.005 ms (0.534 ms / 100) 0.515 -> 0.521 ( +1.17%) [ +4.47% +0.78% +0.00% / +2.91% +1.17% +1.94%] index_select strided 256 : Elapsed 0.005 ms (0.538 ms / 100) 0.517 -> 0.525 ( +1.55%) [ +2.32% +0.58% +0.00% / +3.29% +1.55% +1.74%] index_select strided 257 : Elapsed 0.005 ms (0.529 ms / 100) 0.520 -> 0.518 ( -0.38%) [ +2.31% +0.00% +5.96% / +3.08% -0.38% +0.58%] index_select random : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.520 ( +1.36%) [ +3.12% +0.00% +14.04% / +4.29% +3.31% +1.36%] index_select random_sorted : Elapsed 0.005 ms (0.529 ms / 100) 0.515 -> 0.519 ( +0.78%) [ +3.69% +0.00% +0.39% / +3.50% +0.78% +3.11%] index_select perm : Elapsed 0.005 ms (0.534 ms / 100) 0.515 -> 0.519 ( +0.78%) [ +3.11% +0.00% +2.14% / +3.11% +0.78% +1.94%] index_select perm_sorted : Elapsed 0.005 ms (0.531 ms / 100) B = [1, 200, 5] (stride (5, 5, 1)) A = [500, 200, 5] (stride (1, 500, 100000)) dim = 0 0.564 -> 0.568 ( +0.71%) [ +0.89% +0.00% +0.00% / +3.01% +6.38% +0.71%] index_select const : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.18% / +0.89% +0.71% +0.89%] index_select wrap : Elapsed 0.006 ms (0.568 ms / 100) 0.564 -> 0.568 ( +0.71%) [ +0.89% +0.00% +0.18% / +0.71% +0.71% +0.71%] index_select linear : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +1.07% +0.00% +0.00% / +0.89% +1.42% +0.71%] index_select reverse : Elapsed 0.006 ms (0.569 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +1.07% +0.89%] index_select skip64 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.18% / +0.89% +0.89% +4.26%] index_select skip256 : Elapsed 0.006 ms (0.569 ms / 100) 0.564 -> 0.568 ( +0.71%) [ +0.71% +0.00% +0.00% / +6.91% +0.89% +0.71%] index_select spread : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +5.51% / +0.71% +0.89% +0.89%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.564 -> 0.569 ( +0.89%) [ +0.71% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_select strided 5 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.18% / +0.89% +1.07% +0.89%] index_select strided 7 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_select strided 8 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.00% +0.71% / +6.22% +0.89% +0.89%] index_select strided 16 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.567 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.89% +0.71% +0.89%] index_select strided 64 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_select strided 100 : Elapsed 0.006 ms (0.568 ms / 100) 0.565 -> 0.567 ( +0.35%) [ +0.53% +6.02% +0.00% / +0.53% +4.07% +0.35%] index_select strided 255 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.18% / +0.89% +1.07% +1.07%] index_select strided 256 : Elapsed 0.006 ms (0.569 ms / 100) 0.564 -> 0.568 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +0.89% +7.09%] index_select strided 257 : Elapsed 0.006 ms (0.568 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select random : Elapsed 0.006 ms (0.569 ms / 100) 0.564 -> 0.568 ( +0.71%) [ +2.48% +0.00% +0.18% / +0.71% +0.71% +0.71%] index_select random_sorted : Elapsed 0.006 ms (0.578 ms / 100) 0.563 -> 0.568 ( +0.89%) [ +1.07% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select perm : Elapsed 0.006 ms (0.569 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +1.07% +0.18% +0.00% / +1.07% +1.42% +0.89%] index_select perm_sorted : Elapsed 0.006 ms (0.568 ms / 100) B = [1, 200, 5] (stride (1, 1, 200)) dim = 0 fill_cnt = 500 21.690 -> 22.042 ( +1.62%) [ +1.17% +0.01% +0.00% / +1.62% +1.66% +1.65%] index_fill_ const : Elapsed 0.219 ms (21.943 ms / 100) 21.704 -> 22.063 ( +1.65%) [ +1.17% +0.02% +0.00% / +1.65% +1.68% +1.68%] index_fill_ linear : Elapsed 0.220 ms (21.957 ms / 100) 21.698 -> 22.093 ( +1.82%) [ +1.15% +0.00% +0.00% / +1.85% +1.82% +1.84%] index_fill_ reverse : Elapsed 0.219 ms (21.948 ms / 100) 21.739 -> 22.093 ( +1.63%) [ +1.16% +0.01% +0.00% / +1.68% +1.63% +1.63%] index_fill_ skip64 : Elapsed 0.220 ms (21.991 ms / 100) 21.713 -> 22.069 ( +1.64%) [ +1.18% +0.02% +0.00% / +1.64% +1.68% +1.71%] index_fill_ skip256 : Elapsed 0.220 ms (21.969 ms / 100) 21.732 -> 22.087 ( +1.63%) [ +1.17% +0.00% +0.00% / +1.63% +1.65% +1.65%] index_fill_ spread : Elapsed 0.220 ms (21.986 ms / 100) 21.691 -> 22.087 ( +1.83%) [ +1.17% +0.00% +0.00% / +1.83% +1.83% +1.83%] index_fill_ random : Elapsed 0.219 ms (21.944 ms / 100) 21.699 -> 22.071 ( +1.71%) [ +1.18% +0.03% +0.00% / +1.71% +1.77% +1.78%] index_fill_ random_sorted : Elapsed 0.220 ms (21.954 ms / 100) out_shape = [500, 1, 5] in_shape = [500, 200, 5] idx_dim = 1 B = [500, 1, 5] (stride (5, 5, 1)) A = [500, 200, 5] (stride (1, 500, 100000)) dim = 1 0.523 -> 0.516 ( -1.34%) [ +2.68% +0.38% +0.00% / +2.10% -1.34% -1.34%] index_select const : Elapsed 0.005 ms (0.537 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +3.70% +0.00% +0.58% / +3.31% +1.17% +0.19%] index_select wrap : Elapsed 0.005 ms (0.533 ms / 100) 0.518 -> 0.515 ( -0.58%) [ +2.12% +0.00% +0.19% / +2.70% -0.58% +0.58%] index_select linear : Elapsed 0.005 ms (0.529 ms / 100) 0.518 -> 0.519 ( +0.19%) [ +2.32% +0.39% +0.00% / +3.28% +4.63% +0.19%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.512 -> 0.525 ( +2.54%) [ +3.32% +0.00% +0.98% / +4.88% +2.54% +3.52%] index_select skip64 : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.524 ( +1.35%) [ +2.32% +0.00% +0.97% / +4.64% +1.35% +13.93%] index_select skip256 : Elapsed 0.005 ms (0.529 ms / 100) 0.514 -> 0.519 ( +0.97%) [ +1.95% +0.19% +0.00% / +6.03% +1.95% +0.97%] index_select spread : Elapsed 0.005 ms (0.524 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +1.75% +0.00% +1.75% / +3.50% -0.39% +0.97%] index_select strided 3 : Elapsed 0.005 ms (0.524 ms / 100) 0.517 -> 0.516 ( -0.19%) [ +1.74% +0.00% +0.39% / +2.13% -0.19% +6.19%] index_select strided 5 : Elapsed 0.005 ms (0.526 ms / 100) 0.519 -> 0.517 ( -0.39%) [ +1.73% +0.00% +16.76% / +2.89% -0.39% +0.19%] index_select strided 7 : Elapsed 0.005 ms (0.528 ms / 100) 0.517 -> 0.520 ( +0.58%) [ +4.64% +0.00% +2.71% / +3.68% +0.58% +0.58%] index_select strided 8 : Elapsed 0.005 ms (0.541 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +2.90% +0.00% +1.16% / +16.05% +4.45% +0.97%] index_select strided 16 : Elapsed 0.005 ms (0.532 ms / 100) 0.519 -> 0.520 ( +0.19%) [ +1.73% +0.39% +0.00% / +6.17% +0.39% +0.19%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.515 -> 0.531 ( +3.11%) [ +2.33% +0.00% +1.36% / +3.11% +5.83% +5.05%] index_select strided 100 : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.517 ( +0.00%) [ +2.13% +0.00% +0.58% / +2.71% +0.00% +0.77%] index_select random : Elapsed 0.005 ms (0.528 ms / 100) 0.513 -> 0.516 ( +0.58%) [ +4.48% +0.00% +0.78% / +4.29% +0.58% +8.38%] index_select random_sorted : Elapsed 0.005 ms (0.536 ms / 100) 0.513 -> 0.517 ( +0.78%) [ +5.07% +0.00% +1.95% / +3.31% +0.97% +0.78%] index_select perm : Elapsed 0.005 ms (0.539 ms / 100) 0.514 -> 0.521 ( +1.36%) [ +2.14% +0.00% +1.36% / +3.50% +1.36% +1.75%] index_select perm_sorted : Elapsed 0.005 ms (0.525 ms / 100) B = [500, 1, 5] (stride (5, 2500, 1)) A = [500, 200, 5] (stride (1, 2500, 500)) dim = 1 0.515 -> 0.520 ( +0.97%) [ +2.72% +0.00% +0.58% / +2.33% +0.97% +0.97%] index_select const : Elapsed 0.005 ms (0.529 ms / 100) 0.517 -> 0.519 ( +0.39%) [ +2.13% +0.00% +5.03% / +3.48% +0.39% +0.77%] index_select wrap : Elapsed 0.005 ms (0.528 ms / 100) 0.518 -> 0.518 ( +0.00%) [ +2.32% +0.00% +7.72% / +2.32% +0.00% +0.58%] index_select linear : Elapsed 0.005 ms (0.530 ms / 100) 0.516 -> 0.517 ( +0.19%) [ +3.10% +0.00% +0.19% / +2.52% +0.19% +3.10%] index_select reverse : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.512 ( -0.58%) [ +2.33% +0.00% +0.78% / +9.90% -0.58% +0.78%] index_select skip64 : Elapsed 0.005 ms (0.527 ms / 100) 0.510 -> 0.520 ( +1.96%) [ +3.33% +0.00% +1.57% / +2.75% +2.75% +1.96%] index_select skip256 : Elapsed 0.005 ms (0.527 ms / 100) 0.520 -> 0.517 ( -0.58%) [ +1.54% +2.50% +0.00% / +0.58% +0.00% -0.58%] index_select spread : Elapsed 0.005 ms (0.528 ms / 100) 0.516 -> 0.520 ( +0.78%) [ +1.74% +0.00% +1.36% / +1.74% +6.40% +0.78%] index_select strided 3 : Elapsed 0.005 ms (0.525 ms / 100) 0.518 -> 0.516 ( -0.39%) [ +1.54% +0.39% +0.00% / +1.93% -0.39% +0.19%] index_select strided 5 : Elapsed 0.005 ms (0.526 ms / 100) 0.514 -> 0.522 ( +1.56%) [ +2.92% +0.00% +1.17% / +3.31% +1.56% +7.78%] index_select strided 7 : Elapsed 0.005 ms (0.529 ms / 100) 0.516 -> 0.518 ( +0.39%) [ +2.52% +0.00% +0.58% / +3.10% +0.39% +8.53%] index_select strided 8 : Elapsed 0.005 ms (0.529 ms / 100) 0.516 -> 0.519 ( +0.58%) [ +2.13% +0.78% +0.00% / +2.52% +0.58% +0.78%] index_select strided 16 : Elapsed 0.005 ms (0.527 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +0.76% +2.48% +0.00% / +0.57% +1.15% -0.57%] index_select strided 64 : Elapsed 0.005 ms (0.528 ms / 100) 0.514 -> 0.515 ( +0.19%) [ +2.33% +0.00% +0.58% / +2.72% +0.19% +0.39%] index_select strided 100 : Elapsed 0.005 ms (0.526 ms / 100) 0.516 -> 0.515 ( -0.19%) [ +2.13% +0.00% +7.56% / +1.74% -0.19% -0.19%] index_select random : Elapsed 0.005 ms (0.527 ms / 100) 0.525 -> 0.520 ( -0.95%) [ +0.00% +3.43% +13.14% / +0.19% -0.95% -0.95%] index_select random_sorted : Elapsed 0.005 ms (0.525 ms / 100) 0.514 -> 0.512 ( -0.39%) [ +2.72% +0.00% +1.56% / +2.14% +0.97% -0.39%] index_select perm : Elapsed 0.005 ms (0.528 ms / 100) 0.514 -> 0.517 ( +0.58%) [ +2.33% +0.00% +0.97% / +2.92% +0.58% +1.56%] index_select perm_sorted : Elapsed 0.005 ms (0.526 ms / 100) B = [500, 1, 5] (stride (1, 2500, 500)) dim = 1 fill_cnt = 200 8.896 -> 9.017 ( +1.36%) [ +1.21% +0.07% +0.00% / +1.36% +1.41% +1.43%] index_fill_ const : Elapsed 0.090 ms (9.004 ms / 100) 8.880 -> 9.002 ( +1.37%) [ +1.23% +0.09% +0.00% / +1.37% +1.41% +1.42%] index_fill_ linear : Elapsed 0.090 ms (8.989 ms / 100) 8.881 -> 9.002 ( +1.36%) [ +1.27% +0.07% +0.00% / +1.36% +1.41% +1.42%] index_fill_ reverse : Elapsed 0.090 ms (8.994 ms / 100) 8.899 -> 9.009 ( +1.24%) [ +1.19% +0.07% +0.00% / +1.33% +1.25% +1.24%] index_fill_ skip64 : Elapsed 0.090 ms (9.005 ms / 100) 8.937 -> 9.032 ( +1.06%) [ +1.12% +0.09% +0.00% / +1.43% +1.06% +1.10%] index_fill_ skip256 : Elapsed 0.090 ms (9.037 ms / 100) 8.916 -> 9.032 ( +1.30%) [ +1.24% +0.12% +0.00% / +1.30% +1.83% +1.79%] index_fill_ spread : Elapsed 0.090 ms (9.027 ms / 100) 8.960 -> 9.079 ( +1.33%) [ +1.14% +0.06% +0.00% / +1.41% +1.36% +1.33%] index_fill_ random : Elapsed 0.091 ms (9.062 ms / 100) 8.968 -> 9.095 ( +1.42%) [ +1.26% +0.14% +0.00% / +1.42% +1.58% +1.63%] index_fill_ random_sorted : Elapsed 0.091 ms (9.081 ms / 100) B = [500, 1, 5] (stride (1, 2500, 500)) A = [500, 200, 5] (stride (1000, 1, 200)) dim = 1 0.517 -> 0.523 ( +1.16%) [ +2.71% +0.39% +0.00% / +3.68% +1.16% +1.55%] index_select const : Elapsed 0.005 ms (0.531 ms / 100) 0.511 -> 0.521 ( +1.96%) [ +3.72% +0.00% +0.98% / +4.70% +2.74% +1.96%] index_select wrap : Elapsed 0.005 ms (0.530 ms / 100) 0.514 -> 0.522 ( +1.56%) [ +3.50% +0.00% +11.87% / +4.86% +1.75% +1.56%] index_select linear : Elapsed 0.005 ms (0.532 ms / 100) 0.520 -> 0.517 ( -0.58%) [ +2.50% +0.00% +7.50% / +10.58% +0.38% -0.58%] index_select reverse : Elapsed 0.005 ms (0.533 ms / 100) 0.512 -> 0.517 ( +0.98%) [ +3.52% +1.37% +0.00% / +4.88% +0.98% +2.15%] index_select skip64 : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +3.30% +0.00% +11.26% / +11.65% -0.39% +0.58%] index_select skip256 : Elapsed 0.005 ms (0.532 ms / 100) 0.518 -> 0.524 ( +1.16%) [ +1.35% +0.19% +0.00% / +3.86% +5.21% +1.16%] index_select spread : Elapsed 0.005 ms (0.525 ms / 100) 0.511 -> 0.523 ( +2.35%) [ +4.11% +0.00% +1.76% / +5.87% +2.35% +8.02%] index_select strided 3 : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.517 ( +0.19%) [ +3.29% +0.78% +0.00% / +9.11% +0.19% +6.78%] index_select strided 5 : Elapsed 0.005 ms (0.533 ms / 100) 0.509 -> 0.523 ( +2.75%) [ +5.11% +0.00% +1.96% / +9.82% +5.30% +2.75%] index_select strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.513 -> 0.520 ( +1.36%) [ +3.90% +0.00% +2.14% / +4.29% +1.36% +1.56%] index_select strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.510 -> 0.516 ( +1.18%) [ +4.12% +0.00% +1.57% / +4.90% +1.18% +2.16%] index_select strided 16 : Elapsed 0.005 ms (0.531 ms / 100) 0.513 -> 0.521 ( +1.56%) [ +5.65% +0.00% +15.79% / +16.96% +1.56% +1.56%] index_select strided 64 : Elapsed 0.005 ms (0.542 ms / 100) 0.507 -> 0.517 ( +1.97%) [ +5.52% +0.00% +12.23% / +4.54% +3.16% +1.97%] index_select strided 100 : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.515 ( +0.00%) [ +6.41% +0.00% +11.07% / +10.87% +2.91% +0.00%] index_select random : Elapsed 0.005 ms (0.548 ms / 100) 0.516 -> 0.521 ( +0.97%) [ +1.94% +0.00% +1.94% / +3.29% +0.97% +0.97%] index_select random_sorted : Elapsed 0.005 ms (0.526 ms / 100) 0.517 -> 0.520 ( +0.58%) [ +1.93% +0.19% +0.00% / +1.93% +0.97% +0.58%] index_select perm : Elapsed 0.005 ms (0.527 ms / 100) 0.515 -> 0.524 ( +1.75%) [ +3.30% +0.00% +0.97% / +3.11% +6.60% +1.75%] index_select perm_sorted : Elapsed 0.005 ms (0.532 ms / 100) B = [500, 1, 5] (stride (1, 2500, 500)) A = [500, 200, 5] (stride (1, 2500, 500)) dim = 1 0.516 -> 0.521 ( +0.97%) [ +2.71% +0.00% +0.39% / +2.71% +0.97% +6.98%] index_select const : Elapsed 0.005 ms (0.530 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +2.71% +0.19% +0.00% / +2.90% +0.97% +6.96%] index_select wrap : Elapsed 0.005 ms (0.531 ms / 100) 0.509 -> 0.520 ( +2.16%) [ +6.29% +0.00% +1.18% / +4.13% +2.16% +5.11%] index_select linear : Elapsed 0.005 ms (0.541 ms / 100) 0.513 -> 0.519 ( +1.17%) [ +3.31% +0.58% +0.00% / +3.12% +1.17% +1.75%] index_select reverse : Elapsed 0.005 ms (0.530 ms / 100) 0.515 -> 0.517 ( +0.39%) [ +3.11% +0.19% +0.00% / +1.94% +0.39% +1.55%] index_select skip64 : Elapsed 0.005 ms (0.531 ms / 100) 0.515 -> 0.513 ( -0.39%) [ +3.69% +0.00% +1.75% / +3.69% -0.39% +0.97%] index_select skip256 : Elapsed 0.005 ms (0.534 ms / 100) 0.521 -> 0.517 ( -0.77%) [ +2.11% +0.00% +8.06% / +1.54% -0.38% -0.77%] index_select spread : Elapsed 0.005 ms (0.532 ms / 100) 0.515 -> 0.514 ( -0.19%) [ +2.33% +0.00% +7.38% / +1.94% +0.19% -0.19%] index_select strided 3 : Elapsed 0.005 ms (0.527 ms / 100) 0.517 -> 0.519 ( +0.39%) [ +6.00% +0.00% +14.12% / +2.51% +0.39% +1.74%] index_select strided 5 : Elapsed 0.005 ms (0.548 ms / 100) 0.511 -> 0.517 ( +1.17%) [ +2.74% +0.00% +0.78% / +9.98% +1.17% +2.54%] index_select strided 7 : Elapsed 0.005 ms (0.525 ms / 100) 0.508 -> 0.516 ( +1.57%) [ +3.94% +0.00% +2.36% / +6.10% +1.57% +5.71%] index_select strided 8 : Elapsed 0.005 ms (0.528 ms / 100) 0.510 -> 0.520 ( +1.96%) [ +8.82% +0.00% +0.78% / +4.90% +7.45% +1.96%] index_select strided 16 : Elapsed 0.006 ms (0.555 ms / 100) 0.507 -> 0.517 ( +1.97%) [ +4.93% +0.00% +1.58% / +3.75% +1.97% +2.96%] index_select strided 64 : Elapsed 0.005 ms (0.532 ms / 100) 0.512 -> 0.517 ( +0.98%) [ +3.91% +0.00% +0.39% / +4.30% +0.98% +9.38%] index_select strided 100 : Elapsed 0.005 ms (0.532 ms / 100) 0.513 -> 0.516 ( +0.58%) [ +5.46% +0.00% +0.78% / +6.82% +0.58% +8.97%] index_select random : Elapsed 0.005 ms (0.541 ms / 100) 0.515 -> 0.517 ( +0.39%) [ +2.72% +1.55% +0.00% / +3.30% +0.39% +2.72%] index_select random_sorted : Elapsed 0.005 ms (0.529 ms / 100) 0.516 -> 0.519 ( +0.58%) [ +3.10% +0.39% +0.00% / +3.68% +0.58% +1.16%] index_select perm : Elapsed 0.005 ms (0.532 ms / 100) 0.516 -> 0.518 ( +0.39%) [ +3.68% +0.58% +0.00% / +4.26% +0.39% +2.71%] index_select perm_sorted : Elapsed 0.005 ms (0.535 ms / 100) out_shape = [500, 200, 1] in_shape = [500, 200, 5] idx_dim = 2 B = [500, 200, 1] (stride (200, 1, 1)) A = [500, 200, 5] (stride (1000, 1, 200)) dim = 2 7.761 -> 7.758 ( -0.04%) [ +0.39% +0.18% +0.00% / -0.04% +0.26% +0.28%] index_select const : Elapsed 0.078 ms (7.791 ms / 100) 7.761 -> 7.764 ( +0.04%) [ +0.19% +0.06% +0.00% / +0.04% +0.19% +0.15%] index_select wrap : Elapsed 0.078 ms (7.776 ms / 100) 7.762 -> 7.768 ( +0.08%) [ +0.00% +0.17% +0.12% / +0.08% +0.10% +0.26%] index_select linear : Elapsed 0.078 ms (7.762 ms / 100) 7.761 -> 7.775 ( +0.18%) [ +0.08% +0.15% +0.00% / +0.18% +0.18% +0.27%] index_select reverse : Elapsed 0.078 ms (7.767 ms / 100) 7.771 -> 7.769 ( -0.03%) [ +0.04% +0.00% +0.14% / -0.03% +0.05% +0.08%] index_select skip64 : Elapsed 0.078 ms (7.774 ms / 100) 7.756 -> 7.768 ( +0.15%) [ +0.17% +0.00% +0.24% / +0.31% +0.15% +0.28%] index_select skip256 : Elapsed 0.078 ms (7.769 ms / 100) 7.760 -> 7.763 ( +0.04%) [ +0.14% +0.06% +0.00% / +0.17% +0.04% +0.22%] index_select spread : Elapsed 0.078 ms (7.771 ms / 100) 7.761 -> 7.770 ( +0.12%) [ +0.15% +0.00% +0.19% / +0.12% +0.13% +0.19%] index_select strided 3 : Elapsed 0.078 ms (7.773 ms / 100) 7.774 -> 7.780 ( +0.08%) [ +0.31% +0.09% +0.00% / +0.08% +0.13% +0.13%] index_select random : Elapsed 0.078 ms (7.798 ms / 100) 7.770 -> 7.776 ( +0.08%) [ +0.08% +0.00% +0.14% / +0.35% +0.08% +0.14%] index_select random_sorted : Elapsed 0.078 ms (7.776 ms / 100) 7.777 -> 7.785 ( +0.10%) [ +0.36% +0.10% +0.00% / +0.36% +0.14% +0.10%] index_select perm : Elapsed 0.078 ms (7.805 ms / 100) 7.780 -> 7.783 ( +0.04%) [ +0.14% +0.03% +0.00% / +0.19% +0.04% +0.06%] index_select perm_sorted : Elapsed 0.078 ms (7.791 ms / 100) B = [500, 200, 1] (stride (200, 1, 200)) A = [500, 200, 5] (stride (1000, 1, 200)) dim = 2 7.771 -> 7.773 ( +0.03%) [ +0.00% +0.05% +0.01% / +0.10% +0.03% +0.24%] index_select const : Elapsed 0.078 ms (7.771 ms / 100) 7.765 -> 7.772 ( +0.09%) [ +0.17% +0.00% +0.00% / +0.13% +0.27% +0.09%] index_select wrap : Elapsed 0.078 ms (7.778 ms / 100) 7.761 -> 7.771 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.37% +0.18%] index_select linear : Elapsed 0.078 ms (7.771 ms / 100) 7.770 -> 7.767 ( -0.04%) [ +0.15% +0.00% +0.10% / +0.05% +0.09% -0.04%] index_select reverse : Elapsed 0.078 ms (7.782 ms / 100) 7.764 -> 7.768 ( +0.05%) [ +0.00% +0.10% +0.06% / +0.05% +0.23% +0.33%] index_select skip64 : Elapsed 0.078 ms (7.764 ms / 100) 7.757 -> 7.775 ( +0.23%) [ +0.24% +0.00% +0.23% / +0.26% +0.23% +0.26%] index_select skip256 : Elapsed 0.078 ms (7.776 ms / 100) 7.769 -> 7.775 ( +0.08%) [ +0.17% +0.00% +0.00% / +0.08% +0.10% +0.09%] index_select spread : Elapsed 0.078 ms (7.782 ms / 100) 7.767 -> 7.770 ( +0.04%) [ +0.04% +0.08% +0.00% / +0.05% +0.04% +0.26%] index_select strided 3 : Elapsed 0.078 ms (7.770 ms / 100) 7.754 -> 7.774 ( +0.26%) [ +0.19% +0.28% +0.00% / +0.26% +0.30% +0.52%] index_select random : Elapsed 0.078 ms (7.769 ms / 100) 7.765 -> 7.780 ( +0.19%) [ +0.13% +0.01% +0.00% / +0.23% +0.19% +0.22%] index_select random_sorted : Elapsed 0.078 ms (7.775 ms / 100) 7.766 -> 7.774 ( +0.10%) [ +0.00% +0.05% +0.18% / +0.27% +0.10% +0.19%] index_select perm : Elapsed 0.078 ms (7.766 ms / 100) 7.766 -> 7.768 ( +0.03%) [ +0.06% +0.00% +0.04% / +0.06% +0.19% +0.03%] index_select perm_sorted : Elapsed 0.078 ms (7.771 ms / 100) B = [500, 200, 1] (stride (200, 1, 100000)) A = [500, 200, 5] (stride (1000, 1, 200)) dim = 2 7.758 -> 7.760 ( +0.03%) [ +0.21% +0.15% +0.00% / +0.03% +0.12% +0.17%] index_select const : Elapsed 0.078 ms (7.774 ms / 100) 7.754 -> 7.768 ( +0.18%) [ +0.17% +0.00% +0.10% / +0.18% +0.28% +0.35%] index_select wrap : Elapsed 0.078 ms (7.767 ms / 100) 7.771 -> 7.757 ( -0.18%) [ +0.09% +0.00% +0.01% / -0.18% +0.12% +0.00%] index_select linear : Elapsed 0.078 ms (7.778 ms / 100) 7.763 -> 7.765 ( +0.03%) [ +0.00% +0.01% +0.08% / +0.14% +0.18% +0.03%] index_select reverse : Elapsed 0.078 ms (7.763 ms / 100) 7.761 -> 7.770 ( +0.12%) [ +0.13% +0.14% +0.00% / +0.12% +0.15% +0.12%] index_select skip64 : Elapsed 0.078 ms (7.771 ms / 100) 7.754 -> 7.769 ( +0.19%) [ +0.25% +0.00% +0.06% / +0.19% +0.26% +0.44%] index_select skip256 : Elapsed 0.078 ms (7.773 ms / 100) 7.754 -> 7.768 ( +0.18%) [ +0.28% +0.01% +0.00% / +0.18% +0.18% +0.32%] index_select spread : Elapsed 0.078 ms (7.776 ms / 100) 7.769 -> 7.766 ( -0.04%) [ +0.04% +0.00% +0.04% / -0.04% +0.26% +0.00%] index_select strided 3 : Elapsed 0.078 ms (7.772 ms / 100) 7.767 -> 7.773 ( +0.08%) [ +0.04% +0.00% +0.10% / +0.15% +0.08% +0.22%] index_select random : Elapsed 0.078 ms (7.770 ms / 100) 7.757 -> 7.766 ( +0.12%) [ +0.12% +0.00% +0.12% / +0.14% +0.17% +0.12%] index_select random_sorted : Elapsed 0.078 ms (7.766 ms / 100) 7.759 -> 7.770 ( +0.14%) [ +0.05% +0.00% +0.01% / +0.15% +0.37% +0.14%] index_select perm : Elapsed 0.078 ms (7.763 ms / 100) 7.761 -> 7.762 ( +0.01%) [ +0.00% +0.05% +0.08% / +0.01% +0.23% +0.24%] index_select perm_sorted : Elapsed 0.078 ms (7.761 ms / 100) B = [500, 200, 1] (stride (1, 500, 100000)) A = [500, 200, 5] (stride (5, 2500, 1)) dim = 2 6.177 -> 6.169 ( -0.13%) [ +0.31% +0.08% +0.00% / +0.24% -0.13% +0.00%] index_select const : Elapsed 0.062 ms (6.196 ms / 100) 6.162 -> 6.177 ( +0.24%) [ +0.00% +0.05% +0.34% / +0.41% +0.58% +0.24%] index_select wrap : Elapsed 0.062 ms (6.162 ms / 100) 6.163 -> 6.165 ( +0.03%) [ +0.32% +0.00% +0.57% / +0.03% +0.19% +0.03%] index_select linear : Elapsed 0.062 ms (6.183 ms / 100) 6.168 -> 6.190 ( +0.36%) [ +0.00% +0.21% +0.28% / +0.54% +0.39% +0.36%] index_select reverse : Elapsed 0.062 ms (6.168 ms / 100) 6.162 -> 6.171 ( +0.15%) [ +0.00% +0.28% +0.15% / +0.15% +0.37% +0.26%] index_select skip64 : Elapsed 0.062 ms (6.162 ms / 100) 6.175 -> 6.177 ( +0.03%) [ +0.00% +0.02% +0.05% / +0.03% +0.45% +0.99%] index_select skip256 : Elapsed 0.062 ms (6.175 ms / 100) 6.186 -> 6.172 ( -0.23%) [ +0.24% +0.00% +0.05% / +0.10% -0.23% +0.10%] index_select spread : Elapsed 0.062 ms (6.201 ms / 100) 6.169 -> 6.178 ( +0.15%) [ +0.26% +0.00% +0.34% / +0.15% +0.44% +0.49%] index_select strided 3 : Elapsed 0.062 ms (6.185 ms / 100) 6.172 -> 6.185 ( +0.21%) [ +0.50% +0.00% +0.24% / +0.31% +0.49% +0.21%] index_select random : Elapsed 0.062 ms (6.203 ms / 100) 6.171 -> 6.173 ( +0.03%) [ +0.05% +0.29% +0.00% / +0.55% +0.03% +0.11%] index_select random_sorted : Elapsed 0.062 ms (6.174 ms / 100) 6.153 -> 6.179 ( +0.42%) [ +0.00% +0.55% +0.10% / +0.42% +0.54% +0.67%] index_select perm : Elapsed 0.062 ms (6.153 ms / 100) 6.172 -> 6.172 ( +0.00%) [ +0.00% +0.31% +0.18% / +0.21% +0.23% +0.00%] index_select perm_sorted : Elapsed 0.062 ms (6.172 ms / 100) B = [500, 200, 1] (stride (1, 500, 100000)) A = [500, 200, 5] (stride (1, 500, 100000)) dim = 2 5.449 -> 5.456 ( +0.13%) [ +0.09% +0.40% +0.00% / +0.50% +0.53% +0.13%] index_select const : Elapsed 0.055 ms (5.454 ms / 100) 5.460 -> 5.465 ( +0.09%) [ +0.11% +0.00% +0.11% / +0.31% +0.13% +0.09%] index_select wrap : Elapsed 0.055 ms (5.466 ms / 100) 5.462 -> 5.455 ( -0.13%) [ +0.07% +0.15% +0.00% / -0.13% +0.07% +0.07%] index_select linear : Elapsed 0.055 ms (5.466 ms / 100) 5.458 -> 5.455 ( -0.05%) [ +0.00% +0.11% +0.24% / -0.05% +0.51% +0.18%] index_select reverse : Elapsed 0.055 ms (5.458 ms / 100) 5.454 -> 5.456 ( +0.04%) [ +0.00% +0.17% +0.11% / +0.28% +0.18% +0.04%] index_select skip64 : Elapsed 0.055 ms (5.454 ms / 100) 5.459 -> 5.448 ( -0.20%) [ +0.00% +0.13% +0.07% / +0.07% +0.29% -0.20%] index_select skip256 : Elapsed 0.055 ms (5.459 ms / 100) 5.465 -> 5.470 ( +0.09%) [ +0.07% +0.00% +0.04% / +0.11% +0.09% +0.15%] index_select spread : Elapsed 0.055 ms (5.469 ms / 100) 5.451 -> 5.445 ( -0.11%) [ +0.07% +0.00% +0.15% / -0.11% +0.28% +0.31%] index_select strided 3 : Elapsed 0.055 ms (5.455 ms / 100) 5.450 -> 5.458 ( +0.15%) [ +0.13% +0.00% +0.24% / +0.20% +0.15% +0.33%] index_select random : Elapsed 0.055 ms (5.457 ms / 100) 5.446 -> 5.455 ( +0.17%) [ +0.22% +0.00% +0.13% / +0.35% +0.17% +0.37%] index_select random_sorted : Elapsed 0.055 ms (5.458 ms / 100) 5.450 -> 5.449 ( -0.02%) [ +0.04% +0.00% +0.29% / -0.02% +0.20% +0.11%] index_select perm : Elapsed 0.055 ms (5.452 ms / 100) 5.451 -> 5.442 ( -0.17%) [ +0.28% +0.44% +0.00% / +0.37% +0.04% -0.17%] index_select perm_sorted : Elapsed 0.055 ms (5.466 ms / 100) out_shape = [5, 200, 500] in_shape = [1, 200, 500] idx_dim = 0 B = [5, 200, 500] (stride (100000, 500, 1)) dim = 0 fill_cnt = 1 2.429 -> 2.422 ( -0.29%) [ +0.21% +0.00% +0.00% / -0.04% +0.29% -0.29%] index_fill_ const : Elapsed 0.024 ms (2.434 ms / 100) 2.422 -> 2.427 ( +0.21%) [ +0.25% +0.00% +0.33% / +0.33% +0.29% +0.21%] index_fill_ linear : Elapsed 0.024 ms (2.428 ms / 100) 2.430 -> 2.429 ( -0.04%) [ +0.29% +0.00% +0.21% / +0.16% +0.00% -0.04%] index_fill_ reverse : Elapsed 0.024 ms (2.437 ms / 100) 2.432 -> 2.425 ( -0.29%) [ +0.00% +0.08% +0.45% / -0.12% -0.16% -0.29%] index_fill_ skip64 : Elapsed 0.024 ms (2.432 ms / 100) 2.428 -> 2.432 ( +0.16%) [ +0.16% +0.00% +0.41% / +0.16% +0.21% +0.41%] index_fill_ skip256 : Elapsed 0.024 ms (2.432 ms / 100) 2.423 -> 2.419 ( -0.17%) [ +0.00% +0.04% +0.33% / -0.17% +0.29% +0.66%] index_fill_ spread : Elapsed 0.024 ms (2.423 ms / 100) 2.426 -> 2.421 ( -0.21%) [ +0.04% +0.00% +0.29% / +0.12% -0.21% +0.25%] index_fill_ strided 3 : Elapsed 0.024 ms (2.427 ms / 100) 2.432 -> 2.426 ( -0.25%) [ +0.16% +0.00% +0.12% / -0.25% -0.21% -0.12%] index_fill_ random : Elapsed 0.024 ms (2.436 ms / 100) 2.430 -> 2.432 ( +0.08%) [ +0.00% +0.25% +0.21% / +0.21% +0.08% +0.12%] index_fill_ random_sorted : Elapsed 0.024 ms (2.430 ms / 100) 2.421 -> 2.424 ( +0.12%) [ +0.12% +0.00% +0.29% / +0.12% +0.25% +0.17%] index_fill_ perm : Elapsed 0.024 ms (2.424 ms / 100) 2.424 -> 2.426 ( +0.08%) [ +0.00% +0.21% +0.12% / +0.25% +0.12% +0.08%] index_fill_ perm_sorted : Elapsed 0.024 ms (2.424 ms / 100) B = [5, 200, 500] (stride (200, 1, 1000)) A = [1, 200, 500] (stride (500, 500, 1)) dim = 0 8.073 -> 8.056 ( -0.21%) [ +0.00% +0.07% +0.04% / -0.21% +0.15% +0.10%] index_add_ linear : Elapsed 0.081 ms (8.073 ms / 100) 7.805 -> 7.820 ( +0.19%) [ +0.12% +0.00% +0.42% / +0.19% +0.36% +0.24%] index_copy_ linear : Elapsed 0.078 ms (7.814 ms / 100) 8.062 -> 8.064 ( +0.02%) [ +0.16% +0.00% +0.21% / +0.35% +0.02% +0.21%] index_add_ reverse : Elapsed 0.081 ms (8.075 ms / 100) 7.806 -> 7.821 ( +0.19%) [ +0.18% +0.00% +0.32% / +0.20% +0.19% +0.37%] index_copy_ reverse : Elapsed 0.078 ms (7.820 ms / 100) 8.066 -> 8.072 ( +0.07%) [ +0.16% +0.00% +0.09% / +0.07% +0.30% +0.29%] index_add_ spread : Elapsed 0.081 ms (8.079 ms / 100) 7.803 -> 7.825 ( +0.28%) [ +0.12% +0.00% +0.31% / +0.32% +0.36% +0.28%] index_copy_ spread : Elapsed 0.078 ms (7.812 ms / 100) 8.070 -> 8.069 ( -0.01%) [ +0.02% +0.00% +0.07% / -0.01% +0.27% +0.41%] index_add_ strided 3 : Elapsed 0.081 ms (8.072 ms / 100) 7.803 -> 7.821 ( +0.23%) [ +0.09% +0.00% +0.26% / +0.23% +0.29% +0.56%] index_copy_ strided 3 : Elapsed 0.078 ms (7.810 ms / 100) 8.052 -> 8.038 ( -0.17%) [ +0.02% +0.00% +0.21% / -0.17% +0.24% +0.32%] index_add_ perm : Elapsed 0.081 ms (8.054 ms / 100) 7.806 -> 7.809 ( +0.04%) [ +0.20% +0.00% +0.01% / +0.04% +0.22% +0.27%] index_copy_ perm : Elapsed 0.078 ms (7.822 ms / 100) 8.045 -> 8.052 ( +0.09%) [ +0.10% +0.00% +0.12% / +0.09% +0.31% +0.20%] index_add_ perm_sorted : Elapsed 0.081 ms (8.053 ms / 100) 7.801 -> 7.810 ( +0.12%) [ +0.15% +0.00% +0.10% / +0.22% +0.42% +0.12%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.813 ms / 100) 14.113 -> 14.153 ( +0.28%) [ +0.36% +0.00% +0.06% / +0.42% +0.28% +0.45%] index_select const : Elapsed 0.142 ms (14.164 ms / 100) 14.125 -> 14.167 ( +0.30%) [ +0.23% +0.01% +0.00% / +0.34% +0.30% +0.39%] index_select wrap : Elapsed 0.142 ms (14.157 ms / 100) 14.123 -> 14.163 ( +0.28%) [ +0.47% +0.00% +0.11% / +0.28% +0.41% +0.40%] index_select linear : Elapsed 0.142 ms (14.189 ms / 100) 14.113 -> 14.158 ( +0.32%) [ +0.40% +0.00% +0.20% / +0.36% +0.40% +0.32%] index_select reverse : Elapsed 0.142 ms (14.169 ms / 100) 14.122 -> 14.154 ( +0.23%) [ +0.33% +0.00% +0.02% / +0.23% +0.28% +0.35%] index_select skip64 : Elapsed 0.142 ms (14.168 ms / 100) 14.131 -> 14.156 ( +0.18%) [ +0.27% +0.08% +0.00% / +0.18% +0.30% +0.21%] index_select skip256 : Elapsed 0.142 ms (14.169 ms / 100) 14.113 -> 14.163 ( +0.35%) [ +0.35% +0.00% +0.05% / +0.35% +0.51% +0.46%] index_select spread : Elapsed 0.142 ms (14.162 ms / 100) 14.114 -> 14.163 ( +0.35%) [ +0.35% +0.00% +0.06% / +0.35% +0.50% +0.38%] index_select random : Elapsed 0.142 ms (14.164 ms / 100) 14.124 -> 14.156 ( +0.23%) [ +0.21% +0.03% +0.00% / +0.25% +0.28% +0.23%] index_select random_sorted : Elapsed 0.142 ms (14.154 ms / 100) out_shape = [1, 5, 500] in_shape = [1, 200, 500] idx_dim = 1 B = [1, 5, 500] (stride (2500, 500, 1)) A = [1, 200, 500] (stride (100000, 500, 1)) dim = 1 0.607 -> 0.608 ( +0.16%) [ +0.99% +0.16% +0.00% / +0.82% +0.33% +0.16%] index_select const : Elapsed 0.006 ms (0.613 ms / 100) 0.606 -> 0.610 ( +0.66%) [ +0.99% +0.00% +0.00% / +0.83% +0.66% +0.66%] index_select wrap : Elapsed 0.006 ms (0.612 ms / 100) 0.606 -> 0.609 ( +0.50%) [ +0.83% +0.17% +0.00% / +0.83% +0.50% +0.50%] index_select linear : Elapsed 0.006 ms (0.611 ms / 100) 0.606 -> 0.609 ( +0.50%) [ +0.83% +0.17% +0.00% / +0.83% +0.66% +0.50%] index_select reverse : Elapsed 0.006 ms (0.611 ms / 100) 0.606 -> 0.610 ( +0.66%) [ +0.83% +0.17% +0.00% / +0.99% +0.66% +0.66%] index_select skip64 : Elapsed 0.006 ms (0.611 ms / 100) 0.606 -> 0.610 ( +0.66%) [ +0.83% +0.17% +0.00% / +0.83% +0.83% +0.66%] index_select skip256 : Elapsed 0.006 ms (0.611 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +4.63% +0.00% +0.00% / +0.99% +0.83% +0.66%] index_select spread : Elapsed 0.006 ms (0.633 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +0.99% +0.00% +0.17% / +0.83% +0.99% +0.66%] index_select strided 3 : Elapsed 0.006 ms (0.611 ms / 100) 0.604 -> 0.609 ( +0.83%) [ +0.99% +0.33% +0.00% / +0.99% +0.99% +0.83%] index_select strided 5 : Elapsed 0.006 ms (0.610 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +1.16% +0.00% +0.00% / +0.99% +1.16% +0.66%] index_select strided 7 : Elapsed 0.006 ms (0.612 ms / 100) 0.604 -> 0.610 ( +0.99%) [ +0.99% +0.33% +0.00% / +0.99% +0.99% +0.99%] index_select strided 8 : Elapsed 0.006 ms (0.610 ms / 100) 0.603 -> 0.609 ( +1.00%) [ +1.00% +0.17% +0.00% / +1.00% +1.49% +1.49%] index_select strided 16 : Elapsed 0.006 ms (0.609 ms / 100) 0.603 -> 0.609 ( +1.00%) [ +1.00% +0.17% +0.00% / +1.00% +1.33% +5.80%] index_select strided 64 : Elapsed 0.006 ms (0.609 ms / 100) 0.604 -> 0.609 ( +0.83%) [ +1.16% +0.17% +0.00% / +0.83% +0.99% +1.16%] index_select strided 100 : Elapsed 0.006 ms (0.611 ms / 100) 0.603 -> 0.610 ( +1.16%) [ +1.00% +0.17% +0.00% / +1.16% +1.33% +1.33%] index_select random : Elapsed 0.006 ms (0.609 ms / 100) 0.604 -> 0.610 ( +0.99%) [ +0.83% +0.00% +0.17% / +0.99% +2.15% +1.16%] index_select random_sorted : Elapsed 0.006 ms (0.609 ms / 100) 0.605 -> 0.611 ( +0.99%) [ +0.83% +0.00% +0.00% / +0.99% +1.16% +0.99%] index_select perm : Elapsed 0.006 ms (0.610 ms / 100) 0.605 -> 0.611 ( +0.99%) [ +0.83% +0.00% +0.00% / +1.16% +0.99% +0.99%] index_select perm_sorted : Elapsed 0.006 ms (0.610 ms / 100) B = [1, 5, 500] (stride (500, 500, 1)) A = [1, 200, 500] (stride (1, 1, 200)) dim = 1 0.642 -> 0.648 ( +0.93%) [ +0.93% +0.00% +0.00% / +0.93% +1.25% +0.93%] index_select const : Elapsed 0.006 ms (0.648 ms / 100) 0.641 -> 0.647 ( +0.94%) [ +1.25% +0.00% +0.00% / +0.94% +1.56% +1.25%] index_select wrap : Elapsed 0.006 ms (0.649 ms / 100) 0.642 -> 0.647 ( +0.78%) [ +0.93% +0.00% +0.00% / +0.78% +1.09% +1.09%] index_select linear : Elapsed 0.006 ms (0.648 ms / 100) 0.641 -> 0.647 ( +0.94%) [ +1.09% +0.16% +0.00% / +0.94% +1.09% +1.25%] index_select reverse : Elapsed 0.006 ms (0.648 ms / 100) 0.640 -> 0.648 ( +1.25%) [ +0.94% +0.16% +0.00% / +1.25% +1.41% +1.25%] index_select skip64 : Elapsed 0.006 ms (0.646 ms / 100) 0.641 -> 0.648 ( +1.09%) [ +1.09% +0.00% +0.00% / +1.09% +1.25% +1.40%] index_select skip256 : Elapsed 0.006 ms (0.648 ms / 100) 0.640 -> 0.642 ( +0.31%) [ +0.47% +0.16% +0.00% / +0.31% +0.78% +0.47%] index_select spread : Elapsed 0.006 ms (0.643 ms / 100) 0.643 -> 0.648 ( +0.78%) [ +0.93% +0.00% +0.00% / +0.78% +0.93% +0.78%] index_select strided 3 : Elapsed 0.006 ms (0.649 ms / 100) 0.642 -> 0.648 ( +0.93%) [ +0.93% +0.00% +0.16% / +0.93% +1.25% +1.40%] index_select strided 5 : Elapsed 0.006 ms (0.648 ms / 100) 0.641 -> 0.648 ( +1.09%) [ +1.09% +0.16% +0.00% / +1.09% +1.25% +1.25%] index_select strided 7 : Elapsed 0.006 ms (0.648 ms / 100) 0.641 -> 0.647 ( +0.94%) [ +0.94% +0.00% +0.00% / +0.94% +1.09% +1.72%] index_select strided 8 : Elapsed 0.006 ms (0.647 ms / 100) 0.640 -> 0.642 ( +0.31%) [ +0.78% +0.00% +0.00% / +0.31% +0.63% +0.63%] index_select strided 16 : Elapsed 0.006 ms (0.645 ms / 100) 0.642 -> 0.646 ( +0.62%) [ +0.93% +0.00% +0.00% / +0.93% +0.62% +0.93%] index_select strided 64 : Elapsed 0.006 ms (0.648 ms / 100) 0.639 -> 0.641 ( +0.31%) [ +0.31% +0.00% +0.31% / +0.31% +0.63% +0.63%] index_select strided 100 : Elapsed 0.006 ms (0.641 ms / 100) 0.640 -> 0.645 ( +0.78%) [ +0.78% +0.00% +0.00% / +0.78% +1.25% +1.25%] index_select random : Elapsed 0.006 ms (0.645 ms / 100) 0.643 -> 0.648 ( +0.78%) [ +0.93% +0.00% +0.62% / +0.78% +1.09% +1.09%] index_select random_sorted : Elapsed 0.006 ms (0.649 ms / 100) 0.639 -> 0.642 ( +0.47%) [ +0.78% +0.00% +0.16% / +0.63% +0.47% +1.72%] index_select perm : Elapsed 0.006 ms (0.644 ms / 100) 0.642 -> 0.647 ( +0.78%) [ +1.09% +0.00% +0.00% / +0.78% +1.25% +1.25%] index_select perm_sorted : Elapsed 0.006 ms (0.649 ms / 100) B = [1, 5, 500] (stride (5, 1, 5)) A = [1, 200, 500] (stride (100000, 1, 200)) dim = 1 0.649 -> 0.654 ( +0.77%) [ +1.85% +0.00% +0.15% / +0.77% +1.23% +0.92%] index_select const : Elapsed 0.007 ms (0.661 ms / 100) 0.650 -> 0.656 ( +0.92%) [ +0.77% +0.00% +0.00% / +0.92% +1.08% +0.92%] index_select wrap : Elapsed 0.007 ms (0.655 ms / 100) 0.650 -> 0.656 ( +0.92%) [ +1.08% +0.31% +0.00% / +0.92% +1.08% +1.08%] index_select linear : Elapsed 0.007 ms (0.657 ms / 100) 0.649 -> 0.656 ( +1.08%) [ +1.08% +0.00% +0.46% / +1.39% +1.08% +1.39%] index_select reverse : Elapsed 0.007 ms (0.656 ms / 100) 0.650 -> 0.655 ( +0.77%) [ +0.77% +0.00% +0.00% / +0.77% +1.08% +0.92%] index_select skip64 : Elapsed 0.007 ms (0.655 ms / 100) 0.649 -> 0.656 ( +1.08%) [ +0.62% +0.00% +0.15% / +1.08% +1.54% +1.08%] index_select skip256 : Elapsed 0.007 ms (0.653 ms / 100) 0.641 -> 0.647 ( +0.94%) [ +0.62% +0.00% +0.16% / +0.94% +1.09% +0.94%] index_select spread : Elapsed 0.006 ms (0.645 ms / 100) 0.651 -> 0.655 ( +0.61%) [ +0.92% +16.28% +0.00% / +0.61% +1.08% +1.23%] index_select strided 3 : Elapsed 0.007 ms (0.657 ms / 100) 0.649 -> 0.655 ( +0.92%) [ +0.77% +0.00% +0.15% / +0.92% +1.54% +1.69%] index_select strided 5 : Elapsed 0.007 ms (0.654 ms / 100) 0.649 -> 0.654 ( +0.77%) [ +0.92% +0.15% +0.00% / +0.77% +1.08% +1.08%] index_select strided 7 : Elapsed 0.007 ms (0.655 ms / 100) 0.646 -> 0.653 ( +1.08%) [ +0.93% +0.31% +0.00% / +1.08% +1.08% +1.24%] index_select strided 8 : Elapsed 0.007 ms (0.652 ms / 100) 0.648 -> 0.654 ( +0.93%) [ +0.93% +0.31% +0.00% / +0.93% +1.39% +1.23%] index_select strided 16 : Elapsed 0.007 ms (0.654 ms / 100) 0.641 -> 0.646 ( +0.78%) [ +0.94% +0.00% +0.16% / +0.78% +1.25% +1.09%] index_select strided 64 : Elapsed 0.006 ms (0.647 ms / 100) 0.645 -> 0.653 ( +1.24%) [ +1.24% +0.31% +0.00% / +1.40% +1.24% +2.02%] index_select strided 100 : Elapsed 0.007 ms (0.653 ms / 100) 0.648 -> 0.654 ( +0.93%) [ +1.08% +0.15% +0.00% / +0.93% +1.08% +1.39%] index_select random : Elapsed 0.007 ms (0.655 ms / 100) 0.639 -> 0.644 ( +0.78%) [ +0.78% +0.16% +0.00% / +0.78% +1.41% +1.10%] index_select random_sorted : Elapsed 0.006 ms (0.644 ms / 100) 0.646 -> 0.650 ( +0.62%) [ +0.93% +0.00% +0.15% / +1.08% +0.77% +0.62%] index_select perm : Elapsed 0.007 ms (0.652 ms / 100) 0.645 -> 0.650 ( +0.78%) [ +0.78% +0.31% +0.00% / +1.09% +0.78% +0.78%] index_select perm_sorted : Elapsed 0.007 ms (0.650 ms / 100) out_shape = [1, 200, 5] in_shape = [1, 200, 500] idx_dim = 2 B = [1, 200, 5] (stride (5, 5, 1)) A = [1, 200, 500] (stride (1, 500, 1)) dim = 2 0.599 -> 0.598 ( -0.17%) [ +0.33% +0.17% +0.00% / +0.33% +0.00% -0.17%] index_select const : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.598 ( -0.17%) [ +0.50% +1.00% +0.00% / +0.50% +0.17% -0.17%] index_select wrap : Elapsed 0.006 ms (0.602 ms / 100) 0.588 -> 0.589 ( +0.17%) [ +1.02% +0.34% +0.00% / +0.85% +0.68% +0.17%] index_select linear : Elapsed 0.006 ms (0.594 ms / 100) 0.598 -> 0.598 ( +0.00%) [ +0.33% +0.17% +0.00% / +0.33% +0.84% +0.00%] index_select reverse : Elapsed 0.006 ms (0.600 ms / 100) 0.599 -> 0.599 ( +0.00%) [ +0.33% +1.17% +0.00% / +0.33% +0.00% +0.00%] index_select skip64 : Elapsed 0.006 ms (0.601 ms / 100) 0.587 -> 0.591 ( +0.68%) [ +1.02% +0.00% +0.00% / +1.19% +0.68% +0.68%] index_select skip256 : Elapsed 0.006 ms (0.593 ms / 100) 0.588 -> 0.594 ( +1.02%) [ +1.19% +0.34% +0.00% / +1.02% +1.02% +2.21%] index_select spread : Elapsed 0.006 ms (0.595 ms / 100) 0.600 -> 0.599 ( -0.17%) [ +0.00% +0.00% +0.00% / +0.00% -0.17% +0.00%] index_select strided 3 : Elapsed 0.006 ms (0.600 ms / 100) 0.591 -> 0.594 ( +0.51%) [ +1.52% +0.51% +0.00% / +1.18% +1.18% +0.51%] index_select strided 5 : Elapsed 0.006 ms (0.600 ms / 100) 0.600 -> 0.602 ( +0.33%) [ +0.67% +0.17% +0.00% / +0.50% +0.50% +0.33%] index_select strided 7 : Elapsed 0.006 ms (0.604 ms / 100) 0.593 -> 0.597 ( +0.67%) [ +1.18% +0.00% +0.34% / +1.18% +1.35% +0.67%] index_select strided 8 : Elapsed 0.006 ms (0.600 ms / 100) 0.587 -> 0.590 ( +0.51%) [ +0.85% +0.00% +0.17% / +0.51% +0.51% +0.68%] index_select strided 16 : Elapsed 0.006 ms (0.592 ms / 100) 0.593 -> 0.595 ( +0.34%) [ +1.01% +0.00% +0.17% / +0.34% +0.84% +0.51%] index_select strided 64 : Elapsed 0.006 ms (0.599 ms / 100) 0.589 -> 0.593 ( +0.68%) [ +1.87% +0.00% +0.17% / +1.53% +0.85% +0.68%] index_select strided 100 : Elapsed 0.006 ms (0.600 ms / 100) 0.593 -> 0.596 ( +0.51%) [ +1.01% +0.00% +0.00% / +1.18% +0.51% +0.51%] index_select strided 255 : Elapsed 0.006 ms (0.599 ms / 100) 0.593 -> 0.597 ( +0.67%) [ +1.18% +0.17% +0.00% / +1.18% +0.67% +0.84%] index_select strided 256 : Elapsed 0.006 ms (0.600 ms / 100) 0.594 -> 0.599 ( +0.84%) [ +1.01% +0.00% +0.00% / +0.84% +1.01% +1.01%] index_select strided 257 : Elapsed 0.006 ms (0.600 ms / 100) 0.592 -> 0.599 ( +1.18%) [ +8.78% +0.34% +0.00% / +1.35% +1.18% +1.18%] index_select random : Elapsed 0.006 ms (0.644 ms / 100) 0.591 -> 0.598 ( +1.18%) [ +1.02% +0.00% +1.52% / +3.38% +1.18% +1.35%] index_select random_sorted : Elapsed 0.006 ms (0.597 ms / 100) 0.601 -> 0.600 ( -0.17%) [ +0.00% +0.00% +0.00% / +0.17% +0.00% -0.17%] index_select perm : Elapsed 0.006 ms (0.601 ms / 100) 0.602 -> 0.607 ( +0.83%) [ +0.83% +0.17% +0.00% / +0.83% +1.00% +0.83%] index_select perm_sorted : Elapsed 0.006 ms (0.607 ms / 100) B = [1, 200, 5] (stride (1, 5, 1)) A = [1, 200, 500] (stride (200, 1, 200)) dim = 2 0.557 -> 0.562 ( +0.90%) [ +0.72% +0.36% +0.00% / +0.90% +1.26% +3.95%] index_select const : Elapsed 0.006 ms (0.561 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +1.07% +1.07%] index_select wrap : Elapsed 0.006 ms (0.563 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.54% +0.00% +0.36% / +0.71% +1.07% +2.68%] index_select linear : Elapsed 0.006 ms (0.563 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.53% +0.18% +0.00% / +0.53% +0.89% +1.25%] index_select reverse : Elapsed 0.006 ms (0.564 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +0.54% +0.00% +0.00% / +0.54% +0.36% +0.54%] index_select skip64 : Elapsed 0.006 ms (0.563 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.72% +0.72% +0.54%] index_select skip256 : Elapsed 0.006 ms (0.563 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +1.25% +1.07%] index_select spread : Elapsed 0.006 ms (0.563 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +3.58% / +0.72% +1.43% +1.25%] index_select strided 3 : Elapsed 0.006 ms (0.563 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +2.33% +0.00% +0.00% / +0.72% +1.25% +1.25%] index_select strided 5 : Elapsed 0.006 ms (0.572 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +2.15% +0.00% +0.00% / +0.90% +1.61% +1.43%] index_select strided 7 : Elapsed 0.006 ms (0.570 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +2.50% +0.00% +0.00% / +0.54% +0.89% +0.71%] index_select strided 8 : Elapsed 0.006 ms (0.574 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +2.68% +0.54% +0.00% / +0.89% +1.07% +1.07%] index_select strided 16 : Elapsed 0.006 ms (0.574 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +6.98% +1.07% +0.00% / +0.72% +1.25% +1.25%] index_select strided 64 : Elapsed 0.006 ms (0.598 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.54% +0.00% +0.00% / +0.72% +1.25% +1.25%] index_select strided 100 : Elapsed 0.006 ms (0.562 ms / 100) 0.565 -> 0.565 ( +0.00%) [ +0.00% +0.88% +6.02% / +4.25% +0.18% +0.00%] index_select strided 255 : Elapsed 0.006 ms (0.565 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.36% +0.00% / +0.89% +1.25% +1.07%] index_select strided 256 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.53% +0.18% +0.00% / +1.25% +0.89% +0.53%] index_select strided 257 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.89% +0.00% +0.00% / +8.91% +5.17% +0.53%] index_select random : Elapsed 0.006 ms (0.566 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +1.07% +0.71%] index_select random_sorted : Elapsed 0.006 ms (0.564 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.71% +1.07% +0.89%] index_select perm : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.89% +0.71% +0.71%] index_select perm_sorted : Elapsed 0.006 ms (0.565 ms / 100) B = [1, 200, 5] (stride (200, 1, 200)) A = [1, 200, 500] (stride (1, 500, 1)) dim = 2 0.591 -> 0.598 ( +1.18%) [ +1.35% +0.17% +0.00% / +1.35% +1.18% +1.35%] index_select const : Elapsed 0.006 ms (0.599 ms / 100) 0.589 -> 0.598 ( +1.53%) [ +1.19% +0.00% +0.00% / +1.53% +1.53% +3.90%] index_select wrap : Elapsed 0.006 ms (0.596 ms / 100) 0.589 -> 0.600 ( +1.87%) [ +1.19% +1.70% +0.00% / +2.04% +1.87% +1.87%] index_select linear : Elapsed 0.006 ms (0.596 ms / 100) 0.590 -> 0.599 ( +1.53%) [ +0.51% +0.00% +0.00% / +1.53% +1.69% +1.53%] index_select reverse : Elapsed 0.006 ms (0.593 ms / 100) 0.592 -> 0.597 ( +0.84%) [ +1.35% +0.00% +0.00% / +0.84% +1.01% +1.18%] index_select skip64 : Elapsed 0.006 ms (0.600 ms / 100) 0.590 -> 0.599 ( +1.53%) [ +2.20% +0.00% +0.00% / +2.03% +1.53% +1.53%] index_select skip256 : Elapsed 0.006 ms (0.603 ms / 100) 0.580 -> 0.585 ( +0.86%) [ +1.03% +0.17% +0.00% / +0.86% +1.55% +1.38%] index_select spread : Elapsed 0.006 ms (0.586 ms / 100) 0.592 -> 0.597 ( +0.84%) [ +1.52% +0.00% +0.00% / +0.84% +1.18% +1.35%] index_select strided 3 : Elapsed 0.006 ms (0.601 ms / 100) 0.588 -> 0.598 ( +1.70%) [ +5.10% +0.00% +0.34% / +1.70% +1.70% +1.70%] index_select strided 5 : Elapsed 0.006 ms (0.618 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.36% +0.17% +0.00% / +1.69% +1.19% +1.69%] index_select strided 7 : Elapsed 0.006 ms (0.598 ms / 100) 0.595 -> 0.599 ( +0.67%) [ +0.67% +0.34% +0.00% / +1.18% +0.67% +1.01%] index_select strided 8 : Elapsed 0.006 ms (0.599 ms / 100) 0.590 -> 0.595 ( +0.85%) [ +1.19% +0.17% +0.00% / +1.19% +1.69% +0.85%] index_select strided 16 : Elapsed 0.006 ms (0.597 ms / 100) 0.594 -> 0.600 ( +1.01%) [ +1.18% +0.00% +1.01% / +1.18% +1.01% +1.18%] index_select strided 64 : Elapsed 0.006 ms (0.601 ms / 100) 0.580 -> 0.586 ( +1.03%) [ +0.86% +0.00% +0.17% / +1.03% +1.38% +1.38%] index_select strided 100 : Elapsed 0.006 ms (0.585 ms / 100) 0.583 -> 0.589 ( +1.03%) [ +0.69% +0.17% +0.00% / +1.20% +1.03% +1.20%] index_select strided 255 : Elapsed 0.006 ms (0.587 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +0.34% +0.17% +0.00% / +0.85% +0.85% +1.20%] index_select strided 256 : Elapsed 0.006 ms (0.587 ms / 100) 0.583 -> 0.589 ( +1.03%) [ +0.86% +0.69% +0.00% / +1.03% +1.54% +1.37%] index_select strided 257 : Elapsed 0.006 ms (0.588 ms / 100) 0.582 -> 0.588 ( +1.03%) [ +1.20% +1.03% +0.00% / +1.03% +1.20% +1.03%] index_select random : Elapsed 0.006 ms (0.589 ms / 100) 0.580 -> 0.584 ( +0.69%) [ +0.69% +0.17% +0.00% / +0.69% +1.03% +0.69%] index_select random_sorted : Elapsed 0.006 ms (0.584 ms / 100) 0.596 -> 0.599 ( +0.50%) [ +0.67% +0.17% +0.00% / +0.84% +0.84% +0.50%] index_select perm : Elapsed 0.006 ms (0.600 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.20% +0.00% +2.74% / +0.68% +0.85% +0.85%] index_select perm_sorted : Elapsed 0.006 ms (0.592 ms / 100) B = [1, 200, 5] (stride (200, 1, 200)) A = [1, 200, 500] (stride (1, 1, 200)) dim = 2 0.554 -> 0.558 ( +0.72%) [ +3.79% +0.00% +0.00% / +0.72% +1.26% +1.26%] index_select const : Elapsed 0.006 ms (0.575 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +3.42% +0.18% +0.00% / +0.72% +1.08% +1.08%] index_select wrap : Elapsed 0.006 ms (0.574 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +2.70% +0.00% +0.00% / +1.08% +1.08% +1.08%] index_select linear : Elapsed 0.006 ms (0.570 ms / 100) 0.555 -> 0.560 ( +0.90%) [+11.71% +0.00% +0.00% / +0.90% +1.08% +1.08%] index_select reverse : Elapsed 0.006 ms (0.620 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +3.25% +0.00% +0.00% / +0.72% +1.26% +1.08%] index_select skip64 : Elapsed 0.006 ms (0.572 ms / 100) 0.553 -> 0.560 ( +1.27%) [ +3.98% +1.99% +0.00% / +9.04% +1.45% +1.27%] index_select skip256 : Elapsed 0.006 ms (0.575 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +2.88% +3.06% +0.00% / +2.16% +1.26% +1.08%] index_select spread : Elapsed 0.006 ms (0.571 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +4.14% +0.00% +0.00% / +1.26% +1.08% +1.08%] index_select strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +6.83% +6.83% +0.00% / +2.52% +1.08% +0.90%] index_select strided 5 : Elapsed 0.006 ms (0.594 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +0.54% +0.18% +0.00% / +0.90% +1.08% +1.08%] index_select strided 7 : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.54% +0.18% +0.00% / +0.72% +1.26% +1.26%] index_select strided 8 : Elapsed 0.006 ms (0.557 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +0.18% / +0.90% +1.26% +1.26%] index_select strided 16 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +1.08% +9.55%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.54% +0.00% +0.18% / +0.54% +1.08% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +0.00% / +1.08% +1.08% +1.26%] index_select strided 255 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +3.78% +0.18% +0.00% / +1.08% +1.26% +1.08%] index_select strided 256 : Elapsed 0.006 ms (0.576 ms / 100) 0.556 -> 0.560 ( +0.72%) [+10.43% +0.00% +1.80% / +0.72% +1.08% +3.24%] index_select strided 257 : Elapsed 0.006 ms (0.614 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +5.59% +0.00% +0.18% / +1.08% +1.26% +0.90%] index_select random : Elapsed 0.006 ms (0.586 ms / 100) 0.556 -> 0.560 ( +0.72%) [+39.75% +0.18% +0.00% / +0.90% +0.72% +0.90%] index_select random_sorted : Elapsed 0.008 ms (0.777 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +8.47% +0.18% +0.00% / +0.90% +1.08% +1.08%] index_select perm : Elapsed 0.006 ms (0.602 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +8.47% +0.00% +0.00% / +1.08% +1.08% +1.08%] index_select perm_sorted : Elapsed 0.006 ms (0.602 ms / 100) B = [1, 200, 5] (stride (1, 1, 200)) dim = 2 fill_cnt = 500 4.124 -> 4.169 ( +1.09%) [ +1.12% +0.02% +0.00% / +1.09% +1.16% +1.21%] index_fill_ const : Elapsed 0.042 ms (4.170 ms / 100) 4.122 -> 4.165 ( +1.04%) [ +1.14% +0.02% +0.00% / +1.07% +1.04% +1.04%] index_fill_ linear : Elapsed 0.042 ms (4.169 ms / 100) 4.123 -> 4.164 ( +0.99%) [ +1.24% +0.07% +0.00% / +1.09% +0.99% +1.02%] index_fill_ reverse : Elapsed 0.042 ms (4.174 ms / 100) 4.124 -> 4.166 ( +1.02%) [ +1.19% +0.02% +0.00% / +1.09% +1.04% +1.02%] index_fill_ skip64 : Elapsed 0.042 ms (4.173 ms / 100) 4.124 -> 4.169 ( +1.09%) [ +1.12% +0.02% +0.00% / +1.09% +1.24% +1.24%] index_fill_ skip256 : Elapsed 0.042 ms (4.170 ms / 100) 4.125 -> 4.168 ( +1.04%) [ +1.14% +0.05% +0.00% / +1.09% +1.04% +1.04%] index_fill_ spread : Elapsed 0.042 ms (4.172 ms / 100) 4.120 -> 4.165 ( +1.09%) [ +1.24% +0.05% +0.00% / +1.09% +1.19% +1.21%] index_fill_ strided 3 : Elapsed 0.042 ms (4.171 ms / 100) 4.121 -> 4.167 ( +1.12%) [ +1.12% +0.02% +0.00% / +1.12% +1.16% +1.14%] index_fill_ random : Elapsed 0.042 ms (4.167 ms / 100) 4.128 -> 4.168 ( +0.97%) [ +1.14% +0.02% +0.00% / +1.04% +0.99% +0.97%] index_fill_ random_sorted : Elapsed 0.042 ms (4.175 ms / 100) out_shape = [5, 500, 200] in_shape = [1, 500, 200] idx_dim = 0 B = [5, 500, 200] (stride (100000, 200, 1)) A = [1, 500, 200] (stride (200, 200, 1)) dim = 0 4.856 -> 4.851 ( -0.10%) [ +0.12% +0.00% +0.29% / -0.10% +0.02% +0.33%] index_add_ linear : Elapsed 0.049 ms (4.862 ms / 100) 4.705 -> 4.705 ( +0.00%) [ +0.32% +0.00% +0.11% / +0.00% +0.30% +0.17%] index_copy_ linear : Elapsed 0.047 ms (4.720 ms / 100) 4.862 -> 4.863 ( +0.02%) [ +0.00% +0.00% +0.21% / +0.04% +0.06% +0.02%] index_add_ reverse : Elapsed 0.049 ms (4.862 ms / 100) 4.702 -> 4.704 ( +0.04%) [ +0.40% +0.00% +0.15% / +0.04% +0.36% +0.40%] index_copy_ reverse : Elapsed 0.047 ms (4.721 ms / 100) 4.864 -> 4.854 ( -0.21%) [ +0.33% +0.00% +0.08% / +0.04% -0.21% -0.04%] index_add_ spread : Elapsed 0.049 ms (4.880 ms / 100) 4.700 -> 4.711 ( +0.23%) [ +0.38% +0.00% +0.13% / +0.23% +0.23% +0.26%] index_copy_ spread : Elapsed 0.047 ms (4.718 ms / 100) 4.850 -> 4.865 ( +0.31%) [ +0.00% +0.12% +0.31% / +0.49% +0.31% +0.37%] index_add_ strided 3 : Elapsed 0.048 ms (4.850 ms / 100) 4.697 -> 4.716 ( +0.40%) [ +0.32% +0.00% +0.30% / +0.40% +0.60% +0.64%] index_copy_ strided 3 : Elapsed 0.047 ms (4.712 ms / 100) 4.865 -> 4.865 ( +0.00%) [ +0.25% +0.00% +0.00% / +0.29% +0.00% +0.06%] index_add_ perm : Elapsed 0.049 ms (4.877 ms / 100) 4.703 -> 4.702 ( -0.02%) [ +0.28% +0.00% +0.34% / -0.02% +0.17% +0.15%] index_copy_ perm : Elapsed 0.047 ms (4.716 ms / 100) 4.855 -> 4.853 ( -0.04%) [ +0.31% +0.00% +0.12% / +0.14% +0.04% -0.04%] index_add_ perm_sorted : Elapsed 0.049 ms (4.870 ms / 100) 4.704 -> 4.702 ( -0.04%) [ +0.40% +0.00% +0.21% / -0.04% +0.30% +0.00%] index_copy_ perm_sorted : Elapsed 0.047 ms (4.723 ms / 100) 6.489 -> 6.502 ( +0.20%) [ +0.00% +0.34% +0.26% / +0.40% +0.28% +0.20%] index_select const : Elapsed 0.065 ms (6.489 ms / 100) 6.463 -> 6.481 ( +0.28%) [ +0.26% +0.00% +0.42% / +0.28% +0.40% +0.59%] index_select wrap : Elapsed 0.065 ms (6.480 ms / 100) 6.465 -> 6.480 ( +0.23%) [ +0.12% +0.00% +0.34% / +0.23% +0.56% +0.46%] index_select linear : Elapsed 0.065 ms (6.473 ms / 100) 6.469 -> 6.500 ( +0.48%) [ +0.59% +0.70% +0.00% / +0.59% +0.54% +0.48%] index_select reverse : Elapsed 0.065 ms (6.507 ms / 100) 6.468 -> 6.476 ( +0.12%) [ +0.00% +0.14% +0.26% / +0.32% +0.12% +0.29%] index_select skip64 : Elapsed 0.065 ms (6.468 ms / 100) 6.483 -> 6.480 ( -0.05%) [ +0.00% +0.17% +0.14% / -0.05% +0.35% +0.19%] index_select skip256 : Elapsed 0.065 ms (6.483 ms / 100) 6.470 -> 6.480 ( +0.15%) [ +0.00% +0.06% +0.23% / +0.15% +0.37% +0.45%] index_select spread : Elapsed 0.065 ms (6.470 ms / 100) 6.479 -> 6.496 ( +0.26%) [ +0.14% +0.12% +0.00% / +0.37% +0.43% +0.26%] index_select random : Elapsed 0.065 ms (6.488 ms / 100) 6.461 -> 6.468 ( +0.11%) [ +0.39% +0.00% +0.12% / +0.19% +0.56% +0.11%] index_select random_sorted : Elapsed 0.065 ms (6.486 ms / 100) B = [5, 500, 200] (stride (200, 1000, 1)) A = [1, 500, 200] (stride (1, 1, 500)) dim = 0 5.074 -> 5.074 ( +0.00%) [ +0.49% +0.06% +0.00% / +0.16% +0.02% +0.00%] index_add_ linear : Elapsed 0.051 ms (5.099 ms / 100) 4.925 -> 4.922 ( -0.06%) [ +0.14% +0.14% +0.00% / +0.08% +0.10% -0.06%] index_copy_ linear : Elapsed 0.049 ms (4.932 ms / 100) 5.073 -> 5.071 ( -0.04%) [ +0.26% +0.26% +0.00% / +0.26% +0.08% -0.04%] index_add_ reverse : Elapsed 0.051 ms (5.086 ms / 100) 4.914 -> 4.927 ( +0.26%) [ +0.49% +0.00% +0.22% / +0.26% +0.35% +0.33%] index_copy_ reverse : Elapsed 0.049 ms (4.938 ms / 100) 5.080 -> 5.077 ( -0.06%) [ +0.00% +0.04% +0.00% / -0.06% -0.02% +0.04%] index_add_ spread : Elapsed 0.051 ms (5.080 ms / 100) 4.930 -> 4.928 ( -0.04%) [ +0.06% +0.08% +0.00% / -0.04% +0.04% +0.00%] index_copy_ spread : Elapsed 0.049 ms (4.933 ms / 100) 5.077 -> 5.077 ( +0.00%) [ +0.28% +0.24% +0.00% / +0.00% +0.08% +0.00%] index_add_ strided 3 : Elapsed 0.051 ms (5.091 ms / 100) 4.918 -> 4.929 ( +0.22%) [ +0.37% +0.47% +0.00% / +0.28% +0.22% +0.47%] index_copy_ strided 3 : Elapsed 0.049 ms (4.936 ms / 100) 5.111 -> 5.103 ( -0.16%) [ +0.04% +0.00% +0.06% / -0.02% -0.16% -0.08%] index_add_ perm : Elapsed 0.051 ms (5.113 ms / 100) 4.920 -> 4.916 ( -0.08%) [ +0.04% +0.04% +0.00% / +0.08% -0.08% +0.16%] index_copy_ perm : Elapsed 0.049 ms (4.922 ms / 100) 5.111 -> 5.108 ( -0.06%) [ +0.20% +0.00% +0.10% / +0.04% -0.06% +0.06%] index_add_ perm_sorted : Elapsed 0.051 ms (5.121 ms / 100) 4.915 -> 4.917 ( +0.04%) [ +0.20% +0.08% +0.00% / +0.31% +0.04% +0.18%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.925 ms / 100) 7.849 -> 7.853 ( +0.05%) [ +0.09% +0.00% +0.15% / +0.05% +0.29% +0.05%] index_select const : Elapsed 0.079 ms (7.856 ms / 100) 7.843 -> 7.852 ( +0.11%) [ +0.10% +0.00% +0.08% / +0.11% +0.14% +0.29%] index_select wrap : Elapsed 0.079 ms (7.851 ms / 100) 7.851 -> 7.844 ( -0.09%) [ +0.06% +0.00% +0.05% / +0.18% -0.09% -0.05%] index_select linear : Elapsed 0.079 ms (7.856 ms / 100) 7.858 -> 7.847 ( -0.14%) [ +0.03% +0.00% +0.09% / +0.09% -0.11% -0.14%] index_select reverse : Elapsed 0.079 ms (7.860 ms / 100) 7.853 -> 7.845 ( -0.10%) [ +0.22% +0.00% +0.00% / +0.51% -0.10% -0.05%] index_select skip64 : Elapsed 0.079 ms (7.870 ms / 100) 7.843 -> 7.844 ( +0.01%) [ +0.22% +0.00% +0.09% / +0.15% +0.01% +0.10%] index_select skip256 : Elapsed 0.079 ms (7.860 ms / 100) 7.844 -> 7.845 ( +0.01%) [ +0.11% +0.00% +0.03% / +0.11% +0.01% +0.13%] index_select spread : Elapsed 0.079 ms (7.853 ms / 100) 7.852 -> 7.849 ( -0.04%) [ +0.09% +0.08% +0.00% / -0.04% -0.04% +0.08%] index_select random : Elapsed 0.079 ms (7.859 ms / 100) 7.846 -> 7.858 ( +0.15%) [ +0.05% +0.00% +0.00% / +0.17% +0.24% +0.15%] index_select random_sorted : Elapsed 0.079 ms (7.850 ms / 100) out_shape = [1, 5, 200] in_shape = [1, 500, 200] idx_dim = 1 B = [1, 5, 200] (stride (5, 1, 5)) A = [1, 500, 200] (stride (1, 200, 1)) dim = 1 0.611 -> 0.615 ( +0.65%) [ +1.15% +0.33% +0.00% / +1.15% +0.65% +0.82%] index_select const : Elapsed 0.006 ms (0.618 ms / 100) 0.613 -> 0.615 ( +0.33%) [ +0.98% +0.16% +0.00% / +0.98% +0.49% +0.33%] index_select wrap : Elapsed 0.006 ms (0.619 ms / 100) 0.613 -> 0.616 ( +0.49%) [ +0.82% +0.00% +0.00% / +0.98% +0.49% +0.65%] index_select linear : Elapsed 0.006 ms (0.618 ms / 100) 0.613 -> 0.616 ( +0.49%) [ +0.82% +0.00% +0.16% / +0.65% +0.49% +0.65%] index_select reverse : Elapsed 0.006 ms (0.618 ms / 100) 0.612 -> 0.615 ( +0.49%) [ +0.98% +0.00% +0.16% / +0.98% +0.65% +0.49%] index_select skip64 : Elapsed 0.006 ms (0.618 ms / 100) 0.611 -> 0.615 ( +0.65%) [ +1.15% +0.00% +0.00% / +0.98% +0.82% +0.65%] index_select skip256 : Elapsed 0.006 ms (0.618 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.65% +0.00% +0.00% / +0.65% +1.80% +0.82%] index_select spread : Elapsed 0.006 ms (0.616 ms / 100) 0.613 -> 0.616 ( +0.49%) [ +0.82% +0.00% +0.00% / +0.98% +0.49% +0.49%] index_select strided 3 : Elapsed 0.006 ms (0.618 ms / 100) 0.613 -> 0.616 ( +0.49%) [ +0.98% +0.00% +0.16% / +0.98% +0.49% +0.65%] index_select strided 5 : Elapsed 0.006 ms (0.619 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.98% +0.00% +0.16% / +0.82% +0.65% +0.65%] index_select strided 7 : Elapsed 0.006 ms (0.618 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +1.14% +0.49% +0.00% / +0.82% +0.65% +0.65%] index_select strided 8 : Elapsed 0.006 ms (0.619 ms / 100) 0.612 -> 0.618 ( +0.98%) [ +0.98% +0.00% +0.00% / +0.98% +1.63% +0.98%] index_select strided 16 : Elapsed 0.006 ms (0.618 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.98% +0.00% +0.16% / +0.82% +0.65% +0.65%] index_select strided 64 : Elapsed 0.006 ms (0.618 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +2.12% +0.00% +3.76% / +0.98% +0.65% +0.82%] index_select strided 100 : Elapsed 0.006 ms (0.625 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.98% +0.16% +0.00% / +0.98% +0.65% +0.98%] index_select strided 255 : Elapsed 0.006 ms (0.618 ms / 100) 0.610 -> 0.615 ( +0.82%) [ +0.98% +0.16% +0.00% / +0.82% +0.98% +1.31%] index_select strided 256 : Elapsed 0.006 ms (0.616 ms / 100) 0.611 -> 0.618 ( +1.15%) [ +0.82% +0.16% +0.00% / +1.15% +1.15% +1.15%] index_select strided 257 : Elapsed 0.006 ms (0.616 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.65% +0.16% +0.00% / +0.98% +0.65% +0.82%] index_select random : Elapsed 0.006 ms (0.616 ms / 100) 0.611 -> 0.616 ( +0.82%) [ +0.82% +0.16% +0.00% / +0.98% +0.82% +0.98%] index_select random_sorted : Elapsed 0.006 ms (0.616 ms / 100) 0.612 -> 0.616 ( +0.65%) [ +0.82% +0.00% +0.00% / +0.82% +0.65% +0.65%] index_select perm : Elapsed 0.006 ms (0.617 ms / 100) 0.612 -> 0.615 ( +0.49%) [ +0.82% +0.00% +0.00% / +0.82% +0.82% +0.49%] index_select perm_sorted : Elapsed 0.006 ms (0.617 ms / 100) B = [1, 5, 200] (stride (1, 1, 5)) A = [1, 500, 200] (stride (1, 200, 1)) dim = 1 0.609 -> 0.614 ( +0.82%) [ +0.82% +4.43% +0.00% / +0.99% +1.15% +0.82%] index_select const : Elapsed 0.006 ms (0.614 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +0.98% +0.00% +0.00% / +0.98% +1.80% +1.15%] index_select wrap : Elapsed 0.006 ms (0.616 ms / 100) 0.610 -> 0.617 ( +1.15%) [ +1.15% +0.00% +0.16% / +1.15% +1.15% +1.15%] index_select linear : Elapsed 0.006 ms (0.617 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +1.15% +0.16% +0.00% / +0.98% +1.31% +1.64%] index_select reverse : Elapsed 0.006 ms (0.617 ms / 100) 0.609 -> 0.615 ( +0.99%) [ +1.15% +0.33% +0.00% / +0.99% +0.99% +1.15%] index_select skip64 : Elapsed 0.006 ms (0.616 ms / 100) 0.610 -> 0.615 ( +0.82%) [ +0.98% +0.00% +0.00% / +0.82% +0.98% +2.95%] index_select skip256 : Elapsed 0.006 ms (0.616 ms / 100) 0.610 -> 0.615 ( +0.82%) [ +0.82% +0.16% +0.00% / +0.82% +1.31% +1.31%] index_select spread : Elapsed 0.006 ms (0.615 ms / 100) 0.610 -> 0.618 ( +1.31%) [ +0.98% +0.00% +0.33% / +5.74% +1.31% +1.31%] index_select strided 3 : Elapsed 0.006 ms (0.616 ms / 100) 0.611 -> 0.618 ( +1.15%) [ +0.82% +0.00% +0.00% / +1.15% +1.15% +1.31%] index_select strided 5 : Elapsed 0.006 ms (0.616 ms / 100) 0.609 -> 0.616 ( +1.15%) [ +0.99% +0.00% +0.00% / +1.15% +1.64% +1.81%] index_select strided 7 : Elapsed 0.006 ms (0.615 ms / 100) 0.610 -> 0.615 ( +0.82%) [ +0.98% +0.16% +0.00% / +0.82% +0.98% +2.30%] index_select strided 8 : Elapsed 0.006 ms (0.616 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +0.98% +0.00% +0.00% / +4.59% +0.98% +1.15%] index_select strided 16 : Elapsed 0.006 ms (0.616 ms / 100) 0.609 -> 0.617 ( +1.31%) [ +1.15% +0.16% +0.00% / +1.31% +1.48% +1.48%] index_select strided 64 : Elapsed 0.006 ms (0.616 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +0.82% +0.00% +0.00% / +0.98% +1.31% +1.31%] index_select strided 100 : Elapsed 0.006 ms (0.615 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +1.15% +0.16% +0.00% / +1.15% +1.15% +0.98%] index_select strided 255 : Elapsed 0.006 ms (0.617 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +0.98% +0.00% +0.16% / +1.15% +0.98% +0.98%] index_select strided 256 : Elapsed 0.006 ms (0.616 ms / 100) 0.611 -> 0.616 ( +0.82%) [ +0.98% +0.16% +0.00% / +0.98% +0.98% +0.82%] index_select strided 257 : Elapsed 0.006 ms (0.617 ms / 100) 0.612 -> 0.617 ( +0.82%) [ +0.65% +0.00% +0.00% / +0.82% +0.82% +0.82%] index_select random : Elapsed 0.006 ms (0.616 ms / 100) 0.612 -> 0.615 ( +0.49%) [ +0.82% +0.00% +0.16% / +0.82% +0.65% +0.49%] index_select random_sorted : Elapsed 0.006 ms (0.617 ms / 100) 0.610 -> 0.616 ( +0.98%) [ +0.98% +0.16% +0.00% / +0.98% +1.15% +0.98%] index_select perm : Elapsed 0.006 ms (0.616 ms / 100) 0.611 -> 0.617 ( +0.98%) [ +0.82% +0.33% +0.00% / +0.98% +0.98% +0.98%] index_select perm_sorted : Elapsed 0.006 ms (0.616 ms / 100) out_shape = [1, 500, 5] in_shape = [1, 500, 200] idx_dim = 2 B = [1, 500, 5] (stride (2500, 1, 500)) A = [1, 500, 200] (stride (200, 200, 1)) dim = 2 0.600 -> 0.602 ( +0.33%) [ +1.33% +0.00% +0.33% / +0.33% +0.83% +0.83%] index_select const : Elapsed 0.006 ms (0.608 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.50% +0.17% +0.00% / +0.50% +0.17% +0.17%] index_select wrap : Elapsed 0.006 ms (0.602 ms / 100) 0.603 -> 0.606 ( +0.50%) [ +0.83% +0.00% +0.00% / +1.00% +0.50% +0.50%] index_select linear : Elapsed 0.006 ms (0.608 ms / 100) 0.603 -> 0.606 ( +0.50%) [ +0.83% +0.00% +1.16% / +1.66% +0.66% +0.50%] index_select reverse : Elapsed 0.006 ms (0.608 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.50% +0.17% +0.00% / +0.50% +0.17% +0.17%] index_select skip64 : Elapsed 0.006 ms (0.602 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +1.17% +0.00% +0.50% / +1.34% +1.34% +0.17%] index_select skip256 : Elapsed 0.006 ms (0.606 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.67% +0.00% +0.17% / +4.34% +0.33% +0.33%] index_select spread : Elapsed 0.006 ms (0.603 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.50% +0.33% +0.50%] index_select strided 3 : Elapsed 0.006 ms (0.601 ms / 100) 0.602 -> 0.607 ( +0.83%) [ +1.00% +0.00% +0.00% / +1.00% +1.00% +0.83%] index_select strided 5 : Elapsed 0.006 ms (0.608 ms / 100) 0.600 -> 0.600 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.33% +0.33%] index_select strided 7 : Elapsed 0.006 ms (0.601 ms / 100) 0.600 -> 0.606 ( +1.00%) [ +1.00% +0.00% +0.17% / +1.00% +1.33% +1.00%] index_select strided 8 : Elapsed 0.006 ms (0.606 ms / 100) 0.600 -> 0.605 ( +0.83%) [ +1.00% +0.00% +0.17% / +1.00% +1.17% +0.83%] index_select strided 16 : Elapsed 0.006 ms (0.606 ms / 100) 0.589 -> 0.593 ( +0.68%) [ +1.53% +0.17% +0.00% / +0.68% +2.21% +1.53%] index_select strided 64 : Elapsed 0.006 ms (0.598 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.17% +0.00% +0.00% / +0.33% +2.84% +0.33%] index_select strided 100 : Elapsed 0.006 ms (0.600 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.17% +0.00% +0.00% / +0.17% +0.17% +0.33%] index_select random : Elapsed 0.006 ms (0.600 ms / 100) 0.597 -> 0.601 ( +0.67%) [ +0.50% +0.00% +0.00% / +0.67% +0.84% +0.67%] index_select random_sorted : Elapsed 0.006 ms (0.600 ms / 100) 0.601 -> 0.602 ( +0.17%) [ +0.67% +0.17% +0.00% / +0.17% +0.50% +2.16%] index_select perm : Elapsed 0.006 ms (0.605 ms / 100) 0.593 -> 0.600 ( +1.18%) [ +1.35% +0.00% +0.00% / +1.18% +1.52% +1.35%] index_select perm_sorted : Elapsed 0.006 ms (0.601 ms / 100) B = [1, 500, 5] (stride (2500, 1, 500)) A = [1, 500, 200] (stride (1, 1, 500)) dim = 2 0.556 -> 0.561 ( +0.90%) [ +0.36% +0.00% +0.00% / +0.90% +0.90% +0.90%] index_select const : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.08% +1.26%] index_select wrap : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +2.52% +0.00% +7.19% / +0.54% +1.08% +10.97%] index_select linear : Elapsed 0.006 ms (0.570 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +0.90% +0.54% +0.00% / +1.08% +1.63% +1.63%] index_select reverse : Elapsed 0.006 ms (0.558 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +0.72% +0.18% +0.00% / +1.44% +0.90% +0.90%] index_select skip64 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +0.90% +0.00% +0.00% / +2.16% +2.16% +1.08%] index_select skip256 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +0.72% +1.98% +0.00% / +1.26% +1.08% +1.08%] index_select spread : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.560 ( +1.08%) [ +0.90% +0.36% +0.00% / +1.08% +1.26% +1.44%] index_select strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.54% +0.00% / +0.72% +1.08% +1.08%] index_select strided 5 : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +1.08% +0.90%] index_select strided 7 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.563 ( +1.44%) [ +1.26% +0.18% +0.00% / +1.80% +1.44% +1.44%] index_select strided 8 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.562 ( +1.26%) [ +1.26% +0.18% +0.00% / +1.26% +1.26% +6.85%] index_select strided 16 : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +1.08% +0.00% +0.00% / +0.90% +0.90% +1.08%] index_select strided 64 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.00% +0.00% / +0.90% +1.26% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.08% +4.32%] index_select random : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.562 ( +1.08%) [ +0.90% +0.00% +0.00% / +1.08% +1.26% +1.08%] index_select random_sorted : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +0.90% +0.00% +0.18% / +0.90% +1.08% +1.08%] index_select perm : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.26% +1.26%] index_select perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) B = [1, 500, 5] (stride (1, 5, 1)) A = [1, 500, 200] (stride (100000, 1, 500)) dim = 2 0.561 -> 0.566 ( +0.89%) [ +1.07% +0.00% +5.70% / +0.89% +1.07% +1.07%] index_select const : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +1.07% +1.07%] index_select wrap : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.567 ( +1.07%) [ +0.89% +0.18% +0.00% / +1.07% +1.25% +1.07%] index_select linear : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +0.89% +0.18% +0.00% / +4.98% +0.89% +0.89%] index_select reverse : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.25% +1.07%] index_select skip64 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +1.07% +0.00% +0.00% / +0.71% +1.07% +1.07%] index_select skip256 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +0.89% +0.00% +0.18% / +0.89% +1.25% +0.89%] index_select spread : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +1.07% +0.00% +0.00% / +0.89% +1.07% +0.89%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.561 -> 0.567 ( +1.07%) [ +0.89% +0.00% +0.00% / +1.07% +1.43% +1.78%] index_select strided 5 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +1.07% +0.18% +0.00% / +0.89% +2.14% +1.43%] index_select strided 7 : Elapsed 0.006 ms (0.567 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +5.52% +1.25%] index_select strided 8 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +1.60% +0.18% +0.00% / +0.89% +1.25% +1.25%] index_select strided 16 : Elapsed 0.006 ms (0.570 ms / 100) 0.562 -> 0.567 ( +0.89%) [ +0.71% +0.00% +0.00% / +0.89% +0.89% +1.78%] index_select strided 64 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +1.07% +0.00% +0.00% / +0.89% +1.25% +10.52%] index_select strided 100 : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.567 ( +1.07%) [ +1.07% +0.00% +0.00% / +1.07% +1.25% +1.25%] index_select random : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.43% +1.25%] index_select random_sorted : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.566 ( +0.71%) [ +1.25% +0.36% +0.00% / +1.25% +0.89% +0.71%] index_select perm : Elapsed 0.006 ms (0.569 ms / 100) 0.565 -> 0.566 ( +0.18%) [ +0.71% +0.00% +0.00% / +0.71% +0.18% +0.35%] index_select perm_sorted : Elapsed 0.006 ms (0.569 ms / 100) B = [1, 500, 5] (stride (1, 5, 1)) A = [1, 500, 200] (stride (1, 200, 1)) dim = 2 0.607 -> 0.612 ( +0.82%) [ +1.48% +0.16% +0.00% / +0.82% +0.82% +0.82%] index_select const : Elapsed 0.006 ms (0.616 ms / 100) 0.607 -> 0.609 ( +0.33%) [ +0.82% +0.16% +0.00% / +0.82% +0.82% +0.33%] index_select wrap : Elapsed 0.006 ms (0.612 ms / 100) 0.608 -> 0.611 ( +0.49%) [ +0.66% +0.16% +0.00% / +0.66% +0.66% +0.49%] index_select linear : Elapsed 0.006 ms (0.612 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.66% +0.00% +0.00% / +1.48% +0.49% +0.49%] index_select reverse : Elapsed 0.006 ms (0.613 ms / 100) 0.607 -> 0.610 ( +0.49%) [ +0.99% +0.00% +0.00% / +0.82% +0.66% +0.49%] index_select skip64 : Elapsed 0.006 ms (0.613 ms / 100) 0.607 -> 0.612 ( +0.82%) [ +0.99% +1.48% +0.00% / +0.82% +0.82% +0.82%] index_select skip256 : Elapsed 0.006 ms (0.613 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +0.83% +0.17% +0.00% / +0.83% +0.67% +0.67%] index_select spread : Elapsed 0.006 ms (0.605 ms / 100) 0.607 -> 0.610 ( +0.49%) [ +0.99% +0.33% +0.00% / +0.82% +0.82% +0.49%] index_select strided 3 : Elapsed 0.006 ms (0.613 ms / 100) 0.607 -> 0.610 ( +0.49%) [ +0.82% +0.00% +0.33% / +0.66% +0.49% +0.49%] index_select strided 5 : Elapsed 0.006 ms (0.612 ms / 100) 0.608 -> 0.611 ( +0.49%) [ +0.99% +0.00% +0.33% / +1.48% +0.66% +0.49%] index_select strided 7 : Elapsed 0.006 ms (0.614 ms / 100) 0.607 -> 0.609 ( +0.33%) [ +0.66% +0.16% +0.00% / +0.66% +0.49% +0.33%] index_select strided 8 : Elapsed 0.006 ms (0.611 ms / 100) 0.606 -> 0.609 ( +0.50%) [ +0.99% +0.00% +0.17% / +0.83% +0.50% +0.83%] index_select strided 16 : Elapsed 0.006 ms (0.612 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +1.00% +0.33% +0.00% / +1.00% +0.67% +0.83%] index_select strided 64 : Elapsed 0.006 ms (0.606 ms / 100) 0.599 -> 0.602 ( +0.50%) [ +1.17% +0.33% +0.00% / +1.17% +0.67% +0.50%] index_select strided 100 : Elapsed 0.006 ms (0.606 ms / 100) 0.599 -> 0.602 ( +0.50%) [ +1.00% +0.00% +0.33% / +0.83% +0.67% +0.50%] index_select random : Elapsed 0.006 ms (0.605 ms / 100) 0.605 -> 0.606 ( +0.17%) [ +0.50% +0.00% +0.00% / +0.66% +1.16% +0.17%] index_select random_sorted : Elapsed 0.006 ms (0.608 ms / 100) 0.606 -> 0.608 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.83% +0.33% +1.32%] index_select perm : Elapsed 0.006 ms (0.609 ms / 100) 0.596 -> 0.598 ( +0.34%) [ +1.01% +0.00% +2.68% / +0.50% +0.67% +0.34%] index_select perm_sorted : Elapsed 0.006 ms (0.602 ms / 100) B = [1, 500, 5] (stride (1, 1, 500)) dim = 2 fill_cnt = 200 1.847 -> 1.857 ( +0.54%) [ +0.97% +0.00% +0.05% / +0.76% +0.54% +0.60%] index_fill_ const : Elapsed 0.019 ms (1.865 ms / 100) 1.841 -> 1.856 ( +0.81%) [ +1.14% +0.05% +0.00% / +0.81% +1.14% +1.14%] index_fill_ linear : Elapsed 0.019 ms (1.862 ms / 100) 1.848 -> 1.853 ( +0.27%) [ +1.14% +0.11% +0.00% / +0.92% +0.27% +0.27%] index_fill_ reverse : Elapsed 0.019 ms (1.869 ms / 100) 1.843 -> 1.857 ( +0.76%) [ +1.03% +0.00% +0.00% / +0.76% +0.98% +0.98%] index_fill_ skip64 : Elapsed 0.019 ms (1.862 ms / 100) 1.840 -> 1.854 ( +0.76%) [ +1.20% +0.11% +0.00% / +0.76% +1.20% +1.14%] index_fill_ skip256 : Elapsed 0.019 ms (1.862 ms / 100) 1.850 -> 1.854 ( +0.22%) [ +1.08% +0.05% +0.00% / +0.81% +0.27% +0.22%] index_fill_ spread : Elapsed 0.019 ms (1.870 ms / 100) 1.844 -> 1.857 ( +0.70%) [ +1.08% +0.11% +0.00% / +0.87% +0.70% +0.70%] index_fill_ strided 3 : Elapsed 0.019 ms (1.864 ms / 100) 1.849 -> 1.857 ( +0.43%) [ +1.08% +0.00% +0.22% / +0.43% +0.65% +0.92%] index_fill_ random : Elapsed 0.019 ms (1.869 ms / 100) 1.842 -> 1.857 ( +0.81%) [ +1.09% +0.00% +0.00% / +0.81% +1.03% +1.09%] index_fill_ random_sorted : Elapsed 0.019 ms (1.862 ms / 100) out_shape = [5, 1, 500] in_shape = [200, 1, 500] idx_dim = 0 B = [5, 1, 500] (stride (500, 500, 1)) A = [200, 1, 500] (stride (500, 100000, 1)) dim = 0 0.556 -> 0.558 ( +0.36%) [ +0.90% +0.18% +0.00% / +0.90% +0.54% +0.36%] index_select const : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +2.70% +0.18% +0.00% / +1.08% +0.54% +0.72%] index_select wrap : Elapsed 0.006 ms (0.570 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +1.08% +0.00% +8.27% / +0.90% +0.54% +0.54%] index_select linear : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +7.19% / +0.90% +0.90% +0.54%] index_select reverse : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.36% +0.00% / +2.70% +3.24% +0.90%] index_select skip64 : Elapsed 0.006 ms (0.561 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +1.26% +0.36% +0.00% / +1.26% +0.72% +1.81%] index_select skip256 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.18% +0.00% / +1.08% +0.90% +2.16%] index_select spread : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.00% / +0.72% +0.54% +0.54%] index_select strided 3 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +1.08% +0.54% +0.00% / +0.90% +0.72% +0.72%] index_select strided 5 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +3.60% +0.00% +0.00% / +0.54% +0.90% +0.54%] index_select strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +5.76% +0.00% +0.00% / +1.26% +0.72% +0.72%] index_select strided 8 : Elapsed 0.006 ms (0.588 ms / 100) 0.554 -> 0.561 ( +1.26%) [ +0.90% +0.36% +0.00% / +3.43% +1.44% +1.26%] index_select strided 16 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.00% +3.97% / +0.72% +1.26% +4.33%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.00% +1.44% / +0.90% +1.08% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +0.90% +7.03%] index_select random : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +1.08% +0.00% +7.04% / +8.48% +0.90% +1.08%] index_select random_sorted : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.18% / +0.90% +0.90% +0.72%] index_select perm : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.18% +0.00% / +6.85% +0.90% +0.72%] index_select perm_sorted : Elapsed 0.006 ms (0.560 ms / 100) B = [5, 1, 500] (stride (500, 500, 1)) A = [200, 1, 500] (stride (1, 200, 200)) dim = 0 0.599 -> 0.605 ( +1.00%) [ +1.00% +0.17% +0.00% / +1.00% +1.34% +3.84%] index_select const : Elapsed 0.006 ms (0.605 ms / 100) 0.599 -> 0.605 ( +1.00%) [ +1.00% +0.33% +0.00% / +1.00% +1.50% +1.34%] index_select wrap : Elapsed 0.006 ms (0.605 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +0.83% +0.17% +0.00% / +0.83% +1.34% +1.17%] index_select linear : Elapsed 0.006 ms (0.604 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +1.00% +0.00% +0.00% / +0.67% +1.50% +1.17%] index_select reverse : Elapsed 0.006 ms (0.606 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +0.83% +0.00% +0.00% / +0.83% +3.17% +1.17%] index_select skip64 : Elapsed 0.006 ms (0.604 ms / 100) 0.599 -> 0.605 ( +1.00%) [ +1.00% +0.17% +0.00% / +1.00% +1.17% +1.34%] index_select skip256 : Elapsed 0.006 ms (0.605 ms / 100) 0.593 -> 0.600 ( +1.18%) [ +0.84% +0.17% +0.00% / +1.18% +1.35% +6.75%] index_select spread : Elapsed 0.006 ms (0.598 ms / 100) 0.601 -> 0.605 ( +0.67%) [ +0.67% +1.00% +0.00% / +0.67% +1.33% +0.83%] index_select strided 3 : Elapsed 0.006 ms (0.605 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +0.67% +0.50% +0.00% / +0.67% +0.83% +0.83%] index_select strided 5 : Elapsed 0.006 ms (0.604 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +1.00% +0.00% +0.00% / +0.83% +1.17% +1.34%] index_select strided 7 : Elapsed 0.006 ms (0.605 ms / 100) 0.601 -> 0.606 ( +0.83%) [ +0.83% +0.00% +0.17% / +0.83% +1.00% +0.83%] index_select strided 8 : Elapsed 0.006 ms (0.606 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.50% +0.50% +0.00% / +0.33% +0.33% +0.67%] index_select strided 16 : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.67% +0.00% +11.02% / +0.50% +0.50% +0.33%] index_select strided 64 : Elapsed 0.006 ms (0.603 ms / 100) 0.592 -> 0.595 ( +0.51%) [ +1.52% +0.00% +3.55% / +2.03% +1.35% +0.51%] index_select strided 100 : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.33% +0.00% +2.67% / +0.83% +0.33% +0.33%] index_select random : Elapsed 0.006 ms (0.601 ms / 100) 0.597 -> 0.600 ( +0.50%) [ +0.84% +0.00% +0.17% / +4.19% +0.50% +0.67%] index_select random_sorted : Elapsed 0.006 ms (0.602 ms / 100) 0.591 -> 0.599 ( +1.35%) [ +1.18% +0.00% +0.00% / +1.52% +1.52% +1.35%] index_select perm : Elapsed 0.006 ms (0.598 ms / 100) 0.591 -> 0.599 ( +1.35%) [ +1.69% +0.51% +0.00% / +1.69% +1.35% +1.69%] index_select perm_sorted : Elapsed 0.006 ms (0.601 ms / 100) B = [5, 1, 500] (stride (500, 1, 1)) A = [200, 1, 500] (stride (500, 100000, 1)) dim = 0 0.554 -> 0.558 ( +0.72%) [ +1.26% +0.00% +0.00% / +0.72% +1.08% +0.72%] index_select const : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.54% +0.00% / +0.72% +1.08% +0.72%] index_select wrap : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +6.83% +0.90%] index_select linear : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.36% +0.00% / +0.90% +1.08% +1.08%] index_select reverse : Elapsed 0.006 ms (0.561 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +1.08% +0.18% +0.00% / +0.72% +1.26% +11.19%] index_select skip64 : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.00% +0.18% / +0.72% +1.26% +1.62%] index_select skip256 : Elapsed 0.006 ms (0.559 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.54% +0.00% +0.00% / +0.36% +0.90% +0.72%] index_select spread : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +2.53% +0.36% +0.00% / +0.72% +1.26% +0.90%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +0.90% +0.36% +0.00% / +0.90% +1.45% +1.45%] index_select strided 5 : Elapsed 0.006 ms (0.558 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +1.08% +0.36% +0.00% / +0.90% +1.45% +1.63%] index_select strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +1.08% +0.00% +0.36% / +0.90% +1.27% +1.45%] index_select strided 8 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.00% +15.68% / +0.54% +1.08% +1.08%] index_select strided 16 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.00% +26.31% / +0.72% +0.90% +1.08%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.54% +0.00% +0.72% / +0.72% +1.08% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.561 ( +1.26%) [ +0.72% +0.36% +0.00% / +1.26% +1.44% +1.26%] index_select random : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.72% +1.81% +0.00% / +0.72% +1.26% +1.62%] index_select random_sorted : Elapsed 0.006 ms (0.558 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.00% / +1.08% +0.54% +0.54%] index_select perm : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +1.62% +0.18% +0.00% / +1.08% +0.72% +0.54%] index_select perm_sorted : Elapsed 0.006 ms (0.564 ms / 100) B = [5, 1, 500] (stride (500, 2500, 1)) A = [200, 1, 500] (stride (500, 1, 1)) dim = 0 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.90% +1.08% +0.72%] index_select const : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +1.08% +1.80% +0.00% / +0.90% +0.54% +0.54%] index_select wrap : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.54% / +0.90% +3.60% +0.54%] index_select linear : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +1.08% +0.18% +0.00% / +0.90% +0.90% +0.54%] index_select reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +0.90% +0.18% +0.00% / +1.08% +1.08% +4.68%] index_select skip64 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.00% +0.00% / +0.90% +0.90% +1.08%] index_select skip256 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.90% +0.00% +0.00% / +0.90% +0.90% +0.36%] index_select spread : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +1.08% +0.00% +0.00% / +0.90% +0.72% +0.72%] index_select strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +1.26% +0.90% +0.00% / +1.08% +0.72% +0.72%] index_select strided 5 : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +10.07% / +0.90% +0.54% +0.54%] index_select strided 7 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +13.67% / +0.90% +0.54% +0.54%] index_select strided 8 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +4.14% / +0.90% +0.72% +0.54%] index_select strided 16 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +1.26% +0.00% +6.65% / +0.90% +1.08% +0.90%] index_select strided 64 : Elapsed 0.006 ms (0.563 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +6.67% / +1.08% +1.08% +6.67%] index_select strided 100 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +1.44% +0.00% +0.00% / +0.90% +0.72% +0.54%] index_select random : Elapsed 0.006 ms (0.564 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +1.80% +0.00% / +1.08% +0.72% +0.54%] index_select random_sorted : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +3.24% +0.54% +0.00% / +1.08% +0.36% +0.54%] index_select perm : Elapsed 0.006 ms (0.574 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +1.08% +0.36% +0.00% / +1.08% +0.90% +0.72%] index_select perm_sorted : Elapsed 0.006 ms (0.562 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) dim = 0 fill_cnt = 200 1.848 -> 1.865 ( +0.92%) [ +1.03% +0.00% +0.11% / +0.97% +0.92% +0.92%] index_fill_ const : Elapsed 0.019 ms (1.867 ms / 100) 1.842 -> 1.869 ( +1.47%) [ +1.09% +0.05% +0.00% / +1.47% +1.57% +1.57%] index_fill_ linear : Elapsed 0.019 ms (1.862 ms / 100) 1.849 -> 1.864 ( +0.81%) [ +1.14% +0.11% +0.00% / +1.30% +0.81% +0.87%] index_fill_ reverse : Elapsed 0.019 ms (1.870 ms / 100) 1.852 -> 1.870 ( +0.97%) [ +0.92% +0.00% +0.05% / +0.97% +1.19% +1.13%] index_fill_ skip64 : Elapsed 0.019 ms (1.869 ms / 100) 1.845 -> 1.868 ( +1.25%) [ +0.98% +0.00% +0.16% / +1.25% +1.52% +1.52%] index_fill_ skip256 : Elapsed 0.019 ms (1.863 ms / 100) 1.851 -> 1.865 ( +0.76%) [ +1.03% +0.05% +0.00% / +1.30% +0.76% +0.76%] index_fill_ spread : Elapsed 0.019 ms (1.870 ms / 100) 1.851 -> 1.862 ( +0.59%) [ +1.30% +0.11% +0.00% / +0.59% +0.86% +0.86%] index_fill_ strided 3 : Elapsed 0.019 ms (1.875 ms / 100) 1.851 -> 1.869 ( +0.97%) [ +0.97% +0.05% +0.00% / +1.24% +1.03% +0.97%] index_fill_ random : Elapsed 0.019 ms (1.869 ms / 100) 1.844 -> 1.862 ( +0.98%) [ +1.08% +0.16% +0.00% / +0.98% +1.14% +1.14%] index_fill_ random_sorted : Elapsed 0.019 ms (1.864 ms / 100) out_shape = [200, 5, 500] in_shape = [200, 1, 500] idx_dim = 1 B = [200, 5, 500] (stride (500, 100000, 1)) A = [200, 1, 500] (stride (500, 100000, 1)) dim = 1 5.026 -> 5.032 ( +0.12%) [ +0.00% +0.16% +0.24% / +0.12% +0.14% +0.12%] index_add_ linear : Elapsed 0.050 ms (5.026 ms / 100) 4.893 -> 4.904 ( +0.22%) [ +0.00% +0.22% +0.33% / +0.22% +0.33% +0.37%] index_copy_ linear : Elapsed 0.049 ms (4.893 ms / 100) 5.021 -> 5.012 ( -0.18%) [ +0.16% +0.24% +0.00% / +0.26% +0.24% -0.18%] index_add_ reverse : Elapsed 0.050 ms (5.029 ms / 100) 4.885 -> 4.901 ( +0.33%) [ +0.00% +0.33% +0.14% / +0.39% +0.33% +0.33%] index_copy_ reverse : Elapsed 0.049 ms (4.885 ms / 100) 5.030 -> 5.021 ( -0.18%) [ +0.00% +0.00% +0.08% / +0.06% -0.18% -0.10%] index_add_ spread : Elapsed 0.050 ms (5.030 ms / 100) 4.883 -> 4.902 ( +0.39%) [ +0.14% +0.00% +0.55% / +0.39% +0.57% +0.41%] index_copy_ spread : Elapsed 0.049 ms (4.890 ms / 100) 5.026 -> 5.030 ( +0.08%) [ +0.10% +0.00% +0.00% / +0.24% +0.22% +0.08%] index_add_ strided 3 : Elapsed 0.050 ms (5.031 ms / 100) 4.885 -> 4.887 ( +0.04%) [ +0.02% +0.00% +0.25% / +0.04% +0.37% +0.31%] index_copy_ strided 3 : Elapsed 0.049 ms (4.886 ms / 100) 5.026 -> 5.015 ( -0.22%) [ +0.00% +0.10% +0.28% / -0.22% -0.14% -0.06%] index_add_ perm : Elapsed 0.050 ms (5.026 ms / 100) 4.895 -> 4.887 ( -0.16%) [ +0.18% +0.29% +0.00% / -0.06% -0.16% +0.04%] index_copy_ perm : Elapsed 0.049 ms (4.904 ms / 100) 5.014 -> 5.014 ( +0.00%) [ +0.00% +0.08% +0.16% / +0.00% +0.14% +0.38%] index_add_ perm_sorted : Elapsed 0.050 ms (5.014 ms / 100) 4.885 -> 4.890 ( +0.10%) [ +0.06% +0.00% +0.27% / +0.23% +0.10% +0.33%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.888 ms / 100) 7.629 -> 7.627 ( -0.03%) [ +0.31% +0.20% +0.00% / -0.03% +0.09% +0.29%] index_select const : Elapsed 0.077 ms (7.653 ms / 100) 7.627 -> 7.610 ( -0.22%) [ +0.00% +0.07% +0.13% / +0.29% -0.22% -0.13%] index_select wrap : Elapsed 0.076 ms (7.627 ms / 100) 7.628 -> 7.630 ( +0.03%) [ +0.12% +0.00% +0.25% / +0.09% +0.03% +0.03%] index_select linear : Elapsed 0.076 ms (7.637 ms / 100) 7.619 -> 7.637 ( +0.24%) [ +0.22% +0.00% +0.32% / +0.33% +0.32% +0.24%] index_select reverse : Elapsed 0.076 ms (7.636 ms / 100) 7.625 -> 7.607 ( -0.24%) [ +0.30% +0.00% +0.07% / -0.05% -0.24% +0.03%] index_select skip64 : Elapsed 0.076 ms (7.648 ms / 100) 7.632 -> 7.628 ( -0.05%) [ +0.12% +0.07% +0.00% / +0.08% -0.03% -0.05%] index_select skip256 : Elapsed 0.076 ms (7.641 ms / 100) 7.622 -> 7.625 ( +0.04%) [ +0.18% +0.00% +0.38% / +0.16% +0.04% +0.12%] index_select spread : Elapsed 0.076 ms (7.636 ms / 100) 7.617 -> 7.626 ( +0.12%) [ +0.22% +0.00% +0.24% / +0.14% +0.22% +0.12%] index_select random : Elapsed 0.076 ms (7.634 ms / 100) 7.612 -> 7.601 ( -0.14%) [ +0.20% +0.00% +0.17% / -0.14% +0.12% +0.12%] index_select random_sorted : Elapsed 0.076 ms (7.627 ms / 100) B = [200, 5, 500] (stride (1, 100000, 200)) A = [200, 1, 500] (stride (500, 100000, 1)) dim = 1 5.476 -> 5.485 ( +0.16%) [ +0.05% +0.00% +0.09% / +0.16% +0.33% +0.47%] index_add_ linear : Elapsed 0.055 ms (5.479 ms / 100) 5.167 -> 5.161 ( -0.12%) [ +0.02% +0.00% +0.10% / -0.10% -0.08% -0.12%] index_copy_ linear : Elapsed 0.052 ms (5.168 ms / 100) 5.468 -> 5.481 ( +0.24%) [ +0.38% +0.00% +0.22% / +0.33% +0.48% +0.24%] index_add_ reverse : Elapsed 0.055 ms (5.489 ms / 100) 5.154 -> 5.161 ( +0.14%) [ +0.21% +0.00% +0.10% / +0.19% +0.19% +0.14%] index_copy_ reverse : Elapsed 0.052 ms (5.165 ms / 100) 5.473 -> 5.479 ( +0.11%) [ +0.00% +0.15% +0.18% / +0.11% +0.46% +0.40%] index_add_ spread : Elapsed 0.055 ms (5.473 ms / 100) 5.159 -> 5.169 ( +0.19%) [ +0.06% +0.00% +0.23% / +0.19% +0.21% +0.33%] index_copy_ spread : Elapsed 0.052 ms (5.162 ms / 100) 5.452 -> 5.466 ( +0.26%) [ +0.53% +0.00% +0.35% / +0.26% +0.95% +0.92%] index_add_ strided 3 : Elapsed 0.055 ms (5.481 ms / 100) 5.156 -> 5.152 ( -0.08%) [ +0.19% +0.00% +0.02% / -0.08% +0.12% +0.14%] index_copy_ strided 3 : Elapsed 0.052 ms (5.166 ms / 100) 5.491 -> 5.488 ( -0.05%) [ +0.18% +0.00% +0.00% / +0.15% +0.00% -0.05%] index_add_ perm : Elapsed 0.055 ms (5.501 ms / 100) 5.158 -> 5.161 ( +0.06%) [ +0.16% +0.02% +0.00% / +0.06% +0.23% +0.08%] index_copy_ perm : Elapsed 0.052 ms (5.166 ms / 100) 5.483 -> 5.485 ( +0.04%) [ +0.20% +0.00% +0.38% / +0.29% +0.11% +0.04%] index_add_ perm_sorted : Elapsed 0.055 ms (5.494 ms / 100) 5.156 -> 5.153 ( -0.06%) [ +0.02% +0.00% +0.29% / +0.14% +0.16% -0.06%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.157 ms / 100) 8.515 -> 8.510 ( -0.06%) [ +0.07% +0.00% +0.08% / -0.01% -0.06% -0.02%] index_select const : Elapsed 0.085 ms (8.521 ms / 100) 8.498 -> 8.503 ( +0.06%) [ +0.20% +0.00% +0.12% / +0.35% +0.16% +0.06%] index_select wrap : Elapsed 0.085 ms (8.515 ms / 100) 8.507 -> 8.503 ( -0.05%) [ +0.15% +0.01% +0.00% / +0.02% +0.08% -0.05%] index_select linear : Elapsed 0.085 ms (8.520 ms / 100) 8.497 -> 8.508 ( +0.13%) [ +0.36% +0.00% +0.21% / +0.45% +0.13% +0.13%] index_select reverse : Elapsed 0.085 ms (8.528 ms / 100) 8.487 -> 8.498 ( +0.13%) [ +0.33% +0.32% +0.00% / +0.18% +0.32% +0.13%] index_select skip64 : Elapsed 0.085 ms (8.515 ms / 100) 8.509 -> 8.515 ( +0.07%) [ +0.16% +0.05% +0.00% / +0.26% +0.24% +0.07%] index_select skip256 : Elapsed 0.085 ms (8.523 ms / 100) 8.497 -> 8.514 ( +0.20%) [ +0.31% +0.00% +0.18% / +0.20% +0.31% +0.38%] index_select spread : Elapsed 0.085 ms (8.523 ms / 100) 8.500 -> 8.504 ( +0.05%) [ +0.20% +0.15% +0.00% / +0.05% +0.26% +0.09%] index_select random : Elapsed 0.085 ms (8.517 ms / 100) 8.509 -> 8.488 ( -0.25%) [ +0.00% +0.02% +0.07% / +0.05% -0.09% -0.25%] index_select random_sorted : Elapsed 0.085 ms (8.509 ms / 100) B = [200, 5, 500] (stride (5, 1, 1000)) A = [200, 1, 500] (stride (500, 100000, 1)) dim = 1 7.696 -> 7.665 ( -0.40%) [ +0.00% +0.22% +0.13% / +0.06% -0.35% -0.40%] index_add_ linear : Elapsed 0.077 ms (7.696 ms / 100) 6.748 -> 6.737 ( -0.16%) [ +0.28% +0.30% +0.00% / +0.30% -0.15% -0.16%] index_copy_ linear : Elapsed 0.068 ms (6.767 ms / 100) 7.661 -> 7.636 ( -0.33%) [ +0.26% +0.00% +0.50% / +0.40% -0.33% +0.17%] index_add_ reverse : Elapsed 0.077 ms (7.681 ms / 100) 6.749 -> 6.709 ( -0.59%) [ +0.00% +0.34% +0.34% / +0.25% -0.59% -0.19%] index_copy_ reverse : Elapsed 0.067 ms (6.749 ms / 100) 7.684 -> 7.657 ( -0.35%) [ +0.00% +0.01% +0.26% / -0.04% -0.35% +0.31%] index_add_ spread : Elapsed 0.077 ms (7.684 ms / 100) 6.754 -> 6.742 ( -0.18%) [ +0.21% +0.18% +0.00% / +0.15% -0.18% -0.04%] index_copy_ spread : Elapsed 0.068 ms (6.768 ms / 100) 7.684 -> 7.662 ( -0.29%) [ +0.00% +0.34% +0.42% / +0.27% -0.29% -0.14%] index_add_ strided 3 : Elapsed 0.077 ms (7.684 ms / 100) 6.772 -> 6.747 ( -0.37%) [ +0.00% +0.04% +0.41% / +0.31% -0.37% -0.32%] index_copy_ strided 3 : Elapsed 0.068 ms (6.772 ms / 100) 7.669 -> 7.680 ( +0.14%) [ +0.00% +0.43% +0.00% / +0.22% +0.14% +0.42%] index_add_ perm : Elapsed 0.077 ms (7.669 ms / 100) 6.751 -> 6.756 ( +0.07%) [ +0.00% +0.07% +0.01% / +0.13% +0.07% +0.36%] index_copy_ perm : Elapsed 0.068 ms (6.751 ms / 100) 7.686 -> 7.648 ( -0.49%) [ +0.00% +0.03% +0.08% / +0.13% -0.49% +0.23%] index_add_ perm_sorted : Elapsed 0.077 ms (7.686 ms / 100) 6.751 -> 6.759 ( +0.12%) [ +0.00% +0.33% +0.16% / +0.37% +0.12% +0.21%] index_copy_ perm_sorted : Elapsed 0.068 ms (6.751 ms / 100) 22.786 -> 22.418 ( -1.62%) [ +0.47% +0.42% +0.00% / +0.12% -1.62% -1.11%] index_select const : Elapsed 0.229 ms (22.893 ms / 100) 22.837 -> 22.464 ( -1.63%) [ +0.00% +0.11% +0.14% / +0.14% -1.63% -1.37%] index_select wrap : Elapsed 0.228 ms (22.837 ms / 100) 22.821 -> 22.447 ( -1.64%) [ +0.31% +0.00% +0.30% / -0.31% -1.42% -1.64%] index_select linear : Elapsed 0.229 ms (22.892 ms / 100) 22.773 -> 22.427 ( -1.52%) [ +0.29% +0.13% +0.00% / +0.75% -1.52% -1.32%] index_select reverse : Elapsed 0.228 ms (22.840 ms / 100) 22.775 -> 22.438 ( -1.48%) [ +0.00% +0.08% +0.05% / +0.17% -1.19% -1.48%] index_select skip64 : Elapsed 0.228 ms (22.775 ms / 100) 22.842 -> 22.448 ( -1.72%) [ +0.36% +0.00% +0.30% / -0.17% -1.72% -1.45%] index_select skip256 : Elapsed 0.229 ms (22.924 ms / 100) 22.754 -> 22.408 ( -1.52%) [ +0.33% +0.26% +0.00% / +0.52% -1.52% -0.99%] index_select spread : Elapsed 0.228 ms (22.829 ms / 100) 22.765 -> 22.414 ( -1.54%) [ +0.55% +0.09% +0.00% / +0.32% -1.23% -1.54%] index_select random : Elapsed 0.229 ms (22.891 ms / 100) 22.810 -> 22.452 ( -1.57%) [ +0.11% +0.02% +0.00% / -0.05% -1.53% -1.57%] index_select random_sorted : Elapsed 0.228 ms (22.834 ms / 100) B = [200, 5, 500] (stride (1, 200, 1000)) A = [200, 1, 500] (stride (1, 200, 200)) dim = 1 5.943 -> 5.945 ( +0.03%) [ +0.29% +0.02% +0.00% / +0.17% +0.03% +0.32%] index_add_ linear : Elapsed 0.060 ms (5.960 ms / 100) 5.557 -> 5.558 ( +0.02%) [ +0.16% +0.00% +0.14% / +0.13% +0.02% +0.05%] index_copy_ linear : Elapsed 0.056 ms (5.566 ms / 100) 5.937 -> 5.949 ( +0.20%) [ +0.25% +0.15% +0.00% / +0.20% +0.20% +0.24%] index_add_ reverse : Elapsed 0.060 ms (5.952 ms / 100) 5.556 -> 5.539 ( -0.31%) [ +0.09% +0.23% +0.00% / +0.20% -0.31% -0.05%] index_copy_ reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.936 -> 5.939 ( +0.05%) [ +0.10% +0.07% +0.00% / +0.34% +0.35% +0.05%] index_add_ spread : Elapsed 0.059 ms (5.942 ms / 100) 5.549 -> 5.552 ( +0.05%) [ +0.00% +0.07% +0.09% / +0.29% +0.29% +0.05%] index_copy_ spread : Elapsed 0.055 ms (5.549 ms / 100) 5.934 -> 5.946 ( +0.20%) [ +0.25% +0.20% +0.00% / +0.20% +0.29% +0.35%] index_add_ strided 3 : Elapsed 0.059 ms (5.949 ms / 100) 5.549 -> 5.548 ( -0.02%) [ +0.14% +0.36% +0.00% / +0.25% +0.16% -0.02%] index_copy_ strided 3 : Elapsed 0.056 ms (5.557 ms / 100) 5.938 -> 5.944 ( +0.10%) [ +0.00% +0.15% +0.00% / +0.15% +0.10% +0.44%] index_add_ perm : Elapsed 0.059 ms (5.938 ms / 100) 5.553 -> 5.542 ( -0.20%) [ +0.00% +0.14% +0.00% / +0.16% -0.20% +0.00%] index_copy_ perm : Elapsed 0.056 ms (5.553 ms / 100) 5.929 -> 5.935 ( +0.10%) [ +0.32% +0.00% +0.17% / +0.20% +0.10% +0.25%] index_add_ perm_sorted : Elapsed 0.059 ms (5.948 ms / 100) 5.546 -> 5.547 ( +0.02%) [ +0.22% +0.00% +0.29% / +0.32% +0.29% +0.02%] index_copy_ perm_sorted : Elapsed 0.056 ms (5.558 ms / 100) 10.112 -> 10.137 ( +0.25%) [ +0.16% +0.36% +0.00% / +0.37% +0.27% +0.25%] index_select const : Elapsed 0.101 ms (10.128 ms / 100) 10.121 -> 10.142 ( +0.21%) [ +0.28% +0.00% +0.29% / +0.35% +0.21% +0.54%] index_select wrap : Elapsed 0.101 ms (10.149 ms / 100) 10.121 -> 10.142 ( +0.21%) [ +0.17% +0.09% +0.00% / +0.21% +0.24% +0.23%] index_select linear : Elapsed 0.101 ms (10.138 ms / 100) 10.102 -> 10.123 ( +0.21%) [ +0.42% +0.00% +0.57% / +0.35% +0.45% +0.21%] index_select reverse : Elapsed 0.101 ms (10.144 ms / 100) 10.123 -> 10.126 ( +0.03%) [ +0.25% +0.00% +0.53% / +0.07% +0.23% +0.03%] index_select skip64 : Elapsed 0.101 ms (10.148 ms / 100) 10.122 -> 10.127 ( +0.05%) [ +0.08% +0.00% +0.52% / +0.08% +0.17% +0.05%] index_select skip256 : Elapsed 0.101 ms (10.130 ms / 100) 10.110 -> 10.127 ( +0.17%) [ +0.25% +0.00% +0.73% / +0.24% +0.17% +0.18%] index_select spread : Elapsed 0.101 ms (10.135 ms / 100) 10.118 -> 10.121 ( +0.03%) [ +0.08% +0.00% +0.09% / +0.03% +0.26% +0.37%] index_select random : Elapsed 0.101 ms (10.126 ms / 100) 10.113 -> 10.133 ( +0.20%) [ +0.55% +0.00% +0.10% / +0.20% +0.38% +0.49%] index_select random_sorted : Elapsed 0.102 ms (10.169 ms / 100) out_shape = [200, 1, 5] in_shape = [200, 1, 500] idx_dim = 2 B = [200, 1, 5] (stride (5, 1, 1)) A = [200, 1, 500] (stride (1, 1, 200)) dim = 2 1.174 -> 1.183 ( +0.77%) [ +0.94% +0.00% +0.60% / +1.02% +0.77% +0.94%] index_select const : Elapsed 0.012 ms (1.185 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.00% +0.51% / +0.59% +0.93% +0.76%] index_select wrap : Elapsed 0.012 ms (1.187 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.93% +0.00% +0.42% / +0.76% +0.76% +0.76%] index_select linear : Elapsed 0.012 ms (1.188 ms / 100) 1.175 -> 1.186 ( +0.94%) [ +0.85% +0.00% +0.34% / +0.94% +1.11% +0.94%] index_select reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.51% +0.00% +0.51% / +0.59% +0.59% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.185 ms / 100) 1.174 -> 1.184 ( +0.85%) [ +0.85% +0.34% +0.00% / +1.11% +0.85% +0.94%] index_select skip256 : Elapsed 0.012 ms (1.184 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.00% +0.00% / +0.68% +0.59% +0.59%] index_select spread : Elapsed 0.012 ms (1.187 ms / 100) 1.181 -> 1.186 ( +0.42%) [ +0.51% +0.08% +0.00% / +0.51% +0.68% +0.42%] index_select strided 3 : Elapsed 0.012 ms (1.187 ms / 100) 1.176 -> 1.185 ( +0.77%) [ +1.11% +0.00% +0.26% / +0.85% +0.77% +1.02%] index_select strided 5 : Elapsed 0.012 ms (1.189 ms / 100) 1.181 -> 1.185 ( +0.34%) [ +0.42% +0.00% +0.17% / +0.42% +0.42% +0.34%] index_select strided 7 : Elapsed 0.012 ms (1.186 ms / 100) 1.175 -> 1.183 ( +0.68%) [ +0.94% +0.00% +0.43% / +1.02% +0.85% +0.68%] index_select strided 8 : Elapsed 0.012 ms (1.186 ms / 100) 1.176 -> 1.184 ( +0.68%) [ +0.77% +0.00% +0.68% / +0.85% +0.68% +0.77%] index_select strided 16 : Elapsed 0.012 ms (1.185 ms / 100) 1.177 -> 1.185 ( +0.68%) [ +0.76% +0.17% +0.00% / +0.76% +0.68% +0.76%] index_select strided 64 : Elapsed 0.012 ms (1.186 ms / 100) 1.174 -> 1.186 ( +1.02%) [ +0.94% +0.00% +1.28% / +1.02% +1.11% +1.11%] index_select strided 100 : Elapsed 0.012 ms (1.185 ms / 100) 1.177 -> 1.183 ( +0.51%) [ +0.68% +0.59% +0.00% / +0.85% +0.68% +0.51%] index_select strided 255 : Elapsed 0.012 ms (1.185 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +1.02% +0.00% +0.60% / +0.77% +0.85% +1.02%] index_select strided 256 : Elapsed 0.012 ms (1.187 ms / 100) 1.176 -> 1.184 ( +0.68%) [ +1.02% +0.00% +2.89% / +0.77% +0.68% +0.94%] index_select strided 257 : Elapsed 0.012 ms (1.188 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +0.94% +0.00% +0.43% / +0.77% +1.11% +0.85%] index_select random : Elapsed 0.012 ms (1.186 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +0.85% +0.00% +0.26% / +0.85% +1.02% +0.94%] index_select random_sorted : Elapsed 0.012 ms (1.185 ms / 100) 1.174 -> 1.186 ( +1.02%) [ +1.19% +0.43% +0.00% / +1.02% +1.19% +1.19%] index_select perm : Elapsed 0.012 ms (1.188 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.68% +0.00% / +0.68% +0.59% +0.93%] index_select perm_sorted : Elapsed 0.012 ms (1.187 ms / 100) B = [200, 1, 5] (stride (5, 1000, 1)) A = [200, 1, 500] (stride (1, 1, 200)) dim = 2 1.174 -> 1.184 ( +0.85%) [ +0.94% +0.00% +0.00% / +0.85% +0.85% +1.02%] index_select const : Elapsed 0.012 ms (1.185 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +0.94% +0.00% +0.68% / +0.85% +0.94% +0.85%] index_select wrap : Elapsed 0.012 ms (1.186 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +1.02% +0.00% +1.19% / +0.85% +0.85% +0.94%] index_select linear : Elapsed 0.012 ms (1.187 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.85% +0.42% +0.00% / +0.85% +0.85% +0.76%] index_select reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.174 -> 1.186 ( +1.02%) [ +0.94% +0.00% +0.26% / +1.11% +1.19% +1.02%] index_select skip64 : Elapsed 0.012 ms (1.185 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +0.94% +0.00% +0.77% / +0.85% +1.02% +0.85%] index_select skip256 : Elapsed 0.012 ms (1.186 ms / 100) 1.172 -> 1.186 ( +1.19%) [ +1.19% +0.00% +0.43% / +1.28% +1.19% +1.19%] index_select spread : Elapsed 0.012 ms (1.186 ms / 100) 1.171 -> 1.184 ( +1.11%) [ +1.11% +0.26% +0.00% / +1.11% +1.28% +1.37%] index_select strided 3 : Elapsed 0.012 ms (1.184 ms / 100) 1.172 -> 1.184 ( +1.02%) [ +1.02% +0.17% +0.00% / +1.02% +1.37% +1.11%] index_select strided 5 : Elapsed 0.012 ms (1.184 ms / 100) 1.172 -> 1.184 ( +1.02%) [ +1.19% +0.00% +0.00% / +1.02% +1.37% +1.11%] index_select strided 7 : Elapsed 0.012 ms (1.186 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +1.02% +0.26% +0.00% / +0.85% +1.11% +0.85%] index_select strided 8 : Elapsed 0.012 ms (1.187 ms / 100) 1.175 -> 1.185 ( +0.85%) [ +0.77% +0.00% +0.34% / +0.85% +1.11% +0.85%] index_select strided 16 : Elapsed 0.012 ms (1.184 ms / 100) 1.173 -> 1.185 ( +1.02%) [ +1.11% +0.00% +0.68% / +1.19% +1.02% +1.19%] index_select strided 64 : Elapsed 0.012 ms (1.186 ms / 100) 1.173 -> 1.184 ( +0.94%) [ +1.19% +0.00% +0.26% / +0.94% +1.02% +1.28%] index_select strided 100 : Elapsed 0.012 ms (1.187 ms / 100) 1.177 -> 1.185 ( +0.68%) [ +0.68% +0.00% +0.17% / +0.68% +0.76% +0.68%] index_select strided 255 : Elapsed 0.012 ms (1.185 ms / 100) 1.174 -> 1.185 ( +0.94%) [ +1.02% +0.09% +0.00% / +1.02% +1.11% +0.94%] index_select strided 256 : Elapsed 0.012 ms (1.186 ms / 100) 1.178 -> 1.185 ( +0.59%) [ +0.85% +0.00% +0.68% / +0.76% +0.85% +0.59%] index_select strided 257 : Elapsed 0.012 ms (1.188 ms / 100) 1.176 -> 1.185 ( +0.77%) [ +0.77% +0.09% +0.00% / +1.02% +0.85% +0.77%] index_select random : Elapsed 0.012 ms (1.185 ms / 100) 1.176 -> 1.185 ( +0.77%) [ +0.85% +0.26% +0.00% / +0.77% +0.85% +0.77%] index_select random_sorted : Elapsed 0.012 ms (1.186 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.00% +0.68% / +0.68% +0.85% +0.59%] index_select perm : Elapsed 0.012 ms (1.187 ms / 100) 1.173 -> 1.185 ( +1.02%) [ +1.19% +0.60% +0.00% / +1.11% +1.19% +1.02%] index_select perm_sorted : Elapsed 0.012 ms (1.187 ms / 100) B = [200, 1, 5] (stride (1, 1000, 200)) A = [200, 1, 500] (stride (500, 1, 1)) dim = 2 0.638 -> 0.640 ( +0.31%) [ +0.16% +0.00% +0.00% / +0.31% +0.31% +0.31%] index_select const : Elapsed 0.006 ms (0.639 ms / 100) 0.633 -> 0.640 ( +1.11%) [ +1.11% +0.00% +1.26% / +1.11% +1.26% +1.11%] index_select wrap : Elapsed 0.006 ms (0.640 ms / 100) 0.639 -> 0.644 ( +0.78%) [ +0.00% +0.00% +10.95% / +0.78% +1.10% +1.10%] index_select linear : Elapsed 0.006 ms (0.639 ms / 100) 0.636 -> 0.640 ( +0.63%) [ +0.63% +0.00% +0.63% / +0.63% +1.10% +1.10%] index_select reverse : Elapsed 0.006 ms (0.640 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.63% +0.00% +0.16% / +0.31% +0.16% +0.94%] index_select skip64 : Elapsed 0.006 ms (0.642 ms / 100) 0.637 -> 0.639 ( +0.31%) [ +0.94% +0.00% +0.00% / +0.31% +1.26% +2.04%] index_select skip256 : Elapsed 0.006 ms (0.643 ms / 100) 0.630 -> 0.638 ( +1.27%) [ +1.27% +0.00% +0.00% / +1.27% +1.43% +1.59%] index_select spread : Elapsed 0.006 ms (0.638 ms / 100) 0.632 -> 0.640 ( +1.27%) [ +1.27% +0.00% +1.11% / +1.27% +2.37% +1.27%] index_select strided 3 : Elapsed 0.006 ms (0.640 ms / 100) 0.639 -> 0.643 ( +0.63%) [ +0.63% +0.31% +0.00% / +3.29% +1.10% +0.63%] index_select strided 5 : Elapsed 0.006 ms (0.643 ms / 100) 0.632 -> 0.641 ( +1.42%) [ +1.27% +0.00% +0.16% / +1.42% +1.42% +1.58%] index_select strided 7 : Elapsed 0.006 ms (0.640 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.63% +0.31% +0.00% / +0.63% +0.78% +0.63%] index_select strided 8 : Elapsed 0.006 ms (0.641 ms / 100) 0.640 -> 0.642 ( +0.31%) [ +0.31% +0.00% +0.00% / +0.31% +1.09% +0.31%] index_select strided 16 : Elapsed 0.006 ms (0.642 ms / 100) 0.635 -> 0.640 ( +0.79%) [ +0.94% +0.00% +0.16% / +0.79% +1.10% +0.94%] index_select strided 64 : Elapsed 0.006 ms (0.641 ms / 100) 0.629 -> 0.638 ( +1.43%) [ +1.27% +0.00% +0.16% / +1.43% +1.59% +1.75%] index_select strided 100 : Elapsed 0.006 ms (0.637 ms / 100) 0.634 -> 0.639 ( +0.79%) [ +0.63% +0.00% +0.00% / +0.79% +0.79% +0.95%] index_select strided 255 : Elapsed 0.006 ms (0.638 ms / 100) 0.625 -> 0.631 ( +0.96%) [ +0.96% +0.00% +0.00% / +0.96% +1.28% +1.28%] index_select strided 256 : Elapsed 0.006 ms (0.631 ms / 100) 0.640 -> 0.645 ( +0.78%) [ +0.94% +0.00% +0.63% / +0.94% +0.78% +0.94%] index_select strided 257 : Elapsed 0.006 ms (0.646 ms / 100) 0.630 -> 0.637 ( +1.11%) [ +1.27% +0.48% +0.00% / +1.59% +1.27% +1.11%] index_select random : Elapsed 0.006 ms (0.638 ms / 100) 0.628 -> 0.630 ( +0.32%) [ +0.48% +0.00% +0.00% / +0.32% +0.80% +0.64%] index_select random_sorted : Elapsed 0.006 ms (0.631 ms / 100) 0.635 -> 0.640 ( +0.79%) [ +0.94% +0.16% +0.00% / +0.79% +0.94% +0.79%] index_select perm : Elapsed 0.006 ms (0.641 ms / 100) 0.640 -> 0.645 ( +0.78%) [ +0.78% +0.16% +0.00% / +0.78% +0.78% +0.94%] index_select perm_sorted : Elapsed 0.006 ms (0.645 ms / 100) B = [200, 1, 5] (stride (1, 200, 200)) A = [200, 1, 500] (stride (500, 500, 1)) dim = 2 1.273 -> 1.287 ( +1.10%) [ +1.26% +0.24% +0.00% / +1.18% +1.10% +1.10%] index_select const : Elapsed 0.013 ms (1.289 ms / 100) 1.273 -> 1.286 ( +1.02%) [ +1.02% +0.08% +0.00% / +1.18% +1.10% +1.02%] index_select wrap : Elapsed 0.013 ms (1.286 ms / 100) 1.274 -> 1.286 ( +0.94%) [ +1.02% +0.00% +0.00% / +1.02% +0.94% +0.94%] index_select linear : Elapsed 0.013 ms (1.287 ms / 100) 1.274 -> 1.287 ( +1.02%) [ +1.02% +0.08% +0.00% / +1.02% +1.10% +1.10%] index_select reverse : Elapsed 0.013 ms (1.287 ms / 100) 1.273 -> 1.285 ( +0.94%) [ +1.26% +0.24% +0.00% / +0.94% +1.18% +1.18%] index_select skip64 : Elapsed 0.013 ms (1.289 ms / 100) 1.273 -> 1.288 ( +1.18%) [ +1.02% +0.00% +0.00% / +1.18% +1.26% +1.34%] index_select skip256 : Elapsed 0.013 ms (1.286 ms / 100) 1.262 -> 1.274 ( +0.95%) [ +1.11% +0.00% +0.32% / +0.95% +1.19% +1.43%] index_select spread : Elapsed 0.013 ms (1.276 ms / 100) 1.273 -> 1.285 ( +0.94%) [ +1.26% +0.00% +0.00% / +0.94% +1.41% +1.34%] index_select strided 3 : Elapsed 0.013 ms (1.289 ms / 100) 1.269 -> 1.279 ( +0.79%) [ +1.34% +0.00% +0.47% / +1.10% +1.34% +0.79%] index_select strided 5 : Elapsed 0.013 ms (1.286 ms / 100) 1.261 -> 1.273 ( +0.95%) [ +1.11% +0.24% +0.00% / +1.03% +0.95% +1.03%] index_select strided 7 : Elapsed 0.013 ms (1.275 ms / 100) 1.257 -> 1.269 ( +0.95%) [ +1.27% +0.00% +0.00% / +0.95% +1.35% +0.95%] index_select strided 8 : Elapsed 0.013 ms (1.273 ms / 100) 1.255 -> 1.266 ( +0.88%) [ +0.96% +0.00% +0.00% / +0.88% +1.20% +0.96%] index_select strided 16 : Elapsed 0.013 ms (1.267 ms / 100) 1.270 -> 1.283 ( +1.02%) [ +1.02% +0.08% +0.00% / +1.10% +1.26% +1.02%] index_select strided 64 : Elapsed 0.013 ms (1.283 ms / 100) 1.241 -> 1.253 ( +0.97%) [ +0.64% +0.00% +0.00% / +0.97% +0.97% +1.05%] index_select strided 100 : Elapsed 0.012 ms (1.249 ms / 100) 1.249 -> 1.262 ( +1.04%) [ +1.20% +0.08% +0.00% / +1.04% +1.28% +1.36%] index_select strided 255 : Elapsed 0.013 ms (1.264 ms / 100) 1.246 -> 1.258 ( +0.96%) [ +1.12% +0.00% +0.00% / +0.96% +1.20% +1.20%] index_select strided 256 : Elapsed 0.013 ms (1.260 ms / 100) 1.239 -> 1.253 ( +1.13%) [ +1.13% +0.00% +0.08% / +1.13% +1.29% +1.37%] index_select strided 257 : Elapsed 0.013 ms (1.253 ms / 100) 1.250 -> 1.265 ( +1.20%) [ +1.20% +0.00% +0.00% / +1.20% +1.36% +1.20%] index_select random : Elapsed 0.013 ms (1.265 ms / 100) 1.237 -> 1.249 ( +0.97%) [ +1.05% +0.00% +0.00% / +0.97% +1.29% +1.29%] index_select random_sorted : Elapsed 0.013 ms (1.250 ms / 100) 1.233 -> 1.247 ( +1.14%) [ +1.14% +0.00% +0.08% / +1.14% +1.46% +1.30%] index_select perm : Elapsed 0.012 ms (1.247 ms / 100) 1.244 -> 1.256 ( +0.96%) [ +1.05% +0.00% +0.00% / +1.05% +0.96% +1.05%] index_select perm_sorted : Elapsed 0.013 ms (1.257 ms / 100) out_shape = [5, 500, 1] in_shape = [200, 500, 1] idx_dim = 0 B = [5, 500, 1] (stride (500, 1, 1)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 0 0.556 -> 0.557 ( +0.18%) [ +0.90% +0.36% +0.00% / +0.72% +8.27% +0.18%] index_select const : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.90% +0.00% +0.00% / +0.90% +0.54% +0.36%] index_select wrap : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.00% / +1.44% +0.72% +0.54%] index_select linear : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +1.08% +0.00% +0.00% / +0.90% +0.54% +0.36%] index_select reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +2.16% / +0.90% +0.72% +0.54%] index_select skip64 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.90% +0.00% +2.88% / +0.90% +0.54% +0.36%] index_select skip256 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +3.24% / +0.90% +0.72% +0.54%] index_select spread : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +1.08% +0.00% +0.00% / +0.90% +0.54% +0.72%] index_select strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.72% / +0.72% +0.54% +0.72%] index_select strided 5 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +1.08% +0.00% +0.18% / +1.08% +0.72% +0.72%] index_select strided 7 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +1.08% +0.00% +0.54% / +1.08% +1.08% +0.72%] index_select strided 8 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +1.08% +1.26%] index_select strided 16 : Elapsed 0.006 ms (0.559 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.36% +0.00% +22.12% / +0.36% +0.90% +0.90%] index_select strided 64 : Elapsed 0.006 ms (0.558 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +0.72%] index_select strided 100 : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.26% +1.44%] index_select random : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.72% +0.90% +1.08%] index_select random_sorted : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +1.08% +0.00% +0.18% / +0.72% +1.08% +1.08%] index_select perm : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.54% +0.18% +0.00% / +0.54% +1.08% +10.99%] index_select perm_sorted : Elapsed 0.006 ms (0.558 ms / 100) B = [5, 500, 1] (stride (500, 1, 500)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 0 0.554 -> 0.561 ( +1.26%) [ +0.72% +0.18% +0.00% / +2.71% +1.26% +1.26%] index_select const : Elapsed 0.006 ms (0.558 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +0.90% +0.18% +0.00% / +1.08% +1.45% +1.45%] index_select wrap : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +0.54% +0.00% +0.54% / +0.90% +1.08% +1.08%] index_select linear : Elapsed 0.006 ms (0.558 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +1.08% +0.36% +0.00% / +1.08% +1.63% +1.45%] index_select reverse : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.72% / +0.72% +1.08% +1.08%] index_select skip64 : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +2.53% / +0.90% +1.26% +1.26%] index_select skip256 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.54% +0.00% +1.98% / +0.72% +1.26% +1.08%] index_select spread : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.72% +0.00% +3.07% / +0.72% +1.44% +1.26%] index_select strided 3 : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +1.26% +1.08%] index_select strided 5 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.26% +1.26%] index_select strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +0.90% +0.00% +0.36% / +0.90% +2.70% +1.08%] index_select strided 8 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +0.90%] index_select strided 16 : Elapsed 0.006 ms (0.559 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.54% +0.00% +0.00% / +0.36% +0.72% +0.72%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +2.53% +0.36% +0.00% / +0.90% +1.26% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.568 ms / 100) 0.554 -> 0.561 ( +1.26%) [ +3.07% +0.00% +0.00% / +2.53% +1.44% +1.26%] index_select random : Elapsed 0.006 ms (0.571 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.00% +1.08% / +0.54% +1.26% +2.52%] index_select random_sorted : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.26% +13.72%] index_select perm : Elapsed 0.006 ms (0.559 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.54% +0.18% +0.00% / +0.54% +1.08% +5.94%] index_select perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) B = [5, 500, 1] (stride (500, 1, 2500)) A = [200, 500, 1] (stride (500, 1, 500)) dim = 0 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +0.72% +0.54%] index_select const : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.18% / +0.72% +1.08% +1.26%] index_select wrap : Elapsed 0.006 ms (0.560 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +2.16% +0.00% +0.00% / +0.54% +1.08% +0.90%] index_select linear : Elapsed 0.006 ms (0.568 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.54% +0.18% +0.00% / +0.36% +0.90% +0.72%] index_select reverse : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.560 ( +1.08%) [ +0.90% +0.00% +0.18% / +1.99% +1.26% +1.08%] index_select skip64 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +0.54% +0.00% +0.00% / +4.14% +1.08% +1.08%] index_select skip256 : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +1.08% +1.26%] index_select spread : Elapsed 0.006 ms (0.559 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.54% +0.00% +0.18% / +0.54% +0.72% +1.08%] index_select strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +1.27% +0.36% +0.00% / +1.08% +1.45% +1.63%] index_select strided 5 : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +1.08% +0.36% +0.00% / +0.90% +1.81% +1.26%] index_select strided 7 : Elapsed 0.006 ms (0.560 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +1.08% +0.00% +1.08% / +1.08% +7.96% +1.45%] index_select strided 8 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.72% +0.00% +3.43% / +0.90% +1.44% +1.08%] index_select strided 16 : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.08% +1.08%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.26% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.36% +0.00% / +0.72% +1.26% +1.26%] index_select random : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +1.08% +1.44%] index_select random_sorted : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.26% +0.18% +0.00% / +1.08% +0.90% +10.99%] index_select perm : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.00% / +0.90% +0.72% +0.54%] index_select perm_sorted : Elapsed 0.006 ms (0.561 ms / 100) out_shape = [200, 5, 1] in_shape = [200, 500, 1] idx_dim = 1 B = [200, 5, 1] (stride (5, 1, 1)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 1 0.599 -> 0.599 ( +0.00%) [ +0.50% +0.17% +0.00% / +0.33% +0.00% +0.00%] index_select const : Elapsed 0.006 ms (0.602 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.50% +0.17% +0.00% / +0.67% +0.17% +0.17%] index_select wrap : Elapsed 0.006 ms (0.601 ms / 100) 0.588 -> 0.588 ( +0.00%) [ +1.53% +0.00% +0.00% / +1.02% +0.17% +0.00%] index_select linear : Elapsed 0.006 ms (0.597 ms / 100) 0.597 -> 0.601 ( +0.67%) [ +0.50% +0.17% +0.00% / +0.67% +0.67% +2.18%] index_select reverse : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.50% +0.33% +0.00% / +0.50% +0.17% +1.34%] index_select skip64 : Elapsed 0.006 ms (0.601 ms / 100) 0.587 -> 0.590 ( +0.51%) [ +1.02% +0.00% +0.17% / +0.68% +0.51% +0.51%] index_select skip256 : Elapsed 0.006 ms (0.593 ms / 100) 0.587 -> 0.593 ( +1.02%) [ +1.70% +0.17% +0.00% / +1.70% +1.19% +1.02%] index_select spread : Elapsed 0.006 ms (0.597 ms / 100) 0.597 -> 0.598 ( +0.17%) [ +0.67% +0.00% +0.84% / +0.67% +0.17% +0.34%] index_select strided 3 : Elapsed 0.006 ms (0.601 ms / 100) 0.592 -> 0.595 ( +0.51%) [ +1.18% +0.00% +0.00% / +1.18% +0.51% +0.51%] index_select strided 5 : Elapsed 0.006 ms (0.599 ms / 100) 0.600 -> 0.603 ( +0.50%) [ +0.67% +0.17% +0.00% / +0.50% +0.50% +0.50%] index_select strided 7 : Elapsed 0.006 ms (0.604 ms / 100) 0.594 -> 0.596 ( +0.34%) [ +1.01% +0.00% +0.67% / +1.01% +0.51% +0.34%] index_select strided 8 : Elapsed 0.006 ms (0.600 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.00% +0.68% / +1.02% +0.51% +0.68%] index_select strided 16 : Elapsed 0.006 ms (0.591 ms / 100) 0.591 -> 0.594 ( +0.51%) [ +1.52% +0.00% +0.34% / +0.85% +1.02% +0.51%] index_select strided 64 : Elapsed 0.006 ms (0.600 ms / 100) 0.592 -> 0.593 ( +0.17%) [ +0.84% +0.00% +0.00% / +0.68% +0.17% +0.34%] index_select strided 100 : Elapsed 0.006 ms (0.597 ms / 100) 0.593 -> 0.595 ( +0.34%) [ +1.18% +0.34% +0.00% / +1.01% +1.35% +0.34%] index_select strided 255 : Elapsed 0.006 ms (0.600 ms / 100) 0.594 -> 0.599 ( +0.84%) [ +1.35% +0.00% +0.00% / +1.01% +0.84% +4.88%] index_select strided 256 : Elapsed 0.006 ms (0.602 ms / 100) 0.593 -> 0.600 ( +1.18%) [ +1.35% +0.00% +0.00% / +1.18% +2.70% +1.35%] index_select strided 257 : Elapsed 0.006 ms (0.601 ms / 100) 0.593 -> 0.597 ( +0.67%) [ +1.01% +0.17% +0.00% / +1.52% +1.18% +0.67%] index_select random : Elapsed 0.006 ms (0.599 ms / 100) 0.587 -> 0.592 ( +0.85%) [ +1.02% +0.17% +0.00% / +0.85% +1.02% +0.85%] index_select random_sorted : Elapsed 0.006 ms (0.593 ms / 100) 0.592 -> 0.597 ( +0.84%) [ +1.01% +0.51% +0.00% / +1.86% +0.84% +1.01%] index_select perm : Elapsed 0.006 ms (0.598 ms / 100) 0.589 -> 0.591 ( +0.34%) [ +0.68% +0.00% +0.17% / +0.68% +0.51% +0.34%] index_select perm_sorted : Elapsed 0.006 ms (0.593 ms / 100) B = [200, 5, 1] (stride (5, 1, 1)) A = [200, 500, 1] (stride (1, 200, 200)) dim = 1 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +1.08% +0.72%] index_select const : Elapsed 0.006 ms (0.562 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.00% +0.00% / +0.72% +1.43% +1.07%] index_select wrap : Elapsed 0.006 ms (0.564 ms / 100) 0.559 -> 0.565 ( +1.07%) [ +1.07% +0.00% +0.18% / +1.07% +3.22% +1.07%] index_select linear : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.71% +0.18% +0.00% / +0.71% +0.89% +1.07%] index_select reverse : Elapsed 0.006 ms (0.564 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +3.22% +0.18% +0.00% / +0.89% +0.72% +0.72%] index_select skip64 : Elapsed 0.006 ms (0.577 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +6.25% +0.18% +0.00% / +0.54% +0.36% +0.54%] index_select skip256 : Elapsed 0.006 ms (0.595 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +8.23% +0.18% +0.00% / +0.72% +1.25% +6.62%] index_select spread : Elapsed 0.006 ms (0.605 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.71% +0.00% +0.00% / +0.54% +1.07% +1.07%] index_select strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.72% / +0.72% +1.25% +1.25%] index_select strided 5 : Elapsed 0.006 ms (0.563 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.72% +0.36% +0.00% / +0.54% +7.17% +1.43%] index_select strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.18% +0.00% / +0.72% +0.89% +0.89%] index_select strided 8 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.36% +0.00% +0.00% / +0.36% +0.71% +6.95%] index_select strided 16 : Elapsed 0.006 ms (0.563 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +5.37% +0.00% / +0.72% +5.01% +7.16%] index_select strided 64 : Elapsed 0.006 ms (0.564 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +1.25% +1.43%] index_select strided 100 : Elapsed 0.006 ms (0.563 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +0.89% +0.71%] index_select strided 255 : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.71% +0.18% +0.00% / +0.71% +0.89% +0.89%] index_select strided 256 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.89% +0.00% +0.00% / +0.71% +0.53% +0.71%] index_select strided 257 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.71% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_select random : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.18% +0.00% / +0.53% +0.89% +0.71%] index_select random_sorted : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +1.96% +0.00% +0.00% / +0.53% +0.71% +0.89%] index_select perm : Elapsed 0.006 ms (0.572 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +2.67% +0.00% +0.00% / +0.71% +0.53% +0.53%] index_select perm_sorted : Elapsed 0.006 ms (0.576 ms / 100) B = [200, 5, 1] (stride (5, 1, 5)) A = [200, 500, 1] (stride (500, 1, 100000)) dim = 1 0.599 -> 0.604 ( +0.83%) [ +0.67% +0.00% +0.00% / +0.83% +1.50% +1.17%] index_select const : Elapsed 0.006 ms (0.603 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.33% +1.17% +0.67%] index_select wrap : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.603 ( +0.67%) [ +0.00% +0.17% +0.00% / +0.83% +0.67% +1.17%] index_select linear : Elapsed 0.006 ms (0.599 ms / 100) 0.596 -> 0.600 ( +0.67%) [ +0.50% +0.00% +0.17% / +0.67% +1.34% +1.34%] index_select reverse : Elapsed 0.006 ms (0.599 ms / 100) 0.599 -> 0.603 ( +0.67%) [ +0.83% +0.00% +0.00% / +0.67% +1.00% +1.17%] index_select skip64 : Elapsed 0.006 ms (0.604 ms / 100) 0.596 -> 0.602 ( +1.01%) [ +1.68% +0.34% +0.00% / +1.34% +1.68% +1.01%] index_select skip256 : Elapsed 0.006 ms (0.606 ms / 100) 0.588 -> 0.592 ( +0.68%) [ +0.85% +0.17% +0.00% / +0.68% +1.53% +1.53%] index_select spread : Elapsed 0.006 ms (0.593 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +1.00% +0.00% +0.17% / +0.83% +1.67% +1.67%] index_select strided 3 : Elapsed 0.006 ms (0.605 ms / 100) 0.595 -> 0.601 ( +1.01%) [ +0.84% +0.00% +0.50% / +1.01% +1.01% +6.39%] index_select strided 5 : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.601 ( +0.50%) [ +0.67% +0.00% +0.33% / +0.50% +0.67% +0.67%] index_select strided 7 : Elapsed 0.006 ms (0.602 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +0.83% +0.17% +0.00% / +1.00% +1.00% +0.67%] index_select strided 8 : Elapsed 0.006 ms (0.605 ms / 100) 0.598 -> 0.602 ( +0.67%) [ +0.67% +0.33% +0.00% / +0.84% +0.84% +0.67%] index_select strided 16 : Elapsed 0.006 ms (0.602 ms / 100) 0.601 -> 0.605 ( +0.67%) [ +0.83% +0.00% +1.16% / +0.67% +1.00% +1.16%] index_select strided 64 : Elapsed 0.006 ms (0.606 ms / 100) 0.588 -> 0.594 ( +1.02%) [ +0.68% +0.17% +0.00% / +1.02% +1.53% +3.74%] index_select strided 100 : Elapsed 0.006 ms (0.592 ms / 100) 0.589 -> 0.596 ( +1.19%) [ +1.19% +0.00% +0.34% / +1.36% +1.19% +1.36%] index_select strided 255 : Elapsed 0.006 ms (0.596 ms / 100) 0.588 -> 0.596 ( +1.36%) [ +1.53% +0.17% +0.00% / +1.36% +1.36% +1.53%] index_select strided 256 : Elapsed 0.006 ms (0.597 ms / 100) 0.591 -> 0.596 ( +0.85%) [ +1.35% +0.51% +0.00% / +1.52% +0.85% +1.35%] index_select strided 257 : Elapsed 0.006 ms (0.599 ms / 100) 0.588 -> 0.591 ( +0.51%) [ +0.68% +0.00% +0.17% / +0.68% +0.68% +0.51%] index_select random : Elapsed 0.006 ms (0.592 ms / 100) 0.587 -> 0.591 ( +0.68%) [ +0.85% +0.00% +0.17% / +0.68% +0.85% +1.02%] index_select random_sorted : Elapsed 0.006 ms (0.592 ms / 100) 0.593 -> 0.600 ( +1.18%) [ +1.18% +0.51% +0.00% / +1.18% +1.52% +1.18%] index_select perm : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.33% +0.50% +0.33%] index_select perm_sorted : Elapsed 0.006 ms (0.601 ms / 100) B = [200, 5, 1] (stride (1, 200, 1)) A = [200, 500, 1] (stride (500, 1, 100000)) dim = 1 0.592 -> 0.598 ( +1.01%) [ +0.84% +0.34% +0.00% / +1.18% +1.18% +1.01%] index_select const : Elapsed 0.006 ms (0.597 ms / 100) 0.588 -> 0.595 ( +1.19%) [ +1.53% +0.34% +0.00% / +1.87% +1.19% +2.04%] index_select wrap : Elapsed 0.006 ms (0.597 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +2.54% +0.34% +0.00% / +1.36% +1.19% +1.69%] index_select linear : Elapsed 0.006 ms (0.605 ms / 100) 0.588 -> 0.599 ( +1.87%) [ +1.70% +0.34% +0.00% / +1.87% +1.87% +5.95%] index_select reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.589 -> 0.599 ( +1.70%) [ +1.36% +0.68% +0.00% / +1.70% +2.04% +2.21%] index_select skip64 : Elapsed 0.006 ms (0.597 ms / 100) 0.589 -> 0.598 ( +1.53%) [ +1.36% +0.34% +0.00% / +1.70% +1.53% +1.70%] index_select skip256 : Elapsed 0.006 ms (0.597 ms / 100) 0.583 -> 0.587 ( +0.69%) [ +1.03% +0.17% +0.00% / +0.69% +0.86% +0.86%] index_select spread : Elapsed 0.006 ms (0.589 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.53% +0.00% +0.17% / +1.02% +1.36% +1.36%] index_select strided 3 : Elapsed 0.006 ms (0.598 ms / 100) 0.597 -> 0.600 ( +0.50%) [ +0.67% +0.00% +0.67% / +0.50% +0.50% +0.50%] index_select strided 5 : Elapsed 0.006 ms (0.601 ms / 100) 0.591 -> 0.599 ( +1.35%) [ +1.35% +0.00% +0.00% / +1.52% +1.69% +1.35%] index_select strided 7 : Elapsed 0.006 ms (0.599 ms / 100) 0.592 -> 0.597 ( +0.84%) [ +1.18% +0.00% +0.00% / +1.52% +1.35% +0.84%] index_select strided 8 : Elapsed 0.006 ms (0.599 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.86% +0.00% +0.34% / +1.53% +1.19% +1.53%] index_select strided 16 : Elapsed 0.006 ms (0.601 ms / 100) 0.589 -> 0.593 ( +0.68%) [ +1.19% +0.00% +0.00% / +0.68% +1.02% +1.02%] index_select strided 64 : Elapsed 0.006 ms (0.596 ms / 100) 0.583 -> 0.589 ( +1.03%) [ +0.86% +0.17% +0.00% / +1.03% +1.03% +1.37%] index_select strided 100 : Elapsed 0.006 ms (0.588 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +0.86% +0.00% +0.00% / +0.86% +1.54% +1.20%] index_select strided 255 : Elapsed 0.006 ms (0.588 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +0.86% +0.00% +0.34% / +0.86% +1.20% +1.03%] index_select strided 256 : Elapsed 0.006 ms (0.587 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +0.86% +0.17% +0.00% / +0.86% +1.55% +1.37%] index_select strided 257 : Elapsed 0.006 ms (0.587 ms / 100) 0.580 -> 0.585 ( +0.86%) [ +0.69% +0.17% +0.00% / +0.86% +1.55% +1.55%] index_select random : Elapsed 0.006 ms (0.584 ms / 100) 0.580 -> 0.587 ( +1.21%) [ +0.86% +0.00% +0.34% / +1.21% +1.55% +1.21%] index_select random_sorted : Elapsed 0.006 ms (0.585 ms / 100) 0.584 -> 0.590 ( +1.03%) [ +0.86% +0.00% +0.51% / +1.03% +1.54% +1.54%] index_select perm : Elapsed 0.006 ms (0.589 ms / 100) 0.581 -> 0.584 ( +0.52%) [ +0.86% +0.17% +0.00% / +1.03% +0.52% +0.69%] index_select perm_sorted : Elapsed 0.006 ms (0.586 ms / 100) B = [200, 5, 1] (stride (1, 200, 200)) A = [200, 500, 1] (stride (500, 1, 100000)) dim = 1 0.591 -> 0.597 ( +1.02%) [ +1.18% +0.00% +0.00% / +1.02% +1.52% +4.74%] index_select const : Elapsed 0.006 ms (0.598 ms / 100) 0.589 -> 0.596 ( +1.19%) [ +1.02% +0.00% +0.00% / +1.19% +1.36% +1.53%] index_select wrap : Elapsed 0.006 ms (0.595 ms / 100) 0.596 -> 0.600 ( +0.67%) [ +0.00% +0.34% +0.34% / +0.84% +0.67% +1.34%] index_select linear : Elapsed 0.006 ms (0.596 ms / 100) 0.588 -> 0.599 ( +1.87%) [ +1.70% +0.17% +0.00% / +1.87% +2.04% +1.87%] index_select reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.593 -> 0.599 ( +1.01%) [ +1.01% +0.00% +0.00% / +1.52% +1.01% +1.18%] index_select skip64 : Elapsed 0.006 ms (0.599 ms / 100) 0.589 -> 0.599 ( +1.70%) [ +2.21% +1.87% +0.00% / +1.87% +1.70% +2.38%] index_select skip256 : Elapsed 0.006 ms (0.602 ms / 100) 0.582 -> 0.585 ( +0.52%) [ +0.69% +0.00% +0.00% / +0.52% +1.03% +2.06%] index_select spread : Elapsed 0.006 ms (0.586 ms / 100) 0.592 -> 0.598 ( +1.01%) [ +1.18% +0.17% +0.00% / +1.01% +1.35% +1.18%] index_select strided 3 : Elapsed 0.006 ms (0.599 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.36% +0.34% +0.00% / +1.02% +2.21% +1.02%] index_select strided 5 : Elapsed 0.006 ms (0.597 ms / 100) 0.590 -> 0.599 ( +1.53%) [ +1.19% +2.88% +0.00% / +1.69% +1.86% +1.53%] index_select strided 7 : Elapsed 0.006 ms (0.597 ms / 100) 0.595 -> 0.599 ( +0.67%) [ +1.01% +0.00% +0.00% / +0.67% +0.84% +0.84%] index_select strided 8 : Elapsed 0.006 ms (0.601 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.70% +0.34% +0.00% / +1.36% +1.53% +1.36%] index_select strided 16 : Elapsed 0.006 ms (0.599 ms / 100) 0.595 -> 0.598 ( +0.50%) [ +1.18% +0.00% +0.00% / +0.50% +1.01% +0.84%] index_select strided 64 : Elapsed 0.006 ms (0.602 ms / 100) 0.581 -> 0.585 ( +0.69%) [ +0.86% +0.00% +0.17% / +0.69% +1.38% +1.20%] index_select strided 100 : Elapsed 0.006 ms (0.586 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +1.03% +0.17% +0.00% / +1.20% +0.86% +1.03%] index_select strided 255 : Elapsed 0.006 ms (0.589 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +1.03% +0.00% +0.00% / +0.86% +1.20% +1.54%] index_select strided 256 : Elapsed 0.006 ms (0.589 ms / 100) 0.587 -> 0.588 ( +0.17%) [ +0.85% +0.17% +0.00% / +0.17% +0.34% +0.51%] index_select strided 257 : Elapsed 0.006 ms (0.592 ms / 100) 0.593 -> 0.601 ( +1.35%) [ +1.01% +0.34% +0.00% / +1.35% +1.52% +3.04%] index_select random : Elapsed 0.006 ms (0.599 ms / 100) 0.580 -> 0.585 ( +0.86%) [ +0.86% +0.86% +0.00% / +0.86% +1.55% +1.21%] index_select random_sorted : Elapsed 0.006 ms (0.585 ms / 100) 0.588 -> 0.595 ( +1.19%) [ +1.53% +0.00% +0.17% / +1.19% +1.53% +1.19%] index_select perm : Elapsed 0.006 ms (0.597 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +0.86% +0.00% +0.00% / +0.86% +0.86% +0.86%] index_select perm_sorted : Elapsed 0.006 ms (0.588 ms / 100) B = [200, 5, 1] (stride (5, 1, 1000)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 1 0.599 -> 0.600 ( +0.17%) [ +0.33% +0.00% +0.00% / +0.17% +0.50% +0.33%] index_select const : Elapsed 0.006 ms (0.601 ms / 100) 0.597 -> 0.599 ( +0.34%) [ +0.50% +0.00% +0.50% / +0.34% +0.84% +0.84%] index_select wrap : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.601 ( +0.50%) [ +1.34% +0.33% +0.00% / +0.50% +0.50% +0.50%] index_select linear : Elapsed 0.006 ms (0.606 ms / 100) 0.596 -> 0.599 ( +0.50%) [ +0.67% +0.67% +0.00% / +0.67% +0.50% +0.67%] index_select reverse : Elapsed 0.006 ms (0.600 ms / 100) 0.598 -> 0.599 ( +0.17%) [ +0.17% +0.33% +0.00% / +0.17% +1.84% +0.50%] index_select skip64 : Elapsed 0.006 ms (0.599 ms / 100) 0.599 -> 0.600 ( +0.17%) [ +0.17% +0.50% +0.00% / +0.17% +0.67% +0.50%] index_select skip256 : Elapsed 0.006 ms (0.600 ms / 100) 0.588 -> 0.594 ( +1.02%) [ +0.85% +0.00% +0.34% / +1.02% +1.53% +1.19%] index_select spread : Elapsed 0.006 ms (0.593 ms / 100) 0.594 -> 0.599 ( +0.84%) [ +1.18% +0.67% +0.00% / +0.84% +0.84% +1.18%] index_select strided 3 : Elapsed 0.006 ms (0.601 ms / 100) 0.600 -> 0.604 ( +0.67%) [ +1.17% +0.00% +0.00% / +1.00% +0.67% +0.83%] index_select strided 5 : Elapsed 0.006 ms (0.607 ms / 100) 0.600 -> 0.602 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.50% +0.67% +0.33%] index_select strided 7 : Elapsed 0.006 ms (0.603 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +0.83% +0.17% +0.00% / +0.83% +1.00% +4.34%] index_select strided 8 : Elapsed 0.006 ms (0.604 ms / 100) 0.599 -> 0.605 ( +1.00%) [ +1.00% +0.00% +0.33% / +1.00% +1.00% +1.00%] index_select strided 16 : Elapsed 0.006 ms (0.605 ms / 100) 0.598 -> 0.600 ( +0.33%) [ +0.67% +0.00% +0.17% / +0.33% +0.84% +2.17%] index_select strided 64 : Elapsed 0.006 ms (0.602 ms / 100) 0.588 -> 0.593 ( +0.85%) [ +2.72% +0.34% +0.00% / +0.85% +1.87% +1.36%] index_select strided 100 : Elapsed 0.006 ms (0.604 ms / 100) 0.590 -> 0.598 ( +1.36%) [ +1.69% +0.34% +0.00% / +1.69% +1.36% +1.86%] index_select strided 255 : Elapsed 0.006 ms (0.600 ms / 100) 0.588 -> 0.594 ( +1.02%) [ +1.02% +0.00% +0.00% / +1.02% +2.04% +1.02%] index_select strided 256 : Elapsed 0.006 ms (0.594 ms / 100) 0.590 -> 0.598 ( +1.36%) [ +1.19% +0.00% +0.00% / +1.36% +1.69% +1.69%] index_select strided 257 : Elapsed 0.006 ms (0.597 ms / 100) 0.599 -> 0.604 ( +0.83%) [ +1.00% +0.00% +0.00% / +0.83% +1.34% +1.50%] index_select random : Elapsed 0.006 ms (0.605 ms / 100) 0.590 -> 0.598 ( +1.36%) [ +1.36% +0.17% +0.00% / +1.36% +1.36% +1.69%] index_select random_sorted : Elapsed 0.006 ms (0.598 ms / 100) 0.595 -> 0.599 ( +0.67%) [ +1.01% +0.00% +0.00% / +0.67% +0.84% +1.18%] index_select perm : Elapsed 0.006 ms (0.601 ms / 100) 0.599 -> 0.601 ( +0.33%) [ +0.50% +0.00% +0.17% / +0.33% +1.00% +0.33%] index_select perm_sorted : Elapsed 0.006 ms (0.602 ms / 100) B = [200, 5, 1] (stride (5, 1, 1000)) A = [200, 500, 1] (stride (1, 200, 200)) dim = 1 0.558 -> 0.563 ( +0.90%) [ +0.72% +0.18% +0.00% / +0.90% +1.08% +4.30%] index_select const : Elapsed 0.006 ms (0.562 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.71% +0.71% +1.25%] index_select wrap : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.89% +0.00% +0.00% / +0.89% +0.53% +0.36%] index_select linear : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.89% +0.00% +0.00% / +0.89% +0.71% +0.53%] index_select reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.44% +1.62%] index_select skip64 : Elapsed 0.006 ms (0.561 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.44% +1.26%] index_select skip256 : Elapsed 0.006 ms (0.562 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.89% +8.57% +0.00% / +0.71% +0.89% +0.71%] index_select spread : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.53% +0.00% +0.00% / +0.71% +0.89% +0.53%] index_select strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.18% / +0.53% +0.89% +0.71%] index_select strided 5 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.53% +0.00% +0.00% / +0.36% +3.21% +8.91%] index_select strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.89% +0.00% +0.18% / +1.07% +0.71% +1.07%] index_select strided 8 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.00% / +1.60% +0.71% +0.53%] index_select strided 16 : Elapsed 0.006 ms (0.565 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.00% +0.18% / +0.72% +1.25% +1.25%] index_select strided 64 : Elapsed 0.006 ms (0.564 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.18% +0.00% / +0.72% +1.25% +1.25%] index_select strided 100 : Elapsed 0.006 ms (0.564 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.71% +0.18% +0.00% / +0.71% +1.07% +0.89%] index_select strided 255 : Elapsed 0.006 ms (0.564 ms / 100) 0.558 -> 0.564 ( +1.08%) [ +0.90% +0.00% +0.36% / +1.08% +1.43% +1.25%] index_select strided 256 : Elapsed 0.006 ms (0.563 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.89% +0.00% +0.00% / +3.74% +0.53% +0.53%] index_select strided 257 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.566 ( +0.89%) [ +0.71% +0.00% +0.18% / +0.89% +0.89% +0.89%] index_select random : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.36% +0.36% +0.00% / +0.53% +0.71% +1.07%] index_select random_sorted : Elapsed 0.006 ms (0.563 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +1.07% +0.18% +0.00% / +0.89% +1.07% +1.07%] index_select perm : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.71% +0.00% +0.18% / +0.54% +1.07% +0.89%] index_select perm_sorted : Elapsed 0.006 ms (0.564 ms / 100) B = [200, 5, 1] (stride (1, 200, 1000)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 1 0.592 -> 0.595 ( +0.51%) [ +1.01% +0.17% +0.00% / +0.51% +1.18% +1.35%] index_select const : Elapsed 0.006 ms (0.598 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +2.04% +0.17% +0.00% / +1.70% +1.87% +1.02%] index_select wrap : Elapsed 0.006 ms (0.601 ms / 100) 0.588 -> 0.597 ( +1.53%) [ +1.87% +0.17% +0.00% / +1.53% +1.70% +1.87%] index_select linear : Elapsed 0.006 ms (0.599 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +1.03% +0.51% +0.00% / +0.85% +2.22% +2.39%] index_select reverse : Elapsed 0.006 ms (0.591 ms / 100) 0.588 -> 0.597 ( +1.53%) [ +1.19% +0.85% +0.00% / +1.53% +1.87% +1.87%] index_select skip64 : Elapsed 0.006 ms (0.595 ms / 100) 0.590 -> 0.598 ( +1.36%) [ +1.36% +0.00% +0.34% / +1.36% +1.53% +9.32%] index_select skip256 : Elapsed 0.006 ms (0.598 ms / 100) 0.578 -> 0.584 ( +1.04%) [ +1.04% +0.00% +0.00% / +1.04% +1.04% +1.21%] index_select spread : Elapsed 0.006 ms (0.584 ms / 100) 0.588 -> 0.597 ( +1.53%) [ +1.87% +0.34% +0.00% / +2.38% +1.53% +1.87%] index_select strided 3 : Elapsed 0.006 ms (0.599 ms / 100) 0.600 -> 0.602 ( +0.33%) [ +0.33% +0.00% +0.00% / +0.33% +0.50% +0.50%] index_select strided 5 : Elapsed 0.006 ms (0.602 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.53% +0.68% +0.00% / +1.87% +1.36% +1.36%] index_select strided 7 : Elapsed 0.006 ms (0.598 ms / 100) 0.592 -> 0.598 ( +1.01%) [ +1.52% +0.34% +0.00% / +1.35% +1.01% +1.18%] index_select strided 8 : Elapsed 0.006 ms (0.601 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +0.86% +0.00% +0.00% / +1.37% +0.86% +1.03%] index_select strided 16 : Elapsed 0.006 ms (0.588 ms / 100) 0.589 -> 0.592 ( +0.51%) [ +1.19% +0.17% +0.00% / +0.51% +1.87% +1.36%] index_select strided 64 : Elapsed 0.006 ms (0.596 ms / 100) 0.578 -> 0.582 ( +0.69%) [ +0.87% +0.17% +0.00% / +0.69% +1.38% +1.04%] index_select strided 100 : Elapsed 0.006 ms (0.583 ms / 100) 0.584 -> 0.588 ( +0.68%) [ +0.86% +0.00% +0.00% / +0.68% +1.03% +0.86%] index_select strided 255 : Elapsed 0.006 ms (0.589 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +0.69% +0.00% +0.00% / +1.03% +0.86% +2.40%] index_select strided 256 : Elapsed 0.006 ms (0.587 ms / 100) 0.583 -> 0.587 ( +0.69%) [ +0.69% +0.00% +0.17% / +0.69% +0.86% +0.69%] index_select strided 257 : Elapsed 0.006 ms (0.587 ms / 100) 0.591 -> 0.599 ( +1.35%) [ +1.52% +0.34% +0.00% / +1.35% +1.35% +1.35%] index_select random : Elapsed 0.006 ms (0.600 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +1.03% +2.92% +0.00% / +0.86% +1.20% +0.86%] index_select random_sorted : Elapsed 0.006 ms (0.589 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +0.51% +0.00% +0.00% / +0.51% +1.20% +1.03%] index_select perm : Elapsed 0.006 ms (0.588 ms / 100) 0.582 -> 0.586 ( +0.69%) [ +0.69% +0.00% +0.00% / +0.86% +0.69% +0.86%] index_select perm_sorted : Elapsed 0.006 ms (0.586 ms / 100) out_shape = [200, 500, 5] in_shape = [200, 500, 1] idx_dim = 2 B = [200, 500, 5] (stride (2500, 5, 1)) A = [200, 500, 1] (stride (500, 1, 500)) dim = 2 6.205 -> 6.206 ( +0.02%) [ +0.00% +0.19% +0.24% / +0.35% +0.02% +0.03%] index_add_ linear : Elapsed 0.062 ms (6.205 ms / 100) 6.088 -> 6.074 ( -0.23%) [ +0.23% +0.03% +0.00% / +0.15% -0.23% -0.15%] index_copy_ linear : Elapsed 0.061 ms (6.102 ms / 100) 6.210 -> 6.191 ( -0.31%) [ +0.18% +0.00% +0.16% / +0.39% -0.05% -0.31%] index_add_ reverse : Elapsed 0.062 ms (6.221 ms / 100) 6.099 -> 6.083 ( -0.26%) [ +0.16% +0.13% +0.00% / +0.20% -0.13% -0.26%] index_copy_ reverse : Elapsed 0.061 ms (6.109 ms / 100) 6.214 -> 6.199 ( -0.24%) [ +0.06% +1.05% +0.00% / +0.21% -0.14% -0.24%] index_add_ spread : Elapsed 0.062 ms (6.218 ms / 100) 6.099 -> 6.086 ( -0.21%) [ +0.16% +0.00% +0.11% / +0.16% -0.10% -0.21%] index_copy_ spread : Elapsed 0.061 ms (6.109 ms / 100) 6.220 -> 6.203 ( -0.27%) [ +0.05% +0.10% +0.00% / +0.06% -0.27% -0.13%] index_add_ strided 3 : Elapsed 0.062 ms (6.223 ms / 100) 6.093 -> 6.078 ( -0.25%) [ +0.07% +0.00% +0.18% / +0.02% -0.25% -0.10%] index_copy_ strided 3 : Elapsed 0.061 ms (6.097 ms / 100) 6.228 -> 6.223 ( -0.08%) [ +0.00% +0.11% +0.40% / +0.18% -0.08% +0.05%] index_add_ perm : Elapsed 0.062 ms (6.228 ms / 100) 6.099 -> 6.084 ( -0.25%) [ +0.03% +0.00% +0.00% / +0.08% -0.05% -0.25%] index_copy_ perm : Elapsed 0.061 ms (6.101 ms / 100) 6.228 -> 6.203 ( -0.40%) [ +0.02% +0.13% +0.00% / -0.10% -0.14% -0.40%] index_add_ perm_sorted : Elapsed 0.062 ms (6.229 ms / 100) 6.088 -> 6.072 ( -0.26%) [ +0.25% +0.20% +0.00% / +0.13% -0.10% -0.26%] index_copy_ perm_sorted : Elapsed 0.061 ms (6.103 ms / 100) 14.990 -> 14.957 ( -0.22%) [ +0.07% +0.01% +0.00% / -0.10% -0.16% -0.22%] index_select const : Elapsed 0.150 ms (15.001 ms / 100) 14.967 -> 14.976 ( +0.06%) [ +0.00% +0.12% +0.18% / +0.09% +0.15% +0.06%] index_select wrap : Elapsed 0.150 ms (14.967 ms / 100) 14.944 -> 14.957 ( +0.09%) [ +0.17% +0.00% +0.27% / +0.09% +0.25% +0.25%] index_select linear : Elapsed 0.150 ms (14.969 ms / 100) 14.958 -> 14.959 ( +0.01%) [ +0.11% +0.23% +0.00% / +0.31% +0.11% +0.01%] index_select reverse : Elapsed 0.150 ms (14.975 ms / 100) 14.939 -> 14.945 ( +0.04%) [ +0.00% +0.02% +0.21% / +0.04% +0.26% +0.25%] index_select skip64 : Elapsed 0.149 ms (14.939 ms / 100) 14.965 -> 14.956 ( -0.06%) [ +0.07% +0.00% +0.12% / -0.06% +0.37% +0.05%] index_select skip256 : Elapsed 0.150 ms (14.976 ms / 100) 14.970 -> 14.964 ( -0.04%) [ +0.00% +0.13% +0.05% / -0.01% +0.00% -0.04%] index_select spread : Elapsed 0.150 ms (14.970 ms / 100) 14.963 -> 14.961 ( -0.01%) [ +0.14% +0.00% +0.02% / +0.25% +0.07% -0.01%] index_select random : Elapsed 0.150 ms (14.984 ms / 100) 14.950 -> 14.953 ( +0.02%) [ +0.25% +0.23% +0.00% / +0.02% +0.21% +0.03%] index_select random_sorted : Elapsed 0.150 ms (14.987 ms / 100) B = [200, 500, 5] (stride (1, 1000, 200)) A = [200, 500, 1] (stride (500, 1, 1)) dim = 2 8.077 -> 8.070 ( -0.09%) [ +0.05% +0.00% +0.01% / -0.09% +0.09% -0.04%] index_add_ linear : Elapsed 0.081 ms (8.081 ms / 100) 7.816 -> 7.815 ( -0.01%) [ +0.05% +0.00% +0.05% / -0.01% +0.09% +0.05%] index_copy_ linear : Elapsed 0.078 ms (7.820 ms / 100) 8.068 -> 8.069 ( +0.01%) [ +0.10% +0.00% +0.04% / +0.01% +0.29% +0.06%] index_add_ reverse : Elapsed 0.081 ms (8.076 ms / 100) 7.808 -> 7.816 ( +0.10%) [ +0.04% +0.05% +0.00% / +0.10% +0.17% +0.32%] index_copy_ reverse : Elapsed 0.078 ms (7.811 ms / 100) 8.063 -> 8.069 ( +0.07%) [ +0.09% +0.00% +0.00% / +0.07% +0.19% +0.25%] index_add_ spread : Elapsed 0.081 ms (8.070 ms / 100) 7.802 -> 7.801 ( -0.01%) [ +0.15% +0.00% +0.06% / +0.28% -0.01% +0.31%] index_copy_ spread : Elapsed 0.078 ms (7.814 ms / 100) 8.061 -> 8.064 ( +0.04%) [ +0.15% +0.11% +0.00% / +0.30% +0.04% +0.05%] index_add_ strided 3 : Elapsed 0.081 ms (8.073 ms / 100) 7.802 -> 7.810 ( +0.10%) [ +0.13% +0.00% +0.04% / +0.27% +0.10% +0.10%] index_copy_ strided 3 : Elapsed 0.078 ms (7.812 ms / 100) 8.047 -> 8.051 ( +0.05%) [ +0.00% +0.06% +0.09% / +0.05% +0.19% +0.25%] index_add_ perm : Elapsed 0.080 ms (8.047 ms / 100) 7.795 -> 7.805 ( +0.13%) [ +0.21% +0.00% +0.17% / +0.13% +0.18% +0.35%] index_copy_ perm : Elapsed 0.078 ms (7.811 ms / 100) 8.044 -> 8.053 ( +0.11%) [ +0.00% +0.06% +0.11% / +0.11% +0.12% +0.22%] index_add_ perm_sorted : Elapsed 0.080 ms (8.044 ms / 100) 7.804 -> 7.807 ( +0.04%) [ +0.00% +0.00% +0.27% / +0.04% +0.06% +0.08%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.804 ms / 100) 14.157 -> 14.164 ( +0.05%) [ +0.30% +0.07% +0.00% / +0.08% +0.05% +0.07%] index_select const : Elapsed 0.142 ms (14.200 ms / 100) 14.145 -> 14.158 ( +0.09%) [ +0.37% +0.00% +0.02% / +0.30% +0.09% +0.11%] index_select wrap : Elapsed 0.142 ms (14.197 ms / 100) 14.149 -> 14.167 ( +0.13%) [ +0.21% +0.00% +0.08% / +0.25% +0.13% +0.18%] index_select linear : Elapsed 0.142 ms (14.179 ms / 100) 14.170 -> 14.168 ( -0.01%) [ +0.09% +0.02% +0.00% / +0.16% +0.06% -0.01%] index_select reverse : Elapsed 0.142 ms (14.183 ms / 100) 14.150 -> 14.161 ( +0.08%) [ +0.25% +0.01% +0.00% / +0.23% +0.08% +0.18%] index_select skip64 : Elapsed 0.142 ms (14.185 ms / 100) 14.153 -> 14.159 ( +0.04%) [ +0.35% +0.04% +0.00% / +0.25% +0.15% +0.04%] index_select skip256 : Elapsed 0.142 ms (14.202 ms / 100) 14.162 -> 14.169 ( +0.05%) [ +0.10% +0.00% +0.01% / +0.25% +0.05% +0.10%] index_select spread : Elapsed 0.142 ms (14.176 ms / 100) 14.126 -> 14.158 ( +0.23%) [ +0.30% +0.00% +0.16% / +0.38% +0.44% +0.23%] index_select random : Elapsed 0.142 ms (14.169 ms / 100) 14.150 -> 14.170 ( +0.14%) [ +0.29% +0.02% +0.00% / +0.25% +0.14% +0.15%] index_select random_sorted : Elapsed 0.142 ms (14.191 ms / 100) B = [200, 500, 5] (stride (500, 1, 100000)) A = [200, 500, 1] (stride (500, 1, 500)) dim = 2 4.859 -> 4.850 ( -0.19%) [ +0.06% +0.29% +0.00% / -0.04% -0.10% -0.19%] index_add_ linear : Elapsed 0.049 ms (4.862 ms / 100) 4.701 -> 4.712 ( +0.23%) [ +0.19% +0.23% +0.00% / +0.26% +0.23% +0.30%] index_copy_ linear : Elapsed 0.047 ms (4.710 ms / 100) 4.853 -> 4.860 ( +0.14%) [ +0.12% +0.10% +0.00% / +0.19% +0.16% +0.14%] index_add_ reverse : Elapsed 0.049 ms (4.859 ms / 100) 4.707 -> 4.718 ( +0.23%) [ +0.00% +0.08% +0.21% / +0.34% +0.38% +0.23%] index_copy_ reverse : Elapsed 0.047 ms (4.707 ms / 100) 4.868 -> 4.857 ( -0.23%) [ +0.02% +0.00% +0.00% / +0.08% -0.23% +0.00%] index_add_ spread : Elapsed 0.049 ms (4.869 ms / 100) 4.714 -> 4.708 ( -0.13%) [ +0.00% +0.11% +0.08% / +0.34% -0.13% +0.19%] index_copy_ spread : Elapsed 0.047 ms (4.714 ms / 100) 4.867 -> 4.852 ( -0.31%) [ +0.10% +0.00% +0.14% / +0.00% -0.31% +0.02%] index_add_ strided 3 : Elapsed 0.049 ms (4.872 ms / 100) 4.716 -> 4.710 ( -0.13%) [ +0.30% +0.15% +0.00% / +0.00% -0.13% +0.23%] index_copy_ strided 3 : Elapsed 0.047 ms (4.730 ms / 100) 4.864 -> 4.856 ( -0.16%) [ +0.00% +0.14% +0.04% / +0.10% -0.14% -0.16%] index_add_ perm : Elapsed 0.049 ms (4.864 ms / 100) 4.710 -> 4.713 ( +0.06%) [ +0.17% +0.25% +0.00% / +0.28% +0.11% +0.06%] index_copy_ perm : Elapsed 0.047 ms (4.718 ms / 100) 4.865 -> 4.865 ( +0.00%) [ +0.08% +0.00% +0.21% / +0.08% +0.08% +0.00%] index_add_ perm_sorted : Elapsed 0.049 ms (4.869 ms / 100) 4.717 -> 4.712 ( -0.11%) [ +0.15% +0.00% +0.25% / -0.11% -0.06% -0.08%] index_copy_ perm_sorted : Elapsed 0.047 ms (4.724 ms / 100) 6.477 -> 6.502 ( +0.39%) [ +0.12% +0.06% +0.00% / +0.39% +0.57% +0.56%] index_select const : Elapsed 0.065 ms (6.485 ms / 100) 6.494 -> 6.493 ( -0.02%) [ +0.00% +0.15% +0.25% / +0.15% -0.02% +0.26%] index_select wrap : Elapsed 0.065 ms (6.494 ms / 100) 6.489 -> 6.498 ( +0.14%) [ +0.31% +0.00% +0.25% / +0.14% +0.59% +0.43%] index_select linear : Elapsed 0.065 ms (6.509 ms / 100) 6.489 -> 6.497 ( +0.12%) [ +0.29% +0.00% +0.09% / +0.12% +0.20% +0.49%] index_select reverse : Elapsed 0.065 ms (6.508 ms / 100) 6.480 -> 6.503 ( +0.35%) [ +0.15% +0.17% +0.00% / +0.39% +0.35% +0.80%] index_select skip64 : Elapsed 0.065 ms (6.490 ms / 100) 6.476 -> 6.489 ( +0.20%) [ +0.25% +0.00% +0.20% / +0.20% +0.23% +0.54%] index_select skip256 : Elapsed 0.065 ms (6.492 ms / 100) 6.477 -> 6.475 ( -0.03%) [ +0.08% +0.00% +0.42% / -0.03% +0.42% +0.83%] index_select spread : Elapsed 0.065 ms (6.482 ms / 100) 6.480 -> 6.504 ( +0.37%) [ +0.37% +0.23% +0.00% / +0.57% +0.83% +0.37%] index_select random : Elapsed 0.065 ms (6.504 ms / 100) 6.480 -> 6.480 ( +0.00%) [ +0.00% +0.03% +0.35% / +0.00% +0.43% +0.37%] index_select random_sorted : Elapsed 0.065 ms (6.480 ms / 100) B = [200, 500, 5] (stride (500, 1, 100000)) A = [200, 500, 1] (stride (1, 200, 100000)) dim = 2 7.817 -> 7.838 ( +0.27%) [ +0.15% +0.00% +0.36% / +0.45% +0.35% +0.27%] index_add_ linear : Elapsed 0.078 ms (7.829 ms / 100) 7.755 -> 7.762 ( +0.09%) [ +0.19% +0.00% +0.08% / +0.31% +0.22% +0.09%] index_copy_ linear : Elapsed 0.078 ms (7.770 ms / 100) 7.834 -> 7.824 ( -0.13%) [ +0.00% +0.08% +0.23% / +0.11% +0.04% -0.13%] index_add_ reverse : Elapsed 0.078 ms (7.834 ms / 100) 7.765 -> 7.767 ( +0.03%) [ +0.00% +0.00% +0.06% / +0.14% +0.06% +0.03%] index_copy_ reverse : Elapsed 0.078 ms (7.765 ms / 100) 7.824 -> 7.821 ( -0.04%) [ +0.38% +0.20% +0.00% / +0.09% -0.04% +0.19%] index_add_ spread : Elapsed 0.079 ms (7.854 ms / 100) 7.764 -> 7.760 ( -0.05%) [ +0.15% +0.00% +0.10% / +0.10% -0.05% +0.08%] index_copy_ spread : Elapsed 0.078 ms (7.776 ms / 100) 7.826 -> 7.836 ( +0.13%) [ +0.05% +0.00% +0.22% / +0.13% +0.23% +0.18%] index_add_ strided 3 : Elapsed 0.078 ms (7.830 ms / 100) 7.762 -> 7.760 ( -0.03%) [ +0.09% +0.12% +0.00% / +0.08% -0.03% -0.01%] index_copy_ strided 3 : Elapsed 0.078 ms (7.769 ms / 100) 7.827 -> 7.840 ( +0.17%) [ +0.10% +0.14% +0.00% / +0.29% +0.18% +0.17%] index_add_ perm : Elapsed 0.078 ms (7.835 ms / 100) 7.765 -> 7.764 ( -0.01%) [ +0.22% +0.00% +0.00% / +0.06% +0.08% -0.01%] index_copy_ perm : Elapsed 0.078 ms (7.782 ms / 100) 7.836 -> 7.829 ( -0.09%) [ +0.00% +0.06% +0.00% / -0.09% +0.04% +0.06%] index_add_ perm_sorted : Elapsed 0.078 ms (7.836 ms / 100) 7.764 -> 7.760 ( -0.05%) [ +0.10% +0.05% +0.00% / -0.04% -0.05% +0.30%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.772 ms / 100) 13.265 -> 13.297 ( +0.24%) [ +0.24% +0.00% +0.06% / +0.50% +0.24% +0.37%] index_select const : Elapsed 0.133 ms (13.297 ms / 100) 13.263 -> 13.309 ( +0.35%) [ +0.28% +0.00% +0.13% / +0.41% +0.35% +0.38%] index_select wrap : Elapsed 0.133 ms (13.300 ms / 100) 13.272 -> 13.268 ( -0.03%) [ +0.36% +0.00% +0.01% / +0.33% -0.03% +0.24%] index_select linear : Elapsed 0.133 ms (13.320 ms / 100) 13.278 -> 13.305 ( +0.20%) [ +0.26% +0.00% +0.08% / +0.26% +0.20% +0.22%] index_select reverse : Elapsed 0.133 ms (13.313 ms / 100) 13.273 -> 13.301 ( +0.21%) [ +0.32% +0.00% +0.11% / +0.30% +0.21% +0.23%] index_select skip64 : Elapsed 0.133 ms (13.316 ms / 100) 13.265 -> 13.294 ( +0.22%) [ +0.34% +0.06% +0.00% / +0.30% +0.23% +0.22%] index_select skip256 : Elapsed 0.133 ms (13.310 ms / 100) 13.278 -> 13.294 ( +0.12%) [ +0.31% +0.05% +0.00% / +0.41% +0.12% +0.17%] index_select spread : Elapsed 0.133 ms (13.319 ms / 100) 13.278 -> 13.261 ( -0.13%) [ +0.14% +0.00% +0.09% / +0.29% +0.11% -0.13%] index_select random : Elapsed 0.133 ms (13.297 ms / 100) 13.277 -> 13.290 ( +0.10%) [ +0.14% +0.00% +0.02% / +0.35% +0.14% +0.10%] index_select random_sorted : Elapsed 0.133 ms (13.296 ms / 100) B = [200, 500, 5] (stride (1, 200, 100000)) A = [200, 500, 1] (stride (500, 1, 500)) dim = 2 7.949 -> 7.934 ( -0.19%) [ +0.10% +0.00% +0.08% / +0.20% -0.19% -0.04%] index_add_ linear : Elapsed 0.080 ms (7.957 ms / 100) 7.789 -> 7.793 ( +0.05%) [ +0.13% +0.00% +0.04% / +0.37% +0.05% +0.27%] index_copy_ linear : Elapsed 0.078 ms (7.799 ms / 100) 7.948 -> 7.946 ( -0.03%) [ +0.05% +0.00% +0.18% / +0.04% +0.09% -0.03%] index_add_ reverse : Elapsed 0.080 ms (7.952 ms / 100) 7.789 -> 7.809 ( +0.26%) [ +0.09% +0.00% +0.10% / +0.32% +0.41% +0.26%] index_copy_ reverse : Elapsed 0.078 ms (7.796 ms / 100) 7.948 -> 7.942 ( -0.08%) [ +0.00% +0.08% +0.06% / -0.03% -0.08% +0.04%] index_add_ spread : Elapsed 0.079 ms (7.948 ms / 100) 7.794 -> 7.789 ( -0.06%) [ +0.13% +0.01% +0.00% / +0.05% -0.06% +0.17%] index_copy_ spread : Elapsed 0.078 ms (7.804 ms / 100) 7.940 -> 7.942 ( +0.03%) [ +0.21% +0.16% +0.00% / +0.03% +0.14% +0.28%] index_add_ strided 3 : Elapsed 0.080 ms (7.957 ms / 100) 7.797 -> 7.797 ( +0.00%) [ +0.18% +0.06% +0.00% / +0.08% +0.00% +0.14%] index_copy_ strided 3 : Elapsed 0.078 ms (7.811 ms / 100) 7.956 -> 7.953 ( -0.04%) [ +0.24% +0.28% +0.00% / +0.19% -0.04% +0.04%] index_add_ perm : Elapsed 0.080 ms (7.975 ms / 100) 7.801 -> 7.805 ( +0.05%) [ +0.19% +0.14% +0.00% / +0.05% +0.06% +0.17%] index_copy_ perm : Elapsed 0.078 ms (7.816 ms / 100) 7.962 -> 7.942 ( -0.25%) [ +0.06% +0.00% +0.00% / -0.10% -0.09% -0.25%] index_add_ perm_sorted : Elapsed 0.080 ms (7.967 ms / 100) 7.795 -> 7.785 ( -0.13%) [ +0.15% +0.00% +0.08% / +0.18% +0.00% -0.13%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.807 ms / 100) 13.940 -> 13.983 ( +0.31%) [ +0.32% +0.00% +0.14% / +0.31% +0.32% +0.37%] index_select const : Elapsed 0.140 ms (13.985 ms / 100) 13.949 -> 13.984 ( +0.25%) [ +0.30% +0.00% +0.06% / +0.25% +0.29% +0.27%] index_select wrap : Elapsed 0.140 ms (13.991 ms / 100) 13.939 -> 13.985 ( +0.33%) [ +0.44% +0.17% +0.00% / +0.38% +0.33% +0.52%] index_select linear : Elapsed 0.140 ms (14.000 ms / 100) 13.950 -> 13.985 ( +0.25%) [ +0.32% +0.00% +0.06% / +0.25% +0.33% +0.39%] index_select reverse : Elapsed 0.140 ms (13.995 ms / 100) 13.950 -> 13.982 ( +0.23%) [ +0.32% +0.00% +0.04% / +0.31% +0.24% +0.23%] index_select skip64 : Elapsed 0.140 ms (13.995 ms / 100) 13.956 -> 13.983 ( +0.19%) [ +0.17% +0.09% +0.00% / +0.27% +0.23% +0.19%] index_select skip256 : Elapsed 0.140 ms (13.980 ms / 100) 13.942 -> 13.976 ( +0.24%) [ +0.37% +0.00% +0.19% / +0.33% +0.37% +0.24%] index_select spread : Elapsed 0.140 ms (13.993 ms / 100) 13.944 -> 13.977 ( +0.24%) [ +0.28% +0.00% +0.06% / +0.24% +0.36% +0.39%] index_select random : Elapsed 0.140 ms (13.983 ms / 100) 13.943 -> 13.976 ( +0.24%) [ +0.31% +0.03% +0.00% / +0.42% +0.24% +0.32%] index_select random_sorted : Elapsed 0.140 ms (13.986 ms / 100) out_shape = [5, 1, 200] in_shape = [500, 1, 200] idx_dim = 0 B = [5, 1, 200] (stride (200, 1, 1)) dim = 0 fill_cnt = 500 4.120 -> 4.165 ( +1.09%) [ +1.17% +0.02% +0.00% / +1.12% +1.09% +1.09%] index_fill_ const : Elapsed 0.042 ms (4.168 ms / 100) 4.123 -> 4.167 ( +1.07%) [ +1.14% +0.05% +0.00% / +1.07% +1.14% +1.19%] index_fill_ linear : Elapsed 0.042 ms (4.170 ms / 100) 4.119 -> 4.165 ( +1.12%) [ +1.14% +0.02% +0.00% / +1.12% +1.17% +1.19%] index_fill_ reverse : Elapsed 0.042 ms (4.166 ms / 100) 4.129 -> 4.170 ( +0.99%) [ +1.21% +0.05% +0.00% / +1.07% +1.02% +0.99%] index_fill_ skip64 : Elapsed 0.042 ms (4.179 ms / 100) 4.126 -> 4.171 ( +1.09%) [ +1.09% +0.00% +0.00% / +1.09% +1.16% +1.16%] index_fill_ skip256 : Elapsed 0.042 ms (4.171 ms / 100) 4.129 -> 4.171 ( +1.02%) [ +1.14% +0.05% +0.00% / +1.09% +1.04% +1.02%] index_fill_ spread : Elapsed 0.042 ms (4.176 ms / 100) 4.118 -> 4.164 ( +1.12%) [ +1.12% +0.00% +0.00% / +1.12% +1.17% +1.19%] index_fill_ strided 3 : Elapsed 0.042 ms (4.164 ms / 100) 4.125 -> 4.168 ( +1.04%) [ +1.09% +0.05% +0.00% / +1.04% +1.12% +1.09%] index_fill_ random : Elapsed 0.042 ms (4.170 ms / 100) 4.119 -> 4.164 ( +1.09%) [ +1.19% +0.07% +0.00% / +1.09% +1.19% +1.17%] index_fill_ random_sorted : Elapsed 0.042 ms (4.168 ms / 100) B = [5, 1, 200] (stride (1, 5, 5)) A = [500, 1, 200] (stride (200, 1, 1)) dim = 0 0.560 -> 0.562 ( +0.36%) [ +0.89% +0.00% +0.18% / +1.07% +0.36% +0.36%] index_select const : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +1.07% +0.18% +0.00% / +1.07% +0.53% +0.36%] index_select wrap : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +1.07% +0.00% +0.00% / +1.78% +0.36% +0.18%] index_select linear : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.89% +0.00% +0.00% / +7.66% +1.60% +0.36%] index_select reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +0.71% +0.00% +0.00% / +0.53% +0.18% +0.18%] index_select skip64 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.89% +0.00% +0.00% / +0.71% +0.53% +0.36%] index_select skip256 : Elapsed 0.006 ms (0.566 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +1.07% +0.18% +0.00% / +0.89% +0.89% +0.54%] index_select spread : Elapsed 0.006 ms (0.566 ms / 100) 0.562 -> 0.563 ( +0.18%) [ +0.89% +0.00% +0.00% / +0.71% +0.36% +0.18%] index_select strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.561 -> 0.562 ( +0.18%) [ +0.89% +0.18% +0.00% / +0.89% +0.36% +0.18%] index_select strided 5 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.00% +0.00% / +0.71% +0.53% +0.53%] index_select strided 7 : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +2.50% +0.00% +0.18% / +0.89% +0.53% +0.53%] index_select strided 8 : Elapsed 0.006 ms (0.575 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +1.78% +0.00% +3.74% / +0.71% +0.71% +0.53%] index_select strided 16 : Elapsed 0.006 ms (0.571 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +1.78% +0.00% +0.00% / +0.53% +0.71% +1.78%] index_select strided 64 : Elapsed 0.006 ms (0.571 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.89% +0.00% +0.18% / +0.89% +0.36% +0.36%] index_select strided 100 : Elapsed 0.006 ms (0.566 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +1.96% +0.00% +0.18% / +0.71% +0.53% +0.36%] index_select strided 255 : Elapsed 0.006 ms (0.572 ms / 100) 0.561 -> 0.564 ( +0.53%) [ +0.71% +0.18% +0.00% / +0.71% +1.60% +0.53%] index_select strided 256 : Elapsed 0.006 ms (0.565 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.71% +0.00% +0.00% / +0.71% +1.25% +0.89%] index_select strided 257 : Elapsed 0.006 ms (0.564 ms / 100) 0.561 -> 0.565 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.71% +1.25% +0.71%] index_select random : Elapsed 0.006 ms (0.566 ms / 100) 0.560 -> 0.564 ( +0.71%) [ +0.89% +0.18% +0.00% / +0.89% +0.89% +0.71%] index_select random_sorted : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.71% +0.00% +0.00% / +6.06% +0.36% +0.53%] index_select perm : Elapsed 0.006 ms (0.565 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.89% +0.89% +0.00% / +7.31% +0.36% +0.36%] index_select perm_sorted : Elapsed 0.006 ms (0.566 ms / 100) out_shape = [500, 5, 200] in_shape = [500, 1, 200] idx_dim = 1 B = [500, 5, 200] (stride (1000, 200, 1)) A = [500, 1, 200] (stride (200, 1, 1)) dim = 1 5.035 -> 5.041 ( +0.12%) [ +0.02% +0.00% +0.08% / +0.34% +0.12% +0.24%] index_add_ linear : Elapsed 0.050 ms (5.036 ms / 100) 4.888 -> 4.900 ( +0.25%) [ +0.00% +0.37% +0.08% / +0.25% +0.57% +0.53%] index_copy_ linear : Elapsed 0.049 ms (4.888 ms / 100) 5.035 -> 5.038 ( +0.06%) [ +0.06% +0.00% +0.04% / +0.40% +0.06% +0.48%] index_add_ reverse : Elapsed 0.050 ms (5.038 ms / 100) 4.889 -> 4.898 ( +0.18%) [ +0.27% +0.18% +0.00% / +0.33% +0.39% +0.18%] index_copy_ reverse : Elapsed 0.049 ms (4.902 ms / 100) 5.036 -> 5.040 ( +0.08%) [ +0.00% +0.18% +0.04% / +0.44% +0.10% +0.08%] index_add_ spread : Elapsed 0.050 ms (5.036 ms / 100) 4.894 -> 4.890 ( -0.08%) [ +0.18% +0.25% +0.00% / +0.59% +0.00% -0.08%] index_copy_ spread : Elapsed 0.049 ms (4.903 ms / 100) 5.043 -> 5.032 ( -0.22%) [ +0.14% +0.00% +0.10% / -0.22% -0.14% +0.14%] index_add_ strided 3 : Elapsed 0.050 ms (5.050 ms / 100) 4.898 -> 4.898 ( +0.00%) [ +0.24% +0.00% +0.06% / +0.02% +0.00% +0.14%] index_copy_ strided 3 : Elapsed 0.049 ms (4.910 ms / 100) 5.097 -> 5.096 ( -0.02%) [ +0.16% +0.06% +0.00% / -0.02% +0.06% +0.00%] index_add_ perm : Elapsed 0.051 ms (5.105 ms / 100) 4.897 -> 4.898 ( +0.02%) [ +0.35% +0.39% +0.00% / +0.14% +0.02% +0.08%] index_copy_ perm : Elapsed 0.049 ms (4.914 ms / 100) 5.088 -> 5.097 ( +0.18%) [ +0.39% +0.31% +0.00% / +0.18% +0.18% +0.31%] index_add_ perm_sorted : Elapsed 0.051 ms (5.108 ms / 100) 4.897 -> 4.900 ( +0.06%) [ +0.14% +0.06% +0.00% / +0.06% +0.27% +0.39%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.904 ms / 100) 7.686 -> 7.671 ( -0.20%) [ +0.01% +0.04% +0.00% / +0.09% -0.10% -0.20%] index_select const : Elapsed 0.077 ms (7.687 ms / 100) 7.669 -> 7.672 ( +0.04%) [ +0.07% +0.00% +0.34% / +0.26% +0.08% +0.04%] index_select wrap : Elapsed 0.077 ms (7.674 ms / 100) 7.670 -> 7.661 ( -0.12%) [ +0.04% +0.00% +0.31% / +0.18% -0.12% +0.12%] index_select linear : Elapsed 0.077 ms (7.673 ms / 100) 7.675 -> 7.662 ( -0.17%) [ +0.07% +0.00% +0.14% / +0.20% -0.17% +0.03%] index_select reverse : Elapsed 0.077 ms (7.680 ms / 100) 7.649 -> 7.664 ( +0.20%) [ +0.59% +0.00% +0.46% / +0.29% +0.20% +0.34%] index_select skip64 : Elapsed 0.077 ms (7.694 ms / 100) 7.688 -> 7.680 ( -0.10%) [ +0.07% +0.00% +0.00% / -0.09% -0.10% +0.08%] index_select skip256 : Elapsed 0.077 ms (7.693 ms / 100) 7.691 -> 7.678 ( -0.17%) [ +0.17% +0.05% +0.00% / +0.01% -0.17% -0.09%] index_select spread : Elapsed 0.077 ms (7.704 ms / 100) 7.673 -> 7.662 ( -0.14%) [ +0.01% +0.00% +0.08% / +0.13% +0.36% -0.14%] index_select random : Elapsed 0.077 ms (7.674 ms / 100) 7.663 -> 7.649 ( -0.18%) [ +0.20% +0.26% +0.00% / -0.13% -0.18% +0.07%] index_select random_sorted : Elapsed 0.077 ms (7.678 ms / 100) B = [500, 5, 200] (stride (1000, 200, 1)) A = [500, 1, 200] (stride (1, 100000, 500)) dim = 1 5.123 -> 5.105 ( -0.35%) [ +0.00% +0.18% +0.14% / +0.25% -0.35% +0.02%] index_add_ linear : Elapsed 0.051 ms (5.123 ms / 100) 4.972 -> 4.970 ( -0.04%) [ +0.08% +0.24% +0.00% / +0.04% -0.04% +0.10%] index_copy_ linear : Elapsed 0.050 ms (4.976 ms / 100) 5.119 -> 5.106 ( -0.25%) [ +0.10% +0.20% +0.00% / +0.23% -0.04% -0.25%] index_add_ reverse : Elapsed 0.051 ms (5.124 ms / 100) 4.965 -> 4.967 ( +0.04%) [ +0.08% +0.00% +0.02% / +0.30% +0.22% +0.04%] index_copy_ reverse : Elapsed 0.050 ms (4.969 ms / 100) 5.116 -> 5.111 ( -0.10%) [ +0.12% +0.10% +0.00% / +0.14% -0.10% -0.08%] index_add_ spread : Elapsed 0.051 ms (5.122 ms / 100) 4.961 -> 4.969 ( +0.16%) [ +0.18% +0.08% +0.00% / +0.22% +0.16% +0.22%] index_copy_ spread : Elapsed 0.050 ms (4.970 ms / 100) 5.116 -> 5.116 ( +0.00%) [ +0.22% +0.29% +0.00% / +0.31% +0.00% +0.35%] index_add_ strided 3 : Elapsed 0.051 ms (5.127 ms / 100) 4.972 -> 4.957 ( -0.30%) [ +0.04% +0.08% +0.00% / +0.02% -0.30% -0.10%] index_copy_ strided 3 : Elapsed 0.050 ms (4.974 ms / 100) 5.098 -> 5.105 ( +0.14%) [ +0.31% +0.45% +0.00% / +0.29% +0.49% +0.14%] index_add_ perm : Elapsed 0.051 ms (5.114 ms / 100) 4.945 -> 4.951 ( +0.12%) [ +0.51% +0.16% +0.00% / +0.40% +0.28% +0.12%] index_copy_ perm : Elapsed 0.050 ms (4.970 ms / 100) 5.102 -> 5.105 ( +0.06%) [ +0.00% +0.16% +0.02% / +0.06% +0.25% +0.22%] index_add_ perm_sorted : Elapsed 0.051 ms (5.102 ms / 100) 4.950 -> 4.962 ( +0.24%) [ +0.18% +0.24% +0.00% / +0.24% +0.28% +0.48%] index_copy_ perm_sorted : Elapsed 0.050 ms (4.959 ms / 100) 7.961 -> 7.960 ( -0.01%) [ +0.09% +0.08% +0.00% / -0.01% +0.13% +0.25%] index_select const : Elapsed 0.080 ms (7.968 ms / 100) 7.950 -> 7.952 ( +0.03%) [ +0.25% +0.15% +0.00% / +0.03% +0.14% +0.26%] index_select wrap : Elapsed 0.080 ms (7.970 ms / 100) 7.959 -> 7.963 ( +0.05%) [ +0.34% +0.04% +0.00% / +0.13% +0.21% +0.05%] index_select linear : Elapsed 0.080 ms (7.986 ms / 100) 7.963 -> 7.961 ( -0.03%) [ +0.11% +0.01% +0.00% / +0.24% -0.03% +0.00%] index_select reverse : Elapsed 0.080 ms (7.972 ms / 100) 7.968 -> 7.964 ( -0.05%) [ +0.21% +0.00% +0.04% / +0.11% -0.03% -0.05%] index_select skip64 : Elapsed 0.080 ms (7.985 ms / 100) 7.958 -> 7.965 ( +0.09%) [ +0.30% +0.00% +0.06% / +0.09% +0.09% +0.23%] index_select skip256 : Elapsed 0.080 ms (7.982 ms / 100) 7.953 -> 7.966 ( +0.16%) [ +0.18% +0.21% +0.00% / +0.34% +0.16% +0.25%] index_select spread : Elapsed 0.080 ms (7.967 ms / 100) 7.950 -> 7.970 ( +0.25%) [ +0.15% +0.29% +0.00% / +0.25% +0.36% +0.30%] index_select random : Elapsed 0.080 ms (7.962 ms / 100) 7.957 -> 7.959 ( +0.03%) [ +0.16% +0.20% +0.00% / +0.03% +0.06% +0.21%] index_select random_sorted : Elapsed 0.080 ms (7.970 ms / 100) B = [500, 5, 200] (stride (1, 100000, 500)) A = [500, 1, 200] (stride (200, 1, 1)) dim = 1 5.444 -> 5.452 ( +0.15%) [ +0.31% +0.22% +0.00% / +0.15% +0.46% +0.20%] index_add_ linear : Elapsed 0.055 ms (5.461 ms / 100) 5.166 -> 5.162 ( -0.08%) [ +0.00% +0.02% +0.04% / +0.17% +0.10% -0.08%] index_copy_ linear : Elapsed 0.052 ms (5.166 ms / 100) 5.453 -> 5.463 ( +0.18%) [ +0.22% +0.00% +0.04% / +0.20% +0.18% +0.20%] index_add_ reverse : Elapsed 0.055 ms (5.465 ms / 100) 5.154 -> 5.163 ( +0.17%) [ +0.50% +0.25% +0.00% / +0.50% +0.29% +0.17%] index_copy_ reverse : Elapsed 0.052 ms (5.180 ms / 100) 5.454 -> 5.454 ( +0.00%) [ +0.07% +0.07% +0.00% / +0.00% +0.13% +0.33%] index_add_ spread : Elapsed 0.055 ms (5.458 ms / 100) 5.162 -> 5.167 ( +0.10%) [ +0.41% +0.15% +0.00% / +0.33% +0.29% +0.10%] index_copy_ spread : Elapsed 0.052 ms (5.183 ms / 100) 5.443 -> 5.450 ( +0.13%) [ +0.00% +0.48% +0.11% / +0.57% +0.13% +0.22%] index_add_ strided 3 : Elapsed 0.054 ms (5.443 ms / 100) 5.162 -> 5.166 ( +0.08%) [ +0.12% +0.39% +0.00% / +0.33% +0.08% +0.29%] index_copy_ strided 3 : Elapsed 0.052 ms (5.168 ms / 100) 5.455 -> 5.460 ( +0.09%) [ +0.00% +0.09% +0.20% / +0.09% +0.11% +0.24%] index_add_ perm : Elapsed 0.055 ms (5.455 ms / 100) 5.156 -> 5.173 ( +0.33%) [ +0.06% +0.04% +0.00% / +0.33% +0.35% +0.45%] index_copy_ perm : Elapsed 0.052 ms (5.159 ms / 100) 5.460 -> 5.467 ( +0.13%) [ +0.00% +0.07% +0.02% / +0.24% +0.16% +0.13%] index_add_ perm_sorted : Elapsed 0.055 ms (5.460 ms / 100) 5.160 -> 5.165 ( +0.10%) [ +0.00% +0.02% +0.10% / +0.12% +0.41% +0.10%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.160 ms / 100) 8.483 -> 8.473 ( -0.12%) [ +0.08% +0.00% +0.08% / +0.22% -0.12% +0.09%] index_select const : Elapsed 0.085 ms (8.490 ms / 100) 8.469 -> 8.486 ( +0.20%) [ +0.24% +0.18% +0.00% / +0.27% +0.21% +0.20%] index_select wrap : Elapsed 0.085 ms (8.489 ms / 100) 8.490 -> 8.500 ( +0.12%) [ +0.12% +0.07% +0.00% / +0.19% +0.14% +0.12%] index_select linear : Elapsed 0.085 ms (8.500 ms / 100) 8.476 -> 8.500 ( +0.28%) [ +0.25% +0.09% +0.00% / +0.33% +0.29% +0.28%] index_select reverse : Elapsed 0.085 ms (8.497 ms / 100) 8.485 -> 8.480 ( -0.06%) [ +0.00% +0.06% +0.06% / -0.06% +0.12% +0.00%] index_select skip64 : Elapsed 0.085 ms (8.485 ms / 100) 8.463 -> 8.496 ( +0.39%) [ +0.18% +0.28% +0.00% / +0.40% +0.39% +0.39%] index_select skip256 : Elapsed 0.085 ms (8.478 ms / 100) 8.472 -> 8.478 ( +0.07%) [ +0.19% +0.00% +0.14% / +0.20% +0.22% +0.07%] index_select spread : Elapsed 0.085 ms (8.488 ms / 100) 8.482 -> 8.475 ( -0.08%) [ +0.18% +0.15% +0.00% / -0.08% +0.24% +0.06%] index_select random : Elapsed 0.085 ms (8.497 ms / 100) 8.474 -> 8.478 ( +0.05%) [ +0.14% +0.00% +0.13% / +0.05% +0.34% +0.31%] index_select random_sorted : Elapsed 0.085 ms (8.486 ms / 100) B = [500, 5, 200] (stride (1, 100000, 500)) A = [500, 1, 200] (stride (200, 100000, 1)) dim = 1 5.446 -> 5.459 ( +0.24%) [ +0.09% +0.00% +0.02% / +0.29% +0.24% +0.48%] index_add_ linear : Elapsed 0.055 ms (5.451 ms / 100) 5.164 -> 5.159 ( -0.10%) [ +0.15% +0.12% +0.00% / +0.31% -0.10% -0.06%] index_copy_ linear : Elapsed 0.052 ms (5.172 ms / 100) 5.444 -> 5.456 ( +0.22%) [ +0.09% +0.04% +0.00% / +0.22% +0.53% +0.42%] index_add_ reverse : Elapsed 0.054 ms (5.449 ms / 100) 5.159 -> 5.162 ( +0.06%) [ +0.19% +0.00% +0.10% / +0.29% +0.43% +0.06%] index_copy_ reverse : Elapsed 0.052 ms (5.169 ms / 100) 5.438 -> 5.446 ( +0.15%) [ +0.18% +0.00% +0.15% / +0.15% +0.39% +0.17%] index_add_ spread : Elapsed 0.054 ms (5.448 ms / 100) 5.165 -> 5.159 ( -0.12%) [ +0.00% +0.00% +0.12% / -0.10% -0.12% -0.04%] index_copy_ spread : Elapsed 0.052 ms (5.165 ms / 100) 5.442 -> 5.435 ( -0.13%) [ +0.17% +0.00% +0.07% / -0.13% +0.51% +0.55%] index_add_ strided 3 : Elapsed 0.055 ms (5.451 ms / 100) 5.156 -> 5.161 ( +0.10%) [ +0.39% +0.00% +0.33% / +0.33% +0.21% +0.10%] index_copy_ strided 3 : Elapsed 0.052 ms (5.176 ms / 100) 5.446 -> 5.461 ( +0.28%) [ +0.29% +0.31% +0.00% / +0.28% +0.55% +0.46%] index_add_ perm : Elapsed 0.055 ms (5.462 ms / 100) 5.151 -> 5.150 ( -0.02%) [ +0.02% +0.19% +0.00% / +0.14% -0.02% +0.31%] index_copy_ perm : Elapsed 0.052 ms (5.152 ms / 100) 5.444 -> 5.445 ( +0.02%) [ +0.50% +0.33% +0.00% / +0.02% +0.42% +0.51%] index_add_ perm_sorted : Elapsed 0.055 ms (5.471 ms / 100) 5.152 -> 5.155 ( +0.06%) [ +0.19% +0.00% +0.00% / +0.06% +0.06% +0.29%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.162 ms / 100) 8.490 -> 8.484 ( -0.07%) [ +0.31% +0.00% +0.02% / -0.07% -0.05% +0.04%] index_select const : Elapsed 0.085 ms (8.516 ms / 100) 8.484 -> 8.484 ( +0.00%) [ +0.14% +0.01% +0.00% / +0.04% +0.00% +0.13%] index_select wrap : Elapsed 0.085 ms (8.496 ms / 100) 8.483 -> 8.477 ( -0.07%) [ +0.24% +0.00% +0.05% / +0.13% +0.06% -0.07%] index_select linear : Elapsed 0.085 ms (8.503 ms / 100) 8.497 -> 8.475 ( -0.26%) [ +0.00% +0.13% +0.02% / +0.08% -0.15% -0.26%] index_select reverse : Elapsed 0.085 ms (8.497 ms / 100) 8.489 -> 8.485 ( -0.05%) [ +0.06% +0.07% +0.00% / -0.05% +0.06% +0.04%] index_select skip64 : Elapsed 0.085 ms (8.494 ms / 100) 8.485 -> 8.478 ( -0.08%) [ +0.14% +0.00% +0.09% / +0.16% -0.08% -0.05%] index_select skip256 : Elapsed 0.085 ms (8.497 ms / 100) 8.487 -> 8.490 ( +0.04%) [ +0.09% +0.00% +0.07% / +0.05% +0.04% +0.04%] index_select spread : Elapsed 0.085 ms (8.495 ms / 100) 8.477 -> 8.466 ( -0.13%) [ +0.05% +0.00% +0.02% / +0.00% -0.13% +0.25%] index_select random : Elapsed 0.085 ms (8.481 ms / 100) 8.478 -> 8.470 ( -0.09%) [ +0.22% +0.00% +0.04% / +0.26% -0.09% +0.00%] index_select random_sorted : Elapsed 0.085 ms (8.497 ms / 100) B = [500, 5, 200] (stride (1, 500, 2500)) A = [500, 1, 200] (stride (200, 200, 1)) dim = 1 5.610 -> 5.631 ( +0.37%) [ +0.27% +0.00% +0.07% / +0.37% +0.70% +0.86%] index_add_ linear : Elapsed 0.056 ms (5.625 ms / 100) 5.197 -> 5.213 ( +0.31%) [ +0.25% +0.00% +0.40% / +0.46% +0.54% +0.31%] index_copy_ linear : Elapsed 0.052 ms (5.210 ms / 100) 5.620 -> 5.628 ( +0.14%) [ +0.23% +0.00% +0.16% / +0.14% +0.66% +0.66%] index_add_ reverse : Elapsed 0.056 ms (5.633 ms / 100) 5.207 -> 5.210 ( +0.06%) [ +0.00% +0.10% +0.02% / +0.06% +0.08% +0.15%] index_copy_ reverse : Elapsed 0.052 ms (5.207 ms / 100) 5.616 -> 5.621 ( +0.09%) [ +0.07% +0.00% +0.21% / +0.09% +0.64% +0.61%] index_add_ spread : Elapsed 0.056 ms (5.620 ms / 100) 5.203 -> 5.205 ( +0.04%) [ +0.17% +0.00% +0.08% / +0.27% +0.27% +0.04%] index_copy_ spread : Elapsed 0.052 ms (5.212 ms / 100) 5.624 -> 5.629 ( +0.09%) [ +0.04% +0.04% +0.00% / +0.09% +0.37% +0.75%] index_add_ strided 3 : Elapsed 0.056 ms (5.626 ms / 100) 5.198 -> 5.205 ( +0.13%) [ +0.29% +0.00% +0.29% / +0.27% +0.13% +0.35%] index_copy_ strided 3 : Elapsed 0.052 ms (5.213 ms / 100) 5.617 -> 5.617 ( +0.00%) [ +0.18% +0.00% +0.16% / +0.00% +0.61% +0.75%] index_add_ perm : Elapsed 0.056 ms (5.627 ms / 100) 5.219 -> 5.223 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.17% +0.08% +0.10%] index_copy_ perm : Elapsed 0.052 ms (5.219 ms / 100) 5.619 -> 5.618 ( -0.02%) [ +0.43% +0.00% +0.02% / -0.02% +0.27% +0.59%] index_add_ perm_sorted : Elapsed 0.056 ms (5.643 ms / 100) 5.215 -> 5.218 ( +0.06%) [ +0.25% +0.00% +0.25% / +0.08% +0.06% +0.21%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.228 ms / 100) 8.625 -> 8.641 ( +0.19%) [ +0.13% +0.00% +0.21% / +0.19% +0.68% +0.34%] index_select const : Elapsed 0.086 ms (8.636 ms / 100) 8.620 -> 8.645 ( +0.29%) [ +0.14% +0.00% +0.27% / +0.29% +0.81% +0.72%] index_select wrap : Elapsed 0.086 ms (8.632 ms / 100) 8.649 -> 8.685 ( +0.42%) [ +0.25% +0.34% +0.00% / +0.44% +0.47% +0.42%] index_select linear : Elapsed 0.087 ms (8.671 ms / 100) 8.674 -> 8.662 ( -0.14%) [ +0.01% +0.00% +0.03% / -0.14% +0.07% +0.06%] index_select reverse : Elapsed 0.087 ms (8.675 ms / 100) 8.641 -> 8.659 ( +0.21%) [ +0.22% +0.00% +0.09% / +0.21% +0.27% +0.52%] index_select skip64 : Elapsed 0.087 ms (8.660 ms / 100) 8.625 -> 8.635 ( +0.12%) [ +0.24% +0.00% +0.17% / +0.12% +0.64% +0.46%] index_select skip256 : Elapsed 0.086 ms (8.646 ms / 100) 8.627 -> 8.636 ( +0.10%) [ +0.06% +0.00% +0.02% / +0.10% +0.64% +0.46%] index_select spread : Elapsed 0.086 ms (8.632 ms / 100) 8.634 -> 8.639 ( +0.06%) [ +0.10% +0.00% +0.01% / +0.06% +0.47% +0.64%] index_select random : Elapsed 0.086 ms (8.643 ms / 100) 8.631 -> 8.645 ( +0.16%) [ +0.14% +0.00% +0.08% / +0.16% +0.37% +0.53%] index_select random_sorted : Elapsed 0.086 ms (8.643 ms / 100) out_shape = [500, 1, 5] in_shape = [500, 1, 200] idx_dim = 2 B = [500, 1, 5] (stride (5, 2500, 1)) A = [500, 1, 200] (stride (1, 1, 500)) dim = 2 1.182 -> 1.184 ( +0.17%) [ +0.42% +0.00% +0.08% / +0.51% +0.25% +0.17%] index_select const : Elapsed 0.012 ms (1.187 ms / 100) 1.179 -> 1.185 ( +0.51%) [ +0.76% +0.25% +0.00% / +0.59% +0.68% +0.51%] index_select wrap : Elapsed 0.012 ms (1.188 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.00% +0.34% / +0.68% +0.59% +0.68%] index_select linear : Elapsed 0.012 ms (1.187 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.00% +0.76% / +0.59% +0.68% +0.68%] index_select reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.08% +0.00% / +0.68% +0.59% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.187 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.93% +0.00% +0.25% / +0.76% +0.85% +0.93%] index_select skip256 : Elapsed 0.012 ms (1.188 ms / 100) 1.181 -> 1.186 ( +0.42%) [ +0.42% +0.00% +0.00% / +0.59% +0.85% +0.42%] index_select spread : Elapsed 0.012 ms (1.186 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.76% +0.34% +0.00% / +0.76% +0.76% +0.85%] index_select strided 3 : Elapsed 0.012 ms (1.186 ms / 100) 1.184 -> 1.185 ( +0.08%) [ +0.34% +0.00% +0.25% / +0.17% +0.17% +0.08%] index_select strided 5 : Elapsed 0.012 ms (1.188 ms / 100) 1.178 -> 1.186 ( +0.68%) [ +0.85% +0.00% +0.25% / +0.68% +0.68% +0.68%] index_select strided 7 : Elapsed 0.012 ms (1.188 ms / 100) 1.178 -> 1.185 ( +0.59%) [ +0.68% +0.00% +0.68% / +0.68% +0.76% +0.59%] index_select strided 8 : Elapsed 0.012 ms (1.186 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +1.02% +0.00% +0.08% / +0.76% +1.02% +1.02%] index_select strided 16 : Elapsed 0.012 ms (1.189 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.08% +0.00% / +0.59% +0.76% +0.85%] index_select strided 64 : Elapsed 0.012 ms (1.187 ms / 100) 1.178 -> 1.185 ( +0.59%) [ +0.85% +0.00% +0.00% / +0.59% +0.68% +0.68%] index_select strided 100 : Elapsed 0.012 ms (1.188 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.93% +0.00% +0.42% / +0.76% +0.93% +1.02%] index_select random : Elapsed 0.012 ms (1.188 ms / 100) 1.176 -> 1.188 ( +1.02%) [ +0.94% +0.00% +0.26% / +1.02% +1.02% +1.02%] index_select random_sorted : Elapsed 0.012 ms (1.187 ms / 100) 1.178 -> 1.186 ( +0.68%) [ +0.93% +0.17% +0.00% / +0.68% +0.93% +0.85%] index_select perm : Elapsed 0.012 ms (1.189 ms / 100) 1.178 -> 1.185 ( +0.59%) [ +0.59% +0.25% +0.00% / +0.59% +0.76% +0.76%] index_select perm_sorted : Elapsed 0.012 ms (1.185 ms / 100) out_shape = [5, 200, 1] in_shape = [500, 200, 1] idx_dim = 0 B = [5, 200, 1] (stride (200, 1, 1)) dim = 0 fill_cnt = 500 4.120 -> 4.164 ( +1.07%) [ +1.17% +0.00% +0.19% / +1.09% +1.07% +1.09%] index_fill_ const : Elapsed 0.042 ms (4.168 ms / 100) 4.124 -> 4.168 ( +1.07%) [ +1.12% +0.00% +0.02% / +1.07% +1.16% +1.12%] index_fill_ linear : Elapsed 0.042 ms (4.170 ms / 100) 4.120 -> 4.164 ( +1.07%) [ +1.12% +0.00% +0.00% / +1.07% +1.17% +1.19%] index_fill_ reverse : Elapsed 0.042 ms (4.166 ms / 100) 4.130 -> 4.170 ( +0.97%) [ +1.28% +0.00% +0.00% / +1.04% +0.99% +0.97%] index_fill_ skip64 : Elapsed 0.042 ms (4.183 ms / 100) 4.126 -> 4.171 ( +1.09%) [ +1.14% +0.00% +0.05% / +1.09% +1.16% +1.16%] index_fill_ skip256 : Elapsed 0.042 ms (4.173 ms / 100) 4.128 -> 4.170 ( +1.02%) [ +1.21% +0.02% +0.00% / +1.11% +1.04% +1.02%] index_fill_ spread : Elapsed 0.042 ms (4.178 ms / 100) 4.118 -> 4.164 ( +1.12%) [ +1.14% +0.02% +0.00% / +1.12% +1.19% +1.21%] index_fill_ strided 3 : Elapsed 0.042 ms (4.165 ms / 100) 4.125 -> 4.170 ( +1.09%) [ +1.09% +0.00% +0.15% / +1.09% +1.09% +1.09%] index_fill_ random : Elapsed 0.042 ms (4.170 ms / 100) 4.120 -> 4.165 ( +1.09%) [ +1.14% +0.02% +0.00% / +1.09% +1.14% +1.17%] index_fill_ random_sorted : Elapsed 0.042 ms (4.167 ms / 100) B = [5, 200, 1] (stride (200, 1, 1)) A = [500, 200, 1] (stride (200, 1, 200)) dim = 0 0.555 -> 0.558 ( +0.54%) [+10.63% +0.00% +0.00% / +0.72% +0.90% +0.54%] index_select const : Elapsed 0.006 ms (0.614 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +0.00% / +1.26% +1.08% +1.08%] index_select wrap : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.00% +0.18% / +0.90% +0.90% +0.90%] index_select linear : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +4.14% +0.00% +0.72% / +0.90% +0.90% +13.33%] index_select reverse : Elapsed 0.006 ms (0.578 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +4.14% +0.00% +1.80% / +0.54% +0.54% +0.36%] index_select skip64 : Elapsed 0.006 ms (0.579 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.72% +0.18% +0.00% / +1.26% +0.72% +0.72%] index_select skip256 : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +1.08% +0.00% +0.36% / +1.08% +0.90% +1.08%] index_select spread : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +2.34% +0.00% +0.54% / +0.90% +0.90% +1.08%] index_select strided 3 : Elapsed 0.006 ms (0.568 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +2.88% +0.00% +3.96% / +0.90% +0.90% +0.90%] index_select strided 5 : Elapsed 0.006 ms (0.572 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +7.75% +0.00% +9.55% / +1.08% +7.39% +0.90%] index_select strided 7 : Elapsed 0.006 ms (0.598 ms / 100) 0.559 -> 0.559 ( +0.00%) [ +0.36% +0.00% +3.04% / +0.18% +0.00% +0.18%] index_select strided 8 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +48.29% / +0.72% +0.72% +0.72%] index_select strided 16 : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +3.96% +0.00% +1.62% / +0.90% +0.90% +0.72%] index_select strided 64 : Elapsed 0.006 ms (0.577 ms / 100) 0.555 -> 0.559 ( +0.72%) [+11.71% +0.00% +0.00% / +0.72% +1.08% +0.90%] index_select strided 100 : Elapsed 0.006 ms (0.620 ms / 100) 0.554 -> 0.561 ( +1.26%) [ +1.26% +0.00% +1.08% / +4.51% +1.26% +1.26%] index_select strided 255 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +0.90% +0.72%] index_select strided 256 : Elapsed 0.006 ms (0.559 ms / 100) 0.554 -> 0.561 ( +1.26%) [ +1.44% +0.18% +0.00% / +1.26% +1.26% +4.69%] index_select strided 257 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.18% +0.00% / +1.08% +2.88% +1.08%] index_select random : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +0.72% +0.00% +0.00% / +2.70% +0.90% +4.32%] index_select random_sorted : Elapsed 0.006 ms (0.560 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +0.54% / +1.08% +1.08% +1.08%] index_select perm : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.18% +0.00% / +1.08% +1.08% +1.08%] index_select perm_sorted : Elapsed 0.006 ms (0.561 ms / 100) B = [5, 200, 1] (stride (200, 1, 200)) A = [500, 200, 1] (stride (1, 500, 500)) dim = 0 0.590 -> 0.597 ( +1.19%) [ +1.53% +0.34% +0.00% / +1.36% +1.19% +7.29%] index_select const : Elapsed 0.006 ms (0.599 ms / 100) 0.587 -> 0.593 ( +1.02%) [ +1.87% +0.00% +0.34% / +1.02% +2.21% +1.70%] index_select wrap : Elapsed 0.006 ms (0.598 ms / 100) 0.591 -> 0.599 ( +1.35%) [ +1.18% +0.34% +0.00% / +1.35% +1.35% +1.35%] index_select linear : Elapsed 0.006 ms (0.598 ms / 100) 0.587 -> 0.592 ( +0.85%) [ +1.02% +0.00% +0.34% / +0.85% +1.87% +2.21%] index_select reverse : Elapsed 0.006 ms (0.593 ms / 100) 0.591 -> 0.598 ( +1.18%) [ +1.18% +0.00% +0.17% / +1.18% +1.35% +1.35%] index_select skip64 : Elapsed 0.006 ms (0.598 ms / 100) 0.592 -> 0.599 ( +1.18%) [ +1.01% +0.00% +1.01% / +1.18% +1.18% +1.18%] index_select skip256 : Elapsed 0.006 ms (0.598 ms / 100) 0.582 -> 0.585 ( +0.52%) [ +0.69% +0.00% +1.89% / +0.52% +1.20% +3.95%] index_select spread : Elapsed 0.006 ms (0.586 ms / 100) 0.592 -> 0.599 ( +1.18%) [ +1.35% +0.00% +0.34% / +1.18% +1.18% +1.52%] index_select strided 3 : Elapsed 0.006 ms (0.600 ms / 100) 0.595 -> 0.600 ( +0.84%) [ +0.84% +0.00% +0.17% / +0.84% +1.01% +1.01%] index_select strided 5 : Elapsed 0.006 ms (0.600 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.02% +0.00% +0.34% / +1.02% +2.04% +1.70%] index_select strided 7 : Elapsed 0.006 ms (0.595 ms / 100) 0.592 -> 0.596 ( +0.68%) [ +1.35% +0.00% +0.00% / +0.68% +1.35% +1.18%] index_select strided 8 : Elapsed 0.006 ms (0.600 ms / 100) 0.582 -> 0.586 ( +0.69%) [ +0.69% +0.00% +2.06% / +0.69% +0.69% +1.03%] index_select strided 16 : Elapsed 0.006 ms (0.586 ms / 100) 0.591 -> 0.598 ( +1.18%) [ +1.69% +0.34% +0.00% / +1.35% +1.18% +1.35%] index_select strided 64 : Elapsed 0.006 ms (0.601 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +0.52% +0.00% +0.00% / +0.86% +0.86% +0.86%] index_select strided 100 : Elapsed 0.006 ms (0.585 ms / 100) 0.587 -> 0.592 ( +0.85%) [ +1.02% +0.17% +0.00% / +0.85% +1.87% +1.53%] index_select strided 255 : Elapsed 0.006 ms (0.593 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +0.86% +0.00% +0.34% / +0.86% +1.03% +0.86%] index_select strided 256 : Elapsed 0.006 ms (0.587 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +0.86% +0.00% +0.34% / +1.03% +0.86% +1.20%] index_select strided 257 : Elapsed 0.006 ms (0.587 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.19% +0.00% +0.34% / +1.19% +2.03% +4.24%] index_select random : Elapsed 0.006 ms (0.597 ms / 100) 0.583 -> 0.586 ( +0.51%) [ +0.51% +0.17% +0.00% / +0.51% +0.69% +0.69%] index_select random_sorted : Elapsed 0.006 ms (0.586 ms / 100) 0.592 -> 0.598 ( +1.01%) [ +1.01% +0.00% +0.34% / +1.18% +1.18% +1.01%] index_select perm : Elapsed 0.006 ms (0.598 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.69% +0.00% +0.00% / +1.53% +1.53% +1.19%] index_select perm_sorted : Elapsed 0.006 ms (0.600 ms / 100) out_shape = [500, 5, 1] in_shape = [500, 200, 1] idx_dim = 1 B = [500, 5, 1] (stride (5, 1, 1)) dim = 1 fill_cnt = 200 1.853 -> 1.870 ( +0.92%) [ +0.97% +0.00% +0.05% / +0.92% +1.08% +1.13%] index_fill_ const : Elapsed 0.019 ms (1.871 ms / 100) 1.849 -> 1.868 ( +1.03%) [ +1.03% +0.00% +0.05% / +1.30% +1.03% +1.08%] index_fill_ linear : Elapsed 0.019 ms (1.868 ms / 100) 1.846 -> 1.870 ( +1.30%) [ +1.03% +0.00% +0.05% / +1.52% +1.30% +1.35%] index_fill_ reverse : Elapsed 0.019 ms (1.865 ms / 100) 1.844 -> 1.868 ( +1.30%) [ +1.03% +0.05% +0.00% / +1.30% +1.63% +1.68%] index_fill_ skip64 : Elapsed 0.019 ms (1.863 ms / 100) 1.857 -> 1.870 ( +0.70%) [ +0.86% +0.00% +0.05% / +0.86% +0.70% +0.70%] index_fill_ skip256 : Elapsed 0.019 ms (1.873 ms / 100) 1.858 -> 1.865 ( +0.38%) [ +1.02% +0.16% +0.00% / +0.38% +0.54% +0.59%] index_fill_ spread : Elapsed 0.019 ms (1.877 ms / 100) 1.864 -> 1.866 ( +0.11%) [ +1.13% +0.00% +0.91% / +0.27% +0.11% +0.16%] index_fill_ strided 3 : Elapsed 0.019 ms (1.885 ms / 100) 1.873 -> 1.865 ( -0.43%) [ +0.85% +0.00% +0.16% / -0.27% -0.27% -0.43%] index_fill_ random : Elapsed 0.019 ms (1.889 ms / 100) 1.848 -> 1.868 ( +1.08%) [ +1.14% +0.00% +0.05% / +1.08% +1.57% +1.73%] index_fill_ random_sorted : Elapsed 0.019 ms (1.869 ms / 100) B = [500, 5, 1] (stride (1, 500, 2500)) A = [500, 200, 1] (stride (1, 500, 1)) dim = 1 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +1.80% / +0.90% +0.72% +0.54%] index_select const : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.560 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.90% +0.72% +1.80%] index_select wrap : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +1.08% +0.00% +0.00% / +0.90% +0.54% +0.36%] index_select linear : Elapsed 0.006 ms (0.562 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.90% +0.00% +0.00% / +1.08% +0.54% +0.72%] index_select reverse : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +7.57% / +1.08% +1.08% +10.45%] index_select skip64 : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.00% +0.00% / +1.08% +1.80% +0.90%] index_select skip256 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.558 ( +0.36%) [ +0.90% +0.00% +0.00% / +0.90% +0.54% +0.36%] index_select spread : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +0.90% +0.00% +0.18% / +1.08% +0.90% +6.12%] index_select strided 3 : Elapsed 0.006 ms (0.561 ms / 100) 0.556 -> 0.561 ( +0.90%) [ +0.90% +0.00% +7.01% / +1.08% +0.90% +0.90%] index_select strided 5 : Elapsed 0.006 ms (0.561 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.90% +0.00% +10.41% / +3.41% +0.72% +0.72%] index_select strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.26% +0.00% +28.65% / +0.90% +1.26% +0.90%] index_select strided 8 : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.54% +0.00% +0.18% / +0.72% +1.26% +1.08%] index_select strided 16 : Elapsed 0.006 ms (0.558 ms / 100) 0.556 -> 0.559 ( +0.54%) [ +0.54% +0.00% +7.73% / +0.54% +0.72% +0.90%] index_select strided 64 : Elapsed 0.006 ms (0.559 ms / 100) 0.555 -> 0.562 ( +1.26%) [ +2.34% +0.00% +0.00% / +4.32% +1.26% +2.88%] index_select strided 100 : Elapsed 0.006 ms (0.568 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.54% +0.18% +0.00% / +0.72% +6.85% +1.08%] index_select random : Elapsed 0.006 ms (0.558 ms / 100) 0.555 -> 0.561 ( +1.08%) [ +1.08% +0.00% +0.18% / +1.08% +1.08% +1.26%] index_select random_sorted : Elapsed 0.006 ms (0.561 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.26% +0.00% +0.18% / +0.90% +1.08% +0.90%] index_select perm : Elapsed 0.006 ms (0.562 ms / 100) 0.555 -> 0.560 ( +0.90%) [ +1.08% +0.18% +0.00% / +0.90% +1.08% +1.08%] index_select perm_sorted : Elapsed 0.006 ms (0.561 ms / 100) out_shape = [500, 200, 5] in_shape = [500, 200, 1] idx_dim = 2 B = [500, 200, 5] (stride (1000, 5, 1)) A = [500, 200, 1] (stride (1, 500, 100000)) dim = 2 9.156 -> 9.171 ( +0.16%) [ +0.00% +0.15% +0.11% / +0.16% +0.43% +0.46%] index_add_ linear : Elapsed 0.092 ms (9.156 ms / 100) 9.465 -> 9.471 ( +0.06%) [ +0.02% +0.00% +0.36% / +0.06% +0.39% +0.25%] index_copy_ linear : Elapsed 0.095 ms (9.467 ms / 100) 9.160 -> 9.178 ( +0.20%) [ +0.00% +0.05% +0.04% / +0.20% +0.38% +0.41%] index_add_ reverse : Elapsed 0.092 ms (9.160 ms / 100) 9.458 -> 9.479 ( +0.22%) [ +0.00% +0.07% +0.21% / +0.23% +0.39% +0.22%] index_copy_ reverse : Elapsed 0.095 ms (9.458 ms / 100) 9.159 -> 9.177 ( +0.20%) [ +0.00% +0.08% +0.03% / +0.40% +0.28% +0.20%] index_add_ spread : Elapsed 0.092 ms (9.159 ms / 100) 9.464 -> 9.466 ( +0.02%) [ +0.12% +0.00% +0.21% / +0.20% +0.12% +0.02%] index_copy_ spread : Elapsed 0.095 ms (9.475 ms / 100) 9.129 -> 9.161 ( +0.35%) [ +0.48% +0.00% +0.51% / +0.35% +0.72% +0.77%] index_add_ strided 3 : Elapsed 0.092 ms (9.173 ms / 100) 9.448 -> 9.434 ( -0.15%) [ +0.08% +0.03% +0.00% / -0.15% +0.62% +0.50%] index_copy_ strided 3 : Elapsed 0.095 ms (9.456 ms / 100) 9.164 -> 9.171 ( +0.08%) [ +0.16% +0.02% +0.00% / +0.08% +0.23% +0.20%] index_add_ perm : Elapsed 0.092 ms (9.179 ms / 100) 9.442 -> 9.461 ( +0.20%) [ +0.00% +0.04% +0.13% / +0.28% +0.55% +0.20%] index_copy_ perm : Elapsed 0.094 ms (9.442 ms / 100) 9.166 -> 9.158 ( -0.09%) [ +0.11% +0.10% +0.00% / -0.09% +0.11% +0.05%] index_add_ perm_sorted : Elapsed 0.092 ms (9.176 ms / 100) 9.443 -> 9.457 ( +0.15%) [ +0.22% +0.00% +0.13% / +0.15% +0.40% +0.31%] index_copy_ perm_sorted : Elapsed 0.095 ms (9.464 ms / 100) 24.699 -> 24.766 ( +0.27%) [ +0.14% +0.25% +0.00% / +0.27% +0.31% +0.67%] index_select const : Elapsed 0.247 ms (24.733 ms / 100) 24.718 -> 24.720 ( +0.01%) [ +0.07% +0.00% +0.03% / +0.01% +0.40% +0.40%] index_select wrap : Elapsed 0.247 ms (24.735 ms / 100) 24.724 -> 24.717 ( -0.03%) [ +0.25% +0.10% +0.00% / -0.03% +0.37% +0.47%] index_select linear : Elapsed 0.248 ms (24.787 ms / 100) 24.739 -> 24.748 ( +0.04%) [ +0.07% +0.19% +0.00% / +0.04% +0.21% +0.28%] index_select reverse : Elapsed 0.248 ms (24.756 ms / 100) 24.743 -> 24.728 ( -0.06%) [ +0.08% +0.00% +0.01% / -0.06% +0.20% +0.44%] index_select skip64 : Elapsed 0.248 ms (24.763 ms / 100) 24.724 -> 24.785 ( +0.25%) [ +0.25% +0.03% +0.00% / +0.25% +0.34% +0.30%] index_select skip256 : Elapsed 0.248 ms (24.786 ms / 100) 24.730 -> 24.779 ( +0.20%) [ +0.10% +0.20% +0.00% / +0.20% +0.32% +0.36%] index_select spread : Elapsed 0.248 ms (24.755 ms / 100) 24.727 -> 24.716 ( -0.04%) [ +0.11% +0.00% +0.19% / -0.04% +0.47% +0.33%] index_select random : Elapsed 0.248 ms (24.753 ms / 100) 24.696 -> 24.765 ( +0.28%) [ +0.18% +0.00% +0.16% / +0.28% +0.63% +0.48%] index_select random_sorted : Elapsed 0.247 ms (24.740 ms / 100) B = [500, 200, 5] (stride (5, 2500, 1)) A = [500, 200, 1] (stride (200, 1, 200)) dim = 2 10.042 -> 10.039 ( -0.03%) [ +0.35% +0.29% +0.00% / -0.03% +0.35% +0.09%] index_add_ linear : Elapsed 0.101 ms (10.077 ms / 100) 9.202 -> 9.198 ( -0.04%) [ +0.52% +0.00% +0.00% / -0.03% +0.15% -0.04%] index_copy_ linear : Elapsed 0.093 ms (9.250 ms / 100) 10.044 -> 10.059 ( +0.15%) [ +0.12% +0.40% +0.00% / +0.15% +0.26% +0.29%] index_add_ reverse : Elapsed 0.101 ms (10.056 ms / 100) 9.192 -> 9.206 ( +0.15%) [ +0.17% +0.27% +0.00% / +0.30% +0.15% +0.28%] index_copy_ reverse : Elapsed 0.092 ms (9.208 ms / 100) 10.042 -> 10.061 ( +0.19%) [ +0.31% +0.06% +0.00% / +0.44% +0.19% +0.25%] index_add_ spread : Elapsed 0.101 ms (10.073 ms / 100) 9.206 -> 9.192 ( -0.15%) [ +0.12% +0.00% +0.02% / +0.27% -0.15% +0.07%] index_copy_ spread : Elapsed 0.092 ms (9.217 ms / 100) 10.075 -> 10.041 ( -0.34%) [ +0.51% +0.05% +0.00% / -0.34% +0.10% -0.32%] index_add_ strided 3 : Elapsed 0.101 ms (10.126 ms / 100) 9.209 -> 9.201 ( -0.09%) [ +0.00% +0.16% +0.31% / +0.07% -0.08% -0.09%] index_copy_ strided 3 : Elapsed 0.092 ms (9.209 ms / 100) 10.050 -> 10.006 ( -0.44%) [ +0.14% +0.00% +0.28% / +0.13% -0.44% -0.42%] index_add_ perm : Elapsed 0.101 ms (10.064 ms / 100) 9.215 -> 9.178 ( -0.40%) [ +0.02% +0.00% +0.03% / +0.13% -0.36% -0.40%] index_copy_ perm : Elapsed 0.092 ms (9.217 ms / 100) 10.021 -> 10.004 ( -0.17%) [ +0.24% +0.00% +0.61% / -0.17% +0.17% +0.18%] index_add_ perm_sorted : Elapsed 0.100 ms (10.045 ms / 100) 9.188 -> 9.177 ( -0.12%) [ +0.34% +0.00% +0.73% / +0.52% -0.12% +0.02%] index_copy_ perm_sorted : Elapsed 0.092 ms (9.219 ms / 100) 26.081 -> 26.057 ( -0.09%) [ +1.63% +0.00% +1.49% / +1.26% -0.09% +0.58%] index_select const : Elapsed 0.265 ms (26.505 ms / 100) 26.497 -> 26.173 ( -1.22%) [ +0.30% +0.58% +0.00% / +0.33% -0.76% -1.22%] index_select wrap : Elapsed 0.266 ms (26.576 ms / 100) 26.464 -> 26.062 ( -1.52%) [ +0.39% +0.00% +0.63% / -0.20% -0.30% -1.52%] index_select linear : Elapsed 0.266 ms (26.568 ms / 100) 26.571 -> 26.110 ( -1.73%) [ +0.50% +0.41% +0.00% / +0.58% -0.48% -1.73%] index_select reverse : Elapsed 0.267 ms (26.705 ms / 100) 26.387 -> 26.232 ( -0.59%) [ +0.20% +0.00% +0.43% / -0.59% -0.53% -0.10%] index_select skip64 : Elapsed 0.264 ms (26.439 ms / 100) 26.395 -> 26.059 ( -1.27%) [ +0.00% +1.39% +0.55% / +1.67% -1.27% -0.24%] index_select skip256 : Elapsed 0.264 ms (26.395 ms / 100) 26.456 -> 25.892 ( -2.13%) [ +0.03% +0.15% +0.00% / -0.52% -1.08% -2.13%] index_select spread : Elapsed 0.265 ms (26.464 ms / 100) 26.544 -> 26.093 ( -1.70%) [ +0.00% +0.96% +0.65% / -0.21% -1.14% -1.70%] index_select random : Elapsed 0.265 ms (26.544 ms / 100) 26.508 -> 26.176 ( -1.25%) [ +0.00% +0.14% +0.46% / -0.69% -1.04% -1.25%] index_select random_sorted : Elapsed 0.265 ms (26.508 ms / 100) B = [500, 200, 5] (stride (1, 2500, 500)) dim = 2 fill_cnt = 1 3.032 -> 3.032 ( +0.00%) [ +0.00% +0.10% +0.03% / +0.03% +0.46% +0.00%] index_fill_ const : Elapsed 0.030 ms (3.032 ms / 100) 3.029 -> 3.031 ( +0.07%) [ +0.26% +0.00% +0.30% / +0.07% +0.36% +0.10%] index_fill_ linear : Elapsed 0.030 ms (3.037 ms / 100) 3.045 -> 3.033 ( -0.39%) [ +0.00% +0.07% +0.03% / -0.20% -0.16% -0.39%] index_fill_ reverse : Elapsed 0.030 ms (3.045 ms / 100) 3.038 -> 3.027 ( -0.36%) [ +0.30% +0.00% +0.07% / -0.20% -0.33% -0.36%] index_fill_ skip64 : Elapsed 0.030 ms (3.047 ms / 100) 3.033 -> 3.025 ( -0.26%) [ +0.00% +0.23% +0.33% / +0.00% -0.10% -0.26%] index_fill_ skip256 : Elapsed 0.030 ms (3.033 ms / 100) 3.031 -> 3.021 ( -0.33%) [ +0.00% +0.00% +0.33% / -0.16% -0.13% -0.33%] index_fill_ spread : Elapsed 0.030 ms (3.031 ms / 100) 3.027 -> 3.033 ( +0.20%) [ +0.00% +0.03% +0.20% / +0.20% +0.40% +0.30%] index_fill_ strided 3 : Elapsed 0.030 ms (3.027 ms / 100) 3.049 -> 3.044 ( -0.16%) [ +0.20% +0.10% +0.00% / +0.13% -0.10% -0.16%] index_fill_ random : Elapsed 0.031 ms (3.055 ms / 100) 3.047 -> 3.047 ( +0.00%) [ +0.00% +0.16% +0.10% / +0.20% +0.00% +0.20%] index_fill_ random_sorted : Elapsed 0.030 ms (3.047 ms / 100) 3.042 -> 3.042 ( +0.00%) [ +0.39% +0.00% +0.30% / +0.49% +0.00% +0.03%] index_fill_ perm : Elapsed 0.031 ms (3.054 ms / 100) 3.047 -> 3.033 ( -0.46%) [ +0.00% +0.33% +0.20% / +0.00% -0.03% -0.46%] index_fill_ perm_sorted : Elapsed 0.030 ms (3.047 ms / 100) B = [500, 200, 5] (stride (200, 1, 100000)) A = [500, 200, 1] (stride (1, 500, 500)) dim = 2 7.813 -> 7.823 ( +0.13%) [ +0.00% +0.10% +0.15% / +0.13% +0.44% +0.40%] index_add_ linear : Elapsed 0.078 ms (7.813 ms / 100) 7.749 -> 7.755 ( +0.08%) [ +0.04% +0.08% +0.00% / +0.08% +0.37% +0.25%] index_copy_ linear : Elapsed 0.078 ms (7.752 ms / 100) 7.816 -> 7.837 ( +0.27%) [ +0.06% +0.00% +0.03% / +0.27% +0.28% +0.28%] index_add_ reverse : Elapsed 0.078 ms (7.821 ms / 100) 7.747 -> 7.766 ( +0.25%) [ +0.05% +0.00% +0.08% / +0.36% +0.37% +0.25%] index_copy_ reverse : Elapsed 0.078 ms (7.751 ms / 100) 7.821 -> 7.831 ( +0.13%) [ +0.06% +0.09% +0.00% / +0.13% +0.24% +0.22%] index_add_ spread : Elapsed 0.078 ms (7.826 ms / 100) 7.746 -> 7.759 ( +0.17%) [ +0.00% +0.10% +0.14% / +0.31% +0.34% +0.17%] index_copy_ spread : Elapsed 0.077 ms (7.746 ms / 100) 7.826 -> 7.821 ( -0.06%) [ +0.00% +0.00% +0.14% / -0.06% +0.32% +0.06%] index_add_ strided 3 : Elapsed 0.078 ms (7.826 ms / 100) 7.749 -> 7.762 ( +0.17%) [ +0.14% +0.00% +0.10% / +0.27% +0.34% +0.17%] index_copy_ strided 3 : Elapsed 0.078 ms (7.760 ms / 100) 7.802 -> 7.819 ( +0.22%) [ +0.38% +0.09% +0.00% / +0.22% +0.60% +0.35%] index_add_ perm : Elapsed 0.078 ms (7.832 ms / 100) 7.737 -> 7.760 ( +0.30%) [ +0.17% +0.08% +0.00% / +0.30% +0.40% +0.36%] index_copy_ perm : Elapsed 0.078 ms (7.750 ms / 100) 7.813 -> 7.823 ( +0.13%) [ +0.20% +0.06% +0.00% / +0.13% +0.32% +0.22%] index_add_ perm_sorted : Elapsed 0.078 ms (7.829 ms / 100) 7.743 -> 7.765 ( +0.28%) [ +0.17% +0.01% +0.00% / +0.28% +0.41% +0.32%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.756 ms / 100) 13.319 -> 13.370 ( +0.38%) [ +0.26% +0.00% +0.11% / +0.38% +0.53% +0.49%] index_select const : Elapsed 0.134 ms (13.354 ms / 100) 13.321 -> 13.361 ( +0.30%) [ +0.28% +0.00% +0.16% / +0.30% +0.49% +0.59%] index_select wrap : Elapsed 0.134 ms (13.358 ms / 100) 13.322 -> 13.365 ( +0.32%) [ +0.36% +0.00% +0.02% / +0.32% +0.47% +0.51%] index_select linear : Elapsed 0.134 ms (13.370 ms / 100) 13.320 -> 13.364 ( +0.33%) [ +0.41% +0.00% +0.15% / +0.33% +0.45% +0.56%] index_select reverse : Elapsed 0.134 ms (13.375 ms / 100) 13.329 -> 13.368 ( +0.29%) [ +0.21% +0.00% +0.02% / +0.29% +0.56% +0.56%] index_select skip64 : Elapsed 0.134 ms (13.357 ms / 100) 13.323 -> 13.369 ( +0.35%) [ +0.29% +0.00% +0.01% / +0.35% +0.54% +0.57%] index_select skip256 : Elapsed 0.134 ms (13.362 ms / 100) 13.324 -> 13.366 ( +0.32%) [ +0.26% +0.00% +0.06% / +0.32% +0.47% +0.51%] index_select spread : Elapsed 0.134 ms (13.358 ms / 100) 13.319 -> 13.383 ( +0.48%) [ +0.31% +0.00% +0.02% / +0.48% +0.56% +0.59%] index_select random : Elapsed 0.134 ms (13.360 ms / 100) 13.321 -> 13.374 ( +0.40%) [ +0.31% +0.00% +0.01% / +0.40% +0.51% +0.57%] index_select random_sorted : Elapsed 0.134 ms (13.362 ms / 100) B = [500, 200, 5] (stride (1, 500, 100000)) A = [500, 200, 1] (stride (200, 1, 200)) dim = 2 7.943 -> 7.964 ( +0.26%) [ +0.30% +0.00% +0.35% / +0.42% +0.26% +0.44%] index_add_ linear : Elapsed 0.080 ms (7.967 ms / 100) 7.790 -> 7.809 ( +0.24%) [ +0.15% +0.00% +0.18% / +0.44% +0.24% +0.24%] index_copy_ linear : Elapsed 0.078 ms (7.802 ms / 100) 7.954 -> 7.962 ( +0.10%) [ +0.23% +0.18% +0.00% / +0.10% +0.29% +0.18%] index_add_ reverse : Elapsed 0.080 ms (7.972 ms / 100) 7.805 -> 7.798 ( -0.09%) [ +0.01% +0.00% +0.10% / -0.06% -0.09% +0.01%] index_copy_ reverse : Elapsed 0.078 ms (7.806 ms / 100) 7.959 -> 7.955 ( -0.05%) [ +0.19% +0.05% +0.00% / +0.14% +0.05% -0.05%] index_add_ spread : Elapsed 0.080 ms (7.974 ms / 100) 7.794 -> 7.791 ( -0.04%) [ +0.17% +0.00% +0.12% / +0.00% +0.21% -0.04%] index_copy_ spread : Elapsed 0.078 ms (7.807 ms / 100) 7.952 -> 7.955 ( +0.04%) [ +0.10% +0.00% +0.19% / +0.04% +0.21% +0.18%] index_add_ strided 3 : Elapsed 0.080 ms (7.960 ms / 100) 7.789 -> 7.801 ( +0.15%) [ +0.22% +0.00% +0.23% / +0.17% +0.17% +0.15%] index_copy_ strided 3 : Elapsed 0.078 ms (7.806 ms / 100) 7.949 -> 7.946 ( -0.04%) [ +0.05% +0.00% +0.26% / +0.23% +0.10% -0.04%] index_add_ perm : Elapsed 0.080 ms (7.953 ms / 100) 7.804 -> 7.797 ( -0.09%) [ +0.24% +0.00% +0.32% / -0.06% -0.05% -0.09%] index_copy_ perm : Elapsed 0.078 ms (7.823 ms / 100) 7.940 -> 7.957 ( +0.21%) [ +0.20% +0.00% +0.16% / +0.29% +0.29% +0.21%] index_add_ perm_sorted : Elapsed 0.080 ms (7.956 ms / 100) 7.799 -> 7.810 ( +0.14%) [ +0.08% +0.00% +0.18% / +0.15% +0.14% +0.19%] index_copy_ perm_sorted : Elapsed 0.078 ms (7.805 ms / 100) 13.803 -> 13.832 ( +0.21%) [ +0.17% +0.00% +0.15% / +0.24% +0.21% +0.30%] index_select const : Elapsed 0.138 ms (13.827 ms / 100) 13.779 -> 13.827 ( +0.35%) [ +0.34% +0.00% +0.36% / +0.35% +0.46% +0.39%] index_select wrap : Elapsed 0.138 ms (13.826 ms / 100) 13.786 -> 13.825 ( +0.28%) [ +0.38% +0.00% +0.19% / +0.40% +0.41% +0.28%] index_select linear : Elapsed 0.138 ms (13.838 ms / 100) 13.782 -> 13.816 ( +0.25%) [ +0.36% +0.00% +0.15% / +0.51% +0.38% +0.25%] index_select reverse : Elapsed 0.138 ms (13.832 ms / 100) 13.783 -> 13.819 ( +0.26%) [ +0.41% +0.00% +0.14% / +0.56% +0.31% +0.26%] index_select skip64 : Elapsed 0.138 ms (13.839 ms / 100) 13.792 -> 13.826 ( +0.25%) [ +0.31% +0.00% +0.11% / +0.30% +0.33% +0.25%] index_select skip256 : Elapsed 0.138 ms (13.835 ms / 100) 13.801 -> 13.825 ( +0.17%) [ +0.27% +0.00% +0.21% / +0.27% +0.19% +0.17%] index_select spread : Elapsed 0.138 ms (13.838 ms / 100) 13.786 -> 13.831 ( +0.33%) [ +0.30% +0.00% +0.11% / +0.36% +0.46% +0.33%] index_select random : Elapsed 0.138 ms (13.828 ms / 100) 13.792 -> 13.841 ( +0.36%) [ +0.35% +0.00% +0.07% / +0.36% +0.51% +0.41%] index_select random_sorted : Elapsed 0.138 ms (13.840 ms / 100) out_shape = [200, 5, 500] in_shape = [1, 5, 500] idx_dim = 0 B = [200, 5, 500] (stride (2500, 1, 5)) dim = 0 fill_cnt = 1 0.461 -> 0.465 ( +0.87%) [ +0.00% +8.68% +0.87% / +8.24% +0.87% +3.04%] index_fill_ const : Elapsed 0.005 ms (0.461 ms / 100) 0.465 -> 0.459 ( -1.29%) [+11.40% +1.51% +0.00% / +1.72% -1.29% +0.00%] index_fill_ linear : Elapsed 0.005 ms (0.518 ms / 100) 0.479 -> 0.462 ( -3.55%) [ +0.00% +4.80% +8.35% / -1.67% -3.55% +5.22%] index_fill_ reverse : Elapsed 0.005 ms (0.479 ms / 100) 0.466 -> 0.458 ( -1.72%) [ +2.79% +0.64% +0.00% / +3.65% -1.72% +0.43%] index_fill_ skip64 : Elapsed 0.005 ms (0.479 ms / 100) 0.464 -> 0.458 ( -1.29%) [ +0.86% +1.29% +0.00% / +0.86% -1.29% -0.22%] index_fill_ skip256 : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.460 ( -1.08%) [ +0.00% +0.86% +7.10% / +9.03% -1.08% +8.39%] index_fill_ spread : Elapsed 0.005 ms (0.465 ms / 100) 0.461 -> 0.455 ( -1.30%) [ +1.74% +1.52% +0.00% / +4.99% -1.30% +3.04%] index_fill_ strided 3 : Elapsed 0.005 ms (0.469 ms / 100) 0.461 -> 0.473 ( +2.60%) [ +0.65% +1.74% +0.00% / +2.60% +9.33% +11.06%] index_fill_ strided 5 : Elapsed 0.005 ms (0.464 ms / 100) 0.460 -> 0.459 ( -0.22%) [ +8.91% +1.30% +0.00% / +10.87% -0.22% +5.43%] index_fill_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.461 -> 0.458 ( -0.65%) [ +3.47% +7.81% +0.00% / +1.95% -0.65% +3.25%] index_fill_ strided 8 : Elapsed 0.005 ms (0.477 ms / 100) 0.458 -> 0.457 ( -0.22%) [ +0.00% +4.15% +15.50% / +10.26% -0.22% +12.45%] index_fill_ strided 16 : Elapsed 0.005 ms (0.458 ms / 100) bad 0.462 -> 0.491 ( +6.28%) [ +6.49% +1.73% +0.00% / +6.28% +9.31% +8.66%] index_fill_ strided 64 : Elapsed 0.005 ms (0.492 ms / 100) 0.460 -> 0.460 ( +0.00%) [ +0.00% +6.09% +0.22% / +2.39% +0.00% +8.04%] index_fill_ strided 100 : Elapsed 0.005 ms (0.460 ms / 100) 0.463 -> 0.456 ( -1.51%) [+13.17% +2.81% +0.00% / +10.15% -1.51% -1.08%] index_fill_ random : Elapsed 0.005 ms (0.524 ms / 100) 0.461 -> 0.463 ( +0.43%) [ +0.87% +9.11% +0.00% / +1.95% +1.52% +0.43%] index_fill_ random_sorted : Elapsed 0.005 ms (0.465 ms / 100) 0.466 -> 0.455 ( -2.36%) [ +0.00% +5.79% +6.01% / +10.52% -2.36% +0.43%] index_fill_ perm : Elapsed 0.005 ms (0.466 ms / 100) 0.475 -> 0.462 ( -2.74%) [ +6.11% +0.84% +0.00% / -0.84% +6.32% -2.74%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.504 ms / 100) B = [200, 5, 500] (stride (500, 100000, 1)) A = [1, 5, 500] (stride (1, 1, 5)) dim = 0 0.500 -> 0.499 ( -0.20%) [ +0.00% +2.60% +0.20% / +0.80% +4.00% -0.20%] index_add_ linear : Elapsed 0.005 ms (0.500 ms / 100) 0.492 -> 0.491 ( -0.20%) [ +2.24% +2.03% +0.00% / +4.88% +4.88% -0.20%] index_copy_ linear : Elapsed 0.005 ms (0.503 ms / 100) 0.499 -> 0.497 ( -0.40%) [ +0.00% +2.61% +0.40% / +8.42% -0.40% +2.20%] index_add_ reverse : Elapsed 0.005 ms (0.499 ms / 100) 0.496 -> 0.485 ( -2.22%) [ +5.85% +3.23% +0.00% / +3.83% -2.22% +4.03%] index_copy_ reverse : Elapsed 0.005 ms (0.525 ms / 100) 0.497 -> 0.492 ( -1.01%) [ +0.00% +8.45% +5.43% / +2.01% +1.21% -1.01%] index_add_ spread : Elapsed 0.005 ms (0.497 ms / 100) 0.495 -> 0.488 ( -1.41%) [ +1.82% +1.21% +0.00% / +0.81% -1.21% -1.41%] index_copy_ spread : Elapsed 0.005 ms (0.504 ms / 100) 0.495 -> 0.488 ( -1.41%) [ +0.00% +4.04% +12.12% / +3.64% -1.41% +0.40%] index_add_ strided 3 : Elapsed 0.005 ms (0.495 ms / 100) 0.503 -> 0.486 ( -3.38%) [ +0.99% +8.95% +0.00% / -0.20% -3.38% +4.97%] index_copy_ strided 3 : Elapsed 0.005 ms (0.508 ms / 100) 0.503 -> 0.499 ( -0.80%) [ +7.16% +1.59% +0.00% / +0.80% +4.37% -0.80%] index_add_ strided 7 : Elapsed 0.005 ms (0.539 ms / 100) 0.501 -> 0.489 ( -2.40%) [ +3.79% +0.40% +0.00% / -0.40% -2.40% -0.40%] index_copy_ strided 7 : Elapsed 0.005 ms (0.520 ms / 100) 0.505 -> 0.500 ( -0.99%) [ +0.00% +6.53% +3.56% / +9.70% +0.20% -0.99%] index_add_ perm : Elapsed 0.005 ms (0.505 ms / 100) 0.496 -> 0.487 ( -1.81%) [ +2.02% +1.81% +0.00% / +1.01% +1.61% -1.81%] index_copy_ perm : Elapsed 0.005 ms (0.506 ms / 100) 0.497 -> 0.490 ( -1.41%) [ +0.00% +4.43% +1.81% / +1.41% -0.80% -1.41%] index_add_ perm_sorted : Elapsed 0.005 ms (0.497 ms / 100) 0.503 -> 0.487 ( -3.18%) [ +0.00% +2.98% +3.98% / +4.17% -3.18% -2.19%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.503 ms / 100) 8.186 -> 8.218 ( +0.39%) [ +0.31% +0.09% +0.00% / +0.39% +0.97% +0.99%] index_select const : Elapsed 0.082 ms (8.211 ms / 100) 8.175 -> 8.222 ( +0.57%) [ +0.28% +0.00% +0.27% / +0.57% +0.87% +0.88%] index_select wrap : Elapsed 0.082 ms (8.198 ms / 100) 8.180 -> 8.194 ( +0.17%) [ +0.27% +0.00% +0.09% / +0.17% +0.71% +0.92%] index_select linear : Elapsed 0.082 ms (8.202 ms / 100) 8.203 -> 8.216 ( +0.16%) [ +0.04% +0.00% +0.26% / +0.16% +0.54% +0.62%] index_select reverse : Elapsed 0.082 ms (8.206 ms / 100) 8.177 -> 8.213 ( +0.44%) [ +0.26% +0.00% +0.26% / +0.44% +0.93% +1.10%] index_select skip64 : Elapsed 0.082 ms (8.198 ms / 100) 8.171 -> 8.221 ( +0.61%) [ +0.00% +0.42% +0.43% / +0.61% +0.69% +0.87%] index_select skip256 : Elapsed 0.082 ms (8.171 ms / 100) 8.210 -> 8.235 ( +0.30%) [ +0.30% +0.00% +0.26% / +0.43% +0.38% +0.30%] index_select spread : Elapsed 0.082 ms (8.235 ms / 100) 8.177 -> 8.214 ( +0.45%) [ +0.18% +0.18% +0.00% / +0.45% +0.59% +0.57%] index_select random : Elapsed 0.082 ms (8.192 ms / 100) 8.196 -> 8.206 ( +0.12%) [ +0.09% +0.00% +0.00% / +0.12% +0.63% +0.37%] index_select random_sorted : Elapsed 0.082 ms (8.203 ms / 100) B = [200, 5, 500] (stride (1, 100000, 200)) A = [1, 5, 500] (stride (2500, 1, 5)) dim = 0 1.091 -> 1.092 ( +0.09%) [ +0.46% +0.37% +0.00% / +0.09% +1.10% +0.73%] index_add_ linear : Elapsed 0.011 ms (1.096 ms / 100) 1.010 -> 1.015 ( +0.50%) [ +0.30% +0.00% +0.40% / +0.50% +0.99% +0.69%] index_copy_ linear : Elapsed 0.010 ms (1.013 ms / 100) 1.095 -> 1.089 ( -0.55%) [ +0.09% +0.00% +0.46% / +0.27% +0.00% -0.55%] index_add_ reverse : Elapsed 0.011 ms (1.096 ms / 100) 1.013 -> 1.014 ( +0.10%) [ +0.00% +0.10% +0.10% / +0.10% +0.69% +0.59%] index_copy_ reverse : Elapsed 0.010 ms (1.013 ms / 100) 1.093 -> 1.092 ( -0.09%) [ +0.73% +0.00% +0.09% / +0.27% +0.09% -0.09%] index_add_ spread : Elapsed 0.011 ms (1.101 ms / 100) 1.013 -> 1.012 ( -0.10%) [ +0.20% +0.00% +0.10% / -0.10% +0.69% +0.59%] index_copy_ spread : Elapsed 0.010 ms (1.015 ms / 100) 1.094 -> 1.094 ( +0.00%) [ +0.18% +0.09% +0.00% / +0.00% +1.46% +1.65%] index_add_ strided 3 : Elapsed 0.011 ms (1.096 ms / 100) 1.008 -> 1.015 ( +0.69%) [ +0.69% +0.00% +0.50% / +0.69% +1.39% +0.79%] index_copy_ strided 3 : Elapsed 0.010 ms (1.015 ms / 100) 1.080 -> 1.088 ( +0.74%) [ +0.37% +0.00% +1.30% / +0.74% +1.30% +1.02%] index_add_ strided 7 : Elapsed 0.011 ms (1.084 ms / 100) 1.008 -> 1.014 ( +0.60%) [ +0.69% +0.00% +0.99% / +0.60% +1.29% +1.09%] index_copy_ strided 7 : Elapsed 0.010 ms (1.015 ms / 100) 1.081 -> 1.082 ( +0.09%) [ +0.46% +0.00% +0.00% / +0.09% +1.20% +1.85%] index_add_ perm : Elapsed 0.011 ms (1.086 ms / 100) 1.016 -> 1.017 ( +0.10%) [ +0.59% +0.00% +0.10% / +0.39% +0.59% +0.10%] index_copy_ perm : Elapsed 0.010 ms (1.022 ms / 100) 1.082 -> 1.084 ( +0.18%) [ +0.37% +0.37% +0.00% / +0.18% +0.83% +1.39%] index_add_ perm_sorted : Elapsed 0.011 ms (1.086 ms / 100) 1.018 -> 1.019 ( +0.10%) [ +0.00% +0.10% +0.00% / +0.20% +0.10% +0.79%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.018 ms / 100) 20.441 -> 20.404 ( -0.18%) [ +0.25% +0.03% +0.00% / +0.10% -0.18% +0.11%] index_select const : Elapsed 0.205 ms (20.493 ms / 100) 20.440 -> 20.427 ( -0.06%) [ +0.08% +0.03% +0.00% / +0.02% -0.06% -0.04%] index_select wrap : Elapsed 0.205 ms (20.456 ms / 100) 20.461 -> 20.406 ( -0.27%) [ +0.05% +0.00% +0.00% / -0.11% -0.27% -0.22%] index_select linear : Elapsed 0.205 ms (20.472 ms / 100) 20.448 -> 20.444 ( -0.02%) [ +0.00% +0.10% +0.06% / +0.03% -0.01% -0.02%] index_select reverse : Elapsed 0.204 ms (20.448 ms / 100) 20.442 -> 20.448 ( +0.03%) [ +0.01% +0.11% +0.00% / +0.03% +0.04% +0.33%] index_select skip64 : Elapsed 0.204 ms (20.444 ms / 100) 20.402 -> 20.432 ( +0.15%) [ +0.36% +0.00% +0.14% / +0.35% +0.26% +0.15%] index_select skip256 : Elapsed 0.205 ms (20.475 ms / 100) 20.463 -> 20.436 ( -0.13%) [ +0.07% +0.04% +0.00% / +0.48% -0.13% -0.08%] index_select spread : Elapsed 0.205 ms (20.478 ms / 100) 20.449 -> 20.415 ( -0.17%) [ +0.01% +0.00% +0.09% / +0.22% -0.17% -0.16%] index_select random : Elapsed 0.205 ms (20.452 ms / 100) 20.458 -> 20.413 ( -0.22%) [ +0.10% +0.08% +0.00% / -0.06% -0.22% -0.12%] index_select random_sorted : Elapsed 0.205 ms (20.478 ms / 100) B = [200, 5, 500] (stride (5, 1, 1000)) dim = 0 fill_cnt = 1 0.462 -> 0.459 ( -0.65%) [ +0.00% +6.93% +0.65% / +5.19% -0.65% +0.65%] index_fill_ const : Elapsed 0.005 ms (0.462 ms / 100) 0.463 -> 0.458 ( -1.08%) [ +0.00% +1.73% +0.43% / +9.29% -1.08% +0.00%] index_fill_ linear : Elapsed 0.005 ms (0.463 ms / 100) 0.466 -> 0.456 ( -2.15%) [ +0.00% +0.64% +1.07% / +1.72% -2.15% -0.64%] index_fill_ reverse : Elapsed 0.005 ms (0.466 ms / 100) 0.461 -> 0.454 ( -1.52%) [ +0.87% +3.47% +0.00% / +2.60% -1.52% +12.58%] index_fill_ skip64 : Elapsed 0.005 ms (0.465 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +7.48% +0.00% +2.35% / -0.21% +11.32% -1.07%] index_fill_ skip256 : Elapsed 0.005 ms (0.503 ms / 100) 0.464 -> 0.458 ( -1.29%) [ +0.00% +7.33% +4.09% / +13.15% -1.29% +2.16%] index_fill_ spread : Elapsed 0.005 ms (0.464 ms / 100) 0.461 -> 0.458 ( -0.65%) [ +0.00% +1.74% +0.87% / +2.17% -0.65% +10.20%] index_fill_ strided 3 : Elapsed 0.005 ms (0.461 ms / 100) 0.472 -> 0.457 ( -3.18%) [ +6.99% +0.00% +54.24% / +17.80% -3.18% -1.27%] index_fill_ strided 5 : Elapsed 0.005 ms (0.505 ms / 100) 0.462 -> 0.458 ( -0.87%) [ +0.00% +1.52% +0.22% / +0.87% -0.87% -0.43%] index_fill_ strided 7 : Elapsed 0.005 ms (0.462 ms / 100) 0.469 -> 0.460 ( -1.92%) [ +0.85% +0.00% +2.77% / +6.82% -1.92% -1.92%] index_fill_ strided 8 : Elapsed 0.005 ms (0.473 ms / 100) 0.464 -> 0.455 ( -1.94%) [+10.13% +1.29% +0.00% / +0.65% -1.94% +9.48%] index_fill_ strided 16 : Elapsed 0.005 ms (0.511 ms / 100) 0.463 -> 0.458 ( -1.08%) [ +0.43% +1.30% +0.00% / +1.51% +10.80% -1.08%] index_fill_ strided 64 : Elapsed 0.005 ms (0.465 ms / 100) 0.464 -> 0.460 ( -0.86%) [ +0.86% +10.56% +0.00% / +1.08% -0.86% +10.13%] index_fill_ strided 100 : Elapsed 0.005 ms (0.468 ms / 100) 0.463 -> 0.461 ( -0.43%) [ +9.29% +1.51% +0.00% / +2.16% +7.78% -0.43%] index_fill_ random : Elapsed 0.005 ms (0.506 ms / 100) 0.464 -> 0.458 ( -1.29%) [ +0.00% +1.51% +21.77% / +11.85% -1.29% -1.08%] index_fill_ random_sorted : Elapsed 0.005 ms (0.464 ms / 100) 0.465 -> 0.458 ( -1.51%) [ +0.22% +1.29% +0.00% / +0.65% -1.51% -0.43%] index_fill_ perm : Elapsed 0.005 ms (0.466 ms / 100) 0.467 -> 0.460 ( -1.50%) [ +0.00% +7.28% +5.14% / +0.64% +1.93% -1.50%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.467 ms / 100) out_shape = [1, 200, 500] in_shape = [1, 5, 500] idx_dim = 1 B = [1, 200, 500] (stride (1, 1, 200)) A = [1, 5, 500] (stride (5, 1, 5)) dim = 1 0.627 -> 0.630 ( +0.48%) [ +0.96% +0.16% +0.00% / +1.28% +0.48% +0.48%] index_add_ linear : Elapsed 0.006 ms (0.633 ms / 100) 0.644 -> 0.647 ( +0.47%) [ +0.78% +0.00% +0.00% / +0.78% +0.47% +0.62%] index_copy_ linear : Elapsed 0.006 ms (0.649 ms / 100) 0.628 -> 0.630 ( +0.32%) [ +2.23% +0.16% +0.00% / +0.96% +0.48% +0.32%] index_add_ reverse : Elapsed 0.006 ms (0.642 ms / 100) 0.643 -> 0.646 ( +0.47%) [ +0.78% +0.00% +0.00% / +0.78% +0.47% +0.62%] index_copy_ reverse : Elapsed 0.006 ms (0.648 ms / 100) 0.627 -> 0.631 ( +0.64%) [ +0.96% +0.16% +0.00% / +0.96% +0.64% +0.64%] index_add_ spread : Elapsed 0.006 ms (0.633 ms / 100) 0.644 -> 0.647 ( +0.47%) [ +0.62% +0.00% +0.31% / +0.62% +0.62% +0.47%] index_copy_ spread : Elapsed 0.006 ms (0.648 ms / 100) 0.628 -> 0.630 ( +0.32%) [ +0.80% +0.00% +0.00% / +0.64% +0.48% +0.32%] index_add_ strided 3 : Elapsed 0.006 ms (0.633 ms / 100) 0.643 -> 0.647 ( +0.62%) [ +0.78% +0.16% +0.00% / +0.78% +0.62% +0.62%] index_copy_ strided 3 : Elapsed 0.006 ms (0.648 ms / 100) 0.627 -> 0.631 ( +0.64%) [ +1.12% +0.00% +0.16% / +0.80% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.634 ms / 100) 0.642 -> 0.647 ( +0.78%) [ +0.93% +0.00% +0.62% / +0.93% +0.78% +0.93%] index_copy_ strided 7 : Elapsed 0.006 ms (0.648 ms / 100) 0.628 -> 0.631 ( +0.48%) [ +0.64% +0.00% +0.48% / +0.80% +0.64% +0.48%] index_add_ perm : Elapsed 0.006 ms (0.632 ms / 100) 0.643 -> 0.647 ( +0.62%) [ +0.78% +0.00% +0.31% / +2.80% +0.62% +0.78%] index_copy_ perm : Elapsed 0.006 ms (0.648 ms / 100) 0.628 -> 0.632 ( +0.64%) [ +0.80% +0.16% +0.00% / +0.64% +0.64% +0.64%] index_add_ perm_sorted : Elapsed 0.006 ms (0.633 ms / 100) 0.642 -> 0.648 ( +0.93%) [ +0.78% +0.00% +0.16% / +0.93% +0.93% +0.93%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.647 ms / 100) 5.422 -> 5.223 ( -3.67%) [ +0.11% +0.00% +0.24% / -3.67% -3.28% -3.26%] index_select const : Elapsed 0.054 ms (5.428 ms / 100) 5.456 -> 5.232 ( -4.11%) [ +0.05% +0.00% +0.09% / -3.87% -3.76% -4.11%] index_select wrap : Elapsed 0.055 ms (5.459 ms / 100) 5.455 -> 5.244 ( -3.87%) [ +0.07% +0.00% +0.13% / -3.76% -3.78% -3.87%] index_select linear : Elapsed 0.055 ms (5.459 ms / 100) 5.415 -> 5.225 ( -3.51%) [ +0.37% +0.00% +0.50% / -3.51% -2.79% -2.83%] index_select reverse : Elapsed 0.054 ms (5.435 ms / 100) 5.412 -> 5.230 ( -3.36%) [ +0.42% +0.00% +0.18% / -3.36% -3.12% -2.99%] index_select skip64 : Elapsed 0.054 ms (5.435 ms / 100) 5.432 -> 5.223 ( -3.85%) [ +0.00% +0.13% +0.31% / -3.85% -3.61% -3.28%] index_select skip256 : Elapsed 0.054 ms (5.432 ms / 100) 5.423 -> 5.226 ( -3.63%) [ +0.02% +0.04% +0.00% / -3.63% -3.30% -3.36%] index_select spread : Elapsed 0.054 ms (5.424 ms / 100) 5.425 -> 5.219 ( -3.80%) [ +0.02% +0.00% +0.31% / -3.80% -3.13% -3.30%] index_select strided 3 : Elapsed 0.054 ms (5.426 ms / 100) 5.425 -> 5.233 ( -3.54%) [ +0.24% +0.00% +0.35% / -3.23% -3.54% -3.24%] index_select random : Elapsed 0.054 ms (5.438 ms / 100) 5.428 -> 5.227 ( -3.70%) [ +0.00% +0.07% +0.02% / -3.70% -2.95% -3.08%] index_select random_sorted : Elapsed 0.054 ms (5.428 ms / 100) out_shape = [1, 5, 200] in_shape = [1, 5, 500] idx_dim = 2 B = [1, 5, 200] (stride (1000, 200, 1)) A = [1, 5, 500] (stride (1, 500, 1)) dim = 2 0.526 -> 0.527 ( +0.19%) [ +2.09% +0.00% +1.90% / +1.71% +0.19% +0.57%] index_select const : Elapsed 0.005 ms (0.537 ms / 100) 0.524 -> 0.528 ( +0.76%) [ +1.91% +0.00% +1.15% / +1.15% +0.76% +10.31%] index_select wrap : Elapsed 0.005 ms (0.534 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +5.00% +0.00% +1.15% / +2.69% +1.35% +1.35%] index_select linear : Elapsed 0.005 ms (0.546 ms / 100) 0.529 -> 0.525 ( -0.76%) [ +0.76% +0.76% +0.00% / +1.13% -0.57% -0.76%] index_select reverse : Elapsed 0.005 ms (0.533 ms / 100) 0.522 -> 0.522 ( +0.00%) [ +2.68% +0.00% +0.77% / +4.79% +7.47% +0.00%] index_select skip64 : Elapsed 0.005 ms (0.536 ms / 100) 0.528 -> 0.526 ( -0.38%) [ +1.89% +0.00% +0.19% / +1.33% +14.77% -0.38%] index_select skip256 : Elapsed 0.005 ms (0.538 ms / 100) 0.524 -> 0.520 ( -0.76%) [ +3.05% +0.57% +0.00% / +1.72% -0.76% +4.77%] index_select spread : Elapsed 0.005 ms (0.540 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +2.67% +0.00% +0.00% / +8.57% -0.19% +9.33%] index_select strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.531 ( +1.34%) [ +2.10% +1.15% +0.00% / +8.78% +1.34% +16.03%] index_select strided 5 : Elapsed 0.005 ms (0.535 ms / 100) 0.524 -> 0.525 ( +0.19%) [ +2.29% +0.57% +0.00% / +19.47% +0.19% +1.34%] index_select strided 7 : Elapsed 0.005 ms (0.536 ms / 100) 0.519 -> 0.524 ( +0.96%) [ +2.70% +5.01% +0.00% / +2.89% +0.96% +2.31%] index_select strided 8 : Elapsed 0.005 ms (0.533 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +9.21% +1.15% +0.00% / +1.92% -0.19% +2.11%] index_select strided 16 : Elapsed 0.006 ms (0.569 ms / 100) 0.522 -> 0.521 ( -0.19%) [ +3.64% +1.15% +0.00% / +1.92% -0.19% +0.57%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.528 -> 0.520 ( -1.52%) [ +1.89% +0.00% +1.52% / +1.33% -0.57% -1.52%] index_select strided 100 : Elapsed 0.005 ms (0.538 ms / 100) 0.534 -> 0.532 ( -0.37%) [ +1.12% +0.00% +6.74% / -0.37% +1.69% -0.19%] index_select strided 255 : Elapsed 0.005 ms (0.540 ms / 100) 0.530 -> 0.524 ( -1.13%) [ +1.89% +0.00% +12.26% / +0.57% -1.13% +0.19%] index_select strided 256 : Elapsed 0.005 ms (0.540 ms / 100) 0.533 -> 0.520 ( -2.44%) [ +1.31% +3.38% +0.00% / +0.19% -2.44% -1.50%] index_select strided 257 : Elapsed 0.005 ms (0.540 ms / 100) 0.527 -> 0.530 ( +0.57%) [ +3.23% +0.00% +8.92% / +2.28% +0.57% +7.59%] index_select random : Elapsed 0.005 ms (0.544 ms / 100) 0.525 -> 0.519 ( -1.14%) [ +2.48% +0.00% +14.67% / +8.57% -1.14% +0.76%] index_select random_sorted : Elapsed 0.005 ms (0.538 ms / 100) 0.530 -> 0.520 ( -1.89%) [ +1.89% +0.38% +0.00% / +18.87% -1.89% -0.19%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.520 ( -0.38%) [ +4.21% +2.11% +0.00% / +2.11% -0.38% +1.34%] index_select perm_sorted : Elapsed 0.005 ms (0.544 ms / 100) B = [1, 5, 200] (stride (1000, 1, 5)) A = [1, 5, 500] (stride (500, 500, 1)) dim = 2 0.522 -> 0.531 ( +1.72%) [ +3.83% +0.57% +0.00% / +2.49% +1.72% +3.26%] index_select const : Elapsed 0.005 ms (0.542 ms / 100) 0.530 -> 0.515 ( -2.83%) [ +0.57% +3.77% +0.00% / +1.13% -2.83% +0.19%] index_select wrap : Elapsed 0.005 ms (0.533 ms / 100) 0.521 -> 0.513 ( -1.54%) [ +3.65% +12.86% +0.00% / +21.50% -1.54% +1.73%] index_select linear : Elapsed 0.005 ms (0.540 ms / 100) 0.523 -> 0.512 ( -2.10%) [ +4.59% +0.57% +0.00% / +20.65% -2.10% +1.72%] index_select reverse : Elapsed 0.005 ms (0.547 ms / 100) 0.520 -> 0.514 ( -1.15%) [ +4.04% +1.54% +0.00% / +2.88% -1.15% +13.65%] index_select skip64 : Elapsed 0.005 ms (0.541 ms / 100) 0.516 -> 0.516 ( +0.00%) [ +5.43% +2.52% +0.00% / +4.65% +0.00% +23.26%] index_select skip256 : Elapsed 0.005 ms (0.544 ms / 100) 0.529 -> 0.517 ( -2.27%) [ +2.08% +0.00% +0.76% / +0.57% -2.27% +3.59%] index_select spread : Elapsed 0.005 ms (0.540 ms / 100) 0.523 -> 0.520 ( -0.57%) [ +3.82% +0.57% +0.00% / +3.44% -0.57% +2.29%] index_select strided 3 : Elapsed 0.005 ms (0.543 ms / 100) 0.526 -> 0.517 ( -1.71%) [ +2.85% +0.76% +0.00% / +2.28% -1.71% +1.33%] index_select strided 5 : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.530 ( +0.95%) [ +2.86% +0.00% +0.19% / +2.67% +0.95% +1.52%] index_select strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.524 ( -0.38%) [ +2.85% +0.00% +1.33% / +1.90% -0.38% +1.71%] index_select strided 8 : Elapsed 0.005 ms (0.541 ms / 100) 0.532 -> 0.523 ( -1.69%) [ +0.94% +0.00% +8.08% / +1.32% -1.69% -0.19%] index_select strided 16 : Elapsed 0.005 ms (0.537 ms / 100) 0.530 -> 0.514 ( -3.02%) [ +2.26% +0.00% +4.91% / +1.51% -3.02% +0.00%] index_select strided 64 : Elapsed 0.005 ms (0.542 ms / 100) 0.528 -> 0.520 ( -1.52%) [ +9.09% +0.57% +0.00% / +2.27% -1.52% +2.27%] index_select strided 100 : Elapsed 0.006 ms (0.576 ms / 100) 0.525 -> 0.525 ( +0.00%) [ +5.90% +0.00% +6.86% / +4.57% +0.00% +0.95%] index_select strided 255 : Elapsed 0.006 ms (0.556 ms / 100) 0.544 -> 0.519 ( -4.60%) [ +0.00% +0.18% +4.04% / +0.00% -4.60% -2.57%] index_select strided 256 : Elapsed 0.005 ms (0.544 ms / 100) 0.520 -> 0.523 ( +0.58%) [ +5.38% +0.58% +0.00% / +2.31% +0.58% +2.12%] index_select strided 257 : Elapsed 0.005 ms (0.548 ms / 100) 0.518 -> 0.520 ( +0.39%) [ +3.67% +0.97% +0.00% / +3.28% +0.39% +2.70%] index_select random : Elapsed 0.005 ms (0.537 ms / 100) 0.524 -> 0.520 ( -0.76%) [ +4.96% +0.00% +0.57% / +2.67% -0.76% +2.48%] index_select random_sorted : Elapsed 0.006 ms (0.550 ms / 100) 0.530 -> 0.518 ( -2.26%) [ +1.70% +2.45% +0.00% / +0.94% -2.26% +1.51%] index_select perm : Elapsed 0.005 ms (0.539 ms / 100) 0.526 -> 0.519 ( -1.33%) [ +2.28% +7.41% +0.00% / +1.71% -1.33% +0.76%] index_select perm_sorted : Elapsed 0.005 ms (0.538 ms / 100) B = [1, 5, 200] (stride (200, 200, 1)) A = [1, 5, 500] (stride (2500, 1, 5)) dim = 2 0.526 -> 0.522 ( -0.76%) [ +3.04% +1.14% +0.00% / +1.33% -0.76% +1.14%] index_select const : Elapsed 0.005 ms (0.542 ms / 100) 0.522 -> 0.522 ( +0.00%) [ +3.07% +3.07% +0.00% / +2.49% +0.00% +5.94%] index_select wrap : Elapsed 0.005 ms (0.538 ms / 100) 0.529 -> 0.520 ( -1.70%) [ +1.32% +6.81% +0.00% / +0.57% -1.70% +11.53%] index_select linear : Elapsed 0.005 ms (0.536 ms / 100) 0.522 -> 0.521 ( -0.19%) [ +2.30% +6.70% +0.00% / +1.92% -0.19% +0.38%] index_select reverse : Elapsed 0.005 ms (0.534 ms / 100) 0.519 -> 0.519 ( +0.00%) [ +3.66% +0.77% +0.00% / +2.12% +0.00% +4.82%] index_select skip64 : Elapsed 0.005 ms (0.538 ms / 100) 0.523 -> 0.521 ( -0.38%) [ +3.63% +0.38% +0.00% / +1.53% -0.38% +1.91%] index_select skip256 : Elapsed 0.005 ms (0.542 ms / 100) 0.525 -> 0.520 ( -0.95%) [ +2.86% +0.00% +0.57% / +9.14% -0.76% -0.95%] index_select spread : Elapsed 0.005 ms (0.540 ms / 100) 0.535 -> 0.524 ( -2.06%) [ +0.75% +0.00% +5.61% / -0.93% -2.06% -0.93%] index_select strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.527 -> 0.520 ( -1.33%) [ +3.80% +0.00% +12.90% / +0.00% -1.33% -1.14%] index_select strided 5 : Elapsed 0.005 ms (0.547 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +2.66% +0.19% +0.00% / +7.79% -1.14% +0.19%] index_select strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.525 -> 0.517 ( -1.52%) [ +2.10% +2.10% +0.00% / +8.76% -1.52% -0.38%] index_select strided 8 : Elapsed 0.005 ms (0.536 ms / 100) 0.536 -> 0.519 ( -3.17%) [ +0.00% +5.04% +5.04% / +12.13% -3.17% -1.49%] index_select strided 16 : Elapsed 0.005 ms (0.536 ms / 100) 0.534 -> 0.522 ( -2.25%) [ +0.00% +4.12% +10.49% / -0.19% -2.06% -2.25%] index_select strided 64 : Elapsed 0.005 ms (0.534 ms / 100) 0.524 -> 0.515 ( -1.72%) [ +1.91% +0.38% +0.00% / +1.34% -1.72% +0.19%] index_select strided 100 : Elapsed 0.005 ms (0.534 ms / 100) 0.527 -> 0.522 ( -0.95%) [ +5.88% +0.95% +0.00% / +1.33% -0.95% -0.19%] index_select strided 255 : Elapsed 0.006 ms (0.558 ms / 100) 0.520 -> 0.521 ( +0.19%) [ +3.65% +1.54% +0.00% / +2.88% +0.19% +0.96%] index_select strided 256 : Elapsed 0.005 ms (0.539 ms / 100) 0.525 -> 0.523 ( -0.38%) [ +9.14% +5.52% +0.00% / +1.52% +0.00% -0.38%] index_select strided 257 : Elapsed 0.006 ms (0.573 ms / 100) 0.526 -> 0.522 ( -0.76%) [ +7.22% +0.19% +0.00% / +1.14% +0.57% -0.76%] index_select random : Elapsed 0.006 ms (0.564 ms / 100) 0.520 -> 0.523 ( +0.58%) [ +3.65% +1.92% +0.00% / +11.35% +0.58% +8.85%] index_select random_sorted : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.520 ( -0.76%) [ +3.44% +1.15% +0.00% / +3.24% -0.76% -0.57%] index_select perm : Elapsed 0.005 ms (0.542 ms / 100) 0.532 -> 0.525 ( -1.32%) [ +1.13% +0.00% +1.88% / +0.00% -1.32% +6.02%] index_select perm_sorted : Elapsed 0.005 ms (0.538 ms / 100) B = [1, 5, 200] (stride (1, 200, 1)) A = [1, 5, 500] (stride (500, 500, 1)) dim = 2 0.525 -> 0.522 ( -0.57%) [ +9.33% +1.33% +0.00% / +1.33% -0.57% +11.05%] index_select const : Elapsed 0.006 ms (0.574 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +7.69% +0.77% +0.00% / +1.92% +0.96% +1.92%] index_select wrap : Elapsed 0.006 ms (0.560 ms / 100) 0.524 -> 0.524 ( +0.00%) [ +2.67% +0.00% +0.38% / +1.72% +8.59% +0.00%] index_select linear : Elapsed 0.005 ms (0.538 ms / 100) 0.523 -> 0.522 ( -0.19%) [ +3.44% +0.76% +0.00% / +1.34% +0.76% -0.19%] index_select reverse : Elapsed 0.005 ms (0.541 ms / 100) 0.534 -> 0.522 ( -2.25%) [ +2.25% +0.00% +5.81% / +5.43% -2.25% +5.24%] index_select skip64 : Elapsed 0.005 ms (0.546 ms / 100) 0.524 -> 0.527 ( +0.57%) [ +2.86% +0.95% +0.00% / +1.15% +0.76% +0.57%] index_select skip256 : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.525 ( +0.19%) [ +9.16% +0.57% +0.00% / +0.19% +0.38% +1.72%] index_select spread : Elapsed 0.006 ms (0.572 ms / 100) 0.521 -> 0.523 ( +0.38%) [ +8.25% +0.77% +0.00% / +2.11% +0.38% +0.77%] index_select strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.536 -> 0.525 ( -2.05%) [ +0.19% +0.00% +3.73% / -1.49% -1.68% -2.05%] index_select strided 5 : Elapsed 0.005 ms (0.537 ms / 100) 0.527 -> 0.519 ( -1.52%) [ +9.68% +0.00% +6.83% / +0.76% -1.52% +0.95%] index_select strided 7 : Elapsed 0.006 ms (0.578 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +4.02% +0.00% +14.75% / +9.58% +0.96% +1.92%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +2.70% +1.16% +0.00% / +18.69% +1.16% +1.35%] index_select strided 16 : Elapsed 0.005 ms (0.533 ms / 100) 0.524 -> 0.526 ( +0.38%) [ +2.29% +0.00% +0.76% / +1.53% +0.38% +1.15%] index_select strided 64 : Elapsed 0.005 ms (0.536 ms / 100) 0.522 -> 0.526 ( +0.77%) [ +9.00% +0.00% +0.96% / +1.92% +0.77% +1.53%] index_select strided 100 : Elapsed 0.006 ms (0.569 ms / 100) 0.523 -> 0.523 ( +0.00%) [+10.71% +0.57% +0.00% / +1.53% +0.00% +0.57%] index_select strided 255 : Elapsed 0.006 ms (0.579 ms / 100) 0.523 -> 0.523 ( +0.00%) [+14.53% +0.00% +0.38% / +12.62% +0.00% +1.91%] index_select strided 256 : Elapsed 0.006 ms (0.599 ms / 100) 0.522 -> 0.519 ( -0.57%) [ +2.49% +0.19% +0.00% / +3.64% -0.57% +1.92%] index_select strided 257 : Elapsed 0.005 ms (0.535 ms / 100) 0.525 -> 0.523 ( -0.38%) [ +2.29% +0.00% +0.95% / +1.33% -0.38% +6.67%] index_select random : Elapsed 0.005 ms (0.537 ms / 100) 0.520 -> 0.521 ( +0.19%) [ +5.58% +0.00% +0.00% / +2.31% +0.19% +21.73%] index_select random_sorted : Elapsed 0.005 ms (0.549 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +3.45% +0.00% +3.65% / +2.50% +0.19% +1.73%] index_select perm : Elapsed 0.005 ms (0.539 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +4.43% +0.00% +0.39% / +2.70% +1.16% +2.50%] index_select perm_sorted : Elapsed 0.005 ms (0.542 ms / 100) B = [1, 5, 200] (stride (5, 1, 5)) A = [1, 5, 500] (stride (1, 500, 1)) dim = 2 0.519 -> 0.521 ( +0.39%) [ +4.43% +0.00% +9.06% / +3.08% +0.39% +1.73%] index_select const : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.517 ( -0.39%) [ +5.01% +0.00% +15.41% / +3.47% -0.39% +2.89%] index_select wrap : Elapsed 0.005 ms (0.545 ms / 100) 0.526 -> 0.514 ( -2.28%) [ +3.42% +0.00% +0.38% / +2.09% -2.28% +1.52%] index_select linear : Elapsed 0.005 ms (0.544 ms / 100) 0.519 -> 0.516 ( -0.58%) [ +4.82% +0.00% +9.06% / +3.47% -0.58% +2.89%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +4.99% +0.00% +1.73% / +2.69% +0.19% +0.96%] index_select skip64 : Elapsed 0.005 ms (0.547 ms / 100) 0.518 -> 0.519 ( +0.19%) [ +6.18% +0.00% +5.98% / +3.28% +0.19% +2.51%] index_select skip256 : Elapsed 0.006 ms (0.550 ms / 100) 0.530 -> 0.516 ( -2.64%) [ +2.45% +0.00% +10.57% / +1.89% -2.64% +0.38%] index_select spread : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.523 ( +0.19%) [ +4.02% +0.19% +0.00% / +2.49% +0.19% +1.53%] index_select strided 3 : Elapsed 0.005 ms (0.543 ms / 100) 0.523 -> 0.517 ( -1.15%) [+14.15% +0.38% +0.00% / +4.02% -1.15% +0.19%] index_select strided 5 : Elapsed 0.006 ms (0.597 ms / 100) 0.520 -> 0.525 ( +0.96%) [+21.15% +0.00% +0.00% / +3.46% +13.46% +0.96%] index_select strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.519 -> 0.520 ( +0.19%) [ +6.36% +0.77% +0.00% / +4.82% +0.19% +2.12%] index_select strided 8 : Elapsed 0.006 ms (0.552 ms / 100) 0.522 -> 0.519 ( -0.57%) [ +3.64% +0.57% +0.00% / +9.00% -0.57% +1.92%] index_select strided 16 : Elapsed 0.005 ms (0.541 ms / 100) 0.518 -> 0.524 ( +1.16%) [ +5.02% +0.00% +0.19% / +36.10% +1.16% +2.51%] index_select strided 64 : Elapsed 0.005 ms (0.544 ms / 100) 0.530 -> 0.518 ( -2.26%) [ +2.64% +0.00% +0.00% / +8.87% -2.26% -0.19%] index_select strided 100 : Elapsed 0.005 ms (0.544 ms / 100) 0.527 -> 0.514 ( -2.47%) [ +8.35% +0.00% +0.95% / +15.18% -2.47% +4.55%] index_select strided 255 : Elapsed 0.006 ms (0.571 ms / 100) 0.525 -> 0.523 ( -0.38%) [ +3.05% +0.95% +0.00% / +1.90% -0.38% +14.10%] index_select strided 256 : Elapsed 0.005 ms (0.541 ms / 100) 0.523 -> 0.524 ( +0.19%) [ +3.44% +0.00% +1.15% / +2.10% +0.19% +1.34%] index_select strided 257 : Elapsed 0.005 ms (0.541 ms / 100) 0.528 -> 0.520 ( -1.52%) [ +3.03% +0.00% +0.00% / +5.87% -1.52% +0.57%] index_select random : Elapsed 0.005 ms (0.544 ms / 100) 0.528 -> 0.525 ( -0.57%) [ +3.98% +0.00% +1.52% / +1.52% +1.89% -0.57%] index_select random_sorted : Elapsed 0.005 ms (0.549 ms / 100) 0.522 -> 0.518 ( -0.77%) [ +3.83% +0.00% +0.00% / +3.45% -0.77% +1.34%] index_select perm : Elapsed 0.005 ms (0.542 ms / 100) 0.520 -> 0.517 ( -0.58%) [ +4.04% +2.88% +0.00% / +3.65% -0.58% +2.69%] index_select perm_sorted : Elapsed 0.005 ms (0.541 ms / 100) B = [1, 5, 200] (stride (1, 1, 5)) A = [1, 5, 500] (stride (2500, 1, 5)) dim = 2 0.526 -> 0.523 ( -0.57%) [ +2.85% +0.00% +1.52% / +8.94% -0.57% +1.14%] index_select const : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.520 ( -0.95%) [ +4.00% +0.00% +0.00% / +10.67% -0.95% +1.33%] index_select wrap : Elapsed 0.005 ms (0.546 ms / 100) 0.529 -> 0.521 ( -1.51%) [ +2.08% +0.00% +6.24% / +2.46% -1.51% +0.57%] index_select linear : Elapsed 0.005 ms (0.540 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +3.44% +0.00% +6.68% / +4.20% -0.57% +1.91%] index_select reverse : Elapsed 0.005 ms (0.542 ms / 100) 0.525 -> 0.521 ( -0.76%) [ +3.81% +0.00% +0.57% / +3.43% -0.76% +1.14%] index_select skip64 : Elapsed 0.005 ms (0.545 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +5.00% +1.35% +0.00% / +3.27% +0.96% +2.12%] index_select skip256 : Elapsed 0.005 ms (0.546 ms / 100) 0.520 -> 0.520 ( +0.00%) [ +3.46% +0.00% +0.58% / +4.04% +0.00% +1.92%] index_select spread : Elapsed 0.005 ms (0.538 ms / 100) 0.525 -> 0.531 ( +1.14%) [+11.81% +0.76% +0.00% / +2.86% +6.29% +1.14%] index_select strided 3 : Elapsed 0.006 ms (0.587 ms / 100) 0.523 -> 0.539 ( +3.06%) [ +2.87% +0.38% +0.00% / +4.59% +13.19% +3.06%] index_select strided 5 : Elapsed 0.005 ms (0.538 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +1.52% +0.57% +0.00% / +4.17% -0.38% +0.76%] index_select strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +1.71% +2.86% +0.00% / +2.67% -0.19% +0.38%] index_select strided 8 : Elapsed 0.005 ms (0.534 ms / 100) 0.522 -> 0.526 ( +0.77%) [ +2.49% +0.00% +0.19% / +11.11% +0.77% +6.32%] index_select strided 16 : Elapsed 0.005 ms (0.535 ms / 100) 0.524 -> 0.542 ( +3.44%) [ +1.72% +0.00% +0.00% / +3.44% +3.44% +9.73%] index_select strided 64 : Elapsed 0.005 ms (0.533 ms / 100) 0.524 -> 0.529 ( +0.95%) [ +2.29% +0.00% +0.38% / +3.44% +0.95% +13.17%] index_select strided 100 : Elapsed 0.005 ms (0.536 ms / 100) 0.519 -> 0.524 ( +0.96%) [ +4.05% +1.54% +0.00% / +3.66% +0.96% +2.12%] index_select strided 255 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.526 ( +1.15%) [ +3.08% +0.77% +0.00% / +3.08% +1.15% +1.92%] index_select strided 256 : Elapsed 0.005 ms (0.536 ms / 100) 0.528 -> 0.535 ( +1.33%) [ +1.52% +0.00% +0.00% / +1.33% +2.65% +4.92%] index_select strided 257 : Elapsed 0.005 ms (0.536 ms / 100) 0.517 -> 0.530 ( +2.51%) [ +4.64% +1.16% +0.00% / +3.87% +2.51% +3.48%] index_select random : Elapsed 0.005 ms (0.541 ms / 100) 0.523 -> 0.530 ( +1.34%) [ +3.25% +0.00% +7.46% / +3.82% +2.87% +1.34%] index_select random_sorted : Elapsed 0.005 ms (0.540 ms / 100) 0.517 -> 0.523 ( +1.16%) [ +3.68% +0.00% +8.51% / +11.41% +1.16% +2.90%] index_select perm : Elapsed 0.005 ms (0.536 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +1.71% +0.00% +5.50% / +1.52% -0.38% +1.33%] index_select perm_sorted : Elapsed 0.005 ms (0.536 ms / 100) out_shape = [200, 500, 5] in_shape = [1, 500, 5] idx_dim = 0 B = [200, 500, 5] (stride (5, 1000, 1)) A = [1, 500, 5] (stride (1, 5, 1)) dim = 0 0.913 -> 0.917 ( +0.44%) [ +0.00% +0.11% +13.25% / +0.66% +0.55% +0.44%] index_add_ linear : Elapsed 0.009 ms (0.913 ms / 100) 0.893 -> 0.896 ( +0.34%) [ +0.11% +0.00% +1.57% / +0.67% +0.67% +0.34%] index_copy_ linear : Elapsed 0.009 ms (0.894 ms / 100) 0.913 -> 0.915 ( +0.22%) [ +0.44% +0.00% +6.90% / +0.55% +0.22% +0.55%] index_add_ reverse : Elapsed 0.009 ms (0.917 ms / 100) 0.892 -> 0.897 ( +0.56%) [ +0.45% +0.00% +2.58% / +0.56% +0.78% +1.01%] index_copy_ reverse : Elapsed 0.009 ms (0.896 ms / 100) 0.911 -> 0.917 ( +0.66%) [ +0.11% +0.00% +2.41% / +0.77% +0.77% +0.66%] index_add_ spread : Elapsed 0.009 ms (0.912 ms / 100) 0.892 -> 0.897 ( +0.56%) [ +0.22% +0.00% +0.34% / +0.67% +0.56% +0.56%] index_copy_ spread : Elapsed 0.009 ms (0.894 ms / 100) 0.913 -> 0.918 ( +0.55%) [ +0.33% +0.00% +0.66% / +0.55% +0.55% +0.55%] index_add_ strided 3 : Elapsed 0.009 ms (0.916 ms / 100) 0.894 -> 0.896 ( +0.22%) [ +0.34% +0.00% +0.78% / +0.22% +0.22% +0.89%] index_copy_ strided 3 : Elapsed 0.009 ms (0.897 ms / 100) 0.914 -> 0.918 ( +0.44%) [ +0.22% +0.11% +0.00% / +0.55% +0.55% +0.44%] index_add_ strided 7 : Elapsed 0.009 ms (0.916 ms / 100) 0.891 -> 0.895 ( +0.45%) [ +0.34% +0.00% +0.67% / +0.67% +0.45% +0.90%] index_copy_ strided 7 : Elapsed 0.009 ms (0.894 ms / 100) 0.913 -> 0.914 ( +0.11%) [ +0.11% +0.00% +0.22% / +0.11% +1.31% +1.75%] index_add_ perm : Elapsed 0.009 ms (0.914 ms / 100) 0.886 -> 0.895 ( +1.02%) [ +0.90% +0.00% +0.56% / +1.02% +2.37% +2.37%] index_copy_ perm : Elapsed 0.009 ms (0.894 ms / 100) 0.913 -> 0.913 ( +0.00%) [ +0.11% +0.00% +0.00% / +0.00% +2.30% +1.53%] index_add_ perm_sorted : Elapsed 0.009 ms (0.914 ms / 100) 0.887 -> 0.887 ( +0.00%) [ +0.00% +0.45% +0.00% / +0.00% +2.59% +2.48%] index_copy_ perm_sorted : Elapsed 0.009 ms (0.887 ms / 100) 17.501 -> 17.473 ( -0.16%) [ +0.23% +0.00% +0.23% / +0.37% -0.16% -0.04%] index_select const : Elapsed 0.175 ms (17.541 ms / 100) 17.522 -> 17.484 ( -0.22%) [ +0.19% +0.00% +0.07% / +0.10% -0.22% -0.10%] index_select wrap : Elapsed 0.176 ms (17.555 ms / 100) 17.531 -> 17.487 ( -0.25%) [ +0.20% +0.14% +0.00% / +0.31% -0.25% -0.15%] index_select linear : Elapsed 0.176 ms (17.566 ms / 100) 17.503 -> 17.477 ( -0.15%) [ +0.29% +0.00% +0.23% / +0.47% -0.03% -0.15%] index_select reverse : Elapsed 0.176 ms (17.553 ms / 100) 17.522 -> 17.504 ( -0.10%) [ +0.38% +0.00% +0.10% / +0.16% -0.05% -0.10%] index_select skip64 : Elapsed 0.176 ms (17.588 ms / 100) 17.526 -> 17.500 ( -0.15%) [ +0.29% +0.00% +0.02% / +0.18% -0.15% -0.03%] index_select skip256 : Elapsed 0.176 ms (17.576 ms / 100) 17.529 -> 17.481 ( -0.27%) [ +0.18% +0.00% +0.15% / +0.21% -0.14% -0.27%] index_select spread : Elapsed 0.176 ms (17.561 ms / 100) 17.513 -> 17.478 ( -0.20%) [ +0.23% +0.00% +0.08% / +0.21% -0.19% -0.20%] index_select random : Elapsed 0.176 ms (17.554 ms / 100) 17.516 -> 17.480 ( -0.21%) [ +0.28% +0.00% +0.21% / +0.17% -0.19% -0.21%] index_select random_sorted : Elapsed 0.176 ms (17.565 ms / 100) B = [200, 500, 5] (stride (1, 1000, 200)) dim = 0 fill_cnt = 1 0.468 -> 0.463 ( -1.07%) [ +7.69% +0.00% +18.16% / +0.00% -1.07% +0.00%] index_fill_ const : Elapsed 0.005 ms (0.504 ms / 100) 0.465 -> 0.457 ( -1.72%) [ +0.00% +0.65% +18.06% / +0.22% -1.72% +0.86%] index_fill_ linear : Elapsed 0.005 ms (0.465 ms / 100) 0.467 -> 0.461 ( -1.28%) [ +0.00% +1.07% +22.27% / +20.34% -1.28% +0.64%] index_fill_ reverse : Elapsed 0.005 ms (0.467 ms / 100) 0.467 -> 0.462 ( -1.07%) [ +0.00% +0.86% +25.70% / -1.07% +7.92% +15.20%] index_fill_ skip64 : Elapsed 0.005 ms (0.467 ms / 100) good 0.497 -> 0.464 ( -6.64%) [ +0.60% +0.00% +10.46% / -6.64% -5.43% -5.43%] index_fill_ skip256 : Elapsed 0.005 ms (0.500 ms / 100) 0.468 -> 0.450 ( -3.85%) [ +0.00% +0.21% +23.08% / +5.98% -3.85% +0.21%] index_fill_ spread : Elapsed 0.005 ms (0.468 ms / 100) 0.464 -> 0.456 ( -1.72%) [ +0.00% +4.09% +18.32% / +1.08% -1.72% +9.27%] index_fill_ strided 3 : Elapsed 0.005 ms (0.464 ms / 100) 0.467 -> 0.460 ( -1.50%) [ +0.00% +7.49% +19.70% / -0.64% +7.28% -1.50%] index_fill_ strided 5 : Elapsed 0.005 ms (0.467 ms / 100) 0.468 -> 0.459 ( -1.92%) [+10.68% +0.00% +17.31% / +9.62% -1.92% +1.50%] index_fill_ strided 7 : Elapsed 0.005 ms (0.518 ms / 100) 0.465 -> 0.466 ( +0.22%) [ +0.00% +0.43% +17.63% / +0.86% +0.22% +10.54%] index_fill_ strided 8 : Elapsed 0.005 ms (0.465 ms / 100) 0.466 -> 0.465 ( -0.21%) [ +0.00% +8.37% +23.39% / +0.21% +8.15% -0.21%] index_fill_ strided 16 : Elapsed 0.005 ms (0.466 ms / 100) 0.463 -> 0.454 ( -1.94%) [ +0.00% +1.08% +21.17% / +9.94% -1.94% +0.65%] index_fill_ strided 64 : Elapsed 0.005 ms (0.463 ms / 100) 0.462 -> 0.455 ( -1.52%) [ +0.00% +2.16% +18.61% / +0.00% -1.52% +10.82%] index_fill_ strided 100 : Elapsed 0.005 ms (0.462 ms / 100) 0.462 -> 0.453 ( -1.95%) [ +0.00% +2.81% +24.68% / +0.65% -1.95% +0.43%] index_fill_ random : Elapsed 0.005 ms (0.462 ms / 100) 0.464 -> 0.456 ( -1.72%) [ +0.00% +1.29% +18.10% / +0.65% -1.72% +0.86%] index_fill_ random_sorted : Elapsed 0.005 ms (0.464 ms / 100) 0.463 -> 0.455 ( -1.73%) [ +0.00% +1.08% +21.17% / +11.23% -1.73% +3.89%] index_fill_ perm : Elapsed 0.005 ms (0.463 ms / 100) 0.474 -> 0.451 ( -4.85%) [ +8.65% +0.00% +18.35% / -1.69% -4.85% -1.48%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.515 ms / 100) B = [200, 500, 5] (stride (500, 1, 100000)) A = [1, 500, 5] (stride (2500, 1, 500)) dim = 0 0.510 -> 0.493 ( -3.33%) [ +0.00% +5.88% +17.65% / -0.98% +5.49% -3.33%] index_add_ linear : Elapsed 0.005 ms (0.510 ms / 100) 0.506 -> 0.488 ( -3.56%) [ +0.00% +0.20% +1.98% / -0.99% -3.56% -3.16%] index_copy_ linear : Elapsed 0.005 ms (0.506 ms / 100) 0.515 -> 0.494 ( -4.08%) [ +0.00% +4.85% +20.78% / -1.94% +4.27% -4.08%] index_add_ reverse : Elapsed 0.005 ms (0.515 ms / 100) 0.494 -> 0.485 ( -1.82%) [ +1.42% +1.42% +0.00% / +1.82% -1.82% -1.21%] index_copy_ reverse : Elapsed 0.005 ms (0.501 ms / 100) 0.495 -> 0.491 ( -0.81%) [ +0.00% +3.23% +0.20% / +1.01% -0.81% +0.81%] index_add_ spread : Elapsed 0.005 ms (0.495 ms / 100) 0.500 -> 0.487 ( -2.60%) [ +2.60% +0.80% +0.00% / +0.00% -2.60% -1.60%] index_copy_ spread : Elapsed 0.005 ms (0.513 ms / 100) 0.496 -> 0.487 ( -1.81%) [ +0.40% +3.23% +0.00% / +1.01% -1.81% +0.00%] index_add_ strided 3 : Elapsed 0.005 ms (0.498 ms / 100) 0.502 -> 0.484 ( -3.59%) [ +3.59% +0.00% +2.39% / -0.80% -3.59% -1.39%] index_copy_ strided 3 : Elapsed 0.005 ms (0.520 ms / 100) 0.502 -> 0.496 ( -1.20%) [ +3.19% +2.79% +0.00% / -0.20% -1.20% -0.60%] index_add_ strided 7 : Elapsed 0.005 ms (0.518 ms / 100) good 0.526 -> 0.488 ( -7.22%) [ +0.00% +3.80% +10.08% / +0.95% -7.22% -5.70%] index_copy_ strided 7 : Elapsed 0.005 ms (0.526 ms / 100) 0.499 -> 0.489 ( -2.00%) [ +7.82% +2.81% +0.00% / +4.41% -2.00% -0.20%] index_add_ perm : Elapsed 0.005 ms (0.538 ms / 100) 0.502 -> 0.491 ( -2.19%) [ +5.18% +0.00% +0.00% / +5.18% -2.19% -1.59%] index_copy_ perm : Elapsed 0.005 ms (0.528 ms / 100) 0.498 -> 0.489 ( -1.81%) [ +7.23% +3.41% +0.00% / +9.64% -1.81% -0.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.534 ms / 100) 0.508 -> 0.486 ( -4.33%) [ +0.79% +6.30% +0.00% / -1.57% -4.33% -4.13%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.512 ms / 100) 8.580 -> 8.593 ( +0.15%) [ +0.00% +0.30% +0.22% / +0.15% +1.56% +1.29%] index_select const : Elapsed 0.086 ms (8.580 ms / 100) 8.546 -> 8.567 ( +0.25%) [ +0.25% +0.00% +0.23% / +0.25% +1.79% +1.71%] index_select wrap : Elapsed 0.086 ms (8.567 ms / 100) 8.560 -> 8.572 ( +0.14%) [ +0.08% +0.00% +0.28% / +0.14% +1.02% +1.03%] index_select linear : Elapsed 0.086 ms (8.567 ms / 100) 8.620 -> 8.617 ( -0.03%) [ +0.21% +0.06% +0.00% / -0.03% +1.10% +0.99%] index_select reverse : Elapsed 0.086 ms (8.638 ms / 100) 8.556 -> 8.578 ( +0.26%) [ +0.26% +0.00% +0.19% / +0.26% +1.51% +1.53%] index_select skip64 : Elapsed 0.086 ms (8.578 ms / 100) 8.554 -> 8.603 ( +0.57%) [ +0.00% +0.32% +0.51% / +0.57% +1.26% +1.25%] index_select skip256 : Elapsed 0.086 ms (8.554 ms / 100) 8.600 -> 8.621 ( +0.24%) [ +0.31% +0.00% +0.45% / +0.24% +0.50% +0.58%] index_select spread : Elapsed 0.086 ms (8.627 ms / 100) 8.570 -> 8.588 ( +0.21%) [ +0.35% +0.00% +0.06% / +0.21% +1.66% +1.97%] index_select random : Elapsed 0.086 ms (8.600 ms / 100) 8.618 -> 8.629 ( +0.13%) [ +0.26% +0.15% +0.00% / +0.13% +0.52% +0.93%] index_select random_sorted : Elapsed 0.086 ms (8.640 ms / 100) out_shape = [1, 200, 5] in_shape = [1, 500, 5] idx_dim = 1 B = [1, 200, 5] (stride (1000, 5, 1)) A = [1, 500, 5] (stride (1, 5, 1)) dim = 1 0.522 -> 0.529 ( +1.34%) [ +4.79% +2.11% +0.00% / +8.81% +1.34% +2.11%] index_select const : Elapsed 0.005 ms (0.547 ms / 100) 0.522 -> 0.533 ( +2.11%) [ +3.83% +1.15% +0.00% / +2.11% +6.32% +2.49%] index_select wrap : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.530 ( +1.73%) [ +3.84% +2.11% +0.00% / +3.07% +1.73% +1.73%] index_select linear : Elapsed 0.005 ms (0.541 ms / 100) 0.523 -> 0.525 ( +0.38%) [ +3.44% +0.00% +7.65% / +3.44% +0.38% +1.53%] index_select reverse : Elapsed 0.005 ms (0.541 ms / 100) 0.530 -> 0.529 ( -0.19%) [ +2.45% +0.00% +9.25% / +9.62% -0.19% +0.38%] index_select skip64 : Elapsed 0.005 ms (0.543 ms / 100) 0.532 -> 0.535 ( +0.56%) [ +2.26% +0.00% +9.96% / +19.36% +0.56% +1.88%] index_select skip256 : Elapsed 0.005 ms (0.544 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +4.23% +1.73% +0.00% / +9.23% +1.35% +3.65%] index_select spread : Elapsed 0.005 ms (0.542 ms / 100) 0.524 -> 0.532 ( +1.53%) [ +2.86% +1.91% +0.00% / +3.24% +1.53% +3.05%] index_select strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.525 -> 0.525 ( +0.00%) [ +3.05% +0.76% +0.00% / +2.67% +0.00% +1.90%] index_select strided 5 : Elapsed 0.005 ms (0.541 ms / 100) 0.547 -> 0.539 ( -1.46%) [ +0.00% +0.18% +3.29% / -0.55% +2.01% -1.46%] index_select strided 7 : Elapsed 0.005 ms (0.547 ms / 100) 0.519 -> 0.536 ( +3.28%) [ +3.66% +1.54% +0.00% / +3.28% +17.15% +3.85%] index_select strided 8 : Elapsed 0.005 ms (0.538 ms / 100) 0.531 -> 0.534 ( +0.56%) [ +1.32% +0.00% +5.84% / +0.75% +13.18% +0.56%] index_select strided 16 : Elapsed 0.005 ms (0.538 ms / 100) 0.539 -> 0.521 ( -3.34%) [ +0.00% +2.78% +23.19% / -0.74% -3.34% -0.56%] index_select strided 64 : Elapsed 0.005 ms (0.539 ms / 100) 0.525 -> 0.523 ( -0.38%) [ +2.48% +0.00% +12.95% / +1.90% -0.38% +4.38%] index_select strided 100 : Elapsed 0.005 ms (0.538 ms / 100) 0.523 -> 0.522 ( -0.19%) [ +3.82% +0.00% +9.75% / +2.87% -0.19% +13.38%] index_select strided 255 : Elapsed 0.005 ms (0.543 ms / 100) 0.525 -> 0.535 ( +1.90%) [ +2.86% +0.00% +11.81% / +1.90% +7.43% +6.10%] index_select strided 256 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.535 ( +2.88%) [ +2.50% +1.35% +0.00% / +4.04% +8.46% +2.88%] index_select strided 257 : Elapsed 0.005 ms (0.533 ms / 100) 0.529 -> 0.538 ( +1.70%) [ +1.70% +0.00% +5.86% / +1.70% +18.53% +1.89%] index_select random : Elapsed 0.005 ms (0.538 ms / 100) 0.541 -> 0.527 ( -2.59%) [ +0.00% +0.92% +2.03% / -0.74% -2.59% -0.92%] index_select random_sorted : Elapsed 0.005 ms (0.541 ms / 100) 0.521 -> 0.537 ( +3.07%) [ +3.26% +0.00% +0.77% / +3.07% +6.33% +3.45%] index_select perm : Elapsed 0.005 ms (0.538 ms / 100) 0.522 -> 0.536 ( +2.68%) [ +9.77% +0.00% +0.19% / +2.87% +7.09% +2.68%] index_select perm_sorted : Elapsed 0.006 ms (0.573 ms / 100) B = [1, 200, 5] (stride (200, 1, 200)) A = [1, 500, 5] (stride (2500, 5, 1)) dim = 1 0.530 -> 0.522 ( -1.51%) [ +2.64% +0.00% +8.49% / +1.51% -1.51% +2.83%] index_select const : Elapsed 0.005 ms (0.544 ms / 100) 0.529 -> 0.525 ( -0.76%) [ +5.86% +0.00% +5.67% / +1.13% -0.76% +3.02%] index_select wrap : Elapsed 0.006 ms (0.560 ms / 100) 0.520 -> 0.524 ( +0.77%) [ +3.85% +1.92% +0.00% / +3.46% +0.77% +12.50%] index_select linear : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.524 ( -0.38%) [ +2.28% +2.85% +0.00% / +2.28% -0.38% +3.42%] index_select reverse : Elapsed 0.005 ms (0.538 ms / 100) 0.536 -> 0.523 ( -2.43%) [ +0.37% +0.00% +1.12% / +5.97% -2.43% +0.93%] index_select skip64 : Elapsed 0.005 ms (0.538 ms / 100) 0.528 -> 0.520 ( -1.52%) [ +2.65% +0.00% +7.77% / +19.32% -1.52% +1.89%] index_select skip256 : Elapsed 0.005 ms (0.542 ms / 100) 0.531 -> 0.523 ( -1.51%) [ +0.56% +0.00% +10.92% / +2.07% -1.51% +3.77%] index_select spread : Elapsed 0.005 ms (0.534 ms / 100) 0.516 -> 0.521 ( +0.97%) [ +4.07% +2.71% +0.00% / +3.68% +0.97% +4.84%] index_select strided 3 : Elapsed 0.005 ms (0.537 ms / 100) 0.528 -> 0.522 ( -1.14%) [ +1.89% +0.00% +6.25% / +2.84% -1.14% +8.33%] index_select strided 5 : Elapsed 0.005 ms (0.538 ms / 100) 0.526 -> 0.517 ( -1.71%) [ +2.09% +0.00% +11.41% / +2.66% -1.71% +2.66%] index_select strided 7 : Elapsed 0.005 ms (0.537 ms / 100) 0.539 -> 0.516 ( -4.27%) [ +0.00% +1.30% +8.35% / +12.62% -4.27% -0.93%] index_select strided 8 : Elapsed 0.005 ms (0.539 ms / 100) 0.526 -> 0.535 ( +1.71%) [ +2.66% +0.00% +7.03% / +3.80% +1.71% +10.08%] index_select strided 16 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.518 ( -0.38%) [ +3.65% +0.00% +7.88% / +4.62% -0.38% +19.81%] index_select strided 64 : Elapsed 0.005 ms (0.539 ms / 100) 0.530 -> 0.534 ( +0.75%) [ +1.51% +0.00% +5.09% / +2.83% +4.91% +0.75%] index_select strided 100 : Elapsed 0.005 ms (0.538 ms / 100) 0.532 -> 0.533 ( +0.19%) [ +3.20% +0.00% +4.51% / +1.69% +11.84% +0.19%] index_select strided 255 : Elapsed 0.005 ms (0.549 ms / 100) 0.517 -> 0.520 ( +0.58%) [ +4.45% +2.51% +0.00% / +4.06% +0.58% +5.03%] index_select strided 256 : Elapsed 0.005 ms (0.540 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +3.45% +1.54% +0.00% / +4.22% -0.19% +3.84%] index_select strided 257 : Elapsed 0.005 ms (0.539 ms / 100) 0.527 -> 0.521 ( -1.14%) [ +2.47% +0.00% +8.16% / +3.61% -1.14% +1.33%] index_select random : Elapsed 0.005 ms (0.540 ms / 100) 0.534 -> 0.520 ( -2.62%) [ +1.12% +0.00% +9.18% / +1.31% -2.62% -0.37%] index_select random_sorted : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.519 ( -1.33%) [ +2.47% +0.00% +11.98% / +3.99% -1.33% +8.94%] index_select perm : Elapsed 0.005 ms (0.539 ms / 100) 0.515 -> 0.524 ( +1.75%) [ +5.44% +2.72% +0.00% / +5.63% +1.75% +5.05%] index_select perm_sorted : Elapsed 0.005 ms (0.543 ms / 100) B = [1, 200, 5] (stride (200, 1, 200)) A = [1, 500, 5] (stride (1, 5, 1)) dim = 1 0.532 -> 0.516 ( -3.01%) [ +2.63% +0.00% +3.95% / +2.63% -3.01% +0.56%] index_select const : Elapsed 0.005 ms (0.546 ms / 100) 0.533 -> 0.514 ( -3.56%) [ +1.50% +0.00% +4.88% / +0.75% -3.56% -0.19%] index_select wrap : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +3.24% +0.00% +12.95% / +3.05% -0.19% +9.14%] index_select linear : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.530 ( +2.51%) [ +8.70% +2.51% +0.00% / +2.51% +2.90% +9.48%] index_select reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.529 -> 0.519 ( -1.89%) [ +2.08% +0.00% +4.35% / +3.21% -1.89% +0.38%] index_select skip64 : Elapsed 0.005 ms (0.540 ms / 100) 0.532 -> 0.521 ( -2.07%) [ +0.75% +0.00% +4.89% / +2.44% -2.07% -0.19%] index_select skip256 : Elapsed 0.005 ms (0.536 ms / 100) 0.532 -> 0.522 ( -1.88%) [ +3.01% +0.00% +4.89% / +9.77% -1.88% +0.56%] index_select spread : Elapsed 0.005 ms (0.548 ms / 100) 0.530 -> 0.518 ( -2.26%) [ +3.21% +0.00% +11.51% / +9.43% -2.26% +1.32%] index_select strided 3 : Elapsed 0.005 ms (0.547 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +3.42% +0.00% +8.54% / +6.83% -0.38% +8.54%] index_select strided 5 : Elapsed 0.005 ms (0.545 ms / 100) 0.525 -> 0.519 ( -1.14%) [ +2.86% +0.00% +6.48% / +4.00% -1.14% +17.14%] index_select strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.540 -> 0.533 ( -1.30%) [ +0.00% +0.00% +2.96% / -0.56% +0.00% -1.30%] index_select strided 8 : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.540 ( +2.66%) [ +3.80% +0.19% +0.00% / +2.66% +4.94% +3.61%] index_select strided 16 : Elapsed 0.005 ms (0.546 ms / 100) 0.525 -> 0.539 ( +2.67%) [ +5.33% +0.57% +0.00% / +2.67% +4.00% +2.67%] index_select strided 64 : Elapsed 0.006 ms (0.553 ms / 100) 0.536 -> 0.532 ( -0.75%) [ +0.00% +1.87% +5.04% / +0.93% +3.73% -0.75%] index_select strided 100 : Elapsed 0.005 ms (0.536 ms / 100) 0.536 -> 0.540 ( +0.75%) [ +0.37% +0.00% +7.09% / +0.75% +8.96% +5.04%] index_select strided 255 : Elapsed 0.005 ms (0.538 ms / 100) 0.529 -> 0.522 ( -1.32%) [ +3.21% +0.00% +10.96% / +2.08% -1.32% +2.27%] index_select strided 256 : Elapsed 0.005 ms (0.546 ms / 100) 0.516 -> 0.518 ( +0.39%) [ +5.04% +2.33% +0.00% / +5.04% +0.39% +6.78%] index_select strided 257 : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.519 ( +0.39%) [ +4.26% +1.93% +0.00% / +4.06% +0.39% +4.06%] index_select random : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.520 ( -0.76%) [+11.45% +0.00% +4.96% / +5.15% -0.76% +3.05%] index_select random_sorted : Elapsed 0.006 ms (0.584 ms / 100) 0.529 -> 0.522 ( -1.32%) [+19.28% +0.00% +5.67% / +2.46% -1.32% +2.65%] index_select perm : Elapsed 0.006 ms (0.631 ms / 100) 0.512 -> 0.520 ( +1.56%) [+10.35% +2.93% +0.00% / +11.33% +1.56% +5.27%] index_select perm_sorted : Elapsed 0.006 ms (0.565 ms / 100) B = [1, 200, 5] (stride (1, 1, 200)) A = [1, 500, 5] (stride (5, 5, 1)) dim = 1 0.521 -> 0.529 ( +1.54%) [+13.44% +0.00% +9.02% / +4.61% +1.54% +2.30%] index_select const : Elapsed 0.006 ms (0.591 ms / 100) 0.526 -> 0.523 ( -0.57%) [ +2.47% +0.00% +10.65% / +9.89% -0.57% +1.71%] index_select wrap : Elapsed 0.005 ms (0.539 ms / 100) 0.537 -> 0.520 ( -3.17%) [ +0.00% +0.56% +4.28% / +6.33% -3.17% -0.19%] index_select linear : Elapsed 0.005 ms (0.537 ms / 100) 0.538 -> 0.520 ( -3.35%) [ +0.00% +1.67% +8.92% / +1.30% -3.35% -0.19%] index_select reverse : Elapsed 0.005 ms (0.538 ms / 100) 0.526 -> 0.524 ( -0.38%) [ +5.13% +0.00% +9.70% / +23.19% -0.38% +0.95%] index_select skip64 : Elapsed 0.006 ms (0.553 ms / 100) 0.529 -> 0.520 ( -1.70%) [ +2.65% +0.00% +5.86% / +2.65% -1.70% +6.24%] index_select skip256 : Elapsed 0.005 ms (0.543 ms / 100) 0.523 -> 0.521 ( -0.38%) [ +4.21% +1.34% +0.00% / +11.85% -0.38% +14.34%] index_select spread : Elapsed 0.005 ms (0.545 ms / 100) 0.526 -> 0.533 ( +1.33%) [ +3.42% +3.80% +0.00% / +9.89% +5.70% +1.33%] index_select strided 3 : Elapsed 0.005 ms (0.544 ms / 100) 0.525 -> 0.544 ( +3.62%) [ +3.05% +1.14% +0.00% / +4.38% +16.00% +3.62%] index_select strided 5 : Elapsed 0.005 ms (0.541 ms / 100) 0.524 -> 0.537 ( +2.48%) [ +3.82% +0.00% +12.79% / +2.67% +6.49% +2.48%] index_select strided 7 : Elapsed 0.005 ms (0.544 ms / 100) 0.530 -> 0.534 ( +0.75%) [ +2.26% +0.00% +8.49% / +2.64% +8.87% +0.75%] index_select strided 8 : Elapsed 0.005 ms (0.542 ms / 100) 0.523 -> 0.533 ( +1.91%) [ +4.21% +0.00% +11.28% / +4.40% +5.93% +1.91%] index_select strided 16 : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.545 ( +4.41%) [ +4.21% +5.17% +0.00% / +4.41% +20.11% +11.49%] index_select strided 64 : Elapsed 0.005 ms (0.544 ms / 100) 0.541 -> 0.527 ( -2.59%) [ +0.18% +0.00% +0.55% / -0.18% -1.85% -2.59%] index_select strided 100 : Elapsed 0.005 ms (0.542 ms / 100) 0.528 -> 0.525 ( -0.57%) [ +3.03% +0.00% +10.23% / +3.41% -0.57% +0.19%] index_select strided 255 : Elapsed 0.005 ms (0.544 ms / 100) 0.524 -> 0.525 ( +0.19%) [ +4.77% +0.00% +0.00% / +4.01% +0.19% +1.34%] index_select strided 256 : Elapsed 0.005 ms (0.549 ms / 100) 0.528 -> 0.525 ( -0.57%) [ +6.82% +0.95% +0.00% / +7.39% -0.57% +0.57%] index_select strided 257 : Elapsed 0.006 ms (0.564 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +3.65% +0.00% +7.88% / +25.00% +0.96% +3.08%] index_select random : Elapsed 0.005 ms (0.539 ms / 100) 0.523 -> 0.531 ( +1.53%) [ +2.29% +0.00% +7.65% / +20.84% +6.31% +1.53%] index_select random_sorted : Elapsed 0.005 ms (0.535 ms / 100) 0.531 -> 0.532 ( +0.19%) [ +1.51% +0.00% +7.16% / +7.72% +18.08% +0.19%] index_select perm : Elapsed 0.005 ms (0.539 ms / 100) 0.530 -> 0.522 ( -1.51%) [ +7.92% +0.00% +5.47% / +19.06% -1.51% +0.19%] index_select perm_sorted : Elapsed 0.006 ms (0.572 ms / 100) out_shape = [1, 500, 200] in_shape = [1, 500, 5] idx_dim = 2 B = [1, 500, 200] (stride (100000, 1, 500)) A = [1, 500, 5] (stride (5, 5, 1)) dim = 2 0.576 -> 0.578 ( +0.35%) [ +0.87% +0.00% +0.17% / +1.22% +0.52% +0.35%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.561 -> 0.563 ( +0.36%) [ +0.53% +0.00% +0.00% / +0.53% +0.36% +0.36%] index_copy_ linear : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.00% +0.35% / +0.87% +0.52% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.89% +0.18% +0.00% / +0.89% +0.54% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.00% +0.52% / +1.22% +0.87% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.00% +0.18% / +0.89% +1.25% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.17% +0.00% / +1.22% +0.87% +0.87%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +0.71% +0.00% +0.00% / +1.25% +0.71% +0.36%] index_copy_ strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.87% +0.00% +0.00% / +1.22% +0.87% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.580 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.71% +0.00% +0.00% / +6.43% +0.71% +0.54%] index_copy_ strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.576 -> 0.579 ( +0.52%) [ +1.04% +0.00% +0.00% / +1.04% +0.69% +0.52%] index_add_ perm : Elapsed 0.006 ms (0.582 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +2.86% +0.18% +0.00% / +3.93% +0.54% +0.36%] index_copy_ perm : Elapsed 0.006 ms (0.576 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.22% +0.00% +0.17% / +0.87% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.582 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +3.58% +0.00% +1.25% / +0.72% +1.07% +0.89%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.579 ms / 100) 4.808 -> 4.814 ( +0.12%) [ +0.21% +0.00% +0.10% / +0.25% +0.12% +0.19%] index_select const : Elapsed 0.048 ms (4.818 ms / 100) 4.816 -> 4.807 ( -0.19%) [ +0.10% +0.00% +0.04% / -0.06% +0.04% -0.19%] index_select wrap : Elapsed 0.048 ms (4.821 ms / 100) 4.819 -> 4.810 ( -0.19%) [ +0.17% +0.00% +0.15% / +0.15% -0.19% -0.19%] index_select linear : Elapsed 0.048 ms (4.827 ms / 100) 4.818 -> 4.820 ( +0.04%) [ +0.17% +0.00% +0.02% / +0.04% +0.08% +0.04%] index_select reverse : Elapsed 0.048 ms (4.826 ms / 100) 4.803 -> 4.811 ( +0.17%) [ +0.37% +0.00% +0.44% / +0.17% +0.27% +0.17%] index_select skip64 : Elapsed 0.048 ms (4.821 ms / 100) 4.803 -> 4.810 ( +0.15%) [ +0.33% +0.37% +0.00% / +0.17% +0.15% +0.31%] index_select skip256 : Elapsed 0.048 ms (4.819 ms / 100) 4.815 -> 4.809 ( -0.12%) [ +0.00% +0.15% +0.12% / -0.04% -0.12% -0.10%] index_select spread : Elapsed 0.048 ms (4.815 ms / 100) 4.811 -> 4.813 ( +0.04%) [ +0.00% +0.02% +0.21% / +0.19% +0.15% +0.04%] index_select strided 3 : Elapsed 0.048 ms (4.811 ms / 100) 4.842 -> 4.813 ( -0.60%) [ +0.00% +0.21% +0.12% / +0.25% -0.60% -0.50%] index_select random : Elapsed 0.048 ms (4.842 ms / 100) 4.806 -> 4.819 ( +0.27%) [ +0.29% +0.00% +0.23% / +0.27% +0.56% +0.81%] index_select random_sorted : Elapsed 0.048 ms (4.820 ms / 100) B = [1, 500, 200] (stride (200, 200, 1)) A = [1, 500, 5] (stride (1, 5, 1)) dim = 2 0.576 -> 0.579 ( +0.52%) [ +1.04% +0.00% +0.35% / +0.87% +0.87% +0.52%] index_add_ linear : Elapsed 0.006 ms (0.582 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.53% +0.00% +0.34% / +1.70% +1.36% +1.53%] index_copy_ linear : Elapsed 0.006 ms (0.598 ms / 100) 0.576 -> 0.579 ( +0.52%) [ +0.69% +0.00% +0.52% / +0.87% +0.52% +0.87%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.02% +0.00% +0.34% / +1.02% +1.36% +1.19%] index_copy_ reverse : Elapsed 0.006 ms (0.595 ms / 100) 0.577 -> 0.579 ( +0.35%) [ +0.87% +0.00% +0.00% / +0.87% +0.35% +0.69%] index_add_ spread : Elapsed 0.006 ms (0.582 ms / 100) 0.591 -> 0.594 ( +0.51%) [ +0.51% +0.00% +4.40% / +0.51% +1.18% +0.51%] index_copy_ spread : Elapsed 0.006 ms (0.594 ms / 100) 0.577 -> 0.581 ( +0.69%) [ +0.69% +0.00% +0.00% / +0.87% +0.69% +0.69%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.19% +0.00% +0.00% / +1.19% +1.36% +1.19%] index_copy_ strided 3 : Elapsed 0.006 ms (0.597 ms / 100) 0.576 -> 0.580 ( +0.69%) [ +1.04% +0.00% +0.00% / +0.69% +1.22% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.582 ms / 100) 0.591 -> 0.596 ( +0.85%) [ +1.52% +0.85% +0.00% / +1.02% +1.02% +0.85%] index_copy_ strided 7 : Elapsed 0.006 ms (0.600 ms / 100) 0.576 -> 0.580 ( +0.69%) [ +0.87% +0.00% +0.35% / +0.87% +0.69% +0.87%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.53% +0.00% +0.85% / +1.36% +1.36% +1.36%] index_copy_ perm : Elapsed 0.006 ms (0.598 ms / 100) 0.578 -> 0.580 ( +0.35%) [ +0.52% +0.00% +0.00% / +1.56% +0.35% +0.52%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.590 -> 0.595 ( +0.85%) [ +1.02% +0.00% +0.34% / +0.85% +1.53% +1.02%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.596 ms / 100) good 5.174 -> 4.799 ( -7.25%) [ +0.25% +0.00% +0.35% / -7.17% -7.21% -7.25%] index_select const : Elapsed 0.052 ms (5.187 ms / 100) good 5.140 -> 4.778 ( -7.04%) [ +0.00% +0.08% +0.31% / -6.89% -6.71% -7.04%] index_select wrap : Elapsed 0.051 ms (5.140 ms / 100) good 5.157 -> 4.776 ( -7.39%) [ +0.06% +0.02% +0.00% / -6.94% -7.19% -7.39%] index_select linear : Elapsed 0.052 ms (5.160 ms / 100) good 5.152 -> 4.790 ( -7.03%) [ +0.16% +0.00% +0.23% / -7.03% -6.95% -7.01%] index_select reverse : Elapsed 0.052 ms (5.160 ms / 100) good 5.155 -> 4.781 ( -7.26%) [ +0.00% +0.00% +0.08% / -6.77% -7.26% -7.26%] index_select skip64 : Elapsed 0.052 ms (5.155 ms / 100) good 5.149 -> 4.783 ( -7.11%) [ +0.00% +0.10% +0.17% / -6.86% -6.91% -7.11%] index_select skip256 : Elapsed 0.051 ms (5.149 ms / 100) good 5.158 -> 4.782 ( -7.29%) [ +0.12% +0.00% +0.12% / -6.94% -7.29% -7.19%] index_select spread : Elapsed 0.052 ms (5.164 ms / 100) good 5.145 -> 4.786 ( -6.98%) [ +0.23% +0.23% +0.00% / -6.98% -6.90% -6.78%] index_select strided 3 : Elapsed 0.052 ms (5.157 ms / 100) good 5.206 -> 4.797 ( -7.86%) [ +0.02% +0.00% +0.23% / -7.66% -7.74% -7.86%] index_select random : Elapsed 0.052 ms (5.207 ms / 100) good 5.178 -> 4.795 ( -7.40%) [ +0.15% +0.00% +0.21% / -7.26% -7.40% -7.28%] index_select random_sorted : Elapsed 0.052 ms (5.186 ms / 100) B = [1, 500, 200] (stride (1, 1, 500)) A = [1, 500, 5] (stride (1, 1, 500)) dim = 2 0.573 -> 0.572 ( -0.17%) [ +0.35% +0.00% +0.17% / +0.35% +0.17% -0.17%] index_add_ linear : Elapsed 0.006 ms (0.575 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.90% +0.00% +0.36% / +1.08% +0.54% +0.36%] index_copy_ linear : Elapsed 0.006 ms (0.559 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +0.35% +0.00% +0.52% / +0.17% -0.17% +0.00%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.72% +0.00% +0.36% / +0.54% +0.54% +0.36%] index_copy_ reverse : Elapsed 0.006 ms (0.558 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +0.70% +0.00% +0.17% / +0.52% +0.00% +0.17%] index_add_ spread : Elapsed 0.006 ms (0.576 ms / 100) 0.555 -> 0.556 ( +0.18%) [ +0.72% +0.00% +0.36% / +0.90% +0.36% +0.18%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.572 -> 0.573 ( +0.17%) [ +0.35% +0.00% +0.17% / +0.52% +0.17% +0.17%] index_add_ strided 3 : Elapsed 0.006 ms (0.574 ms / 100) 0.555 -> 0.557 ( +0.36%) [ +0.54% +0.00% +0.36% / +1.44% +0.54% +0.36%] index_copy_ strided 3 : Elapsed 0.006 ms (0.558 ms / 100) 0.572 -> 0.573 ( +0.17%) [ +0.35% +0.00% +0.17% / +0.35% +0.17% +0.17%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.555 -> 0.557 ( +0.36%) [ +0.72% +0.90% +0.00% / +0.72% +0.54% +0.36%] index_copy_ strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.00% +1.23% / +0.70% +0.35% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.555 -> 0.557 ( +0.36%) [ +0.72% +0.00% +0.36% / +0.54% +0.36% +0.36%] index_copy_ perm : Elapsed 0.006 ms (0.559 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +0.17% +0.35% +0.00% / +0.52% +0.00% +0.17%] index_add_ perm_sorted : Elapsed 0.006 ms (0.573 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.90% +0.00% +0.18% / +0.90% +0.36% +0.54%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) 4.795 -> 4.792 ( -0.06%) [ +0.08% +0.00% +0.08% / +0.04% +0.10% -0.06%] index_select const : Elapsed 0.048 ms (4.799 ms / 100) 4.795 -> 4.800 ( +0.10%) [ +0.21% +0.00% +0.15% / +0.10% +0.19% +0.19%] index_select wrap : Elapsed 0.048 ms (4.805 ms / 100) 4.794 -> 4.788 ( -0.13%) [ +0.06% +0.04% +0.00% / +0.04% -0.13% +0.15%] index_select linear : Elapsed 0.048 ms (4.797 ms / 100) 4.800 -> 4.798 ( -0.04%) [ +0.00% +0.02% +0.27% / -0.04% -0.02% +0.00%] index_select reverse : Elapsed 0.048 ms (4.800 ms / 100) 4.793 -> 4.791 ( -0.04%) [ +0.10% +0.00% +0.29% / -0.04% -0.04% -0.02%] index_select skip64 : Elapsed 0.048 ms (4.798 ms / 100) 4.792 -> 4.789 ( -0.06%) [ +0.17% +0.00% +0.04% / -0.06% +0.02% +0.23%] index_select skip256 : Elapsed 0.048 ms (4.800 ms / 100) 4.794 -> 4.777 ( -0.35%) [ +0.08% +0.17% +0.00% / +0.17% -0.17% -0.35%] index_select spread : Elapsed 0.048 ms (4.798 ms / 100) 4.788 -> 4.793 ( +0.10%) [ +0.00% +0.04% +0.23% / +0.33% +0.15% +0.10%] index_select strided 3 : Elapsed 0.048 ms (4.788 ms / 100) 4.827 -> 4.803 ( -0.50%) [ +0.04% +0.00% +0.15% / -0.17% -0.46% -0.50%] index_select random : Elapsed 0.048 ms (4.829 ms / 100) 4.793 -> 4.795 ( +0.04%) [ +0.23% +0.10% +0.00% / +0.04% +0.65% +0.83%] index_select random_sorted : Elapsed 0.048 ms (4.804 ms / 100) out_shape = [200, 1, 500] in_shape = [5, 1, 500] idx_dim = 0 B = [200, 1, 500] (stride (500, 1, 1)) A = [5, 1, 500] (stride (500, 500, 1)) dim = 0 0.571 -> 0.571 ( +0.00%) [ +0.70% +0.00% +0.18% / +0.70% +0.00% +0.00%] index_add_ linear : Elapsed 0.006 ms (0.575 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.90% +0.00% +0.72% / +3.25% +0.54% +0.36%] index_copy_ linear : Elapsed 0.006 ms (0.559 ms / 100) 0.569 -> 0.572 ( +0.53%) [ +0.88% +0.53% +0.00% / +0.88% +0.53% +0.53%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.555 ( +0.36%) [ +0.72% +0.18% +0.00% / +1.08% +0.54% +0.36%] index_copy_ reverse : Elapsed 0.006 ms (0.557 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +0.53% +0.18% +0.00% / +0.53% +0.18% +0.00%] index_add_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.90% +0.00% +0.54% / +0.90% +0.36% +0.36%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.00% / +0.35% +1.23% +0.18%] index_add_ strided 3 : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +2.17% +0.00% +0.00% / +0.90% +0.54% +0.36%] index_copy_ strided 3 : Elapsed 0.006 ms (0.566 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.35% / +0.35% +0.35% +0.18%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.555 ( +0.18%) [ +0.54% +0.00% +1.26% / +1.08% +1.08% +0.18%] index_copy_ strided 7 : Elapsed 0.006 ms (0.557 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.53% +0.18% +0.00% / +0.53% +0.35% +0.70%] index_add_ perm : Elapsed 0.006 ms (0.573 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +0.90% +0.00% +0.18% / +0.72% +0.72% +1.99%] index_copy_ perm : Elapsed 0.006 ms (0.558 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.35% +0.00% +0.00% / +0.35% +0.35% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.573 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.54% +0.00% +0.36% / +0.72% +3.07% +0.36%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.557 ms / 100) 4.791 -> 4.783 ( -0.17%) [ +0.23% +0.17% +0.00% / -0.17% +0.06% -0.06%] index_select const : Elapsed 0.048 ms (4.802 ms / 100) 4.797 -> 4.784 ( -0.27%) [ +0.00% +0.06% +0.06% / -0.13% -0.04% -0.27%] index_select wrap : Elapsed 0.048 ms (4.797 ms / 100) 4.796 -> 4.793 ( -0.06%) [ +0.00% +0.13% +0.15% / +0.10% +0.06% -0.06%] index_select linear : Elapsed 0.048 ms (4.796 ms / 100) 4.793 -> 4.802 ( +0.19%) [ +0.27% +0.04% +0.00% / +0.19% +0.25% +0.29%] index_select reverse : Elapsed 0.048 ms (4.806 ms / 100) 4.795 -> 4.789 ( -0.13%) [ +0.10% +0.00% +0.25% / +0.04% +0.04% -0.13%] index_select skip64 : Elapsed 0.048 ms (4.800 ms / 100) 4.784 -> 4.788 ( +0.08%) [ +0.06% +0.17% +0.00% / +0.13% +0.08% +0.46%] index_select skip256 : Elapsed 0.048 ms (4.787 ms / 100) 4.798 -> 4.781 ( -0.35%) [ +0.00% +0.10% +0.04% / +0.02% -0.35% +0.15%] index_select spread : Elapsed 0.048 ms (4.798 ms / 100) 4.788 -> 4.793 ( +0.10%) [ +0.36% +0.00% +0.15% / +0.10% +0.15% +0.10%] index_select strided 3 : Elapsed 0.048 ms (4.805 ms / 100) 4.841 -> 4.788 ( -1.09%) [ +0.00% +0.04% +0.00% / -0.10% -0.87% -1.09%] index_select random : Elapsed 0.048 ms (4.841 ms / 100) 4.796 -> 4.796 ( +0.00%) [ +0.06% +0.00% +0.08% / +0.00% +0.67% +0.65%] index_select random_sorted : Elapsed 0.048 ms (4.799 ms / 100) B = [200, 1, 500] (stride (500, 100000, 1)) A = [5, 1, 500] (stride (1, 5, 5)) dim = 0 0.574 -> 0.578 ( +0.70%) [ +1.05% +0.17% +0.00% / +1.05% +0.70% +0.70%] index_add_ linear : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.54% +0.18% +0.00% / +0.72% +0.89% +0.72%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +0.87% +0.00% +0.00% / +0.70% +0.52% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.18% +0.00% / +7.87% +0.72% +0.89%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +1.05% +0.17% +0.00% / +1.22% +0.87% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.36% +0.00% / +0.90% +0.90% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.573 -> 0.577 ( +0.70%) [ +1.05% +0.17% +0.00% / +0.70% +1.22% +1.05%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +11.27% / +0.89% +1.07% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +0.00% +0.17% / +0.87% +0.87% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +1.08% +0.36% +0.00% / +0.72% +1.08% +1.08%] index_copy_ strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +0.70% +0.17% +0.00% / +0.70% +1.22% +2.44%] index_add_ perm : Elapsed 0.006 ms (0.578 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.54% +0.00% +0.18% / +0.54% +0.71% +5.00%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.70% +0.00% +0.00% / +0.70% +0.87% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.579 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +0.89% +0.89%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 4.812 -> 4.819 ( +0.15%) [ +0.17% +0.00% +0.33% / +0.15% +0.81% +0.71%] index_select const : Elapsed 0.048 ms (4.820 ms / 100) 4.806 -> 4.810 ( +0.08%) [ +0.27% +0.00% +0.35% / +0.08% +0.10% +0.23%] index_select wrap : Elapsed 0.048 ms (4.819 ms / 100) 4.809 -> 4.809 ( +0.00%) [ +0.40% +0.00% +0.23% / +0.46% +0.00% +0.02%] index_select linear : Elapsed 0.048 ms (4.828 ms / 100) 4.817 -> 4.811 ( -0.12%) [ +0.06% +0.08% +0.00% / +0.04% -0.12% +0.10%] index_select reverse : Elapsed 0.048 ms (4.820 ms / 100) 4.811 -> 4.815 ( +0.08%) [ +0.21% +0.00% +0.33% / +0.21% +0.12% +0.08%] index_select skip64 : Elapsed 0.048 ms (4.821 ms / 100) 4.818 -> 4.811 ( -0.15%) [ +0.10% +0.00% +0.12% / +0.04% +0.00% -0.15%] index_select skip256 : Elapsed 0.048 ms (4.823 ms / 100) 4.824 -> 4.816 ( -0.17%) [ +0.10% +0.00% +0.08% / +0.06% -0.17% -0.04%] index_select spread : Elapsed 0.048 ms (4.829 ms / 100) 4.815 -> 4.815 ( +0.00%) [ +0.08% +0.08% +0.00% / +0.08% +0.00% +0.02%] index_select strided 3 : Elapsed 0.048 ms (4.819 ms / 100) 4.843 -> 4.823 ( -0.41%) [ +0.17% +0.00% +0.06% / +0.25% -0.37% -0.41%] index_select random : Elapsed 0.049 ms (4.851 ms / 100) 4.816 -> 4.826 ( +0.21%) [ +0.21% +0.02% +0.00% / +0.21% +0.58% +0.52%] index_select random_sorted : Elapsed 0.048 ms (4.826 ms / 100) B = [200, 1, 500] (stride (1, 200, 200)) A = [5, 1, 500] (stride (500, 2500, 1)) dim = 0 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.18% / +0.53% +0.18% +0.18%] index_add_ linear : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.19% +0.17% +0.00% / +1.54% +0.51% +0.51%] index_copy_ linear : Elapsed 0.006 ms (0.593 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.00% +0.00% / +0.35% +0.35% +0.88%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.00% +0.00% / +1.02% +0.51% +0.51%] index_copy_ reverse : Elapsed 0.006 ms (0.591 ms / 100) 0.569 -> 0.572 ( +0.53%) [ +0.88% +0.53% +0.00% / +0.88% +0.53% +0.53%] index_add_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +1.20% +0.00% +0.34% / +7.18% +0.68% +0.51%] index_copy_ spread : Elapsed 0.006 ms (0.592 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.53% +0.35% +0.00% / +0.53% +0.35% +0.53%] index_add_ strided 3 : Elapsed 0.006 ms (0.573 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.00% +0.00% / +0.85% +0.51% +0.85%] index_copy_ strided 3 : Elapsed 0.006 ms (0.591 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.70% +0.00% +0.18% / +0.53% +0.53% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.03% +0.00% +0.17% / +0.85% +0.85% +0.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.591 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.70% +0.00% +0.18% / +0.35% +0.18% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.590 ( +0.68%) [ +0.85% +0.00% +0.00% / +0.68% +0.68% +0.68%] index_copy_ perm : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.35% +0.00% +0.00% / +2.10% +0.18% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.573 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +0.68% +0.17% +0.00% / +0.85% +0.85% +0.85%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.589 ms / 100) good 5.059 -> 4.781 ( -5.50%) [ +0.14% +0.00% +0.02% / -5.24% -5.24% -5.50%] index_select const : Elapsed 0.051 ms (5.066 ms / 100) 5.069 -> 4.816 ( -4.99%) [ +0.16% +0.14% +0.00% / -4.18% -4.99% -4.68%] index_select wrap : Elapsed 0.051 ms (5.077 ms / 100) good 5.068 -> 4.799 ( -5.31%) [ +0.16% +0.28% +0.00% / -5.17% -5.31% -5.25%] index_select linear : Elapsed 0.051 ms (5.076 ms / 100) good 5.059 -> 4.798 ( -5.16%) [ +0.32% +0.00% +0.32% / -5.16% -4.76% -4.96%] index_select reverse : Elapsed 0.051 ms (5.075 ms / 100) good 5.056 -> 4.793 ( -5.20%) [ +0.14% +0.04% +0.00% / -5.12% -5.14% -5.20%] index_select skip64 : Elapsed 0.051 ms (5.063 ms / 100) good 5.069 -> 4.768 ( -5.94%) [ +0.00% +0.10% +0.02% / -5.94% -5.37% -5.33%] index_select skip256 : Elapsed 0.051 ms (5.069 ms / 100) good 5.060 -> 4.787 ( -5.40%) [ +0.10% +0.04% +0.00% / -5.16% -5.40% -5.32%] index_select spread : Elapsed 0.051 ms (5.065 ms / 100) 5.071 -> 4.832 ( -4.71%) [ +0.00% +0.14% +0.12% / -4.69% -4.58% -4.71%] index_select strided 3 : Elapsed 0.051 ms (5.071 ms / 100) good 5.099 -> 4.810 ( -5.67%) [ +0.00% +0.00% +0.06% / -5.10% -5.59% -5.67%] index_select random : Elapsed 0.051 ms (5.099 ms / 100) 5.043 -> 4.810 ( -4.62%) [ +0.30% +0.00% +0.30% / -4.62% -4.50% -4.48%] index_select random_sorted : Elapsed 0.051 ms (5.058 ms / 100) out_shape = [5, 200, 500] in_shape = [5, 1, 500] idx_dim = 1 B = [5, 200, 500] (stride (100000, 500, 1)) A = [5, 1, 500] (stride (500, 500, 1)) dim = 1 0.502 -> 0.489 ( -2.59%) [ +2.39% +2.99% +0.00% / +4.38% -2.59% -1.00%] index_add_ linear : Elapsed 0.005 ms (0.514 ms / 100) 0.496 -> 0.486 ( -2.02%) [ +1.21% +1.61% +0.00% / +16.33% -2.02% -0.20%] index_copy_ linear : Elapsed 0.005 ms (0.502 ms / 100) 0.501 -> 0.490 ( -2.20%) [ +1.40% +6.19% +0.00% / +5.79% -2.20% +9.98%] index_add_ reverse : Elapsed 0.005 ms (0.508 ms / 100) 0.496 -> 0.488 ( -1.61%) [+11.49% +7.86% +0.00% / +1.81% -1.61% -0.40%] index_copy_ reverse : Elapsed 0.006 ms (0.553 ms / 100) good 0.536 -> 0.507 ( -5.41%) [ +0.00% +0.19% +1.87% / -1.12% +2.80% -5.41%] index_add_ spread : Elapsed 0.005 ms (0.536 ms / 100) 0.492 -> 0.487 ( -1.02%) [ +5.28% +4.47% +0.00% / +1.22% -1.02% +1.22%] index_copy_ spread : Elapsed 0.005 ms (0.518 ms / 100) 0.513 -> 0.499 ( -2.73%) [ +5.26% +0.00% +16.57% / -2.73% -2.14% -2.14%] index_add_ strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.491 -> 0.489 ( -0.41%) [ +5.50% +3.46% +0.00% / +9.78% -0.41% +0.00%] index_copy_ strided 3 : Elapsed 0.005 ms (0.518 ms / 100) 0.501 -> 0.504 ( +0.60%) [ +0.00% +1.40% +0.80% / +9.98% +3.79% +0.60%] index_add_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.496 -> 0.499 ( +0.60%) [ +0.81% +7.66% +0.00% / +0.60% +0.81% +8.27%] index_copy_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.501 -> 0.489 ( -2.40%) [ +4.59% +2.99% +0.00% / +0.60% -2.40% +10.78%] index_add_ perm : Elapsed 0.005 ms (0.524 ms / 100) 0.513 -> 0.489 ( -4.68%) [ +1.75% +1.17% +0.00% / -2.92% -4.68% -2.34%] index_copy_ perm : Elapsed 0.005 ms (0.522 ms / 100) 0.496 -> 0.493 ( -0.60%) [ +9.27% +9.48% +0.00% / +1.41% -0.60% -0.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.542 ms / 100) 0.499 -> 0.489 ( -2.00%) [ +0.00% +0.00% +10.02% / +0.40% +4.61% -2.00%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.499 ms / 100) 7.915 -> 7.925 ( +0.13%) [ +0.18% +0.00% +0.11% / +0.13% +0.73% +0.82%] index_select const : Elapsed 0.079 ms (7.929 ms / 100) 7.905 -> 7.919 ( +0.18%) [ +0.28% +0.05% +0.00% / +0.18% +0.40% +0.49%] index_select wrap : Elapsed 0.079 ms (7.927 ms / 100) 7.913 -> 7.921 ( +0.10%) [ +0.14% +0.00% +0.05% / +0.10% +0.44% +0.24%] index_select linear : Elapsed 0.079 ms (7.924 ms / 100) 7.910 -> 7.929 ( +0.24%) [ +0.16% +0.00% +0.24% / +0.24% +0.63% +0.68%] index_select reverse : Elapsed 0.079 ms (7.923 ms / 100) 7.911 -> 7.945 ( +0.43%) [ +0.09% +0.06% +0.00% / +0.43% +0.48% +0.51%] index_select skip64 : Elapsed 0.079 ms (7.918 ms / 100) 7.919 -> 7.931 ( +0.15%) [ +0.00% +0.03% +0.18% / +0.33% +0.15% +0.42%] index_select skip256 : Elapsed 0.079 ms (7.919 ms / 100) 7.911 -> 7.919 ( +0.10%) [ +0.29% +0.00% +0.08% / +0.39% +0.35% +0.10%] index_select spread : Elapsed 0.079 ms (7.934 ms / 100) 7.906 -> 7.943 ( +0.47%) [ +0.16% +0.04% +0.00% / +0.48% +0.62% +0.47%] index_select random : Elapsed 0.079 ms (7.919 ms / 100) 7.896 -> 7.941 ( +0.57%) [ +0.11% +0.00% +0.49% / +0.68% +0.86% +0.57%] index_select random_sorted : Elapsed 0.079 ms (7.905 ms / 100) B = [5, 200, 500] (stride (100000, 500, 1)) A = [5, 1, 500] (stride (500, 1, 1)) dim = 1 0.500 -> 0.497 ( -0.60%) [ +7.40% +2.40% +0.00% / +24.00% +6.20% -0.60%] index_add_ linear : Elapsed 0.005 ms (0.537 ms / 100) 0.498 -> 0.487 ( -2.21%) [ +1.61% +8.84% +0.00% / +4.02% -2.21% -1.20%] index_copy_ linear : Elapsed 0.005 ms (0.506 ms / 100) 0.495 -> 0.484 ( -2.22%) [ +0.00% +9.29% +2.22% / +11.11% -2.22% +1.21%] index_add_ reverse : Elapsed 0.005 ms (0.495 ms / 100) 0.496 -> 0.485 ( -2.22%) [ +1.01% +1.81% +0.00% / +1.81% -2.22% -0.81%] index_copy_ reverse : Elapsed 0.005 ms (0.501 ms / 100) 0.500 -> 0.500 ( +0.00%) [ +3.20% +2.40% +0.00% / +0.80% +8.00% +0.00%] index_add_ spread : Elapsed 0.005 ms (0.516 ms / 100) 0.493 -> 0.491 ( -0.41%) [ +4.87% +2.64% +0.00% / +1.62% -0.41% +6.69%] index_copy_ spread : Elapsed 0.005 ms (0.517 ms / 100) 0.501 -> 0.497 ( -0.80%) [ +8.18% +2.40% +0.00% / +0.60% -0.80% +10.38%] index_add_ strided 3 : Elapsed 0.005 ms (0.542 ms / 100) 0.495 -> 0.489 ( -1.21%) [ +2.42% +1.82% +0.00% / +2.42% -1.21% -0.40%] index_copy_ strided 3 : Elapsed 0.005 ms (0.507 ms / 100) 0.500 -> 0.499 ( -0.20%) [ +0.00% +3.40% +0.20% / +7.80% -0.20% +0.20%] index_add_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.493 -> 0.500 ( +1.42%) [ +4.46% +8.52% +0.00% / +2.43% +3.45% +1.42%] index_copy_ strided 7 : Elapsed 0.005 ms (0.515 ms / 100) 0.499 -> 0.501 ( +0.40%) [ +0.00% +12.22% +4.21% / +0.60% +2.20% +0.40%] index_add_ perm : Elapsed 0.005 ms (0.499 ms / 100) 0.493 -> 0.496 ( +0.61%) [+13.18% +2.43% +0.00% / +0.61% +3.85% +1.42%] index_copy_ perm : Elapsed 0.006 ms (0.558 ms / 100) 0.500 -> 0.498 ( -0.40%) [ +0.20% +1.80% +0.00% / +2.20% +6.20% -0.40%] index_add_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) 0.497 -> 0.488 ( -1.81%) [ +4.02% +2.62% +0.00% / +0.00% -1.81% +10.26%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.517 ms / 100) 7.880 -> 7.886 ( +0.08%) [ +0.33% +0.00% +0.27% / +0.08% +0.89% +0.82%] index_select const : Elapsed 0.079 ms (7.906 ms / 100) 7.869 -> 7.901 ( +0.41%) [ +0.33% +0.08% +0.00% / +0.41% +1.03% +1.08%] index_select wrap : Elapsed 0.079 ms (7.895 ms / 100) 7.873 -> 7.902 ( +0.37%) [ +0.60% +0.14% +0.00% / +0.37% +0.70% +0.83%] index_select linear : Elapsed 0.079 ms (7.920 ms / 100) 7.917 -> 7.925 ( +0.10%) [ +0.27% +0.06% +0.00% / +0.10% +0.76% +0.75%] index_select reverse : Elapsed 0.079 ms (7.938 ms / 100) 7.879 -> 7.901 ( +0.28%) [ +0.14% +0.00% +0.34% / +0.28% +0.84% +0.94%] index_select skip64 : Elapsed 0.079 ms (7.890 ms / 100) 7.885 -> 7.906 ( +0.27%) [ +0.14% +0.00% +0.08% / +0.27% +0.57% +0.57%] index_select skip256 : Elapsed 0.079 ms (7.896 ms / 100) 7.888 -> 7.915 ( +0.34%) [ +0.34% +0.00% +0.15% / +0.34% +0.66% +0.72%] index_select spread : Elapsed 0.079 ms (7.915 ms / 100) 7.876 -> 7.891 ( +0.19%) [ +0.29% +0.24% +0.00% / +0.19% +1.02% +0.76%] index_select random : Elapsed 0.079 ms (7.899 ms / 100) 7.873 -> 7.899 ( +0.33%) [ +0.37% +0.00% +0.29% / +0.33% +0.72% +0.88%] index_select random_sorted : Elapsed 0.079 ms (7.902 ms / 100) B = [5, 200, 500] (stride (100000, 1, 200)) A = [5, 1, 500] (stride (500, 2500, 1)) dim = 1 0.518 -> 0.522 ( +0.77%) [ +0.00% +0.39% +2.51% / +0.97% +0.77% +1.35%] index_add_ linear : Elapsed 0.005 ms (0.518 ms / 100) 0.504 -> 0.488 ( -3.17%) [ +0.20% +0.00% +4.17% / -0.60% -3.17% -2.58%] index_copy_ linear : Elapsed 0.005 ms (0.505 ms / 100) 0.525 -> 0.521 ( -0.76%) [ +0.19% +0.00% +0.57% / +1.52% +4.19% -0.76%] index_add_ reverse : Elapsed 0.005 ms (0.526 ms / 100) 0.498 -> 0.486 ( -2.41%) [ +0.40% +0.80% +0.00% / +2.61% -2.41% +5.02%] index_copy_ reverse : Elapsed 0.005 ms (0.500 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +2.67% +0.00% +0.38% / +7.05% +0.38% -0.19%] index_add_ spread : Elapsed 0.005 ms (0.539 ms / 100) 0.497 -> 0.498 ( +0.20%) [ +3.62% +3.82% +0.00% / +1.41% +2.82% +0.20%] index_copy_ spread : Elapsed 0.005 ms (0.515 ms / 100) 0.528 -> 0.519 ( -1.70%) [ +0.00% +0.00% +0.00% / +0.00% -1.70% -0.57%] index_add_ strided 3 : Elapsed 0.005 ms (0.528 ms / 100) 0.499 -> 0.494 ( -1.00%) [ +2.61% +1.60% +0.00% / +1.00% -1.00% +4.81%] index_copy_ strided 3 : Elapsed 0.005 ms (0.512 ms / 100) 0.526 -> 0.521 ( -0.95%) [ +0.57% +0.57% +0.00% / +0.38% -0.57% -0.95%] index_add_ strided 7 : Elapsed 0.005 ms (0.529 ms / 100) 0.495 -> 0.496 ( +0.20%) [ +2.22% +2.22% +0.00% / +2.02% +3.23% +0.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.506 ms / 100) bad 0.507 -> 0.534 ( +5.33%) [ +0.59% +6.90% +0.00% / +6.51% +5.33% +7.50%] index_add_ perm : Elapsed 0.005 ms (0.510 ms / 100) 0.498 -> 0.506 ( +1.61%) [ +1.61% +2.21% +0.00% / +2.61% +2.21% +1.61%] index_copy_ perm : Elapsed 0.005 ms (0.506 ms / 100) 0.515 -> 0.514 ( -0.19%) [ +0.00% +0.58% +0.97% / -0.19% +2.33% +0.97%] index_add_ perm_sorted : Elapsed 0.005 ms (0.515 ms / 100) 0.502 -> 0.489 ( -2.59%) [ +0.00% +1.00% +3.98% / +0.20% -2.59% -0.20%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.502 ms / 100) GOOD 10.489 -> 8.135 (-22.44%) [ +0.07% +0.00% +0.77% / -22.44% -22.05% -21.89%] index_select const : Elapsed 0.105 ms (10.496 ms / 100) GOOD 10.388 -> 7.997 (-23.02%) [ +1.21% +0.83% +0.00% / -23.02% -21.93% -21.99%] index_select wrap : Elapsed 0.105 ms (10.514 ms / 100) GOOD 10.453 -> 8.130 (-22.22%) [ +0.51% +0.00% +0.67% / -22.22% -21.60% -21.71%] index_select linear : Elapsed 0.105 ms (10.506 ms / 100) GOOD 10.421 -> 8.124 (-22.04%) [ +0.95% +0.00% +0.93% / -20.98% -22.04% -21.97%] index_select reverse : Elapsed 0.105 ms (10.520 ms / 100) GOOD 10.274 -> 8.077 (-21.38%) [ +0.69% +0.89% +0.00% / -20.35% -21.38% -21.34%] index_select skip64 : Elapsed 0.103 ms (10.345 ms / 100) GOOD 10.527 -> 8.049 (-23.54%) [ +0.19% +0.47% +0.00% / -23.54% -22.27% -22.18%] index_select skip256 : Elapsed 0.105 ms (10.547 ms / 100) GOOD 10.434 -> 8.087 (-22.49%) [ +0.00% +1.14% +0.48% / -22.49% -21.74% -21.75%] index_select spread : Elapsed 0.104 ms (10.434 ms / 100) GOOD 10.435 -> 8.030 (-23.05%) [ +0.35% +0.81% +0.00% / -21.45% -23.05% -22.83%] index_select random : Elapsed 0.105 ms (10.472 ms / 100) GOOD 10.449 -> 8.140 (-22.10%) [ +0.25% +0.00% +0.69% / -21.49% -22.10% -21.99%] index_select random_sorted : Elapsed 0.105 ms (10.475 ms / 100) B = [5, 200, 500] (stride (1, 2500, 5)) A = [5, 1, 500] (stride (500, 500, 1)) dim = 1 0.500 -> 0.490 ( -2.00%) [ +6.20% +2.20% +0.00% / +2.60% -2.00% +9.80%] index_add_ linear : Elapsed 0.005 ms (0.531 ms / 100) 0.499 -> 0.487 ( -2.40%) [ +6.41% +0.60% +0.00% / +5.01% -2.40% +1.80%] index_copy_ linear : Elapsed 0.005 ms (0.531 ms / 100) 0.502 -> 0.486 ( -3.19%) [ +3.39% +1.39% +0.00% / +6.77% -3.19% -0.20%] index_add_ reverse : Elapsed 0.005 ms (0.519 ms / 100) 0.499 -> 0.488 ( -2.20%) [ +4.61% +0.40% +0.00% / +1.80% -2.20% -1.00%] index_copy_ reverse : Elapsed 0.005 ms (0.522 ms / 100) 0.507 -> 0.489 ( -3.55%) [ +5.33% +1.18% +0.00% / +2.76% -3.55% -1.78%] index_add_ spread : Elapsed 0.005 ms (0.534 ms / 100) 0.496 -> 0.489 ( -1.41%) [ +4.03% +1.01% +0.00% / +3.83% -1.41% +5.04%] index_copy_ spread : Elapsed 0.005 ms (0.516 ms / 100) 0.508 -> 0.487 ( -4.13%) [ +6.89% +0.00% +5.51% / +5.51% -4.13% -2.36%] index_add_ strided 3 : Elapsed 0.005 ms (0.543 ms / 100) 0.504 -> 0.489 ( -2.98%) [ +5.36% +0.00% +4.17% / +0.79% -2.98% -0.40%] index_copy_ strided 3 : Elapsed 0.005 ms (0.531 ms / 100) good 0.519 -> 0.481 ( -7.32%) [ +3.28% +0.00% +5.97% / +4.62% -7.32% -4.62%] index_add_ strided 7 : Elapsed 0.005 ms (0.536 ms / 100) 0.494 -> 0.483 ( -2.23%) [ +3.44% +62.55% +0.00% / +1.21% -2.23% -0.61%] index_copy_ strided 7 : Elapsed 0.005 ms (0.511 ms / 100) 0.509 -> 0.488 ( -4.13%) [ +4.72% +0.00% +19.06% / -1.18% -4.13% -2.55%] index_add_ perm : Elapsed 0.005 ms (0.533 ms / 100) 0.501 -> 0.487 ( -2.79%) [ +2.00% +3.39% +0.00% / +2.20% -2.79% -1.20%] index_copy_ perm : Elapsed 0.005 ms (0.511 ms / 100) good 0.529 -> 0.490 ( -7.37%) [ +2.84% +0.00% +13.23% / -2.65% -7.37% -5.29%] index_add_ perm_sorted : Elapsed 0.005 ms (0.544 ms / 100) 0.498 -> 0.484 ( -2.81%) [ +1.41% +5.42% +0.00% / +7.83% -2.81% -0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.505 ms / 100) 8.287 -> 8.296 ( +0.11%) [ +0.22% +0.23% +0.00% / +0.45% +0.11% +0.11%] index_select const : Elapsed 0.083 ms (8.305 ms / 100) 8.270 -> 8.285 ( +0.18%) [ +0.50% +0.00% +0.44% / +0.18% +0.68% +0.79%] index_select wrap : Elapsed 0.083 ms (8.311 ms / 100) 8.332 -> 8.265 ( -0.80%) [ +0.20% +0.00% +0.00% / -0.19% -0.60% -0.80%] index_select linear : Elapsed 0.083 ms (8.349 ms / 100) 8.349 -> 8.301 ( -0.57%) [ +0.00% +0.74% +0.90% / +0.06% -0.54% -0.57%] index_select reverse : Elapsed 0.083 ms (8.349 ms / 100) 8.278 -> 8.296 ( +0.22%) [ +0.39% +0.00% +0.01% / +0.22% +0.42% +0.36%] index_select skip64 : Elapsed 0.083 ms (8.310 ms / 100) 8.298 -> 8.300 ( +0.02%) [ +0.04% +0.00% +0.24% / +0.02% +0.10% +0.33%] index_select skip256 : Elapsed 0.083 ms (8.301 ms / 100) 8.321 -> 8.260 ( -0.73%) [ +0.41% +0.00% +0.41% / +0.07% -0.73% -0.08%] index_select spread : Elapsed 0.084 ms (8.355 ms / 100) 8.240 -> 8.288 ( +0.58%) [ +0.92% +0.72% +0.00% / +0.58% +0.93% +0.63%] index_select random : Elapsed 0.083 ms (8.316 ms / 100) 8.311 -> 8.281 ( -0.36%) [ +0.36% +0.00% +0.61% / +0.06% -0.01% -0.36%] index_select random_sorted : Elapsed 0.083 ms (8.341 ms / 100) B = [5, 200, 500] (stride (1, 2500, 5)) A = [5, 1, 500] (stride (1, 2500, 5)) dim = 1 good 0.518 -> 0.490 ( -5.41%) [+36.29% +0.00% +15.83% / -2.51% -5.41% -3.28%] index_add_ linear : Elapsed 0.007 ms (0.706 ms / 100) 0.492 -> 0.485 ( -1.42%) [ +5.28% +7.72% +0.00% / +2.44% -1.42% +1.02%] index_copy_ linear : Elapsed 0.005 ms (0.518 ms / 100) 0.510 -> 0.489 ( -4.12%) [+17.25% +0.00% +17.25% / -0.59% -4.12% -2.16%] index_add_ reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.495 -> 0.482 ( -2.63%) [ +8.28% +5.86% +0.00% / +1.82% -2.63% -0.61%] index_copy_ reverse : Elapsed 0.005 ms (0.536 ms / 100) good 0.518 -> 0.488 ( -5.79%) [+16.60% +0.00% +15.06% / -2.51% -5.79% -4.44%] index_add_ spread : Elapsed 0.006 ms (0.604 ms / 100) 0.498 -> 0.482 ( -3.21%) [ +4.82% +5.42% +0.00% / +0.60% -3.21% +10.84%] index_copy_ spread : Elapsed 0.005 ms (0.522 ms / 100) 0.508 -> 0.490 ( -3.54%) [+18.11% +0.00% +17.52% / -1.57% -3.54% -1.97%] index_add_ strided 3 : Elapsed 0.006 ms (0.600 ms / 100) 0.499 -> 0.488 ( -2.20%) [ +4.21% +0.00% +1.20% / +0.40% -2.20% +0.00%] index_copy_ strided 3 : Elapsed 0.005 ms (0.520 ms / 100) 0.510 -> 0.488 ( -4.31%) [+18.82% +0.00% +18.24% / -1.18% -4.31% -2.94%] index_add_ strided 7 : Elapsed 0.006 ms (0.606 ms / 100) 0.511 -> 0.488 ( -4.50%) [ +8.81% +0.98% +0.00% / -2.35% -4.50% -3.52%] index_copy_ strided 7 : Elapsed 0.006 ms (0.556 ms / 100) good 0.517 -> 0.483 ( -6.58%) [+20.70% +0.00% +16.63% / -1.55% -6.58% -3.29%] index_add_ perm : Elapsed 0.006 ms (0.624 ms / 100) 0.503 -> 0.483 ( -3.98%) [ +0.00% +2.39% +2.39% / -0.99% -3.98% -1.19%] index_copy_ perm : Elapsed 0.005 ms (0.503 ms / 100) 0.498 -> 0.484 ( -2.81%) [ +0.00% +6.22% +21.29% / +1.61% -2.81% -1.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.498 ms / 100) 0.495 -> 0.483 ( -2.42%) [ +1.82% +4.44% +0.00% / +1.01% -2.42% +0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.504 ms / 100) 9.020 -> 8.875 ( -1.61%) [ +0.76% +0.00% +0.58% / -0.60% -1.61% -1.52%] index_select const : Elapsed 0.091 ms (9.089 ms / 100) 9.077 -> 8.917 ( -1.76%) [ +0.73% +0.00% +0.00% / +0.08% -1.76% -1.61%] index_select wrap : Elapsed 0.091 ms (9.143 ms / 100) 9.040 -> 8.852 ( -2.08%) [ +0.00% +0.58% +1.44% / -0.66% -1.63% -2.08%] index_select linear : Elapsed 0.090 ms (9.040 ms / 100) 9.048 -> 8.868 ( -1.99%) [ +0.00% +0.53% +0.35% / -0.39% -1.99% -1.87%] index_select reverse : Elapsed 0.090 ms (9.048 ms / 100) 8.899 -> 8.836 ( -0.71%) [ +0.00% +0.60% +0.28% / +0.47% -0.71% +0.06%] index_select skip64 : Elapsed 0.089 ms (8.899 ms / 100) 9.076 -> 8.829 ( -2.72%) [ +0.54% +0.00% +0.04% / -0.28% -2.72% -2.37%] index_select skip256 : Elapsed 0.091 ms (9.125 ms / 100) 9.010 -> 8.775 ( -2.61%) [ +0.20% +0.00% +0.10% / +0.38% -2.61% -1.91%] index_select spread : Elapsed 0.090 ms (9.028 ms / 100) 8.903 -> 8.859 ( -0.49%) [ +0.60% +0.00% +1.17% / +0.82% -0.49% -0.25%] index_select random : Elapsed 0.090 ms (8.956 ms / 100) 9.006 -> 8.866 ( -1.55%) [ +0.00% +0.19% +0.26% / +0.93% -1.55% -0.62%] index_select random_sorted : Elapsed 0.090 ms (9.006 ms / 100) B = [5, 200, 500] (stride (1, 5, 1000)) A = [5, 1, 500] (stride (1, 1, 5)) dim = 1 0.499 -> 0.489 ( -2.00%) [ +0.00% +3.21% +20.04% / +1.00% -2.00% +0.00%] index_add_ linear : Elapsed 0.005 ms (0.499 ms / 100) 0.498 -> 0.484 ( -2.81%) [ +1.61% +8.84% +0.00% / +0.60% -2.81% +5.22%] index_copy_ linear : Elapsed 0.005 ms (0.506 ms / 100) 0.501 -> 0.498 ( -0.60%) [ +0.00% +2.00% +18.96% / +2.00% -0.60% +4.79%] index_add_ reverse : Elapsed 0.005 ms (0.501 ms / 100) 0.500 -> 0.493 ( -1.40%) [ +0.00% +5.00% +2.20% / +0.40% +0.20% -1.40%] index_copy_ reverse : Elapsed 0.005 ms (0.500 ms / 100) 0.510 -> 0.485 ( -4.90%) [ +0.00% +3.33% +17.84% / -1.37% -4.90% +7.25%] index_add_ spread : Elapsed 0.005 ms (0.510 ms / 100) 0.503 -> 0.483 ( -3.98%) [ +0.00% +2.39% +1.39% / +0.00% -3.98% -0.80%] index_copy_ spread : Elapsed 0.005 ms (0.503 ms / 100) good 0.513 -> 0.481 ( -6.24%) [ +4.87% +0.00% +16.76% / -1.75% -6.24% -3.12%] index_add_ strided 3 : Elapsed 0.005 ms (0.538 ms / 100) 0.510 -> 0.492 ( -3.53%) [ +1.76% +0.00% +0.00% / +5.88% -3.53% -2.35%] index_copy_ strided 3 : Elapsed 0.005 ms (0.519 ms / 100) 0.508 -> 0.488 ( -3.94%) [ +7.68% +0.00% +17.32% / +3.15% -3.94% -2.17%] index_add_ strided 7 : Elapsed 0.005 ms (0.547 ms / 100) 0.496 -> 0.485 ( -2.22%) [ +2.02% +0.00% +0.20% / +5.24% -2.22% -1.01%] index_copy_ strided 7 : Elapsed 0.005 ms (0.506 ms / 100) 0.506 -> 0.489 ( -3.36%) [ +0.00% +1.38% +17.39% / +6.32% -3.36% -2.57%] index_add_ perm : Elapsed 0.005 ms (0.506 ms / 100) 0.508 -> 0.489 ( -3.74%) [ +0.00% +0.00% +7.28% / +0.39% -3.74% -2.36%] index_copy_ perm : Elapsed 0.005 ms (0.508 ms / 100) 0.498 -> 0.487 ( -2.21%) [ +0.00% +1.81% +19.88% / +1.81% -2.21% +2.21%] index_add_ perm_sorted : Elapsed 0.005 ms (0.498 ms / 100) 0.499 -> 0.482 ( -3.41%) [ +0.40% +0.20% +0.00% / +1.80% -3.41% -1.60%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) 9.793 -> 9.714 ( -0.81%) [ +0.10% +0.31% +0.00% / +0.25% -0.81% -0.76%] index_select const : Elapsed 0.098 ms (9.803 ms / 100) 9.724 -> 9.657 ( -0.69%) [ +0.31% +0.09% +0.00% / +0.07% -0.69% -0.58%] index_select wrap : Elapsed 0.098 ms (9.754 ms / 100) 9.728 -> 9.639 ( -0.91%) [ +0.21% +0.00% +0.10% / +0.13% -0.85% -0.91%] index_select linear : Elapsed 0.097 ms (9.748 ms / 100) 9.820 -> 9.671 ( -1.52%) [ +0.19% +0.00% +0.06% / +0.14% -1.52% -1.42%] index_select reverse : Elapsed 0.098 ms (9.839 ms / 100) 9.700 -> 9.693 ( -0.07%) [ +0.00% +0.49% +0.59% / +0.38% +0.01% -0.07%] index_select skip64 : Elapsed 0.097 ms (9.700 ms / 100) 9.738 -> 9.658 ( -0.82%) [ +0.11% +0.26% +0.00% / +0.09% -0.80% -0.82%] index_select skip256 : Elapsed 0.097 ms (9.749 ms / 100) 9.786 -> 9.672 ( -1.16%) [ +0.02% +0.00% +0.06% / -0.15% -1.16% -1.10%] index_select spread : Elapsed 0.098 ms (9.788 ms / 100) 9.753 -> 9.700 ( -0.54%) [ +0.00% +0.09% +0.33% / -0.10% -0.54% -0.46%] index_select random : Elapsed 0.098 ms (9.753 ms / 100) 9.730 -> 9.748 ( +0.18%) [ +0.12% +0.00% +0.35% / +0.18% +0.24% +0.33%] index_select random_sorted : Elapsed 0.097 ms (9.742 ms / 100) out_shape = [5, 1, 200] in_shape = [5, 1, 500] idx_dim = 2 B = [5, 1, 200] (stride (200, 200, 1)) A = [5, 1, 500] (stride (500, 500, 1)) dim = 2 0.524 -> 0.531 ( +1.34%) [ +3.63% +6.49% +0.00% / +2.10% +4.58% +1.34%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.562 -> 0.534 ( -4.98%) [ +0.71% +0.00% +0.71% / -4.98% -3.56% -0.71%] index_select wrap : Elapsed 0.006 ms (0.566 ms / 100) 0.546 -> 0.543 ( -0.55%) [ +0.00% +9.16% +6.59% / -0.55% +0.55% +6.41%] index_select linear : Elapsed 0.005 ms (0.546 ms / 100) 0.539 -> 0.545 ( +1.11%) [ +0.93% +0.00% +5.01% / +6.12% +1.11% +5.75%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.531 -> 0.540 ( +1.69%) [ +4.14% +0.00% +5.46% / +20.34% +1.69% +6.40%] index_select skip64 : Elapsed 0.006 ms (0.553 ms / 100) 0.529 -> 0.539 ( +1.89%) [ +2.27% +0.00% +0.00% / +1.89% +5.29% +6.99%] index_select skip256 : Elapsed 0.005 ms (0.541 ms / 100) 0.521 -> 0.532 ( +2.11%) [ +5.18% +1.15% +0.00% / +2.11% +4.61% +17.27%] index_select spread : Elapsed 0.005 ms (0.548 ms / 100) bad 0.516 -> 0.550 ( +6.59%) [ +5.62% +1.55% +0.00% / +6.59% +6.59% +10.27%] index_select strided 3 : Elapsed 0.005 ms (0.545 ms / 100) 0.523 -> 0.534 ( +2.10%) [ +4.40% +0.96% +0.00% / +2.10% +4.97% +13.00%] index_select strided 5 : Elapsed 0.005 ms (0.546 ms / 100) 0.524 -> 0.535 ( +2.10%) [+10.11% +3.05% +0.00% / +2.10% +4.01% +18.89%] index_select strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.524 -> 0.526 ( +0.38%) [ +3.24% +0.00% +0.95% / +1.72% +5.34% +0.38%] index_select strided 8 : Elapsed 0.005 ms (0.541 ms / 100) 0.529 -> 0.529 ( +0.00%) [ +1.51% +0.19% +0.00% / +0.95% +9.07% +0.00%] index_select strided 16 : Elapsed 0.005 ms (0.537 ms / 100) 0.524 -> 0.533 ( +1.72%) [ +2.48% +0.76% +0.00% / +1.72% +15.27% +3.05%] index_select strided 64 : Elapsed 0.005 ms (0.537 ms / 100) 0.521 -> 0.539 ( +3.45%) [ +3.65% +4.61% +0.00% / +3.45% +6.91% +8.83%] index_select strided 100 : Elapsed 0.005 ms (0.540 ms / 100) bad 0.518 -> 0.550 ( +6.18%) [ +4.44% +0.97% +0.00% / +6.18% +16.60% +8.88%] index_select strided 255 : Elapsed 0.005 ms (0.541 ms / 100) 0.524 -> 0.545 ( +4.01%) [ +2.10% +0.19% +0.00% / +9.54% +4.01% +7.63%] index_select strided 256 : Elapsed 0.005 ms (0.535 ms / 100) 0.522 -> 0.545 ( +4.41%) [ +2.49% +0.00% +0.00% / +18.01% +4.41% +10.73%] index_select strided 257 : Elapsed 0.005 ms (0.535 ms / 100) 0.521 -> 0.535 ( +2.69%) [ +2.69% +0.00% +0.00% / +2.69% +5.37% +13.82%] index_select random : Elapsed 0.005 ms (0.535 ms / 100) 0.525 -> 0.526 ( +0.19%) [ +2.10% +1.14% +0.00% / +1.71% +4.95% +0.19%] index_select random_sorted : Elapsed 0.005 ms (0.536 ms / 100) 0.521 -> 0.525 ( +0.77%) [ +3.65% +0.77% +0.00% / +3.07% +5.76% +0.77%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.539 ( +3.26%) [+10.73% +0.00% +7.66% / +3.26% +9.20% +7.85%] index_select perm_sorted : Elapsed 0.006 ms (0.578 ms / 100) B = [5, 1, 200] (stride (200, 200, 1)) A = [5, 1, 500] (stride (1, 2500, 5)) dim = 2 0.555 -> 0.555 ( +0.00%) [ +0.00% +0.90% +2.34% / +13.87% +0.00% +8.47%] index_select const : Elapsed 0.006 ms (0.555 ms / 100) 0.552 -> 0.555 ( +0.54%) [ +4.71% +0.00% +1.99% / +1.09% +0.54% +3.08%] index_select wrap : Elapsed 0.006 ms (0.578 ms / 100) 0.550 -> 0.554 ( +0.73%) [+11.64% +0.00% +0.00% / +0.73% +0.73% +2.91%] index_select linear : Elapsed 0.006 ms (0.614 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +3.09% +13.79%] index_select reverse : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.91% +0.18% +0.00% / +0.73% +2.91% +3.27%] index_select skip64 : Elapsed 0.006 ms (0.555 ms / 100) bad 0.551 -> 0.585 ( +6.17%) [ +0.91% +0.00% +0.00% / +6.17% +7.80% +9.80%] index_select skip256 : Elapsed 0.006 ms (0.556 ms / 100) 0.550 -> 0.555 ( +0.91%) [ +0.91% +0.18% +0.00% / +2.00% +0.91% +13.45%] index_select spread : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.553 ( +0.55%) [ +0.91% +0.00% +0.18% / +0.55% +2.18% +0.73%] index_select strided 3 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.553 ( +0.36%) [ +0.36% +0.00% +0.00% / +0.36% +3.45% +0.73%] index_select strided 5 : Elapsed 0.006 ms (0.553 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +1.27% +0.73% +0.00% / +0.73% +11.25% +0.73%] index_select strided 7 : Elapsed 0.006 ms (0.558 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.73% +1.64% +0.00% / +0.73% +5.82% +1.09%] index_select strided 8 : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.18% +0.00% / +0.91% +0.73% +0.54%] index_select strided 16 : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +6.73% +0.00% +0.18% / +0.73% +1.09% +0.91%] index_select strided 64 : Elapsed 0.006 ms (0.587 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.91% +0.00% +0.00% / +1.09% +0.91% +0.73%] index_select strided 100 : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.00% / +0.54% +0.73% +0.54%] index_select strided 255 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.00% / +0.54% +0.54% +0.73%] index_select strided 256 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.73% +0.00% +0.00% / +0.73% +0.73% +3.99%] index_select strided 257 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.54% +0.00% +0.00% / +0.73% +0.73% +13.07%] index_select random : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +3.99% +0.00% +2.18% / +0.73% +0.54% +13.97%] index_select random_sorted : Elapsed 0.006 ms (0.573 ms / 100) 0.551 -> 0.562 ( +2.00%) [ +0.91% +0.00% +8.89% / +2.00% +2.18% +3.81%] index_select perm : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.54% +0.00% +0.00% / +0.73% +0.73% +11.98%] index_select perm_sorted : Elapsed 0.006 ms (0.554 ms / 100) B = [5, 1, 200] (stride (200, 1, 1)) A = [5, 1, 500] (stride (1, 1, 5)) dim = 2 0.550 -> 0.554 ( +0.73%) [ +1.09% +0.18% +0.00% / +0.73% +0.73% +2.18%] index_select const : Elapsed 0.006 ms (0.556 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +1.64% +1.09% +0.00% / +0.73% +0.73% +1.82%] index_select wrap : Elapsed 0.006 ms (0.559 ms / 100) 0.549 -> 0.554 ( +0.91%) [ +0.91% +0.36% +0.00% / +5.83% +3.10% +0.91%] index_select linear : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +4.91% +0.18% +0.00% / +0.73% +3.45% +3.27%] index_select reverse : Elapsed 0.006 ms (0.577 ms / 100) 0.551 -> 0.556 ( +0.91%) [ +0.73% +0.18% +0.00% / +0.91% +10.71% +3.63%] index_select skip64 : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.559 ( +1.64%) [ +0.73% +0.18% +0.00% / +1.64% +13.45% +3.82%] index_select skip256 : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.553 ( +0.55%) [ +5.45% +0.18% +0.00% / +4.36% +0.55% +6.18%] index_select spread : Elapsed 0.006 ms (0.580 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.73% / +7.44% +0.54% +12.89%] index_select strided 3 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.553 ( +0.36%) [ +0.54% +0.00% +0.00% / +0.54% +0.91% +0.36%] index_select strided 5 : Elapsed 0.006 ms (0.554 ms / 100) 0.552 -> 0.554 ( +0.36%) [ +0.54% +0.18% +0.00% / +0.36% +0.54% +0.54%] index_select strided 7 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.00% / +0.73% +0.54% +3.63%] index_select strided 8 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.00% / +0.73% +0.54% +13.25%] index_select strided 16 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.54% +0.54% +0.00% / +0.54% +0.73% +3.99%] index_select strided 64 : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.91% +0.18% +0.00% / +0.91% +0.73% +2.91%] index_select strided 100 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +7.44% +0.00% +2.18% / +0.54% +0.73% +4.17%] index_select strided 255 : Elapsed 0.006 ms (0.592 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +2.90% / +0.54% +0.73% +3.81%] index_select strided 256 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.558 ( +1.27%) [ +3.81% +0.00% +2.18% / +1.27% +1.27% +4.54%] index_select strided 257 : Elapsed 0.006 ms (0.572 ms / 100) 0.551 -> 0.553 ( +0.36%) [ +0.54% +0.00% +0.00% / +0.36% +0.54% +2.72%] index_select random : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.91% +0.00% +0.00% / +0.54% +0.54% +6.90%] index_select random_sorted : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.18% +0.00% / +0.73% +0.54% +0.54%] index_select perm : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.54% +0.00% +0.18% / +0.54% +0.54% +2.36%] index_select perm_sorted : Elapsed 0.006 ms (0.554 ms / 100) B = [5, 1, 200] (stride (200, 1000, 1)) A = [5, 1, 500] (stride (500, 2500, 1)) dim = 2 0.521 -> 0.524 ( +0.58%) [ +4.03% +1.34% +0.00% / +8.45% +0.58% +2.88%] index_select const : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.531 ( +2.71%) [+12.57% +2.90% +0.00% / +2.71% +5.03% +4.06%] index_select wrap : Elapsed 0.006 ms (0.582 ms / 100) 0.520 -> 0.531 ( +2.12%) [ +8.85% +1.35% +0.00% / +2.12% +15.00% +2.69%] index_select linear : Elapsed 0.006 ms (0.566 ms / 100) 0.520 -> 0.528 ( +1.54%) [ +3.85% +2.31% +0.00% / +2.12% +1.54% +11.15%] index_select reverse : Elapsed 0.005 ms (0.540 ms / 100) 0.516 -> 0.523 ( +1.36%) [ +4.84% +4.46% +0.00% / +2.91% +1.36% +24.03%] index_select skip64 : Elapsed 0.005 ms (0.541 ms / 100) 0.511 -> 0.531 ( +3.91%) [ +5.48% +3.91% +0.00% / +6.65% +3.91% +12.33%] index_select skip256 : Elapsed 0.005 ms (0.539 ms / 100) 0.515 -> 0.524 ( +1.75%) [ +6.41% +3.11% +0.00% / +11.46% +1.75% +19.22%] index_select spread : Elapsed 0.005 ms (0.548 ms / 100) 0.521 -> 0.532 ( +2.11%) [ +2.69% +16.31% +0.00% / +21.50% +2.11% +3.07%] index_select strided 3 : Elapsed 0.005 ms (0.535 ms / 100) 0.521 -> 0.532 ( +2.11%) [ +3.26% +16.51% +0.00% / +8.06% +2.11% +2.69%] index_select strided 5 : Elapsed 0.005 ms (0.538 ms / 100) 0.517 -> 0.530 ( +2.51%) [ +3.87% +16.83% +0.00% / +3.87% +2.51% +10.83%] index_select strided 7 : Elapsed 0.005 ms (0.537 ms / 100) 0.528 -> 0.540 ( +2.27%) [ +7.39% +0.38% +0.00% / +3.41% +2.27% +8.71%] index_select strided 8 : Elapsed 0.006 ms (0.567 ms / 100) 0.534 -> 0.530 ( -0.75%) [ +1.12% +0.00% +5.62% / +0.00% -0.75% +7.30%] index_select strided 16 : Elapsed 0.005 ms (0.540 ms / 100) 0.532 -> 0.532 ( +0.00%) [ +1.13% +0.00% +11.47% / +0.00% +0.56% +7.71%] index_select strided 64 : Elapsed 0.005 ms (0.538 ms / 100) bad 0.526 -> 0.562 ( +6.84%) [ +2.28% +0.00% +5.70% / +17.11% +6.84% +8.75%] index_select strided 100 : Elapsed 0.005 ms (0.538 ms / 100) 0.528 -> 0.535 ( +1.33%) [ +2.08% +0.00% +5.68% / +2.84% +2.08% +1.33%] index_select strided 255 : Elapsed 0.005 ms (0.539 ms / 100) 0.519 -> 0.537 ( +3.47%) [ +5.59% +1.54% +0.00% / +3.47% +3.85% +5.78%] index_select strided 256 : Elapsed 0.005 ms (0.548 ms / 100) 0.515 -> 0.532 ( +3.30%) [ +4.47% +3.69% +0.00% / +3.30% +4.66% +13.01%] index_select strided 257 : Elapsed 0.005 ms (0.538 ms / 100) 0.520 -> 0.539 ( +3.65%) [ +9.42% +1.92% +0.00% / +9.04% +3.65% +15.00%] index_select random : Elapsed 0.006 ms (0.569 ms / 100) 0.516 -> 0.539 ( +4.46%) [ +3.68% +2.33% +0.00% / +10.47% +4.46% +19.19%] index_select random_sorted : Elapsed 0.005 ms (0.535 ms / 100) 0.515 -> 0.535 ( +3.88%) [+12.43% +2.14% +0.00% / +5.63% +4.47% +3.88%] index_select perm : Elapsed 0.006 ms (0.579 ms / 100) 0.517 -> 0.535 ( +3.48%) [ +5.22% +1.55% +0.00% / +4.64% +9.48% +3.48%] index_select perm_sorted : Elapsed 0.005 ms (0.544 ms / 100) B = [5, 1, 200] (stride (200, 1000, 1)) A = [5, 1, 500] (stride (1, 1, 5)) dim = 2 0.551 -> 0.554 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +11.62% +0.54%] index_select const : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +0.00% +0.00% / +0.54% +0.73% +3.63%] index_select wrap : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +7.82% +0.18% +0.00% / +1.82% +0.73% +0.91%] index_select linear : Elapsed 0.006 ms (0.593 ms / 100) 0.550 -> 0.556 ( +1.09%) [ +0.91% +0.18% +0.00% / +2.91% +1.09% +6.18%] index_select reverse : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.54% +0.00% +0.00% / +5.63% +0.73% +0.54%] index_select skip64 : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.73% +0.18% +0.00% / +0.73% +0.91% +1.09%] index_select skip256 : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +4.90% +0.00% +0.00% / +0.54% +0.73% +6.72%] index_select spread : Elapsed 0.006 ms (0.578 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.73% +0.00% +1.81% / +3.81% +0.73% +3.63%] index_select strided 3 : Elapsed 0.006 ms (0.555 ms / 100) 0.552 -> 0.554 ( +0.36%) [ +0.36% +0.00% +11.78% / +12.86% +0.36% +3.99%] index_select strided 5 : Elapsed 0.006 ms (0.554 ms / 100) 0.552 -> 0.555 ( +0.54%) [ +0.36% +0.00% +2.54% / +0.54% +0.54% +3.80%] index_select strided 7 : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +2.54% +0.00% +0.18% / +0.73% +0.91% +4.36%] index_select strided 8 : Elapsed 0.006 ms (0.565 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +6.18% +0.18% +0.00% / +1.45% +0.73% +5.82%] index_select strided 16 : Elapsed 0.006 ms (0.584 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.73% +0.18% +0.00% / +0.73% +1.09% +0.91%] index_select strided 64 : Elapsed 0.006 ms (0.554 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +6.55% +0.18% +0.00% / +0.91% +0.91% +0.73%] index_select strided 100 : Elapsed 0.006 ms (0.586 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +0.91% +0.18% +0.00% / +0.73% +0.91% +1.45%] index_select strided 255 : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.555 ( +0.91%) [ +1.09% +0.18% +0.00% / +3.09% +2.18% +0.91%] index_select strided 256 : Elapsed 0.006 ms (0.556 ms / 100) 0.550 -> 0.555 ( +0.91%) [ +0.91% +0.18% +0.00% / +0.91% +1.09% +2.00%] index_select strided 257 : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +4.36% +0.00% +0.73% / +0.54% +3.09% +12.70%] index_select random : Elapsed 0.006 ms (0.575 ms / 100) 0.550 -> 0.553 ( +0.55%) [ +0.91% +0.18% +0.00% / +0.55% +8.18% +4.18%] index_select random_sorted : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.554 ( +0.54%) [ +0.73% +1.09% +0.00% / +0.54% +1.09% +9.98%] index_select perm : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.554 ( +0.73%) [ +1.09% +0.00% +0.18% / +0.73% +0.91% +0.91%] index_select perm_sorted : Elapsed 0.006 ms (0.556 ms / 100) B = [5, 1, 200] (stride (1, 5, 5)) dim = 2 fill_cnt = 500 0.438 -> 0.437 ( -0.23%) [ +0.00% +1.14% +0.68% / +1.37% -0.23% +1.37%] index_fill_ const : Elapsed 0.004 ms (0.438 ms / 100) 0.428 -> 0.436 ( +1.87%) [+10.05% +4.44% +0.00% / +3.27% +1.87% +2.34%] index_fill_ linear : Elapsed 0.005 ms (0.471 ms / 100) 0.428 -> 0.432 ( +0.93%) [ +3.27% +3.50% +0.00% / +21.73% +0.93% +10.98%] index_fill_ reverse : Elapsed 0.004 ms (0.442 ms / 100) 0.428 -> 0.433 ( +1.17%) [ +6.54% +9.11% +0.00% / +2.10% +1.17% +10.05%] index_fill_ skip64 : Elapsed 0.005 ms (0.456 ms / 100) 0.430 -> 0.436 ( +1.40%) [ +8.60% +2.33% +0.00% / +8.60% +1.40% +8.60%] index_fill_ skip256 : Elapsed 0.005 ms (0.467 ms / 100) 0.443 -> 0.436 ( -1.58%) [+18.51% +0.00% +4.74% / -1.58% +2.03% +6.09%] index_fill_ spread : Elapsed 0.005 ms (0.525 ms / 100) 0.443 -> 0.432 ( -2.48%) [ +6.09% +0.00% +3.61% / -0.23% -2.48% +6.77%] index_fill_ strided 3 : Elapsed 0.005 ms (0.470 ms / 100) 0.436 -> 0.430 ( -1.38%) [ +0.00% +1.61% +9.40% / +24.54% -1.38% +22.48%] index_fill_ strided 5 : Elapsed 0.004 ms (0.436 ms / 100) 0.438 -> 0.435 ( -0.68%) [ +0.00% +1.14% +7.53% / +8.90% -0.68% +1.60%] index_fill_ strided 7 : Elapsed 0.004 ms (0.438 ms / 100) 0.428 -> 0.443 ( +3.50%) [ +5.14% +3.50% +0.00% / +20.33% +3.74% +3.50%] index_fill_ strided 8 : Elapsed 0.004 ms (0.450 ms / 100) 0.437 -> 0.429 ( -1.83%) [ +3.20% +1.60% +0.00% / +7.78% -1.83% +1.37%] index_fill_ strided 16 : Elapsed 0.005 ms (0.451 ms / 100) 0.429 -> 0.436 ( +1.63%) [ +2.33% +3.50% +0.00% / +1.63% +9.09% +5.13%] index_fill_ strided 64 : Elapsed 0.004 ms (0.439 ms / 100) 0.427 -> 0.440 ( +3.04%) [ +5.39% +3.98% +0.00% / +3.04% +8.43% +10.30%] index_fill_ strided 100 : Elapsed 0.005 ms (0.450 ms / 100) 0.429 -> 0.440 ( +2.56%) [ +2.33% +3.50% +0.00% / +2.56% +5.13% +4.20%] index_fill_ random : Elapsed 0.004 ms (0.439 ms / 100) 0.427 -> 0.431 ( +0.94%) [ +3.75% +8.43% +0.00% / +4.22% +0.94% +2.81%] index_fill_ random_sorted : Elapsed 0.004 ms (0.443 ms / 100) B = [5, 1, 200] (stride (1, 5, 5)) A = [5, 1, 500] (stride (500, 500, 1)) dim = 2 0.550 -> 0.554 ( +0.73%) [ +0.91% +0.00% +0.18% / +0.73% +2.73% +0.91%] index_select const : Elapsed 0.006 ms (0.555 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.91% +1.09% +0.00% / +0.73% +0.73% +4.36%] index_select wrap : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +2.00% +0.00% +0.00% / +0.73% +0.73% +7.08%] index_select linear : Elapsed 0.006 ms (0.562 ms / 100) 0.550 -> 0.555 ( +0.91%) [ +0.73% +0.36% +0.00% / +0.91% +0.91% +4.36%] index_select reverse : Elapsed 0.006 ms (0.554 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.91% +1.09% +0.00% / +3.27% +0.73% +0.73%] index_select skip64 : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.73% +0.00% +6.72% / +16.33% +0.73% +0.91%] index_select skip256 : Elapsed 0.006 ms (0.555 ms / 100) 0.550 -> 0.554 ( +0.73%) [+10.36% +0.18% +0.00% / +0.73% +0.91% +2.36%] index_select spread : Elapsed 0.006 ms (0.607 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.91% +0.18% +0.00% / +0.73% +0.91% +3.81%] index_select strided 3 : Elapsed 0.006 ms (0.556 ms / 100) 0.551 -> 0.556 ( +0.91%) [ +1.27% +0.00% +0.00% / +0.91% +1.09% +3.27%] index_select strided 5 : Elapsed 0.006 ms (0.558 ms / 100) 0.551 -> 0.557 ( +1.09%) [ +1.09% +0.18% +0.00% / +1.09% +1.09% +3.09%] index_select strided 7 : Elapsed 0.006 ms (0.557 ms / 100) 0.557 -> 0.557 ( +0.00%) [ +0.00% +1.62% +1.97% / +12.03% +0.00% +1.80%] index_select strided 8 : Elapsed 0.006 ms (0.557 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +1.08% +0.00% +1.44% / +0.36% +0.54% +7.58%] index_select strided 16 : Elapsed 0.006 ms (0.560 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.54% +0.00% +4.51% / +0.72% +0.54% +0.54%] index_select strided 64 : Elapsed 0.006 ms (0.557 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +0.90% +0.18% +0.00% / +1.08% +1.08% +1.08%] index_select strided 100 : Elapsed 0.006 ms (0.558 ms / 100) 0.558 -> 0.557 ( -0.18%) [ +0.00% +0.00% +0.90% / -0.18% -0.18% -0.18%] index_select strided 255 : Elapsed 0.006 ms (0.558 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +1.26% +0.00% +8.30% / +3.07% +0.54% +1.62%] index_select strided 256 : Elapsed 0.006 ms (0.561 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.72% +0.00% +0.00% / +3.79% +0.72% +0.54%] index_select strided 257 : Elapsed 0.006 ms (0.558 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +0.90%] index_select random : Elapsed 0.006 ms (0.557 ms / 100) 0.551 -> 0.556 ( +0.91%) [ +0.73% +0.18% +0.00% / +0.91% +1.81% +7.44%] index_select random_sorted : Elapsed 0.006 ms (0.555 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +1.27% +0.00% / +0.72% +4.52% +0.90%] index_select perm : Elapsed 0.006 ms (0.559 ms / 100) 0.551 -> 0.555 ( +0.73%) [ +0.73% +0.18% +0.00% / +0.73% +0.91% +0.91%] index_select perm_sorted : Elapsed 0.006 ms (0.555 ms / 100) B = [5, 1, 200] (stride (1, 5, 5)) A = [5, 1, 500] (stride (500, 2500, 1)) dim = 2 0.526 -> 0.527 ( +0.19%) [ +3.23% +1.14% +0.00% / +1.71% +2.66% +0.19%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.519 ( -0.19%) [ +4.04% +1.54% +0.00% / +4.23% +0.38% -0.19%] index_select wrap : Elapsed 0.005 ms (0.541 ms / 100) 0.524 -> 0.523 ( -0.19%) [ +8.97% +0.95% +0.00% / +3.05% -0.19% +10.50%] index_select linear : Elapsed 0.006 ms (0.571 ms / 100) 0.526 -> 0.525 ( -0.19%) [ +2.85% +1.90% +0.00% / +2.66% -0.19% +15.21%] index_select reverse : Elapsed 0.005 ms (0.541 ms / 100) 0.528 -> 0.522 ( -1.14%) [ +2.27% +0.95% +0.00% / +1.14% -1.14% +6.44%] index_select skip64 : Elapsed 0.005 ms (0.540 ms / 100) 0.531 -> 0.525 ( -1.13%) [ +1.69% +0.00% +5.08% / +6.78% -1.13% +6.40%] index_select skip256 : Elapsed 0.005 ms (0.540 ms / 100) 0.533 -> 0.525 ( -1.50%) [ +1.13% +0.00% +6.75% / +4.32% -1.50% +11.26%] index_select spread : Elapsed 0.005 ms (0.539 ms / 100) 0.543 -> 0.523 ( -3.68%) [ +0.00% +1.29% +2.76% / +12.89% -3.68% +14.00%] index_select strided 3 : Elapsed 0.005 ms (0.543 ms / 100) 0.527 -> 0.526 ( -0.19%) [ +2.47% +0.76% +0.00% / +3.04% -0.19% +15.37%] index_select strided 5 : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.526 ( +0.77%) [ +3.26% +0.00% +1.34% / +3.45% +1.72% +0.77%] index_select strided 7 : Elapsed 0.005 ms (0.539 ms / 100) 0.528 -> 0.528 ( +0.00%) [ +2.46% +0.00% +0.19% / +2.27% +3.98% +0.00%] index_select strided 8 : Elapsed 0.005 ms (0.541 ms / 100) 0.528 -> 0.518 ( -1.89%) [ +4.36% +0.00% +0.00% / +0.57% -1.89% +6.82%] index_select strided 16 : Elapsed 0.006 ms (0.551 ms / 100) 0.522 -> 0.515 ( -1.34%) [ +9.00% +1.92% +0.00% / +4.60% -1.34% +3.83%] index_select strided 64 : Elapsed 0.006 ms (0.569 ms / 100) 0.530 -> 0.515 ( -2.83%) [ +1.13% +0.00% +0.94% / +0.94% -2.83% -1.32%] index_select strided 100 : Elapsed 0.005 ms (0.536 ms / 100) 0.525 -> 0.526 ( +0.19%) [ +3.05% +1.71% +0.00% / +2.86% +6.86% +0.19%] index_select strided 255 : Elapsed 0.005 ms (0.541 ms / 100) bad 0.526 -> 0.561 ( +6.65%) [ +1.90% +4.37% +0.00% / +9.32% +6.65% +7.22%] index_select strided 256 : Elapsed 0.005 ms (0.536 ms / 100) Bad 0.524 -> 0.599 (+14.31%) [ +2.48% +0.38% +0.00% / +14.89% +14.31% +18.89%] index_select strided 257 : Elapsed 0.005 ms (0.537 ms / 100) 0.520 -> 0.528 ( +1.54%) [ +3.46% +3.65% +0.00% / +9.42% +1.54% +5.96%] index_select random : Elapsed 0.005 ms (0.538 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +7.78% +0.95% +0.00% / +1.71% +1.14% -0.38%] index_select random_sorted : Elapsed 0.006 ms (0.568 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +3.47% +2.12% +0.00% / +3.08% +0.77% +5.20%] index_select perm : Elapsed 0.005 ms (0.537 ms / 100) 0.519 -> 0.525 ( +1.16%) [+12.72% +2.31% +0.00% / +3.28% +1.16% +9.06%] index_select perm_sorted : Elapsed 0.006 ms (0.585 ms / 100) B = [5, 1, 200] (stride (1, 5, 5)) A = [5, 1, 500] (stride (1, 2500, 5)) dim = 2 0.525 -> 0.522 ( -0.57%) [ +1.52% +7.43% +0.00% / +2.48% -0.57% +14.48%] index_select const : Elapsed 0.005 ms (0.533 ms / 100) 0.525 -> 0.525 ( +0.00%) [ +3.24% +17.14% +0.00% / +9.71% +0.00% +6.86%] index_select wrap : Elapsed 0.005 ms (0.542 ms / 100) 0.525 -> 0.520 ( -0.95%) [ +3.05% +3.24% +0.00% / +2.86% -0.95% +8.19%] index_select linear : Elapsed 0.005 ms (0.541 ms / 100) 0.517 -> 0.540 ( +4.45%) [ +5.61% +2.13% +0.00% / +4.64% +4.45% +17.60%] index_select reverse : Elapsed 0.005 ms (0.546 ms / 100) 0.531 -> 0.523 ( -1.51%) [ +7.16% +1.32% +0.00% / +1.88% -1.51% +0.38%] index_select skip64 : Elapsed 0.006 ms (0.569 ms / 100) 0.521 -> 0.533 ( +2.30%) [ +3.45% +1.73% +0.00% / +3.26% +2.69% +2.30%] index_select skip256 : Elapsed 0.005 ms (0.539 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +7.78% +0.38% +0.00% / +3.23% -0.38% +2.47%] index_select spread : Elapsed 0.006 ms (0.568 ms / 100) 0.518 -> 0.527 ( +1.74%) [ +5.41% +1.54% +0.00% / +3.28% +1.74% +1.93%] index_select strided 3 : Elapsed 0.005 ms (0.546 ms / 100) 0.520 -> 0.530 ( +1.92%) [ +3.65% +1.54% +0.00% / +3.46% +5.19% +1.92%] index_select strided 5 : Elapsed 0.005 ms (0.539 ms / 100) 0.519 -> 0.521 ( +0.39%) [+10.98% +2.50% +0.00% / +4.24% +0.39% +1.73%] index_select strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.521 -> 0.521 ( +0.00%) [ +3.07% +1.54% +0.00% / +5.37% +0.00% +2.88%] index_select strided 8 : Elapsed 0.005 ms (0.537 ms / 100) 0.520 -> 0.529 ( +1.73%) [ +7.69% +2.50% +0.00% / +3.65% +1.73% +9.81%] index_select strided 16 : Elapsed 0.006 ms (0.560 ms / 100) 0.517 -> 0.526 ( +1.74%) [ +4.64% +8.90% +0.00% / +5.61% +7.35% +1.74%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.513 -> 0.529 ( +3.12%) [ +5.46% +21.05% +0.00% / +5.26% +4.48% +3.12%] index_select strided 100 : Elapsed 0.005 ms (0.541 ms / 100) 0.519 -> 0.521 ( +0.39%) [ +4.82% +2.50% +0.00% / +4.62% +0.39% +1.93%] index_select strided 255 : Elapsed 0.005 ms (0.544 ms / 100) 0.523 -> 0.519 ( -0.76%) [ +3.82% +1.15% +0.00% / +10.52% -0.76% +1.15%] index_select strided 256 : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.520 ( +0.00%) [+11.92% +2.31% +0.00% / +10.38% +0.00% +7.31%] index_select strided 257 : Elapsed 0.006 ms (0.582 ms / 100) 0.517 -> 0.524 ( +1.35%) [+21.47% +3.87% +0.00% / +17.21% +1.35% +9.28%] index_select random : Elapsed 0.006 ms (0.628 ms / 100) 0.518 -> 0.532 ( +2.70%) [ +7.53% +1.74% +0.00% / +11.58% +2.70% +17.37%] index_select random_sorted : Elapsed 0.006 ms (0.557 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +4.23% +2.31% +0.00% / +4.42% +0.96% +10.77%] index_select perm : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.521 ( +0.77%) [ +5.42% +1.35% +0.00% / +3.68% +0.77% +8.90%] index_select perm_sorted : Elapsed 0.005 ms (0.545 ms / 100) out_shape = [200, 500, 1] in_shape = [5, 500, 1] idx_dim = 0 B = [200, 500, 1] (stride (1, 200, 200)) A = [5, 500, 1] (stride (500, 1, 1)) dim = 0 0.571 -> 0.571 ( +0.00%) [ +0.70% +0.18% +0.00% / +0.70% +0.00% +0.18%] index_add_ linear : Elapsed 0.006 ms (0.575 ms / 100) 0.587 -> 0.588 ( +0.17%) [ +1.19% +0.00% +0.00% / +0.85% +0.34% +0.17%] index_copy_ linear : Elapsed 0.006 ms (0.594 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +0.52% +0.00% +0.00% / +0.35% +0.00% +0.00%] index_add_ reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.02% +0.00% +0.00% / +0.68% +0.51% +0.51%] index_copy_ reverse : Elapsed 0.006 ms (0.592 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.70% +0.00% +0.18% / +0.53% +0.18% +0.35%] index_add_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.37% +0.00% +0.00% / +0.85% +0.68% +0.51%] index_copy_ spread : Elapsed 0.006 ms (0.594 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +0.53% +0.18% +0.00% / +0.53% +0.18% +0.18%] index_add_ strided 3 : Elapsed 0.006 ms (0.573 ms / 100) 0.586 -> 0.590 ( +0.68%) [ +0.85% +0.00% +0.00% / +0.85% +0.85% +0.68%] index_copy_ strided 3 : Elapsed 0.006 ms (0.591 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.70% +0.35% +0.00% / +0.70% +0.53% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.19% +0.00% +0.00% / +1.37% +2.39% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.593 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.35% +0.35% +0.00% / +0.53% +0.18% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.573 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +0.85% +0.00% +0.00% / +1.03% +1.20% +0.85%] index_copy_ perm : Elapsed 0.006 ms (0.590 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.70% +0.00% +0.35% / +0.35% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +1.03% +0.00% +0.34% / +0.85% +0.85% +0.85%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.591 ms / 100) good 5.062 -> 4.779 ( -5.59%) [ +0.00% +0.28% +0.14% / -5.24% -5.59% -5.39%] index_select const : Elapsed 0.051 ms (5.062 ms / 100) 5.065 -> 4.834 ( -4.56%) [ +0.28% +0.00% +0.10% / -4.28% -4.56% -4.52%] index_select wrap : Elapsed 0.051 ms (5.079 ms / 100) good 5.064 -> 4.793 ( -5.35%) [ +0.22% +0.08% +0.00% / -5.21% -5.21% -5.35%] index_select linear : Elapsed 0.051 ms (5.075 ms / 100) good 5.079 -> 4.801 ( -5.47%) [ +0.00% +0.06% +0.00% / -5.47% -5.38% -5.22%] index_select reverse : Elapsed 0.051 ms (5.079 ms / 100) good 5.066 -> 4.779 ( -5.67%) [ +0.04% +0.00% +0.26% / -5.53% -5.61% -5.67%] index_select skip64 : Elapsed 0.051 ms (5.068 ms / 100) good 5.061 -> 4.773 ( -5.69%) [ +0.10% +0.12% +0.00% / -5.69% -5.00% -5.18%] index_select skip256 : Elapsed 0.051 ms (5.066 ms / 100) good 5.054 -> 4.793 ( -5.16%) [ +0.12% +0.00% +0.34% / -4.89% -4.99% -5.16%] index_select spread : Elapsed 0.051 ms (5.060 ms / 100) 5.066 -> 4.829 ( -4.68%) [ +0.26% +0.18% +0.00% / -4.58% -4.68% -4.64%] index_select strided 3 : Elapsed 0.051 ms (5.079 ms / 100) good 5.098 -> 4.807 ( -5.71%) [ +0.06% +0.00% +0.08% / -4.81% -5.49% -5.71%] index_select random : Elapsed 0.051 ms (5.101 ms / 100) 5.055 -> 4.808 ( -4.89%) [ +0.20% +0.04% +0.00% / -4.83% -4.89% -4.63%] index_select random_sorted : Elapsed 0.051 ms (5.065 ms / 100) B = [200, 500, 1] (stride (1, 200, 200)) A = [5, 500, 1] (stride (1, 5, 5)) dim = 0 0.573 -> 0.579 ( +1.05%) [ +1.05% +0.17% +0.00% / +1.22% +1.05% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.36% +0.00% +0.17% / +1.36% +1.19% +1.36%] index_copy_ linear : Elapsed 0.006 ms (0.598 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +1.05% +0.17% +0.00% / +0.87% +0.70% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.590 -> 0.596 ( +1.02%) [ +1.36% +0.00% +0.00% / +1.02% +1.53% +1.36%] index_copy_ reverse : Elapsed 0.006 ms (0.598 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +0.87% +1.39% +0.00% / +0.87% +0.70% +0.52%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.19% +0.17% +0.00% / +1.69% +1.19% +1.53%] index_copy_ spread : Elapsed 0.006 ms (0.597 ms / 100) 0.573 -> 0.578 ( +0.87%) [ +1.05% +0.17% +0.00% / +0.87% +1.22% +1.22%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.596 ( +1.02%) [ +0.85% +0.34% +0.00% / +1.19% +1.86% +1.02%] index_copy_ strided 3 : Elapsed 0.006 ms (0.595 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +0.87% +0.00% +0.00% / +0.87% +1.05% +1.57%] index_add_ strided 7 : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.597 ( +1.19%) [ +1.19% +0.00% +2.71% / +1.36% +1.19% +1.36%] index_copy_ strided 7 : Elapsed 0.006 ms (0.597 ms / 100) 0.573 -> 0.578 ( +0.87%) [ +1.05% +0.00% +0.35% / +0.87% +1.05% +1.22%] index_add_ perm : Elapsed 0.006 ms (0.579 ms / 100) 0.590 -> 0.595 ( +0.85%) [ +0.85% +0.00% +5.93% / +1.02% +0.85% +1.36%] index_copy_ perm : Elapsed 0.006 ms (0.595 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.70% +0.00% +0.17% / +0.70% +0.87% +1.04%] index_add_ perm_sorted : Elapsed 0.006 ms (0.579 ms / 100) 0.589 -> 0.598 ( +1.53%) [ +1.53% +0.00% +0.34% / +1.53% +1.53% +1.53%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.598 ms / 100) good 5.172 -> 4.801 ( -7.17%) [ +0.25% +0.00% +0.10% / -7.13% -7.17% -7.10%] index_select const : Elapsed 0.052 ms (5.185 ms / 100) good 5.142 -> 4.793 ( -6.79%) [ +0.06% +0.12% +0.00% / -6.79% -6.79% -6.77%] index_select wrap : Elapsed 0.051 ms (5.145 ms / 100) good 5.152 -> 4.777 ( -7.28%) [ +0.17% +0.00% +0.10% / -6.89% -7.28% -7.20%] index_select linear : Elapsed 0.052 ms (5.161 ms / 100) good 5.148 -> 4.783 ( -7.09%) [ +0.00% +0.06% +0.04% / -6.88% -7.09% -6.93%] index_select reverse : Elapsed 0.051 ms (5.148 ms / 100) good 5.149 -> 4.776 ( -7.24%) [ +0.08% +0.00% +0.10% / -6.89% -7.24% -7.09%] index_select skip64 : Elapsed 0.052 ms (5.153 ms / 100) good 5.147 -> 4.786 ( -7.01%) [ +0.00% +0.14% +0.12% / -7.01% -6.72% -6.99%] index_select skip256 : Elapsed 0.051 ms (5.147 ms / 100) good 5.151 -> 4.772 ( -7.36%) [ +0.00% +0.02% +0.06% / -6.93% -7.18% -7.36%] index_select spread : Elapsed 0.052 ms (5.151 ms / 100) good 5.136 -> 4.790 ( -6.74%) [ +0.23% +0.00% +0.25% / -6.58% -6.74% -6.70%] index_select strided 3 : Elapsed 0.051 ms (5.148 ms / 100) good 5.205 -> 4.795 ( -7.88%) [ +0.19% +0.00% +0.00% / -7.53% -7.80% -7.88%] index_select random : Elapsed 0.052 ms (5.215 ms / 100) good 5.180 -> 4.792 ( -7.49%) [ +0.00% +0.02% +0.00% / -7.37% -7.34% -7.49%] index_select random_sorted : Elapsed 0.052 ms (5.180 ms / 100) B = [200, 500, 1] (stride (1, 200, 100000)) dim = 0 fill_cnt = 5 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.00% +0.00% / +0.82% +0.62% +0.62%] index_fill_ const : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.00% +0.21% / +0.82% +0.62% +7.61%] index_fill_ linear : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +6.17% +0.00% +0.00% / +0.62% +0.62% +4.53%] index_fill_ reverse : Elapsed 0.005 ms (0.516 ms / 100) 0.485 -> 0.489 ( +0.82%) [ +1.03% +0.21% +0.00% / +0.82% +1.44% +0.82%] index_fill_ skip64 : Elapsed 0.005 ms (0.490 ms / 100) 0.485 -> 0.489 ( +0.82%) [ +1.03% +0.21% +0.00% / +0.82% +1.24% +0.82%] index_fill_ skip256 : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.62% +2.26% +0.00% / +0.62% +5.76% +0.62%] index_fill_ spread : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.62% +0.00% +0.00% / +0.62% +0.62% +0.82%] index_fill_ strided 3 : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.21% +0.00% / +0.82% +0.62% +1.23%] index_fill_ strided 5 : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.21% +0.00% / +0.62% +0.62% +0.62%] index_fill_ strided 7 : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.00% +0.00% / +0.62% +0.62% +0.62%] index_fill_ strided 8 : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.62% +0.41% +0.00% / +0.62% +0.62% +0.62%] index_fill_ strided 16 : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.488 ( +0.41%) [ +0.82% +0.21% +0.00% / +0.82% +0.41% +0.41%] index_fill_ strided 64 : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.487 ( +0.21%) [ +1.03% +0.21% +0.00% / +0.82% +0.62% +0.21%] index_fill_ strided 100 : Elapsed 0.005 ms (0.491 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.21% +0.00% / +7.41% +0.62% +0.62%] index_fill_ random : Elapsed 0.005 ms (0.490 ms / 100) 0.487 -> 0.488 ( +0.21%) [ +0.82% +1.23% +0.00% / +0.62% +0.21% +0.41%] index_fill_ random_sorted : Elapsed 0.005 ms (0.491 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.82% +0.00% +0.21% / +0.62% +0.62% +5.14%] index_fill_ perm : Elapsed 0.005 ms (0.490 ms / 100) 0.486 -> 0.489 ( +0.62%) [ +0.62% +0.00% +0.21% / +0.82% +0.62% +0.62%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.489 ms / 100) B = [200, 500, 1] (stride (1, 200, 100000)) A = [5, 500, 1] (stride (500, 1, 1)) dim = 0 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.00% / +0.70% +0.18% +0.18%] index_add_ linear : Elapsed 0.006 ms (0.574 ms / 100) 0.587 -> 0.588 ( +0.17%) [ +0.85% +0.00% +0.00% / +1.02% +0.51% +0.17%] index_copy_ linear : Elapsed 0.006 ms (0.592 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +0.53% +0.35% +0.00% / +0.53% +0.18% +0.00%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +1.03% +0.00% +0.00% / +1.20% +0.68% +0.51%] index_copy_ reverse : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.70% +0.00% +0.18% / +0.70% +0.18% +0.18%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +0.85% +0.00% +0.17% / +1.20% +0.68% +0.68%] index_copy_ spread : Elapsed 0.006 ms (0.590 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.35% +0.00% +0.18% / +0.35% +0.35% +0.35%] index_add_ strided 3 : Elapsed 0.006 ms (0.573 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +1.20% +0.00% +0.34% / +1.20% +0.85% +0.51%] index_copy_ strided 3 : Elapsed 0.006 ms (0.592 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.00% / +0.35% +0.18% +0.18%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.588 ( +0.34%) [ +1.02% +0.00% +0.34% / +0.68% +0.34% +0.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.592 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.18% +0.00% / +0.88% +0.35% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +1.03% +0.17% +0.00% / +1.03% +0.85% +0.51%] index_copy_ perm : Elapsed 0.006 ms (0.591 ms / 100) 0.570 -> 0.572 ( +0.35%) [ +0.88% +0.00% +0.18% / +5.44% +0.35% +0.88%] index_add_ perm_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.68% +0.00% +0.00% / +0.85% +0.68% +0.51%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.062 -> 4.788 ( -5.41%) [ +0.12% +0.18% +0.00% / -5.33% -5.41% -5.41%] index_select const : Elapsed 0.051 ms (5.068 ms / 100) 5.077 -> 4.826 ( -4.94%) [ +0.06% +0.00% +0.06% / -4.51% -4.94% -4.81%] index_select wrap : Elapsed 0.051 ms (5.080 ms / 100) good 5.065 -> 4.811 ( -5.01%) [ +0.00% +0.16% +0.00% / -5.01% -4.92% -4.92%] index_select linear : Elapsed 0.051 ms (5.065 ms / 100) good 5.059 -> 4.803 ( -5.06%) [ +0.34% +0.00% +0.47% / -4.98% -4.98% -5.06%] index_select reverse : Elapsed 0.051 ms (5.076 ms / 100) good 5.060 -> 4.787 ( -5.40%) [ +0.18% +0.00% +0.12% / -5.34% -5.40% -5.28%] index_select skip64 : Elapsed 0.051 ms (5.069 ms / 100) good 5.064 -> 4.790 ( -5.41%) [ +0.04% +0.00% +0.12% / -5.41% -5.31% -5.31%] index_select skip256 : Elapsed 0.051 ms (5.066 ms / 100) good 5.062 -> 4.786 ( -5.45%) [ +0.16% +0.00% +0.08% / -5.20% -5.45% -5.18%] index_select spread : Elapsed 0.051 ms (5.070 ms / 100) 5.070 -> 4.827 ( -4.79%) [ +0.02% +0.00% +0.04% / -4.79% -4.73% -4.48%] index_select strided 3 : Elapsed 0.051 ms (5.071 ms / 100) good 5.092 -> 4.799 ( -5.75%) [ +0.08% +0.00% +0.29% / -4.87% -5.75% -5.46%] index_select random : Elapsed 0.051 ms (5.096 ms / 100) good 5.052 -> 4.793 ( -5.13%) [ +0.12% +0.22% +0.00% / -5.13% -4.61% -4.85%] index_select random_sorted : Elapsed 0.051 ms (5.058 ms / 100) out_shape = [5, 200, 1] in_shape = [5, 500, 1] idx_dim = 1 B = [5, 200, 1] (stride (200, 1, 1)) A = [5, 500, 1] (stride (1, 5, 2500)) dim = 1 0.551 -> 0.531 ( -3.63%) [ +1.63% +2.54% +0.00% / +0.73% -1.09% -3.63%] index_select const : Elapsed 0.006 ms (0.560 ms / 100) 0.521 -> 0.540 ( +3.65%) [ +4.22% +0.00% +12.28% / +9.60% +13.63% +3.65%] index_select wrap : Elapsed 0.005 ms (0.543 ms / 100) 0.524 -> 0.526 ( +0.38%) [ +8.40% +0.95% +0.00% / +14.89% +0.95% +0.38%] index_select linear : Elapsed 0.006 ms (0.568 ms / 100) 0.518 -> 0.526 ( +1.54%) [ +5.02% +3.28% +0.00% / +19.50% +1.93% +1.54%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.527 -> 0.526 ( -0.19%) [+10.25% +0.00% +0.76% / +1.52% +0.38% -0.19%] index_select skip64 : Elapsed 0.006 ms (0.581 ms / 100) 0.531 -> 0.521 ( -1.88%) [ +6.03% +0.00% +4.71% / +0.38% -1.13% -1.88%] index_select skip256 : Elapsed 0.006 ms (0.563 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +3.63% +0.00% +6.11% / +3.05% -0.57% -0.19%] index_select spread : Elapsed 0.005 ms (0.543 ms / 100) 0.526 -> 0.527 ( +0.19%) [ +2.47% +0.00% +6.08% / +8.75% +0.19% +0.38%] index_select strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.531 -> 0.524 ( -1.32%) [ +2.07% +0.00% +7.16% / +0.75% +3.20% -1.32%] index_select strided 5 : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.519 ( -0.38%) [ +5.37% +0.00% +7.49% / +2.69% -0.38% +0.96%] index_select strided 7 : Elapsed 0.005 ms (0.549 ms / 100) 0.518 -> 0.535 ( +3.28%) [ +4.83% +0.00% +14.48% / +5.02% +3.28% +4.25%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.524 ( +0.77%) [ +9.42% +0.00% +1.73% / +2.31% +0.77% +0.96%] index_select strided 16 : Elapsed 0.006 ms (0.569 ms / 100) 0.523 -> 0.525 ( +0.38%) [ +4.40% +0.00% +0.38% / +1.72% +0.38% +0.96%] index_select strided 64 : Elapsed 0.005 ms (0.546 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +4.21% +0.00% +7.28% / +2.68% +0.96% +1.34%] index_select strided 100 : Elapsed 0.005 ms (0.544 ms / 100) 0.521 -> 0.523 ( +0.38%) [+11.32% +0.00% +9.98% / +2.50% +0.38% +0.58%] index_select strided 255 : Elapsed 0.006 ms (0.580 ms / 100) 0.522 -> 0.523 ( +0.19%) [ +3.83% +0.00% +5.75% / +2.30% +0.38% +0.19%] index_select strided 256 : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.525 ( +0.77%) [ +9.40% +0.77% +0.00% / +9.02% +0.96% +0.77%] index_select strided 257 : Elapsed 0.006 ms (0.570 ms / 100) 0.523 -> 0.519 ( -0.76%) [ +3.82% +2.10% +0.00% / +11.09% -0.57% -0.76%] index_select random : Elapsed 0.005 ms (0.543 ms / 100) 0.521 -> 0.526 ( +0.96%) [ +4.22% +0.00% +6.72% / +2.30% +0.96% +1.54%] index_select random_sorted : Elapsed 0.005 ms (0.543 ms / 100) 0.541 -> 0.532 ( -1.66%) [ +0.00% +0.37% +2.77% / -0.18% +4.07% -1.66%] index_select perm : Elapsed 0.005 ms (0.541 ms / 100) 0.531 -> 0.521 ( -1.88%) [ +1.32% +0.00% +10.92% / -0.19% +5.65% -1.88%] index_select perm_sorted : Elapsed 0.005 ms (0.538 ms / 100) B = [5, 200, 1] (stride (200, 1, 200)) A = [5, 500, 1] (stride (500, 1, 1)) dim = 1 0.525 -> 0.528 ( +0.57%) [ +3.43% +0.00% +3.24% / +4.76% +3.43% +0.57%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +4.41% +0.00% +5.94% / +9.58% +1.15% +0.96%] index_select wrap : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.526 ( +0.77%) [ +9.39% +0.00% +6.51% / +6.32% +1.15% +0.77%] index_select linear : Elapsed 0.006 ms (0.571 ms / 100) 0.540 -> 0.527 ( -2.41%) [ +0.37% +0.00% +2.96% / -1.67% -2.04% -2.41%] index_select reverse : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +4.05% +0.00% +9.44% / +2.89% +0.96% +0.77%] index_select skip64 : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.523 ( -0.57%) [ +4.18% +0.00% +4.75% / +0.76% -0.57% +0.38%] index_select skip256 : Elapsed 0.005 ms (0.548 ms / 100) 0.525 -> 0.521 ( -0.76%) [ +3.62% +0.00% +12.95% / +0.95% -0.76% +1.52%] index_select spread : Elapsed 0.005 ms (0.544 ms / 100) 0.511 -> 0.525 ( +2.74%) [ +6.07% +3.33% +0.00% / +4.50% +2.74% +3.91%] index_select strided 3 : Elapsed 0.005 ms (0.542 ms / 100) 0.520 -> 0.522 ( +0.38%) [+11.15% +3.65% +0.00% / +16.54% +3.27% +0.38%] index_select strided 5 : Elapsed 0.006 ms (0.578 ms / 100) 0.526 -> 0.523 ( -0.57%) [ +2.85% +0.00% +8.17% / +0.76% +0.19% -0.57%] index_select strided 7 : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.522 ( -0.57%) [ +3.24% +0.00% +9.33% / +1.33% +1.14% -0.57%] index_select strided 8 : Elapsed 0.005 ms (0.542 ms / 100) 0.522 -> 0.530 ( +1.53%) [ +3.83% +0.00% +8.05% / +2.11% +1.92% +1.53%] index_select strided 16 : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +4.24% +0.00% +0.77% / +2.50% +0.77% +0.96%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.519 -> 0.526 ( +1.35%) [ +4.82% +0.19% +0.00% / +2.31% +1.93% +1.35%] index_select strided 100 : Elapsed 0.005 ms (0.544 ms / 100) 0.529 -> 0.530 ( +0.19%) [ +4.73% +0.00% +5.86% / +0.19% +0.38% +0.38%] index_select strided 255 : Elapsed 0.006 ms (0.554 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +4.02% +0.00% +11.69% / +1.53% +2.11% +0.38%] index_select strided 256 : Elapsed 0.005 ms (0.543 ms / 100) 0.543 -> 0.526 ( -3.13%) [ +7.00% +0.00% +2.03% / +4.05% -3.13% -3.13%] index_select strided 257 : Elapsed 0.006 ms (0.581 ms / 100) 0.523 -> 0.525 ( +0.38%) [ +8.80% +0.00% +15.30% / +4.02% +8.99% +0.38%] index_select random : Elapsed 0.006 ms (0.569 ms / 100) 0.520 -> 0.524 ( +0.77%) [ +5.58% +0.00% +9.04% / +9.62% +1.92% +0.77%] index_select random_sorted : Elapsed 0.005 ms (0.549 ms / 100) 0.525 -> 0.525 ( +0.00%) [ +4.38% +0.00% +10.10% / +7.24% +0.19% +0.00%] index_select perm : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.532 ( +2.31%) [+10.77% +0.00% +10.19% / +2.88% +3.08% +2.31%] index_select perm_sorted : Elapsed 0.006 ms (0.576 ms / 100) B = [5, 200, 1] (stride (1, 5, 1)) A = [5, 500, 1] (stride (500, 1, 500)) dim = 1 0.519 -> 0.534 ( +2.89%) [ +4.62% +0.00% +0.39% / +3.66% +3.47% +2.89%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +5.01% +0.00% +11.37% / +2.70% +0.77% +3.28%] index_select wrap : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +6.51% +0.00% +14.94% / +3.07% +0.96% +3.07%] index_select linear : Elapsed 0.006 ms (0.556 ms / 100) 0.514 -> 0.527 ( +2.53%) [ +5.25% +0.00% +1.36% / +5.06% +2.53% +5.64%] index_select reverse : Elapsed 0.005 ms (0.541 ms / 100) 0.517 -> 0.525 ( +1.55%) [ +6.19% +0.00% +1.74% / +3.68% +1.55% +4.26%] index_select skip64 : Elapsed 0.005 ms (0.549 ms / 100) 0.519 -> 0.527 ( +1.54%) [ +5.20% +0.00% +7.71% / +17.34% +1.54% +3.28%] index_select skip256 : Elapsed 0.005 ms (0.546 ms / 100) 0.529 -> 0.521 ( -1.51%) [ +9.26% +0.00% +4.54% / +1.89% -1.51% +1.13%] index_select spread : Elapsed 0.006 ms (0.578 ms / 100) 0.523 -> 0.526 ( +0.57%) [ +3.44% +0.19% +0.00% / +2.68% +0.57% +2.29%] index_select strided 3 : Elapsed 0.005 ms (0.541 ms / 100) 0.522 -> 0.531 ( +1.72%) [ +4.41% +3.26% +0.00% / +3.07% +6.70% +1.72%] index_select strided 5 : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.519 ( -0.57%) [ +3.64% +0.57% +0.00% / +2.49% -0.57% +1.72%] index_select strided 7 : Elapsed 0.005 ms (0.541 ms / 100) 0.522 -> 0.522 ( +0.00%) [ +9.77% +0.00% +0.00% / +3.26% +0.00% +2.87%] index_select strided 8 : Elapsed 0.006 ms (0.573 ms / 100) 0.520 -> 0.526 ( +1.15%) [ +5.58% +1.35% +0.00% / +3.08% +1.15% +1.92%] index_select strided 16 : Elapsed 0.005 ms (0.549 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +4.03% +0.00% +5.57% / +3.07% +1.54% +4.41%] index_select strided 64 : Elapsed 0.005 ms (0.542 ms / 100) 0.529 -> 0.533 ( +0.76%) [ +2.84% +0.00% +10.78% / +5.86% +5.86% +0.76%] index_select strided 100 : Elapsed 0.005 ms (0.544 ms / 100) 0.523 -> 0.535 ( +2.29%) [ +5.16% +0.00% +8.41% / +2.29% +6.31% +2.87%] index_select strided 255 : Elapsed 0.006 ms (0.550 ms / 100) 0.523 -> 0.531 ( +1.53%) [ +4.21% +0.00% +8.99% / +1.72% +12.43% +1.53%] index_select strided 256 : Elapsed 0.005 ms (0.545 ms / 100) 0.541 -> 0.521 ( -3.70%) [ +0.92% +0.00% +2.59% / -0.74% -3.70% -1.66%] index_select strided 257 : Elapsed 0.005 ms (0.546 ms / 100) 0.519 -> 0.522 ( +0.58%) [ +3.85% +0.00% +0.58% / +2.50% +0.58% +2.70%] index_select random : Elapsed 0.005 ms (0.539 ms / 100) 0.524 -> 0.519 ( -0.95%) [ +5.73% +0.95% +0.00% / +9.54% -0.95% +1.34%] index_select random_sorted : Elapsed 0.006 ms (0.554 ms / 100) 0.521 -> 0.518 ( -0.58%) [ +3.65% +0.19% +0.00% / +16.51% -0.58% +2.69%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.522 ( +0.00%) [ +9.20% +0.96% +0.00% / +3.64% +0.00% +0.57%] index_select perm_sorted : Elapsed 0.006 ms (0.570 ms / 100) B = [5, 200, 1] (stride (1, 5, 5)) A = [5, 500, 1] (stride (500, 1, 500)) dim = 1 0.518 -> 0.517 ( -0.19%) [ +5.21% +5.21% +0.00% / +3.86% -0.19% +1.74%] index_select const : Elapsed 0.005 ms (0.545 ms / 100) 0.519 -> 0.528 ( +1.73%) [ +5.59% +0.00% +4.43% / +8.86% +2.50% +1.73%] index_select wrap : Elapsed 0.005 ms (0.548 ms / 100) 0.530 -> 0.520 ( -1.89%) [ +2.45% +0.00% +5.09% / +2.26% -1.89% +0.38%] index_select linear : Elapsed 0.005 ms (0.543 ms / 100) 0.525 -> 0.521 ( -0.76%) [ +2.86% +0.00% +2.67% / +2.86% -0.76% +0.76%] index_select reverse : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.532 ( +1.92%) [ +3.64% +0.77% +0.00% / +2.87% +1.92% +1.92%] index_select skip64 : Elapsed 0.005 ms (0.541 ms / 100) 0.520 -> 0.521 ( +0.19%) [ +9.23% +0.77% +0.00% / +2.88% +0.19% +2.50%] index_select skip256 : Elapsed 0.006 ms (0.568 ms / 100) 0.516 -> 0.520 ( +0.78%) [ +5.81% +1.36% +0.00% / +3.29% +0.78% +2.91%] index_select spread : Elapsed 0.005 ms (0.546 ms / 100) 0.515 -> 0.521 ( +1.17%) [ +6.02% +7.18% +0.00% / +3.88% +1.17% +3.11%] index_select strided 3 : Elapsed 0.005 ms (0.546 ms / 100) 0.517 -> 0.521 ( +0.77%) [ +5.80% +0.77% +0.00% / +9.48% +0.77% +3.09%] index_select strided 5 : Elapsed 0.005 ms (0.547 ms / 100) 0.517 -> 0.524 ( +1.35%) [ +4.84% +0.00% +7.54% / +4.26% +1.35% +2.51%] index_select strided 7 : Elapsed 0.005 ms (0.542 ms / 100) 0.533 -> 0.520 ( -2.44%) [ +1.69% +0.00% +8.82% / +5.44% -2.44% -0.38%] index_select strided 8 : Elapsed 0.005 ms (0.542 ms / 100) 0.524 -> 0.530 ( +1.15%) [ +3.24% +0.00% +11.83% / +2.86% +7.06% +1.15%] index_select strided 16 : Elapsed 0.005 ms (0.541 ms / 100) 0.526 -> 0.535 ( +1.71%) [ +2.85% +0.00% +3.04% / +4.56% +10.84% +1.71%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.518 -> 0.526 ( +1.54%) [ +4.44% +0.00% +0.00% / +11.00% +1.54% +1.54%] index_select strided 100 : Elapsed 0.005 ms (0.541 ms / 100) 0.523 -> 0.521 ( -0.38%) [ +4.78% +0.00% +1.53% / +10.13% -0.38% +1.15%] index_select strided 255 : Elapsed 0.005 ms (0.548 ms / 100) 0.511 -> 0.530 ( +3.72%) [ +6.85% +2.54% +0.00% / +11.74% +3.72% +3.72%] index_select strided 256 : Elapsed 0.005 ms (0.546 ms / 100) 0.517 -> 0.525 ( +1.55%) [+10.25% +2.51% +0.00% / +18.38% +1.55% +3.09%] index_select strided 257 : Elapsed 0.006 ms (0.570 ms / 100) 0.513 -> 0.531 ( +3.51%) [ +5.26% +6.43% +0.00% / +5.26% +3.51% +4.48%] index_select random : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.520 ( +0.00%) [+10.58% +0.19% +0.00% / +3.85% +0.00% +1.92%] index_select random_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.520 -> 0.520 ( +0.00%) [ +3.85% +0.00% +7.69% / +3.08% +0.00% +2.50%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.519 -> 0.527 ( +1.54%) [ +5.20% +0.00% +10.98% / +3.66% +2.31% +1.54%] index_select perm_sorted : Elapsed 0.005 ms (0.546 ms / 100) out_shape = [5, 500, 200] in_shape = [5, 500, 1] idx_dim = 2 B = [5, 500, 200] (stride (200, 1000, 1)) A = [5, 500, 1] (stride (500, 1, 500)) dim = 2 1.103 -> 1.099 ( -0.36%) [ +0.54% +0.00% +0.09% / +0.27% -0.27% -0.36%] index_add_ linear : Elapsed 0.011 ms (1.109 ms / 100) 1.037 -> 1.035 ( -0.19%) [ +0.29% +0.29% +0.00% / +0.77% -0.19% +0.10%] index_copy_ linear : Elapsed 0.010 ms (1.040 ms / 100) 1.105 -> 1.107 ( +0.18%) [ +0.00% +0.18% +0.00% / +0.18% +0.36% +1.00%] index_add_ reverse : Elapsed 0.011 ms (1.105 ms / 100) 1.038 -> 1.038 ( +0.00%) [ +0.19% +0.00% +0.19% / +0.00% +0.39% +0.19%] index_copy_ reverse : Elapsed 0.010 ms (1.040 ms / 100) 1.096 -> 1.096 ( +0.00%) [ +0.18% +0.00% +0.46% / +0.00% +0.46% +0.36%] index_add_ spread : Elapsed 0.011 ms (1.098 ms / 100) 1.035 -> 1.036 ( +0.10%) [ +0.00% +0.39% +0.39% / +0.68% +0.39% +0.10%] index_copy_ spread : Elapsed 0.010 ms (1.035 ms / 100) 1.111 -> 1.108 ( -0.27%) [ +0.36% +0.00% +0.00% / +0.18% -0.27% +0.09%] index_add_ strided 3 : Elapsed 0.011 ms (1.115 ms / 100) 1.033 -> 1.036 ( +0.29%) [ +0.29% +0.48% +0.00% / +0.39% +0.58% +0.29%] index_copy_ strided 3 : Elapsed 0.010 ms (1.036 ms / 100) 1.101 -> 1.099 ( -0.18%) [ +0.00% +0.00% +0.09% / -0.18% +0.54% +0.54%] index_add_ strided 7 : Elapsed 0.011 ms (1.101 ms / 100) 1.040 -> 1.036 ( -0.38%) [ +0.00% +0.00% +0.19% / -0.38% -0.19% -0.10%] index_copy_ strided 7 : Elapsed 0.010 ms (1.040 ms / 100) 1.102 -> 1.098 ( -0.36%) [ +0.00% +0.09% +0.00% / +0.36% -0.09% -0.36%] index_add_ perm : Elapsed 0.011 ms (1.102 ms / 100) 1.036 -> 1.034 ( -0.19%) [ +0.19% +0.00% +0.00% / +0.48% -0.19% +0.39%] index_copy_ perm : Elapsed 0.010 ms (1.038 ms / 100) 1.102 -> 1.102 ( +0.00%) [ +0.00% +0.18% +0.36% / +0.18% +0.18% +0.00%] index_add_ perm_sorted : Elapsed 0.011 ms (1.102 ms / 100) 1.040 -> 1.037 ( -0.29%) [ +0.19% +0.10% +0.00% / -0.19% -0.29% -0.29%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.042 ms / 100) 20.104 -> 20.104 ( +0.00%) [ +0.02% +0.00% +0.16% / +0.00% +0.20% +0.35%] index_select const : Elapsed 0.201 ms (20.108 ms / 100) 20.113 -> 20.127 ( +0.07%) [ +0.00% +0.22% +0.13% / +0.07% +0.13% +0.12%] index_select wrap : Elapsed 0.201 ms (20.113 ms / 100) 20.123 -> 20.136 ( +0.06%) [ +0.12% +0.00% +0.12% / +0.06% +0.19% +0.13%] index_select linear : Elapsed 0.201 ms (20.148 ms / 100) 20.124 -> 20.118 ( -0.03%) [ +0.21% +0.16% +0.00% / +0.14% -0.03% -0.02%] index_select reverse : Elapsed 0.202 ms (20.167 ms / 100) 20.108 -> 20.153 ( +0.22%) [ +0.12% +0.05% +0.00% / +0.22% +0.35% +0.38%] index_select skip64 : Elapsed 0.201 ms (20.132 ms / 100) 20.074 -> 20.114 ( +0.20%) [ +0.50% +0.21% +0.00% / +0.28% +0.52% +0.20%] index_select skip256 : Elapsed 0.202 ms (20.175 ms / 100) 20.105 -> 20.154 ( +0.24%) [ +0.21% +0.40% +0.00% / +0.30% +0.38% +0.24%] index_select spread : Elapsed 0.201 ms (20.148 ms / 100) 20.120 -> 20.117 ( -0.01%) [ +0.04% +0.20% +0.00% / +0.16% -0.01% +0.08%] index_select random : Elapsed 0.201 ms (20.128 ms / 100) 20.072 -> 20.142 ( +0.35%) [ +0.40% +0.16% +0.00% / +0.35% +0.43% +0.61%] index_select random_sorted : Elapsed 0.202 ms (20.152 ms / 100) B = [5, 500, 200] (stride (200, 1000, 1)) A = [5, 500, 1] (stride (500, 1, 2500)) dim = 2 1.103 -> 1.114 ( +1.00%) [ +0.18% +0.00% +0.00% / +1.00% +1.09% +1.18%] index_add_ linear : Elapsed 0.011 ms (1.105 ms / 100) 1.034 -> 1.041 ( +0.68%) [ +0.29% +0.10% +0.00% / +0.68% +0.77% +1.26%] index_copy_ linear : Elapsed 0.010 ms (1.037 ms / 100) 1.103 -> 1.107 ( +0.36%) [ +0.36% +0.36% +0.00% / +0.45% +0.36% +0.54%] index_add_ reverse : Elapsed 0.011 ms (1.107 ms / 100) 1.033 -> 1.038 ( +0.48%) [ +0.19% +0.00% +0.29% / +0.48% +0.87% +1.45%] index_copy_ reverse : Elapsed 0.010 ms (1.035 ms / 100) 1.104 -> 1.103 ( -0.09%) [ +0.36% +0.27% +0.00% / +0.36% +0.27% -0.09%] index_add_ spread : Elapsed 0.011 ms (1.108 ms / 100) 1.034 -> 1.043 ( +0.87%) [ +0.00% +0.29% +0.39% / +0.87% +1.45% +0.87%] index_copy_ spread : Elapsed 0.010 ms (1.034 ms / 100) 1.101 -> 1.104 ( +0.27%) [ +0.54% +0.00% +0.36% / +0.27% +1.63% +1.45%] index_add_ strided 3 : Elapsed 0.011 ms (1.107 ms / 100) 1.030 -> 1.035 ( +0.49%) [ +0.87% +0.00% +0.19% / +0.49% +1.84% +1.36%] index_copy_ strided 3 : Elapsed 0.010 ms (1.039 ms / 100) 1.095 -> 1.100 ( +0.46%) [ +0.00% +0.18% +0.09% / +0.46% +1.10% +1.28%] index_add_ strided 7 : Elapsed 0.011 ms (1.095 ms / 100) 1.033 -> 1.038 ( +0.48%) [ +0.58% +0.10% +0.00% / +0.48% +0.97% +1.16%] index_copy_ strided 7 : Elapsed 0.010 ms (1.039 ms / 100) 1.098 -> 1.105 ( +0.64%) [ +0.36% +0.27% +0.00% / +0.64% +1.37% +1.82%] index_add_ perm : Elapsed 0.011 ms (1.102 ms / 100) 1.028 -> 1.037 ( +0.88%) [ +0.19% +0.00% +0.49% / +0.88% +1.46% +1.26%] index_copy_ perm : Elapsed 0.010 ms (1.030 ms / 100) 1.089 -> 1.100 ( +1.01%) [ +0.64% +0.00% +0.46% / +1.01% +1.93% +2.11%] index_add_ perm_sorted : Elapsed 0.011 ms (1.096 ms / 100) 1.034 -> 1.041 ( +0.68%) [ +0.00% +0.10% +0.19% / +0.77% +0.68% +0.97%] index_copy_ perm_sorted : Elapsed 0.010 ms (1.034 ms / 100) 20.150 -> 20.147 ( -0.01%) [ +0.08% +0.00% +0.20% / -0.01% +0.20% +0.01%] index_select const : Elapsed 0.202 ms (20.167 ms / 100) 20.157 -> 20.170 ( +0.06%) [ +0.12% +0.10% +0.00% / +0.15% +0.06% +0.38%] index_select wrap : Elapsed 0.202 ms (20.181 ms / 100) 20.139 -> 20.141 ( +0.01%) [ +0.13% +0.17% +0.00% / +0.01% +0.27% +0.05%] index_select linear : Elapsed 0.202 ms (20.165 ms / 100) 20.149 -> 20.121 ( -0.14%) [ +0.18% +0.06% +0.00% / +0.23% +0.04% -0.14%] index_select reverse : Elapsed 0.202 ms (20.186 ms / 100) 20.142 -> 20.147 ( +0.02%) [ +0.07% +0.19% +0.00% / +0.02% +0.20% +0.04%] index_select skip64 : Elapsed 0.202 ms (20.156 ms / 100) 20.153 -> 20.147 ( -0.03%) [ +0.14% +0.00% +0.12% / +0.03% +0.28% -0.03%] index_select skip256 : Elapsed 0.202 ms (20.181 ms / 100) 20.163 -> 20.097 ( -0.33%) [ +0.10% +0.05% +0.00% / +0.17% -0.33% -0.12%] index_select spread : Elapsed 0.202 ms (20.183 ms / 100) 20.174 -> 20.129 ( -0.22%) [ +0.07% +0.05% +0.00% / -0.01% -0.17% -0.22%] index_select random : Elapsed 0.202 ms (20.189 ms / 100) 20.124 -> 20.140 ( +0.08%) [ +0.17% +0.30% +0.00% / +0.14% +0.22% +0.08%] index_select random_sorted : Elapsed 0.202 ms (20.158 ms / 100) B = [5, 500, 200] (stride (500, 1, 2500)) A = [5, 500, 1] (stride (1, 5, 5)) dim = 2 0.834 -> 0.837 ( +0.36%) [ +1.20% +0.00% +0.24% / +1.08% +0.72% +0.36%] index_add_ linear : Elapsed 0.008 ms (0.844 ms / 100) 0.822 -> 0.822 ( +0.00%) [ +0.49% +0.12% +0.00% / +0.24% +0.00% +0.12%] index_copy_ linear : Elapsed 0.008 ms (0.826 ms / 100) 0.837 -> 0.839 ( +0.24%) [ +0.60% +0.24% +0.00% / +0.24% +0.24% +0.24%] index_add_ reverse : Elapsed 0.008 ms (0.842 ms / 100) 0.818 -> 0.819 ( +0.12%) [ +0.73% +0.24% +0.00% / +1.22% +0.12% +0.37%] index_copy_ reverse : Elapsed 0.008 ms (0.824 ms / 100) 0.839 -> 0.836 ( -0.36%) [ +0.00% +0.00% +0.60% / +0.00% +0.48% -0.36%] index_add_ spread : Elapsed 0.008 ms (0.839 ms / 100) 0.825 -> 0.821 ( -0.48%) [ +0.00% +0.00% +5.45% / +0.12% -0.36% -0.48%] index_copy_ spread : Elapsed 0.008 ms (0.825 ms / 100) 0.837 -> 0.836 ( -0.12%) [ +0.00% +0.00% +2.03% / +0.12% -0.12% +0.12%] index_add_ strided 3 : Elapsed 0.008 ms (0.837 ms / 100) 0.824 -> 0.818 ( -0.73%) [ +0.00% +0.24% +18.45% / +0.61% -0.12% -0.73%] index_copy_ strided 3 : Elapsed 0.008 ms (0.824 ms / 100) 0.837 -> 0.838 ( +0.12%) [ +0.12% +0.00% +16.49% / +0.24% +0.12% +0.12%] index_add_ strided 7 : Elapsed 0.008 ms (0.838 ms / 100) 0.820 -> 0.821 ( +0.12%) [ +0.98% +0.00% +1.83% / +0.98% +0.37% +0.12%] index_copy_ strided 7 : Elapsed 0.008 ms (0.828 ms / 100) 0.831 -> 0.835 ( +0.48%) [ +0.72% +0.12% +0.00% / +0.48% +0.60% +0.96%] index_add_ perm : Elapsed 0.008 ms (0.837 ms / 100) 0.813 -> 0.821 ( +0.98%) [ +1.11% +0.00% +0.49% / +0.98% +1.35% +1.11%] index_copy_ perm : Elapsed 0.008 ms (0.822 ms / 100) 0.832 -> 0.834 ( +0.24%) [ +0.12% +0.48% +0.00% / +0.24% +0.60% +0.84%] index_add_ perm_sorted : Elapsed 0.008 ms (0.833 ms / 100) 0.812 -> 0.819 ( +0.86%) [ +0.74% +0.00% +0.49% / +1.35% +0.99% +0.86%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.818 ms / 100) 16.031 -> 16.092 ( +0.38%) [ +0.22% +0.04% +0.00% / +0.38% +0.52% +0.43%] index_select const : Elapsed 0.161 ms (16.067 ms / 100) 16.021 -> 16.074 ( +0.33%) [ +0.34% +0.00% +0.14% / +0.37% +0.46% +0.33%] index_select wrap : Elapsed 0.161 ms (16.075 ms / 100) 16.029 -> 16.074 ( +0.28%) [ +0.37% +0.00% +0.04% / +0.32% +0.28% +0.51%] index_select linear : Elapsed 0.161 ms (16.088 ms / 100) 16.006 -> 16.090 ( +0.52%) [ +0.39% +0.00% +0.31% / +0.55% +0.71% +0.52%] index_select reverse : Elapsed 0.161 ms (16.069 ms / 100) 16.034 -> 16.086 ( +0.32%) [ +0.24% +0.05% +0.00% / +0.45% +0.32% +0.41%] index_select skip64 : Elapsed 0.161 ms (16.072 ms / 100) 16.017 -> 16.103 ( +0.54%) [ +0.35% +0.00% +0.14% / +0.56% +0.58% +0.54%] index_select skip256 : Elapsed 0.161 ms (16.073 ms / 100) 16.021 -> 16.086 ( +0.41%) [ +0.44% +0.00% +0.24% / +0.41% +0.51% +0.45%] index_select spread : Elapsed 0.161 ms (16.091 ms / 100) 16.044 -> 16.085 ( +0.26%) [ +0.20% +0.00% +0.04% / +0.26% +0.35% +0.26%] index_select random : Elapsed 0.161 ms (16.076 ms / 100) 16.021 -> 16.084 ( +0.39%) [ +0.34% +0.00% +0.00% / +0.42% +0.39% +0.44%] index_select random_sorted : Elapsed 0.161 ms (16.075 ms / 100) B = [5, 500, 200] (stride (1, 5, 2500)) A = [5, 500, 1] (stride (1, 5, 1)) dim = 2 0.496 -> 0.495 ( -0.20%) [ +0.00% +4.84% +1.61% / +1.01% -0.20% +1.61%] index_add_ linear : Elapsed 0.005 ms (0.496 ms / 100) 0.499 -> 0.497 ( -0.40%) [ +0.00% +3.01% +0.00% / +0.60% +2.00% -0.40%] index_copy_ linear : Elapsed 0.005 ms (0.499 ms / 100) 0.500 -> 0.495 ( -1.00%) [ +0.40% +6.20% +0.00% / -0.20% +1.60% -1.00%] index_add_ reverse : Elapsed 0.005 ms (0.502 ms / 100) 0.498 -> 0.489 ( -1.81%) [ +0.80% +3.41% +0.00% / +20.08% -1.81% -0.40%] index_copy_ reverse : Elapsed 0.005 ms (0.502 ms / 100) 0.502 -> 0.493 ( -1.79%) [ +0.00% +1.59% +0.20% / +0.00% -1.79% -1.39%] index_add_ spread : Elapsed 0.005 ms (0.502 ms / 100) 0.501 -> 0.485 ( -3.19%) [ +0.20% +2.00% +0.00% / -1.00% -3.19% -0.80%] index_copy_ spread : Elapsed 0.005 ms (0.502 ms / 100) 0.508 -> 0.494 ( -2.76%) [ +0.00% +0.39% +0.59% / -1.38% -2.76% -0.79%] index_add_ strided 3 : Elapsed 0.005 ms (0.508 ms / 100) 0.501 -> 0.485 ( -3.19%) [ +0.00% +1.20% +0.20% / +5.99% -3.19% +0.00%] index_copy_ strided 3 : Elapsed 0.005 ms (0.501 ms / 100) 0.499 -> 0.489 ( -2.00%) [ +0.00% +2.40% +1.00% / -0.40% -2.00% +4.81%] index_add_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.497 -> 0.489 ( -1.61%) [ +0.80% +1.41% +0.00% / +6.44% -1.61% -0.60%] index_copy_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.501 -> 0.494 ( -1.40%) [ +0.00% +1.80% +0.60% / -0.40% -1.40% -0.40%] index_add_ perm : Elapsed 0.005 ms (0.501 ms / 100) 0.495 -> 0.487 ( -1.62%) [ +1.21% +0.00% +1.01% / +8.89% -1.62% +1.01%] index_copy_ perm : Elapsed 0.005 ms (0.501 ms / 100) 0.498 -> 0.487 ( -2.21%) [ +0.00% +2.61% +1.00% / +2.61% -2.21% +1.00%] index_add_ perm_sorted : Elapsed 0.005 ms (0.498 ms / 100) 0.499 -> 0.498 ( -0.20%) [ +0.00% +1.00% +0.20% / +5.01% +0.20% -0.20%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.499 ms / 100) 8.953 -> 8.878 ( -0.84%) [ +0.00% +0.18% +0.08% / -0.82% -0.39% -0.84%] index_select const : Elapsed 0.090 ms (8.953 ms / 100) 8.860 -> 8.883 ( +0.26%) [ +0.00% +0.50% +0.91% / +0.26% +0.76% +0.91%] index_select wrap : Elapsed 0.089 ms (8.860 ms / 100) 8.894 -> 8.833 ( -0.69%) [ +0.00% +0.52% +1.15% / +0.27% -0.66% -0.69%] index_select linear : Elapsed 0.089 ms (8.894 ms / 100) 8.985 -> 8.866 ( -1.32%) [ +0.14% +0.00% +0.06% / -0.24% -1.32% -0.75%] index_select reverse : Elapsed 0.090 ms (8.998 ms / 100) 8.863 -> 8.885 ( +0.25%) [ +0.09% +0.15% +0.00% / +0.91% +0.25% +0.50%] index_select skip64 : Elapsed 0.089 ms (8.871 ms / 100) 8.989 -> 8.870 ( -1.32%) [ +0.02% +0.60% +0.00% / -0.04% -1.32% -1.13%] index_select skip256 : Elapsed 0.090 ms (8.991 ms / 100) 8.950 -> 8.876 ( -0.83%) [ +0.74% +0.00% +0.95% / +0.29% +0.11% -0.83%] index_select spread : Elapsed 0.090 ms (9.016 ms / 100) 8.925 -> 8.859 ( -0.74%) [ +0.00% +0.16% +0.15% / -0.74% +0.57% +0.22%] index_select random : Elapsed 0.089 ms (8.925 ms / 100) 8.915 -> 8.861 ( -0.61%) [ +0.00% +0.11% +0.38% / -0.35% -0.10% -0.61%] index_select random_sorted : Elapsed 0.089 ms (8.915 ms / 100) out_shape = [200, 1, 5] in_shape = [500, 1, 5] idx_dim = 0 B = [200, 1, 5] (stride (1, 1000, 200)) A = [500, 1, 5] (stride (5, 1, 1)) dim = 0 0.533 -> 0.522 ( -2.06%) [ +4.50% +0.00% +3.56% / +2.25% -2.06% -0.94%] index_select const : Elapsed 0.006 ms (0.557 ms / 100) 0.526 -> 0.521 ( -0.95%) [ +4.18% +0.00% +2.47% / +1.52% -0.95% +0.38%] index_select wrap : Elapsed 0.005 ms (0.548 ms / 100) 0.525 -> 0.522 ( -0.57%) [ +4.00% +0.95% +0.00% / +1.52% -0.57% +1.14%] index_select linear : Elapsed 0.005 ms (0.546 ms / 100) 0.523 -> 0.523 ( +0.00%) [ +4.78% +0.19% +0.00% / +2.49% +0.57% +0.00%] index_select reverse : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.524 ( +0.77%) [ +4.81% +2.69% +0.00% / +3.27% +0.77% +8.65%] index_select skip64 : Elapsed 0.005 ms (0.545 ms / 100) 0.528 -> 0.523 ( -0.95%) [ +8.90% +0.00% +0.00% / +2.27% -0.95% +6.06%] index_select skip256 : Elapsed 0.006 ms (0.575 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +4.02% +0.00% +1.15% / +2.68% +0.38% +15.33%] index_select spread : Elapsed 0.005 ms (0.543 ms / 100) 0.525 -> 0.533 ( +1.52%) [ +4.19% +0.00% +0.38% / +2.48% +1.52% +7.43%] index_select strided 3 : Elapsed 0.005 ms (0.547 ms / 100) bad 0.524 -> 0.565 ( +7.82%) [ +4.20% +0.19% +0.00% / +10.50% +7.82% +21.56%] index_select strided 5 : Elapsed 0.005 ms (0.546 ms / 100) bad 0.525 -> 0.558 ( +6.29%) [ +4.19% +0.38% +0.00% / +6.29% +15.05% +8.95%] index_select strided 7 : Elapsed 0.005 ms (0.547 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +4.62% +1.15% +0.00% / +3.27% +0.96% +1.35%] index_select strided 8 : Elapsed 0.005 ms (0.544 ms / 100) 0.523 -> 0.529 ( +1.15%) [ +4.78% +0.00% +0.96% / +2.10% +1.91% +1.15%] index_select strided 16 : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.526 ( +1.15%) [ +5.19% +0.00% +0.38% / +2.50% +1.15% +2.31%] index_select strided 64 : Elapsed 0.005 ms (0.547 ms / 100) 0.515 -> 0.522 ( +1.36%) [ +6.02% +0.00% +6.60% / +4.08% +1.36% +2.72%] index_select strided 100 : Elapsed 0.005 ms (0.546 ms / 100) 0.516 -> 0.517 ( +0.19%) [ +7.75% +1.16% +0.00% / +6.59% +0.19% +3.10%] index_select strided 255 : Elapsed 0.006 ms (0.556 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +5.18% +1.15% +0.00% / +9.02% +0.19% +1.73%] index_select strided 256 : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.521 ( +0.19%) [ +6.73% +0.00% +0.96% / +25.77% +0.19% +3.08%] index_select strided 257 : Elapsed 0.006 ms (0.555 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +3.85% +0.38% +0.00% / +4.04% +1.35% +1.54%] index_select random : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.522 ( +0.38%) [ +4.62% +0.96% +0.00% / +3.27% +0.38% +0.58%] index_select random_sorted : Elapsed 0.005 ms (0.544 ms / 100) 0.524 -> 0.520 ( -0.76%) [ +9.16% +0.00% +7.82% / +2.67% -0.76% +0.76%] index_select perm : Elapsed 0.006 ms (0.572 ms / 100) 0.532 -> 0.524 ( -1.50%) [ +2.82% +0.00% +3.76% / +0.19% -1.50% -1.32%] index_select perm_sorted : Elapsed 0.005 ms (0.547 ms / 100) B = [200, 1, 5] (stride (1, 1000, 200)) A = [500, 1, 5] (stride (1, 2500, 500)) dim = 0 0.518 -> 0.531 ( +2.51%) [ +5.41% +3.47% +0.00% / +2.90% +2.51% +2.90%] index_select const : Elapsed 0.005 ms (0.546 ms / 100) 0.519 -> 0.518 ( -0.19%) [+12.72% +0.77% +0.00% / +2.70% -0.19% +1.54%] index_select wrap : Elapsed 0.006 ms (0.585 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +3.84% +0.00% +8.45% / +5.95% +1.54% +1.92%] index_select linear : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.530 ( +0.95%) [ +7.81% +0.00% +7.05% / +0.95% +0.95% +0.95%] index_select reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.523 -> 0.520 ( -0.57%) [ +4.40% +0.19% +0.00% / +1.91% -0.57% +1.34%] index_select skip64 : Elapsed 0.005 ms (0.546 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +6.00% +1.16% +0.00% / +3.29% +0.97% +11.03%] index_select skip256 : Elapsed 0.005 ms (0.548 ms / 100) 0.517 -> 0.528 ( +2.13%) [ +5.42% +0.77% +0.00% / +7.54% +2.13% +17.99%] index_select spread : Elapsed 0.005 ms (0.545 ms / 100) 0.516 -> 0.535 ( +3.68%) [ +6.20% +1.16% +0.00% / +3.68% +14.73% +21.51%] index_select strided 3 : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.524 ( +0.77%) [+12.31% +1.73% +0.00% / +12.31% +0.77% +1.35%] index_select strided 5 : Elapsed 0.006 ms (0.584 ms / 100) 0.520 -> 0.524 ( +0.77%) [ +9.04% +0.00% +1.54% / +8.08% +0.77% +7.50%] index_select strided 7 : Elapsed 0.006 ms (0.567 ms / 100) 0.519 -> 0.524 ( +0.96%) [ +5.39% +1.93% +0.00% / +9.63% +0.96% +2.12%] index_select strided 8 : Elapsed 0.005 ms (0.547 ms / 100) 0.520 -> 0.523 ( +0.58%) [ +5.58% +0.96% +0.00% / +9.42% +0.58% +1.92%] index_select strided 16 : Elapsed 0.005 ms (0.549 ms / 100) 0.518 -> 0.521 ( +0.58%) [ +5.79% +1.16% +0.00% / +9.27% +0.58% +2.32%] index_select strided 64 : Elapsed 0.005 ms (0.548 ms / 100) 0.519 -> 0.523 ( +0.77%) [+11.56% +3.66% +0.00% / +2.31% +0.77% +2.89%] index_select strided 100 : Elapsed 0.006 ms (0.579 ms / 100) 0.524 -> 0.529 ( +0.95%) [ +2.67% +0.76% +0.00% / +17.94% +1.53% +0.95%] index_select strided 255 : Elapsed 0.005 ms (0.538 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +4.80% +0.77% +0.00% / +3.26% +1.54% +1.73%] index_select strided 256 : Elapsed 0.005 ms (0.546 ms / 100) 0.518 -> 0.521 ( +0.58%) [ +6.37% +0.97% +0.00% / +5.60% +0.58% +11.00%] index_select strided 257 : Elapsed 0.006 ms (0.551 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +2.28% +0.00% +1.33% / +1.71% -1.14% +0.38%] index_select random : Elapsed 0.005 ms (0.538 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +4.20% +0.00% +6.87% / +2.48% -0.57% +2.10%] index_select random_sorted : Elapsed 0.005 ms (0.546 ms / 100) 0.528 -> 0.523 ( -0.95%) [ +2.27% +0.00% +3.03% / +0.95% +1.70% -0.95%] index_select perm : Elapsed 0.005 ms (0.540 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +9.79% +1.15% +0.00% / +3.45% -0.19% -0.19%] index_select perm_sorted : Elapsed 0.006 ms (0.572 ms / 100) B = [200, 1, 5] (stride (1, 1, 200)) dim = 0 fill_cnt = 500 0.435 -> 0.433 ( -0.46%) [ +8.28% +2.76% +0.00% / +4.37% -0.46% +0.00%] index_fill_ const : Elapsed 0.005 ms (0.471 ms / 100) 0.436 -> 0.436 ( +0.00%) [ +5.05% +1.61% +0.00% / +0.92% +0.00% +0.92%] index_fill_ linear : Elapsed 0.005 ms (0.458 ms / 100) 0.443 -> 0.441 ( -0.45%) [ +0.23% +0.23% +0.00% / +0.45% +3.16% -0.45%] index_fill_ reverse : Elapsed 0.004 ms (0.444 ms / 100) 0.437 -> 0.432 ( -1.14%) [ +2.06% +1.14% +0.00% / +1.37% -1.14% +9.84%] index_fill_ skip64 : Elapsed 0.004 ms (0.446 ms / 100) 0.435 -> 0.440 ( +1.15%) [ +2.07% +0.92% +0.00% / +1.15% +4.83% +8.97%] index_fill_ skip256 : Elapsed 0.004 ms (0.444 ms / 100) 0.433 -> 0.437 ( +0.92%) [ +5.54% +3.93% +0.00% / +1.62% +12.01% +0.92%] index_fill_ spread : Elapsed 0.005 ms (0.457 ms / 100) 0.438 -> 0.437 ( -0.23%) [ +1.83% +1.14% +0.00% / +0.68% +0.46% -0.23%] index_fill_ strided 3 : Elapsed 0.004 ms (0.446 ms / 100) 0.437 -> 0.434 ( -0.69%) [ +2.06% +1.83% +0.00% / +1.14% -0.69% -0.23%] index_fill_ strided 5 : Elapsed 0.004 ms (0.446 ms / 100) 0.436 -> 0.431 ( -1.15%) [ +7.11% +1.83% +0.00% / +7.80% -1.15% -0.23%] index_fill_ strided 7 : Elapsed 0.005 ms (0.467 ms / 100) 0.445 -> 0.435 ( -2.25%) [ +0.00% +8.54% +4.49% / +10.34% -2.25% +0.45%] index_fill_ strided 8 : Elapsed 0.004 ms (0.445 ms / 100) 0.442 -> 0.435 ( -1.58%) [ +0.00% +6.33% +1.13% / -0.90% -1.58% -0.23%] index_fill_ strided 16 : Elapsed 0.004 ms (0.442 ms / 100) 0.433 -> 0.437 ( +0.92%) [ +2.31% +1.85% +0.00% / +1.85% +0.92% +2.31%] index_fill_ strided 64 : Elapsed 0.004 ms (0.443 ms / 100) 0.430 -> 0.435 ( +1.16%) [ +5.58% +2.56% +0.00% / +3.49% +1.16% +1.40%] index_fill_ strided 100 : Elapsed 0.005 ms (0.454 ms / 100) 0.433 -> 0.433 ( +0.00%) [ +7.39% +4.16% +0.00% / +1.15% +0.69% +0.00%] index_fill_ random : Elapsed 0.005 ms (0.465 ms / 100) 0.443 -> 0.434 ( -2.03%) [ +0.45% +0.00% +4.97% / -1.35% -2.03% -1.13%] index_fill_ random_sorted : Elapsed 0.004 ms (0.445 ms / 100) out_shape = [500, 200, 5] in_shape = [500, 1, 5] idx_dim = 1 B = [500, 200, 5] (stride (1000, 1, 200)) A = [500, 1, 5] (stride (1, 1, 500)) dim = 1 0.520 -> 0.524 ( +0.77%) [ +3.08% +0.00% +4.04% / +5.77% +0.77% +1.54%] index_add_ linear : Elapsed 0.005 ms (0.536 ms / 100) 0.494 -> 0.492 ( -0.40%) [ +4.86% +11.54% +0.00% / +5.06% +6.68% -0.40%] index_copy_ linear : Elapsed 0.005 ms (0.518 ms / 100) 0.523 -> 0.522 ( -0.19%) [ +1.15% +0.76% +0.00% / +8.22% -0.19% +0.19%] index_add_ reverse : Elapsed 0.005 ms (0.529 ms / 100) 0.495 -> 0.486 ( -1.82%) [ +1.62% +1.41% +0.00% / +13.74% -1.82% +0.40%] index_copy_ reverse : Elapsed 0.005 ms (0.503 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +0.00% +2.86% +0.19% / +0.76% +2.29% -0.19%] index_add_ spread : Elapsed 0.005 ms (0.525 ms / 100) 0.496 -> 0.491 ( -1.01%) [ +2.62% +1.01% +0.00% / +21.37% -1.01% -0.20%] index_copy_ spread : Elapsed 0.005 ms (0.509 ms / 100) 0.527 -> 0.525 ( -0.38%) [ +5.31% +0.38% +0.00% / +2.66% -0.38% +0.76%] index_add_ strided 3 : Elapsed 0.006 ms (0.555 ms / 100) 0.496 -> 0.506 ( +2.02%) [ +2.62% +6.45% +0.00% / +7.46% +2.02% +4.03%] index_copy_ strided 3 : Elapsed 0.005 ms (0.509 ms / 100) 0.527 -> 0.521 ( -1.14%) [ +1.52% +0.19% +0.00% / -0.38% -1.14% -0.95%] index_add_ strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.492 -> 0.489 ( -0.61%) [ +2.64% +1.22% +0.00% / +14.43% -0.61% +0.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.505 ms / 100) 0.513 -> 0.520 ( +1.36%) [ +0.39% +0.00% +0.58% / +2.14% +1.75% +1.36%] index_add_ perm : Elapsed 0.005 ms (0.515 ms / 100) 0.494 -> 0.494 ( +0.00%) [ +1.42% +3.04% +0.00% / +1.42% +0.00% +1.21%] index_copy_ perm : Elapsed 0.005 ms (0.501 ms / 100) 0.517 -> 0.531 ( +2.71%) [ +5.80% +1.74% +0.00% / +23.21% +2.71% +5.42%] index_add_ perm_sorted : Elapsed 0.005 ms (0.547 ms / 100) 0.494 -> 0.491 ( -0.61%) [ +1.42% +3.04% +0.00% / +4.45% -0.61% +4.45%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) GOOD 11.067 -> 8.163 (-26.24%) [ +0.00% +0.17% +0.21% / -26.24% -25.88% -25.84%] index_select const : Elapsed 0.111 ms (11.067 ms / 100) GOOD 11.069 -> 8.024 (-27.51%) [ +0.21% +0.00% +0.17% / -27.51% -26.37% -26.34%] index_select wrap : Elapsed 0.111 ms (11.092 ms / 100) GOOD 11.052 -> 8.152 (-26.24%) [ +0.00% +0.52% +0.13% / -26.24% -25.95% -25.81%] index_select linear : Elapsed 0.111 ms (11.052 ms / 100) GOOD 10.989 -> 8.137 (-25.95%) [ +0.35% +0.00% +0.36% / -24.79% -25.95% -25.90%] index_select reverse : Elapsed 0.110 ms (11.027 ms / 100) GOOD 10.930 -> 8.060 (-26.26%) [ +0.51% +0.00% +0.34% / -24.97% -26.08% -26.26%] index_select skip64 : Elapsed 0.110 ms (10.986 ms / 100) GOOD 11.060 -> 8.062 (-27.11%) [ +0.00% +0.18% +0.47% / -27.11% -25.87% -25.92%] index_select skip256 : Elapsed 0.111 ms (11.060 ms / 100) GOOD 11.028 -> 8.099 (-26.56%) [ +0.01% +0.00% +0.34% / -26.56% -25.74% -25.83%] index_select spread : Elapsed 0.110 ms (11.029 ms / 100) GOOD 10.966 -> 8.037 (-26.71%) [ +0.16% +0.00% +0.15% / -25.38% -26.71% -26.66%] index_select random : Elapsed 0.110 ms (10.983 ms / 100) GOOD 11.011 -> 8.141 (-26.06%) [ +0.00% +0.45% +0.24% / -25.41% -26.06% -25.96%] index_select random_sorted : Elapsed 0.110 ms (11.011 ms / 100) B = [500, 200, 5] (stride (1000, 1, 200)) A = [500, 1, 5] (stride (1, 500, 500)) dim = 1 0.535 -> 0.526 ( -1.68%) [ +5.05% +2.06% +0.00% / +3.36% -0.37% -1.68%] index_add_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.501 -> 0.487 ( -2.79%) [ +1.20% +0.00% +0.40% / +4.59% -2.79% -1.20%] index_copy_ linear : Elapsed 0.005 ms (0.507 ms / 100) 0.526 -> 0.531 ( +0.95%) [ +0.00% +4.18% +0.38% / +3.99% +0.95% +0.95%] index_add_ reverse : Elapsed 0.005 ms (0.526 ms / 100) 0.511 -> 0.492 ( -3.72%) [ +0.98% +0.00% +3.13% / -1.76% -3.72% -2.54%] index_copy_ reverse : Elapsed 0.005 ms (0.516 ms / 100) 0.527 -> 0.530 ( +0.57%) [ +0.19% +2.09% +0.00% / +0.76% +0.57% +6.07%] index_add_ spread : Elapsed 0.005 ms (0.528 ms / 100) 0.499 -> 0.499 ( +0.00%) [ +2.61% +9.42% +0.00% / +0.00% +2.20% +1.60%] index_copy_ spread : Elapsed 0.005 ms (0.512 ms / 100) 0.529 -> 0.540 ( +2.08%) [ +5.86% +0.00% +0.19% / +5.48% +2.46% +2.08%] index_add_ strided 3 : Elapsed 0.006 ms (0.560 ms / 100) 0.499 -> 0.505 ( +1.20%) [ +2.20% +1.20% +0.00% / +18.04% +1.20% +1.80%] index_copy_ strided 3 : Elapsed 0.005 ms (0.510 ms / 100) 0.526 -> 0.528 ( +0.38%) [ +1.71% +0.00% +0.57% / +0.38% +3.23% +3.04%] index_add_ strided 7 : Elapsed 0.005 ms (0.535 ms / 100) 0.502 -> 0.500 ( -0.40%) [ +0.00% +5.18% +5.38% / -0.40% +0.40% +0.60%] index_copy_ strided 7 : Elapsed 0.005 ms (0.502 ms / 100) 0.523 -> 0.498 ( -4.78%) [ +0.76% +0.00% +0.19% / +0.76% -4.78% -0.38%] index_add_ perm : Elapsed 0.005 ms (0.527 ms / 100) 0.492 -> 0.490 ( -0.41%) [ +3.05% +2.85% +0.00% / +1.83% -0.41% +1.63%] index_copy_ perm : Elapsed 0.005 ms (0.507 ms / 100) 0.526 -> 0.500 ( -4.94%) [ +5.13% +0.00% +0.19% / +0.00% -4.94% -4.37%] index_add_ perm_sorted : Elapsed 0.006 ms (0.553 ms / 100) 0.505 -> 0.484 ( -4.16%) [ +0.00% +1.58% +1.19% / +0.59% -4.16% -0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.505 ms / 100) GOOD 11.047 -> 8.033 (-27.28%) [ +0.69% +0.76% +0.00% / -27.28% -26.05% -25.89%] index_select const : Elapsed 0.111 ms (11.123 ms / 100) GOOD 11.038 -> 8.020 (-27.34%) [ +0.00% +0.24% +0.48% / -26.46% -27.34% -27.26%] index_select wrap : Elapsed 0.110 ms (11.038 ms / 100) GOOD 11.076 -> 8.025 (-27.55%) [ +0.87% +0.41% +0.00% / -27.55% -26.73% -26.55%] index_select linear : Elapsed 0.112 ms (11.172 ms / 100) GOOD 10.971 -> 8.165 (-25.58%) [ +0.46% +0.00% +0.38% / -24.00% -25.50% -25.58%] index_select reverse : Elapsed 0.110 ms (11.022 ms / 100) GOOD 11.059 -> 8.133 (-26.46%) [ +0.18% +0.28% +0.00% / -26.46% -25.03% -24.88%] index_select skip64 : Elapsed 0.111 ms (11.079 ms / 100) GOOD 11.113 -> 8.130 (-26.84%) [ +0.04% +0.00% +0.84% / -26.84% -25.30% -25.19%] index_select skip256 : Elapsed 0.111 ms (11.118 ms / 100) GOOD 11.046 -> 8.146 (-26.25%) [ +0.23% +0.00% +0.72% / -24.64% -26.25% -26.15%] index_select spread : Elapsed 0.111 ms (11.071 ms / 100) GOOD 10.981 -> 8.043 (-26.76%) [ +0.99% +1.76% +0.00% / -26.76% -25.30% -25.44%] index_select random : Elapsed 0.111 ms (11.090 ms / 100) GOOD 11.046 -> 8.038 (-27.23%) [ +0.00% +0.24% +0.49% / -25.95% -27.23% -27.18%] index_select random_sorted : Elapsed 0.110 ms (11.046 ms / 100) B = [500, 200, 5] (stride (200, 1, 100000)) A = [500, 1, 5] (stride (5, 1, 1)) dim = 1 0.518 -> 0.535 ( +3.28%) [ +3.67% +0.00% +4.25% / +11.20% +3.28% +7.72%] index_add_ linear : Elapsed 0.005 ms (0.537 ms / 100) 0.503 -> 0.489 ( -2.78%) [ +1.79% +2.78% +0.00% / +7.95% -2.78% -1.59%] index_copy_ linear : Elapsed 0.005 ms (0.512 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +0.00% +3.61% +0.38% / +0.76% -0.76% -1.14%] index_add_ reverse : Elapsed 0.005 ms (0.526 ms / 100) 0.497 -> 0.485 ( -2.41%) [ +0.60% +1.01% +0.00% / +5.84% -2.41% +0.20%] index_copy_ reverse : Elapsed 0.005 ms (0.500 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +0.57% +0.00% +0.19% / +7.44% +0.38% -0.57%] index_add_ spread : Elapsed 0.005 ms (0.527 ms / 100) 0.496 -> 0.489 ( -1.41%) [ +1.21% +2.22% +0.00% / +1.41% -1.41% +0.40%] index_copy_ spread : Elapsed 0.005 ms (0.502 ms / 100) 0.524 -> 0.521 ( -0.57%) [ +0.76% +0.38% +0.00% / +4.58% -0.57% +0.00%] index_add_ strided 3 : Elapsed 0.005 ms (0.528 ms / 100) 0.494 -> 0.490 ( -0.81%) [ +1.42% +1.42% +0.00% / +2.83% -0.81% -0.40%] index_copy_ strided 3 : Elapsed 0.005 ms (0.501 ms / 100) 0.523 -> 0.520 ( -0.57%) [ +0.96% +0.38% +0.00% / +0.38% +0.19% -0.57%] index_add_ strided 7 : Elapsed 0.005 ms (0.528 ms / 100) 0.493 -> 0.485 ( -1.62%) [ +1.62% +8.52% +0.00% / +8.52% -1.62% +0.81%] index_copy_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.516 -> 0.517 ( +0.19%) [ +0.00% +0.19% +1.94% / +0.19% +1.36% +1.74%] index_add_ perm : Elapsed 0.005 ms (0.516 ms / 100) 0.494 -> 0.499 ( +1.01%) [ +4.25% +7.29% +0.00% / +4.05% +3.04% +1.01%] index_copy_ perm : Elapsed 0.005 ms (0.515 ms / 100) 0.508 -> 0.513 ( +0.98%) [ +1.97% +5.12% +0.00% / +0.98% +4.72% +3.15%] index_add_ perm_sorted : Elapsed 0.005 ms (0.518 ms / 100) 0.497 -> 0.494 ( -0.60%) [ +4.02% +4.43% +0.00% / +6.64% -0.40% -0.60%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.517 ms / 100) GOOD 10.485 -> 8.181 (-21.97%) [ +0.36% +0.00% +0.15% / -21.97% -21.09% -21.00%] index_select const : Elapsed 0.105 ms (10.523 ms / 100) GOOD 10.386 -> 8.066 (-22.34%) [ +0.00% +0.67% +0.37% / -22.34% -21.00% -21.13%] index_select wrap : Elapsed 0.104 ms (10.386 ms / 100) GOOD 10.454 -> 8.188 (-21.68%) [ +0.00% +0.26% +0.57% / -21.68% -20.81% -21.02%] index_select linear : Elapsed 0.105 ms (10.454 ms / 100) GOOD 10.411 -> 8.223 (-21.02%) [ +0.11% +0.86% +0.00% / -20.39% -21.02% -20.99%] index_select reverse : Elapsed 0.104 ms (10.422 ms / 100) GOOD 10.309 -> 8.170 (-20.75%) [ +0.00% +0.14% +0.49% / -20.33% -20.72% -20.75%] index_select skip64 : Elapsed 0.103 ms (10.309 ms / 100) GOOD 10.449 -> 8.121 (-22.28%) [ +0.22% +0.00% +0.37% / -22.28% -20.75% -20.75%] index_select skip256 : Elapsed 0.105 ms (10.472 ms / 100) GOOD 10.441 -> 8.159 (-21.86%) [ +0.00% +0.43% +0.64% / -21.86% -20.98% -20.93%] index_select spread : Elapsed 0.104 ms (10.441 ms / 100) GOOD 10.364 -> 8.126 (-21.59%) [ +0.29% +0.36% +0.00% / -20.56% -21.55% -21.59%] index_select random : Elapsed 0.104 ms (10.394 ms / 100) GOOD 10.369 -> 8.223 (-20.70%) [ +0.00% +1.17% +0.79% / -20.43% -20.64% -20.70%] index_select random_sorted : Elapsed 0.104 ms (10.369 ms / 100) out_shape = [500, 1, 200] in_shape = [500, 1, 5] idx_dim = 2 B = [500, 1, 200] (stride (200, 200, 1)) A = [500, 1, 5] (stride (5, 5, 1)) dim = 2 0.575 -> 0.577 ( +0.35%) [ +1.22% +0.35% +0.00% / +1.04% +0.35% +0.52%] index_add_ linear : Elapsed 0.006 ms (0.582 ms / 100) 0.591 -> 0.595 ( +0.68%) [ +1.18% +0.00% +0.00% / +1.18% +0.68% +1.18%] index_copy_ linear : Elapsed 0.006 ms (0.598 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +2.09% +0.17% +0.00% / +1.39% +0.70% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.586 ms / 100) 0.588 -> 0.595 ( +1.19%) [ +1.19% +0.00% +0.17% / +1.36% +1.19% +1.70%] index_copy_ reverse : Elapsed 0.006 ms (0.595 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.17% +0.00% / +1.57% +0.70% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.593 ( +0.68%) [ +1.19% +0.00% +0.17% / +1.36% +1.53% +0.68%] index_copy_ spread : Elapsed 0.006 ms (0.596 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.00% +0.00% / +0.87% +0.87% +0.70%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.70% +0.00% +0.34% / +1.53% +1.53% +1.36%] index_copy_ strided 3 : Elapsed 0.006 ms (0.599 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.87% +0.00% +0.17% / +0.87% +0.87% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.580 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.19% +0.00% +0.17% / +1.02% +1.02% +1.02%] index_copy_ strided 7 : Elapsed 0.006 ms (0.596 ms / 100) 0.576 -> 0.579 ( +0.52%) [ +0.69% +0.00% +0.69% / +0.52% +0.52% +0.52%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.588 -> 0.596 ( +1.36%) [ +1.53% +0.00% +0.17% / +1.53% +1.53% +1.36%] index_copy_ perm : Elapsed 0.006 ms (0.597 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.00% +0.00% / +0.87% +0.87% +0.87%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.19% +0.17% +0.00% / +1.36% +1.53% +1.02%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.596 ms / 100) good 5.128 -> 4.780 ( -6.79%) [ +0.47% +0.00% +0.33% / -6.45% -6.53% -6.79%] index_select const : Elapsed 0.052 ms (5.152 ms / 100) good 5.155 -> 4.802 ( -6.85%) [ +0.00% +0.04% +0.08% / -6.69% -6.85% -6.81%] index_select wrap : Elapsed 0.052 ms (5.155 ms / 100) good 5.151 -> 4.802 ( -6.78%) [ +0.02% +0.00% +0.10% / -6.78% -6.78% -6.78%] index_select linear : Elapsed 0.052 ms (5.152 ms / 100) good 5.157 -> 4.786 ( -7.19%) [ +0.02% +0.04% +0.00% / -7.19% -6.86% -6.67%] index_select reverse : Elapsed 0.052 ms (5.158 ms / 100) good 5.141 -> 4.781 ( -7.00%) [ +0.00% +0.00% +0.08% / -6.50% -7.00% -6.81%] index_select skip64 : Elapsed 0.051 ms (5.141 ms / 100) good 5.156 -> 4.791 ( -7.08%) [ +0.23% +0.12% +0.00% / -7.08% -6.89% -6.98%] index_select skip256 : Elapsed 0.052 ms (5.168 ms / 100) good 5.153 -> 4.783 ( -7.18%) [ +0.08% +0.00% +0.08% / -6.97% -7.01% -7.18%] index_select spread : Elapsed 0.052 ms (5.157 ms / 100) good 5.167 -> 4.785 ( -7.39%) [ +0.33% +0.00% +0.04% / -7.39% -7.06% -7.24%] index_select strided 3 : Elapsed 0.052 ms (5.184 ms / 100) good 5.181 -> 4.802 ( -7.32%) [ +0.00% +0.23% +0.00% / -7.18% -7.32% -7.32%] index_select random : Elapsed 0.052 ms (5.181 ms / 100) good 5.149 -> 4.787 ( -7.03%) [ +0.37% +0.00% +0.19% / -7.03% -6.56% -6.76%] index_select random_sorted : Elapsed 0.052 ms (5.168 ms / 100) B = [500, 1, 200] (stride (200, 1, 1)) A = [500, 1, 5] (stride (1, 2500, 500)) dim = 2 0.622 -> 0.628 ( +0.96%) [ +0.96% +0.00% +0.00% / +0.96% +0.96% +0.96%] index_add_ linear : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +1.26% +0.16% +0.00% / +1.26% +0.94% +0.94%] index_copy_ linear : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.627 ( +0.80%) [ +0.96% +0.00% +0.16% / +0.96% +0.80% +0.96%] index_add_ reverse : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.00% +0.31% / +0.94% +0.78% +0.78%] index_copy_ reverse : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.627 ( +0.80%) [ +1.13% +0.00% +0.16% / +0.96% +0.96% +0.80%] index_add_ spread : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.00% +0.00% / +0.94% +0.78% +0.78%] index_copy_ spread : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +0.96% +0.00% +0.00% / +0.96% +1.29% +0.96%] index_add_ strided 3 : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +1.10% +0.00% +0.16% / +0.94% +1.10% +0.94%] index_copy_ strided 3 : Elapsed 0.006 ms (0.644 ms / 100) 0.622 -> 0.627 ( +0.80%) [ +1.13% +0.16% +0.00% / +0.80% +0.96% +0.96%] index_add_ strided 7 : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.78% +0.16% +0.00% / +0.78% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.006 ms (0.643 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.00% +0.16% / +0.96% +1.13% +1.13%] index_add_ perm : Elapsed 0.006 ms (0.629 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +0.78% +0.31% +0.00% / +1.10% +1.10% +0.94%] index_copy_ perm : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.16% +0.00% / +1.13% +0.96% +0.96%] index_add_ perm_sorted : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.78% +0.16% +0.00% / +0.78% +0.78% +0.78%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.366 -> 5.222 ( -2.68%) [ +0.11% +0.07% +0.00% / -2.68% -1.86% -1.88%] index_select const : Elapsed 0.054 ms (5.372 ms / 100) 5.356 -> 5.255 ( -1.89%) [ +0.09% +0.00% +0.24% / -1.89% -1.46% -1.53%] index_select wrap : Elapsed 0.054 ms (5.361 ms / 100) 5.374 -> 5.248 ( -2.34%) [ +0.06% +0.00% +0.09% / -2.34% -2.27% -2.14%] index_select linear : Elapsed 0.054 ms (5.377 ms / 100) 5.370 -> 5.251 ( -2.22%) [ +0.00% +0.07% +0.13% / -2.22% -1.86% -1.84%] index_select reverse : Elapsed 0.054 ms (5.370 ms / 100) 5.367 -> 5.202 ( -3.07%) [ +0.13% +0.02% +0.00% / -3.07% -2.61% -2.52%] index_select skip64 : Elapsed 0.054 ms (5.374 ms / 100) 5.372 -> 5.204 ( -3.13%) [ +0.00% +0.07% +0.11% / -3.13% -2.46% -2.55%] index_select skip256 : Elapsed 0.054 ms (5.372 ms / 100) 5.376 -> 5.247 ( -2.40%) [ +0.09% +0.00% +0.07% / -2.40% -1.97% -1.90%] index_select spread : Elapsed 0.054 ms (5.381 ms / 100) 5.368 -> 5.259 ( -2.03%) [ +0.06% +0.00% +0.20% / -2.03% -1.47% -1.73%] index_select strided 3 : Elapsed 0.054 ms (5.371 ms / 100) 5.395 -> 5.270 ( -2.32%) [ +0.24% +0.00% +0.15% / -2.11% -2.04% -2.32%] index_select random : Elapsed 0.054 ms (5.408 ms / 100) 5.382 -> 5.257 ( -2.32%) [ +0.19% +0.00% +0.41% / -2.32% -1.73% -1.52%] index_select random_sorted : Elapsed 0.054 ms (5.392 ms / 100) B = [500, 1, 200] (stride (200, 1, 1)) A = [500, 1, 5] (stride (1, 1, 500)) dim = 2 1.216 -> 1.228 ( +0.99%) [ +1.15% +0.08% +0.00% / +1.23% +0.99% +1.15%] index_add_ linear : Elapsed 0.012 ms (1.230 ms / 100) 1.176 -> 1.183 ( +0.60%) [ +0.85% +0.00% +0.34% / +0.77% +0.68% +0.60%] index_copy_ linear : Elapsed 0.012 ms (1.186 ms / 100) 1.216 -> 1.228 ( +0.99%) [ +1.07% +0.00% +0.08% / +1.32% +1.15% +0.99%] index_add_ reverse : Elapsed 0.012 ms (1.229 ms / 100) 1.178 -> 1.184 ( +0.51%) [ +0.59% +0.00% +0.17% / +0.51% +0.51% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.185 ms / 100) 1.217 -> 1.230 ( +1.07%) [ +1.15% +0.00% +0.00% / +1.07% +1.07% +1.15%] index_add_ spread : Elapsed 0.012 ms (1.231 ms / 100) 1.175 -> 1.183 ( +0.68%) [ +0.77% +0.00% +0.85% / +0.77% +0.68% +0.85%] index_copy_ spread : Elapsed 0.012 ms (1.184 ms / 100) 1.216 -> 1.229 ( +1.07%) [ +1.15% +0.08% +0.00% / +1.15% +1.07% +1.07%] index_add_ strided 3 : Elapsed 0.012 ms (1.230 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +0.77% +0.00% +0.00% / +0.77% +0.77% +0.85%] index_copy_ strided 3 : Elapsed 0.012 ms (1.184 ms / 100) 1.217 -> 1.230 ( +1.07%) [ +1.15% +0.00% +0.00% / +1.07% +1.07% +1.07%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.173 -> 1.184 ( +0.94%) [ +0.85% +0.00% +0.94% / +1.02% +1.19% +0.94%] index_copy_ strided 7 : Elapsed 0.012 ms (1.183 ms / 100) 1.216 -> 1.230 ( +1.15%) [ +1.23% +0.08% +0.00% / +1.15% +1.15% +1.15%] index_add_ perm : Elapsed 0.012 ms (1.231 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +0.85% +0.26% +0.00% / +0.77% +0.77% +0.77%] index_copy_ perm : Elapsed 0.012 ms (1.185 ms / 100) 1.217 -> 1.229 ( +0.99%) [ +1.15% +0.00% +0.00% / +1.15% +0.99% +1.07%] index_add_ perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) 1.173 -> 1.184 ( +0.94%) [ +0.94% +0.60% +0.00% / +0.94% +1.02% +0.94%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.184 ms / 100) 8.577 -> 8.582 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.26% +0.21%] index_select const : Elapsed 0.086 ms (8.580 ms / 100) 8.580 -> 8.598 ( +0.21%) [ +0.13% +0.00% +0.07% / +0.21% +0.34% +0.28%] index_select wrap : Elapsed 0.086 ms (8.591 ms / 100) 8.582 -> 8.590 ( +0.09%) [ +0.00% +0.01% +0.03% / +0.34% +0.14% +0.09%] index_select linear : Elapsed 0.086 ms (8.582 ms / 100) 8.571 -> 8.584 ( +0.15%) [ +0.19% +0.19% +0.00% / +0.26% +0.36% +0.15%] index_select reverse : Elapsed 0.086 ms (8.587 ms / 100) 8.569 -> 8.594 ( +0.29%) [ +0.00% +0.02% +0.05% / +0.29% +0.39% +0.36%] index_select skip64 : Elapsed 0.086 ms (8.569 ms / 100) 8.570 -> 8.582 ( +0.14%) [ +0.16% +0.00% +0.06% / +0.30% +0.14% +0.16%] index_select skip256 : Elapsed 0.086 ms (8.584 ms / 100) 8.584 -> 8.596 ( +0.14%) [ +0.05% +0.00% +0.03% / +0.14% +0.19% +0.27%] index_select spread : Elapsed 0.086 ms (8.588 ms / 100) 8.580 -> 8.595 ( +0.17%) [ +0.28% +0.00% +0.08% / +0.24% +0.17% +0.33%] index_select strided 3 : Elapsed 0.086 ms (8.604 ms / 100) 8.587 -> 8.596 ( +0.10%) [ +0.05% +0.12% +0.00% / +0.15% +0.22% +0.10%] index_select random : Elapsed 0.086 ms (8.591 ms / 100) 8.585 -> 8.583 ( -0.02%) [ +0.05% +0.17% +0.00% / -0.02% +0.10% +0.05%] index_select random_sorted : Elapsed 0.086 ms (8.589 ms / 100) B = [500, 1, 200] (stride (200, 100000, 1)) A = [500, 1, 5] (stride (5, 2500, 1)) dim = 2 0.626 -> 0.631 ( +0.80%) [ +0.96% +0.16% +0.00% / +0.96% +0.96% +0.80%] index_add_ linear : Elapsed 0.006 ms (0.632 ms / 100) 0.643 -> 0.648 ( +0.78%) [ +0.78% +0.00% +0.00% / +0.78% +0.78% +0.78%] index_copy_ linear : Elapsed 0.006 ms (0.648 ms / 100) 0.627 -> 0.632 ( +0.80%) [ +1.44% +0.00% +0.00% / +0.80% +0.80% +0.80%] index_add_ reverse : Elapsed 0.006 ms (0.636 ms / 100) 0.643 -> 0.648 ( +0.78%) [ +0.78% +0.16% +0.00% / +0.93% +0.78% +0.78%] index_copy_ reverse : Elapsed 0.006 ms (0.648 ms / 100) 0.626 -> 0.631 ( +0.80%) [ +1.12% +0.16% +0.00% / +0.80% +1.12% +0.80%] index_add_ spread : Elapsed 0.006 ms (0.633 ms / 100) 0.643 -> 0.648 ( +0.78%) [ +0.93% +0.00% +0.00% / +0.78% +0.78% +0.78%] index_copy_ spread : Elapsed 0.006 ms (0.649 ms / 100) 0.626 -> 0.631 ( +0.80%) [ +0.80% +0.00% +0.16% / +0.80% +1.28% +0.96%] index_add_ strided 3 : Elapsed 0.006 ms (0.631 ms / 100) 0.642 -> 0.648 ( +0.93%) [ +1.09% +0.00% +0.16% / +0.93% +0.93% +1.09%] index_copy_ strided 3 : Elapsed 0.006 ms (0.649 ms / 100) 0.626 -> 0.631 ( +0.80%) [ +1.28% +0.00% +0.16% / +0.80% +1.12% +0.96%] index_add_ strided 7 : Elapsed 0.006 ms (0.634 ms / 100) 0.642 -> 0.648 ( +0.93%) [ +0.93% +0.00% +0.16% / +0.93% +0.93% +0.93%] index_copy_ strided 7 : Elapsed 0.006 ms (0.648 ms / 100) 0.626 -> 0.632 ( +0.96%) [ +0.80% +0.16% +0.00% / +0.96% +1.12% +0.96%] index_add_ perm : Elapsed 0.006 ms (0.631 ms / 100) 0.642 -> 0.648 ( +0.93%) [ +0.93% +0.00% +0.00% / +0.93% +1.09% +0.93%] index_copy_ perm : Elapsed 0.006 ms (0.648 ms / 100) 0.626 -> 0.633 ( +1.12%) [ +0.96% +0.16% +0.00% / +1.12% +1.12% +1.12%] index_add_ perm_sorted : Elapsed 0.006 ms (0.632 ms / 100) 0.641 -> 0.648 ( +1.09%) [ +0.94% +0.00% +0.16% / +1.09% +1.09% +1.25%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.647 ms / 100) 5.431 -> 5.241 ( -3.50%) [ +0.18% +0.00% +0.13% / -3.50% -2.91% -2.80%] index_select const : Elapsed 0.054 ms (5.441 ms / 100) 5.423 -> 5.234 ( -3.49%) [ +0.06% +0.00% +0.02% / -3.49% -3.12% -3.23%] index_select wrap : Elapsed 0.054 ms (5.426 ms / 100) 5.431 -> 5.233 ( -3.65%) [ +0.00% +0.09% +0.02% / -3.65% -3.39% -3.54%] index_select linear : Elapsed 0.054 ms (5.431 ms / 100) 5.428 -> 5.245 ( -3.37%) [ +0.06% +0.00% +0.20% / -3.37% -3.24% -3.26%] index_select reverse : Elapsed 0.054 ms (5.431 ms / 100) 5.421 -> 5.240 ( -3.34%) [ +0.13% +0.00% +0.30% / -3.34% -3.21% -2.97%] index_select skip64 : Elapsed 0.054 ms (5.428 ms / 100) 5.426 -> 5.240 ( -3.43%) [ +0.18% +0.22% +0.00% / -3.43% -3.13% -3.26%] index_select skip256 : Elapsed 0.054 ms (5.436 ms / 100) 5.421 -> 5.238 ( -3.38%) [ +0.15% +0.00% +0.31% / -3.38% -3.21% -3.10%] index_select spread : Elapsed 0.054 ms (5.429 ms / 100) 5.422 -> 5.234 ( -3.47%) [ +0.11% +0.30% +0.00% / -3.47% -3.25% -3.10%] index_select strided 3 : Elapsed 0.054 ms (5.428 ms / 100) 5.446 -> 5.243 ( -3.73%) [ +0.00% +0.09% +0.09% / -3.54% -3.65% -3.73%] index_select random : Elapsed 0.054 ms (5.446 ms / 100) 5.443 -> 5.252 ( -3.51%) [ +0.00% +0.07% +0.18% / -3.51% -3.10% -3.03%] index_select random_sorted : Elapsed 0.054 ms (5.443 ms / 100) B = [500, 1, 200] (stride (200, 100000, 1)) A = [500, 1, 5] (stride (1, 2500, 500)) dim = 2 0.623 -> 0.627 ( +0.64%) [ +0.96% +0.00% +0.16% / +0.80% +0.64% +0.80%] index_add_ linear : Elapsed 0.006 ms (0.629 ms / 100) 0.640 -> 0.644 ( +0.63%) [ +1.09% +0.00% +0.16% / +0.94% +0.63% +0.78%] index_copy_ linear : Elapsed 0.006 ms (0.647 ms / 100) 0.623 -> 0.626 ( +0.48%) [ +1.12% +0.00% +0.00% / +0.80% +0.48% +0.48%] index_add_ reverse : Elapsed 0.006 ms (0.630 ms / 100) 0.638 -> 0.642 ( +0.63%) [ +1.10% +0.16% +0.00% / +1.10% +0.78% +0.63%] index_copy_ reverse : Elapsed 0.006 ms (0.645 ms / 100) 0.623 -> 0.627 ( +0.64%) [ +0.80% +0.16% +0.00% / +0.80% +0.64% +0.64%] index_add_ spread : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.00% +0.00% / +0.94% +0.78% +0.78%] index_copy_ spread : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.32% +0.00% / +0.96% +0.96% +0.96%] index_add_ strided 3 : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.644 ( +0.94%) [ +0.78% +0.16% +0.00% / +0.94% +0.94% +1.25%] index_copy_ strided 3 : Elapsed 0.006 ms (0.643 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.16% +0.00% / +1.13% +0.96% +0.96%] index_add_ strided 7 : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.00% +0.00% / +1.10% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.00% +0.32% / +1.13% +1.13% +0.96%] index_add_ perm : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.94% +0.00% +0.00% / +0.94% +0.78% +0.78%] index_copy_ perm : Elapsed 0.006 ms (0.644 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.16% +0.00% / +1.13% +0.96% +1.13%] index_add_ perm_sorted : Elapsed 0.006 ms (0.629 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +1.10% +0.16% +0.00% / +1.73% +0.94% +0.94%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.644 ms / 100) 5.355 -> 5.212 ( -2.67%) [ +0.26% +0.00% +0.13% / -2.67% -1.98% -2.09%] index_select const : Elapsed 0.054 ms (5.369 ms / 100) 5.400 -> 5.269 ( -2.43%) [ +0.07% +0.00% +0.20% / -2.43% -2.39% -2.41%] index_select wrap : Elapsed 0.054 ms (5.404 ms / 100) 5.392 -> 5.245 ( -2.73%) [ +0.20% +0.26% +0.00% / -2.61% -2.73% -2.41%] index_select linear : Elapsed 0.054 ms (5.403 ms / 100) 5.364 -> 5.236 ( -2.39%) [ +0.34% +0.00% +0.24% / -2.39% -1.34% -1.60%] index_select reverse : Elapsed 0.054 ms (5.382 ms / 100) 5.349 -> 5.213 ( -2.54%) [ +0.37% +0.00% +0.11% / -2.54% -1.98% -1.93%] index_select skip64 : Elapsed 0.054 ms (5.369 ms / 100) 5.358 -> 5.201 ( -2.93%) [ +0.17% +0.17% +0.00% / -2.93% -2.50% -2.15%] index_select skip256 : Elapsed 0.054 ms (5.367 ms / 100) 5.366 -> 5.238 ( -2.39%) [ +0.09% +0.00% +0.17% / -2.39% -2.03% -2.09%] index_select spread : Elapsed 0.054 ms (5.371 ms / 100) 5.381 -> 5.244 ( -2.55%) [ +0.00% +0.04% +0.02% / -2.55% -2.17% -2.16%] index_select strided 3 : Elapsed 0.054 ms (5.381 ms / 100) 5.375 -> 5.264 ( -2.07%) [ +0.09% +0.15% +0.00% / -1.86% -1.95% -2.07%] index_select random : Elapsed 0.054 ms (5.380 ms / 100) 5.368 -> 5.244 ( -2.31%) [ +0.32% +0.00% +0.09% / -2.31% -1.66% -1.73%] index_select random_sorted : Elapsed 0.054 ms (5.385 ms / 100) B = [500, 1, 200] (stride (200, 100000, 1)) A = [500, 1, 5] (stride (1, 500, 500)) dim = 2 0.621 -> 0.627 ( +0.97%) [ +1.13% +0.16% +0.00% / +1.13% +1.13% +0.97%] index_add_ linear : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.00% +0.00% / +0.78% +0.94% +0.78%] index_copy_ linear : Elapsed 0.006 ms (0.645 ms / 100) 0.622 -> 0.627 ( +0.80%) [ +0.96% +0.16% +0.00% / +0.96% +0.96% +0.80%] index_add_ reverse : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.94% +0.16% +0.00% / +1.25% +0.78% +0.78%] index_copy_ reverse : Elapsed 0.006 ms (0.644 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.16% +0.00% / +0.96% +0.96% +0.96%] index_add_ spread : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.94% +0.16% +0.00% / +0.94% +0.78% +0.78%] index_copy_ spread : Elapsed 0.006 ms (0.644 ms / 100) 0.623 -> 0.628 ( +0.80%) [ +0.80% +0.00% +0.00% / +0.80% +0.80% +0.80%] index_add_ strided 3 : Elapsed 0.006 ms (0.628 ms / 100) 0.639 -> 0.643 ( +0.63%) [ +0.78% +0.47% +0.00% / +0.63% +1.41% +0.63%] index_copy_ strided 3 : Elapsed 0.006 ms (0.644 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +1.13% +0.00% +0.00% / +0.96% +0.96% +0.96%] index_add_ strided 7 : Elapsed 0.006 ms (0.629 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +0.94% +0.00% +0.00% / +0.94% +0.78% +0.94%] index_copy_ strided 7 : Elapsed 0.006 ms (0.644 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +0.96% +0.16% +0.00% / +0.96% +0.96% +0.96%] index_add_ perm : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +0.94% +0.16% +0.00% / +0.94% +1.10% +1.10%] index_copy_ perm : Elapsed 0.006 ms (0.643 ms / 100) 0.622 -> 0.628 ( +0.96%) [ +0.96% +0.00% +0.00% / +0.96% +0.96% +0.96%] index_add_ perm_sorted : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.643 ( +0.94%) [ +0.94% +0.00% +0.00% / +1.10% +0.94% +0.94%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.358 -> 5.220 ( -2.58%) [ +0.00% +0.21% +0.06% / -2.58% -1.61% -1.59%] index_select const : Elapsed 0.054 ms (5.358 ms / 100) 5.373 -> 5.249 ( -2.31%) [ +0.04% +0.07% +0.00% / -2.31% -1.79% -1.75%] index_select wrap : Elapsed 0.054 ms (5.375 ms / 100) 5.367 -> 5.234 ( -2.48%) [ +0.34% +0.00% +0.24% / -2.48% -2.03% -2.01%] index_select linear : Elapsed 0.054 ms (5.385 ms / 100) 5.366 -> 5.253 ( -2.11%) [ +0.20% +0.17% +0.00% / -2.11% -1.92% -1.73%] index_select reverse : Elapsed 0.054 ms (5.377 ms / 100) 5.361 -> 5.214 ( -2.74%) [ +0.32% +0.26% +0.00% / -2.74% -2.35% -2.16%] index_select skip64 : Elapsed 0.054 ms (5.378 ms / 100) 5.362 -> 5.207 ( -2.89%) [ +0.00% +0.28% +0.11% / -2.89% -2.07% -2.50%] index_select skip256 : Elapsed 0.054 ms (5.362 ms / 100) 5.376 -> 5.244 ( -2.46%) [ +0.00% +0.00% +0.09% / -2.46% -2.01% -2.03%] index_select spread : Elapsed 0.054 ms (5.376 ms / 100) 5.366 -> 5.254 ( -2.09%) [ +0.39% +0.00% +0.04% / -2.09% -1.55% -1.77%] index_select strided 3 : Elapsed 0.054 ms (5.387 ms / 100) 5.395 -> 5.277 ( -2.19%) [ +0.00% +0.02% +0.11% / -2.06% -2.19% -2.09%] index_select random : Elapsed 0.054 ms (5.395 ms / 100) 5.383 -> 5.262 ( -2.25%) [ +0.00% +0.04% +0.35% / -2.25% -1.76% -1.54%] index_select random_sorted : Elapsed 0.054 ms (5.383 ms / 100) B = [500, 1, 200] (stride (1, 100000, 500)) A = [500, 1, 5] (stride (5, 5, 1)) dim = 2 1.219 -> 1.230 ( +0.90%) [ +1.07% +0.16% +0.00% / +1.23% +0.90% +0.90%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.76% +0.08% +0.00% / +0.68% +0.68% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.188 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +1.15% +0.08% +0.00% / +0.98% +0.90% +0.90%] index_add_ reverse : Elapsed 0.012 ms (1.233 ms / 100) 1.179 -> 1.185 ( +0.51%) [ +0.76% +0.00% +0.25% / +0.68% +0.68% +0.51%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.220 -> 1.230 ( +0.82%) [ +0.98% +0.08% +0.00% / +1.07% +0.82% +0.82%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.184 -> 1.186 ( +0.17%) [ +0.25% +0.08% +0.00% / +0.25% +0.17% +0.17%] index_copy_ spread : Elapsed 0.012 ms (1.187 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +0.98% +0.08% +0.00% / +1.23% +0.90% +0.90%] index_add_ strided 3 : Elapsed 0.012 ms (1.231 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.42% +0.00% +0.08% / +0.59% +0.42% +0.42%] index_copy_ strided 3 : Elapsed 0.012 ms (1.187 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +1.07% +0.08% +0.00% / +0.98% +0.98% +0.90%] index_add_ strided 7 : Elapsed 0.012 ms (1.232 ms / 100) 1.183 -> 1.186 ( +0.25%) [ +0.34% +0.00% +0.25% / +0.25% +0.25% +0.25%] index_copy_ strided 7 : Elapsed 0.012 ms (1.187 ms / 100) 1.218 -> 1.230 ( +0.99%) [ +1.15% +0.16% +0.00% / +1.07% +0.99% +0.99%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.181 -> 1.185 ( +0.34%) [ +0.51% +0.00% +0.08% / +0.51% +0.51% +0.34%] index_copy_ perm : Elapsed 0.012 ms (1.187 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +0.98% +0.00% +0.00% / +0.98% +0.90% +0.90%] index_add_ perm_sorted : Elapsed 0.012 ms (1.231 ms / 100) 1.179 -> 1.186 ( +0.59%) [ +0.68% +0.00% +0.51% / +0.59% +0.59% +0.68%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.187 ms / 100) 8.427 -> 8.450 ( +0.27%) [ +0.12% +0.00% +0.06% / +0.27% +0.40% +0.56%] index_select const : Elapsed 0.084 ms (8.437 ms / 100) 8.422 -> 8.431 ( +0.11%) [ +0.24% +0.11% +0.00% / +0.11% +0.42% +0.40%] index_select wrap : Elapsed 0.084 ms (8.442 ms / 100) 8.426 -> 8.446 ( +0.24%) [ +0.20% +0.15% +0.00% / +0.24% +0.59% +0.36%] index_select linear : Elapsed 0.084 ms (8.443 ms / 100) 8.445 -> 8.432 ( -0.15%) [ +0.00% +0.06% +0.00% / -0.15% +0.09% +0.07%] index_select reverse : Elapsed 0.084 ms (8.445 ms / 100) 8.422 -> 8.437 ( +0.18%) [ +0.09% +0.00% +0.04% / +0.18% +0.42% +0.56%] index_select skip64 : Elapsed 0.084 ms (8.430 ms / 100) 8.430 -> 8.454 ( +0.28%) [ +0.19% +0.08% +0.00% / +0.42% +0.50% +0.28%] index_select skip256 : Elapsed 0.084 ms (8.446 ms / 100) 8.404 -> 8.429 ( +0.30%) [ +0.56% +0.13% +0.00% / +0.30% +0.71% +0.70%] index_select spread : Elapsed 0.085 ms (8.451 ms / 100) 8.427 -> 8.429 ( +0.02%) [ +0.12% +0.00% +0.06% / +0.02% +0.32% +0.15%] index_select strided 3 : Elapsed 0.084 ms (8.437 ms / 100) 8.429 -> 8.439 ( +0.12%) [ +0.05% +0.00% +0.19% / +0.12% +0.37% +0.27%] index_select random : Elapsed 0.084 ms (8.433 ms / 100) 8.421 -> 8.441 ( +0.24%) [ +0.20% +0.00% +0.12% / +0.24% +0.34% +0.29%] index_select random_sorted : Elapsed 0.084 ms (8.438 ms / 100) B = [500, 1, 200] (stride (1, 1, 500)) A = [500, 1, 5] (stride (5, 1, 1)) dim = 2 1.219 -> 1.231 ( +0.98%) [ +1.07% +0.08% +0.00% / +1.07% +0.98% +1.23%] index_add_ linear : Elapsed 0.012 ms (1.232 ms / 100) 1.180 -> 1.187 ( +0.59%) [ +0.59% +0.00% +0.17% / +0.76% +0.59% +0.59%] index_copy_ linear : Elapsed 0.012 ms (1.187 ms / 100) 1.219 -> 1.232 ( +1.07%) [ +1.07% +0.08% +0.00% / +1.07% +1.07% +1.07%] index_add_ reverse : Elapsed 0.012 ms (1.232 ms / 100) 1.180 -> 1.188 ( +0.68%) [ +0.68% +0.00% +0.17% / +0.68% +0.76% +0.68%] index_copy_ reverse : Elapsed 0.012 ms (1.188 ms / 100) 1.219 -> 1.231 ( +0.98%) [ +1.07% +0.08% +0.00% / +0.98% +1.15% +0.98%] index_add_ spread : Elapsed 0.012 ms (1.232 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.51% +0.25% +0.00% / +0.42% +0.51% +0.51%] index_copy_ spread : Elapsed 0.012 ms (1.188 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +0.98% +0.08% +0.00% / +0.90% +0.98% +0.98%] index_add_ strided 3 : Elapsed 0.012 ms (1.231 ms / 100) 1.182 -> 1.187 ( +0.42%) [ +0.42% +0.00% +0.42% / +0.51% +0.42% +0.42%] index_copy_ strided 3 : Elapsed 0.012 ms (1.187 ms / 100) 1.219 -> 1.231 ( +0.98%) [ +0.98% +0.00% +0.00% / +0.98% +0.98% +1.07%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.184 -> 1.186 ( +0.17%) [ +0.25% +0.00% +0.00% / +0.25% +0.34% +0.17%] index_copy_ strided 7 : Elapsed 0.012 ms (1.187 ms / 100) 1.220 -> 1.231 ( +0.90%) [ +0.98% +0.00% +0.00% / +0.98% +0.90% +0.98%] index_add_ perm : Elapsed 0.012 ms (1.232 ms / 100) 1.179 -> 1.187 ( +0.68%) [ +0.68% +0.00% +0.42% / +0.68% +0.76% +0.68%] index_copy_ perm : Elapsed 0.012 ms (1.187 ms / 100) 1.220 -> 1.232 ( +0.98%) [ +0.98% +0.00% +0.08% / +0.98% +1.07% +0.98%] index_add_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) 1.176 -> 1.188 ( +1.02%) [ +1.11% +0.51% +0.00% / +1.02% +1.11% +1.02%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.189 ms / 100) 8.469 -> 8.499 ( +0.35%) [ +0.02% +0.00% +0.19% / +0.35% +0.40% +0.46%] index_select const : Elapsed 0.085 ms (8.471 ms / 100) 8.478 -> 8.489 ( +0.13%) [ +0.00% +0.00% +0.04% / +0.13% +0.44% +0.45%] index_select wrap : Elapsed 0.085 ms (8.478 ms / 100) 8.472 -> 8.496 ( +0.28%) [ +0.00% +0.18% +0.15% / +0.28% +0.38% +0.38%] index_select linear : Elapsed 0.085 ms (8.472 ms / 100) 8.468 -> 8.478 ( +0.12%) [ +0.15% +0.00% +0.24% / +0.12% +0.37% +0.57%] index_select reverse : Elapsed 0.085 ms (8.481 ms / 100) 8.472 -> 8.488 ( +0.19%) [ +0.14% +0.09% +0.00% / +0.19% +0.52% +0.48%] index_select skip64 : Elapsed 0.085 ms (8.484 ms / 100) 8.487 -> 8.488 ( +0.01%) [ +0.15% +0.00% +0.16% / +0.01% +0.16% +0.25%] index_select skip256 : Elapsed 0.085 ms (8.500 ms / 100) 8.476 -> 8.500 ( +0.28%) [ +0.00% +0.04% +0.04% / +0.28% +0.37% +0.47%] index_select spread : Elapsed 0.085 ms (8.476 ms / 100) 8.471 -> 8.482 ( +0.13%) [ +0.33% +0.00% +0.02% / +0.13% +0.61% +0.57%] index_select strided 3 : Elapsed 0.085 ms (8.499 ms / 100) 8.481 -> 8.502 ( +0.25%) [ +0.18% +0.00% +0.20% / +0.25% +0.42% +0.40%] index_select random : Elapsed 0.085 ms (8.496 ms / 100) 8.468 -> 8.483 ( +0.18%) [ +0.21% +0.00% +0.01% / +0.18% +0.53% +0.54%] index_select random_sorted : Elapsed 0.085 ms (8.486 ms / 100) out_shape = [200, 5, 1] in_shape = [500, 5, 1] idx_dim = 0 B = [200, 5, 1] (stride (5, 1, 1)) dim = 0 fill_cnt = 500 0.439 -> 0.436 ( -0.68%) [ +0.00% +1.82% +12.53% / +1.14% -0.68% +8.66%] index_fill_ const : Elapsed 0.004 ms (0.439 ms / 100) 0.433 -> 0.441 ( +1.85%) [ +0.92% +3.23% +0.00% / +1.85% +3.70% +17.78%] index_fill_ linear : Elapsed 0.004 ms (0.437 ms / 100) 0.434 -> 0.437 ( +0.69%) [ +1.61% +3.23% +0.00% / +1.84% +0.69% +0.69%] index_fill_ reverse : Elapsed 0.004 ms (0.441 ms / 100) 0.440 -> 0.438 ( -0.45%) [ +0.00% +1.82% +0.45% / -0.45% +0.23% +0.23%] index_fill_ skip64 : Elapsed 0.004 ms (0.440 ms / 100) 0.432 -> 0.439 ( +1.62%) [ +1.39% +3.01% +0.00% / +2.31% +1.62% +2.31%] index_fill_ skip256 : Elapsed 0.004 ms (0.438 ms / 100) bad 0.433 -> 0.464 ( +7.16%) [ +1.85% +2.31% +0.00% / +7.62% +7.16% +8.78%] index_fill_ spread : Elapsed 0.004 ms (0.441 ms / 100) 0.434 -> 0.437 ( +0.69%) [ +0.46% +3.23% +0.00% / +17.97% +1.84% +0.69%] index_fill_ strided 3 : Elapsed 0.004 ms (0.436 ms / 100) 0.436 -> 0.438 ( +0.46%) [ +0.00% +2.98% +0.69% / +0.69% +1.38% +0.46%] index_fill_ strided 5 : Elapsed 0.004 ms (0.436 ms / 100) 0.434 -> 0.435 ( +0.23%) [ +0.69% +3.69% +0.00% / +1.84% +0.92% +0.23%] index_fill_ strided 7 : Elapsed 0.004 ms (0.437 ms / 100) 0.434 -> 0.437 ( +0.69%) [ +0.69% +3.92% +0.00% / +3.46% +1.38% +0.69%] index_fill_ strided 8 : Elapsed 0.004 ms (0.437 ms / 100) 0.440 -> 0.435 ( -1.14%) [ +0.23% +2.50% +0.00% / +0.00% +5.68% -1.14%] index_fill_ strided 16 : Elapsed 0.004 ms (0.441 ms / 100) 0.439 -> 0.439 ( +0.00%) [ +0.00% +1.59% +0.00% / +0.00% +15.26% +0.46%] index_fill_ strided 64 : Elapsed 0.004 ms (0.439 ms / 100) 0.438 -> 0.431 ( -1.60%) [ +0.00% +1.37% +0.00% / +0.23% -1.60% -1.14%] index_fill_ strided 100 : Elapsed 0.004 ms (0.438 ms / 100) 0.436 -> 0.437 ( +0.23%) [ +1.15% +2.52% +0.00% / +2.06% +0.23% +8.94%] index_fill_ random : Elapsed 0.004 ms (0.441 ms / 100) bad 0.437 -> 0.464 ( +6.18%) [ +2.52% +2.52% +0.00% / +12.59% +6.18% +11.44%] index_fill_ random_sorted : Elapsed 0.004 ms (0.448 ms / 100) B = [200, 5, 1] (stride (1, 200, 200)) A = [500, 5, 1] (stride (1, 500, 2500)) dim = 0 0.518 -> 0.525 ( +1.35%) [ +5.79% +2.12% +0.00% / +2.70% +9.27% +1.35%] index_select const : Elapsed 0.005 ms (0.548 ms / 100) 0.525 -> 0.517 ( -1.52%) [ +3.05% +0.00% +3.62% / +1.90% -1.52% +0.38%] index_select wrap : Elapsed 0.005 ms (0.541 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +2.47% +0.00% +15.59% / +0.95% -1.14% +0.76%] index_select linear : Elapsed 0.005 ms (0.539 ms / 100) 0.522 -> 0.531 ( +1.72%) [ +5.75% +1.92% +0.00% / +1.72% +2.68% +14.94%] index_select reverse : Elapsed 0.006 ms (0.552 ms / 100) 0.517 -> 0.521 ( +0.77%) [ +4.84% +1.35% +0.00% / +3.29% +0.77% +9.86%] index_select skip64 : Elapsed 0.005 ms (0.542 ms / 100) 0.511 -> 0.517 ( +1.17%) [ +6.46% +2.74% +0.00% / +4.70% +1.17% +4.31%] index_select skip256 : Elapsed 0.005 ms (0.544 ms / 100) 0.522 -> 0.515 ( -1.34%) [ +4.60% +0.77% +0.00% / +2.87% -1.34% +1.72%] index_select spread : Elapsed 0.005 ms (0.546 ms / 100) 0.519 -> 0.519 ( +0.00%) [ +4.05% +0.19% +0.00% / +4.82% +0.00% +6.74%] index_select strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.521 -> 0.530 ( +1.73%) [ +4.99% +2.88% +0.00% / +3.26% +1.73% +1.92%] index_select strided 5 : Elapsed 0.005 ms (0.547 ms / 100) 0.517 -> 0.518 ( +0.19%) [ +4.26% +0.97% +0.00% / +2.71% +0.19% +3.48%] index_select strided 7 : Elapsed 0.005 ms (0.539 ms / 100) 0.515 -> 0.518 ( +0.58%) [ +5.44% +1.75% +0.00% / +3.88% +0.58% +2.91%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.522 ( +0.00%) [ +3.83% +0.19% +0.00% / +2.49% +0.00% +3.64%] index_select strided 16 : Elapsed 0.005 ms (0.542 ms / 100) 0.514 -> 0.515 ( +0.19%) [+10.51% +1.75% +0.00% / +4.67% +0.19% +4.67%] index_select strided 64 : Elapsed 0.006 ms (0.568 ms / 100) 0.514 -> 0.532 ( +3.50%) [ +5.84% +0.97% +0.00% / +12.06% +5.25% +3.50%] index_select strided 100 : Elapsed 0.005 ms (0.544 ms / 100) 0.519 -> 0.534 ( +2.89%) [ +5.39% +1.16% +0.00% / +10.21% +8.48% +2.89%] index_select strided 255 : Elapsed 0.005 ms (0.547 ms / 100) 0.521 -> 0.519 ( -0.38%) [ +4.61% +0.00% +0.58% / +15.55% -0.38% +3.45%] index_select strided 256 : Elapsed 0.005 ms (0.545 ms / 100) 0.520 -> 0.520 ( +0.00%) [ +5.38% +0.19% +0.00% / +4.81% +0.00% +3.65%] index_select strided 257 : Elapsed 0.005 ms (0.548 ms / 100) 0.519 -> 0.514 ( -0.96%) [ +5.39% +0.77% +0.00% / +3.08% -0.96% +5.59%] index_select random : Elapsed 0.005 ms (0.547 ms / 100) 0.521 -> 0.515 ( -1.15%) [ +5.37% +1.15% +0.00% / +7.49% -1.15% +2.11%] index_select random_sorted : Elapsed 0.005 ms (0.549 ms / 100) 0.517 -> 0.519 ( +0.39%) [ +5.61% +1.74% +0.00% / +2.90% +0.39% +4.84%] index_select perm : Elapsed 0.005 ms (0.546 ms / 100) 0.524 -> 0.519 ( -0.95%) [ +4.58% +0.00% +0.19% / +1.72% -0.95% +1.53%] index_select perm_sorted : Elapsed 0.005 ms (0.548 ms / 100) out_shape = [500, 200, 1] in_shape = [500, 5, 1] idx_dim = 1 B = [500, 200, 1] (stride (200, 1, 1)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 1 0.576 -> 0.579 ( +0.52%) [ +1.04% +0.17% +0.00% / +1.04% +0.52% +0.52%] index_add_ linear : Elapsed 0.006 ms (0.582 ms / 100) 0.591 -> 0.597 ( +1.02%) [ +1.02% +0.00% +0.17% / +1.86% +1.52% +1.02%] index_copy_ linear : Elapsed 0.006 ms (0.597 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +0.87% +0.17% +0.00% / +1.22% +0.52% +0.52%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.590 -> 0.594 ( +0.68%) [ +1.19% +0.00% +0.00% / +0.68% +1.19% +1.02%] index_copy_ reverse : Elapsed 0.006 ms (0.597 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +2.09% +0.17% +0.00% / +0.87% +0.70% +0.87%] index_add_ spread : Elapsed 0.006 ms (0.587 ms / 100) 0.590 -> 0.594 ( +0.68%) [ +1.19% +0.34% +0.00% / +1.36% +0.68% +1.02%] index_copy_ spread : Elapsed 0.006 ms (0.597 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +1.22% +0.17% +0.00% / +0.87% +0.87% +0.52%] index_add_ strided 3 : Elapsed 0.006 ms (0.582 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.19% +0.00% +0.34% / +1.02% +1.36% +1.36%] index_copy_ strided 3 : Elapsed 0.006 ms (0.596 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.17% +0.00% / +0.87% +0.87% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.588 -> 0.595 ( +1.19%) [ +1.36% +0.00% +0.17% / +1.53% +1.19% +1.53%] index_copy_ strided 7 : Elapsed 0.006 ms (0.596 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.17% +0.00% / +1.04% +0.87% +0.70%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.596 ( +1.19%) [ +0.85% +0.00% +0.00% / +1.19% +1.53% +1.53%] index_copy_ perm : Elapsed 0.006 ms (0.594 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.00% +0.00% / +1.04% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.588 -> 0.597 ( +1.53%) [ +1.70% +0.51% +0.00% / +1.87% +1.87% +1.53%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.598 ms / 100) good 5.141 -> 4.788 ( -6.87%) [ +0.00% +0.10% +0.18% / -6.44% -6.87% -6.83%] index_select const : Elapsed 0.051 ms (5.141 ms / 100) good 5.157 -> 4.795 ( -7.02%) [ +0.00% +0.00% +0.02% / -7.02% -6.90% -6.90%] index_select wrap : Elapsed 0.052 ms (5.157 ms / 100) good 5.148 -> 4.795 ( -6.86%) [ +0.00% +0.17% +0.14% / -6.74% -6.86% -6.82%] index_select linear : Elapsed 0.051 ms (5.148 ms / 100) good 5.152 -> 4.790 ( -7.03%) [ +0.12% +0.00% +0.12% / -7.03% -6.60% -6.75%] index_select reverse : Elapsed 0.052 ms (5.158 ms / 100) good 5.137 -> 4.781 ( -6.93%) [ +0.25% +0.00% +0.27% / -6.64% -6.93% -6.83%] index_select skip64 : Elapsed 0.052 ms (5.150 ms / 100) good 5.161 -> 4.790 ( -7.19%) [ +0.00% +0.08% +0.17% / -7.11% -7.09% -7.19%] index_select skip256 : Elapsed 0.052 ms (5.161 ms / 100) good 5.143 -> 4.788 ( -6.90%) [ +0.29% +0.04% +0.00% / -6.44% -6.82% -6.90%] index_select spread : Elapsed 0.052 ms (5.158 ms / 100) good 5.168 -> 4.796 ( -7.20%) [ +0.00% +0.06% +0.08% / -7.20% -7.08% -7.20%] index_select strided 3 : Elapsed 0.052 ms (5.168 ms / 100) good 5.183 -> 4.800 ( -7.39%) [ +0.04% +0.08% +0.00% / -7.22% -7.35% -7.39%] index_select random : Elapsed 0.052 ms (5.185 ms / 100) good 5.148 -> 4.797 ( -6.82%) [ +0.17% +0.00% +0.10% / -6.76% -6.82% -6.49%] index_select random_sorted : Elapsed 0.052 ms (5.157 ms / 100) B = [500, 200, 1] (stride (200, 1, 1)) A = [500, 5, 1] (stride (1, 500, 1)) dim = 1 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.18% +0.00% / +1.23% +0.35% +0.18%] index_add_ linear : Elapsed 0.006 ms (0.574 ms / 100) 0.584 -> 0.588 ( +0.68%) [ +0.86% +0.34% +0.00% / +0.86% +0.68% +0.68%] index_copy_ linear : Elapsed 0.006 ms (0.589 ms / 100) 0.572 -> 0.573 ( +0.17%) [ +0.52% +0.00% +0.87% / +0.70% +0.17% +0.17%] index_add_ reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.20% +0.00% +0.00% / +1.71% +0.68% +0.68%] index_copy_ reverse : Elapsed 0.006 ms (0.592 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.70% +0.18% +0.00% / +0.70% +0.53% +0.53%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +0.51% +0.17% +0.00% / +0.68% +0.85% +0.68%] index_copy_ spread : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.35% +0.35% +0.35%] index_add_ strided 3 : Elapsed 0.006 ms (0.575 ms / 100) 0.586 -> 0.588 ( +0.34%) [ +0.51% +0.00% +0.00% / +0.68% +0.51% +0.34%] index_copy_ strided 3 : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.00% +0.00% / +0.53% +0.53% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +0.85% +0.00% +0.17% / +0.85% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.590 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.53% +0.18% +0.00% / +0.70% +0.53% +0.53%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.584 -> 0.589 ( +0.86%) [ +2.05% +0.17% +0.00% / +0.86% +1.03% +19.86%] index_copy_ perm : Elapsed 0.006 ms (0.596 ms / 100) 0.572 -> 0.574 ( +0.35%) [ +0.35% +0.17% +0.00% / +0.52% +0.35% +1.22%] index_add_ perm_sorted : Elapsed 0.006 ms (0.574 ms / 100) 0.584 -> 0.588 ( +0.68%) [ +0.86% +0.00% +0.17% / +1.03% +0.86% +0.68%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.589 ms / 100) good 5.071 -> 4.792 ( -5.50%) [ +0.02% +0.00% +0.08% / -5.50% -5.48% -5.44%] index_select const : Elapsed 0.051 ms (5.072 ms / 100) 5.046 -> 4.819 ( -4.50%) [ +0.28% +0.12% +0.00% / -4.50% -4.50% -4.34%] index_select wrap : Elapsed 0.051 ms (5.060 ms / 100) good 5.065 -> 4.786 ( -5.51%) [ +0.02% +0.04% +0.00% / -4.98% -5.31% -5.51%] index_select linear : Elapsed 0.051 ms (5.066 ms / 100) good 5.062 -> 4.792 ( -5.33%) [ +0.20% +0.06% +0.00% / -5.29% -5.04% -5.33%] index_select reverse : Elapsed 0.051 ms (5.072 ms / 100) good 5.085 -> 4.778 ( -6.04%) [ +0.14% +0.00% +0.10% / -5.68% -6.04% -5.72%] index_select skip64 : Elapsed 0.051 ms (5.092 ms / 100) good 5.060 -> 4.788 ( -5.38%) [ +0.18% +0.00% +0.28% / -5.36% -5.38% -5.34%] index_select skip256 : Elapsed 0.051 ms (5.069 ms / 100) good 5.048 -> 4.786 ( -5.19%) [ +0.32% +0.32% +0.00% / -4.95% -5.13% -5.19%] index_select spread : Elapsed 0.051 ms (5.064 ms / 100) 5.046 -> 4.815 ( -4.58%) [ +0.28% +0.10% +0.00% / -4.36% -4.58% -4.50%] index_select strided 3 : Elapsed 0.051 ms (5.060 ms / 100) good 5.115 -> 4.827 ( -5.63%) [ +0.10% +0.00% +0.12% / -5.20% -5.63% -5.51%] index_select random : Elapsed 0.051 ms (5.120 ms / 100) good 5.080 -> 4.802 ( -5.47%) [ +0.20% +0.00% +0.20% / -5.47% -5.28% -5.37%] index_select random_sorted : Elapsed 0.051 ms (5.090 ms / 100) B = [500, 200, 1] (stride (200, 1, 200)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 1 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.17% +0.00% / +7.48% +0.52% +0.70%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.596 ( +1.19%) [ +1.19% +0.17% +0.00% / +2.38% +1.19% +2.38%] index_copy_ linear : Elapsed 0.006 ms (0.596 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.17% +0.00% / +1.04% +0.52% +0.52%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.594 ( +0.85%) [ +1.36% +0.17% +0.00% / +1.19% +1.19% +0.85%] index_copy_ reverse : Elapsed 0.006 ms (0.597 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.17% +0.00% / +1.04% +0.52% +1.91%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.02% +0.00% +0.34% / +1.36% +1.02% +2.21%] index_copy_ spread : Elapsed 0.006 ms (0.595 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.00% +0.00% / +1.22% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.595 ( +1.02%) [ +1.19% +0.00% +0.00% / +1.36% +1.02% +1.19%] index_copy_ strided 3 : Elapsed 0.006 ms (0.596 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.17% +0.00% / +0.87% +0.87% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.589 -> 0.597 ( +1.36%) [ +1.19% +0.17% +0.00% / +1.36% +1.36% +1.53%] index_copy_ strided 7 : Elapsed 0.006 ms (0.596 ms / 100) 0.576 -> 0.578 ( +0.35%) [ +0.69% +0.00% +0.00% / +0.69% +0.52% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.589 -> 0.596 ( +1.19%) [ +1.02% +0.00% +0.00% / +1.36% +1.53% +1.19%] index_copy_ perm : Elapsed 0.006 ms (0.595 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +1.22% +0.35% +0.00% / +0.87% +0.87% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.588 -> 0.598 ( +1.70%) [ +1.19% +0.00% +0.00% / +1.70% +1.70% +1.70%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.595 ms / 100) good 5.143 -> 4.781 ( -7.04%) [ +0.27% +0.00% +0.02% / -6.59% -7.04% -6.90%] index_select const : Elapsed 0.052 ms (5.157 ms / 100) good 5.150 -> 4.799 ( -6.82%) [ +0.14% +0.00% +0.14% / -6.82% -6.72% -6.82%] index_select wrap : Elapsed 0.052 ms (5.157 ms / 100) good 5.152 -> 4.795 ( -6.93%) [ +0.14% +0.00% +0.02% / -6.70% -6.93% -6.87%] index_select linear : Elapsed 0.052 ms (5.159 ms / 100) good 5.152 -> 4.803 ( -6.77%) [ +0.14% +0.00% +0.25% / -6.77% -6.58% -6.62%] index_select reverse : Elapsed 0.052 ms (5.159 ms / 100) good 5.150 -> 4.783 ( -7.13%) [ +0.00% +0.00% +0.08% / -7.09% -6.99% -7.13%] index_select skip64 : Elapsed 0.052 ms (5.150 ms / 100) good 5.157 -> 4.785 ( -7.21%) [ +0.12% +0.12% +0.00% / -7.21% -6.90% -6.71%] index_select skip256 : Elapsed 0.052 ms (5.163 ms / 100) good 5.151 -> 4.778 ( -7.24%) [ +0.02% +0.00% +0.00% / -6.81% -7.24% -7.14%] index_select spread : Elapsed 0.052 ms (5.152 ms / 100) good 5.171 -> 4.785 ( -7.46%) [ +0.02% +0.00% +0.08% / -7.46% -7.17% -7.16%] index_select strided 3 : Elapsed 0.052 ms (5.172 ms / 100) good 5.180 -> 4.800 ( -7.34%) [ +0.06% +0.00% +0.12% / -7.05% -7.22% -7.34%] index_select random : Elapsed 0.052 ms (5.183 ms / 100) good 5.148 -> 4.771 ( -7.32%) [ +0.04% +0.00% +0.17% / -7.32% -6.68% -6.64%] index_select random_sorted : Elapsed 0.051 ms (5.150 ms / 100) B = [500, 200, 1] (stride (1, 500, 1)) dim = 1 fill_cnt = 5 0.451 -> 0.456 ( +1.11%) [ +2.44% +0.89% +0.00% / +1.33% +1.11% +105.32%] index_fill_ const : Elapsed 0.005 ms (0.462 ms / 100) 0.452 -> 0.453 ( +0.22%) [ +1.11% +0.00% +0.00% / +1.33% +0.22% +81.64%] index_fill_ linear : Elapsed 0.005 ms (0.457 ms / 100) 0.452 -> 0.453 ( +0.22%) [ +0.88% +2.43% +0.00% / +3.10% +0.22% +83.41%] index_fill_ reverse : Elapsed 0.005 ms (0.456 ms / 100) 0.452 -> 0.455 ( +0.66%) [ +1.99% +9.73% +0.00% / +13.05% +0.66% +83.41%] index_fill_ skip64 : Elapsed 0.005 ms (0.461 ms / 100) good 0.483 -> 0.454 ( -6.00%) [ +0.00% +4.97% +0.62% / -5.38% -6.00% +70.39%] index_fill_ skip256 : Elapsed 0.005 ms (0.483 ms / 100) 0.452 -> 0.453 ( +0.22%) [ +0.88% +0.00% +0.66% / +1.33% +0.22% +95.80%] index_fill_ spread : Elapsed 0.005 ms (0.456 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.33% +0.00% +0.22% / +1.33% +2.22% +110.20%] index_fill_ strided 3 : Elapsed 0.005 ms (0.457 ms / 100) 0.452 -> 0.455 ( +0.66%) [ +1.11% +0.00% +0.00% / +0.88% +0.66% +2.21%] index_fill_ strided 5 : Elapsed 0.005 ms (0.457 ms / 100) 0.452 -> 0.454 ( +0.44%) [ +1.11% +0.00% +0.00% / +1.33% +0.44% +2.43%] index_fill_ strided 7 : Elapsed 0.005 ms (0.457 ms / 100) 0.451 -> 0.455 ( +0.89%) [ +0.89% +0.00% +0.00% / +0.89% +0.89% +2.22%] index_fill_ strided 8 : Elapsed 0.005 ms (0.455 ms / 100) 0.451 -> 0.454 ( +0.67%) [ +1.11% +0.00% +2.88% / +16.41% +0.67% +0.67%] index_fill_ strided 16 : Elapsed 0.005 ms (0.456 ms / 100) 0.450 -> 0.454 ( +0.89%) [ +1.11% +0.22% +0.00% / +0.89% +1.56% +1.33%] index_fill_ strided 64 : Elapsed 0.005 ms (0.455 ms / 100) 0.451 -> 0.458 ( +1.55%) [ +0.89% +0.22% +0.00% / +2.88% +1.55% +1.55%] index_fill_ strided 100 : Elapsed 0.005 ms (0.455 ms / 100) 0.451 -> 0.455 ( +0.89%) [ +0.89% +0.22% +0.00% / +12.86% +11.09% +0.89%] index_fill_ random : Elapsed 0.005 ms (0.455 ms / 100) 0.451 -> 0.453 ( +0.44%) [ +0.67% +0.22% +0.00% / +0.44% +1.55% +1.11%] index_fill_ random_sorted : Elapsed 0.005 ms (0.454 ms / 100) 0.451 -> 0.454 ( +0.67%) [ +8.20% +1.77% +0.00% / +0.67% +1.33% +1.33%] index_fill_ perm : Elapsed 0.005 ms (0.488 ms / 100) 0.451 -> 0.454 ( +0.67%) [ +0.67% +0.00% +0.00% / +0.67% +1.33% +1.33%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.454 ms / 100) B = [500, 200, 1] (stride (1, 500, 500)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 1 0.574 -> 0.580 ( +1.05%) [ +1.05% +0.35% +0.00% / +1.22% +1.05% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.580 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.89% +0.00% +0.00% / +0.71% +0.71% +0.54%] index_copy_ linear : Elapsed 0.006 ms (0.565 ms / 100) 0.576 -> 0.580 ( +0.69%) [ +0.87% +0.00% +0.00% / +0.87% +0.69% +0.69%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.36% / +0.89% +0.89% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.17% +0.00% / +1.04% +1.04% +0.87%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.71% +0.18% +0.00% / +0.71% +0.89% +0.54%] index_copy_ spread : Elapsed 0.006 ms (0.564 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +0.87% +0.35% +0.00% / +1.22% +1.05% +1.57%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +0.89% +0.89%] index_copy_ strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.00% +0.17% / +0.87% +1.04% +1.39%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.72% +0.00% +0.00% / +0.89% +0.89% +0.89%] index_copy_ strided 7 : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +0.17% +0.00% / +0.87% +1.39% +1.39%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.00% +0.36% / +0.89% +0.89% +0.89%] index_copy_ perm : Elapsed 0.006 ms (0.564 ms / 100) 0.576 -> 0.580 ( +0.69%) [ +0.69% +0.00% +0.00% / +0.69% +0.69% +0.87%] index_add_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +0.89% +1.07%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.564 ms / 100) 4.818 -> 4.821 ( +0.06%) [ +0.17% +0.00% +0.08% / +0.06% +0.66% +0.64%] index_select const : Elapsed 0.048 ms (4.826 ms / 100) 4.801 -> 4.812 ( +0.23%) [ +0.40% +0.00% +0.17% / +0.23% +0.35% +0.23%] index_select wrap : Elapsed 0.048 ms (4.820 ms / 100) 4.817 -> 4.805 ( -0.25%) [ +0.17% +0.08% +0.00% / +0.15% -0.25% -0.17%] index_select linear : Elapsed 0.048 ms (4.825 ms / 100) 4.812 -> 4.812 ( +0.00%) [ +0.21% +0.10% +0.00% / +0.19% +0.00% +0.17%] index_select reverse : Elapsed 0.048 ms (4.822 ms / 100) 4.820 -> 4.811 ( -0.19%) [ +0.04% +0.06% +0.00% / -0.08% -0.19% -0.04%] index_select skip64 : Elapsed 0.048 ms (4.822 ms / 100) 4.810 -> 4.816 ( +0.12%) [ +0.00% +0.31% +0.10% / +0.12% +0.48% +0.23%] index_select skip256 : Elapsed 0.048 ms (4.810 ms / 100) 4.816 -> 4.807 ( -0.19%) [ +0.27% +0.00% +0.02% / +0.08% +0.06% -0.19%] index_select spread : Elapsed 0.048 ms (4.829 ms / 100) 4.816 -> 4.811 ( -0.10%) [ +0.00% +0.02% +0.00% / +0.12% -0.10% -0.08%] index_select strided 3 : Elapsed 0.048 ms (4.816 ms / 100) 4.843 -> 4.822 ( -0.43%) [ +0.10% +0.14% +0.00% / +0.02% -0.39% -0.43%] index_select random : Elapsed 0.048 ms (4.848 ms / 100) 4.816 -> 4.821 ( +0.10%) [ +0.08% +0.00% +0.12% / +0.10% +0.64% +0.56%] index_select random_sorted : Elapsed 0.048 ms (4.820 ms / 100) B = [500, 200, 1] (stride (1, 500, 100000)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 1 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.17% +0.00% / +1.22% +0.52% +0.52%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +1.07% +0.18% +0.00% / +0.89% +0.89% +0.72%] index_copy_ linear : Elapsed 0.006 ms (0.565 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +1.22% +0.35% +0.00% / +6.27% +0.70% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.89% +0.18% +0.00% / +0.89% +0.54% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.578 ( +0.52%) [ +1.04% +0.00% +0.00% / +1.04% +0.52% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +0.54% +0.00% +0.18% / +0.71% +0.54% +0.36%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +0.35% +0.00% / +1.05% +0.87% +1.05%] index_add_ strided 3 : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.89% +0.00% +0.18% / +0.89% +0.89% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.564 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.22% +0.35% +0.00% / +1.22% +0.87% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.89% +0.00% +0.00% / +0.72% +0.89% +0.54%] index_copy_ strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.22% +0.17% +0.00% / +1.05% +1.22% +1.22%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.18% / +2.33% +0.72% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.22% +0.00% +0.17% / +1.39% +0.87% +1.05%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +2.14% +0.00% +0.00% / +0.71% +0.54% +0.54%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.572 ms / 100) 4.807 -> 4.813 ( +0.12%) [ +0.21% +0.00% +0.17% / +0.17% +0.12% +0.23%] index_select const : Elapsed 0.048 ms (4.817 ms / 100) 4.814 -> 4.811 ( -0.06%) [ +0.00% +0.02% +0.04% / +0.02% -0.06% -0.02%] index_select wrap : Elapsed 0.048 ms (4.814 ms / 100) 4.817 -> 4.814 ( -0.06%) [ +0.04% +0.00% +0.15% / +0.02% +0.00% -0.06%] index_select linear : Elapsed 0.048 ms (4.819 ms / 100) 4.805 -> 4.812 ( +0.15%) [ +0.17% +0.00% +0.37% / +0.15% +0.40% +0.19%] index_select reverse : Elapsed 0.048 ms (4.813 ms / 100) 4.813 -> 4.813 ( +0.00%) [ +0.31% +0.00% +0.29% / +0.10% +0.00% +0.02%] index_select skip64 : Elapsed 0.048 ms (4.828 ms / 100) 4.809 -> 4.807 ( -0.04%) [ +0.06% +0.00% +0.04% / +0.25% -0.04% -0.04%] index_select skip256 : Elapsed 0.048 ms (4.812 ms / 100) 4.806 -> 4.800 ( -0.12%) [ +0.25% +0.00% +0.31% / +0.21% +0.00% -0.12%] index_select spread : Elapsed 0.048 ms (4.818 ms / 100) 4.802 -> 4.811 ( +0.19%) [ +0.15% +0.08% +0.00% / +0.19% +0.33% +0.21%] index_select strided 3 : Elapsed 0.048 ms (4.809 ms / 100) 4.841 -> 4.812 ( -0.60%) [ +0.23% +0.00% +0.19% / +0.17% -0.48% -0.60%] index_select random : Elapsed 0.049 ms (4.852 ms / 100) 4.810 -> 4.816 ( +0.12%) [ +0.44% +0.00% +0.27% / +0.12% +0.69% +0.83%] index_select random_sorted : Elapsed 0.048 ms (4.831 ms / 100) out_shape = [500, 5, 200] in_shape = [500, 5, 1] idx_dim = 2 B = [500, 5, 200] (stride (1000, 200, 1)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 2 0.512 -> 0.515 ( +0.59%) [ +0.98% +1.37% +0.00% / +0.59% +1.37% +3.52%] index_add_ linear : Elapsed 0.005 ms (0.517 ms / 100) 0.511 -> 0.501 ( -1.96%) [ +2.94% +0.00% +2.74% / -0.78% +4.50% -1.96%] index_copy_ linear : Elapsed 0.005 ms (0.526 ms / 100) 0.539 -> 0.514 ( -4.64%) [ +1.48% +0.00% +0.19% / -3.34% -0.74% -4.64%] index_add_ reverse : Elapsed 0.005 ms (0.547 ms / 100) 0.498 -> 0.491 ( -1.41%) [ +1.61% +6.02% +0.00% / +2.01% -1.41% +0.20%] index_copy_ reverse : Elapsed 0.005 ms (0.506 ms / 100) 0.518 -> 0.514 ( -0.77%) [ +0.00% +3.28% +0.00% / +4.63% -0.77% -0.58%] index_add_ spread : Elapsed 0.005 ms (0.518 ms / 100) 0.502 -> 0.493 ( -1.79%) [ +0.40% +3.59% +0.00% / +0.20% +1.00% -1.79%] index_copy_ spread : Elapsed 0.005 ms (0.504 ms / 100) 0.519 -> 0.513 ( -1.16%) [ +0.19% +0.00% +0.58% / +1.16% -1.16% -0.96%] index_add_ strided 3 : Elapsed 0.005 ms (0.520 ms / 100) 0.498 -> 0.496 ( -0.40%) [ +0.20% +3.21% +0.00% / +0.80% +4.42% -0.40%] index_copy_ strided 3 : Elapsed 0.005 ms (0.499 ms / 100) 0.518 -> 0.511 ( -1.35%) [ +0.39% +0.00% +3.28% / +0.19% -0.77% -1.35%] index_add_ strided 7 : Elapsed 0.005 ms (0.520 ms / 100) 0.498 -> 0.490 ( -1.61%) [ +1.00% +10.04% +0.00% / +1.00% -1.61% +5.02%] index_copy_ strided 7 : Elapsed 0.005 ms (0.503 ms / 100) 0.525 -> 0.527 ( +0.38%) [ +0.19% +0.57% +0.00% / +0.95% +0.38% +0.95%] index_add_ perm : Elapsed 0.005 ms (0.526 ms / 100) 0.498 -> 0.494 ( -0.80%) [ +8.43% +6.43% +0.00% / +13.05% -0.80% -0.40%] index_copy_ perm : Elapsed 0.005 ms (0.540 ms / 100) 0.526 -> 0.520 ( -1.14%) [ +6.46% +4.94% +0.00% / +4.56% -1.14% -1.14%] index_add_ perm_sorted : Elapsed 0.006 ms (0.560 ms / 100) 0.498 -> 0.492 ( -1.20%) [ +0.20% +2.61% +0.00% / +5.42% -1.20% -0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.499 ms / 100) Good 8.545 -> 6.990 (-18.20%) [ +0.13% +0.19% +0.00% / -18.20% -17.88% -17.73%] index_select const : Elapsed 0.086 ms (8.556 ms / 100) Good 8.506 -> 6.943 (-18.38%) [ +0.05% +0.04% +0.00% / -18.38% -17.76% -17.86%] index_select wrap : Elapsed 0.085 ms (8.510 ms / 100) Good 8.532 -> 6.984 (-18.14%) [ +0.13% +0.12% +0.00% / -18.14% -17.70% -17.63%] index_select linear : Elapsed 0.085 ms (8.543 ms / 100) Good 8.589 -> 7.024 (-18.22%) [ +0.00% +0.24% +0.10% / -18.07% -18.22% -18.20%] index_select reverse : Elapsed 0.086 ms (8.589 ms / 100) Good 8.630 -> 6.976 (-19.17%) [ +0.03% +0.00% +0.08% / -18.20% -19.13% -19.17%] index_select skip64 : Elapsed 0.086 ms (8.633 ms / 100) Good 8.561 -> 6.953 (-18.78%) [ +0.06% +0.20% +0.00% / -18.78% -17.67% -17.75%] index_select skip256 : Elapsed 0.086 ms (8.566 ms / 100) Good 8.617 -> 6.967 (-19.15%) [ +0.12% +0.06% +0.00% / -19.15% -17.98% -18.06%] index_select spread : Elapsed 0.086 ms (8.627 ms / 100) Good 8.565 -> 6.967 (-18.66%) [ +0.15% +0.00% +0.20% / -17.62% -18.66% -18.61%] index_select random : Elapsed 0.086 ms (8.578 ms / 100) Good 8.548 -> 7.031 (-17.75%) [ +0.06% +0.00% +0.06% / -17.66% -17.68% -17.75%] index_select random_sorted : Elapsed 0.086 ms (8.553 ms / 100) B = [500, 5, 200] (stride (1000, 1, 5)) A = [500, 5, 1] (stride (1, 500, 500)) dim = 2 0.506 -> 0.492 ( -2.77%) [ +0.00% +1.58% +0.59% / +0.59% -2.77% +3.36%] index_add_ linear : Elapsed 0.005 ms (0.506 ms / 100) 0.494 -> 0.491 ( -0.61%) [ +0.81% +2.43% +0.00% / +0.40% -0.61% -0.61%] index_copy_ linear : Elapsed 0.005 ms (0.498 ms / 100) good 0.535 -> 0.499 ( -6.73%) [ +0.00% +1.12% +0.37% / -3.74% -4.11% -6.73%] index_add_ reverse : Elapsed 0.005 ms (0.535 ms / 100) 0.500 -> 0.492 ( -1.60%) [ +1.80% +0.80% +0.00% / -1.60% -0.60% -0.80%] index_copy_ reverse : Elapsed 0.005 ms (0.509 ms / 100) 0.496 -> 0.496 ( +0.00%) [ +0.00% +4.44% +1.01% / +10.08% +74.19% +0.00%] index_add_ spread : Elapsed 0.005 ms (0.496 ms / 100) 0.518 -> 0.499 ( -3.67%) [ +0.58% +4.05% +0.00% / -1.35% +7.92% -3.67%] index_copy_ spread : Elapsed 0.005 ms (0.521 ms / 100) 0.498 -> 0.491 ( -1.41%) [ +3.01% +2.61% +0.00% / +0.40% -1.41% -1.00%] index_add_ strided 3 : Elapsed 0.005 ms (0.513 ms / 100) 0.497 -> 0.486 ( -2.21%) [ +2.01% +1.81% +0.00% / -0.20% -2.21% +1.81%] index_copy_ strided 3 : Elapsed 0.005 ms (0.507 ms / 100) 0.497 -> 0.488 ( -1.81%) [ +2.01% +8.65% +0.00% / +0.80% -1.81% +1.01%] index_add_ strided 7 : Elapsed 0.005 ms (0.507 ms / 100) 0.496 -> 0.490 ( -1.21%) [ +0.60% +0.60% +0.00% / -0.60% -1.21% -1.21%] index_copy_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.508 -> 0.502 ( -1.18%) [ +9.25% +0.39% +0.00% / +7.87% -1.18% -1.18%] index_add_ perm : Elapsed 0.006 ms (0.555 ms / 100) 0.502 -> 0.494 ( -1.59%) [ +0.00% +3.39% +1.00% / -0.20% -1.39% -1.59%] index_copy_ perm : Elapsed 0.005 ms (0.502 ms / 100) 0.497 -> 0.484 ( -2.62%) [ +0.00% +3.22% +0.80% / +1.41% -2.62% +0.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.497 ms / 100) 0.499 -> 0.484 ( -3.01%) [ +0.20% +2.61% +0.00% / +1.80% -3.01% +0.20%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.500 ms / 100) 9.334 -> 9.316 ( -0.19%) [ +0.56% +0.29% +0.00% / +0.17% -0.19% +0.67%] index_select const : Elapsed 0.094 ms (9.386 ms / 100) 9.308 -> 9.319 ( +0.12%) [ +0.30% +0.00% +0.02% / +0.12% +0.76% +0.50%] index_select wrap : Elapsed 0.093 ms (9.336 ms / 100) 9.356 -> 9.309 ( -0.50%) [ +0.16% +0.00% +0.35% / +0.27% -0.26% -0.50%] index_select linear : Elapsed 0.094 ms (9.371 ms / 100) 9.384 -> 9.354 ( -0.32%) [ +0.00% +0.23% +0.44% / +0.06% +0.14% -0.32%] index_select reverse : Elapsed 0.094 ms (9.384 ms / 100) 9.299 -> 9.273 ( -0.28%) [ +0.30% +0.26% +0.00% / -0.28% +0.68% +0.34%] index_select skip64 : Elapsed 0.093 ms (9.327 ms / 100) 9.380 -> 9.282 ( -1.04%) [ +0.09% +0.00% +0.33% / -0.47% -1.04% +0.00%] index_select skip256 : Elapsed 0.094 ms (9.388 ms / 100) 9.367 -> 9.319 ( -0.51%) [ +0.00% +0.44% +0.03% / +0.21% -0.51% -0.12%] index_select spread : Elapsed 0.094 ms (9.367 ms / 100) 9.320 -> 9.296 ( -0.26%) [ +0.09% +0.42% +0.00% / -0.26% +0.45% +0.45%] index_select random : Elapsed 0.093 ms (9.328 ms / 100) 9.307 -> 9.326 ( +0.20%) [ +0.31% +0.60% +0.00% / +0.56% +0.20% +1.13%] index_select random_sorted : Elapsed 0.093 ms (9.336 ms / 100) B = [500, 5, 200] (stride (5, 1, 2500)) A = [500, 5, 1] (stride (1, 500, 2500)) dim = 2 0.834 -> 0.839 ( +0.60%) [ +0.84% +0.36% +0.00% / +0.60% +1.68% +1.92%] index_add_ linear : Elapsed 0.008 ms (0.841 ms / 100) 0.825 -> 0.828 ( +0.36%) [ +0.00% +0.12% +0.36% / +0.36% +0.48% +0.85%] index_copy_ linear : Elapsed 0.008 ms (0.825 ms / 100) 0.835 -> 0.842 ( +0.84%) [ +0.60% +0.24% +0.00% / +0.84% +1.68% +1.44%] index_add_ reverse : Elapsed 0.008 ms (0.840 ms / 100) 0.824 -> 0.826 ( +0.24%) [ +0.12% +0.24% +0.00% / +0.24% +0.36% +1.09%] index_copy_ reverse : Elapsed 0.008 ms (0.825 ms / 100) 0.837 -> 0.843 ( +0.72%) [ +0.24% +0.00% +0.12% / +0.72% +2.27% +1.43%] index_add_ spread : Elapsed 0.008 ms (0.839 ms / 100) 0.820 -> 0.826 ( +0.73%) [ +0.49% +0.37% +0.00% / +0.85% +1.10% +0.73%] index_copy_ spread : Elapsed 0.008 ms (0.824 ms / 100) 0.835 -> 0.841 ( +0.72%) [ +0.36% +0.00% +0.24% / +0.72% +1.20% +1.20%] index_add_ strided 3 : Elapsed 0.008 ms (0.838 ms / 100) 0.821 -> 0.825 ( +0.49%) [ +0.37% +0.00% +0.00% / +0.49% +0.85% +0.85%] index_copy_ strided 3 : Elapsed 0.008 ms (0.824 ms / 100) 0.837 -> 0.838 ( +0.12%) [ +0.36% +0.48% +0.00% / +0.12% +1.08% +1.31%] index_add_ strided 7 : Elapsed 0.008 ms (0.840 ms / 100) 0.822 -> 0.824 ( +0.24%) [ +0.36% +0.12% +0.00% / +0.24% +0.24% +0.85%] index_copy_ strided 7 : Elapsed 0.008 ms (0.825 ms / 100) 0.831 -> 0.838 ( +0.84%) [ +0.84% +0.48% +0.00% / +0.84% +2.29% +1.93%] index_add_ perm : Elapsed 0.008 ms (0.838 ms / 100) 0.818 -> 0.819 ( +0.12%) [ +0.37% +0.00% +0.49% / +0.12% +1.59% +1.83%] index_copy_ perm : Elapsed 0.008 ms (0.821 ms / 100) 0.831 -> 0.835 ( +0.48%) [ +1.08% +0.00% +0.36% / +0.48% +2.41% +2.17%] index_add_ perm_sorted : Elapsed 0.008 ms (0.840 ms / 100) 0.816 -> 0.822 ( +0.74%) [ +0.25% +0.12% +0.00% / +0.74% +1.72% +2.08%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.818 ms / 100) 16.043 -> 16.102 ( +0.37%) [ +0.14% +0.00% +0.02% / +0.37% +0.41% +0.44%] index_select const : Elapsed 0.161 ms (16.066 ms / 100) 16.017 -> 16.076 ( +0.37%) [ +0.39% +0.00% +0.27% / +0.54% +0.37% +0.47%] index_select wrap : Elapsed 0.161 ms (16.080 ms / 100) 16.040 -> 16.082 ( +0.26%) [ +0.31% +0.08% +0.00% / +0.26% +0.34% +0.38%] index_select linear : Elapsed 0.161 ms (16.090 ms / 100) 16.038 -> 16.082 ( +0.27%) [ +0.30% +0.00% +0.09% / +0.27% +0.29% +0.42%] index_select reverse : Elapsed 0.161 ms (16.086 ms / 100) 16.036 -> 16.095 ( +0.37%) [ +0.30% +0.00% +0.16% / +0.45% +0.37% +0.37%] index_select skip64 : Elapsed 0.161 ms (16.084 ms / 100) 16.035 -> 16.086 ( +0.32%) [ +0.30% +0.00% +0.12% / +0.32% +0.38% +0.47%] index_select skip256 : Elapsed 0.161 ms (16.083 ms / 100) 16.039 -> 16.088 ( +0.31%) [ +0.37% +0.00% +0.02% / +0.34% +0.31% +0.36%] index_select spread : Elapsed 0.161 ms (16.099 ms / 100) 16.046 -> 16.075 ( +0.18%) [ +0.31% +0.00% +0.03% / +0.35% +0.18% +0.27%] index_select random : Elapsed 0.161 ms (16.095 ms / 100) 16.030 -> 16.089 ( +0.37%) [ +0.38% +0.00% +0.04% / +0.41% +0.37% +0.46%] index_select random_sorted : Elapsed 0.161 ms (16.091 ms / 100) B = [500, 5, 200] (stride (1, 500, 2500)) A = [500, 5, 1] (stride (5, 1, 5)) dim = 2 0.834 -> 0.839 ( +0.60%) [ +0.48% +0.36% +0.00% / +0.60% +0.96% +1.92%] index_add_ linear : Elapsed 0.008 ms (0.838 ms / 100) 0.817 -> 0.820 ( +0.37%) [ +0.61% +0.37% +0.00% / +0.37% +1.59% +1.59%] index_copy_ linear : Elapsed 0.008 ms (0.822 ms / 100) 0.835 -> 0.840 ( +0.60%) [ +0.48% +0.36% +0.00% / +0.60% +1.08% +1.32%] index_add_ reverse : Elapsed 0.008 ms (0.839 ms / 100) 0.817 -> 0.821 ( +0.49%) [ +0.61% +0.24% +0.00% / +0.49% +1.10% +1.84%] index_copy_ reverse : Elapsed 0.008 ms (0.822 ms / 100) 0.835 -> 0.838 ( +0.36%) [ +0.24% +0.24% +0.00% / +0.36% +1.08% +1.08%] index_add_ spread : Elapsed 0.008 ms (0.837 ms / 100) 0.820 -> 0.821 ( +0.12%) [ +0.12% +0.12% +0.00% / +0.12% +1.46% +1.22%] index_copy_ spread : Elapsed 0.008 ms (0.821 ms / 100) 0.835 -> 0.835 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +1.32% +1.44%] index_add_ strided 3 : Elapsed 0.008 ms (0.835 ms / 100) 0.816 -> 0.826 ( +1.23%) [ +0.25% +0.00% +0.25% / +1.23% +1.59% +1.84%] index_copy_ strided 3 : Elapsed 0.008 ms (0.818 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +0.48% +0.00% +0.48% / +0.72% +1.68% +1.68%] index_add_ strided 7 : Elapsed 0.008 ms (0.837 ms / 100) 0.817 -> 0.826 ( +1.10%) [ +0.49% +0.24% +0.00% / +1.10% +1.47% +1.35%] index_copy_ strided 7 : Elapsed 0.008 ms (0.821 ms / 100) 0.834 -> 0.836 ( +0.24%) [ +0.00% +0.12% +0.12% / +0.24% +2.76% +2.52%] index_add_ perm : Elapsed 0.008 ms (0.834 ms / 100) 0.817 -> 0.817 ( +0.00%) [ +0.49% +0.00% +0.12% / +0.00% +2.82% +2.57%] index_copy_ perm : Elapsed 0.008 ms (0.821 ms / 100) 0.833 -> 0.835 ( +0.24%) [ +0.60% +0.00% +0.00% / +0.24% +3.12% +2.52%] index_add_ perm_sorted : Elapsed 0.008 ms (0.838 ms / 100) 0.817 -> 0.819 ( +0.24%) [ +0.37% +0.00% +0.00% / +0.24% +2.69% +2.82%] index_copy_ perm_sorted : Elapsed 0.008 ms (0.820 ms / 100) 16.610 -> 16.620 ( +0.06%) [ +0.17% +0.00% +0.11% / +0.06% +0.08% +0.07%] index_select const : Elapsed 0.166 ms (16.639 ms / 100) 16.588 -> 16.613 ( +0.15%) [ +0.25% +0.02% +0.00% / +0.33% +0.26% +0.15%] index_select wrap : Elapsed 0.166 ms (16.629 ms / 100) 16.591 -> 16.622 ( +0.19%) [ +0.35% +0.00% +0.28% / +0.27% +0.19% +0.23%] index_select linear : Elapsed 0.166 ms (16.649 ms / 100) 16.623 -> 16.609 ( -0.08%) [ +0.17% +0.00% +0.07% / +0.19% -0.01% -0.08%] index_select reverse : Elapsed 0.167 ms (16.652 ms / 100) 16.605 -> 16.639 ( +0.20%) [ +0.11% +0.00% +0.09% / +0.25% +0.20% +0.20%] index_select skip64 : Elapsed 0.166 ms (16.624 ms / 100) 16.599 -> 16.624 ( +0.15%) [ +0.17% +0.00% +0.07% / +0.32% +0.15% +0.37%] index_select skip256 : Elapsed 0.166 ms (16.628 ms / 100) 16.608 -> 16.608 ( +0.00%) [ +0.33% +0.00% +0.01% / +0.25% +0.04% +0.00%] index_select spread : Elapsed 0.167 ms (16.662 ms / 100) 16.608 -> 16.610 ( +0.01%) [ +0.28% +0.00% +0.01% / +0.17% +0.01% +0.07%] index_select random : Elapsed 0.167 ms (16.655 ms / 100) 16.577 -> 16.591 ( +0.08%) [ +0.36% +0.26% +0.00% / +0.44% +0.33% +0.08%] index_select random_sorted : Elapsed 0.166 ms (16.636 ms / 100) out_shape = [500, 5, 200] in_shape = [1, 5, 200] idx_dim = 0 B = [500, 5, 200] (stride (1000, 200, 1)) A = [1, 5, 200] (stride (5, 1, 5)) dim = 0 0.574 -> 0.581 ( +1.22%) [ +1.39% +0.35% +0.00% / +1.22% +1.39% +1.22%] index_add_ linear : Elapsed 0.006 ms (0.582 ms / 100) 0.563 -> 0.564 ( +0.18%) [ +0.36% +1.42% +0.00% / +0.36% +0.36% +0.18%] index_copy_ linear : Elapsed 0.006 ms (0.565 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +1.22% +0.17% +0.00% / +2.44% +1.57% +1.22%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.89% +0.89%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.05% +0.17% +0.00% / +1.05% +1.22% +1.39%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +2.68% +0.36% +0.00% / +0.72% +1.07% +0.89%] index_copy_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.39% +0.87% +0.00% / +1.05% +1.22% +1.39%] index_add_ strided 3 : Elapsed 0.006 ms (0.582 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +4.12% +0.54% +0.00% / +0.90% +1.08% +0.90%] index_copy_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +1.04% +0.00% +2.43% / +1.22% +1.04% +4.17%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.560 -> 0.563 ( +0.54%) [ +0.71% +0.00% +0.18% / +0.54% +0.54% +0.71%] index_copy_ strided 7 : Elapsed 0.006 ms (0.564 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +1.22% +0.17% +0.00% / +1.22% +1.22% +1.22%] index_add_ strided 257 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.89% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +2.79% +0.17% +0.00% / +1.22% +1.39% +2.26%] index_add_ perm : Elapsed 0.006 ms (0.590 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.89% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.91% +0.17% +0.00% / +1.04% +1.22% +0.87%] index_add_ perm_sorted : Elapsed 0.006 ms (0.586 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.36% +0.00% / +0.72% +1.43% +0.54%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 15.978 -> 16.042 ( +0.40%) [ +0.35% +0.00% +0.09% / +0.40% +0.68% +0.76%] index_select const : Elapsed 0.160 ms (16.034 ms / 100) 15.985 -> 16.049 ( +0.40%) [ +0.36% +0.00% +0.05% / +0.40% +0.50% +0.54%] index_select wrap : Elapsed 0.160 ms (16.042 ms / 100) 15.997 -> 16.060 ( +0.39%) [ +0.26% +0.00% +0.01% / +0.39% +0.43% +0.47%] index_select linear : Elapsed 0.160 ms (16.039 ms / 100) 15.994 -> 16.049 ( +0.34%) [ +0.32% +0.00% +0.12% / +0.34% +0.52% +0.69%] index_select reverse : Elapsed 0.160 ms (16.045 ms / 100) 16.005 -> 16.054 ( +0.31%) [ +0.24% +0.00% +0.01% / +0.37% +0.31% +0.44%] index_select skip64 : Elapsed 0.160 ms (16.044 ms / 100) 15.980 -> 16.046 ( +0.41%) [ +0.36% +0.00% +0.23% / +0.45% +0.43% +0.41%] index_select skip256 : Elapsed 0.160 ms (16.038 ms / 100) 15.982 -> 16.040 ( +0.36%) [ +0.31% +0.00% +0.21% / +0.36% +0.43% +0.44%] index_select spread : Elapsed 0.160 ms (16.032 ms / 100) 15.989 -> 16.046 ( +0.36%) [ +0.31% +0.00% +0.03% / +0.36% +0.56% +0.65%] index_select random : Elapsed 0.160 ms (16.038 ms / 100) 16.007 -> 16.051 ( +0.27%) [ +0.32% +0.00% +0.14% / +0.37% +0.35% +0.27%] index_select random_sorted : Elapsed 0.161 ms (16.059 ms / 100) B = [500, 5, 200] (stride (5, 1, 2500)) A = [1, 5, 200] (stride (200, 200, 1)) dim = 0 0.574 -> 0.580 ( +1.05%) [ +0.87% +0.00% +1.57% / +2.79% +1.22% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.90% +0.90%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +0.87% +0.00% +0.35% / +3.14% +1.05% +1.39%] index_add_ reverse : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.72% +0.00% +0.00% / +12.72% +0.90% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +1.05% +0.00% +0.17% / +1.92% +1.22% +1.22%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.00% +0.00% / +1.79% +0.72% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.573 -> 0.581 ( +1.40%) [ +1.05% +0.00% +1.57% / +3.32% +1.40% +1.57%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +0.72% +1.25%] index_copy_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.582 ( +1.57%) [ +1.40% +0.00% +0.70% / +3.32% +1.57% +1.92%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.557 -> 0.562 ( +0.90%) [ +0.90% +0.36% +0.00% / +1.08% +1.08% +0.90%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +4.01% +0.17% +0.00% / +3.48% +1.05% +1.92%] index_add_ strided 257 : Elapsed 0.006 ms (0.597 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.72% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.583 ( +1.75%) [ +3.14% +0.87% +0.00% / +1.92% +1.75% +1.75%] index_add_ perm : Elapsed 0.006 ms (0.591 ms / 100) 0.560 -> 0.562 ( +0.36%) [ +0.54% +0.00% +0.18% / +0.36% +0.36% +0.36%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.573 -> 0.580 ( +1.22%) [ +1.57% +0.17% +0.00% / +2.09% +1.22% +1.75%] index_add_ perm_sorted : Elapsed 0.006 ms (0.582 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.562 ms / 100) 17.901 -> 17.838 ( -0.35%) [ +0.00% +0.09% +0.05% / +0.34% -0.33% -0.35%] index_select const : Elapsed 0.179 ms (17.901 ms / 100) 17.882 -> 17.897 ( +0.08%) [ +0.00% +0.21% +0.30% / +0.26% +0.08% +0.12%] index_select wrap : Elapsed 0.179 ms (17.882 ms / 100) 17.849 -> 17.855 ( +0.03%) [ +0.49% +0.00% +0.29% / +0.45% +0.03% +0.17%] index_select linear : Elapsed 0.179 ms (17.937 ms / 100) 17.893 -> 17.813 ( -0.45%) [ +0.29% +0.00% +0.12% / -0.02% -0.22% -0.45%] index_select reverse : Elapsed 0.179 ms (17.945 ms / 100) 17.880 -> 17.850 ( -0.17%) [ +0.35% +0.00% +0.42% / +0.11% -0.15% -0.17%] index_select skip64 : Elapsed 0.179 ms (17.942 ms / 100) 17.909 -> 17.911 ( +0.01%) [ +0.12% +0.00% +0.10% / +0.10% +0.01% +0.15%] index_select skip256 : Elapsed 0.179 ms (17.930 ms / 100) 17.913 -> 17.890 ( -0.13%) [ +0.22% +0.00% +0.07% / +0.30% +0.09% -0.13%] index_select spread : Elapsed 0.180 ms (17.952 ms / 100) 17.867 -> 17.876 ( +0.05%) [ +0.06% +0.10% +0.00% / +0.33% +0.05% +0.33%] index_select random : Elapsed 0.179 ms (17.877 ms / 100) 17.925 -> 17.868 ( -0.32%) [ +0.04% +0.09% +0.00% / -0.15% -0.28% -0.32%] index_select random_sorted : Elapsed 0.179 ms (17.933 ms / 100) B = [500, 5, 200] (stride (1, 500, 2500)) dim = 0 fill_cnt = 1 0.461 -> 0.463 ( +0.43%) [ +8.24% +2.17% +0.00% / +9.98% +0.43% +7.38%] index_fill_ const : Elapsed 0.005 ms (0.499 ms / 100) 0.457 -> 0.456 ( -0.22%) [ +5.69% +2.63% +0.00% / +9.63% -0.22% +3.50%] index_fill_ linear : Elapsed 0.005 ms (0.483 ms / 100) 0.459 -> 0.462 ( +0.65%) [ +0.00% +8.50% +8.50% / +2.18% +0.65% +0.87%] index_fill_ reverse : Elapsed 0.005 ms (0.459 ms / 100) 0.462 -> 0.455 ( -1.52%) [ +0.22% +6.71% +0.00% / +1.73% -1.52% +5.84%] index_fill_ skip64 : Elapsed 0.005 ms (0.463 ms / 100) 0.460 -> 0.456 ( -0.87%) [ +9.35% +1.96% +0.00% / +9.57% -0.87% -0.22%] index_fill_ skip256 : Elapsed 0.005 ms (0.503 ms / 100) 0.461 -> 0.456 ( -1.08%) [ +0.00% +2.82% +0.65% / +1.74% -1.08% +0.65%] index_fill_ spread : Elapsed 0.005 ms (0.461 ms / 100) 0.475 -> 0.459 ( -3.37%) [ +0.63% +4.00% +0.00% / -1.26% -3.37% -2.74%] index_fill_ strided 3 : Elapsed 0.005 ms (0.478 ms / 100) 0.461 -> 0.457 ( -0.87%) [ +0.43% +12.15% +0.00% / +1.30% -0.87% +5.42%] index_fill_ strided 5 : Elapsed 0.005 ms (0.463 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +5.16% +3.01% +0.00% / +1.72% +4.73% -0.43%] index_fill_ strided 7 : Elapsed 0.005 ms (0.489 ms / 100) 0.461 -> 0.461 ( +0.00%) [ +0.00% +1.52% +6.29% / +2.17% +8.03% +0.00%] index_fill_ strided 8 : Elapsed 0.005 ms (0.461 ms / 100) 0.469 -> 0.454 ( -3.20%) [ +0.00% +3.20% +4.26% / +19.83% -3.20% +3.84%] index_fill_ strided 16 : Elapsed 0.005 ms (0.469 ms / 100) 0.460 -> 0.458 ( -0.43%) [ +8.04% +4.57% +0.00% / +1.52% +6.30% -0.43%] index_fill_ strided 64 : Elapsed 0.005 ms (0.497 ms / 100) 0.459 -> 0.463 ( +0.87%) [ +7.84% +2.18% +0.00% / +2.61% +5.01% +0.87%] index_fill_ strided 100 : Elapsed 0.005 ms (0.495 ms / 100) 0.461 -> 0.454 ( -1.52%) [ +0.00% +6.51% +4.34% / +14.53% -1.52% -0.22%] index_fill_ strided 255 : Elapsed 0.005 ms (0.461 ms / 100) 0.459 -> 0.459 ( +0.00%) [ +0.87% +2.40% +0.00% / +2.18% +0.00% +3.92%] index_fill_ strided 256 : Elapsed 0.005 ms (0.463 ms / 100) 0.459 -> 0.477 ( +3.92%) [+11.55% +2.18% +0.00% / +4.79% +5.66% +3.92%] index_fill_ strided 257 : Elapsed 0.005 ms (0.512 ms / 100) 0.459 -> 0.458 ( -0.22%) [ +4.14% +7.84% +0.00% / +23.53% -0.22% +1.09%] index_fill_ random : Elapsed 0.005 ms (0.478 ms / 100) 0.460 -> 0.458 ( -0.43%) [ +0.00% +8.04% +5.43% / +10.00% -0.43% +0.65%] index_fill_ random_sorted : Elapsed 0.005 ms (0.460 ms / 100) 0.459 -> 0.462 ( +0.65%) [ +0.87% +7.19% +0.00% / +2.40% +5.23% +0.65%] index_fill_ perm : Elapsed 0.005 ms (0.463 ms / 100) 0.454 -> 0.457 ( +0.66%) [ +5.07% +2.20% +0.00% / +2.86% +0.66% +2.42%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.477 ms / 100) B = [500, 5, 200] (stride (1, 500, 2500)) A = [1, 5, 200] (stride (5, 1, 5)) dim = 0 0.496 -> 0.493 ( -0.60%) [ +0.40% +3.63% +0.00% / +1.61% -0.60% +1.01%] index_add_ linear : Elapsed 0.005 ms (0.498 ms / 100) 0.497 -> 0.489 ( -1.61%) [ +1.81% +0.80% +0.00% / +0.80% -1.61% -1.41%] index_copy_ linear : Elapsed 0.005 ms (0.506 ms / 100) 0.500 -> 0.514 ( +2.80%) [ +0.00% +5.00% +4.60% / +20.20% +2.80% +4.60%] index_add_ reverse : Elapsed 0.005 ms (0.500 ms / 100) 0.503 -> 0.497 ( -1.19%) [ +0.00% +1.79% +6.36% / +0.00% -1.19% -0.40%] index_copy_ reverse : Elapsed 0.005 ms (0.503 ms / 100) 0.494 -> 0.498 ( +0.81%) [ +0.00% +3.24% +2.02% / +1.62% +1.21% +0.81%] index_add_ spread : Elapsed 0.005 ms (0.494 ms / 100) 0.492 -> 0.489 ( -0.61%) [ +2.44% +5.08% +0.00% / +2.85% -0.61% +0.41%] index_copy_ spread : Elapsed 0.005 ms (0.504 ms / 100) 0.498 -> 0.503 ( +1.00%) [ +0.00% +2.41% +8.63% / +3.41% +1.61% +1.00%] index_add_ strided 3 : Elapsed 0.005 ms (0.498 ms / 100) 0.499 -> 0.491 ( -1.60%) [ +8.02% +3.01% +0.00% / +1.20% -1.60% -0.60%] index_copy_ strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.496 -> 0.493 ( -0.60%) [ +0.40% +4.84% +0.00% / +2.82% -0.60% +1.01%] index_add_ strided 7 : Elapsed 0.005 ms (0.498 ms / 100) 0.493 -> 0.497 ( +0.81%) [ +1.83% +8.52% +0.00% / +5.68% +1.22% +0.81%] index_copy_ strided 7 : Elapsed 0.005 ms (0.502 ms / 100) 0.499 -> 0.491 ( -1.60%) [ +0.00% +2.81% +5.81% / +0.40% -1.60% +0.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.499 ms / 100) 0.492 -> 0.488 ( -0.81%) [ +2.64% +2.64% +0.00% / +1.42% -0.81% +1.02%] index_copy_ strided 257 : Elapsed 0.005 ms (0.505 ms / 100) 0.499 -> 0.487 ( -2.40%) [ +0.00% +4.21% +0.20% / +4.41% -2.40% -0.60%] index_add_ perm : Elapsed 0.005 ms (0.499 ms / 100) 0.506 -> 0.485 ( -4.15%) [ +2.37% +3.16% +0.00% / +1.78% -4.15% -1.58%] index_copy_ perm : Elapsed 0.005 ms (0.518 ms / 100) 0.502 -> 0.496 ( -1.20%) [+12.35% +2.39% +0.00% / +0.40% -1.20% -0.40%] index_add_ perm_sorted : Elapsed 0.006 ms (0.564 ms / 100) 0.499 -> 0.497 ( -0.40%) [ +4.21% +1.20% +0.00% / +0.80% -0.40% +4.21%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.520 ms / 100) Good 10.040 -> 8.143 (-18.89%) [ +0.04% +0.00% +0.11% / -18.89% -18.50% -18.54%] index_select const : Elapsed 0.100 ms (10.044 ms / 100) Good 10.042 -> 8.125 (-19.09%) [ +0.20% +0.10% +0.00% / -19.09% -18.92% -19.09%] index_select wrap : Elapsed 0.101 ms (10.062 ms / 100) Good 10.102 -> 8.127 (-19.55%) [ +0.08% +0.10% +0.00% / -19.15% -19.55% -19.53%] index_select linear : Elapsed 0.101 ms (10.110 ms / 100) Good 9.985 -> 8.113 (-18.75%) [ +0.00% +0.32% +0.46% / -18.75% -17.39% -16.95%] index_select reverse : Elapsed 0.100 ms (9.985 ms / 100) Good 10.071 -> 8.090 (-19.67%) [ +0.00% +0.18% +0.27% / -19.03% -19.67% -19.31%] index_select skip64 : Elapsed 0.101 ms (10.071 ms / 100) Good 10.039 -> 8.115 (-19.17%) [ +0.00% +0.28% +0.01% / -19.17% -18.86% -19.10%] index_select skip256 : Elapsed 0.100 ms (10.039 ms / 100) Good 9.990 -> 8.112 (-18.80%) [ +0.14% +0.31% +0.00% / -18.80% -18.42% -18.60%] index_select spread : Elapsed 0.100 ms (10.004 ms / 100) Good 10.043 -> 8.155 (-18.80%) [ +0.03% +0.00% +0.17% / -18.80% -18.15% -18.02%] index_select random : Elapsed 0.100 ms (10.046 ms / 100) Good 10.068 -> 8.096 (-19.59%) [ +0.00% +0.07% +0.01% / -18.71% -19.54% -19.59%] index_select random_sorted : Elapsed 0.101 ms (10.068 ms / 100) out_shape = [1, 500, 200] in_shape = [1, 5, 200] idx_dim = 1 B = [1, 500, 200] (stride (100000, 1, 500)) A = [1, 5, 200] (stride (1000, 200, 1)) dim = 1 0.622 -> 0.624 ( +0.32%) [ +0.80% +0.00% +0.00% / +0.64% +0.32% +0.48%] index_add_ linear : Elapsed 0.006 ms (0.627 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.00% +0.00% / +0.78% +0.16% +0.63%] index_copy_ linear : Elapsed 0.006 ms (0.643 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.81% +0.16% +0.00% / +0.97% +0.48% +0.64%] index_add_ reverse : Elapsed 0.006 ms (0.626 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.16% +0.00% / +0.78% +0.16% +0.47%] index_copy_ reverse : Elapsed 0.006 ms (0.643 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.96% +0.16% +0.00% / +0.96% +0.48% +0.48%] index_add_ spread : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.640 ( +0.47%) [ +0.78% +0.00% +0.16% / +1.26% +0.47% +0.47%] index_copy_ spread : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.81% +0.16% +0.00% / +0.81% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.78% +0.00% +0.00% / +0.94% +0.63% +0.63%] index_copy_ strided 3 : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.80% +0.00% +0.00% / +0.64% +0.48% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.627 ms / 100) 0.638 -> 0.641 ( +0.47%) [ +0.63% +0.31% +0.00% / +0.63% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.626 ( +0.81%) [ +1.13% +0.00% +0.16% / +1.13% +0.81% +0.81%] index_add_ strided 257 : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.63% +0.00% +0.00% / +0.78% +0.63% +0.78%] index_copy_ strided 257 : Elapsed 0.006 ms (0.641 ms / 100) 0.621 -> 0.626 ( +0.81%) [ +1.13% +0.00% +0.00% / +0.97% +0.81% +0.97%] index_add_ perm : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.642 ( +0.78%) [ +0.63% +0.00% +0.00% / +0.78% +0.78% +0.78%] index_copy_ perm : Elapsed 0.006 ms (0.641 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.64% +0.00% +0.00% / +0.80% +0.48% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.639 ( +0.31%) [ +0.94% +0.00% +0.00% / +0.94% +0.31% +0.47%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.344 -> 5.210 ( -2.51%) [ +0.07% +0.00% +0.49% / -2.51% -1.76% -1.76%] index_select const : Elapsed 0.053 ms (5.348 ms / 100) 5.368 -> 5.275 ( -1.73%) [ +0.13% +0.11% +0.00% / -1.58% -1.73% -1.55%] index_select wrap : Elapsed 0.054 ms (5.375 ms / 100) 5.371 -> 5.249 ( -2.27%) [ +0.41% +0.00% +0.35% / -2.27% -1.68% -2.05%] index_select linear : Elapsed 0.054 ms (5.393 ms / 100) 5.362 -> 5.225 ( -2.56%) [ +0.15% +0.00% +0.02% / -2.56% -2.14% -2.24%] index_select reverse : Elapsed 0.054 ms (5.370 ms / 100) 5.353 -> 5.202 ( -2.82%) [ +0.26% +0.00% +0.11% / -2.82% -2.22% -2.30%] index_select skip64 : Elapsed 0.054 ms (5.367 ms / 100) 5.382 -> 5.255 ( -2.36%) [ +0.19% +0.00% +0.24% / -2.36% -2.16% -2.17%] index_select skip256 : Elapsed 0.054 ms (5.392 ms / 100) 5.386 -> 5.250 ( -2.53%) [ +0.15% +0.00% +0.17% / -2.53% -1.97% -1.99%] index_select spread : Elapsed 0.054 ms (5.394 ms / 100) 5.356 -> 5.234 ( -2.28%) [ +0.02% +0.09% +0.00% / -2.28% -1.77% -1.85%] index_select strided 3 : Elapsed 0.054 ms (5.357 ms / 100) 5.354 -> 5.235 ( -2.22%) [ +0.00% +0.06% +0.00% / -2.22% -1.76% -1.87%] index_select random : Elapsed 0.054 ms (5.354 ms / 100) 5.384 -> 5.250 ( -2.49%) [ +0.17% +0.00% +0.06% / -2.49% -1.75% -1.78%] index_select random_sorted : Elapsed 0.054 ms (5.393 ms / 100) B = [1, 500, 200] (stride (100000, 1, 500)) A = [1, 5, 200] (stride (1000, 1, 5)) dim = 1 0.623 -> 0.628 ( +0.80%) [ +1.12% +0.48% +0.00% / +1.12% +0.80% +0.80%] index_add_ linear : Elapsed 0.006 ms (0.630 ms / 100) 0.637 -> 0.645 ( +1.26%) [ +1.26% +0.16% +0.00% / +1.26% +1.73% +1.26%] index_copy_ linear : Elapsed 0.006 ms (0.645 ms / 100) 0.624 -> 0.630 ( +0.96%) [ +1.12% +0.32% +0.00% / +0.96% +0.96% +0.96%] index_add_ reverse : Elapsed 0.006 ms (0.631 ms / 100) 0.637 -> 0.644 ( +1.10%) [ +1.10% +0.16% +0.00% / +1.10% +1.10% +1.26%] index_copy_ reverse : Elapsed 0.006 ms (0.644 ms / 100) 0.624 -> 0.629 ( +0.80%) [ +1.12% +0.16% +0.00% / +0.96% +0.80% +0.96%] index_add_ spread : Elapsed 0.006 ms (0.631 ms / 100) 0.638 -> 0.644 ( +0.94%) [ +0.78% +0.00% +0.00% / +0.94% +1.10% +1.10%] index_copy_ spread : Elapsed 0.006 ms (0.643 ms / 100) 0.624 -> 0.628 ( +0.64%) [ +0.96% +0.16% +0.00% / +0.96% +0.64% +0.80%] index_add_ strided 3 : Elapsed 0.006 ms (0.630 ms / 100) 0.637 -> 0.644 ( +1.10%) [ +1.41% +0.16% +0.00% / +1.41% +1.10% +1.10%] index_copy_ strided 3 : Elapsed 0.006 ms (0.646 ms / 100) 0.624 -> 0.628 ( +0.64%) [ +0.96% +0.00% +0.16% / +0.96% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.638 -> 0.643 ( +0.78%) [ +1.10% +0.16% +0.00% / +0.94% +0.78% +0.78%] index_copy_ strided 7 : Elapsed 0.006 ms (0.645 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.00% +0.00% / +0.64% +0.48% +0.48%] index_add_ strided 257 : Elapsed 0.006 ms (0.630 ms / 100) 0.638 -> 0.642 ( +0.63%) [ +1.25% +0.00% +0.31% / +1.25% +0.63% +0.78%] index_copy_ strided 257 : Elapsed 0.006 ms (0.646 ms / 100) 0.624 -> 0.627 ( +0.48%) [ +1.12% +0.16% +0.00% / +1.12% +0.48% +0.48%] index_add_ perm : Elapsed 0.006 ms (0.631 ms / 100) 0.638 -> 0.642 ( +0.63%) [ +0.94% +0.00% +0.16% / +1.25% +0.63% +0.94%] index_copy_ perm : Elapsed 0.006 ms (0.644 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.96% +0.16% +0.00% / +0.80% +0.48% +0.64%] index_add_ perm_sorted : Elapsed 0.006 ms (0.631 ms / 100) 0.639 -> 0.643 ( +0.63%) [ +0.94% +0.00% +0.47% / +1.10% +0.78% +0.63%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.645 ms / 100) 5.410 -> 5.219 ( -3.53%) [ +0.07% +0.57% +0.00% / -3.53% -2.94% -2.99%] index_select const : Elapsed 0.054 ms (5.414 ms / 100) 5.427 -> 5.246 ( -3.34%) [ +0.13% +0.00% +0.06% / -3.34% -2.95% -3.04%] index_select wrap : Elapsed 0.054 ms (5.434 ms / 100) 5.445 -> 5.236 ( -3.84%) [ +0.15% +0.00% +0.13% / -3.84% -3.32% -3.21%] index_select linear : Elapsed 0.055 ms (5.453 ms / 100) 5.419 -> 5.211 ( -3.84%) [ +0.22% +0.00% +0.06% / -3.84% -3.19% -3.41%] index_select reverse : Elapsed 0.054 ms (5.431 ms / 100) 5.425 -> 5.226 ( -3.67%) [ +0.00% +0.15% +0.07% / -3.67% -3.41% -3.47%] index_select skip64 : Elapsed 0.054 ms (5.425 ms / 100) 5.442 -> 5.265 ( -3.25%) [ +0.04% +0.00% +0.00% / -3.20% -3.11% -3.25%] index_select skip256 : Elapsed 0.054 ms (5.444 ms / 100) 5.449 -> 5.220 ( -4.20%) [ +0.28% +0.00% +0.00% / -4.20% -3.21% -3.25%] index_select spread : Elapsed 0.055 ms (5.464 ms / 100) 5.422 -> 5.221 ( -3.71%) [ +0.15% +0.00% +0.28% / -3.71% -3.41% -3.41%] index_select strided 3 : Elapsed 0.054 ms (5.430 ms / 100) 5.427 -> 5.226 ( -3.70%) [ +0.09% +0.00% +0.15% / -3.70% -3.22% -3.34%] index_select random : Elapsed 0.054 ms (5.432 ms / 100) 5.451 -> 5.233 ( -4.00%) [ +0.07% +0.07% +0.00% / -4.00% -2.94% -3.05%] index_select random_sorted : Elapsed 0.055 ms (5.455 ms / 100) B = [1, 500, 200] (stride (100000, 1, 500)) A = [1, 5, 200] (stride (200, 200, 1)) dim = 1 0.621 -> 0.624 ( +0.48%) [ +0.97% +0.32% +0.00% / +1.13% +0.64% +0.48%] index_add_ linear : Elapsed 0.006 ms (0.627 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.00% +0.00% / +0.78% +0.31% +0.16%] index_copy_ linear : Elapsed 0.006 ms (0.643 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.97% +0.16% +0.00% / +1.13% +0.48% +0.48%] index_add_ reverse : Elapsed 0.006 ms (0.627 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.94% +0.00% +0.00% / +0.78% +0.16% +0.16%] index_copy_ reverse : Elapsed 0.006 ms (0.644 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.16% +0.00% / +1.13% +0.48% +0.81%] index_add_ spread : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.640 ( +0.47%) [ +0.94% +0.16% +0.00% / +0.94% +0.47% +0.47%] index_copy_ spread : Elapsed 0.006 ms (0.643 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.81% +0.16% +0.00% / +0.97% +0.64% +0.48%] index_add_ strided 3 : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.640 ( +0.47%) [ +0.78% +0.16% +0.00% / +0.78% +0.47% +0.63%] index_copy_ strided 3 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.81% +0.00% +0.00% / +0.81% +0.64% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.640 ( +0.47%) [ +0.78% +0.00% +0.00% / +0.78% +0.63% +0.47%] index_copy_ strided 7 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.97% +0.16% +0.00% / +1.13% +0.64% +0.81%] index_add_ strided 257 : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.642 ( +0.78%) [ +0.63% +0.00% +0.00% / +0.78% +0.78% +0.94%] index_copy_ strided 257 : Elapsed 0.006 ms (0.641 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +1.13% +0.00% +0.00% / +1.13% +0.64% +0.64%] index_add_ perm : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.63% +0.00% +2.35% / +1.10% +0.63% +0.78%] index_copy_ perm : Elapsed 0.006 ms (0.641 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.96% +0.16% +0.00% / +0.96% +0.64% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.00% +0.00% / +0.78% +0.31% +0.16%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.343 -> 5.205 ( -2.58%) [ +0.37% +0.00% +0.09% / -2.58% -1.63% -1.59%] index_select const : Elapsed 0.054 ms (5.363 ms / 100) 5.356 -> 5.274 ( -1.53%) [ +0.30% +0.35% +0.00% / -1.51% -1.53% -1.33%] index_select wrap : Elapsed 0.054 ms (5.372 ms / 100) 5.384 -> 5.233 ( -2.80%) [ +0.13% +0.00% +0.11% / -2.80% -2.41% -2.17%] index_select linear : Elapsed 0.054 ms (5.391 ms / 100) 5.368 -> 5.227 ( -2.63%) [ +0.19% +0.13% +0.00% / -2.63% -2.20% -2.09%] index_select reverse : Elapsed 0.054 ms (5.378 ms / 100) 5.348 -> 5.201 ( -2.75%) [ +0.07% +0.13% +0.00% / -2.75% -2.15% -2.17%] index_select skip64 : Elapsed 0.054 ms (5.352 ms / 100) 5.384 -> 5.256 ( -2.38%) [ +0.00% +0.11% +0.19% / -2.38% -2.12% -2.27%] index_select skip256 : Elapsed 0.054 ms (5.384 ms / 100) 5.385 -> 5.241 ( -2.67%) [ +0.00% +0.20% +0.06% / -2.67% -2.02% -2.08%] index_select spread : Elapsed 0.054 ms (5.385 ms / 100) 5.358 -> 5.238 ( -2.24%) [ +0.00% +0.00% +0.13% / -2.24% -1.92% -1.81%] index_select strided 3 : Elapsed 0.054 ms (5.358 ms / 100) 5.350 -> 5.235 ( -2.15%) [ +0.28% +0.00% +0.22% / -2.15% -1.61% -1.76%] index_select random : Elapsed 0.054 ms (5.365 ms / 100) 5.385 -> 5.252 ( -2.47%) [ +0.28% +0.00% +0.09% / -2.47% -1.75% -1.67%] index_select random_sorted : Elapsed 0.054 ms (5.400 ms / 100) B = [1, 500, 200] (stride (200, 200, 1)) A = [1, 5, 200] (stride (1000, 200, 1)) dim = 1 0.621 -> 0.625 ( +0.64%) [ +0.81% +0.00% +0.00% / +0.81% +0.81% +0.64%] index_add_ linear : Elapsed 0.006 ms (0.626 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +3.47% +0.00% +0.00% / +0.83% +0.66% +0.66%] index_copy_ linear : Elapsed 0.006 ms (0.626 ms / 100) 0.622 -> 0.626 ( +0.64%) [ +0.64% +0.00% +0.00% / +0.64% +0.64% +0.80%] index_add_ reverse : Elapsed 0.006 ms (0.626 ms / 100) 0.605 -> 0.610 ( +0.83%) [ +0.99% +0.00% +0.17% / +0.99% +0.83% +0.83%] index_copy_ reverse : Elapsed 0.006 ms (0.611 ms / 100) 0.621 -> 0.626 ( +0.81%) [ +0.97% +0.16% +0.00% / +1.13% +0.81% +0.97%] index_add_ spread : Elapsed 0.006 ms (0.627 ms / 100) 0.604 -> 0.610 ( +0.99%) [ +1.32% +0.00% +0.17% / +0.99% +0.99% +1.16%] index_copy_ spread : Elapsed 0.006 ms (0.612 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.81% +0.00% +0.16% / +0.64% +0.64% +0.81%] index_add_ strided 3 : Elapsed 0.006 ms (0.626 ms / 100) 0.605 -> 0.609 ( +0.66%) [ +0.83% +0.00% +0.33% / +0.99% +1.16% +0.66%] index_copy_ strided 3 : Elapsed 0.006 ms (0.610 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.97% +0.32% +0.00% / +0.64% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.627 ms / 100) 0.604 -> 0.608 ( +0.66%) [ +0.99% +0.50% +0.00% / +0.83% +0.66% +0.66%] index_copy_ strided 7 : Elapsed 0.006 ms (0.610 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.97% +0.00% +0.16% / +0.97% +0.48% +0.64%] index_add_ strided 257 : Elapsed 0.006 ms (0.627 ms / 100) 0.605 -> 0.608 ( +0.50%) [ +0.83% +0.17% +0.00% / +0.99% +0.66% +0.50%] index_copy_ strided 257 : Elapsed 0.006 ms (0.610 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.97% +0.00% +0.16% / +1.13% +0.81% +0.48%] index_add_ perm : Elapsed 0.006 ms (0.627 ms / 100) 0.604 -> 0.608 ( +0.66%) [ +0.99% +0.17% +0.00% / +2.15% +0.66% +0.83%] index_copy_ perm : Elapsed 0.006 ms (0.610 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.81% +0.00% +0.00% / +0.97% +0.64% +0.81%] index_add_ perm_sorted : Elapsed 0.006 ms (0.626 ms / 100) 0.604 -> 0.608 ( +0.66%) [ +0.99% +0.00% +0.17% / +1.16% +0.66% +0.83%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.610 ms / 100) 5.183 -> 5.184 ( +0.02%) [ +0.50% +0.12% +0.00% / +0.02% +0.71% +0.56%] index_select const : Elapsed 0.052 ms (5.209 ms / 100) 5.215 -> 5.216 ( +0.02%) [ +0.08% +0.00% +0.31% / +0.02% +0.59% +0.61%] index_select wrap : Elapsed 0.052 ms (5.219 ms / 100) 5.218 -> 5.223 ( +0.10%) [ +0.10% +0.00% +0.31% / +0.10% +0.19% +0.42%] index_select linear : Elapsed 0.052 ms (5.223 ms / 100) 5.196 -> 5.193 ( -0.06%) [ +0.29% +0.00% +0.23% / -0.06% +0.81% +0.67%] index_select reverse : Elapsed 0.052 ms (5.211 ms / 100) 5.185 -> 5.197 ( +0.23%) [ +0.48% +0.10% +0.00% / +0.23% +0.48% +0.71%] index_select skip64 : Elapsed 0.052 ms (5.210 ms / 100) 5.215 -> 5.216 ( +0.02%) [ +0.00% +0.04% +0.08% / +0.12% +0.33% +0.02%] index_select skip256 : Elapsed 0.052 ms (5.215 ms / 100) 5.221 -> 5.217 ( -0.08%) [ +0.11% +0.00% +0.13% / -0.08% +0.25% +0.19%] index_select spread : Elapsed 0.052 ms (5.227 ms / 100) 5.196 -> 5.187 ( -0.17%) [ +0.00% +0.08% +0.13% / -0.17% +0.42% +0.54%] index_select strided 3 : Elapsed 0.052 ms (5.196 ms / 100) 5.194 -> 5.207 ( +0.25%) [ +0.29% +0.00% +0.04% / +0.25% +0.48% +0.54%] index_select random : Elapsed 0.052 ms (5.209 ms / 100) 5.209 -> 5.217 ( +0.15%) [ +0.06% +0.00% +0.13% / +0.15% +0.81% +0.23%] index_select random_sorted : Elapsed 0.052 ms (5.212 ms / 100) B = [1, 500, 200] (stride (200, 200, 1)) A = [1, 5, 200] (stride (5, 1, 5)) dim = 1 0.624 -> 0.627 ( +0.48%) [ +1.12% +0.48% +0.00% / +0.96% +0.64% +0.48%] index_add_ linear : Elapsed 0.006 ms (0.631 ms / 100) 0.610 -> 0.611 ( +0.16%) [ +0.66% +0.00% +0.00% / +0.66% +0.49% +0.16%] index_copy_ linear : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.16% +0.00% / +0.96% +0.64% +0.48%] index_add_ reverse : Elapsed 0.006 ms (0.630 ms / 100) 0.610 -> 0.611 ( +0.16%) [ +0.66% +0.00% +0.16% / +0.82% +0.16% +0.16%] index_copy_ reverse : Elapsed 0.006 ms (0.614 ms / 100) 0.626 -> 0.627 ( +0.16%) [ +0.80% +0.00% +0.00% / +0.80% +0.16% +0.16%] index_add_ spread : Elapsed 0.006 ms (0.631 ms / 100) 0.610 -> 0.612 ( +0.33%) [ +0.66% +0.16% +0.00% / +0.98% +0.49% +0.33%] index_copy_ spread : Elapsed 0.006 ms (0.614 ms / 100) 0.624 -> 0.628 ( +0.64%) [ +0.80% +0.00% +0.00% / +0.96% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.006 ms (0.629 ms / 100) 0.609 -> 0.613 ( +0.66%) [ +0.82% +0.00% +0.00% / +0.66% +0.66% +0.66%] index_copy_ strided 3 : Elapsed 0.006 ms (0.614 ms / 100) 0.624 -> 0.628 ( +0.64%) [ +0.96% +0.16% +0.00% / +0.96% +0.64% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.66% +0.00% +0.00% / +0.82% +0.82% +0.49%] index_copy_ strided 7 : Elapsed 0.006 ms (0.613 ms / 100) 0.624 -> 0.629 ( +0.80%) [ +0.96% +0.16% +0.00% / +0.96% +0.80% +0.96%] index_add_ strided 257 : Elapsed 0.006 ms (0.630 ms / 100) 0.608 -> 0.613 ( +0.82%) [ +0.82% +0.00% +0.00% / +1.15% +0.82% +0.99%] index_copy_ strided 257 : Elapsed 0.006 ms (0.613 ms / 100) 0.623 -> 0.629 ( +0.96%) [ +0.96% +0.16% +0.00% / +1.12% +0.96% +0.96%] index_add_ perm : Elapsed 0.006 ms (0.629 ms / 100) 0.608 -> 0.614 ( +0.99%) [ +0.99% +0.00% +0.00% / +0.99% +0.99% +0.99%] index_copy_ perm : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.627 ( +0.32%) [ +0.96% +0.16% +0.00% / +0.96% +0.48% +0.32%] index_add_ perm_sorted : Elapsed 0.006 ms (0.631 ms / 100) 0.610 -> 0.612 ( +0.33%) [ +0.82% +0.00% +0.00% / +0.66% +0.49% +0.33%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.615 ms / 100) 5.222 -> 5.241 ( +0.36%) [ +0.36% +0.00% +0.25% / +0.36% +0.92% +0.80%] index_select const : Elapsed 0.052 ms (5.241 ms / 100) 5.251 -> 5.265 ( +0.27%) [ +0.04% +0.06% +0.00% / +0.27% +0.80% +0.84%] index_select wrap : Elapsed 0.053 ms (5.253 ms / 100) 5.265 -> 5.265 ( +0.00%) [ +0.09% +0.13% +0.00% / +0.00% +0.36% +0.17%] index_select linear : Elapsed 0.053 ms (5.270 ms / 100) 5.239 -> 5.244 ( +0.10%) [ +0.11% +0.00% +0.00% / +0.10% +0.84% +0.50%] index_select reverse : Elapsed 0.052 ms (5.245 ms / 100) 5.234 -> 5.252 ( +0.34%) [ +0.02% +0.00% +0.13% / +0.34% +0.59% +0.69%] index_select skip64 : Elapsed 0.052 ms (5.235 ms / 100) 5.267 -> 5.263 ( -0.08%) [ +0.04% +0.15% +0.00% / -0.08% +0.21% +0.63%] index_select skip256 : Elapsed 0.053 ms (5.269 ms / 100) 5.254 -> 5.258 ( +0.08%) [ +0.30% +0.13% +0.00% / +0.08% +0.51% +0.49%] index_select spread : Elapsed 0.053 ms (5.270 ms / 100) 5.239 -> 5.251 ( +0.23%) [ +0.29% +0.23% +0.00% / +0.23% +0.63% +0.65%] index_select strided 3 : Elapsed 0.053 ms (5.254 ms / 100) 5.233 -> 5.250 ( +0.32%) [ +0.13% +0.06% +0.00% / +0.32% +0.52% +0.71%] index_select random : Elapsed 0.052 ms (5.240 ms / 100) 5.247 -> 5.259 ( +0.23%) [ +0.34% +0.00% +0.30% / +0.23% +0.76% +0.69%] index_select random_sorted : Elapsed 0.053 ms (5.265 ms / 100) B = [1, 500, 200] (stride (1, 200, 1)) A = [1, 5, 200] (stride (1000, 1, 5)) dim = 1 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.00% +0.16% / +1.12% +0.48% +0.48%] index_add_ linear : Elapsed 0.006 ms (0.630 ms / 100) 0.608 -> 0.614 ( +0.99%) [ +1.15% +0.00% +0.33% / +0.99% +0.99% +0.99%] index_copy_ linear : Elapsed 0.006 ms (0.615 ms / 100) 0.624 -> 0.629 ( +0.80%) [ +1.12% +0.00% +0.00% / +1.12% +0.80% +0.96%] index_add_ reverse : Elapsed 0.006 ms (0.631 ms / 100) 0.608 -> 0.613 ( +0.82%) [ +0.99% +0.16% +0.00% / +1.15% +0.82% +0.99%] index_copy_ reverse : Elapsed 0.006 ms (0.614 ms / 100) 0.623 -> 0.628 ( +0.80%) [ +1.12% +0.16% +0.00% / +1.12% +0.96% +0.80%] index_add_ spread : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.613 ( +0.66%) [ +0.66% +0.00% +0.00% / +1.48% +0.66% +0.82%] index_copy_ spread : Elapsed 0.006 ms (0.613 ms / 100) 0.624 -> 0.627 ( +0.48%) [ +0.96% +0.16% +0.00% / +0.96% +0.64% +0.48%] index_add_ strided 3 : Elapsed 0.006 ms (0.630 ms / 100) 0.608 -> 0.612 ( +0.66%) [ +0.99% +0.33% +0.00% / +0.99% +0.82% +0.66%] index_copy_ strided 3 : Elapsed 0.006 ms (0.614 ms / 100) 0.624 -> 0.627 ( +0.48%) [ +1.12% +0.00% +0.00% / +0.96% +0.64% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.631 ms / 100) 0.609 -> 0.613 ( +0.66%) [ +0.82% +0.00% +0.00% / +0.82% +0.66% +0.66%] index_copy_ strided 7 : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.627 ( +0.32%) [ +0.96% +0.16% +0.00% / +0.96% +0.32% +0.32%] index_add_ strided 257 : Elapsed 0.006 ms (0.631 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.66% +0.16% +0.00% / +0.99% +0.49% +0.49%] index_copy_ strided 257 : Elapsed 0.006 ms (0.613 ms / 100) 0.626 -> 0.628 ( +0.32%) [ +0.64% +0.00% +0.16% / +0.64% +0.48% +0.32%] index_add_ perm : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.82% +0.00% +1.48% / +0.99% +0.49% +0.49%] index_copy_ perm : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.627 ( +0.32%) [ +1.28% +0.00% +0.00% / +1.12% +0.32% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.633 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.66% +0.16% +0.00% / +0.66% +0.49% +0.66%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.613 ms / 100) 5.231 -> 5.246 ( +0.29%) [ +0.11% +0.15% +0.00% / +0.29% +0.55% +0.67%] index_select const : Elapsed 0.052 ms (5.237 ms / 100) 5.251 -> 5.258 ( +0.13%) [ +0.17% +0.04% +0.00% / +0.13% +0.48% +0.59%] index_select wrap : Elapsed 0.053 ms (5.260 ms / 100) 5.253 -> 5.255 ( +0.04%) [ +0.00% +0.06% +0.17% / +0.04% +0.13% +0.48%] index_select linear : Elapsed 0.053 ms (5.253 ms / 100) 5.237 -> 5.228 ( -0.17%) [ +0.08% +0.08% +0.00% / -0.17% +0.23% +0.36%] index_select reverse : Elapsed 0.052 ms (5.241 ms / 100) 5.224 -> 5.230 ( +0.11%) [ +0.31% +0.00% +0.17% / +0.11% +0.48% +0.65%] index_select skip64 : Elapsed 0.052 ms (5.240 ms / 100) 5.237 -> 5.252 ( +0.29%) [ +0.21% +0.00% +0.31% / +0.29% +0.61% +0.78%] index_select skip256 : Elapsed 0.052 ms (5.248 ms / 100) 5.248 -> 5.260 ( +0.23%) [ +0.30% +0.00% +0.04% / +0.23% +0.63% +0.76%] index_select spread : Elapsed 0.053 ms (5.264 ms / 100) 5.234 -> 5.248 ( +0.27%) [ +0.13% +0.00% +0.02% / +0.27% +0.53% +0.46%] index_select strided 3 : Elapsed 0.052 ms (5.241 ms / 100) 5.229 -> 5.238 ( +0.17%) [ +0.19% +0.00% +0.13% / +0.17% +0.76% +0.71%] index_select random : Elapsed 0.052 ms (5.239 ms / 100) 5.251 -> 5.253 ( +0.04%) [ +0.06% +0.02% +0.00% / +0.04% +0.51% +0.38%] index_select random_sorted : Elapsed 0.053 ms (5.254 ms / 100) B = [1, 500, 200] (stride (500, 1, 500)) A = [1, 5, 200] (stride (200, 200, 1)) dim = 1 0.621 -> 0.624 ( +0.48%) [ +0.81% +0.16% +0.00% / +0.81% +0.48% +0.64%] index_add_ linear : Elapsed 0.006 ms (0.626 ms / 100) 0.638 -> 0.640 ( +0.31%) [ +0.63% +0.16% +0.00% / +0.63% +0.31% +0.31%] index_copy_ linear : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.16% +0.00% / +0.97% +0.48% +0.64%] index_add_ reverse : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.63% +0.16% +0.00% / +0.78% +0.31% +0.16%] index_copy_ reverse : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.00% +0.16% / +1.13% +0.48% +0.48%] index_add_ spread : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.639 ( +0.31%) [ +0.94% +0.00% +0.00% / +1.10% +0.31% +0.31%] index_copy_ spread : Elapsed 0.006 ms (0.643 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.97% +0.00% +0.32% / +0.97% +0.64% +0.64%] index_add_ strided 3 : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.640 ( +0.47%) [ +0.78% +0.00% +0.00% / +0.78% +0.47% +0.94%] index_copy_ strided 3 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.16% +0.00% / +0.81% +0.48% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.628 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.78% +0.00% +0.16% / +0.78% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +1.61% +0.00% +0.32% / +0.97% +0.81% +0.64%] index_add_ strided 257 : Elapsed 0.006 ms (0.631 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.78% +1.41% +0.00% / +0.94% +0.63% +0.94%] index_copy_ strided 257 : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.626 ( +0.64%) [ +0.80% +0.00% +0.00% / +0.80% +0.64% +0.80%] index_add_ perm : Elapsed 0.006 ms (0.627 ms / 100) 0.636 -> 0.641 ( +0.79%) [ +0.94% +0.00% +0.16% / +0.94% +0.79% +0.79%] index_copy_ perm : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.16% +0.00% / +1.13% +0.97% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.628 ms / 100) 0.638 -> 0.639 ( +0.16%) [ +0.78% +0.00% +0.00% / +0.78% +0.16% +0.16%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.360 -> 5.205 ( -2.89%) [ +0.19% +0.00% +0.21% / -2.89% -1.96% -2.03%] index_select const : Elapsed 0.054 ms (5.370 ms / 100) 5.373 -> 5.265 ( -2.01%) [ +0.04% +0.04% +0.00% / -2.01% -1.66% -1.73%] index_select wrap : Elapsed 0.054 ms (5.375 ms / 100) 5.381 -> 5.245 ( -2.53%) [ +0.19% +0.00% +0.00% / -2.53% -1.97% -2.10%] index_select linear : Elapsed 0.054 ms (5.391 ms / 100) 5.358 -> 5.239 ( -2.22%) [ +0.21% +0.00% +0.24% / -2.22% -2.13% -2.20%] index_select reverse : Elapsed 0.054 ms (5.369 ms / 100) 5.343 -> 5.211 ( -2.47%) [ +0.47% +0.00% +0.32% / -2.47% -1.85% -2.08%] index_select skip64 : Elapsed 0.054 ms (5.368 ms / 100) 5.375 -> 5.253 ( -2.27%) [ +0.07% +0.00% +0.30% / -2.27% -1.95% -2.07%] index_select skip256 : Elapsed 0.054 ms (5.379 ms / 100) 5.382 -> 5.246 ( -2.53%) [ +0.22% +0.00% +0.41% / -2.53% -1.88% -1.84%] index_select spread : Elapsed 0.054 ms (5.394 ms / 100) 5.347 -> 5.246 ( -1.89%) [ +0.00% +0.11% +0.30% / -1.89% -1.81% -1.68%] index_select strided 3 : Elapsed 0.053 ms (5.347 ms / 100) 5.347 -> 5.241 ( -1.98%) [ +0.32% +0.00% +0.04% / -1.98% -1.87% -1.57%] index_select random : Elapsed 0.054 ms (5.364 ms / 100) 5.381 -> 5.260 ( -2.25%) [ +0.00% +0.15% +0.02% / -2.25% -1.67% -1.64%] index_select random_sorted : Elapsed 0.054 ms (5.381 ms / 100) B = [1, 500, 200] (stride (500, 1, 500)) A = [1, 5, 200] (stride (1, 200, 1)) dim = 1 0.622 -> 0.624 ( +0.32%) [ +0.80% +0.00% +0.16% / +0.48% +0.64% +0.32%] index_add_ linear : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.63% +0.00% +0.16% / +0.78% +0.63% +0.63%] index_copy_ linear : Elapsed 0.006 ms (0.641 ms / 100) 0.621 -> 0.626 ( +0.81%) [ +0.81% +0.00% +0.00% / +0.97% +0.81% +0.81%] index_add_ reverse : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.642 ( +0.78%) [ +0.63% +0.16% +0.00% / +0.78% +0.78% +0.78%] index_copy_ reverse : Elapsed 0.006 ms (0.641 ms / 100) 0.622 -> 0.626 ( +0.64%) [ +0.64% +0.16% +0.00% / +0.64% +0.80% +0.64%] index_add_ spread : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.642 ( +0.78%) [ +0.63% +0.00% +0.16% / +1.10% +0.94% +0.78%] index_copy_ spread : Elapsed 0.006 ms (0.641 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.81% +0.00% +0.16% / +1.93% +0.81% +0.64%] index_add_ strided 3 : Elapsed 0.006 ms (0.626 ms / 100) 0.636 -> 0.641 ( +0.79%) [ +0.94% +0.31% +0.00% / +1.10% +0.79% +0.79%] index_copy_ strided 3 : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.64% +0.00% +0.00% / +0.64% +0.48% +0.64%] index_add_ strided 7 : Elapsed 0.006 ms (0.626 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.94% +0.00% +0.00% / +0.78% +0.63% +0.63%] index_copy_ strided 7 : Elapsed 0.006 ms (0.643 ms / 100) 0.622 -> 0.625 ( +0.48%) [ +0.80% +0.00% +0.32% / +0.48% +0.48% +0.48%] index_add_ strided 257 : Elapsed 0.006 ms (0.627 ms / 100) 0.635 -> 0.640 ( +0.79%) [ +1.10% +0.47% +0.00% / +1.10% +0.79% +1.10%] index_copy_ strided 257 : Elapsed 0.006 ms (0.642 ms / 100) 0.620 -> 0.625 ( +0.81%) [ +1.13% +0.32% +0.00% / +1.29% +0.81% +0.81%] index_add_ perm : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.639 ( +0.31%) [ +0.78% +0.16% +0.00% / +0.78% +0.31% +0.47%] index_copy_ perm : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.624 ( +0.32%) [ +0.64% +0.00% +0.00% / +0.80% +0.32% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.626 ms / 100) 0.638 -> 0.640 ( +0.31%) [ +0.47% +0.00% +0.00% / +0.63% +0.31% +0.31%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.641 ms / 100) 5.344 -> 5.209 ( -2.53%) [ +0.07% +0.00% +0.19% / -2.53% -1.85% -1.78%] index_select const : Elapsed 0.053 ms (5.348 ms / 100) 5.359 -> 5.272 ( -1.62%) [ +0.00% +0.02% +0.02% / -1.62% -1.51% -1.46%] index_select wrap : Elapsed 0.054 ms (5.359 ms / 100) 5.373 -> 5.232 ( -2.62%) [ +0.35% +0.00% +0.06% / -2.62% -2.12% -1.95%] index_select linear : Elapsed 0.054 ms (5.392 ms / 100) 5.346 -> 5.212 ( -2.51%) [ +0.34% +0.00% +0.21% / -2.51% -1.95% -2.08%] index_select reverse : Elapsed 0.054 ms (5.364 ms / 100) 5.358 -> 5.203 ( -2.89%) [ +0.28% +0.00% +0.24% / -2.89% -2.37% -2.41%] index_select skip64 : Elapsed 0.054 ms (5.373 ms / 100) 5.376 -> 5.242 ( -2.49%) [ +0.17% +0.06% +0.00% / -2.49% -2.33% -2.12%] index_select skip256 : Elapsed 0.054 ms (5.385 ms / 100) 5.368 -> 5.236 ( -2.46%) [ +0.19% +0.00% +0.19% / -2.46% -1.94% -1.90%] index_select spread : Elapsed 0.054 ms (5.378 ms / 100) 5.324 -> 5.235 ( -1.67%) [ +0.00% +0.24% +0.32% / -1.67% -1.28% -1.15%] index_select strided 3 : Elapsed 0.053 ms (5.324 ms / 100) 5.339 -> 5.229 ( -2.06%) [ +0.30% +0.00% +0.19% / -2.06% -1.85% -1.69%] index_select random : Elapsed 0.054 ms (5.355 ms / 100) 5.369 -> 5.245 ( -2.31%) [ +0.19% +0.13% +0.00% / -2.31% -1.64% -1.47%] index_select random_sorted : Elapsed 0.054 ms (5.379 ms / 100) out_shape = [1, 5, 500] in_shape = [1, 5, 200] idx_dim = 2 out_shape = [500, 200, 5] in_shape = [1, 200, 5] idx_dim = 0 B = [500, 200, 5] (stride (1000, 5, 1)) A = [1, 200, 5] (stride (5, 5, 1)) dim = 0 0.500 -> 0.491 ( -1.80%) [ +5.60% +3.80% +0.00% / +10.20% +3.00% -1.80%] index_add_ linear : Elapsed 0.005 ms (0.528 ms / 100) 0.500 -> 0.487 ( -2.60%) [ +0.60% +4.40% +0.00% / +1.20% -2.60% -1.40%] index_copy_ linear : Elapsed 0.005 ms (0.503 ms / 100) 0.494 -> 0.488 ( -1.21%) [ +0.00% +8.91% +2.63% / +1.62% +0.20% -1.21%] index_add_ reverse : Elapsed 0.005 ms (0.494 ms / 100) 0.499 -> 0.492 ( -1.40%) [ +1.20% +1.40% +0.00% / +0.40% -1.00% -1.40%] index_copy_ reverse : Elapsed 0.005 ms (0.505 ms / 100) 0.505 -> 0.494 ( -2.18%) [ +5.74% +2.97% +0.00% / +0.79% -2.18% +3.56%] index_add_ spread : Elapsed 0.005 ms (0.534 ms / 100) 0.506 -> 0.495 ( -2.17%) [ +0.40% +0.00% +7.11% / -0.79% -1.38% -2.17%] index_copy_ spread : Elapsed 0.005 ms (0.508 ms / 100) 0.494 -> 0.489 ( -1.01%) [ +0.00% +4.05% +7.49% / +13.77% +4.66% -1.01%] index_add_ strided 3 : Elapsed 0.005 ms (0.494 ms / 100) 0.500 -> 0.495 ( -1.00%) [ +0.80% +0.80% +0.00% / +1.60% -0.80% -1.00%] index_copy_ strided 3 : Elapsed 0.005 ms (0.504 ms / 100) 0.500 -> 0.493 ( -1.40%) [ +8.40% +1.40% +0.00% / +1.00% -0.20% -1.40%] index_add_ strided 7 : Elapsed 0.005 ms (0.542 ms / 100) 0.500 -> 0.494 ( -1.20%) [ +0.00% +7.00% +2.00% / +1.80% -1.00% -1.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.498 -> 0.496 ( -0.40%) [ +0.00% +7.03% +0.20% / +1.61% +3.01% -0.40%] index_add_ strided 257 : Elapsed 0.005 ms (0.498 ms / 100) good 0.524 -> 0.492 ( -6.11%) [ +3.63% +0.00% +3.05% / +9.54% -6.11% -5.92%] index_copy_ strided 257 : Elapsed 0.005 ms (0.543 ms / 100) 0.508 -> 0.491 ( -3.35%) [ +4.33% +0.00% +4.92% / -1.18% +3.94% -3.35%] index_add_ perm : Elapsed 0.005 ms (0.530 ms / 100) 0.500 -> 0.488 ( -2.40%) [ +1.00% +1.60% +0.00% / +7.00% +1.40% -2.40%] index_copy_ perm : Elapsed 0.005 ms (0.505 ms / 100) 0.495 -> 0.491 ( -0.81%) [ +0.00% +2.42% +0.00% / +16.16% +0.81% -0.81%] index_add_ perm_sorted : Elapsed 0.005 ms (0.495 ms / 100) 0.491 -> 0.493 ( +0.41%) [ +1.43% +2.65% +0.00% / +5.50% +1.83% +0.41%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.498 ms / 100) 6.584 -> 6.624 ( +0.61%) [ +0.33% +0.00% +0.20% / +0.74% +0.70% +0.61%] index_select const : Elapsed 0.066 ms (6.606 ms / 100) 6.587 -> 6.620 ( +0.50%) [ +0.67% +0.00% +0.11% / +0.53% +0.59% +0.50%] index_select wrap : Elapsed 0.066 ms (6.631 ms / 100) 6.584 -> 6.624 ( +0.61%) [ +0.62% +0.00% +0.20% / +0.67% +0.61% +0.84%] index_select linear : Elapsed 0.066 ms (6.625 ms / 100) 6.608 -> 6.621 ( +0.20%) [ +0.42% +0.12% +0.00% / +0.20% +0.30% +0.36%] index_select reverse : Elapsed 0.066 ms (6.636 ms / 100) 6.577 -> 6.620 ( +0.65%) [ +0.75% +0.00% +0.24% / +0.65% +1.09% +0.79%] index_select skip64 : Elapsed 0.066 ms (6.626 ms / 100) 6.579 -> 6.624 ( +0.68%) [ +0.82% +0.05% +0.00% / +0.68% +0.90% +0.74%] index_select skip256 : Elapsed 0.066 ms (6.633 ms / 100) 6.594 -> 6.620 ( +0.39%) [ +0.26% +0.00% +0.12% / +0.39% +0.61% +0.59%] index_select spread : Elapsed 0.066 ms (6.611 ms / 100) 6.581 -> 6.618 ( +0.56%) [ +0.56% +0.00% +0.17% / +0.56% +0.76% +0.81%] index_select random : Elapsed 0.066 ms (6.618 ms / 100) 6.588 -> 6.620 ( +0.49%) [ +0.76% +0.00% +0.26% / +0.61% +0.59% +0.49%] index_select random_sorted : Elapsed 0.066 ms (6.638 ms / 100) B = [500, 200, 5] (stride (1000, 5, 1)) A = [1, 200, 5] (stride (1, 1, 200)) dim = 0 0.575 -> 0.580 ( +0.87%) [ +0.87% +0.00% +0.00% / +1.04% +0.87% +1.04%] index_add_ linear : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.36% +0.00% / +0.89% +3.04% +0.89%] index_copy_ linear : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.00% +0.17% / +1.04% +0.87% +3.30%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +1.25% +0.89%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +0.35% +0.00% / +0.87% +1.22% +1.22%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +0.72% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.582 ( +1.22%) [ +0.87% +0.00% +0.00% / +3.13% +1.74% +1.22%] index_add_ strided 3 : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.08% +1.25%] index_copy_ strided 3 : Elapsed 0.006 ms (0.563 ms / 100) 0.573 -> 0.580 ( +1.22%) [ +1.22% +0.52% +0.00% / +1.22% +1.22% +1.22%] index_add_ strided 7 : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +1.07% +0.00% +0.18% / +0.72% +0.72% +0.89%] index_copy_ strided 7 : Elapsed 0.006 ms (0.565 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.22% +0.35% +0.00% / +1.39% +1.05% +1.05%] index_add_ strided 257 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.89% +0.89% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +0.87% +0.17% +0.00% / +1.04% +0.87% +1.04%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.36% +0.00% / +0.72% +0.89% +0.89%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +1.04% +0.17% +0.00% / +1.04% +0.87% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.90% +0.36% +0.00% / +0.90% +1.08% +0.90%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 16.009 -> 16.077 ( +0.42%) [ +0.32% +0.00% +0.02% / +0.42% +0.44% +0.44%] index_select const : Elapsed 0.161 ms (16.061 ms / 100) 16.016 -> 16.061 ( +0.28%) [ +0.19% +0.00% +0.01% / +0.38% +0.28% +0.31%] index_select wrap : Elapsed 0.160 ms (16.046 ms / 100) 16.021 -> 16.061 ( +0.25%) [ +0.22% +0.09% +0.00% / +0.36% +0.27% +0.25%] index_select linear : Elapsed 0.161 ms (16.056 ms / 100) 16.001 -> 16.079 ( +0.49%) [ +0.32% +0.00% +0.06% / +0.49% +0.56% +0.52%] index_select reverse : Elapsed 0.161 ms (16.053 ms / 100) 16.011 -> 16.048 ( +0.23%) [ +0.39% +0.00% +0.15% / +0.34% +0.34% +0.23%] index_select skip64 : Elapsed 0.161 ms (16.073 ms / 100) 16.015 -> 16.046 ( +0.19%) [ +0.32% +0.00% +0.12% / +0.41% +0.19% +0.21%] index_select skip256 : Elapsed 0.161 ms (16.067 ms / 100) 16.011 -> 16.046 ( +0.22%) [ +0.27% +0.00% +0.16% / +0.42% +0.28% +0.22%] index_select spread : Elapsed 0.161 ms (16.054 ms / 100) 16.019 -> 16.071 ( +0.32%) [ +0.38% +0.00% +0.07% / +0.32% +0.34% +0.36%] index_select random : Elapsed 0.161 ms (16.080 ms / 100) 16.013 -> 16.047 ( +0.21%) [ +0.50% +0.00% +0.10% / +0.38% +0.26% +0.21%] index_select random_sorted : Elapsed 0.161 ms (16.093 ms / 100) B = [500, 200, 5] (stride (1000, 1, 200)) A = [1, 200, 5] (stride (1, 1, 200)) dim = 0 0.500 -> 0.499 ( -0.20%) [+11.20% +2.60% +0.00% / -0.20% -0.20% -0.20%] index_add_ linear : Elapsed 0.006 ms (0.556 ms / 100) 0.495 -> 0.488 ( -1.41%) [ +2.02% +1.82% +0.00% / +3.43% -1.41% +3.03%] index_copy_ linear : Elapsed 0.005 ms (0.505 ms / 100) 0.509 -> 0.497 ( -2.36%) [ +0.00% +5.30% +2.95% / -0.20% +0.20% -2.36%] index_add_ reverse : Elapsed 0.005 ms (0.509 ms / 100) 0.506 -> 0.489 ( -3.36%) [ +6.72% +0.00% +0.20% / -1.38% -3.36% -1.98%] index_copy_ reverse : Elapsed 0.005 ms (0.540 ms / 100) 0.497 -> 0.484 ( -2.62%) [ +0.20% +2.21% +0.00% / +10.66% -2.62% -0.20%] index_add_ spread : Elapsed 0.005 ms (0.498 ms / 100) 0.500 -> 0.485 ( -3.00%) [ +0.20% +7.80% +0.00% / +0.00% -3.00% -1.20%] index_copy_ spread : Elapsed 0.005 ms (0.501 ms / 100) 0.510 -> 0.487 ( -4.51%) [ +5.69% +0.00% +0.20% / +2.16% -4.51% -3.73%] index_add_ strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.494 -> 0.485 ( -1.82%) [ +3.04% +7.69% +0.00% / +3.85% -1.82% -1.62%] index_copy_ strided 3 : Elapsed 0.005 ms (0.509 ms / 100) 0.499 -> 0.491 ( -1.60%) [ +0.00% +8.22% +1.60% / +0.00% -1.60% +1.20%] index_add_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.491 -> 0.485 ( -1.22%) [ +6.72% +2.85% +0.00% / +1.02% -1.22% +3.46%] index_copy_ strided 7 : Elapsed 0.005 ms (0.524 ms / 100) 0.494 -> 0.490 ( -0.81%) [ +1.21% +6.48% +0.00% / +1.21% +3.64% -0.81%] index_add_ strided 257 : Elapsed 0.005 ms (0.500 ms / 100) 0.506 -> 0.487 ( -3.75%) [ +0.00% +6.92% +2.77% / -0.79% -3.16% -3.75%] index_copy_ strided 257 : Elapsed 0.005 ms (0.506 ms / 100) 0.497 -> 0.492 ( -1.01%) [ +0.80% +2.62% +0.00% / +9.05% -1.01% -0.60%] index_add_ perm : Elapsed 0.005 ms (0.501 ms / 100) 0.491 -> 0.484 ( -1.43%) [ +2.85% +2.24% +0.00% / +3.05% -1.43% +0.41%] index_copy_ perm : Elapsed 0.005 ms (0.505 ms / 100) good 0.534 -> 0.489 ( -8.43%) [ +2.06% +0.00% +0.19% / -6.74% -8.43% -7.68%] index_add_ perm_sorted : Elapsed 0.005 ms (0.545 ms / 100) 0.494 -> 0.482 ( -2.43%) [ +2.02% +1.01% +0.00% / +0.81% -2.43% +0.00%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.504 ms / 100) 8.632 -> 8.539 ( -1.08%) [ +0.24% +0.00% +0.24% / +0.19% -1.08% -0.67%] index_select const : Elapsed 0.087 ms (8.653 ms / 100) 8.567 -> 8.549 ( -0.21%) [ +0.23% +0.00% +0.27% / +0.35% -0.16% -0.21%] index_select wrap : Elapsed 0.086 ms (8.587 ms / 100) 8.659 -> 8.575 ( -0.97%) [ +0.13% +0.03% +0.00% / +0.22% -0.88% -0.97%] index_select linear : Elapsed 0.087 ms (8.670 ms / 100) 8.572 -> 8.534 ( -0.44%) [ +0.00% +0.03% +0.05% / +0.08% -0.44% -0.20%] index_select reverse : Elapsed 0.086 ms (8.572 ms / 100) 8.609 -> 8.514 ( -1.10%) [ +0.17% +0.00% +0.20% / +0.22% -0.69% -1.10%] index_select skip64 : Elapsed 0.086 ms (8.624 ms / 100) 8.615 -> 8.527 ( -1.02%) [ +0.02% +0.00% +0.10% / -0.12% -1.02% -0.67%] index_select skip256 : Elapsed 0.086 ms (8.617 ms / 100) 8.591 -> 8.515 ( -0.88%) [ +0.19% +0.14% +0.00% / +0.19% -0.85% -0.88%] index_select spread : Elapsed 0.086 ms (8.607 ms / 100) 8.657 -> 8.602 ( -0.64%) [ +0.03% +0.06% +0.00% / +0.17% -0.52% -0.64%] index_select random : Elapsed 0.087 ms (8.660 ms / 100) 8.638 -> 8.544 ( -1.09%) [ +0.37% +0.09% +0.00% / +0.00% -1.09% -0.97%] index_select random_sorted : Elapsed 0.087 ms (8.670 ms / 100) B = [500, 200, 5] (stride (5, 2500, 1)) A = [1, 200, 5] (stride (1000, 5, 1)) dim = 0 0.575 -> 0.580 ( +0.87%) [ +2.78% +0.00% +0.17% / +1.04% +1.22% +0.87%] index_add_ linear : Elapsed 0.006 ms (0.591 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +0.90%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.580 ( +1.22%) [ +1.05% +0.17% +0.00% / +1.75% +1.75% +1.22%] index_add_ reverse : Elapsed 0.006 ms (0.579 ms / 100) 0.557 -> 0.562 ( +0.90%) [ +0.72% +0.18% +0.00% / +0.90% +1.26% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.561 ms / 100) 0.581 -> 0.579 ( -0.34%) [ +0.00% +2.58% +1.20% / -0.34% +0.00% -0.34%] index_add_ spread : Elapsed 0.006 ms (0.581 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +0.90% +0.54%] index_copy_ spread : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.580 ( +1.22%) [ +1.05% +0.17% +0.00% / +1.57% +1.22% +1.40%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +5.03%] index_copy_ strided 3 : Elapsed 0.006 ms (0.561 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +1.39% +0.00% / +1.05% +0.87% +1.22%] index_add_ strided 7 : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.72% +0.54% +0.72%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.581 ( +1.40%) [ +1.75% +0.17% +0.00% / +2.09% +1.40% +1.40%] index_add_ strided 257 : Elapsed 0.006 ms (0.583 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.36% +0.00% +0.00% / +0.54% +0.54% +0.54%] index_copy_ strided 257 : Elapsed 0.006 ms (0.561 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +1.40% +0.35% +0.00% / +1.05% +1.22% +1.40%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.54% +0.00% +0.00% / +0.72% +1.07% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +3.31% +0.17% +0.00% / +1.22% +0.87% +1.05%] index_add_ perm_sorted : Elapsed 0.006 ms (0.593 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.54% +0.18% +0.00% / +0.54% +0.54% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.561 ms / 100) 17.316 -> 17.275 ( -0.24%) [ +0.09% +0.00% +0.08% / +0.02% -0.24% -0.15%] index_select const : Elapsed 0.173 ms (17.331 ms / 100) 17.286 -> 17.280 ( -0.03%) [ +0.25% +0.00% +0.14% / +0.29% +0.05% -0.03%] index_select wrap : Elapsed 0.173 ms (17.329 ms / 100) 17.306 -> 17.271 ( -0.20%) [ +0.29% +0.00% +0.09% / -0.02% -0.10% -0.20%] index_select linear : Elapsed 0.174 ms (17.357 ms / 100) 17.296 -> 17.288 ( -0.05%) [ +0.43% +0.00% +0.23% / +0.36% +0.23% -0.05%] index_select reverse : Elapsed 0.174 ms (17.371 ms / 100) 17.291 -> 17.233 ( -0.34%) [ +0.23% +0.07% +0.00% / +0.27% -0.34% +0.02%] index_select skip64 : Elapsed 0.173 ms (17.330 ms / 100) 17.293 -> 17.290 ( -0.02%) [ +0.16% +0.00% +0.01% / +0.54% -0.01% -0.02%] index_select skip256 : Elapsed 0.173 ms (17.320 ms / 100) 17.301 -> 17.243 ( -0.34%) [ +0.17% +0.00% +0.04% / +0.29% +0.15% -0.34%] index_select spread : Elapsed 0.173 ms (17.331 ms / 100) 17.290 -> 17.293 ( +0.02%) [ +0.24% +0.00% +0.03% / +0.02% +0.12% +0.02%] index_select random : Elapsed 0.173 ms (17.331 ms / 100) 17.308 -> 17.272 ( -0.21%) [ +0.28% +0.01% +0.00% / +0.23% -0.16% -0.21%] index_select random_sorted : Elapsed 0.174 ms (17.357 ms / 100) B = [500, 200, 5] (stride (1, 2500, 500)) A = [1, 200, 5] (stride (1000, 5, 1)) dim = 0 0.498 -> 0.487 ( -2.21%) [ +7.43% +5.02% +0.00% / +5.42% +4.42% -2.21%] index_add_ linear : Elapsed 0.005 ms (0.535 ms / 100) 0.494 -> 0.490 ( -0.81%) [ +1.21% +2.23% +0.00% / +5.06% -0.81% +4.66%] index_copy_ linear : Elapsed 0.005 ms (0.500 ms / 100) 0.494 -> 0.499 ( +1.01%) [ +8.30% +3.64% +0.00% / +2.43% +1.01% +1.01%] index_add_ reverse : Elapsed 0.005 ms (0.535 ms / 100) 0.496 -> 0.490 ( -1.21%) [ +0.60% +7.46% +0.00% / +3.23% -1.21% +1.21%] index_copy_ reverse : Elapsed 0.005 ms (0.499 ms / 100) 0.496 -> 0.493 ( -0.60%) [ +8.87% +4.64% +0.00% / +9.68% -0.60% +3.43%] index_add_ spread : Elapsed 0.005 ms (0.540 ms / 100) 0.497 -> 0.490 ( -1.41%) [ +0.80% +5.63% +0.00% / +1.21% -0.40% -1.41%] index_copy_ spread : Elapsed 0.005 ms (0.501 ms / 100) 0.494 -> 0.491 ( -0.61%) [ +0.00% +8.30% +0.00% / +10.93% -0.61% +4.66%] index_add_ strided 3 : Elapsed 0.005 ms (0.494 ms / 100) 0.498 -> 0.492 ( -1.20%) [ +0.20% +4.22% +0.00% / +1.00% +0.00% -1.20%] index_copy_ strided 3 : Elapsed 0.005 ms (0.499 ms / 100) 0.492 -> 0.486 ( -1.22%) [ +1.02% +9.15% +0.00% / +2.24% -1.22% -0.20%] index_add_ strided 7 : Elapsed 0.005 ms (0.497 ms / 100) 0.493 -> 0.490 ( -0.61%) [ +1.22% +3.04% +0.00% / +1.83% +0.61% -0.61%] index_copy_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.498 -> 0.484 ( -2.81%) [ +0.20% +6.83% +0.00% / +8.84% -2.81% -1.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.499 ms / 100) 0.499 -> 0.491 ( -1.60%) [ +0.00% +1.80% +0.20% / +0.80% +4.01% -1.60%] index_copy_ strided 257 : Elapsed 0.005 ms (0.499 ms / 100) 0.499 -> 0.490 ( -1.80%) [ +0.00% +2.00% +0.60% / +0.00% -1.80% +0.00%] index_add_ perm : Elapsed 0.005 ms (0.499 ms / 100) 0.493 -> 0.494 ( +0.20%) [ +1.62% +5.88% +0.00% / +0.41% +3.04% +0.20%] index_copy_ perm : Elapsed 0.005 ms (0.501 ms / 100) 0.496 -> 0.491 ( -1.01%) [ +1.21% +2.02% +0.00% / +1.41% -1.01% +0.60%] index_add_ perm_sorted : Elapsed 0.005 ms (0.502 ms / 100) 0.493 -> 0.498 ( +1.01%) [ +1.62% +3.04% +0.00% / +1.01% +6.69% +1.01%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) GOOD 8.804 -> 6.602 (-25.01%) [ +0.02% +0.00% +0.18% / -25.01% -24.61% -24.77%] index_select const : Elapsed 0.088 ms (8.806 ms / 100) GOOD 8.839 -> 6.611 (-25.21%) [ +0.02% +0.00% +0.03% / -25.20% -25.21% -24.98%] index_select wrap : Elapsed 0.088 ms (8.841 ms / 100) GOOD 8.882 -> 6.614 (-25.53%) [ +0.10% +0.00% +0.06% / -25.47% -25.53% -25.52%] index_select linear : Elapsed 0.089 ms (8.891 ms / 100) GOOD 8.805 -> 6.699 (-23.92%) [ +0.06% +0.06% +0.00% / -23.92% -23.30% -23.40%] index_select reverse : Elapsed 0.088 ms (8.810 ms / 100) GOOD 8.872 -> 6.616 (-25.43%) [ +0.20% +0.11% +0.00% / -25.24% -25.43% -25.33%] index_select skip64 : Elapsed 0.089 ms (8.890 ms / 100) GOOD 8.836 -> 6.624 (-25.03%) [ +0.23% +0.15% +0.00% / -25.02% -25.03% -24.92%] index_select skip256 : Elapsed 0.089 ms (8.856 ms / 100) GOOD 8.795 -> 6.611 (-24.83%) [ +0.23% +0.00% +0.11% / -24.83% -24.66% -24.59%] index_select spread : Elapsed 0.088 ms (8.815 ms / 100) GOOD 8.851 -> 6.621 (-25.19%) [ +0.01% +0.00% +0.02% / -25.05% -25.19% -25.05%] index_select random : Elapsed 0.089 ms (8.852 ms / 100) GOOD 8.865 -> 6.623 (-25.29%) [ +0.29% +0.11% +0.00% / -25.25% -25.29% -25.20%] index_select random_sorted : Elapsed 0.089 ms (8.891 ms / 100) B = [500, 200, 5] (stride (1, 2500, 500)) A = [1, 200, 5] (stride (1, 1, 200)) dim = 0 0.575 -> 0.581 ( +1.04%) [ +1.04% +2.61% +0.00% / +1.04% +1.39% +1.22%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.72% +0.18% +0.00% / +0.89% +0.89% +1.07%] index_copy_ linear : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +1.04% +1.57% +0.00% / +1.04% +1.39% +1.22%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.36% +0.00% / +0.72% +1.43% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +1.22% +1.22% +0.00% / +1.04% +1.22% +1.04%] index_add_ spread : Elapsed 0.006 ms (0.582 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.72% +0.36% +0.00% / +0.90% +1.25% +0.90%] index_copy_ spread : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +1.04% +1.22% +0.00% / +1.04% +1.04% +1.22%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.89% +0.89%] index_copy_ strided 3 : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.39% +1.74% +0.00% / +1.39% +1.05% +1.39%] index_add_ strided 7 : Elapsed 0.006 ms (0.582 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.08% +1.08%] index_copy_ strided 7 : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +1.04% +0.35% +0.00% / +1.04% +1.39% +1.04%] index_add_ strided 257 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.89% +0.18% +0.00% / +0.89% +1.07% +0.89%] index_copy_ strided 257 : Elapsed 0.006 ms (0.564 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +0.87% +0.17% +0.00% / +1.04% +1.39% +1.04%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.07% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.576 -> 0.580 ( +0.69%) [ +0.87% +0.00% +0.00% / +0.69% +1.39% +1.22%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.07% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 18.864 -> 18.668 ( -1.04%) [ +0.00% +0.17% +0.25% / +0.26% -0.88% -1.04%] index_select const : Elapsed 0.189 ms (18.864 ms / 100) 18.854 -> 18.656 ( -1.05%) [ +0.00% +0.07% +0.11% / +0.40% -1.05% -0.83%] index_select wrap : Elapsed 0.189 ms (18.854 ms / 100) 18.896 -> 18.646 ( -1.32%) [ +0.22% +0.07% +0.00% / -0.01% -1.32% -1.19%] index_select linear : Elapsed 0.189 ms (18.938 ms / 100) 18.905 -> 18.673 ( -1.23%) [ +0.00% +0.08% +0.06% / -0.02% -1.23% -1.09%] index_select reverse : Elapsed 0.189 ms (18.905 ms / 100) 18.863 -> 18.669 ( -1.03%) [ +0.00% +0.37% +0.13% / +0.32% -1.03% -0.98%] index_select skip64 : Elapsed 0.189 ms (18.863 ms / 100) 18.880 -> 18.662 ( -1.15%) [ +0.00% +0.11% +0.01% / +0.01% -1.15% -1.00%] index_select skip256 : Elapsed 0.189 ms (18.880 ms / 100) 18.900 -> 18.656 ( -1.29%) [ +0.02% +0.00% +0.13% / +0.06% -1.29% -1.22%] index_select spread : Elapsed 0.189 ms (18.904 ms / 100) 18.808 -> 18.672 ( -0.72%) [ +0.38% +0.00% +0.26% / +0.38% -0.66% -0.72%] index_select random : Elapsed 0.189 ms (18.880 ms / 100) 18.837 -> 18.659 ( -0.94%) [ +0.38% +0.00% +0.26% / +0.24% -0.94% -0.90%] index_select random_sorted : Elapsed 0.189 ms (18.909 ms / 100) B = [500, 200, 5] (stride (1, 500, 100000)) A = [1, 200, 5] (stride (1000, 5, 1)) dim = 0 0.574 -> 0.581 ( +1.22%) [ +1.22% +0.35% +0.00% / +1.22% +1.39% +1.39%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.72% +0.00% / +0.54% +0.72% +0.72%] index_copy_ linear : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +1.05% +0.17% +0.00% / +1.22% +1.39% +1.39%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.61% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.581 ( +1.22%) [ +1.05% +0.17% +0.00% / +1.22% +1.39% +1.39%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.72% +0.90% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +1.22% +0.00% / +0.87% +0.87% +1.57%] index_add_ strided 3 : Elapsed 0.006 ms (0.581 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.90% +1.08%] index_copy_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.582 ( +1.22%) [ +1.04% +0.17% +0.00% / +7.48% +1.22% +1.91%] index_add_ strided 7 : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +1.25% +0.00% +0.00% / +0.54% +0.72% +2.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.566 ms / 100) 0.574 -> 0.582 ( +1.39%) [ +6.27% +0.17% +0.00% / +4.01% +1.57% +1.39%] index_add_ strided 257 : Elapsed 0.006 ms (0.610 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.72% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.580 ( +0.87%) [ +1.04% +0.17% +0.00% / +1.04% +0.87% +1.22%] index_add_ perm : Elapsed 0.006 ms (0.581 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.54% +0.00% +0.00% / +0.54% +0.54% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.582 ( +1.22%) [ +1.22% +0.00% +0.00% / +1.57% +1.22% +4.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.582 ms / 100) 0.559 -> 0.564 ( +0.89%) [ +0.72% +0.00% +0.00% / +2.15% +0.89% +1.07%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 18.463 -> 18.428 ( -0.19%) [ +0.42% +0.00% +0.02% / +0.44% -0.19% -0.18%] index_select const : Elapsed 0.185 ms (18.541 ms / 100) 18.487 -> 18.395 ( -0.50%) [ +0.18% +0.08% +0.00% / +0.04% -0.50% -0.36%] index_select wrap : Elapsed 0.185 ms (18.520 ms / 100) 18.473 -> 18.396 ( -0.42%) [ +0.09% +0.00% +0.03% / +0.02% -0.42% -0.35%] index_select linear : Elapsed 0.185 ms (18.489 ms / 100) 18.438 -> 18.395 ( -0.23%) [ +0.60% +0.00% +0.15% / +0.40% -0.22% -0.23%] index_select reverse : Elapsed 0.185 ms (18.549 ms / 100) 18.481 -> 18.392 ( -0.48%) [ +0.21% +0.06% +0.00% / -0.02% -0.48% -0.45%] index_select skip64 : Elapsed 0.185 ms (18.519 ms / 100) 18.444 -> 18.382 ( -0.34%) [ +0.29% +0.00% +0.12% / +0.22% -0.34% -0.24%] index_select skip256 : Elapsed 0.185 ms (18.498 ms / 100) 18.442 -> 18.386 ( -0.30%) [ +0.27% +0.27% +0.00% / +0.37% -0.30% -0.23%] index_select spread : Elapsed 0.185 ms (18.492 ms / 100) 18.418 -> 18.391 ( -0.15%) [ +0.25% +0.29% +0.00% / +0.14% +0.01% -0.15%] index_select random : Elapsed 0.185 ms (18.464 ms / 100) 18.447 -> 18.391 ( -0.30%) [ +0.23% +0.18% +0.00% / +0.23% -0.30% -0.28%] index_select random_sorted : Elapsed 0.185 ms (18.490 ms / 100) out_shape = [1, 500, 5] in_shape = [1, 200, 5] idx_dim = 1 B = [1, 500, 5] (stride (2500, 1, 500)) A = [1, 200, 5] (stride (1, 5, 1)) dim = 1 0.505 -> 0.518 ( +2.57%) [ +1.58% +9.11% +0.00% / +4.55% +3.17% +2.57%] index_add_ linear : Elapsed 0.005 ms (0.513 ms / 100) 0.486 -> 0.503 ( +3.50%) [+13.17% +0.00% +2.88% / +8.02% +3.50% +5.35%] index_copy_ linear : Elapsed 0.005 ms (0.550 ms / 100) 0.478 -> 0.473 ( -1.05%) [ +5.23% +0.00% +0.63% / -1.05% +3.14% +5.02%] index_add_ reverse : Elapsed 0.005 ms (0.503 ms / 100) 0.465 -> 0.473 ( +1.72%) [+11.18% +3.23% +0.00% / +1.72% +9.89% +10.75%] index_copy_ reverse : Elapsed 0.005 ms (0.517 ms / 100) 0.464 -> 0.464 ( +0.00%) [ +1.51% +2.80% +0.00% / +2.16% +0.00% +4.74%] index_add_ spread : Elapsed 0.005 ms (0.471 ms / 100) 0.461 -> 0.472 ( +2.39%) [ +2.39% +3.90% +0.00% / +2.39% +2.39% +3.69%] index_copy_ spread : Elapsed 0.005 ms (0.472 ms / 100) 0.469 -> 0.469 ( +0.00%) [ +0.00% +1.92% +0.64% / +2.99% +0.00% +1.07%] index_add_ strided 3 : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +5.59% +3.66% +0.00% / +0.86% +0.65% +1.72%] index_copy_ strided 3 : Elapsed 0.005 ms (0.491 ms / 100) 0.471 -> 0.466 ( -1.06%) [ +0.00% +1.70% +0.00% / +0.85% -1.06% +1.06%] index_add_ strided 7 : Elapsed 0.005 ms (0.471 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +0.00% +1.28% +0.21% / +0.43% -1.07% +6.84%] index_copy_ strided 7 : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.469 ( +0.86%) [ +0.00% +3.01% +0.43% / +1.72% +0.86% +2.37%] index_add_ strided 257 : Elapsed 0.005 ms (0.465 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +0.65% +3.87% +0.00% / +1.72% -0.43% +1.94%] index_copy_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.470 -> 0.468 ( -0.43%) [ +0.64% +2.34% +0.00% / +0.00% -0.43% +5.53%] index_add_ perm : Elapsed 0.005 ms (0.473 ms / 100) 0.477 -> 0.461 ( -3.35%) [ +3.77% +0.00% +4.61% / -1.05% -3.35% +3.98%] index_copy_ perm : Elapsed 0.005 ms (0.495 ms / 100) 0.474 -> 0.463 ( -2.32%) [ +2.11% +0.00% +5.27% / +0.00% -2.32% -0.42%] index_add_ perm_sorted : Elapsed 0.005 ms (0.484 ms / 100) 0.479 -> 0.464 ( -3.13%) [ +1.04% +0.00% +2.92% / -1.25% -3.13% -1.46%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.484 ms / 100) 0.520 -> 0.532 ( +2.31%) [ +4.62% +1.73% +0.00% / +4.04% +2.31% +9.81%] index_select const : Elapsed 0.005 ms (0.544 ms / 100) 0.523 -> 0.529 ( +1.15%) [ +8.80% +2.29% +0.00% / +3.06% +1.15% +7.84%] index_select wrap : Elapsed 0.006 ms (0.569 ms / 100) 0.522 -> 0.528 ( +1.15%) [ +4.41% +1.53% +0.00% / +17.62% +1.15% +2.49%] index_select linear : Elapsed 0.005 ms (0.545 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +4.41% +2.11% +0.00% / +5.18% +1.54% +2.69%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.529 -> 0.530 ( +0.19%) [ +9.83% +0.00% +0.38% / +1.70% +0.19% +0.76%] index_select skip64 : Elapsed 0.006 ms (0.581 ms / 100) 0.522 -> 0.523 ( +0.19%) [ +3.26% +3.26% +0.00% / +4.21% +0.19% +1.92%] index_select skip256 : Elapsed 0.005 ms (0.539 ms / 100) 0.523 -> 0.539 ( +3.06%) [ +8.41% +0.19% +0.00% / +3.06% +4.97% +3.44%] index_select spread : Elapsed 0.006 ms (0.567 ms / 100) 0.520 -> 0.542 ( +4.23%) [ +4.81% +1.73% +0.00% / +4.23% +16.35% +9.23%] index_select strided 3 : Elapsed 0.005 ms (0.545 ms / 100) 0.514 -> 0.529 ( +2.92%) [ +6.81% +1.95% +0.00% / +5.06% +2.92% +15.37%] index_select strided 5 : Elapsed 0.005 ms (0.549 ms / 100) 0.523 -> 0.527 ( +0.76%) [ +4.02% +0.00% +1.34% / +3.06% +0.76% +2.49%] index_select strided 7 : Elapsed 0.005 ms (0.544 ms / 100) 0.521 -> 0.526 ( +0.96%) [ +4.80% +0.77% +0.00% / +3.84% +0.96% +1.73%] index_select strided 8 : Elapsed 0.005 ms (0.546 ms / 100) 0.516 -> 0.530 ( +2.71%) [+11.43% +1.74% +0.00% / +11.82% +2.71% +3.68%] index_select strided 16 : Elapsed 0.006 ms (0.575 ms / 100) 0.517 -> 0.532 ( +2.90%) [ +8.90% +0.97% +0.00% / +3.09% +2.90% +4.06%] index_select strided 64 : Elapsed 0.006 ms (0.563 ms / 100) 0.522 -> 0.528 ( +1.15%) [ +4.60% +0.38% +0.00% / +3.45% +1.15% +2.49%] index_select strided 100 : Elapsed 0.005 ms (0.546 ms / 100) 0.518 -> 0.526 ( +1.54%) [ +4.63% +0.58% +0.00% / +3.86% +1.54% +2.90%] index_select random : Elapsed 0.005 ms (0.542 ms / 100) 0.526 -> 0.529 ( +0.57%) [ +3.04% +0.00% +0.57% / +2.28% +0.57% +5.13%] index_select random_sorted : Elapsed 0.005 ms (0.542 ms / 100) B = [1, 500, 5] (stride (5, 5, 1)) A = [1, 200, 5] (stride (1000, 5, 1)) dim = 1 0.466 -> 0.472 ( +1.29%) [ +0.00% +1.72% +17.17% / +1.50% +1.29% +2.36%] index_add_ linear : Elapsed 0.005 ms (0.466 ms / 100) 0.461 -> 0.463 ( +0.43%) [ +2.82% +3.04% +0.00% / +3.90% +0.43% +3.04%] index_copy_ linear : Elapsed 0.005 ms (0.474 ms / 100) 0.469 -> 0.473 ( +0.85%) [ +0.00% +3.20% +0.43% / +1.07% +1.92% +0.85%] index_add_ reverse : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.456 ( -1.94%) [ +0.86% +1.72% +0.00% / +17.85% -1.94% +2.37%] index_copy_ reverse : Elapsed 0.005 ms (0.469 ms / 100) 0.469 -> 0.466 ( -0.64%) [ +2.99% +4.48% +0.00% / +1.28% -0.64% +2.56%] index_add_ spread : Elapsed 0.005 ms (0.483 ms / 100) 0.466 -> 0.463 ( -0.64%) [ +1.50% +2.15% +0.00% / +1.72% -0.64% +1.07%] index_copy_ spread : Elapsed 0.005 ms (0.473 ms / 100) 0.465 -> 0.462 ( -0.65%) [ +1.29% +2.80% +0.00% / +1.29% -0.65% +0.65%] index_add_ strided 3 : Elapsed 0.005 ms (0.471 ms / 100) 0.463 -> 0.471 ( +1.73%) [ +2.81% +2.81% +0.00% / +4.10% +2.16% +1.73%] index_copy_ strided 3 : Elapsed 0.005 ms (0.476 ms / 100) 0.466 -> 0.463 ( -0.64%) [ +0.00% +5.79% +2.15% / +1.50% -0.64% +0.21%] index_add_ strided 7 : Elapsed 0.005 ms (0.466 ms / 100) 0.467 -> 0.462 ( -1.07%) [ +5.78% +1.28% +0.00% / +3.85% -1.07% +0.86%] index_copy_ strided 7 : Elapsed 0.005 ms (0.494 ms / 100) 0.465 -> 0.465 ( +0.00%) [ +0.65% +2.58% +0.00% / +1.72% +0.00% +2.58%] index_add_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.479 ( +3.01%) [ +0.86% +3.01% +0.00% / +3.01% +6.24% +7.74%] index_copy_ strided 257 : Elapsed 0.005 ms (0.469 ms / 100) 0.467 -> 0.474 ( +1.50%) [ +0.64% +2.36% +0.00% / +1.50% +6.42% +5.14%] index_add_ perm : Elapsed 0.005 ms (0.470 ms / 100) 0.464 -> 0.475 ( +2.37%) [ +7.54% +3.02% +0.00% / +3.45% +15.52% +2.37%] index_copy_ perm : Elapsed 0.005 ms (0.499 ms / 100) 0.469 -> 0.468 ( -0.21%) [ +0.00% +2.56% +0.85% / +1.07% +0.21% -0.21%] index_add_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.469 -> 0.459 ( -2.13%) [ +4.90% +5.33% +0.00% / +3.62% -2.13% +0.00%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.492 ms / 100) 0.529 -> 0.520 ( -1.70%) [ +3.21% +0.00% +3.40% / +0.76% -1.70% +1.51%] index_select const : Elapsed 0.005 ms (0.546 ms / 100) 0.521 -> 0.525 ( +0.77%) [ +5.95% +2.11% +0.00% / +7.49% +0.77% +1.92%] index_select wrap : Elapsed 0.006 ms (0.552 ms / 100) 0.523 -> 0.524 ( +0.19%) [ +4.02% +3.63% +0.00% / +2.68% +0.19% +0.96%] index_select linear : Elapsed 0.005 ms (0.544 ms / 100) 0.519 -> 0.523 ( +0.77%) [ +4.82% +1.73% +0.00% / +6.17% +0.77% +2.50%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.517 -> 0.525 ( +1.55%) [ +4.06% +2.90% +0.00% / +5.22% +1.55% +2.51%] index_select skip64 : Elapsed 0.005 ms (0.538 ms / 100) 0.529 -> 0.523 ( -1.13%) [ +4.35% +0.00% +1.13% / +1.13% -1.13% +0.19%] index_select skip256 : Elapsed 0.006 ms (0.552 ms / 100) 0.526 -> 0.528 ( +0.38%) [+10.84% +0.00% +6.08% / +2.66% +0.38% +0.95%] index_select spread : Elapsed 0.006 ms (0.583 ms / 100) 0.520 -> 0.519 ( -0.19%) [+16.92% +1.73% +0.00% / +3.27% -0.19% +2.12%] index_select strided 3 : Elapsed 0.006 ms (0.608 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +4.84% +4.64% +0.00% / +4.26% +0.97% +2.90%] index_select strided 5 : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.520 ( +0.19%) [ +5.39% +0.96% +0.00% / +3.85% +0.19% +2.12%] index_select strided 7 : Elapsed 0.005 ms (0.547 ms / 100) 0.520 -> 0.526 ( +1.15%) [ +4.42% +0.58% +0.00% / +3.27% +1.15% +1.15%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.521 -> 0.521 ( +0.00%) [ +3.84% +2.11% +0.00% / +1.73% +0.00% +2.11%] index_select strided 16 : Elapsed 0.005 ms (0.541 ms / 100) 0.519 -> 0.530 ( +2.12%) [ +4.43% +2.70% +0.00% / +2.89% +2.12% +3.28%] index_select strided 64 : Elapsed 0.005 ms (0.542 ms / 100) 0.516 -> 0.530 ( +2.71%) [ +5.23% +3.10% +0.00% / +4.26% +2.71% +9.50%] index_select strided 100 : Elapsed 0.005 ms (0.543 ms / 100) 0.511 -> 0.535 ( +4.70%) [+12.33% +4.11% +0.00% / +4.70% +12.13% +18.79%] index_select random : Elapsed 0.006 ms (0.574 ms / 100) 0.526 -> 0.522 ( -0.76%) [ +2.28% +4.56% +0.00% / +2.09% -0.76% +2.09%] index_select random_sorted : Elapsed 0.005 ms (0.538 ms / 100) B = [1, 500, 5] (stride (1, 5, 1)) A = [1, 200, 5] (stride (1000, 1, 200)) dim = 1 0.467 -> 0.460 ( -1.50%) [ +2.78% +3.64% +0.00% / +1.71% -1.50% +7.28%] index_add_ linear : Elapsed 0.005 ms (0.480 ms / 100) 0.461 -> 0.460 ( -0.22%) [ +4.56% +2.17% +0.00% / +4.12% -0.22% +14.32%] index_copy_ linear : Elapsed 0.005 ms (0.482 ms / 100) 0.470 -> 0.465 ( -1.06%) [ +2.77% +1.70% +0.00% / +5.11% -1.06% -0.43%] index_add_ reverse : Elapsed 0.005 ms (0.483 ms / 100) 0.462 -> 0.468 ( +1.30%) [ +6.93% +2.38% +0.00% / +7.14% +1.30% +2.16%] index_copy_ reverse : Elapsed 0.005 ms (0.494 ms / 100) 0.468 -> 0.467 ( -0.21%) [ +0.00% +8.76% +1.07% / +1.50% -0.21% +1.50%] index_add_ spread : Elapsed 0.005 ms (0.468 ms / 100) 0.461 -> 0.461 ( +0.00%) [ +2.60% +10.41% +0.00% / +3.47% +0.00% +3.25%] index_copy_ spread : Elapsed 0.005 ms (0.473 ms / 100) 0.464 -> 0.460 ( -0.86%) [ +1.29% +9.91% +0.00% / +2.16% -0.86% +1.72%] index_add_ strided 3 : Elapsed 0.005 ms (0.470 ms / 100) 0.460 -> 0.458 ( -0.43%) [+10.87% +18.04% +0.00% / +4.35% -0.43% +3.70%] index_copy_ strided 3 : Elapsed 0.005 ms (0.510 ms / 100) 0.463 -> 0.464 ( +0.22%) [ +0.65% +1.94% +0.00% / +2.16% +0.22% +5.83%] index_add_ strided 7 : Elapsed 0.005 ms (0.466 ms / 100) 0.462 -> 0.466 ( +0.87%) [ +1.52% +3.90% +0.00% / +3.46% +0.87% +1.73%] index_copy_ strided 7 : Elapsed 0.005 ms (0.469 ms / 100) 0.466 -> 0.460 ( -1.29%) [ +0.00% +1.29% +7.08% / +1.93% -1.29% +0.00%] index_add_ strided 257 : Elapsed 0.005 ms (0.466 ms / 100) 0.470 -> 0.458 ( -2.55%) [ +0.00% +2.13% +4.47% / +8.09% -2.55% -0.21%] index_copy_ strided 257 : Elapsed 0.005 ms (0.470 ms / 100) 0.467 -> 0.469 ( +0.43%) [ +0.00% +1.93% +9.21% / +37.69% +3.43% +0.43%] index_add_ perm : Elapsed 0.005 ms (0.467 ms / 100) 0.461 -> 0.459 ( -0.43%) [ +1.74% +3.25% +0.00% / +3.69% -0.43% +4.56%] index_copy_ perm : Elapsed 0.005 ms (0.469 ms / 100) 0.463 -> 0.463 ( +0.00%) [ +1.73% +10.58% +0.00% / +2.38% +0.00% +1.73%] index_add_ perm_sorted : Elapsed 0.005 ms (0.471 ms / 100) 0.461 -> 0.456 ( -1.08%) [ +0.87% +12.36% +0.00% / +2.39% -1.08% +2.60%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.465 ms / 100) 0.519 -> 0.524 ( +0.96%) [ +4.24% +2.12% +0.00% / +3.66% +0.96% +2.50%] index_select const : Elapsed 0.005 ms (0.541 ms / 100) 0.528 -> 0.522 ( -1.14%) [ +5.87% +0.00% +1.52% / +1.70% -1.14% +1.70%] index_select wrap : Elapsed 0.006 ms (0.559 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +4.99% +4.99% +0.00% / +3.26% +0.19% +8.83%] index_select linear : Elapsed 0.005 ms (0.547 ms / 100) 0.524 -> 0.546 ( +4.20%) [ +4.01% +1.34% +0.00% / +9.92% +4.20% +9.16%] index_select reverse : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.537 ( +2.87%) [ +4.02% +0.00% +0.00% / +16.86% +13.03% +2.87%] index_select skip64 : Elapsed 0.005 ms (0.543 ms / 100) 0.516 -> 0.524 ( +1.55%) [ +5.62% +3.88% +0.00% / +4.46% +1.55% +3.68%] index_select skip256 : Elapsed 0.005 ms (0.545 ms / 100) 0.515 -> 0.523 ( +1.55%) [+12.23% +2.72% +0.00% / +4.47% +1.55% +4.47%] index_select spread : Elapsed 0.006 ms (0.578 ms / 100) 0.518 -> 0.526 ( +1.54%) [ +3.47% +1.93% +0.00% / +4.25% +1.54% +3.47%] index_select strided 3 : Elapsed 0.005 ms (0.536 ms / 100) 0.525 -> 0.518 ( -1.33%) [ +2.29% +4.57% +0.00% / +3.62% -1.33% +1.90%] index_select strided 5 : Elapsed 0.005 ms (0.537 ms / 100) 0.528 -> 0.520 ( -1.52%) [ +2.27% +0.00% +1.52% / +1.89% -1.52% +1.33%] index_select strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.521 -> 0.520 ( -0.19%) [ +4.80% +0.77% +0.00% / +4.22% -0.19% +3.07%] index_select strided 8 : Elapsed 0.005 ms (0.546 ms / 100) 0.523 -> 0.528 ( +0.96%) [ +8.99% +0.96% +0.00% / +3.44% +0.96% +1.72%] index_select strided 16 : Elapsed 0.006 ms (0.570 ms / 100) 0.526 -> 0.525 ( -0.19%) [ +2.85% +0.00% +3.61% / +3.42% -0.19% +2.28%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.531 -> 0.526 ( -0.94%) [ +2.07% +0.00% +12.99% / +2.07% -0.94% +1.13%] index_select strided 100 : Elapsed 0.005 ms (0.542 ms / 100) 0.524 -> 0.523 ( -0.19%) [ +3.24% +3.44% +0.00% / +3.44% -0.19% +5.15%] index_select random : Elapsed 0.005 ms (0.541 ms / 100) 0.518 -> 0.516 ( -0.39%) [+11.20% +3.86% +0.00% / +3.47% -0.39% +2.70%] index_select random_sorted : Elapsed 0.006 ms (0.576 ms / 100) B = [1, 500, 5] (stride (500, 1, 500)) A = [1, 200, 5] (stride (1, 5, 1)) dim = 1 0.468 -> 0.465 ( -0.64%) [ +0.00% +1.07% +1.07% / +0.43% -0.64% +3.42%] index_add_ linear : Elapsed 0.005 ms (0.468 ms / 100) 0.460 -> 0.459 ( -0.22%) [ +7.83% +2.61% +0.00% / +3.26% -0.22% +3.26%] index_copy_ linear : Elapsed 0.005 ms (0.496 ms / 100) 0.461 -> 0.468 ( +1.52%) [ +1.95% +3.90% +0.00% / +2.60% +1.52% +1.95%] index_add_ reverse : Elapsed 0.005 ms (0.470 ms / 100) 0.458 -> 0.461 ( +0.66%) [ +2.18% +4.15% +0.00% / +3.49% +0.66% +4.59%] index_copy_ reverse : Elapsed 0.005 ms (0.468 ms / 100) 0.461 -> 0.472 ( +2.39%) [ +2.17% +3.47% +0.00% / +2.39% +2.39% +2.39%] index_add_ spread : Elapsed 0.005 ms (0.471 ms / 100) 0.458 -> 0.459 ( +0.22%) [ +3.28% +2.62% +0.00% / +3.49% +0.22% +9.39%] index_copy_ spread : Elapsed 0.005 ms (0.473 ms / 100) 0.466 -> 0.471 ( +1.07%) [ +0.43% +1.72% +0.00% / +1.07% +7.30% +10.73%] index_add_ strided 3 : Elapsed 0.005 ms (0.468 ms / 100) 0.458 -> 0.475 ( +3.71%) [ +2.62% +2.40% +0.00% / +3.71% +7.64% +4.15%] index_copy_ strided 3 : Elapsed 0.005 ms (0.470 ms / 100) 0.460 -> 0.464 ( +0.87%) [ +0.87% +6.09% +0.00% / +2.39% +0.87% +5.22%] index_add_ strided 7 : Elapsed 0.005 ms (0.464 ms / 100) 0.458 -> 0.466 ( +1.75%) [ +4.15% +5.24% +0.00% / +3.28% +1.75% +3.71%] index_copy_ strided 7 : Elapsed 0.005 ms (0.477 ms / 100) 0.463 -> 0.472 ( +1.94%) [ +6.48% +2.59% +0.00% / +1.94% +3.24% +2.16%] index_add_ strided 257 : Elapsed 0.005 ms (0.493 ms / 100) 0.461 -> 0.461 ( +0.00%) [ +1.74% +1.95% +0.00% / +3.25% +0.00% +2.82%] index_copy_ strided 257 : Elapsed 0.005 ms (0.469 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +0.64% +2.14% +0.00% / +1.07% +0.43% +0.21%] index_add_ perm : Elapsed 0.005 ms (0.470 ms / 100) 0.460 -> 0.462 ( +0.43%) [ +2.83% +6.52% +0.00% / +10.00% +0.43% +2.39%] index_copy_ perm : Elapsed 0.005 ms (0.473 ms / 100) 0.464 -> 0.473 ( +1.94%) [ +1.94% +3.66% +0.00% / +1.94% +2.16% +9.48%] index_add_ perm_sorted : Elapsed 0.005 ms (0.473 ms / 100) 0.459 -> 0.461 ( +0.44%) [ +3.70% +4.14% +0.00% / +2.61% +0.44% +9.37%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.476 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +4.05% +2.31% +0.00% / +3.66% +1.16% +15.61%] index_select const : Elapsed 0.005 ms (0.540 ms / 100) 0.527 -> 0.519 ( -1.52%) [ +2.66% +0.76% +0.00% / +0.19% -1.52% +3.23%] index_select wrap : Elapsed 0.005 ms (0.541 ms / 100) 0.533 -> 0.519 ( -2.63%) [ +1.31% +0.00% +4.88% / +0.56% -2.63% +0.38%] index_select linear : Elapsed 0.005 ms (0.540 ms / 100) 0.531 -> 0.525 ( -1.13%) [ +2.64% +0.00% +13.75% / +0.38% -1.13% +0.19%] index_select reverse : Elapsed 0.005 ms (0.545 ms / 100) 0.517 -> 0.526 ( +1.74%) [ +4.84% +1.74% +0.00% / +5.61% +1.74% +3.87%] index_select skip64 : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.522 ( +0.19%) [ +3.65% +0.77% +0.00% / +2.30% +0.19% +3.26%] index_select skip256 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.529 ( +1.73%) [ +4.42% +1.15% +0.00% / +3.27% +1.73% +10.38%] index_select spread : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.521 ( +0.19%) [ +5.38% +3.27% +0.00% / +9.42% +0.19% +15.00%] index_select strided 3 : Elapsed 0.005 ms (0.548 ms / 100) 0.519 -> 0.518 ( -0.19%) [ +4.05% +2.31% +0.00% / +5.78% -0.19% +10.60%] index_select strided 5 : Elapsed 0.005 ms (0.540 ms / 100) 0.527 -> 0.545 ( +3.42%) [ +2.66% +0.00% +0.57% / +3.42% +5.31% +11.57%] index_select strided 7 : Elapsed 0.005 ms (0.541 ms / 100) bad 0.521 -> 0.549 ( +5.37%) [ +4.61% +1.15% +0.00% / +5.37% +7.29% +9.79%] index_select strided 8 : Elapsed 0.005 ms (0.545 ms / 100) 0.518 -> 0.517 ( -0.19%) [ +6.18% +3.09% +0.00% / +3.28% -0.19% +17.57%] index_select strided 16 : Elapsed 0.006 ms (0.550 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +5.00% +1.92% +0.00% / +4.81% +0.96% +2.69%] index_select strided 64 : Elapsed 0.005 ms (0.546 ms / 100) 0.530 -> 0.520 ( -1.89%) [ +2.26% +0.00% +3.96% / +2.26% -1.89% +1.13%] index_select strided 100 : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +3.66% +2.12% +0.00% / +3.85% +1.16% +16.76%] index_select random : Elapsed 0.005 ms (0.538 ms / 100) 0.517 -> 0.529 ( +2.32%) [ +5.03% +3.29% +0.00% / +4.26% +2.32% +15.47%] index_select random_sorted : Elapsed 0.005 ms (0.543 ms / 100) out_shape = [1, 200, 500] in_shape = [1, 200, 5] idx_dim = 2 B = [1, 200, 500] (stride (100000, 500, 1)) A = [1, 200, 5] (stride (1000, 1, 200)) dim = 2 0.571 -> 0.573 ( +0.35%) [ +0.88% +0.18% +0.00% / +0.88% +0.35% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +0.51% +0.00% +0.00% / +0.85% +0.51% +0.85%] index_copy_ linear : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +2.10% +0.18% +0.00% / +0.70% +0.35% +1.58%] index_add_ reverse : Elapsed 0.006 ms (0.583 ms / 100) 0.586 -> 0.587 ( +0.17%) [ +0.68% +0.00% +0.00% / +0.68% +0.17% +0.17%] index_copy_ reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.18% +0.00% / +0.70% +0.35% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.68% +0.00% +0.17% / +0.85% +0.17% +0.85%] index_copy_ spread : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.23% +0.18% +0.00% / +1.05% +0.18% +0.35%] index_add_ strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +0.68% +0.17% +0.00% / +0.85% +0.51% +0.51%] index_copy_ strided 3 : Elapsed 0.006 ms (0.589 ms / 100) 0.572 -> 0.573 ( +0.17%) [ +0.87% +0.00% +0.17% / +0.87% +0.17% +0.17%] index_add_ strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.586 -> 0.587 ( +0.17%) [ +0.51% +0.00% +0.00% / +0.51% +0.17% +0.34%] index_copy_ strided 7 : Elapsed 0.006 ms (0.589 ms / 100) 0.573 -> 0.572 ( -0.17%) [ +0.87% +0.00% +0.00% / +0.70% -0.17% -0.17%] index_add_ strided 257 : Elapsed 0.006 ms (0.578 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.86% +0.00% +0.17% / +0.51% +1.20% +0.68%] index_copy_ strided 257 : Elapsed 0.006 ms (0.589 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +1.05% +0.00% +0.70% / +1.05% +0.00% +1.92%] index_add_ perm : Elapsed 0.006 ms (0.578 ms / 100) 0.584 -> 0.589 ( +0.86%) [ +0.68% +0.00% +0.00% / +0.86% +1.03% +1.20%] index_copy_ perm : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +2.63% +0.18% +0.00% / +0.70% +0.35% +0.53%] index_add_ perm_sorted : Elapsed 0.006 ms (0.586 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.68% +0.00% +0.00% / +0.85% +0.17% +0.00%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.070 -> 4.785 ( -5.62%) [ +0.00% +0.12% +0.14% / -5.62% -5.52% -5.60%] index_select const : Elapsed 0.051 ms (5.070 ms / 100) 5.063 -> 4.827 ( -4.66%) [ +0.06% +0.00% +0.12% / -4.52% -4.64% -4.66%] index_select wrap : Elapsed 0.051 ms (5.066 ms / 100) good 5.102 -> 4.824 ( -5.45%) [ +0.14% +0.12% +0.00% / -5.43% -5.45% -5.37%] index_select linear : Elapsed 0.051 ms (5.109 ms / 100) good 5.076 -> 4.787 ( -5.69%) [ +0.10% +0.20% +0.00% / -5.69% -5.69% -5.69%] index_select reverse : Elapsed 0.051 ms (5.081 ms / 100) good 5.064 -> 4.792 ( -5.37%) [ +0.00% +0.08% +0.18% / -5.29% -5.37% -5.27%] index_select skip64 : Elapsed 0.051 ms (5.064 ms / 100) good 5.114 -> 4.799 ( -6.16%) [ +0.00% +0.31% +0.20% / -6.16% -5.67% -5.61%] index_select skip256 : Elapsed 0.051 ms (5.114 ms / 100) good 5.110 -> 4.815 ( -5.77%) [ +0.16% +0.00% +0.22% / -5.73% -5.73% -5.77%] index_select spread : Elapsed 0.051 ms (5.118 ms / 100) good 5.045 -> 4.791 ( -5.03%) [ +0.22% +0.00% +0.10% / -4.78% -5.03% -4.70%] index_select strided 3 : Elapsed 0.051 ms (5.056 ms / 100) good 5.055 -> 4.799 ( -5.06%) [ +0.00% +0.12% +0.12% / -4.87% -5.06% -4.93%] index_select random : Elapsed 0.051 ms (5.055 ms / 100) good 5.112 -> 4.802 ( -6.06%) [ +0.00% +0.16% +0.25% / -6.06% -5.63% -5.75%] index_select random_sorted : Elapsed 0.051 ms (5.112 ms / 100) B = [1, 200, 500] (stride (100000, 500, 1)) A = [1, 200, 5] (stride (200, 1, 200)) dim = 2 0.570 -> 0.573 ( +0.53%) [ +1.05% +0.18% +0.00% / +1.05% +0.53% +0.88%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.584 -> 0.588 ( +0.68%) [ +0.86% +0.17% +0.00% / +0.68% +0.86% +0.86%] index_copy_ linear : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.05% +0.18% +0.00% / +1.05% +0.18% +0.35%] index_add_ reverse : Elapsed 0.006 ms (0.577 ms / 100) 0.582 -> 0.588 ( +1.03%) [ +1.03% +0.17% +0.00% / +1.03% +1.03% +1.37%] index_copy_ reverse : Elapsed 0.006 ms (0.588 ms / 100) 0.572 -> 0.574 ( +0.35%) [ +1.05% +0.00% +0.00% / +0.87% +0.35% +0.35%] index_add_ spread : Elapsed 0.006 ms (0.578 ms / 100) 0.582 -> 0.588 ( +1.03%) [ +0.86% +0.17% +0.00% / +1.03% +1.37% +1.20%] index_copy_ spread : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.575 ( +0.70%) [ +0.88% +0.18% +0.00% / +0.70% +0.70% +0.70%] index_add_ strided 3 : Elapsed 0.006 ms (0.576 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.68% +0.00% +0.17% / +0.86% +0.51% +0.68%] index_copy_ strided 3 : Elapsed 0.006 ms (0.588 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.88% +0.18% +0.00% / +0.88% +0.70% +0.88%] index_add_ strided 7 : Elapsed 0.006 ms (0.575 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.68% +0.00% +0.00% / +1.20% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.53% +0.00% +0.18% / +0.53% +0.53% +0.88%] index_add_ strided 257 : Elapsed 0.006 ms (0.574 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.85% +0.68% +0.00% / +0.51% +0.00% +0.17%] index_copy_ strided 257 : Elapsed 0.006 ms (0.591 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.70% +0.18% +0.00% / +0.53% +0.88% +0.88%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.86% +0.00% +0.34% / +0.68% +0.51% +0.51%] index_copy_ perm : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.05% +0.00% +0.00% / +2.28% +0.18% +0.53%] index_add_ perm_sorted : Elapsed 0.006 ms (0.577 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.85% +0.00% +0.00% / +0.34% +0.34% +0.51%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.067 -> 4.788 ( -5.51%) [ +0.02% +0.00% +0.14% / -5.47% -5.47% -5.51%] index_select const : Elapsed 0.051 ms (5.068 ms / 100) 5.044 -> 4.827 ( -4.30%) [ +0.00% +0.02% +0.00% / -4.26% -4.30% -4.20%] index_select wrap : Elapsed 0.050 ms (5.044 ms / 100) good 5.092 -> 4.808 ( -5.58%) [ +0.00% +0.10% +0.16% / -5.58% -5.40% -5.42%] index_select linear : Elapsed 0.051 ms (5.092 ms / 100) good 5.081 -> 4.779 ( -5.94%) [ +0.04% +0.00% +0.04% / -5.94% -5.90% -5.79%] index_select reverse : Elapsed 0.051 ms (5.083 ms / 100) good 5.081 -> 4.786 ( -5.81%) [ +0.24% +0.00% +0.35% / -5.69% -5.71% -5.81%] index_select skip64 : Elapsed 0.051 ms (5.093 ms / 100) good 5.103 -> 4.802 ( -5.90%) [ +0.10% +0.00% +0.10% / -5.90% -5.60% -5.70%] index_select skip256 : Elapsed 0.051 ms (5.108 ms / 100) good 5.084 -> 4.815 ( -5.29%) [ +0.20% +0.00% +0.14% / -5.29% -5.15% -5.09%] index_select spread : Elapsed 0.051 ms (5.094 ms / 100) 5.057 -> 4.806 ( -4.96%) [ +0.00% +0.06% +0.04% / -4.81% -4.96% -4.96%] index_select strided 3 : Elapsed 0.051 ms (5.057 ms / 100) good 5.060 -> 4.802 ( -5.10%) [ +0.06% +0.00% +0.02% / -4.58% -5.10% -4.98%] index_select random : Elapsed 0.051 ms (5.063 ms / 100) good 5.076 -> 4.807 ( -5.30%) [ +0.22% +0.00% +0.22% / -5.30% -5.08% -5.20%] index_select random_sorted : Elapsed 0.051 ms (5.087 ms / 100) B = [1, 200, 500] (stride (100000, 1, 200)) A = [1, 200, 5] (stride (1, 1, 200)) dim = 2 0.571 -> 0.572 ( +0.18%) [ +0.70% +0.00% +0.00% / +0.70% +0.18% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.575 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +0.00% / +0.90% +0.90% +1.08%] index_copy_ linear : Elapsed 0.006 ms (0.559 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +0.35% +0.00% +0.35% / +0.35% +0.00% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.555 -> 0.559 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.72% +0.72% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.560 ms / 100) 0.569 -> 0.572 ( +0.53%) [ +0.88% +0.35% +0.00% / +0.88% +0.53% +1.58%] index_add_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +0.18% / +0.90% +1.08% +1.26%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.00% +0.18% / +0.88% +0.18% +0.18%] index_add_ strided 3 : Elapsed 0.006 ms (0.576 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.54% +0.00% / +1.08% +0.90% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +1.23% +0.88% +0.00% / +1.40% +0.18% +0.53%] index_add_ strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.18% +0.00% / +1.08% +0.72% +0.72%] index_copy_ strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.05% +0.00% +0.00% / +1.05% +0.18% +26.27%] index_add_ strided 257 : Elapsed 0.006 ms (0.577 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +1.08% +0.18% +0.00% / +1.08% +0.54% +33.75%] index_copy_ strided 257 : Elapsed 0.006 ms (0.560 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +2.45% +0.00% +0.00% / +2.10% +0.00% +9.79%] index_add_ perm : Elapsed 0.006 ms (0.586 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.00% +0.36% / +0.90% +0.72% +1.99%] index_copy_ perm : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.88% +0.18% +0.00% / +0.70% +0.53% +0.88%] index_add_ perm_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.90% +1.08% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) 4.787 -> 4.782 ( -0.10%) [ +0.13% +0.00% +0.13% / -0.10% +0.08% +0.27%] index_select const : Elapsed 0.048 ms (4.793 ms / 100) 4.801 -> 4.808 ( +0.15%) [ +0.00% +0.04% +0.02% / +0.15% +0.31% +0.54%] index_select wrap : Elapsed 0.048 ms (4.801 ms / 100) 4.820 -> 4.802 ( -0.37%) [ +0.04% +0.08% +0.00% / -0.06% -0.37% -0.25%] index_select linear : Elapsed 0.048 ms (4.822 ms / 100) 4.798 -> 4.789 ( -0.19%) [ +0.08% +0.00% +0.06% / -0.06% -0.10% -0.19%] index_select reverse : Elapsed 0.048 ms (4.802 ms / 100) 4.787 -> 4.792 ( +0.10%) [ +0.17% +0.00% +0.06% / +0.27% +0.10% +0.13%] index_select skip64 : Elapsed 0.048 ms (4.795 ms / 100) 4.820 -> 4.795 ( -0.52%) [ +0.12% +0.08% +0.00% / +0.29% -0.46% -0.52%] index_select skip256 : Elapsed 0.048 ms (4.826 ms / 100) 4.824 -> 4.799 ( -0.52%) [ +0.00% +0.10% +0.10% / +0.15% -0.52% -0.41%] index_select spread : Elapsed 0.048 ms (4.824 ms / 100) 4.792 -> 4.789 ( -0.06%) [ +0.06% +0.00% +0.08% / +0.21% -0.06% +0.38%] index_select strided 3 : Elapsed 0.048 ms (4.795 ms / 100) 4.793 -> 4.791 ( -0.04%) [ +0.15% +0.00% +0.06% / +0.00% -0.04% +0.08%] index_select random : Elapsed 0.048 ms (4.800 ms / 100) 4.821 -> 4.806 ( -0.31%) [ +0.00% +0.12% +0.27% / +0.15% -0.31% -0.25%] index_select random_sorted : Elapsed 0.048 ms (4.821 ms / 100) B = [1, 200, 500] (stride (200, 1, 200)) A = [1, 200, 5] (stride (1, 1, 200)) dim = 2 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.00% +0.00% / +0.70% +0.18% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +0.00% / +1.08% +1.08% +0.90%] index_copy_ linear : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.571 ( +0.18%) [ +1.05% +0.18% +0.00% / +1.05% +0.18% +0.53%] index_add_ reverse : Elapsed 0.006 ms (0.576 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.72% +0.18% +0.00% / +4.51% +0.54% +0.72%] index_copy_ reverse : Elapsed 0.006 ms (0.558 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +0.70% +0.00% +0.00% / +0.70% +0.00% +0.52%] index_add_ spread : Elapsed 0.006 ms (0.576 ms / 100) 0.555 -> 0.558 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.72% +0.54% +0.72%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.53% +0.00% +0.00% / +0.53% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +1.08% +0.00% +0.00% / +1.08% +0.90% +1.08%] index_copy_ strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.88% +0.35% +0.00% / +0.70% +0.70% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.575 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.90% +0.54% +1.08%] index_copy_ strided 7 : Elapsed 0.006 ms (0.558 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.18% +0.00% / +0.35% +0.35% +0.53%] index_add_ strided 257 : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +1.08% +0.00% +0.18% / +1.08% +1.08% +0.90%] index_copy_ strided 257 : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.35% +0.00% +0.53% / +0.53% +0.35% +0.70%] index_add_ perm : Elapsed 0.006 ms (0.573 ms / 100) 0.553 -> 0.559 ( +1.08%) [ +1.08% +0.18% +0.00% / +1.27% +1.63% +1.08%] index_copy_ perm : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.18% +0.00% / +0.70% +0.18% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.576 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.90% +0.00% +0.00% / +1.08% +0.54% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) 4.782 -> 4.784 ( +0.04%) [ +0.10% +0.00% +0.17% / +0.13% +0.04% +0.23%] index_select const : Elapsed 0.048 ms (4.787 ms / 100) 4.797 -> 4.801 ( +0.08%) [ +0.17% +0.21% +0.00% / +0.08% +0.42% +0.52%] index_select wrap : Elapsed 0.048 ms (4.805 ms / 100) 4.817 -> 4.795 ( -0.46%) [ +0.19% +0.17% +0.00% / +0.10% -0.46% -0.31%] index_select linear : Elapsed 0.048 ms (4.826 ms / 100) 4.794 -> 4.786 ( -0.17%) [ +0.00% +0.10% +0.06% / +0.10% -0.17% +0.10%] index_select reverse : Elapsed 0.048 ms (4.794 ms / 100) 4.789 -> 4.783 ( -0.13%) [ +0.21% +0.27% +0.00% / -0.06% -0.13% -0.04%] index_select skip64 : Elapsed 0.048 ms (4.799 ms / 100) 4.814 -> 4.791 ( -0.48%) [ +0.00% +0.21% +0.12% / +0.39% -0.48% -0.25%] index_select skip256 : Elapsed 0.048 ms (4.814 ms / 100) 4.823 -> 4.796 ( -0.56%) [ +0.00% +0.21% +0.19% / +0.10% -0.56% -0.33%] index_select spread : Elapsed 0.048 ms (4.823 ms / 100) 4.770 -> 4.789 ( +0.40%) [ +0.71% +0.00% +0.42% / +0.63% +0.50% +0.40%] index_select strided 3 : Elapsed 0.048 ms (4.804 ms / 100) 4.796 -> 4.781 ( -0.31%) [ +0.00% +0.13% +0.19% / +0.17% -0.17% -0.31%] index_select random : Elapsed 0.048 ms (4.796 ms / 100) 4.829 -> 4.797 ( -0.66%) [ +0.04% +0.00% +0.21% / -0.02% -0.66% -0.56%] index_select random_sorted : Elapsed 0.048 ms (4.831 ms / 100) B = [1, 200, 500] (stride (1, 1, 200)) dim = 2 fill_cnt = 5 0.453 -> 0.455 ( +0.44%) [ +1.10% +1.99% +0.00% / +1.10% +0.44% +0.88%] index_fill_ const : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.44% +0.00% / +0.66% +0.66% +0.00%] index_fill_ linear : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.458 ( +0.88%) [ +0.88% +0.00% +0.22% / +0.88% +2.64% +3.74%] index_fill_ reverse : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.33% +0.00% +0.00% / +1.11% +1.11% +3.32%] index_fill_ skip64 : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.456 ( +0.88%) [ +1.55% +0.00% +2.21% / +1.33% +0.88% +1.33%] index_fill_ skip256 : Elapsed 0.005 ms (0.459 ms / 100) 0.452 -> 0.456 ( +0.88%) [ +1.11% +0.00% +0.00% / +1.11% +0.88% +2.65%] index_fill_ spread : Elapsed 0.005 ms (0.457 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.11% +0.00% +0.00% / +1.33% +1.55% +3.10%] index_fill_ strided 3 : Elapsed 0.005 ms (0.456 ms / 100) 0.453 -> 0.455 ( +0.44%) [ +1.10% +0.22% +0.00% / +0.88% +0.44% +10.60%] index_fill_ strided 5 : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.456 ( +0.88%) [ +1.33% +1.11% +0.00% / +0.88% +1.11% +2.88%] index_fill_ strided 7 : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.455 ( +0.44%) [ +1.10% +0.22% +0.00% / +0.88% +0.44% +0.44%] index_fill_ strided 8 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.455 ( +0.22%) [ +0.88% +0.00% +0.22% / +0.44% +0.22% +2.20%] index_fill_ strided 16 : Elapsed 0.005 ms (0.458 ms / 100) 0.456 -> 0.454 ( -0.44%) [ +0.44% +0.22% +0.00% / +7.24% -0.44% +2.19%] index_fill_ strided 64 : Elapsed 0.005 ms (0.458 ms / 100) 0.456 -> 0.454 ( -0.44%) [ +0.44% +0.44% +0.00% / +7.24% -0.44% +8.99%] index_fill_ strided 100 : Elapsed 0.005 ms (0.458 ms / 100) 0.455 -> 0.454 ( -0.22%) [ +0.66% +0.44% +0.00% / +3.08% -0.22% +10.55%] index_fill_ strided 255 : Elapsed 0.005 ms (0.458 ms / 100) 0.455 -> 0.453 ( -0.44%) [ +0.88% +0.22% +0.00% / +3.08% -0.44% +3.08%] index_fill_ strided 256 : Elapsed 0.005 ms (0.459 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.22% +0.00% / +0.88% +0.00% +2.86%] index_fill_ strided 257 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +7.05% +1.54% +0.00% / +0.66% +0.00% +1.98%] index_fill_ random : Elapsed 0.005 ms (0.486 ms / 100) 0.452 -> 0.456 ( +0.88%) [ +1.11% +0.44% +0.00% / +1.33% +0.88% +7.52%] index_fill_ random_sorted : Elapsed 0.005 ms (0.457 ms / 100) 0.453 -> 0.457 ( +0.88%) [ +5.74% +0.22% +0.00% / +2.43% +0.88% +3.53%] index_fill_ perm : Elapsed 0.005 ms (0.479 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.55% +0.22% +0.00% / +1.33% +2.88% +1.33%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.458 ms / 100) out_shape = [500, 1, 200] in_shape = [5, 1, 200] idx_dim = 0 B = [500, 1, 200] (stride (1, 100000, 500)) A = [5, 1, 200] (stride (200, 1, 1)) dim = 0 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.00% +0.18% / +1.58% +0.35% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.575 ms / 100) 0.584 -> 0.586 ( +0.34%) [ +0.68% +0.00% +0.17% / +0.68% +0.34% +0.51%] index_copy_ linear : Elapsed 0.006 ms (0.588 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.88% +0.18% +0.00% / +0.88% +0.70% +0.88%] index_add_ reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.51% +0.00% +0.00% / +0.51% +0.34% +0.17%] index_copy_ reverse : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.70% +0.00% +0.00% / +0.70% +0.53% +1.05%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.584 -> 0.585 ( +0.17%) [ +0.86% +0.17% +0.00% / +0.51% +0.17% +0.34%] index_copy_ spread : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.05% +0.00% +0.00% / +1.05% +0.18% +0.18%] index_add_ strided 3 : Elapsed 0.006 ms (0.577 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +1.37% +0.00% +0.00% / +3.08% +2.23% +0.51%] index_copy_ strided 3 : Elapsed 0.006 ms (0.592 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.23% +0.18% +0.00% / +1.05% +0.18% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.578 ms / 100) 0.583 -> 0.585 ( +0.34%) [ +0.69% +0.17% +0.00% / +1.72% +0.34% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.587 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +1.05% +0.17% +0.00% / +1.05% -0.17% +0.17%] index_add_ strided 257 : Elapsed 0.006 ms (0.578 ms / 100) 0.582 -> 0.586 ( +0.69%) [ +0.86% +0.00% +0.00% / +0.86% +0.86% +0.69%] index_copy_ strided 257 : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.40% +0.35% +0.00% / +1.40% +0.00% +0.35%] index_add_ perm : Elapsed 0.006 ms (0.579 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +1.72% +0.00% +0.17% / +1.89% +0.86% +0.86%] index_copy_ perm : Elapsed 0.006 ms (0.592 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.88% +0.18% +0.00% / +0.88% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.583 -> 0.585 ( +0.34%) [ +0.86% +0.17% +0.00% / +1.03% +0.51% +0.34%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.588 ms / 100) good 5.068 -> 4.784 ( -5.60%) [ +0.00% +0.24% +0.08% / -5.60% -5.58% -5.29%] index_select const : Elapsed 0.051 ms (5.068 ms / 100) 5.056 -> 4.828 ( -4.51%) [ +0.28% +0.00% +0.32% / -4.41% -4.31% -4.51%] index_select wrap : Elapsed 0.051 ms (5.070 ms / 100) good 5.101 -> 4.809 ( -5.72%) [ +0.14% +0.10% +0.00% / -5.72% -5.67% -5.49%] index_select linear : Elapsed 0.051 ms (5.108 ms / 100) good 5.081 -> 4.784 ( -5.85%) [ +0.00% +0.02% +0.12% / -5.65% -5.85% -5.71%] index_select reverse : Elapsed 0.051 ms (5.081 ms / 100) good 5.067 -> 4.789 ( -5.49%) [ +0.04% +0.04% +0.00% / -5.43% -5.49% -5.29%] index_select skip64 : Elapsed 0.051 ms (5.069 ms / 100) good 5.110 -> 4.808 ( -5.91%) [ +0.06% +0.00% +0.20% / -5.91% -5.62% -5.58%] index_select skip256 : Elapsed 0.051 ms (5.113 ms / 100) good 5.111 -> 4.810 ( -5.89%) [ +0.18% +0.00% +0.18% / -5.89% -5.87% -5.77%] index_select spread : Elapsed 0.051 ms (5.120 ms / 100) 5.047 -> 4.797 ( -4.95%) [ +0.26% +0.00% +0.18% / -4.74% -4.83% -4.95%] index_select strided 3 : Elapsed 0.051 ms (5.060 ms / 100) 5.044 -> 4.798 ( -4.88%) [ +0.26% +0.20% +0.00% / -4.74% -4.76% -4.88%] index_select random : Elapsed 0.051 ms (5.057 ms / 100) good 5.102 -> 4.804 ( -5.84%) [ +0.14% +0.00% +0.12% / -5.78% -5.53% -5.84%] index_select random_sorted : Elapsed 0.051 ms (5.109 ms / 100) B = [500, 1, 200] (stride (1, 100000, 500)) A = [5, 1, 200] (stride (1, 1, 5)) dim = 0 0.573 -> 0.576 ( +0.52%) [ +0.87% +0.00% +0.87% / +0.70% +0.52% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.578 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +0.68% +0.00% +0.00% / +1.20% +1.03% +0.85%] index_copy_ linear : Elapsed 0.006 ms (0.589 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +1.22% +0.17% +0.00% / +0.70% +0.52% +0.52%] index_add_ reverse : Elapsed 0.006 ms (0.580 ms / 100) 0.585 -> 0.588 ( +0.51%) [ +0.85% +0.00% +0.00% / +0.51% +0.85% +1.03%] index_copy_ reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.573 -> 0.575 ( +0.35%) [ +1.05% +0.35% +0.00% / +0.87% +0.35% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.579 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.03% +0.17% +0.00% / +0.68% +1.03% +1.20%] index_copy_ spread : Elapsed 0.006 ms (0.591 ms / 100) 0.572 -> 0.576 ( +0.70%) [ +1.05% +0.70% +0.00% / +0.87% +0.70% +1.05%] index_add_ strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.03% +0.00% +0.34% / +0.85% +1.03% +0.68%] index_copy_ strided 3 : Elapsed 0.006 ms (0.591 ms / 100) 0.572 -> 0.576 ( +0.70%) [ +1.05% +0.17% +0.00% / +1.05% +0.70% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.578 ms / 100) 0.586 -> 0.590 ( +0.68%) [ +0.68% +0.00% +0.00% / +0.68% +0.85% +0.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.590 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +0.70% +0.00% +0.17% / +0.70% +0.52% +0.70%] index_add_ strided 257 : Elapsed 0.006 ms (0.577 ms / 100) 0.586 -> 0.588 ( +0.34%) [ +0.85% +0.00% +0.17% / +1.02% +0.68% +0.34%] index_copy_ strided 257 : Elapsed 0.006 ms (0.591 ms / 100) 0.572 -> 0.576 ( +0.70%) [ +0.87% +0.17% +0.00% / +0.87% +0.70% +0.70%] index_add_ perm : Elapsed 0.006 ms (0.577 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +0.85% +0.34% +0.00% / +1.03% +0.85% +0.85%] index_copy_ perm : Elapsed 0.006 ms (0.590 ms / 100) 0.573 -> 0.575 ( +0.35%) [ +0.87% +0.17% +0.00% / +1.05% +0.35% +0.52%] index_add_ perm_sorted : Elapsed 0.006 ms (0.578 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.68% +0.00% +0.17% / +0.85% +0.51% +0.51%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.183 -> 4.784 ( -7.70%) [ +0.10% +0.00% +0.12% / -7.70% -7.68% -7.62%] index_select const : Elapsed 0.052 ms (5.188 ms / 100) good 5.195 -> 4.819 ( -7.24%) [ +0.19% +0.00% +0.10% / -7.22% -7.05% -7.24%] index_select wrap : Elapsed 0.052 ms (5.205 ms / 100) good 5.220 -> 4.809 ( -7.87%) [ +0.11% +0.15% +0.00% / -7.87% -7.70% -7.70%] index_select linear : Elapsed 0.052 ms (5.226 ms / 100) good 5.208 -> 4.778 ( -8.26%) [ +0.00% +0.06% +0.12% / -8.26% -8.20% -8.12%] index_select reverse : Elapsed 0.052 ms (5.208 ms / 100) good 5.198 -> 4.780 ( -8.04%) [ +0.06% +0.12% +0.00% / -7.66% -8.04% -7.81%] index_select skip64 : Elapsed 0.052 ms (5.201 ms / 100) good 5.217 -> 4.807 ( -7.86%) [ +0.10% +0.00% +0.04% / -7.86% -7.86% -7.74%] index_select skip256 : Elapsed 0.052 ms (5.222 ms / 100) good 5.217 -> 4.815 ( -7.71%) [ +0.31% +0.00% +0.13% / -7.71% -7.69% -7.71%] index_select spread : Elapsed 0.052 ms (5.233 ms / 100) good 5.196 -> 4.779 ( -8.03%) [ +0.00% +0.21% +0.27% / -7.89% -8.03% -7.95%] index_select strided 3 : Elapsed 0.052 ms (5.196 ms / 100) good 5.202 -> 4.785 ( -8.02%) [ +0.00% +0.02% +0.02% / -7.71% -8.02% -7.73%] index_select random : Elapsed 0.052 ms (5.202 ms / 100) good 5.226 -> 4.803 ( -8.09%) [ +0.04% +0.02% +0.00% / -8.09% -7.90% -7.75%] index_select random_sorted : Elapsed 0.052 ms (5.228 ms / 100) B = [500, 1, 200] (stride (1, 1, 500)) dim = 0 fill_cnt = 5 0.483 -> 0.488 ( +1.04%) [ +1.04% +0.21% +0.00% / +1.45% +1.24% +1.04%] index_fill_ const : Elapsed 0.005 ms (0.488 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +0.83% +0.00% +0.00% / +0.83% +1.03% +1.24%] index_fill_ linear : Elapsed 0.005 ms (0.488 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.00% +0.00% / +0.62% +1.03% +1.03%] index_fill_ reverse : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.21% +0.00% / +0.62% +1.03% +0.62%] index_fill_ skip64 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.62% +0.00% +0.00% / +0.62% +0.41% +0.41%] index_fill_ skip256 : Elapsed 0.005 ms (0.488 ms / 100) 0.485 -> 0.489 ( +0.82%) [ +1.86% +0.00% +0.00% / +6.19% +0.82% +3.51%] index_fill_ spread : Elapsed 0.005 ms (0.494 ms / 100) 0.485 -> 0.488 ( +0.62%) [ +0.82% +0.21% +0.00% / +0.82% +0.62% +2.06%] index_fill_ strided 3 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.82% +0.21% +0.00% / +0.82% +0.41% +0.62%] index_fill_ strided 5 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.82% +0.21% +0.00% / +0.82% +0.41% +4.95%] index_fill_ strided 7 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +5.36% +0.21% +0.00% / +0.62% +0.41% +2.06%] index_fill_ strided 8 : Elapsed 0.005 ms (0.511 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.82% +0.21% +0.00% / +0.82% +0.41% +0.82%] index_fill_ strided 16 : Elapsed 0.005 ms (0.489 ms / 100) 0.483 -> 0.488 ( +1.04%) [ +3.52% +0.21% +0.00% / +1.04% +1.04% +1.24%] index_fill_ strided 64 : Elapsed 0.005 ms (0.500 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.21% +0.00% / +0.83% +1.03% +1.24%] index_fill_ strided 100 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.21% +0.00% / +1.03% +0.62% +2.07%] index_fill_ strided 255 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +0.83% +0.21% +0.00% / +1.03% +0.83% +1.03%] index_fill_ strided 256 : Elapsed 0.005 ms (0.488 ms / 100) 0.483 -> 0.489 ( +1.24%) [ +0.83% +0.00% +0.00% / +1.24% +1.24% +1.24%] index_fill_ strided 257 : Elapsed 0.005 ms (0.487 ms / 100) 0.483 -> 0.488 ( +1.04%) [ +1.04% +0.21% +0.00% / +1.04% +1.24% +1.45%] index_fill_ random : Elapsed 0.005 ms (0.488 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.00% +0.00% / +1.03% +0.83% +0.83%] index_fill_ random_sorted : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.00% +0.00% / +0.83% +1.03% +0.83%] index_fill_ perm : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.486 ( +0.21%) [ +0.82% +0.00% +0.00% / +0.82% +0.21% +0.82%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.489 ms / 100) B = [500, 1, 200] (stride (1, 1, 500)) A = [5, 1, 200] (stride (200, 1, 1)) dim = 0 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.18% +0.00% / +0.88% +0.18% +0.53%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.583 -> 0.586 ( +0.51%) [ +1.03% +0.00% +0.17% / +0.69% +0.51% +0.69%] index_copy_ linear : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.53% +0.35% +0.53%] index_add_ reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.583 -> 0.586 ( +0.51%) [ +1.03% +0.00% +0.34% / +0.86% +0.51% +0.51%] index_copy_ reverse : Elapsed 0.006 ms (0.589 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.88% +0.18% +0.00% / +0.88% +0.70% +0.88%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.584 -> 0.584 ( +0.00%) [ +1.03% +0.00% +0.17% / +0.86% +0.17% +0.00%] index_copy_ spread : Elapsed 0.006 ms (0.590 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.05% +0.18% +0.00% / +1.05% +0.00% +0.53%] index_add_ strided 3 : Elapsed 0.006 ms (0.577 ms / 100) 0.583 -> 0.586 ( +0.51%) [ +0.86% +0.00% +0.00% / +0.69% +0.51% +0.86%] index_copy_ strided 3 : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.35% +0.00% / +0.88% +0.18% +0.53%] index_add_ strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.583 -> 0.586 ( +0.51%) [ +0.69% +0.00% +0.17% / +0.69% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.587 ms / 100) 0.573 -> 0.571 ( -0.35%) [ +1.22% +0.52% +0.00% / +0.70% -0.35% -0.17%] index_add_ strided 257 : Elapsed 0.006 ms (0.580 ms / 100) 0.582 -> 0.586 ( +0.69%) [ +2.06% +0.00% +0.00% / +0.86% +0.69% +0.86%] index_copy_ strided 257 : Elapsed 0.006 ms (0.594 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +1.05% +0.17% +0.00% / +1.05% -0.17% +0.00%] index_add_ perm : Elapsed 0.006 ms (0.578 ms / 100) 0.582 -> 0.585 ( +0.52%) [ +0.69% +0.00% +0.34% / +0.86% +0.69% +0.52%] index_copy_ perm : Elapsed 0.006 ms (0.586 ms / 100) 0.572 -> 0.574 ( +0.35%) [ +0.52% +0.00% +0.17% / +0.35% +0.35% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.85% +0.00% +0.00% / +0.51% +0.17% +0.17%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.068 -> 4.781 ( -5.66%) [ +0.16% +0.00% +0.16% / -5.49% -5.62% -5.66%] index_select const : Elapsed 0.051 ms (5.076 ms / 100) 5.070 -> 4.829 ( -4.75%) [ +0.02% +0.00% +0.14% / -4.75% -4.75% -4.65%] index_select wrap : Elapsed 0.051 ms (5.071 ms / 100) good 5.103 -> 4.821 ( -5.53%) [ +0.08% +0.16% +0.00% / -5.47% -5.53% -5.47%] index_select linear : Elapsed 0.051 ms (5.107 ms / 100) good 5.082 -> 4.786 ( -5.82%) [ +0.00% +0.04% +0.02% / -5.82% -5.45% -5.80%] index_select reverse : Elapsed 0.051 ms (5.082 ms / 100) good 5.065 -> 4.784 ( -5.55%) [ +0.16% +0.00% +0.34% / -5.29% -5.55% -5.25%] index_select skip64 : Elapsed 0.051 ms (5.073 ms / 100) good 5.112 -> 4.802 ( -6.06%) [ +0.00% +0.12% +0.08% / -6.06% -5.71% -5.83%] index_select skip256 : Elapsed 0.051 ms (5.112 ms / 100) good 5.114 -> 4.812 ( -5.91%) [ +0.29% +0.08% +0.00% / -5.65% -5.85% -5.91%] index_select spread : Elapsed 0.051 ms (5.129 ms / 100) 5.048 -> 4.796 ( -4.99%) [ +0.00% +0.18% +0.04% / -4.99% -4.97% -4.97%] index_select strided 3 : Elapsed 0.050 ms (5.048 ms / 100) good 5.053 -> 4.799 ( -5.03%) [ +0.12% +0.00% +0.14% / -4.81% -5.03% -4.93%] index_select random : Elapsed 0.051 ms (5.059 ms / 100) good 5.105 -> 4.813 ( -5.72%) [ +0.16% +0.16% +0.00% / -5.54% -5.54% -5.72%] index_select random_sorted : Elapsed 0.051 ms (5.113 ms / 100) B = [500, 1, 200] (stride (1, 500, 500)) dim = 0 fill_cnt = 5 0.485 -> 0.486 ( +0.21%) [ +0.82% +0.21% +0.00% / +0.82% +0.21% +0.62%] index_fill_ const : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.487 ( +0.21%) [ +0.62% +0.00% +0.00% / +0.62% +0.21% +1.03%] index_fill_ linear : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.62% +0.41% +0.00% / +0.62% +0.00% +0.00%] index_fill_ reverse : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +6.20% +1.24% +0.00% / +0.83% +0.83% +1.03%] index_fill_ skip64 : Elapsed 0.005 ms (0.514 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.00% +0.00% / +1.03% +0.62% +7.44%] index_fill_ skip256 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.00% +0.00% / +0.83% +0.83% +0.83%] index_fill_ spread : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.00% +0.21% / +0.83% +0.83% +0.83%] index_fill_ strided 3 : Elapsed 0.005 ms (0.489 ms / 100) 0.486 -> 0.486 ( +0.00%) [ +0.62% +0.00% +7.00% / +10.49% +0.00% +0.21%] index_fill_ strided 5 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.486 ( +0.21%) [ +0.82% +0.41% +0.00% / +0.82% +0.21% +1.03%] index_fill_ strided 7 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.82% +0.00% +0.00% / +4.54% +0.62% +0.41%] index_fill_ strided 8 : Elapsed 0.005 ms (0.489 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +0.82% +0.21% +0.00% / +0.82% +0.41% +0.62%] index_fill_ strided 16 : Elapsed 0.005 ms (0.489 ms / 100) 0.483 -> 0.487 ( +0.83%) [ +1.24% +0.21% +0.00% / +1.04% +1.04% +0.83%] index_fill_ strided 64 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.00% +0.00% / +1.03% +0.62% +1.03%] index_fill_ strided 100 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.62% +0.00% / +1.03% +0.62% +0.83%] index_fill_ strided 255 : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +1.03% +0.21% +0.00% / +1.03% +2.69% +0.83%] index_fill_ strided 256 : Elapsed 0.005 ms (0.489 ms / 100) 0.483 -> 0.488 ( +1.04%) [ +0.83% +1.04% +0.00% / +1.04% +1.24% +1.24%] index_fill_ strided 257 : Elapsed 0.005 ms (0.487 ms / 100) 0.483 -> 0.487 ( +0.83%) [ +1.24% +0.21% +0.00% / +0.83% +1.24% +1.24%] index_fill_ random : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.487 ( +0.62%) [ +1.03% +0.00% +0.00% / +3.72% +0.62% +4.13%] index_fill_ random_sorted : Elapsed 0.005 ms (0.489 ms / 100) 0.484 -> 0.488 ( +0.83%) [ +0.83% +0.00% +0.00% / +1.03% +0.83% +0.83%] index_fill_ perm : Elapsed 0.005 ms (0.488 ms / 100) 0.485 -> 0.487 ( +0.41%) [ +1.03% +0.62% +0.00% / +0.82% +0.41% +0.41%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.490 ms / 100) out_shape = [5, 500, 200] in_shape = [5, 1, 200] idx_dim = 1 B = [5, 500, 200] (stride (100000, 1, 500)) A = [5, 1, 200] (stride (200, 1, 1)) dim = 1 0.511 -> 0.511 ( +0.00%) [ +6.07% +0.00% +1.17% / +0.00% +63.99% +17.03%] index_add_ linear : Elapsed 0.005 ms (0.542 ms / 100) 0.498 -> 0.495 ( -0.60%) [ +3.21% +1.41% +0.00% / +9.24% +58.23% -0.60%] index_copy_ linear : Elapsed 0.005 ms (0.514 ms / 100) 0.495 -> 0.513 ( +3.64%) [ +1.62% +3.43% +0.00% / +3.64% +59.39% +20.40%] index_add_ reverse : Elapsed 0.005 ms (0.503 ms / 100) 0.498 -> 0.509 ( +2.21%) [ +1.81% +3.82% +0.00% / +2.21% +57.63% +6.63%] index_copy_ reverse : Elapsed 0.005 ms (0.507 ms / 100) Bad 0.493 -> 0.548 (+11.16%) [ +1.22% +11.36% +0.00% / +11.16% +62.07% +20.89%] index_add_ spread : Elapsed 0.005 ms (0.499 ms / 100) 0.496 -> 0.501 ( +1.01%) [ +2.02% +1.41% +0.00% / +2.82% +66.33% +1.01%] index_copy_ spread : Elapsed 0.005 ms (0.506 ms / 100) 0.495 -> 0.491 ( -0.81%) [ +9.09% +10.71% +0.00% / +1.62% -0.81% +26.46%] index_add_ strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.494 -> 0.485 ( -1.82%) [ +2.02% +3.44% +0.00% / +2.02% -1.82% +1.82%] index_copy_ strided 3 : Elapsed 0.005 ms (0.504 ms / 100) 0.494 -> 0.492 ( -0.40%) [ +1.01% +3.44% +0.00% / +41.30% -0.40% +19.84%] index_add_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.501 -> 0.496 ( -1.00%) [ +0.80% +0.00% +5.39% / +15.37% -1.00% +6.59%] index_copy_ strided 7 : Elapsed 0.005 ms (0.505 ms / 100) 0.500 -> 0.491 ( -1.80%) [ +0.00% +2.80% +0.00% / +38.60% -1.80% +19.80%] index_add_ strided 257 : Elapsed 0.005 ms (0.500 ms / 100) 0.499 -> 0.484 ( -3.01%) [ +0.80% +4.21% +0.00% / +18.04% -3.01% +4.41%] index_copy_ strided 257 : Elapsed 0.005 ms (0.503 ms / 100) 0.498 -> 0.486 ( -2.41%) [ +9.04% +3.61% +0.00% / +38.96% -2.41% +20.08%] index_add_ perm : Elapsed 0.005 ms (0.543 ms / 100) 0.504 -> 0.479 ( -4.96%) [ +0.00% +0.20% +1.98% / +12.90% -4.96% -1.79%] index_copy_ perm : Elapsed 0.005 ms (0.504 ms / 100) 0.492 -> 0.497 ( +1.02%) [ +1.02% +4.07% +0.00% / +26.42% +1.02% +21.34%] index_add_ perm_sorted : Elapsed 0.005 ms (0.497 ms / 100) 0.490 -> 0.478 ( -2.45%) [ +4.29% +1.22% +0.00% / +2.86% -2.45% +4.08%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.511 ms / 100) Good 9.577 -> 7.944 (-17.05%) [ +0.00% +0.07% +0.02% / -17.05% -16.93% -16.85%] index_select const : Elapsed 0.096 ms (9.577 ms / 100) Good 9.534 -> 7.913 (-17.00%) [ +0.19% +0.03% +0.00% / -17.00% -16.82% -16.86%] index_select wrap : Elapsed 0.096 ms (9.552 ms / 100) Good 9.578 -> 7.915 (-17.36%) [ +0.51% +0.10% +0.00% / -16.72% -17.36% -17.24%] index_select linear : Elapsed 0.096 ms (9.627 ms / 100) Good 9.499 -> 7.926 (-16.56%) [ +0.00% +0.12% +0.04% / -16.56% -14.80% -15.02%] index_select reverse : Elapsed 0.095 ms (9.499 ms / 100) Good 9.576 -> 7.904 (-17.46%) [ +0.04% +0.00% +0.03% / -16.78% -17.46% -17.28%] index_select skip64 : Elapsed 0.096 ms (9.580 ms / 100) Good 9.520 -> 7.932 (-16.68%) [ +0.32% +0.37% +0.00% / -16.64% -16.68% -16.50%] index_select skip256 : Elapsed 0.096 ms (9.550 ms / 100) Good 9.480 -> 7.927 (-16.38%) [ +0.02% +0.00% +0.03% / -16.35% -16.38% -16.28%] index_select spread : Elapsed 0.095 ms (9.482 ms / 100) Good 9.546 -> 7.967 (-16.54%) [ +0.18% +0.00% +0.07% / -16.54% -15.90% -16.24%] index_select random : Elapsed 0.096 ms (9.563 ms / 100) Good 9.573 -> 7.895 (-17.53%) [ +0.00% +0.04% +0.10% / -16.58% -17.36% -17.53%] index_select random_sorted : Elapsed 0.096 ms (9.573 ms / 100) B = [5, 500, 200] (stride (1, 1000, 5)) dim = 1 fill_cnt = 1 0.465 -> 0.457 ( -1.72%) [ +3.23% +2.80% +0.00% / +28.39% -1.72% -0.43%] index_fill_ const : Elapsed 0.005 ms (0.480 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +3.01% +0.43% +0.00% / +28.39% -0.43% +0.43%] index_fill_ linear : Elapsed 0.005 ms (0.479 ms / 100) 0.465 -> 0.462 ( -0.65%) [ +0.00% +1.51% +5.59% / +20.86% +13.98% -0.65%] index_fill_ reverse : Elapsed 0.005 ms (0.465 ms / 100) 0.458 -> 0.458 ( +0.00%) [ +1.09% +1.97% +0.00% / +31.00% +0.00% +2.62%] index_fill_ skip64 : Elapsed 0.005 ms (0.463 ms / 100) 0.468 -> 0.456 ( -2.56%) [ +8.12% +0.00% +5.13% / +25.64% -2.56% -1.07%] index_fill_ skip256 : Elapsed 0.005 ms (0.506 ms / 100) 0.460 -> 0.457 ( -0.65%) [ +0.43% +2.39% +0.00% / +22.61% -0.65% -0.22%] index_fill_ spread : Elapsed 0.005 ms (0.462 ms / 100) 0.467 -> 0.460 ( -1.50%) [ +2.78% +6.21% +0.00% / +27.62% -1.50% -0.21%] index_fill_ strided 3 : Elapsed 0.005 ms (0.480 ms / 100) 0.458 -> 0.457 ( -0.22%) [ +0.87% +2.18% +0.00% / +26.86% -0.22% +1.53%] index_fill_ strided 5 : Elapsed 0.005 ms (0.462 ms / 100) 0.462 -> 0.458 ( -0.87%) [ +1.08% +8.44% +0.00% / +29.00% +4.55% -0.87%] index_fill_ strided 7 : Elapsed 0.005 ms (0.467 ms / 100) bad 0.457 -> 0.485 ( +6.13%) [ +1.09% +10.07% +0.00% / +29.54% +6.13% +7.22%] index_fill_ strided 8 : Elapsed 0.005 ms (0.462 ms / 100) 0.474 -> 0.460 ( -2.95%) [ +7.59% +0.00% +3.59% / +24.05% -2.95% -0.21%] index_fill_ strided 16 : Elapsed 0.005 ms (0.510 ms / 100) 0.466 -> 0.454 ( -2.58%) [ +0.00% +7.51% +0.21% / +0.43% -2.58% +1.07%] index_fill_ strided 64 : Elapsed 0.005 ms (0.466 ms / 100) 0.460 -> 0.451 ( -1.96%) [+10.22% +2.17% +0.00% / +1.74% -1.96% +7.39%] index_fill_ strided 100 : Elapsed 0.005 ms (0.507 ms / 100) 0.462 -> 0.460 ( -0.43%) [ +0.00% +0.65% +0.00% / +1.52% +0.22% -0.43%] index_fill_ strided 255 : Elapsed 0.005 ms (0.462 ms / 100) 0.463 -> 0.480 ( +3.67%) [ +0.00% +8.21% +6.70% / +9.72% +3.67% +6.26%] index_fill_ strided 256 : Elapsed 0.005 ms (0.463 ms / 100) 0.460 -> 0.456 ( -0.87%) [ +0.43% +0.43% +0.00% / +10.87% -0.87% +3.26%] index_fill_ strided 257 : Elapsed 0.005 ms (0.462 ms / 100) 0.461 -> 0.467 ( +1.30%) [ +0.00% +2.17% +0.43% / +1.95% +1.30% +1.30%] index_fill_ random : Elapsed 0.005 ms (0.461 ms / 100) good 0.501 -> 0.455 ( -9.18%) [ +3.59% +2.59% +0.00% / +2.40% -9.18% -2.00%] index_fill_ random_sorted : Elapsed 0.005 ms (0.519 ms / 100) 0.458 -> 0.459 ( +0.22%) [ +1.75% +2.62% +0.00% / +2.18% +5.68% +0.22%] index_fill_ perm : Elapsed 0.005 ms (0.466 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +0.00% +0.43% +4.71% / +0.43% +1.07% +0.21%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.467 ms / 100) B = [5, 500, 200] (stride (1, 1000, 5)) A = [5, 1, 200] (stride (1, 5, 5)) dim = 1 0.497 -> 0.485 ( -2.41%) [ +1.21% +2.21% +0.00% / +2.01% -2.41% +0.60%] index_add_ linear : Elapsed 0.005 ms (0.503 ms / 100) 0.499 -> 0.484 ( -3.01%) [ +1.20% +0.00% +0.40% / +0.60% -3.01% +0.00%] index_copy_ linear : Elapsed 0.005 ms (0.505 ms / 100) 0.493 -> 0.485 ( -1.62%) [ +1.62% +2.84% +0.00% / +2.03% -1.62% -0.20%] index_add_ reverse : Elapsed 0.005 ms (0.501 ms / 100) 0.502 -> 0.484 ( -3.59%) [ +0.80% +0.00% +0.80% / +4.78% -3.59% -1.59%] index_copy_ reverse : Elapsed 0.005 ms (0.506 ms / 100) 0.496 -> 0.485 ( -2.22%) [ +1.61% +2.62% +0.00% / +1.21% -2.22% +0.60%] index_add_ spread : Elapsed 0.005 ms (0.504 ms / 100) 0.497 -> 0.486 ( -2.21%) [ +1.01% +1.21% +0.00% / +5.63% -2.21% +0.60%] index_copy_ spread : Elapsed 0.005 ms (0.502 ms / 100) 0.494 -> 0.488 ( -1.21%) [ +2.63% +3.24% +0.00% / +1.01% -1.21% -0.40%] index_add_ strided 3 : Elapsed 0.005 ms (0.507 ms / 100) 0.496 -> 0.490 ( -1.21%) [ +1.81% +2.22% +0.00% / +7.26% +4.64% -1.21%] index_copy_ strided 3 : Elapsed 0.005 ms (0.505 ms / 100) 0.493 -> 0.483 ( -2.03%) [ +1.62% +4.26% +0.00% / +2.23% -2.03% +1.01%] index_add_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.501 -> 0.495 ( -1.20%) [ +7.58% +1.40% +0.00% / +5.59% +2.40% -1.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.539 ms / 100) good 0.512 -> 0.484 ( -5.47%) [ +0.00% +5.47% +2.34% / -1.95% -5.47% -2.73%] index_add_ strided 257 : Elapsed 0.005 ms (0.512 ms / 100) 0.501 -> 0.491 ( -2.00%) [ +7.58% +8.38% +0.00% / +5.39% +2.79% -2.00%] index_copy_ strided 257 : Elapsed 0.005 ms (0.539 ms / 100) 0.496 -> 0.491 ( -1.01%) [ +2.62% +2.82% +0.00% / +0.81% -1.01% +0.00%] index_add_ perm : Elapsed 0.005 ms (0.509 ms / 100) 0.499 -> 0.478 ( -4.21%) [ +3.81% +0.20% +0.00% / +7.82% -4.21% +4.81%] index_copy_ perm : Elapsed 0.005 ms (0.518 ms / 100) 0.504 -> 0.487 ( -3.37%) [ +0.00% +2.18% +2.58% / +0.00% -3.37% -1.19%] index_add_ perm_sorted : Elapsed 0.005 ms (0.504 ms / 100) 0.501 -> 0.482 ( -3.79%) [ +4.99% +0.40% +0.00% / -0.80% -3.79% +5.39%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.526 ms / 100) 9.000 -> 8.795 ( -2.28%) [ +0.00% +1.08% +0.36% / +0.96% -2.28% -1.72%] index_select const : Elapsed 0.090 ms (9.000 ms / 100) 8.839 -> 8.745 ( -1.06%) [ +0.60% +0.00% +0.52% / +0.41% -1.06% -0.66%] index_select wrap : Elapsed 0.089 ms (8.892 ms / 100) 8.975 -> 8.786 ( -2.11%) [ +0.39% +0.14% +0.00% / -0.68% -1.96% -2.11%] index_select linear : Elapsed 0.090 ms (9.010 ms / 100) 8.759 -> 8.718 ( -0.47%) [ +0.00% +1.56% +0.48% / +0.39% -0.47% -0.46%] index_select reverse : Elapsed 0.088 ms (8.759 ms / 100) 8.840 -> 8.695 ( -1.64%) [ +0.69% +0.25% +0.00% / +0.63% -1.49% -1.64%] index_select skip64 : Elapsed 0.089 ms (8.901 ms / 100) 8.868 -> 8.737 ( -1.48%) [ +1.33% +0.28% +0.00% / +0.21% -0.65% -1.48%] index_select skip256 : Elapsed 0.090 ms (8.986 ms / 100) 8.934 -> 8.770 ( -1.84%) [ +0.93% +0.00% +0.96% / -0.18% -1.34% -1.84%] index_select spread : Elapsed 0.090 ms (9.017 ms / 100) 8.816 -> 8.783 ( -0.37%) [ +0.54% +0.48% +0.00% / +0.64% -0.25% -0.37%] index_select random : Elapsed 0.089 ms (8.864 ms / 100) 8.878 -> 8.710 ( -1.89%) [ +1.04% +0.63% +0.00% / -0.36% -1.77% -1.89%] index_select random_sorted : Elapsed 0.090 ms (8.970 ms / 100) out_shape = [5, 1, 500] in_shape = [5, 1, 200] idx_dim = 2 B = [5, 1, 500] (stride (500, 500, 1)) dim = 2 fill_cnt = 200 0.435 -> 0.442 ( +1.61%) [ +2.53% +2.76% +0.00% / +3.91% +1.61% +8.05%] index_fill_ const : Elapsed 0.004 ms (0.446 ms / 100) 0.436 -> 0.441 ( +1.15%) [ +3.21% +2.29% +0.00% / +1.15% +1.38% +2.06%] index_fill_ linear : Elapsed 0.004 ms (0.450 ms / 100) 0.439 -> 0.437 ( -0.46%) [ +0.46% +1.14% +0.00% / +1.37% -0.46% +1.59%] index_fill_ reverse : Elapsed 0.004 ms (0.441 ms / 100) 0.433 -> 0.438 ( +1.15%) [ +0.00% +3.23% +8.08% / +2.54% +1.15% +2.08%] index_fill_ skip64 : Elapsed 0.004 ms (0.433 ms / 100) 0.436 -> 0.443 ( +1.61%) [ +0.00% +2.29% +15.83% / +1.61% +2.06% +2.29%] index_fill_ skip256 : Elapsed 0.004 ms (0.436 ms / 100) 0.437 -> 0.437 ( +0.00%) [ +0.00% +2.52% +1.60% / +0.00% +0.00% +1.37%] index_fill_ spread : Elapsed 0.004 ms (0.437 ms / 100) 0.437 -> 0.437 ( +0.00%) [ +0.00% +2.06% +0.46% / +0.00% +0.46% +9.38%] index_fill_ strided 3 : Elapsed 0.004 ms (0.437 ms / 100) 0.435 -> 0.436 ( +0.23%) [ +0.00% +3.45% +0.92% / +0.92% +0.23% +9.20%] index_fill_ strided 5 : Elapsed 0.004 ms (0.435 ms / 100) 0.435 -> 0.440 ( +1.15%) [ +0.00% +1.84% +1.15% / +2.99% +1.15% +17.93%] index_fill_ strided 7 : Elapsed 0.004 ms (0.435 ms / 100) 0.440 -> 0.436 ( -0.91%) [ +0.00% +2.27% +3.86% / +0.91% -0.91% +3.41%] index_fill_ strided 8 : Elapsed 0.004 ms (0.440 ms / 100) 0.437 -> 0.437 ( +0.00%) [ +0.23% +1.83% +0.00% / +0.92% +0.00% +1.60%] index_fill_ strided 16 : Elapsed 0.004 ms (0.438 ms / 100) 0.437 -> 0.433 ( -0.92%) [ +1.14% +2.06% +0.00% / +0.00% -0.92% +2.52%] index_fill_ strided 64 : Elapsed 0.004 ms (0.442 ms / 100) 0.437 -> 0.437 ( +0.00%) [ +4.81% +1.60% +0.00% / +0.00% +8.92% +1.37%] index_fill_ strided 100 : Elapsed 0.005 ms (0.458 ms / 100) 0.435 -> 0.437 ( +0.46%) [ +1.38% +1.84% +0.00% / +1.38% +0.46% +1.84%] index_fill_ strided 255 : Elapsed 0.004 ms (0.441 ms / 100) 0.436 -> 0.434 ( -0.46%) [ +0.92% +1.38% +0.00% / +1.15% -0.46% +1.38%] index_fill_ strided 256 : Elapsed 0.004 ms (0.440 ms / 100) 0.438 -> 0.447 ( +2.05%) [ +0.00% +1.14% +0.23% / +2.28% +2.05% +2.28%] index_fill_ strided 257 : Elapsed 0.004 ms (0.438 ms / 100) 0.434 -> 0.434 ( +0.00%) [ +3.46% +1.84% +0.00% / +7.14% +0.00% +9.45%] index_fill_ random : Elapsed 0.004 ms (0.449 ms / 100) 0.436 -> 0.434 ( -0.46%) [ +2.06% +2.29% +0.00% / -0.46% +0.46% +7.11%] index_fill_ random_sorted : Elapsed 0.004 ms (0.445 ms / 100) 0.438 -> 0.436 ( -0.46%) [ +7.08% +1.14% +0.00% / +1.37% -0.46% +0.23%] index_fill_ perm : Elapsed 0.005 ms (0.469 ms / 100) 0.438 -> 0.435 ( -0.68%) [+12.33% +1.14% +0.00% / -0.46% -0.68% +1.14%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.492 ms / 100) B = [5, 1, 500] (stride (500, 2500, 1)) A = [5, 1, 200] (stride (200, 1, 1)) dim = 2 0.473 -> 0.473 ( +0.00%) [ +0.00% +0.85% +1.69% / +7.40% +0.00% +0.00%] index_add_ linear : Elapsed 0.005 ms (0.473 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +1.07% +6.20% +0.00% / +13.68% -1.07% +2.99%] index_copy_ linear : Elapsed 0.005 ms (0.473 ms / 100) 0.471 -> 0.469 ( -0.42%) [ +0.00% +0.00% +0.00% / -0.21% +0.64% -0.42%] index_add_ reverse : Elapsed 0.005 ms (0.471 ms / 100) 0.467 -> 0.466 ( -0.21%) [ +0.86% +0.00% +6.21% / +0.64% -0.21% +0.64%] index_copy_ reverse : Elapsed 0.005 ms (0.471 ms / 100) 0.467 -> 0.470 ( +0.64%) [ +1.07% +1.28% +0.00% / +1.28% +0.64% +1.93%] index_add_ spread : Elapsed 0.005 ms (0.472 ms / 100) 0.467 -> 0.461 ( -1.28%) [ +1.71% +1.71% +0.00% / +3.21% -1.28% +1.93%] index_copy_ spread : Elapsed 0.005 ms (0.475 ms / 100) good 0.501 -> 0.471 ( -5.99%) [ +0.00% +2.40% +0.60% / -4.59% -5.19% -5.99%] index_add_ strided 3 : Elapsed 0.005 ms (0.501 ms / 100) 0.471 -> 0.465 ( -1.27%) [ +0.00% +7.43% +7.43% / +1.27% -1.27% +0.64%] index_copy_ strided 3 : Elapsed 0.005 ms (0.471 ms / 100) 0.471 -> 0.471 ( +0.00%) [ +0.00% +14.86% +0.21% / +0.00% +0.21% +0.85%] index_add_ strided 7 : Elapsed 0.005 ms (0.471 ms / 100) 0.467 -> 0.465 ( -0.43%) [ +0.86% +7.92% +0.00% / +1.50% -0.43% +1.71%] index_copy_ strided 7 : Elapsed 0.005 ms (0.471 ms / 100) 0.467 -> 0.469 ( +0.43%) [ +0.00% +1.28% +0.21% / +0.43% +2.14% +0.86%] index_add_ strided 257 : Elapsed 0.005 ms (0.467 ms / 100) 0.464 -> 0.465 ( +0.22%) [ +1.08% +3.45% +0.00% / +3.66% +0.22% +1.51%] index_copy_ strided 257 : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.472 ( +1.51%) [ +0.00% +2.80% +1.08% / +1.72% +1.51% +1.72%] index_add_ perm : Elapsed 0.005 ms (0.465 ms / 100) 0.463 -> 0.466 ( +0.65%) [ +1.08% +3.46% +0.00% / +3.46% +0.65% +1.94%] index_copy_ perm : Elapsed 0.005 ms (0.468 ms / 100) 0.468 -> 0.469 ( +0.21%) [ +1.07% +2.35% +0.00% / +6.62% +1.71% +0.21%] index_add_ perm_sorted : Elapsed 0.005 ms (0.473 ms / 100) 0.462 -> 0.464 ( +0.43%) [ +1.73% +2.81% +0.00% / +1.30% +0.43% +1.30%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.470 ms / 100) 0.526 -> 0.527 ( +0.19%) [ +3.23% +1.33% +0.00% / +2.28% +0.19% +2.66%] index_select const : Elapsed 0.005 ms (0.543 ms / 100) 0.526 -> 0.530 ( +0.76%) [ +2.28% +1.14% +0.00% / +1.71% +0.76% +2.28%] index_select wrap : Elapsed 0.005 ms (0.538 ms / 100) 0.525 -> 0.524 ( -0.19%) [ +4.00% +1.52% +0.00% / +2.29% -0.19% +9.33%] index_select linear : Elapsed 0.005 ms (0.546 ms / 100) 0.523 -> 0.529 ( +1.15%) [ +3.06% +2.10% +0.00% / +2.10% +1.15% +3.06%] index_select reverse : Elapsed 0.005 ms (0.539 ms / 100) 0.515 -> 0.522 ( +1.36%) [ +6.02% +3.11% +0.00% / +3.88% +1.36% +5.44%] index_select skip64 : Elapsed 0.005 ms (0.546 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +4.05% +2.31% +0.00% / +6.17% +1.16% +4.82%] index_select skip256 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.522 ( +0.38%) [ +5.77% +1.92% +0.00% / +15.77% +0.38% +4.23%] index_select spread : Elapsed 0.005 ms (0.550 ms / 100) 0.519 -> 0.527 ( +1.54%) [ +5.39% +2.12% +0.00% / +7.71% +1.54% +3.66%] index_select strided 3 : Elapsed 0.005 ms (0.547 ms / 100) 0.515 -> 0.527 ( +2.33%) [ +5.05% +3.50% +0.00% / +6.41% +2.33% +4.85%] index_select strided 5 : Elapsed 0.005 ms (0.541 ms / 100) 0.532 -> 0.531 ( -0.19%) [ +2.26% +0.00% +1.50% / +0.00% -0.19% +1.50%] index_select strided 7 : Elapsed 0.005 ms (0.544 ms / 100) 0.532 -> 0.529 ( -0.56%) [ +3.57% +0.00% +5.26% / +0.75% -0.56% +0.94%] index_select strided 8 : Elapsed 0.006 ms (0.551 ms / 100) 0.543 -> 0.532 ( -2.03%) [ +0.00% +2.39% +7.92% / -0.37% -2.03% +0.00%] index_select strided 16 : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.529 ( +1.73%) [ +4.04% +7.88% +0.00% / +4.62% +1.73% +4.42%] index_select strided 64 : Elapsed 0.005 ms (0.541 ms / 100) 0.525 -> 0.534 ( +1.71%) [ +3.24% +9.71% +0.00% / +2.67% +1.71% +4.38%] index_select strided 100 : Elapsed 0.005 ms (0.542 ms / 100) 0.519 -> 0.528 ( +1.73%) [ +4.62% +2.31% +0.00% / +3.08% +1.73% +4.24%] index_select random : Elapsed 0.005 ms (0.543 ms / 100) 0.519 -> 0.529 ( +1.93%) [ +3.85% +2.50% +0.00% / +3.08% +1.93% +4.82%] index_select random_sorted : Elapsed 0.005 ms (0.539 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) A = [5, 1, 200] (stride (200, 1, 1)) dim = 2 0.467 -> 0.468 ( +0.21%) [ +0.00% +1.93% +0.00% / +1.71% +0.21% +0.21%] index_add_ linear : Elapsed 0.005 ms (0.467 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +0.00% +2.57% +0.64% / +2.78% +0.21% +1.28%] index_copy_ linear : Elapsed 0.005 ms (0.467 ms / 100) 0.468 -> 0.471 ( +0.64%) [ +0.21% +1.71% +0.00% / +0.85% +0.64% +1.07%] index_add_ reverse : Elapsed 0.005 ms (0.469 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +1.93% +1.28% +0.00% / +2.78% +0.21% +6.85%] index_copy_ reverse : Elapsed 0.005 ms (0.476 ms / 100) 0.466 -> 0.468 ( +0.43%) [ +0.00% +2.58% +0.64% / +1.72% +0.43% +15.45%] index_add_ spread : Elapsed 0.005 ms (0.466 ms / 100) 0.466 -> 0.470 ( +0.86%) [ +0.64% +4.29% +0.00% / +3.43% +0.86% +0.86%] index_copy_ spread : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.475 ( +2.15%) [ +5.38% +3.01% +0.00% / +3.44% +2.58% +2.15%] index_add_ strided 3 : Elapsed 0.005 ms (0.490 ms / 100) 0.468 -> 0.465 ( -0.64%) [ +1.28% +2.14% +0.00% / +7.69% -0.64% +5.56%] index_copy_ strided 3 : Elapsed 0.005 ms (0.474 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +0.00% +2.78% +0.21% / +9.64% +0.21% +1.50%] index_add_ strided 7 : Elapsed 0.005 ms (0.467 ms / 100) 0.464 -> 0.468 ( +0.86%) [ +3.23% +2.80% +0.00% / +10.78% +0.86% +2.80%] index_copy_ strided 7 : Elapsed 0.005 ms (0.479 ms / 100) 0.466 -> 0.466 ( +0.00%) [ +1.07% +2.36% +0.00% / +13.73% +2.58% +0.00%] index_add_ strided 257 : Elapsed 0.005 ms (0.471 ms / 100) 0.461 -> 0.462 ( +0.22%) [ +2.17% +4.56% +0.00% / +8.89% +0.22% +4.56%] index_copy_ strided 257 : Elapsed 0.005 ms (0.471 ms / 100) 0.478 -> 0.469 ( -1.88%) [ +6.07% +0.21% +0.00% / -0.63% -1.88% +2.30%] index_add_ perm : Elapsed 0.005 ms (0.507 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +0.65% +2.15% +0.00% / +2.80% +0.65% +2.80%] index_copy_ perm : Elapsed 0.005 ms (0.468 ms / 100) 0.470 -> 0.469 ( -0.21%) [ +0.00% +1.06% +6.38% / +2.13% -0.21% +0.64%] index_add_ perm_sorted : Elapsed 0.005 ms (0.470 ms / 100) 0.478 -> 0.466 ( -2.51%) [ +3.14% +0.00% +16.74% / -0.42% -2.51% -1.05%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.493 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +3.45% +2.49% +0.00% / +4.41% +0.38% +3.45%] index_select const : Elapsed 0.005 ms (0.540 ms / 100) bad 0.527 -> 0.572 ( +8.54%) [ +2.09% +0.19% +0.00% / +10.06% +13.09% +8.54%] index_select wrap : Elapsed 0.005 ms (0.538 ms / 100) 0.523 -> 0.526 ( +0.57%) [+12.81% +1.34% +0.00% / +11.47% +0.57% +9.75%] index_select linear : Elapsed 0.006 ms (0.590 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +3.26% +5.18% +0.00% / +4.03% +1.54% +3.84%] index_select reverse : Elapsed 0.005 ms (0.538 ms / 100) 0.521 -> 0.543 ( +4.22%) [ +2.50% +2.11% +0.00% / +4.22% +7.49% +4.22%] index_select skip64 : Elapsed 0.005 ms (0.534 ms / 100) 0.521 -> 0.529 ( +1.54%) [ +2.88% +1.54% +0.00% / +4.41% +1.54% +3.26%] index_select skip256 : Elapsed 0.005 ms (0.536 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +5.77% +1.54% +0.00% / +3.65% +1.35% +10.58%] index_select spread : Elapsed 0.005 ms (0.550 ms / 100) 0.512 -> 0.523 ( +2.15%) [ +5.47% +3.52% +0.00% / +6.64% +2.15% +19.53%] index_select strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.523 -> 0.528 ( +0.96%) [ +2.87% +0.96% +0.00% / +3.63% +0.96% +5.35%] index_select strided 5 : Elapsed 0.005 ms (0.538 ms / 100) 0.521 -> 0.526 ( +0.96%) [ +3.65% +0.77% +0.00% / +4.22% +0.96% +2.69%] index_select strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.517 -> 0.526 ( +1.74%) [ +5.61% +2.13% +0.00% / +11.80% +1.74% +4.26%] index_select strided 8 : Elapsed 0.005 ms (0.546 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +4.06% +2.51% +0.00% / +22.82% +0.97% +2.71%] index_select strided 16 : Elapsed 0.005 ms (0.538 ms / 100) 0.522 -> 0.528 ( +1.15%) [ +4.02% +1.34% +0.00% / +3.64% +1.15% +4.60%] index_select strided 64 : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.523 ( +0.58%) [ +3.65% +1.54% +0.00% / +4.81% +0.58% +5.96%] index_select strided 100 : Elapsed 0.005 ms (0.539 ms / 100) 0.516 -> 0.528 ( +2.33%) [ +4.84% +1.94% +0.00% / +10.85% +2.33% +3.88%] index_select random : Elapsed 0.005 ms (0.541 ms / 100) 0.532 -> 0.532 ( +0.00%) [ +7.33% +0.00% +4.89% / +19.74% +0.00% +0.75%] index_select random_sorted : Elapsed 0.006 ms (0.571 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) A = [5, 1, 200] (stride (1, 1000, 5)) dim = 2 0.467 -> 0.472 ( +1.07%) [ +1.28% +1.28% +0.00% / +1.07% +3.85% +1.07%] index_add_ linear : Elapsed 0.005 ms (0.473 ms / 100) 0.474 -> 0.464 ( -2.11%) [ +0.21% +0.42% +0.00% / +0.21% -2.11% -0.42%] index_copy_ linear : Elapsed 0.005 ms (0.475 ms / 100) 0.475 -> 0.462 ( -2.74%) [ +1.47% +0.00% +15.79% / -1.26% -2.74% -0.42%] index_add_ reverse : Elapsed 0.005 ms (0.482 ms / 100) 0.463 -> 0.459 ( -0.86%) [ +2.16% +2.38% +0.00% / +2.38% -0.86% +2.81%] index_copy_ reverse : Elapsed 0.005 ms (0.473 ms / 100) 0.468 -> 0.467 ( -0.21%) [ +1.07% +4.06% +0.00% / +0.43% -0.21% +0.85%] index_add_ spread : Elapsed 0.005 ms (0.473 ms / 100) 0.471 -> 0.466 ( -1.06%) [ +0.21% +0.00% +1.06% / +0.85% -1.06% +1.06%] index_copy_ spread : Elapsed 0.005 ms (0.472 ms / 100) 0.468 -> 0.475 ( +1.50%) [ +0.00% +1.71% +2.56% / +6.41% +1.50% +4.49%] index_add_ strided 3 : Elapsed 0.005 ms (0.468 ms / 100) 0.468 -> 0.471 ( +0.64%) [ +0.64% +1.07% +0.00% / +0.64% +19.23% +2.99%] index_copy_ strided 3 : Elapsed 0.005 ms (0.471 ms / 100) 0.464 -> 0.469 ( +1.08%) [ +0.00% +3.23% +2.37% / +1.51% +1.08% +3.66%] index_add_ strided 7 : Elapsed 0.005 ms (0.464 ms / 100) 0.468 -> 0.468 ( +0.00%) [ +0.43% +1.28% +0.00% / +1.07% +0.00% +6.84%] index_copy_ strided 7 : Elapsed 0.005 ms (0.470 ms / 100) 0.467 -> 0.467 ( +0.00%) [ +0.43% +1.28% +0.00% / +0.21% +0.00% +9.64%] index_add_ strided 257 : Elapsed 0.005 ms (0.469 ms / 100) 0.464 -> 0.470 ( +1.29%) [ +1.94% +2.80% +0.00% / +1.29% +1.29% +6.47%] index_copy_ strided 257 : Elapsed 0.005 ms (0.473 ms / 100) 0.468 -> 0.467 ( -0.21%) [ +0.00% +1.28% +1.28% / -0.21% +0.00% +0.85%] index_add_ perm : Elapsed 0.005 ms (0.468 ms / 100) 0.470 -> 0.463 ( -1.49%) [ +1.06% +1.28% +0.00% / +1.91% -1.49% +1.28%] index_copy_ perm : Elapsed 0.005 ms (0.475 ms / 100) 0.464 -> 0.468 ( +0.86%) [ +0.00% +2.37% +0.86% / +7.76% +0.86% +1.72%] index_add_ perm_sorted : Elapsed 0.005 ms (0.464 ms / 100) 0.469 -> 0.466 ( -0.64%) [ +5.76% +0.85% +0.00% / +18.55% -0.64% +0.85%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.496 ms / 100) 0.524 -> 0.533 ( +1.72%) [ +3.82% +0.19% +0.00% / +1.72% +2.67% +2.29%] index_select const : Elapsed 0.005 ms (0.544 ms / 100) 0.525 -> 0.532 ( +1.33%) [+10.29% +0.57% +0.00% / +2.86% +9.71% +1.33%] index_select wrap : Elapsed 0.006 ms (0.579 ms / 100) 0.521 -> 0.528 ( +1.34%) [ +4.41% +1.92% +0.00% / +4.61% +1.34% +2.30%] index_select linear : Elapsed 0.005 ms (0.544 ms / 100) 0.541 -> 0.531 ( -1.85%) [ +0.74% +0.00% +3.70% / -0.55% -1.85% +0.00%] index_select reverse : Elapsed 0.005 ms (0.545 ms / 100) 0.543 -> 0.537 ( -1.10%) [ +0.92% +0.92% +0.00% / -0.74% -1.10% -1.10%] index_select skip64 : Elapsed 0.005 ms (0.548 ms / 100) 0.522 -> 0.529 ( +1.34%) [ +4.79% +1.92% +0.00% / +3.64% +1.34% +2.30%] index_select skip256 : Elapsed 0.005 ms (0.547 ms / 100) 0.528 -> 0.532 ( +0.76%) [ +2.46% +1.52% +0.00% / +3.22% +0.76% +1.33%] index_select spread : Elapsed 0.005 ms (0.541 ms / 100) 0.519 -> 0.527 ( +1.54%) [ +9.25% +3.08% +0.00% / +10.79% +1.54% +2.70%] index_select strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.519 -> 0.530 ( +2.12%) [ +4.82% +2.70% +0.00% / +3.85% +2.12% +3.47%] index_select strided 5 : Elapsed 0.005 ms (0.544 ms / 100) 0.526 -> 0.531 ( +0.95%) [ +3.61% +1.52% +0.00% / +1.52% +7.22% +0.95%] index_select strided 7 : Elapsed 0.005 ms (0.545 ms / 100) 0.521 -> 0.531 ( +1.92%) [+10.75% +1.92% +0.00% / +4.41% +1.92% +2.11%] index_select strided 8 : Elapsed 0.006 ms (0.577 ms / 100) 0.523 -> 0.534 ( +2.10%) [ +3.44% +2.10% +0.00% / +2.10% +2.10% +9.56%] index_select strided 16 : Elapsed 0.005 ms (0.541 ms / 100) 0.522 -> 0.534 ( +2.30%) [ +4.02% +1.34% +0.00% / +2.30% +2.87% +10.34%] index_select strided 64 : Elapsed 0.005 ms (0.543 ms / 100) 0.521 -> 0.530 ( +1.73%) [ +4.80% +5.18% +0.00% / +6.53% +1.73% +2.69%] index_select strided 100 : Elapsed 0.005 ms (0.546 ms / 100) 0.522 -> 0.528 ( +1.15%) [ +9.00% +16.86% +0.00% / +11.49% +1.15% +2.11%] index_select random : Elapsed 0.006 ms (0.569 ms / 100) 0.520 -> 0.530 ( +1.92%) [ +3.65% +2.12% +0.00% / +14.23% +1.92% +2.31%] index_select random_sorted : Elapsed 0.005 ms (0.539 ms / 100) B = [5, 1, 500] (stride (1, 2500, 5)) A = [5, 1, 200] (stride (1, 1, 5)) dim = 2 0.562 -> 0.556 ( -1.07%) [ +0.89% +0.00% +0.00% / -1.07% -0.89% -1.07%] index_add_ linear : Elapsed 0.006 ms (0.567 ms / 100) 0.558 -> 0.551 ( -1.25%) [ +0.90% +0.18% +0.00% / -1.25% -0.90% -1.25%] index_copy_ linear : Elapsed 0.006 ms (0.563 ms / 100) 0.561 -> 0.556 ( -0.89%) [ +1.25% +0.18% +0.00% / -0.89% -0.89% -0.89%] index_add_ reverse : Elapsed 0.006 ms (0.568 ms / 100) 0.558 -> 0.551 ( -1.25%) [ +2.51% +0.00% +0.00% / -1.08% -1.25% -1.08%] index_copy_ reverse : Elapsed 0.006 ms (0.572 ms / 100) 0.562 -> 0.556 ( -1.07%) [ +0.89% +0.00% +0.00% / -1.07% -1.07% -1.07%] index_add_ spread : Elapsed 0.006 ms (0.567 ms / 100) 0.558 -> 0.551 ( -1.25%) [ +0.54% +0.18% +0.00% / +1.97% -1.08% -1.25%] index_copy_ spread : Elapsed 0.006 ms (0.561 ms / 100) 0.562 -> 0.556 ( -1.07%) [ +0.89% +0.18% +0.00% / -1.07% -0.89% -0.89%] index_add_ strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.557 -> 0.551 ( -1.08%) [ +0.72% +0.36% +0.00% / -1.08% -1.08% -1.08%] index_copy_ strided 3 : Elapsed 0.006 ms (0.561 ms / 100) 0.562 -> 0.556 ( -1.07%) [ +1.07% +0.00% +0.00% / -0.89% -1.07% -1.07%] index_add_ strided 7 : Elapsed 0.006 ms (0.568 ms / 100) 0.558 -> 0.551 ( -1.25%) [ +0.54% +0.00% +0.00% / -1.25% -1.25% -1.25%] index_copy_ strided 7 : Elapsed 0.006 ms (0.561 ms / 100) 0.562 -> 0.556 ( -1.07%) [ +1.07% +0.00% +0.00% / -1.07% -0.71% -1.07%] index_add_ strided 257 : Elapsed 0.006 ms (0.568 ms / 100) 0.557 -> 0.551 ( -1.08%) [ +1.08% +0.54% +0.00% / -1.08% -0.90% -1.08%] index_copy_ strided 257 : Elapsed 0.006 ms (0.563 ms / 100) 0.562 -> 0.556 ( -1.07%) [ +1.07% +0.00% +0.00% / -0.89% -0.89% -1.07%] index_add_ perm : Elapsed 0.006 ms (0.568 ms / 100) 0.558 -> 0.551 ( -1.25%) [ +0.72% +0.00% +0.00% / -1.25% -1.08% -1.25%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.561 -> 0.556 ( -0.89%) [ +1.25% +0.00% +0.36% / -0.89% -0.71% -0.71%] index_add_ perm_sorted : Elapsed 0.006 ms (0.568 ms / 100) 0.558 -> 0.550 ( -1.43%) [ +0.54% +0.00% +0.18% / -1.43% -1.08% -1.08%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.561 ms / 100) 0.808 -> 0.813 ( +0.62%) [ +0.12% +0.12% +0.00% / +0.62% +5.82% +5.57%] index_select const : Elapsed 0.008 ms (0.809 ms / 100) 0.809 -> 0.812 ( +0.37%) [ +0.25% +0.00% +0.25% / +0.37% +4.33% +4.70%] index_select wrap : Elapsed 0.008 ms (0.811 ms / 100) 0.839 -> 0.829 ( -1.19%) [ +0.72% +0.00% +0.00% / +0.24% -1.19% -1.07%] index_select linear : Elapsed 0.008 ms (0.845 ms / 100) 0.808 -> 0.810 ( +0.25%) [ +0.50% +0.00% +0.37% / +0.25% +2.48% +2.23%] index_select reverse : Elapsed 0.008 ms (0.812 ms / 100) good 0.880 -> 0.817 ( -7.16%) [ +0.57% +0.00% +0.68% / +0.57% -7.16% -6.82%] index_select skip64 : Elapsed 0.009 ms (0.885 ms / 100) 0.828 -> 0.821 ( -0.85%) [ +0.48% +0.00% +0.48% / +0.48% -0.36% -0.85%] index_select skip256 : Elapsed 0.008 ms (0.832 ms / 100) 0.832 -> 0.828 ( -0.48%) [ +0.00% +0.12% +0.24% / +0.24% -0.48% -0.36%] index_select spread : Elapsed 0.008 ms (0.832 ms / 100) good 0.882 -> 0.819 ( -7.14%) [ +0.23% +0.00% +0.00% / +0.23% -6.92% -7.14%] index_select strided 3 : Elapsed 0.009 ms (0.884 ms / 100) 0.825 -> 0.819 ( -0.73%) [ +0.85% +0.00% +0.24% / +0.48% -0.48% -0.73%] index_select strided 5 : Elapsed 0.008 ms (0.832 ms / 100) 0.808 -> 0.813 ( +0.62%) [ +0.37% +0.12% +0.00% / +0.62% +1.24% +1.36%] index_select strided 7 : Elapsed 0.008 ms (0.811 ms / 100) 0.810 -> 0.812 ( +0.25%) [ +0.00% +0.00% +0.00% / +0.25% +0.86% +1.36%] index_select strided 8 : Elapsed 0.008 ms (0.810 ms / 100) 0.809 -> 0.810 ( +0.12%) [ +0.37% +0.12% +0.00% / +0.12% +1.48% +0.99%] index_select strided 16 : Elapsed 0.008 ms (0.812 ms / 100) 0.809 -> 0.811 ( +0.25%) [ +0.37% +0.25% +0.00% / +0.25% +1.36% +1.85%] index_select strided 64 : Elapsed 0.008 ms (0.812 ms / 100) 0.810 -> 0.811 ( +0.12%) [ +0.37% +0.00% +0.12% / +0.12% +1.60% +1.60%] index_select strided 100 : Elapsed 0.008 ms (0.813 ms / 100) 0.808 -> 0.814 ( +0.74%) [ +0.99% +0.62% +0.00% / +0.74% +2.48% +1.86%] index_select random : Elapsed 0.008 ms (0.816 ms / 100) 0.807 -> 0.814 ( +0.87%) [ +0.87% +1.36% +0.00% / +0.87% +2.48% +2.35%] index_select random_sorted : Elapsed 0.008 ms (0.814 ms / 100) B = [5, 1, 500] (stride (1, 5, 5)) A = [5, 1, 200] (stride (200, 1000, 1)) dim = 2 0.471 -> 0.465 ( -1.27%) [ +0.00% +0.85% +0.42% / +0.42% -1.27% +0.85%] index_add_ linear : Elapsed 0.005 ms (0.471 ms / 100) 0.466 -> 0.467 ( +0.21%) [ +0.86% +1.29% +0.00% / +2.36% +0.21% +3.65%] index_copy_ linear : Elapsed 0.005 ms (0.470 ms / 100) 0.472 -> 0.470 ( -0.42%) [ +5.51% +0.85% +0.00% / -0.42% +1.91% -0.21%] index_add_ reverse : Elapsed 0.005 ms (0.498 ms / 100) 0.479 -> 0.462 ( -3.55%) [ +0.00% +0.00% +9.60% / -1.04% -3.55% -0.84%] index_copy_ reverse : Elapsed 0.005 ms (0.479 ms / 100) 0.469 -> 0.466 ( -0.64%) [ +0.00% +2.77% +18.55% / +1.07% -0.64% +0.21%] index_add_ spread : Elapsed 0.005 ms (0.469 ms / 100) 0.470 -> 0.466 ( -0.85%) [ +0.00% +1.70% +0.43% / +1.70% -0.85% +0.21%] index_copy_ spread : Elapsed 0.005 ms (0.470 ms / 100) 0.477 -> 0.466 ( -2.31%) [ +6.08% +0.42% +0.00% / -1.47% -2.31% -1.89%] index_add_ strided 3 : Elapsed 0.005 ms (0.506 ms / 100) 0.469 -> 0.466 ( -0.64%) [ +0.21% +4.48% +0.00% / +2.35% -0.64% +1.28%] index_copy_ strided 3 : Elapsed 0.005 ms (0.470 ms / 100) 0.475 -> 0.467 ( -1.68%) [ +0.00% +2.74% +0.84% / -0.42% -1.68% -1.26%] index_add_ strided 7 : Elapsed 0.005 ms (0.475 ms / 100) 0.468 -> 0.464 ( -0.85%) [ +0.43% +2.35% +0.00% / +8.33% -0.85% +1.07%] index_copy_ strided 7 : Elapsed 0.005 ms (0.470 ms / 100) 0.470 -> 0.464 ( -1.28%) [ +0.43% +1.91% +0.00% / +2.77% -1.28% +0.00%] index_add_ strided 257 : Elapsed 0.005 ms (0.472 ms / 100) 0.468 -> 0.466 ( -0.43%) [ +0.00% +2.14% +4.91% / +1.07% -0.43% +0.43%] index_copy_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.468 -> 0.468 ( +0.00%) [ +4.49% +1.50% +0.00% / +0.43% +0.00% +1.92%] index_add_ perm : Elapsed 0.005 ms (0.489 ms / 100) 0.474 -> 0.471 ( -0.63%) [ +0.21% +0.00% +0.21% / +0.42% +5.49% -0.63%] index_copy_ perm : Elapsed 0.005 ms (0.475 ms / 100) 0.476 -> 0.466 ( -2.10%) [ +1.68% +0.42% +0.00% / -1.26% -2.10% +5.04%] index_add_ perm_sorted : Elapsed 0.005 ms (0.484 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +4.30% +1.51% +0.00% / +1.51% -0.43% +7.31%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.485 ms / 100) 0.515 -> 0.529 ( +2.72%) [ +4.85% +2.52% +0.00% / +4.66% +2.72% +4.47%] index_select const : Elapsed 0.005 ms (0.540 ms / 100) 0.519 -> 0.531 ( +2.31%) [ +3.66% +1.16% +0.00% / +7.90% +2.31% +4.24%] index_select wrap : Elapsed 0.005 ms (0.538 ms / 100) 0.528 -> 0.533 ( +0.95%) [ +7.58% +0.00% +0.38% / +8.90% +1.70% +0.95%] index_select linear : Elapsed 0.006 ms (0.568 ms / 100) 0.523 -> 0.528 ( +0.96%) [+11.66% +3.44% +0.00% / +2.29% +0.96% +2.10%] index_select reverse : Elapsed 0.006 ms (0.584 ms / 100) 0.514 -> 0.525 ( +2.14%) [ +5.45% +2.72% +0.00% / +4.67% +2.14% +4.67%] index_select skip64 : Elapsed 0.005 ms (0.542 ms / 100) 0.515 -> 0.525 ( +1.94%) [ +5.24% +3.30% +0.00% / +4.08% +1.94% +11.26%] index_select skip256 : Elapsed 0.005 ms (0.542 ms / 100) 0.526 -> 0.533 ( +1.33%) [ +2.66% +1.33% +0.00% / +1.90% +1.33% +8.94%] index_select spread : Elapsed 0.005 ms (0.540 ms / 100) 0.535 -> 0.526 ( -1.68%) [ +1.50% +0.00% +1.87% / +2.06% -1.68% -0.37%] index_select strided 3 : Elapsed 0.005 ms (0.543 ms / 100) 0.527 -> 0.527 ( +0.00%) [ +2.85% +0.00% +14.42% / +2.47% +0.00% +1.52%] index_select strided 5 : Elapsed 0.005 ms (0.542 ms / 100) 0.537 -> 0.530 ( -1.30%) [ +1.49% +0.00% +1.30% / +0.74% -1.30% -0.74%] index_select strided 7 : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +4.02% +1.15% +0.00% / +3.26% +0.96% +2.30%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.530 -> 0.523 ( -1.32%) [ +8.49% +1.70% +0.00% / +7.55% -1.32% +0.94%] index_select strided 16 : Elapsed 0.006 ms (0.575 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +2.68% +1.15% +0.00% / +3.64% +0.38% +1.34%] index_select strided 64 : Elapsed 0.005 ms (0.536 ms / 100) 0.523 -> 0.527 ( +0.76%) [ +3.82% +1.91% +0.00% / +1.72% +0.76% +1.53%] index_select strided 100 : Elapsed 0.005 ms (0.543 ms / 100) 0.529 -> 0.535 ( +1.13%) [ +7.56% +3.02% +0.00% / +1.13% +4.91% +6.99%] index_select random : Elapsed 0.006 ms (0.569 ms / 100) 0.521 -> 0.537 ( +3.07%) [ +4.41% +3.65% +0.00% / +3.07% +4.80% +3.84%] index_select random_sorted : Elapsed 0.005 ms (0.544 ms / 100) out_shape = [500, 200, 1] in_shape = [5, 200, 1] idx_dim = 0 B = [500, 200, 1] (stride (200, 1, 1)) dim = 0 fill_cnt = 5 0.453 -> 0.456 ( +0.66%) [ +1.10% +0.44% +0.00% / +1.10% +0.88% +0.66%] index_fill_ const : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +1.32% +0.22% +0.00% / +3.08% +0.00% +0.00%] index_fill_ linear : Elapsed 0.005 ms (0.460 ms / 100) 0.455 -> 0.453 ( -0.44%) [ +0.44% +0.22% +0.00% / +8.35% -0.44% +0.00%] index_fill_ reverse : Elapsed 0.005 ms (0.457 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.11% +0.44% +0.00% / +1.33% +1.55% +1.33%] index_fill_ skip64 : Elapsed 0.005 ms (0.456 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.33% +0.22% +0.00% / +1.33% +1.11% +1.33%] index_fill_ skip256 : Elapsed 0.005 ms (0.458 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +2.44% +0.22% +0.00% / +1.33% +1.55% +1.55%] index_fill_ spread : Elapsed 0.005 ms (0.462 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.11% +0.00% +0.00% / +1.11% +1.33% +1.11%] index_fill_ strided 3 : Elapsed 0.005 ms (0.457 ms / 100) 0.453 -> 0.454 ( +0.22%) [ +1.10% +0.44% +0.00% / +1.10% +0.22% +0.44%] index_fill_ strided 5 : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.454 ( +0.22%) [ +0.88% +0.00% +1.99% / +0.88% +0.44% +0.22%] index_fill_ strided 7 : Elapsed 0.005 ms (0.457 ms / 100) 0.453 -> 0.457 ( +0.88%) [ +1.10% +0.00% +12.36% / +1.10% +0.88% +0.88%] index_fill_ strided 8 : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.454 ( +0.44%) [ +1.33% +0.00% +1.55% / +0.66% +1.11% +0.44%] index_fill_ strided 16 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.22% +0.00% / +0.88% +0.00% +0.22%] index_fill_ strided 64 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.66% +0.00% / +0.88% +0.00% +0.00%] index_fill_ strided 100 : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.455 ( -0.44%) [ +0.22% +0.00% +0.00% / +0.22% -0.44% +2.19%] index_fill_ strided 255 : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.455 ( -0.44%) [ +0.22% +0.22% +0.00% / +0.22% +7.00% -0.44%] index_fill_ strided 256 : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.33% +0.00% +0.22% / +1.11% +1.11% +3.98%] index_fill_ strided 257 : Elapsed 0.005 ms (0.458 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.33% +0.44% +0.00% / +1.33% +1.33% +14.19%] index_fill_ random : Elapsed 0.005 ms (0.457 ms / 100) 0.453 -> 0.454 ( +0.22%) [ +2.43% +0.22% +0.00% / +0.88% +0.22% +0.44%] index_fill_ random_sorted : Elapsed 0.005 ms (0.464 ms / 100) 0.453 -> 0.455 ( +0.44%) [ +1.10% +3.75% +0.00% / +3.31% +0.44% +0.66%] index_fill_ perm : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.453 ( -0.88%) [ +0.22% +0.00% +0.00% / +3.72% -0.66% -0.88%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.458 ms / 100) B = [500, 200, 1] (stride (1, 500, 100000)) A = [5, 200, 1] (stride (1, 5, 5)) dim = 0 0.571 -> 0.576 ( +0.88%) [ +1.40% +0.00% +0.00% / +1.23% +0.88% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.579 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.17% +0.00% / +1.02% +0.68% +0.51%] index_copy_ linear : Elapsed 0.006 ms (0.591 ms / 100) 0.573 -> 0.575 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.70% +0.35% +0.35%] index_add_ reverse : Elapsed 0.006 ms (0.577 ms / 100) 0.587 -> 0.589 ( +0.34%) [ +0.85% +0.00% +0.00% / +1.36% +0.51% +0.34%] index_copy_ reverse : Elapsed 0.006 ms (0.592 ms / 100) 0.572 -> 0.575 ( +0.52%) [ +1.05% +0.00% +0.17% / +0.87% +0.87% +0.52%] index_add_ spread : Elapsed 0.006 ms (0.578 ms / 100) 0.587 -> 0.588 ( +0.17%) [ +1.02% +0.00% +0.00% / +0.85% +0.68% +0.17%] index_copy_ spread : Elapsed 0.006 ms (0.593 ms / 100) 0.572 -> 0.575 ( +0.52%) [ +2.10% +0.17% +0.00% / +2.45% +0.70% +0.52%] index_add_ strided 3 : Elapsed 0.006 ms (0.584 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.19% +0.00% +0.17% / +1.02% +0.68% +0.51%] index_copy_ strided 3 : Elapsed 0.006 ms (0.593 ms / 100) 0.572 -> 0.576 ( +0.70%) [ +2.97% +0.17% +0.00% / +1.05% +0.70% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.589 ms / 100) 0.586 -> 0.590 ( +0.68%) [ +1.02% +0.00% +0.17% / +1.37% +1.02% +0.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.592 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +1.22% +0.17% +0.00% / +1.22% +5.58% +0.52%] index_add_ strided 257 : Elapsed 0.006 ms (0.580 ms / 100) 0.586 -> 0.590 ( +0.68%) [ +0.85% +0.00% +0.34% / +1.02% +1.02% +0.68%] index_copy_ strided 257 : Elapsed 0.006 ms (0.591 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +1.92% +0.17% +0.00% / +1.75% +0.52% +0.52%] index_add_ perm : Elapsed 0.006 ms (0.584 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.00% +0.00% / +0.51% +0.85% +1.02%] index_copy_ perm : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.575 ( +0.70%) [ +1.23% +0.35% +0.00% / +2.10% +0.70% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.578 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +1.02% +0.00% +0.00% / +1.19% +0.51% +0.51%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.592 ms / 100) good 5.171 -> 4.794 ( -7.29%) [ +0.21% +0.00% +0.33% / -7.21% -7.29% -7.17%] index_select const : Elapsed 0.052 ms (5.182 ms / 100) good 5.193 -> 4.801 ( -7.55%) [ +0.00% +0.02% +0.10% / -7.16% -7.09% -7.55%] index_select wrap : Elapsed 0.052 ms (5.193 ms / 100) good 5.215 -> 4.813 ( -7.71%) [ +0.00% +0.10% +0.08% / -7.65% -7.71% -7.65%] index_select linear : Elapsed 0.052 ms (5.215 ms / 100) good 5.176 -> 4.784 ( -7.57%) [ +0.00% +0.02% +0.06% / -7.48% -7.55% -7.57%] index_select reverse : Elapsed 0.052 ms (5.176 ms / 100) good 5.177 -> 4.786 ( -7.55%) [ +0.21% +0.10% +0.00% / -7.32% -7.55% -7.20%] index_select skip64 : Elapsed 0.052 ms (5.188 ms / 100) good 5.212 -> 4.804 ( -7.83%) [ +0.06% +0.10% +0.00% / -7.83% -7.77% -7.58%] index_select skip256 : Elapsed 0.052 ms (5.215 ms / 100) good 5.211 -> 4.812 ( -7.66%) [ +0.10% +0.15% +0.00% / -7.52% -7.48% -7.66%] index_select spread : Elapsed 0.052 ms (5.216 ms / 100) good 5.182 -> 4.793 ( -7.51%) [ +0.04% +0.06% +0.00% / -7.51% -7.51% -7.45%] index_select strided 3 : Elapsed 0.052 ms (5.184 ms / 100) good 5.177 -> 4.795 ( -7.38%) [ +0.17% +0.00% +0.02% / -7.28% -7.30% -7.38%] index_select random : Elapsed 0.052 ms (5.186 ms / 100) good 5.219 -> 4.819 ( -7.66%) [ +0.27% +0.02% +0.00% / -7.66% -7.61% -7.66%] index_select random_sorted : Elapsed 0.052 ms (5.233 ms / 100) B = [500, 200, 1] (stride (1, 500, 100000)) A = [5, 200, 1] (stride (1, 5, 1000)) dim = 0 0.572 -> 0.576 ( +0.70%) [ +1.40% +0.35% +0.00% / +1.05% +0.70% +0.70%] index_add_ linear : Elapsed 0.006 ms (0.580 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.03% +0.17% +0.00% / +0.68% +0.68% +1.03%] index_copy_ linear : Elapsed 0.006 ms (0.591 ms / 100) 0.573 -> 0.577 ( +0.70%) [ +1.40% +0.17% +0.00% / +0.87% +0.70% +0.70%] index_add_ reverse : Elapsed 0.006 ms (0.581 ms / 100) 0.584 -> 0.589 ( +0.86%) [ +1.03% +0.00% +0.17% / +1.03% +1.37% +0.86%] index_copy_ reverse : Elapsed 0.006 ms (0.590 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +1.22% +0.17% +0.00% / +1.05% +0.52% +0.52%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.584 -> 0.589 ( +0.86%) [ +0.86% +0.17% +0.00% / +0.86% +1.03% +1.03%] index_copy_ spread : Elapsed 0.006 ms (0.589 ms / 100) 0.573 -> 0.576 ( +0.52%) [ +1.05% +1.57% +0.00% / +0.70% +4.01% +0.52%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.585 -> 0.590 ( +0.85%) [ +1.20% +0.17% +0.00% / +0.85% +0.85% +0.85%] index_copy_ strided 3 : Elapsed 0.006 ms (0.592 ms / 100) 0.572 -> 0.576 ( +0.70%) [ +0.87% +3.15% +0.00% / +0.87% +0.87% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.585 -> 0.589 ( +0.68%) [ +1.03% +0.00% +0.17% / +1.03% +0.85% +0.68%] index_copy_ strided 7 : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.575 ( +0.70%) [ +2.28% +0.35% +0.00% / +1.23% +0.88% +0.70%] index_add_ strided 257 : Elapsed 0.006 ms (0.584 ms / 100) 0.586 -> 0.589 ( +0.51%) [ +0.85% +0.00% +0.00% / +1.02% +0.51% +0.68%] index_copy_ strided 257 : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.576 ( +0.88%) [ +1.23% +0.53% +0.00% / +1.05% +0.88% +0.88%] index_add_ perm : Elapsed 0.006 ms (0.578 ms / 100) 0.586 -> 0.588 ( +0.34%) [ +0.85% +0.00% +0.00% / +0.85% +0.34% +0.51%] index_copy_ perm : Elapsed 0.006 ms (0.591 ms / 100) 0.573 -> 0.575 ( +0.35%) [ +1.22% +0.17% +0.00% / +0.87% +0.35% +0.52%] index_add_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 0.587 -> 0.589 ( +0.34%) [ +1.53% +0.00% +0.17% / +0.85% +0.34% +0.68%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.596 ms / 100) good 5.184 -> 4.781 ( -7.77%) [ +0.14% +0.00% +0.10% / -7.70% -7.77% -7.62%] index_select const : Elapsed 0.052 ms (5.191 ms / 100) good 5.184 -> 4.817 ( -7.08%) [ +0.25% +0.00% +0.23% / -7.08% -7.08% -7.08%] index_select wrap : Elapsed 0.052 ms (5.197 ms / 100) good 5.228 -> 4.809 ( -8.01%) [ +0.00% +0.04% +0.00% / -7.98% -8.01% -7.79%] index_select linear : Elapsed 0.052 ms (5.228 ms / 100) good 5.208 -> 4.779 ( -8.24%) [ +0.10% +0.12% +0.00% / -8.12% -8.24% -8.20%] index_select reverse : Elapsed 0.052 ms (5.213 ms / 100) good 5.200 -> 4.788 ( -7.92%) [ +0.00% +0.23% +0.00% / -7.71% -7.85% -7.92%] index_select skip64 : Elapsed 0.052 ms (5.200 ms / 100) good 5.212 -> 4.793 ( -8.04%) [ +0.15% +0.13% +0.00% / -8.04% -7.73% -7.87%] index_select skip256 : Elapsed 0.052 ms (5.220 ms / 100) good 5.224 -> 4.801 ( -8.10%) [ +0.00% +0.08% +0.06% / -7.68% -8.10% -7.96%] index_select spread : Elapsed 0.052 ms (5.224 ms / 100) good 5.198 -> 4.778 ( -8.08%) [ +0.23% +0.00% +0.08% / -7.95% -7.81% -8.08%] index_select strided 3 : Elapsed 0.052 ms (5.210 ms / 100) good 5.190 -> 4.784 ( -7.82%) [ +0.19% +0.00% +0.33% / -7.40% -7.71% -7.82%] index_select random : Elapsed 0.052 ms (5.200 ms / 100) good 5.222 -> 4.806 ( -7.97%) [ +0.00% +0.06% +0.10% / -7.97% -7.85% -7.93%] index_select random_sorted : Elapsed 0.052 ms (5.222 ms / 100) out_shape = [5, 500, 1] in_shape = [5, 200, 1] idx_dim = 1 B = [5, 500, 1] (stride (500, 1, 1)) dim = 1 fill_cnt = 200 0.436 -> 0.441 ( +1.15%) [ +0.92% +2.52% +0.00% / +1.38% +1.15% +2.29%] index_fill_ const : Elapsed 0.004 ms (0.440 ms / 100) 0.436 -> 0.435 ( -0.23%) [ +0.92% +1.83% +0.00% / +2.52% -0.23% +0.92%] index_fill_ linear : Elapsed 0.004 ms (0.440 ms / 100) 0.436 -> 0.439 ( +0.69%) [ +7.80% +4.13% +0.00% / +1.61% +1.61% +0.69%] index_fill_ reverse : Elapsed 0.005 ms (0.470 ms / 100) 0.436 -> 0.436 ( +0.00%) [ +3.90% +1.38% +0.00% / +0.69% +0.00% +3.90%] index_fill_ skip64 : Elapsed 0.005 ms (0.453 ms / 100) 0.433 -> 0.439 ( +1.39%) [ +1.85% +2.54% +0.00% / +1.39% +3.93% +6.93%] index_fill_ skip256 : Elapsed 0.004 ms (0.441 ms / 100) 0.445 -> 0.439 ( -1.35%) [ +0.90% +0.00% +1.57% / -0.90% -1.35% +16.40%] index_fill_ spread : Elapsed 0.004 ms (0.449 ms / 100) 0.438 -> 0.434 ( -0.91%) [ +0.68% +5.71% +0.00% / +1.14% -0.91% +7.99%] index_fill_ strided 3 : Elapsed 0.004 ms (0.441 ms / 100) 0.436 -> 0.437 ( +0.23%) [ +0.00% +6.65% +5.96% / +1.83% +0.23% +0.46%] index_fill_ strided 5 : Elapsed 0.004 ms (0.436 ms / 100) 0.441 -> 0.439 ( -0.45%) [ +0.00% +0.45% +13.15% / +6.35% -0.45% +0.45%] index_fill_ strided 7 : Elapsed 0.004 ms (0.441 ms / 100) 0.434 -> 0.439 ( +1.15%) [ +1.38% +2.07% +0.00% / +8.53% +1.15% +7.60%] index_fill_ strided 8 : Elapsed 0.004 ms (0.440 ms / 100) 0.436 -> 0.437 ( +0.23%) [ +0.23% +5.73% +0.00% / +7.34% +0.23% +7.80%] index_fill_ strided 16 : Elapsed 0.004 ms (0.437 ms / 100) 0.437 -> 0.440 ( +0.69%) [ +0.23% +1.83% +0.00% / +5.72% +0.69% +4.81%] index_fill_ strided 64 : Elapsed 0.004 ms (0.438 ms / 100) 0.434 -> 0.436 ( +0.46%) [ +1.15% +1.38% +0.00% / +3.92% +0.46% +2.07%] index_fill_ strided 100 : Elapsed 0.004 ms (0.439 ms / 100) 0.434 -> 0.435 ( +0.23%) [ +0.92% +2.76% +0.00% / +1.84% +0.23% +2.07%] index_fill_ strided 255 : Elapsed 0.004 ms (0.438 ms / 100) 0.436 -> 0.435 ( -0.23%) [ +0.69% +0.69% +0.00% / +7.34% -0.23% +2.52%] index_fill_ strided 256 : Elapsed 0.004 ms (0.439 ms / 100) 0.435 -> 0.436 ( +0.23%) [ +0.23% +3.68% +0.00% / +0.23% +2.99% +2.07%] index_fill_ strided 257 : Elapsed 0.004 ms (0.436 ms / 100) 0.437 -> 0.436 ( -0.23%) [ +0.00% +0.46% +3.43% / -0.23% +0.46% +1.14%] index_fill_ random : Elapsed 0.004 ms (0.437 ms / 100) 0.438 -> 0.437 ( -0.23%) [ +0.00% +1.14% +0.46% / +0.00% -0.23% +1.37%] index_fill_ random_sorted : Elapsed 0.004 ms (0.438 ms / 100) 0.436 -> 0.437 ( +0.23%) [ +0.00% +4.82% +0.69% / +0.23% +0.69% +1.38%] index_fill_ perm : Elapsed 0.004 ms (0.436 ms / 100) 0.437 -> 0.430 ( -1.60%) [ +0.00% +1.83% +6.18% / -1.60% -0.92% +0.00%] index_fill_ perm_sorted : Elapsed 0.004 ms (0.437 ms / 100) B = [5, 500, 1] (stride (1, 5, 2500)) A = [5, 200, 1] (stride (200, 1, 1)) dim = 1 0.467 -> 0.462 ( -1.07%) [ +0.21% +1.93% +0.00% / +1.50% -1.07% +6.21%] index_add_ linear : Elapsed 0.005 ms (0.468 ms / 100) 0.464 -> 0.463 ( -0.22%) [ +0.86% +1.94% +0.00% / +2.16% -0.22% +15.30%] index_copy_ linear : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.464 ( -0.22%) [ +3.44% +3.23% +0.00% / +2.15% -0.22% +0.43%] index_add_ reverse : Elapsed 0.005 ms (0.481 ms / 100) 0.461 -> 0.469 ( +1.74%) [ +4.56% +6.51% +0.00% / +2.39% +1.95% +1.74%] index_copy_ reverse : Elapsed 0.005 ms (0.482 ms / 100) 0.463 -> 0.468 ( +1.08%) [ +0.22% +2.81% +0.00% / +1.51% +1.08% +2.38%] index_add_ spread : Elapsed 0.005 ms (0.464 ms / 100) 0.466 -> 0.470 ( +0.86%) [ +1.29% +1.07% +0.00% / +0.86% +8.58% +0.86%] index_copy_ spread : Elapsed 0.005 ms (0.472 ms / 100) 0.461 -> 0.473 ( +2.60%) [ +4.34% +2.82% +0.00% / +2.60% +7.16% +8.89%] index_add_ strided 3 : Elapsed 0.005 ms (0.481 ms / 100) 0.461 -> 0.470 ( +1.95%) [ +4.56% +4.77% +0.00% / +1.95% +13.23% +8.24%] index_copy_ strided 3 : Elapsed 0.005 ms (0.482 ms / 100) 0.464 -> 0.470 ( +1.29%) [ +4.09% +4.31% +0.00% / +2.59% +1.29% +1.94%] index_add_ strided 7 : Elapsed 0.005 ms (0.483 ms / 100) 0.471 -> 0.468 ( -0.64%) [ +2.34% +1.49% +0.00% / -0.64% +1.70% +0.42%] index_copy_ strided 7 : Elapsed 0.005 ms (0.482 ms / 100) 0.470 -> 0.484 ( +2.98%) [ +0.00% +1.49% +5.74% / +2.98% +5.74% +7.02%] index_add_ strided 257 : Elapsed 0.005 ms (0.470 ms / 100) bad 0.471 -> 0.502 ( +6.58%) [ +0.00% +3.18% +11.68% / +6.79% +7.86% +6.58%] index_copy_ strided 257 : Elapsed 0.005 ms (0.471 ms / 100) bad 0.463 -> 0.494 ( +6.70%) [ +0.86% +2.16% +0.00% / +9.94% +7.56% +6.70%] index_add_ perm : Elapsed 0.005 ms (0.467 ms / 100) bad 0.462 -> 0.486 ( +5.19%) [ +1.73% +3.68% +0.00% / +19.48% +12.55% +5.19%] index_copy_ perm : Elapsed 0.005 ms (0.470 ms / 100) 0.469 -> 0.469 ( +0.00%) [ +2.35% +2.56% +0.00% / +5.33% +0.00% +1.49%] index_add_ perm_sorted : Elapsed 0.005 ms (0.480 ms / 100) 0.463 -> 0.475 ( +2.59%) [ +1.30% +1.51% +0.00% / +2.59% +4.32% +4.75%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.525 -> 0.543 ( +3.43%) [ +3.62% +4.00% +0.00% / +3.43% +6.29% +3.62%] index_select const : Elapsed 0.005 ms (0.544 ms / 100) 0.522 -> 0.535 ( +2.49%) [ +4.21% +0.00% +0.38% / +3.26% +15.33% +2.49%] index_select wrap : Elapsed 0.005 ms (0.544 ms / 100) 0.532 -> 0.529 ( -0.56%) [ +2.07% +0.00% +4.51% / +1.32% -0.56% +1.13%] index_select linear : Elapsed 0.005 ms (0.543 ms / 100) 0.527 -> 0.531 ( +0.76%) [ +7.40% +0.00% +13.28% / +2.47% +0.76% +2.28%] index_select reverse : Elapsed 0.006 ms (0.566 ms / 100) 0.526 -> 0.548 ( +4.18%) [ +4.37% +0.00% +0.38% / +4.18% +7.79% +5.89%] index_select skip64 : Elapsed 0.005 ms (0.549 ms / 100) 0.524 -> 0.540 ( +3.05%) [ +4.20% +0.76% +0.00% / +3.63% +5.53% +3.05%] index_select skip256 : Elapsed 0.005 ms (0.546 ms / 100) 0.524 -> 0.531 ( +1.34%) [ +4.20% +4.58% +0.00% / +4.20% +1.34% +4.39%] index_select spread : Elapsed 0.005 ms (0.546 ms / 100) 0.516 -> 0.541 ( +4.84%) [ +6.59% +0.00% +1.16% / +5.62% +5.43% +4.84%] index_select strided 3 : Elapsed 0.006 ms (0.550 ms / 100) 0.520 -> 0.540 ( +3.85%) [ +4.62% +0.19% +0.00% / +3.85% +9.81% +8.85%] index_select strided 5 : Elapsed 0.005 ms (0.544 ms / 100) 0.525 -> 0.531 ( +1.14%) [ +4.38% +0.00% +0.38% / +3.43% +1.14% +14.67%] index_select strided 7 : Elapsed 0.005 ms (0.548 ms / 100) 0.523 -> 0.536 ( +2.49%) [ +4.40% +0.00% +0.76% / +9.94% +2.49% +4.21%] index_select strided 8 : Elapsed 0.005 ms (0.546 ms / 100) 0.524 -> 0.536 ( +2.29%) [ +4.96% +0.00% +5.15% / +2.86% +2.86% +2.29%] index_select strided 16 : Elapsed 0.005 ms (0.550 ms / 100) 0.527 -> 0.533 ( +1.14%) [ +3.04% +0.00% +5.50% / +10.06% +1.14% +1.90%] index_select strided 64 : Elapsed 0.005 ms (0.543 ms / 100) 0.527 -> 0.539 ( +2.28%) [+10.44% +0.00% +1.14% / +19.54% +2.28% +2.28%] index_select strided 100 : Elapsed 0.006 ms (0.582 ms / 100) 0.526 -> 0.531 ( +0.95%) [ +4.56% +0.19% +0.00% / +3.99% +0.95% +1.90%] index_select random : Elapsed 0.005 ms (0.550 ms / 100) 0.520 -> 0.533 ( +2.50%) [ +4.81% +0.00% +0.58% / +4.62% +5.96% +2.50%] index_select random_sorted : Elapsed 0.005 ms (0.545 ms / 100) B = [5, 500, 1] (stride (1, 5, 2500)) A = [5, 200, 1] (stride (200, 1, 200)) dim = 1 0.465 -> 0.473 ( +1.72%) [ +0.86% +3.01% +0.00% / +1.72% +12.69% +2.15%] index_add_ linear : Elapsed 0.005 ms (0.469 ms / 100) 0.469 -> 0.468 ( -0.21%) [ +0.00% +2.35% +0.85% / +0.21% -0.21% +1.07%] index_copy_ linear : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.465 ( +0.00%) [ +1.08% +7.10% +0.00% / +1.94% +0.00% +4.09%] index_add_ reverse : Elapsed 0.005 ms (0.470 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +7.07% +0.43% +0.00% / +1.28% +0.21% +7.49%] index_copy_ reverse : Elapsed 0.005 ms (0.500 ms / 100) 0.479 -> 0.467 ( -2.51%) [+15.87% +0.00% +2.92% / -0.84% -2.51% +5.22%] index_add_ spread : Elapsed 0.006 ms (0.555 ms / 100) 0.464 -> 0.465 ( +0.22%) [ +1.29% +1.72% +0.00% / +7.54% +0.22% +10.99%] index_copy_ spread : Elapsed 0.005 ms (0.470 ms / 100) 0.463 -> 0.467 ( +0.86%) [ +1.08% +5.62% +0.00% / +1.30% +0.86% +1.30%] index_add_ strided 3 : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.462 ( -0.65%) [ +1.29% +1.94% +0.00% / +1.51% -0.65% +0.86%] index_copy_ strided 3 : Elapsed 0.005 ms (0.471 ms / 100) 0.458 -> 0.464 ( +1.31%) [ +3.93% +4.59% +0.00% / +3.28% +1.31% +3.93%] index_add_ strided 7 : Elapsed 0.005 ms (0.476 ms / 100) 0.462 -> 0.464 ( +0.43%) [ +1.52% +2.60% +0.00% / +2.81% +0.43% +1.08%] index_copy_ strided 7 : Elapsed 0.005 ms (0.469 ms / 100) 0.460 -> 0.468 ( +1.74%) [ +1.74% +4.57% +0.00% / +1.74% +4.35% +2.83%] index_add_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.460 -> 0.469 ( +1.96%) [ +2.17% +3.48% +0.00% / +1.96% +8.26% +2.61%] index_copy_ strided 257 : Elapsed 0.005 ms (0.470 ms / 100) 0.466 -> 0.469 ( +0.64%) [ +1.29% +2.79% +0.00% / +0.64% +6.01% +7.94%] index_add_ perm : Elapsed 0.005 ms (0.472 ms / 100) 0.464 -> 0.475 ( +2.37%) [ +1.51% +2.37% +0.00% / +2.37% +14.44% +19.18%] index_copy_ perm : Elapsed 0.005 ms (0.471 ms / 100) 0.462 -> 0.469 ( +1.52%) [ +1.52% +4.11% +0.00% / +2.38% +1.52% +4.33%] index_add_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.460 -> 0.472 ( +2.61%) [ +2.61% +3.91% +0.00% / +3.26% +2.61% +3.91%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.472 ms / 100) 0.523 -> 0.530 ( +1.34%) [+11.28% +0.00% +8.60% / +4.21% +1.34% +2.87%] index_select const : Elapsed 0.006 ms (0.582 ms / 100) 0.521 -> 0.533 ( +2.30%) [ +4.03% +0.00% +0.77% / +12.86% +2.30% +2.88%] index_select wrap : Elapsed 0.005 ms (0.542 ms / 100) bad 0.514 -> 0.540 ( +5.06%) [ +5.84% +1.36% +0.00% / +13.62% +5.64% +5.06%] index_select linear : Elapsed 0.005 ms (0.544 ms / 100) 0.520 -> 0.528 ( +1.54%) [ +4.04% +0.96% +0.00% / +2.69% +1.54% +4.42%] index_select reverse : Elapsed 0.005 ms (0.541 ms / 100) 0.520 -> 0.529 ( +1.73%) [ +4.42% +0.58% +0.00% / +2.69% +1.73% +6.92%] index_select skip64 : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.530 ( +1.92%) [ +5.77% +0.77% +0.00% / +8.08% +1.92% +4.23%] index_select skip256 : Elapsed 0.005 ms (0.550 ms / 100) 0.521 -> 0.528 ( +1.34%) [ +3.45% +0.00% +0.38% / +3.07% +1.34% +2.30%] index_select spread : Elapsed 0.005 ms (0.539 ms / 100) 0.518 -> 0.536 ( +3.47%) [ +4.25% +0.19% +0.00% / +4.63% +3.47% +3.86%] index_select strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +4.42% +0.96% +0.00% / +3.65% +0.96% +3.85%] index_select strided 5 : Elapsed 0.005 ms (0.543 ms / 100) 0.513 -> 0.526 ( +2.53%) [ +7.41% +3.31% +0.00% / +6.24% +2.53% +5.07%] index_select strided 7 : Elapsed 0.006 ms (0.551 ms / 100) 0.521 -> 0.525 ( +0.77%) [ +4.03% +0.96% +0.00% / +4.22% +0.77% +2.30%] index_select strided 8 : Elapsed 0.005 ms (0.542 ms / 100) 0.517 -> 0.521 ( +0.77%) [ +5.03% +0.97% +0.00% / +5.80% +0.77% +4.45%] index_select strided 16 : Elapsed 0.005 ms (0.543 ms / 100) 0.524 -> 0.538 ( +2.67%) [ +8.21% +0.19% +0.00% / +3.44% +3.44% +2.67%] index_select strided 64 : Elapsed 0.006 ms (0.567 ms / 100) Bad 0.517 -> 0.571 (+10.44%) [ +5.42% +3.09% +0.00% / +10.44% +15.67% +11.03%] index_select strided 100 : Elapsed 0.005 ms (0.545 ms / 100) 0.518 -> 0.523 ( +0.97%) [ +5.21% +1.93% +0.00% / +3.09% +0.97% +16.60%] index_select random : Elapsed 0.005 ms (0.545 ms / 100) 0.517 -> 0.531 ( +2.71%) [ +5.61% +1.74% +0.00% / +3.68% +2.71% +4.06%] index_select random_sorted : Elapsed 0.005 ms (0.546 ms / 100) out_shape = [5, 200, 500] in_shape = [5, 200, 1] idx_dim = 2 B = [5, 200, 500] (stride (100000, 500, 1)) A = [5, 200, 1] (stride (200, 1, 1)) dim = 2 bad 0.500 -> 0.535 ( +7.00%) [ +8.20% +6.40% +0.00% / +7.00% +94.60% +96.00%] index_add_ linear : Elapsed 0.005 ms (0.541 ms / 100) 0.499 -> 0.521 ( +4.41%) [ +0.00% +2.40% +0.00% / +4.41% +62.73% +66.53%] index_copy_ linear : Elapsed 0.005 ms (0.499 ms / 100) Bad 0.496 -> 0.546 (+10.08%) [ +0.00% +8.67% +1.21% / +10.08% +99.19% +94.76%] index_add_ reverse : Elapsed 0.005 ms (0.496 ms / 100) 0.495 -> 0.504 ( +1.82%) [ +0.20% +3.03% +0.00% / +1.82% +63.84% +70.10%] index_copy_ reverse : Elapsed 0.005 ms (0.496 ms / 100) 0.495 -> 0.510 ( +3.03%) [ +0.00% +1.62% +1.41% / +3.03% +79.39% +19.60%] index_add_ spread : Elapsed 0.005 ms (0.495 ms / 100) 0.499 -> 0.504 ( +1.00%) [ +0.00% +2.20% +5.01% / +1.00% +13.23% +3.61%] index_copy_ spread : Elapsed 0.005 ms (0.499 ms / 100) 0.503 -> 0.505 ( +0.40%) [ +7.16% +0.00% +6.16% / +0.40% +17.50% +17.30%] index_add_ strided 3 : Elapsed 0.005 ms (0.539 ms / 100) 0.498 -> 0.493 ( -1.00%) [ +0.20% +0.00% +0.00% / +9.24% -1.00% +6.63%] index_copy_ strided 3 : Elapsed 0.005 ms (0.499 ms / 100) 0.499 -> 0.521 ( +4.41%) [ +0.20% +7.01% +0.00% / +4.41% +19.24% +19.04%] index_add_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.493 -> 0.503 ( +2.03%) [ +0.81% +4.26% +0.00% / +5.07% +2.03% +9.13%] index_copy_ strided 7 : Elapsed 0.005 ms (0.497 ms / 100) bad 0.500 -> 0.547 ( +9.40%) [ +0.00% +2.40% +0.80% / +9.40% +17.60% +18.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.500 ms / 100) 0.497 -> 0.499 ( +0.40%) [ +9.66% +1.41% +0.00% / +0.60% +0.40% +5.23%] index_copy_ strided 257 : Elapsed 0.005 ms (0.545 ms / 100) bad 0.495 -> 0.520 ( +5.05%) [ +0.00% +3.23% +1.62% / +5.05% +18.79% +20.20%] index_add_ perm : Elapsed 0.005 ms (0.495 ms / 100) 0.498 -> 0.493 ( -1.00%) [ +0.00% +1.20% +0.40% / -0.40% -1.00% +2.21%] index_copy_ perm : Elapsed 0.005 ms (0.498 ms / 100) bad 0.501 -> 0.534 ( +6.59%) [ +0.00% +0.80% +6.79% / +6.59% +18.56% +17.17%] index_add_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) 0.497 -> 0.500 ( +0.60%) [ +0.60% +3.42% +0.00% / +4.23% +0.60% +1.01%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.500 ms / 100) GOOD 8.804 -> 6.624 (-24.76%) [ +0.14% +0.08% +0.00% / -24.64% -24.72% -24.76%] index_select const : Elapsed 0.088 ms (8.816 ms / 100) GOOD 8.831 -> 6.596 (-25.31%) [ +0.14% +0.00% +0.06% / -25.31% -25.13% -25.17%] index_select wrap : Elapsed 0.088 ms (8.843 ms / 100) GOOD 8.880 -> 6.607 (-25.60%) [ +0.12% +0.00% +0.14% / -25.60% -25.45% -25.45%] index_select linear : Elapsed 0.089 ms (8.891 ms / 100) GOOD 8.816 -> 6.701 (-23.99%) [ +0.08% +0.05% +0.00% / -23.99% -23.39% -23.43%] index_select reverse : Elapsed 0.088 ms (8.823 ms / 100) GOOD 8.881 -> 6.626 (-25.39%) [ +0.00% +0.08% +0.02% / -25.39% -25.30% -25.26%] index_select skip64 : Elapsed 0.089 ms (8.881 ms / 100) GOOD 8.843 -> 6.625 (-25.08%) [ +0.00% +0.05% +0.05% / -24.99% -25.08% -24.91%] index_select skip256 : Elapsed 0.088 ms (8.843 ms / 100) GOOD 8.793 -> 6.614 (-24.78%) [ +0.14% +0.00% +0.09% / -24.78% -24.52% -24.53%] index_select spread : Elapsed 0.088 ms (8.805 ms / 100) GOOD 8.839 -> 6.608 (-25.24%) [ +0.09% +0.11% +0.00% / -24.90% -25.24% -25.05%] index_select random : Elapsed 0.088 ms (8.847 ms / 100) GOOD 8.875 -> 6.618 (-25.43%) [ +0.09% +0.08% +0.00% / -25.43% -25.30% -25.32%] index_select random_sorted : Elapsed 0.089 ms (8.883 ms / 100) B = [5, 200, 500] (stride (100000, 1, 200)) A = [5, 200, 1] (stride (1, 5, 1000)) dim = 2 0.500 -> 0.492 ( -1.60%) [ +0.20% +2.60% +0.00% / +7.00% -1.60% +0.40%] index_add_ linear : Elapsed 0.005 ms (0.501 ms / 100) 0.494 -> 0.489 ( -1.01%) [ +2.83% +6.07% +0.00% / +1.21% -1.01% +0.40%] index_copy_ linear : Elapsed 0.005 ms (0.508 ms / 100) 0.500 -> 0.490 ( -2.00%) [ +1.40% +1.80% +0.00% / +0.40% -2.00% +0.00%] index_add_ reverse : Elapsed 0.005 ms (0.507 ms / 100) 0.497 -> 0.500 ( +0.60%) [+10.06% +6.24% +0.00% / +7.04% +0.60% +0.60%] index_copy_ reverse : Elapsed 0.005 ms (0.547 ms / 100) 0.498 -> 0.486 ( -2.41%) [ +0.00% +6.63% +0.20% / +0.60% -2.41% +0.40%] index_add_ spread : Elapsed 0.005 ms (0.498 ms / 100) 0.490 -> 0.486 ( -0.82%) [ +2.04% +7.14% +0.00% / +3.27% -0.82% +12.45%] index_copy_ spread : Elapsed 0.005 ms (0.500 ms / 100) 0.500 -> 0.490 ( -2.00%) [ +8.40% +4.80% +0.00% / +8.80% -2.00% +1.60%] index_add_ strided 3 : Elapsed 0.005 ms (0.542 ms / 100) 0.495 -> 0.484 ( -2.22%) [ +2.02% +2.02% +0.00% / +0.40% -2.22% +1.01%] index_copy_ strided 3 : Elapsed 0.005 ms (0.505 ms / 100) 0.495 -> 0.488 ( -1.41%) [ +1.01% +3.23% +0.00% / +2.02% -1.41% +7.68%] index_add_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.492 -> 0.485 ( -1.42%) [ +1.63% +2.03% +0.00% / +0.00% -1.42% +1.22%] index_copy_ strided 7 : Elapsed 0.005 ms (0.500 ms / 100) 0.496 -> 0.495 ( -0.20%) [ +0.00% +2.42% +0.60% / +0.40% +2.22% -0.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.496 ms / 100) 0.494 -> 0.491 ( -0.61%) [ +1.01% +6.88% +0.00% / +0.61% -0.61% +10.73%] index_copy_ strided 257 : Elapsed 0.005 ms (0.499 ms / 100) 0.510 -> 0.491 ( -3.73%) [ +4.71% +0.00% +0.98% / +4.12% -3.73% -3.14%] index_add_ perm : Elapsed 0.005 ms (0.534 ms / 100) 0.502 -> 0.489 ( -2.59%) [ +1.39% +0.40% +0.00% / +2.19% -2.59% -1.59%] index_copy_ perm : Elapsed 0.005 ms (0.509 ms / 100) 0.498 -> 0.489 ( -1.81%) [ +2.81% +2.21% +0.00% / +1.41% -1.81% +0.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.512 ms / 100) 0.498 -> 0.492 ( -1.20%) [ +3.82% +0.40% +0.00% / +0.40% -1.20% +7.43%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.517 ms / 100) 8.283 -> 8.264 ( -0.23%) [ +0.06% +0.00% +0.04% / +0.05% -0.23% -0.18%] index_select const : Elapsed 0.083 ms (8.288 ms / 100) 8.251 -> 8.241 ( -0.12%) [ +0.19% +0.13% +0.00% / +0.29% -0.12% +0.12%] index_select wrap : Elapsed 0.083 ms (8.267 ms / 100) 8.282 -> 8.236 ( -0.56%) [ +0.10% +0.12% +0.00% / +0.07% -0.31% -0.56%] index_select linear : Elapsed 0.083 ms (8.290 ms / 100) 8.255 -> 8.273 ( +0.22%) [ +0.13% +0.00% +0.06% / +0.22% +0.42% +0.38%] index_select reverse : Elapsed 0.083 ms (8.266 ms / 100) 8.358 -> 8.276 ( -0.98%) [ +0.16% +0.00% +0.22% / +0.11% -0.96% -0.98%] index_select skip64 : Elapsed 0.084 ms (8.371 ms / 100) 8.263 -> 8.228 ( -0.42%) [ +0.08% +0.00% +0.16% / -0.06% -0.17% -0.42%] index_select skip256 : Elapsed 0.083 ms (8.270 ms / 100) 8.250 -> 8.238 ( -0.15%) [ +0.40% +0.00% +0.21% / +0.25% -0.15% -0.02%] index_select spread : Elapsed 0.083 ms (8.283 ms / 100) 8.309 -> 8.302 ( -0.08%) [ +0.35% +0.00% +0.02% / -0.08% -0.02% -0.07%] index_select random : Elapsed 0.083 ms (8.338 ms / 100) 8.371 -> 8.299 ( -0.86%) [ +0.17% +0.12% +0.00% / +0.06% -0.82% -0.86%] index_select random_sorted : Elapsed 0.084 ms (8.385 ms / 100) B = [5, 200, 500] (stride (500, 2500, 1)) A = [5, 200, 1] (stride (200, 1, 1000)) dim = 2 0.574 -> 0.580 ( +1.05%) [ +1.22% +0.87% +0.00% / +1.05% +1.22% +5.92%] index_add_ linear : Elapsed 0.006 ms (0.581 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.08% +0.90%] index_copy_ linear : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.92% +0.00% +0.00% / +3.66% +1.22% +1.05%] index_add_ reverse : Elapsed 0.006 ms (0.585 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +0.54% +0.18% +0.00% / +0.90% +1.08% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.561 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.87% +0.00% +0.35% / +0.70% +0.70% +1.74%] index_add_ spread : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.72% +0.90% +0.90%] index_copy_ spread : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +0.87% +1.39% +0.00% / +1.05% +1.74% +1.05%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.18% / +0.72% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +2.62% +0.52% +0.00% / +1.05% +1.22% +1.40%] index_add_ strided 7 : Elapsed 0.006 ms (0.588 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +1.08% +0.90%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.05% +0.17% +0.00% / +1.39% +1.05% +1.05%] index_add_ strided 257 : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +1.25% +0.72% +1.08%] index_copy_ strided 257 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.580 ( +1.05%) [ +1.05% +0.35% +0.00% / +1.22% +1.05% +1.05%] index_add_ perm : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.90% +1.25% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.575 -> 0.581 ( +1.04%) [ +0.87% +0.00% +0.70% / +1.39% +1.04% +1.22%] index_add_ perm_sorted : Elapsed 0.006 ms (0.580 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.36% +0.00% / +1.08% +1.25% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 18.447 -> 18.379 ( -0.37%) [ +0.08% +0.00% +0.19% / +0.14% -0.33% -0.37%] index_select const : Elapsed 0.185 ms (18.461 ms / 100) 18.428 -> 18.396 ( -0.17%) [ +0.00% +0.23% +0.38% / +0.29% -0.17% -0.16%] index_select wrap : Elapsed 0.184 ms (18.428 ms / 100) 18.473 -> 18.389 ( -0.45%) [ +0.00% +0.05% +0.13% / +0.08% -0.45% -0.27%] index_select linear : Elapsed 0.185 ms (18.473 ms / 100) 18.476 -> 18.375 ( -0.55%) [ +0.06% +0.27% +0.00% / -0.02% -0.30% -0.55%] index_select reverse : Elapsed 0.185 ms (18.487 ms / 100) 18.467 -> 18.394 ( -0.40%) [ +0.00% +0.08% +0.07% / -0.14% -0.40% -0.30%] index_select skip64 : Elapsed 0.185 ms (18.467 ms / 100) 18.452 -> 18.414 ( -0.21%) [ +0.34% +0.15% +0.00% / -0.04% -0.16% -0.21%] index_select skip256 : Elapsed 0.185 ms (18.514 ms / 100) 18.469 -> 18.385 ( -0.45%) [ +0.31% +0.00% +0.09% / +0.10% -0.45% -0.14%] index_select spread : Elapsed 0.185 ms (18.527 ms / 100) 18.416 -> 18.406 ( -0.05%) [ +0.12% +0.35% +0.00% / +0.24% +0.01% -0.05%] index_select random : Elapsed 0.184 ms (18.438 ms / 100) 18.476 -> 18.403 ( -0.40%) [ +0.02% +0.17% +0.00% / +0.02% -0.40% -0.24%] index_select random_sorted : Elapsed 0.185 ms (18.479 ms / 100) B = [5, 200, 500] (stride (1, 5, 1000)) dim = 2 fill_cnt = 1 0.469 -> 0.469 ( +0.00%) [ +0.00% +25.37% +3.62% / +3.84% +3.41% +0.00%] index_fill_ const : Elapsed 0.005 ms (0.469 ms / 100) good 0.490 -> 0.462 ( -5.71%) [ +2.24% +13.88% +0.00% / +2.24% -5.71% -5.71%] index_fill_ linear : Elapsed 0.005 ms (0.501 ms / 100) 0.464 -> 0.455 ( -1.94%) [ +8.84% +22.20% +0.00% / +8.84% -1.94% +3.23%] index_fill_ reverse : Elapsed 0.005 ms (0.505 ms / 100) 0.462 -> 0.466 ( +0.87%) [ +0.00% +33.12% +0.22% / +1.08% +2.38% +0.87%] index_fill_ skip64 : Elapsed 0.005 ms (0.462 ms / 100) 0.463 -> 0.459 ( -0.86%) [ +0.00% +21.60% +0.86% / +1.30% -0.86% +0.86%] index_fill_ skip256 : Elapsed 0.005 ms (0.463 ms / 100) 0.460 -> 0.458 ( -0.43%) [ +1.09% +22.39% +0.00% / +1.74% -0.43% +0.87%] index_fill_ spread : Elapsed 0.005 ms (0.465 ms / 100) 0.459 -> 0.454 ( -1.09%) [ +0.44% +27.45% +0.00% / +2.83% -1.09% +4.36%] index_fill_ strided 3 : Elapsed 0.005 ms (0.461 ms / 100) 0.460 -> 0.455 ( -1.09%) [ +1.30% +33.70% +0.00% / +1.74% -1.09% +0.87%] index_fill_ strided 5 : Elapsed 0.005 ms (0.466 ms / 100) 0.467 -> 0.458 ( -1.93%) [ +0.00% +20.56% +4.07% / +0.64% -1.93% -1.93%] index_fill_ strided 7 : Elapsed 0.005 ms (0.467 ms / 100) 0.477 -> 0.456 ( -4.40%) [ +0.00% +17.61% +1.68% / -2.31% -4.40% -2.73%] index_fill_ strided 8 : Elapsed 0.005 ms (0.477 ms / 100) 0.463 -> 0.454 ( -1.94%) [ +0.43% +21.60% +0.00% / +0.86% -1.94% +7.56%] index_fill_ strided 16 : Elapsed 0.005 ms (0.465 ms / 100) 0.459 -> 0.455 ( -0.87%) [ +1.31% +40.96% +0.00% / +1.96% -0.87% -0.65%] index_fill_ strided 64 : Elapsed 0.005 ms (0.465 ms / 100) 0.462 -> 0.462 ( +0.00%) [ +0.65% +22.08% +0.00% / +7.58% +0.00% +0.43%] index_fill_ strided 100 : Elapsed 0.005 ms (0.465 ms / 100) 0.468 -> 0.455 ( -2.78%) [ +8.12% +19.02% +0.00% / +6.20% -2.78% +4.27%] index_fill_ strided 255 : Elapsed 0.005 ms (0.506 ms / 100) 0.460 -> 0.456 ( -0.87%) [+11.52% +34.78% +0.00% / +1.74% -0.87% +0.65%] index_fill_ strided 256 : Elapsed 0.005 ms (0.513 ms / 100) 0.466 -> 0.453 ( -2.79%) [ +0.00% +31.55% +4.72% / +0.21% -2.79% +8.37%] index_fill_ strided 257 : Elapsed 0.005 ms (0.466 ms / 100) 0.458 -> 0.454 ( -0.87%) [ +1.09% +1.97% +0.00% / +2.84% -0.87% +1.09%] index_fill_ random : Elapsed 0.005 ms (0.463 ms / 100) 0.461 -> 0.456 ( -1.08%) [ +0.00% +1.30% +0.43% / +2.17% -1.08% +1.52%] index_fill_ random_sorted : Elapsed 0.005 ms (0.461 ms / 100) 0.458 -> 0.453 ( -1.09%) [ +1.09% +2.18% +0.00% / +8.73% -1.09% +1.75%] index_fill_ perm : Elapsed 0.005 ms (0.463 ms / 100) 0.458 -> 0.455 ( -0.66%) [ +1.31% +2.18% +0.00% / +2.18% -0.66% +3.06%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.464 ms / 100) B = [5, 200, 500] (stride (1, 5, 1000)) A = [5, 200, 1] (stride (200, 1, 1)) dim = 2 0.574 -> 0.579 ( +0.87%) [ +0.70% +3.14% +0.00% / +0.87% +1.22% +1.22%] index_add_ linear : Elapsed 0.006 ms (0.578 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.72% +0.18% +0.00% / +0.54% +0.90% +0.90%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.578 ( +0.87%) [ +0.87% +0.00% +0.00% / +0.87% +1.05% +1.05%] index_add_ reverse : Elapsed 0.006 ms (0.578 ms / 100) 0.557 -> 0.562 ( +0.90%) [ +0.90% +0.00% +0.00% / +2.33% +0.90% +1.08%] index_copy_ reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +1.05% +0.70% +0.00% / +1.22% +1.05% +1.40%] index_add_ spread : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.563 ( +0.90%) [ +1.08% +0.00% +0.00% / +0.90% +0.90% +0.90%] index_copy_ spread : Elapsed 0.006 ms (0.564 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +0.87% +0.17% +0.00% / +1.05% +1.05% +1.05%] index_add_ strided 3 : Elapsed 0.006 ms (0.578 ms / 100) 0.558 -> 0.561 ( +0.54%) [ +0.72% +0.54% +0.00% / +0.54% +0.90% +0.90%] index_copy_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +1.05% +0.52% +0.00% / +1.05% +1.05% +1.22%] index_add_ strided 7 : Elapsed 0.006 ms (0.579 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.08% +0.90%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +1.05% +0.00% +0.00% / +0.87% +1.05% +0.87%] index_add_ strided 257 : Elapsed 0.006 ms (0.580 ms / 100) 0.559 -> 0.562 ( +0.54%) [ +0.72% +0.00% +0.00% / +0.54% +0.72% +0.89%] index_copy_ strided 257 : Elapsed 0.006 ms (0.563 ms / 100) 0.575 -> 0.579 ( +0.70%) [ +0.70% +0.00% +0.00% / +0.70% +1.39% +1.04%] index_add_ perm : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +0.90% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.563 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +0.87% +0.17% +0.00% / +0.87% +0.87% +1.05%] index_add_ perm_sorted : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.90% +0.00% +0.18% / +0.72% +0.90% +0.90%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.563 ms / 100) 16.840 -> 16.850 ( +0.06%) [ +0.17% +0.02% +0.00% / +0.38% +0.12% +0.06%] index_select const : Elapsed 0.169 ms (16.869 ms / 100) 16.857 -> 16.781 ( -0.45%) [ +0.00% +0.04% +0.17% / -0.15% -0.14% -0.45%] index_select wrap : Elapsed 0.169 ms (16.857 ms / 100) 16.835 -> 16.822 ( -0.08%) [ +0.21% +0.00% +0.23% / +0.49% -0.08% +0.05%] index_select linear : Elapsed 0.169 ms (16.871 ms / 100) 16.788 -> 16.825 ( +0.22%) [ +0.93% +0.45% +0.00% / +0.32% +0.22% +0.37%] index_select reverse : Elapsed 0.169 ms (16.944 ms / 100) 16.776 -> 16.780 ( +0.02%) [ +0.52% +0.55% +0.00% / +0.57% +0.02% +0.27%] index_select skip64 : Elapsed 0.169 ms (16.864 ms / 100) 16.837 -> 16.806 ( -0.18%) [ +0.38% +0.23% +0.00% / +0.33% -0.18% -0.17%] index_select skip256 : Elapsed 0.169 ms (16.901 ms / 100) 16.774 -> 16.822 ( +0.29%) [ +0.29% +0.58% +0.00% / +0.59% +0.36% +0.29%] index_select spread : Elapsed 0.168 ms (16.822 ms / 100) 16.824 -> 16.740 ( -0.50%) [ +0.00% +0.02% +0.16% / +0.31% +0.15% -0.50%] index_select random : Elapsed 0.168 ms (16.824 ms / 100) 16.808 -> 16.833 ( +0.15%) [ +0.00% +0.02% +0.18% / +0.50% +0.18% +0.15%] index_select random_sorted : Elapsed 0.168 ms (16.808 ms / 100) B = [5, 200, 500] (stride (1, 5, 1000)) A = [5, 200, 1] (stride (1, 5, 1000)) dim = 2 0.502 -> 0.498 ( -0.80%) [ +2.19% +1.00% +0.00% / -0.80% +1.79% +3.98%] index_add_ linear : Elapsed 0.005 ms (0.513 ms / 100) good 0.523 -> 0.491 ( -6.12%) [ +0.00% +5.93% +5.16% / -4.78% -6.12% -5.35%] index_copy_ linear : Elapsed 0.005 ms (0.523 ms / 100) 0.498 -> 0.494 ( -0.80%) [ +0.00% +2.41% +0.20% / -0.40% +0.60% -0.80%] index_add_ reverse : Elapsed 0.005 ms (0.498 ms / 100) 0.502 -> 0.489 ( -2.59%) [ +0.00% +10.36% +4.58% / -1.00% -2.59% -1.20%] index_copy_ reverse : Elapsed 0.005 ms (0.502 ms / 100) 0.498 -> 0.491 ( -1.41%) [ +0.00% +4.02% +0.60% / +0.20% -1.41% +0.60%] index_add_ spread : Elapsed 0.005 ms (0.498 ms / 100) 0.495 -> 0.487 ( -1.62%) [ +0.81% +8.08% +0.00% / +2.83% -1.62% +8.48%] index_copy_ spread : Elapsed 0.005 ms (0.499 ms / 100) 0.500 -> 0.490 ( -2.00%) [ +0.00% +10.80% +1.60% / +0.20% -2.00% +0.40%] index_add_ strided 3 : Elapsed 0.005 ms (0.500 ms / 100) 0.501 -> 0.500 ( -0.20%) [ +0.00% +4.79% +3.59% / -0.20% +1.80% +4.59%] index_copy_ strided 3 : Elapsed 0.005 ms (0.501 ms / 100) 0.495 -> 0.502 ( +1.41%) [ +0.00% +12.32% +6.87% / +1.41% +4.44% +18.38%] index_add_ strided 7 : Elapsed 0.005 ms (0.495 ms / 100) 0.496 -> 0.489 ( -1.41%) [ +2.02% +5.24% +0.00% / +6.65% -1.41% +2.22%] index_copy_ strided 7 : Elapsed 0.005 ms (0.506 ms / 100) 0.500 -> 0.489 ( -2.20%) [ +5.20% +1.20% +0.00% / +4.40% -2.20% +7.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.526 ms / 100) 0.494 -> 0.484 ( -2.02%) [+12.75% +1.82% +0.00% / +2.63% -2.02% +1.01%] index_copy_ strided 257 : Elapsed 0.006 ms (0.557 ms / 100) 0.499 -> 0.487 ( -2.40%) [ +0.00% +2.00% +1.00% / -0.60% -2.40% -1.00%] index_add_ perm : Elapsed 0.005 ms (0.499 ms / 100) 0.496 -> 0.490 ( -1.21%) [ +0.81% +1.41% +0.00% / -0.40% -1.21% -0.60%] index_copy_ perm : Elapsed 0.005 ms (0.500 ms / 100) 0.496 -> 0.489 ( -1.41%) [ +0.81% +9.88% +0.00% / +4.03% -1.41% +1.41%] index_add_ perm_sorted : Elapsed 0.005 ms (0.500 ms / 100) 0.492 -> 0.485 ( -1.42%) [ +6.91% +2.03% +0.00% / +1.22% -1.42% +1.02%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.526 ms / 100) 9.148 -> 8.896 ( -2.75%) [ +0.00% +0.60% +0.75% / -1.03% -2.75% -2.62%] index_select const : Elapsed 0.091 ms (9.148 ms / 100) 8.896 -> 8.883 ( -0.15%) [ +0.00% +1.56% +0.55% / +0.67% -0.15% +0.71%] index_select wrap : Elapsed 0.089 ms (8.896 ms / 100) 8.954 -> 8.850 ( -1.16%) [ +0.47% +0.82% +0.00% / -0.55% -1.16% -0.88%] index_select linear : Elapsed 0.090 ms (8.996 ms / 100) 8.910 -> 8.801 ( -1.22%) [ +0.67% +0.63% +0.00% / -0.33% -1.22% -1.05%] index_select reverse : Elapsed 0.090 ms (8.970 ms / 100) 8.914 -> 8.690 ( -2.51%) [ +0.50% +0.00% +0.35% / +0.98% -1.33% -2.51%] index_select skip64 : Elapsed 0.090 ms (8.959 ms / 100) 8.990 -> 8.867 ( -1.37%) [ +0.48% +0.00% +0.06% / -0.67% -1.37% -0.59%] index_select skip256 : Elapsed 0.090 ms (9.033 ms / 100) 8.965 -> 8.855 ( -1.23%) [ +0.00% +0.31% +0.55% / +0.00% -0.62% -1.23%] index_select spread : Elapsed 0.090 ms (8.965 ms / 100) 8.960 -> 8.889 ( -0.79%) [ +0.00% +0.28% +0.28% / -0.79% +0.07% +0.02%] index_select random : Elapsed 0.090 ms (8.960 ms / 100) 8.951 -> 8.747 ( -2.28%) [ +0.13% +0.00% +0.10% / +0.21% -2.28% -1.46%] index_select random_sorted : Elapsed 0.090 ms (8.963 ms / 100) out_shape = [500, 1, 5] in_shape = [200, 1, 5] idx_dim = 0 B = [500, 1, 5] (stride (5, 1, 1)) A = [200, 1, 5] (stride (5, 1000, 1)) dim = 0 0.466 -> 0.473 ( +1.50%) [ +2.58% +2.36% +0.00% / +1.50% +10.30% +4.72%] index_add_ linear : Elapsed 0.005 ms (0.478 ms / 100) 0.467 -> 0.465 ( -0.43%) [ +0.43% +2.57% +0.00% / +0.21% -0.43% +1.50%] index_copy_ linear : Elapsed 0.005 ms (0.469 ms / 100) 0.470 -> 0.468 ( -0.43%) [ +0.21% +2.55% +0.00% / +0.21% -0.43% -0.43%] index_add_ reverse : Elapsed 0.005 ms (0.471 ms / 100) 0.468 -> 0.464 ( -0.85%) [ +0.64% +1.50% +0.00% / +0.21% -0.85% +0.64%] index_copy_ reverse : Elapsed 0.005 ms (0.471 ms / 100) 0.466 -> 0.466 ( +0.00%) [ +8.80% +1.93% +0.00% / +0.86% +0.00% +1.93%] index_add_ spread : Elapsed 0.005 ms (0.507 ms / 100) 0.463 -> 0.465 ( +0.43%) [ +1.94% +1.08% +0.00% / +5.18% +0.43% +2.16%] index_copy_ spread : Elapsed 0.005 ms (0.472 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +1.72% +3.44% +0.00% / +9.03% +0.65% +2.58%] index_add_ strided 3 : Elapsed 0.005 ms (0.473 ms / 100) 0.461 -> 0.464 ( +0.65%) [ +3.04% +2.82% +0.00% / +21.26% +0.65% +10.20%] index_copy_ strided 3 : Elapsed 0.005 ms (0.475 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +0.00% +2.35% +2.14% / +6.62% -1.07% +3.21%] index_add_ strided 7 : Elapsed 0.005 ms (0.468 ms / 100) 0.463 -> 0.464 ( +0.22%) [ +0.65% +4.32% +0.00% / +3.46% +0.22% +0.86%] index_copy_ strided 7 : Elapsed 0.005 ms (0.466 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +1.29% +3.01% +0.00% / +0.86% +0.65% +7.31%] index_add_ strided 257 : Elapsed 0.005 ms (0.471 ms / 100) 0.462 -> 0.464 ( +0.43%) [ +1.30% +2.81% +0.00% / +3.68% +0.43% +8.23%] index_copy_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.466 -> 0.469 ( +0.64%) [ +9.01% +2.79% +0.00% / +11.80% +0.64% +10.52%] index_add_ perm : Elapsed 0.005 ms (0.508 ms / 100) bad 0.462 -> 0.508 ( +9.96%) [ +0.65% +2.81% +0.00% / +12.55% +9.96% +18.18%] index_copy_ perm : Elapsed 0.005 ms (0.465 ms / 100) 0.468 -> 0.461 ( -1.50%) [ +0.00% +2.78% +5.34% / +1.28% -1.50% +0.21%] index_add_ perm_sorted : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.464 ( -0.22%) [ +0.00% +2.15% +7.31% / +2.80% -0.22% +1.51%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.465 ms / 100) 0.515 -> 0.540 ( +4.85%) [ +9.90% +0.00% +8.93% / +5.05% +4.85% +4.85%] index_select const : Elapsed 0.006 ms (0.566 ms / 100) 0.519 -> 0.525 ( +1.16%) [ +6.94% +0.00% +6.74% / +3.47% +1.16% +5.59%] index_select wrap : Elapsed 0.006 ms (0.555 ms / 100) 0.518 -> 0.533 ( +2.90%) [ +5.21% +0.00% +12.36% / +4.25% +2.90% +3.47%] index_select linear : Elapsed 0.005 ms (0.545 ms / 100) 0.520 -> 0.528 ( +1.54%) [ +4.62% +0.00% +5.38% / +3.65% +1.54% +3.27%] index_select reverse : Elapsed 0.005 ms (0.544 ms / 100) 0.528 -> 0.535 ( +1.33%) [ +3.41% +0.00% +0.00% / +2.08% +1.33% +1.33%] index_select skip64 : Elapsed 0.005 ms (0.546 ms / 100) 0.523 -> 0.537 ( +2.68%) [+10.52% +0.00% +0.00% / +2.68% +4.21% +2.87%] index_select skip256 : Elapsed 0.006 ms (0.578 ms / 100) 0.520 -> 0.539 ( +3.65%) [ +4.23% +0.00% +0.38% / +3.65% +10.77% +17.12%] index_select spread : Elapsed 0.005 ms (0.542 ms / 100) 0.516 -> 0.533 ( +3.29%) [ +9.88% +0.00% +1.16% / +3.29% +17.64% +3.49%] index_select strided 3 : Elapsed 0.006 ms (0.567 ms / 100) 0.518 -> 0.536 ( +3.47%) [ +5.41% +0.00% +1.16% / +3.67% +5.98% +3.47%] index_select strided 5 : Elapsed 0.005 ms (0.546 ms / 100) 0.525 -> 0.531 ( +1.14%) [ +4.19% +0.00% +0.00% / +5.52% +1.14% +6.86%] index_select strided 7 : Elapsed 0.005 ms (0.547 ms / 100) 0.523 -> 0.534 ( +2.10%) [ +6.69% +0.00% +0.38% / +2.10% +2.29% +14.72%] index_select strided 8 : Elapsed 0.006 ms (0.558 ms / 100) 0.517 -> 0.540 ( +4.45%) [+10.64% +0.00% +1.16% / +8.90% +4.45% +20.50%] index_select strided 16 : Elapsed 0.006 ms (0.572 ms / 100) 0.516 -> 0.539 ( +4.46%) [ +5.81% +0.00% +2.33% / +10.85% +4.46% +10.08%] index_select strided 64 : Elapsed 0.005 ms (0.546 ms / 100) 0.518 -> 0.538 ( +3.86%) [ +5.41% +4.25% +0.00% / +19.31% +3.86% +33.40%] index_select strided 100 : Elapsed 0.005 ms (0.546 ms / 100) 0.526 -> 0.539 ( +2.47%) [ +3.04% +6.46% +0.00% / +2.66% +2.47% +8.56%] index_select random : Elapsed 0.005 ms (0.542 ms / 100) 0.521 -> 0.538 ( +3.26%) [ +9.02% +0.00% +1.15% / +3.26% +3.26% +18.43%] index_select random_sorted : Elapsed 0.006 ms (0.568 ms / 100) B = [500, 1, 5] (stride (5, 1, 1)) A = [200, 1, 5] (stride (1, 200, 200)) dim = 0 0.467 -> 0.469 ( +0.43%) [ +0.00% +1.93% +0.64% / +4.93% +0.43% +8.78%] index_add_ linear : Elapsed 0.005 ms (0.467 ms / 100) 0.464 -> 0.466 ( +0.43%) [ +1.08% +2.16% +0.00% / +0.86% +0.43% +25.43%] index_copy_ linear : Elapsed 0.005 ms (0.469 ms / 100) 0.469 -> 0.468 ( -0.21%) [ +5.33% +1.07% +0.00% / +0.43% -0.21% +7.46%] index_add_ reverse : Elapsed 0.005 ms (0.494 ms / 100) 0.464 -> 0.462 ( -0.43%) [ +0.00% +1.94% +0.22% / +3.23% -0.43% +1.72%] index_copy_ reverse : Elapsed 0.005 ms (0.464 ms / 100) 0.467 -> 0.462 ( -1.07%) [ +3.00% +2.57% +0.00% / +0.64% -1.07% +1.93%] index_add_ spread : Elapsed 0.005 ms (0.481 ms / 100) 0.469 -> 0.460 ( -1.92%) [ +0.00% +1.28% +3.20% / +1.28% -1.92% +1.07%] index_copy_ spread : Elapsed 0.005 ms (0.469 ms / 100) 0.466 -> 0.463 ( -0.64%) [ +0.00% +2.36% +18.88% / +1.50% -0.64% +1.50%] index_add_ strided 3 : Elapsed 0.005 ms (0.466 ms / 100) 0.465 -> 0.473 ( +1.72%) [ +0.00% +2.15% +1.94% / +2.58% +2.15% +1.72%] index_copy_ strided 3 : Elapsed 0.005 ms (0.465 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +7.05% +1.92% +0.00% / +0.21% -1.07% +0.43%] index_add_ strided 7 : Elapsed 0.005 ms (0.501 ms / 100) 0.467 -> 0.460 ( -1.50%) [ +0.00% +1.71% +6.00% / +3.43% -1.50% +0.00%] index_copy_ strided 7 : Elapsed 0.005 ms (0.467 ms / 100) 0.465 -> 0.460 ( -1.08%) [ +0.22% +3.01% +0.00% / +0.43% -1.08% +1.72%] index_add_ strided 257 : Elapsed 0.005 ms (0.466 ms / 100) 0.459 -> 0.468 ( +1.96%) [ +2.40% +3.49% +0.00% / +3.05% +1.96% +1.96%] index_copy_ strided 257 : Elapsed 0.005 ms (0.470 ms / 100) 0.464 -> 0.468 ( +0.86%) [ +0.00% +3.23% +0.22% / +0.86% +7.33% +1.72%] index_add_ perm : Elapsed 0.005 ms (0.464 ms / 100) 0.468 -> 0.470 ( +0.43%) [ +5.13% +7.69% +0.00% / +0.43% +12.61% +0.64%] index_copy_ perm : Elapsed 0.005 ms (0.492 ms / 100) 0.462 -> 0.468 ( +1.30%) [ +1.52% +3.46% +0.00% / +1.73% +9.31% +1.30%] index_add_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.465 -> 0.465 ( +0.00%) [ +0.86% +2.15% +0.00% / +2.80% +0.00% +0.86%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.519 -> 0.539 ( +3.85%) [ +5.39% +0.00% +1.35% / +3.85% +4.43% +4.05%] index_select const : Elapsed 0.005 ms (0.547 ms / 100) 0.521 -> 0.532 ( +2.11%) [ +4.80% +0.19% +0.00% / +5.76% +3.65% +2.11%] index_select wrap : Elapsed 0.005 ms (0.546 ms / 100) 0.519 -> 0.534 ( +2.89%) [+11.56% +0.00% +1.73% / +15.61% +8.86% +2.89%] index_select linear : Elapsed 0.006 ms (0.579 ms / 100) bad 0.522 -> 0.556 ( +6.51%) [ +4.41% +1.53% +0.00% / +7.47% +18.39% +6.51%] index_select reverse : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.528 ( +1.15%) [ +3.83% +0.57% +0.00% / +11.11% +1.15% +6.90%] index_select skip64 : Elapsed 0.005 ms (0.542 ms / 100) 0.525 -> 0.534 ( +1.71%) [ +3.43% +0.00% +0.00% / +1.90% +1.71% +10.86%] index_select skip256 : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.535 ( +2.49%) [ +5.36% +0.19% +0.00% / +3.26% +2.49% +13.98%] index_select spread : Elapsed 0.006 ms (0.550 ms / 100) 0.525 -> 0.530 ( +0.95%) [ +3.62% +0.00% +0.76% / +2.86% +1.33% +0.95%] index_select strided 3 : Elapsed 0.005 ms (0.544 ms / 100) 0.524 -> 0.530 ( +1.15%) [ +4.39% +0.00% +4.96% / +4.01% +1.15% +1.72%] index_select strided 5 : Elapsed 0.005 ms (0.547 ms / 100) 0.522 -> 0.536 ( +2.68%) [ +6.32% +0.00% +0.00% / +3.45% +2.68% +2.68%] index_select strided 7 : Elapsed 0.006 ms (0.555 ms / 100) 0.523 -> 0.535 ( +2.29%) [ +3.25% +0.00% +2.29% / +3.25% +6.69% +2.29%] index_select strided 8 : Elapsed 0.005 ms (0.540 ms / 100) 0.524 -> 0.531 ( +1.34%) [ +3.82% +0.00% +6.11% / +2.48% +1.34% +1.91%] index_select strided 16 : Elapsed 0.005 ms (0.544 ms / 100) 0.547 -> 0.533 ( -2.56%) [ +0.00% +2.01% +7.50% / -1.28% -1.65% -2.56%] index_select strided 64 : Elapsed 0.005 ms (0.547 ms / 100) 0.538 -> 0.533 ( -0.93%) [ +6.51% +0.00% +0.74% / +2.79% -0.37% -0.93%] index_select strided 100 : Elapsed 0.006 ms (0.573 ms / 100) 0.520 -> 0.533 ( +2.50%) [ +4.62% +0.00% +0.96% / +2.69% +2.50% +2.69%] index_select random : Elapsed 0.005 ms (0.544 ms / 100) 0.522 -> 0.535 ( +2.49%) [+10.34% +0.19% +0.00% / +3.26% +4.41% +2.49%] index_select random_sorted : Elapsed 0.006 ms (0.576 ms / 100) B = [500, 1, 5] (stride (1, 500, 500)) A = [200, 1, 5] (stride (5, 5, 1)) dim = 0 0.469 -> 0.469 ( +0.00%) [ +3.41% +1.07% +0.00% / +0.64% +10.87% +0.00%] index_add_ linear : Elapsed 0.005 ms (0.485 ms / 100) 0.466 -> 0.469 ( +0.64%) [ +1.93% +1.07% +0.00% / +1.29% +6.01% +0.64%] index_copy_ linear : Elapsed 0.005 ms (0.475 ms / 100) 0.464 -> 0.472 ( +1.72%) [ +1.29% +3.66% +0.00% / +1.72% +12.72% +1.94%] index_add_ reverse : Elapsed 0.005 ms (0.470 ms / 100) 0.464 -> 0.466 ( +0.43%) [ +8.62% +1.51% +0.00% / +1.72% +0.43% +2.16%] index_copy_ reverse : Elapsed 0.005 ms (0.504 ms / 100) 0.466 -> 0.469 ( +0.64%) [+14.81% +2.58% +0.00% / +0.86% +0.64% +1.29%] index_add_ spread : Elapsed 0.005 ms (0.535 ms / 100) 0.471 -> 0.462 ( -1.91%) [+10.19% +0.00% +0.85% / +0.42% -1.91% +0.42%] index_copy_ spread : Elapsed 0.005 ms (0.519 ms / 100) 0.467 -> 0.467 ( +0.00%) [ +0.00% +2.36% +4.07% / +6.85% +0.00% +1.07%] index_add_ strided 3 : Elapsed 0.005 ms (0.467 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +1.72% +2.80% +0.00% / +10.75% -0.43% +1.51%] index_copy_ strided 3 : Elapsed 0.005 ms (0.473 ms / 100) 0.468 -> 0.464 ( -0.85%) [ +2.78% +2.56% +0.00% / +0.43% -0.85% +15.38%] index_add_ strided 7 : Elapsed 0.005 ms (0.481 ms / 100) 0.466 -> 0.460 ( -1.29%) [ +1.29% +1.93% +0.00% / +0.64% -1.29% +9.23%] index_copy_ strided 7 : Elapsed 0.005 ms (0.472 ms / 100) 0.467 -> 0.468 ( +0.21%) [ +3.85% +2.14% +0.00% / +0.21% +3.21% +0.21%] index_add_ strided 257 : Elapsed 0.005 ms (0.485 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +0.65% +2.37% +0.00% / +0.86% +5.38% +0.65%] index_copy_ strided 257 : Elapsed 0.005 ms (0.468 ms / 100) 0.469 -> 0.469 ( +0.00%) [ +0.43% +2.35% +0.00% / +0.00% +11.09% +0.85%] index_add_ perm : Elapsed 0.005 ms (0.471 ms / 100) 0.466 -> 0.465 ( -0.21%) [ +1.50% +2.79% +0.00% / +1.93% -0.21% +1.07%] index_copy_ perm : Elapsed 0.005 ms (0.473 ms / 100) 0.468 -> 0.462 ( -1.28%) [ +0.00% +2.14% +0.21% / +0.43% -1.28% +2.99%] index_add_ perm_sorted : Elapsed 0.005 ms (0.468 ms / 100) 0.466 -> 0.462 ( -0.86%) [ +0.64% +2.36% +0.00% / +0.64% -0.86% +0.86%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.469 ms / 100) 0.521 -> 0.530 ( +1.73%) [ +5.37% +0.00% +0.77% / +3.07% +5.95% +1.73%] index_select const : Elapsed 0.005 ms (0.549 ms / 100) 0.548 -> 0.527 ( -3.83%) [ +0.00% +4.74% +1.46% / -2.37% -3.83% -2.92%] index_select wrap : Elapsed 0.005 ms (0.548 ms / 100) 0.548 -> 0.529 ( -3.47%) [ +0.00% +0.55% +1.64% / -1.64% -3.47% -3.47%] index_select linear : Elapsed 0.005 ms (0.548 ms / 100) 0.520 -> 0.528 ( +1.54%) [ +5.77% +0.00% +10.19% / +3.08% +1.54% +3.08%] index_select reverse : Elapsed 0.006 ms (0.550 ms / 100) 0.521 -> 0.532 ( +2.11%) [ +6.14% +0.00% +0.19% / +2.50% +2.11% +2.11%] index_select skip64 : Elapsed 0.006 ms (0.553 ms / 100) 0.520 -> 0.525 ( +0.96%) [ +5.19% +0.96% +0.00% / +2.88% +0.96% +2.31%] index_select skip256 : Elapsed 0.005 ms (0.547 ms / 100) 0.525 -> 0.528 ( +0.57%) [ +3.81% +0.00% +0.57% / +1.90% +0.57% +0.76%] index_select spread : Elapsed 0.005 ms (0.545 ms / 100) 0.526 -> 0.529 ( +0.57%) [ +2.66% +0.19% +0.00% / +1.14% +8.56% +0.57%] index_select strided 3 : Elapsed 0.005 ms (0.540 ms / 100) 0.522 -> 0.531 ( +1.72%) [ +5.75% +0.00% +0.19% / +2.30% +15.33% +1.72%] index_select strided 5 : Elapsed 0.006 ms (0.552 ms / 100) 0.520 -> 0.530 ( +1.92%) [+10.58% +0.00% +0.77% / +2.88% +2.31% +1.92%] index_select strided 7 : Elapsed 0.006 ms (0.575 ms / 100) 0.521 -> 0.525 ( +0.77%) [ +5.18% +1.34% +0.00% / +2.69% +0.77% +2.11%] index_select strided 8 : Elapsed 0.005 ms (0.548 ms / 100) 0.517 -> 0.533 ( +3.09%) [+12.19% +0.00% +0.39% / +12.77% +3.09% +3.09%] index_select strided 16 : Elapsed 0.006 ms (0.580 ms / 100) 0.521 -> 0.530 ( +1.73%) [ +9.21% +11.32% +0.00% / +9.79% +1.73% +15.74%] index_select strided 64 : Elapsed 0.006 ms (0.569 ms / 100) 0.523 -> 0.530 ( +1.34%) [ +6.31% +2.87% +0.00% / +14.91% +1.34% +8.41%] index_select strided 100 : Elapsed 0.006 ms (0.556 ms / 100) 0.523 -> 0.529 ( +1.15%) [ +4.21% +0.00% +0.00% / +1.91% +1.15% +14.91%] index_select random : Elapsed 0.005 ms (0.545 ms / 100) 0.522 -> 0.531 ( +1.72%) [ +5.36% +0.00% +5.56% / +2.68% +1.92% +1.72%] index_select random_sorted : Elapsed 0.006 ms (0.550 ms / 100) out_shape = [200, 500, 5] in_shape = [200, 1, 5] idx_dim = 1 B = [200, 500, 5] (stride (1, 1000, 200)) A = [200, 1, 5] (stride (5, 5, 1)) dim = 1 0.500 -> 0.490 ( -2.00%) [ +4.60% +6.80% +0.00% / +1.80% -2.00% +0.60%] index_add_ linear : Elapsed 0.005 ms (0.523 ms / 100) 0.505 -> 0.487 ( -3.56%) [ +2.38% +3.37% +0.00% / +2.18% -3.56% -2.18%] index_copy_ linear : Elapsed 0.005 ms (0.517 ms / 100) 0.495 -> 0.485 ( -2.02%) [ +9.09% +13.54% +0.00% / +2.83% -2.02% +0.00%] index_add_ reverse : Elapsed 0.005 ms (0.540 ms / 100) 0.496 -> 0.494 ( -0.40%) [ +1.61% +2.82% +0.00% / +1.61% +3.02% -0.40%] index_copy_ reverse : Elapsed 0.005 ms (0.504 ms / 100) 0.492 -> 0.500 ( +1.63%) [ +9.55% +14.23% +0.00% / +2.85% +3.86% +1.63%] index_add_ spread : Elapsed 0.005 ms (0.539 ms / 100) 0.494 -> 0.485 ( -1.82%) [ +1.62% +2.23% +0.00% / +1.21% -1.82% +0.81%] index_copy_ spread : Elapsed 0.005 ms (0.502 ms / 100) 0.492 -> 0.496 ( +0.81%) [+10.98% +14.63% +0.00% / +3.25% +4.88% +0.81%] index_add_ strided 3 : Elapsed 0.005 ms (0.546 ms / 100) 0.503 -> 0.482 ( -4.17%) [ +0.20% +0.00% +0.20% / -0.60% -4.17% -2.58%] index_copy_ strided 3 : Elapsed 0.005 ms (0.504 ms / 100) 0.492 -> 0.501 ( +1.83%) [+11.38% +14.23% +0.00% / +6.91% +6.71% +1.83%] index_add_ strided 7 : Elapsed 0.005 ms (0.548 ms / 100) 0.495 -> 0.485 ( -2.02%) [ +0.81% +0.81% +0.00% / +4.04% -2.02% +0.40%] index_copy_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.496 -> 0.495 ( -0.20%) [ +9.07% +2.82% +0.00% / +11.09% +6.25% -0.20%] index_add_ strided 257 : Elapsed 0.005 ms (0.541 ms / 100) 0.495 -> 0.486 ( -1.82%) [ +1.01% +1.01% +0.00% / +0.81% -1.82% -1.01%] index_copy_ strided 257 : Elapsed 0.005 ms (0.500 ms / 100) 0.497 -> 0.491 ( -1.21%) [ +0.00% +1.61% +0.00% / +8.85% -1.21% +1.01%] index_add_ perm : Elapsed 0.005 ms (0.497 ms / 100) 0.497 -> 0.485 ( -2.41%) [ +0.60% +0.00% +4.63% / +0.20% -2.41% -0.20%] index_copy_ perm : Elapsed 0.005 ms (0.500 ms / 100) 0.493 -> 0.489 ( -0.81%) [ +1.01% +3.85% +0.00% / +2.64% -0.81% +1.83%] index_add_ perm_sorted : Elapsed 0.005 ms (0.498 ms / 100) 0.500 -> 0.484 ( -3.20%) [ +0.00% +0.20% +1.60% / -0.20% -3.20% -0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.500 ms / 100) 8.219 -> 8.216 ( -0.04%) [ +0.55% +0.00% +0.18% / +0.32% +0.02% -0.04%] index_select const : Elapsed 0.083 ms (8.264 ms / 100) 8.184 -> 8.191 ( +0.09%) [ +0.50% +0.00% +0.35% / +0.60% +0.09% +0.13%] index_select wrap : Elapsed 0.082 ms (8.225 ms / 100) 8.236 -> 8.186 ( -0.61%) [ +0.33% +0.19% +0.00% / +0.06% -0.61% -0.35%] index_select linear : Elapsed 0.083 ms (8.263 ms / 100) 8.177 -> 8.190 ( +0.16%) [ +0.46% +0.00% +0.15% / +0.16% +0.39% +0.55%] index_select reverse : Elapsed 0.082 ms (8.215 ms / 100) 8.212 -> 8.169 ( -0.52%) [ +0.21% +0.15% +0.00% / +0.39% -0.52% -0.16%] index_select skip64 : Elapsed 0.082 ms (8.229 ms / 100) 8.201 -> 8.217 ( +0.20%) [ +0.24% +0.01% +0.00% / +0.24% +0.24% +0.20%] index_select skip256 : Elapsed 0.082 ms (8.221 ms / 100) 8.190 -> 8.181 ( -0.11%) [ +0.16% +0.11% +0.00% / +0.11% -0.10% -0.11%] index_select spread : Elapsed 0.082 ms (8.203 ms / 100) 8.225 -> 8.234 ( +0.11%) [ +0.46% +0.00% +0.02% / +0.26% +0.22% +0.11%] index_select random : Elapsed 0.083 ms (8.263 ms / 100) 8.217 -> 8.177 ( -0.49%) [ +0.33% +0.13% +0.00% / +0.29% -0.49% -0.38%] index_select random_sorted : Elapsed 0.082 ms (8.244 ms / 100) B = [200, 500, 5] (stride (1, 1000, 200)) A = [200, 1, 5] (stride (5, 1000, 1)) dim = 1 0.500 -> 0.503 ( +0.60%) [ +0.60% +22.80% +0.00% / +0.80% +4.20% +0.60%] index_add_ linear : Elapsed 0.005 ms (0.503 ms / 100) 0.500 -> 0.490 ( -2.00%) [ +0.40% +0.00% +5.60% / +0.00% -2.00% -0.80%] index_copy_ linear : Elapsed 0.005 ms (0.502 ms / 100) 0.495 -> 0.498 ( +0.61%) [ +1.21% +22.83% +0.00% / +1.62% +3.64% +0.61%] index_add_ reverse : Elapsed 0.005 ms (0.501 ms / 100) 0.493 -> 0.487 ( -1.22%) [ +3.65% +1.83% +0.00% / +1.62% -1.22% +0.00%] index_copy_ reverse : Elapsed 0.005 ms (0.511 ms / 100) 0.501 -> 0.493 ( -1.60%) [ +5.79% +25.75% +0.00% / +0.20% -1.60% +48.90%] index_add_ spread : Elapsed 0.005 ms (0.530 ms / 100) 0.507 -> 0.495 ( -2.37%) [ +0.00% +2.17% +3.75% / -1.38% -2.37% +24.26%] index_copy_ spread : Elapsed 0.005 ms (0.507 ms / 100) 0.502 -> 0.488 ( -2.79%) [ +7.97% +19.72% +0.00% / +1.79% -2.79% -1.00%] index_add_ strided 3 : Elapsed 0.005 ms (0.542 ms / 100) 0.502 -> 0.486 ( -3.19%) [ +0.00% +4.38% +2.39% / -0.60% -3.19% +2.19%] index_copy_ strided 3 : Elapsed 0.005 ms (0.502 ms / 100) 0.500 -> 0.484 ( -3.20%) [ +8.00% +22.40% +0.00% / +0.80% -3.20% -1.20%] index_add_ strided 7 : Elapsed 0.005 ms (0.540 ms / 100) 0.505 -> 0.481 ( -4.75%) [ +0.00% +2.77% +2.77% / +2.97% -4.75% -0.99%] index_copy_ strided 7 : Elapsed 0.005 ms (0.505 ms / 100) 0.501 -> 0.486 ( -2.99%) [ +8.38% +21.76% +0.00% / +0.80% -2.99% -0.40%] index_add_ strided 257 : Elapsed 0.005 ms (0.543 ms / 100) 0.505 -> 0.488 ( -3.37%) [ +0.00% +9.31% +0.00% / +7.33% -3.37% +5.15%] index_copy_ strided 257 : Elapsed 0.005 ms (0.505 ms / 100) 0.500 -> 0.487 ( -2.60%) [ +0.00% +27.40% +5.00% / +1.00% -2.60% +1.60%] index_add_ perm : Elapsed 0.005 ms (0.500 ms / 100) 0.496 -> 0.484 ( -2.42%) [ +1.21% +1.01% +0.00% / +8.47% -2.42% +2.82%] index_copy_ perm : Elapsed 0.005 ms (0.502 ms / 100) 0.496 -> 0.488 ( -1.61%) [ +1.01% +6.25% +0.00% / +2.62% -1.61% +9.68%] index_add_ perm_sorted : Elapsed 0.005 ms (0.501 ms / 100) 0.500 -> 0.485 ( -3.00%) [ +0.40% +0.00% +1.80% / -0.80% -3.00% -0.40%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.502 ms / 100) 8.189 -> 8.244 ( +0.67%) [ +0.57% +0.00% +0.45% / +0.67% +0.70% +0.73%] index_select const : Elapsed 0.082 ms (8.236 ms / 100) 8.185 -> 8.186 ( +0.01%) [ +0.15% +0.00% +0.22% / +0.15% +0.09% +0.01%] index_select wrap : Elapsed 0.082 ms (8.197 ms / 100) 8.209 -> 8.203 ( -0.07%) [ +0.24% +0.00% +0.04% / +0.10% -0.07% -0.02%] index_select linear : Elapsed 0.082 ms (8.229 ms / 100) 8.185 -> 8.193 ( +0.10%) [ +0.15% +0.00% +0.06% / +0.10% +0.83% +0.78%] index_select reverse : Elapsed 0.082 ms (8.197 ms / 100) 8.219 -> 8.186 ( -0.40%) [ +0.06% +0.00% +0.06% / +0.05% -0.34% -0.40%] index_select skip64 : Elapsed 0.082 ms (8.224 ms / 100) 8.169 -> 8.194 ( +0.31%) [ +0.61% +0.00% +0.38% / +0.31% +0.36% +0.47%] index_select skip256 : Elapsed 0.082 ms (8.219 ms / 100) 8.181 -> 8.195 ( +0.17%) [ +0.53% +0.00% +0.07% / +0.21% +0.40% +0.17%] index_select spread : Elapsed 0.082 ms (8.224 ms / 100) 8.224 -> 8.224 ( +0.00%) [ +0.35% +0.04% +0.00% / +0.00% +0.74% +0.72%] index_select random : Elapsed 0.083 ms (8.253 ms / 100) 8.214 -> 8.178 ( -0.44%) [ +0.29% +0.00% +0.04% / +0.15% -0.44% -0.43%] index_select random_sorted : Elapsed 0.082 ms (8.238 ms / 100) B = [200, 500, 5] (stride (1, 200, 100000)) A = [200, 1, 5] (stride (1, 1000, 200)) dim = 1 0.501 -> 0.489 ( -2.40%) [ +0.00% +2.59% +0.40% / +8.18% -2.40% -0.40%] index_add_ linear : Elapsed 0.005 ms (0.501 ms / 100) 0.498 -> 0.491 ( -1.41%) [ +4.82% +0.40% +0.00% / +1.41% -1.41% -0.40%] index_copy_ linear : Elapsed 0.005 ms (0.522 ms / 100) 0.500 -> 0.499 ( -0.20%) [ +1.20% +0.80% +0.00% / +1.00% +5.80% -0.20%] index_add_ reverse : Elapsed 0.005 ms (0.506 ms / 100) 0.491 -> 0.486 ( -1.02%) [ +2.44% +0.81% +0.00% / +3.87% -1.02% +11.61%] index_copy_ reverse : Elapsed 0.005 ms (0.503 ms / 100) 0.500 -> 0.488 ( -2.40%) [ +0.00% +3.00% +0.20% / +10.00% -2.40% -0.60%] index_add_ spread : Elapsed 0.005 ms (0.500 ms / 100) 0.493 -> 0.484 ( -1.83%) [ +1.42% +2.64% +0.00% / +2.64% -1.83% +0.41%] index_copy_ spread : Elapsed 0.005 ms (0.500 ms / 100) 0.503 -> 0.486 ( -3.38%) [ +0.00% +1.19% +0.20% / +0.99% -3.38% +1.19%] index_add_ strided 3 : Elapsed 0.005 ms (0.503 ms / 100) 0.501 -> 0.483 ( -3.59%) [ +0.40% +0.00% +6.59% / +0.60% -3.59% +4.79%] index_copy_ strided 3 : Elapsed 0.005 ms (0.503 ms / 100) 0.502 -> 0.485 ( -3.39%) [ +0.00% +1.59% +6.37% / +0.20% -3.39% -0.20%] index_add_ strided 7 : Elapsed 0.005 ms (0.502 ms / 100) 0.491 -> 0.492 ( +0.20%) [ +7.74% +3.26% +0.00% / +1.02% +0.61% +0.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.529 ms / 100) 0.498 -> 0.483 ( -3.01%) [ +0.20% +2.81% +0.00% / +4.42% -3.01% +0.00%] index_add_ strided 257 : Elapsed 0.005 ms (0.499 ms / 100) 0.490 -> 0.482 ( -1.63%) [ +3.27% +3.67% +0.00% / +6.53% -1.63% +1.22%] index_copy_ strided 257 : Elapsed 0.005 ms (0.506 ms / 100) 0.512 -> 0.495 ( -3.32%) [ +0.00% +1.56% +0.98% / -1.95% -0.39% -3.32%] index_add_ perm : Elapsed 0.005 ms (0.512 ms / 100) 0.498 -> 0.485 ( -2.61%) [ +6.02% +0.40% +0.00% / -0.40% -2.61% -1.41%] index_copy_ perm : Elapsed 0.005 ms (0.528 ms / 100) 0.502 -> 0.484 ( -3.59%) [ +0.00% +2.19% +1.79% / +0.40% -3.59% -1.20%] index_add_ perm_sorted : Elapsed 0.005 ms (0.502 ms / 100) 0.494 -> 0.484 ( -2.02%) [ +3.04% +1.42% +0.00% / +8.91% -2.02% +3.85%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.509 ms / 100) 8.636 -> 8.646 ( +0.12%) [ +0.05% +0.12% +0.00% / +0.17% +0.12% +0.20%] index_select const : Elapsed 0.086 ms (8.640 ms / 100) 8.603 -> 8.619 ( +0.19%) [ +0.00% +0.19% +0.07% / +0.19% +0.86% +0.70%] index_select wrap : Elapsed 0.086 ms (8.603 ms / 100) 8.663 -> 8.634 ( -0.33%) [ +0.07% +0.05% +0.00% / +0.02% -0.32% -0.33%] index_select linear : Elapsed 0.087 ms (8.669 ms / 100) 8.577 -> 8.610 ( +0.38%) [ +0.17% +0.01% +0.00% / +0.38% +1.25% +1.19%] index_select reverse : Elapsed 0.086 ms (8.592 ms / 100) 8.626 -> 8.608 ( -0.21%) [ +0.00% +0.29% +0.13% / +0.14% -0.21% -0.06%] index_select skip64 : Elapsed 0.086 ms (8.626 ms / 100) 8.612 -> 8.620 ( +0.09%) [ +0.31% +0.01% +0.00% / +0.09% +0.56% +0.74%] index_select skip256 : Elapsed 0.086 ms (8.639 ms / 100) 8.590 -> 8.623 ( +0.38%) [ +0.08% +0.08% +0.00% / +0.38% +0.70% +0.38%] index_select spread : Elapsed 0.086 ms (8.597 ms / 100) 8.634 -> 8.682 ( +0.56%) [ +0.09% +0.00% +0.05% / +0.56% +1.20% +0.65%] index_select random : Elapsed 0.086 ms (8.642 ms / 100) 8.638 -> 8.635 ( -0.03%) [ +0.21% +0.00% +0.21% / +0.21% -0.02% -0.03%] index_select random_sorted : Elapsed 0.087 ms (8.656 ms / 100) out_shape = [200, 1, 500] in_shape = [200, 1, 5] idx_dim = 2 B = [200, 1, 500] (stride (500, 500, 1)) A = [200, 1, 5] (stride (5, 1, 1)) dim = 2 1.219 -> 1.229 ( +0.82%) [ +0.98% +0.00% +0.00% / +0.90% +0.90% +0.82%] index_add_ linear : Elapsed 0.012 ms (1.231 ms / 100) 1.179 -> 1.184 ( +0.42%) [ +0.59% +0.17% +0.00% / +0.59% +0.59% +0.42%] index_copy_ linear : Elapsed 0.012 ms (1.186 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +0.99% +0.08% +0.00% / +0.99% +0.99% +0.90%] index_add_ reverse : Elapsed 0.012 ms (1.230 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.25% +0.00% / +0.68% +0.76% +0.59%] index_copy_ reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +1.07% +0.00% +0.00% / +0.90% +0.90% +0.99%] index_add_ spread : Elapsed 0.012 ms (1.231 ms / 100) 1.180 -> 1.185 ( +0.42%) [ +0.51% +0.00% +0.25% / +0.42% +0.42% +0.42%] index_copy_ spread : Elapsed 0.012 ms (1.186 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +0.99% +0.00% +0.00% / +0.90% +0.90% +0.90%] index_add_ strided 3 : Elapsed 0.012 ms (1.230 ms / 100) 1.181 -> 1.185 ( +0.34%) [ +0.42% +0.00% +0.17% / +0.34% +0.42% +0.42%] index_copy_ strided 3 : Elapsed 0.012 ms (1.186 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +0.99% +0.00% +0.00% / +0.90% +0.90% +0.90%] index_add_ strided 7 : Elapsed 0.012 ms (1.230 ms / 100) 1.177 -> 1.185 ( +0.68%) [ +0.68% +0.00% +0.68% / +0.85% +0.76% +0.68%] index_copy_ strided 7 : Elapsed 0.012 ms (1.185 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +0.99% +0.00% +0.00% / +0.90% +1.07% +0.90%] index_add_ strided 257 : Elapsed 0.012 ms (1.230 ms / 100) 1.182 -> 1.184 ( +0.17%) [ +0.34% +0.00% +0.34% / +0.34% +0.25% +0.17%] index_copy_ strided 257 : Elapsed 0.012 ms (1.186 ms / 100) 1.218 -> 1.229 ( +0.90%) [ +0.99% +0.08% +0.00% / +0.90% +0.90% +0.90%] index_add_ perm : Elapsed 0.012 ms (1.230 ms / 100) 1.179 -> 1.184 ( +0.42%) [ +0.85% +0.00% +0.51% / +0.51% +0.51% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.189 ms / 100) 1.219 -> 1.229 ( +0.82%) [ +0.90% +0.00% +0.00% / +0.82% +0.82% +0.90%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.177 -> 1.184 ( +0.59%) [ +0.85% +0.17% +0.00% / +1.95% +0.76% +0.59%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.187 ms / 100) 8.598 -> 8.615 ( +0.20%) [ +0.22% +0.13% +0.00% / +0.20% +0.47% +0.50%] index_select const : Elapsed 0.086 ms (8.617 ms / 100) 8.605 -> 8.629 ( +0.28%) [ +0.35% +0.14% +0.00% / +0.52% +0.28% +0.37%] index_select wrap : Elapsed 0.086 ms (8.635 ms / 100) 8.593 -> 8.609 ( +0.19%) [ +0.05% +0.16% +0.00% / +0.19% +0.67% +0.61%] index_select linear : Elapsed 0.086 ms (8.597 ms / 100) 8.591 -> 8.612 ( +0.24%) [ +0.02% +0.06% +0.00% / +0.24% +0.71% +0.66%] index_select reverse : Elapsed 0.086 ms (8.593 ms / 100) 8.594 -> 8.598 ( +0.05%) [ +0.09% +0.00% +0.12% / +0.05% +0.48% +0.47%] index_select skip64 : Elapsed 0.086 ms (8.602 ms / 100) 8.603 -> 8.607 ( +0.05%) [ +0.14% +0.00% +0.01% / +0.05% +0.51% +0.62%] index_select skip256 : Elapsed 0.086 ms (8.615 ms / 100) 8.596 -> 8.610 ( +0.16%) [ +0.28% +0.00% +0.05% / +0.16% +0.63% +0.56%] index_select spread : Elapsed 0.086 ms (8.620 ms / 100) 8.589 -> 8.607 ( +0.21%) [ +0.26% +0.00% +0.15% / +0.21% +0.73% +0.94%] index_select strided 3 : Elapsed 0.086 ms (8.611 ms / 100) 8.602 -> 8.619 ( +0.20%) [ +0.19% +0.00% +0.05% / +0.20% +0.37% +0.29%] index_select random : Elapsed 0.086 ms (8.618 ms / 100) 8.600 -> 8.602 ( +0.02%) [ +0.13% +0.10% +0.00% / +0.02% +0.42% +0.36%] index_select random_sorted : Elapsed 0.086 ms (8.611 ms / 100) B = [200, 1, 500] (stride (500, 100000, 1)) A = [200, 1, 5] (stride (1, 1, 200)) dim = 2 1.216 -> 1.228 ( +0.99%) [ +1.07% +0.00% +0.00% / +0.99% +1.32% +0.99%] index_add_ linear : Elapsed 0.012 ms (1.229 ms / 100) 1.173 -> 1.182 ( +0.77%) [ +0.85% +0.00% +0.77% / +0.77% +1.02% +0.94%] index_copy_ linear : Elapsed 0.012 ms (1.183 ms / 100) 1.216 -> 1.228 ( +0.99%) [ +1.07% +0.08% +0.00% / +1.15% +0.99% +1.07%] index_add_ reverse : Elapsed 0.012 ms (1.229 ms / 100) 1.174 -> 1.184 ( +0.85%) [ +0.85% +0.00% +0.09% / +0.85% +1.02% +0.85%] index_copy_ reverse : Elapsed 0.012 ms (1.184 ms / 100) 1.216 -> 1.228 ( +0.99%) [ +1.15% +0.08% +0.00% / +0.99% +1.15% +1.23%] index_add_ spread : Elapsed 0.012 ms (1.230 ms / 100) 1.174 -> 1.185 ( +0.94%) [ +0.85% +0.00% +0.51% / +0.94% +1.02% +0.94%] index_copy_ spread : Elapsed 0.012 ms (1.184 ms / 100) 1.215 -> 1.227 ( +0.99%) [ +1.15% +0.00% +0.00% / +1.07% +1.07% +0.99%] index_add_ strided 3 : Elapsed 0.012 ms (1.229 ms / 100) 1.172 -> 1.184 ( +1.02%) [ +0.94% +0.00% +0.51% / +1.02% +1.02% +1.02%] index_copy_ strided 3 : Elapsed 0.012 ms (1.183 ms / 100) 1.215 -> 1.227 ( +0.99%) [ +1.32% +0.00% +0.00% / +1.15% +1.15% +0.99%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.174 -> 1.183 ( +0.77%) [ +1.02% +0.00% +0.26% / +0.94% +0.77% +0.94%] index_copy_ strided 7 : Elapsed 0.012 ms (1.186 ms / 100) 1.215 -> 1.226 ( +0.91%) [ +1.23% +0.08% +0.00% / +1.23% +0.99% +0.91%] index_add_ strided 257 : Elapsed 0.012 ms (1.230 ms / 100) 1.173 -> 1.184 ( +0.94%) [ +0.85% +0.43% +0.00% / +0.94% +1.11% +1.11%] index_copy_ strided 257 : Elapsed 0.012 ms (1.183 ms / 100) 1.215 -> 1.226 ( +0.91%) [ +1.15% +0.08% +0.00% / +1.23% +0.91% +0.99%] index_add_ perm : Elapsed 0.012 ms (1.229 ms / 100) 1.178 -> 1.183 ( +0.42%) [ +0.51% +0.00% +0.00% / +0.59% +0.51% +0.42%] index_copy_ perm : Elapsed 0.012 ms (1.184 ms / 100) 1.216 -> 1.225 ( +0.74%) [ +1.15% +0.08% +0.00% / +1.15% +0.82% +0.74%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +0.77% +0.60% +0.00% / +0.85% +0.77% +0.77%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.184 ms / 100) 8.534 -> 8.564 ( +0.35%) [ +0.07% +0.00% +0.06% / +0.35% +0.50% +0.75%] index_select const : Elapsed 0.085 ms (8.540 ms / 100) 8.543 -> 8.550 ( +0.08%) [ +0.22% +0.00% +0.00% / +0.08% +0.39% +0.33%] index_select wrap : Elapsed 0.086 ms (8.562 ms / 100) 8.515 -> 8.534 ( +0.22%) [ +0.46% +0.00% +0.29% / +0.22% +0.75% +0.74%] index_select linear : Elapsed 0.086 ms (8.554 ms / 100) 8.537 -> 8.555 ( +0.21%) [ +0.25% +0.00% +0.04% / +0.21% +0.43% +0.40%] index_select reverse : Elapsed 0.086 ms (8.558 ms / 100) 8.535 -> 8.546 ( +0.13%) [ +0.15% +0.00% +0.05% / +0.13% +0.42% +0.50%] index_select skip64 : Elapsed 0.085 ms (8.548 ms / 100) 8.542 -> 8.538 ( -0.05%) [ +0.00% +0.00% +0.01% / -0.05% +0.63% +0.33%] index_select skip256 : Elapsed 0.085 ms (8.542 ms / 100) 8.528 -> 8.540 ( +0.14%) [ +0.23% +0.00% +0.16% / +0.14% +0.60% +0.66%] index_select spread : Elapsed 0.085 ms (8.548 ms / 100) 8.536 -> 8.547 ( +0.13%) [ +0.00% +0.02% +0.12% / +0.13% +0.61% +0.43%] index_select strided 3 : Elapsed 0.085 ms (8.536 ms / 100) 8.540 -> 8.551 ( +0.13%) [ +0.16% +0.00% +0.09% / +0.13% +0.59% +0.43%] index_select random : Elapsed 0.086 ms (8.554 ms / 100) 8.529 -> 8.550 ( +0.25%) [ +0.40% +0.00% +0.21% / +0.25% +0.62% +0.50%] index_select random_sorted : Elapsed 0.086 ms (8.563 ms / 100) B = [200, 1, 500] (stride (500, 100000, 1)) A = [200, 1, 5] (stride (1, 200, 200)) dim = 2 0.622 -> 0.624 ( +0.32%) [ +0.64% +0.00% +0.16% / +0.64% +0.48% +0.32%] index_add_ linear : Elapsed 0.006 ms (0.626 ms / 100) 0.638 -> 0.640 ( +0.31%) [ +0.78% +0.00% +0.16% / +0.63% +0.47% +0.31%] index_copy_ linear : Elapsed 0.006 ms (0.643 ms / 100) 0.620 -> 0.624 ( +0.65%) [ +1.13% +0.32% +0.00% / +0.97% +0.65% +0.65%] index_add_ reverse : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.639 ( +0.31%) [ +0.94% +0.16% +0.00% / +2.35% +0.31% +0.47%] index_copy_ reverse : Elapsed 0.006 ms (0.643 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.61% +0.00% +0.32% / +1.13% +0.48% +0.48%] index_add_ spread : Elapsed 0.006 ms (0.631 ms / 100) 0.636 -> 0.640 ( +0.63%) [ +1.42% +0.00% +0.31% / +1.10% +0.79% +0.63%] index_copy_ spread : Elapsed 0.006 ms (0.645 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.97% +0.00% +0.16% / +0.97% +0.81% +0.64%] index_add_ strided 3 : Elapsed 0.006 ms (0.627 ms / 100) 0.636 -> 0.640 ( +0.63%) [ +0.94% +0.31% +0.00% / +0.94% +1.10% +0.63%] index_copy_ strided 3 : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +0.81% +0.16% +0.00% / +0.97% +0.64% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.626 ms / 100) 0.636 -> 0.640 ( +0.63%) [ +0.79% +0.00% +0.16% / +0.94% +0.94% +0.63%] index_copy_ strided 7 : Elapsed 0.006 ms (0.641 ms / 100) 0.621 -> 0.625 ( +0.64%) [ +0.97% +0.32% +0.00% / +0.81% +0.64% +0.64%] index_add_ strided 257 : Elapsed 0.006 ms (0.627 ms / 100) 0.636 -> 0.641 ( +0.79%) [ +0.94% +0.00% +0.00% / +0.79% +0.94% +0.94%] index_copy_ strided 257 : Elapsed 0.006 ms (0.642 ms / 100) 0.622 -> 0.626 ( +0.64%) [ +0.80% +0.00% +0.00% / +0.80% +0.64% +0.64%] index_add_ perm : Elapsed 0.006 ms (0.627 ms / 100) 0.637 -> 0.641 ( +0.63%) [ +0.78% +0.00% +0.00% / +0.63% +0.63% +0.63%] index_copy_ perm : Elapsed 0.006 ms (0.642 ms / 100) 0.621 -> 0.624 ( +0.48%) [ +1.13% +0.32% +0.00% / +0.97% +0.64% +0.48%] index_add_ perm_sorted : Elapsed 0.006 ms (0.628 ms / 100) 0.639 -> 0.639 ( +0.00%) [ +0.63% +0.00% +0.00% / +0.47% +0.16% +0.00%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.643 ms / 100) 5.356 -> 5.202 ( -2.88%) [ +0.17% +0.00% +0.00% / -2.88% -1.87% -1.76%] index_select const : Elapsed 0.054 ms (5.365 ms / 100) 5.366 -> 5.276 ( -1.68%) [ +0.20% +0.00% +0.09% / -1.64% -1.68% -1.51%] index_select wrap : Elapsed 0.054 ms (5.377 ms / 100) 5.381 -> 5.237 ( -2.68%) [ +0.24% +0.06% +0.00% / -2.68% -2.21% -2.19%] index_select linear : Elapsed 0.054 ms (5.394 ms / 100) 5.359 -> 5.231 ( -2.39%) [ +0.45% +0.00% +0.09% / -2.39% -2.15% -2.18%] index_select reverse : Elapsed 0.054 ms (5.383 ms / 100) 5.348 -> 5.226 ( -2.28%) [ +0.00% +0.07% +0.22% / -2.28% -1.85% -1.91%] index_select skip64 : Elapsed 0.053 ms (5.348 ms / 100) 5.389 -> 5.257 ( -2.45%) [ +0.06% +0.00% +0.09% / -2.45% -2.39% -2.19%] index_select skip256 : Elapsed 0.054 ms (5.392 ms / 100) 5.391 -> 5.242 ( -2.76%) [ +0.17% +0.00% +0.04% / -2.76% -2.10% -1.87%] index_select spread : Elapsed 0.054 ms (5.400 ms / 100) 5.338 -> 5.235 ( -1.93%) [ +0.41% +0.00% +0.22% / -1.93% -1.65% -1.67%] index_select strided 3 : Elapsed 0.054 ms (5.360 ms / 100) 5.352 -> 5.226 ( -2.35%) [ +0.26% +0.00% +0.24% / -2.35% -2.00% -1.66%] index_select random : Elapsed 0.054 ms (5.366 ms / 100) 5.381 -> 5.254 ( -2.36%) [ +0.15% +0.00% +0.09% / -2.36% -1.93% -1.67%] index_select random_sorted : Elapsed 0.054 ms (5.389 ms / 100) B = [200, 1, 500] (stride (1, 1, 200)) dim = 2 fill_cnt = 5 0.453 -> 0.455 ( +0.44%) [ +1.10% +0.44% +0.00% / +1.10% +0.44% +0.44%] index_fill_ const : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.455 ( +0.66%) [ +1.33% +0.22% +0.00% / +1.11% +1.11% +0.66%] index_fill_ linear : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.11% +0.00% +0.00% / +1.11% +1.11% +1.11%] index_fill_ reverse : Elapsed 0.005 ms (0.457 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +1.10% +0.00% +2.64% / +0.66% +0.00% +1.76%] index_fill_ skip64 : Elapsed 0.005 ms (0.459 ms / 100) 0.458 -> 0.454 ( -0.87%) [ +0.00% +1.75% +18.78% / -0.22% -0.87% -0.87%] index_fill_ skip256 : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.453 ( -0.88%) [ +0.22% +0.22% +0.00% / +3.72% -0.66% -0.88%] index_fill_ spread : Elapsed 0.005 ms (0.458 ms / 100) 0.455 -> 0.453 ( -0.44%) [ +0.88% +0.44% +0.00% / +0.66% +0.00% -0.44%] index_fill_ strided 3 : Elapsed 0.005 ms (0.459 ms / 100) 0.451 -> 0.456 ( +1.11%) [ +1.55% +0.44% +0.00% / +1.33% +1.55% +1.11%] index_fill_ strided 5 : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.457 ( +0.88%) [ +7.06% +0.00% +0.00% / +0.88% +3.31% +1.77%] index_fill_ strided 7 : Elapsed 0.005 ms (0.485 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.22% +0.00% / +0.88% +0.22% +0.00%] index_fill_ strided 8 : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.458 ( +1.10%) [ +1.10% +0.22% +0.00% / +1.10% +1.10% +1.10%] index_fill_ strided 16 : Elapsed 0.005 ms (0.458 ms / 100) 0.456 -> 0.457 ( +0.22%) [ +0.44% +0.00% +0.00% / +0.22% +0.22% +2.63%] index_fill_ strided 64 : Elapsed 0.005 ms (0.458 ms / 100) 0.456 -> 0.453 ( -0.66%) [ +0.44% +0.00% +1.10% / +0.22% -0.44% -0.66%] index_fill_ strided 100 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.66% +0.00% +0.00% / +0.66% +0.00% +0.00%] index_fill_ strided 255 : Elapsed 0.005 ms (0.457 ms / 100) 0.455 -> 0.454 ( -0.22%) [ +0.66% +0.00% +0.22% / +0.66% -0.22% +0.00%] index_fill_ strided 256 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.453 ( -0.22%) [ +0.88% +0.00% +0.00% / +0.66% -0.22% -0.22%] index_fill_ strided 257 : Elapsed 0.005 ms (0.458 ms / 100) 0.455 -> 0.454 ( -0.22%) [ +0.44% +0.00% +0.00% / +0.44% -0.22% +1.10%] index_fill_ random : Elapsed 0.005 ms (0.457 ms / 100) 0.453 -> 0.457 ( +0.88%) [ +1.10% +0.44% +0.00% / +0.88% +0.88% +1.55%] index_fill_ random_sorted : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.455 ( +0.44%) [ +7.51% +0.00% +0.88% / +1.32% +0.44% +0.88%] index_fill_ perm : Elapsed 0.005 ms (0.487 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +1.11% +0.00% +0.00% / +1.11% +1.33% +1.11%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.457 ms / 100) B = [200, 1, 500] (stride (1, 200, 200)) A = [200, 1, 5] (stride (5, 1, 1)) dim = 2 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.00% +0.00% / +0.80% +0.48% +0.64%] index_add_ linear : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.614 ( +0.82%) [ +0.82% +0.16% +0.00% / +0.82% +0.82% +0.82%] index_copy_ linear : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.629 ( +0.64%) [ +0.96% +0.16% +0.00% / +0.64% +1.12% +0.64%] index_add_ reverse : Elapsed 0.006 ms (0.631 ms / 100) 0.609 -> 0.613 ( +0.66%) [ +0.82% +0.00% +0.00% / +0.82% +0.66% +0.82%] index_copy_ reverse : Elapsed 0.006 ms (0.614 ms / 100) 0.624 -> 0.630 ( +0.96%) [ +0.96% +0.00% +0.00% / +0.96% +0.96% +1.76%] index_add_ spread : Elapsed 0.006 ms (0.630 ms / 100) 0.608 -> 0.614 ( +0.99%) [ +0.99% +0.16% +0.00% / +0.99% +0.99% +0.99%] index_copy_ spread : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.16% +0.00% / +0.64% +0.64% +0.48%] index_add_ strided 3 : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.613 ( +0.66%) [ +0.82% +0.16% +0.00% / +0.82% +0.66% +0.66%] index_copy_ strided 3 : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.00% +0.00% / +0.80% +0.48% +0.48%] index_add_ strided 7 : Elapsed 0.006 ms (0.630 ms / 100) 0.608 -> 0.612 ( +0.66%) [ +0.99% +0.00% +0.00% / +0.99% +0.82% +0.66%] index_copy_ strided 7 : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.00% +0.00% / +0.80% +0.48% +0.48%] index_add_ strided 257 : Elapsed 0.006 ms (0.630 ms / 100) 0.610 -> 0.612 ( +0.33%) [ +0.66% +0.00% +0.00% / +0.66% +0.33% +0.33%] index_copy_ strided 257 : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.628 ( +0.48%) [ +0.80% +0.16% +0.00% / +0.80% +0.48% +0.48%] index_add_ perm : Elapsed 0.006 ms (0.630 ms / 100) 0.609 -> 0.612 ( +0.49%) [ +0.82% +0.00% +0.16% / +0.82% +0.66% +0.49%] index_copy_ perm : Elapsed 0.006 ms (0.614 ms / 100) 0.625 -> 0.627 ( +0.32%) [ +0.96% +0.00% +0.16% / +0.64% +0.48% +0.32%] index_add_ perm_sorted : Elapsed 0.006 ms (0.631 ms / 100) 0.610 -> 0.612 ( +0.33%) [ +0.66% +0.00% +0.33% / +0.66% +0.49% +0.33%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.614 ms / 100) 5.235 -> 5.239 ( +0.08%) [ +0.06% +0.17% +0.00% / +0.08% +0.55% +0.63%] index_select const : Elapsed 0.052 ms (5.238 ms / 100) 5.245 -> 5.256 ( +0.21%) [ +0.00% +0.15% +0.29% / +0.21% +0.80% +0.63%] index_select wrap : Elapsed 0.052 ms (5.245 ms / 100) 5.260 -> 5.253 ( -0.13%) [ +0.13% +0.00% +0.04% / -0.13% +0.10% +0.19%] index_select linear : Elapsed 0.053 ms (5.267 ms / 100) 5.223 -> 5.226 ( +0.06%) [ +0.33% +0.00% +0.50% / +0.06% +0.69% +0.88%] index_select reverse : Elapsed 0.052 ms (5.240 ms / 100) 5.227 -> 5.238 ( +0.21%) [ +0.19% +0.04% +0.00% / +0.21% +0.38% +0.40%] index_select skip64 : Elapsed 0.052 ms (5.237 ms / 100) 5.244 -> 5.253 ( +0.17%) [ +0.42% +0.00% +0.21% / +0.17% +0.25% +0.29%] index_select skip256 : Elapsed 0.053 ms (5.266 ms / 100) 5.243 -> 5.252 ( +0.17%) [ +0.34% +0.00% +0.27% / +0.17% +0.50% +0.55%] index_select spread : Elapsed 0.053 ms (5.261 ms / 100) 5.230 -> 5.227 ( -0.06%) [ +0.00% +0.23% +0.19% / -0.06% +0.61% +0.40%] index_select strided 3 : Elapsed 0.052 ms (5.230 ms / 100) 5.228 -> 5.233 ( +0.10%) [ +0.06% +0.00% +0.40% / +0.10% +0.63% +0.42%] index_select random : Elapsed 0.052 ms (5.231 ms / 100) 5.247 -> 5.244 ( -0.06%) [ +0.11% +0.00% +0.29% / -0.06% +0.61% +0.67%] index_select random_sorted : Elapsed 0.053 ms (5.253 ms / 100) out_shape = [500, 5, 1] in_shape = [200, 5, 1] idx_dim = 0 B = [500, 5, 1] (stride (5, 1, 1)) dim = 0 fill_cnt = 200 0.450 -> 0.428 ( -4.89%) [+12.22% +0.00% +4.00% / -1.56% -4.89% -0.89%] index_fill_ const : Elapsed 0.005 ms (0.505 ms / 100) 0.437 -> 0.432 ( -1.14%) [ +0.00% +2.29% +0.46% / +0.46% -1.14% +0.23%] index_fill_ linear : Elapsed 0.004 ms (0.437 ms / 100) 0.433 -> 0.438 ( +1.15%) [ +1.39% +3.46% +0.00% / +2.54% +8.08% +1.15%] index_fill_ reverse : Elapsed 0.004 ms (0.439 ms / 100) 0.435 -> 0.435 ( +0.00%) [ +1.61% +3.45% +0.00% / +2.07% +0.00% +1.38%] index_fill_ skip64 : Elapsed 0.004 ms (0.442 ms / 100) 0.441 -> 0.434 ( -1.59%) [ +1.59% +2.72% +0.00% / +3.40% -1.59% +2.49%] index_fill_ skip256 : Elapsed 0.004 ms (0.448 ms / 100) good 0.461 -> 0.436 ( -5.42%) [ +1.30% +0.00% +1.52% / -4.12% -5.42% -4.77%] index_fill_ spread : Elapsed 0.005 ms (0.467 ms / 100) 0.444 -> 0.446 ( +0.45%) [ +5.63% +0.00% +19.14% / +2.70% +6.76% +0.45%] index_fill_ strided 3 : Elapsed 0.005 ms (0.469 ms / 100) 0.437 -> 0.434 ( -0.69%) [+10.07% +2.52% +0.00% / +5.03% -0.69% +0.92%] index_fill_ strided 5 : Elapsed 0.005 ms (0.481 ms / 100) 0.438 -> 0.431 ( -1.60%) [ +0.00% +2.74% +0.00% / +4.57% -1.60% +0.46%] index_fill_ strided 7 : Elapsed 0.004 ms (0.438 ms / 100) 0.433 -> 0.436 ( +0.69%) [ +0.69% +3.70% +0.00% / +11.09% +0.69% +2.31%] index_fill_ strided 8 : Elapsed 0.004 ms (0.436 ms / 100) 0.436 -> 0.432 ( -0.92%) [ +0.23% +2.98% +0.00% / +9.40% -0.92% +0.92%] index_fill_ strided 16 : Elapsed 0.004 ms (0.437 ms / 100) 0.440 -> 0.434 ( -1.36%) [ +0.00% +2.27% +1.14% / +10.23% -1.36% +13.41%] index_fill_ strided 64 : Elapsed 0.004 ms (0.440 ms / 100) 0.434 -> 0.427 ( -1.61%) [ +1.15% +3.92% +0.00% / +19.12% -1.61% +8.76%] index_fill_ strided 100 : Elapsed 0.004 ms (0.439 ms / 100) 0.436 -> 0.427 ( -2.06%) [ +1.38% +2.75% +0.00% / +3.21% -2.06% +3.21%] index_fill_ strided 255 : Elapsed 0.004 ms (0.442 ms / 100) 0.435 -> 0.435 ( +0.00%) [ +1.61% +5.98% +0.00% / +3.22% +0.00% +1.15%] index_fill_ strided 256 : Elapsed 0.004 ms (0.442 ms / 100) 0.437 -> 0.439 ( +0.46%) [ +1.37% +1.60% +0.00% / +8.92% +5.95% +0.46%] index_fill_ strided 257 : Elapsed 0.004 ms (0.443 ms / 100) 0.437 -> 0.434 ( -0.69%) [ +0.23% +3.20% +0.00% / +8.47% +9.38% -0.69%] index_fill_ random : Elapsed 0.004 ms (0.438 ms / 100) 0.436 -> 0.436 ( +0.00%) [ +8.49% +3.21% +0.00% / +17.89% +0.92% +0.00%] index_fill_ random_sorted : Elapsed 0.005 ms (0.473 ms / 100) 0.435 -> 0.435 ( +0.00%) [ +0.00% +2.99% +0.69% / +2.53% +0.00% +1.38%] index_fill_ perm : Elapsed 0.004 ms (0.435 ms / 100) 0.436 -> 0.432 ( -0.92%) [ +0.00% +3.67% +0.23% / +4.59% -0.92% +0.23%] index_fill_ perm_sorted : Elapsed 0.004 ms (0.436 ms / 100) B = [500, 5, 1] (stride (5, 1, 5)) A = [200, 5, 1] (stride (5, 1, 5)) dim = 0 0.465 -> 0.464 ( -0.22%) [ +0.43% +4.95% +0.00% / +18.49% -0.22% +0.00%] index_add_ linear : Elapsed 0.005 ms (0.467 ms / 100) 0.467 -> 0.464 ( -0.64%) [ +0.00% +3.00% +3.43% / +2.57% -0.64% +0.86%] index_copy_ linear : Elapsed 0.005 ms (0.467 ms / 100) 0.462 -> 0.466 ( +0.87%) [ +0.00% +5.84% +1.08% / +2.60% +0.87% +5.19%] index_add_ reverse : Elapsed 0.005 ms (0.462 ms / 100) 0.463 -> 0.464 ( +0.22%) [ +0.00% +4.75% +1.30% / +2.38% +0.22% +8.86%] index_copy_ reverse : Elapsed 0.005 ms (0.463 ms / 100) 0.462 -> 0.470 ( +1.73%) [ +0.00% +5.19% +1.08% / +3.03% +1.73% +10.39%] index_add_ spread : Elapsed 0.005 ms (0.462 ms / 100) 0.461 -> 0.466 ( +1.08%) [ +0.00% +6.72% +1.08% / +10.85% +1.08% +8.89%] index_copy_ spread : Elapsed 0.005 ms (0.461 ms / 100) 0.465 -> 0.468 ( +0.65%) [ +0.00% +5.38% +7.74% / +15.70% +0.65% +1.94%] index_add_ strided 3 : Elapsed 0.005 ms (0.465 ms / 100) 0.470 -> 0.465 ( -1.06%) [ +0.00% +4.89% +16.81% / +0.21% -1.06% +0.85%] index_copy_ strided 3 : Elapsed 0.005 ms (0.470 ms / 100) 0.483 -> 0.470 ( -2.69%) [ +3.31% +0.00% +0.21% / -0.83% -2.69% -1.66%] index_add_ strided 7 : Elapsed 0.005 ms (0.499 ms / 100) 0.469 -> 0.467 ( -0.43%) [ +7.25% +2.77% +0.00% / +2.13% -0.43% +3.20%] index_copy_ strided 7 : Elapsed 0.005 ms (0.503 ms / 100) 0.467 -> 0.464 ( -0.64%) [+13.49% +3.43% +0.00% / +1.71% -0.64% +1.93%] index_add_ strided 257 : Elapsed 0.005 ms (0.530 ms / 100) 0.464 -> 0.465 ( +0.22%) [ +0.00% +3.23% +0.43% / +8.84% +0.22% +2.16%] index_copy_ strided 257 : Elapsed 0.005 ms (0.464 ms / 100) 0.465 -> 0.463 ( -0.43%) [ +2.15% +4.09% +0.00% / +13.55% -0.43% +1.08%] index_add_ perm : Elapsed 0.005 ms (0.475 ms / 100) 0.465 -> 0.466 ( +0.22%) [ +0.00% +4.09% +0.65% / +2.58% +0.22% +1.08%] index_copy_ perm : Elapsed 0.005 ms (0.465 ms / 100) 0.462 -> 0.467 ( +1.08%) [ +0.00% +5.19% +1.52% / +3.25% +1.08% +1.95%] index_add_ perm_sorted : Elapsed 0.005 ms (0.462 ms / 100) 0.462 -> 0.464 ( +0.43%) [ +0.00% +3.68% +0.65% / +2.60% +0.43% +5.41%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.462 ms / 100) 0.525 -> 0.530 ( +0.95%) [+10.86% +3.24% +0.00% / +8.76% +0.95% +2.67%] index_select const : Elapsed 0.006 ms (0.582 ms / 100) 0.529 -> 0.538 ( +1.70%) [+13.23% +0.19% +0.00% / +14.56% +5.10% +1.70%] index_select wrap : Elapsed 0.006 ms (0.599 ms / 100) 0.530 -> 0.541 ( +2.08%) [+10.75% +0.00% +0.00% / +2.26% +15.28% +2.08%] index_select linear : Elapsed 0.006 ms (0.587 ms / 100) 0.531 -> 0.543 ( +2.26%) [+17.70% +0.00% +4.14% / +2.26% +6.78% +2.45%] index_select reverse : Elapsed 0.006 ms (0.625 ms / 100) 0.520 -> 0.527 ( +1.35%) [+12.31% +2.31% +0.00% / +5.19% +1.35% +3.46%] index_select skip64 : Elapsed 0.006 ms (0.584 ms / 100) 0.524 -> 0.525 ( +0.19%) [+19.85% +1.91% +0.00% / +3.44% +0.19% +2.48%] index_select skip256 : Elapsed 0.006 ms (0.628 ms / 100) 0.521 -> 0.529 ( +1.54%) [+12.09% +2.69% +0.00% / +3.84% +1.54% +3.45%] index_select spread : Elapsed 0.006 ms (0.584 ms / 100) 0.527 -> 0.531 ( +0.76%) [+21.44% +0.00% +1.33% / +2.66% +0.76% +2.09%] index_select strided 3 : Elapsed 0.006 ms (0.640 ms / 100) 0.523 -> 0.527 ( +0.76%) [ +9.94% +1.34% +0.00% / +4.02% +0.76% +1.34%] index_select strided 5 : Elapsed 0.006 ms (0.575 ms / 100) 0.522 -> 0.525 ( +0.57%) [+14.37% +1.92% +0.00% / +3.26% +0.57% +2.68%] index_select strided 7 : Elapsed 0.006 ms (0.597 ms / 100) 0.522 -> 0.539 ( +3.26%) [ +5.94% +2.11% +0.00% / +11.88% +4.02% +3.26%] index_select strided 8 : Elapsed 0.006 ms (0.553 ms / 100) 0.517 -> 0.522 ( +0.97%) [ +5.61% +2.13% +0.00% / +19.34% +0.97% +3.09%] index_select strided 16 : Elapsed 0.005 ms (0.546 ms / 100) 0.530 -> 0.525 ( -0.94%) [ +2.83% +0.00% +4.53% / +2.64% -0.94% +1.13%] index_select strided 64 : Elapsed 0.005 ms (0.545 ms / 100) 0.527 -> 0.518 ( -1.71%) [ +7.59% +0.00% +9.87% / +2.66% -1.71% +1.90%] index_select strided 100 : Elapsed 0.006 ms (0.567 ms / 100) 0.523 -> 0.528 ( +0.96%) [ +3.82% +1.91% +0.00% / +3.25% +0.96% +3.06%] index_select random : Elapsed 0.005 ms (0.543 ms / 100) 0.520 -> 0.531 ( +2.12%) [ +5.58% +2.31% +0.00% / +2.50% +2.12% +2.88%] index_select random_sorted : Elapsed 0.005 ms (0.549 ms / 100) B = [500, 5, 1] (stride (5, 1, 2500)) A = [200, 5, 1] (stride (1, 200, 1)) dim = 0 0.462 -> 0.464 ( +0.43%) [ +0.43% +5.41% +0.00% / +3.46% +0.43% +1.52%] index_add_ linear : Elapsed 0.005 ms (0.464 ms / 100) 0.459 -> 0.466 ( +1.53%) [ +1.53% +4.58% +0.00% / +4.36% +1.53% +2.61%] index_copy_ linear : Elapsed 0.005 ms (0.466 ms / 100) 0.467 -> 0.462 ( -1.07%) [ +6.64% +3.64% +0.00% / +1.71% -1.07% +14.56%] index_add_ reverse : Elapsed 0.005 ms (0.498 ms / 100) 0.462 -> 0.459 ( -0.65%) [+11.90% +4.11% +0.00% / +3.25% -0.65% +8.87%] index_copy_ reverse : Elapsed 0.005 ms (0.517 ms / 100) 0.469 -> 0.465 ( -0.85%) [ +0.85% +3.20% +0.00% / +2.99% -0.85% +13.86%] index_add_ spread : Elapsed 0.005 ms (0.473 ms / 100) 0.463 -> 0.470 ( +1.51%) [ +0.65% +4.97% +0.00% / +3.67% +1.51% +8.21%] index_copy_ spread : Elapsed 0.005 ms (0.466 ms / 100) 0.465 -> 0.465 ( +0.00%) [ +0.00% +5.16% +1.08% / +1.72% +0.00% +1.29%] index_add_ strided 3 : Elapsed 0.005 ms (0.465 ms / 100) 0.468 -> 0.458 ( -2.14%) [ +0.00% +2.35% +0.21% / +1.71% -2.14% +0.43%] index_copy_ strided 3 : Elapsed 0.005 ms (0.468 ms / 100) 0.464 -> 0.477 ( +2.80%) [ +0.00% +4.31% +0.65% / +2.80% +7.54% +3.88%] index_add_ strided 7 : Elapsed 0.005 ms (0.464 ms / 100) 0.462 -> 0.469 ( +1.52%) [ +1.30% +3.68% +0.00% / +10.17% +9.74% +1.52%] index_copy_ strided 7 : Elapsed 0.005 ms (0.468 ms / 100) 0.463 -> 0.460 ( -0.65%) [ +0.86% +7.56% +0.00% / +7.34% -0.65% +2.16%] index_add_ strided 257 : Elapsed 0.005 ms (0.467 ms / 100) 0.465 -> 0.462 ( -0.65%) [ +0.22% +2.15% +0.00% / +9.89% -0.65% +1.94%] index_copy_ strided 257 : Elapsed 0.005 ms (0.466 ms / 100) 0.463 -> 0.466 ( +0.65%) [ +0.00% +4.32% +0.65% / +9.50% +0.86% +0.65%] index_add_ perm : Elapsed 0.005 ms (0.463 ms / 100) 0.468 -> 0.461 ( -1.50%) [ +0.00% +3.21% +0.00% / +15.60% -1.50% +0.21%] index_copy_ perm : Elapsed 0.005 ms (0.468 ms / 100) 0.465 -> 0.469 ( +0.86%) [ +0.43% +3.44% +0.00% / +2.37% +0.86% +2.15%] index_add_ perm_sorted : Elapsed 0.005 ms (0.467 ms / 100) 0.462 -> 0.462 ( +0.00%) [ +3.25% +2.60% +0.00% / +3.03% +0.00% +1.73%] index_copy_ perm_sorted : Elapsed 0.005 ms (0.477 ms / 100) 0.520 -> 0.532 ( +2.31%) [ +5.38% +3.46% +0.00% / +4.23% +2.31% +2.88%] index_select const : Elapsed 0.005 ms (0.548 ms / 100) 0.522 -> 0.527 ( +0.96%) [ +4.21% +1.15% +0.00% / +3.26% +0.96% +3.45%] index_select wrap : Elapsed 0.005 ms (0.544 ms / 100) 0.528 -> 0.525 ( -0.57%) [ +3.03% +0.00% +4.73% / +2.27% -0.57% +1.70%] index_select linear : Elapsed 0.005 ms (0.544 ms / 100) 0.526 -> 0.529 ( +0.57%) [ +3.61% +0.00% +6.46% / +2.09% +0.57% +7.98%] index_select reverse : Elapsed 0.005 ms (0.545 ms / 100) 0.530 -> 0.529 ( -0.19%) [ +3.40% +0.00% +7.74% / +6.60% -0.19% +0.38%] index_select skip64 : Elapsed 0.005 ms (0.548 ms / 100) 0.535 -> 0.531 ( -0.75%) [ +5.79% +0.00% +0.75% / +0.75% +0.75% -0.75%] index_select skip256 : Elapsed 0.006 ms (0.566 ms / 100) 0.519 -> 0.533 ( +2.70%) [+13.10% +1.73% +0.00% / +4.62% +8.09% +2.70%] index_select spread : Elapsed 0.006 ms (0.587 ms / 100) 0.524 -> 0.527 ( +0.57%) [+20.80% +1.15% +0.00% / +2.86% +0.57% +2.10%] index_select strided 3 : Elapsed 0.006 ms (0.633 ms / 100) 0.530 -> 0.527 ( -0.57%) [ +3.02% +1.13% +0.00% / +1.89% -0.57% +2.45%] index_select strided 5 : Elapsed 0.005 ms (0.546 ms / 100) 0.520 -> 0.527 ( +1.35%) [ +4.42% +2.31% +0.00% / +3.46% +1.35% +10.96%] index_select strided 7 : Elapsed 0.005 ms (0.543 ms / 100) 0.524 -> 0.531 ( +1.34%) [ +3.63% +0.19% +0.00% / +3.44% +1.34% +4.96%] index_select strided 8 : Elapsed 0.005 ms (0.543 ms / 100) 0.522 -> 0.524 ( +0.38%) [ +4.98% +1.53% +0.00% / +3.26% +0.38% +2.87%] index_select strided 16 : Elapsed 0.005 ms (0.548 ms / 100) 0.527 -> 0.530 ( +0.57%) [ +9.87% +0.95% +0.00% / +2.66% +0.57% +1.90%] index_select strided 64 : Elapsed 0.006 ms (0.579 ms / 100) 0.525 -> 0.535 ( +1.90%) [ +5.14% +0.95% +0.00% / +2.10% +2.48% +1.90%] index_select strided 100 : Elapsed 0.006 ms (0.552 ms / 100) 0.521 -> 0.534 ( +2.50%) [ +4.03% +3.26% +0.00% / +3.07% +8.64% +2.50%] index_select random : Elapsed 0.005 ms (0.542 ms / 100) 0.527 -> 0.534 ( +1.33%) [ +3.23% +0.76% +0.00% / +2.66% +16.89% +1.33%] index_select random_sorted : Elapsed 0.005 ms (0.544 ms / 100) out_shape = [200, 500, 1] in_shape = [200, 5, 1] idx_dim = 1 B = [200, 500, 1] (stride (500, 1, 1)) A = [200, 5, 1] (stride (1, 200, 1)) dim = 1 0.570 -> 0.574 ( +0.70%) [ +1.05% +0.18% +0.00% / +1.05% +0.70% +0.70%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.68% +0.00% +0.00% / +0.68% +0.34% +0.34%] index_copy_ linear : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.53% +0.35% +0.53%] index_add_ reverse : Elapsed 0.006 ms (0.575 ms / 100) 0.586 -> 0.586 ( +0.00%) [ +0.85% +0.00% +0.00% / +0.85% +0.17% +0.00%] index_copy_ reverse : Elapsed 0.006 ms (0.591 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.70% +0.18% +0.00% / +0.53% +0.53% +0.53%] index_add_ spread : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.585 ( +0.00%) [ +1.03% +0.00% +0.17% / +0.85% +0.17% +0.00%] index_copy_ spread : Elapsed 0.006 ms (0.591 ms / 100) 0.572 -> 0.572 ( +0.00%) [ +1.75% +0.00% +0.87% / +0.70% +0.00% +0.00%] index_add_ strided 3 : Elapsed 0.006 ms (0.582 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.68% +0.00% +0.17% / +0.68% +0.51% +0.68%] index_copy_ strided 3 : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +1.05% +0.00% +0.00% / +1.05% +0.35% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.577 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.51% +0.17% +0.00% / +0.68% +0.51% +0.34%] index_copy_ strided 7 : Elapsed 0.006 ms (0.588 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +1.05% +0.17% +0.00% / +0.70% -0.17% +0.17%] index_add_ strided 257 : Elapsed 0.006 ms (0.578 ms / 100) 0.583 -> 0.587 ( +0.69%) [ +0.69% +0.00% +0.00% / +0.86% +0.69% +1.03%] index_copy_ strided 257 : Elapsed 0.006 ms (0.587 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +1.05% +0.17% +0.00% / +0.87% -0.17% +0.00%] index_add_ perm : Elapsed 0.006 ms (0.578 ms / 100) 0.583 -> 0.588 ( +0.86%) [ +0.86% +0.34% +0.00% / +0.86% +1.03% +1.03%] index_copy_ perm : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.35% +0.53% +0.53%] index_add_ perm_sorted : Elapsed 0.006 ms (0.575 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.85% +0.17% +0.00% / +0.85% +0.34% +0.34%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.590 ms / 100) good 5.070 -> 4.785 ( -5.62%) [ +0.06% +0.00% +0.08% / -5.44% -5.62% -5.54%] index_select const : Elapsed 0.051 ms (5.073 ms / 100) 5.070 -> 4.829 ( -4.75%) [ +0.00% +0.10% +0.22% / -4.73% -4.75% -4.71%] index_select wrap : Elapsed 0.051 ms (5.070 ms / 100) good 5.103 -> 4.819 ( -5.57%) [ +0.00% +0.06% +0.20% / -5.57% -5.45% -5.41%] index_select linear : Elapsed 0.051 ms (5.103 ms / 100) good 5.077 -> 4.780 ( -5.85%) [ +0.00% +0.04% +0.24% / -5.61% -5.69% -5.85%] index_select reverse : Elapsed 0.051 ms (5.077 ms / 100) good 5.064 -> 4.784 ( -5.53%) [ +0.02% +0.00% +0.02% / -5.45% -5.53% -5.41%] index_select skip64 : Elapsed 0.051 ms (5.065 ms / 100) good 5.121 -> 4.808 ( -6.11%) [ +0.00% +0.06% +0.10% / -6.11% -5.74% -5.99%] index_select skip256 : Elapsed 0.051 ms (5.121 ms / 100) good 5.109 -> 4.817 ( -5.72%) [ +0.25% +0.00% +0.22% / -5.64% -5.70% -5.72%] index_select spread : Elapsed 0.051 ms (5.122 ms / 100) good 5.052 -> 4.792 ( -5.15%) [ +0.18% +0.12% +0.00% / -4.95% -5.15% -5.13%] index_select strided 3 : Elapsed 0.051 ms (5.061 ms / 100) good 5.053 -> 4.798 ( -5.05%) [ +0.00% +0.04% +0.00% / -4.81% -4.93% -5.05%] index_select random : Elapsed 0.051 ms (5.053 ms / 100) good 5.112 -> 4.806 ( -5.99%) [ +0.00% +0.00% +0.08% / -5.99% -5.71% -5.59%] index_select random_sorted : Elapsed 0.051 ms (5.112 ms / 100) B = [200, 500, 1] (stride (500, 1, 1)) A = [200, 5, 1] (stride (1, 200, 200)) dim = 1 0.570 -> 0.574 ( +0.70%) [ +1.23% +0.18% +0.00% / +1.23% +0.70% +0.70%] index_add_ linear : Elapsed 0.006 ms (0.577 ms / 100) 0.583 -> 0.587 ( +0.69%) [ +1.20% +0.00% +0.17% / +0.69% +0.86% +0.86%] index_copy_ linear : Elapsed 0.006 ms (0.590 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.23% +0.18% +0.00% / +1.05% +0.18% +0.18%] index_add_ reverse : Elapsed 0.006 ms (0.578 ms / 100) 0.582 -> 0.587 ( +0.86%) [ +1.03% +0.00% +0.17% / +0.86% +1.20% +1.03%] index_copy_ reverse : Elapsed 0.006 ms (0.588 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +1.40% +0.18% +0.00% / +2.10% +0.35% +0.35%] index_add_ spread : Elapsed 0.006 ms (0.579 ms / 100) 0.582 -> 0.588 ( +1.03%) [ +0.86% +0.00% +0.00% / +1.55% +1.20% +1.03%] index_copy_ spread : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.88% +0.00% +0.00% / +0.53% +0.53% +0.53%] index_add_ strided 3 : Elapsed 0.006 ms (0.576 ms / 100) 0.583 -> 0.587 ( +0.69%) [ +1.03% +0.17% +0.00% / +1.03% +1.03% +0.69%] index_copy_ strided 3 : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.88% +0.00% +0.00% / +0.70% +0.53% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.584 -> 0.587 ( +0.51%) [ +0.51% +0.00% +0.00% / +0.68% +0.51% +0.51%] index_copy_ strided 7 : Elapsed 0.006 ms (0.587 ms / 100) 0.571 -> 0.573 ( +0.35%) [ +0.53% +0.00% +0.00% / +0.35% +0.53% +0.70%] index_add_ strided 257 : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.587 ( +0.34%) [ +0.85% +0.00% +0.34% / +0.68% +0.34% +0.34%] index_copy_ strided 257 : Elapsed 0.006 ms (0.590 ms / 100) 0.570 -> 0.574 ( +0.70%) [ +0.70% +0.00% +0.00% / +0.70% +0.88% +0.88%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.585 -> 0.586 ( +0.17%) [ +0.68% +0.17% +0.00% / +0.68% +0.34% +0.17%] index_copy_ perm : Elapsed 0.006 ms (0.589 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.00% +0.00% / +0.88% +0.53% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.576 ms / 100) 0.586 -> 0.585 ( -0.17%) [ +0.51% +0.00% +0.00% / +0.34% +0.00% -0.17%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.589 ms / 100) good 5.069 -> 4.779 ( -5.72%) [ +0.00% +0.06% +0.14% / -5.68% -5.64% -5.72%] index_select const : Elapsed 0.051 ms (5.069 ms / 100) 5.041 -> 4.824 ( -4.30%) [ +0.16% +0.06% +0.00% / -4.21% -4.30% -4.21%] index_select wrap : Elapsed 0.050 ms (5.049 ms / 100) good 5.089 -> 4.809 ( -5.50%) [ +0.12% +0.00% +0.12% / -5.50% -5.31% -5.46%] index_select linear : Elapsed 0.051 ms (5.095 ms / 100) good 5.067 -> 4.785 ( -5.57%) [ +0.20% +0.34% +0.00% / -5.55% -5.53% -5.57%] index_select reverse : Elapsed 0.051 ms (5.077 ms / 100) good 5.092 -> 4.784 ( -6.05%) [ +0.22% +0.00% +0.16% / -5.89% -6.05% -5.95%] index_select skip64 : Elapsed 0.051 ms (5.103 ms / 100) good 5.110 -> 4.796 ( -6.14%) [ +0.02% +0.00% +0.06% / -6.14% -5.83% -5.85%] index_select skip256 : Elapsed 0.051 ms (5.111 ms / 100) good 5.085 -> 4.815 ( -5.31%) [ +0.12% +0.00% +0.26% / -5.19% -5.27% -5.31%] index_select spread : Elapsed 0.051 ms (5.091 ms / 100) 5.053 -> 4.804 ( -4.93%) [ +0.04% +0.00% +0.04% / -4.93% -4.93% -4.91%] index_select strided 3 : Elapsed 0.051 ms (5.055 ms / 100) 5.046 -> 4.804 ( -4.80%) [ +0.18% +0.00% +0.04% / -4.54% -4.52% -4.80%] index_select random : Elapsed 0.051 ms (5.055 ms / 100) good 5.083 -> 4.810 ( -5.37%) [ +0.20% +0.00% +0.14% / -5.37% -5.27% -5.15%] index_select random_sorted : Elapsed 0.051 ms (5.093 ms / 100) B = [200, 500, 1] (stride (1, 200, 1)) A = [200, 5, 1] (stride (1, 200, 200)) dim = 1 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.00% +0.00% / +0.88% +0.18% +0.35%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.00% +0.18% / +1.08% +0.90% +0.72%] index_copy_ linear : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.53% +0.00% +0.35% / +0.53% +0.35% +0.18%] index_add_ reverse : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.90% +0.18% +0.00% / +0.72% +1.08% +0.54%] index_copy_ reverse : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.574 ( +0.53%) [ +0.53% +0.00% +0.00% / +0.53% +1.23% +0.53%] index_add_ spread : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.00% +0.00% / +0.72% +0.72% +0.90%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.05% +0.00% +0.53% / +0.88% +0.00% +5.25%] index_add_ strided 3 : Elapsed 0.006 ms (0.577 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.54% +0.00% / +0.72% +0.72% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.18% +0.00% / +1.05% +0.18% +0.35%] index_add_ strided 7 : Elapsed 0.006 ms (0.576 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +0.90% +0.00% +0.18% / +0.72% +0.54% +0.72%] index_copy_ strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.23% +0.00% +0.88% / +0.88% +0.18% +0.00%] index_add_ strided 257 : Elapsed 0.006 ms (0.578 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +5.06% +0.18% +0.00% / +1.08% +0.72% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.581 ms / 100) 0.571 -> 0.571 ( +0.00%) [ +1.05% +0.00% +0.00% / +0.88% +0.00% +0.00%] index_add_ perm : Elapsed 0.006 ms (0.577 ms / 100) 0.554 -> 0.556 ( +0.36%) [ +0.90% +0.00% +0.00% / +1.08% +0.72% +0.36%] index_copy_ perm : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.70% +0.00% +0.88% / +0.70% +0.53% +0.70%] index_add_ perm_sorted : Elapsed 0.006 ms (0.574 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +0.90% +0.00% +0.00% / +0.90% +0.90% +1.62%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) 4.791 -> 4.782 ( -0.19%) [ +0.10% +0.00% +0.10% / -0.19% -0.08% -0.06%] index_select const : Elapsed 0.048 ms (4.796 ms / 100) 4.791 -> 4.803 ( +0.25%) [ +0.29% +0.00% +0.21% / +0.25% +0.75% +0.86%] index_select wrap : Elapsed 0.048 ms (4.805 ms / 100) 4.826 -> 4.799 ( -0.56%) [ +0.00% +0.04% +0.02% / -0.08% -0.41% -0.56%] index_select linear : Elapsed 0.048 ms (4.826 ms / 100) 4.787 -> 4.791 ( +0.08%) [ +0.00% +0.19% +0.19% / +0.08% +0.15% +0.19%] index_select reverse : Elapsed 0.048 ms (4.787 ms / 100) 4.790 -> 4.790 ( +0.00%) [ +0.00% +0.15% +0.27% / +0.08% +0.00% +0.02%] index_select skip64 : Elapsed 0.048 ms (4.790 ms / 100) 4.826 -> 4.793 ( -0.68%) [ +0.00% +0.00% +0.06% / -0.02% -0.68% -0.66%] index_select skip256 : Elapsed 0.048 ms (4.826 ms / 100) 4.824 -> 4.795 ( -0.60%) [ +0.15% +0.00% +0.12% / +0.04% -0.46% -0.60%] index_select spread : Elapsed 0.048 ms (4.831 ms / 100) 4.799 -> 4.795 ( -0.08%) [ +0.17% +0.23% +0.00% / -0.08% +0.04% +0.00%] index_select strided 3 : Elapsed 0.048 ms (4.807 ms / 100) 4.792 -> 4.791 ( -0.02%) [ +0.17% +0.08% +0.00% / +0.19% +0.02% -0.02%] index_select random : Elapsed 0.048 ms (4.800 ms / 100) 4.828 -> 4.802 ( -0.54%) [ +0.00% +0.02% +0.00% / -0.02% -0.54% -0.39%] index_select random_sorted : Elapsed 0.048 ms (4.828 ms / 100) B = [200, 500, 1] (stride (1, 200, 1)) A = [200, 5, 1] (stride (1, 200, 1000)) dim = 1 0.570 -> 0.571 ( +0.18%) [ +1.05% +0.18% +0.00% / +1.05% +0.70% +0.18%] index_add_ linear : Elapsed 0.006 ms (0.576 ms / 100) 0.554 -> 0.557 ( +0.54%) [ +1.08% +0.00% +0.00% / +0.90% +0.72% +0.54%] index_copy_ linear : Elapsed 0.006 ms (0.560 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +1.05% +0.00% +0.00% / +0.88% +0.18% +0.35%] index_add_ reverse : Elapsed 0.006 ms (0.577 ms / 100) 0.554 -> 0.559 ( +0.90%) [ +1.08% +0.00% +0.18% / +0.90% +1.44% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.560 ms / 100) 0.572 -> 0.571 ( -0.17%) [ +0.87% +0.00% +0.00% / +0.52% -0.17% +0.70%] index_add_ spread : Elapsed 0.006 ms (0.577 ms / 100) 0.554 -> 0.558 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +0.90% +3.07%] index_copy_ spread : Elapsed 0.006 ms (0.559 ms / 100) 0.569 -> 0.574 ( +0.88%) [ +0.88% +0.35% +0.00% / +0.88% +0.88% +0.88%] index_add_ strided 3 : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.00% +0.18% / +1.08% +0.90% +0.72%] index_copy_ strided 3 : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.70% +0.18% +0.00% / +0.70% +0.53% +0.70%] index_add_ strided 7 : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.18% +0.00% / +1.08% +0.72% +1.81%] index_copy_ strided 7 : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.70% +0.00% +0.00% / +0.53% +1.58% +0.70%] index_add_ strided 257 : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.18% +0.00% / +1.08% +1.27% +0.72%] index_copy_ strided 257 : Elapsed 0.006 ms (0.559 ms / 100) 0.570 -> 0.573 ( +0.53%) [ +0.70% +0.18% +0.00% / +0.53% +0.88% +0.70%] index_add_ perm : Elapsed 0.006 ms (0.574 ms / 100) 0.553 -> 0.558 ( +0.90%) [ +1.08% +0.00% +0.00% / +1.27% +0.90% +1.08%] index_copy_ perm : Elapsed 0.006 ms (0.559 ms / 100) 0.571 -> 0.572 ( +0.18%) [ +0.88% +0.18% +0.00% / +0.70% +0.18% +0.18%] index_add_ perm_sorted : Elapsed 0.006 ms (0.576 ms / 100) 0.553 -> 0.557 ( +0.72%) [ +1.08% +0.00% +0.18% / +1.08% +0.72% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.559 ms / 100) 4.785 -> 4.791 ( +0.13%) [ +0.08% +0.00% +0.21% / +0.13% +0.23% +0.25%] index_select const : Elapsed 0.048 ms (4.789 ms / 100) 4.787 -> 4.795 ( +0.17%) [ +0.00% +0.33% +0.08% / +0.17% +0.75% +0.73%] index_select wrap : Elapsed 0.048 ms (4.787 ms / 100) 4.807 -> 4.785 ( -0.46%) [ +0.00% +0.48% +0.25% / +0.46% +0.04% -0.46%] index_select linear : Elapsed 0.048 ms (4.807 ms / 100) 4.795 -> 4.789 ( -0.13%) [ +0.17% +0.00% +0.06% / -0.13% -0.13% +0.06%] index_select reverse : Elapsed 0.048 ms (4.803 ms / 100) 4.790 -> 4.784 ( -0.13%) [ +0.10% +0.00% +0.17% / -0.13% +0.00% -0.06%] index_select skip64 : Elapsed 0.048 ms (4.795 ms / 100) 4.822 -> 4.795 ( -0.56%) [ +0.00% +0.04% +0.12% / +0.04% -0.56% -0.50%] index_select skip256 : Elapsed 0.048 ms (4.822 ms / 100) 4.816 -> 4.803 ( -0.27%) [ +0.00% +0.35% +0.06% / +0.04% -0.27% -0.27%] index_select spread : Elapsed 0.048 ms (4.816 ms / 100) 4.792 -> 4.784 ( -0.17%) [ +0.15% +0.00% +0.10% / -0.17% -0.10% -0.04%] index_select strided 3 : Elapsed 0.048 ms (4.799 ms / 100) 4.786 -> 4.781 ( -0.10%) [ +0.19% +0.00% +0.19% / -0.10% +0.04% +0.10%] index_select random : Elapsed 0.048 ms (4.795 ms / 100) 4.825 -> 4.799 ( -0.54%) [ +0.00% +0.15% +0.17% / +0.02% -0.50% -0.54%] index_select random_sorted : Elapsed 0.048 ms (4.825 ms / 100) B = [200, 500, 1] (stride (1, 200, 200)) dim = 1 fill_cnt = 5 0.453 -> 0.456 ( +0.66%) [ +1.10% +0.44% +0.00% / +4.19% +0.88% +0.66%] index_fill_ const : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.88% +0.00% +0.22% / +0.66% +0.22% +0.00%] index_fill_ linear : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.455 ( +0.22%) [ +1.98% +0.22% +0.00% / +0.66% +0.22% +0.44%] index_fill_ reverse : Elapsed 0.005 ms (0.463 ms / 100) 0.451 -> 0.457 ( +1.33%) [ +1.33% +0.00% +0.22% / +1.33% +1.33% +1.33%] index_fill_ skip64 : Elapsed 0.005 ms (0.457 ms / 100) 0.457 -> 0.457 ( +0.00%) [ +0.22% +0.00% +1.31% / +0.22% +0.00% +0.22%] index_fill_ skip256 : Elapsed 0.005 ms (0.458 ms / 100) 0.452 -> 0.457 ( +1.11%) [ +0.88% +0.00% +15.04% / +1.11% +3.32% +1.11%] index_fill_ spread : Elapsed 0.005 ms (0.456 ms / 100) 0.453 -> 0.456 ( +0.66%) [ +0.66% +0.00% +2.43% / +0.88% +0.66% +0.88%] index_fill_ strided 3 : Elapsed 0.005 ms (0.456 ms / 100) 0.453 -> 0.454 ( +0.22%) [ +1.10% +0.00% +2.21% / +0.88% +0.22% +0.22%] index_fill_ strided 5 : Elapsed 0.005 ms (0.458 ms / 100) 0.458 -> 0.456 ( -0.44%) [ +0.00% +0.44% +0.44% / -0.22% +0.00% -0.44%] index_fill_ strided 7 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.455 ( +0.22%) [ +0.66% +0.00% +1.98% / +0.88% +0.22% +0.22%] index_fill_ strided 8 : Elapsed 0.005 ms (0.457 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +1.54% +0.00% +5.95% / +0.88% +0.00% +0.00%] index_fill_ strided 16 : Elapsed 0.005 ms (0.461 ms / 100) 0.457 -> 0.453 ( -0.88%) [ +0.22% +0.00% +0.00% / +0.22% -0.88% -0.88%] index_fill_ strided 64 : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.453 ( -0.88%) [ +0.22% +0.00% +0.00% / +0.22% -0.88% -0.88%] index_fill_ strided 100 : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.454 ( +0.00%) [ +0.66% +0.00% +2.20% / +0.66% +0.22% +0.00%] index_fill_ strided 255 : Elapsed 0.005 ms (0.457 ms / 100) 0.455 -> 0.453 ( -0.44%) [ +0.66% +0.00% +2.86% / +0.44% -0.44% -0.22%] index_fill_ strided 256 : Elapsed 0.005 ms (0.458 ms / 100) 0.455 -> 0.454 ( -0.22%) [ +0.66% +0.00% +1.32% / +0.44% -0.22% -0.22%] index_fill_ strided 257 : Elapsed 0.005 ms (0.458 ms / 100) 0.457 -> 0.453 ( -0.88%) [ +0.22% +0.00% +1.31% / +0.22% -0.88% -0.88%] index_fill_ random : Elapsed 0.005 ms (0.458 ms / 100) 0.453 -> 0.455 ( +0.44%) [ +1.10% +0.00% +10.15% / +0.88% +0.44% +0.88%] index_fill_ random_sorted : Elapsed 0.005 ms (0.458 ms / 100) 0.454 -> 0.456 ( +0.44%) [ +0.88% +0.00% +11.67% / +0.88% +0.44% +5.95%] index_fill_ perm : Elapsed 0.005 ms (0.458 ms / 100) 0.451 -> 0.456 ( +1.11%) [ +1.55% +0.00% +2.00% / +1.11% +1.55% +1.11%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.458 ms / 100) out_shape = [200, 5, 500] in_shape = [200, 5, 1] idx_dim = 2 B = [200, 5, 500] (stride (5, 1, 1000)) dim = 2 fill_cnt = 1 0.459 -> 0.477 ( +3.92%) [+100.65% +6.54% +0.00% / +21.79% +3.92% +6.75%] index_fill_ const : Elapsed 0.009 ms (0.921 ms / 100) 0.467 -> 0.454 ( -2.78%) [+99.14% +0.00% +2.78% / +21.63% -2.78% +1.50%] index_fill_ linear : Elapsed 0.009 ms (0.930 ms / 100) 0.459 -> 0.456 ( -0.65%) [+105.23% +1.74% +0.00% / +27.23% -0.65% -0.22%] index_fill_ reverse : Elapsed 0.009 ms (0.942 ms / 100) 0.457 -> 0.467 ( +2.19%) [+100.88% +1.75% +0.00% / +21.88% +2.19% +2.41%] index_fill_ skip64 : Elapsed 0.009 ms (0.918 ms / 100) 0.458 -> 0.457 ( -0.22%) [+24.02% +4.80% +0.00% / +25.98% -0.22% +2.84%] index_fill_ skip256 : Elapsed 0.006 ms (0.568 ms / 100) 0.455 -> 0.464 ( +1.98%) [+32.75% +3.08% +0.00% / +22.42% +6.15% +1.98%] index_fill_ spread : Elapsed 0.006 ms (0.604 ms / 100) 0.462 -> 0.456 ( -1.30%) [+27.49% +5.84% +0.00% / +24.68% -1.30% +0.43%] index_fill_ strided 3 : Elapsed 0.006 ms (0.589 ms / 100) 0.456 -> 0.455 ( -0.22%) [+27.41% +3.07% +0.00% / +26.75% -0.22% +1.75%] index_fill_ strided 5 : Elapsed 0.006 ms (0.581 ms / 100) 0.458 -> 0.455 ( -0.66%) [+12.45% +0.87% +0.00% / +25.11% -0.66% +11.79%] index_fill_ strided 7 : Elapsed 0.005 ms (0.515 ms / 100) 0.464 -> 0.467 ( +0.65%) [ +0.00% +0.22% +4.31% / +19.61% +3.02% +0.65%] index_fill_ strided 8 : Elapsed 0.005 ms (0.464 ms / 100) 0.458 -> 0.471 ( +2.84%) [ +1.75% +6.99% +0.00% / +21.40% +5.90% +2.84%] index_fill_ strided 16 : Elapsed 0.005 ms (0.466 ms / 100) 0.472 -> 0.459 ( -2.75%) [ +5.72% +0.00% +0.21% / +17.80% -2.75% +5.30%] index_fill_ strided 64 : Elapsed 0.005 ms (0.499 ms / 100) 0.457 -> 0.455 ( -0.44%) [ +0.66% +1.97% +0.00% / +6.13% -0.44% +1.53%] index_fill_ strided 100 : Elapsed 0.005 ms (0.460 ms / 100) 0.463 -> 0.452 ( -2.38%) [+18.36% +1.08% +0.00% / +0.86% -2.38% +13.17%] index_fill_ strided 255 : Elapsed 0.005 ms (0.548 ms / 100) 0.457 -> 0.462 ( +1.09%) [ +1.31% +2.84% +0.00% / +1.09% +5.03% +1.31%] index_fill_ strided 256 : Elapsed 0.005 ms (0.463 ms / 100) 0.459 -> 0.456 ( -0.65%) [ +8.06% +2.61% +0.00% / +0.65% -0.65% +15.47%] index_fill_ strided 257 : Elapsed 0.005 ms (0.496 ms / 100) 0.456 -> 0.454 ( -0.44%) [ +1.97% +30.92% +0.00% / +2.41% -0.44% +1.10%] index_fill_ random : Elapsed 0.005 ms (0.465 ms / 100) 0.459 -> 0.466 ( +1.53%) [ +0.87% +7.63% +0.00% / +1.96% +9.80% +1.53%] index_fill_ random_sorted : Elapsed 0.005 ms (0.463 ms / 100) 0.457 -> 0.461 ( +0.88%) [ +1.75% +4.60% +0.00% / +1.31% +0.88% +9.63%] index_fill_ perm : Elapsed 0.005 ms (0.465 ms / 100) 0.462 -> 0.453 ( -1.95%) [ +1.08% +5.84% +0.00% / +0.87% -1.95% +1.08%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.467 ms / 100) B = [200, 5, 500] (stride (1, 200, 1000)) dim = 2 fill_cnt = 1 0.465 -> 0.481 ( +3.44%) [ +0.00% +1.51% +3.01% / +26.02% +3.44% +9.25%] index_fill_ const : Elapsed 0.005 ms (0.465 ms / 100) 0.461 -> 0.452 ( -1.95%) [ +2.60% +1.74% +0.00% / +1.08% -1.95% +6.07%] index_fill_ linear : Elapsed 0.005 ms (0.473 ms / 100) 0.461 -> 0.453 ( -1.74%) [ +7.81% +2.82% +0.00% / +1.08% -1.74% +1.30%] index_fill_ reverse : Elapsed 0.005 ms (0.497 ms / 100) 0.468 -> 0.463 ( -1.07%) [ +2.35% +0.00% +4.70% / +17.95% -1.07% -1.07%] index_fill_ skip64 : Elapsed 0.005 ms (0.479 ms / 100) 0.469 -> 0.456 ( -2.77%) [ +0.00% +4.05% +4.26% / +85.07% -2.77% +9.81%] index_fill_ skip256 : Elapsed 0.005 ms (0.469 ms / 100) 0.457 -> 0.456 ( -0.22%) [ +1.31% +2.19% +0.00% / +1.75% -0.22% +9.19%] index_fill_ spread : Elapsed 0.005 ms (0.463 ms / 100) 0.458 -> 0.467 ( +1.97%) [ +1.53% +1.75% +0.00% / +7.42% +3.71% +1.97%] index_fill_ strided 3 : Elapsed 0.005 ms (0.465 ms / 100) 0.466 -> 0.453 ( -2.79%) [ +6.01% +0.43% +0.00% / +6.01% -2.79% -0.64%] index_fill_ strided 5 : Elapsed 0.005 ms (0.494 ms / 100) 0.458 -> 0.456 ( -0.44%) [ +1.09% +1.97% +0.00% / +2.18% -0.44% +0.87%] index_fill_ strided 7 : Elapsed 0.005 ms (0.463 ms / 100) 0.465 -> 0.466 ( +0.22%) [ +0.00% +0.65% +6.24% / +0.22% +4.52% +6.02%] index_fill_ strided 8 : Elapsed 0.005 ms (0.465 ms / 100) 0.460 -> 0.462 ( +0.43%) [ +0.87% +0.87% +0.00% / +2.39% +1.52% +0.43%] index_fill_ strided 16 : Elapsed 0.005 ms (0.464 ms / 100) 0.463 -> 0.455 ( -1.73%) [ +7.34% +6.26% +0.00% / +0.43% -1.73% +0.22%] index_fill_ strided 64 : Elapsed 0.005 ms (0.497 ms / 100) 0.458 -> 0.462 ( +0.87%) [ +3.93% +2.40% +0.00% / +1.09% +1.75% +0.87%] index_fill_ strided 100 : Elapsed 0.005 ms (0.476 ms / 100) 0.464 -> 0.468 ( +0.86%) [ +0.00% +1.08% +0.00% / +0.86% +0.86% +7.54%] index_fill_ strided 255 : Elapsed 0.005 ms (0.464 ms / 100) 0.468 -> 0.454 ( -2.99%) [ +1.07% +0.00% +51.50% / -1.07% -2.99% +6.20%] index_fill_ strided 256 : Elapsed 0.005 ms (0.473 ms / 100) 0.458 -> 0.461 ( +0.66%) [ +7.64% +6.55% +0.00% / +1.75% +0.66% +2.62%] index_fill_ strided 257 : Elapsed 0.005 ms (0.493 ms / 100) 0.461 -> 0.457 ( -0.87%) [ +7.16% +1.95% +0.00% / +2.17% +0.87% -0.87%] index_fill_ random : Elapsed 0.005 ms (0.494 ms / 100) 0.462 -> 0.456 ( -1.30%) [ +0.87% +1.08% +0.00% / +3.68% -1.30% -0.22%] index_fill_ random_sorted : Elapsed 0.005 ms (0.466 ms / 100) 0.461 -> 0.461 ( +0.00%) [ +1.30% +1.74% +0.00% / +5.64% +0.00% +5.42%] index_fill_ perm : Elapsed 0.005 ms (0.467 ms / 100) 0.461 -> 0.462 ( +0.22%) [ +7.59% +0.65% +0.00% / +1.52% +2.82% +0.22%] index_fill_ perm_sorted : Elapsed 0.005 ms (0.496 ms / 100) B = [200, 5, 500] (stride (1, 200, 1000)) A = [200, 5, 1] (stride (5, 1, 5)) dim = 2 0.573 -> 0.579 ( +1.05%) [ +2.44% +0.35% +0.00% / +1.05% +1.22% +1.05%] index_add_ linear : Elapsed 0.006 ms (0.587 ms / 100) 0.559 -> 0.563 ( +0.72%) [ +0.54% +0.00% +0.00% / +0.72% +1.25% +0.89%] index_copy_ linear : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.578 ( +0.87%) [ +0.87% +0.17% +0.00% / +0.87% +1.22% +1.05%] index_add_ reverse : Elapsed 0.006 ms (0.578 ms / 100) 0.557 -> 0.561 ( +0.72%) [ +0.90% +0.18% +0.00% / +0.72% +1.26% +0.90%] index_copy_ reverse : Elapsed 0.006 ms (0.562 ms / 100) 0.572 -> 0.580 ( +1.40%) [ +1.22% +1.22% +0.00% / +1.40% +1.75% +3.32%] index_add_ spread : Elapsed 0.006 ms (0.579 ms / 100) 0.557 -> 0.562 ( +0.90%) [ +1.62% +0.00% +0.18% / +0.90% +1.62% +1.08%] index_copy_ spread : Elapsed 0.006 ms (0.566 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +1.05% +0.17% +0.00% / +1.05% +1.22% +1.75%] index_add_ strided 3 : Elapsed 0.006 ms (0.579 ms / 100) 0.562 -> 0.562 ( +0.00%) [ +0.00% +6.05% +1.25% / +0.00% +0.00% +0.00%] index_copy_ strided 3 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.578 ( +0.70%) [ +0.87% +0.00% +0.00% / +0.70% +0.87% +0.87%] index_add_ strided 7 : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.00% +0.00% / +0.72% +0.90% +1.43%] index_copy_ strided 7 : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.579 ( +1.05%) [ +2.97% +0.17% +0.00% / +1.05% +1.05% +2.27%] index_add_ strided 257 : Elapsed 0.006 ms (0.590 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.08% +1.25%] index_copy_ strided 257 : Elapsed 0.006 ms (0.562 ms / 100) 0.574 -> 0.579 ( +0.87%) [ +0.87% +0.00% +0.00% / +1.05% +0.87% +1.05%] index_add_ perm : Elapsed 0.006 ms (0.579 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +1.08% +0.90% +0.72%] index_copy_ perm : Elapsed 0.006 ms (0.562 ms / 100) 0.573 -> 0.578 ( +0.87%) [ +1.40% +0.52% +0.00% / +1.05% +0.87% +1.05%] index_add_ perm_sorted : Elapsed 0.006 ms (0.581 ms / 100) 0.558 -> 0.562 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +0.90% +0.72%] index_copy_ perm_sorted : Elapsed 0.006 ms (0.562 ms / 100) 16.511 -> 16.567 ( +0.34%) [ +0.31% +0.14% +0.00% / +0.34% +0.38% +0.41%] index_select const : Elapsed 0.166 ms (16.563 ms / 100) 16.531 -> 16.535 ( +0.02%) [ +0.18% +0.00% +0.04% / +0.20% +0.02% +0.18%] index_select wrap : Elapsed 0.166 ms (16.560 ms / 100) 16.546 -> 16.546 ( +0.00%) [ +0.05% +0.04% +0.00% / +0.25% +0.04% +0.00%] index_select linear : Elapsed 0.166 ms (16.555 ms / 100) 16.510 -> 16.533 ( +0.14%) [ +0.38% +0.13% +0.00% / +0.14% +0.35% +0.34%] index_select reverse : Elapsed 0.166 ms (16.572 ms / 100) 16.540 -> 16.547 ( +0.04%) [ +0.18% +0.00% +0.00% / +0.07% +0.13% +0.04%] index_select skip64 : Elapsed 0.166 ms (16.570 ms / 100) 16.547 -> 16.555 ( +0.05%) [ +0.02% +0.00% +0.11% / +0.23% +0.05% +0.08%] index_select skip256 : Elapsed 0.166 ms (16.551 ms / 100) 16.524 -> 16.563 ( +0.24%) [ +0.16% +0.01% +0.00% / +0.34% +0.24% +0.24%] index_select spread : Elapsed 0.166 ms (16.550 ms / 100) 16.522 -> 16.561 ( +0.24%) [ +0.24% +0.03% +0.00% / +0.24% +0.30% +0.24%] index_select random : Elapsed 0.166 ms (16.562 ms / 100) 16.530 -> 16.536 ( +0.04%) [ +0.21% +0.02% +0.00% / +0.22% +0.04% +0.16%] index_select random_sorted : Elapsed 0.166 ms (16.564 ms / 100) ==================== rep_count = 1000 dimensions = [2, 3, 5] out_shape = [2, 5] in_shape = [3, 5] idx_dim = 0 B = [2, 5] (stride (5, 1)) dim = 0 fill_cnt = 3 4.262 -> 4.231 ( -0.73%) [ +0.49% +1.95% +0.00% / +0.56% -0.73% +0.16%] index_fill_ const : Elapsed 0.004 ms (4.283 ms / 1000) 4.235 -> 4.269 ( +0.80%) [ +0.00% +2.43% +0.05% / +0.80% +1.75% +1.89%] index_fill_ linear : Elapsed 0.004 ms (4.235 ms / 1000) 4.222 -> 4.256 ( +0.81%) [ +0.47% +3.34% +0.00% / +0.97% +2.79% +0.81%] index_fill_ reverse : Elapsed 0.004 ms (4.242 ms / 1000) 4.167 -> 4.273 ( +2.54%) [ +1.90% +4.42% +0.00% / +5.18% +2.54% +2.86%] index_fill_ skip64 : Elapsed 0.004 ms (4.246 ms / 1000) 4.223 -> 4.236 ( +0.31%) [ +0.00% +3.65% +2.06% / +4.88% +4.00% +0.31%] index_fill_ skip256 : Elapsed 0.004 ms (4.223 ms / 1000) 4.176 -> 4.227 ( +1.22%) [ +1.99% +4.86% +0.00% / +1.22% +1.92% +1.48%] index_fill_ spread : Elapsed 0.004 ms (4.259 ms / 1000) 4.195 -> 4.246 ( +1.22%) [ +0.62% +4.48% +0.00% / +1.55% +1.22% +2.41%] index_fill_ random : Elapsed 0.004 ms (4.221 ms / 1000) 4.176 -> 4.225 ( +1.17%) [ +0.29% +3.98% +0.00% / +1.39% +1.17% +4.55%] index_fill_ random_sorted : Elapsed 0.004 ms (4.188 ms / 1000) B = [2, 5] (stride (5, 1)) A = [3, 5] (stride (5, 1)) dim = 0 5.038 -> 4.996 ( -0.83%) [ +2.70% +0.95% +0.00% / +1.51% +2.88% -0.83%] index_select const : Elapsed 0.005 ms (5.174 ms / 1000) 4.894 -> 4.962 ( +1.39%) [+11.71% +4.23% +0.00% / +4.80% +1.39% +2.08%] index_select wrap : Elapsed 0.005 ms (5.467 ms / 1000) 4.964 -> 4.973 ( +0.18%) [ +8.86% +2.36% +0.00% / +3.57% +0.18% +2.28%] index_select linear : Elapsed 0.005 ms (5.404 ms / 1000) 5.082 -> 5.084 ( +0.04%) [ +7.02% +1.67% +0.00% / +0.04% +2.20% +1.89%] index_select reverse : Elapsed 0.005 ms (5.439 ms / 1000) 4.987 -> 5.007 ( +0.40%) [ +8.00% +5.73% +0.00% / +2.39% +0.40% +1.66%] index_select skip64 : Elapsed 0.005 ms (5.386 ms / 1000) 4.902 -> 5.028 ( +2.57%) [ +6.79% +3.81% +0.00% / +4.24% +2.57% +3.28%] index_select skip256 : Elapsed 0.005 ms (5.235 ms / 1000) 5.049 -> 5.003 ( -0.91%) [ +3.33% +0.59% +0.00% / +0.95% +2.38% -0.91%] index_select spread : Elapsed 0.005 ms (5.217 ms / 1000) 4.896 -> 4.931 ( +0.71%) [ +5.76% +5.82% +0.00% / +3.86% +0.71% +3.64%] index_select random : Elapsed 0.005 ms (5.178 ms / 1000) 5.055 -> 4.944 ( -2.20%) [ +5.06% +0.00% +1.31% / +0.81% -2.20% +1.40%] index_select random_sorted : Elapsed 0.005 ms (5.311 ms / 1000) 5.175 -> 5.096 ( -1.53%) [ +3.79% +0.00% +0.58% / -1.02% +0.15% -1.53%] index_select perm : Elapsed 0.005 ms (5.371 ms / 1000) 4.953 -> 4.977 ( +0.48%) [ +4.87% +2.36% +0.00% / +2.75% +0.48% +2.04%] index_select perm_sorted : Elapsed 0.005 ms (5.194 ms / 1000) B = [2, 5] (stride (5, 1)) A = [3, 5] (stride (1, 3)) dim = 0 4.968 -> 4.980 ( +0.24%) [ +4.69% +2.48% +0.00% / +2.86% +0.24% +4.39%] index_select const : Elapsed 0.005 ms (5.201 ms / 1000) 5.010 -> 5.082 ( +1.44%) [ +4.47% +1.20% +0.00% / +1.98% +2.61% +1.44%] index_select wrap : Elapsed 0.005 ms (5.234 ms / 1000) 5.038 -> 5.090 ( +1.03%) [ +3.20% +0.83% +0.00% / +1.03% +2.86% +1.55%] index_select linear : Elapsed 0.005 ms (5.199 ms / 1000) 4.905 -> 4.984 ( +1.61%) [ +4.75% +3.32% +0.00% / +8.64% +1.61% +7.48%] index_select reverse : Elapsed 0.005 ms (5.138 ms / 1000) 4.942 -> 5.060 ( +2.39%) [ +3.66% +3.36% +0.00% / +5.48% +3.26% +2.39%] index_select skip64 : Elapsed 0.005 ms (5.123 ms / 1000) 5.057 -> 5.011 ( -0.91%) [ +2.16% +0.00% +0.10% / +4.61% -0.91% -0.91%] index_select skip256 : Elapsed 0.005 ms (5.166 ms / 1000) 4.962 -> 4.910 ( -1.05%) [ +4.51% +2.56% +0.00% / +5.99% -1.05% +3.87%] index_select spread : Elapsed 0.005 ms (5.186 ms / 1000) 4.972 -> 4.991 ( +0.38%) [ +4.02% +2.19% +0.00% / +6.21% +0.38% +1.01%] index_select random : Elapsed 0.005 ms (5.172 ms / 1000) 5.050 -> 5.049 ( -0.02%) [ +5.50% +0.00% +0.59% / +0.87% -0.02% +0.59%] index_select random_sorted : Elapsed 0.005 ms (5.328 ms / 1000) 4.951 -> 4.920 ( -0.63%) [+15.09% +3.25% +0.00% / +2.71% -0.63% +4.20%] index_select perm : Elapsed 0.006 ms (5.698 ms / 1000) 4.964 -> 5.008 ( +0.89%) [+10.50% +2.84% +0.00% / +1.83% +0.89% +2.01%] index_select perm_sorted : Elapsed 0.005 ms (5.485 ms / 1000) B = [2, 5] (stride (1, 2)) dim = 0 fill_cnt = 3 4.377 -> 4.256 ( -2.76%) [ +1.55% +0.00% +0.66% / -2.76% +1.85% -1.90%] index_fill_ const : Elapsed 0.004 ms (4.445 ms / 1000) 4.196 -> 4.217 ( +0.50%) [ +4.96% +4.86% +0.00% / +0.93% +0.50% +1.91%] index_fill_ linear : Elapsed 0.004 ms (4.404 ms / 1000) 4.169 -> 4.191 ( +0.53%) [ +6.96% +4.92% +0.00% / +2.61% +0.53% +4.89%] index_fill_ reverse : Elapsed 0.004 ms (4.459 ms / 1000) 4.181 -> 4.271 ( +2.15%) [ +1.08% +5.19% +0.00% / +3.87% +3.18% +2.15%] index_fill_ skip64 : Elapsed 0.004 ms (4.226 ms / 1000) 4.257 -> 4.265 ( +0.19%) [ +0.00% +3.81% +1.46% / +0.19% +6.11% +1.46%] index_fill_ skip256 : Elapsed 0.004 ms (4.257 ms / 1000) 4.194 -> 4.184 ( -0.24%) [ +3.22% +3.81% +0.00% / +2.31% -0.24% +3.74%] index_fill_ spread : Elapsed 0.004 ms (4.329 ms / 1000) 4.211 -> 4.273 ( +1.47%) [ +0.00% +3.51% +0.17% / +1.59% +3.35% +1.47%] index_fill_ random : Elapsed 0.004 ms (4.211 ms / 1000) 4.223 -> 4.266 ( +1.02%) [ +0.00% +2.63% +0.02% / +1.02% +3.48% +1.04%] index_fill_ random_sorted : Elapsed 0.004 ms (4.223 ms / 1000) B = [2, 5] (stride (1, 2)) A = [3, 5] (stride (5, 1)) dim = 0 4.959 -> 4.962 ( +0.06%) [ +3.09% +2.74% +0.00% / +5.00% +0.06% +2.02%] index_select const : Elapsed 0.005 ms (5.112 ms / 1000) 4.952 -> 4.892 ( -1.21%) [ +3.51% +2.26% +0.00% / +2.97% -1.21% +2.02%] index_select wrap : Elapsed 0.005 ms (5.126 ms / 1000) 4.936 -> 5.044 ( +2.19%) [ +4.92% +3.24% +0.00% / +2.19% +3.00% +5.98%] index_select linear : Elapsed 0.005 ms (5.179 ms / 1000) 5.063 -> 4.918 ( -2.86%) [ +2.25% +0.69% +0.00% / -0.26% -2.86% +4.19%] index_select reverse : Elapsed 0.005 ms (5.177 ms / 1000) 4.899 -> 4.916 ( +0.35%) [ +6.76% +3.37% +0.00% / +5.55% +0.35% +8.21%] index_select skip64 : Elapsed 0.005 ms (5.230 ms / 1000) 4.894 -> 5.046 ( +3.11%) [ +8.83% +3.78% +0.00% / +4.39% +3.11% +8.09%] index_select skip256 : Elapsed 0.005 ms (5.326 ms / 1000) 5.043 -> 4.911 ( -2.62%) [ +2.97% +0.50% +0.00% / +1.57% -2.62% +0.26%] index_select spread : Elapsed 0.005 ms (5.193 ms / 1000) 4.887 -> 4.950 ( +1.29%) [ +6.34% +4.87% +0.00% / +5.44% +1.29% +5.98%] index_select random : Elapsed 0.005 ms (5.197 ms / 1000) 4.917 -> 4.985 ( +1.38%) [ +6.35% +3.46% +0.00% / +4.01% +1.38% +2.77%] index_select random_sorted : Elapsed 0.005 ms (5.229 ms / 1000) 5.141 -> 5.037 ( -2.02%) [ +0.33% +1.91% +0.00% / -1.03% -1.01% -2.02%] index_select perm : Elapsed 0.005 ms (5.158 ms / 1000) 4.936 -> 4.967 ( +0.63%) [+10.01% +3.46% +0.00% / +2.76% +0.63% +3.42%] index_select perm_sorted : Elapsed 0.005 ms (5.430 ms / 1000) B = [2, 5] (stride (1, 2)) A = [3, 5] (stride (1, 3)) dim = 0 5.099 -> 4.977 ( -2.39%) [ +0.98% +0.16% +0.00% / -1.12% -2.39% -1.12%] index_select const : Elapsed 0.005 ms (5.149 ms / 1000) 5.070 -> 5.002 ( -1.34%) [ +1.10% +0.00% +3.43% / -0.73% +1.10% -1.34%] index_select wrap : Elapsed 0.005 ms (5.126 ms / 1000) 5.071 -> 5.083 ( +0.24%) [ +1.79% +0.83% +0.00% / +0.51% +1.06% +0.24%] index_select linear : Elapsed 0.005 ms (5.162 ms / 1000) 5.066 -> 4.982 ( -1.66%) [ +1.80% +0.00% +4.70% / +1.86% -1.66% -1.66%] index_select reverse : Elapsed 0.005 ms (5.157 ms / 1000) 5.193 -> 5.059 ( -2.58%) [ +0.00% +0.10% +2.70% / -1.52% -2.06% -2.58%] index_select skip64 : Elapsed 0.005 ms (5.193 ms / 1000) 4.937 -> 4.931 ( -0.12%) [ +4.70% +2.23% +0.00% / +4.09% -0.12% +3.10%] index_select skip256 : Elapsed 0.005 ms (5.169 ms / 1000) 4.941 -> 4.966 ( +0.51%) [ +8.30% +3.74% +0.00% / +2.63% +0.51% +0.87%] index_select spread : Elapsed 0.005 ms (5.351 ms / 1000) 5.104 -> 5.046 ( -1.14%) [ +2.00% +0.16% +0.00% / -0.37% -0.43% -1.14%] index_select random : Elapsed 0.005 ms (5.206 ms / 1000) 5.125 -> 4.900 ( -4.39%) [ +1.48% +0.00% +0.14% / -0.33% -4.39% +0.10%] index_select random_sorted : Elapsed 0.005 ms (5.201 ms / 1000) 5.074 -> 4.884 ( -3.74%) [ +1.77% +0.73% +0.00% / -0.37% -3.74% -1.04%] index_select perm : Elapsed 0.005 ms (5.164 ms / 1000) 5.092 -> 5.049 ( -0.84%) [ +2.28% +0.57% +0.00% / -0.84% +1.28% +2.16%] index_select perm_sorted : Elapsed 0.005 ms (5.208 ms / 1000) out_shape = [3, 2] in_shape = [3, 5] idx_dim = 1 B = [3, 2] (stride (2, 1)) dim = 1 fill_cnt = 5 4.223 -> 4.253 ( +0.71%) [ +0.00% +3.58% +0.07% / +1.66% +0.71% +4.69%] index_fill_ const : Elapsed 0.004 ms (4.223 ms / 1000) 4.184 -> 4.225 ( +0.98%) [ +3.63% +3.90% +0.00% / +0.98% +2.32% +6.79%] index_fill_ linear : Elapsed 0.004 ms (4.336 ms / 1000) 4.215 -> 4.246 ( +0.74%) [ +0.21% +3.39% +0.00% / +0.74% +1.61% +1.90%] index_fill_ reverse : Elapsed 0.004 ms (4.224 ms / 1000) 4.222 -> 4.240 ( +0.43%) [ +0.00% +5.47% +4.57% / +0.43% +4.67% +2.98%] index_fill_ skip64 : Elapsed 0.004 ms (4.222 ms / 1000) 4.185 -> 4.262 ( +1.84%) [ +5.76% +4.30% +0.00% / +5.47% +1.84% +4.87%] index_fill_ skip256 : Elapsed 0.004 ms (4.426 ms / 1000) 4.201 -> 4.241 ( +0.95%) [ +1.52% +4.12% +0.00% / +1.48% +0.95% +2.45%] index_fill_ spread : Elapsed 0.004 ms (4.265 ms / 1000) 4.209 -> 4.261 ( +1.24%) [ +0.57% +3.85% +0.00% / +1.24% +1.33% +1.85%] index_fill_ random : Elapsed 0.004 ms (4.233 ms / 1000) 4.355 -> 4.236 ( -2.73%) [ +1.70% +0.69% +0.00% / -2.64% +0.73% -2.73%] index_fill_ random_sorted : Elapsed 0.004 ms (4.429 ms / 1000) B = [3, 2] (stride (2, 1)) A = [3, 5] (stride (5, 1)) dim = 1 4.951 -> 4.962 ( +0.22%) [ +8.73% +2.95% +0.00% / +2.81% +0.22% +3.29%] index_select const : Elapsed 0.005 ms (5.383 ms / 1000) 4.966 -> 5.021 ( +1.11%) [ +4.41% +5.58% +0.00% / +1.41% +1.15% +1.11%] index_select wrap : Elapsed 0.005 ms (5.185 ms / 1000) 5.073 -> 5.011 ( -1.22%) [ +4.49% +0.00% +0.99% / +2.17% +2.15% -1.22%] index_select linear : Elapsed 0.005 ms (5.301 ms / 1000) 4.925 -> 4.978 ( +1.08%) [ +3.88% +3.96% +0.00% / +3.15% +1.08% +4.51%] index_select reverse : Elapsed 0.005 ms (5.116 ms / 1000) 4.969 -> 4.905 ( -1.29%) [ +3.40% +2.64% +0.00% / +2.33% -1.29% +1.39%] index_select skip64 : Elapsed 0.005 ms (5.138 ms / 1000) 5.100 -> 5.035 ( -1.27%) [ +1.18% +0.71% +0.00% / +0.16% +1.71% -1.27%] index_select skip256 : Elapsed 0.005 ms (5.160 ms / 1000) 4.952 -> 4.959 ( +0.14%) [ +2.95% +2.71% +0.00% / +3.45% +0.14% +2.77%] index_select spread : Elapsed 0.005 ms (5.098 ms / 1000) 4.943 -> 4.938 ( -0.10%) [ +3.84% +3.05% +0.00% / +4.92% -0.10% +2.53%] index_select strided 3 : Elapsed 0.005 ms (5.133 ms / 1000) 5.060 -> 5.011 ( -0.97%) [ +1.78% +0.00% +1.19% / -0.69% +0.71% -0.97%] index_select random : Elapsed 0.005 ms (5.150 ms / 1000) 4.938 -> 4.996 ( +1.17%) [ +3.81% +2.79% +0.00% / +2.77% +1.17% +3.81%] index_select random_sorted : Elapsed 0.005 ms (5.126 ms / 1000) 4.948 -> 4.965 ( +0.34%) [ +3.72% +2.65% +0.00% / +2.67% +0.34% +3.68%] index_select perm : Elapsed 0.005 ms (5.132 ms / 1000) 5.067 -> 5.054 ( -0.26%) [ +1.54% +0.02% +0.00% / +0.22% +1.38% -0.26%] index_select perm_sorted : Elapsed 0.005 ms (5.145 ms / 1000) B = [3, 2] (stride (2, 1)) A = [3, 5] (stride (1, 3)) dim = 1 4.985 -> 4.983 ( -0.04%) [ +3.03% +2.53% +0.00% / +2.05% -0.04% +5.34%] index_select const : Elapsed 0.005 ms (5.136 ms / 1000) 4.944 -> 4.988 ( +0.89%) [ +3.36% +2.95% +0.00% / +2.83% +0.89% +2.99%] index_select wrap : Elapsed 0.005 ms (5.110 ms / 1000) 4.890 -> 5.078 ( +3.84%) [ +4.93% +4.15% +0.00% / +3.91% +4.50% +3.84%] index_select linear : Elapsed 0.005 ms (5.131 ms / 1000) 5.037 -> 5.082 ( +0.89%) [ +2.72% +0.91% +0.00% / +0.89% +1.13% +2.14%] index_select reverse : Elapsed 0.005 ms (5.174 ms / 1000) 4.915 -> 5.037 ( +2.48%) [ +6.12% +3.89% +0.00% / +2.79% +4.11% +2.48%] index_select skip64 : Elapsed 0.005 ms (5.216 ms / 1000) 4.935 -> 5.058 ( +2.49%) [ +5.23% +4.26% +0.00% / +2.49% +3.36% +3.16%] index_select skip256 : Elapsed 0.005 ms (5.193 ms / 1000) 5.093 -> 5.089 ( -0.08%) [ +1.20% +0.27% +0.00% / +0.16% -0.08% +0.00%] index_select spread : Elapsed 0.005 ms (5.154 ms / 1000) 4.888 -> 4.976 ( +1.80%) [ +5.81% +4.30% +0.00% / +3.50% +1.80% +2.70%] index_select strided 3 : Elapsed 0.005 ms (5.172 ms / 1000) 4.964 -> 4.989 ( +0.50%) [ +2.94% +2.94% +0.00% / +2.50% +0.50% +1.29%] index_select random : Elapsed 0.005 ms (5.110 ms / 1000) 5.075 -> 4.987 ( -1.73%) [ +2.17% +1.16% +0.00% / +0.65% -1.73% +1.77%] index_select random_sorted : Elapsed 0.005 ms (5.185 ms / 1000) 4.946 -> 4.970 ( +0.49%) [ +4.19% +4.93% +0.00% / +4.99% +0.49% +3.64%] index_select perm : Elapsed 0.005 ms (5.153 ms / 1000) 4.915 -> 4.915 ( +0.00%) [ +4.88% +3.56% +0.00% / +5.98% +0.00% +2.30%] index_select perm_sorted : Elapsed 0.005 ms (5.155 ms / 1000) B = [3, 2] (stride (1, 3)) dim = 1 fill_cnt = 5 4.231 -> 4.262 ( +0.73%) [ +0.00% +3.33% +2.60% / +0.73% +3.90% +3.47%] index_fill_ const : Elapsed 0.004 ms (4.231 ms / 1000) 4.331 -> 4.285 ( -1.06%) [ +4.85% +0.51% +0.00% / +0.37% -1.06% +0.18%] index_fill_ linear : Elapsed 0.005 ms (4.541 ms / 1000) 4.223 -> 4.261 ( +0.90%) [ +0.05% +4.88% +0.00% / +0.90% +1.40% +1.30%] index_fill_ reverse : Elapsed 0.004 ms (4.225 ms / 1000) 4.229 -> 4.260 ( +0.73%) [ +3.31% +2.98% +0.00% / +0.73% +3.57% +1.68%] index_fill_ skip64 : Elapsed 0.004 ms (4.369 ms / 1000) 4.247 -> 4.271 ( +0.57%) [ +0.00% +1.95% +3.34% / +0.57% +1.37% +2.59%] index_fill_ skip256 : Elapsed 0.004 ms (4.247 ms / 1000) 4.188 -> 4.240 ( +1.24%) [ +1.81% +3.99% +0.00% / +1.24% +2.39% +6.16%] index_fill_ spread : Elapsed 0.004 ms (4.264 ms / 1000) 4.224 -> 4.223 ( -0.02%) [ +0.00% +3.41% +3.98% / -0.02% +2.13% +1.33%] index_fill_ random : Elapsed 0.004 ms (4.224 ms / 1000) 4.243 -> 4.246 ( +0.07%) [ +0.00% +2.22% +0.09% / +0.07% +3.18% +0.54%] index_fill_ random_sorted : Elapsed 0.004 ms (4.243 ms / 1000) B = [3, 2] (stride (1, 3)) A = [3, 5] (stride (5, 1)) dim = 1 5.005 -> 4.955 ( -1.00%) [ +2.40% +1.50% +0.00% / +4.42% -1.00% +3.70%] index_select const : Elapsed 0.005 ms (5.125 ms / 1000) 4.946 -> 5.039 ( +1.88%) [ +4.14% +2.53% +0.00% / +5.82% +3.03% +1.88%] index_select wrap : Elapsed 0.005 ms (5.151 ms / 1000) 4.962 -> 5.046 ( +1.69%) [ +4.80% +6.03% +0.00% / +2.34% +2.36% +1.69%] index_select linear : Elapsed 0.005 ms (5.200 ms / 1000) 5.089 -> 4.929 ( -3.14%) [ +1.57% +0.00% +0.47% / +2.63% -3.14% +0.59%] index_select reverse : Elapsed 0.005 ms (5.169 ms / 1000) 4.978 -> 4.926 ( -1.04%) [ +4.22% +1.12% +0.00% / +3.64% -1.04% +0.82%] index_select skip64 : Elapsed 0.005 ms (5.188 ms / 1000) 4.961 -> 5.008 ( +0.95%) [ +4.15% +3.55% +0.00% / +3.29% +3.55% +0.95%] index_select skip256 : Elapsed 0.005 ms (5.167 ms / 1000) 5.071 -> 4.960 ( -2.19%) [ +2.48% +0.79% +0.00% / +1.68% -2.19% +0.34%] index_select spread : Elapsed 0.005 ms (5.197 ms / 1000) 4.951 -> 5.016 ( +1.31%) [ +4.46% +3.23% +0.00% / +3.33% +1.31% +2.28%] index_select strided 3 : Elapsed 0.005 ms (5.172 ms / 1000) 4.968 -> 4.997 ( +0.58%) [ +6.68% +4.39% +0.00% / +4.79% +0.58% +6.12%] index_select random : Elapsed 0.005 ms (5.300 ms / 1000) 5.061 -> 5.034 ( -0.53%) [ +2.57% +0.00% +1.24% / +0.71% -0.53% +0.47%] index_select random_sorted : Elapsed 0.005 ms (5.191 ms / 1000) 4.964 -> 4.927 ( -0.75%) [ +9.29% +2.96% +0.00% / +3.10% -0.75% +1.33%] index_select perm : Elapsed 0.005 ms (5.425 ms / 1000) 4.934 -> 4.939 ( +0.10%) [ +8.29% +3.93% +0.00% / +3.47% +0.10% +2.15%] index_select perm_sorted : Elapsed 0.005 ms (5.343 ms / 1000) B = [3, 2] (stride (1, 3)) A = [3, 5] (stride (1, 3)) dim = 1 5.093 -> 5.094 ( +0.02%) [ +4.52% +0.00% +0.57% / +0.02% +0.77% +0.24%] index_select const : Elapsed 0.005 ms (5.323 ms / 1000) 5.083 -> 4.961 ( -2.40%) [ +4.17% +0.00% +1.32% / -0.06% -2.40% -1.93%] index_select wrap : Elapsed 0.005 ms (5.295 ms / 1000) 4.956 -> 4.949 ( -0.14%) [ +7.30% +2.87% +0.00% / +1.76% -0.14% +1.17%] index_select linear : Elapsed 0.005 ms (5.318 ms / 1000) 5.074 -> 5.148 ( +1.46%) [ +1.22% +0.22% +0.00% / +2.17% +37.37% +1.46%] index_select reverse : Elapsed 0.005 ms (5.136 ms / 1000) 4.941 -> 5.027 ( +1.74%) [ +4.15% +3.60% +0.00% / +2.47% +75.45% +1.74%] index_select skip64 : Elapsed 0.005 ms (5.146 ms / 1000) 4.994 -> 5.071 ( +1.54%) [ +2.30% +1.86% +0.00% / +1.78% +36.48% +1.54%] index_select skip256 : Elapsed 0.005 ms (5.109 ms / 1000) 5.100 -> 4.965 ( -2.65%) [ +0.00% +0.20% +0.24% / +0.69% -2.65% -1.02%] index_select spread : Elapsed 0.005 ms (5.100 ms / 1000) 4.924 -> 5.104 ( +3.66%) [ +3.94% +4.85% +0.00% / +4.16% +5.77% +3.66%] index_select strided 3 : Elapsed 0.005 ms (5.118 ms / 1000) 4.956 -> 5.020 ( +1.29%) [ +3.95% +2.80% +0.00% / +1.69% +4.22% +1.29%] index_select random : Elapsed 0.005 ms (5.152 ms / 1000) 5.058 -> 5.040 ( -0.36%) [ +4.78% +1.60% +0.00% / +0.69% +1.54% -0.36%] index_select random_sorted : Elapsed 0.005 ms (5.300 ms / 1000) 5.086 -> 5.090 ( +0.08%) [ +1.67% +1.20% +0.00% / +2.93% +2.54% +0.08%] index_select perm : Elapsed 0.005 ms (5.171 ms / 1000) 4.960 -> 5.011 ( +1.03%) [ +3.69% +3.17% +0.00% / +2.54% +5.58% +1.03%] index_select perm_sorted : Elapsed 0.005 ms (5.143 ms / 1000) out_shape = [2, 3] in_shape = [5, 3] idx_dim = 0 B = [2, 3] (stride (3, 1)) dim = 0 fill_cnt = 5 4.185 -> 4.244 ( +1.41%) [ +0.93% +4.40% +0.00% / +1.41% +5.54% +2.41%] index_fill_ const : Elapsed 0.004 ms (4.224 ms / 1000) 4.262 -> 4.270 ( +0.19%) [ +0.00% +2.09% +1.29% / +0.19% +2.16% +0.80%] index_fill_ linear : Elapsed 0.004 ms (4.262 ms / 1000) 4.201 -> 4.258 ( +1.36%) [ +0.48% +3.50% +0.00% / +1.36% +5.26% +2.38%] index_fill_ reverse : Elapsed 0.004 ms (4.221 ms / 1000) 4.218 -> 4.251 ( +0.78%) [ +0.09% +4.29% +0.00% / +0.78% +2.02% +1.49%] index_fill_ skip64 : Elapsed 0.004 ms (4.222 ms / 1000) 4.227 -> 4.238 ( +0.26%) [ +0.00% +4.00% +0.24% / +1.44% +0.26% +1.02%] index_fill_ skip256 : Elapsed 0.004 ms (4.227 ms / 1000) 4.224 -> 4.251 ( +0.64%) [ +0.00% +3.57% +2.63% / +0.64% +1.07% +3.20%] index_fill_ spread : Elapsed 0.004 ms (4.224 ms / 1000) 4.191 -> 4.271 ( +1.91%) [ +0.76% +5.30% +0.00% / +1.91% +4.13% +1.96%] index_fill_ random : Elapsed 0.004 ms (4.223 ms / 1000) 4.260 -> 4.270 ( +0.23%) [ +0.00% +2.07% +0.07% / +0.23% +0.87% +0.94%] index_fill_ random_sorted : Elapsed 0.004 ms (4.260 ms / 1000) B = [2, 3] (stride (3, 1)) A = [5, 3] (stride (3, 1)) dim = 0 5.191 -> 5.018 ( -3.33%) [ +3.97% +0.42% +0.00% / -1.48% -3.33% +1.64%] index_select const : Elapsed 0.005 ms (5.397 ms / 1000) 4.950 -> 5.071 ( +2.44%) [ +5.19% +2.55% +0.00% / +2.44% +4.67% +5.21%] index_select wrap : Elapsed 0.005 ms (5.207 ms / 1000) 4.986 -> 5.022 ( +0.72%) [ +4.89% +2.45% +0.00% / +1.42% +0.72% +1.78%] index_select linear : Elapsed 0.005 ms (5.230 ms / 1000) 5.098 -> 5.000 ( -1.92%) [ +1.75% +0.10% +0.00% / +0.14% -1.92% +3.22%] index_select reverse : Elapsed 0.005 ms (5.187 ms / 1000) 4.915 -> 5.056 ( +2.87%) [ +5.11% +4.33% +0.00% / +2.87% +4.03% +9.05%] index_select skip64 : Elapsed 0.005 ms (5.166 ms / 1000) 4.921 -> 5.092 ( +3.47%) [ +5.43% +3.66% +0.00% / +3.47% +4.51% +7.60%] index_select skip256 : Elapsed 0.005 ms (5.188 ms / 1000) 4.969 -> 4.994 ( +0.50%) [ +5.57% +2.66% +0.00% / +3.50% +0.50% +9.66%] index_select spread : Elapsed 0.005 ms (5.246 ms / 1000) 5.044 -> 5.084 ( +0.79%) [ +4.52% +3.29% +0.00% / +0.79% +2.40% +3.49%] index_select strided 3 : Elapsed 0.005 ms (5.272 ms / 1000) 4.920 -> 5.030 ( +2.24%) [ +5.57% +5.00% +0.00% / +3.23% +2.24% +6.71%] index_select random : Elapsed 0.005 ms (5.194 ms / 1000) 4.952 -> 5.007 ( +1.11%) [ +4.08% +2.18% +0.00% / +3.25% +1.11% +4.97%] index_select random_sorted : Elapsed 0.005 ms (5.154 ms / 1000) 5.108 -> 5.079 ( -0.57%) [ +2.13% +1.84% +0.00% / -0.23% +2.92% -0.57%] index_select perm : Elapsed 0.005 ms (5.217 ms / 1000) 4.905 -> 4.996 ( +1.86%) [ +5.08% +3.53% +0.00% / +11.42% +1.86% +5.28%] index_select perm_sorted : Elapsed 0.005 ms (5.154 ms / 1000) B = [2, 3] (stride (3, 1)) A = [5, 3] (stride (1, 5)) dim = 0 4.935 -> 5.029 ( +1.90%) [ +4.46% +3.16% +0.00% / +10.54% +1.90% +8.29%] index_select const : Elapsed 0.005 ms (5.155 ms / 1000) 5.091 -> 5.082 ( -0.18%) [ +1.85% +0.00% +0.18% / +3.73% -0.18% +0.12%] index_select wrap : Elapsed 0.005 ms (5.185 ms / 1000) 4.914 -> 5.028 ( +2.32%) [ +5.60% +3.32% +0.00% / +8.51% +2.32% +9.28%] index_select linear : Elapsed 0.005 ms (5.189 ms / 1000) 5.058 -> 5.119 ( +1.21%) [ +2.77% +0.00% +2.21% / +4.82% +1.21% +2.61%] index_select reverse : Elapsed 0.005 ms (5.198 ms / 1000) 5.069 -> 5.065 ( -0.08%) [ +2.13% +0.00% +1.85% / +2.64% -0.08% +1.20%] index_select skip64 : Elapsed 0.005 ms (5.177 ms / 1000) 5.067 -> 5.046 ( -0.41%) [ +2.55% +0.00% +0.22% / +2.31% -0.41% +0.55%] index_select skip256 : Elapsed 0.005 ms (5.196 ms / 1000) 5.036 -> 5.010 ( -0.52%) [ +3.79% +0.00% +0.77% / +1.67% -0.52% +1.15%] index_select spread : Elapsed 0.005 ms (5.227 ms / 1000) 5.105 -> 5.024 ( -1.59%) [ +1.74% +0.00% +0.33% / -0.06% -1.59% -0.24%] index_select strided 3 : Elapsed 0.005 ms (5.194 ms / 1000) 4.969 -> 5.041 ( +1.45%) [ +4.63% +2.72% +0.00% / +2.01% +2.94% +1.45%] index_select random : Elapsed 0.005 ms (5.199 ms / 1000) 4.932 -> 4.975 ( +0.87%) [ +6.14% +3.45% +0.00% / +3.47% +0.87% +2.37%] index_select random_sorted : Elapsed 0.005 ms (5.235 ms / 1000) 5.064 -> 5.016 ( -0.95%) [ +2.78% +0.57% +0.00% / -0.83% -0.95% +0.26%] index_select perm : Elapsed 0.005 ms (5.205 ms / 1000) 4.902 -> 5.077 ( +3.57%) [ +6.71% +3.57% +0.00% / +3.57% +4.49% +5.53%] index_select perm_sorted : Elapsed 0.005 ms (5.231 ms / 1000) B = [2, 3] (stride (1, 2)) dim = 0 fill_cnt = 5 4.212 -> 4.273 ( +1.45%) [ +4.51% +3.42% +0.00% / +1.45% +1.52% +3.58%] index_fill_ const : Elapsed 0.004 ms (4.402 ms / 1000) 4.224 -> 4.280 ( +1.33%) [ +0.26% +2.79% +0.00% / +1.66% +1.33% +2.01%] index_fill_ linear : Elapsed 0.004 ms (4.235 ms / 1000) 4.287 -> 4.248 ( -0.91%) [ +0.12% +1.21% +0.00% / -0.28% +1.77% -0.91%] index_fill_ reverse : Elapsed 0.004 ms (4.292 ms / 1000) 4.225 -> 4.246 ( +0.50%) [ +3.64% +3.34% +0.00% / +1.33% +0.50% +0.80%] index_fill_ skip64 : Elapsed 0.004 ms (4.379 ms / 1000) 4.185 -> 4.237 ( +1.24%) [ +1.74% +3.13% +0.00% / +2.80% +1.24% +3.58%] index_fill_ skip256 : Elapsed 0.004 ms (4.258 ms / 1000) 4.252 -> 4.249 ( -0.07%) [ +0.00% +2.61% +2.35% / +0.33% -0.07% +1.29%] index_fill_ spread : Elapsed 0.004 ms (4.252 ms / 1000) 4.210 -> 4.232 ( +0.52%) [ +0.76% +4.28% +0.00% / +0.52% +5.13% +1.50%] index_fill_ random : Elapsed 0.004 ms (4.242 ms / 1000) 4.221 -> 4.221 ( +0.00%) [ +1.35% +4.26% +0.00% / +0.00% +4.05% +0.47%] index_fill_ random_sorted : Elapsed 0.004 ms (4.278 ms / 1000) B = [2, 3] (stride (1, 2)) A = [5, 3] (stride (3, 1)) dim = 0 4.928 -> 5.026 ( +1.99%) [+11.00% +2.90% +0.00% / +3.10% +1.99% +3.59%] index_select const : Elapsed 0.005 ms (5.470 ms / 1000) 5.088 -> 5.031 ( -1.12%) [ +2.77% +0.28% +0.00% / -1.12% +0.22% +3.56%] index_select wrap : Elapsed 0.005 ms (5.229 ms / 1000) 4.930 -> 4.980 ( +1.01%) [ +5.58% +3.04% +0.00% / +2.56% +1.01% +27.28%] index_select linear : Elapsed 0.005 ms (5.205 ms / 1000) 4.959 -> 5.044 ( +1.71%) [ +4.48% +2.14% +0.00% / +3.17% +3.21% +1.71%] index_select reverse : Elapsed 0.005 ms (5.181 ms / 1000) 5.085 -> 5.017 ( -1.34%) [ +1.91% +0.00% +0.45% / -0.33% -0.16% -1.34%] index_select skip64 : Elapsed 0.005 ms (5.182 ms / 1000) 4.923 -> 4.954 ( +0.63%) [ +4.67% +3.88% +0.00% / +2.64% +0.63% +3.84%] index_select skip256 : Elapsed 0.005 ms (5.153 ms / 1000) 4.918 -> 4.968 ( +1.02%) [ +8.46% +3.56% +0.00% / +4.80% +1.02% +3.68%] index_select spread : Elapsed 0.005 ms (5.334 ms / 1000) 5.078 -> 4.992 ( -1.69%) [ +1.69% +2.03% +0.00% / +2.13% -1.69% -0.59%] index_select strided 3 : Elapsed 0.005 ms (5.164 ms / 1000) 4.939 -> 5.025 ( +1.74%) [ +5.37% +2.47% +0.00% / +2.96% +1.74% +2.37%] index_select random : Elapsed 0.005 ms (5.204 ms / 1000) 4.991 -> 4.978 ( -0.26%) [ +4.79% +2.72% +0.00% / +2.40% -0.26% +5.39%] index_select random_sorted : Elapsed 0.005 ms (5.230 ms / 1000) 5.059 -> 4.972 ( -1.72%) [ +2.23% +0.12% +0.00% / -0.12% -1.72% -0.43%] index_select perm : Elapsed 0.005 ms (5.172 ms / 1000) 4.901 -> 5.015 ( +2.33%) [ +5.61% +6.43% +0.00% / +2.33% +4.24% +2.51%] index_select perm_sorted : Elapsed 0.005 ms (5.176 ms / 1000) B = [2, 3] (stride (1, 2)) A = [5, 3] (stride (1, 5)) dim = 0 4.896 -> 5.016 ( +2.45%) [ +5.19% +4.04% +0.00% / +3.80% +2.45% +3.53%] index_select const : Elapsed 0.005 ms (5.150 ms / 1000) 5.101 -> 4.974 ( -2.49%) [ +0.88% +0.00% +0.33% / -0.61% -2.49% -0.59%] index_select wrap : Elapsed 0.005 ms (5.146 ms / 1000) 4.952 -> 4.983 ( +0.63%) [ +2.99% +6.18% +0.00% / +1.96% +3.70% +0.63%] index_select linear : Elapsed 0.005 ms (5.100 ms / 1000) 4.928 -> 5.082 ( +3.12%) [ +4.12% +3.67% +0.00% / +3.12% +3.47% +4.50%] index_select reverse : Elapsed 0.005 ms (5.131 ms / 1000) 4.984 -> 4.959 ( -0.50%) [ +2.99% +2.57% +0.00% / +2.09% -0.50% +2.17%] index_select skip64 : Elapsed 0.005 ms (5.133 ms / 1000) 4.890 -> 5.070 ( +3.68%) [ +5.99% +4.81% +0.00% / +3.68% +4.17% +4.19%] index_select skip256 : Elapsed 0.005 ms (5.183 ms / 1000) 4.913 -> 4.966 ( +1.08%) [ +5.45% +4.44% +0.00% / +4.34% +1.08% +5.23%] index_select spread : Elapsed 0.005 ms (5.181 ms / 1000) 5.081 -> 4.960 ( -2.38%) [ +1.71% +0.28% +0.00% / -0.47% -2.38% -0.12%] index_select strided 3 : Elapsed 0.005 ms (5.168 ms / 1000) 5.054 -> 5.070 ( +0.32%) [ +2.55% +1.07% +0.00% / +0.32% +1.62% +1.40%] index_select random : Elapsed 0.005 ms (5.183 ms / 1000) 4.957 -> 5.010 ( +1.07%) [ +6.35% +3.25% +0.00% / +1.90% +1.07% +2.54%] index_select random_sorted : Elapsed 0.005 ms (5.272 ms / 1000) 4.946 -> 4.981 ( +0.71%) [ +4.67% +5.36% +0.00% / +3.13% +0.71% +2.79%] index_select perm : Elapsed 0.005 ms (5.177 ms / 1000) 5.087 -> 5.081 ( -0.12%) [ +2.30% +0.33% +0.00% / -0.12% +1.34% +0.61%] index_select perm_sorted : Elapsed 0.005 ms (5.204 ms / 1000) out_shape = [5, 2] in_shape = [5, 3] idx_dim = 1 B = [5, 2] (stride (2, 1)) dim = 1 fill_cnt = 3 4.202 -> 4.243 ( +0.98%) [ +6.83% +4.31% +0.00% / +0.98% +1.12% +4.02%] index_fill_ const : Elapsed 0.004 ms (4.489 ms / 1000) 4.211 -> 4.292 ( +1.92%) [ +1.78% +6.34% +0.00% / +2.64% +1.92% +2.35%] index_fill_ linear : Elapsed 0.004 ms (4.286 ms / 1000) 4.237 -> 4.253 ( +0.38%) [ +0.00% +0.92% +3.14% / +0.38% +1.63% +1.53%] index_fill_ reverse : Elapsed 0.004 ms (4.237 ms / 1000) 4.341 -> 4.252 ( -2.05%) [ +0.00% +2.56% +1.08% / -2.05% +1.70% -0.46%] index_fill_ skip64 : Elapsed 0.004 ms (4.341 ms / 1000) 4.247 -> 4.284 ( +0.87%) [ +0.02% +2.61% +0.00% / +0.87% +1.55% +1.70%] index_fill_ skip256 : Elapsed 0.004 ms (4.248 ms / 1000) 4.201 -> 4.264 ( +1.50%) [ +0.00% +2.00% +1.76% / +1.95% +1.50% +2.98%] index_fill_ spread : Elapsed 0.004 ms (4.201 ms / 1000) 4.217 -> 4.250 ( +0.78%) [ +0.00% +2.94% +2.75% / +0.78% +2.54% +2.25%] index_fill_ random : Elapsed 0.004 ms (4.217 ms / 1000) 4.236 -> 4.262 ( +0.61%) [ +2.08% +2.43% +0.00% / +0.61% +0.73% +1.37%] index_fill_ random_sorted : Elapsed 0.004 ms (4.324 ms / 1000) B = [5, 2] (stride (2, 1)) A = [5, 3] (stride (3, 1)) dim = 1 4.964 -> 5.103 ( +2.80%) [ +3.02% +2.82% +0.00% / +2.80% +3.61% +5.80%] index_select const : Elapsed 0.005 ms (5.114 ms / 1000) 5.057 -> 5.013 ( -0.87%) [ +1.82% +1.44% +0.00% / +1.78% -0.87% -0.02%] index_select wrap : Elapsed 0.005 ms (5.149 ms / 1000) 4.930 -> 5.048 ( +2.39%) [ +3.85% +3.69% +0.00% / +2.70% +3.75% +2.39%] index_select linear : Elapsed 0.005 ms (5.120 ms / 1000) 4.883 -> 5.021 ( +2.83%) [ +5.22% +5.39% +0.00% / +4.24% +2.83% +7.23%] index_select reverse : Elapsed 0.005 ms (5.138 ms / 1000) 4.965 -> 4.972 ( +0.14%) [ +4.07% +3.56% +0.00% / +1.97% +0.14% +2.09%] index_select skip64 : Elapsed 0.005 ms (5.167 ms / 1000) 4.967 -> 5.080 ( +2.28%) [ +3.20% +1.71% +0.00% / +4.03% +2.28% +2.44%] index_select skip256 : Elapsed 0.005 ms (5.126 ms / 1000) 4.925 -> 4.977 ( +1.06%) [ +4.91% +4.04% +0.00% / +3.41% +1.06% +4.12%] index_select spread : Elapsed 0.005 ms (5.167 ms / 1000) 4.938 -> 5.021 ( +1.68%) [ +3.83% +4.01% +0.00% / +1.68% +3.38% +2.90%] index_select random : Elapsed 0.005 ms (5.127 ms / 1000) 5.091 -> 5.041 ( -0.98%) [ +0.73% +0.69% +0.00% / -0.98% +0.24% +2.10%] index_select random_sorted : Elapsed 0.005 ms (5.128 ms / 1000) 4.941 -> 5.010 ( +1.40%) [ +7.37% +3.76% +0.00% / +3.00% +1.40% +3.46%] index_select perm : Elapsed 0.005 ms (5.305 ms / 1000) 4.911 -> 5.001 ( +1.83%) [ +8.06% +4.79% +0.00% / +3.64% +1.83% +2.48%] index_select perm_sorted : Elapsed 0.005 ms (5.307 ms / 1000) B = [5, 2] (stride (2, 1)) A = [5, 3] (stride (1, 5)) dim = 1 5.114 -> 4.996 ( -2.31%) [ +6.02% +0.22% +0.00% / -0.72% +0.68% -2.31%] index_select const : Elapsed 0.005 ms (5.422 ms / 1000) 4.924 -> 5.025 ( +2.05%) [+10.42% +4.26% +0.00% / +3.07% +2.05% +4.00%] index_select wrap : Elapsed 0.005 ms (5.437 ms / 1000) 4.915 -> 5.020 ( +2.14%) [ +8.32% +4.13% +0.00% / +4.19% +2.28% +2.14%] index_select linear : Elapsed 0.005 ms (5.324 ms / 1000) 5.097 -> 5.057 ( -0.78%) [ +4.30% +0.06% +0.00% / -0.67% +0.39% -0.78%] index_select reverse : Elapsed 0.005 ms (5.316 ms / 1000) 4.928 -> 5.077 ( +3.02%) [ +4.67% +3.57% +0.00% / +3.02% +3.08% +6.17%] index_select skip64 : Elapsed 0.005 ms (5.158 ms / 1000) 4.977 -> 5.046 ( +1.39%) [ +2.55% +3.42% +0.00% / +2.23% +1.39% +3.98%] index_select skip256 : Elapsed 0.005 ms (5.104 ms / 1000) 5.067 -> 5.059 ( -0.16%) [ +1.93% +0.16% +0.00% / -0.16% +0.83% -0.02%] index_select spread : Elapsed 0.005 ms (5.165 ms / 1000) 4.906 -> 5.103 ( +4.02%) [ +4.16% +4.77% +0.00% / +4.14% +4.02% +4.22%] index_select random : Elapsed 0.005 ms (5.110 ms / 1000) 4.925 -> 4.994 ( +1.40%) [ +3.72% +3.76% +0.00% / +4.93% +2.52% +1.40%] index_select random_sorted : Elapsed 0.005 ms (5.108 ms / 1000) 5.028 -> 5.062 ( +0.68%) [ +2.25% +4.24% +0.00% / +0.89% +2.23% +0.68%] index_select perm : Elapsed 0.005 ms (5.141 ms / 1000) 4.966 -> 4.974 ( +0.16%) [ +3.16% +2.66% +0.00% / +2.52% +0.64% +0.16%] index_select perm_sorted : Elapsed 0.005 ms (5.123 ms / 1000) B = [5, 2] (stride (1, 5)) dim = 1 fill_cnt = 3 4.196 -> 4.245 ( +1.17%) [ +3.67% +3.62% +0.00% / +2.65% +1.17% +3.05%] index_fill_ const : Elapsed 0.004 ms (4.350 ms / 1000) 4.204 -> 4.260 ( +1.33%) [ +6.14% +3.02% +0.00% / +4.28% +1.33% +2.57%] index_fill_ linear : Elapsed 0.004 ms (4.462 ms / 1000) 4.257 -> 4.245 ( -0.28%) [ +0.00% +0.49% +1.03% / -0.28% +3.17% +1.36%] index_fill_ reverse : Elapsed 0.004 ms (4.257 ms / 1000) 4.256 -> 4.250 ( -0.14%) [ +3.41% +2.35% +0.00% / -0.14% +0.28% +3.41%] index_fill_ skip64 : Elapsed 0.004 ms (4.401 ms / 1000) 4.177 -> 4.222 ( +1.08%) [ +4.09% +4.12% +0.00% / +2.03% +1.08% +2.39%] index_fill_ skip256 : Elapsed 0.004 ms (4.348 ms / 1000) 4.247 -> 4.254 ( +0.16%) [ +4.33% +2.78% +0.00% / +0.16% +3.23% +0.71%] index_fill_ spread : Elapsed 0.004 ms (4.431 ms / 1000) 4.289 -> 4.241 ( -1.12%) [ +5.29% +1.00% +0.00% / -1.12% -0.65% +1.24%] index_fill_ random : Elapsed 0.005 ms (4.516 ms / 1000) 4.166 -> 4.251 ( +2.04%) [ +8.57% +4.68% +0.00% / +2.04% +2.52% +3.58%] index_fill_ random_sorted : Elapsed 0.005 ms (4.523 ms / 1000) B = [5, 2] (stride (1, 5)) A = [5, 3] (stride (3, 1)) dim = 1 4.970 -> 5.018 ( +0.97%) [ +4.29% +2.52% +0.00% / +1.79% +0.97% +1.47%] index_select const : Elapsed 0.005 ms (5.183 ms / 1000) 5.054 -> 5.074 ( +0.40%) [ +2.37% +1.03% +0.00% / +0.40% +1.60% +1.66%] index_select wrap : Elapsed 0.005 ms (5.174 ms / 1000) 4.908 -> 4.974 ( +1.34%) [ +4.56% +4.32% +0.00% / +3.79% +1.34% +3.73%] index_select linear : Elapsed 0.005 ms (5.132 ms / 1000) 4.965 -> 5.007 ( +0.85%) [ +3.00% +2.96% +0.00% / +2.48% +0.85% +3.77%] index_select reverse : Elapsed 0.005 ms (5.114 ms / 1000) 5.099 -> 5.059 ( -0.78%) [ +0.00% +0.35% +0.65% / -0.78% +0.39% +3.53%] index_select skip64 : Elapsed 0.005 ms (5.099 ms / 1000) 4.971 -> 5.018 ( +0.95%) [ +3.00% +1.83% +0.00% / +2.41% +0.95% +7.22%] index_select skip256 : Elapsed 0.005 ms (5.120 ms / 1000) 4.964 -> 5.028 ( +1.29%) [ +3.99% +2.10% +0.00% / +2.84% +1.29% +2.92%] index_select spread : Elapsed 0.005 ms (5.162 ms / 1000) 5.103 -> 5.071 ( -0.63%) [ +1.02% +0.00% +0.04% / -0.63% +0.96% +2.41%] index_select random : Elapsed 0.005 ms (5.155 ms / 1000) 4.920 -> 5.010 ( +1.83%) [ +9.37% +3.74% +0.00% / +4.39% +1.83% +3.90%] index_select random_sorted : Elapsed 0.005 ms (5.381 ms / 1000) 4.904 -> 5.009 ( +2.14%) [ +8.46% +4.53% +0.00% / +6.69% +2.14% +3.38%] index_select perm : Elapsed 0.005 ms (5.319 ms / 1000) 5.025 -> 5.038 ( +0.26%) [ +2.71% +1.37% +0.00% / +0.70% +1.45% +0.26%] index_select perm_sorted : Elapsed 0.005 ms (5.161 ms / 1000) B = [5, 2] (stride (1, 5)) A = [5, 3] (stride (1, 5)) dim = 1 4.994 -> 5.005 ( +0.22%) [ +3.34% +2.70% +0.00% / +3.00% +0.22% +1.74%] index_select const : Elapsed 0.005 ms (5.161 ms / 1000) 4.946 -> 4.961 ( +0.30%) [ +6.83% +3.34% +0.00% / +0.83% +0.30% +1.56%] index_select wrap : Elapsed 0.005 ms (5.284 ms / 1000) 4.934 -> 5.046 ( +2.27%) [ +5.45% +3.95% +0.00% / +3.26% +3.79% +2.27%] index_select linear : Elapsed 0.005 ms (5.203 ms / 1000) 5.026 -> 5.027 ( +0.02%) [ +2.45% +1.61% +0.00% / +0.62% +0.02% +2.15%] index_select reverse : Elapsed 0.005 ms (5.149 ms / 1000) 4.965 -> 5.041 ( +1.53%) [ +3.26% +2.52% +0.00% / +3.38% +1.69% +1.53%] index_select skip64 : Elapsed 0.005 ms (5.127 ms / 1000) 4.959 -> 5.063 ( +2.10%) [ +3.49% +3.47% +0.00% / +2.40% +6.84% +2.10%] index_select skip256 : Elapsed 0.005 ms (5.132 ms / 1000) 5.083 -> 5.113 ( +0.59%) [ +1.63% +0.59% +0.00% / +0.59% +1.67% +1.06%] index_select spread : Elapsed 0.005 ms (5.166 ms / 1000) 4.934 -> 5.061 ( +2.57%) [ +3.73% +3.51% +0.00% / +3.87% +4.84% +2.57%] index_select random : Elapsed 0.005 ms (5.118 ms / 1000) 5.134 -> 5.037 ( -1.89%) [ +2.03% +4.52% +0.00% / +0.90% -0.56% -1.89%] index_select random_sorted : Elapsed 0.005 ms (5.238 ms / 1000) 5.037 -> 5.007 ( -0.60%) [ +4.73% +1.51% +0.00% / +1.15% -0.60% +1.95%] index_select perm : Elapsed 0.005 ms (5.275 ms / 1000) 5.215 -> 4.960 ( -4.89%) [ +0.00% +3.03% +0.19% / -2.63% -4.89% -2.42%] index_select perm_sorted : Elapsed 0.005 ms (5.215 ms / 1000) out_shape = [3, 5] in_shape = [2, 5] idx_dim = 0 B = [3, 5] (stride (5, 1)) dim = 0 fill_cnt = 2 4.328 -> 4.241 ( -2.01%) [ +0.79% +1.34% +0.00% / -1.85% -2.01% -0.81%] index_fill_ const : Elapsed 0.004 ms (4.362 ms / 1000) 4.288 -> 4.282 ( -0.14%) [ +0.00% +2.05% +1.84% / -0.14% +2.01% +1.33%] index_fill_ linear : Elapsed 0.004 ms (4.288 ms / 1000) 4.220 -> 4.307 ( +2.06%) [ +0.00% +3.96% +4.57% / +3.36% +2.06% +4.12%] index_fill_ reverse : Elapsed 0.004 ms (4.220 ms / 1000) 4.224 -> 4.239 ( +0.36%) [ +0.85% +2.89% +0.00% / +1.99% +0.36% +1.96%] index_fill_ skip64 : Elapsed 0.004 ms (4.260 ms / 1000) 4.242 -> 4.303 ( +1.44%) [ +0.59% +2.26% +0.00% / +1.44% +3.70% +3.47%] index_fill_ skip256 : Elapsed 0.004 ms (4.267 ms / 1000) 4.264 -> 4.247 ( -0.40%) [ +0.00% +2.09% +2.95% / +0.40% -0.40% +0.59%] index_fill_ spread : Elapsed 0.004 ms (4.264 ms / 1000) 4.187 -> 4.248 ( +1.46%) [ +1.39% +3.18% +0.00% / +3.46% +1.46% +3.30%] index_fill_ random : Elapsed 0.004 ms (4.245 ms / 1000) 4.221 -> 4.211 ( -0.24%) [ +5.83% +3.15% +0.00% / +0.92% -0.24% +1.78%] index_fill_ random_sorted : Elapsed 0.004 ms (4.467 ms / 1000) 4.267 -> 4.278 ( +0.26%) [ +3.84% +6.47% +0.00% / +0.26% +3.37% +0.77%] index_fill_ perm : Elapsed 0.004 ms (4.431 ms / 1000) 4.260 -> 4.242 ( -0.42%) [ +0.00% +2.35% +0.42% / +0.38% -0.42% +1.48%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.260 ms / 1000) B = [3, 5] (stride (5, 1)) A = [2, 5] (stride (5, 1)) dim = 0 4.485 -> 4.452 ( -0.74%) [ +0.00% +3.66% +0.49% / +5.64% -0.74% +0.82%] index_add_ linear : Elapsed 0.004 ms (4.485 ms / 1000) 4.480 -> 4.470 ( -0.22%) [ +0.51% +3.39% +0.00% / +1.72% -0.22% -0.20%] index_copy_ linear : Elapsed 0.005 ms (4.503 ms / 1000) 4.472 -> 4.476 ( +0.09%) [ +0.00% +4.61% +2.84% / +3.29% +2.33% +0.09%] index_add_ reverse : Elapsed 0.004 ms (4.472 ms / 1000) 4.442 -> 4.438 ( -0.09%) [ +0.00% +3.47% +0.14% / +3.33% -0.09% +2.77%] index_copy_ reverse : Elapsed 0.004 ms (4.442 ms / 1000) 4.437 -> 4.493 ( +1.26%) [ +0.00% +5.09% +1.44% / +3.61% +1.26% +2.86%] index_add_ spread : Elapsed 0.004 ms (4.437 ms / 1000) 4.432 -> 4.573 ( +3.18%) [ +0.00% +4.90% +1.11% / +3.18% +3.20% +4.96%] index_copy_ spread : Elapsed 0.004 ms (4.432 ms / 1000) 4.417 -> 4.456 ( +0.88%) [ +0.00% +5.37% +4.39% / +4.37% +0.88% +2.11%] index_add_ perm : Elapsed 0.004 ms (4.417 ms / 1000) 4.414 -> 4.465 ( +1.16%) [ +0.00% +4.60% +1.16% / +4.71% +1.31% +1.16%] index_copy_ perm : Elapsed 0.004 ms (4.414 ms / 1000) 4.461 -> 4.458 ( -0.07%) [ +0.00% +4.84% +0.56% / +5.49% +0.25% -0.07%] index_add_ perm_sorted : Elapsed 0.004 ms (4.461 ms / 1000) 4.478 -> 4.477 ( -0.02%) [ +0.00% +3.13% +3.39% / +2.14% +2.32% -0.02%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.478 ms / 1000) 4.945 -> 4.995 ( +1.01%) [ +5.04% +3.54% +0.00% / +5.20% +1.01% +2.69%] index_select const : Elapsed 0.005 ms (5.194 ms / 1000) 4.940 -> 4.952 ( +0.24%) [ +5.55% +3.22% +0.00% / +5.65% +0.24% +3.40%] index_select wrap : Elapsed 0.005 ms (5.214 ms / 1000) 4.997 -> 5.126 ( +2.58%) [ +4.68% +1.78% +0.00% / +2.58% +2.78% +3.10%] index_select linear : Elapsed 0.005 ms (5.231 ms / 1000) 4.941 -> 4.958 ( +0.34%) [ +4.78% +3.02% +0.00% / +6.44% +0.34% +2.83%] index_select reverse : Elapsed 0.005 ms (5.177 ms / 1000) 4.941 -> 4.981 ( +0.81%) [ +9.67% +3.34% +0.00% / +4.13% +0.81% +2.53%] index_select skip64 : Elapsed 0.005 ms (5.419 ms / 1000) 4.963 -> 5.089 ( +2.54%) [ +4.17% +2.36% +0.00% / +2.70% +2.54% +2.76%] index_select skip256 : Elapsed 0.005 ms (5.170 ms / 1000) 5.101 -> 4.963 ( -2.71%) [ +2.37% +0.00% +2.47% / +0.29% -2.71% +0.69%] index_select spread : Elapsed 0.005 ms (5.222 ms / 1000) 4.921 -> 4.920 ( -0.02%) [ +4.49% +3.54% +0.00% / +3.25% -0.02% +5.16%] index_select random : Elapsed 0.005 ms (5.142 ms / 1000) 4.910 -> 5.121 ( +4.30%) [ +5.30% +3.75% +0.00% / +6.19% +4.30% +5.80%] index_select random_sorted : Elapsed 0.005 ms (5.170 ms / 1000) B = [3, 5] (stride (5, 1)) A = [2, 5] (stride (1, 2)) dim = 0 4.459 -> 4.408 ( -1.14%) [ +0.00% +5.16% +5.67% / +4.87% -1.14% +2.38%] index_add_ linear : Elapsed 0.004 ms (4.459 ms / 1000) 4.463 -> 4.545 ( +1.84%) [ +0.36% +3.56% +0.00% / +3.47% +1.84% +5.11%] index_copy_ linear : Elapsed 0.004 ms (4.479 ms / 1000) 4.445 -> 4.490 ( +1.01%) [ +0.81% +5.78% +0.00% / +3.96% +1.01% +5.42%] index_add_ reverse : Elapsed 0.004 ms (4.481 ms / 1000) 4.496 -> 4.577 ( +1.80%) [ +0.00% +3.47% +2.62% / +1.80% +1.98% +4.78%] index_copy_ reverse : Elapsed 0.004 ms (4.496 ms / 1000) 4.442 -> 4.524 ( +1.85%) [ +0.00% +5.81% +1.67% / +4.14% +1.85% +1.91%] index_add_ spread : Elapsed 0.004 ms (4.442 ms / 1000) 4.492 -> 4.447 ( -1.00%) [ +0.16% +2.85% +0.00% / +1.20% -1.00% +0.07%] index_copy_ spread : Elapsed 0.004 ms (4.499 ms / 1000) 4.470 -> 4.461 ( -0.20%) [ +5.73% +5.01% +0.00% / +3.62% +2.75% -0.20%] index_add_ perm : Elapsed 0.005 ms (4.726 ms / 1000) good 4.708 -> 4.444 ( -5.61%) [ +2.53% +1.19% +0.00% / -2.27% -5.61% -4.35%] index_copy_ perm : Elapsed 0.005 ms (4.827 ms / 1000) 4.473 -> 4.461 ( -0.27%) [ +2.75% +4.34% +0.00% / +2.08% -0.27% -0.16%] index_add_ perm_sorted : Elapsed 0.005 ms (4.596 ms / 1000) 4.476 -> 4.441 ( -0.78%) [ +3.80% +4.11% +0.00% / +2.30% +0.02% -0.78%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.646 ms / 1000) 5.133 -> 5.055 ( -1.52%) [ +4.68% +0.04% +0.00% / -1.13% -0.18% -1.52%] index_select const : Elapsed 0.005 ms (5.373 ms / 1000) 4.906 -> 5.058 ( +3.10%) [ +5.67% +4.77% +0.00% / +8.70% +3.10% +3.73%] index_select wrap : Elapsed 0.005 ms (5.184 ms / 1000) 4.926 -> 4.969 ( +0.87%) [ +5.24% +3.65% +0.00% / +5.24% +0.87% +3.02%] index_select linear : Elapsed 0.005 ms (5.184 ms / 1000) 4.972 -> 5.065 ( +1.87%) [ +2.98% +3.30% +0.00% / +1.87% +3.68% +2.74%] index_select reverse : Elapsed 0.005 ms (5.120 ms / 1000) 5.071 -> 4.963 ( -2.13%) [ +1.58% +0.51% +0.00% / +3.06% -2.13% +0.93%] index_select skip64 : Elapsed 0.005 ms (5.151 ms / 1000) 4.952 -> 4.941 ( -0.22%) [ +3.39% +3.29% +0.00% / +5.39% -0.22% +2.34%] index_select skip256 : Elapsed 0.005 ms (5.120 ms / 1000) 4.986 -> 5.061 ( +1.50%) [ +4.09% +2.53% +0.00% / +1.74% +1.50% +2.13%] index_select spread : Elapsed 0.005 ms (5.190 ms / 1000) 5.088 -> 4.982 ( -2.08%) [ +1.59% +0.24% +0.00% / +0.92% -2.08% +1.04%] index_select random : Elapsed 0.005 ms (5.169 ms / 1000) 4.924 -> 5.042 ( +2.40%) [ +6.01% +8.00% +0.00% / +6.62% +3.86% +2.40%] index_select random_sorted : Elapsed 0.005 ms (5.220 ms / 1000) B = [3, 5] (stride (1, 3)) dim = 0 fill_cnt = 2 4.186 -> 4.270 ( +2.01%) [ +1.34% +4.20% +0.00% / +2.29% +2.01% +3.54%] index_fill_ const : Elapsed 0.004 ms (4.242 ms / 1000) 4.244 -> 4.336 ( +2.17%) [ +0.00% +2.54% +2.54% / +4.08% +2.50% +2.17%] index_fill_ linear : Elapsed 0.004 ms (4.244 ms / 1000) 4.177 -> 4.247 ( +1.68%) [ +3.73% +5.10% +0.00% / +2.82% +1.80% +1.68%] index_fill_ reverse : Elapsed 0.004 ms (4.333 ms / 1000) 4.190 -> 4.263 ( +1.74%) [ +0.74% +3.94% +0.00% / +2.29% +2.32% +1.74%] index_fill_ skip64 : Elapsed 0.004 ms (4.221 ms / 1000) 4.175 -> 4.235 ( +1.44%) [ +5.01% +5.27% +0.00% / +1.44% +4.48% +3.19%] index_fill_ skip256 : Elapsed 0.004 ms (4.384 ms / 1000) 4.367 -> 4.284 ( -1.90%) [ +0.55% +0.00% +0.30% / -1.76% -1.90% -1.90%] index_fill_ spread : Elapsed 0.004 ms (4.391 ms / 1000) 4.170 -> 4.227 ( +1.37%) [ +4.39% +4.82% +0.00% / +2.71% +1.37% +2.06%] index_fill_ random : Elapsed 0.004 ms (4.353 ms / 1000) 4.244 -> 4.241 ( -0.07%) [ +2.52% +3.18% +0.00% / +1.41% -0.07% +5.14%] index_fill_ random_sorted : Elapsed 0.004 ms (4.351 ms / 1000) 4.256 -> 4.316 ( +1.41%) [ +0.00% +2.42% +1.50% / +1.41% +2.84% +2.00%] index_fill_ perm : Elapsed 0.004 ms (4.256 ms / 1000) 4.185 -> 4.267 ( +1.96%) [ +0.29% +3.89% +0.00% / +1.96% +1.96% +3.44%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.197 ms / 1000) B = [3, 5] (stride (1, 3)) A = [2, 5] (stride (5, 1)) dim = 0 4.471 -> 4.546 ( +1.68%) [ +0.00% +4.41% +0.29% / +3.11% +2.28% +1.68%] index_add_ linear : Elapsed 0.004 ms (4.471 ms / 1000) 4.460 -> 4.476 ( +0.36%) [ +1.05% +4.33% +0.00% / +3.77% +3.52% +0.36%] index_copy_ linear : Elapsed 0.005 ms (4.507 ms / 1000) 4.514 -> 4.503 ( -0.24%) [ +0.00% +2.61% +3.99% / +4.65% -0.24% -0.09%] index_add_ reverse : Elapsed 0.005 ms (4.514 ms / 1000) 4.460 -> 4.460 ( +0.00%) [ +1.14% +3.16% +0.00% / +2.29% +0.25% +0.00%] index_copy_ reverse : Elapsed 0.005 ms (4.511 ms / 1000) 4.483 -> 4.450 ( -0.74%) [ +0.60% +5.15% +0.00% / +3.12% -0.74% -0.40%] index_add_ spread : Elapsed 0.005 ms (4.510 ms / 1000) 4.516 -> 4.467 ( -1.09%) [ +0.00% +2.35% +0.86% / +1.90% +2.52% -1.09%] index_copy_ spread : Elapsed 0.005 ms (4.516 ms / 1000) 4.456 -> 4.475 ( +0.43%) [ +0.00% +5.05% +4.42% / +3.14% +0.43% +5.57%] index_add_ perm : Elapsed 0.004 ms (4.456 ms / 1000) 4.435 -> 4.456 ( +0.47%) [ +1.87% +5.37% +0.00% / +3.81% +0.47% +0.92%] index_copy_ perm : Elapsed 0.005 ms (4.518 ms / 1000) 4.494 -> 4.484 ( -0.22%) [ +0.00% +3.87% +2.63% / +1.89% +0.45% -0.22%] index_add_ perm_sorted : Elapsed 0.004 ms (4.494 ms / 1000) 4.471 -> 4.528 ( +1.27%) [ +0.00% +3.76% +2.48% / +3.44% +1.27% +1.99%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.471 ms / 1000) 4.935 -> 4.970 ( +0.71%) [ +6.00% +3.59% +0.00% / +3.49% +0.71% +2.70%] index_select const : Elapsed 0.005 ms (5.231 ms / 1000) 4.927 -> 4.947 ( +0.41%) [ +4.97% +3.47% +0.00% / +3.59% +0.41% +2.09%] index_select wrap : Elapsed 0.005 ms (5.172 ms / 1000) 5.040 -> 5.078 ( +0.75%) [ +3.02% +1.39% +0.00% / +2.30% +2.14% +0.75%] index_select linear : Elapsed 0.005 ms (5.192 ms / 1000) 4.967 -> 4.968 ( +0.02%) [ +4.17% +3.78% +0.00% / +3.04% +0.74% +0.02%] index_select reverse : Elapsed 0.005 ms (5.174 ms / 1000) 4.956 -> 5.014 ( +1.17%) [ +5.33% +2.80% +0.00% / +2.60% +1.39% +1.17%] index_select skip64 : Elapsed 0.005 ms (5.220 ms / 1000) 4.973 -> 5.078 ( +2.11%) [ +3.76% +3.50% +0.00% / +3.72% +3.14% +2.11%] index_select skip256 : Elapsed 0.005 ms (5.160 ms / 1000) 5.063 -> 4.983 ( -1.58%) [ +3.22% +1.15% +0.00% / -0.22% -1.58% -1.03%] index_select spread : Elapsed 0.005 ms (5.226 ms / 1000) 4.955 -> 4.965 ( +0.20%) [ +3.81% +8.66% +0.00% / +2.85% +0.20% +4.90%] index_select random : Elapsed 0.005 ms (5.144 ms / 1000) 4.952 -> 5.098 ( +2.95%) [ +4.56% +3.35% +0.00% / +3.39% +3.39% +2.95%] index_select random_sorted : Elapsed 0.005 ms (5.178 ms / 1000) B = [3, 5] (stride (1, 3)) A = [2, 5] (stride (1, 2)) dim = 0 4.593 -> 4.443 ( -3.27%) [ +0.00% +2.29% +0.17% / -0.46% -2.24% -3.27%] index_add_ linear : Elapsed 0.005 ms (4.593 ms / 1000) 4.492 -> 4.438 ( -1.20%) [ +4.45% +3.01% +0.00% / +2.67% -0.07% -1.20%] index_copy_ linear : Elapsed 0.005 ms (4.692 ms / 1000) 4.450 -> 4.444 ( -0.13%) [ +2.47% +5.46% +0.00% / +4.70% +2.61% -0.13%] index_add_ reverse : Elapsed 0.005 ms (4.560 ms / 1000) 4.478 -> 4.535 ( +1.27%) [ +1.09% +3.62% +0.00% / +5.16% +1.94% +1.27%] index_copy_ reverse : Elapsed 0.005 ms (4.527 ms / 1000) 4.462 -> 4.459 ( -0.07%) [ +0.00% +6.36% +2.91% / +3.38% -0.07% +5.06%] index_add_ spread : Elapsed 0.004 ms (4.462 ms / 1000) 4.493 -> 4.457 ( -0.80%) [ +0.58% +3.69% +0.00% / +2.23% -0.80% -0.42%] index_copy_ spread : Elapsed 0.005 ms (4.519 ms / 1000) 4.459 -> 4.537 ( +1.75%) [ +0.00% +5.14% +9.44% / +4.31% +2.96% +1.75%] index_add_ perm : Elapsed 0.004 ms (4.459 ms / 1000) 4.523 -> 4.449 ( -1.64%) [ +0.00% +2.45% +1.33% / +1.19% -1.59% -1.64%] index_copy_ perm : Elapsed 0.005 ms (4.523 ms / 1000) 4.416 -> 4.462 ( +1.04%) [ +0.00% +5.93% +2.63% / +7.25% +1.04% +1.06%] index_add_ perm_sorted : Elapsed 0.004 ms (4.416 ms / 1000) 4.497 -> 4.457 ( -0.89%) [ +1.65% +5.98% +0.00% / +2.27% -0.89% +1.62%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.571 ms / 1000) 5.118 -> 5.095 ( -0.45%) [ +2.23% +1.33% +0.00% / -0.21% -0.45% -0.29%] index_select const : Elapsed 0.005 ms (5.232 ms / 1000) 4.915 -> 5.006 ( +1.85%) [ +6.25% +6.08% +0.00% / +3.44% +1.85% +2.54%] index_select wrap : Elapsed 0.005 ms (5.222 ms / 1000) 4.966 -> 5.006 ( +0.81%) [ +8.74% +3.79% +0.00% / +2.15% +0.81% +5.30%] index_select linear : Elapsed 0.005 ms (5.400 ms / 1000) 5.033 -> 5.094 ( +1.21%) [ +3.02% +1.45% +0.00% / +1.21% +2.28% +2.66%] index_select reverse : Elapsed 0.005 ms (5.185 ms / 1000) 4.964 -> 4.963 ( -0.02%) [ +5.80% +3.59% +0.00% / +8.72% -0.02% +4.27%] index_select skip64 : Elapsed 0.005 ms (5.252 ms / 1000) 4.953 -> 4.949 ( -0.08%) [ +5.88% +4.44% +0.00% / +1.74% -0.08% +1.37%] index_select skip256 : Elapsed 0.005 ms (5.244 ms / 1000) 5.066 -> 5.083 ( +0.34%) [ +2.31% +0.00% +0.00% / +0.41% +0.34% +0.51%] index_select spread : Elapsed 0.005 ms (5.183 ms / 1000) 5.002 -> 5.000 ( -0.04%) [ +2.74% +3.12% +0.00% / +1.28% -0.04% +0.70%] index_select random : Elapsed 0.005 ms (5.139 ms / 1000) 4.956 -> 5.011 ( +1.11%) [ +4.02% +3.97% +0.00% / +2.64% +1.11% +2.34%] index_select random_sorted : Elapsed 0.005 ms (5.155 ms / 1000) out_shape = [2, 3] in_shape = [2, 5] idx_dim = 1 B = [2, 3] (stride (3, 1)) dim = 1 fill_cnt = 5 4.286 -> 4.272 ( -0.33%) [ +1.21% +1.59% +0.00% / -0.33% +3.45% +1.59%] index_fill_ const : Elapsed 0.004 ms (4.338 ms / 1000) 4.348 -> 4.239 ( -2.51%) [ +0.39% +1.20% +0.00% / -2.21% -0.74% -2.51%] index_fill_ linear : Elapsed 0.004 ms (4.365 ms / 1000) 4.185 -> 4.288 ( +2.46%) [ +1.86% +4.90% +0.00% / +2.46% +5.97% +3.42%] index_fill_ reverse : Elapsed 0.004 ms (4.263 ms / 1000) 4.214 -> 4.268 ( +1.28%) [ +4.27% +3.44% +0.00% / +1.99% +2.82% +1.28%] index_fill_ skip64 : Elapsed 0.004 ms (4.394 ms / 1000) 4.287 -> 4.318 ( +0.72%) [ +0.00% +2.17% +4.71% / +0.75% +3.27% +0.72%] index_fill_ skip256 : Elapsed 0.004 ms (4.287 ms / 1000) 4.185 -> 4.242 ( +1.36%) [ +2.49% +4.97% +0.00% / +1.36% +3.46% +1.84%] index_fill_ spread : Elapsed 0.004 ms (4.289 ms / 1000) 4.184 -> 4.243 ( +1.41%) [ +5.26% +5.47% +0.00% / +1.41% +2.92% +4.02%] index_fill_ random : Elapsed 0.004 ms (4.404 ms / 1000) 4.260 -> 4.286 ( +0.61%) [ +0.00% +3.38% +2.07% / +0.63% +3.40% +0.61%] index_fill_ random_sorted : Elapsed 0.004 ms (4.260 ms / 1000) B = [2, 3] (stride (3, 1)) A = [2, 5] (stride (5, 1)) dim = 1 5.096 -> 5.003 ( -1.82%) [ +0.00% +0.51% +0.55% / -1.39% -1.82% +0.59%] index_select const : Elapsed 0.005 ms (5.096 ms / 1000) 4.901 -> 4.940 ( +0.80%) [ +4.37% +4.82% +0.00% / +3.10% +0.80% +6.75%] index_select wrap : Elapsed 0.005 ms (5.115 ms / 1000) 4.959 -> 5.041 ( +1.65%) [ +3.25% +2.66% +0.00% / +3.02% +1.65% +3.17%] index_select linear : Elapsed 0.005 ms (5.120 ms / 1000) 5.100 -> 5.065 ( -0.69%) [ +0.51% +3.45% +0.00% / -0.37% -0.69% -0.41%] index_select reverse : Elapsed 0.005 ms (5.126 ms / 1000) 4.923 -> 4.965 ( +0.85%) [ +4.43% +4.10% +0.00% / +3.07% +0.85% +3.03%] index_select skip64 : Elapsed 0.005 ms (5.141 ms / 1000) 4.949 -> 5.012 ( +1.27%) [ +3.68% +2.42% +0.00% / +3.37% +1.27% +2.89%] index_select skip256 : Elapsed 0.005 ms (5.131 ms / 1000) 5.102 -> 5.051 ( -1.00%) [ +0.74% +0.08% +0.00% / -1.00% +1.51% -0.37%] index_select spread : Elapsed 0.005 ms (5.140 ms / 1000) 4.968 -> 5.044 ( +1.53%) [ +3.72% +2.36% +0.00% / +2.29% +1.53% +1.75%] index_select strided 3 : Elapsed 0.005 ms (5.153 ms / 1000) 4.976 -> 4.988 ( +0.24%) [ +3.84% +2.93% +0.00% / +2.49% +0.24% +0.28%] index_select random : Elapsed 0.005 ms (5.167 ms / 1000) 5.036 -> 5.071 ( +0.69%) [ +1.77% +1.71% +0.00% / +0.69% +1.97% +0.75%] index_select random_sorted : Elapsed 0.005 ms (5.125 ms / 1000) 5.019 -> 5.013 ( -0.12%) [ +1.91% +2.63% +0.00% / +0.72% +0.22% -0.12%] index_select perm : Elapsed 0.005 ms (5.115 ms / 1000) 4.988 -> 5.013 ( +0.50%) [ +2.81% +1.20% +0.00% / +2.13% +0.50% +0.72%] index_select perm_sorted : Elapsed 0.005 ms (5.128 ms / 1000) B = [2, 3] (stride (3, 1)) A = [2, 5] (stride (1, 2)) dim = 1 4.976 -> 5.096 ( +2.41%) [ +4.34% +3.52% +0.00% / +3.07% +3.16% +2.41%] index_select const : Elapsed 0.005 ms (5.192 ms / 1000) 5.051 -> 5.059 ( +0.16%) [ +1.96% +1.17% +0.00% / +3.13% +0.69% +0.16%] index_select wrap : Elapsed 0.005 ms (5.150 ms / 1000) 4.919 -> 5.059 ( +2.85%) [ +7.38% +3.27% +0.00% / +3.11% +4.23% +2.85%] index_select linear : Elapsed 0.005 ms (5.282 ms / 1000) bad 4.930 -> 5.214 ( +5.76%) [ +4.20% +8.38% +0.00% / +6.55% +8.30% +5.76%] index_select reverse : Elapsed 0.005 ms (5.137 ms / 1000) 5.096 -> 4.980 ( -2.28%) [ +1.04% +1.22% +0.00% / +0.37% -2.28% -1.26%] index_select skip64 : Elapsed 0.005 ms (5.149 ms / 1000) 4.955 -> 4.981 ( +0.52%) [ +4.00% +4.04% +0.00% / +2.56% +0.52% +2.06%] index_select skip256 : Elapsed 0.005 ms (5.153 ms / 1000) 4.966 -> 5.084 ( +2.38%) [ +3.75% +3.75% +0.00% / +2.82% +2.38% +3.54%] index_select spread : Elapsed 0.005 ms (5.152 ms / 1000) 5.123 -> 4.940 ( -3.57%) [ +0.74% +0.00% +0.86% / -1.76% -3.57% +2.44%] index_select strided 3 : Elapsed 0.005 ms (5.161 ms / 1000) 4.944 -> 5.013 ( +1.40%) [ +4.27% +3.48% +0.00% / +3.03% +1.40% +5.56%] index_select random : Elapsed 0.005 ms (5.155 ms / 1000) 4.948 -> 5.101 ( +3.09%) [ +5.19% +4.67% +0.00% / +3.13% +3.09% +3.90%] index_select random_sorted : Elapsed 0.005 ms (5.205 ms / 1000) 5.040 -> 5.002 ( -0.75%) [ +2.82% +2.62% +0.00% / +0.28% -0.75% -0.14%] index_select perm : Elapsed 0.005 ms (5.182 ms / 1000) 4.920 -> 5.007 ( +1.77%) [ +4.72% +8.52% +0.00% / +3.29% +1.77% +2.50%] index_select perm_sorted : Elapsed 0.005 ms (5.152 ms / 1000) B = [2, 3] (stride (1, 2)) dim = 1 fill_cnt = 5 4.183 -> 4.258 ( +1.79%) [ +4.97% +11.31% +0.00% / +3.94% +3.32% +1.79%] index_fill_ const : Elapsed 0.004 ms (4.391 ms / 1000) 4.214 -> 4.285 ( +1.68%) [ +5.20% +10.61% +0.00% / +1.68% +2.33% +6.57%] index_fill_ linear : Elapsed 0.004 ms (4.433 ms / 1000) 4.361 -> 4.300 ( -1.40%) [ +1.81% +4.98% +0.00% / -1.40% +1.01% -1.15%] index_fill_ reverse : Elapsed 0.004 ms (4.440 ms / 1000) 4.254 -> 4.248 ( -0.14%) [ +0.00% +5.12% +0.71% / +0.35% +1.20% -0.14%] index_fill_ skip64 : Elapsed 0.004 ms (4.254 ms / 1000) 4.189 -> 4.279 ( +2.15%) [ +1.93% +8.93% +0.00% / +2.15% +5.90% +4.94%] index_fill_ skip256 : Elapsed 0.004 ms (4.270 ms / 1000) 4.298 -> 4.273 ( -0.58%) [ +0.00% +2.12% +1.40% / -0.58% -0.12% -0.05%] index_fill_ spread : Elapsed 0.004 ms (4.298 ms / 1000) 4.214 -> 4.287 ( +1.73%) [ +1.59% +9.49% +0.00% / +1.73% +1.83% +2.75%] index_fill_ random : Elapsed 0.004 ms (4.281 ms / 1000) 4.233 -> 4.234 ( +0.02%) [ +3.33% +6.14% +0.00% / +2.01% +0.02% +1.16%] index_fill_ random_sorted : Elapsed 0.004 ms (4.374 ms / 1000) B = [2, 3] (stride (1, 2)) A = [2, 5] (stride (5, 1)) dim = 1 4.946 -> 5.092 ( +2.95%) [ +3.42% +8.25% +0.00% / +2.95% +6.49% +4.65%] index_select const : Elapsed 0.005 ms (5.115 ms / 1000) 5.044 -> 5.076 ( +0.63%) [ +2.54% +6.54% +0.00% / +1.07% +0.85% +0.63%] index_select wrap : Elapsed 0.005 ms (5.172 ms / 1000) 4.934 -> 5.097 ( +3.30%) [ +3.51% +5.65% +0.00% / +3.77% +3.38% +3.30%] index_select linear : Elapsed 0.005 ms (5.107 ms / 1000) 4.920 -> 5.128 ( +4.23%) [ +4.17% +3.13% +0.00% / +4.23% +8.31% +7.44%] index_select reverse : Elapsed 0.005 ms (5.125 ms / 1000) 5.127 -> 5.055 ( -1.40%) [ +0.57% +0.64% +0.00% / -0.49% -1.40% -1.03%] index_select skip64 : Elapsed 0.005 ms (5.156 ms / 1000) 4.963 -> 5.016 ( +1.07%) [ +4.07% +7.60% +0.00% / +4.39% +1.07% +1.49%] index_select skip256 : Elapsed 0.005 ms (5.165 ms / 1000) 5.055 -> 5.101 ( +0.91%) [ +3.28% +1.09% +0.00% / +0.91% +0.97% +1.29%] index_select spread : Elapsed 0.005 ms (5.221 ms / 1000) 5.088 -> 4.945 ( -2.81%) [ +2.32% +3.18% +0.00% / +0.85% -2.81% +0.71%] index_select strided 3 : Elapsed 0.005 ms (5.206 ms / 1000) 4.960 -> 5.099 ( +2.80%) [ +3.27% +2.82% +0.00% / +3.04% +4.29% +2.80%] index_select random : Elapsed 0.005 ms (5.122 ms / 1000) 4.925 -> 5.141 ( +4.39%) [ +4.20% +3.43% +0.00% / +4.41% +5.50% +4.39%] index_select random_sorted : Elapsed 0.005 ms (5.132 ms / 1000) 5.111 -> 5.096 ( -0.29%) [ +0.63% +0.12% +0.00% / +2.84% +1.37% -0.29%] index_select perm : Elapsed 0.005 ms (5.143 ms / 1000) 4.929 -> 5.086 ( +3.19%) [ +3.57% +2.17% +0.00% / +4.59% +5.32% +3.19%] index_select perm_sorted : Elapsed 0.005 ms (5.105 ms / 1000) B = [2, 3] (stride (1, 2)) A = [2, 5] (stride (1, 2)) dim = 1 4.971 -> 5.099 ( +2.57%) [ +3.28% +2.31% +0.00% / +5.55% +3.44% +2.57%] index_select const : Elapsed 0.005 ms (5.134 ms / 1000) 5.144 -> 5.069 ( -1.46%) [ +3.99% +0.00% +0.21% / -0.64% -1.46% +1.21%] index_select wrap : Elapsed 0.005 ms (5.349 ms / 1000) 5.012 -> 5.011 ( -0.02%) [ +2.15% +2.73% +0.00% / +4.77% -0.02% +1.70%] index_select linear : Elapsed 0.005 ms (5.120 ms / 1000) 4.930 -> 5.105 ( +3.55%) [ +5.01% +4.10% +0.00% / +4.22% +3.55% +4.73%] index_select reverse : Elapsed 0.005 ms (5.177 ms / 1000) 5.024 -> 4.971 ( -1.05%) [ +2.47% +1.29% +0.00% / +1.05% -1.05% +0.24%] index_select skip64 : Elapsed 0.005 ms (5.148 ms / 1000) 4.998 -> 4.973 ( -0.50%) [ +5.90% +2.50% +0.00% / +1.90% -0.50% +3.74%] index_select skip256 : Elapsed 0.005 ms (5.293 ms / 1000) 4.983 -> 5.101 ( +2.37%) [ +4.48% +3.75% +0.00% / +2.39% +4.07% +2.37%] index_select spread : Elapsed 0.005 ms (5.206 ms / 1000) 4.917 -> 5.021 ( +2.12%) [+12.69% +3.46% +0.00% / +2.56% +2.12% +3.09%] index_select strided 3 : Elapsed 0.006 ms (5.541 ms / 1000) 5.045 -> 5.009 ( -0.71%) [ +5.29% +0.00% +0.67% / +0.02% +0.79% -0.71%] index_select random : Elapsed 0.005 ms (5.312 ms / 1000) 4.892 -> 5.074 ( +3.72%) [ +8.50% +4.95% +0.00% / +3.72% +4.97% +5.68%] index_select random_sorted : Elapsed 0.005 ms (5.308 ms / 1000) 4.923 -> 4.990 ( +1.36%) [ +5.26% +3.86% +0.00% / +3.82% +1.36% +3.80%] index_select perm : Elapsed 0.005 ms (5.182 ms / 1000) 5.108 -> 5.010 ( -1.92%) [ +3.31% +0.00% +0.96% / -0.63% -1.92% -0.84%] index_select perm_sorted : Elapsed 0.005 ms (5.277 ms / 1000) out_shape = [3, 2] in_shape = [5, 2] idx_dim = 0 B = [3, 2] (stride (2, 1)) dim = 0 fill_cnt = 5 4.223 -> 4.303 ( +1.89%) [ +0.26% +3.48% +0.00% / +1.89% +2.42% +2.51%] index_fill_ const : Elapsed 0.004 ms (4.234 ms / 1000) 4.241 -> 4.240 ( -0.02%) [ +0.14% +2.83% +0.00% / -0.02% +2.95% +2.36%] index_fill_ linear : Elapsed 0.004 ms (4.247 ms / 1000) 4.359 -> 4.234 ( -2.87%) [ +0.00% +1.03% +0.34% / -1.08% -2.87% -1.95%] index_fill_ reverse : Elapsed 0.004 ms (4.359 ms / 1000) 4.225 -> 4.256 ( +0.73%) [ +2.20% +4.19% +0.00% / +1.33% +1.73% +0.73%] index_fill_ skip64 : Elapsed 0.004 ms (4.318 ms / 1000) 4.223 -> 4.318 ( +2.25%) [ +0.00% +6.51% +0.78% / +2.25% +4.45% +4.29%] index_fill_ skip256 : Elapsed 0.004 ms (4.223 ms / 1000) 4.216 -> 4.258 ( +1.00%) [ +0.21% +5.38% +0.00% / +1.45% +1.00% +2.11%] index_fill_ spread : Elapsed 0.004 ms (4.225 ms / 1000) 4.235 -> 4.243 ( +0.19%) [ +0.00% +3.64% +4.20% / +0.19% +0.76% +1.82%] index_fill_ random : Elapsed 0.004 ms (4.235 ms / 1000) 4.211 -> 4.306 ( +2.26%) [ +3.42% +4.77% +0.00% / +2.26% +2.28% +3.09%] index_fill_ random_sorted : Elapsed 0.004 ms (4.355 ms / 1000) B = [3, 2] (stride (2, 1)) A = [5, 2] (stride (2, 1)) dim = 0 4.907 -> 5.077 ( +3.46%) [ +6.17% +3.87% +0.00% / +7.54% +5.85% +3.46%] index_select const : Elapsed 0.005 ms (5.210 ms / 1000) 5.029 -> 4.994 ( -0.70%) [ +6.10% +1.33% +0.00% / +2.07% -0.70% +0.80%] index_select wrap : Elapsed 0.005 ms (5.336 ms / 1000) 4.959 -> 4.961 ( +0.04%) [ +4.46% +3.35% +0.00% / +3.17% +0.04% +3.15%] index_select linear : Elapsed 0.005 ms (5.180 ms / 1000) 4.945 -> 5.094 ( +3.01%) [ +5.48% +3.13% +0.00% / +3.01% +3.70% +3.72%] index_select reverse : Elapsed 0.005 ms (5.216 ms / 1000) 4.972 -> 4.991 ( +0.38%) [ +5.67% +2.82% +0.00% / +2.19% +0.38% +1.87%] index_select skip64 : Elapsed 0.005 ms (5.254 ms / 1000) 5.043 -> 4.997 ( -0.91%) [ +1.96% +2.14% +0.00% / +0.89% -0.91% +1.33%] index_select skip256 : Elapsed 0.005 ms (5.142 ms / 1000) 4.956 -> 5.119 ( +3.29%) [ +5.61% +2.06% +0.00% / +5.19% +4.62% +3.29%] index_select spread : Elapsed 0.005 ms (5.234 ms / 1000) 4.912 -> 5.010 ( +2.00%) [ +4.62% +3.68% +0.00% / +3.46% +2.00% +3.89%] index_select strided 3 : Elapsed 0.005 ms (5.139 ms / 1000) 5.110 -> 5.004 ( -2.07%) [ +0.92% +2.47% +0.00% / -0.23% -2.07% +0.08%] index_select random : Elapsed 0.005 ms (5.157 ms / 1000) 4.948 -> 5.114 ( +3.35%) [ +4.45% +2.32% +0.00% / +5.96% +3.35% +4.87%] index_select random_sorted : Elapsed 0.005 ms (5.168 ms / 1000) 4.920 -> 4.988 ( +1.38%) [ +5.85% +3.05% +0.00% / +3.11% +1.38% +3.25%] index_select perm : Elapsed 0.005 ms (5.208 ms / 1000) 5.110 -> 5.008 ( -2.00%) [ +0.53% +4.23% +0.00% / -0.14% -2.00% -1.70%] index_select perm_sorted : Elapsed 0.005 ms (5.137 ms / 1000) B = [3, 2] (stride (2, 1)) A = [5, 2] (stride (1, 5)) dim = 0 4.968 -> 5.105 ( +2.76%) [ +4.29% +2.68% +0.00% / +2.86% +2.76% +4.01%] index_select const : Elapsed 0.005 ms (5.181 ms / 1000) 4.955 -> 5.010 ( +1.11%) [ +3.94% +3.35% +0.00% / +2.30% +1.11% +2.81%] index_select wrap : Elapsed 0.005 ms (5.150 ms / 1000) 5.062 -> 5.019 ( -0.85%) [ +6.62% +0.00% +0.99% / +0.53% -0.85% +0.51%] index_select linear : Elapsed 0.005 ms (5.397 ms / 1000) 4.939 -> 5.024 ( +1.72%) [ +5.51% +3.28% +0.00% / +3.64% +1.72% +6.84%] index_select reverse : Elapsed 0.005 ms (5.211 ms / 1000) 4.957 -> 5.035 ( +1.57%) [ +4.88% +3.35% +0.00% / +2.80% +2.99% +1.57%] index_select skip64 : Elapsed 0.005 ms (5.199 ms / 1000) 5.069 -> 5.008 ( -1.20%) [ +2.80% +0.30% +0.00% / +0.26% -1.20% +6.00%] index_select skip256 : Elapsed 0.005 ms (5.211 ms / 1000) 4.937 -> 4.952 ( +0.30%) [ +5.04% +3.46% +0.00% / +3.52% +0.30% +8.67%] index_select spread : Elapsed 0.005 ms (5.186 ms / 1000) 4.944 -> 5.120 ( +3.56%) [ +5.20% +3.64% +0.00% / +3.56% +3.64% +4.81%] index_select strided 3 : Elapsed 0.005 ms (5.201 ms / 1000) 5.059 -> 4.977 ( -1.62%) [ +1.05% +0.93% +0.00% / +1.13% -1.62% +1.82%] index_select random : Elapsed 0.005 ms (5.112 ms / 1000) 4.910 -> 4.961 ( +1.04%) [ +4.38% +4.48% +0.00% / +4.42% +1.04% +9.55%] index_select random_sorted : Elapsed 0.005 ms (5.125 ms / 1000) 4.920 -> 5.086 ( +3.37%) [ +4.41% +3.46% +0.00% / +3.37% +4.59% +3.84%] index_select perm : Elapsed 0.005 ms (5.137 ms / 1000) 4.948 -> 4.923 ( -0.51%) [ +3.68% +5.98% +0.00% / +3.66% -0.51% +2.28%] index_select perm_sorted : Elapsed 0.005 ms (5.130 ms / 1000) B = [3, 2] (stride (1, 3)) dim = 0 fill_cnt = 5 4.227 -> 4.268 ( +0.97%) [ +0.00% +3.86% +2.79% / +1.30% +0.97% +1.49%] index_fill_ const : Elapsed 0.004 ms (4.227 ms / 1000) 4.184 -> 4.246 ( +1.48%) [ +1.98% +4.76% +0.00% / +1.48% +6.62% +8.46%] index_fill_ linear : Elapsed 0.004 ms (4.267 ms / 1000) 4.226 -> 4.241 ( +0.35%) [ +8.19% +4.16% +0.00% / +0.99% +0.35% +1.59%] index_fill_ reverse : Elapsed 0.005 ms (4.572 ms / 1000) 4.250 -> 4.264 ( +0.33%) [ +0.00% +2.92% +3.04% / +0.33% +2.82% +0.61%] index_fill_ skip64 : Elapsed 0.004 ms (4.250 ms / 1000) 4.202 -> 4.305 ( +2.45%) [ +4.16% +3.93% +0.00% / +2.78% +2.45% +2.69%] index_fill_ skip256 : Elapsed 0.004 ms (4.377 ms / 1000) 4.236 -> 4.243 ( +0.17%) [ +0.00% +3.21% +0.28% / +0.17% +3.75% +0.17%] index_fill_ spread : Elapsed 0.004 ms (4.236 ms / 1000) 4.269 -> 4.261 ( -0.19%) [ +0.00% +1.87% +0.23% / +0.54% +0.84% -0.19%] index_fill_ random : Elapsed 0.004 ms (4.269 ms / 1000) 4.287 -> 4.249 ( -0.89%) [ +0.00% +2.10% +3.10% / -0.89% -0.16% +0.65%] index_fill_ random_sorted : Elapsed 0.004 ms (4.287 ms / 1000) B = [3, 2] (stride (1, 3)) A = [5, 2] (stride (2, 1)) dim = 0 4.927 -> 5.139 ( +4.30%) [ +4.99% +4.12% +0.00% / +6.31% +4.30% +4.59%] index_select const : Elapsed 0.005 ms (5.173 ms / 1000) 4.962 -> 4.998 ( +0.73%) [ +3.47% +2.70% +0.00% / +5.66% +0.73% +5.40%] index_select wrap : Elapsed 0.005 ms (5.134 ms / 1000) 5.076 -> 5.023 ( -1.04%) [ +1.08% +2.09% +0.00% / -0.06% -1.04% +0.32%] index_select linear : Elapsed 0.005 ms (5.131 ms / 1000) 4.948 -> 5.058 ( +2.22%) [ +4.22% +3.29% +0.00% / +2.81% +2.22% +2.83%] index_select reverse : Elapsed 0.005 ms (5.157 ms / 1000) 4.930 -> 5.039 ( +2.21%) [ +5.27% +3.20% +0.00% / +5.21% +2.45% +2.21%] index_select skip64 : Elapsed 0.005 ms (5.190 ms / 1000) 5.111 -> 4.976 ( -2.64%) [ +0.98% +0.29% +0.00% / -0.74% -2.64% -1.64%] index_select skip256 : Elapsed 0.005 ms (5.161 ms / 1000) 4.947 -> 5.030 ( +1.68%) [ +4.35% +2.83% +0.00% / +3.23% +1.68% +1.92%] index_select spread : Elapsed 0.005 ms (5.162 ms / 1000) 4.972 -> 5.006 ( +0.68%) [ +4.59% +2.13% +0.00% / +2.39% +0.68% +1.55%] index_select strided 3 : Elapsed 0.005 ms (5.200 ms / 1000) 5.006 -> 4.976 ( -0.60%) [ +3.58% +2.92% +0.00% / +2.14% +0.98% -0.60%] index_select random : Elapsed 0.005 ms (5.185 ms / 1000) 4.927 -> 5.012 ( +1.73%) [ +7.18% +3.19% +0.00% / +3.53% +1.73% +4.06%] index_select random_sorted : Elapsed 0.005 ms (5.281 ms / 1000) 4.972 -> 5.043 ( +1.43%) [ +3.70% +2.19% +0.00% / +2.15% +3.20% +1.43%] index_select perm : Elapsed 0.005 ms (5.156 ms / 1000) 4.943 -> 5.000 ( +1.15%) [ +5.18% +3.08% +0.00% / +3.32% +1.15% +2.00%] index_select perm_sorted : Elapsed 0.005 ms (5.199 ms / 1000) B = [3, 2] (stride (1, 3)) A = [5, 2] (stride (1, 5)) dim = 0 5.129 -> 5.024 ( -2.05%) [ +1.87% +0.00% +0.12% / +0.21% -2.05% +0.02%] index_select const : Elapsed 0.005 ms (5.225 ms / 1000) 4.963 -> 5.071 ( +2.18%) [ +9.55% +2.22% +0.00% / +2.18% +3.12% +3.02%] index_select wrap : Elapsed 0.005 ms (5.437 ms / 1000) 4.969 -> 4.981 ( +0.24%) [ +3.84% +2.54% +0.00% / +1.43% +0.24% +2.76%] index_select linear : Elapsed 0.005 ms (5.160 ms / 1000) 5.081 -> 4.979 ( -2.01%) [ +2.03% +0.00% +0.87% / +4.98% -2.01% +1.63%] index_select reverse : Elapsed 0.005 ms (5.184 ms / 1000) 4.951 -> 5.072 ( +2.44%) [ +4.52% +5.64% +0.00% / +2.65% +3.49% +2.44%] index_select skip64 : Elapsed 0.005 ms (5.175 ms / 1000) 5.073 -> 5.005 ( -1.34%) [ +2.09% +0.34% +0.00% / -0.28% -1.34% -0.10%] index_select skip256 : Elapsed 0.005 ms (5.179 ms / 1000) 5.046 -> 5.027 ( -0.38%) [ +1.92% +1.82% +0.00% / +1.98% -0.38% +2.72%] index_select spread : Elapsed 0.005 ms (5.143 ms / 1000) 4.934 -> 5.048 ( +2.31%) [ +4.18% +3.69% +0.00% / +2.96% +4.34% +2.31%] index_select strided 3 : Elapsed 0.005 ms (5.140 ms / 1000) 4.916 -> 5.018 ( +2.07%) [ +4.19% +3.36% +0.00% / +2.44% +2.07% +3.42%] index_select random : Elapsed 0.005 ms (5.122 ms / 1000) 5.078 -> 5.027 ( -1.00%) [ +1.54% +0.53% +0.00% / +0.22% -1.00% +3.74%] index_select random_sorted : Elapsed 0.005 ms (5.156 ms / 1000) 4.912 -> 5.062 ( +3.05%) [ +4.85% +4.44% +0.00% / +3.68% +3.05% +3.46%] index_select perm : Elapsed 0.005 ms (5.150 ms / 1000) 5.034 -> 5.005 ( -0.58%) [ +5.48% +0.91% +0.00% / +0.64% -0.58% +0.64%] index_select perm_sorted : Elapsed 0.005 ms (5.310 ms / 1000) out_shape = [5, 3] in_shape = [5, 2] idx_dim = 1 B = [5, 3] (stride (3, 1)) dim = 1 fill_cnt = 2 4.201 -> 4.283 ( +1.95%) [ +0.00% +5.26% +2.26% / +2.29% +1.95% +2.59%] index_fill_ const : Elapsed 0.004 ms (4.201 ms / 1000) 4.221 -> 4.276 ( +1.30%) [ +0.00% +2.80% +1.21% / +1.30% +5.83% +2.39%] index_fill_ linear : Elapsed 0.004 ms (4.221 ms / 1000) 4.200 -> 4.241 ( +0.98%) [ +0.19% +3.52% +0.00% / +0.98% +5.45% +3.10%] index_fill_ reverse : Elapsed 0.004 ms (4.208 ms / 1000) 4.229 -> 4.245 ( +0.38%) [ +0.00% +3.14% +1.25% / +0.38% +5.75% +0.71%] index_fill_ skip64 : Elapsed 0.004 ms (4.229 ms / 1000) 4.334 -> 4.261 ( -1.68%) [ +0.00% +1.18% +0.44% / -1.68% +3.07% -0.05%] index_fill_ skip256 : Elapsed 0.004 ms (4.334 ms / 1000) 4.204 -> 4.233 ( +0.69%) [ +0.00% +4.52% +0.14% / +0.69% +7.94% +2.47%] index_fill_ spread : Elapsed 0.004 ms (4.204 ms / 1000) 4.311 -> 4.217 ( -2.18%) [ +0.07% +4.78% +0.00% / -2.18% +2.39% -1.35%] index_fill_ random : Elapsed 0.004 ms (4.314 ms / 1000) 4.164 -> 4.318 ( +3.70%) [ +0.00% +5.57% +0.96% / +4.42% +7.23% +3.70%] index_fill_ random_sorted : Elapsed 0.004 ms (4.164 ms / 1000) 4.183 -> 4.279 ( +2.30%) [ +0.00% +4.23% +4.16% / +2.30% +4.97% +4.37%] index_fill_ perm : Elapsed 0.004 ms (4.183 ms / 1000) 4.224 -> 4.252 ( +0.66%) [ +4.59% +4.38% +0.00% / +0.66% +2.72% +3.41%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.418 ms / 1000) B = [5, 3] (stride (3, 1)) A = [5, 2] (stride (2, 1)) dim = 1 4.488 -> 4.492 ( +0.09%) [ +0.00% +3.74% +1.74% / +3.39% +0.71% +0.09%] index_add_ linear : Elapsed 0.004 ms (4.488 ms / 1000) 4.528 -> 4.458 ( -1.55%) [ +0.00% +1.68% +0.73% / +3.18% -1.55% +0.18%] index_copy_ linear : Elapsed 0.005 ms (4.528 ms / 1000) 4.515 -> 4.494 ( -0.47%) [ +0.00% +2.95% +2.48% / +1.44% +1.90% -0.47%] index_add_ reverse : Elapsed 0.005 ms (4.515 ms / 1000) 4.504 -> 4.460 ( -0.98%) [ +0.00% +2.62% +0.02% / +1.02% -0.80% -0.98%] index_copy_ reverse : Elapsed 0.005 ms (4.504 ms / 1000) 4.466 -> 4.476 ( +0.22%) [ +0.00% +6.83% +0.18% / +2.78% +0.22% +1.77%] index_add_ spread : Elapsed 0.004 ms (4.466 ms / 1000) 4.476 -> 4.477 ( +0.02%) [ +0.00% +5.38% +3.08% / +1.94% +0.02% +1.92%] index_copy_ spread : Elapsed 0.004 ms (4.476 ms / 1000) 4.482 -> 4.520 ( +0.85%) [ +1.87% +3.50% +0.00% / +0.85% +3.03% +0.87%] index_add_ perm : Elapsed 0.005 ms (4.566 ms / 1000) 4.427 -> 4.472 ( +1.02%) [ +0.00% +6.14% +0.68% / +2.28% +1.02% +2.28%] index_copy_ perm : Elapsed 0.004 ms (4.427 ms / 1000) 4.410 -> 4.505 ( +2.15%) [ +0.00% +7.37% +3.08% / +3.11% +2.68% +2.15%] index_add_ perm_sorted : Elapsed 0.004 ms (4.410 ms / 1000) 4.430 -> 4.504 ( +1.67%) [ +0.00% +4.63% +3.43% / +1.67% +4.79% +3.81%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.430 ms / 1000) 4.981 -> 5.113 ( +2.65%) [ +2.93% +3.01% +0.00% / +6.40% +3.27% +2.65%] index_select const : Elapsed 0.005 ms (5.127 ms / 1000) 4.931 -> 5.110 ( +3.63%) [ +3.75% +4.48% +0.00% / +4.40% +4.08% +3.63%] index_select wrap : Elapsed 0.005 ms (5.116 ms / 1000) 5.028 -> 5.092 ( +1.27%) [ +2.13% +2.43% +0.00% / +4.85% +1.27% +2.27%] index_select linear : Elapsed 0.005 ms (5.135 ms / 1000) 4.927 -> 5.001 ( +1.50%) [ +4.99% +4.85% +0.00% / +3.96% +1.50% +3.63%] index_select reverse : Elapsed 0.005 ms (5.173 ms / 1000) 4.918 -> 5.014 ( +1.95%) [ +8.38% +6.22% +0.00% / +2.95% +1.95% +4.11%] index_select skip64 : Elapsed 0.005 ms (5.330 ms / 1000) 5.115 -> 5.057 ( -1.13%) [ +1.21% +0.51% +0.00% / +0.27% -1.13% +0.66%] index_select skip256 : Elapsed 0.005 ms (5.177 ms / 1000) 5.080 -> 5.032 ( -0.94%) [ +1.83% +1.42% +0.00% / -0.94% -0.57% +0.61%] index_select spread : Elapsed 0.005 ms (5.173 ms / 1000) 4.962 -> 4.966 ( +0.08%) [ +4.35% +4.07% +0.00% / +1.73% +0.08% +3.43%] index_select random : Elapsed 0.005 ms (5.178 ms / 1000) 5.075 -> 4.995 ( -1.58%) [ +0.37% +1.46% +0.00% / -0.43% -1.58% +4.53%] index_select random_sorted : Elapsed 0.005 ms (5.094 ms / 1000) B = [5, 3] (stride (3, 1)) A = [5, 2] (stride (1, 5)) dim = 1 4.427 -> 4.496 ( +1.56%) [ +0.00% +5.51% +1.49% / +2.26% +3.43% +1.56%] index_add_ linear : Elapsed 0.004 ms (4.427 ms / 1000) 4.433 -> 4.430 ( -0.07%) [ +0.00% +3.90% +0.23% / +1.78% -0.07% +1.24%] index_copy_ linear : Elapsed 0.004 ms (4.433 ms / 1000) 4.390 -> 4.484 ( +2.14%) [ +0.00% +6.01% +2.07% / +4.83% +2.64% +2.14%] index_add_ reverse : Elapsed 0.004 ms (4.390 ms / 1000) 4.431 -> 4.544 ( +2.55%) [ +0.00% +3.75% +2.78% / +2.55% +3.75% +2.66%] index_copy_ reverse : Elapsed 0.004 ms (4.431 ms / 1000) 4.487 -> 4.499 ( +0.27%) [ +1.78% +4.50% +0.00% / +1.63% +0.27% +0.87%] index_add_ spread : Elapsed 0.005 ms (4.567 ms / 1000) 4.446 -> 4.445 ( -0.02%) [ +0.00% +4.27% +3.85% / +2.32% -0.02% +3.37%] index_copy_ spread : Elapsed 0.004 ms (4.446 ms / 1000) 4.583 -> 4.506 ( -1.68%) [ +0.00% +2.31% +1.20% / +0.17% -1.68% -1.18%] index_add_ perm : Elapsed 0.005 ms (4.583 ms / 1000) 4.473 -> 4.478 ( +0.11%) [ +2.24% +3.78% +0.00% / +1.45% +6.55% +0.11%] index_copy_ perm : Elapsed 0.005 ms (4.573 ms / 1000) 4.433 -> 4.496 ( +1.42%) [ +0.00% +6.02% +1.49% / +2.17% +1.94% +1.42%] index_add_ perm_sorted : Elapsed 0.004 ms (4.433 ms / 1000) 4.436 -> 4.468 ( +0.72%) [ +0.00% +4.15% +0.25% / +3.00% +0.72% +1.85%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.436 ms / 1000) 5.125 -> 5.098 ( -0.53%) [ +0.00% +0.00% +0.18% / -0.53% +2.87% +0.00%] index_select const : Elapsed 0.005 ms (5.125 ms / 1000) 4.956 -> 5.022 ( +1.33%) [ +3.37% +3.59% +0.00% / +3.01% +1.33% +3.33%] index_select wrap : Elapsed 0.005 ms (5.123 ms / 1000) 4.940 -> 5.013 ( +1.48%) [ +4.01% +2.57% +0.00% / +3.97% +1.48% +2.69%] index_select linear : Elapsed 0.005 ms (5.138 ms / 1000) 5.100 -> 5.104 ( +0.08%) [ +1.53% +0.00% +0.57% / +3.04% +0.41% +0.08%] index_select reverse : Elapsed 0.005 ms (5.178 ms / 1000) 4.970 -> 4.958 ( -0.24%) [ +5.09% +3.16% +0.00% / +1.91% -0.24% +2.47%] index_select skip64 : Elapsed 0.005 ms (5.223 ms / 1000) 4.901 -> 5.070 ( +3.45%) [ +5.43% +3.69% +0.00% / +5.33% +3.45% +4.20%] index_select skip256 : Elapsed 0.005 ms (5.167 ms / 1000) 5.141 -> 5.133 ( -0.16%) [ +0.88% +0.00% +1.30% / -0.16% +1.65% +0.62%] index_select spread : Elapsed 0.005 ms (5.186 ms / 1000) 4.937 -> 5.071 ( +2.71%) [ +4.98% +3.40% +0.00% / +2.71% +2.90% +2.71%] index_select random : Elapsed 0.005 ms (5.183 ms / 1000) 4.954 -> 5.078 ( +2.50%) [ +4.34% +2.99% +0.00% / +5.59% +2.99% +2.50%] index_select random_sorted : Elapsed 0.005 ms (5.169 ms / 1000) B = [5, 3] (stride (1, 5)) dim = 1 fill_cnt = 2 4.357 -> 4.262 ( -2.18%) [ +0.71% +0.71% +0.00% / -2.18% -0.18% +4.41%] index_fill_ const : Elapsed 0.004 ms (4.388 ms / 1000) 4.215 -> 4.268 ( +1.26%) [ +8.02% +4.82% +0.00% / +1.26% +2.59% +1.83%] index_fill_ linear : Elapsed 0.005 ms (4.553 ms / 1000) 4.221 -> 4.262 ( +0.97%) [ +0.28% +3.20% +0.00% / +0.97% +1.02% +2.06%] index_fill_ reverse : Elapsed 0.004 ms (4.233 ms / 1000) 4.216 -> 4.219 ( +0.07%) [ +0.00% +3.08% +0.38% / +0.78% +0.07% +3.37%] index_fill_ skip64 : Elapsed 0.004 ms (4.216 ms / 1000) 4.214 -> 4.275 ( +1.45%) [ +0.00% +3.06% +2.78% / +1.45% +7.50% +5.27%] index_fill_ skip256 : Elapsed 0.004 ms (4.214 ms / 1000) 4.180 -> 4.226 ( +1.10%) [ +4.52% +4.38% +0.00% / +2.32% +1.10% +4.55%] index_fill_ spread : Elapsed 0.004 ms (4.369 ms / 1000) 4.184 -> 4.231 ( +1.12%) [ +1.34% +4.33% +0.00% / +2.65% +2.61% +1.12%] index_fill_ random : Elapsed 0.004 ms (4.240 ms / 1000) 4.193 -> 4.266 ( +1.74%) [ +0.88% +4.70% +0.00% / +2.55% +1.74% +3.94%] index_fill_ random_sorted : Elapsed 0.004 ms (4.230 ms / 1000) 4.205 -> 4.253 ( +1.14%) [ +0.00% +6.87% +3.88% / +1.45% +4.95% +1.14%] index_fill_ perm : Elapsed 0.004 ms (4.205 ms / 1000) 4.210 -> 4.273 ( +1.50%) [ +5.56% +5.06% +0.00% / +1.50% +2.14% +2.52%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.444 ms / 1000) B = [5, 3] (stride (1, 5)) A = [5, 2] (stride (2, 1)) dim = 1 4.587 -> 4.497 ( -1.96%) [ +0.00% +0.94% +1.29% / -0.02% -1.96% +2.01%] index_add_ linear : Elapsed 0.005 ms (4.587 ms / 1000) 4.618 -> 4.503 ( -2.49%) [ +0.65% +0.00% +4.03% / +0.26% -2.49% -1.30%] index_copy_ linear : Elapsed 0.005 ms (4.648 ms / 1000) 4.500 -> 4.492 ( -0.18%) [ +0.00% +3.84% +2.38% / +1.22% -0.18% +4.20%] index_add_ reverse : Elapsed 0.004 ms (4.500 ms / 1000) 4.508 -> 4.461 ( -1.04%) [ +5.28% +2.82% +0.00% / +0.82% -1.04% +0.80%] index_copy_ reverse : Elapsed 0.005 ms (4.746 ms / 1000) 4.493 -> 4.460 ( -0.73%) [ +0.00% +4.30% +1.89% / +3.14% -0.73% +3.32%] index_add_ spread : Elapsed 0.004 ms (4.493 ms / 1000) 4.440 -> 4.534 ( +2.12%) [ +0.00% +3.74% +3.15% / +6.82% +4.08% +2.12%] index_copy_ spread : Elapsed 0.004 ms (4.440 ms / 1000) 4.416 -> 4.463 ( +1.06%) [ +0.00% +6.00% +1.81% / +6.41% +1.06% +3.67%] index_add_ perm : Elapsed 0.004 ms (4.416 ms / 1000) 4.417 -> 4.441 ( +0.54%) [ +0.00% +3.94% +1.47% / +2.65% +0.54% +2.56%] index_copy_ perm : Elapsed 0.004 ms (4.417 ms / 1000) 4.408 -> 4.480 ( +1.63%) [ +0.00% +5.65% +5.88% / +3.13% +1.63% +2.27%] index_add_ perm_sorted : Elapsed 0.004 ms (4.408 ms / 1000) 4.444 -> 4.436 ( -0.18%) [ +0.00% +3.87% +0.32% / +1.44% +3.35% -0.18%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.444 ms / 1000) 4.946 -> 4.984 ( +0.77%) [ +4.37% +3.21% +0.00% / +3.19% +1.66% +0.77%] index_select const : Elapsed 0.005 ms (5.162 ms / 1000) 4.968 -> 5.012 ( +0.89%) [ +3.18% +3.52% +0.00% / +3.00% +0.89% +2.74%] index_select wrap : Elapsed 0.005 ms (5.126 ms / 1000) 4.967 -> 5.030 ( +1.27%) [ +2.72% +1.85% +0.00% / +2.48% +2.50% +1.27%] index_select linear : Elapsed 0.005 ms (5.102 ms / 1000) 4.946 -> 5.092 ( +2.95%) [ +4.02% +2.61% +0.00% / +3.44% +3.03% +2.95%] index_select reverse : Elapsed 0.005 ms (5.145 ms / 1000) 4.949 -> 4.988 ( +0.79%) [ +4.65% +3.41% +0.00% / +3.74% +0.79% +3.37%] index_select skip64 : Elapsed 0.005 ms (5.179 ms / 1000) 5.136 -> 5.093 ( -0.84%) [ +0.00% +0.00% +0.14% / -0.76% -0.72% -0.84%] index_select skip256 : Elapsed 0.005 ms (5.136 ms / 1000) 4.978 -> 5.019 ( +0.82%) [ +2.69% +3.88% +0.00% / +2.27% +1.95% +0.82%] index_select spread : Elapsed 0.005 ms (5.112 ms / 1000) 5.147 -> 4.986 ( -3.13%) [ +0.00% +2.95% +2.74% / -0.31% -3.13% -0.06%] index_select random : Elapsed 0.005 ms (5.147 ms / 1000) 5.082 -> 5.009 ( -1.44%) [ +0.41% +0.00% +1.04% / -0.28% -0.22% -1.44%] index_select random_sorted : Elapsed 0.005 ms (5.103 ms / 1000) B = [5, 3] (stride (1, 5)) A = [5, 2] (stride (1, 5)) dim = 1 4.479 -> 4.503 ( +0.54%) [ +0.04% +6.41% +0.00% / +2.77% +0.54% +5.54%] index_add_ linear : Elapsed 0.004 ms (4.481 ms / 1000) 4.444 -> 4.483 ( +0.88%) [ +1.31% +2.95% +0.00% / +5.51% +0.88% +3.76%] index_copy_ linear : Elapsed 0.005 ms (4.502 ms / 1000) 4.443 -> 4.502 ( +1.33%) [ +0.00% +3.47% +2.63% / +3.20% +2.36% +1.33%] index_add_ reverse : Elapsed 0.004 ms (4.443 ms / 1000) 4.451 -> 4.547 ( +2.16%) [ +0.13% +3.03% +0.00% / +5.86% +2.16% +4.54%] index_copy_ reverse : Elapsed 0.004 ms (4.457 ms / 1000) 4.481 -> 4.474 ( -0.16%) [ +0.00% +6.76% +0.16% / +3.28% -0.16% +0.67%] index_add_ spread : Elapsed 0.004 ms (4.481 ms / 1000) 4.466 -> 4.505 ( +0.87%) [ +0.78% +3.58% +0.00% / +3.43% +0.87% +5.69%] index_copy_ spread : Elapsed 0.005 ms (4.501 ms / 1000) 4.447 -> 4.529 ( +1.84%) [ +0.00% +3.80% +2.50% / +3.35% +4.21% +1.84%] index_add_ perm : Elapsed 0.004 ms (4.447 ms / 1000) 4.399 -> 4.481 ( +1.86%) [ +1.86% +4.05% +0.00% / +7.09% +1.86% +2.02%] index_copy_ perm : Elapsed 0.004 ms (4.481 ms / 1000) 4.411 -> 4.477 ( +1.50%) [ +0.97% +4.44% +0.00% / +4.49% +1.50% +2.06%] index_add_ perm_sorted : Elapsed 0.004 ms (4.454 ms / 1000) 4.472 -> 4.491 ( +0.42%) [ +0.00% +2.57% +1.07% / +1.21% +0.42% +5.77%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.472 ms / 1000) 5.043 -> 5.041 ( -0.04%) [ +2.70% +1.84% +0.00% / -0.04% +0.67% +0.83%] index_select const : Elapsed 0.005 ms (5.179 ms / 1000) 5.052 -> 5.012 ( -0.79%) [ +2.43% +1.58% +0.00% / +0.87% -0.79% -0.06%] index_select wrap : Elapsed 0.005 ms (5.175 ms / 1000) 5.039 -> 5.035 ( -0.08%) [ +2.74% +1.39% +0.00% / +1.65% -0.08% +1.61%] index_select linear : Elapsed 0.005 ms (5.177 ms / 1000) 4.905 -> 5.084 ( +3.65%) [ +7.09% +4.26% +0.00% / +3.83% +4.30% +3.65%] index_select reverse : Elapsed 0.005 ms (5.253 ms / 1000) 4.940 -> 5.049 ( +2.21%) [ +3.91% +4.64% +0.00% / +3.34% +2.21% +3.97%] index_select skip64 : Elapsed 0.005 ms (5.133 ms / 1000) 4.929 -> 5.068 ( +2.82%) [ +4.02% +4.30% +0.00% / +6.76% +2.82% +3.59%] index_select skip256 : Elapsed 0.005 ms (5.127 ms / 1000) 5.119 -> 5.080 ( -0.76%) [ +4.32% +0.45% +0.00% / -0.76% +2.42% -0.20%] index_select spread : Elapsed 0.005 ms (5.340 ms / 1000) 4.938 -> 5.044 ( +2.15%) [ +4.13% +2.92% +0.00% / +2.98% +2.15% +3.12%] index_select random : Elapsed 0.005 ms (5.142 ms / 1000) 4.931 -> 5.019 ( +1.78%) [ +4.58% +3.85% +0.00% / +3.81% +1.78% +5.19%] index_select random_sorted : Elapsed 0.005 ms (5.157 ms / 1000) out_shape = [5, 3] in_shape = [2, 3] idx_dim = 0 B = [5, 3] (stride (3, 1)) dim = 0 fill_cnt = 2 4.223 -> 4.262 ( +0.92%) [ +0.00% +4.40% +3.84% / +0.92% +6.42% +2.01%] index_fill_ const : Elapsed 0.004 ms (4.223 ms / 1000) 4.219 -> 4.220 ( +0.02%) [ +3.03% +2.99% +0.00% / +1.23% +0.02% +2.23%] index_fill_ linear : Elapsed 0.004 ms (4.347 ms / 1000) 4.355 -> 4.242 ( -2.59%) [ +0.00% +0.09% +1.68% / -1.56% -2.59% -1.24%] index_fill_ reverse : Elapsed 0.004 ms (4.355 ms / 1000) 4.257 -> 4.286 ( +0.68%) [ +0.00% +2.16% +4.93% / +0.68% +0.68% +1.53%] index_fill_ skip64 : Elapsed 0.004 ms (4.257 ms / 1000) 4.182 -> 4.262 ( +1.91%) [ +0.00% +4.28% +5.24% / +1.91% +3.85% +2.53%] index_fill_ skip256 : Elapsed 0.004 ms (4.182 ms / 1000) 4.218 -> 4.230 ( +0.28%) [ +0.00% +1.85% +2.37% / +0.28% +5.90% +1.64%] index_fill_ spread : Elapsed 0.004 ms (4.218 ms / 1000) 4.358 -> 4.245 ( -2.59%) [ +0.18% +0.00% +0.14% / -1.97% -2.59% -0.80%] index_fill_ strided 3 : Elapsed 0.004 ms (4.366 ms / 1000) 4.243 -> 4.231 ( -0.28%) [ +0.00% +2.97% +4.76% / -0.28% +4.17% -0.09%] index_fill_ random : Elapsed 0.004 ms (4.243 ms / 1000) 4.246 -> 4.231 ( -0.35%) [ +0.14% +2.21% +0.00% / -0.35% +0.33% +0.02%] index_fill_ random_sorted : Elapsed 0.004 ms (4.252 ms / 1000) 4.326 -> 4.258 ( -1.57%) [ +0.90% +0.14% +0.00% / -1.57% +1.23% -0.88%] index_fill_ perm : Elapsed 0.004 ms (4.365 ms / 1000) 4.230 -> 4.263 ( +0.78%) [ +2.88% +5.11% +0.00% / +0.78% +1.42% +2.67%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.352 ms / 1000) B = [5, 3] (stride (3, 1)) A = [2, 3] (stride (3, 1)) dim = 0 4.625 -> 4.480 ( -3.14%) [ +0.04% +1.58% +0.00% / -0.76% -0.95% -3.14%] index_add_ linear : Elapsed 0.005 ms (4.627 ms / 1000) 4.474 -> 4.462 ( -0.27%) [ +3.87% +4.16% +0.00% / +2.26% -0.27% +1.56%] index_copy_ linear : Elapsed 0.005 ms (4.647 ms / 1000) 4.445 -> 4.451 ( +0.13%) [ +0.45% +44.43% +0.00% / +6.68% +0.13% +1.10%] index_add_ reverse : Elapsed 0.004 ms (4.465 ms / 1000) 4.509 -> 4.514 ( +0.11%) [ +0.00% +22.98% +1.26% / +1.26% +0.42% +0.11%] index_copy_ reverse : Elapsed 0.005 ms (4.509 ms / 1000) 4.496 -> 4.481 ( -0.33%) [ +0.00% +3.58% +0.24% / +2.09% -0.33% +0.96%] index_add_ spread : Elapsed 0.004 ms (4.496 ms / 1000) 4.506 -> 4.453 ( -1.18%) [ +0.53% +1.44% +0.00% / +1.29% -1.18% +1.13%] index_copy_ spread : Elapsed 0.005 ms (4.530 ms / 1000) 4.478 -> 4.472 ( -0.13%) [ +2.21% +3.66% +0.00% / +3.35% -0.13% +1.59%] index_add_ strided 3 : Elapsed 0.005 ms (4.577 ms / 1000) 4.583 -> 4.479 ( -2.27%) [ +2.81% +0.00% +2.47% / -1.31% -0.57% -2.27%] index_copy_ strided 3 : Elapsed 0.005 ms (4.712 ms / 1000) 4.469 -> 4.462 ( -0.16%) [ +0.00% +4.52% +0.07% / +2.69% -0.16% +1.25%] index_add_ perm : Elapsed 0.004 ms (4.469 ms / 1000) 4.472 -> 4.452 ( -0.45%) [ +0.07% +2.73% +0.00% / +2.42% -0.45% +2.33%] index_copy_ perm : Elapsed 0.004 ms (4.475 ms / 1000) 4.451 -> 4.472 ( +0.47%) [ +0.00% +4.31% +4.11% / +3.19% +1.55% +0.47%] index_add_ perm_sorted : Elapsed 0.004 ms (4.451 ms / 1000) 4.505 -> 4.502 ( -0.07%) [ +0.00% +2.04% +0.67% / +1.95% -0.07% +0.02%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.505 ms / 1000) 5.001 -> 5.000 ( -0.02%) [ +4.52% +2.34% +0.00% / +1.08% -0.02% +1.70%] index_select const : Elapsed 0.005 ms (5.227 ms / 1000) 4.991 -> 4.991 ( +0.00%) [ +3.53% +1.82% +0.00% / +1.44% +0.00% +2.44%] index_select wrap : Elapsed 0.005 ms (5.167 ms / 1000) 5.073 -> 5.107 ( +0.67%) [ +1.95% +0.00% +0.71% / +4.73% +1.56% +0.67%] index_select linear : Elapsed 0.005 ms (5.172 ms / 1000) 4.951 -> 5.086 ( +2.73%) [ +3.94% +3.35% +0.00% / +5.66% +3.55% +2.73%] index_select reverse : Elapsed 0.005 ms (5.146 ms / 1000) 5.031 -> 5.015 ( -0.32%) [ +2.17% +1.25% +0.00% / +4.31% -0.32% +1.51%] index_select skip64 : Elapsed 0.005 ms (5.140 ms / 1000) 5.120 -> 5.077 ( -0.84%) [ +0.53% +0.00% +1.04% / -0.37% +0.66% -0.84%] index_select skip256 : Elapsed 0.005 ms (5.147 ms / 1000) 4.958 -> 5.000 ( +0.85%) [ +4.26% +2.24% +0.00% / +2.94% +0.85% +1.73%] index_select spread : Elapsed 0.005 ms (5.169 ms / 1000) 4.942 -> 5.006 ( +1.30%) [ +4.69% +4.05% +0.00% / +3.54% +1.30% +7.35%] index_select random : Elapsed 0.005 ms (5.174 ms / 1000) 5.100 -> 5.060 ( -0.78%) [ +1.69% +0.00% +1.10% / -0.78% +0.25% +0.22%] index_select random_sorted : Elapsed 0.005 ms (5.186 ms / 1000) B = [5, 3] (stride (3, 1)) A = [2, 3] (stride (1, 2)) dim = 0 4.473 -> 4.416 ( -1.27%) [ +0.07% +3.80% +0.00% / +4.38% -1.27% +3.22%] index_add_ linear : Elapsed 0.004 ms (4.476 ms / 1000) 4.475 -> 4.461 ( -0.31%) [ +0.11% +2.86% +0.00% / +2.37% -0.31% +1.47%] index_copy_ linear : Elapsed 0.004 ms (4.480 ms / 1000) 4.473 -> 4.555 ( +1.83%) [ +0.00% +4.14% +3.91% / +2.68% +2.91% +1.83%] index_add_ reverse : Elapsed 0.004 ms (4.473 ms / 1000) 4.472 -> 4.492 ( +0.45%) [ +2.30% +3.31% +0.00% / +2.53% +1.50% +0.45%] index_copy_ reverse : Elapsed 0.005 ms (4.575 ms / 1000) 4.458 -> 4.426 ( -0.72%) [ +0.00% +4.51% +0.11% / +3.28% -0.72% -0.49%] index_add_ spread : Elapsed 0.004 ms (4.458 ms / 1000) 4.395 -> 4.426 ( +0.71%) [ +0.00% +5.46% +2.28% / +3.55% +0.71% +2.71%] index_copy_ spread : Elapsed 0.004 ms (4.395 ms / 1000) 4.419 -> 4.458 ( +0.88%) [ +0.00% +5.02% +4.91% / +3.64% +3.60% +0.88%] index_add_ strided 3 : Elapsed 0.004 ms (4.419 ms / 1000) 4.451 -> 4.430 ( -0.47%) [ +0.49% +3.91% +0.00% / +2.43% -0.47% +0.74%] index_copy_ strided 3 : Elapsed 0.004 ms (4.473 ms / 1000) 4.452 -> 4.431 ( -0.47%) [ +0.00% +6.54% +0.58% / +4.25% -0.47% +1.19%] index_add_ perm : Elapsed 0.004 ms (4.452 ms / 1000) 4.463 -> 4.438 ( -0.56%) [ +0.00% +3.50% +1.57% / +1.99% -0.56% +1.75%] index_copy_ perm : Elapsed 0.004 ms (4.463 ms / 1000) 4.544 -> 4.448 ( -2.11%) [ +1.65% +2.13% +0.00% / +1.25% +0.77% -2.11%] index_add_ perm_sorted : Elapsed 0.005 ms (4.619 ms / 1000) 4.479 -> 4.441 ( -0.85%) [ +0.00% +3.06% +0.63% / +2.66% -0.85% -0.76%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.479 ms / 1000) 4.965 -> 4.991 ( +0.52%) [ +5.44% +1.77% +0.00% / +2.03% +0.52% +2.72%] index_select const : Elapsed 0.005 ms (5.235 ms / 1000) 5.059 -> 5.054 ( -0.10%) [ +2.57% +0.00% +0.81% / +0.36% +3.24% -0.10%] index_select wrap : Elapsed 0.005 ms (5.189 ms / 1000) 4.942 -> 5.036 ( +1.90%) [ +5.69% +3.16% +0.00% / +2.97% +3.26% +1.90%] index_select linear : Elapsed 0.005 ms (5.223 ms / 1000) 4.956 -> 5.126 ( +3.43%) [ +6.92% +2.14% +0.00% / +5.39% +3.43% +4.18%] index_select reverse : Elapsed 0.005 ms (5.299 ms / 1000) 5.102 -> 5.063 ( -0.76%) [ +2.78% +0.00% +0.82% / -0.76% +0.16% +2.29%] index_select skip64 : Elapsed 0.005 ms (5.244 ms / 1000) 4.938 -> 5.014 ( +1.54%) [ +4.60% +3.38% +0.00% / +2.53% +2.90% +1.54%] index_select skip256 : Elapsed 0.005 ms (5.165 ms / 1000) 4.969 -> 5.038 ( +1.39%) [ +3.74% +2.31% +0.00% / +2.88% +1.39% +4.59%] index_select spread : Elapsed 0.005 ms (5.155 ms / 1000) 5.075 -> 5.023 ( -1.02%) [ +2.38% +0.22% +0.00% / -0.89% +1.36% -1.02%] index_select random : Elapsed 0.005 ms (5.196 ms / 1000) 4.944 -> 4.990 ( +0.93%) [ +7.77% +2.85% +0.00% / +1.62% +0.93% +1.78%] index_select random_sorted : Elapsed 0.005 ms (5.328 ms / 1000) B = [5, 3] (stride (1, 5)) dim = 0 fill_cnt = 2 4.163 -> 4.235 ( +1.73%) [ +1.99% +5.16% +0.00% / +3.03% +1.73% +3.17%] index_fill_ const : Elapsed 0.004 ms (4.246 ms / 1000) 4.174 -> 4.259 ( +2.04%) [ +5.27% +3.69% +0.00% / +2.04% +2.42% +3.19%] index_fill_ linear : Elapsed 0.004 ms (4.394 ms / 1000) 4.267 -> 4.248 ( -0.45%) [ +0.00% +2.51% +1.12% / -0.45% +1.34% -0.26%] index_fill_ reverse : Elapsed 0.004 ms (4.267 ms / 1000) 4.199 -> 4.219 ( +0.48%) [ +3.45% +3.60% +0.00% / +1.02% +0.48% +1.31%] index_fill_ skip64 : Elapsed 0.004 ms (4.344 ms / 1000) 4.208 -> 4.334 ( +2.99%) [ +0.12% +3.59% +0.00% / +2.99% +3.64% +3.04%] index_fill_ skip256 : Elapsed 0.004 ms (4.213 ms / 1000) 4.233 -> 4.248 ( +0.35%) [ +0.00% +3.61% +0.85% / +0.35% +2.83% +1.54%] index_fill_ spread : Elapsed 0.004 ms (4.233 ms / 1000) 4.219 -> 4.239 ( +0.47%) [ +0.00% +4.05% +1.97% / +0.73% +0.47% +7.18%] index_fill_ strided 3 : Elapsed 0.004 ms (4.219 ms / 1000) 4.181 -> 4.241 ( +1.44%) [ +0.05% +4.74% +0.00% / +3.04% +1.44% +2.94%] index_fill_ random : Elapsed 0.004 ms (4.183 ms / 1000) 4.181 -> 4.243 ( +1.48%) [ +3.49% +4.42% +0.00% / +1.48% +1.70% +3.35%] index_fill_ random_sorted : Elapsed 0.004 ms (4.327 ms / 1000) 4.182 -> 4.217 ( +0.84%) [ +0.00% +3.95% +4.57% / +0.84% +3.99% +2.46%] index_fill_ perm : Elapsed 0.004 ms (4.182 ms / 1000) 4.163 -> 4.224 ( +1.47%) [ +6.22% +4.71% +0.00% / +2.69% +1.47% +3.00%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.422 ms / 1000) B = [5, 3] (stride (1, 5)) A = [2, 3] (stride (3, 1)) dim = 0 4.410 -> 4.453 ( +0.98%) [ +1.47% +5.94% +0.00% / +5.78% +0.98% +7.89%] index_add_ linear : Elapsed 0.004 ms (4.475 ms / 1000) 4.408 -> 4.488 ( +1.81%) [ +2.81% +6.22% +0.00% / +5.40% +1.81% +2.02%] index_copy_ linear : Elapsed 0.005 ms (4.532 ms / 1000) 4.480 -> 4.481 ( +0.02%) [ +0.00% +3.86% +3.44% / +3.64% +1.72% +0.02%] index_add_ reverse : Elapsed 0.004 ms (4.480 ms / 1000) 4.471 -> 4.415 ( -1.25%) [ +0.85% +3.82% +0.00% / +3.60% -1.25% -0.72%] index_copy_ reverse : Elapsed 0.005 ms (4.509 ms / 1000) 4.447 -> 4.415 ( -0.72%) [ +0.00% +5.22% +1.33% / +3.62% -0.72% +1.91%] index_add_ spread : Elapsed 0.004 ms (4.447 ms / 1000) 4.561 -> 4.535 ( -0.57%) [ +3.03% +1.60% +0.00% / +0.31% +0.55% -0.57%] index_copy_ spread : Elapsed 0.005 ms (4.699 ms / 1000) 4.431 -> 4.455 ( +0.54%) [ +0.00% +8.12% +0.41% / +3.93% +0.70% +0.54%] index_add_ strided 3 : Elapsed 0.004 ms (4.431 ms / 1000) 4.431 -> 4.505 ( +1.67%) [ +1.60% +3.68% +0.00% / +3.81% +1.67% +2.39%] index_copy_ strided 3 : Elapsed 0.005 ms (4.502 ms / 1000) 4.420 -> 4.447 ( +0.61%) [ +0.00% +6.02% +0.16% / +4.21% +0.61% +2.40%] index_add_ perm : Elapsed 0.004 ms (4.420 ms / 1000) 4.432 -> 4.480 ( +1.08%) [ +0.00% +4.76% +3.84% / +3.66% +3.95% +1.08%] index_copy_ perm : Elapsed 0.004 ms (4.432 ms / 1000) 4.426 -> 4.495 ( +1.56%) [ +0.00% +5.22% +0.18% / +4.95% +4.38% +1.56%] index_add_ perm_sorted : Elapsed 0.004 ms (4.426 ms / 1000) 4.441 -> 4.462 ( +0.47%) [ +0.27% +3.58% +0.00% / +3.49% +0.47% +2.68%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.453 ms / 1000) 5.103 -> 5.065 ( -0.74%) [ +5.39% +0.08% +0.00% / -0.41% +0.84% -0.74%] index_select const : Elapsed 0.005 ms (5.378 ms / 1000) 4.973 -> 5.046 ( +1.47%) [ +5.17% +2.05% +0.00% / +2.15% +5.95% +1.47%] index_select wrap : Elapsed 0.005 ms (5.230 ms / 1000) 4.956 -> 5.120 ( +3.31%) [ +4.44% +2.85% +0.00% / +3.31% +5.15% +4.18%] index_select linear : Elapsed 0.005 ms (5.176 ms / 1000) 4.997 -> 5.085 ( +1.76%) [ +3.38% +2.54% +0.00% / +2.28% +5.34% +1.76%] index_select reverse : Elapsed 0.005 ms (5.166 ms / 1000) 5.060 -> 5.112 ( +1.03%) [ +1.68% +1.64% +0.00% / +1.03% +2.53% +2.23%] index_select skip64 : Elapsed 0.005 ms (5.145 ms / 1000) 4.943 -> 5.126 ( +3.70%) [ +3.70% +3.24% +0.00% / +3.74% +3.70% +4.77%] index_select skip256 : Elapsed 0.005 ms (5.126 ms / 1000) 5.078 -> 5.046 ( -0.63%) [ +1.36% +0.00% +0.35% / +2.36% +3.03% -0.63%] index_select spread : Elapsed 0.005 ms (5.147 ms / 1000) 5.065 -> 5.111 ( +0.91%) [ +1.20% +0.43% +0.00% / +0.91% +2.43% +2.80%] index_select random : Elapsed 0.005 ms (5.126 ms / 1000) 4.961 -> 5.006 ( +0.91%) [ +4.70% +2.60% +0.00% / +3.00% +0.91% +3.99%] index_select random_sorted : Elapsed 0.005 ms (5.194 ms / 1000) B = [5, 3] (stride (1, 5)) A = [2, 3] (stride (1, 2)) dim = 0 4.512 -> 4.545 ( +0.73%) [ +1.13% +2.77% +0.00% / +1.95% +0.73% +0.86%] index_add_ linear : Elapsed 0.005 ms (4.563 ms / 1000) 4.457 -> 4.444 ( -0.29%) [ +0.00% +3.37% +3.77% / +3.12% -0.29% +2.18%] index_copy_ linear : Elapsed 0.004 ms (4.457 ms / 1000) 4.451 -> 4.481 ( +0.67%) [ +0.29% +4.27% +0.00% / +4.94% +2.63% +0.67%] index_add_ reverse : Elapsed 0.004 ms (4.464 ms / 1000) 4.440 -> 4.444 ( +0.09%) [ +0.00% +2.45% +0.59% / +3.27% +0.09% +1.51%] index_copy_ reverse : Elapsed 0.004 ms (4.440 ms / 1000) 4.419 -> 4.464 ( +1.02%) [ +0.00% +5.05% +2.69% / +4.75% +3.39% +1.02%] index_add_ spread : Elapsed 0.004 ms (4.419 ms / 1000) 4.456 -> 4.410 ( -1.03%) [ +0.00% +3.93% +3.30% / +3.57% -1.03% -0.43%] index_copy_ spread : Elapsed 0.004 ms (4.456 ms / 1000) 4.435 -> 4.457 ( +0.50%) [ +0.18% +5.48% +0.00% / +3.81% +0.50% +2.64%] index_add_ strided 3 : Elapsed 0.004 ms (4.443 ms / 1000) 4.459 -> 4.485 ( +0.58%) [ +0.43% +3.41% +0.00% / +3.48% +0.58% +0.87%] index_copy_ strided 3 : Elapsed 0.004 ms (4.478 ms / 1000) 4.477 -> 4.464 ( -0.29%) [ +0.00% +4.36% +3.86% / +4.49% +1.74% -0.29%] index_add_ perm : Elapsed 0.004 ms (4.477 ms / 1000) 4.480 -> 4.477 ( -0.07%) [ +0.00% +2.75% +2.08% / +4.04% -0.07% +2.70%] index_copy_ perm : Elapsed 0.004 ms (4.480 ms / 1000) 4.483 -> 4.446 ( -0.83%) [ +0.25% +3.48% +0.00% / +3.37% -0.83% +1.61%] index_add_ perm_sorted : Elapsed 0.004 ms (4.494 ms / 1000) 4.447 -> 4.467 ( +0.45%) [ +1.71% +3.58% +0.00% / +3.94% +2.23% +0.45%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.523 ms / 1000) 5.076 -> 5.013 ( -1.24%) [ +3.21% +0.00% +0.33% / +1.22% -1.24% +1.48%] index_select const : Elapsed 0.005 ms (5.239 ms / 1000) 4.923 -> 5.019 ( +1.95%) [ +5.44% +3.92% +0.00% / +7.29% +1.95% +5.12%] index_select wrap : Elapsed 0.005 ms (5.191 ms / 1000) 4.926 -> 5.090 ( +3.33%) [ +8.77% +3.05% +0.00% / +3.33% +3.76% +3.61%] index_select linear : Elapsed 0.005 ms (5.358 ms / 1000) 5.088 -> 5.022 ( -1.30%) [ +2.02% +0.00% +1.45% / +1.51% -1.22% -1.30%] index_select reverse : Elapsed 0.005 ms (5.191 ms / 1000) 4.948 -> 4.990 ( +0.85%) [ +4.67% +3.27% +0.00% / +3.76% +0.85% +4.61%] index_select skip64 : Elapsed 0.005 ms (5.179 ms / 1000) 4.928 -> 5.100 ( +3.49%) [ +4.55% +8.30% +0.00% / +3.49% +4.10% +4.79%] index_select skip256 : Elapsed 0.005 ms (5.152 ms / 1000) 5.088 -> 4.991 ( -1.91%) [ +1.40% +0.00% +1.69% / +0.43% -1.91% +2.24%] index_select spread : Elapsed 0.005 ms (5.159 ms / 1000) 4.984 -> 5.010 ( +0.52%) [ +2.87% +4.13% +0.00% / +1.95% +0.52% +2.67%] index_select random : Elapsed 0.005 ms (5.127 ms / 1000) 4.976 -> 5.048 ( +1.45%) [ +4.02% +3.86% +0.00% / +2.97% +3.38% +1.45%] index_select random_sorted : Elapsed 0.005 ms (5.176 ms / 1000) out_shape = [2, 5] in_shape = [2, 3] idx_dim = 1 B = [2, 5] (stride (5, 1)) dim = 1 fill_cnt = 3 4.239 -> 4.236 ( -0.07%) [ +0.00% +3.26% +3.33% / +1.01% -0.07% +2.29%] index_fill_ const : Elapsed 0.004 ms (4.239 ms / 1000) 4.222 -> 4.259 ( +0.88%) [ +0.00% +3.60% +3.34% / +1.82% +0.92% +0.88%] index_fill_ linear : Elapsed 0.004 ms (4.222 ms / 1000) 4.194 -> 4.242 ( +1.14%) [ +0.79% +3.65% +0.00% / +2.31% +1.14% +2.50%] index_fill_ reverse : Elapsed 0.004 ms (4.227 ms / 1000) 4.194 -> 4.266 ( +1.72%) [ +1.69% +8.01% +0.00% / +1.72% +4.48% +3.55%] index_fill_ skip64 : Elapsed 0.004 ms (4.265 ms / 1000) 4.222 -> 4.263 ( +0.97%) [ +0.00% +4.17% +4.29% / +1.44% +0.97% +1.66%] index_fill_ skip256 : Elapsed 0.004 ms (4.222 ms / 1000) 4.195 -> 4.251 ( +1.33%) [ +0.74% +3.81% +0.00% / +1.33% +4.05% +3.55%] index_fill_ spread : Elapsed 0.004 ms (4.226 ms / 1000) 4.225 -> 4.250 ( +0.59%) [ +0.00% +3.05% +0.24% / +0.59% +1.21% +4.80%] index_fill_ strided 3 : Elapsed 0.004 ms (4.225 ms / 1000) 4.237 -> 4.260 ( +0.54%) [ +0.00% +2.60% +0.83% / +1.18% +3.45% +0.54%] index_fill_ random : Elapsed 0.004 ms (4.237 ms / 1000) 4.254 -> 4.286 ( +0.75%) [ +4.33% +2.23% +0.00% / +4.72% +0.75% +4.16%] index_fill_ random_sorted : Elapsed 0.004 ms (4.438 ms / 1000) 4.196 -> 4.219 ( +0.55%) [ +1.41% +4.00% +0.00% / +4.67% +0.55% +2.91%] index_fill_ perm : Elapsed 0.004 ms (4.255 ms / 1000) 4.234 -> 4.258 ( +0.57%) [ +3.19% +3.78% +0.00% / +0.57% +3.21% +1.18%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.369 ms / 1000) B = [2, 5] (stride (5, 1)) A = [2, 3] (stride (3, 1)) dim = 1 4.484 -> 4.491 ( +0.16%) [ +0.00% +3.79% +3.26% / +2.50% +0.16% +0.20%] index_add_ linear : Elapsed 0.004 ms (4.484 ms / 1000) 4.467 -> 4.499 ( +0.72%) [ +0.47% +2.89% +0.00% / +2.48% +3.58% +0.72%] index_copy_ linear : Elapsed 0.004 ms (4.488 ms / 1000) 4.483 -> 4.469 ( -0.31%) [ +0.38% +4.24% +0.00% / +2.07% -0.31% +1.18%] index_add_ reverse : Elapsed 0.004 ms (4.500 ms / 1000) 4.483 -> 4.501 ( +0.40%) [ +0.00% +4.17% +0.04% / +1.14% +3.03% +0.40%] index_copy_ reverse : Elapsed 0.004 ms (4.483 ms / 1000) 4.409 -> 4.485 ( +1.72%) [ +0.00% +8.94% +6.94% / +4.65% +2.09% +1.72%] index_add_ spread : Elapsed 0.004 ms (4.409 ms / 1000) 4.441 -> 4.444 ( +0.07%) [ +0.47% +4.19% +0.00% / +10.27% +0.07% +2.48%] index_copy_ spread : Elapsed 0.004 ms (4.462 ms / 1000) 4.411 -> 4.425 ( +0.32%) [ +0.16% +4.87% +0.00% / +3.45% +3.33% +0.32%] index_add_ strided 3 : Elapsed 0.004 ms (4.418 ms / 1000) 4.439 -> 4.523 ( +1.89%) [ +0.00% +4.26% +3.87% / +2.32% +2.50% +1.89%] index_copy_ strided 3 : Elapsed 0.004 ms (4.439 ms / 1000) 4.468 -> 4.459 ( -0.20%) [ +2.22% +4.36% +0.00% / +2.55% +1.23% -0.20%] index_add_ perm : Elapsed 0.005 ms (4.567 ms / 1000) 4.464 -> 4.494 ( +0.67%) [ +0.16% +4.23% +0.00% / +4.19% +0.67% +4.30%] index_copy_ perm : Elapsed 0.004 ms (4.471 ms / 1000) 4.474 -> 4.452 ( -0.49%) [ +4.11% +4.18% +0.00% / +5.57% +2.91% -0.49%] index_add_ perm_sorted : Elapsed 0.005 ms (4.658 ms / 1000) 4.472 -> 4.497 ( +0.56%) [ +0.00% +3.09% +3.51% / +8.99% +0.56% +7.67%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.472 ms / 1000) 4.979 -> 5.129 ( +3.01%) [ +4.64% +2.73% +0.00% / +3.01% +4.82% +3.68%] index_select const : Elapsed 0.005 ms (5.210 ms / 1000) 4.944 -> 5.038 ( +1.90%) [ +4.35% +3.74% +0.00% / +4.57% +3.84% +1.90%] index_select wrap : Elapsed 0.005 ms (5.159 ms / 1000) 5.121 -> 5.015 ( -2.07%) [ +3.20% +0.00% +1.45% / +0.51% -2.07% -1.54%] index_select linear : Elapsed 0.005 ms (5.285 ms / 1000) 4.933 -> 4.993 ( +1.22%) [ +3.34% +3.20% +0.00% / +3.32% +1.22% +4.34%] index_select reverse : Elapsed 0.005 ms (5.098 ms / 1000) 4.935 -> 5.028 ( +1.88%) [ +4.56% +3.08% +0.00% / +2.45% +1.88% +3.79%] index_select skip64 : Elapsed 0.005 ms (5.160 ms / 1000) 5.085 -> 5.071 ( -0.28%) [ +0.63% +0.00% +0.87% / +0.20% -0.28% +2.03%] index_select skip256 : Elapsed 0.005 ms (5.117 ms / 1000) 4.967 -> 5.049 ( +1.65%) [ +7.49% +2.03% +0.00% / +2.46% +1.65% +4.61%] index_select spread : Elapsed 0.005 ms (5.339 ms / 1000) 5.094 -> 5.052 ( -0.82%) [ +1.55% +0.00% +0.00% / -0.49% -0.41% -0.82%] index_select random : Elapsed 0.005 ms (5.173 ms / 1000) 5.076 -> 5.079 ( +0.06%) [ +2.03% +0.00% +0.20% / +0.53% +0.06% +0.16%] index_select random_sorted : Elapsed 0.005 ms (5.179 ms / 1000) B = [2, 5] (stride (5, 1)) A = [2, 3] (stride (1, 2)) dim = 1 4.459 -> 4.497 ( +0.85%) [ +0.00% +4.13% +0.40% / +2.83% +0.85% +1.23%] index_add_ linear : Elapsed 0.004 ms (4.459 ms / 1000) 4.474 -> 4.490 ( +0.36%) [ +0.72% +2.91% +0.00% / +2.95% +0.36% +1.90%] index_copy_ linear : Elapsed 0.005 ms (4.506 ms / 1000) 4.478 -> 4.451 ( -0.60%) [ +0.00% +3.37% +0.31% / +2.86% +3.15% -0.60%] index_add_ reverse : Elapsed 0.004 ms (4.478 ms / 1000) 4.450 -> 4.456 ( +0.13%) [ +0.00% +4.22% +4.38% / +1.89% +0.13% +0.74%] index_copy_ reverse : Elapsed 0.004 ms (4.450 ms / 1000) 4.565 -> 4.478 ( -1.91%) [ +0.00% +1.93% +3.44% / +0.42% -1.64% -1.91%] index_add_ spread : Elapsed 0.005 ms (4.565 ms / 1000) 4.438 -> 4.479 ( +0.92%) [ +0.00% +3.47% +1.15% / +2.86% +0.92% +1.71%] index_copy_ spread : Elapsed 0.004 ms (4.438 ms / 1000) 4.625 -> 4.464 ( -3.48%) [ +0.09% +0.02% +0.00% / -0.24% -2.53% -3.48%] index_add_ strided 3 : Elapsed 0.005 ms (4.629 ms / 1000) 4.486 -> 4.461 ( -0.56%) [ +0.00% +3.92% +0.51% / +2.03% -0.56% +0.38%] index_copy_ strided 3 : Elapsed 0.004 ms (4.486 ms / 1000) 4.497 -> 4.499 ( +0.04%) [ +2.96% +3.62% +0.00% / +1.87% +0.04% +0.07%] index_add_ perm : Elapsed 0.005 ms (4.630 ms / 1000) 4.472 -> 4.537 ( +1.45%) [ +0.00% +2.08% +0.11% / +3.13% +2.80% +1.45%] index_copy_ perm : Elapsed 0.004 ms (4.472 ms / 1000) 4.616 -> 4.486 ( -2.82%) [ +0.00% +1.06% +1.60% / +1.60% -2.82% -1.04%] index_add_ perm_sorted : Elapsed 0.005 ms (4.616 ms / 1000) 4.489 -> 4.483 ( -0.13%) [ +1.45% +2.61% +0.00% / +2.45% -0.13% +2.74%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.554 ms / 1000) 4.957 -> 5.069 ( +2.26%) [ +4.82% +2.90% +0.00% / +2.30% +2.32% +2.26%] index_select const : Elapsed 0.005 ms (5.196 ms / 1000) 5.052 -> 5.104 ( +1.03%) [ +0.87% +4.41% +0.00% / +1.21% +2.47% +1.03%] index_select wrap : Elapsed 0.005 ms (5.096 ms / 1000) 4.932 -> 5.047 ( +2.33%) [ +4.03% +5.68% +0.00% / +2.33% +2.90% +5.23%] index_select linear : Elapsed 0.005 ms (5.131 ms / 1000) 4.960 -> 5.021 ( +1.23%) [ +3.49% +2.66% +0.00% / +1.23% +1.92% +3.87%] index_select reverse : Elapsed 0.005 ms (5.133 ms / 1000) 5.017 -> 5.105 ( +1.75%) [ +2.01% +1.46% +0.00% / +2.27% +1.75% +5.10%] index_select skip64 : Elapsed 0.005 ms (5.118 ms / 1000) 5.016 -> 5.026 ( +0.20%) [ +5.90% +1.57% +0.00% / +1.36% +0.20% +2.09%] index_select skip256 : Elapsed 0.005 ms (5.312 ms / 1000) 4.975 -> 5.046 ( +1.43%) [ +4.76% +2.19% +0.00% / +3.14% +1.43% +2.65%] index_select spread : Elapsed 0.005 ms (5.212 ms / 1000) 4.931 -> 5.019 ( +1.78%) [ +7.73% +4.48% +0.00% / +6.79% +3.55% +1.78%] index_select random : Elapsed 0.005 ms (5.312 ms / 1000) 5.120 -> 5.041 ( -1.54%) [ +1.50% +0.82% +0.00% / -0.72% -1.54% -1.37%] index_select random_sorted : Elapsed 0.005 ms (5.197 ms / 1000) B = [2, 5] (stride (1, 2)) dim = 1 fill_cnt = 3 4.204 -> 4.213 ( +0.21%) [ +1.05% +5.88% +0.00% / +1.38% +0.21% +5.09%] index_fill_ const : Elapsed 0.004 ms (4.248 ms / 1000) 4.246 -> 4.253 ( +0.16%) [ +0.71% +5.58% +0.00% / +0.16% +3.20% +1.13%] index_fill_ linear : Elapsed 0.004 ms (4.276 ms / 1000) 4.247 -> 4.205 ( -0.99%) [ +0.00% +2.83% +3.72% / +1.34% -0.99% +0.09%] index_fill_ reverse : Elapsed 0.004 ms (4.247 ms / 1000) 4.189 -> 4.222 ( +0.79%) [ +0.00% +3.65% +1.24% / +1.07% +0.79% +3.53%] index_fill_ skip64 : Elapsed 0.004 ms (4.189 ms / 1000) 4.243 -> 4.215 ( -0.66%) [ +0.00% +5.84% +0.52% / +0.49% -0.66% +1.98%] index_fill_ skip256 : Elapsed 0.004 ms (4.243 ms / 1000) 4.259 -> 4.250 ( -0.21%) [ +1.24% +7.80% +0.00% / +0.09% +1.90% -0.21%] index_fill_ spread : Elapsed 0.004 ms (4.312 ms / 1000) 4.340 -> 4.233 ( -2.47%) [ +0.00% +0.35% +1.20% / -1.77% -2.47% +0.44%] index_fill_ strided 3 : Elapsed 0.004 ms (4.340 ms / 1000) 4.198 -> 4.226 ( +0.67%) [ +1.14% +6.50% +0.00% / +1.64% +0.67% +5.17%] index_fill_ random : Elapsed 0.004 ms (4.246 ms / 1000) 4.262 -> 4.294 ( +0.75%) [ +0.00% +2.77% +4.62% / +3.24% +3.21% +0.75%] index_fill_ random_sorted : Elapsed 0.004 ms (4.262 ms / 1000) 4.333 -> 4.264 ( -1.59%) [ +1.22% +0.05% +0.00% / -0.99% -1.45% -1.59%] index_fill_ perm : Elapsed 0.004 ms (4.386 ms / 1000) 4.200 -> 4.337 ( +3.26%) [ +7.64% +3.93% +0.00% / +4.40% +4.98% +3.26%] index_fill_ perm_sorted : Elapsed 0.005 ms (4.521 ms / 1000) B = [2, 5] (stride (1, 2)) A = [2, 3] (stride (3, 1)) dim = 1 4.494 -> 4.451 ( -0.96%) [ +0.22% +4.34% +0.00% / +2.71% -0.96% -0.24%] index_add_ linear : Elapsed 0.005 ms (4.504 ms / 1000) 4.480 -> 4.524 ( +0.98%) [ +0.00% +3.17% +1.45% / +2.75% +3.84% +0.98%] index_copy_ linear : Elapsed 0.004 ms (4.480 ms / 1000) 4.479 -> 4.487 ( +0.18%) [ +0.00% +3.48% +4.49% / +7.93% +0.18% +0.76%] index_add_ reverse : Elapsed 0.004 ms (4.479 ms / 1000) 4.470 -> 4.486 ( +0.36%) [ +0.00% +3.15% +1.66% / +2.24% +0.36% +5.70%] index_copy_ reverse : Elapsed 0.004 ms (4.470 ms / 1000) 4.453 -> 4.494 ( +0.92%) [ +0.22% +4.92% +0.00% / +4.09% +1.08% +0.92%] index_add_ spread : Elapsed 0.004 ms (4.463 ms / 1000) 4.461 -> 4.515 ( +1.21%) [ +0.00% +3.45% +0.83% / +3.18% +2.91% +1.21%] index_copy_ spread : Elapsed 0.004 ms (4.461 ms / 1000) 4.568 -> 4.490 ( -1.71%) [ +1.49% +1.97% +0.00% / -0.02% -1.71% -1.05%] index_add_ strided 3 : Elapsed 0.005 ms (4.636 ms / 1000) 4.465 -> 4.460 ( -0.11%) [ +0.92% +3.94% +0.00% / +2.64% +0.31% -0.11%] index_copy_ strided 3 : Elapsed 0.005 ms (4.506 ms / 1000) 4.478 -> 4.476 ( -0.04%) [ +0.51% +4.18% +0.00% / +5.67% +5.63% -0.04%] index_add_ perm : Elapsed 0.005 ms (4.501 ms / 1000) 4.509 -> 4.455 ( -1.20%) [ +0.00% +2.75% +2.24% / +2.02% -0.58% -1.20%] index_copy_ perm : Elapsed 0.005 ms (4.509 ms / 1000) 4.417 -> 4.575 ( +3.58%) [ +2.88% +5.48% +0.00% / +4.53% +4.62% +3.58%] index_add_ perm_sorted : Elapsed 0.005 ms (4.544 ms / 1000) 4.471 -> 4.525 ( +1.21%) [ +1.95% +3.56% +0.00% / +2.84% +1.21% +1.52%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.558 ms / 1000) 5.044 -> 5.134 ( +1.78%) [ +3.35% +0.75% +0.00% / +1.90% +1.92% +1.78%] index_select const : Elapsed 0.005 ms (5.213 ms / 1000) 5.061 -> 5.008 ( -1.05%) [ +2.75% +0.02% +0.00% / -0.26% -1.05% +5.77%] index_select wrap : Elapsed 0.005 ms (5.200 ms / 1000) 4.956 -> 4.991 ( +0.71%) [ +3.51% +3.53% +0.00% / +1.29% +0.71% +5.73%] index_select linear : Elapsed 0.005 ms (5.130 ms / 1000) 5.101 -> 5.092 ( -0.18%) [ +0.84% +0.00% +3.12% / -0.18% +0.24% +5.12%] index_select reverse : Elapsed 0.005 ms (5.144 ms / 1000) 5.109 -> 4.994 ( -2.25%) [ +1.23% +0.00% +3.48% / +0.10% -2.25% +5.25%] index_select skip64 : Elapsed 0.005 ms (5.172 ms / 1000) 5.119 -> 4.997 ( -2.38%) [ +1.97% +0.00% +0.08% / -0.74% -2.38% -0.92%] index_select skip256 : Elapsed 0.005 ms (5.220 ms / 1000) 5.105 -> 5.122 ( +0.33%) [ +1.98% +0.00% +1.94% / +0.33% +2.06% +3.60%] index_select spread : Elapsed 0.005 ms (5.206 ms / 1000) 5.062 -> 5.010 ( -1.03%) [ +2.86% +0.00% +0.79% / +1.01% -1.03% +7.96%] index_select random : Elapsed 0.005 ms (5.207 ms / 1000) 4.978 -> 4.991 ( +0.26%) [ +4.72% +2.69% +0.00% / +2.55% +0.26% +10.20%] index_select random_sorted : Elapsed 0.005 ms (5.213 ms / 1000) B = [2, 5] (stride (1, 2)) A = [2, 3] (stride (1, 2)) dim = 1 4.456 -> 4.628 ( +3.86%) [ +0.61% +4.49% +0.00% / +3.86% +3.95% +6.93%] index_add_ linear : Elapsed 0.004 ms (4.483 ms / 1000) 4.435 -> 4.488 ( +1.20%) [ +0.00% +4.26% +3.34% / +6.20% +1.20% +10.94%] index_copy_ linear : Elapsed 0.004 ms (4.435 ms / 1000) 4.402 -> 4.580 ( +4.04%) [ +0.52% +6.16% +0.00% / +4.04% +4.91% +10.27%] index_add_ reverse : Elapsed 0.004 ms (4.425 ms / 1000) 4.416 -> 4.493 ( +1.74%) [ +1.09% +6.57% +0.00% / +3.46% +1.74% +10.44%] index_copy_ reverse : Elapsed 0.004 ms (4.464 ms / 1000) 4.441 -> 4.576 ( +3.04%) [ +0.16% +5.40% +0.00% / +3.74% +3.04% +6.67%] index_add_ spread : Elapsed 0.004 ms (4.448 ms / 1000) 4.560 -> 4.487 ( -1.60%) [ +1.29% +0.00% +0.57% / -0.37% -1.60% +8.60%] index_copy_ spread : Elapsed 0.005 ms (4.619 ms / 1000) 4.460 -> 4.480 ( +0.45%) [ +1.14% +3.90% +0.00% / +3.34% +0.45% +6.05%] index_add_ strided 3 : Elapsed 0.005 ms (4.511 ms / 1000) 4.418 -> 4.608 ( +4.30%) [ +3.80% +4.69% +0.00% / +4.46% +4.30% +20.53%] index_copy_ strided 3 : Elapsed 0.005 ms (4.586 ms / 1000) 4.474 -> 4.488 ( +0.31%) [ +0.00% +4.00% +3.53% / +6.68% +0.31% +11.76%] index_add_ perm : Elapsed 0.004 ms (4.474 ms / 1000) 4.467 -> 4.483 ( +0.36%) [ +4.52% +2.66% +0.00% / +2.53% +0.36% +7.16%] index_copy_ perm : Elapsed 0.005 ms (4.669 ms / 1000) 4.447 -> 4.570 ( +2.77%) [ +0.00% +7.89% +0.13% / +2.77% +6.34% +7.22%] index_add_ perm_sorted : Elapsed 0.004 ms (4.447 ms / 1000) 4.477 -> 4.606 ( +2.88%) [ +0.51% +2.81% +0.00% / +3.66% +2.88% +9.85%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.500 ms / 1000) 5.064 -> 5.048 ( -0.32%) [ +2.78% +0.00% +0.36% / +0.53% -0.32% +5.39%] index_select const : Elapsed 0.005 ms (5.205 ms / 1000) 4.933 -> 4.998 ( +1.32%) [ +3.85% +3.59% +0.00% / +3.61% +1.32% +11.60%] index_select wrap : Elapsed 0.005 ms (5.123 ms / 1000) 4.929 -> 5.157 ( +4.63%) [ +4.30% +3.33% +0.00% / +4.63% +6.74% +21.18%] index_select linear : Elapsed 0.005 ms (5.141 ms / 1000) 5.112 -> 5.005 ( -2.09%) [ +4.03% +0.29% +0.00% / -0.61% -2.09% +9.68%] index_select reverse : Elapsed 0.005 ms (5.318 ms / 1000) 4.936 -> 5.020 ( +1.70%) [ +4.19% +2.78% +0.00% / +2.67% +1.70% +19.45%] index_select skip64 : Elapsed 0.005 ms (5.143 ms / 1000) 4.929 -> 5.134 ( +4.16%) [ +5.44% +3.71% +0.00% / +4.16% +4.79% +9.68%] index_select skip256 : Elapsed 0.005 ms (5.197 ms / 1000) 5.084 -> 5.084 ( +0.00%) [ +2.89% +0.47% +0.00% / +0.00% +0.53% +7.08%] index_select spread : Elapsed 0.005 ms (5.231 ms / 1000) 4.990 -> 5.023 ( +0.66%) [ +3.23% +3.45% +0.00% / +2.53% +0.66% +7.60%] index_select random : Elapsed 0.005 ms (5.151 ms / 1000) 4.958 -> 5.159 ( +4.05%) [ +4.64% +5.49% +0.00% / +4.05% +4.90% +10.17%] index_select random_sorted : Elapsed 0.005 ms (5.188 ms / 1000) out_shape = [5, 2] in_shape = [3, 2] idx_dim = 0 B = [5, 2] (stride (2, 1)) dim = 0 fill_cnt = 3 4.379 -> 4.260 ( -2.72%) [ +0.87% +0.34% +0.00% / -2.37% -2.72% +3.91%] index_fill_ const : Elapsed 0.004 ms (4.417 ms / 1000) 4.196 -> 4.277 ( +1.93%) [ +0.00% +7.63% +0.43% / +1.93% +7.82% +10.51%] index_fill_ linear : Elapsed 0.004 ms (4.196 ms / 1000) 4.179 -> 4.282 ( +2.46%) [ +0.62% +5.00% +0.00% / +2.46% +3.57% +12.75%] index_fill_ reverse : Elapsed 0.004 ms (4.205 ms / 1000) 4.182 -> 4.320 ( +3.30%) [ +2.39% +4.30% +0.00% / +3.30% +6.77% +5.33%] index_fill_ skip64 : Elapsed 0.004 ms (4.282 ms / 1000) 4.230 -> 4.264 ( +0.80%) [ +0.00% +3.81% +3.12% / +1.30% +0.80% +11.09%] index_fill_ skip256 : Elapsed 0.004 ms (4.230 ms / 1000) 4.185 -> 4.229 ( +1.05%) [ +0.62% +4.18% +0.00% / +2.27% +1.05% +9.15%] index_fill_ spread : Elapsed 0.004 ms (4.211 ms / 1000) 4.172 -> 4.320 ( +3.55%) [ +1.94% +5.87% +0.00% / +3.55% +5.30% +11.53%] index_fill_ strided 3 : Elapsed 0.004 ms (4.253 ms / 1000) 4.198 -> 4.253 ( +1.31%) [ +1.17% +4.29% +0.00% / +1.83% +1.31% +7.05%] index_fill_ random : Elapsed 0.004 ms (4.247 ms / 1000) 4.242 -> 4.229 ( -0.31%) [ +0.00% +3.35% +2.71% / +0.97% -0.31% +7.31%] index_fill_ random_sorted : Elapsed 0.004 ms (4.242 ms / 1000) 4.239 -> 4.264 ( +0.59%) [ +0.87% +2.74% +0.00% / +0.99% +0.59% +10.80%] index_fill_ perm : Elapsed 0.004 ms (4.276 ms / 1000) 4.229 -> 4.284 ( +1.30%) [ +0.00% +5.49% +0.31% / +1.30% +3.97% +8.84%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.229 ms / 1000) B = [5, 2] (stride (2, 1)) A = [3, 2] (stride (2, 1)) dim = 0 4.437 -> 4.482 ( +1.01%) [ +0.00% +5.68% +5.07% / +4.12% +1.01% +8.86%] index_add_ linear : Elapsed 0.004 ms (4.437 ms / 1000) 4.452 -> 4.436 ( -0.36%) [ +0.00% +2.85% +0.61% / +3.39% -0.36% +10.49%] index_copy_ linear : Elapsed 0.004 ms (4.452 ms / 1000) 4.516 -> 4.575 ( +1.31%) [ +0.75% +3.26% +0.00% / +2.24% +1.31% +6.64%] index_add_ reverse : Elapsed 0.005 ms (4.550 ms / 1000) 4.448 -> 4.455 ( +0.16%) [ +0.00% +3.21% +1.33% / +3.12% +0.16% +8.66%] index_copy_ reverse : Elapsed 0.004 ms (4.448 ms / 1000) 4.406 -> 4.455 ( +1.11%) [ +0.00% +5.33% +5.95% / +4.56% +1.11% +51.97%] index_add_ spread : Elapsed 0.004 ms (4.406 ms / 1000) 4.432 -> 4.461 ( +0.65%) [ +0.00% +4.20% +0.97% / +5.17% +0.65% +8.30%] index_copy_ spread : Elapsed 0.004 ms (4.432 ms / 1000) 4.403 -> 4.595 ( +4.36%) [ +4.47% +5.86% +0.00% / +5.27% +4.36% +9.22%] index_add_ strided 3 : Elapsed 0.005 ms (4.600 ms / 1000) 4.535 -> 4.519 ( -0.35%) [ +1.35% +2.25% +0.00% / +0.31% -0.35% +6.00%] index_copy_ strided 3 : Elapsed 0.005 ms (4.596 ms / 1000) 4.465 -> 4.543 ( +1.75%) [ +0.00% +4.23% +0.69% / +3.31% +1.75% +10.24%] index_add_ perm : Elapsed 0.004 ms (4.465 ms / 1000) 4.489 -> 4.541 ( +1.16%) [ +0.00% +2.61% +0.09% / +2.32% +1.16% +8.13%] index_copy_ perm : Elapsed 0.004 ms (4.489 ms / 1000) 4.415 -> 4.477 ( +1.40%) [ +0.61% +6.02% +0.00% / +4.51% +1.40% +10.89%] index_add_ perm_sorted : Elapsed 0.004 ms (4.442 ms / 1000) 4.480 -> 4.438 ( -0.94%) [ +0.00% +3.55% +2.83% / +0.92% -0.94% +10.07%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.480 ms / 1000) 4.985 -> 4.999 ( +0.28%) [ +3.71% +1.79% +0.00% / +1.52% +0.28% +9.55%] index_select const : Elapsed 0.005 ms (5.170 ms / 1000) 4.929 -> 5.084 ( +3.14%) [ +4.59% +2.94% +0.00% / +3.14% +5.27% +9.21%] index_select wrap : Elapsed 0.005 ms (5.155 ms / 1000) 5.042 -> 5.040 ( -0.04%) [ +5.51% +3.17% +0.00% / +0.60% -0.04% +8.57%] index_select linear : Elapsed 0.005 ms (5.320 ms / 1000) 4.939 -> 5.099 ( +3.24%) [ +6.34% +3.16% +0.00% / +3.24% +5.08% +8.42%] index_select reverse : Elapsed 0.005 ms (5.252 ms / 1000) 4.943 -> 5.110 ( +3.38%) [ +9.47% +2.89% +0.00% / +3.38% +5.12% +9.45%] index_select skip64 : Elapsed 0.005 ms (5.411 ms / 1000) 5.053 -> 5.022 ( -0.61%) [ +3.15% +1.23% +0.00% / +0.36% -0.61% +9.99%] index_select skip256 : Elapsed 0.005 ms (5.212 ms / 1000) 4.952 -> 4.990 ( +0.77%) [ +5.23% +7.05% +0.00% / +3.21% +0.77% +9.81%] index_select spread : Elapsed 0.005 ms (5.211 ms / 1000) 4.987 -> 5.111 ( +2.49%) [ +5.27% +2.27% +0.00% / +2.49% +3.41% +9.34%] index_select random : Elapsed 0.005 ms (5.250 ms / 1000) 4.925 -> 4.997 ( +1.46%) [ +5.20% +3.80% +0.00% / +2.54% +1.46% +12.28%] index_select random_sorted : Elapsed 0.005 ms (5.181 ms / 1000) B = [5, 2] (stride (2, 1)) A = [3, 2] (stride (1, 3)) dim = 0 4.436 -> 4.436 ( +0.00%) [ +0.00% +5.39% +3.27% / +4.40% +0.00% +7.57%] index_add_ linear : Elapsed 0.004 ms (4.436 ms / 1000) 4.456 -> 4.531 ( +1.68%) [ +0.00% +2.20% +0.45% / +3.05% +1.68% +8.55%] index_copy_ linear : Elapsed 0.004 ms (4.456 ms / 1000) 4.416 -> 4.436 ( +0.45%) [ +3.17% +5.89% +0.00% / +3.96% +0.45% +9.15%] index_add_ reverse : Elapsed 0.005 ms (4.556 ms / 1000) 4.564 -> 4.440 ( -2.72%) [ +0.22% +1.58% +0.00% / +0.94% -2.72% +6.24%] index_copy_ reverse : Elapsed 0.005 ms (4.574 ms / 1000) 4.415 -> 4.447 ( +0.72%) [ +0.48% +5.66% +0.00% / +4.01% +0.72% +9.72%] index_add_ spread : Elapsed 0.004 ms (4.436 ms / 1000) 4.442 -> 4.597 ( +3.49%) [ +0.36% +4.86% +0.00% / +5.07% +3.49% +6.64%] index_copy_ spread : Elapsed 0.004 ms (4.458 ms / 1000) 4.420 -> 4.428 ( +0.18%) [ +0.57% +6.27% +0.00% / +5.72% +0.18% +7.85%] index_add_ strided 3 : Elapsed 0.004 ms (4.445 ms / 1000) 4.473 -> 4.557 ( +1.88%) [ +0.00% +3.60% +3.20% / +2.10% +1.88% +6.60%] index_copy_ strided 3 : Elapsed 0.004 ms (4.473 ms / 1000) 4.457 -> 4.562 ( +2.36%) [ +3.57% +5.63% +0.00% / +3.23% +2.36% +7.72%] index_add_ perm : Elapsed 0.005 ms (4.616 ms / 1000) 4.452 -> 4.564 ( +2.52%) [ +1.01% +4.51% +0.00% / +3.23% +2.52% +8.72%] index_copy_ perm : Elapsed 0.004 ms (4.497 ms / 1000) 4.460 -> 4.498 ( +0.85%) [ +0.00% +7.09% +2.02% / +2.53% +0.85% +5.70%] index_add_ perm_sorted : Elapsed 0.004 ms (4.460 ms / 1000) 4.470 -> 4.457 ( -0.29%) [ +0.02% +3.31% +0.00% / +1.43% -0.29% +3.53%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.471 ms / 1000) 4.937 -> 5.085 ( +3.00%) [ +4.98% +3.40% +0.00% / +3.00% +3.48% +7.47%] index_select const : Elapsed 0.005 ms (5.183 ms / 1000) 4.924 -> 5.062 ( +2.80%) [ +8.23% +4.16% +0.00% / +3.31% +2.80% +9.52%] index_select wrap : Elapsed 0.005 ms (5.329 ms / 1000) 5.055 -> 5.032 ( -0.45%) [ +2.81% +1.11% +0.00% / +0.71% -0.45% +1.50%] index_select linear : Elapsed 0.005 ms (5.197 ms / 1000) 4.943 -> 5.071 ( +2.59%) [ +5.73% +2.35% +0.00% / +2.59% +4.53% +9.51%] index_select reverse : Elapsed 0.005 ms (5.226 ms / 1000) 5.055 -> 4.991 ( -1.27%) [ +3.70% +2.45% +0.00% / +1.01% -1.27% +4.09%] index_select skip64 : Elapsed 0.005 ms (5.242 ms / 1000) 5.112 -> 4.992 ( -2.35%) [ +4.23% +9.72% +0.00% / -1.72% -2.35% +1.33%] index_select skip256 : Elapsed 0.005 ms (5.328 ms / 1000) 4.960 -> 5.088 ( +2.58%) [ +6.96% +5.81% +0.00% / +3.39% +2.58% +7.26%] index_select spread : Elapsed 0.005 ms (5.305 ms / 1000) 4.964 -> 5.018 ( +1.09%) [ +4.67% +2.50% +0.00% / +2.90% +1.09% +7.33%] index_select random : Elapsed 0.005 ms (5.196 ms / 1000) 5.050 -> 5.064 ( +0.28%) [ +5.43% +0.00% +1.39% / +0.81% +0.28% +6.61%] index_select random_sorted : Elapsed 0.005 ms (5.324 ms / 1000) B = [5, 2] (stride (1, 5)) dim = 0 fill_cnt = 3 4.163 -> 4.262 ( +2.38%) [ +1.68% +3.84% +0.00% / +3.48% +2.38% +7.57%] index_fill_ const : Elapsed 0.004 ms (4.233 ms / 1000) 4.172 -> 4.296 ( +2.97%) [+10.57% +4.58% +0.00% / +5.25% +2.97% +4.31%] index_fill_ linear : Elapsed 0.005 ms (4.613 ms / 1000) 4.220 -> 4.260 ( +0.95%) [ +5.73% +3.98% +0.00% / +1.11% +0.95% +1.21%] index_fill_ reverse : Elapsed 0.004 ms (4.462 ms / 1000) 4.283 -> 4.214 ( -1.61%) [ +7.94% +2.22% +0.00% / -0.75% -1.61% -0.42%] index_fill_ skip64 : Elapsed 0.005 ms (4.623 ms / 1000) 4.179 -> 4.241 ( +1.48%) [ +6.77% +4.59% +0.00% / +2.54% +3.69% +1.48%] index_fill_ skip256 : Elapsed 0.004 ms (4.462 ms / 1000) 4.164 -> 4.218 ( +1.30%) [ +7.73% +7.95% +0.00% / +1.32% +1.30% +1.51%] index_fill_ spread : Elapsed 0.004 ms (4.486 ms / 1000) 4.344 -> 4.230 ( -2.62%) [ +2.51% +0.92% +0.00% / -2.62% -2.39% -0.78%] index_fill_ strided 3 : Elapsed 0.004 ms (4.453 ms / 1000) 4.178 -> 4.233 ( +1.32%) [ +5.17% +4.60% +0.00% / +2.03% +1.32% +3.23%] index_fill_ random : Elapsed 0.004 ms (4.394 ms / 1000) 4.201 -> 4.237 ( +0.86%) [ +1.40% +3.33% +0.00% / +2.00% +3.52% +0.86%] index_fill_ random_sorted : Elapsed 0.004 ms (4.260 ms / 1000) 4.211 -> 4.215 ( +0.09%) [ +3.25% +4.08% +0.00% / +0.93% +0.09% +0.97%] index_fill_ perm : Elapsed 0.004 ms (4.348 ms / 1000) 4.236 -> 4.220 ( -0.38%) [ +0.00% +4.18% +4.96% / +0.92% -0.38% +0.09%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.236 ms / 1000) B = [5, 2] (stride (1, 5)) A = [3, 2] (stride (2, 1)) dim = 0 4.432 -> 4.538 ( +2.39%) [ +3.45% +5.87% +0.00% / +5.32% +2.57% +2.39%] index_add_ linear : Elapsed 0.005 ms (4.585 ms / 1000) 4.469 -> 4.519 ( +1.12%) [ +6.76% +4.16% +0.00% / +5.12% +1.90% +1.12%] index_copy_ linear : Elapsed 0.005 ms (4.771 ms / 1000) 4.506 -> 4.492 ( -0.31%) [ +0.00% +4.17% +0.78% / +2.02% +0.00% -0.31%] index_add_ reverse : Elapsed 0.005 ms (4.506 ms / 1000) 4.538 -> 4.474 ( -1.41%) [ +2.27% +1.23% +0.00% / +1.01% -1.41% +1.45%] index_copy_ reverse : Elapsed 0.005 ms (4.641 ms / 1000) 4.440 -> 4.550 ( +2.48%) [ +0.00% +5.83% +0.20% / +6.44% +2.97% +2.48%] index_add_ spread : Elapsed 0.004 ms (4.440 ms / 1000) 4.458 -> 4.487 ( +0.65%) [ +0.56% +7.81% +0.00% / +4.64% +0.65% +5.45%] index_copy_ spread : Elapsed 0.004 ms (4.483 ms / 1000) 4.431 -> 4.514 ( +1.87%) [ +0.00% +5.12% +3.79% / +7.40% +3.72% +1.87%] index_add_ strided 3 : Elapsed 0.004 ms (4.431 ms / 1000) 4.415 -> 4.464 ( +1.11%) [ +0.79% +5.35% +0.00% / +4.17% +1.11% +2.31%] index_copy_ strided 3 : Elapsed 0.004 ms (4.450 ms / 1000) 4.441 -> 4.461 ( +0.45%) [ +0.00% +5.07% +0.00% / +4.64% +3.67% +0.45%] index_add_ perm : Elapsed 0.004 ms (4.441 ms / 1000) 4.431 -> 4.449 ( +0.41%) [ +0.11% +4.31% +0.00% / +1.81% +0.41% +1.20%] index_copy_ perm : Elapsed 0.004 ms (4.436 ms / 1000) 4.435 -> 4.474 ( +0.88%) [ +0.00% +4.98% +3.40% / +3.74% +0.88% +0.97%] index_add_ perm_sorted : Elapsed 0.004 ms (4.435 ms / 1000) 4.434 -> 4.471 ( +0.83%) [ +1.47% +4.13% +0.00% / +3.41% +4.49% +0.83%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.499 ms / 1000) 4.963 -> 5.002 ( +0.79%) [ +5.10% +2.84% +0.00% / +2.50% +1.29% +0.79%] index_select const : Elapsed 0.005 ms (5.216 ms / 1000) 5.090 -> 5.018 ( -1.41%) [ +1.93% +0.00% +1.43% / +0.20% -1.22% -1.41%] index_select wrap : Elapsed 0.005 ms (5.188 ms / 1000) 4.940 -> 5.045 ( +2.13%) [ +5.24% +3.66% +0.00% / +3.28% +3.32% +2.13%] index_select linear : Elapsed 0.005 ms (5.199 ms / 1000) 4.987 -> 4.989 ( +0.04%) [ +3.63% +2.67% +0.00% / +2.79% +0.04% +0.72%] index_select reverse : Elapsed 0.005 ms (5.168 ms / 1000) 5.091 -> 5.014 ( -1.51%) [ +1.08% +0.00% +4.83% / -0.33% -1.51% +1.45%] index_select skip64 : Elapsed 0.005 ms (5.146 ms / 1000) 4.928 -> 5.065 ( +2.78%) [ +4.36% +3.47% +0.00% / +4.00% +3.81% +2.78%] index_select skip256 : Elapsed 0.005 ms (5.143 ms / 1000) 4.918 -> 5.013 ( +1.93%) [ +5.41% +4.66% +0.00% / +4.03% +1.93% +2.60%] index_select spread : Elapsed 0.005 ms (5.184 ms / 1000) 5.056 -> 5.002 ( -1.07%) [ +2.12% +0.83% +0.00% / +0.87% -1.07% +0.73%] index_select random : Elapsed 0.005 ms (5.163 ms / 1000) 4.921 -> 5.031 ( +2.24%) [ +5.79% +3.80% +0.00% / +4.19% +2.34% +2.24%] index_select random_sorted : Elapsed 0.005 ms (5.206 ms / 1000) B = [5, 2] (stride (1, 5)) A = [3, 2] (stride (1, 3)) dim = 0 4.436 -> 4.505 ( +1.56%) [ +5.18% +5.12% +0.00% / +3.90% +2.21% +1.56%] index_add_ linear : Elapsed 0.005 ms (4.666 ms / 1000) 4.474 -> 4.466 ( -0.18%) [ +0.74% +3.33% +0.00% / +2.28% -0.18% -0.11%] index_copy_ linear : Elapsed 0.005 ms (4.507 ms / 1000) 4.469 -> 4.483 ( +0.31%) [ +0.00% +3.74% +2.80% / +3.24% +0.31% +6.20%] index_add_ reverse : Elapsed 0.004 ms (4.469 ms / 1000) 4.443 -> 4.455 ( +0.27%) [ +0.92% +2.95% +0.00% / +7.56% +2.93% +0.27%] index_copy_ reverse : Elapsed 0.004 ms (4.484 ms / 1000) 4.432 -> 4.480 ( +1.08%) [ +0.00% +5.01% +3.50% / +3.47% +1.08% +1.24%] index_add_ spread : Elapsed 0.004 ms (4.432 ms / 1000) 4.496 -> 4.472 ( -0.53%) [ +0.00% +3.56% +2.49% / +1.98% -0.09% -0.53%] index_copy_ spread : Elapsed 0.004 ms (4.496 ms / 1000) 4.453 -> 4.499 ( +1.03%) [ +0.94% +3.73% +0.00% / +4.96% +1.03% +1.71%] index_add_ strided 3 : Elapsed 0.004 ms (4.495 ms / 1000) 4.487 -> 4.516 ( +0.65%) [ +0.67% +3.66% +0.00% / +2.67% +2.45% +0.65%] index_copy_ strided 3 : Elapsed 0.005 ms (4.517 ms / 1000) 4.429 -> 4.425 ( -0.09%) [ +0.41% +5.60% +0.00% / +4.40% +0.81% -0.09%] index_add_ perm : Elapsed 0.004 ms (4.447 ms / 1000) 4.482 -> 4.471 ( -0.25%) [ +0.00% +3.24% +1.94% / +6.89% -0.07% -0.25%] index_copy_ perm : Elapsed 0.004 ms (4.482 ms / 1000) 4.424 -> 4.453 ( +0.66%) [ +1.42% +5.31% +0.00% / +4.18% +4.36% +0.66%] index_add_ perm_sorted : Elapsed 0.004 ms (4.487 ms / 1000) 4.431 -> 4.455 ( +0.54%) [ +1.40% +2.69% +0.00% / +3.27% +0.54% +1.62%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.493 ms / 1000) 5.063 -> 5.041 ( -0.43%) [ +2.78% +0.00% +0.91% / -0.12% -0.43% +0.22%] index_select const : Elapsed 0.005 ms (5.204 ms / 1000) 4.945 -> 5.070 ( +2.53%) [ +5.88% +10.13% +0.00% / +5.24% +2.53% +5.44%] index_select wrap : Elapsed 0.005 ms (5.236 ms / 1000) 4.936 -> 4.991 ( +1.11%) [ +5.04% +20.77% +0.00% / +3.65% +1.11% +2.67%] index_select linear : Elapsed 0.005 ms (5.185 ms / 1000) 4.981 -> 4.991 ( +0.20%) [ +4.84% +2.15% +0.00% / +1.85% +0.20% +2.31%] index_select reverse : Elapsed 0.005 ms (5.222 ms / 1000) 5.037 -> 5.006 ( -0.62%) [ +3.47% +2.40% +0.00% / +3.24% -0.62% +1.81%] index_select skip64 : Elapsed 0.005 ms (5.212 ms / 1000) 4.929 -> 5.083 ( +3.12%) [ +5.19% +5.11% +0.00% / +3.12% +3.37% +3.51%] index_select skip256 : Elapsed 0.005 ms (5.185 ms / 1000) 4.960 -> 5.061 ( +2.04%) [ +5.46% +3.15% +0.00% / +2.04% +2.40% +2.52%] index_select spread : Elapsed 0.005 ms (5.231 ms / 1000) 5.112 -> 5.039 ( -1.43%) [ +1.58% +0.00% +0.78% / -0.14% -1.43% -1.37%] index_select random : Elapsed 0.005 ms (5.193 ms / 1000) 4.921 -> 5.018 ( +1.97%) [ +7.58% +3.47% +0.00% / +3.13% +4.90% +1.97%] index_select random_sorted : Elapsed 0.005 ms (5.294 ms / 1000) out_shape = [3, 5] in_shape = [3, 2] idx_dim = 1 B = [3, 5] (stride (5, 1)) dim = 1 fill_cnt = 2 4.210 -> 4.271 ( +1.45%) [ +3.87% +2.61% +0.00% / +1.85% +4.18% +1.45%] index_fill_ const : Elapsed 0.004 ms (4.373 ms / 1000) 4.222 -> 4.236 ( +0.33%) [ +0.00% +3.27% +2.91% / +2.08% +0.33% +2.25%] index_fill_ linear : Elapsed 0.004 ms (4.222 ms / 1000) 4.163 -> 4.226 ( +1.51%) [ +1.51% +5.67% +0.00% / +1.51% +4.64% +3.51%] index_fill_ reverse : Elapsed 0.004 ms (4.226 ms / 1000) 4.204 -> 4.225 ( +0.50%) [ +0.12% +3.07% +0.00% / +0.50% +0.62% +3.57%] index_fill_ skip64 : Elapsed 0.004 ms (4.209 ms / 1000) 4.221 -> 4.249 ( +0.66%) [ +0.00% +2.53% +1.18% / +4.15% +0.66% +1.85%] index_fill_ skip256 : Elapsed 0.004 ms (4.221 ms / 1000) 4.217 -> 4.231 ( +0.33%) [ +0.00% +1.90% +4.81% / +1.28% +0.33% +1.71%] index_fill_ spread : Elapsed 0.004 ms (4.217 ms / 1000) 4.220 -> 4.244 ( +0.57%) [ +1.85% +1.87% +0.00% / +1.28% +3.29% +0.57%] index_fill_ strided 3 : Elapsed 0.004 ms (4.298 ms / 1000) 4.158 -> 4.251 ( +2.24%) [ +0.00% +3.75% +0.99% / +2.72% +2.74% +2.24%] index_fill_ random : Elapsed 0.004 ms (4.158 ms / 1000) 4.194 -> 4.231 ( +0.88%) [ +0.00% +2.81% +1.96% / +0.88% +1.19% +2.43%] index_fill_ random_sorted : Elapsed 0.004 ms (4.194 ms / 1000) 4.189 -> 4.271 ( +1.96%) [ +0.00% +6.09% +0.29% / +1.96% +2.65% +2.96%] index_fill_ perm : Elapsed 0.004 ms (4.189 ms / 1000) 4.224 -> 4.263 ( +0.92%) [ +0.00% +1.61% +0.02% / +0.92% +3.12% +1.09%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.224 ms / 1000) B = [3, 5] (stride (5, 1)) A = [3, 2] (stride (2, 1)) dim = 1 4.441 -> 4.479 ( +0.86%) [ +0.00% +4.53% +0.59% / +5.31% +0.86% +2.77%] index_add_ linear : Elapsed 0.004 ms (4.441 ms / 1000) 4.467 -> 4.504 ( +0.83%) [ +0.00% +4.10% +3.38% / +8.91% +0.83% +4.12%] index_copy_ linear : Elapsed 0.004 ms (4.467 ms / 1000) 4.449 -> 4.534 ( +1.91%) [ +0.00% +7.06% +1.19% / +6.29% +3.64% +1.91%] index_add_ reverse : Elapsed 0.004 ms (4.449 ms / 1000) 4.438 -> 4.475 ( +0.83%) [ +0.00% +4.30% +1.62% / +2.93% +0.83% +4.19%] index_copy_ reverse : Elapsed 0.004 ms (4.438 ms / 1000) 4.497 -> 4.460 ( -0.82%) [ +0.00% +3.18% +1.25% / +7.76% +1.85% -0.82%] index_add_ spread : Elapsed 0.004 ms (4.497 ms / 1000) 4.608 -> 4.498 ( -2.39%) [ +0.00% +0.69% +1.02% / -0.17% -2.37% -2.39%] index_copy_ spread : Elapsed 0.005 ms (4.608 ms / 1000) 4.448 -> 4.469 ( +0.47%) [ +0.00% +5.22% +2.18% / +3.30% +4.43% +0.47%] index_add_ strided 3 : Elapsed 0.004 ms (4.448 ms / 1000) 4.443 -> 4.481 ( +0.86%) [ +0.00% +3.06% +1.67% / +2.45% +0.92% +0.86%] index_copy_ strided 3 : Elapsed 0.004 ms (4.443 ms / 1000) 4.432 -> 4.511 ( +1.78%) [ +0.00% +4.17% +4.67% / +3.16% +1.78% +2.19%] index_add_ perm : Elapsed 0.004 ms (4.432 ms / 1000) 4.463 -> 4.479 ( +0.36%) [ +2.29% +2.78% +0.00% / +1.32% +3.54% +0.36%] index_copy_ perm : Elapsed 0.005 ms (4.565 ms / 1000) 4.425 -> 4.485 ( +1.36%) [ +0.00% +4.63% +0.86% / +2.89% +1.36% +2.92%] index_add_ perm_sorted : Elapsed 0.004 ms (4.425 ms / 1000) 4.452 -> 4.467 ( +0.34%) [ +0.00% +3.01% +1.33% / +1.48% +0.34% +2.81%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.452 ms / 1000) 5.139 -> 5.021 ( -2.30%) [ +1.54% +0.00% +1.63% / -0.95% -2.30% +0.49%] index_select const : Elapsed 0.005 ms (5.218 ms / 1000) 5.124 -> 5.062 ( -1.21%) [ +0.84% +0.00% +3.65% / -1.21% -0.59% -1.13%] index_select wrap : Elapsed 0.005 ms (5.167 ms / 1000) 5.100 -> 5.008 ( -1.80%) [ +0.96% +0.00% +1.94% / +2.14% -1.80% +0.37%] index_select linear : Elapsed 0.005 ms (5.149 ms / 1000) 5.116 -> 5.016 ( -1.95%) [ +0.00% +0.16% +1.02% / +0.29% -1.95% -0.49%] index_select reverse : Elapsed 0.005 ms (5.116 ms / 1000) 5.119 -> 5.058 ( -1.19%) [ +0.20% +0.00% +2.29% / -1.19% +1.15% -1.13%] index_select skip64 : Elapsed 0.005 ms (5.129 ms / 1000) 4.960 -> 4.991 ( +0.62%) [ +2.78% +2.74% +0.00% / +2.04% +0.62% +3.17%] index_select skip256 : Elapsed 0.005 ms (5.098 ms / 1000) 5.107 -> 5.056 ( -1.00%) [ +0.00% +0.98% +2.55% / +0.25% -1.00% -0.16%] index_select spread : Elapsed 0.005 ms (5.107 ms / 1000) 4.959 -> 5.062 ( +2.08%) [ +4.05% +7.22% +0.00% / +2.08% +4.05% +5.28%] index_select random : Elapsed 0.005 ms (5.160 ms / 1000) 4.927 -> 4.991 ( +1.30%) [ +4.18% +2.44% +0.00% / +2.86% +1.30% +2.54%] index_select random_sorted : Elapsed 0.005 ms (5.133 ms / 1000) B = [3, 5] (stride (5, 1)) A = [3, 2] (stride (1, 3)) dim = 1 4.441 -> 4.435 ( -0.14%) [ +0.00% +4.66% +4.44% / +7.72% -0.14% +1.73%] index_add_ linear : Elapsed 0.004 ms (4.441 ms / 1000) 4.451 -> 4.507 ( +1.26%) [ +0.00% +5.23% +5.03% / +4.20% +3.06% +1.26%] index_copy_ linear : Elapsed 0.004 ms (4.451 ms / 1000) 4.443 -> 4.509 ( +1.49%) [ +0.00% +5.06% +2.86% / +3.38% +1.49% +4.52%] index_add_ reverse : Elapsed 0.004 ms (4.443 ms / 1000) 4.515 -> 4.501 ( -0.31%) [ +0.00% +1.71% +4.39% / +3.15% -0.31% +0.04%] index_copy_ reverse : Elapsed 0.005 ms (4.515 ms / 1000) 4.386 -> 4.589 ( +4.63%) [ +0.00% +6.09% +6.22% / +4.63% +5.84% +6.61%] index_add_ spread : Elapsed 0.004 ms (4.386 ms / 1000) 4.481 -> 4.498 ( +0.38%) [ +0.00% +2.70% +4.35% / +2.57% +3.48% +0.38%] index_copy_ spread : Elapsed 0.004 ms (4.481 ms / 1000) 4.451 -> 4.499 ( +1.08%) [ +0.00% +4.49% +2.52% / +1.86% +2.22% +1.08%] index_add_ strided 3 : Elapsed 0.004 ms (4.451 ms / 1000) 4.432 -> 4.477 ( +1.02%) [ +0.00% +3.16% +8.42% / +2.80% +1.02% +2.89%] index_copy_ strided 3 : Elapsed 0.004 ms (4.432 ms / 1000) 4.548 -> 4.527 ( -0.46%) [ +0.95% +1.01% +0.00% / -0.46% +0.02% +3.41%] index_add_ perm : Elapsed 0.005 ms (4.591 ms / 1000) 4.483 -> 4.430 ( -1.18%) [ +3.37% +5.87% +0.00% / +1.63% -1.18% +0.51%] index_copy_ perm : Elapsed 0.005 ms (4.634 ms / 1000) 4.555 -> 4.471 ( -1.84%) [ +1.73% +2.04% +0.00% / -0.66% +1.80% -1.84%] index_add_ perm_sorted : Elapsed 0.005 ms (4.634 ms / 1000) 4.482 -> 4.505 ( +0.51%) [ +0.00% +4.19% +2.81% / +1.54% +0.51% +1.54%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.482 ms / 1000) 5.002 -> 5.060 ( +1.16%) [ +4.52% +1.62% +0.00% / +1.16% +3.64% +1.46%] index_select const : Elapsed 0.005 ms (5.228 ms / 1000) 5.028 -> 5.010 ( -0.36%) [ +2.86% +2.13% +0.00% / +1.09% -0.36% +0.36%] index_select wrap : Elapsed 0.005 ms (5.172 ms / 1000) 5.087 -> 5.021 ( -1.30%) [ +0.83% +0.37% +0.00% / +1.26% -1.30% +6.86%] index_select linear : Elapsed 0.005 ms (5.129 ms / 1000) 5.035 -> 5.030 ( -0.10%) [ +2.01% +1.31% +0.00% / +0.66% +2.34% -0.10%] index_select reverse : Elapsed 0.005 ms (5.136 ms / 1000) 5.035 -> 4.990 ( -0.89%) [ +1.15% +1.51% +0.00% / +0.70% -0.89% +5.66%] index_select skip64 : Elapsed 0.005 ms (5.093 ms / 1000) 5.075 -> 4.991 ( -1.66%) [ +1.87% +0.00% +4.18% / -0.30% -1.66% +0.33%] index_select skip256 : Elapsed 0.005 ms (5.170 ms / 1000) 4.932 -> 5.055 ( +2.49%) [ +7.40% +3.93% +0.00% / +2.49% +3.20% +6.85%] index_select spread : Elapsed 0.005 ms (5.297 ms / 1000) 4.922 -> 4.993 ( +1.44%) [ +5.83% +4.14% +0.00% / +3.51% +1.44% +8.11%] index_select random : Elapsed 0.005 ms (5.209 ms / 1000) 5.093 -> 4.991 ( -2.00%) [ +2.65% +0.00% +0.75% / +1.06% -2.00% +3.30%] index_select random_sorted : Elapsed 0.005 ms (5.228 ms / 1000) B = [3, 5] (stride (1, 3)) dim = 1 fill_cnt = 2 bad 4.179 -> 4.389 ( +5.03%) [ +2.49% +5.74% +0.00% / +8.16% +5.03% +8.04%] index_fill_ const : Elapsed 0.004 ms (4.283 ms / 1000) 4.247 -> 4.212 ( -0.82%) [ +4.33% +1.95% +0.00% / +0.45% -0.82% +4.76%] index_fill_ linear : Elapsed 0.004 ms (4.431 ms / 1000) 4.300 -> 4.275 ( -0.58%) [ +0.00% +1.65% +1.70% / -0.58% +0.60% +3.33%] index_fill_ reverse : Elapsed 0.004 ms (4.300 ms / 1000) 4.259 -> 4.224 ( -0.82%) [ +0.00% +2.23% +2.35% / +1.20% -0.82% +2.37%] index_fill_ skip64 : Elapsed 0.004 ms (4.259 ms / 1000) 4.236 -> 4.250 ( +0.33%) [ +0.00% +2.57% +4.37% / +0.33% +3.56% +1.44%] index_fill_ skip256 : Elapsed 0.004 ms (4.236 ms / 1000) 4.228 -> 4.225 ( -0.07%) [ +0.00% +2.72% +4.56% / +0.52% -0.07% +0.78%] index_fill_ spread : Elapsed 0.004 ms (4.228 ms / 1000) 4.315 -> 4.217 ( -2.27%) [ +0.00% +1.41% +5.26% / +1.81% -2.27% -0.97%] index_fill_ strided 3 : Elapsed 0.004 ms (4.315 ms / 1000) 4.247 -> 4.247 ( +0.00%) [ +0.71% +3.44% +0.00% / +0.00% +0.54% +1.13%] index_fill_ random : Elapsed 0.004 ms (4.277 ms / 1000) 4.216 -> 4.229 ( +0.31%) [ +0.09% +2.44% +0.00% / +0.31% +1.54% +0.83%] index_fill_ random_sorted : Elapsed 0.004 ms (4.220 ms / 1000) 4.125 -> 4.221 ( +2.33%) [ +3.22% +5.62% +0.00% / +2.33% +3.13% +6.11%] index_fill_ perm : Elapsed 0.004 ms (4.258 ms / 1000) 4.260 -> 4.233 ( -0.63%) [ +0.00% +2.02% +1.85% / +0.40% -0.63% +6.22%] index_fill_ perm_sorted : Elapsed 0.004 ms (4.260 ms / 1000) B = [3, 5] (stride (1, 3)) A = [3, 2] (stride (2, 1)) dim = 1 4.455 -> 4.556 ( +2.27%) [ +0.52% +4.67% +0.00% / +2.27% +4.26% +4.44%] index_add_ linear : Elapsed 0.004 ms (4.478 ms / 1000) 4.510 -> 4.477 ( -0.73%) [ +0.00% +2.86% +0.71% / +1.82% +0.07% -0.73%] index_copy_ linear : Elapsed 0.005 ms (4.510 ms / 1000) 4.460 -> 4.555 ( +2.13%) [ +0.00% +5.38% +3.90% / +3.63% +2.85% +2.13%] index_add_ reverse : Elapsed 0.004 ms (4.460 ms / 1000) 4.462 -> 4.485 ( +0.52%) [ +0.90% +4.39% +0.00% / +2.08% +0.90% +0.52%] index_copy_ reverse : Elapsed 0.005 ms (4.502 ms / 1000) 4.420 -> 4.472 ( +1.18%) [ +0.00% +6.04% +0.16% / +3.26% +5.18% +1.18%] index_add_ spread : Elapsed 0.004 ms (4.420 ms / 1000) 4.444 -> 4.470 ( +0.59%) [ +0.45% +4.05% +0.00% / +2.23% +0.59% +1.67%] index_copy_ spread : Elapsed 0.004 ms (4.464 ms / 1000) 4.445 -> 4.496 ( +1.15%) [ +0.00% +5.69% +3.76% / +3.67% +1.15% +5.85%] index_add_ strided 3 : Elapsed 0.004 ms (4.445 ms / 1000) 4.435 -> 4.504 ( +1.56%) [ +1.20% +4.96% +0.00% / +3.13% +3.18% +1.56%] index_copy_ strided 3 : Elapsed 0.004 ms (4.488 ms / 1000) 4.437 -> 4.493 ( +1.26%) [ +0.00% +5.54% +1.51% / +2.55% +1.26% +5.41%] index_add_ perm : Elapsed 0.004 ms (4.437 ms / 1000) 4.454 -> 4.458 ( +0.09%) [ +0.00% +4.76% +4.98% / +3.44% +0.09% +2.58%] index_copy_ perm : Elapsed 0.004 ms (4.454 ms / 1000) 4.481 -> 4.474 ( -0.16%) [ +3.15% +6.72% +0.00% / +2.01% +1.21% -0.16%] index_add_ perm_sorted : Elapsed 0.005 ms (4.622 ms / 1000) 4.466 -> 4.491 ( +0.56%) [ +4.30% +3.20% +0.00% / +1.84% +4.19% +0.56%] index_copy_ perm_sorted : Elapsed 0.005 ms (4.658 ms / 1000) 4.949 -> 5.020 ( +1.43%) [ +4.18% +3.44% +0.00% / +3.48% +1.43% +2.02%] index_select const : Elapsed 0.005 ms (5.156 ms / 1000) 5.075 -> 5.006 ( -1.36%) [ +2.46% +0.81% +0.00% / +1.26% -1.36% -0.14%] index_select wrap : Elapsed 0.005 ms (5.200 ms / 1000) 4.948 -> 5.032 ( +1.70%) [ +4.37% +3.58% +0.00% / +3.76% +4.89% +1.70%] index_select linear : Elapsed 0.005 ms (5.164 ms / 1000) 4.960 -> 5.012 ( +1.05%) [ +3.41% +3.51% +0.00% / +3.65% +1.05% +1.90%] index_select reverse : Elapsed 0.005 ms (5.129 ms / 1000) 5.075 -> 5.032 ( -0.85%) [ +1.30% +0.00% +0.69% / +0.81% -0.85% +0.30%] index_select skip64 : Elapsed 0.005 ms (5.141 ms / 1000) 4.972 -> 5.073 ( +2.03%) [ +5.19% +3.32% +0.00% / +2.86% +3.32% +2.03%] index_select skip256 : Elapsed 0.005 ms (5.230 ms / 1000) 4.928 -> 5.076 ( +3.00%) [ +4.46% +2.72% +0.00% / +3.73% +3.00% +3.73%] index_select spread : Elapsed 0.005 ms (5.148 ms / 1000) 5.135 -> 5.010 ( -2.43%) [ +3.19% +0.78% +0.00% / +2.03% -2.43% -0.47%] index_select random : Elapsed 0.005 ms (5.299 ms / 1000) 4.946 -> 5.064 ( +2.39%) [ +4.99% +3.82% +0.00% / +2.51% +4.97% +2.39%] index_select random_sorted : Elapsed 0.005 ms (5.193 ms / 1000) B = [3, 5] (stride (1, 3)) A = [3, 2] (stride (1, 3)) dim = 1 4.491 -> 4.489 ( -0.04%) [ +0.00% +3.03% +0.47% / +2.14% -0.04% +1.43%] index_add_ linear : Elapsed 0.004 ms (4.491 ms / 1000) 4.529 -> 4.462 ( -1.48%) [ +0.22% +0.86% +0.00% / +1.37% -1.48% +2.72%] index_copy_ linear : Elapsed 0.005 ms (4.539 ms / 1000) 4.432 -> 4.504 ( +1.62%) [ +0.00% +5.08% +1.76% / +3.16% +1.62% +1.62%] index_add_ reverse : Elapsed 0.004 ms (4.432 ms / 1000) 4.462 -> 4.544 ( +1.84%) [ +0.00% +2.76% +0.02% / +2.35% +2.17% +1.84%] index_copy_ reverse : Elapsed 0.004 ms (4.462 ms / 1000) 4.450 -> 4.496 ( +1.03%) [ +0.00% +4.34% +0.94% / +2.90% +1.71% +1.03%] index_add_ spread : Elapsed 0.004 ms (4.450 ms / 1000) 4.447 -> 4.489 ( +0.94%) [ +0.00% +4.45% +6.59% / +2.72% +0.94% +1.82%] index_copy_ spread : Elapsed 0.004 ms (4.447 ms / 1000) 4.444 -> 4.431 ( -0.29%) [ +0.00% +6.71% +1.33% / +3.31% +4.12% -0.29%] index_add_ strided 3 : Elapsed 0.004 ms (4.444 ms / 1000) 4.466 -> 4.463 ( -0.07%) [ +0.27% +8.55% +0.00% / +2.37% -0.07% +1.25%] index_copy_ strided 3 : Elapsed 0.004 ms (4.478 ms / 1000) 4.447 -> 4.529 ( +1.84%) [ +0.00% +5.94% +2.07% / +4.86% +1.84% +2.20%] index_add_ perm : Elapsed 0.004 ms (4.447 ms / 1000) 4.468 -> 4.472 ( +0.09%) [ +0.00% +4.16% +4.54% / +2.69% +0.09% +1.43%] index_copy_ perm : Elapsed 0.004 ms (4.468 ms / 1000) 4.425 -> 4.543 ( +2.67%) [ +0.00% +4.54% +2.06% / +4.56% +4.72% +2.67%] index_add_ perm_sorted : Elapsed 0.004 ms (4.425 ms / 1000) 4.456 -> 4.461 ( +0.11%) [ +0.00% +2.92% +0.18% / +1.75% +0.11% +2.38%] index_copy_ perm_sorted : Elapsed 0.004 ms (4.456 ms / 1000) 5.069 -> 5.012 ( -1.12%) [ +2.25% +0.00% +2.25% / +0.22% -1.12% +0.95%] index_select const : Elapsed 0.005 ms (5.183 ms / 1000) 4.922 -> 5.071 ( +3.03%) [ +4.94% +3.47% +0.00% / +3.03% +3.72% +3.35%] index_select wrap : Elapsed 0.005 ms (5.165 ms / 1000) 4.951 -> 5.002 ( +1.03%) [ +3.45% +2.22% +0.00% / +3.17% +1.03% +2.10%] index_select linear : Elapsed 0.005 ms (5.122 ms / 1000) 5.082 -> 5.107 ( +0.49%) [ +0.65% +0.00% +3.99% / +0.49% +1.10% +1.67%] index_select reverse : Elapsed 0.005 ms (5.115 ms / 1000) 4.978 -> 5.074 ( +1.93%) [ +5.95% +2.17% +0.00% / +2.09% +2.99% +1.93%] index_select skip64 : Elapsed 0.005 ms (5.274 ms / 1000) 4.929 -> 5.031 ( +2.07%) [ +7.71% +2.58% +0.00% / +3.57% +2.07% +3.79%] index_select skip256 : Elapsed 0.005 ms (5.309 ms / 1000) 5.120 -> 5.021 ( -1.93%) [ +1.74% +0.00% +0.06% / +0.37% -1.93% +1.25%] index_select spread : Elapsed 0.005 ms (5.209 ms / 1000) 4.944 -> 5.140 ( +3.96%) [ +4.39% +3.24% +0.00% / +3.96% +4.09% +4.65%] index_select random : Elapsed 0.005 ms (5.161 ms / 1000) 4.974 -> 5.028 ( +1.09%) [ +4.06% +2.85% +0.00% / +2.63% +1.09% +3.02%] index_select random_sorted : Elapsed 0.005 ms (5.176 ms / 1000) ==================== rep_count = 500 dimensions = [32, 256, 512] out_shape = [32, 512] in_shape = [256, 512] idx_dim = 0 B = [32, 512] (stride (512, 1)) dim = 0 fill_cnt = 256 11.029 -> 10.985 ( -0.40%) [ +0.35% +0.08% +0.00% / -0.40% -0.39% -0.38%] index_fill_ const : Elapsed 0.022 ms (11.068 ms / 500) 11.119 -> 11.079 ( -0.36%) [ +0.34% +0.00% +0.08% / -0.25% -0.36% -0.27%] index_fill_ linear : Elapsed 0.022 ms (11.157 ms / 500) 11.111 -> 11.060 ( -0.46%) [ +0.23% +0.00% +0.14% / -0.32% -0.43% -0.46%] index_fill_ reverse : Elapsed 0.022 ms (11.136 ms / 500) 11.046 -> 10.991 ( -0.50%) [ +0.21% +0.04% +0.00% / -0.50% +0.02% +0.11%] index_fill_ skip64 : Elapsed 0.022 ms (11.069 ms / 500) 11.042 -> 10.993 ( -0.44%) [ +0.17% +0.00% +0.03% / -0.44% -0.38% -0.42%] index_fill_ skip256 : Elapsed 0.022 ms (11.061 ms / 500) 11.023 -> 10.985 ( -0.34%) [ +0.26% +0.00% +0.22% / -0.34% -0.08% -0.18%] index_fill_ spread : Elapsed 0.022 ms (11.052 ms / 500) 11.107 -> 11.024 ( -0.75%) [ +0.10% +0.00% +0.04% / -0.75% -0.65% -0.69%] index_fill_ strided 3 : Elapsed 0.022 ms (11.118 ms / 500) 11.052 -> 11.022 ( -0.27%) [ +0.28% +0.06% +0.00% / -0.27% -0.08% -0.07%] index_fill_ strided 5 : Elapsed 0.022 ms (11.083 ms / 500) 11.064 -> 11.009 ( -0.50%) [ +0.38% +0.00% +0.12% / -0.50% -0.36% -0.28%] index_fill_ strided 7 : Elapsed 0.022 ms (11.106 ms / 500) 11.036 -> 10.984 ( -0.47%) [ +0.28% +0.10% +0.00% / -0.47% -0.41% -0.30%] index_fill_ strided 8 : Elapsed 0.022 ms (11.067 ms / 500) 11.057 -> 10.974 ( -0.75%) [ +0.23% +0.00% +0.14% / -0.24% -0.75% -0.66%] index_fill_ strided 16 : Elapsed 0.022 ms (11.082 ms / 500) 11.057 -> 11.020 ( -0.33%) [ +0.36% +0.14% +0.00% / -0.33% +0.16% +0.09%] index_fill_ random : Elapsed 0.022 ms (11.097 ms / 500) 11.128 -> 10.993 ( -1.21%) [ +0.17% +0.02% +0.00% / -0.29% -1.18% -1.21%] index_fill_ random_sorted : Elapsed 0.022 ms (11.147 ms / 500) B = [32, 512] (stride (512, 1)) A = [256, 512] (stride (512, 1)) dim = 0 17.425 -> 17.424 ( -0.01%) [ +0.10% +0.00% +0.10% / -0.01% +0.55% +0.52%] index_select const : Elapsed 0.035 ms (17.443 ms / 500) 17.577 -> 17.565 ( -0.07%) [ +0.02% +0.02% +0.00% / +0.10% +0.18% -0.07%] index_select wrap : Elapsed 0.035 ms (17.580 ms / 500) 17.567 -> 17.581 ( +0.08%) [ +0.00% +0.05% +0.01% / +0.08% +0.14% +0.18%] index_select linear : Elapsed 0.035 ms (17.567 ms / 500) 17.675 -> 17.580 ( -0.54%) [ +0.06% +0.00% +0.01% / +0.01% -0.54% -0.52%] index_select reverse : Elapsed 0.035 ms (17.686 ms / 500) 17.531 -> 17.418 ( -0.64%) [ +0.00% +0.09% +0.18% / +0.11% -0.64% -0.54%] index_select skip64 : Elapsed 0.035 ms (17.531 ms / 500) 17.464 -> 17.447 ( -0.10%) [ +0.05% +0.09% +0.00% / +0.08% -0.10% -0.10%] index_select skip256 : Elapsed 0.035 ms (17.473 ms / 500) 17.602 -> 17.534 ( -0.39%) [ +0.07% +0.00% +0.06% / +0.03% -0.39% -0.32%] index_select spread : Elapsed 0.035 ms (17.614 ms / 500) 17.583 -> 17.554 ( -0.16%) [ +0.05% +0.00% +0.10% / +0.21% -0.16% -0.12%] index_select strided 3 : Elapsed 0.035 ms (17.591 ms / 500) 17.465 -> 17.475 ( +0.06%) [ +0.01% +0.06% +0.00% / +0.06% +1.25% +1.38%] index_select strided 5 : Elapsed 0.035 ms (17.467 ms / 500) 17.601 -> 17.598 ( -0.02%) [ +0.01% +0.00% +0.06% / +0.02% -0.02% -0.02%] index_select strided 7 : Elapsed 0.035 ms (17.602 ms / 500) 17.563 -> 17.576 ( +0.07%) [ +0.03% +0.00% +0.05% / +0.07% +0.22% +0.34%] index_select strided 8 : Elapsed 0.035 ms (17.568 ms / 500) 17.500 -> 17.488 ( -0.07%) [ +0.00% +0.03% +0.04% / +0.02% -0.01% -0.07%] index_select strided 16 : Elapsed 0.035 ms (17.500 ms / 500) 17.439 -> 17.442 ( +0.02%) [ +0.00% +0.10% +0.12% / +0.07% +0.06% +0.02%] index_select strided 64 : Elapsed 0.035 ms (17.439 ms / 500) 17.568 -> 17.569 ( +0.01%) [ +0.08% +0.13% +0.00% / +0.13% +0.01% +0.03%] index_select strided 100 : Elapsed 0.035 ms (17.582 ms / 500) 17.574 -> 17.585 ( +0.06%) [ +0.01% +0.00% +0.05% / +0.06% +0.06% +0.06%] index_select strided 255 : Elapsed 0.035 ms (17.576 ms / 500) 17.612 -> 17.545 ( -0.38%) [ +0.03% +0.01% +0.00% / +0.01% -0.38% -0.33%] index_select random : Elapsed 0.035 ms (17.618 ms / 500) 17.620 -> 17.549 ( -0.40%) [ +0.07% +0.00% +0.01% / -0.03% -0.36% -0.40%] index_select random_sorted : Elapsed 0.035 ms (17.632 ms / 500) 17.566 -> 17.571 ( +0.03%) [ +0.00% +0.05% +0.03% / +0.05% +0.06% +0.03%] index_select perm : Elapsed 0.035 ms (17.566 ms / 500) 17.553 -> 17.566 ( +0.07%) [ +0.00% +0.11% +0.04% / +0.07% +0.13% +0.10%] index_select perm_sorted : Elapsed 0.035 ms (17.553 ms / 500) B = [32, 512] (stride (512, 1)) A = [256, 512] (stride (1, 256)) dim = 0 17.608 -> 17.615 ( +0.04%) [ +0.00% +0.05% +0.08% / +0.04% +0.17% +0.06%] index_select const : Elapsed 0.035 ms (17.608 ms / 500) 17.814 -> 17.814 ( +0.00%) [ +0.00% +0.01% +0.03% / +0.00% +0.03% +0.13%] index_select wrap : Elapsed 0.036 ms (17.814 ms / 500) 17.915 -> 17.821 ( -0.52%) [ +0.00% +0.00% +0.08% / +0.00% -0.39% -0.52%] index_select linear : Elapsed 0.036 ms (17.915 ms / 500) 17.924 -> 17.815 ( -0.61%) [ +0.00% +0.09% +0.05% / -0.04% -0.57% -0.61%] index_select reverse : Elapsed 0.036 ms (17.924 ms / 500) 17.621 -> 17.609 ( -0.07%) [ +0.00% +0.06% +0.06% / -0.03% -0.01% -0.07%] index_select skip64 : Elapsed 0.035 ms (17.621 ms / 500) 17.620 -> 17.602 ( -0.10%) [ +0.00% +0.05% +0.03% / +0.04% -0.05% -0.10%] index_select skip256 : Elapsed 0.035 ms (17.620 ms / 500) 19.069 -> 19.074 ( +0.03%) [ +0.09% +0.00% +0.05% / +0.03% +0.39% +0.46%] index_select spread : Elapsed 0.038 ms (19.086 ms / 500) 18.181 -> 18.182 ( +0.01%) [ +0.00% +0.02% +0.10% / +0.01% +0.36% +0.52%] index_select strided 3 : Elapsed 0.036 ms (18.181 ms / 500) 18.544 -> 18.543 ( -0.01%) [ +0.02% +0.02% +0.00% / -0.01% +0.28% +0.16%] index_select strided 5 : Elapsed 0.037 ms (18.547 ms / 500) 18.898 -> 18.895 ( -0.02%) [ +0.14% +0.03% +0.00% / -0.02% +0.30% +0.26%] index_select strided 7 : Elapsed 0.038 ms (18.924 ms / 500) 19.071 -> 19.071 ( +0.00%) [ +0.00% +0.10% +0.06% / +0.08% +0.08% +0.00%] index_select strided 8 : Elapsed 0.038 ms (19.071 ms / 500) 18.465 -> 18.435 ( -0.16%) [ +0.05% +0.00% +0.11% / +0.06% -0.16% -0.14%] index_select strided 16 : Elapsed 0.037 ms (18.475 ms / 500) 17.902 -> 17.880 ( -0.12%) [ +0.07% +0.12% +0.00% / +0.07% -0.12% -0.04%] index_select strided 64 : Elapsed 0.036 ms (17.914 ms / 500) 18.936 -> 18.918 ( -0.10%) [ +0.13% +0.00% +0.05% / +0.11% -0.08% -0.10%] index_select strided 100 : Elapsed 0.038 ms (18.961 ms / 500) 17.858 -> 17.871 ( +0.07%) [ +0.08% +0.00% +0.09% / +0.11% +0.17% +0.07%] index_select strided 255 : Elapsed 0.036 ms (17.873 ms / 500) 18.528 -> 18.523 ( -0.03%) [ +0.09% +0.00% +0.10% / -0.03% +0.12% +0.28%] index_select random : Elapsed 0.037 ms (18.544 ms / 500) 18.540 -> 18.523 ( -0.09%) [ +0.16% +0.00% +0.08% / +0.02% -0.07% -0.09%] index_select random_sorted : Elapsed 0.037 ms (18.569 ms / 500) 18.637 -> 18.591 ( -0.25%) [ +0.00% +0.10% +0.03% / +0.04% -0.25% -0.14%] index_select perm : Elapsed 0.037 ms (18.637 ms / 500) 18.618 -> 18.632 ( +0.08%) [ +0.12% +0.07% +0.00% / +0.08% +0.31% +0.31%] index_select perm_sorted : Elapsed 0.037 ms (18.640 ms / 500) B = [32, 512] (stride (1, 32)) dim = 0 fill_cnt = 256 GOOD 13.725 -> 10.900 (-20.58%) [ +0.26% +0.07% +0.00% / -20.07% -20.58% -20.54%] index_fill_ const : Elapsed 0.028 ms (13.761 ms / 500) GOOD 13.786 -> 10.971 (-20.42%) [ +0.13% +0.11% +0.00% / -20.42% -20.03% -20.03%] index_fill_ linear : Elapsed 0.028 ms (13.804 ms / 500) GOOD 13.784 -> 10.970 (-20.41%) [ +0.33% +0.05% +0.00% / -20.41% -20.36% -20.39%] index_fill_ reverse : Elapsed 0.028 ms (13.830 ms / 500) GOOD 13.728 -> 10.902 (-20.59%) [ +0.27% +0.03% +0.00% / -20.47% -20.59% -20.57%] index_fill_ skip64 : Elapsed 0.028 ms (13.765 ms / 500) GOOD 13.705 -> 10.910 (-20.39%) [ +0.20% +0.09% +0.00% / -20.30% -20.37% -20.39%] index_fill_ skip256 : Elapsed 0.027 ms (13.733 ms / 500) GOOD 13.815 -> 10.970 (-20.59%) [ +0.32% +0.26% +0.00% / -20.52% -20.46% -20.59%] index_fill_ spread : Elapsed 0.028 ms (13.859 ms / 500) GOOD 13.833 -> 10.975 (-20.66%) [ +0.17% +0.00% +0.05% / -20.22% -20.64% -20.66%] index_fill_ strided 3 : Elapsed 0.028 ms (13.856 ms / 500) GOOD 13.784 -> 10.986 (-20.30%) [ +0.23% +0.13% +0.00% / -20.30% -20.00% -19.94%] index_fill_ strided 5 : Elapsed 0.028 ms (13.816 ms / 500) GOOD 13.802 -> 10.961 (-20.58%) [ +0.08% +0.00% +0.00% / -20.11% -20.52% -20.58%] index_fill_ strided 7 : Elapsed 0.028 ms (13.813 ms / 500) GOOD 13.926 -> 11.117 (-20.17%) [ +0.28% +0.00% +0.08% / -20.17% -19.89% -19.85%] index_fill_ strided 8 : Elapsed 0.028 ms (13.965 ms / 500) GOOD 13.786 -> 10.965 (-20.46%) [ +0.19% +0.12% +0.00% / -20.30% -20.34% -20.46%] index_fill_ strided 16 : Elapsed 0.028 ms (13.812 ms / 500) GOOD 13.790 -> 10.969 (-20.46%) [ +0.25% +0.00% +0.10% / -20.29% -20.45% -20.46%] index_fill_ random : Elapsed 0.028 ms (13.825 ms / 500) GOOD 13.811 -> 10.977 (-20.52%) [ +0.20% +0.02% +0.00% / -20.42% -20.51% -20.52%] index_fill_ random_sorted : Elapsed 0.028 ms (13.838 ms / 500) B = [32, 512] (stride (1, 32)) A = [256, 512] (stride (512, 1)) dim = 0 17.554 -> 17.410 ( -0.82%) [ +0.00% +0.10% +0.01% / -0.57% -0.82% -0.75%] index_select const : Elapsed 0.035 ms (17.554 ms / 500) 17.768 -> 17.669 ( -0.56%) [ +0.05% +0.04% +0.00% / -0.28% -0.56% -0.51%] index_select wrap : Elapsed 0.036 ms (17.777 ms / 500) 17.767 -> 17.677 ( -0.51%) [ +0.14% +0.00% +0.03% / -0.22% -0.51% -0.51%] index_select linear : Elapsed 0.036 ms (17.792 ms / 500) 17.668 -> 17.680 ( +0.07%) [ +0.13% +0.04% +0.00% / +0.10% +0.07% +0.14%] index_select reverse : Elapsed 0.035 ms (17.691 ms / 500) 17.531 -> 17.423 ( -0.62%) [ +0.17% +0.00% +0.03% / -0.62% -0.60% -0.47%] index_select skip64 : Elapsed 0.035 ms (17.560 ms / 500) 17.541 -> 17.417 ( -0.71%) [ +0.09% +0.00% +0.03% / -0.53% -0.67% -0.71%] index_select skip256 : Elapsed 0.035 ms (17.556 ms / 500) 17.649 -> 17.808 ( +0.90%) [ +0.23% +0.09% +0.00% / +1.10% +0.90% +0.92%] index_select spread : Elapsed 0.035 ms (17.689 ms / 500) 17.762 -> 17.622 ( -0.79%) [ +0.00% +0.05% +0.07% / +0.61% -0.79% -0.78%] index_select strided 3 : Elapsed 0.036 ms (17.762 ms / 500) 17.675 -> 17.666 ( -0.05%) [ +0.07% +0.05% +0.00% / -0.05% +0.75% +0.78%] index_select strided 5 : Elapsed 0.035 ms (17.687 ms / 500) 17.707 -> 17.672 ( -0.20%) [ +0.08% +0.05% +0.00% / +0.52% -0.12% -0.20%] index_select strided 7 : Elapsed 0.035 ms (17.721 ms / 500) 17.689 -> 17.799 ( +0.62%) [ +0.01% +0.00% +0.02% / +0.90% +0.64% +0.62%] index_select strided 8 : Elapsed 0.035 ms (17.691 ms / 500) 17.590 -> 17.626 ( +0.20%) [ +0.00% +0.07% +0.04% / +0.30% +0.23% +0.20%] index_select strided 16 : Elapsed 0.035 ms (17.590 ms / 500) 17.552 -> 17.456 ( -0.55%) [ +0.10% +0.00% +0.06% / -0.49% -0.52% -0.55%] index_select strided 64 : Elapsed 0.035 ms (17.570 ms / 500) 17.706 -> 17.694 ( -0.07%) [ +0.04% +0.00% +0.06% / -0.07% +0.11% +0.12%] index_select strided 100 : Elapsed 0.035 ms (17.713 ms / 500) 17.723 -> 17.693 ( -0.17%) [ +0.01% +0.02% +0.00% / -0.17% -0.04% -0.12%] index_select strided 255 : Elapsed 0.035 ms (17.725 ms / 500) 17.602 -> 17.597 ( -0.03%) [ +0.19% +0.05% +0.00% / -0.03% +1.15% +1.20%] index_select random : Elapsed 0.035 ms (17.635 ms / 500) 17.622 -> 17.607 ( -0.09%) [ +0.03% +0.00% +0.04% / -0.09% +1.07% +1.05%] index_select random_sorted : Elapsed 0.035 ms (17.628 ms / 500) 17.684 -> 17.771 ( +0.49%) [ +0.04% +0.04% +0.00% / +0.49% +0.61% +0.52%] index_select perm : Elapsed 0.035 ms (17.691 ms / 500) 17.676 -> 17.775 ( +0.56%) [ +0.01% +0.01% +0.00% / +0.56% +0.57% +0.59%] index_select perm_sorted : Elapsed 0.035 ms (17.678 ms / 500) B = [32, 512] (stride (1, 32)) A = [256, 512] (stride (1, 256)) dim = 0 17.814 -> 17.524 ( -1.63%) [ +0.03% +0.00% +0.02% / -1.55% -1.59% -1.63%] index_select const : Elapsed 0.036 ms (17.820 ms / 500) 18.006 -> 17.566 ( -2.44%) [ +0.06% +0.00% +0.08% / -2.43% -2.44% -2.43%] index_select wrap : Elapsed 0.036 ms (18.016 ms / 500) 17.906 -> 17.570 ( -1.88%) [ +0.04% +0.09% +0.00% / -1.88% -1.80% -1.85%] index_select linear : Elapsed 0.036 ms (17.913 ms / 500) 17.908 -> 17.559 ( -1.95%) [ +0.03% +0.00% +0.07% / -1.95% -1.84% -1.78%] index_select reverse : Elapsed 0.036 ms (17.913 ms / 500) 17.702 -> 17.492 ( -1.19%) [ +0.00% +0.08% +0.03% / -0.95% -1.19% -1.14%] index_select skip64 : Elapsed 0.035 ms (17.702 ms / 500) 17.705 -> 17.497 ( -1.17%) [ +0.00% +0.06% +0.06% / -0.99% -1.05% -1.17%] index_select skip256 : Elapsed 0.035 ms (17.705 ms / 500) 19.177 -> 18.443 ( -3.83%) [ +0.04% +0.00% +0.05% / -3.79% -3.82% -3.83%] index_select spread : Elapsed 0.038 ms (19.185 ms / 500) 18.308 -> 17.843 ( -2.54%) [ +0.16% +0.10% +0.00% / -2.43% -2.45% -2.54%] index_select strided 3 : Elapsed 0.037 ms (18.337 ms / 500) 18.634 -> 18.068 ( -3.04%) [ +0.11% +0.00% +0.13% / -2.91% -3.04% -2.98%] index_select strided 5 : Elapsed 0.037 ms (18.654 ms / 500) 19.004 -> 18.343 ( -3.48%) [ +0.00% +0.08% +0.07% / -3.33% -3.48% -3.41%] index_select strided 7 : Elapsed 0.038 ms (19.004 ms / 500) 19.166 -> 18.439 ( -3.79%) [ +0.00% +0.09% +0.02% / -3.74% -3.79% -3.77%] index_select strided 8 : Elapsed 0.038 ms (19.166 ms / 500) 18.564 -> 18.011 ( -2.98%) [ +0.06% +0.13% +0.00% / -2.90% -2.98% -2.89%] index_select strided 16 : Elapsed 0.037 ms (18.575 ms / 500) 17.957 -> 17.649 ( -1.72%) [ +0.09% +0.02% +0.00% / -1.72% -1.69% -1.68%] index_select strided 64 : Elapsed 0.036 ms (17.973 ms / 500) 19.001 -> 18.456 ( -2.87%) [ +0.02% +0.02% +0.00% / -2.87% -2.80% -2.71%] index_select strided 100 : Elapsed 0.038 ms (19.005 ms / 500) 17.995 -> 17.657 ( -1.88%) [ +0.00% +0.10% +0.05% / -1.87% -1.88% -1.88%] index_select strided 255 : Elapsed 0.036 ms (17.995 ms / 500) 18.684 -> 18.230 ( -2.43%) [ +0.00% +0.07% +0.08% / -2.33% -2.38% -2.43%] index_select random : Elapsed 0.037 ms (18.684 ms / 500) 18.633 -> 18.147 ( -2.61%) [ +0.12% +0.10% +0.00% / -2.61% -2.58% -2.59%] index_select random_sorted : Elapsed 0.037 ms (18.656 ms / 500) 18.712 -> 18.289 ( -2.26%) [ +0.27% +0.00% +0.06% / -2.24% -2.26% -2.25%] index_select perm : Elapsed 0.038 ms (18.762 ms / 500) 18.737 -> 18.172 ( -3.02%) [ +0.00% +0.10% +0.17% / -2.95% -3.02% -2.95%] index_select perm_sorted : Elapsed 0.037 ms (18.737 ms / 500) out_shape = [256, 32] in_shape = [256, 512] idx_dim = 1 B = [256, 32] (stride (32, 1)) dim = 1 fill_cnt = 512 GOOD 10.082 -> 7.527 (-25.34%) [ +0.00% +0.09% +0.20% / -24.98% -25.34% -25.32%] index_fill_ const : Elapsed 0.020 ms (10.082 ms / 500) GOOD 9.943 -> 7.554 (-24.03%) [ +0.31% +0.00% +0.14% / -24.03% -23.74% -23.73%] index_fill_ linear : Elapsed 0.020 ms (9.974 ms / 500) GOOD 10.002 -> 7.549 (-24.53%) [ +0.35% +0.19% +0.00% / -24.53% -24.22% -24.33%] index_fill_ reverse : Elapsed 0.020 ms (10.037 ms / 500) GOOD 9.935 -> 7.509 (-24.42%) [ +0.70% +0.06% +0.00% / -23.86% -24.42% -24.34%] index_fill_ skip64 : Elapsed 0.020 ms (10.005 ms / 500) GOOD 9.975 -> 7.538 (-24.43%) [ +0.12% +0.46% +0.00% / -24.12% -24.43% -24.42%] index_fill_ skip256 : Elapsed 0.020 ms (9.987 ms / 500) GOOD 10.113 -> 7.561 (-25.23%) [ +0.17% +0.18% +0.00% / -25.23% -25.14% -25.14%] index_fill_ spread : Elapsed 0.020 ms (10.130 ms / 500) GOOD 10.105 -> 7.577 (-25.02%) [ +0.12% +0.00% +0.06% / -24.78% -25.00% -25.02%] index_fill_ strided 3 : Elapsed 0.020 ms (10.117 ms / 500) GOOD 10.094 -> 7.560 (-25.10%) [ +0.29% +0.16% +0.00% / -25.00% -25.10% -25.01%] index_fill_ strided 5 : Elapsed 0.020 ms (10.123 ms / 500) GOOD 10.207 -> 7.552 (-26.01%) [ +0.24% +0.02% +0.00% / -26.01% -25.69% -25.75%] index_fill_ strided 7 : Elapsed 0.020 ms (10.231 ms / 500) GOOD 10.174 -> 7.622 (-25.08%) [ +0.27% +0.28% +0.00% / -25.08% -24.47% -24.43%] index_fill_ strided 8 : Elapsed 0.020 ms (10.201 ms / 500) GOOD 10.109 -> 7.576 (-25.06%) [ +0.00% +0.04% +0.24% / -24.89% -25.05% -25.06%] index_fill_ strided 16 : Elapsed 0.020 ms (10.109 ms / 500) GOOD 10.054 -> 7.555 (-24.86%) [ +0.16% +0.30% +0.00% / -24.70% -24.86% -24.77%] index_fill_ random : Elapsed 0.020 ms (10.070 ms / 500) GOOD 10.087 -> 7.555 (-25.10%) [ +0.39% +0.04% +0.00% / -25.10% -24.95% -24.93%] index_fill_ random_sorted : Elapsed 0.020 ms (10.126 ms / 500) B = [256, 32] (stride (32, 1)) A = [256, 512] (stride (512, 1)) dim = 1 9.211 -> 9.033 ( -1.93%) [ +0.03% +0.08% +0.00% / -1.51% -1.93% -1.89%] index_select const : Elapsed 0.018 ms (9.214 ms / 500) 9.266 -> 9.059 ( -2.23%) [ +0.00% +0.00% +0.00% / -2.23% -2.13% -2.17%] index_select wrap : Elapsed 0.019 ms (9.266 ms / 500) 9.263 -> 9.045 ( -2.35%) [ +0.01% +0.01% +0.00% / -2.35% -2.07% -2.08%] index_select linear : Elapsed 0.019 ms (9.264 ms / 500) 9.241 -> 9.048 ( -2.09%) [ +0.19% +0.06% +0.00% / -2.09% -1.48% -1.54%] index_select reverse : Elapsed 0.019 ms (9.259 ms / 500) 9.152 -> 9.027 ( -1.37%) [ +0.12% +0.04% +0.00% / -1.37% -0.69% -0.72%] index_select skip64 : Elapsed 0.018 ms (9.163 ms / 500) 9.147 -> 9.039 ( -1.18%) [ +0.07% +0.01% +0.00% / -1.18% -1.01% -1.12%] index_select skip256 : Elapsed 0.018 ms (9.153 ms / 500) 10.082 -> 9.638 ( -4.40%) [ +0.00% +0.04% +0.04% / -4.34% -4.40% -4.30%] index_select spread : Elapsed 0.020 ms (10.082 ms / 500) 9.447 -> 9.190 ( -2.72%) [ +0.00% +0.18% +0.15% / -2.72% -2.63% -2.64%] index_select strided 3 : Elapsed 0.019 ms (9.447 ms / 500) 9.708 -> 9.318 ( -4.02%) [ +0.03% +0.03% +0.00% / -3.90% -3.88% -4.02%] index_select strided 5 : Elapsed 0.019 ms (9.711 ms / 500) 9.919 -> 9.459 ( -4.64%) [ +0.19% +0.15% +0.00% / -4.52% -4.64% -4.51%] index_select strided 7 : Elapsed 0.020 ms (9.938 ms / 500) good 10.032 -> 9.505 ( -5.25%) [ +0.00% +0.02% +0.08% / -5.11% -5.23% -5.25%] index_select strided 8 : Elapsed 0.020 ms (10.032 ms / 500) 10.099 -> 9.616 ( -4.78%) [ +0.15% +0.14% +0.00% / -4.39% -4.78% -4.66%] index_select strided 16 : Elapsed 0.020 ms (10.114 ms / 500) 9.509 -> 9.268 ( -2.53%) [ +0.08% +0.13% +0.00% / -2.43% -2.53% -2.35%] index_select strided 64 : Elapsed 0.019 ms (9.517 ms / 500) 10.090 -> 9.763 ( -3.24%) [ +0.00% +0.12% +0.11% / -3.24% -3.10% -3.21%] index_select strided 100 : Elapsed 0.020 ms (10.090 ms / 500) 9.449 -> 9.181 ( -2.84%) [ +0.17% +0.00% +0.05% / -2.84% -2.73% -2.56%] index_select strided 255 : Elapsed 0.019 ms (9.465 ms / 500) 9.217 -> 9.088 ( -1.40%) [ +0.12% +0.03% +0.00% / -1.40% -0.99% -1.04%] index_select strided 256 : Elapsed 0.018 ms (9.228 ms / 500) 9.407 -> 9.124 ( -3.01%) [ +0.27% +0.06% +0.00% / -3.01% -2.71% -2.54%] index_select strided 257 : Elapsed 0.019 ms (9.432 ms / 500) 9.927 -> 9.578 ( -3.52%) [ +0.11% +0.10% +0.00% / -3.18% -3.44% -3.52%] index_select random : Elapsed 0.020 ms (9.938 ms / 500) 9.926 -> 9.521 ( -4.08%) [ +0.18% +0.01% +0.00% / -3.96% -4.07% -4.08%] index_select random_sorted : Elapsed 0.020 ms (9.944 ms / 500) 9.936 -> 9.565 ( -3.73%) [ +0.21% +0.00% +0.07% / -3.32% -3.73% -3.66%] index_select perm : Elapsed 0.020 ms (9.957 ms / 500) 9.926 -> 9.556 ( -3.73%) [ +0.21% +0.00% +0.01% / -3.25% -3.73% -3.64%] index_select perm_sorted : Elapsed 0.020 ms (9.947 ms / 500) B = [256, 32] (stride (32, 1)) A = [256, 512] (stride (1, 256)) dim = 1 9.033 -> 8.961 ( -0.80%) [ +0.18% +0.06% +0.00% / -0.80% -0.51% -0.63%] index_select const : Elapsed 0.018 ms (9.049 ms / 500) 9.114 -> 9.151 ( +0.41%) [ +0.05% +0.09% +0.00% / +0.41% +0.59% +0.47%] index_select wrap : Elapsed 0.018 ms (9.119 ms / 500) 9.142 -> 9.136 ( -0.07%) [ +0.03% +0.19% +0.00% / +0.27% -0.07% +0.03%] index_select linear : Elapsed 0.018 ms (9.145 ms / 500) 9.140 -> 9.134 ( -0.07%) [ +0.01% +0.00% +0.00% / +0.30% +0.01% -0.07%] index_select reverse : Elapsed 0.018 ms (9.141 ms / 500) 9.021 -> 8.955 ( -0.73%) [ +0.03% +0.08% +0.00% / -0.73% -0.44% -0.40%] index_select skip64 : Elapsed 0.018 ms (9.024 ms / 500) 9.018 -> 8.960 ( -0.64%) [ +0.06% +0.10% +0.00% / -0.64% -0.45% -0.24%] index_select skip256 : Elapsed 0.018 ms (9.023 ms / 500) 9.070 -> 9.113 ( +0.47%) [ +0.03% +0.04% +0.00% / +0.47% +1.11% +1.29%] index_select spread : Elapsed 0.018 ms (9.073 ms / 500) 9.090 -> 9.179 ( +0.98%) [ +0.13% +0.00% +0.04% / +0.98% +1.65% +1.64%] index_select strided 3 : Elapsed 0.018 ms (9.102 ms / 500) 9.075 -> 9.182 ( +1.18%) [ +0.22% +0.00% +0.19% / +1.18% +1.79% +1.81%] index_select strided 5 : Elapsed 0.018 ms (9.095 ms / 500) 9.104 -> 9.214 ( +1.21%) [ +0.32% +0.22% +0.00% / +1.39% +1.25% +1.21%] index_select strided 7 : Elapsed 0.018 ms (9.133 ms / 500) 9.093 -> 9.139 ( +0.51%) [ +0.01% +0.04% +0.00% / +0.51% +0.84% +0.98%] index_select strided 8 : Elapsed 0.018 ms (9.094 ms / 500) 9.080 -> 9.133 ( +0.58%) [ +0.15% +0.02% +0.00% / +0.58% +0.70% +0.73%] index_select strided 16 : Elapsed 0.018 ms (9.094 ms / 500) 9.071 -> 8.992 ( -0.87%) [ +0.07% +0.00% +0.03% / -0.54% -0.87% -0.77%] index_select strided 64 : Elapsed 0.018 ms (9.077 ms / 500) 9.108 -> 9.161 ( +0.58%) [ +0.05% +0.03% +0.00% / +0.58% +0.89% +0.86%] index_select strided 100 : Elapsed 0.018 ms (9.113 ms / 500) 9.102 -> 9.187 ( +0.93%) [ +0.05% +0.00% +0.01% / +0.93% +1.24% +1.30%] index_select strided 255 : Elapsed 0.018 ms (9.107 ms / 500) 9.022 -> 8.967 ( -0.61%) [ +0.23% +0.13% +0.00% / -0.58% -0.61% -0.44%] index_select strided 256 : Elapsed 0.018 ms (9.043 ms / 500) 9.134 -> 9.189 ( +0.60%) [ +0.08% +0.13% +0.00% / +0.61% +0.66% +0.60%] index_select strided 257 : Elapsed 0.018 ms (9.141 ms / 500) 9.094 -> 9.151 ( +0.63%) [ +0.02% +0.14% +0.00% / +0.99% +0.63% +0.74%] index_select random : Elapsed 0.018 ms (9.096 ms / 500) 9.077 -> 9.162 ( +0.94%) [ +0.00% +0.11% +0.10% / +1.10% +0.94% +0.97%] index_select random_sorted : Elapsed 0.018 ms (9.077 ms / 500) 9.090 -> 9.205 ( +1.27%) [ +0.00% +0.04% +0.02% / +1.27% +1.75% +1.85%] index_select perm : Elapsed 0.018 ms (9.090 ms / 500) 9.112 -> 9.233 ( +1.33%) [ +0.18% +0.12% +0.00% / +1.33% +1.47% +1.47%] index_select perm_sorted : Elapsed 0.018 ms (9.128 ms / 500) B = [256, 32] (stride (1, 256)) dim = 1 fill_cnt = 512 7.593 -> 7.489 ( -1.37%) [ +0.51% +0.09% +0.00% / -0.94% -1.37% -1.28%] index_fill_ const : Elapsed 0.015 ms (7.632 ms / 500) 7.568 -> 7.526 ( -0.55%) [ +0.38% +0.00% +0.01% / -0.55% +4.32% +4.36%] index_fill_ linear : Elapsed 0.015 ms (7.597 ms / 500) 7.581 -> 7.572 ( -0.12%) [ +0.58% +0.05% +0.00% / -0.12% +2.30% +2.24%] index_fill_ reverse : Elapsed 0.015 ms (7.625 ms / 500) 7.674 -> 7.445 ( -2.98%) [ +0.78% +0.14% +0.00% / -0.27% -2.98% -2.91%] index_fill_ skip64 : Elapsed 0.015 ms (7.734 ms / 500) 7.630 -> 7.487 ( -1.87%) [ +0.55% +0.00% +0.00% / -0.88% -1.87% -1.69%] index_fill_ skip256 : Elapsed 0.015 ms (7.672 ms / 500) 7.564 -> 7.525 ( -0.52%) [ +0.49% +0.00% +0.07% / -0.49% -0.44% -0.52%] index_fill_ spread : Elapsed 0.015 ms (7.601 ms / 500) 7.514 -> 7.470 ( -0.59%) [ +0.48% +0.07% +0.00% / -0.59% +0.07% -0.07%] index_fill_ strided 3 : Elapsed 0.015 ms (7.550 ms / 500) 7.549 -> 7.513 ( -0.48%) [ +0.54% +0.19% +0.00% / -0.48% +4.13% +4.07%] index_fill_ strided 5 : Elapsed 0.015 ms (7.590 ms / 500) 7.524 -> 7.484 ( -0.53%) [ +0.57% +0.05% +0.00% / -0.53% +0.03% +0.21%] index_fill_ strided 7 : Elapsed 0.015 ms (7.567 ms / 500) 7.609 -> 7.483 ( -1.66%) [ +0.57% +0.00% +0.08% / -0.24% -1.66% -1.63%] index_fill_ strided 8 : Elapsed 0.015 ms (7.652 ms / 500) 7.832 -> 7.459 ( -4.76%) [ +0.37% +0.03% +0.00% / -0.49% -4.74% -4.76%] index_fill_ strided 16 : Elapsed 0.016 ms (7.861 ms / 500) 7.541 -> 7.500 ( -0.54%) [ +0.48% +0.16% +0.00% / -0.54% -0.32% -0.30%] index_fill_ random : Elapsed 0.015 ms (7.577 ms / 500) 7.552 -> 7.508 ( -0.58%) [ +0.50% +0.00% +0.01% / -0.58% -0.25% -0.08%] index_fill_ random_sorted : Elapsed 0.015 ms (7.590 ms / 500) B = [256, 32] (stride (1, 256)) A = [256, 512] (stride (512, 1)) dim = 1 9.108 -> 9.117 ( +0.10%) [ +0.05% +0.09% +0.00% / +0.10% +0.33% +0.27%] index_select const : Elapsed 0.018 ms (9.113 ms / 500) 9.249 -> 9.201 ( -0.52%) [ +0.05% +0.00% +0.00% / +0.04% -0.50% -0.52%] index_select wrap : Elapsed 0.019 ms (9.254 ms / 500) 9.244 -> 9.208 ( -0.39%) [ +0.03% +0.00% +0.04% / +0.00% -0.38% -0.39%] index_select linear : Elapsed 0.018 ms (9.247 ms / 500) 9.228 -> 9.210 ( -0.20%) [ +0.04% +0.00% +0.00% / +0.15% +0.00% -0.20%] index_select reverse : Elapsed 0.018 ms (9.232 ms / 500) 9.130 -> 9.115 ( -0.16%) [ +0.10% +0.00% +0.02% / +0.15% -0.13% -0.16%] index_select skip64 : Elapsed 0.018 ms (9.139 ms / 500) 9.110 -> 9.107 ( -0.03%) [ +0.00% +0.12% +0.00% / -0.03% +0.12% +0.00%] index_select skip256 : Elapsed 0.018 ms (9.110 ms / 500) 10.022 -> 10.049 ( +0.27%) [ +0.30% +0.14% +0.00% / +0.27% +0.44% +0.35%] index_select spread : Elapsed 0.020 ms (10.052 ms / 500) 9.417 -> 9.414 ( -0.03%) [ +0.15% +0.00% +0.04% / -0.03% +0.07% +0.05%] index_select strided 3 : Elapsed 0.019 ms (9.431 ms / 500) 9.669 -> 9.658 ( -0.11%) [ +0.00% +0.13% +0.02% / +0.02% -0.10% -0.11%] index_select strided 5 : Elapsed 0.019 ms (9.669 ms / 500) 9.840 -> 9.848 ( +0.08%) [ +0.15% +0.12% +0.00% / +0.08% +0.23% +0.25%] index_select strided 7 : Elapsed 0.020 ms (9.855 ms / 500) 9.946 -> 9.943 ( -0.03%) [ +0.12% +0.09% +0.00% / -0.03% +0.30% +0.19%] index_select strided 8 : Elapsed 0.020 ms (9.958 ms / 500) 10.005 -> 10.026 ( +0.21%) [ +0.09% +0.11% +0.00% / +0.21% +0.97% +0.88%] index_select strided 16 : Elapsed 0.020 ms (10.014 ms / 500) 9.423 -> 9.423 ( +0.00%) [ +0.05% +0.02% +0.00% / +0.00% +0.37% +0.31%] index_select strided 64 : Elapsed 0.019 ms (9.428 ms / 500) 10.043 -> 10.031 ( -0.12%) [ +0.08% +0.00% +0.04% / +0.01% -0.10% -0.12%] index_select strided 100 : Elapsed 0.020 ms (10.051 ms / 500) 9.413 -> 9.414 ( +0.01%) [ +0.07% +0.19% +0.00% / +0.01% +0.10% +0.16%] index_select strided 255 : Elapsed 0.019 ms (9.420 ms / 500) 9.173 -> 9.180 ( +0.08%) [ +0.09% +0.00% +0.01% / +0.08% +0.12% +0.11%] index_select strided 256 : Elapsed 0.018 ms (9.181 ms / 500) 9.367 -> 9.377 ( +0.11%) [ +0.00% +0.13% +0.12% / +0.11% +0.42% +0.43%] index_select strided 257 : Elapsed 0.019 ms (9.367 ms / 500) 9.854 -> 9.849 ( -0.05%) [ +0.07% +0.00% +0.04% / -0.05% +0.59% +0.52%] index_select random : Elapsed 0.020 ms (9.861 ms / 500) 9.852 -> 9.854 ( +0.02%) [ +0.00% +0.00% +0.05% / +0.02% +0.74% +0.81%] index_select random_sorted : Elapsed 0.020 ms (9.852 ms / 500) 9.905 -> 9.909 ( +0.04%) [ +0.07% +0.00% +0.08% / +0.04% +0.20% +0.30%] index_select perm : Elapsed 0.020 ms (9.912 ms / 500) 9.912 -> 9.926 ( +0.14%) [ +0.10% +0.00% +0.00% / +0.14% +0.30% +0.31%] index_select perm_sorted : Elapsed 0.020 ms (9.922 ms / 500) B = [256, 32] (stride (1, 256)) A = [256, 512] (stride (1, 256)) dim = 1 8.956 -> 8.958 ( +0.02%) [ +0.06% +0.00% +0.01% / +0.02% +0.17% +0.29%] index_select const : Elapsed 0.018 ms (8.961 ms / 500) 9.026 -> 9.039 ( +0.14%) [ +0.13% +0.08% +0.00% / +0.14% +0.16% +0.16%] index_select wrap : Elapsed 0.018 ms (9.038 ms / 500) 9.007 -> 9.017 ( +0.11%) [ +0.29% +0.13% +0.00% / +0.11% +0.68% +0.62%] index_select linear : Elapsed 0.018 ms (9.033 ms / 500) 9.009 -> 9.020 ( +0.12%) [ +0.14% +0.00% +0.01% / +0.12% +0.82% +0.88%] index_select reverse : Elapsed 0.018 ms (9.022 ms / 500) 8.964 -> 8.958 ( -0.07%) [ +0.16% +0.00% +0.20% / +0.13% -0.07% -0.07%] index_select skip64 : Elapsed 0.018 ms (8.978 ms / 500) 8.972 -> 8.953 ( -0.21%) [ +0.13% +0.03% +0.00% / +0.11% -0.21% -0.08%] index_select skip256 : Elapsed 0.018 ms (8.984 ms / 500) 9.058 -> 8.997 ( -0.67%) [ +0.00% +0.06% +0.01% / +0.02% -0.67% -0.51%] index_select spread : Elapsed 0.018 ms (9.058 ms / 500) 9.093 -> 9.020 ( -0.80%) [ +0.15% +0.00% +0.01% / +0.09% -0.76% -0.80%] index_select strided 3 : Elapsed 0.018 ms (9.107 ms / 500) 9.030 -> 9.036 ( +0.07%) [ +0.04% +0.00% +0.08% / +0.08% +0.07% +0.25%] index_select strided 5 : Elapsed 0.018 ms (9.034 ms / 500) 9.079 -> 9.021 ( -0.64%) [ +0.12% +0.00% +0.01% / +0.12% -0.52% -0.64%] index_select strided 7 : Elapsed 0.018 ms (9.090 ms / 500) 9.022 -> 9.021 ( -0.01%) [ +0.16% +0.00% +0.20% / +0.21% -0.01% +0.07%] index_select strided 8 : Elapsed 0.018 ms (9.036 ms / 500) 9.020 -> 9.018 ( -0.02%) [ +0.04% +0.02% +0.00% / +0.00% +0.09% -0.02%] index_select strided 16 : Elapsed 0.018 ms (9.024 ms / 500) 8.957 -> 8.972 ( +0.17%) [ +0.20% +0.21% +0.00% / +0.17% +0.23% +0.28%] index_select strided 64 : Elapsed 0.018 ms (8.975 ms / 500) 8.994 -> 8.995 ( +0.01%) [ +0.00% +0.03% +0.02% / +0.01% +0.64% +0.66%] index_select strided 100 : Elapsed 0.018 ms (8.994 ms / 500) 9.032 -> 9.032 ( +0.00%) [ +0.00% +0.00% +0.00% / +0.00% +0.29% +0.21%] index_select strided 255 : Elapsed 0.018 ms (9.032 ms / 500) 8.956 -> 8.950 ( -0.07%) [ +0.19% +0.02% +0.00% / +0.02% -0.07% -0.06%] index_select strided 256 : Elapsed 0.018 ms (8.973 ms / 500) 9.060 -> 9.017 ( -0.47%) [ +0.04% +0.00% +0.14% / +0.08% -0.41% -0.47%] index_select strided 257 : Elapsed 0.018 ms (9.064 ms / 500) 9.021 -> 9.008 ( -0.14%) [ +0.01% +0.07% +0.00% / -0.09% -0.14% -0.11%] index_select random : Elapsed 0.018 ms (9.022 ms / 500) 9.043 -> 9.010 ( -0.36%) [ +0.00% +0.07% +0.04% / +0.04% -0.34% -0.36%] index_select random_sorted : Elapsed 0.018 ms (9.043 ms / 500) 9.052 -> 9.017 ( -0.39%) [ +0.15% +0.00% +0.17% / +0.13% -0.39% -0.24%] index_select perm : Elapsed 0.018 ms (9.066 ms / 500) 9.037 -> 9.039 ( +0.02%) [ +0.15% +0.17% +0.00% / +0.21% +0.08% +0.02%] index_select perm_sorted : Elapsed 0.018 ms (9.051 ms / 500) out_shape = [32, 256] in_shape = [512, 256] idx_dim = 0 B = [32, 256] (stride (256, 1)) dim = 0 fill_cnt = 512 7.473 -> 7.444 ( -0.39%) [ +0.71% +0.19% +0.00% / -0.39% -0.05% -0.03%] index_fill_ const : Elapsed 0.015 ms (7.526 ms / 500) 7.672 -> 7.530 ( -1.85%) [ +0.61% +0.09% +0.00% / -0.18% -1.85% -1.66%] index_fill_ linear : Elapsed 0.015 ms (7.719 ms / 500) 7.578 -> 7.514 ( -0.84%) [ +0.55% +0.00% +0.00% / -0.40% -0.84% -0.71%] index_fill_ reverse : Elapsed 0.015 ms (7.620 ms / 500) 7.834 -> 7.465 ( -4.71%) [ +0.59% +0.00% +0.06% / +0.17% -4.58% -4.71%] index_fill_ skip64 : Elapsed 0.016 ms (7.880 ms / 500) 7.510 -> 7.477 ( -0.44%) [ +0.64% +0.00% +0.03% / -0.44% +1.36% +1.32%] index_fill_ skip256 : Elapsed 0.015 ms (7.558 ms / 500) 7.517 -> 7.486 ( -0.41%) [ +0.65% +0.00% +0.13% / -0.41% +0.37% +0.59%] index_fill_ spread : Elapsed 0.015 ms (7.566 ms / 500) 7.555 -> 7.509 ( -0.61%) [ +0.60% +0.00% +0.05% / -0.37% -0.54% -0.61%] index_fill_ strided 3 : Elapsed 0.015 ms (7.600 ms / 500) 7.571 -> 7.493 ( -1.03%) [ +0.55% +0.01% +0.00% / -0.52% -0.91% -1.03%] index_fill_ strided 5 : Elapsed 0.015 ms (7.613 ms / 500) 7.565 -> 7.524 ( -0.54%) [ +0.54% +0.00% +0.09% / -0.29% -0.54% -0.46%] index_fill_ strided 7 : Elapsed 0.015 ms (7.606 ms / 500) 7.522 -> 7.468 ( -0.72%) [ +0.53% +0.00% +0.08% / -0.72% +4.27% +4.31%] index_fill_ strided 8 : Elapsed 0.015 ms (7.562 ms / 500) 7.487 -> 7.439 ( -0.64%) [ +0.41% +0.00% +0.11% / -0.64% +3.86% +3.98%] index_fill_ strided 16 : Elapsed 0.015 ms (7.518 ms / 500) 7.567 -> 7.484 ( -1.10%) [ +0.45% +0.05% +0.00% / -0.16% -1.08% -1.10%] index_fill_ random : Elapsed 0.015 ms (7.601 ms / 500) 7.737 -> 7.487 ( -3.23%) [ +0.69% +0.31% +0.00% / +0.08% -3.23% -3.13%] index_fill_ random_sorted : Elapsed 0.016 ms (7.790 ms / 500) B = [32, 256] (stride (256, 1)) A = [512, 256] (stride (256, 1)) dim = 0 9.023 -> 8.952 ( -0.79%) [ +0.13% +0.06% +0.00% / +0.07% -0.76% -0.79%] index_select const : Elapsed 0.018 ms (9.035 ms / 500) 9.033 -> 9.048 ( +0.17%) [ +0.07% +0.12% +0.00% / +0.17% +0.31% +0.48%] index_select wrap : Elapsed 0.018 ms (9.039 ms / 500) 9.039 -> 9.050 ( +0.12%) [ +0.00% +0.04% +0.01% / +0.12% +0.33% +0.30%] index_select linear : Elapsed 0.018 ms (9.039 ms / 500) 9.024 -> 9.036 ( +0.13%) [ +0.03% +0.00% +0.01% / +0.13% +1.00% +0.96%] index_select reverse : Elapsed 0.018 ms (9.027 ms / 500) 8.942 -> 8.960 ( +0.20%) [ +0.18% +0.16% +0.00% / +0.20% +0.98% +1.10%] index_select skip64 : Elapsed 0.018 ms (8.958 ms / 500) 8.960 -> 8.959 ( -0.01%) [ +0.07% +0.00% +0.04% / -0.01% +0.40% +0.25%] index_select skip256 : Elapsed 0.018 ms (8.966 ms / 500) 8.997 -> 9.010 ( +0.14%) [ +0.04% +0.07% +0.00% / +0.14% +0.50% +0.54%] index_select spread : Elapsed 0.018 ms (9.001 ms / 500) 9.021 -> 9.024 ( +0.03%) [ +0.03% +0.07% +0.00% / +0.03% +0.25% +0.27%] index_select strided 3 : Elapsed 0.018 ms (9.024 ms / 500) 9.055 -> 9.020 ( -0.39%) [ +0.09% +0.10% +0.00% / +0.15% -0.35% -0.39%] index_select strided 5 : Elapsed 0.018 ms (9.063 ms / 500) 9.057 -> 9.032 ( -0.28%) [ +0.04% +0.00% +0.06% / +0.02% -0.28% -0.24%] index_select strided 7 : Elapsed 0.018 ms (9.061 ms / 500) 9.045 -> 9.031 ( -0.15%) [ +0.18% +0.19% +0.00% / +0.10% -0.15% -0.15%] index_select strided 8 : Elapsed 0.018 ms (9.061 ms / 500) 9.004 -> 9.009 ( +0.06%) [ +0.00% +0.02% +0.12% / +0.06% +0.14% +0.14%] index_select strided 16 : Elapsed 0.018 ms (9.004 ms / 500) 8.974 -> 8.974 ( +0.00%) [ +0.01% +0.06% +0.00% / +0.00% +0.26% +0.11%] index_select strided 64 : Elapsed 0.018 ms (8.975 ms / 500) 9.036 -> 9.033 ( -0.03%) [ +0.02% +0.00% +0.01% / -0.03% +0.11% +0.09%] index_select strided 100 : Elapsed 0.018 ms (9.038 ms / 500) 9.041 -> 9.040 ( -0.01%) [ +0.08% +0.15% +0.00% / -0.01% +0.00% +0.08%] index_select strided 255 : Elapsed 0.018 ms (9.048 ms / 500) 8.961 -> 8.959 ( -0.02%) [ +0.09% +0.11% +0.00% / -0.02% +0.17% +0.31%] index_select strided 256 : Elapsed 0.018 ms (8.969 ms / 500) 9.028 -> 9.040 ( +0.13%) [ +0.09% +0.00% +0.10% / +0.13% +0.38% +0.61%] index_select strided 257 : Elapsed 0.018 ms (9.036 ms / 500) 9.061 -> 9.023 ( -0.42%) [ +0.14% +0.09% +0.00% / +0.20% -0.28% -0.42%] index_select random : Elapsed 0.018 ms (9.074 ms / 500) 9.058 -> 9.024 ( -0.38%) [ +0.00% +0.02% +0.11% / +0.13% -0.23% -0.38%] index_select random_sorted : Elapsed 0.018 ms (9.058 ms / 500) 9.038 -> 9.031 ( -0.08%) [ +0.18% +0.01% +0.00% / +0.10% -0.04% -0.08%] index_select perm : Elapsed 0.018 ms (9.054 ms / 500) 9.043 -> 9.043 ( +0.00%) [ +0.00% +0.14% +0.09% / +0.02% +0.00% +0.06%] index_select perm_sorted : Elapsed 0.018 ms (9.043 ms / 500) B = [32, 256] (stride (256, 1)) A = [512, 256] (stride (1, 512)) dim = 0 9.124 -> 9.120 ( -0.04%) [ +0.14% +0.00% +0.03% / +0.13% -0.04% -0.04%] index_select const : Elapsed 0.018 ms (9.137 ms / 500) 9.213 -> 9.200 ( -0.14%) [ +0.14% +0.00% +0.00% / -0.02% -0.05% -0.14%] index_select wrap : Elapsed 0.018 ms (9.226 ms / 500) 9.241 -> 9.192 ( -0.53%) [ +0.06% +0.12% +0.00% / -0.04% -0.53% -0.48%] index_select linear : Elapsed 0.018 ms (9.247 ms / 500) 9.240 -> 9.185 ( -0.60%) [ +0.13% +0.00% +0.04% / -0.01% -0.60% -0.60%] index_select reverse : Elapsed 0.019 ms (9.252 ms / 500) 9.105 -> 9.107 ( +0.02%) [ +0.00% +0.09% +0.05% / +0.02% +0.23% +0.25%] index_select skip64 : Elapsed 0.018 ms (9.105 ms / 500) 9.101 -> 9.100 ( -0.01%) [ +0.04% +0.00% +0.03% / -0.01% +0.30% +0.34%] index_select skip256 : Elapsed 0.018 ms (9.105 ms / 500) 10.020 -> 10.030 ( +0.10%) [ +0.05% +0.00% +0.01% / +0.10% +1.05% +1.14%] index_select spread : Elapsed 0.020 ms (10.025 ms / 500) 9.372 -> 9.374 ( +0.02%) [ +0.09% +0.00% +0.04% / +0.02% +0.65% +0.70%] index_select strided 3 : Elapsed 0.019 ms (9.380 ms / 500) 9.573 -> 9.580 ( +0.07%) [ +0.07% +0.05% +0.00% / +0.07% +0.32% +0.32%] index_select strided 5 : Elapsed 0.019 ms (9.580 ms / 500) 9.758 -> 9.766 ( +0.08%) [ +0.00% +0.03% +0.03% / +0.08% +0.12% +0.31%] index_select strided 7 : Elapsed 0.020 ms (9.758 ms / 500) 9.838 -> 9.845 ( +0.07%) [ +0.16% +0.00% +0.20% / +0.07% +0.17% +0.20%] index_select strided 8 : Elapsed 0.020 ms (9.854 ms / 500) 10.022 -> 10.036 ( +0.14%) [ +0.06% +0.00% +0.03% / +0.14% +0.32% +0.26%] index_select strided 16 : Elapsed 0.020 ms (10.028 ms / 500) 9.459 -> 9.435 ( -0.25%) [ +0.04% +0.14% +0.00% / +0.20% -0.10% -0.25%] index_select strided 64 : Elapsed 0.019 ms (9.463 ms / 500) 10.055 -> 10.010 ( -0.45%) [ +0.00% +0.10% +0.11% / +0.14% -0.45% -0.41%] index_select strided 100 : Elapsed 0.020 ms (10.055 ms / 500) 9.391 -> 9.403 ( +0.13%) [ +0.00% +0.04% +0.11% / +0.13% +0.20% +0.21%] index_select strided 255 : Elapsed 0.019 ms (9.391 ms / 500) 9.162 -> 9.174 ( +0.13%) [ +0.24% +0.00% +0.15% / +0.13% +0.22% +0.15%] index_select strided 256 : Elapsed 0.018 ms (9.184 ms / 500) 9.372 -> 9.371 ( -0.01%) [ +0.00% +0.02% +0.05% / +0.07% -0.01% +0.01%] index_select strided 257 : Elapsed 0.019 ms (9.372 ms / 500) 9.911 -> 9.920 ( +0.09%) [ +0.11% +0.02% +0.00% / +0.09% +0.20% +0.28%] index_select random : Elapsed 0.020 ms (9.922 ms / 500) 9.870 -> 9.879 ( +0.09%) [ +0.17% +0.00% +0.07% / +0.09% +0.46% +0.69%] index_select random_sorted : Elapsed 0.020 ms (9.887 ms / 500) 9.883 -> 9.886 ( +0.03%) [ +0.00% +0.03% +0.09% / +0.03% +0.57% +0.52%] index_select perm : Elapsed 0.020 ms (9.883 ms / 500) 9.872 -> 9.863 ( -0.09%) [ +0.03% +0.09% +0.00% / -0.09% -0.02% -0.08%] index_select perm_sorted : Elapsed 0.020 ms (9.875 ms / 500) B = [32, 256] (stride (1, 32)) dim = 0 fill_cnt = 512 GOOD 9.818 -> 7.547 (-23.13%) [ +0.86% +0.00% +0.28% / -23.13% -22.98% -23.01%] index_fill_ const : Elapsed 0.020 ms (9.902 ms / 500) GOOD 9.845 -> 7.553 (-23.28%) [ +0.64% +0.08% +0.00% / -22.99% -23.28% -23.20%] index_fill_ linear : Elapsed 0.020 ms (9.908 ms / 500) GOOD 9.822 -> 7.575 (-22.88%) [ +0.97% +0.00% +0.12% / -22.88% -22.80% -22.77%] index_fill_ reverse : Elapsed 0.020 ms (9.917 ms / 500) GOOD 9.842 -> 7.509 (-23.70%) [ +0.84% +0.11% +0.00% / -23.70% -23.22% -23.26%] index_fill_ skip64 : Elapsed 0.020 ms (9.925 ms / 500) GOOD 9.829 -> 7.535 (-23.34%) [ +0.47% +0.00% +0.00% / -23.34% -22.97% -22.96%] index_fill_ skip256 : Elapsed 0.020 ms (9.875 ms / 500) GOOD 9.863 -> 7.553 (-23.42%) [ +0.49% +0.05% +0.00% / -23.21% -23.36% -23.42%] index_fill_ spread : Elapsed 0.020 ms (9.911 ms / 500) GOOD 9.809 -> 7.562 (-22.91%) [ +0.69% +0.21% +0.00% / -22.91% -22.88% -22.76%] index_fill_ strided 3 : Elapsed 0.020 ms (9.877 ms / 500) GOOD 9.845 -> 7.556 (-23.25%) [ +0.76% +0.14% +0.00% / -23.25% -22.89% -22.83%] index_fill_ strided 5 : Elapsed 0.020 ms (9.920 ms / 500) GOOD 9.853 -> 7.577 (-23.10%) [ +0.81% +0.00% +0.16% / -23.10% -23.07% -23.08%] index_fill_ strided 7 : Elapsed 0.020 ms (9.933 ms / 500) GOOD 9.927 -> 7.618 (-23.26%) [ +0.64% +0.00% +0.01% / -23.22% -23.26% -23.16%] index_fill_ strided 8 : Elapsed 0.020 ms (9.991 ms / 500) GOOD 9.901 -> 7.565 (-23.59%) [ +0.42% +0.01% +0.00% / -23.59% -23.19% -23.11%] index_fill_ strided 16 : Elapsed 0.020 ms (9.943 ms / 500) GOOD 9.868 -> 7.574 (-23.25%) [ +0.64% +0.00% +0.21% / -23.25% -23.12% -23.19%] index_fill_ random : Elapsed 0.020 ms (9.931 ms / 500) GOOD 9.864 -> 7.562 (-23.34%) [ +0.62% +0.15% +0.00% / -23.12% -23.26% -23.34%] index_fill_ random_sorted : Elapsed 0.020 ms (9.925 ms / 500) B = [32, 256] (stride (1, 32)) A = [512, 256] (stride (256, 1)) dim = 0 9.060 -> 8.956 ( -1.15%) [ +0.04% +0.11% +0.00% / -0.91% -1.05% -1.15%] index_select const : Elapsed 0.018 ms (9.064 ms / 500) 9.171 -> 9.155 ( -0.17%) [ +0.05% +0.10% +0.00% / +0.33% -0.17% -0.07%] index_select wrap : Elapsed 0.018 ms (9.176 ms / 500) 9.173 -> 9.160 ( -0.14%) [ +0.00% +0.07% +0.00% / +0.28% -0.03% -0.14%] index_select linear : Elapsed 0.018 ms (9.173 ms / 500) 9.147 -> 9.179 ( +0.35%) [ +0.00% +0.00% +0.03% / +0.35% +0.47% +0.44%] index_select reverse : Elapsed 0.018 ms (9.147 ms / 500) 9.064 -> 8.965 ( -1.09%) [ +0.12% +0.08% +0.00% / -0.93% -1.02% -1.09%] index_select skip64 : Elapsed 0.018 ms (9.075 ms / 500) 9.058 -> 8.960 ( -1.08%) [ +0.17% +0.07% +0.00% / -1.03% -1.08% -1.08%] index_select skip256 : Elapsed 0.018 ms (9.073 ms / 500) 9.100 -> 9.156 ( +0.62%) [ +0.10% +0.00% +0.04% / +0.92% +0.63% +0.62%] index_select spread : Elapsed 0.018 ms (9.109 ms / 500) 9.116 -> 9.180 ( +0.70%) [ +0.19% +0.00% +0.18% / +0.70% +0.88% +0.87%] index_select strided 3 : Elapsed 0.018 ms (9.133 ms / 500) 9.112 -> 9.160 ( +0.53%) [ +0.13% +0.03% +0.00% / +0.53% +0.85% +0.74%] index_select strided 5 : Elapsed 0.018 ms (9.124 ms / 500) 9.116 -> 9.161 ( +0.49%) [ +0.05% +0.00% +0.03% / +0.49% +1.14% +1.10%] index_select strided 7 : Elapsed 0.018 ms (9.121 ms / 500) 9.130 -> 9.170 ( +0.44%) [ +0.04% +0.02% +0.00% / +0.48% +0.44% +0.46%] index_select strided 8 : Elapsed 0.018 ms (9.134 ms / 500) 9.090 -> 9.171 ( +0.89%) [ +0.13% +0.00% +0.07% / +0.89% +0.92% +0.89%] index_select strided 16 : Elapsed 0.018 ms (9.102 ms / 500) 9.050 -> 8.978 ( -0.80%) [ +0.08% +0.00% +0.00% / -0.80% -0.60% -0.63%] index_select strided 64 : Elapsed 0.018 ms (9.057 ms / 500) 9.121 -> 9.166 ( +0.49%) [ +0.26% +0.10% +0.00% / +0.49% +0.65% +0.62%] index_select strided 100 : Elapsed 0.018 ms (9.145 ms / 500) 9.131 -> 9.212 ( +0.89%) [ +0.07% +0.00% +0.02% / +1.10% +0.89% +0.99%] index_select strided 255 : Elapsed 0.018 ms (9.137 ms / 500) 9.062 -> 8.969 ( -1.03%) [ +0.03% +0.11% +0.00% / -0.93% -1.02% -1.03%] index_select strided 256 : Elapsed 0.018 ms (9.065 ms / 500) 9.128 -> 9.224 ( +1.05%) [ +0.09% +0.00% +0.01% / +1.06% +1.13% +1.05%] index_select strided 257 : Elapsed 0.018 ms (9.136 ms / 500) 9.077 -> 9.151 ( +0.82%) [ +0.13% +0.00% +0.11% / +0.82% +2.40% +2.31%] index_select random : Elapsed 0.018 ms (9.089 ms / 500) 9.081 -> 9.141 ( +0.66%) [ +0.08% +0.03% +0.00% / +0.66% +2.21% +2.27%] index_select random_sorted : Elapsed 0.018 ms (9.088 ms / 500) 9.129 -> 9.183 ( +0.59%) [ +0.04% +0.00% +0.08% / +0.95% +0.59% +0.67%] index_select perm : Elapsed 0.018 ms (9.133 ms / 500) 9.134 -> 9.187 ( +0.58%) [ +0.10% +0.00% +0.01% / +0.85% +0.58% +0.72%] index_select perm_sorted : Elapsed 0.018 ms (9.143 ms / 500) B = [32, 256] (stride (1, 32)) A = [512, 256] (stride (1, 512)) dim = 0 9.187 -> 9.038 ( -1.62%) [ +0.17% +0.10% +0.00% / -1.59% -1.56% -1.62%] index_select const : Elapsed 0.018 ms (9.203 ms / 500) 9.282 -> 9.047 ( -2.53%) [ +0.08% +0.83% +0.00% / -2.53% -2.46% -2.46%] index_select wrap : Elapsed 0.019 ms (9.289 ms / 500) 9.257 -> 9.039 ( -2.35%) [ +0.00% +0.12% +0.01% / -2.35% -1.87% -1.89%] index_select linear : Elapsed 0.019 ms (9.257 ms / 500) 9.258 -> 9.051 ( -2.24%) [ +0.12% +0.06% +0.00% / -2.24% -1.88% -1.87%] index_select reverse : Elapsed 0.019 ms (9.269 ms / 500) 9.195 -> 9.023 ( -1.87%) [ +0.04% +0.13% +0.00% / -1.64% -1.84% -1.87%] index_select skip64 : Elapsed 0.018 ms (9.199 ms / 500) 9.207 -> 9.023 ( -2.00%) [ +0.03% +0.01% +0.00% / -1.72% -2.00% -1.96%] index_select skip256 : Elapsed 0.018 ms (9.210 ms / 500) good 10.140 -> 9.604 ( -5.29%) [ +0.06% +0.00% +0.12% / -4.78% -5.20% -5.29%] index_select spread : Elapsed 0.020 ms (10.146 ms / 500) 9.491 -> 9.176 ( -3.32%) [ +0.08% +0.07% +0.00% / -2.79% -3.32% -3.31%] index_select strided 3 : Elapsed 0.019 ms (9.499 ms / 500) 9.670 -> 9.306 ( -3.76%) [ +0.00% +0.01% +0.06% / -3.39% -3.76% -3.66%] index_select strided 5 : Elapsed 0.019 ms (9.670 ms / 500) 9.853 -> 9.438 ( -4.21%) [ +0.05% +0.05% +0.00% / -3.79% -4.21% -4.11%] index_select strided 7 : Elapsed 0.020 ms (9.858 ms / 500) 9.907 -> 9.481 ( -4.30%) [ +0.00% +0.00% +0.07% / -4.22% -4.30% -4.26%] index_select strided 8 : Elapsed 0.020 ms (9.907 ms / 500) 10.093 -> 9.607 ( -4.82%) [ +0.09% +0.14% +0.00% / -4.76% -4.77% -4.82%] index_select strided 16 : Elapsed 0.020 ms (10.102 ms / 500) 9.491 -> 9.208 ( -2.98%) [ +0.19% +0.04% +0.00% / -2.98% -2.76% -2.76%] index_select strided 64 : Elapsed 0.019 ms (9.509 ms / 500) 10.071 -> 9.743 ( -3.26%) [ +0.13% +0.00% +0.03% / -3.26% -3.12% -3.02%] index_select strided 100 : Elapsed 0.020 ms (10.084 ms / 500) 9.462 -> 9.163 ( -3.16%) [ +0.00% +0.01% +0.00% / -2.94% -3.16% -2.92%] index_select strided 255 : Elapsed 0.019 ms (9.462 ms / 500) 9.248 -> 9.093 ( -1.68%) [ +0.17% +0.03% +0.00% / -1.55% -1.64% -1.68%] index_select strided 256 : Elapsed 0.019 ms (9.264 ms / 500) 9.447 -> 9.113 ( -3.54%) [ +0.00% +0.05% +0.05% / -3.31% -3.44% -3.54%] index_select strided 257 : Elapsed 0.019 ms (9.447 ms / 500) 9.916 -> 9.548 ( -3.71%) [ +0.07% +0.00% +0.14% / -3.57% -3.71% -3.62%] index_select random : Elapsed 0.020 ms (9.923 ms / 500) 9.911 -> 9.535 ( -3.79%) [ +0.20% +0.00% +0.02% / -3.49% -3.79% -3.68%] index_select random_sorted : Elapsed 0.020 ms (9.931 ms / 500) 10.023 -> 9.564 ( -4.58%) [ +0.01% +0.00% +0.08% / -4.36% -4.53% -4.58%] index_select perm : Elapsed 0.020 ms (10.024 ms / 500) 9.928 -> 9.562 ( -3.69%) [ +0.22% +0.00% +0.08% / -3.69% -3.65% -3.64%] index_select perm_sorted : Elapsed 0.020 ms (9.950 ms / 500) out_shape = [512, 32] in_shape = [512, 256] idx_dim = 1 B = [512, 32] (stride (32, 1)) dim = 1 fill_cnt = 256 GOOD 13.842 -> 10.904 (-21.23%) [ +0.09% +0.00% +0.14% / -21.12% -21.20% -21.23%] index_fill_ const : Elapsed 0.028 ms (13.854 ms / 500) GOOD 13.891 -> 10.966 (-21.06%) [ +0.30% +0.00% +0.03% / -20.93% -21.04% -21.06%] index_fill_ linear : Elapsed 0.028 ms (13.932 ms / 500) GOOD 13.947 -> 10.986 (-21.23%) [ +0.02% +0.00% +0.09% / -21.23% -21.14% -21.12%] index_fill_ reverse : Elapsed 0.028 ms (13.950 ms / 500) GOOD 13.803 -> 10.914 (-20.93%) [ +0.25% +0.07% +0.00% / -20.93% -20.84% -20.89%] index_fill_ skip64 : Elapsed 0.028 ms (13.838 ms / 500) GOOD 13.825 -> 10.915 (-21.05%) [ +0.14% +0.00% +0.07% / -21.05% -20.84% -20.87%] index_fill_ skip256 : Elapsed 0.028 ms (13.845 ms / 500) GOOD 14.093 -> 10.952 (-22.29%) [ +0.06% +0.07% +0.00% / -22.29% -22.10% -22.12%] index_fill_ spread : Elapsed 0.028 ms (14.102 ms / 500) GOOD 13.985 -> 10.968 (-21.57%) [ +0.04% +0.00% +0.05% / -21.54% -21.57% -21.56%] index_fill_ strided 3 : Elapsed 0.028 ms (13.990 ms / 500) GOOD 14.284 -> 10.975 (-23.17%) [ +0.15% +0.00% +0.13% / -23.15% -23.17% -23.17%] index_fill_ strided 5 : Elapsed 0.029 ms (14.306 ms / 500) GOOD 13.917 -> 10.973 (-21.15%) [ +0.22% +0.11% +0.00% / -21.15% -21.15% -21.14%] index_fill_ strided 7 : Elapsed 0.028 ms (13.947 ms / 500) GOOD 14.020 -> 11.087 (-20.92%) [ +0.04% +0.00% +0.01% / -20.61% -20.92% -20.73%] index_fill_ strided 8 : Elapsed 0.028 ms (14.026 ms / 500) GOOD 13.937 -> 10.972 (-21.27%) [ +0.24% +0.23% +0.00% / -21.27% -21.10% -21.02%] index_fill_ strided 16 : Elapsed 0.028 ms (13.970 ms / 500) GOOD 13.865 -> 10.993 (-20.71%) [ +0.26% +0.09% +0.00% / -20.59% -20.71% -20.71%] index_fill_ random : Elapsed 0.028 ms (13.901 ms / 500) GOOD 13.974 -> 10.982 (-21.41%) [ +0.08% +0.00% +0.14% / -21.41% -21.31% -21.38%] index_fill_ random_sorted : Elapsed 0.028 ms (13.985 ms / 500) B = [512, 32] (stride (32, 1)) A = [512, 256] (stride (256, 1)) dim = 1 17.714 -> 17.508 ( -1.16%) [ +0.00% +0.02% +0.00% / -0.90% -1.15% -1.16%] index_select const : Elapsed 0.035 ms (17.714 ms / 500) 17.926 -> 17.566 ( -2.01%) [ +0.02% +0.08% +0.00% / -2.01% -1.94% -1.83%] index_select wrap : Elapsed 0.036 ms (17.929 ms / 500) 17.920 -> 17.590 ( -1.84%) [ +0.13% +0.00% +0.10% / -1.84% -1.79% -1.80%] index_select linear : Elapsed 0.036 ms (17.943 ms / 500) 18.034 -> 17.579 ( -2.52%) [ +0.16% +0.01% +0.00% / -2.48% -2.52% -2.49%] index_select reverse : Elapsed 0.036 ms (18.062 ms / 500) 17.824 -> 17.532 ( -1.64%) [ +0.06% +0.00% +0.01% / -1.51% -1.59% -1.64%] index_select skip64 : Elapsed 0.036 ms (17.835 ms / 500) 17.771 -> 17.533 ( -1.34%) [ +0.01% +0.07% +0.00% / -1.22% -1.34% -1.17%] index_select skip256 : Elapsed 0.036 ms (17.772 ms / 500) 19.263 -> 18.449 ( -4.23%) [ +0.06% +0.01% +0.00% / -4.09% -4.23% -4.10%] index_select spread : Elapsed 0.039 ms (19.274 ms / 500) 18.333 -> 17.866 ( -2.55%) [ +0.00% +0.08% +0.01% / -2.48% -2.55% -2.50%] index_select strided 3 : Elapsed 0.037 ms (18.333 ms / 500) 18.688 -> 18.077 ( -3.27%) [ +0.00% +0.02% +0.05% / -3.21% -3.18% -3.27%] index_select strided 5 : Elapsed 0.037 ms (18.688 ms / 500) 19.047 -> 18.368 ( -3.56%) [ +0.01% +0.00% +0.07% / -3.44% -3.56% -3.48%] index_select strided 7 : Elapsed 0.038 ms (19.048 ms / 500) 19.219 -> 18.446 ( -4.02%) [ +0.00% +0.03% +0.06% / -3.76% -4.02% -3.86%] index_select strided 8 : Elapsed 0.038 ms (19.219 ms / 500) 18.599 -> 18.012 ( -3.16%) [ +0.19% +0.16% +0.00% / -2.94% -3.11% -3.16%] index_select strided 16 : Elapsed 0.037 ms (18.634 ms / 500) 17.990 -> 17.710 ( -1.56%) [ +0.14% +0.00% +0.09% / -1.35% -1.56% -1.54%] index_select strided 64 : Elapsed 0.036 ms (18.016 ms / 500) 19.036 -> 18.505 ( -2.79%) [ +0.00% +0.06% +0.04% / -2.78% -2.76% -2.79%] index_select strided 100 : Elapsed 0.038 ms (19.036 ms / 500) 17.975 -> 17.658 ( -1.76%) [ +0.02% +0.10% +0.00% / -1.76% -1.75% -1.73%] index_select strided 255 : Elapsed 0.036 ms (17.978 ms / 500) 18.811 -> 18.263 ( -2.91%) [ +0.00% +0.02% +0.11% / -2.77% -2.91% -2.90%] index_select random : Elapsed 0.038 ms (18.811 ms / 500) 18.809 -> 18.246 ( -2.99%) [ +0.16% +0.13% +0.00% / -2.91% -2.99% -2.98%] index_select random_sorted : Elapsed 0.038 ms (18.839 ms / 500) 18.770 -> 18.208 ( -2.99%) [ +0.13% +0.06% +0.00% / -2.85% -2.91% -2.99%] index_select perm : Elapsed 0.038 ms (18.794 ms / 500) 18.760 -> 18.196 ( -3.01%) [ +0.14% +0.04% +0.00% / -2.80% -3.01% -2.95%] index_select perm_sorted : Elapsed 0.038 ms (18.787 ms / 500) B = [512, 32] (stride (32, 1)) A = [512, 256] (stride (1, 512)) dim = 1 17.543 -> 17.440 ( -0.59%) [ +0.00% +0.07% +0.02% / -0.59% -0.58% -0.53%] index_select const : Elapsed 0.035 ms (17.543 ms / 500) 17.673 -> 17.671 ( -0.01%) [ +0.00% +0.11% +0.06% / -0.01% +0.28% +0.25%] index_select wrap : Elapsed 0.035 ms (17.673 ms / 500) 17.784 -> 17.698 ( -0.48%) [ +0.12% +0.00% +0.05% / -0.47% -0.43% -0.48%] index_select linear : Elapsed 0.036 ms (17.805 ms / 500) 17.800 -> 17.689 ( -0.62%) [ +0.03% +0.00% +0.03% / -0.58% -0.62% -0.53%] index_select reverse : Elapsed 0.036 ms (17.805 ms / 500) 17.554 -> 17.428 ( -0.72%) [ +0.05% +0.00% +0.11% / -0.57% -0.66% -0.72%] index_select skip64 : Elapsed 0.035 ms (17.562 ms / 500) 17.561 -> 17.428 ( -0.76%) [ +0.03% +0.00% +0.05% / -0.58% -0.70% -0.76%] index_select skip256 : Elapsed 0.035 ms (17.566 ms / 500) 17.648 -> 17.687 ( +0.22%) [ +0.08% +0.00% +0.07% / +0.27% +0.25% +0.22%] index_select spread : Elapsed 0.035 ms (17.663 ms / 500) 17.637 -> 17.696 ( +0.33%) [ +0.00% +0.02% +0.07% / +0.33% +0.92% +0.90%] index_select strided 3 : Elapsed 0.035 ms (17.637 ms / 500) 17.740 -> 17.707 ( -0.19%) [ +0.03% +0.00% +0.06% / +0.60% -0.17% -0.19%] index_select strided 5 : Elapsed 0.035 ms (17.745 ms / 500) 17.633 -> 17.714 ( +0.46%) [ +0.11% +0.00% +0.05% / +0.46% +0.82% +0.86%] index_select strided 7 : Elapsed 0.035 ms (17.652 ms / 500) 17.668 -> 17.689 ( +0.12%) [ +0.15% +0.09% +0.00% / +0.18% +0.26% +0.12%] index_select strided 8 : Elapsed 0.035 ms (17.694 ms / 500) 17.617 -> 17.591 ( -0.15%) [ +0.00% +0.02% +0.09% / -0.15% -0.11% -0.10%] index_select strided 16 : Elapsed 0.035 ms (17.617 ms / 500) 17.600 -> 17.499 ( -0.57%) [ +0.11% +0.00% +0.10% / -0.57% -0.51% -0.53%] index_select strided 64 : Elapsed 0.035 ms (17.620 ms / 500) 17.703 -> 17.723 ( +0.11%) [ +0.11% +0.01% +0.00% / +0.11% +0.23% +0.19%] index_select strided 100 : Elapsed 0.035 ms (17.722 ms / 500) 17.672 -> 17.673 ( +0.01%) [ +0.02% +0.01% +0.00% / +0.06% +0.01% +0.03%] index_select strided 255 : Elapsed 0.035 ms (17.675 ms / 500) 17.700 -> 17.701 ( +0.01%) [ +0.08% +0.08% +0.00% / +0.23% +0.01% +0.02%] index_select random : Elapsed 0.035 ms (17.714 ms / 500) 17.714 -> 17.664 ( -0.28%) [ +0.01% +0.00% +0.05% / +0.25% -0.19% -0.28%] index_select random_sorted : Elapsed 0.035 ms (17.716 ms / 500) 17.720 -> 17.681 ( -0.22%) [ +0.08% +0.00% +0.07% / +0.47% -0.22% -0.16%] index_select perm : Elapsed 0.035 ms (17.735 ms / 500) 17.738 -> 17.707 ( -0.17%) [ +0.02% +0.00% +0.02% / +0.41% -0.14% -0.17%] index_select perm_sorted : Elapsed 0.035 ms (17.742 ms / 500) B = [512, 32] (stride (1, 512)) dim = 1 fill_cnt = 256 11.075 -> 11.008 ( -0.60%) [ +0.23% +0.00% +0.02% / -0.60% -0.59% -0.54%] index_fill_ const : Elapsed 0.022 ms (11.101 ms / 500) 11.126 -> 11.089 ( -0.33%) [ +0.28% +0.13% +0.00% / -0.33% -0.33% -0.19%] index_fill_ linear : Elapsed 0.022 ms (11.157 ms / 500) 11.125 -> 11.064 ( -0.55%) [ +0.17% +0.10% +0.00% / -0.39% -0.55% -0.50%] index_fill_ reverse : Elapsed 0.022 ms (11.144 ms / 500) 11.051 -> 10.993 ( -0.52%) [ +0.25% +0.21% +0.00% / -0.25% -0.49% -0.52%] index_fill_ skip64 : Elapsed 0.022 ms (11.079 ms / 500) 11.059 -> 10.994 ( -0.59%) [ +0.07% +0.01% +0.00% / -0.58% -0.59% +0.09%] index_fill_ skip256 : Elapsed 0.022 ms (11.067 ms / 500) 11.060 -> 10.982 ( -0.71%) [ +0.20% +0.03% +0.00% / -0.47% -0.71% -0.68%] index_fill_ spread : Elapsed 0.022 ms (11.082 ms / 500) 11.108 -> 11.023 ( -0.77%) [ +0.27% +0.00% +0.14% / -0.24% -0.77% -0.68%] index_fill_ strided 3 : Elapsed 0.022 ms (11.138 ms / 500) 11.085 -> 11.022 ( -0.57%) [ +0.38% +0.01% +0.00% / -0.57% +0.13% +0.27%] index_fill_ strided 5 : Elapsed 0.022 ms (11.127 ms / 500) 11.091 -> 10.997 ( -0.85%) [ +0.23% +0.20% +0.00% / -0.46% -0.85% -0.75%] index_fill_ strided 7 : Elapsed 0.022 ms (11.116 ms / 500) 11.074 -> 11.015 ( -0.53%) [ +0.36% +0.14% +0.00% / -0.40% -0.40% -0.53%] index_fill_ strided 8 : Elapsed 0.022 ms (11.114 ms / 500) 11.050 -> 11.003 ( -0.43%) [ +0.33% +0.25% +0.00% / -0.37% -0.37% -0.43%] index_fill_ strided 16 : Elapsed 0.022 ms (11.087 ms / 500) 11.078 -> 11.005 ( -0.66%) [ +0.18% +0.00% +0.07% / -0.43% -0.66% -0.56%] index_fill_ random : Elapsed 0.022 ms (11.098 ms / 500) 11.044 -> 10.985 ( -0.53%) [ +0.38% +0.17% +0.00% / -0.25% -0.53% -0.49%] index_fill_ random_sorted : Elapsed 0.022 ms (11.086 ms / 500) B = [512, 32] (stride (1, 512)) A = [512, 256] (stride (256, 1)) dim = 1 17.614 -> 17.605 ( -0.05%) [ +0.05% +0.00% +0.01% / -0.01% -0.05% -0.01%] index_select const : Elapsed 0.035 ms (17.622 ms / 500) 17.891 -> 17.821 ( -0.39%) [ +0.02% +0.20% +0.00% / +0.14% -0.32% -0.39%] index_select wrap : Elapsed 0.036 ms (17.894 ms / 500) 17.913 -> 17.803 ( -0.61%) [ +0.09% +0.00% +0.06% / -0.08% -0.61% -0.48%] index_select linear : Elapsed 0.036 ms (17.929 ms / 500) 17.814 -> 17.814 ( +0.00%) [ +0.15% +0.06% +0.00% / +0.00% +0.09% +0.16%] index_select reverse : Elapsed 0.036 ms (17.840 ms / 500) 17.591 -> 17.603 ( +0.07%) [ +0.10% +0.03% +0.00% / +0.07% +0.15% +0.20%] index_select skip64 : Elapsed 0.035 ms (17.608 ms / 500) 17.608 -> 17.611 ( +0.02%) [ +0.02% +0.01% +0.00% / +0.02% +0.02% +0.06%] index_select skip256 : Elapsed 0.035 ms (17.611 ms / 500) 19.134 -> 19.129 ( -0.03%) [ +0.05% +0.05% +0.00% / +0.00% -0.03% +0.03%] index_select spread : Elapsed 0.038 ms (19.143 ms / 500) 18.237 -> 18.202 ( -0.19%) [ +0.00% +0.10% +0.07% / +0.16% -0.13% -0.19%] index_select strided 3 : Elapsed 0.036 ms (18.237 ms / 500) 18.619 -> 18.562 ( -0.31%) [ +0.06% +0.04% +0.00% / +0.11% -0.18% -0.31%] index_select strided 5 : Elapsed 0.037 ms (18.630 ms / 500) 18.971 -> 18.921 ( -0.26%) [ +0.05% +0.00% +0.06% / -0.01% -0.26% -0.25%] index_select strided 7 : Elapsed 0.038 ms (18.981 ms / 500) 19.129 -> 19.103 ( -0.14%) [ +0.13% +0.00% +0.10% / +0.15% -0.14% -0.09%] index_select strided 8 : Elapsed 0.038 ms (19.154 ms / 500) 18.490 -> 18.505 ( +0.08%) [ +0.04% +0.00% +0.05% / +0.08% +0.10% +0.12%] index_select strided 16 : Elapsed 0.037 ms (18.497 ms / 500) 17.862 -> 17.879 ( +0.10%) [ +0.01% +0.00% +0.08% / +0.10% +0.19% +0.21%] index_select strided 64 : Elapsed 0.036 ms (17.864 ms / 500) 18.972 -> 18.962 ( -0.05%) [ +0.20% +0.00% +0.07% / +0.18% -0.01% -0.05%] index_select strided 100 : Elapsed 0.038 ms (19.009 ms / 500) 17.927 -> 17.894 ( -0.18%) [ +0.00% +0.08% +0.13% / +0.04% -0.18% -0.16%] index_select strided 255 : Elapsed 0.036 ms (17.927 ms / 500) 18.688 -> 18.707 ( +0.10%) [ +0.19% +0.00% +0.05% / +0.14% +0.10% +0.18%] index_select random : Elapsed 0.037 ms (18.723 ms / 500) 18.700 -> 18.700 ( +0.00%) [ +0.09% +0.00% +0.03% / +0.00% +0.02% +0.07%] index_select random_sorted : Elapsed 0.037 ms (18.716 ms / 500) 18.625 -> 18.619 ( -0.03%) [ +0.04% +0.01% +0.00% / -0.03% +0.11% +0.01%] index_select perm : Elapsed 0.037 ms (18.632 ms / 500) 18.614 -> 18.615 ( +0.01%) [ +0.02% +0.02% +0.00% / +0.01% +0.09% +0.07%] index_select perm_sorted : Elapsed 0.037 ms (18.618 ms / 500) B = [512, 32] (stride (1, 512)) A = [512, 256] (stride (1, 512)) dim = 1 17.507 -> 17.416 ( -0.52%) [ +0.09% +0.06% +0.00% / +0.08% -0.46% -0.52%] index_select const : Elapsed 0.035 ms (17.522 ms / 500) 17.637 -> 17.572 ( -0.37%) [ +0.09% +0.00% +0.05% / +0.07% -0.32% -0.37%] index_select wrap : Elapsed 0.035 ms (17.652 ms / 500) 17.545 -> 17.561 ( +0.09%) [ +0.15% +0.12% +0.00% / +0.13% +0.09% +0.10%] index_select linear : Elapsed 0.035 ms (17.572 ms / 500) 17.551 -> 17.564 ( +0.07%) [ +0.10% +0.13% +0.00% / +0.07% +0.15% +0.16%] index_select reverse : Elapsed 0.035 ms (17.568 ms / 500) 17.411 -> 17.435 ( +0.14%) [ +0.00% +0.13% +0.12% / +0.14% +0.54% +0.56%] index_select skip64 : Elapsed 0.035 ms (17.411 ms / 500) 17.409 -> 17.424 ( +0.09%) [ +0.07% +0.00% +0.09% / +0.09% +0.56% +0.60%] index_select skip256 : Elapsed 0.035 ms (17.421 ms / 500) 17.544 -> 17.559 ( +0.09%) [ +0.01% +0.00% +0.10% / +0.09% +0.10% +0.18%] index_select spread : Elapsed 0.035 ms (17.545 ms / 500) 17.518 -> 17.528 ( +0.06%) [ +0.06% +0.14% +0.00% / +0.06% +0.56% +0.59%] index_select strided 3 : Elapsed 0.035 ms (17.529 ms / 500) 17.614 -> 17.524 ( -0.51%) [ +0.03% +0.00% +0.10% / +0.11% -0.45% -0.51%] index_select strided 5 : Elapsed 0.035 ms (17.620 ms / 500) 17.529 -> 17.541 ( +0.07%) [ +0.03% +0.06% +0.00% / +0.07% +0.21% +0.18%] index_select strided 7 : Elapsed 0.035 ms (17.535 ms / 500) 17.540 -> 17.553 ( +0.07%) [ +0.12% +0.11% +0.00% / +0.07% +0.39% +0.38%] index_select strided 8 : Elapsed 0.035 ms (17.561 ms / 500) 17.483 -> 17.507 ( +0.14%) [ +0.09% +0.06% +0.00% / +0.14% +0.38% +0.27%] index_select strided 16 : Elapsed 0.035 ms (17.499 ms / 500) 17.442 -> 17.452 ( +0.06%) [ +0.06% +0.05% +0.00% / +0.06% +0.11% +0.15%] index_select strided 64 : Elapsed 0.035 ms (17.452 ms / 500) 17.550 -> 17.559 ( +0.05%) [ +0.05% +0.07% +0.00% / +0.05% +0.10% +0.14%] index_select strided 100 : Elapsed 0.035 ms (17.559 ms / 500) 17.587 -> 17.538 ( -0.28%) [ +0.05% +0.04% +0.00% / +0.11% -0.28% -0.16%] index_select strided 255 : Elapsed 0.035 ms (17.596 ms / 500) 17.570 -> 17.573 ( +0.02%) [ +0.00% +0.08% +0.02% / +0.02% +0.14% +0.06%] index_select random : Elapsed 0.035 ms (17.570 ms / 500) 17.520 -> 17.513 ( -0.04%) [ +0.02% +0.08% +0.00% / -0.04% +0.68% +0.61%] index_select random_sorted : Elapsed 0.035 ms (17.524 ms / 500) 17.649 -> 17.580 ( -0.39%) [ +0.02% +0.00% +0.07% / +0.05% -0.35% -0.39%] index_select perm : Elapsed 0.035 ms (17.652 ms / 500) 17.670 -> 17.541 ( -0.73%) [ +0.03% +0.02% +0.00% / +0.09% -0.68% -0.73%] index_select perm_sorted : Elapsed 0.035 ms (17.676 ms / 500) out_shape = [256, 512] in_shape = [32, 512] idx_dim = 0 B = [256, 512] (stride (512, 1)) dim = 0 fill_cnt = 32 11.107 -> 11.098 ( -0.08%) [ +0.00% +0.21% +0.42% / -0.04% +0.13% -0.08%] index_fill_ const : Elapsed 0.022 ms (11.107 ms / 500) 11.254 -> 11.182 ( -0.64%) [ +0.00% +0.08% +0.06% / -0.13% -0.43% -0.64%] index_fill_ linear : Elapsed 0.023 ms (11.254 ms / 500) 11.235 -> 11.196 ( -0.35%) [ +0.02% +0.00% +0.22% / +0.08% -0.35% -0.23%] index_fill_ reverse : Elapsed 0.022 ms (11.237 ms / 500) 11.176 -> 11.104 ( -0.64%) [ +0.04% +0.03% +0.00% / -0.64% -0.64% -0.57%] index_fill_ skip64 : Elapsed 0.022 ms (11.180 ms / 500) 11.158 -> 11.087 ( -0.64%) [ +0.27% +0.20% +0.00% / -0.13% -0.47% -0.64%] index_fill_ skip256 : Elapsed 0.022 ms (11.188 ms / 500) 11.294 -> 11.279 ( -0.13%) [ +0.00% +0.00% +0.11% / +0.12% -0.13% +0.17%] index_fill_ spread : Elapsed 0.023 ms (11.294 ms / 500) 11.253 -> 11.250 ( -0.03%) [ +0.00% +0.19% +0.34% / +0.30% +0.00% -0.03%] index_fill_ strided 3 : Elapsed 0.023 ms (11.253 ms / 500) 11.241 -> 11.222 ( -0.17%) [ +0.00% +0.13% +0.44% / +0.31% -0.15% -0.17%] index_fill_ strided 5 : Elapsed 0.022 ms (11.241 ms / 500) 11.224 -> 11.208 ( -0.14%) [ +0.21% +0.00% +0.13% / +0.20% -0.04% -0.14%] index_fill_ strided 7 : Elapsed 0.022 ms (11.248 ms / 500) 11.306 -> 11.285 ( -0.19%) [ +0.37% +0.00% +0.32% / +0.06% -0.15% -0.19%] index_fill_ strided 8 : Elapsed 0.023 ms (11.348 ms / 500) 11.177 -> 11.192 ( +0.13%) [ +0.30% +0.00% +0.31% / +0.52% +0.13% +0.23%] index_fill_ strided 16 : Elapsed 0.022 ms (11.210 ms / 500) 11.150 -> 11.129 ( -0.19%) [ +0.05% +0.00% +0.12% / -0.19% +0.26% +0.39%] index_fill_ strided 64 : Elapsed 0.022 ms (11.156 ms / 500) 11.215 -> 11.236 ( +0.19%) [ +0.24% +0.00% +0.12% / +0.21% +0.61% +0.19%] index_fill_ strided 100 : Elapsed 0.022 ms (11.242 ms / 500) 11.255 -> 11.211 ( -0.39%) [ +0.00% +0.02% +0.08% / +0.26% -0.39% -0.32%] index_fill_ strided 255 : Elapsed 0.023 ms (11.255 ms / 500) 11.248 -> 11.190 ( -0.52%) [ +0.20% +0.41% +0.00% / +0.16% -0.32% -0.52%] index_fill_ random : Elapsed 0.023 ms (11.270 ms / 500) 11.207 -> 11.215 ( +0.07%) [ +0.45% +0.15% +0.00% / +0.16% +0.29% +0.07%] index_fill_ random_sorted : Elapsed 0.023 ms (11.257 ms / 500) 11.212 -> 11.236 ( +0.21%) [ +0.60% +0.21% +0.00% / +0.21% +0.37% +0.38%] index_fill_ perm : Elapsed 0.023 ms (11.279 ms / 500) 11.213 -> 11.205 ( -0.07%) [ +0.32% +0.06% +0.00% / -0.07% +0.73% +0.45%] index_fill_ perm_sorted : Elapsed 0.022 ms (11.249 ms / 500) B = [256, 512] (stride (512, 1)) A = [32, 512] (stride (512, 1)) dim = 0 17.894 -> 17.733 ( -0.90%) [ +0.00% +0.02% +0.04% / -0.90% -0.35% -0.29%] index_add_ linear : Elapsed 0.036 ms (17.894 ms / 500) 17.724 -> 17.578 ( -0.82%) [ +0.03% +0.00% +0.05% / -0.82% -0.39% -0.35%] index_copy_ linear : Elapsed 0.035 ms (17.729 ms / 500) 17.871 -> 17.726 ( -0.81%) [ +0.03% +0.04% +0.00% / -0.81% -0.64% -0.65%] index_add_ reverse : Elapsed 0.036 ms (17.876 ms / 500) 17.705 -> 17.583 ( -0.69%) [ +0.00% +0.06% +0.08% / -0.69% -0.64% -0.65%] index_copy_ reverse : Elapsed 0.035 ms (17.705 ms / 500) 17.880 -> 17.730 ( -0.84%) [ +0.07% +0.00% +0.02% / -0.81% -0.84% -0.78%] index_add_ spread : Elapsed 0.036 ms (17.893 ms / 500) 17.658 -> 17.534 ( -0.70%) [ +0.13% +0.03% +0.00% / -0.63% -0.67% -0.70%] index_copy_ spread : Elapsed 0.035 ms (17.681 ms / 500) 18.050 -> 17.666 ( -2.13%) [ +0.01% +0.00% +0.01% / -0.96% -2.13% -2.10%] index_add_ strided 3 : Elapsed 0.036 ms (18.052 ms / 500) 17.832 -> 17.539 ( -1.64%) [ +0.07% +0.09% +0.00% / -0.84% -1.64% -1.64%] index_copy_ strided 3 : Elapsed 0.036 ms (17.844 ms / 500) 17.932 -> 17.760 ( -0.96%) [ +0.04% +0.11% +0.00% / -0.90% -0.96% -0.93%] index_add_ strided 5 : Elapsed 0.036 ms (17.939 ms / 500) 17.797 -> 17.573 ( -1.26%) [ +0.00% +0.11% +0.16% / -0.76% -1.26% -1.22%] index_copy_ strided 5 : Elapsed 0.036 ms (17.797 ms / 500) 17.973 -> 17.671 ( -1.68%) [ +0.06% +0.00% +0.00% / -0.88% -1.68% -1.61%] index_add_ strided 7 : Elapsed 0.036 ms (17.983 ms / 500) 17.773 -> 17.546 ( -1.28%) [ +0.02% +0.02% +0.00% / -0.75% -1.28% -1.23%] index_copy_ strided 7 : Elapsed 0.036 ms (17.777 ms / 500) 17.922 -> 17.735 ( -1.04%) [ +0.00% +0.03% +0.05% / -0.89% -0.92% -1.04%] index_add_ strided 255 : Elapsed 0.036 ms (17.922 ms / 500) 17.760 -> 17.560 ( -1.13%) [ +0.01% +0.00% +0.01% / -0.74% -0.99% -1.13%] index_copy_ strided 255 : Elapsed 0.036 ms (17.762 ms / 500) 17.961 -> 17.709 ( -1.40%) [ +0.07% +0.08% +0.00% / -0.83% -1.35% -1.40%] index_add_ perm : Elapsed 0.036 ms (17.974 ms / 500) 17.750 -> 17.584 ( -0.94%) [ +0.03% +0.04% +0.00% / -0.88% -0.94% -0.92%] index_copy_ perm : Elapsed 0.036 ms (17.756 ms / 500) 18.013 -> 17.695 ( -1.77%) [ +0.06% +0.00% +0.09% / -0.77% -1.73% -1.77%] index_add_ perm_sorted : Elapsed 0.036 ms (18.023 ms / 500) 17.771 -> 17.575 ( -1.10%) [ +0.10% +0.00% +0.10% / -0.83% -1.10% -1.07%] index_copy_ perm_sorted : Elapsed 0.036 ms (17.789 ms / 500) 24.762 -> 24.811 ( +0.20%) [ +0.14% +0.00% +0.05% / +0.20% +0.62% +0.63%] index_select const : Elapsed 0.050 ms (24.796 ms / 500) 25.100 -> 24.976 ( -0.49%) [ +0.00% +0.00% +0.05% / +0.05% -0.49% -0.44%] index_select wrap : Elapsed 0.050 ms (25.101 ms / 500) 24.894 -> 24.891 ( -0.01%) [ +0.00% +0.21% +0.17% / -0.01% +0.05% +0.04%] index_select linear : Elapsed 0.050 ms (24.894 ms / 500) 24.965 -> 24.988 ( +0.09%) [ +0.02% +0.00% +0.04% / +0.09% +0.34% +0.27%] index_select reverse : Elapsed 0.050 ms (24.971 ms / 500) 24.750 -> 24.728 ( -0.09%) [ +0.21% +0.00% +0.03% / +0.28% -0.01% -0.09%] index_select skip64 : Elapsed 0.050 ms (24.802 ms / 500) 24.742 -> 24.769 ( +0.11%) [ +0.36% +0.00% +0.02% / +0.11% +0.62% +0.51%] index_select skip256 : Elapsed 0.050 ms (24.831 ms / 500) 24.961 -> 24.821 ( -0.56%) [ +0.10% +0.14% +0.00% / +0.22% -0.56% -0.50%] index_select spread : Elapsed 0.050 ms (24.986 ms / 500) 24.947 -> 24.945 ( -0.01%) [ +0.00% +0.06% +0.12% / +0.16% -0.01% +0.06%] index_select strided 3 : Elapsed 0.050 ms (24.947 ms / 500) 24.938 -> 24.929 ( -0.04%) [ +0.03% +0.04% +0.00% / +0.06% +0.19% -0.04%] index_select strided 5 : Elapsed 0.050 ms (24.946 ms / 500) 24.950 -> 24.918 ( -0.13%) [ +0.00% +0.00% +0.05% / -0.13% -0.01% +0.13%] index_select strided 7 : Elapsed 0.050 ms (24.950 ms / 500) 24.746 -> 24.783 ( +0.15%) [ +0.38% +0.32% +0.00% / +0.32% +0.25% +0.15%] index_select strided 8 : Elapsed 0.050 ms (24.840 ms / 500) 24.743 -> 24.795 ( +0.21%) [ +0.36% +0.15% +0.00% / +0.28% +0.21% +0.30%] index_select strided 16 : Elapsed 0.050 ms (24.831 ms / 500) 24.914 -> 24.909 ( -0.02%) [ +0.00% +0.08% +0.10% / -0.02% +0.19% +0.14%] index_select random : Elapsed 0.050 ms (24.914 ms / 500) 24.844 -> 24.842 ( -0.01%) [ +0.00% +0.15% +0.10% / +0.15% +0.01% -0.01%] index_select random_sorted : Elapsed 0.050 ms (24.844 ms / 500) B = [256, 512] (stride (512, 1)) A = [32, 512] (stride (1, 32)) dim = 0 17.904 -> 17.730 ( -0.97%) [ +0.16% +0.07% +0.00% / -0.85% -0.97% -0.86%] index_add_ linear : Elapsed 0.036 ms (17.932 ms / 500) 17.820 -> 17.649 ( -0.96%) [ +0.03% +0.03% +0.00% / -0.96% -0.95% -0.94%] index_copy_ linear : Elapsed 0.036 ms (17.826 ms / 500) 18.003 -> 17.723 ( -1.56%) [ +0.05% +0.01% +0.00% / -0.97% -1.52% -1.56%] index_add_ reverse : Elapsed 0.036 ms (18.012 ms / 500) 17.913 -> 17.635 ( -1.55%) [ +0.12% +0.14% +0.00% / -0.94% -1.51% -1.55%] index_copy_ reverse : Elapsed 0.036 ms (17.935 ms / 500) 17.953 -> 17.743 ( -1.17%) [ +0.03% +0.06% +0.00% / -0.92% -1.17% -1.16%] index_add_ spread : Elapsed 0.036 ms (17.959 ms / 500) 17.878 -> 17.601 ( -1.55%) [ +0.03% +0.07% +0.00% / -0.98% -1.47% -1.55%] index_copy_ spread : Elapsed 0.036 ms (17.884 ms / 500) 17.934 -> 17.718 ( -1.20%) [ +0.06% +0.06% +0.00% / -0.79% -1.19% -1.20%] index_add_ strided 3 : Elapsed 0.036 ms (17.944 ms / 500) 17.812 -> 17.650 ( -0.91%) [ +0.10% +0.04% +0.00% / -0.86% -0.85% -0.91%] index_copy_ strided 3 : Elapsed 0.036 ms (17.830 ms / 500) 17.859 -> 17.712 ( -0.82%) [ +0.04% +0.08% +0.00% / -0.82% -0.55% -0.60%] index_add_ strided 5 : Elapsed 0.036 ms (17.866 ms / 500) 17.772 -> 17.635 ( -0.77%) [ +0.15% +0.16% +0.00% / -0.77% -0.62% -0.58%] index_copy_ strided 5 : Elapsed 0.036 ms (17.799 ms / 500) 17.944 -> 17.682 ( -1.46%) [ +0.03% +0.00% +0.01% / -1.01% -1.46% -1.37%] index_add_ strided 7 : Elapsed 0.036 ms (17.949 ms / 500) 17.824 -> 17.628 ( -1.10%) [ +0.00% +0.02% +0.05% / -1.03% -1.10% -0.99%] index_copy_ strided 7 : Elapsed 0.036 ms (17.824 ms / 500) 17.918 -> 17.735 ( -1.02%) [ +0.03% +0.02% +0.00% / -0.95% -1.02% -0.95%] index_add_ strided 255 : Elapsed 0.036 ms (17.923 ms / 500) 17.828 -> 17.649 ( -1.00%) [ +0.02% +0.00% +0.07% / -0.99% -1.00% -0.99%] index_copy_ strided 255 : Elapsed 0.036 ms (17.832 ms / 500) 17.979 -> 17.777 ( -1.12%) [ +0.06% +0.01% +0.00% / -0.90% -1.12% -1.04%] index_add_ perm : Elapsed 0.036 ms (17.990 ms / 500) 17.859 -> 17.654 ( -1.15%) [ +0.11% +0.00% +0.04% / -0.88% -1.15% -1.15%] index_copy_ perm : Elapsed 0.036 ms (17.879 ms / 500) 17.951 -> 17.757 ( -1.08%) [ +0.11% +0.00% +0.07% / -0.97% -1.08% -1.02%] index_add_ perm_sorted : Elapsed 0.036 ms (17.971 ms / 500) 17.868 -> 17.659 ( -1.17%) [ +0.05% +0.00% +0.01% / -0.96% -1.17% -1.09%] index_copy_ perm_sorted : Elapsed 0.036 ms (17.877 ms / 500) 25.692 -> 25.564 ( -0.50%) [ +0.05% +0.05% +0.00% / +0.09% -0.44% -0.50%] index_select const : Elapsed 0.051 ms (25.704 ms / 500) 25.568 -> 25.602 ( +0.13%) [ +0.13% +0.00% +0.13% / +0.13% +0.22% +0.36%] index_select wrap : Elapsed 0.051 ms (25.600 ms / 500) 25.579 -> 25.562 ( -0.07%) [ +0.00% +0.01% +0.03% / +0.09% -0.06% -0.07%] index_select linear : Elapsed 0.051 ms (25.579 ms / 500) 25.478 -> 25.493 ( +0.06%) [ +0.13% +0.02% +0.00% / +0.06% +0.22% +0.16%] index_select reverse : Elapsed 0.051 ms (25.510 ms / 500) 25.460 -> 25.500 ( +0.16%) [ +0.25% +0.03% +0.00% / +0.16% +0.28% +0.29%] index_select skip64 : Elapsed 0.051 ms (25.523 ms / 500) 25.490 -> 25.485 ( -0.02%) [ +0.04% +0.00% +0.01% / -0.02% +0.10% +0.05%] index_select skip256 : Elapsed 0.051 ms (25.500 ms / 500) 25.786 -> 25.657 ( -0.50%) [ +0.01% +0.02% +0.00% / -0.02% -0.50% -0.43%] index_select spread : Elapsed 0.052 ms (25.788 ms / 500) 25.616 -> 25.628 ( +0.05%) [ +0.01% +0.02% +0.00% / +0.05% +0.38% +0.33%] index_select strided 3 : Elapsed 0.051 ms (25.619 ms / 500) 25.939 -> 25.716 ( -0.86%) [ +0.04% +0.14% +0.00% / +0.13% -0.69% -0.86%] index_select strided 5 : Elapsed 0.052 ms (25.950 ms / 500) 25.531 -> 25.549 ( +0.07%) [ +0.09% +0.00% +0.01% / +0.07% +0.33% +0.38%] index_select strided 7 : Elapsed 0.051 ms (25.554 ms / 500) 25.576 -> 25.561 ( -0.06%) [ +0.09% +0.02% +0.00% / -0.06% -0.04% +0.03%] index_select strided 8 : Elapsed 0.051 ms (25.598 ms / 500) 25.528 -> 25.470 ( -0.23%) [ +0.03% +0.00% +0.02% / +0.07% -0.23% -0.16%] index_select strided 16 : Elapsed 0.051 ms (25.535 ms / 500) 25.539 -> 25.572 ( +0.13%) [ +0.05% +0.00% +0.10% / +0.13% +0.40% +0.34%] index_select random : Elapsed 0.051 ms (25.553 ms / 500) 25.846 -> 25.645 ( -0.78%) [ +0.05% +0.00% +0.07% / +0.12% -0.69% -0.78%] index_select random_sorted : Elapsed 0.052 ms (25.859 ms / 500) B = [256, 512] (stride (1, 256)) dim = 0 fill_cnt = 32 11.308 -> 11.147 ( -1.42%) [ +0.00% +0.04% +0.11% / -1.42% -1.24% -1.23%] index_fill_ const : Elapsed 0.023 ms (11.308 ms / 500) 11.341 -> 11.211 ( -1.15%) [ +0.38% +0.00% +0.15% / -0.93% -1.15% -1.14%] index_fill_ linear : Elapsed 0.023 ms (11.384 ms / 500) 11.355 -> 11.199 ( -1.37%) [ +0.12% +0.00% +0.38% / -1.08% -1.25% -1.37%] index_fill_ reverse : Elapsed 0.023 ms (11.369 ms / 500) 11.349 -> 11.148 ( -1.77%) [ +0.00% +0.05% +0.21% / -1.71% -1.72% -1.77%] index_fill_ skip64 : Elapsed 0.023 ms (11.349 ms / 500) 11.357 -> 11.152 ( -1.81%) [ +0.00% +0.08% +0.11% / -1.53% -1.81% -1.65%] index_fill_ skip256 : Elapsed 0.023 ms (11.357 ms / 500) 13.155 -> 13.005 ( -1.14%) [ +0.19% +0.26% +0.00% / -1.12% -1.14% -0.87%] index_fill_ spread : Elapsed 0.026 ms (13.180 ms / 500) 11.948 -> 11.744 ( -1.71%) [ +0.00% +0.33% +0.18% / -1.64% -1.70% -1.71%] index_fill_ strided 3 : Elapsed 0.024 ms (11.948 ms / 500) 12.458 -> 12.167 ( -2.34%) [ +0.18% +0.00% +0.00% / -2.30% -2.34% -2.26%] index_fill_ strided 5 : Elapsed 0.025 ms (12.480 ms / 500) 12.982 -> 12.625 ( -2.75%) [ +0.08% +0.00% +0.33% / -2.36% -2.75% -2.55%] index_fill_ strided 7 : Elapsed 0.026 ms (12.992 ms / 500) 13.153 -> 13.019 ( -1.02%) [ +0.00% +0.07% +0.15% / -1.02% -0.96% -0.80%] index_fill_ strided 8 : Elapsed 0.026 ms (13.153 ms / 500) 12.174 -> 11.789 ( -3.16%) [ +0.32% +0.16% +0.00% / -3.15% -3.10% -3.16%] index_fill_ strided 16 : Elapsed 0.024 ms (12.213 ms / 500) 11.471 -> 11.142 ( -2.87%) [ +0.16% +0.00% +0.10% / -2.87% -2.67% -2.43%] index_fill_ strided 64 : Elapsed 0.023 ms (11.489 ms / 500) 12.900 -> 12.457 ( -3.43%) [ +0.25% +0.00% +0.07% / -3.43% -3.18% -2.98%] index_fill_ strided 100 : Elapsed 0.026 ms (12.932 ms / 500) 11.449 -> 11.345 ( -0.91%) [ +0.00% +0.14% +0.11% / -0.73% -0.86% -0.91%] index_fill_ strided 255 : Elapsed 0.023 ms (11.449 ms / 500) 12.395 -> 12.024 ( -2.99%) [ +0.10% +0.45% +0.00% / -2.77% -2.70% -2.99%] index_fill_ random : Elapsed 0.025 ms (12.407 ms / 500) 12.423 -> 12.163 ( -2.09%) [ +0.00% +0.21% +0.06% / -1.99% -2.09% -2.08%] index_fill_ random_sorted : Elapsed 0.025 ms (12.423 ms / 500) 12.409 -> 12.066 ( -2.76%) [ +0.10% +0.00% +0.15% / -2.76% -2.51% -2.58%] index_fill_ perm : Elapsed 0.025 ms (12.421 ms / 500) 12.447 -> 12.201 ( -1.98%) [ +0.10% +0.00% +0.20% / -1.98% -1.77% -1.55%] index_fill_ perm_sorted : Elapsed 0.025 ms (12.459 ms / 500) B = [256, 512] (stride (1, 256)) A = [32, 512] (stride (512, 1)) dim = 0 18.146 -> 17.768 ( -2.08%) [ +0.04% +0.02% +0.00% / -2.03% -1.95% -2.08%] index_add_ linear : Elapsed 0.036 ms (18.153 ms / 500) 17.839 -> 17.688 ( -0.85%) [ +0.13% +0.06% +0.00% / -0.63% -0.76% -0.85%] index_copy_ linear : Elapsed 0.036 ms (17.862 ms / 500) 18.147 -> 17.758 ( -2.14%) [ +0.08% +0.00% +0.03% / -2.14% -1.90% -1.84%] index_add_ reverse : Elapsed 0.036 ms (18.162 ms / 500) 17.850 -> 17.713 ( -0.77%) [ +0.00% +0.04% +0.04% / -0.77% -0.64% -0.65%] index_copy_ reverse : Elapsed 0.036 ms (17.850 ms / 500) 20.226 -> 19.302 ( -4.57%) [ +0.05% +0.07% +0.00% / -4.57% -4.26% -4.33%] index_add_ spread : Elapsed 0.040 ms (20.237 ms / 500) 19.742 -> 19.424 ( -1.61%) [ +0.08% +0.00% +0.09% / -1.61% -1.44% -1.50%] index_copy_ spread : Elapsed 0.040 ms (19.758 ms / 500) 18.919 -> 18.197 ( -3.82%) [ +0.00% +0.12% +0.12% / -3.82% -3.69% -3.78%] index_add_ strided 3 : Elapsed 0.038 ms (18.919 ms / 500) 18.635 -> 18.301 ( -1.79%) [ +0.05% +0.11% +0.00% / -1.79% -1.76% -1.78%] index_copy_ strided 3 : Elapsed 0.037 ms (18.645 ms / 500) 19.472 -> 18.619 ( -4.38%) [ +0.01% +0.00% +0.09% / -4.38% -4.27% -4.31%] index_add_ strided 5 : Elapsed 0.039 ms (19.474 ms / 500) 19.134 -> 18.725 ( -2.14%) [ +0.00% +0.15% +0.09% / -1.88% -2.08% -2.14%] index_copy_ strided 5 : Elapsed 0.038 ms (19.134 ms / 500) 19.956 -> 19.166 ( -3.96%) [ +0.08% +0.00% +0.11% / -3.93% -3.96% -3.92%] index_add_ strided 7 : Elapsed 0.040 ms (19.971 ms / 500) 19.571 -> 19.175 ( -2.02%) [ +0.00% +0.05% +0.03% / -1.97% -1.93% -2.02%] index_copy_ strided 7 : Elapsed 0.039 ms (19.571 ms / 500) 18.272 -> 17.881 ( -2.14%) [ +0.00% +0.02% +0.04% / -2.14% -2.14% -2.09%] index_add_ strided 255 : Elapsed 0.037 ms (18.272 ms / 500) 17.983 -> 17.860 ( -0.68%) [ +0.06% +0.00% +0.02% / -0.64% -0.68% -0.68%] index_copy_ strided 255 : Elapsed 0.036 ms (17.994 ms / 500) 19.370 -> 18.754 ( -3.18%) [ +0.00% +0.01% +0.01% / -3.11% -3.18% -3.18%] index_add_ perm : Elapsed 0.039 ms (19.370 ms / 500) 18.898 -> 18.585 ( -1.66%) [ +0.02% +0.10% +0.00% / -1.51% -1.66% -1.62%] index_copy_ perm : Elapsed 0.038 ms (18.901 ms / 500) 19.388 -> 18.724 ( -3.42%) [ +0.08% +0.00% +0.10% / -3.42% -3.34% -3.33%] index_add_ perm_sorted : Elapsed 0.039 ms (19.403 ms / 500) 18.936 -> 18.597 ( -1.79%) [ +0.00% +0.02% +0.00% / -1.68% -1.79% -1.76%] index_copy_ perm_sorted : Elapsed 0.038 ms (18.936 ms / 500) good 26.545 -> 24.819 ( -6.50%) [ +0.04% +0.00% +0.09% / -6.50% -6.49% -6.47%] index_select const : Elapsed 0.053 ms (26.555 ms / 500) 26.655 -> 25.834 ( -3.08%) [ +0.00% +0.03% +0.14% / -2.64% -3.07% -3.08%] index_select wrap : Elapsed 0.053 ms (26.655 ms / 500) good 26.452 -> 24.897 ( -5.88%) [ +0.02% +0.00% +0.09% / -5.55% -5.88% -5.77%] index_select linear : Elapsed 0.053 ms (26.458 ms / 500) good 26.465 -> 24.874 ( -6.01%) [ +0.02% +0.07% +0.00% / -6.01% -5.72% -5.79%] index_select reverse : Elapsed 0.053 ms (26.470 ms / 500) good 26.359 -> 24.760 ( -6.07%) [ +0.03% +0.13% +0.00% / -5.85% -6.07% -5.74%] index_select skip64 : Elapsed 0.053 ms (26.368 ms / 500) good 26.426 -> 24.814 ( -6.10%) [ +0.23% +0.00% +0.23% / -6.10% -6.08% -6.04%] index_select skip256 : Elapsed 0.053 ms (26.487 ms / 500) good 26.603 -> 25.025 ( -5.93%) [ +0.00% +0.05% +0.13% / -5.81% -5.93% -5.80%] index_select spread : Elapsed 0.053 ms (26.603 ms / 500) 26.605 -> 26.046 ( -2.10%) [ +0.03% +0.02% +0.00% / -2.10% -2.00% -1.86%] index_select strided 3 : Elapsed 0.053 ms (26.614 ms / 500) 26.563 -> 26.067 ( -1.87%) [ +0.15% +0.00% +0.02% / -1.20% -1.84% -1.87%] index_select strided 5 : Elapsed 0.053 ms (26.604 ms / 500) 26.615 -> 26.125 ( -1.84%) [ +0.00% +0.07% +0.11% / -1.84% -1.69% -1.67%] index_select strided 7 : Elapsed 0.053 ms (26.615 ms / 500) good 26.366 -> 24.821 ( -5.86%) [ +0.05% +0.00% +0.13% / -5.59% -5.81% -5.86%] index_select strided 8 : Elapsed 0.053 ms (26.378 ms / 500) good 26.439 -> 24.818 ( -6.13%) [ +0.11% +0.00% +0.14% / -6.06% -6.12% -6.13%] index_select strided 16 : Elapsed 0.053 ms (26.467 ms / 500) 26.478 -> 25.429 ( -3.96%) [ +0.06% +0.00% +0.10% / -3.75% -3.92% -3.96%] index_select random : Elapsed 0.053 ms (26.495 ms / 500) good 26.453 -> 24.984 ( -5.55%) [ +0.17% +0.00% +0.02% / -5.55% -4.96% -4.93%] index_select random_sorted : Elapsed 0.053 ms (26.498 ms / 500) B = [256, 512] (stride (1, 256)) A = [32, 512] (stride (1, 32)) dim = 0 18.207 -> 17.717 ( -2.69%) [ +0.08% +0.09% +0.00% / -2.42% -2.69% -2.64%] index_add_ linear : Elapsed 0.036 ms (18.222 ms / 500) 17.954 -> 17.557 ( -2.21%) [ +0.06% +0.01% +0.00% / -2.05% -2.21% -2.16%] index_copy_ linear : Elapsed 0.036 ms (17.964 ms / 500) 18.287 -> 17.724 ( -3.08%) [ +0.23% +0.00% +0.15% / -2.85% -3.08% -2.96%] index_add_ reverse : Elapsed 0.037 ms (18.329 ms / 500) 18.050 -> 17.569 ( -2.66%) [ +0.16% +0.02% +0.00% / -2.61% -2.66% -2.65%] index_copy_ reverse : Elapsed 0.036 ms (18.079 ms / 500) 20.168 -> 19.384 ( -3.89%) [ +0.02% +0.05% +0.00% / -3.48% -3.82% -3.89%] index_add_ spread : Elapsed 0.040 ms (20.172 ms / 500) 19.739 -> 19.391 ( -1.76%) [ +0.14% +0.04% +0.00% / -1.54% -1.76% -1.76%] index_copy_ spread : Elapsed 0.040 ms (19.766 ms / 500) 18.745 -> 18.197 ( -2.92%) [ +0.16% +0.10% +0.00% / -2.85% -2.90% -2.92%] index_add_ strided 3 : Elapsed 0.038 ms (18.775 ms / 500) 18.495 -> 18.128 ( -1.98%) [ +0.21% +0.10% +0.00% / -1.98% -1.82% -1.88%] index_copy_ strided 3 : Elapsed 0.037 ms (18.533 ms / 500) 19.239 -> 18.625 ( -3.19%) [ +0.00% +0.02% +0.05% / -3.19% -3.18% -3.13%] index_add_ strided 5 : Elapsed 0.038 ms (19.239 ms / 500) 18.958 -> 18.569 ( -2.05%) [ +0.00% +0.03% +0.12% / -2.02% -1.99% -2.05%] index_copy_ strided 5 : Elapsed 0.038 ms (18.958 ms / 500) 19.785 -> 19.150 ( -3.21%) [ +0.06% +0.00% +0.12% / -2.98% -3.09% -3.21%] index_add_ strided 7 : Elapsed 0.040 ms (19.796 ms / 500) 19.459 -> 19.049 ( -2.11%) [ +0.00% +0.02% +0.07% / -1.98% -2.11% -2.06%] index_copy_ strided 7 : Elapsed 0.039 ms (19.459 ms / 500) 18.254 -> 17.864 ( -2.14%) [ +0.08% +0.12% +0.00% / -2.08% -2.14% -2.13%] index_add_ strided 255 : Elapsed 0.037 ms (18.269 ms / 500) 18.033 -> 17.697 ( -1.86%) [ +0.05% +0.03% +0.00% / -1.82% -1.86% -1.81%] index_copy_ strided 255 : Elapsed 0.036 ms (18.042 ms / 500) 19.390 -> 18.960 ( -2.22%) [ +0.03% +0.00% +0.05% / -1.93% -2.16% -2.22%] index_add_ perm : Elapsed 0.039 ms (19.396 ms / 500) 18.898 -> 18.524 ( -1.98%) [ +0.04% +0.00% +0.11% / -1.82% -1.97% -1.98%] index_copy_ perm : Elapsed 0.038 ms (18.905 ms / 500) 19.409 -> 18.896 ( -2.64%) [ +0.14% +0.00% +0.08% / -2.34% -2.56% -2.64%] index_add_ perm_sorted : Elapsed 0.039 ms (19.437 ms / 500) 18.940 -> 18.564 ( -1.99%) [ +0.08% +0.00% +0.05% / -1.77% -1.97% -1.99%] index_copy_ perm_sorted : Elapsed 0.038 ms (18.955 ms / 500) Good 28.196 -> 24.881 (-11.76%) [ +0.00% +0.08% +0.12% / -11.49% -11.76% -11.68%] index_select const : Elapsed 0.056 ms (28.196 ms / 500) Good 28.017 -> 24.862 (-11.26%) [ +0.11% +0.00% +0.07% / -11.21% -11.01% -11.26%] index_select wrap : Elapsed 0.056 ms (28.049 ms / 500) Good 28.039 -> 25.074 (-10.57%) [ +0.00% +0.09% +0.22% / -10.33% -10.57% -10.44%] index_select linear : Elapsed 0.056 ms (28.039 ms / 500) good 27.919 -> 25.133 ( -9.98%) [ +0.00% +0.04% +0.13% / -9.98% -9.84% -9.86%] index_select reverse : Elapsed 0.056 ms (27.919 ms / 500) Good 27.907 -> 24.816 (-11.08%) [ +0.08% +0.05% +0.00% / -10.86% -11.08% -10.95%] index_select skip64 : Elapsed 0.056 ms (27.930 ms / 500) Good 27.925 -> 24.867 (-10.95%) [ +0.07% +0.00% +0.10% / -10.95% -10.73% -10.69%] index_select skip256 : Elapsed 0.056 ms (27.945 ms / 500) Good 28.208 -> 25.316 (-10.25%) [ +0.06% +0.04% +0.00% / -10.16% -10.25% -10.21%] index_select spread : Elapsed 0.056 ms (28.224 ms / 500) Good 28.076 -> 24.896 (-11.33%) [ +0.10% +0.00% +0.04% / -11.33% -11.09% -11.05%] index_select strided 3 : Elapsed 0.056 ms (28.105 ms / 500) Good 28.447 -> 24.927 (-12.37%) [ +0.10% +0.04% +0.00% / -12.32% -12.32% -12.37%] index_select strided 5 : Elapsed 0.057 ms (28.475 ms / 500) Good 27.966 -> 24.874 (-11.06%) [ +0.04% +0.00% +0.14% / -11.06% -11.01% -10.72%] index_select strided 7 : Elapsed 0.056 ms (27.977 ms / 500) Good 28.062 -> 24.856 (-11.42%) [ +0.10% +0.00% +0.05% / -11.14% -11.26% -11.42%] index_select strided 8 : Elapsed 0.056 ms (28.091 ms / 500) Good 28.013 -> 24.884 (-11.17%) [ +0.00% +0.01% +0.04% / -10.98% -11.17% -11.11%] index_select strided 16 : Elapsed 0.056 ms (28.013 ms / 500) Good 27.994 -> 24.890 (-11.09%) [ +0.00% +0.03% +0.01% / -11.09% -11.03% -10.97%] index_select random : Elapsed 0.056 ms (27.994 ms / 500) Good 28.275 -> 25.234 (-10.76%) [ +0.16% +0.05% +0.00% / -10.66% -10.76% -10.71%] index_select random_sorted : Elapsed 0.057 ms (28.320 ms / 500) out_shape = [32, 256] in_shape = [32, 512] idx_dim = 1 B = [32, 256] (stride (256, 1)) dim = 1 fill_cnt = 512 good 4.636 -> 4.286 ( -7.55%) [ +0.17% +0.06% +0.00% / -6.77% -7.33% -7.55%] index_fill_ const : Elapsed 0.009 ms (4.644 ms / 500) good 4.792 -> 4.380 ( -8.60%) [ +0.35% +0.00% +0.02% / -8.35% -8.60% -8.60%] index_fill_ linear : Elapsed 0.010 ms (4.809 ms / 500) good 4.751 -> 4.397 ( -7.45%) [ +0.53% +0.00% +0.08% / -7.30% -7.18% -7.45%] index_fill_ reverse : Elapsed 0.010 ms (4.776 ms / 500) Good 4.808 -> 4.313 (-10.30%) [ +0.37% +0.00% +0.17% / -9.55% -10.27% -10.30%] index_fill_ skip64 : Elapsed 0.010 ms (4.826 ms / 500) good 4.649 -> 4.323 ( -7.01%) [ +0.00% +0.04% +0.04% / -6.84% -6.90% -7.01%] index_fill_ skip256 : Elapsed 0.009 ms (4.649 ms / 500) good 4.643 -> 4.375 ( -5.77%) [ +0.34% +0.00% +0.04% / -5.77% -5.71% -5.54%] index_fill_ spread : Elapsed 0.009 ms (4.659 ms / 500) good 4.689 -> 4.407 ( -6.01%) [ +0.19% +0.00% +0.19% / -5.50% -5.99% -6.01%] index_fill_ strided 3 : Elapsed 0.009 ms (4.698 ms / 500) good 4.748 -> 4.381 ( -7.73%) [ +0.29% +0.13% +0.00% / -6.66% -7.73% -7.62%] index_fill_ strided 5 : Elapsed 0.010 ms (4.762 ms / 500) 4.647 -> 4.456 ( -4.11%) [ +0.00% +0.15% +0.45% / -3.36% -4.11% -3.98%] index_fill_ strided 7 : Elapsed 0.009 ms (4.647 ms / 500) 4.732 -> 4.546 ( -3.93%) [ +0.21% +0.00% +0.15% / -3.87% -3.68% -3.93%] index_fill_ strided 8 : Elapsed 0.009 ms (4.742 ms / 500) 4.673 -> 4.489 ( -3.94%) [ +0.30% +0.06% +0.00% / -3.25% -3.79% -3.94%] index_fill_ strided 16 : Elapsed 0.009 ms (4.687 ms / 500) good 4.727 -> 4.345 ( -8.08%) [ +0.08% +0.00% +0.04% / -7.30% -8.08% -8.06%] index_fill_ strided 64 : Elapsed 0.009 ms (4.731 ms / 500) good 4.843 -> 4.592 ( -5.18%) [ +0.47% +0.00% +0.06% / -4.13% -5.18% -5.10%] index_fill_ strided 100 : Elapsed 0.010 ms (4.866 ms / 500) good 4.679 -> 4.390 ( -6.18%) [ +0.21% +0.00% +0.19% / -6.18% -5.41% -5.41%] index_fill_ strided 255 : Elapsed 0.009 ms (4.689 ms / 500) 4.759 -> 4.524 ( -4.94%) [ +0.36% +0.00% +0.23% / -4.33% -4.81% -4.94%] index_fill_ random : Elapsed 0.010 ms (4.776 ms / 500) good 4.726 -> 4.417 ( -6.54%) [ +0.11% +0.04% +0.00% / -5.86% -6.54% -6.45%] index_fill_ random_sorted : Elapsed 0.009 ms (4.731 ms / 500) B = [32, 256] (stride (256, 1)) A = [32, 512] (stride (512, 1)) dim = 1 9.172 -> 8.968 ( -2.22%) [ +0.23% +0.00% +0.04% / -2.01% -2.12% -2.22%] index_select const : Elapsed 0.018 ms (9.193 ms / 500) 9.281 -> 9.037 ( -2.63%) [ +0.09% +0.00% +0.04% / -2.26% -2.59% -2.63%] index_select wrap : Elapsed 0.019 ms (9.289 ms / 500) 9.273 -> 9.078 ( -2.10%) [ +0.12% +0.00% +0.00% / -2.10% -2.08% -1.88%] index_select linear : Elapsed 0.019 ms (9.284 ms / 500) 9.320 -> 9.093 ( -2.44%) [ +0.00% +0.02% +0.06% / -2.24% -2.44% -2.39%] index_select reverse : Elapsed 0.019 ms (9.320 ms / 500) 9.142 -> 9.008 ( -1.47%) [ +0.23% +0.09% +0.00% / -1.47% -0.69% -0.56%] index_select skip64 : Elapsed 0.018 ms (9.163 ms / 500) 9.122 -> 8.969 ( -1.68%) [ +0.00% +0.01% +0.01% / -1.68% -1.22% -1.33%] index_select skip256 : Elapsed 0.018 ms (9.122 ms / 500) 9.310 -> 9.099 ( -2.27%) [ +0.03% +0.02% +0.00% / -2.27% -2.06% -2.03%] index_select spread : Elapsed 0.019 ms (9.313 ms / 500) 9.311 -> 9.109 ( -2.17%) [ +0.11% +0.00% +0.08% / -2.17% -1.94% -1.95%] index_select strided 3 : Elapsed 0.019 ms (9.321 ms / 500) 9.345 -> 9.101 ( -2.61%) [ +0.11% +0.03% +0.00% / -2.50% -2.61% -2.56%] index_select strided 5 : Elapsed 0.019 ms (9.355 ms / 500) 9.360 -> 9.109 ( -2.68%) [ +0.14% +0.12% +0.00% / -2.38% -2.62% -2.68%] index_select strided 7 : Elapsed 0.019 ms (9.373 ms / 500) 9.332 -> 9.136 ( -2.10%) [ +0.06% +0.17% +0.00% / -2.10% -1.88% -1.80%] index_select strided 8 : Elapsed 0.019 ms (9.338 ms / 500) 9.203 -> 9.041 ( -1.76%) [ +0.00% +0.01% +0.08% / -1.76% -0.52% -0.49%] index_select strided 16 : Elapsed 0.018 ms (9.203 ms / 500) 9.252 -> 9.042 ( -2.27%) [ +0.11% +0.00% +0.13% / -1.87% -2.19% -2.27%] index_select strided 64 : Elapsed 0.019 ms (9.262 ms / 500) 9.351 -> 9.167 ( -1.97%) [ +0.12% +0.01% +0.00% / -1.57% -1.84% -1.97%] index_select strided 100 : Elapsed 0.019 ms (9.362 ms / 500) 9.326 -> 9.119 ( -2.22%) [ +0.15% +0.00% +0.14% / -2.22% -2.11% -2.13%] index_select strided 255 : Elapsed 0.019 ms (9.340 ms / 500) 9.164 -> 8.995 ( -1.84%) [ +0.12% +0.16% +0.00% / -1.83% -1.81% -1.84%] index_select strided 256 : Elapsed 0.018 ms (9.175 ms / 500) 9.305 -> 9.102 ( -2.18%) [ +0.00% +0.05% +0.02% / -2.18% -1.86% -1.89%] index_select strided 257 : Elapsed 0.019 ms (9.305 ms / 500) 9.303 -> 9.157 ( -1.57%) [ +0.00% +0.01% +0.03% / -1.57% -0.97% -0.96%] index_select random : Elapsed 0.019 ms (9.303 ms / 500) 9.346 -> 9.159 ( -2.00%) [ +0.12% +0.11% +0.00% / -2.00% -1.92% -1.94%] index_select random_sorted : Elapsed 0.019 ms (9.357 ms / 500) 9.351 -> 9.207 ( -1.54%) [ +0.00% +0.14% +0.06% / -1.54% -1.14% -1.09%] index_select perm : Elapsed 0.019 ms (9.351 ms / 500) 9.377 -> 9.133 ( -2.60%) [ +0.20% +0.17% +0.00% / -2.18% -2.60% -2.53%] index_select perm_sorted : Elapsed 0.019 ms (9.396 ms / 500) B = [32, 256] (stride (256, 1)) A = [32, 512] (stride (1, 32)) dim = 1 9.104 -> 8.974 ( -1.43%) [ +0.00% +0.00% +0.01% / -1.41% -1.41% -1.43%] index_select const : Elapsed 0.018 ms (9.104 ms / 500) 9.095 -> 9.101 ( +0.07%) [ +0.09% +0.13% +0.00% / +0.07% +1.10% +1.07%] index_select wrap : Elapsed 0.018 ms (9.103 ms / 500) 9.227 -> 9.144 ( -0.90%) [ +0.10% +0.00% +0.02% / -0.48% -0.86% -0.90%] index_select linear : Elapsed 0.018 ms (9.236 ms / 500) 9.156 -> 9.119 ( -0.40%) [ +0.09% +0.04% +0.00% / -0.40% +0.22% +0.10%] index_select reverse : Elapsed 0.018 ms (9.164 ms / 500) 9.119 -> 9.021 ( -1.07%) [ +0.22% +0.00% +0.01% / -1.07% -0.95% -1.07%] index_select skip64 : Elapsed 0.018 ms (9.139 ms / 500) 9.167 -> 8.965 ( -2.20%) [ +0.16% +0.00% +0.08% / -1.65% -2.20% -2.15%] index_select skip256 : Elapsed 0.018 ms (9.182 ms / 500) 9.109 -> 9.134 ( +0.27%) [ +0.00% +0.08% +0.04% / +0.27% +0.54% +0.58%] index_select spread : Elapsed 0.018 ms (9.109 ms / 500) 9.138 -> 9.150 ( +0.13%) [ +0.00% +0.04% +0.03% / +0.13% +0.22% +0.21%] index_select strided 3 : Elapsed 0.018 ms (9.138 ms / 500) 9.155 -> 9.156 ( +0.01%) [ +0.14% +0.00% +0.01% / +0.01% +0.17% +0.08%] index_select strided 5 : Elapsed 0.018 ms (9.168 ms / 500) 9.104 -> 9.137 ( +0.36%) [ +0.16% +0.00% +0.08% / +0.36% +1.49% +1.41%] index_select strided 7 : Elapsed 0.018 ms (9.119 ms / 500) 9.178 -> 9.107 ( -0.77%) [ +0.10% +0.00% +0.05% / -0.27% -0.74% -0.77%] index_select strided 8 : Elapsed 0.018 ms (9.187 ms / 500) 9.165 -> 9.064 ( -1.10%) [ +0.17% +0.12% +0.00% / -0.38% -1.10% -1.03%] index_select strided 16 : Elapsed 0.018 ms (9.181 ms / 500) 9.143 -> 9.028 ( -1.26%) [ +0.00% +0.08% +0.08% / -1.26% -1.13% -1.05%] index_select strided 64 : Elapsed 0.018 ms (9.143 ms / 500) 9.084 -> 9.103 ( +0.21%) [ +0.08% +0.04% +0.00% / +0.21% +1.13% +1.03%] index_select strided 100 : Elapsed 0.018 ms (9.091 ms / 500) 9.157 -> 9.140 ( -0.19%) [ +0.17% +0.05% +0.00% / +0.00% -0.15% -0.19%] index_select strided 255 : Elapsed 0.018 ms (9.173 ms / 500) 9.059 -> 8.959 ( -1.10%) [ +0.08% +0.22% +0.00% / -1.10% -0.91% -0.91%] index_select strided 256 : Elapsed 0.018 ms (9.066 ms / 500) 9.123 -> 9.141 ( +0.20%) [ +0.04% +0.08% +0.00% / +0.20% +0.33% +0.27%] index_select strided 257 : Elapsed 0.018 ms (9.127 ms / 500) 9.224 -> 9.128 ( -1.04%) [ +0.14% +0.15% +0.00% / -0.41% -0.99% -1.04%] index_select random : Elapsed 0.018 ms (9.237 ms / 500) 9.120 -> 9.109 ( -0.12%) [ +0.03% +0.05% +0.00% / -0.12% +0.48% +0.50%] index_select random_sorted : Elapsed 0.018 ms (9.123 ms / 500) 9.184 -> 9.163 ( -0.23%) [ +0.21% +0.00% +0.04% / -0.05% -0.23% -0.13%] index_select perm : Elapsed 0.018 ms (9.203 ms / 500) 9.211 -> 9.174 ( -0.40%) [ +0.00% +0.08% +0.14% / -0.40% -0.16% -0.28%] index_select perm_sorted : Elapsed 0.018 ms (9.211 ms / 500) B = [32, 256] (stride (1, 32)) dim = 1 fill_cnt = 512 4.380 -> 4.297 ( -1.89%) [ +0.11% +0.11% +0.00% / -1.53% -1.89% -1.87%] index_fill_ const : Elapsed 0.009 ms (4.385 ms / 500) 4.572 -> 4.438 ( -2.93%) [ +0.26% +0.09% +0.00% / -1.60% -2.93% -2.73%] index_fill_ linear : Elapsed 0.009 ms (4.584 ms / 500) 4.510 -> 4.368 ( -3.15%) [ +0.20% +0.04% +0.00% / -1.11% -3.13% -3.15%] index_fill_ reverse : Elapsed 0.009 ms (4.519 ms / 500) 4.545 -> 4.322 ( -4.91%) [ +0.29% +0.26% +0.00% / -1.34% -4.91% -4.84%] index_fill_ skip64 : Elapsed 0.009 ms (4.558 ms / 500) 4.411 -> 4.354 ( -1.29%) [ +0.09% +0.00% +0.05% / -1.29% +0.39% +0.32%] index_fill_ skip256 : Elapsed 0.009 ms (4.415 ms / 500) 4.413 -> 4.357 ( -1.27%) [ +0.11% +0.00% +0.23% / -1.27% +0.61% +0.54%] index_fill_ spread : Elapsed 0.009 ms (4.418 ms / 500) 4.460 -> 4.367 ( -2.09%) [ +0.11% +0.00% +0.02% / -1.52% -2.09% -1.91%] index_fill_ strided 3 : Elapsed 0.009 ms (4.465 ms / 500) 4.512 -> 4.365 ( -3.26%) [ +0.24% +0.02% +0.00% / -1.13% -3.26% -3.19%] index_fill_ strided 5 : Elapsed 0.009 ms (4.523 ms / 500) 4.429 -> 4.368 ( -1.38%) [ +0.38% +0.00% +0.20% / -1.38% -0.63% -0.70%] index_fill_ strided 7 : Elapsed 0.009 ms (4.446 ms / 500) 4.357 -> 4.301 ( -1.29%) [ +0.28% +0.00% +0.32% / -1.29% +1.79% +1.93%] index_fill_ strided 8 : Elapsed 0.009 ms (4.369 ms / 500) 4.354 -> 4.292 ( -1.42%) [ +0.30% +0.00% +0.14% / -1.42% +2.14% +1.88%] index_fill_ strided 16 : Elapsed 0.009 ms (4.367 ms / 500) 4.456 -> 4.293 ( -3.66%) [ +0.29% +0.22% +0.00% / -1.19% -3.46% -3.66%] index_fill_ strided 64 : Elapsed 0.009 ms (4.469 ms / 500) 4.512 -> 4.336 ( -3.90%) [ +0.18% +0.00% +0.04% / -1.40% -3.70% -3.90%] index_fill_ strided 100 : Elapsed 0.009 ms (4.520 ms / 500) 4.438 -> 4.380 ( -1.31%) [ +0.00% +0.16% +0.14% / -1.31% +0.50% +0.77%] index_fill_ strided 255 : Elapsed 0.009 ms (4.438 ms / 500) 4.470 -> 4.341 ( -2.89%) [ +0.11% +0.02% +0.00% / -1.39% -2.89% -2.82%] index_fill_ random : Elapsed 0.009 ms (4.475 ms / 500) 4.462 -> 4.339 ( -2.76%) [ +0.45% +0.04% +0.00% / -1.05% -2.76% -2.67%] index_fill_ random_sorted : Elapsed 0.009 ms (4.482 ms / 500) B = [32, 256] (stride (1, 32)) A = [32, 512] (stride (512, 1)) dim = 1 9.117 -> 9.065 ( -0.57%) [ +0.03% +0.00% +0.02% / +0.04% -0.57% -0.48%] index_select const : Elapsed 0.018 ms (9.120 ms / 500) 9.238 -> 9.191 ( -0.51%) [ +0.11% +0.16% +0.00% / +0.11% -0.49% -0.51%] index_select wrap : Elapsed 0.018 ms (9.248 ms / 500) 9.240 -> 9.247 ( +0.08%) [ +0.04% +0.00% +0.00% / +0.08% +0.32% +0.51%] index_select linear : Elapsed 0.018 ms (9.244 ms / 500) 9.281 -> 9.233 ( -0.52%) [ +0.15% +0.00% +0.01% / +0.13% -0.52% -0.38%] index_select reverse : Elapsed 0.019 ms (9.295 ms / 500) 9.098 -> 9.111 ( +0.14%) [ +0.13% +0.10% +0.00% / +0.14% +1.22% +1.32%] index_select skip64 : Elapsed 0.018 ms (9.110 ms / 500) 9.069 -> 9.076 ( +0.08%) [ +0.04% +0.11% +0.00% / +0.08% +0.45% +0.53%] index_select skip256 : Elapsed 0.018 ms (9.073 ms / 500) 9.260 -> 9.259 ( -0.01%) [ +0.00% +0.08% +0.01% / -0.01% +0.70% +0.76%] index_select spread : Elapsed 0.019 ms (9.260 ms / 500) 9.271 -> 9.284 ( +0.14%) [ +0.14% +0.00% +0.00% / +0.14% +0.66% +0.56%] index_select strided 3 : Elapsed 0.019 ms (9.284 ms / 500) 9.293 -> 9.269 ( -0.26%) [ +0.00% +0.04% +0.00% / -0.09% -0.22% -0.26%] index_select strided 5 : Elapsed 0.019 ms (9.293 ms / 500) 9.323 -> 9.271 ( -0.56%) [ +0.11% +0.00% +0.10% / +0.09% -0.46% -0.56%] index_select strided 7 : Elapsed 0.019 ms (9.333 ms / 500) 9.291 -> 9.289 ( -0.02%) [ +0.06% +0.00% +0.09% / -0.02% +0.64% +0.69%] index_select strided 8 : Elapsed 0.019 ms (9.297 ms / 500) 9.153 -> 9.165 ( +0.13%) [ +0.04% +0.15% +0.00% / +0.13% +2.10% +2.10%] index_select strided 16 : Elapsed 0.018 ms (9.157 ms / 500) 9.210 -> 9.151 ( -0.64%) [ +0.09% +0.00% +0.04% / +0.02% -0.64% -0.53%] index_select strided 64 : Elapsed 0.018 ms (9.218 ms / 500) 9.301 -> 9.287 ( -0.15%) [ +0.15% +0.00% +0.01% / +0.06% -0.15% -0.12%] index_select strided 100 : Elapsed 0.019 ms (9.315 ms / 500) 9.278 -> 9.287 ( +0.10%) [ +0.09% +0.12% +0.00% / +0.10% +0.36% +0.43%] index_select strided 255 : Elapsed 0.019 ms (9.286 ms / 500) 9.110 -> 9.109 ( -0.01%) [ +0.16% +0.14% +0.00% / -0.01% +0.12% +0.15%] index_select strided 256 : Elapsed 0.018 ms (9.125 ms / 500) 9.257 -> 9.265 ( +0.09%) [ +0.08% +0.00% +0.02% / +0.09% +0.78% +0.86%] index_select strided 257 : Elapsed 0.019 ms (9.264 ms / 500) 9.245 -> 9.257 ( +0.13%) [ +0.11% +0.00% +0.10% / +0.13% +1.25% +1.37%] index_select random : Elapsed 0.019 ms (9.255 ms / 500) 9.316 -> 9.320 ( +0.04%) [ +0.20% +0.12% +0.00% / +0.04% +0.32% +0.43%] index_select random_sorted : Elapsed 0.019 ms (9.335 ms / 500) 9.318 -> 9.321 ( +0.03%) [ +0.10% +0.00% +0.04% / +0.03% +0.93% +0.86%] index_select perm : Elapsed 0.019 ms (9.327 ms / 500) 9.340 -> 9.308 ( -0.34%) [ +0.20% +0.00% +0.06% / +0.13% -0.34% -0.33%] index_select perm_sorted : Elapsed 0.019 ms (9.359 ms / 500) B = [32, 256] (stride (1, 32)) A = [32, 512] (stride (1, 32)) dim = 1 9.021 -> 8.999 ( -0.24%) [ +0.21% +0.13% +0.00% / +0.19% -0.24% -0.12%] index_select const : Elapsed 0.018 ms (9.040 ms / 500) 9.032 -> 9.038 ( +0.07%) [ +0.11% +0.00% +0.08% / +0.07% +1.85% +2.08%] index_select wrap : Elapsed 0.018 ms (9.042 ms / 500) 9.176 -> 9.089 ( -0.95%) [ +0.11% +0.03% +0.00% / +0.13% -0.93% -0.95%] index_select linear : Elapsed 0.018 ms (9.186 ms / 500) 9.086 -> 9.095 ( +0.10%) [ +0.22% +0.04% +0.00% / +0.10% +0.72% +0.78%] index_select reverse : Elapsed 0.018 ms (9.106 ms / 500) 9.065 -> 9.072 ( +0.08%) [ +0.12% +0.12% +0.00% / +0.08% +0.22% +0.08%] index_select skip64 : Elapsed 0.018 ms (9.076 ms / 500) 9.098 -> 8.997 ( -1.11%) [ +0.15% +0.19% +0.00% / +0.08% -1.11% -1.06%] index_select skip256 : Elapsed 0.018 ms (9.112 ms / 500) 9.048 -> 9.050 ( +0.02%) [ +0.14% +0.08% +0.00% / +0.02% +0.56% +0.54%] index_select spread : Elapsed 0.018 ms (9.061 ms / 500) 9.062 -> 9.066 ( +0.04%) [ +0.03% +0.11% +0.00% / +0.04% +0.14% +0.17%] index_select strided 3 : Elapsed 0.018 ms (9.065 ms / 500) 9.088 -> 9.068 ( -0.22%) [ +0.13% +0.00% +0.04% / +0.22% -0.21% -0.22%] index_select strided 5 : Elapsed 0.018 ms (9.100 ms / 500) 9.038 -> 9.051 ( +0.14%) [ +0.27% +0.09% +0.00% / +0.14% +2.00% +1.98%] index_select strided 7 : Elapsed 0.018 ms (9.062 ms / 500) 9.122 -> 9.040 ( -0.90%) [ +0.13% +0.19% +0.00% / +0.23% -0.90% -0.90%] index_select strided 8 : Elapsed 0.018 ms (9.134 ms / 500) 9.120 -> 8.991 ( -1.41%) [ +0.07% +0.00% +0.00% / +0.11% -1.15% -1.41%] index_select strided 16 : Elapsed 0.018 ms (9.126 ms / 500) 9.075 -> 9.093 ( +0.20%) [ +0.00% +0.14% +0.10% / +0.25% +0.20% +0.23%] index_select strided 64 : Elapsed 0.018 ms (9.075 ms / 500) 9.014 -> 9.017 ( +0.03%) [ +0.24% +0.00% +0.01% / +0.03% +1.51% +1.61%] index_select strided 100 : Elapsed 0.018 ms (9.036 ms / 500) 9.077 -> 9.066 ( -0.12%) [ +0.23% +0.00% +0.21% / +0.23% -0.01% -0.12%] index_select strided 255 : Elapsed 0.018 ms (9.098 ms / 500) 8.995 -> 9.005 ( +0.11%) [ +0.08% +0.00% +0.01% / +0.11% +0.31% +0.29%] index_select strided 256 : Elapsed 0.018 ms (9.002 ms / 500) 9.059 -> 9.059 ( +0.00%) [ +0.02% +0.00% +0.12% / +0.00% +0.28% +0.41%] index_select strided 257 : Elapsed 0.018 ms (9.061 ms / 500) 9.169 -> 9.049 ( -1.31%) [ +0.04% +0.10% +0.00% / +0.12% -1.27% -1.31%] index_select random : Elapsed 0.018 ms (9.173 ms / 500) 9.057 -> 9.055 ( -0.02%) [ +0.17% +0.00% +0.11% / -0.02% +1.55% +1.45%] index_select random_sorted : Elapsed 0.018 ms (9.072 ms / 500) 9.135 -> 9.108 ( -0.30%) [ +0.05% +0.00% +0.04% / +0.00% -0.28% -0.30%] index_select perm : Elapsed 0.018 ms (9.140 ms / 500) 9.150 -> 9.145 ( -0.05%) [ +0.07% +0.13% +0.00% / +0.04% -0.05% -0.02%] index_select perm_sorted : Elapsed 0.018 ms (9.156 ms / 500) out_shape = [256, 32] in_shape = [512, 32] idx_dim = 0 B = [256, 32] (stride (32, 1)) dim = 0 fill_cnt = 512 4.379 -> 4.301 ( -1.78%) [ +0.21% +0.00% +0.07% / -1.42% -1.78% -1.78%] index_fill_ const : Elapsed 0.009 ms (4.388 ms / 500) 4.572 -> 4.449 ( -2.69%) [ +0.20% +0.07% +0.00% / -1.14% -2.67% -2.69%] index_fill_ linear : Elapsed 0.009 ms (4.581 ms / 500) 4.514 -> 4.375 ( -3.08%) [ +0.22% +0.04% +0.00% / -1.22% -3.08% -3.01%] index_fill_ reverse : Elapsed 0.009 ms (4.524 ms / 500) good 4.547 -> 4.318 ( -5.04%) [ +0.26% +0.00% +0.11% / -1.17% -5.04% -4.90%] index_fill_ skip64 : Elapsed 0.009 ms (4.559 ms / 500) 4.404 -> 4.340 ( -1.45%) [ +0.34% +0.00% +0.14% / -1.45% +0.68% +0.61%] index_fill_ skip256 : Elapsed 0.009 ms (4.419 ms / 500) 4.423 -> 4.351 ( -1.63%) [ +0.11% +0.00% +0.09% / -1.63% +0.54% +0.57%] index_fill_ spread : Elapsed 0.009 ms (4.428 ms / 500) 4.458 -> 4.367 ( -2.04%) [ +0.02% +0.00% +0.09% / -1.50% -2.04% -2.02%] index_fill_ strided 3 : Elapsed 0.009 ms (4.459 ms / 500) 4.509 -> 4.362 ( -3.26%) [ +0.18% +0.00% +0.04% / -1.31% -3.26% -3.19%] index_fill_ strided 5 : Elapsed 0.009 ms (4.517 ms / 500) 4.433 -> 4.360 ( -1.65%) [ +0.14% +0.00% +0.00% / -1.65% -0.70% -0.88%] index_fill_ strided 7 : Elapsed 0.009 ms (4.439 ms / 500) 4.361 -> 4.295 ( -1.51%) [ +0.30% +0.00% +0.18% / -1.51% +1.95% +1.88%] index_fill_ strided 8 : Elapsed 0.009 ms (4.374 ms / 500) 4.356 -> 4.287 ( -1.58%) [ +0.28% +0.16% +0.00% / -1.58% +2.09% +2.09%] index_fill_ strided 16 : Elapsed 0.009 ms (4.368 ms / 500) 4.459 -> 4.289 ( -3.81%) [ +0.07% +0.00% +0.09% / -1.55% -3.81% -3.75%] index_fill_ strided 64 : Elapsed 0.009 ms (4.462 ms / 500) 4.515 -> 4.337 ( -3.94%) [ +0.27% +0.20% +0.00% / -1.59% -3.81% -3.94%] index_fill_ strided 100 : Elapsed 0.009 ms (4.527 ms / 500) 4.442 -> 4.369 ( -1.64%) [ +0.11% +0.00% +0.02% / -1.64% +0.61% +0.47%] index_fill_ strided 255 : Elapsed 0.009 ms (4.447 ms / 500) 4.470 -> 4.340 ( -2.91%) [ +0.09% +0.00% +0.11% / -1.32% -2.73% -2.91%] index_fill_ random : Elapsed 0.009 ms (4.474 ms / 500) 4.466 -> 4.339 ( -2.84%) [ +0.34% +0.00% +0.18% / -1.21% -2.80% -2.84%] index_fill_ random_sorted : Elapsed 0.009 ms (4.481 ms / 500) B = [256, 32] (stride (32, 1)) A = [512, 32] (stride (32, 1)) dim = 0 9.055 -> 8.994 ( -0.67%) [ +0.00% +0.11% +0.02% / +0.14% -0.67% -0.66%] index_select const : Elapsed 0.018 ms (9.055 ms / 500) 9.131 -> 9.061 ( -0.77%) [ +0.10% +0.00% +0.03% / -0.04% -0.67% -0.77%] index_select wrap : Elapsed 0.018 ms (9.140 ms / 500) 9.139 -> 9.153 ( +0.15%) [ +0.20% +0.00% +0.09% / +0.15% +0.31% +0.24%] index_select linear : Elapsed 0.018 ms (9.157 ms / 500) 9.178 -> 9.134 ( -0.48%) [ +0.16% +0.00% +0.13% / +0.07% -0.48% -0.35%] index_select reverse : Elapsed 0.018 ms (9.193 ms / 500) 9.017 -> 9.025 ( +0.09%) [ +0.02% +0.00% +0.18% / +0.09% +1.22% +1.18%] index_select skip64 : Elapsed 0.018 ms (9.019 ms / 500) 8.995 -> 9.011 ( +0.18%) [ +0.07% +0.07% +0.00% / +0.18% +0.51% +0.66%] index_select skip256 : Elapsed 0.018 ms (9.001 ms / 500) 9.055 -> 9.065 ( +0.11%) [ +0.11% +0.00% +0.08% / +0.11% +0.68% +0.66%] index_select spread : Elapsed 0.018 ms (9.065 ms / 500) 9.082 -> 9.089 ( +0.08%) [ +0.07% +0.00% +0.04% / +0.08% +0.22% +0.30%] index_select strided 3 : Elapsed 0.018 ms (9.088 ms / 500) 9.107 -> 9.054 ( -0.58%) [ +0.14% +0.10% +0.00% / +0.14% -0.47% -0.58%] index_select strided 5 : Elapsed 0.018 ms (9.120 ms / 500) 9.124 -> 9.071 ( -0.58%) [ +0.34% +0.00% +0.10% / +0.14% -0.57% -0.58%] index_select strided 7 : Elapsed 0.018 ms (9.155 ms / 500) 9.076 -> 9.098 ( +0.24%) [ +0.19% +0.23% +0.00% / +0.24% +0.45% +0.40%] index_select strided 8 : Elapsed 0.018 ms (9.093 ms / 500) 8.978 -> 8.983 ( +0.06%) [ +0.07% +0.01% +0.00% / +0.06% +2.26% +2.26%] index_select strided 16 : Elapsed 0.018 ms (8.984 ms / 500) 9.124 -> 9.032 ( -1.01%) [ +0.00% +0.04% +0.04% / +0.01% -0.99% -1.01%] index_select strided 64 : Elapsed 0.018 ms (9.124 ms / 500) 9.080 -> 9.048 ( -0.35%) [ +0.08% +0.12% +0.00% / +0.17% -0.35% -0.33%] index_select strided 100 : Elapsed 0.018 ms (9.087 ms / 500) 9.089 -> 9.084 ( -0.06%) [ +0.02% +0.00% +0.06% / +0.00% +0.04% -0.06%] index_select strided 255 : Elapsed 0.018 ms (9.091 ms / 500) 9.038 -> 9.035 ( -0.03%) [ +0.00% +0.18% +0.02% / +0.09% -0.03% +0.10%] index_select strided 256 : Elapsed 0.018 ms (9.038 ms / 500) 9.051 -> 9.054 ( +0.03%) [ +0.00% +0.04% +0.10% / +0.03% +0.64% +0.63%] index_select strided 257 : Elapsed 0.018 ms (9.051 ms / 500) 9.045 -> 9.060 ( +0.17%) [ +0.07% +0.00% +0.12% / +0.17% +1.06% +1.17%] index_select random : Elapsed 0.018 ms (9.051 ms / 500) 9.106 -> 9.116 ( +0.11%) [ +0.00% +0.03% +0.09% / +0.11% +0.51% +0.58%] index_select random_sorted : Elapsed 0.018 ms (9.106 ms / 500) 9.129 -> 9.140 ( +0.12%) [ +0.18% +0.19% +0.00% / +0.12% +0.78% +0.77%] index_select perm : Elapsed 0.018 ms (9.145 ms / 500) 9.151 -> 9.095 ( -0.61%) [ +0.11% +0.00% +0.09% / +0.12% -0.61% -0.49%] index_select perm_sorted : Elapsed 0.018 ms (9.161 ms / 500) B = [256, 32] (stride (32, 1)) A = [512, 32] (stride (1, 512)) dim = 0 9.100 -> 9.081 ( -0.21%) [ +0.05% +0.05% +0.00% / +0.13% -0.18% -0.21%] index_select const : Elapsed 0.018 ms (9.105 ms / 500) 9.161 -> 9.163 ( +0.02%) [ +0.04% +0.10% +0.00% / +0.02% +1.56% +1.69%] index_select wrap : Elapsed 0.018 ms (9.165 ms / 500) 9.276 -> 9.214 ( -0.67%) [ +0.19% +0.14% +0.00% / +0.00% -0.67% -0.64%] index_select linear : Elapsed 0.019 ms (9.294 ms / 500) 9.194 -> 9.198 ( +0.04%) [ +0.13% +0.00% +0.05% / +0.04% +0.85% +0.77%] index_select reverse : Elapsed 0.018 ms (9.206 ms / 500) 9.169 -> 9.170 ( +0.01%) [ +0.10% +0.00% +0.02% / +0.01% +0.10% +0.01%] index_select skip64 : Elapsed 0.018 ms (9.178 ms / 500) 9.164 -> 9.067 ( -1.06%) [ +0.09% +0.00% +0.04% / +0.10% -1.01% -1.06%] index_select skip256 : Elapsed 0.018 ms (9.172 ms / 500) 9.244 -> 9.256 ( +0.13%) [ +0.04% +0.02% +0.00% / +0.13% +0.53% +0.50%] index_select spread : Elapsed 0.018 ms (9.248 ms / 500) 9.260 -> 9.265 ( +0.05%) [ +0.06% +0.04% +0.00% / +0.13% +0.23% +0.05%] index_select strided 3 : Elapsed 0.019 ms (9.266 ms / 500) 9.272 -> 9.271 ( -0.01%) [ +0.10% +0.00% +0.12% / +0.13% -0.01% +0.06%] index_select strided 5 : Elapsed 0.019 ms (9.281 ms / 500) 9.232 -> 9.234 ( +0.02%) [ +0.02% +0.00% +0.03% / +0.02% +1.64% +1.62%] index_select strided 7 : Elapsed 0.018 ms (9.234 ms / 500) 9.335 -> 9.287 ( -0.51%) [ +0.09% +0.01% +0.00% / +0.09% -0.51% -0.44%] index_select strided 8 : Elapsed 0.019 ms (9.343 ms / 500) 9.274 -> 9.184 ( -0.97%) [ +0.11% +0.09% +0.00% / +0.09% -0.97% -0.88%] index_select strided 16 : Elapsed 0.019 ms (9.284 ms / 500) 9.167 -> 9.177 ( +0.11%) [ +0.01% +0.17% +0.00% / +0.11% +0.51% +0.55%] index_select strided 64 : Elapsed 0.018 ms (9.168 ms / 500) 9.241 -> 9.253 ( +0.13%) [ +0.11% +0.03% +0.00% / +0.13% +1.32% +1.30%] index_select strided 100 : Elapsed 0.019 ms (9.251 ms / 500) 9.278 -> 9.275 ( -0.03%) [ +0.18% +0.05% +0.00% / +0.17% -0.03% -0.02%] index_select strided 255 : Elapsed 0.019 ms (9.295 ms / 500) 9.079 -> 9.084 ( +0.06%) [ +0.02% +0.00% +0.07% / +0.06% +0.34% +0.33%] index_select strided 256 : Elapsed 0.018 ms (9.081 ms / 500) 9.252 -> 9.254 ( +0.02%) [ +0.16% +0.02% +0.00% / +0.02% +0.43% +0.46%] index_select strided 257 : Elapsed 0.019 ms (9.267 ms / 500) 9.354 -> 9.265 ( -0.95%) [ +0.04% +0.04% +0.00% / +0.03% -0.94% -0.95%] index_select random : Elapsed 0.019 ms (9.358 ms / 500) 9.261 -> 9.269 ( +0.09%) [ +0.09% +0.00% +0.16% / +0.09% +1.11% +1.02%] index_select random_sorted : Elapsed 0.019 ms (9.269 ms / 500) 9.330 -> 9.306 ( -0.26%) [ +0.08% +0.08% +0.00% / +0.11% -0.26% -0.15%] index_select perm : Elapsed 0.019 ms (9.337 ms / 500) 9.323 -> 9.323 ( +0.00%) [ +0.06% +0.05% +0.00% / +0.00% +0.33% +0.44%] index_select perm_sorted : Elapsed 0.019 ms (9.329 ms / 500) B = [256, 32] (stride (1, 256)) dim = 0 fill_cnt = 512 good 4.622 -> 4.296 ( -7.05%) [ +0.48% +0.17% +0.00% / -6.49% -7.05% -7.01%] index_fill_ const : Elapsed 0.009 ms (4.644 ms / 500) good 4.785 -> 4.381 ( -8.44%) [ +0.31% +0.00% +0.04% / -8.38% -8.44% -8.30%] index_fill_ linear : Elapsed 0.010 ms (4.800 ms / 500) good 4.753 -> 4.396 ( -7.51%) [ +0.06% +0.00% +0.21% / -7.38% -7.28% -7.51%] index_fill_ reverse : Elapsed 0.010 ms (4.756 ms / 500) Good 4.812 -> 4.310 (-10.43%) [ +0.15% +0.17% +0.00% / -9.58% -10.25% -10.43%] index_fill_ skip64 : Elapsed 0.010 ms (4.819 ms / 500) good 4.650 -> 4.323 ( -7.03%) [ +0.17% +0.06% +0.00% / -6.82% -7.03% -7.03%] index_fill_ skip256 : Elapsed 0.009 ms (4.658 ms / 500) good 4.641 -> 4.381 ( -5.60%) [ +0.52% +0.09% +0.00% / -5.60% -5.49% -5.47%] index_fill_ spread : Elapsed 0.009 ms (4.665 ms / 500) good 4.694 -> 4.408 ( -6.09%) [ +0.00% +0.19% +0.04% / -5.73% -6.09% -5.92%] index_fill_ strided 3 : Elapsed 0.009 ms (4.694 ms / 500) good 4.747 -> 4.385 ( -7.63%) [ +0.27% +0.00% +0.23% / -6.89% -7.63% -7.52%] index_fill_ strided 5 : Elapsed 0.010 ms (4.760 ms / 500) 4.656 -> 4.457 ( -4.27%) [ +0.13% +0.00% +0.24% / -3.57% -4.27% -4.27%] index_fill_ strided 7 : Elapsed 0.009 ms (4.662 ms / 500) 4.735 -> 4.544 ( -4.03%) [ +0.15% +0.00% +0.00% / -3.91% -3.99% -4.03%] index_fill_ strided 8 : Elapsed 0.009 ms (4.742 ms / 500) 4.678 -> 4.487 ( -4.08%) [ +0.28% +0.00% +0.00% / -3.33% -4.00% -4.08%] index_fill_ strided 16 : Elapsed 0.009 ms (4.691 ms / 500) good 4.725 -> 4.350 ( -7.94%) [ +0.21% +0.00% +0.11% / -7.41% -7.94% -7.89%] index_fill_ strided 64 : Elapsed 0.009 ms (4.735 ms / 500) good 4.845 -> 4.596 ( -5.14%) [ +0.19% +0.08% +0.00% / -4.21% -4.97% -5.14%] index_fill_ strided 100 : Elapsed 0.010 ms (4.854 ms / 500) good 4.681 -> 4.376 ( -6.52%) [ +0.04% +0.00% +0.04% / -6.52% -5.34% -5.36%] index_fill_ strided 255 : Elapsed 0.009 ms (4.683 ms / 500) 4.768 -> 4.537 ( -4.84%) [ +0.34% +0.00% +0.06% / -4.40% -4.74% -4.84%] index_fill_ random : Elapsed 0.010 ms (4.784 ms / 500) good 4.723 -> 4.417 ( -6.48%) [ +0.19% +0.13% +0.00% / -5.95% -6.16% -6.48%] index_fill_ random_sorted : Elapsed 0.009 ms (4.732 ms / 500) B = [256, 32] (stride (1, 256)) A = [512, 32] (stride (32, 1)) dim = 0 9.139 -> 8.969 ( -1.86%) [ +0.12% +0.00% +0.04% / -1.52% -1.86% -1.84%] index_select const : Elapsed 0.018 ms (9.150 ms / 500) 9.181 -> 9.109 ( -0.78%) [ +0.03% +0.07% +0.00% / -0.37% -0.78% -0.74%] index_select wrap : Elapsed 0.018 ms (9.184 ms / 500) 9.205 -> 9.142 ( -0.68%) [ +0.04% +0.00% +0.04% / -0.68% -0.50% -0.58%] index_select linear : Elapsed 0.018 ms (9.209 ms / 500) 9.233 -> 9.150 ( -0.90%) [ +0.11% +0.01% +0.00% / -0.80% -0.74% -0.90%] index_select reverse : Elapsed 0.018 ms (9.243 ms / 500) 9.080 -> 8.987 ( -1.02%) [ +0.01% +0.09% +0.00% / -1.02% -0.35% -0.19%] index_select skip64 : Elapsed 0.018 ms (9.081 ms / 500) 9.063 -> 8.975 ( -0.97%) [ +0.00% +0.01% +0.07% / -0.97% -0.82% -0.70%] index_select skip256 : Elapsed 0.018 ms (9.063 ms / 500) 9.123 -> 9.146 ( +0.25%) [ +0.13% +0.16% +0.00% / +0.25% +0.56% +0.49%] index_select spread : Elapsed 0.018 ms (9.135 ms / 500) 9.140 -> 9.157 ( +0.19%) [ +0.16% +0.11% +0.00% / +0.19% +0.51% +0.44%] index_select strided 3 : Elapsed 0.018 ms (9.155 ms / 500) 9.177 -> 9.157 ( -0.22%) [ +0.13% +0.15% +0.00% / -0.01% -0.09% -0.22%] index_select strided 5 : Elapsed 0.018 ms (9.189 ms / 500) 9.179 -> 9.161 ( -0.20%) [ +0.08% +0.09% +0.00% / +0.20% -0.20% -0.19%] index_select strided 7 : Elapsed 0.018 ms (9.186 ms / 500) 9.145 -> 9.123 ( -0.24%) [ +0.15% +0.03% +0.00% / -0.24% -0.07% -0.09%] index_select strided 8 : Elapsed 0.018 ms (9.159 ms / 500) 9.040 -> 9.040 ( +0.00%) [ +0.10% +0.00% +0.01% / +0.00% +1.23% +1.27%] index_select strided 16 : Elapsed 0.018 ms (9.049 ms / 500) 9.160 -> 9.005 ( -1.69%) [ +0.00% +0.13% +0.19% / -1.03% -1.69% -1.62%] index_select strided 64 : Elapsed 0.018 ms (9.160 ms / 500) 9.147 -> 9.120 ( -0.30%) [ +0.28% +0.03% +0.00% / -0.07% -0.24% -0.30%] index_select strided 100 : Elapsed 0.018 ms (9.173 ms / 500) 9.156 -> 9.158 ( +0.02%) [ +0.05% +0.03% +0.00% / +0.02% +0.02% +0.15%] index_select strided 255 : Elapsed 0.018 ms (9.161 ms / 500) 9.115 -> 8.985 ( -1.43%) [ +0.01% +0.02% +0.00% / -1.42% -1.38% -1.43%] index_select strided 256 : Elapsed 0.018 ms (9.116 ms / 500) 9.100 -> 9.132 ( +0.35%) [ +0.19% +0.20% +0.00% / +0.35% +0.79% +0.79%] index_select strided 257 : Elapsed 0.018 ms (9.117 ms / 500) 9.096 -> 9.111 ( +0.16%) [ +0.05% +0.03% +0.00% / +0.16% +1.21% +1.26%] index_select random : Elapsed 0.018 ms (9.101 ms / 500) 9.149 -> 9.123 ( -0.28%) [ +0.13% +0.03% +0.00% / -0.28% +0.28% +0.08%] index_select random_sorted : Elapsed 0.018 ms (9.161 ms / 500) 9.197 -> 9.187 ( -0.11%) [ +0.00% +0.00% +0.01% / -0.11% +0.36% +0.36%] index_select perm : Elapsed 0.018 ms (9.197 ms / 500) 9.213 -> 9.171 ( -0.46%) [ +0.17% +0.00% +0.02% / -0.18% -0.46% -0.37%] index_select perm_sorted : Elapsed 0.018 ms (9.229 ms / 500) B = [256, 32] (stride (1, 256)) A = [512, 32] (stride (1, 512)) dim = 0 9.152 -> 8.978 ( -1.90%) [ +0.08% +0.12% +0.00% / -1.78% -1.90% -1.84%] index_select const : Elapsed 0.018 ms (9.159 ms / 500) 9.206 -> 9.029 ( -1.92%) [ +0.12% +0.03% +0.00% / -1.92% -0.91% -0.92%] index_select wrap : Elapsed 0.018 ms (9.217 ms / 500) 9.321 -> 9.058 ( -2.82%) [ +0.16% +0.04% +0.00% / -2.26% -2.78% -2.82%] index_select linear : Elapsed 0.019 ms (9.336 ms / 500) 9.238 -> 9.050 ( -2.04%) [ +0.15% +0.02% +0.00% / -2.04% -1.56% -1.44%] index_select reverse : Elapsed 0.019 ms (9.252 ms / 500) 9.202 -> 9.034 ( -1.83%) [ +0.08% +0.07% +0.00% / -1.70% -1.83% -1.73%] index_select skip64 : Elapsed 0.018 ms (9.209 ms / 500) 9.209 -> 8.975 ( -2.54%) [ +0.24% +0.00% +0.03% / -1.99% -2.54% -2.49%] index_select skip256 : Elapsed 0.018 ms (9.231 ms / 500) 9.292 -> 9.071 ( -2.38%) [ +0.06% +0.05% +0.00% / -2.38% -1.95% -1.88%] index_select spread : Elapsed 0.019 ms (9.298 ms / 500) 9.315 -> 9.103 ( -2.28%) [ +0.05% +0.00% +0.08% / -2.28% -2.20% -2.20%] index_select strided 3 : Elapsed 0.019 ms (9.320 ms / 500) 9.316 -> 9.085 ( -2.48%) [ +0.29% +0.17% +0.00% / -2.48% -2.13% -2.14%] index_select strided 5 : Elapsed 0.019 ms (9.343 ms / 500) 9.273 -> 9.084 ( -2.04%) [ +0.08% +0.00% +0.11% / -2.04% -0.75% -0.72%] index_select strided 7 : Elapsed 0.019 ms (9.280 ms / 500) 9.381 -> 9.134 ( -2.63%) [ +0.06% +0.12% +0.00% / -2.49% -2.62% -2.63%] index_select strided 8 : Elapsed 0.019 ms (9.387 ms / 500) 9.318 -> 9.063 ( -2.74%) [ +0.12% +0.10% +0.00% / -1.96% -2.53% -2.74%] index_select strided 16 : Elapsed 0.019 ms (9.329 ms / 500) 9.222 -> 9.068 ( -1.67%) [ +0.00% +0.00% +0.03% / -1.67% -1.47% -1.44%] index_select strided 64 : Elapsed 0.018 ms (9.222 ms / 500) 9.288 -> 9.142 ( -1.57%) [ +0.00% +0.03% +0.00% / -1.57% -0.57% -0.41%] index_select strided 100 : Elapsed 0.019 ms (9.288 ms / 500) 9.332 -> 9.115 ( -2.33%) [ +0.00% +0.21% +0.18% / -2.27% -2.33% -2.33%] index_select strided 255 : Elapsed 0.019 ms (9.332 ms / 500) 9.130 -> 8.976 ( -1.69%) [ +0.05% +0.09% +0.00% / -1.69% -1.41% -1.39%] index_select strided 256 : Elapsed 0.018 ms (9.135 ms / 500) 9.302 -> 9.098 ( -2.19%) [ +0.09% +0.00% +0.18% / -2.19% -1.96% -1.98%] index_select strided 257 : Elapsed 0.019 ms (9.310 ms / 500) 9.408 -> 9.169 ( -2.54%) [ +0.17% +0.23% +0.00% / -1.70% -2.54% -2.54%] index_select random : Elapsed 0.019 ms (9.424 ms / 500) 9.309 -> 9.116 ( -2.07%) [ +0.03% +0.03% +0.00% / -2.07% -1.19% -1.39%] index_select random_sorted : Elapsed 0.019 ms (9.312 ms / 500) 9.366 -> 9.205 ( -1.72%) [ +0.14% +0.02% +0.00% / -1.72% -1.68% -1.68%] index_select perm : Elapsed 0.019 ms (9.379 ms / 500) 9.360 -> 9.145 ( -2.30%) [ +0.13% +0.05% +0.00% / -2.30% -1.92% -1.98%] index_select perm_sorted : Elapsed 0.019 ms (9.372 ms / 500) out_shape = [512, 256] in_shape = [512, 32] idx_dim = 1 B = [512, 256] (stride (256, 1)) dim = 1 fill_cnt = 32 11.297 -> 11.153 ( -1.27%) [ +0.00% +0.04% +0.09% / -1.27% -1.15% -1.12%] index_fill_ const : Elapsed 0.023 ms (11.297 ms / 500) 11.359 -> 11.213 ( -1.29%) [ +0.10% +0.17% +0.00% / -1.18% -1.11% -1.29%] index_fill_ linear : Elapsed 0.023 ms (11.370 ms / 500) 11.336 -> 11.212 ( -1.09%) [ +0.50% +0.00% +0.41% / -1.00% -0.84% -1.09%] index_fill_ reverse : Elapsed 0.023 ms (11.393 ms / 500) 11.348 -> 11.150 ( -1.74%) [ +0.23% +0.00% +0.33% / -1.47% -1.72% -1.74%] index_fill_ skip64 : Elapsed 0.023 ms (11.374 ms / 500) 11.324 -> 11.131 ( -1.70%) [ +0.37% +0.39% +0.00% / -1.47% -1.59% -1.70%] index_fill_ skip256 : Elapsed 0.023 ms (11.366 ms / 500) 13.173 -> 13.011 ( -1.23%) [ +0.01% +0.05% +0.00% / -1.23% -0.88% -0.88%] index_fill_ spread : Elapsed 0.026 ms (13.174 ms / 500) 11.969 -> 11.729 ( -2.01%) [ +0.13% +0.28% +0.00% / -2.01% -1.90% -1.87%] index_fill_ strided 3 : Elapsed 0.024 ms (11.985 ms / 500) 12.438 -> 12.179 ( -2.08%) [ +0.22% +0.00% +0.41% / -1.94% -2.08% -1.99%] index_fill_ strided 5 : Elapsed 0.025 ms (12.465 ms / 500) 12.975 -> 12.649 ( -2.51%) [ +0.00% +0.05% +0.15% / -2.30% -2.17% -2.51%] index_fill_ strided 7 : Elapsed 0.026 ms (12.975 ms / 500) 13.156 -> 13.013 ( -1.09%) [ +0.08% +0.14% +0.00% / -1.09% -0.82% -0.91%] index_fill_ strided 8 : Elapsed 0.026 ms (13.167 ms / 500) 12.147 -> 11.809 ( -2.78%) [ +0.40% +0.29% +0.00% / -2.78% -2.77% -2.72%] index_fill_ strided 16 : Elapsed 0.024 ms (12.196 ms / 500) 11.478 -> 11.130 ( -3.03%) [ +0.00% +0.32% +0.23% / -3.03% -2.82% -2.79%] index_fill_ strided 64 : Elapsed 0.023 ms (11.478 ms / 500) 12.909 -> 12.438 ( -3.65%) [ +0.35% +0.08% +0.00% / -3.65% -3.01% -3.12%] index_fill_ strided 100 : Elapsed 0.026 ms (12.954 ms / 500) 11.442 -> 11.332 ( -0.96%) [ +0.13% +0.24% +0.00% / -0.66% -0.96% -0.16%] index_fill_ strided 255 : Elapsed 0.023 ms (11.457 ms / 500) 12.531 -> 12.162 ( -2.94%) [ +0.44% +0.49% +0.00% / -2.94% -2.82% -2.94%] index_fill_ random : Elapsed 0.025 ms (12.586 ms / 500) 12.519 -> 12.266 ( -2.02%) [ +0.14% +0.50% +0.00% / -1.90% -2.02% -1.76%] index_fill_ random_sorted : Elapsed 0.025 ms (12.536 ms / 500) 12.401 -> 12.107 ( -2.37%) [ +0.46% +0.23% +0.00% / -2.37% -2.13% -2.23%] index_fill_ perm : Elapsed 0.025 ms (12.458 ms / 500) 12.472 -> 12.241 ( -1.85%) [ +0.00% +0.02% +0.06% / -1.85% -1.52% -1.48%] index_fill_ perm_sorted : Elapsed 0.025 ms (12.472 ms / 500) B = [512, 256] (stride (256, 1)) A = [512, 32] (stride (32, 1)) dim = 1 18.195 -> 17.739 ( -2.51%) [ +0.14% +0.00% +0.07% / -2.45% -2.51% -2.48%] index_add_ linear : Elapsed 0.036 ms (18.220 ms / 500) 17.925 -> 17.569 ( -1.99%) [ +0.03% +0.07% +0.00% / -1.86% -1.96% -1.99%] index_copy_ linear : Elapsed 0.036 ms (17.931 ms / 500) 18.188 -> 17.734 ( -2.50%) [ +0.01% +0.00% +0.05% / -2.50% -2.32% -2.21%] index_add_ reverse : Elapsed 0.036 ms (18.190 ms / 500) 17.912 -> 17.585 ( -1.83%) [ +0.07% +0.04% +0.00% / -1.83% -1.78% -1.59%] index_copy_ reverse : Elapsed 0.036 ms (17.924 ms / 500) 20.024 -> 19.385 ( -3.19%) [ +0.19% +0.01% +0.00% / -3.19% -3.10% -3.18%] index_add_ spread : Elapsed 0.040 ms (20.062 ms / 500) 19.578 -> 19.396 ( -0.93%) [ +0.18% +0.10% +0.00% / -0.93% -0.77% -0.90%] index_copy_ spread : Elapsed 0.039 ms (19.614 ms / 500) 18.864 -> 18.200 ( -3.52%) [ +0.05% +0.00% +0.17% / -3.47% -3.51% -3.52%] index_add_ strided 3 : Elapsed 0.038 ms (18.874 ms / 500) 18.659 -> 18.129 ( -2.84%) [ +0.06% +0.00% +0.10% / -2.60% -2.84% -2.70%] index_copy_ strided 3 : Elapsed 0.037 ms (18.670 ms / 500) 19.334 -> 18.611 ( -3.74%) [ +0.00% +0.07% +0.10% / -3.63% -3.63% -3.74%] index_add_ strided 5 : Elapsed 0.039 ms (19.334 ms / 500) 19.086 -> 18.587 ( -2.61%) [ +0.00% +0.06% +0.02% / -2.46% -2.61% -2.55%] index_copy_ strided 5 : Elapsed 0.038 ms (19.086 ms / 500) 19.805 -> 19.158 ( -3.27%) [ +0.05% +0.01% +0.00% / -3.17% -3.27% -3.16%] index_add_ strided 7 : Elapsed 0.040 ms (19.815 ms / 500) 19.476 -> 19.063 ( -2.12%) [ +0.02% +0.00% +0.13% / -2.02% -2.12% -2.08%] index_copy_ strided 7 : Elapsed 0.039 ms (19.480 ms / 500) 18.266 -> 17.854 ( -2.26%) [ +0.09% +0.00% +0.04% / -2.16% -2.26% -2.12%] index_add_ strided 255 : Elapsed 0.037 ms (18.282 ms / 500) 18.052 -> 17.716 ( -1.86%) [ +0.07% +0.00% +0.04% / -1.73% -1.86% -1.85%] index_copy_ strided 255 : Elapsed 0.036 ms (18.064 ms / 500) 19.275 -> 18.744 ( -2.75%) [ +0.08% +0.00% +0.04% / -2.65% -2.73% -2.75%] index_add_ perm : Elapsed 0.039 ms (19.291 ms / 500) 18.841 -> 18.463 ( -2.01%) [ +0.15% +0.12% +0.00% / -1.95% -1.96% -2.01%] index_copy_ perm : Elapsed 0.038 ms (18.869 ms / 500) 19.286 -> 18.714 ( -2.97%) [ +0.01% +0.00% +0.02% / -2.90% -2.97% -2.93%] index_add_ perm_sorted : Elapsed 0.039 ms (19.287 ms / 500) 18.867 -> 18.486 ( -2.02%) [ +0.06% +0.03% +0.00% / -1.84% -2.02% -1.99%] index_copy_ perm_sorted : Elapsed 0.038 ms (18.878 ms / 500) Good 27.914 -> 24.865 (-10.92%) [ +0.00% +0.03% +0.01% / -10.87% -10.92% -10.78%] index_select const : Elapsed 0.056 ms (27.914 ms / 500) Good 28.121 -> 24.875 (-11.54%) [ +0.09% +0.00% +0.10% / -11.30% -11.38% -11.54%] index_select wrap : Elapsed 0.056 ms (28.146 ms / 500) Good 27.908 -> 25.068 (-10.18%) [ +0.07% +0.03% +0.00% / -9.94% -10.13% -10.18%] index_select linear : Elapsed 0.056 ms (27.928 ms / 500) good 27.891 -> 25.111 ( -9.97%) [ +0.07% +0.00% +0.01% / -9.97% -9.92% -9.92%] index_select reverse : Elapsed 0.056 ms (27.910 ms / 500) Good 27.791 -> 24.839 (-10.62%) [ +0.00% +0.00% +0.23% / -10.62% -10.62% -10.57%] index_select skip64 : Elapsed 0.056 ms (27.791 ms / 500) Good 28.151 -> 24.850 (-11.73%) [ +0.12% +0.00% +0.15% / -11.73% -11.40% -11.56%] index_select skip256 : Elapsed 0.056 ms (28.186 ms / 500) Good 28.124 -> 25.284 (-10.10%) [ +0.06% +0.00% +0.09% / -9.96% -9.97% -10.10%] index_select spread : Elapsed 0.056 ms (28.142 ms / 500) Good 28.143 -> 24.892 (-11.55%) [ +0.10% +0.01% +0.00% / -11.55% -11.33% -11.41%] index_select strided 3 : Elapsed 0.056 ms (28.171 ms / 500) Good 28.089 -> 24.904 (-11.34%) [ +0.00% +0.01% +0.00% / -11.28% -11.24% -11.34%] index_select strided 5 : Elapsed 0.056 ms (28.089 ms / 500) Good 28.075 -> 24.862 (-11.44%) [ +0.14% +0.00% +0.11% / -11.44% -11.23% -11.22%] index_select strided 7 : Elapsed 0.056 ms (28.113 ms / 500) Good 27.956 -> 24.843 (-11.14%) [ +0.08% +0.00% +0.07% / -10.75% -11.14% -11.01%] index_select strided 8 : Elapsed 0.056 ms (27.979 ms / 500) Good 28.009 -> 24.863 (-11.23%) [ +0.07% +0.00% +0.05% / -11.23% -11.11% -11.12%] index_select strided 16 : Elapsed 0.056 ms (28.028 ms / 500) Good 27.937 -> 24.888 (-10.91%) [ +0.11% +0.00% +0.07% / -10.78% -10.87% -10.91%] index_select random : Elapsed 0.056 ms (27.969 ms / 500) Good 28.048 -> 25.224 (-10.07%) [ +0.04% +0.00% +0.03% / -10.07% -9.95% -9.85%] index_select random_sorted : Elapsed 0.056 ms (28.060 ms / 500) B = [512, 256] (stride (256, 1)) A = [512, 32] (stride (1, 512)) dim = 1 18.181 -> 17.783 ( -2.19%) [ +0.13% +0.00% +0.02% / -1.97% -2.19% -2.12%] index_add_ linear : Elapsed 0.036 ms (18.205 ms / 500) 17.867 -> 17.708 ( -0.89%) [ +0.13% +0.00% +0.00% / -0.71% -0.89% -0.63%] index_copy_ linear : Elapsed 0.036 ms (17.890 ms / 500) 18.259 -> 17.787 ( -2.59%) [ +0.12% +0.00% +0.03% / -2.57% -2.59% -2.45%] index_add_ reverse : Elapsed 0.037 ms (18.281 ms / 500) 17.952 -> 17.709 ( -1.35%) [ +0.22% +0.06% +0.00% / -1.35% -1.21% -0.98%] index_copy_ reverse : Elapsed 0.036 ms (17.991 ms / 500) good 20.400 -> 19.365 ( -5.07%) [ +0.00% +0.00% +0.00% / -4.96% -5.03% -5.07%] index_add_ spread : Elapsed 0.041 ms (20.400 ms / 500) 19.867 -> 19.441 ( -2.14%) [ +0.05% +0.00% +0.02% / -1.95% -2.14% -1.92%] index_copy_ spread : Elapsed 0.040 ms (19.876 ms / 500) 18.842 -> 18.195 ( -3.43%) [ +0.03% +0.05% +0.00% / -3.43% -3.29% -3.25%] index_add_ strided 3 : Elapsed 0.038 ms (18.847 ms / 500) 18.517 -> 18.247 ( -1.46%) [ +0.04% +0.00% +0.08% / -1.46% -1.16% -0.96%] index_copy_ strided 3 : Elapsed 0.037 ms (18.525 ms / 500) 19.379 -> 18.586 ( -4.09%) [ +0.03% +0.01% +0.00% / -4.09% -3.91% -3.76%] index_add_ strided 5 : Elapsed 0.039 ms (19.384 ms / 500) 19.056 -> 18.681 ( -1.97%) [ +0.01% +0.01% +0.00% / -1.97% -1.77% -1.43%] index_copy_ strided 5 : Elapsed 0.038 ms (19.058 ms / 500) 19.972 -> 19.180 ( -3.97%) [ +0.00% +0.02% +0.08% / -3.97% -3.92% -3.92%] index_add_ strided 7 : Elapsed 0.040 ms (19.972 ms / 500) 19.536 -> 19.157 ( -1.94%) [ +0.15% +0.08% +0.00% / -1.92% -1.94% -1.73%] index_copy_ strided 7 : Elapsed 0.039 ms (19.565 ms / 500) 18.244 -> 17.884 ( -1.97%) [ +0.08% +0.00% +0.09% / -1.84% -1.97% -1.86%] index_add_ strided 255 : Elapsed 0.037 ms (18.259 ms / 500) 17.944 -> 17.843 ( -0.56%) [ +0.07% +0.00% +0.11% / -0.56% -0.46% -0.21%] index_copy_ strided 255 : Elapsed 0.036 ms (17.957 ms / 500) 19.660 -> 19.020 ( -3.26%) [ +0.10% +0.08% +0.00% / -3.23% -3.26% -3.11%] index_add_ perm : Elapsed 0.039 ms (19.680 ms / 500) 19.120 -> 18.762 ( -1.87%) [ +0.08% +0.02% +0.00% / -1.70% -1.87% -1.60%] index_copy_ perm : Elapsed 0.038 ms (19.136 ms / 500) 19.685 -> 18.976 ( -3.60%) [ +0.02% +0.08% +0.00% / -3.39% -3.60% -3.41%] index_add_ perm_sorted : Elapsed 0.039 ms (19.689 ms / 500) 19.126 -> 18.799 ( -1.71%) [ +0.00% +0.01% +0.14% / -1.70% -1.71% -1.54%] index_copy_ perm_sorted : Elapsed 0.038 ms (19.126 ms / 500) good 26.590 -> 24.824 ( -6.64%) [ +0.05% +0.15% +0.00% / -6.59% -6.64% -6.58%] index_select const : Elapsed 0.053 ms (26.604 ms / 500) 26.495 -> 25.884 ( -2.31%) [ +0.00% +0.00% +0.11% / -2.31% -2.14% -2.09%] index_select wrap : Elapsed 0.053 ms (26.496 ms / 500) good 26.450 -> 24.958 ( -5.64%) [ +0.06% +0.04% +0.00% / -5.50% -5.60% -5.64%] index_select linear : Elapsed 0.053 ms (26.467 ms / 500) good 26.548 -> 24.943 ( -6.05%) [ +0.08% +0.00% +0.12% / -5.99% -6.05% -5.96%] index_select reverse : Elapsed 0.053 ms (26.568 ms / 500) good 26.365 -> 24.795 ( -5.95%) [ +0.00% +0.09% +0.09% / -5.95% -5.90% -5.92%] index_select skip64 : Elapsed 0.053 ms (26.365 ms / 500) good 26.452 -> 24.826 ( -6.15%) [ +0.08% +0.07% +0.00% / -6.15% -6.15% -5.99%] index_select skip256 : Elapsed 0.053 ms (26.474 ms / 500) good 26.433 -> 25.071 ( -5.15%) [ +0.05% +0.05% +0.00% / -5.02% -5.08% -5.15%] index_select spread : Elapsed 0.053 ms (26.445 ms / 500) 26.612 -> 25.898 ( -2.68%) [ +0.03% +0.01% +0.00% / -2.68% -2.34% -2.45%] index_select strided 3 : Elapsed 0.053 ms (26.620 ms / 500) 26.720 -> 26.275 ( -1.67%) [ +0.04% +0.00% +0.09% / -1.54% -1.64% -1.67%] index_select strided 5 : Elapsed 0.053 ms (26.732 ms / 500) 26.537 -> 26.031 ( -1.91%) [ +0.06% +0.01% +0.00% / -1.91% -1.58% -1.54%] index_select strided 7 : Elapsed 0.053 ms (26.554 ms / 500) good 26.375 -> 24.812 ( -5.93%) [ +0.14% +0.00% +0.02% / -5.77% -5.93% -5.82%] index_select strided 8 : Elapsed 0.053 ms (26.413 ms / 500) good 26.343 -> 24.804 ( -5.84%) [ +0.04% +0.00% +0.03% / -5.74% -5.84% -5.74%] index_select strided 16 : Elapsed 0.053 ms (26.353 ms / 500) 26.488 -> 25.472 ( -3.84%) [ +0.04% +0.13% +0.00% / -3.84% -3.35% -3.33%] index_select random : Elapsed 0.053 ms (26.499 ms / 500) good 26.547 -> 25.090 ( -5.49%) [ +0.13% +0.11% +0.00% / -5.49% -5.48% -5.39%] index_select random_sorted : Elapsed 0.053 ms (26.582 ms / 500) B = [512, 256] (stride (1, 512)) dim = 1 fill_cnt = 32 11.101 -> 11.110 ( +0.08%) [ +0.00% +0.07% +0.04% / +0.13% +0.23% +0.08%] index_fill_ const : Elapsed 0.022 ms (11.101 ms / 500) 11.254 -> 11.171 ( -0.74%) [ +0.00% +0.02% +0.13% / -0.08% -0.74% -0.57%] index_fill_ linear : Elapsed 0.023 ms (11.254 ms / 500) 11.207 -> 11.189 ( -0.16%) [ +0.44% +0.55% +0.00% / +0.19% -0.04% -0.16%] index_fill_ reverse : Elapsed 0.023 ms (11.256 ms / 500) 11.143 -> 11.093 ( -0.45%) [ +0.13% +0.03% +0.00% / -0.16% -0.45% -0.15%] index_fill_ skip64 : Elapsed 0.022 ms (11.157 ms / 500) 11.178 -> 11.113 ( -0.58%) [ +0.00% +0.06% +0.05% / -0.58% -0.53% -0.56%] index_fill_ skip256 : Elapsed 0.022 ms (11.178 ms / 500) 11.245 -> 11.315 ( +0.62%) [ +0.00% +0.31% +0.65% / +0.62% +0.62% +0.63%] index_fill_ spread : Elapsed 0.022 ms (11.245 ms / 500) 11.272 -> 11.238 ( -0.30%) [ +0.00% +0.16% +0.05% / -0.07% -0.30% +0.01%] index_fill_ strided 3 : Elapsed 0.023 ms (11.272 ms / 500) 11.235 -> 11.203 ( -0.28%) [ +0.34% +0.00% +0.32% / +0.21% -0.18% -0.28%] index_fill_ strided 5 : Elapsed 0.023 ms (11.273 ms / 500) 11.233 -> 11.194 ( -0.35%) [ +0.50% +0.00% +0.34% / +0.21% -0.35% -0.01%] index_fill_ strided 7 : Elapsed 0.023 ms (11.289 ms / 500) 11.272 -> 11.275 ( +0.03%) [ +0.00% +0.74% +0.12% / +0.22% +0.03% +0.17%] index_fill_ strided 8 : Elapsed 0.023 ms (11.272 ms / 500) 11.226 -> 11.204 ( -0.20%) [ +0.37% +0.34% +0.00% / -0.04% -0.15% -0.20%] index_fill_ strided 16 : Elapsed 0.023 ms (11.268 ms / 500) 11.109 -> 11.141 ( +0.29%) [ +0.53% +0.07% +0.00% / +0.29% +0.87% +0.44%] index_fill_ strided 64 : Elapsed 0.022 ms (11.168 ms / 500) 11.207 -> 11.214 ( +0.06%) [ +0.20% +0.00% +0.23% / +0.06% +0.62% +0.21%] index_fill_ strided 100 : Elapsed 0.022 ms (11.229 ms / 500) 11.248 -> 11.210 ( -0.34%) [ +0.21% +0.13% +0.00% / +0.05% -0.29% -0.34%] index_fill_ strided 255 : Elapsed 0.023 ms (11.272 ms / 500) 11.234 -> 11.231 ( -0.03%) [ +0.13% +0.46% +0.00% / +0.45% -0.03% +0.09%] index_fill_ random : Elapsed 0.022 ms (11.249 ms / 500) 11.248 -> 11.249 ( +0.01%) [ +0.00% +0.43% +0.01% / +0.01% +0.16% +0.15%] index_fill_ random_sorted : Elapsed 0.022 ms (11.248 ms / 500) 11.260 -> 11.250 ( -0.09%) [ +0.00% +0.13% +0.20% / -0.09% +0.22% -0.04%] index_fill_ perm : Elapsed 0.023 ms (11.260 ms / 500) 11.206 -> 11.244 ( +0.34%) [ +0.00% +0.21% +0.34% / +0.34% +0.61% +0.55%] index_fill_ perm_sorted : Elapsed 0.022 ms (11.206 ms / 500) B = [512, 256] (stride (1, 512)) A = [512, 32] (stride (32, 1)) dim = 1 17.895 -> 17.725 ( -0.95%) [ +0.03% +0.05% +0.00% / -0.95% -0.30% -0.37%] index_add_ linear : Elapsed 0.036 ms (17.901 ms / 500) 17.821 -> 17.654 ( -0.94%) [ +0.00% +0.11% +0.04% / -0.94% -0.29% -0.48%] index_copy_ linear : Elapsed 0.036 ms (17.821 ms / 500) 17.873 -> 17.723 ( -0.84%) [ +0.00% +0.08% +0.01% / -0.84% -0.74% -0.81%] index_add_ reverse : Elapsed 0.036 ms (17.873 ms / 500) 17.782 -> 17.643 ( -0.78%) [ +0.03% +0.00% +0.06% / -0.78% -0.58% -0.66%] index_copy_ reverse : Elapsed 0.036 ms (17.787 ms / 500) 17.876 -> 17.730 ( -0.82%) [ +0.14% +0.00% +0.07% / -0.82% -0.58% -0.58%] index_add_ spread : Elapsed 0.036 ms (17.901 ms / 500) 17.751 -> 17.620 ( -0.74%) [ +0.05% +0.00% +0.12% / -0.74% -0.65% -0.73%] index_copy_ spread : Elapsed 0.036 ms (17.759 ms / 500) 18.036 -> 17.681 ( -1.97%) [ +0.09% +0.00% +0.02% / -0.88% -1.97% -1.92%] index_add_ strided 3 : Elapsed 0.036 ms (18.053 ms / 500) 17.922 -> 17.629 ( -1.63%) [ +0.00% +0.11% +0.03% / -0.86% -1.62% -1.63%] index_copy_ strided 3 : Elapsed 0.036 ms (17.922 ms / 500) 17.963 -> 17.756 ( -1.15%) [ +0.00% +0.07% +0.01% / -0.92% -1.15% -1.13%] index_add_ strided 5 : Elapsed 0.036 ms (17.963 ms / 500) 17.904 -> 17.650 ( -1.42%) [ +0.00% +0.01% +0.00% / -0.90% -1.42% -1.40%] index_copy_ strided 5 : Elapsed 0.036 ms (17.904 ms / 500) 17.959 -> 17.678 ( -1.56%) [ +0.06% +0.13% +0.00% / -0.89% -1.53% -1.56%] index_add_ strided 7 : Elapsed 0.036 ms (17.970 ms / 500) 17.834 -> 17.633 ( -1.13%) [ +0.04% +0.15% +0.00% / -0.84% -1.00% -1.13%] index_copy_ strided 7 : Elapsed 0.036 ms (17.842 ms / 500) 17.910 -> 17.729 ( -1.01%) [ +0.10% +0.00% +0.08% / -0.75% -0.97% -1.01%] index_add_ strided 255 : Elapsed 0.036 ms (17.928 ms / 500) 17.834 -> 17.642 ( -1.08%) [ +0.04% +0.03% +0.00% / -0.79% -1.01% -1.08%] index_copy_ strided 255 : Elapsed 0.036 ms (17.841 ms / 500) 17.859 -> 17.705 ( -0.86%) [ +0.09% +0.06% +0.00% / -0.86% -0.78% -0.69%] index_add_ perm : Elapsed 0.036 ms (17.875 ms / 500) 17.762 -> 17.629 ( -0.75%) [ +0.14% +0.11% +0.00% / -0.75% -0.63% -0.64%] index_copy_ perm : Elapsed 0.036 ms (17.786 ms / 500) 17.870 -> 17.711 ( -0.89%) [ +0.00% +0.14% +0.04% / -0.89% -0.45% -0.46%] index_add_ perm_sorted : Elapsed 0.036 ms (17.870 ms / 500) 17.795 -> 17.628 ( -0.94%) [ +0.04% +0.04% +0.00% / -0.94% -0.72% -0.74%] index_copy_ perm_sorted : Elapsed 0.036 ms (17.802 ms / 500) 25.466 -> 25.465 ( -0.00%) [ +0.03% +0.03% +0.00% / -0.00% +0.28% +0.27%] index_select const : Elapsed 0.051 ms (25.474 ms / 500) 25.642 -> 25.607 ( -0.14%) [ +0.10% +0.00% +0.00% / +0.00% -0.14% -0.14%] index_select wrap : Elapsed 0.051 ms (25.668 ms / 500) 25.582 -> 25.623 ( +0.16%) [ +0.13% +0.10% +0.00% / +0.16% +0.18% +0.21%] index_select linear : Elapsed 0.051 ms (25.614 ms / 500) 25.382 -> 25.383 ( +0.00%) [ +0.17% +0.00% +0.00% / +0.00% +0.49% +0.40%] index_select reverse : Elapsed 0.051 ms (25.425 ms / 500) 25.448 -> 25.433 ( -0.06%) [ +0.02% +0.00% +0.01% / +0.02% -0.06% -0.05%] index_select skip64 : Elapsed 0.051 ms (25.454 ms / 500) 25.614 -> 25.646 ( +0.12%) [ +0.09% +0.09% +0.00% / +0.12% +1.19% +1.22%] index_select skip256 : Elapsed 0.051 ms (25.638 ms / 500) 25.702 -> 25.471 ( -0.90%) [ +0.02% +0.00% +0.03% / -0.08% -0.86% -0.90%] index_select spread : Elapsed 0.051 ms (25.706 ms / 500) 25.512 -> 25.544 ( +0.13%) [ +0.14% +0.00% +0.07% / +0.13% +0.71% +0.58%] index_select strided 3 : Elapsed 0.051 ms (25.548 ms / 500) 25.620 -> 25.560 ( -0.23%) [ +0.09% +0.00% +0.05% / +0.06% -0.23% -0.16%] index_select strided 5 : Elapsed 0.051 ms (25.643 ms / 500) 25.594 -> 25.601 ( +0.03%) [ +0.10% +0.00% +0.00% / +0.03% +0.15% +0.13%] index_select strided 7 : Elapsed 0.051 ms (25.619 ms / 500) 25.577 -> 25.609 ( +0.13%) [ +0.04% +0.08% +0.00% / +0.13% +0.21% +0.18%] index_select strided 8 : Elapsed 0.051 ms (25.587 ms / 500) 25.515 -> 25.548 ( +0.13%) [ +0.05% +0.13% +0.00% / +0.13% +0.28% +0.21%] index_select strided 16 : Elapsed 0.051 ms (25.528 ms / 500) 25.585 -> 25.622 ( +0.14%) [ +0.06% +0.09% +0.00% / +0.14% +0.25% +0.18%] index_select random : Elapsed 0.051 ms (25.601 ms / 500) 25.435 -> 25.438 ( +0.01%) [ +0.02% +0.06% +0.00% / +0.01% +0.66% +0.64%] index_select random_sorted : Elapsed 0.051 ms (25.441 ms / 500) B = [512, 256] (stride (1, 512)) A = [512, 32] (stride (1, 512)) dim = 1 17.914 -> 17.733 ( -1.01%) [ +0.09% +0.00% +0.09% / -0.84% -0.96% -1.01%] index_add_ linear : Elapsed 0.036 ms (17.931 ms / 500) 17.740 -> 17.558 ( -1.03%) [ +0.01% +0.01% +0.00% / -0.83% -0.88% -1.03%] index_copy_ linear : Elapsed 0.035 ms (17.741 ms / 500) 17.982 -> 17.733 ( -1.38%) [ +0.03% +0.12% +0.00% / -0.75% -1.37% -1.38%] index_add_ reverse : Elapsed 0.036 ms (17.987 ms / 500) 17.821 -> 17.569 ( -1.41%) [ +0.06% +0.12% +0.00% / -0.86% -1.34% -1.41%] index_copy_ reverse : Elapsed 0.036 ms (17.831 ms / 500) 17.939 -> 17.739 ( -1.11%) [ +0.04% +0.00% +0.03% / -0.88% -1.08% -1.11%] index_add_ spread : Elapsed 0.036 ms (17.947 ms / 500) 17.763 -> 17.538 ( -1.27%) [ +0.04% +0.00% +0.07% / -0.86% -1.23% -1.27%] index_copy_ spread : Elapsed 0.036 ms (17.770 ms / 500) 17.955 -> 17.685 ( -1.50%) [ +0.06% +0.00% +0.05% / -0.86% -1.42% -1.50%] index_add_ strided 3 : Elapsed 0.036 ms (17.965 ms / 500) 17.729 -> 17.557 ( -0.97%) [ +0.00% +0.04% +0.06% / -0.97% -0.90% -0.95%] index_copy_ strided 3 : Elapsed 0.035 ms (17.729 ms / 500) 17.831 -> 17.700 ( -0.73%) [ +0.13% +0.02% +0.00% / -0.73% -0.30% -0.43%] index_add_ strided 5 : Elapsed 0.036 ms (17.854 ms / 500) 17.693 -> 17.556 ( -0.77%) [ +0.01% +0.01% +0.00% / -0.77% -0.42% -0.67%] index_copy_ strided 5 : Elapsed 0.035 ms (17.694 ms / 500) 17.937 -> 17.672 ( -1.48%) [ +0.12% +0.07% +0.00% / -0.84% -1.44% -1.48%] index_add_ strided 7 : Elapsed 0.036 ms (17.959 ms / 500) 17.745 -> 17.540 ( -1.16%) [ +0.07% +0.03% +0.00% / -0.86% -0.95% -1.16%] index_copy_ strided 7 : Elapsed 0.036 ms (17.758 ms / 500) 17.900 -> 17.721 ( -1.00%) [ +0.06% +0.00% +0.01% / -0.87% -1.00% -0.89%] index_add_ strided 255 : Elapsed 0.036 ms (17.910 ms / 500) 17.727 -> 17.567 ( -0.90%) [ +0.09% +0.03% +0.00% / -0.90% -0.87% -0.87%] index_copy_ strided 255 : Elapsed 0.035 ms (17.743 ms / 500) 17.912 -> 17.713 ( -1.11%) [ +0.00% +0.09% +0.04% / -0.81% -1.11% -1.06%] index_add_ perm : Elapsed 0.036 ms (17.912 ms / 500) 17.737 -> 17.549 ( -1.06%) [ +0.10% +0.17% +0.00% / -0.55% -1.05% -1.06%] index_copy_ perm : Elapsed 0.036 ms (17.754 ms / 500) 17.904 -> 17.754 ( -0.84%) [ +0.11% +0.14% +0.00% / -0.78% -0.84% -0.80%] index_add_ perm_sorted : Elapsed 0.036 ms (17.924 ms / 500) 17.746 -> 17.565 ( -1.02%) [ +0.01% +0.11% +0.00% / -0.83% -1.02% -0.96%] index_copy_ perm_sorted : Elapsed 0.035 ms (17.747 ms / 500) 24.776 -> 24.778 ( +0.01%) [ +0.10% +0.00% +0.00% / +0.21% +0.06% +0.01%] index_select const : Elapsed 0.050 ms (24.801 ms / 500) 24.905 -> 24.974 ( +0.28%) [ +0.00% +0.15% +0.32% / +0.28% +0.40% +0.31%] index_select wrap : Elapsed 0.050 ms (24.905 ms / 500) 24.875 -> 24.931 ( +0.23%) [ +0.00% +0.37% +0.19% / +0.32% +0.26% +0.23%] index_select linear : Elapsed 0.050 ms (24.875 ms / 500) 25.003 -> 25.020 ( +0.07%) [ +0.09% +0.02% +0.00% / +0.11% +0.07% +0.20%] index_select reverse : Elapsed 0.050 ms (25.026 ms / 500) 24.731 -> 24.788 ( +0.23%) [ +0.24% +0.00% +0.25% / +0.23% +0.27% +0.36%] index_select skip64 : Elapsed 0.050 ms (24.791 ms / 500) 24.765 -> 24.791 ( +0.10%) [ +0.12% +0.00% +0.02% / +0.12% +0.17% +0.10%] index_select skip256 : Elapsed 0.050 ms (24.794 ms / 500) 24.830 -> 24.826 ( -0.02%) [ +0.00% +0.11% +0.11% / +0.07% -0.02% +0.04%] index_select spread : Elapsed 0.050 ms (24.830 ms / 500) 24.939 -> 24.993 ( +0.22%) [ +0.18% +0.00% +0.21% / +0.22% +0.93% +0.83%] index_select strided 3 : Elapsed 0.050 ms (24.984 ms / 500) 25.099 -> 24.934 ( -0.66%) [ +0.12% +0.06% +0.00% / +0.08% -0.45% -0.66%] index_select strided 5 : Elapsed 0.050 ms (25.128 ms / 500) 24.935 -> 24.913 ( -0.09%) [ +0.00% +0.00% +0.18% / -0.09% +0.26% +0.23%] index_select strided 7 : Elapsed 0.050 ms (24.935 ms / 500) 24.804 -> 24.767 ( -0.15%) [ +0.16% +0.05% +0.00% / +0.10% -0.15% -0.11%] index_select strided 8 : Elapsed 0.050 ms (24.843 ms / 500) 24.783 -> 24.772 ( -0.04%) [ +0.16% +0.15% +0.00% / +0.12% +0.03% -0.04%] index_select strided 16 : Elapsed 0.050 ms (24.822 ms / 500) 24.893 -> 24.902 ( +0.04%) [ +0.00% +0.12% +0.09% / +0.04% +0.31% +0.38%] index_select random : Elapsed 0.050 ms (24.893 ms / 500) 25.021 -> 24.831 ( -0.76%) [ +0.03% +0.00% +0.12% / -0.02% -0.74% -0.76%] index_select random_sorted : Elapsed 0.050 ms (25.029 ms / 500) out_shape = [512, 256] in_shape = [32, 256] idx_dim = 0 B = [512, 256] (stride (256, 1)) dim = 0 fill_cnt = 32 10.723 -> 10.783 ( +0.56%) [ +0.30% +0.21% +0.00% / +0.56% +0.60% +0.56%] index_fill_ const : Elapsed 0.022 ms (10.755 ms / 500) 10.775 -> 10.825 ( +0.46%) [ +0.33% +0.21% +0.00% / +0.96% +0.60% +0.46%] index_fill_ linear : Elapsed 0.022 ms (10.811 ms / 500) 10.783 -> 10.830 ( +0.44%) [ +0.40% +0.19% +0.00% / +0.83% +0.44% +0.47%] index_fill_ reverse : Elapsed 0.022 ms (10.826 ms / 500) 10.727 -> 10.787 ( +0.56%) [ +0.00% +0.00% +0.10% / +0.80% +0.73% +0.56%] index_fill_ skip64 : Elapsed 0.021 ms (10.727 ms / 500) 10.740 -> 10.788 ( +0.45%) [ +0.26% +0.04% +0.00% / +0.49% +0.63% +0.45%] index_fill_ skip256 : Elapsed 0.022 ms (10.768 ms / 500) 10.778 -> 10.895 ( +1.09%) [ +0.32% +0.00% +0.41% / +1.23% +1.29% +1.09%] index_fill_ spread : Elapsed 0.022 ms (10.813 ms / 500) 10.756 -> 10.869 ( +1.05%) [ +0.54% +0.00% +0.11% / +1.05% +1.29% +1.15%] index_fill_ strided 3 : Elapsed 0.022 ms (10.814 ms / 500) 10.803 -> 10.857 ( +0.50%) [ +0.47% +0.00% +0.28% / +0.97% +0.74% +0.50%] index_fill_ strided 5 : Elapsed 0.022 ms (10.854 ms / 500) 10.814 -> 10.868 ( +0.50%) [ +0.03% +0.09% +0.00% / +0.95% +0.50% +0.50%] index_fill_ strided 7 : Elapsed 0.022 ms (10.817 ms / 500) 10.803 -> 10.849 ( +0.43%) [ +0.55% +0.00% +0.25% / +1.04% +0.43% +0.68%] index_fill_ strided 8 : Elapsed 0.022 ms (10.862 ms / 500) 10.867 -> 10.896 ( +0.27%) [ +0.00% +0.06% +0.15% / +0.73% +0.27% +0.61%] index_fill_ strided 16 : Elapsed 0.022 ms (10.867 ms / 500) 10.720 -> 10.789 ( +0.64%) [ +0.45% +0.08% +0.00% / +0.64% +1.07% +1.22%] index_fill_ strided 64 : Elapsed 0.022 ms (10.768 ms / 500) 10.773 -> 10.846 ( +0.68%) [ +0.00% +0.00% +0.17% / +0.68% +1.16% +1.20%] index_fill_ strided 100 : Elapsed 0.022 ms (10.773 ms / 500) 10.769 -> 10.838 ( +0.64%) [ +0.56% +0.00% +0.23% / +1.64% +0.64% +0.74%] index_fill_ strided 255 : Elapsed 0.022 ms (10.829 ms / 500) 10.747 -> 10.817 ( +0.65%) [ +0.48% +0.00% +0.25% / +1.06% +0.73% +0.65%] index_fill_ strided 256 : Elapsed 0.022 ms (10.799 ms / 500) 10.790 -> 10.873 ( +0.77%) [ +0.20% +0.07% +0.00% / +0.77% +1.02% +1.15%] index_fill_ strided 257 : Elapsed 0.022 ms (10.812 ms / 500) 10.768 -> 10.868 ( +0.93%) [ +0.21% +0.46% +0.00% / +0.93% +1.37% +1.38%] index_fill_ random : Elapsed 0.022 ms (10.791 ms / 500) 10.742 -> 10.869 ( +1.18%) [ +0.15% +0.20% +0.00% / +1.18% +1.54% +1.90%] index_fill_ random_sorted : Elapsed 0.022 ms (10.758 ms / 500) 10.753 -> 10.858 ( +0.98%) [ +0.00% +0.51% +0.16% / +0.98% +1.40% +1.24%] index_fill_ perm : Elapsed 0.022 ms (10.753 ms / 500) 10.811 -> 10.862 ( +0.47%) [ +0.18% +0.43% +0.00% / +0.92% +0.53% +0.47%] index_fill_ perm_sorted : Elapsed 0.022 ms (10.831 ms / 500) B = [512, 256] (stride (256, 1)) A = [32, 256] (stride (256, 1)) dim = 0 9.400 -> 9.133 ( -2.84%) [ +0.36% +0.00% +0.05% / -1.91% -2.84% -2.84%] index_add_ linear : Elapsed 0.019 ms (9.434 ms / 500) 9.289 -> 9.020 ( -2.90%) [ +0.04% +0.02% +0.00% / -2.02% -2.90% -2.85%] index_copy_ linear : Elapsed 0.019 ms (9.293 ms / 500) 9.341 -> 9.170 ( -1.83%) [ +0.00% +0.13% +0.05% / -1.83% -1.79% -1.80%] index_add_ reverse : Elapsed 0.019 ms (9.341 ms / 500) 9.208 -> 9.048 ( -1.74%) [ +0.13% +0.13% +0.00% / -1.74% -1.63% -1.60%] index_copy_ reverse : Elapsed 0.018 ms (9.220 ms / 500) 9.293 -> 9.126 ( -1.80%) [ +0.00% +0.05% +0.02% / -1.70% -1.80% -1.74%] index_add_ spread : Elapsed 0.019 ms (9.293 ms / 500) 9.192 -> 9.013 ( -1.95%) [ +0.13% +0.00% +0.13% / -1.87% -1.91% -1.95%] index_copy_ spread : Elapsed 0.018 ms (9.204 ms / 500) 9.294 -> 9.133 ( -1.73%) [ +0.08% +0.00% +0.02% / -1.73% -0.66% -0.75%] index_add_ strided 3 : Elapsed 0.019 ms (9.301 ms / 500) 9.189 -> 9.023 ( -1.81%) [ +0.02% +0.00% +0.01% / -1.81% -0.78% -0.76%] index_copy_ strided 3 : Elapsed 0.018 ms (9.191 ms / 500) 9.289 -> 9.131 ( -1.70%) [ +0.01% +0.02% +0.00% / -1.70% -0.89% -1.00%] index_add_ strided 5 : Elapsed 0.019 ms (9.290 ms / 500) 9.181 -> 9.020 ( -1.75%) [ +0.11% +0.00% +0.05% / -1.75% -1.02% -0.88%] index_copy_ strided 5 : Elapsed 0.018 ms (9.191 ms / 500) 9.295 -> 9.135 ( -1.72%) [ +0.09% +0.00% +0.00% / -1.72% -1.30% -1.32%] index_add_ strided 7 : Elapsed 0.019 ms (9.303 ms / 500) 9.193 -> 9.026 ( -1.82%) [ +0.16% +0.00% +0.20% / -1.82% -1.58% -1.66%] index_copy_ strided 7 : Elapsed 0.018 ms (9.208 ms / 500) 9.292 -> 9.139 ( -1.65%) [ +0.15% +0.00% +0.06% / -1.65% -1.33% -1.49%] index_add_ strided 255 : Elapsed 0.019 ms (9.306 ms / 500) 9.193 -> 9.030 ( -1.77%) [ +0.07% +0.05% +0.00% / -1.77% -1.09% -1.63%] index_copy_ strided 255 : Elapsed 0.018 ms (9.199 ms / 500) 9.344 -> 9.147 ( -2.11%) [ +0.11% +0.06% +0.00% / -1.70% -2.11% -2.04%] index_add_ strided 257 : Elapsed 0.019 ms (9.354 ms / 500) 9.202 -> 9.025 ( -1.92%) [ +0.24% +0.22% +0.00% / -1.31% -1.77% -1.92%] index_copy_ strided 257 : Elapsed 0.018 ms (9.224 ms / 500) 9.337 -> 9.126 ( -2.26%) [ +0.05% +0.07% +0.00% / -1.66% -2.26% -2.24%] index_add_ perm : Elapsed 0.019 ms (9.342 ms / 500) 9.218 -> 9.014 ( -2.21%) [ +0.05% +0.17% +0.00% / -1.67% -2.12% -2.21%] index_copy_ perm : Elapsed 0.018 ms (9.223 ms / 500) 9.359 -> 9.122 ( -2.53%) [ +0.09% +0.06% +0.00% / -1.81% -2.46% -2.53%] index_add_ perm_sorted : Elapsed 0.019 ms (9.367 ms / 500) 9.225 -> 9.005 ( -2.38%) [ +0.00% +0.05% +0.04% / -1.73% -1.89% -2.38%] index_copy_ perm_sorted : Elapsed 0.018 ms (9.225 ms / 500) 24.931 -> 24.749 ( -0.73%) [ +0.00% +0.09% +0.01% / +0.06% -0.63% -0.73%] index_select const : Elapsed 0.050 ms (24.931 ms / 500) 24.816 -> 24.834 ( +0.07%) [ +0.11% +0.00% +0.06% / +0.08% +0.09% +0.07%] index_select wrap : Elapsed 0.050 ms (24.843 ms / 500) 24.812 -> 24.840 ( +0.11%) [ +0.13% +0.00% +0.08% / +0.16% +0.16% +0.11%] index_select linear : Elapsed 0.050 ms (24.844 ms / 500) 24.904 -> 24.944 ( +0.16%) [ +0.00% +0.08% +0.14% / +0.16% +0.92% +0.85%] index_select reverse : Elapsed 0.050 ms (24.904 ms / 500) 24.745 -> 24.797 ( +0.21%) [ +0.12% +0.00% +0.03% / +0.21% +0.36% +0.36%] index_select skip64 : Elapsed 0.050 ms (24.774 ms / 500) 24.760 -> 24.778 ( +0.07%) [ +0.09% +0.00% +0.06% / +0.08% +0.23% +0.07%] index_select skip256 : Elapsed 0.050 ms (24.782 ms / 500) 24.780 -> 24.806 ( +0.10%) [ +0.00% +0.03% +0.05% / +0.15% +0.10% +0.15%] index_select spread : Elapsed 0.050 ms (24.780 ms / 500) 25.002 -> 24.830 ( -0.69%) [ +0.00% +0.02% +0.07% / +0.04% -0.67% -0.69%] index_select strided 3 : Elapsed 0.050 ms (25.002 ms / 500) 24.904 -> 24.847 ( -0.23%) [ +0.16% +0.00% +0.09% / +0.02% -0.23% -0.19%] index_select strided 5 : Elapsed 0.050 ms (24.943 ms / 500) 24.835 -> 24.830 ( -0.02%) [ +0.03% +0.11% +0.00% / +0.05% +0.08% -0.02%] index_select strided 7 : Elapsed 0.050 ms (24.842 ms / 500) 24.723 -> 24.762 ( +0.16%) [ +0.25% +0.00% +0.19% / +0.41% +0.31% +0.16%] index_select strided 8 : Elapsed 0.050 ms (24.786 ms / 500) 24.902 -> 24.763 ( -0.56%) [ +0.10% +0.00% +0.16% / +0.04% -0.49% -0.56%] index_select strided 16 : Elapsed 0.050 ms (24.928 ms / 500) 24.882 -> 24.863 ( -0.08%) [ +0.15% +0.00% +0.20% / +0.00% -0.07% -0.08%] index_select random : Elapsed 0.050 ms (24.920 ms / 500) 24.826 -> 24.813 ( -0.05%) [ +0.05% +0.00% +0.03% / -0.04% +0.05% -0.05%] index_select random_sorted : Elapsed 0.050 ms (24.838 ms / 500) B = [512, 256] (stride (256, 1)) A = [32, 256] (stride (1, 32)) dim = 0 9.277 -> 9.110 ( -1.80%) [ +0.10% +0.00% +0.03% / -1.80% -1.11% -1.07%] index_add_ linear : Elapsed 0.019 ms (9.286 ms / 500) 9.282 -> 9.120 ( -1.75%) [ +0.13% +0.01% +0.00% / -1.75% -1.37% -1.31%] index_copy_ linear : Elapsed 0.019 ms (9.294 ms / 500) 9.309 -> 9.117 ( -2.06%) [ +0.17% +0.11% +0.00% / -1.78% -2.06% -2.06%] index_add_ reverse : Elapsed 0.019 ms (9.325 ms / 500) 9.320 -> 9.114 ( -2.21%) [ +0.20% +0.00% +0.09% / -1.48% -2.14% -2.21%] index_copy_ reverse : Elapsed 0.019 ms (9.339 ms / 500) 9.267 -> 9.067 ( -2.16%) [ +0.08% +0.00% +0.01% / -1.68% -2.04% -2.16%] index_add_ spread : Elapsed 0.019 ms (9.274 ms / 500) 9.312 -> 9.097 ( -2.31%) [ +0.02% +0.00% +0.10% / -1.73% -2.29% -2.31%] index_copy_ spread : Elapsed 0.019 ms (9.314 ms / 500) 9.285 -> 9.121 ( -1.77%) [ +0.25% +0.02% +0.00% / -1.77% -1.17% -1.26%] index_add_ strided 3 : Elapsed 0.019 ms (9.308 ms / 500) 9.286 -> 9.127 ( -1.71%) [ +0.16% +0.00% +0.16% / -1.71% -1.52% -1.52%] index_copy_ strided 3 : Elapsed 0.019 ms (9.301 ms / 500) 9.276 -> 9.108 ( -1.81%) [ +0.05% +0.00% +0.03% / -1.81% -1.14% -1.14%] index_add_ strided 5 : Elapsed 0.019 ms (9.281 ms / 500) 9.278 -> 9.124 ( -1.66%) [ +0.13% +0.05% +0.00% / -1.66% -1.40% -1.47%] index_copy_ strided 5 : Elapsed 0.019 ms (9.290 ms / 500) 9.242 -> 9.068 ( -1.88%) [ +0.15% +0.06% +0.00% / -1.88% -1.26% -1.19%] index_add_ strided 7 : Elapsed 0.019 ms (9.256 ms / 500) 9.270 -> 9.107 ( -1.76%) [ +0.00% +0.03% +0.05% / -1.76% -1.60% -1.45%] index_copy_ strided 7 : Elapsed 0.019 ms (9.270 ms / 500) 9.249 -> 9.097 ( -1.64%) [ +0.13% +0.14% +0.00% / -1.64% -1.30% -1.43%] index_add_ strided 255 : Elapsed 0.019 ms (9.261 ms / 500) 9.265 -> 9.101 ( -1.77%) [ +0.06% +0.00% +0.03% / -1.77% -1.52% -1.68%] index_copy_ strided 255 : Elapsed 0.019 ms (9.271 ms / 500) 9.303 -> 9.115 ( -2.02%) [ +0.08% +0.00% +0.00% / -1.97% -1.89% -2.02%] index_add_ strided 257 : Elapsed 0.019 ms (9.310 ms / 500) 9.301 -> 9.116 ( -1.99%) [ +0.09% +0.06% +0.00% / -1.74% -1.90% -1.99%] index_copy_ strided 257 : Elapsed 0.019 ms (9.309 ms / 500) 9.264 -> 9.106 ( -1.71%) [ +0.00% +0.03% +0.04% / -1.71% -1.14% -1.17%] index_add_ perm : Elapsed 0.019 ms (9.264 ms / 500) 9.281 -> 9.122 ( -1.71%) [ +0.01% +0.00% +0.15% / -1.71% -1.58% -1.57%] index_copy_ perm : Elapsed 0.019 ms (9.282 ms / 500) 9.260 -> 9.109 ( -1.63%) [ +0.00% +0.06% +0.04% / -1.63% -0.80% -1.06%] index_add_ perm_sorted : Elapsed 0.019 ms (9.260 ms / 500) 9.264 -> 9.092 ( -1.86%) [ +0.01% +0.00% +0.16% / -1.86% -1.44% -1.45%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.265 ms / 500) 25.701 -> 25.710 ( +0.04%) [ +0.00% +0.07% +0.03% / +0.04% +0.12% +0.04%] index_select const : Elapsed 0.051 ms (25.701 ms / 500) 25.661 -> 25.675 ( +0.05%) [ +0.11% +0.00% +0.04% / +0.05% +0.50% +0.34%] index_select wrap : Elapsed 0.051 ms (25.689 ms / 500) 25.728 -> 25.754 ( +0.10%) [ +0.12% +0.00% +0.13% / +0.10% +0.44% +0.41%] index_select linear : Elapsed 0.052 ms (25.760 ms / 500) 25.767 -> 25.768 ( +0.00%) [ +0.21% +0.00% +0.12% / +0.00% +0.23% +0.22%] index_select reverse : Elapsed 0.052 ms (25.820 ms / 500) 25.708 -> 25.703 ( -0.02%) [ +0.08% +0.00% +0.03% / +0.08% +0.03% -0.02%] index_select skip64 : Elapsed 0.051 ms (25.728 ms / 500) 25.572 -> 25.573 ( +0.00%) [ +0.10% +0.08% +0.00% / +0.00% +0.35% +0.39%] index_select skip256 : Elapsed 0.051 ms (25.597 ms / 500) 25.621 -> 25.615 ( -0.02%) [ +0.06% +0.00% +0.01% / -0.02% +0.43% +0.36%] index_select spread : Elapsed 0.051 ms (25.637 ms / 500) 25.687 -> 25.668 ( -0.07%) [ +0.02% +0.02% +0.00% / -0.07% +0.83% +0.96%] index_select strided 3 : Elapsed 0.051 ms (25.691 ms / 500) 25.640 -> 25.652 ( +0.05%) [ +0.07% +0.01% +0.00% / +0.05% +0.53% +0.59%] index_select strided 5 : Elapsed 0.051 ms (25.659 ms / 500) 25.689 -> 25.717 ( +0.11%) [ +0.10% +0.00% +0.10% / +0.11% +0.53% +0.53%] index_select strided 7 : Elapsed 0.051 ms (25.714 ms / 500) 25.842 -> 25.872 ( +0.12%) [ +0.14% +0.00% +0.16% / +0.12% +0.23% +0.22%] index_select strided 8 : Elapsed 0.052 ms (25.878 ms / 500) 25.637 -> 25.630 ( -0.03%) [ +0.02% +0.05% +0.00% / -0.03% +0.95% +0.95%] index_select strided 16 : Elapsed 0.051 ms (25.642 ms / 500) 25.622 -> 25.672 ( +0.20%) [ +0.06% +0.09% +0.00% / +0.20% +0.61% +0.59%] index_select random : Elapsed 0.051 ms (25.637 ms / 500) 25.682 -> 25.719 ( +0.14%) [ +0.14% +0.00% +0.10% / +0.14% +0.62% +0.52%] index_select random_sorted : Elapsed 0.051 ms (25.718 ms / 500) B = [512, 256] (stride (1, 512)) dim = 0 fill_cnt = 32 10.803 -> 10.800 ( -0.03%) [ +0.06% +0.01% +0.00% / -0.03% +0.34% +0.34%] index_fill_ const : Elapsed 0.022 ms (10.810 ms / 500) 10.842 -> 10.875 ( +0.30%) [ +0.08% +0.10% +0.00% / +0.55% +0.53% +0.30%] index_fill_ linear : Elapsed 0.022 ms (10.851 ms / 500) 10.828 -> 10.839 ( +0.10%) [ +0.00% +0.30% +0.12% / +0.67% +0.46% +0.10%] index_fill_ reverse : Elapsed 0.022 ms (10.828 ms / 500) 10.772 -> 10.810 ( +0.35%) [ +0.34% +0.10% +0.00% / +0.59% +0.35% +0.61%] index_fill_ skip64 : Elapsed 0.022 ms (10.809 ms / 500) 10.797 -> 10.829 ( +0.30%) [ +0.06% +0.54% +0.00% / +0.30% +0.32% +0.46%] index_fill_ skip256 : Elapsed 0.022 ms (10.803 ms / 500) 11.651 -> 11.706 ( +0.47%) [ +0.21% +0.40% +0.00% / +0.48% +0.47% +0.67%] index_fill_ spread : Elapsed 0.023 ms (11.676 ms / 500) 11.064 -> 11.098 ( +0.31%) [ +0.00% +0.12% +0.08% / +0.37% +0.31% +0.31%] index_fill_ strided 3 : Elapsed 0.022 ms (11.064 ms / 500) 11.286 -> 11.307 ( +0.19%) [ +0.13% +0.32% +0.00% / +0.35% +0.19% +0.30%] index_fill_ strided 5 : Elapsed 0.023 ms (11.301 ms / 500) 11.512 -> 11.547 ( +0.30%) [ +0.14% +0.02% +0.00% / +0.47% +0.30% +0.40%] index_fill_ strided 7 : Elapsed 0.023 ms (11.528 ms / 500) 11.570 -> 11.781 ( +1.82%) [ +0.30% +0.00% +0.14% / +1.86% +1.93% +1.82%] index_fill_ strided 8 : Elapsed 0.023 ms (11.605 ms / 500) 11.605 -> 11.695 ( +0.78%) [ +0.00% +0.21% +0.22% / +0.78% +0.88% +0.91%] index_fill_ strided 16 : Elapsed 0.023 ms (11.605 ms / 500) 10.895 -> 10.906 ( +0.10%) [ +0.16% +0.00% +0.37% / +0.10% +0.58% +0.36%] index_fill_ strided 64 : Elapsed 0.022 ms (10.912 ms / 500) 11.564 -> 11.542 ( -0.19%) [ +0.14% +0.00% +0.59% / -0.19% +0.33% +0.22%] index_fill_ strided 100 : Elapsed 0.023 ms (11.580 ms / 500) 11.081 -> 11.111 ( +0.27%) [ +0.45% +0.14% +0.00% / +0.27% +0.27% +0.45%] index_fill_ strided 255 : Elapsed 0.022 ms (11.131 ms / 500) 10.885 -> 10.867 ( -0.17%) [ +0.12% +0.11% +0.00% / -0.14% -0.17% +0.07%] index_fill_ strided 256 : Elapsed 0.022 ms (10.898 ms / 500) 11.001 -> 11.076 ( +0.68%) [ +0.14% +0.00% +0.46% / +0.68% +0.74% +0.86%] index_fill_ strided 257 : Elapsed 0.022 ms (11.016 ms / 500) 11.398 -> 11.298 ( -0.88%) [ +0.05% +0.00% +0.47% / -0.64% -0.88% -0.65%] index_fill_ random : Elapsed 0.023 ms (11.404 ms / 500) 11.367 -> 11.434 ( +0.59%) [ +0.00% +0.12% +0.07% / +0.59% +0.94% +0.77%] index_fill_ random_sorted : Elapsed 0.023 ms (11.367 ms / 500) 11.362 -> 11.365 ( +0.03%) [ +0.00% +0.36% +0.36% / +0.03% +0.20% +0.33%] index_fill_ perm : Elapsed 0.023 ms (11.362 ms / 500) 11.400 -> 11.385 ( -0.13%) [ +0.21% +0.00% +0.07% / +0.24% +0.32% -0.13%] index_fill_ perm_sorted : Elapsed 0.023 ms (11.424 ms / 500) B = [512, 256] (stride (1, 512)) A = [32, 256] (stride (256, 1)) dim = 0 9.557 -> 9.131 ( -4.46%) [ +0.18% +0.03% +0.00% / -3.49% -4.46% -4.37%] index_add_ linear : Elapsed 0.019 ms (9.574 ms / 500) 9.363 -> 9.139 ( -2.39%) [ +0.02% +0.00% +0.04% / -1.25% -2.33% -2.39%] index_copy_ linear : Elapsed 0.019 ms (9.365 ms / 500) 9.477 -> 9.179 ( -3.14%) [ +0.14% +0.00% +0.13% / -3.00% -3.14% -3.11%] index_add_ reverse : Elapsed 0.019 ms (9.490 ms / 500) 9.296 -> 9.163 ( -1.43%) [ +0.09% +0.01% +0.00% / -1.09% -1.32% -1.43%] index_copy_ reverse : Elapsed 0.019 ms (9.304 ms / 500) good 10.694 -> 10.034 ( -6.17%) [ +0.09% +0.00% +0.09% / -5.98% -6.12% -6.17%] index_add_ spread : Elapsed 0.021 ms (10.704 ms / 500) 10.131 -> 9.870 ( -2.58%) [ +0.20% +0.00% +0.22% / -1.67% -2.50% -2.58%] index_copy_ spread : Elapsed 0.020 ms (10.151 ms / 500) 9.736 -> 9.378 ( -3.68%) [ +0.12% +0.13% +0.00% / -3.68% -3.63% -3.68%] index_add_ strided 3 : Elapsed 0.019 ms (9.748 ms / 500) 9.553 -> 9.427 ( -1.32%) [ +0.13% +0.04% +0.00% / -1.32% -1.11% -1.17%] index_copy_ strided 3 : Elapsed 0.019 ms (9.565 ms / 500) 10.064 -> 9.562 ( -4.99%) [ +0.00% +0.06% +0.09% / -4.99% -4.85% -4.75%] index_add_ strided 5 : Elapsed 0.020 ms (10.064 ms / 500) 9.813 -> 9.611 ( -2.06%) [ +0.25% +0.19% +0.00% / -2.06% -1.79% -1.83%] index_copy_ strided 5 : Elapsed 0.020 ms (9.838 ms / 500) good 10.332 -> 9.799 ( -5.16%) [ +0.17% +0.00% +0.03% / -4.93% -5.16% -5.09%] index_add_ strided 7 : Elapsed 0.021 ms (10.350 ms / 500) 10.038 -> 9.834 ( -2.03%) [ +0.23% +0.00% +0.26% / -2.03% -1.95% -1.84%] index_copy_ strided 7 : Elapsed 0.020 ms (10.061 ms / 500) 9.703 -> 9.314 ( -4.01%) [ +0.09% +0.00% +0.00% / -4.01% -3.92% -3.92%] index_add_ strided 255 : Elapsed 0.019 ms (9.712 ms / 500) 9.490 -> 9.364 ( -1.33%) [ +0.03% +0.00% +0.13% / -1.28% -1.33% -1.33%] index_copy_ strided 255 : Elapsed 0.019 ms (9.493 ms / 500) 9.679 -> 9.248 ( -4.45%) [ +0.18% +0.21% +0.00% / -3.97% -4.45% -4.33%] index_add_ strided 257 : Elapsed 0.019 ms (9.696 ms / 500) 9.500 -> 9.331 ( -1.78%) [ +0.09% +0.04% +0.00% / -1.19% -1.78% -1.67%] index_copy_ strided 257 : Elapsed 0.019 ms (9.509 ms / 500) good 10.405 -> 9.871 ( -5.13%) [ +0.03% +0.02% +0.00% / -4.78% -5.07% -5.13%] index_add_ perm : Elapsed 0.021 ms (10.408 ms / 500) 10.022 -> 9.728 ( -2.93%) [ +0.08% +0.00% +0.04% / -2.63% -2.93% -2.89%] index_copy_ perm : Elapsed 0.020 ms (10.030 ms / 500) good 10.440 -> 9.838 ( -5.77%) [ +0.17% +0.00% +0.05% / -5.02% -5.75% -5.77%] index_add_ perm_sorted : Elapsed 0.021 ms (10.458 ms / 500) 10.056 -> 9.707 ( -3.47%) [ +0.00% +0.01% +0.02% / -2.64% -3.47% -3.30%] index_copy_ perm_sorted : Elapsed 0.020 ms (10.056 ms / 500) good 26.700 -> 24.807 ( -7.09%) [ +0.00% +0.04% +0.15% / -7.09% -7.04% -7.04%] index_select const : Elapsed 0.053 ms (26.700 ms / 500) 26.387 -> 25.734 ( -2.47%) [ +0.00% +0.06% +0.01% / -2.47% -2.41% -2.47%] index_select wrap : Elapsed 0.053 ms (26.387 ms / 500) good 26.496 -> 24.914 ( -5.97%) [ +0.02% +0.00% +0.02% / -5.74% -5.97% -5.84%] index_select linear : Elapsed 0.053 ms (26.501 ms / 500) good 26.575 -> 24.827 ( -6.58%) [ +0.21% +0.13% +0.00% / -6.16% -6.55% -6.58%] index_select reverse : Elapsed 0.053 ms (26.630 ms / 500) good 26.430 -> 24.807 ( -6.14%) [ +0.00% +0.08% +0.03% / -6.14% -6.07% -6.06%] index_select skip64 : Elapsed 0.053 ms (26.430 ms / 500) good 26.369 -> 24.764 ( -6.09%) [ +0.24% +0.00% +0.13% / -5.93% -6.09% -6.08%] index_select skip256 : Elapsed 0.053 ms (26.431 ms / 500) good 26.360 -> 24.940 ( -5.39%) [ +0.20% +0.09% +0.00% / -5.39% -5.26% -5.24%] index_select spread : Elapsed 0.053 ms (26.412 ms / 500) 26.569 -> 26.159 ( -1.54%) [ +0.07% +0.00% +0.03% / -1.49% -1.53% -1.54%] index_select strided 3 : Elapsed 0.053 ms (26.588 ms / 500) 26.500 -> 25.796 ( -2.66%) [ +0.02% +0.05% +0.00% / -2.21% -2.65% -2.66%] index_select strided 5 : Elapsed 0.053 ms (26.504 ms / 500) 26.408 -> 25.719 ( -2.61%) [ +0.08% +0.08% +0.00% / -2.52% -2.61% -2.57%] index_select strided 7 : Elapsed 0.053 ms (26.430 ms / 500) good 26.328 -> 24.866 ( -5.55%) [ +0.12% +0.11% +0.00% / -5.48% -5.32% -5.55%] index_select strided 8 : Elapsed 0.053 ms (26.359 ms / 500) good 26.745 -> 24.814 ( -7.22%) [ +0.11% +0.05% +0.00% / -7.22% -7.09% -6.98%] index_select strided 16 : Elapsed 0.054 ms (26.774 ms / 500) 26.453 -> 25.164 ( -4.87%) [ +0.06% +0.08% +0.00% / -4.49% -4.87% -4.86%] index_select random : Elapsed 0.053 ms (26.468 ms / 500) good 26.324 -> 24.835 ( -5.66%) [ +0.13% +0.05% +0.00% / -5.33% -5.66% -5.53%] index_select random_sorted : Elapsed 0.053 ms (26.358 ms / 500) B = [512, 256] (stride (1, 512)) A = [32, 256] (stride (1, 32)) dim = 0 9.422 -> 9.160 ( -2.78%) [ +0.28% +0.10% +0.00% / -2.78% -2.27% -2.36%] index_add_ linear : Elapsed 0.019 ms (9.448 ms / 500) 9.358 -> 9.062 ( -3.16%) [ +0.17% +0.00% +0.10% / -3.16% -3.07% -3.01%] index_copy_ linear : Elapsed 0.019 ms (9.374 ms / 500) 9.470 -> 9.171 ( -3.16%) [ +0.03% +0.08% +0.00% / -3.08% -3.16% -3.05%] index_add_ reverse : Elapsed 0.019 ms (9.473 ms / 500) 9.403 -> 9.044 ( -3.82%) [ +0.00% +0.03% +0.06% / -3.40% -3.64% -3.82%] index_copy_ reverse : Elapsed 0.019 ms (9.403 ms / 500) 10.555 -> 10.109 ( -4.23%) [ +0.13% +0.00% +0.01% / -3.81% -4.05% -4.23%] index_add_ spread : Elapsed 0.021 ms (10.569 ms / 500) 10.189 -> 9.896 ( -2.88%) [ +0.00% +0.00% +0.09% / -2.45% -2.66% -2.88%] index_copy_ spread : Elapsed 0.020 ms (10.189 ms / 500) 9.682 -> 9.381 ( -3.11%) [ +0.00% +0.07% +0.14% / -3.11% -2.87% -2.92%] index_add_ strided 3 : Elapsed 0.019 ms (9.682 ms / 500) 9.636 -> 9.303 ( -3.46%) [ +0.04% +0.00% +0.07% / -3.46% -3.30% -3.22%] index_copy_ strided 3 : Elapsed 0.019 ms (9.640 ms / 500) 9.989 -> 9.577 ( -4.12%) [ +0.00% +0.13% +0.00% / -4.12% -3.79% -3.91%] index_add_ strided 5 : Elapsed 0.020 ms (9.989 ms / 500) 9.866 -> 9.498 ( -3.73%) [ +0.16% +0.00% +0.09% / -3.73% -3.43% -3.25%] index_copy_ strided 5 : Elapsed 0.020 ms (9.882 ms / 500) 10.236 -> 9.821 ( -4.05%) [ +0.10% +0.00% +0.12% / -4.05% -3.64% -3.54%] index_add_ strided 7 : Elapsed 0.020 ms (10.246 ms / 500) 10.098 -> 9.754 ( -3.41%) [ +0.00% +0.18% +0.04% / -3.41% -3.05% -3.04%] index_copy_ strided 7 : Elapsed 0.020 ms (10.098 ms / 500) 9.628 -> 9.337 ( -3.02%) [ +0.10% +0.10% +0.00% / -3.02% -2.63% -2.53%] index_add_ strided 255 : Elapsed 0.019 ms (9.638 ms / 500) 9.567 -> 9.274 ( -3.06%) [ +0.10% +0.00% +0.06% / -3.06% -2.99% -3.03%] index_copy_ strided 255 : Elapsed 0.019 ms (9.577 ms / 500) 9.642 -> 9.276 ( -3.80%) [ +0.07% +0.08% +0.00% / -3.76% -3.74% -3.80%] index_add_ strided 257 : Elapsed 0.019 ms (9.649 ms / 500) 9.601 -> 9.226 ( -3.91%) [ +0.07% +0.02% +0.00% / -3.64% -3.91% -3.80%] index_copy_ strided 257 : Elapsed 0.019 ms (9.608 ms / 500) 10.392 -> 9.915 ( -4.59%) [ +0.03% +0.13% +0.00% / -4.35% -4.55% -4.59%] index_add_ perm : Elapsed 0.021 ms (10.395 ms / 500) 10.099 -> 9.598 ( -4.96%) [ +0.18% +0.00% +0.07% / -4.50% -4.96% -4.80%] index_copy_ perm : Elapsed 0.020 ms (10.117 ms / 500) 10.231 -> 9.899 ( -3.25%) [ +0.10% +0.00% +0.14% / -3.25% -2.94% -3.08%] index_add_ perm_sorted : Elapsed 0.020 ms (10.241 ms / 500) 9.952 -> 9.688 ( -2.65%) [ +0.31% +0.25% +0.00% / -2.65% -2.50% -2.61%] index_copy_ perm_sorted : Elapsed 0.020 ms (9.983 ms / 500) Good 28.081 -> 24.820 (-11.61%) [ +0.08% +0.01% +0.00% / -11.58% -11.61% -11.58%] index_select const : Elapsed 0.056 ms (28.103 ms / 500) Good 28.059 -> 24.806 (-11.59%) [ +0.15% +0.00% +0.15% / -11.50% -11.59% -11.54%] index_select wrap : Elapsed 0.056 ms (28.100 ms / 500) Good 28.164 -> 24.856 (-11.75%) [ +0.00% +0.05% +0.09% / -11.75% -11.54% -11.55%] index_select linear : Elapsed 0.056 ms (28.164 ms / 500) Good 28.161 -> 25.092 (-10.90%) [ +0.17% +0.00% +0.09% / -10.48% -10.90% -10.74%] index_select reverse : Elapsed 0.056 ms (28.209 ms / 500) Good 28.131 -> 24.814 (-11.79%) [ +0.15% +0.00% +0.08% / -11.79% -11.76% -11.74%] index_select skip64 : Elapsed 0.056 ms (28.172 ms / 500) Good 27.981 -> 24.783 (-11.43%) [ +0.06% +0.12% +0.00% / -11.27% -11.43% -11.33%] index_select skip256 : Elapsed 0.056 ms (27.998 ms / 500) Good 27.990 -> 25.153 (-10.14%) [ +0.07% +0.06% +0.00% / -10.14% -10.13% -10.08%] index_select spread : Elapsed 0.056 ms (28.009 ms / 500) Good 28.061 -> 24.846 (-11.46%) [ +0.14% +0.04% +0.00% / -11.46% -11.42% -11.43%] index_select strided 3 : Elapsed 0.056 ms (28.100 ms / 500) Good 28.026 -> 24.818 (-11.45%) [ +0.12% +0.00% +0.00% / -11.45% -11.44% -11.42%] index_select strided 5 : Elapsed 0.056 ms (28.060 ms / 500) Good 28.146 -> 24.821 (-11.81%) [ +0.04% +0.00% +0.05% / -11.81% -11.45% -11.50%] index_select strided 7 : Elapsed 0.056 ms (28.158 ms / 500) Good 28.241 -> 24.829 (-12.08%) [ +0.00% +0.05% +0.00% / -12.08% -11.96% -11.96%] index_select strided 8 : Elapsed 0.056 ms (28.242 ms / 500) Good 28.025 -> 24.820 (-11.44%) [ +0.07% +0.00% +0.09% / -11.36% -11.44% -11.29%] index_select strided 16 : Elapsed 0.056 ms (28.044 ms / 500) Good 28.021 -> 24.815 (-11.44%) [ +0.17% +0.00% +0.10% / -11.40% -11.42% -11.44%] index_select random : Elapsed 0.056 ms (28.068 ms / 500) Good 28.069 -> 24.886 (-11.34%) [ +0.00% +0.04% +0.00% / -11.34% -10.99% -11.07%] index_select random_sorted : Elapsed 0.056 ms (28.070 ms / 500) out_shape = [32, 512] in_shape = [32, 256] idx_dim = 1 B = [32, 512] (stride (512, 1)) dim = 1 fill_cnt = 256 8.881 -> 8.775 ( -1.19%) [ +0.14% +0.10% +0.00% / -1.08% -1.19% -1.13%] index_fill_ const : Elapsed 0.018 ms (8.893 ms / 500) 8.947 -> 8.836 ( -1.24%) [ +0.19% +0.20% +0.00% / -1.21% -1.16% -1.24%] index_fill_ linear : Elapsed 0.018 ms (8.964 ms / 500) 8.954 -> 8.846 ( -1.21%) [ +0.10% +0.00% +0.03% / -1.11% -1.21% -1.09%] index_fill_ reverse : Elapsed 0.018 ms (8.963 ms / 500) 8.942 -> 8.788 ( -1.72%) [ +0.15% +0.03% +0.00% / -1.72% -1.46% -1.44%] index_fill_ skip64 : Elapsed 0.018 ms (8.955 ms / 500) 8.901 -> 8.794 ( -1.20%) [ +0.12% +0.12% +0.00% / -1.00% -1.18% -1.20%] index_fill_ skip256 : Elapsed 0.018 ms (8.912 ms / 500) 9.066 -> 9.010 ( -0.62%) [ +0.03% +0.00% +0.08% / -0.62% -0.36% -0.30%] index_fill_ spread : Elapsed 0.018 ms (9.069 ms / 500) 9.094 -> 8.961 ( -1.46%) [ +0.07% +0.00% +0.00% / -1.21% -1.46% -1.36%] index_fill_ strided 3 : Elapsed 0.018 ms (9.100 ms / 500) 9.067 -> 8.964 ( -1.14%) [ +0.24% +0.18% +0.00% / -1.08% -1.12% -1.14%] index_fill_ strided 5 : Elapsed 0.018 ms (9.089 ms / 500) 9.082 -> 8.970 ( -1.23%) [ +0.08% +0.12% +0.00% / -1.16% -1.21% -1.23%] index_fill_ strided 7 : Elapsed 0.018 ms (9.089 ms / 500) 9.110 -> 8.979 ( -1.44%) [ +0.15% +0.29% +0.00% / -1.32% -1.35% -1.44%] index_fill_ strided 8 : Elapsed 0.018 ms (9.124 ms / 500) 8.990 -> 8.870 ( -1.33%) [ +0.11% +0.07% +0.00% / -1.25% -1.33% -1.30%] index_fill_ strided 16 : Elapsed 0.018 ms (9.000 ms / 500) 8.944 -> 8.801 ( -1.60%) [ +0.12% +0.00% +0.02% / -1.36% -1.60% -1.60%] index_fill_ strided 64 : Elapsed 0.018 ms (8.955 ms / 500) 9.121 -> 9.000 ( -1.33%) [ +0.10% +0.09% +0.00% / -1.15% -1.27% -1.33%] index_fill_ strided 100 : Elapsed 0.018 ms (9.130 ms / 500) 9.061 -> 8.984 ( -0.85%) [ +0.20% +0.12% +0.00% / -0.85% -0.71% -0.78%] index_fill_ strided 255 : Elapsed 0.018 ms (9.079 ms / 500) 8.890 -> 8.783 ( -1.20%) [ +0.19% +0.00% +0.06% / -1.20% -1.07% -1.12%] index_fill_ strided 256 : Elapsed 0.018 ms (8.907 ms / 500) 9.081 -> 9.005 ( -0.84%) [ +0.01% +0.09% +0.00% / -0.84% -0.59% -0.50%] index_fill_ strided 257 : Elapsed 0.018 ms (9.082 ms / 500) 9.117 -> 8.986 ( -1.44%) [ +0.34% +0.12% +0.00% / -1.39% -1.44% -1.40%] index_fill_ random : Elapsed 0.018 ms (9.148 ms / 500) 9.094 -> 8.989 ( -1.15%) [ +0.15% +0.11% +0.00% / -1.15% -1.00% -0.98%] index_fill_ random_sorted : Elapsed 0.018 ms (9.108 ms / 500) 9.099 -> 8.986 ( -1.24%) [ +0.00% +0.14% +0.05% / -1.12% -1.24% -1.18%] index_fill_ perm : Elapsed 0.018 ms (9.099 ms / 500) 9.092 -> 8.991 ( -1.11%) [ +0.02% +0.07% +0.00% / -1.10% -1.11% -1.08%] index_fill_ perm_sorted : Elapsed 0.018 ms (9.094 ms / 500) B = [32, 512] (stride (512, 1)) A = [32, 256] (stride (256, 1)) dim = 1 9.438 -> 9.144 ( -3.12%) [ +0.14% +0.03% +0.00% / -3.10% -3.05% -3.12%] index_add_ linear : Elapsed 0.019 ms (9.451 ms / 500) 9.431 -> 9.033 ( -4.22%) [ +0.08% +0.00% +0.00% / -4.15% -4.20% -4.22%] index_copy_ linear : Elapsed 0.019 ms (9.439 ms / 500) 9.457 -> 9.160 ( -3.14%) [ +0.22% +0.00% +0.12% / -2.89% -3.13% -3.14%] index_add_ reverse : Elapsed 0.019 ms (9.478 ms / 500) 9.434 -> 9.042 ( -4.16%) [ +0.10% +0.00% +0.03% / -3.78% -4.16% -4.09%] index_copy_ reverse : Elapsed 0.019 ms (9.443 ms / 500) 9.542 -> 9.238 ( -3.19%) [ +0.13% +0.00% +0.02% / -3.19% -2.43% -2.38%] index_add_ spread : Elapsed 0.019 ms (9.554 ms / 500) 9.604 -> 9.237 ( -3.82%) [ +0.07% +0.00% +0.05% / -3.82% -3.39% -3.44%] index_copy_ spread : Elapsed 0.019 ms (9.611 ms / 500) 9.571 -> 9.281 ( -3.03%) [ +0.26% +0.11% +0.00% / -2.92% -2.95% -3.03%] index_add_ strided 3 : Elapsed 0.019 ms (9.596 ms / 500) 9.634 -> 9.229 ( -4.20%) [ +0.06% +0.08% +0.00% / -4.20% -4.00% -4.16%] index_copy_ strided 3 : Elapsed 0.019 ms (9.640 ms / 500) 9.526 -> 9.246 ( -2.94%) [ +0.17% +0.19% +0.00% / -2.94% -1.89% -1.95%] index_add_ strided 5 : Elapsed 0.019 ms (9.542 ms / 500) 9.568 -> 9.175 ( -4.11%) [ +0.16% +0.02% +0.00% / -4.11% -2.93% -2.95%] index_copy_ strided 5 : Elapsed 0.019 ms (9.583 ms / 500) 9.530 -> 9.239 ( -3.05%) [ +0.00% +0.03% +0.01% / -3.05% -1.94% -2.08%] index_add_ strided 7 : Elapsed 0.019 ms (9.530 ms / 500) 9.548 -> 9.195 ( -3.70%) [ +0.10% +0.00% +0.08% / -3.70% -2.98% -2.96%] index_copy_ strided 7 : Elapsed 0.019 ms (9.558 ms / 500) 9.513 -> 9.235 ( -2.92%) [ +0.19% +0.06% +0.00% / -2.92% -2.42% -2.45%] index_add_ strided 255 : Elapsed 0.019 ms (9.531 ms / 500) 9.566 -> 9.189 ( -3.94%) [ +0.05% +0.00% +0.06% / -3.94% -3.70% -3.70%] index_copy_ strided 255 : Elapsed 0.019 ms (9.571 ms / 500) 9.515 -> 9.221 ( -3.09%) [ +0.16% +0.08% +0.00% / -3.09% -2.72% -2.74%] index_add_ strided 257 : Elapsed 0.019 ms (9.530 ms / 500) 9.569 -> 9.207 ( -3.78%) [ +0.04% +0.13% +0.00% / -3.78% -3.33% -3.33%] index_copy_ strided 257 : Elapsed 0.019 ms (9.573 ms / 500) 9.568 -> 9.312 ( -2.68%) [ +0.21% +0.16% +0.00% / -2.68% -2.24% -2.38%] index_add_ perm : Elapsed 0.019 ms (9.588 ms / 500) 9.611 -> 9.229 ( -3.97%) [ +0.08% +0.00% +0.02% / -3.97% -3.93% -3.92%] index_copy_ perm : Elapsed 0.019 ms (9.619 ms / 500) 9.561 -> 9.253 ( -3.22%) [ +0.03% +0.06% +0.00% / -2.87% -3.20% -3.22%] index_add_ perm_sorted : Elapsed 0.019 ms (9.564 ms / 500) 9.599 -> 9.210 ( -4.05%) [ +0.20% +0.07% +0.00% / -3.65% -4.04% -4.05%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.618 ms / 500) 17.685 -> 17.424 ( -1.48%) [ +0.00% +0.06% +0.07% / -1.06% -1.45% -1.48%] index_select const : Elapsed 0.035 ms (17.685 ms / 500) 17.830 -> 17.536 ( -1.65%) [ +0.13% +0.00% +0.05% / -1.23% -1.65% -1.62%] index_select wrap : Elapsed 0.036 ms (17.853 ms / 500) 17.673 -> 17.527 ( -0.83%) [ +0.13% +0.00% +0.09% / -0.83% -0.62% -0.65%] index_select linear : Elapsed 0.035 ms (17.696 ms / 500) 17.721 -> 17.528 ( -1.09%) [ +0.03% +0.08% +0.00% / -1.04% -1.02% -1.09%] index_select reverse : Elapsed 0.035 ms (17.727 ms / 500) 17.672 -> 17.494 ( -1.01%) [ +0.01% +0.00% +0.05% / -1.01% -0.62% -0.52%] index_select skip64 : Elapsed 0.035 ms (17.673 ms / 500) 17.702 -> 17.415 ( -1.62%) [ +0.21% +0.22% +0.00% / -1.11% -1.62% -1.51%] index_select skip256 : Elapsed 0.035 ms (17.740 ms / 500) 17.780 -> 17.525 ( -1.43%) [ +0.05% +0.05% +0.00% / -1.33% -1.37% -1.43%] index_select spread : Elapsed 0.036 ms (17.789 ms / 500) 17.689 -> 17.519 ( -0.96%) [ +0.12% +0.03% +0.00% / -0.96% -0.80% -0.73%] index_select strided 3 : Elapsed 0.035 ms (17.710 ms / 500) 17.797 -> 17.580 ( -1.22%) [ +0.06% +0.07% +0.00% / -1.22% -1.10% -1.14%] index_select strided 5 : Elapsed 0.036 ms (17.807 ms / 500) 17.821 -> 17.487 ( -1.87%) [ +0.00% +0.04% +0.05% / -1.37% -1.87% -1.78%] index_select strided 7 : Elapsed 0.036 ms (17.821 ms / 500) 17.771 -> 17.521 ( -1.41%) [ +0.08% +0.00% +0.04% / -1.31% -1.41% -1.31%] index_select strided 8 : Elapsed 0.036 ms (17.785 ms / 500) 17.639 -> 17.509 ( -0.74%) [ +0.07% +0.10% +0.00% / -0.74% -0.52% -0.58%] index_select strided 16 : Elapsed 0.035 ms (17.651 ms / 500) 17.773 -> 17.488 ( -1.60%) [ +0.07% +0.00% +0.01% / -1.08% -1.60% -1.58%] index_select strided 64 : Elapsed 0.036 ms (17.785 ms / 500) 17.751 -> 17.568 ( -1.03%) [ +0.09% +0.00% +0.03% / -1.03% -0.91% -1.03%] index_select strided 100 : Elapsed 0.036 ms (17.767 ms / 500) 17.747 -> 17.510 ( -1.34%) [ +0.05% +0.02% +0.00% / -1.21% -1.34% -1.25%] index_select strided 255 : Elapsed 0.036 ms (17.756 ms / 500) 17.784 -> 17.521 ( -1.48%) [ +0.15% +0.02% +0.00% / -0.84% -1.48% -1.47%] index_select random : Elapsed 0.036 ms (17.811 ms / 500) 17.743 -> 17.574 ( -0.95%) [ +0.00% +0.01% +0.02% / -0.90% -0.95% -0.95%] index_select random_sorted : Elapsed 0.035 ms (17.743 ms / 500) B = [32, 512] (stride (512, 1)) A = [32, 256] (stride (1, 32)) dim = 1 9.384 -> 9.076 ( -3.28%) [ +0.09% +0.00% +0.09% / -3.28% -2.78% -2.89%] index_add_ linear : Elapsed 0.019 ms (9.392 ms / 500) 9.284 -> 9.111 ( -1.86%) [ +0.12% +0.06% +0.00% / -1.86% -1.57% -1.64%] index_copy_ linear : Elapsed 0.019 ms (9.295 ms / 500) 9.495 -> 9.119 ( -3.96%) [ +0.19% +0.00% +0.01% / -3.60% -3.96% -3.96%] index_add_ reverse : Elapsed 0.019 ms (9.513 ms / 500) 9.421 -> 9.113 ( -3.27%) [ +0.05% +0.00% +0.02% / -2.55% -3.17% -3.27%] index_copy_ reverse : Elapsed 0.019 ms (9.426 ms / 500) 9.499 -> 9.187 ( -3.28%) [ +0.03% +0.00% +0.06% / -3.28% -2.43% -2.44%] index_add_ spread : Elapsed 0.019 ms (9.502 ms / 500) 9.464 -> 9.298 ( -1.75%) [ +0.05% +0.11% +0.00% / -1.75% -0.93% -1.10%] index_copy_ spread : Elapsed 0.019 ms (9.469 ms / 500) 9.563 -> 9.246 ( -3.31%) [ +0.02% +0.06% +0.00% / -3.31% -3.04% -3.06%] index_add_ strided 3 : Elapsed 0.019 ms (9.565 ms / 500) 9.513 -> 9.295 ( -2.29%) [ +0.13% +0.00% +0.15% / -2.29% -2.04% -2.13%] index_copy_ strided 3 : Elapsed 0.019 ms (9.525 ms / 500) 9.585 -> 9.195 ( -4.07%) [ +0.24% +0.10% +0.00% / -4.07% -2.81% -2.69%] index_add_ strided 5 : Elapsed 0.019 ms (9.608 ms / 500) 9.576 -> 9.302 ( -2.86%) [ +0.22% +0.00% +0.14% / -2.86% -2.46% -2.41%] index_copy_ strided 5 : Elapsed 0.019 ms (9.597 ms / 500) 9.505 -> 9.198 ( -3.23%) [ +0.04% +0.11% +0.00% / -3.23% -1.87% -1.86%] index_add_ strided 7 : Elapsed 0.019 ms (9.509 ms / 500) 9.455 -> 9.263 ( -2.03%) [ +0.05% +0.00% +0.06% / -2.03% -1.07% -1.02%] index_copy_ strided 7 : Elapsed 0.019 ms (9.460 ms / 500) 9.545 -> 9.197 ( -3.65%) [ +0.08% +0.00% +0.02% / -3.65% -3.12% -3.22%] index_add_ strided 255 : Elapsed 0.019 ms (9.553 ms / 500) 9.506 -> 9.281 ( -2.37%) [ +0.08% +0.00% +0.00% / -2.26% -2.37% -2.32%] index_copy_ strided 255 : Elapsed 0.019 ms (9.514 ms / 500) 9.507 -> 9.157 ( -3.68%) [ +0.16% +0.00% +0.04% / -3.68% -3.11% -3.08%] index_add_ strided 257 : Elapsed 0.019 ms (9.522 ms / 500) 9.467 -> 9.281 ( -1.96%) [ +0.00% +0.05% +0.01% / -1.96% -1.58% -1.55%] index_copy_ strided 257 : Elapsed 0.019 ms (9.467 ms / 500) 9.536 -> 9.266 ( -2.83%) [ +0.02% +0.14% +0.00% / -2.83% -2.19% -2.02%] index_add_ perm : Elapsed 0.019 ms (9.538 ms / 500) 9.487 -> 9.286 ( -2.12%) [ +0.07% +0.01% +0.00% / -2.12% -1.70% -1.70%] index_copy_ perm : Elapsed 0.019 ms (9.494 ms / 500) 9.612 -> 9.214 ( -4.14%) [ +0.04% +0.06% +0.00% / -3.87% -4.14% -4.08%] index_add_ perm_sorted : Elapsed 0.019 ms (9.616 ms / 500) 9.585 -> 9.269 ( -3.30%) [ +0.03% +0.03% +0.00% / -2.80% -3.30% -3.28%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.588 ms / 500) 17.601 -> 17.475 ( -0.72%) [ +0.00% +0.04% +0.10% / -0.53% -0.72% -0.67%] index_select const : Elapsed 0.035 ms (17.601 ms / 500) 17.611 -> 17.549 ( -0.35%) [ +0.05% +0.03% +0.00% / -0.35% +0.00% -0.02%] index_select wrap : Elapsed 0.035 ms (17.620 ms / 500) 17.653 -> 17.559 ( -0.53%) [ +0.11% +0.06% +0.00% / -0.52% -0.53% -0.48%] index_select linear : Elapsed 0.035 ms (17.673 ms / 500) 17.715 -> 17.579 ( -0.77%) [ +0.03% +0.02% +0.00% / -0.27% -0.77% -0.76%] index_select reverse : Elapsed 0.035 ms (17.720 ms / 500) 17.582 -> 17.473 ( -0.62%) [ +0.06% +0.00% +0.01% / -0.62% -0.53% -0.59%] index_select skip64 : Elapsed 0.035 ms (17.593 ms / 500) 17.597 -> 17.472 ( -0.71%) [ +0.08% +0.03% +0.00% / -0.71% -0.64% -0.56%] index_select skip256 : Elapsed 0.035 ms (17.611 ms / 500) 17.660 -> 17.521 ( -0.79%) [ +0.11% +0.00% +0.01% / -0.62% -0.74% -0.79%] index_select spread : Elapsed 0.035 ms (17.679 ms / 500) 17.612 -> 17.604 ( -0.05%) [ +0.03% +0.00% +0.14% / -0.05% +0.20% +0.18%] index_select strided 3 : Elapsed 0.035 ms (17.617 ms / 500) 17.718 -> 17.649 ( -0.39%) [ +0.03% +0.11% +0.00% / -0.39% -0.24% -0.25%] index_select strided 5 : Elapsed 0.035 ms (17.723 ms / 500) 17.742 -> 17.583 ( -0.90%) [ +0.05% +0.00% +0.08% / -0.59% -0.90% -0.79%] index_select strided 7 : Elapsed 0.036 ms (17.751 ms / 500) 17.631 -> 17.526 ( -0.60%) [ +0.05% +0.00% +0.03% / -0.59% -0.59% -0.60%] index_select strided 8 : Elapsed 0.035 ms (17.639 ms / 500) 17.542 -> 17.493 ( -0.28%) [ +0.03% +0.03% +0.00% / -0.28% -0.22% -0.22%] index_select strided 16 : Elapsed 0.035 ms (17.548 ms / 500) 17.698 -> 17.440 ( -1.46%) [ +0.00% +0.03% +0.03% / -0.85% -1.46% -1.32%] index_select strided 64 : Elapsed 0.035 ms (17.698 ms / 500) 17.622 -> 17.541 ( -0.46%) [ +0.12% +0.00% +0.02% / -0.46% -0.25% -0.22%] index_select strided 100 : Elapsed 0.035 ms (17.644 ms / 500) 17.657 -> 17.554 ( -0.58%) [ +0.10% +0.00% +0.06% / -0.32% -0.58% -0.55%] index_select strided 255 : Elapsed 0.035 ms (17.674 ms / 500) 17.709 -> 17.572 ( -0.77%) [ +0.06% +0.03% +0.00% / -0.26% -0.77% -0.65%] index_select random : Elapsed 0.035 ms (17.720 ms / 500) 17.635 -> 17.562 ( -0.41%) [ +0.12% +0.09% +0.00% / -0.41% -0.27% -0.25%] index_select random_sorted : Elapsed 0.035 ms (17.656 ms / 500) B = [32, 512] (stride (1, 32)) dim = 1 fill_cnt = 256 8.837 -> 8.772 ( -0.74%) [ +0.22% +0.09% +0.00% / -0.62% -0.74% -0.60%] index_fill_ const : Elapsed 0.018 ms (8.856 ms / 500) 8.913 -> 8.857 ( -0.63%) [ +0.18% +0.07% +0.00% / -0.63% -0.45% -0.28%] index_fill_ linear : Elapsed 0.018 ms (8.929 ms / 500) 8.920 -> 8.877 ( -0.48%) [ +0.00% +0.25% +0.07% / -0.33% -0.48% -0.47%] index_fill_ reverse : Elapsed 0.018 ms (8.920 ms / 500) 8.888 -> 8.820 ( -0.77%) [ +0.00% +0.09% +0.05% / -0.77% -0.32% -0.28%] index_fill_ skip64 : Elapsed 0.018 ms (8.888 ms / 500) 8.845 -> 8.835 ( -0.11%) [ +0.20% +0.00% +0.02% / -0.11% -0.02% -0.03%] index_fill_ skip256 : Elapsed 0.018 ms (8.863 ms / 500) 8.894 -> 8.841 ( -0.60%) [ +0.00% +0.04% +0.00% / -0.46% -0.53% -0.60%] index_fill_ spread : Elapsed 0.018 ms (8.894 ms / 500) 8.910 -> 8.827 ( -0.93%) [ +0.03% +0.00% +0.06% / -0.74% -0.88% -0.93%] index_fill_ strided 3 : Elapsed 0.018 ms (8.913 ms / 500) 8.884 -> 8.844 ( -0.45%) [ +0.16% +0.00% +0.07% / -0.38% -0.45% -0.39%] index_fill_ strided 5 : Elapsed 0.018 ms (8.898 ms / 500) 8.890 -> 8.819 ( -0.80%) [ +0.12% +0.00% +0.10% / -0.54% -0.76% -0.80%] index_fill_ strided 7 : Elapsed 0.018 ms (8.901 ms / 500) 8.876 -> 8.805 ( -0.80%) [ +0.26% +0.15% +0.00% / -0.80% -0.45% -0.41%] index_fill_ strided 8 : Elapsed 0.018 ms (8.899 ms / 500) 8.873 -> 8.843 ( -0.34%) [ +0.14% +0.00% +0.06% / -0.29% -0.34% -0.25%] index_fill_ strided 16 : Elapsed 0.018 ms (8.885 ms / 500) 8.866 -> 8.827 ( -0.44%) [ +0.08% +0.20% +0.00% / -0.44% -0.10% -0.24%] index_fill_ strided 64 : Elapsed 0.018 ms (8.873 ms / 500) 8.891 -> 8.826 ( -0.73%) [ +0.26% +0.00% +0.00% / -0.38% -0.58% -0.73%] index_fill_ strided 100 : Elapsed 0.018 ms (8.914 ms / 500) 8.876 -> 8.858 ( -0.20%) [ +0.21% +0.03% +0.00% / -0.15% -0.20% -0.17%] index_fill_ strided 255 : Elapsed 0.018 ms (8.895 ms / 500) 8.843 -> 8.776 ( -0.76%) [ +0.31% +0.00% +0.17% / -0.58% -0.76% -0.62%] index_fill_ strided 256 : Elapsed 0.018 ms (8.870 ms / 500) 8.881 -> 8.823 ( -0.65%) [ +0.03% +0.00% +0.06% / -0.65% -0.56% -0.53%] index_fill_ strided 257 : Elapsed 0.018 ms (8.884 ms / 500) 8.856 -> 8.819 ( -0.42%) [ +0.05% +0.01% +0.00% / -0.37% -0.42% -0.25%] index_fill_ random : Elapsed 0.018 ms (8.860 ms / 500) 8.889 -> 8.824 ( -0.73%) [ +0.24% +0.00% +0.04% / -0.13% -0.70% -0.73%] index_fill_ random_sorted : Elapsed 0.018 ms (8.910 ms / 500) 8.932 -> 8.861 ( -0.79%) [ +0.22% +0.00% +0.09% / -0.44% -0.62% -0.79%] index_fill_ perm : Elapsed 0.018 ms (8.952 ms / 500) 8.904 -> 8.860 ( -0.49%) [ +0.21% +0.11% +0.00% / -0.13% -0.42% -0.49%] index_fill_ perm_sorted : Elapsed 0.018 ms (8.923 ms / 500) B = [32, 512] (stride (1, 32)) A = [32, 256] (stride (256, 1)) dim = 1 9.355 -> 9.162 ( -2.06%) [ +0.15% +0.07% +0.00% / -1.65% -2.06% -2.05%] index_add_ linear : Elapsed 0.019 ms (9.369 ms / 500) 9.373 -> 9.180 ( -2.06%) [ +0.02% +0.04% +0.00% / -1.40% -2.06% -1.98%] index_copy_ linear : Elapsed 0.019 ms (9.375 ms / 500) 9.404 -> 9.159 ( -2.61%) [ +0.10% +0.05% +0.00% / -1.71% -2.55% -2.61%] index_add_ reverse : Elapsed 0.019 ms (9.413 ms / 500) 9.390 -> 9.185 ( -2.18%) [ +0.14% +0.00% +0.15% / -1.57% -2.18% -2.11%] index_copy_ reverse : Elapsed 0.019 ms (9.403 ms / 500) 9.380 -> 9.193 ( -1.99%) [ +0.22% +0.00% +0.00% / -1.99% -1.29% -1.26%] index_add_ spread : Elapsed 0.019 ms (9.401 ms / 500) 9.420 -> 9.241 ( -1.90%) [ +0.03% +0.02% +0.00% / -1.90% -1.45% -1.44%] index_copy_ spread : Elapsed 0.019 ms (9.423 ms / 500) 9.433 -> 9.207 ( -2.40%) [ +0.15% +0.00% +0.15% / -1.87% -2.35% -2.40%] index_add_ strided 3 : Elapsed 0.019 ms (9.447 ms / 500) 9.444 -> 9.255 ( -2.00%) [ +0.16% +0.00% +0.12% / -1.84% -1.97% -2.00%] index_copy_ strided 3 : Elapsed 0.019 ms (9.459 ms / 500) 9.356 -> 9.184 ( -1.84%) [ +0.15% +0.00% +0.04% / -1.84% -0.98% -0.98%] index_add_ strided 5 : Elapsed 0.019 ms (9.370 ms / 500) 9.348 -> 9.190 ( -1.69%) [ +0.10% +0.07% +0.00% / -1.69% -0.79% -0.73%] index_copy_ strided 5 : Elapsed 0.019 ms (9.357 ms / 500) 9.326 -> 9.159 ( -1.79%) [ +0.03% +0.00% +0.13% / -1.79% -1.18% -1.16%] index_add_ strided 7 : Elapsed 0.019 ms (9.329 ms / 500) 9.325 -> 9.167 ( -1.69%) [ +0.25% +0.00% +0.18% / -1.69% -0.99% -1.02%] index_copy_ strided 7 : Elapsed 0.019 ms (9.348 ms / 500) 9.332 -> 9.163 ( -1.81%) [ +0.17% +0.09% +0.00% / -1.81% -1.34% -1.36%] index_add_ strided 255 : Elapsed 0.019 ms (9.348 ms / 500) 9.342 -> 9.173 ( -1.81%) [ +0.04% +0.06% +0.00% / -1.81% -1.27% -1.15%] index_copy_ strided 255 : Elapsed 0.019 ms (9.346 ms / 500) 9.345 -> 9.163 ( -1.95%) [ +0.03% +0.01% +0.00% / -1.95% -1.34% -1.27%] index_add_ strided 257 : Elapsed 0.019 ms (9.348 ms / 500) 9.354 -> 9.185 ( -1.81%) [ +0.04% +0.00% +0.11% / -1.81% -1.22% -1.07%] index_copy_ strided 257 : Elapsed 0.019 ms (9.358 ms / 500) 9.352 -> 9.182 ( -1.82%) [ +0.19% +0.00% +0.01% / -1.80% -1.74% -1.82%] index_add_ perm : Elapsed 0.019 ms (9.370 ms / 500) 9.364 -> 9.183 ( -1.93%) [ +0.04% +0.09% +0.00% / -1.77% -1.93% -1.72%] index_copy_ perm : Elapsed 0.019 ms (9.368 ms / 500) 9.378 -> 9.178 ( -2.13%) [ +0.09% +0.15% +0.00% / -1.91% -2.12% -2.13%] index_add_ perm_sorted : Elapsed 0.019 ms (9.386 ms / 500) 9.405 -> 9.188 ( -2.31%) [ +0.05% +0.00% +0.00% / -1.88% -2.31% -2.21%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.410 ms / 500) 17.564 -> 17.485 ( -0.45%) [ +0.06% +0.00% +0.02% / +0.08% -0.41% -0.45%] index_select const : Elapsed 0.035 ms (17.575 ms / 500) 17.721 -> 17.693 ( -0.16%) [ +0.01% +0.00% +0.02% / +0.05% -0.16% -0.14%] index_select wrap : Elapsed 0.035 ms (17.723 ms / 500) 17.580 -> 17.595 ( +0.09%) [ +0.01% +0.00% +0.07% / +0.09% +0.76% +0.82%] index_select linear : Elapsed 0.035 ms (17.582 ms / 500) 17.608 -> 17.608 ( +0.00%) [ +0.07% +0.00% +0.08% / +0.00% +0.40% +0.40%] index_select reverse : Elapsed 0.035 ms (17.620 ms / 500) 17.582 -> 17.575 ( -0.04%) [ +0.03% +0.00% +0.02% / -0.04% +0.68% +0.61%] index_select skip64 : Elapsed 0.035 ms (17.588 ms / 500) 17.599 -> 17.483 ( -0.66%) [ +0.07% +0.00% +0.04% / +0.15% -0.66% -0.62%] index_select skip256 : Elapsed 0.035 ms (17.611 ms / 500) 17.651 -> 17.660 ( +0.05%) [ +0.00% +0.05% +0.08% / +0.06% +0.05% +0.09%] index_select spread : Elapsed 0.035 ms (17.651 ms / 500) 17.575 -> 17.604 ( +0.17%) [ +0.11% +0.06% +0.00% / +0.17% +0.63% +0.64%] index_select strided 3 : Elapsed 0.035 ms (17.595 ms / 500) 17.697 -> 17.705 ( +0.05%) [ +0.06% +0.05% +0.00% / +0.05% +0.19% +0.14%] index_select strided 5 : Elapsed 0.035 ms (17.708 ms / 500) 17.708 -> 17.600 ( -0.61%) [ +0.00% +0.00% +0.02% / +0.03% -0.61% -0.50%] index_select strided 7 : Elapsed 0.035 ms (17.708 ms / 500) 17.646 -> 17.655 ( +0.05%) [ +0.00% +0.04% +0.04% / +0.05% +0.10% +0.12%] index_select strided 8 : Elapsed 0.035 ms (17.646 ms / 500) 17.540 -> 17.547 ( +0.04%) [ +0.09% +0.00% +0.06% / +0.04% +0.47% +0.48%] index_select strided 16 : Elapsed 0.035 ms (17.556 ms / 500) 17.676 -> 17.581 ( -0.54%) [ +0.01% +0.00% +0.02% / +0.00% -0.54% -0.54%] index_select strided 64 : Elapsed 0.035 ms (17.677 ms / 500) 17.656 -> 17.659 ( +0.02%) [ +0.00% +0.01% +0.04% / +0.02% +0.38% +0.35%] index_select strided 100 : Elapsed 0.035 ms (17.656 ms / 500) 17.620 -> 17.631 ( +0.06%) [ +0.13% +0.06% +0.00% / +0.06% +0.07% +0.09%] index_select strided 255 : Elapsed 0.035 ms (17.643 ms / 500) 17.673 -> 17.623 ( -0.28%) [ +0.03% +0.09% +0.00% / +0.08% -0.15% -0.28%] index_select random : Elapsed 0.035 ms (17.678 ms / 500) 17.627 -> 17.627 ( +0.00%) [ +0.00% +0.10% +0.02% / +0.00% +0.50% +0.58%] index_select random_sorted : Elapsed 0.035 ms (17.627 ms / 500) B = [32, 512] (stride (1, 32)) A = [32, 256] (stride (1, 32)) dim = 1 9.314 -> 9.150 ( -1.76%) [ +0.09% +0.10% +0.00% / -1.76% -1.45% -1.44%] index_add_ linear : Elapsed 0.019 ms (9.322 ms / 500) 9.219 -> 9.050 ( -1.83%) [ +0.09% +0.00% +0.02% / -1.83% -1.54% -1.46%] index_copy_ linear : Elapsed 0.018 ms (9.227 ms / 500) 9.445 -> 9.155 ( -3.07%) [ +0.00% +0.14% +0.03% / -1.95% -2.93% -3.07%] index_add_ reverse : Elapsed 0.019 ms (9.445 ms / 500) 9.360 -> 9.053 ( -3.28%) [ +0.00% +0.04% +0.03% / -1.79% -3.27% -3.28%] index_copy_ reverse : Elapsed 0.019 ms (9.360 ms / 500) 9.348 -> 9.158 ( -2.03%) [ +0.06% +0.01% +0.00% / -2.03% -0.90% -0.91%] index_add_ spread : Elapsed 0.019 ms (9.354 ms / 500) 9.236 -> 9.081 ( -1.68%) [ +0.15% +0.10% +0.00% / -1.68% -0.47% -0.55%] index_copy_ spread : Elapsed 0.018 ms (9.250 ms / 500) 9.410 -> 9.204 ( -2.19%) [ +0.05% +0.02% +0.00% / -1.87% -2.10% -2.19%] index_add_ strided 3 : Elapsed 0.019 ms (9.415 ms / 500) 9.308 -> 9.104 ( -2.19%) [ +0.11% +0.02% +0.00% / -1.86% -2.19% -2.14%] index_copy_ strided 3 : Elapsed 0.019 ms (9.318 ms / 500) 9.389 -> 9.216 ( -1.84%) [ +0.10% +0.00% +0.04% / -1.84% -1.42% -1.30%] index_add_ strided 5 : Elapsed 0.019 ms (9.398 ms / 500) 9.320 -> 9.142 ( -1.91%) [ +0.14% +0.00% +0.10% / -1.83% -1.91% -1.82%] index_copy_ strided 5 : Elapsed 0.019 ms (9.333 ms / 500) 9.304 -> 9.138 ( -1.78%) [ +0.00% +0.04% +0.10% / -1.78% -0.66% -0.67%] index_add_ strided 7 : Elapsed 0.019 ms (9.304 ms / 500) 9.212 -> 9.057 ( -1.68%) [ +0.03% +0.07% +0.00% / -1.68% -0.69% -0.71%] index_copy_ strided 7 : Elapsed 0.018 ms (9.215 ms / 500) 9.343 -> 9.176 ( -1.79%) [ +0.12% +0.00% +0.01% / -1.72% -1.79% -1.79%] index_add_ strided 255 : Elapsed 0.019 ms (9.354 ms / 500) 9.248 -> 9.060 ( -2.03%) [ +0.00% +0.02% +0.03% / -1.76% -2.03% -1.96%] index_copy_ strided 255 : Elapsed 0.018 ms (9.248 ms / 500) 9.317 -> 9.154 ( -1.75%) [ +0.00% +0.04% +0.01% / -1.75% -1.44% -1.45%] index_add_ strided 257 : Elapsed 0.019 ms (9.317 ms / 500) 9.202 -> 9.067 ( -1.47%) [ +0.21% +0.00% +0.12% / -1.47% -1.47% -1.27%] index_copy_ strided 257 : Elapsed 0.018 ms (9.221 ms / 500) 9.309 -> 9.139 ( -1.83%) [ +0.06% +0.00% +0.04% / -1.83% -1.15% -1.26%] index_add_ perm : Elapsed 0.019 ms (9.315 ms / 500) 9.220 -> 9.050 ( -1.84%) [ +0.09% +0.08% +0.00% / -1.84% -1.42% -1.46%] index_copy_ perm : Elapsed 0.018 ms (9.228 ms / 500) 9.415 -> 9.156 ( -2.75%) [ +0.16% +0.17% +0.00% / -1.74% -2.75% -2.69%] index_add_ perm_sorted : Elapsed 0.019 ms (9.430 ms / 500) 9.354 -> 9.050 ( -3.25%) [ +0.06% +0.00% +0.03% / -1.79% -3.25% -3.19%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.360 ms / 500) 17.491 -> 17.501 ( +0.06%) [ +0.00% +0.01% +0.07% / +0.06% +0.22% +0.25%] index_select const : Elapsed 0.035 ms (17.491 ms / 500) 17.504 -> 17.503 ( -0.01%) [ +0.05% +0.00% +0.03% / -0.01% +0.70% +0.77%] index_select wrap : Elapsed 0.035 ms (17.512 ms / 500) 17.540 -> 17.549 ( +0.05%) [ +0.00% +0.06% +0.03% / +0.05% +0.12% +0.14%] index_select linear : Elapsed 0.035 ms (17.540 ms / 500) 17.575 -> 17.524 ( -0.29%) [ +0.06% +0.07% +0.00% / +0.04% -0.29% -0.22%] index_select reverse : Elapsed 0.035 ms (17.585 ms / 500) 17.465 -> 17.456 ( -0.05%) [ +0.00% +0.04% +0.10% / -0.05% +0.39% +0.42%] index_select skip64 : Elapsed 0.035 ms (17.465 ms / 500) 17.504 -> 17.502 ( -0.01%) [ +0.04% +0.00% +0.03% / -0.01% +0.29% +0.26%] index_select skip256 : Elapsed 0.035 ms (17.511 ms / 500) 17.531 -> 17.513 ( -0.10%) [ +0.07% +0.00% +0.07% / +0.03% -0.10% -0.04%] index_select spread : Elapsed 0.035 ms (17.543 ms / 500) 17.509 -> 17.503 ( -0.03%) [ +0.03% +0.00% +0.00% / -0.03% +0.34% +0.37%] index_select strided 3 : Elapsed 0.035 ms (17.514 ms / 500) 17.620 -> 17.615 ( -0.03%) [ +0.06% +0.00% +0.02% / +0.02% -0.03% +0.03%] index_select strided 5 : Elapsed 0.035 ms (17.630 ms / 500) 17.611 -> 17.494 ( -0.66%) [ +0.00% +0.11% +0.11% / +0.12% -0.64% -0.66%] index_select strided 7 : Elapsed 0.035 ms (17.611 ms / 500) 17.490 -> 17.476 ( -0.08%) [ +0.03% +0.00% +0.03% / +0.07% -0.08% -0.01%] index_select strided 8 : Elapsed 0.035 ms (17.495 ms / 500) 17.436 -> 17.438 ( +0.01%) [ +0.02% +0.01% +0.00% / +0.01% +0.40% +0.38%] index_select strided 16 : Elapsed 0.035 ms (17.439 ms / 500) 17.586 -> 17.484 ( -0.58%) [ +0.00% +0.10% +0.02% / +0.07% -0.45% -0.58%] index_select strided 64 : Elapsed 0.035 ms (17.586 ms / 500) 17.514 -> 17.521 ( +0.04%) [ +0.03% +0.00% +0.01% / +0.04% +0.32% +0.32%] index_select strided 100 : Elapsed 0.035 ms (17.520 ms / 500) 17.538 -> 17.496 ( -0.24%) [ +0.06% +0.04% +0.00% / +0.10% -0.24% -0.21%] index_select strided 255 : Elapsed 0.035 ms (17.549 ms / 500) 17.587 -> 17.506 ( -0.46%) [ +0.00% +0.02% +0.06% / +0.07% -0.45% -0.46%] index_select random : Elapsed 0.035 ms (17.587 ms / 500) 17.546 -> 17.543 ( -0.02%) [ +0.02% +0.00% +0.03% / -0.02% +0.38% +0.31%] index_select random_sorted : Elapsed 0.035 ms (17.549 ms / 500) out_shape = [512, 32] in_shape = [256, 32] idx_dim = 0 B = [512, 32] (stride (32, 1)) dim = 0 fill_cnt = 256 8.829 -> 8.775 ( -0.61%) [ +0.16% +0.19% +0.00% / -0.55% -0.49% -0.61%] index_fill_ const : Elapsed 0.018 ms (8.843 ms / 500) 8.903 -> 8.860 ( -0.48%) [ +0.19% +0.25% +0.00% / -0.48% -0.09% -0.12%] index_fill_ linear : Elapsed 0.018 ms (8.920 ms / 500) 8.922 -> 8.877 ( -0.50%) [ +0.12% +0.00% +0.24% / -0.26% -0.36% -0.50%] index_fill_ reverse : Elapsed 0.018 ms (8.933 ms / 500) 8.886 -> 8.825 ( -0.69%) [ +0.00% +0.09% +0.09% / -0.69% -0.38% -0.20%] index_fill_ skip64 : Elapsed 0.018 ms (8.886 ms / 500) 8.848 -> 8.837 ( -0.12%) [ +0.12% +0.11% +0.00% / -0.12% -0.02% -0.01%] index_fill_ skip256 : Elapsed 0.018 ms (8.859 ms / 500) 8.883 -> 8.842 ( -0.46%) [ +0.08% +0.24% +0.00% / -0.20% -0.46% -0.45%] index_fill_ spread : Elapsed 0.018 ms (8.890 ms / 500) 8.913 -> 8.829 ( -0.94%) [ +0.00% +0.06% +0.01% / -0.76% -0.92% -0.94%] index_fill_ strided 3 : Elapsed 0.018 ms (8.913 ms / 500) 8.867 -> 8.835 ( -0.36%) [ +0.20% +0.16% +0.00% / -0.32% -0.32% -0.36%] index_fill_ strided 5 : Elapsed 0.018 ms (8.885 ms / 500) 8.885 -> 8.823 ( -0.70%) [ +0.23% +0.15% +0.00% / -0.59% -0.70% -0.53%] index_fill_ strided 7 : Elapsed 0.018 ms (8.905 ms / 500) 8.879 -> 8.806 ( -0.82%) [ +0.03% +0.00% +0.10% / -0.82% -0.42% -0.44%] index_fill_ strided 8 : Elapsed 0.018 ms (8.882 ms / 500) 8.878 -> 8.833 ( -0.51%) [ +0.17% +0.08% +0.00% / -0.41% -0.50% -0.51%] index_fill_ strided 16 : Elapsed 0.018 ms (8.893 ms / 500) 8.872 -> 8.838 ( -0.38%) [ +0.19% +0.21% +0.00% / -0.38% -0.09% -0.24%] index_fill_ strided 64 : Elapsed 0.018 ms (8.889 ms / 500) 8.888 -> 8.837 ( -0.57%) [ +0.24% +0.00% +0.03% / -0.29% -0.57% -0.56%] index_fill_ strided 100 : Elapsed 0.018 ms (8.909 ms / 500) 8.881 -> 8.861 ( -0.23%) [ +0.07% +0.01% +0.00% / -0.12% -0.23% -0.20%] index_fill_ strided 255 : Elapsed 0.018 ms (8.887 ms / 500) 8.842 -> 8.772 ( -0.79%) [ +0.29% +0.09% +0.00% / -0.55% -0.79% -0.78%] index_fill_ strided 256 : Elapsed 0.018 ms (8.868 ms / 500) 8.887 -> 8.833 ( -0.61%) [ +0.01% +0.00% +0.00% / -0.61% -0.51% -0.56%] index_fill_ strided 257 : Elapsed 0.018 ms (8.888 ms / 500) 8.896 -> 8.813 ( -0.93%) [ +0.35% +0.12% +0.00% / -0.93% -0.84% -0.88%] index_fill_ random : Elapsed 0.018 ms (8.927 ms / 500) 8.898 -> 8.844 ( -0.61%) [ +0.10% +0.07% +0.00% / -0.61% -0.43% -0.42%] index_fill_ random_sorted : Elapsed 0.018 ms (8.907 ms / 500) 8.914 -> 8.890 ( -0.27%) [ +0.00% +0.03% +0.04% / -0.27% -0.27% -0.13%] index_fill_ perm : Elapsed 0.018 ms (8.914 ms / 500) 8.899 -> 8.868 ( -0.35%) [ +0.29% +0.13% +0.00% / -0.35% -0.25% -0.24%] index_fill_ perm_sorted : Elapsed 0.018 ms (8.925 ms / 500) B = [512, 32] (stride (32, 1)) A = [256, 32] (stride (32, 1)) dim = 0 9.339 -> 9.161 ( -1.91%) [ +0.21% +0.14% +0.00% / -1.70% -1.82% -1.91%] index_add_ linear : Elapsed 0.019 ms (9.359 ms / 500) 9.265 -> 9.065 ( -2.16%) [ +0.05% +0.10% +0.00% / -1.71% -2.16% -2.14%] index_copy_ linear : Elapsed 0.019 ms (9.270 ms / 500) 9.375 -> 9.159 ( -2.30%) [ +0.23% +0.00% +0.12% / -1.60% -2.30% -2.28%] index_add_ reverse : Elapsed 0.019 ms (9.397 ms / 500) 9.290 -> 9.056 ( -2.52%) [ +0.09% +0.09% +0.00% / -1.72% -2.50% -2.52%] index_copy_ reverse : Elapsed 0.019 ms (9.298 ms / 500) 9.371 -> 9.199 ( -1.84%) [ +0.23% +0.04% +0.00% / -1.84% -1.12% -1.14%] index_add_ spread : Elapsed 0.019 ms (9.393 ms / 500) 9.327 -> 9.149 ( -1.91%) [ +0.08% +0.04% +0.00% / -1.91% -1.73% -1.74%] index_copy_ spread : Elapsed 0.019 ms (9.334 ms / 500) 9.430 -> 9.217 ( -2.26%) [ +0.13% +0.00% +0.06% / -1.85% -2.21% -2.26%] index_add_ strided 3 : Elapsed 0.019 ms (9.442 ms / 500) 9.369 -> 9.131 ( -2.54%) [ +0.16% +0.01% +0.00% / -1.93% -2.54% -2.50%] index_copy_ strided 3 : Elapsed 0.019 ms (9.384 ms / 500) 9.344 -> 9.191 ( -1.64%) [ +0.09% +0.05% +0.00% / -1.64% -0.81% -0.75%] index_add_ strided 5 : Elapsed 0.019 ms (9.352 ms / 500) 9.248 -> 9.073 ( -1.89%) [ +0.06% +0.09% +0.00% / -1.89% -0.83% -0.90%] index_copy_ strided 5 : Elapsed 0.019 ms (9.254 ms / 500) 9.319 -> 9.161 ( -1.70%) [ +0.09% +0.01% +0.00% / -1.70% -1.04% -1.09%] index_add_ strided 7 : Elapsed 0.019 ms (9.327 ms / 500) 9.230 -> 9.066 ( -1.78%) [ +0.07% +0.00% +0.12% / -1.78% -1.29% -1.18%] index_copy_ strided 7 : Elapsed 0.018 ms (9.236 ms / 500) 9.320 -> 9.159 ( -1.73%) [ +0.04% +0.01% +0.00% / -1.73% -1.08% -1.22%] index_add_ strided 255 : Elapsed 0.019 ms (9.324 ms / 500) 9.224 -> 9.054 ( -1.84%) [ +0.18% +0.10% +0.00% / -1.84% -1.14% -1.32%] index_copy_ strided 255 : Elapsed 0.018 ms (9.241 ms / 500) 9.334 -> 9.162 ( -1.84%) [ +0.02% +0.00% +0.11% / -1.84% -1.34% -1.32%] index_add_ strided 257 : Elapsed 0.019 ms (9.336 ms / 500) 9.245 -> 9.082 ( -1.76%) [ +0.01% +0.08% +0.00% / -1.76% -1.33% -1.45%] index_copy_ strided 257 : Elapsed 0.018 ms (9.246 ms / 500) 9.345 -> 9.175 ( -1.82%) [ +0.11% +0.11% +0.00% / -1.79% -1.82% -1.74%] index_add_ perm : Elapsed 0.019 ms (9.355 ms / 500) 9.264 -> 9.053 ( -2.28%) [ +0.14% +0.10% +0.00% / -1.74% -2.28% -2.18%] index_copy_ perm : Elapsed 0.019 ms (9.277 ms / 500) 9.385 -> 9.176 ( -2.23%) [ +0.11% +0.00% +0.04% / -1.75% -2.21% -2.23%] index_add_ perm_sorted : Elapsed 0.019 ms (9.395 ms / 500) 9.306 -> 9.058 ( -2.66%) [ +0.04% +0.00% +0.01% / -1.90% -2.65% -2.66%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.310 ms / 500) 17.492 -> 17.415 ( -0.44%) [ +0.09% +0.00% +0.10% / +0.06% -0.44% -0.37%] index_select const : Elapsed 0.035 ms (17.507 ms / 500) 17.628 -> 17.589 ( -0.22%) [ +0.00% +0.14% +0.09% / +0.02% -0.19% -0.22%] index_select wrap : Elapsed 0.035 ms (17.628 ms / 500) 17.490 -> 17.499 ( +0.05%) [ +0.00% +0.04% +0.03% / +0.05% +0.75% +0.77%] index_select linear : Elapsed 0.035 ms (17.490 ms / 500) 17.521 -> 17.537 ( +0.09%) [ +0.08% +0.00% +0.05% / +0.12% +0.21% +0.09%] index_select reverse : Elapsed 0.035 ms (17.535 ms / 500) 17.493 -> 17.502 ( +0.05%) [ +0.00% +0.09% +0.01% / +0.05% +0.50% +0.59%] index_select skip64 : Elapsed 0.035 ms (17.493 ms / 500) 17.546 -> 17.418 ( -0.73%) [ +0.06% +0.00% +0.06% / +0.03% -0.67% -0.73%] index_select skip256 : Elapsed 0.035 ms (17.557 ms / 500) 17.548 -> 17.542 ( -0.03%) [ +0.00% +0.05% +0.06% / +0.05% +0.03% -0.03%] index_select spread : Elapsed 0.035 ms (17.548 ms / 500) 17.491 -> 17.506 ( +0.09%) [ +0.00% +0.01% +0.05% / +0.09% +0.49% +0.60%] index_select strided 3 : Elapsed 0.035 ms (17.491 ms / 500) 17.615 -> 17.614 ( -0.01%) [ +0.00% +0.01% +0.04% / +0.04% -0.01% +0.04%] index_select strided 5 : Elapsed 0.035 ms (17.615 ms / 500) 17.618 -> 17.500 ( -0.67%) [ +0.03% +0.06% +0.00% / +0.02% -0.67% -0.65%] index_select strided 7 : Elapsed 0.035 ms (17.624 ms / 500) 17.491 -> 17.490 ( -0.01%) [ +0.01% +0.03% +0.00% / -0.01% +0.07% -0.01%] index_select strided 8 : Elapsed 0.035 ms (17.493 ms / 500) 17.430 -> 17.434 ( +0.02%) [ +0.02% +0.07% +0.00% / +0.02% +0.47% +0.34%] index_select strided 16 : Elapsed 0.035 ms (17.434 ms / 500) 17.594 -> 17.490 ( -0.59%) [ +0.04% +0.00% +0.05% / +0.09% -0.59% -0.57%] index_select strided 64 : Elapsed 0.035 ms (17.601 ms / 500) 17.493 -> 17.526 ( +0.19%) [ +0.19% +0.19% +0.00% / +0.19% +0.40% +0.49%] index_select strided 100 : Elapsed 0.035 ms (17.527 ms / 500) 17.526 -> 17.513 ( -0.07%) [ +0.01% +0.03% +0.00% / +0.12% +0.00% -0.07%] index_select strided 255 : Elapsed 0.035 ms (17.527 ms / 500) 17.573 -> 17.521 ( -0.30%) [ +0.00% +0.04% +0.05% / +0.03% -0.29% -0.30%] index_select random : Elapsed 0.035 ms (17.573 ms / 500) 17.516 -> 17.545 ( +0.17%) [ +0.02% +0.00% +0.03% / +0.17% +0.63% +0.58%] index_select random_sorted : Elapsed 0.035 ms (17.520 ms / 500) B = [512, 32] (stride (32, 1)) A = [256, 32] (stride (1, 256)) dim = 0 9.327 -> 9.158 ( -1.81%) [ +0.28% +0.05% +0.00% / -1.81% -1.52% -1.42%] index_add_ linear : Elapsed 0.019 ms (9.353 ms / 500) 9.345 -> 9.182 ( -1.74%) [ +0.15% +0.00% +0.19% / -1.74% -1.47% -1.52%] index_copy_ linear : Elapsed 0.019 ms (9.359 ms / 500) 9.443 -> 9.163 ( -2.97%) [ +0.17% +0.00% +0.10% / -1.73% -2.97% -2.94%] index_add_ reverse : Elapsed 0.019 ms (9.459 ms / 500) 9.457 -> 9.177 ( -2.96%) [ +0.04% +0.01% +0.00% / -1.84% -2.96% -2.92%] index_copy_ reverse : Elapsed 0.019 ms (9.461 ms / 500) 9.338 -> 9.171 ( -1.79%) [ +0.14% +0.00% +0.02% / -1.79% -0.79% -0.65%] index_add_ spread : Elapsed 0.019 ms (9.351 ms / 500) 9.361 -> 9.198 ( -1.74%) [ +0.02% +0.03% +0.00% / -1.74% -0.62% -0.76%] index_copy_ spread : Elapsed 0.019 ms (9.363 ms / 500) 9.417 -> 9.209 ( -2.21%) [ +0.00% +0.01% +0.02% / -1.82% -2.21% -2.18%] index_add_ strided 3 : Elapsed 0.019 ms (9.417 ms / 500) 9.418 -> 9.218 ( -2.12%) [ +0.17% +0.00% +0.05% / -1.81% -2.10% -2.12%] index_copy_ strided 3 : Elapsed 0.019 ms (9.434 ms / 500) 9.395 -> 9.213 ( -1.94%) [ +0.06% +0.00% +0.09% / -1.94% -1.37% -1.32%] index_add_ strided 5 : Elapsed 0.019 ms (9.401 ms / 500) 9.426 -> 9.237 ( -2.01%) [ +0.11% +0.01% +0.00% / -2.01% -1.63% -1.60%] index_copy_ strided 5 : Elapsed 0.019 ms (9.436 ms / 500) 9.306 -> 9.155 ( -1.62%) [ +0.13% +0.00% +0.15% / -1.62% -0.67% -0.71%] index_add_ strided 7 : Elapsed 0.019 ms (9.318 ms / 500) 9.330 -> 9.164 ( -1.78%) [ +0.16% +0.05% +0.00% / -1.78% -0.71% -0.79%] index_copy_ strided 7 : Elapsed 0.019 ms (9.345 ms / 500) 9.354 -> 9.172 ( -1.95%) [ +0.11% +0.09% +0.00% / -1.62% -1.95% -1.82%] index_add_ strided 255 : Elapsed 0.019 ms (9.364 ms / 500) 9.368 -> 9.189 ( -1.91%) [ +0.06% +0.00% +0.04% / -1.62% -1.91% -1.87%] index_copy_ strided 255 : Elapsed 0.019 ms (9.374 ms / 500) 9.321 -> 9.161 ( -1.72%) [ +0.16% +0.00% +0.04% / -1.72% -1.41% -1.38%] index_add_ strided 257 : Elapsed 0.019 ms (9.336 ms / 500) 9.341 -> 9.167 ( -1.86%) [ +0.13% +0.00% +0.01% / -1.86% -1.48% -1.46%] index_copy_ strided 257 : Elapsed 0.019 ms (9.353 ms / 500) 9.338 -> 9.166 ( -1.84%) [ +0.06% +0.00% +0.01% / -1.84% -1.49% -1.63%] index_add_ perm : Elapsed 0.019 ms (9.344 ms / 500) 9.346 -> 9.177 ( -1.81%) [ +0.16% +0.00% +0.09% / -1.81% -1.50% -1.60%] index_copy_ perm : Elapsed 0.019 ms (9.361 ms / 500) 9.446 -> 9.147 ( -3.17%) [ +0.13% +0.08% +0.00% / -1.96% -3.17% -3.12%] index_add_ perm_sorted : Elapsed 0.019 ms (9.458 ms / 500) 9.450 -> 9.176 ( -2.90%) [ +0.06% +0.04% +0.00% / -1.74% -2.90% -2.90%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.456 ms / 500) 17.552 -> 17.560 ( +0.05%) [ +0.01% +0.00% +0.10% / +0.05% +0.24% +0.25%] index_select const : Elapsed 0.035 ms (17.553 ms / 500) 17.597 -> 17.603 ( +0.03%) [ +0.00% +0.04% +0.06% / +0.03% +0.69% +0.75%] index_select wrap : Elapsed 0.035 ms (17.597 ms / 500) 17.635 -> 17.649 ( +0.08%) [ +0.15% +0.00% +0.09% / +0.08% +0.09% +0.14%] index_select linear : Elapsed 0.035 ms (17.661 ms / 500) 17.680 -> 17.617 ( -0.36%) [ +0.02% +0.02% +0.00% / +0.03% -0.33% -0.36%] index_select reverse : Elapsed 0.035 ms (17.684 ms / 500) 17.541 -> 17.568 ( +0.15%) [ +0.08% +0.03% +0.00% / +0.15% +0.46% +0.42%] index_select skip64 : Elapsed 0.035 ms (17.555 ms / 500) 17.543 -> 17.566 ( +0.13%) [ +0.00% +0.06% +0.06% / +0.13% +0.38% +0.44%] index_select skip256 : Elapsed 0.035 ms (17.543 ms / 500) 17.650 -> 17.619 ( -0.18%) [ +0.02% +0.00% +0.10% / +0.08% -0.13% -0.18%] index_select spread : Elapsed 0.035 ms (17.654 ms / 500) 17.595 -> 17.613 ( +0.10%) [ +0.09% +0.06% +0.00% / +0.10% +0.46% +0.33%] index_select strided 3 : Elapsed 0.035 ms (17.610 ms / 500) 17.698 -> 17.706 ( +0.05%) [ +0.00% +0.01% +0.02% / +0.05% +0.09% +0.11%] index_select strided 5 : Elapsed 0.035 ms (17.698 ms / 500) 17.700 -> 17.585 ( -0.65%) [ +0.00% +0.03% +0.06% / +0.10% -0.54% -0.65%] index_select strided 7 : Elapsed 0.035 ms (17.700 ms / 500) 17.647 -> 17.628 ( -0.11%) [ +0.05% +0.07% +0.00% / +0.12% -0.07% -0.11%] index_select strided 8 : Elapsed 0.035 ms (17.656 ms / 500) 17.547 -> 17.562 ( +0.09%) [ +0.07% +0.03% +0.00% / +0.09% +0.36% +0.34%] index_select strided 16 : Elapsed 0.035 ms (17.560 ms / 500) 17.671 -> 17.571 ( -0.57%) [ +0.07% +0.04% +0.00% / +0.05% -0.52% -0.57%] index_select strided 64 : Elapsed 0.035 ms (17.684 ms / 500) 17.655 -> 17.658 ( +0.02%) [ +0.01% +0.00% +0.08% / +0.02% +0.28% +0.29%] index_select strided 100 : Elapsed 0.035 ms (17.657 ms / 500) 17.645 -> 17.606 ( -0.22%) [ +0.00% +0.08% +0.06% / +0.10% -0.16% -0.22%] index_select strided 255 : Elapsed 0.035 ms (17.645 ms / 500) 17.688 -> 17.603 ( -0.48%) [ +0.00% +0.07% +0.05% / +0.08% -0.44% -0.48%] index_select random : Elapsed 0.035 ms (17.688 ms / 500) 17.640 -> 17.651 ( +0.06%) [ +0.02% +0.00% +0.00% / +0.06% +0.53% +0.51%] index_select random_sorted : Elapsed 0.035 ms (17.644 ms / 500) B = [512, 32] (stride (1, 512)) dim = 0 fill_cnt = 256 8.881 -> 8.789 ( -1.04%) [ +0.09% +0.00% +0.01% / -0.93% -1.02% -1.04%] index_fill_ const : Elapsed 0.018 ms (8.889 ms / 500) 8.962 -> 8.837 ( -1.39%) [ +0.16% +0.03% +0.00% / -1.25% -1.39% -1.23%] index_fill_ linear : Elapsed 0.018 ms (8.976 ms / 500) 8.965 -> 8.842 ( -1.37%) [ +0.19% +0.00% +0.00% / -1.37% -1.24% -1.20%] index_fill_ reverse : Elapsed 0.018 ms (8.982 ms / 500) 8.939 -> 8.788 ( -1.69%) [ +0.07% +0.00% +0.04% / -1.69% -1.42% -1.42%] index_fill_ skip64 : Elapsed 0.018 ms (8.945 ms / 500) 8.894 -> 8.797 ( -1.09%) [ +0.11% +0.00% +0.08% / -0.91% -1.08% -1.09%] index_fill_ skip256 : Elapsed 0.018 ms (8.904 ms / 500) 9.071 -> 9.009 ( -0.68%) [ +0.00% +0.04% +0.03% / -0.68% -0.53% -0.54%] index_fill_ spread : Elapsed 0.018 ms (9.071 ms / 500) 9.084 -> 8.963 ( -1.33%) [ +0.22% +0.09% +0.00% / -1.13% -1.33% -1.25%] index_fill_ strided 3 : Elapsed 0.018 ms (9.104 ms / 500) 9.069 -> 8.958 ( -1.22%) [ +0.00% +0.09% +0.13% / -1.22% -1.11% -1.14%] index_fill_ strided 5 : Elapsed 0.018 ms (9.069 ms / 500) 9.083 -> 8.961 ( -1.34%) [ +0.14% +0.15% +0.00% / -1.08% -1.18% -1.34%] index_fill_ strided 7 : Elapsed 0.018 ms (9.096 ms / 500) 9.106 -> 8.981 ( -1.37%) [ +0.31% +0.16% +0.00% / -1.31% -1.37% -1.26%] index_fill_ strided 8 : Elapsed 0.018 ms (9.134 ms / 500) 8.980 -> 8.861 ( -1.33%) [ +0.31% +0.00% +0.18% / -1.33% -1.28% -1.33%] index_fill_ strided 16 : Elapsed 0.018 ms (9.008 ms / 500) 8.939 -> 8.803 ( -1.52%) [ +0.03% +0.02% +0.00% / -1.22% -1.49% -1.52%] index_fill_ strided 64 : Elapsed 0.018 ms (8.942 ms / 500) 9.132 -> 8.992 ( -1.53%) [ +0.08% +0.00% +0.01% / -1.43% -1.49% -1.53%] index_fill_ strided 100 : Elapsed 0.018 ms (9.139 ms / 500) 9.062 -> 8.988 ( -0.82%) [ +0.18% +0.24% +0.00% / -0.82% -0.72% -0.58%] index_fill_ strided 255 : Elapsed 0.018 ms (9.078 ms / 500) 8.903 -> 8.780 ( -1.38%) [ +0.06% +0.06% +0.00% / -1.06% -1.38% -1.36%] index_fill_ strided 256 : Elapsed 0.018 ms (8.908 ms / 500) 9.087 -> 9.005 ( -0.90%) [ +0.01% +0.07% +0.00% / -0.90% -0.79% -0.74%] index_fill_ strided 257 : Elapsed 0.018 ms (9.088 ms / 500) 9.068 -> 8.974 ( -1.04%) [ +0.00% +0.14% +0.06% / -1.04% -0.86% -0.83%] index_fill_ random : Elapsed 0.018 ms (9.068 ms / 500) 9.073 -> 8.967 ( -1.17%) [ +0.11% +0.02% +0.00% / -0.76% -1.07% -1.17%] index_fill_ random_sorted : Elapsed 0.018 ms (9.083 ms / 500) 9.135 -> 8.981 ( -1.69%) [ +0.14% +0.00% +0.14% / -1.32% -1.69% -1.59%] index_fill_ perm : Elapsed 0.018 ms (9.148 ms / 500) 9.077 -> 8.977 ( -1.10%) [ +0.28% +0.00% +0.01% / -0.86% -1.10% -0.97%] index_fill_ perm_sorted : Elapsed 0.018 ms (9.102 ms / 500) B = [512, 32] (stride (1, 512)) A = [256, 32] (stride (32, 1)) dim = 0 9.414 -> 9.102 ( -3.31%) [ +0.13% +0.06% +0.00% / -3.31% -3.22% -3.29%] index_add_ linear : Elapsed 0.019 ms (9.426 ms / 500) 9.336 -> 9.117 ( -2.35%) [ +0.06% +0.03% +0.00% / -1.83% -2.34% -2.35%] index_copy_ linear : Elapsed 0.019 ms (9.342 ms / 500) 9.444 -> 9.108 ( -3.56%) [ +0.17% +0.00% +0.02% / -3.00% -3.41% -3.56%] index_add_ reverse : Elapsed 0.019 ms (9.460 ms / 500) 9.346 -> 9.121 ( -2.41%) [ +0.00% +0.02% +0.05% / -1.37% -2.41% -2.27%] index_copy_ reverse : Elapsed 0.019 ms (9.346 ms / 500) 9.552 -> 9.212 ( -3.56%) [ +0.16% +0.00% +0.06% / -3.56% -3.08% -3.12%] index_add_ spread : Elapsed 0.019 ms (9.567 ms / 500) 9.548 -> 9.338 ( -2.20%) [ +0.00% +0.09% +0.03% / -2.20% -2.01% -2.01%] index_copy_ spread : Elapsed 0.019 ms (9.548 ms / 500) 9.585 -> 9.240 ( -3.60%) [ +0.21% +0.08% +0.00% / -3.44% -3.56% -3.60%] index_add_ strided 3 : Elapsed 0.019 ms (9.605 ms / 500) 9.570 -> 9.315 ( -2.66%) [ +0.00% +0.14% +0.03% / -2.24% -2.64% -2.66%] index_copy_ strided 3 : Elapsed 0.019 ms (9.570 ms / 500) 9.540 -> 9.218 ( -3.38%) [ +0.13% +0.06% +0.00% / -3.38% -2.51% -2.58%] index_add_ strided 5 : Elapsed 0.019 ms (9.552 ms / 500) 9.483 -> 9.311 ( -1.81%) [ +0.13% +0.00% +0.09% / -1.81% -1.46% -1.49%] index_copy_ strided 5 : Elapsed 0.019 ms (9.495 ms / 500) 9.518 -> 9.230 ( -3.03%) [ +0.13% +0.00% +0.11% / -3.03% -2.16% -2.19%] index_add_ strided 7 : Elapsed 0.019 ms (9.530 ms / 500) 9.464 -> 9.304 ( -1.69%) [ +0.11% +0.00% +0.14% / -1.69% -1.45% -1.30%] index_copy_ strided 7 : Elapsed 0.019 ms (9.474 ms / 500) 9.519 -> 9.196 ( -3.39%) [ +0.04% +0.00% +0.05% / -3.39% -2.92% -2.91%] index_add_ strided 255 : Elapsed 0.019 ms (9.523 ms / 500) 9.469 -> 9.290 ( -1.89%) [ +0.20% +0.08% +0.00% / -1.73% -1.88% -1.89%] index_copy_ strided 255 : Elapsed 0.019 ms (9.488 ms / 500) 9.517 -> 9.177 ( -3.57%) [ +0.15% +0.05% +0.00% / -3.57% -3.22% -3.26%] index_add_ strided 257 : Elapsed 0.019 ms (9.531 ms / 500) 9.501 -> 9.295 ( -2.17%) [ +0.02% +0.00% +0.12% / -2.17% -1.82% -1.85%] index_copy_ strided 257 : Elapsed 0.019 ms (9.503 ms / 500) 9.568 -> 9.281 ( -3.00%) [ +0.09% +0.00% +0.00% / -3.00% -2.63% -2.58%] index_add_ perm : Elapsed 0.019 ms (9.577 ms / 500) 9.518 -> 9.310 ( -2.19%) [ +0.20% +0.21% +0.00% / -1.91% -2.19% -2.11%] index_copy_ perm : Elapsed 0.019 ms (9.537 ms / 500) 9.584 -> 9.194 ( -4.07%) [ +0.00% +0.01% +0.01% / -3.72% -4.07% -3.94%] index_add_ perm_sorted : Elapsed 0.019 ms (9.584 ms / 500) 9.528 -> 9.271 ( -2.70%) [ +0.14% +0.04% +0.00% / -2.40% -2.57% -2.70%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.541 ms / 500) 17.619 -> 17.427 ( -1.09%) [ +0.08% +0.00% +0.08% / -0.79% -1.04% -1.09%] index_select const : Elapsed 0.035 ms (17.633 ms / 500) 17.741 -> 17.605 ( -0.77%) [ +0.09% +0.00% +0.05% / -0.46% -0.74% -0.77%] index_select wrap : Elapsed 0.036 ms (17.757 ms / 500) 17.599 -> 17.559 ( -0.23%) [ +0.03% +0.01% +0.00% / -0.23% +0.00% -0.03%] index_select linear : Elapsed 0.035 ms (17.605 ms / 500) 17.634 -> 17.607 ( -0.15%) [ +0.05% +0.00% +0.06% / -0.15% -0.06% -0.08%] index_select reverse : Elapsed 0.035 ms (17.643 ms / 500) 17.602 -> 17.464 ( -0.78%) [ +0.06% +0.02% +0.00% / -0.78% -0.27% -0.31%] index_select skip64 : Elapsed 0.035 ms (17.613 ms / 500) 17.667 -> 17.414 ( -1.43%) [ +0.01% +0.00% +0.02% / -0.94% -1.32% -1.43%] index_select skip256 : Elapsed 0.035 ms (17.669 ms / 500) 17.678 -> 17.545 ( -0.75%) [ +0.12% +0.04% +0.00% / -0.75% -0.70% -0.69%] index_select spread : Elapsed 0.035 ms (17.699 ms / 500) 17.597 -> 17.596 ( -0.01%) [ +0.00% +0.07% +0.07% / -0.01% +0.34% +0.25%] index_select strided 3 : Elapsed 0.035 ms (17.597 ms / 500) 17.715 -> 17.659 ( -0.32%) [ +0.04% +0.00% +0.10% / -0.32% -0.11% -0.16%] index_select strided 5 : Elapsed 0.035 ms (17.722 ms / 500) 17.733 -> 17.596 ( -0.77%) [ +0.00% +0.10% +0.07% / -0.51% -0.77% -0.77%] index_select strided 7 : Elapsed 0.035 ms (17.733 ms / 500) 17.621 -> 17.527 ( -0.53%) [ +0.02% +0.00% +0.10% / -0.49% -0.47% -0.53%] index_select strided 8 : Elapsed 0.035 ms (17.624 ms / 500) 17.539 -> 17.463 ( -0.43%) [ +0.03% +0.00% +0.05% / -0.43% -0.18% -0.19%] index_select strided 16 : Elapsed 0.035 ms (17.544 ms / 500) 17.698 -> 17.465 ( -1.32%) [ +0.00% +0.05% +0.02% / -0.82% -1.30% -1.32%] index_select strided 64 : Elapsed 0.035 ms (17.698 ms / 500) 17.620 -> 17.540 ( -0.45%) [ +0.07% +0.00% +0.04% / -0.45% -0.22% -0.20%] index_select strided 100 : Elapsed 0.035 ms (17.633 ms / 500) 17.655 -> 17.564 ( -0.52%) [ +0.02% +0.05% +0.00% / -0.42% -0.52% -0.45%] index_select strided 255 : Elapsed 0.035 ms (17.659 ms / 500) 17.704 -> 17.593 ( -0.63%) [ +0.02% +0.02% +0.00% / -0.27% -0.60% -0.63%] index_select random : Elapsed 0.035 ms (17.707 ms / 500) 17.645 -> 17.584 ( -0.35%) [ +0.01% +0.01% +0.00% / -0.35% -0.33% -0.28%] index_select random_sorted : Elapsed 0.035 ms (17.646 ms / 500) B = [512, 32] (stride (1, 512)) A = [256, 32] (stride (1, 256)) dim = 0 9.416 -> 9.135 ( -2.98%) [ +0.12% +0.08% +0.00% / -2.98% -2.81% -2.79%] index_add_ linear : Elapsed 0.019 ms (9.427 ms / 500) 9.402 -> 9.032 ( -3.94%) [ +0.27% +0.09% +0.00% / -3.94% -3.87% -3.84%] index_copy_ linear : Elapsed 0.019 ms (9.427 ms / 500) 9.512 -> 9.145 ( -3.86%) [ +0.07% +0.08% +0.00% / -3.27% -3.86% -3.74%] index_add_ reverse : Elapsed 0.019 ms (9.519 ms / 500) 9.501 -> 9.026 ( -5.00%) [ +0.02% +0.13% +0.00% / -4.06% -4.94% -5.00%] index_copy_ reverse : Elapsed 0.019 ms (9.503 ms / 500) 9.502 -> 9.249 ( -2.66%) [ +0.03% +0.05% +0.00% / -2.66% -2.00% -1.94%] index_add_ spread : Elapsed 0.019 ms (9.505 ms / 500) 9.549 -> 9.228 ( -3.36%) [ +0.12% +0.06% +0.00% / -3.36% -2.64% -2.54%] index_copy_ spread : Elapsed 0.019 ms (9.560 ms / 500) 9.559 -> 9.305 ( -2.66%) [ +0.10% +0.20% +0.00% / -2.66% -2.60% -2.64%] index_add_ strided 3 : Elapsed 0.019 ms (9.569 ms / 500) 9.604 -> 9.215 ( -4.05%) [ +0.09% +0.00% +0.08% / -3.97% -4.01% -4.05%] index_copy_ strided 3 : Elapsed 0.019 ms (9.613 ms / 500) 9.588 -> 9.257 ( -3.45%) [ +0.03% +0.00% +0.16% / -3.45% -2.48% -2.48%] index_add_ strided 5 : Elapsed 0.019 ms (9.591 ms / 500) 9.639 -> 9.229 ( -4.25%) [ +0.15% +0.06% +0.00% / -4.25% -4.00% -3.93%] index_copy_ strided 5 : Elapsed 0.019 ms (9.653 ms / 500) 9.512 -> 9.225 ( -3.02%) [ +0.07% +0.01% +0.00% / -3.02% -1.88% -1.91%] index_add_ strided 7 : Elapsed 0.019 ms (9.519 ms / 500) 9.548 -> 9.184 ( -3.81%) [ +0.20% +0.07% +0.00% / -3.81% -2.71% -2.70%] index_copy_ strided 7 : Elapsed 0.019 ms (9.567 ms / 500) 9.555 -> 9.258 ( -3.11%) [ +0.06% +0.00% +0.15% / -3.11% -2.99% -2.99%] index_add_ strided 255 : Elapsed 0.019 ms (9.561 ms / 500) 9.607 -> 9.194 ( -4.30%) [ +0.18% +0.06% +0.00% / -4.22% -4.25% -4.30%] index_copy_ strided 255 : Elapsed 0.019 ms (9.624 ms / 500) 9.521 -> 9.213 ( -3.23%) [ +0.01% +0.08% +0.00% / -3.23% -2.84% -2.85%] index_add_ strided 257 : Elapsed 0.019 ms (9.522 ms / 500) 9.562 -> 9.201 ( -3.78%) [ +0.14% +0.00% +0.03% / -3.78% -3.53% -3.43%] index_copy_ strided 257 : Elapsed 0.019 ms (9.575 ms / 500) 9.580 -> 9.304 ( -2.88%) [ +0.04% +0.06% +0.00% / -2.88% -2.39% -2.36%] index_add_ perm : Elapsed 0.019 ms (9.584 ms / 500) 9.598 -> 9.220 ( -3.94%) [ +0.22% +0.00% +0.10% / -3.94% -3.69% -3.74%] index_copy_ perm : Elapsed 0.019 ms (9.619 ms / 500) 9.607 -> 9.251 ( -3.71%) [ +0.12% +0.00% +0.07% / -3.17% -3.71% -3.62%] index_add_ perm_sorted : Elapsed 0.019 ms (9.619 ms / 500) 9.662 -> 9.183 ( -4.96%) [ +0.03% +0.04% +0.00% / -4.23% -4.96% -4.94%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.665 ms / 500) 17.667 -> 17.490 ( -1.00%) [ +0.05% +0.00% +0.03% / -0.85% -1.00% -0.99%] index_select const : Elapsed 0.035 ms (17.676 ms / 500) 17.706 -> 17.513 ( -1.09%) [ +0.11% +0.00% +0.08% / -1.09% -0.91% -0.86%] index_select wrap : Elapsed 0.035 ms (17.725 ms / 500) 17.765 -> 17.523 ( -1.36%) [ +0.00% +0.05% +0.06% / -1.36% -1.28% -1.29%] index_select linear : Elapsed 0.036 ms (17.765 ms / 500) 17.808 -> 17.510 ( -1.67%) [ +0.18% +0.02% +0.00% / -1.19% -1.67% -1.60%] index_select reverse : Elapsed 0.036 ms (17.840 ms / 500) 17.642 -> 17.494 ( -0.84%) [ +0.15% +0.01% +0.00% / -0.71% -0.75% -0.84%] index_select skip64 : Elapsed 0.035 ms (17.668 ms / 500) 17.649 -> 17.479 ( -0.96%) [ +0.09% +0.12% +0.00% / -0.96% -0.71% -0.84%] index_select skip256 : Elapsed 0.035 ms (17.665 ms / 500) 17.759 -> 17.513 ( -1.39%) [ +0.14% +0.08% +0.00% / -1.11% -1.36% -1.39%] index_select spread : Elapsed 0.036 ms (17.783 ms / 500) 17.699 -> 17.534 ( -0.93%) [ +0.03% +0.07% +0.00% / -0.93% -0.81% -0.82%] index_select strided 3 : Elapsed 0.035 ms (17.704 ms / 500) 17.809 -> 17.575 ( -1.31%) [ +0.00% +0.00% +0.01% / -1.31% -1.03% -1.15%] index_select strided 5 : Elapsed 0.036 ms (17.809 ms / 500) 17.828 -> 17.506 ( -1.81%) [ +0.00% +0.00% +0.02% / -1.40% -1.81% -1.74%] index_select strided 7 : Elapsed 0.036 ms (17.828 ms / 500) 17.784 -> 17.511 ( -1.54%) [ +0.12% +0.00% +0.09% / -1.30% -1.51% -1.54%] index_select strided 8 : Elapsed 0.036 ms (17.806 ms / 500) 17.654 -> 17.511 ( -0.81%) [ +0.05% +0.00% +0.02% / -0.81% -0.63% -0.60%] index_select strided 16 : Elapsed 0.035 ms (17.662 ms / 500) 17.769 -> 17.484 ( -1.60%) [ +0.10% +0.03% +0.00% / -0.97% -1.60% -1.53%] index_select strided 64 : Elapsed 0.036 ms (17.786 ms / 500) 17.756 -> 17.570 ( -1.05%) [ +0.10% +0.00% +0.02% / -1.05% -0.85% -0.78%] index_select strided 100 : Elapsed 0.036 ms (17.773 ms / 500) 17.774 -> 17.518 ( -1.44%) [ +0.03% +0.02% +0.00% / -1.31% -1.41% -1.44%] index_select strided 255 : Elapsed 0.036 ms (17.779 ms / 500) 17.814 -> 17.523 ( -1.63%) [ +0.05% +0.00% +0.04% / -1.08% -1.62% -1.63%] index_select random : Elapsed 0.036 ms (17.823 ms / 500) 17.751 -> 17.564 ( -1.05%) [ +0.01% +0.08% +0.00% / -1.05% -0.88% -0.97%] index_select random_sorted : Elapsed 0.036 ms (17.753 ms / 500) out_shape = [256, 512] in_shape = [256, 32] idx_dim = 1 B = [256, 512] (stride (512, 1)) dim = 1 fill_cnt = 32 10.771 -> 10.818 ( +0.44%) [ +0.00% +0.41% +0.24% / +0.58% +0.61% +0.44%] index_fill_ const : Elapsed 0.022 ms (10.771 ms / 500) 10.849 -> 10.845 ( -0.04%) [ +0.06% +0.00% +0.11% / +0.52% +0.06% -0.04%] index_fill_ linear : Elapsed 0.022 ms (10.855 ms / 500) 10.859 -> 10.839 ( -0.18%) [ +0.15% +0.00% +0.06% / +0.52% -0.18% +0.06%] index_fill_ reverse : Elapsed 0.022 ms (10.875 ms / 500) 10.797 -> 10.818 ( +0.19%) [ +0.27% +0.37% +0.00% / +0.30% +0.19% +0.40%] index_fill_ skip64 : Elapsed 0.022 ms (10.826 ms / 500) 10.788 -> 10.809 ( +0.19%) [ +0.00% +0.11% +0.01% / +0.37% +0.19% +0.36%] index_fill_ skip256 : Elapsed 0.022 ms (10.788 ms / 500) 11.649 -> 11.689 ( +0.34%) [ +0.16% +0.00% +0.26% / +0.66% +0.43% +0.34%] index_fill_ spread : Elapsed 0.023 ms (11.668 ms / 500) 11.051 -> 11.101 ( +0.45%) [ +0.58% +0.00% +0.18% / +0.58% +0.45% +0.54%] index_fill_ strided 3 : Elapsed 0.022 ms (11.115 ms / 500) 11.271 -> 11.331 ( +0.53%) [ +0.22% +0.00% +0.42% / +0.53% +0.61% +0.53%] index_fill_ strided 5 : Elapsed 0.023 ms (11.296 ms / 500) 11.522 -> 11.525 ( +0.03%) [ +0.36% +0.00% +0.22% / +0.54% +0.09% +0.03%] index_fill_ strided 7 : Elapsed 0.023 ms (11.563 ms / 500) 11.569 -> 11.791 ( +1.92%) [ +0.23% +0.22% +0.00% / +1.92% +1.98% +2.01%] index_fill_ strided 8 : Elapsed 0.023 ms (11.596 ms / 500) 11.616 -> 11.697 ( +0.70%) [ +0.00% +0.25% +0.46% / +0.70% +0.87% +1.10%] index_fill_ strided 16 : Elapsed 0.023 ms (11.616 ms / 500) 10.883 -> 10.911 ( +0.26%) [ +0.15% +0.00% +0.28% / +0.72% +0.26% +0.67%] index_fill_ strided 64 : Elapsed 0.022 ms (10.899 ms / 500) 11.562 -> 11.551 ( -0.10%) [ +0.48% +0.33% +0.00% / -0.10% +0.63% +0.55%] index_fill_ strided 100 : Elapsed 0.023 ms (11.617 ms / 500) 11.075 -> 11.092 ( +0.15%) [ +0.22% +0.00% +0.07% / +0.31% +0.15% +0.32%] index_fill_ strided 255 : Elapsed 0.022 ms (11.099 ms / 500) 10.883 -> 10.857 ( -0.24%) [ +0.05% +0.00% +0.11% / +0.36% -0.24% +0.09%] index_fill_ strided 256 : Elapsed 0.022 ms (10.888 ms / 500) 10.997 -> 11.036 ( +0.35%) [ +0.27% +0.00% +0.18% / +0.35% +0.67% +0.64%] index_fill_ strided 257 : Elapsed 0.022 ms (11.027 ms / 500) 11.507 -> 11.448 ( -0.51%) [ +0.20% +0.00% +0.12% / -0.51% -0.43% -0.48%] index_fill_ random : Elapsed 0.023 ms (11.530 ms / 500) 11.483 -> 11.586 ( +0.90%) [ +0.17% +0.00% +0.26% / +0.90% +1.31% +1.28%] index_fill_ random_sorted : Elapsed 0.023 ms (11.503 ms / 500) 11.536 -> 11.518 ( -0.16%) [ +0.00% +0.16% +0.23% / -0.16% +0.36% +0.37%] index_fill_ perm : Elapsed 0.023 ms (11.536 ms / 500) 11.560 -> 11.583 ( +0.20%) [ +0.29% +0.00% +0.38% / +0.30% +0.20% +0.35%] index_fill_ perm_sorted : Elapsed 0.023 ms (11.593 ms / 500) B = [256, 512] (stride (512, 1)) A = [256, 32] (stride (32, 1)) dim = 1 9.529 -> 9.142 ( -4.06%) [ +0.13% +0.02% +0.00% / -3.39% -3.97% -4.06%] index_add_ linear : Elapsed 0.019 ms (9.541 ms / 500) 9.432 -> 9.021 ( -4.36%) [ +0.02% +0.00% +0.05% / -3.60% -4.36% -4.32%] index_copy_ linear : Elapsed 0.019 ms (9.434 ms / 500) 9.454 -> 9.177 ( -2.93%) [ +0.15% +0.10% +0.00% / -2.86% -2.90% -2.93%] index_add_ reverse : Elapsed 0.019 ms (9.468 ms / 500) 9.361 -> 9.043 ( -3.40%) [ +0.12% +0.16% +0.00% / -3.26% -3.38% -3.40%] index_copy_ reverse : Elapsed 0.019 ms (9.372 ms / 500) 10.519 -> 10.094 ( -4.04%) [ +0.19% +0.00% +0.07% / -3.50% -4.04% -3.94%] index_add_ spread : Elapsed 0.021 ms (10.539 ms / 500) 10.117 -> 9.848 ( -2.66%) [ +0.30% +0.08% +0.00% / -1.69% -2.64% -2.66%] index_copy_ spread : Elapsed 0.020 ms (10.147 ms / 500) 9.658 -> 9.377 ( -2.91%) [ +0.07% +0.00% +0.06% / -2.91% -2.71% -2.67%] index_add_ strided 3 : Elapsed 0.019 ms (9.665 ms / 500) 9.604 -> 9.284 ( -3.33%) [ +0.15% +0.00% +0.06% / -3.33% -2.89% -2.86%] index_copy_ strided 3 : Elapsed 0.019 ms (9.618 ms / 500) 9.954 -> 9.590 ( -3.66%) [ +0.23% +0.11% +0.00% / -3.64% -3.64% -3.66%] index_add_ strided 5 : Elapsed 0.020 ms (9.977 ms / 500) 9.840 -> 9.478 ( -3.68%) [ +0.18% +0.06% +0.00% / -3.68% -3.18% -3.14%] index_copy_ strided 5 : Elapsed 0.020 ms (9.858 ms / 500) 10.232 -> 9.828 ( -3.95%) [ +0.12% +0.00% +0.03% / -3.93% -3.92% -3.95%] index_add_ strided 7 : Elapsed 0.020 ms (10.244 ms / 500) 10.072 -> 9.725 ( -3.45%) [ +0.15% +0.10% +0.00% / -3.45% -3.10% -3.19%] index_copy_ strided 7 : Elapsed 0.020 ms (10.087 ms / 500) 9.633 -> 9.351 ( -2.93%) [ +0.18% +0.02% +0.00% / -2.93% -2.92% -2.91%] index_add_ strided 255 : Elapsed 0.019 ms (9.650 ms / 500) 9.542 -> 9.248 ( -3.08%) [ +0.15% +0.19% +0.00% / -2.94% -3.08% -3.06%] index_copy_ strided 255 : Elapsed 0.019 ms (9.556 ms / 500) 9.633 -> 9.284 ( -3.62%) [ +0.07% +0.00% +0.19% / -3.61% -3.62% -3.62%] index_add_ strided 257 : Elapsed 0.019 ms (9.640 ms / 500) 9.564 -> 9.220 ( -3.60%) [ +0.13% +0.00% +0.07% / -3.46% -3.59% -3.60%] index_copy_ strided 257 : Elapsed 0.019 ms (9.576 ms / 500) 10.234 -> 9.945 ( -2.82%) [ +0.14% +0.12% +0.00% / -2.43% -2.80% -2.82%] index_add_ perm : Elapsed 0.020 ms (10.248 ms / 500) 9.950 -> 9.603 ( -3.49%) [ +0.07% +0.06% +0.00% / -3.17% -3.49% -3.41%] index_copy_ perm : Elapsed 0.020 ms (9.957 ms / 500) 10.262 -> 9.953 ( -3.01%) [ +0.11% +0.17% +0.00% / -2.20% -2.92% -3.01%] index_add_ perm_sorted : Elapsed 0.021 ms (10.273 ms / 500) 9.981 -> 9.605 ( -3.77%) [ +0.12% +0.03% +0.00% / -3.09% -3.72% -3.77%] index_copy_ perm_sorted : Elapsed 0.020 ms (9.993 ms / 500) Good 28.196 -> 24.821 (-11.97%) [ +0.23% +0.00% +0.23% / -11.84% -11.97% -11.77%] index_select const : Elapsed 0.057 ms (28.262 ms / 500) Good 28.135 -> 24.814 (-11.80%) [ +0.05% +0.00% +0.02% / -11.80% -11.77% -11.75%] index_select wrap : Elapsed 0.056 ms (28.149 ms / 500) Good 28.215 -> 24.847 (-11.94%) [ +0.05% +0.00% +0.11% / -11.74% -11.94% -11.86%] index_select linear : Elapsed 0.056 ms (28.229 ms / 500) Good 28.152 -> 25.057 (-10.99%) [ +0.00% +0.14% +0.01% / -10.65% -10.99% -10.77%] index_select reverse : Elapsed 0.056 ms (28.152 ms / 500) Good 28.066 -> 24.814 (-11.59%) [ +0.10% +0.00% +0.06% / -11.53% -11.59% -11.55%] index_select skip64 : Elapsed 0.056 ms (28.094 ms / 500) Good 28.054 -> 24.813 (-11.55%) [ +0.04% +0.02% +0.00% / -11.53% -11.55% -11.54%] index_select skip256 : Elapsed 0.056 ms (28.064 ms / 500) Good 28.061 -> 25.130 (-10.45%) [ +0.09% +0.00% +0.03% / -10.45% -10.39% -10.37%] index_select spread : Elapsed 0.056 ms (28.087 ms / 500) Good 28.335 -> 24.843 (-12.32%) [ +0.12% +0.00% +0.02% / -12.32% -12.12% -12.13%] index_select strided 3 : Elapsed 0.057 ms (28.370 ms / 500) Good 28.211 -> 24.821 (-12.02%) [ +0.05% +0.00% +0.00% / -11.95% -11.98% -12.02%] index_select strided 5 : Elapsed 0.056 ms (28.226 ms / 500) Good 28.106 -> 24.803 (-11.75%) [ +0.15% +0.05% +0.00% / -11.65% -11.75% -11.74%] index_select strided 7 : Elapsed 0.056 ms (28.147 ms / 500) Good 28.104 -> 24.831 (-11.65%) [ +0.00% +0.05% +0.15% / -11.65% -11.59% -11.41%] index_select strided 8 : Elapsed 0.056 ms (28.104 ms / 500) Good 28.309 -> 24.832 (-12.28%) [ +0.00% +0.05% +0.09% / -12.28% -12.11% -12.18%] index_select strided 16 : Elapsed 0.057 ms (28.309 ms / 500) Good 28.202 -> 24.821 (-11.99%) [ +0.00% +0.01% +0.06% / -11.88% -11.99% -11.88%] index_select random : Elapsed 0.056 ms (28.202 ms / 500) Good 28.055 -> 24.969 (-11.00%) [ +0.10% +0.00% +0.04% / -10.70% -11.00% -10.86%] index_select random_sorted : Elapsed 0.056 ms (28.084 ms / 500) B = [256, 512] (stride (512, 1)) A = [256, 32] (stride (1, 256)) dim = 1 9.451 -> 9.178 ( -2.89%) [ +0.05% +0.05% +0.00% / -2.89% -2.27% -2.10%] index_add_ linear : Elapsed 0.019 ms (9.456 ms / 500) 9.293 -> 9.194 ( -1.07%) [ +0.31% +0.00% +0.04% / -1.07% -0.48% -0.66%] index_copy_ linear : Elapsed 0.019 ms (9.322 ms / 500) 9.484 -> 9.185 ( -3.15%) [ +0.03% +0.00% +0.08% / -3.15% -2.88% -3.07%] index_add_ reverse : Elapsed 0.019 ms (9.487 ms / 500) 9.331 -> 9.197 ( -1.44%) [ +0.24% +0.14% +0.00% / -1.21% -1.39% -1.44%] index_copy_ reverse : Elapsed 0.019 ms (9.353 ms / 500) good 10.703 -> 10.045 ( -6.15%) [ +0.00% +0.01% +0.03% / -6.07% -6.15% -6.07%] index_add_ spread : Elapsed 0.021 ms (10.703 ms / 500) 10.157 -> 9.926 ( -2.27%) [ +0.13% +0.00% +0.20% / -2.03% -2.27% -2.25%] index_copy_ spread : Elapsed 0.020 ms (10.170 ms / 500) 9.751 -> 9.380 ( -3.80%) [ +0.04% +0.03% +0.00% / -3.80% -3.50% -3.52%] index_add_ strided 3 : Elapsed 0.020 ms (9.755 ms / 500) 9.573 -> 9.433 ( -1.46%) [ +0.20% +0.00% +0.27% / -1.46% -1.15% -1.11%] index_copy_ strided 3 : Elapsed 0.019 ms (9.592 ms / 500) 10.062 -> 9.561 ( -4.98%) [ +0.14% +0.07% +0.00% / -4.98% -4.66% -4.74%] index_add_ strided 5 : Elapsed 0.020 ms (10.076 ms / 500) 9.841 -> 9.623 ( -2.22%) [ +0.11% +0.01% +0.00% / -2.22% -1.70% -1.77%] index_copy_ strided 5 : Elapsed 0.020 ms (9.852 ms / 500) good 10.321 -> 9.797 ( -5.08%) [ +0.14% +0.00% +0.10% / -5.08% -4.62% -4.61%] index_add_ strided 7 : Elapsed 0.021 ms (10.335 ms / 500) 10.060 -> 9.855 ( -2.04%) [ +0.12% +0.00% +0.15% / -2.04% -1.61% -1.43%] index_copy_ strided 7 : Elapsed 0.020 ms (10.072 ms / 500) 9.666 -> 9.332 ( -3.46%) [ +0.24% +0.21% +0.00% / -3.46% -3.16% -3.13%] index_add_ strided 255 : Elapsed 0.019 ms (9.689 ms / 500) 9.501 -> 9.400 ( -1.06%) [ +0.25% +0.15% +0.00% / -1.06% -0.77% -0.79%] index_copy_ strided 255 : Elapsed 0.019 ms (9.525 ms / 500) 9.687 -> 9.292 ( -4.08%) [ +0.11% +0.02% +0.00% / -4.06% -4.02% -4.08%] index_add_ strided 257 : Elapsed 0.019 ms (9.698 ms / 500) 9.517 -> 9.381 ( -1.43%) [ +0.14% +0.11% +0.00% / -1.43% -1.31% -1.37%] index_copy_ strided 257 : Elapsed 0.019 ms (9.530 ms / 500) good 10.405 -> 9.835 ( -5.48%) [ +0.01% +0.00% +0.12% / -5.48% -5.32% -5.47%] index_add_ perm : Elapsed 0.021 ms (10.406 ms / 500) 10.000 -> 9.703 ( -2.97%) [ +0.25% +0.34% +0.00% / -2.65% -2.97% -2.94%] index_copy_ perm : Elapsed 0.020 ms (10.025 ms / 500) 10.345 -> 9.836 ( -4.92%) [ +0.07% +0.00% +0.09% / -4.92% -4.49% -4.55%] index_add_ perm_sorted : Elapsed 0.021 ms (10.352 ms / 500) 9.974 -> 9.765 ( -2.10%) [ +0.10% +0.00% +0.00% / -2.10% -1.72% -1.73%] index_copy_ perm_sorted : Elapsed 0.020 ms (9.984 ms / 500) good 26.481 -> 24.774 ( -6.45%) [ +0.04% +0.00% +0.00% / -6.23% -6.45% -6.32%] index_select const : Elapsed 0.053 ms (26.491 ms / 500) 26.406 -> 25.742 ( -2.51%) [ +0.11% +0.00% +0.02% / -2.49% -2.51% -2.48%] index_select wrap : Elapsed 0.053 ms (26.436 ms / 500) good 26.595 -> 24.937 ( -6.23%) [ +0.01% +0.03% +0.00% / -6.23% -5.99% -6.06%] index_select linear : Elapsed 0.053 ms (26.598 ms / 500) good 26.678 -> 24.874 ( -6.76%) [ +0.00% +0.03% +0.05% / -6.76% -6.56% -6.44%] index_select reverse : Elapsed 0.053 ms (26.678 ms / 500) good 26.523 -> 24.790 ( -6.53%) [ +0.00% +0.08% +0.06% / -6.51% -6.46% -6.53%] index_select skip64 : Elapsed 0.053 ms (26.523 ms / 500) good 26.395 -> 24.730 ( -6.31%) [ +0.00% +0.07% +0.10% / -5.96% -6.31% -6.10%] index_select skip256 : Elapsed 0.053 ms (26.395 ms / 500) good 26.289 -> 24.933 ( -5.16%) [ +0.16% +0.17% +0.00% / -4.98% -5.16% -5.04%] index_select spread : Elapsed 0.053 ms (26.330 ms / 500) 26.400 -> 25.776 ( -2.36%) [ +0.02% +0.00% +0.03% / -2.36% -2.11% -2.14%] index_select strided 3 : Elapsed 0.053 ms (26.406 ms / 500) 26.369 -> 25.788 ( -2.20%) [ +0.09% +0.06% +0.00% / -2.20% -2.05% -2.15%] index_select strided 5 : Elapsed 0.053 ms (26.393 ms / 500) 26.432 -> 25.934 ( -1.88%) [ +0.11% +0.00% +0.00% / -1.88% -1.43% -1.45%] index_select strided 7 : Elapsed 0.053 ms (26.462 ms / 500) good 26.452 -> 24.859 ( -6.02%) [ +0.09% +0.00% +0.10% / -5.80% -5.79% -6.02%] index_select strided 8 : Elapsed 0.053 ms (26.475 ms / 500) good 26.387 -> 24.830 ( -5.90%) [ +0.26% +0.16% +0.00% / -5.89% -5.87% -5.90%] index_select strided 16 : Elapsed 0.053 ms (26.456 ms / 500) 26.355 -> 25.059 ( -4.92%) [ +0.09% +0.11% +0.00% / -4.92% -4.64% -4.64%] index_select random : Elapsed 0.053 ms (26.378 ms / 500) good 26.321 -> 24.902 ( -5.39%) [ +0.13% +0.00% +0.12% / -5.39% -5.23% -5.02%] index_select random_sorted : Elapsed 0.053 ms (26.354 ms / 500) B = [256, 512] (stride (1, 256)) dim = 1 fill_cnt = 32 10.711 -> 10.799 ( +0.82%) [ +0.24% +0.00% +0.13% / +0.99% +0.82% +0.95%] index_fill_ const : Elapsed 0.021 ms (10.737 ms / 500) 10.799 -> 10.811 ( +0.11%) [ +0.22% +0.06% +0.00% / +0.57% +0.11% +0.21%] index_fill_ linear : Elapsed 0.022 ms (10.823 ms / 500) 10.770 -> 10.808 ( +0.35%) [ +0.46% +0.00% +0.23% / +1.02% +0.35% +0.50%] index_fill_ reverse : Elapsed 0.022 ms (10.820 ms / 500) 10.708 -> 10.809 ( +0.94%) [ +0.24% +0.00% +0.35% / +0.96% +1.06% +0.94%] index_fill_ skip64 : Elapsed 0.021 ms (10.734 ms / 500) 10.711 -> 10.787 ( +0.71%) [ +0.29% +0.19% +0.00% / +1.05% +0.92% +0.71%] index_fill_ skip256 : Elapsed 0.021 ms (10.742 ms / 500) 10.790 -> 10.898 ( +1.00%) [ +0.08% +0.00% +0.21% / +1.00% +1.09% +1.23%] index_fill_ spread : Elapsed 0.022 ms (10.799 ms / 500) 10.777 -> 10.864 ( +0.81%) [ +0.22% +0.44% +0.00% / +0.81% +1.05% +1.14%] index_fill_ strided 3 : Elapsed 0.022 ms (10.801 ms / 500) 10.825 -> 10.853 ( +0.26%) [ +0.23% +0.10% +0.00% / +0.67% +0.60% +0.26%] index_fill_ strided 5 : Elapsed 0.022 ms (10.850 ms / 500) 10.794 -> 10.861 ( +0.62%) [ +0.40% +0.40% +0.00% / +1.21% +0.62% +0.82%] index_fill_ strided 7 : Elapsed 0.022 ms (10.837 ms / 500) 10.821 -> 10.868 ( +0.43%) [ +0.14% +0.36% +0.00% / +0.52% +0.43% +0.67%] index_fill_ strided 8 : Elapsed 0.022 ms (10.836 ms / 500) 10.812 -> 10.901 ( +0.82%) [ +0.00% +0.44% +0.20% / +1.16% +0.82% +0.86%] index_fill_ strided 16 : Elapsed 0.022 ms (10.812 ms / 500) 10.711 -> 10.849 ( +1.29%) [ +0.18% +0.21% +0.00% / +1.29% +1.29% +1.39%] index_fill_ strided 64 : Elapsed 0.021 ms (10.730 ms / 500) 10.758 -> 10.870 ( +1.04%) [ +0.00% +0.32% +0.33% / +1.04% +1.37% +1.35%] index_fill_ strided 100 : Elapsed 0.022 ms (10.758 ms / 500) 10.791 -> 10.840 ( +0.45%) [ +0.13% +0.00% +0.23% / +1.16% +0.45% +0.68%] index_fill_ strided 255 : Elapsed 0.022 ms (10.805 ms / 500) 10.743 -> 10.790 ( +0.44%) [ +0.00% +0.12% +0.17% / +1.14% +0.62% +0.44%] index_fill_ strided 256 : Elapsed 0.021 ms (10.743 ms / 500) 10.786 -> 10.881 ( +0.88%) [ +0.37% +0.35% +0.00% / +0.88% +1.08% +1.05%] index_fill_ strided 257 : Elapsed 0.022 ms (10.826 ms / 500) 10.784 -> 10.857 ( +0.68%) [ +0.00% +0.10% +0.07% / +0.68% +1.03% +1.29%] index_fill_ random : Elapsed 0.022 ms (10.784 ms / 500) 10.752 -> 10.883 ( +1.22%) [ +0.00% +0.61% +0.20% / +1.22% +1.39% +1.52%] index_fill_ random_sorted : Elapsed 0.022 ms (10.752 ms / 500) 10.761 -> 10.851 ( +0.84%) [ +0.07% +0.72% +0.00% / +0.84% +1.25% +1.11%] index_fill_ perm : Elapsed 0.022 ms (10.768 ms / 500) 10.778 -> 10.875 ( +0.90%) [ +0.54% +0.52% +0.00% / +1.35% +1.02% +0.90%] index_fill_ perm_sorted : Elapsed 0.022 ms (10.836 ms / 500) B = [256, 512] (stride (1, 256)) A = [256, 32] (stride (32, 1)) dim = 1 9.362 -> 9.075 ( -3.07%) [ +0.14% +0.19% +0.00% / -2.02% -3.07% -2.97%] index_add_ linear : Elapsed 0.019 ms (9.375 ms / 500) 9.367 -> 9.083 ( -3.03%) [ +0.09% +0.09% +0.00% / -1.91% -2.93% -3.03%] index_copy_ linear : Elapsed 0.019 ms (9.375 ms / 500) 9.283 -> 9.112 ( -1.84%) [ +0.08% +0.02% +0.00% / -1.84% -1.77% -1.78%] index_add_ reverse : Elapsed 0.019 ms (9.290 ms / 500) 9.293 -> 9.116 ( -1.90%) [ +0.20% +0.00% +0.08% / -1.90% -1.66% -1.70%] index_copy_ reverse : Elapsed 0.019 ms (9.312 ms / 500) 9.238 -> 9.084 ( -1.67%) [ +0.04% +0.00% +0.04% / -1.67% -1.66% -1.62%] index_add_ spread : Elapsed 0.018 ms (9.242 ms / 500) 9.261 -> 9.085 ( -1.90%) [ +0.00% +0.17% +0.14% / -1.73% -1.90% -1.78%] index_copy_ spread : Elapsed 0.019 ms (9.261 ms / 500) 9.241 -> 9.079 ( -1.75%) [ +0.08% +0.12% +0.00% / -1.75% -0.61% -0.61%] index_add_ strided 3 : Elapsed 0.018 ms (9.248 ms / 500) 9.268 -> 9.100 ( -1.81%) [ +0.12% +0.00% +0.00% / -1.81% -1.19% -0.92%] index_copy_ strided 3 : Elapsed 0.019 ms (9.279 ms / 500) 9.237 -> 9.088 ( -1.61%) [ +0.11% +0.00% +0.13% / -1.61% -0.76% -0.74%] index_add_ strided 5 : Elapsed 0.018 ms (9.247 ms / 500) 9.249 -> 9.085 ( -1.77%) [ +0.19% +0.02% +0.00% / -1.77% -0.89% -0.91%] index_copy_ strided 5 : Elapsed 0.019 ms (9.267 ms / 500) 9.248 -> 9.095 ( -1.65%) [ +0.17% +0.21% +0.00% / -1.65% -1.33% -1.38%] index_add_ strided 7 : Elapsed 0.019 ms (9.264 ms / 500) 9.258 -> 9.110 ( -1.60%) [ +0.26% +0.00% +0.15% / -1.60% -1.41% -1.48%] index_copy_ strided 7 : Elapsed 0.019 ms (9.282 ms / 500) 9.231 -> 9.075 ( -1.69%) [ +0.17% +0.22% +0.00% / -1.69% -1.33% -1.38%] index_add_ strided 255 : Elapsed 0.018 ms (9.247 ms / 500) 9.271 -> 9.090 ( -1.95%) [ +0.00% +0.11% +0.04% / -1.95% -1.59% -1.60%] index_copy_ strided 255 : Elapsed 0.019 ms (9.271 ms / 500) 9.288 -> 9.092 ( -2.11%) [ +0.06% +0.00% +0.03% / -1.87% -2.11% -2.06%] index_add_ strided 257 : Elapsed 0.019 ms (9.294 ms / 500) 9.279 -> 9.102 ( -1.91%) [ +0.16% +0.00% +0.03% / -1.67% -1.88% -1.91%] index_copy_ strided 257 : Elapsed 0.019 ms (9.294 ms / 500) 9.247 -> 9.075 ( -1.86%) [ +0.14% +0.13% +0.00% / -1.57% -1.86% -1.81%] index_add_ perm : Elapsed 0.019 ms (9.260 ms / 500) 9.272 -> 9.091 ( -1.95%) [ +0.10% +0.17% +0.00% / -1.56% -1.95% -1.94%] index_copy_ perm : Elapsed 0.019 ms (9.281 ms / 500) 9.275 -> 9.047 ( -2.46%) [ +0.00% +0.10% +0.01% / -1.78% -2.46% -2.35%] index_add_ perm_sorted : Elapsed 0.019 ms (9.275 ms / 500) 9.304 -> 9.070 ( -2.52%) [ +0.10% +0.00% +0.21% / -1.70% -2.46% -2.52%] index_copy_ perm_sorted : Elapsed 0.019 ms (9.313 ms / 500) 25.821 -> 25.627 ( -0.75%) [ +0.03% +0.11% +0.00% / +0.04% -0.75% -0.72%] index_select const : Elapsed 0.052 ms (25.828 ms / 500) 25.716 -> 25.633 ( -0.32%) [ +0.00% +0.00% +0.16% / +0.15% -0.32% -0.29%] index_select wrap : Elapsed 0.051 ms (25.716 ms / 500) 25.842 -> 25.719 ( -0.48%) [ +0.08% +0.06% +0.00% / +0.03% -0.27% -0.48%] index_select linear : Elapsed 0.052 ms (25.863 ms / 500) 25.747 -> 25.780 ( +0.13%) [ +0.13% +0.00% +0.12% / +0.13% +0.27% +0.23%] index_select reverse : Elapsed 0.052 ms (25.780 ms / 500) 25.670 -> 25.680 ( +0.04%) [ +0.00% +0.02% +0.10% / +0.04% +0.07% +0.17%] index_select skip64 : Elapsed 0.051 ms (25.670 ms / 500) 25.640 -> 25.584 ( -0.22%) [ +0.11% +0.00% +0.16% / +0.14% -0.20% -0.22%] index_select skip256 : Elapsed 0.051 ms (25.667 ms / 500) 25.703 -> 25.608 ( -0.37%) [ +0.07% +0.00% +0.02% / +0.16% -0.28% -0.37%] index_select spread : Elapsed 0.051 ms (25.720 ms / 500) 25.946 -> 25.697 ( -0.96%) [ +0.10% +0.00% +0.06% / +0.03% -0.96% -0.93%] index_select strided 3 : Elapsed 0.052 ms (25.972 ms / 500) 25.845 -> 25.650 ( -0.75%) [ +0.07% +0.00% +0.03% / +0.02% -0.74% -0.75%] index_select strided 5 : Elapsed 0.052 ms (25.862 ms / 500) 25.737 -> 25.624 ( -0.44%) [ +0.17% +0.00% +0.09% / +0.17% -0.38% -0.44%] index_select strided 7 : Elapsed 0.052 ms (25.782 ms / 500) 25.728 -> 25.617 ( -0.43%) [ +0.03% +0.10% +0.00% / +0.18% -0.43% -0.32%] index_select strided 8 : Elapsed 0.051 ms (25.736 ms / 500) 25.900 -> 25.655 ( -0.95%) [ +0.12% +0.08% +0.00% / +0.15% -0.78% -0.95%] index_select strided 16 : Elapsed 0.052 ms (25.931 ms / 500) 25.842 -> 25.645 ( -0.76%) [ +0.07% +0.00% +0.03% / -0.03% -0.70% -0.76%] index_select random : Elapsed 0.052 ms (25.860 ms / 500) 25.681 -> 25.584 ( -0.38%) [ +0.13% +0.15% +0.00% / +0.14% -0.37% -0.38%] index_select random_sorted : Elapsed 0.051 ms (25.714 ms / 500) B = [256, 512] (stride (1, 256)) A = [256, 32] (stride (1, 256)) dim = 1 9.315 -> 9.156 ( -1.71%) [ +0.15% +0.04% +0.00% / -1.71% -1.10% -0.93%] index_add_ linear : Elapsed 0.019 ms (9.329 ms / 500) 9.222 -> 9.062 ( -1.73%) [ +0.01% +0.00% +0.04% / -1.73% -1.42% -1.14%] index_copy_ linear : Elapsed 0.018 ms (9.223 ms / 500) 9.338 -> 9.172 ( -1.78%) [ +0.13% +0.00% +0.16% / -1.58% -1.78% -1.69%] index_add_ reverse : Elapsed 0.019 ms (9.350 ms / 500) 9.257 -> 9.066 ( -2.06%) [ +0.00% +0.06% +0.02% / -1.65% -2.06% -1.91%] index_copy_ reverse : Elapsed 0.019 ms (9.257 ms / 500) 9.295 -> 9.108 ( -2.01%) [ +0.13% +0.00% +0.09% / -1.66% -2.01% -1.99%] index_add_ spread : Elapsed 0.019 ms (9.307 ms / 500) 9.229 -> 9.029 ( -2.17%) [ +0.13% +0.00% +0.07% / -1.74% -2.17% -2.04%] index_copy_ spread : Elapsed 0.018 ms (9.241 ms / 500) 9.322 -> 9.168 ( -1.65%) [ +0.02% +0.23% +0.00% / -1.65% -1.21% -0.88%] index_add_ strided 3 : Elapsed 0.019 ms (9.324 ms / 500) 9.215 -> 9.044 ( -1.86%) [ +0.29% +0.21% +0.00% / -1.86% -1.44% -1.02%] index_copy_ strided 3 : Elapsed 0.018 ms (9.242 ms / 500) 9.301 -> 9.145 ( -1.68%) [ +0.00% +0.10% +0.00% / -1.68% -1.02% -0.98%] index_add_ strided 5 : Elapsed 0.019 ms (9.301 ms / 500) 9.216 -> 9.043 ( -1.88%) [ +0.01% +0.01% +0.00% / -1.88% -1.52% -1.38%] index_copy_ strided 5 : Elapsed 0.018 ms (9.217 ms / 500) 9.268 -> 9.119 ( -1.61%) [ +0.30% +0.06% +0.00% / -1.61% -0.87% -0.86%] index_add_ strided 7 : Elapsed 0.019 ms (9.296 ms / 500) 9.194 -> 9.036 ( -1.72%) [ +0.14% +0.08% +0.00% / -1.72% -1.50% -0.87%] index_copy_ strided 7 : Elapsed 0.018 ms (9.207 ms / 500) 9.283 -> 9.134 ( -1.61%) [ +0.17% +0.00% +0.05% / -1.61% -1.30% -0.98%] index_add_ strided 255 : Elapsed 0.019 ms (9.299 ms / 500) 9.201 -> 9.039 ( -1.76%) [ +0.12% +0.00% +0.02% / -1.76% -1.57% -1.20%] index_copy_ strided 255 : Elapsed 0.018 ms (9.212 ms / 500) 9.335 -> 9.166 ( -1.81%) [ +0.18% +0.00% +0.24% / -1.64% -1.81% -1.68%] index_add_ strided 257 : Elapsed 0.019 ms (9.352 ms / 500) 9.244 -> 9.048 ( -2.12%) [ +0.00% +0.04% +0.00% / -1.88% -2.12% -1.86%] index_copy_ strided 257 : Elapsed 0.018 ms (9.244 ms / 500) 9.300 -> 9.143 ( -1.69%) [ +0.11% +0.11% +0.00% / -1.69% -1.68% -1.56%] index_add_ perm : Elapsed 0.019 ms (9.310 ms / 500) 9.221 -> 9.037 ( -2.00%) [ +0.15% +0.09% +0.00% / -1.68% -2.00% -1.79%] index_copy_ perm : Elapsed 0.018 ms (9.235 ms / 500) 9.280 -> 9.137 ( -1.54%) [ +0.03% +0.04% +0.00% / -1.54% -1.29% -1.40%] index_add_ perm_sorted : Elapsed 0.019 ms (9.283 ms / 500) 9.203 -> 9.045 ( -1.72%) [ +0.09% +0.15% +0.00% / -1.72% -1.59% -1.49%] index_copy_ perm_sorted : Elapsed 0.018 ms (9.211 ms / 500) 24.841 -> 24.791 ( -0.20%) [ +0.07% +0.09% +0.00% / +0.19% -0.20% -0.01%] index_select const : Elapsed 0.050 ms (24.858 ms / 500) 24.835 -> 24.820 ( -0.06%) [ +0.00% +0.12% +0.16% / +0.05% -0.06% +0.08%] index_select wrap : Elapsed 0.050 ms (24.835 ms / 500) 24.832 -> 24.841 ( +0.04%) [ +0.00% +0.03% +0.04% / +0.04% +0.12% +0.29%] index_select linear : Elapsed 0.050 ms (24.832 ms / 500) 24.970 -> 24.914 ( -0.22%) [ +0.04% +0.00% +0.12% / +0.16% -0.22% -0.01%] index_select reverse : Elapsed 0.050 ms (24.981 ms / 500) 24.907 -> 24.783 ( -0.50%) [ +0.00% +0.02% +0.03% / +0.04% -0.50% -0.29%] index_select skip64 : Elapsed 0.050 ms (24.907 ms / 500) 24.723 -> 24.792 ( +0.28%) [ +0.30% +0.00% +0.13% / +0.30% +0.28% +0.28%] index_select skip256 : Elapsed 0.050 ms (24.798 ms / 500) 24.755 -> 24.815 ( +0.24%) [ +0.27% +0.11% +0.00% / +0.25% +0.24% +0.34%] index_select spread : Elapsed 0.050 ms (24.822 ms / 500) 24.841 -> 24.824 ( -0.07%) [ +0.03% +0.00% +0.08% / -0.07% +0.84% +0.85%] index_select strided 3 : Elapsed 0.050 ms (24.849 ms / 500) 24.819 -> 24.834 ( +0.06%) [ +0.07% +0.00% +0.03% / +0.06% +0.12% +0.18%] index_select strided 5 : Elapsed 0.050 ms (24.837 ms / 500) 24.817 -> 24.821 ( +0.02%) [ +0.06% +0.00% +0.15% / +0.09% +0.02% +0.23%] index_select strided 7 : Elapsed 0.050 ms (24.833 ms / 500) 24.837 -> 24.798 ( -0.16%) [ +0.25% +0.15% +0.00% / +0.21% -0.05% -0.16%] index_select strided 8 : Elapsed 0.050 ms (24.898 ms / 500) 24.780 -> 24.851 ( +0.29%) [ +0.18% +0.05% +0.00% / +0.29% +0.65% +0.71%] index_select strided 16 : Elapsed 0.050 ms (24.825 ms / 500) 24.800 -> 24.831 ( +0.12%) [ +0.11% +0.00% +0.09% / +0.12% +0.34% +0.52%] index_select random : Elapsed 0.050 ms (24.828 ms / 500) 24.791 -> 24.822 ( +0.13%) [ +0.11% +0.01% +0.00% / +0.19% +0.13% +0.21%] index_select random_sorted : Elapsed 0.050 ms (24.818 ms / 500) ==================== rep_count = 100 dimensions = [255, 256, 512] out_shape = [255, 512] in_shape = [256, 512] idx_dim = 0 B = [255, 512] (stride (512, 1)) dim = 0 fill_cnt = 256 2.578 -> 2.573 ( -0.19%) [ +0.00% +0.43% +0.35% / +0.12% -0.19% +0.00%] index_fill_ const : Elapsed 0.026 ms (2.578 ms / 100) 2.654 -> 2.656 ( +0.08%) [ +0.68% +0.15% +0.00% / +0.53% +0.08% +0.57%] index_fill_ linear : Elapsed 0.027 ms (2.672 ms / 100) 2.679 -> 2.649 ( -1.12%) [ +0.52% +0.00% +0.04% / -0.11% -1.12% -0.93%] index_fill_ reverse : Elapsed 0.027 ms (2.693 ms / 100) 2.583 -> 2.573 ( -0.39%) [ +0.31% +0.12% +0.00% / +0.85% +0.08% -0.39%] index_fill_ skip64 : Elapsed 0.026 ms (2.591 ms / 100) 2.576 -> 2.584 ( +0.31%) [ +0.19% +0.12% +0.00% / +0.31% +0.47% +0.50%] index_fill_ skip256 : Elapsed 0.026 ms (2.581 ms / 100) 2.667 -> 2.666 ( -0.04%) [ +0.04% +0.15% +0.00% / -0.04% +0.34% +0.41%] index_fill_ spread : Elapsed 0.027 ms (2.668 ms / 100) 2.597 -> 2.601 ( +0.15%) [ +0.31% +0.00% +0.73% / +0.15% +0.39% +0.31%] index_fill_ strided 3 : Elapsed 0.026 ms (2.605 ms / 100) 2.589 -> 2.587 ( -0.08%) [ +0.54% +0.00% +0.46% / -0.08% +0.12% +0.15%] index_fill_ strided 5 : Elapsed 0.026 ms (2.603 ms / 100) 2.653 -> 2.646 ( -0.26%) [ +0.38% +0.08% +0.00% / +0.08% -0.26% +0.23%] index_fill_ strided 7 : Elapsed 0.027 ms (2.663 ms / 100) 2.657 -> 2.651 ( -0.23%) [ +0.19% +0.41% +0.00% / +0.11% -0.23% +0.19%] index_fill_ strided 8 : Elapsed 0.027 ms (2.662 ms / 100) 2.676 -> 2.654 ( -0.82%) [ +0.41% +0.00% +0.45% / +0.37% -0.82% -0.67%] index_fill_ strided 16 : Elapsed 0.027 ms (2.687 ms / 100) 2.657 -> 2.649 ( -0.30%) [ +0.23% +0.00% +0.15% / +0.75% -0.30% +0.26%] index_fill_ strided 64 : Elapsed 0.027 ms (2.663 ms / 100) 2.601 -> 2.591 ( -0.38%) [ +0.23% +0.00% +0.23% / -0.38% +0.12% -0.04%] index_fill_ strided 100 : Elapsed 0.026 ms (2.607 ms / 100) 2.625 -> 2.625 ( +0.00%) [ +0.00% +0.19% +0.04% / +0.27% +0.11% +0.00%] index_fill_ random : Elapsed 0.026 ms (2.625 ms / 100) 2.621 -> 2.615 ( -0.23%) [ +0.31% +0.00% +0.23% / +0.34% -0.23% -0.04%] index_fill_ random_sorted : Elapsed 0.026 ms (2.629 ms / 100) B = [255, 512] (stride (512, 1)) A = [256, 512] (stride (512, 1)) dim = 0 4.981 -> 4.964 ( -0.34%) [ +0.02% +0.14% +0.00% / +0.34% -0.14% -0.34%] index_select const : Elapsed 0.050 ms (4.982 ms / 100) 5.205 -> 5.189 ( -0.31%) [ +0.00% +0.08% +0.04% / +0.19% -0.31% -0.10%] index_select wrap : Elapsed 0.052 ms (5.205 ms / 100) 5.193 -> 5.202 ( +0.17%) [ +0.17% +0.08% +0.00% / +0.54% +0.17% +0.21%] index_select linear : Elapsed 0.052 ms (5.202 ms / 100) 5.190 -> 5.203 ( +0.25%) [ +0.31% +0.00% +0.06% / +0.33% +0.33% +0.25%] index_select reverse : Elapsed 0.052 ms (5.206 ms / 100) 4.948 -> 4.965 ( +0.34%) [ +0.49% +0.00% +0.14% / +0.42% +0.34% +0.63%] index_select skip64 : Elapsed 0.050 ms (4.972 ms / 100) 4.952 -> 4.973 ( +0.42%) [ +0.02% +0.00% +0.00% / +0.50% +0.42% +0.77%] index_select skip256 : Elapsed 0.050 ms (4.953 ms / 100) 5.197 -> 5.191 ( -0.12%) [ +0.31% +0.00% +0.48% / +0.69% -0.12% -0.06%] index_select spread : Elapsed 0.052 ms (5.213 ms / 100) 5.208 -> 5.217 ( +0.17%) [ +0.00% +0.00% +0.13% / +0.17% +0.52% +0.67%] index_select strided 3 : Elapsed 0.052 ms (5.208 ms / 100) 5.233 -> 5.205 ( -0.54%) [ +0.27% +0.00% +0.13% / +0.00% -0.54% +0.06%] index_select strided 5 : Elapsed 0.052 ms (5.247 ms / 100) 5.200 -> 5.206 ( +0.12%) [ +0.10% +0.00% +0.00% / +0.12% +0.15% +0.21%] index_select strided 7 : Elapsed 0.052 ms (5.205 ms / 100) 4.974 -> 4.998 ( +0.48%) [ +0.00% +0.38% +0.38% / +0.72% +0.48% +0.82%] index_select strided 8 : Elapsed 0.050 ms (4.974 ms / 100) 4.975 -> 4.977 ( +0.04%) [ +0.12% +0.00% +0.14% / +0.20% +0.16% +0.04%] index_select strided 16 : Elapsed 0.050 ms (4.981 ms / 100) 4.956 -> 4.958 ( +0.04%) [ +0.42% +0.00% +0.10% / +0.04% +0.20% +0.16%] index_select strided 64 : Elapsed 0.050 ms (4.977 ms / 100) 5.066 -> 5.061 ( -0.10%) [ +0.00% +0.02% +0.26% / +0.47% -0.10% +0.22%] index_select strided 100 : Elapsed 0.051 ms (5.066 ms / 100) 5.189 -> 5.196 ( +0.13%) [ +0.19% +0.08% +0.00% / +0.29% +0.44% +0.13%] index_select strided 255 : Elapsed 0.052 ms (5.199 ms / 100) 5.159 -> 5.133 ( -0.50%) [ +0.00% +0.00% +0.10% / +0.39% -0.50% -0.35%] index_select random : Elapsed 0.052 ms (5.159 ms / 100) 5.116 -> 5.114 ( -0.04%) [ +0.14% +0.00% +0.10% / -0.04% +0.33% +0.25%] index_select random_sorted : Elapsed 0.051 ms (5.123 ms / 100) 5.232 -> 5.209 ( -0.44%) [ +0.21% +0.00% +0.27% / +0.29% -0.44% -0.23%] index_select perm : Elapsed 0.052 ms (5.243 ms / 100) 5.209 -> 5.215 ( +0.12%) [ +0.04% +0.04% +0.00% / +0.12% +0.29% +0.46%] index_select perm_sorted : Elapsed 0.052 ms (5.211 ms / 100) B = [255, 512] (stride (512, 1)) A = [256, 512] (stride (1, 256)) dim = 0 5.131 -> 5.132 ( +0.02%) [ +0.00% +0.06% +0.04% / +0.16% +0.02% +0.04%] index_select const : Elapsed 0.051 ms (5.131 ms / 100) 5.434 -> 5.450 ( +0.29%) [ +0.26% +0.00% +0.07% / +0.29% +0.50% +0.79%] index_select wrap : Elapsed 0.054 ms (5.448 ms / 100) 5.439 -> 5.454 ( +0.28%) [ +0.18% +0.00% +0.04% / +0.28% +0.42% +0.51%] index_select linear : Elapsed 0.054 ms (5.449 ms / 100) 5.467 -> 5.465 ( -0.04%) [ +0.00% +0.02% +0.05% / -0.04% +0.60% +0.53%] index_select reverse : Elapsed 0.055 ms (5.467 ms / 100) 5.139 -> 5.140 ( +0.02%) [ +0.06% +0.00% +0.04% / +0.02% +0.35% +0.37%] index_select skip64 : Elapsed 0.051 ms (5.142 ms / 100) 5.107 -> 5.119 ( +0.23%) [ +0.23% +0.00% +0.23% / +0.23% +0.23% +0.27%] index_select skip256 : Elapsed 0.051 ms (5.119 ms / 100) 5.425 -> 5.451 ( +0.48%) [ +0.18% +0.00% +0.20% / +0.48% +0.65% +0.72%] index_select spread : Elapsed 0.054 ms (5.435 ms / 100) 5.746 -> 5.754 ( +0.14%) [ +0.17% +0.00% +0.21% / +0.50% +0.14% +0.49%] index_select strided 3 : Elapsed 0.058 ms (5.756 ms / 100) 5.745 -> 5.757 ( +0.21%) [ +0.19% +0.00% +0.16% / +0.54% +0.21% +0.33%] index_select strided 5 : Elapsed 0.058 ms (5.756 ms / 100) 5.715 -> 5.724 ( +0.16%) [ +0.10% +0.10% +0.00% / +0.16% +0.24% +0.54%] index_select strided 7 : Elapsed 0.057 ms (5.721 ms / 100) 5.720 -> 5.718 ( -0.03%) [ +0.12% +0.00% +0.31% / +0.23% -0.03% +0.14%] index_select strided 8 : Elapsed 0.057 ms (5.727 ms / 100) 5.593 -> 5.593 ( +0.00%) [ +0.02% +0.00% +0.16% / +0.00% +0.07% +0.14%] index_select strided 16 : Elapsed 0.056 ms (5.594 ms / 100) 5.208 -> 5.216 ( +0.15%) [ +0.31% +0.00% +0.21% / +0.15% +0.31% +0.17%] index_select strided 64 : Elapsed 0.052 ms (5.224 ms / 100) 5.717 -> 5.720 ( +0.05%) [ +0.12% +0.09% +0.00% / +0.05% +0.19% +0.16%] index_select strided 100 : Elapsed 0.057 ms (5.724 ms / 100) 5.466 -> 5.480 ( +0.26%) [ +0.00% +0.11% +0.13% / +0.26% +0.38% +0.59%] index_select strided 255 : Elapsed 0.055 ms (5.466 ms / 100) 5.744 -> 5.740 ( -0.07%) [ +0.00% +0.09% +0.38% / +0.03% -0.07% +0.24%] index_select random : Elapsed 0.057 ms (5.744 ms / 100) 5.463 -> 5.470 ( +0.13%) [ +0.24% +0.16% +0.00% / +0.13% +0.59% +0.68%] index_select random_sorted : Elapsed 0.055 ms (5.476 ms / 100) 5.726 -> 5.737 ( +0.19%) [ +0.03% +0.33% +0.00% / +0.24% +0.30% +0.19%] index_select perm : Elapsed 0.057 ms (5.728 ms / 100) 5.468 -> 5.484 ( +0.29%) [ +0.00% +0.04% +0.07% / +0.29% +0.73% +0.84%] index_select perm_sorted : Elapsed 0.055 ms (5.468 ms / 100) B = [255, 512] (stride (1, 255)) dim = 0 fill_cnt = 256 Good 3.132 -> 2.564 (-18.14%) [ +0.00% +0.06% +0.00% / -18.14% -17.85% -17.66%] index_fill_ const : Elapsed 0.031 ms (3.132 ms / 100) Good 3.178 -> 2.625 (-17.40%) [ +0.09% +0.00% +0.00% / -16.93% -17.40% -17.02%] index_fill_ linear : Elapsed 0.032 ms (3.181 ms / 100) Good 3.193 -> 2.628 (-17.69%) [ +0.19% +0.06% +0.00% / -17.63% -17.63% -17.69%] index_fill_ reverse : Elapsed 0.032 ms (3.199 ms / 100) Good 3.117 -> 2.565 (-17.71%) [ +0.22% +0.00% +0.35% / -17.71% -17.58% -17.07%] index_fill_ skip64 : Elapsed 0.031 ms (3.124 ms / 100) Good 3.156 -> 2.565 (-18.73%) [ +0.00% +0.38% +0.41% / -18.63% -18.73% -18.66%] index_fill_ skip256 : Elapsed 0.032 ms (3.156 ms / 100) Good 3.177 -> 2.630 (-17.22%) [ +0.03% +0.09% +0.00% / -17.19% -17.06% -17.22%] index_fill_ spread : Elapsed 0.032 ms (3.178 ms / 100) Good 3.430 -> 2.930 (-14.58%) [ +0.23% +0.41% +0.00% / -14.58% -14.34% -14.34%] index_fill_ strided 3 : Elapsed 0.034 ms (3.438 ms / 100) Good 3.487 -> 3.047 (-12.62%) [ +0.00% +0.00% +0.17% / -12.19% -12.62% -12.53%] index_fill_ strided 5 : Elapsed 0.035 ms (3.487 ms / 100) Good 3.318 -> 2.965 (-10.64%) [ +0.33% +0.33% +0.00% / -10.52% -10.55% -10.64%] index_fill_ strided 7 : Elapsed 0.033 ms (3.329 ms / 100) good 3.295 -> 2.995 ( -9.10%) [ +0.36% +0.06% +0.00% / -8.80% -9.10% -9.04%] index_fill_ strided 8 : Elapsed 0.033 ms (3.307 ms / 100) good 3.314 -> 3.060 ( -7.66%) [ +0.24% +0.18% +0.00% / -6.88% -7.66% -7.51%] index_fill_ strided 16 : Elapsed 0.033 ms (3.322 ms / 100) Good 3.239 -> 2.863 (-11.61%) [ +0.03% +0.00% +0.03% / -11.33% -11.61% -11.36%] index_fill_ strided 64 : Elapsed 0.032 ms (3.240 ms / 100) 3.489 -> 3.375 ( -3.27%) [ +0.29% +0.06% +0.00% / -3.27% -3.27% -3.27%] index_fill_ strided 100 : Elapsed 0.035 ms (3.499 ms / 100) good 3.500 -> 3.214 ( -8.17%) [ +0.31% +0.26% +0.00% / -7.63% -8.17% -8.00%] index_fill_ random : Elapsed 0.035 ms (3.511 ms / 100) Good 3.386 -> 2.831 (-16.39%) [ +0.09% +0.00% +0.03% / -16.27% -16.39% -16.33%] index_fill_ random_sorted : Elapsed 0.034 ms (3.389 ms / 100) B = [255, 512] (stride (1, 255)) A = [256, 512] (stride (512, 1)) dim = 0 good 5.287 -> 4.963 ( -6.13%) [ +0.19% +0.00% +0.09% / -5.86% -6.13% -6.05%] index_select const : Elapsed 0.053 ms (5.297 ms / 100) 5.343 -> 5.375 ( +0.60%) [ +0.00% +0.17% +0.11% / +0.60% +1.01% +1.18%] index_select wrap : Elapsed 0.053 ms (5.343 ms / 100) 5.346 -> 5.388 ( +0.79%) [ +0.22% +0.00% +0.22% / +0.79% +0.97% +0.80%] index_select linear : Elapsed 0.054 ms (5.358 ms / 100) 5.349 -> 5.380 ( +0.58%) [ +0.00% +0.26% +0.15% / +0.58% +0.95% +1.29%] index_select reverse : Elapsed 0.053 ms (5.349 ms / 100) good 5.311 -> 4.965 ( -6.51%) [ +0.00% +0.13% +0.26% / -6.46% -6.46% -6.51%] index_select skip64 : Elapsed 0.053 ms (5.311 ms / 100) good 5.278 -> 4.967 ( -5.89%) [ +0.00% +0.06% +0.11% / -5.89% -5.85% -5.68%] index_select skip256 : Elapsed 0.053 ms (5.278 ms / 100) 5.352 -> 5.403 ( +0.95%) [ +0.00% +0.02% +0.04% / +0.95% +0.99% +1.25%] index_select spread : Elapsed 0.054 ms (5.352 ms / 100) 5.382 -> 5.461 ( +1.47%) [ +0.02% +0.17% +0.00% / +1.47% +1.77% +1.77%] index_select strided 3 : Elapsed 0.054 ms (5.383 ms / 100) 5.352 -> 5.467 ( +2.15%) [ +0.13% +0.22% +0.00% / +2.20% +2.15% +2.41%] index_select strided 5 : Elapsed 0.054 ms (5.359 ms / 100) 5.357 -> 5.445 ( +1.64%) [ +0.04% +0.07% +0.00% / +1.64% +1.98% +2.13%] index_select strided 7 : Elapsed 0.054 ms (5.359 ms / 100) 5.329 -> 5.484 ( +2.91%) [ +0.08% +0.00% +0.00% / +3.27% +2.91% +3.02%] index_select strided 8 : Elapsed 0.053 ms (5.333 ms / 100) 5.307 -> 5.156 ( -2.85%) [ +0.24% +0.00% +0.19% / -2.85% -2.75% -2.66%] index_select strided 16 : Elapsed 0.053 ms (5.320 ms / 100) good 5.314 -> 4.967 ( -6.53%) [ +0.13% +0.00% +0.06% / -6.53% -6.19% -6.30%] index_select strided 64 : Elapsed 0.053 ms (5.321 ms / 100) 5.330 -> 5.197 ( -2.50%) [ +0.23% +0.00% +0.11% / -2.05% -2.50% -2.35%] index_select strided 100 : Elapsed 0.053 ms (5.342 ms / 100) 5.348 -> 5.421 ( +1.36%) [ +0.07% +0.00% +0.09% / +1.42% +1.36% +1.65%] index_select strided 255 : Elapsed 0.054 ms (5.352 ms / 100) 5.334 -> 5.313 ( -0.39%) [ +0.19% +0.02% +0.00% / +0.06% -0.39% -0.39%] index_select random : Elapsed 0.053 ms (5.344 ms / 100) 5.331 -> 5.264 ( -1.26%) [ +0.23% +0.17% +0.00% / -1.26% -1.26% -1.20%] index_select random_sorted : Elapsed 0.053 ms (5.343 ms / 100) 5.340 -> 5.441 ( +1.89%) [ +0.22% +0.00% +0.06% / +1.89% +2.25% +2.27%] index_select perm : Elapsed 0.054 ms (5.352 ms / 100) 5.343 -> 5.383 ( +0.75%) [ +0.30% +0.02% +0.00% / +0.75% +1.27% +1.38%] index_select perm_sorted : Elapsed 0.054 ms (5.359 ms / 100) B = [255, 512] (stride (1, 255)) A = [256, 512] (stride (1, 256)) dim = 0 Good 5.673 -> 4.979 (-12.23%) [ +0.12% +0.12% +0.00% / -12.18% -12.23% -12.16%] index_select const : Elapsed 0.057 ms (5.680 ms / 100) good 5.747 -> 5.221 ( -9.15%) [ +0.00% +0.31% +0.07% / -9.15% -8.82% -8.80%] index_select wrap : Elapsed 0.057 ms (5.747 ms / 100) good 5.758 -> 5.226 ( -9.24%) [ +0.14% +0.07% +0.00% / -9.24% -8.91% -8.74%] index_select linear : Elapsed 0.058 ms (5.766 ms / 100) good 5.793 -> 5.231 ( -9.70%) [ +0.00% +0.07% +0.10% / -9.70% -9.48% -9.49%] index_select reverse : Elapsed 0.058 ms (5.793 ms / 100) Good 5.678 -> 5.078 (-10.57%) [ +0.28% +0.14% +0.00% / -10.51% -10.57% -10.53%] index_select skip64 : Elapsed 0.057 ms (5.694 ms / 100) Good 5.667 -> 4.974 (-12.23%) [ +0.00% +0.12% +0.02% / -12.02% -12.23% -11.82%] index_select skip256 : Elapsed 0.057 ms (5.667 ms / 100) good 5.759 -> 5.220 ( -9.36%) [ +0.12% +0.00% +0.16% / -8.99% -9.26% -9.36%] index_select spread : Elapsed 0.058 ms (5.766 ms / 100) Good 6.088 -> 5.185 (-14.83%) [ +0.00% +0.03% +0.11% / -14.83% -14.37% -14.29%] index_select strided 3 : Elapsed 0.061 ms (6.088 ms / 100) Good 6.108 -> 5.187 (-15.08%) [ +0.20% +0.00% +0.23% / -15.08% -14.78% -14.73%] index_select strided 5 : Elapsed 0.061 ms (6.120 ms / 100) Good 6.124 -> 5.205 (-15.01%) [ +0.15% +0.00% +0.10% / -15.01% -14.71% -14.53%] index_select strided 7 : Elapsed 0.061 ms (6.133 ms / 100) Good 6.121 -> 5.203 (-15.00%) [ +0.00% +0.02% +0.05% / -14.92% -15.00% -14.67%] index_select strided 8 : Elapsed 0.061 ms (6.121 ms / 100) Good 5.982 -> 5.126 (-14.31%) [ +0.12% +0.00% +0.43% / -14.31% -14.06% -13.91%] index_select strided 16 : Elapsed 0.060 ms (5.989 ms / 100) Good 5.727 -> 5.037 (-12.05%) [ +0.30% +0.16% +0.00% / -12.05% -11.63% -11.63%] index_select strided 64 : Elapsed 0.057 ms (5.744 ms / 100) Good 6.128 -> 5.234 (-14.59%) [ +0.16% +0.00% +0.34% / -14.59% -14.52% -14.44%] index_select strided 100 : Elapsed 0.061 ms (6.138 ms / 100) good 5.802 -> 5.239 ( -9.70%) [ +0.10% +0.00% +0.16% / -9.70% -9.46% -9.12%] index_select strided 255 : Elapsed 0.058 ms (5.808 ms / 100) Good 6.109 -> 5.244 (-14.16%) [ +0.08% +0.11% +0.00% / -14.16% -14.08% -14.01%] index_select random : Elapsed 0.061 ms (6.114 ms / 100) good 5.788 -> 5.219 ( -9.83%) [ +0.31% +0.03% +0.00% / -9.83% -9.45% -9.14%] index_select random_sorted : Elapsed 0.058 ms (5.806 ms / 100) Good 6.112 -> 5.223 (-14.55%) [ +0.20% +0.00% +0.07% / -14.50% -14.55% -14.30%] index_select perm : Elapsed 0.061 ms (6.124 ms / 100) good 5.780 -> 5.222 ( -9.65%) [ +0.10% +0.00% +0.05% / -9.65% -9.46% -9.29%] index_select perm_sorted : Elapsed 0.058 ms (5.786 ms / 100) out_shape = [256, 255] in_shape = [256, 512] idx_dim = 1 B = [256, 255] (stride (255, 1)) dim = 1 fill_cnt = 512 Good 3.199 -> 2.573 (-19.57%) [ +0.06% +0.25% +0.00% / -19.57% -19.47% -19.13%] index_fill_ const : Elapsed 0.032 ms (3.201 ms / 100) Good 3.177 -> 2.597 (-18.26%) [ +0.22% +0.09% +0.00% / -18.00% -18.26% -17.72%] index_fill_ linear : Elapsed 0.032 ms (3.184 ms / 100) Good 3.124 -> 2.596 (-16.90%) [ +0.48% +0.00% +0.22% / -16.71% -16.65% -16.90%] index_fill_ reverse : Elapsed 0.031 ms (3.139 ms / 100) Good 3.158 -> 2.572 (-18.56%) [ +0.00% +0.38% +0.22% / -18.43% -18.40% -18.56%] index_fill_ skip64 : Elapsed 0.032 ms (3.158 ms / 100) Good 3.185 -> 2.573 (-19.22%) [ +0.00% +0.19% +0.38% / -19.03% -19.22% -19.18%] index_fill_ skip256 : Elapsed 0.032 ms (3.185 ms / 100) Good 3.143 -> 2.590 (-17.59%) [ +0.00% +0.06% +0.16% / -17.59% -17.28% -16.89%] index_fill_ spread : Elapsed 0.031 ms (3.143 ms / 100) Good 3.287 -> 2.792 (-15.06%) [ +0.33% +0.00% +0.27% / -14.82% -15.06% -15.00%] index_fill_ strided 3 : Elapsed 0.033 ms (3.298 ms / 100) Good 3.300 -> 2.924 (-11.39%) [ +0.00% +0.30% +0.12% / -11.12% -11.39% -11.36%] index_fill_ strided 5 : Elapsed 0.033 ms (3.300 ms / 100) good 3.195 -> 2.928 ( -8.36%) [ +0.00% +0.19% +0.31% / -8.17% -8.36% -7.92%] index_fill_ strided 7 : Elapsed 0.032 ms (3.195 ms / 100) good 3.190 -> 2.979 ( -6.61%) [ +0.09% +0.00% +0.22% / -6.61% -6.27% -6.36%] index_fill_ strided 8 : Elapsed 0.032 ms (3.193 ms / 100) 3.189 -> 3.057 ( -4.14%) [ +0.25% +0.19% +0.00% / -3.92% -4.11% -4.14%] index_fill_ strided 16 : Elapsed 0.032 ms (3.197 ms / 100) Good 3.182 -> 2.854 (-10.31%) [ +0.00% +0.03% +0.06% / -10.28% -10.31% -10.25%] index_fill_ strided 64 : Elapsed 0.032 ms (3.182 ms / 100) 3.301 -> 3.253 ( -1.45%) [ +0.00% +0.15% +0.15% / -1.30% -1.45% -1.45%] index_fill_ strided 100 : Elapsed 0.033 ms (3.301 ms / 100) good 3.289 -> 3.111 ( -5.41%) [ +0.24% +0.00% +0.09% / -5.41% -5.38% -5.32%] index_fill_ random : Elapsed 0.033 ms (3.297 ms / 100) Good 3.225 -> 2.693 (-16.50%) [ +0.03% +0.06% +0.00% / -16.50% -16.37% -16.43%] index_fill_ random_sorted : Elapsed 0.032 ms (3.226 ms / 100) B = [256, 255] (stride (255, 1)) A = [256, 512] (stride (512, 1)) dim = 1 good 4.928 -> 4.643 ( -5.78%) [ +0.30% +0.47% +0.00% / -5.26% -5.78% -5.64%] index_select const : Elapsed 0.049 ms (4.943 ms / 100) 5.034 -> 4.800 ( -4.65%) [ +0.04% +0.00% +0.04% / -4.65% -4.07% -4.17%] index_select wrap : Elapsed 0.050 ms (5.036 ms / 100) 5.049 -> 4.822 ( -4.50%) [ +0.00% +0.20% +0.02% / -4.16% -4.50% -4.42%] index_select linear : Elapsed 0.050 ms (5.049 ms / 100) 5.037 -> 4.837 ( -3.97%) [ +0.00% +0.32% +0.30% / -3.97% -3.95% -3.91%] index_select reverse : Elapsed 0.050 ms (5.037 ms / 100) good 4.985 -> 4.719 ( -5.34%) [ +0.08% +0.00% +0.06% / -5.24% -5.32% -5.34%] index_select skip64 : Elapsed 0.050 ms (4.989 ms / 100) good 4.939 -> 4.651 ( -5.83%) [ +0.18% +0.00% +0.26% / -5.83% -5.57% -5.41%] index_select skip256 : Elapsed 0.049 ms (4.948 ms / 100) 5.150 -> 4.919 ( -4.49%) [ +0.00% +0.31% +0.23% / -4.12% -4.49% -4.43%] index_select spread : Elapsed 0.051 ms (5.150 ms / 100) good 5.267 -> 4.942 ( -6.17%) [ +0.02% +0.00% +0.11% / -6.17% -6.08% -6.17%] index_select strided 3 : Elapsed 0.053 ms (5.268 ms / 100) good 5.350 -> 4.939 ( -7.68%) [ +0.22% +0.06% +0.00% / -7.55% -7.68% -7.50%] index_select strided 5 : Elapsed 0.054 ms (5.362 ms / 100) good 5.386 -> 4.951 ( -8.08%) [ +0.02% +0.00% +0.09% / -8.08% -7.95% -7.76%] index_select strided 7 : Elapsed 0.054 ms (5.387 ms / 100) good 5.404 -> 4.960 ( -8.22%) [ +0.00% +0.28% +0.17% / -7.90% -8.22% -7.88%] index_select strided 8 : Elapsed 0.054 ms (5.404 ms / 100) good 5.256 -> 4.870 ( -7.34%) [ +0.08% +0.00% +0.15% / -7.34% -7.19% -7.13%] index_select strided 16 : Elapsed 0.053 ms (5.260 ms / 100) good 5.034 -> 4.739 ( -5.86%) [ +0.00% +0.10% +0.08% / -5.86% -5.36% -5.46%] index_select strided 64 : Elapsed 0.050 ms (5.034 ms / 100) good 5.406 -> 4.988 ( -7.73%) [ +0.24% +0.00% +0.22% / -7.42% -7.73% -7.71%] index_select strided 100 : Elapsed 0.054 ms (5.419 ms / 100) 5.207 -> 4.958 ( -4.78%) [ +0.27% +0.00% +0.40% / -4.69% -4.78% -4.63%] index_select strided 255 : Elapsed 0.052 ms (5.221 ms / 100) good 4.959 -> 4.665 ( -5.93%) [ +0.08% +0.04% +0.00% / -5.61% -5.93% -5.83%] index_select strided 256 : Elapsed 0.050 ms (4.963 ms / 100) 5.192 -> 4.955 ( -4.56%) [ +0.00% +0.13% +0.06% / -4.56% -4.51% -4.51%] index_select strided 257 : Elapsed 0.052 ms (5.192 ms / 100) good 5.372 -> 5.029 ( -6.38%) [ +0.00% +0.13% +0.04% / -6.38% -6.25% -6.18%] index_select random : Elapsed 0.054 ms (5.372 ms / 100) 5.158 -> 4.932 ( -4.38%) [ +0.56% +0.00% +0.33% / -3.92% -4.21% -4.38%] index_select random_sorted : Elapsed 0.052 ms (5.187 ms / 100) good 5.395 -> 5.022 ( -6.91%) [ +0.30% +0.00% +0.54% / -6.67% -6.86% -6.91%] index_select perm : Elapsed 0.054 ms (5.411 ms / 100) 5.159 -> 4.930 ( -4.44%) [ +0.00% +0.02% +0.29% / -4.44% -4.13% -4.01%] index_select perm_sorted : Elapsed 0.052 ms (5.159 ms / 100) B = [256, 255] (stride (255, 1)) A = [256, 512] (stride (1, 256)) dim = 1 4.786 -> 4.635 ( -3.16%) [ +0.02% +0.00% +0.15% / -2.82% -3.16% -2.97%] index_select const : Elapsed 0.048 ms (4.787 ms / 100) 4.844 -> 4.903 ( +1.22%) [ +0.06% +0.00% +0.06% / +1.22% +1.42% +1.36%] index_select wrap : Elapsed 0.048 ms (4.847 ms / 100) 4.863 -> 4.913 ( +1.03%) [ +0.00% +0.08% +0.14% / +1.17% +1.11% +1.03%] index_select linear : Elapsed 0.049 ms (4.863 ms / 100) 4.849 -> 4.915 ( +1.36%) [ +0.14% +0.00% +0.25% / +1.40% +1.38% +1.36%] index_select reverse : Elapsed 0.049 ms (4.856 ms / 100) 4.807 -> 4.657 ( -3.12%) [ +0.00% +0.12% +0.04% / -2.66% -3.12% -2.56%] index_select skip64 : Elapsed 0.048 ms (4.807 ms / 100) 4.789 -> 4.649 ( -2.92%) [ +0.00% +0.17% +0.10% / -2.92% -2.80% -2.82%] index_select skip256 : Elapsed 0.048 ms (4.789 ms / 100) 4.854 -> 4.901 ( +0.97%) [ +0.00% +0.06% +0.10% / +1.07% +1.38% +0.97%] index_select spread : Elapsed 0.049 ms (4.854 ms / 100) 4.840 -> 4.947 ( +2.21%) [ +0.04% +0.00% +0.37% / +2.21% +2.77% +3.26%] index_select strided 3 : Elapsed 0.048 ms (4.842 ms / 100) 4.839 -> 4.969 ( +2.69%) [ +0.00% +0.00% +0.04% / +2.73% +2.71% +2.69%] index_select strided 5 : Elapsed 0.048 ms (4.839 ms / 100) 4.849 -> 4.964 ( +2.37%) [ +0.00% +0.00% +0.35% / +2.37% +2.43% +2.43%] index_select strided 7 : Elapsed 0.048 ms (4.849 ms / 100) 4.826 -> 4.735 ( -1.89%) [ +0.00% +0.00% +0.00% / -1.12% -1.89% -1.51%] index_select strided 8 : Elapsed 0.048 ms (4.826 ms / 100) 4.794 -> 4.719 ( -1.56%) [ +0.08% +0.08% +0.00% / -1.56% -1.06% -0.86%] index_select strided 16 : Elapsed 0.048 ms (4.798 ms / 100) 4.764 -> 4.668 ( -2.02%) [ +0.00% +0.48% +0.25% / -2.02% -1.68% -1.66%] index_select strided 64 : Elapsed 0.048 ms (4.764 ms / 100) 4.832 -> 4.781 ( -1.06%) [ +0.08% +0.00% +0.23% / -0.43% -1.06% -0.43%] index_select strided 100 : Elapsed 0.048 ms (4.836 ms / 100) 4.847 -> 4.931 ( +1.73%) [ +0.02% +0.02% +0.00% / +1.77% +1.73% +1.77%] index_select strided 255 : Elapsed 0.048 ms (4.848 ms / 100) 4.792 -> 4.635 ( -3.28%) [ +0.00% +0.17% +0.27% / -2.98% -3.13% -3.28%] index_select strided 256 : Elapsed 0.048 ms (4.792 ms / 100) 4.847 -> 4.923 ( +1.57%) [ +0.17% +0.00% +0.14% / +1.57% +1.73% +2.04%] index_select strided 257 : Elapsed 0.049 ms (4.855 ms / 100) 4.812 -> 4.854 ( +0.87%) [ +0.08% +0.00% +0.21% / +0.87% +1.81% +1.70%] index_select random : Elapsed 0.048 ms (4.816 ms / 100) 4.809 -> 4.850 ( +0.85%) [ +0.10% +0.17% +0.00% / +0.85% +1.56% +1.98%] index_select random_sorted : Elapsed 0.048 ms (4.814 ms / 100) 4.869 -> 4.941 ( +1.48%) [ +0.00% +0.14% +0.27% / +1.48% +2.12% +2.22%] index_select perm : Elapsed 0.049 ms (4.869 ms / 100) 4.843 -> 4.920 ( +1.59%) [ +0.06% +0.00% +0.17% / +1.59% +2.58% +2.52%] index_select perm_sorted : Elapsed 0.048 ms (4.846 ms / 100) B = [256, 255] (stride (1, 256)) dim = 1 fill_cnt = 512 2.633 -> 2.626 ( -0.27%) [ +0.00% +0.00% +0.08% / -0.27% +0.34% +0.61%] index_fill_ const : Elapsed 0.026 ms (2.633 ms / 100) 2.664 -> 2.667 ( +0.11%) [ +0.23% +0.53% +0.00% / +0.11% +0.26% +0.11%] index_fill_ linear : Elapsed 0.027 ms (2.670 ms / 100) 2.640 -> 2.652 ( +0.45%) [ +0.00% +0.61% +0.11% / +0.68% +0.49% +0.45%] index_fill_ reverse : Elapsed 0.026 ms (2.640 ms / 100) 2.592 -> 2.583 ( -0.35%) [ +0.50% +0.00% +0.00% / +1.00% -0.12% -0.35%] index_fill_ skip64 : Elapsed 0.026 ms (2.605 ms / 100) 2.630 -> 2.621 ( -0.34%) [ +0.23% +0.19% +0.00% / -0.27% -0.34% -0.08%] index_fill_ skip256 : Elapsed 0.026 ms (2.636 ms / 100) 2.599 -> 2.601 ( +0.08%) [ +0.23% +0.19% +0.00% / +0.08% +0.15% +0.46%] index_fill_ spread : Elapsed 0.026 ms (2.605 ms / 100) 2.611 -> 2.597 ( -0.54%) [ +0.00% +0.46% +0.04% / +0.34% -0.54% +0.08%] index_fill_ strided 3 : Elapsed 0.026 ms (2.611 ms / 100) 2.600 -> 2.587 ( -0.50%) [ +0.27% +0.00% +0.00% / -0.35% -0.42% -0.50%] index_fill_ strided 5 : Elapsed 0.026 ms (2.607 ms / 100) 2.608 -> 2.604 ( -0.15%) [ +0.61% +0.00% +0.46% / -0.15% +1.23% +0.92%] index_fill_ strided 7 : Elapsed 0.026 ms (2.624 ms / 100) 2.608 -> 2.601 ( -0.27%) [ +0.00% +0.08% +0.12% / -0.04% -0.23% -0.27%] index_fill_ strided 8 : Elapsed 0.026 ms (2.608 ms / 100) 2.613 -> 2.613 ( +0.00%) [ +0.27% +0.00% +0.08% / +0.00% +0.77% +1.26%] index_fill_ strided 16 : Elapsed 0.026 ms (2.620 ms / 100) 2.601 -> 2.597 ( -0.15%) [ +0.35% +0.00% +0.23% / -0.15% +0.92% +0.96%] index_fill_ strided 64 : Elapsed 0.026 ms (2.610 ms / 100) 2.622 -> 2.587 ( -1.33%) [ +0.08% +0.04% +0.00% / -0.99% -1.26% -1.33%] index_fill_ strided 100 : Elapsed 0.026 ms (2.624 ms / 100) 2.606 -> 2.594 ( -0.46%) [ +0.31% +0.08% +0.00% / -0.08% -0.46% -0.46%] index_fill_ random : Elapsed 0.026 ms (2.614 ms / 100) 2.619 -> 2.616 ( -0.11%) [ +0.27% +0.00% +0.04% / -0.11% +0.50% +0.61%] index_fill_ random_sorted : Elapsed 0.026 ms (2.626 ms / 100) B = [256, 255] (stride (1, 256)) A = [256, 512] (stride (512, 1)) dim = 1 4.721 -> 4.714 ( -0.15%) [ +0.00% +0.28% +0.19% / +0.04% -0.15% +0.30%] index_select const : Elapsed 0.047 ms (4.721 ms / 100) 4.911 -> 4.914 ( +0.06%) [ +0.47% +0.26% +0.00% / +0.31% +0.06% +0.18%] index_select wrap : Elapsed 0.049 ms (4.934 ms / 100) 4.916 -> 4.909 ( -0.14%) [ +0.31% +0.08% +0.00% / -0.14% +0.14% +0.26%] index_select linear : Elapsed 0.049 ms (4.931 ms / 100) 4.912 -> 4.923 ( +0.22%) [ +0.12% +0.04% +0.00% / +0.22% +0.22% +0.69%] index_select reverse : Elapsed 0.049 ms (4.918 ms / 100) 4.748 -> 4.749 ( +0.02%) [ +0.00% +0.02% +0.04% / +0.02% +0.11% +0.27%] index_select skip64 : Elapsed 0.047 ms (4.748 ms / 100) 4.713 -> 4.706 ( -0.15%) [ +0.00% +0.04% +0.08% / -0.04% -0.15% +0.21%] index_select skip256 : Elapsed 0.047 ms (4.713 ms / 100) 5.114 -> 5.119 ( +0.10%) [ +0.00% +0.16% +0.23% / +0.10% +0.20% +0.33%] index_select spread : Elapsed 0.051 ms (5.114 ms / 100) 5.256 -> 5.250 ( -0.11%) [ +0.00% +0.19% +0.46% / +0.25% +0.02% -0.11%] index_select strided 3 : Elapsed 0.053 ms (5.256 ms / 100) 5.269 -> 5.278 ( +0.17%) [ +0.09% +0.34% +0.00% / +0.36% +0.17% +0.25%] index_select strided 5 : Elapsed 0.053 ms (5.274 ms / 100) 5.281 -> 5.294 ( +0.25%) [ +0.00% +0.11% +0.32% / +0.25% +0.40% +0.30%] index_select strided 7 : Elapsed 0.053 ms (5.281 ms / 100) 5.280 -> 5.280 ( +0.00%) [ +0.15% +0.09% +0.00% / +0.00% +0.25% +0.11%] index_select strided 8 : Elapsed 0.053 ms (5.288 ms / 100) 5.142 -> 5.141 ( -0.02%) [ +0.00% +0.29% +0.21% / +0.25% -0.02% +0.54%] index_select strided 16 : Elapsed 0.051 ms (5.142 ms / 100) 4.849 -> 4.842 ( -0.14%) [ +0.12% +0.06% +0.00% / +0.06% -0.02% -0.14%] index_select strided 64 : Elapsed 0.049 ms (4.855 ms / 100) 5.309 -> 5.296 ( -0.24%) [ +0.21% +0.00% +0.32% / +0.11% -0.24% +0.09%] index_select strided 100 : Elapsed 0.053 ms (5.320 ms / 100) 5.151 -> 5.155 ( +0.08%) [ +0.00% +0.06% +0.06% / +0.08% +0.43% +0.74%] index_select strided 255 : Elapsed 0.052 ms (5.151 ms / 100) 4.726 -> 4.735 ( +0.19%) [ +0.00% +0.08% +0.19% / +0.19% +0.23% +0.21%] index_select strided 256 : Elapsed 0.047 ms (4.726 ms / 100) 5.154 -> 5.145 ( -0.17%) [ +0.35% +0.00% +0.25% / +0.19% -0.17% +0.08%] index_select strided 257 : Elapsed 0.052 ms (5.172 ms / 100) 5.287 -> 5.300 ( +0.25%) [ +0.42% +0.17% +0.00% / +0.32% +0.25% +0.53%] index_select random : Elapsed 0.053 ms (5.309 ms / 100) 5.117 -> 5.110 ( -0.14%) [ +0.35% +0.00% +0.14% / +0.02% -0.14% +0.29%] index_select random_sorted : Elapsed 0.051 ms (5.135 ms / 100) 5.278 -> 5.280 ( +0.04%) [ +0.00% +0.45% +0.34% / +0.04% +0.61% +0.74%] index_select perm : Elapsed 0.053 ms (5.278 ms / 100) 5.105 -> 5.107 ( +0.04%) [ +0.20% +0.04% +0.00% / +0.04% +0.10% +0.16%] index_select perm_sorted : Elapsed 0.051 ms (5.115 ms / 100) B = [256, 255] (stride (1, 256)) A = [256, 512] (stride (1, 256)) dim = 1 4.619 -> 4.616 ( -0.06%) [ +0.32% +0.00% +0.19% / -0.06% +1.26% +1.19%] index_select const : Elapsed 0.046 ms (4.634 ms / 100) 4.780 -> 4.780 ( +0.00%) [ +0.00% +0.02% +0.06% / +0.00% +0.21% +0.40%] index_select wrap : Elapsed 0.048 ms (4.780 ms / 100) 4.783 -> 4.777 ( -0.13%) [ +0.15% +0.00% +0.06% / +0.13% -0.13% +0.10%] index_select linear : Elapsed 0.048 ms (4.790 ms / 100) 4.775 -> 4.775 ( +0.00%) [ +0.25% +0.00% +0.10% / +0.13% +0.00% +0.04%] index_select reverse : Elapsed 0.048 ms (4.787 ms / 100) 4.633 -> 4.631 ( -0.04%) [ +0.00% +0.11% +0.04% / +0.26% -0.04% +0.26%] index_select skip64 : Elapsed 0.046 ms (4.633 ms / 100) 4.622 -> 4.621 ( -0.02%) [ +0.00% +0.19% +0.17% / -0.02% +1.21% +1.45%] index_select skip256 : Elapsed 0.046 ms (4.622 ms / 100) 4.810 -> 4.788 ( -0.46%) [ +0.00% +0.08% +0.23% / +0.29% -0.46% -0.25%] index_select spread : Elapsed 0.048 ms (4.810 ms / 100) 4.776 -> 4.772 ( -0.08%) [ +0.27% +0.00% +0.08% / +0.40% -0.08% +0.02%] index_select strided 3 : Elapsed 0.048 ms (4.789 ms / 100) 4.770 -> 4.753 ( -0.36%) [ +0.00% +0.10% +0.08% / +0.38% -0.36% +0.06%] index_select strided 5 : Elapsed 0.048 ms (4.770 ms / 100) 4.788 -> 4.778 ( -0.21%) [ +0.04% +0.08% +0.00% / -0.21% -0.21% +0.06%] index_select strided 7 : Elapsed 0.048 ms (4.790 ms / 100) 4.663 -> 4.652 ( -0.24%) [ +0.17% +0.00% +0.06% / -0.24% +0.17% +0.41%] index_select strided 8 : Elapsed 0.047 ms (4.671 ms / 100) 4.679 -> 4.658 ( -0.45%) [ +0.17% +0.00% +0.04% / +0.11% -0.45% -0.38%] index_select strided 16 : Elapsed 0.047 ms (4.687 ms / 100) 4.635 -> 4.624 ( -0.24%) [ +0.02% +0.00% +0.02% / -0.09% -0.24% +0.22%] index_select strided 64 : Elapsed 0.046 ms (4.636 ms / 100) 4.692 -> 4.703 ( +0.23%) [ +0.19% +0.00% +0.38% / +0.23% +1.11% +1.11%] index_select strided 100 : Elapsed 0.047 ms (4.701 ms / 100) 4.810 -> 4.771 ( -0.81%) [ +0.15% +0.00% +0.17% / +0.02% -0.62% -0.81%] index_select strided 255 : Elapsed 0.048 ms (4.817 ms / 100) 4.614 -> 4.610 ( -0.09%) [ +0.00% +0.15% +0.17% / -0.09% +0.52% +0.82%] index_select strided 256 : Elapsed 0.046 ms (4.614 ms / 100) 4.767 -> 4.769 ( +0.04%) [ +0.52% +0.00% +0.06% / +0.21% +0.04% +0.21%] index_select strided 257 : Elapsed 0.048 ms (4.792 ms / 100) 4.759 -> 4.763 ( +0.08%) [ +0.00% +0.13% +0.00% / +0.08% +0.17% +0.32%] index_select random : Elapsed 0.048 ms (4.759 ms / 100) 4.757 -> 4.754 ( -0.06%) [ +0.21% +0.23% +0.00% / -0.06% +0.42% +0.67%] index_select random_sorted : Elapsed 0.048 ms (4.767 ms / 100) 4.762 -> 4.774 ( +0.25%) [ +0.25% +0.00% +0.19% / +0.25% +0.50% +0.55%] index_select perm : Elapsed 0.048 ms (4.774 ms / 100) 4.760 -> 4.763 ( +0.06%) [ +0.00% +0.15% +0.17% / +0.27% +0.38% +0.06%] index_select perm_sorted : Elapsed 0.048 ms (4.760 ms / 100) out_shape = [255, 256] in_shape = [512, 256] idx_dim = 0 B = [255, 256] (stride (256, 1)) dim = 0 fill_cnt = 512 2.588 -> 2.588 ( +0.00%) [ +0.00% +0.04% +0.00% / +0.00% +0.15% +0.04%] index_fill_ const : Elapsed 0.026 ms (2.588 ms / 100) 2.655 -> 2.621 ( -1.28%) [ +0.19% +0.00% +0.30% / -0.68% -1.28% -0.79%] index_fill_ linear : Elapsed 0.027 ms (2.660 ms / 100) 2.624 -> 2.616 ( -0.30%) [ +0.08% +0.00% +0.27% / -0.27% -0.11% -0.30%] index_fill_ reverse : Elapsed 0.026 ms (2.626 ms / 100) 2.589 -> 2.585 ( -0.15%) [ +0.23% +0.42% +0.00% / +0.19% +0.66% -0.15%] index_fill_ skip64 : Elapsed 0.026 ms (2.595 ms / 100) 2.589 -> 2.584 ( -0.19%) [ +0.00% +0.04% +0.19% / -0.19% +0.08% +0.08%] index_fill_ skip256 : Elapsed 0.026 ms (2.589 ms / 100) 2.616 -> 2.590 ( -0.99%) [ +0.65% +0.04% +0.00% / -0.54% -0.99% -0.23%] index_fill_ spread : Elapsed 0.026 ms (2.633 ms / 100) 2.598 -> 2.588 ( -0.38%) [ +0.31% +0.00% +0.00% / -0.38% -0.38% -0.23%] index_fill_ strided 3 : Elapsed 0.026 ms (2.606 ms / 100) 2.600 -> 2.594 ( -0.23%) [ +0.19% +0.00% +0.08% / -0.15% -0.23% -0.19%] index_fill_ strided 5 : Elapsed 0.026 ms (2.605 ms / 100) 2.617 -> 2.605 ( -0.46%) [ +0.04% +0.38% +0.00% / -0.04% -0.46% -0.27%] index_fill_ strided 7 : Elapsed 0.026 ms (2.618 ms / 100) 2.607 -> 2.598 ( -0.35%) [ +0.08% +0.00% +0.15% / -0.35% +0.31% +0.15%] index_fill_ strided 8 : Elapsed 0.026 ms (2.609 ms / 100) 2.610 -> 2.600 ( -0.38%) [ +0.27% +0.00% +0.27% / -0.38% -0.11% +0.15%] index_fill_ strided 16 : Elapsed 0.026 ms (2.617 ms / 100) 2.610 -> 2.600 ( -0.38%) [ +0.42% +0.50% +0.00% / +0.11% -0.38% +0.04%] index_fill_ strided 64 : Elapsed 0.026 ms (2.621 ms / 100) 2.601 -> 2.590 ( -0.42%) [ +0.23% +0.15% +0.00% / -0.08% -0.42% -0.35%] index_fill_ strided 100 : Elapsed 0.026 ms (2.607 ms / 100) 2.604 -> 2.599 ( -0.19%) [ +0.54% +0.35% +0.00% / -0.19% +0.58% +0.61%] index_fill_ random : Elapsed 0.026 ms (2.618 ms / 100) 2.617 -> 2.597 ( -0.76%) [ +0.31% +0.00% +0.38% / -0.42% -0.76% -0.34%] index_fill_ random_sorted : Elapsed 0.026 ms (2.625 ms / 100) B = [255, 256] (stride (256, 1)) A = [512, 256] (stride (256, 1)) dim = 0 4.620 -> 4.617 ( -0.06%) [ +0.00% +0.37% +0.24% / +0.19% -0.06% +0.11%] index_select const : Elapsed 0.046 ms (4.620 ms / 100) 4.782 -> 4.773 ( -0.19%) [ +0.00% +0.17% +0.08% / -0.02% -0.13% -0.19%] index_select wrap : Elapsed 0.048 ms (4.782 ms / 100) 4.804 -> 4.775 ( -0.60%) [ +0.06% +0.00% +0.04% / -0.08% -0.56% -0.60%] index_select linear : Elapsed 0.048 ms (4.807 ms / 100) 4.783 -> 4.786 ( +0.06%) [ +0.06% +0.00% +0.08% / +0.17% +0.06% +0.38%] index_select reverse : Elapsed 0.048 ms (4.786 ms / 100) 4.655 -> 4.626 ( -0.62%) [ +0.15% +0.00% +0.28% / +0.06% -0.62% -0.37%] index_select skip64 : Elapsed 0.047 ms (4.662 ms / 100) 4.620 -> 4.636 ( +0.35%) [ +0.00% +0.11% +0.39% / +0.35% +0.50% +0.87%] index_select skip256 : Elapsed 0.046 ms (4.620 ms / 100) 4.776 -> 4.785 ( +0.19%) [ +0.00% +0.21% +0.02% / +0.40% +0.19% +0.36%] index_select spread : Elapsed 0.048 ms (4.776 ms / 100) 4.785 -> 4.757 ( -0.59%) [ +0.21% +0.17% +0.00% / +0.27% -0.44% -0.59%] index_select strided 3 : Elapsed 0.048 ms (4.795 ms / 100) 4.765 -> 4.771 ( +0.13%) [ +0.34% +0.00% +0.08% / +0.13% +0.76% +0.80%] index_select strided 5 : Elapsed 0.048 ms (4.781 ms / 100) 4.776 -> 4.778 ( +0.04%) [ +0.00% +0.04% +0.17% / +0.04% +0.04% +0.52%] index_select strided 7 : Elapsed 0.048 ms (4.776 ms / 100) 4.678 -> 4.658 ( -0.43%) [ +0.06% +0.06% +0.00% / -0.06% -0.43% -0.11%] index_select strided 8 : Elapsed 0.047 ms (4.681 ms / 100) 4.644 -> 4.649 ( +0.11%) [ +0.00% +0.13% +0.22% / +0.11% +0.32% +0.30%] index_select strided 16 : Elapsed 0.046 ms (4.644 ms / 100) 4.636 -> 4.621 ( -0.32%) [ +0.04% +0.00% +0.00% / -0.32% +0.24% +0.50%] index_select strided 64 : Elapsed 0.046 ms (4.638 ms / 100) 4.717 -> 4.704 ( -0.28%) [ +0.00% +0.19% +0.32% / -0.28% -0.15% -0.06%] index_select strided 100 : Elapsed 0.047 ms (4.717 ms / 100) 4.774 -> 4.767 ( -0.15%) [ +0.08% +0.44% +0.00% / -0.15% +0.06% +0.27%] index_select strided 255 : Elapsed 0.048 ms (4.778 ms / 100) 4.618 -> 4.629 ( +0.24%) [ +0.19% +0.00% +0.24% / +0.30% +0.24% +0.43%] index_select strided 256 : Elapsed 0.046 ms (4.627 ms / 100) 4.775 -> 4.781 ( +0.13%) [ +0.00% +0.00% +0.10% / +0.29% +0.13% +0.19%] index_select strided 257 : Elapsed 0.048 ms (4.775 ms / 100) 4.742 -> 4.755 ( +0.27%) [ +0.00% +0.44% +0.27% / +0.27% +0.27% +0.55%] index_select random : Elapsed 0.047 ms (4.742 ms / 100) 4.758 -> 4.759 ( +0.02%) [ +0.08% +0.19% +0.00% / +0.02% +0.46% +0.48%] index_select random_sorted : Elapsed 0.048 ms (4.762 ms / 100) 4.826 -> 4.783 ( -0.89%) [ +0.00% +0.06% +0.02% / -0.19% -0.79% -0.89%] index_select perm : Elapsed 0.048 ms (4.826 ms / 100) 4.785 -> 4.801 ( +0.33%) [ +0.00% +0.25% +0.29% / +0.33% +0.46% +0.44%] index_select perm_sorted : Elapsed 0.048 ms (4.785 ms / 100) B = [255, 256] (stride (256, 1)) A = [512, 256] (stride (1, 512)) dim = 0 4.705 -> 4.709 ( +0.09%) [ +0.23% +0.13% +0.00% / +0.15% +0.09% +0.32%] index_select const : Elapsed 0.047 ms (4.716 ms / 100) 4.886 -> 4.878 ( -0.16%) [ +0.00% +0.29% +0.33% / -0.16% -0.06% +0.35%] index_select wrap : Elapsed 0.049 ms (4.886 ms / 100) 4.902 -> 4.903 ( +0.02%) [ +0.00% +0.24% +0.27% / +0.14% +0.02% +0.06%] index_select linear : Elapsed 0.049 ms (4.902 ms / 100) 4.898 -> 4.904 ( +0.12%) [ +0.00% +0.10% +0.04% / +0.18% +0.12% +0.29%] index_select reverse : Elapsed 0.049 ms (4.898 ms / 100) 4.782 -> 4.755 ( -0.56%) [ +0.15% +0.00% +0.00% / -0.08% -0.56% -0.48%] index_select skip64 : Elapsed 0.048 ms (4.789 ms / 100) 4.703 -> 4.699 ( -0.09%) [ +0.02% +0.00% +0.21% / -0.09% +0.57% +0.85%] index_select skip256 : Elapsed 0.047 ms (4.704 ms / 100) 5.065 -> 5.039 ( -0.51%) [ +0.00% +0.32% +0.20% / +0.02% -0.51% +0.26%] index_select spread : Elapsed 0.051 ms (5.065 ms / 100) 5.173 -> 5.166 ( -0.14%) [ +0.27% +0.00% +0.02% / -0.14% +0.25% +0.66%] index_select strided 3 : Elapsed 0.052 ms (5.187 ms / 100) 5.253 -> 5.242 ( -0.21%) [ +0.06% +0.23% +0.00% / -0.21% +0.04% +0.15%] index_select strided 5 : Elapsed 0.053 ms (5.256 ms / 100) 5.273 -> 5.273 ( +0.00%) [ +0.09% +0.15% +0.00% / +0.00% +0.32% +0.15%] index_select strided 7 : Elapsed 0.053 ms (5.278 ms / 100) 5.278 -> 5.265 ( -0.25%) [ +0.00% +0.00% +0.17% / -0.25% -0.25% -0.08%] index_select strided 8 : Elapsed 0.053 ms (5.278 ms / 100) 5.134 -> 5.116 ( -0.35%) [ +0.00% +0.04% +0.02% / -0.35% +0.55% +0.53%] index_select strided 16 : Elapsed 0.051 ms (5.134 ms / 100) 4.840 -> 4.849 ( +0.19%) [ +0.08% +0.00% +0.50% / +0.19% +0.41% +0.76%] index_select strided 64 : Elapsed 0.048 ms (4.844 ms / 100) 5.296 -> 5.278 ( -0.34%) [ +0.00% +0.11% +0.13% / -0.25% -0.09% -0.34%] index_select strided 100 : Elapsed 0.053 ms (5.296 ms / 100) 5.119 -> 5.122 ( +0.06%) [ +0.00% +0.00% +0.08% / +0.06% +0.08% +0.18%] index_select strided 255 : Elapsed 0.051 ms (5.119 ms / 100) 4.727 -> 4.725 ( -0.04%) [ +0.00% +0.02% +0.00% / +0.06% -0.04% -0.04%] index_select strided 256 : Elapsed 0.047 ms (4.727 ms / 100) 5.092 -> 5.090 ( -0.04%) [ +0.12% +0.00% +0.02% / -0.04% +0.18% +0.37%] index_select strided 257 : Elapsed 0.051 ms (5.098 ms / 100) 5.265 -> 5.269 ( +0.08%) [ +0.09% +0.00% +0.15% / +0.08% +0.19% +0.47%] index_select random : Elapsed 0.053 ms (5.270 ms / 100) 5.078 -> 5.092 ( +0.28%) [ +0.00% +0.02% +0.24% / +0.33% +0.65% +0.28%] index_select random_sorted : Elapsed 0.051 ms (5.078 ms / 100) 5.294 -> 5.274 ( -0.38%) [ +0.08% +0.00% +0.00% / -0.15% -0.38% -0.38%] index_select perm : Elapsed 0.053 ms (5.298 ms / 100) 5.079 -> 5.090 ( +0.22%) [ +0.33% +0.00% +0.49% / +0.22% +0.53% +0.59%] index_select perm_sorted : Elapsed 0.051 ms (5.096 ms / 100) B = [255, 256] (stride (1, 255)) dim = 0 fill_cnt = 512 Good 3.111 -> 2.566 (-17.52%) [ +0.16% +0.00% +0.10% / -17.52% -17.45% -17.29%] index_fill_ const : Elapsed 0.031 ms (3.116 ms / 100) Good 3.151 -> 2.602 (-17.42%) [ +0.13% +0.00% +0.13% / -17.42% -17.42% -17.01%] index_fill_ linear : Elapsed 0.032 ms (3.155 ms / 100) Good 3.130 -> 2.598 (-17.00%) [ +0.06% +0.13% +0.00% / -17.00% -16.74% -16.68%] index_fill_ reverse : Elapsed 0.031 ms (3.132 ms / 100) Good 3.120 -> 2.566 (-17.76%) [ +0.00% +0.06% +0.26% / -17.76% -17.76% -17.76%] index_fill_ skip64 : Elapsed 0.031 ms (3.120 ms / 100) Good 3.115 -> 2.568 (-17.56%) [ +0.00% +0.13% +0.06% / -17.56% -17.50% -17.56%] index_fill_ skip256 : Elapsed 0.031 ms (3.115 ms / 100) Good 3.120 -> 2.599 (-16.70%) [ +0.22% +0.10% +0.00% / -16.70% -16.57% -16.38%] index_fill_ spread : Elapsed 0.031 ms (3.127 ms / 100) Good 3.299 -> 2.812 (-14.76%) [ +0.24% +0.36% +0.00% / -14.46% -14.43% -14.76%] index_fill_ strided 3 : Elapsed 0.033 ms (3.307 ms / 100) Good 3.304 -> 2.907 (-12.02%) [ +0.03% +0.03% +0.00% / -11.35% -11.99% -12.02%] index_fill_ strided 5 : Elapsed 0.033 ms (3.305 ms / 100) good 3.195 -> 2.934 ( -8.17%) [ +0.31% +0.25% +0.00% / -8.17% -7.95% -7.86%] index_fill_ strided 7 : Elapsed 0.032 ms (3.205 ms / 100) good 3.187 -> 2.977 ( -6.59%) [ +0.00% +0.13% +0.16% / -6.40% -6.24% -6.59%] index_fill_ strided 8 : Elapsed 0.032 ms (3.187 ms / 100) 3.205 -> 3.073 ( -4.12%) [ +0.41% +0.00% +0.34% / -3.56% -4.02% -4.12%] index_fill_ strided 16 : Elapsed 0.032 ms (3.218 ms / 100) Good 3.172 -> 2.846 (-10.28%) [ +0.00% +0.22% +0.25% / -10.21% -10.21% -10.28%] index_fill_ strided 64 : Elapsed 0.032 ms (3.172 ms / 100) 3.307 -> 3.253 ( -1.63%) [ +0.18% +0.06% +0.00% / -1.57% -1.63% -1.54%] index_fill_ strided 100 : Elapsed 0.033 ms (3.313 ms / 100) 3.270 -> 3.110 ( -4.89%) [ +0.24% +0.34% +0.00% / -4.89% -4.77% -4.80%] index_fill_ random : Elapsed 0.033 ms (3.278 ms / 100) Good 3.210 -> 2.686 (-16.32%) [ +0.09% +0.00% +0.34% / -15.92% -16.07% -16.32%] index_fill_ random_sorted : Elapsed 0.032 ms (3.213 ms / 100) B = [255, 256] (stride (1, 255)) A = [512, 256] (stride (256, 1)) dim = 0 4.798 -> 4.649 ( -3.11%) [ +0.10% +0.00% +0.08% / -2.83% -3.00% -3.11%] index_select const : Elapsed 0.048 ms (4.803 ms / 100) 4.847 -> 4.895 ( +0.99%) [ +0.00% +0.04% +0.12% / +0.99% +1.40% +1.30%] index_select wrap : Elapsed 0.048 ms (4.847 ms / 100) 4.850 -> 4.897 ( +0.97%) [ +0.00% +0.25% +0.02% / +1.46% +0.97% +1.34%] index_select linear : Elapsed 0.049 ms (4.850 ms / 100) 4.850 -> 4.910 ( +1.24%) [ +0.00% +0.02% +0.21% / +1.24% +1.44% +1.46%] index_select reverse : Elapsed 0.048 ms (4.850 ms / 100) 4.768 -> 4.646 ( -2.56%) [ +0.17% +0.00% +0.00% / -2.56% -2.37% -2.41%] index_select skip64 : Elapsed 0.048 ms (4.776 ms / 100) 4.781 -> 4.653 ( -2.68%) [ +0.04% +0.00% +0.08% / -2.68% -2.53% -2.59%] index_select skip256 : Elapsed 0.048 ms (4.783 ms / 100) 4.842 -> 4.930 ( +1.82%) [ +0.21% +0.00% +0.17% / +1.82% +2.04% +1.82%] index_select spread : Elapsed 0.049 ms (4.852 ms / 100) 4.876 -> 4.973 ( +1.99%) [ +0.23% +0.00% +0.02% / +2.13% +1.99% +2.15%] index_select strided 3 : Elapsed 0.049 ms (4.887 ms / 100) 4.850 -> 4.973 ( +2.54%) [ +0.00% +0.23% +0.08% / +2.95% +2.54% +2.56%] index_select strided 5 : Elapsed 0.049 ms (4.850 ms / 100) 4.846 -> 4.962 ( +2.39%) [ +0.14% +0.25% +0.00% / +2.39% +2.50% +2.54%] index_select strided 7 : Elapsed 0.049 ms (4.853 ms / 100) 4.803 -> 4.734 ( -1.44%) [ +0.00% +0.10% +0.04% / -1.21% -1.44% -1.37%] index_select strided 8 : Elapsed 0.048 ms (4.803 ms / 100) 4.785 -> 4.799 ( +0.29%) [ +0.21% +0.00% +0.02% / +0.29% +0.36% +0.42%] index_select strided 16 : Elapsed 0.048 ms (4.795 ms / 100) 4.763 -> 4.654 ( -2.29%) [ +0.36% +0.00% +0.44% / -2.29% -2.25% -2.04%] index_select strided 64 : Elapsed 0.048 ms (4.780 ms / 100) 4.829 -> 4.803 ( -0.54%) [ +0.04% +0.00% +0.00% / -0.14% -0.54% -0.50%] index_select strided 100 : Elapsed 0.048 ms (4.831 ms / 100) 4.847 -> 4.951 ( +2.15%) [ +0.08% +0.06% +0.00% / +2.15% +2.35% +2.35%] index_select strided 255 : Elapsed 0.049 ms (4.851 ms / 100) 4.774 -> 4.653 ( -2.53%) [ +0.42% +0.00% +0.13% / -2.28% -2.53% -2.51%] index_select strided 256 : Elapsed 0.048 ms (4.794 ms / 100) 4.873 -> 4.949 ( +1.56%) [ +0.29% +0.00% +0.06% / +1.58% +1.68% +1.56%] index_select strided 257 : Elapsed 0.049 ms (4.887 ms / 100) 4.825 -> 4.905 ( +1.66%) [ +0.06% +0.00% +0.02% / +1.66% +1.76% +1.82%] index_select random : Elapsed 0.048 ms (4.828 ms / 100) 4.831 -> 4.862 ( +0.64%) [ +0.00% +0.10% +0.19% / +0.83% +0.64% +0.95%] index_select random_sorted : Elapsed 0.048 ms (4.831 ms / 100) 4.837 -> 4.962 ( +2.58%) [ +0.02% +0.23% +0.00% / +2.81% +2.79% +2.58%] index_select perm : Elapsed 0.048 ms (4.838 ms / 100) 4.846 -> 4.947 ( +2.08%) [ +0.21% +0.00% +0.12% / +2.17% +2.08% +2.41%] index_select perm_sorted : Elapsed 0.049 ms (4.856 ms / 100) B = [255, 256] (stride (1, 255)) A = [512, 256] (stride (1, 512)) dim = 0 good 4.944 -> 4.641 ( -6.13%) [ +0.00% +0.14% +0.28% / -6.13% -5.38% -5.14%] index_select const : Elapsed 0.049 ms (4.944 ms / 100) 5.048 -> 4.812 ( -4.68%) [ +0.02% +0.00% +0.18% / -4.38% -4.68% -4.66%] index_select wrap : Elapsed 0.050 ms (5.049 ms / 100) 5.043 -> 4.812 ( -4.58%) [ +0.06% +0.00% +0.28% / -4.58% -4.12% -4.26%] index_select linear : Elapsed 0.050 ms (5.046 ms / 100) 5.058 -> 4.813 ( -4.84%) [ +0.02% +0.00% +0.30% / -4.33% -4.84% -4.76%] index_select reverse : Elapsed 0.051 ms (5.059 ms / 100) good 4.966 -> 4.686 ( -5.64%) [ +0.08% +0.00% +0.04% / -5.58% -5.64% -5.13%] index_select skip64 : Elapsed 0.050 ms (4.970 ms / 100) good 4.944 -> 4.654 ( -5.87%) [ +0.61% +0.06% +0.00% / -5.87% -5.12% -4.87%] index_select skip256 : Elapsed 0.050 ms (4.974 ms / 100) good 5.198 -> 4.930 ( -5.16%) [ +0.19% +0.00% +0.12% / -4.75% -5.16% -5.16%] index_select spread : Elapsed 0.052 ms (5.208 ms / 100) good 5.271 -> 4.940 ( -6.28%) [ +0.04% +0.00% +0.11% / -6.28% -6.20% -6.17%] index_select strided 3 : Elapsed 0.053 ms (5.273 ms / 100) good 5.344 -> 4.937 ( -7.62%) [ +0.36% +0.17% +0.00% / -7.50% -7.62% -7.54%] index_select strided 5 : Elapsed 0.054 ms (5.363 ms / 100) good 5.391 -> 4.945 ( -8.27%) [ +0.00% +0.30% +0.33% / -7.90% -8.27% -8.20%] index_select strided 7 : Elapsed 0.054 ms (5.391 ms / 100) good 5.383 -> 4.953 ( -7.99%) [ +0.28% +0.00% +0.09% / -7.99% -7.84% -7.67%] index_select strided 8 : Elapsed 0.054 ms (5.398 ms / 100) good 5.252 -> 4.862 ( -7.43%) [ +0.29% +0.00% +0.11% / -6.85% -7.41% -7.43%] index_select strided 16 : Elapsed 0.053 ms (5.267 ms / 100) good 5.040 -> 4.738 ( -5.99%) [ +0.00% +0.14% +0.22% / -5.79% -5.79% -5.99%] index_select strided 64 : Elapsed 0.050 ms (5.040 ms / 100) good 5.394 -> 4.975 ( -7.77%) [ +0.00% +0.09% +0.48% / -7.77% -7.04% -7.19%] index_select strided 100 : Elapsed 0.054 ms (5.394 ms / 100) good 5.251 -> 4.958 ( -5.58%) [ +0.00% +0.08% +0.19% / -5.24% -5.58% -5.24%] index_select strided 255 : Elapsed 0.053 ms (5.251 ms / 100) good 4.945 -> 4.660 ( -5.76%) [ +0.34% +0.00% +0.12% / -5.76% -5.16% -5.14%] index_select strided 256 : Elapsed 0.050 ms (4.962 ms / 100) good 5.221 -> 4.944 ( -5.31%) [ +0.00% +0.27% +0.06% / -5.15% -5.31% -5.31%] index_select strided 257 : Elapsed 0.052 ms (5.221 ms / 100) good 5.383 -> 5.007 ( -6.98%) [ +0.06% +0.07% +0.00% / -6.98% -6.76% -6.98%] index_select random : Elapsed 0.054 ms (5.386 ms / 100) 5.171 -> 4.926 ( -4.74%) [ +0.12% +0.00% +0.35% / -4.33% -4.64% -4.74%] index_select random_sorted : Elapsed 0.052 ms (5.177 ms / 100) good 5.390 -> 5.001 ( -7.22%) [ +0.04% +0.00% +0.24% / -7.22% -6.62% -6.62%] index_select perm : Elapsed 0.054 ms (5.392 ms / 100) 5.185 -> 4.927 ( -4.98%) [ +0.31% +0.00% +0.00% / -4.71% -4.98% -4.96%] index_select perm_sorted : Elapsed 0.052 ms (5.201 ms / 100) out_shape = [512, 255] in_shape = [512, 256] idx_dim = 1 B = [512, 255] (stride (255, 1)) dim = 1 fill_cnt = 256 Good 3.133 -> 2.562 (-18.23%) [ +0.19% +0.00% +0.38% / -18.23% -18.16% -18.10%] index_fill_ const : Elapsed 0.031 ms (3.139 ms / 100) Good 3.167 -> 2.623 (-17.18%) [ +0.00% +0.32% +0.00% / -17.15% -16.92% -17.18%] index_fill_ linear : Elapsed 0.032 ms (3.167 ms / 100) Good 3.196 -> 2.627 (-17.80%) [ +0.31% +0.13% +0.00% / -17.52% -17.80% -17.65%] index_fill_ reverse : Elapsed 0.032 ms (3.206 ms / 100) Good 3.119 -> 2.559 (-17.95%) [ +0.06% +0.00% +0.00% / -17.79% -17.95% -17.73%] index_fill_ skip64 : Elapsed 0.031 ms (3.121 ms / 100) Good 3.148 -> 2.563 (-18.58%) [ +0.44% +0.00% +0.00% / -18.17% -18.49% -18.58%] index_fill_ skip256 : Elapsed 0.032 ms (3.162 ms / 100) Good 3.175 -> 2.633 (-17.07%) [ +0.35% +0.03% +0.00% / -17.07% -17.07% -17.07%] index_fill_ spread : Elapsed 0.032 ms (3.186 ms / 100) Good 3.438 -> 2.932 (-14.72%) [ +0.00% +0.23% +0.29% / -14.72% -14.43% -14.34%] index_fill_ strided 3 : Elapsed 0.034 ms (3.438 ms / 100) Good 3.493 -> 3.040 (-12.97%) [ +0.26% +0.14% +0.00% / -12.85% -12.97% -12.88%] index_fill_ strided 5 : Elapsed 0.035 ms (3.502 ms / 100) Good 3.306 -> 2.951 (-10.74%) [ +0.24% +0.18% +0.00% / -10.65% -10.71% -10.74%] index_fill_ strided 7 : Elapsed 0.033 ms (3.314 ms / 100) good 3.298 -> 2.999 ( -9.07%) [ +0.06% +0.00% +0.00% / -8.73% -9.07% -9.07%] index_fill_ strided 8 : Elapsed 0.033 ms (3.300 ms / 100) good 3.323 -> 3.080 ( -7.31%) [ +0.39% +0.00% +0.18% / -7.22% -7.31% -7.28%] index_fill_ strided 16 : Elapsed 0.033 ms (3.336 ms / 100) Good 3.255 -> 2.877 (-11.61%) [ +0.31% +0.12% +0.00% / -11.52% -11.46% -11.61%] index_fill_ strided 64 : Elapsed 0.033 ms (3.265 ms / 100) 3.498 -> 3.370 ( -3.66%) [ +0.14% +0.31% +0.00% / -3.66% -3.49% -3.43%] index_fill_ strided 100 : Elapsed 0.035 ms (3.503 ms / 100) good 3.504 -> 3.231 ( -7.79%) [ +0.31% +0.29% +0.00% / -7.62% -7.79% -7.76%] index_fill_ random : Elapsed 0.035 ms (3.515 ms / 100) Good 3.383 -> 2.828 (-16.41%) [ +0.00% +0.00% +0.09% / -16.02% -16.35% -16.41%] index_fill_ random_sorted : Elapsed 0.034 ms (3.383 ms / 100) B = [512, 255] (stride (255, 1)) A = [512, 256] (stride (256, 1)) dim = 1 Good 5.694 -> 4.984 (-12.47%) [ +0.00% +0.18% +0.02% / -12.33% -12.42% -12.47%] index_select const : Elapsed 0.057 ms (5.694 ms / 100) good 5.758 -> 5.223 ( -9.29%) [ +0.23% +0.00% +0.12% / -8.82% -9.29% -9.14%] index_select wrap : Elapsed 0.058 ms (5.771 ms / 100) good 5.756 -> 5.224 ( -9.24%) [ +0.03% +0.07% +0.00% / -9.10% -9.24% -9.17%] index_select linear : Elapsed 0.058 ms (5.758 ms / 100) good 5.763 -> 5.213 ( -9.54%) [ +0.21% +0.00% +0.17% / -9.37% -9.54% -9.49%] index_select reverse : Elapsed 0.058 ms (5.775 ms / 100) Good 5.668 -> 5.061 (-10.71%) [ +0.14% +0.09% +0.00% / -10.53% -10.46% -10.71%] index_select skip64 : Elapsed 0.057 ms (5.676 ms / 100) Good 5.660 -> 4.970 (-12.19%) [ +0.04% +0.00% +0.04% / -12.00% -12.07% -12.19%] index_select skip256 : Elapsed 0.057 ms (5.662 ms / 100) good 5.748 -> 5.222 ( -9.15%) [ +0.00% +0.09% +0.16% / -8.87% -9.06% -9.15%] index_select spread : Elapsed 0.057 ms (5.748 ms / 100) Good 6.080 -> 5.193 (-14.59%) [ +0.05% +0.00% +0.02% / -14.56% -14.39% -14.59%] index_select strided 3 : Elapsed 0.061 ms (6.083 ms / 100) Good 6.102 -> 5.190 (-14.95%) [ +0.38% +0.43% +0.00% / -14.59% -14.95% -14.72%] index_select strided 5 : Elapsed 0.061 ms (6.125 ms / 100) Good 6.126 -> 5.217 (-14.84%) [ +0.15% +0.00% +0.03% / -14.63% -14.81% -14.84%] index_select strided 7 : Elapsed 0.061 ms (6.135 ms / 100) Good 6.123 -> 5.208 (-14.94%) [ +0.00% +0.02% +0.08% / -14.85% -14.85% -14.94%] index_select strided 8 : Elapsed 0.061 ms (6.123 ms / 100) Good 5.962 -> 5.111 (-14.27%) [ +0.10% +0.00% +0.07% / -14.07% -14.24% -14.27%] index_select strided 16 : Elapsed 0.060 ms (5.968 ms / 100) Good 5.726 -> 5.012 (-12.47%) [ +0.09% +0.33% +0.00% / -12.31% -12.47% -12.05%] index_select strided 64 : Elapsed 0.057 ms (5.731 ms / 100) Good 6.107 -> 5.215 (-14.61%) [ +0.23% +0.05% +0.00% / -14.61% -14.57% -14.59%] index_select strided 100 : Elapsed 0.061 ms (6.121 ms / 100) good 5.778 -> 5.232 ( -9.45%) [ +0.05% +0.00% +0.17% / -9.09% -9.35% -9.45%] index_select strided 255 : Elapsed 0.058 ms (5.781 ms / 100) Good 6.103 -> 5.252 (-13.94%) [ +0.00% +0.02% +0.23% / -13.94% -13.94% -13.89%] index_select random : Elapsed 0.061 ms (6.103 ms / 100) good 5.778 -> 5.221 ( -9.64%) [ +0.14% +0.09% +0.00% / -9.42% -9.61% -9.64%] index_select random_sorted : Elapsed 0.058 ms (5.786 ms / 100) Good 6.124 -> 5.224 (-14.70%) [ +0.00% +0.10% +0.00% / -14.16% -14.70% -14.44%] index_select perm : Elapsed 0.061 ms (6.124 ms / 100) good 5.763 -> 5.227 ( -9.30%) [ +0.21% +0.00% +0.12% / -8.99% -9.14% -9.30%] index_select perm_sorted : Elapsed 0.058 ms (5.775 ms / 100) B = [512, 255] (stride (255, 1)) A = [512, 256] (stride (1, 512)) dim = 1 good 5.307 -> 4.980 ( -6.16%) [ +0.00% +0.08% +0.09% / -6.12% -6.16% -6.14%] index_select const : Elapsed 0.053 ms (5.307 ms / 100) 5.358 -> 5.375 ( +0.32%) [ +0.02% +0.00% +0.02% / +0.47% +0.32% +0.35%] index_select wrap : Elapsed 0.054 ms (5.359 ms / 100) 5.347 -> 5.352 ( +0.09%) [ +0.07% +0.00% +0.00% / +0.09% +0.58% +0.34%] index_select linear : Elapsed 0.054 ms (5.351 ms / 100) 5.349 -> 5.353 ( +0.07%) [ +0.00% +0.19% +0.13% / +0.39% +0.36% +0.07%] index_select reverse : Elapsed 0.053 ms (5.349 ms / 100) good 5.308 -> 4.955 ( -6.65%) [ +0.11% +0.11% +0.00% / -6.65% -6.50% -6.22%] index_select skip64 : Elapsed 0.053 ms (5.314 ms / 100) good 5.264 -> 4.960 ( -5.78%) [ +0.00% +0.09% +0.11% / -5.49% -5.78% -5.49%] index_select skip256 : Elapsed 0.053 ms (5.264 ms / 100) 5.348 -> 5.351 ( +0.06%) [ +0.15% +0.24% +0.00% / +0.06% +1.01% +0.92%] index_select spread : Elapsed 0.054 ms (5.356 ms / 100) 5.357 -> 5.424 ( +1.25%) [ +0.02% +0.00% +0.02% / +1.25% +1.74% +1.46%] index_select strided 3 : Elapsed 0.054 ms (5.358 ms / 100) 5.378 -> 5.426 ( +0.89%) [ +0.04% +0.00% +0.04% / +0.89% +1.36% +1.41%] index_select strided 5 : Elapsed 0.054 ms (5.380 ms / 100) 5.359 -> 5.434 ( +1.40%) [ +0.34% +0.00% +0.11% / +1.47% +1.57% +1.40%] index_select strided 7 : Elapsed 0.054 ms (5.377 ms / 100) 5.317 -> 5.213 ( -1.96%) [ +0.13% +0.00% +0.13% / -1.94% -1.96% -1.71%] index_select strided 8 : Elapsed 0.053 ms (5.324 ms / 100) 5.307 -> 5.053 ( -4.79%) [ +0.00% +0.02% +0.02% / -4.54% -4.58% -4.79%] index_select strided 16 : Elapsed 0.053 ms (5.307 ms / 100) good 5.285 -> 4.967 ( -6.02%) [ +0.13% +0.00% +0.11% / -5.73% -6.02% -5.71%] index_select strided 64 : Elapsed 0.053 ms (5.292 ms / 100) 5.326 -> 5.218 ( -2.03%) [ +0.13% +0.04% +0.00% / -2.03% -1.69% -1.75%] index_select strided 100 : Elapsed 0.053 ms (5.333 ms / 100) 5.352 -> 5.359 ( +0.13%) [ +0.15% +0.00% +0.17% / +0.67% +0.22% +0.13%] index_select strided 255 : Elapsed 0.054 ms (5.360 ms / 100) 5.333 -> 5.282 ( -0.96%) [ +0.04% +0.00% +0.00% / -0.96% +0.47% +0.17%] index_select random : Elapsed 0.053 ms (5.335 ms / 100) 5.327 -> 5.244 ( -1.56%) [ +0.13% +0.00% +0.09% / -1.56% -0.79% -0.69%] index_select random_sorted : Elapsed 0.053 ms (5.334 ms / 100) 5.360 -> 5.415 ( +1.03%) [ +0.24% +0.00% +0.00% / +1.03% +1.31% +1.27%] index_select perm : Elapsed 0.054 ms (5.373 ms / 100) 5.344 -> 5.359 ( +0.28%) [ +0.11% +0.00% +0.09% / +0.28% +0.36% +0.60%] index_select perm_sorted : Elapsed 0.053 ms (5.350 ms / 100) B = [512, 255] (stride (1, 512)) dim = 1 fill_cnt = 256 2.581 -> 2.575 ( -0.23%) [ +0.15% +0.00% +0.08% / -0.23% -0.23% -0.15%] index_fill_ const : Elapsed 0.026 ms (2.585 ms / 100) 2.664 -> 2.662 ( -0.08%) [ +0.56% +0.00% +0.23% / -0.08% +0.08% -0.08%] index_fill_ linear : Elapsed 0.027 ms (2.679 ms / 100) 2.665 -> 2.657 ( -0.30%) [ +0.79% +0.49% +0.00% / +0.11% -0.30% +0.11%] index_fill_ reverse : Elapsed 0.027 ms (2.686 ms / 100) 2.577 -> 2.571 ( -0.23%) [ +0.39% +0.39% +0.00% / -0.04% -0.23% -0.23%] index_fill_ skip64 : Elapsed 0.026 ms (2.587 ms / 100) 2.580 -> 2.571 ( -0.35%) [ +0.08% +0.08% +0.00% / -0.35% -0.16% +0.08%] index_fill_ skip256 : Elapsed 0.026 ms (2.582 ms / 100) 2.669 -> 2.646 ( -0.86%) [ +0.22% +0.00% +0.07% / -0.49% -0.86% -0.56%] index_fill_ spread : Elapsed 0.027 ms (2.675 ms / 100) 2.589 -> 2.594 ( +0.19%) [ +0.77% +0.58% +0.00% / +0.19% +0.42% +0.77%] index_fill_ strided 3 : Elapsed 0.026 ms (2.609 ms / 100) 2.591 -> 2.598 ( +0.27%) [ +0.66% +0.00% +0.50% / +0.27% +0.69% +0.35%] index_fill_ strided 5 : Elapsed 0.026 ms (2.608 ms / 100) 2.655 -> 2.655 ( +0.00%) [ +0.30% +0.00% +0.41% / +0.49% +0.23% +0.00%] index_fill_ strided 7 : Elapsed 0.027 ms (2.663 ms / 100) 2.668 -> 2.668 ( +0.00%) [ +0.26% +0.00% +0.07% / +0.30% +0.00% +0.11%] index_fill_ strided 8 : Elapsed 0.027 ms (2.675 ms / 100) 2.671 -> 2.661 ( -0.37%) [ +0.15% +0.00% +0.19% / -0.04% -0.37% -0.30%] index_fill_ strided 16 : Elapsed 0.027 ms (2.675 ms / 100) 2.662 -> 2.648 ( -0.53%) [ +0.34% +0.00% +0.00% / -0.19% -0.38% -0.53%] index_fill_ strided 64 : Elapsed 0.027 ms (2.671 ms / 100) 2.601 -> 2.588 ( -0.50%) [ +0.42% +0.15% +0.00% / -0.50% -0.27% -0.42%] index_fill_ strided 100 : Elapsed 0.026 ms (2.612 ms / 100) 2.635 -> 2.615 ( -0.76%) [ +0.11% +0.00% +0.04% / -0.57% -0.57% -0.76%] index_fill_ random : Elapsed 0.026 ms (2.638 ms / 100) 2.611 -> 2.599 ( -0.46%) [ +0.50% +0.11% +0.00% / +0.31% +0.00% -0.46%] index_fill_ random_sorted : Elapsed 0.026 ms (2.624 ms / 100) B = [512, 255] (stride (1, 512)) A = [512, 256] (stride (256, 1)) dim = 1 5.130 -> 5.114 ( -0.31%) [ +0.10% +0.00% +0.10% / +0.12% -0.31% -0.18%] index_select const : Elapsed 0.051 ms (5.135 ms / 100) 5.522 -> 5.523 ( +0.02%) [ +0.02% +0.13% +0.00% / +0.07% +0.02% +0.20%] index_select wrap : Elapsed 0.055 ms (5.523 ms / 100) 5.511 -> 5.526 ( +0.27%) [ +0.16% +0.34% +0.00% / +0.27% +0.34% +0.31%] index_select linear : Elapsed 0.055 ms (5.520 ms / 100) 5.561 -> 5.548 ( -0.23%) [ +0.00% +0.20% +0.05% / -0.23% +0.13% -0.02%] index_select reverse : Elapsed 0.056 ms (5.561 ms / 100) 5.153 -> 5.163 ( +0.19%) [ +0.00% +0.12% +0.16% / +0.19% +0.35% +0.47%] index_select skip64 : Elapsed 0.052 ms (5.153 ms / 100) 5.111 -> 5.115 ( +0.08%) [ +0.04% +0.00% +0.18% / +0.08% +0.12% +0.27%] index_select skip256 : Elapsed 0.051 ms (5.113 ms / 100) 5.522 -> 5.528 ( +0.11%) [ +0.18% +0.00% +0.04% / +0.11% +0.40% +0.29%] index_select spread : Elapsed 0.055 ms (5.532 ms / 100) 5.787 -> 5.779 ( -0.14%) [ +0.02% +0.00% +0.00% / -0.14% +0.47% +0.38%] index_select strided 3 : Elapsed 0.058 ms (5.788 ms / 100) 5.781 -> 5.782 ( +0.02%) [ +0.22% +0.07% +0.00% / +0.02% +0.52% +0.66%] index_select strided 5 : Elapsed 0.058 ms (5.794 ms / 100) 5.739 -> 5.729 ( -0.17%) [ +0.23% +0.00% +0.07% / -0.17% +0.14% +0.42%] index_select strided 7 : Elapsed 0.058 ms (5.752 ms / 100) 5.748 -> 5.727 ( -0.37%) [ +0.12% +0.16% +0.00% / -0.37% +0.19% +0.19%] index_select strided 8 : Elapsed 0.058 ms (5.755 ms / 100) 5.607 -> 5.614 ( +0.12%) [ +0.21% +0.18% +0.00% / +0.12% +0.66% +0.66%] index_select strided 16 : Elapsed 0.056 ms (5.619 ms / 100) 5.219 -> 5.220 ( +0.02%) [ +0.21% +0.00% +0.15% / +0.29% +0.13% +0.02%] index_select strided 64 : Elapsed 0.052 ms (5.230 ms / 100) 5.743 -> 5.741 ( -0.03%) [ +0.23% +0.17% +0.00% / +0.07% +0.40% -0.03%] index_select strided 100 : Elapsed 0.058 ms (5.756 ms / 100) 5.552 -> 5.561 ( +0.16%) [ +0.00% +0.04% +0.22% / +0.22% +0.40% +0.16%] index_select strided 255 : Elapsed 0.056 ms (5.552 ms / 100) 5.753 -> 5.765 ( +0.21%) [ +0.40% +0.21% +0.00% / +0.21% +0.47% +0.59%] index_select random : Elapsed 0.058 ms (5.776 ms / 100) 5.538 -> 5.536 ( -0.04%) [ +0.00% +0.07% +0.27% / -0.04% +0.07% +0.00%] index_select random_sorted : Elapsed 0.055 ms (5.538 ms / 100) 5.741 -> 5.746 ( +0.09%) [ +0.00% +0.21% +0.28% / +0.09% +0.84% +0.77%] index_select perm : Elapsed 0.057 ms (5.741 ms / 100) 5.552 -> 5.550 ( -0.04%) [ +0.00% +0.00% +0.13% / +0.22% -0.04% +0.05%] index_select perm_sorted : Elapsed 0.056 ms (5.552 ms / 100) B = [512, 255] (stride (1, 512)) A = [512, 256] (stride (1, 512)) dim = 1 4.952 -> 4.967 ( +0.30%) [ +0.24% +0.20% +0.00% / +0.34% +0.30% +0.30%] index_select const : Elapsed 0.050 ms (4.964 ms / 100) 5.194 -> 5.193 ( -0.02%) [ +0.25% +0.15% +0.00% / -0.02% +0.21% +0.23%] index_select wrap : Elapsed 0.052 ms (5.207 ms / 100) 5.191 -> 5.189 ( -0.04%) [ +0.23% +0.00% +0.17% / -0.04% +0.35% +0.42%] index_select linear : Elapsed 0.052 ms (5.203 ms / 100) 5.223 -> 5.194 ( -0.56%) [ +0.11% +0.00% +0.06% / -0.10% -0.56% -0.15%] index_select reverse : Elapsed 0.052 ms (5.229 ms / 100) 4.956 -> 4.965 ( +0.18%) [ +0.24% +0.00% +0.06% / +0.18% +0.73% +0.83%] index_select skip64 : Elapsed 0.050 ms (4.968 ms / 100) 4.951 -> 4.953 ( +0.04%) [ +0.36% +0.02% +0.00% / +0.04% +0.53% +0.50%] index_select skip256 : Elapsed 0.050 ms (4.969 ms / 100) 5.191 -> 5.196 ( +0.10%) [ +0.25% +0.00% +0.40% / +0.10% +0.27% +0.31%] index_select spread : Elapsed 0.052 ms (5.204 ms / 100) 5.192 -> 5.198 ( +0.12%) [ +0.00% +0.06% +0.19% / +0.12% +0.12% +0.23%] index_select strided 3 : Elapsed 0.052 ms (5.192 ms / 100) 5.190 -> 5.189 ( -0.02%) [ +0.23% +0.02% +0.00% / -0.02% +0.50% +0.39%] index_select strided 5 : Elapsed 0.052 ms (5.202 ms / 100) 5.204 -> 5.207 ( +0.06%) [ +0.15% +0.04% +0.00% / +0.06% +0.19% +0.17%] index_select strided 7 : Elapsed 0.052 ms (5.212 ms / 100) 4.981 -> 4.986 ( +0.10%) [ +0.00% +0.34% +0.32% / +0.10% +0.84% +0.82%] index_select strided 8 : Elapsed 0.050 ms (4.981 ms / 100) 5.005 -> 4.992 ( -0.26%) [ +0.10% +0.06% +0.00% / -0.04% -0.16% -0.26%] index_select strided 16 : Elapsed 0.050 ms (5.010 ms / 100) 4.945 -> 4.967 ( +0.44%) [ +0.34% +0.00% +0.30% / +0.44% +0.85% +1.01%] index_select strided 64 : Elapsed 0.050 ms (4.962 ms / 100) 5.057 -> 5.063 ( +0.12%) [ +0.16% +0.00% +0.14% / +0.12% +0.93% +0.67%] index_select strided 100 : Elapsed 0.051 ms (5.065 ms / 100) 5.223 -> 5.218 ( -0.10%) [ +0.11% +0.00% +0.10% / +0.02% -0.10% -0.08%] index_select strided 255 : Elapsed 0.052 ms (5.229 ms / 100) 5.132 -> 5.137 ( +0.10%) [ +0.00% +0.08% +0.19% / +0.10% +0.68% +0.74%] index_select random : Elapsed 0.051 ms (5.132 ms / 100) 5.120 -> 5.108 ( -0.23%) [ +0.04% +0.06% +0.00% / +0.14% -0.23% -0.02%] index_select random_sorted : Elapsed 0.051 ms (5.122 ms / 100) 5.189 -> 5.191 ( +0.04%) [ +0.10% +0.00% +0.21% / +0.04% +0.10% +0.27%] index_select perm : Elapsed 0.052 ms (5.194 ms / 100) 5.194 -> 5.197 ( +0.06%) [ +0.00% +0.02% +0.23% / +0.06% +0.06% +0.42%] index_select perm_sorted : Elapsed 0.052 ms (5.194 ms / 100) out_shape = [256, 512] in_shape = [255, 512] idx_dim = 0 B = [256, 512] (stride (512, 1)) dim = 0 fill_cnt = 255 2.581 -> 2.572 ( -0.35%) [ +0.00% +0.00% +0.00% / -0.31% -0.35% -0.12%] index_fill_ const : Elapsed 0.026 ms (2.581 ms / 100) 2.678 -> 2.648 ( -1.12%) [ +0.00% +0.26% +0.19% / -0.49% -0.86% -1.12%] index_fill_ linear : Elapsed 0.027 ms (2.678 ms / 100) 2.662 -> 2.653 ( -0.34%) [ +0.34% +0.00% +0.30% / +0.30% -0.11% -0.34%] index_fill_ reverse : Elapsed 0.027 ms (2.671 ms / 100) 2.574 -> 2.574 ( +0.00%) [ +0.19% +0.00% +0.16% / +0.00% +0.31% +0.39%] index_fill_ skip64 : Elapsed 0.026 ms (2.579 ms / 100) 2.587 -> 2.578 ( -0.35%) [ +0.04% +0.04% +0.00% / -0.04% -0.12% -0.35%] index_fill_ skip256 : Elapsed 0.026 ms (2.588 ms / 100) 2.659 -> 2.666 ( +0.26%) [ +0.30% +0.00% +0.26% / +0.45% +0.26% +0.26%] index_fill_ spread : Elapsed 0.027 ms (2.667 ms / 100) 2.663 -> 2.650 ( -0.49%) [ +0.34% +0.11% +0.00% / -0.11% -0.49% -0.26%] index_fill_ strided 3 : Elapsed 0.027 ms (2.672 ms / 100) 2.666 -> 2.645 ( -0.79%) [ +0.19% +0.00% +0.00% / -0.68% -0.79% -0.64%] index_fill_ strided 5 : Elapsed 0.027 ms (2.671 ms / 100) 2.654 -> 2.654 ( +0.00%) [ +0.38% +0.00% +0.19% / +0.15% +0.19% +0.00%] index_fill_ strided 7 : Elapsed 0.027 ms (2.664 ms / 100) 2.607 -> 2.584 ( -0.88%) [ +0.12% +0.31% +0.00% / -0.61% -0.88% -0.50%] index_fill_ strided 8 : Elapsed 0.026 ms (2.610 ms / 100) 2.597 -> 2.592 ( -0.19%) [ +0.19% +0.00% +0.12% / +0.62% +0.04% -0.19%] index_fill_ strided 16 : Elapsed 0.026 ms (2.602 ms / 100) 2.583 -> 2.580 ( -0.12%) [ +0.19% +0.62% +0.00% / -0.12% +0.15% +0.43%] index_fill_ strided 64 : Elapsed 0.026 ms (2.588 ms / 100) 2.594 -> 2.595 ( +0.04%) [ +0.23% +0.00% +0.15% / +0.04% +0.12% +0.15%] index_fill_ strided 100 : Elapsed 0.026 ms (2.600 ms / 100) 2.662 -> 2.649 ( -0.49%) [ +0.26% +0.26% +0.00% / +0.04% -0.41% -0.49%] index_fill_ strided 255 : Elapsed 0.027 ms (2.669 ms / 100) 2.633 -> 2.611 ( -0.84%) [ +0.49% +0.19% +0.00% / -0.53% -0.84% -0.76%] index_fill_ random : Elapsed 0.026 ms (2.646 ms / 100) 2.629 -> 2.622 ( -0.27%) [ +0.00% +0.15% +0.23% / -0.15% -0.27% -0.23%] index_fill_ random_sorted : Elapsed 0.026 ms (2.629 ms / 100) 2.666 -> 2.649 ( -0.64%) [ +0.00% +0.08% +0.08% / -0.64% -0.26% -0.34%] index_fill_ perm : Elapsed 0.027 ms (2.666 ms / 100) 2.651 -> 2.647 ( -0.15%) [ +0.34% +0.30% +0.00% / -0.15% +0.30% +0.34%] index_fill_ perm_sorted : Elapsed 0.027 ms (2.660 ms / 100) B = [256, 512] (stride (512, 1)) A = [255, 512] (stride (512, 1)) dim = 0 5.377 -> 5.347 ( -0.56%) [ +0.26% +0.00% +0.22% / -0.09% -0.54% -0.56%] index_add_ linear : Elapsed 0.054 ms (5.391 ms / 100) 5.222 -> 5.188 ( -0.65%) [ +0.50% +0.00% +0.21% / -0.10% -0.61% -0.65%] index_copy_ linear : Elapsed 0.052 ms (5.248 ms / 100) 5.368 -> 5.363 ( -0.09%) [ +0.00% +0.34% +0.17% / -0.07% -0.09% -0.04%] index_add_ reverse : Elapsed 0.054 ms (5.368 ms / 100) 5.205 -> 5.200 ( -0.10%) [ +0.00% +0.31% +0.06% / -0.06% -0.10% +0.06%] index_copy_ reverse : Elapsed 0.052 ms (5.205 ms / 100) 5.352 -> 5.367 ( +0.28%) [ +0.54% +0.00% +0.09% / +0.41% +0.41% +0.28%] index_add_ spread : Elapsed 0.054 ms (5.381 ms / 100) 5.203 -> 5.196 ( -0.13%) [ +0.46% +0.12% +0.00% / +0.10% -0.02% -0.13%] index_copy_ spread : Elapsed 0.052 ms (5.227 ms / 100) 5.390 -> 5.355 ( -0.65%) [ +0.22% +0.11% +0.00% / -0.17% -0.65% -0.54%] index_add_ strided 3 : Elapsed 0.054 ms (5.402 ms / 100) 5.222 -> 5.188 ( -0.65%) [ +0.10% +0.31% +0.00% / -0.10% -0.52% -0.65%] index_copy_ strided 3 : Elapsed 0.052 ms (5.227 ms / 100) 5.350 -> 5.338 ( -0.22%) [ +0.00% +0.06% +0.24% / -0.22% +1.08% +0.93%] index_add_ strided 5 : Elapsed 0.053 ms (5.350 ms / 100) 5.207 -> 5.198 ( -0.17%) [ +0.00% +0.04% +0.12% / -0.17% +0.15% +0.08%] index_copy_ strided 5 : Elapsed 0.052 ms (5.207 ms / 100) 5.378 -> 5.366 ( -0.22%) [ +0.00% +0.17% +0.11% / +0.17% -0.13% -0.22%] index_add_ strided 7 : Elapsed 0.054 ms (5.378 ms / 100) 5.215 -> 5.200 ( -0.29%) [ +0.00% +0.17% +0.08% / -0.08% -0.25% -0.29%] index_copy_ strided 7 : Elapsed 0.052 ms (5.215 ms / 100) 5.369 -> 5.371 ( +0.04%) [ +0.34% +0.22% +0.00% / +0.04% +0.32% +0.41%] index_add_ strided 255 : Elapsed 0.054 ms (5.387 ms / 100) 5.209 -> 5.201 ( -0.15%) [ +0.38% +0.21% +0.00% / -0.15% +0.44% +0.44%] index_copy_ strided 255 : Elapsed 0.052 ms (5.229 ms / 100) 5.412 -> 5.378 ( -0.63%) [ +0.17% +0.07% +0.00% / -0.37% -0.63% -0.55%] index_add_ perm : Elapsed 0.054 ms (5.421 ms / 100) 5.244 -> 5.206 ( -0.72%) [ +0.17% +0.11% +0.00% / -0.23% -0.72% -0.55%] index_copy_ perm : Elapsed 0.053 ms (5.253 ms / 100) 5.354 -> 5.348 ( -0.11%) [ +0.00% +0.41% +0.60% / +0.21% +0.17% -0.11%] index_add_ perm_sorted : Elapsed 0.054 ms (5.354 ms / 100) 5.210 -> 5.189 ( -0.40%) [ +0.00% +0.19% +0.23% / -0.21% -0.31% -0.40%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.210 ms / 100) 4.954 -> 4.962 ( +0.16%) [ +0.34% +0.02% +0.00% / +0.16% +0.20% +0.16%] index_select const : Elapsed 0.050 ms (4.971 ms / 100) 5.199 -> 5.193 ( -0.12%) [ +0.13% +0.00% +0.08% / +0.00% -0.12% +0.17%] index_select wrap : Elapsed 0.052 ms (5.206 ms / 100) 5.204 -> 5.199 ( -0.10%) [ +0.00% +0.00% +0.15% / +0.12% -0.10% -0.02%] index_select linear : Elapsed 0.052 ms (5.204 ms / 100) 5.198 -> 5.183 ( -0.29%) [ +0.08% +0.04% +0.00% / +0.04% -0.19% -0.29%] index_select reverse : Elapsed 0.052 ms (5.202 ms / 100) 4.955 -> 4.951 ( -0.08%) [ +0.18% +0.16% +0.00% / -0.08% +0.04% +0.12%] index_select skip64 : Elapsed 0.050 ms (4.964 ms / 100) 4.956 -> 4.950 ( -0.12%) [ +0.24% +0.22% +0.00% / +0.08% +0.08% -0.12%] index_select skip256 : Elapsed 0.050 ms (4.968 ms / 100) 5.201 -> 5.208 ( +0.13%) [ +0.00% +0.04% +0.38% / +0.13% +0.56% +0.56%] index_select spread : Elapsed 0.052 ms (5.201 ms / 100) 5.104 -> 5.082 ( -0.43%) [ +0.12% +0.14% +0.00% / +0.14% -0.43% -0.37%] index_select strided 3 : Elapsed 0.051 ms (5.110 ms / 100) 5.030 -> 5.023 ( -0.14%) [ +0.24% +0.02% +0.00% / +0.14% -0.14% +0.14%] index_select strided 5 : Elapsed 0.050 ms (5.042 ms / 100) 5.186 -> 5.200 ( +0.27%) [ +0.08% +0.23% +0.00% / +0.27% +0.54% +0.62%] index_select strided 7 : Elapsed 0.052 ms (5.190 ms / 100) 5.203 -> 5.202 ( -0.02%) [ +0.00% +0.31% +0.25% / +0.37% +0.02% -0.02%] index_select strided 8 : Elapsed 0.052 ms (5.203 ms / 100) 5.204 -> 5.205 ( +0.02%) [ +0.12% +0.00% +0.19% / +0.10% +0.13% +0.02%] index_select strided 16 : Elapsed 0.052 ms (5.210 ms / 100) 5.204 -> 5.208 ( +0.08%) [ +0.13% +0.00% +0.12% / +0.13% +0.15% +0.08%] index_select strided 64 : Elapsed 0.052 ms (5.211 ms / 100) 5.032 -> 5.025 ( -0.14%) [ +0.00% +0.06% +0.30% / +0.14% -0.08% -0.14%] index_select strided 100 : Elapsed 0.050 ms (5.032 ms / 100) 5.127 -> 5.126 ( -0.02%) [ +0.04% +0.08% +0.00% / -0.02% +0.57% +0.43%] index_select random : Elapsed 0.051 ms (5.129 ms / 100) 5.135 -> 5.128 ( -0.14%) [ +0.06% +0.00% +0.16% / +0.04% +0.04% -0.14%] index_select random_sorted : Elapsed 0.051 ms (5.138 ms / 100) B = [256, 512] (stride (512, 1)) A = [255, 512] (stride (1, 255)) dim = 0 5.619 -> 5.594 ( -0.44%) [ +0.00% +0.30% +0.37% / -0.44% -0.21% -0.14%] index_add_ linear : Elapsed 0.056 ms (5.619 ms / 100) 5.493 -> 5.471 ( -0.40%) [ +0.00% +0.35% +0.31% / -0.40% +0.02% +0.27%] index_copy_ linear : Elapsed 0.055 ms (5.493 ms / 100) 5.612 -> 5.585 ( -0.48%) [ +0.00% +0.43% +0.25% / -0.23% +0.00% -0.48%] index_add_ reverse : Elapsed 0.056 ms (5.612 ms / 100) 5.496 -> 5.463 ( -0.60%) [ +0.00% +0.44% +0.31% / -0.60% -0.22% -0.27%] index_copy_ reverse : Elapsed 0.055 ms (5.496 ms / 100) 5.620 -> 5.603 ( -0.30%) [ +0.05% +0.00% +0.05% / -0.30% -0.18% -0.05%] index_add_ spread : Elapsed 0.056 ms (5.623 ms / 100) 5.487 -> 5.493 ( +0.11%) [ +0.42% +0.00% +0.29% / +0.11% +0.22% +0.27%] index_copy_ spread : Elapsed 0.055 ms (5.510 ms / 100) 5.663 -> 5.630 ( -0.58%) [ +0.00% +0.12% +0.25% / -0.44% -0.58% -0.57%] index_add_ strided 3 : Elapsed 0.057 ms (5.663 ms / 100) 5.530 -> 5.485 ( -0.81%) [ +0.04% +0.00% +0.38% / -0.72% -0.63% -0.81%] index_copy_ strided 3 : Elapsed 0.055 ms (5.532 ms / 100) 5.641 -> 5.623 ( -0.32%) [ +0.00% +0.18% +0.25% / -0.32% +0.18% +0.07%] index_add_ strided 5 : Elapsed 0.056 ms (5.641 ms / 100) 5.498 -> 5.476 ( -0.40%) [ +0.00% +0.31% +0.27% / -0.40% +0.13% +0.33%] index_copy_ strided 5 : Elapsed 0.055 ms (5.498 ms / 100) 5.646 -> 5.628 ( -0.32%) [ +0.00% +0.19% +0.09% / -0.32% -0.19% -0.14%] index_add_ strided 7 : Elapsed 0.056 ms (5.646 ms / 100) 5.510 -> 5.495 ( -0.27%) [ +0.00% +0.20% +0.00% / -0.27% -0.09% -0.27%] index_copy_ strided 7 : Elapsed 0.055 ms (5.510 ms / 100) 5.618 -> 5.589 ( -0.52%) [ +0.00% +0.04% +0.37% / -0.52% -0.18% -0.39%] index_add_ strided 255 : Elapsed 0.056 ms (5.618 ms / 100) 5.498 -> 5.468 ( -0.55%) [ +0.00% +0.04% +0.09% / -0.55% -0.13% -0.11%] index_copy_ strided 255 : Elapsed 0.055 ms (5.498 ms / 100) 5.651 -> 5.639 ( -0.21%) [ +0.23% +0.34% +0.00% / -0.21% -0.14% -0.12%] index_add_ perm : Elapsed 0.057 ms (5.664 ms / 100) 5.512 -> 5.493 ( -0.34%) [ +0.27% +0.15% +0.00% / -0.34% -0.11% +0.15%] index_copy_ perm : Elapsed 0.055 ms (5.527 ms / 100) 5.611 -> 5.590 ( -0.37%) [ +0.23% +0.41% +0.00% / -0.37% -0.07% -0.11%] index_add_ perm_sorted : Elapsed 0.056 ms (5.624 ms / 100) 5.486 -> 5.472 ( -0.26%) [ +0.51% +0.57% +0.00% / -0.26% +0.22% +0.49%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.514 ms / 100) 5.104 -> 5.109 ( +0.10%) [ +0.14% +0.00% +0.10% / +0.10% +0.33% +0.43%] index_select const : Elapsed 0.051 ms (5.111 ms / 100) 5.480 -> 5.486 ( +0.11%) [ +0.20% +0.00% +0.00% / +0.11% +0.22% +0.29%] index_select wrap : Elapsed 0.055 ms (5.491 ms / 100) 5.474 -> 5.485 ( +0.20%) [ +0.00% +0.26% +0.18% / +0.20% +0.40% +0.60%] index_select linear : Elapsed 0.055 ms (5.474 ms / 100) 5.473 -> 5.472 ( -0.02%) [ +0.20% +0.02% +0.00% / -0.02% +0.49% +0.22%] index_select reverse : Elapsed 0.055 ms (5.484 ms / 100) 5.163 -> 5.159 ( -0.08%) [ +0.02% +0.29% +0.00% / +0.02% -0.08% +0.14%] index_select skip64 : Elapsed 0.052 ms (5.164 ms / 100) 5.116 -> 5.113 ( -0.06%) [ +0.00% +0.02% +0.10% / -0.06% +0.12% -0.06%] index_select skip256 : Elapsed 0.051 ms (5.116 ms / 100) 5.460 -> 5.464 ( +0.07%) [ +0.13% +0.15% +0.00% / +0.07% +0.59% +0.46%] index_select spread : Elapsed 0.055 ms (5.467 ms / 100) 5.755 -> 5.754 ( -0.02%) [ +0.02% +0.00% +0.14% / -0.02% +0.63% +0.50%] index_select strided 3 : Elapsed 0.058 ms (5.756 ms / 100) 5.759 -> 5.748 ( -0.19%) [ +0.00% +0.07% +0.03% / -0.19% +0.50% +0.50%] index_select strided 5 : Elapsed 0.058 ms (5.759 ms / 100) 5.722 -> 5.718 ( -0.07%) [ +0.03% +0.03% +0.00% / -0.07% +0.65% +0.52%] index_select strided 7 : Elapsed 0.057 ms (5.724 ms / 100) 5.718 -> 5.737 ( +0.33%) [ +0.35% +0.00% +0.42% / +0.35% +0.33% +0.51%] index_select strided 8 : Elapsed 0.057 ms (5.738 ms / 100) 5.696 -> 5.716 ( +0.35%) [ +0.16% +0.00% +0.35% / +0.35% +0.60% +0.72%] index_select strided 16 : Elapsed 0.057 ms (5.705 ms / 100) 5.683 -> 5.690 ( +0.12%) [ +0.05% +0.00% +0.16% / +0.12% +0.33% +0.48%] index_select strided 64 : Elapsed 0.057 ms (5.686 ms / 100) 5.735 -> 5.729 ( -0.10%) [ +0.24% +0.21% +0.00% / -0.10% +0.52% +0.31%] index_select strided 100 : Elapsed 0.057 ms (5.749 ms / 100) 5.737 -> 5.732 ( -0.09%) [ +0.00% +0.26% +0.16% / -0.09% +0.59% +0.52%] index_select random : Elapsed 0.057 ms (5.737 ms / 100) 5.467 -> 5.460 ( -0.13%) [ +0.09% +0.00% +0.07% / -0.13% +0.53% +0.62%] index_select random_sorted : Elapsed 0.055 ms (5.472 ms / 100) B = [256, 512] (stride (1, 256)) dim = 0 fill_cnt = 255 Good 3.155 -> 2.590 (-17.91%) [ +0.22% +0.03% +0.00% / -17.72% -17.91% -17.88%] index_fill_ const : Elapsed 0.032 ms (3.162 ms / 100) Good 3.176 -> 2.661 (-16.22%) [ +0.16% +0.00% +0.16% / -16.22% -16.18% -15.99%] index_fill_ linear : Elapsed 0.032 ms (3.181 ms / 100) Good 3.180 -> 2.646 (-16.79%) [ +0.03% +0.00% +0.38% / -16.51% -16.79% -16.64%] index_fill_ reverse : Elapsed 0.032 ms (3.181 ms / 100) Good 3.110 -> 2.561 (-17.65%) [ +0.00% +0.10% +0.35% / -17.14% -17.65% -17.62%] index_fill_ skip64 : Elapsed 0.031 ms (3.110 ms / 100) Good 3.140 -> 2.588 (-17.58%) [ +0.13% +0.19% +0.00% / -17.26% -17.58% -17.52%] index_fill_ skip256 : Elapsed 0.031 ms (3.144 ms / 100) Good 3.200 -> 2.645 (-17.34%) [ +0.28% +0.00% +0.19% / -16.97% -17.34% -16.94%] index_fill_ spread : Elapsed 0.032 ms (3.209 ms / 100) Good 3.218 -> 2.765 (-14.08%) [ +0.12% +0.00% +0.47% / -14.08% -14.05% -14.01%] index_fill_ strided 3 : Elapsed 0.032 ms (3.222 ms / 100) Good 3.290 -> 2.871 (-12.74%) [ +0.46% +0.24% +0.00% / -12.74% -12.34% -12.34%] index_fill_ strided 5 : Elapsed 0.033 ms (3.305 ms / 100) Good 3.314 -> 2.979 (-10.11%) [ +0.00% +0.12% +0.18% / -9.84% -10.11% -10.11%] index_fill_ strided 7 : Elapsed 0.033 ms (3.314 ms / 100) good 3.515 -> 3.215 ( -8.53%) [ +0.00% +0.09% +0.09% / -8.53% -8.34% -8.36%] index_fill_ strided 8 : Elapsed 0.035 ms (3.515 ms / 100) good 3.301 -> 3.094 ( -6.27%) [ +0.27% +0.06% +0.00% / -6.12% -6.21% -6.27%] index_fill_ strided 16 : Elapsed 0.033 ms (3.310 ms / 100) Good 3.176 -> 2.779 (-12.50%) [ +0.16% +0.35% +0.00% / -12.25% -12.47% -12.50%] index_fill_ strided 64 : Elapsed 0.032 ms (3.181 ms / 100) 3.505 -> 3.392 ( -3.22%) [ +0.23% +0.20% +0.00% / -3.00% -3.22% -3.00%] index_fill_ strided 100 : Elapsed 0.035 ms (3.513 ms / 100) Good 3.181 -> 2.660 (-16.38%) [ +0.13% +0.41% +0.00% / -16.38% -15.94% -15.97%] index_fill_ strided 255 : Elapsed 0.032 ms (3.185 ms / 100) good 3.499 -> 3.265 ( -6.69%) [ +0.34% +0.29% +0.00% / -6.60% -6.66% -6.69%] index_fill_ random : Elapsed 0.035 ms (3.511 ms / 100) Good 3.376 -> 2.836 (-16.00%) [ +0.21% +0.00% +0.18% / -15.67% -16.00% -15.79%] index_fill_ random_sorted : Elapsed 0.034 ms (3.383 ms / 100) good 3.340 -> 3.062 ( -8.32%) [ +0.03% +0.06% +0.00% / -8.32% -8.26% -8.29%] index_fill_ perm : Elapsed 0.033 ms (3.341 ms / 100) Good 3.182 -> 2.648 (-16.78%) [ +0.16% +0.00% +0.22% / -16.37% -16.78% -16.72%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.187 ms / 100) B = [256, 512] (stride (1, 256)) A = [255, 512] (stride (512, 1)) dim = 0 good 5.943 -> 5.519 ( -7.13%) [ +0.32% +0.13% +0.00% / -6.63% -7.13% -7.00%] index_add_ linear : Elapsed 0.060 ms (5.962 ms / 100) 5.371 -> 5.390 ( +0.35%) [ +0.58% +0.32% +0.00% / +0.71% +0.37% +0.35%] index_copy_ linear : Elapsed 0.054 ms (5.402 ms / 100) good 5.959 -> 5.509 ( -7.55%) [ +0.20% +0.18% +0.00% / -7.48% -7.35% -7.55%] index_add_ reverse : Elapsed 0.060 ms (5.971 ms / 100) 5.389 -> 5.378 ( -0.20%) [ +0.15% +0.09% +0.00% / -0.17% -0.13% -0.20%] index_copy_ reverse : Elapsed 0.054 ms (5.397 ms / 100) good 5.944 -> 5.498 ( -7.50%) [ +0.07% +0.27% +0.00% / -7.20% -7.50% -7.50%] index_add_ spread : Elapsed 0.059 ms (5.948 ms / 100) 5.380 -> 5.377 ( -0.06%) [ +0.19% +0.13% +0.00% / +0.06% -0.02% -0.06%] index_copy_ spread : Elapsed 0.054 ms (5.390 ms / 100) 5.774 -> 5.553 ( -3.83%) [ +0.50% +0.19% +0.00% / -3.83% -3.65% -3.52%] index_add_ strided 3 : Elapsed 0.058 ms (5.803 ms / 100) 5.456 -> 5.412 ( -0.81%) [ +0.16% +0.04% +0.00% / -0.81% -0.77% -0.75%] index_copy_ strided 3 : Elapsed 0.055 ms (5.465 ms / 100) 5.784 -> 5.634 ( -2.59%) [ +0.64% +0.21% +0.00% / -2.47% -2.58% -2.59%] index_add_ strided 5 : Elapsed 0.058 ms (5.821 ms / 100) 5.474 -> 5.449 ( -0.46%) [ +0.47% +0.26% +0.00% / -0.29% -0.42% -0.46%] index_copy_ strided 5 : Elapsed 0.055 ms (5.500 ms / 100) 5.737 -> 5.738 ( +0.02%) [ +0.00% +0.51% +0.16% / +0.02% +0.30% +0.35%] index_add_ strided 7 : Elapsed 0.057 ms (5.737 ms / 100) 5.519 -> 5.526 ( +0.13%) [ +0.02% +0.00% +0.20% / +0.13% +0.67% +0.85%] index_copy_ strided 7 : Elapsed 0.055 ms (5.520 ms / 100) good 5.952 -> 5.523 ( -7.21%) [ +0.49% +0.00% +0.12% / -7.19% -7.21% -7.09%] index_add_ strided 255 : Elapsed 0.060 ms (5.981 ms / 100) 5.390 -> 5.380 ( -0.19%) [ +0.48% +0.00% +0.11% / +0.24% -0.02% -0.19%] index_copy_ strided 255 : Elapsed 0.054 ms (5.416 ms / 100) 5.838 -> 5.804 ( -0.58%) [ +0.38% +0.00% +0.10% / -0.58% -0.38% -0.31%] index_add_ perm : Elapsed 0.059 ms (5.860 ms / 100) 5.554 -> 5.610 ( +1.01%) [ +0.29% +0.00% +0.22% / +1.01% +1.01% +1.21%] index_copy_ perm : Elapsed 0.056 ms (5.570 ms / 100) good 5.937 -> 5.516 ( -7.09%) [ +0.00% +0.24% +0.34% / -6.92% -6.99% -7.09%] index_add_ perm_sorted : Elapsed 0.059 ms (5.937 ms / 100) 5.390 -> 5.383 ( -0.13%) [ +0.11% +0.02% +0.00% / +0.26% -0.13% +0.09%] index_copy_ perm_sorted : Elapsed 0.054 ms (5.396 ms / 100) good 5.274 -> 4.968 ( -5.80%) [ +0.11% +0.23% +0.00% / -5.69% -5.80% -5.63%] index_select const : Elapsed 0.053 ms (5.280 ms / 100) 5.342 -> 5.316 ( -0.49%) [ +0.19% +0.00% +0.00% / -0.06% -0.49% -0.28%] index_select wrap : Elapsed 0.054 ms (5.352 ms / 100) 5.342 -> 5.314 ( -0.52%) [ +0.02% +0.00% +0.06% / -0.22% -0.28% -0.52%] index_select linear : Elapsed 0.053 ms (5.343 ms / 100) 5.350 -> 5.320 ( -0.56%) [ +0.00% +0.21% +0.02% / -0.56% -0.52% -0.52%] index_select reverse : Elapsed 0.054 ms (5.350 ms / 100) good 5.283 -> 4.961 ( -6.10%) [ +0.38% +0.00% +0.17% / -5.83% -6.10% -5.89%] index_select skip64 : Elapsed 0.053 ms (5.303 ms / 100) good 5.292 -> 4.971 ( -6.07%) [ +0.25% +0.00% +0.19% / -6.07% -5.93% -6.05%] index_select skip256 : Elapsed 0.053 ms (5.305 ms / 100) 5.347 -> 5.330 ( -0.32%) [ +0.00% +0.11% +0.19% / +0.26% -0.11% -0.32%] index_select spread : Elapsed 0.053 ms (5.347 ms / 100) 5.368 -> 5.237 ( -2.44%) [ +0.20% +0.02% +0.00% / -2.16% -2.42% -2.44%] index_select strided 3 : Elapsed 0.054 ms (5.379 ms / 100) 5.342 -> 5.193 ( -2.79%) [ +0.19% +0.00% +0.13% / -2.49% -2.79% -2.71%] index_select strided 5 : Elapsed 0.054 ms (5.352 ms / 100) 5.342 -> 5.396 ( +1.01%) [ +0.04% +0.02% +0.00% / +1.01% +1.14% +1.05%] index_select strided 7 : Elapsed 0.053 ms (5.344 ms / 100) 5.346 -> 5.315 ( -0.58%) [ +0.26% +0.00% +0.19% / -0.21% -0.58% -0.41%] index_select strided 8 : Elapsed 0.054 ms (5.360 ms / 100) 5.351 -> 5.403 ( +0.97%) [ +0.09% +0.17% +0.00% / +0.99% +0.97% +1.36%] index_select strided 16 : Elapsed 0.054 ms (5.356 ms / 100) 5.359 -> 5.397 ( +0.71%) [ +0.02% +0.00% +0.07% / +0.80% +0.71% +0.78%] index_select strided 64 : Elapsed 0.054 ms (5.360 ms / 100) 5.327 -> 5.210 ( -2.20%) [ +0.26% +0.17% +0.00% / -2.18% -2.08% -2.20%] index_select strided 100 : Elapsed 0.053 ms (5.341 ms / 100) 5.322 -> 5.271 ( -0.96%) [ +0.00% +0.11% +0.09% / -0.30% -0.68% -0.96%] index_select random : Elapsed 0.053 ms (5.322 ms / 100) 5.334 -> 5.219 ( -2.16%) [ +0.21% +0.13% +0.00% / -2.16% -2.06% -1.86%] index_select random_sorted : Elapsed 0.053 ms (5.345 ms / 100) B = [256, 512] (stride (1, 256)) A = [255, 512] (stride (1, 255)) dim = 0 Good 6.310 -> 5.369 (-14.91%) [ +0.00% +0.52% +0.52% / -14.91% -14.53% -14.44%] index_add_ linear : Elapsed 0.063 ms (6.310 ms / 100) Good 5.800 -> 5.208 (-10.21%) [ +0.00% +0.31% +0.16% / -10.21% -10.10% -10.19%] index_copy_ linear : Elapsed 0.058 ms (5.800 ms / 100) Good 6.372 -> 5.375 (-15.65%) [ +0.00% +0.22% +0.03% / -15.41% -15.57% -15.65%] index_add_ reverse : Elapsed 0.064 ms (6.372 ms / 100) Good 5.789 -> 5.198 (-10.21%) [ +0.07% +0.00% +0.03% / -9.64% -10.16% -10.21%] index_copy_ reverse : Elapsed 0.058 ms (5.793 ms / 100) Good 6.339 -> 5.395 (-14.89%) [ +0.00% +0.14% +0.24% / -14.78% -14.83% -14.89%] index_add_ spread : Elapsed 0.063 ms (6.339 ms / 100) Good 5.809 -> 5.213 (-10.26%) [ +0.17% +0.02% +0.00% / -9.90% -10.19% -10.26%] index_copy_ spread : Elapsed 0.058 ms (5.819 ms / 100) Good 6.419 -> 5.415 (-15.64%) [ +0.00% +0.39% +0.09% / -15.64% -15.44% -15.35%] index_add_ strided 3 : Elapsed 0.064 ms (6.419 ms / 100) Good 5.977 -> 5.216 (-12.73%) [ +0.28% +0.28% +0.00% / -12.73% -12.45% -12.36%] index_copy_ strided 3 : Elapsed 0.060 ms (5.994 ms / 100) Good 6.384 -> 5.465 (-14.40%) [ +0.34% +0.00% +0.50% / -14.40% -14.14% -14.40%] index_add_ strided 5 : Elapsed 0.064 ms (6.406 ms / 100) Good 5.995 -> 5.226 (-12.83%) [ +0.50% +0.00% +0.22% / -12.68% -12.76% -12.83%] index_copy_ strided 5 : Elapsed 0.060 ms (6.025 ms / 100) Good 6.208 -> 5.550 (-10.60%) [ +0.00% +0.11% +0.27% / -10.60% -10.50% -10.55%] index_add_ strided 7 : Elapsed 0.062 ms (6.208 ms / 100) Good 5.993 -> 5.263 (-12.18%) [ +0.00% +0.13% +0.35% / -12.16% -12.18% -12.10%] index_copy_ strided 7 : Elapsed 0.060 ms (5.993 ms / 100) Good 6.360 -> 5.384 (-15.35%) [ +0.22% +0.22% +0.00% / -15.06% -15.35% -15.35%] index_add_ strided 255 : Elapsed 0.064 ms (6.374 ms / 100) Good 5.816 -> 5.205 (-10.51%) [ +0.15% +0.14% +0.00% / -10.37% -10.51% -10.49%] index_copy_ strided 255 : Elapsed 0.058 ms (5.825 ms / 100) Good 6.371 -> 5.608 (-11.98%) [ +0.14% +0.00% +0.58% / -11.98% -11.66% -11.68%] index_add_ perm : Elapsed 0.064 ms (6.380 ms / 100) Good 6.014 -> 5.285 (-12.12%) [ +0.02% +0.00% +0.32% / -12.12% -11.84% -11.69%] index_copy_ perm : Elapsed 0.060 ms (6.015 ms / 100) Good 6.357 -> 5.400 (-15.05%) [ +0.00% +0.22% +0.02% / -15.05% -14.83% -14.93%] index_add_ perm_sorted : Elapsed 0.064 ms (6.357 ms / 100) Good 5.804 -> 5.223 (-10.01%) [ +0.07% +0.03% +0.00% / -10.01% -9.82% -9.82%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.808 ms / 100) Good 5.662 -> 4.970 (-12.22%) [ +0.09% +0.05% +0.00% / -11.96% -11.82% -12.22%] index_select const : Elapsed 0.057 ms (5.667 ms / 100) good 5.764 -> 5.222 ( -9.40%) [ +0.09% +0.00% +0.02% / -9.16% -9.16% -9.40%] index_select wrap : Elapsed 0.058 ms (5.769 ms / 100) good 5.778 -> 5.226 ( -9.55%) [ +0.03% +0.14% +0.00% / -9.14% -9.55% -9.40%] index_select linear : Elapsed 0.058 ms (5.780 ms / 100) good 5.768 -> 5.221 ( -9.48%) [ +0.02% +0.00% +0.12% / -9.48% -9.26% -9.26%] index_select reverse : Elapsed 0.058 ms (5.769 ms / 100) good 5.635 -> 5.081 ( -9.83%) [ +0.20% +0.02% +0.00% / -9.69% -9.71% -9.83%] index_select skip64 : Elapsed 0.056 ms (5.646 ms / 100) Good 5.669 -> 4.981 (-12.14%) [ +0.14% +0.16% +0.00% / -12.07% -12.10% -12.14%] index_select skip256 : Elapsed 0.057 ms (5.677 ms / 100) good 5.742 -> 5.213 ( -9.21%) [ +0.05% +0.00% +0.23% / -8.83% -9.21% -8.92%] index_select spread : Elapsed 0.057 ms (5.745 ms / 100) Good 6.076 -> 5.206 (-14.32%) [ +0.23% +0.00% +0.16% / -13.99% -14.32% -14.29%] index_select strided 3 : Elapsed 0.061 ms (6.090 ms / 100) Good 6.101 -> 5.203 (-14.72%) [ +0.00% +0.10% +0.11% / -14.47% -14.64% -14.72%] index_select strided 5 : Elapsed 0.061 ms (6.101 ms / 100) Good 6.104 -> 5.210 (-14.65%) [ +0.00% +0.11% +0.03% / -14.27% -14.61% -14.65%] index_select strided 7 : Elapsed 0.061 ms (6.104 ms / 100) Good 6.095 -> 5.189 (-14.86%) [ +0.00% +0.07% +0.07% / -14.86% -14.86% -14.50%] index_select strided 8 : Elapsed 0.061 ms (6.095 ms / 100) Good 6.035 -> 5.209 (-13.69%) [ +0.00% +0.25% +0.03% / -13.37% -13.44% -13.69%] index_select strided 16 : Elapsed 0.060 ms (6.035 ms / 100) Good 5.905 -> 5.258 (-10.96%) [ +0.08% +0.07% +0.00% / -10.52% -10.86% -10.96%] index_select strided 64 : Elapsed 0.059 ms (5.910 ms / 100) Good 6.097 -> 5.229 (-14.24%) [ +0.00% +0.00% +0.03% / -14.09% -13.92% -14.24%] index_select strided 100 : Elapsed 0.061 ms (6.097 ms / 100) Good 6.083 -> 5.253 (-13.64%) [ +0.00% +0.44% +0.15% / -13.61% -13.64% -13.58%] index_select random : Elapsed 0.061 ms (6.083 ms / 100) good 5.785 -> 5.237 ( -9.47%) [ +0.00% +0.19% +0.35% / -9.40% -9.47% -9.37%] index_select random_sorted : Elapsed 0.058 ms (5.785 ms / 100) out_shape = [255, 256] in_shape = [255, 512] idx_dim = 1 B = [255, 256] (stride (256, 1)) dim = 1 fill_cnt = 512 Good 3.150 -> 2.572 (-18.35%) [ +0.22% +0.00% +0.13% / -18.13% -18.35% -18.29%] index_fill_ const : Elapsed 0.032 ms (3.157 ms / 100) Good 3.165 -> 2.598 (-17.91%) [ +0.28% +0.00% +0.16% / -17.91% -17.88% -17.73%] index_fill_ linear : Elapsed 0.032 ms (3.174 ms / 100) Good 3.126 -> 2.604 (-16.70%) [ +0.19% +0.03% +0.00% / -16.67% -16.70% -16.70%] index_fill_ reverse : Elapsed 0.031 ms (3.132 ms / 100) Good 3.134 -> 2.566 (-18.12%) [ +0.10% +0.00% +0.16% / -17.65% -18.12% -18.12%] index_fill_ skip64 : Elapsed 0.031 ms (3.137 ms / 100) Good 3.137 -> 2.576 (-17.88%) [ +0.26% +0.26% +0.00% / -17.88% -17.85% -17.79%] index_fill_ skip256 : Elapsed 0.031 ms (3.145 ms / 100) Good 3.137 -> 2.592 (-17.37%) [ +0.03% +0.16% +0.00% / -17.25% -17.34% -17.37%] index_fill_ spread : Elapsed 0.031 ms (3.138 ms / 100) Good 3.191 -> 2.656 (-16.77%) [ +0.09% +0.16% +0.00% / -16.14% -16.77% -16.67%] index_fill_ strided 3 : Elapsed 0.032 ms (3.194 ms / 100) Good 3.190 -> 2.754 (-13.67%) [ +0.16% +0.00% +0.34% / -13.32% -13.67% -13.57%] index_fill_ strided 5 : Elapsed 0.032 ms (3.195 ms / 100) good 3.195 -> 2.879 ( -9.89%) [ +0.25% +0.00% +0.03% / -9.67% -9.58% -9.89%] index_fill_ strided 7 : Elapsed 0.032 ms (3.203 ms / 100) good 3.290 -> 2.965 ( -9.88%) [ +0.09% +0.33% +0.00% / -9.79% -9.60% -9.88%] index_fill_ strided 8 : Elapsed 0.033 ms (3.293 ms / 100) good 3.191 -> 2.958 ( -7.30%) [ +0.16% +0.09% +0.00% / -7.11% -7.30% -6.96%] index_fill_ strided 16 : Elapsed 0.032 ms (3.196 ms / 100) Good 3.121 -> 2.730 (-12.53%) [ +0.38% +0.00% +0.35% / -12.46% -12.50% -12.53%] index_fill_ strided 64 : Elapsed 0.031 ms (3.133 ms / 100) 3.290 -> 3.236 ( -1.64%) [ +0.21% +0.00% +0.12% / -1.64% -1.46% -1.34%] index_fill_ strided 100 : Elapsed 0.033 ms (3.297 ms / 100) Good 3.147 -> 2.601 (-17.35%) [ +0.06% +0.06% +0.00% / -17.22% -17.32% -17.35%] index_fill_ strided 255 : Elapsed 0.031 ms (3.149 ms / 100) good 3.284 -> 3.067 ( -6.61%) [ +0.15% +0.03% +0.00% / -6.61% -6.33% -6.43%] index_fill_ random : Elapsed 0.033 ms (3.289 ms / 100) Good 3.219 -> 2.690 (-16.43%) [ +0.00% +0.09% +0.06% / -16.34% -16.37% -16.43%] index_fill_ random_sorted : Elapsed 0.032 ms (3.219 ms / 100) B = [255, 256] (stride (256, 1)) A = [255, 512] (stride (512, 1)) dim = 1 good 4.942 -> 4.643 ( -6.05%) [ +0.04% +0.04% +0.00% / -5.93% -6.05% -5.91%] index_select const : Elapsed 0.049 ms (4.944 ms / 100) good 5.036 -> 4.780 ( -5.08%) [ +0.24% +0.06% +0.00% / -5.08% -4.86% -4.75%] index_select wrap : Elapsed 0.050 ms (5.048 ms / 100) good 5.056 -> 4.794 ( -5.18%) [ +0.06% +0.22% +0.00% / -4.79% -4.92% -5.18%] index_select linear : Elapsed 0.051 ms (5.059 ms / 100) 5.040 -> 4.795 ( -4.86%) [ +0.00% +0.00% +0.10% / -4.76% -4.86% -4.44%] index_select reverse : Elapsed 0.050 ms (5.040 ms / 100) good 4.977 -> 4.714 ( -5.28%) [ +0.16% +0.10% +0.00% / -5.06% -5.28% -5.14%] index_select skip64 : Elapsed 0.050 ms (4.985 ms / 100) good 4.928 -> 4.651 ( -5.62%) [ +0.37% +0.12% +0.00% / -5.52% -5.62% -5.40%] index_select skip256 : Elapsed 0.049 ms (4.946 ms / 100) good 5.172 -> 4.895 ( -5.36%) [ +0.00% +0.41% +0.12% / -5.34% -5.24% -5.36%] index_select spread : Elapsed 0.052 ms (5.172 ms / 100) good 5.281 -> 4.907 ( -7.08%) [ +0.13% +0.09% +0.00% / -7.08% -6.53% -6.46%] index_select strided 3 : Elapsed 0.053 ms (5.288 ms / 100) good 5.351 -> 4.922 ( -8.02%) [ +0.09% +0.11% +0.00% / -8.02% -7.91% -8.02%] index_select strided 5 : Elapsed 0.054 ms (5.356 ms / 100) good 5.384 -> 4.909 ( -8.82%) [ +0.00% +0.30% +0.59% / -8.82% -8.28% -8.41%] index_select strided 7 : Elapsed 0.054 ms (5.384 ms / 100) good 5.409 -> 4.919 ( -9.06%) [ +0.00% +0.11% +0.07% / -9.00% -8.80% -9.06%] index_select strided 8 : Elapsed 0.054 ms (5.409 ms / 100) good 5.245 -> 4.830 ( -7.91%) [ +0.00% +0.10% +0.02% / -7.91% -7.68% -7.40%] index_select strided 16 : Elapsed 0.052 ms (5.245 ms / 100) good 5.030 -> 4.736 ( -5.84%) [ +0.14% +0.00% +0.16% / -5.75% -5.84% -5.73%] index_select strided 64 : Elapsed 0.050 ms (5.037 ms / 100) good 5.403 -> 4.965 ( -8.11%) [ +0.00% +0.19% +0.26% / -7.81% -8.11% -8.03%] index_select strided 100 : Elapsed 0.054 ms (5.403 ms / 100) good 5.243 -> 4.915 ( -6.26%) [ +0.02% +0.00% +0.15% / -6.26% -5.86% -5.26%] index_select strided 255 : Elapsed 0.052 ms (5.244 ms / 100) good 4.947 -> 4.663 ( -5.74%) [ +0.00% +0.12% +0.12% / -5.74% -5.48% -5.54%] index_select strided 256 : Elapsed 0.049 ms (4.947 ms / 100) good 5.227 -> 4.903 ( -6.20%) [ +0.38% +0.00% +0.27% / -6.20% -6.10% -6.14%] index_select strided 257 : Elapsed 0.052 ms (5.247 ms / 100) good 5.378 -> 4.977 ( -7.46%) [ +0.00% +0.20% +0.00% / -7.46% -6.95% -6.88%] index_select random : Elapsed 0.054 ms (5.378 ms / 100) 5.159 -> 4.918 ( -4.67%) [ +0.23% +0.00% +0.25% / -4.67% -4.52% -4.30%] index_select random_sorted : Elapsed 0.052 ms (5.171 ms / 100) good 5.404 -> 4.997 ( -7.53%) [ +0.00% +0.09% +0.09% / -7.31% -7.44% -7.53%] index_select perm : Elapsed 0.054 ms (5.404 ms / 100) good 5.192 -> 4.928 ( -5.08%) [ +0.15% +0.02% +0.00% / -5.08% -4.89% -4.93%] index_select perm_sorted : Elapsed 0.052 ms (5.200 ms / 100) B = [255, 256] (stride (256, 1)) A = [255, 512] (stride (1, 255)) dim = 1 4.807 -> 4.639 ( -3.49%) [ +0.27% +0.00% +0.37% / -3.49% -3.33% -3.31%] index_select const : Elapsed 0.048 ms (4.820 ms / 100) 4.841 -> 4.889 ( +0.99%) [ +0.12% +0.00% +0.02% / +1.26% +1.30% +0.99%] index_select wrap : Elapsed 0.048 ms (4.847 ms / 100) 4.842 -> 4.895 ( +1.09%) [ +0.02% +0.04% +0.00% / +1.20% +1.09% +1.14%] index_select linear : Elapsed 0.048 ms (4.843 ms / 100) 4.872 -> 4.898 ( +0.53%) [ +0.14% +0.00% +0.12% / +0.53% +0.57% +0.92%] index_select reverse : Elapsed 0.049 ms (4.879 ms / 100) 4.807 -> 4.657 ( -3.12%) [ +0.19% +0.00% +0.33% / -2.79% -3.12% -3.12%] index_select skip64 : Elapsed 0.048 ms (4.816 ms / 100) 4.821 -> 4.647 ( -3.61%) [ +0.08% +0.00% +0.10% / -3.30% -3.38% -3.61%] index_select skip256 : Elapsed 0.048 ms (4.825 ms / 100) 4.895 -> 4.927 ( +0.65%) [ +0.20% +0.04% +0.00% / +0.69% +0.80% +0.65%] index_select spread : Elapsed 0.049 ms (4.905 ms / 100) 4.862 -> 4.963 ( +2.08%) [ +0.27% +0.00% +0.23% / +2.08% +2.22% +2.45%] index_select strided 3 : Elapsed 0.049 ms (4.875 ms / 100) 4.882 -> 4.964 ( +1.68%) [ +0.10% +0.00% +0.20% / +1.68% +2.19% +2.03%] index_select strided 5 : Elapsed 0.049 ms (4.887 ms / 100) 4.861 -> 4.941 ( +1.65%) [ +0.02% +0.08% +0.00% / +1.65% +2.53% +2.49%] index_select strided 7 : Elapsed 0.049 ms (4.862 ms / 100) 4.823 -> 4.740 ( -1.72%) [ +0.06% +0.02% +0.00% / -1.72% -1.60% -1.68%] index_select strided 8 : Elapsed 0.048 ms (4.826 ms / 100) 4.795 -> 4.712 ( -1.73%) [ +0.00% +0.17% +0.17% / -1.73% -1.69% -1.65%] index_select strided 16 : Elapsed 0.048 ms (4.795 ms / 100) 4.809 -> 4.675 ( -2.79%) [ +0.25% +0.29% +0.00% / -2.22% -2.79% -2.72%] index_select strided 64 : Elapsed 0.048 ms (4.821 ms / 100) 4.831 -> 4.812 ( -0.39%) [ +0.10% +0.00% +0.17% / -0.39% -0.06% -0.06%] index_select strided 100 : Elapsed 0.048 ms (4.836 ms / 100) 4.863 -> 4.922 ( +1.21%) [ +0.41% +0.00% +0.41% / +1.21% +1.60% +1.50%] index_select strided 255 : Elapsed 0.049 ms (4.883 ms / 100) 4.818 -> 4.647 ( -3.55%) [ +0.02% +0.00% +0.31% / -3.20% -3.55% -3.53%] index_select strided 256 : Elapsed 0.048 ms (4.819 ms / 100) 4.850 -> 4.895 ( +0.93%) [ +0.16% +0.10% +0.00% / +0.93% +1.61% +1.30%] index_select strided 257 : Elapsed 0.049 ms (4.858 ms / 100) 4.861 -> 4.917 ( +1.15%) [ +0.29% +0.00% +0.10% / +1.34% +1.15% +1.15%] index_select random : Elapsed 0.049 ms (4.875 ms / 100) 4.860 -> 4.894 ( +0.70%) [ +0.12% +0.00% +0.14% / +0.82% +0.70% +1.03%] index_select random_sorted : Elapsed 0.049 ms (4.866 ms / 100) 4.866 -> 4.932 ( +1.36%) [ +0.16% +0.10% +0.00% / +1.36% +2.14% +2.26%] index_select perm : Elapsed 0.049 ms (4.874 ms / 100) 4.855 -> 4.930 ( +1.54%) [ +0.00% +0.35% +0.23% / +1.54% +2.33% +2.18%] index_select perm_sorted : Elapsed 0.049 ms (4.855 ms / 100) B = [255, 256] (stride (1, 255)) dim = 1 fill_cnt = 512 2.875 -> 2.882 ( +0.24%) [ +0.21% +0.00% +0.03% / +0.24% +0.49% +0.63%] index_fill_ const : Elapsed 0.029 ms (2.881 ms / 100) 2.738 -> 2.724 ( -0.51%) [ +0.04% +0.00% +0.37% / -0.51% -0.07% -0.22%] index_fill_ linear : Elapsed 0.027 ms (2.739 ms / 100) 2.676 -> 2.665 ( -0.41%) [ +0.00% +0.22% +0.34% / -0.41% +0.26% +0.60%] index_fill_ reverse : Elapsed 0.027 ms (2.676 ms / 100) 2.773 -> 2.772 ( -0.04%) [ +0.00% +0.07% +0.11% / -0.04% +0.29% +0.47%] index_fill_ skip64 : Elapsed 0.028 ms (2.773 ms / 100) 2.830 -> 2.843 ( +0.46%) [ +0.57% +0.00% +0.11% / +1.48% +0.49% +0.46%] index_fill_ skip256 : Elapsed 0.028 ms (2.846 ms / 100) 2.607 -> 2.605 ( -0.08%) [ +0.38% +0.00% +0.04% / -0.08% +0.12% -0.04%] index_fill_ spread : Elapsed 0.026 ms (2.617 ms / 100) 2.613 -> 2.606 ( -0.27%) [ +0.15% +0.08% +0.00% / -0.27% +0.00% -0.11%] index_fill_ strided 3 : Elapsed 0.026 ms (2.617 ms / 100) 2.626 -> 2.616 ( -0.38%) [ +0.19% +0.00% +0.00% / +0.00% -0.04% -0.38%] index_fill_ strided 5 : Elapsed 0.026 ms (2.631 ms / 100) 2.627 -> 2.601 ( -0.99%) [ +0.00% +0.04% +0.00% / -0.11% -0.88% -0.99%] index_fill_ strided 7 : Elapsed 0.026 ms (2.627 ms / 100) 2.599 -> 2.591 ( -0.31%) [ +0.23% +0.15% +0.00% / -0.31% -0.04% -0.12%] index_fill_ strided 8 : Elapsed 0.026 ms (2.605 ms / 100) 2.593 -> 2.584 ( -0.35%) [ +0.15% +0.00% +0.08% / +0.35% -0.35% -0.35%] index_fill_ strided 16 : Elapsed 0.026 ms (2.597 ms / 100) 2.601 -> 2.608 ( +0.27%) [ +0.46% +0.31% +0.00% / +0.27% +0.35% +0.31%] index_fill_ strided 64 : Elapsed 0.026 ms (2.613 ms / 100) 2.606 -> 2.591 ( -0.58%) [ +0.00% +0.00% +0.04% / -0.58% +0.00% +0.08%] index_fill_ strided 100 : Elapsed 0.026 ms (2.606 ms / 100) 2.638 -> 2.605 ( -1.25%) [ +0.27% +0.00% +0.42% / -0.57% -1.21% -1.25%] index_fill_ strided 255 : Elapsed 0.026 ms (2.645 ms / 100) 2.607 -> 2.594 ( -0.50%) [ +0.50% +0.73% +0.00% / +0.35% -0.50% +0.04%] index_fill_ random : Elapsed 0.026 ms (2.620 ms / 100) 2.602 -> 2.612 ( +0.38%) [ +0.00% +0.27% +0.19% / +0.38% +0.69% +0.88%] index_fill_ random_sorted : Elapsed 0.026 ms (2.602 ms / 100) B = [255, 256] (stride (1, 255)) A = [255, 512] (stride (512, 1)) dim = 1 4.734 -> 4.728 ( -0.13%) [ +0.11% +0.06% +0.00% / +0.13% -0.13% +0.13%] index_select const : Elapsed 0.047 ms (4.739 ms / 100) 4.933 -> 4.930 ( -0.06%) [ +0.83% +0.10% +0.00% / +0.39% +0.04% -0.06%] index_select wrap : Elapsed 0.050 ms (4.974 ms / 100) 4.920 -> 4.927 ( +0.14%) [ +0.33% +0.02% +0.00% / +0.14% +0.39% +0.35%] index_select linear : Elapsed 0.049 ms (4.936 ms / 100) 4.917 -> 4.921 ( +0.08%) [ +0.31% +0.00% +0.08% / +0.12% +0.08% +0.14%] index_select reverse : Elapsed 0.049 ms (4.932 ms / 100) 4.753 -> 4.747 ( -0.13%) [ +0.06% +0.00% +0.06% / +0.04% -0.13% -0.02%] index_select skip64 : Elapsed 0.048 ms (4.756 ms / 100) 4.692 -> 4.709 ( +0.36%) [ +0.00% +0.19% +0.40% / +0.36% +0.43% +0.40%] index_select skip256 : Elapsed 0.047 ms (4.692 ms / 100) 5.110 -> 5.112 ( +0.04%) [ +0.12% +0.00% +0.31% / +0.14% +0.04% +0.29%] index_select spread : Elapsed 0.051 ms (5.116 ms / 100) 5.255 -> 5.265 ( +0.19%) [ +0.13% +0.06% +0.00% / +0.19% +0.48% +0.46%] index_select strided 3 : Elapsed 0.053 ms (5.262 ms / 100) 5.292 -> 5.279 ( -0.25%) [ +0.00% +0.02% +0.04% / -0.17% -0.25% +0.02%] index_select strided 5 : Elapsed 0.053 ms (5.292 ms / 100) 5.295 -> 5.294 ( -0.02%) [ +0.11% +0.00% +0.32% / +0.13% +0.00% -0.02%] index_select strided 7 : Elapsed 0.053 ms (5.301 ms / 100) 5.275 -> 5.266 ( -0.17%) [ +0.00% +0.06% +0.15% / +0.08% +0.23% -0.17%] index_select strided 8 : Elapsed 0.053 ms (5.275 ms / 100) 5.139 -> 5.130 ( -0.18%) [ +0.00% +0.08% +0.16% / -0.18% +0.00% +0.06%] index_select strided 16 : Elapsed 0.051 ms (5.139 ms / 100) 4.850 -> 4.837 ( -0.27%) [ +0.00% +0.10% +0.00% / +0.06% -0.27% +0.00%] index_select strided 64 : Elapsed 0.049 ms (4.850 ms / 100) 5.286 -> 5.287 ( +0.02%) [ +0.30% +0.38% +0.00% / +0.02% +0.15% +0.23%] index_select strided 100 : Elapsed 0.053 ms (5.302 ms / 100) 5.174 -> 5.162 ( -0.23%) [ +0.23% +0.00% +0.06% / -0.17% -0.23% -0.23%] index_select strided 255 : Elapsed 0.052 ms (5.186 ms / 100) 4.726 -> 4.730 ( +0.08%) [ +0.28% +0.00% +0.11% / +0.08% +0.36% +0.59%] index_select strided 256 : Elapsed 0.047 ms (4.739 ms / 100) 5.164 -> 5.165 ( +0.02%) [ +0.08% +0.00% +0.04% / +0.02% +0.02% +0.23%] index_select strided 257 : Elapsed 0.052 ms (5.168 ms / 100) 5.302 -> 5.303 ( +0.02%) [ +0.00% +0.25% +0.51% / +0.30% +0.02% +0.11%] index_select random : Elapsed 0.053 ms (5.302 ms / 100) 5.129 -> 5.130 ( +0.02%) [ +0.18% +0.14% +0.00% / +0.02% +0.39% +0.58%] index_select random_sorted : Elapsed 0.051 ms (5.138 ms / 100) 5.305 -> 5.295 ( -0.19%) [ +0.00% +0.02% +0.17% / +0.28% -0.13% -0.19%] index_select perm : Elapsed 0.053 ms (5.305 ms / 100) 5.109 -> 5.114 ( +0.10%) [ +0.12% +0.08% +0.00% / +0.10% +0.35% +0.49%] index_select perm_sorted : Elapsed 0.051 ms (5.115 ms / 100) B = [255, 256] (stride (1, 255)) A = [255, 512] (stride (1, 255)) dim = 1 4.682 -> 4.633 ( -1.05%) [ +0.06% +0.00% +0.11% / -0.13% -1.05% -0.70%] index_select const : Elapsed 0.047 ms (4.685 ms / 100) 4.774 -> 4.784 ( +0.21%) [ +0.36% +0.21% +0.00% / +0.21% +1.19% +1.32%] index_select wrap : Elapsed 0.048 ms (4.791 ms / 100) 4.777 -> 4.777 ( +0.00%) [ +0.10% +0.13% +0.00% / +0.15% +0.00% +0.06%] index_select linear : Elapsed 0.048 ms (4.782 ms / 100) 4.806 -> 4.804 ( -0.04%) [ +0.00% +0.08% +0.25% / -0.02% -0.04% +0.04%] index_select reverse : Elapsed 0.048 ms (4.806 ms / 100) 4.655 -> 4.654 ( -0.02%) [ +0.00% +0.21% +0.13% / +0.32% -0.02% -0.02%] index_select skip64 : Elapsed 0.047 ms (4.655 ms / 100) 4.639 -> 4.647 ( +0.17%) [ +0.04% +0.17% +0.00% / +0.17% +0.17% +0.17%] index_select skip256 : Elapsed 0.046 ms (4.641 ms / 100) 4.813 -> 4.815 ( +0.04%) [ +0.12% +0.04% +0.00% / +0.04% +0.75% +0.83%] index_select spread : Elapsed 0.048 ms (4.819 ms / 100) 4.864 -> 4.828 ( -0.74%) [ +0.00% +0.08% +0.02% / -0.12% -0.72% -0.74%] index_select strided 3 : Elapsed 0.049 ms (4.864 ms / 100) 4.807 -> 4.796 ( -0.23%) [ +0.00% +0.00% +0.23% / -0.23% +0.10% +0.06%] index_select strided 5 : Elapsed 0.048 ms (4.807 ms / 100) 4.816 -> 4.799 ( -0.35%) [ +0.00% +0.02% +0.12% / -0.02% -0.35% -0.17%] index_select strided 7 : Elapsed 0.048 ms (4.816 ms / 100) 4.688 -> 4.692 ( +0.09%) [ +0.00% +0.17% +0.11% / +0.30% +0.26% +0.09%] index_select strided 8 : Elapsed 0.047 ms (4.688 ms / 100) 4.679 -> 4.681 ( +0.04%) [ +0.32% +0.00% +0.13% / +0.15% +0.04% +0.53%] index_select strided 16 : Elapsed 0.047 ms (4.694 ms / 100) 4.655 -> 4.644 ( -0.24%) [ +0.00% +0.06% +0.28% / +0.13% -0.24% +0.02%] index_select strided 64 : Elapsed 0.047 ms (4.655 ms / 100) 4.746 -> 4.735 ( -0.23%) [ +0.00% +0.00% +0.00% / -0.23% +0.13% +0.23%] index_select strided 100 : Elapsed 0.047 ms (4.746 ms / 100) 4.814 -> 4.822 ( +0.17%) [ +0.00% +0.08% +0.21% / +0.17% +0.81% +1.00%] index_select strided 255 : Elapsed 0.048 ms (4.814 ms / 100) 4.684 -> 4.638 ( -0.98%) [ +0.00% +0.21% +0.00% / +0.09% -0.98% -0.98%] index_select strided 256 : Elapsed 0.047 ms (4.684 ms / 100) 4.772 -> 4.766 ( -0.13%) [ +0.00% +0.19% +0.23% / -0.13% +0.57% +0.63%] index_select strided 257 : Elapsed 0.048 ms (4.772 ms / 100) 4.779 -> 4.774 ( -0.10%) [ +0.00% +0.06% +0.06% / +0.29% -0.10% +0.21%] index_select random : Elapsed 0.048 ms (4.779 ms / 100) 4.773 -> 4.785 ( +0.25%) [ +0.00% +0.19% +0.04% / +0.25% +0.40% +0.27%] index_select random_sorted : Elapsed 0.048 ms (4.773 ms / 100) 4.808 -> 4.814 ( +0.12%) [ +0.00% +0.08% +0.04% / +0.12% +0.12% +0.37%] index_select perm : Elapsed 0.048 ms (4.808 ms / 100) 4.805 -> 4.816 ( +0.23%) [ +0.29% +0.00% +0.35% / +0.31% +0.31% +0.23%] index_select perm_sorted : Elapsed 0.048 ms (4.819 ms / 100) out_shape = [256, 255] in_shape = [512, 255] idx_dim = 0 B = [256, 255] (stride (255, 1)) dim = 0 fill_cnt = 512 2.627 -> 2.625 ( -0.08%) [ +0.34% +0.00% +0.38% / -0.08% +0.11% +0.11%] index_fill_ const : Elapsed 0.026 ms (2.636 ms / 100) 2.655 -> 2.626 ( -1.09%) [ +0.26% +0.00% +0.15% / -0.64% -1.09% -0.72%] index_fill_ linear : Elapsed 0.027 ms (2.662 ms / 100) 2.584 -> 2.572 ( -0.46%) [ +0.35% +0.00% +0.31% / +0.04% -0.43% -0.46%] index_fill_ reverse : Elapsed 0.026 ms (2.593 ms / 100) 2.756 -> 2.761 ( +0.18%) [ +0.00% +0.29% +0.18% / +0.18% +0.47% +0.83%] index_fill_ skip64 : Elapsed 0.028 ms (2.756 ms / 100) 2.630 -> 2.620 ( -0.38%) [ +0.00% +0.04% +0.11% / -0.38% +0.08% +0.15%] index_fill_ skip256 : Elapsed 0.026 ms (2.630 ms / 100) 2.623 -> 2.596 ( -1.03%) [ +0.46% +0.00% +0.19% / -0.50% -0.99% -1.03%] index_fill_ spread : Elapsed 0.026 ms (2.635 ms / 100) 2.616 -> 2.608 ( -0.31%) [ +0.19% +0.00% +0.38% / -0.31% +0.11% -0.15%] index_fill_ strided 3 : Elapsed 0.026 ms (2.621 ms / 100) 2.624 -> 2.607 ( -0.65%) [ +0.15% +0.00% +0.46% / +0.19% -0.65% -0.50%] index_fill_ strided 5 : Elapsed 0.026 ms (2.628 ms / 100) 2.628 -> 2.616 ( -0.46%) [ +0.00% +0.00% +0.27% / -0.30% -0.46% -0.04%] index_fill_ strided 7 : Elapsed 0.026 ms (2.628 ms / 100) 2.595 -> 2.585 ( -0.39%) [ +0.08% +0.00% +0.15% / -0.39% -0.27% -0.04%] index_fill_ strided 8 : Elapsed 0.026 ms (2.597 ms / 100) 2.587 -> 2.586 ( -0.04%) [ +0.19% +0.08% +0.00% / -0.04% +0.04% +0.00%] index_fill_ strided 16 : Elapsed 0.026 ms (2.592 ms / 100) 2.609 -> 2.611 ( +0.08%) [ +0.19% +0.00% +0.31% / +0.38% +0.19% +0.08%] index_fill_ strided 64 : Elapsed 0.026 ms (2.614 ms / 100) 2.611 -> 2.594 ( -0.65%) [ +0.15% +0.04% +0.00% / -0.27% -0.65% -0.61%] index_fill_ strided 100 : Elapsed 0.026 ms (2.615 ms / 100) 2.611 -> 2.600 ( -0.42%) [ +0.42% +0.00% +0.46% / -0.42% +0.38% +0.34%] index_fill_ strided 255 : Elapsed 0.026 ms (2.622 ms / 100) 2.633 -> 2.601 ( -1.22%) [ +0.15% +0.11% +0.00% / -0.84% -1.22% -1.14%] index_fill_ random : Elapsed 0.026 ms (2.637 ms / 100) 2.609 -> 2.605 ( -0.15%) [ +0.00% +0.00% +0.00% / +0.04% +0.31% -0.15%] index_fill_ random_sorted : Elapsed 0.026 ms (2.609 ms / 100) B = [256, 255] (stride (255, 1)) A = [512, 255] (stride (255, 1)) dim = 0 4.635 -> 4.639 ( +0.09%) [ +0.06% +0.00% +0.24% / +0.24% +0.28% +0.09%] index_select const : Elapsed 0.046 ms (4.638 ms / 100) 4.780 -> 4.787 ( +0.15%) [ +0.21% +0.17% +0.00% / +0.15% +0.17% +0.15%] index_select wrap : Elapsed 0.048 ms (4.790 ms / 100) 4.795 -> 4.775 ( -0.42%) [ +0.00% +0.27% +0.19% / +0.25% -0.35% -0.42%] index_select linear : Elapsed 0.048 ms (4.795 ms / 100) 4.818 -> 4.815 ( -0.06%) [ +0.08% +0.00% +0.08% / +0.00% -0.06% +0.15%] index_select reverse : Elapsed 0.048 ms (4.822 ms / 100) 4.659 -> 4.627 ( -0.69%) [ +0.00% +0.30% +0.19% / +0.54% -0.69% -0.34%] index_select skip64 : Elapsed 0.047 ms (4.659 ms / 100) 4.648 -> 4.644 ( -0.09%) [ +0.06% +0.17% +0.00% / -0.09% +0.56% +0.49%] index_select skip256 : Elapsed 0.047 ms (4.651 ms / 100) 4.819 -> 4.809 ( -0.21%) [ +0.12% +0.15% +0.00% / +0.15% -0.21% -0.02%] index_select spread : Elapsed 0.048 ms (4.825 ms / 100) 4.813 -> 4.801 ( -0.25%) [ +0.06% +0.00% +0.06% / -0.04% -0.08% -0.25%] index_select strided 3 : Elapsed 0.048 ms (4.816 ms / 100) 4.797 -> 4.806 ( +0.19%) [ +0.25% +0.00% +0.42% / +0.42% +0.35% +0.19%] index_select strided 5 : Elapsed 0.048 ms (4.809 ms / 100) 4.827 -> 4.798 ( -0.60%) [ +0.21% +0.00% +0.08% / +0.12% -0.58% -0.60%] index_select strided 7 : Elapsed 0.048 ms (4.837 ms / 100) 4.705 -> 4.689 ( -0.34%) [ +0.06% +0.13% +0.00% / +0.06% -0.34% -0.09%] index_select strided 8 : Elapsed 0.047 ms (4.708 ms / 100) 4.664 -> 4.667 ( +0.06%) [ +0.00% +0.28% +0.17% / +0.06% +0.32% +0.56%] index_select strided 16 : Elapsed 0.047 ms (4.664 ms / 100) 4.635 -> 4.652 ( +0.37%) [ +0.00% +0.32% +0.35% / +0.37% +0.67% +0.50%] index_select strided 64 : Elapsed 0.046 ms (4.635 ms / 100) 4.769 -> 4.755 ( -0.29%) [ +0.00% +0.13% +0.10% / +0.15% -0.29% -0.29%] index_select strided 100 : Elapsed 0.048 ms (4.769 ms / 100) 4.807 -> 4.808 ( +0.02%) [ +0.04% +0.00% +0.06% / +0.02% +0.21% +0.21%] index_select strided 255 : Elapsed 0.048 ms (4.809 ms / 100) 4.634 -> 4.633 ( -0.02%) [ +0.00% +0.13% +0.22% / +0.11% -0.02% +0.22%] index_select strided 256 : Elapsed 0.046 ms (4.634 ms / 100) 4.788 -> 4.780 ( -0.17%) [ +0.08% +0.33% +0.00% / +0.00% -0.17% +0.02%] index_select strided 257 : Elapsed 0.048 ms (4.792 ms / 100) 4.798 -> 4.783 ( -0.31%) [ +0.15% +0.00% +0.19% / -0.02% +0.08% -0.31%] index_select random : Elapsed 0.048 ms (4.805 ms / 100) 4.780 -> 4.786 ( +0.13%) [ +0.00% +0.10% +0.17% / +0.13% +0.98% +0.77%] index_select random_sorted : Elapsed 0.048 ms (4.780 ms / 100) 4.820 -> 4.808 ( -0.25%) [ +0.00% +0.15% +0.27% / +0.27% -0.02% -0.25%] index_select perm : Elapsed 0.048 ms (4.820 ms / 100) 4.797 -> 4.801 ( +0.08%) [ +0.00% +0.06% +0.33% / +0.08% +0.60% +0.25%] index_select perm_sorted : Elapsed 0.048 ms (4.797 ms / 100) B = [256, 255] (stride (255, 1)) A = [512, 255] (stride (1, 512)) dim = 0 4.693 -> 4.701 ( +0.17%) [ +0.00% +0.34% +0.23% / +0.17% +0.26% +0.23%] index_select const : Elapsed 0.047 ms (4.693 ms / 100) 4.899 -> 4.903 ( +0.08%) [ +0.22% +0.24% +0.00% / +0.33% +0.08% +0.18%] index_select wrap : Elapsed 0.049 ms (4.910 ms / 100) 4.903 -> 4.904 ( +0.02%) [ +0.14% +0.16% +0.00% / +0.16% +0.06% +0.02%] index_select linear : Elapsed 0.049 ms (4.910 ms / 100) 4.898 -> 4.904 ( +0.12%) [ +0.20% +0.45% +0.00% / +0.12% +0.43% +0.39%] index_select reverse : Elapsed 0.049 ms (4.908 ms / 100) 4.770 -> 4.761 ( -0.19%) [ +0.00% +0.02% +0.08% / -0.19% -0.13% -0.15%] index_select skip64 : Elapsed 0.048 ms (4.770 ms / 100) 4.706 -> 4.713 ( +0.15%) [ +0.00% +0.30% +0.17% / +0.15% +0.66% +0.21%] index_select skip256 : Elapsed 0.047 ms (4.706 ms / 100) 5.121 -> 5.109 ( -0.23%) [ +0.18% +0.00% +0.20% / -0.23% -0.20% -0.16%] index_select spread : Elapsed 0.051 ms (5.130 ms / 100) 5.197 -> 5.196 ( -0.02%) [ +0.00% +0.02% +0.21% / -0.02% +0.87% +0.96%] index_select strided 3 : Elapsed 0.052 ms (5.197 ms / 100) 5.260 -> 5.263 ( +0.06%) [ +0.00% +0.10% +0.36% / +0.29% +0.21% +0.06%] index_select strided 5 : Elapsed 0.053 ms (5.260 ms / 100) 5.275 -> 5.289 ( +0.27%) [ +0.21% +0.00% +0.38% / +0.27% +0.49% +0.61%] index_select strided 7 : Elapsed 0.053 ms (5.286 ms / 100) 5.277 -> 5.282 ( +0.09%) [ +0.27% +0.00% +0.36% / +0.09% +0.11% +0.25%] index_select strided 8 : Elapsed 0.053 ms (5.291 ms / 100) 5.140 -> 5.140 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.00% +0.21% +0.27%] index_select strided 16 : Elapsed 0.051 ms (5.142 ms / 100) 4.874 -> 4.831 ( -0.88%) [ +0.00% +0.02% +0.18% / +0.16% -0.39% -0.88%] index_select strided 64 : Elapsed 0.049 ms (4.874 ms / 100) 5.283 -> 5.276 ( -0.13%) [ +0.13% +0.15% +0.00% / -0.13% +0.78% +0.95%] index_select strided 100 : Elapsed 0.053 ms (5.290 ms / 100) 5.131 -> 5.152 ( +0.41%) [ +0.04% +0.00% +0.21% / +0.41% +0.68% +0.76%] index_select strided 255 : Elapsed 0.051 ms (5.133 ms / 100) 4.747 -> 4.725 ( -0.46%) [ +0.06% +0.04% +0.00% / -0.06% -0.36% -0.46%] index_select strided 256 : Elapsed 0.047 ms (4.750 ms / 100) 5.129 -> 5.134 ( +0.10%) [ +0.00% +0.35% +0.04% / +0.10% +0.43% +0.45%] index_select strided 257 : Elapsed 0.051 ms (5.129 ms / 100) 5.273 -> 5.294 ( +0.40%) [ +0.11% +0.44% +0.00% / +0.40% +0.42% +0.59%] index_select random : Elapsed 0.053 ms (5.279 ms / 100) 5.071 -> 5.074 ( +0.06%) [ +0.06% +0.00% +0.20% / +0.06% +0.53% +0.43%] index_select random_sorted : Elapsed 0.051 ms (5.074 ms / 100) 5.272 -> 5.285 ( +0.25%) [ +0.30% +0.00% +0.02% / +0.25% +0.59% +0.46%] index_select perm : Elapsed 0.053 ms (5.288 ms / 100) 5.087 -> 5.075 ( -0.24%) [ +0.12% +0.00% +0.28% / -0.24% +1.08% +0.88%] index_select perm_sorted : Elapsed 0.051 ms (5.093 ms / 100) B = [256, 255] (stride (1, 256)) dim = 0 fill_cnt = 512 Good 3.107 -> 2.572 (-17.22%) [ +0.16% +0.06% +0.00% / -17.12% -17.22% -17.03%] index_fill_ const : Elapsed 0.031 ms (3.112 ms / 100) Good 3.141 -> 2.600 (-17.22%) [ +0.06% +0.00% +0.19% / -17.06% -17.19% -17.22%] index_fill_ linear : Elapsed 0.031 ms (3.143 ms / 100) Good 3.101 -> 2.603 (-16.06%) [ +0.00% +0.00% +0.03% / -16.06% -15.99% -15.83%] index_fill_ reverse : Elapsed 0.031 ms (3.101 ms / 100) Good 3.100 -> 2.564 (-17.29%) [ +0.00% +0.23% +0.26% / -17.10% -17.29% -17.13%] index_fill_ skip64 : Elapsed 0.031 ms (3.100 ms / 100) Good 3.114 -> 2.560 (-17.79%) [ +0.19% +0.00% +0.22% / -17.50% -17.79% -17.79%] index_fill_ skip256 : Elapsed 0.031 ms (3.120 ms / 100) Good 3.126 -> 2.590 (-17.15%) [ +0.26% +0.00% +0.10% / -17.05% -16.95% -17.15%] index_fill_ spread : Elapsed 0.031 ms (3.134 ms / 100) Good 3.191 -> 2.677 (-16.11%) [ +0.00% +0.22% +0.06% / -15.92% -16.11% -16.11%] index_fill_ strided 3 : Elapsed 0.032 ms (3.191 ms / 100) Good 3.204 -> 2.776 (-13.36%) [ +0.06% +0.22% +0.00% / -13.33% -13.36% -13.14%] index_fill_ strided 5 : Elapsed 0.032 ms (3.206 ms / 100) good 3.204 -> 2.906 ( -9.30%) [ +0.03% +0.25% +0.00% / -8.83% -9.21% -9.30%] index_fill_ strided 7 : Elapsed 0.032 ms (3.205 ms / 100) good 3.288 -> 3.015 ( -8.30%) [ +0.21% +0.00% +0.21% / -8.30% -8.15% -8.18%] index_fill_ strided 8 : Elapsed 0.033 ms (3.295 ms / 100) good 3.178 -> 3.017 ( -5.07%) [ +0.00% +0.09% +0.63% / -5.07% -4.85% -5.07%] index_fill_ strided 16 : Elapsed 0.032 ms (3.178 ms / 100) Good 3.124 -> 2.752 (-11.91%) [ +0.00% +0.26% +0.80% / -11.78% -11.91% -11.75%] index_fill_ strided 64 : Elapsed 0.031 ms (3.124 ms / 100) 3.286 -> 3.298 ( +0.37%) [ +0.30% +0.27% +0.00% / +0.46% +0.70% +0.37%] index_fill_ strided 100 : Elapsed 0.033 ms (3.296 ms / 100) Good 3.164 -> 2.600 (-17.83%) [ +0.22% +0.00% +0.00% / -17.60% -17.83% -17.73%] index_fill_ strided 255 : Elapsed 0.032 ms (3.171 ms / 100) good 3.282 -> 3.115 ( -5.09%) [ +0.18% +0.03% +0.00% / -5.09% -4.97% -4.66%] index_fill_ random : Elapsed 0.033 ms (3.288 ms / 100) Good 3.217 -> 2.692 (-16.32%) [ +0.47% +0.12% +0.00% / -16.32% -16.10% -16.20%] index_fill_ random_sorted : Elapsed 0.032 ms (3.232 ms / 100) B = [256, 255] (stride (1, 256)) A = [512, 255] (stride (255, 1)) dim = 0 4.819 -> 4.647 ( -3.57%) [ +0.00% +0.00% +0.12% / -3.57% -3.17% -3.30%] index_select const : Elapsed 0.048 ms (4.819 ms / 100) 4.851 -> 4.900 ( +1.01%) [ +0.00% +0.16% +0.27% / +1.36% +1.01% +1.40%] index_select wrap : Elapsed 0.049 ms (4.851 ms / 100) 4.829 -> 4.902 ( +1.51%) [ +0.21% +0.00% +0.29% / +1.51% +1.84% +1.97%] index_select linear : Elapsed 0.048 ms (4.839 ms / 100) 4.864 -> 4.902 ( +0.78%) [ +0.00% +0.21% +0.02% / +1.21% +0.78% +1.01%] index_select reverse : Elapsed 0.049 ms (4.864 ms / 100) 4.807 -> 4.642 ( -3.43%) [ +0.10% +0.00% +0.17% / -3.37% -3.43% -3.35%] index_select skip64 : Elapsed 0.048 ms (4.812 ms / 100) 4.791 -> 4.640 ( -3.15%) [ +0.06% +0.00% +0.02% / -3.15% -2.98% -3.01%] index_select skip256 : Elapsed 0.048 ms (4.794 ms / 100) 4.869 -> 4.936 ( +1.38%) [ +0.00% +0.16% +0.21% / +1.38% +1.58% +1.50%] index_select spread : Elapsed 0.049 ms (4.869 ms / 100) 4.865 -> 4.973 ( +2.22%) [ +0.12% +0.00% +0.31% / +2.22% +2.51% +2.67%] index_select strided 3 : Elapsed 0.049 ms (4.871 ms / 100) 4.888 -> 4.979 ( +1.86%) [ +0.12% +0.00% +0.27% / +1.86% +2.21% +2.23%] index_select strided 5 : Elapsed 0.049 ms (4.894 ms / 100) 4.898 -> 4.976 ( +1.59%) [ +0.06% +0.00% +0.12% / +1.59% +1.88% +1.74%] index_select strided 7 : Elapsed 0.049 ms (4.901 ms / 100) 4.817 -> 4.735 ( -1.70%) [ +0.00% +0.21% +0.23% / -1.70% -1.64% -1.49%] index_select strided 8 : Elapsed 0.048 ms (4.817 ms / 100) 4.808 -> 4.805 ( -0.06%) [ +0.02% +0.00% +0.00% / +0.10% -0.06% +0.00%] index_select strided 16 : Elapsed 0.048 ms (4.809 ms / 100) 4.791 -> 4.649 ( -2.96%) [ +0.15% +0.00% +0.00% / -2.80% -2.96% -2.86%] index_select strided 64 : Elapsed 0.048 ms (4.798 ms / 100) 4.832 -> 4.799 ( -0.68%) [ +0.08% +0.00% +0.00% / -0.41% -0.68% -0.50%] index_select strided 100 : Elapsed 0.048 ms (4.836 ms / 100) 4.885 -> 4.924 ( +0.80%) [ +0.00% +0.18% +0.29% / +1.41% +1.06% +0.80%] index_select strided 255 : Elapsed 0.049 ms (4.885 ms / 100) 4.801 -> 4.648 ( -3.19%) [ +0.15% +0.10% +0.00% / -3.19% -2.75% -2.96%] index_select strided 256 : Elapsed 0.048 ms (4.808 ms / 100) 4.850 -> 4.929 ( +1.63%) [ +0.23% +0.00% +0.43% / +2.00% +1.71% +1.63%] index_select strided 257 : Elapsed 0.049 ms (4.861 ms / 100) 4.877 -> 4.900 ( +0.47%) [ +0.29% +0.00% +0.18% / +0.47% +0.51% +0.47%] index_select random : Elapsed 0.049 ms (4.891 ms / 100) 4.821 -> 4.884 ( +1.31%) [ +0.00% +0.04% +0.23% / +1.31% +1.49% +1.60%] index_select random_sorted : Elapsed 0.048 ms (4.821 ms / 100) 4.877 -> 4.964 ( +1.78%) [ +0.08% +0.00% +0.14% / +2.03% +1.89% +1.78%] index_select perm : Elapsed 0.049 ms (4.881 ms / 100) 4.863 -> 4.941 ( +1.60%) [ +0.29% +0.00% +0.23% / +2.34% +1.60% +1.60%] index_select perm_sorted : Elapsed 0.049 ms (4.877 ms / 100) B = [256, 255] (stride (1, 256)) A = [512, 255] (stride (1, 512)) dim = 0 good 4.992 -> 4.666 ( -6.53%) [ +0.08% +0.06% +0.00% / -6.51% -6.53% -6.53%] index_select const : Elapsed 0.050 ms (4.996 ms / 100) 5.044 -> 4.801 ( -4.82%) [ +0.00% +0.10% +0.16% / -4.82% -4.64% -4.64%] index_select wrap : Elapsed 0.050 ms (5.044 ms / 100) 5.039 -> 4.798 ( -4.78%) [ +0.00% +0.24% +0.22% / -4.78% -4.70% -4.62%] index_select linear : Elapsed 0.050 ms (5.039 ms / 100) 5.023 -> 4.805 ( -4.34%) [ +0.00% +0.48% +0.52% / -4.34% -4.20% -3.96%] index_select reverse : Elapsed 0.050 ms (5.023 ms / 100) 4.960 -> 4.714 ( -4.96%) [ +0.34% +0.00% +0.18% / -4.70% -4.82% -4.96%] index_select skip64 : Elapsed 0.050 ms (4.977 ms / 100) good 4.944 -> 4.644 ( -6.07%) [ +0.10% +0.16% +0.00% / -5.99% -6.07% -5.74%] index_select skip256 : Elapsed 0.049 ms (4.949 ms / 100) good 5.174 -> 4.885 ( -5.59%) [ +0.12% +0.00% +0.27% / -5.59% -5.14% -5.10%] index_select spread : Elapsed 0.052 ms (5.180 ms / 100) good 5.321 -> 4.925 ( -7.44%) [ +0.23% +0.30% +0.00% / -7.24% -7.35% -7.44%] index_select strided 3 : Elapsed 0.053 ms (5.333 ms / 100) good 5.334 -> 4.912 ( -7.91%) [ +0.21% +0.04% +0.00% / -7.91% -7.74% -7.50%] index_select strided 5 : Elapsed 0.053 ms (5.345 ms / 100) good 5.382 -> 4.917 ( -8.64%) [ +0.02% +0.00% +0.04% / -8.47% -8.64% -8.45%] index_select strided 7 : Elapsed 0.054 ms (5.383 ms / 100) good 5.389 -> 4.911 ( -8.87%) [ +0.02% +0.00% +0.06% / -8.87% -8.70% -8.83%] index_select strided 8 : Elapsed 0.054 ms (5.390 ms / 100) good 5.241 -> 4.831 ( -7.82%) [ +0.00% +0.02% +0.04% / -7.82% -7.48% -7.40%] index_select strided 16 : Elapsed 0.052 ms (5.241 ms / 100) good 5.029 -> 4.711 ( -6.32%) [ +0.00% +0.30% +0.16% / -5.75% -6.01% -6.32%] index_select strided 64 : Elapsed 0.050 ms (5.029 ms / 100) good 5.393 -> 4.966 ( -7.92%) [ +0.06% +0.24% +0.00% / -7.88% -7.77% -7.92%] index_select strided 100 : Elapsed 0.054 ms (5.396 ms / 100) good 5.232 -> 4.921 ( -5.94%) [ +0.06% +0.19% +0.00% / -5.94% -5.35% -5.33%] index_select strided 255 : Elapsed 0.052 ms (5.235 ms / 100) good 4.998 -> 4.671 ( -6.54%) [ +0.08% +0.00% +0.04% / -6.20% -6.54% -6.44%] index_select strided 256 : Elapsed 0.050 ms (5.002 ms / 100) good 5.218 -> 4.878 ( -6.52%) [ +0.25% +0.00% +0.21% / -6.52% -5.94% -5.73%] index_select strided 257 : Elapsed 0.052 ms (5.231 ms / 100) good 5.378 -> 4.966 ( -7.66%) [ +0.19% +0.00% +0.26% / -7.66% -7.34% -7.38%] index_select random : Elapsed 0.054 ms (5.388 ms / 100) good 5.180 -> 4.914 ( -5.14%) [ +0.06% +0.27% +0.00% / -5.14% -4.86% -5.02%] index_select random_sorted : Elapsed 0.052 ms (5.183 ms / 100) good 5.375 -> 5.005 ( -6.88%) [ +0.00% +0.20% +0.45% / -6.73% -6.88% -6.88%] index_select perm : Elapsed 0.054 ms (5.375 ms / 100) good 5.178 -> 4.915 ( -5.08%) [ +0.00% +0.25% +0.33% / -5.08% -4.71% -4.38%] index_select perm_sorted : Elapsed 0.052 ms (5.178 ms / 100) out_shape = [512, 256] in_shape = [512, 255] idx_dim = 1 B = [512, 256] (stride (256, 1)) dim = 1 fill_cnt = 255 Good 3.130 -> 2.575 (-17.73%) [ +0.00% +0.19% +0.19% / -17.67% -17.73% -17.60%] index_fill_ const : Elapsed 0.031 ms (3.130 ms / 100) Good 3.200 -> 2.651 (-17.16%) [ +0.28% +0.16% +0.00% / -16.56% -17.12% -17.16%] index_fill_ linear : Elapsed 0.032 ms (3.209 ms / 100) Good 3.181 -> 2.660 (-16.38%) [ +0.28% +0.00% +0.94% / -16.19% -16.32% -16.38%] index_fill_ reverse : Elapsed 0.032 ms (3.190 ms / 100) Good 3.110 -> 2.564 (-17.56%) [ +0.00% +0.32% +0.06% / -17.49% -17.56% -17.46%] index_fill_ skip64 : Elapsed 0.031 ms (3.110 ms / 100) Good 3.122 -> 2.592 (-16.98%) [ +0.35% +0.00% +0.22% / -16.98% -16.62% -16.91%] index_fill_ skip256 : Elapsed 0.031 ms (3.133 ms / 100) Good 3.193 -> 2.647 (-17.10%) [ +0.03% +0.25% +0.00% / -17.10% -16.66% -16.60%] index_fill_ spread : Elapsed 0.032 ms (3.194 ms / 100) Good 3.230 -> 2.754 (-14.74%) [ +0.22% +0.34% +0.00% / -14.37% -14.55% -14.74%] index_fill_ strided 3 : Elapsed 0.032 ms (3.237 ms / 100) Good 3.294 -> 2.870 (-12.87%) [ +0.46% +0.18% +0.00% / -12.87% -12.51% -12.78%] index_fill_ strided 5 : Elapsed 0.033 ms (3.309 ms / 100) Good 3.311 -> 2.979 (-10.03%) [ +0.18% +0.00% +0.15% / -9.82% -10.03% -9.94%] index_fill_ strided 7 : Elapsed 0.033 ms (3.317 ms / 100) good 3.524 -> 3.228 ( -8.40%) [ +0.03% +0.00% +0.00% / -8.00% -8.37% -8.40%] index_fill_ strided 8 : Elapsed 0.035 ms (3.525 ms / 100) good 3.300 -> 3.087 ( -6.45%) [ +0.45% +0.27% +0.00% / -6.45% -6.30% -6.39%] index_fill_ strided 16 : Elapsed 0.033 ms (3.315 ms / 100) Good 3.163 -> 2.781 (-12.08%) [ +0.00% +0.03% +0.13% / -12.08% -11.98% -11.73%] index_fill_ strided 64 : Elapsed 0.032 ms (3.163 ms / 100) 3.520 -> 3.402 ( -3.35%) [ +0.09% +0.00% +0.00% / -2.53% -3.35% -3.27%] index_fill_ strided 100 : Elapsed 0.035 ms (3.523 ms / 100) Good 3.192 -> 2.647 (-17.07%) [ +0.00% +0.19% +0.44% / -16.64% -16.95% -17.07%] index_fill_ strided 255 : Elapsed 0.032 ms (3.192 ms / 100) good 3.510 -> 3.244 ( -7.58%) [ +0.00% +0.40% +0.37% / -7.58% -7.35% -7.38%] index_fill_ random : Elapsed 0.035 ms (3.510 ms / 100) Good 3.366 -> 2.840 (-15.63%) [ +0.00% +0.12% +0.12% / -15.42% -15.63% -15.54%] index_fill_ random_sorted : Elapsed 0.034 ms (3.366 ms / 100) good 3.318 -> 3.078 ( -7.23%) [ +0.00% +0.15% +0.48% / -7.23% -6.90% -6.87%] index_fill_ perm : Elapsed 0.033 ms (3.318 ms / 100) Good 3.160 -> 2.656 (-15.95%) [ +0.00% +0.13% +0.32% / -15.95% -15.73% -15.79%] index_fill_ perm_sorted : Elapsed 0.032 ms (3.160 ms / 100) B = [512, 256] (stride (256, 1)) A = [512, 255] (stride (255, 1)) dim = 1 Good 6.337 -> 5.388 (-14.98%) [ +0.63% +0.00% +0.41% / -14.71% -14.98% -14.93%] index_add_ linear : Elapsed 0.064 ms (6.377 ms / 100) Good 5.810 -> 5.212 (-10.29%) [ +0.07% +0.03% +0.00% / -10.00% -10.17% -10.29%] index_copy_ linear : Elapsed 0.058 ms (5.814 ms / 100) Good 6.378 -> 5.387 (-15.54%) [ +0.36% +0.00% +0.58% / -15.32% -15.52% -15.54%] index_add_ reverse : Elapsed 0.064 ms (6.401 ms / 100) Good 5.818 -> 5.218 (-10.31%) [ +0.14% +0.00% +0.07% / -10.30% -10.31% -10.21%] index_copy_ reverse : Elapsed 0.058 ms (5.826 ms / 100) Good 6.334 -> 5.373 (-15.17%) [ +0.00% +0.43% +0.17% / -14.89% -15.17% -15.01%] index_add_ spread : Elapsed 0.063 ms (6.334 ms / 100) Good 5.793 -> 5.195 (-10.32%) [ +0.03% +0.16% +0.00% / -9.91% -10.32% -10.29%] index_copy_ spread : Elapsed 0.058 ms (5.795 ms / 100) Good 6.398 -> 5.424 (-15.22%) [ +0.27% +0.16% +0.00% / -15.22% -15.21% -15.21%] index_add_ strided 3 : Elapsed 0.064 ms (6.415 ms / 100) Good 5.946 -> 5.222 (-12.18%) [ +0.35% +0.42% +0.00% / -11.97% -12.04% -12.18%] index_copy_ strided 3 : Elapsed 0.060 ms (5.967 ms / 100) Good 6.382 -> 5.456 (-14.51%) [ +0.16% +0.00% +0.08% / -14.35% -14.51% -14.46%] index_add_ strided 5 : Elapsed 0.064 ms (6.392 ms / 100) Good 5.987 -> 5.226 (-12.71%) [ +0.68% +0.40% +0.00% / -12.66% -12.68% -12.71%] index_copy_ strided 5 : Elapsed 0.060 ms (6.028 ms / 100) Good 6.240 -> 5.561 (-10.88%) [ +0.08% +0.06% +0.00% / -10.88% -10.74% -10.83%] index_add_ strided 7 : Elapsed 0.062 ms (6.245 ms / 100) Good 6.007 -> 5.270 (-12.27%) [ +0.27% +0.02% +0.00% / -12.22% -12.27% -12.20%] index_copy_ strided 7 : Elapsed 0.060 ms (6.023 ms / 100) Good 6.365 -> 5.388 (-15.35%) [ +0.00% +0.09% +0.57% / -15.35% -14.99% -15.10%] index_add_ strided 255 : Elapsed 0.064 ms (6.365 ms / 100) Good 5.835 -> 5.224 (-10.47%) [ +0.02% +0.00% +0.26% / -10.39% -10.37% -10.47%] index_copy_ strided 255 : Elapsed 0.058 ms (5.836 ms / 100) Good 6.372 -> 5.632 (-11.61%) [ +0.00% +0.30% +0.28% / -11.61% -11.61% -11.39%] index_add_ perm : Elapsed 0.064 ms (6.372 ms / 100) Good 6.021 -> 5.317 (-11.69%) [ +0.00% +0.17% +0.17% / -11.66% -11.69% -11.53%] index_copy_ perm : Elapsed 0.060 ms (6.021 ms / 100) Good 6.374 -> 5.383 (-15.55%) [ +0.00% +0.82% +0.30% / -15.06% -15.55% -15.47%] index_add_ perm_sorted : Elapsed 0.064 ms (6.374 ms / 100) Good 5.807 -> 5.201 (-10.44%) [ +0.00% +0.41% +0.33% / -9.71% -10.38% -10.44%] index_copy_ perm_sorted : Elapsed 0.058 ms (5.807 ms / 100) Good 5.671 -> 5.000 (-11.83%) [ +0.00% +0.11% +0.11% / -11.83% -11.81% -11.73%] index_select const : Elapsed 0.057 ms (5.671 ms / 100) good 5.770 -> 5.233 ( -9.31%) [ +0.07% +0.00% +0.09% / -9.31% -9.08% -9.06%] index_select wrap : Elapsed 0.058 ms (5.774 ms / 100) good 5.752 -> 5.225 ( -9.16%) [ +0.12% +0.12% +0.00% / -8.88% -8.97% -9.16%] index_select linear : Elapsed 0.058 ms (5.759 ms / 100) good 5.763 -> 5.221 ( -9.40%) [ +0.00% +0.16% +0.12% / -9.40% -9.06% -9.20%] index_select reverse : Elapsed 0.058 ms (5.763 ms / 100) good 5.633 -> 5.084 ( -9.75%) [ +0.23% +0.21% +0.00% / -9.64% -9.75% -9.66%] index_select skip64 : Elapsed 0.056 ms (5.646 ms / 100) Good 5.672 -> 4.985 (-12.11%) [ +0.04% +0.02% +0.00% / -12.11% -11.94% -11.90%] index_select skip256 : Elapsed 0.057 ms (5.674 ms / 100) good 5.761 -> 5.241 ( -9.03%) [ +0.00% +0.33% +0.21% / -8.92% -8.87% -9.03%] index_select spread : Elapsed 0.058 ms (5.761 ms / 100) Good 6.078 -> 5.203 (-14.40%) [ +0.05% +0.08% +0.00% / -14.40% -14.10% -14.30%] index_select strided 3 : Elapsed 0.061 ms (6.081 ms / 100) Good 6.078 -> 5.195 (-14.53%) [ +0.00% +0.31% +0.16% / -14.53% -14.45% -14.23%] index_select strided 5 : Elapsed 0.061 ms (6.078 ms / 100) Good 6.079 -> 5.194 (-14.56%) [ +0.00% +0.07% +0.49% / -14.46% -14.26% -14.56%] index_select strided 7 : Elapsed 0.061 ms (6.079 ms / 100) Good 6.083 -> 5.185 (-14.76%) [ +0.00% +0.12% +0.00% / -14.58% -14.71% -14.76%] index_select strided 8 : Elapsed 0.061 ms (6.083 ms / 100) Good 6.037 -> 5.199 (-13.88%) [ +0.17% +0.40% +0.00% / -13.88% -13.73% -13.65%] index_select strided 16 : Elapsed 0.060 ms (6.047 ms / 100) Good 5.906 -> 5.263 (-10.89%) [ +0.22% +0.00% +0.24% / -10.50% -10.53% -10.89%] index_select strided 64 : Elapsed 0.059 ms (5.919 ms / 100) Good 6.095 -> 5.229 (-14.21%) [ +0.15% +0.00% +0.21% / -14.21% -13.83% -13.88%] index_select strided 100 : Elapsed 0.061 ms (6.104 ms / 100) Good 6.090 -> 5.249 (-13.81%) [ +0.00% +0.20% +0.38% / -13.79% -13.81% -13.81%] index_select random : Elapsed 0.061 ms (6.090 ms / 100) good 5.795 -> 5.232 ( -9.72%) [ +0.10% +0.02% +0.00% / -9.72% -9.49% -9.39%] index_select random_sorted : Elapsed 0.058 ms (5.801 ms / 100) B = [512, 256] (stride (256, 1)) A = [512, 255] (stride (1, 512)) dim = 1 good 5.950 -> 5.503 ( -7.51%) [ +0.05% +0.00% +0.22% / -7.51% -7.19% -7.18%] index_add_ linear : Elapsed 0.060 ms (5.953 ms / 100) 5.387 -> 5.363 ( -0.45%) [ +0.00% +0.02% +0.04% / -0.45% +0.24% +0.26%] index_copy_ linear : Elapsed 0.054 ms (5.387 ms / 100) good 5.955 -> 5.510 ( -7.47%) [ +0.18% +0.00% +0.05% / -7.30% -7.36% -7.47%] index_add_ reverse : Elapsed 0.060 ms (5.966 ms / 100) 5.375 -> 5.375 ( +0.00%) [ +0.19% +0.00% +0.06% / +0.00% +0.20% +0.30%] index_copy_ reverse : Elapsed 0.054 ms (5.385 ms / 100) good 5.970 -> 5.533 ( -7.32%) [ +0.03% +0.00% +0.02% / -7.32% -7.22% -7.09%] index_add_ spread : Elapsed 0.060 ms (5.972 ms / 100) 5.387 -> 5.405 ( +0.33%) [ +0.30% +0.09% +0.00% / +0.33% +0.61% +0.76%] index_copy_ spread : Elapsed 0.054 ms (5.403 ms / 100) 5.785 -> 5.563 ( -3.84%) [ +0.00% +0.10% +0.14% / -3.84% -3.53% -3.30%] index_add_ strided 3 : Elapsed 0.058 ms (5.785 ms / 100) 5.448 -> 5.407 ( -0.75%) [ +0.00% +0.11% +0.06% / -0.75% -0.39% -0.28%] index_copy_ strided 3 : Elapsed 0.054 ms (5.448 ms / 100) 5.796 -> 5.642 ( -2.66%) [ +0.33% +0.52% +0.00% / -2.66% -2.50% -2.61%] index_add_ strided 5 : Elapsed 0.058 ms (5.815 ms / 100) 5.479 -> 5.471 ( -0.15%) [ +0.11% +0.00% +0.24% / -0.11% -0.15% -0.15%] index_copy_ strided 5 : Elapsed 0.055 ms (5.485 ms / 100) 5.748 -> 5.713 ( -0.61%) [ +0.56% +0.12% +0.00% / -0.61% -0.17% -0.14%] index_add_ strided 7 : Elapsed 0.058 ms (5.780 ms / 100) 5.512 -> 5.502 ( -0.18%) [ +0.24% +0.13% +0.00% / -0.18% +0.42% +0.29%] index_copy_ strided 7 : Elapsed 0.055 ms (5.525 ms / 100) good 5.963 -> 5.510 ( -7.60%) [ +0.18% +0.02% +0.00% / -7.60% -7.36% -7.28%] index_add_ strided 255 : Elapsed 0.060 ms (5.974 ms / 100) 5.382 -> 5.360 ( -0.41%) [ +0.45% +0.11% +0.00% / -0.41% +0.07% +0.02%] index_copy_ strided 255 : Elapsed 0.054 ms (5.406 ms / 100) 5.868 -> 5.815 ( -0.90%) [ +0.00% +0.12% +0.17% / -0.90% -0.05% -0.09%] index_add_ perm : Elapsed 0.059 ms (5.868 ms / 100) 5.512 -> 5.624 ( +2.03%) [ +0.02% +0.00% +0.31% / +2.03% +2.87% +2.81%] index_copy_ perm : Elapsed 0.055 ms (5.513 ms / 100) good 5.953 -> 5.528 ( -7.14%) [ +0.00% +0.12% +0.13% / -7.09% -7.14% -6.95%] index_add_ perm_sorted : Elapsed 0.060 ms (5.953 ms / 100) 5.386 -> 5.396 ( +0.19%) [ +0.09% +0.07% +0.00% / +0.19% +0.54% +0.37%] index_copy_ perm_sorted : Elapsed 0.054 ms (5.391 ms / 100) good 5.275 -> 4.968 ( -5.82%) [ +0.00% +0.11% +0.13% / -5.74% -5.71% -5.82%] index_select const : Elapsed 0.053 ms (5.275 ms / 100) 5.341 -> 5.341 ( +0.00%) [ +0.19% +0.00% +0.07% / +0.00% +0.06% +0.17%] index_select wrap : Elapsed 0.054 ms (5.351 ms / 100) 5.342 -> 5.353 ( +0.21%) [ +0.17% +0.30% +0.00% / +0.21% +0.22% +0.36%] index_select linear : Elapsed 0.054 ms (5.351 ms / 100) 5.338 -> 5.322 ( -0.30%) [ +0.09% +0.06% +0.00% / -0.30% +0.37% +0.34%] index_select reverse : Elapsed 0.053 ms (5.343 ms / 100) good 5.262 -> 4.960 ( -5.74%) [ +0.21% +0.08% +0.00% / -5.51% -5.74% -5.53%] index_select skip64 : Elapsed 0.053 ms (5.273 ms / 100) good 5.286 -> 4.964 ( -6.09%) [ +0.23% +0.00% +0.13% / -6.09% -5.98% -6.02%] index_select skip256 : Elapsed 0.053 ms (5.298 ms / 100) 5.329 -> 5.331 ( +0.04%) [ +0.23% +0.00% +0.19% / +0.04% +0.23% +0.53%] index_select spread : Elapsed 0.053 ms (5.341 ms / 100) 5.337 -> 5.271 ( -1.24%) [ +0.15% +0.13% +0.00% / -1.24% -0.43% -0.69%] index_select strided 3 : Elapsed 0.053 ms (5.345 ms / 100) 5.361 -> 5.359 ( -0.04%) [ +0.15% +0.15% +0.00% / +0.06% -0.04% +0.04%] index_select strided 5 : Elapsed 0.054 ms (5.369 ms / 100) 5.363 -> 5.416 ( +0.99%) [ +0.19% +0.07% +0.00% / +0.99% +1.21% +1.38%] index_select strided 7 : Elapsed 0.054 ms (5.373 ms / 100) 5.353 -> 5.323 ( -0.56%) [ +0.00% +0.00% +0.17% / -0.56% +0.00% +0.07%] index_select strided 8 : Elapsed 0.054 ms (5.353 ms / 100) 5.352 -> 5.414 ( +1.16%) [ +0.00% +0.32% +0.22% / +1.16% +1.48% +1.40%] index_select strided 16 : Elapsed 0.054 ms (5.352 ms / 100) 5.347 -> 5.402 ( +1.03%) [ +0.06% +0.07% +0.00% / +1.03% +1.03% +1.10%] index_select strided 64 : Elapsed 0.053 ms (5.350 ms / 100) 5.308 -> 5.347 ( +0.73%) [ +0.26% +0.00% +0.38% / +0.73% +0.79% +0.87%] index_select strided 100 : Elapsed 0.053 ms (5.322 ms / 100) 5.335 -> 5.263 ( -1.35%) [ +0.26% +0.00% +0.04% / -0.88% -1.11% -1.35%] index_select random : Elapsed 0.053 ms (5.349 ms / 100) 5.319 -> 5.213 ( -1.99%) [ +0.00% +0.08% +0.06% / -1.99% -1.64% -1.39%] index_select random_sorted : Elapsed 0.053 ms (5.319 ms / 100) B = [512, 256] (stride (1, 512)) dim = 1 fill_cnt = 255 2.593 -> 2.581 ( -0.46%) [ +0.19% +0.00% +0.19% / -0.23% -0.35% -0.46%] index_fill_ const : Elapsed 0.026 ms (2.598 ms / 100) 2.677 -> 2.675 ( -0.07%) [ +0.19% +0.11% +0.00% / +0.26% -0.04% -0.07%] index_fill_ linear : Elapsed 0.027 ms (2.682 ms / 100) 2.659 -> 2.654 ( -0.19%) [ +0.00% +0.08% +0.60% / -0.19% -0.04% -0.04%] index_fill_ reverse : Elapsed 0.027 ms (2.659 ms / 100) 2.598 -> 2.569 ( -1.12%) [ +0.04% +0.08% +0.00% / -0.19% -1.12% -0.73%] index_fill_ skip64 : Elapsed 0.026 ms (2.599 ms / 100) 2.583 -> 2.573 ( -0.39%) [ +0.08% +0.12% +0.00% / +0.66% -0.31% -0.39%] index_fill_ skip256 : Elapsed 0.026 ms (2.585 ms / 100) 2.661 -> 2.659 ( -0.08%) [ +0.30% +0.00% +0.04% / +0.19% -0.08% +0.15%] index_fill_ spread : Elapsed 0.027 ms (2.669 ms / 100) 2.665 -> 2.672 ( +0.26%) [ +0.53% +0.11% +0.00% / +0.49% +0.49% +0.26%] index_fill_ strided 3 : Elapsed 0.027 ms (2.679 ms / 100) 2.656 -> 2.665 ( +0.34%) [ +0.45% +0.38% +0.00% / +0.56% +0.49% +0.34%] index_fill_ strided 5 : Elapsed 0.027 ms (2.668 ms / 100) 2.661 -> 2.651 ( -0.38%) [ +0.23% +0.00% +0.30% / -0.38% +0.00% +0.08%] index_fill_ strided 7 : Elapsed 0.027 ms (2.667 ms / 100) 2.596 -> 2.589 ( -0.27%) [ +0.39% +0.00% +0.19% / +0.35% -0.12% -0.27%] index_fill_ strided 8 : Elapsed 0.026 ms (2.606 ms / 100) 2.589 -> 2.586 ( -0.12%) [ +0.00% +0.04% +0.00% / +0.04% -0.12% -0.04%] index_fill_ strided 16 : Elapsed 0.026 ms (2.589 ms / 100) 2.589 -> 2.574 ( -0.58%) [ +0.27% +0.46% +0.00% / -0.23% -0.46% -0.58%] index_fill_ strided 64 : Elapsed 0.026 ms (2.596 ms / 100) 2.591 -> 2.586 ( -0.19%) [ +0.00% +0.42% +0.19% / +0.42% -0.08% -0.19%] index_fill_ strided 100 : Elapsed 0.026 ms (2.591 ms / 100) 2.661 -> 2.659 ( -0.08%) [ +0.30% +0.08% +0.00% / -0.08% +0.04% +0.19%] index_fill_ strided 255 : Elapsed 0.027 ms (2.669 ms / 100) 2.625 -> 2.627 ( +0.08%) [ +0.15% +0.30% +0.00% / +0.08% +0.46% +0.38%] index_fill_ random : Elapsed 0.026 ms (2.629 ms / 100) 2.627 -> 2.611 ( -0.61%) [ +0.04% +0.00% +0.08% / -0.04% -0.61% -0.38%] index_fill_ random_sorted : Elapsed 0.026 ms (2.628 ms / 100) 2.678 -> 2.647 ( -1.16%) [ +0.04% +0.22% +0.00% / -0.60% -0.93% -1.16%] index_fill_ perm : Elapsed 0.027 ms (2.679 ms / 100) 2.665 -> 2.654 ( -0.41%) [ +0.04% +0.00% +0.04% / -0.41% -0.26% +0.00%] index_fill_ perm_sorted : Elapsed 0.027 ms (2.666 ms / 100) B = [512, 256] (stride (1, 512)) A = [512, 255] (stride (255, 1)) dim = 1 5.630 -> 5.584 ( -0.82%) [ +0.00% +0.05% +0.18% / -0.75% -0.75% -0.82%] index_add_ linear : Elapsed 0.056 ms (5.630 ms / 100) 5.509 -> 5.459 ( -0.91%) [ +0.00% +0.09% +0.02% / -0.91% -0.53% -0.42%] index_copy_ linear : Elapsed 0.055 ms (5.509 ms / 100) 5.639 -> 5.585 ( -0.96%) [ +0.00% +0.32% +0.18% / -0.73% -0.96% -0.94%] index_add_ reverse : Elapsed 0.056 ms (5.639 ms / 100) 5.511 -> 5.464 ( -0.85%) [ +0.07% +0.00% +0.29% / -0.85% -0.80% -0.78%] index_copy_ reverse : Elapsed 0.055 ms (5.515 ms / 100) 5.611 -> 5.589 ( -0.39%) [ +0.32% +0.20% +0.00% / -0.25% -0.39% -0.39%] index_add_ spread : Elapsed 0.056 ms (5.629 ms / 100) 5.493 -> 5.465 ( -0.51%) [ +0.00% +0.29% +0.16% / -0.51% -0.27% -0.22%] index_copy_ spread : Elapsed 0.055 ms (5.493 ms / 100) 5.649 -> 5.594 ( -0.97%) [ +0.30% +0.00% +0.27% / -0.51% -0.97% -0.74%] index_add_ strided 3 : Elapsed 0.057 ms (5.666 ms / 100) 5.501 -> 5.467 ( -0.62%) [ +0.33% +0.00% +0.25% / -0.33% -0.62% -0.45%] index_copy_ strided 3 : Elapsed 0.055 ms (5.519 ms / 100) 5.623 -> 5.616 ( -0.12%) [ +0.50% +0.68% +0.00% / -0.12% +0.28% +0.39%] index_add_ strided 5 : Elapsed 0.057 ms (5.651 ms / 100) 5.493 -> 5.480 ( -0.24%) [ +0.35% +0.60% +0.00% / -0.24% +0.11% +0.40%] index_copy_ strided 5 : Elapsed 0.055 ms (5.512 ms / 100) 5.639 -> 5.597 ( -0.74%) [ +0.00% +0.34% +0.14% / -0.43% -0.60% -0.74%] index_add_ strided 7 : Elapsed 0.056 ms (5.639 ms / 100) 5.493 -> 5.473 ( -0.36%) [ +0.36% +0.25% +0.00% / -0.29% -0.13% -0.36%] index_copy_ strided 7 : Elapsed 0.055 ms (5.513 ms / 100) 5.611 -> 5.594 ( -0.30%) [ +0.48% +0.21% +0.00% / -0.30% -0.29% -0.02%] index_add_ strided 255 : Elapsed 0.056 ms (5.638 ms / 100) 5.504 -> 5.476 ( -0.51%) [ +0.29% +0.00% +0.00% / -0.51% -0.51% -0.40%] index_copy_ strided 255 : Elapsed 0.055 ms (5.520 ms / 100) 5.644 -> 5.587 ( -1.01%) [ +0.50% +0.27% +0.00% / -0.41% -0.89% -1.01%] index_add_ perm : Elapsed 0.057 ms (5.672 ms / 100) 5.511 -> 5.448 ( -1.14%) [ +0.56% +0.51% +0.00% / -0.29% -0.89% -1.14%] index_copy_ perm : Elapsed 0.055 ms (5.542 ms / 100) 5.621 -> 5.585 ( -0.64%) [ +0.00% +0.04% +0.04% / -0.39% -0.64% -0.57%] index_add_ perm_sorted : Elapsed 0.056 ms (5.621 ms / 100) 5.484 -> 5.470 ( -0.26%) [ +0.20% +0.26% +0.00% / -0.22% -0.26% +0.07%] index_copy_ perm_sorted : Elapsed 0.055 ms (5.495 ms / 100) 5.131 -> 5.121 ( -0.19%) [ +0.18% +0.00% +0.00% / +0.29% -0.19% -0.10%] index_select const : Elapsed 0.051 ms (5.140 ms / 100) 5.458 -> 5.468 ( +0.18%) [ +0.42% +0.31% +0.00% / +0.27% +0.18% +0.29%] index_select wrap : Elapsed 0.055 ms (5.481 ms / 100) 5.462 -> 5.459 ( -0.05%) [ +0.04% +0.11% +0.00% / -0.05% +0.29% +0.00%] index_select linear : Elapsed 0.055 ms (5.464 ms / 100) 5.462 -> 5.465 ( +0.05%) [ +0.13% +0.00% +0.04% / +0.16% +0.05% +0.26%] index_select reverse : Elapsed 0.055 ms (5.469 ms / 100) 5.151 -> 5.148 ( -0.06%) [ +0.00% +0.02% +0.04% / -0.06% +0.39% +0.39%] index_select skip64 : Elapsed 0.052 ms (5.151 ms / 100) 5.106 -> 5.120 ( +0.27%) [ +0.27% +0.06% +0.00% / +0.27% +0.39% +0.37%] index_select skip256 : Elapsed 0.051 ms (5.120 ms / 100) 5.462 -> 5.470 ( +0.15%) [ +0.00% +0.11% +0.33% / +0.15% +0.33% +0.26%] index_select spread : Elapsed 0.055 ms (5.462 ms / 100) 5.758 -> 5.755 ( -0.05%) [ +0.00% +0.19% +0.12% / +0.12% -0.05% +0.03%] index_select strided 3 : Elapsed 0.058 ms (5.758 ms / 100) 5.752 -> 5.734 ( -0.31%) [ +0.35% +0.00% +0.28% / +0.33% -0.31% +0.03%] index_select strided 5 : Elapsed 0.058 ms (5.772 ms / 100) 5.703 -> 5.706 ( +0.05%) [ +0.00% +0.02% +0.21% / +0.32% +0.11% +0.05%] index_select strided 7 : Elapsed 0.057 ms (5.703 ms / 100) 5.714 -> 5.700 ( -0.25%) [ +0.04% +0.00% +0.02% / -0.25% -0.16% +0.04%] index_select strided 8 : Elapsed 0.057 ms (5.716 ms / 100) 5.698 -> 5.689 ( -0.16%) [ +0.40% +0.35% +0.00% / +0.18% -0.16% +0.28%] index_select strided 16 : Elapsed 0.057 ms (5.721 ms / 100) 5.682 -> 5.679 ( -0.05%) [ +0.00% +0.18% +0.02% / -0.05% +0.04% +0.05%] index_select strided 64 : Elapsed 0.057 ms (5.682 ms / 100) 5.749 -> 5.744 ( -0.09%) [ +0.03% +0.30% +0.00% / -0.02% -0.09% +0.03%] index_select strided 100 : Elapsed 0.058 ms (5.751 ms / 100) 5.726 -> 5.742 ( +0.28%) [ +0.17% +0.33% +0.00% / +0.28% +0.35% +0.42%] index_select random : Elapsed 0.057 ms (5.736 ms / 100) 5.474 -> 5.466 ( -0.15%) [ +0.02% +0.00% +0.13% / -0.04% +0.04% -0.15%] index_select random_sorted : Elapsed 0.055 ms (5.475 ms / 100) B = [512, 256] (stride (1, 512)) A = [512, 255] (stride (1, 512)) dim = 1 5.359 -> 5.363 ( +0.07%) [ +0.00% +0.06% +0.09% / +0.09% +0.07% +0.26%] index_add_ linear : Elapsed 0.054 ms (5.359 ms / 100) 5.203 -> 5.193 ( -0.19%) [ +0.04% +0.13% +0.00% / -0.19% +0.17% +0.13%] index_copy_ linear : Elapsed 0.052 ms (5.205 ms / 100) 5.362 -> 5.352 ( -0.19%) [ +0.09% +0.00% +0.11% / +0.26% -0.19% -0.15%] index_add_ reverse : Elapsed 0.054 ms (5.367 ms / 100) 5.209 -> 5.193 ( -0.31%) [ +0.00% +0.21% +0.06% / -0.08% -0.08% -0.31%] index_copy_ reverse : Elapsed 0.052 ms (5.209 ms / 100) 5.367 -> 5.348 ( -0.35%) [ +0.13% +0.41% +0.00% / +0.00% -0.35% -0.35%] index_add_ spread : Elapsed 0.054 ms (5.374 ms / 100) 5.203 -> 5.198 ( -0.10%) [ +0.00% +0.27% +0.37% / +0.12% -0.10% -0.10%] index_copy_ spread : Elapsed 0.052 ms (5.203 ms / 100) 5.373 -> 5.371 ( -0.04%) [ +0.35% +0.00% +0.30% / -0.04% -0.04% -0.04%] index_add_ strided 3 : Elapsed 0.054 ms (5.392 ms / 100) 5.210 -> 5.204 ( -0.12%) [ +0.25% +0.00% +0.13% / -0.04% -0.02% -0.12%] index_copy_ strided 3 : Elapsed 0.052 ms (5.223 ms / 100) 5.368 -> 5.340 ( -0.52%) [ +0.00% +0.00% +0.15% / -0.52% +0.67% +0.50%] index_add_ strided 5 : Elapsed 0.054 ms (5.368 ms / 100) 5.204 -> 5.183 ( -0.40%) [ +0.00% +0.00% +0.21% / -0.40% +0.15% +0.17%] index_copy_ strided 5 : Elapsed 0.052 ms (5.204 ms / 100) 5.369 -> 5.361 ( -0.15%) [ +0.22% +0.20% +0.00% / +0.02% +0.00% -0.15%] index_add_ strided 7 : Elapsed 0.054 ms (5.381 ms / 100) 5.206 -> 5.194 ( -0.23%) [ +0.33% +0.29% +0.00% / +0.48% -0.13% -0.23%] index_copy_ strided 7 : Elapsed 0.052 ms (5.223 ms / 100) 5.355 -> 5.361 ( +0.11%) [ +0.45% +0.34% +0.00% / +0.11% +0.34% +0.35%] index_add_ strided 255 : Elapsed 0.054 ms (5.379 ms / 100) 5.214 -> 5.190 ( -0.46%) [ +0.00% +0.00% +0.04% / -0.46% -0.25% -0.44%] index_copy_ strided 255 : Elapsed 0.052 ms (5.214 ms / 100) 5.371 -> 5.380 ( +0.17%) [ +0.00% +0.04% +0.13% / +0.20% +0.17% +0.20%] index_add_ perm : Elapsed 0.054 ms (5.371 ms / 100) 5.208 -> 5.195 ( -0.25%) [ +0.31% +0.00% +0.13% / +0.10% -0.25% +0.08%] index_copy_ perm : Elapsed 0.052 ms (5.224 ms / 100) 5.359 -> 5.360 ( +0.02%) [ +0.00% +0.06% +0.11% / +0.02% +0.58% +0.49%] index_add_ perm_sorted : Elapsed 0.054 ms (5.359 ms / 100) 5.202 -> 5.209 ( +0.13%) [ +0.00% +0.31% +0.12% / +0.13% +0.54% +0.46%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.202 ms / 100) 4.951 -> 4.965 ( +0.28%) [ +0.42% +0.16% +0.00% / +0.28% +0.30% +0.32%] index_select const : Elapsed 0.050 ms (4.972 ms / 100) 5.202 -> 5.202 ( +0.00%) [ +0.00% +0.15% +0.06% / +0.37% +0.00% +0.17%] index_select wrap : Elapsed 0.052 ms (5.202 ms / 100) 5.203 -> 5.200 ( -0.06%) [ +0.08% +0.15% +0.00% / -0.04% -0.06% +0.04%] index_select linear : Elapsed 0.052 ms (5.207 ms / 100) 5.188 -> 5.205 ( +0.33%) [ +0.37% +0.00% +0.13% / +0.35% +0.37% +0.33%] index_select reverse : Elapsed 0.052 ms (5.207 ms / 100) 4.956 -> 4.968 ( +0.24%) [ +0.18% +0.10% +0.00% / +0.24% +0.38% +0.24%] index_select skip64 : Elapsed 0.050 ms (4.965 ms / 100) 4.947 -> 4.955 ( +0.16%) [ +0.28% +0.26% +0.00% / +0.46% +0.16% +0.75%] index_select skip256 : Elapsed 0.050 ms (4.961 ms / 100) 5.190 -> 5.194 ( +0.08%) [ +0.15% +0.00% +0.12% / +0.08% +0.10% +0.46%] index_select spread : Elapsed 0.052 ms (5.198 ms / 100) 5.088 -> 5.072 ( -0.31%) [ +0.00% +0.16% +0.10% / -0.20% -0.31% -0.10%] index_select strided 3 : Elapsed 0.051 ms (5.088 ms / 100) 5.036 -> 5.038 ( +0.04%) [ +0.28% +0.00% +0.18% / +0.38% +0.04% +0.30%] index_select strided 5 : Elapsed 0.051 ms (5.050 ms / 100) 5.217 -> 5.190 ( -0.52%) [ +0.00% +0.04% +0.13% / +0.25% -0.52% -0.21%] index_select strided 7 : Elapsed 0.052 ms (5.217 ms / 100) 5.195 -> 5.207 ( +0.23%) [ +0.46% +0.00% +0.40% / +0.23% +0.37% +0.25%] index_select strided 8 : Elapsed 0.052 ms (5.219 ms / 100) 5.200 -> 5.203 ( +0.06%) [ +0.46% +0.00% +0.21% / +0.06% +0.29% +0.35%] index_select strided 16 : Elapsed 0.052 ms (5.224 ms / 100) 5.205 -> 5.202 ( -0.06%) [ +0.15% +0.00% +0.15% / +0.15% -0.06% +0.04%] index_select strided 64 : Elapsed 0.052 ms (5.213 ms / 100) 5.013 -> 5.033 ( +0.40%) [ +0.36% +0.00% +0.12% / +0.46% +0.40% +0.44%] index_select strided 100 : Elapsed 0.050 ms (5.031 ms / 100) 5.137 -> 5.143 ( +0.12%) [ +0.39% +0.25% +0.00% / +0.12% +0.43% +0.47%] index_select random : Elapsed 0.052 ms (5.157 ms / 100) 5.115 -> 5.121 ( +0.12%) [ +0.10% +0.00% +0.16% / +0.12% +0.57% +0.65%] index_select random_sorted : Elapsed 0.051 ms (5.120 ms / 100) out_shape = [512, 256] in_shape = [255, 256] idx_dim = 0 B = [512, 256] (stride (256, 1)) dim = 0 fill_cnt = 255 2.414 -> 2.398 ( -0.66%) [ +0.00% +0.08% +0.12% / -0.33% -0.66% -0.66%] index_fill_ const : Elapsed 0.024 ms (2.414 ms / 100) 2.491 -> 2.460 ( -1.24%) [ +0.20% +0.28% +0.00% / -0.72% -1.04% -1.24%] index_fill_ linear : Elapsed 0.025 ms (2.496 ms / 100) 2.480 -> 2.460 ( -0.81%) [ +0.00% +0.24% +0.36% / -0.12% -0.81% -0.65%] index_fill_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.411 -> 2.401 ( -0.41%) [ +0.00% +0.21% +0.41% / -0.41% +0.29% +0.21%] index_fill_ skip64 : Elapsed 0.024 ms (2.411 ms / 100) 2.418 -> 2.404 ( -0.58%) [ +0.12% +0.41% +0.00% / -0.58% +0.04% -0.08%] index_fill_ skip256 : Elapsed 0.024 ms (2.421 ms / 100) 2.470 -> 2.478 ( +0.32%) [ +0.28% +0.77% +0.00% / +0.61% +0.32% +0.32%] index_fill_ spread : Elapsed 0.025 ms (2.477 ms / 100) 2.474 -> 2.457 ( -0.69%) [ +0.00% +0.12% +0.28% / -0.69% -0.61% -0.69%] index_fill_ strided 3 : Elapsed 0.025 ms (2.474 ms / 100) 2.460 -> 2.461 ( +0.04%) [ +0.37% +0.00% +0.53% / +0.04% +0.04% +0.08%] index_fill_ strided 5 : Elapsed 0.025 ms (2.469 ms / 100) 2.470 -> 2.460 ( -0.40%) [ +0.57% +0.32% +0.00% / -0.16% -0.40% -0.04%] index_fill_ strided 7 : Elapsed 0.025 ms (2.484 ms / 100) 2.441 -> 2.407 ( -1.39%) [ +0.00% +0.00% +0.33% / -0.53% -1.31% -1.39%] index_fill_ strided 8 : Elapsed 0.024 ms (2.441 ms / 100) 2.434 -> 2.423 ( -0.45%) [ +0.00% +0.16% +0.04% / +0.29% -0.37% -0.45%] index_fill_ strided 16 : Elapsed 0.024 ms (2.434 ms / 100) 2.411 -> 2.417 ( +0.25%) [ +0.54% +0.00% +0.17% / +0.25% +0.75% +0.62%] index_fill_ strided 64 : Elapsed 0.024 ms (2.424 ms / 100) 2.435 -> 2.424 ( -0.45%) [ +0.21% +0.21% +0.00% / -0.45% -0.21% +0.00%] index_fill_ strided 100 : Elapsed 0.024 ms (2.440 ms / 100) 2.470 -> 2.450 ( -0.81%) [ +0.32% +0.28% +0.00% / +0.04% -0.77% -0.81%] index_fill_ strided 255 : Elapsed 0.025 ms (2.478 ms / 100) 2.420 -> 2.392 ( -1.16%) [ +0.04% +0.00% +0.54% / -0.54% -1.12% -1.16%] index_fill_ strided 256 : Elapsed 0.024 ms (2.421 ms / 100) 2.471 -> 2.461 ( -0.40%) [ +0.61% +0.00% +0.73% / -0.40% -0.12% -0.04%] index_fill_ strided 257 : Elapsed 0.025 ms (2.486 ms / 100) 2.458 -> 2.445 ( -0.53%) [ +0.00% +0.28% +0.12% / -0.53% -0.20% -0.20%] index_fill_ random : Elapsed 0.025 ms (2.458 ms / 100) 2.454 -> 2.448 ( -0.24%) [ +0.29% +0.00% +0.45% / -0.12% -0.24% +0.04%] index_fill_ random_sorted : Elapsed 0.025 ms (2.461 ms / 100) 2.491 -> 2.479 ( -0.48%) [ +0.20% +0.00% +0.32% / -0.12% -0.48% -0.40%] index_fill_ perm : Elapsed 0.025 ms (2.496 ms / 100) 2.491 -> 2.473 ( -0.72%) [ +0.00% +0.08% +0.08% / -0.60% -0.64% -0.72%] index_fill_ perm_sorted : Elapsed 0.025 ms (2.491 ms / 100) B = [512, 256] (stride (256, 1)) A = [255, 256] (stride (256, 1)) dim = 0 4.894 -> 4.864 ( -0.61%) [ +0.33% +0.39% +0.00% / -0.12% -0.61% -0.51%] index_add_ linear : Elapsed 0.049 ms (4.910 ms / 100) 4.804 -> 4.774 ( -0.62%) [ +0.60% +0.71% +0.00% / +0.02% -0.62% -0.52%] index_copy_ linear : Elapsed 0.048 ms (4.833 ms / 100) 4.873 -> 4.873 ( +0.00%) [ +0.39% +0.60% +0.00% / +0.00% +0.29% +0.14%] index_add_ reverse : Elapsed 0.049 ms (4.892 ms / 100) 4.792 -> 4.766 ( -0.54%) [ +0.27% +0.40% +0.00% / -0.54% -0.17% +0.00%] index_copy_ reverse : Elapsed 0.048 ms (4.805 ms / 100) 4.873 -> 4.871 ( -0.04%) [ +0.33% +0.02% +0.00% / +0.04% +0.12% -0.04%] index_add_ spread : Elapsed 0.049 ms (4.889 ms / 100) 4.802 -> 4.781 ( -0.44%) [ +0.08% +0.04% +0.00% / -0.44% -0.44% -0.35%] index_copy_ spread : Elapsed 0.048 ms (4.806 ms / 100) 4.880 -> 4.868 ( -0.25%) [ +0.08% +0.10% +0.00% / -0.25% -0.14% +0.12%] index_add_ strided 3 : Elapsed 0.049 ms (4.884 ms / 100) 4.795 -> 4.778 ( -0.35%) [ +0.21% +0.00% +0.02% / -0.33% -0.35% -0.21%] index_copy_ strided 3 : Elapsed 0.048 ms (4.805 ms / 100) 4.854 -> 4.851 ( -0.06%) [ +0.31% +0.33% +0.00% / -0.06% +0.97% +0.74%] index_add_ strided 5 : Elapsed 0.049 ms (4.869 ms / 100) 4.775 -> 4.769 ( -0.13%) [ +0.44% +0.44% +0.00% / -0.13% +0.40% +0.40%] index_copy_ strided 5 : Elapsed 0.048 ms (4.796 ms / 100) 4.888 -> 4.860 ( -0.57%) [ +0.00% +0.04% +0.33% / -0.37% -0.41% -0.57%] index_add_ strided 7 : Elapsed 0.049 ms (4.888 ms / 100) 4.799 -> 4.771 ( -0.58%) [ +0.00% +0.38% +0.29% / -0.25% -0.42% -0.58%] index_copy_ strided 7 : Elapsed 0.048 ms (4.799 ms / 100) 4.879 -> 4.880 ( +0.02%) [ +0.66% +0.00% +0.06% / +0.02% +0.82% +0.55%] index_add_ strided 255 : Elapsed 0.049 ms (4.911 ms / 100) 4.796 -> 4.786 ( -0.21%) [ +0.48% +0.10% +0.00% / -0.21% +0.60% +0.33%] index_copy_ strided 255 : Elapsed 0.048 ms (4.819 ms / 100) 4.918 -> 4.885 ( -0.67%) [ +0.00% +0.28% +0.33% / -0.37% -0.65% -0.67%] index_add_ strided 257 : Elapsed 0.049 ms (4.918 ms / 100) 4.829 -> 4.797 ( -0.66%) [ +0.00% +0.29% +0.39% / -0.17% -0.39% -0.66%] index_copy_ strided 257 : Elapsed 0.048 ms (4.829 ms / 100) 4.893 -> 4.858 ( -0.72%) [ +0.39% +0.00% +0.14% / +0.47% -0.33% -0.72%] index_add_ perm : Elapsed 0.049 ms (4.912 ms / 100) 4.809 -> 4.775 ( -0.71%) [ +0.17% +0.27% +0.00% / +0.21% -0.54% -0.71%] index_copy_ perm : Elapsed 0.048 ms (4.817 ms / 100) 4.878 -> 4.837 ( -0.84%) [ +0.06% +0.00% +0.04% / -0.84% -0.16% -0.21%] index_add_ perm_sorted : Elapsed 0.049 ms (4.881 ms / 100) 4.790 -> 4.761 ( -0.61%) [ +0.13% +0.27% +0.00% / -0.61% -0.31% -0.19%] index_copy_ perm_sorted : Elapsed 0.048 ms (4.796 ms / 100) 4.968 -> 4.967 ( -0.02%) [ +0.28% +0.00% +0.20% / +0.20% -0.02% +0.06%] index_select const : Elapsed 0.050 ms (4.982 ms / 100) 5.116 -> 5.117 ( +0.02%) [ +0.18% +0.22% +0.00% / +0.39% +0.02% +0.20%] index_select wrap : Elapsed 0.051 ms (5.125 ms / 100) 5.090 -> 5.093 ( +0.06%) [ +0.14% +0.00% +0.28% / +0.18% +0.06% +0.14%] index_select linear : Elapsed 0.051 ms (5.097 ms / 100) 5.079 -> 5.095 ( +0.32%) [ +0.06% +0.00% +0.20% / +0.32% +1.12% +0.87%] index_select reverse : Elapsed 0.051 ms (5.082 ms / 100) 4.958 -> 4.957 ( -0.02%) [ +0.14% +0.00% +0.16% / +0.04% -0.02% +0.34%] index_select skip64 : Elapsed 0.050 ms (4.965 ms / 100) 4.956 -> 4.967 ( +0.22%) [ +0.00% +0.14% +0.30% / +0.32% +0.22% +0.32%] index_select skip256 : Elapsed 0.050 ms (4.956 ms / 100) 5.067 -> 5.070 ( +0.06%) [ +0.06% +0.26% +0.00% / +0.06% +0.30% +0.49%] index_select spread : Elapsed 0.051 ms (5.070 ms / 100) 5.070 -> 5.012 ( -1.14%) [ +0.08% +0.20% +0.00% / +0.10% -1.14% -0.93%] index_select strided 3 : Elapsed 0.051 ms (5.074 ms / 100) 4.972 -> 4.983 ( +0.22%) [ +0.00% +0.16% +0.20% / +0.22% +0.72% +0.48%] index_select strided 5 : Elapsed 0.050 ms (4.972 ms / 100) 5.115 -> 5.103 ( -0.23%) [ +0.00% +0.25% +0.23% / +0.22% -0.23% +0.06%] index_select strided 7 : Elapsed 0.051 ms (5.115 ms / 100) 5.127 -> 5.126 ( -0.02%) [ +0.00% +0.04% +0.16% / -0.02% +0.53% +0.37%] index_select strided 8 : Elapsed 0.051 ms (5.127 ms / 100) 5.153 -> 5.129 ( -0.47%) [ +0.00% +0.04% +0.04% / +0.14% -0.45% -0.47%] index_select strided 16 : Elapsed 0.052 ms (5.153 ms / 100) 5.109 -> 5.114 ( +0.10%) [ +0.22% +0.00% +0.12% / +0.10% +0.22% +0.22%] index_select strided 64 : Elapsed 0.051 ms (5.120 ms / 100) 4.973 -> 4.978 ( +0.10%) [ +0.00% +0.16% +0.22% / +0.10% +0.80% +0.80%] index_select strided 100 : Elapsed 0.050 ms (4.973 ms / 100) 5.131 -> 5.078 ( -1.03%) [ +0.12% +0.00% +0.06% / +0.14% -0.90% -1.03%] index_select random : Elapsed 0.051 ms (5.137 ms / 100) 5.067 -> 5.085 ( +0.36%) [ +0.26% +0.00% +0.16% / +0.36% +0.45% +0.85%] index_select random_sorted : Elapsed 0.051 ms (5.080 ms / 100) B = [512, 256] (stride (256, 1)) A = [255, 256] (stride (1, 255)) dim = 0 5.008 -> 4.969 ( -0.78%) [ +0.20% +0.00% +0.06% / -0.64% -0.78% -0.52%] index_add_ linear : Elapsed 0.050 ms (5.018 ms / 100) 4.931 -> 4.898 ( -0.67%) [ +0.24% +0.00% +0.04% / +0.00% -0.67% -0.51%] index_copy_ linear : Elapsed 0.049 ms (4.943 ms / 100) 4.991 -> 4.966 ( -0.50%) [ +0.18% +0.24% +0.00% / -0.28% -0.50% -0.28%] index_add_ reverse : Elapsed 0.050 ms (5.000 ms / 100) 4.912 -> 4.893 ( -0.39%) [ +0.49% +0.00% +0.04% / -0.39% -0.24% -0.18%] index_copy_ reverse : Elapsed 0.049 ms (4.936 ms / 100) 4.997 -> 4.971 ( -0.52%) [ +0.00% +0.00% +0.26% / -0.36% -0.52% -0.48%] index_add_ spread : Elapsed 0.050 ms (4.997 ms / 100) 4.914 -> 4.892 ( -0.45%) [ +0.00% +0.28% +0.35% / +0.12% -0.45% -0.33%] index_copy_ spread : Elapsed 0.049 ms (4.914 ms / 100) 4.978 -> 4.973 ( -0.10%) [ +0.00% +0.26% +0.22% / -0.10% +0.14% +0.14%] index_add_ strided 3 : Elapsed 0.050 ms (4.978 ms / 100) 4.905 -> 4.896 ( -0.18%) [ +0.00% +0.06% +0.06% / -0.18% +0.14% +0.10%] index_copy_ strided 3 : Elapsed 0.049 ms (4.905 ms / 100) 4.986 -> 4.971 ( -0.30%) [ +0.10% +0.00% +0.24% / -0.24% -0.30% -0.14%] index_add_ strided 5 : Elapsed 0.050 ms (4.991 ms / 100) 4.897 -> 4.901 ( +0.08%) [ +0.22% +0.00% +0.25% / +0.08% +0.25% +0.08%] index_copy_ strided 5 : Elapsed 0.049 ms (4.908 ms / 100) 4.997 -> 4.971 ( -0.52%) [ +0.08% +0.00% +0.24% / -0.40% -0.50% -0.52%] index_add_ strided 7 : Elapsed 0.050 ms (5.001 ms / 100) 4.909 -> 4.899 ( -0.20%) [ +0.47% +0.37% +0.00% / +0.02% -0.20% -0.08%] index_copy_ strided 7 : Elapsed 0.049 ms (4.932 ms / 100) 4.995 -> 4.989 ( -0.12%) [ +0.36% +0.00% +0.04% / -0.12% -0.12% -0.08%] index_add_ strided 255 : Elapsed 0.050 ms (5.013 ms / 100) 4.920 -> 4.926 ( +0.12%) [ +0.49% +0.08% +0.00% / +0.12% +0.28% +0.16%] index_copy_ strided 255 : Elapsed 0.049 ms (4.944 ms / 100) 5.014 -> 4.982 ( -0.64%) [ +0.00% +0.10% +0.08% / -0.28% -0.64% -0.54%] index_add_ strided 257 : Elapsed 0.050 ms (5.014 ms / 100) 4.939 -> 4.916 ( -0.47%) [ +0.24% +0.00% +0.02% / -0.40% -0.26% -0.47%] index_copy_ strided 257 : Elapsed 0.050 ms (4.951 ms / 100) 5.006 -> 4.975 ( -0.62%) [ +0.46% +0.00% +0.12% / -0.14% -0.58% -0.62%] index_add_ perm : Elapsed 0.050 ms (5.029 ms / 100) 4.925 -> 4.899 ( -0.53%) [ +0.16% +0.14% +0.00% / +0.41% -0.53% -0.39%] index_copy_ perm : Elapsed 0.049 ms (4.933 ms / 100) 5.005 -> 4.975 ( -0.60%) [ +0.20% +0.24% +0.00% / -0.44% -0.60% -0.50%] index_add_ perm_sorted : Elapsed 0.050 ms (5.015 ms / 100) 4.911 -> 4.879 ( -0.65%) [ +0.39% +0.59% +0.00% / -0.10% -0.24% -0.65%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.930 ms / 100) 5.137 -> 5.118 ( -0.37%) [ +0.14% +0.04% +0.00% / +0.02% -0.29% -0.37%] index_select const : Elapsed 0.051 ms (5.144 ms / 100) 5.288 -> 5.298 ( +0.19%) [ +0.06% +0.00% +0.08% / +0.43% +0.19% +0.25%] index_select wrap : Elapsed 0.053 ms (5.291 ms / 100) 5.286 -> 5.285 ( -0.02%) [ +0.00% +0.02% +0.19% / -0.02% +0.26% +0.11%] index_select linear : Elapsed 0.053 ms (5.286 ms / 100) 5.157 -> 5.167 ( +0.19%) [ +0.04% +0.04% +0.00% / +0.19% +0.50% +0.66%] index_select reverse : Elapsed 0.052 ms (5.159 ms / 100) 5.136 -> 5.141 ( +0.10%) [ +0.00% +0.16% +0.00% / +0.12% +0.27% +0.10%] index_select skip64 : Elapsed 0.051 ms (5.136 ms / 100) 5.108 -> 5.100 ( -0.16%) [ +0.08% +0.06% +0.00% / +0.18% -0.12% -0.16%] index_select skip256 : Elapsed 0.051 ms (5.112 ms / 100) 5.214 -> 5.208 ( -0.12%) [ +0.19% +0.23% +0.00% / -0.12% +0.21% +0.19%] index_select spread : Elapsed 0.052 ms (5.224 ms / 100) 5.335 -> 5.322 ( -0.24%) [ +0.19% +0.00% +0.13% / +0.19% -0.24% -0.24%] index_select strided 3 : Elapsed 0.053 ms (5.345 ms / 100) 5.309 -> 5.306 ( -0.06%) [ +0.08% +0.00% +0.41% / +0.28% -0.06% -0.06%] index_select strided 5 : Elapsed 0.053 ms (5.313 ms / 100) 5.318 -> 5.300 ( -0.34%) [ +0.23% +0.00% +0.15% / +0.08% -0.34% -0.21%] index_select strided 7 : Elapsed 0.053 ms (5.330 ms / 100) 5.326 -> 5.321 ( -0.09%) [ +0.00% +0.08% +0.15% / -0.09% +0.21% +0.19%] index_select strided 8 : Elapsed 0.053 ms (5.326 ms / 100) 5.342 -> 5.321 ( -0.39%) [ +0.00% +0.15% +0.39% / +0.07% -0.19% -0.39%] index_select strided 16 : Elapsed 0.053 ms (5.342 ms / 100) 5.306 -> 5.313 ( +0.13%) [ +0.00% +0.25% +0.19% / +0.13% +0.25% +0.19%] index_select strided 64 : Elapsed 0.053 ms (5.306 ms / 100) 5.296 -> 5.303 ( +0.13%) [ +0.21% +0.00% +0.38% / +0.17% +0.13% +0.28%] index_select strided 100 : Elapsed 0.053 ms (5.307 ms / 100) 5.341 -> 5.341 ( +0.00%) [ +0.00% +0.06% +0.17% / +0.02% +0.04% +0.00%] index_select random : Elapsed 0.053 ms (5.341 ms / 100) 5.213 -> 5.219 ( +0.12%) [ +0.00% +0.06% +0.31% / +0.12% +0.54% +0.40%] index_select random_sorted : Elapsed 0.052 ms (5.213 ms / 100) B = [512, 256] (stride (1, 512)) dim = 0 fill_cnt = 255 good 2.656 -> 2.411 ( -9.22%) [ +0.00% +0.00% +0.23% / -8.96% -9.22% -9.07%] index_fill_ const : Elapsed 0.027 ms (2.656 ms / 100) good 2.694 -> 2.430 ( -9.80%) [ +0.07% +0.00% +0.07% / -8.98% -9.80% -9.47%] index_fill_ linear : Elapsed 0.027 ms (2.696 ms / 100) good 2.673 -> 2.447 ( -8.45%) [ +0.56% +0.64% +0.00% / -8.45% -8.34% -8.27%] index_fill_ reverse : Elapsed 0.027 ms (2.688 ms / 100) good 2.653 -> 2.398 ( -9.61%) [ +0.00% +0.08% +0.11% / -9.61% -9.23% -9.35%] index_fill_ skip64 : Elapsed 0.027 ms (2.653 ms / 100) good 2.642 -> 2.395 ( -9.35%) [ +0.00% +0.23% +0.04% / -9.16% -9.35% -9.08%] index_fill_ skip256 : Elapsed 0.026 ms (2.642 ms / 100) good 2.917 -> 2.688 ( -7.85%) [ +0.24% +0.00% +0.03% / -7.85% -7.40% -7.40%] index_fill_ spread : Elapsed 0.029 ms (2.924 ms / 100) good 2.954 -> 2.697 ( -8.70%) [ +0.54% +0.17% +0.00% / -8.70% -8.50% -8.36%] index_fill_ strided 3 : Elapsed 0.030 ms (2.970 ms / 100) good 3.010 -> 2.755 ( -8.47%) [ +0.23% +0.00% +0.17% / -8.11% -8.47% -8.21%] index_fill_ strided 5 : Elapsed 0.030 ms (3.017 ms / 100) good 3.027 -> 2.806 ( -7.30%) [ +0.03% +0.00% +0.13% / -7.20% -7.30% -6.84%] index_fill_ strided 7 : Elapsed 0.030 ms (3.028 ms / 100) good 3.025 -> 2.834 ( -6.31%) [ +0.20% +0.17% +0.00% / -5.69% -6.31% -6.25%] index_fill_ strided 8 : Elapsed 0.030 ms (3.031 ms / 100) good 2.829 -> 2.656 ( -6.12%) [ +0.00% +0.00% +0.11% / -6.12% -5.55% -5.44%] index_fill_ strided 16 : Elapsed 0.028 ms (2.829 ms / 100) good 2.685 -> 2.449 ( -8.79%) [ +0.04% +0.00% +0.19% / -8.79% -8.27% -8.23%] index_fill_ strided 64 : Elapsed 0.027 ms (2.686 ms / 100) 3.032 -> 2.899 ( -4.39%) [ +0.00% +0.07% +0.00% / -3.59% -4.16% -4.39%] index_fill_ strided 100 : Elapsed 0.030 ms (3.032 ms / 100) good 2.916 -> 2.690 ( -7.75%) [ +0.31% +0.31% +0.00% / -7.75% -7.27% -7.24%] index_fill_ strided 255 : Elapsed 0.029 ms (2.925 ms / 100) Good 2.662 -> 2.387 (-10.33%) [ +0.30% +0.34% +0.00% / -9.88% -9.95% -10.33%] index_fill_ strided 256 : Elapsed 0.027 ms (2.670 ms / 100) good 2.928 -> 2.704 ( -7.65%) [ +0.20% +0.00% +0.03% / -7.38% -7.65% -7.55%] index_fill_ strided 257 : Elapsed 0.029 ms (2.934 ms / 100) 3.014 -> 2.868 ( -4.84%) [ +0.10% +0.10% +0.00% / -4.84% -4.61% -4.38%] index_fill_ random : Elapsed 0.030 ms (3.017 ms / 100) good 2.904 -> 2.680 ( -7.71%) [ +0.07% +0.00% +0.14% / -7.71% -7.37% -7.13%] index_fill_ random_sorted : Elapsed 0.029 ms (2.906 ms / 100) good 3.036 -> 2.880 ( -5.14%) [ +0.13% +0.00% +0.13% / -4.41% -5.07% -5.14%] index_fill_ perm : Elapsed 0.030 ms (3.040 ms / 100) good 2.914 -> 2.681 ( -8.00%) [ +0.00% +0.03% +0.17% / -7.52% -8.00% -7.45%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.914 ms / 100) B = [512, 256] (stride (1, 512)) A = [255, 256] (stride (256, 1)) dim = 0 5.075 -> 4.963 ( -2.21%) [ +0.22% +0.00% +0.14% / -1.87% -2.21% -1.97%] index_add_ linear : Elapsed 0.051 ms (5.086 ms / 100) 4.869 -> 4.895 ( +0.53%) [ +0.18% +0.10% +0.00% / +1.01% +0.60% +0.53%] index_copy_ linear : Elapsed 0.049 ms (4.878 ms / 100) 5.063 -> 4.976 ( -1.72%) [ +0.26% +0.34% +0.00% / -1.68% -1.72% -1.62%] index_add_ reverse : Elapsed 0.051 ms (5.076 ms / 100) 4.857 -> 4.905 ( +0.99%) [ +0.10% +0.25% +0.00% / +0.99% +1.30% +1.21%] index_copy_ reverse : Elapsed 0.049 ms (4.862 ms / 100) 5.351 -> 5.198 ( -2.86%) [ +0.37% +0.00% +0.19% / -2.69% -2.82% -2.86%] index_add_ spread : Elapsed 0.054 ms (5.371 ms / 100) 5.175 -> 5.225 ( +0.97%) [ +0.33% +0.00% +0.15% / +1.18% +0.97% +0.97%] index_copy_ spread : Elapsed 0.052 ms (5.192 ms / 100) 5.336 -> 5.209 ( -2.38%) [ +0.13% +0.00% +0.04% / -2.27% -1.82% -2.38%] index_add_ strided 3 : Elapsed 0.053 ms (5.343 ms / 100) 5.168 -> 5.239 ( +1.37%) [ +0.00% +0.33% +0.23% / +1.47% +1.37% +1.63%] index_copy_ strided 3 : Elapsed 0.052 ms (5.168 ms / 100) 5.377 -> 5.246 ( -2.44%) [ +0.26% +0.00% +0.30% / -2.40% -2.38% -2.44%] index_add_ strided 5 : Elapsed 0.054 ms (5.391 ms / 100) 5.206 -> 5.231 ( +0.48%) [ +0.27% +0.00% +0.19% / +0.61% +0.50% +0.48%] index_copy_ strided 5 : Elapsed 0.052 ms (5.220 ms / 100) 5.383 -> 5.274 ( -2.02%) [ +0.45% +0.00% +0.06% / -2.02% -1.88% -1.71%] index_add_ strided 7 : Elapsed 0.054 ms (5.407 ms / 100) 5.233 -> 5.244 ( +0.21%) [ +0.36% +0.00% +0.06% / +0.54% +0.21% +0.34%] index_copy_ strided 7 : Elapsed 0.053 ms (5.252 ms / 100) 5.376 -> 5.203 ( -3.22%) [ +0.22% +0.28% +0.00% / -3.22% -2.90% -2.86%] index_add_ strided 255 : Elapsed 0.054 ms (5.388 ms / 100) 5.143 -> 5.220 ( +1.50%) [ +0.33% +0.27% +0.00% / +1.50% +1.83% +1.69%] index_copy_ strided 255 : Elapsed 0.052 ms (5.160 ms / 100) 5.362 -> 5.227 ( -2.52%) [ +0.41% +0.00% +0.32% / -2.44% -2.52% -2.35%] index_add_ strided 257 : Elapsed 0.054 ms (5.384 ms / 100) 5.153 -> 5.233 ( +1.55%) [ +0.14% +0.00% +0.00% / +2.15% +1.55% +1.69%] index_copy_ strided 257 : Elapsed 0.052 ms (5.160 ms / 100) 5.424 -> 5.347 ( -1.42%) [ +0.00% +0.41% +0.61% / -1.42% -0.59% -0.68%] index_add_ perm : Elapsed 0.054 ms (5.424 ms / 100) 5.226 -> 5.281 ( +1.05%) [ +0.04% +0.34% +0.00% / +1.05% +1.44% +1.51%] index_copy_ perm : Elapsed 0.052 ms (5.228 ms / 100) 5.337 -> 5.182 ( -2.90%) [ +0.15% +0.00% +0.04% / -2.90% -2.66% -2.59%] index_add_ perm_sorted : Elapsed 0.053 ms (5.345 ms / 100) 5.141 -> 5.223 ( +1.60%) [ +0.00% +0.35% +0.00% / +1.60% +1.93% +1.79%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.141 ms / 100) good 5.303 -> 4.970 ( -6.28%) [ +0.00% +0.19% +0.21% / -6.02% -6.24% -6.28%] index_select const : Elapsed 0.053 ms (5.303 ms / 100) 5.309 -> 5.204 ( -1.98%) [ +0.26% +0.00% +0.19% / -1.98% -1.66% -1.81%] index_select wrap : Elapsed 0.053 ms (5.323 ms / 100) 5.313 -> 5.135 ( -3.35%) [ +0.09% +0.00% +0.21% / -3.35% -3.01% -3.03%] index_select linear : Elapsed 0.053 ms (5.318 ms / 100) 5.289 -> 5.183 ( -2.00%) [ +0.00% +0.13% +0.19% / -2.00% -1.61% -1.66%] index_select reverse : Elapsed 0.053 ms (5.289 ms / 100) good 5.336 -> 4.972 ( -6.82%) [ +0.13% +0.00% +0.00% / -6.71% -6.82% -6.75%] index_select skip64 : Elapsed 0.053 ms (5.343 ms / 100) good 5.327 -> 4.956 ( -6.96%) [ +0.17% +0.00% +0.17% / -6.63% -6.74% -6.96%] index_select skip256 : Elapsed 0.053 ms (5.336 ms / 100) 5.272 -> 5.194 ( -1.48%) [ +0.00% +0.02% +0.11% / -1.48% -1.18% -1.21%] index_select spread : Elapsed 0.053 ms (5.272 ms / 100) 5.331 -> 5.347 ( +0.30%) [ +0.09% +0.00% +0.09% / +0.30% +0.39% +0.41%] index_select strided 3 : Elapsed 0.053 ms (5.336 ms / 100) 5.275 -> 5.271 ( -0.08%) [ +0.13% +0.00% +0.15% / -0.08% +0.61% +0.72%] index_select strided 5 : Elapsed 0.053 ms (5.282 ms / 100) 5.305 -> 5.261 ( -0.83%) [ +0.25% +0.00% +0.02% / -0.83% -0.23% -0.30%] index_select strided 7 : Elapsed 0.053 ms (5.318 ms / 100) 5.320 -> 5.207 ( -2.12%) [ +0.06% +0.00% +0.11% / -2.12% -1.24% -1.32%] index_select strided 8 : Elapsed 0.053 ms (5.323 ms / 100) 5.300 -> 5.264 ( -0.68%) [ +0.40% +0.00% +0.21% / -0.68% -0.23% -0.47%] index_select strided 16 : Elapsed 0.053 ms (5.321 ms / 100) 5.306 -> 5.253 ( -1.00%) [ +0.45% +0.00% +0.13% / -0.74% -0.89% -1.00%] index_select strided 64 : Elapsed 0.053 ms (5.330 ms / 100) 5.278 -> 5.325 ( +0.89%) [ +0.02% +0.00% +0.11% / +0.89% +1.40% +1.38%] index_select strided 100 : Elapsed 0.053 ms (5.279 ms / 100) 5.294 -> 5.219 ( -1.42%) [ +0.26% +0.06% +0.00% / -1.42% -0.94% -0.85%] index_select random : Elapsed 0.053 ms (5.308 ms / 100) 5.278 -> 5.187 ( -1.72%) [ +0.34% +0.00% +0.09% / -1.31% -1.53% -1.72%] index_select random_sorted : Elapsed 0.053 ms (5.296 ms / 100) B = [512, 256] (stride (1, 512)) A = [255, 256] (stride (1, 255)) dim = 0 good 5.199 -> 4.883 ( -6.08%) [ +0.00% +0.13% +0.10% / -5.73% -6.08% -5.79%] index_add_ linear : Elapsed 0.052 ms (5.199 ms / 100) 5.028 -> 4.791 ( -4.71%) [ +0.00% +0.32% +0.14% / -4.59% -4.69% -4.71%] index_copy_ linear : Elapsed 0.050 ms (5.028 ms / 100) good 5.213 -> 4.894 ( -6.12%) [ +0.15% +0.33% +0.00% / -6.12% -5.91% -5.97%] index_add_ reverse : Elapsed 0.052 ms (5.221 ms / 100) 5.029 -> 4.790 ( -4.75%) [ +0.20% +0.40% +0.00% / -4.75% -4.51% -4.73%] index_copy_ reverse : Elapsed 0.050 ms (5.039 ms / 100) good 5.477 -> 5.076 ( -7.32%) [ +0.49% +0.00% +0.27% / -7.25% -7.07% -7.32%] index_add_ spread : Elapsed 0.055 ms (5.504 ms / 100) 5.317 -> 5.080 ( -4.46%) [ +0.30% +0.00% +0.32% / -3.99% -4.46% -4.42%] index_copy_ spread : Elapsed 0.053 ms (5.333 ms / 100) good 5.502 -> 5.092 ( -7.45%) [ +0.24% +0.31% +0.00% / -7.45% -6.89% -7.07%] index_add_ strided 3 : Elapsed 0.055 ms (5.515 ms / 100) good 5.378 -> 5.098 ( -5.21%) [ +0.09% +0.32% +0.00% / -5.09% -5.09% -5.21%] index_copy_ strided 3 : Elapsed 0.054 ms (5.383 ms / 100) good 5.577 -> 5.130 ( -8.02%) [ +0.14% +0.00% +0.36% / -7.76% -8.02% -7.87%] index_add_ strided 5 : Elapsed 0.056 ms (5.585 ms / 100) good 5.453 -> 5.101 ( -6.46%) [ +0.00% +0.17% +0.18% / -5.98% -6.46% -6.22%] index_copy_ strided 5 : Elapsed 0.055 ms (5.453 ms / 100) good 5.539 -> 5.163 ( -6.79%) [ +0.34% +0.83% +0.00% / -6.79% -6.63% -6.37%] index_add_ strided 7 : Elapsed 0.056 ms (5.558 ms / 100) good 5.447 -> 5.112 ( -6.15%) [ +0.33% +0.29% +0.00% / -5.95% -6.15% -6.08%] index_copy_ strided 7 : Elapsed 0.055 ms (5.465 ms / 100) good 5.566 -> 5.107 ( -8.25%) [ +0.00% +0.29% +0.18% / -7.92% -8.25% -7.96%] index_add_ strided 255 : Elapsed 0.056 ms (5.566 ms / 100) good 5.375 -> 5.090 ( -5.30%) [ +0.02% +0.00% +0.09% / -4.97% -5.30% -5.27%] index_copy_ strided 255 : Elapsed 0.054 ms (5.376 ms / 100) good 5.530 -> 5.081 ( -8.12%) [ +0.18% +0.38% +0.00% / -8.12% -7.52% -7.50%] index_add_ strided 257 : Elapsed 0.055 ms (5.540 ms / 100) 5.345 -> 5.082 ( -4.92%) [ +0.30% +0.28% +0.00% / -4.92% -4.68% -4.75%] index_copy_ strided 257 : Elapsed 0.054 ms (5.361 ms / 100) good 5.616 -> 5.205 ( -7.32%) [ +0.61% +0.00% +0.02% / -7.32% -7.21% -7.16%] index_add_ perm : Elapsed 0.057 ms (5.650 ms / 100) good 5.450 -> 5.099 ( -6.44%) [ +0.06% +0.02% +0.00% / -6.04% -6.44% -6.35%] index_copy_ perm : Elapsed 0.055 ms (5.453 ms / 100) good 5.479 -> 5.065 ( -7.56%) [ +0.53% +0.26% +0.00% / -7.56% -6.94% -6.99%] index_add_ perm_sorted : Elapsed 0.055 ms (5.508 ms / 100) 5.330 -> 5.076 ( -4.77%) [ +0.47% +0.09% +0.00% / -4.77% -4.33% -4.28%] index_copy_ perm_sorted : Elapsed 0.054 ms (5.355 ms / 100) Good 5.620 -> 4.962 (-11.71%) [ +0.02% +0.05% +0.00% / -11.71% -11.51% -11.48%] index_select const : Elapsed 0.056 ms (5.621 ms / 100) Good 5.725 -> 5.100 (-10.92%) [ +0.30% +0.00% +0.19% / -10.90% -10.92% -10.90%] index_select wrap : Elapsed 0.057 ms (5.742 ms / 100) Good 5.738 -> 5.095 (-11.21%) [ +0.09% +0.03% +0.00% / -11.21% -10.65% -10.61%] index_select linear : Elapsed 0.057 ms (5.743 ms / 100) good 5.612 -> 5.146 ( -8.30%) [ +0.05% +0.00% +0.12% / -8.30% -7.82% -7.93%] index_select reverse : Elapsed 0.056 ms (5.615 ms / 100) Good 5.642 -> 4.997 (-11.43%) [ +0.23% +0.00% +0.11% / -11.40% -11.15% -11.43%] index_select skip64 : Elapsed 0.057 ms (5.655 ms / 100) Good 5.679 -> 4.984 (-12.24%) [ +0.14% +0.00% +0.11% / -12.11% -12.24% -12.10%] index_select skip256 : Elapsed 0.057 ms (5.687 ms / 100) good 5.676 -> 5.167 ( -8.97%) [ +0.00% +0.09% +0.07% / -8.97% -8.93% -8.90%] index_select spread : Elapsed 0.057 ms (5.676 ms / 100) Good 5.797 -> 5.082 (-12.33%) [ +0.10% +0.00% +0.19% / -12.01% -12.13% -12.33%] index_select strided 3 : Elapsed 0.058 ms (5.803 ms / 100) Good 5.797 -> 5.102 (-11.99%) [ +0.00% +0.03% +0.19% / -11.68% -11.99% -11.87%] index_select strided 5 : Elapsed 0.058 ms (5.797 ms / 100) Good 5.807 -> 5.116 (-11.90%) [ +0.14% +0.00% +0.28% / -11.35% -11.90% -11.83%] index_select strided 7 : Elapsed 0.058 ms (5.815 ms / 100) Good 5.804 -> 5.109 (-11.97%) [ +0.24% +0.00% +0.12% / -11.97% -11.60% -11.78%] index_select strided 8 : Elapsed 0.058 ms (5.818 ms / 100) Good 5.795 -> 5.104 (-11.92%) [ +0.12% +0.00% +0.07% / -11.92% -11.80% -11.72%] index_select strided 16 : Elapsed 0.058 ms (5.802 ms / 100) Good 5.756 -> 5.132 (-10.84%) [ +0.00% +0.16% +0.03% / -10.67% -10.84% -10.84%] index_select strided 64 : Elapsed 0.058 ms (5.756 ms / 100) Good 5.816 -> 5.138 (-11.66%) [ +0.09% +0.02% +0.00% / -11.43% -11.66% -11.62%] index_select strided 100 : Elapsed 0.058 ms (5.821 ms / 100) Good 5.805 -> 5.115 (-11.89%) [ +0.05% +0.09% +0.00% / -11.89% -11.71% -11.58%] index_select random : Elapsed 0.058 ms (5.808 ms / 100) good 5.689 -> 5.158 ( -9.33%) [ +0.00% +0.02% +0.05% / -9.33% -9.12% -9.28%] index_select random_sorted : Elapsed 0.057 ms (5.689 ms / 100) out_shape = [255, 512] in_shape = [255, 256] idx_dim = 1 B = [255, 512] (stride (512, 1)) dim = 1 fill_cnt = 256 good 2.651 -> 2.395 ( -9.66%) [ +0.19% +0.08% +0.00% / -9.66% -9.32% -9.62%] index_fill_ const : Elapsed 0.027 ms (2.656 ms / 100) good 2.665 -> 2.454 ( -7.92%) [ +0.15% +0.00% +0.15% / -7.69% -7.88% -7.92%] index_fill_ linear : Elapsed 0.027 ms (2.669 ms / 100) good 2.688 -> 2.461 ( -8.44%) [ +0.37% +0.00% +0.22% / -8.44% -8.30% -8.33%] index_fill_ reverse : Elapsed 0.027 ms (2.698 ms / 100) good 2.646 -> 2.388 ( -9.75%) [ +0.64% +0.00% +0.23% / -9.75% -9.30% -9.15%] index_fill_ skip64 : Elapsed 0.027 ms (2.663 ms / 100) good 2.646 -> 2.395 ( -9.49%) [ +0.00% +0.15% +0.26% / -9.22% -9.49% -9.33%] index_fill_ skip256 : Elapsed 0.026 ms (2.646 ms / 100) good 2.920 -> 2.725 ( -6.68%) [ +0.00% +0.07% +0.27% / -6.68% -6.47% -6.40%] index_fill_ spread : Elapsed 0.029 ms (2.920 ms / 100) good 2.951 -> 2.699 ( -8.54%) [ +0.51% +0.00% +0.41% / -8.34% -8.54% -8.27%] index_fill_ strided 3 : Elapsed 0.030 ms (2.966 ms / 100) good 3.003 -> 2.716 ( -9.56%) [ +0.20% +0.10% +0.00% / -9.09% -9.42% -9.56%] index_fill_ strided 5 : Elapsed 0.030 ms (3.009 ms / 100) good 3.026 -> 2.774 ( -8.33%) [ +0.00% +0.23% +0.40% / -8.26% -8.23% -8.33%] index_fill_ strided 7 : Elapsed 0.030 ms (3.026 ms / 100) good 3.014 -> 2.775 ( -7.93%) [ +0.20% +0.00% +0.17% / -7.76% -7.93% -7.76%] index_fill_ strided 8 : Elapsed 0.030 ms (3.020 ms / 100) good 2.850 -> 2.606 ( -8.56%) [ +0.00% +0.18% +0.04% / -8.00% -8.53% -8.56%] index_fill_ strided 16 : Elapsed 0.028 ms (2.850 ms / 100) good 2.698 -> 2.443 ( -9.45%) [ +0.63% +0.44% +0.00% / -9.04% -9.45% -9.23%] index_fill_ strided 64 : Elapsed 0.027 ms (2.715 ms / 100) good 3.037 -> 2.879 ( -5.20%) [ +0.10% +0.03% +0.00% / -5.20% -4.81% -5.07%] index_fill_ strided 100 : Elapsed 0.030 ms (3.040 ms / 100) good 2.935 -> 2.701 ( -7.97%) [ +0.27% +0.00% +0.17% / -7.90% -7.97% -7.94%] index_fill_ strided 255 : Elapsed 0.029 ms (2.943 ms / 100) good 2.659 -> 2.395 ( -9.93%) [ +0.00% +0.08% +0.34% / -9.55% -9.63% -9.93%] index_fill_ strided 256 : Elapsed 0.027 ms (2.659 ms / 100) good 2.940 -> 2.718 ( -7.55%) [ +0.00% +0.34% +0.17% / -7.55% -7.21% -7.28%] index_fill_ strided 257 : Elapsed 0.029 ms (2.940 ms / 100) good 3.021 -> 2.847 ( -5.76%) [ +0.00% +0.00% +0.07% / -5.43% -5.76% -5.56%] index_fill_ random : Elapsed 0.030 ms (3.021 ms / 100) good 2.907 -> 2.680 ( -7.81%) [ +0.00% +0.10% +0.03% / -7.81% -7.60% -7.50%] index_fill_ random_sorted : Elapsed 0.029 ms (2.907 ms / 100) good 3.023 -> 2.859 ( -5.43%) [ +0.26% +0.00% +0.43% / -5.43% -5.13% -5.13%] index_fill_ perm : Elapsed 0.030 ms (3.031 ms / 100) good 2.912 -> 2.695 ( -7.45%) [ +0.41% +0.86% +0.00% / -7.31% -7.25% -7.45%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.924 ms / 100) B = [255, 512] (stride (512, 1)) A = [255, 256] (stride (256, 1)) dim = 1 good 5.290 -> 4.893 ( -7.50%) [ +0.00% +0.25% +0.40% / -7.28% -7.50% -7.32%] index_add_ linear : Elapsed 0.053 ms (5.290 ms / 100) good 5.073 -> 4.803 ( -5.32%) [ +0.08% +0.00% +0.16% / -4.69% -5.32% -5.30%] index_copy_ linear : Elapsed 0.051 ms (5.077 ms / 100) good 5.293 -> 4.875 ( -7.90%) [ +0.00% +0.17% +0.13% / -7.50% -7.90% -7.56%] index_add_ reverse : Elapsed 0.053 ms (5.293 ms / 100) good 5.068 -> 4.782 ( -5.64%) [ +0.00% +0.00% +0.02% / -4.91% -5.58% -5.64%] index_copy_ reverse : Elapsed 0.051 ms (5.068 ms / 100) good 5.563 -> 5.076 ( -8.75%) [ +0.00% +0.00% +0.25% / -8.75% -8.63% -8.75%] index_add_ spread : Elapsed 0.056 ms (5.563 ms / 100) 5.350 -> 5.091 ( -4.84%) [ +0.28% +0.00% +0.24% / -4.56% -4.84% -4.80%] index_copy_ spread : Elapsed 0.054 ms (5.365 ms / 100) good 5.606 -> 5.077 ( -9.44%) [ +0.36% +0.00% +0.27% / -9.44% -9.20% -9.28%] index_add_ strided 3 : Elapsed 0.056 ms (5.626 ms / 100) good 5.399 -> 5.069 ( -6.11%) [ +0.26% +0.00% +0.41% / -5.74% -6.11% -5.85%] index_copy_ strided 3 : Elapsed 0.054 ms (5.413 ms / 100) good 5.598 -> 5.074 ( -9.36%) [ +0.00% +0.20% +0.30% / -9.36% -8.88% -8.75%] index_add_ strided 5 : Elapsed 0.056 ms (5.598 ms / 100) good 5.437 -> 5.085 ( -6.47%) [ +0.00% +0.28% +0.15% / -6.38% -6.47% -6.18%] index_copy_ strided 5 : Elapsed 0.054 ms (5.437 ms / 100) good 5.539 -> 5.120 ( -7.56%) [ +0.11% +0.67% +0.00% / -7.55% -7.56% -7.22%] index_add_ strided 7 : Elapsed 0.055 ms (5.545 ms / 100) good 5.451 -> 5.103 ( -6.38%) [ +0.00% +0.31% +0.06% / -6.26% -6.38% -6.29%] index_copy_ strided 7 : Elapsed 0.055 ms (5.451 ms / 100) good 5.609 -> 5.080 ( -9.43%) [ +0.00% +0.23% +0.07% / -9.43% -9.38% -9.22%] index_add_ strided 255 : Elapsed 0.056 ms (5.609 ms / 100) good 5.381 -> 5.080 ( -5.59%) [ +0.00% +0.26% +0.06% / -5.37% -5.22% -5.59%] index_copy_ strided 255 : Elapsed 0.054 ms (5.381 ms / 100) good 5.587 -> 5.064 ( -9.36%) [ +0.45% +0.50% +0.00% / -9.36% -9.11% -8.70%] index_add_ strided 257 : Elapsed 0.056 ms (5.612 ms / 100) good 5.370 -> 5.098 ( -5.07%) [ +0.24% +0.30% +0.00% / -5.07% -5.05% -4.93%] index_copy_ strided 257 : Elapsed 0.054 ms (5.383 ms / 100) good 5.645 -> 5.221 ( -7.51%) [ +0.12% +0.58% +0.00% / -7.51% -7.26% -7.33%] index_add_ perm : Elapsed 0.057 ms (5.652 ms / 100) good 5.483 -> 5.113 ( -6.75%) [ +0.13% +0.05% +0.00% / -6.60% -6.60% -6.75%] index_copy_ perm : Elapsed 0.055 ms (5.490 ms / 100) good 5.548 -> 5.080 ( -8.44%) [ +0.00% +0.41% +0.05% / -8.17% -8.44% -8.29%] index_add_ perm_sorted : Elapsed 0.055 ms (5.548 ms / 100) 5.347 -> 5.084 ( -4.92%) [ +0.04% +0.19% +0.00% / -4.38% -4.90% -4.92%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.349 ms / 100) Good 5.675 -> 4.975 (-12.33%) [ +0.07% +0.00% +0.23% / -12.04% -12.33% -12.19%] index_select const : Elapsed 0.057 ms (5.679 ms / 100) Good 5.737 -> 5.067 (-11.68%) [ +0.05% +0.02% +0.00% / -11.61% -11.43% -11.68%] index_select wrap : Elapsed 0.057 ms (5.740 ms / 100) Good 5.738 -> 5.082 (-11.43%) [ +0.23% +0.09% +0.00% / -11.43% -11.43% -11.42%] index_select linear : Elapsed 0.058 ms (5.751 ms / 100) good 5.624 -> 5.157 ( -8.30%) [ +0.16% +0.00% +0.09% / -8.14% -8.30% -8.21%] index_select reverse : Elapsed 0.056 ms (5.633 ms / 100) Good 5.655 -> 4.995 (-11.67%) [ +0.00% +0.07% +0.02% / -11.67% -11.53% -11.51%] index_select skip64 : Elapsed 0.057 ms (5.655 ms / 100) Good 5.645 -> 4.953 (-12.26%) [ +0.12% +0.00% +0.05% / -11.96% -12.01% -12.26%] index_select skip256 : Elapsed 0.057 ms (5.652 ms / 100) good 5.713 -> 5.161 ( -9.66%) [ +0.05% +0.00% +0.12% / -9.66% -9.38% -9.54%] index_select spread : Elapsed 0.057 ms (5.716 ms / 100) Good 5.824 -> 5.072 (-12.91%) [ +0.02% +0.00% +0.17% / -12.62% -12.91% -12.79%] index_select strided 3 : Elapsed 0.058 ms (5.825 ms / 100) Good 5.824 -> 5.082 (-12.74%) [ +0.21% +0.02% +0.00% / -12.65% -12.60% -12.74%] index_select strided 5 : Elapsed 0.058 ms (5.836 ms / 100) Good 5.810 -> 5.091 (-12.38%) [ +0.21% +0.00% +0.34% / -12.25% -12.38% -12.38%] index_select strided 7 : Elapsed 0.058 ms (5.822 ms / 100) Good 5.854 -> 5.103 (-12.83%) [ +0.00% +0.02% +0.03% / -12.83% -12.27% -12.50%] index_select strided 8 : Elapsed 0.059 ms (5.854 ms / 100) Good 5.763 -> 5.069 (-12.04%) [ +0.00% +0.12% +0.05% / -11.97% -11.97% -12.04%] index_select strided 16 : Elapsed 0.058 ms (5.763 ms / 100) Good 5.681 -> 4.995 (-12.08%) [ +0.00% +0.00% +0.02% / -12.08% -12.00% -12.02%] index_select strided 64 : Elapsed 0.057 ms (5.681 ms / 100) Good 5.806 -> 5.115 (-11.90%) [ +0.03% +0.00% +0.19% / -11.90% -11.88% -11.90%] index_select strided 100 : Elapsed 0.058 ms (5.808 ms / 100) Good 5.738 -> 5.095 (-11.21%) [ +0.37% +0.10% +0.00% / -11.05% -11.12% -11.21%] index_select strided 255 : Elapsed 0.058 ms (5.759 ms / 100) Good 5.831 -> 5.108 (-12.40%) [ +0.14% +0.00% +0.03% / -12.40% -12.21% -12.30%] index_select random : Elapsed 0.058 ms (5.839 ms / 100) good 5.708 -> 5.159 ( -9.62%) [ +0.00% +0.18% +0.18% / -9.62% -9.55% -9.50%] index_select random_sorted : Elapsed 0.057 ms (5.708 ms / 100) B = [255, 512] (stride (512, 1)) A = [255, 256] (stride (1, 255)) dim = 1 5.137 -> 4.971 ( -3.23%) [ +0.35% +0.00% +0.27% / -3.06% -3.23% -3.21%] index_add_ linear : Elapsed 0.052 ms (5.155 ms / 100) 4.897 -> 4.892 ( -0.10%) [ +0.27% +0.00% +0.25% / +0.45% +0.04% -0.10%] index_copy_ linear : Elapsed 0.049 ms (4.910 ms / 100) 5.127 -> 4.962 ( -3.22%) [ +0.21% +0.00% +0.14% / -2.95% -3.08% -3.22%] index_add_ reverse : Elapsed 0.051 ms (5.138 ms / 100) 4.876 -> 4.890 ( +0.29%) [ +0.06% +0.23% +0.00% / +0.59% +0.29% +0.41%] index_copy_ reverse : Elapsed 0.049 ms (4.879 ms / 100) 5.370 -> 5.174 ( -3.65%) [ +0.34% +0.06% +0.00% / -3.54% -3.65% -3.48%] index_add_ spread : Elapsed 0.054 ms (5.388 ms / 100) 5.139 -> 5.209 ( +1.36%) [ +0.16% +0.12% +0.00% / +1.44% +1.36% +1.44%] index_copy_ spread : Elapsed 0.051 ms (5.147 ms / 100) 5.388 -> 5.192 ( -3.64%) [ +0.45% +0.17% +0.00% / -3.49% -3.64% -3.47%] index_add_ strided 3 : Elapsed 0.054 ms (5.412 ms / 100) 5.184 -> 5.197 ( +0.25%) [ +0.25% +0.02% +0.00% / +0.37% +0.41% +0.25%] index_copy_ strided 3 : Elapsed 0.052 ms (5.197 ms / 100) 5.399 -> 5.197 ( -3.74%) [ +0.00% +0.06% +0.33% / -3.74% -3.24% -3.35%] index_add_ strided 5 : Elapsed 0.054 ms (5.399 ms / 100) 5.221 -> 5.221 ( +0.00%) [ +0.29% +0.00% +0.19% / +0.02% +0.00% +0.00%] index_copy_ strided 5 : Elapsed 0.052 ms (5.236 ms / 100) 5.377 -> 5.249 ( -2.38%) [ +0.11% +0.00% +0.20% / -2.16% -2.29% -2.38%] index_add_ strided 7 : Elapsed 0.054 ms (5.383 ms / 100) 5.230 -> 5.234 ( +0.08%) [ +0.00% +0.08% +0.06% / +0.44% +0.08% +0.08%] index_copy_ strided 7 : Elapsed 0.052 ms (5.230 ms / 100) 5.423 -> 5.203 ( -4.06%) [ +0.17% +0.37% +0.00% / -3.89% -4.06% -4.02%] index_add_ strided 255 : Elapsed 0.054 ms (5.432 ms / 100) 5.164 -> 5.212 ( +0.93%) [ +0.33% +0.45% +0.00% / +1.07% +1.14% +0.93%] index_copy_ strided 255 : Elapsed 0.052 ms (5.181 ms / 100) 5.397 -> 5.175 ( -4.11%) [ +0.02% +0.00% +0.00% / -3.95% -3.78% -4.11%] index_add_ strided 257 : Elapsed 0.054 ms (5.398 ms / 100) 5.154 -> 5.224 ( +1.36%) [ +0.17% +0.16% +0.00% / +1.49% +1.36% +1.36%] index_copy_ strided 257 : Elapsed 0.052 ms (5.163 ms / 100) 5.454 -> 5.347 ( -1.96%) [ +0.13% +0.00% +0.04% / -1.94% -1.96% -1.80%] index_add_ perm : Elapsed 0.055 ms (5.461 ms / 100) 5.245 -> 5.269 ( +0.46%) [ +0.44% +0.21% +0.00% / +0.59% +0.67% +0.46%] index_copy_ perm : Elapsed 0.053 ms (5.268 ms / 100) 5.380 -> 5.193 ( -3.48%) [ +0.20% +0.48% +0.00% / -3.18% -3.40% -3.48%] index_add_ perm_sorted : Elapsed 0.054 ms (5.391 ms / 100) 5.169 -> 5.211 ( +0.81%) [ +0.48% +0.46% +0.00% / +1.37% +0.85% +0.81%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.194 ms / 100) good 5.426 -> 4.968 ( -8.44%) [ +0.04% +0.00% +0.04% / -8.09% -8.44% -8.33%] index_select const : Elapsed 0.054 ms (5.428 ms / 100) 5.311 -> 5.213 ( -1.85%) [ +0.13% +0.00% +0.13% / -1.71% -1.85% -1.71%] index_select wrap : Elapsed 0.053 ms (5.318 ms / 100) 5.361 -> 5.186 ( -3.26%) [ +0.37% +0.00% +0.15% / -3.26% -3.25% -3.21%] index_select linear : Elapsed 0.054 ms (5.381 ms / 100) 5.352 -> 5.226 ( -2.35%) [ +0.34% +0.13% +0.00% / -2.35% -2.30% -2.28%] index_select reverse : Elapsed 0.054 ms (5.370 ms / 100) good 5.354 -> 4.973 ( -7.12%) [ +0.00% +0.00% +0.21% / -7.12% -7.12% -7.00%] index_select skip64 : Elapsed 0.054 ms (5.354 ms / 100) good 5.410 -> 4.953 ( -8.45%) [ +0.00% +0.04% +0.07% / -8.19% -8.08% -8.45%] index_select skip256 : Elapsed 0.054 ms (5.410 ms / 100) 5.323 -> 5.216 ( -2.01%) [ +0.00% +0.09% +0.34% / -2.01% -1.82% -1.90%] index_select spread : Elapsed 0.053 ms (5.323 ms / 100) 5.373 -> 5.275 ( -1.82%) [ +0.11% +0.11% +0.00% / -1.82% -1.60% -1.64%] index_select strided 3 : Elapsed 0.054 ms (5.379 ms / 100) 5.364 -> 5.245 ( -2.22%) [ +0.09% +0.00% +0.07% / -2.16% -2.09% -2.22%] index_select strided 5 : Elapsed 0.054 ms (5.369 ms / 100) 5.357 -> 5.255 ( -1.90%) [ +0.15% +0.00% +0.06% / -1.66% -1.61% -1.90%] index_select strided 7 : Elapsed 0.054 ms (5.365 ms / 100) 5.352 -> 5.224 ( -2.39%) [ +0.15% +0.00% +0.19% / -1.74% -2.28% -2.39%] index_select strided 8 : Elapsed 0.054 ms (5.360 ms / 100) 5.356 -> 5.191 ( -3.08%) [ +0.15% +0.00% +0.09% / -2.50% -2.97% -3.08%] index_select strided 16 : Elapsed 0.054 ms (5.364 ms / 100) good 5.312 -> 4.973 ( -6.38%) [ +0.06% +0.00% +0.08% / -6.38% -6.36% -6.38%] index_select strided 64 : Elapsed 0.053 ms (5.315 ms / 100) 5.327 -> 5.150 ( -3.32%) [ +0.00% +0.00% +0.15% / -3.32% -3.13% -3.19%] index_select strided 100 : Elapsed 0.053 ms (5.327 ms / 100) 5.363 -> 5.228 ( -2.52%) [ +0.13% +0.00% +0.02% / -2.16% -2.52% -2.50%] index_select strided 255 : Elapsed 0.054 ms (5.370 ms / 100) 5.359 -> 5.234 ( -2.33%) [ +0.17% +0.06% +0.00% / -2.33% -1.94% -1.85%] index_select random : Elapsed 0.054 ms (5.368 ms / 100) 5.335 -> 5.172 ( -3.06%) [ +0.06% +0.00% +0.21% / -3.06% -2.61% -2.62%] index_select random_sorted : Elapsed 0.053 ms (5.338 ms / 100) B = [255, 512] (stride (1, 255)) dim = 1 fill_cnt = 256 2.463 -> 2.475 ( +0.49%) [ +0.61% +0.24% +0.00% / +0.69% +0.81% +0.49%] index_fill_ const : Elapsed 0.025 ms (2.478 ms / 100) 2.478 -> 2.478 ( +0.00%) [ +0.24% +0.12% +0.00% / +0.24% +0.00% +0.12%] index_fill_ linear : Elapsed 0.025 ms (2.484 ms / 100) 2.463 -> 2.443 ( -0.81%) [ +0.37% +0.24% +0.00% / -0.45% -0.81% -0.61%] index_fill_ reverse : Elapsed 0.025 ms (2.472 ms / 100) 2.424 -> 2.412 ( -0.50%) [ +0.00% +0.00% +0.17% / -0.08% -0.50% -0.41%] index_fill_ skip64 : Elapsed 0.024 ms (2.424 ms / 100) 2.483 -> 2.479 ( -0.16%) [ +0.40% +0.36% +0.00% / -0.04% +0.16% -0.16%] index_fill_ skip256 : Elapsed 0.025 ms (2.493 ms / 100) 2.460 -> 2.448 ( -0.49%) [ +0.04% +0.00% +0.20% / -0.49% -0.28% -0.45%] index_fill_ spread : Elapsed 0.025 ms (2.461 ms / 100) 2.449 -> 2.433 ( -0.65%) [ +0.00% +0.00% +0.16% / -0.65% +0.08% +0.12%] index_fill_ strided 3 : Elapsed 0.024 ms (2.449 ms / 100) 2.476 -> 2.439 ( -1.49%) [ +0.36% +0.40% +0.00% / -0.73% -1.45% -1.49%] index_fill_ strided 5 : Elapsed 0.025 ms (2.485 ms / 100) 2.469 -> 2.438 ( -1.26%) [ +0.08% +0.24% +0.00% / -0.16% -1.26% -1.05%] index_fill_ strided 7 : Elapsed 0.025 ms (2.471 ms / 100) 2.432 -> 2.427 ( -0.21%) [ +0.66% +0.33% +0.00% / -0.21% +0.37% +0.45%] index_fill_ strided 8 : Elapsed 0.024 ms (2.448 ms / 100) 2.425 -> 2.425 ( +0.00%) [ +0.45% +0.37% +0.00% / +0.99% +0.33% +0.00%] index_fill_ strided 16 : Elapsed 0.024 ms (2.436 ms / 100) 2.427 -> 2.416 ( -0.45%) [ +0.21% +0.00% +0.45% / -0.45% -0.45% +0.12%] index_fill_ strided 64 : Elapsed 0.024 ms (2.432 ms / 100) 2.443 -> 2.424 ( -0.78%) [ +0.00% +0.08% +0.00% / -0.78% -0.33% +0.04%] index_fill_ strided 100 : Elapsed 0.024 ms (2.443 ms / 100) 2.460 -> 2.447 ( -0.53%) [ +0.12% +0.16% +0.00% / -0.20% -0.53% -0.24%] index_fill_ strided 255 : Elapsed 0.025 ms (2.463 ms / 100) 2.415 -> 2.399 ( -0.66%) [ +0.08% +0.00% +0.00% / -0.66% -0.41% -0.17%] index_fill_ strided 256 : Elapsed 0.024 ms (2.417 ms / 100) 2.500 -> 2.466 ( -1.36%) [ +0.28% +0.12% +0.00% / -0.76% -1.20% -1.36%] index_fill_ strided 257 : Elapsed 0.025 ms (2.507 ms / 100) 2.455 -> 2.431 ( -0.98%) [ +0.00% +0.16% +0.24% / +0.00% -0.57% -0.98%] index_fill_ random : Elapsed 0.025 ms (2.455 ms / 100) 2.449 -> 2.446 ( -0.12%) [ +0.00% +0.69% +0.20% / -0.12% +0.08% +0.00%] index_fill_ random_sorted : Elapsed 0.024 ms (2.449 ms / 100) 2.460 -> 2.459 ( -0.04%) [ +0.00% +0.00% +0.37% / -0.04% +0.33% +0.69%] index_fill_ perm : Elapsed 0.025 ms (2.460 ms / 100) 2.463 -> 2.453 ( -0.41%) [ +0.20% +0.00% +0.32% / -0.41% -0.37% +0.16%] index_fill_ perm_sorted : Elapsed 0.025 ms (2.468 ms / 100) B = [255, 512] (stride (1, 255)) A = [255, 256] (stride (256, 1)) dim = 1 4.972 -> 4.968 ( -0.08%) [ +0.38% +0.00% +0.30% / -0.08% +0.18% +0.22%] index_add_ linear : Elapsed 0.050 ms (4.991 ms / 100) 4.915 -> 4.903 ( -0.24%) [ +0.35% +0.00% +0.12% / -0.24% +0.02% +0.20%] index_copy_ linear : Elapsed 0.049 ms (4.932 ms / 100) 5.013 -> 4.977 ( -0.72%) [ +0.00% +0.02% +0.28% / -0.58% -0.58% -0.72%] index_add_ reverse : Elapsed 0.050 ms (5.013 ms / 100) 4.929 -> 4.902 ( -0.55%) [ +0.24% +0.02% +0.00% / -0.53% -0.24% -0.55%] index_copy_ reverse : Elapsed 0.049 ms (4.941 ms / 100) 5.023 -> 5.005 ( -0.36%) [ +0.00% +0.14% +0.16% / -0.36% -0.34% -0.16%] index_add_ spread : Elapsed 0.050 ms (5.023 ms / 100) 4.934 -> 4.941 ( +0.14%) [ +0.18% +0.26% +0.00% / +0.18% +0.14% +0.14%] index_copy_ spread : Elapsed 0.049 ms (4.943 ms / 100) 5.058 -> 5.008 ( -0.99%) [ +0.00% +0.22% +0.00% / -0.61% -0.99% -0.73%] index_add_ strided 3 : Elapsed 0.051 ms (5.058 ms / 100) 4.967 -> 4.929 ( -0.77%) [ +0.00% +0.06% +0.20% / -0.14% -0.77% -0.72%] index_copy_ strided 3 : Elapsed 0.050 ms (4.967 ms / 100) 5.037 -> 5.024 ( -0.26%) [ +0.00% +0.36% +0.36% / -0.26% +0.10% +0.08%] index_add_ strided 5 : Elapsed 0.050 ms (5.037 ms / 100) 4.951 -> 4.932 ( -0.38%) [ +0.00% +0.10% +0.02% / -0.38% +0.26% +0.14%] index_copy_ strided 5 : Elapsed 0.050 ms (4.951 ms / 100) 5.011 -> 5.008 ( -0.06%) [ +0.00% +0.12% +0.16% / -0.06% +0.20% +0.28%] index_add_ strided 7 : Elapsed 0.050 ms (5.011 ms / 100) 4.921 -> 4.925 ( +0.08%) [ +0.18% +0.00% +0.04% / +0.20% +0.12% +0.08%] index_copy_ strided 7 : Elapsed 0.049 ms (4.930 ms / 100) 5.020 -> 4.980 ( -0.80%) [ +0.18% +0.20% +0.00% / -0.80% -0.20% -0.14%] index_add_ strided 255 : Elapsed 0.050 ms (5.029 ms / 100) 4.936 -> 4.928 ( -0.16%) [ +0.43% +0.00% +0.04% / -0.16% +0.10% -0.02%] index_copy_ strided 255 : Elapsed 0.050 ms (4.957 ms / 100) 4.999 -> 4.969 ( -0.60%) [ +0.00% +0.12% +0.00% / -0.60% +0.26% +0.08%] index_add_ strided 257 : Elapsed 0.050 ms (4.999 ms / 100) 4.930 -> 4.910 ( -0.41%) [ +0.30% +0.26% +0.00% / -0.41% +0.49% +0.18%] index_copy_ strided 257 : Elapsed 0.049 ms (4.945 ms / 100) 5.035 -> 5.031 ( -0.08%) [ +0.08% +0.00% +0.22% / -0.08% +0.42% +0.46%] index_add_ perm : Elapsed 0.050 ms (5.039 ms / 100) 4.931 -> 4.929 ( -0.04%) [ +0.39% +0.00% +0.04% / -0.04% +0.30% +0.71%] index_copy_ perm : Elapsed 0.049 ms (4.950 ms / 100) 5.019 -> 5.005 ( -0.28%) [ +0.10% +0.08% +0.00% / -0.28% +0.58% +0.42%] index_add_ perm_sorted : Elapsed 0.050 ms (5.024 ms / 100) 4.942 -> 4.932 ( -0.20%) [ +0.24% +0.00% +0.06% / -0.20% +0.22% +0.26%] index_copy_ perm_sorted : Elapsed 0.050 ms (4.954 ms / 100) 5.085 -> 5.086 ( +0.02%) [ +0.00% +0.14% +0.08% / +0.02% +0.20% +0.31%] index_select const : Elapsed 0.051 ms (5.085 ms / 100) 5.309 -> 5.324 ( +0.28%) [ +0.08% +0.13% +0.00% / +0.28% +0.49% +0.36%] index_select wrap : Elapsed 0.053 ms (5.313 ms / 100) 5.293 -> 5.288 ( -0.09%) [ +0.02% +0.00% +0.00% / -0.09% +0.83% +0.89%] index_select linear : Elapsed 0.053 ms (5.294 ms / 100) 5.156 -> 5.158 ( +0.04%) [ +0.08% +0.00% +0.14% / +0.04% +0.74% +0.56%] index_select reverse : Elapsed 0.052 ms (5.160 ms / 100) 5.144 -> 5.143 ( -0.02%) [ +0.00% +0.04% +0.10% / -0.02% +0.23% +0.12%] index_select skip64 : Elapsed 0.051 ms (5.144 ms / 100) 5.091 -> 5.095 ( +0.08%) [ +0.10% +0.18% +0.00% / +0.08% +0.84% +0.81%] index_select skip256 : Elapsed 0.051 ms (5.096 ms / 100) 5.211 -> 5.214 ( +0.06%) [ +0.17% +0.00% +0.10% / +0.06% +0.63% +0.58%] index_select spread : Elapsed 0.052 ms (5.220 ms / 100) 5.336 -> 5.333 ( -0.06%) [ +0.13% +0.02% +0.00% / +0.17% +0.02% -0.06%] index_select strided 3 : Elapsed 0.053 ms (5.343 ms / 100) 5.322 -> 5.326 ( +0.08%) [ +0.23% +0.11% +0.00% / +0.08% +0.45% +0.47%] index_select strided 5 : Elapsed 0.053 ms (5.334 ms / 100) 5.346 -> 5.319 ( -0.51%) [ +0.26% +0.00% +0.09% / +0.13% -0.51% -0.41%] index_select strided 7 : Elapsed 0.054 ms (5.360 ms / 100) 5.312 -> 5.314 ( +0.04%) [ +0.21% +0.19% +0.00% / +0.04% +0.09% +0.36%] index_select strided 8 : Elapsed 0.053 ms (5.323 ms / 100) 5.249 -> 5.254 ( +0.10%) [ +0.00% +0.21% +0.11% / +0.10% +0.59% +0.48%] index_select strided 16 : Elapsed 0.052 ms (5.249 ms / 100) 5.143 -> 5.155 ( +0.23%) [ +0.12% +0.10% +0.00% / +0.23% +0.27% +0.29%] index_select strided 64 : Elapsed 0.051 ms (5.149 ms / 100) 5.339 -> 5.316 ( -0.43%) [ +0.00% +0.24% +0.13% / +0.32% -0.36% -0.43%] index_select strided 100 : Elapsed 0.053 ms (5.339 ms / 100) 5.308 -> 5.311 ( +0.06%) [ +0.00% +0.09% +0.09% / +0.06% +0.08% +0.15%] index_select strided 255 : Elapsed 0.053 ms (5.308 ms / 100) 5.324 -> 5.334 ( +0.19%) [ +0.23% +0.00% +0.08% / +0.19% +0.56% +0.53%] index_select random : Elapsed 0.053 ms (5.336 ms / 100) 5.215 -> 5.222 ( +0.13%) [ +0.27% +0.00% +0.19% / +0.13% +0.31% +0.17%] index_select random_sorted : Elapsed 0.052 ms (5.229 ms / 100) B = [255, 512] (stride (1, 255)) A = [255, 256] (stride (1, 255)) dim = 1 4.863 -> 4.854 ( -0.19%) [ +0.14% +0.00% +0.35% / -0.19% +0.12% +0.41%] index_add_ linear : Elapsed 0.049 ms (4.870 ms / 100) 4.780 -> 4.770 ( -0.21%) [ +0.29% +0.00% +0.21% / -0.21% -0.15% -0.17%] index_copy_ linear : Elapsed 0.048 ms (4.794 ms / 100) 4.899 -> 4.874 ( -0.51%) [ +0.00% +0.00% +0.08% / +0.14% -0.51% -0.08%] index_add_ reverse : Elapsed 0.049 ms (4.899 ms / 100) 4.801 -> 4.774 ( -0.56%) [ +0.06% +0.08% +0.00% / -0.15% -0.56% -0.56%] index_copy_ reverse : Elapsed 0.048 ms (4.804 ms / 100) 4.896 -> 4.871 ( -0.51%) [ +0.10% +0.00% +0.06% / -0.51% -0.22% +0.08%] index_add_ spread : Elapsed 0.049 ms (4.901 ms / 100) 4.801 -> 4.775 ( -0.54%) [ +0.23% +0.00% +0.17% / -0.54% -0.44% -0.08%] index_copy_ spread : Elapsed 0.048 ms (4.812 ms / 100) 4.900 -> 4.888 ( -0.24%) [ +0.12% +0.00% +0.22% / -0.02% -0.22% -0.24%] index_add_ strided 3 : Elapsed 0.049 ms (4.906 ms / 100) 4.795 -> 4.784 ( -0.23%) [ +0.56% +0.00% +0.27% / -0.04% -0.23% -0.15%] index_copy_ strided 3 : Elapsed 0.048 ms (4.822 ms / 100) 4.912 -> 4.899 ( -0.26%) [ +0.14% +0.10% +0.00% / -0.26% +0.16% -0.04%] index_add_ strided 5 : Elapsed 0.049 ms (4.919 ms / 100) 4.807 -> 4.800 ( -0.15%) [ +0.31% +0.33% +0.00% / -0.15% +0.35% +0.00%] index_copy_ strided 5 : Elapsed 0.048 ms (4.822 ms / 100) 4.929 -> 4.889 ( -0.81%) [ +0.63% +0.49% +0.00% / -0.37% -0.81% -0.73%] index_add_ strided 7 : Elapsed 0.050 ms (4.960 ms / 100) 4.834 -> 4.780 ( -1.12%) [ +0.46% +0.41% +0.00% / -0.29% -1.12% -0.74%] index_copy_ strided 7 : Elapsed 0.049 ms (4.856 ms / 100) 4.909 -> 4.874 ( -0.71%) [ +0.12% +0.04% +0.00% / -0.41% -0.71% -0.47%] index_add_ strided 255 : Elapsed 0.049 ms (4.915 ms / 100) 4.810 -> 4.790 ( -0.42%) [ +0.00% +0.08% +0.10% / -0.04% -0.42% -0.37%] index_copy_ strided 255 : Elapsed 0.048 ms (4.810 ms / 100) 4.880 -> 4.878 ( -0.04%) [ +0.49% +0.00% +0.16% / -0.04% +0.16% +0.33%] index_add_ strided 257 : Elapsed 0.049 ms (4.904 ms / 100) 4.801 -> 4.802 ( +0.02%) [ +0.42% +0.10% +0.00% / +0.02% +0.12% +0.25%] index_copy_ strided 257 : Elapsed 0.048 ms (4.821 ms / 100) 4.903 -> 4.890 ( -0.27%) [ +0.00% +0.22% +0.24% / -0.27% +0.02% -0.16%] index_add_ perm : Elapsed 0.049 ms (4.903 ms / 100) 4.803 -> 4.783 ( -0.42%) [ +0.19% +0.33% +0.00% / -0.25% -0.23% -0.42%] index_copy_ perm : Elapsed 0.048 ms (4.812 ms / 100) 4.894 -> 4.875 ( -0.39%) [ +0.00% +0.16% +0.22% / -0.02% -0.39% -0.37%] index_add_ perm_sorted : Elapsed 0.049 ms (4.894 ms / 100) 4.797 -> 4.774 ( -0.48%) [ +0.00% +0.17% +0.19% / -0.25% -0.48% -0.42%] index_copy_ perm_sorted : Elapsed 0.048 ms (4.797 ms / 100) 4.954 -> 4.962 ( +0.16%) [ +0.14% +0.00% +0.06% / +0.32% +0.16% +0.36%] index_select const : Elapsed 0.050 ms (4.961 ms / 100) 5.111 -> 5.113 ( +0.04%) [ +0.18% +0.10% +0.00% / +0.08% +0.10% +0.04%] index_select wrap : Elapsed 0.051 ms (5.120 ms / 100) 5.089 -> 5.095 ( +0.12%) [ +0.00% +0.12% +0.31% / +0.12% +0.77% +0.69%] index_select linear : Elapsed 0.051 ms (5.089 ms / 100) 5.099 -> 5.103 ( +0.08%) [ +0.24% +0.10% +0.00% / +0.20% +0.08% +0.22%] index_select reverse : Elapsed 0.051 ms (5.111 ms / 100) 4.959 -> 4.965 ( +0.12%) [ +0.20% +0.06% +0.00% / +0.24% +0.22% +0.12%] index_select skip64 : Elapsed 0.050 ms (4.969 ms / 100) 4.955 -> 4.966 ( +0.22%) [ +0.02% +0.00% +0.16% / +0.22% +1.01% +0.97%] index_select skip256 : Elapsed 0.050 ms (4.956 ms / 100) 5.072 -> 5.083 ( +0.22%) [ +0.02% +0.00% +0.12% / +0.22% +0.57% +0.35%] index_select spread : Elapsed 0.051 ms (5.073 ms / 100) 5.143 -> 5.146 ( +0.06%) [ +0.02% +0.10% +0.00% / +0.08% +0.06% +0.12%] index_select strided 3 : Elapsed 0.051 ms (5.144 ms / 100) 5.148 -> 5.145 ( -0.06%) [ +0.08% +0.00% +0.19% / +0.04% -0.06% -0.06%] index_select strided 5 : Elapsed 0.052 ms (5.152 ms / 100) 5.181 -> 5.150 ( -0.60%) [ +0.08% +0.00% +0.25% / +0.37% -0.58% -0.60%] index_select strided 7 : Elapsed 0.052 ms (5.185 ms / 100) 4.981 -> 4.983 ( +0.04%) [ +0.28% +0.00% +0.08% / +0.20% +0.04% +0.20%] index_select strided 8 : Elapsed 0.050 ms (4.995 ms / 100) 4.980 -> 4.975 ( -0.10%) [ +0.00% +0.36% +0.32% / +0.26% -0.10% +0.32%] index_select strided 16 : Elapsed 0.050 ms (4.980 ms / 100) 4.962 -> 4.960 ( -0.04%) [ +0.18% +0.00% +0.16% / +0.18% +0.16% -0.04%] index_select strided 64 : Elapsed 0.050 ms (4.971 ms / 100) 5.049 -> 5.023 ( -0.51%) [ +0.04% +0.00% +0.14% / +0.14% -0.51% -0.46%] index_select strided 100 : Elapsed 0.051 ms (5.051 ms / 100) 5.142 -> 5.144 ( +0.04%) [ +0.25% +0.00% +0.04% / +0.04% +0.21% +0.06%] index_select strided 255 : Elapsed 0.052 ms (5.155 ms / 100) 5.121 -> 5.103 ( -0.35%) [ +0.14% +0.00% +0.02% / +0.10% -0.25% -0.35%] index_select random : Elapsed 0.051 ms (5.128 ms / 100) 5.092 -> 5.080 ( -0.24%) [ +0.20% +0.00% +0.06% / -0.02% -0.24% -0.12%] index_select random_sorted : Elapsed 0.051 ms (5.102 ms / 100) out_shape = [512, 255] in_shape = [256, 255] idx_dim = 0 B = [512, 255] (stride (255, 1)) dim = 0 fill_cnt = 256 2.409 -> 2.392 ( -0.71%) [ +0.00% +0.17% +0.08% / -0.71% -0.71% -0.71%] index_fill_ const : Elapsed 0.024 ms (2.409 ms / 100) 2.474 -> 2.466 ( -0.32%) [ +0.00% +0.08% +0.32% / -0.12% -0.32% -0.32%] index_fill_ linear : Elapsed 0.025 ms (2.474 ms / 100) 2.468 -> 2.429 ( -1.58%) [ +0.00% +0.32% +0.32% / -0.93% -1.58% -1.54%] index_fill_ reverse : Elapsed 0.025 ms (2.468 ms / 100) 2.418 -> 2.409 ( -0.37%) [ +0.12% +0.00% +0.08% / +0.37% -0.08% -0.37%] index_fill_ skip64 : Elapsed 0.024 ms (2.421 ms / 100) 2.417 -> 2.421 ( +0.17%) [ +0.29% +0.00% +0.04% / +0.25% +0.17% +0.33%] index_fill_ skip256 : Elapsed 0.024 ms (2.424 ms / 100) 2.469 -> 2.446 ( -0.93%) [ +0.00% +0.20% +0.73% / -0.32% -0.93% -0.53%] index_fill_ spread : Elapsed 0.025 ms (2.469 ms / 100) 2.466 -> 2.457 ( -0.36%) [ +0.00% +0.08% +0.12% / -0.36% -0.24% -0.04%] index_fill_ strided 3 : Elapsed 0.025 ms (2.466 ms / 100) 2.460 -> 2.442 ( -0.73%) [ +0.28% +0.57% +0.00% / -0.53% -0.73% -0.57%] index_fill_ strided 5 : Elapsed 0.025 ms (2.467 ms / 100) 2.452 -> 2.435 ( -0.69%) [ +0.16% +0.00% +0.12% / -0.45% -0.61% -0.69%] index_fill_ strided 7 : Elapsed 0.025 ms (2.456 ms / 100) 2.432 -> 2.419 ( -0.53%) [ +0.29% +0.00% +0.16% / -0.21% -0.53% -0.33%] index_fill_ strided 8 : Elapsed 0.024 ms (2.439 ms / 100) 2.445 -> 2.423 ( -0.90%) [ +0.08% +0.00% +0.04% / -0.61% -0.90% -0.70%] index_fill_ strided 16 : Elapsed 0.024 ms (2.447 ms / 100) 2.432 -> 2.412 ( -0.82%) [ +0.00% +0.25% +0.21% / -0.12% -0.82% -0.78%] index_fill_ strided 64 : Elapsed 0.024 ms (2.432 ms / 100) 2.439 -> 2.430 ( -0.37%) [ +0.16% +0.00% +0.25% / -0.37% +0.16% +0.49%] index_fill_ strided 100 : Elapsed 0.024 ms (2.443 ms / 100) 2.475 -> 2.458 ( -0.69%) [ +0.00% +0.04% +0.00% / -0.69% -0.57% -0.61%] index_fill_ strided 255 : Elapsed 0.025 ms (2.475 ms / 100) 2.412 -> 2.396 ( -0.66%) [ +0.46% +0.00% +0.54% / +0.25% -0.54% -0.66%] index_fill_ strided 256 : Elapsed 0.024 ms (2.423 ms / 100) 2.495 -> 2.471 ( -0.96%) [ +0.40% +0.00% +0.36% / -0.68% -0.96% -0.76%] index_fill_ strided 257 : Elapsed 0.025 ms (2.505 ms / 100) 2.454 -> 2.441 ( -0.53%) [ +0.00% +0.24% +0.37% / -0.37% -0.53% -0.45%] index_fill_ random : Elapsed 0.025 ms (2.454 ms / 100) 2.444 -> 2.435 ( -0.37%) [ +0.29% +0.45% +0.00% / -0.20% -0.16% -0.37%] index_fill_ random_sorted : Elapsed 0.025 ms (2.451 ms / 100) 2.454 -> 2.441 ( -0.53%) [ +0.41% +0.00% +0.00% / -0.53% -0.08% -0.24%] index_fill_ perm : Elapsed 0.025 ms (2.464 ms / 100) 2.460 -> 2.455 ( -0.20%) [ +0.00% +0.41% +0.08% / -0.04% -0.20% -0.20%] index_fill_ perm_sorted : Elapsed 0.025 ms (2.460 ms / 100) B = [512, 255] (stride (255, 1)) A = [256, 255] (stride (255, 1)) dim = 0 4.910 -> 4.883 ( -0.55%) [ +0.02% +0.00% +0.16% / +0.20% -0.39% -0.55%] index_add_ linear : Elapsed 0.049 ms (4.911 ms / 100) 4.834 -> 4.800 ( -0.70%) [ +0.00% +0.04% +0.04% / -0.25% -0.70% -0.62%] index_copy_ linear : Elapsed 0.048 ms (4.834 ms / 100) 4.910 -> 4.878 ( -0.65%) [ +0.02% +0.12% +0.00% / +0.10% -0.65% -0.53%] index_add_ reverse : Elapsed 0.049 ms (4.911 ms / 100) 4.811 -> 4.784 ( -0.56%) [ +0.00% +0.12% +0.23% / +0.04% -0.50% -0.56%] index_copy_ reverse : Elapsed 0.048 ms (4.811 ms / 100) 4.906 -> 4.896 ( -0.20%) [ +0.06% +0.22% +0.00% / -0.20% -0.02% -0.16%] index_add_ spread : Elapsed 0.049 ms (4.909 ms / 100) 4.806 -> 4.791 ( -0.31%) [ +0.12% +0.37% +0.00% / -0.23% -0.31% +0.02%] index_copy_ spread : Elapsed 0.048 ms (4.812 ms / 100) 4.902 -> 4.902 ( +0.00%) [ +0.04% +0.00% +0.00% / +0.10% +0.00% +0.00%] index_add_ strided 3 : Elapsed 0.049 ms (4.904 ms / 100) 4.804 -> 4.792 ( -0.25%) [ +0.00% +0.19% +0.21% / -0.12% -0.19% -0.25%] index_copy_ strided 3 : Elapsed 0.048 ms (4.804 ms / 100) 4.896 -> 4.893 ( -0.06%) [ +0.53% +0.00% +0.37% / -0.06% +0.14% +0.31%] index_add_ strided 5 : Elapsed 0.049 ms (4.922 ms / 100) 4.797 -> 4.794 ( -0.06%) [ +0.46% +0.00% +0.27% / +0.10% -0.06% +0.17%] index_copy_ strided 5 : Elapsed 0.048 ms (4.819 ms / 100) 4.884 -> 4.893 ( +0.18%) [ +0.49% +0.41% +0.00% / +0.18% +0.68% +0.92%] index_add_ strided 7 : Elapsed 0.049 ms (4.908 ms / 100) 4.792 -> 4.785 ( -0.15%) [ +0.19% +0.21% +0.00% / -0.15% +0.08% +0.52%] index_copy_ strided 7 : Elapsed 0.048 ms (4.801 ms / 100) 4.893 -> 4.892 ( -0.02%) [ +0.57% +0.00% +0.41% / -0.02% +0.27% +0.20%] index_add_ strided 255 : Elapsed 0.049 ms (4.921 ms / 100) 4.802 -> 4.788 ( -0.29%) [ +0.21% +0.00% +0.40% / -0.29% -0.17% -0.21%] index_copy_ strided 255 : Elapsed 0.048 ms (4.812 ms / 100) 4.894 -> 4.877 ( -0.35%) [ +0.00% +0.00% +0.12% / -0.35% +0.55% +0.59%] index_add_ strided 257 : Elapsed 0.049 ms (4.894 ms / 100) 4.817 -> 4.805 ( -0.25%) [ +0.10% +0.00% +0.00% / -0.25% +0.31% +0.19%] index_copy_ strided 257 : Elapsed 0.048 ms (4.822 ms / 100) 4.947 -> 4.885 ( -1.25%) [ +0.32% +0.00% +0.42% / -0.14% -1.25% -1.13%] index_add_ perm : Elapsed 0.050 ms (4.963 ms / 100) 4.837 -> 4.800 ( -0.76%) [ +0.41% +0.00% +0.64% / +0.08% -0.76% -0.72%] index_copy_ perm : Elapsed 0.049 ms (4.857 ms / 100) 4.926 -> 4.867 ( -1.20%) [ +0.53% +0.35% +0.00% / +0.20% -1.20% -1.16%] index_add_ perm_sorted : Elapsed 0.050 ms (4.952 ms / 100) 4.835 -> 4.778 ( -1.18%) [ +0.37% +0.31% +0.00% / -0.10% -0.99% -1.18%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.853 ms / 100) 4.990 -> 4.972 ( -0.36%) [ +0.06% +0.22% +0.00% / +0.00% -0.36% -0.06%] index_select const : Elapsed 0.050 ms (4.993 ms / 100) 5.118 -> 5.126 ( +0.16%) [ +0.10% +0.04% +0.00% / +0.16% +0.18% +0.20%] index_select wrap : Elapsed 0.051 ms (5.123 ms / 100) 5.097 -> 5.090 ( -0.14%) [ +0.00% +0.10% +0.08% / +0.02% -0.14% -0.02%] index_select linear : Elapsed 0.051 ms (5.097 ms / 100) 5.113 -> 5.110 ( -0.06%) [ +0.25% +0.00% +0.06% / -0.04% +0.02% -0.06%] index_select reverse : Elapsed 0.051 ms (5.126 ms / 100) 4.956 -> 4.956 ( +0.00%) [ +0.24% +0.00% +0.18% / +0.00% +0.32% +0.42%] index_select skip64 : Elapsed 0.050 ms (4.968 ms / 100) 4.955 -> 4.964 ( +0.18%) [ +0.00% +0.04% +0.38% / +0.34% +0.20% +0.18%] index_select skip256 : Elapsed 0.050 ms (4.955 ms / 100) 5.095 -> 5.083 ( -0.24%) [ +0.14% +0.00% +0.20% / +0.14% -0.24% -0.22%] index_select spread : Elapsed 0.051 ms (5.102 ms / 100) 5.152 -> 5.158 ( +0.12%) [ +0.00% +0.08% +0.16% / +0.27% +0.14% +0.12%] index_select strided 3 : Elapsed 0.052 ms (5.152 ms / 100) 5.131 -> 5.142 ( +0.21%) [ +0.00% +0.31% +0.14% / +0.21% +1.23% +1.44%] index_select strided 5 : Elapsed 0.051 ms (5.131 ms / 100) 5.139 -> 5.144 ( +0.10%) [ +0.35% +0.16% +0.00% / +0.19% +0.10% +0.27%] index_select strided 7 : Elapsed 0.052 ms (5.157 ms / 100) 5.002 -> 4.995 ( -0.14%) [ +0.22% +0.00% +0.18% / +0.04% -0.14% +0.00%] index_select strided 8 : Elapsed 0.050 ms (5.013 ms / 100) 4.958 -> 4.966 ( +0.16%) [ +0.20% +0.00% +0.06% / +0.16% +0.40% +0.26%] index_select strided 16 : Elapsed 0.050 ms (4.968 ms / 100) 4.953 -> 4.970 ( +0.34%) [ +0.20% +0.00% +0.18% / +0.34% +0.59% +0.75%] index_select strided 64 : Elapsed 0.050 ms (4.963 ms / 100) 5.007 -> 5.006 ( -0.02%) [ +0.14% +0.14% +0.00% / -0.02% +0.20% +0.32%] index_select strided 100 : Elapsed 0.050 ms (5.014 ms / 100) 5.150 -> 5.159 ( +0.17%) [ +0.00% +0.04% +0.19% / +0.17% +0.43% +0.39%] index_select strided 255 : Elapsed 0.051 ms (5.150 ms / 100) 5.104 -> 5.107 ( +0.06%) [ +0.14% +0.25% +0.00% / +0.06% +0.65% +0.92%] index_select random : Elapsed 0.051 ms (5.111 ms / 100) 5.112 -> 5.094 ( -0.35%) [ +0.00% +0.25% +0.29% / +0.14% -0.18% -0.35%] index_select random_sorted : Elapsed 0.051 ms (5.112 ms / 100) B = [512, 255] (stride (255, 1)) A = [256, 255] (stride (1, 256)) dim = 0 5.015 -> 4.976 ( -0.78%) [ +0.24% +0.00% +0.04% / -0.28% -0.78% -0.44%] index_add_ linear : Elapsed 0.050 ms (5.027 ms / 100) 4.947 -> 4.934 ( -0.26%) [ +0.04% +0.00% +0.02% / -0.12% -0.26% -0.22%] index_copy_ linear : Elapsed 0.049 ms (4.949 ms / 100) 5.017 -> 4.988 ( -0.58%) [ +0.20% +0.00% +0.12% / -0.06% -0.58% -0.58%] index_add_ reverse : Elapsed 0.050 ms (5.027 ms / 100) 4.930 -> 4.930 ( +0.00%) [ +0.39% +0.00% +0.26% / +0.02% +0.02% +0.00%] index_copy_ reverse : Elapsed 0.049 ms (4.949 ms / 100) 5.037 -> 4.990 ( -0.93%) [ +0.18% +0.12% +0.00% / -0.93% -0.60% -0.71%] index_add_ spread : Elapsed 0.050 ms (5.046 ms / 100) 4.949 -> 4.931 ( -0.36%) [ +0.06% +0.14% +0.00% / -0.16% -0.24% -0.36%] index_copy_ spread : Elapsed 0.050 ms (4.952 ms / 100) 5.018 -> 4.981 ( -0.74%) [ +0.00% +0.22% +0.26% / -0.74% +0.14% +0.04%] index_add_ strided 3 : Elapsed 0.050 ms (5.018 ms / 100) 4.940 -> 4.926 ( -0.28%) [ +0.00% +0.16% +0.04% / -0.26% -0.24% -0.28%] index_copy_ strided 3 : Elapsed 0.049 ms (4.940 ms / 100) 5.035 -> 5.016 ( -0.38%) [ +0.28% +0.20% +0.00% / +0.10% -0.38% -0.26%] index_add_ strided 5 : Elapsed 0.050 ms (5.049 ms / 100) 4.932 -> 4.933 ( +0.02%) [ +0.51% +0.41% +0.00% / +0.30% +0.08% +0.02%] index_copy_ strided 5 : Elapsed 0.050 ms (4.957 ms / 100) 5.000 -> 5.007 ( +0.14%) [ +0.44% +0.42% +0.00% / +0.14% +0.44% +0.50%] index_add_ strided 7 : Elapsed 0.050 ms (5.022 ms / 100) 4.915 -> 4.907 ( -0.16%) [ +0.49% +0.41% +0.00% / -0.16% +0.31% +0.47%] index_copy_ strided 7 : Elapsed 0.049 ms (4.939 ms / 100) 5.017 -> 5.011 ( -0.12%) [ +0.18% +0.10% +0.00% / +0.10% -0.12% -0.10%] index_add_ strided 255 : Elapsed 0.050 ms (5.026 ms / 100) 4.933 -> 4.933 ( +0.00%) [ +0.18% +0.22% +0.00% / +0.30% +0.00% +0.04%] index_copy_ strided 255 : Elapsed 0.049 ms (4.942 ms / 100) 5.012 -> 4.996 ( -0.32%) [ +0.38% +0.14% +0.00% / -0.32% +0.22% +0.10%] index_add_ strided 257 : Elapsed 0.050 ms (5.031 ms / 100) 4.941 -> 4.952 ( +0.22%) [ +0.53% +0.18% +0.00% / +0.22% +0.63% +0.30%] index_copy_ strided 257 : Elapsed 0.050 ms (4.967 ms / 100) 5.045 -> 5.032 ( -0.26%) [ +0.00% +0.61% +0.02% / -0.26% -0.26% -0.12%] index_add_ perm : Elapsed 0.050 ms (5.045 ms / 100) 4.967 -> 4.945 ( -0.44%) [ +0.16% +0.26% +0.00% / -0.36% -0.34% -0.44%] index_copy_ perm : Elapsed 0.050 ms (4.975 ms / 100) 5.028 -> 4.982 ( -0.91%) [ +0.30% +0.20% +0.00% / -0.04% -0.91% -0.88%] index_add_ perm_sorted : Elapsed 0.050 ms (5.043 ms / 100) 4.945 -> 4.917 ( -0.57%) [ +0.14% +0.28% +0.00% / +0.14% -0.57% -0.22%] index_copy_ perm_sorted : Elapsed 0.050 ms (4.952 ms / 100) 5.127 -> 5.112 ( -0.29%) [ +0.00% +0.06% +0.10% / -0.02% -0.20% -0.29%] index_select const : Elapsed 0.051 ms (5.127 ms / 100) 5.308 -> 5.306 ( -0.04%) [ +0.09% +0.15% +0.00% / +0.08% -0.04% +0.36%] index_select wrap : Elapsed 0.053 ms (5.313 ms / 100) 5.299 -> 5.297 ( -0.04%) [ +0.17% +0.13% +0.00% / +0.13% -0.04% +0.00%] index_select linear : Elapsed 0.053 ms (5.308 ms / 100) 5.170 -> 5.165 ( -0.10%) [ +0.00% +0.14% +0.00% / -0.04% +0.00% -0.10%] index_select reverse : Elapsed 0.052 ms (5.170 ms / 100) 5.134 -> 5.133 ( -0.02%) [ +0.12% +0.10% +0.00% / -0.02% +0.10% +0.31%] index_select skip64 : Elapsed 0.051 ms (5.140 ms / 100) 5.102 -> 5.099 ( -0.06%) [ +0.16% +0.00% +0.06% / -0.06% -0.06% +0.10%] index_select skip256 : Elapsed 0.051 ms (5.110 ms / 100) 5.222 -> 5.225 ( +0.06%) [ +0.33% +0.00% +0.21% / +0.19% +0.23% +0.06%] index_select spread : Elapsed 0.052 ms (5.239 ms / 100) 5.332 -> 5.336 ( +0.08%) [ +0.00% +0.00% +0.00% / +0.08% +0.13% +0.17%] index_select strided 3 : Elapsed 0.053 ms (5.332 ms / 100) 5.307 -> 5.316 ( +0.17%) [ +0.00% +0.13% +0.17% / +0.17% +0.51% +0.90%] index_select strided 5 : Elapsed 0.053 ms (5.307 ms / 100) 5.309 -> 5.312 ( +0.06%) [ +0.11% +0.08% +0.00% / +0.17% +0.13% +0.06%] index_select strided 7 : Elapsed 0.053 ms (5.315 ms / 100) 5.334 -> 5.307 ( -0.51%) [ +0.21% +0.00% +0.07% / -0.02% -0.13% -0.51%] index_select strided 8 : Elapsed 0.053 ms (5.345 ms / 100) 5.259 -> 5.261 ( +0.04%) [ +0.29% +0.15% +0.00% / +0.04% +0.29% +0.36%] index_select strided 16 : Elapsed 0.053 ms (5.274 ms / 100) 5.136 -> 5.141 ( +0.10%) [ +0.12% +0.00% +0.00% / +0.10% +0.55% +0.68%] index_select strided 64 : Elapsed 0.051 ms (5.142 ms / 100) 5.305 -> 5.316 ( +0.21%) [ +0.41% +0.09% +0.00% / +0.23% +0.26% +0.21%] index_select strided 100 : Elapsed 0.053 ms (5.327 ms / 100) 5.302 -> 5.308 ( +0.11%) [ +0.32% +0.00% +0.11% / +0.11% +0.26% +0.21%] index_select strided 255 : Elapsed 0.053 ms (5.319 ms / 100) 5.330 -> 5.330 ( +0.00%) [ +0.08% +0.00% +0.26% / +0.04% +0.00% +0.09%] index_select random : Elapsed 0.053 ms (5.334 ms / 100) 5.234 -> 5.209 ( -0.48%) [ +0.15% +0.00% +0.04% / -0.08% -0.23% -0.48%] index_select random_sorted : Elapsed 0.052 ms (5.242 ms / 100) B = [512, 255] (stride (1, 512)) dim = 0 fill_cnt = 256 good 2.647 -> 2.399 ( -9.37%) [ +0.42% +0.15% +0.00% / -9.10% -9.37% -9.33%] index_fill_ const : Elapsed 0.027 ms (2.658 ms / 100) good 2.664 -> 2.465 ( -7.47%) [ +0.64% +0.00% +0.71% / -7.47% -7.28% -7.13%] index_fill_ linear : Elapsed 0.027 ms (2.681 ms / 100) good 2.685 -> 2.470 ( -8.01%) [ +0.00% +0.00% +0.11% / -8.01% -7.97% -7.90%] index_fill_ reverse : Elapsed 0.027 ms (2.685 ms / 100) good 2.663 -> 2.409 ( -9.54%) [ +0.53% +0.00% +0.15% / -9.54% -9.46% -9.43%] index_fill_ skip64 : Elapsed 0.027 ms (2.677 ms / 100) good 2.649 -> 2.390 ( -9.78%) [ +0.19% +0.00% +0.26% / -9.78% -9.44% -9.63%] index_fill_ skip256 : Elapsed 0.027 ms (2.654 ms / 100) good 2.913 -> 2.713 ( -6.87%) [ +0.34% +0.34% +0.00% / -6.49% -6.45% -6.87%] index_fill_ spread : Elapsed 0.029 ms (2.923 ms / 100) good 2.945 -> 2.690 ( -8.66%) [ +0.00% +0.24% +0.48% / -8.66% -7.50% -7.88%] index_fill_ strided 3 : Elapsed 0.029 ms (2.945 ms / 100) good 3.012 -> 2.724 ( -9.56%) [ +0.10% +0.00% +0.07% / -9.16% -9.56% -9.40%] index_fill_ strided 5 : Elapsed 0.030 ms (3.015 ms / 100) good 3.031 -> 2.781 ( -8.25%) [ +0.03% +0.16% +0.00% / -7.42% -8.15% -8.25%] index_fill_ strided 7 : Elapsed 0.030 ms (3.032 ms / 100) good 3.024 -> 2.786 ( -7.87%) [ +0.20% +0.00% +0.07% / -7.87% -6.85% -7.41%] index_fill_ strided 8 : Elapsed 0.030 ms (3.030 ms / 100) good 2.836 -> 2.620 ( -7.62%) [ +0.14% +0.00% +0.00% / -7.30% -7.51% -7.62%] index_fill_ strided 16 : Elapsed 0.028 ms (2.840 ms / 100) good 2.688 -> 2.446 ( -9.00%) [ +0.00% +0.22% +0.71% / -9.00% -8.82% -8.89%] index_fill_ strided 64 : Elapsed 0.027 ms (2.688 ms / 100) good 3.036 -> 2.877 ( -5.24%) [ +0.00% +0.46% +0.49% / -5.24% -4.55% -4.55%] index_fill_ strided 100 : Elapsed 0.030 ms (3.036 ms / 100) good 2.931 -> 2.690 ( -8.22%) [ +0.00% +0.00% +0.00% / -8.19% -7.54% -8.22%] index_fill_ strided 255 : Elapsed 0.029 ms (2.931 ms / 100) good 2.652 -> 2.396 ( -9.65%) [ +0.11% +0.00% +0.04% / -9.46% -9.65% -9.31%] index_fill_ strided 256 : Elapsed 0.027 ms (2.655 ms / 100) good 2.935 -> 2.717 ( -7.43%) [ +0.48% +0.00% +0.48% / -6.85% -7.12% -7.43%] index_fill_ strided 257 : Elapsed 0.029 ms (2.949 ms / 100) good 3.013 -> 2.853 ( -5.31%) [ +0.00% +0.10% +0.20% / -4.91% -5.31% -5.24%] index_fill_ random : Elapsed 0.030 ms (3.013 ms / 100) good 2.917 -> 2.694 ( -7.64%) [ +0.00% +0.24% +0.17% / -7.64% -6.99% -7.27%] index_fill_ random_sorted : Elapsed 0.029 ms (2.917 ms / 100) good 3.026 -> 2.862 ( -5.42%) [ +0.07% +0.33% +0.00% / -5.42% -4.73% -4.40%] index_fill_ perm : Elapsed 0.030 ms (3.028 ms / 100) good 2.921 -> 2.701 ( -7.53%) [ +0.31% +0.51% +0.00% / -7.53% -7.43% -7.02%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.930 ms / 100) B = [512, 255] (stride (1, 512)) A = [256, 255] (stride (255, 1)) dim = 0 5.065 -> 4.961 ( -2.05%) [ +0.00% +0.12% +0.30% / -2.05% -2.03% -1.90%] index_add_ linear : Elapsed 0.051 ms (5.065 ms / 100) 4.846 -> 4.879 ( +0.68%) [ +0.00% +0.12% +0.25% / +0.68% +1.03% +1.20%] index_copy_ linear : Elapsed 0.048 ms (4.846 ms / 100) 5.081 -> 4.964 ( -2.30%) [ +0.00% +0.35% +0.22% / -2.30% -2.16% -2.22%] index_add_ reverse : Elapsed 0.051 ms (5.081 ms / 100) 4.864 -> 4.877 ( +0.27%) [ +0.19% +0.00% +0.21% / +0.27% +0.45% +0.47%] index_copy_ reverse : Elapsed 0.049 ms (4.873 ms / 100) 5.332 -> 5.184 ( -2.78%) [ +0.00% +0.00% +0.02% / -2.78% -2.66% -2.53%] index_add_ spread : Elapsed 0.053 ms (5.332 ms / 100) 5.144 -> 5.214 ( +1.36%) [ +0.04% +0.00% +0.02% / +1.56% +1.44% +1.36%] index_copy_ spread : Elapsed 0.051 ms (5.146 ms / 100) 5.368 -> 5.211 ( -2.92%) [ +0.28% +0.00% +0.15% / -2.83% -2.92% -2.92%] index_add_ strided 3 : Elapsed 0.054 ms (5.383 ms / 100) 5.223 -> 5.214 ( -0.17%) [ +0.04% +0.00% +0.04% / +0.29% -0.04% -0.17%] index_copy_ strided 3 : Elapsed 0.052 ms (5.225 ms / 100) 5.387 -> 5.237 ( -2.78%) [ +0.32% +0.04% +0.00% / -2.78% -2.21% -2.17%] index_add_ strided 5 : Elapsed 0.054 ms (5.404 ms / 100) 5.193 -> 5.224 ( +0.60%) [ +0.39% +0.00% +0.39% / +0.60% +0.96% +1.02%] index_copy_ strided 5 : Elapsed 0.052 ms (5.213 ms / 100) 5.385 -> 5.265 ( -2.23%) [ +0.15% +0.07% +0.00% / -1.78% -2.17% -2.23%] index_add_ strided 7 : Elapsed 0.054 ms (5.393 ms / 100) 5.224 -> 5.236 ( +0.23%) [ +0.31% +0.25% +0.00% / +0.54% +0.23% +0.52%] index_copy_ strided 7 : Elapsed 0.052 ms (5.240 ms / 100) 5.370 -> 5.207 ( -3.04%) [ +0.11% +0.00% +0.54% / -3.02% -3.04% -2.98%] index_add_ strided 255 : Elapsed 0.054 ms (5.376 ms / 100) 5.133 -> 5.215 ( +1.60%) [ +0.29% +0.00% +0.41% / +1.62% +1.62% +1.60%] index_copy_ strided 255 : Elapsed 0.051 ms (5.148 ms / 100) 5.387 -> 5.188 ( -3.69%) [ +0.19% +0.00% +0.13% / -3.68% -3.60% -3.69%] index_add_ strided 257 : Elapsed 0.054 ms (5.397 ms / 100) 5.139 -> 5.212 ( +1.42%) [ +0.35% +0.00% +0.37% / +1.42% +1.58% +1.42%] index_copy_ strided 257 : Elapsed 0.052 ms (5.157 ms / 100) 5.444 -> 5.341 ( -1.89%) [ +0.09% +0.00% +0.13% / -0.88% -1.87% -1.89%] index_add_ perm : Elapsed 0.054 ms (5.449 ms / 100) 5.237 -> 5.264 ( +0.52%) [ +0.02% +0.00% +0.17% / +1.03% +0.52% +0.52%] index_copy_ perm : Elapsed 0.052 ms (5.238 ms / 100) 5.348 -> 5.191 ( -2.94%) [ +0.09% +0.00% +0.09% / -2.94% -2.58% -2.47%] index_add_ perm_sorted : Elapsed 0.054 ms (5.353 ms / 100) 5.135 -> 5.205 ( +1.36%) [ +0.00% +0.12% +0.41% / +1.36% +1.79% +1.87%] index_copy_ perm_sorted : Elapsed 0.051 ms (5.135 ms / 100) good 5.394 -> 4.968 ( -7.90%) [ +0.00% +0.09% +0.15% / -7.90% -7.88% -7.86%] index_select const : Elapsed 0.054 ms (5.394 ms / 100) 5.317 -> 5.229 ( -1.66%) [ +0.19% +0.00% +0.09% / -1.26% -1.66% -1.52%] index_select wrap : Elapsed 0.053 ms (5.327 ms / 100) 5.380 -> 5.184 ( -3.64%) [ +0.15% +0.00% +0.07% / -3.64% -3.38% -3.46%] index_select linear : Elapsed 0.054 ms (5.388 ms / 100) 5.346 -> 5.194 ( -2.84%) [ +0.17% +0.26% +0.00% / -2.84% -2.54% -2.62%] index_select reverse : Elapsed 0.054 ms (5.355 ms / 100) good 5.365 -> 4.963 ( -7.49%) [ +0.00% +0.15% +0.02% / -7.27% -7.44% -7.49%] index_select skip64 : Elapsed 0.054 ms (5.365 ms / 100) good 5.402 -> 4.961 ( -8.16%) [ +0.00% +0.06% +0.06% / -8.15% -8.16% -7.94%] index_select skip256 : Elapsed 0.054 ms (5.402 ms / 100) 5.296 -> 5.200 ( -1.81%) [ +0.23% +0.02% +0.00% / -1.81% -1.32% -1.32%] index_select spread : Elapsed 0.053 ms (5.308 ms / 100) 5.368 -> 5.279 ( -1.66%) [ +0.24% +0.00% +0.19% / -1.55% -1.53% -1.66%] index_select strided 3 : Elapsed 0.054 ms (5.381 ms / 100) 5.340 -> 5.262 ( -1.46%) [ +0.04% +0.00% +0.19% / -1.46% -0.58% -0.58%] index_select strided 5 : Elapsed 0.053 ms (5.342 ms / 100) 5.389 -> 5.264 ( -2.32%) [ +0.37% +0.00% +0.11% / -2.32% -2.21% -2.15%] index_select strided 7 : Elapsed 0.054 ms (5.409 ms / 100) 5.313 -> 5.222 ( -1.71%) [ +0.04% +0.00% +0.09% / -1.71% -1.58% -1.54%] index_select strided 8 : Elapsed 0.053 ms (5.315 ms / 100) 5.292 -> 5.079 ( -4.02%) [ +0.00% +0.15% +0.04% / -4.02% -3.85% -4.01%] index_select strided 16 : Elapsed 0.053 ms (5.292 ms / 100) good 5.295 -> 4.975 ( -6.04%) [ +0.36% +0.00% +0.11% / -6.04% -5.61% -5.85%] index_select strided 64 : Elapsed 0.053 ms (5.314 ms / 100) 5.368 -> 5.140 ( -4.25%) [ +0.24% +0.00% +0.22% / -3.86% -4.10% -4.25%] index_select strided 100 : Elapsed 0.054 ms (5.381 ms / 100) 5.352 -> 5.252 ( -1.87%) [ +0.04% +0.00% +0.07% / -1.87% -1.72% -1.76%] index_select strided 255 : Elapsed 0.054 ms (5.354 ms / 100) 5.351 -> 5.221 ( -2.43%) [ +0.11% +0.17% +0.00% / -1.70% -2.43% -2.32%] index_select random : Elapsed 0.054 ms (5.357 ms / 100) 5.328 -> 5.179 ( -2.80%) [ +0.21% +0.00% +0.02% / -2.46% -2.80% -2.53%] index_select random_sorted : Elapsed 0.053 ms (5.339 ms / 100) B = [512, 255] (stride (1, 512)) A = [256, 255] (stride (1, 256)) dim = 0 good 5.213 -> 4.876 ( -6.46%) [ +0.00% +0.35% +0.29% / -6.46% -6.23% -6.33%] index_add_ linear : Elapsed 0.052 ms (5.213 ms / 100) 5.026 -> 4.788 ( -4.74%) [ +0.24% +0.24% +0.00% / -4.74% -4.60% -4.66%] index_copy_ linear : Elapsed 0.050 ms (5.038 ms / 100) good 5.242 -> 4.873 ( -7.04%) [ +0.00% +0.11% +0.08% / -6.85% -7.04% -7.04%] index_add_ reverse : Elapsed 0.052 ms (5.242 ms / 100) good 5.041 -> 4.769 ( -5.40%) [ +0.00% +0.08% +0.06% / -4.98% -5.14% -5.40%] index_copy_ reverse : Elapsed 0.050 ms (5.041 ms / 100) good 5.483 -> 5.050 ( -7.90%) [ +0.00% +0.24% +0.42% / -7.90% -7.24% -7.30%] index_add_ spread : Elapsed 0.055 ms (5.483 ms / 100) 5.320 -> 5.083 ( -4.45%) [ +0.00% +0.17% +0.24% / -4.45% -4.29% -4.23%] index_copy_ spread : Elapsed 0.053 ms (5.320 ms / 100) good 5.539 -> 5.069 ( -8.49%) [ +0.22% +0.05% +0.00% / -8.23% -8.34% -8.49%] index_add_ strided 3 : Elapsed 0.056 ms (5.551 ms / 100) good 5.403 -> 5.079 ( -6.00%) [ +0.17% +0.11% +0.00% / -5.77% -5.83% -6.00%] index_copy_ strided 3 : Elapsed 0.054 ms (5.412 ms / 100) good 5.575 -> 5.094 ( -8.63%) [ +0.02% +0.07% +0.00% / -8.63% -8.09% -7.87%] index_add_ strided 5 : Elapsed 0.056 ms (5.576 ms / 100) good 5.426 -> 5.095 ( -6.10%) [ +0.00% +0.07% +0.09% / -6.10% -5.92% -5.86%] index_copy_ strided 5 : Elapsed 0.054 ms (5.426 ms / 100) good 5.563 -> 5.137 ( -7.66%) [ +0.54% +0.07% +0.00% / -6.99% -7.66% -7.66%] index_add_ strided 7 : Elapsed 0.056 ms (5.593 ms / 100) good 5.489 -> 5.102 ( -7.05%) [ +0.20% +0.04% +0.00% / -6.83% -7.05% -7.01%] index_copy_ strided 7 : Elapsed 0.055 ms (5.500 ms / 100) good 5.564 -> 5.076 ( -8.77%) [ +0.00% +0.31% +0.77% / -8.77% -8.47% -8.47%] index_add_ strided 255 : Elapsed 0.056 ms (5.564 ms / 100) good 5.358 -> 5.089 ( -5.02%) [ +0.00% +0.15% +0.37% / -4.85% -4.95% -5.02%] index_copy_ strided 255 : Elapsed 0.054 ms (5.358 ms / 100) good 5.578 -> 5.070 ( -9.11%) [ +0.00% +0.07% +0.18% / -9.11% -9.00% -9.09%] index_add_ strided 257 : Elapsed 0.056 ms (5.578 ms / 100) 5.358 -> 5.094 ( -4.93%) [ +0.22% +0.28% +0.00% / -4.70% -4.93% -4.83%] index_copy_ strided 257 : Elapsed 0.054 ms (5.370 ms / 100) good 5.625 -> 5.203 ( -7.50%) [ +0.00% +0.20% +0.36% / -7.50% -7.13% -7.32%] index_add_ perm : Elapsed 0.056 ms (5.625 ms / 100) good 5.444 -> 5.088 ( -6.54%) [ +0.00% +0.22% +0.33% / -6.54% -6.06% -6.17%] index_copy_ perm : Elapsed 0.054 ms (5.444 ms / 100) good 5.500 -> 5.072 ( -7.78%) [ +0.07% +0.00% +0.07% / -7.73% -7.78% -7.60%] index_add_ perm_sorted : Elapsed 0.055 ms (5.504 ms / 100) 5.323 -> 5.069 ( -4.77%) [ +0.17% +0.08% +0.00% / -4.77% -4.75% -4.68%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.332 ms / 100) Good 5.607 -> 4.966 (-11.43%) [ +0.18% +0.00% +0.21% / -11.43% -11.38% -11.27%] index_select const : Elapsed 0.056 ms (5.617 ms / 100) Good 5.719 -> 5.062 (-11.49%) [ +0.00% +0.14% +0.09% / -11.26% -11.37% -11.49%] index_select wrap : Elapsed 0.057 ms (5.719 ms / 100) Good 5.750 -> 5.089 (-11.50%) [ +0.19% +0.00% +0.10% / -11.50% -11.18% -11.04%] index_select linear : Elapsed 0.058 ms (5.761 ms / 100) good 5.620 -> 5.151 ( -8.35%) [ +0.11% +0.00% +0.14% / -8.02% -8.35% -8.29%] index_select reverse : Elapsed 0.056 ms (5.626 ms / 100) Good 5.631 -> 4.999 (-11.22%) [ +0.20% +0.12% +0.00% / -11.22% -11.17% -11.21%] index_select skip64 : Elapsed 0.056 ms (5.642 ms / 100) Good 5.635 -> 4.972 (-11.77%) [ +0.00% +0.07% +0.00% / -11.55% -11.77% -11.68%] index_select skip256 : Elapsed 0.056 ms (5.635 ms / 100) good 5.684 -> 5.157 ( -9.27%) [ +0.00% +0.04% +0.04% / -8.78% -9.25% -9.27%] index_select spread : Elapsed 0.057 ms (5.684 ms / 100) Good 5.819 -> 5.076 (-12.77%) [ +0.00% +0.09% +0.02% / -12.77% -12.75% -12.75%] index_select strided 3 : Elapsed 0.058 ms (5.819 ms / 100) Good 5.806 -> 5.086 (-12.40%) [ +0.07% +0.12% +0.00% / -12.40% -12.28% -12.00%] index_select strided 5 : Elapsed 0.058 ms (5.810 ms / 100) Good 5.854 -> 5.110 (-12.71%) [ +0.09% +0.12% +0.00% / -12.71% -12.30% -12.08%] index_select strided 7 : Elapsed 0.059 ms (5.859 ms / 100) Good 5.822 -> 5.127 (-11.94%) [ +0.19% +0.02% +0.00% / -11.94% -11.59% -11.49%] index_select strided 8 : Elapsed 0.058 ms (5.833 ms / 100) Good 5.740 -> 5.065 (-11.76%) [ +0.00% +0.19% +0.10% / -11.74% -11.67% -11.76%] index_select strided 16 : Elapsed 0.057 ms (5.740 ms / 100) Good 5.669 -> 4.988 (-12.01%) [ +0.12% +0.00% +0.07% / -11.94% -12.01% -11.71%] index_select strided 64 : Elapsed 0.057 ms (5.676 ms / 100) Good 5.856 -> 5.137 (-12.28%) [ +0.00% +0.10% +0.12% / -12.28% -12.00% -11.94%] index_select strided 100 : Elapsed 0.059 ms (5.856 ms / 100) Good 5.747 -> 5.096 (-11.33%) [ +0.00% +0.31% +0.02% / -11.33% -11.12% -11.29%] index_select strided 255 : Elapsed 0.057 ms (5.747 ms / 100) Good 5.796 -> 5.102 (-11.97%) [ +0.00% +0.24% +0.09% / -11.90% -11.77% -11.97%] index_select random : Elapsed 0.058 ms (5.796 ms / 100) good 5.665 -> 5.163 ( -8.86%) [ +0.07% +0.19% +0.00% / -8.77% -8.77% -8.86%] index_select random_sorted : Elapsed 0.057 ms (5.669 ms / 100) out_shape = [256, 512] in_shape = [256, 255] idx_dim = 1 B = [256, 512] (stride (512, 1)) dim = 1 fill_cnt = 255 good 2.653 -> 2.397 ( -9.65%) [ +0.00% +0.11% +0.00% / -9.61% -9.65% -9.42%] index_fill_ const : Elapsed 0.027 ms (2.653 ms / 100) good 2.699 -> 2.440 ( -9.60%) [ +0.15% +0.00% +0.04% / -9.23% -9.60% -9.48%] index_fill_ linear : Elapsed 0.027 ms (2.703 ms / 100) good 2.685 -> 2.443 ( -9.01%) [ +0.00% +0.04% +0.04% / -9.01% -8.60% -8.98%] index_fill_ reverse : Elapsed 0.027 ms (2.685 ms / 100) good 2.628 -> 2.388 ( -9.13%) [ +0.04% +0.23% +0.00% / -8.75% -9.13% -9.02%] index_fill_ skip64 : Elapsed 0.026 ms (2.629 ms / 100) good 2.649 -> 2.398 ( -9.48%) [ +0.34% +0.26% +0.00% / -9.48% -8.64% -9.06%] index_fill_ skip256 : Elapsed 0.027 ms (2.658 ms / 100) good 2.914 -> 2.687 ( -7.79%) [ +0.45% +0.00% +0.14% / -7.79% -7.52% -7.34%] index_fill_ spread : Elapsed 0.029 ms (2.927 ms / 100) good 2.947 -> 2.702 ( -8.31%) [ +0.10% +0.00% +0.10% / -8.31% -8.28% -8.28%] index_fill_ strided 3 : Elapsed 0.030 ms (2.950 ms / 100) good 2.997 -> 2.745 ( -8.41%) [ +0.00% +0.60% +0.37% / -8.41% -8.21% -8.17%] index_fill_ strided 5 : Elapsed 0.030 ms (2.997 ms / 100) good 3.024 -> 2.795 ( -7.57%) [ +0.30% +0.03% +0.00% / -7.54% -7.47% -7.57%] index_fill_ strided 7 : Elapsed 0.030 ms (3.033 ms / 100) good 3.033 -> 2.816 ( -7.15%) [ +0.00% +0.30% +0.26% / -6.46% -6.89% -7.15%] index_fill_ strided 8 : Elapsed 0.030 ms (3.033 ms / 100) good 2.843 -> 2.665 ( -6.26%) [ +0.18% +0.67% +0.00% / -6.26% -6.19% -6.23%] index_fill_ strided 16 : Elapsed 0.028 ms (2.848 ms / 100) good 2.696 -> 2.462 ( -8.68%) [ +0.07% +0.11% +0.00% / -8.68% -8.46% -8.12%] index_fill_ strided 64 : Elapsed 0.027 ms (2.698 ms / 100) 3.041 -> 2.901 ( -4.60%) [ +0.10% +0.00% +0.03% / -4.44% -4.60% -4.57%] index_fill_ strided 100 : Elapsed 0.030 ms (3.044 ms / 100) good 2.931 -> 2.696 ( -8.02%) [ +0.38% +0.00% +0.48% / -7.98% -8.02% -7.92%] index_fill_ strided 255 : Elapsed 0.029 ms (2.942 ms / 100) Good 2.665 -> 2.396 (-10.09%) [ +0.00% +0.41% +0.04% / -10.09% -9.94% -9.68%] index_fill_ strided 256 : Elapsed 0.027 ms (2.665 ms / 100) good 2.930 -> 2.695 ( -8.02%) [ +0.00% +0.14% +0.20% / -8.02% -7.85% -7.99%] index_fill_ strided 257 : Elapsed 0.029 ms (2.930 ms / 100) 3.001 -> 2.858 ( -4.77%) [ +0.00% +0.00% +0.13% / -4.77% -4.27% -4.23%] index_fill_ random : Elapsed 0.030 ms (3.001 ms / 100) good 2.897 -> 2.682 ( -7.42%) [ +0.00% +0.31% +0.10% / -7.42% -7.28% -6.90%] index_fill_ random_sorted : Elapsed 0.029 ms (2.897 ms / 100) good 3.022 -> 2.865 ( -5.20%) [ +0.00% +0.46% +0.43% / -4.24% -4.86% -5.20%] index_fill_ perm : Elapsed 0.030 ms (3.022 ms / 100) good 2.923 -> 2.689 ( -8.01%) [ +0.00% +0.00% +0.07% / -8.01% -7.80% -7.77%] index_fill_ perm_sorted : Elapsed 0.029 ms (2.923 ms / 100) B = [256, 512] (stride (512, 1)) A = [256, 255] (stride (255, 1)) dim = 1 good 5.274 -> 4.906 ( -6.98%) [ +0.11% +0.00% +0.19% / -6.56% -6.98% -6.83%] index_add_ linear : Elapsed 0.053 ms (5.280 ms / 100) good 5.067 -> 4.793 ( -5.41%) [ +0.00% +0.04% +0.02% / -4.74% -5.29% -5.41%] index_copy_ linear : Elapsed 0.051 ms (5.067 ms / 100) good 5.267 -> 4.902 ( -6.93%) [ +0.00% +0.15% +0.44% / -6.93% -6.84% -6.84%] index_add_ reverse : Elapsed 0.053 ms (5.267 ms / 100) 5.046 -> 4.799 ( -4.89%) [ +0.06% +0.00% +0.40% / -4.84% -4.72% -4.89%] index_copy_ reverse : Elapsed 0.050 ms (5.049 ms / 100) good 5.529 -> 5.082 ( -8.08%) [ +0.13% +0.00% +0.07% / -7.63% -8.08% -7.99%] index_add_ spread : Elapsed 0.055 ms (5.536 ms / 100) 5.331 -> 5.083 ( -4.65%) [ +0.02% +0.15% +0.00% / -4.26% -4.65% -4.50%] index_copy_ spread : Elapsed 0.053 ms (5.332 ms / 100) good 5.583 -> 5.094 ( -8.76%) [ +0.47% +0.20% +0.00% / -8.76% -8.51% -8.56%] index_add_ strided 3 : Elapsed 0.056 ms (5.609 ms / 100) good 5.387 -> 5.092 ( -5.48%) [ +0.24% +0.28% +0.00% / -5.36% -5.48% -5.48%] index_copy_ strided 3 : Elapsed 0.054 ms (5.400 ms / 100) good 5.582 -> 5.113 ( -8.40%) [ +0.30% +0.34% +0.00% / -8.12% -8.40% -8.17%] index_add_ strided 5 : Elapsed 0.056 ms (5.599 ms / 100) good 5.433 -> 5.099 ( -6.15%) [ +0.42% +0.07% +0.00% / -5.87% -6.13% -6.15%] index_copy_ strided 5 : Elapsed 0.055 ms (5.456 ms / 100) good 5.561 -> 5.156 ( -7.28%) [ +0.05% +0.31% +0.00% / -6.92% -7.25% -7.28%] index_add_ strided 7 : Elapsed 0.056 ms (5.564 ms / 100) good 5.458 -> 5.115 ( -6.28%) [ +0.31% +0.22% +0.00% / -5.94% -6.28% -6.08%] index_copy_ strided 7 : Elapsed 0.055 ms (5.475 ms / 100) good 5.591 -> 5.119 ( -8.44%) [ +0.00% +0.21% +0.39% / -8.44% -8.28% -8.03%] index_add_ strided 255 : Elapsed 0.056 ms (5.591 ms / 100) good 5.386 -> 5.109 ( -5.14%) [ +0.00% +0.00% +0.06% / -5.14% -5.07% -4.99%] index_copy_ strided 255 : Elapsed 0.054 ms (5.386 ms / 100) good 5.580 -> 5.120 ( -8.24%) [ +0.18% +0.22% +0.00% / -8.06% -8.15% -8.24%] index_add_ strided 257 : Elapsed 0.056 ms (5.590 ms / 100) 5.376 -> 5.112 ( -4.91%) [ +0.13% +0.20% +0.00% / -4.89% -4.91% -4.84%] index_copy_ strided 257 : Elapsed 0.054 ms (5.383 ms / 100) good 5.655 -> 5.209 ( -7.89%) [ +0.32% +0.00% +0.11% / -7.21% -7.78% -7.89%] index_add_ perm : Elapsed 0.057 ms (5.673 ms / 100) good 5.467 -> 5.108 ( -6.57%) [ +0.38% +0.15% +0.00% / -5.96% -6.57% -6.37%] index_copy_ perm : Elapsed 0.055 ms (5.488 ms / 100) good 5.555 -> 5.084 ( -8.48%) [ +0.16% +0.18% +0.00% / -8.48% -8.48% -8.32%] index_add_ perm_sorted : Elapsed 0.056 ms (5.564 ms / 100) good 5.350 -> 5.080 ( -5.05%) [ +0.00% +0.24% +0.09% / -4.86% -5.05% -4.75%] index_copy_ perm_sorted : Elapsed 0.053 ms (5.350 ms / 100) Good 5.669 -> 4.966 (-12.40%) [ +0.00% +0.02% +0.04% / -12.17% -12.14% -12.40%] index_select const : Elapsed 0.057 ms (5.669 ms / 100) Good 5.734 -> 5.078 (-11.44%) [ +0.00% +0.02% +0.07% / -11.25% -11.20% -11.44%] index_select wrap : Elapsed 0.057 ms (5.734 ms / 100) Good 5.743 -> 5.090 (-11.37%) [ +0.10% +0.00% +0.16% / -11.37% -11.07% -11.13%] index_select linear : Elapsed 0.057 ms (5.749 ms / 100) good 5.615 -> 5.178 ( -7.78%) [ +0.12% +0.00% +0.20% / -7.64% -7.78% -7.50%] index_select reverse : Elapsed 0.056 ms (5.622 ms / 100) Good 5.647 -> 5.000 (-11.46%) [ +0.16% +0.11% +0.00% / -11.46% -11.23% -11.33%] index_select skip64 : Elapsed 0.057 ms (5.656 ms / 100) Good 5.634 -> 4.971 (-11.77%) [ +0.21% +0.07% +0.00% / -11.36% -11.77% -11.70%] index_select skip256 : Elapsed 0.056 ms (5.646 ms / 100) good 5.686 -> 5.176 ( -8.97%) [ +0.25% +0.00% +0.25% / -8.42% -8.97% -8.90%] index_select spread : Elapsed 0.057 ms (5.700 ms / 100) Good 5.840 -> 5.093 (-12.79%) [ +0.05% +0.21% +0.00% / -12.79% -12.62% -12.65%] index_select strided 3 : Elapsed 0.058 ms (5.843 ms / 100) Good 5.819 -> 5.086 (-12.60%) [ +0.03% +0.00% +0.05% / -12.60% -12.41% -12.41%] index_select strided 5 : Elapsed 0.058 ms (5.821 ms / 100) Good 5.812 -> 5.094 (-12.35%) [ +0.00% +0.07% +0.09% / -11.84% -12.13% -12.35%] index_select strided 7 : Elapsed 0.058 ms (5.812 ms / 100) Good 5.826 -> 5.092 (-12.60%) [ +0.12% +0.09% +0.00% / -12.03% -12.60% -12.31%] index_select strided 8 : Elapsed 0.058 ms (5.833 ms / 100) Good 5.834 -> 5.098 (-12.62%) [ +0.00% +0.05% +0.15% / -12.62% -12.27% -12.34%] index_select strided 16 : Elapsed 0.058 ms (5.834 ms / 100) Good 5.763 -> 5.106 (-11.40%) [ +0.33% +0.00% +0.10% / -11.40% -11.11% -10.93%] index_select strided 64 : Elapsed 0.058 ms (5.782 ms / 100) Good 5.816 -> 5.116 (-12.04%) [ +0.10% +0.09% +0.00% / -12.04% -11.74% -11.69%] index_select strided 100 : Elapsed 0.058 ms (5.822 ms / 100) Good 5.844 -> 5.110 (-12.56%) [ +0.00% +0.03% +0.24% / -12.56% -12.35% -12.46%] index_select random : Elapsed 0.058 ms (5.844 ms / 100) good 5.682 -> 5.176 ( -8.91%) [ +0.18% +0.02% +0.00% / -8.73% -8.71% -8.91%] index_select random_sorted : Elapsed 0.057 ms (5.692 ms / 100) B = [256, 512] (stride (512, 1)) A = [256, 255] (stride (1, 256)) dim = 1 5.124 -> 4.982 ( -2.77%) [ +0.59% +0.00% +0.21% / -2.22% -2.77% -2.48%] index_add_ linear : Elapsed 0.052 ms (5.154 ms / 100) 4.879 -> 4.910 ( +0.64%) [ +0.23% +0.00% +0.23% / +1.07% +0.64% +0.84%] index_copy_ linear : Elapsed 0.049 ms (4.890 ms / 100) 5.109 -> 4.969 ( -2.74%) [ +0.00% +0.23% +0.23% / -2.74% -2.19% -2.39%] index_add_ reverse : Elapsed 0.051 ms (5.109 ms / 100) 4.861 -> 4.910 ( +1.01%) [ +0.12% +0.00% +0.21% / +1.01% +1.13% +1.01%] index_copy_ reverse : Elapsed 0.049 ms (4.867 ms / 100) 5.381 -> 5.175 ( -3.83%) [ +0.00% +0.13% +0.04% / -3.18% -3.83% -3.74%] index_add_ spread : Elapsed 0.054 ms (5.381 ms / 100) 5.153 -> 5.210 ( +1.11%) [ +0.00% +0.10% +0.12% / +1.49% +1.11% +1.28%] index_copy_ spread : Elapsed 0.052 ms (5.153 ms / 100) 5.382 -> 5.204 ( -3.31%) [ +0.00% +0.30% +0.22% / -3.31% -3.27% -3.18%] index_add_ strided 3 : Elapsed 0.054 ms (5.382 ms / 100) 5.175 -> 5.225 ( +0.97%) [ +0.00% +0.04% +0.04% / +1.10% +1.08% +0.97%] index_copy_ strided 3 : Elapsed 0.052 ms (5.175 ms / 100) 5.369 -> 5.232 ( -2.55%) [ +0.13% +0.35% +0.00% / -2.29% -2.50% -2.55%] index_add_ strided 5 : Elapsed 0.054 ms (5.376 ms / 100) 5.204 -> 5.234 ( +0.58%) [ +0.00% +0.21% +0.02% / +0.94% +0.69% +0.58%] index_copy_ strided 5 : Elapsed 0.052 ms (5.204 ms / 100) 5.392 -> 5.297 ( -1.76%) [ +0.15% +0.02% +0.00% / -1.69% -1.74% -1.76%] index_add_ strided 7 : Elapsed 0.054 ms (5.400 ms / 100) 5.239 -> 5.260 ( +0.40%) [ +0.08% +0.00% +0.08% / +0.67% +0.40% +0.48%] index_copy_ strided 7 : Elapsed 0.052 ms (5.243 ms / 100) 5.402 -> 5.224 ( -3.30%) [ +0.13% +0.00% +0.07% / -3.02% -3.30% -3.15%] index_add_ strided 255 : Elapsed 0.054 ms (5.409 ms / 100) 5.168 -> 5.236 ( +1.32%) [ +0.15% +0.12% +0.00% / +1.63% +1.49% +1.32%] index_copy_ strided 255 : Elapsed 0.052 ms (5.176 ms / 100) 5.423 -> 5.217 ( -3.80%) [ +0.20% +0.00% +0.06% / -3.32% -3.71% -3.80%] index_add_ strided 257 : Elapsed 0.054 ms (5.434 ms / 100) 5.189 -> 5.239 ( +0.96%) [ +0.00% +0.23% +0.02% / +1.25% +0.96% +1.08%] index_copy_ strided 257 : Elapsed 0.052 ms (5.189 ms / 100) 5.441 -> 5.347 ( -1.73%) [ +0.00% +0.20% +0.20% / -1.16% -1.73% -1.62%] index_add_ perm : Elapsed 0.054 ms (5.441 ms / 100) 5.251 -> 5.267 ( +0.30%) [ +0.00% +0.21% +0.04% / +1.14% +0.55% +0.30%] index_copy_ perm : Elapsed 0.053 ms (5.251 ms / 100) 5.362 -> 5.186 ( -3.28%) [ +0.32% +0.00% +0.30% / -3.28% -3.11% -3.06%] index_add_ perm_sorted : Elapsed 0.054 ms (5.379 ms / 100) 5.140 -> 5.214 ( +1.44%) [ +0.45% +0.00% +0.16% / +1.44% +1.73% +1.71%] index_copy_ perm_sorted : Elapsed 0.052 ms (5.163 ms / 100) good 5.304 -> 4.969 ( -6.32%) [ +0.19% +0.00% +0.15% / -6.32% -6.26% -6.28%] index_select const : Elapsed 0.053 ms (5.314 ms / 100) 5.323 -> 5.216 ( -2.01%) [ +0.06% +0.00% +0.17% / -2.01% -1.99% -2.01%] index_select wrap : Elapsed 0.053 ms (5.326 ms / 100) 5.328 -> 5.147 ( -3.40%) [ +0.00% +0.15% +0.13% / -3.40% -3.17% -3.17%] index_select linear : Elapsed 0.053 ms (5.328 ms / 100) 5.320 -> 5.205 ( -2.16%) [ +0.09% +0.00% +0.02% / -2.05% -2.16% -2.14%] index_select reverse : Elapsed 0.053 ms (5.325 ms / 100) good 5.308 -> 4.956 ( -6.63%) [ +0.00% +0.02% +0.11% / -6.29% -6.20% -6.63%] index_select skip64 : Elapsed 0.053 ms (5.308 ms / 100) good 5.290 -> 4.967 ( -6.11%) [ +0.17% +0.04% +0.00% / -5.97% -6.09% -6.11%] index_select skip256 : Elapsed 0.053 ms (5.299 ms / 100) 5.284 -> 5.198 ( -1.63%) [ +0.00% +0.11% +0.09% / -1.63% -1.57% -1.53%] index_select spread : Elapsed 0.053 ms (5.284 ms / 100) 5.337 -> 5.239 ( -1.84%) [ +0.00% +0.07% +0.17% / -1.52% -1.84% -1.61%] index_select strided 3 : Elapsed 0.053 ms (5.337 ms / 100) 5.293 -> 5.478 ( +3.50%) [ +0.08% +0.13% +0.00% / +3.50% +4.70% +4.89%] index_select strided 5 : Elapsed 0.053 ms (5.297 ms / 100) 5.324 -> 5.241 ( -1.56%) [ +0.15% +0.06% +0.00% / -1.09% -1.56% -1.50%] index_select strided 7 : Elapsed 0.053 ms (5.332 ms / 100) 5.327 -> 5.230 ( -1.82%) [ +0.04% +0.00% +0.06% / -1.33% -1.82% -1.73%] index_select strided 8 : Elapsed 0.053 ms (5.329 ms / 100) 5.367 -> 5.224 ( -2.66%) [ +0.30% +0.15% +0.00% / -2.66% -2.61% -2.52%] index_select strided 16 : Elapsed 0.054 ms (5.383 ms / 100) 5.320 -> 5.255 ( -1.22%) [ +0.00% +0.06% +0.11% / -1.22% -0.98% -0.83%] index_select strided 64 : Elapsed 0.053 ms (5.320 ms / 100) 5.292 -> 5.502 ( +3.97%) [ +0.26% +0.26% +0.00% / +3.97% +5.05% +5.27%] index_select strided 100 : Elapsed 0.053 ms (5.306 ms / 100) 5.332 -> 5.199 ( -2.49%) [ +0.08% +0.00% +0.06% / -2.49% -2.06% -2.18%] index_select random : Elapsed 0.053 ms (5.336 ms / 100) 5.289 -> 5.160 ( -2.44%) [ +0.00% +0.17% +0.02% / -2.44% -2.29% -2.31%] index_select random_sorted : Elapsed 0.053 ms (5.289 ms / 100) B = [256, 512] (stride (1, 256)) dim = 1 fill_cnt = 255 2.418 -> 2.411 ( -0.29%) [ +0.62% +0.25% +0.00% / +0.17% -0.04% -0.29%] index_fill_ const : Elapsed 0.024 ms (2.433 ms / 100) 2.485 -> 2.476 ( -0.36%) [ +0.36% +0.00% +0.24% / -0.08% +0.16% -0.36%] index_fill_ linear : Elapsed 0.025 ms (2.494 ms / 100) 2.476 -> 2.481 ( +0.20%) [ +0.16% +0.04% +0.00% / +0.20% +0.53% +0.32%] index_fill_ reverse : Elapsed 0.025 ms (2.480 ms / 100) 2.408 -> 2.392 ( -0.66%) [ +0.00% +0.25% +0.04% / -0.66% -0.33% -0.25%] index_fill_ skip64 : Elapsed 0.024 ms (2.408 ms / 100) 2.412 -> 2.390 ( -0.91%) [ +0.08% +0.00% +0.08% / -0.50% -0.41% -0.91%] index_fill_ skip256 : Elapsed 0.024 ms (2.414 ms / 100) 2.474 -> 2.460 ( -0.57%) [ +0.40% +0.12% +0.00% / -0.57% +0.08% +0.00%] index_fill_ spread : Elapsed 0.025 ms (2.484 ms / 100) 2.489 -> 2.459 ( -1.21%) [ +0.08% +0.08% +0.00% / -1.21% -1.17% -1.12%] index_fill_ strided 3 : Elapsed 0.025 ms (2.491 ms / 100) 2.482 -> 2.459 ( -0.93%) [ +0.08% +0.00% +0.52% / +0.20% -0.93% -0.81%] index_fill_ strided 5 : Elapsed 0.025 ms (2.484 ms / 100) 2.478 -> 2.470 ( -0.32%) [ +0.00% +0.24% +0.81% / -0.32% +0.24% +0.52%] index_fill_ strided 7 : Elapsed 0.025 ms (2.478 ms / 100) 2.441 -> 2.419 ( -0.90%) [ +0.00% +0.16% +0.25% / -0.29% -0.90% -0.29%] index_fill_ strided 8 : Elapsed 0.024 ms (2.441 ms / 100) 2.425 -> 2.417 ( -0.33%) [ +0.00% +0.25% +0.29% / -0.33% +0.21% -0.16%] index_fill_ strided 16 : Elapsed 0.024 ms (2.425 ms / 100) 2.407 -> 2.393 ( -0.58%) [ +0.25% +0.00% +0.54% / -0.58% -0.37% -0.04%] index_fill_ strided 64 : Elapsed 0.024 ms (2.413 ms / 100) 2.431 -> 2.412 ( -0.78%) [ +0.33% +0.00% +0.12% / -0.78% -0.49% -0.53%] index_fill_ strided 100 : Elapsed 0.024 ms (2.439 ms / 100) 2.474 -> 2.462 ( -0.49%) [ +0.08% +0.00% +0.08% / -0.49% -0.16% -0.12%] index_fill_ strided 255 : Elapsed 0.025 ms (2.476 ms / 100) 2.435 -> 2.401 ( -1.40%) [ +0.00% +0.04% +0.00% / -1.36% -1.40% -1.23%] index_fill_ strided 256 : Elapsed 0.024 ms (2.435 ms / 100) 2.481 -> 2.465 ( -0.64%) [ +0.40% +0.32% +0.00% / +0.32% -0.64% -0.44%] index_fill_ strided 257 : Elapsed 0.025 ms (2.491 ms / 100) 2.465 -> 2.455 ( -0.41%) [ +0.00% +0.41% +0.16% / -0.41% -0.08% +0.04%] index_fill_ random : Elapsed 0.025 ms (2.465 ms / 100) 2.464 -> 2.446 ( -0.73%) [ +0.00% +0.32% +0.16% / -0.28% -0.28% -0.73%] index_fill_ random_sorted : Elapsed 0.025 ms (2.464 ms / 100) 2.484 -> 2.482 ( -0.08%) [ +0.00% +0.12% +0.64% / -0.08% +0.04% +0.36%] index_fill_ perm : Elapsed 0.025 ms (2.484 ms / 100) 2.475 -> 2.456 ( -0.77%) [ +0.04% +0.00% +0.08% / -0.53% -0.77% -0.73%] index_fill_ perm_sorted : Elapsed 0.025 ms (2.476 ms / 100) B = [256, 512] (stride (1, 256)) A = [256, 255] (stride (255, 1)) dim = 1 4.971 -> 4.963 ( -0.16%) [ +0.00% +0.06% +0.06% / +0.10% -0.06% -0.16%] index_add_ linear : Elapsed 0.050 ms (4.971 ms / 100) 4.906 -> 4.900 ( -0.12%) [ +0.22% +0.22% +0.00% / +0.41% -0.12% -0.12%] index_copy_ linear : Elapsed 0.049 ms (4.917 ms / 100) 4.968 -> 4.969 ( +0.02%) [ +0.00% +0.16% +0.20% / +0.40% +0.26% +0.02%] index_add_ reverse : Elapsed 0.050 ms (4.968 ms / 100) 4.895 -> 4.903 ( +0.16%) [ +0.00% +0.33% +0.20% / +1.86% +0.22% +0.16%] index_copy_ reverse : Elapsed 0.049 ms (4.895 ms / 100) 5.007 -> 4.971 ( -0.72%) [ +0.36% +0.04% +0.00% / -0.40% -0.56% -0.72%] index_add_ spread : Elapsed 0.050 ms (5.025 ms / 100) 4.929 -> 4.900 ( -0.59%) [ +0.30% +0.00% +0.04% / +1.74% -0.59% -0.43%] index_copy_ spread : Elapsed 0.049 ms (4.944 ms / 100) 4.971 -> 4.975 ( +0.08%) [ +0.14% +0.18% +0.00% / +0.08% +0.93% +0.82%] index_add_ strided 3 : Elapsed 0.050 ms (4.978 ms / 100) 4.894 -> 4.906 ( +0.25%) [ +0.27% +0.20% +0.00% / +0.25% +1.02% +0.88%] index_copy_ strided 3 : Elapsed 0.049 ms (4.907 ms / 100) 5.007 -> 4.965 ( -0.84%) [ +0.58% +0.14% +0.00% / -0.20% -0.84% -0.70%] index_add_ strided 5 : Elapsed 0.050 ms (5.036 ms / 100) 4.906 -> 4.888 ( -0.37%) [ +0.57% +0.00% +0.24% / +0.69% -0.37% -0.35%] index_copy_ strided 5 : Elapsed 0.049 ms (4.934 ms / 100) 4.993 -> 4.977 ( -0.32%) [ +0.00% +0.16% +0.28% / -0.32% +0.02% +0.18%] index_add_ strided 7 : Elapsed 0.050 ms (4.993 ms / 100) 4.907 -> 4.912 ( +0.10%) [ +0.16% +0.00% +0.12% / +0.22% +0.10% +0.12%] index_copy_ strided 7 : Elapsed 0.049 ms (4.915 ms / 100) 4.982 -> 4.984 ( +0.04%) [ +0.12% +0.00% +0.34% / +0.60% +0.10% +0.04%] index_add_ strided 255 : Elapsed 0.050 ms (4.988 ms / 100) 4.903 -> 4.902 ( -0.02%) [ +0.00% +0.08% +0.27% / +1.24% +0.27% -0.02%] index_copy_ strided 255 : Elapsed 0.049 ms (4.903 ms / 100) 4.981 -> 4.981 ( +0.00%) [ +0.16% +0.00% +0.16% / +0.14% +0.00% +0.04%] index_add_ strided 257 : Elapsed 0.050 ms (4.989 ms / 100) 4.904 -> 4.906 ( +0.04%) [ +0.08% +0.00% +0.24% / +0.45% +0.04% +0.14%] index_copy_ strided 257 : Elapsed 0.049 ms (4.908 ms / 100) 5.040 -> 4.986 ( -1.07%) [ +0.00% +0.77% +0.14% / -0.34% -1.07% -0.85%] index_add_ perm : Elapsed 0.050 ms (5.040 ms / 100) 4.944 -> 4.908 ( -0.73%) [ +0.00% +0.36% +0.08% / +0.24% -0.38% -0.73%] index_copy_ perm : Elapsed 0.049 ms (4.944 ms / 100) 5.004 -> 4.989 ( -0.30%) [ +0.00% +0.08% +0.30% / -0.30% -0.06% -0.04%] index_add_ perm_sorted : Elapsed 0.050 ms (5.004 ms / 100) 4.921 -> 4.923 ( +0.04%) [ +0.08% +0.00% +0.04% / +0.33% +0.10% +0.04%] index_copy_ perm_sorted : Elapsed 0.049 ms (4.925 ms / 100) 5.101 -> 5.105 ( +0.08%) [ +0.14% +0.00% +0.10% / +0.22% +0.08% +0.20%] index_select const : Elapsed 0.051 ms (5.108 ms / 100) 5.293 -> 5.304 ( +0.21%) [ +0.21% +0.00% +0.06% / +0.21% +0.70% +0.74%] index_select wrap : Elapsed 0.053 ms (5.304 ms / 100) 5.280 -> 5.294 ( +0.27%) [ +0.13% +0.00% +0.13% / +0.27% +0.61% +0.62%] index_select linear : Elapsed 0.053 ms (5.287 ms / 100) 5.158 -> 5.169 ( +0.21%) [ +0.16% +0.06% +0.00% / +0.21% +0.27% +0.25%] index_select reverse : Elapsed 0.052 ms (5.166 ms / 100) 5.163 -> 5.145 ( -0.35%) [ +0.17% +0.08% +0.00% / -0.04% -0.31% -0.35%] index_select skip64 : Elapsed 0.052 ms (5.172 ms / 100) 5.096 -> 5.087 ( -0.18%) [ +0.20% +0.00% +0.10% / +0.18% -0.06% -0.18%] index_select skip256 : Elapsed 0.051 ms (5.106 ms / 100) 5.202 -> 5.208 ( +0.12%) [ +0.27% +0.35% +0.00% / +0.12% +0.23% +0.21%] index_select spread : Elapsed 0.052 ms (5.216 ms / 100) 5.330 -> 5.331 ( +0.02%) [ +0.00% +0.06% +0.06% / +0.02% +0.21% +0.56%] index_select strided 3 : Elapsed 0.053 ms (5.330 ms / 100) 5.308 -> 5.325 ( +0.32%) [ +0.17% +0.23% +0.00% / +0.32% +0.81% +0.79%] index_select strided 5 : Elapsed 0.053 ms (5.317 ms / 100) 5.292 -> 5.301 ( +0.17%) [ +0.36% +0.00% +0.28% / +0.17% +0.59% +0.74%] index_select strided 7 : Elapsed 0.053 ms (5.311 ms / 100) 5.311 -> 5.306 ( -0.09%) [ +0.11% +0.08% +0.00% / -0.09% +0.32% +0.23%] index_select strided 8 : Elapsed 0.053 ms (5.317 ms / 100) 5.329 -> 5.339 ( +0.19%) [ +0.21% +0.00% +0.00% / +0.19% +0.30% +0.38%] index_select strided 16 : Elapsed 0.053 ms (5.340 ms / 100) 5.307 -> 5.314 ( +0.13%) [ +0.11% +0.00% +0.08% / +0.13% +1.06% +0.87%] index_select strided 64 : Elapsed 0.053 ms (5.313 ms / 100) 5.300 -> 5.297 ( -0.06%) [ +0.00% +0.21% +0.15% / -0.06% +0.09% +0.32%] index_select strided 100 : Elapsed 0.053 ms (5.300 ms / 100) 5.307 -> 5.311 ( +0.08%) [ +0.34% +0.19% +0.00% / +0.08% +0.38% +0.43%] index_select random : Elapsed 0.053 ms (5.325 ms / 100) 5.219 -> 5.220 ( +0.02%) [ +0.10% +0.00% +0.00% / +0.02% +0.23% +0.31%] index_select random_sorted : Elapsed 0.052 ms (5.224 ms / 100) B = [256, 512] (stride (1, 256)) A = [256, 255] (stride (1, 256)) dim = 1 4.881 -> 4.855 ( -0.53%) [ +0.10% +0.02% +0.00% / -0.33% -0.37% -0.53%] index_add_ linear : Elapsed 0.049 ms (4.886 ms / 100) 4.797 -> 4.770 ( -0.56%) [ +0.06% +0.00% +0.00% / -0.52% -0.50% -0.56%] index_copy_ linear : Elapsed 0.048 ms (4.800 ms / 100) 4.868 -> 4.862 ( -0.12%) [ +0.00% +0.47% +0.02% / -0.12% +0.02% +0.04%] index_add_ reverse : Elapsed 0.049 ms (4.868 ms / 100) 4.782 -> 4.771 ( -0.23%) [ +0.00% +0.33% +0.10% / -0.23% -0.15% -0.06%] index_copy_ reverse : Elapsed 0.048 ms (4.782 ms / 100) 4.877 -> 4.862 ( -0.31%) [ +0.37% +0.00% +0.39% / -0.14% -0.10% -0.31%] index_add_ spread : Elapsed 0.049 ms (4.895 ms / 100) 4.791 -> 4.767 ( -0.50%) [ +0.23% +0.00% +0.23% / -0.29% -0.38% -0.50%] index_copy_ spread : Elapsed 0.048 ms (4.802 ms / 100) 4.863 -> 4.882 ( +0.39%) [ +0.00% +0.62% +0.35% / +0.39% +0.84% +0.80%] index_add_ strided 3 : Elapsed 0.049 ms (4.863 ms / 100) 4.776 -> 4.788 ( +0.25%) [ +0.00% +0.42% +0.29% / +0.25% +0.82% +0.77%] index_copy_ strided 3 : Elapsed 0.048 ms (4.776 ms / 100) 4.939 -> 4.848 ( -1.84%) [ +0.14% +0.22% +0.00% / +0.22% -1.76% -1.84%] index_add_ strided 5 : Elapsed 0.049 ms (4.946 ms / 100) 4.851 -> 4.769 ( -1.69%) [ +0.04% +0.00% +0.00% / +0.16% -1.28% -1.69%] index_copy_ strided 5 : Elapsed 0.049 ms (4.853 ms / 100) 4.872 -> 4.874 ( +0.04%) [ +0.16% +0.00% +0.27% / +0.04% +0.21% +0.16%] index_add_ strided 7 : Elapsed 0.049 ms (4.880 ms / 100) 4.785 -> 4.783 ( -0.04%) [ +0.40% +0.00% +0.38% / +0.54% -0.04% +0.08%] index_copy_ strided 7 : Elapsed 0.048 ms (4.804 ms / 100) 4.866 -> 4.851 ( -0.31%) [ +0.70% +0.00% +0.51% / -0.08% +0.04% -0.31%] index_add_ strided 255 : Elapsed 0.049 ms (4.900 ms / 100) 4.799 -> 4.769 ( -0.63%) [ +0.44% +0.00% +0.02% / -0.13% -0.56% -0.63%] index_copy_ strided 255 : Elapsed 0.048 ms (4.820 ms / 100) 4.884 -> 4.871 ( -0.27%) [ +0.18% +0.39% +0.00% / -0.27% -0.04% -0.16%] index_add_ strided 257 : Elapsed 0.049 ms (4.893 ms / 100) 4.787 -> 4.782 ( -0.10%) [ +0.31% +0.46% +0.00% / +0.38% +0.02% -0.10%] index_copy_ strided 257 : Elapsed 0.048 ms (4.802 ms / 100) 4.890 -> 4.872 ( -0.37%) [ +0.25% +0.00% +0.20% / +0.08% -0.37% -0.25%] index_add_ perm : Elapsed 0.049 ms (4.902 ms / 100) 4.792 -> 4.781 ( -0.23%) [ +0.65% +0.00% +0.56% / +0.33% -0.23% -0.13%] index_copy_ perm : Elapsed 0.048 ms (4.823 ms / 100) 4.894 -> 4.848 ( -0.94%) [ +0.27% +0.00% +0.39% / -0.39% -0.94% -0.43%] index_add_ perm_sorted : Elapsed 0.049 ms (4.907 ms / 100) 4.805 -> 4.771 ( -0.71%) [ +0.27% +0.00% +0.12% / -0.31% -0.71% -0.31%] index_copy_ perm_sorted : Elapsed 0.048 ms (4.818 ms / 100) 4.949 -> 4.965 ( +0.32%) [ +0.28% +0.14% +0.00% / +0.32% +0.32% +0.44%] index_select const : Elapsed 0.050 ms (4.963 ms / 100) 5.119 -> 5.128 ( +0.18%) [ +0.00% +0.04% +0.06% / +0.18% +0.70% +0.88%] index_select wrap : Elapsed 0.051 ms (5.119 ms / 100) 5.094 -> 5.090 ( -0.08%) [ +0.00% +0.06% +0.16% / +0.00% -0.08% -0.04%] index_select linear : Elapsed 0.051 ms (5.094 ms / 100) 5.080 -> 5.078 ( -0.04%) [ +0.37% +0.00% +0.08% / +0.35% -0.04% +0.10%] index_select reverse : Elapsed 0.051 ms (5.099 ms / 100) 4.952 -> 4.960 ( +0.16%) [ +0.46% +0.00% +0.04% / +0.16% +0.18% +0.28%] index_select skip64 : Elapsed 0.050 ms (4.975 ms / 100) 4.997 -> 4.964 ( -0.66%) [ +0.20% +0.08% +0.00% / +0.22% -0.62% -0.66%] index_select skip256 : Elapsed 0.050 ms (5.007 ms / 100) 5.048 -> 5.056 ( +0.16%) [ +0.00% +0.20% +0.22% / +0.16% +0.24% +0.44%] index_select spread : Elapsed 0.050 ms (5.048 ms / 100) 5.019 -> 5.017 ( -0.04%) [ +0.12% +0.00% +0.02% / -0.04% +0.08% +0.02%] index_select strided 3 : Elapsed 0.050 ms (5.025 ms / 100) 4.971 -> 4.982 ( +0.22%) [ +0.04% +0.24% +0.00% / +0.22% +0.70% +0.95%] index_select strided 5 : Elapsed 0.050 ms (4.973 ms / 100) 5.124 -> 5.125 ( +0.02%) [ +0.00% +0.08% +0.02% / +0.02% +0.43% +0.51%] index_select strided 7 : Elapsed 0.051 ms (5.124 ms / 100) 5.119 -> 5.113 ( -0.12%) [ +0.29% +0.00% +0.25% / +0.31% +0.00% -0.12%] index_select strided 8 : Elapsed 0.051 ms (5.134 ms / 100) 5.129 -> 5.131 ( +0.04%) [ +0.14% +0.00% +0.08% / +0.19% +0.12% +0.04%] index_select strided 16 : Elapsed 0.051 ms (5.136 ms / 100) 5.117 -> 5.122 ( +0.10%) [ +0.27% +0.00% +0.18% / +0.14% +0.10% +0.14%] index_select strided 64 : Elapsed 0.051 ms (5.131 ms / 100) 4.980 -> 4.993 ( +0.26%) [ +0.24% +0.04% +0.00% / +0.26% +1.45% +1.49%] index_select strided 100 : Elapsed 0.050 ms (4.992 ms / 100) 5.084 -> 5.077 ( -0.14%) [ +0.31% +0.00% +0.31% / +0.63% -0.12% -0.14%] index_select random : Elapsed 0.051 ms (5.100 ms / 100) 5.063 -> 5.050 ( -0.26%) [ +0.22% +0.00% +0.28% / +0.43% -0.26% -0.16%] index_select random_sorted : Elapsed 0.051 ms (5.074 ms / 100) ==================== rep_count = 100 dimensions = [15, 1000, 2048] out_shape = [15, 2048] in_shape = [1000, 2048] idx_dim = 0 B = [15, 2048] (stride (2048, 1)) dim = 0 fill_cnt = 1000 8.030 -> 8.061 ( +0.39%) [ +0.86% +0.00% +0.15% / +0.40% +0.39% +0.50%] index_fill_ const : Elapsed 0.081 ms (8.099 ms / 100) 8.045 -> 8.139 ( +1.17%) [ +0.86% +0.09% +0.00% / +1.17% +1.53% +1.59%] index_fill_ linear : Elapsed 0.081 ms (8.114 ms / 100) 8.039 -> 8.100 ( +0.76%) [ +0.67% +0.00% +0.58% / +0.78% +0.83% +0.76%] index_fill_ reverse : Elapsed 0.081 ms (8.093 ms / 100) 8.030 -> 8.107 ( +0.96%) [ +0.68% +0.00% +0.32% / +0.96% +1.06% +1.08%] index_fill_ skip64 : Elapsed 0.081 ms (8.085 ms / 100) 7.952 -> 8.009 ( +0.72%) [ +0.70% +0.04% +0.00% / +0.72% +1.63% +1.63%] index_fill_ skip256 : Elapsed 0.080 ms (8.008 ms / 100) 8.052 -> 8.114 ( +0.77%) [ +1.07% +0.00% +0.26% / +0.92% +1.14% +0.77%] index_fill_ spread : Elapsed 0.081 ms (8.138 ms / 100) 7.976 -> 8.018 ( +0.53%) [ +0.48% +0.06% +0.00% / +0.53% +1.42% +1.43%] index_fill_ strided 3 : Elapsed 0.080 ms (8.014 ms / 100) 8.039 -> 8.091 ( +0.65%) [ +0.60% +0.20% +0.00% / +0.65% +1.12% +0.97%] index_fill_ strided 5 : Elapsed 0.081 ms (8.087 ms / 100) 8.015 -> 8.062 ( +0.59%) [ +0.76% +0.00% +0.06% / +0.69% +0.59% +0.64%] index_fill_ strided 7 : Elapsed 0.081 ms (8.076 ms / 100) 8.039 -> 8.099 ( +0.75%) [ +0.67% +0.11% +0.00% / +1.28% +0.75% +0.82%] index_fill_ strided 8 : Elapsed 0.081 ms (8.093 ms / 100) 8.030 -> 8.055 ( +0.31%) [ +0.68% +0.00% +0.11% / +0.65% +0.45% +0.31%] index_fill_ random : Elapsed 0.081 ms (8.085 ms / 100) 8.023 -> 8.104 ( +1.01%) [ +0.80% +0.22% +0.00% / +1.65% +1.08% +1.01%] index_fill_ random_sorted : Elapsed 0.081 ms (8.087 ms / 100) B = [15, 2048] (stride (2048, 1)) A = [1000, 2048] (stride (2048, 1)) dim = 0 1.068 -> 1.076 ( +0.75%) [ +0.75% +0.00% +0.00% / +0.75% +0.84% +0.75%] index_select const : Elapsed 0.011 ms (1.076 ms / 100) 1.065 -> 1.067 ( +0.19%) [ +1.13% +0.28% +0.00% / +1.03% +0.19% +0.28%] index_select wrap : Elapsed 0.011 ms (1.077 ms / 100) 1.067 -> 1.067 ( +0.00%) [ +0.84% +0.00% +0.00% / +0.75% +0.09% +0.00%] index_select linear : Elapsed 0.011 ms (1.076 ms / 100) 1.064 -> 1.068 ( +0.38%) [ +0.94% +0.09% +0.00% / +0.85% +0.38% +0.38%] index_select reverse : Elapsed 0.011 ms (1.074 ms / 100) 1.074 -> 1.080 ( +0.56%) [ +0.74% +0.00% +0.00% / +0.93% +0.65% +0.56%] index_select skip64 : Elapsed 0.011 ms (1.082 ms / 100) 1.075 -> 1.082 ( +0.65%) [ +0.74% +0.00% +0.00% / +0.65% +0.65% +0.74%] index_select skip256 : Elapsed 0.011 ms (1.083 ms / 100) 1.065 -> 1.069 ( +0.38%) [ +0.85% +0.00% +0.09% / +0.94% +0.38% +0.38%] index_select spread : Elapsed 0.011 ms (1.074 ms / 100) 1.069 -> 1.064 ( -0.47%) [ +0.75% +0.09% +0.00% / +0.75% -0.47% -0.37%] index_select strided 3 : Elapsed 0.011 ms (1.077 ms / 100) 1.068 -> 1.064 ( -0.37%) [ +1.03% +0.00% +0.00% / +0.84% -0.28% -0.37%] index_select strided 5 : Elapsed 0.011 ms (1.079 ms / 100) 1.066 -> 1.070 ( +0.38%) [ +0.84% +0.00% +0.00% / +0.84% +0.38% +0.38%] index_select strided 7 : Elapsed 0.011 ms (1.075 ms / 100) 1.066 -> 1.070 ( +0.38%) [ +0.84% +0.19% +0.00% / +0.84% +0.47% +0.38%] index_select strided 8 : Elapsed 0.011 ms (1.075 ms / 100) 1.062 -> 1.071 ( +0.85%) [ +1.22% +0.09% +0.00% / +0.94% +0.94% +0.85%] index_select strided 16 : Elapsed 0.011 ms (1.075 ms / 100) 1.063 -> 1.071 ( +0.75%) [ +0.94% +0.00% +0.00% / +0.94% +0.75% +0.85%] index_select strided 64 : Elapsed 0.011 ms (1.073 ms / 100) 1.067 -> 1.063 ( -0.37%) [ +0.84% +0.19% +0.00% / +0.84% -0.28% -0.37%] index_select strided 100 : Elapsed 0.011 ms (1.076 ms / 100) 1.068 -> 1.064 ( -0.37%) [ +0.94% +0.09% +0.00% / +0.84% -0.28% -0.37%] index_select strided 255 : Elapsed 0.011 ms (1.078 ms / 100) 1.065 -> 1.069 ( +0.38%) [ +0.85% +0.19% +0.00% / +0.94% +0.47% +0.38%] index_select strided 256 : Elapsed 0.011 ms (1.074 ms / 100) 1.063 -> 1.070 ( +0.66%) [ +1.13% +0.19% +0.00% / +1.22% +0.66% +0.66%] index_select strided 257 : Elapsed 0.011 ms (1.075 ms / 100) 1.062 -> 1.070 ( +0.75%) [ +0.94% +0.09% +0.00% / +1.13% +0.94% +0.75%] index_select random : Elapsed 0.011 ms (1.072 ms / 100) 1.063 -> 1.071 ( +0.75%) [ +0.94% +0.09% +0.00% / +1.03% +0.85% +0.75%] index_select random_sorted : Elapsed 0.011 ms (1.073 ms / 100) 1.064 -> 1.070 ( +0.56%) [ +0.94% +0.19% +0.00% / +1.88% +0.56% +0.56%] index_select perm : Elapsed 0.011 ms (1.074 ms / 100) 1.066 -> 1.070 ( +0.38%) [ +0.94% +0.00% +0.00% / +0.94% +0.38% +0.47%] index_select perm_sorted : Elapsed 0.011 ms (1.076 ms / 100) B = [15, 2048] (stride (2048, 1)) A = [1000, 2048] (stride (1, 1000)) dim = 0 1.268 -> 1.269 ( +0.08%) [ +0.87% +0.39% +0.00% / +0.95% +0.08% +0.39%] index_select const : Elapsed 0.013 ms (1.279 ms / 100) 1.285 -> 1.284 ( -0.08%) [ +0.93% +0.23% +0.00% / +1.09% -0.08% -0.08%] index_select wrap : Elapsed 0.013 ms (1.297 ms / 100) 1.288 -> 1.283 ( -0.39%) [ +1.09% +0.08% +0.00% / +0.93% -0.31% -0.39%] index_select linear : Elapsed 0.013 ms (1.302 ms / 100) 1.280 -> 1.280 ( +0.00%) [ +1.02% +0.47% +0.00% / +0.86% +0.00% +0.23%] index_select reverse : Elapsed 0.013 ms (1.293 ms / 100) 1.266 -> 1.265 ( -0.08%) [ +0.71% +0.16% +0.00% / +0.63% -0.08% +0.55%] index_select skip64 : Elapsed 0.013 ms (1.275 ms / 100) 1.270 -> 1.272 ( +0.16%) [ +0.79% +0.16% +0.00% / +0.71% +0.63% +0.16%] index_select skip256 : Elapsed 0.013 ms (1.280 ms / 100) 2.763 -> 2.740 ( -0.83%) [ +0.33% +0.14% +0.00% / +0.54% -0.62% -0.83%] index_select spread : Elapsed 0.028 ms (2.772 ms / 100) 1.407 -> 1.404 ( -0.21%) [ +0.78% +0.14% +0.00% / +0.85% -0.21% -0.14%] index_select strided 3 : Elapsed 0.014 ms (1.418 ms / 100) 1.605 -> 1.580 ( -1.56%) [ +0.25% +0.12% +0.00% / +0.81% -1.56% -1.43%] index_select strided 5 : Elapsed 0.016 ms (1.609 ms / 100) 1.974 -> 1.927 ( -2.38%) [ +0.51% +0.00% +0.10% / +0.71% -2.38% -2.13%] index_select strided 7 : Elapsed 0.020 ms (1.984 ms / 100) 2.224 -> 2.163 ( -2.74%) [ +0.36% +0.22% +0.00% / +0.49% -2.74% -2.65%] index_select strided 8 : Elapsed 0.022 ms (2.232 ms / 100) 2.726 -> 2.691 ( -1.28%) [ +0.29% +0.00% +0.15% / +0.29% -1.28% -1.21%] index_select strided 16 : Elapsed 0.027 ms (2.734 ms / 100) 2.763 -> 2.752 ( -0.40%) [ +0.25% +0.04% +0.00% / +0.47% -0.40% -0.18%] index_select strided 64 : Elapsed 0.028 ms (2.770 ms / 100) 2.594 -> 2.583 ( -0.42%) [ +0.46% +0.23% +0.00% / +0.19% -0.35% -0.42%] index_select strided 100 : Elapsed 0.026 ms (2.606 ms / 100) 2.752 -> 2.742 ( -0.36%) [ +0.55% +0.22% +0.00% / +0.40% -0.36% -0.25%] index_select strided 255 : Elapsed 0.028 ms (2.767 ms / 100) 2.768 -> 2.761 ( -0.25%) [ +0.58% +0.18% +0.00% / +0.47% -0.22% -0.25%] index_select strided 256 : Elapsed 0.028 ms (2.784 ms / 100) 2.766 -> 2.759 ( -0.25%) [ +0.22% +0.07% +0.00% / +0.43% -0.25% -0.18%] index_select strided 257 : Elapsed 0.028 ms (2.772 ms / 100) 2.556 -> 2.554 ( -0.08%) [ +0.39% +0.00% +0.35% / +0.43% -0.04% -0.08%] index_select random : Elapsed 0.026 ms (2.566 ms / 100) 2.533 -> 2.525 ( -0.32%) [ +0.36% +0.00% +0.16% / +0.24% -0.32% -0.16%] index_select random_sorted : Elapsed 0.025 ms (2.542 ms / 100) 2.621 -> 2.607 ( -0.53%) [ +0.46% +0.19% +0.00% / +0.53% -0.53% -0.23%] index_select perm : Elapsed 0.026 ms (2.633 ms / 100) 2.556 -> 2.544 ( -0.47%) [ +0.23% +0.08% +0.00% / +0.27% -0.47% -0.31%] index_select perm_sorted : Elapsed 0.026 ms (2.562 ms / 100) B = [15, 2048] (stride (1, 15)) dim = 0 fill_cnt = 1000 GOOD 14.550 -> 7.825 (-46.22%) [ +0.00% +0.15% +0.28% / -46.14% -46.22% -46.20%] index_fill_ const : Elapsed 0.145 ms (14.550 ms / 100) GOOD 14.787 -> 7.817 (-47.14%) [ +0.03% +0.13% +0.00% / -47.14% -47.04% -47.02%] index_fill_ linear : Elapsed 0.148 ms (14.791 ms / 100) GOOD 14.514 -> 7.833 (-46.03%) [ +0.51% +0.00% +0.27% / -46.02% -46.03% -46.00%] index_fill_ reverse : Elapsed 0.146 ms (14.588 ms / 100) GOOD 14.728 -> 7.831 (-46.83%) [ +0.02% +0.00% +0.20% / -46.83% -46.80% -46.82%] index_fill_ skip64 : Elapsed 0.147 ms (14.731 ms / 100) GOOD 14.769 -> 7.837 (-46.94%) [ +0.24% +0.00% +0.39% / -46.94% -46.91% -46.91%] index_fill_ skip256 : Elapsed 0.148 ms (14.805 ms / 100) GOOD 14.742 -> 7.825 (-46.92%) [ +0.41% +0.00% +0.16% / -46.85% -46.92% -46.92%] index_fill_ spread : Elapsed 0.148 ms (14.802 ms / 100) GOOD 14.758 -> 7.893 (-46.52%) [ +0.46% +0.66% +0.00% / -46.52% -46.50% -46.50%] index_fill_ strided 3 : Elapsed 0.148 ms (14.826 ms / 100) GOOD 14.739 -> 7.892 (-46.45%) [ +0.60% +0.00% +0.39% / -46.45% -46.45% -46.39%] index_fill_ strided 5 : Elapsed 0.148 ms (14.828 ms / 100) GOOD 14.526 -> 7.849 (-45.97%) [ +0.06% +0.00% +0.08% / -45.92% -45.92% -45.97%] index_fill_ strided 7 : Elapsed 0.145 ms (14.535 ms / 100) GOOD 14.699 -> 7.850 (-46.60%) [ +0.01% +0.00% +0.05% / -46.60% -46.55% -46.56%] index_fill_ strided 8 : Elapsed 0.147 ms (14.700 ms / 100) GOOD 14.562 -> 7.852 (-46.08%) [ +0.02% +0.00% +0.41% / -46.08% -46.07% -46.05%] index_fill_ random : Elapsed 0.146 ms (14.565 ms / 100) GOOD 14.853 -> 7.816 (-47.38%) [ +0.53% +0.00% +0.40% / -47.38% -47.30% -47.29%] index_fill_ random_sorted : Elapsed 0.149 ms (14.932 ms / 100) B = [15, 2048] (stride (1, 15)) A = [1000, 2048] (stride (2048, 1)) dim = 0 1.148 -> 1.151 ( +0.26%) [ +1.13% +0.17% +0.00% / +1.05% +0.44% +0.26%] index_select const : Elapsed 0.012 ms (1.161 ms / 100) 1.149 -> 1.150 ( +0.09%) [ +1.04% +0.00% +0.00% / +0.87% +0.09% +0.09%] index_select wrap : Elapsed 0.012 ms (1.161 ms / 100) 1.149 -> 1.149 ( +0.00%) [ +0.78% +0.09% +0.00% / +0.78% +0.00% +0.09%] index_select linear : Elapsed 0.012 ms (1.158 ms / 100) 1.147 -> 1.150 ( +0.26%) [ +0.70% +0.00% +0.00% / +0.70% +0.35% +0.26%] index_select reverse : Elapsed 0.012 ms (1.155 ms / 100) 1.153 -> 1.152 ( -0.09%) [ +0.78% +0.00% +0.43% / +0.78% -0.09% +0.00%] index_select skip64 : Elapsed 0.012 ms (1.162 ms / 100) 1.155 -> 1.156 ( +0.09%) [ +0.69% +0.09% +0.00% / +0.95% +0.09% +0.26%] index_select skip256 : Elapsed 0.012 ms (1.163 ms / 100) 1.147 -> 1.153 ( +0.52%) [ +0.87% +0.00% +0.00% / +0.78% +0.52% +0.61%] index_select spread : Elapsed 0.012 ms (1.157 ms / 100) 1.150 -> 1.147 ( -0.26%) [ +0.96% +0.17% +0.00% / +0.78% -0.26% -0.26%] index_select strided 3 : Elapsed 0.012 ms (1.161 ms / 100) 1.148 -> 1.147 ( -0.09%) [ +1.05% +0.00% +0.09% / +0.87% -0.09% +0.00%] index_select strided 5 : Elapsed 0.012 ms (1.160 ms / 100) 1.147 -> 1.151 ( +0.35%) [ +1.05% +0.09% +0.00% / +0.87% +0.35% +0.44%] index_select strided 7 : Elapsed 0.012 ms (1.159 ms / 100) 1.147 -> 1.151 ( +0.35%) [ +0.78% +0.09% +0.00% / +0.87% +0.44% +0.35%] index_select strided 8 : Elapsed 0.012 ms (1.156 ms / 100) 1.144 -> 1.153 ( +0.79%) [ +1.05% +0.17% +0.00% / +1.05% +0.79% +0.79%] index_select strided 16 : Elapsed 0.012 ms (1.156 ms / 100) 1.147 -> 1.151 ( +0.35%) [ +0.70% +0.00% +0.00% / +0.61% +0.44% +0.35%] index_select strided 64 : Elapsed 0.012 ms (1.155 ms / 100) 1.148 -> 1.147 ( -0.09%) [ +0.87% +0.17% +0.00% / +0.78% -0.09% +0.00%] index_select strided 100 : Elapsed 0.012 ms (1.158 ms / 100) 1.149 -> 1.146 ( -0.26%) [ +0.96% +0.17% +0.00% / +0.87% -0.09% -0.26%] index_select strided 255 : Elapsed 0.012 ms (1.160 ms / 100) 1.147 -> 1.153 ( +0.52%) [ +0.70% +0.09% +0.00% / +0.78% +0.52% +0.52%] index_select strided 256 : Elapsed 0.012 ms (1.155 ms / 100) 1.146 -> 1.153 ( +0.61%) [ +0.87% +0.09% +0.00% / +0.79% +0.61% +0.61%] index_select strided 257 : Elapsed 0.012 ms (1.156 ms / 100) 1.144 -> 1.153 ( +0.79%) [ +1.05% +0.17% +0.00% / +0.96% +0.79% +0.79%] index_select random : Elapsed 0.012 ms (1.156 ms / 100) 1.144 -> 1.152 ( +0.70%) [ +1.05% +0.17% +0.00% / +0.79% +0.79% +0.70%] index_select random_sorted : Elapsed 0.012 ms (1.156 ms / 100) 1.149 -> 1.152 ( +0.26%) [ +0.87% +0.00% +0.00% / +0.52% +0.26% +0.35%] index_select perm : Elapsed 0.012 ms (1.159 ms / 100) 1.148 -> 1.152 ( +0.35%) [ +0.78% +0.00% +0.00% / +0.87% +0.35% +0.52%] index_select perm_sorted : Elapsed 0.012 ms (1.157 ms / 100) B = [15, 2048] (stride (1, 15)) A = [1000, 2048] (stride (1, 1000)) dim = 0 1.284 -> 1.286 ( +0.16%) [ +1.01% +0.23% +0.00% / +0.78% +0.39% +0.16%] index_select const : Elapsed 0.013 ms (1.297 ms / 100) 1.312 -> 1.299 ( -0.99%) [ +0.53% +0.08% +0.00% / +0.61% -0.99% -0.91%] index_select wrap : Elapsed 0.013 ms (1.319 ms / 100) 1.305 -> 1.300 ( -0.38%) [ +1.07% +0.15% +0.00% / +0.84% -0.38% -0.31%] index_select linear : Elapsed 0.013 ms (1.319 ms / 100) 1.301 -> 1.299 ( -0.15%) [ +0.69% +0.23% +0.00% / +1.00% -0.15% -0.15%] index_select reverse : Elapsed 0.013 ms (1.310 ms / 100) 1.284 -> 1.283 ( -0.08%) [ +0.78% +0.00% +0.16% / +0.55% -0.08% +0.23%] index_select skip64 : Elapsed 0.013 ms (1.294 ms / 100) 1.285 -> 1.288 ( +0.23%) [ +1.01% +0.00% +0.23% / +0.86% +0.23% +0.31%] index_select skip256 : Elapsed 0.013 ms (1.298 ms / 100) 2.748 -> 2.745 ( -0.11%) [ +0.18% +0.00% +0.07% / +0.44% +0.00% -0.11%] index_select spread : Elapsed 0.028 ms (2.753 ms / 100) 1.424 -> 1.417 ( -0.49%) [ +0.70% +0.00% +0.07% / +0.84% -0.21% -0.49%] index_select strided 3 : Elapsed 0.014 ms (1.434 ms / 100) 1.602 -> 1.579 ( -1.44%) [ +0.69% +0.00% +0.56% / +1.12% -1.44% -1.37%] index_select strided 5 : Elapsed 0.016 ms (1.613 ms / 100) 1.944 -> 1.906 ( -1.95%) [ +0.77% +0.00% +0.15% / +1.08% -1.95% -1.70%] index_select strided 7 : Elapsed 0.020 ms (1.959 ms / 100) 2.213 -> 2.156 ( -2.58%) [ +0.18% +0.00% +0.09% / +0.41% -2.58% -2.49%] index_select strided 8 : Elapsed 0.022 ms (2.217 ms / 100) 2.730 -> 2.692 ( -1.39%) [ +0.37% +0.00% +0.11% / +0.07% -1.39% -0.95%] index_select strided 16 : Elapsed 0.027 ms (2.740 ms / 100) 2.761 -> 2.754 ( -0.25%) [ +0.25% +0.00% +0.00% / +0.18% -0.07% -0.25%] index_select strided 64 : Elapsed 0.028 ms (2.768 ms / 100) 2.606 -> 2.601 ( -0.19%) [ +0.12% +0.23% +0.00% / +0.38% -0.19% -0.04%] index_select strided 100 : Elapsed 0.026 ms (2.609 ms / 100) 2.757 -> 2.747 ( -0.36%) [ +0.25% +0.15% +0.00% / +0.25% -0.07% -0.36%] index_select strided 255 : Elapsed 0.028 ms (2.764 ms / 100) 2.761 -> 2.764 ( +0.11%) [ +0.36% +0.07% +0.00% / +0.40% +0.11% +0.18%] index_select strided 256 : Elapsed 0.028 ms (2.771 ms / 100) 2.758 -> 2.761 ( +0.11%) [ +0.51% +0.00% +0.04% / +0.11% +0.11% +1.27%] index_select strided 257 : Elapsed 0.028 ms (2.772 ms / 100) 2.636 -> 2.638 ( +0.08%) [ +0.30% +0.19% +0.00% / +0.46% +0.08% +0.30%] index_select random : Elapsed 0.026 ms (2.644 ms / 100) 2.496 -> 2.501 ( +0.20%) [ +0.56% +0.08% +0.00% / +0.56% +0.24% +0.20%] index_select random_sorted : Elapsed 0.025 ms (2.510 ms / 100) 2.650 -> 2.639 ( -0.42%) [ +0.60% +0.04% +0.00% / +0.53% -0.23% -0.42%] index_select perm : Elapsed 0.027 ms (2.666 ms / 100) 2.651 -> 2.637 ( -0.53%) [ +0.45% +0.19% +0.00% / +0.19% -0.53% -0.53%] index_select perm_sorted : Elapsed 0.027 ms (2.663 ms / 100) out_shape = [1000, 15] in_shape = [1000, 2048] idx_dim = 1 B = [1000, 15] (stride (15, 1)) dim = 1 fill_cnt = 2048 GOOD 14.701 -> 8.440 (-42.59%) [ +0.23% +0.00% +0.11% / -42.59% -42.54% -42.53%] index_fill_ const : Elapsed 0.147 ms (14.735 ms / 100) GOOD 14.840 -> 8.432 (-43.18%) [ +0.07% +0.07% +0.00% / -43.18% -43.08% -43.03%] index_fill_ linear : Elapsed 0.149 ms (14.851 ms / 100) GOOD 14.824 -> 8.440 (-43.07%) [ +0.14% +0.00% +0.14% / -43.03% -43.07% -43.05%] index_fill_ reverse : Elapsed 0.148 ms (14.845 ms / 100) GOOD 14.689 -> 8.439 (-42.55%) [ +0.16% +0.00% +0.05% / -42.54% -42.55% -42.49%] index_fill_ skip64 : Elapsed 0.147 ms (14.713 ms / 100) GOOD 14.703 -> 8.436 (-42.62%) [ +0.14% +0.02% +0.00% / -42.57% -42.62% -42.56%] index_fill_ skip256 : Elapsed 0.147 ms (14.724 ms / 100) GOOD 14.639 -> 8.446 (-42.30%) [ +0.22% +0.00% +0.03% / -42.30% -42.28% -42.30%] index_fill_ spread : Elapsed 0.147 ms (14.671 ms / 100) GOOD 14.518 -> 8.482 (-41.58%) [ +0.12% +0.10% +0.00% / -41.58% -41.58% -41.51%] index_fill_ strided 3 : Elapsed 0.145 ms (14.535 ms / 100) GOOD 14.539 -> 8.475 (-41.71%) [ +0.05% +0.01% +0.00% / -41.71% -41.68% -41.65%] index_fill_ strided 5 : Elapsed 0.145 ms (14.546 ms / 100) GOOD 14.457 -> 8.457 (-41.50%) [ +0.43% +0.12% +0.00% / -41.49% -41.50% -41.43%] index_fill_ strided 7 : Elapsed 0.145 ms (14.519 ms / 100) GOOD 14.483 -> 8.454 (-41.63%) [ +0.19% +0.00% +0.14% / -41.46% -41.59% -41.63%] index_fill_ strided 8 : Elapsed 0.145 ms (14.511 ms / 100) GOOD 14.480 -> 8.464 (-41.55%) [ +0.25% +0.00% +0.01% / -41.49% -41.55% -41.53%] index_fill_ random : Elapsed 0.145 ms (14.516 ms / 100) GOOD 14.638 -> 8.440 (-42.34%) [ +0.20% +0.03% +0.00% / -42.34% -42.27% -42.34%] index_fill_ random_sorted : Elapsed 0.147 ms (14.667 ms / 100) B = [1000, 15] (stride (15, 1)) A = [1000, 2048] (stride (2048, 1)) dim = 1 1.189 -> 1.196 ( +0.59%) [ +0.67% +0.00% +0.00% / +0.59% +1.09% +0.84%] index_select const : Elapsed 0.012 ms (1.197 ms / 100) 1.189 -> 1.201 ( +1.01%) [ +0.93% +0.17% +0.00% / +1.01% +1.01% +1.09%] index_select wrap : Elapsed 0.012 ms (1.200 ms / 100) 1.185 -> 1.199 ( +1.18%) [ +1.18% +0.00% +0.08% / +1.18% +1.60% +1.52%] index_select linear : Elapsed 0.012 ms (1.199 ms / 100) 1.186 -> 1.197 ( +0.93%) [ +1.10% +0.08% +0.00% / +0.93% +1.60% +1.35%] index_select reverse : Elapsed 0.012 ms (1.199 ms / 100) 1.186 -> 1.199 ( +1.10%) [ +0.93% +0.17% +0.00% / +1.10% +1.52% +1.35%] index_select skip64 : Elapsed 0.012 ms (1.197 ms / 100) 1.184 -> 1.202 ( +1.52%) [ +1.01% +0.25% +0.00% / +1.94% +1.52% +1.52%] index_select skip256 : Elapsed 0.012 ms (1.196 ms / 100) 1.805 -> 1.816 ( +0.61%) [ +0.50% +0.44% +0.00% / +0.61% +1.05% +1.39%] index_select spread : Elapsed 0.018 ms (1.814 ms / 100) 1.186 -> 1.199 ( +1.10%) [ +1.01% +0.00% +0.17% / +1.10% +1.69% +1.43%] index_select strided 3 : Elapsed 0.012 ms (1.198 ms / 100) 1.190 -> 1.201 ( +0.92%) [ +0.92% +0.00% +0.17% / +0.92% +1.51% +1.09%] index_select strided 5 : Elapsed 0.012 ms (1.201 ms / 100) 1.190 -> 1.200 ( +0.84%) [ +1.01% +0.00% +0.00% / +0.84% +1.26% +1.34%] index_select strided 7 : Elapsed 0.012 ms (1.202 ms / 100) 1.191 -> 1.204 ( +1.09%) [ +1.09% +0.00% +0.17% / +1.09% +1.34% +1.34%] index_select strided 8 : Elapsed 0.012 ms (1.204 ms / 100) 1.190 -> 1.203 ( +1.09%) [ +1.01% +0.34% +0.00% / +1.09% +1.51% +1.34%] index_select strided 16 : Elapsed 0.012 ms (1.202 ms / 100) 1.803 -> 1.817 ( +0.78%) [ +0.72% +0.22% +0.00% / +0.78% +1.33% +1.55%] index_select strided 64 : Elapsed 0.018 ms (1.816 ms / 100) 1.797 -> 1.811 ( +0.78%) [ +0.61% +0.28% +0.00% / +0.78% +1.45% +2.06%] index_select strided 100 : Elapsed 0.018 ms (1.808 ms / 100) 1.190 -> 1.202 ( +1.01%) [ +1.26% +0.50% +0.00% / +1.01% +1.51% +1.60%] index_select strided 255 : Elapsed 0.012 ms (1.205 ms / 100) 1.191 -> 1.202 ( +0.92%) [ +1.09% +0.17% +0.00% / +0.92% +0.92% +0.92%] index_select strided 256 : Elapsed 0.012 ms (1.204 ms / 100) 1.195 -> 1.204 ( +0.75%) [ +0.75% +0.00% +0.08% / +0.75% +0.84% +0.75%] index_select strided 257 : Elapsed 0.012 ms (1.204 ms / 100) 1.346 -> 1.358 ( +0.89%) [ +0.52% +0.00% +0.30% / +0.89% +2.60% +2.30%] index_select random : Elapsed 0.014 ms (1.353 ms / 100) 1.459 -> 1.468 ( +0.62%) [ +0.21% +0.00% +0.55% / +0.62% +1.44% +1.37%] index_select random_sorted : Elapsed 0.015 ms (1.462 ms / 100) 1.798 -> 1.810 ( +0.67%) [ +0.61% +0.00% +0.06% / +0.67% +1.39% +1.17%] index_select perm : Elapsed 0.018 ms (1.809 ms / 100) 1.801 -> 1.810 ( +0.50%) [ +0.61% +0.11% +0.00% / +0.50% +0.89% +0.94%] index_select perm_sorted : Elapsed 0.018 ms (1.812 ms / 100) B = [1000, 15] (stride (15, 1)) A = [1000, 2048] (stride (1, 1000)) dim = 1 1.127 -> 1.141 ( +1.24%) [ +1.51% +0.00% +0.09% / +1.24% +1.69% +1.51%] index_select const : Elapsed 0.011 ms (1.144 ms / 100) 1.131 -> 1.143 ( +1.06%) [ +1.15% +0.18% +0.00% / +1.06% +1.50% +1.50%] index_select wrap : Elapsed 0.011 ms (1.144 ms / 100) 1.129 -> 1.138 ( +0.80%) [ +0.89% +0.00% +0.00% / +0.80% +1.51% +1.33%] index_select linear : Elapsed 0.011 ms (1.139 ms / 100) 1.134 -> 1.147 ( +1.15%) [ +1.15% +0.18% +0.00% / +1.15% +1.50% +1.32%] index_select reverse : Elapsed 0.011 ms (1.147 ms / 100) 1.126 -> 1.135 ( +0.80%) [ +1.07% +0.00% +0.09% / +0.80% +1.60% +1.60%] index_select skip64 : Elapsed 0.011 ms (1.138 ms / 100) 1.132 -> 1.143 ( +0.97%) [ +1.15% +0.09% +0.00% / +0.97% +1.59% +1.77%] index_select skip256 : Elapsed 0.011 ms (1.145 ms / 100) 1.133 -> 1.143 ( +0.88%) [ +1.06% +0.00% +0.00% / +0.88% +1.32% +1.24%] index_select spread : Elapsed 0.011 ms (1.145 ms / 100) 1.133 -> 1.143 ( +0.88%) [ +1.15% +0.00% +0.09% / +0.88% +1.59% +1.68%] index_select strided 3 : Elapsed 0.011 ms (1.146 ms / 100) 1.132 -> 1.143 ( +0.97%) [ +1.06% +0.00% +0.18% / +0.97% +1.50% +1.41%] index_select strided 5 : Elapsed 0.011 ms (1.144 ms / 100) 1.135 -> 1.145 ( +0.88%) [ +1.32% +0.26% +0.00% / +0.88% +1.41% +1.23%] index_select strided 7 : Elapsed 0.011 ms (1.150 ms / 100) 1.125 -> 1.135 ( +0.89%) [ +1.16% +0.00% +0.00% / +0.89% +1.51% +1.33%] index_select strided 8 : Elapsed 0.011 ms (1.138 ms / 100) 1.126 -> 1.133 ( +0.62%) [ +0.98% +0.00% +0.00% / +0.62% +1.24% +1.51%] index_select strided 16 : Elapsed 0.011 ms (1.137 ms / 100) 1.127 -> 1.135 ( +0.71%) [ +0.98% +0.00% +0.18% / +0.71% +1.24% +1.33%] index_select strided 64 : Elapsed 0.011 ms (1.138 ms / 100) 1.126 -> 1.135 ( +0.80%) [ +1.07% +0.09% +0.00% / +0.80% +1.33% +1.33%] index_select strided 100 : Elapsed 0.011 ms (1.138 ms / 100) 1.133 -> 1.141 ( +0.71%) [ +1.06% +0.00% +0.00% / +0.71% +1.50% +1.50%] index_select strided 255 : Elapsed 0.011 ms (1.145 ms / 100) 1.124 -> 1.135 ( +0.98%) [ +1.16% +0.00% +0.18% / +0.98% +1.25% +1.33%] index_select strided 256 : Elapsed 0.011 ms (1.137 ms / 100) 1.134 -> 1.146 ( +1.06%) [ +1.06% +0.00% +0.18% / +1.06% +1.06% +1.15%] index_select strided 257 : Elapsed 0.011 ms (1.146 ms / 100) 1.139 -> 1.148 ( +0.79%) [ +1.23% +0.00% +0.00% / +0.79% +0.97% +0.88%] index_select random : Elapsed 0.012 ms (1.153 ms / 100) 1.136 -> 1.148 ( +1.06%) [ +0.79% +0.00% +0.00% / +1.06% +1.23% +1.32%] index_select random_sorted : Elapsed 0.011 ms (1.145 ms / 100) 1.134 -> 1.143 ( +0.79%) [ +0.97% +0.00% +0.09% / +0.79% +1.06% +0.97%] index_select perm : Elapsed 0.011 ms (1.145 ms / 100) 1.137 -> 1.147 ( +0.88%) [ +1.06% +0.18% +0.00% / +0.88% +1.06% +1.23%] index_select perm_sorted : Elapsed 0.011 ms (1.149 ms / 100) B = [1000, 15] (stride (1, 1000)) dim = 1 fill_cnt = 2048 9.010 -> 8.956 ( -0.60%) [ +0.92% +0.13% +0.00% / +0.68% -0.53% -0.60%] index_fill_ const : Elapsed 0.091 ms (9.093 ms / 100) 9.034 -> 8.966 ( -0.75%) [ +1.08% +0.25% +0.00% / +0.62% -0.59% -0.75%] index_fill_ linear : Elapsed 0.091 ms (9.132 ms / 100) 8.955 -> 8.991 ( +0.40%) [ +0.87% +0.22% +0.00% / +0.40% +0.68% +1.02%] index_fill_ reverse : Elapsed 0.090 ms (9.033 ms / 100) 8.970 -> 8.884 ( -0.96%) [ +0.74% +0.04% +0.00% / +0.48% -0.96% -0.91%] index_fill_ skip64 : Elapsed 0.090 ms (9.036 ms / 100) 8.744 -> 8.804 ( +0.69%) [ +0.86% +0.14% +0.00% / +0.69% +3.08% +2.93%] index_fill_ skip256 : Elapsed 0.088 ms (8.819 ms / 100) 8.917 -> 8.979 ( +0.70%) [ +0.81% +0.08% +0.00% / +0.70% +1.20% +1.36%] index_fill_ spread : Elapsed 0.090 ms (8.989 ms / 100) 8.980 -> 8.770 ( -2.34%) [ +0.97% +0.00% +0.24% / +0.61% -2.34% -2.31%] index_fill_ strided 3 : Elapsed 0.091 ms (9.067 ms / 100) 8.998 -> 8.980 ( -0.20%) [ +0.90% +0.04% +0.00% / +0.38% -0.08% -0.20%] index_fill_ strided 5 : Elapsed 0.091 ms (9.079 ms / 100) 8.925 -> 8.976 ( +0.57%) [ +0.58% +0.00% +0.25% / +0.57% +1.86% +1.79%] index_fill_ strided 7 : Elapsed 0.090 ms (8.977 ms / 100) 8.933 -> 8.955 ( +0.25%) [ +0.51% +0.02% +0.00% / +0.25% +1.56% +1.41%] index_fill_ strided 8 : Elapsed 0.090 ms (8.979 ms / 100) 8.969 -> 8.990 ( +0.23%) [ +1.20% +0.37% +0.00% / +0.60% +0.60% +0.23%] index_fill_ random : Elapsed 0.091 ms (9.077 ms / 100) 8.886 -> 8.791 ( -1.07%) [ +0.62% +0.02% +0.00% / +0.71% -0.97% -1.07%] index_fill_ random_sorted : Elapsed 0.089 ms (8.941 ms / 100) B = [1000, 15] (stride (1, 1000)) A = [1000, 2048] (stride (2048, 1)) dim = 1 1.183 -> 1.188 ( +0.42%) [ +0.42% +0.00% +0.17% / +0.51% +0.42% +0.76%] index_select const : Elapsed 0.012 ms (1.188 ms / 100) 1.187 -> 1.194 ( +0.59%) [ +1.10% +0.00% +0.00% / +1.10% +0.59% +0.93%] index_select wrap : Elapsed 0.012 ms (1.200 ms / 100) 1.182 -> 1.191 ( +0.76%) [ +1.35% +0.17% +0.00% / +0.85% +0.76% +0.85%] index_select linear : Elapsed 0.012 ms (1.198 ms / 100) 1.182 -> 1.191 ( +0.76%) [ +1.27% +0.00% +0.00% / +1.61% +0.76% +0.93%] index_select reverse : Elapsed 0.012 ms (1.197 ms / 100) 1.186 -> 1.193 ( +0.59%) [ +1.18% +0.25% +0.00% / +0.67% +1.10% +0.59%] index_select skip64 : Elapsed 0.012 ms (1.200 ms / 100) 1.180 -> 1.191 ( +0.93%) [ +1.36% +0.34% +0.00% / +0.93% +1.02% +1.19%] index_select skip256 : Elapsed 0.012 ms (1.196 ms / 100) 1.816 -> 1.830 ( +0.77%) [ +0.50% +0.17% +0.00% / +0.77% +1.76% +1.54%] index_select spread : Elapsed 0.018 ms (1.825 ms / 100) 1.184 -> 1.194 ( +0.84%) [ +0.84% +0.00% +0.17% / +0.93% +1.10% +0.84%] index_select strided 3 : Elapsed 0.012 ms (1.194 ms / 100) 1.186 -> 1.197 ( +0.93%) [ +1.26% +0.42% +0.00% / +1.01% +0.93% +1.18%] index_select strided 5 : Elapsed 0.012 ms (1.201 ms / 100) 1.178 -> 1.193 ( +1.27%) [ +1.10% +0.68% +0.00% / +1.27% +1.36% +1.27%] index_select strided 7 : Elapsed 0.012 ms (1.191 ms / 100) 1.184 -> 1.192 ( +0.68%) [ +0.93% +0.00% +0.00% / +0.68% +1.27% +1.35%] index_select strided 8 : Elapsed 0.012 ms (1.195 ms / 100) 1.187 -> 1.197 ( +0.84%) [ +0.76% +0.00% +0.08% / +1.01% +0.84% +1.10%] index_select strided 16 : Elapsed 0.012 ms (1.196 ms / 100) 1.820 -> 1.842 ( +1.21%) [ +0.88% +0.00% +0.22% / +1.21% +1.43% +1.21%] index_select strided 64 : Elapsed 0.018 ms (1.836 ms / 100) 1.809 -> 1.819 ( +0.55%) [ +0.55% +0.06% +0.00% / +0.55% +1.55% +1.99%] index_select strided 100 : Elapsed 0.018 ms (1.819 ms / 100) 1.183 -> 1.194 ( +0.93%) [ +1.18% +0.42% +0.00% / +0.93% +1.27% +1.44%] index_select strided 255 : Elapsed 0.012 ms (1.197 ms / 100) 1.181 -> 1.191 ( +0.85%) [ +1.10% +0.00% +0.42% / +0.85% +0.93% +0.85%] index_select strided 256 : Elapsed 0.012 ms (1.194 ms / 100) 1.185 -> 1.192 ( +0.59%) [ +0.76% +0.08% +0.00% / +1.01% +0.59% +0.93%] index_select strided 257 : Elapsed 0.012 ms (1.194 ms / 100) 1.652 -> 1.652 ( +0.00%) [ +1.09% +0.00% +0.12% / +1.09% +0.00% +0.24%] index_select random : Elapsed 0.017 ms (1.670 ms / 100) 1.716 -> 1.723 ( +0.41%) [ +0.00% +0.23% +0.12% / +0.41% +0.52% +0.70%] index_select random_sorted : Elapsed 0.017 ms (1.716 ms / 100) 1.717 -> 1.711 ( -0.35%) [ +0.58% +0.00% +0.00% / +0.41% -0.35% -0.12%] index_select perm : Elapsed 0.017 ms (1.727 ms / 100) 1.733 -> 1.749 ( +0.92%) [ +0.35% +0.17% +0.00% / +0.92% +2.08% +2.14%] index_select perm_sorted : Elapsed 0.017 ms (1.739 ms / 100) B = [1000, 15] (stride (1, 1000)) A = [1000, 2048] (stride (1, 1000)) dim = 1 1.055 -> 1.062 ( +0.66%) [ +0.66% +0.00% +0.00% / +0.66% +1.42% +1.33%] index_select const : Elapsed 0.011 ms (1.062 ms / 100) 1.052 -> 1.061 ( +0.86%) [ +1.14% +0.48% +0.00% / +0.86% +1.52% +1.43%] index_select wrap : Elapsed 0.011 ms (1.064 ms / 100) 1.057 -> 1.065 ( +0.76%) [ +0.66% +0.09% +0.00% / +0.76% +1.04% +0.95%] index_select linear : Elapsed 0.011 ms (1.064 ms / 100) 1.059 -> 1.068 ( +0.85%) [ +0.66% +0.00% +0.19% / +0.85% +1.23% +1.23%] index_select reverse : Elapsed 0.011 ms (1.066 ms / 100) 1.046 -> 1.056 ( +0.96%) [ +0.96% +0.00% +0.10% / +0.96% +1.63% +1.72%] index_select skip64 : Elapsed 0.011 ms (1.056 ms / 100) 1.056 -> 1.064 ( +0.76%) [ +0.85% +0.00% +0.09% / +0.76% +1.33% +1.33%] index_select skip256 : Elapsed 0.011 ms (1.065 ms / 100) 1.059 -> 1.067 ( +0.76%) [ +0.66% +0.19% +0.00% / +0.76% +1.32% +1.32%] index_select spread : Elapsed 0.011 ms (1.066 ms / 100) 1.060 -> 1.066 ( +0.57%) [ +0.57% +0.00% +0.00% / +0.57% +1.23% +1.23%] index_select strided 3 : Elapsed 0.011 ms (1.066 ms / 100) 1.059 -> 1.066 ( +0.66%) [ +0.66% +0.00% +0.09% / +0.66% +1.32% +1.13%] index_select strided 5 : Elapsed 0.011 ms (1.066 ms / 100) 1.058 -> 1.064 ( +0.57%) [ +0.66% +0.00% +0.19% / +0.57% +1.42% +1.32%] index_select strided 7 : Elapsed 0.011 ms (1.065 ms / 100) 1.049 -> 1.058 ( +0.86%) [ +1.24% +0.10% +0.00% / +0.86% +1.43% +2.96%] index_select strided 8 : Elapsed 0.011 ms (1.062 ms / 100) 1.047 -> 1.060 ( +1.24%) [ +1.34% +0.10% +0.00% / +1.24% +1.34% +1.34%] index_select strided 16 : Elapsed 0.011 ms (1.061 ms / 100) 1.046 -> 1.059 ( +1.24%) [ +1.24% +0.19% +0.00% / +1.24% +1.53% +1.34%] index_select strided 64 : Elapsed 0.011 ms (1.059 ms / 100) 1.048 -> 1.061 ( +1.24%) [ +1.15% +0.00% +0.00% / +1.24% +1.62% +1.53%] index_select strided 100 : Elapsed 0.011 ms (1.060 ms / 100) 1.057 -> 1.066 ( +0.85%) [ +0.85% +0.09% +0.00% / +0.85% +1.61% +1.61%] index_select strided 255 : Elapsed 0.011 ms (1.066 ms / 100) 1.047 -> 1.058 ( +1.05%) [ +1.05% +0.10% +0.00% / +1.05% +1.15% +1.15%] index_select strided 256 : Elapsed 0.011 ms (1.058 ms / 100) 1.057 -> 1.066 ( +0.85%) [ +0.76% +0.00% +0.28% / +0.85% +1.32% +1.23%] index_select strided 257 : Elapsed 0.011 ms (1.065 ms / 100) 1.062 -> 1.073 ( +1.04%) [ +1.04% +0.00% +0.00% / +1.04% +1.13% +1.13%] index_select random : Elapsed 0.011 ms (1.073 ms / 100) 1.058 -> 1.067 ( +0.85%) [ +0.95% +0.19% +0.00% / +0.85% +1.42% +1.13%] index_select random_sorted : Elapsed 0.011 ms (1.068 ms / 100) 1.058 -> 1.068 ( +0.95%) [ +0.85% +0.19% +0.00% / +0.95% +1.13% +1.04%] index_select perm : Elapsed 0.011 ms (1.067 ms / 100) 1.062 -> 1.073 ( +1.04%) [ +1.13% +0.00% +0.00% / +1.04% +1.41% +1.41%] index_select perm_sorted : Elapsed 0.011 ms (1.074 ms / 100) out_shape = [15, 1000] in_shape = [2048, 1000] idx_dim = 0 B = [15, 1000] (stride (1000, 1)) dim = 0 fill_cnt = 2048 9.032 -> 8.955 ( -0.85%) [ +1.04% +0.12% +0.00% / +0.75% -0.85% -0.79%] index_fill_ const : Elapsed 0.091 ms (9.126 ms / 100) 9.046 -> 8.954 ( -1.02%) [ +0.44% +0.00% +0.02% / +0.66% -0.98% -1.02%] index_fill_ linear : Elapsed 0.091 ms (9.086 ms / 100) 8.960 -> 9.004 ( +0.49%) [ +0.71% +0.09% +0.00% / +0.71% +0.96% +0.49%] index_fill_ reverse : Elapsed 0.090 ms (9.024 ms / 100) 8.949 -> 8.898 ( -0.57%) [ +0.82% +0.00% +0.04% / +0.54% -0.57% -0.44%] index_fill_ skip64 : Elapsed 0.090 ms (9.022 ms / 100) 8.765 -> 8.807 ( +0.48%) [ +0.62% +0.07% +0.00% / +0.48% +2.67% +2.73%] index_fill_ skip256 : Elapsed 0.088 ms (8.819 ms / 100) 8.892 -> 8.962 ( +0.79%) [ +0.96% +0.40% +0.00% / +0.79% +1.74% +1.51%] index_fill_ spread : Elapsed 0.090 ms (8.977 ms / 100) 8.999 -> 8.768 ( -2.57%) [ +0.72% +0.08% +0.00% / +0.47% -2.57% -2.51%] index_fill_ strided 3 : Elapsed 0.091 ms (9.064 ms / 100) 9.016 -> 8.989 ( -0.30%) [ +0.44% +0.13% +0.00% / +0.09% -0.30% -0.20%] index_fill_ strided 5 : Elapsed 0.091 ms (9.056 ms / 100) 8.929 -> 8.958 ( +0.32%) [ +0.92% +0.00% +0.18% / +0.32% +1.53% +1.71%] index_fill_ strided 7 : Elapsed 0.090 ms (9.011 ms / 100) 8.919 -> 8.965 ( +0.52%) [ +0.55% +0.03% +0.00% / +0.52% +1.64% +2.01%] index_fill_ strided 8 : Elapsed 0.090 ms (8.968 ms / 100) 8.970 -> 9.002 ( +0.36%) [ +1.17% +0.00% +0.39% / +0.75% +0.36% +0.36%] index_fill_ random : Elapsed 0.091 ms (9.075 ms / 100) 8.878 -> 8.783 ( -1.07%) [ +0.70% +0.05% +0.00% / +0.69% -1.07% -1.00%] index_fill_ random_sorted : Elapsed 0.089 ms (8.940 ms / 100) B = [15, 1000] (stride (1000, 1)) A = [2048, 1000] (stride (1000, 1)) dim = 0 1.055 -> 1.062 ( +0.66%) [ +0.66% +0.09% +0.00% / +0.66% +1.71% +1.52%] index_select const : Elapsed 0.011 ms (1.062 ms / 100) 1.054 -> 1.061 ( +0.66%) [ +0.57% +0.00% +0.00% / +0.66% +1.33% +1.99%] index_select wrap : Elapsed 0.011 ms (1.060 ms / 100) 1.060 -> 1.066 ( +0.57%) [ +0.57% +0.00% +0.00% / +0.57% +1.23% +1.04%] index_select linear : Elapsed 0.011 ms (1.066 ms / 100) 1.059 -> 1.071 ( +1.13%) [ +0.85% +0.09% +0.00% / +1.13% +1.51% +1.51%] index_select reverse : Elapsed 0.011 ms (1.068 ms / 100) 1.046 -> 1.055 ( +0.86%) [ +0.96% +0.00% +0.00% / +0.86% +1.63% +1.72%] index_select skip64 : Elapsed 0.011 ms (1.056 ms / 100) 1.054 -> 1.063 ( +0.85%) [ +0.85% +0.57% +0.00% / +0.85% +1.61% +1.61%] index_select skip256 : Elapsed 0.011 ms (1.063 ms / 100) 1.053 -> 1.060 ( +0.66%) [ +0.76% +0.00% +0.00% / +0.66% +1.23% +1.23%] index_select spread : Elapsed 0.011 ms (1.061 ms / 100) 1.058 -> 1.065 ( +0.66%) [ +0.76% +0.00% +0.00% / +0.66% +1.32% +1.42%] index_select strided 3 : Elapsed 0.011 ms (1.066 ms / 100) 1.054 -> 1.063 ( +0.85%) [ +0.85% +0.09% +0.00% / +0.85% +1.33% +1.33%] index_select strided 5 : Elapsed 0.011 ms (1.063 ms / 100) 1.060 -> 1.069 ( +0.85%) [ +0.66% +0.09% +0.00% / +0.85% +0.94% +0.94%] index_select strided 7 : Elapsed 0.011 ms (1.067 ms / 100) 1.048 -> 1.060 ( +1.15%) [ +1.15% +0.10% +0.00% / +1.15% +1.43% +1.43%] index_select strided 8 : Elapsed 0.011 ms (1.060 ms / 100) 1.045 -> 1.056 ( +1.05%) [ +1.44% +0.10% +0.00% / +1.05% +1.44% +1.44%] index_select strided 16 : Elapsed 0.011 ms (1.060 ms / 100) 1.045 -> 1.059 ( +1.34%) [ +1.53% +0.19% +0.00% / +1.34% +1.44% +1.44%] index_select strided 64 : Elapsed 0.011 ms (1.061 ms / 100) 1.047 -> 1.059 ( +1.15%) [ +1.24% +0.19% +0.00% / +1.15% +1.53% +1.62%] index_select strided 100 : Elapsed 0.011 ms (1.060 ms / 100) 1.059 -> 1.064 ( +0.47%) [ +0.47% +0.00% +0.00% / +0.47% +1.23% +1.32%] index_select strided 255 : Elapsed 0.011 ms (1.064 ms / 100) 1.046 -> 1.057 ( +1.05%) [ +1.05% +0.00% +0.00% / +1.05% +1.05% +1.24%] index_select strided 256 : Elapsed 0.011 ms (1.057 ms / 100) 1.053 -> 1.060 ( +0.66%) [ +0.66% +0.19% +0.00% / +0.66% +0.76% +0.85%] index_select strided 257 : Elapsed 0.011 ms (1.060 ms / 100) 1.059 -> 1.069 ( +0.94%) [ +0.94% +0.00% +0.09% / +0.94% +1.23% +1.32%] index_select random : Elapsed 0.011 ms (1.069 ms / 100) 1.050 -> 1.060 ( +0.95%) [ +0.95% +0.19% +0.00% / +0.95% +1.81% +1.81%] index_select random_sorted : Elapsed 0.011 ms (1.060 ms / 100) 1.054 -> 1.063 ( +0.85%) [ +0.76% +0.38% +0.00% / +0.85% +0.95% +0.85%] index_select perm : Elapsed 0.011 ms (1.062 ms / 100) 1.059 -> 1.067 ( +0.76%) [ +0.76% +0.00% +0.00% / +0.76% +0.76% +0.94%] index_select perm_sorted : Elapsed 0.011 ms (1.067 ms / 100) B = [15, 1000] (stride (1000, 1)) A = [2048, 1000] (stride (1, 2048)) dim = 0 1.178 -> 1.185 ( +0.59%) [ +0.59% +0.17% +0.00% / +0.59% +1.53% +1.53%] index_select const : Elapsed 0.012 ms (1.185 ms / 100) 1.181 -> 1.189 ( +0.68%) [ +0.76% +0.17% +0.00% / +0.68% +1.61% +1.61%] index_select wrap : Elapsed 0.012 ms (1.190 ms / 100) 1.177 -> 1.188 ( +0.93%) [ +1.10% +0.08% +0.00% / +0.93% +1.70% +1.61%] index_select linear : Elapsed 0.012 ms (1.190 ms / 100) 1.175 -> 1.184 ( +0.77%) [ +1.02% +0.51% +0.00% / +0.77% +1.70% +2.04%] index_select reverse : Elapsed 0.012 ms (1.187 ms / 100) 1.178 -> 1.189 ( +0.93%) [ +0.85% +0.34% +0.00% / +0.93% +1.70% +1.78%] index_select skip64 : Elapsed 0.012 ms (1.188 ms / 100) 1.177 -> 1.186 ( +0.76%) [ +0.76% +0.08% +0.00% / +0.76% +1.53% +1.27%] index_select skip256 : Elapsed 0.012 ms (1.186 ms / 100) 1.811 -> 1.826 ( +0.83%) [ +0.28% +0.11% +0.00% / +0.83% +0.99% +1.21%] index_select spread : Elapsed 0.018 ms (1.816 ms / 100) 1.179 -> 1.189 ( +0.85%) [ +0.85% +0.00% +0.08% / +0.85% +1.53% +1.53%] index_select strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.184 -> 1.193 ( +0.76%) [ +1.01% +0.00% +0.17% / +0.76% +1.52% +1.27%] index_select strided 5 : Elapsed 0.012 ms (1.196 ms / 100) 1.180 -> 1.189 ( +0.76%) [ +1.02% +0.25% +0.00% / +0.76% +1.69% +1.53%] index_select strided 7 : Elapsed 0.012 ms (1.192 ms / 100) 1.184 -> 1.194 ( +0.84%) [ +0.76% +0.00% +0.42% / +0.84% +1.60% +1.94%] index_select strided 8 : Elapsed 0.012 ms (1.193 ms / 100) 1.187 -> 1.199 ( +1.01%) [ +0.76% +0.25% +0.00% / +1.01% +1.35% +1.10%] index_select strided 16 : Elapsed 0.012 ms (1.196 ms / 100) 1.807 -> 1.821 ( +0.77%) [ +0.61% +0.39% +0.00% / +0.77% +0.89% +1.44%] index_select strided 64 : Elapsed 0.018 ms (1.818 ms / 100) good 1.795 -> 1.696 ( -5.52%) [ +0.61% +0.39% +0.00% / +0.95% -5.52% -4.79%] index_select strided 100 : Elapsed 0.018 ms (1.806 ms / 100) 1.205 -> 1.222 ( +1.41%) [ +0.91% +0.50% +0.00% / +1.41% +4.15% +3.98%] index_select strided 255 : Elapsed 0.012 ms (1.216 ms / 100) 1.185 -> 1.192 ( +0.59%) [ +0.59% +0.25% +0.00% / +0.59% +0.93% +0.84%] index_select strided 256 : Elapsed 0.012 ms (1.192 ms / 100) 1.187 -> 1.199 ( +1.01%) [ +1.10% +0.00% +0.08% / +1.01% +1.01% +1.01%] index_select strided 257 : Elapsed 0.012 ms (1.200 ms / 100) 1.646 -> 1.604 ( -2.55%) [ +0.73% +0.00% +0.18% / +0.79% -2.55% -2.31%] index_select random : Elapsed 0.017 ms (1.658 ms / 100) 1.673 -> 1.619 ( -3.23%) [ +0.60% +0.00% +0.00% / +0.84% -3.23% -3.11%] index_select random_sorted : Elapsed 0.017 ms (1.683 ms / 100) 1.659 -> 1.618 ( -2.47%) [ +0.48% +0.00% +0.00% / +0.90% -2.47% -2.29%] index_select perm : Elapsed 0.017 ms (1.667 ms / 100) 1.683 -> 1.638 ( -2.67%) [ +1.01% +0.53% +0.00% / +0.83% -2.67% -2.67%] index_select perm_sorted : Elapsed 0.017 ms (1.700 ms / 100) B = [15, 1000] (stride (1, 15)) dim = 0 fill_cnt = 2048 GOOD 14.705 -> 8.445 (-42.57%) [ +0.18% +0.00% +0.14% / -42.56% -42.57% -42.57%] index_fill_ const : Elapsed 0.147 ms (14.732 ms / 100) GOOD 14.828 -> 8.444 (-43.05%) [ +0.33% +0.04% +0.00% / -43.05% -43.05% -43.04%] index_fill_ linear : Elapsed 0.149 ms (14.877 ms / 100) GOOD 14.818 -> 8.442 (-43.03%) [ +0.27% +0.19% +0.00% / -43.02% -43.03% -43.02%] index_fill_ reverse : Elapsed 0.149 ms (14.858 ms / 100) GOOD 14.700 -> 8.438 (-42.60%) [ +0.31% +0.00% +0.10% / -42.57% -42.60% -42.58%] index_fill_ skip64 : Elapsed 0.147 ms (14.746 ms / 100) GOOD 14.693 -> 8.439 (-42.56%) [ +0.18% +0.04% +0.00% / -42.52% -42.56% -42.56%] index_fill_ skip256 : Elapsed 0.147 ms (14.719 ms / 100) GOOD 14.635 -> 8.441 (-42.32%) [ +0.04% +0.05% +0.00% / -42.32% -42.28% -42.31%] index_fill_ spread : Elapsed 0.146 ms (14.641 ms / 100) GOOD 14.528 -> 8.487 (-41.58%) [ +0.12% +0.02% +0.00% / -41.58% -41.56% -41.58%] index_fill_ strided 3 : Elapsed 0.145 ms (14.545 ms / 100) GOOD 14.510 -> 8.473 (-41.61%) [ +0.28% +0.00% +0.28% / -41.61% -41.59% -41.59%] index_fill_ strided 5 : Elapsed 0.146 ms (14.551 ms / 100) GOOD 14.455 -> 8.459 (-41.48%) [ +0.13% +0.24% +0.00% / -41.48% -41.45% -41.44%] index_fill_ strided 7 : Elapsed 0.145 ms (14.474 ms / 100) GOOD 14.497 -> 8.455 (-41.68%) [ +0.03% +0.00% +0.08% / -41.62% -41.68% -41.64%] index_fill_ strided 8 : Elapsed 0.145 ms (14.502 ms / 100) GOOD 14.488 -> 8.458 (-41.62%) [ +0.32% +0.00% +0.06% / -41.58% -41.62% -41.61%] index_fill_ random : Elapsed 0.145 ms (14.535 ms / 100) GOOD 14.640 -> 8.436 (-42.38%) [ +0.27% +0.00% +0.14% / -42.38% -42.28% -42.30%] index_fill_ random_sorted : Elapsed 0.147 ms (14.679 ms / 100) B = [15, 1000] (stride (1, 15)) A = [2048, 1000] (stride (1000, 1)) dim = 0 1.128 -> 1.136 ( +0.71%) [ +1.15% +0.27% +0.00% / +0.89% +0.98% +0.71%] index_select const : Elapsed 0.011 ms (1.141 ms / 100) 1.127 -> 1.138 ( +0.98%) [ +0.89% +0.09% +0.00% / +0.98% +1.51% +1.42%] index_select wrap : Elapsed 0.011 ms (1.137 ms / 100) 1.129 -> 1.139 ( +0.89%) [ +0.97% +0.00% +0.00% / +0.89% +1.24% +1.42%] index_select linear : Elapsed 0.011 ms (1.140 ms / 100) 1.133 -> 1.143 ( +0.88%) [ +0.97% +0.18% +0.00% / +0.88% +1.59% +1.41%] index_select reverse : Elapsed 0.011 ms (1.144 ms / 100) 1.126 -> 1.134 ( +0.71%) [ +0.89% +0.00% +0.00% / +0.80% +0.71% +0.71%] index_select skip64 : Elapsed 0.011 ms (1.136 ms / 100) 1.130 -> 1.140 ( +0.88%) [ +1.42% +0.18% +0.00% / +1.06% +0.88% +0.88%] index_select skip256 : Elapsed 0.011 ms (1.146 ms / 100) 1.128 -> 1.138 ( +0.89%) [ +0.80% +0.00% +0.00% / +0.89% +1.60% +1.51%] index_select spread : Elapsed 0.011 ms (1.137 ms / 100) 1.134 -> 1.145 ( +0.97%) [ +1.06% +0.00% +0.00% / +0.97% +1.41% +1.59%] index_select strided 3 : Elapsed 0.011 ms (1.146 ms / 100) 1.130 -> 1.143 ( +1.15%) [ +1.15% +0.35% +0.00% / +1.15% +1.50% +1.33%] index_select strided 5 : Elapsed 0.011 ms (1.143 ms / 100) 1.132 -> 1.143 ( +0.97%) [ +1.15% +0.00% +0.18% / +0.97% +1.33% +1.24%] index_select strided 7 : Elapsed 0.011 ms (1.145 ms / 100) 1.123 -> 1.134 ( +0.98%) [ +0.98% +0.00% +0.18% / +0.98% +1.42% +1.42%] index_select strided 8 : Elapsed 0.011 ms (1.134 ms / 100) 1.122 -> 1.133 ( +0.98%) [ +0.98% +0.18% +0.00% / +0.98% +1.34% +1.34%] index_select strided 16 : Elapsed 0.011 ms (1.133 ms / 100) 1.122 -> 1.134 ( +1.07%) [ +1.16% +0.18% +0.00% / +1.07% +1.69% +1.69%] index_select strided 64 : Elapsed 0.011 ms (1.135 ms / 100) 1.123 -> 1.134 ( +0.98%) [ +1.07% +0.00% +0.09% / +0.98% +1.78% +1.51%] index_select strided 100 : Elapsed 0.011 ms (1.135 ms / 100) 1.130 -> 1.139 ( +0.80%) [ +0.97% +0.00% +0.00% / +0.80% +1.59% +1.33%] index_select strided 255 : Elapsed 0.011 ms (1.141 ms / 100) 1.123 -> 1.133 ( +0.89%) [ +1.07% +0.00% +0.00% / +0.89% +1.16% +1.16%] index_select strided 256 : Elapsed 0.011 ms (1.135 ms / 100) 1.129 -> 1.137 ( +0.71%) [ +0.97% +0.00% +0.00% / +1.15% +0.71% +0.80%] index_select strided 257 : Elapsed 0.011 ms (1.140 ms / 100) 1.133 -> 1.143 ( +0.88%) [ +0.97% +0.00% +0.00% / +0.88% +1.41% +1.41%] index_select random : Elapsed 0.011 ms (1.144 ms / 100) 1.131 -> 1.141 ( +0.88%) [ +0.97% +0.18% +0.00% / +0.88% +1.06% +1.24%] index_select random_sorted : Elapsed 0.011 ms (1.142 ms / 100) 1.127 -> 1.140 ( +1.15%) [ +0.98% +0.00% +0.18% / +1.15% +1.24% +1.24%] index_select perm : Elapsed 0.011 ms (1.138 ms / 100) 1.132 -> 1.144 ( +1.06%) [ +1.33% +0.00% +0.09% / +1.06% +1.15% +1.24%] index_select perm_sorted : Elapsed 0.011 ms (1.147 ms / 100) B = [15, 1000] (stride (1, 15)) A = [2048, 1000] (stride (1, 2048)) dim = 0 1.184 -> 1.193 ( +0.76%) [ +0.93% +0.00% +0.25% / +0.76% +1.44% +1.27%] index_select const : Elapsed 0.012 ms (1.195 ms / 100) 1.187 -> 1.196 ( +0.76%) [ +0.67% +0.00% +0.17% / +0.76% +1.26% +1.26%] index_select wrap : Elapsed 0.012 ms (1.195 ms / 100) 1.185 -> 1.195 ( +0.84%) [ +0.84% +0.17% +0.00% / +0.84% +1.43% +1.27%] index_select linear : Elapsed 0.012 ms (1.195 ms / 100) 1.182 -> 1.192 ( +0.85%) [ +0.76% +0.17% +0.00% / +0.85% +1.95% +1.69%] index_select reverse : Elapsed 0.012 ms (1.191 ms / 100) 1.184 -> 1.195 ( +0.93%) [ +1.01% +0.00% +0.00% / +0.93% +1.69% +1.77%] index_select skip64 : Elapsed 0.012 ms (1.196 ms / 100) 1.185 -> 1.193 ( +0.68%) [ +1.52% +0.17% +0.00% / +0.68% +1.60% +1.27%] index_select skip256 : Elapsed 0.012 ms (1.203 ms / 100) 1.793 -> 1.814 ( +1.17%) [ +1.12% +0.39% +0.00% / +1.17% +1.28% +1.51%] index_select spread : Elapsed 0.018 ms (1.813 ms / 100) 1.187 -> 1.196 ( +0.76%) [ +0.93% +0.00% +0.17% / +0.76% +1.26% +1.26%] index_select strided 3 : Elapsed 0.012 ms (1.198 ms / 100) 1.188 -> 1.197 ( +0.76%) [ +0.76% +0.00% +0.08% / +0.76% +1.26% +1.35%] index_select strided 5 : Elapsed 0.012 ms (1.197 ms / 100) 1.190 -> 1.199 ( +0.76%) [ +0.76% +0.00% +0.17% / +0.76% +1.34% +1.18%] index_select strided 7 : Elapsed 0.012 ms (1.199 ms / 100) 1.189 -> 1.198 ( +0.76%) [ +1.09% +0.00% +0.08% / +0.76% +1.35% +1.35%] index_select strided 8 : Elapsed 0.012 ms (1.202 ms / 100) 1.190 -> 1.203 ( +1.09%) [ +1.01% +0.17% +0.00% / +1.09% +1.43% +1.43%] index_select strided 16 : Elapsed 0.012 ms (1.202 ms / 100) 1.793 -> 1.801 ( +0.45%) [ +0.73% +0.45% +0.00% / +0.45% +1.12% +1.17%] index_select strided 64 : Elapsed 0.018 ms (1.806 ms / 100) good 1.776 -> 1.685 ( -5.12%) [ +0.73% +0.34% +0.00% / +0.90% -5.01% -5.12%] index_select strided 100 : Elapsed 0.018 ms (1.789 ms / 100) 1.226 -> 1.236 ( +0.82%) [ +0.98% +0.33% +0.00% / +0.82% +4.00% +3.67%] index_select strided 255 : Elapsed 0.012 ms (1.238 ms / 100) 1.190 -> 1.199 ( +0.76%) [ +0.76% +0.00% +0.00% / +0.76% +1.01% +0.92%] index_select strided 256 : Elapsed 0.012 ms (1.199 ms / 100) 1.192 -> 1.203 ( +0.92%) [ +0.92% +0.00% +0.08% / +1.01% +0.92% +0.92%] index_select strided 257 : Elapsed 0.012 ms (1.203 ms / 100) 1.734 -> 1.729 ( -0.29%) [ +0.58% +0.29% +0.00% / +0.92% -0.23% -0.29%] index_select random : Elapsed 0.017 ms (1.744 ms / 100) 1.771 -> 1.760 ( -0.62%) [ +1.07% +0.00% +0.06% / +1.13% -0.62% +0.28%] index_select random_sorted : Elapsed 0.018 ms (1.790 ms / 100) 1.789 -> 1.760 ( -1.62%) [ +1.12% +0.00% +0.11% / +1.12% -1.62% -1.17%] index_select perm : Elapsed 0.018 ms (1.809 ms / 100) 1.798 -> 1.774 ( -1.33%) [ +0.67% +0.06% +0.00% / +0.61% -1.33% -0.89%] index_select perm_sorted : Elapsed 0.018 ms (1.810 ms / 100) out_shape = [2048, 15] in_shape = [2048, 1000] idx_dim = 1 B = [2048, 15] (stride (15, 1)) dim = 1 fill_cnt = 1000 GOOD 14.556 -> 7.826 (-46.24%) [ +0.34% +0.34% +0.00% / -46.10% -46.24% -46.20%] index_fill_ const : Elapsed 0.146 ms (14.605 ms / 100) GOOD 14.760 -> 7.822 (-47.01%) [ +0.09% +0.00% +0.12% / -47.01% -46.92% -46.93%] index_fill_ linear : Elapsed 0.148 ms (14.773 ms / 100) GOOD 14.492 -> 7.829 (-45.98%) [ +0.44% +0.35% +0.00% / -45.87% -45.98% -45.96%] index_fill_ reverse : Elapsed 0.146 ms (14.556 ms / 100) GOOD 14.724 -> 7.830 (-46.82%) [ +0.20% +0.00% +0.14% / -46.82% -46.80% -46.77%] index_fill_ skip64 : Elapsed 0.148 ms (14.753 ms / 100) GOOD 14.776 -> 7.841 (-46.93%) [ +0.00% +0.16% +0.05% / -46.93% -46.93% -46.89%] index_fill_ skip256 : Elapsed 0.148 ms (14.776 ms / 100) GOOD 14.732 -> 7.822 (-46.90%) [ +0.00% +0.22% +0.43% / -46.80% -46.88% -46.90%] index_fill_ spread : Elapsed 0.147 ms (14.732 ms / 100) GOOD 14.803 -> 7.893 (-46.68%) [ +0.22% +0.17% +0.00% / -46.68% -46.63% -46.63%] index_fill_ strided 3 : Elapsed 0.148 ms (14.835 ms / 100) GOOD 14.774 -> 7.890 (-46.60%) [ +0.30% +0.06% +0.00% / -46.60% -46.58% -46.58%] index_fill_ strided 5 : Elapsed 0.148 ms (14.818 ms / 100) GOOD 14.546 -> 7.843 (-46.08%) [ +0.09% +0.00% +0.10% / -46.04% -46.04% -46.08%] index_fill_ strided 7 : Elapsed 0.146 ms (14.559 ms / 100) GOOD 14.695 -> 7.846 (-46.61%) [ +0.10% +0.05% +0.00% / -46.61% -46.55% -46.53%] index_fill_ strided 8 : Elapsed 0.147 ms (14.709 ms / 100) GOOD 14.542 -> 7.849 (-46.03%) [ +0.12% +0.00% +0.07% / -45.99% -46.03% -46.00%] index_fill_ random : Elapsed 0.146 ms (14.559 ms / 100) GOOD 14.891 -> 7.823 (-47.46%) [ +0.21% +0.00% +0.11% / -47.46% -47.44% -47.40%] index_fill_ random_sorted : Elapsed 0.149 ms (14.923 ms / 100) B = [2048, 15] (stride (15, 1)) A = [2048, 1000] (stride (1000, 1)) dim = 1 1.273 -> 1.284 ( +0.86%) [ +0.86% +0.39% +0.00% / +0.86% +1.96% +1.81%] index_select const : Elapsed 0.013 ms (1.284 ms / 100) 1.286 -> 1.296 ( +0.78%) [ +0.93% +0.31% +0.00% / +0.78% +2.64% +2.41%] index_select wrap : Elapsed 0.013 ms (1.298 ms / 100) 1.288 -> 1.298 ( +0.78%) [ +1.09% +0.23% +0.00% / +0.78% +2.33% +2.48%] index_select linear : Elapsed 0.013 ms (1.302 ms / 100) 1.281 -> 1.291 ( +0.78%) [ +0.86% +0.08% +0.00% / +0.78% +2.89% +2.81%] index_select reverse : Elapsed 0.013 ms (1.292 ms / 100) 1.273 -> 1.284 ( +0.86%) [ +0.86% +0.08% +0.00% / +0.86% +2.28% +2.36%] index_select skip64 : Elapsed 0.013 ms (1.284 ms / 100) 1.276 -> 1.288 ( +0.94%) [ +0.94% +0.39% +0.00% / +0.94% +2.43% +2.43%] index_select skip256 : Elapsed 0.013 ms (1.288 ms / 100) 2.780 -> 2.790 ( +0.36%) [ +0.32% +0.18% +0.00% / +0.36% +1.51% +1.44%] index_select spread : Elapsed 0.028 ms (2.789 ms / 100) 1.404 -> 1.417 ( +0.93%) [ +0.85% +0.07% +0.00% / +0.93% +2.92% +2.85%] index_select strided 3 : Elapsed 0.014 ms (1.416 ms / 100) 1.545 -> 1.564 ( +1.23%) [ +1.17% +0.58% +0.00% / +1.23% +5.89% +6.08%] index_select strided 5 : Elapsed 0.016 ms (1.563 ms / 100) 1.874 -> 1.891 ( +0.91%) [ +0.59% +0.16% +0.00% / +0.91% +6.51% +6.56%] index_select strided 7 : Elapsed 0.019 ms (1.885 ms / 100) 2.161 -> 2.174 ( +0.60%) [ +0.56% +0.23% +0.00% / +0.60% +2.78% +3.19%] index_select strided 8 : Elapsed 0.022 ms (2.173 ms / 100) 2.736 -> 2.733 ( -0.11%) [ +0.48% +0.07% +0.00% / +0.48% +0.00% -0.11%] index_select strided 16 : Elapsed 0.027 ms (2.749 ms / 100) 2.778 -> 2.794 ( +0.58%) [ +0.50% +0.22% +0.00% / +0.58% +2.09% +2.23%] index_select strided 64 : Elapsed 0.028 ms (2.792 ms / 100) 2.640 -> 2.630 ( -0.38%) [ +0.30% +0.00% +0.08% / +0.23% -0.30% -0.38%] index_select strided 100 : Elapsed 0.026 ms (2.648 ms / 100) 2.770 -> 2.764 ( -0.22%) [ +0.51% +0.00% +0.14% / +0.32% +0.07% -0.22%] index_select strided 255 : Elapsed 0.028 ms (2.784 ms / 100) 2.776 -> 2.790 ( +0.50%) [ +0.32% +0.18% +0.00% / +0.50% +0.54% +0.58%] index_select strided 256 : Elapsed 0.028 ms (2.785 ms / 100) 2.777 -> 2.784 ( +0.25%) [ +0.22% +0.07% +0.00% / +0.29% +0.25% +0.32%] index_select strided 257 : Elapsed 0.028 ms (2.783 ms / 100) 2.735 -> 2.741 ( +0.22%) [ +0.44% +0.00% +0.15% / +0.37% +0.26% +0.22%] index_select random : Elapsed 0.027 ms (2.747 ms / 100) 2.703 -> 2.711 ( +0.30%) [ +0.37% +0.11% +0.00% / +0.30% +0.33% +0.37%] index_select random_sorted : Elapsed 0.027 ms (2.713 ms / 100) 2.805 -> 2.796 ( -0.32%) [ +0.29% +0.00% +0.04% / +0.25% -0.14% -0.32%] index_select perm : Elapsed 0.028 ms (2.813 ms / 100) 2.785 -> 2.793 ( +0.29%) [ +0.18% +0.00% +0.22% / +0.29% +2.55% +2.84%] index_select perm_sorted : Elapsed 0.028 ms (2.790 ms / 100) B = [2048, 15] (stride (15, 1)) A = [2048, 1000] (stride (1, 2048)) dim = 1 1.150 -> 1.156 ( +0.52%) [ +1.13% +0.17% +0.00% / +0.87% +0.70% +0.52%] index_select const : Elapsed 0.012 ms (1.163 ms / 100) 1.148 -> 1.149 ( +0.09%) [ +1.13% +0.26% +0.00% / +1.13% +0.09% +0.26%] index_select wrap : Elapsed 0.012 ms (1.161 ms / 100) 1.150 -> 1.148 ( -0.17%) [ +0.87% +0.09% +0.00% / +0.78% +0.09% -0.17%] index_select linear : Elapsed 0.012 ms (1.160 ms / 100) 1.147 -> 1.152 ( +0.44%) [ +0.87% +0.00% +0.00% / +0.78% +0.52% +0.44%] index_select reverse : Elapsed 0.012 ms (1.157 ms / 100) 1.153 -> 1.159 ( +0.52%) [ +0.87% +0.00% +0.17% / +0.87% +0.52% +0.61%] index_select skip64 : Elapsed 0.012 ms (1.163 ms / 100) 1.155 -> 1.162 ( +0.61%) [ +0.95% +0.00% +0.17% / +0.78% +0.61% +0.78%] index_select skip256 : Elapsed 0.012 ms (1.166 ms / 100) 1.148 -> 1.155 ( +0.61%) [ +1.05% +0.26% +0.00% / +0.78% +0.61% +0.61%] index_select spread : Elapsed 0.012 ms (1.160 ms / 100) 1.149 -> 1.146 ( -0.26%) [ +1.13% +0.26% +0.00% / +0.96% +0.00% -0.26%] index_select strided 3 : Elapsed 0.012 ms (1.162 ms / 100) 1.149 -> 1.148 ( -0.09%) [ +0.96% +0.00% +0.17% / +0.87% -0.09% -0.09%] index_select strided 5 : Elapsed 0.012 ms (1.160 ms / 100) 1.148 -> 1.153 ( +0.44%) [ +0.78% +0.00% +0.00% / +0.87% +0.52% +0.44%] index_select strided 7 : Elapsed 0.012 ms (1.157 ms / 100) 1.148 -> 1.152 ( +0.35%) [ +0.96% +0.00% +0.09% / +0.87% +0.35% +0.35%] index_select strided 8 : Elapsed 0.012 ms (1.159 ms / 100) 1.146 -> 1.153 ( +0.61%) [ +0.79% +0.09% +0.00% / +0.79% +0.70% +0.61%] index_select strided 16 : Elapsed 0.012 ms (1.155 ms / 100) 1.146 -> 1.152 ( +0.52%) [ +0.70% +0.00% +0.09% / +0.79% +0.70% +0.52%] index_select strided 64 : Elapsed 0.012 ms (1.154 ms / 100) 1.149 -> 1.146 ( -0.26%) [ +0.87% +0.44% +0.00% / +0.78% -0.26% -0.17%] index_select strided 100 : Elapsed 0.012 ms (1.159 ms / 100) 1.150 -> 1.147 ( -0.26%) [ +0.87% +0.09% +0.00% / +0.78% -0.17% -0.26%] index_select strided 255 : Elapsed 0.012 ms (1.160 ms / 100) 1.149 -> 1.152 ( +0.26%) [ +1.04% +0.00% +0.09% / +0.70% +0.26% +0.44%] index_select strided 256 : Elapsed 0.012 ms (1.161 ms / 100) 1.148 -> 1.154 ( +0.52%) [ +0.87% +0.09% +0.00% / +0.70% +0.52% +0.61%] index_select strided 257 : Elapsed 0.012 ms (1.158 ms / 100) 1.146 -> 1.155 ( +0.79%) [ +0.87% +0.17% +0.00% / +0.79% +0.96% +0.79%] index_select random : Elapsed 0.012 ms (1.156 ms / 100) 1.146 -> 1.153 ( +0.61%) [ +0.70% +0.00% +0.00% / +0.87% +0.70% +0.61%] index_select random_sorted : Elapsed 0.012 ms (1.154 ms / 100) 1.148 -> 1.150 ( +0.17%) [ +0.87% +0.00% +0.00% / +0.87% +0.35% +0.17%] index_select perm : Elapsed 0.012 ms (1.158 ms / 100) 1.149 -> 1.153 ( +0.35%) [ +0.87% +0.00% +0.09% / +0.78% +0.35% +0.35%] index_select perm_sorted : Elapsed 0.012 ms (1.159 ms / 100) B = [2048, 15] (stride (1, 2048)) dim = 1 fill_cnt = 1000 8.020 -> 8.064 ( +0.55%) [ +0.65% +0.09% +0.00% / +0.55% +0.76% +0.77%] index_fill_ const : Elapsed 0.081 ms (8.072 ms / 100) 8.065 -> 8.134 ( +0.86%) [ +0.55% +0.11% +0.00% / +0.86% +1.38% +1.57%] index_fill_ linear : Elapsed 0.081 ms (8.109 ms / 100) 8.052 -> 8.082 ( +0.37%) [ +0.89% +0.00% +0.01% / +0.52% +0.37% +0.60%] index_fill_ reverse : Elapsed 0.081 ms (8.124 ms / 100) 8.019 -> 8.115 ( +1.20%) [ +1.00% +0.00% +0.25% / +1.20% +1.43% +1.31%] index_fill_ skip64 : Elapsed 0.081 ms (8.099 ms / 100) 7.949 -> 8.029 ( +1.01%) [ +0.92% +0.15% +0.00% / +1.01% +1.99% +2.03%] index_fill_ skip256 : Elapsed 0.080 ms (8.022 ms / 100) 8.066 -> 8.120 ( +0.67%) [ +0.88% +0.05% +0.00% / +0.67% +0.72% +0.86%] index_fill_ spread : Elapsed 0.081 ms (8.137 ms / 100) 7.954 -> 8.028 ( +0.93%) [ +0.99% +0.16% +0.00% / +0.93% +1.62% +1.82%] index_fill_ strided 3 : Elapsed 0.080 ms (8.033 ms / 100) 8.035 -> 8.085 ( +0.62%) [ +0.76% +0.31% +0.00% / +0.62% +1.06% +1.27%] index_fill_ strided 5 : Elapsed 0.081 ms (8.096 ms / 100) 8.015 -> 8.046 ( +0.39%) [ +0.92% +0.15% +0.00% / +0.71% +0.39% +0.70%] index_fill_ strided 7 : Elapsed 0.081 ms (8.089 ms / 100) 8.023 -> 8.091 ( +0.85%) [ +0.65% +0.00% +0.20% / +1.37% +0.85% +0.85%] index_fill_ strided 8 : Elapsed 0.081 ms (8.075 ms / 100) 8.030 -> 8.051 ( +0.26%) [ +0.60% +0.01% +0.00% / +0.81% +0.31% +0.26%] index_fill_ random : Elapsed 0.081 ms (8.078 ms / 100) 8.034 -> 8.101 ( +0.83%) [ +0.66% +0.21% +0.00% / +1.28% +0.83% +1.01%] index_fill_ random_sorted : Elapsed 0.081 ms (8.087 ms / 100) B = [2048, 15] (stride (1, 2048)) A = [2048, 1000] (stride (1000, 1)) dim = 1 1.245 -> 1.258 ( +1.04%) [ +0.72% +0.24% +0.00% / +1.04% +2.41% +2.41%] index_select const : Elapsed 0.013 ms (1.254 ms / 100) 1.263 -> 1.277 ( +1.11%) [ +0.79% +0.00% +0.24% / +1.11% +2.93% +2.77%] index_select wrap : Elapsed 0.013 ms (1.273 ms / 100) 1.268 -> 1.273 ( +0.39%) [ +0.79% +0.00% +0.08% / +0.39% +2.52% +2.68%] index_select linear : Elapsed 0.013 ms (1.278 ms / 100) 1.253 -> 1.264 ( +0.88%) [ +0.96% +0.00% +0.08% / +0.88% +3.67% +4.15%] index_select reverse : Elapsed 0.013 ms (1.265 ms / 100) 1.245 -> 1.256 ( +0.88%) [ +0.88% +0.24% +0.00% / +0.88% +3.13% +2.97%] index_select skip64 : Elapsed 0.013 ms (1.256 ms / 100) 1.248 -> 1.260 ( +0.96%) [ +0.88% +0.24% +0.00% / +0.96% +3.12% +3.12%] index_select skip256 : Elapsed 0.013 ms (1.259 ms / 100) 2.796 -> 2.806 ( +0.36%) [ +0.25% +0.04% +0.00% / +0.36% +0.82% +0.61%] index_select spread : Elapsed 0.028 ms (2.803 ms / 100) 1.384 -> 1.395 ( +0.79%) [ +0.87% +0.22% +0.00% / +0.79% +2.96% +3.03%] index_select strided 3 : Elapsed 0.014 ms (1.396 ms / 100) 1.545 -> 1.559 ( +0.91%) [ +0.91% +0.13% +0.00% / +0.91% +6.02% +5.83%] index_select strided 5 : Elapsed 0.016 ms (1.559 ms / 100) 1.895 -> 1.908 ( +0.69%) [ +0.74% +0.00% +0.21% / +0.69% +5.91% +6.33%] index_select strided 7 : Elapsed 0.019 ms (1.909 ms / 100) 2.176 -> 2.189 ( +0.60%) [ +0.51% +0.18% +0.00% / +0.60% +2.44% +2.44%] index_select strided 8 : Elapsed 0.022 ms (2.187 ms / 100) 2.744 -> 2.717 ( -0.98%) [ +0.18% +0.04% +0.00% / +0.26% -0.91% -0.98%] index_select strided 16 : Elapsed 0.027 ms (2.749 ms / 100) 2.786 -> 2.798 ( +0.43%) [ +0.50% +0.00% +0.22% / +0.43% +1.26% +1.33%] index_select strided 64 : Elapsed 0.028 ms (2.800 ms / 100) 2.625 -> 2.619 ( -0.23%) [ +0.53% +0.00% +0.08% / +0.19% +0.15% -0.23%] index_select strided 100 : Elapsed 0.026 ms (2.639 ms / 100) 2.766 -> 2.753 ( -0.47%) [ +0.80% +0.00% +0.72% / +0.36% -0.33% -0.47%] index_select strided 255 : Elapsed 0.028 ms (2.788 ms / 100) 2.787 -> 2.778 ( -0.32%) [ +0.43% +0.22% +0.00% / +0.47% -0.32% -0.11%] index_select strided 256 : Elapsed 0.028 ms (2.799 ms / 100) 2.790 -> 2.772 ( -0.65%) [ +0.29% +0.07% +0.00% / +0.32% -0.65% -0.57%] index_select strided 257 : Elapsed 0.028 ms (2.798 ms / 100) 2.789 -> 2.782 ( -0.25%) [ +0.07% +0.04% +0.00% / +0.14% -0.25% -0.18%] index_select random : Elapsed 0.028 ms (2.791 ms / 100) 2.814 -> 2.813 ( -0.04%) [ +0.07% +0.07% +0.00% / -0.04% +0.28% +0.11%] index_select random_sorted : Elapsed 0.028 ms (2.816 ms / 100) 2.790 -> 2.774 ( -0.57%) [ +0.18% +0.14% +0.00% / -0.07% -0.57% -0.54%] index_select perm : Elapsed 0.028 ms (2.795 ms / 100) 2.807 -> 2.809 ( +0.07%) [ +0.32% +0.00% +0.25% / +0.43% +0.07% +0.39%] index_select perm_sorted : Elapsed 0.028 ms (2.816 ms / 100) B = [2048, 15] (stride (1, 2048)) A = [2048, 1000] (stride (1, 2048)) dim = 1 1.068 -> 1.075 ( +0.66%) [ +1.12% +0.00% +0.00% / +0.75% +0.75% +0.66%] index_select const : Elapsed 0.011 ms (1.080 ms / 100) 1.065 -> 1.067 ( +0.19%) [ +1.13% +0.19% +0.00% / +1.03% +0.19% +0.28%] index_select wrap : Elapsed 0.011 ms (1.077 ms / 100) 1.066 -> 1.066 ( +0.00%) [ +1.03% +0.09% +0.00% / +0.84% +0.00% +0.09%] index_select linear : Elapsed 0.011 ms (1.077 ms / 100) 1.064 -> 1.068 ( +0.38%) [ +1.03% +0.09% +0.00% / +0.75% +0.38% +0.47%] index_select reverse : Elapsed 0.011 ms (1.075 ms / 100) 1.073 -> 1.077 ( +0.37%) [ +0.84% +0.00% +0.00% / +0.75% +0.37% +0.37%] index_select skip64 : Elapsed 0.011 ms (1.082 ms / 100) 1.075 -> 1.080 ( +0.47%) [ +0.84% +0.09% +0.00% / +0.74% +0.56% +0.47%] index_select skip256 : Elapsed 0.011 ms (1.084 ms / 100) 1.065 -> 1.069 ( +0.38%) [ +1.03% +0.09% +0.00% / +0.85% +0.38% +0.47%] index_select spread : Elapsed 0.011 ms (1.076 ms / 100) 1.069 -> 1.065 ( -0.37%) [ +0.94% +0.09% +0.00% / +0.94% -0.37% -0.37%] index_select strided 3 : Elapsed 0.011 ms (1.079 ms / 100) 1.068 -> 1.064 ( -0.37%) [ +0.94% +0.00% +0.19% / +0.84% -0.37% -0.37%] index_select strided 5 : Elapsed 0.011 ms (1.078 ms / 100) 1.066 -> 1.071 ( +0.47%) [ +0.84% +0.00% +0.00% / +0.84% +0.47% +0.47%] index_select strided 7 : Elapsed 0.011 ms (1.075 ms / 100) 1.066 -> 1.068 ( +0.19%) [ +0.94% +0.00% +0.00% / +0.75% +0.19% +0.38%] index_select strided 8 : Elapsed 0.011 ms (1.076 ms / 100) 1.062 -> 1.071 ( +0.85%) [ +1.04% +0.09% +0.00% / +0.94% +0.85% +0.94%] index_select strided 16 : Elapsed 0.011 ms (1.073 ms / 100) 1.063 -> 1.072 ( +0.85%) [ +1.03% +0.00% +0.00% / +0.94% +0.85% +0.85%] index_select strided 64 : Elapsed 0.011 ms (1.074 ms / 100) 1.069 -> 1.063 ( -0.56%) [ +0.94% +0.00% +0.09% / +0.84% -0.47% -0.56%] index_select strided 100 : Elapsed 0.011 ms (1.079 ms / 100) 1.068 -> 1.064 ( -0.37%) [ +0.94% +0.19% +0.00% / +0.84% -0.28% -0.37%] index_select strided 255 : Elapsed 0.011 ms (1.078 ms / 100) 1.065 -> 1.069 ( +0.38%) [ +0.75% +0.00% +0.00% / +0.85% +0.38% +0.38%] index_select strided 256 : Elapsed 0.011 ms (1.073 ms / 100) 1.064 -> 1.069 ( +0.47%) [ +1.03% +0.19% +0.00% / +0.85% +0.75% +0.47%] index_select strided 257 : Elapsed 0.011 ms (1.075 ms / 100) 1.062 -> 1.072 ( +0.94%) [ +1.60% +0.09% +0.00% / +0.94% +0.94% +1.51%] index_select random : Elapsed 0.011 ms (1.079 ms / 100) 1.063 -> 1.072 ( +0.85%) [ +0.94% +0.00% +0.09% / +0.85% +0.85% +0.85%] index_select random_sorted : Elapsed 0.011 ms (1.073 ms / 100) 1.066 -> 1.068 ( +0.19%) [ +0.75% +0.09% +0.00% / +0.84% +0.28% +0.19%] index_select perm : Elapsed 0.011 ms (1.074 ms / 100) 1.067 -> 1.069 ( +0.19%) [ +0.84% +0.00% +0.00% / +0.75% +0.28% +0.19%] index_select perm_sorted : Elapsed 0.011 ms (1.076 ms / 100) out_shape = [1000, 2048] in_shape = [15, 2048] idx_dim = 0 B = [1000, 2048] (stride (2048, 1)) dim = 0 fill_cnt = 15 0.837 -> 0.844 ( +0.84%) [ +1.67% +0.00% +27.24% / +0.84% +2.75% +12.07%] index_fill_ const : Elapsed 0.009 ms (0.851 ms / 100) 0.839 -> 0.846 ( +0.83%) [ +1.43% +0.00% +0.24% / +0.83% +11.68% +9.77%] index_fill_ linear : Elapsed 0.009 ms (0.851 ms / 100) 0.839 -> 0.847 ( +0.95%) [ +0.95% +0.00% +0.12% / +0.95% +1.43% +4.29%] index_fill_ reverse : Elapsed 0.008 ms (0.847 ms / 100) 0.838 -> 0.845 ( +0.84%) [ +1.19% +0.12% +0.00% / +0.84% +1.31% +1.55%] index_fill_ skip64 : Elapsed 0.008 ms (0.848 ms / 100) 0.838 -> 0.846 ( +0.95%) [ +1.19% +0.12% +0.00% / +0.95% +1.43% +1.07%] index_fill_ skip256 : Elapsed 0.008 ms (0.848 ms / 100) 0.842 -> 0.848 ( +0.71%) [ +1.31% +2.73% +0.00% / +0.83% +0.71% +0.83%] index_fill_ spread : Elapsed 0.009 ms (0.853 ms / 100) 0.841 -> 0.848 ( +0.83%) [ +1.19% +0.24% +0.00% / +1.07% +0.83% +3.69%] index_fill_ strided 3 : Elapsed 0.009 ms (0.851 ms / 100) 0.837 -> 0.844 ( +0.84%) [ +0.96% +0.12% +0.00% / +0.84% +1.67% +1.31%] index_fill_ strided 5 : Elapsed 0.008 ms (0.845 ms / 100) 0.837 -> 0.844 ( +0.84%) [ +0.96% +0.24% +0.00% / +0.84% +1.67% +1.43%] index_fill_ strided 7 : Elapsed 0.008 ms (0.845 ms / 100) 0.837 -> 0.845 ( +0.96%) [ +1.79% +0.12% +0.00% / +0.96% +1.55% +1.55%] index_fill_ strided 8 : Elapsed 0.009 ms (0.852 ms / 100) 0.837 -> 0.844 ( +0.84%) [ +0.96% +0.36% +0.00% / +0.84% +1.67% +1.55%] index_fill_ strided 16 : Elapsed 0.008 ms (0.845 ms / 100) 0.838 -> 0.849 ( +1.31%) [ +1.07% +0.12% +0.00% / +1.31% +1.31% +1.31%] index_fill_ strided 64 : Elapsed 0.008 ms (0.847 ms / 100) 0.837 -> 0.846 ( +1.08%) [ +1.31% +0.24% +0.00% / +1.08% +1.55% +1.31%] index_fill_ strided 100 : Elapsed 0.008 ms (0.848 ms / 100) 0.839 -> 0.848 ( +1.07%) [ +1.07% +0.36% +0.00% / +1.31% +1.31% +1.07%] index_fill_ strided 255 : Elapsed 0.008 ms (0.848 ms / 100) 0.839 -> 0.847 ( +0.95%) [ +1.91% +0.24% +0.00% / +0.95% +1.07% +0.95%] index_fill_ strided 256 : Elapsed 0.009 ms (0.855 ms / 100) 0.843 -> 0.844 ( +0.12%) [ +0.95% +0.00% +0.12% / +0.83% +0.12% +0.24%] index_fill_ strided 257 : Elapsed 0.009 ms (0.851 ms / 100) 0.842 -> 0.845 ( +0.36%) [ +1.07% +0.48% +0.00% / +0.95% +0.48% +0.36%] index_fill_ random : Elapsed 0.009 ms (0.851 ms / 100) 0.841 -> 0.846 ( +0.59%) [ +1.07% +0.00% +0.00% / +1.19% +0.59% +0.59%] index_fill_ random_sorted : Elapsed 0.008 ms (0.850 ms / 100) 0.842 -> 0.847 ( +0.59%) [ +0.95% +0.12% +0.00% / +0.71% +0.59% +0.59%] index_fill_ perm : Elapsed 0.009 ms (0.850 ms / 100) 0.841 -> 0.849 ( +0.95%) [ +1.31% +0.24% +0.00% / +0.95% +0.95% +0.95%] index_fill_ perm_sorted : Elapsed 0.009 ms (0.852 ms / 100) B = [1000, 2048] (stride (2048, 1)) A = [15, 2048] (stride (2048, 1)) dim = 0 1.116 -> 1.124 ( +0.72%) [ +0.90% +0.36% +0.00% / +0.72% +0.81% +0.72%] index_add_ linear : Elapsed 0.011 ms (1.126 ms / 100) 1.057 -> 1.066 ( +0.85%) [ +1.04% +0.19% +0.00% / +0.85% +0.85% +0.85%] index_copy_ linear : Elapsed 0.011 ms (1.068 ms / 100) 1.123 -> 1.121 ( -0.18%) [ +2.23% +0.09% +0.00% / +0.53% -0.18% +0.09%] index_add_ reverse : Elapsed 0.011 ms (1.148 ms / 100) 1.062 -> 1.061 ( -0.09%) [ +0.94% +0.09% +0.00% / +0.85% -0.09% -0.09%] index_copy_ reverse : Elapsed 0.011 ms (1.072 ms / 100) 1.120 -> 1.121 ( +0.09%) [ +0.89% +0.45% +0.00% / +0.71% +0.09% +0.18%] index_add_ spread : Elapsed 0.011 ms (1.130 ms / 100) 1.063 -> 1.062 ( -0.09%) [ +0.94% +0.00% +11.01% / +1.03% -0.09% +0.00%] index_copy_ spread : Elapsed 0.011 ms (1.073 ms / 100) 1.122 -> 1.122 ( +0.00%) [ +2.32% +0.00% +0.09% / +0.36% +0.09% +0.00%] index_add_ strided 3 : Elapsed 0.011 ms (1.148 ms / 100) 1.059 -> 1.062 ( +0.28%) [ +0.94% +0.09% +0.00% / +0.85% +0.28% +0.38%] index_copy_ strided 3 : Elapsed 0.011 ms (1.069 ms / 100) 1.120 -> 1.121 ( +0.09%) [ +0.54% +0.00% +0.09% / +0.62% +0.27% +0.09%] index_add_ strided 7 : Elapsed 0.011 ms (1.126 ms / 100) 1.059 -> 1.064 ( +0.47%) [ +0.85% +0.09% +0.00% / +1.04% +0.47% +0.47%] index_copy_ strided 7 : Elapsed 0.011 ms (1.068 ms / 100) 1.121 -> 1.122 ( +0.09%) [ +0.71% +0.09% +0.00% / +0.45% +0.09% +0.09%] index_add_ strided 257 : Elapsed 0.011 ms (1.129 ms / 100) 1.061 -> 1.064 ( +0.28%) [ +0.85% +0.00% +0.00% / +0.94% +0.28% +0.28%] index_copy_ strided 257 : Elapsed 0.011 ms (1.070 ms / 100) 1.118 -> 1.122 ( +0.36%) [ +0.81% +4.11% +0.00% / +0.81% +0.36% +0.45%] index_add_ perm : Elapsed 0.011 ms (1.127 ms / 100) 1.061 -> 1.065 ( +0.38%) [ +0.85% +0.00% +0.00% / +0.94% +0.38% +0.47%] index_copy_ perm : Elapsed 0.011 ms (1.070 ms / 100) 1.122 -> 1.120 ( -0.18%) [ +0.89% +0.18% +0.00% / +0.71% -0.18% +0.00%] index_add_ perm_sorted : Elapsed 0.011 ms (1.132 ms / 100) 1.065 -> 1.060 ( -0.47%) [ +0.85% +0.00% +0.00% / +0.75% -0.47% -0.47%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.074 ms / 100) 13.484 -> 13.534 ( +0.37%) [ +0.79% +0.00% +1.16% / +0.37% +0.68% +0.72%] index_select const : Elapsed 0.136 ms (13.590 ms / 100) 14.144 -> 14.224 ( +0.57%) [ +0.00% +0.33% +0.18% / +0.78% +0.81% +0.57%] index_select wrap : Elapsed 0.141 ms (14.144 ms / 100) 13.577 -> 13.621 ( +0.32%) [ +0.65% +0.00% +0.18% / +0.57% +0.32% +0.58%] index_select linear : Elapsed 0.137 ms (13.665 ms / 100) 13.644 -> 13.659 ( +0.11%) [ +0.59% +0.00% +0.38% / +0.55% +0.11% +0.45%] index_select reverse : Elapsed 0.137 ms (13.725 ms / 100) 13.486 -> 13.557 ( +0.53%) [ +0.67% +0.19% +0.00% / +0.55% +0.53% +1.02%] index_select skip64 : Elapsed 0.136 ms (13.577 ms / 100) 13.525 -> 13.536 ( +0.08%) [ +0.36% +0.00% +0.42% / +0.66% +0.12% +0.08%] index_select skip256 : Elapsed 0.136 ms (13.574 ms / 100) 13.529 -> 13.578 ( +0.36%) [ +0.68% +0.04% +0.00% / +0.54% +0.36% +0.36%] index_select spread : Elapsed 0.136 ms (13.621 ms / 100) 13.692 -> 13.735 ( +0.31%) [ +0.37% +0.00% +1.04% / +0.31% +0.38% +0.37%] index_select strided 3 : Elapsed 0.137 ms (13.743 ms / 100) 13.531 -> 13.598 ( +0.50%) [ +0.79% +0.14% +0.00% / +0.59% +0.50% +0.65%] index_select strided 5 : Elapsed 0.136 ms (13.638 ms / 100) 14.087 -> 14.144 ( +0.40%) [ +0.84% +0.26% +0.00% / +0.40% +1.07% +1.27%] index_select strided 7 : Elapsed 0.142 ms (14.206 ms / 100) 14.163 -> 14.228 ( +0.46%) [ +0.06% +0.00% +0.30% / +0.46% +0.88% +0.81%] index_select strided 8 : Elapsed 0.142 ms (14.171 ms / 100) 14.091 -> 14.139 ( +0.34%) [ +0.33% +0.03% +0.00% / +0.34% +0.59% +0.49%] index_select random : Elapsed 0.141 ms (14.138 ms / 100) 13.542 -> 13.562 ( +0.15%) [ +0.35% +0.01% +0.00% / +0.44% +0.15% +0.31%] index_select random_sorted : Elapsed 0.136 ms (13.589 ms / 100) B = [1000, 2048] (stride (2048, 1)) A = [15, 2048] (stride (1, 15)) dim = 0 1.221 -> 1.232 ( +0.90%) [ +1.06% +0.33% +0.00% / +0.90% +1.88% +5.41%] index_add_ linear : Elapsed 0.012 ms (1.234 ms / 100) 1.179 -> 1.187 ( +0.68%) [ +0.59% +0.17% +0.00% / +0.68% +0.85% +0.93%] index_copy_ linear : Elapsed 0.012 ms (1.186 ms / 100) 1.216 -> 1.227 ( +0.90%) [ +0.99% +0.00% +0.16% / +0.90% +1.40% +1.56%] index_add_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.166 -> 1.175 ( +0.77%) [ +0.86% +0.09% +0.00% / +0.77% +1.03% +0.94%] index_copy_ reverse : Elapsed 0.012 ms (1.176 ms / 100) 1.218 -> 1.226 ( +0.66%) [ +0.82% +0.08% +0.00% / +0.66% +1.40% +1.40%] index_add_ spread : Elapsed 0.012 ms (1.228 ms / 100) 1.169 -> 1.177 ( +0.68%) [ +0.43% +0.17% +0.00% / +0.68% +1.28% +1.28%] index_copy_ spread : Elapsed 0.012 ms (1.174 ms / 100) 1.222 -> 1.228 ( +0.49%) [ +0.82% +0.16% +0.00% / +0.49% +1.72% +1.96%] index_add_ strided 3 : Elapsed 0.012 ms (1.232 ms / 100) 1.175 -> 1.187 ( +1.02%) [ +0.94% +0.26% +0.00% / +1.02% +1.11% +1.28%] index_copy_ strided 3 : Elapsed 0.012 ms (1.186 ms / 100) 1.216 -> 1.229 ( +1.07%) [ +1.23% +0.41% +0.00% / +1.07% +1.97% +2.06%] index_add_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.171 -> 1.180 ( +0.77%) [ +0.85% +0.51% +0.00% / +0.77% +1.37% +1.45%] index_copy_ strided 7 : Elapsed 0.012 ms (1.181 ms / 100) 1.225 -> 1.229 ( +0.33%) [ +0.41% +0.00% +0.00% / +0.33% +1.14% +1.06%] index_add_ strided 257 : Elapsed 0.012 ms (1.230 ms / 100) 1.173 -> 1.185 ( +1.02%) [ +0.68% +0.34% +0.00% / +1.02% +1.11% +1.19%] index_copy_ strided 257 : Elapsed 0.012 ms (1.181 ms / 100) 1.224 -> 1.236 ( +0.98%) [ +0.98% +0.41% +0.00% / +0.98% +1.31% +1.31%] index_add_ perm : Elapsed 0.012 ms (1.236 ms / 100) 1.179 -> 1.187 ( +0.68%) [ +0.68% +0.00% +3.73% / +0.76% +0.68% +1.02%] index_copy_ perm : Elapsed 0.012 ms (1.187 ms / 100) 1.220 -> 1.229 ( +0.74%) [ +0.82% +0.41% +0.00% / +0.74% +1.07% +0.82%] index_add_ perm_sorted : Elapsed 0.012 ms (1.230 ms / 100) 1.170 -> 1.178 ( +0.68%) [ +1.03% +0.26% +0.00% / +0.77% +0.68% +0.68%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.182 ms / 100) 15.142 -> 15.131 ( -0.07%) [ +0.22% +0.06% +0.00% / -0.07% +0.52% +0.40%] index_select const : Elapsed 0.152 ms (15.176 ms / 100) 15.360 -> 15.388 ( +0.18%) [ +0.25% +0.00% +0.03% / +0.18% +0.48% +0.38%] index_select wrap : Elapsed 0.154 ms (15.398 ms / 100) 15.145 -> 15.150 ( +0.03%) [ +0.20% +0.00% +0.07% / +0.03% +0.07% +0.22%] index_select linear : Elapsed 0.152 ms (15.175 ms / 100) 15.219 -> 15.193 ( -0.17%) [ +0.14% +0.03% +0.00% / +0.09% -0.14% -0.17%] index_select reverse : Elapsed 0.152 ms (15.241 ms / 100) 15.204 -> 15.118 ( -0.57%) [ +0.09% +0.00% +0.09% / +0.02% -0.57% -0.55%] index_select skip64 : Elapsed 0.152 ms (15.218 ms / 100) 15.186 -> 15.199 ( +0.09%) [ +0.19% +0.18% +0.00% / +0.14% +0.21% +0.09%] index_select skip256 : Elapsed 0.152 ms (15.215 ms / 100) 15.337 -> 15.334 ( -0.02%) [ +0.20% +0.12% +0.00% / +0.12% -0.02% +0.01%] index_select spread : Elapsed 0.154 ms (15.367 ms / 100) 15.385 -> 15.403 ( +0.12%) [ +0.21% +0.00% +0.06% / +0.12% +0.30% +0.37%] index_select strided 3 : Elapsed 0.154 ms (15.417 ms / 100) 15.161 -> 15.183 ( +0.15%) [ +0.03% +0.00% +0.15% / +0.15% +0.74% +0.63%] index_select strided 5 : Elapsed 0.152 ms (15.166 ms / 100) 15.407 -> 15.432 ( +0.16%) [ +0.12% +0.01% +0.00% / +0.16% +0.23% +0.16%] index_select strided 7 : Elapsed 0.154 ms (15.426 ms / 100) 15.235 -> 15.270 ( +0.23%) [ +0.07% +0.00% +0.16% / +0.23% +0.78% +0.79%] index_select strided 8 : Elapsed 0.152 ms (15.245 ms / 100) 15.352 -> 15.383 ( +0.20%) [ +0.28% +0.11% +0.00% / +0.20% +0.32% +0.28%] index_select random : Elapsed 0.154 ms (15.395 ms / 100) 15.212 -> 15.225 ( +0.09%) [ +0.02% +0.05% +0.00% / +0.09% +0.48% +0.49%] index_select random_sorted : Elapsed 0.152 ms (15.215 ms / 100) B = [1000, 2048] (stride (1, 1000)) dim = 0 fill_cnt = 15 0.964 -> 0.971 ( +0.73%) [ +0.93% +0.00% +16.18% / +0.73% +2.59% +2.49%] index_fill_ const : Elapsed 0.010 ms (0.973 ms / 100) 0.967 -> 0.977 ( +1.03%) [ +1.24% +0.10% +0.00% / +1.03% +1.03% +1.14%] index_fill_ linear : Elapsed 0.010 ms (0.979 ms / 100) 0.966 -> 0.977 ( +1.14%) [ +1.14% +0.31% +0.00% / +1.14% +1.24% +1.24%] index_fill_ reverse : Elapsed 0.010 ms (0.977 ms / 100) 0.967 -> 0.978 ( +1.14%) [ +1.14% +0.31% +0.00% / +1.14% +1.65% +1.65%] index_fill_ skip64 : Elapsed 0.010 ms (0.978 ms / 100) 0.965 -> 0.976 ( +1.14%) [ +1.45% +0.21% +0.00% / +1.14% +1.66% +1.87%] index_fill_ skip256 : Elapsed 0.010 ms (0.979 ms / 100) 2.177 -> 2.180 ( +0.14%) [ +0.00% +0.14% +0.05% / +0.14% +0.41% +0.60%] index_fill_ spread : Elapsed 0.022 ms (2.177 ms / 100) 0.973 -> 0.978 ( +0.51%) [ +0.92% +0.10% +0.00% / +0.82% +0.51% +0.72%] index_fill_ strided 3 : Elapsed 0.010 ms (0.982 ms / 100) 1.003 -> 1.015 ( +1.20%) [ +1.00% +0.00% +0.40% / +1.20% +4.19% +3.89%] index_fill_ strided 5 : Elapsed 0.010 ms (1.013 ms / 100) 1.206 -> 1.157 ( -4.06%) [ +0.17% +0.00% +0.00% / +0.41% -3.98% -4.06%] index_fill_ strided 7 : Elapsed 0.012 ms (1.208 ms / 100) good 1.332 -> 1.251 ( -6.08%) [ +0.45% +0.00% +0.15% / +0.38% -6.08% -5.86%] index_fill_ strided 8 : Elapsed 0.013 ms (1.338 ms / 100) 1.621 -> 1.590 ( -1.91%) [ +0.19% +0.00% +0.68% / +0.00% -1.60% -1.91%] index_fill_ strided 16 : Elapsed 0.016 ms (1.624 ms / 100) 2.176 -> 2.178 ( +0.09%) [ +0.00% +0.00% +0.14% / +0.09% +0.51% +0.64%] index_fill_ strided 64 : Elapsed 0.022 ms (2.176 ms / 100) 2.206 -> 2.185 ( -0.95%) [ +0.00% +0.00% +0.18% / +0.41% -0.45% -0.95%] index_fill_ strided 100 : Elapsed 0.022 ms (2.206 ms / 100) 2.230 -> 2.228 ( -0.09%) [ +0.18% +0.04% +0.00% / -0.09% +0.31% +0.40%] index_fill_ strided 255 : Elapsed 0.022 ms (2.234 ms / 100) 2.291 -> 2.295 ( +0.17%) [ +0.22% +0.22% +0.00% / +0.26% +1.09% +0.17%] index_fill_ strided 256 : Elapsed 0.023 ms (2.296 ms / 100) 2.336 -> 2.339 ( +0.13%) [ +0.86% +0.17% +0.00% / +0.13% +0.26% +0.26%] index_fill_ strided 257 : Elapsed 0.024 ms (2.356 ms / 100) 2.197 -> 2.188 ( -0.41%) [ +0.14% +0.09% +0.00% / +0.09% -0.41% -0.27%] index_fill_ random : Elapsed 0.022 ms (2.200 ms / 100) 2.035 -> 2.031 ( -0.20%) [ +0.10% +0.05% +0.00% / +0.15% -0.20% -0.20%] index_fill_ random_sorted : Elapsed 0.020 ms (2.037 ms / 100) 2.235 -> 2.231 ( -0.18%) [ +0.18% +0.00% +0.04% / -0.18% +0.18% +0.18%] index_fill_ perm : Elapsed 0.022 ms (2.239 ms / 100) 1.950 -> 1.938 ( -0.62%) [ +0.15% +0.15% +0.00% / +0.21% -0.56% -0.62%] index_fill_ perm_sorted : Elapsed 0.020 ms (1.953 ms / 100) B = [1000, 2048] (stride (1, 1000)) A = [15, 2048] (stride (2048, 1)) dim = 0 1.207 -> 1.218 ( +0.91%) [ +0.91% +0.00% +0.25% / +0.91% +1.57% +1.66%] index_add_ linear : Elapsed 0.012 ms (1.218 ms / 100) 1.183 -> 1.191 ( +0.68%) [ +0.76% +0.17% +0.00% / +0.68% +1.44% +1.35%] index_copy_ linear : Elapsed 0.012 ms (1.192 ms / 100) 1.212 -> 1.222 ( +0.83%) [ +0.41% +0.33% +0.00% / +1.07% +0.83% +0.91%] index_add_ reverse : Elapsed 0.012 ms (1.217 ms / 100) 1.187 -> 1.191 ( +0.34%) [ +0.51% +0.25% +0.00% / +0.51% +0.34% +0.34%] index_copy_ reverse : Elapsed 0.012 ms (1.193 ms / 100) 4.435 -> 4.447 ( +0.27%) [ +0.29% +0.05% +0.00% / +0.27% +0.97% +0.97%] index_add_ spread : Elapsed 0.044 ms (4.448 ms / 100) 2.675 -> 2.669 ( -0.22%) [ +0.07% +0.15% +0.00% / +0.45% -0.11% -0.22%] index_copy_ spread : Elapsed 0.027 ms (2.677 ms / 100) 1.376 -> 1.390 ( +1.02%) [ +1.31% +0.00% +0.44% / +1.02% +1.45% +1.89%] index_add_ strided 3 : Elapsed 0.014 ms (1.394 ms / 100) 1.308 -> 1.312 ( +0.31%) [ +0.92% +0.00% +0.00% / +1.22% +0.38% +0.31%] index_copy_ strided 3 : Elapsed 0.013 ms (1.320 ms / 100) 1.916 -> 1.927 ( +0.57%) [ +0.63% +0.00% +0.21% / +0.57% +8.09% +8.04%] index_add_ strided 7 : Elapsed 0.019 ms (1.928 ms / 100) 1.582 -> 1.592 ( +0.63%) [ +1.01% +0.19% +0.00% / +0.63% +2.40% +2.40%] index_copy_ strided 7 : Elapsed 0.016 ms (1.598 ms / 100) 4.710 -> 4.714 ( +0.08%) [ +0.32% +0.00% +0.30% / +0.08% +0.47% +0.72%] index_add_ strided 257 : Elapsed 0.047 ms (4.725 ms / 100) 2.814 -> 2.813 ( -0.04%) [ +0.32% +0.00% +0.43% / -0.04% +0.46% +0.28%] index_copy_ strided 257 : Elapsed 0.028 ms (2.823 ms / 100) 4.385 -> 4.346 ( -0.89%) [ +0.00% +0.07% +0.05% / +0.16% -0.78% -0.89%] index_add_ perm : Elapsed 0.044 ms (4.385 ms / 100) 2.585 -> 2.571 ( -0.54%) [ +0.00% +0.23% +0.08% / +0.08% -0.39% -0.54%] index_copy_ perm : Elapsed 0.026 ms (2.585 ms / 100) 4.028 -> 4.030 ( +0.05%) [ +0.27% +0.00% +2.33% / +0.05% +0.89% +0.52%] index_add_ perm_sorted : Elapsed 0.040 ms (4.039 ms / 100) 2.400 -> 2.391 ( -0.37%) [ +0.00% +0.00% +0.25% / -0.37% +0.75% +0.75%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.400 ms / 100) GOOD 22.133 -> 13.363 (-39.62%) [ +0.53% +0.00% +0.84% / -39.47% -39.62% -39.52%] index_select const : Elapsed 0.223 ms (22.251 ms / 100) GOOD 22.634 -> 17.677 (-21.90%) [ +1.64% +1.33% +0.00% / -21.44% -21.82% -21.90%] index_select wrap : Elapsed 0.230 ms (23.006 ms / 100) GOOD 22.394 -> 13.433 (-40.02%) [ +0.41% +0.00% +0.04% / -40.02% -39.94% -39.97%] index_select linear : Elapsed 0.225 ms (22.485 ms / 100) GOOD 22.437 -> 13.478 (-39.93%) [ +0.31% +0.00% +0.85% / -39.76% -39.89% -39.93%] index_select reverse : Elapsed 0.225 ms (22.506 ms / 100) GOOD 22.116 -> 13.351 (-39.63%) [ +0.01% +0.00% +0.86% / -39.47% -39.63% -39.56%] index_select skip64 : Elapsed 0.221 ms (22.119 ms / 100) GOOD 22.078 -> 13.402 (-39.30%) [ +0.08% +0.00% +0.56% / -39.12% -39.19% -39.30%] index_select skip256 : Elapsed 0.221 ms (22.096 ms / 100) GOOD 21.931 -> 13.672 (-37.66%) [ +0.04% +0.25% +0.00% / -37.63% -37.66% -37.65%] index_select spread : Elapsed 0.219 ms (21.939 ms / 100) GOOD 22.593 -> 14.412 (-36.21%) [ +0.92% +0.00% +0.27% / -36.21% -35.94% -35.89%] index_select strided 3 : Elapsed 0.228 ms (22.801 ms / 100) GOOD 22.406 -> 13.602 (-39.29%) [ +0.28% +0.00% +0.34% / -39.29% -39.10% -39.09%] index_select strided 5 : Elapsed 0.225 ms (22.469 ms / 100) GOOD 22.831 -> 17.652 (-22.68%) [ +0.32% +0.00% +1.17% / -21.90% -22.63% -22.68%] index_select strided 7 : Elapsed 0.229 ms (22.903 ms / 100) GOOD 22.541 -> 17.380 (-22.90%) [ +0.00% +1.37% +1.15% / -22.90% -22.79% -22.74%] index_select strided 8 : Elapsed 0.225 ms (22.541 ms / 100) GOOD 22.846 -> 16.818 (-26.39%) [ +0.95% +0.24% +0.00% / -25.91% -26.39% -26.37%] index_select random : Elapsed 0.231 ms (23.063 ms / 100) GOOD 21.958 -> 13.661 (-37.79%) [ +0.30% +0.01% +0.00% / -37.79% -37.72% -37.68%] index_select random_sorted : Elapsed 0.220 ms (22.023 ms / 100) B = [1000, 2048] (stride (1, 1000)) A = [15, 2048] (stride (1, 15)) dim = 0 1.259 -> 1.279 ( +1.59%) [ +1.03% +0.16% +0.00% / +1.59% +2.70% +2.78%] index_add_ linear : Elapsed 0.013 ms (1.272 ms / 100) 1.264 -> 1.275 ( +0.87%) [ +0.95% +0.00% +0.00% / +0.87% +1.27% +1.50%] index_copy_ linear : Elapsed 0.013 ms (1.276 ms / 100) 1.265 -> 1.271 ( +0.47%) [ +0.40% +0.00% +0.32% / +0.47% +2.13% +1.98%] index_add_ reverse : Elapsed 0.013 ms (1.270 ms / 100) 1.267 -> 1.272 ( +0.39%) [ +0.95% +0.00% +0.63% / +0.87% +0.39% +0.71%] index_copy_ reverse : Elapsed 0.013 ms (1.279 ms / 100) 4.397 -> 4.411 ( +0.32%) [ +0.00% +0.07% +0.18% / +0.32% +1.27% +1.23%] index_add_ spread : Elapsed 0.044 ms (4.397 ms / 100) 2.405 -> 2.411 ( +0.25%) [ +0.37% +0.25% +0.00% / +0.25% +0.29% +0.37%] index_copy_ spread : Elapsed 0.024 ms (2.414 ms / 100) 1.396 -> 1.418 ( +1.58%) [ +0.64% +0.21% +0.00% / +1.58% +1.58% +1.72%] index_add_ strided 3 : Elapsed 0.014 ms (1.405 ms / 100) 1.355 -> 1.362 ( +0.52%) [ +0.74% +0.37% +0.00% / +0.52% +2.21% +2.36%] index_copy_ strided 3 : Elapsed 0.014 ms (1.365 ms / 100) 1.810 -> 1.817 ( +0.39%) [ +0.72% +0.28% +0.00% / +0.39% +4.20% +4.31%] index_add_ strided 7 : Elapsed 0.018 ms (1.823 ms / 100) 1.564 -> 1.570 ( +0.38%) [ +0.58% +0.19% +0.00% / +0.38% +2.94% +3.07%] index_copy_ strided 7 : Elapsed 0.016 ms (1.573 ms / 100) 4.707 -> 4.714 ( +0.15%) [ +0.02% +0.23% +0.00% / +0.15% +0.55% +0.53%] index_add_ strided 257 : Elapsed 0.047 ms (4.708 ms / 100) 2.601 -> 2.609 ( +0.31%) [ +0.00% +0.19% +0.50% / +0.31% +0.50% +0.35%] index_copy_ strided 257 : Elapsed 0.026 ms (2.601 ms / 100) 4.524 -> 4.516 ( -0.18%) [ +0.22% +0.00% +0.15% / -0.18% +0.42% +0.66%] index_add_ perm : Elapsed 0.045 ms (4.534 ms / 100) 2.548 -> 2.544 ( -0.16%) [ +0.00% +0.12% +0.04% / +0.31% -0.16% +0.00%] index_copy_ perm : Elapsed 0.025 ms (2.548 ms / 100) 3.849 -> 3.849 ( +0.00%) [ +0.00% +0.10% +0.10% / +0.00% +0.68% +0.75%] index_add_ perm_sorted : Elapsed 0.038 ms (3.849 ms / 100) 2.135 -> 2.115 ( -0.94%) [ +0.52% +0.23% +0.00% / +0.14% -0.94% -0.84%] index_copy_ perm_sorted : Elapsed 0.021 ms (2.146 ms / 100) GOOD 26.142 -> 13.585 (-48.03%) [ +0.49% +0.42% +0.00% / -47.93% -48.03% -48.03%] index_select const : Elapsed 0.263 ms (26.270 ms / 100) GOOD 25.951 -> 13.590 (-47.63%) [ +0.76% +0.00% +0.27% / -47.55% -47.63% -47.62%] index_select wrap : Elapsed 0.261 ms (26.148 ms / 100) GOOD 25.598 -> 13.794 (-46.11%) [ +0.00% +0.34% +0.39% / -46.11% -46.07% -46.09%] index_select linear : Elapsed 0.256 ms (25.598 ms / 100) GOOD 25.874 -> 13.738 (-46.90%) [ +0.75% +0.27% +0.00% / -46.81% -46.84% -46.90%] index_select reverse : Elapsed 0.261 ms (26.067 ms / 100) GOOD 26.039 -> 13.572 (-47.88%) [ +0.74% +0.74% +0.00% / -47.76% -47.81% -47.88%] index_select skip64 : Elapsed 0.262 ms (26.231 ms / 100) GOOD 25.841 -> 13.574 (-47.47%) [ +0.57% +0.00% +0.17% / -47.42% -47.47% -47.47%] index_select skip256 : Elapsed 0.260 ms (25.988 ms / 100) GOOD 26.223 -> 13.789 (-47.42%) [ +0.00% +0.49% +1.69% / -47.29% -47.42% -47.40%] index_select spread : Elapsed 0.262 ms (26.223 ms / 100) GOOD 26.199 -> 13.602 (-48.08%) [ +0.50% +0.14% +0.00% / -48.02% -48.08% -47.92%] index_select strided 3 : Elapsed 0.263 ms (26.331 ms / 100) GOOD 25.585 -> 13.613 (-46.79%) [ +0.30% +0.00% +0.27% / -46.79% -46.75% -46.75%] index_select strided 5 : Elapsed 0.257 ms (25.661 ms / 100) GOOD 26.152 -> 13.585 (-48.05%) [ +0.73% +0.75% +0.00% / -47.91% -48.05% -47.98%] index_select strided 7 : Elapsed 0.263 ms (26.342 ms / 100) GOOD 25.742 -> 13.621 (-47.09%) [ +0.19% +0.10% +0.00% / -47.09% -46.99% -47.08%] index_select strided 8 : Elapsed 0.258 ms (25.790 ms / 100) GOOD 26.113 -> 13.611 (-47.88%) [ +0.31% +0.17% +0.00% / -47.81% -47.87% -47.88%] index_select random : Elapsed 0.262 ms (26.193 ms / 100) GOOD 25.923 -> 13.805 (-46.75%) [ +0.10% +0.00% +0.02% / -46.69% -46.68% -46.75%] index_select random_sorted : Elapsed 0.259 ms (25.950 ms / 100) out_shape = [15, 1000] in_shape = [15, 2048] idx_dim = 1 B = [15, 1000] (stride (1000, 1)) dim = 1 fill_cnt = 2048 Good 2.008 -> 1.762 (-12.25%) [ +0.45% +0.20% +0.00% / -12.25% -11.90% -11.95%] index_fill_ const : Elapsed 0.020 ms (2.017 ms / 100) good 1.955 -> 1.770 ( -9.46%) [ +0.00% +0.05% +0.10% / -9.46% -9.10% -9.10%] index_fill_ linear : Elapsed 0.020 ms (1.955 ms / 100) good 1.927 -> 1.774 ( -7.94%) [ +0.10% +0.05% +0.00% / -7.68% -7.94% -7.94%] index_fill_ reverse : Elapsed 0.019 ms (1.929 ms / 100) good 1.936 -> 1.760 ( -9.09%) [ +0.00% +0.05% +0.15% / -8.94% -9.09% -8.99%] index_fill_ skip64 : Elapsed 0.019 ms (1.936 ms / 100) Good 2.045 -> 1.755 (-14.18%) [ +0.05% +0.10% +0.00% / -13.74% -13.99% -14.18%] index_fill_ skip256 : Elapsed 0.020 ms (2.046 ms / 100) good 1.888 -> 1.772 ( -6.14%) [ +0.16% +0.05% +0.00% / -6.14% -5.83% -6.04%] index_fill_ spread : Elapsed 0.019 ms (1.891 ms / 100) good 1.891 -> 1.786 ( -5.55%) [ +0.37% +0.00% +0.11% / -5.50% -5.55% -5.39%] index_fill_ strided 3 : Elapsed 0.019 ms (1.898 ms / 100) good 1.912 -> 1.808 ( -5.44%) [ +0.31% +0.10% +0.00% / -5.44% -4.71% -5.13%] index_fill_ strided 5 : Elapsed 0.019 ms (1.918 ms / 100) good 1.896 -> 1.801 ( -5.01%) [ +0.16% +0.26% +0.00% / -4.32% -5.01% -4.38%] index_fill_ strided 7 : Elapsed 0.019 ms (1.899 ms / 100) 1.915 -> 1.836 ( -4.13%) [ +0.52% +0.05% +0.00% / -3.55% -3.92% -4.13%] index_fill_ strided 8 : Elapsed 0.019 ms (1.925 ms / 100) 1.915 -> 1.849 ( -3.45%) [ +0.57% +0.31% +0.00% / -3.29% -3.19% -3.45%] index_fill_ strided 16 : Elapsed 0.019 ms (1.926 ms / 100) 1.910 -> 1.885 ( -1.31%) [ +0.84% +0.47% +0.00% / -1.05% -1.31% -0.84%] index_fill_ strided 64 : Elapsed 0.019 ms (1.926 ms / 100) 1.890 -> 1.806 ( -4.44%) [ +0.11% +0.00% +0.05% / -4.44% -4.23% -4.18%] index_fill_ strided 100 : Elapsed 0.019 ms (1.892 ms / 100) 1.916 -> 1.892 ( -1.25%) [ +0.16% +0.10% +0.00% / -1.20% -1.25% -1.15%] index_fill_ strided 255 : Elapsed 0.019 ms (1.919 ms / 100) 1.912 -> 1.889 ( -1.20%) [ +0.05% +0.00% +0.10% / -1.20% -1.05% -0.89%] index_fill_ strided 256 : Elapsed 0.019 ms (1.913 ms / 100) 1.896 -> 1.867 ( -1.53%) [ +0.21% +0.16% +0.00% / -1.53% -1.42% -1.32%] index_fill_ strided 257 : Elapsed 0.019 ms (1.900 ms / 100) 1.912 -> 1.869 ( -2.25%) [ +0.31% +0.00% +0.31% / -1.94% -2.25% -1.94%] index_fill_ random : Elapsed 0.019 ms (1.918 ms / 100) good 1.907 -> 1.794 ( -5.93%) [ +0.10% +0.00% +0.10% / -5.93% -5.72% -5.82%] index_fill_ random_sorted : Elapsed 0.019 ms (1.909 ms / 100) B = [15, 1000] (stride (1000, 1)) A = [15, 2048] (stride (2048, 1)) dim = 1 3.268 -> 3.250 ( -0.55%) [ +0.03% +0.28% +0.00% / -0.43% -0.55% -0.34%] index_select const : Elapsed 0.033 ms (3.269 ms / 100) 3.303 -> 3.265 ( -1.15%) [ +0.00% +0.00% +0.00% / -1.15% -0.76% -0.79%] index_select wrap : Elapsed 0.033 ms (3.303 ms / 100) 3.307 -> 3.279 ( -0.85%) [ +0.27% +0.00% +0.21% / -0.85% -0.76% -0.70%] index_select linear : Elapsed 0.033 ms (3.316 ms / 100) 3.301 -> 3.270 ( -0.94%) [ +0.06% +0.00% +0.00% / -0.94% -0.67% -0.85%] index_select reverse : Elapsed 0.033 ms (3.303 ms / 100) 3.273 -> 3.247 ( -0.79%) [ +0.24% +0.00% +0.18% / -0.79% -0.09% -0.31%] index_select skip64 : Elapsed 0.033 ms (3.281 ms / 100) 3.265 -> 3.242 ( -0.70%) [ +0.06% +0.18% +0.00% / -0.46% -0.70% -0.67%] index_select skip256 : Elapsed 0.033 ms (3.267 ms / 100) 3.347 -> 3.299 ( -1.43%) [ +0.03% +0.03% +0.00% / -1.43% -0.96% -0.93%] index_select spread : Elapsed 0.033 ms (3.348 ms / 100) 3.343 -> 3.301 ( -1.26%) [ +0.06% +0.18% +0.00% / -1.26% -1.23% -1.26%] index_select strided 3 : Elapsed 0.033 ms (3.345 ms / 100) 3.350 -> 3.295 ( -1.64%) [ +0.12% +0.00% +0.09% / -1.01% -1.64% -1.55%] index_select strided 5 : Elapsed 0.034 ms (3.354 ms / 100) 3.339 -> 3.302 ( -1.11%) [ +0.18% +0.09% +0.00% / -1.08% -0.87% -1.11%] index_select strided 7 : Elapsed 0.033 ms (3.345 ms / 100) 3.350 -> 3.293 ( -1.70%) [ +0.00% +0.30% +0.18% / -0.99% -1.70% -1.70%] index_select strided 8 : Elapsed 0.034 ms (3.350 ms / 100) 3.312 -> 3.276 ( -1.09%) [ +0.21% +0.03% +0.00% / -1.09% -0.82% -0.94%] index_select strided 16 : Elapsed 0.033 ms (3.319 ms / 100) 3.302 -> 3.272 ( -0.91%) [ +0.18% +0.18% +0.00% / -0.82% -0.91% -0.79%] index_select strided 64 : Elapsed 0.033 ms (3.308 ms / 100) 3.356 -> 3.318 ( -1.13%) [ +0.00% +0.03% +0.18% / -1.13% -0.80% -1.01%] index_select strided 100 : Elapsed 0.034 ms (3.356 ms / 100) 3.365 -> 3.302 ( -1.87%) [ +0.27% +0.21% +0.00% / -1.49% -1.69% -1.87%] index_select strided 255 : Elapsed 0.034 ms (3.374 ms / 100) 3.273 -> 3.246 ( -0.82%) [ +0.24% +0.18% +0.00% / -0.82% -0.58% -0.61%] index_select strided 256 : Elapsed 0.033 ms (3.281 ms / 100) 3.367 -> 3.306 ( -1.81%) [ +0.06% +0.12% +0.00% / -1.22% -1.81% -1.60%] index_select strided 257 : Elapsed 0.034 ms (3.369 ms / 100) 3.359 -> 3.312 ( -1.40%) [ +0.03% +0.03% +0.00% / -0.92% -1.40% -0.95%] index_select random : Elapsed 0.034 ms (3.360 ms / 100) 3.343 -> 3.292 ( -1.53%) [ +0.00% +0.15% +0.00% / -1.44% -1.53% -1.35%] index_select random_sorted : Elapsed 0.033 ms (3.343 ms / 100) 3.357 -> 3.307 ( -1.49%) [ +0.21% +0.00% +0.48% / -0.77% -1.22% -1.49%] index_select perm : Elapsed 0.034 ms (3.364 ms / 100) 3.343 -> 3.291 ( -1.56%) [ +0.00% +0.15% +0.27% / -1.17% -1.50% -1.56%] index_select perm_sorted : Elapsed 0.033 ms (3.343 ms / 100) B = [15, 1000] (stride (1000, 1)) A = [15, 2048] (stride (1, 15)) dim = 1 3.262 -> 3.234 ( -0.86%) [ +0.12% +0.03% +0.00% / -0.46% -0.86% -0.61%] index_select const : Elapsed 0.033 ms (3.266 ms / 100) 3.291 -> 3.277 ( -0.43%) [ +0.15% +0.00% +0.12% / -0.43% -0.03% -0.06%] index_select wrap : Elapsed 0.033 ms (3.296 ms / 100) 3.284 -> 3.272 ( -0.37%) [ +0.15% +0.00% +0.09% / -0.27% -0.18% -0.37%] index_select linear : Elapsed 0.033 ms (3.289 ms / 100) 3.302 -> 3.288 ( -0.42%) [ +0.21% +0.00% +0.09% / -0.33% -0.42% -0.39%] index_select reverse : Elapsed 0.033 ms (3.309 ms / 100) 3.262 -> 3.244 ( -0.55%) [ +0.09% +0.00% +0.18% / -0.52% -0.55% -0.46%] index_select skip64 : Elapsed 0.033 ms (3.265 ms / 100) 3.278 -> 3.243 ( -1.07%) [ +0.09% +0.00% +0.24% / -0.46% -1.07% -1.01%] index_select skip256 : Elapsed 0.033 ms (3.281 ms / 100) 3.302 -> 3.299 ( -0.09%) [ +0.15% +0.00% +0.06% / -0.09% +0.06% +0.00%] index_select spread : Elapsed 0.033 ms (3.307 ms / 100) 3.318 -> 3.316 ( -0.06%) [ +0.06% +0.03% +0.00% / -0.06% +0.18% +0.54%] index_select strided 3 : Elapsed 0.033 ms (3.320 ms / 100) 3.304 -> 3.310 ( +0.18%) [ +0.00% +0.18% +0.18% / +0.18% +0.39% +0.24%] index_select strided 5 : Elapsed 0.033 ms (3.304 ms / 100) 3.315 -> 3.305 ( -0.30%) [ +0.00% +0.06% +0.06% / -0.30% +0.54% +0.72%] index_select strided 7 : Elapsed 0.033 ms (3.315 ms / 100) 3.276 -> 3.264 ( -0.37%) [ +0.12% +0.03% +0.00% / -0.12% -0.18% -0.37%] index_select strided 8 : Elapsed 0.033 ms (3.280 ms / 100) 3.288 -> 3.272 ( -0.49%) [ +0.09% +0.00% +0.09% / -0.27% -0.46% -0.49%] index_select strided 16 : Elapsed 0.033 ms (3.291 ms / 100) 3.266 -> 3.248 ( -0.55%) [ +0.06% +0.00% +0.09% / -0.34% -0.52% -0.55%] index_select strided 64 : Elapsed 0.033 ms (3.268 ms / 100) 3.299 -> 3.294 ( -0.15%) [ +0.00% +0.03% +0.00% / -0.15% +0.21% +0.21%] index_select strided 100 : Elapsed 0.033 ms (3.299 ms / 100) 3.302 -> 3.294 ( -0.24%) [ +0.12% +0.00% +0.12% / -0.06% -0.24% -0.09%] index_select strided 255 : Elapsed 0.033 ms (3.306 ms / 100) 3.263 -> 3.238 ( -0.77%) [ +0.15% +0.00% +0.00% / -0.77% -0.55% -0.55%] index_select strided 256 : Elapsed 0.033 ms (3.268 ms / 100) 3.314 -> 3.310 ( -0.12%) [ +0.24% +0.00% +0.06% / -0.12% +0.00% +0.18%] index_select strided 257 : Elapsed 0.033 ms (3.322 ms / 100) 3.301 -> 3.298 ( -0.09%) [ +0.00% +0.15% +0.09% / +0.06% -0.09% +0.00%] index_select random : Elapsed 0.033 ms (3.301 ms / 100) 3.301 -> 3.297 ( -0.12%) [ +0.36% +0.00% +0.18% / -0.09% -0.09% -0.12%] index_select random_sorted : Elapsed 0.033 ms (3.313 ms / 100) 3.309 -> 3.304 ( -0.15%) [ +0.00% +0.09% +0.00% / +0.33% -0.15% +0.18%] index_select perm : Elapsed 0.033 ms (3.309 ms / 100) 3.310 -> 3.304 ( -0.18%) [ +0.09% +0.00% +0.00% / -0.06% -0.18% +0.00%] index_select perm_sorted : Elapsed 0.033 ms (3.313 ms / 100) B = [15, 1000] (stride (1, 15)) dim = 1 fill_cnt = 2048 1.791 -> 1.781 ( -0.56%) [ +0.45% +0.28% +0.00% / -0.56% -0.56% -0.45%] index_fill_ const : Elapsed 0.018 ms (1.799 ms / 100) 1.812 -> 1.798 ( -0.77%) [ +0.28% +0.00% +0.39% / -0.77% -0.72% -0.39%] index_fill_ linear : Elapsed 0.018 ms (1.817 ms / 100) 1.794 -> 1.781 ( -0.72%) [ +0.56% +0.00% +0.33% / -0.61% -0.45% -0.72%] index_fill_ reverse : Elapsed 0.018 ms (1.804 ms / 100) 1.793 -> 1.776 ( -0.95%) [ +0.45% +0.00% +0.11% / -0.84% -0.95% -0.89%] index_fill_ skip64 : Elapsed 0.018 ms (1.801 ms / 100) 1.782 -> 1.775 ( -0.39%) [ +0.45% +0.22% +0.00% / -0.39% +0.06% +0.17%] index_fill_ skip256 : Elapsed 0.018 ms (1.790 ms / 100) 1.798 -> 1.785 ( -0.72%) [ +0.33% +0.00% +0.39% / -0.72% -0.39% -0.33%] index_fill_ spread : Elapsed 0.018 ms (1.804 ms / 100) 1.814 -> 1.793 ( -1.16%) [ +0.39% +0.11% +0.00% / -0.66% -1.16% -0.77%] index_fill_ strided 3 : Elapsed 0.018 ms (1.821 ms / 100) 1.810 -> 1.795 ( -0.83%) [ +0.06% +0.17% +0.00% / -0.55% -0.55% -0.83%] index_fill_ strided 5 : Elapsed 0.018 ms (1.811 ms / 100) 1.815 -> 1.802 ( -0.72%) [ +0.22% +0.06% +0.00% / -0.66% -0.72% -0.66%] index_fill_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.794 -> 1.783 ( -0.61%) [ +0.39% +0.22% +0.00% / -0.61% -0.17% -0.17%] index_fill_ strided 8 : Elapsed 0.018 ms (1.801 ms / 100) 1.797 -> 1.787 ( -0.56%) [ +0.33% +0.22% +0.00% / -0.56% -0.11% -0.17%] index_fill_ strided 16 : Elapsed 0.018 ms (1.803 ms / 100) 1.795 -> 1.786 ( -0.50%) [ +0.45% +0.33% +0.00% / -0.50% -0.22% -0.22%] index_fill_ strided 64 : Elapsed 0.018 ms (1.803 ms / 100) 1.796 -> 1.779 ( -0.95%) [ +0.22% +0.00% +0.22% / -0.61% -0.95% -0.72%] index_fill_ strided 100 : Elapsed 0.018 ms (1.800 ms / 100) 1.809 -> 1.789 ( -1.11%) [ +0.39% +0.00% +0.28% / -0.83% -0.55% -1.11%] index_fill_ strided 255 : Elapsed 0.018 ms (1.816 ms / 100) 1.797 -> 1.788 ( -0.50%) [ +0.22% +0.00% +0.11% / -0.45% -0.50% -0.33%] index_fill_ strided 256 : Elapsed 0.018 ms (1.801 ms / 100) 1.807 -> 1.800 ( -0.39%) [ +0.44% +0.00% +0.11% / -0.39% -0.33% +0.06%] index_fill_ strided 257 : Elapsed 0.018 ms (1.815 ms / 100) 1.816 -> 1.803 ( -0.72%) [ +0.06% +0.00% +0.06% / -0.44% -0.72% -0.50%] index_fill_ random : Elapsed 0.018 ms (1.817 ms / 100) 1.812 -> 1.786 ( -1.43%) [ +0.22% +0.00% +0.06% / -0.99% -1.32% -1.43%] index_fill_ random_sorted : Elapsed 0.018 ms (1.816 ms / 100) B = [15, 1000] (stride (1, 15)) A = [15, 2048] (stride (2048, 1)) dim = 1 3.247 -> 3.252 ( +0.15%) [ +0.03% +0.18% +0.00% / +0.15% +0.28% +0.28%] index_select const : Elapsed 0.032 ms (3.248 ms / 100) 3.279 -> 3.287 ( +0.24%) [ +0.06% +0.03% +0.00% / +0.24% +0.58% +0.43%] index_select wrap : Elapsed 0.033 ms (3.281 ms / 100) 3.285 -> 3.293 ( +0.24%) [ +0.09% +0.00% +0.12% / +0.30% +0.37% +0.24%] index_select linear : Elapsed 0.033 ms (3.288 ms / 100) 3.283 -> 3.285 ( +0.06%) [ +0.21% +0.37% +0.00% / +0.06% +0.24% +0.40%] index_select reverse : Elapsed 0.033 ms (3.290 ms / 100) 3.265 -> 3.265 ( +0.00%) [ +0.00% +0.00% +0.12% / +0.00% +0.12% +0.25%] index_select skip64 : Elapsed 0.033 ms (3.265 ms / 100) 3.253 -> 3.251 ( -0.06%) [ +0.06% +0.00% +0.03% / +0.12% +0.00% -0.06%] index_select skip256 : Elapsed 0.033 ms (3.255 ms / 100) 3.326 -> 3.323 ( -0.09%) [ +0.00% +0.00% +0.15% / -0.09% +0.33% +0.24%] index_select spread : Elapsed 0.033 ms (3.326 ms / 100) 3.327 -> 3.316 ( -0.33%) [ +0.12% +0.00% +0.06% / +0.03% -0.33% -0.03%] index_select strided 3 : Elapsed 0.033 ms (3.331 ms / 100) 3.324 -> 3.328 ( +0.12%) [ +0.30% +0.24% +0.00% / +0.27% +0.12% +0.12%] index_select strided 5 : Elapsed 0.033 ms (3.334 ms / 100) 3.319 -> 3.319 ( +0.00%) [ +0.15% +0.00% +0.12% / +0.09% +0.15% +0.00%] index_select strided 7 : Elapsed 0.033 ms (3.324 ms / 100) 3.333 -> 3.311 ( -0.66%) [ +0.03% +0.06% +0.00% / -0.15% -0.48% -0.66%] index_select strided 8 : Elapsed 0.033 ms (3.334 ms / 100) 3.292 -> 3.291 ( -0.03%) [ +0.12% +0.00% +0.09% / +0.27% +0.00% -0.03%] index_select strided 16 : Elapsed 0.033 ms (3.296 ms / 100) 3.275 -> 3.285 ( +0.31%) [ +0.34% +0.31% +0.00% / +0.43% +0.31% +0.46%] index_select strided 64 : Elapsed 0.033 ms (3.286 ms / 100) 3.329 -> 3.335 ( +0.18%) [ +0.00% +0.36% +0.24% / +0.21% +0.24% +0.18%] index_select strided 100 : Elapsed 0.033 ms (3.329 ms / 100) 3.336 -> 3.326 ( -0.30%) [ +0.39% +0.30% +0.00% / +0.18% -0.30% -0.21%] index_select strided 255 : Elapsed 0.033 ms (3.349 ms / 100) 3.253 -> 3.258 ( +0.15%) [ +0.18% +0.28% +0.00% / +0.15% +0.46% +0.28%] index_select strided 256 : Elapsed 0.033 ms (3.259 ms / 100) 3.346 -> 3.329 ( -0.51%) [ +0.03% +0.00% +2.84% / +0.06% -0.51% -0.48%] index_select strided 257 : Elapsed 0.033 ms (3.347 ms / 100) 3.334 -> 3.331 ( -0.09%) [ +0.00% +0.00% +0.21% / +0.24% -0.09% -0.06%] index_select random : Elapsed 0.033 ms (3.334 ms / 100) 3.326 -> 3.325 ( -0.03%) [ +0.00% +0.12% +0.18% / +0.06% -0.03% +0.09%] index_select random_sorted : Elapsed 0.033 ms (3.326 ms / 100) 3.341 -> 3.324 ( -0.51%) [ +0.00% +0.03% +0.09% / -0.09% -0.51% -0.42%] index_select perm : Elapsed 0.033 ms (3.341 ms / 100) 3.326 -> 3.320 ( -0.18%) [ +0.15% +0.00% +0.03% / -0.06% -0.18% -0.15%] index_select perm_sorted : Elapsed 0.033 ms (3.331 ms / 100) B = [15, 1000] (stride (1, 15)) A = [15, 2048] (stride (1, 15)) dim = 1 3.242 -> 3.238 ( -0.12%) [ +0.09% +0.03% +0.00% / +0.09% +0.22% -0.12%] index_select const : Elapsed 0.032 ms (3.245 ms / 100) 3.268 -> 3.268 ( +0.00%) [ +0.15% +0.00% +0.43% / +0.00% +0.34% +0.34%] index_select wrap : Elapsed 0.033 ms (3.273 ms / 100) 3.263 -> 3.264 ( +0.03%) [ +0.15% +0.00% +0.12% / +0.06% +0.18% +0.03%] index_select linear : Elapsed 0.033 ms (3.268 ms / 100) 3.277 -> 3.275 ( -0.06%) [ +0.06% +0.00% +1.71% / +0.15% -0.06% +0.24%] index_select reverse : Elapsed 0.033 ms (3.279 ms / 100) 3.240 -> 3.239 ( -0.03%) [ +0.00% +0.19% +0.00% / -0.03% +0.28% +0.52%] index_select skip64 : Elapsed 0.032 ms (3.240 ms / 100) 3.261 -> 3.251 ( -0.31%) [ +0.00% +0.12% +0.09% / +0.03% -0.31% -0.28%] index_select skip256 : Elapsed 0.033 ms (3.261 ms / 100) 3.277 -> 3.279 ( +0.06%) [ +0.12% +0.24% +0.00% / +0.06% +0.46% +0.34%] index_select spread : Elapsed 0.033 ms (3.281 ms / 100) 3.291 -> 3.295 ( +0.12%) [ +0.24% +0.12% +0.00% / +0.12% +0.21% +0.15%] index_select strided 3 : Elapsed 0.033 ms (3.299 ms / 100) 3.288 -> 3.288 ( +0.00%) [ +0.18% +0.00% +0.00% / +0.00% +0.00% +0.09%] index_select strided 5 : Elapsed 0.033 ms (3.294 ms / 100) 3.292 -> 3.301 ( +0.27%) [ +0.15% +0.00% +0.18% / +0.27% +0.73% +0.91%] index_select strided 7 : Elapsed 0.033 ms (3.297 ms / 100) 3.260 -> 3.254 ( -0.18%) [ +0.06% +0.00% +0.09% / +0.21% -0.09% -0.18%] index_select strided 8 : Elapsed 0.033 ms (3.262 ms / 100) 3.267 -> 3.274 ( +0.21%) [ +0.00% +0.15% +0.18% / +0.37% +0.21% +0.34%] index_select strided 16 : Elapsed 0.033 ms (3.267 ms / 100) 3.246 -> 3.242 ( -0.12%) [ +0.00% +0.00% +0.06% / +0.12% -0.03% -0.12%] index_select strided 64 : Elapsed 0.032 ms (3.246 ms / 100) 3.274 -> 3.277 ( +0.09%) [ +0.15% +0.00% +0.21% / +0.24% +0.15% +0.09%] index_select strided 100 : Elapsed 0.033 ms (3.279 ms / 100) 3.278 -> 3.281 ( +0.09%) [ +0.00% +0.31% +0.00% / +0.09% +0.37% +0.37%] index_select strided 255 : Elapsed 0.033 ms (3.278 ms / 100) 3.240 -> 3.240 ( +0.00%) [ +0.22% +0.06% +0.00% / +0.00% +0.19% +0.31%] index_select strided 256 : Elapsed 0.032 ms (3.247 ms / 100) 3.285 -> 3.295 ( +0.30%) [ +0.43% +0.30% +0.00% / +0.30% +0.61% +0.55%] index_select strided 257 : Elapsed 0.033 ms (3.299 ms / 100) 3.276 -> 3.276 ( +0.00%) [ +0.18% +0.12% +0.00% / +0.12% +0.00% +0.15%] index_select random : Elapsed 0.033 ms (3.282 ms / 100) 3.278 -> 3.281 ( +0.09%) [ +0.24% +0.00% +0.09% / +0.09% +0.09% +0.34%] index_select random_sorted : Elapsed 0.033 ms (3.286 ms / 100) 3.282 -> 3.285 ( +0.09%) [ +0.12% +0.00% +0.15% / +0.09% +0.30% +0.40%] index_select perm : Elapsed 0.033 ms (3.286 ms / 100) 3.291 -> 3.288 ( -0.09%) [ +0.00% +0.03% +0.09% / +0.18% -0.09% -0.06%] index_select perm_sorted : Elapsed 0.033 ms (3.291 ms / 100) out_shape = [1000, 15] in_shape = [2048, 15] idx_dim = 0 B = [1000, 15] (stride (15, 1)) dim = 0 fill_cnt = 2048 1.791 -> 1.779 ( -0.67%) [ +0.34% +0.39% +0.00% / -0.34% -0.67% -0.50%] index_fill_ const : Elapsed 0.018 ms (1.797 ms / 100) 1.815 -> 1.800 ( -0.83%) [ +0.11% +0.17% +0.00% / -0.77% -0.83% -0.83%] index_fill_ linear : Elapsed 0.018 ms (1.817 ms / 100) 1.797 -> 1.782 ( -0.83%) [ +0.28% +0.17% +0.00% / -0.83% -0.78% -0.50%] index_fill_ reverse : Elapsed 0.018 ms (1.802 ms / 100) 1.794 -> 1.779 ( -0.84%) [ +0.33% +0.00% +0.28% / -0.61% -0.84% -0.84%] index_fill_ skip64 : Elapsed 0.018 ms (1.800 ms / 100) 1.789 -> 1.777 ( -0.67%) [ +0.39% +0.00% +0.11% / -0.67% -0.45% -0.39%] index_fill_ skip256 : Elapsed 0.018 ms (1.796 ms / 100) 1.800 -> 1.789 ( -0.61%) [ +0.17% +0.06% +0.00% / -0.39% -0.33% -0.61%] index_fill_ spread : Elapsed 0.018 ms (1.803 ms / 100) 1.812 -> 1.792 ( -1.10%) [ +0.39% +0.17% +0.00% / -0.50% -1.10% -0.88%] index_fill_ strided 3 : Elapsed 0.018 ms (1.819 ms / 100) 1.806 -> 1.794 ( -0.66%) [ +0.22% +0.17% +0.00% / -0.44% -0.66% -0.66%] index_fill_ strided 5 : Elapsed 0.018 ms (1.810 ms / 100) 1.809 -> 1.798 ( -0.61%) [ +0.55% +0.00% +0.33% / -0.61% -0.17% +0.06%] index_fill_ strided 7 : Elapsed 0.018 ms (1.819 ms / 100) 1.799 -> 1.793 ( -0.33%) [ +0.00% +0.11% +0.22% / -0.33% -0.28% -0.17%] index_fill_ strided 8 : Elapsed 0.018 ms (1.799 ms / 100) 1.801 -> 1.788 ( -0.72%) [ +0.28% +0.11% +0.00% / -0.44% -0.72% -0.56%] index_fill_ strided 16 : Elapsed 0.018 ms (1.806 ms / 100) 1.802 -> 1.787 ( -0.83%) [ +0.17% +0.00% +0.00% / -0.72% -0.83% -0.72%] index_fill_ strided 64 : Elapsed 0.018 ms (1.805 ms / 100) 1.798 -> 1.782 ( -0.89%) [ +0.39% +0.11% +0.00% / -0.83% -0.67% -0.89%] index_fill_ strided 100 : Elapsed 0.018 ms (1.805 ms / 100) 1.812 -> 1.797 ( -0.83%) [ +0.11% +0.00% +0.06% / -0.66% -0.83% -0.83%] index_fill_ strided 255 : Elapsed 0.018 ms (1.814 ms / 100) 1.797 -> 1.789 ( -0.45%) [ +0.06% +0.00% +0.11% / -0.45% -0.45% -0.39%] index_fill_ strided 256 : Elapsed 0.018 ms (1.798 ms / 100) 1.817 -> 1.801 ( -0.88%) [ +0.28% +0.11% +0.00% / -0.44% -0.88% -0.72%] index_fill_ strided 257 : Elapsed 0.018 ms (1.822 ms / 100) 1.817 -> 1.800 ( -0.94%) [ +0.39% +0.06% +0.00% / -0.77% -0.94% -0.72%] index_fill_ random : Elapsed 0.018 ms (1.824 ms / 100) 1.804 -> 1.790 ( -0.78%) [ +0.22% +0.06% +0.00% / -0.78% -0.39% -0.72%] index_fill_ random_sorted : Elapsed 0.018 ms (1.808 ms / 100) B = [1000, 15] (stride (15, 1)) A = [2048, 15] (stride (15, 1)) dim = 0 3.249 -> 3.248 ( -0.03%) [ +0.00% +0.03% +0.00% / -0.03% +0.40% +0.28%] index_select const : Elapsed 0.032 ms (3.249 ms / 100) 3.258 -> 3.266 ( +0.25%) [ +0.25% +0.15% +0.00% / +0.25% +0.77% +0.49%] index_select wrap : Elapsed 0.033 ms (3.266 ms / 100) 3.270 -> 3.274 ( +0.12%) [ +0.28% +0.00% +0.21% / +0.15% +0.12% +0.24%] index_select linear : Elapsed 0.033 ms (3.279 ms / 100) 3.264 -> 3.272 ( +0.25%) [ +0.00% +0.00% +0.31% / +0.25% +0.49% +0.43%] index_select reverse : Elapsed 0.033 ms (3.264 ms / 100) 3.244 -> 3.243 ( -0.03%) [ +0.00% +0.09% +0.31% / -0.03% +0.28% +0.18%] index_select skip64 : Elapsed 0.032 ms (3.244 ms / 100) 3.242 -> 3.242 ( +0.00%) [ +0.00% +0.15% +0.15% / +0.00% +0.15% +0.00%] index_select skip256 : Elapsed 0.032 ms (3.242 ms / 100) 3.285 -> 3.296 ( +0.33%) [ +0.18% +0.00% +0.03% / +0.33% +0.49% +0.55%] index_select spread : Elapsed 0.033 ms (3.291 ms / 100) 3.287 -> 3.286 ( -0.03%) [ +0.12% +0.00% +0.12% / +0.21% -0.03% -0.03%] index_select strided 3 : Elapsed 0.033 ms (3.291 ms / 100) 3.299 -> 3.297 ( -0.06%) [ +0.52% +0.00% +0.21% / +0.24% -0.06% +0.03%] index_select strided 5 : Elapsed 0.033 ms (3.316 ms / 100) 3.285 -> 3.286 ( +0.03%) [ +0.06% +0.24% +0.00% / +0.03% +0.30% +0.27%] index_select strided 7 : Elapsed 0.033 ms (3.287 ms / 100) 3.272 -> 3.259 ( -0.40%) [ +0.12% +0.00% +0.28% / +0.00% -0.31% -0.40%] index_select strided 8 : Elapsed 0.033 ms (3.276 ms / 100) 3.250 -> 3.252 ( +0.06%) [ +0.15% +0.15% +0.00% / +0.06% +0.06% +0.34%] index_select strided 16 : Elapsed 0.033 ms (3.255 ms / 100) 3.258 -> 3.258 ( +0.00%) [ +0.03% +0.00% +0.06% / +0.00% +0.21% +0.18%] index_select strided 64 : Elapsed 0.033 ms (3.259 ms / 100) 3.270 -> 3.269 ( -0.03%) [ +0.00% +0.06% +0.00% / +0.00% -0.03% +0.21%] index_select strided 100 : Elapsed 0.033 ms (3.270 ms / 100) 3.290 -> 3.285 ( -0.15%) [ +0.00% +0.03% +0.43% / +0.12% +0.24% -0.15%] index_select strided 255 : Elapsed 0.033 ms (3.290 ms / 100) 3.232 -> 3.240 ( +0.25%) [ +0.00% +0.28% +0.19% / +0.25% +0.43% +0.59%] index_select strided 256 : Elapsed 0.032 ms (3.232 ms / 100) 3.291 -> 3.284 ( -0.21%) [ +0.00% +0.09% +0.09% / +0.00% -0.12% -0.21%] index_select strided 257 : Elapsed 0.033 ms (3.291 ms / 100) 3.277 -> 3.279 ( +0.06%) [ +0.34% +0.00% +0.34% / +0.15% +0.12% +0.06%] index_select random : Elapsed 0.033 ms (3.288 ms / 100) 3.278 -> 3.278 ( +0.00%) [ +0.06% +0.18% +0.00% / +0.15% +0.00% +0.00%] index_select random_sorted : Elapsed 0.033 ms (3.280 ms / 100) 3.286 -> 3.285 ( -0.03%) [ +0.18% +0.24% +0.00% / +0.24% -0.03% -0.03%] index_select perm : Elapsed 0.033 ms (3.292 ms / 100) 3.283 -> 3.276 ( -0.21%) [ +0.00% +0.27% +0.18% / +0.34% -0.21% -0.03%] index_select perm_sorted : Elapsed 0.033 ms (3.283 ms / 100) B = [1000, 15] (stride (15, 1)) A = [2048, 15] (stride (1, 2048)) dim = 0 3.245 -> 3.245 ( +0.00%) [ +0.00% +0.22% +0.03% / +0.22% +0.18% +0.00%] index_select const : Elapsed 0.032 ms (3.245 ms / 100) 3.300 -> 3.305 ( +0.15%) [ +0.15% +0.00% +0.06% / +0.15% +0.21% +0.27%] index_select wrap : Elapsed 0.033 ms (3.305 ms / 100) 3.286 -> 3.287 ( +0.03%) [ +0.09% +0.00% +0.06% / +0.27% +0.03% +0.03%] index_select linear : Elapsed 0.033 ms (3.289 ms / 100) 3.303 -> 3.291 ( -0.36%) [ +0.06% +0.06% +0.00% / +0.09% -0.21% -0.36%] index_select reverse : Elapsed 0.033 ms (3.305 ms / 100) 3.257 -> 3.262 ( +0.15%) [ +0.09% +0.00% +0.34% / +0.15% +0.34% +0.52%] index_select skip64 : Elapsed 0.033 ms (3.260 ms / 100) 3.265 -> 3.260 ( -0.15%) [ +0.34% +0.09% +0.00% / +0.34% -0.06% -0.15%] index_select skip256 : Elapsed 0.033 ms (3.276 ms / 100) 3.313 -> 3.315 ( +0.06%) [ +0.03% +0.06% +0.00% / +0.06% +0.24% +0.36%] index_select spread : Elapsed 0.033 ms (3.314 ms / 100) 3.324 -> 3.330 ( +0.18%) [ +0.00% +0.24% +0.12% / +0.18% +0.54% +0.45%] index_select strided 3 : Elapsed 0.033 ms (3.324 ms / 100) 3.310 -> 3.312 ( +0.06%) [ +0.00% +0.27% +0.39% / +0.12% +0.06% +0.48%] index_select strided 5 : Elapsed 0.033 ms (3.310 ms / 100) 3.318 -> 3.325 ( +0.21%) [ +0.00% +0.21% +0.09% / +0.21% +0.84% +0.66%] index_select strided 7 : Elapsed 0.033 ms (3.318 ms / 100) 3.310 -> 3.309 ( -0.03%) [ +0.00% +0.18% +0.09% / -0.03% +0.39% +0.30%] index_select strided 8 : Elapsed 0.033 ms (3.310 ms / 100) 3.305 -> 3.310 ( +0.15%) [ +0.18% +0.03% +0.00% / +0.30% +0.15% +0.61%] index_select strided 16 : Elapsed 0.033 ms (3.311 ms / 100) 3.271 -> 3.272 ( +0.03%) [ +0.00% +0.09% +0.43% / +0.18% +0.03% +0.15%] index_select strided 64 : Elapsed 0.033 ms (3.271 ms / 100) 3.334 -> 3.334 ( +0.00%) [ +0.09% +0.00% +0.09% / +0.00% +0.54% +0.63%] index_select strided 100 : Elapsed 0.033 ms (3.337 ms / 100) 3.330 -> 3.324 ( -0.18%) [ +0.18% +0.00% +0.12% / -0.18% +0.21% +0.27%] index_select strided 255 : Elapsed 0.033 ms (3.336 ms / 100) 3.258 -> 3.260 ( +0.06%) [ +0.00% +0.09% +0.06% / +0.06% +0.18% +0.31%] index_select strided 256 : Elapsed 0.033 ms (3.258 ms / 100) 3.345 -> 3.345 ( +0.00%) [ +0.21% +0.00% +0.15% / +0.00% +0.30% +0.06%] index_select strided 257 : Elapsed 0.034 ms (3.352 ms / 100) 3.326 -> 3.333 ( +0.21%) [ +0.00% +0.12% +0.27% / +0.21% +0.39% +0.30%] index_select random : Elapsed 0.033 ms (3.326 ms / 100) 3.320 -> 3.331 ( +0.33%) [ +0.24% +0.00% +0.30% / +0.33% +0.51% +0.66%] index_select random_sorted : Elapsed 0.033 ms (3.328 ms / 100) 3.334 -> 3.328 ( -0.18%) [ +0.09% +0.00% +0.03% / -0.18% -0.12% +0.24%] index_select perm : Elapsed 0.033 ms (3.337 ms / 100) 3.327 -> 3.330 ( +0.09%) [ +0.06% +0.18% +0.00% / +0.12% +0.15% +0.09%] index_select perm_sorted : Elapsed 0.033 ms (3.329 ms / 100) B = [1000, 15] (stride (1, 1000)) dim = 0 fill_cnt = 2048 Good 2.010 -> 1.757 (-12.59%) [ +0.00% +0.15% +0.60% / -12.59% -11.99% -11.99%] index_fill_ const : Elapsed 0.020 ms (2.010 ms / 100) good 1.956 -> 1.769 ( -9.56%) [ +0.26% +0.00% +0.31% / -9.56% -9.00% -9.10%] index_fill_ linear : Elapsed 0.020 ms (1.961 ms / 100) good 1.927 -> 1.773 ( -7.99%) [ +0.26% +0.00% +0.16% / -7.89% -7.99% -7.84%] index_fill_ reverse : Elapsed 0.019 ms (1.932 ms / 100) good 1.936 -> 1.760 ( -9.09%) [ +0.00% +0.15% +0.31% / -8.73% -9.09% -8.83%] index_fill_ skip64 : Elapsed 0.019 ms (1.936 ms / 100) Good 2.043 -> 1.758 (-13.95%) [ +0.34% +0.20% +0.00% / -13.61% -13.95% -13.61%] index_fill_ skip256 : Elapsed 0.021 ms (2.050 ms / 100) good 1.886 -> 1.773 ( -5.99%) [ +0.16% +0.11% +0.00% / -5.83% -5.83% -5.99%] index_fill_ spread : Elapsed 0.019 ms (1.889 ms / 100) good 1.889 -> 1.786 ( -5.45%) [ +0.58% +0.00% +0.37% / -5.45% -5.40% -5.03%] index_fill_ strided 3 : Elapsed 0.019 ms (1.900 ms / 100) good 1.919 -> 1.812 ( -5.58%) [ +0.21% +0.00% +0.00% / -5.58% -5.00% -5.26%] index_fill_ strided 5 : Elapsed 0.019 ms (1.923 ms / 100) 1.894 -> 1.809 ( -4.49%) [ +0.32% +0.37% +0.00% / -4.44% -4.49% -4.28%] index_fill_ strided 7 : Elapsed 0.019 ms (1.900 ms / 100) 1.913 -> 1.845 ( -3.55%) [ +0.21% +0.00% +0.05% / -3.55% -3.24% -3.08%] index_fill_ strided 8 : Elapsed 0.019 ms (1.917 ms / 100) 1.914 -> 1.855 ( -3.08%) [ +0.42% +0.37% +0.00% / -2.82% -2.72% -3.08%] index_fill_ strided 16 : Elapsed 0.019 ms (1.922 ms / 100) 1.909 -> 1.885 ( -1.26%) [ +0.21% +0.00% +0.00% / -1.00% -1.26% -1.10%] index_fill_ strided 64 : Elapsed 0.019 ms (1.913 ms / 100) 1.889 -> 1.805 ( -4.45%) [ +0.21% +0.11% +0.00% / -4.45% -4.13% -3.92%] index_fill_ strided 100 : Elapsed 0.019 ms (1.893 ms / 100) 1.921 -> 1.894 ( -1.41%) [ +0.21% +0.00% +0.10% / -1.41% -1.30% -1.35%] index_fill_ strided 255 : Elapsed 0.019 ms (1.925 ms / 100) 1.910 -> 1.889 ( -1.10%) [ +0.31% +0.05% +0.00% / -0.84% -1.10% -1.10%] index_fill_ strided 256 : Elapsed 0.019 ms (1.916 ms / 100) 1.894 -> 1.861 ( -1.74%) [ +0.05% +0.16% +0.00% / -1.53% -1.74% -1.58%] index_fill_ strided 257 : Elapsed 0.019 ms (1.895 ms / 100) 1.916 -> 1.875 ( -2.14%) [ +0.00% +0.31% +0.21% / -2.14% -1.72% -2.14%] index_fill_ random : Elapsed 0.019 ms (1.916 ms / 100) good 1.910 -> 1.787 ( -6.44%) [ +0.26% +0.16% +0.00% / -5.92% -6.44% -6.44%] index_fill_ random_sorted : Elapsed 0.019 ms (1.915 ms / 100) B = [1000, 15] (stride (1, 1000)) A = [2048, 15] (stride (15, 1)) dim = 0 3.274 -> 3.245 ( -0.89%) [ +0.06% +0.12% +0.00% / -0.89% -0.64% -0.64%] index_select const : Elapsed 0.033 ms (3.276 ms / 100) 3.281 -> 3.278 ( -0.09%) [ +0.09% +0.21% +0.00% / -0.09% +0.12% +0.06%] index_select wrap : Elapsed 0.033 ms (3.284 ms / 100) 3.294 -> 3.278 ( -0.49%) [ +0.00% +0.00% +0.12% / -0.30% -0.39% -0.49%] index_select linear : Elapsed 0.033 ms (3.294 ms / 100) 3.288 -> 3.274 ( -0.43%) [ +0.00% +0.03% +0.09% / -0.43% -0.24% -0.21%] index_select reverse : Elapsed 0.033 ms (3.288 ms / 100) 3.264 -> 3.250 ( -0.43%) [ +0.15% +0.00% +0.25% / -0.43% -0.09% +0.03%] index_select skip64 : Elapsed 0.033 ms (3.269 ms / 100) 3.261 -> 3.237 ( -0.74%) [ +0.03% +0.28% +0.00% / -0.71% -0.67% -0.74%] index_select skip256 : Elapsed 0.033 ms (3.262 ms / 100) 3.312 -> 3.312 ( +0.00%) [ +0.27% +0.00% +0.06% / +0.00% +0.06% +0.00%] index_select spread : Elapsed 0.033 ms (3.321 ms / 100) 3.307 -> 3.305 ( -0.06%) [ +0.00% +0.06% +0.21% / +0.33% +0.03% -0.06%] index_select strided 3 : Elapsed 0.033 ms (3.307 ms / 100) 3.322 -> 3.310 ( -0.36%) [ +0.00% +0.06% +0.03% / +0.18% -0.36% -0.15%] index_select strided 5 : Elapsed 0.033 ms (3.322 ms / 100) 3.307 -> 3.309 ( +0.06%) [ +0.00% +0.03% +0.06% / +0.12% +0.06% +0.36%] index_select strided 7 : Elapsed 0.033 ms (3.307 ms / 100) 3.295 -> 3.269 ( -0.79%) [ +0.09% +0.06% +0.00% / -0.18% -0.79% -0.61%] index_select strided 8 : Elapsed 0.033 ms (3.298 ms / 100) 3.272 -> 3.266 ( -0.18%) [ +0.06% +0.06% +0.00% / -0.18% -0.18% +0.06%] index_select strided 16 : Elapsed 0.033 ms (3.274 ms / 100) 3.282 -> 3.261 ( -0.64%) [ +0.12% +0.00% +0.06% / -0.64% -0.46% -0.43%] index_select strided 64 : Elapsed 0.033 ms (3.286 ms / 100) 3.291 -> 3.295 ( +0.12%) [ +0.24% +0.00% +0.09% / +0.12% +0.24% +0.24%] index_select strided 100 : Elapsed 0.033 ms (3.299 ms / 100) 3.315 -> 3.297 ( -0.54%) [ +0.03% +0.00% +0.06% / -0.24% -0.54% -0.45%] index_select strided 255 : Elapsed 0.033 ms (3.316 ms / 100) 3.258 -> 3.237 ( -0.64%) [ +0.21% +0.21% +0.00% / -0.64% -0.18% -0.40%] index_select strided 256 : Elapsed 0.033 ms (3.265 ms / 100) 3.310 -> 3.300 ( -0.30%) [ +0.00% +0.12% +0.15% / -0.06% -0.30% -0.18%] index_select strided 257 : Elapsed 0.033 ms (3.310 ms / 100) 3.304 -> 3.299 ( -0.15%) [ +0.09% +0.09% +0.00% / +0.09% +0.12% -0.15%] index_select random : Elapsed 0.033 ms (3.307 ms / 100) 3.301 -> 3.289 ( -0.36%) [ +0.00% +0.27% +0.09% / -0.18% -0.36% -0.30%] index_select random_sorted : Elapsed 0.033 ms (3.301 ms / 100) 3.312 -> 3.311 ( -0.03%) [ +0.30% +0.00% +0.09% / +0.27% -0.03% +0.03%] index_select perm : Elapsed 0.033 ms (3.322 ms / 100) 3.305 -> 3.289 ( -0.48%) [ +0.42% +0.15% +0.00% / +0.21% -0.48% -0.45%] index_select perm_sorted : Elapsed 0.033 ms (3.319 ms / 100) B = [1000, 15] (stride (1, 1000)) A = [2048, 15] (stride (1, 2048)) dim = 0 3.260 -> 3.238 ( -0.67%) [ +0.21% +0.18% +0.00% / -0.67% -0.64% -0.43%] index_select const : Elapsed 0.033 ms (3.267 ms / 100) 3.318 -> 3.280 ( -1.15%) [ +0.03% +0.00% +0.09% / -1.12% -1.05% -1.15%] index_select wrap : Elapsed 0.033 ms (3.319 ms / 100) 3.309 -> 3.262 ( -1.42%) [ +0.09% +0.00% +0.15% / -1.00% -1.42% -1.24%] index_select linear : Elapsed 0.033 ms (3.312 ms / 100) 3.317 -> 3.272 ( -1.36%) [ +0.09% +0.09% +0.00% / -0.45% -1.36% -1.24%] index_select reverse : Elapsed 0.033 ms (3.320 ms / 100) 3.270 -> 3.244 ( -0.80%) [ +0.00% +0.12% +0.09% / -0.80% -0.37% -0.52%] index_select skip64 : Elapsed 0.033 ms (3.270 ms / 100) 3.276 -> 3.250 ( -0.79%) [ +0.40% +0.00% +0.46% / -0.24% -0.76% -0.79%] index_select skip256 : Elapsed 0.033 ms (3.289 ms / 100) 3.332 -> 3.290 ( -1.26%) [ +0.09% +0.21% +0.00% / -1.26% -1.14% -0.99%] index_select spread : Elapsed 0.033 ms (3.335 ms / 100) 3.352 -> 3.307 ( -1.34%) [ +0.00% +0.00% +0.09% / -1.34% -0.87% -0.72%] index_select strided 3 : Elapsed 0.034 ms (3.352 ms / 100) 3.330 -> 3.293 ( -1.11%) [ +0.00% +0.15% +0.27% / -1.11% -0.90% -0.81%] index_select strided 5 : Elapsed 0.033 ms (3.330 ms / 100) 3.340 -> 3.309 ( -0.93%) [ +0.00% +0.27% +0.33% / -0.93% -0.54% -0.27%] index_select strided 7 : Elapsed 0.033 ms (3.340 ms / 100) 3.326 -> 3.293 ( -0.99%) [ +0.00% +0.27% +0.27% / -0.90% -0.81% -0.99%] index_select strided 8 : Elapsed 0.033 ms (3.326 ms / 100) 3.317 -> 3.296 ( -0.63%) [ +0.00% +0.15% +0.33% / -0.63% -0.51% -0.39%] index_select strided 16 : Elapsed 0.033 ms (3.317 ms / 100) 3.299 -> 3.264 ( -1.06%) [ +0.15% +0.00% +0.00% / -0.88% -1.06% -0.97%] index_select strided 64 : Elapsed 0.033 ms (3.304 ms / 100) 3.357 -> 3.333 ( -0.71%) [ +0.00% +0.21% +0.24% / -0.71% -0.36% -0.45%] index_select strided 100 : Elapsed 0.034 ms (3.357 ms / 100) 3.352 -> 3.303 ( -1.46%) [ +0.24% +0.15% +0.00% / -1.43% -1.40% -1.46%] index_select strided 255 : Elapsed 0.034 ms (3.360 ms / 100) 3.275 -> 3.245 ( -0.92%) [ +0.24% +0.15% +0.00% / -0.92% -0.61% -0.67%] index_select strided 256 : Elapsed 0.033 ms (3.283 ms / 100) 3.366 -> 3.310 ( -1.66%) [ +0.00% +0.15% +0.06% / -1.66% -1.49% -1.22%] index_select strided 257 : Elapsed 0.034 ms (3.366 ms / 100) 3.348 -> 3.316 ( -0.96%) [ +0.03% +0.00% +0.18% / -0.81% -0.87% -0.96%] index_select random : Elapsed 0.033 ms (3.349 ms / 100) 3.345 -> 3.303 ( -1.26%) [ +0.06% +0.03% +0.00% / -1.23% -1.26% -1.14%] index_select random_sorted : Elapsed 0.033 ms (3.347 ms / 100) 3.346 -> 3.322 ( -0.72%) [ +0.00% +0.06% +0.30% / -0.72% -0.72% -0.72%] index_select perm : Elapsed 0.033 ms (3.346 ms / 100) 3.348 -> 3.304 ( -1.31%) [ +0.06% +0.00% +0.21% / -1.14% -1.08% -1.31%] index_select perm_sorted : Elapsed 0.033 ms (3.350 ms / 100) out_shape = [2048, 1000] in_shape = [2048, 15] idx_dim = 1 B = [2048, 1000] (stride (1000, 1)) dim = 1 fill_cnt = 15 0.961 -> 0.971 ( +1.04%) [ +3.95% +1.25% +0.00% / +1.04% +3.02% +4.27%] index_fill_ const : Elapsed 0.010 ms (0.999 ms / 100) 0.968 -> 0.977 ( +0.93%) [ +1.55% +0.41% +0.00% / +0.93% +1.03% +1.55%] index_fill_ linear : Elapsed 0.010 ms (0.983 ms / 100) 0.968 -> 0.978 ( +1.03%) [ +0.93% +0.00% +0.21% / +1.03% +1.03% +1.03%] index_fill_ reverse : Elapsed 0.010 ms (0.977 ms / 100) 0.966 -> 0.974 ( +0.83%) [ +0.93% +0.00% +0.10% / +0.83% +1.66% +1.76%] index_fill_ skip64 : Elapsed 0.010 ms (0.975 ms / 100) 0.965 -> 0.976 ( +1.14%) [ +1.24% +0.31% +0.00% / +1.14% +1.87% +1.76%] index_fill_ skip256 : Elapsed 0.010 ms (0.977 ms / 100) 2.170 -> 2.180 ( +0.46%) [ +0.51% +0.00% +0.23% / +0.46% +0.78% +0.46%] index_fill_ spread : Elapsed 0.022 ms (2.181 ms / 100) 0.970 -> 0.977 ( +0.72%) [ +1.13% +0.10% +0.00% / +1.24% +0.72% +0.93%] index_fill_ strided 3 : Elapsed 0.010 ms (0.981 ms / 100) 1.008 -> 1.015 ( +0.69%) [ +1.39% +0.00% +0.20% / +0.69% +2.68% +2.98%] index_fill_ strided 5 : Elapsed 0.010 ms (1.022 ms / 100) 1.204 -> 1.154 ( -4.15%) [ +0.91% +0.00% +0.08% / +0.58% -4.15% -4.07%] index_fill_ strided 7 : Elapsed 0.012 ms (1.215 ms / 100) good 1.333 -> 1.247 ( -6.45%) [ +0.23% +0.00% +0.08% / +0.08% -5.78% -6.45%] index_fill_ strided 8 : Elapsed 0.013 ms (1.336 ms / 100) 1.618 -> 1.591 ( -1.67%) [ +0.00% +0.12% +0.06% / +0.19% -1.67% -1.61%] index_fill_ strided 16 : Elapsed 0.016 ms (1.618 ms / 100) 2.169 -> 2.174 ( +0.23%) [ +0.28% +0.00% +0.09% / +0.23% +0.74% +0.65%] index_fill_ strided 64 : Elapsed 0.022 ms (2.175 ms / 100) 2.203 -> 2.187 ( -0.73%) [ +0.27% +0.00% +0.09% / +0.23% -0.50% -0.73%] index_fill_ strided 100 : Elapsed 0.022 ms (2.209 ms / 100) 2.228 -> 2.230 ( +0.09%) [ +0.09% +0.09% +0.00% / +0.09% +0.40% +0.49%] index_fill_ strided 255 : Elapsed 0.022 ms (2.230 ms / 100) 2.295 -> 2.292 ( -0.13%) [ +0.09% +0.00% +0.26% / -0.13% -0.04% -0.09%] index_fill_ strided 256 : Elapsed 0.023 ms (2.297 ms / 100) 2.334 -> 2.340 ( +0.26%) [ +0.13% +0.00% +0.09% / +0.26% +0.30% +0.30%] index_fill_ strided 257 : Elapsed 0.023 ms (2.337 ms / 100) 2.314 -> 2.316 ( +0.09%) [ +0.04% +0.35% +0.00% / +0.09% +0.69% +0.65%] index_fill_ random : Elapsed 0.023 ms (2.315 ms / 100) 2.087 -> 2.085 ( -0.10%) [ +0.14% +0.00% +0.10% / +0.29% -0.05% -0.10%] index_fill_ random_sorted : Elapsed 0.021 ms (2.090 ms / 100) 2.236 -> 2.241 ( +0.22%) [ +0.04% +0.09% +0.00% / +0.22% +0.36% +0.31%] index_fill_ perm : Elapsed 0.022 ms (2.237 ms / 100) 1.933 -> 1.930 ( -0.16%) [ +0.00% +0.10% +0.00% / -0.16% +0.41% +0.47%] index_fill_ perm_sorted : Elapsed 0.019 ms (1.933 ms / 100) B = [2048, 1000] (stride (1000, 1)) A = [2048, 15] (stride (15, 1)) dim = 1 1.262 -> 1.270 ( +0.63%) [ +1.03% +0.24% +0.00% / +0.63% +1.51% +1.35%] index_add_ linear : Elapsed 0.013 ms (1.275 ms / 100) 1.258 -> 1.268 ( +0.79%) [ +1.03% +0.08% +0.00% / +0.79% +1.59% +1.51%] index_copy_ linear : Elapsed 0.013 ms (1.271 ms / 100) 1.244 -> 1.256 ( +0.96%) [ +1.53% +0.64% +0.00% / +0.96% +2.65% +2.57%] index_add_ reverse : Elapsed 0.013 ms (1.263 ms / 100) 1.250 -> 1.260 ( +0.80%) [ +0.96% +1.04% +0.00% / +0.80% +1.92% +2.56%] index_copy_ reverse : Elapsed 0.013 ms (1.262 ms / 100) 4.406 -> 4.419 ( +0.30%) [ +0.23% +0.00% +0.14% / +0.30% +0.95% +1.00%] index_add_ spread : Elapsed 0.044 ms (4.416 ms / 100) 2.411 -> 2.409 ( -0.08%) [ +0.41% +0.00% +0.29% / +0.33% +0.08% -0.08%] index_copy_ spread : Elapsed 0.024 ms (2.421 ms / 100) 1.382 -> 1.386 ( +0.29%) [ +0.65% +0.00% +0.14% / +0.29% +1.01% +1.16%] index_add_ strided 3 : Elapsed 0.014 ms (1.391 ms / 100) 1.349 -> 1.357 ( +0.59%) [ +0.82% +0.07% +0.00% / +0.67% +0.59% +0.59%] index_copy_ strided 3 : Elapsed 0.014 ms (1.360 ms / 100) 1.796 -> 1.804 ( +0.45%) [ +0.11% +0.06% +0.00% / +0.45% +5.12% +5.57%] index_add_ strided 7 : Elapsed 0.018 ms (1.798 ms / 100) 1.550 -> 1.564 ( +0.90%) [ +1.29% +0.52% +0.00% / +0.90% +2.39% +2.45%] index_copy_ strided 7 : Elapsed 0.016 ms (1.570 ms / 100) 4.717 -> 4.712 ( -0.11%) [ +0.11% +0.00% +0.08% / -0.11% +0.21% +0.32%] index_add_ strided 257 : Elapsed 0.047 ms (4.722 ms / 100) 2.606 -> 2.619 ( +0.50%) [ +0.46% +0.00% +0.12% / +0.50% +0.54% +0.50%] index_copy_ strided 257 : Elapsed 0.026 ms (2.618 ms / 100) 4.564 -> 4.549 ( -0.33%) [ +0.02% +0.00% +0.00% / -0.11% -0.33% -0.09%] index_add_ perm : Elapsed 0.046 ms (4.565 ms / 100) 2.498 -> 2.484 ( -0.56%) [ +0.28% +0.00% +0.32% / -0.04% -0.08% -0.56%] index_copy_ perm : Elapsed 0.025 ms (2.505 ms / 100) 4.158 -> 4.164 ( +0.14%) [ +0.22% +0.00% +0.24% / +0.36% +0.14% +0.29%] index_add_ perm_sorted : Elapsed 0.042 ms (4.167 ms / 100) 2.289 -> 2.287 ( -0.09%) [ +0.00% +0.26% +0.39% / +0.13% -0.09% -0.04%] index_copy_ perm_sorted : Elapsed 0.023 ms (2.289 ms / 100) GOOD 25.576 -> 13.570 (-46.94%) [ +0.00% +0.20% +0.00% / -46.92% -46.93% -46.94%] index_select const : Elapsed 0.256 ms (25.577 ms / 100) GOOD 26.067 -> 13.597 (-47.84%) [ +0.05% +0.00% +0.16% / -47.84% -47.81% -47.81%] index_select wrap : Elapsed 0.261 ms (26.080 ms / 100) GOOD 25.608 -> 13.750 (-46.31%) [ +0.00% +0.03% +0.27% / -46.31% -46.27% -46.26%] index_select linear : Elapsed 0.256 ms (25.608 ms / 100) GOOD 26.173 -> 13.767 (-47.40%) [ +1.10% +2.01% +0.00% / -47.37% -47.40% -47.34%] index_select reverse : Elapsed 0.265 ms (26.460 ms / 100) GOOD 25.746 -> 13.543 (-47.40%) [ +0.37% +0.00% +0.16% / -47.31% -47.34% -47.40%] index_select skip64 : Elapsed 0.258 ms (25.841 ms / 100) GOOD 25.955 -> 13.578 (-47.69%) [ +0.15% +0.14% +0.00% / -47.69% -47.59% -47.64%] index_select skip256 : Elapsed 0.260 ms (25.994 ms / 100) GOOD 25.785 -> 13.768 (-46.60%) [ +0.10% +0.10% +0.00% / -46.60% -46.56% -46.60%] index_select spread : Elapsed 0.258 ms (25.812 ms / 100) GOOD 26.088 -> 13.593 (-47.90%) [ +0.40% +0.00% +0.33% / -47.90% -47.73% -47.77%] index_select strided 3 : Elapsed 0.262 ms (26.192 ms / 100) GOOD 25.588 -> 13.580 (-46.93%) [ +0.44% +0.29% +0.00% / -46.93% -46.85% -46.90%] index_select strided 5 : Elapsed 0.257 ms (25.701 ms / 100) GOOD 26.181 -> 13.593 (-48.08%) [ +1.00% +0.15% +0.00% / -48.07% -48.08% -48.05%] index_select strided 7 : Elapsed 0.264 ms (26.443 ms / 100) GOOD 25.581 -> 13.567 (-46.96%) [ +0.09% +0.00% +0.19% / -46.96% -46.83% -46.87%] index_select strided 8 : Elapsed 0.256 ms (25.605 ms / 100) GOOD 26.192 -> 13.612 (-48.03%) [ +0.11% +0.00% +0.02% / -48.03% -48.00% -48.00%] index_select random : Elapsed 0.262 ms (26.220 ms / 100) GOOD 25.620 -> 13.744 (-46.35%) [ +0.25% +0.16% +0.00% / -46.35% -46.20% -46.23%] index_select random_sorted : Elapsed 0.257 ms (25.684 ms / 100) B = [2048, 1000] (stride (1000, 1)) A = [2048, 15] (stride (1, 2048)) dim = 1 1.184 -> 1.190 ( +0.51%) [ +0.59% +0.00% +0.00% / +0.51% +5.91% +5.74%] index_add_ linear : Elapsed 0.012 ms (1.191 ms / 100) 1.180 -> 1.189 ( +0.76%) [ +0.76% +0.00% +0.76% / +0.76% +2.63% +2.54%] index_copy_ linear : Elapsed 0.012 ms (1.189 ms / 100) 1.191 -> 1.195 ( +0.34%) [ +0.84% +0.25% +0.00% / +0.34% +6.05% +4.37%] index_add_ reverse : Elapsed 0.012 ms (1.201 ms / 100) 1.188 -> 1.194 ( +0.51%) [ +0.67% +0.00% +0.59% / +0.51% +1.43% +1.52%] index_copy_ reverse : Elapsed 0.012 ms (1.196 ms / 100) 4.427 -> 4.437 ( +0.23%) [ +0.00% +0.29% +0.20% / +0.23% +1.29% +1.40%] index_add_ spread : Elapsed 0.044 ms (4.427 ms / 100) 2.673 -> 2.688 ( +0.56%) [ +0.45% +0.15% +0.00% / +0.56% +0.75% +0.75%] index_copy_ spread : Elapsed 0.027 ms (2.685 ms / 100) 1.355 -> 1.356 ( +0.07%) [ +0.15% +0.07% +0.00% / +0.07% +5.98% +6.64%] index_add_ strided 3 : Elapsed 0.014 ms (1.357 ms / 100) 1.289 -> 1.296 ( +0.54%) [ +0.78% +0.31% +0.00% / +0.54% +4.42% +4.19%] index_copy_ strided 3 : Elapsed 0.013 ms (1.299 ms / 100) 1.880 -> 1.888 ( +0.43%) [ +0.53% +0.11% +0.00% / +0.43% +4.52% +4.26%] index_add_ strided 7 : Elapsed 0.019 ms (1.890 ms / 100) 1.576 -> 1.586 ( +0.63%) [ +0.57% +0.00% +0.06% / +0.63% +5.33% +5.65%] index_copy_ strided 7 : Elapsed 0.016 ms (1.585 ms / 100) 4.716 -> 4.714 ( -0.04%) [ +0.13% +0.30% +0.00% / -0.04% +0.45% +0.45%] index_add_ strided 257 : Elapsed 0.047 ms (4.722 ms / 100) 2.824 -> 2.824 ( +0.00%) [ +0.04% +0.11% +0.00% / +0.00% +0.78% +0.81%] index_copy_ strided 257 : Elapsed 0.028 ms (2.825 ms / 100) 4.299 -> 4.287 ( -0.28%) [ +0.16% +0.19% +0.00% / +0.00% -0.28% -0.02%] index_add_ perm : Elapsed 0.043 ms (4.306 ms / 100) 2.594 -> 2.591 ( -0.12%) [ +0.12% +0.23% +0.00% / -0.12% +0.27% +0.35%] index_copy_ perm : Elapsed 0.026 ms (2.597 ms / 100) 3.790 -> 3.782 ( -0.21%) [ +0.21% +0.05% +0.00% / -0.21% +0.74% +0.61%] index_add_ perm_sorted : Elapsed 0.038 ms (3.798 ms / 100) 2.364 -> 2.375 ( +0.47%) [ +0.08% +0.04% +0.00% / +0.47% +0.89% +0.76%] index_copy_ perm_sorted : Elapsed 0.024 ms (2.366 ms / 100) GOOD 22.379 -> 13.395 (-40.14%) [ +0.00% +0.08% +1.32% / -40.07% -40.14% -40.10%] index_select const : Elapsed 0.224 ms (22.379 ms / 100) GOOD 22.663 -> 17.836 (-21.30%) [ +0.30% +0.08% +0.00% / -21.30% -20.90% -21.02%] index_select wrap : Elapsed 0.227 ms (22.730 ms / 100) GOOD 22.632 -> 13.453 (-40.56%) [ +0.00% +0.79% +0.64% / -40.56% -40.39% -40.51%] index_select linear : Elapsed 0.226 ms (22.632 ms / 100) GOOD 22.236 -> 13.433 (-39.59%) [ +0.22% +0.00% +0.27% / -39.54% -39.59% -39.53%] index_select reverse : Elapsed 0.223 ms (22.286 ms / 100) GOOD 22.815 -> 13.379 (-41.36%) [ +0.00% +0.79% +0.54% / -41.08% -41.36% -41.35%] index_select skip64 : Elapsed 0.228 ms (22.815 ms / 100) GOOD 22.433 -> 13.389 (-40.32%) [ +0.16% +0.00% +0.85% / -39.95% -40.29% -40.32%] index_select skip256 : Elapsed 0.225 ms (22.468 ms / 100) GOOD 22.042 -> 13.693 (-37.88%) [ +0.00% +0.21% +0.01% / -37.87% -37.88% -37.86%] index_select spread : Elapsed 0.220 ms (22.042 ms / 100) GOOD 22.354 -> 14.876 (-33.45%) [ +0.00% +1.10% +0.41% / -33.37% -33.45% -33.41%] index_select strided 3 : Elapsed 0.224 ms (22.354 ms / 100) GOOD 22.466 -> 13.656 (-39.21%) [ +0.00% +0.19% +0.16% / -39.21% -39.01% -38.98%] index_select strided 5 : Elapsed 0.225 ms (22.466 ms / 100) GOOD 23.001 -> 17.897 (-22.19%) [ +0.85% +0.00% +0.21% / -22.19% -22.02% -22.18%] index_select strided 7 : Elapsed 0.232 ms (23.197 ms / 100) GOOD 22.784 -> 17.313 (-24.01%) [ +1.10% +0.00% +0.56% / -24.01% -23.53% -23.49%] index_select strided 8 : Elapsed 0.230 ms (23.034 ms / 100) GOOD 22.400 -> 17.102 (-23.65%) [ +0.28% +0.36% +0.00% / -23.65% -23.49% -23.39%] index_select random : Elapsed 0.225 ms (22.463 ms / 100) GOOD 22.255 -> 13.685 (-38.51%) [ +0.33% +0.00% +1.03% / -38.51% -38.45% -38.47%] index_select random_sorted : Elapsed 0.223 ms (22.328 ms / 100) B = [2048, 1000] (stride (1, 2048)) dim = 1 fill_cnt = 15 0.835 -> 0.844 ( +1.08%) [ +1.08% +0.00% +0.12% / +1.08% +2.04% +2.28%] index_fill_ const : Elapsed 0.008 ms (0.844 ms / 100) 0.839 -> 0.846 ( +0.83%) [ +0.95% +0.12% +0.00% / +0.83% +1.43% +1.67%] index_fill_ linear : Elapsed 0.008 ms (0.847 ms / 100) 0.838 -> 0.846 ( +0.95%) [ +1.31% +0.24% +0.00% / +0.95% +1.67% +1.91%] index_fill_ reverse : Elapsed 0.008 ms (0.849 ms / 100) 0.838 -> 0.847 ( +1.07%) [ +1.67% +0.24% +0.00% / +1.07% +1.43% +1.67%] index_fill_ skip64 : Elapsed 0.009 ms (0.852 ms / 100) 0.838 -> 0.847 ( +1.07%) [ +0.95% +0.36% +0.00% / +1.07% +1.67% +1.31%] index_fill_ skip256 : Elapsed 0.008 ms (0.846 ms / 100) 0.842 -> 0.849 ( +0.83%) [ +0.71% +0.36% +0.00% / +0.83% +1.19% +0.95%] index_fill_ spread : Elapsed 0.008 ms (0.848 ms / 100) 0.842 -> 0.849 ( +0.83%) [ +0.83% +0.00% +0.00% / +0.83% +0.83% +1.19%] index_fill_ strided 3 : Elapsed 0.008 ms (0.849 ms / 100) 0.837 -> 0.845 ( +0.96%) [ +0.96% +0.12% +0.00% / +0.96% +1.67% +1.43%] index_fill_ strided 5 : Elapsed 0.008 ms (0.845 ms / 100) 0.838 -> 0.846 ( +0.95%) [ +0.84% +0.12% +0.00% / +0.95% +1.43% +1.67%] index_fill_ strided 7 : Elapsed 0.008 ms (0.845 ms / 100) 0.837 -> 0.845 ( +0.96%) [ +0.96% +0.12% +0.00% / +0.96% +2.87% +1.67%] index_fill_ strided 8 : Elapsed 0.008 ms (0.845 ms / 100) 0.835 -> 0.845 ( +1.20%) [ +1.20% +0.48% +0.00% / +1.20% +1.92% +2.16%] index_fill_ strided 16 : Elapsed 0.008 ms (0.845 ms / 100) 0.838 -> 0.847 ( +1.07%) [ +0.95% +0.24% +0.00% / +1.07% +1.31% +1.43%] index_fill_ strided 64 : Elapsed 0.008 ms (0.846 ms / 100) 0.839 -> 0.847 ( +0.95%) [ +0.83% +0.00% +0.12% / +0.95% +1.07% +3.81%] index_fill_ strided 100 : Elapsed 0.008 ms (0.846 ms / 100) 0.842 -> 0.846 ( +0.48%) [ +0.71% +0.00% +0.00% / +0.95% +0.48% +0.71%] index_fill_ strided 255 : Elapsed 0.008 ms (0.848 ms / 100) 0.839 -> 0.848 ( +1.07%) [ +1.07% +0.36% +0.00% / +1.31% +1.07% +1.19%] index_fill_ strided 256 : Elapsed 0.008 ms (0.848 ms / 100) 0.843 -> 0.845 ( +0.24%) [ +0.83% +0.12% +0.00% / +0.95% +0.24% +0.47%] index_fill_ strided 257 : Elapsed 0.008 ms (0.850 ms / 100) 0.843 -> 0.845 ( +0.24%) [ +0.95% +0.12% +0.00% / +0.83% +0.24% +0.47%] index_fill_ random : Elapsed 0.009 ms (0.851 ms / 100) 0.842 -> 0.846 ( +0.48%) [ +0.83% +0.36% +0.00% / +0.83% +0.59% +0.48%] index_fill_ random_sorted : Elapsed 0.008 ms (0.849 ms / 100) 0.841 -> 0.845 ( +0.48%) [ +0.83% +0.12% +0.00% / +1.19% +0.48% +0.48%] index_fill_ perm : Elapsed 0.008 ms (0.848 ms / 100) 0.841 -> 0.849 ( +0.95%) [ +0.95% +0.24% +0.00% / +0.95% +1.19% +1.19%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.849 ms / 100) B = [2048, 1000] (stride (1, 2048)) A = [2048, 15] (stride (15, 1)) dim = 1 1.227 -> 1.234 ( +0.57%) [ +0.81% +0.33% +0.00% / +1.06% +0.65% +0.57%] index_add_ linear : Elapsed 0.012 ms (1.237 ms / 100) 1.187 -> 1.187 ( +0.00%) [ +0.25% +0.08% +0.00% / +0.25% +0.08% +0.00%] index_copy_ linear : Elapsed 0.012 ms (1.190 ms / 100) 1.233 -> 1.231 ( -0.16%) [ +0.97% +0.08% +0.00% / +0.97% +0.08% -0.16%] index_add_ reverse : Elapsed 0.012 ms (1.245 ms / 100) 1.183 -> 1.188 ( +0.42%) [ +0.76% +0.00% +0.08% / +1.01% +0.42% +0.42%] index_copy_ reverse : Elapsed 0.012 ms (1.192 ms / 100) 1.234 -> 1.235 ( +0.08%) [ +0.81% +0.24% +0.00% / +0.89% +0.08% +1.78%] index_add_ spread : Elapsed 0.012 ms (1.244 ms / 100) 1.187 -> 1.188 ( +0.08%) [ +0.84% +0.08% +0.00% / +0.67% +0.08% +0.17%] index_copy_ spread : Elapsed 0.012 ms (1.197 ms / 100) 1.229 -> 1.235 ( +0.49%) [ +1.22% +0.41% +0.00% / +1.06% +0.57% +0.49%] index_add_ strided 3 : Elapsed 0.012 ms (1.244 ms / 100) 1.185 -> 1.187 ( +0.17%) [ +0.34% +0.17% +0.00% / +0.25% +0.17% +0.25%] index_copy_ strided 3 : Elapsed 0.012 ms (1.189 ms / 100) 1.238 -> 1.243 ( +0.40%) [ +0.73% +0.00% +0.00% / +0.48% +2.75% +0.40%] index_add_ strided 7 : Elapsed 0.012 ms (1.247 ms / 100) 1.190 -> 1.197 ( +0.59%) [ +0.67% +0.08% +0.00% / +0.59% +0.59% +0.76%] index_copy_ strided 7 : Elapsed 0.012 ms (1.198 ms / 100) 1.238 -> 1.237 ( -0.08%) [ +0.73% +0.16% +0.00% / +0.81% -0.08% +0.32%] index_add_ strided 257 : Elapsed 0.012 ms (1.247 ms / 100) 1.191 -> 1.198 ( +0.59%) [ +0.84% +0.08% +0.00% / +0.84% +0.59% +0.59%] index_copy_ strided 257 : Elapsed 0.012 ms (1.201 ms / 100) 1.233 -> 1.233 ( +0.00%) [ +0.81% +0.32% +0.00% / +0.65% +0.08% +0.00%] index_add_ perm : Elapsed 0.012 ms (1.243 ms / 100) 1.185 -> 1.187 ( +0.17%) [ +0.59% +0.17% +0.00% / +0.68% +0.17% +0.25%] index_copy_ perm : Elapsed 0.012 ms (1.192 ms / 100) 1.238 -> 1.228 ( -0.81%) [ +1.05% +0.24% +0.00% / +0.65% -0.81% -0.81%] index_add_ perm_sorted : Elapsed 0.013 ms (1.251 ms / 100) 1.190 -> 1.182 ( -0.67%) [ +1.09% +0.25% +0.00% / +0.76% -0.59% -0.67%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.203 ms / 100) 15.203 -> 15.242 ( +0.26%) [ +0.14% +0.00% +0.03% / +0.34% +0.26% +0.38%] index_select const : Elapsed 0.152 ms (15.225 ms / 100) 15.338 -> 15.319 ( -0.12%) [ +0.04% +0.03% +0.00% / +0.10% -0.08% -0.12%] index_select wrap : Elapsed 0.153 ms (15.344 ms / 100) 15.320 -> 15.302 ( -0.12%) [ +0.14% +0.05% +0.00% / +0.01% -0.12% -0.10%] index_select linear : Elapsed 0.153 ms (15.341 ms / 100) 15.243 -> 15.252 ( +0.06%) [ +0.20% +0.13% +0.00% / +0.06% +0.78% +0.79%] index_select reverse : Elapsed 0.153 ms (15.273 ms / 100) 15.180 -> 15.209 ( +0.19%) [ +0.00% +0.00% +0.03% / +0.19% +0.97% +1.05%] index_select skip64 : Elapsed 0.152 ms (15.180 ms / 100) 15.253 -> 15.282 ( +0.19%) [ +0.09% +0.00% +0.09% / +0.19% +1.04% +1.01%] index_select skip256 : Elapsed 0.153 ms (15.266 ms / 100) 15.172 -> 15.199 ( +0.18%) [ +0.18% +0.06% +0.00% / +0.18% +1.40% +1.34%] index_select spread : Elapsed 0.152 ms (15.200 ms / 100) 15.352 -> 15.366 ( +0.09%) [ +0.09% +0.00% +0.05% / +0.09% +1.05% +0.89%] index_select strided 3 : Elapsed 0.154 ms (15.366 ms / 100) 15.291 -> 15.244 ( -0.31%) [ +0.13% +0.00% +0.16% / +0.19% -0.31% -0.01%] index_select strided 5 : Elapsed 0.153 ms (15.311 ms / 100) 15.374 -> 15.351 ( -0.15%) [ +0.07% +0.01% +0.00% / +0.01% -0.14% -0.15%] index_select strided 7 : Elapsed 0.154 ms (15.384 ms / 100) 15.307 -> 15.298 ( -0.06%) [ +0.20% +0.00% +0.07% / +0.30% -0.06% +0.16%] index_select strided 8 : Elapsed 0.153 ms (15.337 ms / 100) 15.388 -> 15.374 ( -0.09%) [ +0.00% +0.03% +0.08% / +0.01% +0.05% -0.09%] index_select random : Elapsed 0.154 ms (15.388 ms / 100) 15.331 -> 15.318 ( -0.08%) [ +0.28% +0.00% +0.33% / +0.18% -0.07% -0.08%] index_select random_sorted : Elapsed 0.154 ms (15.374 ms / 100) B = [2048, 1000] (stride (1, 2048)) A = [2048, 15] (stride (1, 2048)) dim = 1 1.113 -> 1.121 ( +0.72%) [ +0.81% +0.18% +0.00% / +0.72% +1.44% +1.53%] index_add_ linear : Elapsed 0.011 ms (1.122 ms / 100) 1.057 -> 1.065 ( +0.76%) [ +0.76% +0.09% +0.00% / +0.76% +1.32% +1.23%] index_copy_ linear : Elapsed 0.011 ms (1.065 ms / 100) 1.115 -> 1.123 ( +0.72%) [ +0.99% +0.18% +0.00% / +0.72% +1.17% +0.90%] index_add_ reverse : Elapsed 0.011 ms (1.126 ms / 100) 1.057 -> 1.067 ( +0.95%) [ +0.95% +0.09% +0.00% / +0.95% +1.14% +0.95%] index_copy_ reverse : Elapsed 0.011 ms (1.067 ms / 100) 1.116 -> 1.125 ( +0.81%) [ +0.81% +0.27% +0.00% / +0.81% +0.99% +0.90%] index_add_ spread : Elapsed 0.011 ms (1.125 ms / 100) 1.056 -> 1.068 ( +1.14%) [ +1.04% +0.28% +0.00% / +1.14% +1.14% +1.14%] index_copy_ spread : Elapsed 0.011 ms (1.067 ms / 100) 1.112 -> 1.122 ( +0.90%) [ +0.90% +0.18% +0.00% / +0.90% +1.44% +1.53%] index_add_ strided 3 : Elapsed 0.011 ms (1.122 ms / 100) 1.057 -> 1.066 ( +0.85%) [ +0.66% +0.09% +0.00% / +0.85% +1.32% +1.32%] index_copy_ strided 3 : Elapsed 0.011 ms (1.064 ms / 100) 1.114 -> 1.124 ( +0.90%) [ +0.72% +0.09% +0.00% / +0.90% +1.44% +1.35%] index_add_ strided 7 : Elapsed 0.011 ms (1.122 ms / 100) 1.055 -> 1.069 ( +1.33%) [ +0.95% +0.28% +0.00% / +1.33% +1.33% +1.42%] index_copy_ strided 7 : Elapsed 0.011 ms (1.065 ms / 100) 1.116 -> 1.126 ( +0.90%) [ +0.90% +0.09% +0.00% / +0.99% +0.90% +1.08%] index_add_ strided 257 : Elapsed 0.011 ms (1.126 ms / 100) 1.057 -> 1.068 ( +1.04%) [ +0.85% +0.19% +0.00% / +1.42% +1.04% +1.04%] index_copy_ strided 257 : Elapsed 0.011 ms (1.066 ms / 100) 1.114 -> 1.127 ( +1.17%) [ +0.81% +0.18% +0.00% / +1.44% +1.35% +1.17%] index_add_ perm : Elapsed 0.011 ms (1.123 ms / 100) 1.057 -> 1.069 ( +1.14%) [ +0.95% +0.09% +0.00% / +1.70% +1.23% +1.14%] index_copy_ perm : Elapsed 0.011 ms (1.067 ms / 100) 1.119 -> 1.121 ( +0.18%) [ +0.71% +0.09% +0.00% / +0.71% +0.36% +0.18%] index_add_ perm_sorted : Elapsed 0.011 ms (1.127 ms / 100) 1.059 -> 1.066 ( +0.66%) [ +0.76% +0.00% +0.00% / +1.13% +0.66% +0.66%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.067 ms / 100) 13.518 -> 13.540 ( +0.16%) [ +0.30% +0.00% +0.08% / +0.37% +0.16% +0.34%] index_select const : Elapsed 0.136 ms (13.558 ms / 100) 14.039 -> 14.121 ( +0.58%) [ +0.48% +0.00% +0.14% / +1.03% +0.86% +0.58%] index_select wrap : Elapsed 0.141 ms (14.107 ms / 100) 13.541 -> 13.610 ( +0.51%) [ +0.66% +0.00% +0.29% / +0.86% +0.60% +0.51%] index_select linear : Elapsed 0.136 ms (13.631 ms / 100) 13.612 -> 13.680 ( +0.50%) [ +0.75% +0.00% +0.43% / +0.76% +0.50% +0.65%] index_select reverse : Elapsed 0.137 ms (13.714 ms / 100) 13.473 -> 13.557 ( +0.62%) [ +0.62% +0.00% +0.36% / +0.62% +0.66% +0.82%] index_select skip64 : Elapsed 0.136 ms (13.556 ms / 100) 13.476 -> 13.540 ( +0.47%) [ +0.49% +0.00% +0.05% / +0.57% +0.65% +0.47%] index_select skip256 : Elapsed 0.135 ms (13.542 ms / 100) 13.512 -> 13.581 ( +0.51%) [ +0.63% +0.00% +0.12% / +0.65% +0.58% +0.51%] index_select spread : Elapsed 0.136 ms (13.597 ms / 100) 13.635 -> 13.607 ( -0.21%) [ +0.80% +1.17% +0.00% / +1.66% -0.21% -0.04%] index_select strided 3 : Elapsed 0.137 ms (13.744 ms / 100) 13.519 -> 13.596 ( +0.57%) [ +0.73% +0.39% +0.00% / +0.78% +0.57% +0.58%] index_select strided 5 : Elapsed 0.136 ms (13.618 ms / 100) 14.094 -> 14.176 ( +0.58%) [ +0.73% +0.00% +0.15% / +0.99% +0.58% +0.93%] index_select strided 7 : Elapsed 0.142 ms (14.197 ms / 100) 14.069 -> 14.065 ( -0.03%) [ +0.21% +0.00% +0.21% / +0.33% -0.03% +0.61%] index_select strided 8 : Elapsed 0.141 ms (14.098 ms / 100) 14.063 -> 14.054 ( -0.06%) [ +0.19% +0.08% +0.00% / +0.46% +0.03% -0.06%] index_select random : Elapsed 0.141 ms (14.090 ms / 100) 13.510 -> 13.574 ( +0.47%) [ +0.58% +0.00% +0.14% / +0.58% +0.64% +0.47%] index_select random_sorted : Elapsed 0.136 ms (13.588 ms / 100) out_shape = [2048, 1000] in_shape = [15, 1000] idx_dim = 0 B = [2048, 1000] (stride (1000, 1)) dim = 0 fill_cnt = 15 0.827 -> 0.842 ( +1.81%) [ +1.21% +0.36% +0.00% / +2.18% +1.81% +1.93%] index_fill_ const : Elapsed 0.008 ms (0.837 ms / 100) 0.830 -> 0.840 ( +1.20%) [ +0.96% +0.12% +0.00% / +1.20% +1.57% +1.57%] index_fill_ linear : Elapsed 0.008 ms (0.838 ms / 100) 0.830 -> 0.840 ( +1.20%) [ +0.96% +0.24% +0.00% / +1.20% +1.69% +1.69%] index_fill_ reverse : Elapsed 0.008 ms (0.838 ms / 100) 0.828 -> 0.835 ( +0.85%) [ +0.85% +0.00% +0.12% / +0.85% +1.57% +1.81%] index_fill_ skip64 : Elapsed 0.008 ms (0.835 ms / 100) 0.827 -> 0.836 ( +1.09%) [ +1.33% +0.12% +0.00% / +1.09% +1.93% +1.81%] index_fill_ skip256 : Elapsed 0.008 ms (0.838 ms / 100) 0.830 -> 0.841 ( +1.33%) [ +1.33% +0.24% +0.00% / +1.33% +1.45% +1.57%] index_fill_ spread : Elapsed 0.008 ms (0.841 ms / 100) 0.831 -> 0.842 ( +1.32%) [ +0.96% +0.12% +0.00% / +6.50% +1.32% +1.44%] index_fill_ strided 3 : Elapsed 0.008 ms (0.839 ms / 100) 0.830 -> 0.838 ( +0.96%) [ +1.08% +0.00% +0.00% / +0.96% +1.20% +1.45%] index_fill_ strided 5 : Elapsed 0.008 ms (0.839 ms / 100) 0.830 -> 0.838 ( +0.96%) [ +0.96% +0.12% +0.00% / +0.96% +1.33% +1.20%] index_fill_ strided 7 : Elapsed 0.008 ms (0.838 ms / 100) 0.831 -> 0.842 ( +1.32%) [ +1.08% +0.00% +0.00% / +2.17% +1.44% +1.32%] index_fill_ strided 8 : Elapsed 0.008 ms (0.840 ms / 100) 0.832 -> 0.842 ( +1.20%) [ +0.84% +0.00% +0.00% / +1.32% +1.20% +1.44%] index_fill_ strided 16 : Elapsed 0.008 ms (0.839 ms / 100) 0.829 -> 0.838 ( +1.09%) [ +0.97% +0.12% +0.00% / +1.09% +1.69% +1.57%] index_fill_ strided 64 : Elapsed 0.008 ms (0.837 ms / 100) 0.830 -> 0.839 ( +1.08%) [ +0.72% +0.00% +0.00% / +1.08% +1.33% +1.33%] index_fill_ strided 100 : Elapsed 0.008 ms (0.836 ms / 100) 0.832 -> 0.837 ( +0.60%) [ +0.96% +0.24% +0.00% / +0.96% +0.60% +0.72%] index_fill_ strided 255 : Elapsed 0.008 ms (0.840 ms / 100) 0.832 -> 0.837 ( +0.60%) [ +1.08% +0.24% +0.00% / +1.92% +0.96% +0.60%] index_fill_ strided 256 : Elapsed 0.008 ms (0.841 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +1.08% +0.36% +0.00% / +1.32% +0.72% +0.84%] index_fill_ strided 257 : Elapsed 0.008 ms (0.842 ms / 100) 0.834 -> 0.839 ( +0.60%) [ +1.08% +0.00% +0.00% / +1.68% +0.72% +0.60%] index_fill_ random : Elapsed 0.008 ms (0.843 ms / 100) 0.830 -> 0.839 ( +1.08%) [ +1.20% +0.24% +0.00% / +1.20% +1.08% +1.08%] index_fill_ random_sorted : Elapsed 0.008 ms (0.840 ms / 100) 0.832 -> 0.839 ( +0.84%) [ +1.08% +0.12% +0.00% / +0.96% +0.84% +0.84%] index_fill_ perm : Elapsed 0.008 ms (0.841 ms / 100) 0.832 -> 0.839 ( +0.84%) [ +1.08% +0.36% +0.00% / +2.88% +0.96% +0.84%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.841 ms / 100) B = [2048, 1000] (stride (1000, 1)) A = [15, 1000] (stride (1000, 1)) dim = 0 1.104 -> 1.122 ( +1.63%) [ +1.45% +0.36% +0.00% / +1.63% +1.81% +1.63%] index_add_ linear : Elapsed 0.011 ms (1.120 ms / 100) 1.053 -> 1.059 ( +0.57%) [ +0.85% +0.00% +0.00% / +0.57% +1.33% +1.23%] index_copy_ linear : Elapsed 0.011 ms (1.062 ms / 100) 1.104 -> 1.122 ( +1.63%) [ +1.18% +0.27% +0.00% / +1.72% +1.63% +1.63%] index_add_ reverse : Elapsed 0.011 ms (1.117 ms / 100) 1.049 -> 1.064 ( +1.43%) [ +0.86% +0.00% +0.10% / +1.43% +1.43% +1.62%] index_copy_ reverse : Elapsed 0.011 ms (1.058 ms / 100) 1.107 -> 1.123 ( +1.45%) [ +1.26% +0.45% +0.00% / +1.45% +1.72% +1.72%] index_add_ spread : Elapsed 0.011 ms (1.121 ms / 100) 1.056 -> 1.068 ( +1.14%) [ +0.66% +0.09% +0.00% / +1.14% +1.23% +1.33%] index_copy_ spread : Elapsed 0.011 ms (1.063 ms / 100) 1.107 -> 1.121 ( +1.26%) [ +1.17% +0.36% +0.00% / +1.26% +1.72% +1.54%] index_add_ strided 3 : Elapsed 0.011 ms (1.120 ms / 100) 1.054 -> 1.066 ( +1.14%) [ +0.57% +0.09% +0.00% / +1.14% +1.42% +1.14%] index_copy_ strided 3 : Elapsed 0.011 ms (1.060 ms / 100) 1.103 -> 1.118 ( +1.36%) [ +1.00% +0.36% +0.00% / +1.36% +1.72% +1.45%] index_add_ strided 5 : Elapsed 0.011 ms (1.114 ms / 100) 1.047 -> 1.061 ( +1.34%) [ +1.05% +0.00% +0.10% / +1.34% +1.62% +1.53%] index_copy_ strided 5 : Elapsed 0.011 ms (1.058 ms / 100) 1.110 -> 1.126 ( +1.44%) [ +0.99% +0.27% +0.00% / +1.98% +1.44% +1.44%] index_add_ strided 7 : Elapsed 0.011 ms (1.121 ms / 100) 1.056 -> 1.068 ( +1.14%) [ +0.85% +0.00% +0.09% / +10.89% +1.14% +1.14%] index_copy_ strided 7 : Elapsed 0.011 ms (1.065 ms / 100) 1.106 -> 1.118 ( +1.08%) [ +1.27% +0.18% +0.00% / +1.08% +1.45% +1.36%] index_add_ strided 255 : Elapsed 0.011 ms (1.120 ms / 100) 1.054 -> 1.058 ( +0.38%) [ +0.47% +0.00% +0.00% / +0.38% +1.14% +1.04%] index_copy_ strided 255 : Elapsed 0.011 ms (1.059 ms / 100) 1.110 -> 1.122 ( +1.08%) [ +0.90% +0.18% +0.00% / +1.08% +1.80% +1.44%] index_add_ strided 257 : Elapsed 0.011 ms (1.120 ms / 100) 1.055 -> 1.064 ( +0.85%) [ +0.66% +0.09% +0.00% / +0.85% +1.71% +1.23%] index_copy_ strided 257 : Elapsed 0.011 ms (1.062 ms / 100) 1.104 -> 1.119 ( +1.36%) [ +1.27% +0.36% +0.00% / +1.36% +1.54% +1.63%] index_add_ perm : Elapsed 0.011 ms (1.118 ms / 100) 1.050 -> 1.059 ( +0.86%) [ +0.67% +0.00% +0.00% / +0.86% +1.71% +1.43%] index_copy_ perm : Elapsed 0.011 ms (1.057 ms / 100) 1.110 -> 1.120 ( +0.90%) [ +1.08% +0.00% +0.00% / +0.90% +1.17% +1.08%] index_add_ perm_sorted : Elapsed 0.011 ms (1.122 ms / 100) 1.055 -> 1.058 ( +0.28%) [ +0.38% +0.00% +0.19% / +0.28% +1.04% +0.95%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.059 ms / 100) 13.377 -> 13.434 ( +0.43%) [ +0.69% +0.12% +0.00% / +0.49% +0.58% +0.43%] index_select const : Elapsed 0.135 ms (13.469 ms / 100) 13.550 -> 13.617 ( +0.49%) [ +0.38% +0.10% +0.00% / +0.51% +0.54% +0.49%] index_select wrap : Elapsed 0.136 ms (13.602 ms / 100) 13.401 -> 13.454 ( +0.40%) [ +0.65% +0.00% +0.16% / +0.46% +0.40% +0.40%] index_select linear : Elapsed 0.135 ms (13.488 ms / 100) 13.474 -> 13.516 ( +0.31%) [ +0.46% +0.00% +0.01% / +0.51% +0.33% +0.31%] index_select reverse : Elapsed 0.135 ms (13.536 ms / 100) 13.377 -> 13.419 ( +0.31%) [ +0.55% +0.00% +0.07% / +0.31% +0.58% +0.43%] index_select skip64 : Elapsed 0.135 ms (13.451 ms / 100) 13.372 -> 13.433 ( +0.46%) [ +0.53% +0.00% +0.13% / +0.64% +0.60% +0.46%] index_select skip256 : Elapsed 0.134 ms (13.443 ms / 100) 13.416 -> 13.461 ( +0.34%) [ +0.59% +0.00% +0.13% / +0.60% +0.34% +0.47%] index_select spread : Elapsed 0.135 ms (13.495 ms / 100) 13.430 -> 13.457 ( +0.20%) [ +0.66% +0.00% +0.20% / +0.61% +0.39% +0.20%] index_select strided 3 : Elapsed 0.135 ms (13.519 ms / 100) 13.390 -> 13.451 ( +0.46%) [ +0.43% +0.00% +0.04% / +0.46% +0.67% +0.66%] index_select strided 5 : Elapsed 0.134 ms (13.448 ms / 100) 13.563 -> 13.624 ( +0.45%) [ +0.38% +0.00% +0.10% / +0.52% +0.60% +0.45%] index_select strided 7 : Elapsed 0.136 ms (13.614 ms / 100) 13.564 -> 13.605 ( +0.30%) [ +0.46% +0.00% +0.17% / +0.30% +0.45% +0.41%] index_select strided 8 : Elapsed 0.136 ms (13.626 ms / 100) 13.572 -> 13.617 ( +0.33%) [ +0.63% +0.00% +0.05% / +0.45% +0.33% +0.41%] index_select random : Elapsed 0.137 ms (13.658 ms / 100) 13.428 -> 13.457 ( +0.22%) [ +0.56% +0.07% +0.00% / +0.44% +0.34% +0.22%] index_select random_sorted : Elapsed 0.135 ms (13.503 ms / 100) B = [2048, 1000] (stride (1000, 1)) A = [15, 1000] (stride (1, 15)) dim = 0 1.201 -> 1.214 ( +1.08%) [ +0.92% +0.33% +0.00% / +1.08% +1.08% +1.17%] index_add_ linear : Elapsed 0.012 ms (1.212 ms / 100) 1.154 -> 1.160 ( +0.52%) [ +1.04% +0.35% +0.00% / +1.73% +0.52% +0.69%] index_copy_ linear : Elapsed 0.012 ms (1.166 ms / 100) 1.194 -> 1.209 ( +1.26%) [ +1.17% +0.25% +0.00% / +1.26% +1.76% +1.84%] index_add_ reverse : Elapsed 0.012 ms (1.208 ms / 100) 1.147 -> 1.159 ( +1.05%) [ +0.96% +0.26% +0.00% / +1.05% +1.57% +1.66%] index_copy_ reverse : Elapsed 0.012 ms (1.158 ms / 100) 1.196 -> 1.206 ( +0.84%) [ +1.25% +0.33% +0.00% / +0.84% +1.34% +1.25%] index_add_ spread : Elapsed 0.012 ms (1.211 ms / 100) 1.144 -> 1.154 ( +0.87%) [ +1.05% +0.09% +0.00% / +0.87% +0.96% +1.14%] index_copy_ spread : Elapsed 0.012 ms (1.156 ms / 100) 1.199 -> 1.209 ( +0.83%) [ +1.08% +0.42% +0.00% / +0.83% +1.33% +1.42%] index_add_ strided 3 : Elapsed 0.012 ms (1.212 ms / 100) 1.153 -> 1.159 ( +0.52%) [ +0.87% +0.00% +0.00% / +0.69% +0.78% +0.52%] index_copy_ strided 3 : Elapsed 0.012 ms (1.163 ms / 100) 1.201 -> 1.211 ( +0.83%) [ +0.83% +0.25% +0.00% / +0.83% +1.25% +1.42%] index_add_ strided 5 : Elapsed 0.012 ms (1.211 ms / 100) 1.152 -> 1.162 ( +0.87%) [ +0.87% +0.00% +0.17% / +0.87% +0.87% +1.13%] index_copy_ strided 5 : Elapsed 0.012 ms (1.162 ms / 100) 1.201 -> 1.214 ( +1.08%) [ +1.00% +0.42% +0.00% / +1.08% +1.33% +1.33%] index_add_ strided 7 : Elapsed 0.012 ms (1.213 ms / 100) 1.152 -> 1.166 ( +1.22%) [ +0.95% +0.09% +0.00% / +1.22% +1.22% +1.30%] index_copy_ strided 7 : Elapsed 0.012 ms (1.163 ms / 100) 1.205 -> 1.212 ( +0.58%) [ +1.08% +1.00% +0.00% / +1.16% +0.58% +0.83%] index_add_ strided 255 : Elapsed 0.012 ms (1.218 ms / 100) 1.160 -> 1.160 ( +0.00%) [ +0.60% +0.26% +0.00% / +0.69% +0.09% +0.00%] index_copy_ strided 255 : Elapsed 0.012 ms (1.167 ms / 100) 1.199 -> 1.208 ( +0.75%) [ +0.83% +0.00% +0.00% / +0.83% +1.42% +0.75%] index_add_ strided 257 : Elapsed 0.012 ms (1.209 ms / 100) 1.147 -> 1.152 ( +0.44%) [ +0.61% +0.17% +0.00% / +0.87% +0.52% +0.44%] index_copy_ strided 257 : Elapsed 0.012 ms (1.154 ms / 100) 1.197 -> 1.212 ( +1.25%) [ +1.00% +0.33% +0.00% / +1.67% +1.25% +1.34%] index_add_ perm : Elapsed 0.012 ms (1.209 ms / 100) 1.149 -> 1.158 ( +0.78%) [ +0.87% +0.00% +0.00% / +0.78% +1.22% +1.22%] index_copy_ perm : Elapsed 0.012 ms (1.159 ms / 100) 1.204 -> 1.213 ( +0.75%) [ +0.91% +1.08% +0.00% / +0.75% +1.08% +1.16%] index_add_ perm_sorted : Elapsed 0.012 ms (1.215 ms / 100) 1.156 -> 1.160 ( +0.35%) [ +0.87% +0.09% +0.00% / +1.56% +0.35% +0.61%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.166 ms / 100) 15.023 -> 15.024 ( +0.01%) [ +0.00% +0.07% +0.11% / +0.01% +0.90% +0.85%] index_select const : Elapsed 0.150 ms (15.023 ms / 100) 15.028 -> 15.060 ( +0.21%) [ +0.25% +0.07% +0.00% / +0.21% +1.02% +1.03%] index_select wrap : Elapsed 0.151 ms (15.065 ms / 100) 15.057 -> 15.074 ( +0.11%) [ +0.23% +0.00% +0.19% / +0.11% +0.62% +0.60%] index_select linear : Elapsed 0.151 ms (15.091 ms / 100) 15.032 -> 15.058 ( +0.17%) [ +0.12% +0.17% +0.00% / +0.17% +1.13% +0.99%] index_select reverse : Elapsed 0.151 ms (15.050 ms / 100) 15.031 -> 15.038 ( +0.05%) [ +0.02% +0.01% +0.00% / +0.05% +1.03% +1.04%] index_select skip64 : Elapsed 0.150 ms (15.034 ms / 100) 15.013 -> 15.029 ( +0.11%) [ +0.19% +0.00% +0.11% / +0.11% +0.93% +0.77%] index_select skip256 : Elapsed 0.150 ms (15.041 ms / 100) 15.080 -> 15.104 ( +0.16%) [ +0.15% +0.09% +0.00% / +0.16% +0.92% +0.96%] index_select spread : Elapsed 0.151 ms (15.102 ms / 100) 15.065 -> 15.090 ( +0.17%) [ +0.11% +0.00% +0.09% / +0.17% +1.17% +1.17%] index_select strided 3 : Elapsed 0.151 ms (15.082 ms / 100) 15.023 -> 15.040 ( +0.11%) [ +0.18% +0.03% +0.00% / +0.11% +1.04% +1.13%] index_select strided 5 : Elapsed 0.150 ms (15.050 ms / 100) 15.095 -> 15.115 ( +0.13%) [ +0.07% +0.00% +0.01% / +0.13% +0.78% +0.73%] index_select strided 7 : Elapsed 0.151 ms (15.105 ms / 100) 15.088 -> 15.104 ( +0.11%) [ +0.05% +0.00% +0.08% / +0.11% +0.76% +0.76%] index_select strided 8 : Elapsed 0.151 ms (15.096 ms / 100) 15.118 -> 15.152 ( +0.22%) [ +0.13% +0.00% +0.12% / +0.22% +0.42% +0.47%] index_select random : Elapsed 0.151 ms (15.137 ms / 100) 15.109 -> 15.156 ( +0.31%) [ +0.32% +0.10% +0.00% / +0.31% +0.75% +0.86%] index_select random_sorted : Elapsed 0.152 ms (15.157 ms / 100) B = [2048, 1000] (stride (1, 2048)) dim = 0 fill_cnt = 15 0.952 -> 0.960 ( +0.84%) [ +1.05% +0.00% +0.00% / +0.84% +1.37% +1.68%] index_fill_ const : Elapsed 0.010 ms (0.962 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +0.63% +0.00% +0.00% / +0.63% +1.15% +4.81%] index_fill_ linear : Elapsed 0.010 ms (0.963 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +0.63% +0.21% +0.00% / +0.63% +1.04% +1.25%] index_fill_ reverse : Elapsed 0.010 ms (0.963 ms / 100) 0.952 -> 0.961 ( +0.95%) [ +0.84% +0.00% +0.00% / +0.95% +1.79% +1.58%] index_fill_ skip64 : Elapsed 0.010 ms (0.960 ms / 100) 0.951 -> 0.966 ( +1.58%) [ +0.95% +0.11% +0.00% / +1.79% +1.58% +1.58%] index_fill_ skip256 : Elapsed 0.010 ms (0.960 ms / 100) 1.085 -> 1.053 ( -2.95%) [ +0.00% +1.11% +0.00% / -0.37% -2.21% -2.95%] index_fill_ spread : Elapsed 0.011 ms (1.085 ms / 100) 0.955 -> 0.965 ( +1.05%) [ +0.84% +0.42% +0.00% / +1.05% +1.26% +1.26%] index_fill_ strided 3 : Elapsed 0.010 ms (0.963 ms / 100) 0.952 -> 0.962 ( +1.05%) [ +1.16% +0.42% +0.00% / +1.05% +1.47% +1.47%] index_fill_ strided 5 : Elapsed 0.010 ms (0.963 ms / 100) 0.954 -> 0.963 ( +0.94%) [ +0.84% +0.10% +0.00% / +0.94% +1.26% +0.94%] index_fill_ strided 7 : Elapsed 0.010 ms (0.962 ms / 100) 0.956 -> 0.964 ( +0.84%) [ +0.73% +0.00% +0.10% / +0.84% +1.26% +1.05%] index_fill_ strided 8 : Elapsed 0.010 ms (0.963 ms / 100) 0.956 -> 0.965 ( +0.94%) [ +0.73% +0.00% +0.00% / +0.94% +1.15% +1.26%] index_fill_ strided 16 : Elapsed 0.010 ms (0.963 ms / 100) 0.998 -> 0.970 ( -2.81%) [ +0.00% +0.00% +0.10% / +0.00% -2.81% -2.81%] index_fill_ strided 64 : Elapsed 0.010 ms (0.998 ms / 100) good 1.087 -> 1.004 ( -7.64%) [ +0.37% +0.74% +0.00% / +0.46% -7.64% -7.36%] index_fill_ strided 100 : Elapsed 0.011 ms (1.091 ms / 100) 0.956 -> 0.962 ( +0.63%) [ +0.94% +0.00% +0.21% / +1.05% +0.63% +0.73%] index_fill_ strided 255 : Elapsed 0.010 ms (0.965 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +1.04% +0.21% +0.00% / +0.84% +1.04% +0.63%] index_fill_ strided 256 : Elapsed 0.010 ms (0.967 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +1.04% +0.21% +0.00% / +1.15% +0.63% +1.04%] index_fill_ strided 257 : Elapsed 0.010 ms (0.967 ms / 100) 0.988 -> 0.967 ( -2.13%) [ +0.10% +0.51% +0.00% / +0.40% -2.13% -2.02%] index_fill_ random : Elapsed 0.010 ms (0.989 ms / 100) 0.995 -> 0.963 ( -3.22%) [ +0.50% +0.80% +0.00% / +0.50% -3.12% -3.22%] index_fill_ random_sorted : Elapsed 0.010 ms (1.000 ms / 100) 1.038 -> 1.015 ( -2.22%) [ +0.00% +0.19% +0.19% / +0.19% -2.22% -2.22%] index_fill_ perm : Elapsed 0.010 ms (1.038 ms / 100) 0.978 -> 0.967 ( -1.12%) [ +0.72% +0.51% +0.00% / +0.51% -1.12% -1.12%] index_fill_ perm_sorted : Elapsed 0.010 ms (0.985 ms / 100) B = [2048, 1000] (stride (1, 2048)) A = [15, 1000] (stride (1000, 1)) dim = 0 1.108 -> 1.118 ( +0.90%) [ +0.99% +0.27% +0.00% / +0.90% +1.35% +1.17%] index_add_ linear : Elapsed 0.011 ms (1.119 ms / 100) 1.165 -> 1.174 ( +0.77%) [ +1.97% +0.00% +0.09% / +0.77% +1.03% +1.20%] index_copy_ linear : Elapsed 0.012 ms (1.188 ms / 100) 1.104 -> 1.116 ( +1.09%) [ +1.09% +0.27% +0.00% / +1.09% +1.45% +1.63%] index_add_ reverse : Elapsed 0.011 ms (1.116 ms / 100) 1.161 -> 1.174 ( +1.12%) [ +1.38% +0.00% +0.00% / +1.12% +2.07% +1.64%] index_copy_ reverse : Elapsed 0.012 ms (1.177 ms / 100) 1.977 -> 1.892 ( -4.30%) [ +0.00% +0.35% +1.11% / +0.56% -4.30% -4.30%] index_add_ spread : Elapsed 0.020 ms (1.977 ms / 100) 1.274 -> 1.244 ( -2.35%) [ +0.55% +0.00% +0.24% / +0.39% -2.28% -2.35%] index_copy_ spread : Elapsed 0.013 ms (1.281 ms / 100) 1.108 -> 1.116 ( +0.72%) [ +0.72% +0.18% +0.00% / +0.72% +1.44% +1.71%] index_add_ strided 3 : Elapsed 0.011 ms (1.116 ms / 100) 1.159 -> 1.171 ( +1.04%) [ +1.12% +0.09% +0.00% / +1.04% +1.90% +2.50%] index_copy_ strided 3 : Elapsed 0.012 ms (1.172 ms / 100) 1.102 -> 1.113 ( +1.00%) [ +1.27% +0.45% +0.00% / +1.00% +2.36% +1.81%] index_add_ strided 5 : Elapsed 0.011 ms (1.116 ms / 100) 1.158 -> 1.168 ( +0.86%) [ +1.04% +0.00% +0.52% / +0.86% +2.07% +1.81%] index_copy_ strided 5 : Elapsed 0.012 ms (1.170 ms / 100) 1.112 -> 1.121 ( +0.81%) [ +0.90% +0.09% +0.00% / +0.81% +1.62% +1.35%] index_add_ strided 7 : Elapsed 0.011 ms (1.122 ms / 100) 1.165 -> 1.184 ( +1.63%) [ +1.29% +0.00% +0.00% / +1.63% +2.15% +1.80%] index_copy_ strided 7 : Elapsed 0.012 ms (1.180 ms / 100) 1.111 -> 1.119 ( +0.72%) [ +0.99% +0.00% +1.71% / +0.72% +1.17% +1.35%] index_add_ strided 255 : Elapsed 0.011 ms (1.122 ms / 100) 1.163 -> 1.176 ( +1.12%) [ +1.38% +0.00% +0.00% / +1.12% +1.55% +1.72%] index_copy_ strided 255 : Elapsed 0.012 ms (1.179 ms / 100) 1.112 -> 1.125 ( +1.17%) [ +1.26% +0.36% +0.00% / +1.17% +1.62% +1.89%] index_add_ strided 257 : Elapsed 0.011 ms (1.126 ms / 100) 1.166 -> 1.181 ( +1.29%) [ +1.80% +0.17% +0.00% / +1.29% +1.97% +1.63%] index_copy_ strided 257 : Elapsed 0.012 ms (1.187 ms / 100) good 1.560 -> 1.448 ( -7.18%) [ +0.64% +0.00% +0.51% / +0.38% -6.99% -7.18%] index_add_ perm : Elapsed 0.016 ms (1.570 ms / 100) 1.230 -> 1.186 ( -3.58%) [ +0.81% +0.00% +0.08% / +1.22% -3.25% -3.58%] index_copy_ perm : Elapsed 0.012 ms (1.240 ms / 100) good 1.549 -> 1.431 ( -7.62%) [ +1.16% +0.00% +0.32% / +0.65% -7.62% -6.97%] index_add_ perm_sorted : Elapsed 0.016 ms (1.567 ms / 100) 1.186 -> 1.182 ( -0.34%) [ +0.59% +1.10% +0.00% / +0.51% -0.34% +0.08%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.193 ms / 100) GOOD 20.850 -> 13.479 (-35.35%) [ +0.12% +0.00% +0.02% / -35.01% -35.35% -35.21%] index_select const : Elapsed 0.209 ms (20.876 ms / 100) Good 20.838 -> 17.224 (-17.34%) [ +0.18% +0.01% +0.00% / -17.34% -16.49% -16.47%] index_select wrap : Elapsed 0.209 ms (20.875 ms / 100) GOOD 20.890 -> 13.542 (-35.17%) [ +0.15% +0.00% +0.07% / -34.87% -35.17% -35.16%] index_select linear : Elapsed 0.209 ms (20.921 ms / 100) GOOD 20.873 -> 13.636 (-34.67%) [ +0.14% +0.05% +0.00% / -34.67% -34.36% -34.40%] index_select reverse : Elapsed 0.209 ms (20.903 ms / 100) GOOD 20.833 -> 13.451 (-35.43%) [ +0.05% +0.01% +0.00% / -35.33% -35.27% -35.43%] index_select skip64 : Elapsed 0.208 ms (20.844 ms / 100) GOOD 20.821 -> 13.428 (-35.51%) [ +0.04% +0.00% +0.02% / -34.87% -35.51% -35.41%] index_select skip256 : Elapsed 0.208 ms (20.830 ms / 100) GOOD 20.785 -> 13.526 (-34.92%) [ +0.07% +0.00% +0.09% / -34.92% -34.85% -34.88%] index_select spread : Elapsed 0.208 ms (20.800 ms / 100) GOOD 20.851 -> 14.129 (-32.24%) [ +0.05% +0.00% +0.05% / -32.19% -32.13% -32.24%] index_select strided 3 : Elapsed 0.209 ms (20.861 ms / 100) GOOD 20.828 -> 13.385 (-35.74%) [ +0.13% +0.12% +0.00% / -35.73% -35.74% -35.71%] index_select strided 5 : Elapsed 0.209 ms (20.856 ms / 100) Good 20.815 -> 17.128 (-17.71%) [ +0.11% +0.00% +0.12% / -17.42% -17.71% -17.52%] index_select strided 7 : Elapsed 0.208 ms (20.838 ms / 100) Good 20.837 -> 16.982 (-18.50%) [ +0.12% +0.00% +0.11% / -18.50% -17.32% -17.33%] index_select strided 8 : Elapsed 0.209 ms (20.861 ms / 100) GOOD 20.865 -> 16.572 (-20.58%) [ +0.04% +0.00% +0.00% / -20.58% -19.67% -19.53%] index_select random : Elapsed 0.209 ms (20.873 ms / 100) GOOD 20.885 -> 13.517 (-35.28%) [ +0.01% +0.02% +0.00% / -35.10% -35.24% -35.28%] index_select random_sorted : Elapsed 0.209 ms (20.888 ms / 100) B = [2048, 1000] (stride (1, 2048)) A = [15, 1000] (stride (1, 15)) dim = 0 1.203 -> 1.213 ( +0.83%) [ +0.58% +0.08% +0.00% / +0.91% +1.00% +0.83%] index_add_ linear : Elapsed 0.012 ms (1.210 ms / 100) 1.237 -> 1.248 ( +0.89%) [ +1.37% +0.00% +0.00% / +0.97% +0.89% +1.13%] index_copy_ linear : Elapsed 0.013 ms (1.254 ms / 100) 1.197 -> 1.209 ( +1.00%) [ +0.84% +0.17% +0.00% / +1.00% +1.59% +1.25%] index_add_ reverse : Elapsed 0.012 ms (1.207 ms / 100) 1.215 -> 1.229 ( +1.15%) [ +1.32% +0.25% +0.00% / +1.23% +1.15% +1.23%] index_copy_ reverse : Elapsed 0.012 ms (1.231 ms / 100) 1.901 -> 1.867 ( -1.79%) [ +0.00% +0.16% +0.00% / +0.26% -0.89% -1.79%] index_add_ spread : Elapsed 0.019 ms (1.901 ms / 100) 1.254 -> 1.259 ( +0.40%) [ +0.40% +0.24% +0.00% / +0.40% +0.56% +0.64%] index_copy_ spread : Elapsed 0.013 ms (1.259 ms / 100) 1.199 -> 1.212 ( +1.08%) [ +1.42% +0.25% +0.00% / +1.08% +1.42% +1.58%] index_add_ strided 3 : Elapsed 0.012 ms (1.216 ms / 100) 1.232 -> 1.245 ( +1.06%) [ +0.97% +0.24% +0.00% / +1.22% +1.22% +1.06%] index_copy_ strided 3 : Elapsed 0.012 ms (1.244 ms / 100) 1.199 -> 1.212 ( +1.08%) [ +0.92% +0.50% +0.00% / +1.08% +1.42% +1.42%] index_add_ strided 5 : Elapsed 0.012 ms (1.210 ms / 100) 1.226 -> 1.235 ( +0.73%) [ +1.06% +0.33% +0.00% / +0.73% +1.31% +1.88%] index_copy_ strided 5 : Elapsed 0.012 ms (1.239 ms / 100) 1.204 -> 1.214 ( +0.83%) [ +1.00% +0.08% +0.00% / +0.83% +1.16% +1.08%] index_add_ strided 7 : Elapsed 0.012 ms (1.216 ms / 100) 1.228 -> 1.241 ( +1.06%) [ +1.14% +0.00% +0.08% / +1.14% +1.06% +1.06%] index_copy_ strided 7 : Elapsed 0.012 ms (1.242 ms / 100) 1.208 -> 1.219 ( +0.91%) [ +0.91% +0.08% +0.00% / +0.99% +0.91% +0.91%] index_add_ strided 255 : Elapsed 0.012 ms (1.219 ms / 100) 1.240 -> 1.245 ( +0.40%) [ +0.56% +0.00% +0.16% / +1.45% +0.56% +0.40%] index_copy_ strided 255 : Elapsed 0.012 ms (1.247 ms / 100) 1.202 -> 1.211 ( +0.75%) [ +0.92% +0.17% +0.00% / +0.92% +0.75% +0.75%] index_add_ strided 257 : Elapsed 0.012 ms (1.213 ms / 100) 1.236 -> 1.240 ( +0.32%) [ +1.13% +0.00% +0.00% / +0.32% +1.38% +1.62%] index_copy_ strided 257 : Elapsed 0.013 ms (1.250 ms / 100) 1.752 -> 1.698 ( -3.08%) [ +0.00% +1.94% +0.51% / -0.46% -3.08% -2.57%] index_add_ perm : Elapsed 0.018 ms (1.752 ms / 100) 1.230 -> 1.237 ( +0.57%) [ +0.98% +0.16% +0.00% / +0.65% +0.57% +1.06%] index_copy_ perm : Elapsed 0.012 ms (1.242 ms / 100) 1.636 -> 1.574 ( -3.79%) [ +0.79% +0.00% +0.73% / +1.16% -3.79% -3.79%] index_add_ perm_sorted : Elapsed 0.016 ms (1.649 ms / 100) 1.240 -> 1.248 ( +0.65%) [ +1.45% +0.00% +0.16% / +1.13% +0.65% +0.89%] index_copy_ perm_sorted : Elapsed 0.013 ms (1.258 ms / 100) GOOD 24.070 -> 13.505 (-43.89%) [ +0.10% +0.00% +0.07% / -43.84% -43.88% -43.89%] index_select const : Elapsed 0.241 ms (24.095 ms / 100) GOOD 24.056 -> 13.500 (-43.88%) [ +0.02% +0.05% +0.00% / -43.80% -43.88% -43.84%] index_select wrap : Elapsed 0.241 ms (24.062 ms / 100) GOOD 24.110 -> 14.080 (-41.60%) [ +0.05% +0.06% +0.00% / -41.60% -41.39% -41.52%] index_select linear : Elapsed 0.241 ms (24.122 ms / 100) GOOD 24.047 -> 14.138 (-41.21%) [ +0.05% +0.02% +0.00% / -40.94% -41.21% -41.11%] index_select reverse : Elapsed 0.241 ms (24.059 ms / 100) GOOD 24.097 -> 13.503 (-43.96%) [ +0.00% +0.01% +0.01% / -43.90% -43.96% -43.93%] index_select skip64 : Elapsed 0.241 ms (24.097 ms / 100) GOOD 24.107 -> 13.497 (-44.01%) [ +0.07% +0.00% +0.00% / -43.78% -44.01% -43.88%] index_select skip256 : Elapsed 0.241 ms (24.124 ms / 100) GOOD 24.068 -> 13.552 (-43.69%) [ +0.10% +0.08% +0.00% / -43.62% -43.60% -43.69%] index_select spread : Elapsed 0.241 ms (24.093 ms / 100) GOOD 24.126 -> 13.521 (-43.96%) [ +0.05% +0.00% +0.00% / -43.82% -43.94% -43.96%] index_select strided 3 : Elapsed 0.241 ms (24.139 ms / 100) GOOD 24.071 -> 13.520 (-43.83%) [ +0.04% +0.04% +0.00% / -43.82% -43.83% -43.75%] index_select strided 5 : Elapsed 0.241 ms (24.081 ms / 100) GOOD 24.072 -> 13.503 (-43.91%) [ +0.00% +0.01% +0.02% / -43.88% -43.79% -43.91%] index_select strided 7 : Elapsed 0.241 ms (24.072 ms / 100) GOOD 24.093 -> 13.510 (-43.93%) [ +0.15% +0.00% +0.05% / -43.92% -43.93% -43.91%] index_select strided 8 : Elapsed 0.241 ms (24.128 ms / 100) GOOD 24.160 -> 13.506 (-44.10%) [ +0.02% +0.00% +0.00% / -43.89% -44.10% -44.02%] index_select random : Elapsed 0.242 ms (24.166 ms / 100) GOOD 24.104 -> 13.571 (-43.70%) [ +0.02% +0.00% +0.00% / -43.65% -43.67% -43.70%] index_select random_sorted : Elapsed 0.241 ms (24.110 ms / 100) out_shape = [15, 2048] in_shape = [15, 1000] idx_dim = 1 B = [15, 2048] (stride (2048, 1)) dim = 1 fill_cnt = 1000 2.281 -> 2.239 ( -1.84%) [ +0.22% +0.57% +0.00% / -1.45% -1.84% -1.71%] index_fill_ const : Elapsed 0.023 ms (2.286 ms / 100) 2.257 -> 2.242 ( -0.66%) [ +0.71% +0.04% +0.00% / -0.66% -0.04% -0.09%] index_fill_ linear : Elapsed 0.023 ms (2.273 ms / 100) 2.275 -> 2.244 ( -1.36%) [ +0.00% +0.18% +0.44% / -0.35% -1.19% -1.36%] index_fill_ reverse : Elapsed 0.023 ms (2.275 ms / 100) 2.248 -> 2.232 ( -0.71%) [ +0.49% +0.00% +0.27% / -0.71% -0.40% -0.49%] index_fill_ skip64 : Elapsed 0.023 ms (2.259 ms / 100) 2.251 -> 2.242 ( -0.40%) [ +0.40% +0.67% +0.00% / -0.36% -0.40% -0.13%] index_fill_ skip256 : Elapsed 0.023 ms (2.260 ms / 100) 2.339 -> 2.312 ( -1.15%) [ +0.04% +0.00% +0.04% / -1.03% -1.15% -1.11%] index_fill_ spread : Elapsed 0.023 ms (2.340 ms / 100) 2.321 -> 2.316 ( -0.22%) [ +0.30% +0.43% +0.00% / -0.22% +0.22% +0.22%] index_fill_ strided 3 : Elapsed 0.023 ms (2.328 ms / 100) 2.327 -> 2.314 ( -0.56%) [ +0.56% +0.00% +0.09% / +0.09% -0.52% -0.56%] index_fill_ strided 5 : Elapsed 0.023 ms (2.340 ms / 100) 2.340 -> 2.322 ( -0.77%) [ +0.04% +0.21% +0.00% / +0.04% -0.38% -0.77%] index_fill_ strided 7 : Elapsed 0.023 ms (2.341 ms / 100) 2.323 -> 2.325 ( +0.09%) [ +0.00% +0.17% +0.26% / +0.17% +0.73% +0.09%] index_fill_ strided 8 : Elapsed 0.023 ms (2.323 ms / 100) 2.299 -> 2.289 ( -0.43%) [ +0.00% +0.00% +0.04% / -0.09% -0.43% -0.43%] index_fill_ strided 16 : Elapsed 0.023 ms (2.299 ms / 100) 2.261 -> 2.249 ( -0.53%) [ +0.00% +0.09% +0.13% / -0.53% -0.18% -0.31%] index_fill_ strided 64 : Elapsed 0.023 ms (2.261 ms / 100) 2.340 -> 2.329 ( -0.47%) [ +0.43% +0.00% +0.47% / +0.00% -0.47% +0.17%] index_fill_ strided 100 : Elapsed 0.023 ms (2.350 ms / 100) 2.328 -> 2.321 ( -0.30%) [ +0.52% +0.47% +0.00% / -0.30% +0.21% +0.26%] index_fill_ strided 255 : Elapsed 0.023 ms (2.340 ms / 100) 2.263 -> 2.247 ( -0.71%) [ +0.00% +0.13% +0.00% / -0.09% -0.57% -0.71%] index_fill_ strided 256 : Elapsed 0.023 ms (2.263 ms / 100) 2.337 -> 2.325 ( -0.51%) [ +0.21% +0.04% +0.00% / -0.39% -0.26% -0.51%] index_fill_ strided 257 : Elapsed 0.023 ms (2.342 ms / 100) 2.335 -> 2.329 ( -0.26%) [ +0.00% +0.17% +0.34% / -0.26% +0.51% -0.13%] index_fill_ random : Elapsed 0.023 ms (2.335 ms / 100) 2.330 -> 2.299 ( -1.33%) [ +0.00% +0.09% +0.09% / -0.47% -1.12% -1.33%] index_fill_ random_sorted : Elapsed 0.023 ms (2.330 ms / 100) 2.328 -> 2.340 ( +0.52%) [ +0.60% +0.43% +0.00% / +0.56% +0.52% +0.86%] index_fill_ perm : Elapsed 0.023 ms (2.342 ms / 100) 2.330 -> 2.305 ( -1.07%) [ +0.00% +0.09% +0.00% / -0.73% -1.07% -0.94%] index_fill_ perm_sorted : Elapsed 0.023 ms (2.330 ms / 100) B = [15, 2048] (stride (2048, 1)) A = [15, 1000] (stride (1000, 1)) dim = 1 3.371 -> 3.311 ( -1.78%) [ +0.12% +0.00% +0.06% / -1.78% -1.57% -1.33%] index_add_ linear : Elapsed 0.034 ms (3.375 ms / 100) 3.331 -> 3.269 ( -1.86%) [ +0.00% +0.09% +0.09% / -1.86% -1.80% -1.83%] index_copy_ linear : Elapsed 0.033 ms (3.331 ms / 100) 3.378 -> 3.303 ( -2.22%) [ +0.09% +0.00% +0.09% / -2.19% -2.22% -2.16%] index_add_ reverse : Elapsed 0.034 ms (3.381 ms / 100) 3.333 -> 3.259 ( -2.22%) [ +0.00% +0.21% +0.06% / -2.13% -2.22% -2.01%] index_copy_ reverse : Elapsed 0.033 ms (3.333 ms / 100) 3.424 -> 3.347 ( -2.25%) [ +0.00% +0.06% +0.06% / -1.93% -2.13% -2.25%] index_add_ spread : Elapsed 0.034 ms (3.424 ms / 100) 3.404 -> 3.333 ( -2.09%) [ +0.41% +0.00% +0.15% / -1.85% -2.09% -1.88%] index_copy_ spread : Elapsed 0.034 ms (3.418 ms / 100) 3.409 -> 3.343 ( -1.94%) [ +0.03% +0.06% +0.00% / -1.94% -1.47% -1.70%] index_add_ strided 3 : Elapsed 0.034 ms (3.410 ms / 100) 3.396 -> 3.336 ( -1.77%) [ +0.29% +0.00% +0.06% / -1.77% -1.68% -1.68%] index_copy_ strided 3 : Elapsed 0.034 ms (3.406 ms / 100) 3.423 -> 3.371 ( -1.52%) [ +0.15% +0.18% +0.00% / -1.52% -1.20% -1.20%] index_add_ strided 5 : Elapsed 0.034 ms (3.428 ms / 100) 3.406 -> 3.334 ( -2.11%) [ +0.18% +0.03% +0.00% / -2.11% -1.44% -1.41%] index_copy_ strided 5 : Elapsed 0.034 ms (3.412 ms / 100) 3.430 -> 3.373 ( -1.66%) [ +0.03% +0.06% +0.00% / -1.60% -1.49% -1.66%] index_add_ strided 7 : Elapsed 0.034 ms (3.431 ms / 100) 3.407 -> 3.345 ( -1.82%) [ +0.09% +0.00% +0.12% / -1.53% -1.79% -1.82%] index_copy_ strided 7 : Elapsed 0.034 ms (3.410 ms / 100) 3.446 -> 3.400 ( -1.33%) [ +0.06% +0.00% +0.26% / -1.33% -0.78% -0.96%] index_add_ strided 255 : Elapsed 0.034 ms (3.448 ms / 100) 3.416 -> 3.360 ( -1.64%) [ +0.03% +0.00% +0.12% / -1.64% -1.14% -1.32%] index_copy_ strided 255 : Elapsed 0.034 ms (3.417 ms / 100) 3.441 -> 3.388 ( -1.54%) [ +0.20% +0.26% +0.00% / -1.54% -1.37% -1.37%] index_add_ strided 257 : Elapsed 0.034 ms (3.448 ms / 100) 3.410 -> 3.356 ( -1.58%) [ +0.18% +0.21% +0.00% / -1.38% -1.58% -1.44%] index_copy_ strided 257 : Elapsed 0.034 ms (3.416 ms / 100) 3.455 -> 3.408 ( -1.36%) [ +0.12% +0.14% +0.00% / -1.10% -1.07% -1.36%] index_add_ perm : Elapsed 0.035 ms (3.459 ms / 100) 3.427 -> 3.363 ( -1.87%) [ +0.06% +0.03% +0.00% / -1.34% -1.87% -1.75%] index_copy_ perm : Elapsed 0.034 ms (3.429 ms / 100) 3.413 -> 3.344 ( -2.02%) [ +0.12% +0.32% +0.00% / -2.02% -2.02% -1.90%] index_add_ perm_sorted : Elapsed 0.034 ms (3.417 ms / 100) 3.402 -> 3.331 ( -2.09%) [ +0.00% +0.44% +0.03% / -2.09% -1.79% -2.06%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.402 ms / 100) 4.522 -> 4.454 ( -1.50%) [ +0.00% +0.15% +0.18% / -1.50% -1.50% -1.37%] index_select const : Elapsed 0.045 ms (4.522 ms / 100) 4.566 -> 4.484 ( -1.80%) [ +0.35% +0.18% +0.00% / -1.77% -1.80% -1.75%] index_select wrap : Elapsed 0.046 ms (4.582 ms / 100) 4.572 -> 4.484 ( -1.92%) [ +0.26% +0.09% +0.00% / -1.84% -1.92% -1.64%] index_select linear : Elapsed 0.046 ms (4.584 ms / 100) 4.561 -> 4.487 ( -1.62%) [ +0.00% +0.13% +0.15% / -1.29% -1.47% -1.62%] index_select reverse : Elapsed 0.046 ms (4.561 ms / 100) 4.521 -> 4.461 ( -1.33%) [ +0.00% +0.42% +0.35% / -1.28% -1.15% -1.33%] index_select skip64 : Elapsed 0.045 ms (4.521 ms / 100) 4.526 -> 4.457 ( -1.52%) [ +0.00% +0.02% +0.09% / -1.52% -1.37% -1.52%] index_select skip256 : Elapsed 0.045 ms (4.526 ms / 100) 4.573 -> 4.502 ( -1.55%) [ +0.02% +0.04% +0.00% / -1.53% -1.40% -1.55%] index_select spread : Elapsed 0.046 ms (4.574 ms / 100) 4.568 -> 4.463 ( -2.30%) [ +0.00% +0.39% +0.09% / -1.55% -2.06% -2.30%] index_select strided 3 : Elapsed 0.046 ms (4.568 ms / 100) 4.566 -> 4.481 ( -1.86%) [ +0.00% +0.04% +0.24% / -1.53% -1.84% -1.86%] index_select strided 5 : Elapsed 0.046 ms (4.566 ms / 100) 4.559 -> 4.478 ( -1.78%) [ +0.09% +0.20% +0.00% / -1.43% -1.78% -1.60%] index_select strided 7 : Elapsed 0.046 ms (4.563 ms / 100) 4.571 -> 4.483 ( -1.93%) [ +0.00% +0.13% +0.13% / -1.60% -1.93% -1.90%] index_select strided 8 : Elapsed 0.046 ms (4.571 ms / 100) 4.596 -> 4.492 ( -2.26%) [ +0.00% +0.00% +0.02% / -1.78% -2.26% -2.13%] index_select strided 16 : Elapsed 0.046 ms (4.596 ms / 100) 4.619 -> 4.482 ( -2.97%) [ +0.00% +0.02% +0.19% / -2.51% -2.71% -2.97%] index_select strided 64 : Elapsed 0.046 ms (4.619 ms / 100) 4.568 -> 4.466 ( -2.23%) [ +0.18% +0.00% +0.09% / -1.88% -2.10% -2.23%] index_select strided 100 : Elapsed 0.046 ms (4.576 ms / 100) 4.596 -> 4.493 ( -2.24%) [ +0.04% +0.00% +0.02% / -1.68% -2.24% -2.09%] index_select strided 255 : Elapsed 0.046 ms (4.598 ms / 100) 4.583 -> 4.504 ( -1.72%) [ +0.00% +0.20% +0.33% / -1.72% -1.68% -1.57%] index_select strided 256 : Elapsed 0.046 ms (4.583 ms / 100) 4.585 -> 4.503 ( -1.79%) [ +0.11% +0.07% +0.00% / -1.50% -1.66% -1.79%] index_select strided 257 : Elapsed 0.046 ms (4.590 ms / 100) 4.599 -> 4.504 ( -2.07%) [ +0.00% +0.22% +0.04% / -1.48% -2.07% -2.07%] index_select random : Elapsed 0.046 ms (4.599 ms / 100) 4.563 -> 4.492 ( -1.56%) [ +0.11% +0.00% +0.11% / -1.14% -1.56% -1.49%] index_select random_sorted : Elapsed 0.046 ms (4.568 ms / 100) B = [15, 2048] (stride (2048, 1)) A = [15, 1000] (stride (1, 15)) dim = 1 3.370 -> 3.298 ( -2.14%) [ +0.62% +0.00% +0.27% / -1.90% -2.14% -1.99%] index_add_ linear : Elapsed 0.034 ms (3.391 ms / 100) 3.326 -> 3.268 ( -1.74%) [ +0.12% +0.00% +0.15% / -1.35% -1.74% -1.65%] index_copy_ linear : Elapsed 0.033 ms (3.330 ms / 100) 3.360 -> 3.294 ( -1.96%) [ +0.03% +0.09% +0.00% / -1.96% -1.10% -0.74%] index_add_ reverse : Elapsed 0.034 ms (3.361 ms / 100) 3.309 -> 3.260 ( -1.48%) [ +0.18% +0.00% +0.03% / -1.48% -0.42% -0.45%] index_copy_ reverse : Elapsed 0.033 ms (3.315 ms / 100) 3.414 -> 3.334 ( -2.34%) [ +0.00% +0.35% +0.21% / -1.70% -2.26% -2.34%] index_add_ spread : Elapsed 0.034 ms (3.414 ms / 100) 3.392 -> 3.334 ( -1.71%) [ +0.00% +0.12% +0.06% / -1.24% -1.71% -1.68%] index_copy_ spread : Elapsed 0.034 ms (3.392 ms / 100) 3.405 -> 3.344 ( -1.79%) [ +0.06% +0.06% +0.00% / -1.79% -0.79% -0.76%] index_add_ strided 3 : Elapsed 0.034 ms (3.407 ms / 100) 3.376 -> 3.348 ( -0.83%) [ +0.27% +0.12% +0.00% / -0.83% -0.36% -0.36%] index_copy_ strided 3 : Elapsed 0.034 ms (3.385 ms / 100) 3.418 -> 3.365 ( -1.55%) [ +0.06% +0.12% +0.00% / -1.55% -1.14% -1.14%] index_add_ strided 5 : Elapsed 0.034 ms (3.420 ms / 100) 3.387 -> 3.351 ( -1.06%) [ +0.00% +0.03% +0.06% / -1.06% -0.68% -0.65%] index_copy_ strided 5 : Elapsed 0.034 ms (3.387 ms / 100) 3.425 -> 3.390 ( -1.02%) [ +0.00% +0.00% +0.03% / -1.02% -0.38% -0.53%] index_add_ strided 7 : Elapsed 0.034 ms (3.425 ms / 100) 3.392 -> 3.362 ( -0.88%) [ +0.21% +0.00% +0.15% / -0.88% -0.62% -0.32%] index_copy_ strided 7 : Elapsed 0.034 ms (3.399 ms / 100) 3.425 -> 3.401 ( -0.70%) [ +0.26% +0.44% +0.00% / -0.70% -0.58% -0.47%] index_add_ strided 255 : Elapsed 0.034 ms (3.434 ms / 100) 3.394 -> 3.364 ( -0.88%) [ +0.00% +0.15% +0.00% / -0.88% -0.68% -0.77%] index_copy_ strided 255 : Elapsed 0.034 ms (3.394 ms / 100) 3.426 -> 3.394 ( -0.93%) [ +0.47% +0.23% +0.00% / -0.93% -0.61% -0.47%] index_add_ strided 257 : Elapsed 0.034 ms (3.442 ms / 100) 3.389 -> 3.358 ( -0.91%) [ +0.03% +0.06% +0.00% / -0.91% -0.53% -0.68%] index_copy_ strided 257 : Elapsed 0.034 ms (3.390 ms / 100) 3.443 -> 3.416 ( -0.78%) [ +0.09% +0.00% +0.20% / -0.76% -0.78% -0.64%] index_add_ perm : Elapsed 0.034 ms (3.446 ms / 100) 3.405 -> 3.374 ( -0.91%) [ +0.00% +0.06% +0.12% / -0.79% -0.91% -0.62%] index_copy_ perm : Elapsed 0.034 ms (3.405 ms / 100) 3.410 -> 3.333 ( -2.26%) [ +0.00% +0.21% +0.06% / -2.26% -1.76% -1.85%] index_add_ perm_sorted : Elapsed 0.034 ms (3.410 ms / 100) 3.383 -> 3.340 ( -1.27%) [ +0.00% +0.06% +0.03% / -1.24% -0.86% -1.27%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.383 ms / 100) 4.510 -> 4.446 ( -1.42%) [ +0.22% +0.00% +0.29% / -1.13% -1.42% -1.11%] index_select const : Elapsed 0.045 ms (4.520 ms / 100) 4.525 -> 4.504 ( -0.46%) [ +0.09% +0.00% +0.04% / -0.46% -0.33% -0.35%] index_select wrap : Elapsed 0.045 ms (4.529 ms / 100) 4.522 -> 4.486 ( -0.80%) [ +0.00% +0.09% +0.24% / -0.22% -0.80% -0.80%] index_select linear : Elapsed 0.045 ms (4.522 ms / 100) 4.528 -> 4.493 ( -0.77%) [ +0.00% +0.24% +0.15% / -0.77% -0.75% -0.55%] index_select reverse : Elapsed 0.045 ms (4.528 ms / 100) 4.497 -> 4.451 ( -1.02%) [ +0.00% +0.04% +0.07% / -0.80% -0.96% -1.02%] index_select skip64 : Elapsed 0.045 ms (4.497 ms / 100) 4.498 -> 4.456 ( -0.93%) [ +0.00% +0.20% +0.09% / -0.93% -0.78% -0.76%] index_select skip256 : Elapsed 0.045 ms (4.498 ms / 100) 4.543 -> 4.495 ( -1.06%) [ +0.13% +0.15% +0.00% / -1.06% -0.95% -1.01%] index_select spread : Elapsed 0.045 ms (4.549 ms / 100) 4.529 -> 4.522 ( -0.15%) [ +0.00% +0.18% +0.20% / +0.00% -0.15% -0.13%] index_select strided 3 : Elapsed 0.045 ms (4.529 ms / 100) 4.513 -> 4.490 ( -0.51%) [ +0.04% +0.13% +0.00% / -0.51% -0.35% -0.44%] index_select strided 5 : Elapsed 0.045 ms (4.515 ms / 100) 4.541 -> 4.508 ( -0.73%) [ +0.00% +0.33% +0.31% / -0.26% -0.46% -0.73%] index_select strided 7 : Elapsed 0.045 ms (4.541 ms / 100) 4.509 -> 4.475 ( -0.75%) [ +0.00% +0.22% +0.16% / -0.31% -0.49% -0.75%] index_select strided 8 : Elapsed 0.045 ms (4.509 ms / 100) 4.514 -> 4.486 ( -0.62%) [ +0.22% +0.00% +0.16% / -0.58% -0.62% -0.44%] index_select strided 16 : Elapsed 0.045 ms (4.524 ms / 100) 4.528 -> 4.486 ( -0.93%) [ +0.00% +0.13% +0.15% / -0.64% -0.93% -0.77%] index_select strided 64 : Elapsed 0.045 ms (4.528 ms / 100) 4.511 -> 4.458 ( -1.17%) [ +0.13% +0.00% +0.27% / -0.64% -1.17% -1.09%] index_select strided 100 : Elapsed 0.045 ms (4.517 ms / 100) 4.516 -> 4.500 ( -0.35%) [ +0.00% +0.20% +0.13% / -0.13% -0.31% -0.35%] index_select strided 255 : Elapsed 0.045 ms (4.516 ms / 100) 4.509 -> 4.473 ( -0.80%) [ +0.20% +0.07% +0.00% / -0.80% -0.53% -0.40%] index_select strided 256 : Elapsed 0.045 ms (4.518 ms / 100) 4.533 -> 4.518 ( -0.33%) [ +0.33% +0.18% +0.00% / -0.33% -0.29% -0.11%] index_select strided 257 : Elapsed 0.045 ms (4.548 ms / 100) 4.581 -> 4.516 ( -1.42%) [ +0.02% +0.07% +0.00% / -1.42% -1.16% -1.31%] index_select random : Elapsed 0.046 ms (4.582 ms / 100) 4.570 -> 4.486 ( -1.84%) [ +0.15% +0.00% +0.20% / -1.44% -1.58% -1.84%] index_select random_sorted : Elapsed 0.046 ms (4.577 ms / 100) B = [15, 2048] (stride (1, 15)) dim = 1 fill_cnt = 1000 2.244 -> 2.241 ( -0.13%) [ +0.27% +0.00% +0.04% / +0.18% +0.09% -0.13%] index_fill_ const : Elapsed 0.022 ms (2.250 ms / 100) 2.250 -> 2.250 ( +0.00%) [ +0.04% +0.00% +0.04% / +0.00% +0.71% +0.84%] index_fill_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.259 -> 2.250 ( -0.40%) [ +0.18% +0.00% +0.13% / +0.22% -0.31% -0.40%] index_fill_ reverse : Elapsed 0.023 ms (2.263 ms / 100) 2.228 -> 2.233 ( +0.22%) [ +0.00% +0.13% +0.27% / +0.22% +0.99% +1.03%] index_fill_ skip64 : Elapsed 0.022 ms (2.228 ms / 100) 2.236 -> 2.236 ( +0.00%) [ +0.81% +0.00% +0.36% / +0.00% +0.22% +0.67%] index_fill_ skip256 : Elapsed 0.023 ms (2.254 ms / 100) 2.290 -> 2.282 ( -0.35%) [ +0.26% +0.00% +0.22% / +0.09% -0.35% -0.35%] index_fill_ spread : Elapsed 0.023 ms (2.296 ms / 100) 2.276 -> 2.277 ( +0.04%) [ +0.00% +0.53% +0.04% / +0.18% +0.18% +0.04%] index_fill_ strided 3 : Elapsed 0.023 ms (2.276 ms / 100) 2.285 -> 2.286 ( +0.04%) [ +0.00% +0.13% +0.09% / +0.04% +0.22% +0.61%] index_fill_ strided 5 : Elapsed 0.023 ms (2.285 ms / 100) 2.268 -> 2.270 ( +0.09%) [ +0.09% +0.35% +0.00% / +0.09% +0.31% +0.09%] index_fill_ strided 7 : Elapsed 0.023 ms (2.270 ms / 100) 2.255 -> 2.254 ( -0.04%) [ +0.62% +0.53% +0.00% / +0.22% +0.04% -0.04%] index_fill_ strided 8 : Elapsed 0.023 ms (2.269 ms / 100) 2.237 -> 2.245 ( +0.36%) [ +0.40% +0.00% +0.22% / +0.36% +0.40% +0.36%] index_fill_ strided 16 : Elapsed 0.022 ms (2.246 ms / 100) 2.243 -> 2.253 ( +0.45%) [ +0.27% +0.18% +0.00% / +0.53% +0.49% +0.45%] index_fill_ strided 64 : Elapsed 0.022 ms (2.249 ms / 100) 2.252 -> 2.254 ( +0.09%) [ +0.00% +0.22% +0.00% / +0.58% +0.18% +0.09%] index_fill_ strided 100 : Elapsed 0.023 ms (2.252 ms / 100) 2.269 -> 2.278 ( +0.40%) [ +0.00% +0.00% +0.04% / +0.53% +0.44% +0.40%] index_fill_ strided 255 : Elapsed 0.023 ms (2.269 ms / 100) 2.237 -> 2.234 ( -0.13%) [ +0.18% +0.04% +0.00% / -0.09% -0.13% +0.31%] index_fill_ strided 256 : Elapsed 0.022 ms (2.241 ms / 100) 2.270 -> 2.274 ( +0.18%) [ +0.00% +0.22% +0.22% / +0.48% +0.26% +0.18%] index_fill_ strided 257 : Elapsed 0.023 ms (2.270 ms / 100) 2.273 -> 2.268 ( -0.22%) [ +0.09% +0.00% +0.09% / -0.18% -0.22% -0.18%] index_fill_ random : Elapsed 0.023 ms (2.275 ms / 100) 2.256 -> 2.273 ( +0.75%) [ +0.71% +0.00% +0.40% / +0.98% +0.84% +0.75%] index_fill_ random_sorted : Elapsed 0.023 ms (2.272 ms / 100) 2.266 -> 2.271 ( +0.22%) [ +0.00% +0.22% +0.22% / +0.44% +0.44% +0.22%] index_fill_ perm : Elapsed 0.023 ms (2.266 ms / 100) 2.270 -> 2.276 ( +0.26%) [ +0.00% +0.26% +0.31% / +0.26% +0.40% +0.40%] index_fill_ perm_sorted : Elapsed 0.023 ms (2.270 ms / 100) B = [15, 2048] (stride (1, 15)) A = [15, 1000] (stride (1000, 1)) dim = 1 3.336 -> 3.308 ( -0.84%) [ +0.18% +0.00% +0.15% / -0.69% -0.81% -0.84%] index_add_ linear : Elapsed 0.033 ms (3.342 ms / 100) 3.312 -> 3.275 ( -1.12%) [ +0.15% +0.00% +0.09% / -0.85% -1.00% -1.12%] index_copy_ linear : Elapsed 0.033 ms (3.317 ms / 100) 3.334 -> 3.304 ( -0.90%) [ +0.00% +0.06% +0.18% / -0.90% -0.48% -0.57%] index_add_ reverse : Elapsed 0.033 ms (3.334 ms / 100) 3.307 -> 3.281 ( -0.79%) [ +0.09% +0.03% +0.00% / -0.79% -0.70% -0.79%] index_copy_ reverse : Elapsed 0.033 ms (3.310 ms / 100) 3.366 -> 3.330 ( -1.07%) [ +0.21% +0.03% +0.00% / -0.83% -1.04% -1.07%] index_add_ spread : Elapsed 0.034 ms (3.373 ms / 100) 3.353 -> 3.314 ( -1.16%) [ +0.27% +0.00% +0.27% / -0.84% -1.16% -0.84%] index_copy_ spread : Elapsed 0.034 ms (3.362 ms / 100) 3.342 -> 3.317 ( -0.75%) [ +0.00% +0.18% +0.15% / -0.75% -0.42% -0.27%] index_add_ strided 3 : Elapsed 0.033 ms (3.342 ms / 100) 3.336 -> 3.310 ( -0.78%) [ +0.00% +0.00% +0.00% / -0.78% -0.66% -0.60%] index_copy_ strided 3 : Elapsed 0.033 ms (3.336 ms / 100) 3.352 -> 3.327 ( -0.75%) [ +0.00% +0.18% +0.15% / -0.75% -0.54% -0.57%] index_add_ strided 5 : Elapsed 0.034 ms (3.352 ms / 100) 3.342 -> 3.314 ( -0.84%) [ +0.06% +0.00% +0.09% / -0.84% -0.69% -0.75%] index_copy_ strided 5 : Elapsed 0.033 ms (3.344 ms / 100) 3.350 -> 3.318 ( -0.96%) [ +0.24% +0.00% +0.36% / -0.63% -0.81% -0.96%] index_add_ strided 7 : Elapsed 0.034 ms (3.358 ms / 100) 3.339 -> 3.306 ( -0.99%) [ +0.00% +0.03% +0.03% / -0.78% -0.99% -0.87%] index_copy_ strided 7 : Elapsed 0.033 ms (3.339 ms / 100) 3.355 -> 3.325 ( -0.89%) [ +0.09% +0.33% +0.00% / -0.89% -0.51% -0.33%] index_add_ strided 255 : Elapsed 0.034 ms (3.358 ms / 100) 3.324 -> 3.306 ( -0.54%) [ +0.06% +0.06% +0.00% / -0.54% -0.54% -0.36%] index_copy_ strided 255 : Elapsed 0.033 ms (3.326 ms / 100) 3.350 -> 3.314 ( -1.07%) [ +0.00% +0.12% +0.03% / -0.90% -1.07% -1.04%] index_add_ strided 257 : Elapsed 0.034 ms (3.350 ms / 100) 3.331 -> 3.302 ( -0.87%) [ +0.00% +0.15% +0.00% / -0.63% -0.87% -0.72%] index_copy_ strided 257 : Elapsed 0.033 ms (3.331 ms / 100) 3.370 -> 3.337 ( -0.98%) [ +0.00% +0.24% +0.12% / -0.86% -0.98% -0.89%] index_add_ perm : Elapsed 0.034 ms (3.370 ms / 100) 3.345 -> 3.311 ( -1.02%) [ +0.27% +0.30% +0.00% / -0.81% -0.99% -1.02%] index_copy_ perm : Elapsed 0.034 ms (3.354 ms / 100) 3.345 -> 3.307 ( -1.14%) [ +0.15% +0.00% +0.21% / -0.72% -1.14% -0.51%] index_add_ perm_sorted : Elapsed 0.034 ms (3.350 ms / 100) 3.329 -> 3.297 ( -0.96%) [ +0.06% +0.00% +0.21% / -0.42% -0.69% -0.96%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.331 ms / 100) 4.474 -> 4.472 ( -0.04%) [ +0.00% +0.04% +0.04% / +0.11% +0.09% -0.04%] index_select const : Elapsed 0.045 ms (4.474 ms / 100) 4.520 -> 4.511 ( -0.20%) [ +0.13% +0.00% +0.11% / -0.13% -0.20% -0.15%] index_select wrap : Elapsed 0.045 ms (4.526 ms / 100) 4.529 -> 4.504 ( -0.55%) [ +0.00% +0.20% +0.24% / +0.20% -0.53% -0.55%] index_select linear : Elapsed 0.045 ms (4.529 ms / 100) 4.537 -> 4.510 ( -0.60%) [ +0.15% +0.15% +0.00% / +0.20% -0.60% -0.31%] index_select reverse : Elapsed 0.045 ms (4.544 ms / 100) 4.483 -> 4.481 ( -0.04%) [ +0.00% +0.09% +0.04% / -0.04% +0.71% +0.38%] index_select skip64 : Elapsed 0.045 ms (4.483 ms / 100) 4.479 -> 4.478 ( -0.02%) [ +0.07% +0.13% +0.00% / -0.02% +0.22% +0.27%] index_select skip256 : Elapsed 0.045 ms (4.482 ms / 100) 4.512 -> 4.506 ( -0.13%) [ +0.13% +0.00% +0.27% / +0.38% -0.07% -0.13%] index_select spread : Elapsed 0.045 ms (4.518 ms / 100) 4.517 -> 4.511 ( -0.13%) [ +0.00% +0.04% +0.15% / +0.15% -0.13% -0.04%] index_select strided 3 : Elapsed 0.045 ms (4.517 ms / 100) 4.519 -> 4.519 ( +0.00%) [ +0.20% +0.00% +0.13% / +0.27% +0.00% +0.02%] index_select strided 5 : Elapsed 0.045 ms (4.528 ms / 100) 4.514 -> 4.515 ( +0.02%) [ +0.13% +0.00% +0.07% / +0.02% +0.55% +0.31%] index_select strided 7 : Elapsed 0.045 ms (4.520 ms / 100) 4.514 -> 4.519 ( +0.11%) [ +0.13% +0.31% +0.00% / +0.11% +0.49% +0.40%] index_select strided 8 : Elapsed 0.045 ms (4.520 ms / 100) 4.541 -> 4.525 ( -0.35%) [ +0.31% +0.00% +0.35% / +0.00% -0.26% -0.35%] index_select strided 16 : Elapsed 0.046 ms (4.555 ms / 100) 4.567 -> 4.526 ( -0.90%) [ +0.13% +0.00% +0.00% / -0.20% -0.90% -0.72%] index_select strided 64 : Elapsed 0.046 ms (4.573 ms / 100) 4.514 -> 4.489 ( -0.55%) [ +0.00% +0.22% +0.18% / +0.02% -0.55% -0.18%] index_select strided 100 : Elapsed 0.045 ms (4.514 ms / 100) 4.533 -> 4.538 ( +0.11%) [ +0.24% +0.38% +0.00% / +0.15% +0.26% +0.11%] index_select strided 255 : Elapsed 0.045 ms (4.544 ms / 100) 4.528 -> 4.520 ( -0.18%) [ +0.02% +0.20% +0.00% / -0.07% +0.11% -0.18%] index_select strided 256 : Elapsed 0.045 ms (4.529 ms / 100) 4.529 -> 4.522 ( -0.15%) [ +0.07% +0.11% +0.00% / -0.04% -0.07% -0.15%] index_select strided 257 : Elapsed 0.045 ms (4.532 ms / 100) 4.544 -> 4.504 ( -0.88%) [ +0.07% +0.04% +0.00% / -0.11% -0.64% -0.88%] index_select random : Elapsed 0.045 ms (4.547 ms / 100) 4.517 -> 4.491 ( -0.58%) [ +0.00% +0.24% +0.18% / -0.04% -0.38% -0.58%] index_select random_sorted : Elapsed 0.045 ms (4.517 ms / 100) B = [15, 2048] (stride (1, 15)) A = [15, 1000] (stride (1, 15)) dim = 1 3.341 -> 3.296 ( -1.35%) [ +0.06% +0.00% +0.00% / -1.05% -1.35% -1.35%] index_add_ linear : Elapsed 0.033 ms (3.343 ms / 100) 3.304 -> 3.251 ( -1.60%) [ +0.12% +0.00% +0.06% / -1.00% -1.48% -1.60%] index_copy_ linear : Elapsed 0.033 ms (3.308 ms / 100) 3.325 -> 3.303 ( -0.66%) [ +0.15% +0.00% +0.09% / -0.66% +0.12% +0.00%] index_add_ reverse : Elapsed 0.033 ms (3.330 ms / 100) 3.280 -> 3.256 ( -0.73%) [ +0.12% +0.00% +0.24% / -0.73% +0.06% +0.06%] index_copy_ reverse : Elapsed 0.033 ms (3.284 ms / 100) 3.367 -> 3.323 ( -1.31%) [ +0.00% +0.03% +0.03% / -0.92% -1.22% -1.31%] index_add_ spread : Elapsed 0.034 ms (3.367 ms / 100) 3.335 -> 3.293 ( -1.26%) [ +0.00% +0.15% +0.30% / -0.48% -1.26% -1.26%] index_copy_ spread : Elapsed 0.033 ms (3.335 ms / 100) 3.347 -> 3.312 ( -1.05%) [ +0.00% +0.09% +0.03% / -1.05% -0.21% -0.33%] index_add_ strided 3 : Elapsed 0.033 ms (3.347 ms / 100) 3.314 -> 3.286 ( -0.84%) [ +0.06% +0.03% +0.00% / -0.84% +0.00% -0.27%] index_copy_ strided 3 : Elapsed 0.033 ms (3.316 ms / 100) 3.350 -> 3.324 ( -0.78%) [ +0.00% +0.09% +0.09% / -0.78% -0.69% -0.45%] index_add_ strided 5 : Elapsed 0.033 ms (3.350 ms / 100) 3.324 -> 3.298 ( -0.78%) [ +0.06% +0.00% +0.06% / -0.78% -0.57% -0.45%] index_copy_ strided 5 : Elapsed 0.033 ms (3.326 ms / 100) 3.357 -> 3.324 ( -0.98%) [ +0.18% +0.12% +0.00% / -0.98% -0.77% -0.80%] index_add_ strided 7 : Elapsed 0.034 ms (3.363 ms / 100) 3.321 -> 3.292 ( -0.87%) [ +0.06% +0.00% +0.06% / -0.87% -0.66% -0.63%] index_copy_ strided 7 : Elapsed 0.033 ms (3.323 ms / 100) 3.344 -> 3.315 ( -0.87%) [ +0.00% +0.18% +0.24% / -0.87% -0.36% -0.42%] index_add_ strided 255 : Elapsed 0.033 ms (3.344 ms / 100) 3.304 -> 3.277 ( -0.82%) [ +0.00% +0.00% +0.15% / -0.82% -0.48% -0.61%] index_copy_ strided 255 : Elapsed 0.033 ms (3.304 ms / 100) 3.342 -> 3.314 ( -0.84%) [ +0.15% +0.09% +0.00% / -0.84% -0.21% -0.51%] index_add_ strided 257 : Elapsed 0.033 ms (3.347 ms / 100) 3.298 -> 3.263 ( -1.06%) [ +0.18% +0.18% +0.00% / -1.06% -0.33% -0.30%] index_copy_ strided 257 : Elapsed 0.033 ms (3.304 ms / 100) 3.365 -> 3.325 ( -1.19%) [ +0.00% +0.03% +0.03% / -0.74% -1.19% -0.83%] index_add_ perm : Elapsed 0.034 ms (3.365 ms / 100) 3.325 -> 3.290 ( -1.05%) [ +0.27% +0.12% +0.00% / -0.66% -1.05% -0.87%] index_copy_ perm : Elapsed 0.033 ms (3.334 ms / 100) 3.344 -> 3.313 ( -0.93%) [ +0.00% +0.00% +0.24% / -0.66% -0.93% -0.69%] index_add_ perm_sorted : Elapsed 0.033 ms (3.344 ms / 100) 3.306 -> 3.278 ( -0.85%) [ +0.15% +0.24% +0.00% / -0.67% -0.85% -0.70%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.311 ms / 100) 4.475 -> 4.476 ( +0.02%) [ +0.00% +0.00% +0.13% / +0.04% +0.02% +0.16%] index_select const : Elapsed 0.045 ms (4.475 ms / 100) 4.479 -> 4.472 ( -0.16%) [ +0.18% +0.07% +0.00% / +0.13% -0.07% -0.16%] index_select wrap : Elapsed 0.045 ms (4.487 ms / 100) 4.475 -> 4.460 ( -0.34%) [ +0.11% +0.00% +0.09% / +0.09% -0.34% -0.22%] index_select linear : Elapsed 0.045 ms (4.480 ms / 100) 4.496 -> 4.487 ( -0.20%) [ +0.00% +0.13% +0.20% / +0.27% -0.07% -0.20%] index_select reverse : Elapsed 0.045 ms (4.496 ms / 100) 4.439 -> 4.443 ( +0.09%) [ +0.00% +0.36% +0.16% / +0.09% +0.52% +0.56%] index_select skip64 : Elapsed 0.044 ms (4.439 ms / 100) 4.452 -> 4.454 ( +0.04%) [ +0.00% +0.09% +0.31% / +0.11% +0.04% +0.27%] index_select skip256 : Elapsed 0.045 ms (4.452 ms / 100) 4.506 -> 4.495 ( -0.24%) [ +0.24% +0.18% +0.00% / +0.18% -0.02% -0.24%] index_select spread : Elapsed 0.045 ms (4.517 ms / 100) 4.485 -> 4.486 ( +0.02%) [ +0.22% +0.00% +0.07% / +0.25% +0.07% +0.02%] index_select strided 3 : Elapsed 0.045 ms (4.495 ms / 100) 4.465 -> 4.461 ( -0.09%) [ +0.16% +0.13% +0.00% / -0.09% +0.40% +0.16%] index_select strided 5 : Elapsed 0.045 ms (4.472 ms / 100) 4.502 -> 4.497 ( -0.11%) [ +0.00% +0.07% +0.04% / -0.11% +0.40% +0.16%] index_select strided 7 : Elapsed 0.045 ms (4.502 ms / 100) 4.466 -> 4.463 ( -0.07%) [ +0.02% +0.09% +0.00% / -0.07% +0.20% +0.38%] index_select strided 8 : Elapsed 0.045 ms (4.467 ms / 100) 4.472 -> 4.468 ( -0.09%) [ +0.00% +0.18% +0.11% / +0.13% +0.09% -0.09%] index_select strided 16 : Elapsed 0.045 ms (4.472 ms / 100) 4.484 -> 4.468 ( -0.36%) [ +0.11% +0.00% +0.09% / -0.18% -0.36% -0.36%] index_select strided 64 : Elapsed 0.045 ms (4.489 ms / 100) 4.471 -> 4.452 ( -0.42%) [ +0.16% +0.00% +0.07% / -0.09% -0.31% -0.42%] index_select strided 100 : Elapsed 0.045 ms (4.478 ms / 100) 4.467 -> 4.465 ( -0.04%) [ +0.20% +0.00% +0.13% / +0.11% +0.27% -0.04%] index_select strided 255 : Elapsed 0.045 ms (4.476 ms / 100) 4.462 -> 4.468 ( +0.13%) [ +0.09% +0.16% +0.00% / +0.20% +0.31% +0.13%] index_select strided 256 : Elapsed 0.045 ms (4.466 ms / 100) 4.490 -> 4.489 ( -0.02%) [ +0.13% +0.24% +0.00% / +0.29% -0.02% +0.24%] index_select strided 257 : Elapsed 0.045 ms (4.496 ms / 100) 4.537 -> 4.514 ( -0.51%) [ +0.04% +0.15% +0.00% / +0.20% -0.51% -0.42%] index_select random : Elapsed 0.045 ms (4.539 ms / 100) 4.535 -> 4.496 ( -0.86%) [ +0.00% +0.04% +0.09% / +0.20% -0.86% -0.64%] index_select random_sorted : Elapsed 0.045 ms (4.535 ms / 100) out_shape = [2048, 15] in_shape = [1000, 15] idx_dim = 0 B = [2048, 15] (stride (15, 1)) dim = 0 fill_cnt = 1000 2.244 -> 2.240 ( -0.18%) [ +0.00% +0.04% +0.13% / +0.09% -0.18% -0.18%] index_fill_ const : Elapsed 0.022 ms (2.244 ms / 100) 2.243 -> 2.248 ( +0.22%) [ +0.36% +0.00% +0.13% / +0.22% +1.07% +0.94%] index_fill_ linear : Elapsed 0.023 ms (2.251 ms / 100) 2.258 -> 2.255 ( -0.13%) [ +0.00% +0.04% +0.13% / +0.18% +0.27% -0.13%] index_fill_ reverse : Elapsed 0.023 ms (2.258 ms / 100) 2.231 -> 2.230 ( -0.04%) [ +0.36% +0.00% +0.40% / -0.04% +0.45% +0.63%] index_fill_ skip64 : Elapsed 0.022 ms (2.239 ms / 100) 2.244 -> 2.234 ( -0.45%) [ +0.22% +0.00% +0.36% / -0.45% +0.31% -0.22%] index_fill_ skip256 : Elapsed 0.022 ms (2.249 ms / 100) 2.288 -> 2.285 ( -0.13%) [ +0.00% +0.35% +0.35% / +0.17% -0.13% -0.04%] index_fill_ spread : Elapsed 0.023 ms (2.288 ms / 100) 2.279 -> 2.281 ( +0.09%) [ +0.09% +0.00% +0.04% / +0.09% +0.26% +0.13%] index_fill_ strided 3 : Elapsed 0.023 ms (2.281 ms / 100) 2.279 -> 2.282 ( +0.13%) [ +0.04% +0.00% +0.13% / +0.57% +0.26% +0.13%] index_fill_ strided 5 : Elapsed 0.023 ms (2.280 ms / 100) 2.277 -> 2.275 ( -0.09%) [ +0.00% +0.09% +0.18% / +0.31% +0.09% -0.09%] index_fill_ strided 7 : Elapsed 0.023 ms (2.277 ms / 100) 2.246 -> 2.245 ( -0.04%) [ +0.49% +0.00% +0.40% / +0.67% -0.04% +0.53%] index_fill_ strided 8 : Elapsed 0.023 ms (2.257 ms / 100) 2.248 -> 2.246 ( -0.09%) [ +0.31% +0.00% +0.09% / -0.09% +0.18% +0.36%] index_fill_ strided 16 : Elapsed 0.023 ms (2.255 ms / 100) 2.235 -> 2.241 ( +0.27%) [ +0.00% +0.36% +0.31% / +0.27% +0.49% +0.27%] index_fill_ strided 64 : Elapsed 0.022 ms (2.235 ms / 100) 2.263 -> 2.259 ( -0.18%) [ +0.66% +0.00% +0.13% / +0.22% -0.18% -0.13%] index_fill_ strided 100 : Elapsed 0.023 ms (2.278 ms / 100) 2.255 -> 2.269 ( +0.62%) [ +0.31% +0.27% +0.00% / +0.62% +0.71% +0.67%] index_fill_ strided 255 : Elapsed 0.023 ms (2.262 ms / 100) 2.234 -> 2.242 ( +0.36%) [ +0.54% +0.36% +0.00% / +0.36% +0.49% +0.58%] index_fill_ strided 256 : Elapsed 0.022 ms (2.246 ms / 100) 2.265 -> 2.268 ( +0.13%) [ +0.00% +0.00% +0.00% / +0.40% +0.13% +0.22%] index_fill_ strided 257 : Elapsed 0.023 ms (2.265 ms / 100) 2.275 -> 2.274 ( -0.04%) [ +0.18% +0.00% +0.13% / -0.04% +0.22% +0.35%] index_fill_ random : Elapsed 0.023 ms (2.279 ms / 100) 2.271 -> 2.253 ( -0.79%) [ +0.22% +0.09% +0.00% / +0.09% -0.70% -0.79%] index_fill_ random_sorted : Elapsed 0.023 ms (2.276 ms / 100) 2.278 -> 2.280 ( +0.09%) [ +0.00% +0.22% +0.40% / +0.22% +0.09% +0.31%] index_fill_ perm : Elapsed 0.023 ms (2.278 ms / 100) 2.274 -> 2.262 ( -0.53%) [ +0.26% +0.00% +0.04% / +0.62% -0.44% -0.53%] index_fill_ perm_sorted : Elapsed 0.023 ms (2.280 ms / 100) B = [2048, 15] (stride (15, 1)) A = [1000, 15] (stride (15, 1)) dim = 0 3.340 -> 3.315 ( -0.75%) [ +0.03% +0.00% +0.03% / -0.75% -0.69% -0.75%] index_add_ linear : Elapsed 0.033 ms (3.341 ms / 100) 3.299 -> 3.272 ( -0.82%) [ +0.06% +0.00% +0.06% / -0.76% -0.82% -0.79%] index_copy_ linear : Elapsed 0.033 ms (3.301 ms / 100) 3.328 -> 3.303 ( -0.75%) [ +0.00% +0.24% +0.03% / -0.75% -0.42% -0.39%] index_add_ reverse : Elapsed 0.033 ms (3.328 ms / 100) 3.291 -> 3.259 ( -0.97%) [ +0.06% +0.12% +0.00% / -0.97% -0.94% -0.64%] index_copy_ reverse : Elapsed 0.033 ms (3.293 ms / 100) 3.366 -> 3.336 ( -0.89%) [ +0.00% +0.15% +0.06% / -0.83% -0.89% -0.59%] index_add_ spread : Elapsed 0.034 ms (3.366 ms / 100) 3.342 -> 3.305 ( -1.11%) [ +0.00% +0.15% +0.06% / -0.66% -1.11% -1.05%] index_copy_ spread : Elapsed 0.033 ms (3.342 ms / 100) 3.345 -> 3.318 ( -0.81%) [ +0.00% +0.12% +0.00% / -0.78% -0.81% -0.45%] index_add_ strided 3 : Elapsed 0.033 ms (3.345 ms / 100) 3.319 -> 3.288 ( -0.93%) [ +0.03% +0.09% +0.00% / -0.93% -0.51% -0.63%] index_copy_ strided 3 : Elapsed 0.033 ms (3.320 ms / 100) 3.353 -> 3.328 ( -0.75%) [ +0.00% +0.00% +0.18% / -0.75% -0.39% -0.24%] index_add_ strided 5 : Elapsed 0.034 ms (3.353 ms / 100) 3.328 -> 3.299 ( -0.87%) [ +0.03% +0.00% +0.09% / -0.87% -0.51% -0.39%] index_copy_ strided 5 : Elapsed 0.033 ms (3.329 ms / 100) 3.351 -> 3.314 ( -1.10%) [ +0.00% +0.15% +0.12% / -0.81% -0.98% -1.10%] index_add_ strided 7 : Elapsed 0.034 ms (3.351 ms / 100) 3.320 -> 3.286 ( -1.02%) [ +0.06% +0.00% +0.15% / -0.84% -1.02% -0.93%] index_copy_ strided 7 : Elapsed 0.033 ms (3.322 ms / 100) 3.356 -> 3.327 ( -0.86%) [ +0.27% +0.00% +0.12% / -0.86% -0.30% -0.33%] index_add_ strided 255 : Elapsed 0.034 ms (3.365 ms / 100) 3.315 -> 3.281 ( -1.03%) [ +0.24% +0.09% +0.00% / -1.03% -0.51% -0.33%] index_copy_ strided 255 : Elapsed 0.033 ms (3.323 ms / 100) 3.350 -> 3.321 ( -0.87%) [ +0.15% +0.06% +0.00% / -0.87% -0.69% -0.84%] index_add_ strided 257 : Elapsed 0.034 ms (3.355 ms / 100) 3.311 -> 3.276 ( -1.06%) [ +0.27% +0.00% +0.18% / -0.66% -1.06% -0.69%] index_copy_ strided 257 : Elapsed 0.033 ms (3.320 ms / 100) 3.366 -> 3.333 ( -0.98%) [ +0.00% +0.12% +0.06% / -0.98% -0.53% -0.62%] index_add_ perm : Elapsed 0.034 ms (3.366 ms / 100) 3.324 -> 3.299 ( -0.75%) [ +0.27% +0.00% +0.18% / -0.75% -0.69% -0.54%] index_copy_ perm : Elapsed 0.033 ms (3.333 ms / 100) 3.342 -> 3.315 ( -0.81%) [ +0.06% +0.00% +0.03% / -0.81% -0.60% -0.66%] index_add_ perm_sorted : Elapsed 0.033 ms (3.344 ms / 100) 3.316 -> 3.281 ( -1.06%) [ +0.27% +0.00% +0.00% / -1.06% -1.03% -0.90%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.325 ms / 100) 4.466 -> 4.464 ( -0.04%) [ +0.02% +0.07% +0.00% / -0.04% +0.00% +0.02%] index_select const : Elapsed 0.045 ms (4.467 ms / 100) 4.497 -> 4.490 ( -0.16%) [ +0.00% +0.07% +0.18% / -0.04% +0.02% -0.16%] index_select wrap : Elapsed 0.045 ms (4.497 ms / 100) 4.494 -> 4.469 ( -0.56%) [ +0.00% +0.13% +0.38% / +0.31% -0.51% -0.56%] index_select linear : Elapsed 0.045 ms (4.494 ms / 100) 4.534 -> 4.499 ( -0.77%) [ +0.00% +0.09% +0.09% / +0.02% -0.77% -0.68%] index_select reverse : Elapsed 0.045 ms (4.534 ms / 100) 4.451 -> 4.459 ( +0.18%) [ +0.36% +0.09% +0.00% / +0.18% +0.72% +0.67%] index_select skip64 : Elapsed 0.045 ms (4.467 ms / 100) 4.468 -> 4.474 ( +0.13%) [ +0.00% +0.20% +0.00% / +0.13% +0.22% +0.40%] index_select skip256 : Elapsed 0.045 ms (4.468 ms / 100) 4.515 -> 4.505 ( -0.22%) [ +0.00% +0.20% +0.09% / +0.13% -0.20% -0.22%] index_select spread : Elapsed 0.045 ms (4.515 ms / 100) 4.502 -> 4.492 ( -0.22%) [ +0.02% +0.00% +0.04% / +0.24% -0.20% -0.22%] index_select strided 3 : Elapsed 0.045 ms (4.503 ms / 100) 4.472 -> 4.477 ( +0.11%) [ +0.00% +0.38% +0.25% / +0.11% +0.20% +0.34%] index_select strided 5 : Elapsed 0.045 ms (4.472 ms / 100) 4.493 -> 4.503 ( +0.22%) [ +0.00% +0.27% +0.09% / +0.22% +0.42% +0.69%] index_select strided 7 : Elapsed 0.045 ms (4.493 ms / 100) 4.477 -> 4.470 ( -0.16%) [ +0.02% +0.13% +0.00% / -0.16% +0.31% +0.67%] index_select strided 8 : Elapsed 0.045 ms (4.478 ms / 100) 4.501 -> 4.468 ( -0.73%) [ +0.13% +0.00% +0.09% / -0.13% -0.31% -0.73%] index_select strided 16 : Elapsed 0.045 ms (4.507 ms / 100) 4.502 -> 4.476 ( -0.58%) [ +0.13% +0.07% +0.00% / +0.00% -0.56% -0.58%] index_select strided 64 : Elapsed 0.045 ms (4.508 ms / 100) 4.480 -> 4.474 ( -0.13%) [ +0.00% +0.16% +0.07% / +0.11% -0.13% -0.09%] index_select strided 100 : Elapsed 0.045 ms (4.480 ms / 100) 4.477 -> 4.481 ( +0.09%) [ +0.27% +0.07% +0.00% / +0.36% +0.09% +0.20%] index_select strided 255 : Elapsed 0.045 ms (4.489 ms / 100) 4.472 -> 4.468 ( -0.09%) [ +0.02% +0.00% +0.04% / -0.09% +0.13% -0.02%] index_select strided 256 : Elapsed 0.045 ms (4.473 ms / 100) 4.504 -> 4.498 ( -0.13%) [ +0.00% +0.02% +0.22% / +0.22% +0.00% -0.13%] index_select strided 257 : Elapsed 0.045 ms (4.504 ms / 100) 4.520 -> 4.499 ( -0.46%) [ +0.18% +0.00% +0.11% / +0.18% -0.35% -0.46%] index_select random : Elapsed 0.045 ms (4.528 ms / 100) 4.498 -> 4.482 ( -0.36%) [ +0.00% +0.31% +0.44% / +0.36% -0.36% -0.18%] index_select random_sorted : Elapsed 0.045 ms (4.498 ms / 100) B = [2048, 15] (stride (15, 1)) A = [1000, 15] (stride (1, 1000)) dim = 0 3.336 -> 3.300 ( -1.08%) [ +0.09% +0.21% +0.00% / -0.69% -1.08% -0.93%] index_add_ linear : Elapsed 0.033 ms (3.339 ms / 100) 3.310 -> 3.277 ( -1.00%) [ +0.00% +0.36% +0.33% / -0.66% -1.00% -0.82%] index_copy_ linear : Elapsed 0.033 ms (3.310 ms / 100) 3.334 -> 3.305 ( -0.87%) [ +0.00% +0.00% +0.18% / -0.87% +0.00% +0.00%] index_add_ reverse : Elapsed 0.033 ms (3.334 ms / 100) 3.301 -> 3.271 ( -0.91%) [ +0.06% +0.06% +0.00% / -0.91% +0.36% +0.09%] index_copy_ reverse : Elapsed 0.033 ms (3.303 ms / 100) 3.363 -> 3.325 ( -1.13%) [ +0.09% +0.00% +0.21% / -0.86% -1.13% -1.07%] index_add_ spread : Elapsed 0.034 ms (3.366 ms / 100) 3.358 -> 3.308 ( -1.49%) [ +0.03% +0.03% +0.00% / -1.01% -1.49% -1.16%] index_copy_ spread : Elapsed 0.034 ms (3.359 ms / 100) 3.345 -> 3.317 ( -0.84%) [ +0.06% +0.03% +0.00% / -0.84% -0.12% -0.12%] index_add_ strided 3 : Elapsed 0.033 ms (3.347 ms / 100) 3.335 -> 3.303 ( -0.96%) [ +0.06% +0.06% +0.00% / -0.96% -0.63% -0.42%] index_copy_ strided 3 : Elapsed 0.033 ms (3.337 ms / 100) 3.344 -> 3.325 ( -0.57%) [ +0.00% +0.12% +0.30% / -0.57% -0.33% -0.15%] index_add_ strided 5 : Elapsed 0.033 ms (3.344 ms / 100) 3.334 -> 3.313 ( -0.63%) [ +0.00% +0.24% +0.36% / -0.45% -0.63% -0.30%] index_copy_ strided 5 : Elapsed 0.033 ms (3.334 ms / 100) 3.356 -> 3.332 ( -0.72%) [ +0.03% +0.00% +0.18% / -0.72% -0.60% -0.57%] index_add_ strided 7 : Elapsed 0.034 ms (3.357 ms / 100) 3.336 -> 3.315 ( -0.63%) [ +0.09% +0.00% +0.12% / -0.63% -0.42% -0.39%] index_copy_ strided 7 : Elapsed 0.033 ms (3.339 ms / 100) 3.342 -> 3.315 ( -0.81%) [ +0.12% +0.00% +0.15% / -0.81% -0.39% -0.12%] index_add_ strided 255 : Elapsed 0.033 ms (3.346 ms / 100) 3.314 -> 3.300 ( -0.42%) [ +0.18% +0.00% +0.21% / -0.42% -0.30% -0.18%] index_copy_ strided 255 : Elapsed 0.033 ms (3.320 ms / 100) 3.342 -> 3.315 ( -0.81%) [ +0.09% +0.00% +0.03% / -0.81% +1.62% -0.51%] index_add_ strided 257 : Elapsed 0.033 ms (3.345 ms / 100) 3.314 -> 3.289 ( -0.75%) [ +0.00% +0.15% +0.06% / -0.75% -0.21% -0.45%] index_copy_ strided 257 : Elapsed 0.033 ms (3.314 ms / 100) 3.363 -> 3.337 ( -0.77%) [ +0.00% +0.06% +0.00% / -0.77% -0.71% -0.59%] index_add_ perm : Elapsed 0.034 ms (3.363 ms / 100) 3.339 -> 3.312 ( -0.81%) [ +0.00% +0.18% +0.21% / -0.81% -0.51% -0.75%] index_copy_ perm : Elapsed 0.033 ms (3.339 ms / 100) 3.342 -> 3.315 ( -0.81%) [ +0.00% +0.18% +0.15% / -0.75% -0.81% -0.54%] index_add_ perm_sorted : Elapsed 0.033 ms (3.342 ms / 100) 3.318 -> 3.297 ( -0.63%) [ +0.00% +0.36% +0.27% / -0.63% -0.36% -0.54%] index_copy_ perm_sorted : Elapsed 0.033 ms (3.318 ms / 100) 4.485 -> 4.485 ( +0.00%) [ +0.00% +0.07% +0.00% / +0.18% +0.00% +0.07%] index_select const : Elapsed 0.045 ms (4.485 ms / 100) 4.513 -> 4.498 ( -0.33%) [ +0.04% +0.00% +0.02% / -0.33% -0.09% -0.16%] index_select wrap : Elapsed 0.045 ms (4.515 ms / 100) 4.510 -> 4.495 ( -0.33%) [ +0.00% +0.07% +0.09% / -0.20% -0.33% -0.27%] index_select linear : Elapsed 0.045 ms (4.510 ms / 100) 4.509 -> 4.492 ( -0.38%) [ +0.24% +0.29% +0.00% / -0.02% -0.38% -0.27%] index_select reverse : Elapsed 0.045 ms (4.520 ms / 100) 4.471 -> 4.473 ( +0.04%) [ +0.00% +0.13% +0.07% / +0.04% +0.45% +0.31%] index_select skip64 : Elapsed 0.045 ms (4.471 ms / 100) 4.469 -> 4.465 ( -0.09%) [ +0.00% +0.20% +0.04% / +0.11% -0.02% -0.09%] index_select skip256 : Elapsed 0.045 ms (4.469 ms / 100) 4.497 -> 4.496 ( -0.02%) [ +0.20% +0.22% +0.00% / -0.02% +0.42% +0.38%] index_select spread : Elapsed 0.045 ms (4.506 ms / 100) 4.501 -> 4.504 ( +0.07%) [ +0.00% +0.11% +0.07% / +0.07% +0.18% +0.20%] index_select strided 3 : Elapsed 0.045 ms (4.501 ms / 100) 4.500 -> 4.509 ( +0.20%) [ +0.02% +0.00% +0.18% / +0.22% +0.20% +0.33%] index_select strided 5 : Elapsed 0.045 ms (4.501 ms / 100) 4.506 -> 4.507 ( +0.02%) [ +0.00% +0.18% +0.04% / +0.02% +0.27% +0.47%] index_select strided 7 : Elapsed 0.045 ms (4.506 ms / 100) 4.502 -> 4.520 ( +0.40%) [ +0.29% +0.53% +0.00% / +0.40% +0.44% +0.69%] index_select strided 8 : Elapsed 0.045 ms (4.515 ms / 100) 4.515 -> 4.524 ( +0.20%) [ +0.00% +0.11% +0.09% / +0.20% +0.24% +0.53%] index_select strided 16 : Elapsed 0.045 ms (4.515 ms / 100) 4.535 -> 4.534 ( -0.02%) [ +0.04% +0.00% +0.07% / +0.00% +0.07% -0.02%] index_select strided 64 : Elapsed 0.045 ms (4.537 ms / 100) 4.488 -> 4.467 ( -0.47%) [ +0.00% +0.02% +0.31% / +0.09% -0.13% -0.47%] index_select strided 100 : Elapsed 0.045 ms (4.488 ms / 100) 4.512 -> 4.517 ( +0.11%) [ +0.00% +0.38% +0.09% / +0.11% +0.33% +0.33%] index_select strided 255 : Elapsed 0.045 ms (4.512 ms / 100) 4.507 -> 4.517 ( +0.22%) [ +0.00% +0.13% +0.29% / +0.24% +0.22% +0.33%] index_select strided 256 : Elapsed 0.045 ms (4.507 ms / 100) 4.524 -> 4.511 ( -0.29%) [ +0.15% +0.00% +0.00% / -0.29% -0.24% +0.11%] index_select strided 257 : Elapsed 0.045 ms (4.531 ms / 100) 4.550 -> 4.525 ( -0.55%) [ +0.33% +0.00% +0.22% / +0.42% -0.55% -0.53%] index_select random : Elapsed 0.046 ms (4.565 ms / 100) 4.540 -> 4.511 ( -0.64%) [ +0.18% +0.00% +0.09% / -0.15% -0.37% -0.64%] index_select random_sorted : Elapsed 0.045 ms (4.548 ms / 100) B = [2048, 15] (stride (1, 2048)) dim = 0 fill_cnt = 1000 2.280 -> 2.235 ( -1.97%) [ +0.22% +0.22% +0.00% / -1.36% -1.97% -1.80%] index_fill_ const : Elapsed 0.023 ms (2.285 ms / 100) 2.259 -> 2.243 ( -0.71%) [ +0.00% +0.09% +0.00% / -0.71% -0.62% +0.13%] index_fill_ linear : Elapsed 0.023 ms (2.259 ms / 100) 2.277 -> 2.250 ( -1.19%) [ +0.66% +0.00% +0.00% / -0.22% -1.01% -1.19%] index_fill_ reverse : Elapsed 0.023 ms (2.292 ms / 100) 2.253 -> 2.237 ( -0.71%) [ +0.49% +0.00% +0.36% / -0.71% -0.67% -0.22%] index_fill_ skip64 : Elapsed 0.023 ms (2.264 ms / 100) 2.254 -> 2.238 ( -0.71%) [ +0.31% +0.09% +0.00% / -0.71% -0.27% -0.40%] index_fill_ skip256 : Elapsed 0.023 ms (2.261 ms / 100) 2.337 -> 2.309 ( -1.20%) [ +0.09% +0.00% +0.21% / -0.73% -1.20% -0.77%] index_fill_ spread : Elapsed 0.023 ms (2.339 ms / 100) 2.327 -> 2.316 ( -0.47%) [ +0.17% +0.00% +0.09% / -0.47% +0.00% -0.04%] index_fill_ strided 3 : Elapsed 0.023 ms (2.331 ms / 100) 2.325 -> 2.320 ( -0.22%) [ +0.00% +0.34% +0.30% / -0.22% +0.09% +0.43%] index_fill_ strided 5 : Elapsed 0.023 ms (2.325 ms / 100) 2.327 -> 2.322 ( -0.21%) [ +0.21% +0.30% +0.00% / -0.09% -0.21% +0.00%] index_fill_ strided 7 : Elapsed 0.023 ms (2.332 ms / 100) 2.332 -> 2.320 ( -0.51%) [ +0.51% +0.00% +0.00% / +0.47% -0.47% -0.51%] index_fill_ strided 8 : Elapsed 0.023 ms (2.344 ms / 100) 2.289 -> 2.284 ( -0.22%) [ +0.09% +0.00% +0.13% / -0.22% -0.13% -0.17%] index_fill_ strided 16 : Elapsed 0.023 ms (2.291 ms / 100) 2.267 -> 2.256 ( -0.49%) [ +0.35% +0.00% +0.09% / +0.09% -0.49% -0.31%] index_fill_ strided 64 : Elapsed 0.023 ms (2.275 ms / 100) 2.335 -> 2.331 ( -0.17%) [ +0.47% +0.26% +0.00% / +0.00% -0.17% +0.17%] index_fill_ strided 100 : Elapsed 0.023 ms (2.346 ms / 100) 2.336 -> 2.334 ( -0.09%) [ +0.00% +0.17% +0.26% / +0.21% -0.09% +0.17%] index_fill_ strided 255 : Elapsed 0.023 ms (2.336 ms / 100) 2.248 -> 2.239 ( -0.40%) [ +0.00% +0.40% +0.49% / -0.18% -0.40% -0.18%] index_fill_ strided 256 : Elapsed 0.022 ms (2.248 ms / 100) 2.331 -> 2.331 ( +0.00%) [ +0.34% +0.21% +0.00% / +0.13% +0.00% +0.34%] index_fill_ strided 257 : Elapsed 0.023 ms (2.339 ms / 100) 2.331 -> 2.325 ( -0.26%) [ +0.39% +0.00% +0.21% / -0.04% -0.26% +0.13%] index_fill_ random : Elapsed 0.023 ms (2.340 ms / 100) 2.325 -> 2.307 ( -0.77%) [ +0.00% +0.17% +0.17% / -0.60% -0.77% -0.22%] index_fill_ random_sorted : Elapsed 0.023 ms (2.325 ms / 100) 2.334 -> 2.329 ( -0.21%) [ +0.13% +0.00% +0.13% / -0.13% +0.39% -0.21%] index_fill_ perm : Elapsed 0.023 ms (2.337 ms / 100) 2.326 -> 2.315 ( -0.47%) [ +0.21% +0.00% +0.34% / -0.47% -0.43% -0.13%] index_fill_ perm_sorted : Elapsed 0.023 ms (2.331 ms / 100) B = [2048, 15] (stride (1, 2048)) A = [1000, 15] (stride (15, 1)) dim = 0 3.358 -> 3.311 ( -1.40%) [ +0.18% +0.36% +0.00% / -1.40% -1.31% -1.25%] index_add_ linear : Elapsed 0.034 ms (3.364 ms / 100) 3.318 -> 3.275 ( -1.30%) [ +0.03% +0.00% +0.12% / -1.15% -1.21% -1.30%] index_copy_ linear : Elapsed 0.033 ms (3.319 ms / 100) 3.371 -> 3.302 ( -2.05%) [ +0.06% +0.00% +0.00% / -1.90% -1.78% -2.05%] index_add_ reverse : Elapsed 0.034 ms (3.373 ms / 100) 3.320 -> 3.268 ( -1.57%) [ +0.00% +0.00% +0.03% / -1.05% -1.57% -1.42%] index_copy_ reverse : Elapsed 0.033 ms (3.320 ms / 100) 3.419 -> 3.339 ( -2.34%) [ +0.12% +0.06% +0.00% / -1.96% -2.34% -2.25%] index_add_ spread : Elapsed 0.034 ms (3.423 ms / 100) 3.393 -> 3.334 ( -1.74%) [ +0.12% +0.00% +0.15% / -1.36% -1.50% -1.74%] index_copy_ spread : Elapsed 0.034 ms (3.397 ms / 100) 3.402 -> 3.345 ( -1.68%) [ +0.00% +0.18% +0.00% / -1.68% -1.35% -1.15%] index_add_ strided 3 : Elapsed 0.034 ms (3.402 ms / 100) 3.383 -> 3.349 ( -1.01%) [ +0.06% +0.00% +0.09% / -0.98% -0.95% -1.01%] index_copy_ strided 3 : Elapsed 0.034 ms (3.385 ms / 100) 3.423 -> 3.368 ( -1.61%) [ +0.03% +0.00% +0.12% / -1.61% -1.31% -1.08%] index_add_ strided 5 : Elapsed 0.034 ms (3.424 ms / 100) 3.391 -> 3.354 ( -1.09%) [ +0.00% +0.12% +0.09% / -1.09% -0.83% -0.88%] index_copy_ strided 5 : Elapsed 0.034 ms (3.391 ms / 100) 3.420 -> 3.376 ( -1.29%) [ +0.00% +0.06% +0.23% / -0.94% -1.29% -1.17%] index_add_ strided 7 : Elapsed 0.034 ms (3.420 ms / 100) 3.394 -> 3.352 ( -1.24%) [ +0.06% +0.00% +0.09% / -0.82% -1.03% -1.24%] index_copy_ strided 7 : Elapsed 0.034 ms (3.396 ms / 100) 3.444 -> 3.406 ( -1.10%) [ +0.00% +0.03% +0.06% / -1.10% -0.73% -0.46%] index_add_ strided 255 : Elapsed 0.034 ms (3.444 ms / 100) 3.397 -> 3.372 ( -0.74%) [ +0.21% +0.38% +0.00% / -0.74% -0.56% -0.53%] index_copy_ strided 255 : Elapsed 0.034 ms (3.404 ms / 100) 3.436 -> 3.391 ( -1.31%) [ +0.17% +0.00% +0.06% / -1.25% -0.99% -1.31%] index_add_ strided 257 : Elapsed 0.034 ms (3.442 ms / 100) 3.400 -> 3.361 ( -1.15%) [ +0.00% +0.29% +0.09% / -0.94% -1.15% -1.15%] index_copy_ strided 257 : Elapsed 0.034 ms (3.400 ms / 100) 3.452 -> 3.412 ( -1.16%) [ +0.00% +0.26% +0.29% / -0.87% -0.98% -1.16%] index_add_ perm : Elapsed 0.035 ms (3.452 ms / 100) 3.412 -> 3.370 ( -1.23%) [ +0.09% +0.09% +0.00% / -0.79% -1.11% -1.23%] index_copy_ perm : Elapsed 0.034 ms (3.415 ms / 100) 3.408 -> 3.338 ( -2.05%) [ +0.12% +0.15% +0.00% / -1.94% -2.05% -2.02%] index_add_ perm_sorted : Elapsed 0.034 ms (3.412 ms / 100) 3.390 -> 3.346 ( -1.30%) [ +0.12% +0.00% +0.21% / -1.27% -1.30% -1.18%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.394 ms / 100) 4.508 -> 4.452 ( -1.24%) [ +0.00% +0.07% +0.18% / -1.24% -0.98% -0.87%] index_select const : Elapsed 0.045 ms (4.508 ms / 100) 4.540 -> 4.490 ( -1.10%) [ +0.11% +0.15% +0.00% / -0.70% -1.10% -0.93%] index_select wrap : Elapsed 0.045 ms (4.545 ms / 100) 4.549 -> 4.488 ( -1.34%) [ +0.20% +0.11% +0.00% / -1.30% -1.34% -1.34%] index_select linear : Elapsed 0.046 ms (4.558 ms / 100) 4.572 -> 4.492 ( -1.75%) [ +0.04% +0.02% +0.00% / -1.47% -1.75% -1.68%] index_select reverse : Elapsed 0.046 ms (4.574 ms / 100) 4.496 -> 4.445 ( -1.13%) [ +0.42% +0.00% +0.11% / -0.82% -0.87% -1.13%] index_select skip64 : Elapsed 0.045 ms (4.515 ms / 100) 4.507 -> 4.458 ( -1.09%) [ +0.00% +0.20% +0.18% / -0.87% -1.09% -1.07%] index_select skip256 : Elapsed 0.045 ms (4.507 ms / 100) 4.547 -> 4.496 ( -1.12%) [ +0.26% +0.00% +0.22% / -1.01% -1.12% -0.97%] index_select spread : Elapsed 0.046 ms (4.559 ms / 100) 4.549 -> 4.498 ( -1.12%) [ +0.22% +0.00% +0.07% / -0.31% -1.03% -1.12%] index_select strided 3 : Elapsed 0.046 ms (4.559 ms / 100) 4.524 -> 4.473 ( -1.13%) [ +0.24% +0.00% +0.07% / -0.60% -1.13% -0.84%] index_select strided 5 : Elapsed 0.045 ms (4.535 ms / 100) 4.536 -> 4.504 ( -0.71%) [ +0.18% +0.00% +0.31% / -0.53% -0.71% -0.53%] index_select strided 7 : Elapsed 0.045 ms (4.544 ms / 100) 4.521 -> 4.473 ( -1.06%) [ +0.13% +0.00% +0.02% / -0.64% -1.06% -0.95%] index_select strided 8 : Elapsed 0.045 ms (4.527 ms / 100) 4.538 -> 4.475 ( -1.39%) [ +0.13% +0.00% +0.37% / -1.21% -1.21% -1.39%] index_select strided 16 : Elapsed 0.045 ms (4.544 ms / 100) 4.550 -> 4.488 ( -1.36%) [ +0.00% +0.20% +0.18% / -1.25% -1.36% -1.34%] index_select strided 64 : Elapsed 0.045 ms (4.550 ms / 100) 4.535 -> 4.459 ( -1.68%) [ +0.00% +0.02% +0.07% / -1.68% -1.48% -1.59%] index_select strided 100 : Elapsed 0.045 ms (4.535 ms / 100) 4.530 -> 4.495 ( -0.77%) [ +0.26% +0.07% +0.00% / -0.51% -0.64% -0.77%] index_select strided 255 : Elapsed 0.045 ms (4.542 ms / 100) 4.517 -> 4.470 ( -1.04%) [ +0.00% +0.20% +0.07% / -1.04% -0.53% -0.69%] index_select strided 256 : Elapsed 0.045 ms (4.517 ms / 100) 4.555 -> 4.520 ( -0.77%) [ +0.00% +0.11% +0.00% / -0.77% -0.64% -0.72%] index_select strided 257 : Elapsed 0.046 ms (4.555 ms / 100) 4.562 -> 4.508 ( -1.18%) [ +0.15% +0.00% +0.18% / -0.44% -1.18% -1.07%] index_select random : Elapsed 0.046 ms (4.569 ms / 100) 4.537 -> 4.487 ( -1.10%) [ +0.00% +0.24% +0.33% / -0.55% -1.08% -1.10%] index_select random_sorted : Elapsed 0.045 ms (4.537 ms / 100) B = [2048, 15] (stride (1, 2048)) A = [1000, 15] (stride (1, 1000)) dim = 0 3.385 -> 3.298 ( -2.57%) [ +0.00% +0.12% +0.06% / -2.27% -2.48% -2.57%] index_add_ linear : Elapsed 0.034 ms (3.385 ms / 100) 3.342 -> 3.252 ( -2.69%) [ +0.00% +0.09% +0.03% / -2.15% -2.69% -2.63%] index_copy_ linear : Elapsed 0.033 ms (3.342 ms / 100) 3.368 -> 3.291 ( -2.29%) [ +0.03% +0.12% +0.00% / -2.29% -1.28% -1.13%] index_add_ reverse : Elapsed 0.034 ms (3.369 ms / 100) 3.320 -> 3.251 ( -2.08%) [ +0.21% +0.00% +0.00% / -2.08% -1.23% -1.17%] index_copy_ reverse : Elapsed 0.033 ms (3.327 ms / 100) 3.422 -> 3.336 ( -2.51%) [ +0.20% +0.00% +0.15% / -2.05% -2.51% -2.48%] index_add_ spread : Elapsed 0.034 ms (3.429 ms / 100) 3.406 -> 3.322 ( -2.47%) [ +0.00% +0.06% +0.00% / -2.08% -2.38% -2.47%] index_copy_ spread : Elapsed 0.034 ms (3.406 ms / 100) 3.411 -> 3.348 ( -1.85%) [ +0.00% +0.03% +0.06% / -1.85% -1.29% -1.26%] index_add_ strided 3 : Elapsed 0.034 ms (3.411 ms / 100) 3.394 -> 3.325 ( -2.03%) [ +0.21% +0.15% +0.00% / -2.03% -1.18% -1.36%] index_copy_ strided 3 : Elapsed 0.034 ms (3.401 ms / 100) 3.418 -> 3.360 ( -1.70%) [ +0.12% +0.00% +0.18% / -1.70% -1.38% -1.40%] index_add_ strided 5 : Elapsed 0.034 ms (3.422 ms / 100) 3.400 -> 3.341 ( -1.74%) [ +0.06% +0.09% +0.00% / -1.74% -1.65% -1.53%] index_copy_ strided 5 : Elapsed 0.034 ms (3.402 ms / 100) 3.429 -> 3.381 ( -1.40%) [ +0.03% +0.06% +0.00% / -1.40% -1.25% -1.02%] index_add_ strided 7 : Elapsed 0.034 ms (3.430 ms / 100) 3.400 -> 3.354 ( -1.35%) [ +0.26% +0.35% +0.00% / -1.35% -1.12% -1.06%] index_copy_ strided 7 : Elapsed 0.034 ms (3.409 ms / 100) 3.437 -> 3.396 ( -1.19%) [ +0.09% +0.12% +0.00% / -1.19% -0.99% -1.02%] index_add_ strided 255 : Elapsed 0.034 ms (3.440 ms / 100) 3.404 -> 3.357 ( -1.38%) [ +0.12% +0.18% +0.00% / -1.29% -1.29% -1.38%] index_copy_ strided 255 : Elapsed 0.034 ms (3.408 ms / 100) 3.433 -> 3.384 ( -1.43%) [ +0.17% +0.00% +0.23% / -1.43% -0.99% -0.96%] index_add_ strided 257 : Elapsed 0.034 ms (3.439 ms / 100) 3.405 -> 3.351 ( -1.59%) [ +0.00% +0.15% +0.00% / -1.59% -1.29% -1.12%] index_copy_ strided 257 : Elapsed 0.034 ms (3.405 ms / 100) 3.453 -> 3.401 ( -1.51%) [ +0.06% +0.00% +0.06% / -1.07% -1.36% -1.51%] index_add_ perm : Elapsed 0.035 ms (3.455 ms / 100) 3.415 -> 3.365 ( -1.46%) [ +0.00% +0.20% +0.12% / -1.20% -1.46% -1.38%] index_copy_ perm : Elapsed 0.034 ms (3.415 ms / 100) 3.409 -> 3.338 ( -2.08%) [ +0.21% +0.38% +0.00% / -2.08% -1.88% -1.91%] index_add_ perm_sorted : Elapsed 0.034 ms (3.416 ms / 100) 3.394 -> 3.325 ( -2.03%) [ +0.03% +0.32% +0.00% / -1.89% -1.97% -2.03%] index_copy_ perm_sorted : Elapsed 0.034 ms (3.395 ms / 100) 4.523 -> 4.451 ( -1.59%) [ +0.00% +0.24% +0.09% / -1.22% -1.59% -1.59%] index_select const : Elapsed 0.045 ms (4.523 ms / 100) 4.549 -> 4.479 ( -1.54%) [ +0.00% +0.22% +0.31% / -1.41% -1.25% -1.54%] index_select wrap : Elapsed 0.045 ms (4.549 ms / 100) 4.555 -> 4.486 ( -1.51%) [ +0.00% +0.22% +0.09% / -1.12% -1.43% -1.51%] index_select linear : Elapsed 0.046 ms (4.555 ms / 100) 4.542 -> 4.495 ( -1.03%) [ +0.09% +0.00% +0.15% / -1.03% -0.95% -0.97%] index_select reverse : Elapsed 0.045 ms (4.546 ms / 100) 4.528 -> 4.466 ( -1.37%) [ +0.00% +0.04% +0.09% / -1.13% -1.33% -1.37%] index_select skip64 : Elapsed 0.045 ms (4.528 ms / 100) 4.519 -> 4.459 ( -1.33%) [ +0.11% +0.00% +0.15% / -1.15% -1.28% -1.33%] index_select skip256 : Elapsed 0.045 ms (4.524 ms / 100) 4.545 -> 4.516 ( -0.64%) [ +0.04% +0.00% +0.04% / -0.62% -0.53% -0.64%] index_select spread : Elapsed 0.045 ms (4.547 ms / 100) 4.542 -> 4.488 ( -1.19%) [ +0.00% +0.46% +0.13% / -1.19% -1.19% -1.14%] index_select strided 3 : Elapsed 0.045 ms (4.542 ms / 100) 4.552 -> 4.488 ( -1.41%) [ +0.18% +0.00% +0.35% / -1.41% -1.34% -1.25%] index_select strided 5 : Elapsed 0.046 ms (4.560 ms / 100) 4.563 -> 4.487 ( -1.67%) [ +0.24% +0.00% +0.26% / -1.40% -1.64% -1.67%] index_select strided 7 : Elapsed 0.046 ms (4.574 ms / 100) 4.566 -> 4.494 ( -1.58%) [ +0.31% +0.00% +0.24% / -1.58% -1.55% -1.53%] index_select strided 8 : Elapsed 0.046 ms (4.580 ms / 100) 4.570 -> 4.489 ( -1.77%) [ +0.24% +0.33% +0.00% / -1.77% -1.49% -1.27%] index_select strided 16 : Elapsed 0.046 ms (4.581 ms / 100) 4.594 -> 4.497 ( -2.11%) [ +0.09% +0.09% +0.00% / -2.11% -1.68% -1.68%] index_select strided 64 : Elapsed 0.046 ms (4.598 ms / 100) 4.551 -> 4.469 ( -1.80%) [ +0.04% +0.07% +0.00% / -1.65% -1.67% -1.80%] index_select strided 100 : Elapsed 0.046 ms (4.553 ms / 100) 4.570 -> 4.503 ( -1.47%) [ +0.00% +0.00% +0.04% / -1.47% -1.18% -1.23%] index_select strided 255 : Elapsed 0.046 ms (4.570 ms / 100) 4.574 -> 4.500 ( -1.62%) [ +0.22% +0.00% +0.15% / -1.62% -1.46% -1.31%] index_select strided 256 : Elapsed 0.046 ms (4.584 ms / 100) 4.577 -> 4.504 ( -1.59%) [ +0.00% +0.00% +0.15% / -1.59% -1.33% -1.31%] index_select strided 257 : Elapsed 0.046 ms (4.577 ms / 100) 4.610 -> 4.504 ( -2.30%) [ +0.00% +0.24% +0.13% / -2.30% -2.30% -2.28%] index_select random : Elapsed 0.046 ms (4.610 ms / 100) 4.577 -> 4.501 ( -1.66%) [ +0.07% +0.00% +0.11% / -1.62% -1.59% -1.66%] index_select random_sorted : Elapsed 0.046 ms (4.580 ms / 100) out_shape = [1000, 2048] in_shape = [1000, 15] idx_dim = 1 B = [1000, 2048] (stride (2048, 1)) dim = 1 fill_cnt = 15 0.951 -> 0.961 ( +1.05%) [ +1.26% +2.00% +0.00% / +1.05% +2.73% +1.79%] index_fill_ const : Elapsed 0.010 ms (0.963 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +0.84% +0.84% +0.00% / +0.63% +1.46% +1.15%] index_fill_ linear : Elapsed 0.010 ms (0.965 ms / 100) 0.955 -> 0.962 ( +0.73%) [ +1.05% +0.21% +0.00% / +0.73% +1.57% +1.36%] index_fill_ reverse : Elapsed 0.010 ms (0.965 ms / 100) 0.952 -> 0.960 ( +0.84%) [ +0.84% +0.42% +0.00% / +0.84% +1.79% +1.58%] index_fill_ skip64 : Elapsed 0.010 ms (0.960 ms / 100) 0.951 -> 0.959 ( +0.84%) [ +1.05% +0.11% +0.00% / +0.84% +1.68% +1.79%] index_fill_ skip256 : Elapsed 0.010 ms (0.961 ms / 100) 1.086 -> 1.065 ( -1.93%) [ +0.18% +0.74% +0.00% / -0.18% -1.93% -1.66%] index_fill_ spread : Elapsed 0.011 ms (1.088 ms / 100) 0.956 -> 0.964 ( +0.84%) [ +0.84% +0.10% +0.00% / +0.84% +1.36% +1.15%] index_fill_ strided 3 : Elapsed 0.010 ms (0.964 ms / 100) 0.953 -> 0.962 ( +0.94%) [ +0.94% +0.42% +0.00% / +0.94% +1.26% +1.26%] index_fill_ strided 5 : Elapsed 0.010 ms (0.962 ms / 100) 0.955 -> 0.962 ( +0.73%) [ +1.05% +0.00% +0.00% / +0.73% +1.36% +0.94%] index_fill_ strided 7 : Elapsed 0.010 ms (0.965 ms / 100) 0.957 -> 0.962 ( +0.52%) [ +0.84% +0.00% +0.00% / +0.52% +0.94% +0.94%] index_fill_ strided 8 : Elapsed 0.010 ms (0.965 ms / 100) 0.955 -> 0.967 ( +1.26%) [ +1.15% +0.00% +0.21% / +1.36% +1.47% +1.26%] index_fill_ strided 16 : Elapsed 0.010 ms (0.966 ms / 100) 0.994 -> 0.969 ( -2.52%) [ +0.10% +0.00% +0.60% / +0.70% -2.41% -2.52%] index_fill_ strided 64 : Elapsed 0.010 ms (0.995 ms / 100) good 1.083 -> 1.004 ( -7.29%) [ +0.46% +0.00% +0.83% / +0.46% -7.29% -6.83%] index_fill_ strided 100 : Elapsed 0.011 ms (1.088 ms / 100) 0.959 -> 0.963 ( +0.42%) [ +0.83% +0.00% +0.00% / +0.83% +0.42% +0.42%] index_fill_ strided 255 : Elapsed 0.010 ms (0.967 ms / 100) 0.958 -> 0.963 ( +0.52%) [ +0.94% +0.10% +0.00% / +2.40% +0.63% +0.52%] index_fill_ strided 256 : Elapsed 0.010 ms (0.967 ms / 100) 0.957 -> 0.963 ( +0.63%) [ +0.94% +0.31% +0.00% / +1.04% +0.63% +0.73%] index_fill_ strided 257 : Elapsed 0.010 ms (0.966 ms / 100) 1.060 -> 1.023 ( -3.49%) [ +0.38% +0.38% +0.00% / +0.28% -3.49% -3.49%] index_fill_ random : Elapsed 0.011 ms (1.064 ms / 100) 0.985 -> 0.973 ( -1.22%) [ +0.41% +0.81% +0.00% / +0.71% -1.12% -1.22%] index_fill_ random_sorted : Elapsed 0.010 ms (0.989 ms / 100) 0.957 -> 0.965 ( +0.84%) [ +1.15% +0.84% +0.00% / +1.46% +0.84% +0.84%] index_fill_ perm : Elapsed 0.010 ms (0.968 ms / 100) 0.959 -> 0.964 ( +0.52%) [ +0.73% +0.00% +0.00% / +0.73% +0.52% +0.63%] index_fill_ perm_sorted : Elapsed 0.010 ms (0.966 ms / 100) B = [1000, 2048] (stride (2048, 1)) A = [1000, 15] (stride (15, 1)) dim = 1 1.208 -> 1.220 ( +0.99%) [ +0.91% +0.41% +0.00% / +0.99% +1.90% +1.82%] index_add_ linear : Elapsed 0.012 ms (1.219 ms / 100) 1.216 -> 1.229 ( +1.07%) [ +1.07% +0.25% +0.00% / +1.07% +1.89% +1.89%] index_copy_ linear : Elapsed 0.012 ms (1.229 ms / 100) 1.207 -> 1.219 ( +0.99%) [ +1.24% +0.41% +0.00% / +0.99% +1.74% +1.57%] index_add_ reverse : Elapsed 0.012 ms (1.222 ms / 100) 1.217 -> 1.231 ( +1.15%) [ +0.90% +0.49% +0.00% / +1.31% +1.15% +1.15%] index_copy_ reverse : Elapsed 0.012 ms (1.228 ms / 100) 1.899 -> 1.875 ( -1.26%) [ +0.00% +0.16% +0.21% / +0.16% -1.26% -0.74%] index_add_ spread : Elapsed 0.019 ms (1.899 ms / 100) 1.230 -> 1.240 ( +0.81%) [ +0.98% +0.00% +0.24% / +0.81% +0.89% +1.22%] index_copy_ spread : Elapsed 0.012 ms (1.242 ms / 100) 1.208 -> 1.221 ( +1.08%) [ +0.91% +0.41% +0.00% / +1.08% +1.66% +1.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.219 ms / 100) 1.215 -> 1.227 ( +0.99%) [ +0.99% +0.16% +0.00% / +0.99% +1.73% +1.81%] index_copy_ strided 3 : Elapsed 0.012 ms (1.227 ms / 100) 1.205 -> 1.218 ( +1.08%) [ +0.91% +0.00% +0.00% / +1.08% +1.74% +1.49%] index_add_ strided 5 : Elapsed 0.012 ms (1.216 ms / 100) 1.206 -> 1.220 ( +1.16%) [ +1.16% +0.17% +0.00% / +1.16% +1.49% +1.41%] index_copy_ strided 5 : Elapsed 0.012 ms (1.220 ms / 100) 1.215 -> 1.228 ( +1.07%) [ +1.07% +0.33% +0.00% / +1.07% +1.56% +1.65%] index_add_ strided 7 : Elapsed 0.012 ms (1.228 ms / 100) 1.220 -> 1.233 ( +1.07%) [ +0.90% +0.00% +0.16% / +1.07% +1.39% +1.39%] index_copy_ strided 7 : Elapsed 0.012 ms (1.231 ms / 100) 1.210 -> 1.224 ( +1.16%) [ +1.24% +0.25% +0.00% / +1.16% +1.74% +1.57%] index_add_ strided 255 : Elapsed 0.012 ms (1.225 ms / 100) 1.219 -> 1.230 ( +0.90%) [ +0.98% +0.00% +0.00% / +0.90% +1.23% +1.23%] index_copy_ strided 255 : Elapsed 0.012 ms (1.231 ms / 100) 1.220 -> 1.230 ( +0.82%) [ +1.07% +0.00% +0.00% / +0.82% +1.31% +1.15%] index_add_ strided 257 : Elapsed 0.012 ms (1.233 ms / 100) 1.222 -> 1.234 ( +0.98%) [ +1.15% +0.25% +0.00% / +0.98% +1.47% +1.23%] index_copy_ strided 257 : Elapsed 0.012 ms (1.236 ms / 100) 1.763 -> 1.763 ( +0.00%) [ +0.00% +0.23% +0.23% / +0.00% +0.85% +1.82%] index_add_ perm : Elapsed 0.018 ms (1.763 ms / 100) 1.223 -> 1.231 ( +0.65%) [ +0.90% +0.00% +0.00% / +0.74% +0.65% +0.82%] index_copy_ perm : Elapsed 0.012 ms (1.234 ms / 100) 1.594 -> 1.542 ( -3.26%) [ +0.00% +1.94% +1.38% / +1.25% -2.20% -3.26%] index_add_ perm_sorted : Elapsed 0.016 ms (1.594 ms / 100) 1.220 -> 1.227 ( +0.57%) [ +0.98% +0.16% +0.00% / +0.57% +1.64% +1.97%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.232 ms / 100) GOOD 24.137 -> 13.536 (-43.92%) [ +0.00% +0.00% +0.01% / -43.90% -43.92% -43.88%] index_select const : Elapsed 0.241 ms (24.138 ms / 100) GOOD 24.089 -> 13.538 (-43.80%) [ +0.10% +0.10% +0.00% / -43.80% -43.63% -43.64%] index_select wrap : Elapsed 0.241 ms (24.112 ms / 100) GOOD 24.094 -> 14.055 (-41.67%) [ +0.00% +0.00% +0.05% / -40.87% -41.58% -41.67%] index_select linear : Elapsed 0.241 ms (24.094 ms / 100) GOOD 24.075 -> 14.171 (-41.14%) [ +0.16% +0.04% +0.00% / -41.14% -40.71% -40.92%] index_select reverse : Elapsed 0.241 ms (24.113 ms / 100) GOOD 24.116 -> 13.508 (-43.99%) [ +0.07% +0.05% +0.00% / -43.88% -43.92% -43.99%] index_select skip64 : Elapsed 0.241 ms (24.134 ms / 100) GOOD 24.105 -> 13.504 (-43.98%) [ +0.01% +0.00% +0.04% / -43.66% -43.97% -43.98%] index_select skip256 : Elapsed 0.241 ms (24.107 ms / 100) GOOD 24.071 -> 13.563 (-43.65%) [ +0.03% +0.01% +0.00% / -43.55% -43.65% -43.60%] index_select spread : Elapsed 0.241 ms (24.079 ms / 100) GOOD 24.153 -> 13.545 (-43.92%) [ +0.04% +0.00% +0.00% / -43.75% -43.79% -43.92%] index_select strided 3 : Elapsed 0.242 ms (24.163 ms / 100) GOOD 24.080 -> 13.515 (-43.87%) [ +0.00% +0.05% +0.02% / -43.78% -43.87% -43.82%] index_select strided 5 : Elapsed 0.241 ms (24.080 ms / 100) GOOD 24.067 -> 13.517 (-43.84%) [ +0.09% +0.00% +0.08% / -43.64% -43.84% -43.72%] index_select strided 7 : Elapsed 0.241 ms (24.088 ms / 100) GOOD 24.051 -> 13.525 (-43.77%) [ +0.06% +0.03% +0.00% / -43.72% -43.77% -43.76%] index_select strided 8 : Elapsed 0.241 ms (24.066 ms / 100) GOOD 24.076 -> 13.535 (-43.78%) [ +0.03% +0.00% +0.04% / -43.59% -43.78% -43.75%] index_select random : Elapsed 0.241 ms (24.084 ms / 100) GOOD 24.104 -> 13.594 (-43.60%) [ +0.00% +0.07% +0.00% / -43.54% -43.60% -43.60%] index_select random_sorted : Elapsed 0.241 ms (24.104 ms / 100) B = [1000, 2048] (stride (2048, 1)) A = [1000, 15] (stride (1, 1000)) dim = 1 1.105 -> 1.117 ( +1.09%) [ +1.18% +0.00% +0.18% / +1.09% +1.72% +1.45%] index_add_ linear : Elapsed 0.011 ms (1.118 ms / 100) 1.162 -> 1.178 ( +1.38%) [ +1.55% +0.17% +0.00% / +1.38% +2.15% +1.98%] index_copy_ linear : Elapsed 0.012 ms (1.180 ms / 100) 1.105 -> 1.115 ( +0.90%) [ +1.00% +0.09% +0.00% / +0.90% +1.54% +1.27%] index_add_ reverse : Elapsed 0.011 ms (1.116 ms / 100) 1.161 -> 1.174 ( +1.12%) [ +0.95% +0.09% +0.00% / +1.12% +1.72% +1.21%] index_copy_ reverse : Elapsed 0.012 ms (1.172 ms / 100) good 1.999 -> 1.863 ( -6.80%) [ +0.10% +0.00% +0.10% / -1.30% -6.30% -6.80%] index_add_ spread : Elapsed 0.020 ms (2.001 ms / 100) 1.253 -> 1.258 ( +0.40%) [ +0.56% +0.16% +0.00% / +0.40% +0.72% +0.88%] index_copy_ spread : Elapsed 0.013 ms (1.260 ms / 100) 1.105 -> 1.117 ( +1.09%) [ +1.27% +0.27% +0.00% / +1.09% +1.63% +1.45%] index_add_ strided 3 : Elapsed 0.011 ms (1.119 ms / 100) 1.164 -> 1.178 ( +1.20%) [ +1.29% +0.09% +0.00% / +1.20% +1.89% +1.89%] index_copy_ strided 3 : Elapsed 0.012 ms (1.179 ms / 100) 1.111 -> 1.120 ( +0.81%) [ +0.81% +0.18% +0.00% / +0.81% +1.71% +1.53%] index_add_ strided 5 : Elapsed 0.011 ms (1.120 ms / 100) 1.162 -> 1.174 ( +1.03%) [ +0.95% +0.00% +0.09% / +1.03% +4.30% +1.64%] index_copy_ strided 5 : Elapsed 0.012 ms (1.173 ms / 100) 1.110 -> 1.122 ( +1.08%) [ +0.99% +0.18% +0.00% / +1.08% +1.71% +1.71%] index_add_ strided 7 : Elapsed 0.011 ms (1.121 ms / 100) 1.161 -> 1.177 ( +1.38%) [ +1.03% +0.17% +0.00% / +1.55% +1.38% +1.55%] index_copy_ strided 7 : Elapsed 0.012 ms (1.173 ms / 100) 1.112 -> 1.124 ( +1.08%) [ +1.08% +0.18% +0.00% / +1.08% +1.62% +1.53%] index_add_ strided 255 : Elapsed 0.011 ms (1.124 ms / 100) 1.166 -> 1.183 ( +1.46%) [ +1.37% +0.26% +0.00% / +1.46% +1.54% +1.97%] index_copy_ strided 255 : Elapsed 0.012 ms (1.182 ms / 100) 1.116 -> 1.123 ( +0.63%) [ +0.72% +3.14% +0.00% / +0.63% +1.08% +0.90%] index_add_ strided 257 : Elapsed 0.011 ms (1.124 ms / 100) 1.168 -> 1.179 ( +0.94%) [ +1.54% +0.00% +0.09% / +0.94% +1.37% +1.54%] index_copy_ strided 257 : Elapsed 0.012 ms (1.186 ms / 100) 1.762 -> 1.764 ( +0.11%) [ +0.45% +0.57% +0.00% / +0.11% +0.17% +8.23%] index_add_ perm : Elapsed 0.018 ms (1.770 ms / 100) 1.235 -> 1.230 ( -0.40%) [ +0.00% +0.08% +0.00% / +0.73% -0.40% +0.49%] index_copy_ perm : Elapsed 0.012 ms (1.235 ms / 100) 1.724 -> 1.739 ( +0.87%) [ +0.41% +0.17% +0.00% / +0.87% +1.57% +1.57%] index_add_ perm_sorted : Elapsed 0.017 ms (1.731 ms / 100) 1.210 -> 1.215 ( +0.41%) [ +1.32% +0.00% +0.66% / +1.16% +0.41% +1.40%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.226 ms / 100) GOOD 20.948 -> 13.393 (-36.07%) [ +0.05% +0.00% +0.07% / -35.76% -35.93% -36.07%] index_select const : Elapsed 0.210 ms (20.959 ms / 100) good 20.819 -> 18.934 ( -9.05%) [ +0.09% +0.01% +0.00% / -8.84% -8.53% -9.05%] index_select wrap : Elapsed 0.208 ms (20.838 ms / 100) GOOD 20.976 -> 13.531 (-35.49%) [ +0.08% +0.00% +0.02% / -35.07% -35.49% -35.41%] index_select linear : Elapsed 0.210 ms (20.993 ms / 100) GOOD 20.913 -> 13.610 (-34.92%) [ +0.09% +0.09% +0.00% / -34.92% -34.90% -34.76%] index_select reverse : Elapsed 0.209 ms (20.932 ms / 100) GOOD 21.025 -> 13.464 (-35.96%) [ +0.00% +0.15% +0.07% / -35.77% -35.88% -35.96%] index_select skip64 : Elapsed 0.210 ms (21.025 ms / 100) GOOD 20.990 -> 13.448 (-35.93%) [ +0.06% +0.00% +0.17% / -35.65% -35.77% -35.93%] index_select skip256 : Elapsed 0.210 ms (21.002 ms / 100) GOOD 20.807 -> 13.519 (-35.03%) [ +0.00% +0.01% +0.02% / -35.01% -35.03% -34.94%] index_select spread : Elapsed 0.208 ms (20.807 ms / 100) GOOD 20.873 -> 14.532 (-30.38%) [ +0.11% +0.00% +0.09% / -30.33% -30.38% -30.35%] index_select strided 3 : Elapsed 0.209 ms (20.895 ms / 100) GOOD 20.819 -> 13.404 (-35.62%) [ +0.10% +0.15% +0.00% / -35.56% -35.62% -35.54%] index_select strided 5 : Elapsed 0.208 ms (20.840 ms / 100) Good 20.852 -> 18.551 (-11.03%) [ +0.22% +0.10% +0.00% / -11.03% -10.82% -10.94%] index_select strided 7 : Elapsed 0.209 ms (20.897 ms / 100) Good 20.858 -> 18.331 (-12.12%) [ +0.09% +0.05% +0.00% / -12.12% -11.19% -11.66%] index_select strided 8 : Elapsed 0.209 ms (20.877 ms / 100) Good 20.887 -> 17.764 (-14.95%) [ +0.12% +0.00% +0.07% / -14.66% -14.95% -14.92%] index_select random : Elapsed 0.209 ms (20.913 ms / 100) GOOD 20.831 -> 13.521 (-35.09%) [ +0.15% +0.04% +0.00% / -35.02% -35.09% -35.07%] index_select random_sorted : Elapsed 0.209 ms (20.863 ms / 100) B = [1000, 2048] (stride (1, 1000)) dim = 1 fill_cnt = 15 0.828 -> 0.836 ( +0.97%) [ +1.09% +0.12% +0.00% / +0.97% +1.69% +1.81%] index_fill_ const : Elapsed 0.008 ms (0.837 ms / 100) 0.831 -> 0.839 ( +0.96%) [ +1.08% +0.12% +0.00% / +0.96% +1.32% +1.56%] index_fill_ linear : Elapsed 0.008 ms (0.840 ms / 100) 0.831 -> 0.840 ( +1.08%) [ +0.84% +0.00% +0.00% / +1.08% +1.44% +1.44%] index_fill_ reverse : Elapsed 0.008 ms (0.838 ms / 100) 0.827 -> 0.835 ( +0.97%) [ +1.21% +0.12% +0.00% / +0.97% +1.69% +1.81%] index_fill_ skip64 : Elapsed 0.008 ms (0.837 ms / 100) 0.829 -> 0.837 ( +0.97%) [ +1.09% +0.00% +0.00% / +0.97% +1.57% +1.57%] index_fill_ skip256 : Elapsed 0.008 ms (0.838 ms / 100) 0.830 -> 0.839 ( +1.08%) [ +1.33% +0.24% +0.00% / +1.08% +1.57% +1.57%] index_fill_ spread : Elapsed 0.008 ms (0.841 ms / 100) 0.831 -> 0.839 ( +0.96%) [ +1.20% +0.12% +0.00% / +0.96% +3.49% +1.44%] index_fill_ strided 3 : Elapsed 0.008 ms (0.841 ms / 100) 0.828 -> 0.839 ( +1.33%) [ +1.33% +0.36% +0.00% / +1.33% +1.45% +1.33%] index_fill_ strided 5 : Elapsed 0.008 ms (0.839 ms / 100) 0.830 -> 0.839 ( +1.08%) [ +1.08% +0.00% +0.12% / +1.08% +1.20% +1.45%] index_fill_ strided 7 : Elapsed 0.008 ms (0.839 ms / 100) 0.830 -> 0.840 ( +1.20%) [ +1.20% +0.48% +0.00% / +1.20% +6.27% +10.72%] index_fill_ strided 8 : Elapsed 0.008 ms (0.840 ms / 100) 0.831 -> 0.839 ( +0.96%) [ +1.08% +0.24% +0.00% / +0.96% +1.32% +1.56%] index_fill_ strided 16 : Elapsed 0.008 ms (0.840 ms / 100) 0.830 -> 0.838 ( +0.96%) [ +0.84% +0.00% +0.12% / +0.96% +1.33% +1.20%] index_fill_ strided 64 : Elapsed 0.008 ms (0.837 ms / 100) 0.830 -> 0.839 ( +1.08%) [ +0.96% +0.00% +0.00% / +1.08% +1.20% +1.57%] index_fill_ strided 100 : Elapsed 0.008 ms (0.838 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +0.96% +1.08% +0.00% / +1.20% +0.72% +0.72%] index_fill_ strided 255 : Elapsed 0.008 ms (0.841 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +1.08% +0.36% +0.00% / +0.96% +0.72% +0.72%] index_fill_ strided 256 : Elapsed 0.008 ms (0.842 ms / 100) 0.834 -> 0.839 ( +0.60%) [ +0.84% +0.00% +0.00% / +0.96% +0.60% +0.72%] index_fill_ strided 257 : Elapsed 0.008 ms (0.841 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +1.20% +0.36% +0.00% / +1.08% +0.72% +0.84%] index_fill_ random : Elapsed 0.008 ms (0.843 ms / 100) 0.832 -> 0.839 ( +0.84%) [ +1.08% +0.00% +0.12% / +0.96% +0.96% +0.84%] index_fill_ random_sorted : Elapsed 0.008 ms (0.841 ms / 100) 0.831 -> 0.839 ( +0.96%) [ +1.08% +0.36% +0.00% / +0.96% +0.96% +1.20%] index_fill_ perm : Elapsed 0.008 ms (0.840 ms / 100) 0.833 -> 0.839 ( +0.72%) [ +0.96% +0.12% +0.00% / +1.08% +0.96% +0.72%] index_fill_ perm_sorted : Elapsed 0.008 ms (0.841 ms / 100) B = [1000, 2048] (stride (1, 1000)) A = [1000, 15] (stride (15, 1)) dim = 1 1.207 -> 1.219 ( +0.99%) [ +1.49% +0.75% +0.00% / +0.99% +1.82% +1.91%] index_add_ linear : Elapsed 0.012 ms (1.225 ms / 100) 1.165 -> 1.178 ( +1.12%) [ +1.80% +0.52% +0.00% / +1.12% +1.63% +1.72%] index_copy_ linear : Elapsed 0.012 ms (1.186 ms / 100) 1.211 -> 1.220 ( +0.74%) [ +0.83% +0.00% +0.00% / +0.74% +1.49% +1.24%] index_add_ reverse : Elapsed 0.012 ms (1.221 ms / 100) 1.162 -> 1.175 ( +1.12%) [ +1.12% +0.09% +0.00% / +1.12% +1.46% +1.29%] index_copy_ reverse : Elapsed 0.012 ms (1.175 ms / 100) 1.215 -> 1.226 ( +0.91%) [ +1.15% +0.41% +0.00% / +0.91% +1.32% +1.48%] index_add_ spread : Elapsed 0.012 ms (1.229 ms / 100) 1.171 -> 1.185 ( +1.20%) [ +1.20% +0.00% +0.26% / +1.20% +1.28% +10.50%] index_copy_ spread : Elapsed 0.012 ms (1.185 ms / 100) 1.208 -> 1.219 ( +0.91%) [ +0.75% +0.33% +0.00% / +0.91% +1.57% +1.57%] index_add_ strided 3 : Elapsed 0.012 ms (1.217 ms / 100) 1.163 -> 1.176 ( +1.12%) [ +0.77% +0.00% +0.09% / +1.12% +1.63% +1.72%] index_copy_ strided 3 : Elapsed 0.012 ms (1.172 ms / 100) 1.205 -> 1.217 ( +1.00%) [ +1.16% +0.08% +0.00% / +1.00% +1.74% +1.66%] index_add_ strided 5 : Elapsed 0.012 ms (1.219 ms / 100) 1.159 -> 1.173 ( +1.21%) [ +1.12% +0.00% +0.69% / +1.21% +2.07% +2.16%] index_copy_ strided 5 : Elapsed 0.012 ms (1.172 ms / 100) 1.213 -> 1.226 ( +1.07%) [ +0.99% +0.41% +0.00% / +1.07% +1.81% +1.73%] index_add_ strided 7 : Elapsed 0.012 ms (1.225 ms / 100) 1.169 -> 1.183 ( +1.20%) [ +0.68% +0.34% +0.00% / +1.20% +1.63% +1.63%] index_copy_ strided 7 : Elapsed 0.012 ms (1.177 ms / 100) 1.209 -> 1.220 ( +0.91%) [ +0.83% +0.00% +0.00% / +0.91% +1.49% +1.41%] index_add_ strided 255 : Elapsed 0.012 ms (1.219 ms / 100) 1.161 -> 1.175 ( +1.21%) [ +0.86% +0.00% +0.17% / +1.21% +1.98% +1.55%] index_copy_ strided 255 : Elapsed 0.012 ms (1.171 ms / 100) 1.215 -> 1.227 ( +0.99%) [ +1.23% +0.41% +0.00% / +0.99% +1.40% +1.56%] index_add_ strided 257 : Elapsed 0.012 ms (1.230 ms / 100) 1.172 -> 1.187 ( +1.28%) [ +1.02% +0.09% +0.00% / +1.45% +1.28% +1.62%] index_copy_ strided 257 : Elapsed 0.012 ms (1.184 ms / 100) 1.210 -> 1.219 ( +0.74%) [ +1.16% +0.00% +0.00% / +0.74% +1.40% +1.49%] index_add_ perm : Elapsed 0.012 ms (1.224 ms / 100) 1.161 -> 1.173 ( +1.03%) [ +0.78% +0.26% +0.00% / +1.03% +1.89% +1.55%] index_copy_ perm : Elapsed 0.012 ms (1.170 ms / 100) 1.209 -> 1.222 ( +1.08%) [ +1.24% +0.25% +0.00% / +1.08% +1.65% +1.57%] index_add_ perm_sorted : Elapsed 0.012 ms (1.224 ms / 100) 1.167 -> 1.182 ( +1.29%) [ +1.29% +0.17% +0.00% / +1.29% +1.46% +1.71%] index_copy_ perm_sorted : Elapsed 0.012 ms (1.182 ms / 100) 14.973 -> 14.978 ( +0.03%) [ +0.17% +0.00% +0.03% / +0.15% +0.03% +0.09%] index_select const : Elapsed 0.150 ms (14.998 ms / 100) 14.923 -> 14.947 ( +0.16%) [ +0.21% +0.05% +0.00% / +0.16% +0.64% +0.58%] index_select wrap : Elapsed 0.150 ms (14.955 ms / 100) 14.908 -> 14.924 ( +0.11%) [ +0.15% +0.07% +0.00% / +0.11% +0.30% +0.29%] index_select linear : Elapsed 0.149 ms (14.930 ms / 100) 14.946 -> 14.939 ( -0.05%) [ +0.10% +0.02% +0.00% / +0.04% +0.06% -0.05%] index_select reverse : Elapsed 0.150 ms (14.961 ms / 100) 14.975 -> 14.946 ( -0.19%) [ +0.07% +0.00% +0.02% / +0.17% -0.19% -0.17%] index_select skip64 : Elapsed 0.150 ms (14.986 ms / 100) 14.977 -> 14.946 ( -0.21%) [ +0.00% +0.01% +0.64% / +0.11% -0.21% -0.14%] index_select skip256 : Elapsed 0.150 ms (14.977 ms / 100) 14.963 -> 14.961 ( -0.01%) [ +0.00% +0.03% +0.09% / -0.01% +0.14% +0.29%] index_select spread : Elapsed 0.150 ms (14.963 ms / 100) 15.003 -> 15.018 ( +0.10%) [ +0.24% +0.00% +0.11% / +0.10% +0.21% +0.19%] index_select strided 3 : Elapsed 0.150 ms (15.039 ms / 100) 14.909 -> 14.920 ( +0.07%) [ +0.18% +0.01% +0.00% / +0.09% +0.08% +0.07%] index_select strided 5 : Elapsed 0.149 ms (14.936 ms / 100) 14.915 -> 14.920 ( +0.03%) [ +0.03% +0.01% +0.00% / +0.03% +0.10% +0.19%] index_select strided 7 : Elapsed 0.149 ms (14.920 ms / 100) 14.896 -> 14.928 ( +0.21%) [ +0.28% +0.00% +0.11% / +0.21% +0.28% +0.40%] index_select strided 8 : Elapsed 0.149 ms (14.937 ms / 100) 14.936 -> 14.939 ( +0.02%) [ +0.19% +0.00% +0.04% / +0.17% +0.09% +0.02%] index_select random : Elapsed 0.150 ms (14.964 ms / 100) 14.972 -> 14.983 ( +0.07%) [ +0.02% +0.01% +0.00% / +0.07% +0.07% +0.07%] index_select random_sorted : Elapsed 0.150 ms (14.975 ms / 100) B = [1000, 2048] (stride (1, 1000)) A = [1000, 15] (stride (1, 1000)) dim = 1 1.109 -> 1.119 ( +0.90%) [ +0.99% +0.00% +1.71% / +0.90% +0.99% +1.08%] index_add_ linear : Elapsed 0.011 ms (1.120 ms / 100) 1.053 -> 1.059 ( +0.57%) [ +0.47% +0.09% +0.00% / +0.57% +0.95% +1.14%] index_copy_ linear : Elapsed 0.011 ms (1.058 ms / 100) 1.104 -> 1.117 ( +1.18%) [ +0.91% +0.18% +0.00% / +1.18% +1.45% +2.17%] index_add_ reverse : Elapsed 0.011 ms (1.114 ms / 100) 1.048 -> 1.057 ( +0.86%) [ +0.76% +0.19% +0.00% / +0.86% +1.24% +2.00%] index_copy_ reverse : Elapsed 0.011 ms (1.056 ms / 100) 1.110 -> 1.120 ( +0.90%) [ +1.08% +0.27% +0.00% / +0.90% +1.35% +1.71%] index_add_ spread : Elapsed 0.011 ms (1.122 ms / 100) 1.056 -> 1.065 ( +0.85%) [ +3.41% +0.00% +0.09% / +0.85% +1.04% +1.23%] index_copy_ spread : Elapsed 0.011 ms (1.092 ms / 100) 1.106 -> 1.119 ( +1.18%) [ +1.18% +0.00% +0.00% / +1.18% +1.54% +1.63%] index_add_ strided 3 : Elapsed 0.011 ms (1.119 ms / 100) 1.054 -> 1.057 ( +0.28%) [ +0.38% +0.00% +0.00% / +0.28% +1.04% +1.04%] index_copy_ strided 3 : Elapsed 0.011 ms (1.058 ms / 100) 1.108 -> 1.120 ( +1.08%) [ +0.99% +0.36% +0.00% / +1.08% +1.90% +1.81%] index_add_ strided 5 : Elapsed 0.011 ms (1.119 ms / 100) 1.055 -> 1.064 ( +0.85%) [ +0.76% +0.00% +0.09% / +0.85% +1.42% +1.42%] index_copy_ strided 5 : Elapsed 0.011 ms (1.063 ms / 100) 1.107 -> 1.119 ( +1.08%) [ +1.08% +0.27% +0.00% / +1.08% +1.81% +2.08%] index_add_ strided 7 : Elapsed 0.011 ms (1.119 ms / 100) 1.056 -> 1.063 ( +0.66%) [ +0.57% +0.00% +0.00% / +0.66% +1.42% +1.52%] index_copy_ strided 7 : Elapsed 0.011 ms (1.062 ms / 100) 1.108 -> 1.119 ( +0.99%) [ +1.08% +0.18% +0.00% / +0.99% +1.53% +2.17%] index_add_ strided 255 : Elapsed 0.011 ms (1.120 ms / 100) 1.055 -> 1.060 ( +0.47%) [ +0.47% +0.00% +0.09% / +0.47% +1.14% +1.90%] index_copy_ strided 255 : Elapsed 0.011 ms (1.060 ms / 100) 1.109 -> 1.122 ( +1.17%) [ +1.08% +0.36% +0.00% / +1.17% +1.17% +1.35%] index_add_ strided 257 : Elapsed 0.011 ms (1.121 ms / 100) 1.054 -> 1.065 ( +1.04%) [ +1.04% +0.19% +0.00% / +1.04% +1.23% +1.14%] index_copy_ strided 257 : Elapsed 0.011 ms (1.065 ms / 100) 1.105 -> 1.118 ( +1.18%) [ +1.09% +0.00% +0.09% / +1.18% +1.18% +1.27%] index_add_ perm : Elapsed 0.011 ms (1.117 ms / 100) 1.050 -> 1.058 ( +0.76%) [ +0.86% +0.10% +0.00% / +0.76% +0.86% +0.86%] index_copy_ perm : Elapsed 0.011 ms (1.059 ms / 100) 1.106 -> 1.121 ( +1.36%) [ +1.18% +0.18% +0.00% / +1.45% +1.45% +1.36%] index_add_ perm_sorted : Elapsed 0.011 ms (1.119 ms / 100) 1.051 -> 1.060 ( +0.86%) [ +0.76% +0.48% +0.00% / +0.86% +1.14% +1.05%] index_copy_ perm_sorted : Elapsed 0.011 ms (1.059 ms / 100) 13.388 -> 13.463 ( +0.56%) [ +0.52% +0.02% +0.00% / +0.63% +0.56% +0.71%] index_select const : Elapsed 0.135 ms (13.458 ms / 100) 13.830 -> 13.910 ( +0.58%) [ +0.43% +0.04% +0.00% / +0.65% +0.58% +0.77%] index_select wrap : Elapsed 0.139 ms (13.889 ms / 100) 13.453 -> 13.484 ( +0.23%) [ +0.53% +0.00% +0.18% / +0.57% +0.23% +0.42%] index_select linear : Elapsed 0.135 ms (13.524 ms / 100) 13.479 -> 13.538 ( +0.44%) [ +0.43% +0.01% +0.00% / +0.44% +0.59% +0.69%] index_select reverse : Elapsed 0.135 ms (13.537 ms / 100) 13.431 -> 13.461 ( +0.22%) [ +0.29% +0.00% +0.04% / +0.45% +0.42% +0.22%] index_select skip64 : Elapsed 0.135 ms (13.470 ms / 100) 13.397 -> 13.449 ( +0.39%) [ +0.60% +0.05% +0.00% / +0.46% +0.51% +0.39%] index_select skip256 : Elapsed 0.135 ms (13.478 ms / 100) 13.446 -> 13.502 ( +0.42%) [ +0.34% +0.01% +0.00% / +0.42% +0.48% +0.56%] index_select spread : Elapsed 0.135 ms (13.492 ms / 100) 13.480 -> 13.566 ( +0.64%) [ +0.44% +0.00% +0.11% / +0.64% +0.91% +0.91%] index_select strided 3 : Elapsed 0.135 ms (13.539 ms / 100) 13.416 -> 13.494 ( +0.58%) [ +0.45% +0.15% +0.00% / +0.66% +0.69% +0.58%] index_select strided 5 : Elapsed 0.135 ms (13.477 ms / 100) 13.792 -> 13.921 ( +0.94%) [ +0.65% +0.04% +0.00% / +0.94% +1.22% +1.07%] index_select strided 7 : Elapsed 0.139 ms (13.881 ms / 100) 13.830 -> 13.904 ( +0.54%) [ +0.54% +0.00% +0.12% / +0.67% +0.54% +0.55%] index_select strided 8 : Elapsed 0.139 ms (13.904 ms / 100) 13.829 -> 13.903 ( +0.54%) [ +0.37% +0.20% +0.00% / +0.54% +0.87% +0.86%] index_select random : Elapsed 0.139 ms (13.880 ms / 100) 13.437 -> 13.506 ( +0.51%) [ +0.63% +0.04% +0.00% / +0.51% +0.54% +0.57%] index_select random_sorted : Elapsed 0.135 ms (13.521 ms / 100)